From 60147c6034e32f687ef82bafe3f0d7fdf451072a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 20 Jan 2022 14:49:12 +0100
Subject: [PATCH 001/946] [EarlyCSE] Regenerate test checks (NFC)

---
 llvm/test/Transforms/EarlyCSE/atomics.ll      | 194 +++++++++------
 llvm/test/Transforms/EarlyCSE/basic.ll        | 226 +++++++++++-------
 .../Transforms/EarlyCSE/const-speculation.ll  |  18 +-
 .../test/Transforms/EarlyCSE/floatingpoint.ll |  37 +--
 llvm/test/Transforms/EarlyCSE/memoryssa.ll    | 173 +++++++++-----
 llvm/test/Transforms/EarlyCSE/pr33406.ll      |  20 +-
 .../Transforms/EarlyCSE/readnone-mayunwind.ll |   9 +-
 llvm/test/Transforms/EarlyCSE/writeonly.ll    |   8 +-
 8 files changed, 439 insertions(+), 246 deletions(-)

diff --git a/llvm/test/Transforms/EarlyCSE/atomics.ll b/llvm/test/Transforms/EarlyCSE/atomics.ll
index 4a4b76666344a..4d67858237bc9 100644
--- a/llvm/test/Transforms/EarlyCSE/atomics.ll
+++ b/llvm/test/Transforms/EarlyCSE/atomics.ll
@@ -1,71 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
 ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
 
-; CHECK-LABEL: @test12(
 define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, i32* [[P2:%.*]] seq_cst, align 4
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[P1]], align 4
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
   %load0 = load i32, i32* %P1
   %1 = load atomic i32, i32* %P2 seq_cst, align 4
   %load1 = load i32, i32* %P1
   %sel = select i1 %B, i32 %load0, i32 %load1
   ret i32 %sel
-  ; CHECK: load i32, i32* %P1
-  ; CHECK: load i32, i32* %P1
 }
 
-; CHECK-LABEL: @test13(
 ; atomic to non-atomic forwarding is legal
 define i32 @test13(i1 %B, i32* %P1) {
+; CHECK-LABEL: @test13(
+; CHECK-NEXT:    [[A:%.*]] = load atomic i32, i32* [[P1:%.*]] seq_cst, align 4
+; CHECK-NEXT:    ret i32 0
+;
   %a = load atomic i32, i32* %P1 seq_cst, align 4
   %b = load i32, i32* %P1
   %res = sub i32 %a, %b
   ret i32 %res
-  ; CHECK: load atomic i32, i32* %P1
-  ; CHECK: ret i32 0
 }
 
-; CHECK-LABEL: @test14(
 ; atomic to unordered atomic forwarding is legal
 define i32 @test14(i1 %B, i32* %P1) {
+; CHECK-LABEL: @test14(
+; CHECK-NEXT:    [[A:%.*]] = load atomic i32, i32* [[P1:%.*]] seq_cst, align 4
+; CHECK-NEXT:    ret i32 0
+;
   %a = load atomic i32, i32* %P1 seq_cst, align 4
   %b = load atomic i32, i32* %P1 unordered, align 4
   %res = sub i32 %a, %b
   ret i32 %res
-  ; CHECK: load atomic i32, i32* %P1 seq_cst
-  ; CHECK-NEXT: ret i32 0
 }
 
-; CHECK-LABEL: @test15(
 ; implementation restriction: can't forward to stonger
 ; than unordered
 define i32 @test15(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT:    [[A:%.*]] = load atomic i32, i32* [[P1:%.*]] seq_cst, align 4
+; CHECK-NEXT:    [[B:%.*]] = load atomic i32, i32* [[P1]] seq_cst, align 4
+; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
   %a = load atomic i32, i32* %P1 seq_cst, align 4
   %b = load atomic i32, i32* %P1 seq_cst, align 4
   %res = sub i32 %a, %b
   ret i32 %res
-  ; CHECK: load atomic i32, i32* %P1
-  ; CHECK: load atomic i32, i32* %P1
 }
 
-; CHECK-LABEL: @test16(
 ; forwarding non-atomic to atomic is wrong! (However,
 ; it would be legal to use the later value in place of the
 ; former in this particular example.  We just don't
 ; do that right now.)
 define i32 @test16(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load atomic i32, i32* [[P1]] unordered, align 4
+; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
   %a = load i32, i32* %P1, align 4
   %b = load atomic i32, i32* %P1 unordered, align 4
   %res = sub i32 %a, %b
   ret i32 %res
-  ; CHECK: load i32, i32* %P1
-  ; CHECK: load atomic i32, i32* %P1
 }
 
 ; Can't DSE across a full fence
 define void @fence_seq_cst_store(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @fence_seq_cst_store
-; CHECK: store
-; CHECK: store atomic
-; CHECK: store
+; CHECK-LABEL: @fence_seq_cst_store(
+; CHECK-NEXT:    store i32 0, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    store atomic i32 0, i32* [[P2:%.*]] seq_cst, align 4
+; CHECK-NEXT:    store i32 0, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store i32 0, i32* %P1, align 4
   store atomic i32 0, i32* %P2 seq_cst, align 4
   store i32 0, i32* %P1, align 4
@@ -74,10 +89,12 @@ define void @fence_seq_cst_store(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can't DSE across a full fence
 define void @fence_seq_cst(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @fence_seq_cst
-; CHECK: store
-; CHECK: fence seq_cst
-; CHECK: store
+; CHECK-LABEL: @fence_seq_cst(
+; CHECK-NEXT:    store i32 0, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    store i32 0, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store i32 0, i32* %P1, align 4
   fence seq_cst
   store i32 0, i32* %P1, align 4
@@ -86,10 +103,12 @@ define void @fence_seq_cst(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can't DSE across a full fence
 define void @fence_asm_sideeffect(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @fence_asm_sideeffect
-; CHECK: store
-; CHECK: call void asm sideeffect
-; CHECK: store
+; CHECK-LABEL: @fence_asm_sideeffect(
+; CHECK-NEXT:    store i32 0, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    call void asm sideeffect "", ""()
+; CHECK-NEXT:    store i32 0, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store i32 0, i32* %P1, align 4
   call void asm sideeffect "", ""()
   store i32 0, i32* %P1, align 4
@@ -98,10 +117,12 @@ define void @fence_asm_sideeffect(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can't DSE across a full fence
 define void @fence_asm_memory(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @fence_asm_memory
-; CHECK: store
-; CHECK: call void asm
-; CHECK: store
+; CHECK-LABEL: @fence_asm_memory(
+; CHECK-NEXT:    store i32 0, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    call void asm "", "~{memory}"()
+; CHECK-NEXT:    store i32 0, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store i32 0, i32* %P1, align 4
   call void asm "", "~{memory}"()
   store i32 0, i32* %P1, align 4
@@ -110,32 +131,39 @@ define void @fence_asm_memory(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can't remove a volatile load
 define i32 @volatile_load(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @volatile_load(
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load volatile i32, i32* [[P1]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
   %a = load i32, i32* %P1, align 4
   %b = load volatile i32, i32* %P1, align 4
   %res = sub i32 %a, %b
   ret i32 %res
-  ; CHECK-LABEL: @volatile_load
-  ; CHECK: load i32, i32* %P1
-  ; CHECK: load volatile i32, i32* %P1
 }
 
 ; Can't remove redundant volatile loads
 define i32 @redundant_volatile_load(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @redundant_volatile_load(
+; CHECK-NEXT:    [[A:%.*]] = load volatile i32, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load volatile i32, i32* [[P1]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = sub i32 [[A]], [[B]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
   %a = load volatile i32, i32* %P1, align 4
   %b = load volatile i32, i32* %P1, align 4
   %res = sub i32 %a, %b
   ret i32 %res
-  ; CHECK-LABEL: @redundant_volatile_load
-  ; CHECK: load volatile i32, i32* %P1
-  ; CHECK: load volatile i32, i32* %P1
-  ; CHECK: sub
 }
 
 ; Can't DSE a volatile store
 define void @volatile_store(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @volatile_store
-; CHECK: store volatile
-; CHECK: store
+; CHECK-LABEL: @volatile_store(
+; CHECK-NEXT:    store volatile i32 0, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store volatile i32 0, i32* %P1, align 4
   store i32 3, i32* %P1, align 4
   ret void
@@ -143,9 +171,11 @@ define void @volatile_store(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can't DSE a redundant volatile store
 define void @redundant_volatile_store(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @redundant_volatile_store
-; CHECK: store volatile
-; CHECK: store volatile
+; CHECK-LABEL: @redundant_volatile_store(
+; CHECK-NEXT:    store volatile i32 0, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    store volatile i32 0, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store volatile i32 0, i32* %P1, align 4
   store volatile i32 0, i32* %P1, align 4
   ret void
@@ -153,21 +183,24 @@ define void @redundant_volatile_store(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can value forward from volatiles
 define i32 @test20(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT:    [[A:%.*]] = load volatile i32, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    ret i32 0
+;
   %a = load volatile i32, i32* %P1, align 4
   %b = load i32, i32* %P1, align 4
   %res = sub i32 %a, %b
   ret i32 %res
-  ; CHECK-LABEL: @test20
-  ; CHECK: load volatile i32, i32* %P1
-  ; CHECK: ret i32 0
 }
 
 ; Can DSE a non-volatile store in favor of a volatile one
 ; currently a missed optimization
 define void @test21(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test21
-; CHECK: store 
-; CHECK: store volatile
+; CHECK-LABEL: @test21(
+; CHECK-NEXT:    store i32 0, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    store volatile i32 3, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store i32 0, i32* %P1, align 4
   store volatile i32 3, i32* %P1, align 4
   ret void
@@ -175,8 +208,10 @@ define void @test21(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can DSE a normal store in favor of a unordered one
 define void @test22(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test22
-; CHECK-NEXT: store atomic
+; CHECK-LABEL: @test22(
+; CHECK-NEXT:    store atomic i32 3, i32* [[P1:%.*]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
   store i32 0, i32* %P1, align 4
   store atomic i32 3, i32* %P1 unordered, align 4
   ret void
@@ -184,8 +219,10 @@ define void @test22(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can also DSE a unordered store in favor of a normal one
 define void @test23(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test23
-; CHECK-NEXT: store i32 0
+; CHECK-LABEL: @test23(
+; CHECK-NEXT:    store i32 0, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
   store atomic i32 3, i32* %P1 unordered, align 4
   store i32 0, i32* %P1, align 4
   ret void
@@ -195,9 +232,11 @@ define void @test23(i1 %B, i32* %P1, i32* %P2) {
 ; Note that we could remove the earlier store if we could
 ; represent the required ordering.
 define void @test24(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test24
-; CHECK-NEXT: store atomic
-; CHECK-NEXT: store i32 0
+; CHECK-LABEL: @test24(
+; CHECK-NEXT:    store atomic i32 3, i32* [[P1:%.*]] release, align 4
+; CHECK-NEXT:    store i32 0, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store atomic i32 3, i32* %P1 release, align 4
   store i32 0, i32* %P1, align 4
   ret void
@@ -206,9 +245,11 @@ define void @test24(i1 %B, i32* %P1, i32* %P2) {
 ; Can't remove volatile stores - each is independently observable and
 ; the count of such stores is an observable program side effect.
 define void @test25(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test25
-; CHECK-NEXT: store volatile
-; CHECK-NEXT: store volatile
+; CHECK-LABEL: @test25(
+; CHECK-NEXT:    store volatile i32 3, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    store volatile i32 0, i32* [[P1]], align 4
+; CHECK-NEXT:    ret void
+;
   store volatile i32 3, i32* %P1, align 4
   store volatile i32 0, i32* %P1, align 4
   ret void
@@ -216,9 +257,10 @@ define void @test25(i1 %B, i32* %P1, i32* %P2) {
 
 ; Can DSE a unordered store in favor of a unordered one
 define void @test26(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test26
-; CHECK-NEXT: store atomic i32 3, i32* %P1 unordered, align 4
-; CHECK-NEXT: ret
+; CHECK-LABEL: @test26(
+; CHECK-NEXT:    store atomic i32 3, i32* [[P1:%.*]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
   store atomic i32 0, i32* %P1 unordered, align 4
   store atomic i32 3, i32* %P1 unordered, align 4
   ret void
@@ -227,10 +269,11 @@ define void @test26(i1 %B, i32* %P1, i32* %P2) {
 ; Can DSE a unordered store in favor of a ordered one,
 ; but current don't due to implementation limits
 define void @test27(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test27
-; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4
-; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4
-; CHECK-NEXT: ret
+; CHECK-LABEL: @test27(
+; CHECK-NEXT:    store atomic i32 0, i32* [[P1:%.*]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 3, i32* [[P1]] release, align 4
+; CHECK-NEXT:    ret void
+;
   store atomic i32 0, i32* %P1 unordered, align 4
   store atomic i32 3, i32* %P1 release, align 4
   ret void
@@ -239,10 +282,11 @@ define void @test27(i1 %B, i32* %P1, i32* %P2) {
 ; Can DSE an unordered atomic store in favor of an
 ; ordered one, but current don't due to implementation limits
 define void @test28(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test28
-; CHECK-NEXT: store atomic i32 0, i32* %P1 unordered, align 4
-; CHECK-NEXT: store atomic i32 3, i32* %P1 release, align 4
-; CHECK-NEXT: ret
+; CHECK-LABEL: @test28(
+; CHECK-NEXT:    store atomic i32 0, i32* [[P1:%.*]] unordered, align 4
+; CHECK-NEXT:    store atomic i32 3, i32* [[P1]] release, align 4
+; CHECK-NEXT:    ret void
+;
   store atomic i32 0, i32* %P1 unordered, align 4
   store atomic i32 3, i32* %P1 release, align 4
   ret void
@@ -251,9 +295,11 @@ define void @test28(i1 %B, i32* %P1, i32* %P2) {
 ; As an implementation limitation, can't remove ordered stores
 ; see also: @test24
 define void @test29(i1 %B, i32* %P1, i32* %P2) {
-; CHECK-LABEL: @test29
-; CHECK-NEXT: store atomic
-; CHECK-NEXT: store atomic
+; CHECK-LABEL: @test29(
+; CHECK-NEXT:    store atomic i32 3, i32* [[P1:%.*]] release, align 4
+; CHECK-NEXT:    store atomic i32 0, i32* [[P1]] unordered, align 4
+; CHECK-NEXT:    ret void
+;
   store atomic i32 3, i32* %P1 release, align 4
   store atomic i32 0, i32* %P1 unordered, align 4
   ret void
diff --git a/llvm/test/Transforms/EarlyCSE/basic.ll b/llvm/test/Transforms/EarlyCSE/basic.ll
index 5178e5a89e205..df4c5c6c13ac3 100644
--- a/llvm/test/Transforms/EarlyCSE/basic.ll
+++ b/llvm/test/Transforms/EarlyCSE/basic.ll
@@ -1,62 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
 ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
 ; RUN: opt < %s -S -passes=early-cse | FileCheck %s
 
 declare void @llvm.assume(i1) nounwind
 
-; CHECK-LABEL: @test1(
 define void @test1(i8 %V, i32 *%P) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    store i32 23, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[C:%.*]] = zext i8 [[V:%.*]] to i32
+; CHECK-NEXT:    store volatile i32 [[C]], i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 [[C]], i32* [[P]], align 4
+; CHECK-NEXT:    [[E:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    store volatile i32 [[E]], i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 [[E]], i32* [[P]], align 4
+; CHECK-NEXT:    store volatile i32 [[E]], i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
   %A = bitcast i64 42 to double  ;; dead
   %B = add i32 4, 19             ;; constant folds
   store i32 %B, i32* %P
-  ; CHECK-NEXT: store i32 23, i32* %P
-  
+
   %C = zext i8 %V to i32
   %D = zext i8 %V to i32  ;; CSE
   store volatile i32 %C, i32* %P
   store volatile i32 %D, i32* %P
-  ; CHECK-NEXT: %C = zext i8 %V to i32
-  ; CHECK-NEXT: store volatile i32 %C
-  ; CHECK-NEXT: store volatile i32 %C
-  
+
   %E = add i32 %C, %C
   %F = add i32 %C, %C
   store volatile i32 %E, i32* %P
   store volatile i32 %F, i32* %P
-  ; CHECK-NEXT: %E = add i32 %C, %C
-  ; CHECK-NEXT: store volatile i32 %E
-  ; CHECK-NEXT: store volatile i32 %E
 
   %G = add nuw i32 %C, %C
   store volatile i32 %G, i32* %P
-  ; CHECK-NEXT: store volatile i32 %E
   ret void
 }
 
 
 ;; Simple load value numbering.
-; CHECK-LABEL: @test2(
 define i32 @test2(i32 *%P) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret i32 0
+;
   %V1 = load i32, i32* %P
   %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
-  ; CHECK: ret i32 0
 }
 
-; CHECK-LABEL: @test2a(
 define i32 @test2a(i32 *%P, i1 %b) {
+; CHECK-LABEL: @test2a(
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
+; CHECK-NEXT:    ret i32 0
+;
   %V1 = load i32, i32* %P
   tail call void @llvm.assume(i1 %b)
   %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
-  ; CHECK: ret i32 0
 }
 
 ;; Cross block load value numbering.
-; CHECK-LABEL: @test3(
 define i32 @test3(i32 *%P, i1 %Cond) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    store i32 4, i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 42
+; CHECK:       F:
+; CHECK-NEXT:    ret i32 0
+;
   %V1 = load i32, i32* %P
   br i1 %Cond, label %T, label %F
 T:
@@ -66,12 +82,19 @@ F:
   %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
-  ; CHECK: F:
-  ; CHECK: ret i32 0
 }
 
-; CHECK-LABEL: @test3a(
 define i32 @test3a(i32 *%P, i1 %Cond, i1 %b) {
+; CHECK-LABEL: @test3a(
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    store i32 4, i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 42
+; CHECK:       F:
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
+; CHECK-NEXT:    ret i32 0
+;
   %V1 = load i32, i32* %P
   br i1 %Cond, label %T, label %F
 T:
@@ -82,13 +105,20 @@ F:
   %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
-  ; CHECK: F:
-  ; CHECK: ret i32 0
 }
 
 ;; Cross block load value numbering stops when stores happen.
-; CHECK-LABEL: @test4(
 define i32 @test4(i32 *%P, i1 %Cond) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[T:%.*]], label [[F:%.*]]
+; CHECK:       T:
+; CHECK-NEXT:    ret i32 42
+; CHECK:       F:
+; CHECK-NEXT:    store i32 42, i32* [[P]], align 4
+; CHECK-NEXT:    [[DIFF:%.*]] = sub i32 [[V1]], 42
+; CHECK-NEXT:    ret i32 [[DIFF]]
+;
   %V1 = load i32, i32* %P
   br i1 %Cond, label %T, label %F
 T:
@@ -96,142 +126,166 @@ T:
 F:
   ; Clobbers V1
   store i32 42, i32* %P
-  
+
   %V2 = load i32, i32* %P
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
-  ; CHECK: F:
-  ; CHECK: ret i32 %Diff
 }
 
 declare i32 @func(i32 *%P) readonly
 
 ;; Simple call CSE'ing.
-; CHECK-LABEL: @test5(
 define i32 @test5(i32 *%P) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(i32* [[P:%.*]])
+; CHECK-NEXT:    ret i32 0
+;
   %V1 = call i32 @func(i32* %P)
   %V2 = call i32 @func(i32* %P)
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
-  ; CHECK: ret i32 0
 }
 
 ;; Trivial Store->load forwarding
-; CHECK-LABEL: @test6(
 define i32 @test6(i32 *%P) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT:    store i32 42, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret i32 42
+;
   store i32 42, i32* %P
   %V1 = load i32, i32* %P
   ret i32 %V1
-  ; CHECK: ret i32 42
 }
 
-; CHECK-LABEL: @test6a(
 define i32 @test6a(i32 *%P, i1 %b) {
+; CHECK-LABEL: @test6a(
+; CHECK-NEXT:    store i32 42, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[B:%.*]])
+; CHECK-NEXT:    ret i32 42
+;
   store i32 42, i32* %P
   tail call void @llvm.assume(i1 %b)
   %V1 = load i32, i32* %P
   ret i32 %V1
-  ; CHECK: ret i32 42
 }
 
 ;; Trivial dead store elimination.
-; CHECK-LABEL: @test7(
 define void @test7(i32 *%P) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT:    store i32 45, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
   store i32 42, i32* %P
   store i32 45, i32* %P
   ret void
-  ; CHECK-NEXT: store i32 45
-  ; CHECK-NEXT: ret void
 }
 
 ;; Readnone functions aren't invalidated by stores.
-; CHECK-LABEL: @test8(
 define i32 @test8(i32 *%P) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(i32* [[P:%.*]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    store i32 4, i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 0
+;
   %V1 = call i32 @func(i32* %P) readnone
   store i32 4, i32* %P
   %V2 = call i32 @func(i32* %P) readnone
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
-  ; CHECK: ret i32 0
 }
 
 ;; Trivial DSE can't be performed across a readonly call.  The call
 ;; can observe the earlier write.
-; CHECK-LABEL: @test9(
 define i32 @test9(i32 *%P) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT:    store i32 4, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(i32* [[P]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    store i32 5, i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 [[V1]]
+;
   store i32 4, i32* %P
   %V1 = call i32 @func(i32* %P) readonly
-  store i32 5, i32* %P        
+  store i32 5, i32* %P
   ret i32 %V1
-  ; CHECK: store i32 4, i32* %P        
-  ; CHECK-NEXT: %V1 = call i32 @func(i32* %P)
-  ; CHECK-NEXT: store i32 5, i32* %P        
-  ; CHECK-NEXT: ret i32 %V1
 }
 
 ;; Trivial DSE can be performed across a readnone call.
-; CHECK-LABEL: @test10
 define i32 @test10(i32 *%P) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT:    [[V1:%.*]] = call i32 @func(i32* [[P:%.*]]) #[[ATTR2]]
+; CHECK-NEXT:    store i32 5, i32* [[P]], align 4
+; CHECK-NEXT:    ret i32 [[V1]]
+;
   store i32 4, i32* %P
   %V1 = call i32 @func(i32* %P) readnone
-  store i32 5, i32* %P        
+  store i32 5, i32* %P
   ret i32 %V1
-  ; CHECK-NEXT: %V1 = call i32 @func(i32* %P)
-  ; CHECK-NEXT: store i32 5, i32* %P        
-  ; CHECK-NEXT: ret i32 %V1
 }
 
 ;; Trivial dead store elimination - should work for an entire series of dead stores too.
-; CHECK-LABEL: @test11(
 define void @test11(i32 *%P) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT:    store i32 45, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
   store i32 42, i32* %P
   store i32 43, i32* %P
   store i32 44, i32* %P
   store i32 45, i32* %P
   ret void
-  ; CHECK-NEXT: store i32 45
-  ; CHECK-NEXT: ret void
 }
 
-; CHECK-LABEL: @test12(
 define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT:    [[LOAD0:%.*]] = load i32, i32* [[P1:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load atomic i32, i32* [[P2:%.*]] seq_cst, align 4
+; CHECK-NEXT:    [[LOAD1:%.*]] = load i32, i32* [[P1]], align 4
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[B:%.*]], i32 [[LOAD0]], i32 [[LOAD1]]
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
   %load0 = load i32, i32* %P1
   %1 = load atomic i32, i32* %P2 seq_cst, align 4
   %load1 = load i32, i32* %P1
   %sel = select i1 %B, i32 %load0, i32 %load1
   ret i32 %sel
-  ; CHECK: load i32, i32* %P1
-  ; CHECK: load i32, i32* %P1
 }
 
 define void @dse1(i32 *%P) {
-; CHECK-LABEL: @dse1
-; CHECK-NOT: store
+; CHECK-LABEL: @dse1(
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
   %v = load i32, i32* %P
   store i32 %v, i32* %P
   ret void
 }
 
 define void @dse2(i32 *%P) {
-; CHECK-LABEL: @dse2
-; CHECK-NOT: store
+; CHECK-LABEL: @dse2(
+; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
+; CHECK-NEXT:    ret void
+;
   %v = load atomic i32, i32* %P seq_cst, align 4
   store i32 %v, i32* %P
   ret void
 }
 
 define void @dse3(i32 *%P) {
-; CHECK-LABEL: @dse3
-; CHECK-NOT: store
+; CHECK-LABEL: @dse3(
+; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* [[P:%.*]] seq_cst, align 4
+; CHECK-NEXT:    ret void
+;
   %v = load atomic i32, i32* %P seq_cst, align 4
   store atomic i32 %v, i32* %P unordered, align 4
   ret void
 }
 
 define i32 @dse4(i32 *%P, i32 *%Q) {
-; CHECK-LABEL: @dse4
-; CHECK-NOT: store
-; CHECK: ret i32 0
+; CHECK-LABEL: @dse4(
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[Q:%.*]], align 4
+; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    ret i32 0
+;
   %a = load i32, i32* %Q
   %v = load atomic i32, i32* %P unordered, align 4
   store atomic i32 %v, i32* %P unordered, align 4
@@ -242,14 +296,16 @@ define i32 @dse4(i32 *%P, i32 *%Q) {
 
 ; Note that in this example, %P and %Q could in fact be the same
 ; pointer.  %v could be different than the value observed for %a
-; and that's okay because we're using relaxed memory ordering.  
-; The only guarantee we have to provide is that each of the loads 
-; has to observe some value written to that location.  We  do 
-; not have to respect the order in which those writes were done.  
+; and that's okay because we're using relaxed memory ordering.
+; The only guarantee we have to provide is that each of the loads
+; has to observe some value written to that location.  We  do
+; not have to respect the order in which those writes were done.
 define i32 @dse5(i32 *%P, i32 *%Q) {
-; CHECK-LABEL: @dse5
-; CHECK-NOT: store
-; CHECK: ret i32 0
+; CHECK-LABEL: @dse5(
+; CHECK-NEXT:    [[V:%.*]] = load atomic i32, i32* [[P:%.*]] unordered, align 4
+; CHECK-NEXT:    [[A:%.*]] = load atomic i32, i32* [[Q:%.*]] unordered, align 4
+; CHECK-NEXT:    ret i32 0
+;
   %v = load atomic i32, i32* %P unordered, align 4
   %a = load atomic i32, i32* %Q unordered, align 4
   store atomic i32 %v, i32* %P unordered, align 4
@@ -260,8 +316,10 @@ define i32 @dse5(i32 *%P, i32 *%Q) {
 
 
 define void @dse_neg1(i32 *%P) {
-; CHECK-LABEL: @dse_neg1
-; CHECK: store
+; CHECK-LABEL: @dse_neg1(
+; CHECK-NEXT:    store i32 5, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
   %v = load i32, i32* %P
   store i32 5, i32* %P
   ret void
@@ -270,8 +328,11 @@ define void @dse_neg1(i32 *%P) {
 ; Could remove the store, but only if ordering was somehow
 ; encoded.
 define void @dse_neg2(i32 *%P) {
-; CHECK-LABEL: @dse_neg2
-; CHECK: store
+; CHECK-LABEL: @dse_neg2(
+; CHECK-NEXT:    [[V:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    store atomic i32 [[V]], i32* [[P]] seq_cst, align 4
+; CHECK-NEXT:    ret void
+;
   %v = load i32, i32* %P
   store atomic i32 %v, i32* %P seq_cst, align 4
   ret void
@@ -280,11 +341,14 @@ define void @dse_neg2(i32 *%P) {
 @c = external global i32, align 4
 declare i32 @reads_c(i32 returned)
 define void @pr28763() {
-entry:
 ; CHECK-LABEL: @pr28763(
-; CHECK: store i32 0, i32* @c, align 4
-; CHECK: call i32 @reads_c(i32 0)
-; CHECK: store i32 2, i32* @c, align 4
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 0, i32* @c, align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @reads_c(i32 0)
+; CHECK-NEXT:    store i32 2, i32* @c, align 4
+; CHECK-NEXT:    ret void
+;
+entry:
   %load = load i32, i32* @c, align 4
   store i32 0, i32* @c, align 4
   %call = call i32 @reads_c(i32 0)
@@ -293,10 +357,12 @@ entry:
 }
 
 define i1 @cse_freeze(i1 %a) {
-entry:
 ; CHECK-LABEL: @cse_freeze(
-; CHECK: %b = freeze i1 %a
-; CHECK: ret i1 %b
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = freeze i1 [[A:%.*]]
+; CHECK-NEXT:    ret i1 [[B]]
+;
+entry:
   %b = freeze i1 %a
   %c = freeze i1 %a
   %and = and i1 %b, %c
diff --git a/llvm/test/Transforms/EarlyCSE/const-speculation.ll b/llvm/test/Transforms/EarlyCSE/const-speculation.ll
index a531c14da770c..bf4469ca37331 100644
--- a/llvm/test/Transforms/EarlyCSE/const-speculation.ll
+++ b/llvm/test/Transforms/EarlyCSE/const-speculation.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -early-cse -earlycse-debug-hash -S %s | FileCheck %s
 
 %mystruct = type { i32 }
@@ -15,15 +16,20 @@
 ; crash.
 
 define i1 @test_constant_speculation() {
-; CHECK-LABEL: define i1 @test_constant_speculation
+; CHECK-LABEL: @test_constant_speculation(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 undef, label [[END:%.*]], label [[SELECT:%.*]]
+; CHECK:       select:
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[TMP:%.*]] = phi i32* [ null, [[ENTRY:%.*]] ], [ getelementptr inbounds ([[MYSTRUCT:%.*]], %mystruct* @var, i64 0, i32 0), [[SELECT]] ]
+; CHECK-NEXT:    [[RES:%.*]] = icmp eq i32* [[TMP]], null
+; CHECK-NEXT:    ret i1 [[RES]]
+;
 entry:
   br i1 undef, label %end, label %select
 
 select:
-; CHECK: select:
-; CHECK-NOT: icmp
-; CHECK-NOT: getelementptr
-; CHECK-NOT: select
 
   %tst = icmp eq i32 1, 0
   %elt = getelementptr %mystruct, %mystruct* @var, i64 0, i32 0
@@ -31,8 +37,6 @@ select:
   br label %end
 
 end:
-; CHECK: end:
-; CHECK: %tmp = phi i32* [ null, %entry ], [ getelementptr inbounds (%mystruct, %mystruct* @var, i64 0, i32 0), %select ]
   %tmp = phi i32* [null, %entry], [%sel, %select]
   %res = icmp eq i32* %tmp, null
   ret i1 %res
diff --git a/llvm/test/Transforms/EarlyCSE/floatingpoint.ll b/llvm/test/Transforms/EarlyCSE/floatingpoint.ll
index a4293f5eed9c1..c7579adfdd3cd 100644
--- a/llvm/test/Transforms/EarlyCSE/floatingpoint.ll
+++ b/llvm/test/Transforms/EarlyCSE/floatingpoint.ll
@@ -1,27 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s
 ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
 
 ; Ensure we don't simplify away additions vectors of +0.0's (same as scalars).
 define <4 x float> @fV( <4 x float> %a) {
-       ; CHECK: %b = fadd <4 x float> %a, zeroinitializer
-       %b = fadd  <4 x float> %a, <float 0.0,float 0.0,float 0.0,float 0.0>
-       ret <4 x float> %b
+; CHECK-LABEL: @fV(
+; CHECK-NEXT:    [[B:%.*]] = fadd <4 x float> [[A:%.*]], zeroinitializer
+; CHECK-NEXT:    ret <4 x float> [[B]]
+;
+  %b = fadd  <4 x float> %a, <float 0.0,float 0.0,float 0.0,float 0.0>
+  ret <4 x float> %b
 }
 
 define <4 x float> @fW( <4 x float> %a) {
-       ; CHECK: ret <4 x float> %a
-       %b = fadd  <4 x float> %a, <float -0.0,float -0.0,float -0.0,float -0.0>
-       ret <4 x float> %b
+; CHECK-LABEL: @fW(
+; CHECK-NEXT:    ret <4 x float> [[A:%.*]]
+;
+  %b = fadd  <4 x float> %a, <float -0.0,float -0.0,float -0.0,float -0.0>
+  ret <4 x float> %b
 }
 
 ; CSE unary fnegs.
 define void @fX(<4 x float> *%p, <4 x float> %a) {
-       ; CHECK: %x = fneg <4 x float> %a
-       ; CHECK-NEXT: store volatile <4 x float> %x, <4 x float>* %p
-       ; CHECK-NEXT: store volatile <4 x float> %x, <4 x float>* %p
-       %x = fneg <4 x float> %a
-       %y = fneg <4 x float> %a
-       store volatile <4 x float> %x, <4 x float>* %p
-       store volatile <4 x float> %y, <4 x float>* %p
-       ret void
+; CHECK-LABEL: @fX(
+; CHECK-NEXT:    [[X:%.*]] = fneg <4 x float> [[A:%.*]]
+; CHECK-NEXT:    store volatile <4 x float> [[X]], <4 x float>* [[P:%.*]], align 16
+; CHECK-NEXT:    store volatile <4 x float> [[X]], <4 x float>* [[P]], align 16
+; CHECK-NEXT:    ret void
+;
+  %x = fneg <4 x float> %a
+  %y = fneg <4 x float> %a
+  store volatile <4 x float> %x, <4 x float>* %p
+  store volatile <4 x float> %y, <4 x float>* %p
+  ret void
 }
diff --git a/llvm/test/Transforms/EarlyCSE/memoryssa.ll b/llvm/test/Transforms/EarlyCSE/memoryssa.ll
index 23c7137ca8c63..730e8104452dc 100644
--- a/llvm/test/Transforms/EarlyCSE/memoryssa.ll
+++ b/llvm/test/Transforms/EarlyCSE/memoryssa.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -S -early-cse -earlycse-debug-hash | FileCheck %s --check-prefix=CHECK-NOMEMSSA
 ; RUN: opt < %s -S -basic-aa -early-cse-memssa | FileCheck %s
 ; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefix=CHECK-NOMEMSSA
@@ -8,61 +9,86 @@
 @G3 = global i32 zeroinitializer
 
 ;; Simple load value numbering across non-clobbering store.
-; CHECK-LABEL: @test1(
-; CHECK-NOMEMSSA-LABEL: @test1(
 define i32 @test1() {
+; CHECK-NOMEMSSA-LABEL: @test1(
+; CHECK-NOMEMSSA-NEXT:    [[V1:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NOMEMSSA-NEXT:    store i32 0, i32* @G2, align 4
+; CHECK-NOMEMSSA-NEXT:    [[V2:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NOMEMSSA-NEXT:    [[DIFF:%.*]] = sub i32 [[V1]], [[V2]]
+; CHECK-NOMEMSSA-NEXT:    ret i32 [[DIFF]]
+;
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NEXT:    store i32 0, i32* @G2, align 4
+; CHECK-NEXT:    ret i32 0
+;
   %V1 = load i32, i32* @G1
   store i32 0, i32* @G2
   %V2 = load i32, i32* @G1
-  ; CHECK-NOMEMSSA: sub i32 %V1, %V2
   %Diff = sub i32 %V1, %V2
   ret i32 %Diff
-  ; CHECK: ret i32 0
 }
 
 ;; Simple dead store elimination across non-clobbering store.
-; CHECK-LABEL: @test2(
-; CHECK-NOMEMSSA-LABEL: @test2(
 define void @test2() {
+; CHECK-NOMEMSSA-LABEL: @test2(
+; CHECK-NOMEMSSA-NEXT:  entry:
+; CHECK-NOMEMSSA-NEXT:    [[V1:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NOMEMSSA-NEXT:    store i32 0, i32* @G2, align 4
+; CHECK-NOMEMSSA-NEXT:    store i32 [[V1]], i32* @G1, align 4
+; CHECK-NOMEMSSA-NEXT:    ret void
+;
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NEXT:    store i32 0, i32* @G2, align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %V1 = load i32, i32* @G1
-  ; CHECK: store i32 0, i32* @G2
   store i32 0, i32* @G2
-  ; CHECK-NOT: store
-  ; CHECK-NOMEMSSA: store i32 %V1, i32* @G1
   store i32 %V1, i32* @G1
   ret void
 }
 
 ;; Check that memoryphi optimization happens during EarlyCSE, enabling
 ;; more load CSE opportunities.
-; CHECK-LABEL: @test_memphiopt(
-; CHECK-NOMEMSSA-LABEL: @test_memphiopt(
 define void @test_memphiopt(i1 %c, i32* %p) {
-; CHECK-LABEL: entry:
-; CHECK-NOMEMSSA-LABEL: entry:
+; CHECK-NOMEMSSA-LABEL: @test_memphiopt(
+; CHECK-NOMEMSSA-NEXT:  entry:
+; CHECK-NOMEMSSA-NEXT:    [[V1:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NOMEMSSA-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[END:%.*]]
+; CHECK-NOMEMSSA:       then:
+; CHECK-NOMEMSSA-NEXT:    [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NOMEMSSA-NEXT:    br label [[END]]
+; CHECK-NOMEMSSA:       end:
+; CHECK-NOMEMSSA-NEXT:    [[V2:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NOMEMSSA-NEXT:    [[SUM:%.*]] = add i32 [[V1]], [[V2]]
+; CHECK-NOMEMSSA-NEXT:    store i32 [[SUM]], i32* @G2, align 4
+; CHECK-NOMEMSSA-NEXT:    ret void
+;
+; CHECK-LABEL: @test_memphiopt(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[END:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[V1]], [[V1]]
+; CHECK-NEXT:    store i32 [[SUM]], i32* @G2, align 4
+; CHECK-NEXT:    ret void
+;
 entry:
-; CHECK: load
-; CHECK-NOMEMSSA: load
   %v1 = load i32, i32* @G1
   br i1 %c, label %then, label %end
 
-; CHECK-LABEL: then:
-; CHECK-NOMEMSSA-LABEL: then:
 then:
-; CHECK: load
-; CHECK-NOMEMSSA: load
   %pv = load i32, i32* %p
-; CHECK-NOT: store
-; CHECK-NOMEMSSA-NOT: store
   store i32 %pv, i32* %p
   br label %end
 
-; CHECK-LABEL: end:
-; CHECK-NOMEMSSA-LABEL: end:
 end:
-; CHECK-NOT: load
-; CHECK-NOMEMSSA: load
   %v2 = load i32, i32* @G1
   %sum = add i32 %v1, %v2
   store i32 %sum, i32* @G2
@@ -72,36 +98,43 @@ end:
 
 ;; Check that MemoryPhi optimization and MemoryUse re-optimization
 ;; happens during EarlyCSE, enabling more load CSE opportunities.
-; CHECK-LABEL: @test_memphiopt2(
-; CHECK-NOMEMSSA-LABEL: @test_memphiopt2(
 define void @test_memphiopt2(i1 %c, i32* %p) {
-; CHECK-LABEL: entry:
-; CHECK-NOMEMSSA-LABEL: entry:
+; CHECK-NOMEMSSA-LABEL: @test_memphiopt2(
+; CHECK-NOMEMSSA-NEXT:  entry:
+; CHECK-NOMEMSSA-NEXT:    [[V1:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NOMEMSSA-NEXT:    store i32 [[V1]], i32* @G2, align 4
+; CHECK-NOMEMSSA-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[END:%.*]]
+; CHECK-NOMEMSSA:       then:
+; CHECK-NOMEMSSA-NEXT:    [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NOMEMSSA-NEXT:    br label [[END]]
+; CHECK-NOMEMSSA:       end:
+; CHECK-NOMEMSSA-NEXT:    [[V2:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NOMEMSSA-NEXT:    store i32 [[V2]], i32* @G3, align 4
+; CHECK-NOMEMSSA-NEXT:    ret void
+;
+; CHECK-LABEL: @test_memphiopt2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* @G1, align 4
+; CHECK-NEXT:    store i32 [[V1]], i32* @G2, align 4
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[THEN:%.*]], label [[END:%.*]]
+; CHECK:       then:
+; CHECK-NEXT:    [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    store i32 [[V1]], i32* @G3, align 4
+; CHECK-NEXT:    ret void
+;
 entry:
-; CHECK: load
-; CHECK-NOMEMSSA: load
   %v1 = load i32, i32* @G1
-; CHECK: store
-; CHECK-NOMEMSSA: store
   store i32 %v1, i32* @G2
   br i1 %c, label %then, label %end
 
-; CHECK-LABEL: then:
-; CHECK-NOMEMSSA-LABEL: then:
 then:
-; CHECK: load
-; CHECK-NOMEMSSA: load
   %pv = load i32, i32* %p
-; CHECK-NOT: store
-; CHECK-NOMEMSSA-NOT: store
   store i32 %pv, i32* %p
   br label %end
 
-; CHECK-LABEL: end:
-; CHECK-NOMEMSSA-LABEL: end:
 end:
-; CHECK-NOT: load
-; CHECK-NOMEMSSA: load
   %v2 = load i32, i32* @G1
   store i32 %v2, i32* @G3
   ret void
@@ -109,39 +142,69 @@ end:
 
 ;; Check that we respect lifetime.start/lifetime.end intrinsics when deleting
 ;; stores that, without the lifetime calls, would be writebacks.
-; CHECK-LABEL: @test_writeback_lifetimes(
-; CHECK-NOMEMSSA-LABEL: @test_writeback_lifetimes(
 define void @test_writeback_lifetimes(i32* %p) {
+; CHECK-NOMEMSSA-LABEL: @test_writeback_lifetimes(
+; CHECK-NOMEMSSA-NEXT:  entry:
+; CHECK-NOMEMSSA-NEXT:    [[Q:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 1
+; CHECK-NOMEMSSA-NEXT:    [[PV:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NOMEMSSA-NEXT:    [[QV:%.*]] = load i32, i32* [[Q]], align 4
+; CHECK-NOMEMSSA-NEXT:    call void @llvm.lifetime.end.p0i32(i64 8, i32* [[P]])
+; CHECK-NOMEMSSA-NEXT:    call void @llvm.lifetime.start.p0i32(i64 8, i32* [[P]])
+; CHECK-NOMEMSSA-NEXT:    store i32 [[PV]], i32* [[P]], align 4
+; CHECK-NOMEMSSA-NEXT:    store i32 [[QV]], i32* [[Q]], align 4
+; CHECK-NOMEMSSA-NEXT:    ret void
+;
+; CHECK-LABEL: @test_writeback_lifetimes(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[Q:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 1
+; CHECK-NEXT:    [[PV:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT:    [[QV:%.*]] = load i32, i32* [[Q]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i32(i64 8, i32* [[P]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i32(i64 8, i32* [[P]])
+; CHECK-NEXT:    store i32 [[PV]], i32* [[P]], align 4
+; CHECK-NEXT:    store i32 [[QV]], i32* [[Q]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %q = getelementptr i32, i32* %p, i64 1
   %pv = load i32, i32* %p
   %qv = load i32, i32* %q
   call void @llvm.lifetime.end.p0i8(i64 8, i32* %p)
   call void @llvm.lifetime.start.p0i8(i64 8, i32* %p)
-  ; CHECK: store i32 %pv
-  ; CHECK-NOMEMSSA-LABEL: store i32 %pv
   store i32 %pv, i32* %p
-  ; CHECK: store i32 %qv, i32* %q
-  ; CHECK-NOMEMSSA-LABEL: store i32 %qv, i32* %q
   store i32 %qv, i32* %q
   ret void
 }
 
 ;; Check that we respect lifetime.start/lifetime.end intrinsics when deleting
 ;; stores that, without the lifetime calls, would be writebacks.
-; CHECK-LABEL: @test_writeback_lifetimes_multi_arg(
-; CHECK-NOMEMSSA-LABEL: @test_writeback_lifetimes_multi_arg(
 define void @test_writeback_lifetimes_multi_arg(i32* %p, i32* %q) {
+; CHECK-NOMEMSSA-LABEL: @test_writeback_lifetimes_multi_arg(
+; CHECK-NOMEMSSA-NEXT:  entry:
+; CHECK-NOMEMSSA-NEXT:    [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NOMEMSSA-NEXT:    [[QV:%.*]] = load i32, i32* [[Q:%.*]], align 4
+; CHECK-NOMEMSSA-NEXT:    call void @llvm.lifetime.end.p0i32(i64 8, i32* [[P]])
+; CHECK-NOMEMSSA-NEXT:    call void @llvm.lifetime.start.p0i32(i64 8, i32* [[P]])
+; CHECK-NOMEMSSA-NEXT:    store i32 [[PV]], i32* [[P]], align 4
+; CHECK-NOMEMSSA-NEXT:    store i32 [[QV]], i32* [[Q]], align 4
+; CHECK-NOMEMSSA-NEXT:    ret void
+;
+; CHECK-LABEL: @test_writeback_lifetimes_multi_arg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PV:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[QV:%.*]] = load i32, i32* [[Q:%.*]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i32(i64 8, i32* [[P]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i32(i64 8, i32* [[P]])
+; CHECK-NEXT:    store i32 [[PV]], i32* [[P]], align 4
+; CHECK-NEXT:    store i32 [[QV]], i32* [[Q]], align 4
+; CHECK-NEXT:    ret void
+;
 entry:
   %pv = load i32, i32* %p
   %qv = load i32, i32* %q
   call void @llvm.lifetime.end.p0i8(i64 8, i32* %p)
   call void @llvm.lifetime.start.p0i8(i64 8, i32* %p)
-  ; CHECK: store i32 %pv
-  ; CHECK-NOMEMSSA-LABEL: store i32 %pv
   store i32 %pv, i32* %p
-  ; CHECK: store i32 %qv, i32* %q
-  ; CHECK-NOMEMSSA-LABEL: store i32 %qv, i32* %q
   store i32 %qv, i32* %q
   ret void
 }
diff --git a/llvm/test/Transforms/EarlyCSE/pr33406.ll b/llvm/test/Transforms/EarlyCSE/pr33406.ll
index 903b8bc9f2ace..e0d2cccb48ac1 100644
--- a/llvm/test/Transforms/EarlyCSE/pr33406.ll
+++ b/llvm/test/Transforms/EarlyCSE/pr33406.ll
@@ -1,18 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -early-cse-memssa -earlycse-debug-hash -S %s | FileCheck %s
 
-; CHECK: define void @patatino() {
-; CHECK:  for.cond:
-; CHECK-NEXT:  br i1 true, label %if.end, label %for.inc
-; CHECK:  if.end:
-; CHECK-NEXT:  %tinkywinky = load i32, i32* @b
-; CHECK-NEXT:  br i1 true, label %for.inc, label %for.inc
-; CHECK:  for.inc:
-; CHECK-NEXT:  ret void
-
-
 @b = external global i32
 
 define void @patatino() {
+; CHECK-LABEL: @patatino(
+; CHECK-NEXT:  for.cond:
+; CHECK-NEXT:    br i1 true, label [[IF_END:%.*]], label [[FOR_INC:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TINKYWINKY:%.*]] = load i32, i32* @b, align 4
+; CHECK-NEXT:    br i1 true, label [[FOR_INC]], label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    ret void
+;
 for.cond:
   br i1 true, label %if.end, label %for.inc
 
diff --git a/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll b/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll
index d83a42780c647..baa050a433d80 100644
--- a/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll
+++ b/llvm/test/Transforms/EarlyCSE/readnone-mayunwind.ll
@@ -1,12 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s
 
 declare void @readnone_may_unwind() readnone
 
 define void @f(i32* %ptr) {
 ; CHECK-LABEL: @f(
-; CHECK: store i32 100, i32* %ptr
-; CHECK: call void @readnone_may_unwind()
-; CHECK: store i32 200, i32* %ptr
+; CHECK-NEXT:    store i32 100, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    call void @readnone_may_unwind()
+; CHECK-NEXT:    store i32 200, i32* [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
 
   store i32 100, i32* %ptr
   call void @readnone_may_unwind()
diff --git a/llvm/test/Transforms/EarlyCSE/writeonly.ll b/llvm/test/Transforms/EarlyCSE/writeonly.ll
index b28af8535083c..3c95efb012a86 100644
--- a/llvm/test/Transforms/EarlyCSE/writeonly.ll
+++ b/llvm/test/Transforms/EarlyCSE/writeonly.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -S -early-cse -earlycse-debug-hash < %s | FileCheck %s
 
 @var = global i32 undef
@@ -5,11 +6,12 @@ declare void @foo() nounwind
 
 define void @test() {
 ; CHECK-LABEL: @test(
-; CHECK-NOT: store
+; CHECK-NEXT:    call void @foo() #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    store i32 2, i32* @var, align 4
+; CHECK-NEXT:    ret void
+;
   store i32 1, i32* @var
-; CHECK: call void @foo()
   call void @foo() writeonly
-; CHECK: store i32 2, i32* @var
   store i32 2, i32* @var
   ret void
 }

From 35737df4dcd28534bd3090157c224c19b501278a Mon Sep 17 00:00:00 2001
From: Mubashar Ahmad <mubashar.ahmad@arm.com>
Date: Thu, 23 Dec 2021 16:37:44 +0000
Subject: [PATCH 002/946] [Clang][AArch64][ARM] Unaligned Access Warning Added

Added warning for potential cases of
unaligned access when option
-mno-unaligned-access has been specified

Differential Revision: https://reviews.llvm.org/D116221
---
 .../include/clang/Basic/DiagnosticASTKinds.td |   5 +
 clang/include/clang/Basic/DiagnosticGroups.td |   1 +
 clang/lib/AST/RecordLayoutBuilder.cpp         |  17 +
 clang/lib/Driver/ToolChains/Arch/AArch64.cpp  |  11 +-
 clang/lib/Driver/ToolChains/Arch/AArch64.h    |   1 +
 clang/lib/Driver/ToolChains/Arch/ARM.cpp      |  28 +-
 clang/lib/Driver/ToolChains/Clang.cpp         |   3 +-
 clang/test/Sema/test-wunaligned-access.c      | 516 ++++++++++++++++++
 clang/test/Sema/test-wunaligned-access.cpp    | 274 ++++++++++
 9 files changed, 846 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/Sema/test-wunaligned-access.c
 create mode 100644 clang/test/Sema/test-wunaligned-access.cpp

diff --git a/clang/include/clang/Basic/DiagnosticASTKinds.td b/clang/include/clang/Basic/DiagnosticASTKinds.td
index d788c85179142..a89bdff1a10c2 100644
--- a/clang/include/clang/Basic/DiagnosticASTKinds.td
+++ b/clang/include/clang/Basic/DiagnosticASTKinds.td
@@ -590,4 +590,9 @@ def warn_padded_struct_size : Warning<
   InGroup<Padded>, DefaultIgnore;
 def warn_unnecessary_packed : Warning<
   "packed attribute is unnecessary for %0">, InGroup<Packed>, DefaultIgnore;
+
+// -Wunaligned-access
+def warn_unaligned_access : Warning<
+  "field %1 within %0 is less aligned than %2 and is usually due to %0 being "
+  "packed, which can lead to unaligned accesses">, InGroup<UnalignedAccess>, DefaultIgnore;
 }
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td
index 1bc879a68a8c7..608e16147b1cd 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -543,6 +543,7 @@ def ExplicitInitializeCall : DiagGroup<"explicit-initialize-call">;
 def OrderedCompareFunctionPointers : DiagGroup<"ordered-compare-function-pointers">;
 def Packed : DiagGroup<"packed">;
 def Padded : DiagGroup<"padded">;
+def UnalignedAccess : DiagGroup<"unaligned-access">;
 
 def PessimizingMove : DiagGroup<"pessimizing-move">;
 def ReturnStdMove : DiagGroup<"return-std-move">;
diff --git a/clang/lib/AST/RecordLayoutBuilder.cpp b/clang/lib/AST/RecordLayoutBuilder.cpp
index 3e39ec1c718d1..61a30ead165ef 100644
--- a/clang/lib/AST/RecordLayoutBuilder.cpp
+++ b/clang/lib/AST/RecordLayoutBuilder.cpp
@@ -2021,6 +2021,7 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
   CharUnits UnpackedFieldAlign =
       !DefaultsToAIXPowerAlignment ? FieldAlign : PreferredAlign;
   CharUnits UnpackedFieldOffset = FieldOffset;
+  CharUnits OriginalFieldAlign = UnpackedFieldAlign;
 
   if (FieldPacked) {
     FieldAlign = CharUnits::One();
@@ -2105,6 +2106,22 @@ void ItaniumRecordLayoutBuilder::LayoutField(const FieldDecl *D,
   // Remember max struct/class ABI-specified alignment.
   UnadjustedAlignment = std::max(UnadjustedAlignment, FieldAlign);
   UpdateAlignment(FieldAlign, UnpackedFieldAlign, PreferredAlign);
+
+  // For checking the alignment of inner fields against
+  // the alignment of its parent record.
+  if (const RecordDecl *RD = D->getParent()) {
+    // Check if packed attribute or pragma pack is present.
+    if (RD->hasAttr<PackedAttr>() || !MaxFieldAlignment.isZero())
+      if (FieldAlign < OriginalFieldAlign)
+        if (D->getType()->isRecordType()) {
+          // If the offset is a multiple of the alignment of
+          // the type, raise the warning.
+          // TODO: Takes no account the alignment of the outer struct
+          if (FieldOffset % OriginalFieldAlign != 0)
+            Diag(D->getLocation(), diag::warn_unaligned_access)
+                << Context.getTypeDeclType(RD) << D->getName() << D->getType();
+        }
+  }
 }
 
 void ItaniumRecordLayoutBuilder::FinishLayout(const NamedDecl *D) {
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index 9ffb5d73b2aad..89a77a368ef02 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -221,6 +221,7 @@ getAArch64MicroArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu,
 void aarch64::getAArch64TargetFeatures(const Driver &D,
                                        const llvm::Triple &Triple,
                                        const ArgList &Args,
+                                       llvm::opt::ArgStringList &CmdArgs,
                                        std::vector<StringRef> &Features,
                                        bool ForAS) {
   Arg *A;
@@ -464,10 +465,16 @@ void aarch64::getAArch64TargetFeatures(const Driver &D,
 
   if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
                                options::OPT_munaligned_access)) {
-    if (A->getOption().matches(options::OPT_mno_unaligned_access))
+    if (A->getOption().matches(options::OPT_mno_unaligned_access)) {
       Features.push_back("+strict-align");
-  } else if (Triple.isOSOpenBSD())
+      if (!ForAS)
+        CmdArgs.push_back("-Wunaligned-access");
+    }
+  } else if (Triple.isOSOpenBSD()) {
     Features.push_back("+strict-align");
+    if (!ForAS)
+      CmdArgs.push_back("-Wunaligned-access");
+  }
 
   if (Args.hasArg(options::OPT_ffixed_x1))
     Features.push_back("+reserve-x1");
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.h b/clang/lib/Driver/ToolChains/Arch/AArch64.h
index d47c402d4a42d..0cdc2ec725e02 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.h
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.h
@@ -22,6 +22,7 @@ namespace aarch64 {
 
 void getAArch64TargetFeatures(const Driver &D, const llvm::Triple &Triple,
                               const llvm::opt::ArgList &Args,
+                              llvm::opt::ArgStringList &CmdArgs,
                               std::vector<llvm::StringRef> &Features,
                               bool ForAS);
 
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 4013cf230026b..1055d7800b63e 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -769,10 +769,12 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   }
 
   // Kernel code has more strict alignment requirements.
-  if (KernelOrKext)
+  if (KernelOrKext) {
     Features.push_back("+strict-align");
-  else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
-                                    options::OPT_munaligned_access)) {
+    if (!ForAS)
+      CmdArgs.push_back("-Wunaligned-access");
+  } else if (Arg *A = Args.getLastArg(options::OPT_mno_unaligned_access,
+                                      options::OPT_munaligned_access)) {
     if (A->getOption().matches(options::OPT_munaligned_access)) {
       // No v6M core supports unaligned memory access (v6M ARM ARM A3.2).
       if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m)
@@ -781,8 +783,11 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple,
       // access either.
       else if (Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v8m_baseline)
         D.Diag(diag::err_target_unsupported_unaligned) << "v8m.base";
-    } else
+    } else {
       Features.push_back("+strict-align");
+      if (!ForAS)
+        CmdArgs.push_back("-Wunaligned-access");
+    }
   } else {
     // Assume pre-ARMv6 doesn't support unaligned accesses.
     //
@@ -801,14 +806,23 @@ void arm::getARMTargetFeatures(const Driver &D, const llvm::Triple &Triple,
     int VersionNum = getARMSubArchVersionNumber(Triple);
     if (Triple.isOSDarwin() || Triple.isOSNetBSD()) {
       if (VersionNum < 6 ||
-          Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m)
+          Triple.getSubArch() == llvm::Triple::SubArchType::ARMSubArch_v6m) {
         Features.push_back("+strict-align");
+        if (!ForAS)
+          CmdArgs.push_back("-Wunaligned-access");
+      }
     } else if (Triple.isOSLinux() || Triple.isOSNaCl() ||
                Triple.isOSWindows()) {
-      if (VersionNum < 7)
+      if (VersionNum < 7) {
         Features.push_back("+strict-align");
-    } else
+        if (!ForAS)
+          CmdArgs.push_back("-Wunaligned-access");
+      }
+    } else {
       Features.push_back("+strict-align");
+      if (!ForAS)
+        CmdArgs.push_back("-Wunaligned-access");
+    }
   }
 
   // llvm does not support reserving registers in general. There is support
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 253c52cf0ba85..96d949be17eea 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -346,7 +346,8 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
   case llvm::Triple::aarch64:
   case llvm::Triple::aarch64_32:
   case llvm::Triple::aarch64_be:
-    aarch64::getAArch64TargetFeatures(D, Triple, Args, Features, ForAS);
+    aarch64::getAArch64TargetFeatures(D, Triple, Args, CmdArgs, Features,
+                                      ForAS);
     break;
   case llvm::Triple::x86:
   case llvm::Triple::x86_64:
diff --git a/clang/test/Sema/test-wunaligned-access.c b/clang/test/Sema/test-wunaligned-access.c
new file mode 100644
index 0000000000000..55c2149634d1a
--- /dev/null
+++ b/clang/test/Sema/test-wunaligned-access.c
@@ -0,0 +1,516 @@
+// RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm
+// REQUIRES: arm-registered-target
+//
+// This test suite tests the warning triggered by the -Wunaligned-access option.
+// The warning occurs when a struct or other type of record contains a field
+// that is itself a record. The outer record must be a packed structure, while
+// while the inner record must be unpacked. This is the fundamental condition
+// for the warning to be triggered. Some of these tests may have three layers.
+//
+// The command line option -fsyntax-only is not used as Clang needs to be
+// forced to layout the structs used in this test.
+// The triple in the command line above is used for the assumptions about
+// size and alignment of types.
+
+// Set 1
+struct T1 {
+  char a;
+  int b;
+};
+
+struct __attribute__((packed)) U1 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct U1' is less aligned than 'struct T1' and is usually due to 'struct U1' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((packed)) U2 {
+  char a;
+  struct T1 b __attribute__((aligned(2))); // expected-warning {{field b within 'struct U2' is less aligned than 'struct T1' and is usually due to 'struct U2' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((packed)) U3 {
+  char a;
+  struct T1 b __attribute__((aligned(4)));
+  int c;
+};
+
+struct __attribute__((aligned(2))) U4 {
+  char a;
+  struct T1 b;
+  int c;
+};
+
+struct U5 {
+  char a;
+  struct T1 b;
+  int c;
+};
+
+struct U6 {
+  char a;
+  int b;
+  struct T1 c __attribute__((aligned(2)));
+};
+
+struct __attribute__((packed)) U7 {
+  short a;
+  short b;
+  char c;
+  struct T1 d; // expected-warning {{field d within 'struct U7' is less aligned than 'struct T1' and is usually due to 'struct U7' being packed, which can lead to unaligned accesses}}
+};
+
+struct U8 {
+  short a;
+  short b;
+  char c;
+  struct T1 d;
+};
+
+struct __attribute__((packed)) U9 {
+  short a;
+  short b;
+  char c;
+  struct T1 d __attribute__((aligned(4)));
+};
+
+struct __attribute__((packed)) U10 {
+  short a;
+  short b;
+  char c;
+  struct T1 d __attribute__((aligned(2))); // expected-warning {{field d within 'struct U10' is less aligned than 'struct T1' and is usually due to 'struct U10' being packed, which can lead to unaligned accesses}}
+};
+
+struct __attribute__((aligned(2))) U11 {
+  short a;
+  short b;
+  char c;
+  struct T1 d;
+};
+
+// Set 2
+#pragma pack(push, 1)
+
+struct U12 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct U12' is less aligned than 'struct T1' and is usually due to 'struct U12' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((packed)) U13 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct U13' is less aligned than 'struct T1' and is usually due to 'struct U13' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((packed)) U14 {
+  char a;
+  struct T1 b __attribute__((aligned(4))); // expected-warning {{field b within 'struct U14' is less aligned than 'struct T1' and is usually due to 'struct U14' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((aligned(2))) U15 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct U15' is less aligned than 'struct T1' and is usually due to 'struct U15' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct U16 {
+  char a;
+  char b;
+  short c;
+  struct T1 d;
+};
+
+struct U17 {
+  char a;
+  char b;
+  short c;
+  struct T1 d __attribute__((aligned(4)));
+};
+
+struct __attribute__((packed)) U18 {
+  char a;
+  short b;
+  struct T1 c __attribute__((aligned(4))); // expected-warning {{field c within 'struct U18' is less aligned than 'struct T1' and is usually due to 'struct U18' being packed, which can lead to unaligned accesses}}
+};
+
+struct __attribute__((aligned(4))) U19 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct U19' is less aligned than 'struct T1' and is usually due to 'struct U19' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((aligned(4))) U20 {
+  char a[4];
+  struct T1 b;
+  int c;
+};
+
+struct U21 {
+  char a;
+  short c;
+  struct T1 d; // expected-warning {{field d within 'struct U21' is less aligned than 'struct T1' and is usually due to 'struct U21' being packed, which can lead to unaligned accesses}}
+};
+
+struct U22 {
+  char a;
+  short c;
+  struct T1 d __attribute__((aligned(4))); // expected-warning {{field d within 'struct U22' is less aligned than 'struct T1' and is usually due to 'struct U22' being packed, which can lead to unaligned accesses}}
+};
+
+#pragma pack(pop)
+
+// Set 3
+#pragma pack(push, 2)
+
+struct __attribute__((packed)) U23 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct U23' is less aligned than 'struct T1' and is usually due to 'struct U23' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct U24 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct U24' is less aligned than 'struct T1' and is usually due to 'struct U24' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct U25 {
+  char a;
+  char b;
+  short c;
+  struct T1 d;
+};
+
+struct U26 {
+  char a;
+  char b;
+  short c;
+  struct T1 d;
+};
+
+#pragma pack(pop)
+
+// Set 4
+
+struct __attribute__((packed)) T2 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct T2' is less aligned than 'struct T1' and is usually due to 'struct T2' being packed, which can lead to unaligned accesses}}
+};
+
+struct T3 {
+  char a;
+  struct T1 b;
+};
+
+struct __attribute__((packed)) U27 {
+  char a;
+  struct T2 b;
+  int c;
+};
+
+struct U28 {
+  char a;
+  char _p[2];
+  struct T2 b;
+  int c;
+};
+
+struct U29 {
+  char a;
+  struct T3 b;
+  int c;
+};
+
+struct __attribute__((packed)) U30 {
+  char a;
+  struct T3 b; // expected-warning {{field b within 'struct U30' is less aligned than 'struct T3' and is usually due to 'struct U30' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((packed)) U31 {
+  char a;
+  struct T2 b __attribute__((aligned(4)));
+};
+
+struct __attribute__((packed)) U32 {
+  char a;
+  char b;
+  char c;
+  char d;
+  struct T3 e;
+};
+
+struct __attribute__((packed)) U33 {
+  char a;
+  char b;
+  char c;
+  char d;
+  struct T2 e __attribute__((aligned(4)));
+};
+
+struct __attribute__((packed)) U34 {
+  char a;
+  struct T1 b __attribute__((packed)); // expected-warning {{field b within 'struct U34' is less aligned than 'struct T1' and is usually due to 'struct U34' being packed, which can lead to unaligned accesses}}
+  struct T2 c;
+};
+
+struct __attribute__((packed)) U35 {
+  char a;
+  struct T4 {
+    char b;
+    struct T1 c;
+  } d; // expected-warning {{field d within 'struct U35' is less aligned than 'struct T4' and is usually due to 'struct U35' being packed, which can lead to unaligned accesses}}
+};
+
+// Set 5
+
+#pragma pack(push, 1)
+struct T5 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct T5' is less aligned than 'struct T1' and is usually due to 'struct T5' being packed, which can lead to unaligned accesses}}
+};
+#pragma pack(pop)
+
+#pragma pack(push, 1)
+struct U36 {
+  char a;
+  struct T5 b;
+  int c;
+};
+
+struct U37 {
+  char a;
+  struct T3 b; // expected-warning {{field b within 'struct U37' is less aligned than 'struct T3' and is usually due to 'struct U37' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+#pragma pack(pop)
+struct U38 {
+  char a;
+  struct T5 b __attribute__((aligned(4)));
+  int c;
+};
+
+#pragma pack(push, 1)
+
+#pragma pack(push, 4)
+struct U39 {
+  char a;
+  struct T5 b;
+  int c;
+};
+#pragma pack(pop)
+
+#pragma pack(pop)
+
+// Set 6
+
+struct __attribute__((packed)) A1 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct A1' is less aligned than 'struct T1' and is usually due to 'struct A1' being packed, which can lead to unaligned accesses}}
+};
+
+struct A2 {
+  char a;
+  struct T1 b;
+};
+
+struct __attribute__((packed)) A3 {
+  char a;
+  struct T1 b __attribute__((aligned(4)));
+};
+
+#pragma pack(push, 1)
+struct A4 {
+  char a;
+  struct T1 b; // expected-warning {{field b within 'struct A4' is less aligned than 'struct T1' and is usually due to 'struct A4' being packed, which can lead to unaligned accesses}}
+};
+
+struct A5 {
+  char a;
+  struct T1 b __attribute__((aligned(4))); // expected-warning {{field b within 'struct A5' is less aligned than 'struct T1' and is usually due to 'struct A5' being packed, which can lead to unaligned accesses}}
+};
+#pragma pack(pop)
+
+struct __attribute__((packed)) A6 {
+  struct T1 a;
+};
+
+struct A7 {
+  char a;
+  struct T1 b __attribute__((packed));
+};
+
+struct A8 {
+  char a;
+  char b;
+  short c;
+  struct T1 d;
+};
+
+struct A9 {
+  char a;
+  struct T2 b;
+};
+
+struct A10 {
+  char a;
+  struct T2 b __attribute__((aligned(4)));
+};
+
+struct __attribute__((packed)) A11 {
+  char a;
+  struct T2 b;
+};
+
+struct __attribute__((packed)) U40 {
+  char a;
+  struct A1 b;
+  int c;
+};
+
+struct __attribute__((packed)) U41 {
+  char a;
+  struct A3 b; // expected-warning {{field b within 'struct U41' is less aligned than 'struct A3' and is usually due to 'struct U41' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+#pragma pack(push, 1)
+struct U42 {
+  char a;
+  struct A1 b;
+  int c;
+};
+#pragma pack(pop)
+
+struct __attribute__((packed)) U43 {
+  char a;
+  struct A9 b;
+  int c;
+};
+
+struct __attribute__((packed)) U44 {
+  char a;
+  struct A10 b; // expected-warning {{field b within 'struct U44' is less aligned than 'struct A10' and is usually due to 'struct U44' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+#pragma pack(push, 1)
+
+struct U45 {
+  char a;
+  struct A10 b; // expected-warning {{field b within 'struct U45' is less aligned than 'struct A10' and is usually due to 'struct U45' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+#pragma pack(pop)
+
+struct __attribute__((packed)) U46 {
+  char a;
+  struct A2 b; // expected-warning {{field b within 'struct U46' is less aligned than 'struct A2' and is usually due to 'struct U46' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((packed)) U47 {
+  char a;
+  struct A8 b; // expected-warning {{field b within 'struct U47' is less aligned than 'struct A8' and is usually due to 'struct U47' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+#pragma pack(push, 1)
+struct U48 {
+  char a;
+  struct A8 b; // expected-warning {{field b within 'struct U48' is less aligned than 'struct A8' and is usually due to 'struct U48' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+#pragma pack(pop)
+
+struct U49 {
+  char a;
+  struct A11 b;
+  int c;
+};
+
+struct U50 {
+  char a;
+  struct A1 b;
+  int c;
+};
+
+struct U51 {
+  char a;
+  struct A5 b;
+  int c;
+};
+
+struct __attribute__((packed)) U52 {
+  char a;
+  struct A6 b;
+};
+
+struct U53 {
+  char a;
+  struct A4 b;
+};
+
+struct U54 {
+  char b;
+  struct A7 c;
+};
+
+struct U1 s1;
+struct U2 s2;
+struct U3 s3;
+struct U4 s4;
+struct U5 s5;
+struct U6 s6;
+struct U7 s7;
+struct U8 s8;
+struct U9 s9;
+struct U10 s10;
+struct U11 s11;
+struct U12 s12;
+struct U13 s13;
+struct U14 s14;
+struct U15 s15;
+struct U16 s16;
+struct U17 s17;
+struct U18 s18;
+struct U19 s19;
+struct U20 s20;
+struct U21 s21;
+struct U22 s22;
+struct U23 s23;
+struct U24 s24;
+struct U25 s25;
+struct U26 s26;
+struct U27 s27;
+struct U28 s28;
+struct U29 s29;
+struct U30 s30;
+struct U31 s31;
+struct U32 s32;
+struct U33 s33;
+struct U34 s34;
+struct U35 s35;
+struct U36 s36;
+struct U37 s37;
+struct U38 s38;
+struct U39 s39;
+struct U40 s40;
+struct U41 s41;
+struct U42 s42;
+struct U43 s43;
+struct U44 s44;
+struct U45 s45;
+struct U46 s46;
+struct U47 s47;
+struct U48 s48;
+struct U49 s49;
+struct U50 s50;
+struct U51 s51;
+struct U52 s52;
+struct U53 s53;
+struct U54 s54;
diff --git a/clang/test/Sema/test-wunaligned-access.cpp b/clang/test/Sema/test-wunaligned-access.cpp
new file mode 100644
index 0000000000000..33f518310b0b1
--- /dev/null
+++ b/clang/test/Sema/test-wunaligned-access.cpp
@@ -0,0 +1,274 @@
+// RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm -o %t
+// REQUIRES: arm-registered-target
+//
+// This test suite tests the warning triggered by the -Wunaligned-access option.
+// The warning occurs when a struct or other type of record contains a field
+// that is itself a record. The outer record must be a packed structure, while
+// while the inner record must be unpacked. This is the fundamental condition
+// for the warning to be triggered. Some of these tests may have three layers.
+//
+// The command line option -fsyntax-only is not used as Clang needs to be
+// forced to layout the structs used in this test.
+// The triple in the command line above is used for the assumptions about
+// size and alignment of types.
+
+// Packed-Unpacked Tests (No Pragma)
+
+struct T1 {
+  char a;
+  int b;
+};
+
+struct __attribute__((packed)) U1 {
+  char a;
+  T1 b; // expected-warning {{field b within 'U1' is less aligned than 'T1' and is usually due to 'U1' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((packed)) U2 {
+  char a;
+  T1 b __attribute__((aligned(4)));
+  int c;
+};
+
+struct __attribute__((packed)) U3 {
+  char a;
+  char b;
+  short c;
+  T1 d;
+};
+
+struct __attribute__((packed)) U4 {
+  T1 a;
+  int b;
+};
+
+struct __attribute__((aligned(4), packed)) U5 {
+  char a;
+  T1 b; // expected-warning {{field b within 'U5' is less aligned than 'T1' and is usually due to 'U5' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((aligned(4), packed)) U6 {
+  char a;
+  char b;
+  short c;
+  T1 d;
+};
+
+// Packed-Unpacked Tests with Pragma
+
+#pragma pack(push, 1)
+
+struct __attribute__((packed)) U7 {
+  char a;
+  T1 b; // expected-warning {{field b within 'U7' is less aligned than 'T1' and is usually due to 'U7' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((packed)) U8 {
+  char a;
+  T1 b __attribute__((aligned(4))); // expected-warning {{field b within 'U8' is less aligned than 'T1' and is usually due to 'U8' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct __attribute__((aligned(4))) U9 {
+  char a;
+  T1 b; // expected-warning {{field b within 'U9' is less aligned than 'T1' and is usually due to 'U9' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct U10 {
+  char a;
+  T1 b; // expected-warning {{field b within 'U10' is less aligned than 'T1' and is usually due to 'U10' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+#pragma pack(pop)
+
+// Packed-Packed Tests
+
+struct __attribute__((packed)) T2 {
+  char a;
+  int b;
+};
+
+struct __attribute__((packed)) U11 {
+  char a;
+  T2 b;
+  int c;
+};
+
+#pragma pack(push, 1)
+struct U12 {
+  char a;
+  T2 b;
+  int c;
+};
+#pragma pack(pop)
+
+// Unpacked-Packed Tests
+
+struct U13 {
+  char a;
+  T2 b;
+  int c;
+};
+
+struct U14 {
+  char a;
+  T2 b __attribute__((aligned(4)));
+  int c;
+};
+
+// Unpacked-Unpacked Test
+
+struct T3 {
+  char a;
+  int b;
+};
+
+struct U15 {
+  char a;
+  T3 b;
+  int c;
+};
+
+// Packed-Packed-Unpacked Test (No pragma)
+
+struct __attribute__((packed)) A1 {
+  char a;
+  T1 b; // expected-warning {{field b within 'A1' is less aligned than 'T1' and is usually due to 'A1' being packed, which can lead to unaligned accesses}}
+};
+
+struct __attribute__((packed)) U16 {
+  char a;
+  A1 b;
+  int c;
+};
+
+struct __attribute__((packed)) A2 {
+  char a;
+  T1 b __attribute__((aligned(4)));
+};
+
+struct __attribute__((packed)) U17 {
+  char a;
+  A2 b; // expected-warning {{field b within 'U17' is less aligned than 'A2' and is usually due to 'U17' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+// Packed-Unpacked-Packed tests
+
+struct A3 {
+  char a;
+  T2 b;
+};
+
+struct __attribute__((packed)) U18 {
+  char a;
+  A3 b;
+  int c;
+};
+
+struct A4 {
+  char a;
+  T2 b;
+  int c;
+};
+
+#pragma pack(push, 1)
+struct U19 {
+  char a;
+  A4 b; // expected-warning {{field b within 'U19' is less aligned than 'A4' and is usually due to 'U19' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+#pragma pack(pop)
+
+// Packed-Unpacked-Unpacked tests
+
+struct A5 {
+  char a;
+  T1 b;
+};
+
+struct __attribute__((packed)) U20 {
+  char a;
+  A5 b; // expected-warning {{field b within 'U20' is less aligned than 'A5' and is usually due to 'U20' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+
+struct A6 {
+  char a;
+  T1 b;
+};
+
+#pragma pack(push, 1)
+struct U21 {
+  char a;
+  A6 b; // expected-warning {{field b within 'U21' is less aligned than 'A6' and is usually due to 'U21' being packed, which can lead to unaligned accesses}}
+  int c;
+};
+#pragma pack(pop)
+
+// Unpacked-Packed-Packed test
+
+struct __attribute__((packed)) A7 {
+  char a;
+  T2 b;
+};
+
+struct U22 {
+  char a;
+  A7 b;
+  int c;
+};
+
+// Unpacked-Packed-Unpacked tests
+
+struct __attribute__((packed)) A8 {
+  char a;
+  T1 b; // expected-warning {{field b within 'A8' is less aligned than 'T1' and is usually due to 'A8' being packed, which can lead to unaligned accesses}}
+};
+
+struct U23 {
+  char a;
+  A8 b;
+  int c;
+};
+
+struct __attribute__((packed)) A9 {
+  char a;
+  T1 b __attribute__((aligned(4)));
+};
+
+struct U24 {
+  char a;
+  A9 b;
+  int c;
+};
+
+struct U1 s1;
+struct U2 s2;
+struct U3 s3;
+struct U4 s4;
+struct U5 s5;
+struct U6 s6;
+struct U7 s7;
+struct U8 s8;
+struct U9 s9;
+struct U10 s10;
+struct U11 s11;
+struct U12 s12;
+struct U13 s13;
+struct U14 s14;
+struct U15 s15;
+struct U16 s16;
+struct U17 s17;
+struct U18 s18;
+struct U19 s19;
+struct U20 s20;
+struct U21 s21;
+struct U22 s22;
+struct U23 s23;
+struct U24 s24;
\ No newline at end of file

From 010a10b738915568d9d04d1f0caa09b5d25dc7b3 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Thu, 20 Jan 2022 15:18:47 +0100
Subject: [PATCH 003/946] [flang][NFC] Remove extra braces

Noticed during the upstreaming process.
---
 flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
index 1e671dbb72093..f5616d21d1340 100644
--- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
@@ -76,9 +76,8 @@ class TargetRewrite : public TargetRewriteBase<TargetRewrite> {
     mlir::OpBuilder rewriter(&context);
 
     auto mod = getModule();
-    if (!forcedTargetTriple.empty()) {
+    if (!forcedTargetTriple.empty())
       setTargetTriple(mod, forcedTargetTriple);
-    }
 
     auto specifics = CodeGenSpecifics::get(getOperation().getContext(),
                                            getTargetTriple(getOperation()),

From c95cb4de1b6674e52aebdb7d02c6431843001282 Mon Sep 17 00:00:00 2001
From: Stanislav Gatev <sgatev@google.com>
Date: Thu, 20 Jan 2022 09:28:25 +0000
Subject: [PATCH 004/946] [clang][dataflow] Intersect ExprToLoc when joining
 environments

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D117754
---
 .../FlowSensitive/DataflowEnvironment.cpp     |  5 +++
 .../TypeErasedDataflowAnalysis.cpp            |  1 +
 .../Analysis/FlowSensitive/TransferTest.cpp   | 37 +++++++++++++++++++
 3 files changed, 43 insertions(+)

diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index e1d420fb55b82..6fbc3ec42465c 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -83,6 +83,11 @@ LatticeJoinEffect Environment::join(const Environment &Other) {
   if (DeclToLocSizeBefore != DeclToLoc.size())
     Effect = LatticeJoinEffect::Changed;
 
+  const unsigned ExprToLocSizeBefore = ExprToLoc.size();
+  ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
+  if (ExprToLocSizeBefore != ExprToLoc.size())
+    Effect = LatticeJoinEffect::Changed;
+
   // FIXME: Add support for joining distinct values that are assigned to the
   // same storage locations in `LocToVal` and `Other.LocToVal`.
   const unsigned LocToValSizeBefore = LocToVal.size();
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 538cdce206b2f..3782f0f5f69ac 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -23,6 +23,7 @@
 #include "clang/Analysis/FlowSensitive/Transfer.h"
 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
 #include "clang/Analysis/FlowSensitive/Value.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 5979870f858b1..c1eaf281ddc49 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1828,4 +1828,41 @@ TEST_F(TransferTest, VarDeclInitAssignConditionalOperator) {
       });
 }
 
+TEST_F(TransferTest, VarDeclInDoWhile) {
+  std::string Code = R"(
+    void target(int *Foo) {
+      do {
+        int Bar = *Foo;
+      } while (true);
+      (void)0;
+      /*[[p]]*/
+    }
+  )";
+  runDataflow(Code,
+              [](llvm::ArrayRef<
+                     std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
+                     Results,
+                 ASTContext &ASTCtx) {
+                ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
+                const Environment &Env = Results[0].second.Env;
+
+                const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+                ASSERT_THAT(FooDecl, NotNull());
+
+                const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
+                ASSERT_THAT(BarDecl, NotNull());
+
+                const auto *FooVal =
+                    cast<PointerValue>(Env.getValue(*FooDecl, SkipPast::None));
+                const auto *FooPointeeVal =
+                    cast<IntegerValue>(Env.getValue(FooVal->getPointeeLoc()));
+
+                const auto *BarVal = dyn_cast_or_null<IntegerValue>(
+                    Env.getValue(*BarDecl, SkipPast::None));
+                ASSERT_THAT(BarVal, NotNull());
+
+                EXPECT_EQ(BarVal, FooPointeeVal);
+              });
+}
+
 } // namespace

From 6d45284618f08fa28dc515cab96fa573c4c4479e Mon Sep 17 00:00:00 2001
From: Stephan Herhut <herhut@google.com>
Date: Thu, 20 Jan 2022 14:02:04 +0100
Subject: [PATCH 005/946] [mlir][memref] Add better support for identity
 layouts in memref.collapse_shape canonicalizer

When computing the new type of a collapse_shape operation, we need to at least
take into account whether the type has an identity layout, in which case we can
easily support dynamic strides. Otherwise, the canonicalizer creates invalid
IR.

Longer term, both the verifier and the canoncializer need to be extended to
support the general case.

Differential Revision: https://reviews.llvm.org/D117772
---
 mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp   | 16 ++++++++++++----
 mlir/test/Dialect/MemRef/canonicalize.mlir | 17 +++++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 55499f63295f0..211af3045b9d8 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -1334,6 +1334,7 @@ computeReshapeCollapsedType(MemRefType type,
   AffineExpr offset;
   SmallVector<AffineExpr, 4> strides;
   auto status = getStridesAndOffset(type, strides, offset);
+  auto isIdentityLayout = type.getLayout().isIdentity();
   (void)status;
   assert(succeeded(status) && "expected strided memref");
 
@@ -1350,12 +1351,19 @@ computeReshapeCollapsedType(MemRefType type,
     unsigned dim = m.getNumResults();
     int64_t size = 1;
     AffineExpr stride = strides[currentDim + dim - 1];
-    if (!isReshapableDimBand(currentDim, dim, sizes, strides)) {
+    if (isIdentityLayout ||
+        isReshapableDimBand(currentDim, dim, sizes, strides)) {
+      for (unsigned d = 0; d < dim; ++d) {
+        int64_t currentSize = sizes[currentDim + d];
+        if (ShapedType::isDynamic(currentSize)) {
+          size = ShapedType::kDynamicSize;
+          break;
+        }
+        size *= currentSize;
+      }
+    } else {
       size = ShapedType::kDynamicSize;
       stride = AffineExpr();
-    } else {
-      for (unsigned d = 0; d < dim; ++d)
-        size *= sizes[currentDim + d];
     }
     newSizes.push_back(size);
     newStrides.push_back(stride);
diff --git a/mlir/test/Dialect/MemRef/canonicalize.mlir b/mlir/test/Dialect/MemRef/canonicalize.mlir
index bd7a8dd830a80..58083437ca47e 100644
--- a/mlir/test/Dialect/MemRef/canonicalize.mlir
+++ b/mlir/test/Dialect/MemRef/canonicalize.mlir
@@ -406,6 +406,8 @@ func @collapse_after_memref_cast_type_change(%arg0 : memref<?x512x1x1xf32>) -> m
   return %collapsed : memref<?x?xf32>
 }
 
+// -----
+
 // CHECK-LABEL:   func @collapse_after_memref_cast(
 // CHECK-SAME:      %[[INPUT:.*]]: memref<?x512x1x?xf32>) -> memref<?x?xf32> {
 // CHECK:           %[[COLLAPSED:.*]] = memref.collapse_shape %[[INPUT]]
@@ -419,6 +421,21 @@ func @collapse_after_memref_cast(%arg0 : memref<?x512x1x?xf32>) -> memref<?x?xf3
 
 // -----
 
+// CHECK-LABEL:   func @collapse_after_memref_cast_type_change_dynamic(
+// CHECK-SAME:      %[[INPUT:.*]]: memref<1x1x1x?xi64>) -> memref<?x?xi64> {
+// CHECK:           %[[COLLAPSED:.*]] = memref.collapse_shape %[[INPUT]]
+// CHECK_SAME:        {{\[\[}}0, 1, 2], [3]] : memref<1x1x1x?xi64> into memref<1x?xi64>
+// CHECK:           %[[DYNAMIC:.*]] = memref.cast %[[COLLAPSED]] :
+// CHECK-SAME:         memref<1x?xi64> to memref<?x?xi64>
+// CHECK:           return %[[DYNAMIC]] : memref<?x?xi64>
+func @collapse_after_memref_cast_type_change_dynamic(%arg0: memref<1x1x1x?xi64>) -> memref<?x?xi64> {
+  %casted = memref.cast %arg0 : memref<1x1x1x?xi64> to memref<1x1x?x?xi64>
+  %collapsed = memref.collapse_shape %casted [[0, 1, 2], [3]] : memref<1x1x?x?xi64> into memref<?x?xi64>
+  return %collapsed : memref<?x?xi64>
+}
+
+// -----
+
 func @reduced_memref(%arg0: memref<2x5x7x1xf32>, %arg1 :index)
     -> memref<1x4x1xf32, affine_map<(d0, d1, d2)[s0] -> (d0 * 35 + s0 + d1 * 7 + d2)>> {
   %c0 = arith.constant 0 : index

From 9e24d14ac89f44bb6c9141561ca849ccdd09e6a8 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Thu, 20 Jan 2022 15:05:38 +0100
Subject: [PATCH 006/946] [llvm][vfs] NFC: Virtualize in-memory `getStatus`

This patch virtualizes the `getStatus` function on `InMemoryNode` in LLVM VFS. Currently, this is implemented via top-level function `getNodeStatus` that tries to cast `InMemoryNode *` into each subtype. Virtual functions seem to be the simpler solution here.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D117649
---
 llvm/lib/Support/VirtualFileSystem.cpp | 29 +++++++++++---------------
 1 file changed, 12 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 7b752b557f8e6..1536f4de2e267 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -574,6 +574,11 @@ class InMemoryNode {
   }
   virtual ~InMemoryNode() = default;
 
+  /// Return the \p Status for this node. \p RequestedName should be the name
+  /// through which the caller referred to this node. It will override
+  /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
+  virtual Status getStatus(const Twine &RequestedName) const = 0;
+
   /// Get the filename of this node (the name without the directory part).
   StringRef getFileName() const { return FileName; }
   InMemoryNodeKind getKind() const { return Kind; }
@@ -589,10 +594,7 @@ class InMemoryFile : public InMemoryNode {
       : InMemoryNode(Stat.getName(), IME_File), Stat(std::move(Stat)),
         Buffer(std::move(Buffer)) {}
 
-  /// Return the \p Status for this node. \p RequestedName should be the name
-  /// through which the caller referred to this node. It will override
-  /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
-  Status getStatus(const Twine &RequestedName) const {
+  Status getStatus(const Twine &RequestedName) const override {
     return Status::copyWithNewName(Stat, RequestedName);
   }
   llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); }
@@ -616,6 +618,10 @@ class InMemoryHardLink : public InMemoryNode {
       : InMemoryNode(Path, IME_HardLink), ResolvedFile(ResolvedFile) {}
   const InMemoryFile &getResolvedFile() const { return ResolvedFile; }
 
+  Status getStatus(const Twine &RequestedName) const override {
+    return ResolvedFile.getStatus(RequestedName);
+  }
+
   std::string toString(unsigned Indent) const override {
     return std::string(Indent, ' ') + "HardLink to -> " +
            ResolvedFile.toString(0);
@@ -668,7 +674,7 @@ class InMemoryDirectory : public InMemoryNode {
   /// Return the \p Status for this node. \p RequestedName should be the name
   /// through which the caller referred to this node. It will override
   /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
-  Status getStatus(const Twine &RequestedName) const {
+  Status getStatus(const Twine &RequestedName) const override {
     return Status::copyWithNewName(Stat, RequestedName);
   }
 
@@ -704,17 +710,6 @@ class InMemoryDirectory : public InMemoryNode {
   }
 };
 
-namespace {
-Status getNodeStatus(const InMemoryNode *Node, const Twine &RequestedName) {
-  if (auto Dir = dyn_cast<detail::InMemoryDirectory>(Node))
-    return Dir->getStatus(RequestedName);
-  if (auto File = dyn_cast<detail::InMemoryFile>(Node))
-    return File->getStatus(RequestedName);
-  if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node))
-    return Link->getResolvedFile().getStatus(RequestedName);
-  llvm_unreachable("Unknown node type");
-}
-} // namespace
 } // namespace detail
 
 // The UniqueID of in-memory files is derived from path and content.
@@ -923,7 +918,7 @@ bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
 llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
   auto Node = lookupInMemoryNode(*this, Root.get(), Path);
   if (Node)
-    return detail::getNodeStatus(*Node, Path);
+    return (*Node)->getStatus(Path);
   return Node.getError();
 }
 

From 9011903e3613af360c3a1d05c106e464538efd08 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Thu, 20 Jan 2022 15:07:39 +0100
Subject: [PATCH 007/946] [llvm][vfs] Abstract in-memory node creation

The creation of in-memory VFS nodes happens in a single function that deduces what kind of node to create from the arguments. This leads to complicated if-then-else logic that's difficult to cleanly extend.

This patch abstracts away in-memory node creation via a type-erased factory function that's passed instead.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D117648
---
 llvm/include/llvm/Support/VirtualFileSystem.h | 24 +++++--
 llvm/lib/Support/VirtualFileSystem.cpp        | 65 +++++++++++--------
 2 files changed, 58 insertions(+), 31 deletions(-)

diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index 9b5ff8f20ae2d..305096dff67eb 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -419,6 +419,21 @@ namespace detail {
 
 class InMemoryDirectory;
 class InMemoryFile;
+class InMemoryNode;
+
+struct NewInMemoryNodeInfo {
+  llvm::sys::fs::UniqueID DirUID;
+  StringRef Path;
+  StringRef Name;
+  time_t ModificationTime;
+  std::unique_ptr<llvm::MemoryBuffer> Buffer;
+  uint32_t User;
+  uint32_t Group;
+  llvm::sys::fs::file_type Type;
+  llvm::sys::fs::perms Perms;
+
+  Status makeStatus() const;
+};
 
 } // namespace detail
 
@@ -428,14 +443,15 @@ class InMemoryFileSystem : public FileSystem {
   std::string WorkingDirectory;
   bool UseNormalizedPaths = true;
 
-  /// If HardLinkTarget is non-null, a hardlink is created to the To path which
-  /// must be a file. If it is null then it adds the file as the public addFile.
+  using MakeNodeFn = llvm::function_ref<std::unique_ptr<detail::InMemoryNode>(
+      detail::NewInMemoryNodeInfo)>;
+
+  /// Create node with \p MakeNode and add it into this filesystem at \p Path.
   bool addFile(const Twine &Path, time_t ModificationTime,
                std::unique_ptr<llvm::MemoryBuffer> Buffer,
                Optional<uint32_t> User, Optional<uint32_t> Group,
                Optional<llvm::sys::fs::file_type> Type,
-               Optional<llvm::sys::fs::perms> Perms,
-               const detail::InMemoryFile *HardLinkTarget);
+               Optional<llvm::sys::fs::perms> Perms, MakeNodeFn MakeNode);
 
 public:
   explicit InMemoryFileSystem(bool UseNormalizedPaths = true);
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index 1536f4de2e267..a963beb180bae 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -729,6 +729,16 @@ static sys::fs::UniqueID getDirectoryID(sys::fs::UniqueID Parent,
   return getUniqueID(llvm::hash_combine(Parent.getFile(), Name));
 }
 
+Status detail::NewInMemoryNodeInfo::makeStatus() const {
+  UniqueID UID =
+      (Type == sys::fs::file_type::directory_file)
+          ? getDirectoryID(DirUID, Name)
+          : getFileID(DirUID, Name, Buffer ? Buffer->getBuffer() : "");
+
+  return Status(Path, UID, llvm::sys::toTimePoint(ModificationTime), User,
+                Group, Buffer ? Buffer->getBufferSize() : 0, Type, Perms);
+}
+
 InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths)
     : Root(new detail::InMemoryDirectory(
           Status("", getDirectoryID(llvm::sys::fs::UniqueID(), ""),
@@ -749,7 +759,7 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
                                  Optional<uint32_t> Group,
                                  Optional<llvm::sys::fs::file_type> Type,
                                  Optional<llvm::sys::fs::perms> Perms,
-                                 const detail::InMemoryFile *HardLinkTarget) {
+                                 MakeNodeFn MakeNode) {
   SmallString<128> Path;
   P.toVector(Path);
 
@@ -770,7 +780,6 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
   const auto ResolvedGroup = Group.getValueOr(0);
   const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file);
   const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all);
-  assert(!(HardLinkTarget && Buffer) && "HardLink cannot have a buffer");
   // Any intermediate directories we create should be accessible by
   // the owner, even if Perms says otherwise for the final path.
   const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
@@ -781,27 +790,10 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
     if (!Node) {
       if (I == E) {
         // End of the path.
-        std::unique_ptr<detail::InMemoryNode> Child;
-        if (HardLinkTarget)
-          Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget));
-        else {
-          // Create a new file or directory.
-          Status Stat(
-              P.str(),
-              (ResolvedType == sys::fs::file_type::directory_file)
-                  ? getDirectoryID(Dir->getUniqueID(), Name)
-                  : getFileID(Dir->getUniqueID(), Name, Buffer->getBuffer()),
-              llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
-              ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
-              ResolvedPerms);
-          if (ResolvedType == sys::fs::file_type::directory_file) {
-            Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
-          } else {
-            Child.reset(
-                new detail::InMemoryFile(std::move(Stat), std::move(Buffer)));
-          }
-        }
-        Dir->addChild(Name, std::move(Child));
+        Dir->addChild(
+            Name, MakeNode({Dir->getUniqueID(), Path, Name, ModificationTime,
+                            std::move(Buffer), ResolvedUser, ResolvedGroup,
+                            ResolvedType, ResolvedPerms}));
         return true;
       }
 
@@ -845,7 +837,15 @@ bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
                                  Optional<llvm::sys::fs::file_type> Type,
                                  Optional<llvm::sys::fs::perms> Perms) {
   return addFile(P, ModificationTime, std::move(Buffer), User, Group, Type,
-                 Perms, /*HardLinkTarget=*/nullptr);
+                 Perms,
+                 [](detail::NewInMemoryNodeInfo NNI)
+                     -> std::unique_ptr<detail::InMemoryNode> {
+                   Status Stat = NNI.makeStatus();
+                   if (Stat.getType() == sys::fs::file_type::directory_file)
+                     return std::make_unique<detail::InMemoryDirectory>(Stat);
+                   return std::make_unique<detail::InMemoryFile>(
+                       Stat, std::move(NNI.Buffer));
+                 });
 }
 
 bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
@@ -856,7 +856,15 @@ bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
                                       Optional<llvm::sys::fs::perms> Perms) {
   return addFile(P, ModificationTime, llvm::MemoryBuffer::getMemBuffer(Buffer),
                  std::move(User), std::move(Group), std::move(Type),
-                 std::move(Perms));
+                 std::move(Perms),
+                 [](detail::NewInMemoryNodeInfo NNI)
+                     -> std::unique_ptr<detail::InMemoryNode> {
+                   Status Stat = NNI.makeStatus();
+                   if (Stat.getType() == sys::fs::file_type::directory_file)
+                     return std::make_unique<detail::InMemoryDirectory>(Stat);
+                   return std::make_unique<detail::InMemoryFile>(
+                       Stat, std::move(NNI.Buffer));
+                 });
 }
 
 static ErrorOr<const detail::InMemoryNode *>
@@ -911,8 +919,11 @@ bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
   // before. Resolved ToPath must be a File.
   if (!ToNode || FromNode || !isa<detail::InMemoryFile>(*ToNode))
     return false;
-  return this->addFile(FromPath, 0, nullptr, None, None, None, None,
-                       cast<detail::InMemoryFile>(*ToNode));
+  return addFile(FromPath, 0, nullptr, None, None, None, None,
+                 [&](detail::NewInMemoryNodeInfo NNI) {
+                   return std::make_unique<detail::InMemoryHardLink>(
+                       NNI.Path.str(), *cast<detail::InMemoryFile>(*ToNode));
+                 });
 }
 
 llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {

From 14c5fd920b0e4e05c5deed358af47707e579dd6d Mon Sep 17 00:00:00 2001
From: eopXD <eop.chen@sifive.com>
Date: Sat, 6 Nov 2021 07:54:58 -0700
Subject: [PATCH 008/946] [Clang][RISCV] Change TARGET_BUILTIN to require
 zve32x for vector instruction

According to v-spec v1.0, `zve-32x` is the new minimum extension to include
to have vector instructions.

Reviewed By: kito-cheng

Differential Revision: https://reviews.llvm.org/D112613
---
 clang/utils/TableGen/RISCVVEmitter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 08724fb353972..d3f1d63185f4a 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -1027,7 +1027,7 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) {
 
   OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n";
   OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, "
-        "ATTRS, \"experimental-v\")\n";
+        "ATTRS, \"experimental-zve32x\")\n";
   OS << "#endif\n";
   for (auto &Def : Defs) {
     auto P =

From 4130357f96bf44e73032ada6d86acd1e8f9f74a1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 20 Jan 2022 14:58:23 +0000
Subject: [PATCH 009/946] [X86] Fix v16f32 ADDSUB test

This was supposed to ensure we're not generating 512-bit ADDSUB nodes, but cut+paste typos meant we weren't generating a full 512-bit pattern
---
 llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll | 61 ++++++++++++++++------
 1 file changed, 46 insertions(+), 15 deletions(-)

diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 95587ee7ae6b0..1087ef9193d79 100644
--- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -519,14 +519,14 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    addsubps %xmm4, %xmm0
 ; SSE-NEXT:    addsubps %xmm5, %xmm1
-; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    movaps %xmm1, %xmm3
+; SSE-NEXT:    addsubps %xmm6, %xmm2
+; SSE-NEXT:    addsubps %xmm7, %xmm3
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test17:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
-; AVX1-NEXT:    vmovaps %ymm0, %ymm1
+; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
 ;
 ; AVX512-LABEL: test17:
@@ -543,9 +543,39 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) {
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3]
 ; AVX512-NEXT:    vpermilps {{.*#+}} xmm4 = xmm1[3,3,3,3]
 ; AVX512-NEXT:    vaddss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0]
-; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX512-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm8 = xmm2[0,1,2],xmm3[0]
+; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX512-NEXT:    vextractf128 $1, %ymm1, %xmm4
+; AVX512-NEXT:    vsubss %xmm4, %xmm3, %xmm5
+; AVX512-NEXT:    vpermilpd {{.*#+}} xmm6 = xmm3[1,0]
+; AVX512-NEXT:    vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512-NEXT:    vsubss %xmm7, %xmm6, %xmm6
+; AVX512-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3]
+; AVX512-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; AVX512-NEXT:    vaddss %xmm2, %xmm7, %xmm2
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[2,3]
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm6[0],xmm2[3]
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm3 = xmm3[3,3,3,3]
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm4 = xmm4[3,3,3,3]
+; AVX512-NEXT:    vaddss %xmm4, %xmm3, %xmm3
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm9 = xmm2[0,1,2],xmm3[0]
+; AVX512-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
+; AVX512-NEXT:    vextractf32x4 $2, %zmm1, %xmm4
+; AVX512-NEXT:    vsubss %xmm4, %xmm2, %xmm5
+; AVX512-NEXT:    vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
+; AVX512-NEXT:    vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512-NEXT:    vsubss %xmm7, %xmm6, %xmm6
+; AVX512-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm2[1,1,3,3]
+; AVX512-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; AVX512-NEXT:    vaddss %xmm3, %xmm7, %xmm3
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3]
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm6[0],xmm3[3]
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
+; AVX512-NEXT:    vpermilps {{.*#+}} xmm4 = xmm4[3,3,3,3]
+; AVX512-NEXT:    vaddss %xmm4, %xmm2, %xmm2
+; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
+; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
+; AVX512-NEXT:    vextractf32x4 $3, %zmm1, %xmm1
 ; AVX512-NEXT:    vsubss %xmm1, %xmm0, %xmm3
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
 ; AVX512-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
@@ -560,7 +590,8 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) {
 ; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
 ; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
-; AVX512-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512-NEXT:    vinsertf128 $1, %xmm9, %ymm8, %ymm1
+; AVX512-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512-NEXT:    retq
   %1 = extractelement <16 x float> %A, i32 0
   %2 = extractelement <16 x float> %B, i32 0
@@ -588,28 +619,28 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) {
   %add4 = fadd float %15, %16
   %17 = extractelement <16 x float> %A, i32 8
   %18 = extractelement <16 x float> %B, i32 8
-  %sub5 = fsub float %1, %2
+  %sub5 = fsub float %17, %18
   %19 = extractelement <16 x float> %A, i32 10
   %20 = extractelement <16 x float> %B, i32 10
-  %sub6 = fsub float %3, %4
+  %sub6 = fsub float %19, %20
   %21 = extractelement <16 x float> %A, i32 9
   %22 = extractelement <16 x float> %B, i32 9
-  %add5 = fadd float %5, %6
+  %add5 = fadd float %21, %22
   %23 = extractelement <16 x float> %A, i32 11
   %24 = extractelement <16 x float> %B, i32 11
-  %add6 = fadd float %7, %8
+  %add6 = fadd float %23, %24
   %25 = extractelement <16 x float> %A, i32 12
   %26 = extractelement <16 x float> %B, i32 12
-  %sub7 = fsub float %9, %10
+  %sub7 = fsub float %25, %26
   %27 = extractelement <16 x float> %A, i32 14
   %28 = extractelement <16 x float> %B, i32 14
-  %sub8 = fsub float %11, %12
+  %sub8 = fsub float %27, %28
   %29 = extractelement <16 x float> %A, i32 13
   %30 = extractelement <16 x float> %B, i32 13
-  %add7 = fadd float %13, %14
+  %add7 = fadd float %29, %30
   %31 = extractelement <16 x float> %A, i32 15
   %32 = extractelement <16 x float> %B, i32 15
-  %add8 = fadd float %15, %16
+  %add8 = fadd float %31, %32
   %vecinsert1 = insertelement <16 x float> undef, float %add, i32 1
   %vecinsert2 = insertelement <16 x float> %vecinsert1, float %add2, i32 3
   %vecinsert3 = insertelement <16 x float> %vecinsert2, float %sub, i32 0

From 81d35f27ddec579f616e3f5f15b25fe4cc92236e Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 18 Jan 2022 17:38:08 +0100
Subject: [PATCH 010/946] [DebugInstrRef] Memoize variable order during sorting
 (NFC)

Instead of constructing DebugVariables and looking up the order
in the comparison function, compute the order upfront and then sort
a vector of (order, instr).

This improves compile-time by -0.4% geomean on CTMark ReleaseLTO-g.

Differential Revision: https://reviews.llvm.org/D117575
---
 .../LiveDebugValues/InstrRefBasedImpl.cpp     | 40 +++++++++----------
 1 file changed, 18 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
index 849f2895b4a58..8a190e7699414 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/InstrRefBasedImpl.cpp
@@ -2806,31 +2806,28 @@ void InstrRefBasedLDV::emitLocations(
     }
   }
 
-  // We have to insert DBG_VALUEs in a consistent order, otherwise they appeaer
-  // in DWARF in different orders. Use the order that they appear when walking
-  // through each block / each instruction, stored in AllVarsNumbering.
-  auto OrderDbgValues = [&](const MachineInstr *A,
-                            const MachineInstr *B) -> bool {
-    DebugVariable VarA(A->getDebugVariable(), A->getDebugExpression(),
-                       A->getDebugLoc()->getInlinedAt());
-    DebugVariable VarB(B->getDebugVariable(), B->getDebugExpression(),
-                       B->getDebugLoc()->getInlinedAt());
-    return AllVarsNumbering.find(VarA)->second <
-           AllVarsNumbering.find(VarB)->second;
-  };
-
   // Go through all the transfers recorded in the TransferTracker -- this is
   // both the live-ins to a block, and any movements of values that happen
   // in the middle.
-  for (auto &P : TTracker->Transfers) {
-    // Sort them according to appearance order.
-    llvm::sort(P.Insts, OrderDbgValues);
+  for (const auto &P : TTracker->Transfers) {
+    // We have to insert DBG_VALUEs in a consistent order, otherwise they
+    // appear in DWARF in different orders. Use the order that they appear
+    // when walking through each block / each instruction, stored in
+    // AllVarsNumbering.
+    SmallVector<std::pair<unsigned, MachineInstr *>> Insts;
+    for (MachineInstr *MI : P.Insts) {
+      DebugVariable Var(MI->getDebugVariable(), MI->getDebugExpression(),
+                        MI->getDebugLoc()->getInlinedAt());
+      Insts.emplace_back(AllVarsNumbering.find(Var)->second, MI);
+    }
+    llvm::sort(Insts,
+               [](const auto &A, const auto &B) { return A.first < B.first; });
+
     // Insert either before or after the designated point...
     if (P.MBB) {
       MachineBasicBlock &MBB = *P.MBB;
-      for (auto *MI : P.Insts) {
-        MBB.insert(P.Pos, MI);
-      }
+      for (const auto &Pair : Insts)
+        MBB.insert(P.Pos, Pair.second);
     } else {
       // Terminators, like tail calls, can clobber things. Don't try and place
       // transfers after them.
@@ -2838,9 +2835,8 @@ void InstrRefBasedLDV::emitLocations(
         continue;
 
       MachineBasicBlock &MBB = *P.Pos->getParent();
-      for (auto *MI : P.Insts) {
-        MBB.insertAfterBundle(P.Pos, MI);
-      }
+      for (const auto &Pair : Insts)
+        MBB.insertAfterBundle(P.Pos, Pair.second);
     }
   }
 }

From f29256a64ac11cf59cea878c8d1ba9537db4f523 Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Wed, 19 Jan 2022 21:19:53 -0800
Subject: [PATCH 011/946] [MLGO] Improved support for AOT cross-targeting
 scenarios

The tensorflow AOT compiler can cross-target, but it can't run on (for
example) arm64. We added earlier support where the AOT-ed header and object
would be built on a separate builder and then passed at build time to
a build host where the AOT compiler can't run, but clang can be otherwise
built.

To simplify such scenarios given we now support more than one AOT-able
case (regalloc and inliner), we make the AOT scenario centered on whether
files are generated, case by case (this includes the "passed from a
different builder" scenario).
This means we shouldn't need an 'umbrella' LLVM_HAVE_TF_AOT, in favor of
case by case control. A builder can opt out of an AOT case by passing that case's
model path as `none`. Note that the overrides still take precedence.

This patch controls conditional compilation with case-specific flags,
which can be enabled locally, for the component where those are
available. We still keep an overall flag for some tests.

The 'development/training' mode is unchanged, because there the model is
passed from the command line and interpreted.

Differential Revision: https://reviews.llvm.org/D117752
---
 llvm/CMakeLists.txt                           |   8 +-
 llvm/cmake/modules/TensorFlowCompile.cmake    | 100 +++++++++---------
 llvm/include/llvm/Analysis/InlineAdvisor.h    |   4 -
 llvm/include/llvm/Config/llvm-config.h.cmake  |   3 -
 llvm/lib/Analysis/InlineAdvisor.cpp           |   3 +
 llvm/lib/Analysis/MLInlineAdvisor.cpp         |  26 +++--
 llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp   |   4 +-
 llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp  |   3 +
 llvm/lib/CodeGen/RegAllocEvictionAdvisor.h    |   5 -
 .../CodeGen/MLRegalloc/dev-rel-equivalence.ll |   1 -
 .../Transforms/Inline/ML/bounds-checks.ll     |   1 -
 .../Inline/ML/ml-test-release-mode.ll         |   1 -
 12 files changed, 80 insertions(+), 79 deletions(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 4f248c58e1822..687bc6489b4ac 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -891,13 +891,17 @@ if (NOT TENSORFLOW_AOT_PATH STREQUAL "")
     ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX} COMPONENT tf_xla_runtime)
   set_property(GLOBAL APPEND PROPERTY LLVM_EXPORTS tf_xla_runtime)
   # Once we add more modules, we should handle this more automatically.
-  if (NOT DEFINED LLVM_INLINER_MODEL_PATH
+  if (DEFINED LLVM_OVERRIDE_MODEL_HEADER_INLINERSIZEMODEL)
+    set(LLVM_INLINER_MODEL_PATH "none")
+  elseif(NOT DEFINED LLVM_INLINER_MODEL_PATH
       OR "${LLVM_INLINER_MODEL_PATH}" STREQUAL ""
       OR "${LLVM_INLINER_MODEL_PATH}" STREQUAL "autogenerate")
     set(LLVM_INLINER_MODEL_PATH "autogenerate")
     set(LLVM_INLINER_MODEL_AUTOGENERATED 1)
   endif()
-  if (NOT DEFINED LLVM_RAEVICT_MODEL_PATH
+  if (DEFINED LLVM_OVERRIDE_MODEL_HEADER_REGALLOCEVICTMODEL)
+    set(LLVM_RAEVICT_MODEL_PATH "none")
+  elseif(NOT DEFINED LLVM_RAEVICT_MODEL_PATH
       OR "${LLVM_RAEVICT_MODEL_PATH}" STREQUAL ""
       OR "${LLVM_RAEVICT_MODEL_PATH}" STREQUAL "autogenerate")
     set(LLVM_RAEVICT_MODEL_PATH "autogenerate")
diff --git a/llvm/cmake/modules/TensorFlowCompile.cmake b/llvm/cmake/modules/TensorFlowCompile.cmake
index 9427a26de8be6..4f79f6653c88f 100644
--- a/llvm/cmake/modules/TensorFlowCompile.cmake
+++ b/llvm/cmake/modules/TensorFlowCompile.cmake
@@ -44,35 +44,21 @@ endfunction()
 # Produce a pair of files called ${fname}.h and  ${fname}.o in the
 # ${CMAKE_CURRENT_BINARY_DIR}. The generated header will define a C++ class
 # called ${cpp_class} - which may be a namespace-qualified class name.
-function(tfcompile model tag_set signature_def_key fname cpp_class)
-  set(prefix ${CMAKE_CURRENT_BINARY_DIR}/${fname})
-  set(obj_file ${prefix}.o)
-  set(hdr_file ${prefix}.h)
-  string(TOUPPER ${fname} fname_allcaps)
-  set(override_header ${LLVM_OVERRIDE_MODEL_HEADER_${fname_allcaps}})
-  set(override_object ${LLVM_OVERRIDE_MODEL_OBJECT_${fname_allcaps}})
-  if (EXISTS "${override_header}" AND EXISTS "${override_object}")
-    configure_file(${override_header} ${hdr_file} COPYONLY)
-    configure_file(${override_object} ${obj_file} COPYONLY)
-    message("Using provided header "
-      ${hdr_file} " and object "   ${obj_file}
-      " files for model " ${model})
-  else()
-    tf_get_absolute_path(${model} ${CMAKE_CURRENT_BINARY_DIR} LLVM_ML_MODELS_ABSOLUTE)
-    message("Using model at " ${LLVM_ML_MODELS_ABSOLUTE})
-    add_custom_command(OUTPUT ${obj_file} ${hdr_file}
-      COMMAND ${TENSORFLOW_AOT_COMPILER} aot_compile_cpu
-            --multithreading false
-            --dir ${LLVM_ML_MODELS_ABSOLUTE}
-            --tag_set ${tag_set}
-            --signature_def_key ${signature_def_key}
-            --output_prefix ${prefix}
-            --cpp_class ${cpp_class}
-            --target_triple ${LLVM_HOST_TRIPLE}
-    )
-  endif()
+function(tf_compile model tag_set signature_def_key fname cpp_class hdr_file obj_file)
+  tf_get_absolute_path(${model} ${CMAKE_CURRENT_BINARY_DIR} LLVM_ML_MODELS_ABSOLUTE)
+  message("Using model at " ${LLVM_ML_MODELS_ABSOLUTE})
+  add_custom_command(OUTPUT ${obj_file} ${hdr_file}
+    COMMAND ${TENSORFLOW_AOT_COMPILER} aot_compile_cpu
+          --multithreading false
+          --dir ${LLVM_ML_MODELS_ABSOLUTE}
+          --tag_set ${tag_set}
+          --signature_def_key ${signature_def_key}
+          --output_prefix ${prefix}
+          --cpp_class ${cpp_class}
+          --target_triple ${LLVM_HOST_TRIPLE}
+  )
 
-  # Aggregate the objects so that results of different tfcompile calls may be
+  # Aggregate the objects so that results of different tf_compile calls may be
   # grouped into one target.
   set(GENERATED_OBJS ${GENERATED_OBJS} ${obj_file} PARENT_SCOPE)
   set_source_files_properties(${obj_file} PROPERTIES
@@ -82,36 +68,50 @@ function(tfcompile model tag_set signature_def_key fname cpp_class)
   set_source_files_properties(${hdr_file} PROPERTIES
     GENERATED 1)
 
-endfunction()
+  endfunction()
 
 function(tf_find_and_compile model default_url default_path test_model_generator tag_set signature_def_key fname cpp_class)
-  if ("${model}" STREQUAL "download")
-    # Crash if the user wants to download a model but a URL is set to "TO_BE_UPDATED"
-    if ("${default_url}" STREQUAL "TO_BE_UPDATED")
-        message(FATAL_ERROR "Default URL was set to 'download' but there is no model url currently specified in cmake - likely, the model interface recently changed, and so there is not a released model available.")
+  set(prefix ${CMAKE_CURRENT_BINARY_DIR}/${fname})
+  set(obj_file ${prefix}.o)
+  set(hdr_file ${prefix}.h)
+  string(TOUPPER ${fname} fname_allcaps)
+  set(override_header ${LLVM_OVERRIDE_MODEL_HEADER_${fname_allcaps}})
+  set(override_object ${LLVM_OVERRIDE_MODEL_OBJECT_${fname_allcaps}})
+  # If the user specified overrides, that indicates intent to use AOT and we
+  # don't care what the model path is
+  if (EXISTS "${override_header}" AND EXISTS "${override_object}")
+    configure_file(${override_header} ${hdr_file} COPYONLY)
+    configure_file(${override_object} ${obj_file} COPYONLY)
+    message(STATUS "Using provided header " ${hdr_file} " and object " ${obj_file} "
+      files for model " ${fname})  
+    set(GENERATED_OBJS ${GENERATED_OBJS} ${obj_file})
+    set(GENERATED_HEADERS ${GENERATED_HEADERS} ${hdr_file})
+  elseif("${model}" STREQUAL "none")
+    message(STATUS "Will skip enabling mlgo for ${fname}")
+    return()
+  else()
+    if ("${model}" STREQUAL "download")
+      # Crash if the user wants to download a model but a URL is set to "TO_BE_UPDATED"
+      if ("${default_url}" STREQUAL "TO_BE_UPDATED")
+          message(FATAL_ERROR "Default URL was set to 'download' but there is no"
+          " model url currently specified in cmake - likely, the model interface"
+          " recently changed, and so there is not a released model available.")
+      endif()
+
+      set(model ${default_url})
     endif()
 
-    set(model ${default_url})
-  endif()
+    if ("${model}" STREQUAL "autogenerate")
+      set(model ${default_path}-autogenerated)
+      generate_mock_model(${test_model_generator} ${model})
+    endif()
 
-  if ("${model}" STREQUAL "autogenerate")
-    set(model ${default_path}-autogenerated)  
-    generate_mock_model(${test_model_generator} ${model})
+    tf_get_model(${model} LLVM_ML_MODELS_ABSOLUTE)
+    tf_compile(${LLVM_ML_MODELS_ABSOLUTE} ${tag_set} ${signature_def_key} ${fname} ${cpp_class} ${hdr_file} ${obj_file})
   endif()
 
-  tf_get_model(${model} LLVM_ML_MODELS_ABSOLUTE)
-  tfcompile(${LLVM_ML_MODELS_ABSOLUTE} ${tag_set} ${signature_def_key} ${fname} ${cpp_class})
-
-  set(GENERATED_OBJS ${GENERATED_OBJS} ${obj_file} PARENT_SCOPE)
-  set_source_files_properties(${obj_file} PROPERTIES
-    GENERATED 1 EXTERNAL_OBJECT 1)
-
-  set(GENERATED_HEADERS ${GENERATED_HEADERS} ${hdr_file} PARENT_SCOPE)
-  set_source_files_properties(${hdr_file} PROPERTIES
-    GENERATED 1)
-
   set(GeneratedMLSources ${GeneratedMLSources} ${GENERATED_HEADERS} PARENT_SCOPE)
   set(MLDeps ${MLDeps} tf_xla_runtime PARENT_SCOPE)
   set(MLLinkDeps ${MLLinkDeps} tf_xla_runtime ${GENERATED_OBJS} PARENT_SCOPE)
-
+  add_definitions(-DLLVM_HAVE_TF_AOT_${fname_allcaps})
 endfunction()
diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h
index 9403f207e7055..0103ee7f83863 100644
--- a/llvm/include/llvm/Analysis/InlineAdvisor.h
+++ b/llvm/include/llvm/Analysis/InlineAdvisor.h
@@ -246,16 +246,12 @@ class InlineAdvisorAnalysisPrinterPass
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
 };
 
-#ifdef LLVM_HAVE_TF_AOT
 std::unique_ptr<InlineAdvisor>
 getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM);
-#endif
 
-#ifdef LLVM_HAVE_TF_API
 std::unique_ptr<InlineAdvisor>
 getDevelopmentModeAdvisor(Module &M, ModuleAnalysisManager &MAM,
                           std::function<bool(CallBase &)> GetDefaultAdvice);
-#endif
 
 // Default (manual policy) decision making helper APIs. Shared with the legacy
 // pass manager inliner.
diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake
index a5edc2084a8a5..ec18b40fe04d9 100644
--- a/llvm/include/llvm/Config/llvm-config.h.cmake
+++ b/llvm/include/llvm/Config/llvm-config.h.cmake
@@ -91,9 +91,6 @@
 /* Define if LLVM was built with a dependency to the libtensorflow dynamic library */
 #cmakedefine LLVM_HAVE_TF_API
 
-/* Define if LLVM was built with a dependency to the tensorflow compiler */
-#cmakedefine LLVM_HAVE_TF_AOT
-
 /* Define to 1 if you have the <sysexits.h> header file. */
 #cmakedefine HAVE_SYSEXITS_H ${HAVE_SYSEXITS_H}
 
diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp
index 53fa7a6824d30..f6e3dd354ff8e 100644
--- a/llvm/lib/Analysis/InlineAdvisor.cpp
+++ b/llvm/lib/Analysis/InlineAdvisor.cpp
@@ -27,6 +27,9 @@
 
 using namespace llvm;
 #define DEBUG_TYPE "inline"
+#ifdef LLVM_HAVE_TF_AOT_INLINERSIZEMODEL
+#define LLVM_HAVE_TF_AOT
+#endif
 
 // This weirdly named statistic tracks the number of times that, when attempting
 // to inline a function A into B, we analyze the callers of B in order to see
diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index c6ad1e05dd72c..203e0b025e6c6 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -11,36 +11,38 @@
 // 'release' mode) or a runtime-loaded model (the 'development' case).
 //
 //===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h"
-#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
-
-#include <limits>
-#include <unordered_map>
-#include <unordered_set>
-
+#include "llvm/Analysis/MLInlineAdvisor.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/FunctionPropertiesAnalysis.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InlineModelFeatureMaps.h"
 #include "llvm/Analysis/LazyCallGraph.h"
-#include "llvm/Analysis/MLInlineAdvisor.h"
 #include "llvm/Analysis/MLModelRunner.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ReleaseModeModelRunner.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Config/config.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Path.h"
 
+#include <limits>
+#include <unordered_map>
+#include <unordered_set>
+
 using namespace llvm;
 
-#ifdef LLVM_HAVE_TF_AOT
-#include "llvm/Analysis/ReleaseModeModelRunner.h"
+#ifdef LLVM_HAVE_TF_AOT_INLINERSIZEMODEL
+#define LLVM_HAVE_TF_AOT
+#endif
+
+#if defined(LLVM_HAVE_TF_AOT)
 // codegen-ed file
 #include "InlinerSizeModel.h" // NOLINT
-#include "llvm/Analysis/InlineModelFeatureMaps.h"
 
 std::unique_ptr<InlineAdvisor>
 llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
@@ -53,6 +55,8 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
 
 #define DEBUG_TYPE "inline-ml"
 
+#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
+
 static cl::opt<float> SizeIncreaseThreshold(
     "ml-advisor-size-increase-threshold", cl::Hidden,
     cl::desc("Maximum factor by which expected native size may increase before "
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 01c843724fe1b..848f63da288de 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -42,7 +42,9 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "ml-regalloc"
-
+#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
+#define LLVM_HAVE_TF_AOT
+#endif
 // Generated header in release (AOT) mode
 #if defined LLVM_HAVE_TF_AOT
 #include "RegallocEvictModel.h"
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
index d64e8cd06492b..87df7bb4a6896 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp
@@ -43,6 +43,9 @@ static cl::opt<bool> EnableLocalReassignment(
     cl::init(false));
 
 #define DEBUG_TYPE "regalloc"
+#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
+#define LLVM_HAVE_TF_AOT
+#endif
 
 char RegAllocEvictionAdvisorAnalysis::ID = 0;
 INITIALIZE_PASS(RegAllocEvictionAdvisorAnalysis, "regalloc-evict",
diff --git a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
index 8ad45c02285c4..33e03aed81a77 100644
--- a/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
+++ b/llvm/lib/CodeGen/RegAllocEvictionAdvisor.h
@@ -192,14 +192,9 @@ class RegAllocEvictionAdvisorAnalysis : public ImmutablePass {
 /// an instance of the eviction advisor.
 template <> Pass *callDefaultCtor<RegAllocEvictionAdvisorAnalysis>();
 
-// TODO(mtrofin): implement these.
-#ifdef LLVM_HAVE_TF_AOT
 RegAllocEvictionAdvisorAnalysis *createReleaseModeAdvisor();
-#endif
 
-#ifdef LLVM_HAVE_TF_API
 RegAllocEvictionAdvisorAnalysis *createDevelopmentModeAdvisor();
-#endif
 
 // TODO: move to RegAllocEvictionAdvisor.cpp when we move implementation
 // out of RegAllocGreedy.cpp
diff --git a/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll b/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll
index cedf19964996d..6f1b265480b42 100644
--- a/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll
+++ b/llvm/test/CodeGen/MLRegalloc/dev-rel-equivalence.ll
@@ -1,5 +1,4 @@
 ; REQUIRES: have_tf_api
-; REQUIRES: have_tf_aot
 ; REQUIRES: llvm_raevict_model_autogenerated
 ; REQUIRES: x86_64-linux
 ;
diff --git a/llvm/test/Transforms/Inline/ML/bounds-checks.ll b/llvm/test/Transforms/Inline/ML/bounds-checks.ll
index a462c6b4722fb..7270fa3a93b1e 100644
--- a/llvm/test/Transforms/Inline/ML/bounds-checks.ll
+++ b/llvm/test/Transforms/Inline/ML/bounds-checks.ll
@@ -2,7 +2,6 @@
 ; In all cases, the end result is the same: mandatory inlinings must happen.
 ; However, when we discover we 'trip' over the artificially-low size increase
 ; factor, we don't inline anymore.
-; REQUIRES: have_tf_aot
 ; REQUIRES: llvm_inliner_model_autogenerated
 ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=10.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=NOBOUNDS
 ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -ml-advisor-size-increase-threshold=1.0 -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=BOUNDS
diff --git a/llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll b/llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll
index 7b0a253b0509b..9cbbfdd124e1f 100644
--- a/llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll
+++ b/llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll
@@ -5,7 +5,6 @@
 ; This test uses Inputs/test-module.ll, as it will share it with a similar test
 ; for the 'development' mode.
 ;
-; REQUIRES: have_tf_aot
 ; REQUIRES: llvm_inliner_model_autogenerated
 ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=release -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=CHECK
 ; RUN: opt -passes=scc-oz-module-inliner -enable-ml-inliner=default -S < %S/Inputs/test-module.ll 2>&1 | FileCheck %S/Inputs/test-module.ll --check-prefix=DEFAULT

From 866311e71c8f1f3724754d006eb50299424d5b1b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 20 Jan 2022 15:15:46 +0000
Subject: [PATCH 012/946] [X86] lowerToAddSubOrFMAddSub - lower 512-bit ADDSUB
 patterns to blend(fsub,fadd)

AVX512 doesn't provide a ADDSUB instruction, but if we've built this from a build vector of scalar fsub/fadd elements we can still lower to blend(fsub,fadd)
---
 llvm/lib/Target/X86/X86ISelLowering.cpp    | 19 +++--
 llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll | 98 ++--------------------
 2 files changed, 20 insertions(+), 97 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0d12cf519a8b1..71c80d518f998 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10079,13 +10079,18 @@ static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
   if (IsSubAdd)
     return SDValue();
 
-  // Do not generate X86ISD::ADDSUB node for 512-bit types even though
-  // the ADDSUB idiom has been successfully recognized. There are no known
-  // X86 targets with 512-bit ADDSUB instructions!
-  // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
-  // recognition.
-  if (VT.is512BitVector())
-    return SDValue();
+  // There are no known X86 targets with 512-bit ADDSUB instructions!
+  // Convert to blend(fsub,fadd).
+  if (VT.is512BitVector()) {
+    SmallVector<int> Mask;
+    for (int I = 0, E = VT.getVectorNumElements(); I != E; I += 2) {
+        Mask.push_back(I);
+        Mask.push_back(I + E + 1);
+    }
+    SDValue Sub = DAG.getNode(ISD::FSUB, DL, VT, Opnd0, Opnd1);
+    SDValue Add = DAG.getNode(ISD::FADD, DL, VT, Opnd0, Opnd1);
+    return DAG.getVectorShuffle(VT, DL, Sub, Add, Mask);
+  }
 
   return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
 }
diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
index 1087ef9193d79..3ae8a1fd6659a 100644
--- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
+++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll
@@ -531,67 +531,11 @@ define <16 x float> @test17(<16 x float> %A, <16 x float> %B) {
 ;
 ; AVX512-LABEL: test17:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vsubss %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm1[1,0]
-; AVX512-NEXT:    vsubss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
-; AVX512-NEXT:    vaddss %xmm5, %xmm4, %xmm4
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[2,3]
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3]
-; AVX512-NEXT:    vpermilps {{.*#+}} xmm3 = xmm0[3,3,3,3]
-; AVX512-NEXT:    vpermilps {{.*#+}} xmm4 = xmm1[3,3,3,3]
-; AVX512-NEXT:    vaddss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm8 = xmm2[0,1,2],xmm3[0]
-; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX512-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX512-NEXT:    vsubss %xmm4, %xmm3, %xmm5
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm6 = xmm3[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
-; AVX512-NEXT:    vsubss %xmm7, %xmm6, %xmm6
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm3[1,1,3,3]
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm4[1,1,3,3]
-; AVX512-NEXT:    vaddss %xmm2, %xmm7, %xmm2
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[2,3]
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm6[0],xmm2[3]
-; AVX512-NEXT:    vpermilps {{.*#+}} xmm3 = xmm3[3,3,3,3]
-; AVX512-NEXT:    vpermilps {{.*#+}} xmm4 = xmm4[3,3,3,3]
-; AVX512-NEXT:    vaddss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm9 = xmm2[0,1,2],xmm3[0]
-; AVX512-NEXT:    vextractf32x4 $2, %zmm0, %xmm2
-; AVX512-NEXT:    vextractf32x4 $2, %zmm1, %xmm4
-; AVX512-NEXT:    vsubss %xmm4, %xmm2, %xmm5
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm6 = xmm2[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
-; AVX512-NEXT:    vsubss %xmm7, %xmm6, %xmm6
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm7 = xmm2[1,1,3,3]
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm4[1,1,3,3]
-; AVX512-NEXT:    vaddss %xmm3, %xmm7, %xmm3
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm3 = xmm5[0],xmm3[0],xmm5[2,3]
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm6[0],xmm3[3]
-; AVX512-NEXT:    vpermilps {{.*#+}} xmm2 = xmm2[3,3,3,3]
-; AVX512-NEXT:    vpermilps {{.*#+}} xmm4 = xmm4[3,3,3,3]
-; AVX512-NEXT:    vaddss %xmm4, %xmm2, %xmm2
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm2 = xmm3[0,1,2],xmm2[0]
-; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
-; AVX512-NEXT:    vextractf32x4 $3, %zmm1, %xmm1
-; AVX512-NEXT:    vsubss %xmm1, %xmm0, %xmm3
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm0[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm1[1,0]
-; AVX512-NEXT:    vsubss %xmm5, %xmm4, %xmm4
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
-; AVX512-NEXT:    vmovshdup {{.*#+}} xmm6 = xmm1[1,1,3,3]
-; AVX512-NEXT:    vaddss %xmm6, %xmm5, %xmm5
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[2,3]
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0],xmm3[3]
-; AVX512-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; AVX512-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
-; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
-; AVX512-NEXT:    vinsertf128 $1, %xmm9, %ymm8, %ymm1
-; AVX512-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    movw $-21846, %ax # imm = 0xAAAA
+; AVX512-NEXT:    kmovw %eax, %k1
+; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512-NEXT:    vmovaps %zmm2, %zmm0
 ; AVX512-NEXT:    retq
   %1 = extractelement <16 x float> %A, i32 0
   %2 = extractelement <16 x float> %B, i32 0
@@ -677,35 +621,9 @@ define <8 x double> @test18(<8 x double> %A, <8 x double> %B) {
 ;
 ; AVX512-LABEL: test18:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX512-NEXT:    vextractf128 $1, %ymm1, %xmm4
-; AVX512-NEXT:    vsubsd %xmm4, %xmm3, %xmm5
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm7 = xmm1[1,0]
-; AVX512-NEXT:    vaddsd %xmm7, %xmm6, %xmm6
-; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm6[0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512-NEXT:    vaddsd %xmm4, %xmm3, %xmm3
-; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm3 = xmm5[0],xmm3[0]
-; AVX512-NEXT:    vextractf32x4 $2, %zmm0, %xmm4
-; AVX512-NEXT:    vextractf32x4 $2, %zmm1, %xmm5
-; AVX512-NEXT:    vsubsd %xmm5, %xmm4, %xmm6
-; AVX512-NEXT:    vextractf32x4 $3, %zmm0, %xmm0
-; AVX512-NEXT:    vextractf32x4 $3, %zmm1, %xmm1
-; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm7
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512-NEXT:    vaddsd %xmm5, %xmm4, %xmm4
-; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
-; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm7[0],xmm0[0]
-; AVX512-NEXT:    vinsertf128 $1, %xmm0, %ymm4, %ymm0
-; AVX512-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX512-NEXT:    vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm2[1],zmm0[2],zmm2[3],zmm0[4],zmm2[5],zmm0[6],zmm2[7]
 ; AVX512-NEXT:    retq
   %1 = extractelement <8 x double> %A, i32 0
   %2 = extractelement <8 x double> %B, i32 0

From 0f283de9d195e873006ae522660698b10bd81452 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 20 Jan 2022 16:25:22 +0100
Subject: [PATCH 013/946] [InstSimplify] Add test for reinterpret load of
 pointer type (NFC)

---
 llvm/test/Transforms/InstSimplify/ConstProp/loads.ll | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
index afdc9980ab4f9..fb56cb342f4a3 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
@@ -336,3 +336,14 @@ define i32 @load_all_undef() {
   %v = load i32, i32* getelementptr (i32, i32* bitcast ({ i32, [4 x i8] }* @g_all_undef to i32*), i64 1)
   ret i32 %v
 }
+
+@g_i8_data = constant [16 x i8] c"\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"
+
+define i64* @load_ptr_from_i8_data() {
+; CHECK-LABEL: @load_ptr_from_i8_data(
+; CHECK-NEXT:    [[V:%.*]] = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**), align 8
+; CHECK-NEXT:    ret i64* [[V]]
+;
+  %v = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**)
+  ret i64* %v
+}

From 3da69fb5a26c7bf83320b3b4ad0859be9b47b3fb Mon Sep 17 00:00:00 2001
From: Mubashar Ahmad <mubashar.ahmad@arm.com>
Date: Thu, 20 Jan 2022 15:20:57 +0000
Subject: [PATCH 014/946] [Clang][AArch64][ARM] Unaligned Access Test Fix

Test fixed for the unaligned access warning.
---
 clang/test/Sema/test-wunaligned-access.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/Sema/test-wunaligned-access.c b/clang/test/Sema/test-wunaligned-access.c
index 55c2149634d1a..909cda45f489b 100644
--- a/clang/test/Sema/test-wunaligned-access.c
+++ b/clang/test/Sema/test-wunaligned-access.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm
+// RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm -o %t
 // REQUIRES: arm-registered-target
 //
 // This test suite tests the warning triggered by the -Wunaligned-access option.

From 805bc24868670309187d49907ce12583902db47d Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 20 Jan 2022 16:48:19 +0100
Subject: [PATCH 015/946] [InstSimplify] Add test for load of non-integral
 pointer (NFC)

---
 .../test/Transforms/InstSimplify/ConstProp/loads.ll | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
index fb56cb342f4a3..65daa324f7468 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -data-layout="e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instsimplify -S | FileCheck %s --check-prefixes=CHECK,LE
-; RUN: opt < %s -data-layout="E-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instsimplify -S | FileCheck %s --check-prefixes=CHECK,BE
+; RUN: opt < %s -data-layout="e-p:64:64:64-p1:16:16:16-p2:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-ni:2" -instsimplify -S | FileCheck %s --check-prefixes=CHECK,LE
+; RUN: opt < %s -data-layout="E-p:64:64:64-p1:16:16:16-p2:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-ni:2" -instsimplify -S | FileCheck %s --check-prefixes=CHECK,BE
 
 ; {{ 0xDEADBEEF, 0xBA }, 0xCAFEBABE}
 @g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 }
@@ -347,3 +347,12 @@ define i64* @load_ptr_from_i8_data() {
   %v = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**)
   ret i64* %v
 }
+
+define i64 addrspace(2)* @load_non_integral_ptr_from_i8_data() {
+; CHECK-LABEL: @load_non_integral_ptr_from_i8_data(
+; CHECK-NEXT:    [[V:%.*]] = load i64 addrspace(2)*, i64 addrspace(2)** bitcast ([16 x i8]* @g_i8_data to i64 addrspace(2)**), align 8
+; CHECK-NEXT:    ret i64 addrspace(2)* [[V]]
+;
+  %v = load i64 addrspace(2)*, i64 addrspace(2)** bitcast ([16 x i8]* @g_i8_data to i64 addrspace(2)**)
+  ret i64 addrspace(2)* %v
+}

From 91eca967b9eb115afda52e3a7d158a97a24bfe7f Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Thu, 20 Jan 2022 11:02:06 -0500
Subject: [PATCH 016/946] [gn build] (manually) port f29256a64a

---
 llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
index 489d5abec9e27..3efbdde7d85b4 100644
--- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
@@ -335,7 +335,6 @@ write_cmake_config("llvm-config") {
     "LLVM_ENABLE_NEW_PASS_MANAGER=1",
     "LLVM_FORCE_ENABLE_STATS=",
     "LLVM_HAS_ATOMICS=1",
-    "LLVM_HAVE_TF_AOT=",
     "LLVM_HAVE_TF_API=",
     "LLVM_HOST_TRIPLE=$llvm_current_triple",
     "LLVM_NATIVE_ARCH=$native_target",

From 616f77172f0a48ef02c1700882ca0ba2215066d1 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project@meinersbur.de>
Date: Thu, 20 Jan 2022 08:18:08 -0600
Subject: [PATCH 017/946] [OpenMPIRBuilder] Detect and fix ambiguous
 InsertPoints for createParallel.

When a Builder methods accepts multiple InsertPoints, when both point to
the same position, inserting instructions at one position will "move" the
other after the inserted position since the InsertPoint is pegged to the
instruction following the intended InsertPoint. For instance, when
creating a parallel region at Loc and passing the same position as AllocaIP,
creating instructions at Loc will "move" the AllocIP behind the Loc
position.

To avoid this ambiguity, add an assertion checking this condition and
fix the unittests.

In case of AllocaIP, an alternative solution could be to implicitly
split BasicBlock at InsertPoint, using the first as AllocaIP, the second
for inserting the instructions themselves. However, this solution is
specific to AllocaIP since AllocaIP will always have to be first. Hence,
this is an argument to generally handling ambiguous InsertPoints as API
sage error.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D117226
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     | 16 +++++++++++++
 .../Frontend/OpenMPIRBuilderTest.cpp          | 23 +++++++++++++++++++
 .../OpenMP/OpenMPToLLVMIRTranslation.cpp      | 13 +++++++++++
 3 files changed, 52 insertions(+)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 62bef5259877a..0bf0c832d5ddf 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -60,6 +60,20 @@ static cl::opt<double> UnrollThresholdFactor(
              "simplifications still taking place"),
     cl::init(1.5));
 
+#ifndef NDEBUG
+/// Return whether IP1 and IP2 are ambiguous, i.e. that inserting instructions
+/// at position IP1 may change the meaning of IP2 or vice-versa. This is because
+/// an InsertPoint stores the instruction before something is inserted. For
+/// instance, if both point to the same instruction, two IRBuilders alternating
+/// creating instruction will cause the instructions to be interleaved.
+static bool isConflictIP(IRBuilder<>::InsertPoint IP1,
+                         IRBuilder<>::InsertPoint IP2) {
+  if (!IP1.isSet() || !IP2.isSet())
+    return false;
+  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
+}
+#endif
+
 void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
   LLVMContext &Ctx = Fn.getContext();
 
@@ -527,6 +541,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
     BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
     FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
     omp::ProcBindKind ProcBind, bool IsCancellable) {
+  assert(!isConflictIP(Loc.IP, OuterAllocaIP) && "IPs must not be ambiguous");
+
   if (!updateToLocation(Loc))
     return Loc.IP;
 
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index d00f799d30c0e..bc2d3ec8e7abe 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -483,6 +483,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
   F->setName("func");
   IRBuilder<> Builder(BB);
 
+  BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
+  Builder.CreateBr(EnterBB);
+  Builder.SetInsertPoint(EnterBB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
 
   AllocaInst *PrivAI = nullptr;
@@ -589,6 +592,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested) {
   F->setName("func");
   IRBuilder<> Builder(BB);
 
+  BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
+  Builder.CreateBr(EnterBB);
+  Builder.SetInsertPoint(EnterBB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
 
   unsigned NumInnerBodiesGenerated = 0;
@@ -682,6 +688,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelNested2Inner) {
   F->setName("func");
   IRBuilder<> Builder(BB);
 
+  BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
+  Builder.CreateBr(EnterBB);
+  Builder.SetInsertPoint(EnterBB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
 
   unsigned NumInnerBodiesGenerated = 0;
@@ -790,6 +799,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   F->setName("func");
   IRBuilder<> Builder(BB);
 
+  BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
+  Builder.CreateBr(EnterBB);
+  Builder.SetInsertPoint(EnterBB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
 
   AllocaInst *PrivAI = nullptr;
@@ -913,6 +925,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelCancelBarrier) {
   F->setName("func");
   IRBuilder<> Builder(BB);
 
+  BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
+  Builder.CreateBr(EnterBB);
+  Builder.SetInsertPoint(EnterBB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
 
   unsigned NumBodiesGenerated = 0;
@@ -3108,6 +3123,10 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
+
+  BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
+  Builder.CreateBr(EnterBB);
+  Builder.SetInsertPoint(EnterBB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
 
   // Create variables to be reduced.
@@ -3345,6 +3364,10 @@ TEST_F(OpenMPIRBuilderTest, CreateTwoReductions) {
   OMPBuilder.initialize();
   F->setName("func");
   IRBuilder<> Builder(BB);
+
+  BasicBlock *EnterBB = BasicBlock::Create(Ctx, "parallel.enter", F);
+  Builder.CreateBr(EnterBB);
+  Builder.SetInsertPoint(EnterBB);
   OpenMPIRBuilder::LocationDescription Loc({Builder.saveIP(), DL});
 
   // Create variables to be reduced.
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 14d38d44f6ab8..d1f261e52bf2e 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -249,6 +249,19 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
   // TODO: Is the Parallel construct cancellable?
   bool isCancellable = false;
 
+  // Ensure that the BasicBlock for the the parallel region is sparate from the
+  // function entry which we may need to insert allocas.
+  if (builder.GetInsertBlock() ==
+      &builder.GetInsertBlock()->getParent()->getEntryBlock()) {
+    assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end() &&
+           "Assuming end of basic block");
+    llvm::BasicBlock *entryBB =
+        llvm::BasicBlock::Create(builder.getContext(), "parallel.entry",
+                                 builder.GetInsertBlock()->getParent(),
+                                 builder.GetInsertBlock()->getNextNode());
+    builder.CreateBr(entryBB);
+    builder.SetInsertPoint(entryBB);
+  }
   llvm::OpenMPIRBuilder::LocationDescription ompLoc(
       builder.saveIP(), builder.getCurrentDebugLocation());
   builder.restoreIP(moduleTranslation.getOpenMPBuilder()->createParallel(

From 3c422cbe6b7e2ea70fdd97883a9cd4d7d6a2efb2 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 20 Jan 2022 08:23:51 -0800
Subject: [PATCH 018/946] [SLP] Add an asser to make a non-obvious precondition
 clear [NFC]

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 015973d65f4ff..37e4a4e5732fd 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7539,6 +7539,8 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
         ScheduleData *DepDest = BundleMember->NextLoadStore;
         if (DepDest) {
           Instruction *SrcInst = BundleMember->Inst;
+          assert(SrcInst->mayReadOrWriteMemory() &&
+                 "NextLoadStore list for non memory effecting bundle?");
           MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
           bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
           unsigned numAliased = 0;

From 54ba376d0802e74c84d0bf79c0a919c040884398 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Thu, 20 Jan 2022 08:33:08 -0800
Subject: [PATCH 019/946] Add missing include to fix modular build

---
 lld/Common/Timer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lld/Common/Timer.cpp b/lld/Common/Timer.cpp
index 40fecd8892c18..29838c9720b73 100644
--- a/lld/Common/Timer.cpp
+++ b/lld/Common/Timer.cpp
@@ -9,6 +9,7 @@
 #include "lld/Common/Timer.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/Support/Format.h"
+#include <ratio>
 
 using namespace lld;
 using namespace llvm;

From c0957bd61794cc37012e7a5f7d89f00fe02ff2ef Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Thu, 20 Jan 2022 08:35:33 -0800
Subject: [PATCH 020/946] Add missing include to fix modular build

---
 lldb/include/lldb/Target/Statistics.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h
index dbf3554986aae..185389b2eeafe 100644
--- a/lldb/include/lldb/Target/Statistics.h
+++ b/lldb/include/lldb/Target/Statistics.h
@@ -14,6 +14,7 @@
 #include "llvm/Support/JSON.h"
 #include <atomic>
 #include <chrono>
+#include <ratio>
 #include <string>
 #include <vector>
 

From 990bab89fff75b9afb98762f6e90eb634afc6d42 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Thu, 20 Jan 2022 15:13:32 +0000
Subject: [PATCH 021/946] [ScalableVectors] Warn instead of error for invalid
 size requests.

This was intended to be fixed by D98856, but that only seemed to have
the desired behaviour when compiling to assembly using `-S`, not when
compiling into an object file or executable. Given that this was not
the intention of D98856, this patch fixes the behaviour.
---
 clang/lib/Driver/ToolChains/Clang.cpp  | 3 ++-
 clang/test/Driver/fsanitize-coverage.c | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 96d949be17eea..c48c6fd59bec3 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5318,7 +5318,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
   // as errors, but until then, we can live with a warning being emitted by the
   // compiler. This way, Clang can be used to compile code with scalable vectors
   // and identify possible issues.
-  if (isa<BackendJobAction>(JA)) {
+  if (isa<AssembleJobAction>(JA) || isa<CompileJobAction>(JA) ||
+      isa<BackendJobAction>(JA)) {
     CmdArgs.push_back("-mllvm");
     CmdArgs.push_back("-treat-scalable-fixed-error-as-warning");
   }
diff --git a/clang/test/Driver/fsanitize-coverage.c b/clang/test/Driver/fsanitize-coverage.c
index ab8a8871877e6..23953af6e6697 100644
--- a/clang/test/Driver/fsanitize-coverage.c
+++ b/clang/test/Driver/fsanitize-coverage.c
@@ -106,7 +106,7 @@
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=inline-8bit-counters %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_INLINE8BIT
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=bb,inline-8bit-counters %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_INLINE8BIT
-// CHECK_INLINE8BIT-NOT: warning
+// CHECK_INLINE8BIT-NOT: warning:
 // CHECK_INLINE8BIT: -fsanitize-coverage-inline-8bit-counters
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=inline-8bit-counters,pc-table %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_PC_TABLE_FOR_INLINE8BIT
@@ -115,7 +115,7 @@
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=inline-bool-flag %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_INLINE_BOOL_FLAG
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=bb,inline-bool-flag %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_INLINE_BOOL_FLAG
-// CHECK_INLINE_BOOL_FLAG-NOT: warning
+// CHECK_INLINE_BOOL_FLAG-NOT: warning:
 // CHECK_INLINE_BOOL_FLAG: -fsanitize-coverage-inline-bool-flag
 
 // RUN: %clang -target x86_64-linux-gnu -fsanitize-coverage=inline-bool-flag,pc-table %s -### 2>&1 | FileCheck %s --check-prefix=CHECK_PC_TABLE_FOR_INLINEBOOL

From c43ebae838de35ddcbccd1af7a94ddadd8dfd7f9 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 20 Jan 2022 08:46:06 -0800
Subject: [PATCH 022/946] [SLP] Reduce nesting depth in calculateDependencies
 via for loop and early continue [NFC]

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 192 +++++++++---------
 1 file changed, 95 insertions(+), 97 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 37e4a4e5732fd..5dbd975f58dac 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7491,113 +7491,111 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
 
   while (!WorkList.empty()) {
     ScheduleData *SD = WorkList.pop_back_val();
-
-    ScheduleData *BundleMember = SD;
-    while (BundleMember) {
+    for (ScheduleData *BundleMember = SD; BundleMember;
+         BundleMember = BundleMember->NextInBundle) {
       assert(isInSchedulingRegion(BundleMember));
-      if (!BundleMember->hasValidDependencies()) {
-
-        LLVM_DEBUG(dbgs() << "SLP:       update deps of " << *BundleMember
-                          << "\n");
-        BundleMember->Dependencies = 0;
-        BundleMember->resetUnscheduledDeps();
+      if (BundleMember->hasValidDependencies())
+        continue;
 
-        // Handle def-use chain dependencies.
-        if (BundleMember->OpValue != BundleMember->Inst) {
-          ScheduleData *UseSD = getScheduleData(BundleMember->Inst);
-          if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
-            BundleMember->Dependencies++;
-            ScheduleData *DestBundle = UseSD->FirstInBundle;
-            if (!DestBundle->IsScheduled)
-              BundleMember->incrementUnscheduledDeps(1);
-            if (!DestBundle->hasValidDependencies())
-              WorkList.push_back(DestBundle);
-          }
-        } else {
-          for (User *U : BundleMember->Inst->users()) {
-            if (isa<Instruction>(U)) {
-              ScheduleData *UseSD = getScheduleData(U);
-              if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
-                BundleMember->Dependencies++;
-                ScheduleData *DestBundle = UseSD->FirstInBundle;
-                if (!DestBundle->IsScheduled)
-                  BundleMember->incrementUnscheduledDeps(1);
-                if (!DestBundle->hasValidDependencies())
-                  WorkList.push_back(DestBundle);
-              }
-            } else {
-              // I'm not sure if this can ever happen. But we need to be safe.
-              // This lets the instruction/bundle never be scheduled and
-              // eventually disable vectorization.
-              BundleMember->Dependencies++;
-              BundleMember->incrementUnscheduledDeps(1);
-            }
-          }
+      LLVM_DEBUG(dbgs() << "SLP:       update deps of " << *BundleMember
+                 << "\n");
+      BundleMember->Dependencies = 0;
+      BundleMember->resetUnscheduledDeps();
+
+      // Handle def-use chain dependencies.
+      if (BundleMember->OpValue != BundleMember->Inst) {
+        ScheduleData *UseSD = getScheduleData(BundleMember->Inst);
+        if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
+          BundleMember->Dependencies++;
+          ScheduleData *DestBundle = UseSD->FirstInBundle;
+          if (!DestBundle->IsScheduled)
+            BundleMember->incrementUnscheduledDeps(1);
+          if (!DestBundle->hasValidDependencies())
+            WorkList.push_back(DestBundle);
         }
-
-        // Handle the memory dependencies.
-        ScheduleData *DepDest = BundleMember->NextLoadStore;
-        if (DepDest) {
-          Instruction *SrcInst = BundleMember->Inst;
-          assert(SrcInst->mayReadOrWriteMemory() &&
-                 "NextLoadStore list for non memory effecting bundle?");
-          MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
-          bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
-          unsigned numAliased = 0;
-          unsigned DistToSrc = 1;
-
-          while (DepDest) {
-            assert(isInSchedulingRegion(DepDest));
-
-            // We have two limits to reduce the complexity:
-            // 1) AliasedCheckLimit: It's a small limit to reduce calls to
-            //    SLP->isAliased (which is the expensive part in this loop).
-            // 2) MaxMemDepDistance: It's for very large blocks and it aborts
-            //    the whole loop (even if the loop is fast, it's quadratic).
-            //    It's important for the loop break condition (see below) to
-            //    check this limit even between two read-only instructions.
-            if (DistToSrc >= MaxMemDepDistance ||
-                    ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
-                     (numAliased >= AliasedCheckLimit ||
-                      SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
-
-              // We increment the counter only if the locations are aliased
-              // (instead of counting all alias checks). This gives a better
-              // balance between reduced runtime and accurate dependencies.
-              numAliased++;
-
-              DepDest->MemoryDependencies.push_back(BundleMember);
+      } else {
+        for (User *U : BundleMember->Inst->users()) {
+          if (isa<Instruction>(U)) {
+            ScheduleData *UseSD = getScheduleData(U);
+            if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
               BundleMember->Dependencies++;
-              ScheduleData *DestBundle = DepDest->FirstInBundle;
-              if (!DestBundle->IsScheduled) {
+              ScheduleData *DestBundle = UseSD->FirstInBundle;
+              if (!DestBundle->IsScheduled)
                 BundleMember->incrementUnscheduledDeps(1);
-              }
-              if (!DestBundle->hasValidDependencies()) {
+              if (!DestBundle->hasValidDependencies())
                 WorkList.push_back(DestBundle);
-              }
             }
-            DepDest = DepDest->NextLoadStore;
-
-            // Example, explaining the loop break condition: Let's assume our
-            // starting instruction is i0 and MaxMemDepDistance = 3.
-            //
-            //                      +--------v--v--v
-            //             i0,i1,i2,i3,i4,i5,i6,i7,i8
-            //             +--------^--^--^
-            //
-            // MaxMemDepDistance let us stop alias-checking at i3 and we add
-            // dependencies from i0 to i3,i4,.. (even if they are not aliased).
-            // Previously we already added dependencies from i3 to i6,i7,i8
-            // (because of MaxMemDepDistance). As we added a dependency from
-            // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
-            // and we can abort this loop at i6.
-            if (DistToSrc >= 2 * MaxMemDepDistance)
-              break;
-            DistToSrc++;
+          } else {
+            // I'm not sure if this can ever happen. But we need to be safe.
+            // This lets the instruction/bundle never be scheduled and
+            // eventually disable vectorization.
+            BundleMember->Dependencies++;
+            BundleMember->incrementUnscheduledDeps(1);
           }
         }
       }
-      BundleMember = BundleMember->NextInBundle;
+
+      // Handle the memory dependencies (if any).
+      ScheduleData *DepDest = BundleMember->NextLoadStore;
+      if (!DepDest)
+        continue;
+      Instruction *SrcInst = BundleMember->Inst;
+      assert(SrcInst->mayReadOrWriteMemory() &&
+             "NextLoadStore list for non memory effecting bundle?");
+      MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
+      bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
+      unsigned numAliased = 0;
+      unsigned DistToSrc = 1;
+
+      while (DepDest) {
+        assert(isInSchedulingRegion(DepDest));
+
+        // We have two limits to reduce the complexity:
+        // 1) AliasedCheckLimit: It's a small limit to reduce calls to
+        //    SLP->isAliased (which is the expensive part in this loop).
+        // 2) MaxMemDepDistance: It's for very large blocks and it aborts
+        //    the whole loop (even if the loop is fast, it's quadratic).
+        //    It's important for the loop break condition (see below) to
+        //    check this limit even between two read-only instructions.
+        if (DistToSrc >= MaxMemDepDistance ||
+            ((SrcMayWrite || DepDest->Inst->mayWriteToMemory()) &&
+             (numAliased >= AliasedCheckLimit ||
+              SLP->isAliased(SrcLoc, SrcInst, DepDest->Inst)))) {
+
+          // We increment the counter only if the locations are aliased
+          // (instead of counting all alias checks). This gives a better
+          // balance between reduced runtime and accurate dependencies.
+          numAliased++;
+
+          DepDest->MemoryDependencies.push_back(BundleMember);
+          BundleMember->Dependencies++;
+          ScheduleData *DestBundle = DepDest->FirstInBundle;
+          if (!DestBundle->IsScheduled) {
+            BundleMember->incrementUnscheduledDeps(1);
+          }
+          if (!DestBundle->hasValidDependencies()) {
+            WorkList.push_back(DestBundle);
+          }
+        }
+        DepDest = DepDest->NextLoadStore;
+
+        // Example, explaining the loop break condition: Let's assume our
+        // starting instruction is i0 and MaxMemDepDistance = 3.
+        //
+        //                      +--------v--v--v
+        //             i0,i1,i2,i3,i4,i5,i6,i7,i8
+        //             +--------^--^--^
+        //
+        // MaxMemDepDistance let us stop alias-checking at i3 and we add
+        // dependencies from i0 to i3,i4,.. (even if they are not aliased).
+        // Previously we already added dependencies from i3 to i6,i7,i8
+        // (because of MaxMemDepDistance). As we added a dependency from
+        // i0 to i3, we have transitive dependencies from i0 to i6,i7,i8
+        // and we can abort this loop at i6.
+        if (DistToSrc >= 2 * MaxMemDepDistance)
+          break;
+        DistToSrc++;
+      }
     }
     if (InsertInReadyList && SD->isReady()) {
       ReadyInsts.push_back(SD);

From c104fca36b96203a36ef339e7548f14888013a2e Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 20 Jan 2022 08:53:01 -0800
Subject: [PATCH 023/946] {SLP] Delete dead code in favor of proper assert
 [NFC]

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 24 +++++++------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5dbd975f58dac..43119b9c80cb3 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7515,22 +7515,16 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
         }
       } else {
         for (User *U : BundleMember->Inst->users()) {
-          if (isa<Instruction>(U)) {
-            ScheduleData *UseSD = getScheduleData(U);
-            if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
-              BundleMember->Dependencies++;
-              ScheduleData *DestBundle = UseSD->FirstInBundle;
-              if (!DestBundle->IsScheduled)
-                BundleMember->incrementUnscheduledDeps(1);
-              if (!DestBundle->hasValidDependencies())
-                WorkList.push_back(DestBundle);
-            }
-          } else {
-            // I'm not sure if this can ever happen. But we need to be safe.
-            // This lets the instruction/bundle never be scheduled and
-            // eventually disable vectorization.
+          assert(isa<Instruction>(U) &&
+                 "user of instruction must be instruction");
+          ScheduleData *UseSD = getScheduleData(U);
+          if (UseSD && isInSchedulingRegion(UseSD->FirstInBundle)) {
             BundleMember->Dependencies++;
-            BundleMember->incrementUnscheduledDeps(1);
+            ScheduleData *DestBundle = UseSD->FirstInBundle;
+            if (!DestBundle->IsScheduled)
+              BundleMember->incrementUnscheduledDeps(1);
+            if (!DestBundle->hasValidDependencies())
+              WorkList.push_back(DestBundle);
           }
         }
       }

From fabf1de13202030151899786c21d624b96605ca3 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Thu, 20 Jan 2022 16:56:31 +0000
Subject: [PATCH 024/946] [FuncSpec] Add a reference, and some other clarifying
 comments. NFC.

---
 .../Transforms/IPO/FunctionSpecialization.cpp | 29 ++++++++++++++-----
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 2425646455bd9..6c3cc39143372 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -6,15 +6,24 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This specialises functions with constant parameters (e.g. functions,
-// globals). Constant parameters like function pointers and constant globals
-// are propagated to the callee by specializing the function.
+// This specialises functions with constant parameters. Constant parameters
+// like function pointers and constant globals are propagated to the callee by
+// specializing the function. The main benefit of this pass at the moment is
+// that indirect calls are transformed into direct calls, which provides inline
+// opportunities that the inliner would not have been able to achieve. That's
+// why function specialisation is run before the inliner in the optimisation
+// pipeline; that is by design. Otherwise, we would only benefit from constant
+// passing, which is a valid use-case too, but hasn't been explored much in
+// terms of performance uplifts, cost-model and compile-time impact.
 //
 // Current limitations:
-// - It does not yet handle integer ranges.
+// - It does not yet handle integer ranges. We do support "literal constants",
+//   but that's off by default under an option.
 // - Only 1 argument per function is specialised,
-// - The cost-model could be further looked into,
-// - We are not yet caching analysis results.
+// - The cost-model could be further looked into (it mainly focuses on inlining
+//   benefits),
+// - We are not yet caching analysis results, but profiling and checking where
+//   extra compile time is spent didn't suggest this to be a problem.
 //
 // Ideas:
 // - With a function specialization attribute for arguments, we could have
@@ -30,8 +39,12 @@
 //   https://reviews.llvm.org/D106426 for details. Perhaps there is a
 //   compile-time friendlier way to control/limit the number of specialisations
 //   for recursive functions.
-// - Don't transform the function if there is no function specialization
-//   happens.
+// - Don't transform the function if function specialization does not trigger;
+//   the SCCPSolver may make IR changes.
+//
+// References:
+// - 2021 LLVM Dev Mtg “Introducing function specialisation, and can we enable
+//   it by default?”, https://www.youtube.com/watch?v=zJiCjeXgV5Q
 //
 //===----------------------------------------------------------------------===//
 

From 283f5a198a0e3c9978ca426e64a3011b566c2581 Mon Sep 17 00:00:00 2001
From: Lucas Prates <lucas.prates@arm.com>
Date: Mon, 10 Jan 2022 10:19:27 +0000
Subject: [PATCH 025/946] [GlobalISel] Fix incorrect sign extension when
 combining G_INTTOPTR and G_PTR_ADD

The GlobalISel combiner currently uses sign extension when manipulating
the LHS constant when combining a sequence of the following sequence of
machine instructions into a single constant:
```
  %0:_(s32) = G_CONSTANT i32 <CONSTANT>
  %1:_(p0) = G_INTTOPTR %0:_(s32)
  %2:_(s64) = G_CONSTANT i64 <CONSTANT>
  %3:_(p0) = G_PTR_ADD %1:_, %2:_(s64)
```

This causes an issue when the bit width of the first contant and the
target pointer size are different, as G_INTTOPTR has no sign extension
semantics.

This patch fixes this by capture an arbitrary precision in when matching
the constant, allowing the matching function to correctly zero extend
it.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D116941
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  4 +--
 .../llvm/CodeGen/GlobalISel/MIPatternMatch.h  | 31 ++++++++++++++++---
 .../include/llvm/Target/GlobalISel/Combine.td |  2 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 13 +++++---
 .../GlobalISel/combine-ptradd-int2ptr.mir     | 15 +++++++++
 .../AArch64/GlobalISel/inttoptr_add.ll        | 15 +++++++++
 6 files changed, 67 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index f3fa652b01754..1d07d7d6e7aed 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -353,8 +353,8 @@ class CombinerHelper {
                                   std::pair<Register, bool> &PtrRegAndCommute);
 
   // Transform G_PTR_ADD (G_PTRTOINT C1), C2 -> C1 + C2
-  bool matchCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst);
-  void applyCombineConstPtrAddToI2P(MachineInstr &MI, int64_t &NewCst);
+  bool matchCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst);
+  void applyCombineConstPtrAddToI2P(MachineInstr &MI, APInt &NewCst);
 
   /// Transform anyext(trunc(x)) to x.
   bool matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg);
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
index 28bb8de117628..daf1ff052983f 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_CODEGEN_GLOBALISEL_MIPATTERNMATCH_H
 #define LLVM_CODEGEN_GLOBALISEL_MIPATTERNMATCH_H
 
+#include "llvm/ADT/APInt.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/IR/InstrTypes.h"
@@ -59,11 +60,26 @@ inline OneNonDBGUse_match<SubPat> m_OneNonDBGUse(const SubPat &SP) {
   return SP;
 }
 
-struct ConstantMatch {
-  int64_t &CR;
-  ConstantMatch(int64_t &C) : CR(C) {}
+template <typename ConstT>
+inline Optional<ConstT> matchConstant(Register, const MachineRegisterInfo &);
+
+template <>
+inline Optional<APInt> matchConstant(Register Reg,
+                                     const MachineRegisterInfo &MRI) {
+  return getIConstantVRegVal(Reg, MRI);
+}
+
+template <>
+inline Optional<int64_t> matchConstant(Register Reg,
+                                       const MachineRegisterInfo &MRI) {
+  return getIConstantVRegSExtVal(Reg, MRI);
+}
+
+template <typename ConstT> struct ConstantMatch {
+  ConstT &CR;
+  ConstantMatch(ConstT &C) : CR(C) {}
   bool match(const MachineRegisterInfo &MRI, Register Reg) {
-    if (auto MaybeCst = getIConstantVRegSExtVal(Reg, MRI)) {
+    if (auto MaybeCst = matchConstant<ConstT>(Reg, MRI)) {
       CR = *MaybeCst;
       return true;
     }
@@ -71,7 +87,12 @@ struct ConstantMatch {
   }
 };
 
-inline ConstantMatch m_ICst(int64_t &Cst) { return ConstantMatch(Cst); }
+inline ConstantMatch<APInt> m_ICst(APInt &Cst) {
+  return ConstantMatch<APInt>(Cst);
+}
+inline ConstantMatch<int64_t> m_ICst(int64_t &Cst) {
+  return ConstantMatch<int64_t>(Cst);
+}
 
 struct GCstAndRegMatch {
   Optional<ValueAndVReg> &ValReg;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 1d189c6dea6d8..9736e52a7b5bb 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -391,7 +391,7 @@ def add_p2i_to_ptradd : GICombineRule<
 >;
 
 // Fold (ptr_add (int2ptr C1), C2) -> C1 + C2
-def const_ptradd_to_i2p_matchinfo : GIDefMatchData<"int64_t">;
+def const_ptradd_to_i2p_matchinfo : GIDefMatchData<"APInt">;
 def const_ptradd_to_i2p: GICombineRule<
   (defs root:$root, const_ptradd_to_i2p_matchinfo:$info),
   (match (wip_match_opcode G_PTR_ADD):$root,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9ba8cf0cd7c25..ed1aa9d80840c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2025,16 +2025,19 @@ void CombinerHelper::applyCombineAddP2IToPtrAdd(
 }
 
 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
-                                                  int64_t &NewCst) {
+                                                  APInt &NewCst) {
   auto &PtrAdd = cast<GPtrAdd>(MI);
   Register LHS = PtrAdd.getBaseReg();
   Register RHS = PtrAdd.getOffsetReg();
   MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
 
-  if (auto RHSCst = getIConstantVRegSExtVal(RHS, MRI)) {
-    int64_t Cst;
+  if (auto RHSCst = getIConstantVRegVal(RHS, MRI)) {
+    APInt Cst;
     if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
-      NewCst = Cst + *RHSCst;
+      auto DstTy = MRI.getType(PtrAdd.getReg(0));
+      // G_INTTOPTR uses zero-extension
+      NewCst = Cst.zextOrTrunc(DstTy.getSizeInBits());
+      NewCst += RHSCst->sextOrTrunc(DstTy.getSizeInBits());
       return true;
     }
   }
@@ -2043,7 +2046,7 @@ bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
 }
 
 void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
-                                                  int64_t &NewCst) {
+                                                  APInt &NewCst) {
   auto &PtrAdd = cast<GPtrAdd>(MI);
   Register Dst = PtrAdd.getReg(0);
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
index c6ee994b21b38..40e5e8ebb7731 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-ptradd-int2ptr.mir
@@ -50,3 +50,18 @@ body:             |
     %4:_(s64) = G_PTRTOINT %3
     $x0 = COPY %4(s64)
 ...
+---
+name:            test_combine_zero_extend
+body:             |
+  bb.1:
+  liveins: $x0
+
+    ; CHECK-LABEL: name: test_combine_zero_extend
+    ; CHECK: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i64 4291891236
+    ; CHECK-NEXT: $x0 = COPY [[C]](p0)
+    %0:_(s32) = G_CONSTANT i32 -3076096
+    %1:_(p0) = G_INTTOPTR %0:_(s32)
+    %2:_(s64) = G_CONSTANT i64 36
+    %3:_(p0) = G_PTR_ADD %1:_, %2:_(s64)
+    $x0 = COPY %3
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll b/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll
new file mode 100644
index 0000000000000..b7349b1484248
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inttoptr_add.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+define dso_local void @fn() {
+; CHECK-LABEL: fn:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x8, #4132
+; CHECK-NEXT:    mov w9, #1
+; CHECK-NEXT:    movk x8, #65489, lsl #16
+; CHECK-NEXT:    str w9, [x8]
+; CHECK-NEXT:    ret
+entry:
+  store i32 1, i32* bitcast (i8* getelementptr inbounds (i8, i8* inttoptr (i32 -3076096 to i8*), i64 36) to i32*), align 4
+  ret void
+}

From ee198df2e14c139340421d4f5310dbaf9f725518 Mon Sep 17 00:00:00 2001
From: Random06457 <28494085+Random06457@users.noreply.github.com>
Date: Thu, 20 Jan 2022 20:03:49 +0300
Subject: [PATCH 026/946] [mips] Improve vr4300 mulmul bugfix pass

When compiling with dwarf info, the mfix4300 flag introduced in
https://reviews.llvm.org/D116238 can miss some occurrences of the vr4300
mulmul bug if a debug instruction happens to be between two `muls`
instructions. This change skips debug instructions in order to fix
the mulmul bug detection.

Fixes https://github.com/llvm/llvm-project/issues/53094

Differential Revision: https://reviews.llvm.org/D117615
---
 llvm/lib/Target/Mips/MipsMulMulBugPass.cpp |  8 ++++---
 llvm/test/CodeGen/Mips/vr4300-mulmul.ll    | 25 ++++++++++++++++++++--
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
index cb112ca1dfffe..daaf1135c2b13 100644
--- a/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
+++ b/llvm/lib/Target/Mips/MipsMulMulBugPass.cpp
@@ -110,17 +110,19 @@ bool MipsMulMulBugFix::fixMulMulBB(MachineBasicBlock &MBB,
                                    const MipsInstrInfo &MipsII) {
   bool Modified = false;
 
+  MachineBasicBlock::instr_iterator NextMII;
+
   // Iterate through the instructions in the basic block
   for (MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
                                          E = MBB.instr_end();
-       MII != E; ++MII) {
+       MII != E; MII = NextMII) {
 
-    MachineBasicBlock::instr_iterator NextMII = std::next(MII);
+    NextMII = next_nodbg(MII, E);
 
     // Trigger when the current instruction is a mul and the next instruction
     // is either a mul or a branch in case the branch target start with a mul
     if (NextMII != E && isFirstMul(*MII) && isSecondMulOrBranch(*NextMII)) {
-      LLVM_DEBUG(dbgs() << "Found mulmul!");
+      LLVM_DEBUG(dbgs() << "Found mulmul!\n");
 
       const MCInstrDesc &NewMCID = MipsII.get(Mips::NOP);
       BuildMI(MBB, NextMII, DebugLoc(), NewMCID);
diff --git a/llvm/test/CodeGen/Mips/vr4300-mulmul.ll b/llvm/test/CodeGen/Mips/vr4300-mulmul.ll
index f20cc169825ee..33d909c7b4369 100644
--- a/llvm/test/CodeGen/Mips/vr4300-mulmul.ll
+++ b/llvm/test/CodeGen/Mips/vr4300-mulmul.ll
@@ -1,13 +1,14 @@
 ; RUN: llc -march=mips -mfix4300 -verify-machineinstrs < %s | FileCheck %s
 
 ; Function Attrs: mustprogress nofree norecurse nosync nounwind readnone willreturn
-define dso_local float @fun_s(float %x) local_unnamed_addr #0 {
+define dso_local float @fun_s(float %x) local_unnamed_addr !dbg !7  {
 entry:
 ; CHECK-LABEL: fun_s
 ; CHECK: mul.s
+; CHECK-NEXT: #DEBUG_VALUE: i <- 1
 ; CHECK-NEXT: nop
-; CHECK: mul.s
   %mul = fmul float %x, %x
+  call void @llvm.dbg.value(metadata i32 1, metadata !13, metadata !DIExpression()), !dbg !17
   %mul1 = fmul float %mul, %x
   ret float %mul1
 }
@@ -22,3 +23,23 @@ entry:
   %mul1 = fmul double %mul, %x
   ret double %mul1
 }
+
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "vr4300-mulmul.ll", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = distinct !DISubprogram(name: "fun_s", linkageName: "fun_s", scope: !1, file: !1, line: 1, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!8 = !DISubroutineType(types: !9)
+!9 = !{!10, !10}
+!10 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
+!13 = !DILocalVariable(name: "i", scope: !14, file: !1, line: 3, type: !15)
+!14 = distinct !DILexicalBlock(scope: !7, file: !1, line: 3, column: 5)
+!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!17 = !DILocation(line: 0, scope: !14)

From 89c447e4e6b564636bbf32b15d67e40cf6c60387 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Sat, 15 Jan 2022 19:16:03 -0500
Subject: [PATCH 027/946] AMDGPU: Stop reserving 36-bytes before kernel
 arguments for amdpal

This was inheriting the mesa behavior, and as far as I know nobody is
using opencl kernels with amdpal. The isMesaKernel check was
irrelevant because this property needs to be held for all functions.
---
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h      |  14 +-
 .../AMDGPU/GlobalISel/irtranslator-call.ll    |   2 +-
 .../AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll | 272 ++++----
 .../AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll     | 162 ++---
 .../AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll     | 184 +++---
 .../AMDGPU/GlobalISel/shl-ext-reduce.ll       |  16 +-
 .../AMDGPU/GlobalISel/store-local.128.ll      |  72 +--
 .../AMDGPU/GlobalISel/store-local.96.ll       |  72 +--
 llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll     |  12 +-
 llvm/test/CodeGen/AMDGPU/ds_read2.ll          | 170 ++---
 llvm/test/CodeGen/AMDGPU/ds_write2.ll         |  96 +--
 llvm/test/CodeGen/AMDGPU/flat-scratch.ll      |  20 +-
 .../AMDGPU/memory-legalizer-flat-agent.ll     | 352 +++++-----
 .../memory-legalizer-flat-nontemporal.ll      |   8 +-
 .../memory-legalizer-flat-singlethread.ll     | 352 +++++-----
 .../AMDGPU/memory-legalizer-flat-system.ll    | 352 +++++-----
 .../AMDGPU/memory-legalizer-flat-volatile.ll  |  14 +-
 .../AMDGPU/memory-legalizer-flat-wavefront.ll | 348 +++++-----
 .../AMDGPU/memory-legalizer-flat-workgroup.ll | 336 +++++-----
 .../AMDGPU/memory-legalizer-global-agent.ll   | 348 +++++-----
 .../memory-legalizer-global-nontemporal.ll    |   8 +-
 .../memory-legalizer-global-singlethread.ll   | 352 +++++-----
 .../AMDGPU/memory-legalizer-global-system.ll  | 332 +++++-----
 .../memory-legalizer-global-volatile.ll       |  14 +-
 .../memory-legalizer-global-wavefront.ll      | 352 +++++-----
 .../memory-legalizer-global-workgroup.ll      | 352 +++++-----
 .../AMDGPU/memory-legalizer-local-agent.ll    | 604 ++++++++----------
 .../memory-legalizer-local-nontemporal.ll     |  16 +-
 .../memory-legalizer-local-singlethread.ll    | 604 ++++++++----------
 .../AMDGPU/memory-legalizer-local-system.ll   | 604 ++++++++----------
 .../AMDGPU/memory-legalizer-local-volatile.ll |  20 +-
 .../memory-legalizer-local-wavefront.ll       | 604 ++++++++----------
 .../memory-legalizer-local-workgroup.ll       | 604 ++++++++----------
 .../memory-legalizer-private-nontemporal.ll   |  16 +-
 .../memory-legalizer-private-volatile.ll      |  16 +-
 llvm/test/CodeGen/AMDGPU/store-local.128.ll   |  96 +--
 llvm/test/CodeGen/AMDGPU/store-local.96.ll    |  96 +--
 37 files changed, 3802 insertions(+), 4090 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 88ed4b2b7a24e..7f1b94be4ffea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -212,7 +212,19 @@ class AMDGPUSubtarget {
   /// Returns the offset in bytes from the start of the input buffer
   ///        of the first explicit kernel argument.
   unsigned getExplicitKernelArgOffset(const Function &F) const {
-    return isAmdHsaOrMesa(F) ? 0 : 36;
+    switch (TargetTriple.getOS()) {
+    case Triple::AMDHSA:
+    case Triple::AMDPAL:
+    case Triple::Mesa3D:
+      return 0;
+    case Triple::UnknownOS:
+    default:
+      // For legacy reasons unknown/other is treated as a different version of
+      // mesa.
+      return 36;
+    }
+
+    llvm_unreachable("invalid triple OS");
   }
 
   /// \returns Maximum number of work groups per compute unit supported by the
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
index 452616e9ca3cf..baf0b4d29d819 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll
@@ -5183,7 +5183,7 @@ define amdgpu_ps void @amdgpu_ps_call_default_cc() {
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p4) = COPY [[DEF]](p4)
   ; CHECK-NEXT:   [[C1:%[0-9]+]]:_(p4) = G_CONSTANT i64 0
-  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
+  ; CHECK-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
   ; CHECK-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[C1]], [[C2]](s64)
   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
index 01d5307693ad2..3adb7550d233f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
@@ -211,11 +211,11 @@ define amdgpu_ps double @s_div_fmas_f64(double inreg %a, double inreg %b, double
 define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
 ; GFX7-LABEL: test_div_fmas_f32:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s2, s[0:1], 0x13
-; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x1c
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x25
-; GFX7-NEXT:    s_load_dword s5, s[0:1], 0x2e
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dword s2, s[0:1], 0xa
+; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x13
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x1c
+; GFX7-NEXT:    s_load_dword s5, s[0:1], 0x25
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX7-NEXT:    v_mov_b32_e32 v1, s3
@@ -231,11 +231,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32]
 ;
 ; GFX8-LABEL: test_div_fmas_f32:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x4c
-; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x70
-; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x94
-; GFX8-NEXT:    s_load_dword s5, s[0:1], 0xb8
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x28
+; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x4c
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x70
+; GFX8-NEXT:    s_load_dword s5, s[0:1], 0x94
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
@@ -252,11 +252,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32]
 ; GFX10_W32-LABEL: test_div_fmas_f32:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x4
-; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0xb8
-; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x70
-; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x94
-; GFX10_W32-NEXT:    s_load_dword s7, s[0:1], 0x4c
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x4c
+; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x70
+; GFX10_W32-NEXT:    s_load_dword s7, s[0:1], 0x28
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    s_and_b32 s0, 1, s4
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
@@ -270,11 +270,11 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32]
 ; GFX10_W64-LABEL: test_div_fmas_f32:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x4
-; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0xb8
-; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x70
-; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x94
-; GFX10_W64-NEXT:    s_load_dword s7, s[0:1], 0x4c
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x4c
+; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x70
+; GFX10_W64-NEXT:    s_load_dword s7, s[0:1], 0x28
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    s_and_b32 s0, 1, s4
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
@@ -292,10 +292,10 @@ define amdgpu_kernel void @test_div_fmas_f32(float addrspace(1)* %out, [8 x i32]
 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s2, s[0:1], 0x1c
-; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x25
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x2e
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dword s2, s[0:1], 0x13
+; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x1c
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x25
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX7-NEXT:    v_mov_b32_e32 v1, s3
@@ -310,10 +310,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o
 ;
 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x70
-; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x94
-; GFX8-NEXT:    s_load_dword s4, s[0:1], 0xb8
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x4c
+; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x70
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
@@ -329,10 +329,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o
 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x3
-; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0xb8
-; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x94
-; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x70
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x70
+; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x4c
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    s_and_b32 s0, 1, s4
@@ -345,10 +345,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o
 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_0:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x3
-; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0xb8
-; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x94
-; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x70
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x70
+; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x4c
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    s_and_b32 s0, 1, s4
@@ -365,10 +365,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_0(float addrspace(1)* %o
 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %out, float %a, float %b, float %c, [8 x i32], i1 %d) {
 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s2, s[0:1], 0xb
-; GFX7-NEXT:    s_load_dword s3, s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x16
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dword s2, s[0:1], 0x2
+; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0xd
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX7-NEXT:    v_mov_b32_e32 v1, s3
@@ -383,10 +383,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o
 ;
 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x34
-; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x58
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x8
+; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x10
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
@@ -402,10 +402,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o
 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x3
-; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x58
-; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x34
-; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x2c
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x10
+; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x8
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    s_and_b32 s0, 1, s4
@@ -418,10 +418,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o
 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_1:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x3
-; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x58
-; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x34
-; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x2c
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x10
+; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x8
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    s_and_b32 s0, 1, s4
@@ -438,10 +438,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_1(float addrspace(1)* %o
 define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c, [8 x i32], i1 %d) {
 ; GFX7-LABEL: test_div_fmas_f32_inline_imm_2:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s2, s[0:1], 0x13
-; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x1c
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x2e
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dword s2, s[0:1], 0xa
+; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x13
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x25
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX7-NEXT:    v_mov_b32_e32 v1, s3
@@ -456,10 +456,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o
 ;
 ; GFX8-LABEL: test_div_fmas_f32_inline_imm_2:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x4c
-; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x70
-; GFX8-NEXT:    s_load_dword s4, s[0:1], 0xb8
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x28
+; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x4c
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
@@ -475,10 +475,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o
 ; GFX10_W32-LABEL: test_div_fmas_f32_inline_imm_2:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x3
-; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0xb8
-; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x70
-; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x4c
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x4c
+; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x28
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    s_and_b32 s0, 1, s4
@@ -491,10 +491,10 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o
 ; GFX10_W64-LABEL: test_div_fmas_f32_inline_imm_2:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x3
-; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0xb8
-; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x70
-; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x4c
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x4c
+; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x28
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    s_and_b32 s0, 1, s4
@@ -511,8 +511,8 @@ define amdgpu_kernel void @test_div_fmas_f32_inline_imm_2(float addrspace(1)* %o
 define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %a, double %b, double %c, i1 %d) {
 ; GFX7-LABEL: test_div_fmas_f64:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x9
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x11
+; GFX7-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x0
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x8
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX7-NEXT:    v_mov_b32_e32 v2, s8
@@ -531,8 +531,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %
 ;
 ; GFX8-LABEL: test_div_fmas_f64:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
-; GFX8-NEXT:    s_load_dword s0, s[0:1], 0x44
+; GFX8-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x0
+; GFX8-NEXT:    s_load_dword s0, s[0:1], 0x20
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8-NEXT:    v_mov_b32_e32 v2, s8
@@ -552,8 +552,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %
 ; GFX10_W32-LABEL: test_div_fmas_f64:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x1
-; GFX10_W32-NEXT:    s_load_dword s2, s[0:1], 0x44
-; GFX10_W32-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dword s2, s[0:1], 0x20
+; GFX10_W32-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    s_and_b32 s0, 1, s2
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s8
@@ -569,8 +569,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %
 ; GFX10_W64-LABEL: test_div_fmas_f64:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x1
-; GFX10_W64-NEXT:    s_load_dword s2, s[0:1], 0x44
-; GFX10_W64-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dword s2, s[0:1], 0x20
+; GFX10_W64-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    s_and_b32 s0, 1, s2
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s8
@@ -590,8 +590,8 @@ define amdgpu_kernel void @test_div_fmas_f64(double addrspace(1)* %out, double %
 define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %out, float %a, float %b, float %c, i32 %i) {
 ; GFX7-LABEL: test_div_fmas_f32_cond_to_vcc:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    s_cmp_eq_u32 s7, 0
@@ -609,8 +609,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou
 ;
 ; GFX8-LABEL: test_div_fmas_f32_cond_to_vcc:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x8
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_cmp_eq_u32 s7, 0
 ; GFX8-NEXT:    s_cselect_b32 s2, 1, 0
@@ -629,8 +629,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou
 ; GFX10_W32-LABEL: test_div_fmas_f32_cond_to_vcc:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x1
-; GFX10_W32-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x8
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    s_cmp_eq_u32 s7, 0
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s5
@@ -646,8 +646,8 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou
 ; GFX10_W64-LABEL: test_div_fmas_f32_cond_to_vcc:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x1
-; GFX10_W64-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x8
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    s_cmp_eq_u32 s7, 0
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s5
@@ -668,10 +668,10 @@ define amdgpu_kernel void @test_div_fmas_f32_cond_to_vcc(float addrspace(1)* %ou
 define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) {
 ; GFX7-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s2, s[0:1], 0x13
-; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x1c
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x25
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dword s2, s[0:1], 0xa
+; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x13
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x1c
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b64 vcc, 0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s2
@@ -685,11 +685,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa
 ;
 ; GFX8-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x4c
-; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x70
-; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x28
+; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x4c
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x70
 ; GFX8-NEXT:    s_mov_b64 vcc, 0
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
@@ -703,10 +703,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa
 ; GFX10_W32-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x3
-; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x70
-; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x94
-; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x4c
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x4c
+; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x70
+; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x28
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W32-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s4
@@ -719,10 +719,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa
 ; GFX10_W64-LABEL: test_div_fmas_f32_imm_false_cond_to_vcc:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x3
-; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x70
-; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x94
-; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x4c
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x4c
+; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x70
+; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x28
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W64-NEXT:    s_mov_b64 vcc, 0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s4
@@ -739,10 +739,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_false_cond_to_vcc(float addrspa
 define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspace(1)* %out, [8 x i32], float %a, [8 x i32], float %b, [8 x i32], float %c) {
 ; GFX7-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s2, s[0:1], 0x13
-; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x1c
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x25
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dword s2, s[0:1], 0xa
+; GFX7-NEXT:    s_load_dword s3, s[0:1], 0x13
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x1c
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b64 vcc, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s2
@@ -756,11 +756,11 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
 ;
 ; GFX8-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x4c
-; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x70
-; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x94
+; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x28
+; GFX8-NEXT:    s_load_dword s3, s[0:1], 0x4c
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x70
 ; GFX8-NEXT:    s_mov_b64 vcc, -1
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
@@ -774,10 +774,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
 ; GFX10_W32-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
 ; GFX10_W32:       ; %bb.0:
 ; GFX10_W32-NEXT:    s_clause 0x3
-; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x70
-; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x94
-; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x4c
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dword s4, s[0:1], 0x4c
+; GFX10_W32-NEXT:    s_load_dword s5, s[0:1], 0x70
+; GFX10_W32-NEXT:    s_load_dword s6, s[0:1], 0x28
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W32-NEXT:    s_mov_b32 vcc_lo, -1
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    v_mov_b32_e32 v0, s4
@@ -790,10 +790,10 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
 ; GFX10_W64-LABEL: test_div_fmas_f32_imm_true_cond_to_vcc:
 ; GFX10_W64:       ; %bb.0:
 ; GFX10_W64-NEXT:    s_clause 0x3
-; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x70
-; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x94
-; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x4c
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dword s4, s[0:1], 0x4c
+; GFX10_W64-NEXT:    s_load_dword s5, s[0:1], 0x70
+; GFX10_W64-NEXT:    s_load_dword s6, s[0:1], 0x28
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W64-NEXT:    s_mov_b64 vcc, -1
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    v_mov_b32_e32 v0, s4
@@ -810,8 +810,8 @@ define amdgpu_kernel void @test_div_fmas_f32_imm_true_cond_to_vcc(float addrspac
 define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace(1)* %out, float addrspace(1)* %in, [8 x i32], i32 %d) {
 ; GFX7-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
-; GFX7-NEXT:    s_load_dword s8, s[0:1], 0x15
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
+; GFX7-NEXT:    s_load_dword s8, s[0:1], 0xc
 ; GFX7-NEXT:    s_mov_b32 s2, 0
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v2, 0
@@ -838,8 +838,8 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace
 ;
 ; GFX8-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x54
+; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
+; GFX8-NEXT:    s_load_dword s2, s[0:1], 0x30
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s6
@@ -873,9 +873,9 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace
 ;
 ; GFX10_W32-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
 ; GFX10_W32:       ; %bb.0:
-; GFX10_W32-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
 ; GFX10_W32-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX10_W32-NEXT:    s_load_dword s0, s[0:1], 0x54
+; GFX10_W32-NEXT:    s_load_dword s0, s[0:1], 0x30
 ; GFX10_W32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    global_load_dword v2, v1, s[6:7] glc dlc
@@ -896,9 +896,9 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace
 ;
 ; GFX10_W64-LABEL: test_div_fmas_f32_logical_cond_to_vcc:
 ; GFX10_W64:       ; %bb.0:
-; GFX10_W64-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x0
 ; GFX10_W64-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
-; GFX10_W64-NEXT:    s_load_dword s0, s[0:1], 0x54
+; GFX10_W64-NEXT:    s_load_dword s0, s[0:1], 0x30
 ; GFX10_W64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    global_load_dword v2, v1, s[6:7] glc dlc
@@ -938,20 +938,20 @@ define amdgpu_kernel void @test_div_fmas_f32_logical_cond_to_vcc(float addrspace
 define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out, [8 x i32], float addrspace(1)* %in, [8 x i32], i32 addrspace(1)* %dummy) {
 ; GFX7-LABEL: test_div_fmas_f32_i1_phi_vcc:
 ; GFX7:       ; %bb.0: ; %entry
-; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x13
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xa
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    buffer_load_dwordx3 v[1:3], v[1:2], s[4:7], 0 addr64
-; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 s6, 0
 ; GFX7-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX7-NEXT:    s_and_saveexec_b64 s[2:3], vcc
 ; GFX7-NEXT:    s_cbranch_execz .LBB13_2
 ; GFX7-NEXT:  ; %bb.1: ; %bb
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x1d
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x14
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
@@ -971,7 +971,7 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
 ;
 ; GFX8-LABEL: test_div_fmas_f32_i1_phi_vcc:
 ; GFX8:       ; %bb.0: ; %entry
-; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
+; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x28
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
 ; GFX8-NEXT:    s_mov_b32 s6, 0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
@@ -980,12 +980,12 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
 ; GFX8-NEXT:    v_addc_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    flat_load_dwordx3 v[1:3], v[1:2]
-; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX8-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GFX8-NEXT:    s_cbranch_execz .LBB13_2
 ; GFX8-NEXT:  ; %bb.1: ; %bb
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x74
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x50
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1008,18 +1008,18 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
 ;
 ; GFX10_W32-LABEL: test_div_fmas_f32_i1_phi_vcc:
 ; GFX10_W32:       ; %bb.0: ; %entry
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x28
 ; GFX10_W32-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX10_W32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
 ; GFX10_W32-NEXT:    s_mov_b32 s5, 0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    global_load_dwordx3 v[1:3], v1, s[2:3]
 ; GFX10_W32-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W32-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W32-NEXT:    s_and_saveexec_b32 s4, vcc_lo
 ; GFX10_W32-NEXT:    s_cbranch_execz .LBB13_2
 ; GFX10_W32-NEXT:  ; %bb.1: ; %bb
-; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x74
+; GFX10_W32-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x50
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W32-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX10_W32-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1038,18 +1038,18 @@ define amdgpu_kernel void @test_div_fmas_f32_i1_phi_vcc(float addrspace(1)* %out
 ;
 ; GFX10_W64-LABEL: test_div_fmas_f32_i1_phi_vcc:
 ; GFX10_W64:       ; %bb.0: ; %entry
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4c
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x28
 ; GFX10_W64-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX10_W64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
 ; GFX10_W64-NEXT:    s_mov_b32 s6, 0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    global_load_dwordx3 v[1:3], v1, s[2:3]
 ; GFX10_W64-NEXT:    s_waitcnt_depctr 0xffe3
-; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10_W64-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX10_W64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
 ; GFX10_W64-NEXT:    s_cbranch_execz .LBB13_2
 ; GFX10_W64-NEXT:  ; %bb.1: ; %bb
-; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x74
+; GFX10_W64-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x50
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10_W64-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX10_W64-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
index 2ece383a03247..fa0d86214e65e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll
@@ -44,9 +44,9 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3
 define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
 ; GFX6-LABEL: bfe_i32_arg_arg_imm:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_and_b32 s3, s3, 63
@@ -64,9 +64,9 @@ define amdgpu_kernel void @bfe_i32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0
 define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
 ; GFX6-LABEL: bfe_i32_arg_imm_arg:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
@@ -84,9 +84,9 @@ define amdgpu_kernel void @bfe_i32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0
 define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
 ; GFX6-LABEL: bfe_i32_imm_arg_arg:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_and_b32 s3, s3, 63
@@ -105,10 +105,10 @@ define amdgpu_kernel void @bfe_i32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1
 define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace(1)* %src0) #0 {
 ; GFX6-LABEL: v_bfe_print_arg:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x80002
@@ -125,9 +125,9 @@ define amdgpu_kernel void @v_bfe_print_arg(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
 ; GFX6-LABEL: bfe_i32_arg_0_width_reg_offset:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_and_b32 s3, s3, 63
@@ -144,8 +144,8 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_reg_offset(i32 addrspace(1)* %out
 define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
 ; GFX6-LABEL: bfe_i32_arg_0_width_imm_offset:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 8
@@ -161,10 +161,10 @@ define amdgpu_kernel void @bfe_i32_arg_0_width_imm_offset(i32 addrspace(1)* %out
 define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_6:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
@@ -183,10 +183,10 @@ define amdgpu_kernel void @bfe_i32_test_6(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_7:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
@@ -205,10 +205,10 @@ define amdgpu_kernel void @bfe_i32_test_7(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
@@ -227,10 +227,10 @@ define amdgpu_kernel void @bfe_i32_test_8(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_9:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x1001f
@@ -247,10 +247,10 @@ define amdgpu_kernel void @bfe_i32_test_9(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_10:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x1f0001
@@ -267,10 +267,10 @@ define amdgpu_kernel void @bfe_i32_test_10(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_11:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x180008
@@ -287,10 +287,10 @@ define amdgpu_kernel void @bfe_i32_test_11(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_12:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x80018
@@ -307,10 +307,10 @@ define amdgpu_kernel void @bfe_i32_test_12(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_13:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_ashr_i32 s3, s3, 31
@@ -328,10 +328,10 @@ define amdgpu_kernel void @bfe_i32_test_13(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_i32_test_14:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshr_b32 s3, s3, 31
@@ -349,7 +349,7 @@ define amdgpu_kernel void @bfe_i32_test_14(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_0:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_i32 s2, 0, 0
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -365,7 +365,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_0(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_1:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_i32 s2, 0x302e, 0
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -381,7 +381,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_1(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_2:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_i32 s2, 0, 0x10000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -397,7 +397,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_2(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_3:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_i32 s2, 1, 0x10000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -413,7 +413,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_3(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_4:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_i32 s2, -1, 0x10000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -429,7 +429,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_4(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_5:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x10007
 ; GFX6-NEXT:    s_bfe_i32 s2, 0x80, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -446,7 +446,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_5(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_6:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x80000
 ; GFX6-NEXT:    s_bfe_i32 s2, 0x80, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -463,7 +463,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_6(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_7:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x80000
 ; GFX6-NEXT:    s_bfe_i32 s2, 0x7f, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -480,7 +480,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_7(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x80006
 ; GFX6-NEXT:    s_bfe_i32 s2, 0x7f, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -497,7 +497,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_8(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_9:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x80010
 ; GFX6-NEXT:    s_bfe_i32 s2, 0x10000, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -514,7 +514,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_9(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_10:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x100010
 ; GFX6-NEXT:    s_bfe_i32 s2, 0xffff, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -531,7 +531,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_10(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_11:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x40004
 ; GFX6-NEXT:    s_bfe_i32 s2, 0xa0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -548,7 +548,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_11(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_12:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
 ; GFX6-NEXT:    s_bfe_i32 s2, 0xa0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -565,7 +565,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_12(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_13:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x100010
 ; GFX6-NEXT:    s_bfe_i32 s2, 0x1fffe, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -582,7 +582,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_13(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_14:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
 ; GFX6-NEXT:    s_bfe_i32 s2, 0xa0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -599,7 +599,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_14(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_15:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
 ; GFX6-NEXT:    s_bfe_i32 s2, 0xa0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -616,7 +616,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_15(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_i32 s2, -1, 0x70001
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -632,7 +632,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_16(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_17:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
 ; GFX6-NEXT:    s_bfe_i32 s2, 0xff, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -649,7 +649,7 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_17(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_i32_constant_fold_test_18:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
 ; GFX6-NEXT:    s_bfe_i32 s2, 0xff, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -666,10 +666,10 @@ define amdgpu_kernel void @bfe_i32_constant_fold_test_18(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_sext_in_reg_i24:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x180000
@@ -690,8 +690,8 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i3
 ; GFX6-LABEL: simplify_demanded_bfe_sdiv:
 ; GFX6:       ; %bb.0:
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, 2.0
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
@@ -731,10 +731,10 @@ define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i3
 define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
 ; GFX6-LABEL: bfe_0_width:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 8
@@ -751,11 +751,11 @@ define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)*
 define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
 ; GFX6-LABEL: bfe_8_bfe_8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_mov_b32 s4, 0x80000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, s4
@@ -774,10 +774,10 @@ define amdgpu_kernel void @bfe_8_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)*
 define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
 ; GFX6-LABEL: bfe_8_bfe_16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x80000
@@ -797,10 +797,10 @@ define amdgpu_kernel void @bfe_8_bfe_16(i32 addrspace(1)* %out, i32 addrspace(1)
 define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 {
 ; GFX6-LABEL: bfe_16_bfe_8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x100000
@@ -820,9 +820,9 @@ define amdgpu_kernel void @bfe_16_bfe_8(i32 addrspace(1)* %out, i32 addrspace(1)
 define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
 ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_add_i32 s3, s3, s4
@@ -843,9 +843,9 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe(i32 addrspace(1)* %out, i32
 define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
 ; GFX6-LABEL: sext_in_reg_i8_to_i32_bfe_wrong:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_add_i32 s3, s3, s4
@@ -866,12 +866,12 @@ define amdgpu_kernel void @sext_in_reg_i8_to_i32_bfe_wrong(i32 addrspace(1)* %ou
 define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
 ; GFX6-LABEL: sextload_i8_to_i32_bfe:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
 ; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
@@ -890,12 +890,12 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe(i32 addrspace(1)* %out, i8 add
 define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 addrspace(1)* %ptr) #0 {
 ; GFX6-LABEL: sextload_i8_to_i32_bfe_0:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_load_sbyte v0, off, s[4:7], 0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_bfe_i32 v0, v0, 8, 0
 ; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 8
@@ -914,10 +914,10 @@ define amdgpu_kernel void @sextload_i8_to_i32_bfe_0(i32 addrspace(1)* %out, i8 a
 define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_0:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x10000
@@ -937,10 +937,10 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_0(i32 addrspace(1)* %out, i
 define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: sext_in_reg_i1_bfe_offset_1:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x20000
@@ -960,10 +960,10 @@ define amdgpu_kernel void @sext_in_reg_i1_bfe_offset_1(i32 addrspace(1)* %out, i
 define amdgpu_kernel void @sext_in_reg_i2_bfe_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: sext_in_reg_i2_bfe_offset_1:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x20000
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
index 821d0fdd435c3..782d0b970a4ea 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll
@@ -44,9 +44,9 @@ define amdgpu_ps i64 @s_bfe_i64_arg_arg_arg(i64 inreg %src0, i32 inreg %src1, i3
 define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #0 {
 ; GFX6-LABEL: bfe_u32_arg_arg_arg:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s2, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0xb
+; GFX6-NEXT:    s_load_dword s2, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -65,9 +65,9 @@ define amdgpu_kernel void @bfe_u32_arg_arg_arg(i32 addrspace(1)* %out, i32 %src0
 define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
 ; GFX6-LABEL: bfe_u32_arg_arg_imm:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_and_b32 s3, s3, 63
@@ -85,9 +85,9 @@ define amdgpu_kernel void @bfe_u32_arg_arg_imm(i32 addrspace(1)* %out, i32 %src0
 define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #0 {
 ; GFX6-LABEL: bfe_u32_arg_imm_arg:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
@@ -105,9 +105,9 @@ define amdgpu_kernel void @bfe_u32_arg_imm_arg(i32 addrspace(1)* %out, i32 %src0
 define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1, i32 %src2) #0 {
 ; GFX6-LABEL: bfe_u32_imm_arg_arg:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_and_b32 s3, s3, 63
@@ -126,9 +126,9 @@ define amdgpu_kernel void @bfe_u32_imm_arg_arg(i32 addrspace(1)* %out, i32 %src1
 define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
 ; GFX6-LABEL: bfe_u32_arg_0_width_reg_offset:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_and_b32 s3, s3, 63
@@ -145,8 +145,8 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_reg_offset(i32 addrspace(1)* %out
 define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #0 {
 ; GFX6-LABEL: bfe_u32_arg_0_width_imm_offset:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 8
@@ -162,12 +162,12 @@ define amdgpu_kernel void @bfe_u32_arg_0_width_imm_offset(i32 addrspace(1)* %out
 define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_zextload_i8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    buffer_load_ubyte v0, off, s[4:7], 0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX6-NEXT:    s_waitcnt vmcnt(0)
 ; GFX6-NEXT:    v_bfe_u32 v0, v0, 0, 8
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -184,10 +184,10 @@ define amdgpu_kernel void @bfe_u32_zextload_i8(i32 addrspace(1)* %out, i8 addrsp
 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_zext_in_reg_i8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_add_i32 s3, s3, 1
@@ -208,10 +208,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8(i32 addrspace(1)* %out, i32 ad
 define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_zext_in_reg_i16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_add_i32 s3, s3, 1
@@ -232,10 +232,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16(i32 addrspace(1)* %out, i32 a
 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_1:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_add_i32 s3, s3, 1
@@ -256,10 +256,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_1(i32 addrspace(1)* %ou
 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_3:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_add_i32 s3, s3, 1
@@ -280,10 +280,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_3(i32 addrspace(1)* %ou
 define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_zext_in_reg_i8_offset_7:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_add_i32 s3, s3, 1
@@ -304,10 +304,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i8_offset_7(i32 addrspace(1)* %ou
 define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_zext_in_reg_i16_offset_8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_add_i32 s3, s3, 1
@@ -328,10 +328,10 @@ define amdgpu_kernel void @bfe_u32_zext_in_reg_i16_offset_8(i32 addrspace(1)* %o
 define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_1:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
@@ -348,10 +348,10 @@ define amdgpu_kernel void @bfe_u32_test_1(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_2:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
@@ -370,10 +370,10 @@ define amdgpu_kernel void @bfe_u32_test_2(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_3:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
@@ -392,10 +392,10 @@ define amdgpu_kernel void @bfe_u32_test_3(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_4:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x10000
@@ -415,10 +415,10 @@ define amdgpu_kernel void @bfe_u32_test_4(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_5:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_i32 s3, s3, 0x10000
@@ -438,10 +438,10 @@ define amdgpu_kernel void @bfe_u32_test_5(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_6:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
@@ -460,10 +460,10 @@ define amdgpu_kernel void @bfe_u32_test_6(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_7:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
@@ -482,10 +482,10 @@ define amdgpu_kernel void @bfe_u32_test_7(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 31
@@ -504,10 +504,10 @@ define amdgpu_kernel void @bfe_u32_test_8(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_9:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1001f
@@ -524,10 +524,10 @@ define amdgpu_kernel void @bfe_u32_test_9(i32 addrspace(1)* %out, i32 addrspace(
 define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_10:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x1f0001
@@ -544,10 +544,10 @@ define amdgpu_kernel void @bfe_u32_test_10(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_11:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x180008
@@ -564,10 +564,10 @@ define amdgpu_kernel void @bfe_u32_test_11(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_12:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x80018
@@ -585,10 +585,10 @@ define amdgpu_kernel void @bfe_u32_test_12(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_13:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_ashr_i32 s3, s3, 31
@@ -606,10 +606,10 @@ define amdgpu_kernel void @bfe_u32_test_13(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
 ; GFX6-LABEL: bfe_u32_test_14:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s3, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshr_b32 s3, s3, 31
@@ -627,7 +627,7 @@ define amdgpu_kernel void @bfe_u32_test_14(i32 addrspace(1)* %out, i32 addrspace
 define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_0:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_u32 s2, 0, 0
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -643,7 +643,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_1:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_u32 s2, 0x302e, 0
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -659,7 +659,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_2:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_u32 s2, 0, 0x10000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -675,7 +675,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_3:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_u32 s2, 1, 0x10000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -691,7 +691,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_3(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_4:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x10000
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -707,7 +707,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_5:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x10007
 ; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -724,7 +724,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_6:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x80000
 ; GFX6-NEXT:    s_bfe_u32 s2, 0x80, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -741,7 +741,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_7:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x80000
 ; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -758,7 +758,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x80006
 ; GFX6-NEXT:    s_bfe_u32 s2, 0x7f, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -775,7 +775,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_9:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x80010
 ; GFX6-NEXT:    s_bfe_u32 s2, 0x10000, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -792,7 +792,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_10:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x100010
 ; GFX6-NEXT:    s_bfe_u32 s2, 0xffff, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -809,7 +809,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_11:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x40004
 ; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -826,7 +826,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_12:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
 ; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -843,7 +843,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_13:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x100010
 ; GFX6-NEXT:    s_bfe_u32 s2, 0x1fffe, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -860,7 +860,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_14:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1e0002
 ; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -877,7 +877,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_15:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1c0004
 ; GFX6-NEXT:    s_bfe_u32 s2, 0xa0, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -894,7 +894,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_bfe_u32 s2, -1, 0x70001
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -910,7 +910,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_17:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1f0001
 ; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -927,7 +927,7 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(i32 addrspace(1)* %out)
 define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out) #0 {
 ; GFX6-LABEL: bfe_u32_constant_fold_test_18:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, 0x1001f
 ; GFX6-NEXT:    s_bfe_u32 s2, 0xff, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s2
@@ -948,13 +948,13 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_18(i32 addrspace(1)* %out)
 define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out0,
 ; GFX6-LABEL: simplify_bfe_u32_multi_use_arg:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_load_dword s8, s[2:3], 0x0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; GFX6-NEXT:    s_mov_b64 s[2:3], s[6:7]
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_and_b32 s8, s8, 63
@@ -977,8 +977,8 @@ define amdgpu_kernel void @simplify_bfe_u32_multi_use_arg(i32 addrspace(1)* %out
 define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
 ; GFX6-LABEL: lshr_and:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x30006
@@ -995,9 +995,9 @@ define amdgpu_kernel void @lshr_and(i32 addrspace(1)* %out, i32 %a) #0 {
 define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
 ; GFX6-LABEL: v_lshr_and:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0xc
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x3
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_lshr_b32 s3, s3, s4
@@ -1015,8 +1015,8 @@ define amdgpu_kernel void @v_lshr_and(i32 addrspace(1)* %out, i32 %a, i32 %b) #0
 define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
 ; GFX6-LABEL: and_lshr:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x30006
@@ -1033,8 +1033,8 @@ define amdgpu_kernel void @and_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
 define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
 ; GFX6-LABEL: and_lshr2:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x30006
@@ -1051,8 +1051,8 @@ define amdgpu_kernel void @and_lshr2(i32 addrspace(1)* %out, i32 %a) #0 {
 define amdgpu_kernel void @shl_lshr(i32 addrspace(1)* %out, i32 %a) #0 {
 ; GFX6-LABEL: shl_lshr:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s3, s[0:1], 0xb
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dword s3, s[0:1], 0x2
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_bfe_u32 s3, s3, 0x150002
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
index 34db791ac9b5c..7e5ecaac8d9c7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -213,7 +213,7 @@ define i64 @v_shl_i64_sext_i32_overflow(i32 %x) {
 define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) {
 ; GFX7-LABEL: mulu24_shl64:
 ; GFX7:       ; %bb.0: ; %bb
-; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX7-NEXT:    v_and_b32_e32 v0, 6, v0
 ; GFX7-NEXT:    v_mul_u32_u24_e32 v0, 7, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v1, 0
@@ -226,7 +226,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) {
 ;
 ; GFX8-LABEL: mulu24_shl64:
 ; GFX8:       ; %bb.0: ; %bb
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX8-NEXT:    v_and_b32_e32 v0, 6, v0
 ; GFX8-NEXT:    v_mul_u32_u24_e32 v0, 7, v0
 ; GFX8-NEXT:    v_mov_b32_e32 v1, 0
@@ -241,7 +241,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) {
 ;
 ; GFX9-LABEL: mulu24_shl64:
 ; GFX9:       ; %bb.0: ; %bb
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    v_and_b32_e32 v0, 6, v0
 ; GFX9-NEXT:    v_mul_u32_u24_e32 v0, 7, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
@@ -256,7 +256,7 @@ define amdgpu_kernel void @mulu24_shl64(i32 addrspace(1)* nocapture %arg) {
 ;
 ; GFX10-LABEL: mulu24_shl64:
 ; GFX10:       ; %bb.0: ; %bb
-; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX10-NEXT:    v_and_b32_e32 v0, 6, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX10-NEXT:    v_mul_u32_u24_e32 v0, 7, v0
@@ -281,7 +281,7 @@ bb:
 define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 addrspace(1)* nocapture readonly %arg1) {
 ; GFX7-LABEL: muli24_shl64:
 ; GFX7:       ; %bb.0: ; %bb
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX7-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX7-NEXT:    s_mov_b32 s6, 0
@@ -303,7 +303,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad
 ;
 ; GFX8-LABEL: muli24_shl64:
 ; GFX8:       ; %bb.0: ; %bb
-; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v3, 2, v0
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v5, 3, v0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
@@ -326,7 +326,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad
 ;
 ; GFX9-LABEL: muli24_shl64:
 ; GFX9:       ; %bb.0: ; %bb
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
@@ -341,7 +341,7 @@ define amdgpu_kernel void @muli24_shl64(i64 addrspace(1)* nocapture %arg, i32 ad
 ;
 ; GFX10-LABEL: muli24_shl64:
 ; GFX10:       ; %bb.0: ; %bb
-; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 2, v0
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
index 244325b25302a..b662b219abdb4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll
@@ -9,8 +9,8 @@
 define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
@@ -22,8 +22,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3
 ;
 ; GFX7-LABEL: store_lds_v4i32:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -37,8 +37,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3
 ; GFX10-LABEL: store_lds_v4i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
@@ -54,8 +54,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3
 define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align1:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_bfe_u32 s0, 8, 0x100000
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_bfe_u32 s3, s4, 0x100000
@@ -111,8 +111,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align1:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 s1, 0x80008
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
@@ -166,8 +166,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_bfe_u32 s0, 8, 0x100000
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_bfe_u32 s3, s4, 0x100000
@@ -227,8 +227,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align2:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshr_b32 s0, s4, 16
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
@@ -255,8 +255,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align2:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    s_lshr_b32 s1, s4, 16
@@ -285,8 +285,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
@@ -317,8 +317,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align4:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s2
@@ -331,8 +331,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align4:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -347,8 +347,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align4:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
@@ -365,8 +365,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align8:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
@@ -378,8 +378,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align8:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -393,8 +393,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
@@ -410,8 +410,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
@@ -423,8 +423,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align16:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -438,8 +438,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
index 7246858843ac7..d19684bcbff9f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll
@@ -9,8 +9,8 @@
 define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s13
@@ -21,8 +21,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3
 ;
 ; GFX7-LABEL: store_lds_v3i32:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s12
@@ -35,8 +35,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3
 ; GFX10-LABEL: store_lds_v3i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s13
@@ -51,8 +51,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3
 define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align1:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_bfe_u32 s0, 8, 0x100000
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_bfe_u32 s3, s4, 0x100000
@@ -96,8 +96,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align1:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 s1, 0x80008
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
@@ -140,8 +140,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_bfe_u32 s0, 8, 0x100000
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_lshr_b32 s1, s4, 16
@@ -189,8 +189,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align2:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_lshr_b32 s0, s4, 16
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
@@ -212,8 +212,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align2:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    s_lshr_b32 s1, s4, 16
@@ -237,8 +237,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
@@ -264,8 +264,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align4:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s2
@@ -277,8 +277,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align4:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -292,8 +292,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align4:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
@@ -309,8 +309,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align8:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s2
@@ -322,8 +322,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align8:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -337,8 +337,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
@@ -354,8 +354,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s13
@@ -366,8 +366,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align16:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s12
@@ -380,8 +380,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s12
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s13
diff --git a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
index bd173aaed0a07..41d33f96fc218 100644
--- a/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds-sub-offset.ll
@@ -44,7 +44,7 @@ entry:
 define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #0 {
 ; CI-LABEL: write_ds_sub0_offset0_global_clamp_bit:
 ; CI:       ; %bb.0: ; %entry
-; CI-NEXT:    s_load_dword s0, s[0:1], 0x9
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0, v0
 ; CI-NEXT:    s_mov_b64 vcc, 0
@@ -64,7 +64,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
 ;
 ; GFX9-LABEL: write_ds_sub0_offset0_global_clamp_bit:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX9-NEXT:    s_mov_b64 vcc, 0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v0
@@ -81,7 +81,7 @@ define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.v
 ;
 ; GFX10-LABEL: write_ds_sub0_offset0_global_clamp_bit:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7b
@@ -291,7 +291,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
 define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 {
 ; CI-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s0, s[0:1], 0x9
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    v_sub_i32_e32 v0, vcc, 0x3fb, v0
 ; CI-NEXT:    s_mov_b64 vcc, 0
@@ -312,7 +312,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
 ;
 ; GFX9-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX9-NEXT:    s_mov_b64 vcc, 0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    v_sub_u32_e32 v3, 0x3fb, v0
@@ -330,7 +330,7 @@ define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_
 ;
 ; GFX10-LABEL: add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX10-NEXT:    v_mov_b32_e32 v3, 0x7b
diff --git a/llvm/test/CodeGen/AMDGPU/ds_read2.ll b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
index 260c6385c4781..c44c597775966 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_read2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_read2.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 {
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:8
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -28,7 +28,7 @@ define amdgpu_kernel void @simple_read2_f32(float addrspace(1)* %out) #0 {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:8
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
@@ -51,7 +51,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out)
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:255
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -64,7 +64,7 @@ define amdgpu_kernel void @simple_read2_f32_max_offset(float addrspace(1)* %out)
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:255
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
@@ -88,7 +88,7 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read_b32 v1, v0
 ; CI-NEXT:    ds_read_b32 v2, v0 offset:1028
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -102,7 +102,7 @@ define amdgpu_kernel void @simple_read2_f32_too_far(float addrspace(1)* %out) #0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    ds_read_b32 v1, v0
 ; GFX9-NEXT:    ds_read_b32 v2, v0 offset:1028
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
@@ -126,7 +126,7 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:8
 ; CI-NEXT:    ds_read2_b32 v[3:4], v0 offset0:11 offset1:27
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -142,7 +142,7 @@ define amdgpu_kernel void @simple_read2_f32_x2(float addrspace(1)* %out) #0 {
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v4 offset1:8
 ; GFX9-NEXT:    ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_add_f32_e32 v1, v2, v3
@@ -184,7 +184,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out)
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_barrier
 ; CI-NEXT:    ds_read2_b32 v[3:4], v0 offset0:11 offset1:27
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    v_add_f32_e32 v1, v1, v2
 ; CI-NEXT:    s_mov_b32 s2, 0
@@ -202,7 +202,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_barrier(float addrspace(1)* %out)
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_barrier
 ; GFX9-NEXT:    ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v1, v2, v3
@@ -245,7 +245,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)*
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset0:2 offset1:8
 ; CI-NEXT:    ds_read2_b32 v[3:4], v0 offset0:11 offset1:27
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -261,7 +261,7 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)*
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v4 offset0:2 offset1:8
 ; GFX9-NEXT:    ds_read2_b32 v[2:3], v4 offset0:11 offset1:27
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_add_f32_e32 v1, v2, v3
@@ -301,9 +301,9 @@ define amdgpu_kernel void @simple_read2_f32_x2_nonzero_base(float addrspace(1)*
 define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
 ; CI-LABEL: read2_ptr_is_subreg_arg_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_mov_b32_e32 v1, s2
@@ -320,9 +320,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out,
 ;
 ; GFX9-LABEL: read2_ptr_is_subreg_arg_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s3
@@ -354,9 +354,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_f32(float addrspace(1)* %out,
 define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)* %out, <2 x float addrspace(3)*> %lds.ptr) #0 {
 ; CI-LABEL: read2_ptr_is_subreg_arg_offset_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_mov_b32_e32 v1, s2
@@ -373,9 +373,9 @@ define amdgpu_kernel void @read2_ptr_is_subreg_arg_offset_f32(float addrspace(1)
 ;
 ; GFX9-LABEL: read2_ptr_is_subreg_arg_offset_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s3
@@ -410,7 +410,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[1:2], v0 offset1:8
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -423,7 +423,7 @@ define amdgpu_kernel void @read2_ptr_is_subreg_f32(float addrspace(1)* %out) #0
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:8
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
@@ -453,7 +453,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out)
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read_b32 v1, v0
 ; CI-NEXT:    ds_read_b32 v2, v0 offset:32
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -467,7 +467,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_0(float addrspace(1)* %out)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    ds_read_b32 v1, v0
 ; GFX9-NEXT:    ds_read_b32 v2, v0 offset:32
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
@@ -491,7 +491,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out)
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read_b32 v1, v0
 ; CI-NEXT:    ds_read_b32 v2, v0 offset:32
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -505,7 +505,7 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out)
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    ds_read_b32 v1, v0
 ; GFX9-NEXT:    ds_read_b32 v2, v0 offset:32
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
@@ -526,10 +526,10 @@ define amdgpu_kernel void @simple_read2_f32_volatile_1(float addrspace(1)* %out)
 define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
 ; CI-LABEL: unaligned_read2_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s2, s[0:1], 0xb
+; CI-NEXT:    s_load_dword s2, s[0:1], 0x2
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_add_i32_e32 v1, vcc, s2, v0
@@ -565,8 +565,8 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a
 ;
 ; GFX9-ALIGNED-LABEL: unaligned_read2_f32:
 ; GFX9-ALIGNED:       ; %bb.0:
-; GFX9-ALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x2c
-; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-ALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x8
+; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX9-ALIGNED-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-ALIGNED-NEXT:    v_add_u32_e32 v1, s4, v0
@@ -594,9 +594,9 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a
 ;
 ; GFX9-UNALIGNED-LABEL: unaligned_read2_f32:
 ; GFX9-UNALIGNED:       ; %bb.0:
-; GFX9-UNALIGNED-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GFX9-UNALIGNED-NEXT:    s_load_dword s2, s[0:1], 0x8
 ; GFX9-UNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-UNALIGNED-NEXT:    v_add_u32_e32 v0, s2, v2
 ; GFX9-UNALIGNED-NEXT:    ds_read2_b32 v[0:1], v0 offset1:8
@@ -619,10 +619,10 @@ define amdgpu_kernel void @unaligned_read2_f32(float addrspace(1)* %out, float a
 define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
 ; CI-LABEL: unaligned_offset_read2_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s2, s[0:1], 0xb
+; CI-NEXT:    s_load_dword s2, s[0:1], 0x2
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_add_i32_e32 v1, vcc, s2, v0
@@ -658,8 +658,8 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out,
 ;
 ; GFX9-ALIGNED-LABEL: unaligned_offset_read2_f32:
 ; GFX9-ALIGNED:       ; %bb.0:
-; GFX9-ALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x2c
-; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-ALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x8
+; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX9-ALIGNED-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-ALIGNED-NEXT:    v_add_u32_e32 v1, s4, v0
@@ -687,9 +687,9 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out,
 ;
 ; GFX9-UNALIGNED-LABEL: unaligned_offset_read2_f32:
 ; GFX9-UNALIGNED:       ; %bb.0:
-; GFX9-UNALIGNED-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GFX9-UNALIGNED-NEXT:    s_load_dword s2, s[0:1], 0x8
 ; GFX9-UNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-UNALIGNED-NEXT:    v_add3_u32 v0, s2, v2, 5
 ; GFX9-UNALIGNED-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
@@ -715,10 +715,10 @@ define amdgpu_kernel void @unaligned_offset_read2_f32(float addrspace(1)* %out,
 define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %out, float addrspace(3)* %lds) #0 {
 ; CI-LABEL: misaligned_2_simple_read2_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s2, s[0:1], 0xb
+; CI-NEXT:    s_load_dword s2, s[0:1], 0x2
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_add_i32_e32 v1, vcc, s2, v0
@@ -741,9 +741,9 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %ou
 ;
 ; GFX9-ALIGNED-LABEL: misaligned_2_simple_read2_f32:
 ; GFX9-ALIGNED:       ; %bb.0:
-; GFX9-ALIGNED-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GFX9-ALIGNED-NEXT:    s_load_dword s2, s[0:1], 0x8
 ; GFX9-ALIGNED-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-ALIGNED-NEXT:    v_add_u32_e32 v1, s2, v0
 ; GFX9-ALIGNED-NEXT:    ds_read_u16 v2, v1
@@ -760,9 +760,9 @@ define amdgpu_kernel void @misaligned_2_simple_read2_f32(float addrspace(1)* %ou
 ;
 ; GFX9-UNALIGNED-LABEL: misaligned_2_simple_read2_f32:
 ; GFX9-UNALIGNED:       ; %bb.0:
-; GFX9-UNALIGNED-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GFX9-UNALIGNED-NEXT:    s_load_dword s2, s[0:1], 0x8
 ; GFX9-UNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
-; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-UNALIGNED-NEXT:    v_add_u32_e32 v0, s2, v2
 ; GFX9-UNALIGNED-NEXT:    ds_read2_b32 v[0:1], v0 offset1:8
@@ -788,7 +788,7 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 {
 ; CI-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b64 v[0:3], v4 offset1:8
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_mov_b32_e32 v5, 0
@@ -801,7 +801,7 @@ define amdgpu_kernel void @simple_read2_f64(double addrspace(1)* %out) #0 {
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX9-NEXT:    ds_read2_b64 v[0:3], v4 offset1:8
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -824,7 +824,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out
 ; CI-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b64 v[0:3], v4 offset1:255
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_mov_b32_e32 v5, 0
@@ -837,7 +837,7 @@ define amdgpu_kernel void @simple_read2_f64_max_offset(double addrspace(1)* %out
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX9-NEXT:    ds_read2_b64 v[0:3], v4 offset1:255
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -861,7 +861,7 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) #
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read_b64 v[1:2], v0
 ; CI-NEXT:    ds_read_b64 v[3:4], v0 offset:2056
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -875,7 +875,7 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) #
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX9-NEXT:    ds_read_b64 v[0:1], v4
 ; GFX9-NEXT:    ds_read_b64 v[2:3], v4 offset:2056
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f64 v[0:1], v[0:1], v[2:3]
 ; GFX9-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -896,10 +896,10 @@ define amdgpu_kernel void @simple_read2_f64_too_far(double addrspace(1)* %out) #
 define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, double addrspace(3)* %lds) #0 {
 ; CI-LABEL: misaligned_read2_f64:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s2, s[0:1], 0xb
+; CI-NEXT:    s_load_dword s2, s[0:1], 0x2
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_add_i32_e32 v3, vcc, s2, v0
@@ -914,9 +914,9 @@ define amdgpu_kernel void @misaligned_read2_f64(double addrspace(1)* %out, doubl
 ;
 ; GFX9-LABEL: misaligned_read2_f64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_u32_e32 v2, s2, v4
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:1
@@ -945,7 +945,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out
 ; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read_b64 v[0:1], v0
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, -1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -957,7 +957,7 @@ define amdgpu_kernel void @load_constant_adjacent_offsets(i32 addrspace(1)* %out
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    ds_read_b64 v[0:1], v2
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
 ; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
@@ -975,7 +975,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out
 ; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read2_b32 v[0:1], v0 offset1:2
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, -1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -987,7 +987,7 @@ define amdgpu_kernel void @load_constant_disjoint_offsets(i32 addrspace(1)* %out
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    ds_read2_b32 v[0:1], v2 offset1:2
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
 ; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
@@ -1007,7 +1007,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)*
 ; CI-NEXT:    v_mov_b32_e32 v0, 0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read_b128 v[0:3], v0
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, -1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1020,7 +1020,7 @@ define amdgpu_kernel void @load_misaligned64_constant_offsets(i64 addrspace(1)*
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    ds_read_b128 v[0:3], v4
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
@@ -1042,7 +1042,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    ds_read_b64 v[0:1], v2 offset:16384
 ; CI-NEXT:    ds_read_b64 v[2:3], v2 offset:32760
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, -1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1056,7 +1056,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    ds_read_b64 v[0:1], v4 offset:16384
 ; GFX9-NEXT:    ds_read_b64 v[2:3], v4 offset:32760
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
@@ -1075,7 +1075,7 @@ define amdgpu_kernel void @load_misaligned64_constant_large_offsets(i64 addrspac
 define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb) #0 {
 ; CI-LABEL: sgemm_inner_loop_read2_sequence:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; CI-NEXT:    s_lshl_b32 s0, s2, 2
 ; CI-NEXT:    s_add_i32 s1, s0, 0xc20
 ; CI-NEXT:    s_addk_i32 s0, 0xc60
@@ -1123,7 +1123,7 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v3
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(2)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v4
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v5
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v6
@@ -1179,8 +1179,8 @@ define amdgpu_kernel void @sgemm_inner_loop_read2_sequence(float addrspace(1)* %
 define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(3)* %in) #0 {
 ; CI-LABEL: misaligned_read2_v2i32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s2, s[0:1], 0xb
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dword s2, s[0:1], 0x2
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1193,8 +1193,8 @@ define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out,
 ;
 ; GFX9-LABEL: misaligned_read2_v2i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x8
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
@@ -1210,8 +1210,8 @@ define amdgpu_kernel void @misaligned_read2_v2i32(<2 x i32> addrspace(1)* %out,
 define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addrspace(3)* %in) #0 {
 ; CI-LABEL: misaligned_read2_i64:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s2, s[0:1], 0xb
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dword s2, s[0:1], 0x2
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1224,8 +1224,8 @@ define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addr
 ;
 ; GFX9-LABEL: misaligned_read2_i64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x8
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
@@ -1241,8 +1241,8 @@ define amdgpu_kernel void @misaligned_read2_i64(i64 addrspace(1)* %out, i64 addr
 define amdgpu_kernel void @ds_read_diff_base_interleaving(
 ; CI-LABEL: ds_read_diff_base_interleaving:
 ; CI:       ; %bb.0: ; %bb
-; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
 ; CI-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
@@ -1272,10 +1272,10 @@ define amdgpu_kernel void @ds_read_diff_base_interleaving(
 ;
 ; GFX9-LABEL: ds_read_diff_base_interleaving:
 ; GFX9:       ; %bb.0: ; %bb
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 4, v1
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_add_u32_e32 v2, s4, v1
@@ -1348,10 +1348,10 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_add_u32 s40, s40, s11
 ; CI-NEXT:    s_mov_b64 s[10:11], s[6:7]
-; CI-NEXT:    s_load_dwordx2 s[36:37], s[4:5], 0x9
-; CI-NEXT:    s_load_dword s6, s[4:5], 0xb
+; CI-NEXT:    s_load_dwordx2 s[36:37], s[4:5], 0x0
+; CI-NEXT:    s_load_dword s6, s[4:5], 0x2
 ; CI-NEXT:    s_addc_u32 s41, s41, 0
-; CI-NEXT:    s_add_u32 s8, s4, 48
+; CI-NEXT:    s_add_u32 s8, s4, 12
 ; CI-NEXT:    s_addc_u32 s9, s5, 0
 ; CI-NEXT:    s_getpc_b64 s[4:5]
 ; CI-NEXT:    s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4
@@ -1393,9 +1393,9 @@ define amdgpu_kernel void @ds_read_call_read(i32 addrspace(1)* %out, i32 addrspa
 ; GFX9-NEXT:    s_add_u32 s36, s36, s11
 ; GFX9-NEXT:    s_addc_u32 s37, s37, 0
 ; GFX9-NEXT:    s_mov_b64 s[10:11], s[6:7]
-; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x24
-; GFX9-NEXT:    s_add_u32 s8, s4, 48
+; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x8
+; GFX9-NEXT:    s_load_dwordx2 s[34:35], s[4:5], 0x0
+; GFX9-NEXT:    s_add_u32 s8, s4, 12
 ; GFX9-NEXT:    s_addc_u32 s9, s5, 0
 ; GFX9-NEXT:    s_getpc_b64 s[4:5]
 ; GFX9-NEXT:    s_add_u32 s4, s4, void_func_void@gotpcrel32@lo+4
@@ -1487,7 +1487,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)*
 ; CI-NEXT:    ds_read_u8 v6, v0 offset:66
 ; CI-NEXT:    ds_read_u8 v0, v0 offset:65
 ; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; CI-NEXT:    v_or_b32_e32 v1, v2, v1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    v_lshlrev_b32_e32 v2, 8, v6
@@ -1514,7 +1514,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)*
 ; GFX9-ALIGNED-NEXT:    ds_read_u8 v8, v2 offset:71
 ; GFX9-ALIGNED-NEXT:    s_waitcnt lgkmcnt(7)
 ; GFX9-ALIGNED-NEXT:    v_lshlrev_b32_e32 v0, 8, v0
-; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-ALIGNED-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX9-ALIGNED-NEXT:    v_lshlrev_b32_e32 v1, 8, v7
@@ -1531,7 +1531,7 @@ define amdgpu_kernel void @read2_v2i32_align1_odd_offset(<2 x i32> addrspace(1)*
 ; GFX9-UNALIGNED-LABEL: read2_v2i32_align1_odd_offset:
 ; GFX9-UNALIGNED:       ; %bb.0: ; %entry
 ; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v0, 0x41
-; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; GFX9-UNALIGNED-NEXT:    ds_read2_b32 v[0:1], v0 offset1:1
 ; GFX9-UNALIGNED-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/ds_write2.ll b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
index 501282d7f5573..268e8bbeb8a12 100644
--- a/llvm/test/CodeGen/AMDGPU/ds_write2.ll
+++ b/llvm/test/CodeGen/AMDGPU/ds_write2.ll
@@ -9,7 +9,7 @@
 define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_one_val_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -23,7 +23,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo
 ;
 ; GFX9-LABEL: simple_write2_one_val_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
@@ -44,7 +44,7 @@ define amdgpu_kernel void @simple_write2_one_val_f32(float addrspace(1)* %C, flo
 define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_two_val_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -60,7 +60,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo
 ;
 ; GFX9-LABEL: simple_write2_two_val_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1] glc
@@ -85,7 +85,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32(float addrspace(1)* %C, flo
 define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
 ; CI-LABEL: simple_write2_two_val_f32_volatile_0:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -105,7 +105,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(
 ;
 ; GFX9-LABEL: simple_write2_two_val_f32_volatile_0:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1] glc
@@ -131,7 +131,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_0(float addrspace(
 define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
 ; CI-LABEL: simple_write2_two_val_f32_volatile_1:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -151,7 +151,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(
 ;
 ; GFX9-LABEL: simple_write2_two_val_f32_volatile_1:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1] glc
@@ -182,7 +182,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_volatile_1(float addrspace(
 define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_two_val_subreg2_mixed_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
@@ -199,7 +199,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa
 ;
 ; GFX9-LABEL: simple_write2_two_val_subreg2_mixed_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    ; kill: killed $vgpr4
@@ -229,7 +229,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_mixed_f32(float addrspa
 define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)* %C, <2 x float> addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_two_val_subreg2_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
@@ -244,7 +244,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)*
 ;
 ; GFX9-LABEL: simple_write2_two_val_subreg2_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 3, v0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -268,7 +268,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg2_f32(float addrspace(1)*
 define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)* %C, <4 x float> addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_two_val_subreg4_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v1, 4, v0
@@ -283,7 +283,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)*
 ;
 ; GFX9-LABEL: simple_write2_two_val_subreg4_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 4, v0
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -307,7 +307,7 @@ define amdgpu_kernel void @simple_write2_two_val_subreg4_f32(float addrspace(1)*
 define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(1)* %C, float addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_two_val_max_offset_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -323,7 +323,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(
 ;
 ; GFX9-LABEL: simple_write2_two_val_max_offset_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1] glc
@@ -348,7 +348,7 @@ define amdgpu_kernel void @simple_write2_two_val_max_offset_f32(float addrspace(
 define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
 ; CI-LABEL: simple_write2_two_val_too_far_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -368,7 +368,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)*
 ;
 ; GFX9-LABEL: simple_write2_two_val_too_far_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
@@ -394,7 +394,7 @@ define amdgpu_kernel void @simple_write2_two_val_too_far_f32(float addrspace(1)*
 define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
 ; CI-LABEL: simple_write2_two_val_f32_x2:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -413,7 +413,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C,
 ;
 ; GFX9-LABEL: simple_write2_two_val_f32_x2:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
@@ -450,7 +450,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2(float addrspace(1)* %C,
 define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1) #0 {
 ; CI-LABEL: simple_write2_two_val_f32_x2_nonzero_base:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -469,7 +469,7 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs
 ;
 ; GFX9-LABEL: simple_write2_two_val_f32_x2_nonzero_base:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[0:1]
@@ -506,8 +506,8 @@ define amdgpu_kernel void @simple_write2_two_val_f32_x2_nonzero_base(float addrs
 define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)* %C, float addrspace(1)* %in0, float addrspace(1)* %in1, <2 x float addrspace(3)*> %lds.ptr) #0 {
 ; CI-LABEL: write2_ptr_subreg_arg_two_val_f32:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
-; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xf
+; CI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2
+; CI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x6
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -529,8 +529,8 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)*
 ;
 ; GFX9-LABEL: write2_ptr_subreg_arg_two_val_f32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x3c
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x8
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x18
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[4:5]
@@ -566,7 +566,7 @@ define amdgpu_kernel void @write2_ptr_subreg_arg_two_val_f32(float addrspace(1)*
 define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_one_val_f64:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
@@ -580,7 +580,7 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do
 ;
 ; GFX9-LABEL: simple_write2_one_val_f64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[0:1]
@@ -601,8 +601,8 @@ define amdgpu_kernel void @simple_write2_one_val_f64(double addrspace(1)* %C, do
 define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
 ; CI-LABEL: misaligned_simple_write2_one_val_f64:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; CI-NEXT:    s_load_dword s0, s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x4
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
@@ -618,8 +618,8 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace
 ;
 ; GFX9-LABEL: misaligned_simple_write2_one_val_f64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x10
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
@@ -642,8 +642,8 @@ define amdgpu_kernel void @misaligned_simple_write2_one_val_f64(double addrspace
 define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double addrspace(1)* %C, double addrspace(1)* %in, double addrspace(3)* %lds) #0 {
 ; CI-LABEL: unaligned_offset_simple_write2_one_val_f64:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; CI-NEXT:    s_load_dword s0, s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; CI-NEXT:    s_load_dword s0, s[0:1], 0x4
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
@@ -675,8 +675,8 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add
 ;
 ; GFX9-ALIGNED-LABEL: unaligned_offset_simple_write2_one_val_f64:
 ; GFX9-ALIGNED:       ; %bb.0:
-; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX9-ALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX9-ALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x10
 ; GFX9-ALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
 ; GFX9-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-ALIGNED-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
@@ -702,8 +702,8 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add
 ;
 ; GFX9-UNALIGNED-LABEL: unaligned_offset_simple_write2_one_val_f64:
 ; GFX9-UNALIGNED:       ; %bb.0:
-; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX9-UNALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX9-UNALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x10
 ; GFX9-UNALIGNED-NEXT:    v_lshlrev_b32_e32 v2, 3, v0
 ; GFX9-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-UNALIGNED-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
@@ -731,7 +731,7 @@ define amdgpu_kernel void @unaligned_offset_simple_write2_one_val_f64(double add
 define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, double addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_two_val_f64:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, 0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
@@ -747,7 +747,7 @@ define amdgpu_kernel void @simple_write2_two_val_f64(double addrspace(1)* %C, do
 ;
 ; GFX9-LABEL: simple_write2_two_val_f64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x8
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dwordx2 v[0:1], v4, s[0:1] glc
@@ -873,7 +873,7 @@ define amdgpu_kernel void @store_misaligned64_constant_large_offsets() {
 define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %lda, i32 %ldb, float addrspace(1)* %in) #0 {
 ; CI-LABEL: write2_sgemm_sequence:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x4
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_load_dword s0, s[0:1], 0x0
@@ -895,7 +895,7 @@ define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %ld
 ;
 ; GFX9-LABEL: write2_sgemm_sequence:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x10
 ; GFX9-NEXT:    s_lshl_b32 s2, s2, 2
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_load_dword s0, s[0:1], 0x0
@@ -950,8 +950,8 @@ define amdgpu_kernel void @write2_sgemm_sequence(float addrspace(1)* %C, i32 %ld
 define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrspace(3)* %out, <4 x float> addrspace(1)* %in) #0 {
 ; CI-LABEL: simple_write2_v4f32_superreg_align4:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; CI-NEXT:    s_load_dword s4, s[0:1], 0x9
+; CI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; CI-NEXT:    s_load_dword s4, s[0:1], 0x0
 ; CI-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
 ; CI-NEXT:    s_mov_b32 m0, -1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -968,8 +968,8 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrs
 ;
 ; GFX9-ALIGNED-LABEL: simple_write2_v4f32_superreg_align4:
 ; GFX9-ALIGNED:       ; %bb.0:
-; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX9-ALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x24
+; GFX9-ALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX9-ALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x0
 ; GFX9-ALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-ALIGNED-NEXT:    v_lshl_add_u32 v0, v0, 4, s4
 ; GFX9-ALIGNED-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
@@ -984,8 +984,8 @@ define amdgpu_kernel void @simple_write2_v4f32_superreg_align4(<4 x float> addrs
 ;
 ; GFX9-UNALIGNED-LABEL: simple_write2_v4f32_superreg_align4:
 ; GFX9-UNALIGNED:       ; %bb.0:
-; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX9-UNALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x24
+; GFX9-UNALIGNED-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x8
+; GFX9-UNALIGNED-NEXT:    s_load_dword s4, s[0:1], 0x0
 ; GFX9-UNALIGNED-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-UNALIGNED-NEXT:    v_lshl_add_u32 v4, v0, 4, s4
 ; GFX9-UNALIGNED-NEXT:    s_load_dwordx4 s[0:3], s[2:3], 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
index 09e2e90feed96..abb1204512db0 100644
--- a/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
+++ b/llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -262,7 +262,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_mov_b32 s4, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v0, 15
-; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s5, s5, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s4, s3
@@ -289,7 +289,7 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
 ; GFX10-PAL-NEXT:    s_addc_u32 s5, s5, 0
 ; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
 ; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
-; GFX10-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX10-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v0, 15
 ; GFX10-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-PAL-NEXT:    s_and_b32 s1, s0, 15
@@ -893,7 +893,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_mov_b32 s4, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s5, s5, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s4, s3
@@ -923,7 +923,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX1010-PAL-NEXT:    s_addc_u32 s5, s5, 0
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
-; GFX1010-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX1010-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
@@ -951,7 +951,7 @@ define amdgpu_kernel void @store_load_sindex_small_offset_kernel(i32 %idx) {
 ; GFX1030-PAL-NEXT:    s_addc_u32 s5, s5, 0
 ; GFX1030-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
 ; GFX1030-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
-; GFX1030-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX1030-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX1030-PAL-NEXT:    scratch_load_dword v0, off, off offset:4 glc dlc
 ; GFX1030-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v0, 15
@@ -1665,7 +1665,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX9-PAL-NEXT:    s_mov_b32 s4, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX9-PAL-NEXT:    s_mov_b32 vcc_hi, 0
-; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s5, s5, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s4, s3
@@ -1695,7 +1695,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX1010-PAL-NEXT:    s_addc_u32 s5, s5, 0
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
 ; GFX1010-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
-; GFX1010-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX1010-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX1010-PAL-NEXT:    s_mov_b32 vcc_lo, 0
 ; GFX1010-PAL-NEXT:    scratch_load_dword v0, off, vcc_lo offset:4 glc dlc
 ; GFX1010-PAL-NEXT:    s_waitcnt vmcnt(0)
@@ -1723,7 +1723,7 @@ define amdgpu_kernel void @store_load_sindex_large_offset_kernel(i32 %idx) {
 ; GFX1030-PAL-NEXT:    s_addc_u32 s5, s5, 0
 ; GFX1030-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
 ; GFX1030-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
-; GFX1030-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX1030-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX1030-PAL-NEXT:    scratch_load_dword v0, off, off offset:4 glc dlc
 ; GFX1030-PAL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX1030-PAL-NEXT:    v_mov_b32_e32 v0, 15
@@ -2329,7 +2329,7 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
 ; GFX9-PAL-NEXT:    s_mov_b32 s4, s0
 ; GFX9-PAL-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x0
 ; GFX9-PAL-NEXT:    v_mov_b32_e32 v1, 4
-; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX9-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX9-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-PAL-NEXT:    s_and_b32 s5, s5, 0xffff
 ; GFX9-PAL-NEXT:    s_add_u32 flat_scratch_lo, s4, s3
@@ -2354,7 +2354,7 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
 ; GFX10-PAL-NEXT:    s_addc_u32 s5, s5, 0
 ; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s4
 ; GFX10-PAL-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s5
-; GFX10-PAL-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX10-PAL-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX10-PAL-NEXT:    v_mov_b32_e32 v1, 15
 ; GFX10-PAL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-PAL-NEXT:    v_add_nc_u32_e32 v0, s0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
index ff90763f51e9c..4ef2355f2bd03 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll
@@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_agent_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_agent_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -223,7 +223,7 @@ define amdgpu_kernel void @flat_agent_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -320,7 +320,7 @@ define amdgpu_kernel void @flat_agent_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -406,8 +406,8 @@ define amdgpu_kernel void @flat_agent_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -478,8 +478,8 @@ define amdgpu_kernel void @flat_agent_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -555,8 +555,8 @@ define amdgpu_kernel void @flat_agent_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -635,8 +635,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -710,8 +710,8 @@ define amdgpu_kernel void @flat_agent_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -792,8 +792,8 @@ define amdgpu_kernel void @flat_agent_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -874,8 +874,8 @@ define amdgpu_kernel void @flat_agent_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -964,8 +964,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1059,8 +1059,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1150,8 +1150,8 @@ define amdgpu_kernel void @flat_agent_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1247,8 +1247,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1347,8 +1347,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1440,8 +1440,8 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1535,8 +1535,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1630,8 +1630,8 @@ define amdgpu_kernel void @flat_agent_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1733,8 +1733,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1841,8 +1841,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1944,8 +1944,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2044,8 +2044,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2149,8 +2149,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2257,8 +2257,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2365,8 +2365,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2473,8 +2473,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2581,8 +2581,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2689,8 +2689,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2905,8 +2905,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -3010,8 +3010,8 @@ define amdgpu_kernel void @flat_agent_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3122,8 +3122,8 @@ define amdgpu_kernel void @flat_agent_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3236,8 +3236,8 @@ define amdgpu_kernel void @flat_agent_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3356,8 +3356,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3478,8 +3478,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3595,8 +3595,8 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3709,8 +3709,8 @@ define amdgpu_kernel void @flat_agent_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3828,8 +3828,8 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3950,8 +3950,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4072,8 +4072,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4194,8 +4194,8 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4316,8 +4316,8 @@ define amdgpu_kernel void @flat_agent_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4438,8 +4438,8 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4560,8 +4560,8 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4682,8 +4682,8 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4777,7 +4777,7 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4862,7 +4862,7 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4955,7 +4955,7 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -5057,7 +5057,7 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -5145,8 +5145,8 @@ define amdgpu_kernel void @flat_agent_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5217,8 +5217,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5294,8 +5294,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5374,8 +5374,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5449,8 +5449,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5529,8 +5529,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5611,8 +5611,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5699,8 +5699,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5792,8 +5792,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5886,8 +5886,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5987,8 +5987,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -6091,8 +6091,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -6185,8 +6185,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6278,8 +6278,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6373,8 +6373,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6474,8 +6474,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6580,8 +6580,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6681,8 +6681,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6779,8 +6779,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6882,8 +6882,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6988,8 +6988,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7094,8 +7094,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7200,8 +7200,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7306,8 +7306,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7412,8 +7412,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7518,8 +7518,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7624,8 +7624,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7729,8 +7729,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7844,8 +7844,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7960,8 +7960,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8083,8 +8083,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8210,8 +8210,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8332,8 +8332,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8451,8 +8451,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8575,8 +8575,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8702,8 +8702,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8829,8 +8829,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8956,8 +8956,8 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9083,8 +9083,8 @@ define amdgpu_kernel void @flat_agent_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9210,8 +9210,8 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9337,8 +9337,8 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9464,8 +9464,8 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
index f8b7bd9adb737..ab79a4cb73af5 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-nontemporal.ll
@@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -137,7 +137,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -230,7 +230,7 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -319,7 +319,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
index ce46d51bbbb8d..49f2733fa2acd 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-singlethread.ll
@@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_singlethread_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_singlethread_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -218,7 +218,7 @@ define amdgpu_kernel void @flat_singlethread_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -303,7 +303,7 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -384,8 +384,8 @@ define amdgpu_kernel void @flat_singlethread_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -456,8 +456,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -528,8 +528,8 @@ define amdgpu_kernel void @flat_singlethread_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -600,8 +600,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -672,8 +672,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -744,8 +744,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -816,8 +816,8 @@ define amdgpu_kernel void @flat_singlethread_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -888,8 +888,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -960,8 +960,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1038,8 +1038,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1123,8 +1123,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1208,8 +1208,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1296,8 +1296,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1381,8 +1381,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1466,8 +1466,8 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1551,8 +1551,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1636,8 +1636,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1721,8 +1721,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1806,8 +1806,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1891,8 +1891,8 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1976,8 +1976,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2061,8 +2061,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2146,8 +2146,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2231,8 +2231,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2316,8 +2316,8 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2401,8 +2401,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2486,8 +2486,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2583,8 +2583,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2690,8 +2690,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_singlethread_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2904,8 +2904,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3011,8 +3011,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3118,8 +3118,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3225,8 +3225,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3332,8 +3332,8 @@ define amdgpu_kernel void @flat_singlethread_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3439,8 +3439,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3546,8 +3546,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3653,8 +3653,8 @@ define amdgpu_kernel void @flat_singlethread_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3760,8 +3760,8 @@ define amdgpu_kernel void @flat_singlethread_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3867,8 +3867,8 @@ define amdgpu_kernel void @flat_singlethread_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3974,8 +3974,8 @@ define amdgpu_kernel void @flat_singlethread_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4081,8 +4081,8 @@ define amdgpu_kernel void @flat_singlethread_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4171,7 +4171,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4256,7 +4256,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4341,7 +4341,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4426,7 +4426,7 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4507,8 +4507,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4579,8 +4579,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4651,8 +4651,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4723,8 +4723,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4795,8 +4795,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4867,8 +4867,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4939,8 +4939,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5011,8 +5011,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5083,8 +5083,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5161,8 +5161,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5246,8 +5246,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5331,8 +5331,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5419,8 +5419,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5504,8 +5504,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5589,8 +5589,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5674,8 +5674,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5759,8 +5759,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5844,8 +5844,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5929,8 +5929,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6014,8 +6014,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6099,8 +6099,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6184,8 +6184,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6269,8 +6269,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6354,8 +6354,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6439,8 +6439,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6524,8 +6524,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6609,8 +6609,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6706,8 +6706,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_monotonic_ret_cmpx
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -6813,8 +6813,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_monotonic_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -6920,8 +6920,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_monotonic_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7027,8 +7027,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7134,8 +7134,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7241,8 +7241,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_acquire_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7348,8 +7348,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7455,8 +7455,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7562,8 +7562,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7669,8 +7669,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7776,8 +7776,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7883,8 +7883,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7990,8 +7990,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8097,8 +8097,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8204,8 +8204,8 @@ define amdgpu_kernel void @flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
index 200037724ef17..b9a307074492b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll
@@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_system_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_system_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -223,7 +223,7 @@ define amdgpu_kernel void @flat_system_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -322,7 +322,7 @@ define amdgpu_kernel void @flat_system_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -410,8 +410,8 @@ define amdgpu_kernel void @flat_system_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -482,8 +482,8 @@ define amdgpu_kernel void @flat_system_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -559,8 +559,8 @@ define amdgpu_kernel void @flat_system_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -641,8 +641,8 @@ define amdgpu_kernel void @flat_system_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -718,8 +718,8 @@ define amdgpu_kernel void @flat_system_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -800,8 +800,8 @@ define amdgpu_kernel void @flat_system_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -884,8 +884,8 @@ define amdgpu_kernel void @flat_system_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -976,8 +976,8 @@ define amdgpu_kernel void @flat_system_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1075,8 +1075,8 @@ define amdgpu_kernel void @flat_system_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1170,8 +1170,8 @@ define amdgpu_kernel void @flat_system_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1269,8 +1269,8 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1373,8 +1373,8 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1470,8 +1470,8 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1565,8 +1565,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1662,8 +1662,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1767,8 +1767,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1879,8 +1879,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1986,8 +1986,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2088,8 +2088,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2195,8 +2195,8 @@ define amdgpu_kernel void @flat_system_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2307,8 +2307,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2419,8 +2419,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2531,8 +2531,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2643,8 +2643,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2755,8 +2755,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2867,8 +2867,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2979,8 +2979,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -3088,8 +3088,8 @@ define amdgpu_kernel void @flat_system_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3200,8 +3200,8 @@ define amdgpu_kernel void @flat_system_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3316,8 +3316,8 @@ define amdgpu_kernel void @flat_system_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3438,8 +3438,8 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3564,8 +3564,8 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3685,8 +3685,8 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3801,8 +3801,8 @@ define amdgpu_kernel void @flat_system_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3922,8 +3922,8 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4048,8 +4048,8 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4174,8 +4174,8 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4300,8 +4300,8 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4426,8 +4426,8 @@ define amdgpu_kernel void @flat_system_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4552,8 +4552,8 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4678,8 +4678,8 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4804,8 +4804,8 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4903,7 +4903,7 @@ define amdgpu_kernel void @flat_system_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4988,7 +4988,7 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -5081,7 +5081,7 @@ define amdgpu_kernel void @flat_system_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -5185,7 +5185,7 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -5275,8 +5275,8 @@ define amdgpu_kernel void @flat_system_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5347,8 +5347,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5424,8 +5424,8 @@ define amdgpu_kernel void @flat_system_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5506,8 +5506,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5583,8 +5583,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5663,8 +5663,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5747,8 +5747,8 @@ define amdgpu_kernel void @flat_system_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5837,8 +5837,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5934,8 +5934,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -6032,8 +6032,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -6135,8 +6135,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -6243,8 +6243,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -6341,8 +6341,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6434,8 +6434,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6531,8 +6531,8 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6634,8 +6634,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6744,8 +6744,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6849,8 +6849,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6949,8 +6949,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7054,8 +7054,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7164,8 +7164,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7274,8 +7274,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7384,8 +7384,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7494,8 +7494,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7604,8 +7604,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7714,8 +7714,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7824,8 +7824,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -7933,8 +7933,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8048,8 +8048,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8166,8 +8166,8 @@ define amdgpu_kernel void @flat_system_one_as_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8291,8 +8291,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8422,8 +8422,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8548,8 +8548,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8669,8 +8669,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8795,8 +8795,8 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8926,8 +8926,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9057,8 +9057,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9188,8 +9188,8 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9319,8 +9319,8 @@ define amdgpu_kernel void @flat_system_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9450,8 +9450,8 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9581,8 +9581,8 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -9712,8 +9712,8 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
index 397ab8b5448e0..88a5ccb5de9d1 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
@@ -49,7 +49,7 @@ define amdgpu_kernel void @flat_nontemporal_load_0(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -116,7 +116,7 @@ define amdgpu_kernel void @flat_nontemporal_load_1(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_nontemporal_load_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -183,7 +183,7 @@ define amdgpu_kernel void @flat_nontemporal_store_0(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -250,7 +250,7 @@ define amdgpu_kernel void @flat_nontemporal_store_1(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_nontemporal_store_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
@@ -317,7 +317,7 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_volatile_workgroup_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -377,8 +377,8 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_volatile_workgroup_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
index 47c0cbc57a835..1569af9d257f2 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-wavefront.ll
@@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_wavefront_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_wavefront_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -218,7 +218,7 @@ define amdgpu_kernel void @flat_wavefront_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -303,7 +303,7 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -384,8 +384,8 @@ define amdgpu_kernel void @flat_wavefront_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -456,8 +456,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -528,8 +528,8 @@ define amdgpu_kernel void @flat_wavefront_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -600,8 +600,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -672,8 +672,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -744,8 +744,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -816,8 +816,8 @@ define amdgpu_kernel void @flat_wavefront_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -888,8 +888,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -960,8 +960,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1038,8 +1038,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1123,8 +1123,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1208,8 +1208,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1296,8 +1296,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1381,8 +1381,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1466,8 +1466,8 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1551,8 +1551,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1636,8 +1636,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1721,8 +1721,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1806,8 +1806,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1891,8 +1891,8 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1976,8 +1976,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2061,8 +2061,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2146,8 +2146,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2231,8 +2231,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2316,8 +2316,8 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2401,8 +2401,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2486,8 +2486,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2583,8 +2583,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2690,8 +2690,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2797,8 +2797,8 @@ define amdgpu_kernel void @flat_wavefront_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2904,8 +2904,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3011,8 +3011,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3118,8 +3118,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3225,8 +3225,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3332,8 +3332,8 @@ define amdgpu_kernel void @flat_wavefront_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3439,8 +3439,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3546,8 +3546,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3653,8 +3653,8 @@ define amdgpu_kernel void @flat_wavefront_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3760,8 +3760,8 @@ define amdgpu_kernel void @flat_wavefront_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3867,8 +3867,8 @@ define amdgpu_kernel void @flat_wavefront_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3974,8 +3974,8 @@ define amdgpu_kernel void @flat_wavefront_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4081,8 +4081,8 @@ define amdgpu_kernel void @flat_wavefront_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4171,7 +4171,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4256,7 +4256,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4341,7 +4341,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4426,7 +4426,7 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4507,8 +4507,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4579,8 +4579,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4651,8 +4651,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4723,8 +4723,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4795,8 +4795,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4867,8 +4867,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4939,8 +4939,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5011,8 +5011,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5083,8 +5083,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5161,8 +5161,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5246,8 +5246,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5331,8 +5331,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5419,8 +5419,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5504,8 +5504,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5589,8 +5589,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5674,8 +5674,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5759,8 +5759,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5844,8 +5844,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5929,8 +5929,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6014,8 +6014,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6099,8 +6099,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6184,8 +6184,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6269,8 +6269,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6354,8 +6354,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6439,8 +6439,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6524,8 +6524,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6609,8 +6609,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6706,8 +6706,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -6813,8 +6813,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -6920,8 +6920,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7027,8 +7027,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7134,8 +7134,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7241,8 +7241,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7348,8 +7348,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7455,8 +7455,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7562,8 +7562,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7669,8 +7669,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7776,8 +7776,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7883,8 +7883,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7990,8 +7990,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_acq_relc_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8097,8 +8097,8 @@ define amdgpu_kernel void @flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
index 179de4dc05ffa..ead08ed9fb2bd 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll
@@ -48,7 +48,7 @@ define amdgpu_kernel void @flat_workgroup_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -133,7 +133,7 @@ define amdgpu_kernel void @flat_workgroup_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -221,7 +221,7 @@ define amdgpu_kernel void @flat_workgroup_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -316,7 +316,7 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -403,8 +403,8 @@ define amdgpu_kernel void @flat_workgroup_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -475,8 +475,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -551,8 +551,8 @@ define amdgpu_kernel void @flat_workgroup_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -630,8 +630,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -705,8 +705,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -782,8 +782,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -862,8 +862,8 @@ define amdgpu_kernel void @flat_workgroup_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -946,8 +946,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1034,8 +1034,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1120,8 +1120,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1211,8 +1211,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1305,8 +1305,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -1397,8 +1397,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1487,8 +1487,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1580,8 +1580,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1677,8 +1677,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1778,8 +1778,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1875,8 +1875,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -1969,8 +1969,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2067,8 +2067,8 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2168,8 +2168,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2269,8 +2269,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2370,8 +2370,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -2474,8 +2474,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2584,8 +2584,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2697,8 +2697,8 @@ define amdgpu_kernel void @flat_workgroup_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2814,8 +2814,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -2933,8 +2933,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3048,8 +3048,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3160,8 +3160,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3276,8 +3276,8 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3395,8 +3395,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3514,8 +3514,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3633,8 +3633,8 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3752,8 +3752,8 @@ define amdgpu_kernel void @flat_workgroup_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3871,8 +3871,8 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -3990,8 +3990,8 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4109,8 +4109,8 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -4204,7 +4204,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4289,7 +4289,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4376,7 +4376,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4466,7 +4466,7 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s1
@@ -4549,8 +4549,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4621,8 +4621,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4695,8 +4695,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4770,8 +4770,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4843,8 +4843,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4917,8 +4917,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -4993,8 +4993,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5070,8 +5070,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5149,8 +5149,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5232,8 +5232,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5322,8 +5322,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5413,8 +5413,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s3
@@ -5503,8 +5503,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5590,8 +5590,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5679,8 +5679,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5769,8 +5769,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5861,8 +5861,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -5951,8 +5951,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6040,8 +6040,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6131,8 +6131,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6223,8 +6223,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6315,8 +6315,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6407,8 +6407,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6499,8 +6499,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6591,8 +6591,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6683,8 +6683,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6775,8 +6775,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s2, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s3, s3, 0
@@ -6875,8 +6875,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonicmonotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -6984,8 +6984,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7094,8 +7094,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7206,8 +7206,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7319,8 +7319,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7430,8 +7430,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7540,8 +7540,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7652,8 +7652,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7765,8 +7765,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7878,8 +7878,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -7991,8 +7991,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8104,8 +8104,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8217,8 +8217,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8330,8 +8330,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
@@ -8443,8 +8443,8 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s2, 16
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s3, 0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
index d57b42dcc8f04..43120f5132077 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @global_agent_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -148,7 +148,7 @@ define amdgpu_kernel void @global_agent_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -244,7 +244,7 @@ define amdgpu_kernel void @global_agent_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -346,7 +346,7 @@ define amdgpu_kernel void @global_agent_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -435,8 +435,8 @@ define amdgpu_kernel void @global_agent_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -516,8 +516,8 @@ define amdgpu_kernel void @global_agent_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -603,8 +603,8 @@ define amdgpu_kernel void @global_agent_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -693,8 +693,8 @@ define amdgpu_kernel void @global_agent_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -777,8 +777,8 @@ define amdgpu_kernel void @global_agent_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -868,8 +868,8 @@ define amdgpu_kernel void @global_agent_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -960,8 +960,8 @@ define amdgpu_kernel void @global_agent_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1060,8 +1060,8 @@ define amdgpu_kernel void @global_agent_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1165,8 +1165,8 @@ define amdgpu_kernel void @global_agent_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1268,8 +1268,8 @@ define amdgpu_kernel void @global_agent_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1378,8 +1378,8 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1491,8 +1491,8 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1590,8 +1590,8 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1689,8 +1689,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1789,8 +1789,8 @@ define amdgpu_kernel void @global_agent_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1897,8 +1897,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2010,8 +2010,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2117,8 +2117,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2221,8 +2221,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2331,8 +2331,8 @@ define amdgpu_kernel void @global_agent_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2444,8 +2444,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2557,8 +2557,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2670,8 +2670,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2783,8 +2783,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2896,8 +2896,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3009,8 +3009,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3122,8 +3122,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3229,8 +3229,8 @@ define amdgpu_kernel void @global_agent_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3342,8 +3342,8 @@ define amdgpu_kernel void @global_agent_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3457,8 +3457,8 @@ define amdgpu_kernel void @global_agent_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3579,8 +3579,8 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3703,8 +3703,8 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3821,8 +3821,8 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3936,8 +3936,8 @@ define amdgpu_kernel void @global_agent_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4057,8 +4057,8 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4181,8 +4181,8 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4305,8 +4305,8 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4429,8 +4429,8 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4553,8 +4553,8 @@ define amdgpu_kernel void @global_agent_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4677,8 +4677,8 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4801,8 +4801,8 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4925,8 +4925,8 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5025,7 +5025,7 @@ define amdgpu_kernel void @global_agent_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5115,7 +5115,7 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5211,7 +5211,7 @@ define amdgpu_kernel void @global_agent_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5313,7 +5313,7 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5402,8 +5402,8 @@ define amdgpu_kernel void @global_agent_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5483,8 +5483,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5570,8 +5570,8 @@ define amdgpu_kernel void @global_agent_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5744,8 +5744,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5835,8 +5835,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5927,8 +5927,8 @@ define amdgpu_kernel void @global_agent_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6027,8 +6027,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6132,8 +6132,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6235,8 +6235,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6345,8 +6345,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6458,8 +6458,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6557,8 +6557,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6656,8 +6656,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6756,8 +6756,8 @@ define amdgpu_kernel void @global_agent_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6864,8 +6864,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6977,8 +6977,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7084,8 +7084,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7188,8 +7188,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7298,8 +7298,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7411,8 +7411,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7524,8 +7524,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7637,8 +7637,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7750,8 +7750,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7863,8 +7863,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7976,8 +7976,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8089,8 +8089,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8196,8 +8196,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8309,8 +8309,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8430,8 +8430,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8554,8 +8554,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8672,8 +8672,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8787,8 +8787,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8908,8 +8908,8 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9032,8 +9032,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9156,8 +9156,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9280,8 +9280,8 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9404,8 +9404,8 @@ define amdgpu_kernel void @global_agent_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9528,8 +9528,8 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9652,8 +9652,8 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9776,8 +9776,8 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
index d7be355e61a06..02616719c90c6 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-nontemporal.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @global_nontemporal_load_0(
 ;
 ; SKIP-CACHE-INV-LABEL: global_nontemporal_load_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -155,7 +155,7 @@ define amdgpu_kernel void @global_nontemporal_load_1(
 ;
 ; SKIP-CACHE-INV-LABEL: global_nontemporal_load_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, 0
@@ -251,7 +251,7 @@ define amdgpu_kernel void @global_nontemporal_store_0(
 ;
 ; SKIP-CACHE-INV-LABEL: global_nontemporal_store_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -345,7 +345,7 @@ define amdgpu_kernel void @global_nontemporal_store_1(
 ;
 ; SKIP-CACHE-INV-LABEL: global_nontemporal_store_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
index d881b3d0e16b2..949b5a5ce4761 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-singlethread.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @global_singlethread_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -148,7 +148,7 @@ define amdgpu_kernel void @global_singlethread_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -238,7 +238,7 @@ define amdgpu_kernel void @global_singlethread_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -328,7 +328,7 @@ define amdgpu_kernel void @global_singlethread_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -414,8 +414,8 @@ define amdgpu_kernel void @global_singlethread_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -495,8 +495,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -576,8 +576,8 @@ define amdgpu_kernel void @global_singlethread_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -657,8 +657,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -738,8 +738,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -819,8 +819,8 @@ define amdgpu_kernel void @global_singlethread_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -900,8 +900,8 @@ define amdgpu_kernel void @global_singlethread_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -981,8 +981,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1062,8 +1062,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1151,8 +1151,8 @@ define amdgpu_kernel void @global_singlethread_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1247,8 +1247,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1343,8 +1343,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1437,8 +1437,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1526,8 +1526,8 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1615,8 +1615,8 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1704,8 +1704,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1793,8 +1793,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1882,8 +1882,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1971,8 +1971,8 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2060,8 +2060,8 @@ define amdgpu_kernel void @global_singlethread_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2149,8 +2149,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2238,8 +2238,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2327,8 +2327,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2416,8 +2416,8 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2505,8 +2505,8 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2594,8 +2594,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2683,8 +2683,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2782,8 +2782,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2889,8 +2889,8 @@ define amdgpu_kernel void @global_singlethread_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2996,8 +2996,8 @@ define amdgpu_kernel void @global_singlethread_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3103,8 +3103,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3210,8 +3210,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3424,8 +3424,8 @@ define amdgpu_kernel void @global_singlethread_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3531,8 +3531,8 @@ define amdgpu_kernel void @global_singlethread_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3638,8 +3638,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3745,8 +3745,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3852,8 +3852,8 @@ define amdgpu_kernel void @global_singlethread_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3959,8 +3959,8 @@ define amdgpu_kernel void @global_singlethread_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4066,8 +4066,8 @@ define amdgpu_kernel void @global_singlethread_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4173,8 +4173,8 @@ define amdgpu_kernel void @global_singlethread_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4280,8 +4280,8 @@ define amdgpu_kernel void @global_singlethread_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4375,7 +4375,7 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4465,7 +4465,7 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4555,7 +4555,7 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4645,7 +4645,7 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4731,8 +4731,8 @@ define amdgpu_kernel void @global_singlethread_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4812,8 +4812,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4893,8 +4893,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4974,8 +4974,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5055,8 +5055,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5136,8 +5136,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5217,8 +5217,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5298,8 +5298,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5379,8 +5379,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5468,8 +5468,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5564,8 +5564,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5754,8 +5754,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5843,8 +5843,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5932,8 +5932,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6021,8 +6021,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6110,8 +6110,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6199,8 +6199,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6288,8 +6288,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6377,8 +6377,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6466,8 +6466,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6644,8 +6644,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6733,8 +6733,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6822,8 +6822,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6911,8 +6911,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7000,8 +7000,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7099,8 +7099,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_monotonic_ret_cm
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7206,8 +7206,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_monotonic_ret_cmpx
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7313,8 +7313,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_monotonic_ret_cmpx
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7420,8 +7420,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_monotonic_ret_cmpx
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7527,8 +7527,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_monotonic_ret_cmpx
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7634,8 +7634,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_acquire_ret_cmpx
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7741,8 +7741,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_acquire_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7848,8 +7848,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_acquire_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7955,8 +7955,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_acquire_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8062,8 +8062,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_acquire_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8169,8 +8169,8 @@ define amdgpu_kernel void @global_singlethread_one_as_monotonic_seq_cst_ret_cmpx
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8276,8 +8276,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acquire_seq_cst_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8383,8 +8383,8 @@ define amdgpu_kernel void @global_singlethread_one_as_release_seq_cst_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8490,8 +8490,8 @@ define amdgpu_kernel void @global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8597,8 +8597,8 @@ define amdgpu_kernel void @global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: global_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
index da895c684c430..93a58c17153ae 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @global_system_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -148,7 +148,7 @@ define amdgpu_kernel void @global_system_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -244,7 +244,7 @@ define amdgpu_kernel void @global_system_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -348,7 +348,7 @@ define amdgpu_kernel void @global_system_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -439,8 +439,8 @@ define amdgpu_kernel void @global_system_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -520,8 +520,8 @@ define amdgpu_kernel void @global_system_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -607,8 +607,8 @@ define amdgpu_kernel void @global_system_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -699,8 +699,8 @@ define amdgpu_kernel void @global_system_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -785,8 +785,8 @@ define amdgpu_kernel void @global_system_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -876,8 +876,8 @@ define amdgpu_kernel void @global_system_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -970,8 +970,8 @@ define amdgpu_kernel void @global_system_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1072,8 +1072,8 @@ define amdgpu_kernel void @global_system_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1181,8 +1181,8 @@ define amdgpu_kernel void @global_system_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1288,8 +1288,8 @@ define amdgpu_kernel void @global_system_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1400,8 +1400,8 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1517,8 +1517,8 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1620,8 +1620,8 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1719,8 +1719,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1821,8 +1821,8 @@ define amdgpu_kernel void @global_system_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1931,8 +1931,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2048,8 +2048,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2159,8 +2159,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2265,8 +2265,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2377,8 +2377,8 @@ define amdgpu_kernel void @global_system_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2494,8 +2494,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2611,8 +2611,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2728,8 +2728,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2839,8 +2839,8 @@ define amdgpu_kernel void @global_system_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2952,8 +2952,8 @@ define amdgpu_kernel void @global_system_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3075,8 +3075,8 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3203,8 +3203,8 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3325,8 +3325,8 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3442,8 +3442,8 @@ define amdgpu_kernel void @global_system_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3565,8 +3565,8 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3693,8 +3693,8 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3821,8 +3821,8 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3949,8 +3949,8 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4077,8 +4077,8 @@ define amdgpu_kernel void @global_system_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4205,8 +4205,8 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_relese_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4333,8 +4333,8 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4461,8 +4461,8 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4565,7 +4565,7 @@ define amdgpu_kernel void @global_system_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4655,7 +4655,7 @@ define amdgpu_kernel void @global_system_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4751,7 +4751,7 @@ define amdgpu_kernel void @global_system_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4855,7 +4855,7 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4946,8 +4946,8 @@ define amdgpu_kernel void @global_system_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5027,8 +5027,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5114,8 +5114,8 @@ define amdgpu_kernel void @global_system_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5206,8 +5206,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5292,8 +5292,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5383,8 +5383,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5477,8 +5477,8 @@ define amdgpu_kernel void @global_system_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5579,8 +5579,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5688,8 +5688,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5795,8 +5795,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5907,8 +5907,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6024,8 +6024,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6127,8 +6127,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6226,8 +6226,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6328,8 +6328,8 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6438,8 +6438,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6666,8 +6666,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6772,8 +6772,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6884,8 +6884,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7001,8 +7001,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7118,8 +7118,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7235,8 +7235,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7352,8 +7352,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7469,8 +7469,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7586,8 +7586,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7703,8 +7703,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7814,8 +7814,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7927,8 +7927,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8044,8 +8044,8 @@ define amdgpu_kernel void @global_system_one_as_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8168,8 +8168,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8296,8 +8296,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8418,8 +8418,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8535,8 +8535,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8658,8 +8658,8 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8786,8 +8786,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8914,8 +8914,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9042,8 +9042,8 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9170,8 +9170,8 @@ define amdgpu_kernel void @global_system_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9298,8 +9298,8 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9426,8 +9426,8 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9554,8 +9554,8 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
index b2ee94f20983e..dacc965f269c1 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
@@ -56,7 +56,7 @@ define amdgpu_kernel void @global_volatile_load_0(
 ;
 ; SKIP-CACHE-INV-LABEL: global_volatile_load_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -132,7 +132,7 @@ define amdgpu_kernel void @global_volatile_load_1(
 ;
 ; SKIP-CACHE-INV-LABEL: global_volatile_load_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, 0
@@ -210,7 +210,7 @@ define amdgpu_kernel void @global_volatile_store_0(
 ;
 ; SKIP-CACHE-INV-LABEL: global_volatile_store_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -287,7 +287,7 @@ define amdgpu_kernel void @global_volatile_store_1(
 ;
 ; SKIP-CACHE-INV-LABEL: global_volatile_store_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, 0
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
@@ -361,7 +361,7 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -432,8 +432,8 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_volatile_workgroup_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
index 5c674278ae3cd..3fe2c7bbc5e30 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-wavefront.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @global_wavefront_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -148,7 +148,7 @@ define amdgpu_kernel void @global_wavefront_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -238,7 +238,7 @@ define amdgpu_kernel void @global_wavefront_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -328,7 +328,7 @@ define amdgpu_kernel void @global_wavefront_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -414,8 +414,8 @@ define amdgpu_kernel void @global_wavefront_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -495,8 +495,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -576,8 +576,8 @@ define amdgpu_kernel void @global_wavefront_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -657,8 +657,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -738,8 +738,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -819,8 +819,8 @@ define amdgpu_kernel void @global_wavefront_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -900,8 +900,8 @@ define amdgpu_kernel void @global_wavefront_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -981,8 +981,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1062,8 +1062,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1151,8 +1151,8 @@ define amdgpu_kernel void @global_wavefront_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1247,8 +1247,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1343,8 +1343,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1437,8 +1437,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1526,8 +1526,8 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1615,8 +1615,8 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1704,8 +1704,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1793,8 +1793,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1882,8 +1882,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1971,8 +1971,8 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2060,8 +2060,8 @@ define amdgpu_kernel void @global_wavefront_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2149,8 +2149,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2238,8 +2238,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2327,8 +2327,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2416,8 +2416,8 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2505,8 +2505,8 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2594,8 +2594,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2683,8 +2683,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2782,8 +2782,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2889,8 +2889,8 @@ define amdgpu_kernel void @global_wavefront_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2996,8 +2996,8 @@ define amdgpu_kernel void @global_wavefront_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3103,8 +3103,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3210,8 +3210,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3317,8 +3317,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3424,8 +3424,8 @@ define amdgpu_kernel void @global_wavefront_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3531,8 +3531,8 @@ define amdgpu_kernel void @global_wavefront_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3638,8 +3638,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3745,8 +3745,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3852,8 +3852,8 @@ define amdgpu_kernel void @global_wavefront_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3959,8 +3959,8 @@ define amdgpu_kernel void @global_wavefront_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4066,8 +4066,8 @@ define amdgpu_kernel void @global_wavefront_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4173,8 +4173,8 @@ define amdgpu_kernel void @global_wavefront_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4280,8 +4280,8 @@ define amdgpu_kernel void @global_wavefront_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4375,7 +4375,7 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4465,7 +4465,7 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4555,7 +4555,7 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4645,7 +4645,7 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4731,8 +4731,8 @@ define amdgpu_kernel void @global_wavefront_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4812,8 +4812,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4893,8 +4893,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4974,8 +4974,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5055,8 +5055,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5136,8 +5136,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5217,8 +5217,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5298,8 +5298,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5379,8 +5379,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5468,8 +5468,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5564,8 +5564,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5660,8 +5660,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5754,8 +5754,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5843,8 +5843,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5932,8 +5932,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6021,8 +6021,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6110,8 +6110,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6199,8 +6199,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6288,8 +6288,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6377,8 +6377,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6466,8 +6466,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6555,8 +6555,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6644,8 +6644,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6733,8 +6733,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6822,8 +6822,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6911,8 +6911,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7000,8 +7000,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7099,8 +7099,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_monotonic_ret_cmpxc
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7206,8 +7206,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7313,8 +7313,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7420,8 +7420,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7527,8 +7527,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7634,8 +7634,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_acquire_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7741,8 +7741,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7848,8 +7848,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7955,8 +7955,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8062,8 +8062,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8169,8 +8169,8 @@ define amdgpu_kernel void @global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8276,8 +8276,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8383,8 +8383,8 @@ define amdgpu_kernel void @global_wavefront_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8490,8 +8490,8 @@ define amdgpu_kernel void @global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8597,8 +8597,8 @@ define amdgpu_kernel void @global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
index 2f36a6d914a1e..339cb981b3f2c 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @global_workgroup_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -148,7 +148,7 @@ define amdgpu_kernel void @global_workgroup_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -239,7 +239,7 @@ define amdgpu_kernel void @global_workgroup_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -334,7 +334,7 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -422,8 +422,8 @@ define amdgpu_kernel void @global_workgroup_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -503,8 +503,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -589,8 +589,8 @@ define amdgpu_kernel void @global_workgroup_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -678,8 +678,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -762,8 +762,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -845,8 +845,8 @@ define amdgpu_kernel void @global_workgroup_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -933,8 +933,8 @@ define amdgpu_kernel void @global_workgroup_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1024,8 +1024,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1117,8 +1117,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1212,8 +1212,8 @@ define amdgpu_kernel void @global_workgroup_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1315,8 +1315,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1421,8 +1421,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1519,8 +1519,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1610,8 +1610,8 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1706,8 +1706,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1805,8 +1805,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1906,8 +1906,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2002,8 +2002,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2095,8 +2095,8 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2193,8 +2193,8 @@ define amdgpu_kernel void @global_workgroup_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2294,8 +2294,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2395,8 +2395,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2496,8 +2496,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2597,8 +2597,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2698,8 +2698,8 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2799,8 +2799,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2900,8 +2900,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3004,8 +3004,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3112,8 +3112,8 @@ define amdgpu_kernel void @global_workgroup_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3225,8 +3225,8 @@ define amdgpu_kernel void @global_workgroup_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3341,8 +3341,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3458,8 +3458,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3570,8 +3570,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3679,8 +3679,8 @@ define amdgpu_kernel void @global_workgroup_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3793,8 +3793,8 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3910,8 +3910,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4027,8 +4027,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4144,8 +4144,8 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4261,8 +4261,8 @@ define amdgpu_kernel void @global_workgroup_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4378,8 +4378,8 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4495,8 +4495,8 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4612,8 +4612,8 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4711,7 +4711,7 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4801,7 +4801,7 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4892,7 +4892,7 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4985,7 +4985,7 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s7, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s6, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5072,8 +5072,8 @@ define amdgpu_kernel void @global_workgroup_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5153,8 +5153,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5236,8 +5236,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5320,8 +5320,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5402,8 +5402,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5485,8 +5485,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5570,8 +5570,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5656,8 +5656,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5744,8 +5744,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5837,8 +5837,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -5937,8 +5937,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6038,8 +6038,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dword s4, s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6134,8 +6134,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6225,8 +6225,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6318,8 +6318,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6412,8 +6412,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6508,8 +6508,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6602,8 +6602,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6695,8 +6695,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6790,8 +6790,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6886,8 +6886,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -6982,8 +6982,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7078,8 +7078,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7174,8 +7174,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7270,8 +7270,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7366,8 +7366,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7462,8 +7462,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7564,8 +7564,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_monotonic_ret_cmpxc
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7672,8 +7672,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7782,8 +7782,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7893,8 +7893,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8005,8 +8005,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8115,8 +8115,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8224,8 +8224,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8335,8 +8335,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8447,8 +8447,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8559,8 +8559,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8671,8 +8671,8 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8783,8 +8783,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8895,8 +8895,8 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9007,8 +9007,8 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9119,8 +9119,8 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s2, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
index b3a3059a3be31..a328fd34ba1f1 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
@@ -57,7 +57,7 @@ define amdgpu_kernel void @local_agent_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -145,7 +145,7 @@ define amdgpu_kernel void @local_agent_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -234,7 +234,7 @@ define amdgpu_kernel void @local_agent_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -329,7 +329,7 @@ define amdgpu_kernel void @local_agent_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -413,7 +413,7 @@ define amdgpu_kernel void @local_agent_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -486,7 +486,7 @@ define amdgpu_kernel void @local_agent_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -564,7 +564,7 @@ define amdgpu_kernel void @local_agent_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -645,7 +645,7 @@ define amdgpu_kernel void @local_agent_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -721,7 +721,7 @@ define amdgpu_kernel void @local_agent_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -799,7 +799,7 @@ define amdgpu_kernel void @local_agent_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -880,7 +880,7 @@ define amdgpu_kernel void @local_agent_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -966,7 +966,7 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_agent_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1977,13 +1971,12 @@ define amdgpu_kernel void @local_agent_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_agent_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_agent_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_agent_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3475,13 +3454,12 @@ define amdgpu_kernel void @local_agent_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3580,13 +3558,12 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_agent_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4522,7 +4492,7 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_agent_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4851,7 +4821,7 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_agent_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_agent_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_agent_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5531,7 +5501,7 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5619,7 +5589,7 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6277,13 +6240,12 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6605,13 +6564,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6851,13 +6807,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_agent_one_as_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7235,13 +7187,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7529,13 +7478,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_agent_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7725,13 +7672,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_agent_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_agent_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8117,13 +8060,12 @@ define amdgpu_kernel void @local_agent_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_agent_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
index 664d69d2d0cec..df3e5226c539f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-nontemporal.ll
@@ -64,8 +64,8 @@ define amdgpu_kernel void @local_nontemporal_load_0(
 ;
 ; SKIP-CACHE-INV-LABEL: local_nontemporal_load_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -165,8 +165,8 @@ define amdgpu_kernel void @local_nontemporal_load_1(
 ;
 ; SKIP-CACHE-INV-LABEL: local_nontemporal_load_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
@@ -265,8 +265,8 @@ define amdgpu_kernel void @local_nontemporal_store_0(
 ;
 ; SKIP-CACHE-INV-LABEL: local_nontemporal_store_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[2:3], 0x0
@@ -363,8 +363,8 @@ define amdgpu_kernel void @local_nontemporal_store_1(
 ;
 ; SKIP-CACHE-INV-LABEL: local_nontemporal_store_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
index 846245094b629..67ce190072b5b 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-singlethread.ll
@@ -57,7 +57,7 @@ define amdgpu_kernel void @local_singlethread_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -145,7 +145,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -233,7 +233,7 @@ define amdgpu_kernel void @local_singlethread_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -321,7 +321,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -401,7 +401,7 @@ define amdgpu_kernel void @local_singlethread_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -474,7 +474,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -547,7 +547,7 @@ define amdgpu_kernel void @local_singlethread_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -620,7 +620,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -693,7 +693,7 @@ define amdgpu_kernel void @local_singlethread_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -766,7 +766,7 @@ define amdgpu_kernel void @local_singlethread_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -839,7 +839,7 @@ define amdgpu_kernel void @local_singlethread_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -912,7 +912,7 @@ define amdgpu_kernel void @local_singlethread_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -985,7 +985,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1066,7 +1066,7 @@ define amdgpu_kernel void @local_singlethread_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1154,7 +1154,7 @@ define amdgpu_kernel void @local_singlethread_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1242,7 +1242,7 @@ define amdgpu_kernel void @local_singlethread_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1326,13 +1326,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1408,13 +1407,12 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1490,13 +1488,12 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1572,13 +1569,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1654,13 +1650,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1736,13 +1731,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1818,13 +1812,12 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1900,13 +1893,12 @@ define amdgpu_kernel void @local_singlethread_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1982,13 +1974,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2064,13 +2055,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2146,13 +2136,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2228,13 +2217,12 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2310,13 +2298,12 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2392,13 +2379,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2474,13 +2460,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2564,13 +2549,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2662,13 +2646,12 @@ define amdgpu_kernel void @local_singlethread_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2760,13 +2743,12 @@ define amdgpu_kernel void @local_singlethread_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2858,13 +2840,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2956,13 +2937,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3054,13 +3034,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3152,13 +3131,12 @@ define amdgpu_kernel void @local_singlethread_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3250,13 +3228,12 @@ define amdgpu_kernel void @local_singlethread_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3348,13 +3325,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3446,13 +3422,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3544,13 +3519,12 @@ define amdgpu_kernel void @local_singlethread_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3642,13 +3616,12 @@ define amdgpu_kernel void @local_singlethread_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3740,13 +3713,12 @@ define amdgpu_kernel void @local_singlethread_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3838,13 +3810,12 @@ define amdgpu_kernel void @local_singlethread_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3936,13 +3907,12 @@ define amdgpu_kernel void @local_singlethread_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -4030,7 +4000,7 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4118,7 +4088,7 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4206,7 +4176,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4294,7 +4264,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4374,7 +4344,7 @@ define amdgpu_kernel void @local_singlethread_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4447,7 +4417,7 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4520,7 +4490,7 @@ define amdgpu_kernel void @local_singlethread_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4593,7 +4563,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4666,7 +4636,7 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4739,7 +4709,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4812,7 +4782,7 @@ define amdgpu_kernel void @local_singlethread_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4885,7 +4855,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4958,7 +4928,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5039,7 +5009,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5127,7 +5097,7 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5215,7 +5185,7 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5299,13 +5269,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5381,13 +5350,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5463,13 +5431,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5545,13 +5512,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5627,13 +5593,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5709,13 +5674,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5791,13 +5755,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5873,13 +5836,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5955,13 +5917,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6037,13 +5998,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6119,13 +6079,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6201,13 +6160,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6283,13 +6241,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6365,13 +6322,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6447,13 +6403,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6537,13 +6492,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_monotonic_ret_cmp
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -6635,13 +6589,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_monotonic_ret_cmpxc
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -6733,13 +6686,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_monotonic_ret_cmpxc
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -6831,13 +6783,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_monotonic_ret_cmpxc
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -6929,13 +6880,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_monotonic_ret_cmpxc
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7027,13 +6977,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_acquire_ret_cmpxc
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7125,13 +7074,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_acquire_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7223,13 +7171,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_acquire_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7321,13 +7268,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7419,13 +7365,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7517,13 +7462,12 @@ define amdgpu_kernel void @local_singlethread_one_as_monotonic_seq_cst_ret_cmpxc
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7615,13 +7559,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7713,13 +7656,12 @@ define amdgpu_kernel void @local_singlethread_one_as_release_seq_cst_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7811,13 +7753,12 @@ define amdgpu_kernel void @local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7909,13 +7850,12 @@ define amdgpu_kernel void @local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg
 ;
 ; SKIP-CACHE-INV-LABEL: local_singlethread_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
index 64129cdd666bd..4efd46d271600 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
@@ -57,7 +57,7 @@ define amdgpu_kernel void @local_system_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -145,7 +145,7 @@ define amdgpu_kernel void @local_system_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -234,7 +234,7 @@ define amdgpu_kernel void @local_system_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -329,7 +329,7 @@ define amdgpu_kernel void @local_system_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -413,7 +413,7 @@ define amdgpu_kernel void @local_system_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -486,7 +486,7 @@ define amdgpu_kernel void @local_system_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -564,7 +564,7 @@ define amdgpu_kernel void @local_system_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -645,7 +645,7 @@ define amdgpu_kernel void @local_system_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -721,7 +721,7 @@ define amdgpu_kernel void @local_system_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -799,7 +799,7 @@ define amdgpu_kernel void @local_system_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -880,7 +880,7 @@ define amdgpu_kernel void @local_system_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -966,7 +966,7 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_system_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_system_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_system_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1977,13 +1971,12 @@ define amdgpu_kernel void @local_system_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_system_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_system_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_system_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3475,13 +3454,12 @@ define amdgpu_kernel void @local_system_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3580,13 +3558,12 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_system_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4522,7 +4492,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_system_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4851,7 +4821,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_system_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_system_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_system_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_system_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5531,7 +5501,7 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5619,7 +5589,7 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6277,13 +6240,12 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6605,13 +6564,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6851,13 +6807,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_system_one_as_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7235,13 +7187,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7529,13 +7478,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_system_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7725,13 +7672,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_system_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_system_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8117,13 +8060,12 @@ define amdgpu_kernel void @local_system_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_system_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_system_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_system_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
index 2f47903916565..97aced3707afb 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
@@ -62,8 +62,8 @@ define amdgpu_kernel void @local_volatile_load_0(
 ;
 ; SKIP-CACHE-INV-LABEL: local_volatile_load_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -139,8 +139,8 @@ define amdgpu_kernel void @local_volatile_load_1(
 ;
 ; SKIP-CACHE-INV-LABEL: local_volatile_load_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s3, 0xf000
@@ -215,8 +215,8 @@ define amdgpu_kernel void @local_volatile_store_0(
 ;
 ; SKIP-CACHE-INV-LABEL: local_volatile_store_0:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[2:3], 0x0
@@ -289,8 +289,8 @@ define amdgpu_kernel void @local_volatile_store_1(
 ;
 ; SKIP-CACHE-INV-LABEL: local_volatile_store_1:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -360,7 +360,7 @@ define amdgpu_kernel void @local_volatile_workgroup_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_volatile_workgroup_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -423,7 +423,7 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_volatile_workgroup_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
index 3fde622dee480..046325f5437f8 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-wavefront.ll
@@ -57,7 +57,7 @@ define amdgpu_kernel void @local_wavefront_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -145,7 +145,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -233,7 +233,7 @@ define amdgpu_kernel void @local_wavefront_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -321,7 +321,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -401,7 +401,7 @@ define amdgpu_kernel void @local_wavefront_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -474,7 +474,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -547,7 +547,7 @@ define amdgpu_kernel void @local_wavefront_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -620,7 +620,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -693,7 +693,7 @@ define amdgpu_kernel void @local_wavefront_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -766,7 +766,7 @@ define amdgpu_kernel void @local_wavefront_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -839,7 +839,7 @@ define amdgpu_kernel void @local_wavefront_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -912,7 +912,7 @@ define amdgpu_kernel void @local_wavefront_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -985,7 +985,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1066,7 +1066,7 @@ define amdgpu_kernel void @local_wavefront_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1154,7 +1154,7 @@ define amdgpu_kernel void @local_wavefront_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1242,7 +1242,7 @@ define amdgpu_kernel void @local_wavefront_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1326,13 +1326,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1408,13 +1407,12 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1490,13 +1488,12 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1572,13 +1569,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1654,13 +1650,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1736,13 +1731,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1818,13 +1812,12 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1900,13 +1893,12 @@ define amdgpu_kernel void @local_wavefront_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1982,13 +1974,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2064,13 +2055,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2146,13 +2136,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2228,13 +2217,12 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2310,13 +2298,12 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2392,13 +2379,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2474,13 +2460,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -2564,13 +2549,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2662,13 +2646,12 @@ define amdgpu_kernel void @local_wavefront_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2760,13 +2743,12 @@ define amdgpu_kernel void @local_wavefront_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2858,13 +2840,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2956,13 +2937,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3054,13 +3034,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3152,13 +3131,12 @@ define amdgpu_kernel void @local_wavefront_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3250,13 +3228,12 @@ define amdgpu_kernel void @local_wavefront_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3348,13 +3325,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3446,13 +3422,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3544,13 +3519,12 @@ define amdgpu_kernel void @local_wavefront_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3642,13 +3616,12 @@ define amdgpu_kernel void @local_wavefront_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3740,13 +3713,12 @@ define amdgpu_kernel void @local_wavefront_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3838,13 +3810,12 @@ define amdgpu_kernel void @local_wavefront_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3936,13 +3907,12 @@ define amdgpu_kernel void @local_wavefront_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -4030,7 +4000,7 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4118,7 +4088,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4206,7 +4176,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4294,7 +4264,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4374,7 +4344,7 @@ define amdgpu_kernel void @local_wavefront_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4447,7 +4417,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4520,7 +4490,7 @@ define amdgpu_kernel void @local_wavefront_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4593,7 +4563,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4666,7 +4636,7 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4739,7 +4709,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4812,7 +4782,7 @@ define amdgpu_kernel void @local_wavefront_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4885,7 +4855,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4958,7 +4928,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5039,7 +5009,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5127,7 +5097,7 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5215,7 +5185,7 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5299,13 +5269,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5381,13 +5350,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5463,13 +5431,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5545,13 +5512,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5627,13 +5593,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5709,13 +5674,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5791,13 +5755,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5873,13 +5836,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5955,13 +5917,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6037,13 +5998,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6119,13 +6079,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6201,13 +6160,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6283,13 +6241,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6365,13 +6322,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6447,13 +6403,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6537,13 +6492,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_monotonic_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -6635,13 +6589,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -6733,13 +6686,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -6831,13 +6783,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -6929,13 +6880,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7027,13 +6977,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7125,13 +7074,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7223,13 +7171,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7321,13 +7268,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7419,13 +7365,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7517,13 +7462,12 @@ define amdgpu_kernel void @local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7615,13 +7559,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7713,13 +7656,12 @@ define amdgpu_kernel void @local_wavefront_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7811,13 +7753,12 @@ define amdgpu_kernel void @local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7909,13 +7850,12 @@ define amdgpu_kernel void @local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_wavefront_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
index 7094634268eed..580d7a814a7dd 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
@@ -57,7 +57,7 @@ define amdgpu_kernel void @local_workgroup_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -145,7 +145,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -234,7 +234,7 @@ define amdgpu_kernel void @local_workgroup_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -329,7 +329,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -413,7 +413,7 @@ define amdgpu_kernel void @local_workgroup_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -486,7 +486,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -564,7 +564,7 @@ define amdgpu_kernel void @local_workgroup_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -645,7 +645,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -721,7 +721,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -799,7 +799,7 @@ define amdgpu_kernel void @local_workgroup_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -880,7 +880,7 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -966,7 +966,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1055,7 +1055,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1143,7 +1143,7 @@ define amdgpu_kernel void @local_workgroup_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1238,7 +1238,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1336,7 +1336,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -1424,13 +1424,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -1511,13 +1510,12 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1601,13 +1599,12 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1696,13 +1693,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1794,13 +1790,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1887,13 +1882,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -1977,13 +1971,12 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
@@ -2072,13 +2065,12 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2170,13 +2162,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2268,13 +2259,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2366,13 +2356,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2464,13 +2453,12 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2562,13 +2550,12 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2660,13 +2647,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2758,13 +2744,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -2854,13 +2839,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -2953,13 +2937,12 @@ define amdgpu_kernel void @local_workgroup_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3057,13 +3040,12 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3164,13 +3146,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3272,13 +3253,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3375,13 +3355,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3475,13 +3454,12 @@ define amdgpu_kernel void @local_workgroup_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -3580,13 +3558,12 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3688,13 +3665,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3796,13 +3772,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -3904,13 +3879,12 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4012,13 +3986,12 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4120,13 +4093,12 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4228,13 +4200,12 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4336,13 +4307,12 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
@@ -4434,7 +4404,7 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_unordered_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4522,7 +4492,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4610,7 +4580,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4698,7 +4668,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_load(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_load:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -4778,7 +4748,7 @@ define amdgpu_kernel void @local_workgroup_one_as_unordered_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_unordered_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4851,7 +4821,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4924,7 +4894,7 @@ define amdgpu_kernel void @local_workgroup_one_as_release_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -4997,7 +4967,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_store(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_store:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s1
@@ -5070,7 +5040,7 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5143,7 +5113,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5216,7 +5186,7 @@ define amdgpu_kernel void @local_workgroup_one_as_release_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5289,7 +5259,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5362,7 +5332,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5443,7 +5413,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5531,7 +5501,7 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5619,7 +5589,7 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_ret_atomicrmw(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_ret_atomicrmw:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
@@ -5703,13 +5673,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5785,13 +5754,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5867,13 +5835,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -5949,13 +5916,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6031,13 +5997,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_monotonic_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6113,13 +6078,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6195,13 +6159,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6277,13 +6240,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6359,13 +6321,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6441,13 +6402,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_acquire_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6523,13 +6483,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6605,13 +6564,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6687,13 +6645,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6769,13 +6726,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6851,13 +6807,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_seq_cst_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_b32 v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_endpgm
 ;
@@ -6941,13 +6896,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_monotonic_ret_cmpxch
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7039,13 +6993,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7137,13 +7090,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7235,13 +7187,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7333,13 +7284,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7431,13 +7381,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7529,13 +7478,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7627,13 +7575,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7725,13 +7672,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7823,13 +7769,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_acquire_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -7921,13 +7866,12 @@ define amdgpu_kernel void @local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8019,13 +7963,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acquire_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8117,13 +8060,12 @@ define amdgpu_kernel void @local_workgroup_one_as_release_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_release_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8215,13 +8157,12 @@ define amdgpu_kernel void @local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
@@ -8313,13 +8254,12 @@ define amdgpu_kernel void @local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg(
 ;
 ; SKIP-CACHE-INV-LABEL: local_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg:
 ; SKIP-CACHE-INV:       ; %bb.0: ; %entry
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 m0, -1
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s0
-; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s3
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s0
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v1, s2
+; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v2, s1
 ; SKIP-CACHE-INV-NEXT:    ds_cmpst_rtn_b32 v1, v0, v1, v2 offset:16
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    ds_write_b32 v0, v1
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
index 2af31ad68e050..0fd1e7a04e4b5 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-nontemporal.ll
@@ -81,8 +81,8 @@ define amdgpu_kernel void @private_nontemporal_load_0(
 ; SKIP-CACHE-INV-NEXT:    s_getpc_b64 s[4:5]
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s0
 ; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s4, s3
@@ -208,8 +208,8 @@ define amdgpu_kernel void @private_nontemporal_load_1(
 ; SKIP-CACHE-INV-NEXT:    s_getpc_b64 s[4:5]
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s0
 ; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
@@ -336,8 +336,8 @@ define amdgpu_kernel void @private_nontemporal_store_0(
 ; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s4, s3
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s5, 0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[2:3], 0x0
@@ -462,8 +462,8 @@ define amdgpu_kernel void @private_nontemporal_store_1(
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s4, s3
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s5, 0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[2:3], 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
index c4c60fe16fb38..f605514382075 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-volatile.ll
@@ -81,8 +81,8 @@ define amdgpu_kernel void @private_volatile_load_0(
 ; SKIP-CACHE-INV-NEXT:    s_getpc_b64 s[4:5]
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s0
 ; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_mov_b32_e32 v0, s2
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s4, s3
@@ -178,8 +178,8 @@ define amdgpu_kernel void @private_volatile_load_1(
 ; SKIP-CACHE-INV-NEXT:    s_getpc_b64 s[4:5]
 ; SKIP-CACHE-INV-NEXT:    s_mov_b32 s4, s0
 ; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dword s2, s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    v_add_i32_e32 v0, vcc, s2, v0
@@ -280,8 +280,8 @@ define amdgpu_kernel void @private_volatile_store_0(
 ; SKIP-CACHE-INV-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s4, s3
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s5, 0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[2:3], 0x0
@@ -381,8 +381,8 @@ define amdgpu_kernel void @private_volatile_store_1(
 ; SKIP-CACHE-INV-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_add_u32 s4, s4, s3
-; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
-; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0xb
+; SKIP-CACHE-INV-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
+; SKIP-CACHE-INV-NEXT:    s_load_dword s0, s[0:1], 0x2
 ; SKIP-CACHE-INV-NEXT:    s_addc_u32 s5, s5, 0
 ; SKIP-CACHE-INV-NEXT:    s_waitcnt lgkmcnt(0)
 ; SKIP-CACHE-INV-NEXT:    s_load_dword s1, s[2:3], 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/store-local.128.ll b/llvm/test/CodeGen/AMDGPU/store-local.128.ll
index c432d9b4d46d4..d63c7890bc06a 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.128.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.128.ll
@@ -7,8 +7,8 @@
 define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v4, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
@@ -20,8 +20,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3
 ;
 ; GFX7-LABEL: store_lds_v4i32:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v4, s4
@@ -34,8 +34,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3
 ;
 ; GFX6-LABEL: store_lds_v4i32:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s6
@@ -49,8 +49,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3
 ; GFX10-LABEL: store_lds_v4i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v4, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
@@ -66,8 +66,8 @@ define amdgpu_kernel void @store_lds_v4i32(<4 x i32> addrspace(3)* %out, <4 x i3
 define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align1:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s7
@@ -110,8 +110,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align1:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -163,8 +163,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v4i32_align1:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
@@ -217,8 +217,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s7
@@ -265,8 +265,8 @@ define amdgpu_kernel void @store_lds_v4i32_align1(<4 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align2:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s7
@@ -285,8 +285,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align2:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -314,8 +314,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v4i32_align2:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
@@ -344,8 +344,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s7
@@ -368,8 +368,8 @@ define amdgpu_kernel void @store_lds_v4i32_align2(<4 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align4:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s4
@@ -382,8 +382,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align4:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -397,8 +397,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v4i32_align4:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
@@ -413,8 +413,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align4:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s4
@@ -431,8 +431,8 @@ define amdgpu_kernel void @store_lds_v4i32_align4(<4 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align8:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v4, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
@@ -444,8 +444,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align8:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v4, s4
@@ -458,8 +458,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v4i32_align8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s6
@@ -473,8 +473,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v4, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
@@ -490,8 +490,8 @@ define amdgpu_kernel void @store_lds_v4i32_align8(<4 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out, <4 x i32> %x) {
 ; GFX9-LABEL: store_lds_v4i32_align16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v4, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
@@ -503,8 +503,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v4i32_align16:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v4, s4
@@ -517,8 +517,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v4i32_align16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX6-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s6
@@ -532,8 +532,8 @@ define amdgpu_kernel void @store_lds_v4i32_align16(<4 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v4i32_align16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v4, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
diff --git a/llvm/test/CodeGen/AMDGPU/store-local.96.ll b/llvm/test/CodeGen/AMDGPU/store-local.96.ll
index 913b7e46f4edc..816ab02bcff20 100644
--- a/llvm/test/CodeGen/AMDGPU/store-local.96.ll
+++ b/llvm/test/CodeGen/AMDGPU/store-local.96.ll
@@ -7,8 +7,8 @@
 define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
@@ -19,8 +19,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3
 ;
 ; GFX7-LABEL: store_lds_v3i32:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -32,8 +32,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3
 ;
 ; GFX6-LABEL: store_lds_v3i32:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
@@ -47,8 +47,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3
 ; GFX10-LABEL: store_lds_v3i32:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5
@@ -63,8 +63,8 @@ define amdgpu_kernel void @store_lds_v3i32(<3 x i32> addrspace(3)* %out, <3 x i3
 define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align1:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s6
@@ -98,8 +98,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align1:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -140,8 +140,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v3i32_align1:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
@@ -183,8 +183,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align1:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s6
@@ -222,8 +222,8 @@ define amdgpu_kernel void @store_lds_v3i32_align1(<3 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align2:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s6
@@ -239,8 +239,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align2:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -263,8 +263,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v3i32_align2:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
@@ -288,8 +288,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align2:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s6
@@ -309,8 +309,8 @@ define amdgpu_kernel void @store_lds_v3i32_align2(<3 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align4:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s4
@@ -322,8 +322,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align4:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -336,8 +336,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v3i32_align4:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
@@ -351,8 +351,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align4:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s4
@@ -368,8 +368,8 @@ define amdgpu_kernel void @store_lds_v3i32_align4(<3 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align8:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s6
@@ -381,8 +381,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align8:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX7-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX7-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v2, s4
@@ -395,8 +395,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v3i32_align8:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
@@ -410,8 +410,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align8:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX10-NEXT:    v_mov_b32_e32 v3, s6
@@ -427,8 +427,8 @@ define amdgpu_kernel void @store_lds_v3i32_align8(<3 x i32> addrspace(3)* %out,
 define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out, <3 x i32> %x) {
 ; GFX9-LABEL: store_lds_v3i32_align16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s5
@@ -439,8 +439,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX7-LABEL: store_lds_v3i32_align16:
 ; GFX7:       ; %bb.0:
-; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x4
+; GFX7-NEXT:    s_load_dword s0, s[0:1], 0x0
 ; GFX7-NEXT:    s_mov_b32 m0, -1
 ; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX7-NEXT:    v_mov_b32_e32 v0, s4
@@ -452,8 +452,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out,
 ;
 ; GFX6-LABEL: store_lds_v3i32_align16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dword s4, s[0:1], 0x0
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x4
 ; GFX6-NEXT:    s_mov_b32 m0, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s4
@@ -467,8 +467,8 @@ define amdgpu_kernel void @store_lds_v3i32_align16(<3 x i32> addrspace(3)* %out,
 ; GFX10-LABEL: store_lds_v3i32_align16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x10
+; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s5

From 8ff3c9e0be7ad4b50edfd30710b9c6eaf36713ff Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 18 Jan 2022 21:08:01 -0500
Subject: [PATCH 028/946] AMDGPU/GlobalISel: Fix selection of gfx90a FP atomics

The struct/raw forms for the buffer atomics now work as
expected. However, we're incorrectly handling the legacy form (which
we probably shouldn't handle at all). We also are not diagnosing the
use of the return value on gfx908. These will be addressed separately.
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  12 +
 llvm/lib/Target/AMDGPU/FLATInstructions.td    |   2 +-
 .../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll  | 477 +++++++++++++++++-
 3 files changed, 487 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index ec86c446f564e..cc0c6b5ecf441 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1296,6 +1296,18 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
   if (ST.hasAtomicFaddInsts())
     Atomic.legalFor({{S32, GlobalPtr}});
 
+  if (ST.hasGFX90AInsts()) {
+    // These are legal with some caveats, and should have undergone expansion in
+    // the IR in most situations
+    // TODO: Move atomic expansion into legalizer
+    // TODO: Also supports <2 x f16>
+    Atomic.legalFor({
+        {S32, GlobalPtr},
+        {S64, GlobalPtr},
+        {S64, FlatPtr}
+      });
+  }
+
   // BUFFER/FLAT_ATOMIC_CMP_SWAP on GCN GPUs needs input marshalling, and output
   // demarshalling
   getActionDefinitionsBuilder(G_ATOMIC_CMPXCHG)
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index c7ec5308e6d09..c530d3cb49f04 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -915,7 +915,7 @@ class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueT
 class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                            ValueType data_vt = vt> : GCNPat <
   (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
-  (inst $vaddr, $data, $offset)
+  (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
 >;
 
 class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index dcd447eae9766..be4737d46c5a4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX90A
 
 declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1)
 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
@@ -408,6 +408,102 @@ main_body:
   ret void
 }
 
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)* %ptr) #1 {
+; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    s_mov_b64 s[2:3], 0
+; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-NEXT:  .LBB24_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
+; GFX90A-NEXT:    buffer_wbl2
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_invl2
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB24_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst
+  ret void
+}
+
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent(double addrspace(1)* %ptr) #1 {
+; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
+; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v2, v[0:1], s[0:1] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst
+  ret void
+}
+
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrspace(1)* %ptr) #1 {
+; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_system:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    s_mov_b64 s[2:3], 0
+; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-NEXT:  .LBB26_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
+; GFX90A-NEXT:    buffer_wbl2
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_invl2
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB26_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst
+  ret void
+}
+
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_flush(double addrspace(1)* %ptr) #0 {
+; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_flush:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
+; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v2, v[0:1], s[0:1] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst
+  ret void
+}
+
 define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %data) {
 ; GFX90A-LABEL: global_atomic_fadd_f64_rtn:
 ; GFX90A:       ; %bb.0: ; %main_body
@@ -420,6 +516,84 @@ main_body:
   ret double %ret
 }
 
+define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double %data) #1 {
+; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    global_load_dwordx2 v[2:3], v[0:1], off
+; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
+; GFX90A-NEXT:  .LBB29_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
+; GFX90A-NEXT:    v_add_f64 v[2:3], v[4:5], 4.0
+; GFX90A-NEXT:    buffer_wbl2
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_invl2
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
+; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB29_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 seq_cst
+  ret double %ret
+}
+
+define double @global_atomic_fadd_f64_rtn_pat_agent(double addrspace(1)* %ptr, double %data) #1 {
+; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_agent:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst
+  ret double %ret
+}
+
+define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr, double %data) #1 {
+; GFX90A-LABEL: global_atomic_fadd_f64_rtn_pat_system:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    global_load_dwordx2 v[2:3], v[0:1], off
+; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
+; GFX90A-NEXT:  .LBB31_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
+; GFX90A-NEXT:    v_add_f64 v[2:3], v[4:5], 4.0
+; GFX90A-NEXT:    buffer_wbl2
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5], off glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_invl2
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
+; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB31_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("one-as") seq_cst
+  ret double %ret
+}
+
 define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %data) {
 ; GFX90A-LABEL: global_atomic_fmax_f64_rtn:
 ; GFX90A:       ; %bb.0: ; %main_body
@@ -444,6 +618,195 @@ main_body:
   ret double %ret
 }
 
+define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double addrspace(1)* %ptr) {
+; GFX90A-LABEL: global_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    s_mov_b64 s[2:3], 0
+; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
+; GFX90A-NEXT:  .LBB34_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    global_atomic_cmpswap_x2 v[0:1], v4, v[0:3], s[0:1] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB34_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double addrspace(1)* %ptr, double 4.0 syncscope("agent") seq_cst
+  ret void
+}
+
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
+; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    s_mov_b64 s[2:3], 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
+; GFX90A-NEXT:  .LBB35_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
+; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    buffer_wbl2
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    buffer_invl2
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB35_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst
+  ret void
+}
+
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent(double* %ptr) #1 {
+; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst
+  ret void
+}
+
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #1 {
+; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    s_mov_b64 s[2:3], 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
+; GFX90A-NEXT:  .LBB37_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
+; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    buffer_wbl2
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_invl2
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB37_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst
+  ret void
+}
+
+define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
+; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
+; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
+; GFX90A-NEXT:  .LBB38_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
+; GFX90A-NEXT:    v_add_f64 v[2:3], v[4:5], 4.0
+; GFX90A-NEXT:    buffer_wbl2
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    buffer_invl2
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
+; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB38_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %ret = atomicrmw fadd double* %ptr, double 4.0 seq_cst
+  ret double %ret
+}
+
+define double @flat_atomic_fadd_f64_rtn_pat_agent(double* %ptr) #1 {
+; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_agent:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst
+  ret double %ret
+}
+
+define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
+; GFX90A-LABEL: flat_atomic_fadd_f64_rtn_pat_system:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
+; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
+; GFX90A-NEXT:  .LBB40_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
+; GFX90A-NEXT:    v_add_f64 v[2:3], v[4:5], 4.0
+; GFX90A-NEXT:    buffer_wbl2
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[2:3], v[0:1], v[2:5] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0)
+; GFX90A-NEXT:    buffer_invl2
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
+; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB40_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
+; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
+; GFX90A-NEXT:    v_mov_b32_e32 v1, v3
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("one-as") seq_cst
+  ret double %ret
+}
+
 define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data) {
 ; GFX90A-LABEL: flat_atomic_fadd_f64_noret:
 ; GFX90A:       ; %bb.0: ; %main_body
@@ -470,6 +833,35 @@ main_body:
   ret double %ret
 }
 
+define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %ptr) {
+; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX90A-NEXT:    s_mov_b64 s[2:3], 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
+; GFX90A-NEXT:  .LBB43_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
+; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], s[0:1], s[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    buffer_wbinvl1_vol
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
+; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
+; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB43_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double* %ptr, double 4.0 syncscope("agent") seq_cst
+  ret void
+}
+
 define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data) {
 ; GFX90A-LABEL: flat_atomic_fmin_f64_noret:
 ; GFX90A:       ; %bb.0: ; %main_body
@@ -551,7 +943,7 @@ main_body:
   ret double %ret
 }
 
-define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) #1 {
 ; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
 ; GFX90A:       ; %bb.0: ; %main_body
 ; GFX90A-NEXT:    s_load_dword s0, s[0:1], 0x24
@@ -568,7 +960,52 @@ main_body:
   ret void
 }
 
-define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) {
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush(double addrspace(3)* %ptr) #0 {
+; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dword s0, s[0:1], 0x24
+; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
+; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v2, s0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v2, v[0:1]
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
+  ret void
+}
+
+define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double addrspace(3)* %ptr) #4 {
+; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat_flush_safe:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_load_dword s2, s[0:1], 0x24
+; GFX90A-NEXT:    s_mov_b64 s[0:1], 0
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v0, s2
+; GFX90A-NEXT:    ds_read_b64 v[0:1], v0
+; GFX90A-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_add_f64 v[2:3], v[0:1], 4.0
+; GFX90A-NEXT:    v_mov_b32_e32 v4, s2
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    ds_cmpst_rtn_b64 v[2:3], v4, v[0:1], v[2:3]
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[0:1]
+; GFX90A-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
+; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1]
+; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[0:1]
+; GFX90A-NEXT:    s_cbranch_execnz .LBB52_1
+; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
+; GFX90A-NEXT:    s_endpgm
+main_body:
+  %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
+  ret void
+}
+
+define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) #1 {
 ; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat:
 ; GFX90A:       ; %bb.0: ; %main_body
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -582,3 +1019,37 @@ main_body:
   %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
   ret double %ret
 }
+
+define double @local_atomic_fadd_f64_rtn_ieee_unsafe(double addrspace(3)* %ptr, double %data) #2 {
+; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_unsafe:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
+; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
+; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[4:5]
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0)
+  ret double %ret
+}
+
+define double @local_atomic_fadd_f64_rtn_ieee_safe(double addrspace(3)* %ptr, double %data) #3 {
+; GFX90A-LABEL: local_atomic_fadd_f64_rtn_ieee_safe:
+; GFX90A:       ; %bb.0: ; %main_body
+; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
+; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
+; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[4:5]
+; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX90A-NEXT:    s_setpc_b64 s[30:31]
+main_body:
+  %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0)
+  ret double %ret
+}
+
+attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
+attributes #1 = { "amdgpu-unsafe-fp-atomics"="true" }
+attributes #2 = { "denormal-fp-math"="ieee,ieee" "amdgpu-unsafe-fp-atomics"="true" }
+attributes #3 = { "denormal-fp-math"="ieee,ieee" }
+attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" }

From be7e938e2712f3d84d1775ecdb1bc6784ea114f7 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 10:45:37 -0500
Subject: [PATCH 029/946] AMDGPU/GlobalISel: Stop handling
 llvm.amdgcn.buffer.atomic.fadd

This code is not structured to handle the legacy buffer intrinsics and
was miscompiling them.
---
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  2 -
 .../AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll  | 95 +++++--------------
 2 files changed, 22 insertions(+), 75 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index cc0c6b5ecf441..93bddd9681ed7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4167,7 +4167,6 @@ static unsigned getBufferAtomicPseudo(Intrinsic::ID IntrID) {
   case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
   case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
     return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_CMPSWAP;
-  case Intrinsic::amdgcn_buffer_atomic_fadd:
   case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
   case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
     return AMDGPU::G_AMDGPU_BUFFER_ATOMIC_FADD;
@@ -5186,7 +5185,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
   case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
   case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
   case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
-  case Intrinsic::amdgcn_buffer_atomic_fadd:
   case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
   case Intrinsic::amdgcn_struct_buffer_atomic_fmin:
   case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
index be4737d46c5a4..f6a84f553f3b7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fp64-atomics-gfx90a.ll
@@ -1,7 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck %s -check-prefix=GFX90A
 
-declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1)
 declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
 declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
 declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
@@ -16,56 +15,6 @@ declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double
 declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
 declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1)
 
-define amdgpu_kernel void @buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
-; GFX90A-LABEL: buffer_atomic_add_noret_f64:
-; GFX90A:       ; %bb.0: ; %main_body
-; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
-; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
-; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
-; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen glc
-; GFX90A-NEXT:    s_endpgm
-main_body:
-  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  ret void
-}
-
-define amdgpu_ps void @buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
-; GFX90A-LABEL: buffer_atomic_add_rtn_f64:
-; GFX90A:       ; %bb.0: ; %main_body
-; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
-; GFX90A-NEXT:    s_endpgm
-main_body:
-  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
-  store double %ret, double* undef
-  ret void
-}
-
-define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
-; GFX90A-LABEL: buffer_atomic_add_rtn_f64_off4_slc:
-; GFX90A:       ; %bb.0: ; %main_body
-; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
-; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
-; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
-; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
-; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc scc /* unexpected cache policy bit */
-; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
-; GFX90A-NEXT:    s_waitcnt vmcnt(0)
-; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
-; GFX90A-NEXT:    s_endpgm
-main_body:
-  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
-  store double %ret, double addrspace(1)* %out, align 8
-  ret void
-}
-
 define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
 ; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64:
 ; GFX90A:       ; %bb.0: ; %main_body
@@ -418,7 +367,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)*
 ; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
-; GFX90A-NEXT:  .LBB24_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB21_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
 ; GFX90A-NEXT:    buffer_wbl2
@@ -431,7 +380,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)*
 ; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB24_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB21_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 main_body:
@@ -466,7 +415,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
 ; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
-; GFX90A-NEXT:  .LBB26_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB23_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
 ; GFX90A-NEXT:    buffer_wbl2
@@ -479,7 +428,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
 ; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB26_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB23_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 main_body:
@@ -522,7 +471,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    global_load_dwordx2 v[2:3], v[0:1], off
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:  .LBB29_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB26_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
@@ -536,7 +485,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
 ; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB29_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB26_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
@@ -569,7 +518,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    global_load_dwordx2 v[2:3], v[0:1], off
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:  .LBB31_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB28_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
@@ -583,7 +532,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
 ; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB31_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB28_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
@@ -628,7 +577,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad
 ; GFX90A-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
-; GFX90A-NEXT:  .LBB34_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB31_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
@@ -639,7 +588,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad
 ; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB34_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB31_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 main_body:
@@ -655,7 +604,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
-; GFX90A-NEXT:  .LBB35_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB32_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
@@ -670,7 +619,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
 ; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB35_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB32_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 main_body:
@@ -704,7 +653,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
-; GFX90A-NEXT:  .LBB37_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB34_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
@@ -720,7 +669,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
 ; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB37_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB34_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 main_body:
@@ -734,7 +683,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:  .LBB38_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB35_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
@@ -748,7 +697,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
 ; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB38_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB35_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
@@ -781,7 +730,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
 ; GFX90A-NEXT:    s_mov_b64 s[4:5], 0
-; GFX90A-NEXT:  .LBB40_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB37_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
@@ -796,7 +745,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
 ; GFX90A-NEXT:    v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
 ; GFX90A-NEXT:    s_or_b64 s[4:5], vcc, s[4:5]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[4:5]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB40_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB37_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, v2
@@ -841,7 +790,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    flat_load_dwordx2 v[2:3], v[0:1]
-; GFX90A-NEXT:  .LBB43_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB40_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GFX90A-NEXT:    v_add_f64 v[0:1], v[2:3], 4.0
@@ -854,7 +803,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt
 ; GFX90A-NEXT:    s_or_b64 s[2:3], vcc, s[2:3]
 ; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[2:3]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB43_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB40_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 main_body:
@@ -985,7 +934,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX90A-NEXT:    ds_read_b64 v[0:1], v0
-; GFX90A-NEXT:  .LBB52_1: ; %atomicrmw.start
+; GFX90A-NEXT:  .LBB49_1: ; %atomicrmw.start
 ; GFX90A-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX90A-NEXT:    v_add_f64 v[2:3], v[0:1], 4.0
@@ -997,7 +946,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add
 ; GFX90A-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
 ; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1]
 ; GFX90A-NEXT:    s_andn2_b64 exec, exec, s[0:1]
-; GFX90A-NEXT:    s_cbranch_execnz .LBB52_1
+; GFX90A-NEXT:    s_cbranch_execnz .LBB49_1
 ; GFX90A-NEXT:  ; %bb.2: ; %atomicrmw.end
 ; GFX90A-NEXT:    s_endpgm
 main_body:

From 75017db08cd3d138aa46b996ce03394d02632ef5 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Thu, 20 Jan 2022 17:07:11 +0000
Subject: [PATCH 030/946] [RISCV] Add tests for commuted vector/scalar VP
 patterns

This patch adds a variety of tests checking that we can match
vector/scalar instructions against masked VP intrinsics when the splat
is on the LHS. At this stage, we can't, despite us having
ostensibly-commutable ISel patterns for them. The use of V0 as the mask
operand interferes with the auto-generated ISel table.
---
 .../RISCV/rvv/fixed-vectors-vadd-vp.ll        | 62 ++++++++++++-------
 .../RISCV/rvv/fixed-vectors-vand-vp.ll        | 28 +++++++++
 .../RISCV/rvv/fixed-vectors-vfadd-vp.ll       | 28 +++++++++
 .../RISCV/rvv/fixed-vectors-vmul-vp.ll        | 14 +++++
 .../CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll | 14 +++++
 .../RISCV/rvv/fixed-vectors-vxor-vp.ll        | 14 +++++
 llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll        | 54 ++++++++++------
 llvm/test/CodeGen/RISCV/rvv/vand-vp.ll        | 14 +++++
 llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll       | 30 +++++++++
 llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll        | 14 +++++
 llvm/test/CodeGen/RISCV/rvv/vor-vp.ll         | 28 +++++++++
 llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll        | 14 +++++
 12 files changed, 270 insertions(+), 44 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
index 3f89d62501b8e..bd29f51a93919 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
@@ -128,6 +128,20 @@ define <4 x i8> @vadd_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl
   ret <4 x i8> %v
 }
 
+define <4 x i8> @vadd_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_v4i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.add.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
 define <4 x i8> @vadd_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vadd_vx_v4i8_unmasked:
 ; CHECK:       # %bb.0:
@@ -407,17 +421,17 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %ev
 ; CHECK-NEXT:    addi a3, a1, -128
 ; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    bltu a1, a3, .LBB31_2
+; CHECK-NEXT:    bltu a1, a3, .LBB32_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a3
-; CHECK-NEXT:  .LBB31_2:
+; CHECK-NEXT:  .LBB32_2:
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v25
 ; CHECK-NEXT:    vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT:    bltu a1, a2, .LBB31_4
+; CHECK-NEXT:    bltu a1, a2, .LBB32_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    li a1, 128
-; CHECK-NEXT:  .LBB31_4:
+; CHECK-NEXT:  .LBB32_4:
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vadd.vi v8, v8, -1, v0.t
@@ -433,17 +447,17 @@ define <256 x i8> @vadd_vi_v258i8_unmasked(<256 x i8> %va, i32 zeroext %evl) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi a1, a0, -128
 ; CHECK-NEXT:    li a2, 0
-; CHECK-NEXT:    bltu a0, a1, .LBB32_2
+; CHECK-NEXT:    bltu a0, a1, .LBB33_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:  .LBB32_2:
+; CHECK-NEXT:  .LBB33_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, mu
 ; CHECK-NEXT:    li a1, 128
 ; CHECK-NEXT:    vadd.vi v16, v16, -1
-; CHECK-NEXT:    bltu a0, a1, .LBB32_4
+; CHECK-NEXT:    bltu a0, a1, .LBB33_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    li a0, 128
-; CHECK-NEXT:  .LBB32_4:
+; CHECK-NEXT:  .LBB33_4:
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
 ; CHECK-NEXT:    vadd.vi v8, v8, -1
 ; CHECK-NEXT:    ret
@@ -1528,17 +1542,17 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; RV32-NEXT:    addi a2, a0, -16
 ; RV32-NEXT:    vmv.v.i v24, -1
-; RV32-NEXT:    bltu a0, a2, .LBB107_2
+; RV32-NEXT:    bltu a0, a2, .LBB108_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a2
-; RV32-NEXT:  .LBB107_2:
+; RV32-NEXT:  .LBB108_2:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:    vadd.vv v16, v16, v24, v0.t
-; RV32-NEXT:    bltu a0, a1, .LBB107_4
+; RV32-NEXT:    bltu a0, a1, .LBB108_4
 ; RV32-NEXT:  # %bb.3:
 ; RV32-NEXT:    li a0, 16
-; RV32-NEXT:  .LBB107_4:
+; RV32-NEXT:  .LBB108_4:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
@@ -1551,17 +1565,17 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV64-NEXT:    addi a2, a0, -16
 ; RV64-NEXT:    vslidedown.vi v0, v0, 2
-; RV64-NEXT:    bltu a0, a2, .LBB107_2
+; RV64-NEXT:    bltu a0, a2, .LBB108_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a1, a2
-; RV64-NEXT:  .LBB107_2:
+; RV64-NEXT:  .LBB108_2:
 ; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV64-NEXT:    li a1, 16
 ; RV64-NEXT:    vadd.vi v16, v16, -1, v0.t
-; RV64-NEXT:    bltu a0, a1, .LBB107_4
+; RV64-NEXT:    bltu a0, a1, .LBB108_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    li a0, 16
-; RV64-NEXT:  .LBB107_4:
+; RV64-NEXT:  .LBB108_4:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    vadd.vi v8, v8, -1, v0.t
@@ -1580,17 +1594,17 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; RV32-NEXT:    addi a2, a0, -16
 ; RV32-NEXT:    vmv.v.i v24, -1
-; RV32-NEXT:    bltu a0, a2, .LBB108_2
+; RV32-NEXT:    bltu a0, a2, .LBB109_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a2
-; RV32-NEXT:  .LBB108_2:
+; RV32-NEXT:  .LBB109_2:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:    vadd.vv v16, v16, v24
-; RV32-NEXT:    bltu a0, a1, .LBB108_4
+; RV32-NEXT:    bltu a0, a1, .LBB109_4
 ; RV32-NEXT:  # %bb.3:
 ; RV32-NEXT:    li a0, 16
-; RV32-NEXT:  .LBB108_4:
+; RV32-NEXT:  .LBB109_4:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
 ; RV32-NEXT:    vadd.vv v8, v8, v24
 ; RV32-NEXT:    ret
@@ -1599,17 +1613,17 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    addi a1, a0, -16
 ; RV64-NEXT:    li a2, 0
-; RV64-NEXT:    bltu a0, a1, .LBB108_2
+; RV64-NEXT:    bltu a0, a1, .LBB109_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:  .LBB108_2:
+; RV64-NEXT:  .LBB109_2:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
 ; RV64-NEXT:    li a1, 16
 ; RV64-NEXT:    vadd.vi v16, v16, -1
-; RV64-NEXT:    bltu a0, a1, .LBB108_4
+; RV64-NEXT:    bltu a0, a1, .LBB109_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    li a0, 16
-; RV64-NEXT:  .LBB108_4:
+; RV64-NEXT:  .LBB109_4:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
 ; RV64-NEXT:    vadd.vi v8, v8, -1
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
index ea41aba5e5d71..419a9d4b5b1bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
@@ -52,6 +52,20 @@ define <2 x i8> @vand_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl
   ret <2 x i8> %v
 }
 
+define <2 x i8> @vand_vx_v2i8_commute(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vand_vx_v2i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT:    vand.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
 define <2 x i8> @vand_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vand_vx_v2i8_unmasked:
 ; CHECK:       # %bb.0:
@@ -66,6 +80,20 @@ define <2 x i8> @vand_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
   ret <2 x i8> %v
 }
 
+define <2 x i8> @vand_vx_v2i8_unmasked_commute(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vand_vx_v2i8_unmasked_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT:    vand.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> undef, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.and.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
 define <2 x i8> @vand_vi_v2i8(<2 x i8> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vand_vi_v2i8:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
index c368a2f8bc40d..215daf4f92f51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
@@ -252,6 +252,20 @@ define <2 x float> @vfadd_vf_v2f32(<2 x float> %va, float %b, <2 x i1> %m, i32 z
   ret <2 x float> %v
 }
 
+define <2 x float> @vfadd_vf_v2f32_commute(<2 x float> %va, float %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_v2f32_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x float> undef, float %b, i32 0
+  %vb = shufflevector <2 x float> %elt.head, <2 x float> undef, <2 x i32> zeroinitializer
+  %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> %m, i32 %evl)
+  ret <2 x float> %v
+}
+
 define <2 x float> @vfadd_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vfadd_vf_v2f32_unmasked:
 ; CHECK:       # %bb.0:
@@ -266,6 +280,20 @@ define <2 x float> @vfadd_vf_v2f32_unmasked(<2 x float> %va, float %b, i32 zeroe
   ret <2 x float> %v
 }
 
+define <2 x float> @vfadd_vf_v2f32_unmasked_commute(<2 x float> %va, float %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_v2f32_unmasked_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x float> undef, float %b, i32 0
+  %vb = shufflevector <2 x float> %elt.head, <2 x float> undef, <2 x i32> zeroinitializer
+  %head = insertelement <2 x i1> undef, i1 true, i32 0
+  %m = shufflevector <2 x i1> %head, <2 x i1> undef, <2 x i32> zeroinitializer
+  %v = call <2 x float> @llvm.vp.fadd.v2f32(<2 x float> %vb, <2 x float> %va, <2 x i1> %m, i32 %evl)
+  ret <2 x float> %v
+}
+
 declare <4 x float> @llvm.vp.fadd.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
 
 define <4 x float> @vfadd_vv_v4f32(<4 x float> %va, <4 x float> %b, <4 x i1> %m, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
index 6907f6ae61688..976d12eab3d72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -352,6 +352,20 @@ define <8 x i16> @vmul_vx_v8i16(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext
   ret <8 x i16> %v
 }
 
+define <8 x i16> @vmul_vx_v8i16_commute(<8 x i16> %va, i16 %b, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_v8i16_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vmul.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> undef, i16 %b, i32 0
+  %vb = shufflevector <8 x i16> %elt.head, <8 x i16> undef, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.mul.v8i16(<8 x i16> %vb, <8 x i16> %va, <8 x i1> %m, i32 %evl)
+  ret <8 x i16> %v
+}
+
 define <8 x i16> @vmul_vx_v8i16_unmasked(<8 x i16> %va, i16 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vmul_vx_v8i16_unmasked:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
index f623f11a56e20..b1807c1b29de6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
@@ -128,6 +128,20 @@ define <4 x i8> @vor_vx_v4i8(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl)
   ret <4 x i8> %v
 }
 
+define <4 x i8> @vor_vx_v4i8_commute(<4 x i8> %va, i8 %b, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vor_vx_v4i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <4 x i8> %elt.head, <4 x i8> undef, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.or.v4i8(<4 x i8> %vb, <4 x i8> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i8> %v
+}
+
 define <4 x i8> @vor_vx_v4i8_unmasked(<4 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vor_vx_v4i8_unmasked:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
index 4edd829aeb1e2..207f12ff3822e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
@@ -52,6 +52,20 @@ define <2 x i8> @vxor_vx_v2i8(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl
   ret <2 x i8> %v
 }
 
+define <2 x i8> @vxor_vx_v2i8_commute(<2 x i8> %va, i8 %b, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vxor_vx_v2i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT:    vxor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <2 x i8> %elt.head, <2 x i8> undef, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.xor.v2i8(<2 x i8> %vb, <2 x i8> %va, <2 x i1> %m, i32 %evl)
+  ret <2 x i8> %v
+}
+
 define <2 x i8> @vxor_vx_v2i8_unmasked(<2 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vxor_vx_v2i8_unmasked:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index 13c41718676f9..4206edadadb85 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -54,6 +54,20 @@ define <vscale x 1 x i8> @vadd_vx_nxv1i8(<vscale x 1 x i8> %va, i8 %b, <vscale x
   ret <vscale x 1 x i8> %v
 }
 
+define <vscale x 1 x i8> @vadd_vx_nxv1i8_commute(<vscale x 1 x i8> %va, i8 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vadd_vx_nxv1i8_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> undef, i8 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.add.nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i8> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
 define <vscale x 1 x i8> @vadd_vx_nxv1i8_unmasked(<vscale x 1 x i8> %va, i8 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vadd_vx_nxv1i8_unmasked:
 ; CHECK:       # %bb.0:
@@ -636,20 +650,20 @@ define <vscale x 128 x i8> @vadd_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:    bltu a1, a2, .LBB49_2
+; CHECK-NEXT:    bltu a1, a2, .LBB50_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:  .LBB49_2:
+; CHECK-NEXT:  .LBB50_2:
 ; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    vsetvli a5, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, mu
 ; CHECK-NEXT:    sub a0, a1, a2
 ; CHECK-NEXT:    vadd.vi v8, v8, -1, v0.t
-; CHECK-NEXT:    bltu a1, a0, .LBB49_4
+; CHECK-NEXT:    bltu a1, a0, .LBB50_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a4, a0
-; CHECK-NEXT:  .LBB49_4:
+; CHECK-NEXT:  .LBB50_4:
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vadd.vi v16, v16, -1, v0.t
@@ -666,18 +680,18 @@ define <vscale x 128 x i8> @vadd_vi_nxv128i8_unmasked(<vscale x 128 x i8> %va, i
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    mv a2, a0
-; CHECK-NEXT:    bltu a0, a1, .LBB50_2
+; CHECK-NEXT:    bltu a0, a1, .LBB51_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a1
-; CHECK-NEXT:  .LBB50_2:
+; CHECK-NEXT:  .LBB51_2:
 ; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m8, ta, mu
 ; CHECK-NEXT:    sub a1, a0, a1
 ; CHECK-NEXT:    vadd.vi v8, v8, -1
-; CHECK-NEXT:    bltu a0, a1, .LBB50_4
+; CHECK-NEXT:    bltu a0, a1, .LBB51_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:  .LBB50_4:
+; CHECK-NEXT:  .LBB51_4:
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, ta, mu
 ; CHECK-NEXT:    vadd.vi v16, v16, -1
 ; CHECK-NEXT:    ret
@@ -1540,16 +1554,16 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vslidedown.vx v0, v0, a4
-; CHECK-NEXT:    bltu a0, a3, .LBB117_2
+; CHECK-NEXT:    bltu a0, a3, .LBB118_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:  .LBB117_2:
+; CHECK-NEXT:  .LBB118_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; CHECK-NEXT:    vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB117_4
+; CHECK-NEXT:    bltu a0, a1, .LBB118_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB117_4:
+; CHECK-NEXT:  .LBB118_4:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vadd.vi v8, v8, -1, v0.t
@@ -1574,16 +1588,16 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_unmasked(<vscale x 32 x i32> %va, i
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vslidedown.vx v0, v24, a4
-; CHECK-NEXT:    bltu a0, a3, .LBB118_2
+; CHECK-NEXT:    bltu a0, a3, .LBB119_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:  .LBB118_2:
+; CHECK-NEXT:  .LBB119_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; CHECK-NEXT:    vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB118_4
+; CHECK-NEXT:    bltu a0, a1, .LBB119_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB118_4:
+; CHECK-NEXT:  .LBB119_4:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vadd.vi v8, v8, -1, v0.t
@@ -1614,16 +1628,16 @@ define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <v
 ; CHECK-NEXT:    slli a1, a0, 1
 ; CHECK-NEXT:    sub a3, a0, a1
 ; CHECK-NEXT:    vslidedown.vx v0, v0, a4
-; CHECK-NEXT:    bltu a0, a3, .LBB119_2
+; CHECK-NEXT:    bltu a0, a3, .LBB120_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:  .LBB119_2:
+; CHECK-NEXT:  .LBB120_2:
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; CHECK-NEXT:    vadd.vi v16, v16, -1, v0.t
-; CHECK-NEXT:    bltu a0, a1, .LBB119_4
+; CHECK-NEXT:    bltu a0, a1, .LBB120_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:  .LBB119_4:
+; CHECK-NEXT:  .LBB120_4:
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vadd.vi v8, v8, -1, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
index 84a8fa65ea66a..b9782bb4b60db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
@@ -1042,6 +1042,20 @@ define <vscale x 32 x i16> @vand_vx_nxv32i16(<vscale x 32 x i16> %va, i16 %b, <v
   ret <vscale x 32 x i16> %v
 }
 
+define <vscale x 32 x i16> @vand_vx_nxv32i16_commute(<vscale x 32 x i16> %va, i16 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vand_vx_nxv32i16_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e16, m8, ta, mu
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
+; CHECK-NEXT:    vand.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> undef, i16 %b, i32 0
+  %vb = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> undef, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.and.nxv32i16(<vscale x 32 x i16> %vb, <vscale x 32 x i16> %va, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
 define <vscale x 32 x i16> @vand_vx_nxv32i16_unmasked(<vscale x 32 x i16> %va, i16 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vand_vx_nxv32i16_unmasked:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
index 7e0291b6156aa..019a72b02be72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
@@ -42,6 +42,20 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16(<vscale x 1 x half> %va, half %b, <
   ret <vscale x 1 x half> %v
 }
 
+define <vscale x 1 x half> @vfadd_vf_nxv1f16_commute(<vscale x 1 x half> %va, half %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv1f16_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfadd.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x half> undef, half %b, i32 0
+  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x half> %v
+}
+
 define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, half %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vfadd_vf_nxv1f16_unmasked:
 ; CHECK:       # %bb.0:
@@ -58,6 +72,22 @@ define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked(<vscale x 1 x half> %va, h
   ret <vscale x 1 x half> %v
 }
 
+define <vscale x 1 x half> @vfadd_vf_nxv1f16_unmasked_commute(<vscale x 1 x half> %va, half %b, i32 zeroext %evl) {
+; CHECK-LABEL: vfadd_vf_nxv1f16_unmasked_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfadd.vv v8, v9, v8
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x half> undef, half %b, i32 0
+  %vb = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> undef, <vscale x 1 x i32> zeroinitializer
+  %head = insertelement <vscale x 1 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 1 x i1> %head, <vscale x 1 x i1> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half> %vb, <vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x half> %v
+}
+
 declare <vscale x 2 x half> @llvm.vp.fadd.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x i1>, i32)
 
 define <vscale x 2 x half> @vfadd_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
index bcf43854f94e6..ecf33e3c2ca20 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
@@ -954,6 +954,20 @@ define <vscale x 16 x i32> @vmul_vx_nxv16i32(<vscale x 16 x i32> %va, i32 %b, <v
   ret <vscale x 16 x i32> %v
 }
 
+define <vscale x 16 x i32> @vmul_vx_nxv16i32_commute(<vscale x 16 x i32> %va, i32 %b, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vmul_vx_nxv16i32_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vmv.v.x v16, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; CHECK-NEXT:    vmul.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> undef, i32 %b, i32 0
+  %vb = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> undef, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.mul.nxv16i32(<vscale x 16 x i32> %vb, <vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
 define <vscale x 16 x i32> @vmul_vx_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vmul_vx_nxv16i32_unmasked:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
index 5c08d547204d8..469d0489c5d83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
@@ -1118,6 +1118,20 @@ define <vscale x 2 x i32> @vor_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %b, <vscal
   ret <vscale x 2 x i32> %v
 }
 
+define <vscale x 2 x i32> @vor_vx_nxv2i32_commute(<vscale x 2 x i32> %va, i32 %b, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vor_vx_nxv2i32_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.or.nxv2i32(<vscale x 2 x i32> %vb, <vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
 define <vscale x 2 x i32> @vor_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vor_vx_nxv2i32_unmasked:
 ; CHECK:       # %bb.0:
@@ -1132,6 +1146,20 @@ define <vscale x 2 x i32> @vor_vx_nxv2i32_unmasked(<vscale x 2 x i32> %va, i32 %
   ret <vscale x 2 x i32> %v
 }
 
+define <vscale x 2 x i32> @vor_vx_nxv2i32_unmasked_commute(<vscale x 2 x i32> %va, i32 %b, i32 zeroext %evl) {
+; CHECK-LABEL: vor_vx_nxv2i32_unmasked_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
+; CHECK-NEXT:    vor.vx v8, v8, a0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> undef, i32 %b, i32 0
+  %vb = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
+  %head = insertelement <vscale x 2 x i1> undef, i1 true, i32 0
+  %m = shufflevector <vscale x 2 x i1> %head, <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.or.nxv2i32(<vscale x 2 x i32> %vb, <vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
 define <vscale x 2 x i32> @vor_vi_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vor_vi_nxv2i32:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
index 7d7d67d673bd0..975c4e926fbd1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
@@ -870,6 +870,20 @@ define <vscale x 1 x i16> @vxor_vx_nxv1i16(<vscale x 1 x i16> %va, i16 %b, <vsca
   ret <vscale x 1 x i16> %v
 }
 
+define <vscale x 1 x i16> @vxor_vx_nxv1i16_commute(<vscale x 1 x i16> %va, i16 %b, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vxor_vx_nxv1i16_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a2, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vmv.v.x v9, a0
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
+; CHECK-NEXT:    vxor.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> undef, i16 %b, i32 0
+  %vb = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> undef, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.xor.nxv1i16(<vscale x 1 x i16> %vb, <vscale x 1 x i16> %va, <vscale x 1 x i1> %m, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
 define <vscale x 1 x i16> @vxor_vx_nxv1i16_unmasked(<vscale x 1 x i16> %va, i16 %b, i32 zeroext %evl) {
 ; CHECK-LABEL: vxor_vx_nxv1i16_unmasked:
 ; CHECK:       # %bb.0:

From 81cbbe3e17a4a9b2e0d227b591553d90eeab1400 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Thu, 20 Jan 2022 18:30:09 +0100
Subject: [PATCH 031/946] [flang][NFC] Remove unused/duplicated kStridePosInDim

kStridePosInDim is a duplicate of kDimStridePos and is not used. Just
remove it.

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D117784
---
 flang/lib/Optimizer/CodeGen/TypeConverter.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.h b/flang/lib/Optimizer/CodeGen/TypeConverter.h
index d96b752be1b81..478c2592b10b1 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.h
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.h
@@ -31,9 +31,9 @@ static constexpr unsigned kTypePosInBox = 4;
 static constexpr unsigned kAttributePosInBox = 5;
 static constexpr unsigned kF18AddendumPosInBox = 6;
 static constexpr unsigned kDimsPosInBox = 7;
-static constexpr unsigned kStridePosInDim = 2;
 static constexpr unsigned kOptTypePtrPosInBox = 8;
 static constexpr unsigned kOptRowTypePosInBox = 9;
+
 // Position of the different values in [dims]
 static constexpr unsigned kDimLowerBoundPos = 0;
 static constexpr unsigned kDimExtentPos = 1;

From 191a6e9dfa1a54b616e12bde2efa849ad8e03f48 Mon Sep 17 00:00:00 2001
From: Nadav Rotem <nadav256@gmail.com>
Date: Thu, 20 Jan 2022 09:25:45 -0800
Subject: [PATCH 032/946] optimize icmp-ugt-ashr

This diff optimizes the sequence icmp-ugt(ashr,C_1) C_2. InstCombine
already implements this optimization for sgt, and this patch adds
support ugt. This patch adds the check for UGT.

@craig.topper came up with the idea and proof:

  define i1 @src(i8 %x, i8 %y, i8 %c) {
    %cp1 = add i8 %c, 1
    %i = shl i8 %cp1, %y
    %i.2 = ashr i8 %i, %y
    %cmp = icmp eq i8 %cp1, %i.2
    ;Assume: C + 1 == (((C + 1) << y) >> y)
    call void @llvm.assume(i1 %cmp)

    ; uncomment for the sgt case
    %j = shl i8 %cp1, %y
    %j.2 = sub i8 %j, 1
    %cmp2 = icmp ne i8 %j.2, 127
    ;Assume (((c + 1 ) << y) - 1) != 127
    call void @llvm.assume(i1 %cmp2)

    %s = ashr i8 %x, %y
    %r = icmp sgt i8 %s, %c
    ret i1 %r
  }

  define i1 @tgt(i8 %x, i8 %y, i8 %c) {
    %cp1 = add i8 %c, 1
    %j = shl i8 %cp1, %y
    %j.2 = sub i8 %j, 1

    %r = icmp sgt i8 %x, %j.2
    ret i1 %r
  }

  declare void @llvm.assume(i1)

  This change is related to the optimizations in D117252.

  Differential Revision: https://reviews.llvm.org/D117365
---
 .../InstCombine/InstCombineCompares.cpp       |  8 +++++++-
 .../Transforms/InstCombine/icmp-shr-lt-gt.ll  | 15 ++++++++++----
 llvm/test/Transforms/InstCombine/icmp-shr.ll  | 20 ++++---------------
 3 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 7dc9ae1d9d064..fd58a44504b3c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -2239,7 +2239,7 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
   // those conditions rather than checking them. This is difficult because of
   // undef/poison (PR34838).
   if (IsAShr) {
-    if (Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT || IsExact) {
+    if (IsExact || Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_ULT) {
       // When ShAmtC can be shifted losslessly:
       // icmp PRED (ashr exact X, ShAmtC), C --> icmp PRED X, (C << ShAmtC)
       // icmp slt/ult (ashr X, ShAmtC), C --> icmp slt/ult X, (C << ShAmtC)
@@ -2254,6 +2254,12 @@ Instruction *InstCombinerImpl::foldICmpShrConstant(ICmpInst &Cmp,
           (ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
         return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
     }
+    if (Pred == CmpInst::ICMP_UGT) {
+      // icmp ugt (ashr X, ShAmtC), C --> icmp ugt X, ((C + 1) << ShAmtC) - 1
+      APInt ShiftedC = (C + 1).shl(ShAmtVal) - 1;
+      if ((ShiftedC + 1).ashr(ShAmtVal) == (C + 1))
+        return new ICmpInst(Pred, X, ConstantInt::get(ShrTy, ShiftedC));
+    }
 
     // If the compare constant has significant bits above the lowest sign-bit,
     // then convert an unsigned cmp to a test of the sign-bit:
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
index a9fb4b1f217d4..82f5a6dad7608 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr-lt-gt.ll
@@ -2344,8 +2344,7 @@ define i1 @ashr_ne_noexact(i8 %x) {
 
 define i1 @ashr_ugt_noexact(i8 %x) {
 ; CHECK-LABEL: @ashr_ugt_noexact(
-; CHECK-NEXT:    [[S:%.*]] = ashr i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i8 [[S]], 10
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i8 [[X:%.*]], 87
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i8 %x, 3
@@ -2356,8 +2355,7 @@ define i1 @ashr_ugt_noexact(i8 %x) {
 
 define i1 @ashr_uge_noexact(i8 %x) {
 ; CHECK-LABEL: @ashr_uge_noexact(
-; CHECK-NEXT:    [[S:%.*]] = ashr i8 [[X:%.*]], 3
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i8 [[S]], 9
+; CHECK-NEXT:    [[C:%.*]] = icmp ugt i8 [[X:%.*]], 79
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %s = ashr i8 %x, 3
@@ -2451,6 +2449,15 @@ define <4 x i1> @ashr_00_00_vec(<4 x i8> %x) {
   ret <4 x i1> %c
 }
 
+define i1 @ashr_sgt_overflow(i8 %x) {
+; CHECK-LABEL: @ashr_sgt_overflow(
+; CHECK-NEXT:    ret i1 false
+;
+  %s = ashr i8 %x, 1
+  %c = icmp sgt i8 %s, 63
+  ret i1 %c
+}
+
 define i1 @lshrult_01_00_exact(i4 %x) {
 ; CHECK-LABEL: @lshrult_01_00_exact(
 ; CHECK-NEXT:    ret i1 false
diff --git a/llvm/test/Transforms/InstCombine/icmp-shr.ll b/llvm/test/Transforms/InstCombine/icmp-shr.ll
index 09732fea5e38f..168aa81874f68 100644
--- a/llvm/test/Transforms/InstCombine/icmp-shr.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-shr.ll
@@ -574,12 +574,9 @@ define i1 @ashr_ugt_0(i4 %x) {
   ret i1 %r
 }
 
-; negative test
-
 define i1 @ashr_ugt_1(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_1(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt i4 [[S]], 1
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i4 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = ashr i4 %x, 1
@@ -587,12 +584,9 @@ define i1 @ashr_ugt_1(i4 %x) {
   ret i1 %r
 }
 
-; negative test
-
 define i1 @ashr_ugt_2(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_2(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt i4 [[S]], 2
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i4 [[X:%.*]], 5
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = ashr i4 %x, 1
@@ -694,12 +688,9 @@ define i1 @ashr_ugt_11(i4 %x) {
   ret i1 %r
 }
 
-; negative test
-
 define i1 @ashr_ugt_12(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_12(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt i4 [[S]], -4
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i4 [[X:%.*]], -7
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = ashr i4 %x, 1
@@ -707,12 +698,9 @@ define i1 @ashr_ugt_12(i4 %x) {
   ret i1 %r
 }
 
-; negative test
-
 define i1 @ashr_ugt_13(i4 %x) {
 ; CHECK-LABEL: @ashr_ugt_13(
-; CHECK-NEXT:    [[S:%.*]] = ashr i4 [[X:%.*]], 1
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt i4 [[S]], -3
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i4 [[X:%.*]], -5
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %s = ashr i4 %x, 1

From feddf1150227eb43500106cfd6cf01456f2b87e4 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 19 Jan 2022 18:41:39 -0800
Subject: [PATCH 033/946] [lld][WebAssemlby] Convert test to check disassembly
 output. NFC

Differential Revision: https://reviews.llvm.org/D117739
---
 lld/test/wasm/shared.s   | 51 ++++++++++++++++++++++++++++++++--------
 lld/test/wasm/shared64.s | 51 ++++++++++++++++++++++++++++++++--------
 2 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/lld/test/wasm/shared.s b/lld/test/wasm/shared.s
index 7861485470ffa..01dc8d51474d9 100644
--- a/lld/test/wasm/shared.s
+++ b/lld/test/wasm/shared.s
@@ -1,6 +1,7 @@
 # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s
 # RUN: wasm-ld --experimental-pic -shared -o %t.wasm %t.o
 # RUN: obj2yaml %t.wasm | FileCheck %s
+# RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DIS
 
 .functype func_external () -> ()
 
@@ -205,16 +206,46 @@ get_local_func_address:
 # CHECK-NEXT:         Functions:       [ 3, 2 ]
 
 # check the generated code in __wasm_call_ctors and __wasm_apply_data_relocs functions
-# TODO(sbc): Disassemble and verify instructions.
-
-# CHECK:        - Type:            CODE
-# CHECK-NEXT:     Functions:
-# CHECK-NEXT:       - Index:           0
-# CHECK-NEXT:         Locals:          []
-# CHECK-NEXT:         Body:            10010B
-# CHECK-NEXT:       - Index:           1
-# CHECK-NEXT:         Locals:          []
-# CHECK-NEXT:         Body:            230141046A2304360200230141086A230241016A3602002301410C6A230141006A360200230141106A2305360200230141146A230641046A3602000B
+
+# DIS:      <__wasm_call_ctors>:
+# DIS-EMPTY:
+# DIS-NEXT:                 call    1
+# DIS-NEXT:                 end
+
+# DIS:      <__wasm_apply_data_relocs>:
+# DIS-EMPTY:
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i32.const       4
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 global.get      4
+# DIS-NEXT:                 i32.store       0
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i32.const       8
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 global.get      2
+# DIS-NEXT:                 i32.const       1
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 i32.store       0
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i32.const       12
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i32.const       0
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 i32.store       0
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i32.const       16
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 global.get      5
+# DIS-NEXT:                 i32.store       0
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i32.const       20
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 global.get      6
+# DIS-NEXT:                 i32.const       4
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 i32.store       0
+# DIS-NEXT:                 end
 
 # check the data segment initialized with __memory_base global as offset
 
diff --git a/lld/test/wasm/shared64.s b/lld/test/wasm/shared64.s
index 86d5a521ab334..080b05213a04a 100644
--- a/lld/test/wasm/shared64.s
+++ b/lld/test/wasm/shared64.s
@@ -1,6 +1,7 @@
 # RUN: llvm-mc -filetype=obj -triple=wasm64-unknown-unknown -o %t.o %s
 # RUN: wasm-ld -mwasm64 --experimental-pic -shared -o %t.wasm %t.o
 # RUN: obj2yaml %t.wasm | FileCheck %s
+# RUN: llvm-objdump --disassemble-symbols=__wasm_call_ctors,__wasm_apply_data_relocs --no-show-raw-insn --no-leading-addr %t.wasm | FileCheck %s --check-prefixes DIS
 
 .functype func_external () -> ()
 
@@ -212,16 +213,46 @@ get_local_func_address:
 # CHECK-NEXT:         Functions:       [ 3, 2 ]
 
 # check the generated code in __wasm_call_ctors and __wasm_apply_data_relocs functions
-# TODO(sbc): Disassemble and verify instructions.
-
-# CHECK:        - Type:            CODE
-# CHECK-NEXT:     Functions:
-# CHECK-NEXT:       - Index:           0
-# CHECK-NEXT:         Locals:          []
-# CHECK-NEXT:         Body:            10010B
-# CHECK-NEXT:       - Index:           1
-# CHECK-NEXT:         Locals:          []
-# CHECK-NEXT:         Body:            230142047C23053702002301420C7C230242017C370200230142147C230141006A360200230142187C2306370200230142207C230741046A3602000B
+
+# DIS:      <__wasm_call_ctors>:
+# DIS-EMPTY:
+# DIS-NEXT:                 call    1
+# DIS-NEXT:                 end
+
+# DIS:      <__wasm_apply_data_relocs>:
+# DIS-EMPTY:
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i64.const       4
+# DIS-NEXT:                 i64.add
+# DIS-NEXT:                 global.get      5
+# DIS-NEXT:                 i64.store       0:p2align=2
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i64.const       12
+# DIS-NEXT:                 i64.add
+# DIS-NEXT:                 global.get      2
+# DIS-NEXT:                 i64.const       1
+# DIS-NEXT:                 i64.add
+# DIS-NEXT:                 i64.store       0:p2align=2
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i64.const       20
+# DIS-NEXT:                 i64.add
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i32.const       0
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 i32.store       0
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i64.const       24
+# DIS-NEXT:                 i64.add
+# DIS-NEXT:                 global.get      6
+# DIS-NEXT:                 i64.store       0:p2align=2
+# DIS-NEXT:                 global.get      1
+# DIS-NEXT:                 i64.const       32
+# DIS-NEXT:                 i64.add
+# DIS-NEXT:                 global.get      7
+# DIS-NEXT:                 i32.const       4
+# DIS-NEXT:                 i32.add
+# DIS-NEXT:                 i32.store       0
+# DIS-NEXT:                 end
 
 # check the data segment initialized with __memory_base global as offset
 

From 1455eddcf71d09b31aaea77f4b33d23741519bd8 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 20 Jan 2022 20:31:47 +0300
Subject: [PATCH 034/946] [NFC][SimplifyCFG] Add some tests for `invoke`
 merging

---
 ...patible-invokes-of-landingpad-debuginfo.ll |  103 ++
 .../merge-compatible-invokes-of-landingpad.ll | 1367 +++++++++++++++++
 2 files changed, 1470 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll
 create mode 100644 llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll

diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll
new file mode 100644
index 0000000000000..1b39e731c94d5
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals
+; RUN: opt < %s -debugify -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -sink-common-insts -S | FileCheck %s
+; RUN: opt < %s -passes='debugify,simplifycfg<sink-common-insts>' -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; More interesting test, here we can merge the invokes.
+define void @t1_mergeable_invoke() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t1_mergeable_invoke(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond(), !dbg [[DBG12:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i1 [[C0]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]]
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]], !dbg [[DBG13:![0-9]+]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG14:![0-9]+]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable, !dbg [[DBG15:![0-9]+]]
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup, !dbg [[DBG16:![0-9]+]]
+; CHECK-NEXT:    call void @destructor(), !dbg [[DBG17:![0-9]+]]
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]], !dbg [[DBG18:![0-9]+]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond(), !dbg [[DBG19:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i1 [[C1]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]]
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]], !dbg [[DBG20:![0-9]+]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]], !dbg [[DBG21:![0-9]+]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable, !dbg [[DBG22:![0-9]+]]
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect(), !dbg [[DBG23:![0-9]+]]
+; CHECK-NEXT:    ret void, !dbg [[DBG24:![0-9]+]]
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+declare i1 @cond()
+declare void @sideeffect()
+declare void @simple_throw() noreturn
+declare void @destructor()
+
+declare dso_local i32 @__gxx_personality_v0(...)
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn }
+;.
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "<stdin>", directory: "/")
+; CHECK: [[META2:![0-9]+]] = !{i32 13}
+; CHECK: [[META3:![0-9]+]] = !{i32 2}
+; CHECK: [[META4:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3}
+; CHECK: [[META5:![0-9]+]] = distinct !DISubprogram(name: "t1_mergeable_invoke", linkageName: "t1_mergeable_invoke", scope: null, file: !1, line: 1, type: !6, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !8)
+; CHECK: [[META6:![0-9]+]] = !DISubroutineType(types: !7)
+; CHECK: [[META7:![0-9]+]] = !{}
+; CHECK: [[META8:![0-9]+]] = !{!9, !11}
+; CHECK: [[META9]] = !DILocalVariable(name: "1", scope: !5, file: !1, line: 1, type: !10)
+; CHECK: [[META10:![0-9]+]] = !DIBasicType(name: "ty8", size: 8, encoding: DW_ATE_unsigned)
+; CHECK: [[META11]] = !DILocalVariable(name: "2", scope: !5, file: !1, line: 8, type: !10)
+; CHECK: [[DBG12]] = !DILocation(line: 1, column: 1, scope: !5)
+; CHECK: [[DBG13]] = !DILocation(line: 2, column: 1, scope: !5)
+; CHECK: [[DBG14]] = !DILocation(line: 3, column: 1, scope: !5)
+; CHECK: [[DBG15]] = !DILocation(line: 4, column: 1, scope: !5)
+; CHECK: [[DBG16]] = !DILocation(line: 5, column: 1, scope: !5)
+; CHECK: [[DBG17]] = !DILocation(line: 6, column: 1, scope: !5)
+; CHECK: [[DBG18]] = !DILocation(line: 7, column: 1, scope: !5)
+; CHECK: [[DBG19]] = !DILocation(line: 8, column: 1, scope: !5)
+; CHECK: [[DBG20]] = !DILocation(line: 9, column: 1, scope: !5)
+; CHECK: [[DBG21]] = !DILocation(line: 10, column: 1, scope: !5)
+; CHECK: [[DBG22]] = !DILocation(line: 11, column: 1, scope: !5)
+; CHECK: [[DBG23]] = !DILocation(line: 12, column: 1, scope: !5)
+; CHECK: [[DBG24]] = !DILocation(line: 13, column: 1, scope: !5)
+;.
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll
new file mode 100644
index 0000000000000..a9b93193f7d87
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad.ll
@@ -0,0 +1,1367 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals
+; RUN: opt < %s -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -sink-common-insts -S | FileCheck %s
+; RUN: opt < %s -passes='simplifycfg<sink-common-insts>' -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Simple test, nothing interesting happens here.
+define void @t0_noop() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t0_noop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c = call i1 @cond()
+  br i1 %c, label %if.then, label %if.end
+
+if.then:
+  invoke void @simple_throw() to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; More interesting test, here we can merge the invokes.
+define void @t1_mergeable_invoke() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t1_mergeable_invoke(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; 'unreachable' block is shared, but it is unreachable, so we are fine.
+define void @t2_shared_normal_dest() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t2_shared_normal_dest(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD]]
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont unwind label %lpad
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; shared normal destination is not unreachable.
+define void @t3_bad_shared_normal_dest() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t3_bad_shared_normal_dest(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT]] unwind label [[LPAD]]
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  call void @sideeffect()
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont unwind label %lpad
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; normal destinations are not unreachable.
+define void @t4_normal_dests() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t4_normal_dests(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    call void @another_sideeffect()
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  call void @sideeffect()
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  call void @another_sideeffect()
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; Invokes lead to different landing pads.
+define void @t5_different_landingpads() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t5_different_landingpads(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD0:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       common.resume:
+; CHECK-NEXT:    [[COMMON_RESUME_OP:%.*]] = phi { i8*, i32 } [ [[EH0:%.*]], [[LPAD0]] ], [ [[EH1:%.*]], [[LPAD1:%.*]] ]
+; CHECK-NEXT:    resume { i8*, i32 } [[COMMON_RESUME_OP]]
+; CHECK:       lpad0:
+; CHECK-NEXT:    [[EH0]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    br label [[COMMON_RESUME:%.*]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD1]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad1:
+; CHECK-NEXT:    [[EH1]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @another_destructor()
+; CHECK-NEXT:    br label [[COMMON_RESUME]]
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad0
+
+invoke.cont0:
+  unreachable
+
+lpad0:
+  %eh0 = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh0
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad1
+
+invoke.cont2:
+  unreachable
+
+lpad1:
+  %eh1 = landingpad { i8*, i32 } cleanup
+  call void @another_destructor()
+  resume { i8*, i32 } %eh1
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; The invoked functions are different
+define void @t6_different_invokes() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t6_different_invokes(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @another_simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @another_simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; Merging of this invoke is disallowed
+define void @t7_nomerge0() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t7_nomerge0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw() #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() nomerge to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+define void @t8_nomerge1() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t8_nomerge1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw() #[[ATTR1]]
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() nomerge to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+define void @t9_nomerge2() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t9_nomerge2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw() #[[ATTR1]]
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw() #[[ATTR1]]
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() nomerge to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() nomerge to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; Just don't deal with inlineasm.
+define void @t10_inlineasm() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t10_inlineasm(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void asm sideeffect "something bad", ""()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void asm sideeffect "something bad", ""()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void asm sideeffect "something bad", ""() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void asm sideeffect "something bad", ""() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; More interesting test, here we can merge the invokes.
+define void @t11_phi_in_landingpad() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t11_phi_in_landingpad(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[IF_THEN0]] ], [ 1, [[IF_THEN1:%.*]] ]
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @consume(i32 [[PHI]])
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %phi = phi i32 [ 0, %if.then0 ], [ 1, %if.then1 ]
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @consume(i32 %phi)
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; It is okay for the invoke to take arguments
+define void @t12_arguments_are_fine() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t12_arguments_are_fine(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw_taking_argument(i32 42)
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw_taking_argument(i32 42)
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw_taking_argument(i32 42) to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw_taking_argument(i32 42) to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; It is okay for the invoke to take different arguments
+define void @t13_different_arguments_are_fine() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t13_different_arguments_are_fine(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw_taking_argument(i32 0)
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw_taking_argument(i32 42)
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw_taking_argument(i32 0) to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw_taking_argument(i32 42) to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; There can be more than two invokes in a set
+define void @t14_three_invokes_only_two_compatible() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t14_three_invokes_only_two_compatible(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE0:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else0:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else1:
+; CHECK-NEXT:    [[C2:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C2]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont3:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else0
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else0:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.else1
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.else1:
+  %c2 = call i1 @cond()
+  br i1 %c2, label %if.then2, label %if.end
+
+if.then2:
+  invoke void @simple_throw() to label %invoke.cont3 unwind label %lpad
+
+invoke.cont3:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; If not all invokes of landingpad are compatible then we still merge compatible ones.
+define void @t15_three_invokes_only_two_compatible() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t15_three_invokes_only_two_compatible(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE0:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else0:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else1:
+; CHECK-NEXT:    [[C2:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C2]], label [[IF_THEN2:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    invoke void @another_simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont3:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else0
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else0:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.else1
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.else1:
+  %c2 = call i1 @cond()
+  br i1 %c2, label %if.then2, label %if.end
+
+if.then2:
+  invoke void @another_simple_throw() to label %invoke.cont3 unwind label %lpad
+
+invoke.cont3:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; We succeed in merging invokes into two sets
+define void @t16_four_invokes_forming_two_sets() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t16_four_invokes_forming_two_sets(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE0:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else0:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_ELSE1:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else1:
+; CHECK-NEXT:    [[C2:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C2]], label [[IF_THEN2:%.*]], label [[IF_ELSE2:%.*]]
+; CHECK:       if.then2:
+; CHECK-NEXT:    invoke void @another_simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT3:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont3:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.else2:
+; CHECK-NEXT:    [[C3:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C3]], label [[IF_THEN3:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then3:
+; CHECK-NEXT:    invoke void @another_simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT4:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont4:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else0
+
+if.then0:
+  invoke void @simple_throw() to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else0:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.else1
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.else1:
+  %c2 = call i1 @cond()
+  br i1 %c2, label %if.then2, label %if.else2
+
+if.then2:
+  invoke void @another_simple_throw() to label %invoke.cont3 unwind label %lpad
+
+invoke.cont3:
+  unreachable
+
+if.else2:
+  %c3 = call i1 @cond()
+  br i1 %c3, label %if.then3, label %if.end
+
+if.then3:
+  invoke void @another_simple_throw() to label %invoke.cont4 unwind label %lpad
+
+invoke.cont4:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; Attributes must match
+define void @t17_mismatched_attrs_prevent_merge() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t17_mismatched_attrs_prevent_merge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw() #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() readnone to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; Common attributes are preserved
+define void @t18_attributes_are_preserved() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t18_attributes_are_preserved(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw() #[[ATTR2]]
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw() #[[ATTR2]]
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() readnone to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() readnone to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; Fully identical operand bundles are good.
+define void @t19_compatible_operand_bundle() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t19_compatible_operand_bundle(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw() [ "abc"(i32 42) ]
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw() [ "abc"(i32 42) ]
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() [ "abc"(i32 42) ] to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() [ "abc"(i32 42) ] to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; Operand bundles must be compatible, else we can't merge.
+define void @t20_incompatible_operand_bundle() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t20_incompatible_operand_bundle(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw() [ "abc"(i32 42) ]
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw() [ "def"(i32 0) ]
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() [ "abc"(i32 42) ] to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() [ "def"(i32 0) ] to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+; We need to PHI together the arguments of the operand bundles.
+define void @t21_semicompatible_operand_bundle() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t21_semicompatible_operand_bundle(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C0:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C0]], label [[IF_THEN0:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then0:
+; CHECK-NEXT:    invoke void @simple_throw() [ "abc"(i32 42) ]
+; CHECK-NEXT:    to label [[INVOKE_CONT0:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont0:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.else:
+; CHECK-NEXT:    [[C1:%.*]] = call i1 @cond()
+; CHECK-NEXT:    br i1 [[C1]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then1:
+; CHECK-NEXT:    invoke void @simple_throw() [ "abc"(i32 0) ]
+; CHECK-NEXT:    to label [[INVOKE_CONT2:%.*]] unwind label [[LPAD]]
+; CHECK:       invoke.cont2:
+; CHECK-NEXT:    unreachable
+; CHECK:       if.end:
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c0 = call i1 @cond()
+  br i1 %c0, label %if.then0, label %if.else
+
+if.then0:
+  invoke void @simple_throw() [ "abc"(i32 42) ] to label %invoke.cont0 unwind label %lpad
+
+invoke.cont0:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.else:
+  %c1 = call i1 @cond()
+  br i1 %c1, label %if.then1, label %if.end
+
+if.then1:
+  invoke void @simple_throw() [ "abc"(i32 0) ] to label %invoke.cont2 unwind label %lpad
+
+invoke.cont2:
+  unreachable
+
+if.end:
+  call void @sideeffect()
+  ret void
+}
+
+declare i1 @cond()
+
+declare void @sideeffect()
+declare void @another_sideeffect()
+
+declare void @simple_throw() noreturn
+declare void @another_simple_throw() noreturn
+
+declare void @simple_throw_taking_argument(i32) noreturn
+
+declare void @destructor()
+declare void @another_destructor()
+
+declare void @consume(i32)
+
+declare dso_local i32 @__gxx_personality_v0(...)
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn }
+; CHECK: attributes #[[ATTR1]] = { nomerge }
+; CHECK: attributes #[[ATTR2]] = { readnone }
+;.

From 48224475222d14e6f661d649f4363dcc197cc7ef Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Thu, 20 Jan 2022 13:53:59 +0100
Subject: [PATCH 035/946] [libc++] basic_string::resize_and_overwrite: Adopt
 LWG3645 (Not voted in yet)

Adopt LWG3645, which fixes the value categories of basic_string::resize_and_overwrite
https://timsong-cpp.github.io/lwg-issues/3645

Reviewed By: ldionne, #libc

Spies: libcxx-commits

Differential Revision: https://reviews.llvm.org/D116815
---
 libcxx/docs/Status/Cxx2bIssues.csv                             | 2 ++
 libcxx/include/string                                          | 3 +--
 .../basic.string/string.capacity/resize_and_overwrite.pass.cpp | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 1a8f6a4792c18..726668f3bdb2a 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -137,3 +137,5 @@
 `3593 <https://wg21.link/LWG3593>`__,"Several iterators' ``base() const &`` and ``lazy_split_view::outer-iterator::value_type::end()`` missing ``noexcept``","October 2021","","","|ranges|"
 `3595 <https://wg21.link/LWG3595>`__,"Exposition-only classes proxy and postfix-proxy for ``common_iterator`` should be fully ``constexpr``","October 2021","","","|ranges|"
 "","","","",""
+`3645 <https://wg21.link/LWG3645>`__,"``resize_and_overwrite`` is overspecified to call its callback with lvalues", "Not voted in","|Complete|","14.0",""
+"","","","",""
\ No newline at end of file
diff --git a/libcxx/include/string b/libcxx/include/string
index c4f2b008cafe1..b2eef646f9827 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -979,8 +979,7 @@ public:
     _LIBCPP_HIDE_FROM_ABI constexpr
     void resize_and_overwrite(size_type __n, _Op __op) {
       __resize_default_init(__n);
-      pointer __data = data();
-      __erase_to_end(_VSTD::move(__op)(__data, __n));
+      __erase_to_end(_VSTD::move(__op)(data(), _LIBCPP_AUTO_CAST(__n)));
     }
 #endif
 
diff --git a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
index 3f97537dd2df0..61312fa5ec49a 100644
--- a/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.capacity/resize_and_overwrite.pass.cpp
@@ -76,7 +76,8 @@ constexpr bool test() {
 
 void test_value_categories() {
   std::string s;
-  s.resize_and_overwrite(10, [](char*&, size_t&) { return 0; });
+  s.resize_and_overwrite(10, [](char*&&, size_t&&) { return 0; });
+  s.resize_and_overwrite(10, [](char* const&, const size_t&) { return 0; });
   struct RefQualified {
     int operator()(char*, size_t) && { return 0; }
   };

From 2e49e0cfde43fa96e50ee89573ec6ff8a31c7c24 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 11:06:25 -0500
Subject: [PATCH 036/946] AMDGPU/GlobalISel: Directly diagnose return value use
 for FP atomics

Emit an error if the return value is used on subtargets that do not
support them. Previously we were falling back to the DAG on selection
failure, where it would emit this error and then fail again.
---
 llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 18 ++++++++++++++++--
 ...m.amdgcn.raw.buffer.atomic.fadd-with-ret.ll |  3 ++-
 ...mdgcn.struct.buffer.atomic.fadd-with-ret.ll |  6 ++++--
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 93bddd9681ed7..20b2b0f1be0ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -5181,8 +5181,6 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
   case Intrinsic::amdgcn_struct_buffer_atomic_inc:
   case Intrinsic::amdgcn_raw_buffer_atomic_dec:
   case Intrinsic::amdgcn_struct_buffer_atomic_dec:
-  case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
-  case Intrinsic::amdgcn_struct_buffer_atomic_fadd:
   case Intrinsic::amdgcn_raw_buffer_atomic_cmpswap:
   case Intrinsic::amdgcn_struct_buffer_atomic_cmpswap:
   case Intrinsic::amdgcn_raw_buffer_atomic_fmin:
@@ -5190,6 +5188,22 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
   case Intrinsic::amdgcn_raw_buffer_atomic_fmax:
   case Intrinsic::amdgcn_struct_buffer_atomic_fmax:
     return legalizeBufferAtomic(MI, B, IntrID);
+  case Intrinsic::amdgcn_raw_buffer_atomic_fadd:
+  case Intrinsic::amdgcn_struct_buffer_atomic_fadd: {
+    Register DstReg = MI.getOperand(0).getReg();
+    if (!MRI.use_empty(DstReg) && !ST.hasGFX90AInsts()) {
+      Function &F = B.getMF().getFunction();
+      DiagnosticInfoUnsupported NoFpRet(
+          F, "return versions of fp atomics not supported", B.getDebugLoc(),
+          DS_Error);
+      F.getContext().diagnose(NoFpRet);
+      B.buildUndef(DstReg);
+      MI.eraseFromParent();
+      return true;
+    }
+
+    return legalizeBufferAtomic(MI, B, IntrID);
+  }
   case Intrinsic::amdgcn_atomic_inc:
     return legalizeAtomicIncDec(MI, B, true);
   case Intrinsic::amdgcn_atomic_dec:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll
index 257785fc6bffa..8a0ea3baa68ab 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.raw.buffer.atomic.fadd-with-ret.ll
@@ -1,9 +1,10 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
-; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
+; RUN: not llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
 
 declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg)
 declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
 
+; GFX908: error: {{.*}} return versions of fp atomics not supported
 ; GFX908: error: {{.*}} return versions of fp atomics not supported
 
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
index 3a31fad29010a..74f9704452020 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.struct.buffer.atomic.fadd-with-ret.ll
@@ -1,10 +1,12 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
-; RUN: not --crash llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
+; RUN: not llc -global-isel < %s -march=amdgcn -mcpu=gfx908 -verify-machineinstrs 2>&1 | FileCheck %s -check-prefix=GFX908
+
+; GFX908: error: {{.*}} return versions of fp atomics not supported
+; GFX908: error: {{.*}} return versions of fp atomics not supported
 
 declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg)
 declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)
 
-; GFX908: error: {{.*}} return versions of fp atomics not supported
 
 ; GFX90A-LABEL: {{^}}buffer_atomic_add_f32_rtn:
 ; GFX90A: buffer_atomic_add_f32 v{{[0-9]+}}, v[{{[0-9:]+}}], s[{{[0-9:]+}}], s{{[0-9]+}} idxen offen glc

From c0f9592daae3be17e5fd1528a8f6067cb0c3bd91 Mon Sep 17 00:00:00 2001
From: zijunzhao <zijunzhao@google.com>
Date: Thu, 20 Jan 2022 09:30:51 +0000
Subject: [PATCH 037/946] add tsan shared library

Add tsan shared library on Android. Only build tsan when minSdkVersion is above 23.

Reviewed By: danalbert, vitalybuka

Differential Revision: https://reviews.llvm.org/D108394
---
 compiler-rt/cmake/config-ix.cmake | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index d196deff5dc19..33693ce60321d 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -749,9 +749,14 @@ else()
   set(COMPILER_RT_HAS_PROFILE FALSE)
 endif()
 
-if (COMPILER_RT_HAS_SANITIZER_COMMON AND TSAN_SUPPORTED_ARCH AND
-    OS_NAME MATCHES "Darwin|Linux|FreeBSD|Android|NetBSD")
-  set(COMPILER_RT_HAS_TSAN TRUE)
+if (COMPILER_RT_HAS_SANITIZER_COMMON AND TSAN_SUPPORTED_ARCH)
+  if (OS_NAME MATCHES "Linux|Darwin|FreeBSD|NetBSD")
+    set(COMPILER_RT_HAS_TSAN TRUE)
+  elseif (OS_NAME MATCHES "Android" AND ANDROID_PLATFORM_LEVEL GREATER 23)
+    set(COMPILER_RT_HAS_TSAN TRUE)
+  else()
+    set(COMPILER_RT_HAS_TSAN FALSE)
+  endif()
 else()
   set(COMPILER_RT_HAS_TSAN FALSE)
 endif()

From 08549ba51e11600ff89606d3de59d91d95c21645 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 16:46:49 -0500
Subject: [PATCH 038/946] AMDGPU/GlobalISel: Explicitly set -global-isel-abort
 in failure tests

If the default mode is the fallback, this would fail since it would
end up seeing the DAG failure message instead.
---
 llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll | 2 +-
 llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll           | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll
index 7e75fb037323a..a216a8b509e87 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/global-value.illegal.ll
@@ -1,4 +1,4 @@
-; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
+; RUN: not --crash llc -global-isel -global-isel-abort=1 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -o /dev/null %s 2>&1 | FileCheck -check-prefix=ERR %s
 
 ; FIXME: Should produce context error for each one
 ; ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(p5) = G_GLOBAL_VALUE @external_private (in function: fn_external_private)
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
index d1ab4cb03e3f7..42b81236e55ef 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-a16.ll
@@ -1,8 +1,8 @@
 ; RUN: not --crash llc -global-isel=0 -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=SDAG-ERR %s
-; RUN: not --crash llc -global-isel=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s
+; RUN: not --crash llc -global-isel=1 -global-isel-abort=1 -march=amdgcn -mcpu=fiji -verify-machineinstrs -o /dev/null %s 2>&1 | FileCheck -check-prefix=GISEL-ERR %s
 
 ; Make sure this doesn't assert on targets without the r128-16
-; feature, and instead generates a slection error.
+; feature, and instead generates a selection error.
 
 ; SDAG-ERR: LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.image.load.1d
 ; GISEL-ERR: LLVM ERROR: unable to legalize instruction: %{{[0-9]+}}:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.1d), 15, %{{[0-9]+}}:_(s16), %{{[0-9]+}}:_(<8 x s32>), 0, 0 :: (dereferenceable load (<4 x s32>) from custom "ImageResource") (in function: load_1d)

From 2d1f9aa27dc443287ea308e340b0ec3c284e14ba Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 16:14:56 -0500
Subject: [PATCH 039/946] AMDGPU/GlobalISel: Regenerate test checks with -NEXT

---
 .../AMDGPU/GlobalISel/inst-select-ptrmask.mir | 446 +++++++++---------
 1 file changed, 223 insertions(+), 223 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
index a7f875fcdd428..d0ee43b20b2d9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
@@ -12,9 +12,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], [[COPY1]], implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %1:sgpr(s32) = COPY $sgpr1
     %2:sgpr(p3) = G_PTRMASK %0, %1
@@ -33,9 +33,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xf0f0f0f0
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -252645136
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -252645136
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 -252645136
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -54,9 +54,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0xffffffff
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -1
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 -1
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -75,9 +75,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_0x00000000
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 0
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -96,9 +96,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi1
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -2147483648
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2147483648
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32  -2147483648
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -117,9 +117,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearhi2
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -1073741824
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -1073741824
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32  -1073741824
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -138,9 +138,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo1
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -2
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -2
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 -2
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -159,9 +159,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo2
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -4
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -4
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 -4
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -180,9 +180,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo3
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -8
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -8
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 -8
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -201,9 +201,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo4
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -16
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -16
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 -16
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -222,9 +222,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_s32_sgpr_sgpr_clearlo29
     ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; CHECK: %const:sreg_32 = S_MOV_B32 -536870912
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
-    ; CHECK: S_ENDPGM 0, implicit [[S_AND_B32_]]
+    ; CHECK-NEXT: %const:sreg_32 = S_MOV_B32 -536870912
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B32_]]
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 -536870912
     %1:sgpr(p3) = G_PTRMASK %0, %const
@@ -243,15 +243,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
-    ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY5]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
+    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY5]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = COPY $sgpr2_sgpr3
     %2:sgpr(p0) = G_PTRMASK %0, %1
@@ -270,10 +270,10 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xffffffffffffffff
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_CONSTANT i64 -1
     %2:sgpr(p0) = G_PTRMASK %0, %1
@@ -292,15 +292,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0x0000000000000000
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
-    ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
+    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_CONSTANT i64 0
     %2:sgpr(p0) = G_PTRMASK %0, %1
@@ -319,17 +319,17 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0xf0f0f0f0f0f0f0f0
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160
-    ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-    ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
+    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096
     %2:sgpr(p0) = G_PTRMASK %0, %1
@@ -348,17 +348,17 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi1
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
-    ; CHECK: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
-    ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
+    ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
+    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -377,15 +377,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearhi32
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
-    ; CHECK: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+    ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 -4294967296
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -404,17 +404,17 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clear_32
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
-    ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
-    ; CHECK: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
-    ; CHECK: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
+    ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
+    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 4294967296
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -433,13 +433,13 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo1
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: %const:sreg_64 = S_MOV_B64 -2
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -2
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 -2
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -458,13 +458,13 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo2
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: %const:sreg_64 = S_MOV_B64 -4
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -4
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 -4
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -483,13 +483,13 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo3
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: %const:sreg_64 = S_MOV_B64 -8
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -8
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 -8
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -508,13 +508,13 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo4
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: %const:sreg_64 = S_MOV_B64 -16
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: %const:sreg_64 = S_MOV_B64 -16
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 -16
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -533,15 +533,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_clearlo29
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3758096384
-    ; CHECK: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
-    ; CHECK: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 3758096384
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -1
+    ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 -536870912
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -560,9 +560,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_0xf0f0f0f0
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
-    ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+    ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
     %const:vgpr(s32) = G_CONSTANT i32 -252645136
     %1:vgpr(p3) = G_PTRMASK %0, %const
@@ -581,9 +581,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo1
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
-    ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+    ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -2, implicit $exec
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
     %const:vgpr(s32) = G_CONSTANT i32 -2
     %1:vgpr(p3) = G_PTRMASK %0, %const
@@ -602,9 +602,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo2
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
-    ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+    ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -4, implicit $exec
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
     %const:vgpr(s32) = G_CONSTANT i32 -4
     %1:vgpr(p3) = G_PTRMASK %0, %const
@@ -623,9 +623,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo3
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
-    ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+    ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -8, implicit $exec
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
     %const:vgpr(s32) = G_CONSTANT i32 -8
     %1:vgpr(p3) = G_PTRMASK %0, %const
@@ -644,9 +644,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo4
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
-    ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+    ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -16, implicit $exec
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
     %const:vgpr(s32) = G_CONSTANT i32 -16
     %1:vgpr(p3) = G_PTRMASK %0, %const
@@ -665,9 +665,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_vgpr_clearlo29
     ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK: %const:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
-    ; CHECK: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
+    ; CHECK-NEXT: %const:vgpr_32 = V_MOV_B32_e32 -536870912, implicit $exec
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], %const, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_AND_B32_e64_]]
     %0:vgpr(p3) = COPY $vgpr0
     %const:vgpr(s32) = G_CONSTANT i32 -536870912
     %1:vgpr(p3) = G_PTRMASK %0, %const
@@ -686,15 +686,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr
     ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; CHECK: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec
-    ; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
-    ; CHECK: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY3]], [[COPY5]], implicit $exec
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub0
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[COPY1]].sub1
+    ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY3]], [[COPY5]], implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = COPY $vgpr2_vgpr3
     %2:vgpr(p0) = G_PTRMASK %0, %1
@@ -713,17 +713,17 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_vgpr_0xf0f0f0f0f0f0f0f0
     ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec
-    ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
-    ; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
-    ; CHECK: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec
-    ; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4042322160, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -252645136, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub0
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[REG_SEQUENCE]].sub1
+    ; CHECK-NEXT: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], [[COPY4]], implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %1:vgpr(s64) = G_CONSTANT i64 -1085102592571150096
     %2:vgpr(p0) = G_PTRMASK %0, %1
@@ -742,15 +742,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo1
     ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967294, implicit $exec
-    ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967294, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %const:vgpr(s64) = G_CONSTANT i64 -2
     %1:vgpr(p0) = G_PTRMASK %0, %const
@@ -769,15 +769,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo2
     ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
-    ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %const:vgpr(s64) = G_CONSTANT i64 -4
     %1:vgpr(p0) = G_PTRMASK %0, %const
@@ -796,15 +796,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo3
     ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
-    ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967292, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %const:vgpr(s64) = G_CONSTANT i64 -4
     %1:vgpr(p0) = G_PTRMASK %0, %const
@@ -823,15 +823,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo4
     ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967280, implicit $exec
-    ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294967280, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %const:vgpr(s64) = G_CONSTANT i64 -16
     %1:vgpr(p0) = G_PTRMASK %0, %const
@@ -850,15 +850,15 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_vgpr_clearlo29
     ; CHECK: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3758096384, implicit $exec
-    ; CHECK: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
-    ; CHECK: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
-    ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
-    ; CHECK: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
-    ; CHECK: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
-    ; CHECK: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
-    ; CHECK: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
-    ; CHECK: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 3758096384, implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
+    ; CHECK-NEXT: %const:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub1
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY %const.sub0
+    ; CHECK-NEXT: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], [[COPY3]], implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[COPY2]], %subreg.sub1
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:vgpr(p0) = COPY $vgpr0_vgpr1
     %const:vgpr(s64) = G_CONSTANT i64 -536870912
     %1:vgpr(p0) = G_PTRMASK %0, %const
@@ -877,9 +877,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p3_vgpr_sgpr_clearlo2
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(p3) = COPY $sgpr0
-    ; CHECK: %const:sgpr(s32) = G_CONSTANT i32 -4
-    ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p3) = G_PTRMASK [[COPY]], %const(s32)
-    ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p3)
+    ; CHECK-NEXT: %const:sgpr(s32) = G_CONSTANT i32 -4
+    ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p3) = G_PTRMASK [[COPY]], %const(s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p3)
     %0:sgpr(p3) = COPY $sgpr0
     %const:sgpr(s32) = G_CONSTANT i32 -4
     %1:vgpr(p3) = G_PTRMASK %0, %const
@@ -898,9 +898,9 @@ body: |
 
     ; CHECK-LABEL: name: ptrmask_p0_s64_vgpr_sgpr_clearlo2
     ; CHECK: [[COPY:%[0-9]+]]:sgpr(p0) = COPY $sgpr0_sgpr1
-    ; CHECK: %const:sgpr(s32) = G_CONSTANT i32 -4
-    ; CHECK: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], %const(s32)
-    ; CHECK: S_ENDPGM 0, implicit [[PTRMASK]](p0)
+    ; CHECK-NEXT: %const:sgpr(s32) = G_CONSTANT i32 -4
+    ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:vgpr(p0) = G_PTRMASK [[COPY]], %const(s32)
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[PTRMASK]](p0)
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s32) = G_CONSTANT i32 -4
     %1:vgpr(p0) = G_PTRMASK %0, %const

From 064cea9c9a02e8fee77f9e28b3e06cb4908f7a0e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 15:20:39 -0500
Subject: [PATCH 040/946] AMDGPU/GlobalISel: Try to use s_and_b64 in ptrmask
 selection

Avoids a test diff with SDAG.
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 32 ++++++++----
 .../AMDGPU/GlobalISel/inst-select-ptrmask.mir | 50 ++++---------------
 2 files changed, 32 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index d40d5873eacf8..6ab2cb6df3bda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2552,6 +2552,8 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
   Register MaskReg = I.getOperand(2).getReg();
   LLT Ty = MRI->getType(DstReg);
   LLT MaskTy = MRI->getType(MaskReg);
+  MachineBasicBlock *BB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
 
   const RegisterBank *DstRB = RBI.getRegBank(DstReg, *MRI, TRI);
   const RegisterBank *SrcRB = RBI.getRegBank(SrcReg, *MRI, TRI);
@@ -2560,6 +2562,24 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
   if (DstRB != SrcRB) // Should only happen for hand written MIR.
     return false;
 
+  // Try to avoid emitting a bit operation when we only need to touch half of
+  // the 64-bit pointer.
+  APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
+  const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
+  const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
+
+  const bool CanCopyLow32 = (MaskOnes & MaskLo32) == MaskLo32;
+  const bool CanCopyHi32 = (MaskOnes & MaskHi32) == MaskHi32;
+
+  if (!IsVGPR && Ty.getSizeInBits() == 64 &&
+      !CanCopyLow32 && !CanCopyHi32) {
+    auto MIB = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_AND_B64), DstReg)
+      .addReg(SrcReg)
+      .addReg(MaskReg);
+    I.eraseFromParent();
+    return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+  }
+
   unsigned NewOpc = IsVGPR ? AMDGPU::V_AND_B32_e64 : AMDGPU::S_AND_B32;
   const TargetRegisterClass &RegRC
     = IsVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::SReg_32RegClass;
@@ -2576,8 +2596,6 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
       !RBI.constrainGenericRegister(MaskReg, *MaskRC, *MRI))
     return false;
 
-  MachineBasicBlock *BB = I.getParent();
-  const DebugLoc &DL = I.getDebugLoc();
   if (Ty.getSizeInBits() == 32) {
     assert(MaskTy.getSizeInBits() == 32 &&
            "ptrmask should have been narrowed during legalize");
@@ -2600,13 +2618,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
 
   Register MaskedLo, MaskedHi;
 
-  // Try to avoid emitting a bit operation when we only need to touch half of
-  // the 64-bit pointer.
-  APInt MaskOnes = KnownBits->getKnownOnes(MaskReg).zextOrSelf(64);
-
-  const APInt MaskHi32 = APInt::getHighBitsSet(64, 32);
-  const APInt MaskLo32 = APInt::getLowBitsSet(64, 32);
-  if ((MaskOnes & MaskLo32) == MaskLo32) {
+  if (CanCopyLow32) {
     // If all the bits in the low half are 1, we only need a copy for it.
     MaskedLo = LoReg;
   } else {
@@ -2621,7 +2633,7 @@ bool AMDGPUInstructionSelector::selectG_PTRMASK(MachineInstr &I) const {
       .addReg(MaskLo);
   }
 
-  if ((MaskOnes & MaskHi32) == MaskHi32) {
+  if (CanCopyHi32) {
     // If all the bits in the high half are 1, we only need a copy for it.
     MaskedHi = HiReg;
   } else {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
index d0ee43b20b2d9..4861a891e059f 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ptrmask.mir
@@ -244,14 +244,8 @@ body: |
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub0
-    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY [[COPY1]].sub1
-    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], [[COPY5]], implicit-def $scc
-    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[COPY1]], implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = COPY $sgpr2_sgpr3
     %2:sgpr(p0) = G_PTRMASK %0, %1
@@ -293,14 +287,8 @@ body: |
     ; CHECK-LABEL: name: ptrmask_p0_s64_sgpr_sgpr_sgpr_0x0000000000000000
     ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
     ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub0
-    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_MOV_B64_]].sub1
-    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[S_MOV_B64_]], implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_CONSTANT i64 0
     %2:sgpr(p0) = G_PTRMASK %0, %1
@@ -322,14 +310,8 @@ body: |
     ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4042322160
     ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -252645136
     ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub0
-    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE]].sub1
-    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE1]]
+    ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], [[REG_SEQUENCE]], implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %1:sgpr(s64) = G_CONSTANT i64 -1085102592571150096
     %2:sgpr(p0) = G_PTRMASK %0, %1
@@ -351,14 +333,8 @@ body: |
     ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
     ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
-    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 -9223372036854775808
     %1:sgpr(p0) = G_PTRMASK %0, %const
@@ -407,14 +383,8 @@ body: |
     ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
     ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1
     ; CHECK-NEXT: %const:sreg_64 = REG_SEQUENCE [[S_MOV_B32_]], %subreg.sub0, [[S_MOV_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
-    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
-    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY %const.sub0
-    ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[COPY3]], implicit-def $scc
-    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY %const.sub1
-    ; CHECK-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[COPY4]], implicit-def $scc
-    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[S_AND_B32_]], %subreg.sub0, [[S_AND_B32_1]], %subreg.sub1
-    ; CHECK-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], %const, implicit-def $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_AND_B64_]]
     %0:sgpr(p0) = COPY $sgpr0_sgpr1
     %const:sgpr(s64) = G_CONSTANT i64 4294967296
     %1:sgpr(p0) = G_PTRMASK %0, %const

From 237502c1a478a68ee4a0e173efc2d4684e58d0bb Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 17:39:27 -0500
Subject: [PATCH 041/946] AMDGPU: Fix asm in test using wrong IR type for
 physical register

---
 llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
index c4c887b1906a0..91d2ec82c81e7 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
@@ -55,14 +55,14 @@ define amdgpu_kernel void @test() #1 {
 ; GFX10-NEXT:    s_waitcnt_depctr 0xffe3
 ; GFX10-NEXT:    s_mov_b64 exec, s[6:7]
 ; GFX10-NEXT:    ;;#ASMSTART
-; GFX10-NEXT:    ; use s[0:3]
+; GFX10-NEXT:    ; use s[0:4]
 ; GFX10-NEXT:    ;;#ASMEND
 ; GFX10-NEXT:    s_endpgm
   %wide.sgpr0 = call <8 x i32> asm sideeffect "; def $0", "={s[0:7]}" () #0
-  %wide.sgpr2 = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}" () #0
+  %wide.sgpr2 = call <5 x i32> asm sideeffect "; def $0", "={s[8:12]}" () #0
   call void asm sideeffect "", "~{v[0:7]}" () #0
   call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr0) #0
-  call void asm sideeffect "; use $0", "s"(<4 x i32> %wide.sgpr2) #0
+  call void asm sideeffect "; use $0", "s"(<5 x i32> %wide.sgpr2) #0
   ret void
 }
 

From 5af2433e1794ebf7e58e848aa612c7912d71dc78 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@legionlabs.com>
Date: Thu, 20 Jan 2022 11:04:46 -0500
Subject: [PATCH 042/946] [clang-cl] Support the /HOTPATCH flag

This patch adds support for the MSVC /HOTPATCH flag: https://docs.microsoft.com/sv-se/cpp/build/reference/hotpatch-create-hotpatchable-image?view=msvc-170&viewFallbackFrom=vs-2019

The flag is translated to a new -fms-hotpatch flag, which in turn adds a 'patchable-function' attribute for each function in the TU. This is then picked up by the PatchableFunction pass which would generate a TargetOpcode::PATCHABLE_OP of minsize = 2 (which means the target instruction must resolve to at least two bytes). TargetOpcode::PATCHABLE_OP is only implemented for x86/x64. When targetting ARM/ARM64, /HOTPATCH isn't required (instructions are always 2/4 bytes and suitable for hotpatching).

Additionally, when using /Z7, we generate a 'hot patchable' flag in the CodeView debug stream, in the S_COMPILE3 record. This flag is then picked up by LLD (or link.exe) and is used in conjunction with the linker /FUNCTIONPADMIN flag to generate extra space before each function, to accommodate for live patching long jumps. Please see: https://github.com/llvm/llvm-project/blob/d703b922961e0d02a5effdd4bfbb23ad50a3cc9f/lld/COFF/Writer.cpp#L1298

The outcome is that we can finally use Live++ or Recode along with clang-cl.

NOTE: It seems that MSVC cl.exe always enables /HOTPATCH on x64 by default, although if we did the same I thought we might generate sub-optimal code (if this flag was active by default). Additionally, MSVC always generates a .debug$S section and a S_COMPILE3 record, which Clang doesn't do without /Z7. Therefore, the following MSVC command-line "cl /c file.cpp" would have to be written with Clang such as "clang-cl /c file.cpp /HOTPATCH /Z7" in order to obtain the same result.

Depends on D43002, D80833 and D81301 for the full feature.

Differential Revision: https://reviews.llvm.org/D116511
---
 clang/docs/ReleaseNotes.rst                   | 10 +++++++
 clang/include/clang/Basic/CodeGenOptions.def  |  3 +++
 clang/include/clang/Driver/Options.td         |  6 ++++-
 clang/lib/CodeGen/BackendUtil.cpp             |  1 +
 clang/lib/CodeGen/CodeGenFunction.cpp         |  7 +++++
 clang/lib/Driver/ToolChains/Clang.cpp         |  2 ++
 clang/lib/Driver/ToolChains/MSVC.cpp          |  5 ++++
 clang/test/CodeGen/patchable-function-entry.c |  5 ++++
 .../CodeGenCXX/debug-info-hotpatch-arm.cpp    | 26 +++++++++++++++++++
 clang/test/CodeGenCXX/debug-info-hotpatch.cpp | 20 ++++++++++++++
 clang/test/Driver/cl-options.c                |  4 ++-
 llvm/include/llvm/Target/TargetOptions.h      |  9 ++++---
 llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp |  6 +++++
 .../COFF/ARMNT/arm-register-variables.ll      |  3 ++-
 llvm/test/MC/AArch64/coff-debug.ll            |  3 ++-
 15 files changed, 103 insertions(+), 7 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
 create mode 100644 clang/test/CodeGenCXX/debug-info-hotpatch.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index de395b5d035ec..08e4d75299d2b 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -136,6 +136,16 @@ Windows Support
   or pass ``/permissive`` to disable C++ operator names altogether. See
   `PR42427 <https://llvm.org/pr42427>` for more info.
 
+- Add support for MSVC-compatible ``/hotpatch`` flag in clang-cl, and equivalent
+  -cc1 flag ``-fms-hotpatch``. Along with the linker flag ``/functionpadmin``
+  this creates executable images suitable for runtime code patching. This flag
+  is only required for x86/x64 targets; ARM/ARM64 simply needs the linker
+  ``/functionpadmin``.
+
+  With this addition, clang-cl can be used in live code patching scenarios,
+  along with tools such as Live++ or Recode. Microsoft Edit and Continue isn't
+  currently supported.
+
 C Language Changes in Clang
 ---------------------------
 
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index d4742cddd00c9..3526b8a4a9044 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -139,6 +139,9 @@ VALUE_CODEGENOPT(XRaySelectedFunctionGroup, 32, 0)
 VALUE_CODEGENOPT(PatchableFunctionEntryCount , 32, 0) ///< Number of NOPs at function entry
 VALUE_CODEGENOPT(PatchableFunctionEntryOffset , 32, 0)
 
+CODEGENOPT(HotPatch, 1, 0) ///< Supports the Microsoft /HOTPATCH flag and
+                           ///< generates a 'patchable-function' attribute.
+
 CODEGENOPT(InstrumentForProfiling , 1, 0) ///< Set when -pg is enabled.
 CODEGENOPT(CallFEntry , 1, 0) ///< Set when -mfentry is enabled.
 CODEGENOPT(MNopMCount , 1, 0) ///< Set when -mnop-mcount is enabled.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 1a9fd61328f83..b66363b1d3e92 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -2498,6 +2498,9 @@ defm pascal_strings : BoolFOption<"pascal-strings",
 def fpatchable_function_entry_EQ : Joined<["-"], "fpatchable-function-entry=">, Group<f_Group>, Flags<[CC1Option]>,
   MetaVarName<"<N,M>">, HelpText<"Generate M NOPs before function entry and N-M NOPs after function entry">,
   MarshallingInfoInt<CodeGenOpts<"PatchableFunctionEntryCount">>;
+def fms_hotpatch : Flag<["-"], "fms-hotpatch">, Group<f_Group>, Flags<[CC1Option, CoreOption]>,
+  HelpText<"Ensure that all functions can be hotpatched at runtime">,
+  MarshallingInfoFlag<CodeGenOpts<"HotPatch">>;
 def fpcc_struct_return : Flag<["-"], "fpcc-struct-return">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Override the default ABI to return all structs on the stack">;
 def fpch_preprocess : Flag<["-"], "fpch-preprocess">, Group<f_Group>;
@@ -6124,6 +6127,8 @@ def _SLASH_Gw_ : CLFlag<"Gw-">,
 def _SLASH_help : CLFlag<"help">, Alias<help>,
   HelpText<"Display available options">;
 def _SLASH_HELP : CLFlag<"HELP">, Alias<help>;
+def _SLASH_hotpatch : CLFlag<"hotpatch">, Alias<fms_hotpatch>,
+  HelpText<"Create hotpatchable image">;
 def _SLASH_I : CLJoinedOrSeparate<"I">,
   HelpText<"Add directory to include search path">, MetaVarName<"<dir>">,
   Alias<I>;
@@ -6480,7 +6485,6 @@ def _SLASH_headerUnit : CLJoinedOrSeparate<"headerUnit">;
 def _SLASH_headerUnitAngle : CLJoinedOrSeparate<"headerUnit:angle">;
 def _SLASH_headerUnitQuote : CLJoinedOrSeparate<"headerUnit:quote">;
 def _SLASH_homeparams : CLFlag<"homeparams">;
-def _SLASH_hotpatch : CLFlag<"hotpatch">;
 def _SLASH_kernel : CLFlag<"kernel">;
 def _SLASH_LN : CLFlag<"LN">;
 def _SLASH_MP : CLJoined<"MP">;
diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 67fee7f35ca17..6b8e052305b49 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -645,6 +645,7 @@ static bool initTargetOptions(DiagnosticsEngine &Diags,
   Options.MCOptions.CommandLineArgs = CodeGenOpts.CommandLineArgs;
   Options.DebugStrictDwarf = CodeGenOpts.DebugStrictDwarf;
   Options.ObjectFilenameForDebug = CodeGenOpts.ObjectFilenameForDebug;
+  Options.Hotpatch = CodeGenOpts.HotPatch;
 
   return true;
 }
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 54ddbff3fb038..50e1638924d1a 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -882,6 +882,13 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy,
     if (Offset)
       Fn->addFnAttr("patchable-function-prefix", std::to_string(Offset));
   }
+  // Instruct that functions for COFF/CodeView targets should start with a
+  // patchable instruction, but only on x86/x64. Don't forward this to ARM/ARM64
+  // backends as they don't need it -- instructions on these architectures are
+  // always atomically patchable at runtime.
+  if (CGM.getCodeGenOpts().HotPatch &&
+      getContext().getTargetInfo().getTriple().isX86())
+    Fn->addFnAttr("patchable-function", "prologue-short-redirect");
 
   // Add no-jump-tables value.
   if (CGM.getCodeGenOpts().NoUseJumpTables)
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index c48c6fd59bec3..a22f03a488486 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6001,6 +6001,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
+  Args.AddLastArg(CmdArgs, options::OPT_fms_hotpatch);
+
   if (TC.SupportsProfiling()) {
     Args.AddLastArg(CmdArgs, options::OPT_pg);
 
diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index 4e15c3ab51cef..7e8636adc2728 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -511,6 +511,11 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA,
   if (Args.hasArg(options::OPT_g_Group, options::OPT__SLASH_Z7))
     CmdArgs.push_back("-debug");
 
+  // If we specify /hotpatch, let the linker add padding in front of each
+  // function, like MSVC does.
+  if (Args.hasArg(options::OPT_fms_hotpatch, options::OPT__SLASH_hotpatch))
+    CmdArgs.push_back("-functionpadmin");
+
   // Pass on /Brepro if it was passed to the compiler.
   // Note that /Brepro maps to -mno-incremental-linker-compatible.
   bool DefaultIncrementalLinkerCompatible =
diff --git a/clang/test/CodeGen/patchable-function-entry.c b/clang/test/CodeGen/patchable-function-entry.c
index 3065eb2efa551..6e8d0d743cf45 100644
--- a/clang/test/CodeGen/patchable-function-entry.c
+++ b/clang/test/CodeGen/patchable-function-entry.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -triple aarch64 -emit-llvm %s -o - | FileCheck %s
 // RUN: %clang_cc1 -triple x86_64 -emit-llvm %s -fpatchable-function-entry=1 -o - | FileCheck --check-prefixes=CHECK,OPT %s
+// RUN: %clang_cc1 -triple x86_64 -emit-llvm %s -fms-hotpatch -o - | FileCheck --check-prefixes=HOTPATCH %s
 
 // CHECK: define{{.*}} void @f0() #0
 __attribute__((patchable_function_entry(0))) void f0() {}
@@ -34,3 +35,7 @@ void f() {}
 // CHECK: attributes #2 = { {{.*}} "patchable-function-entry"="0" "patchable-function-prefix"="4"
 // CHECK: attributes #3 = { {{.*}} "patchable-function-entry"="3" "patchable-function-prefix"="2"
 // OPT:   attributes #4 = { {{.*}} "patchable-function-entry"="1"
+// HOTPATCH: attributes #0 = { {{.*}} "patchable-function"="prologue-short-redirect"
+// HOTPATCH: attributes #1 = { {{.*}} "patchable-function"="prologue-short-redirect"
+// HOTPATCH: attributes #2 = { {{.*}} "patchable-function"="prologue-short-redirect"
+// HOTPATCH: attributes #3 = { {{.*}} "patchable-function"="prologue-short-redirect"
diff --git a/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp b/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
new file mode 100644
index 0000000000000..6176f1788760a
--- /dev/null
+++ b/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
@@ -0,0 +1,26 @@
+// REQUIRES: aarch64-registered-target || arm-registered-target
+///
+/// Check that using /hotpatch doesn't generate an error.
+/// Binaries are always hotpatchable on ARM/ARM64.
+///
+// RUN: %clang_cl --target=arm-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1
+///
+/// Ensure that we set the hotpatchable flag in the debug information.
+///
+// RUN: %clang_cl --target=arm-pc-windows-msvc /c /Z7 -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /Z7 -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
+// HOTPATCH: S_COMPILE3 [size = [[#]]]
+// HOTPATCH: flags = hot patchable
+///
+/// Unfortunately we need /Z7, Clang does not systematically generate S_COMPILE3.
+///
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=NO-HOTPATCH
+// NO-HOTPATCH-NOT: flags = hot patchable
+
+int main() {
+  return 0;
+}
diff --git a/clang/test/CodeGenCXX/debug-info-hotpatch.cpp b/clang/test/CodeGenCXX/debug-info-hotpatch.cpp
new file mode 100644
index 0000000000000..fde1a6ad085ea
--- /dev/null
+++ b/clang/test/CodeGenCXX/debug-info-hotpatch.cpp
@@ -0,0 +1,20 @@
+// REQUIRES: x86-registered-target
+///
+// RUN: %clang_cl --target=x86_64-windows-msvc /c /hotpatch /Z7 -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
+// HOTPATCH: S_COMPILE3 [size = [[#]]]
+// HOTPATCH: flags = hot patchable
+///
+// RUN: %clang_cl --target=x86_64-windows-msvc /c /Z7 -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=NO-HOTPATCH
+// NO-HOTPATCH-NOT: flags = hot patchable
+///
+// RUN: %clang_cl --target=x86_64-windows-msvc /hotpatch -### -- %s 2>&1 \
+// RUN:    | FileCheck %s --check-prefix=FUNCTIONPADMIN
+// FUNCTIONPADMIN: clang{{.*}}
+// FUNCTIONPADMIN: {{link.*"}}
+// FUNCTIONPADMIN: -functionpadmin
+
+int main() {
+  return 0;
+}
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
index 618be2d230f94..f39db87660125 100644
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -118,6 +118,9 @@
 // RUN: %clang_cl /Gw /Gw- -### -- %s 2>&1 | FileCheck -check-prefix=Gw_ %s
 // Gw_-NOT: -fdata-sections
 
+// RUN: %clang_cl /hotpatch -### -- %s 2>&1 | FileCheck -check-prefix=hotpatch %s
+// hotpatch: -fms-hotpatch
+
 // RUN: %clang_cl /Imyincludedir -### -- %s 2>&1 | FileCheck -check-prefix=SLASH_I %s
 // RUN: %clang_cl /I myincludedir -### -- %s 2>&1 | FileCheck -check-prefix=SLASH_I %s
 // SLASH_I: "-I" "myincludedir"
@@ -483,7 +486,6 @@
 // RUN:     /GZ \
 // RUN:     /H \
 // RUN:     /homeparams \
-// RUN:     /hotpatch \
 // RUN:     /JMC \
 // RUN:     /kernel \
 // RUN:     /LN \
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index c639f326abc9d..a636c48228325 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -140,9 +140,9 @@ namespace llvm {
           EnableMachineFunctionSplitter(false), SupportsDefaultOutlining(false),
           EmitAddrsig(false), EmitCallSiteInfo(false),
           SupportsDebugEntryValues(false), EnableDebugEntryValues(false),
-          ValueTrackingVariableLocations(false),
-          ForceDwarfFrameSection(false), XRayOmitFunctionIndex(false),
-          DebugStrictDwarf(false),
+          ValueTrackingVariableLocations(false), ForceDwarfFrameSection(false),
+          XRayOmitFunctionIndex(false), DebugStrictDwarf(false),
+          Hotpatch(false),
           FPDenormalMode(DenormalMode::IEEE, DenormalMode::IEEE) {}
 
     /// DisableFramePointerElim - This returns true if frame pointer elimination
@@ -342,6 +342,9 @@ namespace llvm {
     /// By default, it is set to false.
     unsigned DebugStrictDwarf : 1;
 
+    /// Emit the hotpatch flag in CodeView debug.
+    unsigned Hotpatch : 1;
+
     /// Name of the stack usage file (i.e., .su file) if user passes
     /// -fstack-usage. If empty, it can be implied that -fstack-usage is not
     /// passed on the command line.
diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 9a8188e5cb468..52c74713551cb 100644
--- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -846,6 +846,12 @@ void CodeViewDebug::emitCompilerInformation() {
   if (MMI->getModule()->getProfileSummary(/*IsCS*/ false) != nullptr) {
     Flags |= static_cast<uint32_t>(CompileSym3Flags::PGO);
   }
+  using ArchType = llvm::Triple::ArchType;
+  ArchType Arch = Triple(MMI->getModule()->getTargetTriple()).getArch();
+  if (Asm->TM.Options.Hotpatch || Arch == ArchType::thumb ||
+      Arch == ArchType::aarch64) {
+    Flags |= static_cast<uint32_t>(CompileSym3Flags::HotPatch);
+  }
 
   OS.AddComment("Flags and language");
   OS.emitInt32(Flags);
diff --git a/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll b/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll
index d19cca330b2d3..781a4c65abc90 100644
--- a/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll
+++ b/llvm/test/DebugInfo/COFF/ARMNT/arm-register-variables.ll
@@ -24,7 +24,8 @@
 ;      OBJ:   Compile3Sym {
 ; OBJ-NEXT:     Kind: S_COMPILE3 (0x113C)
 ; OBJ-NEXT:     Language: C (0x0)
-; OBJ-NEXT:     Flags [ (0x0)
+; OBJ-NEXT:     Flags [ (0x4000)
+; OBJ-NEXT:       HotPatch (0x4000)
 ; OBJ-NEXT:     ]
 ; OBJ-NEXT:     Machine: ARMNT (0xF4)
 
diff --git a/llvm/test/MC/AArch64/coff-debug.ll b/llvm/test/MC/AArch64/coff-debug.ll
index 6099b3d570b46..af3459ace3531 100644
--- a/llvm/test/MC/AArch64/coff-debug.ll
+++ b/llvm/test/MC/AArch64/coff-debug.ll
@@ -77,7 +77,8 @@ attributes #0 = { noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-ma
 ; CHECK:     Compile3Sym {
 ; CHECK:       Kind: S_COMPILE3 (0x113C)
 ; CHECK:       Language: C (0x0)
-; CHECK:       Flags [ (0x0)
+; CHECK:       Flags [ (0x4000
+; CHECK:        HotPatch (0x4000)
 ; CHECK:       ]
 ; CHECK:     }
 ; CHECK:   ]

From 5abf1163224549902bf34c9d07b822e5283beb7a Mon Sep 17 00:00:00 2001
From: Sergei Grechanik <sergei.grechanik@intel.com>
Date: Thu, 20 Jan 2022 08:54:38 -0800
Subject: [PATCH 043/946] [mlir][vector] Allow values outside of [0; dim-size]
 in create_mask

This commits explicitly states that negative values and values exceeding
vector dimensions are allowed in vector.create_mask (but not in
vector.constant_mask). These values are now truncated when
canonicalizing vector.create_mask to vector.constant_mask.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D116069
---
 mlir/include/mlir/Dialect/Vector/VectorOps.td |  8 +++--
 mlir/lib/Dialect/Vector/VectorOps.cpp         | 15 +++++++--
 mlir/test/Dialect/Vector/canonicalize.mlir    | 33 +++++++++++++++++++
 .../Dialect/Vector/CPU/test-create-mask.mlir  |  6 +++-
 4 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td
index 20b431a7b7b25..826c7d0338f0b 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td
@@ -2131,7 +2131,9 @@ def Vector_ConstantMaskOp :
     specifies an exclusive upper bound [0, mask-dim-size-element-value)
     for a unique dimension in the vector result. The conjunction of the ranges
     define a hyper-rectangular region within which elements values are set to 1
-    (otherwise element values are set to 0).
+    (otherwise element values are set to 0). Each value of 'mask_dim_sizes' must
+    be non-negative and not greater than the size of the corresponding vector
+    dimension (as opposed to vector.create_mask which allows this).
 
     Example:
 
@@ -2169,7 +2171,9 @@ def Vector_CreateMaskOp :
     each operand specifies a range [0, operand-value) for a unique dimension in
     the vector result. The conjunction of the operand ranges define a
     hyper-rectangular region within which elements values are set to 1
-    (otherwise element values are set to 0).
+    (otherwise element values are set to 0). If operand-value is negative, it is
+    treated as if it were zero, and if it is greater than the corresponding
+    dimension size, it is treated as if it were equal to the dimension size.
 
     Example:
 
diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp
index eaa4f4e97e1dd..2e22fc0495bf2 100644
--- a/mlir/lib/Dialect/Vector/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/VectorOps.cpp
@@ -4235,9 +4235,18 @@ class CreateMaskFolder final : public OpRewritePattern<CreateMaskOp> {
       return failure();
     // Gather constant mask dimension sizes.
     SmallVector<int64_t, 4> maskDimSizes;
-    for (auto operand : createMaskOp.operands()) {
-      auto *defOp = operand.getDefiningOp();
-      maskDimSizes.push_back(cast<arith::ConstantIndexOp>(defOp).value());
+    for (auto it : llvm::zip(createMaskOp.operands(),
+                             createMaskOp.getType().getShape())) {
+      auto *defOp = std::get<0>(it).getDefiningOp();
+      int64_t maxDimSize = std::get<1>(it);
+      int64_t dimSize = cast<arith::ConstantIndexOp>(defOp).value();
+      dimSize = std::min(dimSize, maxDimSize);
+      // If one of dim sizes is zero, set all dims to zero.
+      if (dimSize <= 0) {
+        maskDimSizes.assign(createMaskOp.getType().getRank(), 0);
+        break;
+      }
+      maskDimSizes.push_back(dimSize);
     }
     // Replace 'createMaskOp' with ConstantMaskOp.
     rewriter.replaceOpWithNewOp<ConstantMaskOp>(
diff --git a/mlir/test/Dialect/Vector/canonicalize.mlir b/mlir/test/Dialect/Vector/canonicalize.mlir
index 5da33821eeaf0..3d1923ac09ace 100644
--- a/mlir/test/Dialect/Vector/canonicalize.mlir
+++ b/mlir/test/Dialect/Vector/canonicalize.mlir
@@ -13,6 +13,39 @@ func @create_vector_mask_to_constant_mask() -> (vector<4x3xi1>) {
 
 // -----
 
+// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation
+func @create_vector_mask_to_constant_mask_truncation() -> (vector<4x3xi1>) {
+  %c2 = arith.constant 2 : index
+  %c5 = arith.constant 5 : index
+  // CHECK: vector.constant_mask [4, 2] : vector<4x3xi1>
+  %0 = vector.create_mask %c5, %c2 : vector<4x3xi1>
+  return %0 : vector<4x3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_neg
+func @create_vector_mask_to_constant_mask_truncation_neg() -> (vector<4x3xi1>) {
+  %cneg2 = arith.constant -2 : index
+  %c5 = arith.constant 5 : index
+  // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
+  %0 = vector.create_mask %c5, %cneg2 : vector<4x3xi1>
+  return %0 : vector<4x3xi1>
+}
+
+// -----
+
+// CHECK-LABEL: create_vector_mask_to_constant_mask_truncation_zero
+func @create_vector_mask_to_constant_mask_truncation_zero() -> (vector<4x3xi1>) {
+  %c2 = arith.constant 2 : index
+  %c0 = arith.constant 0 : index
+  // CHECK: vector.constant_mask [0, 0] : vector<4x3xi1>
+  %0 = vector.create_mask %c0, %c2 : vector<4x3xi1>
+  return %0 : vector<4x3xi1>
+}
+
+// -----
+
 func @extract_strided_slice_of_constant_mask() -> (vector<2x2xi1>) {
   %0 = vector.constant_mask [2, 2] : vector<4x3xi1>
   %1 = vector.extract_strided_slice %0
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
index 4a3113bdbe5a1..5834f14c6d22a 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/test-create-mask.mlir
@@ -4,11 +4,13 @@
 // RUN: FileCheck %s
 
 func @entry() {
+  %cneg1 = arith.constant -1 : index
   %c0 = arith.constant 0 : index
   %c1 = arith.constant 1 : index
   %c2 = arith.constant 2 : index
   %c3 = arith.constant 3 : index
   %c6 = arith.constant 6 : index
+  %c7 = arith.constant 7 : index
 
   //
   // 1-D.
@@ -18,16 +20,18 @@ func @entry() {
   vector.print %1 : vector<5xi1>
   // CHECK: ( 1, 1, 0, 0, 0 )
 
-  scf.for %i = %c0 to %c6 step %c1 {
+  scf.for %i = %cneg1 to %c7 step %c1 {
     %2 = vector.create_mask %i : vector<5xi1>
     vector.print %2 : vector<5xi1>
   }
   // CHECK: ( 0, 0, 0, 0, 0 )
+  // CHECK: ( 0, 0, 0, 0, 0 )
   // CHECK: ( 1, 0, 0, 0, 0 )
   // CHECK: ( 1, 1, 0, 0, 0 )
   // CHECK: ( 1, 1, 1, 0, 0 )
   // CHECK: ( 1, 1, 1, 1, 0 )
   // CHECK: ( 1, 1, 1, 1, 1 )
+  // CHECK: ( 1, 1, 1, 1, 1 )
 
   //
   // 2-D.

From af5600420b93769a5c7981d247d37ac4d61cce54 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Thu, 20 Jan 2022 08:29:16 -0500
Subject: [PATCH 044/946] [OpenMP] Don't pass empty files to nvlink

This patch adds and exception to the nvlink wrapper tool to not pass
empty cubin files to the nvlink job. If an empty file is passed to
nvlink it will cause an error indicating that the file could not be
opened. This would occur if the user tried to link object files that
contained offloading code with a file that didnt. This will act as a
workaround until the new OpenMP offloading driver becomes the default.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D117777
---
 .../Inputs/openmp_static_device_link/empty.o     |   0
 .../Inputs/openmp_static_device_link/lib.bc      | Bin 0 -> 1092 bytes
 clang/test/Driver/fat_archive_nvptx.cpp          |   3 ++-
 .../clang-nvlink-wrapper/ClangNvlinkWrapper.cpp  |  12 +++++++++++-
 4 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Driver/Inputs/openmp_static_device_link/empty.o
 create mode 100644 clang/test/Driver/Inputs/openmp_static_device_link/lib.bc

diff --git a/clang/test/Driver/Inputs/openmp_static_device_link/empty.o b/clang/test/Driver/Inputs/openmp_static_device_link/empty.o
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/Inputs/openmp_static_device_link/lib.bc b/clang/test/Driver/Inputs/openmp_static_device_link/lib.bc
new file mode 100644
index 0000000000000000000000000000000000000000..1a87fd836dba2c8b03f53733e4782e15996b96b9
GIT binary patch
literal 1092
zcmXX_VQ3p=82*xLb7vdBi#2Qsy*uyLOv7~bme!i=5-`0vBAb|@KRT$G%SvpulxvMi
z+6_&59bbkKvQjWVDzwl){)^Bb{ZXXJbu*SQ>JTXyN@LTmN>+mFWa57KrSrJ&{dn(t
zdGC3i_xbwewe2^o0JH)C!e-I&?$sY6U;n%OUT4X1!Qg5If*F8+@L9W207kr@z>7?E
z-S(GS-Z5ERo>{|;`E)B~=UToBCtKb&_RqHWJ1j1%o3dSMb9JNx>blT%p#x<s{#oNt
ziTd8k%NNu9PX<@VjO~o`O!nN3{NQcAP{Mt0@c=xBch{)^#fL8f;Z;YjML3Ov{T1`g
zrEI8UXpna()4YuWFTNbllZgfLMxHo7OvLjj%A+WS`fO;5N9rb}1g`mwUtV?eQP#&K
z0O>op?>Bh$P)TOJ%0#RNUs`t0=$ZZ`Ihi0o%Z;1&0sYVdkCL#*+|LyY?7c-MBDNlv
zX`N-lSCV}h!dFgYqAGcvA}0e#Rmmxph$WE9BYXJ;SkJ<$NRNs1ZUZ)gu*b5_ZVSwo
zz!nuQBHof#HEC1!EEC>U%{xO#<%TsDBCijVZw{jg9Fs`RA~i(DoMh}tD{dQ4qK_yv
zZbK6}G?`1icG!T2JMi%utcygrw8+#JneuV0D9nbytf$xw#q?`d_LO8#?l`XK?m5|=
zCZsw2b^Oz()6y@5l-JEajv#daO{iLQm`sFF)P|x9$bRpA`Vi261z{}<ci*C`Jgj3D
zypydIxT2rCFEEu9SJ`1|RrXH<7qMM&=jHSBnp6%t{wESb@dTO3UAcUerJD+HpKu!r
zb0DzW1@;RA*90uE4aM}qP0d}_rM&FJf;=sHYqFHash+vhhMA%`#zT=)PI4lb8r-$O
z`WoB|!bf6P;l7`%RZnhJ*=-CUV3+cmZ&~Y{(cKx@Q`WsJ1y4!G9KGeh@;F6eRO48{
z0c8K}F06;>hdZVQPw$EJ9#8)rgku)CD?*$;+A;mH18YI}MC|&#t}w+EyH!0|7ubD)
zT^Cq9!2{u?2>-3LD*ILl&z$Bd53J(pW3p2iX8^@jY_jic00`J)?QIiIJUen~K+WNL
z9joNo)Tj5EyNk~s_?y<kZJ%j!qIWk09|W<~bgd~k{U8XB^2roA={$Bq0$~<S9R}2S
z-Mws(mNeg-Zu}IRim6nY3eVJ(W;{r7MTOa1WJ=G#8-BI`V8U3)vUf?7=CJ_?--71F
zX2|LW{C%7U$M@L<EvH&=-h4j9cOAc=gP039^K-r3Vz>D5NOUZwMiYI#XUAeALnF%B
SYo8A(qa)$&(a~lV0RI6M5`u96

literal 0
HcmV?d00001

diff --git a/clang/test/Driver/fat_archive_nvptx.cpp b/clang/test/Driver/fat_archive_nvptx.cpp
index a46c44ff998cc..5413445925dd3 100644
--- a/clang/test/Driver/fat_archive_nvptx.cpp
+++ b/clang/test/Driver/fat_archive_nvptx.cpp
@@ -10,7 +10,8 @@
 // CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda"{{.*}}"-target-cpu" "[[GPU:sm_[0-9]+]]"{{.*}}"-o" "[[HOSTBC:.*.s]]" "-x" "c++"{{.*}}.cpp
 // CHECK: clang-offload-bundler" "-unbundle" "-type=a" "-inputs={{.*}}/Inputs/openmp_static_device_link/libFatArchive.a" "-targets=openmp-nvptx64-nvidia-cuda-[[GPU]]" "-outputs=[[DEVICESPECIFICARCHIVE:.*.a]]" "-allow-missing-bundles"
 // CHECK: clang-nvlink-wrapper{{.*}}"-o" "{{.*}}.out" "-arch" "[[GPU]]" "{{.*}}[[DEVICESPECIFICARCHIVE]]"
-// expected-no-diagnostics
+// RUN: not %clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda %s %S/Inputs/openmp_static_device_link/empty.o --libomptarget-nvptx-bc-path=%S/Inputs/openmp_static_device_link/lib.bc 2>&1 | FileCheck %s --check-prefix=EMPTY
+// EMPTY-NOT: Could not open input file
 
 #ifndef HEADER
 #define HEADER
diff --git a/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp b/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
index 46a4f30ba8817..7ccc284a48314 100644
--- a/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
+++ b/clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp
@@ -55,12 +55,22 @@ static cl::opt<std::string> NvlinkUserPath("nvlink-path",
 static cl::list<std::string>
     NVArgs(cl::Sink, cl::desc("<options to be passed to nvlink>..."));
 
+static bool isEmptyFile(StringRef Filename) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+      MemoryBuffer::getFileOrSTDIN(Filename, false, false);
+  if (std::error_code EC = BufOrErr.getError())
+    return false;
+  return (*BufOrErr)->getBuffer().empty();
+}
+
 static Error runNVLink(std::string NVLinkPath,
                        SmallVectorImpl<std::string> &Args) {
   std::vector<StringRef> NVLArgs;
   NVLArgs.push_back(NVLinkPath);
+  StringRef Output = *(llvm::find(Args, "-o") + 1);
   for (auto &Arg : Args) {
-    NVLArgs.push_back(Arg);
+    if (!(sys::fs::exists(Arg) && Arg != Output && isEmptyFile(Arg)))
+      NVLArgs.push_back(Arg);
   }
 
   if (sys::ExecuteAndWait(NVLinkPath, NVLArgs))

From aad04534c41979fe04d7b4bad70de95c7ba646a1 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue@google.com>
Date: Tue, 18 Jan 2022 13:46:18 -0500
Subject: [PATCH 045/946] [libc] Implement correct rounding with all rounding
 modes for hypot functions.

Update the rounding logic for generic hypot function so that it will round correctly with all rounding modes.

Reviewed By: sivachandra, zimmermann6

Differential Revision: https://reviews.llvm.org/D117590
---
 libc/src/__support/FPUtil/Hypot.h             |   26 +-
 libc/src/math/generic/CMakeLists.txt          |    8 +-
 libc/test/src/math/CMakeLists.txt             |    4 +
 libc/test/src/math/HypotTest.h                |   16 +-
 .../math/differential_testing/CMakeLists.txt  |    2 +
 libc/test/src/math/hypotf_hard_to_round.h     | 1238 +++++++++++++++++
 libc/test/src/math/hypotf_test.cpp            |    5 +
 7 files changed, 1289 insertions(+), 10 deletions(-)
 create mode 100644 libc/test/src/math/hypotf_hard_to_round.h

diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h
index 78a3571021a4a..4b2987138d1bd 100644
--- a/libc/src/__support/FPUtil/Hypot.h
+++ b/libc/src/__support/FPUtil/Hypot.h
@@ -10,6 +10,7 @@
 #define LLVM_LIBC_SRC_SUPPORT_FPUTIL_HYPOT_H
 
 #include "BasicOperations.h"
+#include "FEnvImpl.h"
 #include "FPBits.h"
 #include "src/__support/CPP/TypeTraits.h"
 
@@ -143,11 +144,22 @@ static inline T hypot(T x, T y) {
   if ((x_bits.get_unbiased_exponent() >=
        y_bits.get_unbiased_exponent() + MantissaWidth<T>::VALUE + 2) ||
       (y == 0)) {
+    // Check if the rounding mode is FE_UPWARD, will need -frounding-math so
+    // that the compiler does not optimize it away.
+    if ((y != 0) && (0x1p0f + 0x1p-24f != 0x1p0f)) {
+      UIntType out_bits = FPBits_t(abs(x)).uintval();
+      return T(FPBits_t(++out_bits));
+    }
     return abs(x);
   } else if ((y_bits.get_unbiased_exponent() >=
               x_bits.get_unbiased_exponent() + MantissaWidth<T>::VALUE + 2) ||
              (x == 0)) {
-    y_bits.set_sign(0);
+    // Check if the rounding mode is FE_UPWARD, will need -frounding-math so
+    // that the compiler does not optimize it away.
+    if ((x != 0) && (0x1p0f + 0x1p-24f != 0x1p0f)) {
+      UIntType out_bits = FPBits_t(abs(y)).uintval();
+      return T(FPBits_t(++out_bits));
+    }
     return abs(y);
   }
 
@@ -250,8 +262,16 @@ static inline T hypot(T x, T y) {
   y_new >>= 1;
 
   // Round to the nearest, tie to even.
-  if (round_bit && (lsb || sticky_bits || (r != 0))) {
-    ++y_new;
+  switch (get_round()) {
+  case FE_TONEAREST:
+    // Round to nearest, ties to even
+    if (round_bit && (lsb || sticky_bits || (r != 0)))
+      ++y_new;
+    break;
+  case FE_UPWARD:
+    if (round_bit || sticky_bits || (r != 0))
+      ++y_new;
+    break;
   }
 
   if (y_new >= (ONE >> 1)) {
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 6f650d2403d12..59bca76fc5f84 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -954,7 +954,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.fputil
   COMPILE_OPTIONS
-    -O2
+    -O3
+    -frounding-math
+    -Wno-c++17-extensions
 )
 
 add_entrypoint_object(
@@ -1002,7 +1004,9 @@ add_entrypoint_object(
   DEPENDS
     libc.src.__support.FPUtil.fputil
   COMPILE_OPTIONS
-    -O2
+    -O3
+    -frounding-math
+    -Wno-c++17-extensions
 )
 
 add_entrypoint_object(
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 88592b9725bf3..7c51dc00d49db 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1060,6 +1060,8 @@ add_fp_unittest(
     libc.include.math
     libc.src.math.hypotf
     libc.src.__support.FPUtil.fputil
+  COMPILE_OPTIONS
+    -Wno-c++17-extensions
 )
 
 add_fp_unittest(
@@ -1073,6 +1075,8 @@ add_fp_unittest(
     libc.include.math
     libc.src.math.hypot
     libc.src.__support.FPUtil.fputil
+  COMPILE_OPTIONS
+    -Wno-c++17-extensions
 )
 
 add_fp_unittest(
diff --git a/libc/test/src/math/HypotTest.h b/libc/test/src/math/HypotTest.h
index 584b5aef3359c..b7a7ffdf5312a 100644
--- a/libc/test/src/math/HypotTest.h
+++ b/libc/test/src/math/HypotTest.h
@@ -10,7 +10,6 @@
 #define LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_H
 
 #include "src/__support/FPUtil/FPBits.h"
-#include "src/__support/FPUtil/Hypot.h"
 #include "utils/MPFRWrapper/MPFRUtils.h"
 #include "utils/UnitTest/FPMatcher.h"
 #include "utils/UnitTest/Test.h"
@@ -62,9 +61,9 @@ class HypotTestTemplate : public __llvm_libc::testing::Test {
             y = -y;
           }
 
-          T result = func(x, y);
           mpfr::BinaryInput<T> input{x, y};
-          ASSERT_MPFR_MATCH(mpfr::Operation::Hypot, input, result, 0.5);
+          ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Hypot, input,
+                                         func(x, y), 0.5);
         }
       }
     }
@@ -85,12 +84,19 @@ class HypotTestTemplate : public __llvm_libc::testing::Test {
           y = -y;
         }
 
-        T result = func(x, y);
         mpfr::BinaryInput<T> input{x, y};
-        ASSERT_MPFR_MATCH(mpfr::Operation::Hypot, input, result, 0.5);
+        ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Hypot, input,
+                                       func(x, y), 0.5);
       }
     }
   }
+
+  void test_input_list(Func func, int n, const mpfr::BinaryInput<T> *inputs) {
+    for (int i = 0; i < n; ++i) {
+      ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Hypot, inputs[i],
+                                     func(inputs[i].x, inputs[i].y), 0.5);
+    }
+  }
 };
 
 #endif // LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_H
diff --git a/libc/test/src/math/differential_testing/CMakeLists.txt b/libc/test/src/math/differential_testing/CMakeLists.txt
index 0bf7855e2c91f..f2ac818fbd3b3 100644
--- a/libc/test/src/math/differential_testing/CMakeLists.txt
+++ b/libc/test/src/math/differential_testing/CMakeLists.txt
@@ -406,6 +406,7 @@ add_diff_binary(
     libc.src.math.hypotf
   COMPILE_OPTIONS
     -fno-builtin
+    -Wno-c++17-extensions
 )
 
 add_diff_binary(
@@ -417,4 +418,5 @@ add_diff_binary(
     libc.src.math.hypot
   COMPILE_OPTIONS
     -fno-builtin
+    -Wno-c++17-extensions
 )
diff --git a/libc/test/src/math/hypotf_hard_to_round.h b/libc/test/src/math/hypotf_hard_to_round.h
new file mode 100644
index 0000000000000..06817c7a3a104
--- /dev/null
+++ b/libc/test/src/math/hypotf_hard_to_round.h
@@ -0,0 +1,1238 @@
+//===-- Hard-to-round inputs for hypotf ------------------------------C++--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_HARD_TO_ROUND_H
+#define LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_HARD_TO_ROUND_H
+
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+constexpr int N_HARD_TO_ROUND = 1216;
+constexpr mpfr::BinaryInput<float> HYPOTF_HARD_TO_ROUND[N_HARD_TO_ROUND] = {
+    {0x1.ffffecp-1f, 0x1.000002p+27},
+    {0x1.900004p+34, 0x1.400002p+23}, /* 45 identical bits */
+    {0x1.05555p+34, 0x1.bffffep+23},  /* 44 identical bits */
+    {0x1.e5fffap+34, 0x1.affffep+23}, /* 45 identical bits */
+    {0x1.260002p+34, 0x1.500002p+23}, /* 45 identical bits */
+    {0x1.fffffap+34, 0x1.fffffep+23}, /* 45 identical bits */
+    {0x1.8ffffap+34, 0x1.3ffffep+23}, /* 45 identical bits */
+    {0x1.87fffcp+35, 0x1.bffffep+23}, /* 47 identical bits */
+    {0x1.b8e50ap-52, -0x1.db1e78p-64},
+    {0x1.03b54cp-33, 0x1.6ca6bep-45},
+    {0x1.e2eff6p+97, -0x1.044cb2p+108},
+    {-0x1.6b05c4p-127, 0x1.6b3146p-126},
+    {-0x1.6b05c4p-127, 0x1.6b3146p-126},
+    {0x1.26b188p-127, -0x1.a4f2fp-128},
+    {0x1.e2eff6p+97, -0x1.044cb2p+108},
+    // only inexact hard-to-round case with ulp(x) = 2^12*ulp(y):
+    {0x1.87fffcp+35, 0x1.bffffep+23},
+    // 1200 exact cases with ulp(x) = 2^12*ulp(y):
+    {0x1.f0008p+35, 0x1.f7fp+23},
+    {0x1.6401dp+35, 0x1.aafp+23},
+    {0x1.c0029p+35, 0x1.defp+23},
+    {0x1.100274p+35, 0x1.753p+23},
+    {0x1.38030cp+35, 0x1.8fbp+23},
+    {0x1.940334p+35, 0x1.c6dp+23},
+    {0x1.cc041cp+35, 0x1.e55p+23},
+    {0x1.0c03f8p+35, 0x1.727p+23},
+    {0x1.e404a4p+35, 0x1.f1dp+23},
+    {0x1.b004ecp+35, 0x1.d65p+23},
+    {0x1.a804ep+35, 0x1.d1fp+23},
+    {0x1.5804ccp+35, 0x1.a3bp+23},
+    {0x1.98051p+35, 0x1.c91p+23},
+    {0x1.480524p+35, 0x1.99dp+23},
+    {0x1.900668p+35, 0x1.c49p+23},
+    {0x1.28061cp+35, 0x1.855p+23},
+    {0x1.7406d8p+35, 0x1.b47p+23},
+    {0x1.dc07acp+35, 0x1.edbp+23},
+    {0x1.2c078p+35, 0x1.87fp+23},
+    {0x1.4407bcp+35, 0x1.975p+23},
+    {0x1.4c08ccp+35, 0x1.9c5p+23},
+    {0x1.14088p+35, 0x1.77fp+23},
+    {0x1.0008ap+35, 0x1.6a1p+23},
+    {0x1.bc0b14p+35, 0x1.dcdp+23},
+    {0x1.9c0bfcp+35, 0x1.cb5p+23},
+    {0x1.240b9cp+35, 0x1.82bp+23},
+    {0x1.d00da8p+35, 0x1.e77p+23},
+    {0x1.800e68p+35, 0x1.bb7p+23},
+    {0x1.8c0eacp+35, 0x1.c25p+23},
+    {0x1.080d0cp+35, 0x1.6fbp+23},
+    {0x1.600ea8p+35, 0x1.a89p+23},
+    {0x1.300fc8p+35, 0x1.8a9p+23},
+    {0x1.ec12ap+35, 0x1.f5fp+23},
+    {0x1.401094p+35, 0x1.94dp+23},
+    {0x1.5012b4p+35, 0x1.9edp+23},
+    {0x1.781634p+35, 0x1.b6dp+23},
+    {0x1.b8181cp+35, 0x1.dabp+23},
+    {0x1.a017f8p+35, 0x1.cd9p+23},
+    {0x1.18161cp+35, 0x1.7abp+23},
+    {0x1.d41bb8p+35, 0x1.e99p+23},
+    {0x1.2018p+35, 0x1.801p+23},
+    {0x1.881cp+35, 0x1.c01p+23},
+    {0x1.ac1fc4p+35, 0x1.d43p+23},
+    {0x1.fc21ep+35, 0x1.fe1p+23},
+    {0x1.e02304p+35, 0x1.efdp+23},
+    {0x1.341ef4p+35, 0x1.8d3p+23},
+    {0x1.3c1facp+35, 0x1.925p+23},
+    {0x1.041dbp+35, 0x1.6cfp+23},
+    {0x1.5c2124p+35, 0x1.a63p+23},
+    {0x1.f827cp+35, 0x1.fc1p+23},
+    {0x1.5422dcp+35, 0x1.a15p+23},
+    {0x1.e828cp+35, 0x1.f3fp+23},
+    {0x1.b429a8p+35, 0x1.d89p+23},
+    {0x1.a42904p+35, 0x1.cfdp+23},
+    {0x1.d82e4cp+35, 0x1.ebbp+23},
+    {0x1.7c2b34p+35, 0x1.b93p+23},
+    {0x1.f431ap+35, 0x1.fa1p+23},
+    {0x1.842e64p+35, 0x1.bddp+23},
+    {0x1.6c2f04p+35, 0x1.afdp+23},
+    {0x1.1c2b48p+35, 0x1.7d7p+23},
+    {0x1.683048p+35, 0x1.ad7p+23},
+    {0x1.703364p+35, 0x1.b23p+23},
+    {0x1.10311cp+35, 0x1.755p+23},
+    {0x1.c43ab4p+35, 0x1.e13p+23},
+    {0x1.c83b7cp+35, 0x1.e35p+23},
+    {0x1.383504p+35, 0x1.8fdp+23},
+    {0x1.0c3248p+35, 0x1.729p+23},
+    {0x1.64373p+35, 0x1.ab1p+23},
+    {0x1.f03f8p+35, 0x1.f81p+23},
+    {0x1.943c1p+35, 0x1.c6fp+23},
+    {0x1.c03e7p+35, 0x1.df1p+23},
+    {0x1.583944p+35, 0x1.a3dp+23},
+    {0x1.48386p+35, 0x1.99fp+23},
+    {0x1.2836c8p+35, 0x1.857p+23},
+    {0x1.cc40c8p+35, 0x1.e57p+23},
+    {0x1.a83f2p+35, 0x1.d21p+23},
+    {0x1.b03fb8p+35, 0x1.d67p+23},
+    {0x1.983e34p+35, 0x1.c93p+23},
+    {0x1.2c388p+35, 0x1.881p+23},
+    {0x1.e442ep+35, 0x1.f1fp+23},
+    {0x1.0035e4p+35, 0x1.6a3p+23},
+    {0x1.903efcp+35, 0x1.c4bp+23},
+    {0x1.743d68p+35, 0x1.b49p+23},
+    {0x1.443aa8p+35, 0x1.977p+23},
+    {0x1.14378p+35, 0x1.781p+23},
+    {0x1.4c3c58p+35, 0x1.9c7p+23},
+    {0x1.dc4564p+35, 0x1.eddp+23},
+    {0x1.243bf4p+35, 0x1.82dp+23},
+    {0x1.bc46bp+35, 0x1.dcfp+23},
+    {0x1.083b04p+35, 0x1.6fdp+23},
+    {0x1.9c4568p+35, 0x1.cb7p+23},
+    {0x1.d04a98p+35, 0x1.e79p+23},
+    {0x1.6043bcp+35, 0x1.a8bp+23},
+    {0x1.8045d8p+35, 0x1.bb9p+23},
+    {0x1.8c46f8p+35, 0x1.c27p+23},
+    {0x1.30411cp+35, 0x1.8abp+23},
+    {0x1.40433p+35, 0x1.94fp+23},
+    {0x1.50469p+35, 0x1.9efp+23},
+    {0x1.ec516p+35, 0x1.f61p+23},
+    {0x1.184574p+35, 0x1.7adp+23},
+    {0x1.784d1p+35, 0x1.b6fp+23},
+    {0x1.b85374p+35, 0x1.dadp+23},
+    {0x1.204804p+35, 0x1.803p+23},
+    {0x1.a051acp+35, 0x1.cdbp+23},
+    {0x1.d458ecp+35, 0x1.e9bp+23},
+    {0x1.885404p+35, 0x1.c03p+23},
+    {0x1.044b5p+35, 0x1.6d1p+23},
+    {0x1.34509cp+35, 0x1.8d5p+23},
+    {0x1.ac5a4cp+35, 0x1.d45p+23},
+    {0x1.3c51f8p+35, 0x1.927p+23},
+    {0x1.fc61a4p+35, 0x1.fe3p+23},
+    {0x1.5c55ecp+35, 0x1.a65p+23},
+    {0x1.e061p+35, 0x1.effp+23},
+    {0x1.545708p+35, 0x1.a17p+23},
+    {0x1.f86744p+35, 0x1.fc3p+23},
+    {0x1.e8674p+35, 0x1.f41p+23},
+    {0x1.b464bcp+35, 0x1.d8bp+23},
+    {0x1.a463p+35, 0x1.cffp+23},
+    {0x1.7c625cp+35, 0x1.b95p+23},
+    {0x1.d86bc4p+35, 0x1.ebdp+23},
+    {0x1.1c5af8p+35, 0x1.7d9p+23},
+    {0x1.84662p+35, 0x1.bdfp+23},
+    {0x1.f470e4p+35, 0x1.fa3p+23},
+    {0x1.6c65p+35, 0x1.affp+23},
+    {0x1.6865f8p+35, 0x1.ad9p+23},
+    {0x1.7069acp+35, 0x1.b25p+23},
+    {0x1.105fc8p+35, 0x1.757p+23},
+    {0x1.0c609cp+35, 0x1.72bp+23},
+    {0x1.3867p+35, 0x1.8ffp+23},
+    {0x1.646c94p+35, 0x1.ab3p+23},
+    {0x1.c476dcp+35, 0x1.e15p+23},
+    {0x1.c877e8p+35, 0x1.e37p+23},
+    {0x1.286778p+35, 0x1.859p+23},
+    {0x1.486bap+35, 0x1.9a1p+23},
+    {0x1.586dcp+35, 0x1.a3fp+23},
+    {0x1.00632cp+35, 0x1.6a5p+23},
+    {0x1.2c6984p+35, 0x1.883p+23},
+    {0x1.146684p+35, 0x1.783p+23},
+    {0x1.9474fp+35, 0x1.c71p+23},
+    {0x1.f07e84p+35, 0x1.f83p+23},
+    {0x1.c07a54p+35, 0x1.df3p+23},
+    {0x1.446d98p+35, 0x1.979p+23},
+    {0x1.98775cp+35, 0x1.c95p+23},
+    {0x1.a87964p+35, 0x1.d23p+23},
+    {0x1.4c6fe8p+35, 0x1.9c9p+23},
+    {0x1.b07a88p+35, 0x1.d69p+23},
+    {0x1.7473fcp+35, 0x1.b4bp+23},
+    {0x1.cc7d78p+35, 0x1.e59p+23},
+    {0x1.907794p+35, 0x1.c4dp+23},
+    {0x1.246c5p+35, 0x1.82fp+23},
+    {0x1.e4812p+35, 0x1.f21p+23},
+    {0x1.0869p+35, 0x1.6ffp+23},
+    {0x1.dc832p+35, 0x1.edfp+23},
+    {0x1.307274p+35, 0x1.8adp+23},
+    {0x1.bc825p+35, 0x1.dd1p+23},
+    {0x1.9c7ed8p+35, 0x1.cb9p+23},
+    {0x1.6078d4p+35, 0x1.a8dp+23},
+    {0x1.4075dp+35, 0x1.951p+23},
+    {0x1.807d4cp+35, 0x1.bbbp+23},
+    {0x1.8c7f48p+35, 0x1.c29p+23},
+    {0x1.d0878cp+35, 0x1.e7bp+23},
+    {0x1.507a7p+35, 0x1.9f1p+23},
+    {0x1.1874dp+35, 0x1.7afp+23},
+    {0x1.ec9024p+35, 0x1.f63p+23},
+    {0x1.20780cp+35, 0x1.805p+23},
+    {0x1.7883fp+35, 0x1.b71p+23},
+    {0x1.b88edp+35, 0x1.dafp+23},
+    {0x1.a08b64p+35, 0x1.cddp+23},
+    {0x1.0478f4p+35, 0x1.6d3p+23},
+    {0x1.888c0cp+35, 0x1.c05p+23},
+    {0x1.d49624p+35, 0x1.e9dp+23},
+    {0x1.348248p+35, 0x1.8d7p+23},
+    {0x1.3c8448p+35, 0x1.929p+23},
+    {0x1.ac94d8p+35, 0x1.d47p+23},
+    {0x1.5c8ab8p+35, 0x1.a67p+23},
+    {0x1.548b38p+35, 0x1.a19p+23},
+    {0x1.fca16cp+35, 0x1.fe5p+23},
+    {0x1.e09fp+35, 0x1.f01p+23},
+    {0x1.f8a6ccp+35, 0x1.fc5p+23},
+    {0x1.b49fd4p+35, 0x1.d8dp+23},
+    {0x1.a49dp+35, 0x1.d01p+23},
+    {0x1.e8a5c4p+35, 0x1.f43p+23},
+    {0x1.1c8aacp+35, 0x1.7dbp+23},
+    {0x1.7c9988p+35, 0x1.b97p+23},
+    {0x1.849dep+35, 0x1.be1p+23},
+    {0x1.d8a94p+35, 0x1.ebfp+23},
+    {0x1.6c9bp+35, 0x1.b01p+23},
+    {0x1.689bacp+35, 0x1.adbp+23},
+    {0x1.108e78p+35, 0x1.759p+23},
+    {0x1.f4b02cp+35, 0x1.fa5p+23},
+    {0x1.0c8ef4p+35, 0x1.72dp+23},
+    {0x1.709ff8p+35, 0x1.b27p+23},
+    {0x1.3899p+35, 0x1.901p+23},
+    {0x1.28982cp+35, 0x1.85bp+23},
+    {0x1.009078p+35, 0x1.6a7p+23},
+    {0x1.64a1fcp+35, 0x1.ab5p+23},
+    {0x1.489ee4p+35, 0x1.9a3p+23},
+    {0x1.2c9a8cp+35, 0x1.885p+23},
+    {0x1.14958cp+35, 0x1.785p+23},
+    {0x1.58a24p+35, 0x1.a41p+23},
+    {0x1.c4b308p+35, 0x1.e17p+23},
+    {0x1.c8b458p+35, 0x1.e39p+23},
+    {0x1.44a08cp+35, 0x1.97bp+23},
+    {0x1.94add4p+35, 0x1.c73p+23},
+    {0x1.4ca37cp+35, 0x1.9cbp+23},
+    {0x1.249cbp+35, 0x1.831p+23},
+    {0x1.0897p+35, 0x1.701p+23},
+    {0x1.c0b63cp+35, 0x1.df5p+23},
+    {0x1.74aa94p+35, 0x1.b4dp+23},
+    {0x1.98b088p+35, 0x1.c97p+23},
+    {0x1.f0bd8cp+35, 0x1.f85p+23},
+    {0x1.a8b3acp+35, 0x1.d25p+23},
+    {0x1.b0b55cp+35, 0x1.d6bp+23},
+    {0x1.90b03p+35, 0x1.c4fp+23},
+    {0x1.ccba2cp+35, 0x1.e5bp+23},
+    {0x1.e4bf64p+35, 0x1.f23p+23},
+    {0x1.30a3dp+35, 0x1.8afp+23},
+    {0x1.dcc0ep+35, 0x1.ee1p+23},
+    {0x1.40a874p+35, 0x1.953p+23},
+    {0x1.60adfp+35, 0x1.a8fp+23},
+    {0x1.9cb84cp+35, 0x1.cbbp+23},
+    {0x1.bcbdf4p+35, 0x1.dd3p+23},
+    {0x1.80b4c4p+35, 0x1.bbdp+23},
+    {0x1.8cb79cp+35, 0x1.c2bp+23},
+    {0x1.50ae54p+35, 0x1.9f3p+23},
+    {0x1.18a43p+35, 0x1.7b1p+23},
+    {0x1.d0c484p+35, 0x1.e7dp+23},
+    {0x1.20a818p+35, 0x1.807p+23},
+    {0x1.78bad4p+35, 0x1.b73p+23},
+    {0x1.ecceecp+35, 0x1.f65p+23},
+    {0x1.04a69cp+35, 0x1.6d5p+23},
+    {0x1.a0c52p+35, 0x1.cdfp+23},
+    {0x1.b8ca3p+35, 0x1.db1p+23},
+    {0x1.34b3f8p+35, 0x1.8d9p+23},
+    {0x1.88c418p+35, 0x1.c07p+23},
+    {0x1.3cb69cp+35, 0x1.92bp+23},
+    {0x1.d4d36p+35, 0x1.e9fp+23},
+    {0x1.5cbf88p+35, 0x1.a69p+23},
+    {0x1.accf68p+35, 0x1.d49p+23},
+    {0x1.54bf6cp+35, 0x1.a1bp+23},
+    {0x1.e0dd04p+35, 0x1.f03p+23},
+    {0x1.fce138p+35, 0x1.fe7p+23},
+    {0x1.1cba64p+35, 0x1.7ddp+23},
+    {0x1.b4dafp+35, 0x1.d8fp+23},
+    {0x1.a4d704p+35, 0x1.d03p+23},
+    {0x1.7cd0b8p+35, 0x1.b99p+23},
+    {0x1.f8e658p+35, 0x1.fc7p+23},
+    {0x1.e8e44cp+35, 0x1.f45p+23},
+    {0x1.10bd2cp+35, 0x1.75bp+23},
+    {0x1.84d5a4p+35, 0x1.be3p+23},
+    {0x1.6cd104p+35, 0x1.b03p+23},
+    {0x1.0cbd5p+35, 0x1.72fp+23},
+    {0x1.68d164p+35, 0x1.addp+23},
+    {0x1.d8e6cp+35, 0x1.ec1p+23},
+    {0x1.38cb04p+35, 0x1.903p+23},
+    {0x1.70d648p+35, 0x1.b29p+23},
+    {0x1.00bdc8p+35, 0x1.6a9p+23},
+    {0x1.f4ef78p+35, 0x1.fa7p+23},
+    {0x1.28c8e4p+35, 0x1.85dp+23},
+    {0x1.14c498p+35, 0x1.787p+23},
+    {0x1.2ccb98p+35, 0x1.887p+23},
+    {0x1.48d22cp+35, 0x1.9a5p+23},
+    {0x1.64d768p+35, 0x1.ab7p+23},
+    {0x1.58d6c4p+35, 0x1.a43p+23},
+    {0x1.44d384p+35, 0x1.97dp+23},
+    {0x1.08c504p+35, 0x1.703p+23},
+    {0x1.24cd14p+35, 0x1.833p+23},
+    {0x1.4cd714p+35, 0x1.9cdp+23},
+    {0x1.c4ef38p+35, 0x1.e19p+23},
+    {0x1.c8f0ccp+35, 0x1.e3bp+23},
+    {0x1.94e6bcp+35, 0x1.c75p+23},
+    {0x1.74e13p+35, 0x1.b4fp+23},
+    {0x1.c0f228p+35, 0x1.df7p+23},
+    {0x1.98e9b8p+35, 0x1.c99p+23},
+    {0x1.90e8dp+35, 0x1.c51p+23},
+    {0x1.a8edf8p+35, 0x1.d27p+23},
+    {0x1.30d53p+35, 0x1.8b1p+23},
+    {0x1.b0f034p+35, 0x1.d6dp+23},
+    {0x1.f0fc98p+35, 0x1.f87p+23},
+    {0x1.ccf6e4p+35, 0x1.e5dp+23},
+    {0x1.40db1cp+35, 0x1.955p+23},
+    {0x1.60e31p+35, 0x1.a91p+23},
+    {0x1.e4fdacp+35, 0x1.f25p+23},
+    {0x1.18d394p+35, 0x1.7b3p+23},
+    {0x1.dcfea4p+35, 0x1.ee3p+23},
+    {0x1.9cf1c4p+35, 0x1.cbdp+23},
+    {0x1.80ec4p+35, 0x1.bbfp+23},
+    {0x1.50e23cp+35, 0x1.9f5p+23},
+    {0x1.bcf99cp+35, 0x1.dd5p+23},
+    {0x1.8ceff4p+35, 0x1.c2dp+23},
+    {0x1.20d828p+35, 0x1.809p+23},
+    {0x1.d1018p+35, 0x1.e7fp+23},
+    {0x1.04d448p+35, 0x1.6d7p+23},
+    {0x1.78f1bcp+35, 0x1.b75p+23},
+    {0x1.34e5acp+35, 0x1.8dbp+23},
+    {0x1.ed0db8p+35, 0x1.f67p+23},
+    {0x1.3ce8f4p+35, 0x1.92dp+23},
+    {0x1.a0feep+35, 0x1.ce1p+23},
+    {0x1.b90594p+35, 0x1.db3p+23},
+    {0x1.88fc28p+35, 0x1.c09p+23},
+    {0x1.5cf45cp+35, 0x1.a6bp+23},
+    {0x1.54f3a4p+35, 0x1.a1dp+23},
+    {0x1.d510ap+35, 0x1.ea1p+23},
+    {0x1.ad09fcp+35, 0x1.d4bp+23},
+    {0x1.1cea2p+35, 0x1.7dfp+23},
+    {0x1.e11b0cp+35, 0x1.f05p+23},
+    {0x1.fd2108p+35, 0x1.fe9p+23},
+    {0x1.7d07ecp+35, 0x1.b9bp+23},
+    {0x1.b5161p+35, 0x1.d91p+23},
+    {0x1.a5110cp+35, 0x1.d05p+23},
+    {0x1.10ebe4p+35, 0x1.75dp+23},
+    {0x1.0cebbp+35, 0x1.731p+23},
+    {0x1.e922d8p+35, 0x1.f47p+23},
+    {0x1.f925e8p+35, 0x1.fc9p+23},
+    {0x1.850d6cp+35, 0x1.be5p+23},
+    {0x1.6d070cp+35, 0x1.b05p+23},
+    {0x1.69072p+35, 0x1.adfp+23},
+    {0x1.00eb1cp+35, 0x1.6abp+23},
+    {0x1.38fd0cp+35, 0x1.905p+23},
+    {0x1.d92444p+35, 0x1.ec3p+23},
+    {0x1.28f9ap+35, 0x1.85fp+23},
+    {0x1.14f3a8p+35, 0x1.789p+23},
+    {0x1.710c9cp+35, 0x1.b2bp+23},
+    {0x1.2cfca8p+35, 0x1.889p+23},
+    {0x1.490578p+35, 0x1.9a7p+23},
+    {0x1.f52ec8p+35, 0x1.fa9p+23},
+    {0x1.650cd8p+35, 0x1.ab9p+23},
+    {0x1.590b4cp+35, 0x1.a45p+23},
+    {0x1.08f30cp+35, 0x1.705p+23},
+    {0x1.45068p+35, 0x1.97fp+23},
+    {0x1.24fd7cp+35, 0x1.835p+23},
+    {0x1.4d0abp+35, 0x1.9cfp+23},
+    {0x1.c52b6cp+35, 0x1.e1bp+23},
+    {0x1.951fa8p+35, 0x1.c77p+23},
+    {0x1.7517dp+35, 0x1.b51p+23},
+    {0x1.310694p+35, 0x1.8b3p+23},
+    {0x1.c92d44p+35, 0x1.e3dp+23},
+    {0x1.9922ecp+35, 0x1.c9bp+23},
+    {0x1.912174p+35, 0x1.c53p+23},
+    {0x1.c12e18p+35, 0x1.df9p+23},
+    {0x1.a92848p+35, 0x1.d29p+23},
+    {0x1.b12b1p+35, 0x1.d6fp+23},
+    {0x1.410dc8p+35, 0x1.957p+23},
+    {0x1.1902fcp+35, 0x1.7b5p+23},
+    {0x1.cd33ap+35, 0x1.e5fp+23},
+    {0x1.f13ba8p+35, 0x1.f89p+23},
+    {0x1.611834p+35, 0x1.a93p+23},
+    {0x1.511628p+35, 0x1.9f7p+23},
+    {0x1.21083cp+35, 0x1.80bp+23},
+    {0x1.e53bf8p+35, 0x1.f27p+23},
+    {0x1.8123cp+35, 0x1.bc1p+23},
+    {0x1.9d2b4p+35, 0x1.cbfp+23},
+    {0x1.dd3c6cp+35, 0x1.ee5p+23},
+    {0x1.8d285p+35, 0x1.c2fp+23},
+    {0x1.bd3548p+35, 0x1.dd7p+23},
+    {0x1.0501f8p+35, 0x1.6d9p+23},
+    {0x1.d13e8p+35, 0x1.e81p+23},
+    {0x1.7928a8p+35, 0x1.b77p+23},
+    {0x1.351764p+35, 0x1.8ddp+23},
+    {0x1.3d1b5p+35, 0x1.92fp+23},
+    {0x1.ed4c88p+35, 0x1.f69p+23},
+    {0x1.a138a4p+35, 0x1.ce3p+23},
+    {0x1.b940fcp+35, 0x1.db5p+23},
+    {0x1.89343cp+35, 0x1.c0bp+23},
+    {0x1.5d2934p+35, 0x1.a6dp+23},
+    {0x1.5527ep+35, 0x1.a1fp+23},
+    {0x1.d54de4p+35, 0x1.ea3p+23},
+    {0x1.ad4494p+35, 0x1.d4dp+23},
+    {0x1.1d19ep+35, 0x1.7e1p+23},
+    {0x1.111aap+35, 0x1.75fp+23},
+    {0x1.e15918p+35, 0x1.f07p+23},
+    {0x1.0d1a14p+35, 0x1.733p+23},
+    {0x1.7d3f24p+35, 0x1.b9dp+23},
+    {0x1.fd60dcp+35, 0x1.febp+23},
+    {0x1.a54b18p+35, 0x1.d07p+23},
+    {0x1.b55134p+35, 0x1.d93p+23},
+    {0x1.011874p+35, 0x1.6adp+23},
+    {0x1.6d3d18p+35, 0x1.b07p+23},
+    {0x1.854538p+35, 0x1.be7p+23},
+    {0x1.693cep+35, 0x1.ae1p+23},
+    {0x1.392f18p+35, 0x1.907p+23},
+    {0x1.292a6p+35, 0x1.861p+23},
+    {0x1.e96168p+35, 0x1.f49p+23},
+    {0x1.f9657cp+35, 0x1.fcbp+23},
+    {0x1.1522bcp+35, 0x1.78bp+23},
+    {0x1.2d2dbcp+35, 0x1.88bp+23},
+    {0x1.7142f4p+35, 0x1.b2dp+23},
+    {0x1.4938c8p+35, 0x1.9a9p+23},
+    {0x1.d961ccp+35, 0x1.ec5p+23},
+    {0x1.092118p+35, 0x1.707p+23},
+    {0x1.252de8p+35, 0x1.837p+23},
+    {0x1.65424cp+35, 0x1.abbp+23},
+    {0x1.593fd8p+35, 0x1.a47p+23},
+    {0x1.45398p+35, 0x1.981p+23},
+    {0x1.4d3e5p+35, 0x1.9d1p+23},
+    {0x1.f56e1cp+35, 0x1.fabp+23},
+    {0x1.3137fcp+35, 0x1.8b5p+23},
+    {0x1.754e74p+35, 0x1.b53p+23},
+    {0x1.955898p+35, 0x1.c79p+23},
+    {0x1.c567a4p+35, 0x1.e1dp+23},
+    {0x1.c969cp+35, 0x1.e3fp+23},
+    {0x1.193268p+35, 0x1.7b7p+23},
+    {0x1.414078p+35, 0x1.959p+23},
+    {0x1.915a1cp+35, 0x1.c55p+23},
+    {0x1.995c24p+35, 0x1.c9dp+23},
+    {0x1.a9629cp+35, 0x1.d2bp+23},
+    {0x1.c16a0cp+35, 0x1.dfbp+23},
+    {0x1.b165fp+35, 0x1.d71p+23},
+    {0x1.614d5cp+35, 0x1.a95p+23},
+    {0x1.213854p+35, 0x1.80dp+23},
+    {0x1.cd706p+35, 0x1.e61p+23},
+    {0x1.514a18p+35, 0x1.9f9p+23},
+    {0x1.f17abcp+35, 0x1.f8bp+23},
+    {0x1.815b44p+35, 0x1.bc3p+23},
+    {0x1.052facp+35, 0x1.6dbp+23},
+    {0x1.9d64cp+35, 0x1.cc1p+23},
+    {0x1.8d60bp+35, 0x1.c31p+23},
+    {0x1.e57a48p+35, 0x1.f29p+23},
+    {0x1.bd70f8p+35, 0x1.dd9p+23},
+    {0x1.dd7a38p+35, 0x1.ee7p+23},
+    {0x1.35492p+35, 0x1.8dfp+23},
+    {0x1.d17b84p+35, 0x1.e83p+23},
+    {0x1.795f98p+35, 0x1.b79p+23},
+    {0x1.3d4dbp+35, 0x1.931p+23},
+    {0x1.a1726cp+35, 0x1.ce5p+23},
+    {0x1.b97c68p+35, 0x1.db7p+23},
+    {0x1.896c54p+35, 0x1.c0dp+23},
+    {0x1.5d5e1p+35, 0x1.a6fp+23},
+    {0x1.ed8b5cp+35, 0x1.f6bp+23},
+    {0x1.555c2p+35, 0x1.a21p+23},
+    {0x1.1d49a4p+35, 0x1.7e3p+23},
+    {0x1.ad7f3p+35, 0x1.d4fp+23},
+    {0x1.d58b2cp+35, 0x1.ea5p+23},
+    {0x1.11496p+35, 0x1.761p+23},
+    {0x1.0d487cp+35, 0x1.735p+23},
+    {0x1.0145dp+35, 0x1.6afp+23},
+    {0x1.7d766p+35, 0x1.b9fp+23},
+    {0x1.e19728p+35, 0x1.f09p+23},
+    {0x1.a58528p+35, 0x1.d09p+23},
+    {0x1.b58c5cp+35, 0x1.d95p+23},
+    {0x1.396128p+35, 0x1.909p+23},
+    {0x1.295b24p+35, 0x1.863p+23},
+    {0x1.1551d4p+35, 0x1.78dp+23},
+    {0x1.6d7328p+35, 0x1.b09p+23},
+    {0x1.fda0b4p+35, 0x1.fedp+23},
+    {0x1.6972a4p+35, 0x1.ae3p+23},
+    {0x1.857d08p+35, 0x1.be9p+23},
+    {0x1.2d5ed4p+35, 0x1.88dp+23},
+    {0x1.094f28p+35, 0x1.709p+23},
+    {0x1.496c1cp+35, 0x1.9abp+23},
+    {0x1.e99ffcp+35, 0x1.f4bp+23},
+    {0x1.f9a514p+35, 0x1.fcdp+23},
+    {0x1.71795p+35, 0x1.b2fp+23},
+    {0x1.255e58p+35, 0x1.839p+23},
+    {0x1.597468p+35, 0x1.a49p+23},
+    {0x1.456c84p+35, 0x1.983p+23},
+    {0x1.6577c4p+35, 0x1.abdp+23},
+    {0x1.d99f58p+35, 0x1.ec7p+23},
+    {0x1.4d71f4p+35, 0x1.9d3p+23},
+    {0x1.316968p+35, 0x1.8b7p+23},
+    {0x1.f5ad74p+35, 0x1.fadp+23},
+    {0x1.1961d8p+35, 0x1.7b9p+23},
+    {0x1.75851cp+35, 0x1.b55p+23},
+    {0x1.95918cp+35, 0x1.c7bp+23},
+    {0x1.41732cp+35, 0x1.95bp+23},
+    {0x1.c5a3ep+35, 0x1.e1fp+23},
+    {0x1.9192c8p+35, 0x1.c57p+23},
+    {0x1.c9a64p+35, 0x1.e41p+23},
+    {0x1.21687p+35, 0x1.80fp+23},
+    {0x1.99956p+35, 0x1.c9fp+23},
+    {0x1.618288p+35, 0x1.a97p+23},
+    {0x1.a99cf4p+35, 0x1.d2dp+23},
+    {0x1.c1a604p+35, 0x1.dfdp+23},
+    {0x1.b1a0d4p+35, 0x1.d73p+23},
+    {0x1.517e0cp+35, 0x1.9fbp+23},
+    {0x1.055d64p+35, 0x1.6ddp+23},
+    {0x1.cdad24p+35, 0x1.e63p+23},
+    {0x1.8192ccp+35, 0x1.bc5p+23},
+    {0x1.f1b9d4p+35, 0x1.f8dp+23},
+    {0x1.8d9914p+35, 0x1.c33p+23},
+    {0x1.9d9e44p+35, 0x1.cc3p+23},
+    {0x1.bdacacp+35, 0x1.ddbp+23},
+    {0x1.e5b89cp+35, 0x1.f2bp+23},
+    {0x1.357aep+35, 0x1.8e1p+23},
+    {0x1.ddb808p+35, 0x1.ee9p+23},
+    {0x1.3d8014p+35, 0x1.933p+23},
+    {0x1.79968cp+35, 0x1.b7bp+23},
+    {0x1.d1b88cp+35, 0x1.e85p+23},
+    {0x1.5d92fp+35, 0x1.a71p+23},
+    {0x1.a1ac38p+35, 0x1.ce7p+23},
+    {0x1.559064p+35, 0x1.a23p+23},
+    {0x1.89a47p+35, 0x1.c0fp+23},
+    {0x1.b9b7d8p+35, 0x1.db9p+23},
+    {0x1.1d796cp+35, 0x1.7e5p+23},
+    {0x1.edca34p+35, 0x1.f6dp+23},
+    {0x1.117824p+35, 0x1.763p+23},
+    {0x1.adb9dp+35, 0x1.d51p+23},
+    {0x1.0d76e8p+35, 0x1.737p+23},
+    {0x1.d5c878p+35, 0x1.ea7p+23},
+    {0x1.01733p+35, 0x1.6b1p+23},
+    {0x1.1580fp+35, 0x1.78fp+23},
+    {0x1.7dadap+35, 0x1.ba1p+23},
+    {0x1.298becp+35, 0x1.865p+23},
+    {0x1.39933cp+35, 0x1.90bp+23},
+    {0x1.2d8ffp+35, 0x1.88fp+23},
+    {0x1.e1d53cp+35, 0x1.f0bp+23},
+    {0x1.a5bf3cp+35, 0x1.d0bp+23},
+    {0x1.b5c788p+35, 0x1.d97p+23},
+    {0x1.6da93cp+35, 0x1.b0bp+23},
+    {0x1.69a86cp+35, 0x1.ae5p+23},
+    {0x1.097d3cp+35, 0x1.70bp+23},
+    {0x1.85b4dcp+35, 0x1.bebp+23},
+    {0x1.499f74p+35, 0x1.9adp+23},
+    {0x1.fde09p+35, 0x1.fefp+23},
+    {0x1.258eccp+35, 0x1.83bp+23},
+    {0x1.71afbp+35, 0x1.b31p+23},
+    {0x1.459f8cp+35, 0x1.985p+23},
+    {0x1.59a8fcp+35, 0x1.a4bp+23},
+    {0x1.e9de94p+35, 0x1.f4dp+23},
+    {0x1.f9e4bp+35, 0x1.fcfp+23},
+    {0x1.65ad4p+35, 0x1.abfp+23},
+    {0x1.4da59cp+35, 0x1.9d5p+23},
+    {0x1.319ad8p+35, 0x1.8b9p+23},
+    {0x1.d9dce8p+35, 0x1.ec9p+23},
+    {0x1.19914cp+35, 0x1.7bbp+23},
+    {0x1.41a5e4p+35, 0x1.95dp+23},
+    {0x1.75bbc8p+35, 0x1.b57p+23},
+    {0x1.f5ecdp+35, 0x1.fafp+23},
+    {0x1.21989p+35, 0x1.811p+23},
+    {0x1.95ca84p+35, 0x1.c7dp+23},
+    {0x1.61b7b8p+35, 0x1.a99p+23},
+    {0x1.c5e02p+35, 0x1.e21p+23},
+    {0x1.91cb78p+35, 0x1.c59p+23},
+    {0x1.058b2p+35, 0x1.6dfp+23},
+    {0x1.99ceap+35, 0x1.ca1p+23},
+    {0x1.51b204p+35, 0x1.9fdp+23},
+    {0x1.c9e2c4p+35, 0x1.e43p+23},
+    {0x1.a9d75p+35, 0x1.d2fp+23},
+    {0x1.b1dbbcp+35, 0x1.d75p+23},
+    {0x1.c1e2p+35, 0x1.dffp+23},
+    {0x1.cde9ecp+35, 0x1.e65p+23},
+    {0x1.81ca58p+35, 0x1.bc7p+23},
+    {0x1.8dd17cp+35, 0x1.c35p+23},
+    {0x1.35aca4p+35, 0x1.8e3p+23},
+    {0x1.9dd7ccp+35, 0x1.cc5p+23},
+    {0x1.f1f8fp+35, 0x1.f8fp+23},
+    {0x1.bde864p+35, 0x1.dddp+23},
+    {0x1.3db27cp+35, 0x1.935p+23},
+    {0x1.e5f6f4p+35, 0x1.f2dp+23},
+    {0x1.ddf5dcp+35, 0x1.eebp+23},
+    {0x1.79cd84p+35, 0x1.b7dp+23},
+    {0x1.d1f598p+35, 0x1.e87p+23},
+    {0x1.5dc7d4p+35, 0x1.a73p+23},
+    {0x1.55c4acp+35, 0x1.a25p+23},
+    {0x1.1da938p+35, 0x1.7e7p+23},
+    {0x1.a1e608p+35, 0x1.ce9p+23},
+    {0x1.89dc9p+35, 0x1.c11p+23},
+    {0x1.b9f34cp+35, 0x1.dbbp+23},
+    {0x1.11a6ecp+35, 0x1.765p+23},
+    {0x1.ee091p+35, 0x1.f6fp+23},
+    {0x1.0da558p+35, 0x1.739p+23},
+    {0x1.01a094p+35, 0x1.6b3p+23},
+    {0x1.adf474p+35, 0x1.d53p+23},
+    {0x1.d605c8p+35, 0x1.ea9p+23},
+    {0x1.15b01p+35, 0x1.791p+23},
+    {0x1.29bcb8p+35, 0x1.867p+23},
+    {0x1.39c554p+35, 0x1.90dp+23},
+    {0x1.2dc11p+35, 0x1.891p+23},
+    {0x1.7de4e4p+35, 0x1.ba3p+23},
+    {0x1.09ab54p+35, 0x1.70dp+23},
+    {0x1.69de38p+35, 0x1.ae7p+23},
+    {0x1.6ddf54p+35, 0x1.b0dp+23},
+    {0x1.a5f954p+35, 0x1.d0dp+23},
+    {0x1.b602b8p+35, 0x1.d99p+23},
+    {0x1.49d2dp+35, 0x1.9afp+23},
+    {0x1.85ecb4p+35, 0x1.bedp+23},
+    {0x1.25bf44p+35, 0x1.83dp+23},
+    {0x1.e21354p+35, 0x1.f0dp+23},
+    {0x1.45d298p+35, 0x1.987p+23},
+    {0x1.71e614p+35, 0x1.b33p+23},
+    {0x1.59dd94p+35, 0x1.a4dp+23},
+    {0x1.fe207p+35, 0x1.ff1p+23},
+    {0x1.4dd948p+35, 0x1.9d7p+23},
+    {0x1.65e2cp+35, 0x1.ac1p+23},
+    {0x1.31cc4cp+35, 0x1.8bbp+23},
+    {0x1.ea1d3p+35, 0x1.f4fp+23},
+    {0x1.19c0c4p+35, 0x1.7bdp+23},
+    {0x1.fa245p+35, 0x1.fd1p+23},
+    {0x1.da1a7cp+35, 0x1.ecbp+23},
+    {0x1.41d8ap+35, 0x1.95fp+23},
+    {0x1.21c8b4p+35, 0x1.813p+23},
+    {0x1.75f278p+35, 0x1.b59p+23},
+    {0x1.05b8ep+35, 0x1.6e1p+23},
+    {0x1.96038p+35, 0x1.c7fp+23},
+    {0x1.61ececp+35, 0x1.a9bp+23},
+    {0x1.f62c3p+35, 0x1.fb1p+23},
+    {0x1.51e6p+35, 0x1.9ffp+23},
+    {0x1.92042cp+35, 0x1.c5bp+23},
+    {0x1.c61c64p+35, 0x1.e23p+23},
+    {0x1.9a07e4p+35, 0x1.ca3p+23},
+    {0x1.aa11bp+35, 0x1.d31p+23},
+    {0x1.ca1f4cp+35, 0x1.e45p+23},
+    {0x1.b216a8p+35, 0x1.d77p+23},
+    {0x1.c21ep+35, 0x1.e01p+23},
+    {0x1.35de6cp+35, 0x1.8e5p+23},
+    {0x1.8201e8p+35, 0x1.bc9p+23},
+    {0x1.ce26b8p+35, 0x1.e67p+23},
+    {0x1.8e09e8p+35, 0x1.c37p+23},
+    {0x1.3de4e8p+35, 0x1.937p+23},
+    {0x1.9e1158p+35, 0x1.cc7p+23},
+    {0x1.f2381p+35, 0x1.f91p+23},
+    {0x1.be242p+35, 0x1.ddfp+23},
+    {0x1.7a048p+35, 0x1.b7fp+23},
+    {0x1.e6355p+35, 0x1.f2fp+23},
+    {0x1.de33b4p+35, 0x1.eedp+23},
+    {0x1.1dd908p+35, 0x1.7e9p+23},
+    {0x1.d232a8p+35, 0x1.e89p+23},
+    {0x1.55f8f8p+35, 0x1.a27p+23},
+    {0x1.5dfcbcp+35, 0x1.a75p+23},
+    {0x1.11d5b8p+35, 0x1.767p+23},
+    {0x1.8a14b4p+35, 0x1.c13p+23},
+    {0x1.a21fdcp+35, 0x1.cebp+23},
+    {0x1.0dd3ccp+35, 0x1.73bp+23},
+    {0x1.ba2ec4p+35, 0x1.dbdp+23},
+    {0x1.01cdfcp+35, 0x1.6b5p+23},
+    {0x1.ee47fp+35, 0x1.f71p+23},
+    {0x1.15df34p+35, 0x1.793p+23},
+    {0x1.ae2f1cp+35, 0x1.d55p+23},
+    {0x1.29ed88p+35, 0x1.869p+23},
+    {0x1.39f77p+35, 0x1.90fp+23},
+    {0x1.d6431cp+35, 0x1.eabp+23},
+    {0x1.09d97p+35, 0x1.70fp+23},
+    {0x1.2df234p+35, 0x1.893p+23},
+    {0x1.7e1c2cp+35, 0x1.ba5p+23},
+    {0x1.25efcp+35, 0x1.83fp+23},
+    {0x1.6a1408p+35, 0x1.ae9p+23},
+    {0x1.4a063p+35, 0x1.9b1p+23},
+    {0x1.6e157p+35, 0x1.b0fp+23},
+    {0x1.a6337p+35, 0x1.d0fp+23},
+    {0x1.86249p+35, 0x1.befp+23},
+    {0x1.b63decp+35, 0x1.d9bp+23},
+    {0x1.4605a8p+35, 0x1.989p+23},
+    {0x1.e2517p+35, 0x1.f0fp+23},
+    {0x1.5a123p+35, 0x1.a4fp+23},
+    {0x1.721c7cp+35, 0x1.b35p+23},
+    {0x1.31fdc4p+35, 0x1.8bdp+23},
+    {0x1.4e0cf8p+35, 0x1.9d9p+23},
+    {0x1.661844p+35, 0x1.ac3p+23},
+    {0x1.19f04p+35, 0x1.7bfp+23},
+    {0x1.fe6054p+35, 0x1.ff3p+23},
+    {0x1.ea5bdp+35, 0x1.f51p+23},
+    {0x1.21f8dcp+35, 0x1.815p+23},
+    {0x1.fa63f4p+35, 0x1.fd3p+23},
+    {0x1.420b6p+35, 0x1.961p+23},
+    {0x1.05e6a4p+35, 0x1.6e3p+23},
+    {0x1.da5814p+35, 0x1.ecdp+23},
+    {0x1.76292cp+35, 0x1.b5bp+23},
+    {0x1.622224p+35, 0x1.a9dp+23},
+    {0x1.963c8p+35, 0x1.c81p+23},
+    {0x1.521ap+35, 0x1.a01p+23},
+    {0x1.923ce4p+35, 0x1.c5dp+23},
+    {0x1.f66b94p+35, 0x1.fb3p+23},
+    {0x1.9a412cp+35, 0x1.ca5p+23},
+    {0x1.c658acp+35, 0x1.e25p+23},
+    {0x1.aa4c14p+35, 0x1.d33p+23},
+    {0x1.361038p+35, 0x1.8e7p+23},
+    {0x1.ca5bd8p+35, 0x1.e47p+23},
+    {0x1.b25198p+35, 0x1.d79p+23},
+    {0x1.c25a04p+35, 0x1.e03p+23},
+    {0x1.82397cp+35, 0x1.bcbp+23},
+    {0x1.3e1758p+35, 0x1.939p+23},
+    {0x1.8e4258p+35, 0x1.c39p+23},
+    {0x1.ce6388p+35, 0x1.e69p+23},
+    {0x1.9e4ae8p+35, 0x1.cc9p+23},
+    {0x1.7a3b8p+35, 0x1.b81p+23},
+    {0x1.be5fep+35, 0x1.de1p+23},
+    {0x1.f27734p+35, 0x1.f93p+23},
+    {0x1.1e08dcp+35, 0x1.7ebp+23},
+    {0x1.de719p+35, 0x1.eefp+23},
+    {0x1.e673bp+35, 0x1.f31p+23},
+    {0x1.120488p+35, 0x1.769p+23},
+    {0x1.562d48p+35, 0x1.a29p+23},
+    {0x1.5e31a8p+35, 0x1.a77p+23},
+    {0x1.d26fbcp+35, 0x1.e8bp+23},
+    {0x1.0e0244p+35, 0x1.73dp+23},
+    {0x1.01fb68p+35, 0x1.6b7p+23},
+    {0x1.8a4cdcp+35, 0x1.c15p+23},
+    {0x1.a259b4p+35, 0x1.cedp+23},
+    {0x1.ba6a4p+35, 0x1.dbfp+23},
+    {0x1.160e5cp+35, 0x1.795p+23},
+    {0x1.2a1e5cp+35, 0x1.86bp+23},
+    {0x1.ee86d4p+35, 0x1.f73p+23},
+    {0x1.0a079p+35, 0x1.711p+23},
+    {0x1.3a299p+35, 0x1.911p+23},
+    {0x1.2e235cp+35, 0x1.895p+23},
+    {0x1.ae69c8p+35, 0x1.d57p+23},
+    {0x1.d68074p+35, 0x1.eadp+23},
+    {0x1.26204p+35, 0x1.841p+23},
+    {0x1.7e5378p+35, 0x1.ba7p+23},
+    {0x1.4a3994p+35, 0x1.9b3p+23},
+    {0x1.6a49dcp+35, 0x1.aebp+23},
+    {0x1.6e4b9p+35, 0x1.b11p+23},
+    {0x1.865c7p+35, 0x1.bf1p+23},
+    {0x1.4638bcp+35, 0x1.98bp+23},
+    {0x1.a66d9p+35, 0x1.d11p+23},
+    {0x1.b67924p+35, 0x1.d9dp+23},
+    {0x1.322f4p+35, 0x1.8bfp+23},
+    {0x1.5a46dp+35, 0x1.a51p+23},
+    {0x1.1a1fcp+35, 0x1.7c1p+23},
+    {0x1.4e40acp+35, 0x1.9dbp+23},
+    {0x1.7252e8p+35, 0x1.b37p+23},
+    {0x1.e28f9p+35, 0x1.f11p+23},
+    {0x1.664dccp+35, 0x1.ac5p+23},
+    {0x1.222908p+35, 0x1.817p+23},
+    {0x1.06146cp+35, 0x1.6e5p+23},
+    {0x1.423e24p+35, 0x1.963p+23},
+    {0x1.fea03cp+35, 0x1.ff5p+23},
+    {0x1.ea9a74p+35, 0x1.f53p+23},
+    {0x1.faa39cp+35, 0x1.fd5p+23},
+    {0x1.765fe4p+35, 0x1.b5dp+23},
+    {0x1.da95bp+35, 0x1.ecfp+23},
+    {0x1.524e04p+35, 0x1.a03p+23},
+    {0x1.62576p+35, 0x1.a9fp+23},
+    {0x1.967584p+35, 0x1.c83p+23},
+    {0x1.9275ap+35, 0x1.c5fp+23},
+    {0x1.364208p+35, 0x1.8e9p+23},
+    {0x1.9a7a78p+35, 0x1.ca7p+23},
+    {0x1.c694f8p+35, 0x1.e27p+23},
+    {0x1.f6aafcp+35, 0x1.fb5p+23},
+    {0x1.aa867cp+35, 0x1.d35p+23},
+    {0x1.3e49ccp+35, 0x1.93bp+23},
+    {0x1.b28c8cp+35, 0x1.d7bp+23},
+    {0x1.ca9868p+35, 0x1.e49p+23},
+    {0x1.c2960cp+35, 0x1.e05p+23},
+    {0x1.827114p+35, 0x1.bcdp+23},
+    {0x1.8e7accp+35, 0x1.c3bp+23},
+    {0x1.9e847cp+35, 0x1.ccbp+23},
+    {0x1.cea05cp+35, 0x1.e6bp+23},
+    {0x1.1e38b4p+35, 0x1.7edp+23},
+    {0x1.7a7284p+35, 0x1.b83p+23},
+    {0x1.be9ba4p+35, 0x1.de3p+23},
+    {0x1.12335cp+35, 0x1.76bp+23},
+    {0x1.f2b65cp+35, 0x1.f95p+23},
+    {0x1.0e30cp+35, 0x1.73fp+23},
+    {0x1.56619cp+35, 0x1.a2bp+23},
+    {0x1.5e6698p+35, 0x1.a79p+23},
+    {0x1.deaf7p+35, 0x1.ef1p+23},
+    {0x1.0228d8p+35, 0x1.6b9p+23},
+    {0x1.e6b214p+35, 0x1.f33p+23},
+    {0x1.d2acd4p+35, 0x1.e8dp+23},
+    {0x1.8a8508p+35, 0x1.c17p+23},
+    {0x1.163d88p+35, 0x1.797p+23},
+    {0x1.a2939p+35, 0x1.cefp+23},
+    {0x1.2a4f34p+35, 0x1.86dp+23},
+    {0x1.baa5cp+35, 0x1.dc1p+23},
+    {0x1.0a35b4p+35, 0x1.713p+23},
+    {0x1.3a5bb4p+35, 0x1.913p+23},
+    {0x1.2e5488p+35, 0x1.897p+23},
+    {0x1.aea478p+35, 0x1.d59p+23},
+    {0x1.eec5bcp+35, 0x1.f75p+23},
+    {0x1.2650c4p+35, 0x1.843p+23},
+    {0x1.4a6cfcp+35, 0x1.9b5p+23},
+    {0x1.7e8ac8p+35, 0x1.ba9p+23},
+    {0x1.d6bddp+35, 0x1.eafp+23},
+    {0x1.6a7fb4p+35, 0x1.aedp+23},
+    {0x1.6e81b4p+35, 0x1.b13p+23},
+    {0x1.466bd4p+35, 0x1.98dp+23},
+    {0x1.3260cp+35, 0x1.8c1p+23},
+    {0x1.1a4f44p+35, 0x1.7c3p+23},
+    {0x1.869454p+35, 0x1.bf3p+23},
+    {0x1.5a7b74p+35, 0x1.a53p+23},
+    {0x1.a6a7b4p+35, 0x1.d13p+23},
+    {0x1.4e7464p+35, 0x1.9ddp+23},
+    {0x1.b6b46p+35, 0x1.d9fp+23},
+    {0x1.728958p+35, 0x1.b39p+23},
+    {0x1.668358p+35, 0x1.ac7p+23},
+    {0x1.225938p+35, 0x1.819p+23},
+    {0x1.064238p+35, 0x1.6e7p+23},
+    {0x1.e2cdb4p+35, 0x1.f13p+23},
+    {0x1.4270ecp+35, 0x1.965p+23},
+    {0x1.fee028p+35, 0x1.ff7p+23},
+    {0x1.7696ap+35, 0x1.b5fp+23},
+    {0x1.ead91cp+35, 0x1.f55p+23},
+    {0x1.52820cp+35, 0x1.a05p+23},
+    {0x1.628cap+35, 0x1.aa1p+23},
+    {0x1.fae348p+35, 0x1.fd7p+23},
+    {0x1.dad35p+35, 0x1.ed1p+23},
+    {0x1.96ae8cp+35, 0x1.c85p+23},
+    {0x1.3673dcp+35, 0x1.8ebp+23},
+    {0x1.92ae6p+35, 0x1.c61p+23},
+    {0x1.3e7c44p+35, 0x1.93dp+23},
+    {0x1.9ab3c8p+35, 0x1.ca9p+23},
+    {0x1.aac0e8p+35, 0x1.d37p+23},
+    {0x1.c6d148p+35, 0x1.e29p+23},
+    {0x1.b2c784p+35, 0x1.d7dp+23},
+    {0x1.f6ea68p+35, 0x1.fb7p+23},
+    {0x1.82a8bp+35, 0x1.bcfp+23},
+    {0x1.cad4fcp+35, 0x1.e4bp+23},
+    {0x1.c2d218p+35, 0x1.e07p+23},
+    {0x1.1e689p+35, 0x1.7efp+23},
+    {0x1.8eb344p+35, 0x1.c3dp+23},
+    {0x1.126234p+35, 0x1.76dp+23},
+    {0x1.9ebe14p+35, 0x1.ccdp+23},
+    {0x1.cedd34p+35, 0x1.e6dp+23},
+    {0x1.7aa98cp+35, 0x1.b85p+23},
+    {0x1.0e5f4p+35, 0x1.741p+23},
+    {0x1.02564cp+35, 0x1.6bbp+23},
+    {0x1.5695f4p+35, 0x1.a2dp+23},
+    {0x1.bed76cp+35, 0x1.de5p+23},
+    {0x1.5e9b8cp+35, 0x1.a7bp+23},
+    {0x1.f2f588p+35, 0x1.f97p+23},
+    {0x1.deed54p+35, 0x1.ef3p+23},
+    {0x1.e6f07cp+35, 0x1.f35p+23},
+    {0x1.166cb8p+35, 0x1.799p+23},
+    {0x1.d2e9fp+35, 0x1.e8fp+23},
+    {0x1.8abd38p+35, 0x1.c19p+23},
+    {0x1.2a801p+35, 0x1.86fp+23},
+    {0x1.0a63dcp+35, 0x1.715p+23},
+    {0x1.a2cd7p+35, 0x1.cf1p+23},
+    {0x1.2e85b8p+35, 0x1.899p+23},
+    {0x1.3a8ddcp+35, 0x1.915p+23},
+    {0x1.bae144p+35, 0x1.dc3p+23},
+    {0x1.26814cp+35, 0x1.845p+23},
+    {0x1.4aa068p+35, 0x1.9b7p+23},
+    {0x1.aedf2cp+35, 0x1.d5bp+23},
+    {0x1.ef04a8p+35, 0x1.f77p+23},
+    {0x1.7ec21cp+35, 0x1.babp+23},
+    {0x1.6ab59p+35, 0x1.aefp+23},
+    {0x1.329244p+35, 0x1.8c3p+23},
+    {0x1.6eb7dcp+35, 0x1.b15p+23},
+    {0x1.469efp+35, 0x1.98fp+23},
+    {0x1.1a7eccp+35, 0x1.7c5p+23},
+    {0x1.d6fb3p+35, 0x1.eb1p+23},
+    {0x1.5ab01cp+35, 0x1.a55p+23},
+    {0x1.4ea82p+35, 0x1.9dfp+23},
+    {0x1.86cc3cp+35, 0x1.bf5p+23},
+    {0x1.a6e1dcp+35, 0x1.d15p+23},
+    {0x1.22896cp+35, 0x1.81bp+23},
+    {0x1.067008p+35, 0x1.6e9p+23},
+    {0x1.b6efap+35, 0x1.da1p+23},
+    {0x1.72bfccp+35, 0x1.b3bp+23},
+    {0x1.66b8e8p+35, 0x1.ac9p+23},
+    {0x1.42a3b8p+35, 0x1.967p+23},
+    {0x1.e30bdcp+35, 0x1.f15p+23},
+    {0x1.52b618p+35, 0x1.a07p+23},
+    {0x1.76cd6p+35, 0x1.b61p+23},
+    {0x1.62c1e4p+35, 0x1.aa3p+23},
+    {0x1.ff2018p+35, 0x1.ff9p+23},
+    {0x1.eb17c8p+35, 0x1.f57p+23},
+    {0x1.36a5b4p+35, 0x1.8edp+23},
+    {0x1.96e798p+35, 0x1.c87p+23},
+    {0x1.fb22f8p+35, 0x1.fd9p+23},
+    {0x1.db10f4p+35, 0x1.ed3p+23},
+    {0x1.3eaecp+35, 0x1.93fp+23},
+    {0x1.92e724p+35, 0x1.c63p+23},
+    {0x1.9aed1cp+35, 0x1.cabp+23},
+    {0x1.aafb58p+35, 0x1.d39p+23},
+    {0x1.1e987p+35, 0x1.7f1p+23},
+    {0x1.c70d9cp+35, 0x1.e2bp+23},
+    {0x1.82e05p+35, 0x1.bd1p+23},
+    {0x1.b3028p+35, 0x1.d7fp+23},
+    {0x1.12911p+35, 0x1.76fp+23},
+    {0x1.cb1194p+35, 0x1.e4dp+23},
+    {0x1.f729d8p+35, 0x1.fb9p+23},
+    {0x1.c30e28p+35, 0x1.e09p+23},
+    {0x1.8eebcp+35, 0x1.c3fp+23},
+    {0x1.0e8dc4p+35, 0x1.743p+23},
+    {0x1.0283c4p+35, 0x1.6bdp+23},
+    {0x1.7ae098p+35, 0x1.b87p+23},
+    {0x1.9ef7bp+35, 0x1.ccfp+23},
+    {0x1.cf1a1p+35, 0x1.e6fp+23},
+    {0x1.56ca5p+35, 0x1.a2fp+23},
+    {0x1.5ed084p+35, 0x1.a7dp+23},
+    {0x1.bf1338p+35, 0x1.de7p+23},
+    {0x1.169becp+35, 0x1.79bp+23},
+    {0x1.0a9208p+35, 0x1.717p+23},
+    {0x1.2ab0fp+35, 0x1.871p+23},
+    {0x1.f334b8p+35, 0x1.f99p+23},
+    {0x1.df2b3cp+35, 0x1.ef5p+23},
+    {0x1.e72ee8p+35, 0x1.f37p+23},
+    {0x1.8af56cp+35, 0x1.c1bp+23},
+    {0x1.2eb6ecp+35, 0x1.89bp+23},
+    {0x1.d3271p+35, 0x1.e91p+23},
+    {0x1.3ac008p+35, 0x1.917p+23},
+    {0x1.a30754p+35, 0x1.cf3p+23},
+    {0x1.26b1d8p+35, 0x1.847p+23},
+    {0x1.bb1cccp+35, 0x1.dc5p+23},
+    {0x1.4ad3d8p+35, 0x1.9b9p+23},
+    {0x1.af19e4p+35, 0x1.d5dp+23},
+    {0x1.1aae58p+35, 0x1.7c7p+23},
+    {0x1.32c3ccp+35, 0x1.8c5p+23},
+    {0x1.6aeb7p+35, 0x1.af1p+23},
+    {0x1.46d21p+35, 0x1.991p+23},
+    {0x1.7ef974p+35, 0x1.badp+23},
+    {0x1.ef4398p+35, 0x1.f79p+23},
+    {0x1.6eee08p+35, 0x1.b17p+23},
+    {0x1.5ae4c8p+35, 0x1.a57p+23},
+    {0x1.4edbep+35, 0x1.9e1p+23},
+    {0x1.d73894p+35, 0x1.eb3p+23},
+    {0x1.069ddcp+35, 0x1.6ebp+23},
+    {0x1.22b9a4p+35, 0x1.81dp+23},
+    {0x1.870428p+35, 0x1.bf7p+23},
+    {0x1.72f644p+35, 0x1.b3dp+23},
+    {0x1.a71c08p+35, 0x1.d17p+23},
+    {0x1.66ee7cp+35, 0x1.acbp+23},
+    {0x1.b72ae4p+35, 0x1.da3p+23},
+    {0x1.42d688p+35, 0x1.969p+23},
+    {0x1.e34a08p+35, 0x1.f17p+23},
+    {0x1.52ea28p+35, 0x1.a09p+23},
+    {0x1.62f72cp+35, 0x1.aa5p+23},
+    {0x1.770424p+35, 0x1.b63p+23},
+    {0x1.36d79p+35, 0x1.8efp+23},
+    {0x1.ff600cp+35, 0x1.ffbp+23},
+    {0x1.3ee14p+35, 0x1.941p+23},
+    {0x1.eb5678p+35, 0x1.f59p+23},
+    {0x1.9720a8p+35, 0x1.c89p+23},
+    {0x1.931fecp+35, 0x1.c65p+23},
+    {0x1.db4e9cp+35, 0x1.ed5p+23},
+    {0x1.fb62acp+35, 0x1.fdbp+23},
+    {0x1.1ec854p+35, 0x1.7f3p+23},
+    {0x1.12bffp+35, 0x1.771p+23},
+    {0x1.9b2674p+35, 0x1.cadp+23},
+    {0x1.0ebc4cp+35, 0x1.745p+23},
+    {0x1.8317f4p+35, 0x1.bd3p+23},
+    {0x1.ab35ccp+35, 0x1.d3bp+23},
+    {0x1.b33d8p+35, 0x1.d81p+23},
+    {0x1.02b14p+35, 0x1.6bfp+23},
+    {0x1.c749f4p+35, 0x1.e2dp+23},
+    {0x1.8f244p+35, 0x1.c41p+23},
+    {0x1.c34a3cp+35, 0x1.e0bp+23},
+    {0x1.cb4e3p+35, 0x1.e4fp+23},
+    {0x1.f7694cp+35, 0x1.fbbp+23},
+    {0x1.7b17a8p+35, 0x1.b89p+23},
+    {0x1.56febp+35, 0x1.a31p+23},
+    {0x1.9f315p+35, 0x1.cd1p+23},
+    {0x1.5f058p+35, 0x1.a7fp+23},
+    {0x1.16cb24p+35, 0x1.79dp+23},
+    {0x1.cf56fp+35, 0x1.e71p+23},
+    {0x1.0ac038p+35, 0x1.719p+23},
+    {0x1.bf4f08p+35, 0x1.de9p+23},
+    {0x1.2ae1d4p+35, 0x1.873p+23},
+    {0x1.2ee824p+35, 0x1.89dp+23},
+    {0x1.3af238p+35, 0x1.919p+23},
+    {0x1.f373ecp+35, 0x1.f9bp+23},
+    {0x1.8b2da4p+35, 0x1.c1dp+23},
+    {0x1.df6928p+35, 0x1.ef7p+23},
+    {0x1.26e268p+35, 0x1.849p+23},
+    {0x1.e76d58p+35, 0x1.f39p+23},
+    {0x1.d36434p+35, 0x1.e93p+23},
+    {0x1.a3413cp+35, 0x1.cf5p+23},
+    {0x1.4b074cp+35, 0x1.9bbp+23},
+    {0x1.bb5858p+35, 0x1.dc7p+23},
+    {0x1.1adde8p+35, 0x1.7c9p+23},
+    {0x1.32f558p+35, 0x1.8c7p+23},
+    {0x1.470534p+35, 0x1.993p+23},
+    {0x1.af54ap+35, 0x1.d5fp+23},
+    {0x1.6b2154p+35, 0x1.af3p+23},
+    {0x1.7f30dp+35, 0x1.bafp+23},
+    {0x1.06cbb4p+35, 0x1.6edp+23},
+    {0x1.6f2438p+35, 0x1.b19p+23},
+    {0x1.22e9ep+35, 0x1.81fp+23},
+    {0x1.5b1978p+35, 0x1.a59p+23},
+    {0x1.4f0fa4p+35, 0x1.9e3p+23},
+    {0x1.ef828cp+35, 0x1.f7bp+23},
+    {0x1.873c18p+35, 0x1.bf9p+23},
+    {0x1.d775fcp+35, 0x1.eb5p+23},
+    {0x1.732ccp+35, 0x1.b3fp+23},
+    {0x1.672414p+35, 0x1.acdp+23},
+    {0x1.43095cp+35, 0x1.96bp+23},
+    {0x1.a75638p+35, 0x1.d19p+23},
+    {0x1.b7662cp+35, 0x1.da5p+23},
+    {0x1.531e3cp+35, 0x1.a0bp+23},
+    {0x1.37097p+35, 0x1.8f1p+23},
+    {0x1.e38838p+35, 0x1.f19p+23},
+    {0x1.632c78p+35, 0x1.aa7p+23},
+    {0x1.773aecp+35, 0x1.b65p+23},
+    {0x1.3f13c4p+35, 0x1.943p+23},
+    {0x1.9759bcp+35, 0x1.c8bp+23},
+    {0x1.1ef83cp+35, 0x1.7f5p+23},
+    {0x1.12eed4p+35, 0x1.773p+23},
+    {0x1.eb952cp+35, 0x1.f5bp+23},
+    {0x1.ffa004p+35, 0x1.ffdp+23},
+    {0x1.9358b8p+35, 0x1.c67p+23},
+    {0x1.0eead8p+35, 0x1.747p+23},
+    {0x1.db8c48p+35, 0x1.ed7p+23},
+    {0x1.02decp+35, 0x1.6c1p+23},
+    {0x1.fba264p+35, 0x1.fddp+23},
+    {0x1.9b5fdp+35, 0x1.cafp+23},
+    {0x1.834f9cp+35, 0x1.bd5p+23},
+    {0x1.ab7044p+35, 0x1.d3dp+23},
+    {0x1.b37884p+35, 0x1.d83p+23},
+    {0x1.c7865p+35, 0x1.e2fp+23},
+    {0x1.8f5cc4p+35, 0x1.c43p+23},
+    {0x1.c38654p+35, 0x1.e0dp+23},
+    {0x1.7b4ebcp+35, 0x1.b8bp+23},
+    {0x1.16fa6p+35, 0x1.79fp+23},
+    {0x1.cb8adp+35, 0x1.e51p+23},
+    {0x1.573314p+35, 0x1.a33p+23},
+    {0x1.5f3a8p+35, 0x1.a81p+23},
+    {0x1.0aee6cp+35, 0x1.71bp+23},
+    {0x1.f7a8c4p+35, 0x1.fbdp+23},
+    {0x1.9f6af4p+35, 0x1.cd3p+23},
+    {0x1.2b12bcp+35, 0x1.875p+23},
+    {0x1.cf93d4p+35, 0x1.e73p+23},
+    {0x1.2f196p+35, 0x1.89fp+23},
+    {0x1.bf8adcp+35, 0x1.debp+23},
+    {0x1.3b246cp+35, 0x1.91bp+23},
+    {0x1.2712fcp+35, 0x1.84bp+23},
+    {0x1.8b65ep+35, 0x1.c1fp+23},
+    {0x1.dfa718p+35, 0x1.ef9p+23},
+    {0x1.f3b324p+35, 0x1.f9dp+23},
+    {0x1.a37b28p+35, 0x1.cf7p+23},
+    {0x1.d3a15cp+35, 0x1.e95p+23},
+    {0x1.e7abccp+35, 0x1.f3bp+23},
+    {0x1.4b3ac4p+35, 0x1.9bdp+23},
+    {0x1.1b0d7cp+35, 0x1.7cbp+23},
+    {0x1.3326e8p+35, 0x1.8c9p+23},
+    {0x1.bb93e8p+35, 0x1.dc9p+23},
+    {0x1.47385cp+35, 0x1.995p+23},
+    {0x1.06f99p+35, 0x1.6efp+23},
+    {0x1.6b573cp+35, 0x1.af5p+23},
+    {0x1.231a2p+35, 0x1.821p+23},
+    {0x1.af8f6p+35, 0x1.d61p+23},
+    {0x1.7f683p+35, 0x1.bb1p+23},
+    {0x1.6f5a6cp+35, 0x1.b1bp+23},
+    {0x1.4f436cp+35, 0x1.9e5p+23},
+    {0x1.5b4e2cp+35, 0x1.a5bp+23},
+    {0x1.efc184p+35, 0x1.f7dp+23},
+    {0x1.87740cp+35, 0x1.bfbp+23},
+    {0x1.73634p+35, 0x1.b41p+23},
+    {0x1.433c34p+35, 0x1.96dp+23},
+    {0x1.6759bp+35, 0x1.acfp+23},
+    {0x1.d7b368p+35, 0x1.eb7p+23},
+    {0x1.a7906cp+35, 0x1.d1bp+23},
+    {0x1.b7a178p+35, 0x1.da7p+23},
+    {0x1.535254p+35, 0x1.a0dp+23},
+    {0x1.373b54p+35, 0x1.8f3p+23},
+    {0x1.6361c8p+35, 0x1.aa9p+23},
+    {0x1.7771b8p+35, 0x1.b67p+23},
+    {0x1.e3c66cp+35, 0x1.f1bp+23},
+    {0x1.3f464cp+35, 0x1.945p+23},
+    {0x1.1f2828p+35, 0x1.7f7p+23},
+    {0x1.131dbcp+35, 0x1.775p+23},
+    {0x1.0f1968p+35, 0x1.749p+23},
+    {0x1.9792d4p+35, 0x1.c8dp+23},
+    {0x1.030c44p+35, 0x1.6c3p+23},
+    {0x1.939188p+35, 0x1.c69p+23},
+    {0x1.ebd3e4p+35, 0x1.f5dp+23},
+    {0x1.ffep+35, 0x1.fffp+23},
+    {0x1.9b993p+35, 0x1.cb1p+23},
+    {0x1.dbc9f8p+35, 0x1.ed9p+23},
+    {0x1.838748p+35, 0x1.bd7p+23},
+    {0x1.fbe22p+35, 0x1.fdfp+23},
+    {0x1.abaacp+35, 0x1.d3fp+23},
+    {0x1.1729ap+35, 0x1.7a1p+23},
+    {0x1.b3b38cp+35, 0x1.d85p+23},
+    {0x1.8f954cp+35, 0x1.c45p+23},
+    {0x1.0b1ca4p+35, 0x1.71dp+23},
+    {0x1.c7c2bp+35, 0x1.e31p+23},
+    {0x1.57677cp+35, 0x1.a35p+23},
+    {0x1.7b85d4p+35, 0x1.b8dp+23},
+    {0x1.5f6f84p+35, 0x1.a83p+23},
+    {0x1.c3c27p+35, 0x1.e0fp+23},
+    {0x1.2b43a8p+35, 0x1.877p+23},
+    {0x1.cbc774p+35, 0x1.e53p+23},
+    {0x1.9fa49cp+35, 0x1.cd5p+23},
+    {0x1.2f4aap+35, 0x1.8a1p+23},
+    {0x1.f7e84p+35, 0x1.fbfp+23},
+    {0x1.3b56a4p+35, 0x1.91dp+23},
+    {0x1.cfd0bcp+35, 0x1.e75p+23},
+    {0x1.274394p+35, 0x1.84dp+23},
+    {0x1.bfc6b4p+35, 0x1.dedp+23},
+    {0x1.8b9e2p+35, 0x1.c21p+23},
+    {0x1.1b3d14p+35, 0x1.7cdp+23},
+    {0x1.4b6e4p+35, 0x1.9bfp+23},
+    {0x1.dfe50cp+35, 0x1.efbp+23},
+    {0x1.a3b518p+35, 0x1.cf9p+23},
+    {0x1.33587cp+35, 0x1.8cbp+23},
+    {0x1.f3f26p+35, 0x1.f9fp+23},
+    {0x1.d3de88p+35, 0x1.e97p+23},
+    {0x1.e7ea44p+35, 0x1.f3dp+23},
+    {0x1.07277p+35, 0x1.6f1p+23},
+    {0x1.476b88p+35, 0x1.997p+23},
+    {0x1.bbcf7cp+35, 0x1.dcbp+23},
+    {0x1.234a64p+35, 0x1.823p+23},
+    {0x1.6b8d28p+35, 0x1.af7p+23},
+    {0x1.4f7738p+35, 0x1.9e7p+23},
+    {0x1.5b82e4p+35, 0x1.a5dp+23},
+    {0x1.6f90a4p+35, 0x1.b1dp+23},
+    {0x1.7f9f94p+35, 0x1.bb3p+23},
+    {0x1.afca24p+35, 0x1.d63p+23},
+    {0x1.436f1p+35, 0x1.96fp+23},
+    {0x1.87ac04p+35, 0x1.bfdp+23},
+    {0x1.a7caa4p+35, 0x1.d1dp+23},
+    {0x1.7399c4p+35, 0x1.b43p+23},
+    {0x1.678f5p+35, 0x1.ad1p+23},
+    {0x1.d7f0d8p+35, 0x1.eb9p+23},
+    {0x1.b7dcc8p+35, 0x1.da9p+23},
+    {0x1.376d3cp+35, 0x1.8f5p+23},
+    {0x1.53867p+35, 0x1.a0fp+23},
+    {0x1.63971cp+35, 0x1.aabp+23},
+    {0x1.3f78d8p+35, 0x1.947p+23},
+    {0x1.77a888p+35, 0x1.b69p+23},
+    {0x1.134ca8p+35, 0x1.777p+23},
+    {0x1.1f5818p+35, 0x1.7f9p+23},
+    {0x1.b3ee98p+35, 0x1.d87p+23},
+    {0x1.0f47fcp+35, 0x1.74bp+23},
+    {0x1.0339ccp+35, 0x1.6c5p+23},
+    {0x1.9bd294p+35, 0x1.cb3p+23},
+    {0x1.97cbfp+35, 0x1.c8fp+23},
+    {0x1.abe54p+35, 0x1.d41p+23},
+    {0x1.93ca5cp+35, 0x1.c6bp+23},
+    {0x1.83bef8p+35, 0x1.bd9p+23},
+    {0x1.8fcdd8p+35, 0x1.c47p+23},
+    {0x1.1758e4p+35, 0x1.7a3p+23},
+    {0x1.0b4aep+35, 0x1.71fp+23},
+    {0x1.c7ff14p+35, 0x1.e33p+23},
+    {0x1.1b6cbp+35, 0x1.7cfp+23},
+    {0x1.579be8p+35, 0x1.a37p+23},
+    {0x1.2b7498p+35, 0x1.879p+23},
+    {0x1.7bbcfp+35, 0x1.b8fp+23},
+    {0x1.5fa48cp+35, 0x1.a85p+23},
+    {0x1.a3ef0cp+35, 0x1.cfbp+23},
+    {0x1.2f7be4p+35, 0x1.8a3p+23},
+    {0x1.9fde48p+35, 0x1.cd7p+23},
+    {0x1.c3fe9p+35, 0x1.e11p+23},
+    {0x1.5bb7ap+35, 0x1.a5fp+23},
+    {0x1.8bd664p+35, 0x1.c23p+23},
+    {0x1.3b88ep+35, 0x1.91fp+23},
+    {0x1.4ba1cp+35, 0x1.9c1p+23},
+    {0x1.27743p+35, 0x1.84fp+23},
+    {0x1.075554p+35, 0x1.6f3p+23},
+    {0x1.87e4p+35, 0x1.bffp+23},
+    {0x1.237aacp+35, 0x1.825p+23},
+    {0x1.4fab08p+35, 0x1.9e9p+23},
+    {0x1.338a14p+35, 0x1.8cdp+23},
+    {0x1.6bc318p+35, 0x1.af9p+23},
+    {0x1.73d04cp+35, 0x1.b45p+23},
+    {0x1.0f7694p+35, 0x1.74dp+23},
+    {0x1.479eb8p+35, 0x1.999p+23},
+    {0x1.67c4f4p+35, 0x1.ad3p+23},
+    {0x1.6fc6ep+35, 0x1.b1fp+23},
+    {0x1.7fd6fcp+35, 0x1.bb5p+23},
+    {0x1.137b98p+35, 0x1.779p+23},
+    {0x1.53ba9p+35, 0x1.a11p+23},
+    {0x1.83f6acp+35, 0x1.bdbp+23},
+    {0x1.379f28p+35, 0x1.8f7p+23},
+    {0x1.43a1fp+35, 0x1.971p+23},
+    {0x1.1b9c5p+35, 0x1.7d1p+23},
+    {0x1.0b792p+35, 0x1.721p+23},
+    {0x1.2ba58cp+35, 0x1.87bp+23},
+    {0x1.77df5cp+35, 0x1.b6bp+23},
+    {0x1.63cc74p+35, 0x1.aadp+23},
+    {0x1.5bec6p+35, 0x1.a61p+23},
+    {0x1.5fd998p+35, 0x1.a87p+23},
+    {0x1.4bd544p+35, 0x1.9c3p+23},
+    {0x1.07833cp+35, 0x1.6f5p+23},
+    {0x1.3fab68p+35, 0x1.949p+23},
+    {0x1.2fad2cp+35, 0x1.8a5p+23},
+    {0x1.1f880cp+35, 0x1.7fbp+23},
+    {0x1.23aaf8p+35, 0x1.827p+23},
+    {0x1.036758p+35, 0x1.6c7p+23},
+    {0x1.7bf41p+35, 0x1.b91p+23},
+    {0x1.4fdedcp+35, 0x1.9ebp+23},
+    {0x1.3bbb2p+35, 0x1.921p+23},
+    {0x1.6bf90cp+35, 0x1.afbp+23},
+    {0x1.0fa53p+35, 0x1.74fp+23},
+    {0x1.57d058p+35, 0x1.a39p+23},
+    {0x1.17882cp+35, 0x1.7a5p+23},
+    {0x1.27a4dp+35, 0x1.851p+23},
+    {0x1.33bbbp+35, 0x1.8cfp+23},
+    {0x1.67fa9cp+35, 0x1.ad5p+23},
+    {0x1.53eeb4p+35, 0x1.a13p+23},
+    {0x1.37d118p+35, 0x1.8f9p+23},
+    {0x1.13aa8cp+35, 0x1.77bp+23},
+    {0x1.1bcbf4p+35, 0x1.7d3p+23},
+    {0x1.0ba764p+35, 0x1.723p+23},
+    {0x1.2bd684p+35, 0x1.87dp+23},
+    {0x1.47d1ecp+35, 0x1.99bp+23},
+    {0x1.43d4d4p+35, 0x1.973p+23},
+    {0x1.6ffd2p+35, 0x1.b21p+23},
+    {0x1.3fddfcp+35, 0x1.94bp+23},
+    {0x1.07b128p+35, 0x1.6f7p+23},
+    {0x1.2fde78p+35, 0x1.8a7p+23},
+    {0x1.23db48p+35, 0x1.829p+23},
+    {0x1.0394e8p+35, 0x1.6c9p+23},
+    {0x1.1fb804p+35, 0x1.7fdp+23},
+    {0x1.3bed64p+35, 0x1.923p+23},
+    {0x1.0fd3dp+35, 0x1.751p+23},
+    {0x1.17b778p+35, 0x1.7a7p+23},
+    {0x1.27d574p+35, 0x1.853p+23},
+    {0x1.33ed5p+35, 0x1.8d1p+23},
+    {0x1.13d984p+35, 0x1.77dp+23},
+    {0x1.1bfb9cp+35, 0x1.7d5p+23},
+    {0x1.0bd5acp+35, 0x1.725p+23},
+    {0x1.07df18p+35, 0x1.6f9p+23},
+    {0x1.03c27cp+35, 0x1.6cbp+23},
+    {0x1.1fe8p+35, 0x1.7ffp+23},
+    {0x1.17e6c8p+35, 0x1.7a9p+23},
+    {0x1.03f014p+35, 0x1.6cdp+23},
+};
+
+#endif // LLVM_LIBC_TEST_SRC_MATH_HYPOTTEST_HARD_TO_ROUND_H
diff --git a/libc/test/src/math/hypotf_test.cpp b/libc/test/src/math/hypotf_test.cpp
index 2e958d63d84ad..437e0fc29b76b 100644
--- a/libc/test/src/math/hypotf_test.cpp
+++ b/libc/test/src/math/hypotf_test.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "HypotTest.h"
+#include "hypotf_hard_to_round.h"
 
 #include "src/math/hypotf.h"
 
@@ -23,3 +24,7 @@ TEST_F(LlvmLibcHypotfTest, SubnormalRange) {
 TEST_F(LlvmLibcHypotfTest, NormalRange) {
   test_normal_range(&__llvm_libc::hypotf);
 }
+
+TEST_F(LlvmLibcHypotfTest, TrickyInputs) {
+  test_input_list(&__llvm_libc::hypotf, N_HARD_TO_ROUND, HYPOTF_HARD_TO_ROUND);
+}

From b80db150cdba17b3d4970389025f95b1c93482b8 Mon Sep 17 00:00:00 2001
From: Evgeny Shulgin <izaronplatz@gmail.com>
Date: Thu, 20 Jan 2022 13:34:28 -0500
Subject: [PATCH 046/946] Add `isConsteval` matcher

Support C++20 consteval functions and C++2b if consteval for AST Matchers.
---
 clang/docs/LibASTMatchersReference.html       | 32 +++++++++++++++++++
 clang/docs/ReleaseNotes.rst                   |  2 ++
 clang/include/clang/ASTMatchers/ASTMatchers.h | 19 +++++++++++
 clang/lib/ASTMatchers/Dynamic/Registry.cpp    |  1 +
 .../ASTMatchers/ASTMatchersNarrowingTest.cpp  | 29 +++++++++++++++++
 5 files changed, 83 insertions(+)

diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html
index 9e71608a86250..fb856de0936dc 100644
--- a/clang/docs/LibASTMatchersReference.html
+++ b/clang/docs/LibASTMatchersReference.html
@@ -4201,6 +4201,22 @@ <h2 id="narrowing-matchers">Narrowing Matchers</h2>
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isConsteval1')"><a name="isConsteval1Anchor">isConsteval</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isConsteval1"><pre>Matches consteval function declarations and if consteval/if ! consteval
+statements.
+
+Given:
+  consteval int a();
+  void b() { if consteval {} }
+  void c() { if ! consteval {} }
+  void d() { if ! consteval {} else {} }
+functionDecl(isConsteval())
+  matches the declaration of "int a()".
+ifStmt(isConsteval())
+  matches the if statement in "void b()", "void c()", "void d()".
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1FunctionDecl.html">FunctionDecl</a>&gt;</td><td class="name" onclick="toggle('isConstexpr1')"><a name="isConstexpr1Anchor">isConstexpr</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isConstexpr1"><pre>Matches constexpr variable and function declarations,
        and if constexpr.
@@ -4473,6 +4489,22 @@ <h2 id="narrowing-matchers">Narrowing Matchers</h2>
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1IfStmt.html">IfStmt</a>&gt;</td><td class="name" onclick="toggle('isConsteval2')"><a name="isConsteval2Anchor">isConsteval</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isConsteval2"><pre>Matches consteval function declarations and if consteval/if ! consteval
+statements.
+
+Given:
+  consteval int a();
+  void b() { if consteval {} }
+  void c() { if ! consteval {} }
+  void d() { if ! consteval {} else {} }
+functionDecl(isConsteval())
+  matches the declaration of "int a()".
+ifStmt(isConsteval())
+  matches the if statement in "void b()", "void c()", "void d()".
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1IfStmt.html">IfStmt</a>&gt;</td><td class="name" onclick="toggle('isConstexpr2')"><a name="isConstexpr2Anchor">isConstexpr</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isConstexpr2"><pre>Matches constexpr variable and function declarations,
        and if constexpr.
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 08e4d75299d2b..c787d355a3148 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -316,6 +316,8 @@ AST Matchers
   and the underlying ``Type`` with ``hasUnderlyingType``.
   ``hasDeclaration`` continues to see through the alias and apply to the
   underlying type.
+- Added the ``isConsteval`` matcher to match ``consteval`` function
+  declarations as well as `if consteval` and `if ! consteval` statements.
 
 clang-format
 ------------
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index 599ab407c442b..c934b708cb96c 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -5170,6 +5170,25 @@ AST_POLYMORPHIC_MATCHER(isNoThrow,
   return FnTy->isNothrow();
 }
 
+/// Matches consteval function declarations and if consteval/if ! consteval
+/// statements.
+///
+/// Given:
+/// \code
+///   consteval int a();
+///   void b() { if consteval {} }
+///   void c() { if ! consteval {} }
+///   void d() { if ! consteval {} else {} }
+/// \endcode
+/// functionDecl(isConsteval())
+///   matches the declaration of "int a()".
+/// ifStmt(isConsteval())
+///   matches the if statement in "void b()", "void c()", "void d()".
+AST_POLYMORPHIC_MATCHER(isConsteval,
+                        AST_POLYMORPHIC_SUPPORTED_TYPES(FunctionDecl, IfStmt)) {
+  return Node.isConsteval();
+}
+
 /// Matches constexpr variable and function declarations,
 ///        and if constexpr.
 ///
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 4f3efdb0a6630..2210c5413cc5a 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -404,6 +404,7 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(isComparisonOperator);
   REGISTER_MATCHER(isConst);
   REGISTER_MATCHER(isConstQualified);
+  REGISTER_MATCHER(isConsteval);
   REGISTER_MATCHER(isConstexpr);
   REGISTER_MATCHER(isCopyAssignmentOperator);
   REGISTER_MATCHER(isCopyConstructor);
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
index f604d0a19e18f..51946e1430cf6 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
@@ -1790,6 +1790,35 @@ TEST_P(ASTMatchersTest, IsNoThrow_CXX11) {
   EXPECT_TRUE(matches("void f() noexcept;", functionProtoType(isNoThrow())));
 }
 
+TEST_P(ASTMatchersTest, IsConsteval) {
+  if (!GetParam().isCXX20OrLater())
+    return;
+
+  EXPECT_TRUE(matches("consteval int bar();",
+                      functionDecl(hasName("bar"), isConsteval())));
+  EXPECT_TRUE(notMatches("constexpr int bar();",
+                         functionDecl(hasName("bar"), isConsteval())));
+  EXPECT_TRUE(
+      notMatches("int bar();", functionDecl(hasName("bar"), isConsteval())));
+}
+
+TEST_P(ASTMatchersTest, IsConsteval_MatchesIfConsteval) {
+  if (!GetParam().isCXX20OrLater())
+    return;
+
+  EXPECT_TRUE(matches("void baz() { if consteval {} }", ifStmt(isConsteval())));
+  EXPECT_TRUE(
+      matches("void baz() { if ! consteval {} }", ifStmt(isConsteval())));
+  EXPECT_TRUE(matches("void baz() { if ! consteval {} else {} }",
+                      ifStmt(isConsteval())));
+  EXPECT_TRUE(
+      matches("void baz() { if not consteval {} }", ifStmt(isConsteval())));
+  EXPECT_TRUE(notMatches("void baz() { if constexpr(1 > 0) {} }",
+                         ifStmt(isConsteval())));
+  EXPECT_TRUE(
+      notMatches("void baz() { if (1 > 0) {} }", ifStmt(isConsteval())));
+}
+
 TEST_P(ASTMatchersTest, IsConstexpr) {
   if (!GetParam().isCXX11OrLater()) {
     return;

From eb6c6e60585df1a05c11a7b2625cbecffb88e085 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 20 Jan 2022 21:37:26 +0300
Subject: [PATCH 047/946] [NFC][InstCombine] Add test showing failure to sink
 into `resume` block

---
 .../InstCombine/sink-into-resume-block.ll     | 63 +++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/sink-into-resume-block.ll

diff --git a/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll b/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
new file mode 100644
index 0000000000000..a687977d4b975
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
@@ -0,0 +1,63 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; Check that InstCombine can sink instructions to the landingpad of the invoke.
+
+define void @t0_noop(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @t0_noop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[C:%.*]] = call i1 @cond()
+; CHECK-NEXT:    [[V0:%.*]] = add i32 [[ARG:%.*]], 42
+; CHECK-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    invoke void @simple_throw()
+; CHECK-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]]
+; CHECK:       invoke.cont:
+; CHECK-NEXT:    unreachable
+; CHECK:       lpad:
+; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    call void @consume(i32 [[V0]])
+; CHECK-NEXT:    call void @destructor()
+; CHECK-NEXT:    resume { i8*, i32 } [[EH]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[V1:%.*]] = add i32 [[ARG]], 24
+; CHECK-NEXT:    call void @consume(i32 [[V1]])
+; CHECK-NEXT:    call void @sideeffect()
+; CHECK-NEXT:    ret void
+;
+entry:
+  %c = call i1 @cond()
+  %v0 = add i32 %arg, 42
+  %v1 = add i32 %arg, 24
+  br i1 %c, label %if.then, label %if.end
+
+if.then:
+  invoke void @simple_throw() to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+  unreachable
+
+lpad:
+  %eh = landingpad { i8*, i32 } cleanup
+  call void @consume(i32 %v0)
+  call void @destructor()
+  resume { i8*, i32 } %eh
+
+if.end:
+  call void @consume(i32 %v1)
+  call void @sideeffect()
+  ret void
+}
+
+declare i1 @cond()
+
+declare void @sideeffect()
+
+declare void @simple_throw() noreturn
+
+declare void @destructor()
+
+declare void @consume(i32)
+
+declare dso_local i32 @__gxx_personality_v0(...)

From 9abc593e98891b4cd8ffd2ca308cabe6ea5d142f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Jan 2022 10:36:21 -0800
Subject: [PATCH 048/946] [TargetLowering][InstCombine] Simplify BSwap demanded
 bits code a little. NFC

Use alignDown instead of &= ~7.

Replace ResultBit with NLZ. (BitWidth - NLZ - NTZ == 8) so
(BitWidth - NTZ - 8 == NLZ).

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D117804
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 12 ++++-------
 .../InstCombineSimplifyDemanded.cpp           | 21 +++++++++----------
 2 files changed, 14 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6af5609993795..3b53a5b8b7532 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1815,20 +1815,16 @@ bool TargetLowering::SimplifyDemandedBits(
     // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
     // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
     // have 14 leading zeros, round to 8.
-    NLZ &= ~7;
-    NTZ &= ~7;
+    NLZ = alignDown(NLZ, 8);
+    NTZ = alignDown(NTZ, 8);
     // If we need exactly one byte, we can do this transformation.
     if (BitWidth - NLZ - NTZ == 8) {
-      unsigned ResultBit = NTZ;
-      unsigned InputBit = BitWidth - NTZ - 8;
-
       // Replace this with either a left or right shift to get the byte into
       // the right place.
-      unsigned ShiftOpcode = InputBit > ResultBit ? ISD::SRL : ISD::SHL;
+      unsigned ShiftOpcode = NLZ > NTZ ? ISD::SRL : ISD::SHL;
       if (!TLO.LegalOperations() || isOperationLegal(ShiftOpcode, VT)) {
         EVT ShiftAmtTy = getShiftAmountTy(VT, DL);
-        unsigned ShiftAmount =
-            InputBit > ResultBit ? InputBit - ResultBit : ResultBit - InputBit;
+        unsigned ShiftAmount = NLZ > NTZ ? NLZ - NTZ : NTZ - NLZ;
         SDValue ShAmt = TLO.DAG.getConstant(ShiftAmount, dl, ShiftAmtTy);
         SDValue NewOp = TLO.DAG.getNode(ShiftOpcode, dl, VT, Src, ShAmt);
         return TLO.CombineTo(Op, NewOp);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 4dc712f325362..71a5ae24eead9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -800,22 +800,21 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
         // Round NTZ down to the next byte.  If we have 11 trailing zeros, then
         // we need all the bits down to bit 8.  Likewise, round NLZ.  If we
         // have 14 leading zeros, round to 8.
-        NLZ &= ~7;
-        NTZ &= ~7;
+        NLZ = alignDown(NLZ, 8);
+        NTZ = alignDown(NTZ, 8);
         // If we need exactly one byte, we can do this transformation.
-        if (BitWidth-NLZ-NTZ == 8) {
-          unsigned ResultBit = NTZ;
-          unsigned InputBit = BitWidth-NTZ-8;
-
+        if (BitWidth - NLZ - NTZ == 8) {
           // Replace this with either a left or right shift to get the byte into
           // the right place.
           Instruction *NewVal;
-          if (InputBit > ResultBit)
-            NewVal = BinaryOperator::CreateLShr(II->getArgOperand(0),
-                    ConstantInt::get(I->getType(), InputBit-ResultBit));
+          if (NLZ > NTZ)
+            NewVal = BinaryOperator::CreateLShr(
+                II->getArgOperand(0),
+                ConstantInt::get(I->getType(), NLZ - NTZ));
           else
-            NewVal = BinaryOperator::CreateShl(II->getArgOperand(0),
-                    ConstantInt::get(I->getType(), ResultBit-InputBit));
+            NewVal = BinaryOperator::CreateShl(
+                II->getArgOperand(0),
+                ConstantInt::get(I->getType(), NTZ - NLZ));
           NewVal->takeName(I);
           return InsertNewInstWith(NewVal, *I);
         }

From b58cc9fb23486e73ad7502a451bf338df3b2d444 Mon Sep 17 00:00:00 2001
From: eopXD <eop.chen@sifive.com>
Date: Thu, 20 Jan 2022 10:36:23 -0800
Subject: [PATCH 049/946] [NFC][RISCV] Add end-of-line symbol in target-feature
 testcases

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117808
---
 .../test/Preprocessor/riscv-target-features.c | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index 7c685b50e873e..ba310229f14e5 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -7,14 +7,14 @@
 // CHECK-NOT: __riscv_m
 // CHECK-NOT: __riscv_mul
 // CHECK-NOT: __riscv_muldiv
-// CHECK-NOT: __riscv_a 2000000
+// CHECK-NOT: __riscv_a 2000000{{$}}
 // CHECK-NOT: __riscv_atomic
-// CHECK-NOT: __riscv_f 2000000
+// CHECK-NOT: __riscv_f 2000000{{$}}
 // CHECK-NOT: __riscv_d
 // CHECK-NOT: __riscv_flen
 // CHECK-NOT: __riscv_fdiv
 // CHECK-NOT: __riscv_fsqrt
-// CHECK-NOT: __riscv_c 2000000
+// CHECK-NOT: __riscv_c 2000000{{$}}
 // CHECK-NOT: __riscv_compressed
 // CHECK-NOT: __riscv_b
 // CHECK-NOT: __riscv_bitmanip
@@ -38,7 +38,7 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64im -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-M-EXT %s
 // CHECK-M-EXT: __riscv_div 1
-// CHECK-M-EXT: __riscv_m 2000000
+// CHECK-M-EXT: __riscv_m 2000000{{$}}
 // CHECK-M-EXT: __riscv_mul 1
 // CHECK-M-EXT: __riscv_muldiv 1
 
@@ -46,14 +46,14 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-A-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ia -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-A-EXT %s
-// CHECK-A-EXT: __riscv_a 2000000
+// CHECK-A-EXT: __riscv_a 2000000{{$}}
 // CHECK-A-EXT: __riscv_atomic 1
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32if -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-F-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64if -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-F-EXT %s
-// CHECK-F-EXT: __riscv_f 2000000
+// CHECK-F-EXT: __riscv_f 2000000{{$}}
 // CHECK-F-EXT: __riscv_fdiv 1
 // CHECK-F-EXT: __riscv_flen 32
 // CHECK-F-EXT: __riscv_fsqrt 1
@@ -62,7 +62,7 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-D-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ifd -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-D-EXT %s
-// CHECK-D-EXT: __riscv_d 2000000
+// CHECK-D-EXT: __riscv_d 2000000{{$}}
 // CHECK-D-EXT: __riscv_fdiv 1
 // CHECK-D-EXT: __riscv_flen 64
 // CHECK-D-EXT: __riscv_fsqrt 1
@@ -95,7 +95,7 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-C-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -march=rv64ic -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-C-EXT %s
-// CHECK-C-EXT: __riscv_c 2000000
+// CHECK-C-EXT: __riscv_c 2000000{{$}}
 // CHECK-C-EXT: __riscv_compressed 1
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
@@ -150,7 +150,7 @@
 // RUN: -march=rv64izbe0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBE-EXT %s
 // CHECK-ZBE-NOT: __riscv_b
-// CHECK-ZBE-EXT: __riscv_zbe 93000
+// CHECK-ZBE-EXT: __riscv_zbe 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32izbf0p93 -x c -E -dM %s \
@@ -159,7 +159,7 @@
 // RUN: -march=rv64izbf0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBF-EXT %s
 // CHECK-ZBF-NOT: __riscv_b
-// CHECK-ZBF-EXT: __riscv_zbf 93000
+// CHECK-ZBF-EXT: __riscv_zbf 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32izbm0p93 -x c -E -dM %s \
@@ -168,7 +168,7 @@
 // RUN: -march=rv64izbm0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBM-EXT %s
 // CHECK-ZBM-NOT: __riscv_b
-// CHECK-ZBM-EXT: __riscv_zbm 93000
+// CHECK-ZBM-EXT: __riscv_zbm 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32izbp0p93 -x c -E -dM %s \
@@ -177,7 +177,7 @@
 // RUN: -march=rv64izbp0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBP-EXT %s
 // CHECK-ZBP-NOT: __riscv_b
-// CHECK-ZBP-EXT: __riscv_zbp 93000
+// CHECK-ZBP-EXT: __riscv_zbp 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32izbr0p93 -x c -E -dM %s \
@@ -186,7 +186,7 @@
 // RUN: -march=rv64izbr0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBR-EXT %s
 // CHECK-ZBR-NOT: __riscv_b
-// CHECK-ZBR-EXT: __riscv_zbr 93000
+// CHECK-ZBR-EXT: __riscv_zbr 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32izbs1p0 -x c -E -dM %s \
@@ -210,7 +210,7 @@
 // RUN: -march=rv64izbt0p93 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZBT-EXT %s
 // CHECK-ZBT-NOT: __riscv_b
-// CHECK-ZBT-EXT: __riscv_zbt 93000
+// CHECK-ZBT-EXT: __riscv_zbt 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv32iv0p10 -x c -E -dM %s \
@@ -218,9 +218,9 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64iv0p10 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// CHECK-V-EXT: __riscv_v 10000
+// CHECK-V-EXT: __riscv_v 10000{{$}}
 // CHECK-V-EXT: __riscv_vector 1
-// CHECK-V-EXT: __riscv_zvlsseg 10000
+// CHECK-V-EXT: __riscv_zvlsseg 10000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32izfhmin1p0 -x c -E -dM %s \
@@ -228,7 +228,7 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64izfhmin1p0 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZFHMIN-EXT %s
-// CHECK-ZFHMIN-EXT: __riscv_zfhmin 10000
+// CHECK-ZFHMIN-EXT: __riscv_zfhmin 1000000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32izfh1p0 -x c -E -dM %s \
@@ -236,7 +236,7 @@
 // RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64izfh1p0 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZFH-EXT %s
-// CHECK-ZFH-EXT: __riscv_zfh 10000
+// CHECK-ZFH-EXT: __riscv_zfh 1000000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64iv0p10 -x c -E -dM %s -o - \
@@ -297,11 +297,11 @@
 // CHECK-ZVE64D-EXT: __riscv_v_elen_fp 64
 // CHECK-ZVE64D-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64D-EXT: __riscv_vector 1
-// CHECK-ZVE64D-EXT: __riscv_zve32f 10000
-// CHECK-ZVE64D-EXT: __riscv_zve32x 10000
-// CHECK-ZVE64D-EXT: __riscv_zve64d 10000
-// CHECK-ZVE64D-EXT: __riscv_zve64f 10000
-// CHECK-ZVE64D-EXT: __riscv_zve64x 10000
+// CHECK-ZVE64D-EXT: __riscv_zve32f 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve32x 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64d 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64f 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64x 10000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64ifzve64f0p10 -x c -E -dM %s -o - \
@@ -310,10 +310,10 @@
 // CHECK-ZVE64F-EXT: __riscv_v_elen_fp 32
 // CHECK-ZVE64F-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64F-EXT: __riscv_vector 1
-// CHECK-ZVE64F-EXT: __riscv_zve32f 10000
-// CHECK-ZVE64F-EXT: __riscv_zve32x 10000
-// CHECK-ZVE64F-EXT: __riscv_zve64f 10000
-// CHECK-ZVE64F-EXT: __riscv_zve64x 10000
+// CHECK-ZVE64F-EXT: __riscv_zve32f 10000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve32x 10000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve64f 10000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve64x 10000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64izve64x0p10 -x c -E -dM %s -o - \
@@ -322,8 +322,8 @@
 // CHECK-ZVE64X-EXT: __riscv_v_elen_fp 0
 // CHECK-ZVE64X-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64X-EXT: __riscv_vector 1
-// CHECK-ZVE64X-EXT: __riscv_zve32x 10000
-// CHECK-ZVE64X-EXT: __riscv_zve64x 10000
+// CHECK-ZVE64X-EXT: __riscv_zve32x 10000{{$}}
+// CHECK-ZVE64X-EXT: __riscv_zve64x 10000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64ifzve32f0p10 -x c -E -dM %s -o - \
@@ -332,8 +332,8 @@
 // CHECK-ZVE32F-EXT: __riscv_v_elen_fp 32
 // CHECK-ZVE32F-EXT: __riscv_v_min_vlen 32
 // CHECK-ZVE32F-EXT: __riscv_vector 1
-// CHECK-ZVE32F-EXT: __riscv_zve32f 10000
-// CHECK-ZVE32F-EXT: __riscv_zve32x 10000
+// CHECK-ZVE32F-EXT: __riscv_zve32f 10000{{$}}
+// CHECK-ZVE32F-EXT: __riscv_zve32x 10000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
 // RUN: -march=rv64izve32x0p10 -x c -E -dM %s -o - \
@@ -342,4 +342,4 @@
 // CHECK-ZVE32X-EXT: __riscv_v_elen_fp 0
 // CHECK-ZVE32X-EXT: __riscv_v_min_vlen 32
 // CHECK-ZVE32X-EXT: __riscv_vector 1
-// CHECK-ZVE32X-EXT: __riscv_zve32x 10000
+// CHECK-ZVE32X-EXT: __riscv_zve32x 10000{{$}}

From 587dccfb1238724c3365b12f24f7fc343d60974b Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 20 Jan 2022 12:29:12 -0500
Subject: [PATCH 050/946] [InstCombine] avoid 'tmp' usage in test files; NFC

The update script ( utils/update_test_checks.py ) warns against this
because it can conflict with the default FileCheck names given to
anonymous values in the IR.
---
 .../canonicalize-lshr-shl-to-masking.ll       | 216 ++++++++---------
 .../canonicalize-shl-lshr-to-masking.ll       | 218 +++++++++---------
 2 files changed, 217 insertions(+), 217 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
index 40bc4aaab21c2..c8ccd8f6549a5 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-lshr-shl-to-masking.ll
@@ -19,8 +19,8 @@ define i8 @positive_samevar(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl i8 %t0, %y
   ret i8 %ret
 }
 
@@ -29,8 +29,8 @@ define i8 @positive_sameconst(i8 %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    ret i8 [[TMP0]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -40,8 +40,8 @@ define i8 @positive_biggerlshr(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 24
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -51,8 +51,8 @@ define i8 @positive_biggershl(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], -64
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  %ret = shl i8 %t0, 6
   ret i8 %ret
 }
 
@@ -66,8 +66,8 @@ define i8 @positive_samevar_shlnuw(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl nuw i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl nuw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -76,8 +76,8 @@ define i8 @positive_sameconst_shlnuw(i8 %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    ret i8 [[TMP0]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nuw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  %ret = shl nuw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -87,8 +87,8 @@ define i8 @positive_biggerlshr_shlnuw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 24
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  %ret = shl nuw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  %ret = shl nuw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -98,8 +98,8 @@ define i8 @positive_biggershl_shlnuw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], -64
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nuw i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  %ret = shl nuw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -113,8 +113,8 @@ define i8 @positive_samevar_shlnsw(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl nsw i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl nsw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -123,8 +123,8 @@ define i8 @positive_sameconst_shlnsw(i8 %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    ret i8 [[TMP0]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nsw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  %ret = shl nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -134,8 +134,8 @@ define i8 @positive_biggerlshr_shlnsw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 24
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  %ret = shl nsw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  %ret = shl nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -145,8 +145,8 @@ define i8 @positive_biggershl_shlnsw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], -64
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nsw i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  %ret = shl nsw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -160,8 +160,8 @@ define i8 @positive_samevar_shlnuwnsw(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl nuw nsw i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl nuw nsw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -170,8 +170,8 @@ define i8 @positive_sameconst_shlnuwnsw(i8 %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], -8
 ; CHECK-NEXT:    ret i8 [[TMP0]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nuw nsw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  %ret = shl nuw nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -181,8 +181,8 @@ define i8 @positive_biggerlshr_shlnuwnsw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], 24
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  %ret = shl nuw nsw i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  %ret = shl nuw nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -192,8 +192,8 @@ define i8 @positive_biggershl_shlnuwnsw(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[TMP1]], -64
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  %ret = shl nuw nsw i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  %ret = shl nuw nsw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -205,8 +205,8 @@ define i8 @positive_samevar_lshrexact(i8 %x, i8 %y) {
 ; CHECK-LABEL: @positive_samevar_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, %y
-  %ret = shl i8 %tmp0, %y
+  %t0 = lshr exact i8 %x, %y
+  %ret = shl i8 %t0, %y
   ret i8 %ret
 }
 
@@ -214,8 +214,8 @@ define i8 @positive_sameconst_lshrexact(i8 %x) {
 ; CHECK-LABEL: @positive_sameconst_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -224,8 +224,8 @@ define i8 @positive_biggerlshr_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 6
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 6
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -234,8 +234,8 @@ define i8 @positive_biggershl_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl i8 %tmp0, 6
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl i8 %t0, 6
   ret i8 %ret
 }
 
@@ -247,8 +247,8 @@ define i8 @positive_samevar_shlnsw_lshrexact(i8 %x, i8 %y) {
 ; CHECK-LABEL: @positive_samevar_shlnsw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, %y
-  %ret = shl nsw i8 %tmp0, %y
+  %t0 = lshr exact i8 %x, %y
+  %ret = shl nsw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -256,8 +256,8 @@ define i8 @positive_sameconst_shlnsw_lshrexact(i8 %x) {
 ; CHECK-LABEL: @positive_sameconst_shlnsw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nsw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -266,8 +266,8 @@ define i8 @positive_biggerlshr_shlnsw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 6
-  %ret = shl nsw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 6
+  %ret = shl nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -276,8 +276,8 @@ define i8 @positive_biggershl_shlnsw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nsw i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nsw i8 %tmp0, 6
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nsw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -289,8 +289,8 @@ define i8 @positive_samevar_shlnuw_lshrexact(i8 %x, i8 %y) {
 ; CHECK-LABEL: @positive_samevar_shlnuw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, %y
-  %ret = shl nuw i8 %tmp0, %y
+  %t0 = lshr exact i8 %x, %y
+  %ret = shl nuw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -298,8 +298,8 @@ define i8 @positive_sameconst_shlnuw_lshrexact(i8 %x) {
 ; CHECK-LABEL: @positive_sameconst_shlnuw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nuw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nuw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -308,8 +308,8 @@ define i8 @positive_biggerlshr_shlnuw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 6
-  %ret = shl nuw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 6
+  %ret = shl nuw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -318,8 +318,8 @@ define i8 @positive_biggershl_shlnuw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nuw i8 %tmp0, 6
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nuw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -331,8 +331,8 @@ define i8 @positive_samevar_shlnuwnsw_lshrexact(i8 %x, i8 %y) {
 ; CHECK-LABEL: @positive_samevar_shlnuwnsw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, %y
-  %ret = shl nuw nsw i8 %tmp0, %y
+  %t0 = lshr exact i8 %x, %y
+  %ret = shl nuw nsw i8 %t0, %y
   ret i8 %ret
 }
 
@@ -340,8 +340,8 @@ define i8 @positive_sameconst_shlnuwnsw_lshrexact(i8 %x) {
 ; CHECK-LABEL: @positive_sameconst_shlnuwnsw_lshrexact(
 ; CHECK-NEXT:    ret i8 [[X:%.*]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nuw nsw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nuw nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -350,8 +350,8 @@ define i8 @positive_biggerlshr_shlnuwnsw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 6
-  %ret = shl nuw nsw i8 %tmp0, 3
+  %t0 = lshr exact i8 %x, 6
+  %ret = shl nuw nsw i8 %t0, 3
   ret i8 %ret
 }
 
@@ -360,8 +360,8 @@ define i8 @positive_biggershl_shlnuwnsw_lshrexact(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw nsw i8 [[X:%.*]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr exact i8 %x, 3
-  %ret = shl nuw nsw i8 %tmp0, 6
+  %t0 = lshr exact i8 %x, 3
+  %ret = shl nuw nsw i8 %t0, 6
   ret i8 %ret
 }
 
@@ -375,8 +375,8 @@ define <2 x i8> @positive_samevar_vec(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, %y
-  %ret = shl <2 x i8> %tmp0, %y
+  %t0 = lshr <2 x i8> %x, %y
+  %ret = shl <2 x i8> %t0, %y
   ret <2 x i8> %ret
 }
 
@@ -389,8 +389,8 @@ define <2 x i8> @positive_sameconst_vec(<2 x i8> %x) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i8> [[X:%.*]], <i8 -8, i8 -8>
 ; CHECK-NEXT:    ret <2 x i8> [[TMP0]]
 ;
-  %tmp0 = lshr <2 x i8> %x, <i8 3, i8 3>
-  %ret = shl <2 x i8> %tmp0, <i8 3, i8 3>
+  %t0 = lshr <2 x i8> %x, <i8 3, i8 3>
+  %ret = shl <2 x i8> %t0, <i8 3, i8 3>
   ret <2 x i8> %ret
 }
 
@@ -400,8 +400,8 @@ define <3 x i8> @positive_sameconst_vec_undef0(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 3, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
-  %ret = shl <3 x i8> %tmp0, <i8 3, i8 3, i8 3>
+  %t0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = shl <3 x i8> %t0, <i8 3, i8 3, i8 3>
   ret <3 x i8> %ret
 }
 
@@ -411,8 +411,8 @@ define <3 x i8> @positive_sameconst_vec_undef1(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 undef, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 3, i8 3, i8 3>
-  %ret = shl <3 x i8> %tmp0, <i8 3, i8 undef, i8 3>
+  %t0 = lshr <3 x i8> %x, <i8 3, i8 3, i8 3>
+  %ret = shl <3 x i8> %t0, <i8 3, i8 undef, i8 3>
   ret <3 x i8> %ret
 }
 
@@ -421,8 +421,8 @@ define <3 x i8> @positive_sameconst_vec_undef2(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <3 x i8> [[X:%.*]], <i8 -8, i8 poison, i8 -8>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
-  %ret = shl <3 x i8> %tmp0, <i8 3, i8 undef, i8 3>
+  %t0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = shl <3 x i8> %t0, <i8 3, i8 undef, i8 3>
   ret <3 x i8> %ret
 }
 
@@ -432,8 +432,8 @@ define <2 x i8> @positive_biggerlshr_vec(<2 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], <i8 24, i8 24>
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, <i8 6, i8 6>
-  %ret = shl <2 x i8> %tmp0, <i8 3, i8 3>
+  %t0 = lshr <2 x i8> %x, <i8 6, i8 6>
+  %ret = shl <2 x i8> %t0, <i8 3, i8 3>
   ret <2 x i8> %ret
 }
 
@@ -443,8 +443,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef0(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 3, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 6, i8 undef, i8 6>
-  %ret = shl <3 x i8> %tmp0, <i8 3, i8 3, i8 3>
+  %t0 = lshr <3 x i8> %x, <i8 6, i8 undef, i8 6>
+  %ret = shl <3 x i8> %t0, <i8 3, i8 3, i8 3>
   ret <3 x i8> %ret
 }
 
@@ -454,8 +454,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef1(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 undef, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 6, i8 6, i8 6>
-  %ret = shl <3 x i8> %tmp0, <i8 3, i8 undef, i8 3>
+  %t0 = lshr <3 x i8> %x, <i8 6, i8 6, i8 6>
+  %ret = shl <3 x i8> %t0, <i8 3, i8 undef, i8 3>
   ret <3 x i8> %ret
 }
 
@@ -465,8 +465,8 @@ define <3 x i8> @positive_biggerlshr_vec_undef2(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 3, i8 undef, i8 3>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 6, i8 undef, i8 6>
-  %ret = shl <3 x i8> %tmp0, <i8 3, i8 undef, i8 3>
+  %t0 = lshr <3 x i8> %x, <i8 6, i8 undef, i8 6>
+  %ret = shl <3 x i8> %t0, <i8 3, i8 undef, i8 3>
   ret <3 x i8> %ret
 }
 
@@ -476,8 +476,8 @@ define <2 x i8> @positive_biggershl_vec(<2 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i8> [[TMP1]], <i8 -64, i8 -64>
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, <i8 3, i8 3>
-  %ret = shl <2 x i8> %tmp0, <i8 6, i8 6>
+  %t0 = lshr <2 x i8> %x, <i8 3, i8 3>
+  %ret = shl <2 x i8> %t0, <i8 6, i8 6>
   ret <2 x i8> %ret
 }
 
@@ -487,8 +487,8 @@ define <3 x i8> @positive_biggershl_vec_undef0(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 6, i8 6, i8 6>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
-  %ret = shl <3 x i8> %tmp0, <i8 6, i8 6, i8 6>
+  %t0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = shl <3 x i8> %t0, <i8 6, i8 6, i8 6>
   ret <3 x i8> %ret
 }
 
@@ -498,8 +498,8 @@ define <3 x i8> @positive_biggershl_vec_undef1(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 6, i8 undef, i8 6>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 3, i8 3, i8 3>
-  %ret = shl <3 x i8> %tmp0, <i8 6, i8 undef, i8 6>
+  %t0 = lshr <3 x i8> %x, <i8 3, i8 3, i8 3>
+  %ret = shl <3 x i8> %t0, <i8 6, i8 undef, i8 6>
   ret <3 x i8> %ret
 }
 
@@ -509,8 +509,8 @@ define <3 x i8> @positive_biggershl_vec_undef2(<3 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <3 x i8> [[TMP0]], <i8 6, i8 undef, i8 6>
 ; CHECK-NEXT:    ret <3 x i8> [[RET]]
 ;
-  %tmp0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
-  %ret = shl <3 x i8> %tmp0, <i8 6, i8 undef, i8 6>
+  %t0 = lshr <3 x i8> %x, <i8 3, i8 undef, i8 3>
+  %ret = shl <3 x i8> %t0, <i8 6, i8 undef, i8 6>
   ret <3 x i8> %ret
 }
 
@@ -525,9 +525,9 @@ define i8 @positive_sameconst_multiuse(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i8 [[X]], -8
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  call void @use32(i8 %tmp0)
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr i8 %x, 3
+  call void @use32(i8 %t0)
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -538,9 +538,9 @@ define i8 @positive_biggerlshr_multiuse(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw nsw i8 [[TMP0]], 3
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 6
-  call void @use32(i8 %tmp0)
-  %ret = shl i8 %tmp0, 3
+  %t0 = lshr i8 %x, 6
+  call void @use32(i8 %t0)
+  %ret = shl i8 %t0, 3
   ret i8 %ret
 }
 
@@ -551,9 +551,9 @@ define i8 @positive_biggershl_multiuse(i8 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl i8 [[TMP0]], 6
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, 3
-  call void @use32(i8 %tmp0)
-  %ret = shl i8 %tmp0, 6
+  %t0 = lshr i8 %x, 3
+  call void @use32(i8 %t0)
+  %ret = shl i8 %t0, 6
   ret i8 %ret
 }
 
@@ -567,8 +567,8 @@ define <2 x i8> @positive_biggerlshr_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[TMP0]], <i8 3, i8 6>
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, <i8 3, i8 3>
-  %ret = shl <2 x i8> %tmp0, <i8 3, i8 6>
+  %t0 = lshr <2 x i8> %x, <i8 3, i8 3>
+  %ret = shl <2 x i8> %t0, <i8 3, i8 6>
   ret <2 x i8> %ret
 }
 
@@ -578,8 +578,8 @@ define <2 x i8> @positive_biggerLlshr_vec_nonsplat(<2 x i8> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl <2 x i8> [[TMP0]], <i8 3, i8 3>
 ; CHECK-NEXT:    ret <2 x i8> [[RET]]
 ;
-  %tmp0 = lshr <2 x i8> %x, <i8 3, i8 6>
-  %ret = shl <2 x i8> %tmp0, <i8 3, i8 3>
+  %t0 = lshr <2 x i8> %x, <i8 3, i8 6>
+  %ret = shl <2 x i8> %t0, <i8 3, i8 3>
   ret <2 x i8> %ret
 }
 
@@ -593,8 +593,8 @@ define i8 @negative_twovars(i8 %x, i8 %y, i8 %z) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl i8 [[TMP0]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  %ret = shl i8 %tmp0, %z ; $z, not %y
+  %t0 = lshr i8 %x, %y
+  %ret = shl i8 %t0, %z ; $z, not %y
   ret i8 %ret
 }
 
@@ -608,8 +608,8 @@ define i8 @negative_oneuse(i8 %x, i8 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl i8 [[TMP0]], [[Y]]
 ; CHECK-NEXT:    ret i8 [[RET]]
 ;
-  %tmp0 = lshr i8 %x, %y
-  call void @use32(i8 %tmp0)
-  %ret = shl i8 %tmp0, %y
+  %t0 = lshr i8 %x, %y
+  call void @use32(i8 %t0)
+  %ret = shl i8 %t0, %y
   ret i8 %ret
 }
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
index 45aa22aa808f3..22952145d54a3 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
@@ -18,18 +18,18 @@ define i32 @positive_samevar(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, %y
-  %ret = lshr i32 %tmp0, %y
+  %t0 = shl i32 %x, %y
+  %ret = lshr i32 %t0, %y
   ret i32 %ret
 }
 
 define i32 @positive_sameconst(i32 %x) {
 ; CHECK-LABEL: @positive_sameconst(
-; CHECK-NEXT:    [[TMP0:%.*]] = and i32 [[X:%.*]], 134217727
-; CHECK-NEXT:    ret i32 [[TMP0]]
+; CHECK-NEXT:    [[T0:%.*]] = and i32 [[X:%.*]], 134217727
+; CHECK-NEXT:    ret i32 [[T0]]
 ;
-  %tmp0 = shl i32 %x, 5
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl i32 %x, 5
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
@@ -39,8 +39,8 @@ define i32 @positive_biggerShl(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 134217696
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 10
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl i32 %x, 10
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
@@ -50,8 +50,8 @@ define i32 @positive_biggerLshr(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 4194303
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 5
-  %ret = lshr i32 %tmp0, 10
+  %t0 = shl i32 %x, 5
+  %ret = lshr i32 %t0, 10
   ret i32 %ret
 }
 
@@ -61,8 +61,8 @@ define i32 @positive_biggerLshr_lshrexact(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 4194303
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 5
-  %ret = lshr exact i32 %tmp0, 10
+  %t0 = shl i32 %x, 5
+  %ret = lshr exact i32 %t0, 10
   ret i32 %ret
 }
 
@@ -74,8 +74,8 @@ define i32 @positive_samevar_shlnuw(i32 %x, i32 %y) {
 ; CHECK-LABEL: @positive_samevar_shlnuw(
 ; CHECK-NEXT:    ret i32 [[X:%.*]]
 ;
-  %tmp0 = shl nuw i32 %x, %y
-  %ret = lshr i32 %tmp0, %y ; this one is obviously 'exact'.
+  %t0 = shl nuw i32 %x, %y
+  %ret = lshr i32 %t0, %y ; this one is obviously 'exact'.
   ret i32 %ret
 }
 
@@ -83,8 +83,8 @@ define i32 @positive_sameconst_shlnuw(i32 %x) {
 ; CHECK-LABEL: @positive_sameconst_shlnuw(
 ; CHECK-NEXT:    ret i32 [[X:%.*]]
 ;
-  %tmp0 = shl nuw i32 %x, 5
-  %ret = lshr i32 %tmp0, 5 ; this one is obviously 'exact'.
+  %t0 = shl nuw i32 %x, 5
+  %ret = lshr i32 %t0, 5 ; this one is obviously 'exact'.
   ret i32 %ret
 }
 
@@ -93,8 +93,8 @@ define i32 @positive_biggerShl_shlnuw(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw i32 [[X:%.*]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 10
-  %ret = lshr i32 %tmp0, 5 ; this one is obviously 'exact'.
+  %t0 = shl nuw i32 %x, 10
+  %ret = lshr i32 %t0, 5 ; this one is obviously 'exact'.
   ret i32 %ret
 }
 
@@ -103,8 +103,8 @@ define i32 @positive_biggerLshr_shlnuw(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[X:%.*]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 5
-  %ret = lshr i32 %tmp0, 10
+  %t0 = shl nuw i32 %x, 5
+  %ret = lshr i32 %t0, 10
   ret i32 %ret
 }
 
@@ -113,8 +113,8 @@ define i32 @positive_biggerLshr_shlnuw_lshrexact(i32 %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = lshr exact i32 [[X:%.*]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 5
-  %ret = lshr exact i32 %tmp0, 10
+  %t0 = shl nuw i32 %x, 5
+  %ret = lshr exact i32 %t0, 10
   ret i32 %ret
 }
 
@@ -128,8 +128,8 @@ define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, %y
-  %ret = lshr <2 x i32> %tmp0, %y
+  %t0 = shl <2 x i32> %x, %y
+  %ret = lshr <2 x i32> %t0, %y
   ret <2 x i32> %ret
 }
 
@@ -139,33 +139,33 @@ define <2 x i32> @positive_samevar_vec(<2 x i32> %x, <2 x i32> %y) {
 
 define <2 x i32> @positive_sameconst_vec(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec(
-; CHECK-NEXT:    [[TMP0:%.*]] = and <2 x i32> [[X:%.*]], <i32 134217727, i32 134217727>
-; CHECK-NEXT:    ret <2 x i32> [[TMP0]]
+; CHECK-NEXT:    [[T0:%.*]] = and <2 x i32> [[X:%.*]], <i32 134217727, i32 134217727>
+; CHECK-NEXT:    ret <2 x i32> [[T0]]
 ;
-  %tmp0 = shl <2 x i32> %x, <i32 5, i32 5>
-  %ret = lshr <2 x i32> %tmp0, <i32 5, i32 5>
+  %t0 = shl <2 x i32> %x, <i32 5, i32 5>
+  %ret = lshr <2 x i32> %t0, <i32 5, i32 5>
   ret <2 x i32> %ret
 }
 
 define <3 x i32> @positive_sameconst_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 5, i32 5>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
-  %ret = lshr <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+  %t0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = lshr <3 x i32> %t0, <i32 5, i32 5, i32 5>
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_sameconst_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_sameconst_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 undef, i32 5>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 5, i32 5, i32 5>
-  %ret = lshr <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  %t0 = shl <3 x i32> %x, <i32 5, i32 5, i32 5>
+  %ret = lshr <3 x i32> %t0, <i32 5, i32 undef, i32 5>
   ret <3 x i32> %ret
 }
 
@@ -174,8 +174,8 @@ define <3 x i32> @positive_sameconst_vec_undef2(<3 x i32> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <3 x i32> [[X:%.*]], <i32 134217727, i32 poison, i32 134217727>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
-  %ret = lshr <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  %t0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = lshr <3 x i32> %t0, <i32 5, i32 undef, i32 5>
   ret <3 x i32> %ret
 }
 
@@ -185,41 +185,41 @@ define <2 x i32> @positive_biggerShl_vec(<2 x i32> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], <i32 134217696, i32 134217696>
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, <i32 10, i32 10>
-  %ret = lshr <2 x i32> %tmp0, <i32 5, i32 5>
+  %t0 = shl <2 x i32> %x, <i32 10, i32 10>
+  %ret = lshr <2 x i32> %t0, <i32 5, i32 5>
   ret <2 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerShl_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_undef0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 5, i32 5>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 10, i32 undef, i32 10>
-  %ret = lshr <3 x i32> %tmp0, <i32 5, i32 5, i32 5>
+  %t0 = shl <3 x i32> %x, <i32 10, i32 undef, i32 10>
+  %ret = lshr <3 x i32> %t0, <i32 5, i32 5, i32 5>
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerShl_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 10, i32 10>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 undef, i32 5>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 10, i32 10, i32 10>
-  %ret = lshr <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  %t0 = shl <3 x i32> %x, <i32 10, i32 10, i32 10>
+  %ret = lshr <3 x i32> %t0, <i32 5, i32 undef, i32 5>
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerShl_vec_undef2(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_undef2(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 5, i32 undef, i32 5>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 10, i32 undef, i32 10>
-  %ret = lshr <3 x i32> %tmp0, <i32 5, i32 undef, i32 5>
+  %t0 = shl <3 x i32> %x, <i32 10, i32 undef, i32 10>
+  %ret = lshr <3 x i32> %t0, <i32 5, i32 undef, i32 5>
   ret <3 x i32> %ret
 }
 
@@ -229,41 +229,41 @@ define <2 x i32> @positive_biggerLshr_vec(<2 x i32> %x) {
 ; CHECK-NEXT:    [[RET:%.*]] = and <2 x i32> [[TMP1]], <i32 4194303, i32 4194303>
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, <i32 5, i32 5>
-  %ret = lshr <2 x i32> %tmp0, <i32 10, i32 10>
+  %t0 = shl <2 x i32> %x, <i32 5, i32 5>
+  %ret = lshr <2 x i32> %t0, <i32 10, i32 10>
   ret <2 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerLshr_vec_undef0(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec_undef0(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 10, i32 10, i32 10>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 10, i32 10, i32 10>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
-  %ret = lshr <3 x i32> %tmp0, <i32 10, i32 10, i32 10>
+  %t0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = lshr <3 x i32> %t0, <i32 10, i32 10, i32 10>
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerLshr_vec_undef1(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec_undef1(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 10, i32 undef, i32 10>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 5, i32 5, i32 5>
-  %ret = lshr <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+  %t0 = shl <3 x i32> %x, <i32 5, i32 5, i32 5>
+  %ret = lshr <3 x i32> %t0, <i32 10, i32 undef, i32 10>
   ret <3 x i32> %ret
 }
 
 define <3 x i32> @positive_biggerLshr_vec_undef2(<3 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshr_vec_undef2(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[TMP0]], <i32 10, i32 undef, i32 10>
+; CHECK-NEXT:    [[T0:%.*]] = shl <3 x i32> [[X:%.*]], <i32 5, i32 undef, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <3 x i32> [[T0]], <i32 10, i32 undef, i32 10>
 ; CHECK-NEXT:    ret <3 x i32> [[RET]]
 ;
-  %tmp0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
-  %ret = lshr <3 x i32> %tmp0, <i32 10, i32 undef, i32 10>
+  %t0 = shl <3 x i32> %x, <i32 5, i32 undef, i32 5>
+  %ret = lshr <3 x i32> %t0, <i32 10, i32 undef, i32 10>
   ret <3 x i32> %ret
 }
 
@@ -273,70 +273,70 @@ define <3 x i32> @positive_biggerLshr_vec_undef2(<3 x i32> %x) {
 
 define i32 @positive_sameconst_multiuse(i32 %x) {
 ; CHECK-LABEL: @positive_sameconst_multiuse(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], 5
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[X]], 134217727
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 5
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl i32 %x, 5
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
 define i32 @positive_biggerShl_shlnuw_multiuse(i32 %x) {
 ; CHECK-LABEL: @positive_biggerShl_shlnuw_multiuse(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i32 [[X:%.*]], 10
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw i32 [[X:%.*]], 10
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[RET:%.*]] = shl nuw i32 [[X]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 10
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl nuw i32 %x, 10
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
 define i32 @positive_biggerLshr_shlnuw_multiuse(i32 %x) {
 ; CHECK-LABEL: @positive_biggerLshr_shlnuw_multiuse(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw i32 [[X:%.*]], 5
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl nuw i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[X]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl nuw i32 %x, 5
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 10
+  %t0 = shl nuw i32 %x, 5
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 10
   ret i32 %ret
 }
 
 ; NOTE: creates one extra instruction, but this seems intentional.
 define i32 @positive_biggerShl_multiuse_extrainstr(i32 %x) {
 ; CHECK-LABEL: @positive_biggerShl_multiuse_extrainstr(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], 10
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 10
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X]], 5
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 134217696
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 10
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 5
+  %t0 = shl i32 %x, 10
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 5
   ret i32 %ret
 }
 
 ; NOTE: creates one extra instruction, but this seems intentional.
 define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) {
 ; CHECK-LABEL: @positive_biggerLshr_multiuse_extrainstr(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], 5
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 5
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 5
 ; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 4194303
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, 5
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, 10
+  %t0 = shl i32 %x, 5
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, 10
   ret i32 %ret
 }
 
@@ -346,23 +346,23 @@ define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) {
 
 define <2 x i32> @positive_biggerShl_vec_nonsplat(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerShl_vec_nonsplat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 5>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[TMP0]], <i32 5, i32 10>
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 5>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[T0]], <i32 5, i32 10>
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, <i32 5, i32 5>
-  %ret = lshr <2 x i32> %tmp0, <i32 5, i32 10>
+  %t0 = shl <2 x i32> %x, <i32 5, i32 5>
+  %ret = lshr <2 x i32> %t0, <i32 5, i32 10>
   ret <2 x i32> %ret
 }
 
 define <2 x i32> @positive_biggerLshl_vec_nonsplat(<2 x i32> %x) {
 ; CHECK-LABEL: @positive_biggerLshl_vec_nonsplat(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 10>
-; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[TMP0]], <i32 5, i32 5>
+; CHECK-NEXT:    [[T0:%.*]] = shl <2 x i32> [[X:%.*]], <i32 5, i32 10>
+; CHECK-NEXT:    [[RET:%.*]] = lshr <2 x i32> [[T0]], <i32 5, i32 5>
 ; CHECK-NEXT:    ret <2 x i32> [[RET]]
 ;
-  %tmp0 = shl <2 x i32> %x, <i32 5, i32 10>
-  %ret = lshr <2 x i32> %tmp0, <i32 5, i32 5>
+  %t0 = shl <2 x i32> %x, <i32 5, i32 10>
+  %ret = lshr <2 x i32> %t0, <i32 5, i32 5>
   ret <2 x i32> %ret
 }
 
@@ -372,12 +372,12 @@ define <2 x i32> @positive_biggerLshl_vec_nonsplat(<2 x i32> %x) {
 
 define i32 @negative_twovars(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: @negative_twovars(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[TMP0]], [[Z:%.*]]
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[T0]], [[Z:%.*]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, %y
-  %ret = lshr i32 %tmp0, %z ; $z, not %y
+  %t0 = shl i32 %x, %y
+  %ret = lshr i32 %t0, %z ; $z, not %y
   ret i32 %ret
 }
 
@@ -386,13 +386,13 @@ declare void @use32(i32)
 ; One use only.
 define i32 @negative_oneuse(i32 %x, i32 %y) {
 ; CHECK-LABEL: @negative_oneuse(
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    call void @use32(i32 [[TMP0]])
-; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[TMP0]], [[Y]]
+; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    call void @use32(i32 [[T0]])
+; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[T0]], [[Y]]
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
-  %tmp0 = shl i32 %x, %y
-  call void @use32(i32 %tmp0)
-  %ret = lshr i32 %tmp0, %y
+  %t0 = shl i32 %x, %y
+  call void @use32(i32 %t0)
+  %ret = lshr i32 %t0, %y
   ret i32 %ret
 }

From 2d031ec5e53f4e28ea5cc02b4cfdead98a9c0007 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 20 Jan 2022 13:35:58 -0500
Subject: [PATCH 051/946] [InstCombine] add one-use check to opposite shift
 folds

Test comments say this might be intentional, but I don't
see any hard evidence to support it. The extra instruction
shows up as a potential regression in D117680.

One test does show a missed fold that might be recovered
with better demanded bits analysis.
---
 .../InstCombine/InstCombineShifts.cpp         | 30 +++++++++++--------
 .../canonicalize-shl-lshr-to-masking.ll       | 12 ++++----
 llvm/test/Transforms/InstCombine/shift.ll     |  2 +-
 3 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 0ade25f768253..9acad19df9df5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -1032,12 +1032,13 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
           NewLShr->setIsExact(I.isExact());
           return NewLShr;
         }
-        // (X << C1) >>u C  --> (X >>u (C - C1)) & (-1 >> C)
-        Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
-        APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
-        return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
-      }
-      if (C1->ugt(ShAmtC)) {
+        if (Op0->hasOneUse()) {
+          // (X << C1) >>u C  --> (X >>u (C - C1)) & (-1 >> C)
+          Value *NewLShr = Builder.CreateLShr(X, ShiftDiff, "", I.isExact());
+          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
+          return BinaryOperator::CreateAnd(NewLShr, ConstantInt::get(Ty, Mask));
+        }
+      } else if (C1->ugt(ShAmtC)) {
         unsigned ShlAmtC = C1->getZExtValue();
         Constant *ShiftDiff = ConstantInt::get(Ty, ShlAmtC - ShAmtC);
         if (cast<BinaryOperator>(Op0)->hasNoUnsignedWrap()) {
@@ -1046,15 +1047,18 @@ Instruction *InstCombinerImpl::visitLShr(BinaryOperator &I) {
           NewShl->setHasNoUnsignedWrap(true);
           return NewShl;
         }
-        // (X << C1) >>u C  --> X << (C1 - C) & (-1 >> C)
-        Value *NewShl = Builder.CreateShl(X, ShiftDiff);
+        if (Op0->hasOneUse()) {
+          // (X << C1) >>u C  --> X << (C1 - C) & (-1 >> C)
+          Value *NewShl = Builder.CreateShl(X, ShiftDiff);
+          APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
+          return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
+        }
+      } else {
+        assert(*C1 == ShAmtC);
+        // (X << C) >>u C --> X & (-1 >>u C)
         APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
-        return BinaryOperator::CreateAnd(NewShl, ConstantInt::get(Ty, Mask));
+        return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
       }
-      assert(*C1 == ShAmtC);
-      // (X << C) >>u C --> X & (-1 >>u C)
-      APInt Mask(APInt::getLowBitsSet(BitWidth, BitWidth - ShAmtC));
-      return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, Mask));
     }
 
     // ((X << C) + Y) >>u C --> (X + (Y >>u C)) & (-1 >>u C)
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
index 22952145d54a3..265570dfe6d56 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-shl-lshr-to-masking.ll
@@ -310,13 +310,13 @@ define i32 @positive_biggerLshr_shlnuw_multiuse(i32 %x) {
   ret i32 %ret
 }
 
-; NOTE: creates one extra instruction, but this seems intentional.
+; negative test - don't create extra instructions
+
 define i32 @positive_biggerShl_multiuse_extrainstr(i32 %x) {
 ; CHECK-LABEL: @positive_biggerShl_multiuse_extrainstr(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 10
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = shl i32 [[X]], 5
-; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 134217696
+; CHECK-NEXT:    [[RET:%.*]] = lshr exact i32 [[T0]], 5
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %t0 = shl i32 %x, 10
@@ -325,13 +325,13 @@ define i32 @positive_biggerShl_multiuse_extrainstr(i32 %x) {
   ret i32 %ret
 }
 
-; NOTE: creates one extra instruction, but this seems intentional.
+; negative test - don't create extra instructions
+
 define i32 @positive_biggerLshr_multiuse_extrainstr(i32 %x) {
 ; CHECK-LABEL: @positive_biggerLshr_multiuse_extrainstr(
 ; CHECK-NEXT:    [[T0:%.*]] = shl i32 [[X:%.*]], 5
 ; CHECK-NEXT:    call void @use32(i32 [[T0]])
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X]], 5
-; CHECK-NEXT:    [[RET:%.*]] = and i32 [[TMP1]], 4194303
+; CHECK-NEXT:    [[RET:%.*]] = lshr i32 [[T0]], 10
 ; CHECK-NEXT:    ret i32 [[RET]]
 ;
   %t0 = shl i32 %x, 5
diff --git a/llvm/test/Transforms/InstCombine/shift.ll b/llvm/test/Transforms/InstCombine/shift.ll
index 11d9be073b025..d165c687849ce 100644
--- a/llvm/test/Transforms/InstCombine/shift.ll
+++ b/llvm/test/Transforms/InstCombine/shift.ll
@@ -650,7 +650,7 @@ define i8 @test39(i32 %a0) {
 ; CHECK-NEXT:    [[I49:%.*]] = shl i8 [[I4]], 6
 ; CHECK-NEXT:    [[I50:%.*]] = and i8 [[I49]], 64
 ; CHECK-NEXT:    [[I51:%.*]] = xor i8 [[I50]], [[I5]]
-; CHECK-NEXT:    [[TMP0:%.*]] = shl i8 [[I4]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr exact i8 [[I5]], 3
 ; CHECK-NEXT:    [[I54:%.*]] = and i8 [[TMP0]], 16
 ; CHECK-NEXT:    [[I551:%.*]] = or i8 [[I54]], [[I51]]
 ; CHECK-NEXT:    ret i8 [[I551]]

From c65186c89f35b7b599c41183def666a2bde62ddd Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Thu, 20 Jan 2022 19:36:16 +0100
Subject: [PATCH 052/946] [clang] Improve -Wdeclaration-after-statement

With 118f966b46cf, Clang matches GCC's behaviour and allows enabling
-Wdeclaration-after-statement with C99 and later.

However, the check for mixing declarations and code is not a constant time
algorithm, and therefore should be guarded with Diags.isIgnored().

Furthermore, improve test coverage with: non-pedantic C89 with the
warning; C11 with the warning; and when using -Wall.

Finally, mention the changed behaviour in ReleaseNotes.rst.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D117232
---
 clang/docs/ReleaseNotes.rst        |  5 +++++
 clang/lib/Sema/SemaStmt.cpp        |  8 +++++---
 clang/test/Sema/warn-mixed-decls.c | 10 ++++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c787d355a3148..2eec63901932e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -58,6 +58,11 @@ Improvements to Clang's diagnostics
   release being diagnosed against). These new groups are automatically implied
   when passing ``-Wc++N-extensions``. Resolves PR33518.
 
+- Support ``-Wdeclaration-after-statement`` with C99 and later standards, and
+  not just C89, matching GCC's behaviour. A notable usecase is supporting style
+  guides that forbid mixing declarations and code, but want to move to newer C
+  standards.
+
 Non-comprehensive list of changes in this release
 -------------------------------------------------
 
diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp
index ef498f9a52282..746eb82a5bdc7 100644
--- a/clang/lib/Sema/SemaStmt.cpp
+++ b/clang/lib/Sema/SemaStmt.cpp
@@ -413,7 +413,10 @@ StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R,
   // If we're in C mode, check that we don't have any decls after stmts.  If
   // so, emit an extension diagnostic in C89 and potentially a warning in later
   // versions.
-  if (!getLangOpts().CPlusPlus) {
+  const unsigned MixedDeclsCodeID = getLangOpts().C99
+                                        ? diag::warn_mixed_decls_code
+                                        : diag::ext_mixed_decls_code;
+  if (!getLangOpts().CPlusPlus && !Diags.isIgnored(MixedDeclsCodeID, L)) {
     // Note that __extension__ can be around a decl.
     unsigned i = 0;
     // Skip over all declarations.
@@ -426,8 +429,7 @@ StmtResult Sema::ActOnCompoundStmt(SourceLocation L, SourceLocation R,
 
     if (i != NumElts) {
       Decl *D = *cast<DeclStmt>(Elts[i])->decl_begin();
-      Diag(D->getLocation(), !getLangOpts().C99 ? diag::ext_mixed_decls_code
-                                                : diag::warn_mixed_decls_code);
+      Diag(D->getLocation(), MixedDeclsCodeID);
     }
   }
 
diff --git a/clang/test/Sema/warn-mixed-decls.c b/clang/test/Sema/warn-mixed-decls.c
index 219d64472b589..b8a7dc1e2bc09 100644
--- a/clang/test/Sema/warn-mixed-decls.c
+++ b/clang/test/Sema/warn-mixed-decls.c
@@ -1,13 +1,23 @@
 /* RUN: %clang_cc1 -fsyntax-only -verify -std=c89 -pedantic %s
  */
+/* RUN: %clang_cc1 -fsyntax-only -verify -std=c89 -Wdeclaration-after-statement %s
+ */
 /* RUN: %clang_cc1 -fsyntax-only -verify -std=c99 -Wdeclaration-after-statement %s
  */
+/* RUN: %clang_cc1 -fsyntax-only -verify -std=c11 -Wdeclaration-after-statement %s
+ */
 
 /* Should not emit diagnostic when not pedantic, not enabled or in C++ Code*/
 /* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c89 %s
  */
 /* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c99 %s
  */
+/* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c89 -Wall %s
+ */
+/* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c99 -Wall -pedantic %s
+ */
+/* RUN: %clang_cc1 -fsyntax-only -verify=none -std=c11 -Wall -pedantic %s
+ */
 /* RUN: %clang_cc1 -fsyntax-only -verify=none -x c++ %s
  */
 /* RUN: %clang_cc1 -fsyntax-only -verify=none -x c++ -Wdeclaration-after-statement %s

From df31ff1b29bc4c2308ec5df8a7ff0ec2ab0942d4 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Wed, 19 Jan 2022 06:45:07 +0000
Subject: [PATCH 053/946] [cmake] Make include(GNUInstallDirs) always below
 project(..)

Its defaulting logic must go after `project(..)` to work correctly,  but `project(..)` is often in a standalone condition making this
awkward, since the rest of the condition code may also need GNUInstallDirs.

The good thing is there are the various standalone booleans, which I had missed before. This makes splitting the conditional blocks less awkward.

Reviewed By: arichardson, phosek, beanz, ldionne, #libunwind, #libc, #libc_abi

Differential Revision: https://reviews.llvm.org/D117639
---
 flang/CMakeLists.txt                    | 10 +++++++---
 libcxx/CMakeLists.txt                   | 11 ++++++-----
 libcxxabi/CMakeLists.txt                | 12 +++++++-----
 libunwind/CMakeLists.txt                | 19 ++++++++++++-------
 lld/CMakeLists.txt                      | 10 ++++++----
 lldb/CMakeLists.txt                     |  9 +++++++--
 lldb/cmake/modules/LLDBStandalone.cmake |  2 --
 lldb/tools/debugserver/CMakeLists.txt   |  5 +++++
 llvm/CMakeLists.txt                     |  5 +++--
 mlir/CMakeLists.txt                     |  9 +++++++--
 polly/CMakeLists.txt                    |  8 ++++++--
 pstl/CMakeLists.txt                     |  5 +++--
 12 files changed, 69 insertions(+), 36 deletions(-)

diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt
index abb9a47d3abb4..5caa79e8da477 100644
--- a/flang/CMakeLists.txt
+++ b/flang/CMakeLists.txt
@@ -7,8 +7,6 @@ set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
 set(CMAKE_CXX_EXTENSIONS OFF)
 
-include(GNUInstallDirs)
-
 set(FLANG_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_BINARY_DIR AND NOT MSVC_IDE)
@@ -27,7 +25,14 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   message("Building Flang as a standalone project.")
   project(Flang)
   set(FLANG_STANDALONE_BUILD ON)
+else()
+  set(FLANG_STANDALONE_BUILD OFF)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
 
+if (FLANG_STANDALONE_BUILD)
   set(FLANG_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
   if (NOT MSVC_IDE)
     set(LLVM_ENABLE_ASSERTIONS ${ENABLE_ASSERTIONS}
@@ -179,7 +184,6 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   endif()
 
 else()
-  set(FLANG_STANDALONE_BUILD OFF)
   option(FLANG_INCLUDE_TESTS
          "Generate build targets for the Flang unit tests."
          ${LLVM_INCLUDE_TESTS})
diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt
index b44b16088effe..77df59e4cd755 100644
--- a/libcxx/CMakeLists.txt
+++ b/libcxx/CMakeLists.txt
@@ -10,8 +10,6 @@ endif()
 #===============================================================================
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom modules
@@ -39,14 +37,17 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXX_STANDALONE_BUIL
   # In a standalone build, we don't have llvm to automatically generate the
   # llvm-lit script for us.  So we need to provide an explicit directory that
   # the configurator should write the script into.
-  set(LIBCXX_STANDALONE_BUILD 1)
+  set(LIBCXX_STANDALONE_BUILD TRUE)
   set(LLVM_LIT_OUTPUT_DIR "${LIBCXX_BINARY_DIR}/bin")
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
 
+if (LIBCXX_STANDALONE_BUILD)
   # Find the LLVM sources and simulate LLVM CMake options.
   include(HandleOutOfTreeLLVM)
-endif()
 
-if (LIBCXX_STANDALONE_BUILD)
   find_package(Python3 COMPONENTS Interpreter)
   if(NOT Python3_Interpreter_FOUND)
     message(SEND_ERROR "Python3 not found. Python3 is required")
diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt
index 78f486418af79..ecbc7091864e7 100644
--- a/libcxxabi/CMakeLists.txt
+++ b/libcxxabi/CMakeLists.txt
@@ -10,8 +10,6 @@ endif()
 
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom modules
@@ -37,17 +35,21 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBCXXABI_STANDALONE_B
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 
+  set(LIBCXXABI_STANDALONE_BUILD TRUE)
+
   # In a standalone build, we don't have llvm to automatically generate the
   # llvm-lit script for us.  So we need to provide an explicit directory that
   # the configurator should write the script into.
-  set(LIBCXXABI_STANDALONE_BUILD 1)
   set(LLVM_LIT_OUTPUT_DIR "${LIBCXXABI_BINARY_DIR}/bin")
+endif()
 
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if (LIBCXXABI_STANDALONE_BUILD)
   # Find the LLVM sources and simulate LLVM CMake options.
   include(HandleOutOfTreeLLVM)
-endif()
 
-if (LIBCXXABI_STANDALONE_BUILD)
   find_package(Python3 COMPONENTS Interpreter)
   if(NOT Python3_Interpreter_FOUND)
     message(WARNING "Python3 not found, using python2 as a fallback")
diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt
index 03bd316d331cb..04550ae51a422 100644
--- a/libunwind/CMakeLists.txt
+++ b/libunwind/CMakeLists.txt
@@ -8,8 +8,6 @@ endif()
 
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom modules
@@ -30,21 +28,28 @@ if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR LIBUNWIND_STANDALONE_B
   # linking.
   include(EnableLanguageNolink)
   project(libunwind LANGUAGES NONE)
-  llvm_enable_language_nolink(C CXX ASM)
 
   set(PACKAGE_NAME libunwind)
   set(PACKAGE_VERSION 14.0.0git)
   set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
   set(PACKAGE_BUGREPORT "llvm-bugs@lists.llvm.org")
 
+  set(LIBUNWIND_STANDALONE_BUILD TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if(LIBUNWIND_STANDALONE_BUILD)
+  llvm_enable_language_nolink(C CXX ASM)
+
+  # Find the LLVM sources and simulate LLVM CMake options.
+  include(HandleOutOfTreeLLVM)
+
   # In a standalone build, we don't have llvm to automatically generate the
   # llvm-lit script for us.  So we need to provide an explicit directory that
   # the configurator should write the script into.
-  set(LIBUNWIND_STANDALONE_BUILD 1)
   set(LLVM_LIT_OUTPUT_DIR "${LIBUNWIND_BINARY_DIR}/bin")
-
-  # Find the LLVM sources and simulate LLVM CMake options.
-  include(HandleOutOfTreeLLVM)
 else()
   set(LLVM_LIT "${CMAKE_SOURCE_DIR}/utils/lit/lit.py")
 endif()
diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt
index e1a29b884d178..1a53c5bf47a8d 100644
--- a/lld/CMakeLists.txt
+++ b/lld/CMakeLists.txt
@@ -1,12 +1,16 @@
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 # If we are not building as a part of LLVM, build LLD as an
 # standalone project, using LLVM as an external library:
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   project(lld)
+  set(LLD_BUILT_STANDALONE TRUE)
+endif()
 
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if(LLD_BUILT_STANDALONE)
   set(CMAKE_INCLUDE_CURRENT_DIR ON)
 
   # Rely on llvm-config.
@@ -140,8 +144,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   if(LLVM_HAVE_LIBXAR)
     set(XAR_LIB xar)
   endif()
-
-  set(LLD_BUILT_STANDALONE TRUE)
 endif() # standalone
 
 set(LLD_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt
index 79d451965ed43..edb4c513a64d2 100644
--- a/lldb/CMakeLists.txt
+++ b/lldb/CMakeLists.txt
@@ -1,7 +1,5 @@
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 # Add path for custom modules.
 set(CMAKE_MODULE_PATH
   ${CMAKE_MODULE_PATH}
@@ -13,6 +11,13 @@ set(CMAKE_MODULE_PATH
 # using LLVM as an external library.
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   project(lldb)
+  set(LLDB_BUILT_STANDALONE TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if(LLDB_BUILT_STANDALONE)
   include(LLDBStandalone)
 
   set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to")
diff --git a/lldb/cmake/modules/LLDBStandalone.cmake b/lldb/cmake/modules/LLDBStandalone.cmake
index 5be9e57f23bfc..1d3d1bbcc25da 100644
--- a/lldb/cmake/modules/LLDBStandalone.cmake
+++ b/lldb/cmake/modules/LLDBStandalone.cmake
@@ -108,5 +108,3 @@ include_directories(
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX})
-
-set(LLDB_BUILT_STANDALONE 1)
diff --git a/lldb/tools/debugserver/CMakeLists.txt b/lldb/tools/debugserver/CMakeLists.txt
index eba5c41491329..3a585a2e3d48d 100644
--- a/lldb/tools/debugserver/CMakeLists.txt
+++ b/lldb/tools/debugserver/CMakeLists.txt
@@ -2,7 +2,12 @@ cmake_minimum_required(VERSION 3.13.4)
 
 project(Debugserver LANGUAGES C CXX ASM-ATT)
 
+# Must go below project(..)
+include(GNUInstallDirs)
+
 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
+  set(LLDB_BUILT_STANDALONE TRUE)
+
   set(CMAKE_MODULE_PATH
     ${CMAKE_MODULE_PATH}
     "${CMAKE_SOURCE_DIR}/../../cmake"
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 687bc6489b4ac..76c918306b225 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -2,8 +2,6 @@
 
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 # CMP0116: Ninja generators transform `DEPFILE`s from `add_custom_command()`
 # New in CMake 3.20. https://cmake.org/cmake/help/latest/policy/CMP0116.html
 if(POLICY CMP0116)
@@ -47,6 +45,9 @@ project(LLVM
   VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}
   LANGUAGES C CXX ASM)
 
+# Must go after project(..)
+include(GNUInstallDirs)
+
 set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to")
 set(CMAKE_CXX_STANDARD_REQUIRED YES)
 if (CYGWIN)
diff --git a/mlir/CMakeLists.txt b/mlir/CMakeLists.txt
index 63f69ed96d3f5..3612a6ccd0533 100644
--- a/mlir/CMakeLists.txt
+++ b/mlir/CMakeLists.txt
@@ -1,10 +1,15 @@
 # MLIR project.
 
-include(GNUInstallDirs)
-
 # Check if MLIR is built as a standalone project.
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   project(mlir)
+  set(MLIR_STANDALONE_BUILD TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
+
+if(MLIR_STANDALONE_BUILD)
   cmake_minimum_required(VERSION 3.13.4)
 
   find_package(LLVM CONFIG REQUIRED)
diff --git a/polly/CMakeLists.txt b/polly/CMakeLists.txt
index d6bcc8f7f285e..6a6e78b06bb2d 100644
--- a/polly/CMakeLists.txt
+++ b/polly/CMakeLists.txt
@@ -1,10 +1,14 @@
-include(GNUInstallDirs)
-
 # Check if this is a in tree build.
 if (NOT DEFINED LLVM_MAIN_SRC_DIR)
   project(Polly)
   cmake_minimum_required(VERSION 3.13.4)
+  set(POLLY_STANDALONE_BUILD TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
 
+if(POLLY_STANDALONE_BUILD)
   # Where is LLVM installed?
   find_package(LLVM CONFIG REQUIRED)
   set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${LLVM_CMAKE_DIR})
diff --git a/pstl/CMakeLists.txt b/pstl/CMakeLists.txt
index 8784eb07b5742..2461522349ee0 100644
--- a/pstl/CMakeLists.txt
+++ b/pstl/CMakeLists.txt
@@ -7,8 +7,6 @@
 #===----------------------------------------------------------------------===##
 cmake_minimum_required(VERSION 3.13.4)
 
-include(GNUInstallDirs)
-
 set(PARALLELSTL_VERSION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/include/pstl/internal/pstl_config.h")
 file(STRINGS "${PARALLELSTL_VERSION_FILE}" PARALLELSTL_VERSION_SOURCE REGEX "#define _PSTL_VERSION .*$")
 string(REGEX REPLACE "#define _PSTL_VERSION (.*)$" "\\1" PARALLELSTL_VERSION_SOURCE "${PARALLELSTL_VERSION_SOURCE}")
@@ -18,6 +16,9 @@ math(EXPR VERSION_PATCH "(${PARALLELSTL_VERSION_SOURCE} % 10)")
 
 project(ParallelSTL VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH} LANGUAGES CXX)
 
+# Must go below project(..)
+include(GNUInstallDirs)
+
 set(PSTL_PARALLEL_BACKEND "serial" CACHE STRING "Threading backend to use. Valid choices are 'serial', 'omp', and 'tbb'. The default is 'serial'.")
 set(PSTL_HIDE_FROM_ABI_PER_TU OFF CACHE BOOL "Whether to constrain ABI-unstable symbols to each translation unit (basically, mark them with C's static keyword).")
 set(_PSTL_HIDE_FROM_ABI_PER_TU ${PSTL_HIDE_FROM_ABI_PER_TU}) # For __pstl_config_site

From 792853cb786b360fad6dedb9066b76ecd958cb93 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulsson@linux.vnet.ibm.com>
Date: Tue, 18 Jan 2022 17:40:26 -0600
Subject: [PATCH 054/946] [SystemZ] Remove the ManipulatesSP flag from backend
 (NFC).

This flag was set in the presence of stacksave/stackrestore in order to force
a frame pointer.

This should however not be needed per the comment in MachineFrameInfo.h
stating that a a variable sized object "...is the sole condition which
prevents frame pointer elimination", and experiments have also shown that
there seems to be no effect whatsoever on code generation with ManipulatesSP.

Review: Ulrich Weigand
---
 llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp     | 3 +--
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp      | 4 ----
 llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 9 +--------
 3 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index 1adc8994e0f4d..ccc7d0737f531 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -764,8 +764,7 @@ void SystemZELFFrameLowering::inlineStackProbe(
 
 bool SystemZELFFrameLowering::hasFP(const MachineFunction &MF) const {
   return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
-          MF.getFrameInfo().hasVarSizedObjects() ||
-          MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
+          MF.getFrameInfo().hasVarSizedObjects());
 }
 
 StackOffset SystemZELFFrameLowering::getFrameIndexReference(
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e1549f3012f95..881346bbe47ed 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -318,8 +318,6 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
 
-  // Use custom expanders so that we can force the function to use
-  // a frame pointer.
   setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
   setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
 
@@ -4194,7 +4192,6 @@ SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
   MachineFunction &MF = DAG.getMachineFunction();
   const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
   auto *Regs = Subtarget->getSpecialRegisters();
-  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     report_fatal_error("Variable-sized stack allocations are not supported "
                        "in GHC calling convention");
@@ -4207,7 +4204,6 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
   MachineFunction &MF = DAG.getMachineFunction();
   const SystemZSubtarget *Subtarget = &MF.getSubtarget<SystemZSubtarget>();
   auto *Regs = Subtarget->getSpecialRegisters();
-  MF.getInfo<SystemZMachineFunctionInfo>()->setManipulatesSP(true);
   bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
 
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
diff --git a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index f755d5cd3d5b2..ec4b812eb0e1a 100644
--- a/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -34,14 +34,12 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
   unsigned VarArgsFrameIndex;
   unsigned RegSaveFrameIndex;
   int FramePointerSaveIndex;
-  bool ManipulatesSP;
   unsigned NumLocalDynamics;
 
 public:
   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
     : VarArgsFirstGPR(0), VarArgsFirstFPR(0), VarArgsFrameIndex(0),
-      RegSaveFrameIndex(0), FramePointerSaveIndex(0), ManipulatesSP(false),
-      NumLocalDynamics(0) {}
+      RegSaveFrameIndex(0), FramePointerSaveIndex(0), NumLocalDynamics(0) {}
 
   // Get and set the first and last call-saved GPR that should be saved by
   // this function and the SP offset for the STMG.  These are 0 if no GPRs
@@ -85,11 +83,6 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
   int getFramePointerSaveIndex() const { return FramePointerSaveIndex; }
   void setFramePointerSaveIndex(int Idx) { FramePointerSaveIndex = Idx; }
 
-  // Get and set whether the function directly manipulates the stack pointer,
-  // e.g. through STACKSAVE or STACKRESTORE.
-  bool getManipulatesSP() const { return ManipulatesSP; }
-  void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
-
   // Count number of local-dynamic TLS symbols used.
   unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
   void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }

From 5fa4cf82dfa075e7983ce92d0042480b7b8f4fbc Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@legionlabs.com>
Date: Thu, 20 Jan 2022 13:38:32 -0500
Subject: [PATCH 055/946] [Clang] Separate the 'debug-info-hotpatch' test in
 two parts: one for ARM and another for AArch64

After 5af2433e1794ebf7e58e848aa612c7912d71dc78, this shall fix: https://lab.llvm.org/buildbot/#/builders/188/builds/8400 - if not I'll revert this patch and 5af2433e1794ebf7e58e848aa612c7912d71dc78.
---
 .../debug-info-hotpatch-aarch64.cpp           | 23 +++++++++++++++++++
 .../CodeGenCXX/debug-info-hotpatch-arm.cpp    |  7 ++----
 2 files changed, 25 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/debug-info-hotpatch-aarch64.cpp

diff --git a/clang/test/CodeGenCXX/debug-info-hotpatch-aarch64.cpp b/clang/test/CodeGenCXX/debug-info-hotpatch-aarch64.cpp
new file mode 100644
index 0000000000000..10fb1750f2c55
--- /dev/null
+++ b/clang/test/CodeGenCXX/debug-info-hotpatch-aarch64.cpp
@@ -0,0 +1,23 @@
+// REQUIRES: aarch64-registered-target
+///
+/// Check that using /hotpatch doesn't generate an error.
+/// Binaries are always hotpatchable on ARM/ARM64.
+///
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1
+///
+/// Ensure that we set the hotpatchable flag in the debug information.
+///
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /Z7 -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
+// HOTPATCH: S_COMPILE3 [size = [[#]]]
+// HOTPATCH: flags = hot patchable
+///
+/// Unfortunately we need /Z7, Clang does not systematically generate S_COMPILE3.
+///
+// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c -o %t.obj -- %s
+// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=NO-HOTPATCH
+// NO-HOTPATCH-NOT: flags = hot patchable
+
+int main() {
+  return 0;
+}
diff --git a/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp b/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
index 6176f1788760a..48a61f7fb1977 100644
--- a/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
+++ b/clang/test/CodeGenCXX/debug-info-hotpatch-arm.cpp
@@ -1,23 +1,20 @@
-// REQUIRES: aarch64-registered-target || arm-registered-target
+// REQUIRES: arm-registered-target
 ///
 /// Check that using /hotpatch doesn't generate an error.
 /// Binaries are always hotpatchable on ARM/ARM64.
 ///
 // RUN: %clang_cl --target=arm-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1
-// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /hotpatch /Z7 -- %s 2>&1
 ///
 /// Ensure that we set the hotpatchable flag in the debug information.
 ///
 // RUN: %clang_cl --target=arm-pc-windows-msvc /c /Z7 -o %t.obj -- %s
 // RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
-// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c /Z7 -o %t.obj -- %s
-// RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=HOTPATCH
 // HOTPATCH: S_COMPILE3 [size = [[#]]]
 // HOTPATCH: flags = hot patchable
 ///
 /// Unfortunately we need /Z7, Clang does not systematically generate S_COMPILE3.
 ///
-// RUN: %clang_cl --target=aarch64-pc-windows-msvc /c -o %t.obj -- %s
+// RUN: %clang_cl --target=arm-pc-windows-msvc /c -o %t.obj -- %s
 // RUN: llvm-pdbutil dump -symbols %t.obj | FileCheck %s --check-prefix=NO-HOTPATCH
 // NO-HOTPATCH-NOT: flags = hot patchable
 

From 94a0660c14dabff715c0f0b51e89f6f4db406544 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Thu, 20 Jan 2022 11:05:01 -0800
Subject: [PATCH 056/946] [AMDGPU] Regenerate remat-vop.mir. NFC.

---
 llvm/test/CodeGen/AMDGPU/remat-vop.mir | 2210 ++++++++++++------------
 1 file changed, 1105 insertions(+), 1105 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 2eb2e1b7c399b..a677b0815ee7b 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -8,12 +8,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e32
     ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
     %2:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
@@ -31,17 +31,17 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_mov_b32_e32_impuse
     ; GCN: $m0 = IMPLICIT_DEF
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec, implicit $m0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     $m0 = IMPLICIT_DEF
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec, implicit $m0
     %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec, implicit $m0
@@ -60,13 +60,13 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e32_exec_def
     ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
     %2:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
@@ -83,12 +83,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b32_e64
     ; GCN: renamable $vgpr0 = V_MOV_B32_e64 1, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_e64 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e64 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e64 1, implicit $exec
     %1:vgpr_32 = V_MOV_B32_e64 2, implicit $exec
     %2:vgpr_32 = V_MOV_B32_e64 3, implicit $exec
@@ -106,16 +106,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_mov_b32_dpp
     ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_MOV_B32_dpp undef $vgpr1, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_dpp undef $vgpr0, undef $vgpr0, 1, 15, 15, 1, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %2:vgpr_32 = V_MOV_B32_dpp undef %2:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_MOV_B32_dpp undef %3:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
@@ -131,12 +131,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_read_b32
     ; GCN: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
     %1:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
     %2:vgpr_32 = V_ACCVGPR_READ_B32_e64 undef $agpr0, implicit $exec
@@ -152,12 +152,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_write_b32
     ; GCN: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
-    ; GCN: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN: S_NOP 0, implicit killed renamable $agpr1
-    ; GCN: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1
+    ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:agpr_32 = V_ACCVGPR_WRITE_B32_e64 1, implicit $exec
     %1:agpr_32 = V_ACCVGPR_WRITE_B32_e64 2, implicit $exec
     %2:agpr_32 = V_ACCVGPR_WRITE_B32_e64 3, implicit $exec
@@ -173,12 +173,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mov_b64_pseudo
     ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 1, implicit $exec
-    ; GCN: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_MOV_B64_PSEUDO 2, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 3, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec
     %1:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec
     %2:vreg_64_align2 = V_MOV_B64_PSEUDO 3, implicit $exec
@@ -194,12 +194,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -217,16 +217,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_fp_except
     ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -245,17 +245,17 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_mode_def
     ; GCN: $mode = IMPLICIT_DEF
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     $mode = IMPLICIT_DEF
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode
@@ -272,12 +272,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 1, 0, 0, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 2, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, 3, 0, 0, implicit $exec, implicit $mode
@@ -295,12 +295,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f64_e64_undef
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F64_e64 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_CVT_I32_F64_e64 0, undef %0:vreg_64, 0, 0, implicit $exec, implicit $mode
@@ -318,16 +318,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_dpp
     ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_dpp undef $vgpr1, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_dpp undef $vgpr0, 0, undef $vgpr0_vgpr1, 336, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_I32_F64_dpp undef %1:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_I32_F64_dpp undef %2:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CVT_I32_F64_dpp undef %3:vgpr_32, 0, undef %0:vreg_64_align2, 336, 0, 0, 0, implicit $exec, implicit $mode
@@ -345,16 +345,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_def
     ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit-def $m0
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit-def $m0
     %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit-def $m0
@@ -372,16 +372,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_i32_f64_e32_imp_use
     ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
-    ; GCN: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_I32_F64_e32 1, implicit $exec, implicit $mode, implicit $m0
     %1:vgpr_32 = V_CVT_I32_F64_e32 2, implicit $exec, implicit $mode, implicit $m0
     %2:vgpr_32 = V_CVT_I32_F64_e32 3, implicit $exec, implicit $mode, implicit $m0
@@ -397,12 +397,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_i32_e32
     ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_CVT_F64_I32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = V_CVT_F64_I32_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = V_CVT_F64_I32_e32 3, implicit $exec, implicit $mode
@@ -418,12 +418,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_f64_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_F32_F64_e32 3, implicit $exec, implicit $mode
@@ -439,12 +439,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_f32_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_CVT_F64_F32_e32 3, implicit $exec, implicit $mode
@@ -460,12 +460,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_u32_f64_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_U32_F64_e32 3, implicit $exec, implicit $mode
@@ -481,12 +481,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f64_u32_e32
     ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = V_CVT_F64_U32_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = V_CVT_F64_U32_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = V_CVT_F64_U32_e32 3, implicit $exec, implicit $mode
@@ -502,12 +502,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_i32_e32
     ; GCN: renamable $vgpr0 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_I32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_I32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_I32_e32 3, implicit $exec, implicit $mode
@@ -523,12 +523,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_i32_sdwa
     ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
@@ -548,16 +548,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cvt_f32_i32_sdwa_dst_unused_preserve
     ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr1(tied-def 0)
-    ; GCN: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr1(tied-def 0)
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_I32_sdwa 0, undef $vgpr0, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef $vgpr0(tied-def 0)
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %1:vgpr_32(tied-def 0)
     %2:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %2:vgpr_32(tied-def 0)
     %3:vgpr_32 = V_CVT_F32_I32_sdwa 0, undef %0:vgpr_32, 0, 0, 0, 2, 0, implicit $exec, implicit $mode, implicit undef %3:vgpr_32(tied-def 0)
@@ -573,12 +573,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_u32_e32
     ; GCN: renamable $vgpr0 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_U32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_U32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_U32_e32 3, implicit $exec, implicit $mode
@@ -594,12 +594,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_u32_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_U32_F32_e32 3, implicit $exec, implicit $mode
@@ -615,12 +615,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_i32_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_I32_F32_e32 3, implicit $exec, implicit $mode
@@ -636,12 +636,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_f16_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_F32_F16_e32 3, implicit $exec, implicit $mode
@@ -657,12 +657,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_rpi_i32_f32_e32
     ; GCN: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_RPI_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_RPI_I32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_RPI_I32_F32_e32 3, implicit $exec, implicit $mode
@@ -678,12 +678,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_flr_i32_f32_e32
     ; GCN: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_FLR_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_FLR_I32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_FLR_I32_F32_e32 3, implicit $exec, implicit $mode
@@ -699,12 +699,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_off_f32_i4_e32
     ; GCN: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_OFF_F32_I4_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_OFF_F32_I4_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_OFF_F32_I4_e32 3, implicit $exec, implicit $mode
@@ -720,12 +720,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_f32_ubyte0_e32
     ; GCN: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_CVT_F32_UBYTE0_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_CVT_F32_UBYTE0_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CVT_F32_UBYTE0_e32 3, implicit $exec, implicit $mode
@@ -741,12 +741,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fract_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FRACT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FRACT_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FRACT_F32_e32 3, implicit $exec, implicit $mode
@@ -762,12 +762,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_trunc_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_TRUNC_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_TRUNC_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_TRUNC_F32_e32 3, implicit $exec, implicit $mode
@@ -783,12 +783,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ceil_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_CEIL_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_CEIL_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CEIL_F32_e32 3, implicit $exec, implicit $mode
@@ -804,12 +804,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rndne_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RNDNE_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RNDNE_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_RNDNE_F32_e32 3, implicit $exec, implicit $mode
@@ -825,12 +825,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_floor_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FLOOR_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FLOOR_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FLOOR_F32_e32 3, implicit $exec, implicit $mode
@@ -846,12 +846,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_exp_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_EXP_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_EXP_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_EXP_F32_e32 3, implicit $exec, implicit $mode
@@ -867,12 +867,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_log_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_LOG_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_LOG_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LOG_F32_e32 3, implicit $exec, implicit $mode
@@ -888,12 +888,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RCP_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RCP_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_RCP_F32_e32 3, implicit $exec, implicit $mode
@@ -909,12 +909,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_iflag_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e32 3, implicit $exec, implicit $mode
@@ -930,12 +930,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rsq_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_RSQ_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_RSQ_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_RSQ_F32_e32 3, implicit $exec, implicit $mode
@@ -951,12 +951,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sqrt_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_SQRT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_SQRT_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SQRT_F32_e32 3, implicit $exec, implicit $mode
@@ -972,12 +972,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rcp_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_RCP_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_RCP_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_RCP_F64_e32 3, implicit $exec, implicit $mode
@@ -993,12 +993,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_rsq_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_RSQ_F64_e32 3, implicit $exec, implicit $mode
@@ -1014,12 +1014,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sqrt_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_SQRT_F64_e32 3, implicit $exec, implicit $mode
@@ -1035,12 +1035,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sin_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_SIN_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_SIN_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SIN_F32_e32 3, implicit $exec, implicit $mode
@@ -1056,12 +1056,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cos_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_COS_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_COS_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_COS_F32_e32 3, implicit $exec, implicit $mode
@@ -1077,12 +1077,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_not_b32_e32
     ; GCN: renamable $vgpr0 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_NOT_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_NOT_B32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_NOT_B32_e32 3, implicit $exec, implicit $mode
@@ -1098,12 +1098,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfrev_b32_e32
     ; GCN: renamable $vgpr0 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_BFREV_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_BFREV_B32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_BFREV_B32_e32 3, implicit $exec, implicit $mode
@@ -1119,12 +1119,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbh_u32_e32
     ; GCN: renamable $vgpr0 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBH_U32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBH_U32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_FFBH_U32_e32 3, implicit $exec, implicit $mode
@@ -1140,12 +1140,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbl_b32_e32
     ; GCN: renamable $vgpr0 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBL_B32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBL_B32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_FFBL_B32_e32 3, implicit $exec, implicit $mode
@@ -1161,12 +1161,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ffbh_i32_e32
     ; GCN: renamable $vgpr0 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_FFBH_I32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_FFBH_I32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_FFBH_I32_e32 3, implicit $exec, implicit $mode
@@ -1182,12 +1182,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f64_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F64_e32 3, implicit $exec, implicit $mode
@@ -1203,12 +1203,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_mant_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_FREXP_MANT_F64_e32 3, implicit $exec, implicit $mode
@@ -1224,12 +1224,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fract_f64_e32
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 1, implicit $exec, implicit $mode
     %1:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 2, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_FRACT_F64_e32 3, implicit $exec, implicit $mode
@@ -1245,12 +1245,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_exp_i32_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FREXP_EXP_I32_F32_e32 3, implicit $exec, implicit $mode
@@ -1266,12 +1266,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_frexp_mant_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FREXP_MANT_F32_e32 3, implicit $exec, implicit $mode
@@ -1287,12 +1287,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_exp_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_EXP_LEGACY_F32_e32 3, implicit $exec, implicit $mode
@@ -1308,12 +1308,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_log_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LOG_LEGACY_F32_e32 3, implicit $exec, implicit $mode
@@ -1329,12 +1329,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sat_pk_u8_i16_e32
     ; GCN: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_SAT_PK_U8_I16_e32 1, implicit $exec, implicit $mode
     %1:vgpr_32 = V_SAT_PK_U8_I16_e32 2, implicit $exec, implicit $mode
     %2:vgpr_32 = V_SAT_PK_U8_I16_e32 3, implicit $exec, implicit $mode
@@ -1350,12 +1350,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_accvgpr_mov_b32
     ; GCN: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN: renamable $agpr1 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN: S_NOP 0, implicit killed renamable $agpr1
-    ; GCN: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $agpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $agpr1 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr1
+    ; GCN-NEXT: renamable $agpr0 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $agpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %0:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
     %1:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
     %2:agpr_32 = V_ACCVGPR_MOV_B32 undef $agpr0, implicit $exec
@@ -1373,16 +1373,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_e32
     ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e32 1, undef $vgpr0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
     %3:vgpr_32 = V_CNDMASK_B32_e32 1, undef %0:vgpr_32, implicit $exec, implicit undef $vcc
@@ -1400,16 +1400,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_sdwa
     ; GCN: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
     %3:vgpr_32 = V_CNDMASK_B32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
@@ -1427,16 +1427,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_cndmask_b32_dpp
     ; GCN: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_CNDMASK_B32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
     %2:vgpr_32 = V_CNDMASK_B32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
     %3:vgpr_32 = V_CNDMASK_B32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
@@ -1452,12 +1452,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cndmask_b32_e64
     ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef $sgpr0_sgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
     %2:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
     %3:vgpr_32 = V_CNDMASK_B32_e64 0, 1, 0, 2, undef %0:sreg_64_xexec, implicit $exec
@@ -1473,12 +1473,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_madmk_f32
     ; GCN: renamable $vgpr0 = nofpexcept V_MADMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MADMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MADMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MADMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MADMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MADMK_F32 3, 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1494,12 +1494,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_ADD_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_ADD_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1515,12 +1515,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -1536,12 +1536,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f32_sdwa
     ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_sdwa 0, undef $vgpr0, 0, undef $vgpr0, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_ADD_F32_sdwa 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 0, 0, 0, 0, 0, 0, implicit $exec, implicit $mode
@@ -1559,16 +1559,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_add_f32_dpp
     ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = nofpexcept V_ADD_F32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_ADD_F32_dpp undef $vgpr1, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_ADD_F32_dpp undef $vgpr0, 0, undef $vgpr0, 0, undef $vgpr0, 1, 15, 15, 10, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %1:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %2:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %3:vgpr_32, 0, undef %0:vgpr_32, 0, undef %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit $mode
@@ -1584,12 +1584,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_SUB_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUB_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUB_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_SUB_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SUB_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_SUB_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1605,12 +1605,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_subrev_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_SUBREV_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_SUBREV_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_SUBREV_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_SUBREV_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_SUBREV_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_SUBREV_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1626,12 +1626,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MUL_LEGACY_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1647,12 +1647,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MUL_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MUL_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MUL_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MUL_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MUL_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1668,12 +1668,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_i32_i24_e32
     ; GCN: renamable $vgpr0 = V_MUL_I32_I24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_I32_I24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_I32_I24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_I32_I24_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_I32_I24_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_I32_I24_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1689,12 +1689,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_i32_i24_e32
     ; GCN: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_HI_I32_I24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_I24_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_I24_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_I32_I24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_I32_I24_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_HI_I32_I24_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1710,12 +1710,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_u32_u24_e32
     ; GCN: renamable $vgpr0 = V_MUL_U32_U24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_U32_U24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_U32_U24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_U32_U24_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_U32_U24_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_U32_U24_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1731,12 +1731,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_u32_u24_e32
     ; GCN: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_HI_U32_U24_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_U24_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_U24_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_U32_U24_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_U32_U24_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_HI_U32_U24_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1752,12 +1752,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MIN_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MIN_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MIN_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MIN_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1773,12 +1773,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MAX_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAX_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAX_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MAX_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -1794,12 +1794,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_i32_e32
     ; GCN: renamable $vgpr0 = V_MIN_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MIN_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MIN_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1815,12 +1815,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_i32_e32
     ; GCN: renamable $vgpr0 = V_MAX_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAX_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MAX_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1836,12 +1836,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_u32_e32
     ; GCN: renamable $vgpr0 = V_MIN_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MIN_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MIN_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1857,12 +1857,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_u32_e32
     ; GCN: renamable $vgpr0 = V_MAX_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAX_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MAX_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1878,12 +1878,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshrrev_b32_e32
     ; GCN: renamable $vgpr0 = V_LSHRREV_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHRREV_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHRREV_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHRREV_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHRREV_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHRREV_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHRREV_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1899,12 +1899,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshlrev_b32_e32
     ; GCN: renamable $vgpr0 = V_LSHLREV_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHLREV_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHLREV_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHLREV_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHLREV_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHLREV_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1920,12 +1920,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashrrev_i32_e32
     ; GCN: renamable $vgpr0 = V_ASHRREV_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ASHRREV_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ASHRREV_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ASHRREV_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ASHRREV_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHRREV_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ASHRREV_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1941,12 +1941,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_and_b32_e32
     ; GCN: renamable $vgpr0 = V_AND_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_AND_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_AND_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_AND_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_AND_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_AND_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_AND_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_AND_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1962,12 +1962,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_or_b32_e32
     ; GCN: renamable $vgpr0 = V_OR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_OR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_OR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_OR_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_OR_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_OR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_OR_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_OR_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -1983,12 +1983,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xor_b32_e32
     ; GCN: renamable $vgpr0 = V_XOR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_XOR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_XOR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_XOR_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_XOR_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XOR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XOR_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_XOR_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2004,12 +2004,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_madak_f32
     ; GCN: renamable $vgpr0 = nofpexcept V_MADAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MADAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MADAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MADAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MADAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MADAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MADAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MADAK_F32 3, undef %0:vgpr_32, 3, implicit $exec, implicit $mode
@@ -2025,12 +2025,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_u32_e32
     ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ADD_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ADD_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ADD_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ADD_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2046,12 +2046,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_u32_e32
     ; GCN: renamable $vgpr0 = V_SUB_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SUB_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SUB_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SUB_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SUB_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUB_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_SUB_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_SUB_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2067,12 +2067,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_subrev_u32_e32
     ; GCN: renamable $vgpr0 = V_SUBREV_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SUBREV_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SUBREV_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SUBREV_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SUBREV_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUBREV_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_SUBREV_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_SUBREV_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2088,12 +2088,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfm_b32_e32
     ; GCN: renamable $vgpr0 = V_BFM_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BFM_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFM_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFM_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFM_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFM_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFM_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BFM_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2109,12 +2109,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bcnt_u32_b32_e32
     ; GCN: renamable $vgpr0 = V_BCNT_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BCNT_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BCNT_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BCNT_U32_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BCNT_U32_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BCNT_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BCNT_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BCNT_U32_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2130,12 +2130,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mbcnt_lo_u32_b32_e32
     ; GCN: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MBCNT_LO_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_LO_U32_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_LO_U32_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MBCNT_LO_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MBCNT_LO_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MBCNT_LO_U32_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2151,12 +2151,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mbcnt_hi_u32_b32_e32
     ; GCN: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MBCNT_HI_U32_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MBCNT_HI_U32_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MBCNT_HI_U32_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MBCNT_HI_U32_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MBCNT_HI_U32_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MBCNT_HI_U32_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2172,12 +2172,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ldexp_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_LDEXP_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LDEXP_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_LDEXP_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LDEXP_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_LDEXP_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2193,12 +2193,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pknorm_i16_f32_e32
     ; GCN: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CVT_PKNORM_I16_F32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_I16_F32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_I16_F32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_CVT_PKNORM_I16_F32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2214,12 +2214,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pknorm_u16_f32_e32
     ; GCN: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CVT_PKNORM_U16_F32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PKNORM_U16_F32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PKNORM_U16_F32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_CVT_PKNORM_U16_F32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2235,12 +2235,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pkrtz_f16_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_CVT_PKRTZ_F16_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2256,12 +2256,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_u16_u32_e32
     ; GCN: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CVT_PK_U16_U32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_U16_U32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_U16_U32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PK_U16_U32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PK_U16_U32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_CVT_PK_U16_U32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2277,12 +2277,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_i16_i32_e32
     ; GCN: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_CVT_PK_I16_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CVT_PK_I16_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CVT_PK_I16_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CVT_PK_I16_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_CVT_PK_I16_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_CVT_PK_I16_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2298,12 +2298,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MIN_LEGACY_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2319,12 +2319,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_legacy_f32_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MAX_LEGACY_F32_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2340,12 +2340,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshr_b32_e32
     ; GCN: renamable $vgpr0 = V_LSHR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHR_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHR_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHR_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHR_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2361,12 +2361,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_b32_e32
     ; GCN: renamable $vgpr0 = V_LSHL_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHL_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHL_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHL_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2382,12 +2382,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashr_i32_e32
     ; GCN: renamable $vgpr0 = V_ASHR_I32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ASHR_I32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ASHR_I32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ASHR_I32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ASHR_I32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ASHR_I32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHR_I32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ASHR_I32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2405,16 +2405,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_ldexp_f16_e32
     ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F16_e32 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr0 = nofpexcept V_LDEXP_F16_e32 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_LDEXP_F16_e32 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_LDEXP_F16_e32 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_LDEXP_F16_e32 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_LDEXP_F16_e32 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_LDEXP_F16_e32 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2432,16 +2432,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_lshrrev_b16_e32
     ; GCN: renamable $vgpr0 = V_LSHRREV_B16_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_LSHRREV_B16_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr0 = V_LSHRREV_B16_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHRREV_B16_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHRREV_B16_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHRREV_B16_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHRREV_B16_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHRREV_B16_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2459,16 +2459,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_lshlrev_b16_e32
     ; GCN: renamable $vgpr0 = V_LSHLREV_B16_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_LSHLREV_B16_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr0 = V_LSHLREV_B16_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHLREV_B16_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHLREV_B16_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHLREV_B16_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHLREV_B16_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHLREV_B16_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2486,16 +2486,16 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_no_remat_v_ashrrev_i16_e32
     ; GCN: renamable $vgpr0 = V_ASHRREV_I16_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN: renamable $vgpr1 = V_ASHRREV_I16_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr0 = V_ASHRREV_I16_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr1 = V_ASHRREV_I16_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: renamable $vgpr0 = V_ASHRREV_I16_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ASHRREV_I16_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ASHRREV_I16_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ASHRREV_I16_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2511,12 +2511,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xnor_b32_e32
     ; GCN: renamable $vgpr0 = V_XNOR_B32_e32 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_XNOR_B32_e32 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_XNOR_B32_e32 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_XNOR_B32_e32 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_XNOR_B32_e32 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XNOR_B32_e32 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XNOR_B32_e32 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_XNOR_B32_e32 3, undef %0:vgpr_32, implicit $exec
@@ -2532,12 +2532,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fmamk_f32
     ; GCN: renamable $vgpr0 = nofpexcept V_FMAMK_F32 1, 1, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FMAMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMAMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAMK_F32 2, 2, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAMK_F32 3, 3, undef $vgpr0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMAMK_F32 1, 1, undef %0:vgpr_32, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMAMK_F32 2, 2, undef %0:vgpr_32, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_FMAMK_F32 3, 3, undef %0:vgpr_32, implicit $exec, implicit $mode
@@ -2553,12 +2553,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fmaak_f32
     ; GCN: renamable $vgpr0 = nofpexcept V_FMAAK_F32 1, undef $vgpr0, 1, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FMAAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMAAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMAAK_F32 2, undef $vgpr0, 2, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMAAK_F32 3, undef $vgpr0, 3, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMAAK_F32 1, undef %0:vgpr_32, 1, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMAAK_F32 2, undef %0:vgpr_32, 2, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_FMAAK_F32 3, undef %0:vgpr_32, 3, implicit $exec, implicit $mode
@@ -2574,12 +2574,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_legacy_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MAD_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2595,12 +2595,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_MAD_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2616,12 +2616,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_legacy_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_FMA_LEGACY_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2637,12 +2637,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_FMA_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2658,12 +2658,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_i32_i24_e64
     ; GCN: renamable $vgpr0 = V_MAD_I32_I24_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAD_I32_I24_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAD_I32_I24_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAD_I32_I24_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAD_I32_I24_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAD_I32_I24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MAD_I32_I24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_MAD_I32_I24_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -2679,12 +2679,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mad_u32_u24_e64
     ; GCN: renamable $vgpr0 = V_MAD_U32_U24_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAD_U32_U24_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAD_U32_U24_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAD_U32_U24_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAD_U32_U24_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAD_U32_U24_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MAD_U32_U24_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_MAD_U32_U24_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -2700,12 +2700,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lerp_u8_e64
     ; GCN: renamable $vgpr0 = V_LERP_U8_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LERP_U8_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LERP_U8_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LERP_U8_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LERP_U8_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LERP_U8_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LERP_U8_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LERP_U8_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -2721,12 +2721,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_fma_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_FMA_F64_e64 0, 3, 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2742,12 +2742,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_ADD_F64_e64 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2763,12 +2763,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_MUL_F64_e64 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2784,12 +2784,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_MIN_F64_e64 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2805,12 +2805,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_MAX_F64_e64 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -2826,12 +2826,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_lo_u32_e64
     ; GCN: renamable $vgpr0 = V_MUL_LO_U32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_LO_U32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_LO_U32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_U32_e64 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_U32_e64 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_LO_U32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_LO_U32_e64 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_LO_U32_e64 3, undef %0:vgpr_32, implicit $exec
@@ -2847,12 +2847,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_u32_e64
     ; GCN: renamable $vgpr0 = V_MUL_HI_U32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_HI_U32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_HI_U32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_U32_e64 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_U32_e64 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_U32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_U32_e64 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_HI_U32_e64 3, undef %0:vgpr_32, implicit $exec
@@ -2868,12 +2868,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_lo_i32_e64
     ; GCN: renamable $vgpr0 = V_MUL_LO_I32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_LO_I32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_LO_I32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_LO_I32_e64 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_LO_I32_e64 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_LO_I32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_LO_I32_e64 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_LO_I32_e64 3, undef %0:vgpr_32, implicit $exec
@@ -2889,12 +2889,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_mul_hi_i32_e64
     ; GCN: renamable $vgpr0 = V_MUL_HI_I32_e64 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MUL_HI_I32_e64 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MUL_HI_I32_e64 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MUL_HI_I32_e64 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MUL_HI_I32_e64 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MUL_HI_I32_e64 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MUL_HI_I32_e64 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MUL_HI_I32_e64 3, undef %0:vgpr_32, implicit $exec
@@ -2910,12 +2910,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubeid_f32_e64
     ; GCN: renamable $vgpr0 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBEID_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBEID_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CUBEID_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2931,12 +2931,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubesc_f32_e64
     ; GCN: renamable $vgpr0 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBESC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBESC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CUBESC_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2952,12 +2952,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubetc_f32_e64
     ; GCN: renamable $vgpr0 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBETC_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBETC_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CUBETC_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2973,12 +2973,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cubema_f32_e64
     ; GCN: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_CUBEMA_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_CUBEMA_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_CUBEMA_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -2994,12 +2994,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfe_u32_e64
     ; GCN: renamable $vgpr0 = V_BFE_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BFE_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFE_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFE_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFE_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFE_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFE_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BFE_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3015,12 +3015,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfe_i32_e64
     ; GCN: renamable $vgpr0 = V_BFE_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BFE_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFE_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFE_I32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFE_I32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFE_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFE_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BFE_I32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3036,12 +3036,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_bfi_b32_e64
     ; GCN: renamable $vgpr0 = V_BFI_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_BFI_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_BFI_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_BFI_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_BFI_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_BFI_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_BFI_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_BFI_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3057,12 +3057,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_alignbit_b32_e64
     ; GCN: renamable $vgpr0 = V_ALIGNBIT_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ALIGNBIT_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ALIGNBIT_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBIT_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBIT_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ALIGNBIT_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ALIGNBIT_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ALIGNBIT_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3078,12 +3078,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_alignbyte_b32_e64
     ; GCN: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ALIGNBYTE_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ALIGNBYTE_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ALIGNBYTE_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ALIGNBYTE_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ALIGNBYTE_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ALIGNBYTE_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3099,12 +3099,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_i32_e64
     ; GCN: renamable $vgpr0 = V_MIN3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MIN3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_I32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_I32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MIN3_I32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3120,12 +3120,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_u32_e64
     ; GCN: renamable $vgpr0 = V_MIN3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MIN3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MIN3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MIN3_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3141,12 +3141,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_i32_e64
     ; GCN: renamable $vgpr0 = V_MAX3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAX3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_I32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_I32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MAX3_I32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3162,12 +3162,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_u32_e64
     ; GCN: renamable $vgpr0 = V_MAX3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MAX3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MAX3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MAX3_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3183,12 +3183,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_i32_e64
     ; GCN: renamable $vgpr0 = V_MED3_I32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MED3_I32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MED3_I32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MED3_I32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MED3_I32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_I32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MED3_I32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MED3_I32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3204,12 +3204,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_u32_e64
     ; GCN: renamable $vgpr0 = V_MED3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MED3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MED3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MED3_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MED3_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_MED3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_MED3_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3225,12 +3225,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_min3_f32_e64
     ; GCN: renamable $vgpr0 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MIN3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MIN3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_MIN3_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3246,12 +3246,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_max3_f32_e64
     ; GCN: renamable $vgpr0 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MAX3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MAX3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_MAX3_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3267,12 +3267,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_med3_f32_e64
     ; GCN: renamable $vgpr0 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MED3_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = V_MED3_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = V_MED3_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3288,12 +3288,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u8_e64
     ; GCN: renamable $vgpr0 = V_SAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SAD_U8_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3309,12 +3309,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_hi_u8_e64
     ; GCN: renamable $vgpr0 = V_SAD_HI_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SAD_HI_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAD_HI_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAD_HI_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAD_HI_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_HI_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_HI_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SAD_HI_U8_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3330,12 +3330,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u16_e64
     ; GCN: renamable $vgpr0 = V_SAD_U16_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SAD_U16_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAD_U16_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U16_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U16_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U16_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U16_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SAD_U16_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3351,12 +3351,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sad_u32_e64
     ; GCN: renamable $vgpr0 = V_SAD_U32_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SAD_U32_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SAD_U32_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SAD_U32_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SAD_U32_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SAD_U32_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SAD_U32_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SAD_U32_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3372,12 +3372,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_cvt_pk_u8_f32_e64
     ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr1 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef $vgpr0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 1, 0, 1, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
     %2:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 2, 0, 2, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
     %3:vgpr_32 = nofpexcept V_CVT_PK_U8_F32_e64 0, 3, 0, 3, 0, undef %0:vgpr_32, 0, implicit $exec, implicit $mode
@@ -3393,12 +3393,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_div_fixup_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 1, 0, 1, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 2, 0, 2, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_DIV_FIXUP_F64_e64 0, 3, 0, 3, 0, undef %0:vreg_64_align2, 0, 0, implicit $exec, implicit $mode
@@ -3414,12 +3414,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ldexp_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_LDEXP_F64_e64 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3435,12 +3435,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_msad_u8_e64
     ; GCN: renamable $vgpr0 = V_MSAD_U8_e64 1, 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_MSAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_MSAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_MSAD_U8_e64 2, 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_MSAD_U8_e64 3, 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_MSAD_U8_e64 1, 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_MSAD_U8_e64 2, 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_MSAD_U8_e64 3, 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3456,12 +3456,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_trig_preop_f64_e64
     ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: renamable $vgpr2_vgpr3 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef $vgpr0, 0, 0, implicit $exec, implicit $mode
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 1, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %2:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 2, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
     %3:vreg_64_align2 = nofpexcept V_TRIG_PREOP_F64_e64 0, 3, 0, undef %0:vgpr_32, 0, 0, implicit $exec, implicit $mode
@@ -3477,12 +3477,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshlrev_b64_e64
     ; GCN: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHLREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHLREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_LSHLREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_LSHLREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec
     %3:vreg_64_align2 = V_LSHLREV_B64_e64 3, undef %0:vreg_64_align2, implicit $exec
@@ -3498,12 +3498,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshrrev_b64_e64
     ; GCN: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: renamable $vgpr2_vgpr3 = V_LSHRREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_LSHRREV_B64_e64 2, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_LSHRREV_B64_e64 3, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_LSHRREV_B64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_LSHRREV_B64_e64 2, undef %0:vreg_64_align2, implicit $exec
     %3:vreg_64_align2 = V_LSHRREV_B64_e64 3, undef %0:vreg_64_align2, implicit $exec
@@ -3519,12 +3519,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_ashrrev_i64_e64
     ; GCN: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 1, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: renamable $vgpr2_vgpr3 = V_ASHRREV_I64_e64 2, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
-    ; GCN: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 3, undef $vgpr0_vgpr1, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr2_vgpr3 = V_ASHRREV_I64_e64 2, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr2_vgpr3
+    ; GCN-NEXT: renamable $vgpr0_vgpr1 = V_ASHRREV_I64_e64 3, undef $vgpr0_vgpr1, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0_vgpr1
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vreg_64_align2 = V_ASHRREV_I64_e64 1, undef %0:vreg_64_align2, implicit $exec
     %2:vreg_64_align2 = V_ASHRREV_I64_e64 2, undef %0:vreg_64_align2, implicit $exec
     %3:vreg_64_align2 = V_ASHRREV_I64_e64 3, undef %0:vreg_64_align2, implicit $exec
@@ -3540,12 +3540,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_perm_b32_e64
     ; GCN: renamable $vgpr0 = V_PERM_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_PERM_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_PERM_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_PERM_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_PERM_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_PERM_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_PERM_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_PERM_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3561,12 +3561,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add3_u32_e64
     ; GCN: renamable $vgpr0 = V_ADD3_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ADD3_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ADD3_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ADD3_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD3_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD3_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD3_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ADD3_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3582,12 +3582,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_and_or_b32_e64
     ; GCN: renamable $vgpr0 = V_AND_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_AND_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_AND_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_AND_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_AND_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_AND_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_AND_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_AND_OR_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3603,12 +3603,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_or3_b32_e64
     ; GCN: renamable $vgpr0 = V_OR3_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_OR3_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_OR3_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_OR3_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_OR3_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_OR3_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_OR3_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_OR3_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3624,12 +3624,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_xad_u32_e64
     ; GCN: renamable $vgpr0 = V_XAD_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_XAD_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_XAD_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_XAD_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_XAD_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_XAD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_XAD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_XAD_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3645,12 +3645,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_i32_e64
     ; GCN: renamable $vgpr0 = V_ADD_I32_e64 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ADD_I32_e64 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ADD_I32_e64 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ADD_I32_e64 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD_I32_e64 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_ADD_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_ADD_I32_e64 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3666,12 +3666,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_add_lshl_u32_e64
     ; GCN: renamable $vgpr0 = V_ADD_LSHL_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_ADD_LSHL_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_ADD_LSHL_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_ADD_LSHL_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_ADD_LSHL_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_ADD_LSHL_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_ADD_LSHL_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_ADD_LSHL_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3687,12 +3687,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_sub_i32_e64
     ; GCN: renamable $vgpr0 = V_SUB_I32_e64 1, undef $vgpr0, 0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_SUB_I32_e64 2, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_SUB_I32_e64 3, undef $vgpr0, 0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_SUB_I32_e64 2, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_SUB_I32_e64 3, undef $vgpr0, 0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_SUB_I32_e64 1, undef %0:vgpr_32, 0, implicit $exec
     %2:vgpr_32 = V_SUB_I32_e64 2, undef %0:vgpr_32, 0, implicit $exec
     %3:vgpr_32 = V_SUB_I32_e64 3, undef %0:vgpr_32, 0, implicit $exec
@@ -3708,12 +3708,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_add_u32_e64
     ; GCN: renamable $vgpr0 = V_LSHL_ADD_U32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHL_ADD_U32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHL_ADD_U32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_ADD_U32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_ADD_U32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_ADD_U32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_ADD_U32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHL_ADD_U32_e64 3, 3, undef %0:vgpr_32, implicit $exec
@@ -3729,12 +3729,12 @@ body:             |
   bb.0:
     ; GCN-LABEL: name: test_remat_v_lshl_or_b32_e64
     ; GCN: renamable $vgpr0 = V_LSHL_OR_B32_e64 1, 1, undef $vgpr0, implicit $exec
-    ; GCN: renamable $vgpr1 = V_LSHL_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr1
-    ; GCN: renamable $vgpr0 = V_LSHL_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
-    ; GCN: S_NOP 0, implicit killed renamable $vgpr0
-    ; GCN: S_ENDPGM 0
+    ; GCN-NEXT: renamable $vgpr1 = V_LSHL_OR_B32_e64 2, 2, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr1
+    ; GCN-NEXT: renamable $vgpr0 = V_LSHL_OR_B32_e64 3, 3, undef $vgpr0, implicit $exec
+    ; GCN-NEXT: S_NOP 0, implicit killed renamable $vgpr0
+    ; GCN-NEXT: S_ENDPGM 0
     %1:vgpr_32 = V_LSHL_OR_B32_e64 1, 1, undef %0:vgpr_32, implicit $exec
     %2:vgpr_32 = V_LSHL_OR_B32_e64 2, 2, undef %0:vgpr_32, implicit $exec
     %3:vgpr_32 = V_LSHL_OR_B32_e64 3, 3, undef %0:vgpr_32, implicit $exec

From 493c856484015873737d7c995cac9e34101fb9e9 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Thu, 20 Jan 2022 07:39:22 -0800
Subject: [PATCH 057/946] [clang][NFC] Small mangler cleanups

In working on a module mangling problem I noticed a few cleanups to the mangler.

1) Use 'if (auto x = ...' idiom in a couple of places.

2) I noticed both 'isFileContext' and 'isNamespace || isTranslationUnit'
   synonyms. Let's use the former.

3) The control flow in the seqId mangling was misordered. Let's channel Count
   von Count. Also fix the inconsistent bracing.

Differential Revision: https://reviews.llvm.org/D117799
---
 clang/lib/AST/ItaniumMangle.cpp | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 7afc1250a36f4..2e734e2b28cdb 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -659,8 +659,7 @@ bool ItaniumMangleContextImpl::isUniqueInternalLinkageDecl(
 }
 
 bool ItaniumMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) {
-  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
-  if (FD) {
+  if (const auto *FD = dyn_cast<FunctionDecl>(D)) {
     LanguageLinkage L = FD->getLanguageLinkage();
     // Overloadable functions need mangling.
     if (FD->hasAttr<OverloadableAttr>())
@@ -696,21 +695,24 @@ bool ItaniumMangleContextImpl::shouldMangleCXXName(const NamedDecl *D) {
   if (!getASTContext().getLangOpts().CPlusPlus)
     return false;
 
-  const VarDecl *VD = dyn_cast<VarDecl>(D);
-  if (VD && !isa<DecompositionDecl>(D)) {
+  if (const auto *VD = dyn_cast<VarDecl>(D)) {
+    // Decompositions are mangled.
+    if (isa<DecompositionDecl>(VD))
+      return true;
+
     // C variables are not mangled.
     if (VD->isExternC())
       return false;
 
-    // Variables at global scope with non-internal linkage are not mangled
+    // Variables at global scope with non-internal linkage are not mangled.
     const DeclContext *DC = getEffectiveDeclContext(D);
     // Check for extern variable declared locally.
     if (DC->isFunctionOrMethod() && D->hasLinkage())
-      while (!DC->isNamespace() && !DC->isTranslationUnit())
+      while (!DC->isFileContext())
         DC = getEffectiveParentContext(DC);
     if (DC->isTranslationUnit() && D->getFormalLinkage() != InternalLinkage &&
         !CXXNameMangler::shouldHaveAbiTags(*this, VD) &&
-        !isa<VarTemplateSpecializationDecl>(D))
+        !isa<VarTemplateSpecializationDecl>(VD))
       return false;
   }
 
@@ -5889,9 +5891,11 @@ void CXXNameMangler::mangleTemplateParameter(unsigned Depth, unsigned Index) {
 }
 
 void CXXNameMangler::mangleSeqID(unsigned SeqID) {
-  if (SeqID == 1)
+  if (SeqID == 0) {
+    // Nothing.
+  } else if (SeqID == 1) {
     Out << '0';
-  else if (SeqID > 1) {
+  } else {
     SeqID--;
 
     // <seq-id> is encoded in base-36, using digits and upper case letters.

From 6b92bb47901f3a2d4a9aa683b0365088113a729e Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn@google.com>
Date: Fri, 14 Jan 2022 23:10:37 +0000
Subject: [PATCH 058/946] [Support] [DebugInfo] Lazily create cache dir.

This change defers creating Support/Caching.cpp's cache directory until
it actually writes to the cache.

This allows using Caching library in a read-only fashion. If read-only,
the cache is guaranteed not to write to disk. This keeps tools using
DebugInfod (currently llvm-symbolizer) hermetic when not configured to
perform remote lookups.

Reviewed By: phosek

Differential Revision: https://reviews.llvm.org/D117589
---
 llvm/include/llvm/Support/Caching.h           |  9 +++----
 llvm/lib/Support/Caching.cpp                  |  8 +++++--
 llvm/test/ThinLTO/X86/cache.ll                |  2 +-
 .../ThinLTO/X86/empty_module_with_cache.ll    |  2 +-
 llvm/unittests/Debuginfod/DebuginfodTests.cpp | 24 +++++++++++++++----
 5 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/Support/Caching.h b/llvm/include/llvm/Support/Caching.h
index 5c30a822ef388..bef23ae757f2e 100644
--- a/llvm/include/llvm/Support/Caching.h
+++ b/llvm/include/llvm/Support/Caching.h
@@ -62,10 +62,11 @@ using AddBufferFn =
     std::function<void(unsigned Task, std::unique_ptr<MemoryBuffer> MB)>;
 
 /// Create a local file system cache which uses the given cache name, temporary
-/// file prefix, cache directory and file callback. This function also creates
-/// the cache directory if it does not already exist. The cache name appears in
-/// error messages for errors during caching. The temporary file prefix is used
-/// in the temporary file naming scheme used when writing files atomically.
+/// file prefix, cache directory and file callback.  This function does not
+/// immediately create the cache directory if it does not yet exist; this is
+/// done lazily the first time a file is added.  The cache name appears in error
+/// messages for errors during caching. The temporary file prefix is used in the
+/// temporary file naming scheme used when writing files atomically.
 Expected<FileCache> localCache(
     Twine CacheNameRef, Twine TempFilePrefixRef, Twine CacheDirectoryPathRef,
     AddBufferFn AddBuffer = [](size_t Task, std::unique_ptr<MemoryBuffer> MB) {
diff --git a/llvm/lib/Support/Caching.cpp b/llvm/lib/Support/Caching.cpp
index 8c685640f791a..d6902f660e39e 100644
--- a/llvm/lib/Support/Caching.cpp
+++ b/llvm/lib/Support/Caching.cpp
@@ -30,8 +30,6 @@ Expected<FileCache> llvm::localCache(Twine CacheNameRef,
                                      Twine TempFilePrefixRef,
                                      Twine CacheDirectoryPathRef,
                                      AddBufferFn AddBuffer) {
-  if (std::error_code EC = sys::fs::create_directories(CacheDirectoryPathRef))
-    return errorCodeToError(EC);
 
   // Create local copies which are safely captured-by-copy in lambdas
   SmallString<64> CacheName, TempFilePrefix, CacheDirectoryPath;
@@ -140,6 +138,12 @@ Expected<FileCache> llvm::localCache(Twine CacheNameRef,
     };
 
     return [=](size_t Task) -> Expected<std::unique_ptr<CachedFileStream>> {
+      // Create the cache directory if not already done. Doing this lazily
+      // ensures the filesystem isn't mutated until the cache is.
+      if (std::error_code EC = sys::fs::create_directories(
+              CacheDirectoryPath, /*IgnoreExisting=*/true))
+        return errorCodeToError(EC);
+
       // Write to a temporary to avoid race condition
       SmallString<64> TempFilenameModel;
       sys::path::append(TempFilenameModel, CacheDirectoryPath,
diff --git a/llvm/test/ThinLTO/X86/cache.ll b/llvm/test/ThinLTO/X86/cache.ll
index 406a1a456f89c..009b78713316a 100644
--- a/llvm/test/ThinLTO/X86/cache.ll
+++ b/llvm/test/ThinLTO/X86/cache.ll
@@ -20,7 +20,7 @@
 ; RUN:  -r=%t2.bc,_main,plx \
 ; RUN:  -r=%t2.bc,_globalfunc,lx \
 ; RUN:  -r=%t.bc,_globalfunc,plx
-; RUN: ls %t.cache.noindex | count 0
+; RUN: not ls %t.cache.noindex
 
 
 ; Repeat again, *with* hash this time.
diff --git a/llvm/test/ThinLTO/X86/empty_module_with_cache.ll b/llvm/test/ThinLTO/X86/empty_module_with_cache.ll
index 8e58d9f0db959..693f264b8ff26 100644
--- a/llvm/test/ThinLTO/X86/empty_module_with_cache.ll
+++ b/llvm/test/ThinLTO/X86/empty_module_with_cache.ll
@@ -28,7 +28,7 @@
 ; RUN: rm -Rf %t.cache
 ; RUN: llvm-lto2 run -o %t.o %t2.bc  %t.bc -cache-dir %t.cache \
 ; RUN:  -r=%t2.bc,_main,plx
-; RUN: ls %t.cache | count 0
+; RUN: not ls %t.cache
 
 
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/unittests/Debuginfod/DebuginfodTests.cpp b/llvm/unittests/Debuginfod/DebuginfodTests.cpp
index fc5eb705969e7..5312912599e93 100644
--- a/llvm/unittests/Debuginfod/DebuginfodTests.cpp
+++ b/llvm/unittests/Debuginfod/DebuginfodTests.cpp
@@ -17,6 +17,17 @@
 #define setenv(name, var, ignore) _putenv_s(name, var)
 #endif
 
+#define ASSERT_NO_ERROR(x)                                                     \
+  if (std::error_code ASSERT_NO_ERROR_ec = x) {                                \
+    SmallString<128> MessageStorage;                                           \
+    raw_svector_ostream Message(MessageStorage);                               \
+    Message << #x ": did not return errc::success.\n"                          \
+            << "error number: " << ASSERT_NO_ERROR_ec.value() << "\n"          \
+            << "error message: " << ASSERT_NO_ERROR_ec.message() << "\n";      \
+    GTEST_FATAL_FAILURE_(MessageStorage.c_str());                              \
+  } else {                                                                     \
+  }
+
 using namespace llvm;
 
 // Check that the Debuginfod client can find locally cached artifacts.
@@ -40,11 +51,12 @@ TEST(DebuginfodClient, CacheHit) {
 // Check that the Debuginfod client returns an Error when it fails to find an
 // artifact.
 TEST(DebuginfodClient, CacheMiss) {
-  // Set the cache path to a temp directory to avoid permissions issues if $HOME
-  // is not writable.
-  SmallString<32> TempDir;
-  sys::path::system_temp_directory(true, TempDir);
-  setenv("DEBUGINFOD_CACHE_PATH", TempDir.c_str(),
+  SmallString<32> CacheDir;
+  ASSERT_NO_ERROR(
+      sys::fs::createUniqueDirectory("debuginfod-unittest", CacheDir));
+  sys::path::append(CacheDir, "cachedir");
+  ASSERT_FALSE(sys::fs::exists(CacheDir));
+  setenv("DEBUGINFOD_CACHE_PATH", CacheDir.c_str(),
          /*replace=*/1);
   // Ensure there are no urls to guarantee a cache miss.
   setenv("DEBUGINFOD_URLS", "", /*replace=*/1);
@@ -52,4 +64,6 @@ TEST(DebuginfodClient, CacheMiss) {
   Expected<std::string> PathOrErr = getCachedOrDownloadArtifact(
       /*UniqueKey=*/"nonexistent-key", /*UrlPath=*/"/null");
   EXPECT_THAT_EXPECTED(PathOrErr, Failed<StringError>());
+  // A cache miss with no possible URLs should not create the cache directory.
+  EXPECT_FALSE(sys::fs::exists(CacheDir));
 }

From 94e69fbb4f3a9719d4d8cc7268dd5db5d0be7e8f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Jan 2022 11:32:26 -0800
Subject: [PATCH 059/946] [RISCV] Add DAG combine to fold (fp_to_int_sat
 (ffloor X)) -> (select X == nan, 0, (fcvt X, rdn))

Similar for ceil, trunc, round, and roundeven. This allows us to use
static rounding modes to avoid a libcall.

This is similar to D116771, but for the saturating conversions.

This optimization is done for AArch64 as isel patterns.
RISCV doesn't have instructions for ceil/floor/trunc/round/roundeven
so the operations don't stick around until isel to enable a pattern
match. Thus I've implemented a DAG combine.

I'm only handling saturating to i64 or i32. This could be extended
to other sizes in the future.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D116864
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  87 +-
 .../CodeGen/RISCV/double-round-conv-sat.ll    | 940 +++++++++++++++++
 .../CodeGen/RISCV/float-round-conv-sat.ll     | 940 +++++++++++++++++
 .../test/CodeGen/RISCV/half-round-conv-sat.ll | 970 ++++++++++++++++++
 4 files changed, 2927 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
 create mode 100644 llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
 create mode 100644 llvm/test/CodeGen/RISCV/half-round-conv-sat.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 304c05d9378f2..f942f395d5328 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1051,6 +1051,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::ZERO_EXTEND);
     setTargetDAGCombine(ISD::FP_TO_SINT);
     setTargetDAGCombine(ISD::FP_TO_UINT);
+    setTargetDAGCombine(ISD::FP_TO_SINT_SAT);
+    setTargetDAGCombine(ISD::FP_TO_UINT_SAT);
   }
   if (Subtarget.hasVInstructions()) {
     setTargetDAGCombine(ISD::FCOPYSIGN);
@@ -7180,13 +7182,24 @@ static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
   return DAG.getNode(WMulOpc, DL, VT, Op0, Op1, Mask, VL);
 }
 
+static RISCVFPRndMode::RoundingMode matchRoundingOp(SDValue Op) {
+  switch (Op.getOpcode()) {
+  case ISD::FROUNDEVEN: return RISCVFPRndMode::RNE;
+  case ISD::FTRUNC:     return RISCVFPRndMode::RTZ;
+  case ISD::FFLOOR:     return RISCVFPRndMode::RDN;
+  case ISD::FCEIL:      return RISCVFPRndMode::RUP;
+  case ISD::FROUND:     return RISCVFPRndMode::RMM;
+  }
+
+  return RISCVFPRndMode::Invalid;
+}
+
 // Fold
 //   (fp_to_int (froundeven X)) -> fcvt X, rne
 //   (fp_to_int (ftrunc X))     -> fcvt X, rtz
 //   (fp_to_int (ffloor X))     -> fcvt X, rdn
 //   (fp_to_int (fceil X))      -> fcvt X, rup
 //   (fp_to_int (fround X))     -> fcvt X, rmm
-// FIXME: We should also do this for fp_to_int_sat.
 static SDValue performFP_TO_INTCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const RISCVSubtarget &Subtarget) {
@@ -7210,16 +7223,9 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
     return SDValue();
 
-  RISCVFPRndMode::RoundingMode FRM;
-  switch (Src->getOpcode()) {
-  default:
+  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+  if (FRM == RISCVFPRndMode::Invalid)
     return SDValue();
-  case ISD::FROUNDEVEN: FRM = RISCVFPRndMode::RNE; break;
-  case ISD::FTRUNC:     FRM = RISCVFPRndMode::RTZ; break;
-  case ISD::FFLOOR:     FRM = RISCVFPRndMode::RDN; break;
-  case ISD::FCEIL:      FRM = RISCVFPRndMode::RUP; break;
-  case ISD::FROUND:     FRM = RISCVFPRndMode::RMM; break;
-  }
 
   bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
 
@@ -7235,6 +7241,64 @@ static SDValue performFP_TO_INTCombine(SDNode *N,
   return DAG.getNode(ISD::TRUNCATE, DL, VT, FpToInt);
 }
 
+// Fold
+//   (fp_to_int_sat (froundeven X)) -> (select X == nan, 0, (fcvt X, rne))
+//   (fp_to_int_sat (ftrunc X))     -> (select X == nan, 0, (fcvt X, rtz))
+//   (fp_to_int_sat (ffloor X))     -> (select X == nan, 0, (fcvt X, rdn))
+//   (fp_to_int_sat (fceil X))      -> (select X == nan, 0, (fcvt X, rup))
+//   (fp_to_int_sat (fround X))     -> (select X == nan, 0, (fcvt X, rmm))
+static SDValue performFP_TO_INT_SATCombine(SDNode *N,
+                                       TargetLowering::DAGCombinerInfo &DCI,
+                                       const RISCVSubtarget &Subtarget) {
+  SelectionDAG &DAG = DCI.DAG;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // Only handle XLen types. Other types narrower than XLen will eventually be
+  // legalized to XLenVT.
+  EVT DstVT = N->getValueType(0);
+  if (DstVT != XLenVT)
+    return SDValue();
+
+  SDValue Src = N->getOperand(0);
+
+  // Ensure the FP type is also legal.
+  if (!TLI.isTypeLegal(Src.getValueType()))
+    return SDValue();
+
+  // Don't do this for f16 with Zfhmin and not Zfh.
+  if (Src.getValueType() == MVT::f16 && !Subtarget.hasStdExtZfh())
+    return SDValue();
+
+  EVT SatVT = cast<VTSDNode>(N->getOperand(1))->getVT();
+
+  RISCVFPRndMode::RoundingMode FRM = matchRoundingOp(Src);
+  if (FRM == RISCVFPRndMode::Invalid)
+    return SDValue();
+
+  bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT_SAT;
+
+  unsigned Opc;
+  if (SatVT == DstVT)
+    Opc = IsSigned ? RISCVISD::FCVT_X : RISCVISD::FCVT_XU;
+  else if (DstVT == MVT::i64 && SatVT == MVT::i32)
+    Opc = IsSigned ? RISCVISD::FCVT_W_RV64 : RISCVISD::FCVT_WU_RV64;
+  else
+    return SDValue();
+  // FIXME: Support other SatVTs by clamping before or after the conversion.
+
+  Src = Src.getOperand(0);
+
+  SDLoc DL(N);
+  SDValue FpToInt = DAG.getNode(Opc, DL, XLenVT, Src,
+                                DAG.getTargetConstant(FRM, DL, XLenVT));
+
+  // RISCV FP-to-int conversions saturate to the destination register size, but
+  // don't produce 0 for nan.
+  SDValue ZeroInt = DAG.getConstant(0, DL, DstVT);
+  return DAG.getSelectCC(DL, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
+}
+
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -7548,6 +7612,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::FP_TO_SINT:
   case ISD::FP_TO_UINT:
     return performFP_TO_INTCombine(N, DCI, Subtarget);
+  case ISD::FP_TO_SINT_SAT:
+  case ISD::FP_TO_UINT_SAT:
+    return performFP_TO_INT_SATCombine(N, DCI, Subtarget);
   case ISD::FCOPYSIGN: {
     EVT VT = N->getValueType(0);
     if (!VT.isVector())
diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
new file mode 100644
index 0000000000000..38d82f6e46ff2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32d | FileCheck -check-prefix=RV32IFD %s
+; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64d | FileCheck -check-prefix=RV64IFD %s
+
+define signext i32 @test_floor_si32(double %x) {
+; RV32IFD-LABEL: test_floor_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB0_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB0_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rdn
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_floor_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB0_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB0_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_floor_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call floor@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI1_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB1_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB1_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI1_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB1_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB1_10
+; RV32IFD-NEXT:  .LBB1_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB1_11
+; RV32IFD-NEXT:  .LBB1_5:
+; RV32IFD-NEXT:    bnez a3, .LBB1_12
+; RV32IFD-NEXT:  .LBB1_6:
+; RV32IFD-NEXT:    bnez a2, .LBB1_8
+; RV32IFD-NEXT:  .LBB1_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB1_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB1_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB1_4
+; RV32IFD-NEXT:  .LBB1_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB1_5
+; RV32IFD-NEXT:  .LBB1_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB1_6
+; RV32IFD-NEXT:  .LBB1_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB1_7
+; RV32IFD-NEXT:    j .LBB1_8
+;
+; RV64IFD-LABEL: test_floor_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB1_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB1_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(double %x) {
+; RV32IFD-LABEL: test_floor_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB2_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB2_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rdn
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_floor_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB2_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB2_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_floor_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call floor@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB3_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB3_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI3_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB3_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB3_8
+; RV32IFD-NEXT:  .LBB3_4:
+; RV32IFD-NEXT:    bnez a4, .LBB3_6
+; RV32IFD-NEXT:  .LBB3_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB3_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB3_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB3_4
+; RV32IFD-NEXT:  .LBB3_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB3_5
+; RV32IFD-NEXT:    j .LBB3_6
+;
+; RV64IFD-LABEL: test_floor_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB3_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB3_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rdn
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.floor.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(double %x) {
+; RV32IFD-LABEL: test_ceil_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB4_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB4_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rup
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_ceil_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB4_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB4_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_ceil_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call ceil@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI5_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB5_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB5_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI5_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB5_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB5_10
+; RV32IFD-NEXT:  .LBB5_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB5_11
+; RV32IFD-NEXT:  .LBB5_5:
+; RV32IFD-NEXT:    bnez a3, .LBB5_12
+; RV32IFD-NEXT:  .LBB5_6:
+; RV32IFD-NEXT:    bnez a2, .LBB5_8
+; RV32IFD-NEXT:  .LBB5_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB5_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB5_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB5_4
+; RV32IFD-NEXT:  .LBB5_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB5_5
+; RV32IFD-NEXT:  .LBB5_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB5_6
+; RV32IFD-NEXT:  .LBB5_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB5_7
+; RV32IFD-NEXT:    j .LBB5_8
+;
+; RV64IFD-LABEL: test_ceil_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB5_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB5_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(double %x) {
+; RV32IFD-LABEL: test_ceil_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB6_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB6_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rup
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_ceil_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB6_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB6_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_ceil_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call ceil@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB7_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB7_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI7_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB7_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB7_8
+; RV32IFD-NEXT:  .LBB7_4:
+; RV32IFD-NEXT:    bnez a4, .LBB7_6
+; RV32IFD-NEXT:  .LBB7_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB7_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB7_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB7_4
+; RV32IFD-NEXT:  .LBB7_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB7_5
+; RV32IFD-NEXT:    j .LBB7_6
+;
+; RV64IFD-LABEL: test_ceil_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB7_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB7_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rup
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.ceil.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(double %x) {
+; RV32IFD-LABEL: test_trunc_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB8_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB8_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rtz
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_trunc_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB8_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB8_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_trunc_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call trunc@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI9_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB9_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB9_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI9_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB9_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB9_10
+; RV32IFD-NEXT:  .LBB9_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB9_11
+; RV32IFD-NEXT:  .LBB9_5:
+; RV32IFD-NEXT:    bnez a3, .LBB9_12
+; RV32IFD-NEXT:  .LBB9_6:
+; RV32IFD-NEXT:    bnez a2, .LBB9_8
+; RV32IFD-NEXT:  .LBB9_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB9_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB9_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB9_4
+; RV32IFD-NEXT:  .LBB9_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB9_5
+; RV32IFD-NEXT:  .LBB9_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB9_6
+; RV32IFD-NEXT:  .LBB9_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB9_7
+; RV32IFD-NEXT:    j .LBB9_8
+;
+; RV64IFD-LABEL: test_trunc_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB9_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB9_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(double %x) {
+; RV32IFD-LABEL: test_trunc_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB10_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB10_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_trunc_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB10_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB10_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_trunc_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call trunc@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB11_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB11_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI11_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB11_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB11_8
+; RV32IFD-NEXT:  .LBB11_4:
+; RV32IFD-NEXT:    bnez a4, .LBB11_6
+; RV32IFD-NEXT:  .LBB11_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB11_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB11_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB11_4
+; RV32IFD-NEXT:  .LBB11_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB11_5
+; RV32IFD-NEXT:    j .LBB11_6
+;
+; RV64IFD-LABEL: test_trunc_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB11_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB11_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rtz
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.trunc.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(double %x) {
+; RV32IFD-LABEL: test_round_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB12_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB12_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rmm
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_round_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB12_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB12_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_round_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call round@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI13_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB13_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB13_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI13_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB13_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB13_10
+; RV32IFD-NEXT:  .LBB13_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB13_11
+; RV32IFD-NEXT:  .LBB13_5:
+; RV32IFD-NEXT:    bnez a3, .LBB13_12
+; RV32IFD-NEXT:  .LBB13_6:
+; RV32IFD-NEXT:    bnez a2, .LBB13_8
+; RV32IFD-NEXT:  .LBB13_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB13_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB13_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB13_4
+; RV32IFD-NEXT:  .LBB13_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB13_5
+; RV32IFD-NEXT:  .LBB13_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB13_6
+; RV32IFD-NEXT:  .LBB13_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB13_7
+; RV32IFD-NEXT:    j .LBB13_8
+;
+; RV64IFD-LABEL: test_round_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB13_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB13_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(double %x) {
+; RV32IFD-LABEL: test_round_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB14_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB14_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rmm
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_round_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB14_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB14_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_round_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call round@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB15_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB15_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI15_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB15_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB15_8
+; RV32IFD-NEXT:  .LBB15_4:
+; RV32IFD-NEXT:    bnez a4, .LBB15_6
+; RV32IFD-NEXT:  .LBB15_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB15_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB15_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB15_4
+; RV32IFD-NEXT:  .LBB15_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB15_5
+; RV32IFD-NEXT:    j .LBB15_6
+;
+; RV64IFD-LABEL: test_round_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB15_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB15_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rmm
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.round.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(double %x) {
+; RV32IFD-LABEL: test_roundeven_si32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB16_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB16_2:
+; RV32IFD-NEXT:    fcvt.w.d a0, fa0, rne
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_roundeven_si32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB16_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB16_2:
+; RV64IFD-NEXT:    fcvt.w.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(double %x) nounwind {
+; RV32IFD-LABEL: test_roundeven_si64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call roundeven@plt
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI17_0)(a0)
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixdfdi@plt
+; RV32IFD-NEXT:    mv a2, a0
+; RV32IFD-NEXT:    bnez s0, .LBB17_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a2, 0
+; RV32IFD-NEXT:  .LBB17_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI17_1)(a0)
+; RV32IFD-NEXT:    flt.d a3, ft0, fs0
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a3, .LBB17_9
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    beqz a2, .LBB17_10
+; RV32IFD-NEXT:  .LBB17_4:
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    beqz s0, .LBB17_11
+; RV32IFD-NEXT:  .LBB17_5:
+; RV32IFD-NEXT:    bnez a3, .LBB17_12
+; RV32IFD-NEXT:  .LBB17_6:
+; RV32IFD-NEXT:    bnez a2, .LBB17_8
+; RV32IFD-NEXT:  .LBB17_7:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:  .LBB17_8:
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB17_9:
+; RV32IFD-NEXT:    mv a0, a2
+; RV32IFD-NEXT:    feq.d a2, fs0, fs0
+; RV32IFD-NEXT:    bnez a2, .LBB17_4
+; RV32IFD-NEXT:  .LBB17_10:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    lui a4, 524288
+; RV32IFD-NEXT:    bnez s0, .LBB17_5
+; RV32IFD-NEXT:  .LBB17_11:
+; RV32IFD-NEXT:    lui a1, 524288
+; RV32IFD-NEXT:    beqz a3, .LBB17_6
+; RV32IFD-NEXT:  .LBB17_12:
+; RV32IFD-NEXT:    addi a1, a4, -1
+; RV32IFD-NEXT:    beqz a2, .LBB17_7
+; RV32IFD-NEXT:    j .LBB17_8
+;
+; RV64IFD-LABEL: test_roundeven_si64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB17_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB17_2:
+; RV64IFD-NEXT:    fcvt.l.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(double %x) {
+; RV32IFD-LABEL: test_roundeven_ui32:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    feq.d a0, fa0, fa0
+; RV32IFD-NEXT:    bnez a0, .LBB18_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB18_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, fa0, rne
+; RV32IFD-NEXT:    ret
+;
+; RV64IFD-LABEL: test_roundeven_ui32:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB18_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB18_2:
+; RV64IFD-NEXT:    fcvt.wu.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f64(double %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(double %x) nounwind {
+; RV32IFD-LABEL: test_roundeven_ui64:
+; RV32IFD:       # %bb.0:
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IFD-NEXT:    fsd fs0, 0(sp) # 8-byte Folded Spill
+; RV32IFD-NEXT:    call roundeven@plt
+; RV32IFD-NEXT:    fmv.d fs0, fa0
+; RV32IFD-NEXT:    fcvt.d.w ft0, zero
+; RV32IFD-NEXT:    fle.d s0, ft0, fa0
+; RV32IFD-NEXT:    call __fixunsdfdi@plt
+; RV32IFD-NEXT:    mv a3, a0
+; RV32IFD-NEXT:    bnez s0, .LBB19_2
+; RV32IFD-NEXT:  # %bb.1:
+; RV32IFD-NEXT:    li a3, 0
+; RV32IFD-NEXT:  .LBB19_2:
+; RV32IFD-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IFD-NEXT:    fld ft0, %lo(.LCPI19_0)(a0)
+; RV32IFD-NEXT:    flt.d a4, ft0, fs0
+; RV32IFD-NEXT:    li a2, -1
+; RV32IFD-NEXT:    li a0, -1
+; RV32IFD-NEXT:    beqz a4, .LBB19_7
+; RV32IFD-NEXT:  # %bb.3:
+; RV32IFD-NEXT:    beqz s0, .LBB19_8
+; RV32IFD-NEXT:  .LBB19_4:
+; RV32IFD-NEXT:    bnez a4, .LBB19_6
+; RV32IFD-NEXT:  .LBB19_5:
+; RV32IFD-NEXT:    mv a2, a1
+; RV32IFD-NEXT:  .LBB19_6:
+; RV32IFD-NEXT:    mv a1, a2
+; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB19_7:
+; RV32IFD-NEXT:    mv a0, a3
+; RV32IFD-NEXT:    bnez s0, .LBB19_4
+; RV32IFD-NEXT:  .LBB19_8:
+; RV32IFD-NEXT:    li a1, 0
+; RV32IFD-NEXT:    beqz a4, .LBB19_5
+; RV32IFD-NEXT:    j .LBB19_6
+;
+; RV64IFD-LABEL: test_roundeven_ui64:
+; RV64IFD:       # %bb.0:
+; RV64IFD-NEXT:    feq.d a0, fa0, fa0
+; RV64IFD-NEXT:    bnez a0, .LBB19_2
+; RV64IFD-NEXT:  # %bb.1:
+; RV64IFD-NEXT:    li a0, 0
+; RV64IFD-NEXT:    ret
+; RV64IFD-NEXT:  .LBB19_2:
+; RV64IFD-NEXT:    fcvt.lu.d a0, fa0, rne
+; RV64IFD-NEXT:    ret
+  %a = call double @llvm.roundeven.f64(double %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f64(double %a)
+  ret i64 %b
+}
+
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.round.f64(double)
+declare double @llvm.roundeven.f64(double)
+declare i32 @llvm.fptosi.sat.i32.f64(double)
+declare i64 @llvm.fptosi.sat.i64.f64(double)
+declare i32 @llvm.fptoui.sat.i32.f64(double)
+declare i64 @llvm.fptoui.sat.i64.f64(double)
diff --git a/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
new file mode 100644
index 0000000000000..9893b697af294
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/float-round-conv-sat.ll
@@ -0,0 +1,940 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s
+; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s
+
+define signext i32 @test_floor_si32(float %x) {
+; RV32IF-LABEL: test_floor_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB0_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB0_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rdn
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_floor_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB0_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB0_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(float %x) nounwind {
+; RV32IF-LABEL: test_floor_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call floorf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB1_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB1_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI1_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB1_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB1_10
+; RV32IF-NEXT:  .LBB1_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB1_11
+; RV32IF-NEXT:  .LBB1_5:
+; RV32IF-NEXT:    bnez a3, .LBB1_12
+; RV32IF-NEXT:  .LBB1_6:
+; RV32IF-NEXT:    bnez a2, .LBB1_8
+; RV32IF-NEXT:  .LBB1_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB1_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB1_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB1_4
+; RV32IF-NEXT:  .LBB1_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB1_5
+; RV32IF-NEXT:  .LBB1_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB1_6
+; RV32IF-NEXT:  .LBB1_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB1_7
+; RV32IF-NEXT:    j .LBB1_8
+;
+; RV64IF-LABEL: test_floor_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB1_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB1_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(float %x) {
+; RV32IF-LABEL: test_floor_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB2_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB2_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rdn
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_floor_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB2_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB2_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_floor_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call floorf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB3_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB3_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB3_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB3_8
+; RV32IF-NEXT:  .LBB3_4:
+; RV32IF-NEXT:    bnez a4, .LBB3_6
+; RV32IF-NEXT:  .LBB3_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB3_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB3_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB3_4
+; RV32IF-NEXT:  .LBB3_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB3_5
+; RV32IF-NEXT:    j .LBB3_6
+;
+; RV64IF-LABEL: test_floor_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB3_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB3_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rdn
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.floor.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(float %x) {
+; RV32IF-LABEL: test_ceil_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB4_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB4_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rup
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_ceil_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB4_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB4_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(float %x) nounwind {
+; RV32IF-LABEL: test_ceil_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call ceilf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB5_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB5_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI5_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB5_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB5_10
+; RV32IF-NEXT:  .LBB5_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB5_11
+; RV32IF-NEXT:  .LBB5_5:
+; RV32IF-NEXT:    bnez a3, .LBB5_12
+; RV32IF-NEXT:  .LBB5_6:
+; RV32IF-NEXT:    bnez a2, .LBB5_8
+; RV32IF-NEXT:  .LBB5_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB5_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB5_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB5_4
+; RV32IF-NEXT:  .LBB5_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB5_5
+; RV32IF-NEXT:  .LBB5_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB5_6
+; RV32IF-NEXT:  .LBB5_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB5_7
+; RV32IF-NEXT:    j .LBB5_8
+;
+; RV64IF-LABEL: test_ceil_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB5_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB5_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(float %x) {
+; RV32IF-LABEL: test_ceil_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB6_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB6_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rup
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_ceil_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB6_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB6_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_ceil_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call ceilf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB7_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB7_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB7_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB7_8
+; RV32IF-NEXT:  .LBB7_4:
+; RV32IF-NEXT:    bnez a4, .LBB7_6
+; RV32IF-NEXT:  .LBB7_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB7_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB7_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB7_4
+; RV32IF-NEXT:  .LBB7_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB7_5
+; RV32IF-NEXT:    j .LBB7_6
+;
+; RV64IF-LABEL: test_ceil_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB7_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB7_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rup
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.ceil.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(float %x) {
+; RV32IF-LABEL: test_trunc_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB8_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB8_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rtz
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_trunc_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB8_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB8_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(float %x) nounwind {
+; RV32IF-LABEL: test_trunc_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call truncf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB9_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB9_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI9_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB9_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB9_10
+; RV32IF-NEXT:  .LBB9_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB9_11
+; RV32IF-NEXT:  .LBB9_5:
+; RV32IF-NEXT:    bnez a3, .LBB9_12
+; RV32IF-NEXT:  .LBB9_6:
+; RV32IF-NEXT:    bnez a2, .LBB9_8
+; RV32IF-NEXT:  .LBB9_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB9_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB9_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB9_4
+; RV32IF-NEXT:  .LBB9_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB9_5
+; RV32IF-NEXT:  .LBB9_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB9_6
+; RV32IF-NEXT:  .LBB9_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB9_7
+; RV32IF-NEXT:    j .LBB9_8
+;
+; RV64IF-LABEL: test_trunc_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB9_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB9_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(float %x) {
+; RV32IF-LABEL: test_trunc_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB10_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB10_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rtz
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_trunc_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB10_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB10_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_trunc_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call truncf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB11_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB11_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI11_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB11_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB11_8
+; RV32IF-NEXT:  .LBB11_4:
+; RV32IF-NEXT:    bnez a4, .LBB11_6
+; RV32IF-NEXT:  .LBB11_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB11_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB11_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB11_4
+; RV32IF-NEXT:  .LBB11_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB11_5
+; RV32IF-NEXT:    j .LBB11_6
+;
+; RV64IF-LABEL: test_trunc_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB11_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB11_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rtz
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.trunc.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(float %x) {
+; RV32IF-LABEL: test_round_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB12_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB12_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rmm
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_round_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB12_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB12_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(float %x) nounwind {
+; RV32IF-LABEL: test_round_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI13_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB13_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB13_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI13_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB13_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB13_10
+; RV32IF-NEXT:  .LBB13_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB13_11
+; RV32IF-NEXT:  .LBB13_5:
+; RV32IF-NEXT:    bnez a3, .LBB13_12
+; RV32IF-NEXT:  .LBB13_6:
+; RV32IF-NEXT:    bnez a2, .LBB13_8
+; RV32IF-NEXT:  .LBB13_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB13_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB13_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB13_4
+; RV32IF-NEXT:  .LBB13_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB13_5
+; RV32IF-NEXT:  .LBB13_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB13_6
+; RV32IF-NEXT:  .LBB13_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB13_7
+; RV32IF-NEXT:    j .LBB13_8
+;
+; RV64IF-LABEL: test_round_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB13_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB13_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(float %x) {
+; RV32IF-LABEL: test_round_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB14_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB14_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rmm
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_round_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB14_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB14_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_round_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB15_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB15_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI15_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB15_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB15_8
+; RV32IF-NEXT:  .LBB15_4:
+; RV32IF-NEXT:    bnez a4, .LBB15_6
+; RV32IF-NEXT:  .LBB15_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB15_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB15_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB15_4
+; RV32IF-NEXT:  .LBB15_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB15_5
+; RV32IF-NEXT:    j .LBB15_6
+;
+; RV64IF-LABEL: test_round_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB15_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB15_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rmm
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.round.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(float %x) {
+; RV32IF-LABEL: test_roundeven_si32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB16_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB16_2:
+; RV32IF-NEXT:    fcvt.w.s a0, fa0, rne
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_roundeven_si32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB16_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB16_2:
+; RV64IF-NEXT:    fcvt.w.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(float %x) nounwind {
+; RV32IF-LABEL: test_roundeven_si64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundevenf@plt
+; RV32IF-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI17_0)(a0)
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixsfdi@plt
+; RV32IF-NEXT:    mv a2, a0
+; RV32IF-NEXT:    bnez s0, .LBB17_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a2, 0
+; RV32IF-NEXT:  .LBB17_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI17_1)(a0)
+; RV32IF-NEXT:    flt.s a3, ft0, fs0
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a3, .LBB17_9
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    beqz a2, .LBB17_10
+; RV32IF-NEXT:  .LBB17_4:
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    beqz s0, .LBB17_11
+; RV32IF-NEXT:  .LBB17_5:
+; RV32IF-NEXT:    bnez a3, .LBB17_12
+; RV32IF-NEXT:  .LBB17_6:
+; RV32IF-NEXT:    bnez a2, .LBB17_8
+; RV32IF-NEXT:  .LBB17_7:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:  .LBB17_8:
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB17_9:
+; RV32IF-NEXT:    mv a0, a2
+; RV32IF-NEXT:    feq.s a2, fs0, fs0
+; RV32IF-NEXT:    bnez a2, .LBB17_4
+; RV32IF-NEXT:  .LBB17_10:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    lui a4, 524288
+; RV32IF-NEXT:    bnez s0, .LBB17_5
+; RV32IF-NEXT:  .LBB17_11:
+; RV32IF-NEXT:    lui a1, 524288
+; RV32IF-NEXT:    beqz a3, .LBB17_6
+; RV32IF-NEXT:  .LBB17_12:
+; RV32IF-NEXT:    addi a1, a4, -1
+; RV32IF-NEXT:    beqz a2, .LBB17_7
+; RV32IF-NEXT:    j .LBB17_8
+;
+; RV64IF-LABEL: test_roundeven_si64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB17_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB17_2:
+; RV64IF-NEXT:    fcvt.l.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(float %x) {
+; RV32IF-LABEL: test_roundeven_ui32:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    feq.s a0, fa0, fa0
+; RV32IF-NEXT:    bnez a0, .LBB18_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a0, 0
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB18_2:
+; RV32IF-NEXT:    fcvt.wu.s a0, fa0, rne
+; RV32IF-NEXT:    ret
+;
+; RV64IF-LABEL: test_roundeven_ui32:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB18_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB18_2:
+; RV64IF-NEXT:    fcvt.wu.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f32(float %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(float %x) nounwind {
+; RV32IF-LABEL: test_roundeven_ui64:
+; RV32IF:       # %bb.0:
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    call roundevenf@plt
+; RV32IF-NEXT:    fmv.s fs0, fa0
+; RV32IF-NEXT:    fmv.w.x ft0, zero
+; RV32IF-NEXT:    fle.s s0, ft0, fa0
+; RV32IF-NEXT:    call __fixunssfdi@plt
+; RV32IF-NEXT:    mv a3, a0
+; RV32IF-NEXT:    bnez s0, .LBB19_2
+; RV32IF-NEXT:  # %bb.1:
+; RV32IF-NEXT:    li a3, 0
+; RV32IF-NEXT:  .LBB19_2:
+; RV32IF-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IF-NEXT:    flw ft0, %lo(.LCPI19_0)(a0)
+; RV32IF-NEXT:    flt.s a4, ft0, fs0
+; RV32IF-NEXT:    li a2, -1
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:    beqz a4, .LBB19_7
+; RV32IF-NEXT:  # %bb.3:
+; RV32IF-NEXT:    beqz s0, .LBB19_8
+; RV32IF-NEXT:  .LBB19_4:
+; RV32IF-NEXT:    bnez a4, .LBB19_6
+; RV32IF-NEXT:  .LBB19_5:
+; RV32IF-NEXT:    mv a2, a1
+; RV32IF-NEXT:  .LBB19_6:
+; RV32IF-NEXT:    mv a1, a2
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
+; RV32IF-NEXT:  .LBB19_7:
+; RV32IF-NEXT:    mv a0, a3
+; RV32IF-NEXT:    bnez s0, .LBB19_4
+; RV32IF-NEXT:  .LBB19_8:
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a4, .LBB19_5
+; RV32IF-NEXT:    j .LBB19_6
+;
+; RV64IF-LABEL: test_roundeven_ui64:
+; RV64IF:       # %bb.0:
+; RV64IF-NEXT:    feq.s a0, fa0, fa0
+; RV64IF-NEXT:    bnez a0, .LBB19_2
+; RV64IF-NEXT:  # %bb.1:
+; RV64IF-NEXT:    li a0, 0
+; RV64IF-NEXT:    ret
+; RV64IF-NEXT:  .LBB19_2:
+; RV64IF-NEXT:    fcvt.lu.s a0, fa0, rne
+; RV64IF-NEXT:    ret
+  %a = call float @llvm.roundeven.f32(float %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f32(float %a)
+  ret i64 %b
+}
+
+declare float @llvm.floor.f32(float)
+declare float @llvm.ceil.f32(float)
+declare float @llvm.trunc.f32(float)
+declare float @llvm.round.f32(float)
+declare float @llvm.roundeven.f32(float)
+declare i32 @llvm.fptosi.sat.i32.f32(float)
+declare i64 @llvm.fptosi.sat.i64.f32(float)
+declare i32 @llvm.fptoui.sat.i32.f32(float)
+declare i64 @llvm.fptoui.sat.i64.f32(float)
diff --git a/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
new file mode 100644
index 0000000000000..7b3104c69bef6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/half-round-conv-sat.ll
@@ -0,0 +1,970 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zfh -verify-machineinstrs < %s \
+; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IZFH %s
+; RUN: llc -mtriple=riscv64 -mattr=+zfh -verify-machineinstrs < %s \
+; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IZFH %s
+
+define signext i32 @test_floor_si32(half %x) {
+; RV32IZFH-LABEL: test_floor_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB0_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB0_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rdn
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_floor_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB0_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB0_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_floor_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call floorf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI1_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI1_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB1_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB1_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI1_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI1_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB1_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB1_10
+; RV32IZFH-NEXT:  .LBB1_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB1_11
+; RV32IZFH-NEXT:  .LBB1_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB1_12
+; RV32IZFH-NEXT:  .LBB1_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB1_8
+; RV32IZFH-NEXT:  .LBB1_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB1_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB1_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB1_4
+; RV32IZFH-NEXT:  .LBB1_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB1_5
+; RV32IZFH-NEXT:  .LBB1_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB1_6
+; RV32IZFH-NEXT:  .LBB1_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB1_7
+; RV32IZFH-NEXT:    j .LBB1_8
+;
+; RV64IZFH-LABEL: test_floor_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB1_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB1_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_floor_ui32(half %x) {
+; RV32IZFH-LABEL: test_floor_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB2_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB2_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rdn
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_floor_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB2_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB2_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_floor_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_floor_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call floorf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB3_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB3_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI3_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI3_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB3_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB3_8
+; RV32IZFH-NEXT:  .LBB3_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB3_6
+; RV32IZFH-NEXT:  .LBB3_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB3_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB3_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB3_4
+; RV32IZFH-NEXT:  .LBB3_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB3_5
+; RV32IZFH-NEXT:    j .LBB3_6
+;
+; RV64IZFH-LABEL: test_floor_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB3_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB3_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rdn
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.floor.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_si32(half %x) {
+; RV32IZFH-LABEL: test_ceil_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB4_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB4_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rup
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_ceil_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB4_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB4_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_ceil_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call ceilf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI5_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI5_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB5_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB5_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI5_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI5_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB5_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB5_10
+; RV32IZFH-NEXT:  .LBB5_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB5_11
+; RV32IZFH-NEXT:  .LBB5_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB5_12
+; RV32IZFH-NEXT:  .LBB5_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB5_8
+; RV32IZFH-NEXT:  .LBB5_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB5_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB5_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB5_4
+; RV32IZFH-NEXT:  .LBB5_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB5_5
+; RV32IZFH-NEXT:  .LBB5_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB5_6
+; RV32IZFH-NEXT:  .LBB5_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB5_7
+; RV32IZFH-NEXT:    j .LBB5_8
+;
+; RV64IZFH-LABEL: test_ceil_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB5_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB5_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_ceil_ui32(half %x) {
+; RV32IZFH-LABEL: test_ceil_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB6_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB6_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rup
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_ceil_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB6_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB6_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_ceil_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_ceil_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call ceilf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB7_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB7_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI7_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI7_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB7_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB7_8
+; RV32IZFH-NEXT:  .LBB7_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB7_6
+; RV32IZFH-NEXT:  .LBB7_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB7_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB7_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB7_4
+; RV32IZFH-NEXT:  .LBB7_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB7_5
+; RV32IZFH-NEXT:    j .LBB7_6
+;
+; RV64IZFH-LABEL: test_ceil_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB7_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB7_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rup
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.ceil.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_si32(half %x) {
+; RV32IZFH-LABEL: test_trunc_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB8_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB8_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rtz
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_trunc_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB8_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB8_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_trunc_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call truncf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI9_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI9_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB9_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB9_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI9_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI9_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB9_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB9_10
+; RV32IZFH-NEXT:  .LBB9_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB9_11
+; RV32IZFH-NEXT:  .LBB9_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB9_12
+; RV32IZFH-NEXT:  .LBB9_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB9_8
+; RV32IZFH-NEXT:  .LBB9_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB9_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB9_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB9_4
+; RV32IZFH-NEXT:  .LBB9_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB9_5
+; RV32IZFH-NEXT:  .LBB9_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB9_6
+; RV32IZFH-NEXT:  .LBB9_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB9_7
+; RV32IZFH-NEXT:    j .LBB9_8
+;
+; RV64IZFH-LABEL: test_trunc_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB9_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB9_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_trunc_ui32(half %x) {
+; RV32IZFH-LABEL: test_trunc_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB10_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB10_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_trunc_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB10_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB10_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_trunc_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_trunc_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call truncf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB11_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB11_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI11_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI11_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB11_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB11_8
+; RV32IZFH-NEXT:  .LBB11_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB11_6
+; RV32IZFH-NEXT:  .LBB11_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB11_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB11_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB11_4
+; RV32IZFH-NEXT:  .LBB11_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB11_5
+; RV32IZFH-NEXT:    j .LBB11_6
+;
+; RV64IZFH-LABEL: test_trunc_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB11_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB11_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rtz
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.trunc.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_si32(half %x) {
+; RV32IZFH-LABEL: test_round_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB12_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB12_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_round_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB12_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB12_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_round_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_round_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI13_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI13_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB13_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB13_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI13_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI13_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB13_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB13_10
+; RV32IZFH-NEXT:  .LBB13_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB13_11
+; RV32IZFH-NEXT:  .LBB13_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB13_12
+; RV32IZFH-NEXT:  .LBB13_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB13_8
+; RV32IZFH-NEXT:  .LBB13_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB13_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB13_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB13_4
+; RV32IZFH-NEXT:  .LBB13_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB13_5
+; RV32IZFH-NEXT:  .LBB13_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB13_6
+; RV32IZFH-NEXT:  .LBB13_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB13_7
+; RV32IZFH-NEXT:    j .LBB13_8
+;
+; RV64IZFH-LABEL: test_round_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB13_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB13_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_round_ui32(half %x) {
+; RV32IZFH-LABEL: test_round_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB14_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB14_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rmm
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_round_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB14_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB14_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_round_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_round_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB15_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB15_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI15_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB15_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB15_8
+; RV32IZFH-NEXT:  .LBB15_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB15_6
+; RV32IZFH-NEXT:  .LBB15_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB15_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB15_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB15_4
+; RV32IZFH-NEXT:  .LBB15_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB15_5
+; RV32IZFH-NEXT:    j .LBB15_6
+;
+; RV64IZFH-LABEL: test_round_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB15_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB15_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rmm
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.round.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_si32(half %x) {
+; RV32IZFH-LABEL: test_roundeven_si32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB16_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB16_2:
+; RV32IZFH-NEXT:    fcvt.w.h a0, fa0, rne
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_roundeven_si32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB16_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB16_2:
+; RV64IZFH-NEXT:    fcvt.w.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i32 @llvm.fptosi.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_si64(half %x) nounwind {
+; RV32IZFH-LABEL: test_roundeven_si64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundevenf@plt
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI17_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI17_0)(a0)
+; RV32IZFH-NEXT:    fcvt.h.s ft1, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft1
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixsfdi@plt
+; RV32IZFH-NEXT:    mv a2, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB17_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a2, 0
+; RV32IZFH-NEXT:  .LBB17_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI17_1)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI17_1)(a0)
+; RV32IZFH-NEXT:    flt.s a3, ft0, fs0
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a3, .LBB17_9
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    beqz a2, .LBB17_10
+; RV32IZFH-NEXT:  .LBB17_4:
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    beqz s0, .LBB17_11
+; RV32IZFH-NEXT:  .LBB17_5:
+; RV32IZFH-NEXT:    bnez a3, .LBB17_12
+; RV32IZFH-NEXT:  .LBB17_6:
+; RV32IZFH-NEXT:    bnez a2, .LBB17_8
+; RV32IZFH-NEXT:  .LBB17_7:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:  .LBB17_8:
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB17_9:
+; RV32IZFH-NEXT:    mv a0, a2
+; RV32IZFH-NEXT:    feq.s a2, fs0, fs0
+; RV32IZFH-NEXT:    bnez a2, .LBB17_4
+; RV32IZFH-NEXT:  .LBB17_10:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    lui a4, 524288
+; RV32IZFH-NEXT:    bnez s0, .LBB17_5
+; RV32IZFH-NEXT:  .LBB17_11:
+; RV32IZFH-NEXT:    lui a1, 524288
+; RV32IZFH-NEXT:    beqz a3, .LBB17_6
+; RV32IZFH-NEXT:  .LBB17_12:
+; RV32IZFH-NEXT:    addi a1, a4, -1
+; RV32IZFH-NEXT:    beqz a2, .LBB17_7
+; RV32IZFH-NEXT:    j .LBB17_8
+;
+; RV64IZFH-LABEL: test_roundeven_si64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB17_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB17_2:
+; RV64IZFH-NEXT:    fcvt.l.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i64 @llvm.fptosi.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+define signext i32 @test_roundeven_ui32(half %x) {
+; RV32IZFH-LABEL: test_roundeven_ui32:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV32IZFH-NEXT:    bnez a0, .LBB18_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a0, 0
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB18_2:
+; RV32IZFH-NEXT:    fcvt.wu.h a0, fa0, rne
+; RV32IZFH-NEXT:    ret
+;
+; RV64IZFH-LABEL: test_roundeven_ui32:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB18_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB18_2:
+; RV64IZFH-NEXT:    fcvt.wu.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i32 @llvm.fptoui.sat.i32.f16(half %a)
+  ret i32 %b
+}
+
+define i64 @test_roundeven_ui64(half %x) nounwind {
+; RV32IZFH-LABEL: test_roundeven_ui64:
+; RV32IZFH:       # %bb.0:
+; RV32IZFH-NEXT:    addi sp, sp, -16
+; RV32IZFH-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fsw fs0, 4(sp) # 4-byte Folded Spill
+; RV32IZFH-NEXT:    fcvt.s.h fa0, fa0
+; RV32IZFH-NEXT:    call roundevenf@plt
+; RV32IZFH-NEXT:    fcvt.h.s ft0, fa0
+; RV32IZFH-NEXT:    fcvt.s.h fs0, ft0
+; RV32IZFH-NEXT:    fmv.w.x ft0, zero
+; RV32IZFH-NEXT:    fle.s s0, ft0, fs0
+; RV32IZFH-NEXT:    fmv.s fa0, fs0
+; RV32IZFH-NEXT:    call __fixunssfdi@plt
+; RV32IZFH-NEXT:    mv a3, a0
+; RV32IZFH-NEXT:    bnez s0, .LBB19_2
+; RV32IZFH-NEXT:  # %bb.1:
+; RV32IZFH-NEXT:    li a3, 0
+; RV32IZFH-NEXT:  .LBB19_2:
+; RV32IZFH-NEXT:    lui a0, %hi(.LCPI19_0)
+; RV32IZFH-NEXT:    flw ft0, %lo(.LCPI19_0)(a0)
+; RV32IZFH-NEXT:    flt.s a4, ft0, fs0
+; RV32IZFH-NEXT:    li a2, -1
+; RV32IZFH-NEXT:    li a0, -1
+; RV32IZFH-NEXT:    beqz a4, .LBB19_7
+; RV32IZFH-NEXT:  # %bb.3:
+; RV32IZFH-NEXT:    beqz s0, .LBB19_8
+; RV32IZFH-NEXT:  .LBB19_4:
+; RV32IZFH-NEXT:    bnez a4, .LBB19_6
+; RV32IZFH-NEXT:  .LBB19_5:
+; RV32IZFH-NEXT:    mv a2, a1
+; RV32IZFH-NEXT:  .LBB19_6:
+; RV32IZFH-NEXT:    mv a1, a2
+; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    addi sp, sp, 16
+; RV32IZFH-NEXT:    ret
+; RV32IZFH-NEXT:  .LBB19_7:
+; RV32IZFH-NEXT:    mv a0, a3
+; RV32IZFH-NEXT:    bnez s0, .LBB19_4
+; RV32IZFH-NEXT:  .LBB19_8:
+; RV32IZFH-NEXT:    li a1, 0
+; RV32IZFH-NEXT:    beqz a4, .LBB19_5
+; RV32IZFH-NEXT:    j .LBB19_6
+;
+; RV64IZFH-LABEL: test_roundeven_ui64:
+; RV64IZFH:       # %bb.0:
+; RV64IZFH-NEXT:    feq.h a0, fa0, fa0
+; RV64IZFH-NEXT:    bnez a0, .LBB19_2
+; RV64IZFH-NEXT:  # %bb.1:
+; RV64IZFH-NEXT:    li a0, 0
+; RV64IZFH-NEXT:    ret
+; RV64IZFH-NEXT:  .LBB19_2:
+; RV64IZFH-NEXT:    fcvt.lu.h a0, fa0, rne
+; RV64IZFH-NEXT:    ret
+  %a = call half @llvm.roundeven.f16(half %x)
+  %b = call i64 @llvm.fptoui.sat.i64.f16(half %a)
+  ret i64 %b
+}
+
+declare half @llvm.floor.f16(half)
+declare half @llvm.ceil.f16(half)
+declare half @llvm.trunc.f16(half)
+declare half @llvm.round.f16(half)
+declare half @llvm.roundeven.f16(half)
+declare i32 @llvm.fptosi.sat.i32.f16(half)
+declare i64 @llvm.fptosi.sat.i64.f16(half)
+declare i32 @llvm.fptoui.sat.i32.f16(half)
+declare i64 @llvm.fptoui.sat.i64.f16(half)

From 63a991d0358970d76700d084f05eb95cd29234c0 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Mon, 3 Jan 2022 20:28:00 -0500
Subject: [PATCH 060/946] [libc++] Eliminate the `__function_like` helper.

As prefigured in the comments on D115315.
This gives us one unified style for all niebloids,
and also simplifies the modulemap.

Differential Revision: https://reviews.llvm.org/D116570
---
 libcxx/include/CMakeLists.txt                 |   1 -
 libcxx/include/__function_like.h              |  51 -----
 libcxx/include/__iterator/advance.h           |   7 +-
 libcxx/include/__iterator/next.h              |   8 +-
 libcxx/include/__iterator/prev.h              |   8 +-
 libcxx/include/__memory/ranges_construct_at.h |  26 +--
 .../ranges_uninitialized_algorithms.h         |  78 ++------
 libcxx/include/module.modulemap               |  26 +--
 .../function_like.h.module.verify.cpp         |  15 --
 .../special_function.compile.pass.cpp         |  23 ---
 .../special_function.compile.pass.cpp         |  23 ---
 .../special_function.compile.pass.cpp         |  23 ---
 .../niebloid.compile.pass.cpp                 | 188 ++++++++++++++++++
 libcxx/test/support/is_niebloid.h             |  39 ----
 14 files changed, 230 insertions(+), 286 deletions(-)
 delete mode 100644 libcxx/include/__function_like.h
 delete mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/function_like.h.module.verify.cpp
 delete mode 100644 libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/special_function.compile.pass.cpp
 delete mode 100644 libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/special_function.compile.pass.cpp
 delete mode 100644 libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/special_function.compile.pass.cpp
 create mode 100644 libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
 delete mode 100644 libcxx/test/support/is_niebloid.h

diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 7d56123a69d1a..b7222540846fa 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -182,7 +182,6 @@ set(files
   __format/formatter_integral.h
   __format/formatter_string.h
   __format/parser_std_format_spec.h
-  __function_like.h
   __functional/binary_function.h
   __functional/binary_negate.h
   __functional/bind.h
diff --git a/libcxx/include/__function_like.h b/libcxx/include/__function_like.h
deleted file mode 100644
index 4075355174d99..0000000000000
--- a/libcxx/include/__function_like.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// -*- C++ -*-
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef _LIBCPP___ITERATOR_FUNCTION_LIKE_H
-#define _LIBCPP___ITERATOR_FUNCTION_LIKE_H
-
-#include <__config>
-
-#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
-#pragma GCC system_header
-#endif
-
-_LIBCPP_BEGIN_NAMESPACE_STD
-
-#if !defined(_LIBCPP_HAS_NO_RANGES)
-
-namespace ranges {
-// Per [range.iter.ops.general] and [algorithms.requirements], functions in namespace std::ranges
-// can't be found by ADL and inhibit ADL when found by unqualified lookup. The easiest way to
-// facilitate this is to use function objects.
-//
-// Since these are still standard library functions, we use `__function_like` to eliminate most of
-// the properties that function objects get by default (e.g. semiregularity, addressability), to
-// limit the surface area of the unintended public interface, so as to curb the effect of Hyrum's
-// law.
-struct __function_like {
-  __function_like() = delete;
-  __function_like(__function_like const&) = delete;
-  __function_like& operator=(__function_like const&) = delete;
-
-  void operator&() const = delete;
-
-  struct __tag { };
-
-protected:
-  constexpr explicit __function_like(__tag) noexcept {}
-  ~__function_like() = default;
-};
-} // namespace ranges
-
-#endif // !defined(_LIBCPP_HAS_NO_RANGES)
-
-_LIBCPP_END_NAMESPACE_STD
-
-#endif // _LIBCPP___ITERATOR_FUNCTION_LIKE_H
diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h
index ee3fba30dc1e1..831f88f462744 100644
--- a/libcxx/include/__iterator/advance.h
+++ b/libcxx/include/__iterator/advance.h
@@ -12,7 +12,6 @@
 
 #include <__config>
 #include <__debug>
-#include <__function_like.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
 #include <__iterator/iterator_traits.h>
@@ -72,7 +71,7 @@ void advance(_InputIter& __i, _Distance __orig_n) {
 namespace ranges {
 namespace __advance {
 
-struct __fn final : private __function_like {
+struct __fn {
 private:
   template <class _Tp>
   _LIBCPP_HIDE_FROM_ABI
@@ -99,8 +98,6 @@ struct __fn final : private __function_like {
   }
 
 public:
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
   // Preconditions: If `I` does not model `bidirectional_iterator`, `n` is not negative.
   template <input_or_output_iterator _Ip>
   _LIBCPP_HIDE_FROM_ABI
@@ -191,7 +188,7 @@ struct __fn final : private __function_like {
 } // namespace __advance
 
 inline namespace __cpo {
-  inline constexpr auto advance = __advance::__fn(__function_like::__tag());
+  inline constexpr auto advance = __advance::__fn{};
 } // namespace __cpo
 } // namespace ranges
 
diff --git a/libcxx/include/__iterator/next.h b/libcxx/include/__iterator/next.h
index 12c213a1e4d7c..b9bdd6b27e05a 100644
--- a/libcxx/include/__iterator/next.h
+++ b/libcxx/include/__iterator/next.h
@@ -12,7 +12,6 @@
 
 #include <__config>
 #include <__debug>
-#include <__function_like.h>
 #include <__iterator/advance.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
@@ -43,10 +42,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 namespace ranges {
 namespace __next {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <input_or_output_iterator _Ip>
   _LIBCPP_HIDE_FROM_ABI
   constexpr _Ip operator()(_Ip __x) const {
@@ -79,7 +75,7 @@ struct __fn final : private __function_like {
 } // namespace __next
 
 inline namespace __cpo {
-  inline constexpr auto next = __next::__fn(__function_like::__tag());
+  inline constexpr auto next = __next::__fn{};
 } // namespace __cpo
 } // namespace ranges
 
diff --git a/libcxx/include/__iterator/prev.h b/libcxx/include/__iterator/prev.h
index 84c69f9c13af6..870cbe64eaeee 100644
--- a/libcxx/include/__iterator/prev.h
+++ b/libcxx/include/__iterator/prev.h
@@ -12,7 +12,6 @@
 
 #include <__config>
 #include <__debug>
-#include <__function_like.h>
 #include <__iterator/advance.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
@@ -42,10 +41,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX14
 namespace ranges {
 namespace __prev {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <bidirectional_iterator _Ip>
   _LIBCPP_HIDE_FROM_ABI
   constexpr _Ip operator()(_Ip __x) const {
@@ -71,7 +67,7 @@ struct __fn final : private __function_like {
 } // namespace __prev
 
 inline namespace __cpo {
-  inline constexpr auto prev = __prev::__fn(__function_like::__tag());
+  inline constexpr auto prev = __prev::__fn{};
 } // namespace __cpo
 } // namespace ranges
 
diff --git a/libcxx/include/__memory/ranges_construct_at.h b/libcxx/include/__memory/ranges_construct_at.h
index 9b0edb7c2562d..1a72da7396821 100644
--- a/libcxx/include/__memory/ranges_construct_at.h
+++ b/libcxx/include/__memory/ranges_construct_at.h
@@ -12,7 +12,6 @@
 
 #include <__concepts/destructible.h>
 #include <__config>
-#include <__function_like.h>
 #include <__iterator/incrementable_traits.h>
 #include <__iterator/readable_traits.h>
 #include <__memory/concepts.h>
@@ -37,9 +36,7 @@ namespace ranges {
 
 namespace __construct_at {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template<class _Tp, class... _Args, class = decltype(
     ::new (declval<void*>()) _Tp(declval<_Args>()...)
   )>
@@ -52,16 +49,14 @@ struct __fn final : private __function_like {
 } // namespace __construct_at
 
 inline namespace __cpo {
-  inline constexpr auto construct_at = __construct_at::__fn(__function_like::__tag());
+  inline constexpr auto construct_at = __construct_at::__fn{};
 } // namespace __cpo
 
 // destroy_at
 
 namespace __destroy_at {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <destructible _Tp>
   _LIBCPP_HIDE_FROM_ABI
   constexpr void operator()(_Tp* __location) const noexcept {
@@ -72,16 +67,14 @@ struct __fn final : private __function_like {
 } // namespace __destroy_at
 
 inline namespace __cpo {
-  inline constexpr auto destroy_at = __destroy_at::__fn(__function_like::__tag());
+  inline constexpr auto destroy_at = __destroy_at::__fn{};
 } // namespace __cpo
 
 // destroy
 
 namespace __destroy {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_input_iterator _InputIterator, __nothrow_sentinel_for<_InputIterator> _Sentinel>
     requires destructible<iter_value_t<_InputIterator>>
   _LIBCPP_HIDE_FROM_ABI
@@ -100,16 +93,14 @@ struct __fn final : private __function_like {
 } // namespace __destroy
 
 inline namespace __cpo {
-  inline constexpr auto destroy = __destroy::__fn(__function_like::__tag());
+  inline constexpr auto destroy = __destroy::__fn{};
 } // namespace __cpo
 
 // destroy_n
 
 namespace __destroy_n {
 
-struct __fn final : private __function_like {
-  _LIBCPP_HIDE_FROM_ABI constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_input_iterator _InputIterator>
     requires destructible<iter_value_t<_InputIterator>>
   _LIBCPP_HIDE_FROM_ABI
@@ -121,10 +112,11 @@ struct __fn final : private __function_like {
 } // namespace __destroy_n
 
 inline namespace __cpo {
-  inline constexpr auto destroy_n = __destroy_n::__fn(__function_like::__tag());
+  inline constexpr auto destroy_n = __destroy_n::__fn{};
 } // namespace __cpo
 
 } // namespace ranges
+
 #endif // !defined(_LIBCPP_HAS_NO_RANGES)
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/include/__memory/ranges_uninitialized_algorithms.h b/libcxx/include/__memory/ranges_uninitialized_algorithms.h
index 8cd2748e45c97..6a8f9f070ed70 100644
--- a/libcxx/include/__memory/ranges_uninitialized_algorithms.h
+++ b/libcxx/include/__memory/ranges_uninitialized_algorithms.h
@@ -13,7 +13,6 @@
 #include <__algorithm/in_out_result.h>
 #include <__concepts/constructible.h>
 #include <__config>
-#include <__function_like.h>
 #include <__iterator/concepts.h>
 #include <__iterator/incrementable_traits.h>
 #include <__iterator/iter_move.h>
@@ -40,9 +39,7 @@ namespace ranges {
 
 namespace __uninitialized_default_construct {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator,
             __nothrow_sentinel_for<_ForwardIterator> _Sentinel>
     requires default_initializable<iter_value_t<_ForwardIterator>>
@@ -62,16 +59,14 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_default_construct
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_default_construct = __uninitialized_default_construct::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_default_construct = __uninitialized_default_construct::__fn{};
 } // namespace __cpo
 
 // uninitialized_default_construct_n
 
 namespace __uninitialized_default_construct_n {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator>
     requires default_initializable<iter_value_t<_ForwardIterator>>
   _ForwardIterator operator()(_ForwardIterator __first,
@@ -84,18 +79,14 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_default_construct_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_default_construct_n =
-      __uninitialized_default_construct_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_default_construct_n = __uninitialized_default_construct_n::__fn{};
 } // namespace __cpo
 
 // uninitialized_value_construct
 
 namespace __uninitialized_value_construct {
 
-struct __fn final : private __function_like {
-
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator,
             __nothrow_sentinel_for<_ForwardIterator> _Sentinel>
     requires default_initializable<iter_value_t<_ForwardIterator>>
@@ -110,24 +101,19 @@ struct __fn final : private __function_like {
   borrowed_iterator_t<_ForwardRange> operator()(_ForwardRange&& __range) const {
     return (*this)(ranges::begin(__range), ranges::end(__range));
   }
-
 };
 
 } // namespace __uninitialized_value_construct
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_value_construct =
-      __uninitialized_value_construct::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_value_construct = __uninitialized_value_construct::__fn{};
 } // namespace __cpo
 
 // uninitialized_value_construct_n
 
 namespace __uninitialized_value_construct_n {
 
-struct __fn final : private __function_like {
-
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator>
     requires default_initializable<iter_value_t<_ForwardIterator>>
   _ForwardIterator operator()(_ForwardIterator __first,
@@ -135,24 +121,19 @@ struct __fn final : private __function_like {
     using _ValueType = remove_reference_t<iter_reference_t<_ForwardIterator>>;
     return _VSTD::__uninitialized_value_construct_n<_ValueType>(_VSTD::move(__first), __n);
   }
-
 };
 
 } // namespace __uninitialized_value_construct_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_value_construct_n =
-      __uninitialized_value_construct_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_value_construct_n = __uninitialized_value_construct_n::__fn{};
 } // namespace __cpo
 
 // uninitialized_fill
 
 namespace __uninitialized_fill {
 
-struct __fn final : private __function_like {
-
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator,
             __nothrow_sentinel_for<_ForwardIterator> _Sentinel,
             class _Tp>
@@ -167,23 +148,19 @@ struct __fn final : private __function_like {
   borrowed_iterator_t<_ForwardRange> operator()(_ForwardRange&& __range, const _Tp& __x) const {
     return (*this)(ranges::begin(__range), ranges::end(__range), __x);
   }
-
 };
 
 } // namespace __uninitialized_fill
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_fill = __uninitialized_fill::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_fill = __uninitialized_fill::__fn{};
 } // namespace __cpo
 
 // uninitialized_fill_n
 
 namespace __uninitialized_fill_n {
 
-struct __fn final : private __function_like {
-
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <__nothrow_forward_iterator _ForwardIterator, class _Tp>
     requires constructible_from<iter_value_t<_ForwardIterator>, const _Tp&>
   _ForwardIterator operator()(_ForwardIterator __first,
@@ -192,13 +169,12 @@ struct __fn final : private __function_like {
     using _ValueType = remove_reference_t<iter_reference_t<_ForwardIterator>>;
     return _VSTD::__uninitialized_fill_n<_ValueType>(_VSTD::move(__first), __n, __x);
   }
-
 };
 
 } // namespace __uninitialized_fill_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_fill_n = __uninitialized_fill_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_fill_n = __uninitialized_fill_n::__fn{};
 } // namespace __cpo
 
 // uninitialized_copy
@@ -208,9 +184,7 @@ using uninitialized_copy_result = in_out_result<_InputIterator, _OutputIterator>
 
 namespace __uninitialized_copy {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <input_iterator _InputIterator,
             sentinel_for<_InputIterator> _Sentinel1,
             __nothrow_forward_iterator _OutputIterator,
@@ -237,7 +211,7 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_copy
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_copy = __uninitialized_copy::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_copy = __uninitialized_copy::__fn{};
 } // namespace __cpo
 
 // uninitialized_copy_n
@@ -247,9 +221,7 @@ using uninitialized_copy_n_result = in_out_result<_InputIterator, _OutputIterato
 
 namespace __uninitialized_copy_n {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <input_iterator _InputIterator,
            __nothrow_forward_iterator _OutputIterator,
            __nothrow_sentinel_for<_OutputIterator> _Sentinel>
@@ -267,7 +239,7 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_copy_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_copy_n = __uninitialized_copy_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_copy_n = __uninitialized_copy_n::__fn{};
 } // namespace __cpo
 
 // uninitialized_move
@@ -277,9 +249,7 @@ using uninitialized_move_result = in_out_result<_InputIterator, _OutputIterator>
 
 namespace __uninitialized_move {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <input_iterator _InputIterator,
             sentinel_for<_InputIterator> _Sentinel1,
             __nothrow_forward_iterator _OutputIterator,
@@ -289,7 +259,6 @@ struct __fn final : private __function_like {
   operator()(_InputIterator __ifirst, _Sentinel1 __ilast, _OutputIterator __ofirst, _Sentinel2 __olast) const {
     using _ValueType = remove_reference_t<iter_reference_t<_OutputIterator>>;
     auto __iter_move = [](auto&& __iter) -> decltype(auto) { return ranges::iter_move(__iter); };
-
     auto __result = _VSTD::__uninitialized_move<_ValueType>(_VSTD::move(__ifirst), _VSTD::move(__ilast),
                                                             _VSTD::move(__ofirst), _VSTD::move(__olast), __iter_move);
     return {_VSTD::move(__result.first), _VSTD::move(__result.second)};
@@ -307,7 +276,7 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_move
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_move = __uninitialized_move::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_move = __uninitialized_move::__fn{};
 } // namespace __cpo
 
 // uninitialized_move_n
@@ -317,9 +286,7 @@ using uninitialized_move_n_result = in_out_result<_InputIterator, _OutputIterato
 
 namespace __uninitialized_move_n {
 
-struct __fn final : private __function_like {
-  constexpr explicit __fn(__tag __x) noexcept : __function_like(__x) {}
-
+struct __fn {
   template <input_iterator _InputIterator,
            __nothrow_forward_iterator _OutputIterator,
            __nothrow_sentinel_for<_OutputIterator> _Sentinel>
@@ -329,9 +296,8 @@ struct __fn final : private __function_like {
              _OutputIterator __ofirst, _Sentinel __olast) const {
     using _ValueType = remove_reference_t<iter_reference_t<_OutputIterator>>;
     auto __iter_move = [](auto&& __iter) -> decltype(auto) { return ranges::iter_move(__iter); };
-
-    auto __result = _VSTD::__uninitialized_move_n<_ValueType>(_VSTD::move(__ifirst), __n, _VSTD::move(__ofirst),
-                                                              _VSTD::move(__olast), __iter_move);
+    auto __result = _VSTD::__uninitialized_move_n<_ValueType>(_VSTD::move(__ifirst), __n,
+                                                              _VSTD::move(__ofirst), _VSTD::move(__olast), __iter_move);
     return {_VSTD::move(__result.first), _VSTD::move(__result.second)};
   }
 };
@@ -339,7 +305,7 @@ struct __fn final : private __function_like {
 } // namespace __uninitialized_move_n
 
 inline namespace __cpo {
-  inline constexpr auto uninitialized_move_n = __uninitialized_move_n::__fn(__function_like::__tag());
+  inline constexpr auto uninitialized_move_n = __uninitialized_move_n::__fn{};
 } // namespace __cpo
 
 } // namespace ranges
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index c17ecc98aa5d9..a927f9d0e6700 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -592,10 +592,7 @@ module std [system] {
 
     module __iterator {
       module access                { private header "__iterator/access.h" }
-      module advance               {
-        private header "__iterator/advance.h"
-        export __function_like
-      }
+      module advance               { private header "__iterator/advance.h" }
       module back_insert_iterator  { private header "__iterator/back_insert_iterator.h" }
       module common_iterator       { private header "__iterator/common_iterator.h" }
       module concepts              { private header "__iterator/concepts.h" }
@@ -616,16 +613,10 @@ module std [system] {
       module iterator              { private header "__iterator/iterator.h" }
       module iterator_traits       { private header "__iterator/iterator_traits.h" }
       module move_iterator         { private header "__iterator/move_iterator.h" }
-      module next                  {
-        private header "__iterator/next.h"
-        export __function_like
-      }
+      module next                  { private header "__iterator/next.h" }
       module ostream_iterator      { private header "__iterator/ostream_iterator.h" }
       module ostreambuf_iterator   { private header "__iterator/ostreambuf_iterator.h" }
-      module prev                  {
-        private header "__iterator/prev.h"
-        export __function_like
-      }
+      module prev                  { private header "__iterator/prev.h" }
       module projected             { private header "__iterator/projected.h" }
       module readable_traits       { private header "__iterator/readable_traits.h" }
       module reverse_access        { private header "__iterator/reverse_access.h" }
@@ -673,14 +664,8 @@ module std [system] {
       module concepts                        { private header "__memory/concepts.h" }
       module construct_at                    { private header "__memory/construct_at.h" }
       module pointer_traits                  { private header "__memory/pointer_traits.h" }
-      module ranges_construct_at {
-        private header "__memory/ranges_construct_at.h"
-        export __function_like
-      }
-      module ranges_uninitialized_algorithms {
-        private header "__memory/ranges_uninitialized_algorithms.h"
-        export __function_like
-      }
+      module ranges_construct_at             { private header "__memory/ranges_construct_at.h" }
+      module ranges_uninitialized_algorithms { private header "__memory/ranges_uninitialized_algorithms.h" }
       module raw_storage_iterator            { private header "__memory/raw_storage_iterator.h" }
       module shared_ptr                      { private header "__memory/shared_ptr.h" }
       module temporary_buffer                { private header "__memory/temporary_buffer.h" }
@@ -988,7 +973,6 @@ module std [system] {
   module __bits              { private header "__bits"              export * }
   module __debug             {         header "__debug"             export * }
   module __errc              { private header "__errc"              export * }
-  module __function_like     { private header "__function_like.h"   export * }
   module __hash_table        {         header "__hash_table"        export * }
   module __locale            { private header "__locale"            export * }
   module __mbstate_t         { private header "__mbstate_t.h"       export * }
diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/function_like.h.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/function_like.h.module.verify.cpp
deleted file mode 100644
index 40622e2e5c98a..0000000000000
--- a/libcxx/test/libcxx/diagnostics/detail.headers/function_like.h.module.verify.cpp
+++ /dev/null
@@ -1,15 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// REQUIRES: modules-build
-
-// WARNING: This test was generated by 'generate_private_header_tests.py'
-// and should not be edited manually.
-
-// expected-error@*:* {{use of private header from outside its module: '__function_like.h'}}
-#include <__function_like.h>
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/special_function.compile.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/special_function.compile.pass.cpp
deleted file mode 100644
index 8ce5a8e9a643d..0000000000000
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/special_function.compile.pass.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-no-concepts
-
-// ranges::advance
-
-#include <iterator>
-
-#include "is_niebloid.h"
-#include "test_macros.h"
-
-// Because this is a variable and not a function, it's guaranteed that ADL won't be used. However,
-// implementations are allowed to use a different mechanism to achieve this effect, so this check is
-// libc++-specific.
-LIBCPP_STATIC_ASSERT(std::is_class_v<decltype(std::ranges::advance)>);
-LIBCPP_STATIC_ASSERT(is_niebloid<decltype(std::ranges::advance)>());
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/special_function.compile.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/special_function.compile.pass.cpp
deleted file mode 100644
index 6f5e4b3783f3d..0000000000000
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.next/special_function.compile.pass.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-no-concepts
-
-// ranges::next
-
-#include <iterator>
-
-#include "is_niebloid.h"
-#include "test_macros.h"
-
-// Because this is a variable and not a function, it's guaranteed that ADL won't be used. However,
-// implementations are allowed to use a different mechanism to achieve this effect, so this check is
-// libc++-specific.
-LIBCPP_STATIC_ASSERT(std::is_class_v<decltype(std::ranges::next)>);
-LIBCPP_STATIC_ASSERT(is_niebloid<decltype(std::ranges::next)>());
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/special_function.compile.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/special_function.compile.pass.cpp
deleted file mode 100644
index a87464feb25f5..0000000000000
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/special_function.compile.pass.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-no-concepts
-
-// ranges::prev
-
-#include <iterator>
-
-#include "is_niebloid.h"
-#include "test_macros.h"
-
-// Because this is a variable and not a function, it's guaranteed that ADL won't be used. However,
-// implementations are allowed to use a different mechanism to achieve this effect, so this check is
-// libc++-specific.
-LIBCPP_STATIC_ASSERT(std::is_class_v<decltype(std::ranges::prev)>);
-LIBCPP_STATIC_ASSERT(is_niebloid<decltype(std::ranges::prev)>());
diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
new file mode 100644
index 0000000000000..fcca5813dcb31
--- /dev/null
+++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
@@ -0,0 +1,188 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts, libcpp-has-no-incomplete-ranges
+// REQUIRES: stdlib=libc++
+
+// [algorithms.requirements]/2
+// [range.iter.ops.general]/2
+
+#include <algorithm>
+#include <concepts>
+#include <iterator>
+#include <memory>
+#include <random>
+#include <ranges>
+#include <type_traits>
+#include <utility>
+
+// Niebloids, unlike CPOs, are *not* required to be semiregular or even to have
+// a declared type at all; they are specified as "magic" overload sets whose
+// names are not found by argument-dependent lookup and which inhibit
+// argument-dependent lookup if they are found via a `using`-declaration.
+//
+// libc++ implements them using the same function-object technique we use for CPOs;
+// therefore this file should stay in sync with ./cpo.compile.pass.cpp.
+
+template <class CPO, class... Args>
+constexpr bool test(CPO& o, Args&&...) {
+  static_assert(std::is_class_v<CPO>);
+  static_assert(std::is_trivial_v<CPO>);
+
+  auto p = o;
+  using T = decltype(p);
+
+  // The type of a customization point object, ignoring cv-qualifiers, shall model semiregular.
+  static_assert(std::semiregular<T>);
+
+  // The type T of a customization point object, ignoring cv-qualifiers, shall model...
+  static_assert(std::invocable<T&, Args...>);
+  static_assert(std::invocable<const T&, Args...>);
+  static_assert(std::invocable<T, Args...>);
+  static_assert(std::invocable<const T, Args...>);
+
+  return true;
+}
+
+int *p;
+int a[10];
+//auto odd = [](int x) { return x % 2 != 0; };
+//auto triple = [](int x) { return 3*x; };
+//auto plus = [](int x, int y) { return x == y; };
+//std::mt19937 g;
+
+// [algorithm.syn]
+
+//static_assert(test(std::ranges::adjacent_find, a));
+//static_assert(test(std::ranges::all_of, a, odd));
+//static_assert(test(std::ranges::any_of, a, odd));
+//static_assert(test(std::ranges::binary_search, a, 42));
+//static_assert(test(std::ranges::clamp, 42, 42, 42));
+//static_assert(test(std::ranges::copy, a, a));
+//static_assert(test(std::ranges::copy_backward, a, a));
+//static_assert(test(std::ranges::copy_if, a, a, odd));
+//static_assert(test(std::ranges::copy_n, a, 10, a));
+//static_assert(test(std::ranges::count, a, 42));
+//static_assert(test(std::ranges::count_if, a, odd));
+//static_assert(test(std::ranges::ends_with, a, a));
+//static_assert(test(std::ranges::equal, a, a));
+//static_assert(test(std::ranges::equal_range, a, 42));
+//static_assert(test(std::ranges::fill, a, 42));
+//static_assert(test(std::ranges::fill_n, a, 10, 42));
+//static_assert(test(std::ranges::find, a, 42));
+//static_assert(test(std::ranges::find_end, a, a));
+//static_assert(test(std::ranges::find_first_of, a, a));
+//static_assert(test(std::ranges::find_if, a, odd));
+//static_assert(test(std::ranges::find_if_not, a, odd));
+//static_assert(test(std::ranges::for_each, a, odd));
+//static_assert(test(std::ranges::for_each_n, a, 10, odd));
+//static_assert(test(std::ranges::generate, a, 42));
+//static_assert(test(std::ranges::generate_n, a, 10, 42));
+//static_assert(test(std::ranges::includes, a, a));
+//static_assert(test(std::ranges::inplace_merge, a, a+5));
+//static_assert(test(std::ranges::is_heap, a));
+//static_assert(test(std::ranges::is_heap_until, a));
+//static_assert(test(std::ranges::is_partitioned, a, odd));
+//static_assert(test(std::ranges::is_permutation, a, a));
+//static_assert(test(std::ranges::is_sorted, a));
+//static_assert(test(std::ranges::is_sorted_until, a));
+//static_assert(test(std::ranges::lexicographical_compare, a, a));
+//static_assert(test(std::ranges::lower_bound, a, 42));
+//static_assert(test(std::ranges::make_heap, a));
+//static_assert(test(std::ranges::max, a));
+//static_assert(test(std::ranges::max_element, a));
+//static_assert(test(std::ranges::merge, a, a, a));
+//static_assert(test(std::ranges::min, a));
+//static_assert(test(std::ranges::min_element, a));
+//static_assert(test(std::ranges::minmax, a));
+//static_assert(test(std::ranges::minmax_element, a));
+//static_assert(test(std::ranges::mismatch, a, a));
+//static_assert(test(std::ranges::move, a, a));
+//static_assert(test(std::ranges::move_backward, a, a));
+//static_assert(test(std::ranges::next_permutation, a));
+//static_assert(test(std::ranges::none_of, a, odd));
+//static_assert(test(std::ranges::nth_element, a, a+5));
+//static_assert(test(std::ranges::partial_sort, a, a+5));
+//static_assert(test(std::ranges::partial_sort_copy, a, a));
+//static_assert(test(std::ranges::partition, a, odd));
+//static_assert(test(std::ranges::partition_copy, a, a, a, odd));
+//static_assert(test(std::ranges::partition_point, a, odd));
+//static_assert(test(std::ranges::pop_heap, a));
+//static_assert(test(std::ranges::prev_permutation, a));
+//static_assert(test(std::ranges::push_heap, a));
+//static_assert(test(std::ranges::remove, a, 42));
+//static_assert(test(std::ranges::remove_copy, a, a, 42));
+//static_assert(test(std::ranges::remove_copy_if, a, a, odd));
+//static_assert(test(std::ranges::remove_if, a, odd));
+//static_assert(test(std::ranges::replace, a, 42, 43));
+//static_assert(test(std::ranges::replace_copy, a, a, 42, 43));
+//static_assert(test(std::ranges::replace_copy_if, a, a, odd, 43));
+//static_assert(test(std::ranges::replace_if, a, odd, 43));
+//static_assert(test(std::ranges::reverse, a));
+//static_assert(test(std::ranges::reverse_copy, a, a));
+//static_assert(test(std::ranges::rotate, a, a+5));
+//static_assert(test(std::ranges::rotate_copy, a, a+5, a));
+//static_assert(test(std::ranges::sample, a, a, 5));
+//static_assert(test(std::ranges::search, a, a));
+//static_assert(test(std::ranges::search_n, a, 10, 42));
+//static_assert(test(std::ranges::set_difference, a, a, a));
+//static_assert(test(std::ranges::set_intersection, a, a, a));
+//static_assert(test(std::ranges::set_symmetric_difference, a, a, a));
+//static_assert(test(std::ranges::set_union, a, a, a));
+//static_assert(test(std::ranges::shuffle, a, g));
+//static_assert(test(std::ranges::sort, a));
+//static_assert(test(std::ranges::sort_heap, a));
+//static_assert(test(std::ranges::stable_partition, a, odd));
+//static_assert(test(std::ranges::stable_sort, a));
+//static_assert(test(std::ranges::starts_with, a, a));
+//static_assert(test(std::ranges::swap_ranges, a, a));
+//static_assert(test(std::ranges::transform, a, a, triple));
+//static_assert(test(std::ranges::unique, a));
+//static_assert(test(std::ranges::unique_copy, a, a));
+//static_assert(test(std::ranges::upper_bound, a, 42));
+
+// [memory.syn]
+
+static_assert(test(std::ranges::construct_at, a, 42));
+static_assert(test(std::ranges::destroy, a));
+static_assert(test(std::ranges::destroy, a, a+10));
+static_assert(test(std::ranges::destroy_at, a));
+static_assert(test(std::ranges::destroy_n, a, 10));
+static_assert(test(std::ranges::uninitialized_copy, a, a));
+static_assert(test(std::ranges::uninitialized_copy, a, a+10, a, a+10));
+static_assert(test(std::ranges::uninitialized_copy_n, a, 10, a, a+10));
+static_assert(test(std::ranges::uninitialized_default_construct, a));
+static_assert(test(std::ranges::uninitialized_default_construct, a, a+10));
+static_assert(test(std::ranges::uninitialized_default_construct_n, a, 10));
+static_assert(test(std::ranges::uninitialized_fill, a, 42));
+static_assert(test(std::ranges::uninitialized_fill, a, a+10, 42));
+static_assert(test(std::ranges::uninitialized_fill_n, a, 10, 42));
+static_assert(test(std::ranges::uninitialized_move, a, a));
+static_assert(test(std::ranges::uninitialized_move, a, a+10, a, a+10));
+static_assert(test(std::ranges::uninitialized_move_n, a, 10, a, a+10));
+static_assert(test(std::ranges::uninitialized_value_construct, a));
+static_assert(test(std::ranges::uninitialized_value_construct, a, a+10));
+static_assert(test(std::ranges::uninitialized_value_construct_n, a, 10));
+
+// [numeric.ops.overview] currently has no ranges algorithms. See P1813, P2214
+
+// [range.iter.ops]
+
+static_assert(test(std::ranges::advance, p, 5));
+static_assert(test(std::ranges::advance, p, 5, a+10));
+static_assert(test(std::ranges::advance, p, a+10));
+//static_assert(test(std::ranges::distance, a));
+//static_assert(test(std::ranges::distance, a, a+10));
+static_assert(test(std::ranges::next, a));
+static_assert(test(std::ranges::next, a, 5));
+static_assert(test(std::ranges::next, a, 5, a+10));
+static_assert(test(std::ranges::next, a, a+10));
+static_assert(test(std::ranges::prev, a+10));
+static_assert(test(std::ranges::prev, a+10, 5));
+static_assert(test(std::ranges::prev, a+10, 5, a));
diff --git a/libcxx/test/support/is_niebloid.h b/libcxx/test/support/is_niebloid.h
deleted file mode 100644
index 2405d498939d2..0000000000000
--- a/libcxx/test/support/is_niebloid.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-#ifndef LIBCXX_TEST_SUPPORT_IS_NIEBLOID_H
-#define LIBCXX_TEST_SUPPORT_IS_NIEBLOID_H
-
-#include "test_macros.h"
-
-#if TEST_STD_VER >= 20
-template <class T>
-constexpr bool is_addressable = requires(T t) {
-  &t;
-};
-
-template <class T>
-constexpr bool is_niebloid() {
-  using X = std::remove_cvref_t<T>;
-  static_assert(!is_addressable<X>);
-  static_assert(!is_addressable<X const>);
-
-  static_assert(std::destructible<X> && !std::default_initializable<X>);
-
-  static_assert(!std::move_constructible<X>);
-  static_assert(!std::assignable_from<X&, X>);
-
-  static_assert(!std::copy_constructible<X>);
-  static_assert(!std::assignable_from<X&, X const>);
-  static_assert(!std::assignable_from<X&, X&>);
-  static_assert(!std::assignable_from<X&, X const&>);
-  static_assert(std::is_final_v<X>);
-  return true;
-}
-#endif
-
-#endif // LIBCXX_TEST_SUPPORT_IS_NIEBLOID_H

From ba8eb31bd9542828f6424e15a3014f80f14522c8 Mon Sep 17 00:00:00 2001
From: Roman Lebedev <lebedev.ri@gmail.com>
Date: Thu, 20 Jan 2022 22:41:31 +0300
Subject: [PATCH 061/946] [InstCombine] Instruction sinking: fix check for
 function terminating block

Checking for specific function terminating opcodes
means we don't handle other non-hardcoded ones :)

This should probably be generalized to something
similar to the `IsBlockFollowedByDeoptOrUnreachable()`.

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D117810
---
 llvm/lib/Transforms/InstCombine/InstructionCombining.cpp   | 5 ++---
 llvm/test/Transforms/InstCombine/sink-into-resume-block.ll | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d8544bf3211b8..89c5fef18eca8 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3966,12 +3966,11 @@ bool InstCombinerImpl::run() {
       // predecessor, so that we don't have to split the critical edge.
       // Another option where we can sink is a block that ends with a
       // terminator that does not pass control to other block (such as
-      // return or unreachable). In this case:
+      // return or unreachable or resume). In this case:
       //   - I dominates the User (by SSA form);
       //   - the User will be executed at most once.
       // So sinking I down to User is always profitable or neutral.
-      if (UserParent->getUniquePredecessor() == BB ||
-          (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term))) {
+      if (UserParent->getUniquePredecessor() == BB || succ_empty(Term)) {
         assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
         return UserParent;
       }
diff --git a/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll b/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
index a687977d4b975..cb7de75276cfc 100644
--- a/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
+++ b/llvm/test/Transforms/InstCombine/sink-into-resume-block.ll
@@ -7,7 +7,6 @@ define void @t0_noop(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_person
 ; CHECK-LABEL: @t0_noop(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[C:%.*]] = call i1 @cond()
-; CHECK-NEXT:    [[V0:%.*]] = add i32 [[ARG:%.*]], 42
 ; CHECK-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    invoke void @simple_throw()
@@ -17,6 +16,7 @@ define void @t0_noop(i32 %arg) personality i8* bitcast (i32 (...)* @__gxx_person
 ; CHECK:       lpad:
 ; CHECK-NEXT:    [[EH:%.*]] = landingpad { i8*, i32 }
 ; CHECK-NEXT:    cleanup
+; CHECK-NEXT:    [[V0:%.*]] = add i32 [[ARG:%.*]], 42
 ; CHECK-NEXT:    call void @consume(i32 [[V0]])
 ; CHECK-NEXT:    call void @destructor()
 ; CHECK-NEXT:    resume { i8*, i32 } [[EH]]

From 8105e404f186c6f31e08185b37f81e6da904f6d7 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Thu, 20 Jan 2022 07:40:12 -0800
Subject: [PATCH 062/946] [demangler][NFC] Small cleanups and sync

Some precursor work to adding module demangling.

* some mismatched comment and code in the demangler

* a const fn was not marked thusly

* we use std::islower.  A direct range check is smaller code (no function call),
  and we know we're in ASCII-land and later in that same function make the same
  assumption about upper-case contiguity.  Heck, maybe just drop the switch's
  precondition and rely on the optimizer to do its thing?

* the directory is cloned in two places, which had gotten out of sync.

Differential Revision: https://reviews.llvm.org/D117800
---
 libcxxabi/src/demangle/ItaniumDemangle.h     |  9 +++++----
 llvm/include/llvm/Demangle/ItaniumDemangle.h | 13 +++++++------
 llvm/include/llvm/Demangle/StringView.h      |  4 ++--
 llvm/include/llvm/Demangle/Utility.h         |  4 ++--
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 85e1511346c0b..b25139d8a72ba 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -310,7 +310,7 @@ class Node {
       printRight(OB);
   }
 
-  // Print the "left" side of this Node into OutputString.
+  // Print the "left" side of this Node into OutputBuffer.
   virtual void printLeft(OutputBuffer &) const = 0;
 
   // Print the "right". This distinction is necessary to represent C++ types
@@ -1210,7 +1210,8 @@ class TemplateParamPackDecl final : public Node {
 class ParameterPack final : public Node {
   NodeArray Data;
 
-  // Setup OutputString for a pack expansion unless we're already expanding one.
+  // Setup OutputBuffer for a pack expansion, unless we're already expanding
+  // one.
   void initializePackExpansion(OutputBuffer &OB) const {
     if (OB.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
       OB.CurrentPackMax = static_cast<unsigned>(Data.size());
@@ -2473,7 +2474,7 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
 
   char consume() { return First != Last ? *First++ : '\0'; }
 
-  char look(unsigned Lookahead = 0) {
+  char look(unsigned Lookahead = 0) const {
     if (static_cast<size_t>(Last - First) <= Lookahead)
       return '\0';
     return First[Lookahead];
@@ -5437,7 +5438,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSubstitution() {
   if (!consumeIf('S'))
     return nullptr;
 
-  if (std::islower(look())) {
+  if (look() >= 'a' && look() <= 'z') {
     Node *SpecialSub;
     switch (look()) {
     case 'a':
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 86f5c992b63d1..b25139d8a72ba 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H
-#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#ifndef DEMANGLE_ITANIUMDEMANGLE_H
+#define DEMANGLE_ITANIUMDEMANGLE_H
 
 // FIXME: (possibly) incomplete list of features that clang mangles that this
 // file does not yet support:
@@ -1210,7 +1210,8 @@ class TemplateParamPackDecl final : public Node {
 class ParameterPack final : public Node {
   NodeArray Data;
 
-  // Setup OutputBuffer for a pack expansion unless we're already expanding one.
+  // Setup OutputBuffer for a pack expansion, unless we're already expanding
+  // one.
   void initializePackExpansion(OutputBuffer &OB) const {
     if (OB.CurrentPackMax == std::numeric_limits<unsigned>::max()) {
       OB.CurrentPackMax = static_cast<unsigned>(Data.size());
@@ -2473,7 +2474,7 @@ template <typename Derived, typename Alloc> struct AbstractManglingParser {
 
   char consume() { return First != Last ? *First++ : '\0'; }
 
-  char look(unsigned Lookahead = 0) {
+  char look(unsigned Lookahead = 0) const {
     if (static_cast<size_t>(Last - First) <= Lookahead)
       return '\0';
     return First[Lookahead];
@@ -5437,7 +5438,7 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSubstitution() {
   if (!consumeIf('S'))
     return nullptr;
 
-  if (std::islower(look())) {
+  if (look() >= 'a' && look() <= 'z') {
     Node *SpecialSub;
     switch (look()) {
     case 'a':
@@ -5747,4 +5748,4 @@ struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> {
 
 DEMANGLE_NAMESPACE_END
 
-#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#endif // DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h
index 378e853416376..1e4d3803f06cd 100644
--- a/llvm/include/llvm/Demangle/StringView.h
+++ b/llvm/include/llvm/Demangle/StringView.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_STRINGVIEW_H
-#define LLVM_DEMANGLE_STRINGVIEW_H
+#ifndef DEMANGLE_STRINGVIEW_H
+#define DEMANGLE_STRINGVIEW_H
 
 #include "DemangleConfig.h"
 #include <algorithm>
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 4fea9351a4bfc..733d83ad1b6ba 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_UTILITY_H
-#define LLVM_DEMANGLE_UTILITY_H
+#ifndef DEMANGLE_UTILITY_H
+#define DEMANGLE_UTILITY_H
 
 #include "StringView.h"
 #include <cstdint>

From 864b5b49fd3f6639efbc16d9b1c827d0194eeb0d Mon Sep 17 00:00:00 2001
From: Casey Carter <Casey@Carter.net>
Date: Tue, 18 Jan 2022 22:50:15 -0800
Subject: [PATCH 063/946] [libcxx] chrono::month_weekday should not be default
 constructible

It was not in P0355R7, nor has it ever been so in a working draft.

Drive-by:
* tests should test something: fix loop bounds so initial value is not >= final value
* calender type streaming tests are useless - let's remove them
* don't declare printf, especially if you don't intend to use it

Differential Revision: https://reviews.llvm.org/D117638
---
 libcxx/docs/ReleaseNotes.rst                  |  3 +
 libcxx/include/__chrono/calendar.h            |  1 -
 .../comparisons.pass.cpp                      |  2 +-
 .../streaming.pass.cpp                        | 59 -------------------
 .../time.cal.md.members/ok.pass.cpp           |  2 +-
 .../comparisons.pass.cpp                      |  4 +-
 .../time.cal.md.nonmembers/streaming.pass.cpp | 42 -------------
 .../time.cal.mdlast/comparisons.pass.cpp      | 10 ++--
 .../time/time.cal/time.cal.mdlast/ok.pass.cpp |  2 +-
 .../time.cal.mdlast/streaming.pass.cpp        | 35 -----------
 .../comparisons.pass.cpp                      |  6 +-
 .../time.cal.month.nonmembers/minus.pass.cpp  | 15 ++---
 .../time.cal.month.nonmembers/plus.pass.cpp   | 10 ++--
 .../streaming.pass.cpp                        | 54 -----------------
 .../time.cal.mwd.members/month.pass.cpp       |  2 -
 .../time.cal.mwd.members/ok.pass.cpp          |  4 +-
 .../streaming.pass.cpp                        | 37 ------------
 .../time.cal.mwdlast.members/ctor.pass.cpp    |  8 +--
 .../comparisons.pass.cpp                      | 16 ++---
 .../streaming.pass.cpp                        | 38 ------------
 .../month_day_last.pass.cpp                   |  9 +--
 .../time.cal.wdidx.members/ok.pass.cpp        |  4 +-
 .../comparisons.pass.cpp                      |  4 +-
 .../streaming.pass.cpp                        | 37 ------------
 .../comparisons.pass.cpp                      |  4 +-
 .../streaming.pass.cpp                        | 35 -----------
 .../time.cal.weekday.members/ctor.pass.cpp    |  4 +-
 .../iso_encoding.pass.cpp                     |  6 +-
 .../comparisons.pass.cpp                      |  4 +-
 .../minus.pass.cpp                            | 12 ++--
 .../time.cal.weekday.nonmembers/plus.pass.cpp | 10 ++--
 .../streaming.pass.cpp                        | 57 ------------------
 .../comparisons.pass.cpp                      |  4 +-
 .../time.cal.year.nonmembers/minus.pass.cpp   |  2 -
 .../streaming.pass.cpp                        | 56 ------------------
 .../comparisons.pass.cpp                      | 10 ++--
 .../time.cal.ym.nonmembers/minus.pass.cpp     | 12 ++--
 .../time.cal.ym.nonmembers/streaming.pass.cpp | 58 ------------------
 .../ctor.sys_days.pass.cpp                    |  6 +-
 .../time.cal.ymd.members/ok.pass.cpp          |  6 +-
 .../op.local_days.pass.cpp                    |  4 +-
 .../time.cal.ymd.members/op.sys_days.pass.cpp |  6 +-
 .../comparisons.pass.cpp                      | 26 ++++----
 .../streaming.pass.cpp                        | 59 -------------------
 .../time.cal.ymdlast.members/day.pass.cpp     |  4 +-
 .../comparisons.pass.cpp                      | 18 +++---
 .../minus.pass.cpp                            |  4 +-
 .../streaming.pass.cpp                        | 38 ------------
 .../op.local_days.pass.cpp                    |  4 +-
 .../op.sys_days.pass.cpp                      |  4 +-
 .../comparisons.pass.cpp                      | 26 ++++----
 .../streaming.pass.cpp                        | 58 ------------------
 .../comparisons.pass.cpp                      | 26 ++++----
 .../streaming.pass.cpp                        | 39 ------------
 54 files changed, 148 insertions(+), 858 deletions(-)
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/streaming.pass.cpp
 delete mode 100644 libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/streaming.pass.cpp

diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index 8f57df3f9af4f..e1be3a6c114c1 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -108,6 +108,9 @@ API Changes
   as either ``for (auto&& c : path)`` or ``for (const auto& c : path)``.
   ``std::reverse_iterator<path::iterator>`` is no longer rejected.
 
+- Removed the nonstandard default constructor from ``std::chrono::month_weekday``.
+  You must now explicitly initialize with a ``chrono::month`` and
+  ``chrono::weekday_indexed`` instead of "meh, whenever".
 
 ABI Changes
 -----------
diff --git a/libcxx/include/__chrono/calendar.h b/libcxx/include/__chrono/calendar.h
index 0854ca60d3dfb..745f7f5cf5290 100644
--- a/libcxx/include/__chrono/calendar.h
+++ b/libcxx/include/__chrono/calendar.h
@@ -540,7 +540,6 @@ class month_weekday {
     chrono::month __m;
     chrono::weekday_indexed __wdi;
 public:
-    month_weekday() = default;
     constexpr month_weekday(const chrono::month& __mval, const chrono::weekday_indexed& __wdival) noexcept
         : __m{__mval}, __wdi{__wdival} {}
     inline constexpr chrono::month                     month() const noexcept { return __m; }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/comparisons.pass.cpp
index 252a1728da9de..fbae12e057883 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/comparisons.pass.cpp
@@ -33,7 +33,7 @@ int main(int, char**)
     static_assert(testComparisons6Values<day>(0U, 0U), "");
     static_assert(testComparisons6Values<day>(0U, 1U), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons6Values<day>( 5U,  5U), "");
     static_assert(testComparisons6Values<day>( 5U, 10U), "");
 
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/streaming.pass.cpp
deleted file mode 100644
index d53c67c492d92..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.day/time.cal.day.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-has-no-localization
-// XFAIL: *
-
-// <chrono>
-// class day;
-
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   operator<<(basic_ostream<charT, traits>& os, const day& d);
-//
-//   Effects: Inserts format(fmt, d) where fmt is "%d" widened to charT.
-//                If !d.ok(), appends with " is not a valid day".
-//
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   to_stream(basic_ostream<charT, traits>& os, const charT* fmt, const day& d);
-//
-//   Effects: Streams d into os using the format specified by the NTCTS fmt.
-//              fmt encoding follows the rules specified in 25.11.
-//
-// template<class charT, class traits, class Alloc = allocator<charT>>
-//   basic_istream<charT, traits>&
-//   from_stream(basic_istream<charT, traits>& is, const charT* fmt,
-//             day& d, basic_string<charT, traits, Alloc>* abbrev = nullptr,
-//             minutes* offset = nullptr);
-//
-//   Effects: Attempts to parse the input stream is into the day d using the format flags
-//             given in the NTCTS fmt as specified in 25.12.
-//             If the parse fails to decode a valid day, is.setstate(ios_base::failbit)
-//             shall be called and d shall not be modified.
-//             If %Z is used and successfully parsed, that value will be assigned to *abbrev
-//             if abbrev is non-null. If %z (or a modified variant) is used and
-//             successfully parsed, that value will be assigned to *offset if offset is non-null.
-//
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using day = std::chrono::day;
-   std::cout << day{1};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.members/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.members/ok.pass.cpp
index 7f25b2d52bd2f..2bc7a5aadd16c 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.members/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.members/ok.pass.cpp
@@ -48,7 +48,7 @@ int main(int, char**)
         assert(!(month_day{month{i}, day{32}}.ok()));
     }
 
-//  If the month is not ok, all the days are bad
+    //  If the month is not ok, all the days are bad
     for (unsigned i = 1; i <= 35; ++i)
         assert(!(month_day{month{13}, day{i}}.ok()));
 
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/comparisons.pass.cpp
index 573ec1a9030b2..d64e0e194ceaa 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/comparisons.pass.cpp
@@ -50,7 +50,7 @@ int main(int, char**)
         month_day{std::chrono::February, day{1}},
         false, true), "");
 
-//  same day, different months
+    //  same day, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons6(
@@ -58,7 +58,7 @@ int main(int, char**)
                 month_day{month{j}, day{1}},
                 i == j, i < j )));
 
-//  same month, different days
+    //  same month, different days
     for (unsigned i = 1; i < 31; ++i)
         for (unsigned j = 1; j < 31; ++j)
             assert((testComparisons6(
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 84305e4a6d89d..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.md/time.cal.md.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class month_day;
-
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const month_day& md);
-//
-//     Returns: os << md.month() << '/' << md.day().
-//
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     to_stream(basic_ostream<charT, traits>& os, const charT* fmt, const month_day& md);
-//
-// Effects: Streams md into os using the format specified by the NTCTS fmt.
-//          fmt encoding follows the rules specified in 25.11.
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using month_day = std::chrono::month_day;
-    using month     = std::chrono::month;
-    using day       = std::chrono::day;
-    std::cout << month_day{month{1}, day{1}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/comparisons.pass.cpp
index ebccc9f14ccba..a83b045beb4a3 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/comparisons.pass.cpp
@@ -35,10 +35,10 @@ int main(int, char**)
     static_assert( testComparisons6Values<month_day_last>(month{1}, month{1}), "");
     static_assert( testComparisons6Values<month_day_last>(month{1}, month{2}), "");
 
-//  same day, different months
-    for (unsigned i = 1; i < 12; ++i)
-        for (unsigned j = 1; j < 12; ++j)
-            assert((testComparisons6Values<month_day_last>(month{i}, month{j})));
+    // same day, different months
+    for (unsigned i = 1; i <= 12; ++i)
+        for (unsigned j = 1; j <= 12; ++j)
+            assert(testComparisons6Values<month_day_last>(month{i}, month{j}));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/ok.pass.cpp
index b9d6535890e3c..c6bec5d33a592 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/ok.pass.cpp
@@ -36,7 +36,7 @@ int main(int, char**)
         assert( mdl.ok());
     }
 
-//  If the month is not ok, all the days are bad
+    //  If the month is not ok, all the days are bad
     for (unsigned i = 13; i <= 50; ++i)
     {
         month_day_last mdl{month{i}};
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/streaming.pass.cpp
deleted file mode 100644
index 5960d0239f881..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mdlast/streaming.pass.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class month_day_last;
-//
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const month_day_last& mdl);
-//
-//     Returns: os << mdl.month() << "/last".
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using month_day_last = std::chrono::month_day_last;
-    using month          = std::chrono::month;
-    std::cout << month_day_last{month{1}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/comparisons.pass.cpp
index df709a5cb0432..41f31cd084de2 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/comparisons.pass.cpp
@@ -36,12 +36,12 @@ int main(int, char**)
     static_assert(testComparisons6Values<month>(0U ,0U), "");
     static_assert(testComparisons6Values<month>(0U, 1U), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons6Values<month>( 5U,  5U), "");
     static_assert(testComparisons6Values<month>( 5U, 10U), "");
 
-    for (unsigned i = 1; i < 10; ++i)
-        for (unsigned j = 10; j < 10; ++j)
+    for (unsigned i = 1; i <= 12; ++i)
+        for (unsigned j = 1; j <= 12; ++j)
             assert(testComparisons6Values<month>(i, j));
 
   return 0;
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/minus.pass.cpp
index 0f1bf7f0e87a8..0fe2f68e0e8ea 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/minus.pass.cpp
@@ -31,12 +31,13 @@ constexpr bool testConstexpr()
     {
     M m{5};
     Ms offset{3};
-    if (m - offset != M{2}) return false;
-    if (m - M{2} != offset) return false;
+    assert(m - offset == M{2});
+    assert(m - M{2} == offset);
     }
 
-//  Check the example
-    if (M{1} - M{2} != Ms{11}) return false;
+    //  Check the example
+    assert(M{1} - M{2} == Ms{11});
+
     return true;
 }
 
@@ -51,18 +52,18 @@ int main(int, char**)
     ASSERT_SAME_TYPE(month , decltype(std::declval<month>() - std::declval<months>()));
     ASSERT_SAME_TYPE(months, decltype(std::declval<month>() - std::declval<month> ()));
 
-static_assert(testConstexpr<month, months>(), "");
+    static_assert(testConstexpr<month, months>(), "");
 
     month m{6};
     for (unsigned i = 1; i <= 12; ++i)
     {
         month m1   = m - months{i};
-//      months off = m - month {i};
+        // months off = m - month {i};
         int exp = 6 - i;
         if (exp < 1)
             exp += 12;
         assert(static_cast<unsigned>(m1) == static_cast<unsigned>(exp));
-//          assert(off.count()               == static_cast<unsigned>(exp));
+        // assert(off.count()            == static_cast<unsigned>(exp));
     }
 
   return 0;
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/plus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/plus.pass.cpp
index 538efd32d8633..c40aef055301f 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/plus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/plus.pass.cpp
@@ -36,10 +36,10 @@ constexpr bool testConstexpr()
 {
     M m{1};
     Ms offset{4};
-    if (m + offset != M{5}) return false;
-    if (offset + m != M{5}) return false;
-//  Check the example
-    if (M{2} + Ms{11} != M{1}) return false;
+    assert(m + offset == M{5});
+    assert(offset + m == M{5});
+    //  Check the example
+    assert(M{2} + Ms{11} == M{1});
     return true;
 }
 
@@ -69,5 +69,5 @@ int main(int, char**)
         assert(static_cast<unsigned>(m2) == exp);
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 369fa6f4c2179..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.month/time.cal.month.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,54 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class month;
-
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   operator<<(basic_ostream<charT, traits>& os, const month& m);
-//
-//   Effects: If m.ok() == true inserts format(os.getloc(), fmt, m) where fmt is "%b" widened to charT.
-//   Otherwise inserts int{m} << " is not a valid month".
-//
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   to_stream(basic_ostream<charT, traits>& os, const charT* fmt, const month& m);
-//
-//   Effects: Streams m into os using the format specified by the NTCTS fmt.
-//   fmt encoding follows the rules specified in 25.11.
-//
-// template<class charT, class traits, class Alloc = allocator<charT>>
-//   basic_istream<charT, traits>&
-//   from_stream(basic_istream<charT, traits>& is, const charT* fmt,
-//             month& m, basic_string<charT, traits, Alloc>* abbrev = nullptr,
-//             minutes* offset = nullptr);
-//
-//   Effects: Attempts to parse the input stream is into the month m using the format flags
-//   given in the NTCTS fmt as specified in 25.12. If the parse fails to decode a valid month,
-//   is.setstate(ios_- base::failbit) shall be called and m shall not be modified.
-//   If %Z is used and successfully parsed, that value will be assigned to *abbrev if
-//   abbrev is non-null. If %z (or a modified variant) is used and successfully parsed,
-//   that value will be assigned to *offset if offset is non-null.
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using month = std::chrono::month;
-   std::cout << month{1};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/month.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/month.pass.cpp
index daf959e424c69..f6e79f5a90d09 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/month.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/month.pass.cpp
@@ -31,8 +31,6 @@ int main(int, char**)
     ASSERT_NOEXCEPT(                 std::declval<const month_weekday>().month());
     ASSERT_SAME_TYPE(month, decltype(std::declval<const month_weekday>().month()));
 
-    static_assert( month_weekday{}.month() == month{}, "");
-
     for (unsigned i = 1; i <= 50; ++i)
     {
         month_weekday md(month{i}, weekday_indexed{Sunday, 1});
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/ok.pass.cpp
index f76797cbae511..a46ffabb9b71d 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.members/ok.pass.cpp
@@ -44,9 +44,9 @@ int main(int, char**)
             assert(mwd.ok() == (j >= 1 && j <= 5));
         }
 
-//  If the month is not ok, all the weekday_indexed are bad
+    //  If the month is not ok, all the weekday_indexed are bad
     for (unsigned i = 1; i <= 10; ++i)
         assert(!(month_weekday{month{13}, weekday_indexed{Sunday, i}}.ok()));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 96555e36238d8..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwd/time.cal.mwd.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class month_weekday;
-
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const month_weekday& mwd);
-//
-//     Returns: os << mwd.month() << '/' << mwd.weekday_indexed().
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using month_weekday   = std::chrono::month_weekday;
-    using month           = std::chrono::month;
-    using weekday_indexed = std::chrono::weekday_indexed;
-    using weekday         = std::chrono::weekday;
-
-    std::cout << month_weekday{month{1}, weekday_indexed{weekday{3}, 3}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.members/ctor.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.members/ctor.pass.cpp
index af9c93d5820bc..69e58d3fc1963 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.members/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.members/ctor.pass.cpp
@@ -39,23 +39,23 @@ int main(int, char**)
 
     ASSERT_NOEXCEPT(month_weekday_last{January, weekday_last{Tuesday}});
 
-//  bad month
+    //  bad month
     constexpr month_weekday_last mwdl1{month{}, weekday_last{Tuesday}};
     static_assert( mwdl1.month() == month{},                      "");
     static_assert( mwdl1.weekday_last() == weekday_last{Tuesday}, "");
     static_assert(!mwdl1.ok(),                                    "");
 
-//  bad weekday_last
+    //  bad weekday_last
     constexpr month_weekday_last mwdl2{January, weekday_last{weekday{16}}};
     static_assert( mwdl2.month() == January,                          "");
     static_assert( mwdl2.weekday_last() == weekday_last{weekday{16}}, "");
     static_assert(!mwdl2.ok(),                                        "");
 
-//  Good month and weekday_last
+    //  Good month and weekday_last
     constexpr month_weekday_last mwdl3{January, weekday_last{weekday{4}}};
     static_assert( mwdl3.month() == January,                         "");
     static_assert( mwdl3.weekday_last() == weekday_last{weekday{4}}, "");
     static_assert( mwdl3.ok(),                                       "");
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/comparisons.pass.cpp
index 38f85570650ef..91b5e4f63773e 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/comparisons.pass.cpp
@@ -48,7 +48,7 @@ int main(int, char**)
         month_weekday_last{std::chrono::January, weekday_last{Wednesday}},
         false), "");
 
-//  vary the months
+    //  vary the months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons2(
@@ -56,7 +56,7 @@ int main(int, char**)
                 month_weekday_last{month{j}, weekday_last{Tuesday}},
             i == j)));
 
-//  vary the weekday
+    //  vary the weekday
     for (unsigned i = 0; i < 6; ++i)
         for (unsigned j = 0; j < 6; ++j)
             assert((testComparisons2(
@@ -64,11 +64,11 @@ int main(int, char**)
                 month_weekday_last{January, weekday_last{weekday{j}}},
             i == j)));
 
-//  both different
-        assert((testComparisons2(
-            month_weekday_last{month{1}, weekday_last{weekday{1}}},
-            month_weekday_last{month{2}, weekday_last{weekday{2}}},
-        false)));
+    //  both different
+    assert((testComparisons2(
+        month_weekday_last{month{1}, weekday_last{weekday{1}}},
+        month_weekday_last{month{2}, weekday_last{weekday{2}}},
+    false)));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 3e7eb9f38bd2e..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.mwdlast/time.cal.mwdlast.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class month_weekday_last;
-//
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const month_weekday_last& mdl);
-//
-//     Returns: os << mdl.month() << "/last".
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using month_weekday_last = std::chrono::month_weekday_last;
-    using month              = std::chrono::month;
-    using weekday            = std::chrono::weekday;
-    using weekday_last       = std::chrono::weekday_last;
-
-    std::cout << month_weekday_last{month{1}, weekday_last{weekday{3}}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.operators/month_day_last.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.operators/month_day_last.pass.cpp
index ddc9cd52464d1..ae9d7e4fcab56 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.operators/month_day_last.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.operators/month_day_last.pass.cpp
@@ -34,11 +34,6 @@
 //     static_assert(mdl.month() == February);
 // --end example]
 
-
-
-
-
-
 #include <chrono>
 #include <type_traits>
 #include <cassert>
@@ -57,7 +52,7 @@ int main(int, char**)
     ASSERT_SAME_TYPE(month_day_last, decltype(last/February));
     ASSERT_SAME_TYPE(month_day_last, decltype(February/last));
 
-//  Run the example
+    //  Run the example
     {
     constexpr auto mdl = February/std::chrono::last;
     static_assert(mdl.month() == February, "");
@@ -104,5 +99,5 @@ int main(int, char**)
     }
 
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.members/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.members/ok.pass.cpp
index a4ed7bf539cea..3c96412bcefd1 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.members/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.members/ok.pass.cpp
@@ -43,8 +43,8 @@ int main(int, char**)
         assert(!wdi.ok());
     }
 
-//  Not a valid weekday
+    //  Not a valid weekday
     assert(!(weekday_indexed(weekday{9U}, 1).ok()));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/comparisons.pass.cpp
index 58520205c45a8..c56c2442c2aed 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/comparisons.pass.cpp
@@ -36,7 +36,7 @@ int main(int, char**)
     static_assert(!(weekday_indexed{} == weekday_indexed{std::chrono::Tuesday, 1}), "");
     static_assert( (weekday_indexed{} != weekday_indexed{std::chrono::Tuesday, 1}), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert( (weekday_indexed{weekday{1}, 2} == weekday_indexed{weekday{1}, 2}), "");
     static_assert(!(weekday_indexed{weekday{1}, 2} != weekday_indexed{weekday{1}, 2}), "");
 
@@ -45,5 +45,5 @@ int main(int, char**)
     static_assert(!(weekday_indexed{weekday{1}, 2} == weekday_indexed{weekday{2}, 2}),  "");
     static_assert( (weekday_indexed{weekday{1}, 2} != weekday_indexed{weekday{2}, 2}),  "");
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/streaming.pass.cpp
deleted file mode 100644
index be16c8fa883ac..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdidx/time.cal.wdidx.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,37 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class weekday_indexed;
-
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   operator<<(basic_ostream<charT, traits>& os, const weekday_indexed& wdi);
-//
-//   Effects: os << wdi.weekday() << '[' << wdi.index().
-//     If wdi.index() is in the range [1, 5], appends with ']',
-//       otherwise appends with " is not a valid index]".
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using weekday_indexed = std::chrono::weekday_indexed;
-    using weekday         = std::chrono::weekday;
-
-    std::cout << weekday_indexed{weekday{3}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/comparisons.pass.cpp
index 03f4c3af8ecbc..bb02729552a09 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/comparisons.pass.cpp
@@ -32,7 +32,7 @@ int main(int, char**)
     static_assert(testComparisons2Values<weekday_last>(weekday{0}, weekday{0}), "");
     static_assert(testComparisons2Values<weekday_last>(weekday{0}, weekday{1}), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons2Values<weekday_last>(weekday{2}, weekday{2}), "");
     static_assert(testComparisons2Values<weekday_last>(weekday{2}, weekday{3}), "");
 
@@ -40,5 +40,5 @@ int main(int, char**)
         for (unsigned j = 0; j < 6; ++j)
             assert(testComparisons2Values<weekday_last>(weekday{i}, weekday{j}));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/streaming.pass.cpp
deleted file mode 100644
index a3941a5744bc5..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.wdlast/time.cal.wdlast.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class weekday_last;
-
-//   template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const weekday_last& wdl);
-//
-//   Returns: os << wdl.weekday() << "[last]".
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using weekday_last = std::chrono::weekday_last;
-   using weekday      = std::chrono::weekday;
-
-   std::cout << weekday_last{weekday{3}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/ctor.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/ctor.pass.cpp
index 6d7be0a4ead72..d59df23b563a8 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/ctor.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
         assert(m.c_encoding() == (i == 7 ? 0 : i));
     }
 
-// TODO - sys_days and local_days ctor tests
+    // TODO - sys_days and local_days ctor tests
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/iso_encoding.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/iso_encoding.pass.cpp
index afb6a81a9d5ef..0e56a3e0f79fe 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/iso_encoding.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.members/iso_encoding.pass.cpp
@@ -36,13 +36,13 @@ int main(int, char**)
 
     static_assert(testConstexpr<weekday>(), "");
 
-//  This is different than all the other tests, because the '7' gets converted to
-//  a zero in the constructor, but then back to '7' by iso_encoding().
+    //  This is different than all the other tests, because the '7' gets converted to
+    //  a zero in the constructor, but then back to '7' by iso_encoding().
     for (unsigned i = 0; i <= 10; ++i)
     {
         weekday wd(i);
         assert(wd.iso_encoding() == (i == 0 ? 7 : i));
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/comparisons.pass.cpp
index 213f490559c20..759a78b61a5ce 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/comparisons.pass.cpp
@@ -31,7 +31,7 @@ int main(int, char**)
     static_assert(testComparisons2Values<weekday>(0U ,0U), "");
     static_assert(testComparisons2Values<weekday>(0U, 1U), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons2Values<weekday>(5U, 5U), "");
     static_assert(testComparisons2Values<weekday>(5U, 2U), "");
 
@@ -39,5 +39,5 @@ int main(int, char**)
         for (unsigned j = 0; j < 6; ++j)
             assert(testComparisons2Values<weekday>(i, j));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/minus.pass.cpp
index 7bac92761e4e8..434662ba6c8a5 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/minus.pass.cpp
@@ -19,9 +19,6 @@
 // Otherwise the value returned is unspecified.
 // [Example: Sunday - Monday == days{6}. —end example]
 
-
-extern "C" int printf(const char *, ...);
-
 #include <chrono>
 #include <type_traits>
 #include <cassert>
@@ -35,12 +32,13 @@ constexpr bool testConstexpr()
     {
     WD wd{5};
     Ds offset{3};
-    if (wd - offset != WD{2}) return false;
-    if (wd - WD{2} != offset) return false;
+    assert(wd - offset == WD{2});
+    assert(wd - WD{2} == offset);
     }
 
-//  Check the example
-    if (WD{0} - WD{1} != Ds{6}) return false;
+    //  Check the example
+    assert(WD{0} - WD{1} == Ds{6});
+
     return true;
 }
 
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/plus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/plus.pass.cpp
index 7d98b471faa22..1adcee8f8ede2 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/plus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/plus.pass.cpp
@@ -37,10 +37,10 @@ constexpr bool testConstexpr()
 {
     M m{1};
     Ms offset{4};
-    if (m + offset != M{5}) return false;
-    if (offset + m != M{5}) return false;
-//  Check the example
-    if (M{1} + Ms{6} != M{0}) return false;
+    assert(m + offset == M{5});
+    assert(offset + m == M{5});
+    //  Check the example
+    assert(M{1} + Ms{6} == M{0});
     return true;
 }
 
@@ -67,5 +67,5 @@ int main(int, char**)
             assert((wd2.c_encoding() == euclidian_addition<unsigned, 0, 6>(i, j)));
         }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 300d12f0fae1c..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.weekday/time.cal.weekday.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class weekday;
-
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   operator<<(basic_ostream<charT, traits>& os, const weekday& wd);
-//
-//   Effects: If wd.ok() == true inserts format(os.getloc(), fmt, wd) where fmt is "%a" widened to charT.
-//     Otherwise inserts unsigned{wd} << " is not a valid weekday".
-//
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   to_stream(basic_ostream<charT, traits>& os, const charT* fmt, const weekday& wd);
-//
-//   Effects: Streams wd into os using the format specified by the NTCTS fmt.
-//   fmt encoding follows the rules specified in 25.11.
-//
-// template<class charT, class traits, class Alloc = allocator<charT>>
-//   basic_istream<charT, traits>&
-//   from_stream(basic_istream<charT, traits>& is, const charT* fmt,
-//             weekday& wd, basic_string<charT, traits, Alloc>* abbrev = nullptr,
-//             minutes* offset = nullptr);
-//
-//   Effects: Attempts to parse the input stream is into the weekday wd using
-//       the format flags given in the NTCTS fmt as specified in 25.12.
-//     If the parse fails to decode a valid weekday, is.setstate(ios_- base::failbit)
-//       shall be called and wd shall not be modified.
-//     If %Z is used and successfully parsed, that value will be assigned
-//       to *abbrev if abbrev is non-null.
-//     If %z (or a modified variant) is used and successfully parsed,
-//       that value will be assigned to *offset if offset is non-null.
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using weekday = std::chrono::weekday;
-
-   std::cout << weekday{3};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/comparisons.pass.cpp
index 1e4e2a16babf5..9d92055e64ba7 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/comparisons.pass.cpp
@@ -36,7 +36,7 @@ int main(int, char**)
     static_assert(testComparisons6Values<year>(0,0), "");
     static_assert(testComparisons6Values<year>(0,1), "");
 
-//  Some 'ok' values as well
+    //  Some 'ok' values as well
     static_assert(testComparisons6Values<year>( 5, 5), "");
     static_assert(testComparisons6Values<year>( 5,10), "");
 
@@ -44,5 +44,5 @@ int main(int, char**)
         for (int j = 1; j < 10; ++j)
             assert(testComparisons6Values<year>(i, j));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/minus.pass.cpp
index 0681152c439ef..b38110772fc51 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/minus.pass.cpp
@@ -19,8 +19,6 @@
 //   Otherwise the value returned is unspecified.
 //   [Example: January - February == years{11}. —end example]
 
-extern "C" int printf(const char *, ...);
-
 #include <chrono>
 #include <type_traits>
 #include <cassert>
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 9ba502f63ea2a..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.year/time.cal.year.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class year;
-
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   operator<<(basic_ostream<charT, traits>& os, const year& y);
-//
-//   Effects: Inserts format(fmt, y) where fmt is "%Y" widened to charT.
-//   If !y.ok(), appends with " is not a valid year".
-//
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   to_stream(basic_ostream<charT, traits>& os, const charT* fmt, const year& y);
-//
-//   Effects: Streams y into os using the format specified by the NTCTS fmt.
-//     fmt encoding follows the rules specified in 25.11.
-//
-// template<class charT, class traits, class Alloc = allocator<charT>>
-//   basic_istream<charT, traits>&
-//   from_stream(basic_istream<charT, traits>& is, const charT* fmt,
-//               year& y, basic_string<charT, traits, Alloc>* abbrev = nullptr,
-//               minutes* offset = nullptr);
-//
-//   Effects: Attempts to parse the input stream is into the year y using the format flags
-//     given in the NTCTS fmt as specified in 25.12. If the parse fails to decode a valid year,
-//     is.setstate(ios_base::failbit) shall be called and y shall not be modified. If %Z is used
-//     and successfully parsed, that value will be assigned to *abbrev if abbrev is non-null.
-//     If %z (or a modified variant) is used and successfully parsed, that value will be
-//     assigned to *offset if offset is non-null.
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-   using year = std::chrono::year;
-
-   std::cout << year{2018};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/comparisons.pass.cpp
index fce247d42d37f..031b512e4e3d1 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/comparisons.pass.cpp
@@ -50,7 +50,7 @@ int main(int, char**)
         year_month{year{1235}, std::chrono::January},
         false, true), "");
 
-//  same year, different months
+    //  same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons6(
@@ -58,13 +58,13 @@ int main(int, char**)
                 year_month{year{1234}, month{j}},
                 i == j, i < j )));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    //  same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons6(
             year_month{year{i}, std::chrono::January},
             year_month{year{j}, std::chrono::January},
             i == j, i < j )));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/minus.pass.cpp
index 73cec3b7607f5..3c58ed0e14019 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/minus.pass.cpp
@@ -39,7 +39,7 @@ int main(int, char**)
     ASSERT_NOEXCEPT(                      std::declval<year_month>() - std::declval<years>());
     ASSERT_SAME_TYPE(year_month, decltype(std::declval<year_month>() - std::declval<years>()));
 
-//  static_assert(testConstexprYears (year_month{year{1}, month{1}}), "");
+    // static_assert(testConstexprYears (year_month{year{1}, month{1}}), "");
 
     year_month ym{year{1234}, std::chrono::January};
     for (int i = 0; i <= 10; ++i)
@@ -54,7 +54,7 @@ int main(int, char**)
     ASSERT_NOEXCEPT(                      std::declval<year_month>() - std::declval<months>());
     ASSERT_SAME_TYPE(year_month, decltype(std::declval<year_month>() - std::declval<months>()));
 
-//  static_assert(testConstexprMonths(year_month{year{1}, month{1}}), "");
+    // static_assert(testConstexprMonths(year_month{year{1}, month{1}}), "");
 
     year_month ym{year{1234}, std::chrono::November};
     for (int i = 0; i <= 10; ++i)  // TODO test wrap-around
@@ -69,9 +69,9 @@ int main(int, char**)
     ASSERT_NOEXCEPT(                  std::declval<year_month>() - std::declval<year_month>());
     ASSERT_SAME_TYPE(months, decltype(std::declval<year_month>() - std::declval<year_month>()));
 
-//  static_assert(testConstexprMonths(year_month{year{1}, month{1}}), "");
+    // static_assert(testConstexprMonths(year_month{year{1}, month{1}}), "");
 
-//  Same year
+    //  Same year
     year y{2345};
     for (int i = 1; i <= 12; ++i)
         for (int j = 1; j <= 12; ++j)
@@ -80,9 +80,9 @@ int main(int, char**)
         assert(diff.count() == i - j);
     }
 
-//  TODO: different year
+    // TODO: different year
 
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/streaming.pass.cpp
deleted file mode 100644
index d2aee69fe0ae0..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ym/time.cal.ym.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class year_month;
-
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const year_month& ym);
-//
-// Returns: os << ym.year() << '/' << ym.month().
-//
-//
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     to_stream(basic_ostream<charT, traits>& os, const charT* fmt, const year_month& ym);
-//
-// Effects: Streams ym into os using the format specified by the NTCTS fmt. fmt encoding follows the rules specified in 25.11.
-//
-// template<class charT, class traits, class Alloc = allocator<charT>>
-//     basic_istream<charT, traits>&
-//   from_stream(basic_istream<charT, traits>& is, const charT* fmt,
-//               year_month& ym, basic_string<charT, traits, Alloc>* abbrev = nullptr,
-//               minutes* offset = nullptr);
-//
-// Effects: Attempts to parse the input stream is into the year_month ym using the format
-//         flags given in the NTCTS fmt as specified in 25.12. If the parse fails to decode
-//         a valid year_month, is.setstate(ios_- base::failbit) shall be called and ym shall
-//         not be modified. If %Z is used and successfully parsed, that value will be assigned
-//         to *abbrev if abbrev is non-null. If %z (or a modified variant) is used and
-//         successfully parsed, that value will be assigned to *offset if offset is non-null.
-
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month = std::chrono::year_month;
-    using year       = std::chrono::year;
-    using month      = std::chrono::month;
-
-    std::cout << year_month{year{2018}, month{3}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ctor.sys_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ctor.sys_days.pass.cpp
index 01311ce66b0ec..b0d963a58a4d9 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ctor.sys_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ctor.sys_days.pass.cpp
@@ -59,8 +59,8 @@ int main(int, char**)
     }
 
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    //  There's one more leap day between 1/1/40 and 1/1/70
+    //  when compared to 1/1/70 -> 1/1/2000
     {
     constexpr sys_days sd{days{-10957}};
     constexpr year_month_day ymd{sd};
@@ -81,5 +81,5 @@ int main(int, char**)
     assert( ymd.day()   == day{29});
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ok.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ok.pass.cpp
index 078cc8591cd7b..8d38a6f024dc8 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ok.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/ok.pass.cpp
@@ -43,7 +43,7 @@ int main(int, char**)
 
     static_assert( year_month_day{year{2019},   January, day{1}}.ok(), ""); // All OK
 
-//  Some months have a 31st
+    // Some months have a 31st
     static_assert( year_month_day{year{2020},   month{ 1}, day{31}}.ok(), "");
     static_assert(!year_month_day{year{2020},   month{ 2}, day{31}}.ok(), "");
     static_assert( year_month_day{year{2020},   month{ 3}, day{31}}.ok(), "");
@@ -57,7 +57,7 @@ int main(int, char**)
     static_assert(!year_month_day{year{2020},   month{11}, day{31}}.ok(), "");
     static_assert( year_month_day{year{2020},   month{12}, day{31}}.ok(), "");
 
-//  Everyone except FEB has a 30th
+    // Everyone except FEB has a 30th
     static_assert( year_month_day{year{2020},   month{ 1}, day{30}}.ok(), "");
     static_assert(!year_month_day{year{2020},   month{ 2}, day{30}}.ok(), "");
     static_assert( year_month_day{year{2020},   month{ 3}, day{30}}.ok(), "");
@@ -93,5 +93,5 @@ int main(int, char**)
         assert( ym.ok() == year{i}.ok());
     }
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.local_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.local_days.pass.cpp
index 76292f5da3fc3..54f6d84452f2c 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.local_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.local_days.pass.cpp
@@ -72,8 +72,8 @@ int main(int, char**)
     static_assert( year_month_day{sd} == ymd, ""); // and back
     }
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    // There's one more leap day between 1/1/40 and 1/1/70
+    // when compared to 1/1/70 -> 1/1/2000
     {
     constexpr year_month_day ymd{year{1940}, month{1}, day{2}};
     constexpr local_days sd{ymd};
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.sys_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.sys_days.pass.cpp
index bc801924d4c66..195ddfb72d053 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.sys_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.members/op.sys_days.pass.cpp
@@ -72,8 +72,8 @@ int main(int, char**)
     static_assert( year_month_day{sd} == ymd, ""); // and back
     }
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    // There's one more leap day between 1/1/40 and 1/1/70
+    // when compared to 1/1/70 -> 1/1/2000
     {
     constexpr year_month_day ymd{year{1940}, month{1}, day{2}};
     constexpr sys_days sd{ymd};
@@ -90,7 +90,7 @@ int main(int, char**)
     assert( year_month_day{sd} == ymd); // and back
     }
 
-//  These two tests check the wording for LWG 3206
+    // These two tests check the wording for LWG 3206
     {
     constexpr year_month_day ymd{year{1971}, month{1}, day{0}}; // bad day
     static_assert(!ymd.ok(),         "");
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/comparisons.pass.cpp
index 5290759783750..287dc3a33924f 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/comparisons.pass.cpp
@@ -47,51 +47,51 @@ int main(int, char**)
         year_month_day{year{1234}, January, day{1}},
         true, false), "");
 
-//  different day
+    // different day
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{1}},
         year_month_day{year{1234}, January, day{2}},
         false, true), "");
 
-//  different month
+    // different month
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{1}},
         year_month_day{year{1234}, February, day{1}},
         false, true), "");
 
-//  different year
+    // different year
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{1}},
         year_month_day{year{1235}, January, day{1}},
         false, true), "");
 
 
-//  different month and day
+    // different month and day
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{2}},
         year_month_day{year{1234}, February, day{1}},
         false, true), "");
 
-//  different year and month
+    // different year and month
     static_assert( testComparisons6(
         year_month_day{year{1234}, February, day{1}},
         year_month_day{year{1235}, January, day{1}},
         false, true), "");
 
-//  different year and day
+    // different year and day
     static_assert( testComparisons6(
         year_month_day{year{1234}, January, day{2}},
         year_month_day{year{1235}, January, day{1}},
         false, true), "");
 
-//  different year, month and day
+    // different year, month and day
     static_assert( testComparisons6(
         year_month_day{year{1234}, February, day{2}},
         year_month_day{year{1235}, January, day{1}},
         false, true), "");
 
 
-//  same year, different days
+    // same year, different days
     for (unsigned i = 1; i < 28; ++i)
         for (unsigned j = 1; j < 28; ++j)
             assert((testComparisons6(
@@ -99,7 +99,7 @@ int main(int, char**)
                 year_month_day{year{1234}, January, day{j}},
                 i == j, i < j )));
 
-//  same year, different months
+    // same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons6(
@@ -107,13 +107,13 @@ int main(int, char**)
                 year_month_day{year{1234}, month{j}, day{12}},
                 i == j, i < j )));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    // same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons6(
             year_month_day{year{i}, January, day{12}},
             year_month_day{year{j}, January, day{12}},
             i == j, i < j )));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 3991efad078af..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymd/time.cal.ymd.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class year_month_day;
-
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const year_month_day& ym);
-//
-// Returns: os << ym.year() << '/' << ym.month().
-//
-//
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     to_stream(basic_ostream<charT, traits>& os, const charT* fmt, const year_month_day& ym);
-//
-// Effects: Streams ym into os using the format specified by the NTCTS fmt. fmt encoding follows the rules specified in 25.11.
-//
-// template<class charT, class traits, class Alloc = allocator<charT>>
-//     basic_istream<charT, traits>&
-//   from_stream(basic_istream<charT, traits>& is, const charT* fmt,
-//               year_month_day& ym, basic_string<charT, traits, Alloc>* abbrev = nullptr,
-//               minutes* offset = nullptr);
-//
-// Effects: Attempts to parse the input stream is into the year_month_day ym using the format
-//         flags given in the NTCTS fmt as specified in 25.12. If the parse fails to decode
-//         a valid year_month_day, is.setstate(ios_- base::failbit) shall be called and ym shall
-//         not be modified. If %Z is used and successfully parsed, that value will be assigned
-//         to *abbrev if abbrev is non-null. If %z (or a modified variant) is used and
-//         successfully parsed, that value will be assigned to *offset if offset is non-null.
-
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month_day = std::chrono::year_month_day;
-    using year           = std::chrono::year;
-    using month          = std::chrono::month;
-    using day            = std::chrono::day;
-
-    std::cout << year_month_day{year{2018}, month{3}, day{12}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.members/day.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.members/day.pass.cpp
index 0dc3adab81d71..cf3c109fb1351 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.members/day.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.members/day.pass.cpp
@@ -30,7 +30,7 @@ int main(int, char**)
     ASSERT_NOEXCEPT(               std::declval<const year_month_day_last>().day());
     ASSERT_SAME_TYPE(day, decltype(std::declval<const year_month_day_last>().day()));
 
-//  Some months have a 31st
+    // Some months have a 31st
     static_assert( year_month_day_last{year{2020}, month_day_last{month{ 1}}}.day() == day{31}, "");
     static_assert( year_month_day_last{year{2020}, month_day_last{month{ 2}}}.day() == day{29}, "");
     static_assert( year_month_day_last{year{2020}, month_day_last{month{ 3}}}.day() == day{31}, "");
@@ -48,5 +48,5 @@ int main(int, char**)
     assert((year_month_day_last{year{2020}, month_day_last{month{ 2}}}.day() == day{29}));
     assert((year_month_day_last{year{2021}, month_day_last{month{ 2}}}.day() == day{28}));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/comparisons.pass.cpp
index 4e4a59b9f220b..54b470750ec64 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/comparisons.pass.cpp
@@ -44,31 +44,31 @@ int main(int, char**)
         year_month_day_last{year{1234}, month_day_last{January}},
         true, false), "");
 
-//  different month
+    // different month
     static_assert( testComparisons6(
         year_month_day_last{year{1234}, month_day_last{January}},
         year_month_day_last{year{1234}, month_day_last{February}},
         false, true), "");
 
-//  different year
+    // different year
     static_assert( testComparisons6(
         year_month_day_last{year{1234}, month_day_last{January}},
         year_month_day_last{year{1235}, month_day_last{January}},
         false, true), "");
 
-//  different month
+    // different month
     static_assert( testComparisons6(
         year_month_day_last{year{1234}, month_day_last{January}},
         year_month_day_last{year{1234}, month_day_last{February}},
         false, true), "");
 
-//  different year and month
+    // different year and month
     static_assert( testComparisons6(
         year_month_day_last{year{1234}, month_day_last{February}},
         year_month_day_last{year{1235}, month_day_last{January}},
         false, true), "");
 
-//  same year, different months
+    // same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons6(
@@ -76,13 +76,13 @@ int main(int, char**)
                 year_month_day_last{year{1234}, month_day_last{month{j}}},
                 i == j, i < j )));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    // same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons6(
             year_month_day_last{year{i}, month_day_last{January}},
             year_month_day_last{year{j}, month_day_last{January}},
             i == j, i < j )));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/minus.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/minus.pass.cpp
index d8d58b0506023..59dca841e7f78 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/minus.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/minus.pass.cpp
@@ -75,7 +75,7 @@ int main(int, char**)
     ASSERT_SAME_TYPE(year_month_day_last, decltype(std::declval<year_month_day_last>() - std::declval<months>()));
 
     static_assert(testConstexprMonths(year_month_day_last{year{1234}, month_day_last{December}}), "");
-//  TODO test wrapping
+    // TODO test wrapping
     year_month_day_last ym{year{1234}, month_day_last{December}};
     for (unsigned i = 0; i <= 10; ++i)
     {
@@ -86,5 +86,5 @@ int main(int, char**)
     }
 
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 1476fc824bb96..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymdlast/time.cal.ymdlast.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,38 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class year_month_day_last;
-
-// template<class charT, class traits>
-//   basic_ostream<charT, traits>&
-//   operator<<(basic_ostream<charT, traits>& os, const year_month_day_last& ymdl);
-//
-// Returns: os << ymdl.year() << '/' << ymdl.month_day_last().
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month_day_last = std::chrono::year_month_day_last;
-    using year                = std::chrono::year;
-    using month               = std::chrono::month;
-    using month_day_last      = std::chrono::month_day_last;
-
-    std::cout << year_month_day_last{year{2018}, month_day_last{month{3}}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.local_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.local_days.pass.cpp
index ea25715211f01..51d1942d3905b 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.local_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.local_days.pass.cpp
@@ -52,8 +52,8 @@ int main(int, char**)
     static_assert( year_month_weekday{sd} == ymwd, ""); // and back
     }
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    // There's one more leap day between 1/1/40 and 1/1/70
+    // when compared to 1/1/70 -> 1/1/2000
     {
     constexpr year_month_weekday ymwd{year{1940}, month{1},weekday_indexed{std::chrono::Tuesday, 1}};
     constexpr local_days sd{ymwd};
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.sys_days.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.sys_days.pass.cpp
index 52ca0fc061375..9bfa0472f7bce 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.sys_days.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.members/op.sys_days.pass.cpp
@@ -52,8 +52,8 @@ int main(int, char**)
     static_assert( year_month_weekday{sd} == ymwd, ""); // and back
     }
 
-//  There's one more leap day between 1/1/40 and 1/1/70
-//  when compared to 1/1/70 -> 1/1/2000
+    // There's one more leap day between 1/1/40 and 1/1/70
+    // when compared to 1/1/70 -> 1/1/2000
     {
     constexpr year_month_weekday ymwd{year{1940}, month{1},weekday_indexed{std::chrono::Tuesday, 1}};
     constexpr sys_days sd{ymwd};
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/comparisons.pass.cpp
index 438fb48bd9d13..be0a84cae535e 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/comparisons.pass.cpp
@@ -42,51 +42,51 @@ int main(int, char**)
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 1}},
         true), "");
 
-//  different day
+    // different day
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 2}},
         false), "");
 
-//  different month
+    // different month
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January,  weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1234}, February, weekday_indexed{Tuesday, 1}},
         false), "");
 
-//  different year
+    // different year
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1235}, January, weekday_indexed{Tuesday, 1}},
         false), "");
 
 
-//  different month and day
+    // different month and day
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January,  weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1234}, February, weekday_indexed{Tuesday, 2}},
         false), "");
 
-//  different year and month
+    // different year and month
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, February, weekday_indexed{Tuesday, 1}},
         year_month_weekday{year{1235}, January,  weekday_indexed{Tuesday, 1}},
         false), "");
 
-//  different year and day
+    // different year and day
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, 2}},
         year_month_weekday{year{1235}, January, weekday_indexed{Tuesday, 1}},
         false), "");
 
-//  different year, month and day
+    // different year, month and day
     static_assert( testComparisons2(
         year_month_weekday{year{1234}, February, weekday_indexed{Tuesday, 2}},
         year_month_weekday{year{1235}, January,  weekday_indexed{Tuesday, 1}},
         false), "");
 
 
-//  same year, different days
+    // same year, different days
     for (unsigned i = 1; i < 28; ++i)
         for (unsigned j = 1; j < 28; ++j)
             assert((testComparisons2(
@@ -94,7 +94,7 @@ int main(int, char**)
                 year_month_weekday{year{1234}, January, weekday_indexed{Tuesday, j}},
                 i == j)));
 
-//  same year, different months
+    // same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons2(
@@ -102,13 +102,13 @@ int main(int, char**)
                 year_month_weekday{year{1234}, month{j}, weekday_indexed{Tuesday, 1}},
                 i == j)));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    // same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons2(
             year_month_weekday{year{i}, January, weekday_indexed{Tuesday, 1}},
             year_month_weekday{year{j}, January, weekday_indexed{Tuesday, 1}},
             i == j)));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 21660825d0e0b..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwd/time.cal.ymwd.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class year_month_weekday;
-
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const year_month_weekday& ym);
-//
-// Returns: os << ym.year() << '/' << ym.month().
-//
-//
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     to_stream(basic_ostream<charT, traits>& os, const charT* fmt, const year_month_weekday& ym);
-//
-// Effects: Streams ym into os using the format specified by the NTCTS fmt. fmt encoding follows the rules specified in 25.11.
-//
-// template<class charT, class traits, class Alloc = allocator<charT>>
-//     basic_istream<charT, traits>&
-//   from_stream(basic_istream<charT, traits>& is, const charT* fmt,
-//               year_month_weekday& ym, basic_string<charT, traits, Alloc>* abbrev = nullptr,
-//               minutes* offset = nullptr);
-//
-// Effects: Attempts to parse the input stream is into the year_month_weekday ym using the format
-//         flags given in the NTCTS fmt as specified in 25.12. If the parse fails to decode
-//         a valid year_month_weekday, is.setstate(ios_- base::failbit) shall be called and ym shall
-//         not be modified. If %Z is used and successfully parsed, that value will be assigned
-//         to *abbrev if abbrev is non-null. If %z (or a modified variant) is used and
-//         successfully parsed, that value will be assigned to *offset if offset is non-null.
-
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month_weekday = std::chrono::year_month_weekday;
-    using year                = std::chrono::year;
-    using month               = std::chrono::month;
-    using weekday             = std::chrono::weekday;
-
-    std::cout << year_month_weekday{year{2018}, month{3}, weekday{4}};
-
-  return 0;
-}
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/comparisons.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/comparisons.pass.cpp
index 2e9dbad81b82f..5c008c76b21d2 100644
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/comparisons.pass.cpp
+++ b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/comparisons.pass.cpp
@@ -43,51 +43,51 @@ int main(int, char**)
         year_month_weekday_last{year{1234}, January, weekday_last{Tuesday}},
         true), "");
 
-//  different day
+    // different day
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January, weekday_last{Tuesday}},
         year_month_weekday_last{year{1234}, January, weekday_last{Wednesday}},
         false), "");
 
-//  different month
+    // different month
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January,  weekday_last{Tuesday}},
         year_month_weekday_last{year{1234}, February, weekday_last{Tuesday}},
         false), "");
 
-//  different year
+    // different year
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January, weekday_last{Tuesday}},
         year_month_weekday_last{year{1235}, January, weekday_last{Tuesday}},
         false), "");
 
 
-//  different month and day
+    // different month and day
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January,  weekday_last{Tuesday}},
         year_month_weekday_last{year{1234}, February, weekday_last{Wednesday}},
         false), "");
 
-//  different year and month
+    // different year and month
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, February, weekday_last{Tuesday}},
         year_month_weekday_last{year{1235}, January,  weekday_last{Tuesday}},
         false), "");
 
-//  different year and day
+    // different year and day
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, January, weekday_last{Wednesday}},
         year_month_weekday_last{year{1235}, January, weekday_last{Tuesday}},
         false), "");
 
-//  different year, month and day
+    // different year, month and day
     static_assert( testComparisons2(
         year_month_weekday_last{year{1234}, February, weekday_last{Wednesday}},
         year_month_weekday_last{year{1235}, January,  weekday_last{Tuesday}},
         false), "");
 
 
-//  same year, different days
+    // same year, different days
     for (unsigned i = 1; i < 28; ++i)
         for (unsigned j = 1; j < 28; ++j)
             assert((testComparisons2(
@@ -95,7 +95,7 @@ int main(int, char**)
                 year_month_weekday_last{year{1234}, January, weekday_last{weekday{j}}},
                 i == j)));
 
-//  same year, different months
+    // same year, different months
     for (unsigned i = 1; i < 12; ++i)
         for (unsigned j = 1; j < 12; ++j)
             assert((testComparisons2(
@@ -103,13 +103,13 @@ int main(int, char**)
                 year_month_weekday_last{year{1234}, month{j}, weekday_last{Tuesday}},
                 i == j)));
 
-//  same month, different years
-    for (int i = 1000; i < 20; ++i)
-        for (int j = 1000; j < 20; ++j)
+    // same month, different years
+    for (int i = 1000; i < 2000; ++i)
+        for (int j = 1000; j < 2000; ++j)
         assert((testComparisons2(
             year_month_weekday_last{year{i}, January, weekday_last{Tuesday}},
             year_month_weekday_last{year{j}, January, weekday_last{Tuesday}},
             i == j)));
 
-  return 0;
+    return 0;
 }
diff --git a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/streaming.pass.cpp b/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/streaming.pass.cpp
deleted file mode 100644
index 3f792fd1bd81c..0000000000000
--- a/libcxx/test/std/utilities/time/time.cal/time.cal.ymwdlast/time.cal.ymwdlast.nonmembers/streaming.pass.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// XFAIL: *
-
-// <chrono>
-// class year_month_weekday_last;
-
-// template<class charT, class traits>
-//     basic_ostream<charT, traits>&
-//     operator<<(basic_ostream<charT, traits>& os, const year_month_weekday_last& ymwdl);
-//
-//   Returns: os << ymwdl.year() << '/' << ymwdl.month() << '/' << ymwdl.weekday_last().
-
-
-#include <chrono>
-#include <type_traits>
-#include <cassert>
-#include <iostream>
-
-#include "test_macros.h"
-
-int main(int, char**)
-{
-    using year_month_weekday_last = std::chrono::year_month_weekday_last;
-    using year                    = std::chrono::year;
-    using month                   = std::chrono::month;
-    using weekday                 = std::chrono::weekday;
-    using weekday_last            = std::chrono::weekday_last;
-
-    std::cout << year_month_weekday_last{year{2018}, month{3}, weekday_last{weekday{4}}};
-
-  return 0;
-}

From 57ebfea38c03e5cd2d0677eabd2abf761b336097 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 20 Jan 2022 20:36:14 +0100
Subject: [PATCH 064/946] [lldb] Surround LLDB_API-defining code with #ifndef
 LLDB_API

This enables power-users to annotate lldb api functions with arbitrary
attributes. The motivation for this is being able to build liblldb as a
static library on windows (see discussion on D117564).

This should not be interpreted to mean that building liblldb is
supported in any way, but this does not cause any problems for us, and
can help users who really know what they are doing (or have no other
choice).
---
 lldb/include/lldb/API/SBDefines.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lldb/include/lldb/API/SBDefines.h b/lldb/include/lldb/API/SBDefines.h
index d0ee09368d029..ecf1dc34d8c58 100644
--- a/lldb/include/lldb/API/SBDefines.h
+++ b/lldb/include/lldb/API/SBDefines.h
@@ -15,6 +15,7 @@
 #include "lldb/lldb-types.h"
 #include "lldb/lldb-versioning.h"
 
+#ifndef LLDB_API
 #if defined(_WIN32)
 #if defined(LLDB_IN_LIBLLDB)
 #define LLDB_API __declspec(dllexport)
@@ -24,6 +25,7 @@
 #else // defined (_WIN32)
 #define LLDB_API
 #endif
+#endif
 
 // Forward Declarations
 namespace lldb {

From 83d59e05b201760e3f364ff6316301d347cbad95 Mon Sep 17 00:00:00 2001
From: Alexandre Ganea <alexandre.ganea@legionlabs.com>
Date: Thu, 20 Jan 2022 14:53:18 -0500
Subject: [PATCH 065/946] Re-land [LLD] Remove global state in lldCommon

Move all variables at file-scope or function-static-scope into a hosting structure (lld::CommonLinkerContext) that lives at lldMain()-scope. Drivers will inherit from this structure and add their own global state, in the same way as for the existing COFFLinkerContext.

See discussion in https://lists.llvm.org/pipermail/llvm-dev/2021-June/151184.html

The previous land f860fe362282ed69b9d4503a20e5d20b9a041189 caused issues in https://lab.llvm.org/buildbot/#/builders/123/builds/8383, fixed by 22ee510dac9440a74b2e5b3fe3ff13ccdbf55af3.

Differential Revision: https://reviews.llvm.org/D108850
---
 clang/lib/Driver/ToolChains/MSVC.cpp          |  7 ++
 lld/COFF/COFFLinkerContext.h                  |  3 +-
 lld/COFF/Chunks.cpp                           |  3 +-
 lld/COFF/DLL.cpp                              |  4 +-
 lld/COFF/Driver.cpp                           | 70 +++++++------------
 lld/COFF/DriverUtils.cpp                      | 24 +++----
 lld/COFF/InputFiles.cpp                       | 26 +++----
 lld/COFF/LTO.cpp                              |  6 +-
 lld/COFF/MinGW.cpp                            |  7 +-
 lld/COFF/PDB.cpp                              | 13 ++--
 lld/COFF/SymbolTable.cpp                      |  2 +-
 lld/COFF/Writer.cpp                           |  2 +-
 lld/Common/CMakeLists.txt                     |  1 +
 lld/Common/CommonLinkerContext.cpp            | 45 ++++++++++++
 lld/Common/ErrorHandler.cpp                   | 69 +++++++++++-------
 lld/Common/Memory.cpp                         | 19 ++---
 lld/Common/TargetOptionsCommandFlags.cpp      |  3 -
 lld/ELF/AArch64ErrataFix.cpp                  |  8 +--
 lld/ELF/ARMErrataFix.cpp                      |  6 +-
 lld/ELF/Arch/PPC64.cpp                        |  5 +-
 lld/ELF/Driver.cpp                            | 42 ++++-------
 lld/ELF/DriverUtils.cpp                       |  7 +-
 lld/ELF/InputFiles.cpp                        | 28 ++++----
 lld/ELF/InputSection.cpp                      |  7 +-
 lld/ELF/LinkerScript.cpp                      |  6 +-
 lld/ELF/MarkLive.cpp                          |  6 +-
 lld/ELF/ScriptParser.cpp                      |  8 +--
 lld/ELF/SyntheticSections.cpp                 |  7 +-
 lld/ELF/Thunks.cpp                            | 57 +++++++--------
 lld/ELF/Writer.cpp                            |  6 +-
 lld/MachO/ConcatOutputSection.cpp             |  7 +-
 lld/MachO/Driver.cpp                          | 44 +++++-------
 lld/MachO/DriverUtils.cpp                     | 11 ++-
 lld/MachO/InputFiles.cpp                      | 32 +++++----
 lld/MachO/LTO.cpp                             |  4 +-
 lld/MachO/SyntheticSections.cpp               |  7 +-
 lld/MachO/Writer.cpp                          | 13 ++--
 lld/MinGW/Driver.cpp                          | 19 ++---
 lld/include/lld/Common/CommonLinkerContext.h  | 65 +++++++++++++++++
 lld/include/lld/Common/Driver.h               | 21 +++---
 lld/include/lld/Common/ErrorHandler.h         | 32 ++++++---
 lld/include/lld/Common/Memory.h               | 43 ++++++------
 lld/include/lld/Core/LinkingContext.h         |  3 +-
 lld/tools/lld/lld.cpp                         | 54 +++++++++-----
 lld/wasm/Driver.cpp                           | 33 ++++-----
 lld/wasm/InputFiles.cpp                       | 11 ++-
 lld/wasm/SymbolTable.cpp                      |  8 +--
 lld/wasm/Writer.cpp                           | 11 ++-
 .../llvm/DebugInfo/PDB/DIA/DIASupport.h       |  7 ++
 49 files changed, 522 insertions(+), 400 deletions(-)
 create mode 100644 lld/Common/CommonLinkerContext.cpp
 create mode 100644 lld/include/lld/Common/CommonLinkerContext.h

diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp
index 7e8636adc2728..18cef288f018a 100644
--- a/clang/lib/Driver/ToolChains/MSVC.cpp
+++ b/clang/lib/Driver/ToolChains/MSVC.cpp
@@ -47,7 +47,14 @@
 // Make sure this comes before MSVCSetupApi.h
 #include <comdef.h>
 
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#endif
 #include "MSVCSetupApi.h"
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
 #include "llvm/Support/COM.h"
 _COM_SMARTPTR_TYPEDEF(ISetupConfiguration, __uuidof(ISetupConfiguration));
 _COM_SMARTPTR_TYPEDEF(ISetupConfiguration2, __uuidof(ISetupConfiguration2));
diff --git a/lld/COFF/COFFLinkerContext.h b/lld/COFF/COFFLinkerContext.h
index e5223da86ef83..a3a6f94a94137 100644
--- a/lld/COFF/COFFLinkerContext.h
+++ b/lld/COFF/COFFLinkerContext.h
@@ -15,12 +15,13 @@
 #include "InputFiles.h"
 #include "SymbolTable.h"
 #include "Writer.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Timer.h"
 
 namespace lld {
 namespace coff {
 
-class COFFLinkerContext {
+class COFFLinkerContext : public CommonLinkerContext {
 public:
   COFFLinkerContext();
   COFFLinkerContext(const COFFLinkerContext &) = delete;
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 54cb8c99071de..6cabb22d98cf2 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -12,7 +12,6 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "Writer.h"
-#include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/COFF.h"
@@ -430,7 +429,7 @@ void SectionChunk::sortRelocations() {
     return;
   warn("some relocations in " + file->getName() + " are not sorted");
   MutableArrayRef<coff_relocation> newRelocs(
-      bAlloc.Allocate<coff_relocation>(relocsSize), relocsSize);
+      bAlloc().Allocate<coff_relocation>(relocsSize), relocsSize);
   memcpy(newRelocs.data(), relocsData, relocsSize * sizeof(coff_relocation));
   llvm::sort(newRelocs, cmpByVa);
   setRelocs(newRelocs);
diff --git a/lld/COFF/DLL.cpp b/lld/COFF/DLL.cpp
index 6fec9df5617db..bfa2a6910e2b7 100644
--- a/lld/COFF/DLL.cpp
+++ b/lld/COFF/DLL.cpp
@@ -659,14 +659,14 @@ void DelayLoadContents::create(COFFLinkerContext &ctx, Defined *h) {
         // Add a syntentic symbol for this load thunk, using the "__imp_load"
         // prefix, in case this thunk needs to be added to the list of valid
         // call targets for Control Flow Guard.
-        StringRef symName = saver.save("__imp_load_" + extName);
+        StringRef symName = saver().save("__imp_load_" + extName);
         s->loadThunkSym =
             cast<DefinedSynthetic>(ctx.symtab.addSynthetic(symName, t));
       }
     }
     thunks.push_back(tm);
     StringRef tmName =
-        saver.save("__tailMerge_" + syms[0]->getDLLName().lower());
+        saver().save("__tailMerge_" + syms[0]->getDLLName().lower());
     ctx.symtab.addSynthetic(tmName, tm);
     // Terminate with null values.
     addresses.push_back(make<NullChunk>(8));
diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp
index 3076871fd98b5..1546291e16c6b 100644
--- a/lld/COFF/Driver.cpp
+++ b/lld/COFF/Driver.cpp
@@ -19,9 +19,7 @@
 #include "Writer.h"
 #include "lld/Common/Args.h"
 #include "lld/Common/Driver.h"
-#include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Filesystem.h"
-#include "lld/Common/Memory.h"
 #include "lld/Common/Timer.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/Optional.h"
@@ -63,36 +61,22 @@ namespace coff {
 std::unique_ptr<Configuration> config;
 std::unique_ptr<LinkerDriver> driver;
 
-bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS,
-          raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
+bool link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput) {
+  // This driver-specific context will be freed later by lldMain().
+  auto *ctx = new COFFLinkerContext;
 
-  errorHandler().cleanupCallback = []() {
-    freeArena();
-  };
-
-  errorHandler().logName = args::getFilenameWithoutExe(args[0]);
-  errorHandler().errorLimitExceededMsg =
-      "too many errors emitted, stopping now"
-      " (use /errorlimit:0 to see all errors)";
-  errorHandler().exitEarly = canExitEarly;
-  stderrOS.enable_colors(stderrOS.has_colors());
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
+  ctx->e.logName = args::getFilenameWithoutExe(args[0]);
+  ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now"
+                                 " (use /errorlimit:0 to see all errors)";
 
-  COFFLinkerContext ctx;
   config = std::make_unique<Configuration>();
-  driver = std::make_unique<LinkerDriver>(ctx);
+  driver = std::make_unique<LinkerDriver>(*ctx);
 
   driver->linkerMain(args);
 
-  // Call exit() if we can to avoid calling destructors.
-  if (canExitEarly)
-    exitLld(errorCount() ? 1 : 0);
-
-  bool ret = errorCount() == 0;
-  if (!canExitEarly)
-    errorHandler().reset();
-  return ret;
+  return errorCount() == 0;
 }
 
 // Parse options of the form "old;new".
@@ -162,7 +146,7 @@ static std::future<MBErrPair> createFutureForFile(std::string path) {
 static StringRef mangle(StringRef sym) {
   assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN);
   if (config->machine == I386)
-    return saver.save("_" + sym);
+    return saver().save("_" + sym);
   return sym;
 }
 
@@ -358,9 +342,9 @@ void LinkerDriver::parseDirectives(InputFile *file) {
     Export exp = parseExport(e);
     if (config->machine == I386 && config->mingw) {
       if (!isDecorated(exp.name))
-        exp.name = saver.save("_" + exp.name);
+        exp.name = saver().save("_" + exp.name);
       if (!exp.extName.empty() && !isDecorated(exp.extName))
-        exp.extName = saver.save("_" + exp.extName);
+        exp.extName = saver().save("_" + exp.extName);
     }
     exp.directives = true;
     config->exports.push_back(exp);
@@ -442,11 +426,11 @@ StringRef LinkerDriver::doFindFile(StringRef filename) {
     SmallString<128> path = dir;
     sys::path::append(path, filename);
     if (sys::fs::exists(path.str()))
-      return saver.save(path.str());
+      return saver().save(path.str());
     if (!hasExt) {
       path.append(".obj");
       if (sys::fs::exists(path.str()))
-        return saver.save(path.str());
+        return saver().save(path.str());
     }
   }
   return filename;
@@ -483,7 +467,7 @@ StringRef LinkerDriver::doFindLibMinGW(StringRef filename) {
 
   SmallString<128> s = filename;
   sys::path::replace_extension(s, ".a");
-  StringRef libName = saver.save("lib" + s.str());
+  StringRef libName = saver().save("lib" + s.str());
   return doFindFile(libName);
 }
 
@@ -492,7 +476,7 @@ StringRef LinkerDriver::doFindLib(StringRef filename) {
   // Add ".lib" to Filename if that has no file extension.
   bool hasExt = filename.contains('.');
   if (!hasExt)
-    filename = saver.save(filename + ".lib");
+    filename = saver().save(filename + ".lib");
   StringRef ret = doFindFile(filename);
   // For MinGW, if the find above didn't turn up anything, try
   // looking for a MinGW formatted library name.
@@ -525,7 +509,7 @@ void LinkerDriver::addLibSearchPaths() {
   Optional<std::string> envOpt = Process::GetEnv("LIB");
   if (!envOpt.hasValue())
     return;
-  StringRef env = saver.save(*envOpt);
+  StringRef env = saver().save(*envOpt);
   while (!env.empty()) {
     StringRef path;
     std::tie(path, env) = env.split(';');
@@ -873,8 +857,8 @@ static void parseModuleDefs(StringRef path) {
   driver->takeBuffer(std::move(mb));
 
   if (config->outputFile.empty())
-    config->outputFile = std::string(saver.save(m.OutputFile));
-  config->importName = std::string(saver.save(m.ImportName));
+    config->outputFile = std::string(saver().save(m.OutputFile));
+  config->importName = std::string(saver().save(m.ImportName));
   if (m.ImageBase)
     config->imageBase = m.ImageBase;
   if (m.StackReserve)
@@ -902,14 +886,14 @@ static void parseModuleDefs(StringRef path) {
     // DLL instead. This is supported by both MS and GNU linkers.
     if (!e1.ExtName.empty() && e1.ExtName != e1.Name &&
         StringRef(e1.Name).contains('.')) {
-      e2.name = saver.save(e1.ExtName);
-      e2.forwardTo = saver.save(e1.Name);
+      e2.name = saver().save(e1.ExtName);
+      e2.forwardTo = saver().save(e1.Name);
       config->exports.push_back(e2);
       continue;
     }
-    e2.name = saver.save(e1.Name);
-    e2.extName = saver.save(e1.ExtName);
-    e2.aliasTarget = saver.save(e1.AliasTarget);
+    e2.name = saver().save(e1.Name);
+    e2.extName = saver().save(e1.ExtName);
+    e2.aliasTarget = saver().save(e1.AliasTarget);
     e2.ordinal = e1.Ordinal;
     e2.noname = e1.Noname;
     e2.data = e1.Data;
@@ -1906,9 +1890,9 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
     Export e = parseExport(arg->getValue());
     if (config->machine == I386) {
       if (!isDecorated(e.name))
-        e.name = saver.save("_" + e.name);
+        e.name = saver().save("_" + e.name);
       if (!e.extName.empty() && !isDecorated(e.extName))
-        e.extName = saver.save("_" + e.extName);
+        e.extName = saver().save("_" + e.extName);
     }
     config->exports.push_back(e);
   }
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index 0921c8e27f5ae..ac0f1f972c798 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -48,17 +48,17 @@ const uint16_t RT_MANIFEST = 24;
 
 class Executor {
 public:
-  explicit Executor(StringRef s) : prog(saver.save(s)) {}
-  void add(StringRef s) { args.push_back(saver.save(s)); }
-  void add(std::string &s) { args.push_back(saver.save(s)); }
-  void add(Twine s) { args.push_back(saver.save(s)); }
-  void add(const char *s) { args.push_back(saver.save(s)); }
+  explicit Executor(StringRef s) : prog(saver().save(s)) {}
+  void add(StringRef s) { args.push_back(saver().save(s)); }
+  void add(std::string &s) { args.push_back(saver().save(s)); }
+  void add(Twine s) { args.push_back(saver().save(s)); }
+  void add(const char *s) { args.push_back(saver().save(s)); }
 
   void run() {
     ErrorOr<std::string> exeOrErr = sys::findProgramByName(prog);
     if (auto ec = exeOrErr.getError())
       fatal("unable to find " + prog + " in PATH: " + ec.message());
-    StringRef exe = saver.save(*exeOrErr);
+    StringRef exe = saver().save(*exeOrErr);
     args.insert(args.begin(), exe);
 
     if (sys::ExecuteAndWait(args[0], args) != 0)
@@ -636,14 +636,14 @@ static StringRef killAt(StringRef sym, bool prefix) {
   sym = sym.substr(0, sym.find('@', 1));
   if (!sym.startswith("@")) {
     if (prefix && !sym.startswith("_"))
-      return saver.save("_" + sym);
+      return saver().save("_" + sym);
     return sym;
   }
   // For fastcall, remove the leading @ and replace it with an
   // underscore, if prefixes are used.
   sym = sym.substr(1);
   if (prefix)
-    sym = saver.save("_" + sym);
+    sym = saver().save("_" + sym);
   return sym;
 }
 
@@ -854,7 +854,7 @@ opt::InputArgList ArgParser::parse(ArrayRef<const char *> argv) {
                                               argv.data() + argv.size());
   if (!args.hasArg(OPT_lldignoreenv))
     addLINK(expandedArgv);
-  cl::ExpandResponseFiles(saver, getQuotingStyle(args), expandedArgv);
+  cl::ExpandResponseFiles(saver(), getQuotingStyle(args), expandedArgv);
   args = optTable.ParseArgs(makeArrayRef(expandedArgv).drop_front(),
                             missingIndex, missingCount);
 
@@ -901,7 +901,7 @@ ParsedDirectives ArgParser::parseDirectives(StringRef s) {
   // Handle /EXPORT and /INCLUDE in a fast path. These directives can appear for
   // potentially every symbol in the object, so they must be handled quickly.
   SmallVector<StringRef, 16> tokens;
-  cl::TokenizeWindowsCommandLineNoCopy(s, saver, tokens);
+  cl::TokenizeWindowsCommandLineNoCopy(s, saver(), tokens);
   for (StringRef tok : tokens) {
     if (tok.startswith_insensitive("/export:") ||
         tok.startswith_insensitive("-export:"))
@@ -914,7 +914,7 @@ ParsedDirectives ArgParser::parseDirectives(StringRef s) {
       // already copied quoted arguments for us, so those do not need to be
       // copied again.
       bool HasNul = tok.end() != s.end() && tok.data()[tok.size()] == '\0';
-      rest.push_back(HasNul ? tok.data() : saver.save(tok).data());
+      rest.push_back(HasNul ? tok.data() : saver().save(tok).data());
     }
   }
 
@@ -948,7 +948,7 @@ void ArgParser::addLINK(SmallVector<const char *, 256> &argv) {
 
 std::vector<const char *> ArgParser::tokenize(StringRef s) {
   SmallVector<const char *, 16> tokens;
-  cl::TokenizeWindowsCommandLine(s, saver, tokens);
+  cl::TokenizeWindowsCommandLine(s, saver(), tokens);
   return std::vector<const char *>(tokens.begin(), tokens.end());
 }
 
diff --git a/lld/COFF/InputFiles.cpp b/lld/COFF/InputFiles.cpp
index 1d79d97acad26..0f3f5e0ffe7c5 100644
--- a/lld/COFF/InputFiles.cpp
+++ b/lld/COFF/InputFiles.cpp
@@ -15,8 +15,6 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "lld/Common/DWARF.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
 #include "llvm-c/lto.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Triple.h"
@@ -905,7 +903,7 @@ ObjFile::getVariableLocation(StringRef var) {
   Optional<std::pair<std::string, unsigned>> ret = dwarf->getVariableLoc(var);
   if (!ret)
     return None;
-  return std::make_pair(saver.save(ret->first), ret->second);
+  return std::make_pair(saver().save(ret->first), ret->second);
 }
 
 // Used only for DWARF debug info, which is not common (except in MinGW
@@ -940,8 +938,8 @@ void ImportFile::parse() {
     fatal("broken import library");
 
   // Read names and create an __imp_ symbol.
-  StringRef name = saver.save(StringRef(buf + sizeof(*hdr)));
-  StringRef impName = saver.save("__imp_" + name);
+  StringRef name = saver().save(StringRef(buf + sizeof(*hdr)));
+  StringRef impName = saver().save("__imp_" + name);
   const char *nameStart = buf + sizeof(coff_import_header) + name.size() + 1;
   dllName = std::string(StringRef(nameStart));
   StringRef extName;
@@ -995,11 +993,12 @@ BitcodeFile::BitcodeFile(COFFLinkerContext &ctx, MemoryBufferRef mb,
   // into consideration at LTO time (which very likely causes undefined
   // symbols later in the link stage). So we append file offset to make
   // filename unique.
-  MemoryBufferRef mbref(
-      mb.getBuffer(),
-      saver.save(archiveName.empty() ? path
-                                     : archiveName + sys::path::filename(path) +
-                                           utostr(offsetInArchive)));
+  MemoryBufferRef mbref(mb.getBuffer(),
+                        saver().save(archiveName.empty()
+                                         ? path
+                                         : archiveName +
+                                               sys::path::filename(path) +
+                                               utostr(offsetInArchive)));
 
   obj = check(lto::InputFile::create(mbref));
 }
@@ -1035,6 +1034,7 @@ FakeSectionChunk ltoDataSectionChunk(&ltoDataSection.section);
 } // namespace
 
 void BitcodeFile::parse() {
+  llvm::StringSaver &saver = lld::saver();
   std::vector<std::pair<Symbol *, bool>> comdat(obj->getComdatTable().size());
   for (size_t i = 0; i != obj->getComdatTable().size(); ++i)
     // FIXME: Check nodeduplicate
@@ -1156,11 +1156,11 @@ void DLLFile::parse() {
     s->nameType = ImportNameType::IMPORT_NAME;
 
     if (coffObj->getMachine() == I386) {
-      s->symbolName = symbolName = saver.save("_" + symbolName);
+      s->symbolName = symbolName = saver().save("_" + symbolName);
       s->nameType = ImportNameType::IMPORT_NAME_NOPREFIX;
     }
 
-    StringRef impName = saver.save("__imp_" + symbolName);
+    StringRef impName = saver().save("__imp_" + symbolName);
     ctx.symtab.addLazyDLLSymbol(this, s, impName);
     if (code)
       ctx.symtab.addLazyDLLSymbol(this, s, symbolName);
@@ -1179,7 +1179,7 @@ void DLLFile::makeImport(DLLFile::Symbol *s) {
 
   size_t impSize = s->dllName.size() + s->symbolName.size() + 2; // +2 for NULs
   size_t size = sizeof(coff_import_header) + impSize;
-  char *buf = bAlloc.Allocate<char>(size);
+  char *buf = bAlloc().Allocate<char>(size);
   memset(buf, 0, size);
   char *p = buf;
   auto *imp = reinterpret_cast<coff_import_header *>(p);
diff --git a/lld/COFF/LTO.cpp b/lld/COFF/LTO.cpp
index f117b62192c84..2dbe7b146402e 100644
--- a/lld/COFF/LTO.cpp
+++ b/lld/COFF/LTO.cpp
@@ -11,7 +11,7 @@
 #include "InputFiles.h"
 #include "Symbols.h"
 #include "lld/Common/Args.h"
-#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
 #include "llvm/ADT/STLExtras.h"
@@ -209,8 +209,8 @@ std::vector<InputFile *> BitcodeCompiler::compile(COFFLinkerContext &ctx) {
     // - foo.exe.lto.1.obj
     // - ...
     StringRef ltoObjName =
-        saver.save(Twine(config->outputFile) + ".lto" +
-                   (i == 0 ? Twine("") : Twine('.') + Twine(i)) + ".obj");
+        saver().save(Twine(config->outputFile) + ".lto" +
+                     (i == 0 ? Twine("") : Twine('.') + Twine(i)) + ".obj");
 
     // Get the native object contents either from the cache or from memory.  Do
     // not use the cached MemoryBuffer directly, or the PDB will not be
diff --git a/lld/COFF/MinGW.cpp b/lld/COFF/MinGW.cpp
index 148ebe5eea66d..7a3a3853572f3 100644
--- a/lld/COFF/MinGW.cpp
+++ b/lld/COFF/MinGW.cpp
@@ -11,7 +11,6 @@
 #include "Driver.h"
 #include "InputFiles.h"
 #include "SymbolTable.h"
-#include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/Object/COFF.h"
@@ -184,8 +183,8 @@ void lld::coff::writeDefFile(StringRef name) {
 static StringRef mangle(Twine sym) {
   assert(config->machine != IMAGE_FILE_MACHINE_UNKNOWN);
   if (config->machine == I386)
-    return saver.save("_" + sym);
-  return saver.save(sym);
+    return saver().save("_" + sym);
+  return saver().save(sym);
 }
 
 // Handles -wrap option.
@@ -249,7 +248,7 @@ void lld::coff::wrapSymbols(COFFLinkerContext &ctx,
       // referenced it or not, though.)
       if (imp) {
         DefinedLocalImport *wrapimp = make<DefinedLocalImport>(
-            saver.save("__imp_" + w.wrap->getName()), d);
+            saver().save("__imp_" + w.wrap->getName()), d);
         ctx.symtab.localImportChunks.push_back(wrapimp->getChunk());
         map[imp] = wrapimp;
       }
diff --git a/lld/COFF/PDB.cpp b/lld/COFF/PDB.cpp
index a4cef1d0df3b2..dea84eca5b121 100644
--- a/lld/COFF/PDB.cpp
+++ b/lld/COFF/PDB.cpp
@@ -16,7 +16,6 @@
 #include "Symbols.h"
 #include "TypeMerger.h"
 #include "Writer.h"
-#include "lld/Common/ErrorHandler.h"
 #include "lld/Common/Timer.h"
 #include "llvm/DebugInfo/CodeView/DebugFrameDataSubsection.h"
 #include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
@@ -75,7 +74,7 @@ class PDBLinker {
 
 public:
   PDBLinker(COFFLinkerContext &ctx)
-      : builder(bAlloc), tMerger(ctx, bAlloc), ctx(ctx) {
+      : builder(bAlloc()), tMerger(ctx, bAlloc()), ctx(ctx) {
     // This isn't strictly necessary, but link.exe usually puts an empty string
     // as the first "valid" string in the string table, so we do the same in
     // order to maintain as much byte-for-byte compatibility as possible.
@@ -501,7 +500,7 @@ static void addGlobalSymbol(pdb::GSIStreamBuilder &builder, uint16_t modIndex,
   case SymbolKind::S_LPROCREF: {
     // sym is a temporary object, so we have to copy and reallocate the record
     // to stabilize it.
-    uint8_t *mem = bAlloc.Allocate<uint8_t>(sym.length());
+    uint8_t *mem = bAlloc().Allocate<uint8_t>(sym.length());
     memcpy(mem, sym.data().data(), sym.length());
     builder.addGlobalSymbol(CVSymbol(makeArrayRef(mem, sym.length())));
     break;
@@ -1003,7 +1002,7 @@ static void warnUnusable(InputFile *f, Error e) {
 
 // Allocate memory for a .debug$S / .debug$F section and relocate it.
 static ArrayRef<uint8_t> relocateDebugChunk(SectionChunk &debugChunk) {
-  uint8_t *buffer = bAlloc.Allocate<uint8_t>(debugChunk.getSize());
+  uint8_t *buffer = bAlloc().Allocate<uint8_t>(debugChunk.getSize());
   assert(debugChunk.getOutputSectionIdx() == 0 &&
          "debug sections should not be in output sections");
   debugChunk.writeTo(buffer);
@@ -1417,6 +1416,7 @@ static void addCommonLinkerModuleSymbols(StringRef path,
   ebs.Fields.push_back(path);
   ebs.Fields.push_back("cmd");
   ebs.Fields.push_back(argStr);
+  llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
   mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
       ons, bAlloc, CodeViewContainer::Pdb));
   mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
@@ -1448,7 +1448,7 @@ static void addLinkerModuleCoffGroup(PartialSection *sec,
     cgs.Characteristics |= llvm::COFF::IMAGE_SCN_MEM_WRITE;
 
   mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
-      cgs, bAlloc, CodeViewContainer::Pdb));
+      cgs, bAlloc(), CodeViewContainer::Pdb));
 }
 
 static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod,
@@ -1461,7 +1461,7 @@ static void addLinkerModuleSectionSymbol(pdb::DbiModuleDescriptorBuilder &mod,
   sym.Rva = os.getRVA();
   sym.SectionNumber = os.sectionIndex;
   mod.addSymbol(codeview::SymbolSerializer::writeOneSymbol(
-      sym, bAlloc, CodeViewContainer::Pdb));
+      sym, bAlloc(), CodeViewContainer::Pdb));
 
   // Skip COFF groups in MinGW because it adds a significant footprint to the
   // PDB, due to each function being in its own section
@@ -1536,6 +1536,7 @@ void PDBLinker::addImportFilesToPDB() {
     ts.Segment = thunkOS->sectionIndex;
     ts.Offset = thunkChunk->getRVA() - thunkOS->getRVA();
 
+    llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
     mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol(
         ons, bAlloc, CodeViewContainer::Pdb));
     mod->addSymbol(codeview::SymbolSerializer::writeOneSymbol(
diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp
index 9ceac7af7f914..db2db9c9272eb 100644
--- a/lld/COFF/SymbolTable.cpp
+++ b/lld/COFF/SymbolTable.cpp
@@ -134,7 +134,7 @@ getFileLineDwarf(const SectionChunk *c, uint32_t addr) {
   const DILineInfo &lineInfo = *optionalLineInfo;
   if (lineInfo.FileName == DILineInfo::BadString)
     return None;
-  return std::make_pair(saver.save(lineInfo.FileName), lineInfo.Line);
+  return std::make_pair(saver().save(lineInfo.FileName), lineInfo.Line);
 }
 
 static Optional<std::pair<StringRef, uint32_t>>
diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index 4a41c541ee7f9..12db942f1db55 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -485,7 +485,7 @@ static bool createThunks(OutputSection *os, int margin) {
     MutableArrayRef<coff_relocation> newRelocs;
     if (originalRelocs.data() == curRelocs.data()) {
       newRelocs = makeMutableArrayRef(
-          bAlloc.Allocate<coff_relocation>(originalRelocs.size()),
+          bAlloc().Allocate<coff_relocation>(originalRelocs.size()),
           originalRelocs.size());
     } else {
       newRelocs = makeMutableArrayRef(
diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt
index 9fdc67be79012..1ae7da1f5f7f0 100644
--- a/lld/Common/CMakeLists.txt
+++ b/lld/Common/CMakeLists.txt
@@ -28,6 +28,7 @@ set_source_files_properties("${version_inc}"
 
 add_lld_library(lldCommon
   Args.cpp
+  CommonLinkerContext.cpp
   DWARF.cpp
   ErrorHandler.cpp
   Filesystem.cpp
diff --git a/lld/Common/CommonLinkerContext.cpp b/lld/Common/CommonLinkerContext.cpp
new file mode 100644
index 0000000000000..50ccbb37c7966
--- /dev/null
+++ b/lld/Common/CommonLinkerContext.cpp
@@ -0,0 +1,45 @@
+//===- CommonLinkerContext.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lld/Common/CommonLinkerContext.h"
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+
+using namespace llvm;
+using namespace lld;
+
+// Reference to the current LLD instance. This is a temporary situation, until
+// we pass this context everywhere by reference, or we make it a thread_local,
+// as in https://reviews.llvm.org/D108850?id=370678 where each thread can be
+// associated with a LLD instance. Only then will LLD be free of global
+// state.
+static CommonLinkerContext *lctx;
+
+CommonLinkerContext::CommonLinkerContext() { lctx = this; }
+
+CommonLinkerContext::~CommonLinkerContext() {
+  assert(lctx);
+  // Explicitly call the destructors since we created the objects with placement
+  // new in SpecificAlloc::create().
+  for (auto &it : instances)
+    it.second->~SpecificAllocBase();
+  lctx = nullptr;
+}
+
+CommonLinkerContext &lld::commonContext() {
+  assert(lctx);
+  return *lctx;
+}
+
+bool lld::hasContext() { return lctx != nullptr; }
+
+void CommonLinkerContext::destroy() {
+  if (lctx == nullptr)
+    return;
+  delete lctx;
+}
diff --git a/lld/Common/ErrorHandler.cpp b/lld/Common/ErrorHandler.cpp
index 399b6cac75470..15b3bd058ee9b 100644
--- a/lld/Common/ErrorHandler.cpp
+++ b/lld/Common/ErrorHandler.cpp
@@ -10,6 +10,7 @@
 
 #include "llvm/Support/Parallel.h"
 
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/DiagnosticPrinter.h"
@@ -18,51 +19,69 @@
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/raw_ostream.h"
-#include <mutex>
 #include <regex>
 
 using namespace llvm;
 using namespace lld;
 
-// The functions defined in this file can be called from multiple threads,
-// but lld::outs() or lld::errs() are not thread-safe. We protect them using a
-// mutex.
-static std::mutex mu;
-
-// We want to separate multi-line messages with a newline. `sep` is "\n"
-// if the last messages was multi-line. Otherwise "".
-static StringRef sep;
-
 static StringRef getSeparator(const Twine &msg) {
   if (StringRef(msg.str()).contains('\n'))
     return "\n";
   return "";
 }
 
-raw_ostream *lld::stdoutOS;
-raw_ostream *lld::stderrOS;
+ErrorHandler::~ErrorHandler() {
+  if (cleanupCallback)
+    cleanupCallback();
+}
+
+void ErrorHandler::initialize(llvm::raw_ostream &stdoutOS,
+                              llvm::raw_ostream &stderrOS, bool exitEarly,
+                              bool disableOutput) {
+  this->stdoutOS = &stdoutOS;
+  this->stderrOS = &stderrOS;
+  stderrOS.enable_colors(stderrOS.has_colors());
+  this->exitEarly = exitEarly;
+  this->disableOutput = disableOutput;
+}
 
-ErrorHandler &lld::errorHandler() {
-  static ErrorHandler handler;
-  return handler;
+void ErrorHandler::flushStreams() {
+  std::lock_guard<std::mutex> lock(mu);
+  outs().flush();
+  errs().flush();
 }
 
+ErrorHandler &lld::errorHandler() { return context().e; }
+
 raw_ostream &lld::outs() {
-  if (errorHandler().disableOutput)
+  ErrorHandler &e = errorHandler();
+  return e.outs();
+}
+
+raw_ostream &lld::errs() {
+  ErrorHandler &e = errorHandler();
+  return e.errs();
+}
+
+raw_ostream &ErrorHandler::outs() {
+  if (disableOutput)
     return llvm::nulls();
   return stdoutOS ? *stdoutOS : llvm::outs();
 }
 
-raw_ostream &lld::errs() {
-  if (errorHandler().disableOutput)
+raw_ostream &ErrorHandler::errs() {
+  if (disableOutput)
     return llvm::nulls();
   return stderrOS ? *stderrOS : llvm::errs();
 }
 
 void lld::exitLld(int val) {
-  // Delete any temporary file, while keeping the memory mapping open.
-  if (errorHandler().outputBuffer)
-    errorHandler().outputBuffer->discard();
+  if (hasContext()) {
+    ErrorHandler &e = errorHandler();
+    // Delete any temporary file, while keeping the memory mapping open.
+    if (e.outputBuffer)
+      e.outputBuffer->discard();
+  }
 
   // Re-throw a possible signal or exception once/if it was catched by
   // safeLldMain().
@@ -75,11 +94,9 @@ void lld::exitLld(int val) {
   if (!CrashRecoveryContext::GetCurrent())
     llvm_shutdown();
 
-  {
-    std::lock_guard<std::mutex> lock(mu);
-    lld::outs().flush();
-    lld::errs().flush();
-  }
+  if (hasContext())
+    lld::errorHandler().flushStreams();
+
   // When running inside safeLldMain(), restore the control flow back to the
   // CrashRecoveryContext. Otherwise simply use _exit(), meanning no cleanup,
   // since we want to avoid further crashes on shutdown.
diff --git a/lld/Common/Memory.cpp b/lld/Common/Memory.cpp
index c53e1d3e6cfc7..7c90ff1d799c8 100644
--- a/lld/Common/Memory.cpp
+++ b/lld/Common/Memory.cpp
@@ -7,16 +7,19 @@
 //===----------------------------------------------------------------------===//
 
 #include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 
 using namespace llvm;
 using namespace lld;
 
-BumpPtrAllocator lld::bAlloc;
-StringSaver lld::saver{bAlloc};
-std::vector<SpecificAllocBase *> lld::SpecificAllocBase::instances;
-
-void lld::freeArena() {
-  for (SpecificAllocBase *alloc : SpecificAllocBase::instances)
-    alloc->reset();
-  bAlloc.Reset();
+SpecificAllocBase *
+lld::SpecificAllocBase::getOrCreate(void *tag, size_t size, size_t align,
+                                    SpecificAllocBase *(&creator)(void *)) {
+  auto &instances = context().instances;
+  auto &instance = instances[tag];
+  if (instance == nullptr) {
+    void *storage = context().bAlloc.Allocate(size, align);
+    instance = creator(storage);
+  }
+  return instance;
 }
diff --git a/lld/Common/TargetOptionsCommandFlags.cpp b/lld/Common/TargetOptionsCommandFlags.cpp
index d39477ed89adc..b7749c4a20325 100644
--- a/lld/Common/TargetOptionsCommandFlags.cpp
+++ b/lld/Common/TargetOptionsCommandFlags.cpp
@@ -7,12 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lld/Common/TargetOptionsCommandFlags.h"
-
 #include "llvm/CodeGen/CommandFlags.h"
 #include "llvm/Target/TargetOptions.h"
 
-static llvm::codegen::RegisterCodeGenFlags CGF;
-
 llvm::TargetOptions lld::initTargetOptionsFromCodeGenFlags() {
   return llvm::codegen::InitTargetOptionsFromCodeGenFlags(llvm::Triple());
 }
diff --git a/lld/ELF/AArch64ErrataFix.cpp b/lld/ELF/AArch64ErrataFix.cpp
index a1e276ea9c77e..d45edf9bd8ff4 100644
--- a/lld/ELF/AArch64ErrataFix.cpp
+++ b/lld/ELF/AArch64ErrataFix.cpp
@@ -33,7 +33,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/raw_ostream.h"
@@ -398,9 +398,9 @@ Patch843419Section::Patch843419Section(InputSection *p, uint64_t off)
       patchee(p), patcheeOffset(off) {
   this->parent = p->getParent();
   patchSym = addSyntheticLocal(
-      saver.save("__CortexA53843419_" + utohexstr(getLDSTAddr())), STT_FUNC, 0,
-      getSize(), *this);
-  addSyntheticLocal(saver.save("$x"), STT_NOTYPE, 0, 0, *this);
+      saver().save("__CortexA53843419_" + utohexstr(getLDSTAddr())), STT_FUNC,
+      0, getSize(), *this);
+  addSyntheticLocal(saver().save("$x"), STT_NOTYPE, 0, 0, *this);
 }
 
 uint64_t Patch843419Section::getLDSTAddr() const {
diff --git a/lld/ELF/ARMErrataFix.cpp b/lld/ELF/ARMErrataFix.cpp
index cfaa3109afe88..25b47b90cef81 100644
--- a/lld/ELF/ARMErrataFix.cpp
+++ b/lld/ELF/ARMErrataFix.cpp
@@ -22,7 +22,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/raw_ostream.h"
@@ -142,9 +142,9 @@ Patch657417Section::Patch657417Section(InputSection *p, uint64_t off,
       patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) {
   parent = p->getParent();
   patchSym = addSyntheticLocal(
-      saver.save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC,
+      saver().save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC,
       isARM ? 0 : 1, getSize(), *this);
-  addSyntheticLocal(saver.save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this);
+  addSyntheticLocal(saver().save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this);
 }
 
 uint64_t Patch657417Section::getBranchAddr() const {
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp
index 69a9118ca30ea..d9e4fc97ea0be 100644
--- a/lld/ELF/Arch/PPC64.cpp
+++ b/lld/ELF/Arch/PPC64.cpp
@@ -11,8 +11,7 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "Thunks.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/Support/Endian.h"
 
 using namespace llvm;
@@ -197,7 +196,7 @@ static bool addOptional(StringRef name, uint64_t value,
   Symbol *sym = symtab->find(name);
   if (!sym || sym->isDefined())
     return false;
-  sym->resolve(Defined{/*file=*/nullptr, saver.save(name), STB_GLOBAL,
+  sym->resolve(Defined{/*file=*/nullptr, saver().save(name), STB_GLOBAL,
                        STV_HIDDEN, STT_FUNC, value,
                        /*size=*/0, /*section=*/nullptr});
   defined.push_back(cast<Defined>(sym));
diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp
index abec642bed1b5..de26afddd28b8 100644
--- a/lld/ELF/Driver.cpp
+++ b/lld/ELF/Driver.cpp
@@ -77,14 +77,14 @@ std::unique_ptr<LinkerDriver> elf::driver;
 static void setConfigs(opt::InputArgList &args);
 static void readConfigs(opt::InputArgList &args);
 
-bool elf::link(ArrayRef<const char *> args, bool canExitEarly,
-               raw_ostream &stdoutOS, raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
-
-  errorHandler().cleanupCallback = []() {
-    freeArena();
-
+bool elf::link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+               llvm::raw_ostream &stderrOS, bool exitEarly,
+               bool disableOutput) {
+  // This driver-specific context will be freed later by lldMain().
+  auto *ctx = new CommonLinkerContext;
+
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
+  ctx->e.cleanupCallback = []() {
     inputSections.clear();
     outputSections.clear();
     memoryBuffers.clear();
@@ -106,13 +106,9 @@ bool elf::link(ArrayRef<const char *> args, bool canExitEarly,
 
     SharedFile::vernauxNum = 0;
   };
-
-  errorHandler().logName = args::getFilenameWithoutExe(args[0]);
-  errorHandler().errorLimitExceededMsg =
-      "too many errors emitted, stopping now (use "
-      "-error-limit=0 to see all errors)";
-  errorHandler().exitEarly = canExitEarly;
-  stderrOS.enable_colors(stderrOS.has_colors());
+  ctx->e.logName = args::getFilenameWithoutExe(args[0]);
+  ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now (use "
+                                 "-error-limit=0 to see all errors)";
 
   config = std::make_unique<Configuration>();
   driver = std::make_unique<LinkerDriver>();
@@ -126,15 +122,7 @@ bool elf::link(ArrayRef<const char *> args, bool canExitEarly,
 
   driver->linkerMain(args);
 
-  // Exit immediately if we don't need to return to the caller.
-  // This saves time because the overhead of calling destructors
-  // for all globally-allocated objects is not negligible.
-  int hasError = errorCount() ? 1 : 0;
-  if (canExitEarly)
-    exitLld(hasError);
-  else
-    errorHandler().reset();
-  return !hasError;
+  return errorCount() == 0;
 }
 
 // Parses a linker -m option.
@@ -1258,7 +1246,7 @@ static void readConfigs(opt::InputArgList &args) {
 
   // Parse LTO options.
   if (auto *arg = args.getLastArg(OPT_plugin_opt_mcpu_eq))
-    parseClangOption(saver.save("-mcpu=" + StringRef(arg->getValue())),
+    parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())),
                      arg->getSpelling());
 
   for (opt::Arg *arg : args.filtered(OPT_plugin_opt_eq_minus))
@@ -2071,9 +2059,9 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
     if (!sym)
       continue;
 
-    Symbol *real = addUnusedUndefined(saver.save("__real_" + name));
+    Symbol *real = addUnusedUndefined(saver().save("__real_" + name));
     Symbol *wrap =
-        addUnusedUndefined(saver.save("__wrap_" + name), sym->binding);
+        addUnusedUndefined(saver().save("__wrap_" + name), sym->binding);
     v.push_back({sym, real, wrap});
 
     // We want to tell LTO not to inline symbols to be overwritten
diff --git a/lld/ELF/DriverUtils.cpp b/lld/ELF/DriverUtils.cpp
index 54d2d0ae6fb9f..ac29b47abcc96 100644
--- a/lld/ELF/DriverUtils.cpp
+++ b/lld/ELF/DriverUtils.cpp
@@ -13,8 +13,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Driver.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Reproduce.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/Optional.h"
@@ -102,7 +101,7 @@ static void concatLTOPluginOptions(SmallVectorImpl<const char *> &args) {
   for (size_t i = 0, e = args.size(); i != e; ++i) {
     StringRef s = args[i];
     if ((s == "-plugin-opt" || s == "--plugin-opt") && i + 1 != e) {
-      v.push_back(saver.save(s + "=" + args[i + 1]).data());
+      v.push_back(saver().save(s + "=" + args[i + 1]).data());
       ++i;
     } else {
       v.push_back(args[i]);
@@ -125,7 +124,7 @@ opt::InputArgList ELFOptTable::parse(ArrayRef<const char *> argv) {
 
   // Expand response files (arguments in the form of @<filename>)
   // and then parse the argument again.
-  cl::ExpandResponseFiles(saver, getQuotingStyle(args), vec);
+  cl::ExpandResponseFiles(saver(), getQuotingStyle(args), vec);
   concatLTOPluginOptions(vec);
   args = this->ParseArgs(vec, missingIndex, missingCount);
 
diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index ca16a64e50836..4da371c619f40 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -13,9 +13,8 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/DWARF.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/IR/LLVMContext.h"
@@ -111,7 +110,7 @@ Optional<MemoryBufferRef> elf::readFile(StringRef path) {
   // The --chroot option changes our virtual root directory.
   // This is useful when you are dealing with files created by --reproduce.
   if (!config->chroot.empty() && path.startswith("/"))
-    path = saver.save(config->chroot + path);
+    path = saver().save(config->chroot + path);
 
   log(path);
   config->dependencyFiles.insert(llvm::CachedHashString(path));
@@ -1518,8 +1517,8 @@ template <class ELFT> void SharedFile::parse() {
         }
         StringRef verName = stringTable.data() + verneeds[idx];
         versionedNameBuffer.clear();
-        name =
-            saver.save((name + "@" + verName).toStringRef(versionedNameBuffer));
+        name = saver().save(
+            (name + "@" + verName).toStringRef(versionedNameBuffer));
       }
       Symbol *s = symtab.addSymbol(
           Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
@@ -1561,7 +1560,7 @@ template <class ELFT> void SharedFile::parse() {
         reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name;
     versionedNameBuffer.clear();
     name = (name + "@" + verName).toStringRef(versionedNameBuffer);
-    symtab.addSymbol(SharedSymbol{*this, saver.save(name), sym.getBinding(),
+    symtab.addSymbol(SharedSymbol{*this, saver().save(name), sym.getBinding(),
                                   sym.st_other, sym.getType(), sym.st_value,
                                   sym.st_size, alignment, idx});
   }
@@ -1644,11 +1643,10 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
   // into consideration at LTO time (which very likely causes undefined
   // symbols later in the link stage). So we append file offset to make
   // filename unique.
-  StringRef name =
-      archiveName.empty()
-          ? saver.save(path)
-          : saver.save(archiveName + "(" + path::filename(path) + " at " +
-                       utostr(offsetInArchive) + ")");
+  StringRef name = archiveName.empty()
+                       ? saver().save(path)
+                       : saver().save(archiveName + "(" + path::filename(path) +
+                                      " at " + utostr(offsetInArchive) + ")");
   MemoryBufferRef mbref(mb.getBuffer(), name);
 
   obj = CHECK(lto::InputFile::create(mbref), this);
@@ -1684,7 +1682,7 @@ createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
   if (sym) {
     name = sym->getName();
   } else {
-    name = saver.save(objSym.getName());
+    name = saver().save(objSym.getName());
     sym = symtab->insert(name);
   }
 
@@ -1734,8 +1732,8 @@ void BitcodeFile::parseLazy() {
   symbols.resize(obj->symbols().size());
   for (auto it : llvm::enumerate(obj->symbols()))
     if (!it.value().isUndefined())
-      symbols[it.index()] =
-          symtab.addSymbol(LazyObject{*this, saver.save(it.value().getName())});
+      symbols[it.index()] = symtab.addSymbol(
+          LazyObject{*this, saver().save(it.value().getName())});
 }
 
 void BinaryFile::parse() {
@@ -1753,6 +1751,8 @@ void BinaryFile::parse() {
     if (!isAlnum(s[i]))
       s[i] = '_';
 
+  llvm::StringSaver &saver = lld::saver();
+
   symtab->addSymbol(Defined{nullptr, saver.save(s + "_start"), STB_GLOBAL,
                             STV_DEFAULT, STT_OBJECT, 0, 0, section});
   symtab->addSymbol(Defined{nullptr, saver.save(s + "_end"), STB_GLOBAL,
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 5b6242646605f..e6ce2c399ac9d 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -18,8 +18,7 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "Thunks.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/Endian.h"
@@ -143,7 +142,7 @@ void InputSectionBase::uncompress() const {
   {
     static std::mutex mu;
     std::lock_guard<std::mutex> lock(mu);
-    uncompressedBuf = bAlloc.Allocate<char>(size);
+    uncompressedBuf = bAlloc().Allocate<char>(size);
   }
 
   if (Error e = zlib::uncompress(toStringRef(rawData), uncompressedBuf, size))
@@ -237,7 +236,7 @@ template <typename ELFT> void InputSectionBase::parseCompressedHeader() {
 
     // Restore the original section name.
     // (e.g. ".zdebug_info" -> ".debug_info")
-    name = saver.save("." + name.substr(2));
+    name = saver().save("." + name.substr(2));
     return;
   }
 
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
index 3dd3df80e36f4..bfb583453735e 100644
--- a/lld/ELF/LinkerScript.cpp
+++ b/lld/ELF/LinkerScript.cpp
@@ -19,7 +19,7 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "Writer.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -64,8 +64,8 @@ static StringRef getOutputSectionName(const InputSectionBase *s) {
     if (InputSectionBase *rel = isec->getRelocatedSection()) {
       OutputSection *out = rel->getOutputSection();
       if (s->type == SHT_RELA)
-        return saver.save(".rela" + out->name);
-      return saver.save(".rel" + out->name);
+        return saver().save(".rela" + out->name);
+      return saver().save(".rel" + out->name);
     }
   }
 
diff --git a/lld/ELF/MarkLive.cpp b/lld/ELF/MarkLive.cpp
index 4d3d79d4ee80b..597c0684b8b2b 100644
--- a/lld/ELF/MarkLive.cpp
+++ b/lld/ELF/MarkLive.cpp
@@ -27,7 +27,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Object/ELF.h"
@@ -308,8 +308,8 @@ template <class ELFT> void MarkLive<ELFT>::run() {
       // As a workaround for glibc libc.a before 2.34
       // (https://sourceware.org/PR27492), retain __libc_atexit and similar
       // sections regardless of zStartStopGC.
-      cNamedSections[saver.save("__start_" + sec->name)].push_back(sec);
-      cNamedSections[saver.save("__stop_" + sec->name)].push_back(sec);
+      cNamedSections[saver().save("__start_" + sec->name)].push_back(sec);
+      cNamedSections[saver().save("__stop_" + sec->name)].push_back(sec);
     }
   }
 
diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp
index f26b6c41adf20..7331d1156f278 100644
--- a/lld/ELF/ScriptParser.cpp
+++ b/lld/ELF/ScriptParser.cpp
@@ -20,7 +20,7 @@
 #include "ScriptLexer.h"
 #include "Symbols.h"
 #include "Target.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
@@ -290,7 +290,7 @@ void ScriptParser::addFile(StringRef s) {
     SmallString<128> pathData;
     StringRef path = (config->sysroot + s).toStringRef(pathData);
     if (sys::fs::exists(path))
-      driver->addFile(saver.save(path), /*withLOption=*/false);
+      driver->addFile(saver().save(path), /*withLOption=*/false);
     else
       setError("cannot find " + s + " inside " + config->sysroot);
     return;
@@ -304,7 +304,7 @@ void ScriptParser::addFile(StringRef s) {
     if (config->sysroot.empty())
       driver->addFile(s.substr(1), /*withLOption=*/false);
     else
-      driver->addFile(saver.save(config->sysroot + "/" + s.substr(1)),
+      driver->addFile(saver().save(config->sysroot + "/" + s.substr(1)),
                       /*withLOption=*/false);
   } else if (s.startswith("-l")) {
     // Case 3: search in the list of library paths.
@@ -327,7 +327,7 @@ void ScriptParser::addFile(StringRef s) {
     } else {
       // Finally, search in the list of library paths.
       if (Optional<std::string> path = findFromSearchPaths(s))
-        driver->addFile(saver.save(*path), /*withLOption=*/true);
+        driver->addFile(saver().save(*path), /*withLOption=*/true);
       else
         setError("unable to find " + s);
     }
diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index c1eb53e9d44b0..87f4269b83917 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -22,9 +22,8 @@
 #include "Symbols.h"
 #include "Target.h"
 #include "Writer.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/DWARF.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/Version.h"
 #include "llvm/ADT/SetOperations.h"
@@ -73,7 +72,7 @@ static ArrayRef<uint8_t> getVersion() {
   // This is only for testing.
   StringRef s = getenv("LLD_VERSION");
   if (s.empty())
-    s = saver.save(Twine("Linker: ") + getLLDVersion());
+    s = saver().save(Twine("Linker: ") + getLLDVersion());
 
   // +1 to include the terminating '\0'.
   return {(const uint8_t *)s.data(), s.size() + 1};
@@ -255,7 +254,7 @@ MipsReginfoSection<ELFT> *MipsReginfoSection<ELFT>::create() {
 
 InputSection *elf::createInterpSection() {
   // StringSaver guarantees that the returned string ends with '\0'.
-  StringRef s = saver.save(config->dynamicLinker);
+  StringRef s = saver().save(config->dynamicLinker);
   ArrayRef<uint8_t> contents = {(const uint8_t *)s.data(), s.size() + 1};
 
   return make<InputSection>(nullptr, SHF_ALLOC, SHT_PROGBITS, 1, contents,
diff --git a/lld/ELF/Thunks.cpp b/lld/ELF/Thunks.cpp
index 38de4db191f45..ae740810acb57 100644
--- a/lld/ELF/Thunks.cpp
+++ b/lld/ELF/Thunks.cpp
@@ -27,8 +27,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Endian.h"
@@ -434,7 +433,7 @@ void AArch64ABSLongThunk::writeTo(uint8_t *buf) {
 }
 
 void AArch64ABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__AArch64AbsLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__AArch64AbsLongThunk_" + destination.getName()),
             STT_FUNC, 0, isec);
   addSymbol("$x", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 8, isec);
@@ -460,8 +459,8 @@ void AArch64ADRPThunk::writeTo(uint8_t *buf) {
 }
 
 void AArch64ADRPThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__AArch64ADRPThunk_" + destination.getName()), STT_FUNC,
-            0, isec);
+  addSymbol(saver().save("__AArch64ADRPThunk_" + destination.getName()),
+            STT_FUNC, 0, isec);
   addSymbol("$x", STT_NOTYPE, 0, isec);
 }
 
@@ -560,7 +559,7 @@ void ARMV7ABSLongThunk::writeLong(uint8_t *buf) {
 }
 
 void ARMV7ABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ARMv7ABSLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__ARMv7ABSLongThunk_" + destination.getName()),
             STT_FUNC, 0, isec);
   addSymbol("$a", STT_NOTYPE, 0, isec);
 }
@@ -578,7 +577,7 @@ void ThumbV7ABSLongThunk::writeLong(uint8_t *buf) {
 }
 
 void ThumbV7ABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__Thumbv7ABSLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__Thumbv7ABSLongThunk_" + destination.getName()),
             STT_FUNC, 1, isec);
   addSymbol("$t", STT_NOTYPE, 0, isec);
 }
@@ -599,8 +598,8 @@ void ARMV7PILongThunk::writeLong(uint8_t *buf) {
 }
 
 void ARMV7PILongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ARMV7PILongThunk_" + destination.getName()), STT_FUNC,
-            0, isec);
+  addSymbol(saver().save("__ARMV7PILongThunk_" + destination.getName()),
+            STT_FUNC, 0, isec);
   addSymbol("$a", STT_NOTYPE, 0, isec);
 }
 
@@ -620,7 +619,7 @@ void ThumbV7PILongThunk::writeLong(uint8_t *buf) {
 }
 
 void ThumbV7PILongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ThumbV7PILongThunk_" + destination.getName()),
+  addSymbol(saver().save("__ThumbV7PILongThunk_" + destination.getName()),
             STT_FUNC, 1, isec);
   addSymbol("$t", STT_NOTYPE, 0, isec);
 }
@@ -635,7 +634,7 @@ void ARMV5ABSLongThunk::writeLong(uint8_t *buf) {
 }
 
 void ARMV5ABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ARMv5ABSLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__ARMv5ABSLongThunk_" + destination.getName()),
             STT_FUNC, 0, isec);
   addSymbol("$a", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 4, isec);
@@ -661,8 +660,8 @@ void ARMV5PILongThunk::writeLong(uint8_t *buf) {
 }
 
 void ARMV5PILongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__ARMV5PILongThunk_" + destination.getName()), STT_FUNC,
-            0, isec);
+  addSymbol(saver().save("__ARMV5PILongThunk_" + destination.getName()),
+            STT_FUNC, 0, isec);
   addSymbol("$a", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 12, isec);
 }
@@ -691,7 +690,7 @@ void ThumbV6MABSLongThunk::writeLong(uint8_t *buf) {
 }
 
 void ThumbV6MABSLongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__Thumbv6MABSLongThunk_" + destination.getName()),
+  addSymbol(saver().save("__Thumbv6MABSLongThunk_" + destination.getName()),
             STT_FUNC, 1, isec);
   addSymbol("$t", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 8, isec);
@@ -717,7 +716,7 @@ void ThumbV6MPILongThunk::writeLong(uint8_t *buf) {
 }
 
 void ThumbV6MPILongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__Thumbv6MPILongThunk_" + destination.getName()),
+  addSymbol(saver().save("__Thumbv6MPILongThunk_" + destination.getName()),
             STT_FUNC, 1, isec);
   addSymbol("$t", STT_NOTYPE, 0, isec);
   addSymbol("$d", STT_NOTYPE, 12, isec);
@@ -735,7 +734,7 @@ void MipsThunk::writeTo(uint8_t *buf) {
 }
 
 void MipsThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__LA25Thunk_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__LA25Thunk_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
@@ -758,8 +757,9 @@ void MicroMipsThunk::writeTo(uint8_t *buf) {
 }
 
 void MicroMipsThunk::addSymbols(ThunkSection &isec) {
-  Defined *d = addSymbol(
-      saver.save("__microLA25Thunk_" + destination.getName()), STT_FUNC, 0, isec);
+  Defined *d =
+      addSymbol(saver().save("__microLA25Thunk_" + destination.getName()),
+                STT_FUNC, 0, isec);
   d->stOther |= STO_MIPS_MICROMIPS;
 }
 
@@ -782,8 +782,9 @@ void MicroMipsR6Thunk::writeTo(uint8_t *buf) {
 }
 
 void MicroMipsR6Thunk::addSymbols(ThunkSection &isec) {
-  Defined *d = addSymbol(
-      saver.save("__microLA25Thunk_" + destination.getName()), STT_FUNC, 0, isec);
+  Defined *d =
+      addSymbol(saver().save("__microLA25Thunk_" + destination.getName()),
+                STT_FUNC, 0, isec);
   d->stOther |= STO_MIPS_MICROMIPS;
 }
 
@@ -843,7 +844,7 @@ void PPC32PltCallStub::addSymbols(ThunkSection &isec) {
   else
     os << ".plt_pic32.";
   os << destination.getName();
-  addSymbol(saver.save(os.str()), STT_FUNC, 0, isec);
+  addSymbol(saver().save(os.str()), STT_FUNC, 0, isec);
 }
 
 bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec,
@@ -852,7 +853,7 @@ bool PPC32PltCallStub::isCompatibleWith(const InputSection &isec,
 }
 
 void PPC32LongThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__LongThunk_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__LongThunk_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
@@ -896,8 +897,8 @@ void PPC64PltCallStub::writeTo(uint8_t *buf) {
 }
 
 void PPC64PltCallStub::addSymbols(ThunkSection &isec) {
-  Defined *s = addSymbol(saver.save("__plt_" + destination.getName()), STT_FUNC,
-                         0, isec);
+  Defined *s = addSymbol(saver().save("__plt_" + destination.getName()),
+                         STT_FUNC, 0, isec);
   s->needsTocRestore = true;
   s->file = destination.file;
 }
@@ -947,7 +948,7 @@ void PPC64R2SaveStub::writeTo(uint8_t *buf) {
 }
 
 void PPC64R2SaveStub::addSymbols(ThunkSection &isec) {
-  Defined *s = addSymbol(saver.save("__toc_save_" + destination.getName()),
+  Defined *s = addSymbol(saver().save("__toc_save_" + destination.getName()),
                          STT_FUNC, 0, isec);
   s->needsTocRestore = true;
 }
@@ -983,7 +984,7 @@ void PPC64R12SetupStub::writeTo(uint8_t *buf) {
 }
 
 void PPC64R12SetupStub::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__gep_setup_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__gep_setup_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
@@ -1019,7 +1020,7 @@ void PPC64PCRelPLTStub::writeTo(uint8_t *buf) {
 }
 
 void PPC64PCRelPLTStub::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__plt_pcrel_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__plt_pcrel_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
@@ -1035,7 +1036,7 @@ void PPC64LongBranchThunk::writeTo(uint8_t *buf) {
 }
 
 void PPC64LongBranchThunk::addSymbols(ThunkSection &isec) {
-  addSymbol(saver.save("__long_branch_" + destination.getName()), STT_FUNC, 0,
+  addSymbol(saver().save("__long_branch_" + destination.getName()), STT_FUNC, 0,
             isec);
 }
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 9eefe5c84a114..2b14c07120c91 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -20,8 +20,8 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 #include "lld/Common/Arrays.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Filesystem.h"
-#include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -2216,9 +2216,9 @@ void Writer<ELFT>::addStartStopSymbols(OutputSection *sec) {
   StringRef s = sec->name;
   if (!isValidCIdentifier(s))
     return;
-  addOptionalRegular(saver.save("__start_" + s), sec, 0,
+  addOptionalRegular(saver().save("__start_" + s), sec, 0,
                      config->zStartStopVisibility);
-  addOptionalRegular(saver.save("__stop_" + s), sec, -1,
+  addOptionalRegular(saver().save("__stop_" + s), sec, -1,
                      config->zStartStopVisibility);
 }
 
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp
index f63c2e6eb321c..4fae93469b5ff 100644
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -13,8 +13,7 @@
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/TimeProfiler.h"
@@ -322,8 +321,8 @@ void ConcatOutputSection::finalize() {
       // get written are happy.
       thunkInfo.isec->live = true;
 
-      StringRef thunkName = saver.save(funcSym->getName() + ".thunk." +
-                                       std::to_string(thunkInfo.sequence++));
+      StringRef thunkName = saver().save(funcSym->getName() + ".thunk." +
+                                         std::to_string(thunkInfo.sequence++));
       r.referent = thunkInfo.sym = symtab->addDefined(
           thunkName, /*file=*/nullptr, thunkInfo.isec, /*value=*/0,
           /*size=*/thunkSize, /*isWeakDef=*/false, /*isPrivateExtern=*/true,
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index a903add32e5ad..50f5c96c61f35 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -128,7 +128,7 @@ static Optional<StringRef> findFramework(StringRef name) {
         // only append suffix if realpath() succeeds
         Twine suffixed = location + suffix;
         if (fs::exists(suffixed))
-          return resolvedFrameworks[key] = saver.save(suffixed.str());
+          return resolvedFrameworks[key] = saver().save(suffixed.str());
       }
       // Suffix lookup failed, fall through to the no-suffix case.
     }
@@ -165,7 +165,7 @@ getSearchPaths(unsigned optionCode, InputArgList &args,
         path::append(buffer, path);
         // Do not warn about paths that are computed via the syslib roots
         if (fs::is_directory(buffer)) {
-          paths.push_back(saver.save(buffer.str()));
+          paths.push_back(saver().save(buffer.str()));
           found = true;
         }
       }
@@ -183,7 +183,7 @@ getSearchPaths(unsigned optionCode, InputArgList &args,
       SmallString<261> buffer(root);
       path::append(buffer, path);
       if (fs::is_directory(buffer))
-        paths.push_back(saver.save(buffer.str()));
+        paths.push_back(saver().save(buffer.str()));
     }
   }
   return paths;
@@ -1126,14 +1126,14 @@ static void referenceStubBinder() {
   symtab->addUndefined("dyld_stub_binder", /*file=*/nullptr, /*isWeak=*/false);
 }
 
-bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
-                 raw_ostream &stdoutOS, raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
-
-  errorHandler().cleanupCallback = []() {
-    freeArena();
+bool macho::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
+                 llvm::raw_ostream &stderrOS, bool exitEarly,
+                 bool disableOutput) {
+  // This driver-specific context will be freed later by lldMain().
+  auto *ctx = new CommonLinkerContext;
 
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
+  ctx->e.cleanupCallback = []() {
     resolvedFrameworks.clear();
     resolvedLibraries.clear();
     cachedReads.clear();
@@ -1154,17 +1154,15 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
     InputFile::resetIdCount();
   };
 
-  errorHandler().logName = args::getFilenameWithoutExe(argsArr[0]);
-  stderrOS.enable_colors(stderrOS.has_colors());
+  ctx->e.logName = args::getFilenameWithoutExe(argsArr[0]);
 
   MachOOptTable parser;
   InputArgList args = parser.parse(argsArr.slice(1));
 
-  errorHandler().errorLimitExceededMsg =
-      "too many errors emitted, stopping now "
-      "(use --error-limit=0 to see all errors)";
-  errorHandler().errorLimit = args::getInteger(args, OPT_error_limit_eq, 20);
-  errorHandler().verbose = args.hasArg(OPT_verbose);
+  ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now "
+                                 "(use --error-limit=0 to see all errors)";
+  ctx->e.errorLimit = args::getInteger(args, OPT_error_limit_eq, 20);
+  ctx->e.verbose = args.hasArg(OPT_verbose);
 
   if (args.hasArg(OPT_help_hidden)) {
     parser.printHelp(argsArr[0], /*showHidden=*/true);
@@ -1208,7 +1206,7 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
       // these are meaningful for our text based stripping
       if (config->osoPrefix.equals(".") || config->osoPrefix.endswith(sep))
         expanded += sep;
-      config->osoPrefix = saver.save(expanded.str());
+      config->osoPrefix = saver().save(expanded.str());
     }
   }
 
@@ -1496,7 +1494,7 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
 
     // Parse LTO options.
     if (const Arg *arg = args.getLastArg(OPT_mcpu))
-      parseClangOption(saver.save("-mcpu=" + StringRef(arg->getValue())),
+      parseClangOption(saver().save("-mcpu=" + StringRef(arg->getValue())),
                        arg->getSpelling());
 
     for (const Arg *arg : args.filtered(OPT_mllvm))
@@ -1587,11 +1585,5 @@ bool macho::link(ArrayRef<const char *> argsArr, bool canExitEarly,
 
     timeTraceProfilerCleanup();
   }
-
-  if (canExitEarly)
-    exitLld(errorCount() ? 1 : 0);
-
-  bool ret = errorCount() == 0;
-  errorHandler().reset();
-  return ret;
+  return errorCount() == 0;
 }
diff --git a/lld/MachO/DriverUtils.cpp b/lld/MachO/DriverUtils.cpp
index 3c5440544614c..83940b54486ff 100644
--- a/lld/MachO/DriverUtils.cpp
+++ b/lld/MachO/DriverUtils.cpp
@@ -13,8 +13,7 @@
 #include "Target.h"
 
 #include "lld/Common/Args.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Reproduce.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseMap.h"
@@ -82,7 +81,7 @@ InputArgList MachOOptTable::parse(ArrayRef<const char *> argv) {
 
   // Expand response files (arguments in the form of @<filename>)
   // and then parse the argument again.
-  cl::ExpandResponseFiles(saver, cl::TokenizeGNUCommandLine, vec);
+  cl::ExpandResponseFiles(saver(), cl::TokenizeGNUCommandLine, vec);
   InputArgList args = ParseArgs(vec, missingIndex, missingCount);
 
   // Handle -fatal_warnings early since it converts missing argument warnings
@@ -191,12 +190,12 @@ Optional<StringRef> macho::resolveDylibPath(StringRef dylibPath) {
   bool tbdExists = fs::exists(tbdPath);
   searchedDylib(tbdPath, tbdExists);
   if (tbdExists)
-    return saver.save(tbdPath.str());
+    return saver().save(tbdPath.str());
 
   bool dylibExists = fs::exists(dylibPath);
   searchedDylib(dylibPath, dylibExists);
   if (dylibExists)
-    return saver.save(dylibPath);
+    return saver().save(dylibPath);
   return {};
 }
 
@@ -261,7 +260,7 @@ macho::findPathCombination(const Twine &name,
       bool exists = fs::exists(location);
       searchedDylib(location, exists);
       if (exists)
-        return saver.save(location.str());
+        return saver().save(location.str());
     }
   }
   return {};
diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp
index 9ab5a18aefbc8..bbeb2dc09bf0f 100644
--- a/lld/MachO/InputFiles.cpp
+++ b/lld/MachO/InputFiles.cpp
@@ -56,9 +56,8 @@
 #include "SyntheticSections.h"
 #include "Target.h"
 
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/DWARF.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
 #include "lld/Common/Reproduce.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/BinaryFormat/MachO.h"
@@ -210,6 +209,8 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
     return cachedReads[key] = mbref;
   }
 
+  llvm::BumpPtrAllocator &bAlloc = lld::bAlloc();
+
   // Object files and archive files may be fat files, which contain multiple
   // real files for different CPU ISAs. Here, we search for a file that matches
   // with the current link target and returns it as a MemoryBufferRef.
@@ -241,7 +242,7 @@ Optional<MemoryBufferRef> macho::readFile(StringRef path) {
 }
 
 InputFile::InputFile(Kind kind, const InterfaceFile &interface)
-    : id(idCount++), fileKind(kind), name(saver.save(interface.getPath())) {}
+    : id(idCount++), fileKind(kind), name(saver().save(interface.getPath())) {}
 
 // Some sections comprise of fixed-size records, so instead of splitting them at
 // symbol boundaries, we split them based on size. Records are distinct from
@@ -1211,7 +1212,7 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
     // Find all the $ld$* symbols to process first.
     parseTrie(buf + c->export_off, c->export_size,
               [&](const Twine &name, uint64_t flags) {
-                StringRef savedName = saver.save(name);
+                StringRef savedName = saver().save(name);
                 if (handleLDSymbol(savedName))
                   return;
                 entries.push_back({savedName, flags});
@@ -1285,7 +1286,7 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
     umbrella = this;
   this->umbrella = umbrella;
 
-  installName = saver.save(interface.getInstallName());
+  installName = saver().save(interface.getInstallName());
   compatibilityVersion = interface.getCompatibilityVersion().rawValue();
   currentVersion = interface.getCurrentVersion().rawValue();
 
@@ -1304,7 +1305,7 @@ DylibFile::DylibFile(const InterfaceFile &interface, DylibFile *umbrella,
 
   exportingFile = isImplicitlyLinked(installName) ? this : umbrella;
   auto addSymbol = [&](const Twine &name) -> void {
-    StringRef savedName = saver.save(name);
+    StringRef savedName = saver().save(name);
     if (exportingFile->hiddenSymbols.contains(CachedHashStringRef(savedName)))
       return;
 
@@ -1423,7 +1424,7 @@ void DylibFile::handleLDPreviousSymbol(StringRef name, StringRef originalName) {
       config->platformInfo.minimum >= end)
     return;
 
-  this->installName = saver.save(installName);
+  this->installName = saver().save(installName);
 
   if (!compatVersion.empty()) {
     VersionTuple cVersion;
@@ -1445,7 +1446,7 @@ void DylibFile::handleLDInstallNameSymbol(StringRef name,
   if (!condition.consume_front("os") || version.tryParse(condition))
     warn("failed to parse os version, symbol '" + originalName + "' ignored");
   else if (version == config->platformInfo.minimum)
-    this->installName = saver.save(installName);
+    this->installName = saver().save(installName);
 }
 
 void DylibFile::handleLDHideSymbol(StringRef name, StringRef originalName) {
@@ -1550,7 +1551,7 @@ void ArchiveFile::fetch(const object::Archive::Symbol &sym) {
 
 static macho::Symbol *createBitcodeSymbol(const lto::InputFile::Symbol &objSym,
                                           BitcodeFile &file) {
-  StringRef name = saver.save(objSym.getName());
+  StringRef name = saver().save(objSym.getName());
 
   if (objSym.isUndefined())
     return symtab->addUndefined(name, &file, /*isWeakRef=*/objSym.isWeak());
@@ -1592,11 +1593,12 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
   // So, we append the archive name to disambiguate two members with the same
   // name from multiple different archives, and offset within the archive to
   // disambiguate two members of the same name from a single archive.
-  MemoryBufferRef mbref(
-      mb.getBuffer(),
-      saver.save(archiveName.empty() ? path
-                                     : archiveName + sys::path::filename(path) +
-                                           utostr(offsetInArchive)));
+  MemoryBufferRef mbref(mb.getBuffer(),
+                        saver().save(archiveName.empty()
+                                         ? path
+                                         : archiveName +
+                                               sys::path::filename(path) +
+                                               utostr(offsetInArchive)));
 
   obj = check(lto::InputFile::create(mbref));
   if (lazy)
@@ -1620,7 +1622,7 @@ void BitcodeFile::parseLazy() {
     const lto::InputFile::Symbol &objSym = it.value();
     if (!objSym.isUndefined()) {
       symbols[it.index()] =
-          symtab->addLazyObject(saver.save(objSym.getName()), *this);
+          symtab->addLazyObject(saver().save(objSym.getName()), *this);
       if (!lazy)
         break;
     }
diff --git a/lld/MachO/LTO.cpp b/lld/MachO/LTO.cpp
index c71ea33d28965..fd49a09229d11 100644
--- a/lld/MachO/LTO.cpp
+++ b/lld/MachO/LTO.cpp
@@ -14,7 +14,7 @@
 #include "Target.h"
 
 #include "lld/Common/Args.h"
-#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "lld/Common/TargetOptionsCommandFlags.h"
 #include "llvm/LTO/Config.h"
@@ -148,7 +148,7 @@ std::vector<ObjFile *> BitcodeCompiler::compile() {
       modTime = getModTime(filePath);
     }
     ret.push_back(make<ObjFile>(
-        MemoryBufferRef(buf[i], saver.save(filePath.str())), modTime, ""));
+        MemoryBufferRef(buf[i], saver().save(filePath.str())), modTime, ""));
   }
   for (std::unique_ptr<MemoryBuffer> &file : files)
     if (file)
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index 7e4b83ec1903d..3fe551f5684b6 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -16,8 +16,7 @@
 #include "SymbolTable.h"
 #include "Symbols.h"
 
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/EndianStream.h"
@@ -834,7 +833,7 @@ void SymtabSection::emitBeginSourceStab(DWARFUnit *compileUnit) {
   if (!dir.endswith(sep))
     dir += sep;
   stab.strx = stringTableSection.addString(
-      saver.save(dir + compileUnit->getUnitDIE().getShortName()));
+      saver().save(dir + compileUnit->getUnitDIE().getShortName()));
   stabs.emplace_back(std::move(stab));
 }
 
@@ -856,7 +855,7 @@ void SymtabSection::emitObjectFileStab(ObjFile *file) {
   if (!file->archiveName.empty())
     path.append({"(", file->getName(), ")"});
 
-  StringRef adjustedPath = saver.save(path.str());
+  StringRef adjustedPath = saver().save(path.str());
   adjustedPath.consume_front(config->osoPrefix);
 
   stab.strx = stringTableSection.addString(adjustedPath);
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 4c7d1c2eeb32b..c76dc691346e6 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -22,8 +22,7 @@
 #include "UnwindInfoSection.h"
 
 #include "lld/Common/Arrays.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/LEB128.h"
@@ -611,7 +610,7 @@ static bool needsBinding(const Symbol *sym) {
 }
 
 static void prepareSymbolRelocation(Symbol *sym, const InputSection *isec,
-                                    const Reloc &r) {
+                                    const lld::macho::Reloc &r) {
   assert(sym->isLive());
   const RelocAttrs &relocAttrs = target->getRelocAttrs(r.type);
 
@@ -644,7 +643,7 @@ void Writer::scanRelocations() {
       continue;
 
     for (auto it = isec->relocs.begin(); it != isec->relocs.end(); ++it) {
-      Reloc &r = *it;
+      lld::macho::Reloc &r = *it;
       if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {
         // Skip over the following UNSIGNED relocation -- it's just there as the
         // minuend, and doesn't have the usual UNSIGNED semantics. We don't want
@@ -858,8 +857,8 @@ static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
   if (f->archiveName.empty())
     filename = path::filename(f->getName());
   else
-    filename = saver.save(path::filename(f->archiveName) + "(" +
-                          path::filename(f->getName()) + ")");
+    filename = saver().save(path::filename(f->archiveName) + "(" +
+                            path::filename(f->getName()) + ")");
   return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
 }
 
@@ -1216,7 +1215,7 @@ void macho::createSyntheticSections() {
 
   // This section contains space for just a single word, and will be used by
   // dyld to cache an address to the image loader it uses.
-  uint8_t *arr = bAlloc.Allocate<uint8_t>(target->wordSize);
+  uint8_t *arr = bAlloc().Allocate<uint8_t>(target->wordSize);
   memset(arr, 0, target->wordSize);
   in.imageLoaderCache = make<ConcatInputSection>(
       segment_names::data, section_names::data, /*file=*/nullptr,
diff --git a/lld/MinGW/Driver.cpp b/lld/MinGW/Driver.cpp
index 7c6b865a2e398..10ff7cf5a7123 100644
--- a/lld/MinGW/Driver.cpp
+++ b/lld/MinGW/Driver.cpp
@@ -100,7 +100,7 @@ opt::InputArgList MinGWOptTable::parse(ArrayRef<const char *> argv) {
   unsigned missingCount;
 
   SmallVector<const char *, 256> vec(argv.data(), argv.data() + argv.size());
-  cl::ExpandResponseFiles(saver, getQuotingStyle(), vec);
+  cl::ExpandResponseFiles(saver(), getQuotingStyle(), vec);
   opt::InputArgList args = this->ParseArgs(vec, missingIndex, missingCount);
 
   if (missingCount)
@@ -154,12 +154,11 @@ searchLibrary(StringRef name, ArrayRef<StringRef> searchPaths, bool bStatic) {
 
 // Convert Unix-ish command line arguments to Windows-ish ones and
 // then call coff::link.
-bool mingw::link(ArrayRef<const char *> argsArr, bool canExitEarly,
-                 raw_ostream &stdoutOS, raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
-
-  stderrOS.enable_colors(stderrOS.has_colors());
+bool mingw::link(ArrayRef<const char *> argsArr, llvm::raw_ostream &stdoutOS,
+                 llvm::raw_ostream &stderrOS, bool exitEarly,
+                 bool disableOutput) {
+  auto *ctx = new CommonLinkerContext;
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
 
   MinGWOptTable parser;
   opt::InputArgList args = parser.parse(argsArr.slice(1));
@@ -445,5 +444,9 @@ bool mingw::link(ArrayRef<const char *> argsArr, bool canExitEarly,
   // Pass the actual binary name, to make error messages be printed with
   // the right prefix.
   vec[0] = argsArr[0];
-  return coff::link(vec, canExitEarly, stdoutOS, stderrOS);
+
+  // The context will be re-created in the COFF driver.
+  lld::CommonLinkerContext::destroy();
+
+  return coff::link(vec, stdoutOS, stderrOS, exitEarly, disableOutput);
 }
diff --git a/lld/include/lld/Common/CommonLinkerContext.h b/lld/include/lld/Common/CommonLinkerContext.h
new file mode 100644
index 0000000000000..3954d38ded636
--- /dev/null
+++ b/lld/include/lld/Common/CommonLinkerContext.h
@@ -0,0 +1,65 @@
+//===- CommonLinkerContext.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Entry point for all global state in lldCommon. The objective is for LLD to be
+// used "as a library" in a thread-safe manner.
+//
+// Instead of program-wide globals or function-local statics, we prefer
+// aggregating all "global" states into a heap-based structure
+// (CommonLinkerContext). This also achieves deterministic initialization &
+// shutdown for all "global" states.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_COMMON_COMMONLINKINGCONTEXT_H
+#define LLD_COMMON_COMMONLINKINGCONTEXT_H
+
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/Support/StringSaver.h"
+
+namespace llvm {
+class raw_ostream;
+} // namespace llvm
+
+namespace lld {
+struct SpecificAllocBase;
+class CommonLinkerContext {
+public:
+  CommonLinkerContext();
+  virtual ~CommonLinkerContext();
+
+  static void destroy();
+
+  llvm::BumpPtrAllocator bAlloc;
+  llvm::StringSaver saver{bAlloc};
+  llvm::DenseMap<void *, SpecificAllocBase *> instances;
+
+  ErrorHandler e;
+
+private:
+  llvm::codegen::RegisterCodeGenFlags cgf;
+};
+
+// Retrieve the global state. Currently only one state can exist per process,
+// but in the future we plan on supporting an arbitrary number of LLD instances
+// in a single process.
+CommonLinkerContext &commonContext();
+
+template <typename T = CommonLinkerContext> T &context() {
+  return static_cast<T &>(commonContext());
+}
+
+bool hasContext();
+
+inline llvm::StringSaver &saver() { return context().saver; }
+inline llvm::BumpPtrAllocator &bAlloc() { return context().bAlloc; }
+} // namespace lld
+
+#endif
diff --git a/lld/include/lld/Common/Driver.h b/lld/include/lld/Common/Driver.h
index 0e505a16463e7..91cb91b9f8082 100644
--- a/lld/include/lld/Common/Driver.h
+++ b/lld/include/lld/Common/Driver.h
@@ -9,6 +9,7 @@
 #ifndef LLD_COMMON_DRIVER_H
 #define LLD_COMMON_DRIVER_H
 
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -28,28 +29,28 @@ SafeReturn safeLldMain(int argc, const char **argv, llvm::raw_ostream &stdoutOS,
                        llvm::raw_ostream &stderrOS);
 
 namespace coff {
-bool link(llvm::ArrayRef<const char *> args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 
 namespace mingw {
-bool link(llvm::ArrayRef<const char *> args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 
 namespace elf {
-bool link(llvm::ArrayRef<const char *> args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 
 namespace macho {
-bool link(llvm::ArrayRef<const char *> args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 
 namespace wasm {
-bool link(llvm::ArrayRef<const char *> args, bool canExitEarly,
-          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+bool link(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
 }
 }
 
diff --git a/lld/include/lld/Common/ErrorHandler.h b/lld/include/lld/Common/ErrorHandler.h
index d95a2537c1f2c..ce077290d60b3 100644
--- a/lld/include/lld/Common/ErrorHandler.h
+++ b/lld/include/lld/Common/ErrorHandler.h
@@ -73,6 +73,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileOutputBuffer.h"
+#include <mutex>
 
 namespace llvm {
 class DiagnosticInfo;
@@ -81,11 +82,6 @@ class raw_ostream;
 
 namespace lld {
 
-// We wrap stdout and stderr so that you can pass alternative stdout/stderr as
-// arguments to lld::*::link() functions.
-extern llvm::raw_ostream *stdoutOS;
-extern llvm::raw_ostream *stderrOS;
-
 llvm::raw_ostream &outs();
 llvm::raw_ostream &errs();
 
@@ -93,6 +89,11 @@ enum class ErrorTag { LibNotFound, SymbolNotFound };
 
 class ErrorHandler {
 public:
+  ~ErrorHandler();
+
+  void initialize(llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS,
+                  bool exitEarly, bool disableOutput);
+
   uint64_t errorCount = 0;
   uint64_t errorLimit = 20;
   StringRef errorLimitExceededMsg = "too many errors emitted, stopping now";
@@ -112,11 +113,9 @@ class ErrorHandler {
   void message(const Twine &msg, llvm::raw_ostream &s);
   void warn(const Twine &msg);
 
-  void reset() {
-    if (cleanupCallback)
-      cleanupCallback();
-    *this = ErrorHandler();
-  }
+  raw_ostream &outs();
+  raw_ostream &errs();
+  void flushStreams();
 
   std::unique_ptr<llvm::FileOutputBuffer> outputBuffer;
 
@@ -126,6 +125,19 @@ class ErrorHandler {
   std::string getLocation(const Twine &msg);
   void reportDiagnostic(StringRef location, Colors c, StringRef diagKind,
                         const Twine &msg);
+
+  // We want to separate multi-line messages with a newline. `sep` is "\n"
+  // if the last messages was multi-line. Otherwise "".
+  llvm::StringRef sep;
+
+  // We wrap stdout and stderr so that you can pass alternative stdout/stderr as
+  // arguments to lld::*::link() functions. Since lld::outs() or lld::errs() can
+  // be indirectly called from multiple threads, we protect them using a mutex.
+  // In the future, we plan on supporting several concurent linker contexts,
+  // which explains why the mutex is not a global but part of this context.
+  std::mutex mu;
+  llvm::raw_ostream *stdoutOS{};
+  llvm::raw_ostream *stderrOS{};
 };
 
 /// Returns the default error handler.
diff --git a/lld/include/lld/Common/Memory.h b/lld/include/lld/Common/Memory.h
index f516a327cfb2b..0b2f474c30135 100644
--- a/lld/include/lld/Common/Memory.h
+++ b/lld/include/lld/Common/Memory.h
@@ -22,42 +22,41 @@
 #define LLD_COMMON_MEMORY_H
 
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/StringSaver.h"
-#include <vector>
 
 namespace lld {
-
-// Use this arena if your object doesn't have a destructor.
-extern llvm::BumpPtrAllocator bAlloc;
-extern llvm::StringSaver saver;
-
-void freeArena();
-
-// These two classes are hack to keep track of all
-// SpecificBumpPtrAllocator instances.
+// A base class only used by the CommonLinkerContext to keep track of the
+// SpecificAlloc<> instances.
 struct SpecificAllocBase {
-  SpecificAllocBase() { instances.push_back(this); }
   virtual ~SpecificAllocBase() = default;
-  virtual void reset() = 0;
-  static std::vector<SpecificAllocBase *> instances;
+  static SpecificAllocBase *getOrCreate(void *tag, size_t size, size_t align,
+                                        SpecificAllocBase *(&creator)(void *));
 };
 
+// An arena of specific types T, created on-demand.
 template <class T> struct SpecificAlloc : public SpecificAllocBase {
-  void reset() override { alloc.DestroyAll(); }
+  static SpecificAllocBase *create(void *storage) {
+    return new (storage) SpecificAlloc<T>();
+  }
   llvm::SpecificBumpPtrAllocator<T> alloc;
+  static int tag;
 };
 
-// Use a static local for these singletons so they are only registered if an
-// object of this instance is ever constructed. Otherwise we will create and
-// register ELF allocators for COFF and the reverse.
+// The address of this static member is only used as a key in
+// CommonLinkerContext::instances. Its value does not matter.
+template <class T> int SpecificAlloc<T>::tag = 0;
+
+// Creates the arena on-demand on the first call; or returns it, if it was
+// already created.
 template <typename T>
 inline llvm::SpecificBumpPtrAllocator<T> &getSpecificAllocSingleton() {
-  static SpecificAlloc<T> instance;
-  return instance.alloc;
+  SpecificAllocBase *instance = SpecificAllocBase::getOrCreate(
+      &SpecificAlloc<T>::tag, sizeof(SpecificAlloc<T>),
+      alignof(SpecificAlloc<T>), SpecificAlloc<T>::create);
+  return ((SpecificAlloc<T> *)instance)->alloc;
 }
 
-// Use this arena if your object has a destructor.
-// Your destructor will be invoked from freeArena().
+// Creates new instances of T off a (almost) contiguous arena/object pool. The
+// instances are destroyed whenever lldMain() goes out of scope.
 template <typename T, typename... U> T *make(U &&... args) {
   return new (getSpecificAllocSingleton<T>().Allocate())
       T(std::forward<U>(args)...);
diff --git a/lld/include/lld/Core/LinkingContext.h b/lld/include/lld/Core/LinkingContext.h
index e090ff9902318..091369e143190 100644
--- a/lld/include/lld/Core/LinkingContext.h
+++ b/lld/include/lld/Core/LinkingContext.h
@@ -9,6 +9,7 @@
 #ifndef LLD_CORE_LINKING_CONTEXT_H
 #define LLD_CORE_LINKING_CONTEXT_H
 
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Core/Node.h"
 #include "lld/Core/Reader.h"
 #include "llvm/ADT/ArrayRef.h"
@@ -34,7 +35,7 @@ class SharedLibraryFile;
 /// The base class LinkingContext contains the options needed by core linking.
 /// Subclasses of LinkingContext have additional options needed by specific
 /// Writers.
-class LinkingContext {
+class LinkingContext : public CommonLinkerContext {
 public:
   virtual ~LinkingContext();
 
diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp
index cad97f2153c28..0a6439fff2a27 100644
--- a/lld/tools/lld/lld.cpp
+++ b/lld/tools/lld/lld.cpp
@@ -87,6 +87,8 @@ static bool isPETarget(std::vector<const char *> &v) {
   // Expand response files (arguments in the form of @<filename>)
   // to allow detecting the -m argument from arguments in them.
   SmallVector<const char *, 256> expandedArgs(v.data(), v.data() + v.size());
+  BumpPtrAllocator a;
+  StringSaver saver(a);
   cl::ExpandResponseFiles(saver, getDefaultQuotingStyle(), expandedArgs);
   for (auto it = expandedArgs.begin(); it + 1 != expandedArgs.end(); ++it) {
     if (StringRef(*it) != "-m")
@@ -134,27 +136,42 @@ static Flavor parseFlavor(std::vector<const char *> &v) {
   return parseProgname(arg0);
 }
 
+bool inTestOutputDisabled = false;
+
 /// Universal linker main(). This linker emulates the gnu, darwin, or
 /// windows linker based on the argv[0] or -flavor option.
 static int lldMain(int argc, const char **argv, llvm::raw_ostream &stdoutOS,
                    llvm::raw_ostream &stderrOS, bool exitEarly = true) {
   std::vector<const char *> args(argv, argv + argc);
-  switch (parseFlavor(args)) {
-  case Gnu:
-    if (isPETarget(args))
-      return !mingw::link(args, exitEarly, stdoutOS, stderrOS);
-    return !elf::link(args, exitEarly, stdoutOS, stderrOS);
-  case WinLink:
-    return !coff::link(args, exitEarly, stdoutOS, stderrOS);
-  case Darwin:
-    return !macho::link(args, exitEarly, stdoutOS, stderrOS);
-  case Wasm:
-    return !lld::wasm::link(args, exitEarly, stdoutOS, stderrOS);
-  default:
-    die("lld is a generic driver.\n"
-        "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld"
-        " (WebAssembly) instead");
-  }
+  auto link = [&args]() {
+    Flavor f = parseFlavor(args);
+    if (f == Gnu && isPETarget(args))
+      return mingw::link;
+    else if (f == Gnu)
+      return elf::link;
+    else if (f == WinLink)
+      return coff::link;
+    else if (f == Darwin)
+      return macho::link;
+    else if (f == Wasm)
+      return lld::wasm::link;
+    else
+      die("lld is a generic driver.\n"
+          "Invoke ld.lld (Unix), ld64.lld (macOS), lld-link (Windows), wasm-ld"
+          " (WebAssembly) instead");
+  };
+  // Run the driver. If an error occurs, false will be returned.
+  bool r = link()(args, stdoutOS, stderrOS, exitEarly, inTestOutputDisabled);
+
+  // Call exit() if we can to avoid calling destructors.
+  if (exitEarly)
+    exitLld(!r ? 1 : 0);
+
+  // Delete the global context and clear the global context pointer, so that it
+  // cannot be accessed anymore.
+  CommonLinkerContext::destroy();
+
+  return !r ? 1 : 0;
 }
 
 // Similar to lldMain except that exceptions are caught.
@@ -176,7 +193,7 @@ SafeReturn lld::safeLldMain(int argc, const char **argv,
   // Cleanup memory and reset everything back in pristine condition. This path
   // is only taken when LLD is in test, or when it is used as a library.
   llvm::CrashRecoveryContext crc;
-  if (!crc.RunSafely([&]() { errorHandler().reset(); })) {
+  if (!crc.RunSafely([&]() { CommonLinkerContext::destroy(); })) {
     // The memory is corrupted beyond any possible recovery.
     return {r, /*canRunAgain=*/false};
   }
@@ -207,8 +224,7 @@ int main(int argc, const char **argv) {
 
   for (unsigned i = inTestVerbosity(); i > 0; --i) {
     // Disable stdout/stderr for all iterations but the last one.
-    if (i != 1)
-      errorHandler().disableOutput = true;
+    inTestOutputDisabled = (i != 1);
 
     // Execute one iteration.
     auto r = safeLldMain(argc, argv, llvm::outs(), llvm::errs());
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 59abfaadf3989..7523755806ab9 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -81,18 +81,15 @@ class LinkerDriver {
 };
 } // anonymous namespace
 
-bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS,
-          raw_ostream &stderrOS) {
-  lld::stdoutOS = &stdoutOS;
-  lld::stderrOS = &stderrOS;
+bool link(ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+          llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput) {
+  // This driver-specific context will be freed later by lldMain().
+  auto *ctx = new CommonLinkerContext;
 
-  errorHandler().cleanupCallback = []() { freeArena(); };
-
-  errorHandler().logName = args::getFilenameWithoutExe(args[0]);
-  errorHandler().errorLimitExceededMsg =
-      "too many errors emitted, stopping now (use "
-      "-error-limit=0 to see all errors)";
-  stderrOS.enable_colors(stderrOS.has_colors());
+  ctx->e.initialize(stdoutOS, stderrOS, exitEarly, disableOutput);
+  ctx->e.logName = args::getFilenameWithoutExe(args[0]);
+  ctx->e.errorLimitExceededMsg = "too many errors emitted, stopping now (use "
+                                 "-error-limit=0 to see all errors)";
 
   config = make<Configuration>();
   symtab = make<SymbolTable>();
@@ -100,13 +97,7 @@ bool link(ArrayRef<const char *> args, bool canExitEarly, raw_ostream &stdoutOS,
   initLLVM();
   LinkerDriver().linkerMain(args);
 
-  // Exit immediately if we don't need to return to the caller.
-  // This saves time because the overhead of calling destructors
-  // for all globally-allocated objects is not negligible.
-  if (canExitEarly)
-    exitLld(errorCount() ? 1 : 0);
-
-  return !errorCount();
+  return errorCount() == 0;
 }
 
 // Create prefix string literals used in Options.td
@@ -189,7 +180,7 @@ opt::InputArgList WasmOptTable::parse(ArrayRef<const char *> argv) {
 
   // Expand response files (arguments in the form of @<filename>)
   // and then parse the argument again.
-  cl::ExpandResponseFiles(saver, getQuotingStyle(args), vec);
+  cl::ExpandResponseFiles(saver(), getQuotingStyle(args), vec);
   args = this->ParseArgs(vec, missingIndex, missingCount);
 
   handleColorDiagnostics(args);
@@ -760,8 +751,8 @@ static std::vector<WrappedSymbol> addWrappedSymbols(opt::InputArgList &args) {
     if (!sym)
       continue;
 
-    Symbol *real = addUndefined(saver.save("__real_" + name));
-    Symbol *wrap = addUndefined(saver.save("__wrap_" + name));
+    Symbol *real = addUndefined(saver().save("__real_" + name));
+    Symbol *wrap = addUndefined(saver().save("__wrap_" + name));
     v.push_back({sym, real, wrap});
 
     // We want to tell LTO not to inline symbols to be overwritten
diff --git a/lld/wasm/InputFiles.cpp b/lld/wasm/InputFiles.cpp
index e3a7f56ab884a..3e3b31ab1ee5e 100644
--- a/lld/wasm/InputFiles.cpp
+++ b/lld/wasm/InputFiles.cpp
@@ -12,8 +12,7 @@
 #include "InputElement.h"
 #include "OutputSegment.h"
 #include "SymbolTable.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Reproduce.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Wasm.h"
@@ -721,7 +720,7 @@ static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
 static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
                                    const lto::InputFile::Symbol &objSym,
                                    BitcodeFile &f) {
-  StringRef name = saver.save(objSym.getName());
+  StringRef name = saver().save(objSym.getName());
 
   uint32_t flags = objSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
   flags |= mapVisibility(objSym.getVisibility());
@@ -756,9 +755,9 @@ BitcodeFile::BitcodeFile(MemoryBufferRef m, StringRef archiveName,
   // symbols later in the link stage). So we append file offset to make
   // filename unique.
   StringRef name = archiveName.empty()
-                       ? saver.save(path)
-                       : saver.save(archiveName + "(" + path::filename(path) +
-                                    " at " + utostr(offsetInArchive) + ")");
+                       ? saver().save(path)
+                       : saver().save(archiveName + "(" + path::filename(path) +
+                                      " at " + utostr(offsetInArchive) + ")");
   MemoryBufferRef mbref(mb.getBuffer(), name);
 
   obj = check(lto::InputFile::create(mbref));
diff --git a/lld/wasm/SymbolTable.cpp b/lld/wasm/SymbolTable.cpp
index 63e66c145747e..ef1402248eec8 100644
--- a/lld/wasm/SymbolTable.cpp
+++ b/lld/wasm/SymbolTable.cpp
@@ -11,8 +11,7 @@
 #include "InputChunks.h"
 #include "InputElement.h"
 #include "WriterUtils.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "llvm/ADT/SetVector.h"
 
 #define DEBUG_TYPE "lld"
@@ -843,7 +842,7 @@ InputFunction *SymbolTable::replaceWithUnreachable(Symbol *sym,
 void SymbolTable::replaceWithUndefined(Symbol *sym) {
   // Add a synthetic dummy for weak undefined functions.  These dummies will
   // be GC'd if not used as the target of any "call" instructions.
-  StringRef debugName = saver.save("undefined_weak:" + toString(*sym));
+  StringRef debugName = saver().save("undefined_weak:" + toString(*sym));
   replaceWithUnreachable(sym, *sym->getSignature(), debugName);
   // Hide our dummy to prevent export.
   sym->setHidden(true);
@@ -941,7 +940,8 @@ void SymbolTable::handleSymbolVariants() {
       if (symbol != defined) {
         auto *f = cast<FunctionSymbol>(symbol);
         reportFunctionSignatureMismatch(symName, f, defined, false);
-        StringRef debugName = saver.save("signature_mismatch:" + toString(*f));
+        StringRef debugName =
+            saver().save("signature_mismatch:" + toString(*f));
         replaceWithUnreachable(f, *f->signature, debugName);
       }
     }
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index f7589b2cd684d..95f6483e9e591 100644
--- a/lld/wasm/Writer.cpp
+++ b/lld/wasm/Writer.cpp
@@ -17,8 +17,7 @@
 #include "SymbolTable.h"
 #include "SyntheticSections.h"
 #include "WriterUtils.h"
-#include "lld/Common/ErrorHandler.h"
-#include "lld/Common/Memory.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/Strings.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -190,7 +189,7 @@ void Writer::createRelocSections() {
     else if (sec->type == WASM_SEC_CODE)
       name = "reloc.CODE";
     else if (sec->type == WASM_SEC_CUSTOM)
-      name = saver.save("reloc." + sec->name);
+      name = saver().save("reloc." + sec->name);
     else
       llvm_unreachable(
           "relocations only supported for code, data, or custom sections");
@@ -389,8 +388,8 @@ static void addStartStopSymbols(const OutputSegment *seg) {
   LLVM_DEBUG(dbgs() << "addStartStopSymbols: " << name << "\n");
   uint64_t start = seg->startVA;
   uint64_t stop = start + seg->size;
-  symtab->addOptionalDataSymbol(saver.save("__start_" + name), start);
-  symtab->addOptionalDataSymbol(saver.save("__stop_" + name), stop);
+  symtab->addOptionalDataSymbol(saver().save("__start_" + name), start);
+  symtab->addOptionalDataSymbol(saver().save("__stop_" + name), stop);
 }
 
 void Writer::addSections() {
@@ -958,7 +957,7 @@ static void createFunction(DefinedFunction *func, StringRef bodyContent) {
     writeUleb128(os, bodyContent.size(), "function size");
     os << bodyContent;
   }
-  ArrayRef<uint8_t> body = arrayRefFromStringRef(saver.save(functionBody));
+  ArrayRef<uint8_t> body = arrayRefFromStringRef(saver().save(functionBody));
   cast<SyntheticFunction>(func->function)->setBody(body);
 }
 
diff --git a/llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h b/llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
index 1a7c2f3aeeaba..570b40c70578d 100644
--- a/llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
+++ b/llvm/include/llvm/DebugInfo/PDB/DIA/DIASupport.h
@@ -27,7 +27,14 @@
 
 // DIA headers must come after windows headers.
 #include <cvconst.h>
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wnon-virtual-dtor"
+#endif
 #include <dia2.h>
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
 #include <diacreate.h>
 
 #endif // LLVM_DEBUGINFO_PDB_DIA_DIASUPPORT_H

From d4baf3b1322b84816aa623d8e8cb45a49cb68b84 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue@google.com>
Date: Thu, 20 Jan 2022 14:43:09 -0500
Subject: [PATCH 066/946] [libc] Use get_round() instead of floating point
 tricks in generic hypot implementation.

The floating point tricks used to get rounding mode require -frounding-math flag, which behaves differently on aarch64.  Reverting back to use get_round instead.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D117824
---
 libc/src/__support/FPUtil/Hypot.h    | 8 ++------
 libc/src/math/generic/CMakeLists.txt | 4 ----
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h
index 4b2987138d1bd..15b26798ccb54 100644
--- a/libc/src/__support/FPUtil/Hypot.h
+++ b/libc/src/__support/FPUtil/Hypot.h
@@ -144,9 +144,7 @@ static inline T hypot(T x, T y) {
   if ((x_bits.get_unbiased_exponent() >=
        y_bits.get_unbiased_exponent() + MantissaWidth<T>::VALUE + 2) ||
       (y == 0)) {
-    // Check if the rounding mode is FE_UPWARD, will need -frounding-math so
-    // that the compiler does not optimize it away.
-    if ((y != 0) && (0x1p0f + 0x1p-24f != 0x1p0f)) {
+    if ((y != 0) && (get_round() == FE_UPWARD)) {
       UIntType out_bits = FPBits_t(abs(x)).uintval();
       return T(FPBits_t(++out_bits));
     }
@@ -154,9 +152,7 @@ static inline T hypot(T x, T y) {
   } else if ((y_bits.get_unbiased_exponent() >=
               x_bits.get_unbiased_exponent() + MantissaWidth<T>::VALUE + 2) ||
              (x == 0)) {
-    // Check if the rounding mode is FE_UPWARD, will need -frounding-math so
-    // that the compiler does not optimize it away.
-    if ((x != 0) && (0x1p0f + 0x1p-24f != 0x1p0f)) {
+    if ((x != 0) && (get_round() == FE_UPWARD)) {
       UIntType out_bits = FPBits_t(abs(y)).uintval();
       return T(FPBits_t(++out_bits));
     }
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 59bca76fc5f84..c3914b8c45af3 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -955,8 +955,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.fputil
   COMPILE_OPTIONS
     -O3
-    -frounding-math
-    -Wno-c++17-extensions
 )
 
 add_entrypoint_object(
@@ -1005,8 +1003,6 @@ add_entrypoint_object(
     libc.src.__support.FPUtil.fputil
   COMPILE_OPTIONS
     -O3
-    -frounding-math
-    -Wno-c++17-extensions
 )
 
 add_entrypoint_object(

From 8b4fa2c98e07997469f53bee30c0d24a61dc7c8c Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Thu, 20 Jan 2022 14:59:30 -0500
Subject: [PATCH 067/946] clang: Auto-cleanup left-over file from before
 3da69fb5a26c7b on bots

---
 clang/test/Sema/test-wunaligned-access.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/test/Sema/test-wunaligned-access.c b/clang/test/Sema/test-wunaligned-access.c
index 909cda45f489b..74945a8539eb8 100644
--- a/clang/test/Sema/test-wunaligned-access.c
+++ b/clang/test/Sema/test-wunaligned-access.c
@@ -1,3 +1,6 @@
+// FIXME: Remove rm after a few days.
+// RUN: rm -f %S/test-wunaligned-access.ll
+
 // RUN: %clang_cc1 %s -triple=armv7-none-none-eabi -verify -Wunaligned-access -S -emit-llvm -o %t
 // REQUIRES: arm-registered-target
 //

From 9122b5072aa77e98804b2bf8ca2a60d152198b2e Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Thu, 20 Jan 2022 15:02:35 -0500
Subject: [PATCH 068/946] [llvm] Remove an old bot cleanup command

---
 llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index 660595a380db9..d3ae8bb876224 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -1,7 +1,5 @@
 ; When EXPENSIVE_CHECKS are enabled, the machine verifier appears between each
 ; pass. Ignore it with 'grep -v'.
-; fixme: the following line is added to cleanup bots, will be removed in weeks.
-; RUN: rm -f %S/llc-pipeline.s
 ; RUN: llc -O0 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure < %s 2>&1 \
 ; RUN:   | grep -v 'Verify generated machine code' | FileCheck -match-full-lines -strict-whitespace -check-prefix=GCN-O0 %s
 ; RUN: llc -O1 -mtriple=amdgcn--amdhsa -disable-verify -debug-pass=Structure < %s 2>&1 \

From 608a9c0e7909e25ed8f121c356cd9c5897cdf22c Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Thu, 20 Jan 2022 20:02:49 +0000
Subject: [PATCH 069/946] [gn build] Port 63a991d03589

---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 43703f603d794..3e28727f082ed 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -242,7 +242,6 @@ if (current_toolchain == default_toolchain) {
       "__format/formatter_integral.h",
       "__format/formatter_string.h",
       "__format/parser_std_format_spec.h",
-      "__function_like.h",
       "__functional/binary_function.h",
       "__functional/binary_negate.h",
       "__functional/bind.h",

From 14a2964698647cb46ee7602e2581177585c03170 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Thu, 20 Jan 2022 20:02:50 +0000
Subject: [PATCH 070/946] [gn build] Port 83d59e05b201

---
 llvm/utils/gn/secondary/lld/Common/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/lld/Common/BUILD.gn b/llvm/utils/gn/secondary/lld/Common/BUILD.gn
index ed87426362d6f..a50d5e4eda964 100644
--- a/llvm/utils/gn/secondary/lld/Common/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/Common/BUILD.gn
@@ -32,6 +32,7 @@ static_library("Common") {
   ]
   sources = [
     "Args.cpp",
+    "CommonLinkerContext.cpp",
     "DWARF.cpp",
     "ErrorHandler.cpp",
     "Filesystem.cpp",

From 860038e0d775632776abb5373539baa53693d749 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 20 Jan 2022 12:13:22 -0800
Subject: [PATCH 071/946] [SLP] Rename a couple lambdas to be more clearly
 separate from method names

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 43119b9c80cb3..d9d02cb56c447 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7246,7 +7246,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
   bool ReSchedule = false;
   LLVM_DEBUG(dbgs() << "SLP:  bundle: " << *S.OpValue << "\n");
 
-  auto &&TryScheduleBundle = [this, OldScheduleEnd, SLP](bool ReSchedule,
+  auto TryScheduleBundleImpl = [this, OldScheduleEnd, SLP](bool ReSchedule,
                                                          ScheduleData *Bundle) {
     // The scheduling region got new instructions at the lower end (or it is a
     // new region for the first bundle). This makes it necessary to
@@ -7290,7 +7290,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
       // Otherwise the compiler may crash trying to incorrectly calculate
       // dependencies and emit instruction in the wrong order at the actual
       // scheduling.
-      TryScheduleBundle(/*ReSchedule=*/false, nullptr);
+      TryScheduleBundleImpl(/*ReSchedule=*/false, nullptr);
       return None;
     }
   }
@@ -7322,7 +7322,7 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
     PrevInBundle = BundleMember;
   }
   assert(Bundle && "Failed to find schedule bundle");
-  TryScheduleBundle(ReSchedule, Bundle);
+  TryScheduleBundleImpl(ReSchedule, Bundle);
   if (!Bundle->isReady()) {
     cancelScheduling(VL, S.OpValue);
     return None;
@@ -9618,7 +9618,7 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
                        function_ref<unsigned(T *)> Limit,
                        function_ref<bool(T *, T *)> Comparator,
                        function_ref<bool(T *, T *)> AreCompatible,
-                       function_ref<bool(ArrayRef<T *>, bool)> TryToVectorize,
+                       function_ref<bool(ArrayRef<T *>, bool)> TryToVectorizeHelper,
                        bool LimitForRegisterSize) {
   bool Changed = false;
   // Sort by type, parent, operands.
@@ -9647,7 +9647,7 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
     // same/alternate ops only, this may result in some extra final
     // vectorization.
     if (NumElts > 1 &&
-        TryToVectorize(makeArrayRef(IncIt, NumElts), LimitForRegisterSize)) {
+        TryToVectorizeHelper(makeArrayRef(IncIt, NumElts), LimitForRegisterSize)) {
       // Success start over because instructions might have been changed.
       Changed = true;
     } else if (NumElts < Limit(*IncIt) &&
@@ -9658,7 +9658,7 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
     // Final attempt to vectorize instructions with the same types.
     if (Candidates.size() > 1 &&
         (SameTypeIt == E || (*SameTypeIt)->getType() != (*IncIt)->getType())) {
-      if (TryToVectorize(Candidates, /*LimitForRegisterSize=*/false)) {
+      if (TryToVectorizeHelper(Candidates, /*LimitForRegisterSize=*/false)) {
         // Success start over because instructions might have been changed.
         Changed = true;
       } else if (LimitForRegisterSize) {
@@ -9669,7 +9669,7 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
           while (SameTypeIt != End && AreCompatible(*SameTypeIt, *It))
             ++SameTypeIt;
           unsigned NumElts = (SameTypeIt - It);
-          if (NumElts > 1 && TryToVectorize(makeArrayRef(It, NumElts),
+          if (NumElts > 1 && TryToVectorizeHelper(makeArrayRef(It, NumElts),
                                             /*LimitForRegisterSize=*/false))
             Changed = true;
           It = SameTypeIt;

From 82452be5cbd7fb48e36dd4f7b2b7eaf598d34bd6 Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Thu, 20 Jan 2022 21:05:54 +0100
Subject: [PATCH 072/946] [clang-format] Refactor: add
 FormatToken::hasWhitespaceBefore(). NFC.

This factors out a pattern that comes up from time to time.

Reviewed By: MyDeveloperDay, HazardyKnusperkeks, owenpan

Differential Revision: https://reviews.llvm.org/D117769
---
 clang/lib/Format/Format.cpp              |  8 +++-----
 clang/lib/Format/FormatToken.h           |  6 ++++++
 clang/lib/Format/FormatTokenLexer.cpp    |  6 ++----
 clang/lib/Format/TokenAnnotator.cpp      | 15 ++++++---------
 clang/lib/Format/UnwrappedLineParser.cpp |  3 +--
 5 files changed, 18 insertions(+), 20 deletions(-)

diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 12020c945ea99..04e2915e3af69 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1928,7 +1928,7 @@ class Formatter : public TokenAnalyzer {
       if (hasCpp03IncompatibleFormat(Line->Children))
         return true;
       for (FormatToken *Tok = Line->First->Next; Tok; Tok = Tok->Next) {
-        if (Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) {
+        if (!Tok->hasWhitespaceBefore()) {
           if (Tok->is(tok::coloncolon) && Tok->Previous->is(TT_TemplateOpener))
             return true;
           if (Tok->is(TT_TemplateCloser) &&
@@ -1947,10 +1947,8 @@ class Formatter : public TokenAnalyzer {
       for (FormatToken *Tok = Line->First; Tok && Tok->Next; Tok = Tok->Next) {
         if (!Tok->is(TT_PointerOrReference))
           continue;
-        bool SpaceBefore =
-            Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd();
-        bool SpaceAfter = Tok->Next->WhitespaceRange.getBegin() !=
-                          Tok->Next->WhitespaceRange.getEnd();
+        bool SpaceBefore = Tok->hasWhitespaceBefore();
+        bool SpaceAfter = Tok->Next->hasWhitespaceBefore();
         if (SpaceBefore && !SpaceAfter)
           ++AlignmentDiff;
         if (!SpaceBefore && SpaceAfter)
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index 8377273263737..b087f9f120411 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -637,6 +637,12 @@ struct FormatToken {
     return WhitespaceRange.getEnd();
   }
 
+  /// Returns \c true if the range of whitespace immediately preceding the \c
+  /// Token is not empty.
+  bool hasWhitespaceBefore() const {
+    return WhitespaceRange.getBegin() != WhitespaceRange.getEnd();
+  }
+
   prec::Level getPrecedence() const {
     return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
                               /*CPlusPlus11=*/true);
diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index 04629fec1bcaf..e8b9b3d61c888 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -446,8 +446,7 @@ bool FormatTokenLexer::tryMergeLessLess() {
     return false;
 
   // Only merge if there currently is no whitespace between the two "<".
-  if (First[1]->WhitespaceRange.getBegin() !=
-      First[1]->WhitespaceRange.getEnd())
+  if (First[1]->hasWhitespaceBefore())
     return false;
 
   First[0]->Tok.setKind(tok::lessless);
@@ -468,8 +467,7 @@ bool FormatTokenLexer::tryMergeTokens(ArrayRef<tok::TokenKind> Kinds,
     return false;
   unsigned AddLength = 0;
   for (unsigned i = 1; i < Kinds.size(); ++i) {
-    if (!First[i]->is(Kinds[i]) || First[i]->WhitespaceRange.getBegin() !=
-                                       First[i]->WhitespaceRange.getEnd())
+    if (!First[i]->is(Kinds[i]) || First[i]->hasWhitespaceBefore())
       return false;
     AddLength += First[i]->TokenText.size();
   }
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 71f29e8c010e5..7fe0d319e5703 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3304,14 +3304,11 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line,
 bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
                                          const FormatToken &Right) {
   const FormatToken &Left = *Right.Previous;
-  auto HasExistingWhitespace = [&Right]() {
-    return Right.WhitespaceRange.getBegin() != Right.WhitespaceRange.getEnd();
-  };
 
   // If the token is finalized don't touch it (as it could be in a
   // clang-format-off section).
   if (Left.Finalized)
-    return HasExistingWhitespace();
+    return Right.hasWhitespaceBefore();
 
   if (Right.Tok.getIdentifierInfo() && Left.Tok.getIdentifierInfo())
     return true; // Never ever merge two identifiers.
@@ -3373,7 +3370,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     // Preserve the existence of a space before a percent for cases like 0x%04x
     // and "%d %d"
     if (Left.is(tok::numeric_constant) && Right.is(tok::percent))
-      return HasExistingWhitespace();
+      return Right.hasWhitespaceBefore();
   } else if (Style.isJson()) {
     if (Right.is(tok::colon))
       return false;
@@ -3554,7 +3551,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
       return true;
   }
   if (Left.is(TT_ImplicitStringLiteral))
-    return HasExistingWhitespace();
+    return Right.hasWhitespaceBefore();
   if (Line.Type == LT_ObjCMethodDecl) {
     if (Left.is(TT_ObjCMethodSpecifier))
       return true;
@@ -3639,11 +3636,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     return Style.SpaceAfterCStyleCast ||
            Right.isOneOf(TT_BinaryOperator, TT_SelectorName);
 
-  auto ShouldAddSpacesInAngles = [this, &HasExistingWhitespace]() {
+  auto ShouldAddSpacesInAngles = [this, &Right]() {
     if (this->Style.SpacesInAngles == FormatStyle::SIAS_Always)
       return true;
     if (this->Style.SpacesInAngles == FormatStyle::SIAS_Leave)
-      return HasExistingWhitespace();
+      return Right.hasWhitespaceBefore();
     return false;
   };
 
@@ -3669,7 +3666,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     // Generally don't remove existing spaces between an identifier and "::".
     // The identifier might actually be a macro name such as ALWAYS_INLINE. If
     // this turns out to be too lenient, add analysis of the identifier itself.
-    return HasExistingWhitespace();
+    return Right.hasWhitespaceBefore();
   if (Right.is(tok::coloncolon) &&
       !Left.isOneOf(tok::l_brace, tok::comment, tok::l_paren))
     // Put a space between < and :: in vector< ::std::string >
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 69fe21cd87f01..f466111260962 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1042,8 +1042,7 @@ void UnwrappedLineParser::parsePPDefine() {
 
   nextToken();
   if (FormatTok->Tok.getKind() == tok::l_paren &&
-      FormatTok->WhitespaceRange.getBegin() ==
-          FormatTok->WhitespaceRange.getEnd()) {
+      !FormatTok->hasWhitespaceBefore()) {
     parseParens();
   }
   if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)

From f84023a812b6499c506358912691b22a6424f897 Mon Sep 17 00:00:00 2001
From: Roger Kim <rgr@fb.com>
Date: Thu, 20 Jan 2022 12:13:04 -0800
Subject: [PATCH 073/946] [lld][macho] Stop grouping symbols by sections in
 mapfile.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As per [Bug 50689](https://bugs.llvm.org/show_bug.cgi?id=50689),

```
2. getSectionSyms() puts all the symbols into a map of section -> symbols, but this seems unnecessary. This was likely copied from the ELF port, which prints a section header before the list of symbols it contains. But the Mach-O map file doesn't print these headers.
```

This diff removes `getSectionSyms()` and keeps all symbols in a flat vector.

What does ld64's mapfile look like?
```
$ llvm-mc -filetype=obj -triple=x86_64-apple-darwin test.s -o test.o
$ llvm-mc -filetype=obj -triple=x86_64-apple-darwin foo.s -o foo.o
$ ld -map map test.o foo.o -o out -L/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib -lSystem
```

```
[  0] linker synthesized
[  1] test.o
[  2] foo.o
0x100003FB7     0x00000001      __TEXT  __text
0x100003FB8     0x00000000      __TEXT  obj
0x100003FB8     0x00000048      __TEXT  __unwind_info
0x100004000     0x00000001      __DATA  __common
0x100003FB7     0x00000001      [  1] _main
0x100003FB8     0x00000000      [  2] _foo
0x100003FB8     0x00000048      [  0] compact unwind info
0x100004000     0x00000001      [  1] _number
```

Perf numbers when linking chromium framework on a 16-Core Intel Xeon W Mac Pro:
```
base           diff           difference (95% CI)
sys_time   1.406 ± 0.020  1.388 ± 0.019  [  -1.9% ..   -0.6%]
user_time  5.557 ± 0.023  5.914 ± 0.020  [  +6.2% ..   +6.6%]
wall_time  4.455 ± 0.041  4.436 ± 0.035  [  -0.8% ..   -0.0%]
samples    35             35
```

Reviewed By: #lld-macho, int3

Differential Revision: https://reviews.llvm.org/D114735
---
 lld/MachO/MapFile.cpp     | 37 +++++++------------------------------
 lld/test/MachO/map-file.s |  2 +-
 2 files changed, 8 insertions(+), 31 deletions(-)

diff --git a/lld/MachO/MapFile.cpp b/lld/MachO/MapFile.cpp
index 79471eecbd528..93abea2ed08b2 100644
--- a/lld/MachO/MapFile.cpp
+++ b/lld/MachO/MapFile.cpp
@@ -40,26 +40,6 @@ using namespace llvm::sys;
 using namespace lld;
 using namespace lld::macho;
 
-using SymbolMapTy = DenseMap<const InputSection *, SmallVector<Defined *, 4>>;
-
-// Returns a map from sections to their symbols.
-static SymbolMapTy getSectionSyms(ArrayRef<Defined *> syms) {
-  SymbolMapTy ret;
-  for (Defined *dr : syms)
-    ret[dr->isec].push_back(dr);
-
-  // Sort symbols by address. We want to print out symbols in the order they
-  // appear in the output file rather than the order they appeared in the input
-  // files.
-  for (auto &it : ret)
-    parallelSort(
-        it.second.begin(), it.second.end(), [](Defined *a, Defined *b) {
-          return a->getVA() != b->getVA() ? a->getVA() < b->getVA()
-                                          : a->getName() < b->getName();
-        });
-  return ret;
-}
-
 // Returns a list of all symbols that we want to print out.
 static std::vector<Defined *> getSymbols() {
   std::vector<Defined *> v;
@@ -126,7 +106,10 @@ void macho::writeMapFile() {
 
   // Collect symbol info that we want to print out.
   std::vector<Defined *> syms = getSymbols();
-  SymbolMapTy sectionSyms = getSectionSyms(syms);
+  parallelSort(syms.begin(), syms.end(), [](Defined *a, Defined *b) {
+    return a->getVA() != b->getVA() ? a->getVA() < b->getVA()
+                                    : a->getName() < b->getName();
+  });
   DenseMap<Symbol *, std::string> symStr = getSymbolStrings(syms);
 
   // Dump table of sections
@@ -144,15 +127,9 @@ void macho::writeMapFile() {
   // Dump table of symbols
   os << "# Symbols:\n";
   os << "# Address\t    File  Name\n";
-  for (InputSection *isec : inputSections) {
-    auto symsIt = sectionSyms.find(isec);
-    assert(!shouldOmitFromOutput(isec) || (symsIt == sectionSyms.end()));
-    if (symsIt == sectionSyms.end())
-      continue;
-    for (Symbol *sym : symsIt->second) {
-      os << format("0x%08llX\t[%3u] %s\n", sym->getVA(),
-                   readerToFileOrdinal[sym->getFile()], symStr[sym].c_str());
-    }
+  for (Symbol *sym : syms) {
+    os << format("0x%08llX\t[%3u] %s\n", sym->getVA(),
+                 readerToFileOrdinal[sym->getFile()], symStr[sym].c_str());
   }
 
   // TODO: when we implement -dead_strip, we should dump dead stripped symbols
diff --git a/lld/test/MachO/map-file.s b/lld/test/MachO/map-file.s
index 67a44eb2dcd32..85c23e763e9ef 100644
--- a/lld/test/MachO/map-file.s
+++ b/lld/test/MachO/map-file.s
@@ -47,8 +47,8 @@ _main:
 
 # CHECK-NEXT: # Symbols:
 # CHECK-NEXT: # Address        File  Name
-# CHECK-NEXT: 0x[[#NUMBER]]    [  1]  _number
 # CHECK-NEXT: 0x[[#MAIN]]      [  1]  _main
 # CHECK-NEXT: 0x[[#FOO]]       [  2]  _foo
+# CHECK-NEXT: 0x[[#NUMBER]]    [  1]  _number
 
 # MAPFILE: "name":"Total Write map file"

From b8d38e8b4fcab071c5c4cb698e154023d06de69e Mon Sep 17 00:00:00 2001
From: Casey Carter <Casey@Carter.net>
Date: Wed, 29 Dec 2021 15:58:25 -0800
Subject: [PATCH 074/946] [libcxx][test] view_interface need not derive from
 view_base

... after LWG-3549.

Differential Revision: https://reviews.llvm.org/D117608
---
 .../ranges/range.utility/view.interface/view.interface.pass.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
index bc2bc95c1c2d2..ebc443ebe08d3 100644
--- a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
+++ b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
@@ -32,8 +32,6 @@ static_assert(!ValidViewInterfaceType<Empty const>);
 static_assert(!ValidViewInterfaceType<Empty &>);
 static_assert( ValidViewInterfaceType<Empty>);
 
-static_assert(std::derived_from<std::ranges::view_interface<Empty>, std::ranges::view_base>);
-
 using InputIter = cpp20_input_iterator<const int*>;
 
 struct InputRange : std::ranges::view_interface<InputRange> {

From d0cace5087145b6bd8c833cc25d3e6d08442326c Mon Sep 17 00:00:00 2001
From: Mogball <jeffniu22@gmail.com>
Date: Thu, 20 Jan 2022 20:17:14 +0000
Subject: [PATCH 075/946] [mlir][pdl] Some ops are missing `NoSideEffect`

Querying or building constraints on types, operands, results, and attributes are side-effect free in both the matcher and rewriter. The ops should be marked as such.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117826
---
 mlir/include/mlir/Dialect/PDL/IR/PDLOps.td | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
index 87918c3e4cb35..4a97c17da4aca 100644
--- a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
+++ b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
@@ -123,7 +123,7 @@ def PDL_ApplyNativeRewriteOp
 // pdl::AttributeOp
 //===----------------------------------------------------------------------===//
 
-def PDL_AttributeOp : PDL_Op<"attribute"> {
+def PDL_AttributeOp : PDL_Op<"attribute", [NoSideEffect]> {
   let summary = "Define an input attribute in a pattern";
   let description = [{
     `pdl.attribute` operations capture named attribute edges into an operation.
@@ -191,7 +191,8 @@ def PDL_EraseOp : PDL_Op<"erase", [HasParent<"pdl::RewriteOp">]> {
 // pdl::OperandOp
 //===----------------------------------------------------------------------===//
 
-def PDL_OperandOp : PDL_Op<"operand", [HasParent<"pdl::PatternOp">]> {
+def PDL_OperandOp
+    : PDL_Op<"operand", [HasParent<"pdl::PatternOp">, NoSideEffect]> {
   let summary = "Define an external input operand in a pattern";
   let description = [{
     `pdl.operand` operations capture external operand edges into an operation
@@ -228,7 +229,8 @@ def PDL_OperandOp : PDL_Op<"operand", [HasParent<"pdl::PatternOp">]> {
 // pdl::OperandsOp
 //===----------------------------------------------------------------------===//
 
-def PDL_OperandsOp : PDL_Op<"operands", [HasParent<"pdl::PatternOp">]> {
+def PDL_OperandsOp
+    : PDL_Op<"operands", [HasParent<"pdl::PatternOp">, NoSideEffect]> {
   let summary = "Define a range of input operands in a pattern";
   let description = [{
     `pdl.operands` operations capture external operand range edges into an
@@ -495,7 +497,7 @@ def PDL_ReplaceOp : PDL_Op<"replace", [
 // pdl::ResultOp
 //===----------------------------------------------------------------------===//
 
-def PDL_ResultOp : PDL_Op<"result"> {
+def PDL_ResultOp : PDL_Op<"result", [NoSideEffect]> {
   let summary = "Extract a result from an operation";
   let description = [{
     `pdl.result` operations extract result edges from an operation node within
@@ -528,7 +530,7 @@ def PDL_ResultOp : PDL_Op<"result"> {
 // pdl::ResultsOp
 //===----------------------------------------------------------------------===//
 
-def PDL_ResultsOp : PDL_Op<"results"> {
+def PDL_ResultsOp : PDL_Op<"results", [NoSideEffect]> {
   let summary = "Extract a result group from an operation";
   let description = [{
     `pdl.results` operations extract a result group from an operation within a
@@ -631,7 +633,7 @@ def PDL_RewriteOp : PDL_Op<"rewrite", [
 // pdl::TypeOp
 //===----------------------------------------------------------------------===//
 
-def PDL_TypeOp : PDL_Op<"type"> {
+def PDL_TypeOp : PDL_Op<"type", [NoSideEffect]> {
   let summary = "Define a type handle within a pattern";
   let description = [{
     `pdl.type` operations capture result type constraints of `Attributes`,
@@ -659,7 +661,7 @@ def PDL_TypeOp : PDL_Op<"type"> {
 // pdl::TypesOp
 //===----------------------------------------------------------------------===//
 
-def PDL_TypesOp : PDL_Op<"types"> {
+def PDL_TypesOp : PDL_Op<"types", [NoSideEffect]> {
   let summary = "Define a range of type handles within a pattern";
   let description = [{
     `pdl.types` operations capture result type constraints of `Value`s, and

From 7c471b56f2c22b984847100b318b01e31bf5f9cb Mon Sep 17 00:00:00 2001
From: Mogball <jeffniu22@gmail.com>
Date: Thu, 20 Jan 2022 20:17:26 +0000
Subject: [PATCH 076/946] [mlir][pdl] OperationOp should not be side-effect
 free

Unbound OperationOp in the matcher (i.e. one with no uses) is already disallowed by the verifier. However, an OperationOp in the rewriter is not side-effect free -- it's creating an op!

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117825
---
 mlir/include/mlir/Dialect/PDL/IR/PDLOps.td     |  3 +--
 .../pdl-to-pdl-interp-rewriter.mlir            | 18 ++++++++++++++++++
 mlir/test/Dialect/PDL/canonicalize.mlir        | 10 ++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 mlir/test/Dialect/PDL/canonicalize.mlir

diff --git a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
index 4a97c17da4aca..258ad41292cc1 100644
--- a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
+++ b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
@@ -268,8 +268,7 @@ def PDL_OperandsOp
 // pdl::OperationOp
 //===----------------------------------------------------------------------===//
 
-def PDL_OperationOp
-    : PDL_Op<"operation", [AttrSizedOperandSegments, NoSideEffect]> {
+def PDL_OperationOp : PDL_Op<"operation", [AttrSizedOperandSegments]> {
   let summary = "Define an operation within a pattern";
   let description = [{
     `pdl.operation` operations define operation nodes within a pattern. Within
diff --git a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
index d2f135864071f..f9415b3c45802 100644
--- a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
+++ b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
@@ -202,3 +202,21 @@ module @apply_native_rewrite {
     }
   }
 }
+
+// -----
+
+// CHECK-LABEL: module @unbound_rewrite_op
+module @unbound_rewrite_op {
+  // CHECK: module @rewriters
+  // CHECK:   func @pdl_generated_rewriter()
+  // CHECK:     %[[UNUSED:.*]] = pdl_interp.create_operation "bar.op"
+  // CHECK:     pdl_interp.finalize
+  pdl.pattern : benefit(1) {
+    %root = pdl.operation "foo.op"
+    pdl.rewrite %root {
+      %unused = pdl.operation "bar.op"
+    }
+  }
+}
+
+// -----
diff --git a/mlir/test/Dialect/PDL/canonicalize.mlir b/mlir/test/Dialect/PDL/canonicalize.mlir
new file mode 100644
index 0000000000000..5cb08acd884e3
--- /dev/null
+++ b/mlir/test/Dialect/PDL/canonicalize.mlir
@@ -0,0 +1,10 @@
+// RUN: mlir-opt -canonicalize %s | FileCheck %s
+
+pdl.pattern @operation_op : benefit(1) {
+  %root = pdl.operation "foo.op"
+  pdl.rewrite %root {
+    // CHECK: pdl.operation "bar.unused"
+    %unused_rewrite = pdl.operation "bar.unused"
+    pdl.erase %root
+  }
+}

From e99835ffedc23cc132c8a9c769c85ac20b66cb3a Mon Sep 17 00:00:00 2001
From: Mogball <jeffniu22@gmail.com>
Date: Thu, 20 Jan 2022 20:17:40 +0000
Subject: [PATCH 077/946] [mlir][pdl] Make `pdl` the default dialect when
 parsing/printing

PDLDialect being a somewhat user-facing dialect and whose ops contain exclusively other PDL ops in their regions can take advantage of `OpAsmOpInterface` to provide nicer IR.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117828
---
 mlir/include/mlir/Dialect/PDL/IR/PDLOps.td    |   7 +-
 mlir/lib/Dialect/PDL/IR/PDL.cpp               |  10 +
 .../pdl-to-pdl-interp-matcher.mlir            | 298 +++++++++---------
 .../pdl-to-pdl-interp-rewriter.mlir           | 122 +++----
 mlir/test/Dialect/PDL/canonicalize.mlir       |  10 +-
 mlir/test/Dialect/PDL/invalid.mlir            | 136 ++++----
 mlir/test/Dialect/PDL/ops.mlir                | 140 ++++----
 mlir/test/python/dialects/pdl_ops.py          | 152 ++++-----
 8 files changed, 444 insertions(+), 431 deletions(-)

diff --git a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
index 258ad41292cc1..a094381e81632 100644
--- a/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
+++ b/mlir/include/mlir/Dialect/PDL/IR/PDLOps.td
@@ -15,6 +15,7 @@
 
 include "mlir/Dialect/PDL/IR/PDLTypes.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/IR/OpAsmInterface.td"
 include "mlir/IR/SymbolInterfaces.td"
 
 //===----------------------------------------------------------------------===//
@@ -402,7 +403,8 @@ def PDL_OperationOp : PDL_Op<"operation", [AttrSizedOperandSegments]> {
 //===----------------------------------------------------------------------===//
 
 def PDL_PatternOp : PDL_Op<"pattern", [
-    IsolatedFromAbove, SingleBlock, Symbol
+    IsolatedFromAbove, SingleBlock, Symbol,
+    DeclareOpInterfaceMethods<OpAsmOpInterface, ["getDefaultDialect"]>
   ]> {
   let summary = "Define a rewrite pattern";
   let description = [{
@@ -573,7 +575,8 @@ def PDL_ResultsOp : PDL_Op<"results", [NoSideEffect]> {
 
 def PDL_RewriteOp : PDL_Op<"rewrite", [
      Terminator, HasParent<"pdl::PatternOp">, NoTerminator, NoRegionArguments,
-     SingleBlock, AttrSizedOperandSegments
+     SingleBlock, AttrSizedOperandSegments,
+     DeclareOpInterfaceMethods<OpAsmOpInterface, ["getDefaultDialect"]>
   ]> {
   let summary = "Specify the rewrite of a matched pattern";
   let description = [{
diff --git a/mlir/lib/Dialect/PDL/IR/PDL.cpp b/mlir/lib/Dialect/PDL/IR/PDL.cpp
index 95a3fb742fa11..e0f6753743e13 100644
--- a/mlir/lib/Dialect/PDL/IR/PDL.cpp
+++ b/mlir/lib/Dialect/PDL/IR/PDL.cpp
@@ -355,6 +355,11 @@ RewriteOp PatternOp::getRewriter() {
   return cast<RewriteOp>(body().front().getTerminator());
 }
 
+/// The default dialect is `pdl`.
+StringRef PatternOp::getDefaultDialect() {
+  return PDLDialect::getDialectNamespace();
+}
+
 //===----------------------------------------------------------------------===//
 // pdl::ReplaceOp
 //===----------------------------------------------------------------------===//
@@ -431,6 +436,11 @@ static LogicalResult verify(RewriteOp op) {
   return success();
 }
 
+/// The default dialect is `pdl`.
+StringRef RewriteOp::getDefaultDialect() {
+  return PDLDialect::getDialectNamespace();
+}
+
 //===----------------------------------------------------------------------===//
 // pdl::TypeOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir
index fd6cfe5fa7c5f..d9a8706471fe4 100644
--- a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir
+++ b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-matcher.mlir
@@ -27,8 +27,8 @@ module @simple {
   // CHECK:     pdl_interp.apply_rewrite "rewriter"(%[[REWRITE_ROOT]]
   // CHECK:     pdl_interp.finalize
   pdl.pattern : benefit(1) {
-    %root = pdl.operation "foo.op"
-    pdl.rewrite %root with "rewriter"
+    %root = operation "foo.op"
+    rewrite %root with "rewriter"
   }
 }
 
@@ -48,11 +48,11 @@ module @attributes {
   // CHECK-DAG:   %[[ATTR1_TYPE:.*]] = pdl_interp.get_attribute_type of %[[ATTR1]]
   // CHECK-DAG:   pdl_interp.check_type %[[ATTR1_TYPE]] is i64
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i64
-    %attr = pdl.attribute 10 : i64
-    %attr1 = pdl.attribute : %type
-    %root = pdl.operation {"attr" = %attr, "attr1" = %attr1}
-    pdl.rewrite %root with "rewriter"
+    %type = type : i64
+    %attr = attribute 10 : i64
+    %attr1 = attribute : %type
+    %root = operation {"attr" = %attr, "attr1" = %attr1}
+    rewrite %root with "rewriter"
   }
 }
 
@@ -67,13 +67,13 @@ module @constraints {
   // CHECK:       pdl_interp.apply_constraint "multi_constraint" [true](%[[INPUT]], %[[INPUT1]], %[[RESULT]]
 
   pdl.pattern : benefit(1) {
-    %input0 = pdl.operand
-    %input1 = pdl.operand
-    %root = pdl.operation(%input0, %input1 : !pdl.value, !pdl.value)
-    %result0 = pdl.result 0 of %root
+    %input0 = operand
+    %input1 = operand
+    %root = operation(%input0, %input1 : !pdl.value, !pdl.value)
+    %result0 = result 0 of %root
 
     pdl.apply_native_constraint "multi_constraint"[true](%input0, %input1, %result0 : !pdl.value, !pdl.value, !pdl.value)
-    pdl.rewrite %root with "rewriter"
+    rewrite %root with "rewriter"
   }
 }
 
@@ -94,10 +94,10 @@ module @inputs {
   // CHECK-DAG:  %[[INPUT1:.*]] = pdl_interp.get_operand 1 of %[[ROOT]]
   // CHECK-DAG:  pdl_interp.are_equal %[[INPUT]], %[[INPUT1]] : !pdl.value
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i64
-    %input = pdl.operand : %type
-    %root = pdl.operation(%input, %input : !pdl.value, !pdl.value)
-    pdl.rewrite %root with "rewriter"
+    %type = type : i64
+    %input = operand : %type
+    %root = operation(%input, %input : !pdl.value, !pdl.value)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -124,11 +124,11 @@ module @variadic_inputs {
   // CHECK-DAG:  %[[INPUT2:.*]] = pdl_interp.get_operands 2 of %[[ROOT]] : !pdl.value
   // CHECK-DAG:  pdl_interp.are_equal %[[INPUT]], %[[INPUT2]] : !pdl.value
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64]
-    %inputs = pdl.operands : %types
-    %input = pdl.operand
-    %root = pdl.operation(%input, %inputs, %input : !pdl.value, !pdl.range<value>, !pdl.value)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64]
+    %inputs = operands : %types
+    %input = operand
+    %root = operation(%input, %inputs, %input : !pdl.value, !pdl.range<value>, !pdl.value)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -147,10 +147,10 @@ module @single_operand_range {
   // The operand count is unknown, so there is no need to check for it.
   // CHECK-NOT: pdl_interp.check_operand_count
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64]
-    %operands = pdl.operands : %types
-    %root = pdl.operation(%operands : !pdl.range<value>)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64]
+    %operands = operands : %types
+    %root = operation(%operands : !pdl.range<value>)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -171,10 +171,10 @@ module @results {
   // access for it.
   // CHECK-NOT:   pdl_interp.get_result 1 of %[[ROOT]]
   pdl.pattern : benefit(1) {
-    %type1 = pdl.type : i32
-    %type2 = pdl.type
-    %root = pdl.operation -> (%type1, %type2 : !pdl.type, !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %type1 = type : i32
+    %type2 = type
+    %root = operation -> (%type1, %type2 : !pdl.type, !pdl.type)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -201,10 +201,10 @@ module @variadic_results {
   // CHECK-DAG:  %[[RESULT2:.*]] = pdl_interp.get_results 2 of %[[ROOT]] : !pdl.value
   // CHECK-DAG:   pdl_interp.is_not_null %[[RESULT2]] : !pdl.value
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64]
-    %type = pdl.type
-    %root = pdl.operation -> (%type, %types, %type : !pdl.type, !pdl.range<type>, !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64]
+    %type = type
+    %root = operation -> (%type, %types, %type : !pdl.type, !pdl.range<type>, !pdl.type)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -223,9 +223,9 @@ module @single_result_range {
   // The result count is unknown, so there is no need to check for it.
   // CHECK-NOT: pdl_interp.check_result_count
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64]
-    %root = pdl.operation -> (%types : !pdl.range<type>)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64]
+    %root = operation -> (%types : !pdl.range<type>)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -251,14 +251,14 @@ module @results_as_operands {
   // CHECK-DAG:   pdl_interp.are_equal %[[DEF_OP_0]], %[[DEF_OP_1]]
 
   pdl.pattern : benefit(1) {
-    %type1 = pdl.type : i32
-    %type2 = pdl.type
-    %inputOp = pdl.operation -> (%type1, %type2 : !pdl.type, !pdl.type)
-    %result1 = pdl.result 0 of %inputOp
-    %result2 = pdl.result 1 of %inputOp
-
-    %root = pdl.operation(%result1, %result2 : !pdl.value, !pdl.value)
-    pdl.rewrite %root with "rewriter"
+    %type1 = type : i32
+    %type2 = type
+    %inputOp = operation -> (%type1, %type2 : !pdl.type, !pdl.type)
+    %result1 = result 0 of %inputOp
+    %result2 = result 1 of %inputOp
+
+    %root = operation(%result1, %result2 : !pdl.value, !pdl.value)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -274,12 +274,12 @@ module @single_result_range_as_operands {
   // CHECK-DAG:  pdl_interp.are_equal %[[RESULTS]], %[[OPERANDS]] : !pdl.range<value>
 
   pdl.pattern : benefit(1) {
-    %types = pdl.types
-    %inputOp = pdl.operation -> (%types : !pdl.range<type>)
-    %results = pdl.results of %inputOp
+    %types = types
+    %inputOp = operation -> (%types : !pdl.range<type>)
+    %results = results of %inputOp
 
-    %root = pdl.operation(%results : !pdl.range<value>)
-    pdl.rewrite %root with "rewriter"
+    %root = operation(%results : !pdl.range<value>)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -292,14 +292,14 @@ module @switch_single_result_type {
   // CHECK:   %[[RESULT_TYPE:.*]] = pdl_interp.get_value_type of %[[RESULT]]
   // CHECK:   pdl_interp.switch_type %[[RESULT_TYPE]] to [i32, i64]
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i32
-    %root = pdl.operation -> (%type : !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %type = type : i32
+    %root = operation -> (%type : !pdl.type)
+    rewrite %root with "rewriter"
   }
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i64
-    %root = pdl.operation -> (%type : !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %type = type : i64
+    %root = operation -> (%type : !pdl.type)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -312,14 +312,14 @@ module @switch_result_types {
   // CHECK:   %[[RESULT_TYPES:.*]] = pdl_interp.get_value_type of %[[RESULTS]]
   // CHECK:   pdl_interp.switch_types %[[RESULT_TYPES]] to {{\[\[}}i32], [i64, i32]]
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i32]
-    %root = pdl.operation -> (%types : !pdl.range<type>)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i32]
+    %root = operation -> (%types : !pdl.range<type>)
+    rewrite %root with "rewriter"
   }
   pdl.pattern : benefit(1) {
-    %types = pdl.types : [i64, i32]
-    %root = pdl.operation -> (%types : !pdl.range<type>)
-    pdl.rewrite %root with "rewriter"
+    %types = types : [i64, i32]
+    %root = operation -> (%types : !pdl.range<type>)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -337,17 +337,17 @@ module @switch_operand_count_at_least {
   // CHECK: ^[[PATTERN_1_NEXT_BLOCK]]:
   // CHECK-NEXT: {{.*}} -> ^{{.*}}, ^bb2
   pdl.pattern : benefit(1) {
-    %operand = pdl.operand
-    %operands = pdl.operands
-    %root = pdl.operation(%operand, %operands : !pdl.value, !pdl.range<value>)
-    pdl.rewrite %root with "rewriter"
+    %operand = operand
+    %operands = operands
+    %root = operation(%operand, %operands : !pdl.value, !pdl.range<value>)
+    rewrite %root with "rewriter"
   }
   pdl.pattern : benefit(1) {
-    %operand = pdl.operand
-    %operand2 = pdl.operand
-    %operands = pdl.operands
-    %root = pdl.operation(%operand, %operand2, %operands : !pdl.value, !pdl.value, !pdl.range<value>)
-    pdl.rewrite %root with "rewriter"
+    %operand = operand
+    %operand2 = operand
+    %operands = operands
+    %root = operation(%operand, %operand2, %operands : !pdl.value, !pdl.value, !pdl.range<value>)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -366,17 +366,17 @@ module @switch_result_count_at_least {
   // CHECK-NEXT: pdl_interp.get_result
   // CHECK-NEXT: pdl_interp.is_not_null {{.*}} -> ^{{.*}}, ^[[PATTERN_2_BLOCK]]
   pdl.pattern : benefit(1) {
-    %type = pdl.type
-    %types = pdl.types
-    %root = pdl.operation -> (%type, %types : !pdl.type, !pdl.range<type>)
-    pdl.rewrite %root with "rewriter"
+    %type = type
+    %types = types
+    %root = operation -> (%type, %types : !pdl.type, !pdl.range<type>)
+    rewrite %root with "rewriter"
   }
   pdl.pattern : benefit(1) {
-    %type = pdl.type
-    %type2 = pdl.type
-    %types = pdl.types
-    %root = pdl.operation -> (%type, %type2, %types : !pdl.type, !pdl.type, !pdl.range<type>)
-    pdl.rewrite %root with "rewriter"
+    %type = type
+    %type2 = type
+    %types = types
+    %root = operation -> (%type, %type2, %types : !pdl.type, !pdl.type, !pdl.range<type>)
+    rewrite %root with "rewriter"
   }
 }
 
@@ -396,16 +396,16 @@ module @predicate_ordering {
   // CHECK: pdl_interp.apply_constraint "typeConstraint" [](%[[RESULT_TYPE]]
 
   pdl.pattern : benefit(1) {
-    %resultType = pdl.type
+    %resultType = type
     pdl.apply_native_constraint "typeConstraint"[](%resultType : !pdl.type)
-    %root = pdl.operation -> (%resultType : !pdl.type)
-    pdl.rewrite %root with "rewriter"
+    %root = operation -> (%resultType : !pdl.type)
+    rewrite %root with "rewriter"
   }
 
   pdl.pattern : benefit(1) {
-    %resultType = pdl.type
-    %apply = pdl.operation -> (%resultType : !pdl.type)
-    pdl.rewrite %apply with "rewriter"
+    %resultType = type
+    %apply = operation -> (%resultType : !pdl.type)
+    rewrite %apply with "rewriter"
   }
 }
 
@@ -435,16 +435,16 @@ module @multi_root {
   // CHECK-DAG:   pdl_interp.is_not_null %[[ROOT2]] : !pdl.operation
 
   pdl.pattern @rewrite_multi_root : benefit(1) {
-    %input1 = pdl.operand
-    %input2 = pdl.operand
-    %type = pdl.type
-    %op1 = pdl.operation(%input1 : !pdl.value) -> (%type : !pdl.type)
-    %val1 = pdl.result 0 of %op1
-    %root1 = pdl.operation(%val1 : !pdl.value)
-    %op2 = pdl.operation(%input2 : !pdl.value) -> (%type : !pdl.type)
-    %val2 = pdl.result 0 of %op2
-    %root2 = pdl.operation(%val1, %val2 : !pdl.value, !pdl.value)
-    pdl.rewrite %root1 with "rewriter"(%root2 : !pdl.operation)
+    %input1 = operand
+    %input2 = operand
+    %type = type
+    %op1 = operation(%input1 : !pdl.value) -> (%type : !pdl.type)
+    %val1 = result 0 of %op1
+    %root1 = operation(%val1 : !pdl.value)
+    %op2 = operation(%input2 : !pdl.value) -> (%type : !pdl.type)
+    %val2 = result 0 of %op2
+    %root2 = operation(%val1, %val2 : !pdl.value, !pdl.value)
+    rewrite %root1 with "rewriter"(%root2 : !pdl.operation)
   }
 }
 
@@ -467,13 +467,13 @@ module @overlapping_roots {
   // CHECK-DAG: pdl_interp.is_not_null %[[INPUT2]] : !pdl.value
 
   pdl.pattern @rewrite_overlapping_roots : benefit(1) {
-    %input1 = pdl.operand
-    %input2 = pdl.operand
-    %type = pdl.type
-    %op = pdl.operation(%input1, %input2 : !pdl.value, !pdl.value) -> (%type : !pdl.type)
-    %val = pdl.result 0 of %op
-    %root = pdl.operation(%val : !pdl.value)
-    pdl.rewrite with "rewriter"(%root : !pdl.operation)
+    %input1 = operand
+    %input2 = operand
+    %type = type
+    %op = operation(%input1, %input2 : !pdl.value, !pdl.value) -> (%type : !pdl.type)
+    %val = result 0 of %op
+    %root = operation(%val : !pdl.value)
+    rewrite with "rewriter"(%root : !pdl.operation)
   }
 }
 
@@ -499,13 +499,13 @@ module @force_overlapped_root {
   // CHECK-DAG:   pdl_interp.check_operand_count of %[[OP]] is 1
 
   pdl.pattern @rewrite_forced_overlapped_root : benefit(1) {
-    %input1 = pdl.operand
-    %input2 = pdl.operand
-    %type = pdl.type
-    %root = pdl.operation(%input1, %input2 : !pdl.value, !pdl.value) -> (%type : !pdl.type)
-    %val = pdl.result 0 of %root
-    %op = pdl.operation(%val : !pdl.value)
-    pdl.rewrite %root with "rewriter"(%op : !pdl.operation)
+    %input1 = operand
+    %input2 = operand
+    %type = type
+    %root = operation(%input1, %input2 : !pdl.value, !pdl.value) -> (%type : !pdl.type)
+    %val = result 0 of %root
+    %op = operation(%val : !pdl.value)
+    rewrite %root with "rewriter"(%op : !pdl.operation)
   }
 }
 
@@ -527,11 +527,11 @@ module @variadic_results_all {
   // CHECK-DAG:   pdl_interp.is_not_null %[[OP]]
   // CHECK-DAG:   pdl_interp.check_result_count of %[[OP]] is 0
   pdl.pattern @variadic_results_all : benefit(1) {
-    %types = pdl.types
-    %root = pdl.operation -> (%types : !pdl.range<type>)
-    %vals = pdl.results of %root
-    %op = pdl.operation(%vals : !pdl.range<value>)
-    pdl.rewrite %root with "rewriter"(%op : !pdl.operation)
+    %types = types
+    %root = operation -> (%types : !pdl.range<type>)
+    %vals = results of %root
+    %op = operation(%vals : !pdl.range<value>)
+    rewrite %root with "rewriter"(%op : !pdl.operation)
   }
 }
 
@@ -562,14 +562,14 @@ module @variadic_results_at {
   // CHECK-DAG:   pdl_interp.check_operand_count of %[[OP]] is 0
   // CHECK-DAG:   pdl_interp.check_result_count of %[[OP]] is at_least 1
   pdl.pattern @variadic_results_at : benefit(1) {
-    %type = pdl.type
-    %types = pdl.types
-    %val = pdl.operand
-    %op = pdl.operation -> (%types, %type : !pdl.range<type>, !pdl.type)
-    %vals = pdl.results 0 of %op -> !pdl.range<value>
-    %root1 = pdl.operation(%vals, %val : !pdl.range<value>, !pdl.value)
-    %root2 = pdl.operation(%val, %vals : !pdl.value, !pdl.range<value>)
-    pdl.rewrite with "rewriter"(%root1, %root2 : !pdl.operation, !pdl.operation)
+    %type = type
+    %types = types
+    %val = operand
+    %op = operation -> (%types, %type : !pdl.range<type>, !pdl.type)
+    %vals = results 0 of %op -> !pdl.range<value>
+    %root1 = operation(%vals, %val : !pdl.range<value>, !pdl.value)
+    %root2 = operation(%val, %vals : !pdl.value, !pdl.range<value>)
+    rewrite with "rewriter"(%root1, %root2 : !pdl.operation, !pdl.operation)
   }
 }
 
@@ -583,11 +583,11 @@ module @attribute_literal {
 
   // Check the correct lowering of an attribute that hasn't been bound.
   pdl.pattern : benefit(1) {
-    %attr = pdl.attribute 10
+    %attr = attribute 10
     pdl.apply_native_constraint "constraint"(%attr: !pdl.attribute)
 
-    %root = pdl.operation
-    pdl.rewrite %root with "rewriter"
+    %root = operation
+    rewrite %root with "rewriter"
   }
 }
 
@@ -602,12 +602,12 @@ module @type_literal {
 
   // Check the correct lowering of a type that hasn't been bound.
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i32
-    %types = pdl.types : [i32, i64]
+    %type = type : i32
+    %types = types : [i32, i64]
     pdl.apply_native_constraint "constraint"(%type, %types: !pdl.type, !pdl.range<type>)
 
-    %root = pdl.operation
-    pdl.rewrite %root with "rewriter"
+    %root = operation
+    rewrite %root with "rewriter"
   }
 }
 
@@ -638,16 +638,16 @@ module @common_connector {
   // CHECK-DAG:     pdl_interp.are_equal %[[ROOTB_OP]], %[[VAL0]] : !pdl.value
   // CHECK-DAG    } -> ^[[CONTA:.*]]
   pdl.pattern @common_connector : benefit(1) {
-      %type = pdl.type
-      %op = pdl.operation -> (%type, %type : !pdl.type, !pdl.type)
-      %val0 = pdl.result 0 of %op
-      %val1 = pdl.result 1 of %op
-      %rootA = pdl.operation (%val0 : !pdl.value)
-      %rootB = pdl.operation (%val0 : !pdl.value)
-      %inter = pdl.operation (%val1 : !pdl.value) -> (%type : !pdl.type)
-      %val2 = pdl.result 0 of %inter
-      %rootC = pdl.operation (%val2 : !pdl.value)
-      pdl.rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation)
+      %type = type
+      %op = operation -> (%type, %type : !pdl.type, !pdl.type)
+      %val0 = result 0 of %op
+      %val1 = result 1 of %op
+      %rootA = operation (%val0 : !pdl.value)
+      %rootB = operation (%val0 : !pdl.value)
+      %inter = operation (%val1 : !pdl.value) -> (%type : !pdl.type)
+      %val2 = result 0 of %inter
+      %rootC = operation (%val2 : !pdl.value)
+      rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation)
   }
 }
 
@@ -679,15 +679,15 @@ module @common_connector_range {
   // CHECK-DAG:     pdl_interp.are_equal %[[ROOTB_OPS]], %[[VALS0]] : !pdl.range<value>
   // CHECK-DAG    } -> ^[[CONTA:.*]]
   pdl.pattern @common_connector_range : benefit(1) {
-    %types = pdl.types
-    %op = pdl.operation -> (%types, %types : !pdl.range<type>, !pdl.range<type>)
-    %vals0 = pdl.results 0 of %op -> !pdl.range<value>
-    %vals1 = pdl.results 1 of %op -> !pdl.range<value>
-    %rootA = pdl.operation (%vals0 : !pdl.range<value>)
-    %rootB = pdl.operation (%vals0 : !pdl.range<value>)
-    %inter = pdl.operation (%vals1 : !pdl.range<value>) -> (%types : !pdl.range<type>)
-    %vals2 = pdl.results of %inter
-    %rootC = pdl.operation (%vals2 : !pdl.range<value>)
-    pdl.rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation)
+    %types = types
+    %op = operation -> (%types, %types : !pdl.range<type>, !pdl.range<type>)
+    %vals0 = results 0 of %op -> !pdl.range<value>
+    %vals1 = results 1 of %op -> !pdl.range<value>
+    %rootA = operation (%vals0 : !pdl.range<value>)
+    %rootB = operation (%vals0 : !pdl.range<value>)
+    %inter = operation (%vals1 : !pdl.range<value>) -> (%types : !pdl.range<type>)
+    %vals2 = results of %inter
+    %rootC = operation (%vals2 : !pdl.range<value>)
+    rewrite with "rewriter"(%rootA, %rootB, %rootC : !pdl.operation, !pdl.operation, !pdl.operation)
   }
 }
diff --git a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
index f9415b3c45802..8ca771f87f657 100644
--- a/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
+++ b/mlir/test/Conversion/PDLToPDLInterp/pdl-to-pdl-interp-rewriter.mlir
@@ -8,9 +8,9 @@ module @external {
   // CHECK:   func @pdl_generated_rewriter(%[[ROOT:.*]]: !pdl.operation, %[[INPUT:.*]]: !pdl.value)
   // CHECK:     pdl_interp.apply_rewrite "rewriter" [true](%[[ROOT]], %[[INPUT]] : !pdl.operation, !pdl.value)
   pdl.pattern : benefit(1) {
-    %input = pdl.operand
-    %root = pdl.operation "foo.op"(%input : !pdl.value)
-    pdl.rewrite %root with "rewriter"[true](%input : !pdl.value)
+    %input = operand
+    %root = operation "foo.op"(%input : !pdl.value)
+    rewrite %root with "rewriter"[true](%input : !pdl.value)
   }
 }
 
@@ -23,9 +23,9 @@ module @erase {
   // CHECK:     pdl_interp.erase %[[ROOT]]
   // CHECK:     pdl_interp.finalize
   pdl.pattern : benefit(1) {
-    %root = pdl.operation "foo.op"
-    pdl.rewrite %root {
-      pdl.erase %root
+    %root = operation "foo.op"
+    rewrite %root {
+      erase %root
     }
   }
 }
@@ -39,12 +39,12 @@ module @operation_attributes {
   // CHECK:     %[[ATTR1:.*]] = pdl_interp.create_attribute true
   // CHECK:     pdl_interp.create_operation "foo.op" {"attr" = %[[ATTR]], "attr1" = %[[ATTR1]]}
   pdl.pattern : benefit(1) {
-    %attr = pdl.attribute
-    %root = pdl.operation "foo.op" {"attr" = %attr}
-    pdl.rewrite %root {
-      %attr1 = pdl.attribute true
-      %newOp = pdl.operation "foo.op" {"attr" = %attr, "attr1" = %attr1}
-      pdl.erase %root
+    %attr = attribute
+    %root = operation "foo.op" {"attr" = %attr}
+    rewrite %root {
+      %attr1 = attribute true
+      %newOp = operation "foo.op" {"attr" = %attr, "attr1" = %attr1}
+      erase %root
     }
   }
 }
@@ -59,14 +59,14 @@ module @operation_operands {
   // CHECK:     %[[OPERAND1:.*]] = pdl_interp.get_result 0 of %[[NEWOP]]
   // CHECK:     pdl_interp.create_operation "foo.op2"(%[[OPERAND1]] : !pdl.value)
   pdl.pattern : benefit(1) {
-    %operand = pdl.operand
-    %root = pdl.operation "foo.op"(%operand : !pdl.value)
-    pdl.rewrite %root {
-      %type = pdl.type : i32
-      %newOp = pdl.operation "foo.op"(%operand : !pdl.value) -> (%type : !pdl.type)
-      %result = pdl.result 0 of %newOp
-      %newOp1 = pdl.operation "foo.op2"(%result : !pdl.value)
-      pdl.erase %root
+    %operand = operand
+    %root = operation "foo.op"(%operand : !pdl.value)
+    rewrite %root {
+      %type = type : i32
+      %newOp = operation "foo.op"(%operand : !pdl.value) -> (%type : !pdl.type)
+      %result = result 0 of %newOp
+      %newOp1 = operation "foo.op2"(%result : !pdl.value)
+      erase %root
     }
   }
 }
@@ -81,13 +81,13 @@ module @operation_infer_types_from_replaceop {
   // CHECK:     %[[RESULT_TYPES:.*]] = pdl_interp.get_value_type of %[[RESULTS]]
   // CHECK:     pdl_interp.create_operation "foo.op" -> (%[[RESULT_TYPES]] : !pdl.range<type>)
   pdl.pattern : benefit(1) {
-    %rootType = pdl.type
-    %rootType1 = pdl.type
-    %root = pdl.operation "foo.op" -> (%rootType, %rootType1 : !pdl.type, !pdl.type)
-    pdl.rewrite %root {
-      %newType1 = pdl.type
-      %newOp = pdl.operation "foo.op" -> (%rootType, %newType1 : !pdl.type, !pdl.type)
-      pdl.replace %root with %newOp
+    %rootType = type
+    %rootType1 = type
+    %root = operation "foo.op" -> (%rootType, %rootType1 : !pdl.type, !pdl.type)
+    rewrite %root {
+      %newType1 = type
+      %newOp = operation "foo.op" -> (%rootType, %newType1 : !pdl.type, !pdl.type)
+      replace %root with %newOp
     }
   }
 }
@@ -100,11 +100,11 @@ module @operation_infer_types_from_otherop_individual_results {
   // CHECK:   func @pdl_generated_rewriter(%[[TYPE:.*]]: !pdl.type, %[[TYPES:.*]]: !pdl.range<type>
   // CHECK:     pdl_interp.create_operation "foo.op" -> (%[[TYPE]], %[[TYPES]] : !pdl.type, !pdl.range<type>)
   pdl.pattern : benefit(1) {
-    %rootType = pdl.type
-    %rootTypes = pdl.types
-    %root = pdl.operation "foo.op" -> (%rootType, %rootTypes : !pdl.type, !pdl.range<type>)
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op" -> (%rootType, %rootTypes : !pdl.type, !pdl.range<type>)
+    %rootType = type
+    %rootTypes = types
+    %root = operation "foo.op" -> (%rootType, %rootTypes : !pdl.type, !pdl.range<type>)
+    rewrite %root {
+      %newOp = operation "foo.op" -> (%rootType, %rootTypes : !pdl.type, !pdl.range<type>)
     }
   }
 }
@@ -117,10 +117,10 @@ module @operation_infer_types_from_otherop_results {
   // CHECK:   func @pdl_generated_rewriter(%[[TYPES:.*]]: !pdl.range<type>
   // CHECK:     pdl_interp.create_operation "foo.op" -> (%[[TYPES]] : !pdl.range<type>)
   pdl.pattern : benefit(1) {
-    %rootTypes = pdl.types
-    %root = pdl.operation "foo.op" -> (%rootTypes : !pdl.range<type>)
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op" -> (%rootTypes : !pdl.range<type>)
+    %rootTypes = types
+    %root = operation "foo.op" -> (%rootTypes : !pdl.range<type>)
+    rewrite %root {
+      %newOp = operation "foo.op" -> (%rootTypes : !pdl.range<type>)
     }
   }
 }
@@ -135,11 +135,11 @@ module @replace_with_op {
   // CHECK:     %[[RESULTS:.*]] = pdl_interp.get_results of %[[NEWOP]]
   // CHECK:     pdl_interp.replace %[[ROOT]] with (%[[RESULTS]] : !pdl.range<value>)
   pdl.pattern : benefit(1) {
-    %type = pdl.type : i32
-    %root = pdl.operation "foo.op" -> (%type : !pdl.type)
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op" -> (%type : !pdl.type)
-      pdl.replace %root with %newOp
+    %type = type : i32
+    %root = operation "foo.op" -> (%type : !pdl.type)
+    rewrite %root {
+      %newOp = operation "foo.op" -> (%type : !pdl.type)
+      replace %root with %newOp
     }
   }
 }
@@ -156,14 +156,14 @@ module @replace_with_values {
   // CHECK:     %[[RESULTS_2:.*]] = pdl_interp.get_results 2 of %[[NEWOP]] : !pdl.value
   // CHECK:     pdl_interp.replace %[[ROOT]] with (%[[RESULT]], %[[RESULTS]], %[[RESULTS_2]] : !pdl.value, !pdl.range<value>, !pdl.value)
   pdl.pattern : benefit(1) {
-    %types = pdl.types
-    %root = pdl.operation "foo.op" -> (%types : !pdl.range<type>)
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op" -> (%types : !pdl.range<type>)
-      %newResult = pdl.result 0 of %newOp
-      %newResults = pdl.results 1 of %newOp -> !pdl.range<value>
-      %newResults2 = pdl.results 2 of %newOp -> !pdl.value
-      pdl.replace %root with (%newResult, %newResults, %newResults2 : !pdl.value, !pdl.range<value>, !pdl.value)
+    %types = types
+    %root = operation "foo.op" -> (%types : !pdl.range<type>)
+    rewrite %root {
+      %newOp = operation "foo.op" -> (%types : !pdl.range<type>)
+      %newResult = result 0 of %newOp
+      %newResults = results 1 of %newOp -> !pdl.range<value>
+      %newResults2 = results 2 of %newOp -> !pdl.value
+      replace %root with (%newResult, %newResults, %newResults2 : !pdl.value, !pdl.range<value>, !pdl.value)
     }
   }
 }
@@ -177,10 +177,10 @@ module @replace_with_no_results {
   // CHECK:     pdl_interp.create_operation "foo.op"
   // CHECK:     pdl_interp.erase %[[ROOT]]
   pdl.pattern : benefit(1) {
-    %root = pdl.operation "foo.op"
-    pdl.rewrite %root {
-      %newOp = pdl.operation "foo.op"
-      pdl.replace %root with %newOp
+    %root = operation "foo.op"
+    rewrite %root {
+      %newOp = operation "foo.op"
+      replace %root with %newOp
     }
   }
 }
@@ -194,11 +194,11 @@ module @apply_native_rewrite {
   // CHECK:     %[[TYPE:.*]] = pdl_interp.apply_rewrite "functor" [true](%[[ROOT]] : !pdl.operation) : !pdl.type
   // CHECK:     pdl_interp.create_operation "foo.op" -> (%[[TYPE]] : !pdl.type)
   pdl.pattern : benefit(1) {
-    %type = pdl.type
-    %root = pdl.operation "foo.op" -> (%type : !pdl.type)
-    pdl.rewrite %root {
-      %newType = pdl.apply_native_rewrite "functor"[true](%root : !pdl.operation) : !pdl.type
-      %newOp = pdl.operation "foo.op" -> (%newType : !pdl.type)
+    %type = type
+    %root = operation "foo.op" -> (%type : !pdl.type)
+    rewrite %root {
+      %newType = apply_native_rewrite "functor"[true](%root : !pdl.operation) : !pdl.type
+      %newOp = operation "foo.op" -> (%newType : !pdl.type)
     }
   }
 }
@@ -212,9 +212,9 @@ module @unbound_rewrite_op {
   // CHECK:     %[[UNUSED:.*]] = pdl_interp.create_operation "bar.op"
   // CHECK:     pdl_interp.finalize
   pdl.pattern : benefit(1) {
-    %root = pdl.operation "foo.op"
-    pdl.rewrite %root {
-      %unused = pdl.operation "bar.op"
+    %root = operation "foo.op"
+    rewrite %root {
+      %unused = operation "bar.op"
     }
   }
 }
diff --git a/mlir/test/Dialect/PDL/canonicalize.mlir b/mlir/test/Dialect/PDL/canonicalize.mlir
index 5cb08acd884e3..94688a281625b 100644
--- a/mlir/test/Dialect/PDL/canonicalize.mlir
+++ b/mlir/test/Dialect/PDL/canonicalize.mlir
@@ -1,10 +1,10 @@
 // RUN: mlir-opt -canonicalize %s | FileCheck %s
 
 pdl.pattern @operation_op : benefit(1) {
-  %root = pdl.operation "foo.op"
-  pdl.rewrite %root {
-    // CHECK: pdl.operation "bar.unused"
-    %unused_rewrite = pdl.operation "bar.unused"
-    pdl.erase %root
+  %root = operation "foo.op"
+  rewrite %root {
+    // CHECK: operation "bar.unused"
+    %unused_rewrite = operation "bar.unused"
+    erase %root
   }
 }
diff --git a/mlir/test/Dialect/PDL/invalid.mlir b/mlir/test/Dialect/PDL/invalid.mlir
index 17b7370292b0c..2c630269f80d4 100644
--- a/mlir/test/Dialect/PDL/invalid.mlir
+++ b/mlir/test/Dialect/PDL/invalid.mlir
@@ -5,11 +5,11 @@
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected at least one argument}}
   "pdl.apply_native_constraint"() {name = "foo", params = []} : () -> ()
-  pdl.rewrite %op with "rewriter"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -19,8 +19,8 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
+  %op = operation "foo.op"
+  rewrite %op {
     // expected-error@below {{expected at least one argument}}
     "pdl.apply_native_rewrite"() {name = "foo", params = []} : () -> ()
   }
@@ -33,34 +33,34 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %type = pdl.type
+  %type = type
 
   // expected-error@below {{expected only one of [`type`, `value`] to be set}}
-  %attr = pdl.attribute : %type 10
+  %attr = attribute : %type 10
 
-  %op = pdl.operation "foo.op" {"attr" = %attr} -> (%type : !pdl.type)
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op" {"attr" = %attr} -> (%type : !pdl.type)
+  rewrite %op with "rewriter"
 }
 
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
-    %type = pdl.type
+  %op = operation "foo.op"
+  rewrite %op {
+    %type = type
 
     // expected-error@below {{expected constant value when specified within a `pdl.rewrite`}}
-    %attr = pdl.attribute : %type
+    %attr = attribute : %type
   }
 }
 
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
+  %op = operation "foo.op"
+  rewrite %op {
     // expected-error@below {{expected constant value when specified within a `pdl.rewrite`}}
-    %attr = pdl.attribute
+    %attr = attribute
   }
 }
 
@@ -68,10 +68,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.attribute
+  %unused = attribute
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -82,10 +82,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.operand
+  %unused = operand
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -96,10 +96,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.operands
+  %unused = operands
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -109,10 +109,10 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
+  %op = operation "foo.op"
+  rewrite %op {
     // expected-error@below {{must have an operation name when nested within a `pdl.rewrite`}}
-    %newOp = pdl.operation
+    %newOp = operation
   }
 }
 
@@ -124,19 +124,19 @@ pdl.pattern : benefit(1) {
     attributeNames = ["attr"],
     operand_segment_sizes = dense<0> : vector<3xi32>
   } : () -> (!pdl.operation)
-  pdl.rewrite %op with "rewriter"
+  rewrite %op with "rewriter"
 }
 
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op {
-    %type = pdl.type
+  %op = operation "foo.op"
+  rewrite %op {
+    %type = type
 
     // expected-error@below {{op must have inferable or constrained result types when nested within `pdl.rewrite`}}
     // expected-note@below {{result type #0 was not constrained}}
-    %newOp = pdl.operation "foo.op" -> (%type : !pdl.type)
+    %newOp = operation "foo.op" -> (%type : !pdl.type)
   }
 }
 
@@ -144,10 +144,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.operation "foo.op"
+  %unused = operation "foo.op"
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -166,7 +166,7 @@ pdl.pattern : benefit(1) {
 
 // expected-error@below {{the pattern must contain at least one `pdl.operation`}}
 pdl.pattern : benefit(1) {
-  pdl.rewrite with "foo"
+  rewrite with "foo"
 }
 
 // -----
@@ -175,44 +175,44 @@ pdl.pattern : benefit(1) {
   // expected-note@below {{see non-`pdl` operation defined here}}
   "test.foo.other_op"() : () -> ()
 
-  %root = pdl.operation "foo.op"
-  pdl.rewrite %root with "foo"
+  %root = operation "foo.op"
+  rewrite %root with "foo"
 }
 
 // -----
 // expected-error@below {{the operations must form a connected component}}
 pdl.pattern : benefit(1) {
-  %op1 = pdl.operation "foo.op"
-  %op2 = pdl.operation "bar.op"
+  %op1 = operation "foo.op"
+  %op2 = operation "bar.op"
   // expected-note@below {{see a disconnected value / operation here}}
-  %val = pdl.result 0 of %op2
-  pdl.rewrite %op1 with "foo"(%val : !pdl.value)
+  %val = result 0 of %op2
+  rewrite %op1 with "foo"(%val : !pdl.value)
 }
 
 // -----
 // expected-error@below {{the operations must form a connected component}}
 pdl.pattern : benefit(1) {
-  %type = pdl.type
-  %op1 = pdl.operation "foo.op" -> (%type : !pdl.type)
-  %val = pdl.result 0 of %op1
-  %op2 = pdl.operation "bar.op"(%val : !pdl.value)
+  %type = type
+  %op1 = operation "foo.op" -> (%type : !pdl.type)
+  %val = result 0 of %op1
+  %op2 = operation "bar.op"(%val : !pdl.value)
   // expected-note@below {{see a disconnected value / operation here}}
-  %op3 = pdl.operation "baz.op"
-  pdl.rewrite {
-    pdl.erase %op1
-    pdl.erase %op2
-    pdl.erase %op3
+  %op3 = operation "baz.op"
+  rewrite {
+    erase %op1
+    erase %op2
+    erase %op3
   }
 }
 
 // -----
 
 pdl.pattern : benefit(1) {
-  %type = pdl.type : i32
-  %root = pdl.operation "foo.op" -> (%type : !pdl.type)
-  pdl.rewrite %root {
-    %newOp = pdl.operation "foo.op" -> (%type : !pdl.type)
-    %newResult = pdl.result 0 of %newOp
+  %type = type : i32
+  %root = operation "foo.op" -> (%type : !pdl.type)
+  rewrite %root {
+    %newOp = operation "foo.op" -> (%type : !pdl.type)
+    %newResult = result 0 of %newOp
 
     // expected-error@below {{expected no replacement values to be provided when the replacement operation is present}}
     "pdl.replace"(%root, %newOp, %newResult) {
@@ -228,10 +228,10 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %root = pdl.operation "foo.op"
+  %root = operation "foo.op"
   // expected-error@below {{expected `pdl.range<value>` result type when no index is specified, but got: '!pdl.value'}}
   %results = "pdl.results"(%root) : (!pdl.operation) -> !pdl.value
-  pdl.rewrite %root with "rewriter"
+  rewrite %root with "rewriter"
 }
 
 // -----
@@ -241,7 +241,7 @@ pdl.pattern : benefit(1) {
 //===----------------------------------------------------------------------===//
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected rewrite region to be non-empty if external name is not specified}}
   "pdl.rewrite"(%op) ({}) {
@@ -252,7 +252,7 @@ pdl.pattern : benefit(1) {
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected no external arguments when the rewrite is specified inline}}
   "pdl.rewrite"(%op, %op) ({
@@ -265,7 +265,7 @@ pdl.pattern : benefit(1) {
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected no external constant parameters when the rewrite is specified inline}}
   "pdl.rewrite"(%op) ({
@@ -278,7 +278,7 @@ pdl.pattern : benefit(1) {
 // -----
 
 pdl.pattern : benefit(1) {
-  %op = pdl.operation "foo.op"
+  %op = operation "foo.op"
 
   // expected-error@below {{expected rewrite region to be empty when rewrite is external}}
   "pdl.rewrite"(%op) ({
@@ -297,10 +297,10 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.type
+  %unused = type
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
 
 // -----
@@ -311,8 +311,8 @@ pdl.pattern : benefit(1) {
 
 pdl.pattern : benefit(1) {
   // expected-error@below {{expected a bindable user when defined in the matcher body of a `pdl.pattern`}}
-  %unused = pdl.types
+  %unused = types
 
-  %op = pdl.operation "foo.op"
-  pdl.rewrite %op with "rewriter"
+  %op = operation "foo.op"
+  rewrite %op with "rewriter"
 }
diff --git a/mlir/test/Dialect/PDL/ops.mlir b/mlir/test/Dialect/PDL/ops.mlir
index 9c7daf46a0907..1e2261a3c2a44 100644
--- a/mlir/test/Dialect/PDL/ops.mlir
+++ b/mlir/test/Dialect/PDL/ops.mlir
@@ -6,68 +6,68 @@
 
 pdl.pattern @operations : benefit(1) {
   // Operation with attributes and results.
-  %attribute = pdl.attribute
-  %type = pdl.type
-  %op0 = pdl.operation {"attr" = %attribute} -> (%type : !pdl.type)
+  %attribute = attribute
+  %type = type
+  %op0 = operation {"attr" = %attribute} -> (%type : !pdl.type)
   %op0_result = pdl.result 0 of %op0
 
   // Operation with input.
-  %input = pdl.operand
-  %root = pdl.operation(%op0_result, %input : !pdl.value, !pdl.value)
-  pdl.rewrite %root with "rewriter"
+  %input = operand
+  %root = operation(%op0_result, %input : !pdl.value, !pdl.value)
+  rewrite %root with "rewriter"
 }
 
 // -----
 
 pdl.pattern @rewrite_with_args : benefit(1) {
-  %input = pdl.operand
-  %root = pdl.operation(%input : !pdl.value)
-  pdl.rewrite %root with "rewriter"(%input : !pdl.value)
+  %input = operand
+  %root = operation(%input : !pdl.value)
+  rewrite %root with "rewriter"(%input : !pdl.value)
 }
 
 // -----
 
 pdl.pattern @rewrite_with_params : benefit(1) {
-  %root = pdl.operation
-  pdl.rewrite %root with "rewriter"["I am param"]
+  %root = operation
+  rewrite %root with "rewriter"["I am param"]
 }
 
 // -----
 
 pdl.pattern @rewrite_with_args_and_params : benefit(1) {
-  %input = pdl.operand
-  %root = pdl.operation(%input : !pdl.value)
-  pdl.rewrite %root with "rewriter"["I am param"](%input : !pdl.value)
+  %input = operand
+  %root = operation(%input : !pdl.value)
+  rewrite %root with "rewriter"["I am param"](%input : !pdl.value)
 }
 
 // -----
 
 pdl.pattern @rewrite_multi_root_optimal : benefit(2) {
-  %input1 = pdl.operand
-  %input2 = pdl.operand
-  %type = pdl.type
-  %op1 = pdl.operation(%input1 : !pdl.value) -> (%type : !pdl.type)
-  %val1 = pdl.result 0 of %op1
-  %root1 = pdl.operation(%val1 : !pdl.value)
-  %op2 = pdl.operation(%input2 : !pdl.value) -> (%type : !pdl.type)
-  %val2 = pdl.result 0 of %op2
-  %root2 = pdl.operation(%val1, %val2 : !pdl.value, !pdl.value)
-  pdl.rewrite with "rewriter"["I am param"](%root1, %root2 : !pdl.operation, !pdl.operation)
+  %input1 = operand
+  %input2 = operand
+  %type = type
+  %op1 = operation(%input1 : !pdl.value) -> (%type : !pdl.type)
+  %val1 = result 0 of %op1
+  %root1 = operation(%val1 : !pdl.value)
+  %op2 = operation(%input2 : !pdl.value) -> (%type : !pdl.type)
+  %val2 = result 0 of %op2
+  %root2 = operation(%val1, %val2 : !pdl.value, !pdl.value)
+  rewrite with "rewriter"["I am param"](%root1, %root2 : !pdl.operation, !pdl.operation)
 }
 
 // -----
 
 pdl.pattern @rewrite_multi_root_forced : benefit(2) {
-  %input1 = pdl.operand
-  %input2 = pdl.operand
-  %type = pdl.type
-  %op1 = pdl.operation(%input1 : !pdl.value) -> (%type : !pdl.type)
-  %val1 = pdl.result 0 of %op1
-  %root1 = pdl.operation(%val1 : !pdl.value)
-  %op2 = pdl.operation(%input2 : !pdl.value) -> (%type : !pdl.type)
-  %val2 = pdl.result 0 of %op2
-  %root2 = pdl.operation(%val1, %val2 : !pdl.value, !pdl.value)
-  pdl.rewrite %root1 with "rewriter"["I am param"](%root2 : !pdl.operation)
+  %input1 = operand
+  %input2 = operand
+  %type = type
+  %op1 = operation(%input1 : !pdl.value) -> (%type : !pdl.type)
+  %val1 = result 0 of %op1
+  %root1 = operation(%val1 : !pdl.value)
+  %op2 = operation(%input2 : !pdl.value) -> (%type : !pdl.type)
+  %val2 = result 0 of %op2
+  %root2 = operation(%val1, %val2 : !pdl.value, !pdl.value)
+  rewrite %root1 with "rewriter"["I am param"](%root2 : !pdl.operation)
 }
 
 // -----
@@ -75,13 +75,13 @@ pdl.pattern @rewrite_multi_root_forced : benefit(2) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from a pdl.replace.
 pdl.pattern @infer_type_from_operation_replace : benefit(1) {
-  %type1 = pdl.type : i32
-  %type2 = pdl.type
-  %root = pdl.operation -> (%type1, %type2 : !pdl.type, !pdl.type)
-  pdl.rewrite %root {
-    %type3 = pdl.type
-    %newOp = pdl.operation "foo.op" -> (%type1, %type3 : !pdl.type, !pdl.type)
-    pdl.replace %root with %newOp
+  %type1 = type : i32
+  %type2 = type
+  %root = operation -> (%type1, %type2 : !pdl.type, !pdl.type)
+  rewrite %root {
+    %type3 = type
+    %newOp = operation "foo.op" -> (%type1, %type3 : !pdl.type, !pdl.type)
+    replace %root with %newOp
   }
 }
 
@@ -90,11 +90,11 @@ pdl.pattern @infer_type_from_operation_replace : benefit(1) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from the result types of an operation within the match block.
 pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
-  %type1 = pdl.type : i32
-  %type2 = pdl.type
-  %root = pdl.operation -> (%type1, %type2 : !pdl.type, !pdl.type)
-  pdl.rewrite %root {
-    %newOp = pdl.operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type)
+  %type1 = type : i32
+  %type2 = type
+  %root = operation -> (%type1, %type2 : !pdl.type, !pdl.type)
+  rewrite %root {
+    %newOp = operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type)
   }
 }
 
@@ -103,11 +103,11 @@ pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from the result types of an operation within the match block.
 pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
-  %types = pdl.types
-  %root = pdl.operation -> (%types : !pdl.range<type>)
-  pdl.rewrite %root {
-    %otherTypes = pdl.types : [i32, i64]
-    %newOp = pdl.operation "foo.op" -> (%types, %otherTypes : !pdl.range<type>, !pdl.range<type>)
+  %types = types
+  %root = operation -> (%types : !pdl.range<type>)
+  rewrite %root {
+    %otherTypes = types : [i32, i64]
+    %newOp = operation "foo.op" -> (%types, %otherTypes : !pdl.range<type>, !pdl.range<type>)
   }
 }
 
@@ -116,13 +116,13 @@ pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from the type of an operand within the match block.
 pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
-  %type1 = pdl.type
-  %type2 = pdl.type
-  %operand1 = pdl.operand : %type1
-  %operand2 = pdl.operand : %type2
-  %root = pdl.operation (%operand1, %operand2 : !pdl.value, !pdl.value)
-  pdl.rewrite %root {
-    %newOp = pdl.operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type)
+  %type1 = type
+  %type2 = type
+  %operand1 = operand : %type1
+  %operand2 = operand : %type2
+  %root = operation (%operand1, %operand2 : !pdl.value, !pdl.value)
+  rewrite %root {
+    %newOp = operation "foo.op" -> (%type1, %type2 : !pdl.type, !pdl.type)
   }
 }
 
@@ -131,29 +131,29 @@ pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
 // Check that the result type of an operation within a rewrite can be inferred
 // from the types of operands within the match block.
 pdl.pattern @infer_type_from_type_used_in_match : benefit(1) {
-  %types = pdl.types
-  %operands = pdl.operands : %types
-  %root = pdl.operation (%operands : !pdl.range<value>)
-  pdl.rewrite %root {
-    %newOp = pdl.operation "foo.op" -> (%types : !pdl.range<type>)
+  %types = types
+  %operands = operands : %types
+  %root = operation (%operands : !pdl.range<value>)
+  rewrite %root {
+    %newOp = operation "foo.op" -> (%types : !pdl.range<type>)
   }
 }
 
 // -----
 
 pdl.pattern @apply_rewrite_with_no_results : benefit(1) {
-  %root = pdl.operation
-  pdl.rewrite %root {
-    pdl.apply_native_rewrite "NativeRewrite"(%root : !pdl.operation)
+  %root = operation
+  rewrite %root {
+    apply_native_rewrite "NativeRewrite"(%root : !pdl.operation)
   }
 }
 
 // -----
 
 pdl.pattern @attribute_with_dict : benefit(1) {
-  %root = pdl.operation
-  pdl.rewrite %root {
-    %attr = pdl.attribute {some_unit_attr} attributes {pdl.special_attribute}
-    pdl.apply_native_rewrite "NativeRewrite"(%attr : !pdl.attribute)
+  %root = operation
+  rewrite %root {
+    %attr = attribute {some_unit_attr} attributes {pdl.special_attribute}
+    apply_native_rewrite "NativeRewrite"(%attr : !pdl.attribute)
   }
 }
diff --git a/mlir/test/python/dialects/pdl_ops.py b/mlir/test/python/dialects/pdl_ops.py
index 9b5ce4c533b8e..2388ccadab2e3 100644
--- a/mlir/test/python/dialects/pdl_ops.py
+++ b/mlir/test/python/dialects/pdl_ops.py
@@ -16,13 +16,13 @@ def constructAndPrintInModule(f):
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @operations : benefit(1)  {
-# CHECK:     %0 = pdl.attribute
-# CHECK:     %1 = pdl.type
-# CHECK:     %2 = pdl.operation  {"attr" = %0} -> (%1 : !pdl.type)
-# CHECK:     %3 = pdl.result 0 of %2
-# CHECK:     %4 = pdl.operand
-# CHECK:     %5 = pdl.operation(%3, %4 : !pdl.value, !pdl.value)
-# CHECK:     pdl.rewrite %5 with "rewriter"
+# CHECK:     %0 = attribute
+# CHECK:     %1 = type
+# CHECK:     %2 = operation  {"attr" = %0} -> (%1 : !pdl.type)
+# CHECK:     %3 = result 0 of %2
+# CHECK:     %4 = operand
+# CHECK:     %5 = operation(%3, %4 : !pdl.value, !pdl.value)
+# CHECK:     rewrite %5 with "rewriter"
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -40,9 +40,9 @@ def test_operations():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_with_args : benefit(1)  {
-# CHECK:     %0 = pdl.operand
-# CHECK:     %1 = pdl.operation(%0 : !pdl.value)
-# CHECK:     pdl.rewrite %1 with "rewriter"(%0 : !pdl.value)
+# CHECK:     %0 = operand
+# CHECK:     %1 = operation(%0 : !pdl.value)
+# CHECK:     rewrite %1 with "rewriter"(%0 : !pdl.value)
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -55,8 +55,8 @@ def test_rewrite_with_args():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_with_params : benefit(1)  {
-# CHECK:     %0 = pdl.operation
-# CHECK:     pdl.rewrite %0 with "rewriter" ["I am param"]
+# CHECK:     %0 = operation
+# CHECK:     rewrite %0 with "rewriter" ["I am param"]
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -68,9 +68,9 @@ def test_rewrite_with_params():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_with_args_and_params : benefit(1)  {
-# CHECK:     %0 = pdl.operand
-# CHECK:     %1 = pdl.operation(%0 : !pdl.value)
-# CHECK:     pdl.rewrite %1 with "rewriter" ["I am param"](%0 : !pdl.value)
+# CHECK:     %0 = operand
+# CHECK:     %1 = operation(%0 : !pdl.value)
+# CHECK:     rewrite %1 with "rewriter" ["I am param"](%0 : !pdl.value)
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -83,16 +83,16 @@ def test_rewrite_with_args_and_params():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_multi_root_optimal : benefit(1)  {
-# CHECK:     %0 = pdl.operand
-# CHECK:     %1 = pdl.operand
-# CHECK:     %2 = pdl.type
-# CHECK:     %3 = pdl.operation(%0 : !pdl.value)  -> (%2 : !pdl.type)
-# CHECK:     %4 = pdl.result 0 of %3
-# CHECK:     %5 = pdl.operation(%4 : !pdl.value)
-# CHECK:     %6 = pdl.operation(%1 : !pdl.value)  -> (%2 : !pdl.type)
-# CHECK:     %7 = pdl.result 0 of %6
-# CHECK:     %8 = pdl.operation(%4, %7 : !pdl.value, !pdl.value)
-# CHECK:     pdl.rewrite with "rewriter" ["I am param"](%5, %8 : !pdl.operation, !pdl.operation)
+# CHECK:     %0 = operand
+# CHECK:     %1 = operand
+# CHECK:     %2 = type
+# CHECK:     %3 = operation(%0 : !pdl.value)  -> (%2 : !pdl.type)
+# CHECK:     %4 = result 0 of %3
+# CHECK:     %5 = operation(%4 : !pdl.value)
+# CHECK:     %6 = operation(%1 : !pdl.value)  -> (%2 : !pdl.type)
+# CHECK:     %7 = result 0 of %6
+# CHECK:     %8 = operation(%4, %7 : !pdl.value, !pdl.value)
+# CHECK:     rewrite with "rewriter" ["I am param"](%5, %8 : !pdl.operation, !pdl.operation)
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -112,16 +112,16 @@ def test_rewrite_multi_root_optimal():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_multi_root_forced : benefit(1)  {
-# CHECK:     %0 = pdl.operand
-# CHECK:     %1 = pdl.operand
-# CHECK:     %2 = pdl.type
-# CHECK:     %3 = pdl.operation(%0 : !pdl.value)  -> (%2 : !pdl.type)
-# CHECK:     %4 = pdl.result 0 of %3
-# CHECK:     %5 = pdl.operation(%4 : !pdl.value)
-# CHECK:     %6 = pdl.operation(%1 : !pdl.value)  -> (%2 : !pdl.type)
-# CHECK:     %7 = pdl.result 0 of %6
-# CHECK:     %8 = pdl.operation(%4, %7 : !pdl.value, !pdl.value)
-# CHECK:     pdl.rewrite %5 with "rewriter" ["I am param"](%8 : !pdl.operation)
+# CHECK:     %0 = operand
+# CHECK:     %1 = operand
+# CHECK:     %2 = type
+# CHECK:     %3 = operation(%0 : !pdl.value)  -> (%2 : !pdl.type)
+# CHECK:     %4 = result 0 of %3
+# CHECK:     %5 = operation(%4 : !pdl.value)
+# CHECK:     %6 = operation(%1 : !pdl.value)  -> (%2 : !pdl.type)
+# CHECK:     %7 = result 0 of %6
+# CHECK:     %8 = operation(%4, %7 : !pdl.value, !pdl.value)
+# CHECK:     rewrite %5 with "rewriter" ["I am param"](%8 : !pdl.operation)
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -141,13 +141,13 @@ def test_rewrite_multi_root_forced():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_add_body : benefit(1)  {
-# CHECK:     %0 = pdl.type : i32
-# CHECK:     %1 = pdl.type
-# CHECK:     %2 = pdl.operation  -> (%0, %1 : !pdl.type, !pdl.type)
-# CHECK:     pdl.rewrite %2  {
-# CHECK:       %3 = pdl.type
-# CHECK:       %4 = pdl.operation "foo.op"  -> (%0, %3 : !pdl.type, !pdl.type)
-# CHECK:       pdl.replace %2 with %4
+# CHECK:     %0 = type : i32
+# CHECK:     %1 = type
+# CHECK:     %2 = operation  -> (%0, %1 : !pdl.type, !pdl.type)
+# CHECK:     rewrite %2  {
+# CHECK:       %3 = type
+# CHECK:       %4 = operation "foo.op"  -> (%0, %3 : !pdl.type, !pdl.type)
+# CHECK:       replace %2 with %4
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -166,11 +166,11 @@ def test_rewrite_add_body():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_type : benefit(1)  {
-# CHECK:     %0 = pdl.type : i32
-# CHECK:     %1 = pdl.type
-# CHECK:     %2 = pdl.operation  -> (%0, %1 : !pdl.type, !pdl.type)
-# CHECK:     pdl.rewrite %2  {
-# CHECK:       %3 = pdl.operation "foo.op"  -> (%0, %1 : !pdl.type, !pdl.type)
+# CHECK:     %0 = type : i32
+# CHECK:     %1 = type
+# CHECK:     %2 = operation  -> (%0, %1 : !pdl.type, !pdl.type)
+# CHECK:     rewrite %2  {
+# CHECK:       %3 = operation "foo.op"  -> (%0, %1 : !pdl.type, !pdl.type)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -187,11 +187,11 @@ def test_rewrite_type():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_types : benefit(1)  {
-# CHECK:     %0 = pdl.types
-# CHECK:     %1 = pdl.operation  -> (%0 : !pdl.range<type>)
-# CHECK:     pdl.rewrite %1  {
-# CHECK:       %2 = pdl.types : [i32, i64]
-# CHECK:       %3 = pdl.operation "foo.op"  -> (%0, %2 : !pdl.range<type>, !pdl.range<type>)
+# CHECK:     %0 = types
+# CHECK:     %1 = operation  -> (%0 : !pdl.range<type>)
+# CHECK:     rewrite %1  {
+# CHECK:       %2 = types : [i32, i64]
+# CHECK:       %3 = operation "foo.op"  -> (%0, %2 : !pdl.range<type>, !pdl.range<type>)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -208,11 +208,11 @@ def test_rewrite_types():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @rewrite_operands : benefit(1)  {
-# CHECK:     %0 = pdl.types
-# CHECK:     %1 = pdl.operands : %0
-# CHECK:     %2 = pdl.operation(%1 : !pdl.range<value>)
-# CHECK:     pdl.rewrite %2  {
-# CHECK:       %3 = pdl.operation "foo.op"  -> (%0 : !pdl.range<type>)
+# CHECK:     %0 = types
+# CHECK:     %1 = operands : %0
+# CHECK:     %2 = operation(%1 : !pdl.range<value>)
+# CHECK:     rewrite %2  {
+# CHECK:       %3 = operation "foo.op"  -> (%0 : !pdl.range<type>)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -229,9 +229,9 @@ def test_rewrite_operands():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @native_rewrite : benefit(1)  {
-# CHECK:     %0 = pdl.operation
-# CHECK:     pdl.rewrite %0  {
-# CHECK:       pdl.apply_native_rewrite "NativeRewrite"(%0 : !pdl.operation)
+# CHECK:     %0 = operation
+# CHECK:     rewrite %0  {
+# CHECK:       apply_native_rewrite "NativeRewrite"(%0 : !pdl.operation)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -246,10 +246,10 @@ def test_native_rewrite():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @attribute_with_value : benefit(1)  {
-# CHECK:     %0 = pdl.operation
-# CHECK:     pdl.rewrite %0  {
-# CHECK:       %1 = pdl.attribute "value"
-# CHECK:       pdl.apply_native_rewrite "NativeRewrite"(%1 : !pdl.attribute)
+# CHECK:     %0 = operation
+# CHECK:     rewrite %0  {
+# CHECK:       %1 = attribute "value"
+# CHECK:       apply_native_rewrite "NativeRewrite"(%1 : !pdl.attribute)
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -265,9 +265,9 @@ def test_attribute_with_value():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @erase : benefit(1)  {
-# CHECK:     %0 = pdl.operation
-# CHECK:     pdl.rewrite %0  {
-# CHECK:       pdl.erase %0
+# CHECK:     %0 = operation
+# CHECK:     rewrite %0  {
+# CHECK:       erase %0
 # CHECK:     }
 # CHECK:   }
 # CHECK: }
@@ -282,11 +282,11 @@ def test_erase():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern @operation_results : benefit(1)  {
-# CHECK:     %0 = pdl.types
-# CHECK:     %1 = pdl.operation  -> (%0 : !pdl.range<type>)
-# CHECK:     %2 = pdl.results of %1
-# CHECK:     %3 = pdl.operation(%2 : !pdl.range<value>)
-# CHECK:     pdl.rewrite %3 with "rewriter"
+# CHECK:     %0 = types
+# CHECK:     %1 = operation  -> (%0 : !pdl.range<type>)
+# CHECK:     %2 = results of %1
+# CHECK:     %3 = operation(%2 : !pdl.range<value>)
+# CHECK:     rewrite %3 with "rewriter"
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule
@@ -302,10 +302,10 @@ def test_operation_results():
 
 # CHECK: module  {
 # CHECK:   pdl.pattern : benefit(1)  {
-# CHECK:     %0 = pdl.type
-# CHECK:     pdl.apply_native_constraint "typeConstraint" [](%0 : !pdl.type)
-# CHECK:     %1 = pdl.operation  -> (%0 : !pdl.type)
-# CHECK:     pdl.rewrite %1 with "rewrite"
+# CHECK:     %0 = type
+# CHECK:     apply_native_constraint "typeConstraint" [](%0 : !pdl.type)
+# CHECK:     %1 = operation  -> (%0 : !pdl.type)
+# CHECK:     rewrite %1 with "rewrite"
 # CHECK:   }
 # CHECK: }
 @constructAndPrintInModule

From 7a275dc35411b8c3f510166f40c225cd10dc5eec Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Jan 2022 11:49:35 -0800
Subject: [PATCH 078/946] [RISCV] Remove Zvlsseg extension.

This string no longer appears in the Vector Extension specification.
The segment load/store instructions are just part of the vector
instruction set.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D117724
---
 clang/include/clang/Basic/RISCVVTypes.def     |    4 +-
 clang/include/clang/Basic/riscv_vector.td     |    6 +-
 .../RISCV/rvv-intrinsics-overloaded/vloxseg.c |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vlseg.c   |    4 +-
 .../RISCV/rvv-intrinsics-overloaded/vlsegff.c |    4 +-
 .../RISCV/rvv-intrinsics-overloaded/vlsseg.c  |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vluxseg.c |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsoxseg.c |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsseg.c   |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vssseg.c  |    2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsuxseg.c |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vloxseg.c    |    2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vlseg.c |    4 +-
 .../CodeGen/RISCV/rvv-intrinsics/vlsegff.c    |    4 +-
 .../CodeGen/RISCV/rvv-intrinsics/vlsseg.c     |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vluxseg.c    |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsoxseg.c    |    2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsseg.c |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vssseg.c     |    2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsuxseg.c    |    2 +-
 clang/test/Driver/riscv-arch.c                |   19 -
 .../test/Preprocessor/riscv-target-features.c |    2 -
 clang/utils/TableGen/RISCVVEmitter.cpp        |   11 +-
 llvm/lib/Support/RISCVISAInfo.cpp             |   13 +-
 llvm/lib/Target/RISCV/RISCV.td                |   10 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoV.td      |   12 +-
 llvm/lib/Target/RISCV/RISCVSchedRocket.td     |    2 +-
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td    |    2 +-
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |    2 -
 llvm/test/CodeGen/RISCV/attributes.ll         |   12 +-
 llvm/test/MC/RISCV/attribute-arch-invalid.s   |    4 +-
 llvm/test/MC/RISCV/attribute-arch.s           |   40 +-
 llvm/test/MC/RISCV/rvv/zvlsseg.s              | 1018 ++++++++---------
 33 files changed, 576 insertions(+), 625 deletions(-)

diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def
index f6ef62a646363..1d4024dfb20d3 100644
--- a/clang/include/clang/Basic/RISCVVTypes.def
+++ b/clang/include/clang/Basic/RISCVVTypes.def
@@ -30,8 +30,8 @@
 //
 // - ElBits is the size of one element in bits (SEW).
 //
-// - NF is the number of fields (NFIELDS) used in the Zvlsseg instructions
-//   (TODO).
+// - NF is the number of fields (NFIELDS) used in the Load/Store Segment
+//   instructions (TODO).
 //
 // - IsSigned is true for vectors of signed integer elements and
 //   for vectors of floating-point elements.
diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index 03e16be96abee..28c57cc6afeeb 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -215,10 +215,10 @@ class RVVBuiltin<string suffix, string prototype, string type_range,
   // an automatic definition in header is emitted.
   string HeaderCode = "";
 
-  // Sub extension of vector spec. Currently only support Zvlsseg.
+  // Sub extension of vector spec.
   list<string> RequiredExtensions = [];
 
-  // Number of fields for Zvlsseg.
+  // Number of fields for Load/Store Segment instructions.
   int NF = 1;
 }
 
@@ -1567,7 +1567,6 @@ defm vle32ff: RVVVLEFFBuiltin<["i", "f"]>;
 defm vle64ff: RVVVLEFFBuiltin<["l", "d"]>;
 
 // 7.8 Vector Load/Store Segment Instructions
-let RequiredExtensions = ["Zvlsseg"] in {
 defm : RVVUnitStridedSegLoad<"vlseg">;
 defm : RVVUnitStridedSegLoadFF<"vlseg">;
 defm : RVVStridedSegLoad<"vlsseg">;
@@ -1577,7 +1576,6 @@ defm : RVVUnitStridedSegStore<"vsseg">;
 defm : RVVStridedSegStore<"vssseg">;
 defm : RVVIndexedSegStore<"vsuxseg">;
 defm : RVVIndexedSegStore<"vsoxseg">;
-}
 
 // 12. Vector Integer Arithmetic Instructions
 // 12.1. Vector Single-Width Integer Add and Subtract
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
index 9cb3325218561..0df229dc803da 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
index a0f04dc4375b4..ac49608830570 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
@@ -2,12 +2,12 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
index f2b9e445b605a..22df2b948634b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
@@ -2,12 +2,12 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
index 82c4c57982acb..4f79022bcfd92 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
index a2dbaaf48084f..8db5e8039cafe 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
index 5f41c6bcd28ec..d3454be64ca57 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
index 662ee6b7dbaed..b72721984afa8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
index 8952c2d02ba03..2a95d46f3ae92 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
index f6bfd7844f5a0..3a338ad821e27 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
index 41f4f6997dffc..f278bb76ae630 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
index 11cd459f552ff..deaaf0ed5ff93 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
@@ -2,12 +2,12 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
index 20a79659f72dc..fba2c179a4446 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
@@ -2,12 +2,12 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +experimental-v -target-feature +zfh \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone \
+// RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
index 5584d841789dc..cfdf8275483f8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
index bd499de5a1683..b83ff9833e7b9 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
index 03b8667883d71..ac8c4a412e7d0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
index 6f3927c22ae01..0829100f58ca4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
index 488a93c5f3a52..b91c023aeede7 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
index aa559161eade1..a6bdc1ad883f3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
@@ -2,7 +2,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
 // RUN:   -target-feature +zfh -target-feature +experimental-v \
-// RUN:   -target-feature +experimental-zvlsseg -disable-O0-optnone -emit-llvm %s \
+// RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index c81a87c8182ef..656abde4f75e4 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -416,25 +416,6 @@
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s
 // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v"
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvlsseg -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-NOFLAG %s
-// RV32-EXPERIMENTAL-ZVLSSEG-NOFLAG: error: invalid arch name 'rv32iv0p10_zvlsseg'
-// RV32-EXPERIMENTAL-ZVLSSEG-NOFLAG: requires '-menable-experimental-extensions'
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvlsseg -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-NOVERS %s
-// RV32-EXPERIMENTAL-ZVLSSEG-NOVERS: error: invalid arch name 'rv32iv0p10_zvlsseg'
-// RV32-EXPERIMENTAL-ZVLSSEG-NOVERS: experimental extension requires explicit version number
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvlsseg0p1 -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-BADVERS %s
-// RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: error: invalid arch name 'rv32iv0p10_zvlsseg0p1'
-// RV32-EXPERIMENTAL-ZVLSSEG-BADVERS: unsupported version number 0.1 for experimental extension 'zvlsseg'
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvlsseg0p10 -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS %s
-// RV32-EXPERIMENTAL-ZVLSSEG-GOODVERS: "-target-feature" "+experimental-zvlsseg"
-
 // RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvl32b0p10 -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-NOFLAG %s
 // RV32-EXPERIMENTAL-ZVL-NOFLAG: error: invalid arch name 'rv32iv0p10_zvl32b0p10'
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index ba310229f14e5..c69285f6e3996 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -31,7 +31,6 @@
 // CHECK-NOT: __riscv_zfh
 // CHECK-NOT: __riscv_v
 // CHECK-NOT: __riscv_vector
-// CHECK-NOT: __riscv_zvlsseg
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -march=rv32im -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-M-EXT %s
@@ -220,7 +219,6 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
 // CHECK-V-EXT: __riscv_v 10000{{$}}
 // CHECK-V-EXT: __riscv_vector 1
-// CHECK-V-EXT: __riscv_zvlsseg 10000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32izfhmin1p0 -x c -E -dM %s \
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index d3f1d63185f4a..84da6a5901a43 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -140,8 +140,7 @@ enum RISCVExtension : uint8_t {
   F = 1 << 1,
   D = 1 << 2,
   Zfh = 1 << 3,
-  Zvlsseg = 1 << 4,
-  RV64 = 1 << 5,
+  RV64 = 1 << 4,
 };
 
 // TODO refactor RVVIntrinsic class design after support all intrinsic
@@ -445,8 +444,8 @@ void RVVType::initBuiltinStr() {
     return;
   }
   BuiltinStr = "q" + utostr(Scale.getValue()) + BuiltinStr;
-  // Pointer to vector types. Defined for Zvlsseg load intrinsics.
-  // Zvlsseg load intrinsics have pointer type arguments to store the loaded
+  // Pointer to vector types. Defined for segment load intrinsics.
+  // segment load intrinsics have pointer type arguments to store the loaded
   // vector values.
   if (IsPointer)
     BuiltinStr += "*";
@@ -797,8 +796,6 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
       RISCVExtensions |= RISCVExtension::D;
   }
   for (auto Extension : RequiredExtensions) {
-    if (Extension == "Zvlsseg")
-      RISCVExtensions |= RISCVExtension::Zvlsseg;
     if (Extension == "RV64")
       RISCVExtensions |= RISCVExtension::RV64;
   }
@@ -1311,8 +1308,6 @@ bool RVVEmitter::emitExtDefStr(uint8_t Extents, raw_ostream &OS) {
     OS << LS << "defined(__riscv_d)";
   if (Extents & RISCVExtension::Zfh)
     OS << LS << "defined(__riscv_zfh)";
-  if (Extents & RISCVExtension::Zvlsseg)
-    OS << LS << "defined(__riscv_zvlsseg)";
   if (Extents & RISCVExtension::RV64)
     OS << LS << "(__riscv_xlen == 64)";
   OS << "\n";
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index d1d222d3c0eb0..fc52fc6803439 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -66,7 +66,6 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
     {"zbr", RISCVExtensionVersion{0, 93}},
     {"zbt", RISCVExtensionVersion{0, 93}},
 
-    {"zvlsseg", RISCVExtensionVersion{0, 10}},
     {"zvl32b", RISCVExtensionVersion{0, 10}},
     {"zvl64b", RISCVExtensionVersion{0, 10}},
     {"zvl128b", RISCVExtensionVersion{0, 10}},
@@ -301,9 +300,7 @@ void RISCVISAInfo::toFeatures(
     if (ExtName == "i")
       continue;
 
-    if (ExtName == "zvlsseg") {
-      Features.push_back("+experimental-zvlsseg");
-    } else if (isExperimentalExtension(ExtName)) {
+    if (isExperimentalExtension(ExtName)) {
       Features.push_back(StrAlloc("+experimental-" + ExtName));
     } else {
       Features.push_back(StrAlloc("+" + ExtName));
@@ -691,7 +688,6 @@ Error RISCVISAInfo::checkDependency() {
   bool HasE = Exts.count("e") == 1;
   bool HasD = Exts.count("d") == 1;
   bool HasF = Exts.count("f") == 1;
-  bool HasZvlsseg = Exts.count("zvlsseg") == 1;
   bool HasVector = Exts.count("zve32x") == 1;
   bool HasZve32f = Exts.count("zve32f") == 1;
   bool HasZve64d = Exts.count("zve64d") == 1;
@@ -710,11 +706,6 @@ Error RISCVISAInfo::checkDependency() {
     return createStringError(errc::invalid_argument,
                              "d requires f extension to also be specified");
 
-  if (HasZvlsseg && !HasVector)
-    return createStringError(
-        errc::invalid_argument,
-        "zvlsseg requires v or zve* extension to also be specified");
-
   // FIXME: Consider Zfinx in the future
   if (HasZve32f && !HasF)
     return createStringError(
@@ -745,7 +736,7 @@ static const char *ImpliedExtsZve64d[] = {"zve64f"};
 static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
 static const char *ImpliedExtsZve64x[] = {"zve32x", "zvl64b"};
 static const char *ImpliedExtsZve32f[] = {"zve32x"};
-static const char *ImpliedExtsZve32x[] = {"zvlsseg", "zvl32b"};
+static const char *ImpliedExtsZve32x[] = {"zvl32b"};
 static const char *ImpliedExtsZvl65536b[] = {"zvl32768b"};
 static const char *ImpliedExtsZvl32768b[] = {"zvl16384b"};
 static const char *ImpliedExtsZvl16384b[] = {"zvl8192b"};
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index fda400e490a02..36c7263235ab5 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -161,19 +161,11 @@ foreach i = { 6-15 } in {
                         [!cast<SubtargetFeature>("FeatureStdExtZvl"#!srl(I, 1)#"b")]>;
 }
 
-def FeatureStdExtZvlsseg
-    : SubtargetFeature<"experimental-zvlsseg", "HasStdExtZvlsseg", "true",
-                       "'Zvlsseg' (Vector segment load/store instructions)",
-                       []>;
-def HasStdExtZvlsseg : Predicate<"Subtarget->hasStdExtZvlsseg()">,
-                                 AssemblerPredicate<(all_of FeatureStdExtZvlsseg),
-                                 "'Zvlsseg' (Vector segment load/store instructions)">;
-
 def FeatureStdExtZve32x
     : SubtargetFeature<"experimental-zve32x", "HasStdExtZve32x", "true",
                        "'Zve32x' (Vector Extensions for Embedded Processors "
                        "with maximal 32 EEW)",
-                       [FeatureStdExtZvlsseg, FeatureStdExtZvl32b]>;
+                       [FeatureStdExtZvl32b]>;
 def HasStdExtZve32x : Predicate<"SubTarget->hasStdExtZve32x()">,
                                  AssemblerPredicate<(all_of FeatureStdExtZve32x),
                                  "'Zve32x' (Vector Extensions for Embedded Processors "
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index afdd7c4e8b3a8..306024a3e4fd4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -1461,7 +1461,7 @@ foreach n = [2, 4, 8] in {
 } // hasSideEffects = 0, mayLoad = 0, mayStore = 0
 } // Predicates = [HasVInstructions]
 
-let Predicates = [HasStdExtZvlsseg] in {
+let Predicates = [HasVInstructions] in {
   foreach nf=2-8 in {
     foreach eew = [8, 16, 32] in {
       defvar w = !cast<RISCVWidth>("LSWidth"#eew);
@@ -1494,9 +1494,9 @@ let Predicates = [HasStdExtZvlsseg] in {
                              "vsoxseg"#nf#"ei"#eew#".v">;
     }
   }
-} // Predicates = [HasStdExtZvlsseg]
+} // Predicates = [HasVInstructions]
 
-let Predicates = [HasStdExtZvlsseg, HasVInstructionsI64] in {
+let Predicates = [HasVInstructionsI64] in {
   foreach nf=2-8 in {
     // Vector Unit-strided Segment Instructions
     def VLSEG#nf#E64_V :
@@ -1512,8 +1512,8 @@ let Predicates = [HasStdExtZvlsseg, HasVInstructionsI64] in {
     def VSSSEG#nf#E64_V :
       VStridedSegmentStore<!add(nf, -1), LSWidth64, "vssseg"#nf#"e64.v">;
   }
-} // Predicates = [HasStdExtZvlsseg, HasVInstructionsI64]
-let Predicates = [HasStdExtZvlsseg, HasVInstructionsI64, IsRV64] in {
+} // Predicates = [HasVInstructionsI64]
+let Predicates = [HasVInstructionsI64, IsRV64] in {
   foreach nf=2-8 in {
     // Vector Indexed Segment Instructions
     def VLUXSEG#nf#EI64_V :
@@ -1529,6 +1529,6 @@ let Predicates = [HasStdExtZvlsseg, HasVInstructionsI64, IsRV64] in {
       VIndexedSegmentStore<!add(nf, -1), MOPSTIndexedOrder, LSWidth64,
                            "vsoxseg"#nf#"ei64.v">;
   }
-} // Predicates = [HasStdExtZvlsseg, HasVInstructionsI64, IsRV64]
+} // Predicates = [HasVInstructionsI64, IsRV64]
 
 include "RISCVInstrInfoVPseudos.td"
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 2d80d9d670403..4655015a9d1ec 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,7 +17,7 @@ def RocketModel : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = false;
-  let UnsupportedFeatures = [HasStdExtV, HasVInstructions, HasVInstructionsI64, HasStdExtZvlsseg];
+  let UnsupportedFeatures = [HasStdExtV, HasVInstructions, HasVInstructionsI64];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index b39082f153543..3b3e2699d6b60 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasStdExtV, HasStdExtZvlsseg];
+  let UnsupportedFeatures = [HasStdExtV];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index f8eb8e000a6a3..d55affd0539be 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -81,7 +81,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasStdExtZve64x = false;
   bool HasStdExtZve64f = false;
   bool HasStdExtZve64d = false;
-  bool HasStdExtZvlsseg = false;
   bool HasStdExtZfhmin = false;
   bool HasStdExtZfh = false;
   bool HasRV64 = false;
@@ -160,7 +159,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtZve64x() const { return HasStdExtZve64x; }
   bool hasStdExtZve64f() const { return HasStdExtZve64f; }
   bool hasStdExtZve64d() const { return HasStdExtZve64d; }
-  bool hasStdExtZvlsseg() const { return HasStdExtZvlsseg; }
   bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; }
   bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
   bool hasStdExtZfh() const { return HasStdExtZfh; }
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index f04eb26282ca5..59790c9967111 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -18,7 +18,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zbs %s -o - | FileCheck --check-prefix=RV32ZBS %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV32ZBT %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV32V %s
-; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f,+experimental-zvlsseg %s -o - | FileCheck --check-prefix=RV32COMBINED %s
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s
 ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s
@@ -37,7 +37,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zbs %s -o - | FileCheck --check-prefix=RV64ZBS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV64ZBT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV64V %s
-; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f,+experimental-zvlsseg %s -o - | FileCheck --check-prefix=RV64COMBINED %s
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s
 
 
 ; RV32M: .attribute 5, "rv32i2p0_m2p0"
@@ -57,8 +57,8 @@
 ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93"
 ; RV32ZBS: .attribute 5, "rv32i2p0_zbs1p0"
 ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93"
-; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
-; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
 ; RV64A: .attribute 5, "rv64i2p0_a2p0"
@@ -77,8 +77,8 @@
 ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93"
 ; RV64ZBS: .attribute 5, "rv64i2p0_zbs1p0"
 ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93"
-; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
-; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 define i32 @addi(i32 %a) {
   %1 = add i32 %a, 1
diff --git a/llvm/test/MC/RISCV/attribute-arch-invalid.s b/llvm/test/MC/RISCV/attribute-arch-invalid.s
index 933de2963dbbc..1dd5621d128ba 100644
--- a/llvm/test/MC/RISCV/attribute-arch-invalid.s
+++ b/llvm/test/MC/RISCV/attribute-arch-invalid.s
@@ -26,5 +26,5 @@
 .attribute arch, "rv32izbt"
 # CHECK: error: invalid arch name 'rv32izbt', experimental extension requires explicit version number `zbt`
 
-.attribute arch, "rv32ivzvlsseg"
-# CHECK: error: invalid arch name 'rv32ivzvlsseg', experimental extension requires explicit version number `v`
+.attribute arch, "rv32iv"
+# CHECK: error: invalid arch name 'rv32iv', experimental extension requires explicit version number `v`
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 54e496c61f4ad..3d7be42caeb56 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -36,7 +36,7 @@
 ## Experimental extensions require version string to be explicitly specified
 
 .attribute arch, "rv32iv0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32izba1p0"
 # CHECK: attribute      5, "rv32i2p0_zba1p0"
@@ -74,56 +74,56 @@
 .attribute arch, "rv32ifzfh1p0"
 # CHECK: attribute      5, "rv32i2p0_f2p0_zfh1p0_zfhmin1p0"
 
-.attribute arch, "rv32iv0p10zvlsseg0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+.attribute arch, "rv32iv0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl32b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl64b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl128b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl256b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl512b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl1024b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl2048b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl4096b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10"
 
 .attribute arch, "rv32iv0p10zvl8192b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
 
 .attribute arch, "rv32iv0p10zvl16384b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
 
 .attribute arch, "rv32iv0p10zvl32768b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
 
 .attribute arch, "rv32iv0p10zvl65536b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl65536b0p10_zvl8192b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl65536b0p10_zvl8192b0p10"
 
 .attribute arch, "rv32i_zve32x0p10"
-# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zvl32b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zvl32b0p10"
 
 .attribute arch, "rv32if_zve32f0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zvl32b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zvl32b0p10"
 
 .attribute arch, "rv32i_zve64x0p10"
-# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zve64x0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32if_zve64f0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
 
 .attribute arch, "rv32ifd_zve64d0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10_zvlsseg0p10"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
diff --git a/llvm/test/MC/RISCV/rvv/zvlsseg.s b/llvm/test/MC/RISCV/rvv/zvlsseg.s
index 99f5e157131fe..6845839fcaa33 100644
--- a/llvm/test/MC/RISCV/rvv/zvlsseg.s
+++ b/llvm/test/MC/RISCV/rvv/zvlsseg.s
@@ -1,3036 +1,3034 @@
 # RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
-# RUN:   --mattr=+experimental-zvlsseg --riscv-no-aliases \
+# RUN:   --riscv-no-aliases \
 # RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v \
-# RUN:   --mattr=+experimental-zvlsseg %s \
-# RUN:   | llvm-objdump -d --mattr=+experimental-v --mattr=+experimental-zvlsseg -M no-aliases - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN:   | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \
 # RUN:   | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v \
-# RUN:   --mattr=+experimental-zvlsseg %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
 # RUN:   | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vlseg2e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 20 <unknown>
 
 vlseg2e8.v v8, (a0)
 # CHECK-INST: vlseg2e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 22 <unknown>
 
 vlseg2e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 20 <unknown>
 
 vlseg2e16.v v8, (a0)
 # CHECK-INST: vlseg2e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 22 <unknown>
 
 vlseg2e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 20 <unknown>
 
 vlseg2e32.v v8, (a0)
 # CHECK-INST: vlseg2e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 22 <unknown>
 
 vlseg2e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 20 <unknown>
 
 vlseg2e64.v v8, (a0)
 # CHECK-INST: vlseg2e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 22 <unknown>
 
 vlseg2e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x21]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 21 <unknown>
 
 vlseg2e8ff.v v8, (a0)
 # CHECK-INST: vlseg2e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x23]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 23 <unknown>
 
 vlseg2e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x21]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 21 <unknown>
 
 vlseg2e16ff.v v8, (a0)
 # CHECK-INST: vlseg2e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x23]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 23 <unknown>
 
 vlseg2e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x21]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 21 <unknown>
 
 vlseg2e32ff.v v8, (a0)
 # CHECK-INST: vlseg2e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x23]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 23 <unknown>
 
 vlseg2e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg2e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x21]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 21 <unknown>
 
 vlseg2e64ff.v v8, (a0)
 # CHECK-INST: vlseg2e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x23]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 23 <unknown>
 
 vlsseg2e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 28 <unknown>
 
 vlsseg2e8.v v8, (a0), a1
 # CHECK-INST: vlsseg2e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 2a <unknown>
 
 vlsseg2e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 28 <unknown>
 
 vlsseg2e16.v v8, (a0), a1
 # CHECK-INST: vlsseg2e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 2a <unknown>
 
 vlsseg2e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 28 <unknown>
 
 vlsseg2e32.v v8, (a0), a1
 # CHECK-INST: vlsseg2e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 2a <unknown>
 
 vlsseg2e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg2e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 28 <unknown>
 
 vlsseg2e64.v v8, (a0), a1
 # CHECK-INST: vlsseg2e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 2a <unknown>
 
 vluxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 24 <unknown>
 
 vluxseg2ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg2ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 26 <unknown>
 
 vluxseg2ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 24 <unknown>
 
 vluxseg2ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg2ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 26 <unknown>
 
 vluxseg2ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 24 <unknown>
 
 vluxseg2ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg2ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 26 <unknown>
 
 vluxseg2ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg2ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 24 <unknown>
 
 vluxseg2ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg2ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 26 <unknown>
 
 vloxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg2ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 2c <unknown>
 
 vloxseg2ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg2ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 2e <unknown>
 
 vloxseg2ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg2ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 2c <unknown>
 
 vloxseg2ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg2ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 2e <unknown>
 
 vloxseg2ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg2ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 2c <unknown>
 
 vloxseg2ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg2ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 2e <unknown>
 
 vloxseg2ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg2ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 2c <unknown>
 
 vloxseg2ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg2ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 2e <unknown>
 
 vlseg3e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 40 <unknown>
 
 vlseg3e8.v v8, (a0)
 # CHECK-INST: vlseg3e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 42 <unknown>
 
 vlseg3e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 40 <unknown>
 
 vlseg3e16.v v8, (a0)
 # CHECK-INST: vlseg3e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 42 <unknown>
 
 vlseg3e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 40 <unknown>
 
 vlseg3e32.v v8, (a0)
 # CHECK-INST: vlseg3e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 42 <unknown>
 
 vlseg3e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 40 <unknown>
 
 vlseg3e64.v v8, (a0)
 # CHECK-INST: vlseg3e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 42 <unknown>
 
 vlseg3e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x41]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 41 <unknown>
 
 vlseg3e8ff.v v8, (a0)
 # CHECK-INST: vlseg3e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x43]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 43 <unknown>
 
 vlseg3e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x41]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 41 <unknown>
 
 vlseg3e16ff.v v8, (a0)
 # CHECK-INST: vlseg3e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x43]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 43 <unknown>
 
 vlseg3e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x41]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 41 <unknown>
 
 vlseg3e32ff.v v8, (a0)
 # CHECK-INST: vlseg3e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x43]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 43 <unknown>
 
 vlseg3e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg3e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x41]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 41 <unknown>
 
 vlseg3e64ff.v v8, (a0)
 # CHECK-INST: vlseg3e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x43]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 43 <unknown>
 
 vlsseg3e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 48 <unknown>
 
 vlsseg3e8.v v8, (a0), a1
 # CHECK-INST: vlsseg3e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 4a <unknown>
 
 vlsseg3e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 48 <unknown>
 
 vlsseg3e16.v v8, (a0), a1
 # CHECK-INST: vlsseg3e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 4a <unknown>
 
 vlsseg3e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 48 <unknown>
 
 vlsseg3e32.v v8, (a0), a1
 # CHECK-INST: vlsseg3e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 4a <unknown>
 
 vlsseg3e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg3e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 48 <unknown>
 
 vlsseg3e64.v v8, (a0), a1
 # CHECK-INST: vlsseg3e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 4a <unknown>
 
 vluxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 44 <unknown>
 
 vluxseg3ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg3ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 46 <unknown>
 
 vluxseg3ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 44 <unknown>
 
 vluxseg3ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg3ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 46 <unknown>
 
 vluxseg3ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 44 <unknown>
 
 vluxseg3ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg3ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 46 <unknown>
 
 vluxseg3ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg3ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 44 <unknown>
 
 vluxseg3ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg3ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 46 <unknown>
 
 vloxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg3ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 4c <unknown>
 
 vloxseg3ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg3ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 4e <unknown>
 
 vloxseg3ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg3ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 4c <unknown>
 
 vloxseg3ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg3ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 4e <unknown>
 
 vloxseg3ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg3ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 4c <unknown>
 
 vloxseg3ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg3ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 4e <unknown>
 
 vloxseg3ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg3ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 4c <unknown>
 
 vloxseg3ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg3ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 4e <unknown>
 
 vlseg4e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 60 <unknown>
 
 vlseg4e8.v v8, (a0)
 # CHECK-INST: vlseg4e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 62 <unknown>
 
 vlseg4e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 60 <unknown>
 
 vlseg4e16.v v8, (a0)
 # CHECK-INST: vlseg4e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 62 <unknown>
 
 vlseg4e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 60 <unknown>
 
 vlseg4e32.v v8, (a0)
 # CHECK-INST: vlseg4e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 62 <unknown>
 
 vlseg4e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 60 <unknown>
 
 vlseg4e64.v v8, (a0)
 # CHECK-INST: vlseg4e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 62 <unknown>
 
 vlseg4e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x61]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 61 <unknown>
 
 vlseg4e8ff.v v8, (a0)
 # CHECK-INST: vlseg4e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x63]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 63 <unknown>
 
 vlseg4e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x61]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 61 <unknown>
 
 vlseg4e16ff.v v8, (a0)
 # CHECK-INST: vlseg4e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x63]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 63 <unknown>
 
 vlseg4e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x61]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 61 <unknown>
 
 vlseg4e32ff.v v8, (a0)
 # CHECK-INST: vlseg4e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x63]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 63 <unknown>
 
 vlseg4e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg4e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x61]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 61 <unknown>
 
 vlseg4e64ff.v v8, (a0)
 # CHECK-INST: vlseg4e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x63]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 63 <unknown>
 
 vlsseg4e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 68 <unknown>
 
 vlsseg4e8.v v8, (a0), a1
 # CHECK-INST: vlsseg4e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 6a <unknown>
 
 vlsseg4e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 68 <unknown>
 
 vlsseg4e16.v v8, (a0), a1
 # CHECK-INST: vlsseg4e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 6a <unknown>
 
 vlsseg4e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 68 <unknown>
 
 vlsseg4e32.v v8, (a0), a1
 # CHECK-INST: vlsseg4e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 6a <unknown>
 
 vlsseg4e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg4e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 68 <unknown>
 
 vlsseg4e64.v v8, (a0), a1
 # CHECK-INST: vlsseg4e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 6a <unknown>
 
 vluxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 64 <unknown>
 
 vluxseg4ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg4ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 66 <unknown>
 
 vluxseg4ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 64 <unknown>
 
 vluxseg4ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg4ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 66 <unknown>
 
 vluxseg4ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 64 <unknown>
 
 vluxseg4ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg4ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 66 <unknown>
 
 vluxseg4ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg4ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 64 <unknown>
 
 vluxseg4ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg4ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 66 <unknown>
 
 vloxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg4ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 6c <unknown>
 
 vloxseg4ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg4ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 6e <unknown>
 
 vloxseg4ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg4ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 6c <unknown>
 
 vloxseg4ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg4ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 6e <unknown>
 
 vloxseg4ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg4ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 6c <unknown>
 
 vloxseg4ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg4ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 6e <unknown>
 
 vloxseg4ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg4ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 6c <unknown>
 
 vloxseg4ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg4ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 6e <unknown>
 
 vlseg5e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 80 <unknown>
 
 vlseg5e8.v v8, (a0)
 # CHECK-INST: vlseg5e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 82 <unknown>
 
 vlseg5e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 80 <unknown>
 
 vlseg5e16.v v8, (a0)
 # CHECK-INST: vlseg5e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 82 <unknown>
 
 vlseg5e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 80 <unknown>
 
 vlseg5e32.v v8, (a0)
 # CHECK-INST: vlseg5e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 82 <unknown>
 
 vlseg5e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 80 <unknown>
 
 vlseg5e64.v v8, (a0)
 # CHECK-INST: vlseg5e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 82 <unknown>
 
 vlseg5e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0x81]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 81 <unknown>
 
 vlseg5e8ff.v v8, (a0)
 # CHECK-INST: vlseg5e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0x83]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 83 <unknown>
 
 vlseg5e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0x81]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 81 <unknown>
 
 vlseg5e16ff.v v8, (a0)
 # CHECK-INST: vlseg5e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0x83]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 83 <unknown>
 
 vlseg5e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0x81]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 81 <unknown>
 
 vlseg5e32ff.v v8, (a0)
 # CHECK-INST: vlseg5e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0x83]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 83 <unknown>
 
 vlseg5e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg5e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0x81]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 81 <unknown>
 
 vlseg5e64ff.v v8, (a0)
 # CHECK-INST: vlseg5e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0x83]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 83 <unknown>
 
 vlsseg5e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 88 <unknown>
 
 vlsseg5e8.v v8, (a0), a1
 # CHECK-INST: vlsseg5e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 8a <unknown>
 
 vlsseg5e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 88 <unknown>
 
 vlsseg5e16.v v8, (a0), a1
 # CHECK-INST: vlsseg5e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 8a <unknown>
 
 vlsseg5e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 88 <unknown>
 
 vlsseg5e32.v v8, (a0), a1
 # CHECK-INST: vlsseg5e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 8a <unknown>
 
 vlsseg5e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg5e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 88 <unknown>
 
 vlsseg5e64.v v8, (a0), a1
 # CHECK-INST: vlsseg5e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 8a <unknown>
 
 vluxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 84 <unknown>
 
 vluxseg5ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg5ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 86 <unknown>
 
 vluxseg5ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 84 <unknown>
 
 vluxseg5ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg5ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 86 <unknown>
 
 vluxseg5ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 84 <unknown>
 
 vluxseg5ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg5ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 86 <unknown>
 
 vluxseg5ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg5ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 84 <unknown>
 
 vluxseg5ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg5ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 86 <unknown>
 
 vloxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg5ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 8c <unknown>
 
 vloxseg5ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg5ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 8e <unknown>
 
 vloxseg5ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg5ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 8c <unknown>
 
 vloxseg5ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg5ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 8e <unknown>
 
 vloxseg5ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg5ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 8c <unknown>
 
 vloxseg5ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg5ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 8e <unknown>
 
 vloxseg5ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg5ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 8c <unknown>
 
 vloxseg5ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg5ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 8e <unknown>
 
 vlseg6e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 a0 <unknown>
 
 vlseg6e8.v v8, (a0)
 # CHECK-INST: vlseg6e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 a2 <unknown>
 
 vlseg6e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 a0 <unknown>
 
 vlseg6e16.v v8, (a0)
 # CHECK-INST: vlseg6e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 a2 <unknown>
 
 vlseg6e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 a0 <unknown>
 
 vlseg6e32.v v8, (a0)
 # CHECK-INST: vlseg6e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 a2 <unknown>
 
 vlseg6e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 a0 <unknown>
 
 vlseg6e64.v v8, (a0)
 # CHECK-INST: vlseg6e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 a2 <unknown>
 
 vlseg6e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 a1 <unknown>
 
 vlseg6e8ff.v v8, (a0)
 # CHECK-INST: vlseg6e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xa3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 a3 <unknown>
 
 vlseg6e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xa1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 a1 <unknown>
 
 vlseg6e16ff.v v8, (a0)
 # CHECK-INST: vlseg6e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xa3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 a3 <unknown>
 
 vlseg6e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xa1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 a1 <unknown>
 
 vlseg6e32ff.v v8, (a0)
 # CHECK-INST: vlseg6e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xa3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 a3 <unknown>
 
 vlseg6e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg6e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xa1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 a1 <unknown>
 
 vlseg6e64ff.v v8, (a0)
 # CHECK-INST: vlseg6e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xa3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 a3 <unknown>
 
 vlsseg6e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 a8 <unknown>
 
 vlsseg6e8.v v8, (a0), a1
 # CHECK-INST: vlsseg6e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 aa <unknown>
 
 vlsseg6e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 a8 <unknown>
 
 vlsseg6e16.v v8, (a0), a1
 # CHECK-INST: vlsseg6e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 aa <unknown>
 
 vlsseg6e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 a8 <unknown>
 
 vlsseg6e32.v v8, (a0), a1
 # CHECK-INST: vlsseg6e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 aa <unknown>
 
 vlsseg6e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg6e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 a8 <unknown>
 
 vlsseg6e64.v v8, (a0), a1
 # CHECK-INST: vlsseg6e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 aa <unknown>
 
 vluxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 a4 <unknown>
 
 vluxseg6ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg6ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 a6 <unknown>
 
 vluxseg6ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 a4 <unknown>
 
 vluxseg6ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg6ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 a6 <unknown>
 
 vluxseg6ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 a4 <unknown>
 
 vluxseg6ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg6ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 a6 <unknown>
 
 vluxseg6ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg6ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 a4 <unknown>
 
 vluxseg6ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg6ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 a6 <unknown>
 
 vloxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg6ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ac <unknown>
 
 vloxseg6ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg6ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ae <unknown>
 
 vloxseg6ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg6ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ac <unknown>
 
 vloxseg6ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg6ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ae <unknown>
 
 vloxseg6ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg6ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ac <unknown>
 
 vloxseg6ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg6ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ae <unknown>
 
 vloxseg6ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg6ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ac <unknown>
 
 vloxseg6ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg6ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ae <unknown>
 
 vlseg7e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 c0 <unknown>
 
 vlseg7e8.v v8, (a0)
 # CHECK-INST: vlseg7e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 c2 <unknown>
 
 vlseg7e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 c0 <unknown>
 
 vlseg7e16.v v8, (a0)
 # CHECK-INST: vlseg7e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 c2 <unknown>
 
 vlseg7e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 c0 <unknown>
 
 vlseg7e32.v v8, (a0)
 # CHECK-INST: vlseg7e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 c2 <unknown>
 
 vlseg7e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 c0 <unknown>
 
 vlseg7e64.v v8, (a0)
 # CHECK-INST: vlseg7e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 c2 <unknown>
 
 vlseg7e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 c1 <unknown>
 
 vlseg7e8ff.v v8, (a0)
 # CHECK-INST: vlseg7e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xc3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 c3 <unknown>
 
 vlseg7e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xc1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 c1 <unknown>
 
 vlseg7e16ff.v v8, (a0)
 # CHECK-INST: vlseg7e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xc3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 c3 <unknown>
 
 vlseg7e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xc1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 c1 <unknown>
 
 vlseg7e32ff.v v8, (a0)
 # CHECK-INST: vlseg7e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xc3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 c3 <unknown>
 
 vlseg7e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg7e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xc1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 c1 <unknown>
 
 vlseg7e64ff.v v8, (a0)
 # CHECK-INST: vlseg7e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xc3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 c3 <unknown>
 
 vlsseg7e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 c8 <unknown>
 
 vlsseg7e8.v v8, (a0), a1
 # CHECK-INST: vlsseg7e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 ca <unknown>
 
 vlsseg7e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 c8 <unknown>
 
 vlsseg7e16.v v8, (a0), a1
 # CHECK-INST: vlsseg7e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 ca <unknown>
 
 vlsseg7e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 c8 <unknown>
 
 vlsseg7e32.v v8, (a0), a1
 # CHECK-INST: vlsseg7e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 ca <unknown>
 
 vlsseg7e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg7e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 c8 <unknown>
 
 vlsseg7e64.v v8, (a0), a1
 # CHECK-INST: vlsseg7e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 ca <unknown>
 
 vluxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 c4 <unknown>
 
 vluxseg7ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg7ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 c6 <unknown>
 
 vluxseg7ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 c4 <unknown>
 
 vluxseg7ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg7ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 c6 <unknown>
 
 vluxseg7ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 c4 <unknown>
 
 vluxseg7ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg7ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 c6 <unknown>
 
 vluxseg7ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg7ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 c4 <unknown>
 
 vluxseg7ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg7ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 c6 <unknown>
 
 vloxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg7ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 cc <unknown>
 
 vloxseg7ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg7ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ce <unknown>
 
 vloxseg7ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg7ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 cc <unknown>
 
 vloxseg7ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg7ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ce <unknown>
 
 vloxseg7ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg7ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 cc <unknown>
 
 vloxseg7ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg7ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ce <unknown>
 
 vloxseg7ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg7ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 cc <unknown>
 
 vloxseg7ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg7ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ce <unknown>
 
 vlseg8e8.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e8.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 e0 <unknown>
 
 vlseg8e8.v v8, (a0)
 # CHECK-INST: vlseg8e8.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 e2 <unknown>
 
 vlseg8e16.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e16.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 e0 <unknown>
 
 vlseg8e16.v v8, (a0)
 # CHECK-INST: vlseg8e16.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 e2 <unknown>
 
 vlseg8e32.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e32.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 e0 <unknown>
 
 vlseg8e32.v v8, (a0)
 # CHECK-INST: vlseg8e32.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 e2 <unknown>
 
 vlseg8e64.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e64.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 e0 <unknown>
 
 vlseg8e64.v v8, (a0)
 # CHECK-INST: vlseg8e64.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 e2 <unknown>
 
 vlseg8e8ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e8ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 e1 <unknown>
 
 vlseg8e8ff.v v8, (a0)
 # CHECK-INST: vlseg8e8ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x04,0x05,0xe3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 05 e3 <unknown>
 
 vlseg8e16ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e16ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x54,0x05,0xe1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 e1 <unknown>
 
 vlseg8e16ff.v v8, (a0)
 # CHECK-INST: vlseg8e16ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x54,0x05,0xe3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 05 e3 <unknown>
 
 vlseg8e32ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e32ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x64,0x05,0xe1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 e1 <unknown>
 
 vlseg8e32ff.v v8, (a0)
 # CHECK-INST: vlseg8e32ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x64,0x05,0xe3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 05 e3 <unknown>
 
 vlseg8e64ff.v v8, (a0), v0.t
 # CHECK-INST: vlseg8e64ff.v v8, (a0), v0.t
 # CHECK-ENCODING: [0x07,0x74,0x05,0xe1]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 e1 <unknown>
 
 vlseg8e64ff.v v8, (a0)
 # CHECK-INST: vlseg8e64ff.v v8, (a0)
 # CHECK-ENCODING: [0x07,0x74,0x05,0xe3]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 05 e3 <unknown>
 
 vlsseg8e8.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e8.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 e8 <unknown>
 
 vlsseg8e8.v v8, (a0), a1
 # CHECK-INST: vlsseg8e8.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x04,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 b5 ea <unknown>
 
 vlsseg8e16.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e16.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 e8 <unknown>
 
 vlsseg8e16.v v8, (a0), a1
 # CHECK-INST: vlsseg8e16.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x54,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 b5 ea <unknown>
 
 vlsseg8e32.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e32.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 e8 <unknown>
 
 vlsseg8e32.v v8, (a0), a1
 # CHECK-INST: vlsseg8e32.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x64,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 b5 ea <unknown>
 
 vlsseg8e64.v v8, (a0), a1, v0.t
 # CHECK-INST: vlsseg8e64.v v8, (a0), a1, v0.t
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 e8 <unknown>
 
 vlsseg8e64.v v8, (a0), a1
 # CHECK-INST: vlsseg8e64.v v8, (a0), a1
 # CHECK-ENCODING: [0x07,0x74,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 b5 ea <unknown>
 
 vluxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 e4 <unknown>
 
 vluxseg8ei8.v v8, (a0), v4
 # CHECK-INST: vluxseg8ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 e6 <unknown>
 
 vluxseg8ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 e4 <unknown>
 
 vluxseg8ei16.v v8, (a0), v4
 # CHECK-INST: vluxseg8ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 e6 <unknown>
 
 vluxseg8ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 e4 <unknown>
 
 vluxseg8ei32.v v8, (a0), v4
 # CHECK-INST: vluxseg8ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 e6 <unknown>
 
 vluxseg8ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vluxseg8ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 e4 <unknown>
 
 vluxseg8ei64.v v8, (a0), v4
 # CHECK-INST: vluxseg8ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 e6 <unknown>
 
 vloxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg8ei8.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x04,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ec <unknown>
 
 vloxseg8ei8.v v8, (a0), v4
 # CHECK-INST: vloxseg8ei8.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x04,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 04 45 ee <unknown>
 
 vloxseg8ei16.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg8ei16.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x54,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ec <unknown>
 
 vloxseg8ei16.v v8, (a0), v4
 # CHECK-INST: vloxseg8ei16.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x54,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 54 45 ee <unknown>
 
 vloxseg8ei32.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg8ei32.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x64,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ec <unknown>
 
 vloxseg8ei32.v v8, (a0), v4
 # CHECK-INST: vloxseg8ei32.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x64,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 64 45 ee <unknown>
 
 vloxseg8ei64.v v8, (a0), v4, v0.t
 # CHECK-INST: vloxseg8ei64.v v8, (a0), v4, v0.t
 # CHECK-ENCODING: [0x07,0x74,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ec <unknown>
 
 vloxseg8ei64.v v8, (a0), v4
 # CHECK-INST: vloxseg8ei64.v v8, (a0), v4
 # CHECK-ENCODING: [0x07,0x74,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 07 74 45 ee <unknown>
 
 vsseg2e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg2e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 20 <unknown>
 
 vsseg2e8.v v24, (a0)
 # CHECK-INST: vsseg2e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 22 <unknown>
 
 vsseg2e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg2e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 20 <unknown>
 
 vsseg2e16.v v24, (a0)
 # CHECK-INST: vsseg2e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 22 <unknown>
 
 vsseg2e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg2e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 20 <unknown>
 
 vsseg2e32.v v24, (a0)
 # CHECK-INST: vsseg2e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 22 <unknown>
 
 vsseg2e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg2e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x20]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 20 <unknown>
 
 vsseg2e64.v v24, (a0)
 # CHECK-INST: vsseg2e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x22]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 22 <unknown>
 
 vssseg2e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 28 <unknown>
 
 vssseg2e8.v v24, (a0), a1
 # CHECK-INST: vssseg2e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 2a <unknown>
 
 vssseg2e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 28 <unknown>
 
 vssseg2e16.v v24, (a0), a1
 # CHECK-INST: vssseg2e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 2a <unknown>
 
 vssseg2e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 28 <unknown>
 
 vssseg2e32.v v24, (a0), a1
 # CHECK-INST: vssseg2e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 2a <unknown>
 
 vssseg2e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg2e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x28]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 28 <unknown>
 
 vssseg2e64.v v24, (a0), a1
 # CHECK-INST: vssseg2e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x2a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 2a <unknown>
 
 vsuxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 24 <unknown>
 
 vsuxseg2ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg2ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 26 <unknown>
 
 vsuxseg2ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 24 <unknown>
 
 vsuxseg2ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg2ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 26 <unknown>
 
 vsuxseg2ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 24 <unknown>
 
 vsuxseg2ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg2ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 26 <unknown>
 
 vsuxseg2ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg2ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x24]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 24 <unknown>
 
 vsuxseg2ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg2ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x26]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 26 <unknown>
 
 vsoxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg2ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 2c <unknown>
 
 vsoxseg2ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg2ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 2e <unknown>
 
 vsoxseg2ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg2ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 2c <unknown>
 
 vsoxseg2ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg2ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 2e <unknown>
 
 vsoxseg2ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg2ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 2c <unknown>
 
 vsoxseg2ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg2ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 2e <unknown>
 
 vsoxseg2ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg2ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x2c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 2c <unknown>
 
 vsoxseg2ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg2ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x2e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 2e <unknown>
 
 vsseg3e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg3e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 40 <unknown>
 
 vsseg3e8.v v24, (a0)
 # CHECK-INST: vsseg3e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 42 <unknown>
 
 vsseg3e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg3e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 40 <unknown>
 
 vsseg3e16.v v24, (a0)
 # CHECK-INST: vsseg3e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 42 <unknown>
 
 vsseg3e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg3e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 40 <unknown>
 
 vsseg3e32.v v24, (a0)
 # CHECK-INST: vsseg3e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 42 <unknown>
 
 vsseg3e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg3e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x40]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 40 <unknown>
 
 vsseg3e64.v v24, (a0)
 # CHECK-INST: vsseg3e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x42]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 42 <unknown>
 
 vssseg3e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 48 <unknown>
 
 vssseg3e8.v v24, (a0), a1
 # CHECK-INST: vssseg3e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 4a <unknown>
 
 vssseg3e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 48 <unknown>
 
 vssseg3e16.v v24, (a0), a1
 # CHECK-INST: vssseg3e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 4a <unknown>
 
 vssseg3e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 48 <unknown>
 
 vssseg3e32.v v24, (a0), a1
 # CHECK-INST: vssseg3e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 4a <unknown>
 
 vssseg3e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg3e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x48]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 48 <unknown>
 
 vssseg3e64.v v24, (a0), a1
 # CHECK-INST: vssseg3e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x4a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 4a <unknown>
 
 vsuxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 44 <unknown>
 
 vsuxseg3ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg3ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 46 <unknown>
 
 vsuxseg3ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 44 <unknown>
 
 vsuxseg3ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg3ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 46 <unknown>
 
 vsuxseg3ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 44 <unknown>
 
 vsuxseg3ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg3ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 46 <unknown>
 
 vsuxseg3ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg3ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x44]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 44 <unknown>
 
 vsuxseg3ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg3ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x46]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 46 <unknown>
 
 vsoxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg3ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 4c <unknown>
 
 vsoxseg3ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg3ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 4e <unknown>
 
 vsoxseg3ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg3ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 4c <unknown>
 
 vsoxseg3ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg3ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 4e <unknown>
 
 vsoxseg3ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg3ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 4c <unknown>
 
 vsoxseg3ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg3ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 4e <unknown>
 
 vsoxseg3ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg3ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x4c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 4c <unknown>
 
 vsoxseg3ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg3ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x4e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 4e <unknown>
 
 vsseg4e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg4e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 60 <unknown>
 
 vsseg4e8.v v24, (a0)
 # CHECK-INST: vsseg4e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 62 <unknown>
 
 vsseg4e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg4e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 60 <unknown>
 
 vsseg4e16.v v24, (a0)
 # CHECK-INST: vsseg4e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 62 <unknown>
 
 vsseg4e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg4e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 60 <unknown>
 
 vsseg4e32.v v24, (a0)
 # CHECK-INST: vsseg4e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 62 <unknown>
 
 vsseg4e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg4e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x60]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 60 <unknown>
 
 vsseg4e64.v v24, (a0)
 # CHECK-INST: vsseg4e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x62]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 62 <unknown>
 
 vssseg4e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 68 <unknown>
 
 vssseg4e8.v v24, (a0), a1
 # CHECK-INST: vssseg4e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 6a <unknown>
 
 vssseg4e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 68 <unknown>
 
 vssseg4e16.v v24, (a0), a1
 # CHECK-INST: vssseg4e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 6a <unknown>
 
 vssseg4e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 68 <unknown>
 
 vssseg4e32.v v24, (a0), a1
 # CHECK-INST: vssseg4e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 6a <unknown>
 
 vssseg4e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg4e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x68]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 68 <unknown>
 
 vssseg4e64.v v24, (a0), a1
 # CHECK-INST: vssseg4e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x6a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 6a <unknown>
 
 vsuxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 64 <unknown>
 
 vsuxseg4ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg4ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 66 <unknown>
 
 vsuxseg4ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 64 <unknown>
 
 vsuxseg4ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg4ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 66 <unknown>
 
 vsuxseg4ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 64 <unknown>
 
 vsuxseg4ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg4ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 66 <unknown>
 
 vsuxseg4ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg4ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x64]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 64 <unknown>
 
 vsuxseg4ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg4ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x66]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 66 <unknown>
 
 vsoxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg4ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 6c <unknown>
 
 vsoxseg4ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg4ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 6e <unknown>
 
 vsoxseg4ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg4ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 6c <unknown>
 
 vsoxseg4ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg4ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 6e <unknown>
 
 vsoxseg4ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg4ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 6c <unknown>
 
 vsoxseg4ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg4ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 6e <unknown>
 
 vsoxseg4ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg4ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x6c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 6c <unknown>
 
 vsoxseg4ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg4ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x6e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 6e <unknown>
 
 vsseg5e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg5e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 80 <unknown>
 
 vsseg5e8.v v24, (a0)
 # CHECK-INST: vsseg5e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 82 <unknown>
 
 vsseg5e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg5e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 80 <unknown>
 
 vsseg5e16.v v24, (a0)
 # CHECK-INST: vsseg5e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 82 <unknown>
 
 vsseg5e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg5e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 80 <unknown>
 
 vsseg5e32.v v24, (a0)
 # CHECK-INST: vsseg5e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 82 <unknown>
 
 vsseg5e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg5e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x80]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 80 <unknown>
 
 vsseg5e64.v v24, (a0)
 # CHECK-INST: vsseg5e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0x82]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 82 <unknown>
 
 vssseg5e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 88 <unknown>
 
 vssseg5e8.v v24, (a0), a1
 # CHECK-INST: vssseg5e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 8a <unknown>
 
 vssseg5e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 88 <unknown>
 
 vssseg5e16.v v24, (a0), a1
 # CHECK-INST: vssseg5e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 8a <unknown>
 
 vssseg5e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 88 <unknown>
 
 vssseg5e32.v v24, (a0), a1
 # CHECK-INST: vssseg5e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 8a <unknown>
 
 vssseg5e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg5e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x88]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 88 <unknown>
 
 vssseg5e64.v v24, (a0), a1
 # CHECK-INST: vssseg5e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0x8a]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 8a <unknown>
 
 vsuxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 84 <unknown>
 
 vsuxseg5ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg5ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 86 <unknown>
 
 vsuxseg5ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 84 <unknown>
 
 vsuxseg5ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg5ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 86 <unknown>
 
 vsuxseg5ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 84 <unknown>
 
 vsuxseg5ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg5ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 86 <unknown>
 
 vsuxseg5ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg5ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x84]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 84 <unknown>
 
 vsuxseg5ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg5ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x86]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 86 <unknown>
 
 vsoxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg5ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 8c <unknown>
 
 vsoxseg5ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg5ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 8e <unknown>
 
 vsoxseg5ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg5ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 8c <unknown>
 
 vsoxseg5ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg5ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 8e <unknown>
 
 vsoxseg5ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg5ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 8c <unknown>
 
 vsoxseg5ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg5ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 8e <unknown>
 
 vsoxseg5ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg5ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x8c]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 8c <unknown>
 
 vsoxseg5ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg5ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0x8e]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 8e <unknown>
 
 vsseg6e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg6e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 a0 <unknown>
 
 vsseg6e8.v v24, (a0)
 # CHECK-INST: vsseg6e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 a2 <unknown>
 
 vsseg6e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg6e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 a0 <unknown>
 
 vsseg6e16.v v24, (a0)
 # CHECK-INST: vsseg6e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 a2 <unknown>
 
 vsseg6e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg6e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 a0 <unknown>
 
 vsseg6e32.v v24, (a0)
 # CHECK-INST: vsseg6e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 a2 <unknown>
 
 vsseg6e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg6e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xa0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 a0 <unknown>
 
 vsseg6e64.v v24, (a0)
 # CHECK-INST: vsseg6e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xa2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 a2 <unknown>
 
 vssseg6e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 a8 <unknown>
 
 vssseg6e8.v v24, (a0), a1
 # CHECK-INST: vssseg6e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 aa <unknown>
 
 vssseg6e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 a8 <unknown>
 
 vssseg6e16.v v24, (a0), a1
 # CHECK-INST: vssseg6e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 aa <unknown>
 
 vssseg6e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 a8 <unknown>
 
 vssseg6e32.v v24, (a0), a1
 # CHECK-INST: vssseg6e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 aa <unknown>
 
 vssseg6e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg6e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xa8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 a8 <unknown>
 
 vssseg6e64.v v24, (a0), a1
 # CHECK-INST: vssseg6e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xaa]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 aa <unknown>
 
 vsuxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 a4 <unknown>
 
 vsuxseg6ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg6ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 a6 <unknown>
 
 vsuxseg6ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 a4 <unknown>
 
 vsuxseg6ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg6ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 a6 <unknown>
 
 vsuxseg6ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 a4 <unknown>
 
 vsuxseg6ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg6ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 a6 <unknown>
 
 vsuxseg6ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg6ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xa4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 a4 <unknown>
 
 vsuxseg6ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg6ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xa6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 a6 <unknown>
 
 vsoxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg6ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ac <unknown>
 
 vsoxseg6ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg6ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ae <unknown>
 
 vsoxseg6ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg6ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ac <unknown>
 
 vsoxseg6ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg6ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ae <unknown>
 
 vsoxseg6ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg6ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ac <unknown>
 
 vsoxseg6ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg6ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ae <unknown>
 
 vsoxseg6ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg6ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xac]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ac <unknown>
 
 vsoxseg6ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg6ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xae]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ae <unknown>
 
 vsseg7e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg7e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 c0 <unknown>
 
 vsseg7e8.v v24, (a0)
 # CHECK-INST: vsseg7e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 c2 <unknown>
 
 vsseg7e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg7e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 c0 <unknown>
 
 vsseg7e16.v v24, (a0)
 # CHECK-INST: vsseg7e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 c2 <unknown>
 
 vsseg7e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg7e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 c0 <unknown>
 
 vsseg7e32.v v24, (a0)
 # CHECK-INST: vsseg7e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 c2 <unknown>
 
 vsseg7e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg7e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xc0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 c0 <unknown>
 
 vsseg7e64.v v24, (a0)
 # CHECK-INST: vsseg7e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xc2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 c2 <unknown>
 
 vssseg7e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 c8 <unknown>
 
 vssseg7e8.v v24, (a0), a1
 # CHECK-INST: vssseg7e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 ca <unknown>
 
 vssseg7e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 c8 <unknown>
 
 vssseg7e16.v v24, (a0), a1
 # CHECK-INST: vssseg7e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 ca <unknown>
 
 vssseg7e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 c8 <unknown>
 
 vssseg7e32.v v24, (a0), a1
 # CHECK-INST: vssseg7e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 ca <unknown>
 
 vssseg7e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg7e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xc8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 c8 <unknown>
 
 vssseg7e64.v v24, (a0), a1
 # CHECK-INST: vssseg7e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xca]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 ca <unknown>
 
 vsuxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 c4 <unknown>
 
 vsuxseg7ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg7ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 c6 <unknown>
 
 vsuxseg7ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 c4 <unknown>
 
 vsuxseg7ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg7ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 c6 <unknown>
 
 vsuxseg7ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 c4 <unknown>
 
 vsuxseg7ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg7ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 c6 <unknown>
 
 vsuxseg7ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg7ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xc4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 c4 <unknown>
 
 vsuxseg7ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg7ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xc6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 c6 <unknown>
 
 vsoxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg7ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 cc <unknown>
 
 vsoxseg7ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg7ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ce <unknown>
 
 vsoxseg7ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg7ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 cc <unknown>
 
 vsoxseg7ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg7ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ce <unknown>
 
 vsoxseg7ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg7ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 cc <unknown>
 
 vsoxseg7ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg7ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ce <unknown>
 
 vsoxseg7ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg7ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xcc]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 cc <unknown>
 
 vsoxseg7ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg7ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xce]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ce <unknown>
 
 vsseg8e8.v v24, (a0), v0.t
 # CHECK-INST: vsseg8e8.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 e0 <unknown>
 
 vsseg8e8.v v24, (a0)
 # CHECK-INST: vsseg8e8.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x0c,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 05 e2 <unknown>
 
 vsseg8e16.v v24, (a0), v0.t
 # CHECK-INST: vsseg8e16.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 e0 <unknown>
 
 vsseg8e16.v v24, (a0)
 # CHECK-INST: vsseg8e16.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x5c,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 05 e2 <unknown>
 
 vsseg8e32.v v24, (a0), v0.t
 # CHECK-INST: vsseg8e32.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 e0 <unknown>
 
 vsseg8e32.v v24, (a0)
 # CHECK-INST: vsseg8e32.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x6c,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 05 e2 <unknown>
 
 vsseg8e64.v v24, (a0), v0.t
 # CHECK-INST: vsseg8e64.v v24, (a0), v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xe0]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 e0 <unknown>
 
 vsseg8e64.v v24, (a0)
 # CHECK-INST: vsseg8e64.v v24, (a0)
 # CHECK-ENCODING: [0x27,0x7c,0x05,0xe2]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 05 e2 <unknown>
 
 vssseg8e8.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e8.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 e8 <unknown>
 
 vssseg8e8.v v24, (a0), a1
 # CHECK-INST: vssseg8e8.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x0c,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c b5 ea <unknown>
 
 vssseg8e16.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e16.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 e8 <unknown>
 
 vssseg8e16.v v24, (a0), a1
 # CHECK-INST: vssseg8e16.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x5c,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c b5 ea <unknown>
 
 vssseg8e32.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e32.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 e8 <unknown>
 
 vssseg8e32.v v24, (a0), a1
 # CHECK-INST: vssseg8e32.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x6c,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c b5 ea <unknown>
 
 vssseg8e64.v v24, (a0), a1, v0.t
 # CHECK-INST: vssseg8e64.v v24, (a0), a1, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xe8]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 e8 <unknown>
 
 vssseg8e64.v v24, (a0), a1
 # CHECK-INST: vssseg8e64.v v24, (a0), a1
 # CHECK-ENCODING: [0x27,0x7c,0xb5,0xea]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c b5 ea <unknown>
 
 vsuxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 e4 <unknown>
 
 vsuxseg8ei8.v v24, (a0), v4
 # CHECK-INST: vsuxseg8ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 e6 <unknown>
 
 vsuxseg8ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 e4 <unknown>
 
 vsuxseg8ei16.v v24, (a0), v4
 # CHECK-INST: vsuxseg8ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 e6 <unknown>
 
 vsuxseg8ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 e4 <unknown>
 
 vsuxseg8ei32.v v24, (a0), v4
 # CHECK-INST: vsuxseg8ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 e6 <unknown>
 
 vsuxseg8ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsuxseg8ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xe4]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 e4 <unknown>
 
 vsuxseg8ei64.v v24, (a0), v4
 # CHECK-INST: vsuxseg8ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xe6]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 e6 <unknown>
 
 vsoxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg8ei8.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ec <unknown>
 
 vsoxseg8ei8.v v24, (a0), v4
 # CHECK-INST: vsoxseg8ei8.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x0c,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 0c 45 ee <unknown>
 
 vsoxseg8ei16.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg8ei16.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ec <unknown>
 
 vsoxseg8ei16.v v24, (a0), v4
 # CHECK-INST: vsoxseg8ei16.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x5c,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 5c 45 ee <unknown>
 
 vsoxseg8ei32.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg8ei32.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ec <unknown>
 
 vsoxseg8ei32.v v24, (a0), v4
 # CHECK-INST: vsoxseg8ei32.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x6c,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 6c 45 ee <unknown>
 
 vsoxseg8ei64.v v24, (a0), v4, v0.t
 # CHECK-INST: vsoxseg8ei64.v v24, (a0), v4, v0.t
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xec]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ec <unknown>
 
 vsoxseg8ei64.v v24, (a0), v4
 # CHECK-INST: vsoxseg8ei64.v v24, (a0), v4
 # CHECK-ENCODING: [0x27,0x7c,0x45,0xee]
-# CHECK-ERROR: instruction requires the following: 'Zvlsseg'
+# CHECK-ERROR: instruction requires the following: 'V'
 # CHECK-UNKNOWN: 27 7c 45 ee <unknown>

From 118babe67adfe3eed1a6d89e3c2d5a70ba8322a4 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 20 Jan 2022 12:44:20 -0800
Subject: [PATCH 079/946] [SLP] Use for loops for walking bundle elements

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d9d02cb56c447..dfda6a97303c2 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2521,12 +2521,11 @@ class BoUpSLP {
       SD->IsScheduled = true;
       LLVM_DEBUG(dbgs() << "SLP:   schedule " << *SD << "\n");
 
-      ScheduleData *BundleMember = SD;
-      while (BundleMember) {
-        if (BundleMember->Inst != BundleMember->OpValue) {
-          BundleMember = BundleMember->NextInBundle;
+      for (ScheduleData *BundleMember = SD; BundleMember;
+           BundleMember = BundleMember->NextInBundle) {
+        if (BundleMember->Inst != BundleMember->OpValue)
           continue;
-        }
+        
         // Handle the def-use chain dependencies.
 
         // Decrement the unscheduled counter and insert to ready list if ready.
@@ -2591,7 +2590,6 @@ class BoUpSLP {
                        << "SLP:    gets ready (mem): " << *DepBundle << "\n");
           }
         }
-        BundleMember = BundleMember->NextInBundle;
       }
     }
 
@@ -7659,8 +7657,8 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
 
     // Move the scheduled instruction(s) to their dedicated places, if not
     // there yet.
-    ScheduleData *BundleMember = picked;
-    while (BundleMember) {
+    for (ScheduleData *BundleMember = picked; BundleMember;
+         BundleMember = BundleMember->NextInBundle) {
       Instruction *pickedInst = BundleMember->Inst;
       if (pickedInst->getNextNode() != LastScheduledInst) {
         BS->BB->getInstList().remove(pickedInst);
@@ -7668,7 +7666,6 @@ void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
                                      pickedInst);
       }
       LastScheduledInst = pickedInst;
-      BundleMember = BundleMember->NextInBundle;
     }
 
     BS->schedule(picked, ReadyInsts);

From 40aef79db0b02b171a65b3a13053ae963a3e8753 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak@amd.com>
Date: Mon, 10 Jan 2022 23:53:58 +0000
Subject: [PATCH 080/946] [MLIR][GPU] Add debug output to enable dumping GPU
 assembly

- Set the DEBUG_TYPE of SerializeToBlob to serialize-to-blob
- Add debug output to print the assembly or PTX for GPU modules before
  they are assembled and linked

Note that, as SerializeToBlob is a superclass of SerializeToCubin and
SerializeToHsaco, --debug-only=serialize-to-blom will dump the
intermediate compiler result for both of these passes.

In addition, if LLVM options such as --stop-after are used to control
the GPU kernel compilation process, the debug output will contain the
appropriate intermediate IR.

Reviewed By: herhut

Differential Revision: https://reviews.llvm.org/D117519
---
 mlir/include/mlir/Dialect/GPU/Passes.h               |  7 +++++++
 mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp  | 10 ++++++++++
 mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp | 11 +++++++++++
 3 files changed, 28 insertions(+)

diff --git a/mlir/include/mlir/Dialect/GPU/Passes.h b/mlir/include/mlir/Dialect/GPU/Passes.h
index c9b396edfdeb0..c9c6f8668b4d3 100644
--- a/mlir/include/mlir/Dialect/GPU/Passes.h
+++ b/mlir/include/mlir/Dialect/GPU/Passes.h
@@ -102,6 +102,13 @@ void registerGpuSerializeToCubinPass();
 /// annotation.
 void registerGpuSerializeToHsacoPass();
 
+/// Create an instance of the GPU kernel function to HSAco binary serialization
+/// pass.
+std::unique_ptr<Pass> createGpuSerializeToHsacoPass(StringRef triple,
+                                                    StringRef arch,
+                                                    StringRef features,
+                                                    int optLevel);
+
 /// Generate the code for registering passes.
 #define GEN_PASS_REGISTRATION
 #include "mlir/Dialect/GPU/Passes.h.inc"
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
index 6ae4662006089..8dadb630f4a94 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp
@@ -21,6 +21,10 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"
 
+#include <string>
+
+#define DEBUG_TYPE "serialize-to-blob"
+
 using namespace mlir;
 
 std::string gpu::getDefaultGpuBinaryAnnotation() { return "gpu.binary"; }
@@ -76,6 +80,12 @@ void gpu::SerializeToBlobPass::runOnOperation() {
 
   std::string targetISA = std::move(maybeTargetISA.getValue());
 
+  LLVM_DEBUG({
+    llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n";
+    llvm::dbgs() << targetISA << "\n";
+    llvm::dbgs().flush();
+  });
+
   // Serialize the target ISA.
   std::unique_ptr<std::vector<char>> blob = serializeISA(targetISA);
   if (!blob)
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
index 75d14d2bb93ea..d9209b9012dd5 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -479,6 +479,17 @@ void mlir::registerGpuSerializeToHsacoPass() {
                                                       "", 2);
       });
 }
+
+/// Create an instance of the GPU kernel function to HSAco binary serialization
+/// pass.
+std::unique_ptr<Pass> mlir::createGpuSerializeToHsacoPass(StringRef triple,
+                                                          StringRef arch,
+                                                          StringRef features,
+                                                          int optLevel) {
+  return std::make_unique<SerializeToHsacoPass>(triple, arch, features,
+                                                optLevel);
+}
+
 #else  // MLIR_GPU_TO_HSACO_PASS_ENABLE
 void mlir::registerGpuSerializeToHsacoPass() {}
 #endif // MLIR_GPU_TO_HSACO_PASS_ENABLE

From dd7b69a61fa382737f06ec36a133d6db645f4cb0 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Jan 2022 12:53:12 -0800
Subject: [PATCH 081/946] [RISCV] Remove HadStdExtV and HasStdZve* Predicates
 from tablegen.

No instructions should be using these. Everything should use
HasVInstructions* Predicates. Remove them so that they can't be
used by accident.
---
 llvm/lib/Target/RISCV/RISCV.td             | 9 ---------
 llvm/lib/Target/RISCV/RISCVSchedRocket.td  | 2 +-
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 2 +-
 3 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 36c7263235ab5..7972ced08edd8 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -166,43 +166,34 @@ def FeatureStdExtZve32x
                        "'Zve32x' (Vector Extensions for Embedded Processors "
                        "with maximal 32 EEW)",
                        [FeatureStdExtZvl32b]>;
-def HasStdExtZve32x : Predicate<"SubTarget->hasStdExtZve32x()">,
-                                 AssemblerPredicate<(all_of FeatureStdExtZve32x),
-                                 "'Zve32x' (Vector Extensions for Embedded Processors "
-                                 "with maximal 32 EEW)">;
 
 def FeatureStdExtZve32f
     : SubtargetFeature<"experimental-zve32f", "HasStdExtZve32f", "true",
                        "'Zve32f' (Vector Extensions for Embedded Processors "
                        "with maximal 32 EEW and F extension)",
                        [FeatureStdExtZve32x]>;
-def HasStdExtZve32f : Predicate<"SubTarget->hasStdExtZve32f()">;
 
 def FeatureStdExtZve64x
     : SubtargetFeature<"experimental-zve64x", "HasStdExtZve64x", "true",
                        "'Zve64x' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW)", [FeatureStdExtZve32x, FeatureStdExtZvl64b]>;
-def HasStdExtZve64x : Predicate<"SubTarget->hasStdExtZve64x()">;
 
 def FeatureStdExtZve64f
     : SubtargetFeature<"experimental-zve64f", "HasStdExtZve64f", "true",
                        "'Zve64f' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW and F extension)",
                        [FeatureStdExtZve32f, FeatureStdExtZve64x]>;
-def HasStdExtZve64f : Predicate<"SubTarget->hasStdExtZve64f()">;
 
 def FeatureStdExtZve64d
     : SubtargetFeature<"experimental-zve64d", "HasStdExtZve64d", "true",
                        "'Zve64d' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW, F and D extension)",
                        [FeatureStdExtZve64f]>;
-def HasStdExtZve64d : Predicate<"SubTarget->hasStdExtZve64d()">;
 
 def FeatureStdExtV
     : SubtargetFeature<"experimental-v", "HasStdExtV", "true",
                        "'V' (Vector Extension for Application Processors)",
                        [FeatureStdExtZvl128b, FeatureStdExtZve64d, FeatureStdExtF, FeatureStdExtD]>;
-def HasStdExtV : Predicate<"Subtarget->hasStdExtV()">;
 
 def HasVInstructions    : Predicate<"Subtarget->hasVInstructions()">,
       AssemblerPredicate<
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 4655015a9d1ec..b907ada3a1d5a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,7 +17,7 @@ def RocketModel : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = false;
-  let UnsupportedFeatures = [HasStdExtV, HasVInstructions, HasVInstructionsI64];
+  let UnsupportedFeatures = [HasVInstructions, HasVInstructionsI64];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 3b3e2699d6b60..5672637a40cc2 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasStdExtV];
+  let UnsupportedFeatures = [HasVInstructions];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.

From 4c1dc65015ae8ba4b7e0ac56f4d88d29e712ce25 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 20 Jan 2022 14:41:01 -0500
Subject: [PATCH 082/946] [InstCombine] add/adjust tests for multiply with
 extended bool; NFC

---
 llvm/test/Transforms/InstCombine/mul.ll | 51 +++++++++++++++++++++----
 1 file changed, 44 insertions(+), 7 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index 76dc39598dc7f..e99d1e9cfd181 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -103,32 +103,69 @@ define i32 @mul_bool(i32 %x, i1 %y) {
 ; CHECK-NEXT:    ret i32 [[M]]
 ;
   %z = zext i1 %y to i32
-  %m = mul i32 %x, %z
+  %m = mul i32 %z, %x
   ret i32 %m
 }
 
-; Commute and test vector type.
-
 define <2 x i32> @mul_bool_vec(<2 x i32> %x, <2 x i1> %y) {
 ; CHECK-LABEL: @mul_bool_vec(
 ; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
   %z = zext <2 x i1> %y to <2 x i32>
-  %m = mul <2 x i32> %x, %z
+  %m = mul <2 x i32> %z, %x
   ret <2 x i32> %m
 }
 
-define <2 x i32> @mul_bool_vec_commute(<2 x i32> %x, <2 x i1> %y) {
+define <2 x i32> @mul_bool_vec_commute(<2 x i32> %px, <2 x i1> %y) {
 ; CHECK-LABEL: @mul_bool_vec_commute(
-; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X:%.*]], <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[X:%.*]] = mul <2 x i32> [[PX:%.*]], [[PX]]
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[Y:%.*]], <2 x i32> [[X]], <2 x i32> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i32> [[M]]
 ;
+  %x = mul <2 x i32> %px, %px  ; thwart complexity-based canonicalization
   %z = zext <2 x i1> %y to <2 x i32>
-  %m = mul <2 x i32> %z, %x
+  %m = mul <2 x i32> %x, %z
   ret <2 x i32> %m
 }
 
+; X * C (when X is a sext boolean) --> X ? -C : 0
+
+define i32 @mul_sext_bool(i1 %x) {
+; CHECK-LABEL: @mul_sext_bool(
+; CHECK-NEXT:    [[S:%.*]] = sext i1 [[X:%.*]] to i32
+; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[S]], 42
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %s = sext i1 %x to i32
+  %m = mul i32 %s, 42
+  ret i32 %m
+}
+
+define i32 @mul_sext_bool_use(i1 %x) {
+; CHECK-LABEL: @mul_sext_bool_use(
+; CHECK-NEXT:    [[S:%.*]] = sext i1 [[X:%.*]] to i32
+; CHECK-NEXT:    call void @use32(i32 [[S]])
+; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[S]], 42
+; CHECK-NEXT:    ret i32 [[M]]
+;
+  %s = sext i1 %x to i32
+  call void @use32(i32 %s)
+  %m = mul i32 %s, 42
+  ret i32 %m
+}
+
+define <2 x i8> @mul_sext_bool_vec(<2 x i1> %x) {
+; CHECK-LABEL: @mul_sext_bool_vec(
+; CHECK-NEXT:    [[S:%.*]] = sext <2 x i1> [[X:%.*]] to <2 x i8>
+; CHECK-NEXT:    [[M:%.*]] = mul <2 x i8> [[S]], <i8 42, i8 -128>
+; CHECK-NEXT:    ret <2 x i8> [[M]]
+;
+  %s = sext <2 x i1> %x to <2 x i8>
+  %m = mul <2 x i8> %s, <i8 42, i8 -128>
+  ret <2 x i8> %m
+}
+
 define <3 x i7> @mul_bools(<3 x i1> %x, <3 x i1> %y) {
 ; CHECK-LABEL: @mul_bools(
 ; CHECK-NEXT:    [[MULBOOL:%.*]] = and <3 x i1> [[X:%.*]], [[Y:%.*]]

From a7a2860d0eee37d9e0fd0b6a8e3d884f8ee4ec16 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Thu, 20 Jan 2022 14:51:45 -0500
Subject: [PATCH 083/946] [InstCombine] convert mul with sexted bool and
 constant to select

We already have the related folds for zext-of-bool, so it
should make things more consistent to have this transform
to select for sext-of-bool too:
https://alive2.llvm.org/ce/z/YikdfA

Fixes #53319
---
 .../Transforms/InstCombine/InstCombineMulDivRem.cpp  | 12 ++++++++++--
 llvm/test/Transforms/InstCombine/mul.ll              |  8 +++-----
 2 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index aca7ec8d7325a..076c3134d0782 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -348,13 +348,21 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
     return CastInst::Create(Instruction::SExt, And, I.getType());
   }
 
-  // (bool X) * Y --> X ? Y : 0
-  // Y * (bool X) --> X ? Y : 0
+  // (zext bool X) * Y --> X ? Y : 0
+  // Y * (zext bool X) --> X ? Y : 0
   if (match(Op0, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
     return SelectInst::Create(X, Op1, ConstantInt::get(I.getType(), 0));
   if (match(Op1, m_ZExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))
     return SelectInst::Create(X, Op0, ConstantInt::get(I.getType(), 0));
 
+  // (sext bool X) * C --> X ? -C : 0
+  Constant *ImmC;
+  if (match(Op0, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1) &&
+      match(Op1, m_ImmConstant(ImmC))) {
+    Constant *NegC = ConstantExpr::getNeg(ImmC);
+    return SelectInst::Create(X, NegC, ConstantInt::getNullValue(I.getType()));
+  }
+
   // (lshr X, 31) * Y --> (ashr X, 31) & Y
   // Y * (lshr X, 31) --> (ashr X, 31) & Y
   // TODO: We are not checking one-use because the elimination of the multiply
diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll
index e99d1e9cfd181..56947040f78a9 100644
--- a/llvm/test/Transforms/InstCombine/mul.ll
+++ b/llvm/test/Transforms/InstCombine/mul.ll
@@ -133,8 +133,7 @@ define <2 x i32> @mul_bool_vec_commute(<2 x i32> %px, <2 x i1> %y) {
 
 define i32 @mul_sext_bool(i1 %x) {
 ; CHECK-LABEL: @mul_sext_bool(
-; CHECK-NEXT:    [[S:%.*]] = sext i1 [[X:%.*]] to i32
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[S]], 42
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[X:%.*]], i32 -42, i32 0
 ; CHECK-NEXT:    ret i32 [[M]]
 ;
   %s = sext i1 %x to i32
@@ -146,7 +145,7 @@ define i32 @mul_sext_bool_use(i1 %x) {
 ; CHECK-LABEL: @mul_sext_bool_use(
 ; CHECK-NEXT:    [[S:%.*]] = sext i1 [[X:%.*]] to i32
 ; CHECK-NEXT:    call void @use32(i32 [[S]])
-; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[S]], 42
+; CHECK-NEXT:    [[M:%.*]] = select i1 [[X]], i32 -42, i32 0
 ; CHECK-NEXT:    ret i32 [[M]]
 ;
   %s = sext i1 %x to i32
@@ -157,8 +156,7 @@ define i32 @mul_sext_bool_use(i1 %x) {
 
 define <2 x i8> @mul_sext_bool_vec(<2 x i1> %x) {
 ; CHECK-LABEL: @mul_sext_bool_vec(
-; CHECK-NEXT:    [[S:%.*]] = sext <2 x i1> [[X:%.*]] to <2 x i8>
-; CHECK-NEXT:    [[M:%.*]] = mul <2 x i8> [[S]], <i8 42, i8 -128>
+; CHECK-NEXT:    [[M:%.*]] = select <2 x i1> [[X:%.*]], <2 x i8> <i8 -42, i8 -128>, <2 x i8> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i8> [[M]]
 ;
   %s = sext <2 x i1> %x to <2 x i8>

From 60f61918795b91f3fc9d310bc0957b75783f826b Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 20 Jan 2022 13:06:55 -0800
Subject: [PATCH 084/946] [SLP] Extract formBundle helper for readability [NFC]

---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 67 ++++++++++++-------
 1 file changed, 42 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index dfda6a97303c2..1b4e90e78d955 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2618,6 +2618,10 @@ class BoUpSLP {
       }
     }
 
+    /// Build a bundle from the ScheduleData nodes corresponding to the
+    /// scalar instruction for each lane.
+    ScheduleData *buildBundle(ArrayRef<Value *> VL);
+
     /// Checks if a bundle of instructions can be scheduled, i.e. has no
     /// cyclic dependencies. This is only a dry-run, no instructions are
     /// actually moved at this stage.
@@ -7227,6 +7231,33 @@ void BoUpSLP::optimizeGatherSequence() {
   GatherShuffleSeq.clear();
 }
 
+BoUpSLP::ScheduleData *
+BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
+  ScheduleData *Bundle = nullptr;  
+  ScheduleData *PrevInBundle = nullptr;
+  for (Value *V : VL) {
+    ScheduleData *BundleMember = getScheduleData(V);
+    assert(BundleMember &&
+           "no ScheduleData for bundle member "
+           "(maybe not in same basic block)");
+    assert(BundleMember->isSchedulingEntity() &&
+           "bundle member already part of other bundle");
+    if (PrevInBundle) {
+      PrevInBundle->NextInBundle = BundleMember;
+    } else {
+      Bundle = BundleMember;
+    }
+    BundleMember->UnscheduledDepsInBundle = 0;
+    Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
+
+    // Group the instructions to a bundle.
+    BundleMember->FirstInBundle = Bundle;
+    PrevInBundle = BundleMember;
+  }
+  assert(Bundle && "Failed to find schedule bundle");
+  return Bundle;
+};
+
 // Groups the instructions to a bundle (which is then a single scheduling entity)
 // and schedules instructions until the bundle gets ready.
 Optional<BoUpSLP::ScheduleData *>
@@ -7239,9 +7270,6 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
 
   // Initialize the instruction bundle.
   Instruction *OldScheduleEnd = ScheduleEnd;
-  ScheduleData *PrevInBundle = nullptr;
-  ScheduleData *Bundle = nullptr;
-  bool ReSchedule = false;
   LLVM_DEBUG(dbgs() << "SLP:  bundle: " << *S.OpValue << "\n");
 
   auto TryScheduleBundleImpl = [this, OldScheduleEnd, SLP](bool ReSchedule,
@@ -7293,33 +7321,22 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
     }
   }
 
+  bool ReSchedule = false;
   for (Value *V : VL) {
     ScheduleData *BundleMember = getScheduleData(V);
     assert(BundleMember &&
            "no ScheduleData for bundle member (maybe not in same basic block)");
-    if (BundleMember->IsScheduled) {
-      // A bundle member was scheduled as single instruction before and now
-      // needs to be scheduled as part of the bundle. We just get rid of the
-      // existing schedule.
-      LLVM_DEBUG(dbgs() << "SLP:  reset schedule because " << *BundleMember
-                        << " was already scheduled\n");
-      ReSchedule = true;
-    }
-    assert(BundleMember->isSchedulingEntity() &&
-           "bundle member already part of other bundle");
-    if (PrevInBundle) {
-      PrevInBundle->NextInBundle = BundleMember;
-    } else {
-      Bundle = BundleMember;
-    }
-    BundleMember->UnscheduledDepsInBundle = 0;
-    Bundle->UnscheduledDepsInBundle += BundleMember->UnscheduledDeps;
-
-    // Group the instructions to a bundle.
-    BundleMember->FirstInBundle = Bundle;
-    PrevInBundle = BundleMember;
+    if (!BundleMember->IsScheduled)
+      continue;
+    // A bundle member was scheduled as single instruction before and now
+    // needs to be scheduled as part of the bundle. We just get rid of the
+    // existing schedule.
+    LLVM_DEBUG(dbgs() << "SLP:  reset schedule because " << *BundleMember
+                      << " was already scheduled\n");
+    ReSchedule = true;
   }
-  assert(Bundle && "Failed to find schedule bundle");
+
+  auto *Bundle = buildBundle(VL);
   TryScheduleBundleImpl(ReSchedule, Bundle);
   if (!Bundle->isReady()) {
     cancelScheduling(VL, S.OpValue);

From 5ef7abbc6f99b46c25c95ad462844990861e8927 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Thu, 20 Jan 2022 12:54:03 -0800
Subject: [PATCH 085/946] [mlir:TiingInterface] Remove unnecessary include of
 Tensor.h

Interfaces in Interfaces/ should not depend on any dialects, and this include
is unnecessary anyways.
---
 mlir/lib/Interfaces/TilingInterface.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/mlir/lib/Interfaces/TilingInterface.cpp b/mlir/lib/Interfaces/TilingInterface.cpp
index 6e252ca5418fc..67ddb5b1c1c50 100644
--- a/mlir/lib/Interfaces/TilingInterface.cpp
+++ b/mlir/lib/Interfaces/TilingInterface.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Interfaces/TilingInterface.h"
-#include "mlir/Dialect/Tensor/IR/Tensor.h"
 
 using namespace mlir;
 

From 1f3f90ab8869887561acadb2540836e7959419f5 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue@google.com>
Date: Thu, 20 Jan 2022 13:51:04 -0500
Subject: [PATCH 086/946] [libc] Make log2f correctly rounded for all rounding
 modes when FMA is not available.

Add to log2f 2 more exceptional cases got when not using fma for polyeval.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D117812
---
 libc/src/math/generic/log2f.cpp   | 15 ++++++++++++++-
 libc/test/src/math/log2f_test.cpp |  8 ++++----
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/libc/src/math/generic/log2f.cpp b/libc/src/math/generic/log2f.cpp
index d2f40fe6763d0..dc5a6b670afd5 100644
--- a/libc/src/math/generic/log2f.cpp
+++ b/libc/src/math/generic/log2f.cpp
@@ -105,11 +105,24 @@ LLVM_LIBC_FUNCTION(float, log2f, (float x)) {
   int m = 0;
 
   // Hard to round value(s).
-  if (FPBits(x).uintval() == 0x3f81d0b5U) {
+  switch (FPBits(x).uintval()) {
+  case 0x3f81d0b5U: {
     int rounding_mode = fputil::get_round();
     if (rounding_mode == FE_DOWNWARD || rounding_mode == FE_TOWARDZERO) {
       return 0x1.4cdc4cp-6f;
     }
+    break;
+  }
+  case 0x3f7e3274U:
+    if (fputil::get_round() == FE_TONEAREST) {
+      return -0x1.4e1d16p-7f;
+    }
+    break;
+  case 0x3f7d57f5U:
+    if (fputil::get_round() == FE_TOWARDZERO) {
+      return -0x1.ed1c32p-7f;
+    }
+    break;
   }
 
   // Exceptional inputs.
diff --git a/libc/test/src/math/log2f_test.cpp b/libc/test/src/math/log2f_test.cpp
index 1708552f0c291..ecedfb68cc7a7 100644
--- a/libc/test/src/math/log2f_test.cpp
+++ b/libc/test/src/math/log2f_test.cpp
@@ -31,10 +31,10 @@ TEST(LlvmLibcLog2fTest, SpecialNumbers) {
 }
 
 TEST(LlvmLibcLog2fTest, TrickyInputs) {
-  constexpr int N = 9;
-  constexpr uint32_t INPUTS[N] = {0x3f7d57f5U, 0x3f7ed848U, 0x3f7fd6ccU,
-                                  0x3f7fffffU, 0x3f80079bU, 0x3f81d0b5U,
-                                  0x3f82e602U, 0x3f83c98dU, 0x3f8cba39U};
+  constexpr int N = 10;
+  constexpr uint32_t INPUTS[N] = {
+      0x3f7d57f5U, 0x3f7e3274U, 0x3f7ed848U, 0x3f7fd6ccU, 0x3f7fffffU,
+      0x3f80079bU, 0x3f81d0b5U, 0x3f82e602U, 0x3f83c98dU, 0x3f8cba39U};
 
   for (int i = 0; i < N; ++i) {
     float x = float(FPBits(INPUTS[i]));

From 8c9f62ea90c70d538766a81ef5980c9223b8566b Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Thu, 20 Jan 2022 19:04:15 +0000
Subject: [PATCH 087/946] [compiler-rt][cmake] Use HandleOutOfTreeLLVM like
 libcxx and friends

This gives us the option of using CMake modules from LLVM, and other
things. We will use that to deduplicate code later.

Reviewed By: phosek

Differential Revision: https://reviews.llvm.org/D117815
---
 compiler-rt/CMakeLists.txt | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 12946d74c797b..974e2333c7abd 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -5,13 +5,6 @@
 
 cmake_minimum_required(VERSION 3.13.4)
 
-# Check if compiler-rt is built as a standalone project.
-if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
-  project(CompilerRT C CXX ASM)
-  set(COMPILER_RT_STANDALONE_BUILD TRUE)
-  set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-endif()
-
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom compiler-rt modules.
@@ -22,6 +15,16 @@ list(INSERT CMAKE_MODULE_PATH 0
   "${LLVM_COMMON_CMAKE_UTILS}/Modules"
   )
 
+# Check if compiler-rt is built as a standalone project.
+if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
+  project(CompilerRT C CXX ASM)
+  set(COMPILER_RT_STANDALONE_BUILD TRUE)
+  set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+
+  # Find the LLVM sources and simulate LLVM CMake options.
+  include(HandleOutOfTreeLLVM)
+endif()
+
 if(CMAKE_CONFIGURATION_TYPES)
   set(CMAKE_CFG_RESOLVED_INTDIR "${CMAKE_CFG_INTDIR}/")
 else()

From 4af11272f57a4a6fed2932e9e0857b2c1a707c51 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Tue, 18 Jan 2022 23:34:54 +0000
Subject: [PATCH 088/946] [cmake] Duplicate
 `{llvm,compiler_rt}_check_linker_flag` for runtime libs and llvm

We previously had a few varied definitions of this floating around. I made the one installed with LLVM handle all the cases, and then made the others use it.

This issue was reported to me in https://reviews.llvm.org/D116521#3248117 as
D116521 made clang and llvm use the common cmake utils.

Reviewed By: sebastian-ne, phosek, #libunwind, #libc, #libc_abi, ldionne

Differential Revision: https://reviews.llvm.org/D117537
---
 cmake/Modules/CheckLinkerFlag.cmake          | 17 -----------------
 compiler-rt/cmake/config-ix.cmake            | 20 +++++++-------------
 libcxx/cmake/config-ix.cmake                 |  4 ++--
 libunwind/cmake/config-ix.cmake              |  8 ++++----
 llvm/cmake/modules/LLVMCheckLinkerFlag.cmake | 12 ++++++++++--
 runtimes/CMakeLists.txt                      |  6 +++---
 6 files changed, 26 insertions(+), 41 deletions(-)
 delete mode 100644 cmake/Modules/CheckLinkerFlag.cmake

diff --git a/cmake/Modules/CheckLinkerFlag.cmake b/cmake/Modules/CheckLinkerFlag.cmake
deleted file mode 100644
index 722fe5b1b8ead..0000000000000
--- a/cmake/Modules/CheckLinkerFlag.cmake
+++ /dev/null
@@ -1,17 +0,0 @@
-include(CMakePushCheckState)
-include(CheckCCompilerFlag)
-
-function(llvm_check_linker_flag flag dest)
-  # If testing a flag with check_c_compiler_flag, it gets added to the compile
-  # command only, but not to the linker command in that test. If the flag
-  # is vital for linking to succeed, the test would fail even if it would
-  # have succeeded if it was included on both commands.
-  #
-  # Therefore, try adding the flag to CMAKE_REQUIRED_FLAGS, which gets
-  # added to both compiling and linking commands in the tests.
-
-  cmake_push_check_state()
-  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}")
-  check_c_compiler_flag("" ${dest})
-  cmake_pop_check_state()
-endfunction()
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 33693ce60321d..596f61e8c82ec 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -1,4 +1,5 @@
 include(CMakePushCheckState)
+include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckIncludeFiles)
@@ -6,13 +7,6 @@ include(CheckLibraryExists)
 include(CheckSymbolExists)
 include(TestBigEndian)
 
-function(compiler_rt_check_linker_flag flag out_var)
-  cmake_push_check_state()
-  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}")
-  check_cxx_compiler_flag("" ${out_var})
-  cmake_pop_check_state()
-endfunction()
-
 check_library_exists(c fopen "" COMPILER_RT_HAS_LIBC)
 if (COMPILER_RT_USE_BUILTINS_LIBRARY)
   include(HandleCompilerRT)
@@ -171,12 +165,12 @@ check_library_exists(c++ __cxa_throw "" COMPILER_RT_HAS_LIBCXX)
 check_library_exists(stdc++ __cxa_throw "" COMPILER_RT_HAS_LIBSTDCXX)
 
 # Linker flags.
-compiler_rt_check_linker_flag("-Wl,-z,text" COMPILER_RT_HAS_Z_TEXT)
-compiler_rt_check_linker_flag("-fuse-ld=lld" COMPILER_RT_HAS_FUSE_LD_LLD_FLAG)
+llvm_check_linker_flag(CXX "-Wl,-z,text" COMPILER_RT_HAS_Z_TEXT)
+llvm_check_linker_flag(CXX "-fuse-ld=lld" COMPILER_RT_HAS_FUSE_LD_LLD_FLAG)
 
 if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
   set(VERS_COMPAT_OPTION "-Wl,-z,gnu-version-script-compat")
-  compiler_rt_check_linker_flag("${VERS_COMPAT_OPTION}" COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
+  llvm_check_linker_flag(CXX "${VERS_COMPAT_OPTION}" COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
 endif()
 
 set(DUMMY_VERS ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/dummy.vers)
@@ -187,10 +181,10 @@ if(COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
   # -z gnu-version-script-compat.
   string(APPEND VERS_OPTION " ${VERS_COMPAT_OPTION}")
 endif()
-compiler_rt_check_linker_flag("${VERS_OPTION}" COMPILER_RT_HAS_VERSION_SCRIPT)
+llvm_check_linker_flag(CXX "${VERS_OPTION}" COMPILER_RT_HAS_VERSION_SCRIPT)
 
 if(ANDROID)
-  compiler_rt_check_linker_flag("-Wl,-z,global" COMPILER_RT_HAS_Z_GLOBAL)
+  llvm_check_linker_flag(CXX "-Wl,-z,global" COMPILER_RT_HAS_Z_GLOBAL)
   check_library_exists(log __android_log_write "" COMPILER_RT_HAS_LIBLOG)
 endif()
 
@@ -436,7 +430,7 @@ if(APPLE)
     -lc++
     -lc++abi)
 
-  compiler_rt_check_linker_flag("-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION)
+  llvm_check_linker_flag(CXX "-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION)
   if(COMPILER_RT_HAS_APP_EXTENSION)
     list(APPEND DARWIN_COMMON_LINK_FLAGS "-fapplication-extension")
   endif()
diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake
index 689a9d09c0179..e114337f081a3 100644
--- a/libcxx/cmake/config-ix.cmake
+++ b/libcxx/cmake/config-ix.cmake
@@ -1,6 +1,6 @@
 include(CMakePushCheckState)
 include(CheckLibraryExists)
-include(CheckLinkerFlag)
+include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckCSourceCompiles)
@@ -12,7 +12,7 @@ include(CheckCSourceCompiles)
 # libunwind (and the compiler implicit -lunwind wouldn't succeed as the newly
 # built libunwind isn't installed yet). For those cases, it'd be good to
 # link with --uwnindlib=none. Check if that option works.
-llvm_check_linker_flag("--unwindlib=none" LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
+llvm_check_linker_flag(C "--unwindlib=none" LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
 if (LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
 endif()
diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake
index c814611d43785..c1e1b4631abfe 100644
--- a/libunwind/cmake/config-ix.cmake
+++ b/libunwind/cmake/config-ix.cmake
@@ -2,14 +2,14 @@ include(CMakePushCheckState)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckLibraryExists)
-include(CheckLinkerFlag)
+include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckSymbolExists)
 include(CheckCSourceCompiles)
 
 # The compiler driver may be implicitly trying to link against libunwind, which
 # might not work if libunwind doesn't exist yet. Try to check if
 # --unwindlib=none is supported, and use that if possible.
-llvm_check_linker_flag("--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
+llvm_check_linker_flag(C "--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
 if (LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
 endif()
@@ -34,11 +34,11 @@ endif()
 # required for the link to go through. We remove sanitizers from the
 # configuration checks to avoid spurious link errors.
 
-llvm_check_linker_flag(-nostdlib++ LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
+llvm_check_linker_flag(C "-nostdlib++" LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
 if (LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++")
 else()
-  llvm_check_linker_flag(-nodefaultlibs LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
+  llvm_check_linker_flag(C "-nodefaultlibs" LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
   if (LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
     set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs")
   endif()
diff --git a/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake b/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
index 253dd768654a2..79c4e2cb4c2cd 100644
--- a/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
+++ b/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
@@ -5,14 +5,22 @@ if (COMMAND check_linker_flag)
     check_linker_flag(${ARGN})
   endmacro()
 else()
+  # Until the minimum CMAKE version is 3.18
+
   include(CheckCXXCompilerFlag)
   include(CMakePushCheckState)
 
-  # cmake builtin compatible, except we assume lang is CXX
+  # cmake builtin compatible, except we assume lang is C or CXX
   function(llvm_check_linker_flag lang flag out_var)
     cmake_push_check_state()
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${flag}")
-    check_cxx_compiler_flag("" ${out_var})
+    if("${lang}" STREQUAL "C")
+      check_c_compiler_flag("" ${out_var})
+    elseif("${lang}" STREQUAL "CXX")
+      check_cxx_compiler_flag("" ${out_var})
+    else()
+      message(FATAL_ERROR "\"${lang}\" is not C or CXX")
+    endif()
     cmake_pop_check_state()
   endfunction()
 endif()
diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt
index cedce7b3541e5..1a50d9e8c98b3 100644
--- a/runtimes/CMakeLists.txt
+++ b/runtimes/CMakeLists.txt
@@ -88,7 +88,7 @@ set(LLVM_CMAKE_DIR ${LLVM_MAIN_SRC_DIR}/cmake/modules)
 set(LLVM_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../llvm)
 
 include(CheckLibraryExists)
-include(CheckLinkerFlag)
+include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 
@@ -100,7 +100,7 @@ if (NOT LLVM_RUNTIMES_LINKING_WORKS)
   # --unwindlib=none is supported, and use that if possible.
   # Don't add this if not necessary to fix linking, as it can break using
   # e.g. ASAN/TSAN.
-  llvm_check_linker_flag("--unwindlib=none" LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
+  llvm_check_linker_flag(C "--unwindlib=none" LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
   if (LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
     set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
   endif()
@@ -110,7 +110,7 @@ endif()
 # Check for -nostdlib++ first; if there's no C++ standard library yet,
 # all check_cxx_compiler_flag commands will fail until we add -nostdlib++
 # (or -nodefaultlibs).
-llvm_check_linker_flag(-nostdlib++ LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
+llvm_check_linker_flag(C "-nostdlib++" LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
 if (LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++")
 endif()

From 36cb29cbbe1b22dcd298ad65e1fabe899b7d7249 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Thu, 20 Jan 2022 13:36:55 -0800
Subject: [PATCH 089/946] Work around a module build failure on the bots.

This patch works around what looks like a bug in Clang itself.

The error on the bot is:

https://green.lab.llvm.org/green/view/LLDB/job/lldb-cmake/40466/consoleText

In module 'LLVM_Utils' imported from /Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h:18:
/Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/Support/Error.h:720:3: error: 'llvm::Expected<bool>::(anonymous)' from module 'LLVM_Utils.Support.Error' is not present in definition of 'llvm::Expected<bool>' in module 'LLVM_Utils.Support.Error'
  union {
  ^
/Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/Support/Error.h:720:3: note: declaration of '' does not match
/Users/buildslave/jenkins/workspace/lldb-cmake/llvm-project/llvm/include/llvm/Support/Error.h:720:3: note: declaration of '' does not match
1 error generated.

The intention is to revert this as soon as a proper fix has been identified!

rdar://87845391
---
 lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h b/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h
index 48f27b09b95c5..c99372fa110cd 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/lldb-python.h
@@ -9,6 +9,13 @@
 #ifndef LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_LLDB_PYTHON_H
 #define LLDB_PLUGINS_SCRIPTINTERPRETER_PYTHON_LLDB_PYTHON_H
 
+// BEGIN FIXME
+// This declaration works around a clang module build failure.
+// It should be deleted ASAP.
+#include "llvm/Support/Error.h"
+static llvm::Expected<bool> *g_fcxx_modules_workaround;
+// END
+
 #include "lldb/Host/Config.h"
 
 // Python.h needs to be included before any system headers in order to avoid

From 5a670f1378aef27e8fe21fdf4aa47eea0e97f7f5 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 20 Jan 2022 13:58:13 -0800
Subject: [PATCH 090/946] [SLP] Kill an unused param and use a for-loop in
 calculateDependencies [NFC]

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1b4e90e78d955..f0a031168f48e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -581,7 +581,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
 }
 
 /// \returns the AA location that is being access by the instruction.
-static MemoryLocation getLocation(Instruction *I, AAResults *AA) {
+static MemoryLocation getLocation(Instruction *I) {
   if (StoreInst *SI = dyn_cast<StoreInst>(I))
     return MemoryLocation::get(SI);
   if (LoadInst *LI = dyn_cast<LoadInst>(I))
@@ -7551,12 +7551,12 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
       Instruction *SrcInst = BundleMember->Inst;
       assert(SrcInst->mayReadOrWriteMemory() &&
              "NextLoadStore list for non memory effecting bundle?");
-      MemoryLocation SrcLoc = getLocation(SrcInst, SLP->AA);
+      MemoryLocation SrcLoc = getLocation(SrcInst);
       bool SrcMayWrite = BundleMember->Inst->mayWriteToMemory();
       unsigned numAliased = 0;
       unsigned DistToSrc = 1;
 
-      while (DepDest) {
+      for ( ; DepDest; DepDest = DepDest->NextLoadStore) {
         assert(isInSchedulingRegion(DepDest));
 
         // We have two limits to reduce the complexity:
@@ -7586,7 +7586,6 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleData *SD,
             WorkList.push_back(DestBundle);
           }
         }
-        DepDest = DepDest->NextLoadStore;
 
         // Example, explaining the loop break condition: Let's assume our
         // starting instruction is i0 and MaxMemDepDistance = 3.

From 41ebd1968165d8f9646f3dc2bfbed834baaede8e Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Wed, 1 Dec 2021 13:44:42 -0800
Subject: [PATCH 091/946] [AMDGPU] Do not ignore exec use where exec is read as
 data

Compares, v_cndmask_b32, and v_readfirstlane_b32 use EXEC
in a way which modifies the result. This implicit EXEC use
shall not be ignored for the purposes of instruction moves.

Differential Revision: https://reviews.llvm.org/D117814
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  21 +-
 llvm/test/CodeGen/AMDGPU/licm-valu.mir        | 245 ++++++++++++++++++
 .../CodeGen/AMDGPU/mul24-pass-ordering.ll     |  20 +-
 3 files changed, 275 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/licm-valu.mir

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 82776a77c58da..3ac04ee717dea 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -130,10 +130,29 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
   return false;
 }
 
+static bool readsExecAsData(const MachineInstr &MI) {
+  if (MI.isCompare())
+    return true;
+
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case AMDGPU::V_READFIRSTLANE_B32:
+  case AMDGPU::V_CNDMASK_B64_PSEUDO:
+  case AMDGPU::V_CNDMASK_B32_dpp:
+  case AMDGPU::V_CNDMASK_B32_e32:
+  case AMDGPU::V_CNDMASK_B32_e64:
+  case AMDGPU::V_CNDMASK_B32_sdwa:
+    return true;
+  }
+
+  return false;
+}
+
 bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
   // Any implicit use of exec by VALU is not a real register read.
   return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
-         isVALU(*MO.getParent());
+         isVALU(*MO.getParent()) && !readsExecAsData(*MO.getParent());
 }
 
 bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
new file mode 100644
index 0000000000000..45a050cffde9c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
@@ -0,0 +1,245 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=machinelicm -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+---
+name: hoist_move
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: hoist_move
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    S_BRANCH %bb.1
+
+  bb.1:
+    %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cmp
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cmp
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    S_BRANCH %bb.1
+
+  bb.1:
+    %0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_readfirstlane
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_readfirstlane
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[DEF]], implicit $exec
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %1:sgpr_32 = V_READFIRSTLANE_B32 %0:vgpr_32, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_e64
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cndmask_e64
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF1]], implicit $exec
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_e64 0, %0, 0, %0, %1, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_e32
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cndmask_e32
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 [[DEF]], [[DEF]], implicit undef $vcc, implicit $exec
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_e32 %0, %0, implicit undef $vcc, implicit $exec
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_dpp
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cndmask_dpp
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CNDMASK_B32_dpp:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp [[DEF]], 0, [[DEF]], 0, [[DEF]], 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_dpp %0:vgpr_32, 0, %0:vgpr_32, 0, %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
+---
+name: no_hoist_cndmask_sdwa
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: no_hoist_cndmask_sdwa
+  ; GCN: bb.0:
+  ; GCN-NEXT:   successors: %bb.1(0x80000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+  ; GCN-NEXT:   S_BRANCH %bb.1
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT:   [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[DEF]], 0, [[DEF]], 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
+  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN-NEXT:   S_BRANCH %bb.2
+  ; GCN-NEXT: {{  $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT:   S_ENDPGM 0
+  bb.0:
+    %0:vgpr_32 = IMPLICIT_DEF
+    %1:sreg_64_xexec = IMPLICIT_DEF
+    S_BRANCH %bb.1
+
+  bb.1:
+    %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, %0:vgpr_32, 0, %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
+    $exec = S_OR_B64 $exec, 1, implicit-def $scc
+    S_CBRANCH_EXECNZ %bb.1, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.2:
+    S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
index d1970a735a17a..2fac70d96bc31 100644
--- a/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
@@ -54,17 +54,17 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
 ; GFX9-LABEL: lsr_order_mul24_1:
 ; GFX9:       ; %bb.0: ; %bb
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NEXT:    v_cmp_lt_u32_e32 vcc, v0, v1
-; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], vcc
+; GFX9-NEXT:    v_and_b32_e32 v5, 1, v18
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v5
+; GFX9-NEXT:    v_cmp_lt_u32_e64 s[4:5], v0, v1
+; GFX9-NEXT:    s_and_saveexec_b64 s[8:9], s[4:5]
 ; GFX9-NEXT:    s_cbranch_execz .LBB1_3
 ; GFX9-NEXT:  ; %bb.1: ; %bb19
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, v6
-; GFX9-NEXT:    v_and_b32_e32 v5, 0xffffff, v6
-; GFX9-NEXT:    v_and_b32_e32 v8, 1, v18
 ; GFX9-NEXT:    v_add_u32_e32 v4, v4, v0
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v7
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v8
-; GFX9-NEXT:    v_lshl_add_u32 v7, v4, 2, v3
+; GFX9-NEXT:    v_and_b32_e32 v5, 0xffffff, v6
+; GFX9-NEXT:    v_lshl_add_u32 v6, v4, 2, v3
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v7, v7
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v8, 2, v2
 ; GFX9-NEXT:    v_add_u32_e32 v9, v17, v12
 ; GFX9-NEXT:    s_mov_b64 s[10:11], 0
@@ -76,7 +76,7 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
 ; GFX9-NEXT:    v_add_u32_e32 v12, v17, v0
 ; GFX9-NEXT:    v_add_u32_e32 v19, v9, v0
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
-; GFX9-NEXT:    v_madak_f32 v3, v3, v6, 0x3727c5ac
+; GFX9-NEXT:    v_madak_f32 v3, v3, v7, 0x3727c5ac
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GFX9-NEXT:    v_mul_u32_u24_e32 v18, v3, v5
 ; GFX9-NEXT:    v_add_u32_e32 v3, v3, v16
@@ -97,8 +97,8 @@ define void @lsr_order_mul24_1(i32 %arg, i32 %arg1, i32 %arg2, float addrspace(3
 ; GFX9-NEXT:    s_or_b64 s[10:11], s[6:7], s[10:11]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[4:5]
-; GFX9-NEXT:    ds_write_b32 v7, v3
-; GFX9-NEXT:    v_add_u32_e32 v7, v7, v8
+; GFX9-NEXT:    ds_write_b32 v6, v3
+; GFX9-NEXT:    v_add_u32_e32 v6, v6, v8
 ; GFX9-NEXT:    s_andn2_b64 exec, exec, s[10:11]
 ; GFX9-NEXT:    s_cbranch_execnz .LBB1_2
 ; GFX9-NEXT:  .LBB1_3: ; %Flow3

From c0906f6b21a1f64f493c9180622d5978d05b17c2 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 20 Jan 2022 14:07:46 -0800
Subject: [PATCH 092/946] [SLP] Remove stray semicolon to make bots happy

Certain bots (e.g. sanitizer-x86_64-linux-android) appear to be running with strict c++98 flags which disallow ; at global scope.
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index f0a031168f48e..475ab7e1f495e 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7256,7 +7256,7 @@ BoUpSLP::BlockScheduling::buildBundle(ArrayRef<Value *> VL) {
   }
   assert(Bundle && "Failed to find schedule bundle");
   return Bundle;
-};
+}
 
 // Groups the instructions to a bundle (which is then a single scheduling entity)
 // and schedules instructions until the bundle gets ready.

From 1d4ca42b43805f7199b921f35c1257b2cbad72c9 Mon Sep 17 00:00:00 2001
From: Michael Kruse <llvm-project@meinersbur.de>
Date: Thu, 20 Jan 2022 10:42:17 -0600
Subject: [PATCH 093/946] [OpenMPIRBuilder] Detect ambiguous InsertPoints for
 apply*WorkshareLoop. NFC.

Follow-up on D117226 for applyStaticWorkshareLoop and
applyDynamicWorkshareLoop checking for conflicting InertPoints via an
assert. There is no in-tree code that violates this assertion, hence
nothing changes.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 0bf0c832d5ddf..177ae9a47db75 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1516,6 +1516,8 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                           InsertPointTy AllocaIP,
                                           bool NeedsBarrier, Value *Chunk) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
+  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
+         "Require dedicated allocate IP");
 
   // Set up the source location value for OpenMP runtime.
   Builder.restoreIP(CLI->getPreheaderIP());
@@ -1646,6 +1648,8 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
     DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
     OMPScheduleType SchedType, bool NeedsBarrier, Value *Chunk) {
   assert(CLI->isValid() && "Requires a valid canonical loop");
+  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
+         "Require dedicated allocate IP");
 
   // Set up the source location value for OpenMP runtime.
   Builder.SetCurrentDebugLocation(DL);

From 39f779afb35420c51e2c7094b2643a53baed9f59 Mon Sep 17 00:00:00 2001
From: Nathan James <n.james93@hotmail.co.uk>
Date: Thu, 20 Jan 2022 22:20:10 +0000
Subject: [PATCH 094/946] [clang-tidy][NFC] Remove redundant string creation
 for comparison

---
 .../clang-tidy/utils/IncludeSorter.cpp          | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp
index fbc1dc6d52a0b..8d620ca3af681 100644
--- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp
+++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp
@@ -84,15 +84,14 @@ determineIncludeKind(StringRef CanonicalFile, StringRef IncludeFile,
   if ((Style == IncludeSorter::IS_Google) ||
       (Style == IncludeSorter::IS_Google_ObjC)) {
     std::pair<StringRef, StringRef> Parts = CanonicalInclude.split("/public/");
-    std::string AltCanonicalInclude =
-        Parts.first.str() + "/internal/" + Parts.second.str();
-    std::string ProtoCanonicalInclude =
-        Parts.first.str() + "/proto/" + Parts.second.str();
-
-    // Determine the kind of this inclusion.
-    if (CanonicalFile.equals(AltCanonicalInclude) ||
-        CanonicalFile.equals(ProtoCanonicalInclude)) {
-      return IncludeSorter::IK_MainTUInclude;
+    StringRef FileCopy = CanonicalFile;
+    if (FileCopy.consume_front(Parts.first) &&
+        FileCopy.consume_back(Parts.second)) {
+      // Determine the kind of this inclusion.
+      if (FileCopy.equals("/internal/") ||
+          FileCopy.equals("/proto/")) {
+        return IncludeSorter::IK_MainTUInclude;
+      }
     }
   }
   if (Style == IncludeSorter::IS_Google_ObjC) {

From c95afac89e000b65edc51dc7d05610250a62c86f Mon Sep 17 00:00:00 2001
From: owenca <owenpiano@gmail.com>
Date: Thu, 20 Jan 2022 01:59:52 -0800
Subject: [PATCH 095/946] [clang-format][NFC] Clean up tryMergeLessLess()

Differential Revision: https://reviews.llvm.org/D117759
---
 clang/lib/Format/FormatTokenLexer.cpp | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp
index e8b9b3d61c888..c9166f4b17aab 100644
--- a/clang/lib/Format/FormatTokenLexer.cpp
+++ b/clang/lib/Format/FormatTokenLexer.cpp
@@ -430,25 +430,22 @@ bool FormatTokenLexer::tryMergeLessLess() {
     return false;
 
   auto First = Tokens.end() - 3;
-  bool FourthTokenIsLess = false;
-
-  if (Tokens.size() > 3) {
-    auto Fourth = (Tokens.end() - 4)[0];
-    FourthTokenIsLess = Fourth->is(tok::less);
-
-    // Do not remove a whitespace between the two "<" e.g. "operator< <>".
-    if (First[2]->is(tok::greater) && Fourth->is(tok::kw_operator))
-      return false;
-  }
-
-  if (First[2]->is(tok::less) || First[1]->isNot(tok::less) ||
-      First[0]->isNot(tok::less) || FourthTokenIsLess)
+  if (First[0]->isNot(tok::less) || First[1]->isNot(tok::less))
     return false;
 
   // Only merge if there currently is no whitespace between the two "<".
   if (First[1]->hasWhitespaceBefore())
     return false;
 
+  auto X = Tokens.size() > 3 ? First[-1] : nullptr;
+  auto Y = First[2];
+  if ((X && X->is(tok::less)) || Y->is(tok::less))
+    return false;
+
+  // Do not remove a whitespace between the two "<" e.g. "operator< <>".
+  if (X && X->is(tok::kw_operator) && Y->is(tok::greater))
+    return false;
+
   First[0]->Tok.setKind(tok::lessless);
   First[0]->TokenText = "<<";
   First[0]->ColumnWidth += 1;

From cd2d7369639e70df17e7977ac3a4a5b7854043fa Mon Sep 17 00:00:00 2001
From: Rob Suderman <rob.suderman@gmail.com>
Date: Thu, 20 Jan 2022 14:32:19 -0800
Subject: [PATCH 096/946] [mlir][tosa] Limit right-shift to 31 bits

Right shift can occur that is a 32-bit right shift. This is undefined behavior.

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D117732
---
 mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp b/mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp
index ff82acec21e97..3fa471c37f312 100644
--- a/mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp
+++ b/mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp
@@ -46,8 +46,8 @@ static void computeMultiplierAndShiftTosaScale16(double scale,
 
   // Shifting tops out at 63 bits. Right shift to make 63 bits the max.
   if (shift > 63) {
-    // Shifting the multiplier by more than 32-bits is unnecessary.
-    multiplier = multiplier >> std::min<int32_t>(32, shift - 63);
+    // Shifting the multiplier by more than 31-bits is unnecessary.
+    multiplier = multiplier >> std::min<int32_t>(31, shift - 63);
     shift = 63;
   }
 }
@@ -82,7 +82,7 @@ static void computeMultiplierAndShiftTosaScale32(double scale,
   // Shifting tops out at 63 bits. Right shift to make 63 bits the max.
   if (shift > 63) {
     // Shifting the multiplier by more than 32-bits is unnecessary.
-    multiplier = multiplier >> std::min<int32_t>(32, shift - 63);
+    multiplier = multiplier >> std::min<int32_t>(31, shift - 63);
     shift = 63;
   }
 }

From fa8bb224661dfb38cb2a246f7d98dc61fd45602e Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Jan 2022 14:16:37 -0800
Subject: [PATCH 097/946] [RISCV] Optimize vector_shuffles that are
 interleaving the lowest elements of two vectors.

RISCV only has a unary shuffle that requires places indices in a
register. For interleaving two vectors this means we need at least
two vrgathers and a vmerge to do a shuffle of two vectors.

This patch teaches shuffle lowering to use a widening addu followed
by a widening vmaccu to implement the interleave. First we extract
the low half of both V1 and V2. Then we implement
(zext(V1) + zext(V2)) + (zext(V2) * zext(2^eltbits - 1)) which
simplifies to (zext(V1) + zext(V2) * 2^eltbits). This further
simplifies to (zext(V1) + zext(V2) << eltbits). Then we bitcast the
result back to the original type splitting the wide elements in half.

We can only do this if we have a type with wider elements available.
Because we're using extends we also have to be careful with fractional
lmuls. Floating point types are supported by bitcasting to/from integer.

The tests test a varied combination of LMULs split across VLEN>=128 and
VLEN>=512 tests. There a few tests with shuffle indices commuted as well
as tests for undef indices. There's one test for a vXi64/vXf64 vector which
we can't optimize, but verifies we don't crash.

Reviewed By: rogfer01

Differential Revision: https://reviews.llvm.org/D117743
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 122 ++++-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |   1 +
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |  10 +-
 .../RISCV/rvv/fixed-vectors-fp-interleave.ll  | 378 ++++++++++++++
 .../RISCV/rvv/fixed-vectors-int-interleave.ll | 484 ++++++++++++++++++
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll   |  15 +-
 6 files changed, 995 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f942f395d5328..507a21b16e4eb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2328,6 +2328,48 @@ static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
   return -1;
 }
 
+static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
+                                const RISCVSubtarget &Subtarget) {
+  // We need to be able to widen elements to the next larger integer type.
+  if (VT.getScalarSizeInBits() >= Subtarget.getMaxELENForFixedLengthVectors())
+    return false;
+
+  int Size = Mask.size();
+  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+  int Srcs[] = {-1, -1};
+  for (int i = 0; i != Size; ++i) {
+    // Ignore undef elements.
+    if (Mask[i] < 0)
+      continue;
+
+    // Is this an even or odd element.
+    int Pol = i % 2;
+
+    // Ensure we consistently use the same source for this element polarity.
+    int Src = Mask[i] / Size;
+    if (Srcs[Pol] < 0)
+      Srcs[Pol] = Src;
+    if (Srcs[Pol] != Src)
+      return false;
+
+    // Make sure the element within the source is appropriate for this element
+    // in the destination.
+    int Elt = Mask[i] % Size;
+    if (Elt != i / 2)
+      return false;
+  }
+
+  // We need to find a source for each polarity and they can't be the same.
+  if (Srcs[0] < 0 || Srcs[1] < 0 || Srcs[0] == Srcs[1])
+    return false;
+
+  // Swap the sources if the second source was in the even polarity.
+  SwapSources = Srcs[0] > Srcs[1];
+
+  return true;
+}
+
 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
                                    const RISCVSubtarget &Subtarget) {
   SDValue V1 = Op.getOperand(0);
@@ -2413,8 +2455,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     }
   }
 
+  ArrayRef<int> Mask = SVN->getMask();
+
   // Try to match as a slidedown.
-  int SlideAmt = matchShuffleAsSlideDown(SVN->getMask());
+  int SlideAmt = matchShuffleAsSlideDown(Mask);
   if (SlideAmt >= 0) {
     // TODO: Should we reduce the VL to account for the upper undef elements?
     // Requires additional vsetvlis, but might be faster to execute.
@@ -2427,10 +2471,81 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
   }
 
+  // Detect an interleave shuffle and lower to
+  // (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
+  bool SwapSources;
+  if (isInterleaveShuffle(Mask, VT, SwapSources, Subtarget)) {
+    // Swap sources if needed.
+    if (SwapSources)
+      std::swap(V1, V2);
+
+    // Extract the lower half of the vectors.
+    MVT HalfVT = VT.getHalfNumVectorElementsVT();
+    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
+                     DAG.getConstant(0, DL, XLenVT));
+    V2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V2,
+                     DAG.getConstant(0, DL, XLenVT));
+
+    // Double the element width and halve the number of elements in an int type.
+    unsigned EltBits = VT.getScalarSizeInBits();
+    MVT WideIntEltVT = MVT::getIntegerVT(EltBits * 2);
+    MVT WideIntVT =
+        MVT::getVectorVT(WideIntEltVT, VT.getVectorNumElements() / 2);
+    // Convert this to a scalable vector. We need to base this on the
+    // destination size to ensure there's always a type with a smaller LMUL.
+    MVT WideIntContainerVT =
+        getContainerForFixedLengthVector(DAG, WideIntVT, Subtarget);
+
+    // Convert sources to scalable vectors with the same element count as the
+    // larger type.
+    MVT HalfContainerVT = MVT::getVectorVT(
+        VT.getVectorElementType(), WideIntContainerVT.getVectorElementCount());
+    V1 = convertToScalableVector(HalfContainerVT, V1, DAG, Subtarget);
+    V2 = convertToScalableVector(HalfContainerVT, V2, DAG, Subtarget);
+
+    // Cast sources to integer.
+    MVT IntEltVT = MVT::getIntegerVT(EltBits);
+    MVT IntHalfVT =
+        MVT::getVectorVT(IntEltVT, HalfContainerVT.getVectorElementCount());
+    V1 = DAG.getBitcast(IntHalfVT, V1);
+    V2 = DAG.getBitcast(IntHalfVT, V2);
+
+    // Freeze V2 since we use it twice and we need to be sure that the add and
+    // multiply see the same value.
+    V2 = DAG.getNode(ISD::FREEZE, DL, IntHalfVT, V2);
+
+    // Recreate TrueMask using the widened type's element count.
+    MVT MaskVT =
+        MVT::getVectorVT(MVT::i1, HalfContainerVT.getVectorElementCount());
+    TrueMask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
+
+    // Widen V1 and V2 with 0s and add one copy of V2 to V1.
+    SDValue Add = DAG.getNode(RISCVISD::VWADDU_VL, DL, WideIntContainerVT, V1,
+                              V2, TrueMask, VL);
+    // Create 2^eltbits - 1 copies of V2 by multiplying by the largest integer.
+    SDValue Multiplier = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, IntHalfVT,
+                                     DAG.getAllOnesConstant(DL, XLenVT));
+    SDValue WidenMul = DAG.getNode(RISCVISD::VWMULU_VL, DL, WideIntContainerVT,
+                                   V2, Multiplier, TrueMask, VL);
+    // Add the new copies to our previous addition giving us 2^eltbits copies of
+    // V2. This is equivalent to shifting V2 left by eltbits. This should
+    // combine with the vwmulu.vv above to form vwmaccu.vv.
+    Add = DAG.getNode(RISCVISD::ADD_VL, DL, WideIntContainerVT, Add, WidenMul,
+                      TrueMask, VL);
+    // Cast back to ContainerVT. We need to re-create a new ContainerVT in case
+    // WideIntContainerVT is a larger fractional LMUL than implied by the fixed
+    // vector VT.
+    ContainerVT =
+        MVT::getVectorVT(VT.getVectorElementType(),
+                         WideIntContainerVT.getVectorElementCount() * 2);
+    Add = DAG.getBitcast(ContainerVT, Add);
+    return convertFromScalableVector(VT, Add, DAG, Subtarget);
+  }
+
   // Detect shuffles which can be re-expressed as vector selects; these are
   // shuffles in which each element in the destination is taken from an element
   // at the corresponding index in either source vectors.
-  bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
+  bool IsSelect = all_of(enumerate(Mask), [&](const auto &MaskIdx) {
     int MaskIndex = MaskIdx.value();
     return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
   });
@@ -2456,7 +2571,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   // Now construct the mask that will be used by the vselect or blended
   // vrgather operation. For vrgathers, construct the appropriate indices into
   // each vector.
-  for (int MaskIndex : SVN->getMask()) {
+  for (int MaskIndex : Mask) {
     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
     if (!IsSelect) {
@@ -9941,6 +10056,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(FP_ROUND_VL)
   NODE_NAME_CASE(VWMUL_VL)
   NODE_NAME_CASE(VWMULU_VL)
+  NODE_NAME_CASE(VWADDU_VL)
   NODE_NAME_CASE(SETCC_VL)
   NODE_NAME_CASE(VSELECT_VL)
   NODE_NAME_CASE(VMAND_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 76b778831fae0..23857f93e0159 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -245,6 +245,7 @@ enum NodeType : unsigned {
   // Widening instructions
   VWMUL_VL,
   VWMULU_VL,
+  VWADDU_VL,
 
   // Vector compare producing a mask. Fourth operand is input mask. Fifth
   // operand is VL.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index f646a4605e339..9745c13863823 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -221,14 +221,15 @@ def riscv_trunc_vector_vl : SDNode<"RISCVISD::TRUNCATE_VECTOR_VL",
                                                         SDTCVecEltisVT<2, i1>,
                                                         SDTCisVT<3, XLenVT>]>>;
 
-def SDT_RISCVVWMUL_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
+def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
                                              SDTCisSameNumEltsAs<0, 1>,
                                              SDTCisSameAs<1, 2>,
                                              SDTCisSameNumEltsAs<1, 3>,
                                              SDTCVecEltisVT<3, i1>,
                                              SDTCisVT<4, XLenVT>]>;
-def riscv_vwmul_vl  : SDNode<"RISCVISD::VWMUL_VL",  SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
-def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWMUL_VL, [SDNPCommutative]>;
+def riscv_vwmul_vl  : SDNode<"RISCVISD::VWMUL_VL",  SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
+def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
 
 def SDTRVVVecReduce : SDTypeProfile<1, 5, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisSameAs<0, 3>,
@@ -712,6 +713,9 @@ foreach vti = AllIntegerVectors in {
                  (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
 }
 
+// 12.2. Vector Widening Integer Add/Subtract
+defm : VPatBinaryWVL_VV_VX<riscv_vwaddu_vl, "PseudoVWADDU">;
+
 // 12.3. Vector Integer Extension
 defm : VPatExtendSDNode_V_VL<riscv_zext_vl, "PseudoVZEXT", "VF2",
                              AllFractionableVF2IntVectors>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
new file mode 100644
index 0000000000000..3f57fcea6acc5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -0,0 +1,378 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
+
+; Test optimizing interleaves to widening arithmetic.
+
+define <4 x half> @interleave_v2f16(<2 x half> %x, <2 x half> %y) {
+; CHECK-LABEL: interleave_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x half> %x, <2 x half> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x half> %a
+}
+
+; Vector order switched for coverage.
+define <4 x float> @interleave_v2f32(<2 x float> %x, <2 x float> %y) {
+; CHECK-LABEL: interleave_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v9, v8
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x float> %x, <2 x float> %y, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+  ret <4 x float> %a
+}
+
+; One vXf64 test case to very that we don't optimize it.
+; FIXME: Is there better codegen we can do here?
+define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) {
+; RV32-V128-LABEL: interleave_v2f64:
+; RV32-V128:       # %bb.0:
+; RV32-V128-NEXT:    vmv1r.v v12, v9
+; RV32-V128-NEXT:    # kill: def $v8 killed $v8 def $v8m2
+; RV32-V128-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-V128-NEXT:    vid.v v10
+; RV32-V128-NEXT:    vsrl.vi v14, v10, 1
+; RV32-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v14
+; RV32-V128-NEXT:    li a0, 10
+; RV32-V128-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V128-NEXT:    vmv.s.x v0, a0
+; RV32-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV32-V128-NEXT:    vrgatherei16.vv v10, v12, v14, v0.t
+; RV32-V128-NEXT:    vmv.v.v v8, v10
+; RV32-V128-NEXT:    ret
+;
+; RV64-V128-LABEL: interleave_v2f64:
+; RV64-V128:       # %bb.0:
+; RV64-V128-NEXT:    vmv1r.v v12, v9
+; RV64-V128-NEXT:    # kill: def $v8 killed $v8 def $v8m2
+; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT:    vid.v v10
+; RV64-V128-NEXT:    vsrl.vi v14, v10, 1
+; RV64-V128-NEXT:    vrgather.vv v10, v8, v14
+; RV64-V128-NEXT:    li a0, 10
+; RV64-V128-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V128-NEXT:    vmv.s.x v0, a0
+; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT:    vrgather.vv v10, v12, v14, v0.t
+; RV64-V128-NEXT:    vmv.v.v v8, v10
+; RV64-V128-NEXT:    ret
+;
+; RV32-V512-LABEL: interleave_v2f64:
+; RV32-V512:       # %bb.0:
+; RV32-V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, mu
+; RV32-V512-NEXT:    vid.v v10
+; RV32-V512-NEXT:    vsrl.vi v11, v10, 1
+; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-V512-NEXT:    vrgatherei16.vv v10, v8, v11
+; RV32-V512-NEXT:    li a0, 10
+; RV32-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V512-NEXT:    vmv.s.x v0, a0
+; RV32-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV32-V512-NEXT:    vrgatherei16.vv v10, v9, v11, v0.t
+; RV32-V512-NEXT:    vmv.v.v v8, v10
+; RV32-V512-NEXT:    ret
+;
+; RV64-V512-LABEL: interleave_v2f64:
+; RV64-V512:       # %bb.0:
+; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vid.v v10
+; RV64-V512-NEXT:    vsrl.vi v11, v10, 1
+; RV64-V512-NEXT:    vrgather.vv v10, v8, v11
+; RV64-V512-NEXT:    li a0, 10
+; RV64-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V512-NEXT:    vmv.s.x v0, a0
+; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; RV64-V512-NEXT:    vmv.v.v v8, v10
+; RV64-V512-NEXT:    ret
+  %a = shufflevector <2 x double> %x, <2 x double> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x double> %a
+}
+
+; Undef elements for coverage
+define <8 x half> @interleave_v4f16(<4 x half> %x, <4 x half> %y) {
+; V128-LABEL: interleave_v4f16:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv1r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4f16:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x half> %x, <4 x half> %y, <8 x i32> <i32 0, i32 4, i32 undef, i32 5, i32 2, i32 undef, i32 3, i32 7>
+  ret <8 x half> %a
+}
+
+define <8 x float> @interleave_v4f32(<4 x float> %x, <4 x float> %y) {
+; V128-LABEL: interleave_v4f32:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e32, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4f32:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x float> %x, <4 x float> %y, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  ret <8 x float> %a
+}
+
+; Vector order switched for coverage.
+define <16 x half> @interleave_v8f16(<8 x half> %x, <8 x half> %y) {
+; V128-LABEL: interleave_v8f16:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v9, v8
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v8
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8f16:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e16, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v9, v8
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v8
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x half> %x, <8 x half> %y, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
+  ret <16 x half> %a
+}
+
+define <16 x float> @interleave_v8f32(<8 x float> %x, <8 x float> %y) {
+; V128-LABEL: interleave_v8f32:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e32, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8f32:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e32, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x float> %x, <8 x float> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x float> %a
+}
+
+define <32 x half> @interleave_v16f16(<16 x half> %x, <16 x half> %y) {
+; V128-LABEL: interleave_v16f16:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16f16:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x half> %x, <16 x half> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <32 x half> %a
+}
+
+define <32 x float> @interleave_v16f32(<16 x float> %x, <16 x float> %y) {
+; V128-LABEL: interleave_v16f32:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; V128-NEXT:    vwaddu.vv v16, v8, v12
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v16, a0, v12
+; V128-NEXT:    vmv8r.v v8, v16
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16f32:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv2r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x float> %x, <16 x float> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <32 x float> %a
+}
+
+define <64 x half> @interleave_v32f16(<32 x half> %x, <32 x half> %y) {
+; V128-LABEL: interleave_v32f16:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 64
+; V128-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; V128-NEXT:    vwaddu.vv v16, v8, v12
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v16, a0, v12
+; V128-NEXT:    vmv8r.v v8, v16
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32f16:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv2r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <32 x half> %x, <32 x half> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <64 x half> %a
+}
+
+define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
+; RV32-V128-LABEL: interleave_v32f32:
+; RV32-V128:       # %bb.0:
+; RV32-V128-NEXT:    addi sp, sp, -16
+; RV32-V128-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 4
+; RV32-V128-NEXT:    sub sp, sp, a0
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV32-V128-NEXT:    li a1, 32
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-V128-NEXT:    vle32.v v0, (a0)
+; RV32-V128-NEXT:    vmv8r.v v24, v8
+; RV32-V128-NEXT:    addi a0, sp, 16
+; RV32-V128-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT:    vrgather.vv v8, v24, v0
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI10_1)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI10_1)
+; RV32-V128-NEXT:    vle32.v v24, (a0)
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 3
+; RV32-V128-NEXT:    add a0, sp, a0
+; RV32-V128-NEXT:    addi a0, a0, 16
+; RV32-V128-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT:    lui a0, 699051
+; RV32-V128-NEXT:    addi a0, a0, -1366
+; RV32-V128-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; RV32-V128-NEXT:    vmv.s.x v0, a0
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 3
+; RV32-V128-NEXT:    add a0, sp, a0
+; RV32-V128-NEXT:    addi a0, a0, 16
+; RV32-V128-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
+; RV32-V128-NEXT:    vmv.v.v v24, v8
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; RV32-V128-NEXT:    addi a0, sp, 16
+; RV32-V128-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV32-V128-NEXT:    vwaddu.vv v0, v8, v16
+; RV32-V128-NEXT:    li a0, -1
+; RV32-V128-NEXT:    vwmaccu.vx v0, a0, v16
+; RV32-V128-NEXT:    vmv8r.v v8, v0
+; RV32-V128-NEXT:    vmv8r.v v16, v24
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 4
+; RV32-V128-NEXT:    add sp, sp, a0
+; RV32-V128-NEXT:    addi sp, sp, 16
+; RV32-V128-NEXT:    ret
+;
+; RV64-V128-LABEL: interleave_v32f32:
+; RV64-V128:       # %bb.0:
+; RV64-V128-NEXT:    addi sp, sp, -16
+; RV64-V128-NEXT:    .cfi_def_cfa_offset 16
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 4
+; RV64-V128-NEXT:    sub sp, sp, a0
+; RV64-V128-NEXT:    lui a0, %hi(.LCPI10_0)
+; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI10_0)
+; RV64-V128-NEXT:    li a1, 32
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-V128-NEXT:    vle32.v v0, (a0)
+; RV64-V128-NEXT:    vmv8r.v v24, v8
+; RV64-V128-NEXT:    addi a0, sp, 16
+; RV64-V128-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT:    vrgather.vv v8, v24, v0
+; RV64-V128-NEXT:    lui a0, %hi(.LCPI10_1)
+; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI10_1)
+; RV64-V128-NEXT:    vle32.v v24, (a0)
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 3
+; RV64-V128-NEXT:    add a0, sp, a0
+; RV64-V128-NEXT:    addi a0, a0, 16
+; RV64-V128-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT:    lui a0, 699051
+; RV64-V128-NEXT:    addiw a0, a0, -1366
+; RV64-V128-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; RV64-V128-NEXT:    vmv.s.x v0, a0
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 3
+; RV64-V128-NEXT:    add a0, sp, a0
+; RV64-V128-NEXT:    addi a0, a0, 16
+; RV64-V128-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
+; RV64-V128-NEXT:    vmv.v.v v24, v8
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; RV64-V128-NEXT:    addi a0, sp, 16
+; RV64-V128-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT:    vwaddu.vv v0, v8, v16
+; RV64-V128-NEXT:    li a0, -1
+; RV64-V128-NEXT:    vwmaccu.vx v0, a0, v16
+; RV64-V128-NEXT:    vmv8r.v v8, v0
+; RV64-V128-NEXT:    vmv8r.v v16, v24
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 4
+; RV64-V128-NEXT:    add sp, sp, a0
+; RV64-V128-NEXT:    addi sp, sp, 16
+; RV64-V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32f32:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; V512-NEXT:    vwaddu.vv v12, v8, v10
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v12, a0, v10
+; V512-NEXT:    vmv4r.v v8, v12
+; V512-NEXT:    ret
+  %a = shufflevector <32 x float> %x, <32 x float> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <64 x float> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
new file mode 100644
index 0000000000000..a17a83169373b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -0,0 +1,484 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
+
+; Test optimizing interleaves to widening arithmetic.
+
+define <4 x i8> @interleave_v2i8(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: interleave_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x i8> %x, <2 x i8> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x i8> %a
+}
+
+define <4 x i16> @interleave_v2i16(<2 x i16> %x, <2 x i16> %y) {
+; CHECK-LABEL: interleave_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf4, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v8, v9
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x i16> %x, <2 x i16> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x i16> %a
+}
+
+; Vector order switched for coverage.
+define <4 x i32> @interleave_v2i32(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: interleave_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, mf2, ta, mu
+; CHECK-NEXT:    vwaddu.vv v10, v9, v8
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v10, a0, v8
+; CHECK-NEXT:    vmv1r.v v8, v10
+; CHECK-NEXT:    ret
+  %a = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
+  ret <4 x i32> %a
+}
+
+; One vXi64 test case to very that we don't optimize it.
+; FIXME: Is there better codegen we can do here?
+define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; RV32-V128-LABEL: interleave_v2i64:
+; RV32-V128:       # %bb.0:
+; RV32-V128-NEXT:    vmv1r.v v12, v9
+; RV32-V128-NEXT:    # kill: def $v8 killed $v8 def $v8m2
+; RV32-V128-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
+; RV32-V128-NEXT:    vid.v v10
+; RV32-V128-NEXT:    vsrl.vi v14, v10, 1
+; RV32-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
+; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v14
+; RV32-V128-NEXT:    li a0, 10
+; RV32-V128-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V128-NEXT:    vmv.s.x v0, a0
+; RV32-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV32-V128-NEXT:    vrgatherei16.vv v10, v12, v14, v0.t
+; RV32-V128-NEXT:    vmv.v.v v8, v10
+; RV32-V128-NEXT:    ret
+;
+; RV64-V128-LABEL: interleave_v2i64:
+; RV64-V128:       # %bb.0:
+; RV64-V128-NEXT:    vmv1r.v v12, v9
+; RV64-V128-NEXT:    # kill: def $v8 killed $v8 def $v8m2
+; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT:    vid.v v10
+; RV64-V128-NEXT:    vsrl.vi v14, v10, 1
+; RV64-V128-NEXT:    vrgather.vv v10, v8, v14
+; RV64-V128-NEXT:    li a0, 10
+; RV64-V128-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V128-NEXT:    vmv.s.x v0, a0
+; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-V128-NEXT:    vrgather.vv v10, v12, v14, v0.t
+; RV64-V128-NEXT:    vmv.v.v v8, v10
+; RV64-V128-NEXT:    ret
+;
+; RV32-V512-LABEL: interleave_v2i64:
+; RV32-V512:       # %bb.0:
+; RV32-V512-NEXT:    vsetivli zero, 4, e16, mf4, ta, mu
+; RV32-V512-NEXT:    vid.v v10
+; RV32-V512-NEXT:    vsrl.vi v11, v10, 1
+; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
+; RV32-V512-NEXT:    vrgatherei16.vv v10, v8, v11
+; RV32-V512-NEXT:    li a0, 10
+; RV32-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV32-V512-NEXT:    vmv.s.x v0, a0
+; RV32-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV32-V512-NEXT:    vrgatherei16.vv v10, v9, v11, v0.t
+; RV32-V512-NEXT:    vmv.v.v v8, v10
+; RV32-V512-NEXT:    ret
+;
+; RV64-V512-LABEL: interleave_v2i64:
+; RV64-V512:       # %bb.0:
+; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vid.v v10
+; RV64-V512-NEXT:    vsrl.vi v11, v10, 1
+; RV64-V512-NEXT:    vrgather.vv v10, v8, v11
+; RV64-V512-NEXT:    li a0, 10
+; RV64-V512-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
+; RV64-V512-NEXT:    vmv.s.x v0, a0
+; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
+; RV64-V512-NEXT:    vrgather.vv v10, v9, v11, v0.t
+; RV64-V512-NEXT:    vmv.v.v v8, v10
+; RV64-V512-NEXT:    ret
+  %a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x i64> %a
+}
+
+; Vector order switched for coverage.
+define <8 x i8> @interleave_v4i8(<4 x i8> %x, <4 x i8> %y) {
+; V128-LABEL: interleave_v4i8:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e8, mf4, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v9, v8
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v8
+; V128-NEXT:    vmv1r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4i8:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e8, mf8, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v9, v8
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v8
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
+  ret <8 x i8> %a
+}
+
+; Undef elements for coverage
+define <8 x i16> @interleave_v4i16(<4 x i16> %x, <4 x i16> %y) {
+; V128-LABEL: interleave_v4i16:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e16, mf2, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv1r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4i16:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e16, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x i16> %x, <4 x i16> %y, <8 x i32> <i32 0, i32 4, i32 undef, i32 5, i32 2, i32 undef, i32 3, i32 7>
+  ret <8 x i16> %a
+}
+
+define <8 x i32> @interleave_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; V128-LABEL: interleave_v4i32:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 8, e32, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v4i32:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 8, e32, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <4 x i32> %x, <4 x i32> %y, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  ret <8 x i32> %a
+}
+
+define <16 x i8> @interleave_v8i8(<8 x i8> %x, <8 x i8> %y) {
+; V128-LABEL: interleave_v8i8:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e8, mf2, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv1r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8i8:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e8, mf8, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x i8> %x, <8 x i8> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x i8> %a
+}
+
+; Vector order switched for coverage.
+define <16 x i16> @interleave_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; V128-LABEL: interleave_v8i16:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e16, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v9, v8
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v8
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8i16:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e16, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v9, v8
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v8
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x i16> %x, <8 x i16> %y, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
+  ret <16 x i16> %a
+}
+
+define <16 x i32> @interleave_v8i32(<8 x i32> %x, <8 x i32> %y) {
+; V128-LABEL: interleave_v8i32:
+; V128:       # %bb.0:
+; V128-NEXT:    vsetivli zero, 16, e32, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v8i32:
+; V512:       # %bb.0:
+; V512-NEXT:    vsetivli zero, 16, e32, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <8 x i32> %x, <8 x i32> %y, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x i32> %a
+}
+
+define <32 x i8> @interleave_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; V128-LABEL: interleave_v16i8:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; V128-NEXT:    vwaddu.vv v10, v8, v9
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v10, a0, v9
+; V128-NEXT:    vmv2r.v v8, v10
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16i8:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x i8> %x, <16 x i8> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <32 x i8> %a
+}
+
+define <32 x i16> @interleave_v16i16(<16 x i16> %x, <16 x i16> %y) {
+; V128-LABEL: interleave_v16i16:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16i16:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x i16> %x, <16 x i16> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <32 x i16> %a
+}
+
+define <32 x i32> @interleave_v16i32(<16 x i32> %x, <16 x i32> %y) {
+; V128-LABEL: interleave_v16i32:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 32
+; V128-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; V128-NEXT:    vwaddu.vv v16, v8, v12
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v16, a0, v12
+; V128-NEXT:    vmv8r.v v8, v16
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v16i32:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 32
+; V512-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv2r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <16 x i32> %x, <16 x i32> %y, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <32 x i32> %a
+}
+
+define <64 x i8> @interleave_v32i8(<32 x i8> %x, <32 x i8> %y) {
+; V128-LABEL: interleave_v32i8:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 64
+; V128-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; V128-NEXT:    vwaddu.vv v12, v8, v10
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v12, a0, v10
+; V128-NEXT:    vmv4r.v v8, v12
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32i8:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv1r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <32 x i8> %x, <32 x i8> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <64 x i8> %a
+}
+
+define <64 x i16> @interleave_v32i16(<32 x i16> %x, <32 x i16> %y) {
+; V128-LABEL: interleave_v32i16:
+; V128:       # %bb.0:
+; V128-NEXT:    li a0, 64
+; V128-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; V128-NEXT:    vwaddu.vv v16, v8, v12
+; V128-NEXT:    li a0, -1
+; V128-NEXT:    vwmaccu.vx v16, a0, v12
+; V128-NEXT:    vmv8r.v v8, v16
+; V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32i16:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; V512-NEXT:    vwaddu.vv v10, v8, v9
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v10, a0, v9
+; V512-NEXT:    vmv2r.v v8, v10
+; V512-NEXT:    ret
+  %a = shufflevector <32 x i16> %x, <32 x i16> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <64 x i16> %a
+}
+
+define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
+; RV32-V128-LABEL: interleave_v32i32:
+; RV32-V128:       # %bb.0:
+; RV32-V128-NEXT:    addi sp, sp, -16
+; RV32-V128-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 4
+; RV32-V128-NEXT:    sub sp, sp, a0
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI15_0)
+; RV32-V128-NEXT:    li a1, 32
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-V128-NEXT:    vle32.v v0, (a0)
+; RV32-V128-NEXT:    vmv8r.v v24, v8
+; RV32-V128-NEXT:    addi a0, sp, 16
+; RV32-V128-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT:    vrgather.vv v8, v24, v0
+; RV32-V128-NEXT:    lui a0, %hi(.LCPI15_1)
+; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI15_1)
+; RV32-V128-NEXT:    vle32.v v24, (a0)
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 3
+; RV32-V128-NEXT:    add a0, sp, a0
+; RV32-V128-NEXT:    addi a0, a0, 16
+; RV32-V128-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-V128-NEXT:    lui a0, 699051
+; RV32-V128-NEXT:    addi a0, a0, -1366
+; RV32-V128-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; RV32-V128-NEXT:    vmv.s.x v0, a0
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 3
+; RV32-V128-NEXT:    add a0, sp, a0
+; RV32-V128-NEXT:    addi a0, a0, 16
+; RV32-V128-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
+; RV32-V128-NEXT:    vmv.v.v v24, v8
+; RV32-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; RV32-V128-NEXT:    addi a0, sp, 16
+; RV32-V128-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV32-V128-NEXT:    vwaddu.vv v0, v8, v16
+; RV32-V128-NEXT:    li a0, -1
+; RV32-V128-NEXT:    vwmaccu.vx v0, a0, v16
+; RV32-V128-NEXT:    vmv8r.v v8, v0
+; RV32-V128-NEXT:    vmv8r.v v16, v24
+; RV32-V128-NEXT:    csrr a0, vlenb
+; RV32-V128-NEXT:    slli a0, a0, 4
+; RV32-V128-NEXT:    add sp, sp, a0
+; RV32-V128-NEXT:    addi sp, sp, 16
+; RV32-V128-NEXT:    ret
+;
+; RV64-V128-LABEL: interleave_v32i32:
+; RV64-V128:       # %bb.0:
+; RV64-V128-NEXT:    addi sp, sp, -16
+; RV64-V128-NEXT:    .cfi_def_cfa_offset 16
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 4
+; RV64-V128-NEXT:    sub sp, sp, a0
+; RV64-V128-NEXT:    lui a0, %hi(.LCPI15_0)
+; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI15_0)
+; RV64-V128-NEXT:    li a1, 32
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-V128-NEXT:    vle32.v v0, (a0)
+; RV64-V128-NEXT:    vmv8r.v v24, v8
+; RV64-V128-NEXT:    addi a0, sp, 16
+; RV64-V128-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT:    vrgather.vv v8, v24, v0
+; RV64-V128-NEXT:    lui a0, %hi(.LCPI15_1)
+; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI15_1)
+; RV64-V128-NEXT:    vle32.v v24, (a0)
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 3
+; RV64-V128-NEXT:    add a0, sp, a0
+; RV64-V128-NEXT:    addi a0, a0, 16
+; RV64-V128-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-V128-NEXT:    lui a0, 699051
+; RV64-V128-NEXT:    addiw a0, a0, -1366
+; RV64-V128-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
+; RV64-V128-NEXT:    vmv.s.x v0, a0
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 3
+; RV64-V128-NEXT:    add a0, sp, a0
+; RV64-V128-NEXT:    addi a0, a0, 16
+; RV64-V128-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT:    vrgather.vv v8, v16, v24, v0.t
+; RV64-V128-NEXT:    vmv.v.v v24, v8
+; RV64-V128-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
+; RV64-V128-NEXT:    addi a0, sp, 16
+; RV64-V128-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT:    vwaddu.vv v0, v8, v16
+; RV64-V128-NEXT:    li a0, -1
+; RV64-V128-NEXT:    vwmaccu.vx v0, a0, v16
+; RV64-V128-NEXT:    vmv8r.v v8, v0
+; RV64-V128-NEXT:    vmv8r.v v16, v24
+; RV64-V128-NEXT:    csrr a0, vlenb
+; RV64-V128-NEXT:    slli a0, a0, 4
+; RV64-V128-NEXT:    add sp, sp, a0
+; RV64-V128-NEXT:    addi sp, sp, 16
+; RV64-V128-NEXT:    ret
+;
+; V512-LABEL: interleave_v32i32:
+; V512:       # %bb.0:
+; V512-NEXT:    li a0, 64
+; V512-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; V512-NEXT:    vwaddu.vv v12, v8, v10
+; V512-NEXT:    li a0, -1
+; V512-NEXT:    vwmaccu.vx v12, a0, v10
+; V512-NEXT:    vmv4r.v v8, v12
+; V512-NEXT:    ret
+  %a = shufflevector <32 x i32> %x, <32 x i32> %y, <64 x i32> <i32 0, i32 32, i32 1, i32 33, i32 2, i32 34, i32 3, i32 35, i32 4, i32 36, i32 5, i32 37, i32 6, i32 38, i32 7, i32 39, i32 8, i32 40, i32 9, i32 41, i32 10, i32 42, i32 11, i32 43, i32 12, i32 44, i32 13, i32 45, i32 14, i32 46, i32 15, i32 47, i32 16, i32 48, i32 17, i32 49, i32 18, i32 50, i32 19, i32 51, i32 20, i32 52, i32 21, i32 53, i32 22, i32 54, i32 23, i32 55, i32 24, i32 56, i32 25, i32 57, i32 26, i32 58, i32 27, i32 59, i32 28, i32 60, i32 29, i32 61, i32 30, i32 62, i32 31, i32 63>
+  ret <64 x i32> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 97ec81c64cebe..1b8f1d246cd20 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -314,16 +314,13 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) {
 define <4 x i8> @interleave_shuffles(<4 x i8> %x) {
 ; CHECK-LABEL: interleave_shuffles:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv.x.s a0, v8
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vrgather.vi v9, v8, 1
-; CHECK-NEXT:    li a1, 10
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vsrl.vi v10, v8, 1
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vrgather.vv v8, v9, v10, v0.t
+; CHECK-NEXT:    vrgather.vi v9, v8, 0
+; CHECK-NEXT:    vrgather.vi v10, v8, 1
+; CHECK-NEXT:    vsetivli zero, 4, e8, mf8, ta, mu
+; CHECK-NEXT:    vwaddu.vv v8, v9, v10
+; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vwmaccu.vx v8, a0, v10
 ; CHECK-NEXT:    ret
   %y = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   %z = shufflevector <4 x i8> %x, <4 x i8> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>

From 284cd693f1c695a4ae9c8de7e3ada645eb19d22b Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy <sivachandra@google.com>
Date: Thu, 20 Jan 2022 08:11:54 +0000
Subject: [PATCH 098/946] [libc] Move the remaining public types to their own
 type headers.

Reviewed By: michaelrj

Differential Revision: https://reviews.llvm.org/D117838
---
 libc/config/linux/api.td                      | 98 ++++++-------------
 libc/include/CMakeLists.txt                   | 27 ++++-
 libc/include/llvm-libc-types/CMakeLists.txt   | 19 ++++
 libc/include/llvm-libc-types/FILE.h           | 14 +++
 .../llvm-libc-types/__bsearchcompare_t.h      | 14 +++
 .../llvm-libc-types/__call_once_func_t.h      | 14 +++
 .../llvm-libc-types/__qsortcompare_t.h        | 14 +++
 libc/include/llvm-libc-types/__sigaction.h    | 22 +++++
 libc/include/llvm-libc-types/__sighandler_t.h | 14 +++
 libc/include/llvm-libc-types/cnd_t.h          | 21 ++++
 libc/include/llvm-libc-types/div_t.h          | 17 ++++
 libc/include/llvm-libc-types/fenv_t.h         | 25 +++++
 libc/include/llvm-libc-types/fexcept_t.h      | 14 +++
 libc/include/llvm-libc-types/imaxdiv_t.h      | 17 ++++
 libc/include/llvm-libc-types/ldiv_t.h         | 17 ++++
 libc/include/llvm-libc-types/lldiv_t.h        | 17 ++++
 libc/include/llvm-libc-types/mtx_t.h          | 17 ++++
 libc/include/llvm-libc-types/once_flag.h      | 14 +++
 libc/include/llvm-libc-types/thrd_start_t.h   | 14 +++
 .../llvm-libc-types/thrd_t.h}                 |  7 +-
 libc/include/llvm-libc-types/time_t.h         | 14 +++
 libc/include/llvm-libc-types/tm.h             | 24 +++++
 libc/include/threads.h.def                    |  2 -
 23 files changed, 378 insertions(+), 78 deletions(-)
 create mode 100644 libc/include/llvm-libc-types/FILE.h
 create mode 100644 libc/include/llvm-libc-types/__bsearchcompare_t.h
 create mode 100644 libc/include/llvm-libc-types/__call_once_func_t.h
 create mode 100644 libc/include/llvm-libc-types/__qsortcompare_t.h
 create mode 100644 libc/include/llvm-libc-types/__sigaction.h
 create mode 100644 libc/include/llvm-libc-types/__sighandler_t.h
 create mode 100644 libc/include/llvm-libc-types/cnd_t.h
 create mode 100644 libc/include/llvm-libc-types/div_t.h
 create mode 100644 libc/include/llvm-libc-types/fenv_t.h
 create mode 100644 libc/include/llvm-libc-types/fexcept_t.h
 create mode 100644 libc/include/llvm-libc-types/imaxdiv_t.h
 create mode 100644 libc/include/llvm-libc-types/ldiv_t.h
 create mode 100644 libc/include/llvm-libc-types/lldiv_t.h
 create mode 100644 libc/include/llvm-libc-types/mtx_t.h
 create mode 100644 libc/include/llvm-libc-types/once_flag.h
 create mode 100644 libc/include/llvm-libc-types/thrd_start_t.h
 rename libc/{config/linux/threads.h.in => include/llvm-libc-types/thrd_t.h} (70%)
 create mode 100644 libc/include/llvm-libc-types/time_t.h
 create mode 100644 libc/include/llvm-libc-types/tm.h

diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 8201edb76682b..f882ec304f192 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -24,23 +24,13 @@ def SSizeT : TypeDecl<"ssize_t"> {
 
 def StructTm: TypeDecl<"struct tm"> {
   let Decl = [{
-    struct tm {
-      int tm_sec; // seconds after the minute
-      int tm_min; // minutes after the hour
-      int tm_hour; // hours since midnight
-      int tm_mday; // day of the month
-      int tm_mon; // months since January
-      int tm_year; // years since 1900
-      int tm_wday; // days since Sunday
-      int tm_yday; // days since January
-      int tm_isdst; // Daylight Saving Time flag
-    };
+    #include <llvm-libc-types/tm.h>
   }];
 }
 
 def TimeT: TypeDecl<"time_t"> {
-   let Decl = [{
-   typedef long time_t;
+  let Decl = [{
+    #include <llvm-libc-types/time_t.h>
   }];
 }
 
@@ -52,7 +42,7 @@ def OffT : TypeDecl<"off_t"> {
 
 def FILE : TypeDecl<"FILE"> {
   let Decl = [{
-    typedef struct FILE FILE;
+    #include <llvm-libc-types/FILE.h>
   }];
 }
 
@@ -114,10 +104,7 @@ def CTypeAPI : PublicAPI<"ctype.h"> {
 
 def IMaxDivT : TypeDecl<"imaxdiv_t"> {
   let Decl = [{
-    typedef struct {
-      intmax_t quot;
-      intmax_t rem;
-    } imaxdiv_t;
+    #include <llvm-libc-types/imaxdiv_t.h>
   }];
 }
 
@@ -195,24 +182,13 @@ def MathAPI : PublicAPI<"math.h"> {
 
 def FEnvT : TypeDecl<"fenv_t"> {
   let Decl = [{
-    #ifdef __aarch64__
-    typedef struct {
-      unsigned char __control_word[4];
-      unsigned char __status_word[4];
-    } fenv_t;
-    #endif
-    #ifdef __x86_64__
-    typedef struct {
-      unsigned char __x86_status[28];
-      unsigned char __mxcsr[4];
-    } fenv_t;
-    #endif
+    #include <llvm-libc-types/fenv_t.h>
   }];
 }
 
 def FExceptT : TypeDecl<"fexcept_t"> {
   let Decl = [{
-    typedef int fexcept_t;
+    #include <llvm-libc-types/fexcept_t.h>
   }];
 }
 
@@ -257,40 +233,31 @@ def StdIOAPI : PublicAPI<"stdio.h"> {
 
 def DivT : TypeDecl<"div_t"> {
   let Decl = [{
-    typedef struct {
-      int quot;
-      int rem;
-    } div_t;
+    #include <llvm-libc-types/div_t.h>
   }];
 }
 
 def LDivT : TypeDecl<"ldiv_t"> {
   let Decl = [{
-    typedef struct {
-      long quot;
-      long rem;
-    } ldiv_t;
+    #include <llvm-libc-types/ldiv_t.h>
   }];
 }
 
 def LLDivT : TypeDecl<"lldiv_t"> {
   let Decl = [{
-    typedef struct {
-      long long quot;
-      long long rem;
-    } lldiv_t;
+    #include <llvm-libc-types/lldiv_t.h>
   }];
 }
 
 def BSearchCompareTDefn : TypeDecl<"__bsearchcompare_t"> {
   let Decl = [{
-    typedef int(*__bsearchcompare_t)(const void *, const void *);
+    #include <llvm-libc-types/__bsearchcompare_t.h>
   }];
 }
 
 def QSortCompareTDefn : TypeDecl<"__qsortcompare_t"> {
   let Decl = [{
-    typedef int(*__qsortcompare_t)(const void *, const void *);
+    #include <llvm-libc-types/__qsortcompare_t.h>
   }];
 }
 
@@ -367,21 +334,13 @@ def SysMManAPI : PublicAPI<"sys/mman.h"> {
 
 def StructSigactionDefn : TypeDecl<"struct sigaction"> {
   let Decl = [{
-    struct __sigaction {
-      union {
-        void (*sa_handler)(int);
-        void (*sa_action)(int, siginfo_t *, void *);
-      };
-      sigset_t sa_mask;
-      int sa_flags;
-      void (*sa_restorer)(void);
-    };
+    #include <llvm-libc-types/__sigaction.h>
   }];
 }
 
 def SighandlerTDefn : TypeDecl<"__sighandler_t"> {
   let Decl = [{
-    typedef void(*__sighandler_t)(int);
+    #include <llvm-libc-types/__sighandler_t.h>
   }];
 }
 
@@ -394,39 +353,37 @@ def SignalAPI : PublicAPI<"signal.h"> {
 
 def OnceFlag : TypeDecl<"once_flag"> {
   let Decl = [{
-    typedef unsigned int once_flag;
+    #include <llvm-libc-types/once_flag.h>
   }];
 }
 
 def MtxT : TypeDecl<"mtx_t"> {
   let Decl = [{
-    typedef struct {
-      unsigned char __internal_data[4];
-      int __mtx_type;
-    } mtx_t;
+    #include <llvm-libc-types/mtx_t.h>
   }];
 }
 
 def CndT : TypeDecl<"cnd_t"> {
   let Decl = [{
-    typedef struct {
-      void *__qfront;
-      void *__qback;
-      struct {
-        unsigned char __w[4];
-        int __t;
-      } __qmtx;
-    } cnd_t;
+    #include <llvm-libc-types/cnd_t.h>
+  }];
+}
+
+def ThrdT : TypeDecl<"thrd_t"> {
+  let Decl = [{
+    #include <llvm-libc-types/thrd_t.h>
   }];
 }
 
 def ThreadStartT : TypeDecl<"thrd_start_t"> {
-  let Decl = "typedef int (*thrd_start_t)(void *);";
+  let Decl = [{
+    #include <llvm-libc-types/thrd_start_t.h>
+  }];
 }
 
 def CallOnceFuncT : TypeDecl<"__call_once_func_t"> {
   let Decl = [{
-    typedef void(*__call_once_func_t)(void);
+    #include <llvm-libc-types/__call_once_func_t.h>
   }];
 }
 
@@ -440,6 +397,7 @@ def ThreadsAPI : PublicAPI<"threads.h"> {
     CallOnceFuncT,
     MtxT,
     CndT,
+    ThrdT,
     ThreadStartT,
   ];
 
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index ba6b47c3d1859..0b62563f3170a 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -20,6 +20,8 @@ add_gen_header(
   GEN_HDR fenv.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.fenv_t
+    .llvm-libc-types.fexcept_t
 )
 
 add_gen_header(
@@ -28,6 +30,7 @@ add_gen_header(
   GEN_HDR inttypes.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.imaxdiv_t
 )
 
 add_gen_header(
@@ -54,6 +57,7 @@ add_gen_header(
   GEN_HDR string.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.size_t
 )
 
 add_gen_header(
@@ -62,18 +66,22 @@ add_gen_header(
   GEN_HDR time.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.time_t
+    .llvm-libc-types.tm
 )
 
 add_gen_header(
   threads
   DEF_FILE threads.h.def
   GEN_HDR threads.h
-  PARAMS
-    platform_threads=../config/${LIBC_TARGET_OS}/threads.h.in
   DEPENDS
     .llvm_libc_common_h
-  DATA_FILES
-    ../config/${LIBC_TARGET_OS}/threads.h.in
+    .llvm-libc-types.__call_once_func_t
+    .llvm-libc-types.cnd_t
+    .llvm-libc-types.mtx_t
+    .llvm-libc-types.once_flag
+    .llvm-libc-types.thrd_start_t
+    .llvm-libc-types.thrd_t
 )
 
 add_gen_header(
@@ -94,6 +102,9 @@ add_gen_header(
   GEN_HDR signal.h
   DATA_FILES
     ../config/${LIBC_TARGET_OS}/signal.h.in
+  DEPENDS
+    .llvm-libc-types.__sigaction
+    .llvm-libc-types.__sighandler_t
 )
 
 add_gen_header(
@@ -102,6 +113,8 @@ add_gen_header(
   GEN_HDR stdio.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.FILE
+    .llvm-libc-types.size_t
 )
 
 add_gen_header(
@@ -110,6 +123,12 @@ add_gen_header(
   GEN_HDR stdlib.h
   DEPENDS
     .llvm_libc_common_h
+    .llvm-libc-types.__bsearchcompare_t
+    .llvm-libc-types.__qsortcompare_t
+    .llvm-libc-types.div_t
+    .llvm-libc-types.ldiv_t
+    .llvm-libc-types.lldiv_t
+    .llvm-libc-types.size_t
 )
 
 add_gen_header(
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index ced6a38a2de3f..73c295843e2a4 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -1,5 +1,24 @@
+add_header(__bsearchcompare_t HDR __bsearchcompare_t.h)
+add_header(__call_once_func_t HDR __call_once_func_t.h)
+add_header(__qsortcompare_t HDR __qsortcompare_t.h)
+add_header(__sigaction HDR __sigaction.h)
+add_header(__sighandler_t HDR __sighandler_t.h)
+add_header(cnd_t HDR cnd_t.h)
 add_header(double_t HDR double_t.h)
+add_header(div_t HDR div_t.h)
+add_header(ldiv_t HDR ldiv_t.h)
+add_header(lldiv_t HDR lldiv_t.h)
+add_header(FILE HDR FILE.h)
+add_header(fenv_t HDR fenv_t.h)
+add_header(fexcept_t HDR fexcept_t.h)
 add_header(float_t HDR float_t.h)
+add_header(imaxdiv_t HDR imaxdiv_t.h)
+add_header(mtx_t HDR mtx_t.h)
 add_header(off_t HDR off_t.h)
+add_header(once_flag HDR once_flag.h)
 add_header(size_t HDR size_t.h)
 add_header(ssize_t HDR ssize_t.h)
+add_header(thrd_start_t HDR thrd_start_t.h)
+add_header(thrd_t HDR thrd_t.h)
+add_header(time_t HDR time_t.h)
+add_header(tm HDR tm.h)
diff --git a/libc/include/llvm-libc-types/FILE.h b/libc/include/llvm-libc-types/FILE.h
new file mode 100644
index 0000000000000..1c1ff97ec86a5
--- /dev/null
+++ b/libc/include/llvm-libc-types/FILE.h
@@ -0,0 +1,14 @@
+//===-- Definition of the type FILE ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_FILE_H__
+#define __LLVM_LIBC_TYPES_FILE_H__
+
+typedef struct FILE FILE;
+
+#endif // __LLVM_LIBC_TYPES_FILE_H__
diff --git a/libc/include/llvm-libc-types/__bsearchcompare_t.h b/libc/include/llvm-libc-types/__bsearchcompare_t.h
new file mode 100644
index 0000000000000..40ebc7f356688
--- /dev/null
+++ b/libc/include/llvm-libc-types/__bsearchcompare_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of type __bsearchcompare_t -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_BSEARCHCOMPARE_T_H__
+#define __LLVM_LIBC_TYPES_BSEARCHCOMPARE_T_H__
+
+typedef int (*__bsearchcompare_t)(const void *, const void *);
+
+#endif // __LLVM_LIBC_TYPES_BSEARCHCOMPARE_T_H__
diff --git a/libc/include/llvm-libc-types/__call_once_func_t.h b/libc/include/llvm-libc-types/__call_once_func_t.h
new file mode 100644
index 0000000000000..bc8ed8331bd80
--- /dev/null
+++ b/libc/include/llvm-libc-types/__call_once_func_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of __call_once_func_t type -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_CALL_ONCE_FUNC_T_H__
+#define __LLVM_LIBC_TYPES_CALL_ONCE_FUNC_T_H__
+
+typedef void (*__call_once_func_t)(void);
+
+#endif // __LLVM_LIBC_TYPES_CALL_ONCE_FUNC_T_H__
diff --git a/libc/include/llvm-libc-types/__qsortcompare_t.h b/libc/include/llvm-libc-types/__qsortcompare_t.h
new file mode 100644
index 0000000000000..82bd4cc1fcd03
--- /dev/null
+++ b/libc/include/llvm-libc-types/__qsortcompare_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of type __qsortcompare_t -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_QSORTCOMPARE_T_H__
+#define __LLVM_LIBC_TYPES_QSORTCOMPARE_T_H__
+
+typedef int (*__qsortcompare_t)(const void *, const void *);
+
+#endif // __LLVM_LIBC_TYPES_QSORTCOMPARE_T_H__
diff --git a/libc/include/llvm-libc-types/__sigaction.h b/libc/include/llvm-libc-types/__sigaction.h
new file mode 100644
index 0000000000000..1c7243c0e921f
--- /dev/null
+++ b/libc/include/llvm-libc-types/__sigaction.h
@@ -0,0 +1,22 @@
+//===-- Definition of struct __sigaction ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_SIGACTION_H__
+#define __LLVM_LIBC_TYPES_SIGACTION_H__
+
+struct __sigaction {
+  union {
+    void (*sa_handler)(int);
+    void (*sa_action)(int, siginfo_t *, void *);
+  };
+  sigset_t sa_mask;
+  int sa_flags;
+  void (*sa_restorer)(void);
+};
+
+#endif // __LLVM_LIBC_TYPES_SIGACTION_H__
diff --git a/libc/include/llvm-libc-types/__sighandler_t.h b/libc/include/llvm-libc-types/__sighandler_t.h
new file mode 100644
index 0000000000000..bd0ad98d85295
--- /dev/null
+++ b/libc/include/llvm-libc-types/__sighandler_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of struct __sighandler_t -------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_SIGHANDLER_T_H__
+#define __LLVM_LIBC_TYPES_SIGHANDLER_T_H__
+
+typedef void (*__sighandler_t)(int);
+
+#endif // __LLVM_LIBC_TYPES_SIGHANDLER_T_H__
diff --git a/libc/include/llvm-libc-types/cnd_t.h b/libc/include/llvm-libc-types/cnd_t.h
new file mode 100644
index 0000000000000..62f0636fc7bcf
--- /dev/null
+++ b/libc/include/llvm-libc-types/cnd_t.h
@@ -0,0 +1,21 @@
+//===-- Definition of cnd_t type ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_CND_T_H__
+#define __LLVM_LIBC_TYPES_CND_T_H__
+
+typedef struct {
+  void *__qfront;
+  void *__qback;
+  struct {
+    unsigned char __w[4];
+    int __t;
+  } __qmtx;
+} cnd_t;
+
+#endif // __LLVM_LIBC_TYPES_CND_T_H__
diff --git a/libc/include/llvm-libc-types/div_t.h b/libc/include/llvm-libc-types/div_t.h
new file mode 100644
index 0000000000000..e495a1c3f9dcc
--- /dev/null
+++ b/libc/include/llvm-libc-types/div_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of type div_t ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_DIV_T_H__
+#define __LLVM_LIBC_TYPES_DIV_T_H__
+
+typedef struct {
+  int quot;
+  int rem;
+} div_t;
+
+#endif // __LLVM_LIBC_TYPES_DIV_T_H__
diff --git a/libc/include/llvm-libc-types/fenv_t.h b/libc/include/llvm-libc-types/fenv_t.h
new file mode 100644
index 0000000000000..a95e08179cceb
--- /dev/null
+++ b/libc/include/llvm-libc-types/fenv_t.h
@@ -0,0 +1,25 @@
+//===-- Definition of type fenv_t -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_FENV_T_H__
+#define __LLVM_LIBC_TYPES_FENV_T_H__
+
+#ifdef __aarch64__
+typedef struct {
+  unsigned char __control_word[4];
+  unsigned char __status_word[4];
+} fenv_t;
+#endif
+#ifdef __x86_64__
+typedef struct {
+  unsigned char __x86_status[28];
+  unsigned char __mxcsr[4];
+} fenv_t;
+#endif
+
+#endif // __LLVM_LIBC_TYPES_FENV_T_H__
diff --git a/libc/include/llvm-libc-types/fexcept_t.h b/libc/include/llvm-libc-types/fexcept_t.h
new file mode 100644
index 0000000000000..6e7969c1be0a8
--- /dev/null
+++ b/libc/include/llvm-libc-types/fexcept_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of fexcept_t type --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_FEXCEPT_T_H__
+#define __LLVM_LIBC_TYPES_FEXCEPT_T_H__
+
+typedef int fexcept_t;
+
+#endif // __LLVM_LIBC_TYPES_FEXCEPT_T_H__
diff --git a/libc/include/llvm-libc-types/imaxdiv_t.h b/libc/include/llvm-libc-types/imaxdiv_t.h
new file mode 100644
index 0000000000000..5062b643065a7
--- /dev/null
+++ b/libc/include/llvm-libc-types/imaxdiv_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of type imaxdiv_t --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_IMAXDIV_T_H__
+#define __LLVM_LIBC_TYPES_IMAXDIV_T_H__
+
+typedef struct {
+  intmax_t quot;
+  intmax_t rem;
+} imaxdiv_t;
+
+#endif // __LLVM_LIBC_TYPES_IMAXDIV_T_H__
diff --git a/libc/include/llvm-libc-types/ldiv_t.h b/libc/include/llvm-libc-types/ldiv_t.h
new file mode 100644
index 0000000000000..9bd8d253330a0
--- /dev/null
+++ b/libc/include/llvm-libc-types/ldiv_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of type ldiv_t -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_LDIV_T_H__
+#define __LLVM_LIBC_TYPES_LDIV_T_H__
+
+typedef struct {
+  long quot;
+  long rem;
+} ldiv_t;
+
+#endif // __LLVM_LIBC_TYPES_LDIV_T_H__
diff --git a/libc/include/llvm-libc-types/lldiv_t.h b/libc/include/llvm-libc-types/lldiv_t.h
new file mode 100644
index 0000000000000..109304d120787
--- /dev/null
+++ b/libc/include/llvm-libc-types/lldiv_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of type lldiv_t ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_LLDIV_T_H__
+#define __LLVM_LIBC_TYPES_LLDIV_T_H__
+
+typedef struct {
+  long long quot;
+  long long rem;
+} lldiv_t;
+
+#endif // __LLVM_LIBC_TYPES_LLDIV_T_H__
diff --git a/libc/include/llvm-libc-types/mtx_t.h b/libc/include/llvm-libc-types/mtx_t.h
new file mode 100644
index 0000000000000..42f27297bd0af
--- /dev/null
+++ b/libc/include/llvm-libc-types/mtx_t.h
@@ -0,0 +1,17 @@
+//===-- Definition of mtx_t type ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_MTX_T_H__
+#define __LLVM_LIBC_TYPES_MTX_T_H__
+
+typedef struct {
+  unsigned char __internal_data[4];
+  int __mtx_type;
+} mtx_t;
+
+#endif // __LLVM_LIBC_TYPES_MTX_T_H__
diff --git a/libc/include/llvm-libc-types/once_flag.h b/libc/include/llvm-libc-types/once_flag.h
new file mode 100644
index 0000000000000..4987bda38bbe4
--- /dev/null
+++ b/libc/include/llvm-libc-types/once_flag.h
@@ -0,0 +1,14 @@
+//===-- Definition of once_flag type --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_ONCE_FLAG_H__
+#define __LLVM_LIBC_TYPES_ONCE_FLAG_H__
+
+typedef unsigned int once_flag;
+
+#endif // __LLVM_LIBC_TYPES_ONCE_FLAg_H__
diff --git a/libc/include/llvm-libc-types/thrd_start_t.h b/libc/include/llvm-libc-types/thrd_start_t.h
new file mode 100644
index 0000000000000..83fc32cbd1f87
--- /dev/null
+++ b/libc/include/llvm-libc-types/thrd_start_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of thrd_start_t type -----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_THRD_START_T_H__
+#define __LLVM_LIBC_TYPES_THRD_START_T_H__
+
+typedef int (*thrd_start_t)(void *);
+
+#endif // __LLVM_LIBC_TYPES_THRD_START_T_H__
diff --git a/libc/config/linux/threads.h.in b/libc/include/llvm-libc-types/thrd_t.h
similarity index 70%
rename from libc/config/linux/threads.h.in
rename to libc/include/llvm-libc-types/thrd_t.h
index cd45ebfec9bc1..ebbf9b0a36506 100644
--- a/libc/config/linux/threads.h.in
+++ b/libc/include/llvm-libc-types/thrd_t.h
@@ -1,4 +1,4 @@
-//===-- Linux specific definitions of types from threads.h ----------------===//
+//===-- Definition of thrd_t type -----------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-%%begin()
+#ifndef __LLVM_LIBC_TYPES_THRD_T_H__
+#define __LLVM_LIBC_TYPES_THRD_T_H__
 
 typedef struct {
   unsigned char __clear_tid[4];
@@ -15,3 +16,5 @@ typedef struct {
   int __stack_size;
   int __retval;
 } thrd_t;
+
+#endif // __LLVM_LIBC_TYPES_THRD_T_H__
diff --git a/libc/include/llvm-libc-types/time_t.h b/libc/include/llvm-libc-types/time_t.h
new file mode 100644
index 0000000000000..13c33b07c2238
--- /dev/null
+++ b/libc/include/llvm-libc-types/time_t.h
@@ -0,0 +1,14 @@
+//===-- Definition of the type time_t -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_TIME_T_H__
+#define __LLVM_LIBC_TYPES_TIME_T_H__
+
+typedef long time_t;
+
+#endif // __LLVM_LIBC_TYPES_TIME_T_H__
diff --git a/libc/include/llvm-libc-types/tm.h b/libc/include/llvm-libc-types/tm.h
new file mode 100644
index 0000000000000..953e12e819c3a
--- /dev/null
+++ b/libc/include/llvm-libc-types/tm.h
@@ -0,0 +1,24 @@
+//===-- Definition of struct tm -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __LLVM_LIBC_TYPES_TM_H__
+#define __LLVM_LIBC_TYPES_TM_H__
+
+struct tm {
+  int tm_sec;   // seconds after the minute
+  int tm_min;   // minutes after the hour
+  int tm_hour;  // hours since midnight
+  int tm_mday;  // day of the month
+  int tm_mon;   // months since January
+  int tm_year;  // years since 1900
+  int tm_wday;  // days since Sunday
+  int tm_yday;  // days since January
+  int tm_isdst; // Daylight Saving Time flag
+};
+
+#endif // __LLVM_LIBC_TYPES_TM_H__
diff --git a/libc/include/threads.h.def b/libc/include/threads.h.def
index c8a24e0979db9..93541b8d3bac4 100644
--- a/libc/include/threads.h.def
+++ b/libc/include/threads.h.def
@@ -11,8 +11,6 @@
 
 #include <__llvm-libc-common.h>
 
-%%include_file(${platform_threads})
-
 %%public_api()
 
 #endif // LLVM_LIBC_THREADS_H

From 5e88f527da2175019e443d3600ade8f23e84f116 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Jan 2022 14:57:31 -0800
Subject: [PATCH 099/946] [RISCV] Remove RISCVSubtarget::hasStdExtV() and
 hasStdExtZve*(). NFC

All code should use one of the cleaner named hasVInstructions*
functions. Fix the two uses that weren't and delete the methods
so no new uses can be created.
---
 llvm/lib/Target/RISCV/RISCV.td         | 4 ++--
 llvm/lib/Target/RISCV/RISCVSubtarget.h | 6 ------
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 7972ced08edd8..b5d2bd01d3552 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -200,12 +200,12 @@ def HasVInstructions    : Predicate<"Subtarget->hasVInstructions()">,
           (any_of FeatureStdExtZve32x),
           "'V' (Vector Extension for Application Processors), 'Zve32x' or "
           "'Zve64x' (Vector Extensions for Embedded Processors)">;
-def HasVInstructionsI64 : Predicate<"Subtarget->hasStdExtZve64x()">,
+def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
       AssemblerPredicate<
           (any_of FeatureStdExtZve64x),
           "'V' (Vector Extension for Application Processors) or 'Zve64x' "
           "(Vector Extensions for Embedded Processors)">;
-def HasVInstructionsAnyF : Predicate<"Subtarget->hasStdExtZve32f()">,
+def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
       AssemblerPredicate<
           (any_of FeatureStdExtZve32f),
           "'V' (Vector Extension for Application Processors), 'Zve32f', "
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index d55affd0539be..4b5958ad38d9c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -153,12 +153,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtZbr() const { return HasStdExtZbr; }
   bool hasStdExtZbs() const { return HasStdExtZbs; }
   bool hasStdExtZbt() const { return HasStdExtZbt; }
-  bool hasStdExtV() const { return HasStdExtV; }
-  bool hasStdExtZve32x() const { return HasStdExtZve32x; }
-  bool hasStdExtZve32f() const { return HasStdExtZve32f; }
-  bool hasStdExtZve64x() const { return HasStdExtZve64x; }
-  bool hasStdExtZve64f() const { return HasStdExtZve64f; }
-  bool hasStdExtZve64d() const { return HasStdExtZve64d; }
   bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; }
   bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
   bool hasStdExtZfh() const { return HasStdExtZfh; }

From a5684114445a72b5c0bb5b7b68a5c6eb3486b66d Mon Sep 17 00:00:00 2001
From: CJ Johnson <johnsoncj@google.com>
Date: Thu, 20 Jan 2022 18:05:07 -0500
Subject: [PATCH 100/946] [clang-tidy] Update bugprone-stringview-nullptr to
 consistently prefer the empty string when passing arguments to
 constructors/functions

Previously, function(nullptr) would have been fixed with function({}). This unfortunately can change overload resolution and even become ambiguous. T(nullptr) was already being fixed with T(""), so this change just brings function calls in line with that.

Differential Revision: https://reviews.llvm.org/D117840
---
 .../bugprone/StringviewNullptrCheck.cpp       | 19 ++++++++--------
 .../checks/bugprone-stringview-nullptr.rst    |  4 ++--
 .../checkers/bugprone-stringview-nullptr.cpp  | 22 +++++++++----------
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp
index a0ae262318914..b45aa93533b08 100644
--- a/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/StringviewNullptrCheck.cpp
@@ -44,6 +44,9 @@ RewriteRule StringviewNullptrCheckImpl() {
   auto static_cast_warning =
       cat("casting to basic_string_view from null is undefined; replace with "
           "the empty string");
+  auto argument_construction_warning =
+      cat("passing null as basic_string_view is undefined; replace with the "
+          "empty string");
   auto assignment_warning =
       cat("assignment to basic_string_view from null is undefined; replace "
           "with the default constructor");
@@ -53,9 +56,6 @@ RewriteRule StringviewNullptrCheckImpl() {
   auto equality_comparison_warning =
       cat("comparing basic_string_view to null is undefined; replace with the "
           "emptiness query");
-  auto constructor_argument_warning =
-      cat("passing null as basic_string_view is undefined; replace with the "
-          "empty string");
 
   // Matches declarations and expressions of type `basic_string_view`
   auto HasBasicStringViewType = hasType(hasUnqualifiedDesugaredType(recordType(
@@ -211,11 +211,12 @@ RewriteRule StringviewNullptrCheckImpl() {
       remove(node("null_arg_expr")), construction_warning);
 
   // `function(null_arg_expr)`
-  auto HandleFunctionArgumentInitialization = makeRule(
-      callExpr(hasAnyArgument(
-                   ignoringImpCasts(BasicStringViewConstructingFromNullExpr)),
-               unless(cxxOperatorCallExpr())),
-      changeTo(node("construct_expr"), cat("{}")), construction_warning);
+  auto HandleFunctionArgumentInitialization =
+      makeRule(callExpr(hasAnyArgument(ignoringImpCasts(
+                            BasicStringViewConstructingFromNullExpr)),
+                        unless(cxxOperatorCallExpr())),
+               changeTo(node("construct_expr"), cat("\"\"")),
+               argument_construction_warning);
 
   // `sv = null_arg_expr`
   auto HandleAssignment = makeRule(
@@ -268,7 +269,7 @@ RewriteRule StringviewNullptrCheckImpl() {
                                       BasicStringViewConstructingFromNullExpr)),
                    unless(HasBasicStringViewType)),
                changeTo(node("construct_expr"), cat("\"\"")),
-               constructor_argument_warning);
+               argument_construction_warning);
 
   return applyFirst(
       {HandleTemporaryCXXFunctionalCastExpr,
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-stringview-nullptr.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-stringview-nullptr.rst
index 198ad398ec7b7..7138c97b745ae 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-stringview-nullptr.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-stringview-nullptr.rst
@@ -43,9 +43,9 @@ is translated into...
   bool is_empty = sv.empty();
   bool isnt_empty = !sv.empty();
 
-  accepts_sv({});
+  accepts_sv("");
 
-  accepts_sv({});  // A
+  accepts_sv("");  // A
 
   accepts_sv({nullptr, 0});  // B
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-stringview-nullptr.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-stringview-nullptr.cpp
index 322c8eeca754e..02fcab31dcf3e 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-stringview-nullptr.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-stringview-nullptr.cpp
@@ -1039,24 +1039,24 @@ void function_argument_initialization() /* f */ {
   // Function Argument Initialization
   {
     function(nullptr) /* f1 */;
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f1 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing null as basic_string_view is undefined; replace with the empty string
+    // CHECK-FIXES: {{^}}    function("") /* f1 */;
 
     function((nullptr)) /* f2 */;
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f2 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing{{.*}}empty string
+    // CHECK-FIXES: {{^}}    function("") /* f2 */;
 
     function({nullptr}) /* f3 */;
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f3 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing{{.*}}empty string
+    // CHECK-FIXES: {{^}}    function("") /* f3 */;
 
     function({(nullptr)}) /* f4 */;
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f4 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing{{.*}}empty string
+    // CHECK-FIXES: {{^}}    function("") /* f4 */;
 
     function({{}}) /* f5 */; // Default `const CharT*`
-    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: constructing{{.*}}default
-    // CHECK-FIXES: {{^}}    function({}) /* f5 */;
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: passing{{.*}}empty string
+    // CHECK-FIXES: {{^}}    function("") /* f5 */;
   }
 
   // Function Argument Initialization With Temporary
@@ -1599,7 +1599,7 @@ void constructor_invocation() /* r */ {
   struct AcceptsSV {
     explicit AcceptsSV(std::string_view) {}
   } r1(nullptr);
-  // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: passing null as basic_string_view is undefined; replace with the empty string
+  // CHECK-MESSAGES: :[[@LINE-1]]:8: warning: passing{{.*}}empty string
   // CHECK-FIXES: {{^}}  } r1("");
 
   (void)(AcceptsSV{nullptr}) /* r2 */;

From 8457b61699e8b8315579c8c2ae2a2d47f28d6fc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Bylica?= <chfast@gmail.com>
Date: Thu, 20 Jan 2022 21:16:46 +0100
Subject: [PATCH 101/946] [test] Add tests for bswap combining. NFC

---
 .../test/Transforms/InstCombine/bswap-fold.ll | 270 ++++++++++++++++++
 1 file changed, 270 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index da7380e0ab74e..9ffae93bbbc7b 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -355,6 +355,276 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
   ret i64 %t3
 }
 
+
+define i64 @bs_active_high8(i64 %0) {
+; CHECK-LABEL: @bs_active_high8(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = shl i64 %0, 56
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_active_high7(i32 %0) {
+; CHECK-LABEL: @bs_active_high7(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = and i32 %0, -33554432  ; 0xfe000000
+  %3 = call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define <2 x i64> @bs_active_high4(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_high4(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 60, i64 60>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = shl <2 x i64> %0, <i64 60, i64 60>
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_high_different(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 57>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = shl <2 x i64> %0, <i64 56, i64 57>
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+; negative test
+define <2 x i64> @bs_active_high_different_negative(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_high_different_negative(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 55>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = shl <2 x i64> %0, <i64 56, i64 55>  ; second elem has 9 active high bits
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+; negative test
+define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_high_undef(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = shl <2 x i64> %0, <i64 56, i64 undef>
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+define i64 @bs_active_high8_multiuse(i64 %0) {
+; CHECK-LABEL: @bs_active_high8_multiuse(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %2 = shl i64 %0, 56
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3  ; increase use of shl and bswap
+  ret i64 %4
+}
+
+define i64 @bs_active_high7_multiuse(i64 %0) {
+; CHECK-LABEL: @bs_active_high7_multiuse(
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %2 = shl i64 %0, 57
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3  ; increase use of shl and bswap
+  ret i64 %4
+}
+
+define i64 @bs_active_byte_6h(i64 %0) {
+; CHECK-LABEL: @bs_active_byte_6h(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = and i64 %0, 280375465082880  ; 0xff00'00000000
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_active_byte_3h(i32 %0) {
+; CHECK-LABEL: @bs_active_byte_3h(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = and i32 %0, 393216  ; 0x0006'0000
+  %3 = call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_byte_3h_v2(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, <i32 8388608, i32 65536>  ; 0x0080'0000, 0x0001'0000
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+; negative test
+define i64 @bs_active_byte_78h(i64 %0) {
+; CHECK-LABEL: @bs_active_byte_78h(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 108086391056891904
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = and i64 %0, 108086391056891904  ; 0x01800000'00000000
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+
+define i16 @bs_active_low1(i16 %0) {
+; CHECK-LABEL: @bs_active_low1(
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    ret i16 [[TMP3]]
+;
+  %2 = lshr i16 %0, 15
+  %3 = call i16 @llvm.bswap.i16(i16 %2)
+  ret i16 %3
+}
+
+define <2 x i32> @bs_active_low8(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_low8(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 255, i32 255>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, <i32 255, i32 255>
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_low_different(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 2, i32 128>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, <i32 2, i32 128>
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+; negative test
+define <2 x i32> @bs_active_low_different_negative(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_low_different_negative(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 256, i32 255>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, <i32 256, i32 255>
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+; negative test
+define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
+; CHECK-LABEL: @bs_active_low_undef(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 255, i32 undef>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+;
+  %2 = and <2 x i32> %0, <i32 255, i32 undef>
+  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
+  ret <2 x i32> %3
+}
+
+define i64 @bs_active_low8_multiuse(i64 %0) {
+; CHECK-LABEL: @bs_active_low8_multiuse(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %2 = and i64 %0, 255
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3  ; increase use of and and bswap
+  ret i64 %4
+}
+
+define i64 @bs_active_low7_multiuse(i64 %0) {
+; CHECK-LABEL: @bs_active_low7_multiuse(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP4]]
+;
+  %2 = and i64 %0, 127
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  %4 = mul i64 %2, %3  ; increase use of and and bswap
+  ret i64 %4
+}
+
+define i64 @bs_active_byte_4l(i64 %0) {
+; CHECK-LABEL: @bs_active_byte_4l(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = and i64 %0, 1140850688  ; 0x44000000
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+define i32 @bs_active_byte_2l(i32 %0) {
+; CHECK-LABEL: @bs_active_byte_2l(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %2 = and i32 %0, 65280  ; 0xff00
+  %3 = call i32 @llvm.bswap.i32(i32 %2)
+  ret i32 %3
+}
+
+define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
+; CHECK-LABEL: @bs_active_byte_2l_v2(
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
+; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+;
+  %2 = and <2 x i64> %0, <i64 256, i64 65280>  ; 0x0100, 0xff00
+  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
+  ret <2 x i64> %3
+}
+
+; negative test
+define i64 @bs_active_byte_12l(i64 %0) {
+; CHECK-LABEL: @bs_active_byte_12l(
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 384
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %2 = and i64 %0, 384  ; 0x0180
+  %3 = call i64 @llvm.bswap.i64(i64 %2)
+  ret i64 %3
+}
+
+
 declare i16 @llvm.bswap.i16(i16)
 declare i32 @llvm.bswap.i32(i32)
 declare i64 @llvm.bswap.i64(i64)

From ac2f3df8396ac5bc507bd84eec185d756420e47c Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Wed, 19 Jan 2022 18:17:08 -0800
Subject: [PATCH 102/946] [lld][WebAssembly] Remove redundant config setting

Unresolved symbols are not currently reported when building with
`-shared` or `-pie` so setting unresolvedSymbols doesn't have any
effect.

Differential Revision: https://reviews.llvm.org/D117737
---
 lld/wasm/Driver.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp
index 7523755806ab9..cb7ca13ebd6ae 100644
--- a/lld/wasm/Driver.cpp
+++ b/lld/wasm/Driver.cpp
@@ -474,7 +474,6 @@ static void setConfigs() {
   if (config->shared) {
     config->importMemory = true;
     config->importUndefined = true;
-    config->unresolvedSymbols = UnresolvedPolicy::Ignore;
   }
 }
 

From d92e5412ea571158b5b524855d19b5eafa0567ce Mon Sep 17 00:00:00 2001
From: Clint Caywood <caywood@google.com>
Date: Thu, 20 Jan 2022 23:27:05 +0000
Subject: [PATCH 103/946] [libc] Use __builtin_clz to find leading 1 in hypot

This is an optimization that using a single CPU instruction on supported
architectures (amd64 and aarch64, but possibly others) to replace what was
previously an iterative look-up-table algorithm.

Originally I suggested using inline assembly for this in
https://reviews.llvm.org/D117584.

Reviewed By: lntue, sivachandra

Differential Revision: https://reviews.llvm.org/D117684
---
 libc/src/__support/FPUtil/Hypot.h | 44 ++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/libc/src/__support/FPUtil/Hypot.h b/libc/src/__support/FPUtil/Hypot.h
index 15b26798ccb54..bb658b0085fea 100644
--- a/libc/src/__support/FPUtil/Hypot.h
+++ b/libc/src/__support/FPUtil/Hypot.h
@@ -22,33 +22,39 @@ namespace internal {
 template <typename T>
 static inline T find_leading_one(T mant, int &shift_length);
 
+// The following overloads are matched based on what is accepted by
+// __builtin_clz* rather than using the exactly-sized aliases from stdint.h
+// (such as uint32_t). There are 3 overloads even though 2 will only ever be
+// used by a specific platform, since unsigned long varies in size depending on
+// the word size of the architecture.
+
 template <>
-inline uint32_t find_leading_one<uint32_t>(uint32_t mant, int &shift_length) {
+inline unsigned int find_leading_one<unsigned int>(unsigned int mant,
+                                                   int &shift_length) {
   shift_length = 0;
-  constexpr int NSTEPS = 5;
-  constexpr uint32_t BOUNDS[NSTEPS] = {1 << 16, 1 << 8, 1 << 4, 1 << 2, 1 << 1};
-  constexpr int SHIFTS[NSTEPS] = {16, 8, 4, 2, 1};
-  for (int i = 0; i < NSTEPS; ++i) {
-    if (mant >= BOUNDS[i]) {
-      shift_length += SHIFTS[i];
-      mant >>= SHIFTS[i];
-    }
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clz(mant);
   }
   return 1U << shift_length;
 }
 
 template <>
-inline uint64_t find_leading_one<uint64_t>(uint64_t mant, int &shift_length) {
+inline unsigned long find_leading_one<unsigned long>(unsigned long mant,
+                                                     int &shift_length) {
   shift_length = 0;
-  constexpr int NSTEPS = 6;
-  constexpr uint64_t BOUNDS[NSTEPS] = {1ULL << 32, 1ULL << 16, 1ULL << 8,
-                                       1ULL << 4,  1ULL << 2,  1ULL << 1};
-  constexpr int SHIFTS[NSTEPS] = {32, 16, 8, 4, 2, 1};
-  for (int i = 0; i < NSTEPS; ++i) {
-    if (mant >= BOUNDS[i]) {
-      shift_length += SHIFTS[i];
-      mant >>= SHIFTS[i];
-    }
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clzl(mant);
+  }
+  return 1UL << shift_length;
+}
+
+template <>
+inline unsigned long long
+find_leading_one<unsigned long long>(unsigned long long mant,
+                                     int &shift_length) {
+  shift_length = 0;
+  if (mant > 0) {
+    shift_length = (sizeof(mant) * 8) - 1 - __builtin_clzll(mant);
   }
   return 1ULL << shift_length;
 }

From 5501c16edf0f0211386edee9029af5a2b92fcc17 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Tue, 11 Jan 2022 14:54:31 -0800
Subject: [PATCH 104/946] [flang] Fix OPEN/WRITE(SIGN='SUPPRESS')

The keyword value was misspelled in the runtime.

Differential Revision: https://reviews.llvm.org/D117816
---
 flang/runtime/io-api.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index 811095417ffb8..2f12bfcad350d 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -593,7 +593,8 @@ bool IONAME(SetRound)(Cookie cookie, const char *keyword, std::size_t length) {
 bool IONAME(SetSign)(Cookie cookie, const char *keyword, std::size_t length) {
   IoStatementState &io{*cookie};
   ConnectionState &connection{io.GetConnectionState()};
-  static const char *keywords[]{"PLUS", "YES", "PROCESSOR_DEFINED", nullptr};
+  static const char *keywords[]{
+      "PLUS", "SUPPRESS", "PROCESSOR_DEFINED", nullptr};
   switch (IdentifyValue(keyword, length, keywords)) {
   case 0:
     connection.modes.editingFlags |= signPlus;

From 9ddd07922f65ec7b633228b8b71076031355937e Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Wed, 12 Jan 2022 15:48:06 -0800
Subject: [PATCH 105/946] [flang] Handle FLUSH(unknown unit)

The unit number passed to a FLUSH statement is not required to
be a valid open unit; nothing happens (esp. not the creation of
an empty fort.n file) in this case.

Differential Revision: https://reviews.llvm.org/D117819
---
 flang/runtime/io-api.cpp | 19 ++++++++++++-------
 flang/runtime/io-stmt.h  |  8 ++++----
 2 files changed, 16 insertions(+), 11 deletions(-)

diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index 2f12bfcad350d..03c878d1a8506 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -317,7 +317,7 @@ Cookie IONAME(BeginClose)(
   } else {
     // CLOSE(UNIT=bad unit) is just a no-op
     Terminator oom{sourceFile, sourceLine};
-    return &New<NoopCloseStatementState>{oom}(sourceFile, sourceLine)
+    return &New<NoopStatementState>{oom}(sourceFile, sourceLine)
                 .release()
                 ->ioStatementState();
   }
@@ -325,11 +325,16 @@ Cookie IONAME(BeginClose)(
 
 Cookie IONAME(BeginFlush)(
     ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
-  Terminator terminator{sourceFile, sourceLine};
-  ExternalFileUnit &unit{
-      ExternalFileUnit::LookUpOrCrash(unitNumber, terminator)};
-  return &unit.BeginIoStatement<ExternalMiscIoStatementState>(
-      unit, ExternalMiscIoStatementState::Flush, sourceFile, sourceLine);
+  if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) {
+    return &unit->BeginIoStatement<ExternalMiscIoStatementState>(
+        *unit, ExternalMiscIoStatementState::Flush, sourceFile, sourceLine);
+  } else {
+    // FLUSH(UNIT=unknown) is a no-op
+    Terminator oom{sourceFile, sourceLine};
+    return &New<NoopStatementState>{oom}(sourceFile, sourceLine)
+                .release()
+                ->ioStatementState();
+  }
 }
 
 Cookie IONAME(BeginBackspace)(
@@ -880,7 +885,7 @@ bool IONAME(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) {
     }
     return false;
   }
-  if (io.get_if<NoopCloseStatementState>()) {
+  if (io.get_if<NoopStatementState>()) {
     return true; // don't bother validating STATUS= in a no-op CLOSE
   }
   io.GetIoErrorHandler().Crash(
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index bf84f4a3369a0..8327326c7f9ef 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -34,7 +34,7 @@ class InquireUnconnectedFileState;
 class InquireIOLengthState;
 class ExternalMiscIoStatementState;
 class CloseStatementState;
-class NoopCloseStatementState;
+class NoopStatementState; // CLOSE or FLUSH on unknown unit
 
 template <Direction, typename CHAR = char>
 class InternalFormattedIoStatementState;
@@ -238,7 +238,7 @@ class IoStatementState {
 private:
   std::variant<std::reference_wrapper<OpenStatementState>,
       std::reference_wrapper<CloseStatementState>,
-      std::reference_wrapper<NoopCloseStatementState>,
+      std::reference_wrapper<NoopStatementState>,
       std::reference_wrapper<
           InternalFormattedIoStatementState<Direction::Output>>,
       std::reference_wrapper<
@@ -616,9 +616,9 @@ class NoUnitIoStatementState : public IoStatementBase {
   ConnectionState connection_;
 };
 
-class NoopCloseStatementState : public NoUnitIoStatementState {
+class NoopStatementState : public NoUnitIoStatementState {
 public:
-  NoopCloseStatementState(const char *sourceFile, int sourceLine)
+  NoopStatementState(const char *sourceFile, int sourceLine)
       : NoUnitIoStatementState{sourceFile, sourceLine, *this} {}
   void set_status(CloseStatus) {} // discards
 };

From e1b7bd911d9e491e50db1aa00d340fb3b6b907ef Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 21 Jan 2022 09:49:41 +1000
Subject: [PATCH 106/946] [OpenCL] opencl-c.h: add __opencl_c_images and 
 __opencl_c_read_write_images

This wraps the image and rw images usages in the correct macros

Reviewed By: Anastasia

Differential Revision: https://reviews.llvm.org/D107539
---
 clang/lib/Headers/opencl-c.h | 94 +++++++++++++++++++++---------------
 1 file changed, 54 insertions(+), 40 deletions(-)

diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index 77a7a8b9bb3a1..e65e3634d010d 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -11,11 +11,11 @@
 
 #include "opencl-c-base.h"
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_images)
 #ifndef cl_khr_depth_images
 #define cl_khr_depth_images
 #endif //cl_khr_depth_images
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_images)
 
 #if __OPENCL_C_VERSION__ < CL_VERSION_2_0
 #ifdef cl_khr_3d_image_writes
@@ -15585,7 +15585,7 @@ half4 __purefn __ovld read_imageh(read_only image1d_buffer_t image, int coord);
 #endif //cl_khr_fp16
 
 // Image read functions for read_write images
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 float4 __purefn __ovld read_imagef(read_write image1d_t image, int coord);
 int4 __purefn __ovld read_imagei(read_write image1d_t image, int coord);
 uint4 __purefn __ovld read_imageui(read_write image1d_t image, int coord);
@@ -15628,7 +15628,6 @@ float __purefn __ovld read_imagef(read_write image2d_msaa_depth_t image, int2 co
 float __purefn __ovld read_imagef(read_write image2d_array_msaa_depth_t image, int4 coord, int sample);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 #ifdef cl_khr_mipmap_image
 float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod);
 int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod);
@@ -15679,7 +15678,6 @@ int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler,
 uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY);
 
 #endif //cl_khr_mipmap_image
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 
 // Image read functions returning half4 type
 #ifdef cl_khr_fp16
@@ -15690,7 +15688,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);
 half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);
 half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);
 #endif //cl_khr_fp16
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images
 
 /**
  * Write color value to location specified by coordinate
@@ -15834,7 +15832,7 @@ void __ovld write_imageh(write_only image1d_buffer_t image, int coord, half4 col
 #endif //cl_khr_fp16
 
 // Image write functions for read_write images
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 void __ovld write_imagef(read_write image2d_t image, int2 coord, float4 color);
 void __ovld write_imagei(read_write image2d_t image, int2 coord, int4 color);
 void __ovld write_imageui(read_write image2d_t image, int2 coord, uint4 color);
@@ -15866,7 +15864,6 @@ void __ovld write_imagef(read_write image2d_depth_t image, int2 coord, float col
 void __ovld write_imagef(read_write image2d_array_depth_t image, int4 coord, float color);
 #endif //cl_khr_depth_images
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 #if defined(cl_khr_mipmap_image_writes)
 void __ovld write_imagef(read_write image1d_t image, int coord, int lod, float4 color);
 void __ovld write_imagei(read_write image1d_t image, int coord, int lod, int4 color);
@@ -15894,7 +15891,6 @@ void __ovld write_imageui(read_write image3d_t image, int4 coord, int lod, uint4
 #endif //cl_khr_3d_image_writes
 
 #endif //cl_khr_mipmap_image_writes
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 
 // Image write functions for half4 type
 #ifdef cl_khr_fp16
@@ -15907,7 +15903,7 @@ void __ovld write_imageh(read_write image1d_array_t image, int2 coord, half4 col
 void __ovld write_imageh(read_write image2d_array_t image, int4 coord, half4 color);
 void __ovld write_imageh(read_write image1d_buffer_t image, int coord, half4 color);
 #endif //cl_khr_fp16
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 // Note: In OpenCL v1.0/1.1/1.2, image argument of image query builtin functions does not have
 // access qualifier, which by default assume read_only access qualifier. Image query builtin
@@ -15955,7 +15951,7 @@ int __ovld __cnfn get_image_width(write_only image2d_array_msaa_t image);
 int __ovld __cnfn get_image_width(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_width(read_write image1d_t image);
 int __ovld __cnfn get_image_width(read_write image1d_buffer_t image);
 int __ovld __cnfn get_image_width(read_write image2d_t image);
@@ -15972,7 +15968,7 @@ int __ovld __cnfn get_image_width(read_write image2d_msaa_depth_t image);
 int __ovld __cnfn get_image_width(read_write image2d_array_msaa_t image);
 int __ovld __cnfn get_image_width(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the image height in pixels.
@@ -16007,7 +16003,7 @@ int __ovld __cnfn get_image_height(write_only image2d_array_msaa_t image);
 int __ovld __cnfn get_image_height(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_height(read_write image2d_t image);
 int __ovld __cnfn get_image_height(read_write image3d_t image);
 int __ovld __cnfn get_image_height(read_write image2d_array_t image);
@@ -16021,7 +16017,7 @@ int __ovld __cnfn get_image_height(read_write image2d_msaa_depth_t image);
 int __ovld __cnfn get_image_height(read_write image2d_array_msaa_t image);
 int __ovld __cnfn get_image_height(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the image depth in pixels.
@@ -16032,9 +16028,9 @@ int __ovld __cnfn get_image_depth(read_only image3d_t image);
 int __ovld __cnfn get_image_depth(write_only image3d_t image);
 #endif
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_depth(read_write image3d_t image);
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 // OpenCL Extension v2.0 s9.18 - Mipmaps
 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
@@ -16130,7 +16126,7 @@ int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_t im
 int __ovld __cnfn get_image_channel_data_type(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_channel_data_type(read_write image1d_t image);
 int __ovld __cnfn get_image_channel_data_type(read_write image1d_buffer_t image);
 int __ovld __cnfn get_image_channel_data_type(read_write image2d_t image);
@@ -16147,7 +16143,7 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_msaa_depth_t im
 int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_t image);
 int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the image channel order. Valid values are:
@@ -16202,7 +16198,7 @@ int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_t image)
 int __ovld __cnfn get_image_channel_order(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld __cnfn get_image_channel_order(read_write image1d_t image);
 int __ovld __cnfn get_image_channel_order(read_write image1d_buffer_t image);
 int __ovld __cnfn get_image_channel_order(read_write image2d_t image);
@@ -16219,7 +16215,7 @@ int __ovld __cnfn get_image_channel_order(read_write image2d_msaa_depth_t image)
 int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_t image);
 int __ovld __cnfn get_image_channel_order(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the 2D image width and height as an int2
@@ -16252,7 +16248,7 @@ int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_t image);
 int2 __ovld __cnfn get_image_dim(write_only image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int2 __ovld __cnfn get_image_dim(read_write image2d_t image);
 int2 __ovld __cnfn get_image_dim(read_write image2d_array_t image);
 #ifdef cl_khr_depth_images
@@ -16265,7 +16261,7 @@ int2 __ovld __cnfn get_image_dim(read_write image2d_msaa_depth_t image);
 int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_t image);
 int2 __ovld __cnfn get_image_dim(read_write image2d_array_msaa_depth_t image);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the 3D image width, height, and depth as an
@@ -16277,9 +16273,9 @@ int4 __ovld __cnfn get_image_dim(read_only image3d_t image);
 #ifdef cl_khr_3d_image_writes
 int4 __ovld __cnfn get_image_dim(write_only image3d_t image);
 #endif
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int4 __ovld __cnfn get_image_dim(read_write image3d_t image);
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Return the image array size.
@@ -16305,7 +16301,7 @@ size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_t image_
 size_t __ovld __cnfn get_image_array_size(write_only image2d_array_msaa_depth_t image_array);
 #endif //cl_khr_gl_msaa_sharing
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 size_t __ovld __cnfn get_image_array_size(read_write image1d_array_t image_array);
 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_t image_array);
 #ifdef cl_khr_depth_images
@@ -16315,7 +16311,7 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_depth_t image
 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_t image_array);
 size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t image_array);
 #endif //cl_khr_gl_msaa_sharing
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 
 /**
 * Return the number of samples associated with image
@@ -16331,12 +16327,12 @@ int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image);
 int __ovld get_image_num_samples(write_only image2d_array_msaa_t image);
 int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image);
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 int __ovld get_image_num_samples(read_write image2d_msaa_t image);
 int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image);
 int __ovld get_image_num_samples(read_write image2d_array_msaa_t image);
 int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image);
-#endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif //defined(__opencl_c_read_write_images)
 #endif
 
 // OpenCL v2.0 s6.13.15 - Work-group Functions
@@ -17572,34 +17568,38 @@ uint16  __ovld __conv intel_sub_group_shuffle_xor( uint16 x, uint c );
 long    __ovld __conv intel_sub_group_shuffle_xor( long x, uint c );
 ulong   __ovld __conv intel_sub_group_shuffle_xor( ulong x, uint c );
 
+#if defined(__opencl_c_images)
 uint    __ovld __conv intel_sub_group_block_read( read_only image2d_t image, int2 coord );
 uint2   __ovld __conv intel_sub_group_block_read2( read_only image2d_t image, int2 coord );
 uint4   __ovld __conv intel_sub_group_block_read4( read_only image2d_t image, int2 coord );
 uint8   __ovld __conv intel_sub_group_block_read8( read_only image2d_t image, int2 coord );
+#endif
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 uint    __ovld __conv intel_sub_group_block_read(read_write image2d_t image, int2 coord);
 uint2   __ovld __conv intel_sub_group_block_read2(read_write image2d_t image, int2 coord);
 uint4   __ovld __conv intel_sub_group_block_read4(read_write image2d_t image, int2 coord);
 uint8   __ovld __conv intel_sub_group_block_read8(read_write image2d_t image, int2 coord);
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 uint    __ovld __conv intel_sub_group_block_read( const __global uint* p );
 uint2   __ovld __conv intel_sub_group_block_read2( const __global uint* p );
 uint4   __ovld __conv intel_sub_group_block_read4( const __global uint* p );
 uint8   __ovld __conv intel_sub_group_block_read8( const __global uint* p );
 
+#if defined(__opencl_c_images)
 void    __ovld __conv intel_sub_group_block_write(write_only image2d_t image, int2 coord, uint data);
 void    __ovld __conv intel_sub_group_block_write2(write_only image2d_t image, int2 coord, uint2 data);
 void    __ovld __conv intel_sub_group_block_write4(write_only image2d_t image, int2 coord, uint4 data);
 void    __ovld __conv intel_sub_group_block_write8(write_only image2d_t image, int2 coord, uint8 data);
+#endif // defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 void    __ovld __conv intel_sub_group_block_write(read_write image2d_t image, int2 coord, uint data);
 void    __ovld __conv intel_sub_group_block_write2(read_write image2d_t image, int2 coord, uint2 data);
 void    __ovld __conv intel_sub_group_block_write4(read_write image2d_t image, int2 coord, uint4 data);
 void    __ovld __conv intel_sub_group_block_write8(read_write image2d_t image, int2 coord, uint8 data);
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 void    __ovld __conv intel_sub_group_block_write( __global uint* p, uint data );
 void    __ovld __conv intel_sub_group_block_write2( __global uint* p, uint2 data );
@@ -17712,68 +17712,76 @@ ushort      __ovld __conv intel_sub_group_scan_inclusive_min( ushort  x );
 short       __ovld __conv intel_sub_group_scan_inclusive_max( short   x );
 ushort      __ovld __conv intel_sub_group_scan_inclusive_max( ushort  x );
 
+#if defined(__opencl_c_images)
 uint       __ovld __conv intel_sub_group_block_read_ui( read_only image2d_t image, int2 byte_coord );
 uint2      __ovld __conv intel_sub_group_block_read_ui2( read_only image2d_t image, int2 byte_coord );
 uint4      __ovld __conv intel_sub_group_block_read_ui4( read_only image2d_t image, int2 byte_coord );
 uint8      __ovld __conv intel_sub_group_block_read_ui8( read_only image2d_t image, int2 byte_coord );
+#endif // defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 uint       __ovld __conv intel_sub_group_block_read_ui( read_write image2d_t image, int2 byte_coord );
 uint2      __ovld __conv intel_sub_group_block_read_ui2( read_write image2d_t image, int2 byte_coord );
 uint4      __ovld __conv intel_sub_group_block_read_ui4( read_write image2d_t image, int2 byte_coord );
 uint8      __ovld __conv intel_sub_group_block_read_ui8( read_write image2d_t image, int2 byte_coord );
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 uint       __ovld __conv intel_sub_group_block_read_ui( const __global uint* p );
 uint2      __ovld __conv intel_sub_group_block_read_ui2( const __global uint* p );
 uint4      __ovld __conv intel_sub_group_block_read_ui4( const __global uint* p );
 uint8      __ovld __conv intel_sub_group_block_read_ui8( const __global uint* p );
 
+#if defined(__opencl_c_images)
 void       __ovld __conv intel_sub_group_block_write_ui( read_only image2d_t image, int2 byte_coord, uint data );
 void       __ovld __conv intel_sub_group_block_write_ui2( read_only image2d_t image, int2 byte_coord, uint2 data );
 void       __ovld __conv intel_sub_group_block_write_ui4( read_only image2d_t image, int2 byte_coord, uint4 data );
 void       __ovld __conv intel_sub_group_block_write_ui8( read_only image2d_t image, int2 byte_coord, uint8 data );
+#endif //defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 void       __ovld __conv intel_sub_group_block_write_ui( read_write image2d_t image, int2 byte_coord, uint data );
 void       __ovld __conv intel_sub_group_block_write_ui2( read_write image2d_t image, int2 byte_coord, uint2 data );
 void       __ovld __conv intel_sub_group_block_write_ui4( read_write image2d_t image, int2 byte_coord, uint4 data );
 void       __ovld __conv intel_sub_group_block_write_ui8( read_write image2d_t image, int2 byte_coord, uint8 data );
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 void       __ovld __conv intel_sub_group_block_write_ui( __global uint* p, uint data );
 void       __ovld __conv intel_sub_group_block_write_ui2( __global uint* p, uint2 data );
 void       __ovld __conv intel_sub_group_block_write_ui4( __global uint* p, uint4 data );
 void       __ovld __conv intel_sub_group_block_write_ui8( __global uint* p, uint8 data );
 
+#if defined(__opencl_c_images)
 ushort      __ovld __conv intel_sub_group_block_read_us( read_only image2d_t image, int2 coord );
 ushort2     __ovld __conv intel_sub_group_block_read_us2( read_only image2d_t image, int2 coord );
 ushort4     __ovld __conv intel_sub_group_block_read_us4( read_only image2d_t image, int2 coord );
 ushort8     __ovld __conv intel_sub_group_block_read_us8( read_only image2d_t image, int2 coord );
+#endif // defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 ushort      __ovld __conv intel_sub_group_block_read_us(read_write image2d_t image, int2 coord);
 ushort2     __ovld __conv intel_sub_group_block_read_us2(read_write image2d_t image, int2 coord);
 ushort4     __ovld __conv intel_sub_group_block_read_us4(read_write image2d_t image, int2 coord);
 ushort8     __ovld __conv intel_sub_group_block_read_us8(read_write image2d_t image, int2 coord);
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 ushort      __ovld __conv intel_sub_group_block_read_us(  const __global ushort* p );
 ushort2     __ovld __conv intel_sub_group_block_read_us2( const __global ushort* p );
 ushort4     __ovld __conv intel_sub_group_block_read_us4( const __global ushort* p );
 ushort8     __ovld __conv intel_sub_group_block_read_us8( const __global ushort* p );
 
+#if defined(__opencl_c_images)
 void        __ovld __conv intel_sub_group_block_write_us(write_only image2d_t image, int2 coord, ushort  data);
 void        __ovld __conv intel_sub_group_block_write_us2(write_only image2d_t image, int2 coord, ushort2 data);
 void        __ovld __conv intel_sub_group_block_write_us4(write_only image2d_t image, int2 coord, ushort4 data);
 void        __ovld __conv intel_sub_group_block_write_us8(write_only image2d_t image, int2 coord, ushort8 data);
+#endif // defined(__opencl_c_images)
 
-#if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#if defined(__opencl_c_read_write_images)
 void        __ovld __conv intel_sub_group_block_write_us(read_write image2d_t image, int2 coord, ushort  data);
 void        __ovld __conv intel_sub_group_block_write_us2(read_write image2d_t image, int2 coord, ushort2 data);
 void        __ovld __conv intel_sub_group_block_write_us4(read_write image2d_t image, int2 coord, ushort4 data);
 void        __ovld __conv intel_sub_group_block_write_us8(read_write image2d_t image, int2 coord, ushort8 data);
-#endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
+#endif // defined(__opencl_c_read_write_images)
 
 void        __ovld __conv intel_sub_group_block_write_us(  __global ushort* p, ushort  data );
 void        __ovld __conv intel_sub_group_block_write_us2( __global ushort* p, ushort2 data );
@@ -17891,6 +17899,7 @@ short2 __ovld intel_sub_group_avc_ime_adjust_ref_offset(
     short2 ref_offset, ushort2 src_coord, ushort2 ref_window_size,
     ushort2 image_size);
 
+#if defined(__opencl_c_images)
 intel_sub_group_avc_ime_result_t __ovld
 intel_sub_group_avc_ime_evaluate_with_single_reference(
     read_only image2d_t src_image, read_only image2d_t ref_image,
@@ -17931,6 +17940,7 @@ intel_sub_group_avc_ime_evaluate_with_dual_reference_streaminout(
     read_only image2d_t bwd_ref_image, sampler_t vme_media_sampler,
     intel_sub_group_avc_ime_payload_t payload,
     intel_sub_group_avc_ime_dual_reference_streamin_t streamin_components);
+#endif
 
 intel_sub_group_avc_ime_single_reference_streamin_t __ovld
 intel_sub_group_avc_ime_get_single_reference_streamin(
@@ -17995,6 +18005,7 @@ intel_sub_group_avc_ref_payload_t __ovld
 intel_sub_group_avc_ref_set_bilinear_filter_enable(
     intel_sub_group_avc_ref_payload_t payload);
 
+#if defined(__opencl_c_images)
 intel_sub_group_avc_ref_result_t __ovld
 intel_sub_group_avc_ref_evaluate_with_single_reference(
     read_only image2d_t src_image, read_only image2d_t ref_image,
@@ -18013,6 +18024,7 @@ intel_sub_group_avc_ref_evaluate_with_multi_reference(
     read_only image2d_t src_image, uint packed_reference_ids,
     uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
     intel_sub_group_avc_ref_payload_t payload);
+#endif //defined(__opencl_c_images)
 
 // SIC built-in functions
 intel_sub_group_avc_sic_payload_t __ovld
@@ -18063,6 +18075,7 @@ intel_sub_group_avc_sic_set_block_based_raw_skip_sad(
     uchar block_based_skip_type,
     intel_sub_group_avc_sic_payload_t payload);
 
+#if defined(__opencl_c_images)
 intel_sub_group_avc_sic_result_t __ovld
 intel_sub_group_avc_sic_evaluate_ipe(
     read_only image2d_t src_image, sampler_t vme_media_sampler,
@@ -18085,6 +18098,7 @@ intel_sub_group_avc_sic_evaluate_with_multi_reference(
     read_only image2d_t src_image, uint packed_reference_ids,
     uchar packed_reference_field_polarities, sampler_t vme_media_sampler,
     intel_sub_group_avc_sic_payload_t payload);
+#endif //defined(__opencl_c_images)
 
 uchar __ovld intel_sub_group_avc_sic_get_ipe_luma_shape(
     intel_sub_group_avc_sic_result_t result);

From d3b188a2d72f9c398e4b1a36d23888c4ac783e9f Mon Sep 17 00:00:00 2001
From: Fabian Wolff <fabian.wolff@alumni.ethz.ch>
Date: Thu, 20 Jan 2022 22:51:53 +0100
Subject: [PATCH 107/946] [clang-tidy] Include constructor initializers in
 `bugprone-exception-escape` check

Fixes PR#52435.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D113507
---
 .../clang-tidy/utils/ExceptionAnalyzer.cpp    | 18 ++++++++
 .../checkers/bugprone-exception-escape.cpp    | 43 +++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp
index 97895115a7d5c..1f22c1d936561 100644
--- a/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp
+++ b/clang-tools-extra/clang-tidy/utils/ExceptionAnalyzer.cpp
@@ -119,6 +119,16 @@ ExceptionAnalyzer::ExceptionInfo ExceptionAnalyzer::throwsException(
     CallStack.insert(Func);
     ExceptionInfo Result =
         throwsException(Body, ExceptionInfo::Throwables(), CallStack);
+
+    // For a constructor, we also have to check the initializers.
+    if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(Func)) {
+      for (const CXXCtorInitializer *Init : Ctor->inits()) {
+        ExceptionInfo Excs = throwsException(
+            Init->getInit(), ExceptionInfo::Throwables(), CallStack);
+        Result.merge(Excs);
+      }
+    }
+
     CallStack.erase(Func);
     return Result;
   }
@@ -195,6 +205,14 @@ ExceptionAnalyzer::ExceptionInfo ExceptionAnalyzer::throwsException(
       ExceptionInfo Excs = throwsException(Func, CallStack);
       Results.merge(Excs);
     }
+  } else if (const auto *Construct = dyn_cast<CXXConstructExpr>(St)) {
+    ExceptionInfo Excs =
+        throwsException(Construct->getConstructor(), CallStack);
+    Results.merge(Excs);
+  } else if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(St)) {
+    ExceptionInfo Excs =
+        throwsException(DefaultInit->getExpr(), Caught, CallStack);
+    Results.merge(Excs);
   } else {
     for (const Stmt *Child : St->children()) {
       ExceptionInfo Excs = throwsException(Child, Caught, CallStack);
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-exception-escape.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-exception-escape.cpp
index ebb44f84f67cc..769064d74adc5 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-exception-escape.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-exception-escape.cpp
@@ -288,6 +288,49 @@ int indirectly_recursive(int n) noexcept {
   return recursion_helper(n);
 }
 
+struct super_throws {
+  super_throws() noexcept(false) { throw 42; }
+};
+
+struct sub_throws : super_throws {
+  sub_throws() noexcept : super_throws() {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'sub_throws' which should not throw exceptions
+};
+
+struct super_throws_again {
+  super_throws_again() throw(int);
+};
+
+struct sub_throws_again : super_throws_again {
+  sub_throws_again() noexcept : super_throws_again() {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'sub_throws_again' which should not throw exceptions
+};
+
+struct init_member_throws {
+  super_throws s;
+
+  init_member_throws() noexcept : s() {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'init_member_throws' which should not throw exceptions
+};
+
+struct implicit_init_member_throws {
+  super_throws s;
+
+  implicit_init_member_throws() noexcept {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'implicit_init_member_throws' which should not throw exceptions
+};
+
+struct init {
+  explicit init(int, int) noexcept(false) { throw 42; }
+};
+
+struct in_class_init_throws {
+  init i{1, 2};
+
+  in_class_init_throws() noexcept {}
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: an exception may be thrown in function 'in_class_init_throws' which should not throw exceptions
+};
+
 int main() {
   // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: an exception may be thrown in function 'main' which should not throw exceptions
   throw 1;

From 7bf9065ad7d9f6c908ab109fa85bbc36ae6a1130 Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes@jdoerfert.de>
Date: Thu, 20 Jan 2022 18:04:32 -0600
Subject: [PATCH 108/946] [Attributor][NFC] Clang format

---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 1a7763cbf49c0..71caa027498b7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -6007,9 +6007,10 @@ struct AAHeapToStackFunction final : public AAHeapToStack {
 
   Optional<APInt> getSize(Attributor &A, const AbstractAttribute &AA,
                           AllocationInfo &AI) {
-    auto Mapper = [&](const Value *V) -> const Value* {
+    auto Mapper = [&](const Value *V) -> const Value * {
       bool UsedAssumedInformation = false;
-      if (Optional<Constant *> SimpleV = A.getAssumedConstant(*V, AA, UsedAssumedInformation))
+      if (Optional<Constant *> SimpleV =
+              A.getAssumedConstant(*V, AA, UsedAssumedInformation))
         if (*SimpleV)
           return *SimpleV;
       return V;
@@ -6266,8 +6267,7 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
       if (!Size.hasValue() || Size.getValue().ugt(MaxHeapToStackSize)) {
         LLVM_DEBUG({
           if (!Size.hasValue())
-            dbgs() << "[H2S] Unknown allocation size: " << *AI.CB
-                   << "\n";
+            dbgs() << "[H2S] Unknown allocation size: " << *AI.CB << "\n";
           else
             dbgs() << "[H2S] Allocation size too large: " << *AI.CB << " vs. "
                    << MaxHeapToStackSize << "\n";

From 37e0c58559ad367f16960fd65c8efb7bb1a6414f Mon Sep 17 00:00:00 2001
From: Johannes Doerfert <johannes@jdoerfert.de>
Date: Thu, 20 Jan 2022 17:52:41 -0600
Subject: [PATCH 109/946] [Attributor][FIX] AAValueConstantRange should not
 loop unconstrained

The old method to avoid unconstrained expansion of the constant range in
a loop did not work as soon as there were multiple instructions in
between the phi and its input. We now take a generic approach and limit
the number of updates as a fallback. The old method is kept as it
catches "the common case" early.
---
 .../Transforms/IPO/AttributorAttributes.cpp   |  19 +-
 llvm/test/Transforms/Attributor/range.ll      | 433 +++++++-----------
 2 files changed, 195 insertions(+), 257 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 71caa027498b7..0723402c19ee0 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -8497,13 +8497,30 @@ struct AAValueConstantRangeFloating : AAValueConstantRangeImpl {
                                                   /* UseValueSimplify */ false))
       return indicatePessimisticFixpoint();
 
-    return clampStateAndIndicateChange(getState(), T);
+    // Ensure that long def-use chains can't cause circular reasoning either by
+    // introducing a cutoff below.
+    if (clampStateAndIndicateChange(getState(), T) == ChangeStatus::UNCHANGED)
+      return ChangeStatus::UNCHANGED;
+    if (++NumChanges > MaxNumChanges) {
+      LLVM_DEBUG(dbgs() << "[AAValueConstantRange] performed " << NumChanges
+                        << " but only " << MaxNumChanges
+                        << " are allowed to avoid cyclic reasoning.");
+      return indicatePessimisticFixpoint();
+    }
+    return ChangeStatus::CHANGED;
   }
 
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override {
     STATS_DECLTRACK_FLOATING_ATTR(value_range)
   }
+
+  /// Tracker to bail after too many widening steps of the constant range.
+  int NumChanges = 0;
+
+  /// Upper bound for the number of allowed changes (=widening steps) for the
+  /// constant range before we give up.
+  static constexpr int MaxNumChanges = 5;
 };
 
 struct AAValueConstantRangeFunction : AAValueConstantRangeImpl {
diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll
index 58b888a767d97..8c120acbb8900 100644
--- a/llvm/test/Transforms/Attributor/range.ll
+++ b/llvm/test/Transforms/Attributor/range.ll
@@ -24,29 +24,17 @@ define i32 @test0(i32* %p) {
 }
 
 define i32 @test0-range-check(i32* %p) {
-; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test0-range-check
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_OPM-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]]
-; IS__TUNIT_OPM-NEXT:    ret i32 [[A]]
-;
-; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test0-range-check
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_NPM-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR2:[0-9]+]], !range [[RNG0]]
-; IS__TUNIT_NPM-NEXT:    ret i32 [[A]]
-;
-; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test0-range-check
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]]
-; IS__CGSCC_OPM-NEXT:    ret i32 [[A]]
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test0-range-check
+; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]]
+; IS__TUNIT____-NEXT:    ret i32 [[A]]
 ;
-; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test0-range-check
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR2:[0-9]+]], !range [[RNG0]]
-; IS__CGSCC_NPM-NEXT:    ret i32 [[A]]
+; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test0-range-check
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[A:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3:[0-9]+]], !range [[RNG0]]
+; IS__CGSCC____-NEXT:    ret i32 [[A]]
 ;
   %a = tail call i32 @test0(i32* %p)
   ret i32 %a
@@ -66,193 +54,99 @@ define void @use3(i1, i1, i1) {
 ; TEST0 icmp test
 define void @test0-icmp-check(i32* %p){
   ; ret = [0, 10)
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test0-icmp-check
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) {
-; IS__TUNIT_OPM-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG0]]
-; IS__TUNIT_OPM-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
-; IS__TUNIT_OPM-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
-; IS__TUNIT_OPM-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
-; IS__TUNIT_OPM-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
-; IS__TUNIT_OPM-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
-; IS__TUNIT_OPM-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
-; IS__TUNIT_OPM-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
-; IS__TUNIT_OPM-NEXT:    ret void
-;
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test0-icmp-check
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) {
-; IS__TUNIT_NPM-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR2]], !range [[RNG0]]
-; IS__TUNIT_NPM-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
-; IS__TUNIT_NPM-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
-; IS__TUNIT_NPM-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
-; IS__TUNIT_NPM-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
-; IS__TUNIT_NPM-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
-; IS__TUNIT_NPM-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
-; IS__TUNIT_NPM-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
-; IS__TUNIT_NPM-NEXT:    ret void
-;
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test0-icmp-check
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) {
-; IS__CGSCC_OPM-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3]], !range [[RNG0]]
-; IS__CGSCC_OPM-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
-; IS__CGSCC_OPM-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
-; IS__CGSCC_OPM-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
-; IS__CGSCC_OPM-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
-; IS__CGSCC_OPM-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
-; IS__CGSCC_OPM-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
-; IS__CGSCC_OPM-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
-; IS__CGSCC_OPM-NEXT:    ret void
-;
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test0-icmp-check
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) {
-; IS__CGSCC_NPM-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR2]], !range [[RNG0]]
-; IS__CGSCC_NPM-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
-; IS__CGSCC_NPM-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
-; IS__CGSCC_NPM-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
-; IS__CGSCC_NPM-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
-; IS__CGSCC_NPM-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
-; IS__CGSCC_NPM-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
-; IS__CGSCC_NPM-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
-; IS__CGSCC_NPM-NEXT:    ret void
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test0-icmp-check
+; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) {
+; IS__TUNIT____-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG0]]
+; IS__TUNIT____-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
+; IS__TUNIT____-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
+; IS__TUNIT____-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
+; IS__TUNIT____-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
+; IS__TUNIT____-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
+; IS__TUNIT____-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
+; IS__TUNIT____-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
+; IS__TUNIT____-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
+; IS__TUNIT____-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
+; IS__TUNIT____-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
+; IS__TUNIT____-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
+; IS__TUNIT____-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
+; IS__TUNIT____-NEXT:    ret void
+;
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test0-icmp-check
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) {
+; IS__CGSCC____-NEXT:    [[RET:%.*]] = tail call i32 @test0(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3]], !range [[RNG0]]
+; IS__CGSCC____-NEXT:    [[CMP_EQ_2:%.*]] = icmp eq i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_EQ_3:%.*]] = icmp eq i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_EQ_4:%.*]] = icmp eq i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_EQ_5:%.*]] = icmp eq i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_EQ_2]], i1 [[CMP_EQ_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_EQ_4]], i1 [[CMP_EQ_5]], i1 noundef false)
+; IS__CGSCC____-NEXT:    [[CMP_NE_2:%.*]] = icmp ne i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_NE_3:%.*]] = icmp ne i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_NE_4:%.*]] = icmp ne i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_NE_5:%.*]] = icmp ne i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_NE_2]], i1 [[CMP_NE_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_NE_4]], i1 [[CMP_NE_5]], i1 noundef true)
+; IS__CGSCC____-NEXT:    [[CMP_UGT_3:%.*]] = icmp ugt i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_UGT_4:%.*]] = icmp ugt i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_UGT_5:%.*]] = icmp ugt i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_UGT_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_UGT_4]], i1 [[CMP_UGT_5]], i1 noundef false)
+; IS__CGSCC____-NEXT:    [[CMP_UGE_2:%.*]] = icmp uge i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_UGE_3:%.*]] = icmp uge i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_UGE_4:%.*]] = icmp uge i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_UGE_2]], i1 [[CMP_UGE_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_UGE_4]], i1 noundef true, i1 noundef false)
+; IS__CGSCC____-NEXT:    [[CMP_SGT_3:%.*]] = icmp sgt i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_SGT_4:%.*]] = icmp sgt i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_SGT_5:%.*]] = icmp sgt i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 noundef false, i1 [[CMP_SGT_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_SGT_4]], i1 [[CMP_SGT_5]], i1 noundef true)
+; IS__CGSCC____-NEXT:    [[CMP_GTE_2:%.*]] = icmp sge i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_GTE_3:%.*]] = icmp sge i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_GTE_4:%.*]] = icmp sge i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef false, i1 [[CMP_GTE_2]], i1 [[CMP_GTE_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_GTE_4]], i1 noundef true, i1 noundef true)
+; IS__CGSCC____-NEXT:    [[CMP_SLT_2:%.*]] = icmp slt i32 [[RET]], 9
+; IS__CGSCC____-NEXT:    [[CMP_SLT_3:%.*]] = icmp slt i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_SLT_4:%.*]] = icmp slt i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef true, i1 [[CMP_SLT_2]], i1 [[CMP_SLT_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_SLT_4]], i1 noundef false, i1 noundef false)
+; IS__CGSCC____-NEXT:    [[CMP_LTE_3:%.*]] = icmp sle i32 [[RET]], 8
+; IS__CGSCC____-NEXT:    [[CMP_LTE_4:%.*]] = icmp sle i32 [[RET]], 1
+; IS__CGSCC____-NEXT:    [[CMP_LTE_5:%.*]] = icmp sle i32 [[RET]], 0
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 noundef true, i1 noundef true, i1 [[CMP_LTE_3]])
+; IS__CGSCC____-NEXT:    tail call void @use3(i1 [[CMP_LTE_4]], i1 [[CMP_LTE_5]], i1 noundef false)
+; IS__CGSCC____-NEXT:    ret void
 ;
   %ret = tail call i32 @test0(i32 *%p)
 
@@ -363,33 +257,19 @@ define i32 @test1(i32* %p) {
 
 define i1 @test1-check(i32* %p) {
 ;
-; IS__TUNIT_OPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@test1-check
-; IS__TUNIT_OPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_OPM-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]]
-; IS__TUNIT_OPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
-; IS__TUNIT_OPM-NEXT:    ret i1 [[CMP]]
-;
-; IS__TUNIT_NPM: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@test1-check
-; IS__TUNIT_NPM-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
-; IS__TUNIT_NPM-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR2]], !range [[RNG2:![0-9]+]]
-; IS__TUNIT_NPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
-; IS__TUNIT_NPM-NEXT:    ret i1 [[CMP]]
-;
-; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test1-check
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_OPM-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]]
-; IS__CGSCC_OPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
-; IS__CGSCC_OPM-NEXT:    ret i1 [[CMP]]
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@test1-check
+; IS__TUNIT____-SAME: (i32* nocapture nofree readonly align 4 [[P:%.*]]) #[[ATTR0]] {
+; IS__TUNIT____-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree readonly align 4 [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]]
+; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
+; IS__TUNIT____-NEXT:    ret i1 [[CMP]]
 ;
-; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test1-check
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
-; IS__CGSCC_NPM-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR2]], !range [[RNG2:![0-9]+]]
-; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
-; IS__CGSCC_NPM-NEXT:    ret i1 [[CMP]]
+; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC____-LABEL: define {{[^@]+}}@test1-check
+; IS__CGSCC____-SAME: (i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P:%.*]]) #[[ATTR0]] {
+; IS__CGSCC____-NEXT:    [[RES:%.*]] = tail call i32 @test1(i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[P]]) #[[ATTR3]], !range [[RNG2:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[RES]], 500
+; IS__CGSCC____-NEXT:    ret i1 [[CMP]]
 ;
   %res = tail call i32 @test1(i32* %p)
   %cmp = icmp eq i32 %res, 500
@@ -698,7 +578,7 @@ define dso_local i32 @test4-g1(i32 %u) {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test4-g1
 ; IS__CGSCC_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR3:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f1(i32 [[U]]) #[[ATTR4:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[CALL]]
 ;
 ; FIXME: %call should have range [0, inf]
@@ -790,7 +670,7 @@ define dso_local i32 @test4-g2(i32 %u) {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test4-g2
 ; IS__CGSCC_NPM-SAME: (i32 [[U:%.*]]) #[[ATTR1]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR3]], !range [[RNG3:![0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = tail call i32 @test4-f2(i32 [[U]]) #[[ATTR4]], !range [[RNG3:![0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[CALL]]
 ;
 entry:
@@ -1161,10 +1041,10 @@ define i1 @fcmp_caller(float %fa, float %fb, double %da, double %db, double* %dp
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fcmp_caller
 ; IS__CGSCC_NPM-SAME: (float [[FA:%.*]], float [[FB:%.*]], double [[DA:%.*]], double [[DB:%.*]], double* nofree readnone [[DPA:%.*]], double* nofree readnone [[DPB:%.*]], i8* nofree readnone [[IPA:%.*]], i8* nofree readnone [[IPB:%.*]]) #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:    [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR3]]
-; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR3]]
-; IS__CGSCC_NPM-NEXT:    [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR3]]
-; IS__CGSCC_NPM-NEXT:    [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR3]]
+; IS__CGSCC_NPM-NEXT:    [[R1:%.*]] = call i1 @f_fcmp(float [[FA]], float [[FB]]) #[[ATTR4]]
+; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i1 @d_fcmp(double [[DA]], double [[DB]]) #[[ATTR4]]
+; IS__CGSCC_NPM-NEXT:    [[R3:%.*]] = call i1 @dp_icmp(double* noalias nofree readnone [[DPA]], double* noalias nofree readnone [[DPB]]) #[[ATTR4]]
+; IS__CGSCC_NPM-NEXT:    [[R4:%.*]] = call i1 @ip_icmp(i8* noalias nofree readnone [[IPA]], i8* noalias nofree readnone [[IPB]]) #[[ATTR4]]
 ; IS__CGSCC_NPM-NEXT:    [[O1:%.*]] = or i1 [[R1]], [[R2]]
 ; IS__CGSCC_NPM-NEXT:    [[O2:%.*]] = or i1 [[R3]], [[R4]]
 ; IS__CGSCC_NPM-NEXT:    [[O3:%.*]] = or i1 [[O1]], [[O2]]
@@ -1331,8 +1211,8 @@ define i1 @callee_range_2(i1 %c1, i1 %c2) {
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_range_2
 ; IS__CGSCC_NPM-SAME: (i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR1]] {
-; IS__CGSCC_NPM-NEXT:    [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR3]], !range [[RNG5:![0-9]+]]
-; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR3]], !range [[RNG5]]
+; IS__CGSCC_NPM-NEXT:    [[R1:%.*]] = call i32 @ret1or2(i1 [[C1]]) #[[ATTR4]], !range [[RNG5:![0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR4]], !range [[RNG5]]
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = add i32 [[R1]], [[R2]]
 ; IS__CGSCC_NPM-NEXT:    [[I1:%.*]] = icmp sle i32 [[A]], 3
 ; IS__CGSCC_NPM-NEXT:    ret i1 [[I1]]
@@ -1489,10 +1369,10 @@ define i32 @simplify_callsite_argument(i1 %d) {
 ; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = select i1 [[D]], i1 true, i1 false
 ; IS__CGSCC_NPM-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC_NPM:       t:
-; IS__CGSCC_NPM-NEXT:    [[RET1:%.*]] = call noundef i32 @func(i1 noundef true) #[[ATTR3]], !range [[RNG4]]
+; IS__CGSCC_NPM-NEXT:    [[RET1:%.*]] = call noundef i32 @func(i1 noundef true) #[[ATTR4]], !range [[RNG4]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[RET1]]
 ; IS__CGSCC_NPM:       f:
-; IS__CGSCC_NPM-NEXT:    [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR3]], !range [[RNG4]]
+; IS__CGSCC_NPM-NEXT:    [[RET2:%.*]] = call noundef i32 @func(i1 noundef false) #[[ATTR4]], !range [[RNG4]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[RET2]]
 ;
   %c = select i1 %d, i1 true, i1 false
@@ -1898,6 +1778,50 @@ bb3:                                              ; preds = %bb2, %bb1
   ret void
 }
 
+define i1 @loop_1(i32 %N) {
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone
+; IS__TUNIT____-LABEL: define {{[^@]+}}@loop_1
+; IS__TUNIT____-SAME: (i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__TUNIT____-NEXT:  entry:
+; IS__TUNIT____-NEXT:    br label [[HEADER:%.*]]
+; IS__TUNIT____:       header:
+; IS__TUNIT____-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[AND:%.*]], [[HEADER]] ]
+; IS__TUNIT____-NEXT:    [[INC:%.*]] = add i32 [[I]], 1
+; IS__TUNIT____-NEXT:    [[AND]] = and i32 [[INC]], 9999
+; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], [[AND]]
+; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[HEADER]], label [[EXIT:%.*]]
+; IS__TUNIT____:       exit:
+; IS__TUNIT____-NEXT:    [[R:%.*]] = icmp sle i32 [[I]], 5
+; IS__TUNIT____-NEXT:    ret i1 [[R]]
+;
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone
+; IS__CGSCC____-LABEL: define {{[^@]+}}@loop_1
+; IS__CGSCC____-SAME: (i32 [[N:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__CGSCC____-NEXT:  entry:
+; IS__CGSCC____-NEXT:    br label [[HEADER:%.*]]
+; IS__CGSCC____:       header:
+; IS__CGSCC____-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[AND:%.*]], [[HEADER]] ]
+; IS__CGSCC____-NEXT:    [[INC:%.*]] = add i32 [[I]], 1
+; IS__CGSCC____-NEXT:    [[AND]] = and i32 [[INC]], 9999
+; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], [[AND]]
+; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[HEADER]], label [[EXIT:%.*]]
+; IS__CGSCC____:       exit:
+; IS__CGSCC____-NEXT:    [[R:%.*]] = icmp sle i32 [[I]], 5
+; IS__CGSCC____-NEXT:    ret i1 [[R]]
+;
+entry:
+  br label %header
+header:
+  %i = phi i32 [0, %entry], [%and, %header]
+  %inc = add i32 %i, 1
+  %and = and i32 %inc, 9999
+  %cmp = icmp ne i32 %N, %and
+  br i1 %cmp, label %header, label %exit
+exit:
+  %r = icmp sle i32 %i, 5
+  ret i1 %r
+}
+
 declare void @ham(i32)
 
 declare void @barney(i32 signext, i32 signext)
@@ -1906,14 +1830,10 @@ declare void @barney(i32 signext, i32 signext)
 !0 = !{i32 0, i32 10}
 !1 = !{i32 10, i32 100}
 ;.
-; IS__TUNIT_OPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR2]] = { nofree nosync nounwind readnone }
-; IS__TUNIT_OPM: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn }
-;.
-; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR2]] = { nofree nosync nounwind readonly willreturn }
+; IS__TUNIT____: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind readonly willreturn }
+; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR2]] = { nofree nosync nounwind readnone }
+; IS__TUNIT____: attributes #[[ATTR3]] = { nofree nosync nounwind readonly willreturn }
 ;.
 ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
@@ -1924,8 +1844,9 @@ declare void @barney(i32 signext, i32 signext)
 ;.
 ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR2]] = { readonly willreturn }
-; IS__CGSCC_NPM: attributes #[[ATTR3]] = { readnone willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone }
+; IS__CGSCC_NPM: attributes #[[ATTR3]] = { readonly willreturn }
+; IS__CGSCC_NPM: attributes #[[ATTR4]] = { readnone willreturn }
 ;.
 ; IS__TUNIT_OPM: [[RNG0]] = !{i32 0, i32 10}
 ; IS__TUNIT_OPM: [[RNG1]] = !{i32 10, i32 100}
@@ -1935,7 +1856,7 @@ declare void @barney(i32 signext, i32 signext)
 ;.
 ; IS________NPM: [[RNG0]] = !{i32 0, i32 10}
 ; IS________NPM: [[RNG1]] = !{i32 10, i32 100}
-; IS________NPM: [[META2:![0-9]+]] = !{i32 200, i32 1091}
+; IS________NPM: [[RNG2]] = !{i32 200, i32 1091}
 ; IS________NPM: [[META3:![0-9]+]] = !{i32 1, i32 -2147483648}
 ; IS________NPM: [[RNG4]] = !{i32 0, i32 2}
 ; IS________NPM: [[META5:![0-9]+]] = !{i32 1, i32 3}

From 1d7604fdcebda9cc431b7f19c2f4cb769efdca02 Mon Sep 17 00:00:00 2001
From: Pawe Bylica <chfast@gmail.com>
Date: Fri, 21 Jan 2022 00:56:38 +0100
Subject: [PATCH 110/946] [InstCombine] Simplify bswap -> shift

Simplify bswap(x) to shl(x) or lshr(x) if x has exactly one
"active byte", i.e. all active bits are contained in boundaries
of a single byte of x.

https://alive2.llvm.org/ce/z/nvbbU5
https://alive2.llvm.org/ce/z/KiiL3J

Reviewed By: spatel, craig.topper, lebedev.ri

Differential Revision: https://reviews.llvm.org/D117680
---
 .../InstCombine/InstCombineCalls.cpp          | 15 ++++++
 .../test/Transforms/InstCombine/bswap-fold.ll | 54 +++++++++----------
 2 files changed, 41 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ad97e7265404c..e3a9e806abdba 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1215,6 +1215,21 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
     Value *IIOperand = II->getArgOperand(0);
     Value *X = nullptr;
 
+    KnownBits Known = computeKnownBits(IIOperand, 0, II);
+    uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
+    uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
+
+    // bswap(x) -> shift(x) if x has exactly one "active byte"
+    if (Known.getBitWidth() - LZ - TZ == 8) {
+      assert(LZ != TZ && "active byte cannot be in the middle");
+      if (LZ > TZ)  // -> shl(x) if the "active byte" is in the low part of x
+        return BinaryOperator::CreateNUWShl(
+            IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
+      // -> lshr(x) if the "active byte" is in the high part of x
+      return BinaryOperator::CreateExactLShr(
+            IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
+    }
+
     // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
     if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
       unsigned C = X->getType()->getScalarSizeInBits() -
diff --git a/llvm/test/Transforms/InstCombine/bswap-fold.ll b/llvm/test/Transforms/InstCombine/bswap-fold.ll
index 9ffae93bbbc7b..47083b815e9fe 100644
--- a/llvm/test/Transforms/InstCombine/bswap-fold.ll
+++ b/llvm/test/Transforms/InstCombine/bswap-fold.ll
@@ -358,9 +358,8 @@ define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
 
 define i64 @bs_active_high8(i64 %0) {
 ; CHECK-LABEL: @bs_active_high8(
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %2 = shl i64 %0, 56
   %3 = call i64 @llvm.bswap.i64(i64 %2)
@@ -369,8 +368,8 @@ define i64 @bs_active_high8(i64 %0) {
 
 define i32 @bs_active_high7(i32 %0) {
 ; CHECK-LABEL: @bs_active_high7(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], -33554432
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 254
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, -33554432  ; 0xfe000000
@@ -380,8 +379,8 @@ define i32 @bs_active_high7(i32 %0) {
 
 define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high4(
-; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 60, i64 60>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 4, i64 4>
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], <i64 240, i64 240>
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, <i64 60, i64 60>
@@ -392,7 +391,7 @@ define <2 x i64> @bs_active_high4(<2 x i64> %0) {
 define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_high_different(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 57>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], <i64 56, i64 56>
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = shl <2 x i64> %0, <i64 56, i64 57>
@@ -427,7 +426,7 @@ define <2 x i64> @bs_active_high_undef(<2 x i64> %0) {
 define i64 @bs_active_high8_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_high8_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP0]], 255
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -440,7 +439,7 @@ define i64 @bs_active_high8_multiuse(i64 %0) {
 define i64 @bs_active_high7_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_high7_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -452,8 +451,8 @@ define i64 @bs_active_high7_multiuse(i64 %0) {
 
 define i64 @bs_active_byte_6h(i64 %0) {
 ; CHECK-LABEL: @bs_active_byte_6h(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 280375465082880
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 16711680
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = and i64 %0, 280375465082880  ; 0xff00'00000000
@@ -463,8 +462,8 @@ define i64 @bs_active_byte_6h(i64 %0) {
 
 define i32 @bs_active_byte_3h(i32 %0) {
 ; CHECK-LABEL: @bs_active_byte_3h(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 393216
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1536
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, 393216  ; 0x0006'0000
@@ -475,7 +474,7 @@ define i32 @bs_active_byte_3h(i32 %0) {
 define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_byte_3h_v2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], <i32 8, i32 8>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = and <2 x i32> %0, <i32 8388608, i32 65536>  ; 0x0080'0000, 0x0001'0000
@@ -498,8 +497,8 @@ define i64 @bs_active_byte_78h(i64 %0) {
 
 define i16 @bs_active_low1(i16 %0) {
 ; CHECK-LABEL: @bs_active_low1(
-; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 15
-; CHECK-NEXT:    [[TMP3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 7
+; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 256
 ; CHECK-NEXT:    ret i16 [[TMP3]]
 ;
   %2 = lshr i16 %0, 15
@@ -509,9 +508,8 @@ define i16 @bs_active_low1(i16 %0) {
 
 define <2 x i32> @bs_active_low8(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_low8(
-; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 255, i32 255>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
-; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], <i32 24, i32 24>
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %2 = and <2 x i32> %0, <i32 255, i32 255>
   %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
@@ -521,7 +519,7 @@ define <2 x i32> @bs_active_low8(<2 x i32> %0) {
 define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
 ; CHECK-LABEL: @bs_active_low_different(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 2, i32 128>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], <i32 24, i32 24>
 ; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %2 = and <2 x i32> %0, <i32 2, i32 128>
@@ -556,7 +554,7 @@ define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
 define i64 @bs_active_low8_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_low8_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -569,7 +567,7 @@ define i64 @bs_active_low8_multiuse(i64 %0) {
 define i64 @bs_active_low7_multiuse(i64 %0) {
 ; CHECK-LABEL: @bs_active_low7_multiuse(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 56
 ; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
 ; CHECK-NEXT:    ret i64 [[TMP4]]
 ;
@@ -581,8 +579,8 @@ define i64 @bs_active_low7_multiuse(i64 %0) {
 
 define i64 @bs_active_byte_4l(i64 %0) {
 ; CHECK-LABEL: @bs_active_byte_4l(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 1140850688
-; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 292057776128
 ; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %2 = and i64 %0, 1140850688  ; 0x44000000
@@ -592,8 +590,8 @@ define i64 @bs_active_byte_4l(i64 %0) {
 
 define i32 @bs_active_byte_2l(i32 %0) {
 ; CHECK-LABEL: @bs_active_byte_2l(
-; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP0:%.*]], 65280
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP2]])
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 8
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 16711680
 ; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %2 = and i32 %0, 65280  ; 0xff00
@@ -604,7 +602,7 @@ define i32 @bs_active_byte_2l(i32 %0) {
 define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
 ; CHECK-LABEL: @bs_active_byte_2l_v2(
 ; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
-; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], <i64 40, i64 40>
 ; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
 ;
   %2 = and <2 x i64> %0, <i64 256, i64 65280>  ; 0x0100, 0xff00

From cfae2c65dbbe1a252958b4db2e32574e8e8dcec0 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Jan 2022 15:24:06 -0800
Subject: [PATCH 111/946] [RISCV] Factor Zve32 support into
 RISCVSubtarget::getMaxELENForFixedLengthVectors.

This is needed to properly limit fractional LMULs for Zve32.

Add new RUN Zve32 RUN lines to the existing tests for the
-riscv-v-fixed-length-vector-elen-max command line option.
---
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp          | 3 ++-
 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll | 6 ++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 96f119ed3bc15..de6b0df2df8a8 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -199,8 +199,9 @@ unsigned RISCVSubtarget::getMaxELENForFixedLengthVectors() const {
   assert(RVVVectorELENMax <= 64 && RVVVectorELENMax >= 8 &&
          isPowerOf2_32(RVVVectorELENMax) &&
          "V extension requires a ELEN to be a power of 2 between 8 and 64!");
+  unsigned ELEN = hasVInstructionsI64() ? 64 : 32;
   return PowerOf2Floor(
-      std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, 64), 8));
+      std::max<unsigned>(std::min<unsigned>(RVVVectorELENMax, ELEN), 8));
 }
 
 bool RISCVSubtarget::useRVVForFixedLengthVectors() const {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
index 9e6da01784589..cbccf73c32ed3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
@@ -1,9 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
-; Test that limiting ELEN, scalarizes elements larger than that and disables
-; some fractional LMULs.
+; Test that limiting ELEN, either through the command line or zve32, scalarizes
+; elements larger than that and disables some fractional LMULs.
 
 ; This should use LMUL=1.
 define void @add_v4i32(<4 x i32>* %x, <4 x i32>* %y) {

From 922c29ccf143aa6c42d5887eef74d23e4ee83cb2 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Wed, 12 Jan 2022 15:10:20 -0800
Subject: [PATCH 112/946] [flang] Allow explicit '+' in NAMELIST input
 subscripts

Array subscripts and substring limits in NAMELIST input are
allowed to bear an explicit plus sign.

Differential Revision: https://reviews.llvm.org/D117818
---
 flang/runtime/namelist.cpp           | 2 +-
 flang/unittests/Runtime/Namelist.cpp | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp
index 2804679db01e2..205212ccfb66e 100644
--- a/flang/runtime/namelist.cpp
+++ b/flang/runtime/namelist.cpp
@@ -99,7 +99,7 @@ static std::optional<SubscriptValue> GetSubscriptValue(IoStatementState &io) {
   std::optional<SubscriptValue> value;
   std::optional<char32_t> ch{io.GetCurrentChar()};
   bool negate{ch && *ch == '-'};
-  if (negate) {
+  if ((ch && *ch == '+') || negate) {
     io.HandleRelativePosition(1);
     ch = io.GetCurrentChar();
   }
diff --git a/flang/unittests/Runtime/Namelist.cpp b/flang/unittests/Runtime/Namelist.cpp
index 4770e26048de9..f4f5a30e101eb 100644
--- a/flang/unittests/Runtime/Namelist.cpp
+++ b/flang/unittests/Runtime/Namelist.cpp
@@ -135,7 +135,7 @@ TEST(NamelistTests, Subscripts) {
   aDesc->GetDimension(1).SetBounds(-1, 1);
   const NamelistGroup::Item items[]{{"a", *aDesc}};
   const NamelistGroup group{"justa", 1, items};
-  static char t1[]{"&justa A(0,1:-1:-2)=1 2/"};
+  static char t1[]{"&justa A(0,+1:-1:-2)=1 2/"};
   StaticDescriptor<1, true> statDesc;
   Descriptor &internalDesc{statDesc.descriptor()};
   internalDesc.Establish(TypeCode{CFI_type_char},
@@ -223,7 +223,7 @@ TEST(NamelistTypes, ArraySubstring) {
           std::vector<std::string>{"abcdefgh", "ijklmnop"}, 8)};
   const NamelistGroup::Item items[]{{"a", *scDesc}};
   const NamelistGroup group{"justa", 1, items};
-  static char t1[]{"&justa A(:)(2:5)='BCDE' 'JKLM'/"};
+  static char t1[]{"&justa A(:)(2:+5)='BCDE' 'JKLM'/"};
   StaticDescriptor<1, true> statDesc;
   Descriptor &internalDesc{statDesc.descriptor()};
   internalDesc.Establish(TypeCode{CFI_type_char},

From d1123e36922d18e2b93b01e85ef706bc8f819fb5 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Wed, 12 Jan 2022 17:34:52 -0800
Subject: [PATCH 113/946] [flang] Extension: skip over NAMELIST groups

Implements a near-universal extension in which NAMELIST
input will skip over unrelated namelist groups in the
input stream until the group with the requested name appears.

Differential Revision: https://reviews.llvm.org/D117843
---
 flang/docs/Extensions.md             |  3 ++
 flang/runtime/namelist.cpp           | 66 +++++++++++++++++++++-------
 flang/unittests/Runtime/Namelist.cpp | 32 +++++++++++++-
 3 files changed, 82 insertions(+), 19 deletions(-)

diff --git a/flang/docs/Extensions.md b/flang/docs/Extensions.md
index aa60800246ba3..270ec2dfd93c7 100644
--- a/flang/docs/Extensions.md
+++ b/flang/docs/Extensions.md
@@ -212,6 +212,9 @@ end
   This legacy extension supports pre-Fortran'77 usage in which
   variables initialized in DATA statements with Hollerith literals
   as modifiable formats.
+* At runtime, `NAMELIST` input will skip over `NAMELIST` groups
+  with other names, and will treat text before and between groups
+  as if they were comment lines, even if not begun with `!`.
 
 ### Extensions supported when enabled by options
 
diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp
index 205212ccfb66e..fde828fddf443 100644
--- a/flang/runtime/namelist.cpp
+++ b/flang/runtime/namelist.cpp
@@ -322,6 +322,29 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc,
   return false;
 }
 
+// Advance to the terminal '/' of a namelist group.
+static void SkipNamelistGroup(IoStatementState &io) {
+  while (auto ch{io.GetNextNonBlank()}) {
+    io.HandleRelativePosition(1);
+    if (*ch == '/') {
+      break;
+    } else if (*ch == '\'' || *ch == '"') {
+      // Skip quoted character literal
+      char32_t quote{*ch};
+      while (true) {
+        if ((ch = io.GetCurrentChar())) {
+          io.HandleRelativePosition(1);
+          if (*ch == quote) {
+            break;
+          }
+        } else if (!io.AdvanceRecord()) {
+          return;
+        }
+      }
+    }
+  }
+}
+
 bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
   IoStatementState &io{*cookie};
   io.CheckFormattedStmtType<Direction::Input>("InputNamelist");
@@ -330,26 +353,35 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
   IoErrorHandler &handler{io.GetIoErrorHandler()};
   auto *listInput{io.get_if<ListDirectedStatementState<Direction::Input>>()};
   RUNTIME_CHECK(handler, listInput != nullptr);
-  // Check the group header
+  // Find this namelist group's header in the input
   io.BeginReadingRecord();
-  std::optional<char32_t> next{io.GetNextNonBlank()};
-  if (!next || *next != '&') {
-    handler.SignalError(
-        "NAMELIST input group does not begin with '&' (at '%lc')", *next);
-    return false;
-  }
-  io.HandleRelativePosition(1);
+  std::optional<char32_t> next;
   char name[nameBufferSize];
-  if (!GetLowerCaseName(io, name, sizeof name)) {
-    handler.SignalError("NAMELIST input group has no name");
-    return false;
-  }
   RUNTIME_CHECK(handler, group.groupName != nullptr);
-  if (std::strcmp(group.groupName, name) != 0) {
-    handler.SignalError(
-        "NAMELIST input group name '%s' is not the expected '%s'", name,
-        group.groupName);
-    return false;
+  while (true) {
+    next = io.GetNextNonBlank();
+    while (next && *next != '&') {
+      // Extension: comment lines without ! before namelist groups
+      if (!io.AdvanceRecord()) {
+        next.reset();
+      } else {
+        next = io.GetNextNonBlank();
+      }
+    }
+    if (!next || *next != '&') {
+      handler.SignalError(
+          "NAMELIST input group does not begin with '&' (at '%lc')", *next);
+      return false;
+    }
+    io.HandleRelativePosition(1);
+    if (!GetLowerCaseName(io, name, sizeof name)) {
+      handler.SignalError("NAMELIST input group has no name");
+      return false;
+    }
+    if (std::strcmp(group.groupName, name) == 0) {
+      break; // found it
+    }
+    SkipNamelistGroup(io);
   }
   // Read the group's items
   while (true) {
diff --git a/flang/unittests/Runtime/Namelist.cpp b/flang/unittests/Runtime/Namelist.cpp
index f4f5a30e101eb..38305f729b145 100644
--- a/flang/unittests/Runtime/Namelist.cpp
+++ b/flang/unittests/Runtime/Namelist.cpp
@@ -189,7 +189,7 @@ TEST(NamelistTests, ShortArrayInput) {
   EXPECT_EQ(*bDesc->ZeroBasedIndexedElement<int>(1), -2);
 }
 
-TEST(NamelistTypes, ScalarSubstring) {
+TEST(NamelistTests, ScalarSubstring) {
   OwningPtr<Descriptor> scDesc{MakeArray<TypeCategory::Character, 1>(
       std::vector<int>{}, std::vector<std::string>{"abcdefgh"}, 8)};
   const NamelistGroup::Item items[]{{"a", *scDesc}};
@@ -217,7 +217,7 @@ TEST(NamelistTypes, ScalarSubstring) {
   EXPECT_EQ(got, expect);
 }
 
-TEST(NamelistTypes, ArraySubstring) {
+TEST(NamelistTests, ArraySubstring) {
   OwningPtr<Descriptor> scDesc{
       MakeArray<TypeCategory::Character, 1>(std::vector<int>{2},
           std::vector<std::string>{"abcdefgh", "ijklmnop"}, 8)};
@@ -246,4 +246,32 @@ TEST(NamelistTypes, ArraySubstring) {
   EXPECT_EQ(got, expect);
 }
 
+TEST(NamelistTests, Skip) {
+  OwningPtr<Descriptor> scDesc{
+      MakeArray<TypeCategory::Integer, static_cast<int>(sizeof(int))>(
+          std::vector<int>{}, std::vector<int>{-1})};
+  const NamelistGroup::Item items[]{{"j", *scDesc}};
+  const NamelistGroup group{"nml", 1, items};
+  static char t1[]{"&skip a='str''ing'/&nml j=123/"};
+  StaticDescriptor<1, true> statDesc;
+  Descriptor &internalDesc{statDesc.descriptor()};
+  internalDesc.Establish(TypeCode{CFI_type_char},
+      /*elementBytes=*/std::strlen(t1), t1, 0, nullptr, CFI_attribute_pointer);
+  auto inCookie{IONAME(BeginInternalArrayListInput)(
+      internalDesc, nullptr, 0, __FILE__, __LINE__)};
+  ASSERT_TRUE(IONAME(InputNamelist)(inCookie, group));
+  ASSERT_EQ(IONAME(EndIoStatement)(inCookie), IostatOk)
+      << "namelist input with skipping";
+  char out[20];
+  internalDesc.Establish(TypeCode{CFI_type_char}, /*elementBytes=*/sizeof out,
+      out, 0, nullptr, CFI_attribute_pointer);
+  auto outCookie{IONAME(BeginInternalArrayListOutput)(
+      internalDesc, nullptr, 0, __FILE__, __LINE__)};
+  ASSERT_TRUE(IONAME(OutputNamelist)(outCookie, group));
+  ASSERT_EQ(IONAME(EndIoStatement)(outCookie), IostatOk) << "namelist output";
+  std::string got{out, sizeof out};
+  static const std::string expect{"&NML J= 123/        "};
+  EXPECT_EQ(got, expect);
+}
+
 // TODO: Internal NAMELIST error tests

From ad06e65dc4da45be2cda3e07664502651679622a Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <hsiangkai@google.com>
Date: Wed, 19 Jan 2022 11:32:27 +0000
Subject: [PATCH 114/946] [RISCV] Fix the bug in the register allocator caused
 by reserved BP.

Originally, hasRVVFrameObject() will scan all the stack objects to check
whether if there is any scalable vector object on the stack or not.
However, it causes errors in the register allocator. In issue 53016, it
returns false before RA because there is no RVV stack objects. After RA,
it returns true because there are spilling slots for RVV values during RA.
The compiler will not reserve BP during register allocation and generate BP
access in the PEI pass due to the inconsistent behavior of the function.

The function is changed to use hasStdExtV() as the return value. It is
not precise, but it can make the register allocation correct.

Refer to https://github.com/llvm/llvm-project/issues/53016.

Differential Revision: https://reviews.llvm.org/D117663
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp  | 24 +++--
 .../test/CodeGen/RISCV/rvv/emergency-slot.mir | 93 ++++++++-----------
 .../RISCV/rvv/fixed-vectors-calling-conv.ll   | 24 +++--
 .../CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll | 35 +++----
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll | 52 +++++------
 5 files changed, 119 insertions(+), 109 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f5d4919380504..ad003404d793e 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -242,7 +242,8 @@ bool RISCVFrameLowering::hasBP(const MachineFunction &MF) const {
   // adjustment, we can not use SP to access the stack objects for the
   // arguments. Instead, use BP to access these stack objects.
   return (MFI.hasVarSizedObjects() ||
-          (!hasReservedCallFrame(MF) && MFI.getMaxCallFrameSize() != 0)) &&
+          (!hasReservedCallFrame(MF) && (!MFI.isMaxCallFrameSizeComputed() ||
+                                         MFI.getMaxCallFrameSize() != 0))) &&
          TRI->hasStackRealignment(MF);
 }
 
@@ -940,11 +941,22 @@ void RISCVFrameLowering::processFunctionBeforeFrameFinalized(
 }
 
 static bool hasRVVFrameObject(const MachineFunction &MF) {
-  const MachineFrameInfo &MFI = MF.getFrameInfo();
-  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
-    if (MFI.getStackID(I) == TargetStackID::ScalableVector)
-      return true;
-  return false;
+  // Originally, the function will scan all the stack objects to check whether
+  // if there is any scalable vector object on the stack or not. However, it
+  // causes errors in the register allocator. In issue 53016, it returns false
+  // before RA because there is no RVV stack objects. After RA, it returns true
+  // because there are spilling slots for RVV values during RA. It will not
+  // reserve BP during register allocation and generate BP access in the PEI
+  // pass due to the inconsistent behavior of the function.
+  //
+  // The function is changed to use hasVInstructions() as the return value. It
+  // is not precise, but it can make the register allocation correct.
+  //
+  // FIXME: Find a better way to make the decision or revisit the solution in
+  // D103622.
+  //
+  // Refer to https://github.com/llvm/llvm-project/issues/53016.
+  return MF.getSubtarget<RISCVSubtarget>().hasVInstructions();
 }
 
 // Not preserve stack space within prologue for outgoing variables when the
diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
index 4b62b8ead3b55..8b33e981854d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -51,36 +51,34 @@ body:             |
   ; CHECK-LABEL: name: spillslot
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; CHECK-NEXT:   liveins: $x12, $x1, $x9, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27
+  ; CHECK-NEXT:   liveins: $x12, $x1, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $x2 = frame-setup ADDI $x2, -2032
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa_offset 2032
   ; CHECK-NEXT:   SD killed $x1, $x2, 2024 :: (store (s64) into %stack.3)
   ; CHECK-NEXT:   SD killed $x8, $x2, 2016 :: (store (s64) into %stack.4)
-  ; CHECK-NEXT:   SD killed $x9, $x2, 2008 :: (store (s64) into %stack.5)
-  ; CHECK-NEXT:   SD killed $x18, $x2, 2000 :: (store (s64) into %stack.6)
-  ; CHECK-NEXT:   SD killed $x19, $x2, 1992 :: (store (s64) into %stack.7)
-  ; CHECK-NEXT:   SD killed $x20, $x2, 1984 :: (store (s64) into %stack.8)
-  ; CHECK-NEXT:   SD killed $x21, $x2, 1976 :: (store (s64) into %stack.9)
-  ; CHECK-NEXT:   SD killed $x22, $x2, 1968 :: (store (s64) into %stack.10)
-  ; CHECK-NEXT:   SD killed $x23, $x2, 1960 :: (store (s64) into %stack.11)
-  ; CHECK-NEXT:   SD killed $x24, $x2, 1952 :: (store (s64) into %stack.12)
-  ; CHECK-NEXT:   SD killed $x25, $x2, 1944 :: (store (s64) into %stack.13)
-  ; CHECK-NEXT:   SD killed $x26, $x2, 1936 :: (store (s64) into %stack.14)
-  ; CHECK-NEXT:   SD killed $x27, $x2, 1928 :: (store (s64) into %stack.15)
+  ; CHECK-NEXT:   SD killed $x18, $x2, 2008 :: (store (s64) into %stack.5)
+  ; CHECK-NEXT:   SD killed $x19, $x2, 2000 :: (store (s64) into %stack.6)
+  ; CHECK-NEXT:   SD killed $x20, $x2, 1992 :: (store (s64) into %stack.7)
+  ; CHECK-NEXT:   SD killed $x21, $x2, 1984 :: (store (s64) into %stack.8)
+  ; CHECK-NEXT:   SD killed $x22, $x2, 1976 :: (store (s64) into %stack.9)
+  ; CHECK-NEXT:   SD killed $x23, $x2, 1968 :: (store (s64) into %stack.10)
+  ; CHECK-NEXT:   SD killed $x24, $x2, 1960 :: (store (s64) into %stack.11)
+  ; CHECK-NEXT:   SD killed $x25, $x2, 1952 :: (store (s64) into %stack.12)
+  ; CHECK-NEXT:   SD killed $x26, $x2, 1944 :: (store (s64) into %stack.13)
+  ; CHECK-NEXT:   SD killed $x27, $x2, 1936 :: (store (s64) into %stack.14)
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x1, -8
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x8, -16
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x9, -24
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x18, -32
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x19, -40
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x20, -48
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x21, -56
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x22, -64
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x23, -72
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x24, -80
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x25, -88
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x26, -96
-  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x27, -104
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x18, -24
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x19, -32
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x20, -40
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x21, -48
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x22, -56
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x23, -64
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x24, -72
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x25, -80
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x26, -88
+  ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION offset $x27, -96
   ; CHECK-NEXT:   $x8 = frame-setup ADDI $x2, 2032
   ; CHECK-NEXT:   frame-setup CFI_INSTRUCTION def_cfa $x8, 0
   ; CHECK-NEXT:   $x2 = frame-setup ADDI $x2, -272
@@ -95,7 +93,7 @@ body:             |
   ; CHECK-NEXT:   $x10 = ADDI $x0, 50
   ; CHECK-NEXT:   $x11 = MUL killed $x11, killed $x10
   ; CHECK-NEXT:   $x10 = LUI 1
-  ; CHECK-NEXT:   $x10 = ADDIW killed $x10, -1896
+  ; CHECK-NEXT:   $x10 = ADDIW killed $x10, -1888
   ; CHECK-NEXT:   $x10 = ADD $x2, killed $x10
   ; CHECK-NEXT:   $x10 = ADD killed $x10, killed $x11
   ; CHECK-NEXT:   PseudoVSPILL_M1 killed renamable $v25, killed $x10 :: (store unknown-size into %stack.1, align 8)
@@ -113,8 +111,8 @@ body:             |
   ; CHECK-NEXT:   renamable $x21 = ADDI $x2, 1664
   ; CHECK-NEXT:   renamable $x22 = ADDI $x2, 1792
   ; CHECK-NEXT:   renamable $x23 = ADDI $x2, 1920
-  ; CHECK-NEXT:   SD killed $x1, $x2, 8 :: (store (s64) into %stack.16)
-  ; CHECK-NEXT:   SD killed $x5, $x2, 0 :: (store (s64) into %stack.17)
+  ; CHECK-NEXT:   SD killed $x1, $x2, 8 :: (store (s64) into %stack.15)
+  ; CHECK-NEXT:   SD killed $x5, $x2, 0 :: (store (s64) into %stack.16)
   ; CHECK-NEXT:   $x11 = LUI 1
   ; CHECK-NEXT:   $x11 = ADDIW killed $x11, -2048
   ; CHECK-NEXT:   $x24 = ADD $x2, killed $x11
@@ -130,23 +128,19 @@ body:             |
   ; CHECK-NEXT:   renamable $x13 = SLLI renamable $x11, 3
   ; CHECK-NEXT:   renamable $x13 = ADD renamable $x26, killed renamable $x13
   ; CHECK-NEXT:   renamable $x13 = LD killed renamable $x13, 0 :: (load (s64))
-  ; CHECK-NEXT:   renamable $x9 = SRAI renamable $x13, 63
-  ; CHECK-NEXT:   renamable $x9 = SRLI killed renamable $x9, 62
-  ; CHECK-NEXT:   renamable $x9 = ADD renamable $x13, killed renamable $x9
-  ; CHECK-NEXT:   renamable $x9 = ANDI killed renamable $x9, -4
-  ; CHECK-NEXT:   renamable $x16 = SUB killed renamable $x13, renamable $x9
+  ; CHECK-NEXT:   renamable $x16 = SUB killed renamable $x13, renamable $x13
   ; CHECK-NEXT:   dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   renamable $x13 = nsw ADDI renamable $x16, -2
   ; CHECK-NEXT:   $x5 = PseudoReadVLENB
   ; CHECK-NEXT:   $x1 = ADDI $x0, 50
   ; CHECK-NEXT:   $x5 = MUL killed $x5, killed $x1
   ; CHECK-NEXT:   $x1 = LUI 1
-  ; CHECK-NEXT:   $x1 = ADDIW killed $x1, -1896
+  ; CHECK-NEXT:   $x1 = ADDIW killed $x1, -1888
   ; CHECK-NEXT:   $x1 = ADD $x2, killed $x1
   ; CHECK-NEXT:   $x1 = ADD killed $x1, killed $x5
-  ; CHECK-NEXT:   $x5 = LD $x2, 0 :: (load (s64) from %stack.17)
+  ; CHECK-NEXT:   $x5 = LD $x2, 0 :: (load (s64) from %stack.16)
   ; CHECK-NEXT:   renamable $v0 = PseudoVRELOAD_M1 killed $x1 :: (load unknown-size from %stack.1, align 8)
-  ; CHECK-NEXT:   $x1 = LD $x2, 8 :: (load (s64) from %stack.16)
+  ; CHECK-NEXT:   $x1 = LD $x2, 8 :: (load (s64) from %stack.15)
   ; CHECK-NEXT:   renamable $v0 = PseudoVSLIDEDOWN_VX_M1 undef renamable $v0, killed renamable $v0, killed renamable $x13, $noreg, 8, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   renamable $x13 = PseudoVMV_X_S_M1 killed renamable $v0, 8, implicit $vl, implicit $vtype
   ; CHECK-NEXT:   BLT killed renamable $x16, renamable $x27, %bb.2
@@ -155,7 +149,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT:   liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   renamable $x9 = COPY killed renamable $x13
+  ; CHECK-NEXT:   renamable $x10 = COPY killed renamable $x13
   ; CHECK-NEXT:   PseudoBR %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
@@ -165,17 +159,16 @@ body:             |
   ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x2, 272
   ; CHECK-NEXT:   $x1 = LD $x2, 2024 :: (load (s64) from %stack.3)
   ; CHECK-NEXT:   $x8 = LD $x2, 2016 :: (load (s64) from %stack.4)
-  ; CHECK-NEXT:   $x9 = LD $x2, 2008 :: (load (s64) from %stack.5)
-  ; CHECK-NEXT:   $x18 = LD $x2, 2000 :: (load (s64) from %stack.6)
-  ; CHECK-NEXT:   $x19 = LD $x2, 1992 :: (load (s64) from %stack.7)
-  ; CHECK-NEXT:   $x20 = LD $x2, 1984 :: (load (s64) from %stack.8)
-  ; CHECK-NEXT:   $x21 = LD $x2, 1976 :: (load (s64) from %stack.9)
-  ; CHECK-NEXT:   $x22 = LD $x2, 1968 :: (load (s64) from %stack.10)
-  ; CHECK-NEXT:   $x23 = LD $x2, 1960 :: (load (s64) from %stack.11)
-  ; CHECK-NEXT:   $x24 = LD $x2, 1952 :: (load (s64) from %stack.12)
-  ; CHECK-NEXT:   $x25 = LD $x2, 1944 :: (load (s64) from %stack.13)
-  ; CHECK-NEXT:   $x26 = LD $x2, 1936 :: (load (s64) from %stack.14)
-  ; CHECK-NEXT:   $x27 = LD $x2, 1928 :: (load (s64) from %stack.15)
+  ; CHECK-NEXT:   $x18 = LD $x2, 2008 :: (load (s64) from %stack.5)
+  ; CHECK-NEXT:   $x19 = LD $x2, 2000 :: (load (s64) from %stack.6)
+  ; CHECK-NEXT:   $x20 = LD $x2, 1992 :: (load (s64) from %stack.7)
+  ; CHECK-NEXT:   $x21 = LD $x2, 1984 :: (load (s64) from %stack.8)
+  ; CHECK-NEXT:   $x22 = LD $x2, 1976 :: (load (s64) from %stack.9)
+  ; CHECK-NEXT:   $x23 = LD $x2, 1968 :: (load (s64) from %stack.10)
+  ; CHECK-NEXT:   $x24 = LD $x2, 1960 :: (load (s64) from %stack.11)
+  ; CHECK-NEXT:   $x25 = LD $x2, 1952 :: (load (s64) from %stack.12)
+  ; CHECK-NEXT:   $x26 = LD $x2, 1944 :: (load (s64) from %stack.13)
+  ; CHECK-NEXT:   $x27 = LD $x2, 1936 :: (load (s64) from %stack.14)
   ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x2, 2032
   ; CHECK-NEXT:   PseudoRET
   bb.0:
@@ -212,11 +205,7 @@ body:             |
     renamable $x13 = SLLI renamable $x11, 3
     renamable $x13 = ADD renamable $x26, killed renamable $x13
     renamable $x13 = LD killed renamable $x13, 0 :: (load (s64))
-    renamable $x9 = SRAI renamable $x13, 63
-    renamable $x9 = SRLI killed renamable $x9, 62
-    renamable $x9 = ADD renamable $x13, killed renamable $x9
-    renamable $x9 = ANDI killed renamable $x9, -4
-    renamable $x16 = SUB killed renamable $x13, renamable $x9
+    renamable $x16 = SUB killed renamable $x13, renamable $x13
     dead renamable $x13 = PseudoVSETIVLI 1, 64, implicit-def $vl, implicit-def $vtype
     renamable $x13 = nsw ADDI renamable $x16, -2
     renamable $v0 = PseudoVRELOAD_M1 %stack.1 :: (load unknown-size from %stack.1, align 8)
@@ -228,7 +217,7 @@ body:             |
     successors: %bb.2
     liveins: $x1, $x5, $x6, $x7, $x9, $x10, $x11, $x12, $x13, $x14, $x15, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x26, $x27, $x28, $x29, $x30, $x31
 
-    renamable $x9 = COPY killed renamable $x13
+    renamable $x10 = COPY killed renamable $x13
     PseudoBR %bb.2
 
   bb.2:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
index 23e41d67e0a54..13b856e00a7c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -882,11 +882,14 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX1-NEXT:    .cfi_def_cfa_offset 384
 ; LMULMAX1-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
 ; LMULMAX1-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    sd s1, 360(sp) # 8-byte Folded Spill
 ; LMULMAX1-NEXT:    .cfi_offset ra, -8
 ; LMULMAX1-NEXT:    .cfi_offset s0, -16
+; LMULMAX1-NEXT:    .cfi_offset s1, -24
 ; LMULMAX1-NEXT:    addi s0, sp, 384
 ; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
 ; LMULMAX1-NEXT:    andi sp, sp, -128
+; LMULMAX1-NEXT:    mv s1, sp
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-NEXT:    vle32.v v24, (a0)
 ; LMULMAX1-NEXT:    addi a1, a0, 16
@@ -904,25 +907,26 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX1-NEXT:    addi a0, a0, 112
 ; LMULMAX1-NEXT:    vle32.v v31, (a0)
 ; LMULMAX1-NEXT:    ld a0, 0(s0)
-; LMULMAX1-NEXT:    addi a1, sp, 240
+; LMULMAX1-NEXT:    addi sp, sp, -16
+; LMULMAX1-NEXT:    addi a1, s1, 240
 ; LMULMAX1-NEXT:    vse32.v v15, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 224
+; LMULMAX1-NEXT:    addi a1, s1, 224
 ; LMULMAX1-NEXT:    vse32.v v14, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 208
+; LMULMAX1-NEXT:    addi a1, s1, 208
 ; LMULMAX1-NEXT:    vse32.v v13, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 192
+; LMULMAX1-NEXT:    addi a1, s1, 192
 ; LMULMAX1-NEXT:    vse32.v v12, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 176
+; LMULMAX1-NEXT:    addi a1, s1, 176
 ; LMULMAX1-NEXT:    vse32.v v11, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 160
+; LMULMAX1-NEXT:    addi a1, s1, 160
 ; LMULMAX1-NEXT:    vse32.v v10, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 144
+; LMULMAX1-NEXT:    addi a1, s1, 144
 ; LMULMAX1-NEXT:    vse32.v v9, (a1)
 ; LMULMAX1-NEXT:    li a1, 42
 ; LMULMAX1-NEXT:    sd a1, 8(sp)
 ; LMULMAX1-NEXT:    sd a0, 0(sp)
-; LMULMAX1-NEXT:    addi a0, sp, 128
-; LMULMAX1-NEXT:    addi a1, sp, 128
+; LMULMAX1-NEXT:    addi a0, s1, 128
+; LMULMAX1-NEXT:    addi a1, s1, 128
 ; LMULMAX1-NEXT:    vse32.v v8, (a1)
 ; LMULMAX1-NEXT:    vmv.v.v v8, v24
 ; LMULMAX1-NEXT:    vmv.v.v v9, v25
@@ -933,9 +937,11 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX1-NEXT:    vmv.v.v v14, v30
 ; LMULMAX1-NEXT:    vmv.v.v v15, v31
 ; LMULMAX1-NEXT:    call ext3@plt
+; LMULMAX1-NEXT:    addi sp, sp, 16
 ; LMULMAX1-NEXT:    addi sp, s0, -384
 ; LMULMAX1-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld s1, 360(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    addi sp, sp, 384
 ; LMULMAX1-NEXT:    ret
   %t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42)
diff --git a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
index 3eccd6c84d9c3..5f926efd9012c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
@@ -5,25 +5,27 @@
 define void @foo(i32* nocapture noundef %p1) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -128
-; CHECK-NEXT:    .cfi_def_cfa_offset 128
-; CHECK-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    addi sp, sp, -192
+; CHECK-NEXT:    .cfi_def_cfa_offset 192
+; CHECK-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; CHECK-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
-; CHECK-NEXT:    addi s0, sp, 128
+; CHECK-NEXT:    .cfi_offset s2, -32
+; CHECK-NEXT:    addi s0, sp, 192
 ; CHECK-NEXT:    .cfi_def_cfa s0, 0
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 1
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    mv s1, sp
-; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi a0, s1, 104
+; CHECK-NEXT:    addi a0, s1, 160
 ; CHECK-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    addi sp, sp, -16
 ; CHECK-NEXT:    addi t0, s1, 64
@@ -39,16 +41,17 @@ define void @foo(i32* nocapture noundef %p1) {
 ; CHECK-NEXT:    call bar@plt
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vle32.v v8, (s1)
-; CHECK-NEXT:    addi a0, s1, 104
+; CHECK-NEXT:    vle32.v v8, (s2)
+; CHECK-NEXT:    addi a0, s1, 160
 ; CHECK-NEXT:    vl2re8.v v10, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vfadd.vv v8, v10, v8
-; CHECK-NEXT:    vse32.v v8, (s1)
-; CHECK-NEXT:    addi sp, s0, -128
-; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 128
+; CHECK-NEXT:    vse32.v v8, (s2)
+; CHECK-NEXT:    addi sp, s0, -192
+; CHECK-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    addi sp, sp, 192
 ; CHECK-NEXT:    ret
 entry:
   %vla = alloca [10 x i32], align 64
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 5f52fdba85c30..6bae641408887 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -600,30 +600,30 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    addi sp, sp, -96
 ; RV32MV-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
 ; RV32MV-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s1, 84(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s2, 80(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s3, 76(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s4, 72(sp) # 4-byte Folded Spill
-; RV32MV-NEXT:    sw s5, 68(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s2, 84(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s3, 80(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s4, 76(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s5, 72(sp) # 4-byte Folded Spill
+; RV32MV-NEXT:    sw s6, 68(sp) # 4-byte Folded Spill
 ; RV32MV-NEXT:    addi s0, sp, 96
 ; RV32MV-NEXT:    andi sp, sp, -32
-; RV32MV-NEXT:    mv s1, a0
+; RV32MV-NEXT:    mv s2, a0
 ; RV32MV-NEXT:    lw a0, 8(a0)
-; RV32MV-NEXT:    lw a1, 4(s1)
+; RV32MV-NEXT:    lw a1, 4(s2)
 ; RV32MV-NEXT:    slli a2, a0, 31
 ; RV32MV-NEXT:    srli a3, a1, 1
-; RV32MV-NEXT:    or s2, a3, a2
-; RV32MV-NEXT:    lbu a2, 12(s1)
+; RV32MV-NEXT:    or s3, a3, a2
+; RV32MV-NEXT:    lbu a2, 12(s2)
 ; RV32MV-NEXT:    srli a3, a0, 1
 ; RV32MV-NEXT:    andi a3, a3, 1
-; RV32MV-NEXT:    neg s3, a3
+; RV32MV-NEXT:    neg s4, a3
 ; RV32MV-NEXT:    slli a3, a2, 30
 ; RV32MV-NEXT:    srli a0, a0, 2
-; RV32MV-NEXT:    or s4, a0, a3
+; RV32MV-NEXT:    or s5, a0, a3
 ; RV32MV-NEXT:    srli a0, a2, 2
 ; RV32MV-NEXT:    andi a2, a0, 1
-; RV32MV-NEXT:    lw a0, 0(s1)
-; RV32MV-NEXT:    neg s5, a2
+; RV32MV-NEXT:    lw a0, 0(s2)
+; RV32MV-NEXT:    neg s6, a2
 ; RV32MV-NEXT:    andi a1, a1, 1
 ; RV32MV-NEXT:    neg a1, a1
 ; RV32MV-NEXT:    li a2, 6
@@ -633,14 +633,14 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    sw a0, 32(sp)
 ; RV32MV-NEXT:    li a2, -5
 ; RV32MV-NEXT:    li a3, -1
-; RV32MV-NEXT:    mv a0, s4
-; RV32MV-NEXT:    mv a1, s5
+; RV32MV-NEXT:    mv a0, s5
+; RV32MV-NEXT:    mv a1, s6
 ; RV32MV-NEXT:    call __moddi3@plt
 ; RV32MV-NEXT:    sw a1, 52(sp)
 ; RV32MV-NEXT:    sw a0, 48(sp)
 ; RV32MV-NEXT:    li a2, 7
-; RV32MV-NEXT:    mv a0, s2
-; RV32MV-NEXT:    mv a1, s3
+; RV32MV-NEXT:    mv a0, s3
+; RV32MV-NEXT:    mv a1, s4
 ; RV32MV-NEXT:    li a3, 0
 ; RV32MV-NEXT:    call __moddi3@plt
 ; RV32MV-NEXT:    sw a1, 44(sp)
@@ -662,14 +662,14 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    vmv.v.i v8, 0
 ; RV32MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV32MV-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; RV32MV-NEXT:    vse32.v v8, (s1)
+; RV32MV-NEXT:    vse32.v v8, (s2)
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 1
 ; RV32MV-NEXT:    vmv.x.s a0, v10
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 2
 ; RV32MV-NEXT:    vmv.x.s a1, v10
 ; RV32MV-NEXT:    slli a2, a1, 1
 ; RV32MV-NEXT:    sub a0, a2, a0
-; RV32MV-NEXT:    sw a0, 4(s1)
+; RV32MV-NEXT:    sw a0, 4(s2)
 ; RV32MV-NEXT:    vslidedown.vi v10, v8, 4
 ; RV32MV-NEXT:    vmv.x.s a0, v10
 ; RV32MV-NEXT:    srli a2, a0, 30
@@ -678,7 +678,7 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    slli a3, a3, 2
 ; RV32MV-NEXT:    or a2, a3, a2
 ; RV32MV-NEXT:    andi a2, a2, 7
-; RV32MV-NEXT:    sb a2, 12(s1)
+; RV32MV-NEXT:    sb a2, 12(s2)
 ; RV32MV-NEXT:    srli a1, a1, 31
 ; RV32MV-NEXT:    vslidedown.vi v8, v8, 3
 ; RV32MV-NEXT:    vmv.x.s a2, v8
@@ -687,15 +687,15 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    or a1, a1, a2
 ; RV32MV-NEXT:    slli a0, a0, 2
 ; RV32MV-NEXT:    or a0, a1, a0
-; RV32MV-NEXT:    sw a0, 8(s1)
+; RV32MV-NEXT:    sw a0, 8(s2)
 ; RV32MV-NEXT:    addi sp, s0, -96
 ; RV32MV-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
 ; RV32MV-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s1, 84(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s3, 76(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s4, 72(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s5, 68(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s2, 84(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s3, 80(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s4, 76(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s5, 72(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s6, 68(sp) # 4-byte Folded Reload
 ; RV32MV-NEXT:    addi sp, sp, 96
 ; RV32MV-NEXT:    ret
 ;

From f811cb82a6cd811dbb4730009d0f060503aa1c76 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 20 Jan 2022 12:08:20 -0800
Subject: [PATCH 115/946] [lldb] Revive lldb-instr

I revived lldb-instr to update the macros for D117712. I think the new
macros are simple enough that we add them by hand, but this tool can do
it automatically for you.

Differential revision: https://reviews.llvm.org/D117748
---
 lldb/tools/CMakeLists.txt            |   1 +
 lldb/tools/lldb-instr/CMakeLists.txt |  16 +++
 lldb/tools/lldb-instr/Instrument.cpp | 173 +++++++++++++++++++++++++++
 3 files changed, 190 insertions(+)
 create mode 100644 lldb/tools/lldb-instr/CMakeLists.txt
 create mode 100644 lldb/tools/lldb-instr/Instrument.cpp

diff --git a/lldb/tools/CMakeLists.txt b/lldb/tools/CMakeLists.txt
index a5f4ca8ec7eb3..1585fd4dc4b9e 100644
--- a/lldb/tools/CMakeLists.txt
+++ b/lldb/tools/CMakeLists.txt
@@ -7,6 +7,7 @@ add_subdirectory(intel-features)
 # example is `check-lldb`. So, we pass EXCLUDE_FROM_ALL here.
 add_subdirectory(lldb-test EXCLUDE_FROM_ALL)
 
+add_lldb_tool_subdirectory(lldb-instr)
 add_lldb_tool_subdirectory(lldb-vscode)
 
 if (CMAKE_SYSTEM_NAME MATCHES "Darwin")
diff --git a/lldb/tools/lldb-instr/CMakeLists.txt b/lldb/tools/lldb-instr/CMakeLists.txt
new file mode 100644
index 0000000000000..8da453b2894fd
--- /dev/null
+++ b/lldb/tools/lldb-instr/CMakeLists.txt
@@ -0,0 +1,16 @@
+add_lldb_tool(lldb-instr
+  Instrument.cpp
+
+  CLANG_LIBS
+    clangAST
+    clangBasic
+    clangCodeGen
+    clangFrontend
+    clangLex
+    clangRewrite
+    clangSerialization
+    clangTooling
+
+  LINK_COMPONENTS
+    Support
+  )
diff --git a/lldb/tools/lldb-instr/Instrument.cpp b/lldb/tools/lldb-instr/Instrument.cpp
new file mode 100644
index 0000000000000..4b8725396a61f
--- /dev/null
+++ b/lldb/tools/lldb-instr/Instrument.cpp
@@ -0,0 +1,173 @@
+#include "clang/AST/AST.h"
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/CodeGen/ObjectFilePCHContainerOperations.h"
+#include "clang/Frontend/ASTConsumers.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+#include "clang/Rewrite/Core/Rewriter.h"
+#include "clang/Tooling/CommonOptionsParser.h"
+#include "clang/Tooling/Tooling.h"
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <sstream>
+#include <string>
+
+using namespace clang;
+using namespace clang::driver;
+using namespace clang::tooling;
+
+static llvm::cl::OptionCategory InstrCategory("LLDB Instrumentation Generator");
+
+class SBVisitor : public RecursiveASTVisitor<SBVisitor> {
+public:
+  SBVisitor(Rewriter &R, ASTContext &Context)
+      : MyRewriter(R), Context(Context) {}
+
+  bool VisitCXXMethodDecl(CXXMethodDecl *Decl) {
+    // Not all decls should be registered. Please refer to that method's
+    // comment for details.
+    if (ShouldSkip(Decl))
+      return false;
+
+    // Print 'bool' instead of '_Bool'.
+    PrintingPolicy Policy(Context.getLangOpts());
+    Policy.Bool = true;
+
+    // Collect the functions parameter types and names.
+    std::vector<std::string> ParamNames;
+    if (!Decl->isStatic())
+      ParamNames.push_back("this");
+    for (auto *P : Decl->parameters())
+      ParamNames.push_back(P->getNameAsString());
+
+    // Construct the macros.
+    std::string Buffer;
+    llvm::raw_string_ostream Macro(Buffer);
+    if (ParamNames.empty()) {
+      Macro << "LLDB_INSTRUMENT()";
+    } else {
+      Macro << "LLDB_INSTRUMENT_VA(" << llvm::join(ParamNames, ", ") << ")";
+    }
+
+    Stmt *Body = Decl->getBody();
+    for (auto &C : Body->children()) {
+      if (C->getBeginLoc().isMacroID()) {
+        CharSourceRange Range =
+            MyRewriter.getSourceMgr().getExpansionRange(C->getSourceRange());
+        MyRewriter.ReplaceText(Range, Macro.str());
+      } else {
+        Macro << ";";
+        SourceLocation InsertLoc = Lexer::getLocForEndOfToken(
+            Body->getBeginLoc(), 0, MyRewriter.getSourceMgr(),
+            MyRewriter.getLangOpts());
+        MyRewriter.InsertTextAfter(InsertLoc, Macro.str());
+      }
+      break;
+    }
+
+    return true;
+  }
+
+private:
+  /// Determine whether we need to consider the given CXXMethodDecl.
+  ///
+  /// Currently we skip the following cases:
+  ///  1. Decls outside the main source file,
+  ///  2. Decls that are only present in the source file,
+  ///  3. Decls that are not definitions,
+  ///  4. Non-public methods,
+  ///  5. Variadic methods.
+  ///  6. Destructors.
+  bool ShouldSkip(CXXMethodDecl *Decl) {
+    // Skip anything outside the main file.
+    if (!MyRewriter.getSourceMgr().isInMainFile(Decl->getBeginLoc()))
+      return true;
+
+    // Skip if the canonical decl in the current decl. It means that the method
+    // is declared in the implementation and is therefore not exposed as part
+    // of the API.
+    if (Decl == Decl->getCanonicalDecl())
+      return true;
+
+    // Skip decls that have no body, i.e. are just declarations.
+    Stmt *Body = Decl->getBody();
+    if (!Body)
+      return true;
+
+    // Skip non-public methods.
+    AccessSpecifier AS = Decl->getAccess();
+    if (AS != AccessSpecifier::AS_public)
+      return true;
+
+    // Skip variadic methods.
+    if (Decl->isVariadic())
+      return true;
+
+    // Skip destructors.
+    if (isa<CXXDestructorDecl>(Decl))
+      return true;
+
+    return false;
+  }
+
+  Rewriter &MyRewriter;
+  ASTContext &Context;
+};
+
+class SBConsumer : public ASTConsumer {
+public:
+  SBConsumer(Rewriter &R, ASTContext &Context) : Visitor(R, Context) {}
+
+  // Override the method that gets called for each parsed top-level
+  // declaration.
+  bool HandleTopLevelDecl(DeclGroupRef DR) override {
+    for (DeclGroupRef::iterator b = DR.begin(), e = DR.end(); b != e; ++b) {
+      Visitor.TraverseDecl(*b);
+    }
+    return true;
+  }
+
+private:
+  SBVisitor Visitor;
+};
+
+class SBAction : public ASTFrontendAction {
+public:
+  SBAction() = default;
+
+  bool BeginSourceFileAction(CompilerInstance &CI) override { return true; }
+
+  void EndSourceFileAction() override { MyRewriter.overwriteChangedFiles(); }
+
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef File) override {
+    MyRewriter.setSourceMgr(CI.getSourceManager(), CI.getLangOpts());
+    return std::make_unique<SBConsumer>(MyRewriter, CI.getASTContext());
+  }
+
+private:
+  Rewriter MyRewriter;
+};
+
+int main(int argc, const char **argv) {
+  auto ExpectedParser = CommonOptionsParser::create(
+      argc, argv, InstrCategory, llvm::cl::OneOrMore,
+      "Utility for generating the macros for LLDB's "
+      "instrumentation framework.");
+  if (!ExpectedParser) {
+    llvm::errs() << ExpectedParser.takeError();
+    return 1;
+  }
+  CommonOptionsParser &OP = ExpectedParser.get();
+
+  auto PCHOpts = std::make_shared<PCHContainerOperations>();
+  PCHOpts->registerWriter(std::make_unique<ObjectFilePCHContainerWriter>());
+  PCHOpts->registerReader(std::make_unique<ObjectFilePCHContainerReader>());
+
+  ClangTool T(OP.getCompilations(), OP.getSourcePathList(), PCHOpts);
+  return T.run(newFrontendActionFactory<SBAction>().get());
+}

From 1755f5b1d7b7871672abdf0fde5ccd091b8dbc04 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 19 Jan 2022 11:38:26 -0800
Subject: [PATCH 116/946] [lldb] Decouple instrumentation from the reproducers

Remove the last remaining references to the reproducers from the
instrumentation. This patch renames the relevant files and macros.

Differential revision: https://reviews.llvm.org/D117712
---
 lldb/include/lldb/Utility/Instrumentation.h   | 103 ++++
 .../lldb/Utility/ReproducerInstrumentation.h  | 151 ------
 lldb/source/API/SBAddress.cpp                 |  59 +--
 lldb/source/API/SBAttachInfo.cpp              |  81 ++--
 lldb/source/API/SBBlock.cpp                   |  55 +--
 lldb/source/API/SBBreakpoint.cpp              | 172 +++----
 lldb/source/API/SBBreakpointLocation.cpp      |  98 ++--
 lldb/source/API/SBBreakpointName.cpp          | 115 ++---
 lldb/source/API/SBBreakpointOptionCommon.cpp  |  24 +-
 lldb/source/API/SBBroadcaster.cpp             |  52 +-
 lldb/source/API/SBCommandInterpreter.cpp      | 183 +++----
 .../API/SBCommandInterpreterRunOptions.cpp    |  86 ++--
 lldb/source/API/SBCommandReturnObject.cpp     | 102 ++--
 lldb/source/API/SBCommunication.cpp           |  51 +-
 lldb/source/API/SBCompileUnit.cpp             |  55 +--
 lldb/source/API/SBData.cpp                    | 131 ++---
 lldb/source/API/SBDebugger.cpp                | 368 ++++++--------
 lldb/source/API/SBDeclaration.cpp             |  41 +-
 lldb/source/API/SBEnvironment.cpp             |  36 +-
 lldb/source/API/SBError.cpp                   |  38 +-
 lldb/source/API/SBEvent.cpp                   |  47 +-
 lldb/source/API/SBExecutionContext.cpp        |  36 +-
 lldb/source/API/SBExpressionOptions.cpp       | 103 ++--
 lldb/source/API/SBFile.cpp                    |  35 +-
 lldb/source/API/SBFileSpec.cpp                |  47 +-
 lldb/source/API/SBFileSpecList.cpp            |  30 +-
 lldb/source/API/SBFrame.cpp                   | 134 ++----
 lldb/source/API/SBFunction.cpp                |  50 +-
 lldb/source/API/SBHostOS.cpp                  |  36 +-
 lldb/source/API/SBInstruction.cpp             |  57 +--
 lldb/source/API/SBInstructionList.cpp         |  45 +-
 lldb/source/API/SBLanguageRuntime.cpp         |   9 +-
 lldb/source/API/SBLaunchInfo.cpp              | 117 ++---
 lldb/source/API/SBLineEntry.cpp               |  40 +-
 lldb/source/API/SBListener.cpp                |  72 +--
 lldb/source/API/SBMemoryRegionInfo.cpp        |  52 +-
 lldb/source/API/SBMemoryRegionInfoList.cpp    |  29 +-
 lldb/source/API/SBModule.cpp                  | 129 ++---
 lldb/source/API/SBModuleSpec.cpp              |  80 ++-
 lldb/source/API/SBPlatform.cpp                | 156 +++---
 lldb/source/API/SBProcess.cpp                 | 245 ++++------
 lldb/source/API/SBProcessInfo.cpp             |  43 +-
 lldb/source/API/SBQueue.cpp                   |  41 +-
 lldb/source/API/SBQueueItem.cpp               |  27 +-
 lldb/source/API/SBReproducer.cpp              |  29 +-
 lldb/source/API/SBSection.cpp                 |  56 +--
 lldb/source/API/SBSourceManager.cpp           |  26 +-
 lldb/source/API/SBStream.cpp                  |  29 +-
 lldb/source/API/SBStringList.cpp              |  31 +-
 lldb/source/API/SBStructuredData.cpp          |  60 +--
 lldb/source/API/SBSymbol.cpp                  |  46 +-
 lldb/source/API/SBSymbolContext.cpp           |  58 +--
 lldb/source/API/SBSymbolContextList.cpp       |  31 +-
 lldb/source/API/SBTarget.cpp                  | 454 ++++++------------
 lldb/source/API/SBThread.cpp                  | 176 +++----
 lldb/source/API/SBThreadCollection.cpp        |  22 +-
 lldb/source/API/SBThreadPlan.cpp              |  97 ++--
 lldb/source/API/SBTrace.cpp                   |  23 +-
 lldb/source/API/SBType.cpp                    | 202 ++++----
 lldb/source/API/SBTypeCategory.cpp            | 113 ++---
 lldb/source/API/SBTypeEnumMember.cpp          |  51 +-
 lldb/source/API/SBTypeFilter.cpp              |  46 +-
 lldb/source/API/SBTypeFormat.cpp              |  44 +-
 lldb/source/API/SBTypeNameSpecifier.cpp       |  41 +-
 lldb/source/API/SBTypeSummary.cpp             |  95 ++--
 lldb/source/API/SBTypeSynthetic.cpp           |  52 +-
 lldb/source/API/SBUnixSignals.cpp             |  47 +-
 lldb/source/API/SBValue.cpp                   | 221 ++++-----
 lldb/source/API/SBValueList.cpp               |  33 +-
 lldb/source/API/SBVariablesOptions.cpp        |  56 +--
 lldb/source/API/SBWatchpoint.cpp              |  69 ++-
 .../Python/ScriptInterpreterPython.cpp        |   2 +-
 lldb/source/Utility/CMakeLists.txt            |   2 +-
 ...nstrumentation.cpp => Instrumentation.cpp} |  31 +-
 74 files changed, 2230 insertions(+), 3674 deletions(-)
 create mode 100644 lldb/include/lldb/Utility/Instrumentation.h
 delete mode 100644 lldb/include/lldb/Utility/ReproducerInstrumentation.h
 rename lldb/source/Utility/{ReproducerInstrumentation.cpp => Instrumentation.cpp} (52%)

diff --git a/lldb/include/lldb/Utility/Instrumentation.h b/lldb/include/lldb/Utility/Instrumentation.h
new file mode 100644
index 0000000000000..6962270bb89db
--- /dev/null
+++ b/lldb/include/lldb/Utility/Instrumentation.h
@@ -0,0 +1,103 @@
+//===-- Instrumentation.h ---------------------------------------*- C++ -*-===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_UTILITY_INSTRUMENTATION_H
+#define LLDB_UTILITY_INSTRUMENTATION_H
+
+#include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/Log.h"
+#include "lldb/Utility/Logging.h"
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include <map>
+#include <thread>
+#include <type_traits>
+
+namespace lldb_private {
+namespace instrumentation {
+
+template <typename T,
+          typename std::enable_if<std::is_fundamental<T>::value, int>::type = 0>
+inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
+  ss << t;
+}
+
+template <typename T, typename std::enable_if<!std::is_fundamental<T>::value,
+                                              int>::type = 0>
+inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
+  ss << &t;
+}
+
+template <typename T>
+inline void stringify_append(llvm::raw_string_ostream &ss, T *t) {
+  ss << reinterpret_cast<void *>(t);
+}
+
+template <typename T>
+inline void stringify_append(llvm::raw_string_ostream &ss, const T *t) {
+  ss << reinterpret_cast<const void *>(t);
+}
+
+template <>
+inline void stringify_append<char>(llvm::raw_string_ostream &ss,
+                                   const char *t) {
+  ss << '\"' << t << '\"';
+}
+
+template <>
+inline void stringify_append<std::nullptr_t>(llvm::raw_string_ostream &ss,
+                                             const std::nullptr_t &t) {
+  ss << "\"nullptr\"";
+}
+
+template <typename Head>
+inline void stringify_helper(llvm::raw_string_ostream &ss, const Head &head) {
+  stringify_append(ss, head);
+}
+
+template <typename Head, typename... Tail>
+inline void stringify_helper(llvm::raw_string_ostream &ss, const Head &head,
+                             const Tail &...tail) {
+  stringify_append(ss, head);
+  ss << ", ";
+  stringify_helper(ss, tail...);
+}
+
+template <typename... Ts> inline std::string stringify_args(const Ts &...ts) {
+  std::string buffer;
+  llvm::raw_string_ostream ss(buffer);
+  stringify_helper(ss, ts...);
+  return ss.str();
+}
+
+/// RAII object for instrumenting LLDB API functions.
+class Instrumenter {
+public:
+  Instrumenter(llvm::StringRef pretty_func, std::string &&pretty_args = {});
+  ~Instrumenter();
+
+private:
+  void UpdateBoundary();
+
+  /// Whether this function call was the one crossing the API boundary.
+  bool m_local_boundary = false;
+};
+} // namespace instrumentation
+} // namespace lldb_private
+
+#define LLDB_INSTRUMENT()                                                      \
+  lldb_private::instrumentation::Instrumenter _instr(LLVM_PRETTY_FUNCTION);
+
+#define LLDB_INSTRUMENT_VA(...)                                                \
+  lldb_private::instrumentation::Instrumenter _instr(                          \
+      LLVM_PRETTY_FUNCTION,                                                    \
+      lldb_private::instrumentation::stringify_args(__VA_ARGS__));
+
+#endif // LLDB_UTILITY_INSTRUMENTATION_H
diff --git a/lldb/include/lldb/Utility/ReproducerInstrumentation.h b/lldb/include/lldb/Utility/ReproducerInstrumentation.h
deleted file mode 100644
index 3bce7b8f3a6f1..0000000000000
--- a/lldb/include/lldb/Utility/ReproducerInstrumentation.h
+++ /dev/null
@@ -1,151 +0,0 @@
-//===-- ReproducerInstrumentation.h -----------------------------*- C++ -*-===//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLDB_UTILITY_REPRODUCERINSTRUMENTATION_H
-#define LLDB_UTILITY_REPRODUCERINSTRUMENTATION_H
-
-#include "lldb/Utility/FileSpec.h"
-#include "lldb/Utility/Log.h"
-#include "lldb/Utility/Logging.h"
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/ErrorHandling.h"
-
-#include <map>
-#include <thread>
-#include <type_traits>
-
-template <typename T,
-          typename std::enable_if<std::is_fundamental<T>::value, int>::type = 0>
-inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
-  ss << t;
-}
-
-template <typename T, typename std::enable_if<!std::is_fundamental<T>::value,
-                                              int>::type = 0>
-inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
-  ss << &t;
-}
-
-template <typename T>
-inline void stringify_append(llvm::raw_string_ostream &ss, T *t) {
-  ss << reinterpret_cast<void *>(t);
-}
-
-template <typename T>
-inline void stringify_append(llvm::raw_string_ostream &ss, const T *t) {
-  ss << reinterpret_cast<const void *>(t);
-}
-
-template <>
-inline void stringify_append<char>(llvm::raw_string_ostream &ss,
-                                   const char *t) {
-  ss << '\"' << t << '\"';
-}
-
-template <>
-inline void stringify_append<std::nullptr_t>(llvm::raw_string_ostream &ss,
-                                             const std::nullptr_t &t) {
-  ss << "\"nullptr\"";
-}
-
-template <typename Head>
-inline void stringify_helper(llvm::raw_string_ostream &ss, const Head &head) {
-  stringify_append(ss, head);
-}
-
-template <typename Head, typename... Tail>
-inline void stringify_helper(llvm::raw_string_ostream &ss, const Head &head,
-                             const Tail &... tail) {
-  stringify_append(ss, head);
-  ss << ", ";
-  stringify_helper(ss, tail...);
-}
-
-template <typename... Ts> inline std::string stringify_args(const Ts &... ts) {
-  std::string buffer;
-  llvm::raw_string_ostream ss(buffer);
-  stringify_helper(ss, ts...);
-  return ss.str();
-}
-
-#define LLDB_CONSTRUCT_(T, Class, ...)                                         \
-  lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION);
-
-#define LLDB_RECORD_CONSTRUCTOR(Class, Signature, ...)                         \
-  LLDB_CONSTRUCT_(Class Signature, this, __VA_ARGS__)
-
-#define LLDB_RECORD_CONSTRUCTOR_NO_ARGS(Class)                                 \
-  LLDB_CONSTRUCT_(Class(), this, lldb_private::repro::EmptyArg())
-
-#define LLDB_RECORD_(T1, T2, ...)                                              \
-  lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION,                \
-                                          stringify_args(__VA_ARGS__));
-
-#define LLDB_RECORD_METHOD(Result, Class, Method, Signature, ...)              \
-  LLDB_RECORD_(Result(Class::*) Signature, (&Class::Method), this, __VA_ARGS__)
-
-#define LLDB_RECORD_METHOD_CONST(Result, Class, Method, Signature, ...)        \
-  LLDB_RECORD_(Result(Class::*) Signature const, (&Class::Method), this,       \
-               __VA_ARGS__)
-
-#define LLDB_RECORD_METHOD_NO_ARGS(Result, Class, Method)                      \
-  LLDB_RECORD_(Result (Class::*)(), (&Class::Method), this)
-
-#define LLDB_RECORD_METHOD_CONST_NO_ARGS(Result, Class, Method)                \
-  LLDB_RECORD_(Result (Class::*)() const, (&Class::Method), this)
-
-#define LLDB_RECORD_STATIC_METHOD(Result, Class, Method, Signature, ...)       \
-  LLDB_RECORD_(Result(*) Signature, (&Class::Method), __VA_ARGS__)
-
-#define LLDB_RECORD_STATIC_METHOD_NO_ARGS(Result, Class, Method)               \
-  LLDB_RECORD_(Result (*)(), (&Class::Method), lldb_private::repro::EmptyArg())
-
-/// The LLDB_RECORD_DUMMY macro is special because it doesn't actually record
-/// anything. It's used to track API boundaries when we cannot record for
-/// technical reasons.
-#define LLDB_RECORD_DUMMY(Result, Class, Method, Signature, ...)               \
-  lldb_private::repro::Recorder _recorder;
-
-#define LLDB_RECORD_DUMMY_NO_ARGS(Result, Class, Method)                       \
-  lldb_private::repro::Recorder _recorder;
-
-namespace lldb_private {
-namespace repro {
-
-struct EmptyArg {};
-
-/// RAII object that records function invocations and their return value.
-///
-/// API calls are only captured when the API boundary is crossed. Once we're in
-/// the API layer, and another API function is called, it doesn't need to be
-/// recorded.
-///
-/// When a call is recored, its result is always recorded as well, even if the
-/// function returns a void. For functions that return by value, RecordResult
-/// should be used. Otherwise a sentinel value (0) will be serialized.
-///
-/// Because of the functional overlap between logging and recording API calls,
-/// this class is also used for logging.
-class Recorder {
-public:
-  Recorder();
-  Recorder(llvm::StringRef pretty_func, std::string &&pretty_args = {});
-  ~Recorder();
-
-private:
-  void UpdateBoundary();
-
-  /// Whether this function call was the one crossing the API boundary.
-  bool m_local_boundary = false;
-};
-
-} // namespace repro
-} // namespace lldb_private
-
-#endif // LLDB_UTILITY_REPRODUCERINSTRUMENTATION_H
diff --git a/lldb/source/API/SBAddress.cpp b/lldb/source/API/SBAddress.cpp
index af1b876d803d7..e519f0bcc83c6 100644
--- a/lldb/source/API/SBAddress.cpp
+++ b/lldb/source/API/SBAddress.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBAddress.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBProcess.h"
 #include "lldb/API/SBSection.h"
@@ -16,35 +15,34 @@
 #include "lldb/Core/Module.h"
 #include "lldb/Symbol/LineEntry.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/StreamString.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 SBAddress::SBAddress() : m_opaque_up(new Address()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBAddress);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBAddress::SBAddress(const Address &address)
     : m_opaque_up(std::make_unique<Address>(address)) {}
 
 SBAddress::SBAddress(const SBAddress &rhs) : m_opaque_up(new Address()) {
-  LLDB_RECORD_CONSTRUCTOR(SBAddress, (const lldb::SBAddress &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
 
 SBAddress::SBAddress(lldb::SBSection section, lldb::addr_t offset)
     : m_opaque_up(new Address(section.GetSP(), offset)) {
-  LLDB_RECORD_CONSTRUCTOR(SBAddress, (lldb::SBSection, lldb::addr_t), section,
-                          offset);
+  LLDB_INSTRUMENT_VA(this, section, offset);
 }
 
 // Create an address by resolving a load address using the supplied target
 SBAddress::SBAddress(lldb::addr_t load_addr, lldb::SBTarget &target)
     : m_opaque_up(new Address()) {
-  LLDB_RECORD_CONSTRUCTOR(SBAddress, (lldb::addr_t, lldb::SBTarget &),
-                          load_addr, target);
+  LLDB_INSTRUMENT_VA(this, load_addr, target);
 
   SetLoadAddress(load_addr, target);
 }
@@ -52,8 +50,7 @@ SBAddress::SBAddress(lldb::addr_t load_addr, lldb::SBTarget &target)
 SBAddress::~SBAddress() = default;
 
 const SBAddress &SBAddress::operator=(const SBAddress &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBAddress &,
-                     SBAddress, operator=,(const lldb::SBAddress &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -67,31 +64,29 @@ bool lldb::operator==(const SBAddress &lhs, const SBAddress &rhs) {
 }
 
 bool SBAddress::operator!=(const SBAddress &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBAddress, operator!=,(const SBAddress &),
-                           &rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return !(*this == rhs);
 }
 
 bool SBAddress::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBAddress, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBAddress::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBAddress, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up != nullptr && m_opaque_up->IsValid();
 }
 
 void SBAddress::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBAddress, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up = std::make_unique<Address>();
 }
 
 void SBAddress::SetAddress(lldb::SBSection section, lldb::addr_t offset) {
-  LLDB_RECORD_METHOD(void, SBAddress, SetAddress,
-                     (lldb::SBSection, lldb::addr_t), section, offset);
+  LLDB_INSTRUMENT_VA(this, section, offset);
 
   Address &addr = ref();
   addr.SetSection(section.GetSP());
@@ -101,7 +96,7 @@ void SBAddress::SetAddress(lldb::SBSection section, lldb::addr_t offset) {
 void SBAddress::SetAddress(const Address &address) { ref() = address; }
 
 lldb::addr_t SBAddress::GetFileAddress() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBAddress, GetFileAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up->IsValid())
     return m_opaque_up->GetFileAddress();
@@ -110,8 +105,7 @@ lldb::addr_t SBAddress::GetFileAddress() const {
 }
 
 lldb::addr_t SBAddress::GetLoadAddress(const SBTarget &target) const {
-  LLDB_RECORD_METHOD_CONST(lldb::addr_t, SBAddress, GetLoadAddress,
-                           (const lldb::SBTarget &), target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   lldb::addr_t addr = LLDB_INVALID_ADDRESS;
   TargetSP target_sp(target.GetSP());
@@ -126,8 +120,7 @@ lldb::addr_t SBAddress::GetLoadAddress(const SBTarget &target) const {
 }
 
 void SBAddress::SetLoadAddress(lldb::addr_t load_addr, lldb::SBTarget &target) {
-  LLDB_RECORD_METHOD(void, SBAddress, SetLoadAddress,
-                     (lldb::addr_t, lldb::SBTarget &), load_addr, target);
+  LLDB_INSTRUMENT_VA(this, load_addr, target);
 
   // Create the address object if we don't already have one
   ref();
@@ -144,7 +137,7 @@ void SBAddress::SetLoadAddress(lldb::addr_t load_addr, lldb::SBTarget &target) {
 }
 
 bool SBAddress::OffsetAddress(addr_t offset) {
-  LLDB_RECORD_METHOD(bool, SBAddress, OffsetAddress, (lldb::addr_t), offset);
+  LLDB_INSTRUMENT_VA(this, offset);
 
   if (m_opaque_up->IsValid()) {
     addr_t addr_offset = m_opaque_up->GetOffset();
@@ -157,7 +150,7 @@ bool SBAddress::OffsetAddress(addr_t offset) {
 }
 
 lldb::SBSection SBAddress::GetSection() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSection, SBAddress, GetSection);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBSection sb_section;
   if (m_opaque_up->IsValid())
@@ -166,7 +159,7 @@ lldb::SBSection SBAddress::GetSection() {
 }
 
 lldb::addr_t SBAddress::GetOffset() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBAddress, GetOffset);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up->IsValid())
     return m_opaque_up->GetOffset();
@@ -193,8 +186,7 @@ const Address &SBAddress::ref() const {
 Address *SBAddress::get() { return m_opaque_up.get(); }
 
 bool SBAddress::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBAddress, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   // Call "ref()" on the stream to make sure it creates a backing stream in
   // case there isn't one already...
@@ -209,7 +201,7 @@ bool SBAddress::GetDescription(SBStream &description) {
 }
 
 SBModule SBAddress::GetModule() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBModule, SBAddress, GetModule);
+  LLDB_INSTRUMENT_VA(this);
 
   SBModule sb_module;
   if (m_opaque_up->IsValid())
@@ -218,8 +210,7 @@ SBModule SBAddress::GetModule() {
 }
 
 SBSymbolContext SBAddress::GetSymbolContext(uint32_t resolve_scope) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContext, SBAddress, GetSymbolContext,
-                     (uint32_t), resolve_scope);
+  LLDB_INSTRUMENT_VA(this, resolve_scope);
 
   SBSymbolContext sb_sc;
   SymbolContextItem scope = static_cast<SymbolContextItem>(resolve_scope);
@@ -229,7 +220,7 @@ SBSymbolContext SBAddress::GetSymbolContext(uint32_t resolve_scope) {
 }
 
 SBCompileUnit SBAddress::GetCompileUnit() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBCompileUnit, SBAddress, GetCompileUnit);
+  LLDB_INSTRUMENT_VA(this);
 
   SBCompileUnit sb_comp_unit;
   if (m_opaque_up->IsValid())
@@ -238,7 +229,7 @@ SBCompileUnit SBAddress::GetCompileUnit() {
 }
 
 SBFunction SBAddress::GetFunction() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFunction, SBAddress, GetFunction);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFunction sb_function;
   if (m_opaque_up->IsValid())
@@ -247,7 +238,7 @@ SBFunction SBAddress::GetFunction() {
 }
 
 SBBlock SBAddress::GetBlock() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBAddress, GetBlock);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBlock sb_block;
   if (m_opaque_up->IsValid())
@@ -256,7 +247,7 @@ SBBlock SBAddress::GetBlock() {
 }
 
 SBSymbol SBAddress::GetSymbol() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSymbol, SBAddress, GetSymbol);
+  LLDB_INSTRUMENT_VA(this);
 
   SBSymbol sb_symbol;
   if (m_opaque_up->IsValid())
@@ -265,7 +256,7 @@ SBSymbol SBAddress::GetSymbol() {
 }
 
 SBLineEntry SBAddress::GetLineEntry() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBLineEntry, SBAddress, GetLineEntry);
+  LLDB_INSTRUMENT_VA(this);
 
   SBLineEntry sb_line_entry;
   if (m_opaque_up->IsValid()) {
diff --git a/lldb/source/API/SBAttachInfo.cpp b/lldb/source/API/SBAttachInfo.cpp
index 8118088a38ec3..edb4f7104d411 100644
--- a/lldb/source/API/SBAttachInfo.cpp
+++ b/lldb/source/API/SBAttachInfo.cpp
@@ -7,29 +7,29 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBAttachInfo.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBFileSpec.h"
 #include "lldb/API/SBListener.h"
 #include "lldb/Target/Process.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 SBAttachInfo::SBAttachInfo() : m_opaque_sp(new ProcessAttachInfo()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBAttachInfo);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBAttachInfo::SBAttachInfo(lldb::pid_t pid)
     : m_opaque_sp(new ProcessAttachInfo()) {
-  LLDB_RECORD_CONSTRUCTOR(SBAttachInfo, (lldb::pid_t), pid);
+  LLDB_INSTRUMENT_VA(this, pid);
 
   m_opaque_sp->SetProcessID(pid);
 }
 
 SBAttachInfo::SBAttachInfo(const char *path, bool wait_for)
     : m_opaque_sp(new ProcessAttachInfo()) {
-  LLDB_RECORD_CONSTRUCTOR(SBAttachInfo, (const char *, bool), path, wait_for);
+  LLDB_INSTRUMENT_VA(this, path, wait_for);
 
   if (path && path[0])
     m_opaque_sp->GetExecutableFile().SetFile(path, FileSpec::Style::native);
@@ -38,8 +38,7 @@ SBAttachInfo::SBAttachInfo(const char *path, bool wait_for)
 
 SBAttachInfo::SBAttachInfo(const char *path, bool wait_for, bool async)
     : m_opaque_sp(new ProcessAttachInfo()) {
-  LLDB_RECORD_CONSTRUCTOR(SBAttachInfo, (const char *, bool, bool), path,
-                          wait_for, async);
+  LLDB_INSTRUMENT_VA(this, path, wait_for, async);
 
   if (path && path[0])
     m_opaque_sp->GetExecutableFile().SetFile(path, FileSpec::Style::native);
@@ -49,7 +48,7 @@ SBAttachInfo::SBAttachInfo(const char *path, bool wait_for, bool async)
 
 SBAttachInfo::SBAttachInfo(const SBAttachInfo &rhs)
     : m_opaque_sp(new ProcessAttachInfo()) {
-  LLDB_RECORD_CONSTRUCTOR(SBAttachInfo, (const lldb::SBAttachInfo &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = clone(rhs.m_opaque_sp);
 }
@@ -59,8 +58,7 @@ SBAttachInfo::~SBAttachInfo() = default;
 lldb_private::ProcessAttachInfo &SBAttachInfo::ref() { return *m_opaque_sp; }
 
 SBAttachInfo &SBAttachInfo::operator=(const SBAttachInfo &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBAttachInfo &,
-                     SBAttachInfo, operator=,(const lldb::SBAttachInfo &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = clone(rhs.m_opaque_sp);
@@ -68,44 +66,43 @@ SBAttachInfo &SBAttachInfo::operator=(const SBAttachInfo &rhs) {
 }
 
 lldb::pid_t SBAttachInfo::GetProcessID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBAttachInfo, GetProcessID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetProcessID();
 }
 
 void SBAttachInfo::SetProcessID(lldb::pid_t pid) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetProcessID, (lldb::pid_t), pid);
+  LLDB_INSTRUMENT_VA(this, pid);
 
   m_opaque_sp->SetProcessID(pid);
 }
 
 uint32_t SBAttachInfo::GetResumeCount() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetResumeCount);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetResumeCount();
 }
 
 void SBAttachInfo::SetResumeCount(uint32_t c) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetResumeCount, (uint32_t), c);
+  LLDB_INSTRUMENT_VA(this, c);
 
   m_opaque_sp->SetResumeCount(c);
 }
 
 const char *SBAttachInfo::GetProcessPluginName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBAttachInfo, GetProcessPluginName);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetProcessPluginName();
 }
 
 void SBAttachInfo::SetProcessPluginName(const char *plugin_name) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetProcessPluginName, (const char *),
-                     plugin_name);
+  LLDB_INSTRUMENT_VA(this, plugin_name);
 
   return m_opaque_sp->SetProcessPluginName(plugin_name);
 }
 
 void SBAttachInfo::SetExecutable(const char *path) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetExecutable, (const char *), path);
+  LLDB_INSTRUMENT_VA(this, path);
 
   if (path && path[0])
     m_opaque_sp->GetExecutableFile().SetFile(path, FileSpec::Style::native);
@@ -114,8 +111,7 @@ void SBAttachInfo::SetExecutable(const char *path) {
 }
 
 void SBAttachInfo::SetExecutable(SBFileSpec exe_file) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetExecutable, (lldb::SBFileSpec),
-                     exe_file);
+  LLDB_INSTRUMENT_VA(this, exe_file);
 
   if (exe_file.IsValid())
     m_opaque_sp->GetExecutableFile() = exe_file.ref();
@@ -124,137 +120,134 @@ void SBAttachInfo::SetExecutable(SBFileSpec exe_file) {
 }
 
 bool SBAttachInfo::GetWaitForLaunch() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, GetWaitForLaunch);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetWaitForLaunch();
 }
 
 void SBAttachInfo::SetWaitForLaunch(bool b) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetWaitForLaunch, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   m_opaque_sp->SetWaitForLaunch(b);
 }
 
 void SBAttachInfo::SetWaitForLaunch(bool b, bool async) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetWaitForLaunch, (bool, bool), b,
-                     async);
+  LLDB_INSTRUMENT_VA(this, b, async);
 
   m_opaque_sp->SetWaitForLaunch(b);
   m_opaque_sp->SetAsync(async);
 }
 
 bool SBAttachInfo::GetIgnoreExisting() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, GetIgnoreExisting);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetIgnoreExisting();
 }
 
 void SBAttachInfo::SetIgnoreExisting(bool b) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetIgnoreExisting, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   m_opaque_sp->SetIgnoreExisting(b);
 }
 
 uint32_t SBAttachInfo::GetUserID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetUserID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetUserID();
 }
 
 uint32_t SBAttachInfo::GetGroupID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetGroupID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetGroupID();
 }
 
 bool SBAttachInfo::UserIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, UserIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->UserIDIsValid();
 }
 
 bool SBAttachInfo::GroupIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, GroupIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GroupIDIsValid();
 }
 
 void SBAttachInfo::SetUserID(uint32_t uid) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetUserID, (uint32_t), uid);
+  LLDB_INSTRUMENT_VA(this, uid);
 
   m_opaque_sp->SetUserID(uid);
 }
 
 void SBAttachInfo::SetGroupID(uint32_t gid) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetGroupID, (uint32_t), gid);
+  LLDB_INSTRUMENT_VA(this, gid);
 
   m_opaque_sp->SetGroupID(gid);
 }
 
 uint32_t SBAttachInfo::GetEffectiveUserID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetEffectiveUserID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetEffectiveUserID();
 }
 
 uint32_t SBAttachInfo::GetEffectiveGroupID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBAttachInfo, GetEffectiveGroupID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetEffectiveGroupID();
 }
 
 bool SBAttachInfo::EffectiveUserIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, EffectiveUserIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->EffectiveUserIDIsValid();
 }
 
 bool SBAttachInfo::EffectiveGroupIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, EffectiveGroupIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->EffectiveGroupIDIsValid();
 }
 
 void SBAttachInfo::SetEffectiveUserID(uint32_t uid) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetEffectiveUserID, (uint32_t), uid);
+  LLDB_INSTRUMENT_VA(this, uid);
 
   m_opaque_sp->SetEffectiveUserID(uid);
 }
 
 void SBAttachInfo::SetEffectiveGroupID(uint32_t gid) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetEffectiveGroupID, (uint32_t), gid);
+  LLDB_INSTRUMENT_VA(this, gid);
 
   m_opaque_sp->SetEffectiveGroupID(gid);
 }
 
 lldb::pid_t SBAttachInfo::GetParentProcessID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBAttachInfo, GetParentProcessID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetParentProcessID();
 }
 
 void SBAttachInfo::SetParentProcessID(lldb::pid_t pid) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetParentProcessID, (lldb::pid_t),
-                     pid);
+  LLDB_INSTRUMENT_VA(this, pid);
 
   m_opaque_sp->SetParentProcessID(pid);
 }
 
 bool SBAttachInfo::ParentProcessIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBAttachInfo, ParentProcessIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->ParentProcessIDIsValid();
 }
 
 SBListener SBAttachInfo::GetListener() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBListener, SBAttachInfo, GetListener);
+  LLDB_INSTRUMENT_VA(this);
 
   return SBListener(m_opaque_sp->GetListener());
 }
 
 void SBAttachInfo::SetListener(SBListener &listener) {
-  LLDB_RECORD_METHOD(void, SBAttachInfo, SetListener, (lldb::SBListener &),
-                     listener);
+  LLDB_INSTRUMENT_VA(this, listener);
 
   m_opaque_sp->SetListener(listener.GetSP());
 }
diff --git a/lldb/source/API/SBBlock.cpp b/lldb/source/API/SBBlock.cpp
index 720d630491f84..7d7565340836b 100644
--- a/lldb/source/API/SBBlock.cpp
+++ b/lldb/source/API/SBBlock.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBBlock.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBAddress.h"
 #include "lldb/API/SBFileSpec.h"
 #include "lldb/API/SBFrame.h"
@@ -21,22 +20,22 @@
 #include "lldb/Symbol/VariableList.h"
 #include "lldb/Target/StackFrame.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBBlock::SBBlock() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBlock); }
+SBBlock::SBBlock() { LLDB_INSTRUMENT_VA(this); }
 
 SBBlock::SBBlock(lldb_private::Block *lldb_object_ptr)
     : m_opaque_ptr(lldb_object_ptr) {}
 
 SBBlock::SBBlock(const SBBlock &rhs) : m_opaque_ptr(rhs.m_opaque_ptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBBlock, (const lldb::SBBlock &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBBlock &SBBlock::operator=(const SBBlock &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBBlock &,
-                     SBBlock, operator=,(const lldb::SBBlock &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_ptr = rhs.m_opaque_ptr;
   return *this;
@@ -45,17 +44,17 @@ const SBBlock &SBBlock::operator=(const SBBlock &rhs) {
 SBBlock::~SBBlock() { m_opaque_ptr = nullptr; }
 
 bool SBBlock::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBBlock::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr != nullptr;
 }
 
 bool SBBlock::IsInlined() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBlock, IsInlined);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetInlinedFunctionInfo() != nullptr;
@@ -63,7 +62,7 @@ bool SBBlock::IsInlined() const {
 }
 
 const char *SBBlock::GetInlinedName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBlock, GetInlinedName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr) {
     const InlineFunctionInfo *inlined_info =
@@ -76,8 +75,7 @@ const char *SBBlock::GetInlinedName() const {
 }
 
 SBFileSpec SBBlock::GetInlinedCallSiteFile() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBBlock,
-                                   GetInlinedCallSiteFile);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec sb_file;
   if (m_opaque_ptr) {
@@ -90,7 +88,7 @@ SBFileSpec SBBlock::GetInlinedCallSiteFile() const {
 }
 
 uint32_t SBBlock::GetInlinedCallSiteLine() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBlock, GetInlinedCallSiteLine);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr) {
     const InlineFunctionInfo *inlined_info =
@@ -102,7 +100,7 @@ uint32_t SBBlock::GetInlinedCallSiteLine() const {
 }
 
 uint32_t SBBlock::GetInlinedCallSiteColumn() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBlock, GetInlinedCallSiteColumn);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr) {
     const InlineFunctionInfo *inlined_info =
@@ -123,7 +121,7 @@ void SBBlock::AppendVariables(bool can_create, bool get_parent_variables,
 }
 
 SBBlock SBBlock::GetParent() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBBlock, GetParent);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBlock sb_block;
   if (m_opaque_ptr)
@@ -132,7 +130,7 @@ SBBlock SBBlock::GetParent() {
 }
 
 lldb::SBBlock SBBlock::GetContainingInlinedBlock() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBBlock, GetContainingInlinedBlock);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBlock sb_block;
   if (m_opaque_ptr)
@@ -141,7 +139,7 @@ lldb::SBBlock SBBlock::GetContainingInlinedBlock() {
 }
 
 SBBlock SBBlock::GetSibling() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBBlock, GetSibling);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBlock sb_block;
   if (m_opaque_ptr)
@@ -150,7 +148,7 @@ SBBlock SBBlock::GetSibling() {
 }
 
 SBBlock SBBlock::GetFirstChild() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBBlock, GetFirstChild);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBlock sb_block;
   if (m_opaque_ptr)
@@ -163,8 +161,7 @@ lldb_private::Block *SBBlock::GetPtr() { return m_opaque_ptr; }
 void SBBlock::SetPtr(lldb_private::Block *block) { m_opaque_ptr = block; }
 
 bool SBBlock::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBBlock, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -188,7 +185,7 @@ bool SBBlock::GetDescription(SBStream &description) {
 }
 
 uint32_t SBBlock::GetNumRanges() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBBlock, GetNumRanges);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetNumRanges();
@@ -196,8 +193,7 @@ uint32_t SBBlock::GetNumRanges() {
 }
 
 lldb::SBAddress SBBlock::GetRangeStartAddress(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBAddress, SBBlock, GetRangeStartAddress, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   lldb::SBAddress sb_addr;
   if (m_opaque_ptr) {
@@ -210,8 +206,7 @@ lldb::SBAddress SBBlock::GetRangeStartAddress(uint32_t idx) {
 }
 
 lldb::SBAddress SBBlock::GetRangeEndAddress(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBAddress, SBBlock, GetRangeEndAddress, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   lldb::SBAddress sb_addr;
   if (m_opaque_ptr) {
@@ -225,8 +220,7 @@ lldb::SBAddress SBBlock::GetRangeEndAddress(uint32_t idx) {
 }
 
 uint32_t SBBlock::GetRangeIndexForBlockAddress(lldb::SBAddress block_addr) {
-  LLDB_RECORD_METHOD(uint32_t, SBBlock, GetRangeIndexForBlockAddress,
-                     (lldb::SBAddress), block_addr);
+  LLDB_INSTRUMENT_VA(this, block_addr);
 
   if (m_opaque_ptr && block_addr.IsValid()) {
     return m_opaque_ptr->GetRangeIndexContainingAddress(block_addr.ref());
@@ -238,10 +232,7 @@ uint32_t SBBlock::GetRangeIndexForBlockAddress(lldb::SBAddress block_addr) {
 lldb::SBValueList SBBlock::GetVariables(lldb::SBFrame &frame, bool arguments,
                                         bool locals, bool statics,
                                         lldb::DynamicValueType use_dynamic) {
-  LLDB_RECORD_METHOD(
-      lldb::SBValueList, SBBlock, GetVariables,
-      (lldb::SBFrame &, bool, bool, bool, lldb::DynamicValueType), frame,
-      arguments, locals, statics, use_dynamic);
+  LLDB_INSTRUMENT_VA(this, frame, arguments, locals, statics, use_dynamic);
 
   Block *block = GetPtr();
   SBValueList value_list;
@@ -294,9 +285,7 @@ lldb::SBValueList SBBlock::GetVariables(lldb::SBFrame &frame, bool arguments,
 
 lldb::SBValueList SBBlock::GetVariables(lldb::SBTarget &target, bool arguments,
                                         bool locals, bool statics) {
-  LLDB_RECORD_METHOD(lldb::SBValueList, SBBlock, GetVariables,
-                     (lldb::SBTarget &, bool, bool, bool), target, arguments,
-                     locals, statics);
+  LLDB_INSTRUMENT_VA(this, target, arguments, locals, statics);
 
   Block *block = GetPtr();
 
diff --git a/lldb/source/API/SBBreakpoint.cpp b/lldb/source/API/SBBreakpoint.cpp
index 709895e8bbc9f..5fe8f7fe05837 100644
--- a/lldb/source/API/SBBreakpoint.cpp
+++ b/lldb/source/API/SBBreakpoint.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBBreakpoint.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBBreakpointLocation.h"
 #include "lldb/API/SBDebugger.h"
 #include "lldb/API/SBEvent.h"
@@ -16,6 +15,7 @@
 #include "lldb/API/SBStringList.h"
 #include "lldb/API/SBStructuredData.h"
 #include "lldb/API/SBThread.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/Breakpoint/Breakpoint.h"
 #include "lldb/Breakpoint/BreakpointIDList.h"
@@ -45,44 +45,41 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBBreakpoint::SBBreakpoint() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBreakpoint); }
+SBBreakpoint::SBBreakpoint() { LLDB_INSTRUMENT_VA(this); }
 
 SBBreakpoint::SBBreakpoint(const SBBreakpoint &rhs)
     : m_opaque_wp(rhs.m_opaque_wp) {
-  LLDB_RECORD_CONSTRUCTOR(SBBreakpoint, (const lldb::SBBreakpoint &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBBreakpoint::SBBreakpoint(const lldb::BreakpointSP &bp_sp)
     : m_opaque_wp(bp_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBBreakpoint, (const lldb::BreakpointSP &), bp_sp);
+  LLDB_INSTRUMENT_VA(this, bp_sp);
 }
 
 SBBreakpoint::~SBBreakpoint() = default;
 
 const SBBreakpoint &SBBreakpoint::operator=(const SBBreakpoint &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBBreakpoint &,
-                     SBBreakpoint, operator=,(const lldb::SBBreakpoint &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_wp = rhs.m_opaque_wp;
   return *this;
 }
 
 bool SBBreakpoint::operator==(const lldb::SBBreakpoint &rhs) {
-  LLDB_RECORD_METHOD(
-      bool, SBBreakpoint, operator==,(const lldb::SBBreakpoint &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_wp.lock() == rhs.m_opaque_wp.lock();
 }
 
 bool SBBreakpoint::operator!=(const lldb::SBBreakpoint &rhs) {
-  LLDB_RECORD_METHOD(
-      bool, SBBreakpoint, operator!=,(const lldb::SBBreakpoint &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_wp.lock() != rhs.m_opaque_wp.lock();
 }
 
 SBTarget SBBreakpoint::GetTarget() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBTarget, SBBreakpoint, GetTarget);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp)
@@ -92,7 +89,7 @@ SBTarget SBBreakpoint::GetTarget() const {
 }
 
 break_id_t SBBreakpoint::GetID() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::break_id_t, SBBreakpoint, GetID);
+  LLDB_INSTRUMENT_VA(this);
 
   break_id_t break_id = LLDB_INVALID_BREAK_ID;
   BreakpointSP bkpt_sp = GetSP();
@@ -103,11 +100,11 @@ break_id_t SBBreakpoint::GetID() const {
 }
 
 bool SBBreakpoint::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpoint, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBBreakpoint::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpoint, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (!bkpt_sp)
@@ -119,7 +116,7 @@ SBBreakpoint::operator bool() const {
 }
 
 void SBBreakpoint::ClearAllBreakpointSites() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBBreakpoint, ClearAllBreakpointSites);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -130,8 +127,7 @@ void SBBreakpoint::ClearAllBreakpointSites() {
 }
 
 SBBreakpointLocation SBBreakpoint::FindLocationByAddress(addr_t vm_addr) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpointLocation, SBBreakpoint,
-                     FindLocationByAddress, (lldb::addr_t), vm_addr);
+  LLDB_INSTRUMENT_VA(this, vm_addr);
 
   SBBreakpointLocation sb_bp_location;
 
@@ -152,8 +148,7 @@ SBBreakpointLocation SBBreakpoint::FindLocationByAddress(addr_t vm_addr) {
 }
 
 break_id_t SBBreakpoint::FindLocationIDByAddress(addr_t vm_addr) {
-  LLDB_RECORD_METHOD(lldb::break_id_t, SBBreakpoint, FindLocationIDByAddress,
-                     (lldb::addr_t), vm_addr);
+  LLDB_INSTRUMENT_VA(this, vm_addr);
 
   break_id_t break_id = LLDB_INVALID_BREAK_ID;
   BreakpointSP bkpt_sp = GetSP();
@@ -173,8 +168,7 @@ break_id_t SBBreakpoint::FindLocationIDByAddress(addr_t vm_addr) {
 }
 
 SBBreakpointLocation SBBreakpoint::FindLocationByID(break_id_t bp_loc_id) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpointLocation, SBBreakpoint, FindLocationByID,
-                     (lldb::break_id_t), bp_loc_id);
+  LLDB_INSTRUMENT_VA(this, bp_loc_id);
 
   SBBreakpointLocation sb_bp_location;
   BreakpointSP bkpt_sp = GetSP();
@@ -189,8 +183,7 @@ SBBreakpointLocation SBBreakpoint::FindLocationByID(break_id_t bp_loc_id) {
 }
 
 SBBreakpointLocation SBBreakpoint::GetLocationAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpointLocation, SBBreakpoint,
-                     GetLocationAtIndex, (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   SBBreakpointLocation sb_bp_location;
   BreakpointSP bkpt_sp = GetSP();
@@ -205,7 +198,7 @@ SBBreakpointLocation SBBreakpoint::GetLocationAtIndex(uint32_t index) {
 }
 
 void SBBreakpoint::SetEnabled(bool enable) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetEnabled, (bool), enable);
+  LLDB_INSTRUMENT_VA(this, enable);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -217,7 +210,7 @@ void SBBreakpoint::SetEnabled(bool enable) {
 }
 
 bool SBBreakpoint::IsEnabled() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpoint, IsEnabled);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -229,7 +222,7 @@ bool SBBreakpoint::IsEnabled() {
 }
 
 void SBBreakpoint::SetOneShot(bool one_shot) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetOneShot, (bool), one_shot);
+  LLDB_INSTRUMENT_VA(this, one_shot);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -241,7 +234,7 @@ void SBBreakpoint::SetOneShot(bool one_shot) {
 }
 
 bool SBBreakpoint::IsOneShot() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpoint, IsOneShot);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -253,7 +246,7 @@ bool SBBreakpoint::IsOneShot() const {
 }
 
 bool SBBreakpoint::IsInternal() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpoint, IsInternal);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -265,7 +258,7 @@ bool SBBreakpoint::IsInternal() {
 }
 
 void SBBreakpoint::SetIgnoreCount(uint32_t count) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetIgnoreCount, (uint32_t), count);
+  LLDB_INSTRUMENT_VA(this, count);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -277,8 +270,7 @@ void SBBreakpoint::SetIgnoreCount(uint32_t count) {
 }
 
 void SBBreakpoint::SetCondition(const char *condition) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetCondition, (const char *),
-                     condition);
+  LLDB_INSTRUMENT_VA(this, condition);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -289,7 +281,7 @@ void SBBreakpoint::SetCondition(const char *condition) {
 }
 
 const char *SBBreakpoint::GetCondition() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBBreakpoint, GetCondition);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -301,8 +293,7 @@ const char *SBBreakpoint::GetCondition() {
 }
 
 void SBBreakpoint::SetAutoContinue(bool auto_continue) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetAutoContinue, (bool),
-                     auto_continue);
+  LLDB_INSTRUMENT_VA(this, auto_continue);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -313,7 +304,7 @@ void SBBreakpoint::SetAutoContinue(bool auto_continue) {
 }
 
 bool SBBreakpoint::GetAutoContinue() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpoint, GetAutoContinue);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -325,7 +316,7 @@ bool SBBreakpoint::GetAutoContinue() {
 }
 
 uint32_t SBBreakpoint::GetHitCount() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpoint, GetHitCount);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t count = 0;
   BreakpointSP bkpt_sp = GetSP();
@@ -339,7 +330,7 @@ uint32_t SBBreakpoint::GetHitCount() const {
 }
 
 uint32_t SBBreakpoint::GetIgnoreCount() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpoint, GetIgnoreCount);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t count = 0;
   BreakpointSP bkpt_sp = GetSP();
@@ -353,7 +344,7 @@ uint32_t SBBreakpoint::GetIgnoreCount() const {
 }
 
 void SBBreakpoint::SetThreadID(tid_t tid) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetThreadID, (lldb::tid_t), tid);
+  LLDB_INSTRUMENT_VA(this, tid);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -364,7 +355,7 @@ void SBBreakpoint::SetThreadID(tid_t tid) {
 }
 
 tid_t SBBreakpoint::GetThreadID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::tid_t, SBBreakpoint, GetThreadID);
+  LLDB_INSTRUMENT_VA(this);
 
   tid_t tid = LLDB_INVALID_THREAD_ID;
   BreakpointSP bkpt_sp = GetSP();
@@ -378,7 +369,7 @@ tid_t SBBreakpoint::GetThreadID() {
 }
 
 void SBBreakpoint::SetThreadIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetThreadIndex, (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -389,7 +380,7 @@ void SBBreakpoint::SetThreadIndex(uint32_t index) {
 }
 
 uint32_t SBBreakpoint::GetThreadIndex() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpoint, GetThreadIndex);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t thread_idx = UINT32_MAX;
   BreakpointSP bkpt_sp = GetSP();
@@ -406,8 +397,7 @@ uint32_t SBBreakpoint::GetThreadIndex() const {
 }
 
 void SBBreakpoint::SetThreadName(const char *thread_name) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetThreadName, (const char *),
-                     thread_name);
+  LLDB_INSTRUMENT_VA(this, thread_name);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -419,7 +409,7 @@ void SBBreakpoint::SetThreadName(const char *thread_name) {
 }
 
 const char *SBBreakpoint::GetThreadName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpoint, GetThreadName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   BreakpointSP bkpt_sp = GetSP();
@@ -436,8 +426,7 @@ const char *SBBreakpoint::GetThreadName() const {
 }
 
 void SBBreakpoint::SetQueueName(const char *queue_name) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetQueueName, (const char *),
-                     queue_name);
+  LLDB_INSTRUMENT_VA(this, queue_name);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -448,7 +437,7 @@ void SBBreakpoint::SetQueueName(const char *queue_name) {
 }
 
 const char *SBBreakpoint::GetQueueName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpoint, GetQueueName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   BreakpointSP bkpt_sp = GetSP();
@@ -465,8 +454,7 @@ const char *SBBreakpoint::GetQueueName() const {
 }
 
 size_t SBBreakpoint::GetNumResolvedLocations() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(size_t, SBBreakpoint,
-                                   GetNumResolvedLocations);
+  LLDB_INSTRUMENT_VA(this);
 
   size_t num_resolved = 0;
   BreakpointSP bkpt_sp = GetSP();
@@ -479,7 +467,7 @@ size_t SBBreakpoint::GetNumResolvedLocations() const {
 }
 
 size_t SBBreakpoint::GetNumLocations() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(size_t, SBBreakpoint, GetNumLocations);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   size_t num_locs = 0;
@@ -492,8 +480,7 @@ size_t SBBreakpoint::GetNumLocations() const {
 }
 
 void SBBreakpoint::SetCommandLineCommands(SBStringList &commands) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetCommandLineCommands,
-                     (lldb::SBStringList &), commands);
+  LLDB_INSTRUMENT_VA(this, commands);
 
   BreakpointSP bkpt_sp = GetSP();
   if (!bkpt_sp)
@@ -510,8 +497,7 @@ void SBBreakpoint::SetCommandLineCommands(SBStringList &commands) {
 }
 
 bool SBBreakpoint::GetCommandLineCommands(SBStringList &commands) {
-  LLDB_RECORD_METHOD(bool, SBBreakpoint, GetCommandLineCommands,
-                     (lldb::SBStringList &), commands);
+  LLDB_INSTRUMENT_VA(this, commands);
 
   BreakpointSP bkpt_sp = GetSP();
   if (!bkpt_sp)
@@ -525,14 +511,13 @@ bool SBBreakpoint::GetCommandLineCommands(SBStringList &commands) {
 }
 
 bool SBBreakpoint::GetDescription(SBStream &s) {
-  LLDB_RECORD_METHOD(bool, SBBreakpoint, GetDescription, (lldb::SBStream &), s);
+  LLDB_INSTRUMENT_VA(this, s);
 
   return GetDescription(s, true);
 }
 
 bool SBBreakpoint::GetDescription(SBStream &s, bool include_locations) {
-  LLDB_RECORD_METHOD(bool, SBBreakpoint, GetDescription,
-                     (lldb::SBStream &, bool), s, include_locations);
+  LLDB_INSTRUMENT_VA(this, s, include_locations);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp) {
@@ -552,8 +537,7 @@ bool SBBreakpoint::GetDescription(SBStream &s, bool include_locations) {
 }
 
 SBError SBBreakpoint::AddLocation(SBAddress &address) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBBreakpoint, AddLocation,
-                     (lldb::SBAddress &), address);
+  LLDB_INSTRUMENT_VA(this, address);
 
   BreakpointSP bkpt_sp = GetSP();
   SBError error;
@@ -586,8 +570,7 @@ SBError SBBreakpoint::AddLocation(SBAddress &address) {
 }
 
 SBStructuredData SBBreakpoint::SerializeToStructuredData() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStructuredData, SBBreakpoint,
-                             SerializeToStructuredData);
+  LLDB_INSTRUMENT_VA(this);
 
   SBStructuredData data;
   BreakpointSP bkpt_sp = GetSP();
@@ -601,8 +584,7 @@ SBStructuredData SBBreakpoint::SerializeToStructuredData() {
 }
 
 void SBBreakpoint::SetCallback(SBBreakpointHitCallback callback, void *baton) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, SetCallback,
-                     (lldb::SBBreakpointHitCallback, void *), callback, baton);
+  LLDB_INSTRUMENT_VA(this, callback, baton);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -618,8 +600,7 @@ void SBBreakpoint::SetCallback(SBBreakpointHitCallback callback, void *baton) {
 
 void SBBreakpoint::SetScriptCallbackFunction(
   const char *callback_function_name) {
-LLDB_RECORD_METHOD(void, SBBreakpoint, SetScriptCallbackFunction,
-                   (const char *), callback_function_name);
+  LLDB_INSTRUMENT_VA(this, callback_function_name);
   SBStructuredData empty_args;
   SetScriptCallbackFunction(callback_function_name, empty_args);
 }
@@ -627,8 +608,7 @@ LLDB_RECORD_METHOD(void, SBBreakpoint, SetScriptCallbackFunction,
 SBError SBBreakpoint::SetScriptCallbackFunction(
     const char *callback_function_name,
     SBStructuredData &extra_args) {
-  LLDB_RECORD_METHOD(SBError, SBBreakpoint, SetScriptCallbackFunction,
-  (const char *, SBStructuredData &), callback_function_name, extra_args);
+  LLDB_INSTRUMENT_VA(this, callback_function_name, extra_args);
   SBError sb_error;
   BreakpointSP bkpt_sp = GetSP();
 
@@ -652,8 +632,7 @@ SBError SBBreakpoint::SetScriptCallbackFunction(
 }
 
 SBError SBBreakpoint::SetScriptCallbackBody(const char *callback_body_text) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBBreakpoint, SetScriptCallbackBody,
-                     (const char *), callback_body_text);
+  LLDB_INSTRUMENT_VA(this, callback_body_text);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -675,15 +654,14 @@ SBError SBBreakpoint::SetScriptCallbackBody(const char *callback_body_text) {
 }
 
 bool SBBreakpoint::AddName(const char *new_name) {
-  LLDB_RECORD_METHOD(bool, SBBreakpoint, AddName, (const char *), new_name);
+  LLDB_INSTRUMENT_VA(this, new_name);
 
   SBError status = AddNameWithErrorHandling(new_name);
   return status.Success();
 }
 
 SBError SBBreakpoint::AddNameWithErrorHandling(const char *new_name) {
-  LLDB_RECORD_METHOD(SBError, SBBreakpoint, AddNameWithErrorHandling,
-                     (const char *), new_name);
+  LLDB_INSTRUMENT_VA(this, new_name);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -702,8 +680,7 @@ SBError SBBreakpoint::AddNameWithErrorHandling(const char *new_name) {
 }
 
 void SBBreakpoint::RemoveName(const char *name_to_remove) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, RemoveName, (const char *),
-                     name_to_remove);
+  LLDB_INSTRUMENT_VA(this, name_to_remove);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -716,7 +693,7 @@ void SBBreakpoint::RemoveName(const char *name_to_remove) {
 }
 
 bool SBBreakpoint::MatchesName(const char *name) {
-  LLDB_RECORD_METHOD(bool, SBBreakpoint, MatchesName, (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -730,8 +707,7 @@ bool SBBreakpoint::MatchesName(const char *name) {
 }
 
 void SBBreakpoint::GetNames(SBStringList &names) {
-  LLDB_RECORD_METHOD(void, SBBreakpoint, GetNames, (lldb::SBStringList &),
-                     names);
+  LLDB_INSTRUMENT_VA(this, names);
 
   BreakpointSP bkpt_sp = GetSP();
 
@@ -747,8 +723,7 @@ void SBBreakpoint::GetNames(SBStringList &names) {
 }
 
 bool SBBreakpoint::EventIsBreakpointEvent(const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBBreakpoint, EventIsBreakpointEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Breakpoint::BreakpointEventData::GetEventDataFromEvent(event.get()) !=
          nullptr;
@@ -756,9 +731,7 @@ bool SBBreakpoint::EventIsBreakpointEvent(const lldb::SBEvent &event) {
 
 BreakpointEventType
 SBBreakpoint::GetBreakpointEventTypeFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::BreakpointEventType, SBBreakpoint,
-                            GetBreakpointEventTypeFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   if (event.IsValid())
     return Breakpoint::BreakpointEventData::GetBreakpointEventTypeFromEvent(
@@ -767,9 +740,7 @@ SBBreakpoint::GetBreakpointEventTypeFromEvent(const SBEvent &event) {
 }
 
 SBBreakpoint SBBreakpoint::GetBreakpointFromEvent(const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBBreakpoint, SBBreakpoint,
-                            GetBreakpointFromEvent, (const lldb::SBEvent &),
-                            event);
+  LLDB_INSTRUMENT_VA(event);
 
   if (event.IsValid())
     return SBBreakpoint(
@@ -780,9 +751,7 @@ SBBreakpoint SBBreakpoint::GetBreakpointFromEvent(const lldb::SBEvent &event) {
 SBBreakpointLocation
 SBBreakpoint::GetBreakpointLocationAtIndexFromEvent(const lldb::SBEvent &event,
                                                     uint32_t loc_idx) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBBreakpointLocation, SBBreakpoint,
-                            GetBreakpointLocationAtIndexFromEvent,
-                            (const lldb::SBEvent &, uint32_t), event, loc_idx);
+  LLDB_INSTRUMENT_VA(event, loc_idx);
 
   SBBreakpointLocation sb_breakpoint_loc;
   if (event.IsValid())
@@ -794,9 +763,7 @@ SBBreakpoint::GetBreakpointLocationAtIndexFromEvent(const lldb::SBEvent &event,
 
 uint32_t
 SBBreakpoint::GetNumBreakpointLocationsFromEvent(const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(uint32_t, SBBreakpoint,
-                            GetNumBreakpointLocationsFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   uint32_t num_locations = 0;
   if (event.IsValid())
@@ -807,7 +774,7 @@ SBBreakpoint::GetNumBreakpointLocationsFromEvent(const lldb::SBEvent &event) {
 }
 
 bool SBBreakpoint::IsHardware() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpoint, IsHardware);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointSP bkpt_sp = GetSP();
   if (bkpt_sp)
@@ -903,13 +870,13 @@ class SBBreakpointListImpl {
 
 SBBreakpointList::SBBreakpointList(SBTarget &target)
     : m_opaque_sp(new SBBreakpointListImpl(target.GetSP())) {
-  LLDB_RECORD_CONSTRUCTOR(SBBreakpointList, (lldb::SBTarget &), target);
+  LLDB_INSTRUMENT_VA(this, target);
 }
 
 SBBreakpointList::~SBBreakpointList() = default;
 
 size_t SBBreakpointList::GetSize() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(size_t, SBBreakpointList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!m_opaque_sp)
     return 0;
@@ -918,8 +885,7 @@ size_t SBBreakpointList::GetSize() const {
 }
 
 SBBreakpoint SBBreakpointList::GetBreakpointAtIndex(size_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBBreakpointList, GetBreakpointAtIndex,
-                     (size_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   if (!m_opaque_sp)
     return SBBreakpoint();
@@ -929,8 +895,7 @@ SBBreakpoint SBBreakpointList::GetBreakpointAtIndex(size_t idx) {
 }
 
 SBBreakpoint SBBreakpointList::FindBreakpointByID(lldb::break_id_t id) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBBreakpointList, FindBreakpointByID,
-                     (lldb::break_id_t), id);
+  LLDB_INSTRUMENT_VA(this, id);
 
   if (!m_opaque_sp)
     return SBBreakpoint();
@@ -939,8 +904,7 @@ SBBreakpoint SBBreakpointList::FindBreakpointByID(lldb::break_id_t id) {
 }
 
 void SBBreakpointList::Append(const SBBreakpoint &sb_bkpt) {
-  LLDB_RECORD_METHOD(void, SBBreakpointList, Append,
-                     (const lldb::SBBreakpoint &), sb_bkpt);
+  LLDB_INSTRUMENT_VA(this, sb_bkpt);
 
   if (!sb_bkpt.IsValid())
     return;
@@ -950,8 +914,7 @@ void SBBreakpointList::Append(const SBBreakpoint &sb_bkpt) {
 }
 
 void SBBreakpointList::AppendByID(lldb::break_id_t id) {
-  LLDB_RECORD_METHOD(void, SBBreakpointList, AppendByID, (lldb::break_id_t),
-                     id);
+  LLDB_INSTRUMENT_VA(this, id);
 
   if (!m_opaque_sp)
     return;
@@ -959,8 +922,7 @@ void SBBreakpointList::AppendByID(lldb::break_id_t id) {
 }
 
 bool SBBreakpointList::AppendIfUnique(const SBBreakpoint &sb_bkpt) {
-  LLDB_RECORD_METHOD(bool, SBBreakpointList, AppendIfUnique,
-                     (const lldb::SBBreakpoint &), sb_bkpt);
+  LLDB_INSTRUMENT_VA(this, sb_bkpt);
 
   if (!sb_bkpt.IsValid())
     return false;
@@ -970,7 +932,7 @@ bool SBBreakpointList::AppendIfUnique(const SBBreakpoint &sb_bkpt) {
 }
 
 void SBBreakpointList::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBBreakpointList, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     m_opaque_sp->Clear();
diff --git a/lldb/source/API/SBBreakpointLocation.cpp b/lldb/source/API/SBBreakpointLocation.cpp
index c2e0001bd6c5a..9143174377236 100644
--- a/lldb/source/API/SBBreakpointLocation.cpp
+++ b/lldb/source/API/SBBreakpointLocation.cpp
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBBreakpointLocation.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBAddress.h"
 #include "lldb/API/SBDebugger.h"
 #include "lldb/API/SBDefines.h"
 #include "lldb/API/SBStream.h"
-#include "lldb/API/SBStructuredData.h"
 #include "lldb/API/SBStringList.h"
+#include "lldb/API/SBStructuredData.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/Breakpoint/Breakpoint.h"
 #include "lldb/Breakpoint/BreakpointLocation.h"
@@ -31,29 +31,22 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBBreakpointLocation::SBBreakpointLocation() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBreakpointLocation);
-}
+SBBreakpointLocation::SBBreakpointLocation() { LLDB_INSTRUMENT_VA(this); }
 
 SBBreakpointLocation::SBBreakpointLocation(
     const lldb::BreakpointLocationSP &break_loc_sp)
     : m_opaque_wp(break_loc_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBBreakpointLocation,
-                          (const lldb::BreakpointLocationSP &), break_loc_sp);
+  LLDB_INSTRUMENT_VA(this, break_loc_sp);
 }
 
 SBBreakpointLocation::SBBreakpointLocation(const SBBreakpointLocation &rhs)
     : m_opaque_wp(rhs.m_opaque_wp) {
-  LLDB_RECORD_CONSTRUCTOR(SBBreakpointLocation,
-                          (const lldb::SBBreakpointLocation &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBBreakpointLocation &SBBreakpointLocation::
 operator=(const SBBreakpointLocation &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBBreakpointLocation &,
-      SBBreakpointLocation, operator=,(const lldb::SBBreakpointLocation &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_wp = rhs.m_opaque_wp;
   return *this;
@@ -66,17 +59,17 @@ BreakpointLocationSP SBBreakpointLocation::GetSP() const {
 }
 
 bool SBBreakpointLocation::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointLocation, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBBreakpointLocation::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointLocation, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return bool(GetSP());
 }
 
 SBAddress SBBreakpointLocation::GetAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBBreakpointLocation, GetAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -87,8 +80,7 @@ SBAddress SBBreakpointLocation::GetAddress() {
 }
 
 addr_t SBBreakpointLocation::GetLoadAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBBreakpointLocation,
-                             GetLoadAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   addr_t ret_addr = LLDB_INVALID_ADDRESS;
   BreakpointLocationSP loc_sp = GetSP();
@@ -103,7 +95,7 @@ addr_t SBBreakpointLocation::GetLoadAddress() {
 }
 
 void SBBreakpointLocation::SetEnabled(bool enabled) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetEnabled, (bool), enabled);
+  LLDB_INSTRUMENT_VA(this, enabled);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -114,7 +106,7 @@ void SBBreakpointLocation::SetEnabled(bool enabled) {
 }
 
 bool SBBreakpointLocation::IsEnabled() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointLocation, IsEnabled);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -126,7 +118,7 @@ bool SBBreakpointLocation::IsEnabled() {
 }
 
 uint32_t SBBreakpointLocation::GetHitCount() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBBreakpointLocation, GetHitCount);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -138,7 +130,7 @@ uint32_t SBBreakpointLocation::GetHitCount() {
 }
 
 uint32_t SBBreakpointLocation::GetIgnoreCount() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBBreakpointLocation, GetIgnoreCount);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -150,7 +142,7 @@ uint32_t SBBreakpointLocation::GetIgnoreCount() {
 }
 
 void SBBreakpointLocation::SetIgnoreCount(uint32_t n) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetIgnoreCount, (uint32_t), n);
+  LLDB_INSTRUMENT_VA(this, n);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -161,8 +153,7 @@ void SBBreakpointLocation::SetIgnoreCount(uint32_t n) {
 }
 
 void SBBreakpointLocation::SetCondition(const char *condition) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetCondition, (const char *),
-                     condition);
+  LLDB_INSTRUMENT_VA(this, condition);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -173,7 +164,7 @@ void SBBreakpointLocation::SetCondition(const char *condition) {
 }
 
 const char *SBBreakpointLocation::GetCondition() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBBreakpointLocation, GetCondition);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -185,8 +176,7 @@ const char *SBBreakpointLocation::GetCondition() {
 }
 
 void SBBreakpointLocation::SetAutoContinue(bool auto_continue) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetAutoContinue, (bool),
-                     auto_continue);
+  LLDB_INSTRUMENT_VA(this, auto_continue);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -197,7 +187,7 @@ void SBBreakpointLocation::SetAutoContinue(bool auto_continue) {
 }
 
 bool SBBreakpointLocation::GetAutoContinue() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointLocation, GetAutoContinue);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -210,16 +200,13 @@ bool SBBreakpointLocation::GetAutoContinue() {
 
 void SBBreakpointLocation::SetScriptCallbackFunction(
   const char *callback_function_name) {
-LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetScriptCallbackFunction,
-                   (const char *), callback_function_name);
+  LLDB_INSTRUMENT_VA(this, callback_function_name);
 }
 
 SBError SBBreakpointLocation::SetScriptCallbackFunction(
     const char *callback_function_name,
     SBStructuredData &extra_args) {
-  LLDB_RECORD_METHOD(SBError, SBBreakpointLocation, SetScriptCallbackFunction,
-                     (const char *, SBStructuredData &), callback_function_name,
-                     extra_args);
+  LLDB_INSTRUMENT_VA(this, callback_function_name, extra_args);
   SBError sb_error;
   BreakpointLocationSP loc_sp = GetSP();
 
@@ -245,8 +232,7 @@ SBError SBBreakpointLocation::SetScriptCallbackFunction(
 
 SBError
 SBBreakpointLocation::SetScriptCallbackBody(const char *callback_body_text) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBBreakpointLocation, SetScriptCallbackBody,
-                     (const char *), callback_body_text);
+  LLDB_INSTRUMENT_VA(this, callback_body_text);
 
   BreakpointLocationSP loc_sp = GetSP();
 
@@ -269,8 +255,7 @@ SBBreakpointLocation::SetScriptCallbackBody(const char *callback_body_text) {
 }
 
 void SBBreakpointLocation::SetCommandLineCommands(SBStringList &commands) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetCommandLineCommands,
-                     (lldb::SBStringList &), commands);
+  LLDB_INSTRUMENT_VA(this, commands);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (!loc_sp)
@@ -287,8 +272,7 @@ void SBBreakpointLocation::SetCommandLineCommands(SBStringList &commands) {
 }
 
 bool SBBreakpointLocation::GetCommandLineCommands(SBStringList &commands) {
-  LLDB_RECORD_METHOD(bool, SBBreakpointLocation, GetCommandLineCommands,
-                     (lldb::SBStringList &), commands);
+  LLDB_INSTRUMENT_VA(this, commands);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (!loc_sp)
@@ -302,8 +286,7 @@ bool SBBreakpointLocation::GetCommandLineCommands(SBStringList &commands) {
 }
 
 void SBBreakpointLocation::SetThreadID(tid_t thread_id) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetThreadID, (lldb::tid_t),
-                     thread_id);
+  LLDB_INSTRUMENT_VA(this, thread_id);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -314,7 +297,7 @@ void SBBreakpointLocation::SetThreadID(tid_t thread_id) {
 }
 
 tid_t SBBreakpointLocation::GetThreadID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::tid_t, SBBreakpointLocation, GetThreadID);
+  LLDB_INSTRUMENT_VA(this);
 
   tid_t tid = LLDB_INVALID_THREAD_ID;
   BreakpointLocationSP loc_sp = GetSP();
@@ -327,8 +310,7 @@ tid_t SBBreakpointLocation::GetThreadID() {
 }
 
 void SBBreakpointLocation::SetThreadIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetThreadIndex, (uint32_t),
-                     index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -339,8 +321,7 @@ void SBBreakpointLocation::SetThreadIndex(uint32_t index) {
 }
 
 uint32_t SBBreakpointLocation::GetThreadIndex() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpointLocation,
-                                   GetThreadIndex);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t thread_idx = UINT32_MAX;
   BreakpointLocationSP loc_sp = GetSP();
@@ -353,8 +334,7 @@ uint32_t SBBreakpointLocation::GetThreadIndex() const {
 }
 
 void SBBreakpointLocation::SetThreadName(const char *thread_name) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetThreadName, (const char *),
-                     thread_name);
+  LLDB_INSTRUMENT_VA(this, thread_name);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -365,8 +345,7 @@ void SBBreakpointLocation::SetThreadName(const char *thread_name) {
 }
 
 const char *SBBreakpointLocation::GetThreadName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointLocation,
-                                   GetThreadName);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -378,8 +357,7 @@ const char *SBBreakpointLocation::GetThreadName() const {
 }
 
 void SBBreakpointLocation::SetQueueName(const char *queue_name) {
-  LLDB_RECORD_METHOD(void, SBBreakpointLocation, SetQueueName, (const char *),
-                     queue_name);
+  LLDB_INSTRUMENT_VA(this, queue_name);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -390,8 +368,7 @@ void SBBreakpointLocation::SetQueueName(const char *queue_name) {
 }
 
 const char *SBBreakpointLocation::GetQueueName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointLocation,
-                                   GetQueueName);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -403,7 +380,7 @@ const char *SBBreakpointLocation::GetQueueName() const {
 }
 
 bool SBBreakpointLocation::IsResolved() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointLocation, IsResolved);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -422,9 +399,7 @@ void SBBreakpointLocation::SetLocation(
 
 bool SBBreakpointLocation::GetDescription(SBStream &description,
                                           DescriptionLevel level) {
-  LLDB_RECORD_METHOD(bool, SBBreakpointLocation, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     level);
+  LLDB_INSTRUMENT_VA(this, description, level);
 
   Stream &strm = description.ref();
   BreakpointLocationSP loc_sp = GetSP();
@@ -441,7 +416,7 @@ bool SBBreakpointLocation::GetDescription(SBStream &description,
 }
 
 break_id_t SBBreakpointLocation::GetID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::break_id_t, SBBreakpointLocation, GetID);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
   if (loc_sp) {
@@ -453,8 +428,7 @@ break_id_t SBBreakpointLocation::GetID() {
 }
 
 SBBreakpoint SBBreakpointLocation::GetBreakpoint() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBreakpoint, SBBreakpointLocation,
-                             GetBreakpoint);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointLocationSP loc_sp = GetSP();
 
diff --git a/lldb/source/API/SBBreakpointName.cpp b/lldb/source/API/SBBreakpointName.cpp
index 2937cb9e03d8f..796229d04ce49 100644
--- a/lldb/source/API/SBBreakpointName.cpp
+++ b/lldb/source/API/SBBreakpointName.cpp
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBBreakpointName.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBDebugger.h"
 #include "lldb/API/SBError.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBStringList.h"
 #include "lldb/API/SBStructuredData.h"
 #include "lldb/API/SBTarget.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/Breakpoint/BreakpointName.h"
 #include "lldb/Breakpoint/StoppointCallbackContext.h"
@@ -107,13 +107,10 @@ lldb_private::BreakpointName *SBBreakpointNameImpl::GetBreakpointName() const {
 
 } // namespace lldb
 
-SBBreakpointName::SBBreakpointName() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBreakpointName);
-}
+SBBreakpointName::SBBreakpointName() { LLDB_INSTRUMENT_VA(this); }
 
 SBBreakpointName::SBBreakpointName(SBTarget &sb_target, const char *name) {
-  LLDB_RECORD_CONSTRUCTOR(SBBreakpointName, (lldb::SBTarget &, const char *),
-                          sb_target, name);
+  LLDB_INSTRUMENT_VA(this, sb_target, name);
 
   m_impl_up = std::make_unique<SBBreakpointNameImpl>(sb_target, name);
   // Call FindBreakpointName here to make sure the name is valid, reset if not:
@@ -123,8 +120,7 @@ SBBreakpointName::SBBreakpointName(SBTarget &sb_target, const char *name) {
 }
 
 SBBreakpointName::SBBreakpointName(SBBreakpoint &sb_bkpt, const char *name) {
-  LLDB_RECORD_CONSTRUCTOR(SBBreakpointName,
-                          (lldb::SBBreakpoint &, const char *), sb_bkpt, name);
+  LLDB_INSTRUMENT_VA(this, sb_bkpt, name);
 
   if (!sb_bkpt.IsValid()) {
     m_impl_up.reset();
@@ -149,8 +145,7 @@ SBBreakpointName::SBBreakpointName(SBBreakpoint &sb_bkpt, const char *name) {
 }
 
 SBBreakpointName::SBBreakpointName(const SBBreakpointName &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBBreakpointName, (const lldb::SBBreakpointName &),
-                          rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!rhs.m_impl_up)
     return;
@@ -163,9 +158,7 @@ SBBreakpointName::~SBBreakpointName() = default;
 
 const SBBreakpointName &SBBreakpointName::
 operator=(const SBBreakpointName &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBBreakpointName &,
-      SBBreakpointName, operator=,(const lldb::SBBreakpointName &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!rhs.m_impl_up) {
     m_impl_up.reset();
@@ -178,25 +171,23 @@ operator=(const SBBreakpointName &rhs) {
 }
 
 bool SBBreakpointName::operator==(const lldb::SBBreakpointName &rhs) {
-  LLDB_RECORD_METHOD(
-      bool, SBBreakpointName, operator==,(const lldb::SBBreakpointName &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return *m_impl_up == *rhs.m_impl_up;
 }
 
 bool SBBreakpointName::operator!=(const lldb::SBBreakpointName &rhs) {
-  LLDB_RECORD_METHOD(
-      bool, SBBreakpointName, operator!=,(const lldb::SBBreakpointName &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return *m_impl_up != *rhs.m_impl_up;
 }
 
 bool SBBreakpointName::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointName, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBBreakpointName::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointName, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!m_impl_up)
     return false;
@@ -204,7 +195,7 @@ SBBreakpointName::operator bool() const {
 }
 
 const char *SBBreakpointName::GetName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointName, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!m_impl_up)
     return "<Invalid Breakpoint Name Object>";
@@ -212,7 +203,7 @@ const char *SBBreakpointName::GetName() const {
 }
 
 void SBBreakpointName::SetEnabled(bool enable) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetEnabled, (bool), enable);
+  LLDB_INSTRUMENT_VA(this, enable);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -236,7 +227,7 @@ void SBBreakpointName::UpdateName(BreakpointName &bp_name) {
 }
 
 bool SBBreakpointName::IsEnabled() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointName, IsEnabled);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -249,7 +240,7 @@ bool SBBreakpointName::IsEnabled() {
 }
 
 void SBBreakpointName::SetOneShot(bool one_shot) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetOneShot, (bool), one_shot);
+  LLDB_INSTRUMENT_VA(this, one_shot);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -263,7 +254,7 @@ void SBBreakpointName::SetOneShot(bool one_shot) {
 }
 
 bool SBBreakpointName::IsOneShot() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointName, IsOneShot);
+  LLDB_INSTRUMENT_VA(this);
 
   const BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -276,7 +267,7 @@ bool SBBreakpointName::IsOneShot() const {
 }
 
 void SBBreakpointName::SetIgnoreCount(uint32_t count) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetIgnoreCount, (uint32_t), count);
+  LLDB_INSTRUMENT_VA(this, count);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -290,7 +281,7 @@ void SBBreakpointName::SetIgnoreCount(uint32_t count) {
 }
 
 uint32_t SBBreakpointName::GetIgnoreCount() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpointName, GetIgnoreCount);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -303,8 +294,7 @@ uint32_t SBBreakpointName::GetIgnoreCount() const {
 }
 
 void SBBreakpointName::SetCondition(const char *condition) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetCondition, (const char *),
-                     condition);
+  LLDB_INSTRUMENT_VA(this, condition);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -318,7 +308,7 @@ void SBBreakpointName::SetCondition(const char *condition) {
 }
 
 const char *SBBreakpointName::GetCondition() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBBreakpointName, GetCondition);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -331,8 +321,7 @@ const char *SBBreakpointName::GetCondition() {
 }
 
 void SBBreakpointName::SetAutoContinue(bool auto_continue) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetAutoContinue, (bool),
-                     auto_continue);
+  LLDB_INSTRUMENT_VA(this, auto_continue);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -346,7 +335,7 @@ void SBBreakpointName::SetAutoContinue(bool auto_continue) {
 }
 
 bool SBBreakpointName::GetAutoContinue() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointName, GetAutoContinue);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -359,7 +348,7 @@ bool SBBreakpointName::GetAutoContinue() {
 }
 
 void SBBreakpointName::SetThreadID(tid_t tid) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetThreadID, (lldb::tid_t), tid);
+  LLDB_INSTRUMENT_VA(this, tid);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -373,7 +362,7 @@ void SBBreakpointName::SetThreadID(tid_t tid) {
 }
 
 tid_t SBBreakpointName::GetThreadID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::tid_t, SBBreakpointName, GetThreadID);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -386,7 +375,7 @@ tid_t SBBreakpointName::GetThreadID() {
 }
 
 void SBBreakpointName::SetThreadIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetThreadIndex, (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -400,7 +389,7 @@ void SBBreakpointName::SetThreadIndex(uint32_t index) {
 }
 
 uint32_t SBBreakpointName::GetThreadIndex() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBBreakpointName, GetThreadIndex);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -413,8 +402,7 @@ uint32_t SBBreakpointName::GetThreadIndex() const {
 }
 
 void SBBreakpointName::SetThreadName(const char *thread_name) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetThreadName, (const char *),
-                     thread_name);
+  LLDB_INSTRUMENT_VA(this, thread_name);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -428,8 +416,7 @@ void SBBreakpointName::SetThreadName(const char *thread_name) {
 }
 
 const char *SBBreakpointName::GetThreadName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointName,
-                                   GetThreadName);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -442,8 +429,7 @@ const char *SBBreakpointName::GetThreadName() const {
 }
 
 void SBBreakpointName::SetQueueName(const char *queue_name) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetQueueName, (const char *),
-                     queue_name);
+  LLDB_INSTRUMENT_VA(this, queue_name);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -457,8 +443,7 @@ void SBBreakpointName::SetQueueName(const char *queue_name) {
 }
 
 const char *SBBreakpointName::GetQueueName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointName,
-                                   GetQueueName);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -471,8 +456,7 @@ const char *SBBreakpointName::GetQueueName() const {
 }
 
 void SBBreakpointName::SetCommandLineCommands(SBStringList &commands) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetCommandLineCommands,
-                     (lldb::SBStringList &), commands);
+  LLDB_INSTRUMENT_VA(this, commands);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -491,8 +475,7 @@ void SBBreakpointName::SetCommandLineCommands(SBStringList &commands) {
 }
 
 bool SBBreakpointName::GetCommandLineCommands(SBStringList &commands) {
-  LLDB_RECORD_METHOD(bool, SBBreakpointName, GetCommandLineCommands,
-                     (lldb::SBStringList &), commands);
+  LLDB_INSTRUMENT_VA(this, commands);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -507,8 +490,7 @@ bool SBBreakpointName::GetCommandLineCommands(SBStringList &commands) {
 }
 
 const char *SBBreakpointName::GetHelpString() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBreakpointName,
-                                   GetHelpString);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -518,8 +500,7 @@ const char *SBBreakpointName::GetHelpString() const {
 }
 
 void SBBreakpointName::SetHelpString(const char *help_string) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetHelpString, (const char *),
-                     help_string);
+  LLDB_INSTRUMENT_VA(this, help_string);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -532,8 +513,7 @@ void SBBreakpointName::SetHelpString(const char *help_string) {
 }
 
 bool SBBreakpointName::GetDescription(SBStream &s) {
-  LLDB_RECORD_METHOD(bool, SBBreakpointName, GetDescription, (lldb::SBStream &),
-                     s);
+  LLDB_INSTRUMENT_VA(this, s);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -550,8 +530,7 @@ bool SBBreakpointName::GetDescription(SBStream &s) {
 
 void SBBreakpointName::SetCallback(SBBreakpointHitCallback callback,
                                    void *baton) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetCallback,
-                     (lldb::SBBreakpointHitCallback, void *), callback, baton);
+  LLDB_INSTRUMENT_VA(this, callback, baton);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -569,8 +548,7 @@ void SBBreakpointName::SetCallback(SBBreakpointHitCallback callback,
 
 void SBBreakpointName::SetScriptCallbackFunction(
   const char *callback_function_name) {
-LLDB_RECORD_METHOD(void, SBBreakpointName, SetScriptCallbackFunction,
-                   (const char *), callback_function_name);
+  LLDB_INSTRUMENT_VA(this, callback_function_name);
   SBStructuredData empty_args;
   SetScriptCallbackFunction(callback_function_name, empty_args);
 }
@@ -578,9 +556,7 @@ LLDB_RECORD_METHOD(void, SBBreakpointName, SetScriptCallbackFunction,
 SBError SBBreakpointName::SetScriptCallbackFunction(
     const char *callback_function_name, 
     SBStructuredData &extra_args) {
-  LLDB_RECORD_METHOD(SBError, SBBreakpointName, SetScriptCallbackFunction,
-                     (const char *, SBStructuredData &), 
-                     callback_function_name, extra_args);
+  LLDB_INSTRUMENT_VA(this, callback_function_name, extra_args);
   SBError sb_error;
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name) {
@@ -606,8 +582,7 @@ SBError SBBreakpointName::SetScriptCallbackFunction(
 
 SBError
 SBBreakpointName::SetScriptCallbackBody(const char *callback_body_text) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBBreakpointName, SetScriptCallbackBody,
-                     (const char *), callback_body_text);
+  LLDB_INSTRUMENT_VA(this, callback_body_text);
 
   SBError sb_error;
   BreakpointName *bp_name = GetBreakpointName();
@@ -631,7 +606,7 @@ SBBreakpointName::SetScriptCallbackBody(const char *callback_body_text) {
 }
 
 bool SBBreakpointName::GetAllowList() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBreakpointName, GetAllowList);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -640,8 +615,7 @@ bool SBBreakpointName::GetAllowList() const {
 }
 
 void SBBreakpointName::SetAllowList(bool value) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetAllowList, (bool), value);
-
+  LLDB_INSTRUMENT_VA(this, value);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -650,7 +624,7 @@ void SBBreakpointName::SetAllowList(bool value) {
 }
 
 bool SBBreakpointName::GetAllowDelete() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointName, GetAllowDelete);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -659,8 +633,7 @@ bool SBBreakpointName::GetAllowDelete() {
 }
 
 void SBBreakpointName::SetAllowDelete(bool value) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetAllowDelete, (bool), value);
-
+  LLDB_INSTRUMENT_VA(this, value);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -669,7 +642,7 @@ void SBBreakpointName::SetAllowDelete(bool value) {
 }
 
 bool SBBreakpointName::GetAllowDisable() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBBreakpointName, GetAllowDisable);
+  LLDB_INSTRUMENT_VA(this);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
@@ -678,7 +651,7 @@ bool SBBreakpointName::GetAllowDisable() {
 }
 
 void SBBreakpointName::SetAllowDisable(bool value) {
-  LLDB_RECORD_METHOD(void, SBBreakpointName, SetAllowDisable, (bool), value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   BreakpointName *bp_name = GetBreakpointName();
   if (!bp_name)
diff --git a/lldb/source/API/SBBreakpointOptionCommon.cpp b/lldb/source/API/SBBreakpointOptionCommon.cpp
index 2ee47ff7795c6..685ed172c820a 100644
--- a/lldb/source/API/SBBreakpointOptionCommon.cpp
+++ b/lldb/source/API/SBBreakpointOptionCommon.cpp
@@ -26,6 +26,7 @@
 #include "lldb/Target/Target.h"
 #include "lldb/Target/Thread.h"
 #include "lldb/Target/ThreadSpec.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Log.h"
 #include "lldb/Utility/Stream.h"
 
@@ -38,19 +39,18 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBBreakpointCallbackBaton::SBBreakpointCallbackBaton(SBBreakpointHitCallback 
-                                                         callback,
-                                                     void *baton)
-      : TypedBaton(std::make_unique<CallbackData>()) {
-    getItem()->callback = callback;
-    getItem()->callback_baton = baton;
-  }
+SBBreakpointCallbackBaton::SBBreakpointCallbackBaton(
+    SBBreakpointHitCallback callback, void *baton)
+    : TypedBaton(std::make_unique<CallbackData>()) {
+  LLDB_INSTRUMENT_VA(this, callback, baton);
+  getItem()->callback = callback;
+  getItem()->callback_baton = baton;
+}
 
- bool SBBreakpointCallbackBaton::PrivateBreakpointHitCallback(void *baton,
-                                                  StoppointCallbackContext *ctx,
-                                                  lldb::user_id_t break_id,
-                                                  lldb::user_id_t break_loc_id)
-{
+bool SBBreakpointCallbackBaton::PrivateBreakpointHitCallback(
+    void *baton, StoppointCallbackContext *ctx, lldb::user_id_t break_id,
+    lldb::user_id_t break_loc_id) {
+  LLDB_INSTRUMENT_VA(baton, ctx, break_id, break_loc_id);
   ExecutionContext exe_ctx(ctx->exe_ctx_ref);
   BreakpointSP bp_sp(
       exe_ctx.GetTargetRef().GetBreakpointList().FindBreakpointByID(break_id));
diff --git a/lldb/source/API/SBBroadcaster.cpp b/lldb/source/API/SBBroadcaster.cpp
index 4fa11cab5d9ff..f145bc6e99d8e 100644
--- a/lldb/source/API/SBBroadcaster.cpp
+++ b/lldb/source/API/SBBroadcaster.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/Utility/Broadcaster.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBBroadcaster.h"
 #include "lldb/API/SBEvent.h"
@@ -16,13 +16,11 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBBroadcaster::SBBroadcaster() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBBroadcaster);
-}
+SBBroadcaster::SBBroadcaster() { LLDB_INSTRUMENT_VA(this); }
 
 SBBroadcaster::SBBroadcaster(const char *name)
     : m_opaque_sp(new Broadcaster(nullptr, name)), m_opaque_ptr(nullptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBBroadcaster, (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   m_opaque_ptr = m_opaque_sp.get();
 }
@@ -32,13 +30,11 @@ SBBroadcaster::SBBroadcaster(lldb_private::Broadcaster *broadcaster, bool owns)
 
 SBBroadcaster::SBBroadcaster(const SBBroadcaster &rhs)
     : m_opaque_sp(rhs.m_opaque_sp), m_opaque_ptr(rhs.m_opaque_ptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBBroadcaster, (const lldb::SBBroadcaster &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBBroadcaster &SBBroadcaster::operator=(const SBBroadcaster &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBBroadcaster &,
-                     SBBroadcaster, operator=,(const lldb::SBBroadcaster &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -50,8 +46,7 @@ const SBBroadcaster &SBBroadcaster::operator=(const SBBroadcaster &rhs) {
 SBBroadcaster::~SBBroadcaster() { reset(nullptr, false); }
 
 void SBBroadcaster::BroadcastEventByType(uint32_t event_type, bool unique) {
-  LLDB_RECORD_METHOD(void, SBBroadcaster, BroadcastEventByType,
-                     (uint32_t, bool), event_type, unique);
+  LLDB_INSTRUMENT_VA(this, event_type, unique);
 
   if (m_opaque_ptr == nullptr)
     return;
@@ -63,8 +58,7 @@ void SBBroadcaster::BroadcastEventByType(uint32_t event_type, bool unique) {
 }
 
 void SBBroadcaster::BroadcastEvent(const SBEvent &event, bool unique) {
-  LLDB_RECORD_METHOD(void, SBBroadcaster, BroadcastEvent,
-                     (const lldb::SBEvent &, bool), event, unique);
+  LLDB_INSTRUMENT_VA(this, event, unique);
 
   if (m_opaque_ptr == nullptr)
     return;
@@ -78,9 +72,7 @@ void SBBroadcaster::BroadcastEvent(const SBEvent &event, bool unique) {
 
 void SBBroadcaster::AddInitialEventsToListener(const SBListener &listener,
                                                uint32_t requested_events) {
-  LLDB_RECORD_METHOD(void, SBBroadcaster, AddInitialEventsToListener,
-                     (const lldb::SBListener &, uint32_t), listener,
-                     requested_events);
+  LLDB_INSTRUMENT_VA(this, listener, requested_events);
 
   if (m_opaque_ptr)
     m_opaque_ptr->AddInitialEventsToListener(listener.m_opaque_sp,
@@ -89,9 +81,7 @@ void SBBroadcaster::AddInitialEventsToListener(const SBListener &listener,
 
 uint32_t SBBroadcaster::AddListener(const SBListener &listener,
                                     uint32_t event_mask) {
-  LLDB_RECORD_METHOD(uint32_t, SBBroadcaster, AddListener,
-                     (const lldb::SBListener &, uint32_t), listener,
-                     event_mask);
+  LLDB_INSTRUMENT_VA(this, listener, event_mask);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->AddListener(listener.m_opaque_sp, event_mask);
@@ -99,7 +89,7 @@ uint32_t SBBroadcaster::AddListener(const SBListener &listener,
 }
 
 const char *SBBroadcaster::GetName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBBroadcaster, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetBroadcasterName().GetCString();
@@ -107,8 +97,7 @@ const char *SBBroadcaster::GetName() const {
 }
 
 bool SBBroadcaster::EventTypeHasListeners(uint32_t event_type) {
-  LLDB_RECORD_METHOD(bool, SBBroadcaster, EventTypeHasListeners, (uint32_t),
-                     event_type);
+  LLDB_INSTRUMENT_VA(this, event_type);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->EventTypeHasListeners(event_type);
@@ -117,9 +106,7 @@ bool SBBroadcaster::EventTypeHasListeners(uint32_t event_type) {
 
 bool SBBroadcaster::RemoveListener(const SBListener &listener,
                                    uint32_t event_mask) {
-  LLDB_RECORD_METHOD(bool, SBBroadcaster, RemoveListener,
-                     (const lldb::SBListener &, uint32_t), listener,
-                     event_mask);
+  LLDB_INSTRUMENT_VA(this, listener, event_mask);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->RemoveListener(listener.m_opaque_sp, event_mask);
@@ -137,39 +124,36 @@ void SBBroadcaster::reset(Broadcaster *broadcaster, bool owns) {
 }
 
 bool SBBroadcaster::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBroadcaster, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBBroadcaster::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBBroadcaster, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr != nullptr;
 }
 
 void SBBroadcaster::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBBroadcaster, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp.reset();
   m_opaque_ptr = nullptr;
 }
 
 bool SBBroadcaster::operator==(const SBBroadcaster &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBBroadcaster, operator==,(const lldb::SBBroadcaster &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr == rhs.m_opaque_ptr;
 }
 
 bool SBBroadcaster::operator!=(const SBBroadcaster &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBBroadcaster, operator!=,(const lldb::SBBroadcaster &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr != rhs.m_opaque_ptr;
 }
 
 bool SBBroadcaster::operator<(const SBBroadcaster &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBBroadcaster, operator<,(const lldb::SBBroadcaster &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr < rhs.m_opaque_ptr;
 }
diff --git a/lldb/source/API/SBCommandInterpreter.cpp b/lldb/source/API/SBCommandInterpreter.cpp
index d543246edcb44..073c1a1b042c2 100644
--- a/lldb/source/API/SBCommandInterpreter.cpp
+++ b/lldb/source/API/SBCommandInterpreter.cpp
@@ -8,11 +8,11 @@
 
 #include "lldb/lldb-types.h"
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/Interpreter/CommandInterpreter.h"
 #include "lldb/Interpreter/CommandObjectMultiword.h"
 #include "lldb/Interpreter/CommandReturnObject.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Listener.h"
 
 #include "lldb/API/SBBroadcaster.h"
@@ -78,71 +78,62 @@ class CommandPluginInterfaceImplementation : public CommandObjectParsed {
 
 SBCommandInterpreter::SBCommandInterpreter(CommandInterpreter *interpreter)
     : m_opaque_ptr(interpreter) {
-  LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreter,
-                          (lldb_private::CommandInterpreter *), interpreter);
-
+  LLDB_INSTRUMENT_VA(this, interpreter);
 }
 
 SBCommandInterpreter::SBCommandInterpreter(const SBCommandInterpreter &rhs)
     : m_opaque_ptr(rhs.m_opaque_ptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreter,
-                          (const lldb::SBCommandInterpreter &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBCommandInterpreter::~SBCommandInterpreter() = default;
 
 const SBCommandInterpreter &SBCommandInterpreter::
 operator=(const SBCommandInterpreter &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBCommandInterpreter &,
-      SBCommandInterpreter, operator=,(const lldb::SBCommandInterpreter &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_ptr = rhs.m_opaque_ptr;
   return *this;
 }
 
 bool SBCommandInterpreter::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreter, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBCommandInterpreter::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreter, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr != nullptr;
 }
 
 bool SBCommandInterpreter::CommandExists(const char *cmd) {
-  LLDB_RECORD_METHOD(bool, SBCommandInterpreter, CommandExists, (const char *),
-                     cmd);
+  LLDB_INSTRUMENT_VA(this, cmd);
 
   return (((cmd != nullptr) && IsValid()) ? m_opaque_ptr->CommandExists(cmd)
                                           : false);
 }
 
 bool SBCommandInterpreter::AliasExists(const char *cmd) {
-  LLDB_RECORD_METHOD(bool, SBCommandInterpreter, AliasExists, (const char *),
-                     cmd);
+  LLDB_INSTRUMENT_VA(this, cmd);
 
   return (((cmd != nullptr) && IsValid()) ? m_opaque_ptr->AliasExists(cmd)
                                           : false);
 }
 
 bool SBCommandInterpreter::IsActive() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, IsActive);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? m_opaque_ptr->IsActive() : false);
 }
 
 bool SBCommandInterpreter::WasInterrupted() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreter, WasInterrupted);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? m_opaque_ptr->WasInterrupted() : false);
 }
 
 const char *SBCommandInterpreter::GetIOHandlerControlSequence(char ch) {
-  LLDB_RECORD_METHOD(const char *, SBCommandInterpreter,
-                     GetIOHandlerControlSequence, (char), ch);
+  LLDB_INSTRUMENT_VA(this, ch);
 
   return (IsValid()
               ? m_opaque_ptr->GetDebugger()
@@ -155,9 +146,7 @@ lldb::ReturnStatus
 SBCommandInterpreter::HandleCommand(const char *command_line,
                                     SBCommandReturnObject &result,
                                     bool add_to_history) {
-  LLDB_RECORD_METHOD(lldb::ReturnStatus, SBCommandInterpreter, HandleCommand,
-                     (const char *, lldb::SBCommandReturnObject &, bool),
-                     command_line, result, add_to_history);
+  LLDB_INSTRUMENT_VA(this, command_line, result, add_to_history);
 
   SBExecutionContext sb_exe_ctx;
   return HandleCommand(command_line, sb_exe_ctx, result, add_to_history);
@@ -166,10 +155,8 @@ SBCommandInterpreter::HandleCommand(const char *command_line,
 lldb::ReturnStatus SBCommandInterpreter::HandleCommand(
     const char *command_line, SBExecutionContext &override_context,
     SBCommandReturnObject &result, bool add_to_history) {
-  LLDB_RECORD_METHOD(lldb::ReturnStatus, SBCommandInterpreter, HandleCommand,
-                     (const char *, lldb::SBExecutionContext &,
-                      lldb::SBCommandReturnObject &, bool),
-                     command_line, override_context, result, add_to_history);
+  LLDB_INSTRUMENT_VA(this, command_line, override_context, result,
+                     add_to_history);
 
   result.Clear();
   if (command_line && IsValid()) {
@@ -194,11 +181,7 @@ void SBCommandInterpreter::HandleCommandsFromFile(
     lldb::SBFileSpec &file, lldb::SBExecutionContext &override_context,
     lldb::SBCommandInterpreterRunOptions &options,
     lldb::SBCommandReturnObject result) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreter, HandleCommandsFromFile,
-                     (lldb::SBFileSpec &, lldb::SBExecutionContext &,
-                      lldb::SBCommandInterpreterRunOptions &,
-                      lldb::SBCommandReturnObject),
-                     file, override_context, options, result);
+  LLDB_INSTRUMENT_VA(this, file, override_context, options, result);
 
   if (!IsValid()) {
     result->AppendError("SBCommandInterpreter is not valid.");
@@ -225,10 +208,7 @@ void SBCommandInterpreter::HandleCommandsFromFile(
 int SBCommandInterpreter::HandleCompletion(
     const char *current_line, const char *cursor, const char *last_char,
     int match_start_point, int max_return_elements, SBStringList &matches) {
-  LLDB_RECORD_METHOD(int, SBCommandInterpreter, HandleCompletion,
-                     (const char *, const char *, const char *, int, int,
-                      lldb::SBStringList &),
-                     current_line, cursor, last_char, match_start_point,
+  LLDB_INSTRUMENT_VA(this, current_line, cursor, last_char, match_start_point,
                      max_return_elements, matches);
 
   SBStringList dummy_descriptions;
@@ -241,11 +221,7 @@ int SBCommandInterpreter::HandleCompletionWithDescriptions(
     const char *current_line, const char *cursor, const char *last_char,
     int match_start_point, int max_return_elements, SBStringList &matches,
     SBStringList &descriptions) {
-  LLDB_RECORD_METHOD(int, SBCommandInterpreter,
-                     HandleCompletionWithDescriptions,
-                     (const char *, const char *, const char *, int, int,
-                      lldb::SBStringList &, lldb::SBStringList &),
-                     current_line, cursor, last_char, match_start_point,
+  LLDB_INSTRUMENT_VA(this, current_line, cursor, last_char, match_start_point,
                      max_return_elements, matches, descriptions);
 
   // Sanity check the arguments that are passed in: cursor & last_char have to
@@ -311,11 +287,7 @@ int SBCommandInterpreter::HandleCompletionWithDescriptions(
     const char *current_line, uint32_t cursor_pos, int match_start_point,
     int max_return_elements, SBStringList &matches,
     SBStringList &descriptions) {
-  LLDB_RECORD_METHOD(int, SBCommandInterpreter,
-                     HandleCompletionWithDescriptions,
-                     (const char *, uint32_t, int, int, lldb::SBStringList &,
-                      lldb::SBStringList &),
-                     current_line, cursor_pos, match_start_point,
+  LLDB_INSTRUMENT_VA(this, current_line, cursor_pos, match_start_point,
                      max_return_elements, matches, descriptions);
 
   const char *cursor = current_line + cursor_pos;
@@ -330,9 +302,7 @@ int SBCommandInterpreter::HandleCompletion(const char *current_line,
                                            int match_start_point,
                                            int max_return_elements,
                                            lldb::SBStringList &matches) {
-  LLDB_RECORD_METHOD(int, SBCommandInterpreter, HandleCompletion,
-                     (const char *, uint32_t, int, int, lldb::SBStringList &),
-                     current_line, cursor_pos, match_start_point,
+  LLDB_INSTRUMENT_VA(this, current_line, cursor_pos, match_start_point,
                      max_return_elements, matches);
 
   const char *cursor = current_line + cursor_pos;
@@ -342,25 +312,25 @@ int SBCommandInterpreter::HandleCompletion(const char *current_line,
 }
 
 bool SBCommandInterpreter::HasCommands() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, HasCommands);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? m_opaque_ptr->HasCommands() : false);
 }
 
 bool SBCommandInterpreter::HasAliases() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, HasAliases);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? m_opaque_ptr->HasAliases() : false);
 }
 
 bool SBCommandInterpreter::HasAliasOptions() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, HasAliasOptions);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? m_opaque_ptr->HasAliasOptions() : false);
 }
 
 SBProcess SBCommandInterpreter::GetProcess() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBCommandInterpreter, GetProcess);
+  LLDB_INSTRUMENT_VA(this);
 
   SBProcess sb_process;
   ProcessSP process_sp;
@@ -377,8 +347,7 @@ SBProcess SBCommandInterpreter::GetProcess() {
 }
 
 SBDebugger SBCommandInterpreter::GetDebugger() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBDebugger, SBCommandInterpreter,
-                             GetDebugger);
+  LLDB_INSTRUMENT_VA(this);
 
   SBDebugger sb_debugger;
   if (IsValid())
@@ -388,28 +357,27 @@ SBDebugger SBCommandInterpreter::GetDebugger() {
 }
 
 bool SBCommandInterpreter::GetPromptOnQuit() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, GetPromptOnQuit);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? m_opaque_ptr->GetPromptOnQuit() : false);
 }
 
 void SBCommandInterpreter::SetPromptOnQuit(bool b) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreter, SetPromptOnQuit, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   if (IsValid())
     m_opaque_ptr->SetPromptOnQuit(b);
 }
 
 void SBCommandInterpreter::AllowExitCodeOnQuit(bool allow) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreter, AllowExitCodeOnQuit, (bool),
-                     allow);
+  LLDB_INSTRUMENT_VA(this, allow);
 
   if (m_opaque_ptr)
     m_opaque_ptr->AllowExitCodeOnQuit(allow);
 }
 
 bool SBCommandInterpreter::HasCustomQuitExitCode() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandInterpreter, HasCustomQuitExitCode);
+  LLDB_INSTRUMENT_VA(this);
 
   bool exited = false;
   if (m_opaque_ptr)
@@ -418,7 +386,7 @@ bool SBCommandInterpreter::HasCustomQuitExitCode() {
 }
 
 int SBCommandInterpreter::GetQuitStatus() {
-  LLDB_RECORD_METHOD_NO_ARGS(int, SBCommandInterpreter, GetQuitStatus);
+  LLDB_INSTRUMENT_VA(this);
 
   bool exited = false;
   return (m_opaque_ptr ? m_opaque_ptr->GetQuitExitCode(exited) : 0);
@@ -426,9 +394,7 @@ int SBCommandInterpreter::GetQuitStatus() {
 
 void SBCommandInterpreter::ResolveCommand(const char *command_line,
                                           SBCommandReturnObject &result) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreter, ResolveCommand,
-                     (const char *, lldb::SBCommandReturnObject &),
-                     command_line, result);
+  LLDB_INSTRUMENT_VA(this, command_line, result);
 
   result.Clear();
   if (command_line && IsValid()) {
@@ -453,8 +419,7 @@ void SBCommandInterpreter::reset(
 
 void SBCommandInterpreter::SourceInitFileInHomeDirectory(
     SBCommandReturnObject &result) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreter, SourceInitFileInHomeDirectory,
-                     (lldb::SBCommandReturnObject &), result);
+  LLDB_INSTRUMENT_VA(this, result);
 
   result.Clear();
   if (IsValid()) {
@@ -470,8 +435,7 @@ void SBCommandInterpreter::SourceInitFileInHomeDirectory(
 
 void SBCommandInterpreter::SourceInitFileInHomeDirectory(
     SBCommandReturnObject &result, bool is_repl) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreter, SourceInitFileInHomeDirectory,
-                     (lldb::SBCommandReturnObject &, bool), result, is_repl);
+  LLDB_INSTRUMENT_VA(this, result, is_repl);
 
   result.Clear();
   if (IsValid()) {
@@ -487,9 +451,7 @@ void SBCommandInterpreter::SourceInitFileInHomeDirectory(
 
 void SBCommandInterpreter::SourceInitFileInCurrentWorkingDirectory(
     SBCommandReturnObject &result) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreter,
-                     SourceInitFileInCurrentWorkingDirectory,
-                     (lldb::SBCommandReturnObject &), result);
+  LLDB_INSTRUMENT_VA(this, result);
 
   result.Clear();
   if (IsValid()) {
@@ -504,9 +466,7 @@ void SBCommandInterpreter::SourceInitFileInCurrentWorkingDirectory(
 }
 
 SBBroadcaster SBCommandInterpreter::GetBroadcaster() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBroadcaster, SBCommandInterpreter,
-                             GetBroadcaster);
-
+  LLDB_INSTRUMENT_VA(this);
 
   SBBroadcaster broadcaster(m_opaque_ptr, false);
 
@@ -514,35 +474,28 @@ SBBroadcaster SBCommandInterpreter::GetBroadcaster() {
 }
 
 const char *SBCommandInterpreter::GetBroadcasterClass() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBCommandInterpreter,
-                                    GetBroadcasterClass);
+  LLDB_INSTRUMENT();
 
   return CommandInterpreter::GetStaticBroadcasterClass().AsCString();
 }
 
 const char *SBCommandInterpreter::GetArgumentTypeAsCString(
     const lldb::CommandArgumentType arg_type) {
-  LLDB_RECORD_STATIC_METHOD(const char *, SBCommandInterpreter,
-                            GetArgumentTypeAsCString,
-                            (const lldb::CommandArgumentType), arg_type);
+  LLDB_INSTRUMENT_VA(arg_type);
 
   return CommandObject::GetArgumentTypeAsCString(arg_type);
 }
 
 const char *SBCommandInterpreter::GetArgumentDescriptionAsCString(
     const lldb::CommandArgumentType arg_type) {
-  LLDB_RECORD_STATIC_METHOD(const char *, SBCommandInterpreter,
-                            GetArgumentDescriptionAsCString,
-                            (const lldb::CommandArgumentType), arg_type);
+  LLDB_INSTRUMENT_VA(arg_type);
 
   return CommandObject::GetArgumentDescriptionAsCString(arg_type);
 }
 
 bool SBCommandInterpreter::EventIsCommandInterpreterEvent(
     const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBCommandInterpreter,
-                            EventIsCommandInterpreterEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return event.GetBroadcasterClass() ==
          SBCommandInterpreter::GetBroadcasterClass();
@@ -551,9 +504,7 @@ bool SBCommandInterpreter::EventIsCommandInterpreterEvent(
 bool SBCommandInterpreter::SetCommandOverrideCallback(
     const char *command_name, lldb::CommandOverrideCallback callback,
     void *baton) {
-  LLDB_RECORD_METHOD(bool, SBCommandInterpreter, SetCommandOverrideCallback,
-                     (const char *, lldb::CommandOverrideCallback, void *),
-                     command_name, callback, baton);
+  LLDB_INSTRUMENT_VA(this, command_name, callback, baton);
 
   if (command_name && command_name[0] && IsValid()) {
     llvm::StringRef command_name_str = command_name;
@@ -570,8 +521,7 @@ bool SBCommandInterpreter::SetCommandOverrideCallback(
 
 lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name,
                                                           const char *help) {
-  LLDB_RECORD_METHOD(lldb::SBCommand, SBCommandInterpreter, AddMultiwordCommand,
-                     (const char *, const char *), name, help);
+  LLDB_INSTRUMENT_VA(this, name, help);
 
   lldb::CommandObjectSP new_command_sp(
       new CommandObjectMultiword(*m_opaque_ptr, name, help));
@@ -584,10 +534,7 @@ lldb::SBCommand SBCommandInterpreter::AddMultiwordCommand(const char *name,
 
 lldb::SBCommand SBCommandInterpreter::AddCommand(
     const char *name, lldb::SBCommandPluginInterface *impl, const char *help) {
-  LLDB_RECORD_METHOD(
-      lldb::SBCommand, SBCommandInterpreter, AddCommand,
-      (const char *, lldb::SBCommandPluginInterface *, const char *), name,
-      impl, help);
+  LLDB_INSTRUMENT_VA(this, name, impl, help);
 
   return AddCommand(name, impl, help, /*syntax=*/nullptr,
                     /*auto_repeat_command=*/"");
@@ -597,20 +544,14 @@ lldb::SBCommand
 SBCommandInterpreter::AddCommand(const char *name,
                                  lldb::SBCommandPluginInterface *impl,
                                  const char *help, const char *syntax) {
-  LLDB_RECORD_METHOD(lldb::SBCommand, SBCommandInterpreter, AddCommand,
-                     (const char *, lldb::SBCommandPluginInterface *,
-                      const char *, const char *),
-                     name, impl, help, syntax);
+  LLDB_INSTRUMENT_VA(this, name, impl, help, syntax);
   return AddCommand(name, impl, help, syntax, /*auto_repeat_command=*/"");
 }
 
 lldb::SBCommand SBCommandInterpreter::AddCommand(
     const char *name, lldb::SBCommandPluginInterface *impl, const char *help,
     const char *syntax, const char *auto_repeat_command) {
-  LLDB_RECORD_METHOD(lldb::SBCommand, SBCommandInterpreter, AddCommand,
-                     (const char *, lldb::SBCommandPluginInterface *,
-                      const char *, const char *, const char *),
-                     name, impl, help, syntax, auto_repeat_command);
+  LLDB_INSTRUMENT_VA(this, name, impl, help, syntax, auto_repeat_command);
 
   lldb::CommandObjectSP new_command_sp;
   new_command_sp = std::make_shared<CommandPluginInterfaceImplementation>(
@@ -623,49 +564,49 @@ lldb::SBCommand SBCommandInterpreter::AddCommand(
   return lldb::SBCommand();
 }
 
-SBCommand::SBCommand() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommand); }
+SBCommand::SBCommand() { LLDB_INSTRUMENT_VA(this); }
 
 SBCommand::SBCommand(lldb::CommandObjectSP cmd_sp) : m_opaque_sp(cmd_sp) {}
 
 bool SBCommand::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommand, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBCommand::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommand, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 const char *SBCommand::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommand, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? ConstString(m_opaque_sp->GetCommandName()).AsCString() : nullptr);
 }
 
 const char *SBCommand::GetHelp() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommand, GetHelp);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? ConstString(m_opaque_sp->GetHelp()).AsCString()
                     : nullptr);
 }
 
 const char *SBCommand::GetHelpLong() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommand, GetHelpLong);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? ConstString(m_opaque_sp->GetHelpLong()).AsCString()
                     : nullptr);
 }
 
 void SBCommand::SetHelp(const char *help) {
-  LLDB_RECORD_METHOD(void, SBCommand, SetHelp, (const char *), help);
+  LLDB_INSTRUMENT_VA(this, help);
 
   if (IsValid())
     m_opaque_sp->SetHelp(help);
 }
 
 void SBCommand::SetHelpLong(const char *help) {
-  LLDB_RECORD_METHOD(void, SBCommand, SetHelpLong, (const char *), help);
+  LLDB_INSTRUMENT_VA(this, help);
 
   if (IsValid())
     m_opaque_sp->SetHelpLong(help);
@@ -673,8 +614,7 @@ void SBCommand::SetHelpLong(const char *help) {
 
 lldb::SBCommand SBCommand::AddMultiwordCommand(const char *name,
                                                const char *help) {
-  LLDB_RECORD_METHOD(lldb::SBCommand, SBCommand, AddMultiwordCommand,
-                     (const char *, const char *), name, help);
+  LLDB_INSTRUMENT_VA(this, name, help);
 
   if (!IsValid())
     return lldb::SBCommand();
@@ -692,10 +632,7 @@ lldb::SBCommand SBCommand::AddMultiwordCommand(const char *name,
 lldb::SBCommand SBCommand::AddCommand(const char *name,
                                       lldb::SBCommandPluginInterface *impl,
                                       const char *help) {
-  LLDB_RECORD_METHOD(
-      lldb::SBCommand, SBCommand, AddCommand,
-      (const char *, lldb::SBCommandPluginInterface *, const char *), name,
-      impl, help);
+  LLDB_INSTRUMENT_VA(this, name, impl, help);
   return AddCommand(name, impl, help, /*syntax=*/nullptr,
                     /*auto_repeat_command=*/"");
 }
@@ -703,10 +640,7 @@ lldb::SBCommand SBCommand::AddCommand(const char *name,
 lldb::SBCommand SBCommand::AddCommand(const char *name,
                                       lldb::SBCommandPluginInterface *impl,
                                       const char *help, const char *syntax) {
-  LLDB_RECORD_METHOD(lldb::SBCommand, SBCommand, AddCommand,
-                     (const char *, lldb::SBCommandPluginInterface *,
-                      const char *, const char *),
-                     name, impl, help, syntax);
+  LLDB_INSTRUMENT_VA(this, name, impl, help, syntax);
   return AddCommand(name, impl, help, syntax, /*auto_repeat_command=*/"");
 }
 
@@ -714,10 +648,7 @@ lldb::SBCommand SBCommand::AddCommand(const char *name,
                                       lldb::SBCommandPluginInterface *impl,
                                       const char *help, const char *syntax,
                                       const char *auto_repeat_command) {
-  LLDB_RECORD_METHOD(lldb::SBCommand, SBCommand, AddCommand,
-                     (const char *, lldb::SBCommandPluginInterface *,
-                      const char *, const char *, const char *),
-                     name, impl, help, syntax, auto_repeat_command);
+  LLDB_INSTRUMENT_VA(this, name, impl, help, syntax, auto_repeat_command);
 
   if (!IsValid())
     return lldb::SBCommand();
@@ -733,13 +664,13 @@ lldb::SBCommand SBCommand::AddCommand(const char *name,
 }
 
 uint32_t SBCommand::GetFlags() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBCommand, GetFlags);
+  LLDB_INSTRUMENT_VA(this);
 
   return (IsValid() ? m_opaque_sp->GetFlags().Get() : 0);
 }
 
 void SBCommand::SetFlags(uint32_t flags) {
-  LLDB_RECORD_METHOD(void, SBCommand, SetFlags, (uint32_t), flags);
+  LLDB_INSTRUMENT_VA(this, flags);
 
   if (IsValid())
     m_opaque_sp->GetFlags().Set(flags);
diff --git a/lldb/source/API/SBCommandInterpreterRunOptions.cpp b/lldb/source/API/SBCommandInterpreterRunOptions.cpp
index eb1a4885f2079..6c6b2aa15a792 100644
--- a/lldb/source/API/SBCommandInterpreterRunOptions.cpp
+++ b/lldb/source/API/SBCommandInterpreterRunOptions.cpp
@@ -8,7 +8,7 @@
 
 #include "lldb/lldb-types.h"
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBCommandInterpreterRunOptions.h"
 #include "lldb/Interpreter/CommandInterpreter.h"
@@ -19,15 +19,14 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandInterpreterRunOptions);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up = std::make_unique<CommandInterpreterRunOptions>();
 }
 
 SBCommandInterpreterRunOptions::SBCommandInterpreterRunOptions(
     const SBCommandInterpreterRunOptions &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreterRunOptions,
-                          (const lldb::SBCommandInterpreterRunOptions &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = std::make_unique<CommandInterpreterRunOptions>(rhs.ref());
 }
@@ -36,9 +35,7 @@ SBCommandInterpreterRunOptions::~SBCommandInterpreterRunOptions() = default;
 
 SBCommandInterpreterRunOptions &SBCommandInterpreterRunOptions::operator=(
     const SBCommandInterpreterRunOptions &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunOptions &,
-                     SBCommandInterpreterRunOptions, operator=,
-                     (const lldb::SBCommandInterpreterRunOptions &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this == &rhs)
     return *this;
@@ -47,142 +44,122 @@ SBCommandInterpreterRunOptions &SBCommandInterpreterRunOptions::operator=(
 }
 
 bool SBCommandInterpreterRunOptions::GetStopOnContinue() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetStopOnContinue);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetStopOnContinue();
 }
 
 void SBCommandInterpreterRunOptions::SetStopOnContinue(bool stop_on_continue) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnContinue,
-                     (bool), stop_on_continue);
+  LLDB_INSTRUMENT_VA(this, stop_on_continue);
 
   m_opaque_up->SetStopOnContinue(stop_on_continue);
 }
 
 bool SBCommandInterpreterRunOptions::GetStopOnError() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetStopOnError);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetStopOnError();
 }
 
 void SBCommandInterpreterRunOptions::SetStopOnError(bool stop_on_error) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnError,
-                     (bool), stop_on_error);
+  LLDB_INSTRUMENT_VA(this, stop_on_error);
 
   m_opaque_up->SetStopOnError(stop_on_error);
 }
 
 bool SBCommandInterpreterRunOptions::GetStopOnCrash() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetStopOnCrash);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetStopOnCrash();
 }
 
 void SBCommandInterpreterRunOptions::SetStopOnCrash(bool stop_on_crash) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetStopOnCrash,
-                     (bool), stop_on_crash);
+  LLDB_INSTRUMENT_VA(this, stop_on_crash);
 
   m_opaque_up->SetStopOnCrash(stop_on_crash);
 }
 
 bool SBCommandInterpreterRunOptions::GetEchoCommands() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetEchoCommands);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetEchoCommands();
 }
 
 void SBCommandInterpreterRunOptions::SetEchoCommands(bool echo_commands) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetEchoCommands,
-                     (bool), echo_commands);
+  LLDB_INSTRUMENT_VA(this, echo_commands);
 
   m_opaque_up->SetEchoCommands(echo_commands);
 }
 
 bool SBCommandInterpreterRunOptions::GetEchoCommentCommands() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetEchoCommentCommands);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetEchoCommentCommands();
 }
 
 void SBCommandInterpreterRunOptions::SetEchoCommentCommands(bool echo) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions,
-                     SetEchoCommentCommands, (bool), echo);
+  LLDB_INSTRUMENT_VA(this, echo);
 
   m_opaque_up->SetEchoCommentCommands(echo);
 }
 
 bool SBCommandInterpreterRunOptions::GetPrintResults() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetPrintResults);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetPrintResults();
 }
 
 void SBCommandInterpreterRunOptions::SetPrintResults(bool print_results) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetPrintResults,
-                     (bool), print_results);
+  LLDB_INSTRUMENT_VA(this, print_results);
 
   m_opaque_up->SetPrintResults(print_results);
 }
 
 bool SBCommandInterpreterRunOptions::GetPrintErrors() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetPrintErrors);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetPrintErrors();
 }
 
 void SBCommandInterpreterRunOptions::SetPrintErrors(bool print_errors) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetPrintErrors,
-                     (bool), print_errors);
+  LLDB_INSTRUMENT_VA(this, print_errors);
 
   m_opaque_up->SetPrintErrors(print_errors);
 }
 
 bool SBCommandInterpreterRunOptions::GetAddToHistory() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetAddToHistory);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetAddToHistory();
 }
 
 void SBCommandInterpreterRunOptions::SetAddToHistory(bool add_to_history) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetAddToHistory,
-                     (bool), add_to_history);
+  LLDB_INSTRUMENT_VA(this, add_to_history);
 
   m_opaque_up->SetAddToHistory(add_to_history);
 }
 
 bool SBCommandInterpreterRunOptions::GetAutoHandleEvents() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetAutoHandleEvents);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetAutoHandleEvents();
 }
 
 void SBCommandInterpreterRunOptions::SetAutoHandleEvents(
     bool auto_handle_events) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetAutoHandleEvents,
-                     (bool), auto_handle_events);
+  LLDB_INSTRUMENT_VA(this, auto_handle_events);
 
   m_opaque_up->SetAutoHandleEvents(auto_handle_events);
 }
 
 bool SBCommandInterpreterRunOptions::GetSpawnThread() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandInterpreterRunOptions,
-                                   GetSpawnThread);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetSpawnThread();
 }
 
 void SBCommandInterpreterRunOptions::SetSpawnThread(bool spawn_thread) {
-  LLDB_RECORD_METHOD(void, SBCommandInterpreterRunOptions, SetSpawnThread,
-                     (bool), spawn_thread);
+  LLDB_INSTRUMENT_VA(this, spawn_thread);
 
   m_opaque_up->SetSpawnThread(spawn_thread);
 }
@@ -201,14 +178,13 @@ SBCommandInterpreterRunResult::SBCommandInterpreterRunResult()
     : m_opaque_up(new CommandInterpreterRunResult())
 
 {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandInterpreterRunResult);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBCommandInterpreterRunResult::SBCommandInterpreterRunResult(
     const SBCommandInterpreterRunResult &rhs)
     : m_opaque_up(new CommandInterpreterRunResult()) {
-  LLDB_RECORD_CONSTRUCTOR(SBCommandInterpreterRunResult,
-                          (const lldb::SBCommandInterpreterRunResult &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   *m_opaque_up = *rhs.m_opaque_up;
 }
@@ -222,9 +198,7 @@ SBCommandInterpreterRunResult::~SBCommandInterpreterRunResult() = default;
 
 SBCommandInterpreterRunResult &SBCommandInterpreterRunResult::operator=(
     const SBCommandInterpreterRunResult &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunResult &,
-                     SBCommandInterpreterRunResult, operator=,
-                     (const lldb::SBCommandInterpreterRunResult &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this == &rhs)
     return *this;
@@ -233,16 +207,14 @@ SBCommandInterpreterRunResult &SBCommandInterpreterRunResult::operator=(
 }
 
 int SBCommandInterpreterRunResult::GetNumberOfErrors() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(int, SBCommandInterpreterRunResult,
-                                   GetNumberOfErrors);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetNumErrors();
 }
 
 lldb::CommandInterpreterResult
 SBCommandInterpreterRunResult::GetResult() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::CommandInterpreterResult,
-                                   SBCommandInterpreterRunResult, GetResult);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetResult();
 }
diff --git a/lldb/source/API/SBCommandReturnObject.cpp b/lldb/source/API/SBCommandReturnObject.cpp
index 505145d75ce8c..7d2c102b3d8c1 100644
--- a/lldb/source/API/SBCommandReturnObject.cpp
+++ b/lldb/source/API/SBCommandReturnObject.cpp
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBCommandReturnObject.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBError.h"
 #include "lldb/API/SBFile.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Interpreter/CommandReturnObject.h"
 #include "lldb/Utility/ConstString.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Status.h"
 
 using namespace lldb;
@@ -46,28 +46,23 @@ class lldb_private::SBCommandReturnObjectImpl {
 
 SBCommandReturnObject::SBCommandReturnObject()
     : m_opaque_up(new SBCommandReturnObjectImpl()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommandReturnObject);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBCommandReturnObject::SBCommandReturnObject(CommandReturnObject &ref)
     : m_opaque_up(new SBCommandReturnObjectImpl(ref)) {
-  LLDB_RECORD_CONSTRUCTOR(SBCommandReturnObject,
-                          (lldb_private::CommandReturnObject &), ref);
+  LLDB_INSTRUMENT_VA(this, ref);
 }
 
 SBCommandReturnObject::SBCommandReturnObject(const SBCommandReturnObject &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBCommandReturnObject,
-                          (const lldb::SBCommandReturnObject &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
 
 SBCommandReturnObject &SBCommandReturnObject::
 operator=(const SBCommandReturnObject &rhs) {
-  LLDB_RECORD_METHOD(
-      lldb::SBCommandReturnObject &,
-      SBCommandReturnObject, operator=,(const lldb::SBCommandReturnObject &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -77,44 +72,44 @@ operator=(const SBCommandReturnObject &rhs) {
 SBCommandReturnObject::~SBCommandReturnObject() = default;
 
 bool SBCommandReturnObject::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandReturnObject, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBCommandReturnObject::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommandReturnObject, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   // This method is not useful but it needs to stay to keep SB API stable.
   return true;
 }
 
 const char *SBCommandReturnObject::GetOutput() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommandReturnObject, GetOutput);
+  LLDB_INSTRUMENT_VA(this);
 
   ConstString output(ref().GetOutputData());
   return output.AsCString(/*value_if_empty*/ "");
 }
 
 const char *SBCommandReturnObject::GetError() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBCommandReturnObject, GetError);
+  LLDB_INSTRUMENT_VA(this);
 
   ConstString output(ref().GetErrorData());
   return output.AsCString(/*value_if_empty*/ "");
 }
 
 size_t SBCommandReturnObject::GetOutputSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBCommandReturnObject, GetOutputSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return ref().GetOutputData().size();
 }
 
 size_t SBCommandReturnObject::GetErrorSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBCommandReturnObject, GetErrorSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return ref().GetErrorData().size();
 }
 
 size_t SBCommandReturnObject::PutOutput(FILE *fh) {
-  LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (FILE *), fh);
+  LLDB_INSTRUMENT_VA(this, fh);
   if (fh) {
     size_t num_bytes = GetOutputSize();
     if (num_bytes)
@@ -124,22 +119,21 @@ size_t SBCommandReturnObject::PutOutput(FILE *fh) {
 }
 
 size_t SBCommandReturnObject::PutOutput(FileSP file_sp) {
-  LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (FileSP),
-                     file_sp);
+  LLDB_INSTRUMENT_VA(this, file_sp);
   if (!file_sp)
     return 0;
   return file_sp->Printf("%s", GetOutput());
 }
 
 size_t SBCommandReturnObject::PutOutput(SBFile file) {
-  LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutOutput, (SBFile), file);
+  LLDB_INSTRUMENT_VA(this, file);
   if (!file.m_opaque_sp)
     return 0;
   return file.m_opaque_sp->Printf("%s", GetOutput());
 }
 
 size_t SBCommandReturnObject::PutError(FILE *fh) {
-  LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (FILE *), fh);
+  LLDB_INSTRUMENT_VA(this, fh);
   if (fh) {
     size_t num_bytes = GetErrorSize();
     if (num_bytes)
@@ -149,62 +143,57 @@ size_t SBCommandReturnObject::PutError(FILE *fh) {
 }
 
 size_t SBCommandReturnObject::PutError(FileSP file_sp) {
-  LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (FileSP),
-                     file_sp);
+  LLDB_INSTRUMENT_VA(this, file_sp);
   if (!file_sp)
     return 0;
   return file_sp->Printf("%s", GetError());
 }
 
 size_t SBCommandReturnObject::PutError(SBFile file) {
-  LLDB_RECORD_METHOD(size_t, SBCommandReturnObject, PutError, (SBFile), file);
+  LLDB_INSTRUMENT_VA(this, file);
   if (!file.m_opaque_sp)
     return 0;
   return file.m_opaque_sp->Printf("%s", GetError());
 }
 
 void SBCommandReturnObject::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBCommandReturnObject, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   ref().Clear();
 }
 
 lldb::ReturnStatus SBCommandReturnObject::GetStatus() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::ReturnStatus, SBCommandReturnObject,
-                             GetStatus);
+  LLDB_INSTRUMENT_VA(this);
 
   return ref().GetStatus();
 }
 
 void SBCommandReturnObject::SetStatus(lldb::ReturnStatus status) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetStatus,
-                     (lldb::ReturnStatus), status);
+  LLDB_INSTRUMENT_VA(this, status);
 
   ref().SetStatus(status);
 }
 
 bool SBCommandReturnObject::Succeeded() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandReturnObject, Succeeded);
+  LLDB_INSTRUMENT_VA(this);
 
   return ref().Succeeded();
 }
 
 bool SBCommandReturnObject::HasResult() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommandReturnObject, HasResult);
+  LLDB_INSTRUMENT_VA(this);
 
   return ref().HasResult();
 }
 
 void SBCommandReturnObject::AppendMessage(const char *message) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, AppendMessage, (const char *),
-                     message);
+  LLDB_INSTRUMENT_VA(this, message);
 
   ref().AppendMessage(message);
 }
 
 void SBCommandReturnObject::AppendWarning(const char *message) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, AppendWarning, (const char *),
-                     message);
+  LLDB_INSTRUMENT_VA(this, message);
 
   ref().AppendWarning(message);
 }
@@ -226,8 +215,7 @@ CommandReturnObject &SBCommandReturnObject::ref() const {
 }
 
 bool SBCommandReturnObject::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBCommandReturnObject, GetDescription,
-                     (lldb::SBStream &), description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -252,62 +240,53 @@ bool SBCommandReturnObject::GetDescription(SBStream &description) {
 }
 
 void SBCommandReturnObject::SetImmediateOutputFile(FILE *fh) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile,
-                     (FILE *), fh);
+  LLDB_INSTRUMENT_VA(this, fh);
 
   SetImmediateOutputFile(fh, false);
 }
 
 void SBCommandReturnObject::SetImmediateErrorFile(FILE *fh) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile,
-                     (FILE *), fh);
+  LLDB_INSTRUMENT_VA(this, fh);
 
   SetImmediateErrorFile(fh, false);
 }
 
 void SBCommandReturnObject::SetImmediateOutputFile(FILE *fh,
                                                    bool transfer_ownership) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile,
-                     (FILE *, bool), fh, transfer_ownership);
+  LLDB_INSTRUMENT_VA(this, fh, transfer_ownership);
   FileSP file = std::make_shared<NativeFile>(fh, transfer_ownership);
   ref().SetImmediateOutputFile(file);
 }
 
 void SBCommandReturnObject::SetImmediateErrorFile(FILE *fh,
                                                   bool transfer_ownership) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile,
-                     (FILE *, bool), fh, transfer_ownership);
+  LLDB_INSTRUMENT_VA(this, fh, transfer_ownership);
   FileSP file = std::make_shared<NativeFile>(fh, transfer_ownership);
   ref().SetImmediateErrorFile(file);
 }
 
 void SBCommandReturnObject::SetImmediateOutputFile(SBFile file) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile,
-                     (SBFile), file);
+  LLDB_INSTRUMENT_VA(this, file);
   ref().SetImmediateOutputFile(file.m_opaque_sp);
 }
 
 void SBCommandReturnObject::SetImmediateErrorFile(SBFile file) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile,
-                     (SBFile), file);
+  LLDB_INSTRUMENT_VA(this, file);
   ref().SetImmediateErrorFile(file.m_opaque_sp);
 }
 
 void SBCommandReturnObject::SetImmediateOutputFile(FileSP file_sp) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateOutputFile,
-                     (FileSP), file_sp);
+  LLDB_INSTRUMENT_VA(this, file_sp);
   SetImmediateOutputFile(SBFile(file_sp));
 }
 
 void SBCommandReturnObject::SetImmediateErrorFile(FileSP file_sp) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetImmediateErrorFile,
-                     (FileSP), file_sp);
+  LLDB_INSTRUMENT_VA(this, file_sp);
   SetImmediateErrorFile(SBFile(file_sp));
 }
 
 void SBCommandReturnObject::PutCString(const char *string, int len) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, PutCString,
-                     (const char *, int), string, len);
+  LLDB_INSTRUMENT_VA(this, string, len);
 
   if (len == 0 || string == nullptr || *string == 0) {
     return;
@@ -319,8 +298,7 @@ void SBCommandReturnObject::PutCString(const char *string, int len) {
 }
 
 const char *SBCommandReturnObject::GetOutput(bool only_if_no_immediate) {
-  LLDB_RECORD_METHOD(const char *, SBCommandReturnObject, GetOutput, (bool),
-                     only_if_no_immediate);
+  LLDB_INSTRUMENT_VA(this, only_if_no_immediate);
 
   if (!only_if_no_immediate ||
       ref().GetImmediateOutputStream().get() == nullptr)
@@ -329,8 +307,7 @@ const char *SBCommandReturnObject::GetOutput(bool only_if_no_immediate) {
 }
 
 const char *SBCommandReturnObject::GetError(bool only_if_no_immediate) {
-  LLDB_RECORD_METHOD(const char *, SBCommandReturnObject, GetError, (bool),
-                     only_if_no_immediate);
+  LLDB_INSTRUMENT_VA(this, only_if_no_immediate);
 
   if (!only_if_no_immediate || ref().GetImmediateErrorStream().get() == nullptr)
     return GetError();
@@ -347,9 +324,7 @@ size_t SBCommandReturnObject::Printf(const char *format, ...) {
 
 void SBCommandReturnObject::SetError(lldb::SBError &error,
                                      const char *fallback_error_cstr) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetError,
-                     (lldb::SBError &, const char *), error,
-                     fallback_error_cstr);
+  LLDB_INSTRUMENT_VA(this, error, fallback_error_cstr);
 
   if (error.IsValid())
     ref().SetError(error.ref(), fallback_error_cstr);
@@ -358,8 +333,7 @@ void SBCommandReturnObject::SetError(lldb::SBError &error,
 }
 
 void SBCommandReturnObject::SetError(const char *error_cstr) {
-  LLDB_RECORD_METHOD(void, SBCommandReturnObject, SetError, (const char *),
-                     error_cstr);
+  LLDB_INSTRUMENT_VA(this, error_cstr);
 
   if (error_cstr)
     ref().AppendError(error_cstr);
diff --git a/lldb/source/API/SBCommunication.cpp b/lldb/source/API/SBCommunication.cpp
index 79a95c9dad626..0a1dad1e2e8fb 100644
--- a/lldb/source/API/SBCommunication.cpp
+++ b/lldb/source/API/SBCommunication.cpp
@@ -7,22 +7,20 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBCommunication.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBBroadcaster.h"
 #include "lldb/Core/Communication.h"
 #include "lldb/Host/ConnectionFileDescriptor.h"
 #include "lldb/Host/Host.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBCommunication::SBCommunication() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCommunication);
-}
+SBCommunication::SBCommunication() { LLDB_INSTRUMENT_VA(this); }
 
 SBCommunication::SBCommunication(const char *broadcaster_name)
     : m_opaque(new Communication(broadcaster_name)), m_opaque_owned(true) {
-  LLDB_RECORD_CONSTRUCTOR(SBCommunication, (const char *), broadcaster_name);
+  LLDB_INSTRUMENT_VA(this, broadcaster_name);
 }
 
 SBCommunication::~SBCommunication() {
@@ -33,17 +31,17 @@ SBCommunication::~SBCommunication() {
 }
 
 bool SBCommunication::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommunication, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBCommunication::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommunication, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque != nullptr;
 }
 
 bool SBCommunication::GetCloseOnEOF() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommunication, GetCloseOnEOF);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque)
     return m_opaque->GetCloseOnEOF();
@@ -51,15 +49,14 @@ bool SBCommunication::GetCloseOnEOF() {
 }
 
 void SBCommunication::SetCloseOnEOF(bool b) {
-  LLDB_RECORD_METHOD(void, SBCommunication, SetCloseOnEOF, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   if (m_opaque)
     m_opaque->SetCloseOnEOF(b);
 }
 
 ConnectionStatus SBCommunication::Connect(const char *url) {
-  LLDB_RECORD_METHOD(lldb::ConnectionStatus, SBCommunication, Connect,
-                     (const char *), url);
+  LLDB_INSTRUMENT_VA(this, url);
 
   if (m_opaque) {
     if (!m_opaque->HasConnection())
@@ -70,8 +67,7 @@ ConnectionStatus SBCommunication::Connect(const char *url) {
 }
 
 ConnectionStatus SBCommunication::AdoptFileDesriptor(int fd, bool owns_fd) {
-  LLDB_RECORD_METHOD(lldb::ConnectionStatus, SBCommunication,
-                     AdoptFileDesriptor, (int, bool), fd, owns_fd);
+  LLDB_INSTRUMENT_VA(this, fd, owns_fd);
 
   ConnectionStatus status = eConnectionStatusNoConnection;
   if (m_opaque) {
@@ -90,8 +86,7 @@ ConnectionStatus SBCommunication::AdoptFileDesriptor(int fd, bool owns_fd) {
 }
 
 ConnectionStatus SBCommunication::Disconnect() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::ConnectionStatus, SBCommunication,
-                             Disconnect);
+  LLDB_INSTRUMENT_VA(this);
 
   ConnectionStatus status = eConnectionStatusNoConnection;
   if (m_opaque)
@@ -100,16 +95,14 @@ ConnectionStatus SBCommunication::Disconnect() {
 }
 
 bool SBCommunication::IsConnected() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCommunication, IsConnected);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque ? m_opaque->IsConnected() : false;
 }
 
 size_t SBCommunication::Read(void *dst, size_t dst_len, uint32_t timeout_usec,
                              ConnectionStatus &status) {
-  LLDB_RECORD_METHOD(size_t, SBCommunication, Read,
-                     (void *, size_t, uint32_t, lldb::ConnectionStatus &), dst,
-                     dst_len, timeout_usec, status);
+  LLDB_INSTRUMENT_VA(this, dst, dst_len, timeout_usec, status);
 
   size_t bytes_read = 0;
   Timeout<std::micro> timeout = timeout_usec == UINT32_MAX
@@ -125,9 +118,7 @@ size_t SBCommunication::Read(void *dst, size_t dst_len, uint32_t timeout_usec,
 
 size_t SBCommunication::Write(const void *src, size_t src_len,
                               ConnectionStatus &status) {
-  LLDB_RECORD_METHOD(size_t, SBCommunication, Write,
-                     (const void *, size_t, lldb::ConnectionStatus &), src,
-                     src_len, status);
+  LLDB_INSTRUMENT_VA(this, src, src_len, status);
 
   size_t bytes_written = 0;
   if (m_opaque)
@@ -139,28 +130,26 @@ size_t SBCommunication::Write(const void *src, size_t src_len,
 }
 
 bool SBCommunication::ReadThreadStart() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommunication, ReadThreadStart);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque ? m_opaque->StartReadThread() : false;
 }
 
 bool SBCommunication::ReadThreadStop() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommunication, ReadThreadStop);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque ? m_opaque->StopReadThread() : false;
 }
 
 bool SBCommunication::ReadThreadIsRunning() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBCommunication, ReadThreadIsRunning);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque ? m_opaque->ReadThreadIsRunning() : false;
 }
 
 bool SBCommunication::SetReadThreadBytesReceivedCallback(
     ReadThreadBytesReceived callback, void *callback_baton) {
-  LLDB_RECORD_METHOD(bool, SBCommunication, SetReadThreadBytesReceivedCallback,
-                     (lldb::SBCommunication::ReadThreadBytesReceived, void *),
-                     callback, callback_baton);
+  LLDB_INSTRUMENT_VA(this, callback, callback_baton);
 
   bool result = false;
   if (m_opaque) {
@@ -171,16 +160,14 @@ bool SBCommunication::SetReadThreadBytesReceivedCallback(
 }
 
 SBBroadcaster SBCommunication::GetBroadcaster() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBroadcaster, SBCommunication,
-                             GetBroadcaster);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBroadcaster broadcaster(m_opaque, false);
   return broadcaster;
 }
 
 const char *SBCommunication::GetBroadcasterClass() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBCommunication,
-                                    GetBroadcasterClass);
+  LLDB_INSTRUMENT();
 
   return Communication::GetStaticBroadcasterClass().AsCString();
 }
diff --git a/lldb/source/API/SBCompileUnit.cpp b/lldb/source/API/SBCompileUnit.cpp
index f3f3246ef2b5e..46a319c6b7a3a 100644
--- a/lldb/source/API/SBCompileUnit.cpp
+++ b/lldb/source/API/SBCompileUnit.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBCompileUnit.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBLineEntry.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Core/Module.h"
@@ -17,26 +16,23 @@
 #include "lldb/Symbol/SymbolFile.h"
 #include "lldb/Symbol/Type.h"
 #include "lldb/Symbol/TypeList.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBCompileUnit::SBCompileUnit() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBCompileUnit);
-}
+SBCompileUnit::SBCompileUnit() { LLDB_INSTRUMENT_VA(this); }
 
 SBCompileUnit::SBCompileUnit(lldb_private::CompileUnit *lldb_object_ptr)
     : m_opaque_ptr(lldb_object_ptr) {}
 
 SBCompileUnit::SBCompileUnit(const SBCompileUnit &rhs)
     : m_opaque_ptr(rhs.m_opaque_ptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBCompileUnit, (const lldb::SBCompileUnit &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBCompileUnit &SBCompileUnit::operator=(const SBCompileUnit &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBCompileUnit &,
-                     SBCompileUnit, operator=,(const lldb::SBCompileUnit &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_ptr = rhs.m_opaque_ptr;
   return *this;
@@ -45,8 +41,7 @@ const SBCompileUnit &SBCompileUnit::operator=(const SBCompileUnit &rhs) {
 SBCompileUnit::~SBCompileUnit() { m_opaque_ptr = nullptr; }
 
 SBFileSpec SBCompileUnit::GetFileSpec() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBCompileUnit,
-                                   GetFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec file_spec;
   if (m_opaque_ptr)
@@ -55,7 +50,7 @@ SBFileSpec SBCompileUnit::GetFileSpec() const {
 }
 
 uint32_t SBCompileUnit::GetNumLineEntries() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBCompileUnit, GetNumLineEntries);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr) {
     LineTable *line_table = m_opaque_ptr->GetLineTable();
@@ -67,8 +62,7 @@ uint32_t SBCompileUnit::GetNumLineEntries() const {
 }
 
 SBLineEntry SBCompileUnit::GetLineEntryAtIndex(uint32_t idx) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBLineEntry, SBCompileUnit,
-                           GetLineEntryAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBLineEntry sb_line_entry;
   if (m_opaque_ptr) {
@@ -85,9 +79,7 @@ SBLineEntry SBCompileUnit::GetLineEntryAtIndex(uint32_t idx) const {
 
 uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line,
                                            SBFileSpec *inline_file_spec) const {
-  LLDB_RECORD_METHOD_CONST(uint32_t, SBCompileUnit, FindLineEntryIndex,
-                           (uint32_t, uint32_t, lldb::SBFileSpec *), start_idx,
-                           line, inline_file_spec);
+  LLDB_INSTRUMENT_VA(this, start_idx, line, inline_file_spec);
 
   const bool exact = true;
   return FindLineEntryIndex(start_idx, line, inline_file_spec, exact);
@@ -96,9 +88,7 @@ uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line,
 uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line,
                                            SBFileSpec *inline_file_spec,
                                            bool exact) const {
-  LLDB_RECORD_METHOD_CONST(uint32_t, SBCompileUnit, FindLineEntryIndex,
-                           (uint32_t, uint32_t, lldb::SBFileSpec *, bool),
-                           start_idx, line, inline_file_spec, exact);
+  LLDB_INSTRUMENT_VA(this, start_idx, line, inline_file_spec, exact);
 
   uint32_t index = UINT32_MAX;
   if (m_opaque_ptr) {
@@ -118,7 +108,7 @@ uint32_t SBCompileUnit::FindLineEntryIndex(uint32_t start_idx, uint32_t line,
 }
 
 uint32_t SBCompileUnit::GetNumSupportFiles() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBCompileUnit, GetNumSupportFiles);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetSupportFiles().GetSize();
@@ -127,8 +117,7 @@ uint32_t SBCompileUnit::GetNumSupportFiles() const {
 }
 
 lldb::SBTypeList SBCompileUnit::GetTypes(uint32_t type_mask) {
-  LLDB_RECORD_METHOD(lldb::SBTypeList, SBCompileUnit, GetTypes, (uint32_t),
-                     type_mask);
+  LLDB_INSTRUMENT_VA(this, type_mask);
 
   SBTypeList sb_type_list;
 
@@ -151,8 +140,7 @@ lldb::SBTypeList SBCompileUnit::GetTypes(uint32_t type_mask) {
 }
 
 SBFileSpec SBCompileUnit::GetSupportFileAtIndex(uint32_t idx) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBFileSpec, SBCompileUnit,
-                           GetSupportFileAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBFileSpec sb_file_spec;
   if (m_opaque_ptr) {
@@ -166,9 +154,7 @@ SBFileSpec SBCompileUnit::GetSupportFileAtIndex(uint32_t idx) const {
 uint32_t SBCompileUnit::FindSupportFileIndex(uint32_t start_idx,
                                              const SBFileSpec &sb_file,
                                              bool full) {
-  LLDB_RECORD_METHOD(uint32_t, SBCompileUnit, FindSupportFileIndex,
-                     (uint32_t, const lldb::SBFileSpec &, bool), start_idx,
-                     sb_file, full);
+  LLDB_INSTRUMENT_VA(this, start_idx, sb_file, full);
 
   if (m_opaque_ptr) {
     const FileSpecList &support_files = m_opaque_ptr->GetSupportFiles();
@@ -178,7 +164,7 @@ uint32_t SBCompileUnit::FindSupportFileIndex(uint32_t start_idx,
 }
 
 lldb::LanguageType SBCompileUnit::GetLanguage() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::LanguageType, SBCompileUnit, GetLanguage);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetLanguage();
@@ -186,25 +172,23 @@ lldb::LanguageType SBCompileUnit::GetLanguage() {
 }
 
 bool SBCompileUnit::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCompileUnit, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBCompileUnit::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBCompileUnit, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr != nullptr;
 }
 
 bool SBCompileUnit::operator==(const SBCompileUnit &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBCompileUnit, operator==,(const lldb::SBCompileUnit &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr == rhs.m_opaque_ptr;
 }
 
 bool SBCompileUnit::operator!=(const SBCompileUnit &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBCompileUnit, operator!=,(const lldb::SBCompileUnit &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr != rhs.m_opaque_ptr;
 }
@@ -224,8 +208,7 @@ void SBCompileUnit::reset(lldb_private::CompileUnit *lldb_object_ptr) {
 }
 
 bool SBCompileUnit::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBCompileUnit, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
diff --git a/lldb/source/API/SBData.cpp b/lldb/source/API/SBData.cpp
index e58f032a4f9cb..5232bdde1dedd 100644
--- a/lldb/source/API/SBData.cpp
+++ b/lldb/source/API/SBData.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBData.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBError.h"
 #include "lldb/API/SBStream.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/Core/DumpDataExtractor.h"
 #include "lldb/Utility/DataBufferHeap.h"
@@ -23,18 +23,17 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBData::SBData() : m_opaque_sp(new DataExtractor()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBData);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBData::SBData(const lldb::DataExtractorSP &data_sp) : m_opaque_sp(data_sp) {}
 
 SBData::SBData(const SBData &rhs) : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBData, (const lldb::SBData &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBData &SBData::operator=(const SBData &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBData &,
-                     SBData, operator=,(const lldb::SBData &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = rhs.m_opaque_sp;
@@ -58,17 +57,17 @@ lldb::DataExtractorSP &SBData::operator*() { return m_opaque_sp; }
 const lldb::DataExtractorSP &SBData::operator*() const { return m_opaque_sp; }
 
 bool SBData::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBData, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBData::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBData, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 uint8_t SBData::GetAddressByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint8_t, SBData, GetAddressByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   uint8_t value = 0;
   if (m_opaque_sp.get())
@@ -77,22 +76,21 @@ uint8_t SBData::GetAddressByteSize() {
 }
 
 void SBData::SetAddressByteSize(uint8_t addr_byte_size) {
-  LLDB_RECORD_METHOD(void, SBData, SetAddressByteSize, (uint8_t),
-                     addr_byte_size);
+  LLDB_INSTRUMENT_VA(this, addr_byte_size);
 
   if (m_opaque_sp.get())
     m_opaque_sp->SetAddressByteSize(addr_byte_size);
 }
 
 void SBData::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBData, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp.get())
     m_opaque_sp->Clear();
 }
 
 size_t SBData::GetByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBData, GetByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   size_t value = 0;
   if (m_opaque_sp.get())
@@ -101,7 +99,7 @@ size_t SBData::GetByteSize() {
 }
 
 lldb::ByteOrder SBData::GetByteOrder() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::ByteOrder, SBData, GetByteOrder);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::ByteOrder value = eByteOrderInvalid;
   if (m_opaque_sp.get())
@@ -110,15 +108,14 @@ lldb::ByteOrder SBData::GetByteOrder() {
 }
 
 void SBData::SetByteOrder(lldb::ByteOrder endian) {
-  LLDB_RECORD_METHOD(void, SBData, SetByteOrder, (lldb::ByteOrder), endian);
+  LLDB_INSTRUMENT_VA(this, endian);
 
   if (m_opaque_sp.get())
     m_opaque_sp->SetByteOrder(endian);
 }
 
 float SBData::GetFloat(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(float, SBData, GetFloat, (lldb::SBError &, lldb::offset_t),
-                     error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   float value = 0;
   if (!m_opaque_sp.get()) {
@@ -133,8 +130,7 @@ float SBData::GetFloat(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 double SBData::GetDouble(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(double, SBData, GetDouble,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   double value = 0;
   if (!m_opaque_sp.get()) {
@@ -149,8 +145,7 @@ double SBData::GetDouble(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 long double SBData::GetLongDouble(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(long double, SBData, GetLongDouble,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   long double value = 0;
   if (!m_opaque_sp.get()) {
@@ -165,8 +160,7 @@ long double SBData::GetLongDouble(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 lldb::addr_t SBData::GetAddress(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(lldb::addr_t, SBData, GetAddress,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   lldb::addr_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -181,8 +175,7 @@ lldb::addr_t SBData::GetAddress(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 uint8_t SBData::GetUnsignedInt8(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(uint8_t, SBData, GetUnsignedInt8,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   uint8_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -197,8 +190,7 @@ uint8_t SBData::GetUnsignedInt8(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 uint16_t SBData::GetUnsignedInt16(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(uint16_t, SBData, GetUnsignedInt16,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   uint16_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -213,8 +205,7 @@ uint16_t SBData::GetUnsignedInt16(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 uint32_t SBData::GetUnsignedInt32(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(uint32_t, SBData, GetUnsignedInt32,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   uint32_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -229,8 +220,7 @@ uint32_t SBData::GetUnsignedInt32(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 uint64_t SBData::GetUnsignedInt64(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(uint64_t, SBData, GetUnsignedInt64,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   uint64_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -245,8 +235,7 @@ uint64_t SBData::GetUnsignedInt64(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 int8_t SBData::GetSignedInt8(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(int8_t, SBData, GetSignedInt8,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   int8_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -261,8 +250,7 @@ int8_t SBData::GetSignedInt8(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 int16_t SBData::GetSignedInt16(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(int16_t, SBData, GetSignedInt16,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   int16_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -277,8 +265,7 @@ int16_t SBData::GetSignedInt16(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 int32_t SBData::GetSignedInt32(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(int32_t, SBData, GetSignedInt32,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   int32_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -293,8 +280,7 @@ int32_t SBData::GetSignedInt32(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 int64_t SBData::GetSignedInt64(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(int64_t, SBData, GetSignedInt64,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   int64_t value = 0;
   if (!m_opaque_sp.get()) {
@@ -309,8 +295,7 @@ int64_t SBData::GetSignedInt64(lldb::SBError &error, lldb::offset_t offset) {
 }
 
 const char *SBData::GetString(lldb::SBError &error, lldb::offset_t offset) {
-  LLDB_RECORD_METHOD(const char *, SBData, GetString,
-                     (lldb::SBError &, lldb::offset_t), error, offset);
+  LLDB_INSTRUMENT_VA(this, error, offset);
 
   const char *value = nullptr;
   if (!m_opaque_sp.get()) {
@@ -326,8 +311,7 @@ const char *SBData::GetString(lldb::SBError &error, lldb::offset_t offset) {
 
 bool SBData::GetDescription(lldb::SBStream &description,
                             lldb::addr_t base_addr) {
-  LLDB_RECORD_METHOD(bool, SBData, GetDescription,
-                     (lldb::SBStream &, lldb::addr_t), description, base_addr);
+  LLDB_INSTRUMENT_VA(this, description, base_addr);
 
   Stream &strm = description.ref();
 
@@ -342,9 +326,7 @@ bool SBData::GetDescription(lldb::SBStream &description,
 
 size_t SBData::ReadRawData(lldb::SBError &error, lldb::offset_t offset,
                            void *buf, size_t size) {
-  LLDB_RECORD_METHOD(size_t, SBData, ReadRawData,
-                     (lldb::SBError &, lldb::offset_t, void *, size_t), error,
-                     offset, buf, size);
+  LLDB_INSTRUMENT_VA(this, error, offset, buf, size);
 
   void *ok = nullptr;
   if (!m_opaque_sp.get()) {
@@ -360,10 +342,7 @@ size_t SBData::ReadRawData(lldb::SBError &error, lldb::offset_t offset,
 
 void SBData::SetData(lldb::SBError &error, const void *buf, size_t size,
                      lldb::ByteOrder endian, uint8_t addr_size) {
-  LLDB_RECORD_METHOD(
-      void, SBData, SetData,
-      (lldb::SBError &, const void *, size_t, lldb::ByteOrder, uint8_t), error,
-      buf, size, endian, addr_size);
+  LLDB_INSTRUMENT_VA(this, error, buf, size, endian, addr_size);
 
   if (!m_opaque_sp.get())
     m_opaque_sp = std::make_shared<DataExtractor>(buf, size, endian, addr_size);
@@ -377,10 +356,7 @@ void SBData::SetData(lldb::SBError &error, const void *buf, size_t size,
 void SBData::SetDataWithOwnership(lldb::SBError &error, const void *buf,
                                   size_t size, lldb::ByteOrder endian,
                                   uint8_t addr_size) {
-  LLDB_RECORD_METHOD(
-      void, SBData, SetData,
-      (lldb::SBError &, const void *, size_t, lldb::ByteOrder, uint8_t, bool),
-      error, buf, size, endian, addr_size);
+  LLDB_INSTRUMENT_VA(this, error, buf, size, endian, addr_size);
 
   lldb::DataBufferSP buffer_sp = std::make_shared<DataBufferHeap>(buf, size);
 
@@ -394,7 +370,7 @@ void SBData::SetDataWithOwnership(lldb::SBError &error, const void *buf,
 }
 
 bool SBData::Append(const SBData &rhs) {
-  LLDB_RECORD_METHOD(bool, SBData, Append, (const lldb::SBData &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   bool value = false;
   if (m_opaque_sp.get() && rhs.m_opaque_sp.get())
@@ -405,9 +381,7 @@ bool SBData::Append(const SBData &rhs) {
 lldb::SBData SBData::CreateDataFromCString(lldb::ByteOrder endian,
                                            uint32_t addr_byte_size,
                                            const char *data) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromCString,
-                            (lldb::ByteOrder, uint32_t, const char *), endian,
-                            addr_byte_size, data);
+  LLDB_INSTRUMENT_VA(endian, addr_byte_size, data);
 
   if (!data || !data[0])
     return SBData();
@@ -427,9 +401,7 @@ lldb::SBData SBData::CreateDataFromUInt64Array(lldb::ByteOrder endian,
                                                uint32_t addr_byte_size,
                                                uint64_t *array,
                                                size_t array_len) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromUInt64Array,
-                            (lldb::ByteOrder, uint32_t, uint64_t *, size_t),
-                            endian, addr_byte_size, array, array_len);
+  LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len);
 
   if (!array || array_len == 0)
     return SBData();
@@ -449,9 +421,7 @@ lldb::SBData SBData::CreateDataFromUInt32Array(lldb::ByteOrder endian,
                                                uint32_t addr_byte_size,
                                                uint32_t *array,
                                                size_t array_len) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromUInt32Array,
-                            (lldb::ByteOrder, uint32_t, uint32_t *, size_t),
-                            endian, addr_byte_size, array, array_len);
+  LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len);
 
   if (!array || array_len == 0)
     return SBData();
@@ -471,9 +441,7 @@ lldb::SBData SBData::CreateDataFromSInt64Array(lldb::ByteOrder endian,
                                                uint32_t addr_byte_size,
                                                int64_t *array,
                                                size_t array_len) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromSInt64Array,
-                            (lldb::ByteOrder, uint32_t, int64_t *, size_t),
-                            endian, addr_byte_size, array, array_len);
+  LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len);
 
   if (!array || array_len == 0)
     return SBData();
@@ -493,9 +461,7 @@ lldb::SBData SBData::CreateDataFromSInt32Array(lldb::ByteOrder endian,
                                                uint32_t addr_byte_size,
                                                int32_t *array,
                                                size_t array_len) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromSInt32Array,
-                            (lldb::ByteOrder, uint32_t, int32_t *, size_t),
-                            endian, addr_byte_size, array, array_len);
+  LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len);
 
   if (!array || array_len == 0)
     return SBData();
@@ -515,9 +481,7 @@ lldb::SBData SBData::CreateDataFromDoubleArray(lldb::ByteOrder endian,
                                                uint32_t addr_byte_size,
                                                double *array,
                                                size_t array_len) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBData, SBData, CreateDataFromDoubleArray,
-                            (lldb::ByteOrder, uint32_t, double *, size_t),
-                            endian, addr_byte_size, array, array_len);
+  LLDB_INSTRUMENT_VA(endian, addr_byte_size, array, array_len);
 
   if (!array || array_len == 0)
     return SBData();
@@ -534,8 +498,7 @@ lldb::SBData SBData::CreateDataFromDoubleArray(lldb::ByteOrder endian,
 }
 
 bool SBData::SetDataFromCString(const char *data) {
-  LLDB_RECORD_METHOD(bool, SBData, SetDataFromCString, (const char *), data);
-
+  LLDB_INSTRUMENT_VA(this, data);
 
   if (!data) {
     return false;
@@ -556,9 +519,7 @@ bool SBData::SetDataFromCString(const char *data) {
 }
 
 bool SBData::SetDataFromUInt64Array(uint64_t *array, size_t array_len) {
-  LLDB_RECORD_METHOD(bool, SBData, SetDataFromUInt64Array, (uint64_t *, size_t),
-                     array, array_len);
-
+  LLDB_INSTRUMENT_VA(this, array, array_len);
 
   if (!array || array_len == 0) {
     return false;
@@ -579,9 +540,7 @@ bool SBData::SetDataFromUInt64Array(uint64_t *array, size_t array_len) {
 }
 
 bool SBData::SetDataFromUInt32Array(uint32_t *array, size_t array_len) {
-  LLDB_RECORD_METHOD(bool, SBData, SetDataFromUInt32Array, (uint32_t *, size_t),
-                     array, array_len);
-
+  LLDB_INSTRUMENT_VA(this, array, array_len);
 
   if (!array || array_len == 0) {
     return false;
@@ -601,9 +560,7 @@ bool SBData::SetDataFromUInt32Array(uint32_t *array, size_t array_len) {
 }
 
 bool SBData::SetDataFromSInt64Array(int64_t *array, size_t array_len) {
-  LLDB_RECORD_METHOD(bool, SBData, SetDataFromSInt64Array, (int64_t *, size_t),
-                     array, array_len);
-
+  LLDB_INSTRUMENT_VA(this, array, array_len);
 
   if (!array || array_len == 0) {
     return false;
@@ -623,9 +580,7 @@ bool SBData::SetDataFromSInt64Array(int64_t *array, size_t array_len) {
 }
 
 bool SBData::SetDataFromSInt32Array(int32_t *array, size_t array_len) {
-  LLDB_RECORD_METHOD(bool, SBData, SetDataFromSInt32Array, (int32_t *, size_t),
-                     array, array_len);
-
+  LLDB_INSTRUMENT_VA(this, array, array_len);
 
   if (!array || array_len == 0) {
     return false;
@@ -645,9 +600,7 @@ bool SBData::SetDataFromSInt32Array(int32_t *array, size_t array_len) {
 }
 
 bool SBData::SetDataFromDoubleArray(double *array, size_t array_len) {
-  LLDB_RECORD_METHOD(bool, SBData, SetDataFromDoubleArray, (double *, size_t),
-                     array, array_len);
-
+  LLDB_INSTRUMENT_VA(this, array, array_len);
 
   if (!array || array_len == 0) {
     return false;
diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp
index e8d723810fb4b..8b09d6a8e435c 100644
--- a/lldb/source/API/SBDebugger.cpp
+++ b/lldb/source/API/SBDebugger.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "SystemInitializerFull.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBDebugger.h"
 
@@ -105,43 +105,34 @@ SBError SBInputReader::Initialize(
                               unsigned long),
     void *a, lldb::InputReaderGranularity b, char const *c, char const *d,
     bool e) {
-  LLDB_RECORD_METHOD(
-      lldb::SBError, SBInputReader, Initialize,
-      (lldb::SBDebugger &,
-       unsigned long (*)(void *, lldb::SBInputReader *, lldb::InputReaderAction,
-                         const char *, unsigned long),
-       void *, lldb::InputReaderGranularity, const char *, const char *, bool),
-      sb_debugger, callback, a, b, c, d, e);
+  LLDB_INSTRUMENT_VA(this, sb_debugger, callback, a, b, c, d, e);
 
   return SBError();
 }
 
-void SBInputReader::SetIsDone(bool b) {
-  LLDB_RECORD_METHOD(void, SBInputReader, SetIsDone, (bool), b);
-}
+void SBInputReader::SetIsDone(bool b) { LLDB_INSTRUMENT_VA(this, b); }
 
 bool SBInputReader::IsActive() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInputReader, IsActive);
+  LLDB_INSTRUMENT_VA(this);
 
   return false;
 }
 
-SBDebugger::SBDebugger() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBDebugger); }
+SBDebugger::SBDebugger() { LLDB_INSTRUMENT_VA(this); }
 
 SBDebugger::SBDebugger(const lldb::DebuggerSP &debugger_sp)
     : m_opaque_sp(debugger_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBDebugger, (const lldb::DebuggerSP &), debugger_sp);
+  LLDB_INSTRUMENT_VA(this, debugger_sp);
 }
 
 SBDebugger::SBDebugger(const SBDebugger &rhs) : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBDebugger, (const lldb::SBDebugger &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBDebugger::~SBDebugger() = default;
 
 SBDebugger &SBDebugger::operator=(const SBDebugger &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBDebugger &,
-                     SBDebugger, operator=,(const lldb::SBDebugger &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -150,8 +141,7 @@ SBDebugger &SBDebugger::operator=(const SBDebugger &rhs) {
 }
 
 const char *SBDebugger::GetBroadcasterClass() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBDebugger,
-                                    GetBroadcasterClass);
+  LLDB_INSTRUMENT();
 
   return Debugger::GetStaticBroadcasterClass().AsCString();
 }
@@ -161,6 +151,8 @@ const char *SBDebugger::GetProgressFromEvent(const lldb::SBEvent &event,
                                              uint64_t &completed,
                                              uint64_t &total,
                                              bool &is_debugger_specific) {
+  LLDB_INSTRUMENT_VA(event, progress_id, completed, total,
+                     is_debugger_specific);
   const Debugger::ProgressEventData *progress_data =
       Debugger::ProgressEventData::GetEventDataFromEvent(event.get());
   if (progress_data == nullptr)
@@ -169,29 +161,22 @@ const char *SBDebugger::GetProgressFromEvent(const lldb::SBEvent &event,
   completed = progress_data->GetCompleted();
   total = progress_data->GetTotal();
   is_debugger_specific = progress_data->IsDebuggerSpecific();
-  // We must record the static method _after_ the out parameters have been
-  // filled in.
-  LLDB_RECORD_STATIC_METHOD(
-      const char *, SBDebugger, GetProgressFromEvent,
-      (const lldb::SBEvent &, uint64_t &, uint64_t &, uint64_t &, bool &),
-      event, progress_id, completed, total, is_debugger_specific);
   return progress_data->GetMessage().c_str();
 }
 
 SBBroadcaster SBDebugger::GetBroadcaster() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBroadcaster, SBDebugger, GetBroadcaster);
+  LLDB_INSTRUMENT_VA(this);
   SBBroadcaster broadcaster(&m_opaque_sp->GetBroadcaster(), false);
   return broadcaster;
 }
 
 void SBDebugger::Initialize() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(void, SBDebugger, Initialize);
+  LLDB_INSTRUMENT();
   SBError ignored = SBDebugger::InitializeWithErrorHandling();
 }
 
 lldb::SBError SBDebugger::InitializeWithErrorHandling() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBError, SBDebugger,
-                                    InitializeWithErrorHandling);
+  LLDB_INSTRUMENT();
 
   SBError error;
   if (auto e = g_debugger_lifetime->Initialize(
@@ -202,13 +187,13 @@ lldb::SBError SBDebugger::InitializeWithErrorHandling() {
 }
 
 void SBDebugger::Terminate() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(void, SBDebugger, Terminate);
+  LLDB_INSTRUMENT();
 
   g_debugger_lifetime->Terminate();
 }
 
 void SBDebugger::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     m_opaque_sp->ClearIOHandlers();
@@ -217,14 +202,13 @@ void SBDebugger::Clear() {
 }
 
 SBDebugger SBDebugger::Create() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBDebugger, SBDebugger, Create);
+  LLDB_INSTRUMENT();
 
   return SBDebugger::Create(false, nullptr, nullptr);
 }
 
 SBDebugger SBDebugger::Create(bool source_init_files) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBDebugger, SBDebugger, Create, (bool),
-                            source_init_files);
+  LLDB_INSTRUMENT_VA(source_init_files);
 
   return SBDebugger::Create(source_init_files, nullptr, nullptr);
 }
@@ -233,9 +217,7 @@ SBDebugger SBDebugger::Create(bool source_init_files,
                               lldb::LogOutputCallback callback, void *baton)
 
 {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBDebugger, SBDebugger, Create,
-                            (bool, lldb::LogOutputCallback, void *),
-                            source_init_files, callback, baton);
+  LLDB_INSTRUMENT_VA(source_init_files, callback, baton);
 
   SBDebugger debugger;
 
@@ -263,8 +245,7 @@ SBDebugger SBDebugger::Create(bool source_init_files,
 }
 
 void SBDebugger::Destroy(SBDebugger &debugger) {
-  LLDB_RECORD_STATIC_METHOD(void, SBDebugger, Destroy, (lldb::SBDebugger &),
-                            debugger);
+  LLDB_INSTRUMENT_VA(debugger);
 
   Debugger::Destroy(debugger.m_opaque_sp);
 
@@ -273,7 +254,7 @@ void SBDebugger::Destroy(SBDebugger &debugger) {
 }
 
 void SBDebugger::MemoryPressureDetected() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(void, SBDebugger, MemoryPressureDetected);
+  LLDB_INSTRUMENT();
 
   // Since this function can be call asynchronously, we allow it to be non-
   // mandatory. We have seen deadlocks with this function when called so we
@@ -286,52 +267,51 @@ void SBDebugger::MemoryPressureDetected() {
 }
 
 bool SBDebugger::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBDebugger::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 void SBDebugger::SetAsync(bool b) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetAsync, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   if (m_opaque_sp)
     m_opaque_sp->SetAsyncExecution(b);
 }
 
 bool SBDebugger::GetAsync() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBDebugger, GetAsync);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetAsyncExecution() : false);
 }
 
 void SBDebugger::SkipLLDBInitFiles(bool b) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SkipLLDBInitFiles, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   if (m_opaque_sp)
     m_opaque_sp->GetCommandInterpreter().SkipLLDBInitFiles(b);
 }
 
 void SBDebugger::SkipAppInitFiles(bool b) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SkipAppInitFiles, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   if (m_opaque_sp)
     m_opaque_sp->GetCommandInterpreter().SkipAppInitFiles(b);
 }
 
 void SBDebugger::SetInputFileHandle(FILE *fh, bool transfer_ownership) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetInputFileHandle, (FILE *, bool), fh,
-                     transfer_ownership);
+  LLDB_INSTRUMENT_VA(this, fh, transfer_ownership);
   if (m_opaque_sp)
     m_opaque_sp->SetInputFile(
         (FileSP)std::make_shared<NativeFile>(fh, transfer_ownership));
 }
 
 SBError SBDebugger::SetInputString(const char *data) {
-  LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputString, (const char *), data);
+  LLDB_INSTRUMENT_VA(this, data);
   SBError sb_error;
   if (data == nullptr) {
     sb_error.SetErrorString("String data is null");
@@ -357,7 +337,7 @@ SBError SBDebugger::SetInputString(const char *data) {
 // of problems; don't want users trying to switch modes in the middle of a
 // debugging session.
 SBError SBDebugger::SetInputFile(SBFile file) {
-  LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (SBFile), file);
+  LLDB_INSTRUMENT_VA(this, file);
 
   SBError error;
   if (!m_opaque_sp) {
@@ -369,23 +349,22 @@ SBError SBDebugger::SetInputFile(SBFile file) {
 }
 
 SBError SBDebugger::SetInputFile(FileSP file_sp) {
-  LLDB_RECORD_METHOD(SBError, SBDebugger, SetInputFile, (FileSP), file_sp);
+  LLDB_INSTRUMENT_VA(this, file_sp);
   return SetInputFile(SBFile(file_sp));
 }
 
 SBError SBDebugger::SetOutputFile(FileSP file_sp) {
-  LLDB_RECORD_METHOD(SBError, SBDebugger, SetOutputFile, (FileSP), file_sp);
+  LLDB_INSTRUMENT_VA(this, file_sp);
   return SetOutputFile(SBFile(file_sp));
 }
 
 void SBDebugger::SetOutputFileHandle(FILE *fh, bool transfer_ownership) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetOutputFileHandle, (FILE *, bool), fh,
-                     transfer_ownership);
+  LLDB_INSTRUMENT_VA(this, fh, transfer_ownership);
   SetOutputFile((FileSP)std::make_shared<NativeFile>(fh, transfer_ownership));
 }
 
 SBError SBDebugger::SetOutputFile(SBFile file) {
-  LLDB_RECORD_METHOD(SBError, SBDebugger, SetOutputFile, (SBFile file), file);
+  LLDB_INSTRUMENT_VA(this, file);
   SBError error;
   if (!m_opaque_sp) {
     error.ref().SetErrorString("invalid debugger");
@@ -400,18 +379,17 @@ SBError SBDebugger::SetOutputFile(SBFile file) {
 }
 
 void SBDebugger::SetErrorFileHandle(FILE *fh, bool transfer_ownership) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetErrorFileHandle, (FILE *, bool), fh,
-                     transfer_ownership);
+  LLDB_INSTRUMENT_VA(this, fh, transfer_ownership);
   SetErrorFile((FileSP)std::make_shared<NativeFile>(fh, transfer_ownership));
 }
 
 SBError SBDebugger::SetErrorFile(FileSP file_sp) {
-  LLDB_RECORD_METHOD(SBError, SBDebugger, SetErrorFile, (FileSP), file_sp);
+  LLDB_INSTRUMENT_VA(this, file_sp);
   return SetErrorFile(SBFile(file_sp));
 }
 
 SBError SBDebugger::SetErrorFile(SBFile file) {
-  LLDB_RECORD_METHOD(SBError, SBDebugger, SetErrorFile, (SBFile file), file);
+  LLDB_INSTRUMENT_VA(this, file);
   SBError error;
   if (!m_opaque_sp) {
     error.ref().SetErrorString("invalid debugger");
@@ -426,7 +404,7 @@ SBError SBDebugger::SetErrorFile(SBFile file) {
 }
 
 FILE *SBDebugger::GetInputFileHandle() {
-  LLDB_RECORD_METHOD_NO_ARGS(FILE *, SBDebugger, GetInputFileHandle);
+  LLDB_INSTRUMENT_VA(this);
   if (m_opaque_sp) {
     File &file_sp = m_opaque_sp->GetInputFile();
     return file_sp.GetStream();
@@ -435,7 +413,7 @@ FILE *SBDebugger::GetInputFileHandle() {
 }
 
 SBFile SBDebugger::GetInputFile() {
-  LLDB_RECORD_METHOD_NO_ARGS(SBFile, SBDebugger, GetInputFile);
+  LLDB_INSTRUMENT_VA(this);
   if (m_opaque_sp) {
     return SBFile(m_opaque_sp->GetInputFileSP());
   }
@@ -443,7 +421,7 @@ SBFile SBDebugger::GetInputFile() {
 }
 
 FILE *SBDebugger::GetOutputFileHandle() {
-  LLDB_RECORD_METHOD_NO_ARGS(FILE *, SBDebugger, GetOutputFileHandle);
+  LLDB_INSTRUMENT_VA(this);
   if (m_opaque_sp) {
     StreamFile &stream_file = m_opaque_sp->GetOutputStream();
     return stream_file.GetFile().GetStream();
@@ -452,7 +430,7 @@ FILE *SBDebugger::GetOutputFileHandle() {
 }
 
 SBFile SBDebugger::GetOutputFile() {
-  LLDB_RECORD_METHOD_NO_ARGS(SBFile, SBDebugger, GetOutputFile);
+  LLDB_INSTRUMENT_VA(this);
   if (m_opaque_sp) {
     SBFile file(m_opaque_sp->GetOutputStream().GetFileSP());
     return file;
@@ -461,7 +439,7 @@ SBFile SBDebugger::GetOutputFile() {
 }
 
 FILE *SBDebugger::GetErrorFileHandle() {
-  LLDB_RECORD_METHOD_NO_ARGS(FILE *, SBDebugger, GetErrorFileHandle);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp) {
     StreamFile &stream_file = m_opaque_sp->GetErrorStream();
@@ -471,7 +449,7 @@ FILE *SBDebugger::GetErrorFileHandle() {
 }
 
 SBFile SBDebugger::GetErrorFile() {
-  LLDB_RECORD_METHOD_NO_ARGS(SBFile, SBDebugger, GetErrorFile);
+  LLDB_INSTRUMENT_VA(this);
   SBFile file;
   if (m_opaque_sp) {
     SBFile file(m_opaque_sp->GetErrorStream().GetFileSP());
@@ -481,21 +459,20 @@ SBFile SBDebugger::GetErrorFile() {
 }
 
 void SBDebugger::SaveInputTerminalState() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, SaveInputTerminalState);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     m_opaque_sp->SaveInputTerminalState();
 }
 
 void SBDebugger::RestoreInputTerminalState() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, RestoreInputTerminalState);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     m_opaque_sp->RestoreInputTerminalState();
 }
 SBCommandInterpreter SBDebugger::GetCommandInterpreter() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBCommandInterpreter, SBDebugger,
-                             GetCommandInterpreter);
+  LLDB_INSTRUMENT_VA(this);
 
   SBCommandInterpreter sb_interpreter;
   if (m_opaque_sp)
@@ -505,7 +482,7 @@ SBCommandInterpreter SBDebugger::GetCommandInterpreter() {
 }
 
 void SBDebugger::HandleCommand(const char *command) {
-  LLDB_RECORD_METHOD(void, SBDebugger, HandleCommand, (const char *), command);
+  LLDB_INSTRUMENT_VA(this, command);
 
   if (m_opaque_sp) {
     TargetSP target_sp(m_opaque_sp->GetSelectedTarget());
@@ -538,7 +515,7 @@ void SBDebugger::HandleCommand(const char *command) {
 }
 
 SBListener SBDebugger::GetListener() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBListener, SBDebugger, GetListener);
+  LLDB_INSTRUMENT_VA(this);
 
   SBListener sb_listener;
   if (m_opaque_sp)
@@ -550,10 +527,7 @@ SBListener SBDebugger::GetListener() {
 void SBDebugger::HandleProcessEvent(const SBProcess &process,
                                     const SBEvent &event, SBFile out,
                                     SBFile err) {
-  LLDB_RECORD_METHOD(
-      void, SBDebugger, HandleProcessEvent,
-      (const lldb::SBProcess &, const lldb::SBEvent &, SBFile, SBFile), process,
-      event, out, err);
+  LLDB_INSTRUMENT_VA(this, process, event, out, err);
 
   return HandleProcessEvent(process, event, out.m_opaque_sp, err.m_opaque_sp);
 }
@@ -561,10 +535,7 @@ void SBDebugger::HandleProcessEvent(const SBProcess &process,
 void SBDebugger::HandleProcessEvent(const SBProcess &process,
                                     const SBEvent &event, FILE *out,
                                     FILE *err) {
-  LLDB_RECORD_METHOD(
-      void, SBDebugger, HandleProcessEvent,
-      (const lldb::SBProcess &, const lldb::SBEvent &, FILE *, FILE *), process,
-      event, out, err);
+  LLDB_INSTRUMENT_VA(this, process, event, out, err);
 
   FileSP outfile = std::make_shared<NativeFile>(out, false);
   FileSP errfile = std::make_shared<NativeFile>(err, false);
@@ -575,10 +546,7 @@ void SBDebugger::HandleProcessEvent(const SBProcess &process,
                                     const SBEvent &event, FileSP out_sp,
                                     FileSP err_sp) {
 
-  LLDB_RECORD_METHOD(
-      void, SBDebugger, HandleProcessEvent,
-      (const lldb::SBProcess &, const lldb::SBEvent &, FileSP, FileSP), process,
-      event, out_sp, err_sp);
+  LLDB_INSTRUMENT_VA(this, process, event, out_sp, err_sp);
 
   if (!process.IsValid())
     return;
@@ -622,16 +590,14 @@ void SBDebugger::HandleProcessEvent(const SBProcess &process,
 }
 
 SBSourceManager SBDebugger::GetSourceManager() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSourceManager, SBDebugger,
-                             GetSourceManager);
+  LLDB_INSTRUMENT_VA(this);
 
   SBSourceManager sb_source_manager(*this);
   return sb_source_manager;
 }
 
 bool SBDebugger::GetDefaultArchitecture(char *arch_name, size_t arch_name_len) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBDebugger, GetDefaultArchitecture,
-                            (char *, size_t), arch_name, "", arch_name_len);
+  LLDB_INSTRUMENT_VA(arch_name, arch_name_len);
 
   if (arch_name && arch_name_len) {
     ArchSpec default_arch = Target::GetDefaultArchitecture();
@@ -652,8 +618,7 @@ bool SBDebugger::GetDefaultArchitecture(char *arch_name, size_t arch_name_len) {
 }
 
 bool SBDebugger::SetDefaultArchitecture(const char *arch_name) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBDebugger, SetDefaultArchitecture,
-                            (const char *), arch_name);
+  LLDB_INSTRUMENT_VA(arch_name);
 
   if (arch_name) {
     ArchSpec arch(arch_name);
@@ -667,8 +632,7 @@ bool SBDebugger::SetDefaultArchitecture(const char *arch_name) {
 
 ScriptLanguage
 SBDebugger::GetScriptingLanguage(const char *script_language_name) {
-  LLDB_RECORD_METHOD(lldb::ScriptLanguage, SBDebugger, GetScriptingLanguage,
-                     (const char *), script_language_name);
+  LLDB_INSTRUMENT_VA(this, script_language_name);
 
   if (!script_language_name)
     return eScriptLanguageDefault;
@@ -678,8 +642,7 @@ SBDebugger::GetScriptingLanguage(const char *script_language_name) {
 
 SBStructuredData
 SBDebugger::GetScriptInterpreterInfo(lldb::ScriptLanguage language) {
-  LLDB_RECORD_METHOD(SBStructuredData, SBDebugger, GetScriptInterpreterInfo,
-                     (lldb::ScriptLanguage), language);
+  LLDB_INSTRUMENT_VA(this, language);
   SBStructuredData data;
   if (m_opaque_sp) {
     lldb_private::ScriptInterpreter *interp =
@@ -692,14 +655,13 @@ SBDebugger::GetScriptInterpreterInfo(lldb::ScriptLanguage language) {
 }
 
 const char *SBDebugger::GetVersionString() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBDebugger, GetVersionString);
+  LLDB_INSTRUMENT();
 
   return lldb_private::GetVersion();
 }
 
 const char *SBDebugger::StateAsCString(StateType state) {
-  LLDB_RECORD_STATIC_METHOD(const char *, SBDebugger, StateAsCString,
-                            (lldb::StateType), state);
+  LLDB_INSTRUMENT_VA(state);
 
   return lldb_private::StateAsCString(state);
 }
@@ -725,8 +687,7 @@ static void AddLLVMTargets(StructuredData::Dictionary &dict) {
 }
 
 SBStructuredData SBDebugger::GetBuildConfiguration() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBStructuredData, SBDebugger,
-                                    GetBuildConfiguration);
+  LLDB_INSTRUMENT();
 
   auto config_up = std::make_unique<StructuredData::Dictionary>();
   AddBoolConfigEntry(
@@ -758,8 +719,7 @@ SBStructuredData SBDebugger::GetBuildConfiguration() {
 }
 
 bool SBDebugger::StateIsRunningState(StateType state) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBDebugger, StateIsRunningState,
-                            (lldb::StateType), state);
+  LLDB_INSTRUMENT_VA(state);
 
   const bool result = lldb_private::StateIsRunningState(state);
 
@@ -767,8 +727,7 @@ bool SBDebugger::StateIsRunningState(StateType state) {
 }
 
 bool SBDebugger::StateIsStoppedState(StateType state) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBDebugger, StateIsStoppedState,
-                            (lldb::StateType), state);
+  LLDB_INSTRUMENT_VA(state);
 
   const bool result = lldb_private::StateIsStoppedState(state, false);
 
@@ -780,10 +739,8 @@ lldb::SBTarget SBDebugger::CreateTarget(const char *filename,
                                         const char *platform_name,
                                         bool add_dependent_modules,
                                         lldb::SBError &sb_error) {
-  LLDB_RECORD_METHOD(
-      lldb::SBTarget, SBDebugger, CreateTarget,
-      (const char *, const char *, const char *, bool, lldb::SBError &),
-      filename, target_triple, platform_name, add_dependent_modules, sb_error);
+  LLDB_INSTRUMENT_VA(this, filename, target_triple, platform_name,
+                     add_dependent_modules, sb_error);
 
   SBTarget sb_target;
   TargetSP target_sp;
@@ -818,9 +775,7 @@ lldb::SBTarget SBDebugger::CreateTarget(const char *filename,
 SBTarget
 SBDebugger::CreateTargetWithFileAndTargetTriple(const char *filename,
                                                 const char *target_triple) {
-  LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger,
-                     CreateTargetWithFileAndTargetTriple,
-                     (const char *, const char *), filename, target_triple);
+  LLDB_INSTRUMENT_VA(this, filename, target_triple);
 
   SBTarget sb_target;
   TargetSP target_sp;
@@ -845,8 +800,7 @@ SBDebugger::CreateTargetWithFileAndTargetTriple(const char *filename,
 
 SBTarget SBDebugger::CreateTargetWithFileAndArch(const char *filename,
                                                  const char *arch_cstr) {
-  LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, CreateTargetWithFileAndArch,
-                     (const char *, const char *), filename, arch_cstr);
+  LLDB_INSTRUMENT_VA(this, filename, arch_cstr);
 
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API));
 
@@ -858,16 +812,18 @@ SBTarget SBDebugger::CreateTargetWithFileAndArch(const char *filename,
       // The version of CreateTarget that takes an ArchSpec won't accept an
       // empty ArchSpec, so when the arch hasn't been specified, we need to
       // call the target triple version.
-      error = m_opaque_sp->GetTargetList().CreateTarget(*m_opaque_sp, filename, 
-          arch_cstr, eLoadDependentsYes, nullptr, target_sp);
+      error = m_opaque_sp->GetTargetList().CreateTarget(
+          *m_opaque_sp, filename, arch_cstr, eLoadDependentsYes, nullptr,
+          target_sp);
     } else {
-      PlatformSP platform_sp = m_opaque_sp->GetPlatformList()
-          .GetSelectedPlatform();
-      ArchSpec arch = Platform::GetAugmentedArchSpec(platform_sp.get(), 
-          arch_cstr);
+      PlatformSP platform_sp =
+          m_opaque_sp->GetPlatformList().GetSelectedPlatform();
+      ArchSpec arch =
+          Platform::GetAugmentedArchSpec(platform_sp.get(), arch_cstr);
       if (arch.IsValid())
-        error = m_opaque_sp->GetTargetList().CreateTarget(*m_opaque_sp, filename, 
-            arch, eLoadDependentsYes, platform_sp, target_sp);
+        error = m_opaque_sp->GetTargetList().CreateTarget(
+            *m_opaque_sp, filename, arch, eLoadDependentsYes, platform_sp,
+            target_sp);
       else
         error.SetErrorStringWithFormat("invalid arch_cstr: %s", arch_cstr);
     }
@@ -887,8 +843,7 @@ SBTarget SBDebugger::CreateTargetWithFileAndArch(const char *filename,
 }
 
 SBTarget SBDebugger::CreateTarget(const char *filename) {
-  LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, CreateTarget, (const char *),
-                     filename);
+  LLDB_INSTRUMENT_VA(this, filename);
 
   SBTarget sb_target;
   TargetSP target_sp;
@@ -912,7 +867,7 @@ SBTarget SBDebugger::CreateTarget(const char *filename) {
 }
 
 SBTarget SBDebugger::GetDummyTarget() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTarget, SBDebugger, GetDummyTarget);
+  LLDB_INSTRUMENT_VA(this);
 
   SBTarget sb_target;
   if (m_opaque_sp) {
@@ -926,8 +881,7 @@ SBTarget SBDebugger::GetDummyTarget() {
 }
 
 bool SBDebugger::DeleteTarget(lldb::SBTarget &target) {
-  LLDB_RECORD_METHOD(bool, SBDebugger, DeleteTarget, (lldb::SBTarget &),
-                     target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   bool result = false;
   if (m_opaque_sp) {
@@ -949,8 +903,7 @@ bool SBDebugger::DeleteTarget(lldb::SBTarget &target) {
 }
 
 SBTarget SBDebugger::GetTargetAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, GetTargetAtIndex, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBTarget sb_target;
   if (m_opaque_sp) {
@@ -961,8 +914,7 @@ SBTarget SBDebugger::GetTargetAtIndex(uint32_t idx) {
 }
 
 uint32_t SBDebugger::GetIndexOfTarget(lldb::SBTarget target) {
-  LLDB_RECORD_METHOD(uint32_t, SBDebugger, GetIndexOfTarget, (lldb::SBTarget),
-                     target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   lldb::TargetSP target_sp = target.GetSP();
   if (!target_sp)
@@ -975,8 +927,7 @@ uint32_t SBDebugger::GetIndexOfTarget(lldb::SBTarget target) {
 }
 
 SBTarget SBDebugger::FindTargetWithProcessID(lldb::pid_t pid) {
-  LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, FindTargetWithProcessID,
-                     (lldb::pid_t), pid);
+  LLDB_INSTRUMENT_VA(this, pid);
 
   SBTarget sb_target;
   if (m_opaque_sp) {
@@ -988,8 +939,7 @@ SBTarget SBDebugger::FindTargetWithProcessID(lldb::pid_t pid) {
 
 SBTarget SBDebugger::FindTargetWithFileAndArch(const char *filename,
                                                const char *arch_name) {
-  LLDB_RECORD_METHOD(lldb::SBTarget, SBDebugger, FindTargetWithFileAndArch,
-                     (const char *, const char *), filename, arch_name);
+  LLDB_INSTRUMENT_VA(this, filename, arch_name);
 
   SBTarget sb_target;
   if (m_opaque_sp && filename && filename[0]) {
@@ -1015,7 +965,7 @@ SBTarget SBDebugger::FindTargetWithLLDBProcess(const ProcessSP &process_sp) {
 }
 
 uint32_t SBDebugger::GetNumTargets() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBDebugger, GetNumTargets);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp) {
     // No need to lock, the target list is thread safe
@@ -1025,7 +975,7 @@ uint32_t SBDebugger::GetNumTargets() {
 }
 
 SBTarget SBDebugger::GetSelectedTarget() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTarget, SBDebugger, GetSelectedTarget);
+  LLDB_INSTRUMENT_VA(this);
 
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API));
 
@@ -1049,8 +999,7 @@ SBTarget SBDebugger::GetSelectedTarget() {
 }
 
 void SBDebugger::SetSelectedTarget(SBTarget &sb_target) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetSelectedTarget, (lldb::SBTarget &),
-                     sb_target);
+  LLDB_INSTRUMENT_VA(this, sb_target);
 
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API));
 
@@ -1068,7 +1017,7 @@ void SBDebugger::SetSelectedTarget(SBTarget &sb_target) {
 }
 
 SBPlatform SBDebugger::GetSelectedPlatform() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBPlatform, SBDebugger, GetSelectedPlatform);
+  LLDB_INSTRUMENT_VA(this);
 
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API));
 
@@ -1085,8 +1034,7 @@ SBPlatform SBDebugger::GetSelectedPlatform() {
 }
 
 void SBDebugger::SetSelectedPlatform(SBPlatform &sb_platform) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetSelectedPlatform,
-                     (lldb::SBPlatform &), sb_platform);
+  LLDB_INSTRUMENT_VA(this, sb_platform);
 
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API));
 
@@ -1102,7 +1050,7 @@ void SBDebugger::SetSelectedPlatform(SBPlatform &sb_platform) {
 }
 
 uint32_t SBDebugger::GetNumPlatforms() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBDebugger, GetNumPlatforms);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp) {
     // No need to lock, the platform list is thread safe
@@ -1112,8 +1060,7 @@ uint32_t SBDebugger::GetNumPlatforms() {
 }
 
 SBPlatform SBDebugger::GetPlatformAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBPlatform, SBDebugger, GetPlatformAtIndex,
-                     (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBPlatform sb_platform;
   if (m_opaque_sp) {
@@ -1124,7 +1071,7 @@ SBPlatform SBDebugger::GetPlatformAtIndex(uint32_t idx) {
 }
 
 uint32_t SBDebugger::GetNumAvailablePlatforms() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBDebugger, GetNumAvailablePlatforms);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t idx = 0;
   while (true) {
@@ -1138,8 +1085,7 @@ uint32_t SBDebugger::GetNumAvailablePlatforms() {
 }
 
 SBStructuredData SBDebugger::GetAvailablePlatformInfoAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBStructuredData, SBDebugger,
-                     GetAvailablePlatformInfoAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBStructuredData data;
   auto platform_dict = std::make_unique<StructuredData::Dictionary>();
@@ -1169,15 +1115,13 @@ SBStructuredData SBDebugger::GetAvailablePlatformInfoAtIndex(uint32_t idx) {
 }
 
 void SBDebugger::DispatchInput(void *baton, const void *data, size_t data_len) {
-  LLDB_RECORD_METHOD(void, SBDebugger, DispatchInput,
-                     (void *, const void *, size_t), baton, data, data_len);
+  LLDB_INSTRUMENT_VA(this, baton, data, data_len);
 
   DispatchInput(data, data_len);
 }
 
 void SBDebugger::DispatchInput(const void *data, size_t data_len) {
-  LLDB_RECORD_METHOD(void, SBDebugger, DispatchInput, (const void *, size_t),
-                     data, data_len);
+  LLDB_INSTRUMENT_VA(this, data, data_len);
 
   //    Log *log(GetLogIfAllCategoriesSet (LIBLLDB_LOG_API));
   //
@@ -1194,28 +1138,26 @@ void SBDebugger::DispatchInput(const void *data, size_t data_len) {
 }
 
 void SBDebugger::DispatchInputInterrupt() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, DispatchInputInterrupt);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     m_opaque_sp->DispatchInputInterrupt();
 }
 
 void SBDebugger::DispatchInputEndOfFile() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBDebugger, DispatchInputEndOfFile);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     m_opaque_sp->DispatchInputEndOfFile();
 }
 
 void SBDebugger::PushInputReader(SBInputReader &reader) {
-  LLDB_RECORD_METHOD(void, SBDebugger, PushInputReader, (lldb::SBInputReader &),
-                     reader);
+  LLDB_INSTRUMENT_VA(this, reader);
 }
 
 void SBDebugger::RunCommandInterpreter(bool auto_handle_events,
                                        bool spawn_thread) {
-  LLDB_RECORD_METHOD(void, SBDebugger, RunCommandInterpreter, (bool, bool),
-                     auto_handle_events, spawn_thread);
+  LLDB_INSTRUMENT_VA(this, auto_handle_events, spawn_thread);
 
   if (m_opaque_sp) {
     CommandInterpreterRunOptions options;
@@ -1232,11 +1174,8 @@ void SBDebugger::RunCommandInterpreter(bool auto_handle_events,
                                        bool &stopped_for_crash)
 
 {
-  LLDB_RECORD_METHOD(void, SBDebugger, RunCommandInterpreter,
-                     (bool, bool, lldb::SBCommandInterpreterRunOptions &, int &,
-                      bool &, bool &),
-                     auto_handle_events, spawn_thread, options, num_errors,
-                     quit_requested, stopped_for_crash);
+  LLDB_INSTRUMENT_VA(this, auto_handle_events, spawn_thread, options,
+                     num_errors, quit_requested, stopped_for_crash);
 
   if (m_opaque_sp) {
     options.SetAutoHandleEvents(auto_handle_events);
@@ -1254,9 +1193,7 @@ void SBDebugger::RunCommandInterpreter(bool auto_handle_events,
 
 SBCommandInterpreterRunResult SBDebugger::RunCommandInterpreter(
     const SBCommandInterpreterRunOptions &options) {
-  LLDB_RECORD_METHOD(lldb::SBCommandInterpreterRunResult, SBDebugger,
-                     RunCommandInterpreter,
-                     (const lldb::SBCommandInterpreterRunOptions &), options);
+  LLDB_INSTRUMENT_VA(this, options);
 
   if (!m_opaque_sp)
     return SBCommandInterpreterRunResult();
@@ -1270,9 +1207,7 @@ SBCommandInterpreterRunResult SBDebugger::RunCommandInterpreter(
 
 SBError SBDebugger::RunREPL(lldb::LanguageType language,
                             const char *repl_options) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBDebugger, RunREPL,
-                     (lldb::LanguageType, const char *), language,
-                     repl_options);
+  LLDB_INSTRUMENT_VA(this, language, repl_options);
 
   SBError error;
   if (m_opaque_sp)
@@ -1296,8 +1231,7 @@ Debugger &SBDebugger::ref() const {
 const lldb::DebuggerSP &SBDebugger::get_sp() const { return m_opaque_sp; }
 
 SBDebugger SBDebugger::FindDebuggerWithID(int id) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBDebugger, SBDebugger, FindDebuggerWithID,
-                            (int), id);
+  LLDB_INSTRUMENT_VA(id);
 
   // No need to lock, the debugger list is thread safe
   SBDebugger sb_debugger;
@@ -1308,16 +1242,14 @@ SBDebugger SBDebugger::FindDebuggerWithID(int id) {
 }
 
 const char *SBDebugger::GetInstanceName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBDebugger, GetInstanceName);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetInstanceName().AsCString() : nullptr);
 }
 
 SBError SBDebugger::SetInternalVariable(const char *var_name, const char *value,
                                         const char *debugger_instance_name) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBError, SBDebugger, SetInternalVariable,
-                            (const char *, const char *, const char *),
-                            var_name, value, debugger_instance_name);
+  LLDB_INSTRUMENT_VA(var_name, value, debugger_instance_name);
 
   SBError sb_error;
   DebuggerSP debugger_sp(Debugger::FindDebuggerWithInstanceName(
@@ -1340,9 +1272,7 @@ SBError SBDebugger::SetInternalVariable(const char *var_name, const char *value,
 SBStringList
 SBDebugger::GetInternalVariableValue(const char *var_name,
                                      const char *debugger_instance_name) {
-  LLDB_RECORD_STATIC_METHOD(
-      lldb::SBStringList, SBDebugger, GetInternalVariableValue,
-      (const char *, const char *), var_name, debugger_instance_name);
+  LLDB_INSTRUMENT_VA(var_name, debugger_instance_name);
 
   DebuggerSP debugger_sp(Debugger::FindDebuggerWithInstanceName(
       ConstString(debugger_instance_name)));
@@ -1367,21 +1297,20 @@ SBDebugger::GetInternalVariableValue(const char *var_name,
 }
 
 uint32_t SBDebugger::GetTerminalWidth() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBDebugger, GetTerminalWidth);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetTerminalWidth() : 0);
 }
 
 void SBDebugger::SetTerminalWidth(uint32_t term_width) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetTerminalWidth, (uint32_t),
-                     term_width);
+  LLDB_INSTRUMENT_VA(this, term_width);
 
   if (m_opaque_sp)
     m_opaque_sp->SetTerminalWidth(term_width);
 }
 
 const char *SBDebugger::GetPrompt() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBDebugger, GetPrompt);
+  LLDB_INSTRUMENT_VA(this);
 
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API));
 
@@ -1394,14 +1323,14 @@ const char *SBDebugger::GetPrompt() const {
 }
 
 void SBDebugger::SetPrompt(const char *prompt) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetPrompt, (const char *), prompt);
+  LLDB_INSTRUMENT_VA(this, prompt);
 
   if (m_opaque_sp)
     m_opaque_sp->SetPrompt(llvm::StringRef(prompt));
 }
 
 const char *SBDebugger::GetReproducerPath() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBDebugger, GetReproducerPath);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp
               ? ConstString(m_opaque_sp->GetReproducerPath()).GetCString()
@@ -1409,15 +1338,13 @@ const char *SBDebugger::GetReproducerPath() const {
 }
 
 ScriptLanguage SBDebugger::GetScriptLanguage() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::ScriptLanguage, SBDebugger,
-                                   GetScriptLanguage);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetScriptLanguage() : eScriptLanguageNone);
 }
 
 void SBDebugger::SetScriptLanguage(ScriptLanguage script_lang) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetScriptLanguage,
-                     (lldb::ScriptLanguage), script_lang);
+  LLDB_INSTRUMENT_VA(this, script_lang);
 
   if (m_opaque_sp) {
     m_opaque_sp->SetScriptLanguage(script_lang);
@@ -1425,15 +1352,13 @@ void SBDebugger::SetScriptLanguage(ScriptLanguage script_lang) {
 }
 
 LanguageType SBDebugger::GetREPLLanguage() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::LanguageType, SBDebugger,
-                                   GetREPLLanguage);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetREPLLanguage() : eLanguageTypeUnknown);
 }
 
 void SBDebugger::SetREPLLanguage(LanguageType repl_lang) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetREPLLanguage, (lldb::LanguageType),
-                     repl_lang);
+  LLDB_INSTRUMENT_VA(this, repl_lang);
 
   if (m_opaque_sp) {
     m_opaque_sp->SetREPLLanguage(repl_lang);
@@ -1441,44 +1366,43 @@ void SBDebugger::SetREPLLanguage(LanguageType repl_lang) {
 }
 
 bool SBDebugger::SetUseExternalEditor(bool value) {
-  LLDB_RECORD_METHOD(bool, SBDebugger, SetUseExternalEditor, (bool), value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   return (m_opaque_sp ? m_opaque_sp->SetUseExternalEditor(value) : false);
 }
 
 bool SBDebugger::GetUseExternalEditor() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBDebugger, GetUseExternalEditor);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetUseExternalEditor() : false);
 }
 
 bool SBDebugger::SetUseColor(bool value) {
-  LLDB_RECORD_METHOD(bool, SBDebugger, SetUseColor, (bool), value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   return (m_opaque_sp ? m_opaque_sp->SetUseColor(value) : false);
 }
 
 bool SBDebugger::GetUseColor() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, GetUseColor);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetUseColor() : false);
 }
 
 bool SBDebugger::SetUseSourceCache(bool value) {
-  LLDB_RECORD_METHOD(bool, SBDebugger, SetUseSourceCache, (bool), value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   return (m_opaque_sp ? m_opaque_sp->SetUseSourceCache(value) : false);
 }
 
 bool SBDebugger::GetUseSourceCache() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, GetUseSourceCache);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetUseSourceCache() : false);
 }
 
 bool SBDebugger::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBDebugger, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -1493,14 +1417,13 @@ bool SBDebugger::GetDescription(SBStream &description) {
 }
 
 user_id_t SBDebugger::GetID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::user_id_t, SBDebugger, GetID);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetID() : LLDB_INVALID_UID);
 }
 
 SBError SBDebugger::SetCurrentPlatform(const char *platform_name_cstr) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBDebugger, SetCurrentPlatform,
-                     (const char *), platform_name_cstr);
+  LLDB_INSTRUMENT_VA(this, platform_name_cstr);
 
   SBError sb_error;
   if (m_opaque_sp) {
@@ -1530,8 +1453,7 @@ SBError SBDebugger::SetCurrentPlatform(const char *platform_name_cstr) {
 }
 
 bool SBDebugger::SetCurrentPlatformSDKRoot(const char *sysroot) {
-  LLDB_RECORD_METHOD(bool, SBDebugger, SetCurrentPlatformSDKRoot,
-                     (const char *), sysroot);
+  LLDB_INSTRUMENT_VA(this, sysroot);
 
   if (SBPlatform platform = GetSelectedPlatform()) {
     platform.SetSDKRoot(sysroot);
@@ -1541,21 +1463,20 @@ bool SBDebugger::SetCurrentPlatformSDKRoot(const char *sysroot) {
 }
 
 bool SBDebugger::GetCloseInputOnEOF() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDebugger, GetCloseInputOnEOF);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp ? m_opaque_sp->GetCloseInputOnEOF() : false);
 }
 
 void SBDebugger::SetCloseInputOnEOF(bool b) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetCloseInputOnEOF, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   if (m_opaque_sp)
     m_opaque_sp->SetCloseInputOnEOF(b);
 }
 
 SBTypeCategory SBDebugger::GetCategory(const char *category_name) {
-  LLDB_RECORD_METHOD(lldb::SBTypeCategory, SBDebugger, GetCategory,
-                     (const char *), category_name);
+  LLDB_INSTRUMENT_VA(this, category_name);
 
   if (!category_name || *category_name == 0)
     return SBTypeCategory();
@@ -1571,8 +1492,7 @@ SBTypeCategory SBDebugger::GetCategory(const char *category_name) {
 }
 
 SBTypeCategory SBDebugger::GetCategory(lldb::LanguageType lang_type) {
-  LLDB_RECORD_METHOD(lldb::SBTypeCategory, SBDebugger, GetCategory,
-                     (lldb::LanguageType), lang_type);
+  LLDB_INSTRUMENT_VA(this, lang_type);
 
   TypeCategoryImplSP category_sp;
   if (DataVisualization::Categories::GetCategory(lang_type, category_sp)) {
@@ -1583,8 +1503,7 @@ SBTypeCategory SBDebugger::GetCategory(lldb::LanguageType lang_type) {
 }
 
 SBTypeCategory SBDebugger::CreateCategory(const char *category_name) {
-  LLDB_RECORD_METHOD(lldb::SBTypeCategory, SBDebugger, CreateCategory,
-                     (const char *), category_name);
+  LLDB_INSTRUMENT_VA(this, category_name);
 
   if (!category_name || *category_name == 0)
     return SBTypeCategory();
@@ -1600,8 +1519,7 @@ SBTypeCategory SBDebugger::CreateCategory(const char *category_name) {
 }
 
 bool SBDebugger::DeleteCategory(const char *category_name) {
-  LLDB_RECORD_METHOD(bool, SBDebugger, DeleteCategory, (const char *),
-                     category_name);
+  LLDB_INSTRUMENT_VA(this, category_name);
 
   if (!category_name || *category_name == 0)
     return false;
@@ -1610,29 +1528,26 @@ bool SBDebugger::DeleteCategory(const char *category_name) {
 }
 
 uint32_t SBDebugger::GetNumCategories() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBDebugger, GetNumCategories);
+  LLDB_INSTRUMENT_VA(this);
 
   return DataVisualization::Categories::GetCount();
 }
 
 SBTypeCategory SBDebugger::GetCategoryAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeCategory, SBDebugger, GetCategoryAtIndex,
-                     (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   return SBTypeCategory(
       DataVisualization::Categories::GetCategoryAtIndex(index));
 }
 
 SBTypeCategory SBDebugger::GetDefaultCategory() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeCategory, SBDebugger,
-                             GetDefaultCategory);
+  LLDB_INSTRUMENT_VA(this);
 
   return GetCategory("default");
 }
 
 SBTypeFormat SBDebugger::GetFormatForType(SBTypeNameSpecifier type_name) {
-  LLDB_RECORD_METHOD(lldb::SBTypeFormat, SBDebugger, GetFormatForType,
-                     (lldb::SBTypeNameSpecifier), type_name);
+  LLDB_INSTRUMENT_VA(this, type_name);
 
   SBTypeCategory default_category_sb = GetDefaultCategory();
   if (default_category_sb.GetEnabled())
@@ -1641,8 +1556,7 @@ SBTypeFormat SBDebugger::GetFormatForType(SBTypeNameSpecifier type_name) {
 }
 
 SBTypeSummary SBDebugger::GetSummaryForType(SBTypeNameSpecifier type_name) {
-  LLDB_RECORD_METHOD(lldb::SBTypeSummary, SBDebugger, GetSummaryForType,
-                     (lldb::SBTypeNameSpecifier), type_name);
+  LLDB_INSTRUMENT_VA(this, type_name);
 
   if (!type_name.IsValid())
     return SBTypeSummary();
@@ -1650,8 +1564,7 @@ SBTypeSummary SBDebugger::GetSummaryForType(SBTypeNameSpecifier type_name) {
 }
 
 SBTypeFilter SBDebugger::GetFilterForType(SBTypeNameSpecifier type_name) {
-  LLDB_RECORD_METHOD(lldb::SBTypeFilter, SBDebugger, GetFilterForType,
-                     (lldb::SBTypeNameSpecifier), type_name);
+  LLDB_INSTRUMENT_VA(this, type_name);
 
   if (!type_name.IsValid())
     return SBTypeFilter();
@@ -1659,8 +1572,7 @@ SBTypeFilter SBDebugger::GetFilterForType(SBTypeNameSpecifier type_name) {
 }
 
 SBTypeSynthetic SBDebugger::GetSyntheticForType(SBTypeNameSpecifier type_name) {
-  LLDB_RECORD_METHOD(lldb::SBTypeSynthetic, SBDebugger, GetSyntheticForType,
-                     (lldb::SBTypeNameSpecifier), type_name);
+  LLDB_INSTRUMENT_VA(this, type_name);
 
   if (!type_name.IsValid())
     return SBTypeSynthetic();
@@ -1678,8 +1590,7 @@ static llvm::ArrayRef<const char *> GetCategoryArray(const char **categories) {
 }
 
 bool SBDebugger::EnableLog(const char *channel, const char **categories) {
-  LLDB_RECORD_METHOD(bool, SBDebugger, EnableLog, (const char *, const char **),
-                     channel, categories);
+  LLDB_INSTRUMENT_VA(this, channel, categories);
 
   if (m_opaque_sp) {
     uint32_t log_options =
@@ -1694,8 +1605,7 @@ bool SBDebugger::EnableLog(const char *channel, const char **categories) {
 
 void SBDebugger::SetLoggingCallback(lldb::LogOutputCallback log_callback,
                                     void *baton) {
-  LLDB_RECORD_METHOD(void, SBDebugger, SetLoggingCallback,
-                     (lldb::LogOutputCallback, void *), log_callback, baton);
+  LLDB_INSTRUMENT_VA(this, log_callback, baton);
 
   if (m_opaque_sp) {
     return m_opaque_sp->SetLoggingCallback(log_callback, baton);
diff --git a/lldb/source/API/SBDeclaration.cpp b/lldb/source/API/SBDeclaration.cpp
index f0273b081bb86..5b7def09b5ccf 100644
--- a/lldb/source/API/SBDeclaration.cpp
+++ b/lldb/source/API/SBDeclaration.cpp
@@ -7,11 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBDeclaration.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Core/Declaration.h"
 #include "lldb/Host/PosixApi.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Stream.h"
 
 #include <climits>
@@ -19,12 +19,10 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBDeclaration::SBDeclaration() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBDeclaration);
-}
+SBDeclaration::SBDeclaration() { LLDB_INSTRUMENT_VA(this); }
 
 SBDeclaration::SBDeclaration(const SBDeclaration &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBDeclaration, (const lldb::SBDeclaration &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -35,9 +33,7 @@ SBDeclaration::SBDeclaration(const lldb_private::Declaration *lldb_object_ptr) {
 }
 
 const SBDeclaration &SBDeclaration::operator=(const SBDeclaration &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBDeclaration &,
-                     SBDeclaration, operator=,(const lldb::SBDeclaration &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -52,19 +48,17 @@ void SBDeclaration::SetDeclaration(
 SBDeclaration::~SBDeclaration() = default;
 
 bool SBDeclaration::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDeclaration, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBDeclaration::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBDeclaration, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up.get() && m_opaque_up->IsValid();
 }
 
 SBFileSpec SBDeclaration::GetFileSpec() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBDeclaration,
-                                   GetFileSpec);
-
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec sb_file_spec;
   if (m_opaque_up.get() && m_opaque_up->GetFile())
@@ -74,8 +68,7 @@ SBFileSpec SBDeclaration::GetFileSpec() const {
 }
 
 uint32_t SBDeclaration::GetLine() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBDeclaration, GetLine);
-
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t line = 0;
   if (m_opaque_up)
@@ -86,7 +79,7 @@ uint32_t SBDeclaration::GetLine() const {
 }
 
 uint32_t SBDeclaration::GetColumn() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBDeclaration, GetColumn);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->GetColumn();
@@ -94,8 +87,7 @@ uint32_t SBDeclaration::GetColumn() const {
 }
 
 void SBDeclaration::SetFileSpec(lldb::SBFileSpec filespec) {
-  LLDB_RECORD_METHOD(void, SBDeclaration, SetFileSpec, (lldb::SBFileSpec),
-                     filespec);
+  LLDB_INSTRUMENT_VA(this, filespec);
 
   if (filespec.IsValid())
     ref().SetFile(filespec.ref());
@@ -103,20 +95,19 @@ void SBDeclaration::SetFileSpec(lldb::SBFileSpec filespec) {
     ref().SetFile(FileSpec());
 }
 void SBDeclaration::SetLine(uint32_t line) {
-  LLDB_RECORD_METHOD(void, SBDeclaration, SetLine, (uint32_t), line);
+  LLDB_INSTRUMENT_VA(this, line);
 
   ref().SetLine(line);
 }
 
 void SBDeclaration::SetColumn(uint32_t column) {
-  LLDB_RECORD_METHOD(void, SBDeclaration, SetColumn, (uint32_t), column);
+  LLDB_INSTRUMENT_VA(this, column);
 
   ref().SetColumn(column);
 }
 
 bool SBDeclaration::operator==(const SBDeclaration &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBDeclaration, operator==,(const lldb::SBDeclaration &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   lldb_private::Declaration *lhs_ptr = m_opaque_up.get();
   lldb_private::Declaration *rhs_ptr = rhs.m_opaque_up.get();
@@ -128,8 +119,7 @@ bool SBDeclaration::operator==(const SBDeclaration &rhs) const {
 }
 
 bool SBDeclaration::operator!=(const SBDeclaration &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBDeclaration, operator!=,(const lldb::SBDeclaration &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   lldb_private::Declaration *lhs_ptr = m_opaque_up.get();
   lldb_private::Declaration *rhs_ptr = rhs.m_opaque_up.get();
@@ -155,8 +145,7 @@ const lldb_private::Declaration &SBDeclaration::ref() const {
 }
 
 bool SBDeclaration::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBDeclaration, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
diff --git a/lldb/source/API/SBEnvironment.cpp b/lldb/source/API/SBEnvironment.cpp
index d1df3ad123450..5fafabe02e014 100644
--- a/lldb/source/API/SBEnvironment.cpp
+++ b/lldb/source/API/SBEnvironment.cpp
@@ -7,22 +7,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBEnvironment.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStringList.h"
 #include "lldb/Utility/ConstString.h"
 #include "lldb/Utility/Environment.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 SBEnvironment::SBEnvironment() : m_opaque_up(new Environment()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBEnvironment);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBEnvironment::SBEnvironment(const SBEnvironment &rhs)
     : m_opaque_up(clone(rhs.m_opaque_up)) {
-  LLDB_RECORD_CONSTRUCTOR(SBEnvironment, (const lldb::SBEnvironment &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBEnvironment::SBEnvironment(Environment rhs)
@@ -31,9 +31,7 @@ SBEnvironment::SBEnvironment(Environment rhs)
 SBEnvironment::~SBEnvironment() = default;
 
 const SBEnvironment &SBEnvironment::operator=(const SBEnvironment &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBEnvironment &,
-                     SBEnvironment, operator=,(const lldb::SBEnvironment &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -41,13 +39,13 @@ const SBEnvironment &SBEnvironment::operator=(const SBEnvironment &rhs) {
 }
 
 size_t SBEnvironment::GetNumValues() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBEnvironment, GetNumValues);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->size();
 }
 
 const char *SBEnvironment::Get(const char *name) {
-  LLDB_RECORD_METHOD(const char *, SBEnvironment, Get, (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   auto entry = m_opaque_up->find(name);
   if (entry == m_opaque_up->end()) {
@@ -57,8 +55,7 @@ const char *SBEnvironment::Get(const char *name) {
 }
 
 const char *SBEnvironment::GetNameAtIndex(size_t index) {
-  LLDB_RECORD_METHOD(const char *, SBEnvironment, GetNameAtIndex, (size_t),
-                     index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (index >= GetNumValues())
     return nullptr;
@@ -67,8 +64,7 @@ const char *SBEnvironment::GetNameAtIndex(size_t index) {
 }
 
 const char *SBEnvironment::GetValueAtIndex(size_t index) {
-  LLDB_RECORD_METHOD(const char *, SBEnvironment, GetValueAtIndex, (size_t),
-                     index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (index >= GetNumValues())
     return nullptr;
@@ -77,9 +73,7 @@ const char *SBEnvironment::GetValueAtIndex(size_t index) {
 }
 
 bool SBEnvironment::Set(const char *name, const char *value, bool overwrite) {
-  LLDB_RECORD_METHOD(bool, SBEnvironment, Set,
-                     (const char *, const char *, bool), name, value,
-                     overwrite);
+  LLDB_INSTRUMENT_VA(this, name, value, overwrite);
 
   if (overwrite) {
     m_opaque_up->insert_or_assign(name, std::string(value));
@@ -89,13 +83,13 @@ bool SBEnvironment::Set(const char *name, const char *value, bool overwrite) {
 }
 
 bool SBEnvironment::Unset(const char *name) {
-  LLDB_RECORD_METHOD(bool, SBEnvironment, Unset, (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   return m_opaque_up->erase(name);
 }
 
 SBStringList SBEnvironment::GetEntries() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStringList, SBEnvironment, GetEntries);
+  LLDB_INSTRUMENT_VA(this);
 
   SBStringList entries;
   for (const auto &KV : *m_opaque_up) {
@@ -105,16 +99,14 @@ SBStringList SBEnvironment::GetEntries() {
 }
 
 void SBEnvironment::PutEntry(const char *name_and_value) {
-  LLDB_RECORD_METHOD(void, SBEnvironment, PutEntry, (const char *),
-                     name_and_value);
+  LLDB_INSTRUMENT_VA(this, name_and_value);
 
   auto split = llvm::StringRef(name_and_value).split('=');
   m_opaque_up->insert_or_assign(split.first.str(), split.second.str());
 }
 
 void SBEnvironment::SetEntries(const SBStringList &entries, bool append) {
-  LLDB_RECORD_METHOD(void, SBEnvironment, SetEntries,
-                     (const lldb::SBStringList &, bool), entries, append);
+  LLDB_INSTRUMENT_VA(this, entries, append);
 
   if (!append)
     m_opaque_up->clear();
@@ -124,7 +116,7 @@ void SBEnvironment::SetEntries(const SBStringList &entries, bool append) {
 }
 
 void SBEnvironment::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBEnvironment, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up->clear();
 }
diff --git a/lldb/source/API/SBError.cpp b/lldb/source/API/SBError.cpp
index 793ebb7df5eb9..ef4f7266f083b 100644
--- a/lldb/source/API/SBError.cpp
+++ b/lldb/source/API/SBError.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBError.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Status.h"
 
 #include <cstdarg>
@@ -17,10 +17,10 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBError::SBError() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBError); }
+SBError::SBError() { LLDB_INSTRUMENT_VA(this); }
 
 SBError::SBError(const SBError &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBError, (const lldb::SBError &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -28,8 +28,7 @@ SBError::SBError(const SBError &rhs) {
 SBError::~SBError() = default;
 
 const SBError &SBError::operator=(const SBError &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBError &,
-                     SBError, operator=,(const lldb::SBError &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -37,7 +36,7 @@ const SBError &SBError::operator=(const SBError &rhs) {
 }
 
 const char *SBError::GetCString() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBError, GetCString);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->AsCString();
@@ -45,14 +44,14 @@ const char *SBError::GetCString() const {
 }
 
 void SBError::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBError, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     m_opaque_up->Clear();
 }
 
 bool SBError::Fail() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, Fail);
+  LLDB_INSTRUMENT_VA(this);
 
   bool ret_value = false;
   if (m_opaque_up)
@@ -63,7 +62,7 @@ bool SBError::Fail() const {
 }
 
 bool SBError::Success() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, Success);
+  LLDB_INSTRUMENT_VA(this);
 
   bool ret_value = true;
   if (m_opaque_up)
@@ -73,8 +72,7 @@ bool SBError::Success() const {
 }
 
 uint32_t SBError::GetError() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBError, GetError);
-
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t err = 0;
   if (m_opaque_up)
@@ -85,7 +83,7 @@ uint32_t SBError::GetError() const {
 }
 
 ErrorType SBError::GetType() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::ErrorType, SBError, GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   ErrorType err_type = eErrorTypeInvalid;
   if (m_opaque_up)
@@ -95,8 +93,7 @@ ErrorType SBError::GetType() const {
 }
 
 void SBError::SetError(uint32_t err, ErrorType type) {
-  LLDB_RECORD_METHOD(void, SBError, SetError, (uint32_t, lldb::ErrorType), err,
-                     type);
+  LLDB_INSTRUMENT_VA(this, err, type);
 
   CreateIfNeeded();
   m_opaque_up->SetError(err, type);
@@ -108,21 +105,21 @@ void SBError::SetError(const Status &lldb_error) {
 }
 
 void SBError::SetErrorToErrno() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBError, SetErrorToErrno);
+  LLDB_INSTRUMENT_VA(this);
 
   CreateIfNeeded();
   m_opaque_up->SetErrorToErrno();
 }
 
 void SBError::SetErrorToGenericError() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBError, SetErrorToGenericError);
+  LLDB_INSTRUMENT_VA(this);
 
   CreateIfNeeded();
   m_opaque_up->SetErrorToGenericError();
 }
 
 void SBError::SetErrorString(const char *err_str) {
-  LLDB_RECORD_METHOD(void, SBError, SetErrorString, (const char *), err_str);
+  LLDB_INSTRUMENT_VA(this, err_str);
 
   CreateIfNeeded();
   m_opaque_up->SetErrorString(err_str);
@@ -138,11 +135,11 @@ int SBError::SetErrorStringWithFormat(const char *format, ...) {
 }
 
 bool SBError::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBError::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBError, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up != nullptr;
 }
@@ -167,8 +164,7 @@ const lldb_private::Status &SBError::operator*() const {
 }
 
 bool SBError::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBError, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   if (m_opaque_up) {
     if (m_opaque_up->Success())
diff --git a/lldb/source/API/SBEvent.cpp b/lldb/source/API/SBEvent.cpp
index 710ef2cf81188..536680bd1c5ed 100644
--- a/lldb/source/API/SBEvent.cpp
+++ b/lldb/source/API/SBEvent.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBEvent.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBBroadcaster.h"
 #include "lldb/API/SBStream.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/Breakpoint/Breakpoint.h"
 #include "lldb/Core/StreamFile.h"
@@ -22,32 +22,30 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBEvent::SBEvent() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBEvent); }
+SBEvent::SBEvent() { LLDB_INSTRUMENT_VA(this); }
 
 SBEvent::SBEvent(uint32_t event_type, const char *cstr, uint32_t cstr_len)
     : m_event_sp(new Event(event_type, new EventDataBytes(cstr, cstr_len))),
       m_opaque_ptr(m_event_sp.get()) {
-  LLDB_RECORD_CONSTRUCTOR(SBEvent, (uint32_t, const char *, uint32_t),
-                          event_type, cstr, cstr_len);
+  LLDB_INSTRUMENT_VA(this, event_type, cstr, cstr_len);
 }
 
 SBEvent::SBEvent(EventSP &event_sp)
     : m_event_sp(event_sp), m_opaque_ptr(event_sp.get()) {
-  LLDB_RECORD_CONSTRUCTOR(SBEvent, (lldb::EventSP &), event_sp);
+  LLDB_INSTRUMENT_VA(this, event_sp);
 }
 
 SBEvent::SBEvent(Event *event_ptr) : m_opaque_ptr(event_ptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBEvent, (lldb_private::Event *), event_ptr);
+  LLDB_INSTRUMENT_VA(this, event_ptr);
 }
 
 SBEvent::SBEvent(const SBEvent &rhs)
     : m_event_sp(rhs.m_event_sp), m_opaque_ptr(rhs.m_opaque_ptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBEvent, (const lldb::SBEvent &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBEvent &SBEvent::operator=(const SBEvent &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBEvent &,
-                     SBEvent, operator=,(const lldb::SBEvent &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_event_sp = rhs.m_event_sp;
@@ -59,7 +57,7 @@ const SBEvent &SBEvent::operator=(const SBEvent &rhs) {
 SBEvent::~SBEvent() = default;
 
 const char *SBEvent::GetDataFlavor() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBEvent, GetDataFlavor);
+  LLDB_INSTRUMENT_VA(this);
 
   Event *lldb_event = get();
   if (lldb_event) {
@@ -71,8 +69,7 @@ const char *SBEvent::GetDataFlavor() {
 }
 
 uint32_t SBEvent::GetType() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBEvent, GetType);
-
+  LLDB_INSTRUMENT_VA(this);
 
   const Event *lldb_event = get();
   uint32_t event_type = 0;
@@ -84,8 +81,7 @@ uint32_t SBEvent::GetType() const {
 }
 
 SBBroadcaster SBEvent::GetBroadcaster() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBroadcaster, SBEvent,
-                                   GetBroadcaster);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBroadcaster broadcaster;
   const Event *lldb_event = get();
@@ -95,7 +91,7 @@ SBBroadcaster SBEvent::GetBroadcaster() const {
 }
 
 const char *SBEvent::GetBroadcasterClass() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBEvent, GetBroadcasterClass);
+  LLDB_INSTRUMENT_VA(this);
 
   const Event *lldb_event = get();
   if (lldb_event)
@@ -105,8 +101,7 @@ const char *SBEvent::GetBroadcasterClass() const {
 }
 
 bool SBEvent::BroadcasterMatchesPtr(const SBBroadcaster *broadcaster) {
-  LLDB_RECORD_METHOD(bool, SBEvent, BroadcasterMatchesPtr,
-                     (const lldb::SBBroadcaster *), broadcaster);
+  LLDB_INSTRUMENT_VA(this, broadcaster);
 
   if (broadcaster)
     return BroadcasterMatchesRef(*broadcaster);
@@ -114,8 +109,7 @@ bool SBEvent::BroadcasterMatchesPtr(const SBBroadcaster *broadcaster) {
 }
 
 bool SBEvent::BroadcasterMatchesRef(const SBBroadcaster &broadcaster) {
-  LLDB_RECORD_METHOD(bool, SBEvent, BroadcasterMatchesRef,
-                     (const lldb::SBBroadcaster &), broadcaster);
+  LLDB_INSTRUMENT_VA(this, broadcaster);
 
   Event *lldb_event = get();
   bool success = false;
@@ -127,7 +121,7 @@ bool SBEvent::BroadcasterMatchesRef(const SBBroadcaster &broadcaster) {
 }
 
 void SBEvent::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBEvent, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   Event *lldb_event = get();
   if (lldb_event)
@@ -158,11 +152,11 @@ void SBEvent::reset(Event *event_ptr) {
 }
 
 bool SBEvent::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBEvent, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBEvent::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBEvent, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   // Do NOT use m_opaque_ptr directly!!! Must use the SBEvent::get() accessor.
   // See comments in SBEvent::get()....
@@ -170,16 +164,14 @@ SBEvent::operator bool() const {
 }
 
 const char *SBEvent::GetCStringFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(const char *, SBEvent, GetCStringFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return static_cast<const char *>(
       EventDataBytes::GetBytesFromEvent(event.get()));
 }
 
 bool SBEvent::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBEvent, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -192,8 +184,7 @@ bool SBEvent::GetDescription(SBStream &description) {
 }
 
 bool SBEvent::GetDescription(SBStream &description) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBEvent, GetDescription, (lldb::SBStream &),
-                           description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
diff --git a/lldb/source/API/SBExecutionContext.cpp b/lldb/source/API/SBExecutionContext.cpp
index 1f53e84ac6e64..a0b68e6efe384 100644
--- a/lldb/source/API/SBExecutionContext.cpp
+++ b/lldb/source/API/SBExecutionContext.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBExecutionContext.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBFrame.h"
 #include "lldb/API/SBProcess.h"
@@ -19,48 +19,43 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBExecutionContext::SBExecutionContext() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBExecutionContext);
-}
+SBExecutionContext::SBExecutionContext() { LLDB_INSTRUMENT_VA(this); }
 
 SBExecutionContext::SBExecutionContext(const lldb::SBExecutionContext &rhs)
     : m_exe_ctx_sp(rhs.m_exe_ctx_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBExecutionContext,
-                          (const lldb::SBExecutionContext &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBExecutionContext::SBExecutionContext(
     lldb::ExecutionContextRefSP exe_ctx_ref_sp)
     : m_exe_ctx_sp(exe_ctx_ref_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (lldb::ExecutionContextRefSP),
-                          exe_ctx_ref_sp);
+  LLDB_INSTRUMENT_VA(this, exe_ctx_ref_sp);
 }
 
 SBExecutionContext::SBExecutionContext(const lldb::SBTarget &target)
     : m_exe_ctx_sp(new ExecutionContextRef()) {
-  LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (const lldb::SBTarget &), target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   m_exe_ctx_sp->SetTargetSP(target.GetSP());
 }
 
 SBExecutionContext::SBExecutionContext(const lldb::SBProcess &process)
     : m_exe_ctx_sp(new ExecutionContextRef()) {
-  LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (const lldb::SBProcess &),
-                          process);
+  LLDB_INSTRUMENT_VA(this, process);
 
   m_exe_ctx_sp->SetProcessSP(process.GetSP());
 }
 
 SBExecutionContext::SBExecutionContext(lldb::SBThread thread)
     : m_exe_ctx_sp(new ExecutionContextRef()) {
-  LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (lldb::SBThread), thread);
+  LLDB_INSTRUMENT_VA(this, thread);
 
   m_exe_ctx_sp->SetThreadPtr(thread.get());
 }
 
 SBExecutionContext::SBExecutionContext(const lldb::SBFrame &frame)
     : m_exe_ctx_sp(new ExecutionContextRef()) {
-  LLDB_RECORD_CONSTRUCTOR(SBExecutionContext, (const lldb::SBFrame &), frame);
+  LLDB_INSTRUMENT_VA(this, frame);
 
   m_exe_ctx_sp->SetFrameSP(frame.GetFrameSP());
 }
@@ -69,9 +64,7 @@ SBExecutionContext::~SBExecutionContext() = default;
 
 const SBExecutionContext &SBExecutionContext::
 operator=(const lldb::SBExecutionContext &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBExecutionContext &,
-      SBExecutionContext, operator=,(const lldb::SBExecutionContext &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_exe_ctx_sp = rhs.m_exe_ctx_sp;
   return *this;
@@ -82,8 +75,7 @@ ExecutionContextRef *SBExecutionContext::get() const {
 }
 
 SBTarget SBExecutionContext::GetTarget() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBTarget, SBExecutionContext,
-                                   GetTarget);
+  LLDB_INSTRUMENT_VA(this);
 
   SBTarget sb_target;
   if (m_exe_ctx_sp) {
@@ -95,8 +87,7 @@ SBTarget SBExecutionContext::GetTarget() const {
 }
 
 SBProcess SBExecutionContext::GetProcess() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBProcess, SBExecutionContext,
-                                   GetProcess);
+  LLDB_INSTRUMENT_VA(this);
 
   SBProcess sb_process;
   if (m_exe_ctx_sp) {
@@ -108,8 +99,7 @@ SBProcess SBExecutionContext::GetProcess() const {
 }
 
 SBThread SBExecutionContext::GetThread() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBThread, SBExecutionContext,
-                                   GetThread);
+  LLDB_INSTRUMENT_VA(this);
 
   SBThread sb_thread;
   if (m_exe_ctx_sp) {
@@ -121,7 +111,7 @@ SBThread SBExecutionContext::GetThread() const {
 }
 
 SBFrame SBExecutionContext::GetFrame() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFrame, SBExecutionContext, GetFrame);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFrame sb_frame;
   if (m_exe_ctx_sp) {
diff --git a/lldb/source/API/SBExpressionOptions.cpp b/lldb/source/API/SBExpressionOptions.cpp
index 6dfff7ee46036..191e38fe5cfc4 100644
--- a/lldb/source/API/SBExpressionOptions.cpp
+++ b/lldb/source/API/SBExpressionOptions.cpp
@@ -7,31 +7,28 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBExpressionOptions.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 SBExpressionOptions::SBExpressionOptions()
     : m_opaque_up(new EvaluateExpressionOptions()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBExpressionOptions);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBExpressionOptions::SBExpressionOptions(const SBExpressionOptions &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBExpressionOptions,
-                          (const lldb::SBExpressionOptions &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
 
 const SBExpressionOptions &SBExpressionOptions::
 operator=(const SBExpressionOptions &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBExpressionOptions &,
-      SBExpressionOptions, operator=,(const lldb::SBExpressionOptions &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -41,78 +38,68 @@ operator=(const SBExpressionOptions &rhs) {
 SBExpressionOptions::~SBExpressionOptions() = default;
 
 bool SBExpressionOptions::GetCoerceResultToId() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions,
-                                   GetCoerceResultToId);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->DoesCoerceToId();
 }
 
 void SBExpressionOptions::SetCoerceResultToId(bool coerce) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetCoerceResultToId, (bool),
-                     coerce);
+  LLDB_INSTRUMENT_VA(this, coerce);
 
   m_opaque_up->SetCoerceToId(coerce);
 }
 
 bool SBExpressionOptions::GetUnwindOnError() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, GetUnwindOnError);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->DoesUnwindOnError();
 }
 
 void SBExpressionOptions::SetUnwindOnError(bool unwind) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetUnwindOnError, (bool),
-                     unwind);
+  LLDB_INSTRUMENT_VA(this, unwind);
 
   m_opaque_up->SetUnwindOnError(unwind);
 }
 
 bool SBExpressionOptions::GetIgnoreBreakpoints() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions,
-                                   GetIgnoreBreakpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->DoesIgnoreBreakpoints();
 }
 
 void SBExpressionOptions::SetIgnoreBreakpoints(bool ignore) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetIgnoreBreakpoints, (bool),
-                     ignore);
+  LLDB_INSTRUMENT_VA(this, ignore);
 
   m_opaque_up->SetIgnoreBreakpoints(ignore);
 }
 
 lldb::DynamicValueType SBExpressionOptions::GetFetchDynamicValue() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::DynamicValueType, SBExpressionOptions,
-                                   GetFetchDynamicValue);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetUseDynamic();
 }
 
 void SBExpressionOptions::SetFetchDynamicValue(lldb::DynamicValueType dynamic) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetFetchDynamicValue,
-                     (lldb::DynamicValueType), dynamic);
+  LLDB_INSTRUMENT_VA(this, dynamic);
 
   m_opaque_up->SetUseDynamic(dynamic);
 }
 
 uint32_t SBExpressionOptions::GetTimeoutInMicroSeconds() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBExpressionOptions,
-                                   GetTimeoutInMicroSeconds);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetTimeout() ? m_opaque_up->GetTimeout()->count() : 0;
 }
 
 void SBExpressionOptions::SetTimeoutInMicroSeconds(uint32_t timeout) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetTimeoutInMicroSeconds,
-                     (uint32_t), timeout);
+  LLDB_INSTRUMENT_VA(this, timeout);
 
   m_opaque_up->SetTimeout(timeout == 0 ? Timeout<std::micro>(llvm::None)
                                        : std::chrono::microseconds(timeout));
 }
 
 uint32_t SBExpressionOptions::GetOneThreadTimeoutInMicroSeconds() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBExpressionOptions,
-                                   GetOneThreadTimeoutInMicroSeconds);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetOneThreadTimeout()
              ? m_opaque_up->GetOneThreadTimeout()->count()
@@ -120,8 +107,7 @@ uint32_t SBExpressionOptions::GetOneThreadTimeoutInMicroSeconds() const {
 }
 
 void SBExpressionOptions::SetOneThreadTimeoutInMicroSeconds(uint32_t timeout) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions,
-                     SetOneThreadTimeoutInMicroSeconds, (uint32_t), timeout);
+  LLDB_INSTRUMENT_VA(this, timeout);
 
   m_opaque_up->SetOneThreadTimeout(timeout == 0
                                        ? Timeout<std::micro>(llvm::None)
@@ -129,148 +115,135 @@ void SBExpressionOptions::SetOneThreadTimeoutInMicroSeconds(uint32_t timeout) {
 }
 
 bool SBExpressionOptions::GetTryAllThreads() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, GetTryAllThreads);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetTryAllThreads();
 }
 
 void SBExpressionOptions::SetTryAllThreads(bool run_others) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetTryAllThreads, (bool),
-                     run_others);
+  LLDB_INSTRUMENT_VA(this, run_others);
 
   m_opaque_up->SetTryAllThreads(run_others);
 }
 
 bool SBExpressionOptions::GetStopOthers() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions, GetStopOthers);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetStopOthers();
 }
 
 void SBExpressionOptions::SetStopOthers(bool run_others) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetStopOthers, (bool),
-                     run_others);
+  LLDB_INSTRUMENT_VA(this, run_others);
 
   m_opaque_up->SetStopOthers(run_others);
 }
 
 bool SBExpressionOptions::GetTrapExceptions() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBExpressionOptions,
-                                   GetTrapExceptions);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetTrapExceptions();
 }
 
 void SBExpressionOptions::SetTrapExceptions(bool trap_exceptions) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetTrapExceptions, (bool),
-                     trap_exceptions);
+  LLDB_INSTRUMENT_VA(this, trap_exceptions);
 
   m_opaque_up->SetTrapExceptions(trap_exceptions);
 }
 
 void SBExpressionOptions::SetLanguage(lldb::LanguageType language) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetLanguage,
-                     (lldb::LanguageType), language);
+  LLDB_INSTRUMENT_VA(this, language);
 
   m_opaque_up->SetLanguage(language);
 }
 
 void SBExpressionOptions::SetCancelCallback(
     lldb::ExpressionCancelCallback callback, void *baton) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetCancelCallback,
-                     (lldb::ExpressionCancelCallback, void *), callback, baton);
+  LLDB_INSTRUMENT_VA(this, callback, baton);
 
   m_opaque_up->SetCancelCallback(callback, baton);
 }
 
 bool SBExpressionOptions::GetGenerateDebugInfo() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, GetGenerateDebugInfo);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetGenerateDebugInfo();
 }
 
 void SBExpressionOptions::SetGenerateDebugInfo(bool b) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetGenerateDebugInfo, (bool),
-                     b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   return m_opaque_up->SetGenerateDebugInfo(b);
 }
 
 bool SBExpressionOptions::GetSuppressPersistentResult() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions,
-                             GetSuppressPersistentResult);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetResultIsInternal();
 }
 
 void SBExpressionOptions::SetSuppressPersistentResult(bool b) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetSuppressPersistentResult,
-                     (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   return m_opaque_up->SetResultIsInternal(b);
 }
 
 const char *SBExpressionOptions::GetPrefix() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBExpressionOptions,
-                                   GetPrefix);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetPrefix();
 }
 
 void SBExpressionOptions::SetPrefix(const char *prefix) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetPrefix, (const char *),
-                     prefix);
+  LLDB_INSTRUMENT_VA(this, prefix);
 
   return m_opaque_up->SetPrefix(prefix);
 }
 
 bool SBExpressionOptions::GetAutoApplyFixIts() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, GetAutoApplyFixIts);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetAutoApplyFixIts();
 }
 
 void SBExpressionOptions::SetAutoApplyFixIts(bool b) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetAutoApplyFixIts, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   return m_opaque_up->SetAutoApplyFixIts(b);
 }
 
 uint64_t SBExpressionOptions::GetRetriesWithFixIts() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBExpressionOptions,
-                             GetRetriesWithFixIts);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetRetriesWithFixIts();
 }
 
 void SBExpressionOptions::SetRetriesWithFixIts(uint64_t retries) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetRetriesWithFixIts,
-                     (uint64_t), retries);
+  LLDB_INSTRUMENT_VA(this, retries);
 
   return m_opaque_up->SetRetriesWithFixIts(retries);
 }
 
 bool SBExpressionOptions::GetTopLevel() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, GetTopLevel);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetExecutionPolicy() == eExecutionPolicyTopLevel;
 }
 
 void SBExpressionOptions::SetTopLevel(bool b) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetTopLevel, (bool), b);
+  LLDB_INSTRUMENT_VA(this, b);
 
   m_opaque_up->SetExecutionPolicy(b ? eExecutionPolicyTopLevel
                                     : m_opaque_up->default_execution_policy);
 }
 
 bool SBExpressionOptions::GetAllowJIT() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBExpressionOptions, GetAllowJIT);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetExecutionPolicy() != eExecutionPolicyNever;
 }
 
 void SBExpressionOptions::SetAllowJIT(bool allow) {
-  LLDB_RECORD_METHOD(void, SBExpressionOptions, SetAllowJIT, (bool), allow);
+  LLDB_INSTRUMENT_VA(this, allow);
 
   m_opaque_up->SetExecutionPolicy(allow ? m_opaque_up->default_execution_policy
                                         : eExecutionPolicyNever);
diff --git a/lldb/source/API/SBFile.cpp b/lldb/source/API/SBFile.cpp
index 4fae46ea10cb4..0db859c3b7468 100644
--- a/lldb/source/API/SBFile.cpp
+++ b/lldb/source/API/SBFile.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBFile.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBError.h"
 #include "lldb/Host/File.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -19,33 +19,31 @@ SBFile::~SBFile() = default;
 SBFile::SBFile(FileSP file_sp) : m_opaque_sp(file_sp) {
   // We have no way to capture the incoming FileSP as the class isn't
   // instrumented, so pretend that it's always null.
-  LLDB_RECORD_CONSTRUCTOR(SBFile, (lldb::FileSP), nullptr);
+  LLDB_INSTRUMENT_VA(this, file_sp);
 }
 
 SBFile::SBFile(const SBFile &rhs) : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBFile, (const lldb::SBFile&), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBFile &SBFile ::operator=(const SBFile &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBFile &,
-                     SBFile, operator=,(const lldb::SBFile &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = rhs.m_opaque_sp;
   return *this;
 }
 
-SBFile::SBFile() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFile); }
+SBFile::SBFile() { LLDB_INSTRUMENT_VA(this); }
 
 SBFile::SBFile(FILE *file, bool transfer_ownership) {
-  LLDB_RECORD_CONSTRUCTOR(SBFile, (FILE *, bool), file, transfer_ownership);
+  LLDB_INSTRUMENT_VA(this, file, transfer_ownership);
 
   m_opaque_sp = std::make_shared<NativeFile>(file, transfer_ownership);
 }
 
 SBFile::SBFile(int fd, const char *mode, bool transfer_owndership) {
-  LLDB_RECORD_CONSTRUCTOR(SBFile, (int, const char *, bool), fd, mode,
-                          transfer_owndership);
+  LLDB_INSTRUMENT_VA(this, fd, mode, transfer_owndership);
 
   auto options = File::GetOptionsFromMode(mode);
   if (!options) {
@@ -57,8 +55,7 @@ SBFile::SBFile(int fd, const char *mode, bool transfer_owndership) {
 }
 
 SBError SBFile::Read(uint8_t *buf, size_t num_bytes, size_t *bytes_read) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBFile, Read, (uint8_t *, size_t, size_t *),
-                     buf, num_bytes, bytes_read);
+  LLDB_INSTRUMENT_VA(this, buf, num_bytes, bytes_read);
 
   SBError error;
   if (!m_opaque_sp) {
@@ -74,9 +71,7 @@ SBError SBFile::Read(uint8_t *buf, size_t num_bytes, size_t *bytes_read) {
 
 SBError SBFile::Write(const uint8_t *buf, size_t num_bytes,
                       size_t *bytes_written) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBFile, Write,
-                     (const uint8_t *, size_t, size_t *), buf, num_bytes,
-                     bytes_written);
+  LLDB_INSTRUMENT_VA(this, buf, num_bytes, bytes_written);
 
   SBError error;
   if (!m_opaque_sp) {
@@ -91,7 +86,7 @@ SBError SBFile::Write(const uint8_t *buf, size_t num_bytes,
 }
 
 SBError SBFile::Flush() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBFile, Flush);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError error;
   if (!m_opaque_sp) {
@@ -104,12 +99,12 @@ SBError SBFile::Flush() {
 }
 
 bool SBFile::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFile, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return m_opaque_sp && m_opaque_sp->IsValid();
 }
 
 SBError SBFile::Close() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBFile, Close);
+  LLDB_INSTRUMENT_VA(this);
   SBError error;
   if (m_opaque_sp) {
     Status status = m_opaque_sp->Close();
@@ -119,16 +114,16 @@ SBError SBFile::Close() {
 }
 
 SBFile::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFile, operator bool);
+  LLDB_INSTRUMENT_VA(this);
   return IsValid();
 }
 
 bool SBFile::operator!() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFile, operator!);
+  LLDB_INSTRUMENT_VA(this);
   return !IsValid();
 }
 
 FileSP SBFile::GetFile() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(FileSP, SBFile, GetFile);
+  LLDB_INSTRUMENT_VA(this);
   return m_opaque_sp;
 }
diff --git a/lldb/source/API/SBFileSpec.cpp b/lldb/source/API/SBFileSpec.cpp
index b1b676af2d5c0..2bec9a7a1e770 100644
--- a/lldb/source/API/SBFileSpec.cpp
+++ b/lldb/source/API/SBFileSpec.cpp
@@ -7,12 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBFileSpec.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Host/FileSystem.h"
 #include "lldb/Host/PosixApi.h"
 #include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Stream.h"
 
 #include "llvm/ADT/SmallString.h"
@@ -24,11 +24,11 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBFileSpec::SBFileSpec() : m_opaque_up(new lldb_private::FileSpec()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBFileSpec::SBFileSpec(const SBFileSpec &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBFileSpec, (const lldb::SBFileSpec &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -38,14 +38,14 @@ SBFileSpec::SBFileSpec(const lldb_private::FileSpec &fspec)
 
 // Deprecated!!!
 SBFileSpec::SBFileSpec(const char *path) : m_opaque_up(new FileSpec(path)) {
-  LLDB_RECORD_CONSTRUCTOR(SBFileSpec, (const char *), path);
+  LLDB_INSTRUMENT_VA(this, path);
 
   FileSystem::Instance().Resolve(*m_opaque_up);
 }
 
 SBFileSpec::SBFileSpec(const char *path, bool resolve)
     : m_opaque_up(new FileSpec(path)) {
-  LLDB_RECORD_CONSTRUCTOR(SBFileSpec, (const char *, bool), path, resolve);
+  LLDB_INSTRUMENT_VA(this, path, resolve);
 
   if (resolve)
     FileSystem::Instance().Resolve(*m_opaque_up);
@@ -54,8 +54,7 @@ SBFileSpec::SBFileSpec(const char *path, bool resolve)
 SBFileSpec::~SBFileSpec() = default;
 
 const SBFileSpec &SBFileSpec::operator=(const SBFileSpec &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBFileSpec &,
-                     SBFileSpec, operator=,(const lldb::SBFileSpec &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -63,46 +62,42 @@ const SBFileSpec &SBFileSpec::operator=(const SBFileSpec &rhs) {
 }
 
 bool SBFileSpec::operator==(const SBFileSpec &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBFileSpec, operator==,(const SBFileSpec &rhs),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return ref() == rhs.ref();
 }
 
 bool SBFileSpec::operator!=(const SBFileSpec &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBFileSpec, operator!=,(const SBFileSpec &rhs),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return !(*this == rhs);
 }
 
 bool SBFileSpec::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFileSpec, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBFileSpec::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFileSpec, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->operator bool();
 }
 
 bool SBFileSpec::Exists() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFileSpec, Exists);
+  LLDB_INSTRUMENT_VA(this);
 
   return FileSystem::Instance().Exists(*m_opaque_up);
 }
 
 bool SBFileSpec::ResolveExecutableLocation() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBFileSpec, ResolveExecutableLocation);
+  LLDB_INSTRUMENT_VA(this);
 
   return FileSystem::Instance().ResolveExecutableLocation(*m_opaque_up);
 }
 
 int SBFileSpec::ResolvePath(const char *src_path, char *dst_path,
                             size_t dst_len) {
-  LLDB_RECORD_STATIC_METHOD(int, SBFileSpec, ResolvePath,
-                            (const char *, char *, size_t), src_path, dst_path,
-                            dst_len);
+  LLDB_INSTRUMENT_VA(src_path, dst_path, dst_len);
 
   llvm::SmallString<64> result(src_path);
   FileSystem::Instance().Resolve(result);
@@ -111,13 +106,13 @@ int SBFileSpec::ResolvePath(const char *src_path, char *dst_path,
 }
 
 const char *SBFileSpec::GetFilename() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFileSpec, GetFilename);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetFilename().AsCString();
 }
 
 const char *SBFileSpec::GetDirectory() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFileSpec, GetDirectory);
+  LLDB_INSTRUMENT_VA(this);
 
   FileSpec directory{*m_opaque_up};
   directory.GetFilename().Clear();
@@ -125,7 +120,7 @@ const char *SBFileSpec::GetDirectory() const {
 }
 
 void SBFileSpec::SetFilename(const char *filename) {
-  LLDB_RECORD_METHOD(void, SBFileSpec, SetFilename, (const char *), filename);
+  LLDB_INSTRUMENT_VA(this, filename);
 
   if (filename && filename[0])
     m_opaque_up->GetFilename().SetCString(filename);
@@ -134,7 +129,7 @@ void SBFileSpec::SetFilename(const char *filename) {
 }
 
 void SBFileSpec::SetDirectory(const char *directory) {
-  LLDB_RECORD_METHOD(void, SBFileSpec, SetDirectory, (const char *), directory);
+  LLDB_INSTRUMENT_VA(this, directory);
 
   if (directory && directory[0])
     m_opaque_up->GetDirectory().SetCString(directory);
@@ -143,8 +138,7 @@ void SBFileSpec::SetDirectory(const char *directory) {
 }
 
 uint32_t SBFileSpec::GetPath(char *dst_path, size_t dst_len) const {
-  LLDB_RECORD_METHOD_CONST(uint32_t, SBFileSpec, GetPath, (char *, size_t),
-                           dst_path, "", dst_len);
+  LLDB_INSTRUMENT_VA(this, dst_path, dst_len);
 
   uint32_t result = m_opaque_up->GetPath(dst_path, dst_len);
 
@@ -172,8 +166,7 @@ void SBFileSpec::SetFileSpec(const lldb_private::FileSpec &fs) {
 }
 
 bool SBFileSpec::GetDescription(SBStream &description) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBFileSpec, GetDescription, (lldb::SBStream &),
-                           description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
   char path[PATH_MAX];
@@ -183,7 +176,7 @@ bool SBFileSpec::GetDescription(SBStream &description) const {
 }
 
 void SBFileSpec::AppendPathComponent(const char *fn) {
-  LLDB_RECORD_METHOD(void, SBFileSpec, AppendPathComponent, (const char *), fn);
+  LLDB_INSTRUMENT_VA(this, fn);
 
   m_opaque_up->AppendPathComponent(fn);
 }
diff --git a/lldb/source/API/SBFileSpecList.cpp b/lldb/source/API/SBFileSpecList.cpp
index 6e85957cb088a..cf81c42340879 100644
--- a/lldb/source/API/SBFileSpecList.cpp
+++ b/lldb/source/API/SBFileSpecList.cpp
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBFileSpecList.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBFileSpec.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Core/FileSpecList.h"
 #include "lldb/Host/PosixApi.h"
 #include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Stream.h"
 
 #include <climits>
@@ -22,11 +22,11 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBFileSpecList::SBFileSpecList() : m_opaque_up(new FileSpecList()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFileSpecList);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBFileSpecList::SBFileSpecList(const SBFileSpecList &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBFileSpecList, (const lldb::SBFileSpecList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -34,9 +34,7 @@ SBFileSpecList::SBFileSpecList(const SBFileSpecList &rhs) {
 SBFileSpecList::~SBFileSpecList() = default;
 
 const SBFileSpecList &SBFileSpecList::operator=(const SBFileSpecList &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBFileSpecList &,
-                     SBFileSpecList, operator=,(const lldb::SBFileSpecList &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -44,43 +42,38 @@ const SBFileSpecList &SBFileSpecList::operator=(const SBFileSpecList &rhs) {
 }
 
 uint32_t SBFileSpecList::GetSize() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBFileSpecList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetSize();
 }
 
 void SBFileSpecList::Append(const SBFileSpec &sb_file) {
-  LLDB_RECORD_METHOD(void, SBFileSpecList, Append, (const lldb::SBFileSpec &),
-                     sb_file);
+  LLDB_INSTRUMENT_VA(this, sb_file);
 
   m_opaque_up->Append(sb_file.ref());
 }
 
 bool SBFileSpecList::AppendIfUnique(const SBFileSpec &sb_file) {
-  LLDB_RECORD_METHOD(bool, SBFileSpecList, AppendIfUnique,
-                     (const lldb::SBFileSpec &), sb_file);
+  LLDB_INSTRUMENT_VA(this, sb_file);
 
   return m_opaque_up->AppendIfUnique(sb_file.ref());
 }
 
 void SBFileSpecList::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBFileSpecList, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up->Clear();
 }
 
 uint32_t SBFileSpecList::FindFileIndex(uint32_t idx, const SBFileSpec &sb_file,
                                        bool full) {
-  LLDB_RECORD_METHOD(uint32_t, SBFileSpecList, FindFileIndex,
-                     (uint32_t, const lldb::SBFileSpec &, bool), idx, sb_file,
-                     full);
+  LLDB_INSTRUMENT_VA(this, idx, sb_file, full);
 
   return m_opaque_up->FindFileIndex(idx, sb_file.ref(), full);
 }
 
 const SBFileSpec SBFileSpecList::GetFileSpecAtIndex(uint32_t idx) const {
-  LLDB_RECORD_METHOD_CONST(const lldb::SBFileSpec, SBFileSpecList,
-                           GetFileSpecAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBFileSpec new_spec;
   new_spec.SetFileSpec(m_opaque_up->GetFileSpecAtIndex(idx));
@@ -104,8 +97,7 @@ const lldb_private::FileSpecList &SBFileSpecList::ref() const {
 }
 
 bool SBFileSpecList::GetDescription(SBStream &description) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBFileSpecList, GetDescription,
-                           (lldb::SBStream &), description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
diff --git a/lldb/source/API/SBFrame.cpp b/lldb/source/API/SBFrame.cpp
index ab3886e24d6fb..ffbbed00f8e21 100644
--- a/lldb/source/API/SBFrame.cpp
+++ b/lldb/source/API/SBFrame.cpp
@@ -14,7 +14,6 @@
 
 #include "lldb/lldb-types.h"
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/Core/Address.h"
 #include "lldb/Core/StreamFile.h"
@@ -38,6 +37,7 @@
 #include "lldb/Target/Target.h"
 #include "lldb/Target/Thread.h"
 #include "lldb/Utility/ConstString.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Stream.h"
 
 #include "lldb/API/SBAddress.h"
@@ -55,17 +55,16 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBFrame::SBFrame() : m_opaque_sp(new ExecutionContextRef()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFrame);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBFrame::SBFrame(const StackFrameSP &lldb_object_sp)
     : m_opaque_sp(new ExecutionContextRef(lldb_object_sp)) {
-  LLDB_RECORD_CONSTRUCTOR(SBFrame, (const lldb::StackFrameSP &),
-                          lldb_object_sp);
+  LLDB_INSTRUMENT_VA(this, lldb_object_sp);
 }
 
 SBFrame::SBFrame(const SBFrame &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBFrame, (const lldb::SBFrame &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = clone(rhs.m_opaque_sp);
 }
@@ -73,8 +72,7 @@ SBFrame::SBFrame(const SBFrame &rhs) {
 SBFrame::~SBFrame() = default;
 
 const SBFrame &SBFrame::operator=(const SBFrame &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBFrame &,
-                     SBFrame, operator=,(const lldb::SBFrame &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = clone(rhs.m_opaque_sp);
@@ -90,11 +88,11 @@ void SBFrame::SetFrameSP(const StackFrameSP &lldb_object_sp) {
 }
 
 bool SBFrame::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFrame, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBFrame::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFrame, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -112,8 +110,7 @@ SBFrame::operator bool() const {
 }
 
 SBSymbolContext SBFrame::GetSymbolContext(uint32_t resolve_scope) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBSymbolContext, SBFrame, GetSymbolContext,
-                           (uint32_t), resolve_scope);
+  LLDB_INSTRUMENT_VA(this, resolve_scope);
 
   SBSymbolContext sb_sym_ctx;
   std::unique_lock<std::recursive_mutex> lock;
@@ -133,7 +130,7 @@ SBSymbolContext SBFrame::GetSymbolContext(uint32_t resolve_scope) const {
 }
 
 SBModule SBFrame::GetModule() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBModule, SBFrame, GetModule);
+  LLDB_INSTRUMENT_VA(this);
 
   SBModule sb_module;
   ModuleSP module_sp;
@@ -158,8 +155,7 @@ SBModule SBFrame::GetModule() const {
 }
 
 SBCompileUnit SBFrame::GetCompileUnit() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBCompileUnit, SBFrame,
-                                   GetCompileUnit);
+  LLDB_INSTRUMENT_VA(this);
 
   SBCompileUnit sb_comp_unit;
   std::unique_lock<std::recursive_mutex> lock;
@@ -183,7 +179,7 @@ SBCompileUnit SBFrame::GetCompileUnit() const {
 }
 
 SBFunction SBFrame::GetFunction() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFunction, SBFrame, GetFunction);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFunction sb_function;
   std::unique_lock<std::recursive_mutex> lock;
@@ -207,7 +203,7 @@ SBFunction SBFrame::GetFunction() const {
 }
 
 SBSymbol SBFrame::GetSymbol() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBSymbol, SBFrame, GetSymbol);
+  LLDB_INSTRUMENT_VA(this);
 
   SBSymbol sb_symbol;
   std::unique_lock<std::recursive_mutex> lock;
@@ -230,7 +226,7 @@ SBSymbol SBFrame::GetSymbol() const {
 }
 
 SBBlock SBFrame::GetBlock() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBlock, SBFrame, GetBlock);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBlock sb_block;
   std::unique_lock<std::recursive_mutex> lock;
@@ -251,7 +247,7 @@ SBBlock SBFrame::GetBlock() const {
 }
 
 SBBlock SBFrame::GetFrameBlock() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBlock, SBFrame, GetFrameBlock);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBlock sb_block;
   std::unique_lock<std::recursive_mutex> lock;
@@ -272,7 +268,7 @@ SBBlock SBFrame::GetFrameBlock() const {
 }
 
 SBLineEntry SBFrame::GetLineEntry() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBLineEntry, SBFrame, GetLineEntry);
+  LLDB_INSTRUMENT_VA(this);
 
   SBLineEntry sb_line_entry;
   std::unique_lock<std::recursive_mutex> lock;
@@ -295,7 +291,7 @@ SBLineEntry SBFrame::GetLineEntry() const {
 }
 
 uint32_t SBFrame::GetFrameID() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBFrame, GetFrameID);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t frame_idx = UINT32_MAX;
 
@@ -310,7 +306,7 @@ uint32_t SBFrame::GetFrameID() const {
 }
 
 lldb::addr_t SBFrame::GetCFA() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBFrame, GetCFA);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -322,7 +318,7 @@ lldb::addr_t SBFrame::GetCFA() const {
 }
 
 addr_t SBFrame::GetPC() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBFrame, GetPC);
+  LLDB_INSTRUMENT_VA(this);
 
   addr_t addr = LLDB_INVALID_ADDRESS;
   std::unique_lock<std::recursive_mutex> lock;
@@ -346,7 +342,7 @@ addr_t SBFrame::GetPC() const {
 }
 
 bool SBFrame::SetPC(addr_t new_pc) {
-  LLDB_RECORD_METHOD(bool, SBFrame, SetPC, (lldb::addr_t), new_pc);
+  LLDB_INSTRUMENT_VA(this, new_pc);
 
   bool ret_val = false;
   std::unique_lock<std::recursive_mutex> lock;
@@ -369,7 +365,7 @@ bool SBFrame::SetPC(addr_t new_pc) {
 }
 
 addr_t SBFrame::GetSP() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBFrame, GetSP);
+  LLDB_INSTRUMENT_VA(this);
 
   addr_t addr = LLDB_INVALID_ADDRESS;
   std::unique_lock<std::recursive_mutex> lock;
@@ -392,7 +388,7 @@ addr_t SBFrame::GetSP() const {
 }
 
 addr_t SBFrame::GetFP() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::addr_t, SBFrame, GetFP);
+  LLDB_INSTRUMENT_VA(this);
 
   addr_t addr = LLDB_INVALID_ADDRESS;
   std::unique_lock<std::recursive_mutex> lock;
@@ -415,7 +411,7 @@ addr_t SBFrame::GetFP() const {
 }
 
 SBAddress SBFrame::GetPCAddress() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBFrame, GetPCAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress sb_addr;
   std::unique_lock<std::recursive_mutex> lock;
@@ -436,14 +432,13 @@ SBAddress SBFrame::GetPCAddress() const {
 }
 
 void SBFrame::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBFrame, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp->Clear();
 }
 
 lldb::SBValue SBFrame::GetValueForVariablePath(const char *var_path) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, GetValueForVariablePath,
-                     (const char *), var_path);
+  LLDB_INSTRUMENT_VA(this, var_path);
 
   SBValue sb_value;
   std::unique_lock<std::recursive_mutex> lock;
@@ -461,9 +456,7 @@ lldb::SBValue SBFrame::GetValueForVariablePath(const char *var_path) {
 
 lldb::SBValue SBFrame::GetValueForVariablePath(const char *var_path,
                                                DynamicValueType use_dynamic) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, GetValueForVariablePath,
-                     (const char *, lldb::DynamicValueType), var_path,
-                     use_dynamic);
+  LLDB_INSTRUMENT_VA(this, var_path, use_dynamic);
 
   SBValue sb_value;
   if (var_path == nullptr || var_path[0] == '\0') {
@@ -496,8 +489,7 @@ lldb::SBValue SBFrame::GetValueForVariablePath(const char *var_path,
 }
 
 SBValue SBFrame::FindVariable(const char *name) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindVariable, (const char *),
-                     name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   SBValue value;
   std::unique_lock<std::recursive_mutex> lock;
@@ -515,8 +507,7 @@ SBValue SBFrame::FindVariable(const char *name) {
 
 SBValue SBFrame::FindVariable(const char *name,
                               lldb::DynamicValueType use_dynamic) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindVariable,
-                     (const char *, lldb::DynamicValueType), name, use_dynamic);
+  LLDB_INSTRUMENT_VA(this, name, use_dynamic);
 
   VariableSP var_sp;
   SBValue sb_value;
@@ -549,8 +540,7 @@ SBValue SBFrame::FindVariable(const char *name,
 }
 
 SBValue SBFrame::FindValue(const char *name, ValueType value_type) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindValue,
-                     (const char *, lldb::ValueType), name, value_type);
+  LLDB_INSTRUMENT_VA(this, name, value_type);
 
   SBValue value;
   std::unique_lock<std::recursive_mutex> lock;
@@ -568,9 +558,7 @@ SBValue SBFrame::FindValue(const char *name, ValueType value_type) {
 
 SBValue SBFrame::FindValue(const char *name, ValueType value_type,
                            lldb::DynamicValueType use_dynamic) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindValue,
-                     (const char *, lldb::ValueType, lldb::DynamicValueType),
-                     name, value_type, use_dynamic);
+  LLDB_INSTRUMENT_VA(this, name, value_type, use_dynamic);
 
   SBValue sb_value;
 
@@ -682,8 +670,7 @@ SBValue SBFrame::FindValue(const char *name, ValueType value_type,
 }
 
 bool SBFrame::IsEqual(const SBFrame &that) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBFrame, IsEqual, (const lldb::SBFrame &),
-                           that);
+  LLDB_INSTRUMENT_VA(this, that);
 
   lldb::StackFrameSP this_sp = GetFrameSP();
   lldb::StackFrameSP that_sp = that.GetFrameSP();
@@ -691,21 +678,19 @@ bool SBFrame::IsEqual(const SBFrame &that) const {
 }
 
 bool SBFrame::operator==(const SBFrame &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBFrame, operator==,(const lldb::SBFrame &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return IsEqual(rhs);
 }
 
 bool SBFrame::operator!=(const SBFrame &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBFrame, operator!=,(const lldb::SBFrame &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return !IsEqual(rhs);
 }
 
 SBThread SBFrame::GetThread() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBThread, SBFrame, GetThread);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -717,7 +702,7 @@ SBThread SBFrame::GetThread() const {
 }
 
 const char *SBFrame::Disassemble() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFrame, Disassemble);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *disassembly = nullptr;
   std::unique_lock<std::recursive_mutex> lock;
@@ -741,9 +726,7 @@ const char *SBFrame::Disassemble() const {
 
 SBValueList SBFrame::GetVariables(bool arguments, bool locals, bool statics,
                                   bool in_scope_only) {
-  LLDB_RECORD_METHOD(lldb::SBValueList, SBFrame, GetVariables,
-                     (bool, bool, bool, bool), arguments, locals, statics,
-                     in_scope_only);
+  LLDB_INSTRUMENT_VA(this, arguments, locals, statics, in_scope_only);
 
   SBValueList value_list;
   std::unique_lock<std::recursive_mutex> lock;
@@ -773,9 +756,8 @@ SBValueList SBFrame::GetVariables(bool arguments, bool locals, bool statics,
 lldb::SBValueList SBFrame::GetVariables(bool arguments, bool locals,
                                         bool statics, bool in_scope_only,
                                         lldb::DynamicValueType use_dynamic) {
-  LLDB_RECORD_METHOD(lldb::SBValueList, SBFrame, GetVariables,
-                     (bool, bool, bool, bool, lldb::DynamicValueType),
-                     arguments, locals, statics, in_scope_only, use_dynamic);
+  LLDB_INSTRUMENT_VA(this, arguments, locals, statics, in_scope_only,
+                     use_dynamic);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -794,8 +776,7 @@ lldb::SBValueList SBFrame::GetVariables(bool arguments, bool locals,
 }
 
 SBValueList SBFrame::GetVariables(const lldb::SBVariablesOptions &options) {
-  LLDB_RECORD_METHOD(lldb::SBValueList, SBFrame, GetVariables,
-                     (const lldb::SBVariablesOptions &), options);
+  LLDB_INSTRUMENT_VA(this, options);
 
   SBValueList value_list;
   std::unique_lock<std::recursive_mutex> lock;
@@ -895,7 +876,7 @@ SBValueList SBFrame::GetVariables(const lldb::SBVariablesOptions &options) {
 }
 
 SBValueList SBFrame::GetRegisters() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValueList, SBFrame, GetRegisters);
+  LLDB_INSTRUMENT_VA(this);
 
   SBValueList value_list;
   std::unique_lock<std::recursive_mutex> lock;
@@ -925,8 +906,7 @@ SBValueList SBFrame::GetRegisters() {
 }
 
 SBValue SBFrame::FindRegister(const char *name) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, FindRegister, (const char *),
-                     name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   SBValue result;
   ValueObjectSP value_sp;
@@ -957,8 +937,7 @@ SBValue SBFrame::FindRegister(const char *name) {
 }
 
 bool SBFrame::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBFrame, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -984,8 +963,7 @@ bool SBFrame::GetDescription(SBStream &description) {
 }
 
 SBValue SBFrame::EvaluateExpression(const char *expr) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, EvaluateExpression, (const char *),
-                     expr);
+  LLDB_INSTRUMENT_VA(this, expr);
 
   SBValue result;
   std::unique_lock<std::recursive_mutex> lock;
@@ -1012,9 +990,7 @@ SBValue SBFrame::EvaluateExpression(const char *expr) {
 SBValue
 SBFrame::EvaluateExpression(const char *expr,
                             lldb::DynamicValueType fetch_dynamic_value) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, EvaluateExpression,
-                     (const char *, lldb::DynamicValueType), expr,
-                     fetch_dynamic_value);
+  LLDB_INSTRUMENT_VA(this, expr, fetch_dynamic_value);
 
   SBExpressionOptions options;
   options.SetFetchDynamicValue(fetch_dynamic_value);
@@ -1035,9 +1011,7 @@ SBFrame::EvaluateExpression(const char *expr,
 SBValue SBFrame::EvaluateExpression(const char *expr,
                                     lldb::DynamicValueType fetch_dynamic_value,
                                     bool unwind_on_error) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, EvaluateExpression,
-                     (const char *, lldb::DynamicValueType, bool), expr,
-                     fetch_dynamic_value, unwind_on_error);
+  LLDB_INSTRUMENT_VA(this, expr, fetch_dynamic_value, unwind_on_error);
 
   SBExpressionOptions options;
   std::unique_lock<std::recursive_mutex> lock;
@@ -1057,9 +1031,7 @@ SBValue SBFrame::EvaluateExpression(const char *expr,
 
 lldb::SBValue SBFrame::EvaluateExpression(const char *expr,
                                           const SBExpressionOptions &options) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBFrame, EvaluateExpression,
-                     (const char *, const lldb::SBExpressionOptions &), expr,
-                     options);
+  LLDB_INSTRUMENT_VA(this, expr, options);
 
   Log *expr_log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
 
@@ -1110,13 +1082,13 @@ lldb::SBValue SBFrame::EvaluateExpression(const char *expr,
 }
 
 bool SBFrame::IsInlined() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBFrame, IsInlined);
+  LLDB_INSTRUMENT_VA(this);
 
   return static_cast<const SBFrame *>(this)->IsInlined();
 }
 
 bool SBFrame::IsInlined() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFrame, IsInlined);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -1140,13 +1112,13 @@ bool SBFrame::IsInlined() const {
 }
 
 bool SBFrame::IsArtificial() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBFrame, IsArtificial);
+  LLDB_INSTRUMENT_VA(this);
 
   return static_cast<const SBFrame *>(this)->IsArtificial();
 }
 
 bool SBFrame::IsArtificial() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFrame, IsArtificial);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -1159,13 +1131,13 @@ bool SBFrame::IsArtificial() const {
 }
 
 const char *SBFrame::GetFunctionName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBFrame, GetFunctionName);
+  LLDB_INSTRUMENT_VA(this);
 
   return static_cast<const SBFrame *>(this)->GetFunctionName();
 }
 
 lldb::LanguageType SBFrame::GuessLanguage() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::LanguageType, SBFrame, GuessLanguage);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -1186,7 +1158,7 @@ lldb::LanguageType SBFrame::GuessLanguage() const {
 }
 
 const char *SBFrame::GetFunctionName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFrame, GetFunctionName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   std::unique_lock<std::recursive_mutex> lock;
@@ -1228,7 +1200,7 @@ const char *SBFrame::GetFunctionName() const {
 }
 
 const char *SBFrame::GetDisplayFunctionName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBFrame, GetDisplayFunctionName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
 
diff --git a/lldb/source/API/SBFunction.cpp b/lldb/source/API/SBFunction.cpp
index 2b1bc05e33561..562cae4e89069 100644
--- a/lldb/source/API/SBFunction.cpp
+++ b/lldb/source/API/SBFunction.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBFunction.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBProcess.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Core/Disassembler.h"
@@ -18,23 +17,23 @@
 #include "lldb/Symbol/VariableList.h"
 #include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBFunction::SBFunction() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBFunction); }
+SBFunction::SBFunction() { LLDB_INSTRUMENT_VA(this); }
 
 SBFunction::SBFunction(lldb_private::Function *lldb_object_ptr)
     : m_opaque_ptr(lldb_object_ptr) {}
 
 SBFunction::SBFunction(const lldb::SBFunction &rhs)
     : m_opaque_ptr(rhs.m_opaque_ptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBFunction, (const lldb::SBFunction &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBFunction &SBFunction::operator=(const SBFunction &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBFunction &,
-                     SBFunction, operator=,(const lldb::SBFunction &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_ptr = rhs.m_opaque_ptr;
   return *this;
@@ -43,17 +42,17 @@ const SBFunction &SBFunction::operator=(const SBFunction &rhs) {
 SBFunction::~SBFunction() { m_opaque_ptr = nullptr; }
 
 bool SBFunction::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFunction, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBFunction::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBFunction, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr != nullptr;
 }
 
 const char *SBFunction::GetName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *cstr = nullptr;
   if (m_opaque_ptr)
@@ -63,7 +62,7 @@ const char *SBFunction::GetName() const {
 }
 
 const char *SBFunction::GetDisplayName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetDisplayName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *cstr = nullptr;
   if (m_opaque_ptr)
@@ -73,7 +72,7 @@ const char *SBFunction::GetDisplayName() const {
 }
 
 const char *SBFunction::GetMangledName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBFunction, GetMangledName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *cstr = nullptr;
   if (m_opaque_ptr)
@@ -82,21 +81,19 @@ const char *SBFunction::GetMangledName() const {
 }
 
 bool SBFunction::operator==(const SBFunction &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBFunction, operator==,(const lldb::SBFunction &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr == rhs.m_opaque_ptr;
 }
 
 bool SBFunction::operator!=(const SBFunction &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBFunction, operator!=,(const lldb::SBFunction &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr != rhs.m_opaque_ptr;
 }
 
 bool SBFunction::GetDescription(SBStream &s) {
-  LLDB_RECORD_METHOD(bool, SBFunction, GetDescription, (lldb::SBStream &), s);
+  LLDB_INSTRUMENT_VA(this, s);
 
   if (m_opaque_ptr) {
     s.Printf("SBFunction: id = 0x%8.8" PRIx64 ", name = %s",
@@ -111,16 +108,14 @@ bool SBFunction::GetDescription(SBStream &s) {
 }
 
 SBInstructionList SBFunction::GetInstructions(SBTarget target) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBFunction, GetInstructions,
-                     (lldb::SBTarget), target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   return GetInstructions(target, nullptr);
 }
 
 SBInstructionList SBFunction::GetInstructions(SBTarget target,
                                               const char *flavor) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBFunction, GetInstructions,
-                     (lldb::SBTarget, const char *), target, flavor);
+  LLDB_INSTRUMENT_VA(this, target, flavor);
 
   SBInstructionList sb_instructions;
   if (m_opaque_ptr) {
@@ -146,7 +141,7 @@ void SBFunction::reset(lldb_private::Function *lldb_object_ptr) {
 }
 
 SBAddress SBFunction::GetStartAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBFunction, GetStartAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress addr;
   if (m_opaque_ptr)
@@ -155,7 +150,7 @@ SBAddress SBFunction::GetStartAddress() {
 }
 
 SBAddress SBFunction::GetEndAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBFunction, GetEndAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress addr;
   if (m_opaque_ptr) {
@@ -169,8 +164,7 @@ SBAddress SBFunction::GetEndAddress() {
 }
 
 const char *SBFunction::GetArgumentName(uint32_t arg_idx) {
-  LLDB_RECORD_METHOD(const char *, SBFunction, GetArgumentName, (uint32_t),
-                     arg_idx);
+  LLDB_INSTRUMENT_VA(this, arg_idx);
 
   if (m_opaque_ptr) {
     Block &block = m_opaque_ptr->GetBlock(true);
@@ -188,7 +182,7 @@ const char *SBFunction::GetArgumentName(uint32_t arg_idx) {
 }
 
 uint32_t SBFunction::GetPrologueByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBFunction, GetPrologueByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetPrologueByteSize();
@@ -196,7 +190,7 @@ uint32_t SBFunction::GetPrologueByteSize() {
 }
 
 SBType SBFunction::GetType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBFunction, GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   SBType sb_type;
   if (m_opaque_ptr) {
@@ -208,7 +202,7 @@ SBType SBFunction::GetType() {
 }
 
 SBBlock SBFunction::GetBlock() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBFunction, GetBlock);
+  LLDB_INSTRUMENT_VA(this);
 
   SBBlock sb_block;
   if (m_opaque_ptr)
@@ -217,7 +211,7 @@ SBBlock SBFunction::GetBlock() {
 }
 
 lldb::LanguageType SBFunction::GetLanguage() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::LanguageType, SBFunction, GetLanguage);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr) {
     if (m_opaque_ptr->GetCompileUnit())
@@ -227,7 +221,7 @@ lldb::LanguageType SBFunction::GetLanguage() {
 }
 
 bool SBFunction::GetIsOptimized() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBFunction, GetIsOptimized);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr) {
     if (m_opaque_ptr->GetCompileUnit())
diff --git a/lldb/source/API/SBHostOS.cpp b/lldb/source/API/SBHostOS.cpp
index 5bcb83d95ae7a..06cf654031a1d 100644
--- a/lldb/source/API/SBHostOS.cpp
+++ b/lldb/source/API/SBHostOS.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBHostOS.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBError.h"
 #include "lldb/Host/Config.h"
 #include "lldb/Host/FileSystem.h"
@@ -17,6 +16,7 @@
 #include "lldb/Host/HostThread.h"
 #include "lldb/Host/ThreadLauncher.h"
 #include "lldb/Utility/FileSpec.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "Plugins/ExpressionParser/Clang/ClangHost.h"
 #if LLDB_ENABLE_PYTHON
@@ -30,8 +30,7 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBFileSpec SBHostOS::GetProgramFileSpec() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBFileSpec, SBHostOS,
-                                    GetProgramFileSpec);
+  LLDB_INSTRUMENT();
 
   SBFileSpec sb_filespec;
   sb_filespec.SetFileSpec(HostInfo::GetProgramFileSpec());
@@ -39,15 +38,13 @@ SBFileSpec SBHostOS::GetProgramFileSpec() {
 }
 
 SBFileSpec SBHostOS::GetLLDBPythonPath() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBFileSpec, SBHostOS,
-                                    GetLLDBPythonPath);
+  LLDB_INSTRUMENT();
 
   return GetLLDBPath(ePathTypePythonDir);
 }
 
 SBFileSpec SBHostOS::GetLLDBPath(lldb::PathType path_type) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBFileSpec, SBHostOS, GetLLDBPath,
-                            (lldb::PathType), path_type);
+  LLDB_INSTRUMENT_VA(path_type);
 
   FileSpec fspec;
   switch (path_type) {
@@ -88,8 +85,7 @@ SBFileSpec SBHostOS::GetLLDBPath(lldb::PathType path_type) {
 }
 
 SBFileSpec SBHostOS::GetUserHomeDirectory() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBFileSpec, SBHostOS,
-                                    GetUserHomeDirectory);
+  LLDB_INSTRUMENT();
 
   FileSpec homedir;
   FileSystem::Instance().GetHomeDirectory(homedir);
@@ -104,9 +100,7 @@ SBFileSpec SBHostOS::GetUserHomeDirectory() {
 lldb::thread_t SBHostOS::ThreadCreate(const char *name,
                                       lldb::thread_func_t thread_function,
                                       void *thread_arg, SBError *error_ptr) {
-  LLDB_RECORD_STATIC_METHOD(lldb::thread_t, SBHostOS, ThreadCreate,
-                            (lldb::thread_func_t, void *, SBError *), name,
-                            thread_function, thread_arg, error_ptr);
+  LLDB_INSTRUMENT_VA(name, thread_function, thread_arg, error_ptr);
   llvm::Expected<HostThread> thread =
       ThreadLauncher::LaunchThread(name, thread_function, thread_arg);
   if (!thread) {
@@ -120,15 +114,10 @@ lldb::thread_t SBHostOS::ThreadCreate(const char *name,
   return thread->Release();
 }
 
-void SBHostOS::ThreadCreated(const char *name) {
-  LLDB_RECORD_STATIC_METHOD(void, SBHostOS, ThreadCreated, (const char *),
-                            name);
-}
+void SBHostOS::ThreadCreated(const char *name) { LLDB_INSTRUMENT_VA(name); }
 
 bool SBHostOS::ThreadCancel(lldb::thread_t thread, SBError *error_ptr) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBHostOS, ThreadCancel,
-                            (lldb::thread_t, lldb::SBError *), thread,
-                            error_ptr);
+  LLDB_INSTRUMENT_VA(thread, error_ptr);
 
   Status error;
   HostThread host_thread(thread);
@@ -140,9 +129,7 @@ bool SBHostOS::ThreadCancel(lldb::thread_t thread, SBError *error_ptr) {
 }
 
 bool SBHostOS::ThreadDetach(lldb::thread_t thread, SBError *error_ptr) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBHostOS, ThreadDetach,
-                            (lldb::thread_t, lldb::SBError *), thread,
-                            error_ptr);
+  LLDB_INSTRUMENT_VA(thread, error_ptr);
 
   Status error;
 #if defined(_WIN32)
@@ -160,10 +147,7 @@ bool SBHostOS::ThreadDetach(lldb::thread_t thread, SBError *error_ptr) {
 
 bool SBHostOS::ThreadJoin(lldb::thread_t thread, lldb::thread_result_t *result,
                           SBError *error_ptr) {
-  LLDB_RECORD_STATIC_METHOD(
-      bool, SBHostOS, ThreadJoin,
-      (lldb::thread_t, lldb::thread_result_t *, lldb::SBError *), thread,
-      result, error_ptr);
+  LLDB_INSTRUMENT_VA(thread, result, error_ptr);
 
   Status error;
   HostThread host_thread(thread);
diff --git a/lldb/source/API/SBInstruction.cpp b/lldb/source/API/SBInstruction.cpp
index f9a75686d906b..6cb9e5dbc1afb 100644
--- a/lldb/source/API/SBInstruction.cpp
+++ b/lldb/source/API/SBInstruction.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBInstruction.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBAddress.h"
 #include "lldb/API/SBFrame.h"
@@ -66,9 +66,7 @@ class InstructionImpl {
 using namespace lldb;
 using namespace lldb_private;
 
-SBInstruction::SBInstruction() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBInstruction);
-}
+SBInstruction::SBInstruction() { LLDB_INSTRUMENT_VA(this); }
 
 SBInstruction::SBInstruction(const lldb::DisassemblerSP &disasm_sp,
                              const lldb::InstructionSP &inst_sp)
@@ -76,13 +74,11 @@ SBInstruction::SBInstruction(const lldb::DisassemblerSP &disasm_sp,
 
 SBInstruction::SBInstruction(const SBInstruction &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBInstruction, (const lldb::SBInstruction &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBInstruction &SBInstruction::operator=(const SBInstruction &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBInstruction &,
-                     SBInstruction, operator=,(const lldb::SBInstruction &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = rhs.m_opaque_sp;
@@ -92,17 +88,17 @@ const SBInstruction &SBInstruction::operator=(const SBInstruction &rhs) {
 SBInstruction::~SBInstruction() = default;
 
 bool SBInstruction::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBInstruction, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBInstruction::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInstruction, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp && m_opaque_sp->IsValid();
 }
 
 SBAddress SBInstruction::GetAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBInstruction, GetAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress sb_addr;
   lldb::InstructionSP inst_sp(GetOpaque());
@@ -112,8 +108,7 @@ SBAddress SBInstruction::GetAddress() {
 }
 
 const char *SBInstruction::GetMnemonic(SBTarget target) {
-  LLDB_RECORD_METHOD(const char *, SBInstruction, GetMnemonic, (lldb::SBTarget),
-                     target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp) {
@@ -132,8 +127,7 @@ const char *SBInstruction::GetMnemonic(SBTarget target) {
 }
 
 const char *SBInstruction::GetOperands(SBTarget target) {
-  LLDB_RECORD_METHOD(const char *, SBInstruction, GetOperands, (lldb::SBTarget),
-                     target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp) {
@@ -152,8 +146,7 @@ const char *SBInstruction::GetOperands(SBTarget target) {
 }
 
 const char *SBInstruction::GetComment(SBTarget target) {
-  LLDB_RECORD_METHOD(const char *, SBInstruction, GetComment, (lldb::SBTarget),
-                     target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp) {
@@ -172,7 +165,7 @@ const char *SBInstruction::GetComment(SBTarget target) {
 }
 
 size_t SBInstruction::GetByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBInstruction, GetByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp)
@@ -181,8 +174,7 @@ size_t SBInstruction::GetByteSize() {
 }
 
 SBData SBInstruction::GetData(SBTarget target) {
-  LLDB_RECORD_METHOD(lldb::SBData, SBInstruction, GetData, (lldb::SBTarget),
-                     target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   lldb::SBData sb_data;
   lldb::InstructionSP inst_sp(GetOpaque());
@@ -196,7 +188,7 @@ SBData SBInstruction::GetData(SBTarget target) {
 }
 
 bool SBInstruction::DoesBranch() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBInstruction, DoesBranch);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp)
@@ -205,7 +197,7 @@ bool SBInstruction::DoesBranch() {
 }
 
 bool SBInstruction::HasDelaySlot() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBInstruction, HasDelaySlot);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp)
@@ -214,7 +206,7 @@ bool SBInstruction::HasDelaySlot() {
 }
 
 bool SBInstruction::CanSetBreakpoint() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBInstruction, CanSetBreakpoint);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp)
@@ -235,8 +227,7 @@ void SBInstruction::SetOpaque(const lldb::DisassemblerSP &disasm_sp,
 }
 
 bool SBInstruction::GetDescription(lldb::SBStream &s) {
-  LLDB_RECORD_METHOD(bool, SBInstruction, GetDescription, (lldb::SBStream &),
-                     s);
+  LLDB_INSTRUMENT_VA(this, s);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp) {
@@ -257,18 +248,18 @@ bool SBInstruction::GetDescription(lldb::SBStream &s) {
 }
 
 void SBInstruction::Print(FILE *outp) {
-  LLDB_RECORD_METHOD(void, SBInstruction, Print, (FILE *), outp);
+  LLDB_INSTRUMENT_VA(this, outp);
   FileSP out = std::make_shared<NativeFile>(outp, /*take_ownership=*/false);
   Print(out);
 }
 
 void SBInstruction::Print(SBFile out) {
-  LLDB_RECORD_METHOD(void, SBInstruction, Print, (SBFile), out);
+  LLDB_INSTRUMENT_VA(this, out);
   Print(out.m_opaque_sp);
 }
 
 void SBInstruction::Print(FileSP out_sp) {
-  LLDB_RECORD_METHOD(void, SBInstruction, Print, (FileSP), out_sp);
+  LLDB_INSTRUMENT_VA(this, out_sp);
 
   if (!out_sp || !out_sp->IsValid())
     return;
@@ -291,8 +282,7 @@ void SBInstruction::Print(FileSP out_sp) {
 
 bool SBInstruction::EmulateWithFrame(lldb::SBFrame &frame,
                                      uint32_t evaluate_options) {
-  LLDB_RECORD_METHOD(bool, SBInstruction, EmulateWithFrame,
-                     (lldb::SBFrame &, uint32_t), frame, evaluate_options);
+  LLDB_INSTRUMENT_VA(this, frame, evaluate_options);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp) {
@@ -316,8 +306,7 @@ bool SBInstruction::EmulateWithFrame(lldb::SBFrame &frame,
 }
 
 bool SBInstruction::DumpEmulation(const char *triple) {
-  LLDB_RECORD_METHOD(bool, SBInstruction, DumpEmulation, (const char *),
-                     triple);
+  LLDB_INSTRUMENT_VA(this, triple);
 
   lldb::InstructionSP inst_sp(GetOpaque());
   if (inst_sp && triple) {
@@ -328,9 +317,7 @@ bool SBInstruction::DumpEmulation(const char *triple) {
 
 bool SBInstruction::TestEmulation(lldb::SBStream &output_stream,
                                   const char *test_file) {
-  LLDB_RECORD_METHOD(bool, SBInstruction, TestEmulation,
-                     (lldb::SBStream &, const char *), output_stream,
-                     test_file);
+  LLDB_INSTRUMENT_VA(this, output_stream, test_file);
 
   if (!m_opaque_sp)
     SetOpaque(lldb::DisassemblerSP(),
diff --git a/lldb/source/API/SBInstructionList.cpp b/lldb/source/API/SBInstructionList.cpp
index 241c497b7fc39..e289e8e9343d9 100644
--- a/lldb/source/API/SBInstructionList.cpp
+++ b/lldb/source/API/SBInstructionList.cpp
@@ -7,35 +7,30 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBInstructionList.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBAddress.h"
+#include "lldb/API/SBFile.h"
 #include "lldb/API/SBInstruction.h"
 #include "lldb/API/SBStream.h"
-#include "lldb/API/SBFile.h"
 #include "lldb/Core/Disassembler.h"
 #include "lldb/Core/Module.h"
 #include "lldb/Core/StreamFile.h"
 #include "lldb/Symbol/SymbolContext.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Stream.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBInstructionList::SBInstructionList() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBInstructionList);
-}
+SBInstructionList::SBInstructionList() { LLDB_INSTRUMENT_VA(this); }
 
 SBInstructionList::SBInstructionList(const SBInstructionList &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBInstructionList, (const lldb::SBInstructionList &),
-                          rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBInstructionList &SBInstructionList::
 operator=(const SBInstructionList &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBInstructionList &,
-      SBInstructionList, operator=,(const lldb::SBInstructionList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = rhs.m_opaque_sp;
@@ -45,17 +40,17 @@ operator=(const SBInstructionList &rhs) {
 SBInstructionList::~SBInstructionList() = default;
 
 bool SBInstructionList::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInstructionList, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBInstructionList::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBInstructionList, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 size_t SBInstructionList::GetSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBInstructionList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     return m_opaque_sp->GetInstructionList().GetSize();
@@ -63,8 +58,7 @@ size_t SBInstructionList::GetSize() {
 }
 
 SBInstruction SBInstructionList::GetInstructionAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBInstruction, SBInstructionList,
-                     GetInstructionAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBInstruction inst;
   if (m_opaque_sp && idx < m_opaque_sp->GetInstructionList().GetSize())
@@ -77,9 +71,7 @@ SBInstruction SBInstructionList::GetInstructionAtIndex(uint32_t idx) {
 size_t SBInstructionList::GetInstructionsCount(const SBAddress &start,
                                                const SBAddress &end,
                                                bool canSetBreakpoint) {
-  LLDB_RECORD_METHOD(size_t, SBInstructionList, GetInstructionsCount,
-                     (const lldb::SBAddress &, const lldb::SBAddress &, bool),
-                     start, end, canSetBreakpoint);
+  LLDB_INSTRUMENT_VA(this, start, end, canSetBreakpoint);
 
   size_t num_instructions = GetSize();
   size_t i = 0;
@@ -104,14 +96,13 @@ size_t SBInstructionList::GetInstructionsCount(const SBAddress &start,
 }
 
 void SBInstructionList::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBInstructionList, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp.reset();
 }
 
 void SBInstructionList::AppendInstruction(SBInstruction insn) {
-  LLDB_RECORD_METHOD(void, SBInstructionList, AppendInstruction,
-                     (lldb::SBInstruction), insn);
+  LLDB_INSTRUMENT_VA(this, insn);
 }
 
 void SBInstructionList::SetDisassembler(const lldb::DisassemblerSP &opaque_sp) {
@@ -119,7 +110,7 @@ void SBInstructionList::SetDisassembler(const lldb::DisassemblerSP &opaque_sp) {
 }
 
 void SBInstructionList::Print(FILE *out) {
-  LLDB_RECORD_METHOD(void, SBInstructionList, Print, (FILE *), out);
+  LLDB_INSTRUMENT_VA(this, out);
   if (out == nullptr)
     return;
   StreamFile stream(out, false);
@@ -127,7 +118,7 @@ void SBInstructionList::Print(FILE *out) {
 }
 
 void SBInstructionList::Print(SBFile out) {
-  LLDB_RECORD_METHOD(void, SBInstructionList, Print, (SBFile), out);
+  LLDB_INSTRUMENT_VA(this, out);
   if (!out.IsValid())
     return;
   StreamFile stream(out.m_opaque_sp);
@@ -135,7 +126,7 @@ void SBInstructionList::Print(SBFile out) {
 }
 
 void SBInstructionList::Print(FileSP out_sp) {
-  LLDB_RECORD_METHOD(void, SBInstructionList, Print, (FileSP), out_sp);
+  LLDB_INSTRUMENT_VA(this, out_sp);
   if (!out_sp || !out_sp->IsValid())
     return;
   StreamFile stream(out_sp);
@@ -143,8 +134,7 @@ void SBInstructionList::Print(FileSP out_sp) {
 }
 
 bool SBInstructionList::GetDescription(lldb::SBStream &stream) {
-  LLDB_RECORD_METHOD(bool, SBInstructionList, GetDescription,
-                     (lldb::SBStream &), stream);
+  LLDB_INSTRUMENT_VA(this, stream);
   return GetDescription(stream.ref());
 }
 
@@ -186,8 +176,7 @@ bool SBInstructionList::GetDescription(Stream &sref) {
 }
 
 bool SBInstructionList::DumpEmulationForAllInstructions(const char *triple) {
-  LLDB_RECORD_METHOD(bool, SBInstructionList, DumpEmulationForAllInstructions,
-                     (const char *), triple);
+  LLDB_INSTRUMENT_VA(this, triple);
 
   if (m_opaque_sp) {
     size_t len = GetSize();
diff --git a/lldb/source/API/SBLanguageRuntime.cpp b/lldb/source/API/SBLanguageRuntime.cpp
index 781c985fc042b..d571f282fce03 100644
--- a/lldb/source/API/SBLanguageRuntime.cpp
+++ b/lldb/source/API/SBLanguageRuntime.cpp
@@ -7,25 +7,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBLanguageRuntime.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/Target/Language.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 lldb::LanguageType
 SBLanguageRuntime::GetLanguageTypeFromString(const char *string) {
-  LLDB_RECORD_STATIC_METHOD(lldb::LanguageType, SBLanguageRuntime,
-                            GetLanguageTypeFromString, (const char *), string);
+  LLDB_INSTRUMENT_VA(string);
 
   return Language::GetLanguageTypeFromString(llvm::StringRef(string));
 }
 
 const char *
 SBLanguageRuntime::GetNameForLanguageType(lldb::LanguageType language) {
-  LLDB_RECORD_STATIC_METHOD(const char *, SBLanguageRuntime,
-                            GetNameForLanguageType, (lldb::LanguageType),
-                            language);
+  LLDB_INSTRUMENT_VA(language);
 
   return Language::GetNameForLanguageType(language);
 }
diff --git a/lldb/source/API/SBLaunchInfo.cpp b/lldb/source/API/SBLaunchInfo.cpp
index f47fa5a4ba200..5149feba5e0ba 100644
--- a/lldb/source/API/SBLaunchInfo.cpp
+++ b/lldb/source/API/SBLaunchInfo.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBLaunchInfo.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBEnvironment.h"
 #include "lldb/API/SBError.h"
@@ -40,7 +40,7 @@ class lldb_private::SBLaunchInfoImpl : public ProcessLaunchInfo {
 
 SBLaunchInfo::SBLaunchInfo(const char **argv)
     : m_opaque_sp(new SBLaunchInfoImpl()) {
-  LLDB_RECORD_CONSTRUCTOR(SBLaunchInfo, (const char **), argv);
+  LLDB_INSTRUMENT_VA(this, argv);
 
   m_opaque_sp->GetFlags().Reset(eLaunchFlagDebug | eLaunchFlagDisableASLR);
   if (argv && argv[0])
@@ -48,14 +48,13 @@ SBLaunchInfo::SBLaunchInfo(const char **argv)
 }
 
 SBLaunchInfo::SBLaunchInfo(const SBLaunchInfo &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBLaunchInfo, (const lldb::SBLaunchInfo &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = rhs.m_opaque_sp;
 }
 
 SBLaunchInfo &SBLaunchInfo::operator=(const SBLaunchInfo &rhs) {
-  LLDB_RECORD_METHOD(SBLaunchInfo &,
-                     SBLaunchInfo, operator=,(const lldb::SBLaunchInfo &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = rhs.m_opaque_sp;
   return *this;
@@ -72,90 +71,86 @@ void SBLaunchInfo::set_ref(const ProcessLaunchInfo &info) {
 }
 
 lldb::pid_t SBLaunchInfo::GetProcessID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBLaunchInfo, GetProcessID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetProcessID();
 }
 
 uint32_t SBLaunchInfo::GetUserID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetUserID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetUserID();
 }
 
 uint32_t SBLaunchInfo::GetGroupID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetGroupID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetGroupID();
 }
 
 bool SBLaunchInfo::UserIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBLaunchInfo, UserIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->UserIDIsValid();
 }
 
 bool SBLaunchInfo::GroupIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBLaunchInfo, GroupIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GroupIDIsValid();
 }
 
 void SBLaunchInfo::SetUserID(uint32_t uid) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetUserID, (uint32_t), uid);
+  LLDB_INSTRUMENT_VA(this, uid);
 
   m_opaque_sp->SetUserID(uid);
 }
 
 void SBLaunchInfo::SetGroupID(uint32_t gid) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetGroupID, (uint32_t), gid);
+  LLDB_INSTRUMENT_VA(this, gid);
 
   m_opaque_sp->SetGroupID(gid);
 }
 
 SBFileSpec SBLaunchInfo::GetExecutableFile() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBLaunchInfo, GetExecutableFile);
+  LLDB_INSTRUMENT_VA(this);
 
   return SBFileSpec(m_opaque_sp->GetExecutableFile());
 }
 
 void SBLaunchInfo::SetExecutableFile(SBFileSpec exe_file,
                                      bool add_as_first_arg) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetExecutableFile,
-                     (lldb::SBFileSpec, bool), exe_file, add_as_first_arg);
+  LLDB_INSTRUMENT_VA(this, exe_file, add_as_first_arg);
 
   m_opaque_sp->SetExecutableFile(exe_file.ref(), add_as_first_arg);
 }
 
 SBListener SBLaunchInfo::GetListener() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBListener, SBLaunchInfo, GetListener);
+  LLDB_INSTRUMENT_VA(this);
 
   return SBListener(m_opaque_sp->GetListener());
 }
 
 void SBLaunchInfo::SetListener(SBListener &listener) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetListener, (lldb::SBListener &),
-                     listener);
+  LLDB_INSTRUMENT_VA(this, listener);
 
   m_opaque_sp->SetListener(listener.GetSP());
 }
 
 uint32_t SBLaunchInfo::GetNumArguments() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetNumArguments);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetArguments().GetArgumentCount();
 }
 
 const char *SBLaunchInfo::GetArgumentAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(const char *, SBLaunchInfo, GetArgumentAtIndex, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   return m_opaque_sp->GetArguments().GetArgumentAtIndex(idx);
 }
 
 void SBLaunchInfo::SetArguments(const char **argv, bool append) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetArguments, (const char **, bool),
-                     argv, append);
+  LLDB_INSTRUMENT_VA(this, argv, append);
 
   if (append) {
     if (argv)
@@ -169,14 +164,13 @@ void SBLaunchInfo::SetArguments(const char **argv, bool append) {
 }
 
 uint32_t SBLaunchInfo::GetNumEnvironmentEntries() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetNumEnvironmentEntries);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetEnvironment().size();
 }
 
 const char *SBLaunchInfo::GetEnvironmentEntryAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(const char *, SBLaunchInfo, GetEnvironmentEntryAtIndex,
-                     (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   if (idx > GetNumEnvironmentEntries())
     return nullptr;
@@ -184,14 +178,12 @@ const char *SBLaunchInfo::GetEnvironmentEntryAtIndex(uint32_t idx) {
 }
 
 void SBLaunchInfo::SetEnvironmentEntries(const char **envp, bool append) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetEnvironmentEntries,
-                     (const char **, bool), envp, append);
+  LLDB_INSTRUMENT_VA(this, envp, append);
   SetEnvironment(SBEnvironment(Environment(envp)), append);
 }
 
 void SBLaunchInfo::SetEnvironment(const SBEnvironment &env, bool append) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetEnvironment,
-                     (const lldb::SBEnvironment &, bool), env, append);
+  LLDB_INSTRUMENT_VA(this, env, append);
   Environment &refEnv = env.ref();
   if (append) {
     for (auto &KV : refEnv)
@@ -202,57 +194,54 @@ void SBLaunchInfo::SetEnvironment(const SBEnvironment &env, bool append) {
 }
 
 SBEnvironment SBLaunchInfo::GetEnvironment() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBEnvironment, SBLaunchInfo, GetEnvironment);
+  LLDB_INSTRUMENT_VA(this);
   return SBEnvironment(Environment(m_opaque_sp->GetEnvironment()));
 }
 
 void SBLaunchInfo::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBLaunchInfo, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp->Clear();
 }
 
 const char *SBLaunchInfo::GetWorkingDirectory() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBLaunchInfo,
-                                   GetWorkingDirectory);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetWorkingDirectory().GetCString();
 }
 
 void SBLaunchInfo::SetWorkingDirectory(const char *working_dir) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetWorkingDirectory, (const char *),
-                     working_dir);
+  LLDB_INSTRUMENT_VA(this, working_dir);
 
   m_opaque_sp->SetWorkingDirectory(FileSpec(working_dir));
 }
 
 uint32_t SBLaunchInfo::GetLaunchFlags() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetLaunchFlags);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetFlags().Get();
 }
 
 void SBLaunchInfo::SetLaunchFlags(uint32_t flags) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetLaunchFlags, (uint32_t), flags);
+  LLDB_INSTRUMENT_VA(this, flags);
 
   m_opaque_sp->GetFlags().Reset(flags);
 }
 
 const char *SBLaunchInfo::GetProcessPluginName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBLaunchInfo, GetProcessPluginName);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetProcessPluginName();
 }
 
 void SBLaunchInfo::SetProcessPluginName(const char *plugin_name) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetProcessPluginName, (const char *),
-                     plugin_name);
+  LLDB_INSTRUMENT_VA(this, plugin_name);
 
   return m_opaque_sp->SetProcessPluginName(plugin_name);
 }
 
 const char *SBLaunchInfo::GetShell() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBLaunchInfo, GetShell);
+  LLDB_INSTRUMENT_VA(this);
 
   // Constify this string so that it is saved in the string pool.  Otherwise it
   // would be freed when this function goes out of scope.
@@ -261,93 +250,86 @@ const char *SBLaunchInfo::GetShell() {
 }
 
 void SBLaunchInfo::SetShell(const char *path) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetShell, (const char *), path);
+  LLDB_INSTRUMENT_VA(this, path);
 
   m_opaque_sp->SetShell(FileSpec(path));
 }
 
 bool SBLaunchInfo::GetShellExpandArguments() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBLaunchInfo, GetShellExpandArguments);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetShellExpandArguments();
 }
 
 void SBLaunchInfo::SetShellExpandArguments(bool expand) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetShellExpandArguments, (bool),
-                     expand);
+  LLDB_INSTRUMENT_VA(this, expand);
 
   m_opaque_sp->SetShellExpandArguments(expand);
 }
 
 uint32_t SBLaunchInfo::GetResumeCount() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBLaunchInfo, GetResumeCount);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetResumeCount();
 }
 
 void SBLaunchInfo::SetResumeCount(uint32_t c) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetResumeCount, (uint32_t), c);
+  LLDB_INSTRUMENT_VA(this, c);
 
   m_opaque_sp->SetResumeCount(c);
 }
 
 bool SBLaunchInfo::AddCloseFileAction(int fd) {
-  LLDB_RECORD_METHOD(bool, SBLaunchInfo, AddCloseFileAction, (int), fd);
+  LLDB_INSTRUMENT_VA(this, fd);
 
   return m_opaque_sp->AppendCloseFileAction(fd);
 }
 
 bool SBLaunchInfo::AddDuplicateFileAction(int fd, int dup_fd) {
-  LLDB_RECORD_METHOD(bool, SBLaunchInfo, AddDuplicateFileAction, (int, int), fd,
-                     dup_fd);
+  LLDB_INSTRUMENT_VA(this, fd, dup_fd);
 
   return m_opaque_sp->AppendDuplicateFileAction(fd, dup_fd);
 }
 
 bool SBLaunchInfo::AddOpenFileAction(int fd, const char *path, bool read,
                                      bool write) {
-  LLDB_RECORD_METHOD(bool, SBLaunchInfo, AddOpenFileAction,
-                     (int, const char *, bool, bool), fd, path, read, write);
+  LLDB_INSTRUMENT_VA(this, fd, path, read, write);
 
   return m_opaque_sp->AppendOpenFileAction(fd, FileSpec(path), read, write);
 }
 
 bool SBLaunchInfo::AddSuppressFileAction(int fd, bool read, bool write) {
-  LLDB_RECORD_METHOD(bool, SBLaunchInfo, AddSuppressFileAction,
-                     (int, bool, bool), fd, read, write);
+  LLDB_INSTRUMENT_VA(this, fd, read, write);
 
   return m_opaque_sp->AppendSuppressFileAction(fd, read, write);
 }
 
 void SBLaunchInfo::SetLaunchEventData(const char *data) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetLaunchEventData, (const char *),
-                     data);
+  LLDB_INSTRUMENT_VA(this, data);
 
   m_opaque_sp->SetLaunchEventData(data);
 }
 
 const char *SBLaunchInfo::GetLaunchEventData() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBLaunchInfo,
-                                   GetLaunchEventData);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetLaunchEventData();
 }
 
 void SBLaunchInfo::SetDetachOnError(bool enable) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetDetachOnError, (bool), enable);
+  LLDB_INSTRUMENT_VA(this, enable);
 
   m_opaque_sp->SetDetachOnError(enable);
 }
 
 bool SBLaunchInfo::GetDetachOnError() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBLaunchInfo, GetDetachOnError);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetDetachOnError();
 }
 
 const char *SBLaunchInfo::GetScriptedProcessClassName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBLaunchInfo,
-                                   GetScriptedProcessClassName);
+  LLDB_INSTRUMENT_VA(this);
 
   // Constify this string so that it is saved in the string pool.  Otherwise it
   // would be freed when this function goes out of scope.
@@ -356,15 +338,13 @@ const char *SBLaunchInfo::GetScriptedProcessClassName() const {
 }
 
 void SBLaunchInfo::SetScriptedProcessClassName(const char *class_name) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetScriptedProcessClassName,
-                     (const char *), class_name);
+  LLDB_INSTRUMENT_VA(this, class_name);
 
   m_opaque_sp->SetScriptedProcessClassName(class_name);
 }
 
 lldb::SBStructuredData SBLaunchInfo::GetScriptedProcessDictionary() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBStructuredData, SBLaunchInfo,
-                                   GetScriptedProcessDictionary);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb_private::StructuredData::DictionarySP dict_sp =
       m_opaque_sp->GetScriptedProcessDictionarySP();
@@ -376,8 +356,7 @@ lldb::SBStructuredData SBLaunchInfo::GetScriptedProcessDictionary() const {
 }
 
 void SBLaunchInfo::SetScriptedProcessDictionary(lldb::SBStructuredData dict) {
-  LLDB_RECORD_METHOD(void, SBLaunchInfo, SetScriptedProcessDictionary,
-                     (lldb::SBStructuredData), dict);
+  LLDB_INSTRUMENT_VA(this, dict);
   if (!dict.IsValid() || !dict.m_impl_up)
     return;
 
diff --git a/lldb/source/API/SBLineEntry.cpp b/lldb/source/API/SBLineEntry.cpp
index 52623b6245346..28d12e65fdaf8 100644
--- a/lldb/source/API/SBLineEntry.cpp
+++ b/lldb/source/API/SBLineEntry.cpp
@@ -7,11 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBLineEntry.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Host/PosixApi.h"
 #include "lldb/Symbol/LineEntry.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/StreamString.h"
 
 #include <climits>
@@ -19,10 +19,10 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBLineEntry::SBLineEntry() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBLineEntry); }
+SBLineEntry::SBLineEntry() { LLDB_INSTRUMENT_VA(this); }
 
 SBLineEntry::SBLineEntry(const SBLineEntry &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBLineEntry, (const lldb::SBLineEntry &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -33,8 +33,7 @@ SBLineEntry::SBLineEntry(const lldb_private::LineEntry *lldb_object_ptr) {
 }
 
 const SBLineEntry &SBLineEntry::operator=(const SBLineEntry &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBLineEntry &,
-                     SBLineEntry, operator=,(const lldb::SBLineEntry &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -48,8 +47,7 @@ void SBLineEntry::SetLineEntry(const lldb_private::LineEntry &lldb_object_ref) {
 SBLineEntry::~SBLineEntry() = default;
 
 SBAddress SBLineEntry::GetStartAddress() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBLineEntry,
-                                   GetStartAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress sb_address;
   if (m_opaque_up)
@@ -59,7 +57,7 @@ SBAddress SBLineEntry::GetStartAddress() const {
 }
 
 SBAddress SBLineEntry::GetEndAddress() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBLineEntry, GetEndAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress sb_address;
   if (m_opaque_up) {
@@ -70,17 +68,17 @@ SBAddress SBLineEntry::GetEndAddress() const {
 }
 
 bool SBLineEntry::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBLineEntry, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBLineEntry::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBLineEntry, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up.get() && m_opaque_up->IsValid();
 }
 
 SBFileSpec SBLineEntry::GetFileSpec() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBLineEntry, GetFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec sb_file_spec;
   if (m_opaque_up.get() && m_opaque_up->file)
@@ -90,7 +88,7 @@ SBFileSpec SBLineEntry::GetFileSpec() const {
 }
 
 uint32_t SBLineEntry::GetLine() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBLineEntry, GetLine);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t line = 0;
   if (m_opaque_up)
@@ -100,7 +98,7 @@ uint32_t SBLineEntry::GetLine() const {
 }
 
 uint32_t SBLineEntry::GetColumn() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBLineEntry, GetColumn);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->column;
@@ -108,8 +106,7 @@ uint32_t SBLineEntry::GetColumn() const {
 }
 
 void SBLineEntry::SetFileSpec(lldb::SBFileSpec filespec) {
-  LLDB_RECORD_METHOD(void, SBLineEntry, SetFileSpec, (lldb::SBFileSpec),
-                     filespec);
+  LLDB_INSTRUMENT_VA(this, filespec);
 
   if (filespec.IsValid())
     ref().file = filespec.ref();
@@ -117,20 +114,19 @@ void SBLineEntry::SetFileSpec(lldb::SBFileSpec filespec) {
     ref().file.Clear();
 }
 void SBLineEntry::SetLine(uint32_t line) {
-  LLDB_RECORD_METHOD(void, SBLineEntry, SetLine, (uint32_t), line);
+  LLDB_INSTRUMENT_VA(this, line);
 
   ref().line = line;
 }
 
 void SBLineEntry::SetColumn(uint32_t column) {
-  LLDB_RECORD_METHOD(void, SBLineEntry, SetColumn, (uint32_t), column);
+  LLDB_INSTRUMENT_VA(this, column);
 
   ref().line = column;
 }
 
 bool SBLineEntry::operator==(const SBLineEntry &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBLineEntry, operator==,(const lldb::SBLineEntry &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   lldb_private::LineEntry *lhs_ptr = m_opaque_up.get();
   lldb_private::LineEntry *rhs_ptr = rhs.m_opaque_up.get();
@@ -142,8 +138,7 @@ bool SBLineEntry::operator==(const SBLineEntry &rhs) const {
 }
 
 bool SBLineEntry::operator!=(const SBLineEntry &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBLineEntry, operator!=,(const lldb::SBLineEntry &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   lldb_private::LineEntry *lhs_ptr = m_opaque_up.get();
   lldb_private::LineEntry *rhs_ptr = rhs.m_opaque_up.get();
@@ -167,8 +162,7 @@ lldb_private::LineEntry &SBLineEntry::ref() {
 const lldb_private::LineEntry &SBLineEntry::ref() const { return *m_opaque_up; }
 
 bool SBLineEntry::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBLineEntry, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
diff --git a/lldb/source/API/SBListener.cpp b/lldb/source/API/SBListener.cpp
index 50e7abd951d0b..2ce17a5f521d7 100644
--- a/lldb/source/API/SBListener.cpp
+++ b/lldb/source/API/SBListener.cpp
@@ -7,34 +7,33 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBListener.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBBroadcaster.h"
 #include "lldb/API/SBDebugger.h"
 #include "lldb/API/SBEvent.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Core/Debugger.h"
 #include "lldb/Utility/Broadcaster.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Listener.h"
 #include "lldb/Utility/StreamString.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBListener::SBListener() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBListener); }
+SBListener::SBListener() { LLDB_INSTRUMENT_VA(this); }
 
 SBListener::SBListener(const char *name)
     : m_opaque_sp(Listener::MakeListener(name)), m_unused_ptr(nullptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBListener, (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 }
 
 SBListener::SBListener(const SBListener &rhs)
     : m_opaque_sp(rhs.m_opaque_sp), m_unused_ptr(nullptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBListener, (const lldb::SBListener &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const lldb::SBListener &SBListener::operator=(const lldb::SBListener &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBListener &,
-                     SBListener, operator=,(const lldb::SBListener &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -49,18 +48,17 @@ SBListener::SBListener(const lldb::ListenerSP &listener_sp)
 SBListener::~SBListener() = default;
 
 bool SBListener::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBListener, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBListener::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBListener, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp != nullptr;
 }
 
 void SBListener::AddEvent(const SBEvent &event) {
-  LLDB_RECORD_METHOD(void, SBListener, AddEvent, (const lldb::SBEvent &),
-                     event);
+  LLDB_INSTRUMENT_VA(this, event);
 
   EventSP &event_sp = event.GetSP();
   if (event_sp)
@@ -68,7 +66,7 @@ void SBListener::AddEvent(const SBEvent &event) {
 }
 
 void SBListener::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBListener, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     m_opaque_sp->Clear();
@@ -77,9 +75,7 @@ void SBListener::Clear() {
 uint32_t SBListener::StartListeningForEventClass(SBDebugger &debugger,
                                                  const char *broadcaster_class,
                                                  uint32_t event_mask) {
-  LLDB_RECORD_METHOD(uint32_t, SBListener, StartListeningForEventClass,
-                     (lldb::SBDebugger &, const char *, uint32_t), debugger,
-                     broadcaster_class, event_mask);
+  LLDB_INSTRUMENT_VA(this, debugger, broadcaster_class, event_mask);
 
   if (m_opaque_sp) {
     Debugger *lldb_debugger = debugger.get();
@@ -95,9 +91,7 @@ uint32_t SBListener::StartListeningForEventClass(SBDebugger &debugger,
 bool SBListener::StopListeningForEventClass(SBDebugger &debugger,
                                             const char *broadcaster_class,
                                             uint32_t event_mask) {
-  LLDB_RECORD_METHOD(bool, SBListener, StopListeningForEventClass,
-                     (lldb::SBDebugger &, const char *, uint32_t), debugger,
-                     broadcaster_class, event_mask);
+  LLDB_INSTRUMENT_VA(this, debugger, broadcaster_class, event_mask);
 
   if (m_opaque_sp) {
     Debugger *lldb_debugger = debugger.get();
@@ -112,9 +106,7 @@ bool SBListener::StopListeningForEventClass(SBDebugger &debugger,
 
 uint32_t SBListener::StartListeningForEvents(const SBBroadcaster &broadcaster,
                                              uint32_t event_mask) {
-  LLDB_RECORD_METHOD(uint32_t, SBListener, StartListeningForEvents,
-                     (const lldb::SBBroadcaster &, uint32_t), broadcaster,
-                     event_mask);
+  LLDB_INSTRUMENT_VA(this, broadcaster, event_mask);
 
   uint32_t acquired_event_mask = 0;
   if (m_opaque_sp && broadcaster.IsValid()) {
@@ -127,9 +119,7 @@ uint32_t SBListener::StartListeningForEvents(const SBBroadcaster &broadcaster,
 
 bool SBListener::StopListeningForEvents(const SBBroadcaster &broadcaster,
                                         uint32_t event_mask) {
-  LLDB_RECORD_METHOD(bool, SBListener, StopListeningForEvents,
-                     (const lldb::SBBroadcaster &, uint32_t), broadcaster,
-                     event_mask);
+  LLDB_INSTRUMENT_VA(this, broadcaster, event_mask);
 
   if (m_opaque_sp && broadcaster.IsValid()) {
     return m_opaque_sp->StopListeningForEvents(broadcaster.get(), event_mask);
@@ -138,8 +128,7 @@ bool SBListener::StopListeningForEvents(const SBBroadcaster &broadcaster,
 }
 
 bool SBListener::WaitForEvent(uint32_t timeout_secs, SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, WaitForEvent,
-                     (uint32_t, lldb::SBEvent &), timeout_secs, event);
+  LLDB_INSTRUMENT_VA(this, timeout_secs, event);
 
   bool success = false;
 
@@ -165,9 +154,7 @@ bool SBListener::WaitForEvent(uint32_t timeout_secs, SBEvent &event) {
 bool SBListener::WaitForEventForBroadcaster(uint32_t num_seconds,
                                             const SBBroadcaster &broadcaster,
                                             SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, WaitForEventForBroadcaster,
-                     (uint32_t, const lldb::SBBroadcaster &, lldb::SBEvent &),
-                     num_seconds, broadcaster, event);
+  LLDB_INSTRUMENT_VA(this, num_seconds, broadcaster, event);
 
   if (m_opaque_sp && broadcaster.IsValid()) {
     Timeout<std::micro> timeout(llvm::None);
@@ -187,10 +174,7 @@ bool SBListener::WaitForEventForBroadcaster(uint32_t num_seconds,
 bool SBListener::WaitForEventForBroadcasterWithType(
     uint32_t num_seconds, const SBBroadcaster &broadcaster,
     uint32_t event_type_mask, SBEvent &event) {
-  LLDB_RECORD_METHOD(
-      bool, SBListener, WaitForEventForBroadcasterWithType,
-      (uint32_t, const lldb::SBBroadcaster &, uint32_t, lldb::SBEvent &),
-      num_seconds, broadcaster, event_type_mask, event);
+  LLDB_INSTRUMENT_VA(this, num_seconds, broadcaster, event_type_mask, event);
 
   if (m_opaque_sp && broadcaster.IsValid()) {
     Timeout<std::micro> timeout(llvm::None);
@@ -208,8 +192,7 @@ bool SBListener::WaitForEventForBroadcasterWithType(
 }
 
 bool SBListener::PeekAtNextEvent(SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, PeekAtNextEvent, (lldb::SBEvent &),
-                     event);
+  LLDB_INSTRUMENT_VA(this, event);
 
   if (m_opaque_sp) {
     event.reset(m_opaque_sp->PeekAtNextEvent());
@@ -221,9 +204,7 @@ bool SBListener::PeekAtNextEvent(SBEvent &event) {
 
 bool SBListener::PeekAtNextEventForBroadcaster(const SBBroadcaster &broadcaster,
                                                SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, PeekAtNextEventForBroadcaster,
-                     (const lldb::SBBroadcaster &, lldb::SBEvent &),
-                     broadcaster, event);
+  LLDB_INSTRUMENT_VA(this, broadcaster, event);
 
   if (m_opaque_sp && broadcaster.IsValid()) {
     event.reset(m_opaque_sp->PeekAtNextEventForBroadcaster(broadcaster.get()));
@@ -236,9 +217,7 @@ bool SBListener::PeekAtNextEventForBroadcaster(const SBBroadcaster &broadcaster,
 bool SBListener::PeekAtNextEventForBroadcasterWithType(
     const SBBroadcaster &broadcaster, uint32_t event_type_mask,
     SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, PeekAtNextEventForBroadcasterWithType,
-                     (const lldb::SBBroadcaster &, uint32_t, lldb::SBEvent &),
-                     broadcaster, event_type_mask, event);
+  LLDB_INSTRUMENT_VA(this, broadcaster, event_type_mask, event);
 
   if (m_opaque_sp && broadcaster.IsValid()) {
     event.reset(m_opaque_sp->PeekAtNextEventForBroadcasterWithType(
@@ -250,7 +229,7 @@ bool SBListener::PeekAtNextEventForBroadcasterWithType(
 }
 
 bool SBListener::GetNextEvent(SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, GetNextEvent, (lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(this, event);
 
   if (m_opaque_sp) {
     EventSP event_sp;
@@ -265,9 +244,7 @@ bool SBListener::GetNextEvent(SBEvent &event) {
 
 bool SBListener::GetNextEventForBroadcaster(const SBBroadcaster &broadcaster,
                                             SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, GetNextEventForBroadcaster,
-                     (const lldb::SBBroadcaster &, lldb::SBEvent &),
-                     broadcaster, event);
+  LLDB_INSTRUMENT_VA(this, broadcaster, event);
 
   if (m_opaque_sp && broadcaster.IsValid()) {
     EventSP event_sp;
@@ -284,9 +261,7 @@ bool SBListener::GetNextEventForBroadcaster(const SBBroadcaster &broadcaster,
 bool SBListener::GetNextEventForBroadcasterWithType(
     const SBBroadcaster &broadcaster, uint32_t event_type_mask,
     SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, GetNextEventForBroadcasterWithType,
-                     (const lldb::SBBroadcaster &, uint32_t, lldb::SBEvent &),
-                     broadcaster, event_type_mask, event);
+  LLDB_INSTRUMENT_VA(this, broadcaster, event_type_mask, event);
 
   if (m_opaque_sp && broadcaster.IsValid()) {
     EventSP event_sp;
@@ -302,8 +277,7 @@ bool SBListener::GetNextEventForBroadcasterWithType(
 }
 
 bool SBListener::HandleBroadcastEvent(const SBEvent &event) {
-  LLDB_RECORD_METHOD(bool, SBListener, HandleBroadcastEvent,
-                     (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(this, event);
 
   if (m_opaque_sp)
     return m_opaque_sp->HandleBroadcastEvent(event.GetSP());
diff --git a/lldb/source/API/SBMemoryRegionInfo.cpp b/lldb/source/API/SBMemoryRegionInfo.cpp
index 1a0fb7c74230c..7d9db478dde17 100644
--- a/lldb/source/API/SBMemoryRegionInfo.cpp
+++ b/lldb/source/API/SBMemoryRegionInfo.cpp
@@ -7,29 +7,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBMemoryRegionInfo.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBDefines.h"
 #include "lldb/API/SBError.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Target/MemoryRegionInfo.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/StreamString.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 SBMemoryRegionInfo::SBMemoryRegionInfo() : m_opaque_up(new MemoryRegionInfo()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBMemoryRegionInfo);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBMemoryRegionInfo::SBMemoryRegionInfo(const char *name, lldb::addr_t begin,
                                        lldb::addr_t end, uint32_t permissions,
                                        bool mapped, bool stack_memory)
     : SBMemoryRegionInfo() {
-  LLDB_RECORD_CONSTRUCTOR(
-      SBMemoryRegionInfo,
-      (const char *, lldb::addr_t, lldb::addr_t, uint32_t, bool, bool), name,
-      begin, end, permissions, mapped, stack_memory);
+  LLDB_INSTRUMENT_VA(this, name, begin, end, permissions, mapped, stack_memory);
   m_opaque_up->SetName(name);
   m_opaque_up->GetRange().SetRangeBase(begin);
   m_opaque_up->GetRange().SetRangeEnd(end);
@@ -47,16 +44,13 @@ SBMemoryRegionInfo::SBMemoryRegionInfo(const MemoryRegionInfo *lldb_object_ptr)
 }
 
 SBMemoryRegionInfo::SBMemoryRegionInfo(const SBMemoryRegionInfo &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBMemoryRegionInfo,
-                          (const lldb::SBMemoryRegionInfo &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
   m_opaque_up = clone(rhs.m_opaque_up);
 }
 
 const SBMemoryRegionInfo &SBMemoryRegionInfo::
 operator=(const SBMemoryRegionInfo &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBMemoryRegionInfo &,
-      SBMemoryRegionInfo, operator=,(const lldb::SBMemoryRegionInfo &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -66,23 +60,19 @@ operator=(const SBMemoryRegionInfo &rhs) {
 SBMemoryRegionInfo::~SBMemoryRegionInfo() = default;
 
 void SBMemoryRegionInfo::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBMemoryRegionInfo, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up->Clear();
 }
 
 bool SBMemoryRegionInfo::operator==(const SBMemoryRegionInfo &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBMemoryRegionInfo, operator==,(const lldb::SBMemoryRegionInfo &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return ref() == rhs.ref();
 }
 
 bool SBMemoryRegionInfo::operator!=(const SBMemoryRegionInfo &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBMemoryRegionInfo, operator!=,(const lldb::SBMemoryRegionInfo &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return ref() != rhs.ref();
 }
@@ -92,55 +82,55 @@ MemoryRegionInfo &SBMemoryRegionInfo::ref() { return *m_opaque_up; }
 const MemoryRegionInfo &SBMemoryRegionInfo::ref() const { return *m_opaque_up; }
 
 lldb::addr_t SBMemoryRegionInfo::GetRegionBase() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBMemoryRegionInfo, GetRegionBase);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetRange().GetRangeBase();
 }
 
 lldb::addr_t SBMemoryRegionInfo::GetRegionEnd() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBMemoryRegionInfo, GetRegionEnd);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetRange().GetRangeEnd();
 }
 
 bool SBMemoryRegionInfo::IsReadable() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, IsReadable);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetReadable() == MemoryRegionInfo::eYes;
 }
 
 bool SBMemoryRegionInfo::IsWritable() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, IsWritable);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetWritable() == MemoryRegionInfo::eYes;
 }
 
 bool SBMemoryRegionInfo::IsExecutable() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, IsExecutable);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetExecutable() == MemoryRegionInfo::eYes;
 }
 
 bool SBMemoryRegionInfo::IsMapped() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, IsMapped);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetMapped() == MemoryRegionInfo::eYes;
 }
 
 const char *SBMemoryRegionInfo::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBMemoryRegionInfo, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetName().AsCString();
 }
 
 bool SBMemoryRegionInfo::HasDirtyMemoryPageList() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBMemoryRegionInfo, HasDirtyMemoryPageList);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetDirtyPageList().hasValue();
 }
 
 uint32_t SBMemoryRegionInfo::GetNumDirtyPages() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBMemoryRegionInfo, GetNumDirtyPages);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t num_dirty_pages = 0;
   llvm::Optional<std::vector<addr_t>> dirty_page_list =
@@ -152,8 +142,7 @@ uint32_t SBMemoryRegionInfo::GetNumDirtyPages() {
 }
 
 addr_t SBMemoryRegionInfo::GetDirtyPageAddressAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::addr_t, SBMemoryRegionInfo,
-                     GetDirtyPageAddressAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   addr_t dirty_page_addr = LLDB_INVALID_ADDRESS;
   const llvm::Optional<std::vector<addr_t>> &dirty_page_list =
@@ -165,14 +154,13 @@ addr_t SBMemoryRegionInfo::GetDirtyPageAddressAtIndex(uint32_t idx) {
 }
 
 int SBMemoryRegionInfo::GetPageSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(int, SBMemoryRegionInfo, GetPageSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetPageSize();
 }
 
 bool SBMemoryRegionInfo::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBMemoryRegionInfo, GetDescription,
-                     (lldb::SBStream &), description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
   const addr_t load_addr = m_opaque_up->GetRange().base;
diff --git a/lldb/source/API/SBMemoryRegionInfoList.cpp b/lldb/source/API/SBMemoryRegionInfoList.cpp
index 2de0f07006664..39dee86dc3007 100644
--- a/lldb/source/API/SBMemoryRegionInfoList.cpp
+++ b/lldb/source/API/SBMemoryRegionInfoList.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBMemoryRegionInfoList.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBMemoryRegionInfo.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Target/MemoryRegionInfo.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include <vector>
 
@@ -83,24 +83,20 @@ const MemoryRegionInfos &SBMemoryRegionInfoList::ref() const {
 
 SBMemoryRegionInfoList::SBMemoryRegionInfoList()
     : m_opaque_up(new MemoryRegionInfoListImpl()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBMemoryRegionInfoList);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBMemoryRegionInfoList::SBMemoryRegionInfoList(
     const SBMemoryRegionInfoList &rhs)
     : m_opaque_up(new MemoryRegionInfoListImpl(*rhs.m_opaque_up)) {
-  LLDB_RECORD_CONSTRUCTOR(SBMemoryRegionInfoList,
-                          (const lldb::SBMemoryRegionInfoList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBMemoryRegionInfoList::~SBMemoryRegionInfoList() = default;
 
 const SBMemoryRegionInfoList &SBMemoryRegionInfoList::
 operator=(const SBMemoryRegionInfoList &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBMemoryRegionInfoList &,
-      SBMemoryRegionInfoList, operator=,(const lldb::SBMemoryRegionInfoList &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     *m_opaque_up = *rhs.m_opaque_up;
@@ -109,44 +105,39 @@ operator=(const SBMemoryRegionInfoList &rhs) {
 }
 
 uint32_t SBMemoryRegionInfoList::GetSize() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBMemoryRegionInfoList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetSize();
 }
 
 bool SBMemoryRegionInfoList::GetMemoryRegionContainingAddress(
     lldb::addr_t addr, SBMemoryRegionInfo &region_info) {
-  LLDB_RECORD_METHOD(
-      bool, SBMemoryRegionInfoList, GetMemoryRegionContainingAddress,
-      (lldb::addr_t, lldb::SBMemoryRegionInfo &), addr, region_info);
+  LLDB_INSTRUMENT_VA(this, addr, region_info);
 
   return m_opaque_up->GetMemoryRegionContainingAddress(addr, region_info.ref());
 }
 
 bool SBMemoryRegionInfoList::GetMemoryRegionAtIndex(
     uint32_t idx, SBMemoryRegionInfo &region_info) {
-  LLDB_RECORD_METHOD(bool, SBMemoryRegionInfoList, GetMemoryRegionAtIndex,
-                     (uint32_t, lldb::SBMemoryRegionInfo &), idx, region_info);
+  LLDB_INSTRUMENT_VA(this, idx, region_info);
 
   return m_opaque_up->GetMemoryRegionInfoAtIndex(idx, region_info.ref());
 }
 
 void SBMemoryRegionInfoList::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBMemoryRegionInfoList, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up->Clear();
 }
 
 void SBMemoryRegionInfoList::Append(SBMemoryRegionInfo &sb_region) {
-  LLDB_RECORD_METHOD(void, SBMemoryRegionInfoList, Append,
-                     (lldb::SBMemoryRegionInfo &), sb_region);
+  LLDB_INSTRUMENT_VA(this, sb_region);
 
   m_opaque_up->Append(sb_region.ref());
 }
 
 void SBMemoryRegionInfoList::Append(SBMemoryRegionInfoList &sb_region_list) {
-  LLDB_RECORD_METHOD(void, SBMemoryRegionInfoList, Append,
-                     (lldb::SBMemoryRegionInfoList &), sb_region_list);
+  LLDB_INSTRUMENT_VA(this, sb_region_list);
 
   m_opaque_up->Append(*sb_region_list);
 }
diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp
index 366c920f5756a..2483495b97db0 100644
--- a/lldb/source/API/SBModule.cpp
+++ b/lldb/source/API/SBModule.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBModule.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBAddress.h"
 #include "lldb/API/SBFileSpec.h"
 #include "lldb/API/SBModuleSpec.h"
@@ -24,17 +23,18 @@
 #include "lldb/Symbol/TypeSystem.h"
 #include "lldb/Symbol/VariableList.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/StreamString.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBModule::SBModule() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModule); }
+SBModule::SBModule() { LLDB_INSTRUMENT_VA(this); }
 
 SBModule::SBModule(const lldb::ModuleSP &module_sp) : m_opaque_sp(module_sp) {}
 
 SBModule::SBModule(const SBModuleSpec &module_spec) {
-  LLDB_RECORD_CONSTRUCTOR(SBModule, (const lldb::SBModuleSpec &), module_spec);
+  LLDB_INSTRUMENT_VA(this, module_spec);
 
   ModuleSP module_sp;
   Status error = ModuleList::GetSharedModule(
@@ -44,12 +44,11 @@ SBModule::SBModule(const SBModuleSpec &module_spec) {
 }
 
 SBModule::SBModule(const SBModule &rhs) : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBModule, (const lldb::SBModule &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBModule::SBModule(lldb::SBProcess &process, lldb::addr_t header_addr) {
-  LLDB_RECORD_CONSTRUCTOR(SBModule, (lldb::SBProcess &, lldb::addr_t), process,
-                          header_addr);
+  LLDB_INSTRUMENT_VA(this, process, header_addr);
 
   ProcessSP process_sp(process.GetSP());
   if (process_sp) {
@@ -64,8 +63,7 @@ SBModule::SBModule(lldb::SBProcess &process, lldb::addr_t header_addr) {
 }
 
 const SBModule &SBModule::operator=(const SBModule &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBModule &, SBModule, operator=,
-                     (const lldb::SBModule &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = rhs.m_opaque_sp;
@@ -75,23 +73,23 @@ const SBModule &SBModule::operator=(const SBModule &rhs) {
 SBModule::~SBModule() = default;
 
 bool SBModule::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModule, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBModule::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModule, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 void SBModule::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBModule, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp.reset();
 }
 
 SBFileSpec SBModule::GetFileSpec() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBModule, GetFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec file_spec;
   ModuleSP module_sp(GetSP());
@@ -102,8 +100,7 @@ SBFileSpec SBModule::GetFileSpec() const {
 }
 
 lldb::SBFileSpec SBModule::GetPlatformFileSpec() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBModule,
-                                   GetPlatformFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec file_spec;
   ModuleSP module_sp(GetSP());
@@ -114,8 +111,7 @@ lldb::SBFileSpec SBModule::GetPlatformFileSpec() const {
 }
 
 bool SBModule::SetPlatformFileSpec(const lldb::SBFileSpec &platform_file) {
-  LLDB_RECORD_METHOD(bool, SBModule, SetPlatformFileSpec,
-                     (const lldb::SBFileSpec &), platform_file);
+  LLDB_INSTRUMENT_VA(this, platform_file);
 
   bool result = false;
 
@@ -129,8 +125,7 @@ bool SBModule::SetPlatformFileSpec(const lldb::SBFileSpec &platform_file) {
 }
 
 lldb::SBFileSpec SBModule::GetRemoteInstallFileSpec() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBModule,
-                             GetRemoteInstallFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec sb_file_spec;
   ModuleSP module_sp(GetSP());
@@ -140,8 +135,7 @@ lldb::SBFileSpec SBModule::GetRemoteInstallFileSpec() {
 }
 
 bool SBModule::SetRemoteInstallFileSpec(lldb::SBFileSpec &file) {
-  LLDB_RECORD_METHOD(bool, SBModule, SetRemoteInstallFileSpec,
-                     (lldb::SBFileSpec &), file);
+  LLDB_INSTRUMENT_VA(this, file);
 
   ModuleSP module_sp(GetSP());
   if (module_sp) {
@@ -152,7 +146,7 @@ bool SBModule::SetRemoteInstallFileSpec(lldb::SBFileSpec &file) {
 }
 
 const uint8_t *SBModule::GetUUIDBytes() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const uint8_t *, SBModule, GetUUIDBytes);
+  LLDB_INSTRUMENT_VA(this);
 
   const uint8_t *uuid_bytes = nullptr;
   ModuleSP module_sp(GetSP());
@@ -163,7 +157,7 @@ const uint8_t *SBModule::GetUUIDBytes() const {
 }
 
 const char *SBModule::GetUUIDString() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBModule, GetUUIDString);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *uuid_cstr = nullptr;
   ModuleSP module_sp(GetSP());
@@ -183,8 +177,7 @@ const char *SBModule::GetUUIDString() const {
 }
 
 bool SBModule::operator==(const SBModule &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBModule, operator==, (const lldb::SBModule &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (m_opaque_sp)
     return m_opaque_sp.get() == rhs.m_opaque_sp.get();
@@ -192,8 +185,7 @@ bool SBModule::operator==(const SBModule &rhs) const {
 }
 
 bool SBModule::operator!=(const SBModule &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBModule, operator!=, (const lldb::SBModule &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (m_opaque_sp)
     return m_opaque_sp.get() != rhs.m_opaque_sp.get();
@@ -205,8 +197,7 @@ ModuleSP SBModule::GetSP() const { return m_opaque_sp; }
 void SBModule::SetSP(const ModuleSP &module_sp) { m_opaque_sp = module_sp; }
 
 SBAddress SBModule::ResolveFileAddress(lldb::addr_t vm_addr) {
-  LLDB_RECORD_METHOD(lldb::SBAddress, SBModule, ResolveFileAddress,
-                     (lldb::addr_t), vm_addr);
+  LLDB_INSTRUMENT_VA(this, vm_addr);
 
   lldb::SBAddress sb_addr;
   ModuleSP module_sp(GetSP());
@@ -221,9 +212,7 @@ SBAddress SBModule::ResolveFileAddress(lldb::addr_t vm_addr) {
 SBSymbolContext
 SBModule::ResolveSymbolContextForAddress(const SBAddress &addr,
                                          uint32_t resolve_scope) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContext, SBModule,
-                     ResolveSymbolContextForAddress,
-                     (const lldb::SBAddress &, uint32_t), addr, resolve_scope);
+  LLDB_INSTRUMENT_VA(this, addr, resolve_scope);
 
   SBSymbolContext sb_sc;
   ModuleSP module_sp(GetSP());
@@ -234,8 +223,7 @@ SBModule::ResolveSymbolContextForAddress(const SBAddress &addr,
 }
 
 bool SBModule::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBModule, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -249,7 +237,7 @@ bool SBModule::GetDescription(SBStream &description) {
 }
 
 uint32_t SBModule::GetNumCompileUnits() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBModule, GetNumCompileUnits);
+  LLDB_INSTRUMENT_VA(this);
 
   ModuleSP module_sp(GetSP());
   if (module_sp) {
@@ -259,8 +247,7 @@ uint32_t SBModule::GetNumCompileUnits() {
 }
 
 SBCompileUnit SBModule::GetCompileUnitAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBCompileUnit, SBModule, GetCompileUnitAtIndex,
-                     (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   SBCompileUnit sb_cu;
   ModuleSP module_sp(GetSP());
@@ -272,8 +259,7 @@ SBCompileUnit SBModule::GetCompileUnitAtIndex(uint32_t index) {
 }
 
 SBSymbolContextList SBModule::FindCompileUnits(const SBFileSpec &sb_file_spec) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBModule, FindCompileUnits,
-                     (const lldb::SBFileSpec &), sb_file_spec);
+  LLDB_INSTRUMENT_VA(this, sb_file_spec);
 
   SBSymbolContextList sb_sc_list;
   const ModuleSP module_sp(GetSP());
@@ -290,7 +276,7 @@ static Symtab *GetUnifiedSymbolTable(const lldb::ModuleSP &module_sp) {
 }
 
 size_t SBModule::GetNumSymbols() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBModule, GetNumSymbols);
+  LLDB_INSTRUMENT_VA(this);
 
   ModuleSP module_sp(GetSP());
   if (Symtab *symtab = GetUnifiedSymbolTable(module_sp))
@@ -299,7 +285,7 @@ size_t SBModule::GetNumSymbols() {
 }
 
 SBSymbol SBModule::GetSymbolAtIndex(size_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBSymbol, SBModule, GetSymbolAtIndex, (size_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBSymbol sb_symbol;
   ModuleSP module_sp(GetSP());
@@ -311,8 +297,7 @@ SBSymbol SBModule::GetSymbolAtIndex(size_t idx) {
 
 lldb::SBSymbol SBModule::FindSymbol(const char *name,
                                     lldb::SymbolType symbol_type) {
-  LLDB_RECORD_METHOD(lldb::SBSymbol, SBModule, FindSymbol,
-                     (const char *, lldb::SymbolType), name, symbol_type);
+  LLDB_INSTRUMENT_VA(this, name, symbol_type);
 
   SBSymbol sb_symbol;
   if (name && name[0]) {
@@ -328,8 +313,7 @@ lldb::SBSymbol SBModule::FindSymbol(const char *name,
 
 lldb::SBSymbolContextList SBModule::FindSymbols(const char *name,
                                                 lldb::SymbolType symbol_type) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBModule, FindSymbols,
-                     (const char *, lldb::SymbolType), name, symbol_type);
+  LLDB_INSTRUMENT_VA(this, name, symbol_type);
 
   SBSymbolContextList sb_sc_list;
   if (name && name[0]) {
@@ -356,7 +340,7 @@ lldb::SBSymbolContextList SBModule::FindSymbols(const char *name,
 }
 
 size_t SBModule::GetNumSections() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBModule, GetNumSections);
+  LLDB_INSTRUMENT_VA(this);
 
   ModuleSP module_sp(GetSP());
   if (module_sp) {
@@ -370,8 +354,7 @@ size_t SBModule::GetNumSections() {
 }
 
 SBSection SBModule::GetSectionAtIndex(size_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBSection, SBModule, GetSectionAtIndex, (size_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBSection sb_section;
   ModuleSP module_sp(GetSP());
@@ -388,8 +371,7 @@ SBSection SBModule::GetSectionAtIndex(size_t idx) {
 
 lldb::SBSymbolContextList SBModule::FindFunctions(const char *name,
                                                   uint32_t name_type_mask) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBModule, FindFunctions,
-                     (const char *, uint32_t), name, name_type_mask);
+  LLDB_INSTRUMENT_VA(this, name, name_type_mask);
 
   lldb::SBSymbolContextList sb_sc_list;
   ModuleSP module_sp(GetSP());
@@ -407,9 +389,7 @@ lldb::SBSymbolContextList SBModule::FindFunctions(const char *name,
 
 SBValueList SBModule::FindGlobalVariables(SBTarget &target, const char *name,
                                           uint32_t max_matches) {
-  LLDB_RECORD_METHOD(lldb::SBValueList, SBModule, FindGlobalVariables,
-                     (lldb::SBTarget &, const char *, uint32_t), target, name,
-                     max_matches);
+  LLDB_INSTRUMENT_VA(this, target, name, max_matches);
 
   SBValueList sb_value_list;
   ModuleSP module_sp(GetSP());
@@ -431,8 +411,7 @@ SBValueList SBModule::FindGlobalVariables(SBTarget &target, const char *name,
 
 lldb::SBValue SBModule::FindFirstGlobalVariable(lldb::SBTarget &target,
                                                 const char *name) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBModule, FindFirstGlobalVariable,
-                     (lldb::SBTarget &, const char *), target, name);
+  LLDB_INSTRUMENT_VA(this, target, name);
 
   SBValueList sb_value_list(FindGlobalVariables(target, name, 1));
   if (sb_value_list.IsValid() && sb_value_list.GetSize() > 0)
@@ -441,8 +420,7 @@ lldb::SBValue SBModule::FindFirstGlobalVariable(lldb::SBTarget &target,
 }
 
 lldb::SBType SBModule::FindFirstType(const char *name_cstr) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBModule, FindFirstType, (const char *),
-                     name_cstr);
+  LLDB_INSTRUMENT_VA(this, name_cstr);
 
   SBType sb_type;
   ModuleSP module_sp(GetSP());
@@ -467,8 +445,7 @@ lldb::SBType SBModule::FindFirstType(const char *name_cstr) {
 }
 
 lldb::SBType SBModule::GetBasicType(lldb::BasicType type) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBModule, GetBasicType, (lldb::BasicType),
-                     type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   ModuleSP module_sp(GetSP());
   if (module_sp) {
@@ -484,8 +461,7 @@ lldb::SBType SBModule::GetBasicType(lldb::BasicType type) {
 }
 
 lldb::SBTypeList SBModule::FindTypes(const char *type) {
-  LLDB_RECORD_METHOD(lldb::SBTypeList, SBModule, FindTypes, (const char *),
-                     type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   SBTypeList retval;
 
@@ -521,8 +497,7 @@ lldb::SBTypeList SBModule::FindTypes(const char *type) {
 }
 
 lldb::SBType SBModule::GetTypeByID(lldb::user_id_t uid) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBModule, GetTypeByID, (lldb::user_id_t),
-                     uid);
+  LLDB_INSTRUMENT_VA(this, uid);
 
   ModuleSP module_sp(GetSP());
   if (module_sp) {
@@ -536,8 +511,7 @@ lldb::SBType SBModule::GetTypeByID(lldb::user_id_t uid) {
 }
 
 lldb::SBTypeList SBModule::GetTypes(uint32_t type_mask) {
-  LLDB_RECORD_METHOD(lldb::SBTypeList, SBModule, GetTypes, (uint32_t),
-                     type_mask);
+  LLDB_INSTRUMENT_VA(this, type_mask);
 
   SBTypeList sb_type_list;
 
@@ -556,8 +530,7 @@ lldb::SBTypeList SBModule::GetTypes(uint32_t type_mask) {
 }
 
 SBSection SBModule::FindSection(const char *sect_name) {
-  LLDB_RECORD_METHOD(lldb::SBSection, SBModule, FindSection, (const char *),
-                     sect_name);
+  LLDB_INSTRUMENT_VA(this, sect_name);
 
   SBSection sb_section;
 
@@ -578,7 +551,7 @@ SBSection SBModule::FindSection(const char *sect_name) {
 }
 
 lldb::ByteOrder SBModule::GetByteOrder() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::ByteOrder, SBModule, GetByteOrder);
+  LLDB_INSTRUMENT_VA(this);
 
   ModuleSP module_sp(GetSP());
   if (module_sp)
@@ -587,7 +560,7 @@ lldb::ByteOrder SBModule::GetByteOrder() {
 }
 
 const char *SBModule::GetTriple() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBModule, GetTriple);
+  LLDB_INSTRUMENT_VA(this);
 
   ModuleSP module_sp(GetSP());
   if (module_sp) {
@@ -602,7 +575,7 @@ const char *SBModule::GetTriple() {
 }
 
 uint32_t SBModule::GetAddressByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBModule, GetAddressByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   ModuleSP module_sp(GetSP());
   if (module_sp)
@@ -611,8 +584,7 @@ uint32_t SBModule::GetAddressByteSize() {
 }
 
 uint32_t SBModule::GetVersion(uint32_t *versions, uint32_t num_versions) {
-  LLDB_RECORD_METHOD(uint32_t, SBModule, GetVersion, (uint32_t *, uint32_t),
-                     versions, num_versions);
+  LLDB_INSTRUMENT_VA(this, versions, num_versions);
 
   llvm::VersionTuple version;
   if (ModuleSP module_sp = GetSP())
@@ -640,8 +612,7 @@ uint32_t SBModule::GetVersion(uint32_t *versions, uint32_t num_versions) {
 }
 
 lldb::SBFileSpec SBModule::GetSymbolFileSpec() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBFileSpec, SBModule,
-                                   GetSymbolFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBFileSpec sb_file_spec;
   ModuleSP module_sp(GetSP());
@@ -653,8 +624,7 @@ lldb::SBFileSpec SBModule::GetSymbolFileSpec() const {
 }
 
 lldb::SBAddress SBModule::GetObjectFileHeaderAddress() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBModule,
-                                   GetObjectFileHeaderAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBAddress sb_addr;
   ModuleSP module_sp(GetSP());
@@ -667,8 +637,7 @@ lldb::SBAddress SBModule::GetObjectFileHeaderAddress() const {
 }
 
 lldb::SBAddress SBModule::GetObjectFileEntryPointAddress() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBModule,
-                                   GetObjectFileEntryPointAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBAddress sb_addr;
   ModuleSP module_sp(GetSP());
@@ -681,15 +650,13 @@ lldb::SBAddress SBModule::GetObjectFileEntryPointAddress() const {
 }
 
 uint32_t SBModule::GetNumberAllocatedModules() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(uint32_t, SBModule,
-                                    GetNumberAllocatedModules);
+  LLDB_INSTRUMENT();
 
   return Module::GetNumberAllocatedModules();
 }
 
 void SBModule::GarbageCollectAllocatedModules() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(void, SBModule,
-                                    GarbageCollectAllocatedModules);
+  LLDB_INSTRUMENT();
 
   const bool mandatory = false;
   ModuleList::RemoveOrphanSharedModules(mandatory);
diff --git a/lldb/source/API/SBModuleSpec.cpp b/lldb/source/API/SBModuleSpec.cpp
index ce2bb3556d4df..7deba8e971f87 100644
--- a/lldb/source/API/SBModuleSpec.cpp
+++ b/lldb/source/API/SBModuleSpec.cpp
@@ -7,31 +7,30 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBModuleSpec.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Core/Module.h"
 #include "lldb/Core/ModuleSpec.h"
 #include "lldb/Host/Host.h"
 #include "lldb/Symbol/ObjectFile.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Stream.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 SBModuleSpec::SBModuleSpec() : m_opaque_up(new lldb_private::ModuleSpec()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModuleSpec);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBModuleSpec::SBModuleSpec(const SBModuleSpec &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBModuleSpec, (const lldb::SBModuleSpec &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
 
 const SBModuleSpec &SBModuleSpec::operator=(const SBModuleSpec &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBModuleSpec &,
-                     SBModuleSpec, operator=,(const lldb::SBModuleSpec &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -41,76 +40,72 @@ const SBModuleSpec &SBModuleSpec::operator=(const SBModuleSpec &rhs) {
 SBModuleSpec::~SBModuleSpec() = default;
 
 bool SBModuleSpec::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModuleSpec, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBModuleSpec::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBModuleSpec, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->operator bool();
 }
 
 void SBModuleSpec::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBModuleSpec, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up->Clear();
 }
 
 SBFileSpec SBModuleSpec::GetFileSpec() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBModuleSpec, GetFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec sb_spec(m_opaque_up->GetFileSpec());
   return sb_spec;
 }
 
 void SBModuleSpec::SetFileSpec(const lldb::SBFileSpec &sb_spec) {
-  LLDB_RECORD_METHOD(void, SBModuleSpec, SetFileSpec,
-                     (const lldb::SBFileSpec &), sb_spec);
+  LLDB_INSTRUMENT_VA(this, sb_spec);
 
   m_opaque_up->GetFileSpec() = *sb_spec;
 }
 
 lldb::SBFileSpec SBModuleSpec::GetPlatformFileSpec() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBModuleSpec,
-                             GetPlatformFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   return SBFileSpec(m_opaque_up->GetPlatformFileSpec());
 }
 
 void SBModuleSpec::SetPlatformFileSpec(const lldb::SBFileSpec &sb_spec) {
-  LLDB_RECORD_METHOD(void, SBModuleSpec, SetPlatformFileSpec,
-                     (const lldb::SBFileSpec &), sb_spec);
+  LLDB_INSTRUMENT_VA(this, sb_spec);
 
   m_opaque_up->GetPlatformFileSpec() = *sb_spec;
 }
 
 lldb::SBFileSpec SBModuleSpec::GetSymbolFileSpec() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBModuleSpec, GetSymbolFileSpec);
+  LLDB_INSTRUMENT_VA(this);
 
   return SBFileSpec(m_opaque_up->GetSymbolFileSpec());
 }
 
 void SBModuleSpec::SetSymbolFileSpec(const lldb::SBFileSpec &sb_spec) {
-  LLDB_RECORD_METHOD(void, SBModuleSpec, SetSymbolFileSpec,
-                     (const lldb::SBFileSpec &), sb_spec);
+  LLDB_INSTRUMENT_VA(this, sb_spec);
 
   m_opaque_up->GetSymbolFileSpec() = *sb_spec;
 }
 
 const char *SBModuleSpec::GetObjectName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBModuleSpec, GetObjectName);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetObjectName().GetCString();
 }
 
 void SBModuleSpec::SetObjectName(const char *name) {
-  LLDB_RECORD_METHOD(void, SBModuleSpec, SetObjectName, (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   m_opaque_up->GetObjectName().SetCString(name);
 }
 
 const char *SBModuleSpec::GetTriple() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBModuleSpec, GetTriple);
+  LLDB_INSTRUMENT_VA(this);
 
   std::string triple(m_opaque_up->GetArchitecture().GetTriple().str());
   // Unique the string so we don't run into ownership issues since the const
@@ -121,48 +116,46 @@ const char *SBModuleSpec::GetTriple() {
 }
 
 void SBModuleSpec::SetTriple(const char *triple) {
-  LLDB_RECORD_METHOD(void, SBModuleSpec, SetTriple, (const char *), triple);
+  LLDB_INSTRUMENT_VA(this, triple);
 
   m_opaque_up->GetArchitecture().SetTriple(triple);
 }
 
 const uint8_t *SBModuleSpec::GetUUIDBytes() {
+  LLDB_INSTRUMENT_VA(this)
   return m_opaque_up->GetUUID().GetBytes().data();
 }
 
 size_t SBModuleSpec::GetUUIDLength() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBModuleSpec, GetUUIDLength);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetUUID().GetBytes().size();
 }
 
 bool SBModuleSpec::SetUUIDBytes(const uint8_t *uuid, size_t uuid_len) {
+  LLDB_INSTRUMENT_VA(this, uuid, uuid_len)
   m_opaque_up->GetUUID() = UUID::fromOptionalData(uuid, uuid_len);
   return m_opaque_up->GetUUID().IsValid();
 }
 
 bool SBModuleSpec::GetDescription(lldb::SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBModuleSpec, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   m_opaque_up->Dump(description.ref());
   return true;
 }
 
 SBModuleSpecList::SBModuleSpecList() : m_opaque_up(new ModuleSpecList()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBModuleSpecList);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBModuleSpecList::SBModuleSpecList(const SBModuleSpecList &rhs)
     : m_opaque_up(new ModuleSpecList(*rhs.m_opaque_up)) {
-  LLDB_RECORD_CONSTRUCTOR(SBModuleSpecList, (const lldb::SBModuleSpecList &),
-                          rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBModuleSpecList &SBModuleSpecList::operator=(const SBModuleSpecList &rhs) {
-  LLDB_RECORD_METHOD(
-      lldb::SBModuleSpecList &,
-      SBModuleSpecList, operator=,(const lldb::SBModuleSpecList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     *m_opaque_up = *rhs.m_opaque_up;
@@ -172,8 +165,7 @@ SBModuleSpecList &SBModuleSpecList::operator=(const SBModuleSpecList &rhs) {
 SBModuleSpecList::~SBModuleSpecList() = default;
 
 SBModuleSpecList SBModuleSpecList::GetModuleSpecifications(const char *path) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBModuleSpecList, SBModuleSpecList,
-                            GetModuleSpecifications, (const char *), path);
+  LLDB_INSTRUMENT_VA(path);
 
   SBModuleSpecList specs;
   FileSpec file_spec(path);
@@ -184,28 +176,25 @@ SBModuleSpecList SBModuleSpecList::GetModuleSpecifications(const char *path) {
 }
 
 void SBModuleSpecList::Append(const SBModuleSpec &spec) {
-  LLDB_RECORD_METHOD(void, SBModuleSpecList, Append,
-                     (const lldb::SBModuleSpec &), spec);
+  LLDB_INSTRUMENT_VA(this, spec);
 
   m_opaque_up->Append(*spec.m_opaque_up);
 }
 
 void SBModuleSpecList::Append(const SBModuleSpecList &spec_list) {
-  LLDB_RECORD_METHOD(void, SBModuleSpecList, Append,
-                     (const lldb::SBModuleSpecList &), spec_list);
+  LLDB_INSTRUMENT_VA(this, spec_list);
 
   m_opaque_up->Append(*spec_list.m_opaque_up);
 }
 
 size_t SBModuleSpecList::GetSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBModuleSpecList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetSize();
 }
 
 SBModuleSpec SBModuleSpecList::GetSpecAtIndex(size_t i) {
-  LLDB_RECORD_METHOD(lldb::SBModuleSpec, SBModuleSpecList, GetSpecAtIndex,
-                     (size_t), i);
+  LLDB_INSTRUMENT_VA(this, i);
 
   SBModuleSpec sb_module_spec;
   m_opaque_up->GetModuleSpecAtIndex(i, *sb_module_spec.m_opaque_up);
@@ -214,9 +203,7 @@ SBModuleSpec SBModuleSpecList::GetSpecAtIndex(size_t i) {
 
 SBModuleSpec
 SBModuleSpecList::FindFirstMatchingSpec(const SBModuleSpec &match_spec) {
-  LLDB_RECORD_METHOD(lldb::SBModuleSpec, SBModuleSpecList,
-                     FindFirstMatchingSpec, (const lldb::SBModuleSpec &),
-                     match_spec);
+  LLDB_INSTRUMENT_VA(this, match_spec);
 
   SBModuleSpec sb_module_spec;
   m_opaque_up->FindMatchingModuleSpec(*match_spec.m_opaque_up,
@@ -226,9 +213,7 @@ SBModuleSpecList::FindFirstMatchingSpec(const SBModuleSpec &match_spec) {
 
 SBModuleSpecList
 SBModuleSpecList::FindMatchingSpecs(const SBModuleSpec &match_spec) {
-  LLDB_RECORD_METHOD(lldb::SBModuleSpecList, SBModuleSpecList,
-                     FindMatchingSpecs, (const lldb::SBModuleSpec &),
-                     match_spec);
+  LLDB_INSTRUMENT_VA(this, match_spec);
 
   SBModuleSpecList specs;
   m_opaque_up->FindMatchingModuleSpecs(*match_spec.m_opaque_up,
@@ -237,8 +222,7 @@ SBModuleSpecList::FindMatchingSpecs(const SBModuleSpec &match_spec) {
 }
 
 bool SBModuleSpecList::GetDescription(lldb::SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBModuleSpecList, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   m_opaque_up->Dump(description.ref());
   return true;
diff --git a/lldb/source/API/SBPlatform.cpp b/lldb/source/API/SBPlatform.cpp
index 5b0f1c3e1e213..d521a38b30e8d 100644
--- a/lldb/source/API/SBPlatform.cpp
+++ b/lldb/source/API/SBPlatform.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBPlatform.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBEnvironment.h"
 #include "lldb/API/SBError.h"
 #include "lldb/API/SBFileSpec.h"
@@ -19,6 +18,7 @@
 #include "lldb/Target/Target.h"
 #include "lldb/Utility/ArchSpec.h"
 #include "lldb/Utility/Args.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Status.h"
 
 #include "llvm/Support/FileSystem.h"
@@ -75,14 +75,13 @@ struct PlatformShellCommand {
 // SBPlatformConnectOptions
 SBPlatformConnectOptions::SBPlatformConnectOptions(const char *url)
     : m_opaque_ptr(new PlatformConnectOptions(url)) {
-  LLDB_RECORD_CONSTRUCTOR(SBPlatformConnectOptions, (const char *), url);
+  LLDB_INSTRUMENT_VA(this, url);
 }
 
 SBPlatformConnectOptions::SBPlatformConnectOptions(
     const SBPlatformConnectOptions &rhs)
     : m_opaque_ptr(new PlatformConnectOptions()) {
-  LLDB_RECORD_CONSTRUCTOR(SBPlatformConnectOptions,
-                          (const lldb::SBPlatformConnectOptions &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   *m_opaque_ptr = *rhs.m_opaque_ptr;
 }
@@ -91,18 +90,14 @@ SBPlatformConnectOptions::~SBPlatformConnectOptions() { delete m_opaque_ptr; }
 
 SBPlatformConnectOptions &
 SBPlatformConnectOptions::operator=(const SBPlatformConnectOptions &rhs) {
-  LLDB_RECORD_METHOD(
-      SBPlatformConnectOptions &,
-      SBPlatformConnectOptions, operator=,(
-                                    const lldb::SBPlatformConnectOptions &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   *m_opaque_ptr = *rhs.m_opaque_ptr;
   return *this;
 }
 
 const char *SBPlatformConnectOptions::GetURL() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformConnectOptions, GetURL);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr->m_url.empty())
     return nullptr;
@@ -110,8 +105,7 @@ const char *SBPlatformConnectOptions::GetURL() {
 }
 
 void SBPlatformConnectOptions::SetURL(const char *url) {
-  LLDB_RECORD_METHOD(void, SBPlatformConnectOptions, SetURL, (const char *),
-                     url);
+  LLDB_INSTRUMENT_VA(this, url);
 
   if (url && url[0])
     m_opaque_ptr->m_url = url;
@@ -120,7 +114,7 @@ void SBPlatformConnectOptions::SetURL(const char *url) {
 }
 
 bool SBPlatformConnectOptions::GetRsyncEnabled() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBPlatformConnectOptions, GetRsyncEnabled);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr->m_rsync_enabled;
 }
@@ -128,9 +122,8 @@ bool SBPlatformConnectOptions::GetRsyncEnabled() {
 void SBPlatformConnectOptions::EnableRsync(
     const char *options, const char *remote_path_prefix,
     bool omit_hostname_from_remote_path) {
-  LLDB_RECORD_METHOD(void, SBPlatformConnectOptions, EnableRsync,
-                     (const char *, const char *, bool), options,
-                     remote_path_prefix, omit_hostname_from_remote_path);
+  LLDB_INSTRUMENT_VA(this, options, remote_path_prefix,
+                     omit_hostname_from_remote_path);
 
   m_opaque_ptr->m_rsync_enabled = true;
   m_opaque_ptr->m_rsync_omit_hostname_from_remote_path =
@@ -147,21 +140,19 @@ void SBPlatformConnectOptions::EnableRsync(
 }
 
 void SBPlatformConnectOptions::DisableRsync() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBPlatformConnectOptions, DisableRsync);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_ptr->m_rsync_enabled = false;
 }
 
 const char *SBPlatformConnectOptions::GetLocalCacheDirectory() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformConnectOptions,
-                             GetLocalCacheDirectory);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr->m_local_cache_directory.GetCString();
 }
 
 void SBPlatformConnectOptions::SetLocalCacheDirectory(const char *path) {
-  LLDB_RECORD_METHOD(void, SBPlatformConnectOptions, SetLocalCacheDirectory,
-                     (const char *), path);
+  LLDB_INSTRUMENT_VA(this, path);
 
   if (path && path[0])
     m_opaque_ptr->m_local_cache_directory.SetCString(path);
@@ -173,21 +164,18 @@ void SBPlatformConnectOptions::SetLocalCacheDirectory(const char *path) {
 SBPlatformShellCommand::SBPlatformShellCommand(const char *shell_interpreter,
                                                const char *shell_command)
     : m_opaque_ptr(new PlatformShellCommand(shell_interpreter, shell_command)) {
-  LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, (const char *, const char *),
-                          shell_interpreter, shell_command);
+  LLDB_INSTRUMENT_VA(this, shell_interpreter, shell_command);
 }
 
 SBPlatformShellCommand::SBPlatformShellCommand(const char *shell_command)
     : m_opaque_ptr(new PlatformShellCommand(shell_command)) {
-  LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand, (const char *),
-                          shell_command);
+  LLDB_INSTRUMENT_VA(this, shell_command);
 }
 
 SBPlatformShellCommand::SBPlatformShellCommand(
     const SBPlatformShellCommand &rhs)
     : m_opaque_ptr(new PlatformShellCommand()) {
-  LLDB_RECORD_CONSTRUCTOR(SBPlatformShellCommand,
-                          (const lldb::SBPlatformShellCommand &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   *m_opaque_ptr = *rhs.m_opaque_ptr;
 }
@@ -195,10 +183,7 @@ SBPlatformShellCommand::SBPlatformShellCommand(
 SBPlatformShellCommand &
 SBPlatformShellCommand::operator=(const SBPlatformShellCommand &rhs) {
 
-  LLDB_RECORD_METHOD(
-      SBPlatformShellCommand &,
-      SBPlatformShellCommand, operator=,(const lldb::SBPlatformShellCommand &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   *m_opaque_ptr = *rhs.m_opaque_ptr;
   return *this;
@@ -207,7 +192,7 @@ SBPlatformShellCommand::operator=(const SBPlatformShellCommand &rhs) {
 SBPlatformShellCommand::~SBPlatformShellCommand() { delete m_opaque_ptr; }
 
 void SBPlatformShellCommand::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBPlatformShellCommand, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_ptr->m_output = std::string();
   m_opaque_ptr->m_status = 0;
@@ -215,7 +200,7 @@ void SBPlatformShellCommand::Clear() {
 }
 
 const char *SBPlatformShellCommand::GetShell() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetShell);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr->m_shell.empty())
     return nullptr;
@@ -223,8 +208,7 @@ const char *SBPlatformShellCommand::GetShell() {
 }
 
 void SBPlatformShellCommand::SetShell(const char *shell_interpreter) {
-  LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetShell, (const char *),
-                     shell_interpreter);
+  LLDB_INSTRUMENT_VA(this, shell_interpreter);
 
   if (shell_interpreter && shell_interpreter[0])
     m_opaque_ptr->m_shell = shell_interpreter;
@@ -233,7 +217,7 @@ void SBPlatformShellCommand::SetShell(const char *shell_interpreter) {
 }
 
 const char *SBPlatformShellCommand::GetCommand() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetCommand);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr->m_command.empty())
     return nullptr;
@@ -241,8 +225,7 @@ const char *SBPlatformShellCommand::GetCommand() {
 }
 
 void SBPlatformShellCommand::SetCommand(const char *shell_command) {
-  LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetCommand, (const char *),
-                     shell_command);
+  LLDB_INSTRUMENT_VA(this, shell_command);
 
   if (shell_command && shell_command[0])
     m_opaque_ptr->m_command = shell_command;
@@ -251,8 +234,7 @@ void SBPlatformShellCommand::SetCommand(const char *shell_command) {
 }
 
 const char *SBPlatformShellCommand::GetWorkingDirectory() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand,
-                             GetWorkingDirectory);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr->m_working_dir.empty())
     return nullptr;
@@ -260,8 +242,7 @@ const char *SBPlatformShellCommand::GetWorkingDirectory() {
 }
 
 void SBPlatformShellCommand::SetWorkingDirectory(const char *path) {
-  LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetWorkingDirectory,
-                     (const char *), path);
+  LLDB_INSTRUMENT_VA(this, path);
 
   if (path && path[0])
     m_opaque_ptr->m_working_dir = path;
@@ -270,8 +251,7 @@ void SBPlatformShellCommand::SetWorkingDirectory(const char *path) {
 }
 
 uint32_t SBPlatformShellCommand::GetTimeoutSeconds() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBPlatformShellCommand,
-                             GetTimeoutSeconds);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr->m_timeout)
     return m_opaque_ptr->m_timeout->count();
@@ -279,8 +259,7 @@ uint32_t SBPlatformShellCommand::GetTimeoutSeconds() {
 }
 
 void SBPlatformShellCommand::SetTimeoutSeconds(uint32_t sec) {
-  LLDB_RECORD_METHOD(void, SBPlatformShellCommand, SetTimeoutSeconds,
-                     (uint32_t), sec);
+  LLDB_INSTRUMENT_VA(this, sec);
 
   if (sec == UINT32_MAX)
     m_opaque_ptr->m_timeout = llvm::None;
@@ -289,19 +268,19 @@ void SBPlatformShellCommand::SetTimeoutSeconds(uint32_t sec) {
 }
 
 int SBPlatformShellCommand::GetSignal() {
-  LLDB_RECORD_METHOD_NO_ARGS(int, SBPlatformShellCommand, GetSignal);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr->m_signo;
 }
 
 int SBPlatformShellCommand::GetStatus() {
-  LLDB_RECORD_METHOD_NO_ARGS(int, SBPlatformShellCommand, GetStatus);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr->m_status;
 }
 
 const char *SBPlatformShellCommand::GetOutput() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatformShellCommand, GetOutput);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr->m_output.empty())
     return nullptr;
@@ -309,10 +288,10 @@ const char *SBPlatformShellCommand::GetOutput() {
 }
 
 // SBPlatform
-SBPlatform::SBPlatform() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBPlatform); }
+SBPlatform::SBPlatform() { LLDB_INSTRUMENT_VA(this); }
 
 SBPlatform::SBPlatform(const char *platform_name) {
-  LLDB_RECORD_CONSTRUCTOR(SBPlatform, (const char *), platform_name);
+  LLDB_INSTRUMENT_VA(this, platform_name);
 
   Status error;
   if (platform_name && platform_name[0])
@@ -320,14 +299,13 @@ SBPlatform::SBPlatform(const char *platform_name) {
 }
 
 SBPlatform::SBPlatform(const SBPlatform &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBPlatform, (const lldb::SBPlatform &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = rhs.m_opaque_sp;
 }
 
 SBPlatform &SBPlatform::operator=(const SBPlatform &rhs) {
-  LLDB_RECORD_METHOD(SBPlatform &,
-                     SBPlatform, operator=,(const lldb::SBPlatform &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = rhs.m_opaque_sp;
   return *this;
@@ -336,8 +314,7 @@ SBPlatform &SBPlatform::operator=(const SBPlatform &rhs) {
 SBPlatform::~SBPlatform() = default;
 
 SBPlatform SBPlatform::GetHostPlatform() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(lldb::SBPlatform, SBPlatform,
-                                    GetHostPlatform);
+  LLDB_INSTRUMENT();
 
   SBPlatform host_platform;
   host_platform.m_opaque_sp = Platform::GetHostPlatform();
@@ -345,23 +322,23 @@ SBPlatform SBPlatform::GetHostPlatform() {
 }
 
 bool SBPlatform::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBPlatform, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBPlatform::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBPlatform, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 void SBPlatform::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBPlatform, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp.reset();
 }
 
 const char *SBPlatform::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp)
@@ -376,7 +353,7 @@ void SBPlatform::SetSP(const lldb::PlatformSP &platform_sp) {
 }
 
 const char *SBPlatform::GetWorkingDirectory() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetWorkingDirectory);
+  LLDB_INSTRUMENT_VA(this);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp)
@@ -385,8 +362,7 @@ const char *SBPlatform::GetWorkingDirectory() {
 }
 
 bool SBPlatform::SetWorkingDirectory(const char *path) {
-  LLDB_RECORD_METHOD(bool, SBPlatform, SetWorkingDirectory, (const char *),
-                     path);
+  LLDB_INSTRUMENT_VA(this, path);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp) {
@@ -400,8 +376,7 @@ bool SBPlatform::SetWorkingDirectory(const char *path) {
 }
 
 SBError SBPlatform::ConnectRemote(SBPlatformConnectOptions &connect_options) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, ConnectRemote,
-                     (lldb::SBPlatformConnectOptions &), connect_options);
+  LLDB_INSTRUMENT_VA(this, connect_options);
 
   SBError sb_error;
   PlatformSP platform_sp(GetSP());
@@ -416,7 +391,7 @@ SBError SBPlatform::ConnectRemote(SBPlatformConnectOptions &connect_options) {
 }
 
 void SBPlatform::DisconnectRemote() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBPlatform, DisconnectRemote);
+  LLDB_INSTRUMENT_VA(this);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp)
@@ -424,7 +399,7 @@ void SBPlatform::DisconnectRemote() {
 }
 
 bool SBPlatform::IsConnected() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBPlatform, IsConnected);
+  LLDB_INSTRUMENT_VA(this);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp)
@@ -433,7 +408,7 @@ bool SBPlatform::IsConnected() {
 }
 
 const char *SBPlatform::GetTriple() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetTriple);
+  LLDB_INSTRUMENT_VA(this);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp) {
@@ -448,7 +423,7 @@ const char *SBPlatform::GetTriple() {
 }
 
 const char *SBPlatform::GetOSBuild() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetOSBuild);
+  LLDB_INSTRUMENT_VA(this);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp) {
@@ -463,7 +438,7 @@ const char *SBPlatform::GetOSBuild() {
 }
 
 const char *SBPlatform::GetOSDescription() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetOSDescription);
+  LLDB_INSTRUMENT_VA(this);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp) {
@@ -478,7 +453,7 @@ const char *SBPlatform::GetOSDescription() {
 }
 
 const char *SBPlatform::GetHostname() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBPlatform, GetHostname);
+  LLDB_INSTRUMENT_VA(this);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp)
@@ -487,7 +462,7 @@ const char *SBPlatform::GetHostname() {
 }
 
 uint32_t SBPlatform::GetOSMajorVersion() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBPlatform, GetOSMajorVersion);
+  LLDB_INSTRUMENT_VA(this);
 
   llvm::VersionTuple version;
   if (PlatformSP platform_sp = GetSP())
@@ -496,7 +471,7 @@ uint32_t SBPlatform::GetOSMajorVersion() {
 }
 
 uint32_t SBPlatform::GetOSMinorVersion() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBPlatform, GetOSMinorVersion);
+  LLDB_INSTRUMENT_VA(this);
 
   llvm::VersionTuple version;
   if (PlatformSP platform_sp = GetSP())
@@ -505,7 +480,7 @@ uint32_t SBPlatform::GetOSMinorVersion() {
 }
 
 uint32_t SBPlatform::GetOSUpdateVersion() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBPlatform, GetOSUpdateVersion);
+  LLDB_INSTRUMENT_VA(this);
 
   llvm::VersionTuple version;
   if (PlatformSP platform_sp = GetSP())
@@ -514,14 +489,13 @@ uint32_t SBPlatform::GetOSUpdateVersion() {
 }
 
 void SBPlatform::SetSDKRoot(const char *sysroot) {
-  LLDB_RECORD_METHOD(void, SBPlatform, SetSDKRoot, (const char *), sysroot);
+  LLDB_INSTRUMENT_VA(this, sysroot);
   if (PlatformSP platform_sp = GetSP())
     platform_sp->SetSDKRootDirectory(ConstString(sysroot));
 }
 
 SBError SBPlatform::Get(SBFileSpec &src, SBFileSpec &dst) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Get,
-                     (lldb::SBFileSpec &, lldb::SBFileSpec &), src, dst);
+  LLDB_INSTRUMENT_VA(this, src, dst);
 
   SBError sb_error;
   PlatformSP platform_sp(GetSP());
@@ -534,8 +508,7 @@ SBError SBPlatform::Get(SBFileSpec &src, SBFileSpec &dst) {
 }
 
 SBError SBPlatform::Put(SBFileSpec &src, SBFileSpec &dst) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Put,
-                     (lldb::SBFileSpec &, lldb::SBFileSpec &), src, dst);
+  LLDB_INSTRUMENT_VA(this, src, dst);
   return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) {
     if (src.Exists()) {
       uint32_t permissions = FileSystem::Instance().GetPermissions(src.ref());
@@ -557,8 +530,7 @@ SBError SBPlatform::Put(SBFileSpec &src, SBFileSpec &dst) {
 }
 
 SBError SBPlatform::Install(SBFileSpec &src, SBFileSpec &dst) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Install,
-                     (lldb::SBFileSpec &, lldb::SBFileSpec &), src, dst);
+  LLDB_INSTRUMENT_VA(this, src, dst);
   return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) {
     if (src.Exists())
       return platform_sp->Install(src.ref(), dst.ref());
@@ -571,8 +543,7 @@ SBError SBPlatform::Install(SBFileSpec &src, SBFileSpec &dst) {
 }
 
 SBError SBPlatform::Run(SBPlatformShellCommand &shell_command) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Run,
-                     (lldb::SBPlatformShellCommand &), shell_command);
+  LLDB_INSTRUMENT_VA(this, shell_command);
   return ExecuteConnected(
       [&](const lldb::PlatformSP &platform_sp) {
         const char *command = shell_command.GetCommand();
@@ -595,8 +566,7 @@ SBError SBPlatform::Run(SBPlatformShellCommand &shell_command) {
 }
 
 SBError SBPlatform::Launch(SBLaunchInfo &launch_info) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Launch, (lldb::SBLaunchInfo &),
-                     launch_info);
+  LLDB_INSTRUMENT_VA(this, launch_info);
   return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) {
     ProcessLaunchInfo info = launch_info.ref();
     Status error = platform_sp->LaunchProcess(info);
@@ -606,7 +576,7 @@ SBError SBPlatform::Launch(SBLaunchInfo &launch_info) {
 }
 
 SBError SBPlatform::Kill(const lldb::pid_t pid) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, Kill, (const lldb::pid_t), pid);
+  LLDB_INSTRUMENT_VA(this, pid);
   return ExecuteConnected([&](const lldb::PlatformSP &platform_sp) {
     return platform_sp->KillProcess(pid);
   });
@@ -628,8 +598,7 @@ SBError SBPlatform::ExecuteConnected(
 }
 
 SBError SBPlatform::MakeDirectory(const char *path, uint32_t file_permissions) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, MakeDirectory,
-                     (const char *, uint32_t), path, file_permissions);
+  LLDB_INSTRUMENT_VA(this, path, file_permissions);
 
   SBError sb_error;
   PlatformSP platform_sp(GetSP());
@@ -643,8 +612,7 @@ SBError SBPlatform::MakeDirectory(const char *path, uint32_t file_permissions) {
 }
 
 uint32_t SBPlatform::GetFilePermissions(const char *path) {
-  LLDB_RECORD_METHOD(uint32_t, SBPlatform, GetFilePermissions, (const char *),
-                     path);
+  LLDB_INSTRUMENT_VA(this, path);
 
   PlatformSP platform_sp(GetSP());
   if (platform_sp) {
@@ -657,8 +625,7 @@ uint32_t SBPlatform::GetFilePermissions(const char *path) {
 
 SBError SBPlatform::SetFilePermissions(const char *path,
                                        uint32_t file_permissions) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBPlatform, SetFilePermissions,
-                     (const char *, uint32_t), path, file_permissions);
+  LLDB_INSTRUMENT_VA(this, path, file_permissions);
 
   SBError sb_error;
   PlatformSP platform_sp(GetSP());
@@ -672,8 +639,7 @@ SBError SBPlatform::SetFilePermissions(const char *path,
 }
 
 SBUnixSignals SBPlatform::GetUnixSignals() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBUnixSignals, SBPlatform,
-                                   GetUnixSignals);
+  LLDB_INSTRUMENT_VA(this);
 
   if (auto platform_sp = GetSP())
     return SBUnixSignals{platform_sp};
@@ -682,7 +648,7 @@ SBUnixSignals SBPlatform::GetUnixSignals() const {
 }
 
 SBEnvironment SBPlatform::GetEnvironment() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBEnvironment, SBPlatform, GetEnvironment);
+  LLDB_INSTRUMENT_VA(this);
   PlatformSP platform_sp(GetSP());
 
   if (platform_sp) {
diff --git a/lldb/source/API/SBProcess.cpp b/lldb/source/API/SBProcess.cpp
index d598e483028df..2538013412b68 100644
--- a/lldb/source/API/SBProcess.cpp
+++ b/lldb/source/API/SBProcess.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBProcess.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include <cinttypes>
 
@@ -49,22 +49,21 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBProcess::SBProcess() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBProcess); }
+SBProcess::SBProcess() { LLDB_INSTRUMENT_VA(this); }
 
 // SBProcess constructor
 
 SBProcess::SBProcess(const SBProcess &rhs) : m_opaque_wp(rhs.m_opaque_wp) {
-  LLDB_RECORD_CONSTRUCTOR(SBProcess, (const lldb::SBProcess &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBProcess::SBProcess(const lldb::ProcessSP &process_sp)
     : m_opaque_wp(process_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBProcess, (const lldb::ProcessSP &), process_sp);
+  LLDB_INSTRUMENT_VA(this, process_sp);
 }
 
 const SBProcess &SBProcess::operator=(const SBProcess &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBProcess &,
-                     SBProcess, operator=,(const lldb::SBProcess &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_wp = rhs.m_opaque_wp;
@@ -75,14 +74,13 @@ const SBProcess &SBProcess::operator=(const SBProcess &rhs) {
 SBProcess::~SBProcess() = default;
 
 const char *SBProcess::GetBroadcasterClassName() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBProcess,
-                                    GetBroadcasterClassName);
+  LLDB_INSTRUMENT();
 
   return Process::GetStaticBroadcasterClass().AsCString();
 }
 
 const char *SBProcess::GetPluginName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcess, GetPluginName);
+  LLDB_INSTRUMENT_VA(this);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -92,7 +90,7 @@ const char *SBProcess::GetPluginName() {
 }
 
 const char *SBProcess::GetShortPluginName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcess, GetShortPluginName);
+  LLDB_INSTRUMENT_VA(this);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -106,17 +104,17 @@ lldb::ProcessSP SBProcess::GetSP() const { return m_opaque_wp.lock(); }
 void SBProcess::SetSP(const ProcessSP &process_sp) { m_opaque_wp = process_sp; }
 
 void SBProcess::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBProcess, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_wp.reset();
 }
 
 bool SBProcess::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBProcess, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBProcess::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBProcess, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   ProcessSP process_sp(m_opaque_wp.lock());
   return ((bool)process_sp && process_sp->IsValid());
@@ -128,11 +126,7 @@ bool SBProcess::RemoteLaunch(char const **argv, char const **envp,
                              const char *working_directory,
                              uint32_t launch_flags, bool stop_at_entry,
                              lldb::SBError &error) {
-  LLDB_RECORD_METHOD(bool, SBProcess, RemoteLaunch,
-                     (const char **, const char **, const char *, const char *,
-                      const char *, const char *, uint32_t, bool,
-                      lldb::SBError &),
-                     argv, envp, stdin_path, stdout_path, stderr_path,
+  LLDB_INSTRUMENT_VA(this, argv, envp, stdin_path, stdout_path, stderr_path,
                      working_directory, launch_flags, stop_at_entry, error);
 
   ProcessSP process_sp(GetSP());
@@ -165,8 +159,7 @@ bool SBProcess::RemoteLaunch(char const **argv, char const **envp,
 
 bool SBProcess::RemoteAttachToProcessWithID(lldb::pid_t pid,
                                             lldb::SBError &error) {
-  LLDB_RECORD_METHOD(bool, SBProcess, RemoteAttachToProcessWithID,
-                     (lldb::pid_t, lldb::SBError &), pid, error);
+  LLDB_INSTRUMENT_VA(this, pid, error);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -188,7 +181,7 @@ bool SBProcess::RemoteAttachToProcessWithID(lldb::pid_t pid,
 }
 
 uint32_t SBProcess::GetNumThreads() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcess, GetNumThreads);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t num_threads = 0;
   ProcessSP process_sp(GetSP());
@@ -205,8 +198,7 @@ uint32_t SBProcess::GetNumThreads() {
 }
 
 SBThread SBProcess::GetSelectedThread() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBThread, SBProcess,
-                                   GetSelectedThread);
+  LLDB_INSTRUMENT_VA(this);
 
   SBThread sb_thread;
   ThreadSP thread_sp;
@@ -223,8 +215,7 @@ SBThread SBProcess::GetSelectedThread() const {
 
 SBThread SBProcess::CreateOSPluginThread(lldb::tid_t tid,
                                          lldb::addr_t context) {
-  LLDB_RECORD_METHOD(lldb::SBThread, SBProcess, CreateOSPluginThread,
-                     (lldb::tid_t, lldb::addr_t), tid, context);
+  LLDB_INSTRUMENT_VA(this, tid, context);
 
   SBThread sb_thread;
   ThreadSP thread_sp;
@@ -240,7 +231,7 @@ SBThread SBProcess::CreateOSPluginThread(lldb::tid_t tid,
 }
 
 SBTarget SBProcess::GetTarget() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBTarget, SBProcess, GetTarget);
+  LLDB_INSTRUMENT_VA(this);
 
   SBTarget sb_target;
   TargetSP target_sp;
@@ -254,8 +245,7 @@ SBTarget SBProcess::GetTarget() const {
 }
 
 size_t SBProcess::PutSTDIN(const char *src, size_t src_len) {
-  LLDB_RECORD_METHOD(size_t, SBProcess, PutSTDIN, (const char *, size_t), src,
-                     src_len);
+  LLDB_INSTRUMENT_VA(this, src, src_len);
 
   size_t ret_val = 0;
   ProcessSP process_sp(GetSP());
@@ -268,8 +258,7 @@ size_t SBProcess::PutSTDIN(const char *src, size_t src_len) {
 }
 
 size_t SBProcess::GetSTDOUT(char *dst, size_t dst_len) const {
-  LLDB_RECORD_METHOD_CONST(size_t, SBProcess, GetSTDOUT, (char *, size_t), dst,
-                           "", dst_len);
+  LLDB_INSTRUMENT_VA(this, dst, dst_len);
 
   size_t bytes_read = 0;
   ProcessSP process_sp(GetSP());
@@ -282,8 +271,7 @@ size_t SBProcess::GetSTDOUT(char *dst, size_t dst_len) const {
 }
 
 size_t SBProcess::GetSTDERR(char *dst, size_t dst_len) const {
-  LLDB_RECORD_METHOD_CONST(size_t, SBProcess, GetSTDERR, (char *, size_t), dst,
-                           "", dst_len);
+  LLDB_INSTRUMENT_VA(this, dst, dst_len);
 
   size_t bytes_read = 0;
   ProcessSP process_sp(GetSP());
@@ -296,8 +284,7 @@ size_t SBProcess::GetSTDERR(char *dst, size_t dst_len) const {
 }
 
 size_t SBProcess::GetAsyncProfileData(char *dst, size_t dst_len) const {
-  LLDB_RECORD_METHOD_CONST(size_t, SBProcess, GetAsyncProfileData,
-                           (char *, size_t), dst, "", dst_len);
+  LLDB_INSTRUMENT_VA(this, dst, dst_len);
 
   size_t bytes_read = 0;
   ProcessSP process_sp(GetSP());
@@ -310,23 +297,20 @@ size_t SBProcess::GetAsyncProfileData(char *dst, size_t dst_len) const {
 }
 
 void SBProcess::ReportEventState(const SBEvent &event, SBFile out) const {
-  LLDB_RECORD_METHOD_CONST(void, SBProcess, ReportEventState,
-                           (const SBEvent &, SBFile), event, out);
+  LLDB_INSTRUMENT_VA(this, event, out);
 
   return ReportEventState(event, out.m_opaque_sp);
 }
 
 void SBProcess::ReportEventState(const SBEvent &event, FILE *out) const {
-  LLDB_RECORD_METHOD_CONST(void, SBProcess, ReportEventState,
-                           (const lldb::SBEvent &, FILE *), event, out);
+  LLDB_INSTRUMENT_VA(this, event, out);
   FileSP outfile = std::make_shared<NativeFile>(out, false);
   return ReportEventState(event, outfile);
 }
 
 void SBProcess::ReportEventState(const SBEvent &event, FileSP out) const {
 
-  LLDB_RECORD_METHOD_CONST(void, SBProcess, ReportEventState,
-                           (const SBEvent &, FileSP), event, out);
+  LLDB_INSTRUMENT_VA(this, event, out);
 
   if (!out || !out->IsValid())
     return;
@@ -342,9 +326,7 @@ void SBProcess::ReportEventState(const SBEvent &event, FileSP out) const {
 
 void SBProcess::AppendEventStateReport(const SBEvent &event,
                                        SBCommandReturnObject &result) {
-  LLDB_RECORD_METHOD(void, SBProcess, AppendEventStateReport,
-                     (const lldb::SBEvent &, lldb::SBCommandReturnObject &),
-                     event, result);
+  LLDB_INSTRUMENT_VA(this, event, result);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -358,8 +340,7 @@ void SBProcess::AppendEventStateReport(const SBEvent &event,
 }
 
 bool SBProcess::SetSelectedThread(const SBThread &thread) {
-  LLDB_RECORD_METHOD(bool, SBProcess, SetSelectedThread,
-                     (const lldb::SBThread &), thread);
+  LLDB_INSTRUMENT_VA(this, thread);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -372,9 +353,7 @@ bool SBProcess::SetSelectedThread(const SBThread &thread) {
 }
 
 bool SBProcess::SetSelectedThreadByID(lldb::tid_t tid) {
-  LLDB_RECORD_METHOD(bool, SBProcess, SetSelectedThreadByID, (lldb::tid_t),
-                     tid);
-
+  LLDB_INSTRUMENT_VA(this, tid);
 
   bool ret_val = false;
   ProcessSP process_sp(GetSP());
@@ -388,8 +367,7 @@ bool SBProcess::SetSelectedThreadByID(lldb::tid_t tid) {
 }
 
 bool SBProcess::SetSelectedThreadByIndexID(uint32_t index_id) {
-  LLDB_RECORD_METHOD(bool, SBProcess, SetSelectedThreadByIndexID, (uint32_t),
-                     index_id);
+  LLDB_INSTRUMENT_VA(this, index_id);
 
   bool ret_val = false;
   ProcessSP process_sp(GetSP());
@@ -404,8 +382,7 @@ bool SBProcess::SetSelectedThreadByIndexID(uint32_t index_id) {
 }
 
 SBThread SBProcess::GetThreadAtIndex(size_t index) {
-  LLDB_RECORD_METHOD(lldb::SBThread, SBProcess, GetThreadAtIndex, (size_t),
-                     index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   SBThread sb_thread;
   ThreadSP thread_sp;
@@ -423,7 +400,7 @@ SBThread SBProcess::GetThreadAtIndex(size_t index) {
 }
 
 uint32_t SBProcess::GetNumQueues() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcess, GetNumQueues);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t num_queues = 0;
   ProcessSP process_sp(GetSP());
@@ -440,8 +417,7 @@ uint32_t SBProcess::GetNumQueues() {
 }
 
 SBQueue SBProcess::GetQueueAtIndex(size_t index) {
-  LLDB_RECORD_METHOD(lldb::SBQueue, SBProcess, GetQueueAtIndex, (size_t),
-                     index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   SBQueue sb_queue;
   QueueSP queue_sp;
@@ -460,8 +436,7 @@ SBQueue SBProcess::GetQueueAtIndex(size_t index) {
 }
 
 uint32_t SBProcess::GetStopID(bool include_expression_stops) {
-  LLDB_RECORD_METHOD(uint32_t, SBProcess, GetStopID, (bool),
-                     include_expression_stops);
+  LLDB_INSTRUMENT_VA(this, include_expression_stops);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -476,8 +451,7 @@ uint32_t SBProcess::GetStopID(bool include_expression_stops) {
 }
 
 SBEvent SBProcess::GetStopEventForStopID(uint32_t stop_id) {
-  LLDB_RECORD_METHOD(lldb::SBEvent, SBProcess, GetStopEventForStopID,
-                     (uint32_t), stop_id);
+  LLDB_INSTRUMENT_VA(this, stop_id);
 
   SBEvent sb_event;
   EventSP event_sp;
@@ -493,7 +467,7 @@ SBEvent SBProcess::GetStopEventForStopID(uint32_t stop_id) {
 }
 
 StateType SBProcess::GetState() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::StateType, SBProcess, GetState);
+  LLDB_INSTRUMENT_VA(this);
 
   StateType ret_val = eStateInvalid;
   ProcessSP process_sp(GetSP());
@@ -507,7 +481,7 @@ StateType SBProcess::GetState() {
 }
 
 int SBProcess::GetExitStatus() {
-  LLDB_RECORD_METHOD_NO_ARGS(int, SBProcess, GetExitStatus);
+  LLDB_INSTRUMENT_VA(this);
 
   int exit_status = 0;
   ProcessSP process_sp(GetSP());
@@ -521,7 +495,7 @@ int SBProcess::GetExitStatus() {
 }
 
 const char *SBProcess::GetExitDescription() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcess, GetExitDescription);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *exit_desc = nullptr;
   ProcessSP process_sp(GetSP());
@@ -534,7 +508,7 @@ const char *SBProcess::GetExitDescription() {
 }
 
 lldb::pid_t SBProcess::GetProcessID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBProcess, GetProcessID);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::pid_t ret_val = LLDB_INVALID_PROCESS_ID;
   ProcessSP process_sp(GetSP());
@@ -545,7 +519,7 @@ lldb::pid_t SBProcess::GetProcessID() {
 }
 
 uint32_t SBProcess::GetUniqueID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcess, GetUniqueID);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t ret_val = 0;
   ProcessSP process_sp(GetSP());
@@ -555,7 +529,7 @@ uint32_t SBProcess::GetUniqueID() {
 }
 
 ByteOrder SBProcess::GetByteOrder() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::ByteOrder, SBProcess, GetByteOrder);
+  LLDB_INSTRUMENT_VA(this);
 
   ByteOrder byteOrder = eByteOrderInvalid;
   ProcessSP process_sp(GetSP());
@@ -567,7 +541,7 @@ ByteOrder SBProcess::GetByteOrder() const {
 }
 
 uint32_t SBProcess::GetAddressByteSize() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBProcess, GetAddressByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t size = 0;
   ProcessSP process_sp(GetSP());
@@ -579,7 +553,7 @@ uint32_t SBProcess::GetAddressByteSize() const {
 }
 
 SBError SBProcess::Continue() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Continue);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -599,7 +573,7 @@ SBError SBProcess::Continue() {
 }
 
 SBError SBProcess::Destroy() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Destroy);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -614,7 +588,7 @@ SBError SBProcess::Destroy() {
 }
 
 SBError SBProcess::Stop() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Stop);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -629,7 +603,7 @@ SBError SBProcess::Stop() {
 }
 
 SBError SBProcess::Kill() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Kill);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -644,7 +618,7 @@ SBError SBProcess::Kill() {
 }
 
 SBError SBProcess::Detach() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBProcess, Detach);
+  LLDB_INSTRUMENT_VA(this);
 
   // FIXME: This should come from a process default.
   bool keep_stopped = false;
@@ -652,7 +626,7 @@ SBError SBProcess::Detach() {
 }
 
 SBError SBProcess::Detach(bool keep_stopped) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBProcess, Detach, (bool), keep_stopped);
+  LLDB_INSTRUMENT_VA(this, keep_stopped);
 
   SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -667,7 +641,7 @@ SBError SBProcess::Detach(bool keep_stopped) {
 }
 
 SBError SBProcess::Signal(int signo) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBProcess, Signal, (int), signo);
+  LLDB_INSTRUMENT_VA(this, signo);
 
   SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -682,7 +656,7 @@ SBError SBProcess::Signal(int signo) {
 }
 
 SBUnixSignals SBProcess::GetUnixSignals() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBUnixSignals, SBProcess, GetUnixSignals);
+  LLDB_INSTRUMENT_VA(this);
 
   if (auto process_sp = GetSP())
     return SBUnixSignals{process_sp};
@@ -691,7 +665,7 @@ SBUnixSignals SBProcess::GetUnixSignals() {
 }
 
 void SBProcess::SendAsyncInterrupt() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBProcess, SendAsyncInterrupt);
+  LLDB_INSTRUMENT_VA(this);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -700,8 +674,7 @@ void SBProcess::SendAsyncInterrupt() {
 }
 
 SBThread SBProcess::GetThreadByID(tid_t tid) {
-  LLDB_RECORD_METHOD(lldb::SBThread, SBProcess, GetThreadByID, (lldb::tid_t),
-                     tid);
+  LLDB_INSTRUMENT_VA(this, tid);
 
   SBThread sb_thread;
   ThreadSP thread_sp;
@@ -719,8 +692,7 @@ SBThread SBProcess::GetThreadByID(tid_t tid) {
 }
 
 SBThread SBProcess::GetThreadByIndexID(uint32_t index_id) {
-  LLDB_RECORD_METHOD(lldb::SBThread, SBProcess, GetThreadByIndexID, (uint32_t),
-                     index_id);
+  LLDB_INSTRUMENT_VA(this, index_id);
 
   SBThread sb_thread;
   ThreadSP thread_sp;
@@ -739,8 +711,7 @@ SBThread SBProcess::GetThreadByIndexID(uint32_t index_id) {
 }
 
 StateType SBProcess::GetStateFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::StateType, SBProcess, GetStateFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   StateType ret_val = Process::ProcessEventData::GetStateFromEvent(event.get());
 
@@ -748,8 +719,7 @@ StateType SBProcess::GetStateFromEvent(const SBEvent &event) {
 }
 
 bool SBProcess::GetRestartedFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBProcess, GetRestartedFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   bool ret_val = Process::ProcessEventData::GetRestartedFromEvent(event.get());
 
@@ -757,8 +727,7 @@ bool SBProcess::GetRestartedFromEvent(const SBEvent &event) {
 }
 
 size_t SBProcess::GetNumRestartedReasonsFromEvent(const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(size_t, SBProcess, GetNumRestartedReasonsFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Process::ProcessEventData::GetNumRestartedReasons(event.get());
 }
@@ -766,16 +735,13 @@ size_t SBProcess::GetNumRestartedReasonsFromEvent(const lldb::SBEvent &event) {
 const char *
 SBProcess::GetRestartedReasonAtIndexFromEvent(const lldb::SBEvent &event,
                                               size_t idx) {
-  LLDB_RECORD_STATIC_METHOD(const char *, SBProcess,
-                            GetRestartedReasonAtIndexFromEvent,
-                            (const lldb::SBEvent &, size_t), event, idx);
+  LLDB_INSTRUMENT_VA(event, idx);
 
   return Process::ProcessEventData::GetRestartedReasonAtIndex(event.get(), idx);
 }
 
 SBProcess SBProcess::GetProcessFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBProcess, SBProcess, GetProcessFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   ProcessSP process_sp =
       Process::ProcessEventData::GetProcessFromEvent(event.get());
@@ -788,32 +754,27 @@ SBProcess SBProcess::GetProcessFromEvent(const SBEvent &event) {
 }
 
 bool SBProcess::GetInterruptedFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBProcess, GetInterruptedFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Process::ProcessEventData::GetInterruptedFromEvent(event.get());
 }
 
 lldb::SBStructuredData
 SBProcess::GetStructuredDataFromEvent(const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBStructuredData, SBProcess,
-                            GetStructuredDataFromEvent, (const lldb::SBEvent &),
-                            event);
+  LLDB_INSTRUMENT_VA(event);
 
   return SBStructuredData(event.GetSP());
 }
 
 bool SBProcess::EventIsProcessEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBProcess, EventIsProcessEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return (event.GetBroadcasterClass() == SBProcess::GetBroadcasterClass()) &&
          !EventIsStructuredDataEvent(event);
 }
 
 bool SBProcess::EventIsStructuredDataEvent(const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBProcess, EventIsStructuredDataEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   EventSP event_sp = event.GetSP();
   EventData *event_data = event_sp ? event_sp->GetData() : nullptr;
@@ -822,9 +783,7 @@ bool SBProcess::EventIsStructuredDataEvent(const lldb::SBEvent &event) {
 }
 
 SBBroadcaster SBProcess::GetBroadcaster() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBroadcaster, SBProcess,
-                                   GetBroadcaster);
-
+  LLDB_INSTRUMENT_VA(this);
 
   ProcessSP process_sp(GetSP());
 
@@ -834,17 +793,14 @@ SBBroadcaster SBProcess::GetBroadcaster() const {
 }
 
 const char *SBProcess::GetBroadcasterClass() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBProcess,
-                                    GetBroadcasterClass);
+  LLDB_INSTRUMENT();
 
   return Process::GetStaticBroadcasterClass().AsCString();
 }
 
 size_t SBProcess::ReadMemory(addr_t addr, void *dst, size_t dst_len,
                              SBError &sb_error) {
-  LLDB_RECORD_METHOD(size_t, SBProcess, ReadMemory,
-                     (lldb::addr_t, void *, size_t, lldb::SBError &), addr, dst,
-                     dst_len, sb_error);
+  LLDB_INSTRUMENT_VA(this, addr, dst, dst_len, sb_error);
 
   size_t bytes_read = 0;
 
@@ -869,9 +825,7 @@ size_t SBProcess::ReadMemory(addr_t addr, void *dst, size_t dst_len,
 
 size_t SBProcess::ReadCStringFromMemory(addr_t addr, void *buf, size_t size,
                                         lldb::SBError &sb_error) {
-  LLDB_RECORD_METHOD(size_t, SBProcess, ReadCStringFromMemory,
-                     (lldb::addr_t, void *, size_t, lldb::SBError &), addr, buf,
-                     size, sb_error);
+  LLDB_INSTRUMENT_VA(this, addr, buf, size, sb_error);
 
   size_t bytes_read = 0;
   ProcessSP process_sp(GetSP());
@@ -893,9 +847,7 @@ size_t SBProcess::ReadCStringFromMemory(addr_t addr, void *buf, size_t size,
 
 uint64_t SBProcess::ReadUnsignedFromMemory(addr_t addr, uint32_t byte_size,
                                            lldb::SBError &sb_error) {
-  LLDB_RECORD_METHOD(uint64_t, SBProcess, ReadUnsignedFromMemory,
-                     (lldb::addr_t, uint32_t, lldb::SBError &), addr, byte_size,
-                     sb_error);
+  LLDB_INSTRUMENT_VA(this, addr, byte_size, sb_error);
 
   uint64_t value = 0;
   ProcessSP process_sp(GetSP());
@@ -917,8 +869,7 @@ uint64_t SBProcess::ReadUnsignedFromMemory(addr_t addr, uint32_t byte_size,
 
 lldb::addr_t SBProcess::ReadPointerFromMemory(addr_t addr,
                                               lldb::SBError &sb_error) {
-  LLDB_RECORD_METHOD(lldb::addr_t, SBProcess, ReadPointerFromMemory,
-                     (lldb::addr_t, lldb::SBError &), addr, sb_error);
+  LLDB_INSTRUMENT_VA(this, addr, sb_error);
 
   lldb::addr_t ptr = LLDB_INVALID_ADDRESS;
   ProcessSP process_sp(GetSP());
@@ -939,9 +890,7 @@ lldb::addr_t SBProcess::ReadPointerFromMemory(addr_t addr,
 
 size_t SBProcess::WriteMemory(addr_t addr, const void *src, size_t src_len,
                               SBError &sb_error) {
-  LLDB_RECORD_METHOD(size_t, SBProcess, WriteMemory,
-                     (lldb::addr_t, const void *, size_t, lldb::SBError &),
-                     addr, src, src_len, sb_error);
+  LLDB_INSTRUMENT_VA(this, addr, src, src_len, sb_error);
 
   size_t bytes_written = 0;
 
@@ -963,8 +912,7 @@ size_t SBProcess::WriteMemory(addr_t addr, const void *src, size_t src_len,
 }
 
 bool SBProcess::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBProcess, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -988,8 +936,7 @@ bool SBProcess::GetDescription(SBStream &description) {
 }
 
 SBStructuredData SBProcess::GetExtendedCrashInformation() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStructuredData, SBProcess,
-                             GetExtendedCrashInformation);
+  LLDB_INSTRUMENT_VA(this);
   SBStructuredData data;
   ProcessSP process_sp(GetSP());
   if (!process_sp)
@@ -1013,9 +960,7 @@ SBStructuredData SBProcess::GetExtendedCrashInformation() {
 
 uint32_t
 SBProcess::GetNumSupportedHardwareWatchpoints(lldb::SBError &sb_error) const {
-  LLDB_RECORD_METHOD_CONST(uint32_t, SBProcess,
-                           GetNumSupportedHardwareWatchpoints,
-                           (lldb::SBError &), sb_error);
+  LLDB_INSTRUMENT_VA(this, sb_error);
 
   uint32_t num = 0;
   ProcessSP process_sp(GetSP());
@@ -1031,9 +976,7 @@ SBProcess::GetNumSupportedHardwareWatchpoints(lldb::SBError &sb_error) const {
 
 uint32_t SBProcess::LoadImage(lldb::SBFileSpec &sb_remote_image_spec,
                               lldb::SBError &sb_error) {
-  LLDB_RECORD_METHOD(uint32_t, SBProcess, LoadImage,
-                     (lldb::SBFileSpec &, lldb::SBError &),
-                     sb_remote_image_spec, sb_error);
+  LLDB_INSTRUMENT_VA(this, sb_remote_image_spec, sb_error);
 
   return LoadImage(SBFileSpec(), sb_remote_image_spec, sb_error);
 }
@@ -1041,10 +984,7 @@ uint32_t SBProcess::LoadImage(lldb::SBFileSpec &sb_remote_image_spec,
 uint32_t SBProcess::LoadImage(const lldb::SBFileSpec &sb_local_image_spec,
                               const lldb::SBFileSpec &sb_remote_image_spec,
                               lldb::SBError &sb_error) {
-  LLDB_RECORD_METHOD(
-      uint32_t, SBProcess, LoadImage,
-      (const lldb::SBFileSpec &, const lldb::SBFileSpec &, lldb::SBError &),
-      sb_local_image_spec, sb_remote_image_spec, sb_error);
+  LLDB_INSTRUMENT_VA(this, sb_local_image_spec, sb_remote_image_spec, sb_error);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -1068,10 +1008,7 @@ uint32_t SBProcess::LoadImageUsingPaths(const lldb::SBFileSpec &image_spec,
                                         SBStringList &paths,
                                         lldb::SBFileSpec &loaded_path,
                                         lldb::SBError &error) {
-  LLDB_RECORD_METHOD(uint32_t, SBProcess, LoadImageUsingPaths,
-                     (const lldb::SBFileSpec &, lldb::SBStringList &,
-                      lldb::SBFileSpec &, lldb::SBError &),
-                     image_spec, paths, loaded_path, error);
+  LLDB_INSTRUMENT_VA(this, image_spec, paths, loaded_path, error);
 
   ProcessSP process_sp(GetSP());
   if (process_sp) {
@@ -1103,8 +1040,7 @@ uint32_t SBProcess::LoadImageUsingPaths(const lldb::SBFileSpec &image_spec,
 }
 
 lldb::SBError SBProcess::UnloadImage(uint32_t image_token) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBProcess, UnloadImage, (uint32_t),
-                     image_token);
+  LLDB_INSTRUMENT_VA(this, image_token);
 
   lldb::SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -1125,8 +1061,7 @@ lldb::SBError SBProcess::UnloadImage(uint32_t image_token) {
 }
 
 lldb::SBError SBProcess::SendEventData(const char *event_data) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBProcess, SendEventData, (const char *),
-                     event_data);
+  LLDB_INSTRUMENT_VA(this, event_data);
 
   lldb::SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -1145,7 +1080,7 @@ lldb::SBError SBProcess::SendEventData(const char *event_data) {
 }
 
 uint32_t SBProcess::GetNumExtendedBacktraceTypes() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcess, GetNumExtendedBacktraceTypes);
+  LLDB_INSTRUMENT_VA(this);
 
   ProcessSP process_sp(GetSP());
   if (process_sp && process_sp->GetSystemRuntime()) {
@@ -1156,8 +1091,7 @@ uint32_t SBProcess::GetNumExtendedBacktraceTypes() {
 }
 
 const char *SBProcess::GetExtendedBacktraceTypeAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(const char *, SBProcess, GetExtendedBacktraceTypeAtIndex,
-                     (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   ProcessSP process_sp(GetSP());
   if (process_sp && process_sp->GetSystemRuntime()) {
@@ -1172,8 +1106,7 @@ const char *SBProcess::GetExtendedBacktraceTypeAtIndex(uint32_t idx) {
 }
 
 SBThreadCollection SBProcess::GetHistoryThreads(addr_t addr) {
-  LLDB_RECORD_METHOD(lldb::SBThreadCollection, SBProcess, GetHistoryThreads,
-                     (lldb::addr_t), addr);
+  LLDB_INSTRUMENT_VA(this, addr);
 
   ProcessSP process_sp(GetSP());
   SBThreadCollection threads;
@@ -1185,8 +1118,7 @@ SBThreadCollection SBProcess::GetHistoryThreads(addr_t addr) {
 
 bool SBProcess::IsInstrumentationRuntimePresent(
     InstrumentationRuntimeType type) {
-  LLDB_RECORD_METHOD(bool, SBProcess, IsInstrumentationRuntimePresent,
-                     (lldb::InstrumentationRuntimeType), type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   ProcessSP process_sp(GetSP());
   if (!process_sp)
@@ -1205,8 +1137,7 @@ bool SBProcess::IsInstrumentationRuntimePresent(
 }
 
 lldb::SBError SBProcess::SaveCore(const char *file_name) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBProcess, SaveCore, (const char *),
-                     file_name);
+  LLDB_INSTRUMENT_VA(this, file_name);
 
   lldb::SBError error;
   ProcessSP process_sp(GetSP());
@@ -1232,9 +1163,7 @@ lldb::SBError SBProcess::SaveCore(const char *file_name) {
 lldb::SBError
 SBProcess::GetMemoryRegionInfo(lldb::addr_t load_addr,
                                SBMemoryRegionInfo &sb_region_info) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBProcess, GetMemoryRegionInfo,
-                     (lldb::addr_t, lldb::SBMemoryRegionInfo &), load_addr,
-                     sb_region_info);
+  LLDB_INSTRUMENT_VA(this, load_addr, sb_region_info);
 
   lldb::SBError sb_error;
   ProcessSP process_sp(GetSP());
@@ -1256,8 +1185,7 @@ SBProcess::GetMemoryRegionInfo(lldb::addr_t load_addr,
 }
 
 lldb::SBMemoryRegionInfoList SBProcess::GetMemoryRegions() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBMemoryRegionInfoList, SBProcess,
-                             GetMemoryRegions);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBMemoryRegionInfoList sb_region_list;
 
@@ -1274,7 +1202,7 @@ lldb::SBMemoryRegionInfoList SBProcess::GetMemoryRegions() {
 }
 
 lldb::SBProcessInfo SBProcess::GetProcessInfo() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcessInfo, SBProcess, GetProcessInfo);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBProcessInfo sb_proc_info;
   ProcessSP process_sp(GetSP());
@@ -1287,9 +1215,7 @@ lldb::SBProcessInfo SBProcess::GetProcessInfo() {
 
 lldb::addr_t SBProcess::AllocateMemory(size_t size, uint32_t permissions,
                                        lldb::SBError &sb_error) {
-  LLDB_RECORD_METHOD(lldb::addr_t, SBProcess, AllocateMemory,
-                     (size_t, uint32_t, lldb::SBError &), size, permissions,
-                     sb_error);
+  LLDB_INSTRUMENT_VA(this, size, permissions, sb_error);
 
   lldb::addr_t addr = LLDB_INVALID_ADDRESS;
   ProcessSP process_sp(GetSP());
@@ -1309,8 +1235,7 @@ lldb::addr_t SBProcess::AllocateMemory(size_t size, uint32_t permissions,
 }
 
 lldb::SBError SBProcess::DeallocateMemory(lldb::addr_t ptr) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBProcess, DeallocateMemory, (lldb::addr_t),
-                     ptr);
+  LLDB_INSTRUMENT_VA(this, ptr);
 
   lldb::SBError sb_error;
   ProcessSP process_sp(GetSP());
diff --git a/lldb/source/API/SBProcessInfo.cpp b/lldb/source/API/SBProcessInfo.cpp
index 93d84d2880310..da3db75ff47ee 100644
--- a/lldb/source/API/SBProcessInfo.cpp
+++ b/lldb/source/API/SBProcessInfo.cpp
@@ -7,20 +7,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBProcessInfo.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBFileSpec.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/ProcessInfo.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBProcessInfo::SBProcessInfo() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBProcessInfo);
-}
+SBProcessInfo::SBProcessInfo() { LLDB_INSTRUMENT_VA(this); }
 
 SBProcessInfo::SBProcessInfo(const SBProcessInfo &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBProcessInfo, (const lldb::SBProcessInfo &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -28,9 +26,7 @@ SBProcessInfo::SBProcessInfo(const SBProcessInfo &rhs) {
 SBProcessInfo::~SBProcessInfo() = default;
 
 SBProcessInfo &SBProcessInfo::operator=(const SBProcessInfo &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBProcessInfo &,
-                     SBProcessInfo, operator=,(const lldb::SBProcessInfo &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -49,17 +45,17 @@ void SBProcessInfo::SetProcessInfo(const ProcessInstanceInfo &proc_info_ref) {
 }
 
 bool SBProcessInfo::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBProcessInfo, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBProcessInfo::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBProcessInfo, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up != nullptr;
 }
 
 const char *SBProcessInfo::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcessInfo, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   if (m_opaque_up) {
@@ -69,8 +65,7 @@ const char *SBProcessInfo::GetName() {
 }
 
 SBFileSpec SBProcessInfo::GetExecutableFile() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBProcessInfo,
-                             GetExecutableFile);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec file_spec;
   if (m_opaque_up) {
@@ -80,7 +75,7 @@ SBFileSpec SBProcessInfo::GetExecutableFile() {
 }
 
 lldb::pid_t SBProcessInfo::GetProcessID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBProcessInfo, GetProcessID);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::pid_t proc_id = LLDB_INVALID_PROCESS_ID;
   if (m_opaque_up) {
@@ -90,7 +85,7 @@ lldb::pid_t SBProcessInfo::GetProcessID() {
 }
 
 uint32_t SBProcessInfo::GetUserID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcessInfo, GetUserID);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t user_id = UINT32_MAX;
   if (m_opaque_up) {
@@ -100,7 +95,7 @@ uint32_t SBProcessInfo::GetUserID() {
 }
 
 uint32_t SBProcessInfo::GetGroupID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcessInfo, GetGroupID);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t group_id = UINT32_MAX;
   if (m_opaque_up) {
@@ -110,7 +105,7 @@ uint32_t SBProcessInfo::GetGroupID() {
 }
 
 bool SBProcessInfo::UserIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBProcessInfo, UserIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   bool is_valid = false;
   if (m_opaque_up) {
@@ -120,7 +115,7 @@ bool SBProcessInfo::UserIDIsValid() {
 }
 
 bool SBProcessInfo::GroupIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBProcessInfo, GroupIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   bool is_valid = false;
   if (m_opaque_up) {
@@ -130,7 +125,7 @@ bool SBProcessInfo::GroupIDIsValid() {
 }
 
 uint32_t SBProcessInfo::GetEffectiveUserID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcessInfo, GetEffectiveUserID);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t user_id = UINT32_MAX;
   if (m_opaque_up) {
@@ -140,7 +135,7 @@ uint32_t SBProcessInfo::GetEffectiveUserID() {
 }
 
 uint32_t SBProcessInfo::GetEffectiveGroupID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBProcessInfo, GetEffectiveGroupID);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t group_id = UINT32_MAX;
   if (m_opaque_up) {
@@ -150,7 +145,7 @@ uint32_t SBProcessInfo::GetEffectiveGroupID() {
 }
 
 bool SBProcessInfo::EffectiveUserIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBProcessInfo, EffectiveUserIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   bool is_valid = false;
   if (m_opaque_up) {
@@ -160,7 +155,7 @@ bool SBProcessInfo::EffectiveUserIDIsValid() {
 }
 
 bool SBProcessInfo::EffectiveGroupIDIsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBProcessInfo, EffectiveGroupIDIsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   bool is_valid = false;
   if (m_opaque_up) {
@@ -170,7 +165,7 @@ bool SBProcessInfo::EffectiveGroupIDIsValid() {
 }
 
 lldb::pid_t SBProcessInfo::GetParentProcessID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::pid_t, SBProcessInfo, GetParentProcessID);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::pid_t proc_id = LLDB_INVALID_PROCESS_ID;
   if (m_opaque_up) {
@@ -180,7 +175,7 @@ lldb::pid_t SBProcessInfo::GetParentProcessID() {
 }
 
 const char *SBProcessInfo::GetTriple() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBProcessInfo, GetTriple);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *triple = nullptr;
   if (m_opaque_up) {
diff --git a/lldb/source/API/SBQueue.cpp b/lldb/source/API/SBQueue.cpp
index debb82173067f..b2c143f6357e8 100644
--- a/lldb/source/API/SBQueue.cpp
+++ b/lldb/source/API/SBQueue.cpp
@@ -8,8 +8,8 @@
 
 #include <cinttypes>
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBQueue.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBProcess.h"
 #include "lldb/API/SBQueueItem.h"
@@ -215,17 +215,15 @@ class QueueImpl {
 };
 }
 
-SBQueue::SBQueue() : m_opaque_sp(new QueueImpl()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBQueue);
-}
+SBQueue::SBQueue() : m_opaque_sp(new QueueImpl()) { LLDB_INSTRUMENT_VA(this); }
 
 SBQueue::SBQueue(const QueueSP &queue_sp)
     : m_opaque_sp(new QueueImpl(queue_sp)) {
-  LLDB_RECORD_CONSTRUCTOR(SBQueue, (const lldb::QueueSP &), queue_sp);
+  LLDB_INSTRUMENT_VA(this, queue_sp);
 }
 
 SBQueue::SBQueue(const SBQueue &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBQueue, (const lldb::SBQueue &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (&rhs == this)
     return;
@@ -234,8 +232,7 @@ SBQueue::SBQueue(const SBQueue &rhs) {
 }
 
 const lldb::SBQueue &SBQueue::operator=(const lldb::SBQueue &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBQueue &,
-                     SBQueue, operator=,(const lldb::SBQueue &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = rhs.m_opaque_sp;
   return *this;
@@ -244,17 +241,17 @@ const lldb::SBQueue &SBQueue::operator=(const lldb::SBQueue &rhs) {
 SBQueue::~SBQueue() = default;
 
 bool SBQueue::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueue, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBQueue::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueue, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->IsValid();
 }
 
 void SBQueue::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBQueue, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp->Clear();
 }
@@ -264,65 +261,63 @@ void SBQueue::SetQueue(const QueueSP &queue_sp) {
 }
 
 lldb::queue_id_t SBQueue::GetQueueID() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::queue_id_t, SBQueue, GetQueueID);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetQueueID();
 }
 
 uint32_t SBQueue::GetIndexID() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBQueue, GetIndexID);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t index_id = m_opaque_sp->GetIndexID();
   return index_id;
 }
 
 const char *SBQueue::GetName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBQueue, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetName();
 }
 
 uint32_t SBQueue::GetNumThreads() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBQueue, GetNumThreads);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetNumThreads();
 }
 
 SBThread SBQueue::GetThreadAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBThread, SBQueue, GetThreadAtIndex, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBThread th = m_opaque_sp->GetThreadAtIndex(idx);
   return th;
 }
 
 uint32_t SBQueue::GetNumPendingItems() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBQueue, GetNumPendingItems);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetNumPendingItems();
 }
 
 SBQueueItem SBQueue::GetPendingItemAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBQueueItem, SBQueue, GetPendingItemAtIndex,
-                     (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   return m_opaque_sp->GetPendingItemAtIndex(idx);
 }
 
 uint32_t SBQueue::GetNumRunningItems() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBQueue, GetNumRunningItems);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetNumRunningItems();
 }
 
 SBProcess SBQueue::GetProcess() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBQueue, GetProcess);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetProcess();
 }
 
 lldb::QueueKind SBQueue::GetKind() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::QueueKind, SBQueue, GetKind);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp->GetKind();
 }
diff --git a/lldb/source/API/SBQueueItem.cpp b/lldb/source/API/SBQueueItem.cpp
index f28f2d69aa9ee..b2204452c0fac 100644
--- a/lldb/source/API/SBQueueItem.cpp
+++ b/lldb/source/API/SBQueueItem.cpp
@@ -8,7 +8,6 @@
 
 #include "lldb/lldb-forward.h"
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBAddress.h"
 #include "lldb/API/SBQueueItem.h"
 #include "lldb/API/SBThread.h"
@@ -16,47 +15,46 @@
 #include "lldb/Target/Process.h"
 #include "lldb/Target/QueueItem.h"
 #include "lldb/Target/Thread.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 // Constructors
-SBQueueItem::SBQueueItem() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBQueueItem); }
+SBQueueItem::SBQueueItem() { LLDB_INSTRUMENT_VA(this); }
 
 SBQueueItem::SBQueueItem(const QueueItemSP &queue_item_sp)
     : m_queue_item_sp(queue_item_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBQueueItem, (const lldb::QueueItemSP &),
-                          queue_item_sp);
+  LLDB_INSTRUMENT_VA(this, queue_item_sp);
 }
 
 // Destructor
 SBQueueItem::~SBQueueItem() { m_queue_item_sp.reset(); }
 
 bool SBQueueItem::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueueItem, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBQueueItem::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBQueueItem, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_queue_item_sp.get() != nullptr;
 }
 
 void SBQueueItem::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBQueueItem, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_queue_item_sp.reset();
 }
 
 void SBQueueItem::SetQueueItem(const QueueItemSP &queue_item_sp) {
-  LLDB_RECORD_METHOD(void, SBQueueItem, SetQueueItem,
-                     (const lldb::QueueItemSP &), queue_item_sp);
+  LLDB_INSTRUMENT_VA(this, queue_item_sp);
 
   m_queue_item_sp = queue_item_sp;
 }
 
 lldb::QueueItemKind SBQueueItem::GetKind() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::QueueItemKind, SBQueueItem, GetKind);
+  LLDB_INSTRUMENT_VA(this);
 
   QueueItemKind result = eQueueItemKindUnknown;
   if (m_queue_item_sp) {
@@ -66,7 +64,7 @@ lldb::QueueItemKind SBQueueItem::GetKind() const {
 }
 
 void SBQueueItem::SetKind(lldb::QueueItemKind kind) {
-  LLDB_RECORD_METHOD(void, SBQueueItem, SetKind, (lldb::QueueItemKind), kind);
+  LLDB_INSTRUMENT_VA(this, kind);
 
   if (m_queue_item_sp) {
     m_queue_item_sp->SetKind(kind);
@@ -74,7 +72,7 @@ void SBQueueItem::SetKind(lldb::QueueItemKind kind) {
 }
 
 SBAddress SBQueueItem::GetAddress() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBAddress, SBQueueItem, GetAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress result;
   if (m_queue_item_sp) {
@@ -84,7 +82,7 @@ SBAddress SBQueueItem::GetAddress() const {
 }
 
 void SBQueueItem::SetAddress(SBAddress addr) {
-  LLDB_RECORD_METHOD(void, SBQueueItem, SetAddress, (lldb::SBAddress), addr);
+  LLDB_INSTRUMENT_VA(this, addr);
 
   if (m_queue_item_sp) {
     m_queue_item_sp->SetAddress(addr.ref());
@@ -92,8 +90,7 @@ void SBQueueItem::SetAddress(SBAddress addr) {
 }
 
 SBThread SBQueueItem::GetExtendedBacktraceThread(const char *type) {
-  LLDB_RECORD_METHOD(lldb::SBThread, SBQueueItem, GetExtendedBacktraceThread,
-                     (const char *), type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   SBThread result;
   if (m_queue_item_sp) {
diff --git a/lldb/source/API/SBReproducer.cpp b/lldb/source/API/SBReproducer.cpp
index ba564c911b725..d3d27cc577480 100644
--- a/lldb/source/API/SBReproducer.cpp
+++ b/lldb/source/API/SBReproducer.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-
+#include "lldb/API/SBReproducer.h"
 #include "lldb/API/LLDB.h"
 #include "lldb/API/SBAddress.h"
 #include "lldb/API/SBAttachInfo.h"
@@ -20,12 +20,11 @@
 #include "lldb/API/SBError.h"
 #include "lldb/API/SBFileSpec.h"
 #include "lldb/API/SBHostOS.h"
-#include "lldb/API/SBReproducer.h"
 #include "lldb/Host/FileSystem.h"
-#include "lldb/Version/Version.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Reproducer.h"
 #include "lldb/Utility/ReproducerProvider.h"
+#include "lldb/Version/Version.h"
 
 using namespace lldb;
 using namespace lldb_private;
@@ -40,25 +39,33 @@ SBReplayOptions::SBReplayOptions(const SBReplayOptions &rhs)
 SBReplayOptions::~SBReplayOptions() = default;
 
 SBReplayOptions &SBReplayOptions::operator=(const SBReplayOptions &rhs) {
+  LLDB_INSTRUMENT_VA(this, rhs)
   if (this == &rhs)
     return *this;
   *m_opaque_up = *rhs.m_opaque_up;
   return *this;
 }
 
-void SBReplayOptions::SetVerify(bool verify) { m_opaque_up->verify = verify; }
+void SBReplayOptions::SetVerify(bool verify) {
+  LLDB_INSTRUMENT_VA(this, verify) m_opaque_up->verify = verify;
+}
 
-bool SBReplayOptions::GetVerify() const { return m_opaque_up->verify; }
+bool SBReplayOptions::GetVerify() const {
+  LLDB_INSTRUMENT_VA(this) return m_opaque_up->verify;
+}
 
 void SBReplayOptions::SetCheckVersion(bool check) {
+  LLDB_INSTRUMENT_VA(this, check)
   m_opaque_up->check_version = check;
 }
 
 bool SBReplayOptions::GetCheckVersion() const {
+  LLDB_INSTRUMENT_VA(this)
   return m_opaque_up->check_version;
 }
 
 const char *SBReproducer::Capture() {
+  LLDB_INSTRUMENT()
   static std::string error;
   if (auto e = Reproducer::Initialize(ReproducerMode::Capture, llvm::None)) {
     error = llvm::toString(std::move(e));
@@ -69,6 +76,7 @@ const char *SBReproducer::Capture() {
 }
 
 const char *SBReproducer::Capture(const char *path) {
+  LLDB_INSTRUMENT_VA(path)
   static std::string error;
   if (auto e =
           Reproducer::Initialize(ReproducerMode::Capture, FileSpec(path))) {
@@ -80,23 +88,28 @@ const char *SBReproducer::Capture(const char *path) {
 }
 
 const char *SBReproducer::PassiveReplay(const char *path) {
+  LLDB_INSTRUMENT_VA(path)
   return "Reproducer replay has been removed";
 }
 
 const char *SBReproducer::Replay(const char *path) {
+  LLDB_INSTRUMENT_VA(path)
   return "Reproducer replay has been removed";
 }
 
 const char *SBReproducer::Replay(const char *path, bool skip_version_check) {
+  LLDB_INSTRUMENT_VA(path, skip_version_check)
   return Replay(path);
 }
 
 const char *SBReproducer::Replay(const char *path,
                                  const SBReplayOptions &options) {
+  LLDB_INSTRUMENT_VA(path, options)
   return Replay(path);
 }
 
 const char *SBReproducer::Finalize(const char *path) {
+  LLDB_INSTRUMENT_VA(path)
   static std::string error;
 
   repro::Loader *loader = repro::Reproducer::Instance().GetLoader();
@@ -114,6 +127,7 @@ const char *SBReproducer::Finalize(const char *path) {
 }
 
 bool SBReproducer::Generate() {
+  LLDB_INSTRUMENT()
   auto &r = Reproducer::Instance();
   if (auto generator = r.GetGenerator()) {
     generator->Keep();
@@ -123,6 +137,7 @@ bool SBReproducer::Generate() {
 }
 
 bool SBReproducer::SetAutoGenerate(bool b) {
+  LLDB_INSTRUMENT_VA(b)
   auto &r = Reproducer::Instance();
   if (auto generator = r.GetGenerator()) {
     generator->SetAutoGenerate(b);
@@ -132,6 +147,7 @@ bool SBReproducer::SetAutoGenerate(bool b) {
 }
 
 const char *SBReproducer::GetPath() {
+  LLDB_INSTRUMENT()
   ConstString path;
   auto &r = Reproducer::Instance();
   if (FileSpec reproducer_path = Reproducer::Instance().GetReproducerPath())
@@ -140,6 +156,7 @@ const char *SBReproducer::GetPath() {
 }
 
 void SBReproducer::SetWorkingDirectory(const char *path) {
+  LLDB_INSTRUMENT_VA(path)
   if (auto *g = lldb_private::repro::Reproducer::Instance().GetGenerator()) {
     auto &wp = g->GetOrCreate<repro::WorkingDirectoryProvider>();
     wp.SetDirectory(path);
diff --git a/lldb/source/API/SBSection.cpp b/lldb/source/API/SBSection.cpp
index aa068518f9468..733e0db0b5bad 100644
--- a/lldb/source/API/SBSection.cpp
+++ b/lldb/source/API/SBSection.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBSection.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBTarget.h"
 #include "lldb/Core/Module.h"
@@ -15,15 +14,16 @@
 #include "lldb/Symbol/ObjectFile.h"
 #include "lldb/Utility/DataBuffer.h"
 #include "lldb/Utility/DataExtractor.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/StreamString.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBSection::SBSection() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSection); }
+SBSection::SBSection() { LLDB_INSTRUMENT_VA(this); }
 
 SBSection::SBSection(const SBSection &rhs) : m_opaque_wp(rhs.m_opaque_wp) {
-  LLDB_RECORD_CONSTRUCTOR(SBSection, (const lldb::SBSection &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBSection::SBSection(const lldb::SectionSP &section_sp) {
@@ -34,8 +34,7 @@ SBSection::SBSection(const lldb::SectionSP &section_sp) {
 }
 
 const SBSection &SBSection::operator=(const SBSection &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBSection &,
-                     SBSection, operator=,(const lldb::SBSection &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_wp = rhs.m_opaque_wp;
   return *this;
@@ -44,18 +43,18 @@ const SBSection &SBSection::operator=(const SBSection &rhs) {
 SBSection::~SBSection() = default;
 
 bool SBSection::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSection, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBSection::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSection, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   return section_sp && section_sp->GetModule().get() != nullptr;
 }
 
 const char *SBSection::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBSection, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   if (section_sp)
@@ -64,7 +63,7 @@ const char *SBSection::GetName() {
 }
 
 lldb::SBSection SBSection::GetParent() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSection, SBSection, GetParent);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBSection sb_section;
   SectionSP section_sp(GetSP());
@@ -77,8 +76,7 @@ lldb::SBSection SBSection::GetParent() {
 }
 
 lldb::SBSection SBSection::FindSubSection(const char *sect_name) {
-  LLDB_RECORD_METHOD(lldb::SBSection, SBSection, FindSubSection, (const char *),
-                     sect_name);
+  LLDB_INSTRUMENT_VA(this, sect_name);
 
   lldb::SBSection sb_section;
   if (sect_name) {
@@ -93,7 +91,7 @@ lldb::SBSection SBSection::FindSubSection(const char *sect_name) {
 }
 
 size_t SBSection::GetNumSubSections() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBSection, GetNumSubSections);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   if (section_sp)
@@ -102,8 +100,7 @@ size_t SBSection::GetNumSubSections() {
 }
 
 lldb::SBSection SBSection::GetSubSectionAtIndex(size_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBSection, SBSection, GetSubSectionAtIndex, (size_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   lldb::SBSection sb_section;
   SectionSP section_sp(GetSP());
@@ -119,7 +116,7 @@ void SBSection::SetSP(const lldb::SectionSP &section_sp) {
 }
 
 lldb::addr_t SBSection::GetFileAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBSection, GetFileAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::addr_t file_addr = LLDB_INVALID_ADDRESS;
   SectionSP section_sp(GetSP());
@@ -129,8 +126,7 @@ lldb::addr_t SBSection::GetFileAddress() {
 }
 
 lldb::addr_t SBSection::GetLoadAddress(lldb::SBTarget &sb_target) {
-  LLDB_RECORD_METHOD(lldb::addr_t, SBSection, GetLoadAddress,
-                     (lldb::SBTarget &), sb_target);
+  LLDB_INSTRUMENT_VA(this, sb_target);
 
   TargetSP target_sp(sb_target.GetSP());
   if (target_sp) {
@@ -142,7 +138,7 @@ lldb::addr_t SBSection::GetLoadAddress(lldb::SBTarget &sb_target) {
 }
 
 lldb::addr_t SBSection::GetByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBSection, GetByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   if (section_sp)
@@ -151,7 +147,7 @@ lldb::addr_t SBSection::GetByteSize() {
 }
 
 uint64_t SBSection::GetFileOffset() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBSection, GetFileOffset);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   if (section_sp) {
@@ -166,7 +162,7 @@ uint64_t SBSection::GetFileOffset() {
 }
 
 uint64_t SBSection::GetFileByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBSection, GetFileByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   if (section_sp)
@@ -175,14 +171,13 @@ uint64_t SBSection::GetFileByteSize() {
 }
 
 SBData SBSection::GetSectionData() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBData, SBSection, GetSectionData);
+  LLDB_INSTRUMENT_VA(this);
 
   return GetSectionData(0, UINT64_MAX);
 }
 
 SBData SBSection::GetSectionData(uint64_t offset, uint64_t size) {
-  LLDB_RECORD_METHOD(lldb::SBData, SBSection, GetSectionData,
-                     (uint64_t, uint64_t), offset, size);
+  LLDB_INSTRUMENT_VA(this, offset, size);
 
   SBData sb_data;
   SectionSP section_sp(GetSP());
@@ -221,7 +216,7 @@ SBData SBSection::GetSectionData(uint64_t offset, uint64_t size) {
 }
 
 SectionType SBSection::GetSectionType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SectionType, SBSection, GetSectionType);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   if (section_sp.get())
@@ -230,7 +225,7 @@ SectionType SBSection::GetSectionType() {
 }
 
 uint32_t SBSection::GetPermissions() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBSection, GetPermissions);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   if (section_sp)
@@ -239,7 +234,7 @@ uint32_t SBSection::GetPermissions() const {
 }
 
 uint32_t SBSection::GetTargetByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBSection, GetTargetByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   SectionSP section_sp(GetSP());
   if (section_sp.get())
@@ -248,8 +243,7 @@ uint32_t SBSection::GetTargetByteSize() {
 }
 
 bool SBSection::operator==(const SBSection &rhs) {
-  LLDB_RECORD_METHOD(bool, SBSection, operator==,(const lldb::SBSection &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   SectionSP lhs_section_sp(GetSP());
   SectionSP rhs_section_sp(rhs.GetSP());
@@ -259,8 +253,7 @@ bool SBSection::operator==(const SBSection &rhs) {
 }
 
 bool SBSection::operator!=(const SBSection &rhs) {
-  LLDB_RECORD_METHOD(bool, SBSection, operator!=,(const lldb::SBSection &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   SectionSP lhs_section_sp(GetSP());
   SectionSP rhs_section_sp(rhs.GetSP());
@@ -268,8 +261,7 @@ bool SBSection::operator!=(const SBSection &rhs) {
 }
 
 bool SBSection::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBSection, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
diff --git a/lldb/source/API/SBSourceManager.cpp b/lldb/source/API/SBSourceManager.cpp
index e1cfb4dfde151..7729f5d9d69f8 100644
--- a/lldb/source/API/SBSourceManager.cpp
+++ b/lldb/source/API/SBSourceManager.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBSourceManager.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBDebugger.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBTarget.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBFileSpec.h"
 #include "lldb/Core/Debugger.h"
@@ -71,21 +71,19 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBSourceManager::SBSourceManager(const SBDebugger &debugger) {
-  LLDB_RECORD_CONSTRUCTOR(SBSourceManager, (const lldb::SBDebugger &),
-                          debugger);
+  LLDB_INSTRUMENT_VA(this, debugger);
 
   m_opaque_up = std::make_unique<SourceManagerImpl>(debugger.get_sp());
 }
 
 SBSourceManager::SBSourceManager(const SBTarget &target) {
-  LLDB_RECORD_CONSTRUCTOR(SBSourceManager, (const lldb::SBTarget &), target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   m_opaque_up = std::make_unique<SourceManagerImpl>(target.GetSP());
 }
 
 SBSourceManager::SBSourceManager(const SBSourceManager &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBSourceManager, (const lldb::SBSourceManager &),
-                          rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (&rhs == this)
     return;
@@ -95,9 +93,7 @@ SBSourceManager::SBSourceManager(const SBSourceManager &rhs) {
 
 const lldb::SBSourceManager &SBSourceManager::
 operator=(const lldb::SBSourceManager &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBSourceManager &,
-                     SBSourceManager, operator=,(const lldb::SBSourceManager &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = std::make_unique<SourceManagerImpl>(*(rhs.m_opaque_up.get()));
   return *this;
@@ -108,10 +104,7 @@ SBSourceManager::~SBSourceManager() = default;
 size_t SBSourceManager::DisplaySourceLinesWithLineNumbers(
     const SBFileSpec &file, uint32_t line, uint32_t context_before,
     uint32_t context_after, const char *current_line_cstr, SBStream &s) {
-  LLDB_RECORD_METHOD(size_t, SBSourceManager, DisplaySourceLinesWithLineNumbers,
-                     (const lldb::SBFileSpec &, uint32_t, uint32_t, uint32_t,
-                      const char *, lldb::SBStream &),
-                     file, line, context_before, context_after,
+  LLDB_INSTRUMENT_VA(this, file, line, context_before, context_after,
                      current_line_cstr, s);
 
   const uint32_t column = 0;
@@ -124,11 +117,8 @@ size_t SBSourceManager::DisplaySourceLinesWithLineNumbersAndColumn(
     const SBFileSpec &file, uint32_t line, uint32_t column,
     uint32_t context_before, uint32_t context_after,
     const char *current_line_cstr, SBStream &s) {
-  LLDB_RECORD_METHOD(
-      size_t, SBSourceManager, DisplaySourceLinesWithLineNumbersAndColumn,
-      (const lldb::SBFileSpec &, uint32_t, uint32_t, uint32_t, uint32_t,
-       const char *, lldb::SBStream &),
-      file, line, column, context_before, context_after, current_line_cstr, s);
+  LLDB_INSTRUMENT_VA(this, file, line, column, context_before, context_after,
+                     current_line_cstr, s);
 
   if (m_opaque_up == nullptr)
     return 0;
diff --git a/lldb/source/API/SBStream.cpp b/lldb/source/API/SBStream.cpp
index c3e344802148c..9ceef3466f937 100644
--- a/lldb/source/API/SBStream.cpp
+++ b/lldb/source/API/SBStream.cpp
@@ -8,10 +8,10 @@
 
 #include "lldb/API/SBStream.h"
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBFile.h"
 #include "lldb/Core/StreamFile.h"
 #include "lldb/Host/FileSystem.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Status.h"
 #include "lldb/Utility/Stream.h"
 #include "lldb/Utility/StreamString.h"
@@ -20,7 +20,7 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBStream::SBStream() : m_opaque_up(new StreamString()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBStream);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBStream::SBStream(SBStream &&rhs)
@@ -29,11 +29,11 @@ SBStream::SBStream(SBStream &&rhs)
 SBStream::~SBStream() = default;
 
 bool SBStream::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStream, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBStream::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStream, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_up != nullptr);
 }
@@ -41,7 +41,7 @@ SBStream::operator bool() const {
 // If this stream is not redirected to a file, it will maintain a local cache
 // for the stream data which can be accessed using this accessor.
 const char *SBStream::GetData() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBStream, GetData);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_is_file || m_opaque_up == nullptr)
     return nullptr;
@@ -52,7 +52,7 @@ const char *SBStream::GetData() {
 // If this stream is not redirected to a file, it will maintain a local cache
 // for the stream output whose length can be accessed using this accessor.
 size_t SBStream::GetSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBStream, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_is_file || m_opaque_up == nullptr)
     return 0;
@@ -61,7 +61,7 @@ size_t SBStream::GetSize() {
 }
 
 void SBStream::Print(const char *str) {
-  LLDB_RECORD_METHOD(void, SBStream, Print, (const char *), str);
+  LLDB_INSTRUMENT_VA(this, str);
 
   Printf("%s", str);
 }
@@ -76,8 +76,7 @@ void SBStream::Printf(const char *format, ...) {
 }
 
 void SBStream::RedirectToFile(const char *path, bool append) {
-  LLDB_RECORD_METHOD(void, SBStream, RedirectToFile, (const char *, bool), path,
-                     append);
+  LLDB_INSTRUMENT_VA(this, path, append);
 
   if (path == nullptr)
     return;
@@ -114,19 +113,18 @@ void SBStream::RedirectToFile(const char *path, bool append) {
 }
 
 void SBStream::RedirectToFileHandle(FILE *fh, bool transfer_fh_ownership) {
-  LLDB_RECORD_METHOD(void, SBStream, RedirectToFileHandle, (FILE *, bool), fh,
-                     transfer_fh_ownership);
+  LLDB_INSTRUMENT_VA(this, fh, transfer_fh_ownership);
   FileSP file = std::make_unique<NativeFile>(fh, transfer_fh_ownership);
   return RedirectToFile(file);
 }
 
 void SBStream::RedirectToFile(SBFile file) {
-  LLDB_RECORD_METHOD(void, SBStream, RedirectToFile, (SBFile), file)
+  LLDB_INSTRUMENT_VA(this, file)
   RedirectToFile(file.GetFile());
 }
 
 void SBStream::RedirectToFile(FileSP file_sp) {
-  LLDB_RECORD_METHOD(void, SBStream, RedirectToFile, (FileSP), file_sp);
+  LLDB_INSTRUMENT_VA(this, file_sp);
 
   if (!file_sp || !file_sp->IsValid())
     return;
@@ -150,8 +148,7 @@ void SBStream::RedirectToFile(FileSP file_sp) {
 }
 
 void SBStream::RedirectToFileDescriptor(int fd, bool transfer_fh_ownership) {
-  LLDB_RECORD_METHOD(void, SBStream, RedirectToFileDescriptor, (int, bool), fd,
-                     transfer_fh_ownership);
+  LLDB_INSTRUMENT_VA(this, fd, transfer_fh_ownership);
 
   std::string local_data;
   if (m_opaque_up) {
@@ -182,7 +179,7 @@ lldb_private::Stream &SBStream::ref() {
 }
 
 void SBStream::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBStream, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up) {
     // See if we have any locally backed data. If so, copy it so we can then
diff --git a/lldb/source/API/SBStringList.cpp b/lldb/source/API/SBStringList.cpp
index afafd7429fd76..dfb77b1ab32fb 100644
--- a/lldb/source/API/SBStringList.cpp
+++ b/lldb/source/API/SBStringList.cpp
@@ -7,14 +7,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBStringList.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/StringList.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBStringList::SBStringList() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBStringList); }
+SBStringList::SBStringList() { LLDB_INSTRUMENT_VA(this); }
 
 SBStringList::SBStringList(const lldb_private::StringList *lldb_strings_ptr) {
   if (lldb_strings_ptr)
@@ -22,14 +22,13 @@ SBStringList::SBStringList(const lldb_private::StringList *lldb_strings_ptr) {
 }
 
 SBStringList::SBStringList(const SBStringList &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBStringList, (const lldb::SBStringList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
 
 const SBStringList &SBStringList::operator=(const SBStringList &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBStringList &,
-                     SBStringList, operator=,(const lldb::SBStringList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -47,17 +46,17 @@ const lldb_private::StringList &SBStringList::operator*() const {
 }
 
 bool SBStringList::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStringList, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBStringList::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStringList, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_up != nullptr);
 }
 
 void SBStringList::AppendString(const char *str) {
-  LLDB_RECORD_METHOD(void, SBStringList, AppendString, (const char *), str);
+  LLDB_INSTRUMENT_VA(this, str);
 
   if (str != nullptr) {
     if (IsValid())
@@ -68,8 +67,7 @@ void SBStringList::AppendString(const char *str) {
 }
 
 void SBStringList::AppendList(const char **strv, int strc) {
-  LLDB_RECORD_METHOD(void, SBStringList, AppendList, (const char **, int), strv,
-                     strc);
+  LLDB_INSTRUMENT_VA(this, strv, strc);
 
   if ((strv != nullptr) && (strc > 0)) {
     if (IsValid())
@@ -80,8 +78,7 @@ void SBStringList::AppendList(const char **strv, int strc) {
 }
 
 void SBStringList::AppendList(const SBStringList &strings) {
-  LLDB_RECORD_METHOD(void, SBStringList, AppendList,
-                     (const lldb::SBStringList &), strings);
+  LLDB_INSTRUMENT_VA(this, strings);
 
   if (strings.IsValid()) {
     if (!IsValid())
@@ -97,7 +94,7 @@ void SBStringList::AppendList(const StringList &strings) {
 }
 
 uint32_t SBStringList::GetSize() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBStringList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid()) {
     return m_opaque_up->GetSize();
@@ -106,8 +103,7 @@ uint32_t SBStringList::GetSize() const {
 }
 
 const char *SBStringList::GetStringAtIndex(size_t idx) {
-  LLDB_RECORD_METHOD(const char *, SBStringList, GetStringAtIndex, (size_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   if (IsValid()) {
     return m_opaque_up->GetStringAtIndex(idx);
@@ -116,8 +112,7 @@ const char *SBStringList::GetStringAtIndex(size_t idx) {
 }
 
 const char *SBStringList::GetStringAtIndex(size_t idx) const {
-  LLDB_RECORD_METHOD_CONST(const char *, SBStringList, GetStringAtIndex,
-                           (size_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   if (IsValid()) {
     return m_opaque_up->GetStringAtIndex(idx);
@@ -126,7 +121,7 @@ const char *SBStringList::GetStringAtIndex(size_t idx) const {
 }
 
 void SBStringList::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBStringList, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid()) {
     m_opaque_up->Clear();
diff --git a/lldb/source/API/SBStructuredData.cpp b/lldb/source/API/SBStructuredData.cpp
index c607e305e0a6e..498bcdd39e448 100644
--- a/lldb/source/API/SBStructuredData.cpp
+++ b/lldb/source/API/SBStructuredData.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBStructuredData.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBStringList.h"
@@ -25,41 +25,36 @@ using namespace lldb_private;
 #pragma mark SBStructuredData
 
 SBStructuredData::SBStructuredData() : m_impl_up(new StructuredDataImpl()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBStructuredData);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBStructuredData::SBStructuredData(const lldb::SBStructuredData &rhs)
     : m_impl_up(new StructuredDataImpl(*rhs.m_impl_up)) {
-  LLDB_RECORD_CONSTRUCTOR(SBStructuredData, (const lldb::SBStructuredData &),
-                          rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBStructuredData::SBStructuredData(const lldb::EventSP &event_sp)
     : m_impl_up(new StructuredDataImpl(event_sp)) {
-  LLDB_RECORD_CONSTRUCTOR(SBStructuredData, (const lldb::EventSP &), event_sp);
+  LLDB_INSTRUMENT_VA(this, event_sp);
 }
 
 SBStructuredData::SBStructuredData(const lldb_private::StructuredDataImpl &impl)
     : m_impl_up(new StructuredDataImpl(impl)) {
-  LLDB_RECORD_CONSTRUCTOR(SBStructuredData,
-                          (const lldb_private::StructuredDataImpl &), impl);
+  LLDB_INSTRUMENT_VA(this, impl);
 }
 
 SBStructuredData::~SBStructuredData() = default;
 
 SBStructuredData &SBStructuredData::
 operator=(const lldb::SBStructuredData &rhs) {
-  LLDB_RECORD_METHOD(
-      lldb::SBStructuredData &,
-      SBStructuredData, operator=,(const lldb::SBStructuredData &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   *m_impl_up = *rhs.m_impl_up;
   return *this;
 }
 
 lldb::SBError SBStructuredData::SetFromJSON(lldb::SBStream &stream) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBStructuredData, SetFromJSON,
-                     (lldb::SBStream &), stream);
+  LLDB_INSTRUMENT_VA(this, stream);
 
   lldb::SBError error;
   std::string json_str(stream.GetData());
@@ -73,33 +68,31 @@ lldb::SBError SBStructuredData::SetFromJSON(lldb::SBStream &stream) {
 }
 
 lldb::SBError SBStructuredData::SetFromJSON(const char *json) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBStructuredData, SetFromJSON,
-                     (const char *), json);
+  LLDB_INSTRUMENT_VA(this, json);
   lldb::SBStream s;
   s.Print(json);
   return SetFromJSON(s);
 }
 
 bool SBStructuredData::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStructuredData, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 
 SBStructuredData::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBStructuredData, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_impl_up->IsValid();
 }
 
 void SBStructuredData::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBStructuredData, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_impl_up->Clear();
 }
 
 SBError SBStructuredData::GetAsJSON(lldb::SBStream &stream) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBError, SBStructuredData, GetAsJSON,
-                           (lldb::SBStream &), stream);
+  LLDB_INSTRUMENT_VA(this, stream);
 
   SBError error;
   error.SetError(m_impl_up->GetAsJSON(stream.ref()));
@@ -107,8 +100,7 @@ SBError SBStructuredData::GetAsJSON(lldb::SBStream &stream) const {
 }
 
 lldb::SBError SBStructuredData::GetDescription(lldb::SBStream &stream) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBError, SBStructuredData, GetDescription,
-                           (lldb::SBStream &), stream);
+  LLDB_INSTRUMENT_VA(this, stream);
 
   Status error = m_impl_up->GetDescription(stream.ref());
   SBError sb_error;
@@ -117,21 +109,19 @@ lldb::SBError SBStructuredData::GetDescription(lldb::SBStream &stream) const {
 }
 
 StructuredDataType SBStructuredData::GetType() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::StructuredDataType, SBStructuredData,
-                                   GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_impl_up->GetType();
 }
 
 size_t SBStructuredData::GetSize() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(size_t, SBStructuredData, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_impl_up->GetSize();
 }
 
 bool SBStructuredData::GetKeys(lldb::SBStringList &keys) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBStructuredData, GetKeys,
-                           (lldb::SBStringList &), keys);
+  LLDB_INSTRUMENT_VA(this, keys);
 
   if (GetType() != eStructuredDataTypeDictionary)
     return false;
@@ -157,8 +147,7 @@ bool SBStructuredData::GetKeys(lldb::SBStringList &keys) const {
 }
 
 lldb::SBStructuredData SBStructuredData::GetValueForKey(const char *key) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBStructuredData, SBStructuredData,
-                           GetValueForKey, (const char *), key);
+  LLDB_INSTRUMENT_VA(this, key);
 
   SBStructuredData result;
   result.m_impl_up->SetObjectSP(m_impl_up->GetValueForKey(key));
@@ -166,8 +155,7 @@ lldb::SBStructuredData SBStructuredData::GetValueForKey(const char *key) const {
 }
 
 lldb::SBStructuredData SBStructuredData::GetItemAtIndex(size_t idx) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBStructuredData, SBStructuredData,
-                           GetItemAtIndex, (size_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBStructuredData result;
   result.m_impl_up->SetObjectSP(m_impl_up->GetItemAtIndex(idx));
@@ -175,29 +163,25 @@ lldb::SBStructuredData SBStructuredData::GetItemAtIndex(size_t idx) const {
 }
 
 uint64_t SBStructuredData::GetIntegerValue(uint64_t fail_value) const {
-  LLDB_RECORD_METHOD_CONST(uint64_t, SBStructuredData, GetIntegerValue,
-                           (uint64_t), fail_value);
+  LLDB_INSTRUMENT_VA(this, fail_value);
 
   return m_impl_up->GetIntegerValue(fail_value);
 }
 
 double SBStructuredData::GetFloatValue(double fail_value) const {
-  LLDB_RECORD_METHOD_CONST(double, SBStructuredData, GetFloatValue, (double),
-                           fail_value);
+  LLDB_INSTRUMENT_VA(this, fail_value);
 
   return m_impl_up->GetFloatValue(fail_value);
 }
 
 bool SBStructuredData::GetBooleanValue(bool fail_value) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBStructuredData, GetBooleanValue, (bool),
-                           fail_value);
+  LLDB_INSTRUMENT_VA(this, fail_value);
 
   return m_impl_up->GetBooleanValue(fail_value);
 }
 
 size_t SBStructuredData::GetStringValue(char *dst, size_t dst_len) const {
-  LLDB_RECORD_METHOD_CONST(size_t, SBStructuredData, GetStringValue,
-                           (char *, size_t), dst, "", dst_len);
+  LLDB_INSTRUMENT_VA(this, dst, dst_len);
 
   return m_impl_up->GetStringValue(dst, dst_len);
 }
diff --git a/lldb/source/API/SBSymbol.cpp b/lldb/source/API/SBSymbol.cpp
index 96fe5708d344d..b671f987dc996 100644
--- a/lldb/source/API/SBSymbol.cpp
+++ b/lldb/source/API/SBSymbol.cpp
@@ -7,29 +7,28 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBSymbol.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Core/Disassembler.h"
 #include "lldb/Core/Module.h"
 #include "lldb/Symbol/Symbol.h"
 #include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBSymbol::SBSymbol() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbol); }
+SBSymbol::SBSymbol() { LLDB_INSTRUMENT_VA(this); }
 
 SBSymbol::SBSymbol(lldb_private::Symbol *lldb_object_ptr)
     : m_opaque_ptr(lldb_object_ptr) {}
 
 SBSymbol::SBSymbol(const lldb::SBSymbol &rhs) : m_opaque_ptr(rhs.m_opaque_ptr) {
-  LLDB_RECORD_CONSTRUCTOR(SBSymbol, (const lldb::SBSymbol &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBSymbol &SBSymbol::operator=(const SBSymbol &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBSymbol &,
-                     SBSymbol, operator=,(const lldb::SBSymbol &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_ptr = rhs.m_opaque_ptr;
   return *this;
@@ -42,17 +41,17 @@ void SBSymbol::SetSymbol(lldb_private::Symbol *lldb_object_ptr) {
 }
 
 bool SBSymbol::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbol, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBSymbol::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbol, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_ptr != nullptr;
 }
 
 const char *SBSymbol::GetName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   if (m_opaque_ptr)
@@ -62,7 +61,7 @@ const char *SBSymbol::GetName() const {
 }
 
 const char *SBSymbol::GetDisplayName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetDisplayName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   if (m_opaque_ptr)
@@ -72,7 +71,7 @@ const char *SBSymbol::GetDisplayName() const {
 }
 
 const char *SBSymbol::GetMangledName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBSymbol, GetMangledName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   if (m_opaque_ptr)
@@ -81,22 +80,19 @@ const char *SBSymbol::GetMangledName() const {
 }
 
 bool SBSymbol::operator==(const SBSymbol &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBSymbol, operator==,(const lldb::SBSymbol &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr == rhs.m_opaque_ptr;
 }
 
 bool SBSymbol::operator!=(const SBSymbol &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBSymbol, operator!=,(const lldb::SBSymbol &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_ptr != rhs.m_opaque_ptr;
 }
 
 bool SBSymbol::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBSymbol, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -109,16 +105,14 @@ bool SBSymbol::GetDescription(SBStream &description) {
 }
 
 SBInstructionList SBSymbol::GetInstructions(SBTarget target) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBSymbol, GetInstructions,
-                     (lldb::SBTarget), target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   return GetInstructions(target, nullptr);
 }
 
 SBInstructionList SBSymbol::GetInstructions(SBTarget target,
                                             const char *flavor_string) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBSymbol, GetInstructions,
-                     (lldb::SBTarget, const char *), target, flavor_string);
+  LLDB_INSTRUMENT_VA(this, target, flavor_string);
 
   SBInstructionList sb_instructions;
   if (m_opaque_ptr) {
@@ -145,7 +139,7 @@ lldb_private::Symbol *SBSymbol::get() { return m_opaque_ptr; }
 void SBSymbol::reset(lldb_private::Symbol *symbol) { m_opaque_ptr = symbol; }
 
 SBAddress SBSymbol::GetStartAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBSymbol, GetStartAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress addr;
   if (m_opaque_ptr && m_opaque_ptr->ValueIsAddress()) {
@@ -155,7 +149,7 @@ SBAddress SBSymbol::GetStartAddress() {
 }
 
 SBAddress SBSymbol::GetEndAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBSymbol, GetEndAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   SBAddress addr;
   if (m_opaque_ptr && m_opaque_ptr->ValueIsAddress()) {
@@ -169,7 +163,7 @@ SBAddress SBSymbol::GetEndAddress() {
 }
 
 uint32_t SBSymbol::GetPrologueByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBSymbol, GetPrologueByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetPrologueByteSize();
@@ -177,7 +171,7 @@ uint32_t SBSymbol::GetPrologueByteSize() {
 }
 
 SymbolType SBSymbol::GetType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SymbolType, SBSymbol, GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->GetType();
@@ -185,7 +179,7 @@ SymbolType SBSymbol::GetType() {
 }
 
 bool SBSymbol::IsExternal() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBSymbol, IsExternal);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->IsExternal();
@@ -193,7 +187,7 @@ bool SBSymbol::IsExternal() {
 }
 
 bool SBSymbol::IsSynthetic() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBSymbol, IsSynthetic);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_ptr)
     return m_opaque_ptr->IsSynthetic();
diff --git a/lldb/source/API/SBSymbolContext.cpp b/lldb/source/API/SBSymbolContext.cpp
index ebe9bcfabb9fb..484399c895900 100644
--- a/lldb/source/API/SBSymbolContext.cpp
+++ b/lldb/source/API/SBSymbolContext.cpp
@@ -7,30 +7,26 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBSymbolContext.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Core/Module.h"
 #include "lldb/Symbol/Function.h"
 #include "lldb/Symbol/Symbol.h"
 #include "lldb/Symbol/SymbolContext.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBSymbolContext::SBSymbolContext() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbolContext);
-}
+SBSymbolContext::SBSymbolContext() { LLDB_INSTRUMENT_VA(this); }
 
 SBSymbolContext::SBSymbolContext(const SymbolContext &sc)
     : m_opaque_up(std::make_unique<SymbolContext>(sc)) {
-  LLDB_RECORD_CONSTRUCTOR(SBSymbolContext,
-                          (const lldb_private::SymbolContext &), sc);
+  LLDB_INSTRUMENT_VA(this, sc);
 }
 
 SBSymbolContext::SBSymbolContext(const SBSymbolContext &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBSymbolContext, (const lldb::SBSymbolContext &),
-                          rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -38,9 +34,7 @@ SBSymbolContext::SBSymbolContext(const SBSymbolContext &rhs) {
 SBSymbolContext::~SBSymbolContext() = default;
 
 const SBSymbolContext &SBSymbolContext::operator=(const SBSymbolContext &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBSymbolContext &,
-                     SBSymbolContext, operator=,(const lldb::SBSymbolContext &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -48,17 +42,17 @@ const SBSymbolContext &SBSymbolContext::operator=(const SBSymbolContext &rhs) {
 }
 
 bool SBSymbolContext::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContext, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBSymbolContext::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContext, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up != nullptr;
 }
 
 SBModule SBSymbolContext::GetModule() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBModule, SBSymbolContext, GetModule);
+  LLDB_INSTRUMENT_VA(this);
 
   SBModule sb_module;
   ModuleSP module_sp;
@@ -71,14 +65,13 @@ SBModule SBSymbolContext::GetModule() {
 }
 
 SBCompileUnit SBSymbolContext::GetCompileUnit() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBCompileUnit, SBSymbolContext,
-                             GetCompileUnit);
+  LLDB_INSTRUMENT_VA(this);
 
   return SBCompileUnit(m_opaque_up ? m_opaque_up->comp_unit : nullptr);
 }
 
 SBFunction SBSymbolContext::GetFunction() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFunction, SBSymbolContext, GetFunction);
+  LLDB_INSTRUMENT_VA(this);
 
   Function *function = nullptr;
 
@@ -91,13 +84,13 @@ SBFunction SBSymbolContext::GetFunction() {
 }
 
 SBBlock SBSymbolContext::GetBlock() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBBlock, SBSymbolContext, GetBlock);
+  LLDB_INSTRUMENT_VA(this);
 
   return SBBlock(m_opaque_up ? m_opaque_up->block : nullptr);
 }
 
 SBLineEntry SBSymbolContext::GetLineEntry() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBLineEntry, SBSymbolContext, GetLineEntry);
+  LLDB_INSTRUMENT_VA(this);
 
   SBLineEntry sb_line_entry;
   if (m_opaque_up)
@@ -107,7 +100,7 @@ SBLineEntry SBSymbolContext::GetLineEntry() {
 }
 
 SBSymbol SBSymbolContext::GetSymbol() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSymbol, SBSymbolContext, GetSymbol);
+  LLDB_INSTRUMENT_VA(this);
 
   Symbol *symbol = nullptr;
 
@@ -120,35 +113,31 @@ SBSymbol SBSymbolContext::GetSymbol() {
 }
 
 void SBSymbolContext::SetModule(lldb::SBModule module) {
-  LLDB_RECORD_METHOD(void, SBSymbolContext, SetModule, (lldb::SBModule),
-                     module);
+  LLDB_INSTRUMENT_VA(this, module);
 
   ref().module_sp = module.GetSP();
 }
 
 void SBSymbolContext::SetCompileUnit(lldb::SBCompileUnit compile_unit) {
-  LLDB_RECORD_METHOD(void, SBSymbolContext, SetCompileUnit,
-                     (lldb::SBCompileUnit), compile_unit);
+  LLDB_INSTRUMENT_VA(this, compile_unit);
 
   ref().comp_unit = compile_unit.get();
 }
 
 void SBSymbolContext::SetFunction(lldb::SBFunction function) {
-  LLDB_RECORD_METHOD(void, SBSymbolContext, SetFunction, (lldb::SBFunction),
-                     function);
+  LLDB_INSTRUMENT_VA(this, function);
 
   ref().function = function.get();
 }
 
 void SBSymbolContext::SetBlock(lldb::SBBlock block) {
-  LLDB_RECORD_METHOD(void, SBSymbolContext, SetBlock, (lldb::SBBlock), block);
+  LLDB_INSTRUMENT_VA(this, block);
 
   ref().block = block.GetPtr();
 }
 
 void SBSymbolContext::SetLineEntry(lldb::SBLineEntry line_entry) {
-  LLDB_RECORD_METHOD(void, SBSymbolContext, SetLineEntry, (lldb::SBLineEntry),
-                     line_entry);
+  LLDB_INSTRUMENT_VA(this, line_entry);
 
   if (line_entry.IsValid())
     ref().line_entry = line_entry.ref();
@@ -157,8 +146,7 @@ void SBSymbolContext::SetLineEntry(lldb::SBLineEntry line_entry) {
 }
 
 void SBSymbolContext::SetSymbol(lldb::SBSymbol symbol) {
-  LLDB_RECORD_METHOD(void, SBSymbolContext, SetSymbol, (lldb::SBSymbol),
-                     symbol);
+  LLDB_INSTRUMENT_VA(this, symbol);
 
   ref().symbol = symbol.get();
 }
@@ -189,8 +177,7 @@ lldb_private::SymbolContext *SBSymbolContext::get() const {
 }
 
 bool SBSymbolContext::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBSymbolContext, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -205,10 +192,7 @@ bool SBSymbolContext::GetDescription(SBStream &description) {
 SBSymbolContext
 SBSymbolContext::GetParentOfInlinedScope(const SBAddress &curr_frame_pc,
                                          SBAddress &parent_frame_addr) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBSymbolContext, SBSymbolContext,
-                           GetParentOfInlinedScope,
-                           (const lldb::SBAddress &, lldb::SBAddress &),
-                           curr_frame_pc, parent_frame_addr);
+  LLDB_INSTRUMENT_VA(this, curr_frame_pc, parent_frame_addr);
 
   SBSymbolContext sb_sc;
   if (m_opaque_up.get() && curr_frame_pc.IsValid()) {
diff --git a/lldb/source/API/SBSymbolContextList.cpp b/lldb/source/API/SBSymbolContextList.cpp
index 2dddd2805cd7b..baa558caebbc0 100644
--- a/lldb/source/API/SBSymbolContextList.cpp
+++ b/lldb/source/API/SBSymbolContextList.cpp
@@ -7,22 +7,21 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBSymbolContextList.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/Symbol/SymbolContext.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
 SBSymbolContextList::SBSymbolContextList()
     : m_opaque_up(new SymbolContextList()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBSymbolContextList);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBSymbolContextList::SBSymbolContextList(const SBSymbolContextList &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBSymbolContextList,
-                          (const lldb::SBSymbolContextList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -31,9 +30,7 @@ SBSymbolContextList::~SBSymbolContextList() = default;
 
 const SBSymbolContextList &SBSymbolContextList::
 operator=(const SBSymbolContextList &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBSymbolContextList &,
-      SBSymbolContextList, operator=,(const lldb::SBSymbolContextList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_up = clone(rhs.m_opaque_up);
@@ -41,7 +38,7 @@ operator=(const SBSymbolContextList &rhs) {
 }
 
 uint32_t SBSymbolContextList::GetSize() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBSymbolContextList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->GetSize();
@@ -49,8 +46,7 @@ uint32_t SBSymbolContextList::GetSize() const {
 }
 
 SBSymbolContext SBSymbolContextList::GetContextAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContext, SBSymbolContextList,
-                     GetContextAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBSymbolContext sb_sc;
   if (m_opaque_up) {
@@ -62,34 +58,32 @@ SBSymbolContext SBSymbolContextList::GetContextAtIndex(uint32_t idx) {
 }
 
 void SBSymbolContextList::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBSymbolContextList, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     m_opaque_up->Clear();
 }
 
 void SBSymbolContextList::Append(SBSymbolContext &sc) {
-  LLDB_RECORD_METHOD(void, SBSymbolContextList, Append,
-                     (lldb::SBSymbolContext &), sc);
+  LLDB_INSTRUMENT_VA(this, sc);
 
   if (sc.IsValid() && m_opaque_up.get())
     m_opaque_up->Append(*sc);
 }
 
 void SBSymbolContextList::Append(SBSymbolContextList &sc_list) {
-  LLDB_RECORD_METHOD(void, SBSymbolContextList, Append,
-                     (lldb::SBSymbolContextList &), sc_list);
+  LLDB_INSTRUMENT_VA(this, sc_list);
 
   if (sc_list.IsValid() && m_opaque_up.get())
     m_opaque_up->Append(*sc_list);
 }
 
 bool SBSymbolContextList::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContextList, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBSymbolContextList::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBSymbolContextList, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up != nullptr;
 }
@@ -104,8 +98,7 @@ lldb_private::SymbolContextList &SBSymbolContextList::operator*() const {
 }
 
 bool SBSymbolContextList::GetDescription(lldb::SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBSymbolContextList, GetDescription,
-                     (lldb::SBStream &), description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
   if (m_opaque_up)
diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp
index 5bec5610fcff6..75534b2343d42 100644
--- a/lldb/source/API/SBTarget.cpp
+++ b/lldb/source/API/SBTarget.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBTarget.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/lldb-public.h"
 
@@ -93,19 +93,18 @@ static Status AttachToProcess(ProcessAttachInfo &attach_info, Target &target) {
 }
 
 // SBTarget constructor
-SBTarget::SBTarget() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTarget); }
+SBTarget::SBTarget() { LLDB_INSTRUMENT_VA(this); }
 
 SBTarget::SBTarget(const SBTarget &rhs) : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTarget, (const lldb::SBTarget &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBTarget::SBTarget(const TargetSP &target_sp) : m_opaque_sp(target_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTarget, (const lldb::TargetSP &), target_sp);
+  LLDB_INSTRUMENT_VA(this, target_sp);
 }
 
 const SBTarget &SBTarget::operator=(const SBTarget &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBTarget &,
-                     SBTarget, operator=,(const lldb::SBTarget &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = rhs.m_opaque_sp;
@@ -116,22 +115,19 @@ const SBTarget &SBTarget::operator=(const SBTarget &rhs) {
 SBTarget::~SBTarget() = default;
 
 bool SBTarget::EventIsTargetEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBTarget, EventIsTargetEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Target::TargetEventData::GetEventDataFromEvent(event.get()) != nullptr;
 }
 
 SBTarget SBTarget::GetTargetFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBTarget, SBTarget, GetTargetFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Target::TargetEventData::GetTargetFromEvent(event.get());
 }
 
 uint32_t SBTarget::GetNumModulesFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(uint32_t, SBTarget, GetNumModulesFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   const ModuleList module_list =
       Target::TargetEventData::GetModuleListFromEvent(event.get());
@@ -140,9 +136,7 @@ uint32_t SBTarget::GetNumModulesFromEvent(const SBEvent &event) {
 
 SBModule SBTarget::GetModuleAtIndexFromEvent(const uint32_t idx,
                                              const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBModule, SBTarget, GetModuleAtIndexFromEvent,
-                            (const uint32_t, const lldb::SBEvent &), idx,
-                            event);
+  LLDB_INSTRUMENT_VA(idx, event);
 
   const ModuleList module_list =
       Target::TargetEventData::GetModuleListFromEvent(event.get());
@@ -150,24 +144,23 @@ SBModule SBTarget::GetModuleAtIndexFromEvent(const uint32_t idx,
 }
 
 const char *SBTarget::GetBroadcasterClassName() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBTarget,
-                                    GetBroadcasterClassName);
+  LLDB_INSTRUMENT();
 
   return Target::GetStaticBroadcasterClass().AsCString();
 }
 
 bool SBTarget::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTarget, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTarget::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTarget, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr && m_opaque_sp->IsValid();
 }
 
 SBProcess SBTarget::GetProcess() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBTarget, GetProcess);
+  LLDB_INSTRUMENT_VA(this);
 
   SBProcess sb_process;
   ProcessSP process_sp;
@@ -181,7 +174,7 @@ SBProcess SBTarget::GetProcess() {
 }
 
 SBPlatform SBTarget::GetPlatform() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBPlatform, SBTarget, GetPlatform);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (!target_sp)
@@ -194,7 +187,7 @@ SBPlatform SBTarget::GetPlatform() {
 }
 
 SBDebugger SBTarget::GetDebugger() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBDebugger, SBTarget, GetDebugger);
+  LLDB_INSTRUMENT_VA(this);
 
   SBDebugger debugger;
   TargetSP target_sp(GetSP());
@@ -204,7 +197,7 @@ SBDebugger SBTarget::GetDebugger() const {
 }
 
 SBStructuredData SBTarget::GetStatistics() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBStructuredData, SBTarget, GetStatistics);
+  LLDB_INSTRUMENT_VA(this);
 
   SBStructuredData data;
   TargetSP target_sp(GetSP());
@@ -219,7 +212,7 @@ SBStructuredData SBTarget::GetStatistics() {
 }
 
 void SBTarget::SetCollectingStats(bool v) {
-  LLDB_RECORD_METHOD(void, SBTarget, SetCollectingStats, (bool), v);
+  LLDB_INSTRUMENT_VA(this, v);
 
   TargetSP target_sp(GetSP());
   if (!target_sp)
@@ -228,7 +221,7 @@ void SBTarget::SetCollectingStats(bool v) {
 }
 
 bool SBTarget::GetCollectingStats() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, GetCollectingStats);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (!target_sp)
@@ -237,16 +230,14 @@ bool SBTarget::GetCollectingStats() {
 }
 
 SBProcess SBTarget::LoadCore(const char *core_file) {
-  LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, LoadCore, (const char *),
-                     core_file);
+  LLDB_INSTRUMENT_VA(this, core_file);
 
   lldb::SBError error; // Ignored
   return LoadCore(core_file, error);
 }
 
 SBProcess SBTarget::LoadCore(const char *core_file, lldb::SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, LoadCore,
-                     (const char *, lldb::SBError &), core_file, error);
+  LLDB_INSTRUMENT_VA(this, core_file, error);
 
   SBProcess sb_process;
   TargetSP target_sp(GetSP());
@@ -270,9 +261,7 @@ SBProcess SBTarget::LoadCore(const char *core_file, lldb::SBError &error) {
 
 SBProcess SBTarget::LaunchSimple(char const **argv, char const **envp,
                                  const char *working_directory) {
-  LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, LaunchSimple,
-                     (const char **, const char **, const char *), argv, envp,
-                     working_directory);
+  LLDB_INSTRUMENT_VA(this, argv, envp, working_directory);
 
   TargetSP target_sp = GetSP();
   if (!target_sp)
@@ -295,7 +284,7 @@ SBProcess SBTarget::LaunchSimple(char const **argv, char const **envp,
 }
 
 SBError SBTarget::Install() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBTarget, Install);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError sb_error;
   TargetSP target_sp(GetSP());
@@ -312,12 +301,9 @@ SBProcess SBTarget::Launch(SBListener &listener, char const **argv,
                            const char *working_directory,
                            uint32_t launch_flags, // See LaunchFlags
                            bool stop_at_entry, lldb::SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, Launch,
-                     (lldb::SBListener &, const char **, const char **,
-                      const char *, const char *, const char *, const char *,
-                      uint32_t, bool, lldb::SBError &),
-                     listener, argv, envp, stdin_path, stdout_path, stderr_path,
-                     working_directory, launch_flags, stop_at_entry, error);
+  LLDB_INSTRUMENT_VA(this, listener, argv, envp, stdin_path, stdout_path,
+                     stderr_path, working_directory, launch_flags,
+                     stop_at_entry, error);
 
   SBProcess sb_process;
   ProcessSP process_sp;
@@ -395,10 +381,7 @@ SBProcess SBTarget::Launch(SBListener &listener, char const **argv,
 }
 
 SBProcess SBTarget::Launch(SBLaunchInfo &sb_launch_info, SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, Launch,
-                     (lldb::SBLaunchInfo &, lldb::SBError &), sb_launch_info,
-                     error);
-
+  LLDB_INSTRUMENT_VA(this, sb_launch_info, error);
 
   SBProcess sb_process;
   TargetSP target_sp(GetSP());
@@ -444,9 +427,7 @@ SBProcess SBTarget::Launch(SBLaunchInfo &sb_launch_info, SBError &error) {
 }
 
 lldb::SBProcess SBTarget::Attach(SBAttachInfo &sb_attach_info, SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, Attach,
-                     (lldb::SBAttachInfo &, lldb::SBError &), sb_attach_info,
-                     error);
+  LLDB_INSTRUMENT_VA(this, sb_attach_info, error);
 
   SBProcess sb_process;
   TargetSP target_sp(GetSP());
@@ -483,9 +464,7 @@ lldb::SBProcess SBTarget::AttachToProcessWithID(
     lldb::pid_t pid, // The process ID to attach to
     SBError &error   // An error explaining what went wrong if attach fails
 ) {
-  LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, AttachToProcessWithID,
-                     (lldb::SBListener &, lldb::pid_t, lldb::SBError &),
-                     listener, pid, error);
+  LLDB_INSTRUMENT_VA(this, listener, pid, error);
 
   SBProcess sb_process;
   TargetSP target_sp(GetSP());
@@ -515,9 +494,7 @@ lldb::SBProcess SBTarget::AttachToProcessWithName(
     bool wait_for, // if true wait for a new instance of "name" to be launched
     SBError &error // An error explaining what went wrong if attach fails
 ) {
-  LLDB_RECORD_METHOD(lldb::SBProcess, SBTarget, AttachToProcessWithName,
-                     (lldb::SBListener &, const char *, bool, lldb::SBError &),
-                     listener, name, wait_for, error);
+  LLDB_INSTRUMENT_VA(this, listener, name, wait_for, error);
 
   SBProcess sb_process;
   TargetSP target_sp(GetSP());
@@ -541,10 +518,7 @@ lldb::SBProcess SBTarget::AttachToProcessWithName(
 lldb::SBProcess SBTarget::ConnectRemote(SBListener &listener, const char *url,
                                         const char *plugin_name,
                                         SBError &error) {
-  LLDB_RECORD_METHOD(
-      lldb::SBProcess, SBTarget, ConnectRemote,
-      (lldb::SBListener &, const char *, const char *, lldb::SBError &),
-      listener, url, plugin_name, error);
+  LLDB_INSTRUMENT_VA(this, listener, url, plugin_name, error);
 
   SBProcess sb_process;
   ProcessSP process_sp;
@@ -574,7 +548,7 @@ lldb::SBProcess SBTarget::ConnectRemote(SBListener &listener, const char *url,
 }
 
 SBFileSpec SBTarget::GetExecutable() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFileSpec, SBTarget, GetExecutable);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFileSpec exe_file_spec;
   TargetSP target_sp(GetSP());
@@ -588,15 +562,13 @@ SBFileSpec SBTarget::GetExecutable() {
 }
 
 bool SBTarget::operator==(const SBTarget &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBTarget, operator==,(const lldb::SBTarget &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_sp.get() == rhs.m_opaque_sp.get();
 }
 
 bool SBTarget::operator!=(const SBTarget &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBTarget, operator!=,(const lldb::SBTarget &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_sp.get() != rhs.m_opaque_sp.get();
 }
@@ -608,8 +580,7 @@ void SBTarget::SetSP(const lldb::TargetSP &target_sp) {
 }
 
 lldb::SBAddress SBTarget::ResolveLoadAddress(lldb::addr_t vm_addr) {
-  LLDB_RECORD_METHOD(lldb::SBAddress, SBTarget, ResolveLoadAddress,
-                     (lldb::addr_t), vm_addr);
+  LLDB_INSTRUMENT_VA(this, vm_addr);
 
   lldb::SBAddress sb_addr;
   Address &addr = sb_addr.ref();
@@ -627,8 +598,7 @@ lldb::SBAddress SBTarget::ResolveLoadAddress(lldb::addr_t vm_addr) {
 }
 
 lldb::SBAddress SBTarget::ResolveFileAddress(lldb::addr_t file_addr) {
-  LLDB_RECORD_METHOD(lldb::SBAddress, SBTarget, ResolveFileAddress,
-                     (lldb::addr_t), file_addr);
+  LLDB_INSTRUMENT_VA(this, file_addr);
 
   lldb::SBAddress sb_addr;
   Address &addr = sb_addr.ref();
@@ -645,8 +615,7 @@ lldb::SBAddress SBTarget::ResolveFileAddress(lldb::addr_t file_addr) {
 
 lldb::SBAddress SBTarget::ResolvePastLoadAddress(uint32_t stop_id,
                                                  lldb::addr_t vm_addr) {
-  LLDB_RECORD_METHOD(lldb::SBAddress, SBTarget, ResolvePastLoadAddress,
-                     (uint32_t, lldb::addr_t), stop_id, vm_addr);
+  LLDB_INSTRUMENT_VA(this, stop_id, vm_addr);
 
   lldb::SBAddress sb_addr;
   Address &addr = sb_addr.ref();
@@ -666,9 +635,7 @@ lldb::SBAddress SBTarget::ResolvePastLoadAddress(uint32_t stop_id,
 SBSymbolContext
 SBTarget::ResolveSymbolContextForAddress(const SBAddress &addr,
                                          uint32_t resolve_scope) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContext, SBTarget,
-                     ResolveSymbolContextForAddress,
-                     (const lldb::SBAddress &, uint32_t), addr, resolve_scope);
+  LLDB_INSTRUMENT_VA(this, addr, resolve_scope);
 
   SBSymbolContext sc;
   SymbolContextItem scope = static_cast<SymbolContextItem>(resolve_scope);
@@ -683,9 +650,7 @@ SBTarget::ResolveSymbolContextForAddress(const SBAddress &addr,
 
 size_t SBTarget::ReadMemory(const SBAddress addr, void *buf, size_t size,
                             lldb::SBError &error) {
-  LLDB_RECORD_METHOD(size_t, SBTarget, ReadMemory,
-                     (const lldb::SBAddress, void *, size_t, lldb::SBError &),
-                     addr, buf, size, error);
+  LLDB_INSTRUMENT_VA(this, addr, buf, size, error);
 
   SBError sb_error;
   size_t bytes_read = 0;
@@ -703,8 +668,7 @@ size_t SBTarget::ReadMemory(const SBAddress addr, void *buf, size_t size,
 
 SBBreakpoint SBTarget::BreakpointCreateByLocation(const char *file,
                                                   uint32_t line) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation,
-                     (const char *, uint32_t), file, line);
+  LLDB_INSTRUMENT_VA(this, file, line);
 
   return SBBreakpoint(
       BreakpointCreateByLocation(SBFileSpec(file, false), line));
@@ -713,8 +677,7 @@ SBBreakpoint SBTarget::BreakpointCreateByLocation(const char *file,
 SBBreakpoint
 SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec,
                                      uint32_t line) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation,
-                     (const lldb::SBFileSpec &, uint32_t), sb_file_spec, line);
+  LLDB_INSTRUMENT_VA(this, sb_file_spec, line);
 
   return BreakpointCreateByLocation(sb_file_spec, line, 0);
 }
@@ -722,9 +685,7 @@ SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec,
 SBBreakpoint
 SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec,
                                      uint32_t line, lldb::addr_t offset) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation,
-                     (const lldb::SBFileSpec &, uint32_t, lldb::addr_t),
-                     sb_file_spec, line, offset);
+  LLDB_INSTRUMENT_VA(this, sb_file_spec, line, offset);
 
   SBFileSpecList empty_list;
   return BreakpointCreateByLocation(sb_file_spec, line, offset, empty_list);
@@ -734,10 +695,7 @@ SBBreakpoint
 SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec,
                                      uint32_t line, lldb::addr_t offset,
                                      SBFileSpecList &sb_module_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation,
-                     (const lldb::SBFileSpec &, uint32_t, lldb::addr_t,
-                      lldb::SBFileSpecList &),
-                     sb_file_spec, line, offset, sb_module_list);
+  LLDB_INSTRUMENT_VA(this, sb_file_spec, line, offset, sb_module_list);
 
   return BreakpointCreateByLocation(sb_file_spec, line, 0, offset,
                                     sb_module_list);
@@ -746,10 +704,7 @@ SBTarget::BreakpointCreateByLocation(const SBFileSpec &sb_file_spec,
 SBBreakpoint SBTarget::BreakpointCreateByLocation(
     const SBFileSpec &sb_file_spec, uint32_t line, uint32_t column,
     lldb::addr_t offset, SBFileSpecList &sb_module_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation,
-                     (const lldb::SBFileSpec &, uint32_t, uint32_t,
-                      lldb::addr_t, lldb::SBFileSpecList &),
-                     sb_file_spec, line, column, offset, sb_module_list);
+  LLDB_INSTRUMENT_VA(this, sb_file_spec, line, column, offset, sb_module_list);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -777,10 +732,7 @@ SBBreakpoint SBTarget::BreakpointCreateByLocation(
     const SBFileSpec &sb_file_spec, uint32_t line, uint32_t column,
     lldb::addr_t offset, SBFileSpecList &sb_module_list,
     bool move_to_nearest_code) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByLocation,
-                     (const lldb::SBFileSpec &, uint32_t, uint32_t,
-                      lldb::addr_t, lldb::SBFileSpecList &, bool),
-                     sb_file_spec, line, column, offset, sb_module_list,
+  LLDB_INSTRUMENT_VA(this, sb_file_spec, line, column, offset, sb_module_list,
                      move_to_nearest_code);
 
   SBBreakpoint sb_bp;
@@ -807,8 +759,7 @@ SBBreakpoint SBTarget::BreakpointCreateByLocation(
 
 SBBreakpoint SBTarget::BreakpointCreateByName(const char *symbol_name,
                                               const char *module_name) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByName,
-                     (const char *, const char *), symbol_name, module_name);
+  LLDB_INSTRUMENT_VA(this, symbol_name, module_name);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -839,10 +790,7 @@ lldb::SBBreakpoint
 SBTarget::BreakpointCreateByName(const char *symbol_name,
                                  const SBFileSpecList &module_list,
                                  const SBFileSpecList &comp_unit_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByName,
-                     (const char *, const lldb::SBFileSpecList &,
-                      const lldb::SBFileSpecList &),
-                     symbol_name, module_list, comp_unit_list);
+  LLDB_INSTRUMENT_VA(this, symbol_name, module_list, comp_unit_list);
 
   lldb::FunctionNameType name_type_mask = eFunctionNameTypeAuto;
   return BreakpointCreateByName(symbol_name, name_type_mask,
@@ -853,10 +801,8 @@ SBTarget::BreakpointCreateByName(const char *symbol_name,
 lldb::SBBreakpoint SBTarget::BreakpointCreateByName(
     const char *symbol_name, uint32_t name_type_mask,
     const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByName,
-                     (const char *, uint32_t, const lldb::SBFileSpecList &,
-                      const lldb::SBFileSpecList &),
-                     symbol_name, name_type_mask, module_list, comp_unit_list);
+  LLDB_INSTRUMENT_VA(this, symbol_name, name_type_mask, module_list,
+                     comp_unit_list);
 
   return BreakpointCreateByName(symbol_name, name_type_mask,
                                 eLanguageTypeUnknown, module_list,
@@ -867,12 +813,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByName(
     const char *symbol_name, uint32_t name_type_mask,
     LanguageType symbol_language, const SBFileSpecList &module_list,
     const SBFileSpecList &comp_unit_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByName,
-                     (const char *, uint32_t, lldb::LanguageType,
-                      const lldb::SBFileSpecList &,
-                      const lldb::SBFileSpecList &),
-                     symbol_name, name_type_mask, symbol_language, module_list,
-                     comp_unit_list);
+  LLDB_INSTRUMENT_VA(this, symbol_name, name_type_mask, symbol_language,
+                     module_list, comp_unit_list);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -893,11 +835,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByName(
 lldb::SBBreakpoint SBTarget::BreakpointCreateByNames(
     const char *symbol_names[], uint32_t num_names, uint32_t name_type_mask,
     const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) {
-  LLDB_RECORD_METHOD(
-      lldb::SBBreakpoint, SBTarget, BreakpointCreateByNames,
-      (const char **, uint32_t, uint32_t, const lldb::SBFileSpecList &,
-       const lldb::SBFileSpecList &),
-      symbol_names, num_names, name_type_mask, module_list, comp_unit_list);
+  LLDB_INSTRUMENT_VA(this, symbol_names, num_names, name_type_mask, module_list,
+                     comp_unit_list);
 
   return BreakpointCreateByNames(symbol_names, num_names, name_type_mask,
                                  eLanguageTypeUnknown, module_list,
@@ -908,12 +847,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByNames(
     const char *symbol_names[], uint32_t num_names, uint32_t name_type_mask,
     LanguageType symbol_language, const SBFileSpecList &module_list,
     const SBFileSpecList &comp_unit_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByNames,
-                     (const char **, uint32_t, uint32_t, lldb::LanguageType,
-                      const lldb::SBFileSpecList &,
-                      const lldb::SBFileSpecList &),
-                     symbol_names, num_names, name_type_mask, symbol_language,
-                     module_list, comp_unit_list);
+  LLDB_INSTRUMENT_VA(this, symbol_names, num_names, name_type_mask,
+                     symbol_language, module_list, comp_unit_list);
 
   return BreakpointCreateByNames(symbol_names, num_names, name_type_mask,
                                  eLanguageTypeUnknown, 0, module_list,
@@ -924,12 +859,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByNames(
     const char *symbol_names[], uint32_t num_names, uint32_t name_type_mask,
     LanguageType symbol_language, lldb::addr_t offset,
     const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByNames,
-                     (const char **, uint32_t, uint32_t, lldb::LanguageType,
-                      lldb::addr_t, const lldb::SBFileSpecList &,
-                      const lldb::SBFileSpecList &),
-                     symbol_names, num_names, name_type_mask, symbol_language,
-                     offset, module_list, comp_unit_list);
+  LLDB_INSTRUMENT_VA(this, symbol_names, num_names, name_type_mask,
+                     symbol_language, offset, module_list, comp_unit_list);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -949,9 +880,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByNames(
 
 SBBreakpoint SBTarget::BreakpointCreateByRegex(const char *symbol_name_regex,
                                                const char *module_name) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByRegex,
-                     (const char *, const char *), symbol_name_regex,
-                     module_name);
+  LLDB_INSTRUMENT_VA(this, symbol_name_regex, module_name);
 
   SBFileSpecList module_spec_list;
   SBFileSpecList comp_unit_list;
@@ -966,10 +895,7 @@ lldb::SBBreakpoint
 SBTarget::BreakpointCreateByRegex(const char *symbol_name_regex,
                                   const SBFileSpecList &module_list,
                                   const SBFileSpecList &comp_unit_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByRegex,
-                     (const char *, const lldb::SBFileSpecList &,
-                      const lldb::SBFileSpecList &),
-                     symbol_name_regex, module_list, comp_unit_list);
+  LLDB_INSTRUMENT_VA(this, symbol_name_regex, module_list, comp_unit_list);
 
   return BreakpointCreateByRegex(symbol_name_regex, eLanguageTypeUnknown,
                                  module_list, comp_unit_list);
@@ -978,12 +904,8 @@ SBTarget::BreakpointCreateByRegex(const char *symbol_name_regex,
 lldb::SBBreakpoint SBTarget::BreakpointCreateByRegex(
     const char *symbol_name_regex, LanguageType symbol_language,
     const SBFileSpecList &module_list, const SBFileSpecList &comp_unit_list) {
-  LLDB_RECORD_METHOD(
-      lldb::SBBreakpoint, SBTarget, BreakpointCreateByRegex,
-      (const char *, lldb::LanguageType, const lldb::SBFileSpecList &,
-       const lldb::SBFileSpecList &),
-      symbol_name_regex, symbol_language, module_list, comp_unit_list);
-
+  LLDB_INSTRUMENT_VA(this, symbol_name_regex, symbol_language, module_list,
+                     comp_unit_list);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -1003,8 +925,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateByRegex(
 }
 
 SBBreakpoint SBTarget::BreakpointCreateByAddress(addr_t address) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateByAddress,
-                     (lldb::addr_t), address);
+  LLDB_INSTRUMENT_VA(this, address);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -1018,8 +939,7 @@ SBBreakpoint SBTarget::BreakpointCreateByAddress(addr_t address) {
 }
 
 SBBreakpoint SBTarget::BreakpointCreateBySBAddress(SBAddress &sb_address) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateBySBAddress,
-                     (lldb::SBAddress &), sb_address);
+  LLDB_INSTRUMENT_VA(this, sb_address);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -1040,10 +960,7 @@ lldb::SBBreakpoint
 SBTarget::BreakpointCreateBySourceRegex(const char *source_regex,
                                         const lldb::SBFileSpec &source_file,
                                         const char *module_name) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget,
-                     BreakpointCreateBySourceRegex,
-                     (const char *, const lldb::SBFileSpec &, const char *),
-                     source_regex, source_file, module_name);
+  LLDB_INSTRUMENT_VA(this, source_regex, source_file, module_name);
 
   SBFileSpecList module_spec_list;
 
@@ -1063,11 +980,7 @@ SBTarget::BreakpointCreateBySourceRegex(const char *source_regex,
 lldb::SBBreakpoint SBTarget::BreakpointCreateBySourceRegex(
     const char *source_regex, const SBFileSpecList &module_list,
     const lldb::SBFileSpecList &source_file_list) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget,
-                     BreakpointCreateBySourceRegex,
-                     (const char *, const lldb::SBFileSpecList &,
-                      const lldb::SBFileSpecList &),
-                     source_regex, module_list, source_file_list);
+  LLDB_INSTRUMENT_VA(this, source_regex, module_list, source_file_list);
 
   return BreakpointCreateBySourceRegex(source_regex, module_list,
                                        source_file_list, SBStringList());
@@ -1077,11 +990,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateBySourceRegex(
     const char *source_regex, const SBFileSpecList &module_list,
     const lldb::SBFileSpecList &source_file_list,
     const SBStringList &func_names) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget,
-                     BreakpointCreateBySourceRegex,
-                     (const char *, const lldb::SBFileSpecList &,
-                      const lldb::SBFileSpecList &, const lldb::SBStringList &),
-                     source_regex, module_list, source_file_list, func_names);
+  LLDB_INSTRUMENT_VA(this, source_regex, module_list, source_file_list,
+                     func_names);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -1106,9 +1016,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateBySourceRegex(
 lldb::SBBreakpoint
 SBTarget::BreakpointCreateForException(lldb::LanguageType language,
                                        bool catch_bp, bool throw_bp) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, BreakpointCreateForException,
-                     (lldb::LanguageType, bool, bool), language, catch_bp,
-                     throw_bp);
+  LLDB_INSTRUMENT_VA(this, language, catch_bp, throw_bp);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -1126,11 +1034,8 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateFromScript(
     const char *class_name, SBStructuredData &extra_args,
     const SBFileSpecList &module_list, const SBFileSpecList &file_list,
     bool request_hardware) {
-  LLDB_RECORD_METHOD(
-      lldb::SBBreakpoint, SBTarget, BreakpointCreateFromScript,
-      (const char *, lldb::SBStructuredData &, const lldb::SBFileSpecList &,
-       const lldb::SBFileSpecList &, bool),
-      class_name, extra_args, module_list, file_list, request_hardware);
+  LLDB_INSTRUMENT_VA(this, class_name, extra_args, module_list, file_list,
+                     request_hardware);
 
   SBBreakpoint sb_bp;
   TargetSP target_sp(GetSP());
@@ -1153,7 +1058,7 @@ lldb::SBBreakpoint SBTarget::BreakpointCreateFromScript(
 }
 
 uint32_t SBTarget::GetNumBreakpoints() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetNumBreakpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1164,8 +1069,7 @@ uint32_t SBTarget::GetNumBreakpoints() const {
 }
 
 SBBreakpoint SBTarget::GetBreakpointAtIndex(uint32_t idx) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBBreakpoint, SBTarget, GetBreakpointAtIndex,
-                           (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBBreakpoint sb_breakpoint;
   TargetSP target_sp(GetSP());
@@ -1177,8 +1081,7 @@ SBBreakpoint SBTarget::GetBreakpointAtIndex(uint32_t idx) const {
 }
 
 bool SBTarget::BreakpointDelete(break_id_t bp_id) {
-  LLDB_RECORD_METHOD(bool, SBTarget, BreakpointDelete, (lldb::break_id_t),
-                     bp_id);
+  LLDB_INSTRUMENT_VA(this, bp_id);
 
   bool result = false;
   TargetSP target_sp(GetSP());
@@ -1191,8 +1094,7 @@ bool SBTarget::BreakpointDelete(break_id_t bp_id) {
 }
 
 SBBreakpoint SBTarget::FindBreakpointByID(break_id_t bp_id) {
-  LLDB_RECORD_METHOD(lldb::SBBreakpoint, SBTarget, FindBreakpointByID,
-                     (lldb::break_id_t), bp_id);
+  LLDB_INSTRUMENT_VA(this, bp_id);
 
   SBBreakpoint sb_breakpoint;
   TargetSP target_sp(GetSP());
@@ -1206,8 +1108,7 @@ SBBreakpoint SBTarget::FindBreakpointByID(break_id_t bp_id) {
 
 bool SBTarget::FindBreakpointsByName(const char *name,
                                      SBBreakpointList &bkpts) {
-  LLDB_RECORD_METHOD(bool, SBTarget, FindBreakpointsByName,
-                     (const char *, lldb::SBBreakpointList &), name, bkpts);
+  LLDB_INSTRUMENT_VA(this, name, bkpts);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1228,8 +1129,7 @@ bool SBTarget::FindBreakpointsByName(const char *name,
 }
 
 void SBTarget::GetBreakpointNames(SBStringList &names) {
-  LLDB_RECORD_METHOD(void, SBTarget, GetBreakpointNames, (lldb::SBStringList &),
-                     names);
+  LLDB_INSTRUMENT_VA(this, names);
 
   names.Clear();
 
@@ -1245,8 +1145,7 @@ void SBTarget::GetBreakpointNames(SBStringList &names) {
 }
 
 void SBTarget::DeleteBreakpointName(const char *name) {
-  LLDB_RECORD_METHOD(void, SBTarget, DeleteBreakpointName, (const char *),
-                     name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1256,7 +1155,7 @@ void SBTarget::DeleteBreakpointName(const char *name) {
 }
 
 bool SBTarget::EnableAllBreakpoints() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, EnableAllBreakpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1268,7 +1167,7 @@ bool SBTarget::EnableAllBreakpoints() {
 }
 
 bool SBTarget::DisableAllBreakpoints() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, DisableAllBreakpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1280,7 +1179,7 @@ bool SBTarget::DisableAllBreakpoints() {
 }
 
 bool SBTarget::DeleteAllBreakpoints() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, DeleteAllBreakpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1293,9 +1192,7 @@ bool SBTarget::DeleteAllBreakpoints() {
 
 lldb::SBError SBTarget::BreakpointsCreateFromFile(SBFileSpec &source_file,
                                                   SBBreakpointList &new_bps) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBTarget, BreakpointsCreateFromFile,
-                     (lldb::SBFileSpec &, lldb::SBBreakpointList &),
-                     source_file, new_bps);
+  LLDB_INSTRUMENT_VA(this, source_file, new_bps);
 
   SBStringList empty_name_list;
   return BreakpointsCreateFromFile(source_file, empty_name_list, new_bps);
@@ -1304,10 +1201,7 @@ lldb::SBError SBTarget::BreakpointsCreateFromFile(SBFileSpec &source_file,
 lldb::SBError SBTarget::BreakpointsCreateFromFile(SBFileSpec &source_file,
                                                   SBStringList &matching_names,
                                                   SBBreakpointList &new_bps) {
-  LLDB_RECORD_METHOD(
-      lldb::SBError, SBTarget, BreakpointsCreateFromFile,
-      (lldb::SBFileSpec &, lldb::SBStringList &, lldb::SBBreakpointList &),
-      source_file, matching_names, new_bps);
+  LLDB_INSTRUMENT_VA(this, source_file, matching_names, new_bps);
 
   SBError sberr;
   TargetSP target_sp(GetSP());
@@ -1339,8 +1233,7 @@ lldb::SBError SBTarget::BreakpointsCreateFromFile(SBFileSpec &source_file,
 }
 
 lldb::SBError SBTarget::BreakpointsWriteToFile(SBFileSpec &dest_file) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBTarget, BreakpointsWriteToFile,
-                     (lldb::SBFileSpec &), dest_file);
+  LLDB_INSTRUMENT_VA(this, dest_file);
 
   SBError sberr;
   TargetSP target_sp(GetSP());
@@ -1355,9 +1248,7 @@ lldb::SBError SBTarget::BreakpointsWriteToFile(SBFileSpec &dest_file) {
 lldb::SBError SBTarget::BreakpointsWriteToFile(SBFileSpec &dest_file,
                                                SBBreakpointList &bkpt_list,
                                                bool append) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBTarget, BreakpointsWriteToFile,
-                     (lldb::SBFileSpec &, lldb::SBBreakpointList &, bool),
-                     dest_file, bkpt_list, append);
+  LLDB_INSTRUMENT_VA(this, dest_file, bkpt_list, append);
 
   SBError sberr;
   TargetSP target_sp(GetSP());
@@ -1375,7 +1266,7 @@ lldb::SBError SBTarget::BreakpointsWriteToFile(SBFileSpec &dest_file,
 }
 
 uint32_t SBTarget::GetNumWatchpoints() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetNumWatchpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1386,8 +1277,7 @@ uint32_t SBTarget::GetNumWatchpoints() const {
 }
 
 SBWatchpoint SBTarget::GetWatchpointAtIndex(uint32_t idx) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBWatchpoint, SBTarget, GetWatchpointAtIndex,
-                           (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBWatchpoint sb_watchpoint;
   TargetSP target_sp(GetSP());
@@ -1399,9 +1289,7 @@ SBWatchpoint SBTarget::GetWatchpointAtIndex(uint32_t idx) const {
 }
 
 bool SBTarget::DeleteWatchpoint(watch_id_t wp_id) {
-  LLDB_RECORD_METHOD(bool, SBTarget, DeleteWatchpoint, (lldb::watch_id_t),
-                     wp_id);
-
+  LLDB_INSTRUMENT_VA(this, wp_id);
 
   bool result = false;
   TargetSP target_sp(GetSP());
@@ -1416,9 +1304,7 @@ bool SBTarget::DeleteWatchpoint(watch_id_t wp_id) {
 }
 
 SBWatchpoint SBTarget::FindWatchpointByID(lldb::watch_id_t wp_id) {
-  LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBTarget, FindWatchpointByID,
-                     (lldb::watch_id_t), wp_id);
-
+  LLDB_INSTRUMENT_VA(this, wp_id);
 
   SBWatchpoint sb_watchpoint;
   lldb::WatchpointSP watchpoint_sp;
@@ -1437,9 +1323,7 @@ SBWatchpoint SBTarget::FindWatchpointByID(lldb::watch_id_t wp_id) {
 lldb::SBWatchpoint SBTarget::WatchAddress(lldb::addr_t addr, size_t size,
                                           bool read, bool write,
                                           SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBTarget, WatchAddress,
-                     (lldb::addr_t, size_t, bool, bool, lldb::SBError &), addr,
-                     size, read, write, error);
+  LLDB_INSTRUMENT_VA(this, addr, size, read, write, error);
 
   SBWatchpoint sb_watchpoint;
   lldb::WatchpointSP watchpoint_sp;
@@ -1472,7 +1356,7 @@ lldb::SBWatchpoint SBTarget::WatchAddress(lldb::addr_t addr, size_t size,
 }
 
 bool SBTarget::EnableAllWatchpoints() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, EnableAllWatchpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1486,7 +1370,7 @@ bool SBTarget::EnableAllWatchpoints() {
 }
 
 bool SBTarget::DisableAllWatchpoints() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, DisableAllWatchpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1501,9 +1385,7 @@ bool SBTarget::DisableAllWatchpoints() {
 
 SBValue SBTarget::CreateValueFromAddress(const char *name, SBAddress addr,
                                          SBType type) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, CreateValueFromAddress,
-                     (const char *, lldb::SBAddress, lldb::SBType), name, addr,
-                     type);
+  LLDB_INSTRUMENT_VA(this, name, addr, type);
 
   SBValue sb_value;
   lldb::ValueObjectSP new_value_sp;
@@ -1521,9 +1403,7 @@ SBValue SBTarget::CreateValueFromAddress(const char *name, SBAddress addr,
 
 lldb::SBValue SBTarget::CreateValueFromData(const char *name, lldb::SBData data,
                                             lldb::SBType type) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, CreateValueFromData,
-                     (const char *, lldb::SBData, lldb::SBType), name, data,
-                     type);
+  LLDB_INSTRUMENT_VA(this, name, data, type);
 
   SBValue sb_value;
   lldb::ValueObjectSP new_value_sp;
@@ -1541,8 +1421,7 @@ lldb::SBValue SBTarget::CreateValueFromData(const char *name, lldb::SBData data,
 
 lldb::SBValue SBTarget::CreateValueFromExpression(const char *name,
                                                   const char *expr) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, CreateValueFromExpression,
-                     (const char *, const char *), name, expr);
+  LLDB_INSTRUMENT_VA(this, name, expr);
 
   SBValue sb_value;
   lldb::ValueObjectSP new_value_sp;
@@ -1557,7 +1436,7 @@ lldb::SBValue SBTarget::CreateValueFromExpression(const char *name,
 }
 
 bool SBTarget::DeleteAllWatchpoints() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTarget, DeleteAllWatchpoints);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1572,9 +1451,7 @@ bool SBTarget::DeleteAllWatchpoints() {
 
 void SBTarget::AppendImageSearchPath(const char *from, const char *to,
                                      lldb::SBError &error) {
-  LLDB_RECORD_METHOD(void, SBTarget, AppendImageSearchPath,
-                     (const char *, const char *, lldb::SBError &), from, to,
-                     error);
+  LLDB_INSTRUMENT_VA(this, from, to, error);
 
   TargetSP target_sp(GetSP());
   if (!target_sp)
@@ -1591,18 +1468,14 @@ void SBTarget::AppendImageSearchPath(const char *from, const char *to,
 
 lldb::SBModule SBTarget::AddModule(const char *path, const char *triple,
                                    const char *uuid_cstr) {
-  LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, AddModule,
-                     (const char *, const char *, const char *), path, triple,
-                     uuid_cstr);
+  LLDB_INSTRUMENT_VA(this, path, triple, uuid_cstr);
 
   return AddModule(path, triple, uuid_cstr, nullptr);
 }
 
 lldb::SBModule SBTarget::AddModule(const char *path, const char *triple,
                                    const char *uuid_cstr, const char *symfile) {
-  LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, AddModule,
-                     (const char *, const char *, const char *, const char *),
-                     path, triple, uuid_cstr, symfile);
+  LLDB_INSTRUMENT_VA(this, path, triple, uuid_cstr, symfile);
 
   lldb::SBModule sb_module;
   TargetSP target_sp(GetSP());
@@ -1629,8 +1502,7 @@ lldb::SBModule SBTarget::AddModule(const char *path, const char *triple,
 }
 
 lldb::SBModule SBTarget::AddModule(const SBModuleSpec &module_spec) {
-  LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, AddModule,
-                     (const lldb::SBModuleSpec &), module_spec);
+  LLDB_INSTRUMENT_VA(this, module_spec);
 
   lldb::SBModule sb_module;
   TargetSP target_sp(GetSP());
@@ -1641,7 +1513,7 @@ lldb::SBModule SBTarget::AddModule(const SBModuleSpec &module_spec) {
 }
 
 bool SBTarget::AddModule(lldb::SBModule &module) {
-  LLDB_RECORD_METHOD(bool, SBTarget, AddModule, (lldb::SBModule &), module);
+  LLDB_INSTRUMENT_VA(this, module);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1652,7 +1524,7 @@ bool SBTarget::AddModule(lldb::SBModule &module) {
 }
 
 uint32_t SBTarget::GetNumModules() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetNumModules);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t num = 0;
   TargetSP target_sp(GetSP());
@@ -1665,14 +1537,13 @@ uint32_t SBTarget::GetNumModules() const {
 }
 
 void SBTarget::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBTarget, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp.reset();
 }
 
 SBModule SBTarget::FindModule(const SBFileSpec &sb_file_spec) {
-  LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, FindModule,
-                     (const lldb::SBFileSpec &), sb_file_spec);
+  LLDB_INSTRUMENT_VA(this, sb_file_spec);
 
   SBModule sb_module;
   TargetSP target_sp(GetSP());
@@ -1685,8 +1556,7 @@ SBModule SBTarget::FindModule(const SBFileSpec &sb_file_spec) {
 }
 
 SBSymbolContextList SBTarget::FindCompileUnits(const SBFileSpec &sb_file_spec) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBTarget, FindCompileUnits,
-                     (const lldb::SBFileSpec &), sb_file_spec);
+  LLDB_INSTRUMENT_VA(this, sb_file_spec);
 
   SBSymbolContextList sb_sc_list;
   const TargetSP target_sp(GetSP());
@@ -1696,7 +1566,7 @@ SBSymbolContextList SBTarget::FindCompileUnits(const SBFileSpec &sb_file_spec) {
 }
 
 lldb::ByteOrder SBTarget::GetByteOrder() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::ByteOrder, SBTarget, GetByteOrder);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp)
@@ -1705,7 +1575,7 @@ lldb::ByteOrder SBTarget::GetByteOrder() {
 }
 
 const char *SBTarget::GetTriple() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTarget, GetTriple);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1720,7 +1590,7 @@ const char *SBTarget::GetTriple() {
 }
 
 uint32_t SBTarget::GetDataByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTarget, GetDataByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1730,7 +1600,7 @@ uint32_t SBTarget::GetDataByteSize() {
 }
 
 uint32_t SBTarget::GetCodeByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTarget, GetCodeByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1740,7 +1610,7 @@ uint32_t SBTarget::GetCodeByteSize() {
 }
 
 uint32_t SBTarget::GetMaximumNumberOfChildrenToDisplay() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBTarget, GetMaximumNumberOfChildrenToDisplay);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if(target_sp){
@@ -1750,7 +1620,7 @@ uint32_t SBTarget::GetMaximumNumberOfChildrenToDisplay() const {
 }
 
 uint32_t SBTarget::GetAddressByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTarget, GetAddressByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp)
@@ -1759,8 +1629,7 @@ uint32_t SBTarget::GetAddressByteSize() {
 }
 
 SBModule SBTarget::GetModuleAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBModule, SBTarget, GetModuleAtIndex, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBModule sb_module;
   ModuleSP module_sp;
@@ -1775,7 +1644,7 @@ SBModule SBTarget::GetModuleAtIndex(uint32_t idx) {
 }
 
 bool SBTarget::RemoveModule(lldb::SBModule module) {
-  LLDB_RECORD_METHOD(bool, SBTarget, RemoveModule, (lldb::SBModule), module);
+  LLDB_INSTRUMENT_VA(this, module);
 
   TargetSP target_sp(GetSP());
   if (target_sp)
@@ -1784,9 +1653,7 @@ bool SBTarget::RemoveModule(lldb::SBModule module) {
 }
 
 SBBroadcaster SBTarget::GetBroadcaster() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBBroadcaster, SBTarget,
-                                   GetBroadcaster);
-
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   SBBroadcaster broadcaster(target_sp.get(), false);
@@ -1796,9 +1663,7 @@ SBBroadcaster SBTarget::GetBroadcaster() const {
 
 bool SBTarget::GetDescription(SBStream &description,
                               lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTarget, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   Stream &strm = description.ref();
 
@@ -1813,8 +1678,7 @@ bool SBTarget::GetDescription(SBStream &description,
 
 lldb::SBSymbolContextList SBTarget::FindFunctions(const char *name,
                                                   uint32_t name_type_mask) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBTarget, FindFunctions,
-                     (const char *, uint32_t), name, name_type_mask);
+  LLDB_INSTRUMENT_VA(this, name, name_type_mask);
 
   lldb::SBSymbolContextList sb_sc_list;
   if (!name || !name[0])
@@ -1837,9 +1701,7 @@ lldb::SBSymbolContextList SBTarget::FindFunctions(const char *name,
 lldb::SBSymbolContextList SBTarget::FindGlobalFunctions(const char *name,
                                                         uint32_t max_matches,
                                                         MatchType matchtype) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBTarget, FindGlobalFunctions,
-                     (const char *, uint32_t, lldb::MatchType), name,
-                     max_matches, matchtype);
+  LLDB_INSTRUMENT_VA(this, name, max_matches, matchtype);
 
   lldb::SBSymbolContextList sb_sc_list;
   if (name && name[0]) {
@@ -1873,8 +1735,7 @@ lldb::SBSymbolContextList SBTarget::FindGlobalFunctions(const char *name,
 }
 
 lldb::SBType SBTarget::FindFirstType(const char *typename_cstr) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBTarget, FindFirstType, (const char *),
-                     typename_cstr);
+  LLDB_INSTRUMENT_VA(this, typename_cstr);
 
   TargetSP target_sp(GetSP());
   if (typename_cstr && typename_cstr[0] && target_sp) {
@@ -1915,8 +1776,7 @@ lldb::SBType SBTarget::FindFirstType(const char *typename_cstr) {
 }
 
 SBType SBTarget::GetBasicType(lldb::BasicType type) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBTarget, GetBasicType, (lldb::BasicType),
-                     type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -1928,8 +1788,7 @@ SBType SBTarget::GetBasicType(lldb::BasicType type) {
 }
 
 lldb::SBTypeList SBTarget::FindTypes(const char *typename_cstr) {
-  LLDB_RECORD_METHOD(lldb::SBTypeList, SBTarget, FindTypes, (const char *),
-                     typename_cstr);
+  LLDB_INSTRUMENT_VA(this, typename_cstr);
 
   SBTypeList sb_type_list;
   TargetSP target_sp(GetSP());
@@ -1973,8 +1832,7 @@ lldb::SBTypeList SBTarget::FindTypes(const char *typename_cstr) {
 
 SBValueList SBTarget::FindGlobalVariables(const char *name,
                                           uint32_t max_matches) {
-  LLDB_RECORD_METHOD(lldb::SBValueList, SBTarget, FindGlobalVariables,
-                     (const char *, uint32_t), name, max_matches);
+  LLDB_INSTRUMENT_VA(this, name, max_matches);
 
   SBValueList sb_value_list;
 
@@ -2002,9 +1860,7 @@ SBValueList SBTarget::FindGlobalVariables(const char *name,
 SBValueList SBTarget::FindGlobalVariables(const char *name,
                                           uint32_t max_matches,
                                           MatchType matchtype) {
-  LLDB_RECORD_METHOD(lldb::SBValueList, SBTarget, FindGlobalVariables,
-                     (const char *, uint32_t, lldb::MatchType), name,
-                     max_matches, matchtype);
+  LLDB_INSTRUMENT_VA(this, name, max_matches, matchtype);
 
   SBValueList sb_value_list;
 
@@ -2046,8 +1902,7 @@ SBValueList SBTarget::FindGlobalVariables(const char *name,
 }
 
 lldb::SBValue SBTarget::FindFirstGlobalVariable(const char *name) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, FindFirstGlobalVariable,
-                     (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   SBValueList sb_value_list(FindGlobalVariables(name, 1));
   if (sb_value_list.IsValid() && sb_value_list.GetSize() > 0)
@@ -2056,7 +1911,7 @@ lldb::SBValue SBTarget::FindFirstGlobalVariable(const char *name) {
 }
 
 SBSourceManager SBTarget::GetSourceManager() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBSourceManager, SBTarget, GetSourceManager);
+  LLDB_INSTRUMENT_VA(this);
 
   SBSourceManager source_manager(*this);
   return source_manager;
@@ -2064,8 +1919,7 @@ SBSourceManager SBTarget::GetSourceManager() {
 
 lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr,
                                                    uint32_t count) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, ReadInstructions,
-                     (lldb::SBAddress, uint32_t), base_addr, count);
+  LLDB_INSTRUMENT_VA(this, base_addr, count);
 
   return ReadInstructions(base_addr, count, nullptr);
 }
@@ -2073,9 +1927,7 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr,
 lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr,
                                                    uint32_t count,
                                                    const char *flavor_string) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, ReadInstructions,
-                     (lldb::SBAddress, uint32_t, const char *), base_addr,
-                     count, flavor_string);
+  LLDB_INSTRUMENT_VA(this, base_addr, count, flavor_string);
 
   SBInstructionList sb_instructions;
 
@@ -2105,9 +1957,7 @@ lldb::SBInstructionList SBTarget::ReadInstructions(lldb::SBAddress base_addr,
 lldb::SBInstructionList SBTarget::GetInstructions(lldb::SBAddress base_addr,
                                                   const void *buf,
                                                   size_t size) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, GetInstructions,
-                     (lldb::SBAddress, const void *, size_t), base_addr, buf,
-                     size);
+  LLDB_INSTRUMENT_VA(this, base_addr, buf, size);
 
   return GetInstructionsWithFlavor(base_addr, nullptr, buf, size);
 }
@@ -2116,10 +1966,7 @@ lldb::SBInstructionList
 SBTarget::GetInstructionsWithFlavor(lldb::SBAddress base_addr,
                                     const char *flavor_string, const void *buf,
                                     size_t size) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget,
-                     GetInstructionsWithFlavor,
-                     (lldb::SBAddress, const char *, const void *, size_t),
-                     base_addr, flavor_string, buf, size);
+  LLDB_INSTRUMENT_VA(this, base_addr, flavor_string, buf, size);
 
   SBInstructionList sb_instructions;
 
@@ -2143,9 +1990,7 @@ SBTarget::GetInstructionsWithFlavor(lldb::SBAddress base_addr,
 lldb::SBInstructionList SBTarget::GetInstructions(lldb::addr_t base_addr,
                                                   const void *buf,
                                                   size_t size) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget, GetInstructions,
-                     (lldb::addr_t, const void *, size_t), base_addr, buf,
-                     size);
+  LLDB_INSTRUMENT_VA(this, base_addr, buf, size);
 
   return GetInstructionsWithFlavor(ResolveLoadAddress(base_addr), nullptr, buf,
                                    size);
@@ -2155,10 +2000,7 @@ lldb::SBInstructionList
 SBTarget::GetInstructionsWithFlavor(lldb::addr_t base_addr,
                                     const char *flavor_string, const void *buf,
                                     size_t size) {
-  LLDB_RECORD_METHOD(lldb::SBInstructionList, SBTarget,
-                     GetInstructionsWithFlavor,
-                     (lldb::addr_t, const char *, const void *, size_t),
-                     base_addr, flavor_string, buf, size);
+  LLDB_INSTRUMENT_VA(this, base_addr, flavor_string, buf, size);
 
   return GetInstructionsWithFlavor(ResolveLoadAddress(base_addr), flavor_string,
                                    buf, size);
@@ -2166,9 +2008,7 @@ SBTarget::GetInstructionsWithFlavor(lldb::addr_t base_addr,
 
 SBError SBTarget::SetSectionLoadAddress(lldb::SBSection section,
                                         lldb::addr_t section_base_addr) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBTarget, SetSectionLoadAddress,
-                     (lldb::SBSection, lldb::addr_t), section,
-                     section_base_addr);
+  LLDB_INSTRUMENT_VA(this, section, section_base_addr);
 
   SBError sb_error;
   TargetSP target_sp(GetSP());
@@ -2204,8 +2044,7 @@ SBError SBTarget::SetSectionLoadAddress(lldb::SBSection section,
 }
 
 SBError SBTarget::ClearSectionLoadAddress(lldb::SBSection section) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBTarget, ClearSectionLoadAddress,
-                     (lldb::SBSection), section);
+  LLDB_INSTRUMENT_VA(this, section);
 
   SBError sb_error;
 
@@ -2240,8 +2079,7 @@ SBError SBTarget::ClearSectionLoadAddress(lldb::SBSection section) {
 
 SBError SBTarget::SetModuleLoadAddress(lldb::SBModule module,
                                        int64_t slide_offset) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBTarget, SetModuleLoadAddress,
-                     (lldb::SBModule, int64_t), module, slide_offset);
+  LLDB_INSTRUMENT_VA(this, module, slide_offset);
 
   SBError sb_error;
 
@@ -2274,8 +2112,7 @@ SBError SBTarget::SetModuleLoadAddress(lldb::SBModule module,
 }
 
 SBError SBTarget::ClearModuleLoadAddress(lldb::SBModule module) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBTarget, ClearModuleLoadAddress,
-                     (lldb::SBModule), module);
+  LLDB_INSTRUMENT_VA(this, module);
 
   SBError sb_error;
 
@@ -2327,8 +2164,7 @@ SBError SBTarget::ClearModuleLoadAddress(lldb::SBModule module) {
 
 lldb::SBSymbolContextList SBTarget::FindSymbols(const char *name,
                                                 lldb::SymbolType symbol_type) {
-  LLDB_RECORD_METHOD(lldb::SBSymbolContextList, SBTarget, FindSymbols,
-                     (const char *, lldb::SymbolType), name, symbol_type);
+  LLDB_INSTRUMENT_VA(this, name, symbol_type);
 
   SBSymbolContextList sb_sc_list;
   if (name && name[0]) {
@@ -2341,8 +2177,7 @@ lldb::SBSymbolContextList SBTarget::FindSymbols(const char *name,
 }
 
 lldb::SBValue SBTarget::EvaluateExpression(const char *expr) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, EvaluateExpression,
-                     (const char *), expr);
+  LLDB_INSTRUMENT_VA(this, expr);
 
   TargetSP target_sp(GetSP());
   if (!target_sp)
@@ -2358,9 +2193,7 @@ lldb::SBValue SBTarget::EvaluateExpression(const char *expr) {
 
 lldb::SBValue SBTarget::EvaluateExpression(const char *expr,
                                            const SBExpressionOptions &options) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBTarget, EvaluateExpression,
-                     (const char *, const lldb::SBExpressionOptions &), expr,
-                     options);
+  LLDB_INSTRUMENT_VA(this, expr, options);
 
   Log *expr_log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_EXPRESSIONS));
   SBValue expr_result;
@@ -2393,7 +2226,7 @@ lldb::SBValue SBTarget::EvaluateExpression(const char *expr,
 }
 
 lldb::addr_t SBTarget::GetStackRedZoneSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBTarget, GetStackRedZoneSize);
+  LLDB_INSTRUMENT_VA(this);
 
   TargetSP target_sp(GetSP());
   if (target_sp) {
@@ -2410,8 +2243,7 @@ lldb::addr_t SBTarget::GetStackRedZoneSize() {
 }
 
 bool SBTarget::IsLoaded(const SBModule &module) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBTarget, IsLoaded, (const lldb::SBModule &),
-                           module);
+  LLDB_INSTRUMENT_VA(this, module);
 
   TargetSP target_sp(GetSP());
   if (!target_sp)
@@ -2425,7 +2257,7 @@ bool SBTarget::IsLoaded(const SBModule &module) const {
 }
 
 lldb::SBLaunchInfo SBTarget::GetLaunchInfo() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBLaunchInfo, SBTarget, GetLaunchInfo);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBLaunchInfo launch_info(nullptr);
   TargetSP target_sp(GetSP());
@@ -2435,8 +2267,7 @@ lldb::SBLaunchInfo SBTarget::GetLaunchInfo() const {
 }
 
 void SBTarget::SetLaunchInfo(const lldb::SBLaunchInfo &launch_info) {
-  LLDB_RECORD_METHOD(void, SBTarget, SetLaunchInfo,
-                     (const lldb::SBLaunchInfo &), launch_info);
+  LLDB_INSTRUMENT_VA(this, launch_info);
 
   TargetSP target_sp(GetSP());
   if (target_sp)
@@ -2444,7 +2275,7 @@ void SBTarget::SetLaunchInfo(const lldb::SBLaunchInfo &launch_info) {
 }
 
 SBEnvironment SBTarget::GetEnvironment() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBEnvironment, SBTarget, GetEnvironment);
+  LLDB_INSTRUMENT_VA(this);
   TargetSP target_sp(GetSP());
 
   if (target_sp) {
@@ -2455,7 +2286,7 @@ SBEnvironment SBTarget::GetEnvironment() {
 }
 
 lldb::SBTrace SBTarget::GetTrace() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTrace, SBTarget, GetTrace);
+  LLDB_INSTRUMENT_VA(this);
   TargetSP target_sp(GetSP());
 
   if (target_sp)
@@ -2465,8 +2296,7 @@ lldb::SBTrace SBTarget::GetTrace() {
 }
 
 lldb::SBTrace SBTarget::CreateTrace(lldb::SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBTrace, SBTarget, CreateTrace, (lldb::SBError &),
-                     error);
+  LLDB_INSTRUMENT_VA(this, error);
   TargetSP target_sp(GetSP());
   error.Clear();
 
diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp
index f10477c553705..46a6c2759140b 100644
--- a/lldb/source/API/SBThread.cpp
+++ b/lldb/source/API/SBThread.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBThread.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBAddress.h"
 #include "lldb/API/SBDebugger.h"
@@ -40,6 +39,7 @@
 #include "lldb/Target/ThreadPlanStepInstruction.h"
 #include "lldb/Target/ThreadPlanStepOut.h"
 #include "lldb/Target/ThreadPlanStepRange.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/State.h"
 #include "lldb/Utility/Stream.h"
 #include "lldb/Utility/StructuredData.h"
@@ -51,24 +51,23 @@ using namespace lldb;
 using namespace lldb_private;
 
 const char *SBThread::GetBroadcasterClassName() {
-  LLDB_RECORD_STATIC_METHOD_NO_ARGS(const char *, SBThread,
-                                    GetBroadcasterClassName);
+  LLDB_INSTRUMENT();
 
   return Thread::GetStaticBroadcasterClass().AsCString();
 }
 
 // Constructors
 SBThread::SBThread() : m_opaque_sp(new ExecutionContextRef()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBThread);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBThread::SBThread(const ThreadSP &lldb_object_sp)
     : m_opaque_sp(new ExecutionContextRef(lldb_object_sp)) {
-  LLDB_RECORD_CONSTRUCTOR(SBThread, (const lldb::ThreadSP &), lldb_object_sp);
+  LLDB_INSTRUMENT_VA(this, lldb_object_sp);
 }
 
 SBThread::SBThread(const SBThread &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBThread, (const lldb::SBThread &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = clone(rhs.m_opaque_sp);
 }
@@ -76,8 +75,7 @@ SBThread::SBThread(const SBThread &rhs) {
 // Assignment operator
 
 const lldb::SBThread &SBThread::operator=(const SBThread &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBThread &,
-                     SBThread, operator=,(const lldb::SBThread &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = clone(rhs.m_opaque_sp);
@@ -88,7 +86,7 @@ const lldb::SBThread &SBThread::operator=(const SBThread &rhs) {
 SBThread::~SBThread() = default;
 
 lldb::SBQueue SBThread::GetQueue() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBQueue, SBThread, GetQueue);
+  LLDB_INSTRUMENT_VA(this);
 
   SBQueue sb_queue;
   QueueSP queue_sp;
@@ -109,11 +107,11 @@ lldb::SBQueue SBThread::GetQueue() const {
 }
 
 bool SBThread::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThread, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBThread::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThread, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -130,13 +128,13 @@ SBThread::operator bool() const {
 }
 
 void SBThread::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBThread, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp->Clear();
 }
 
 StopReason SBThread::GetStopReason() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::StopReason, SBThread, GetStopReason);
+  LLDB_INSTRUMENT_VA(this);
 
   StopReason reason = eStopReasonInvalid;
   std::unique_lock<std::recursive_mutex> lock;
@@ -153,7 +151,7 @@ StopReason SBThread::GetStopReason() {
 }
 
 size_t SBThread::GetStopReasonDataCount() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBThread, GetStopReasonDataCount);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -210,8 +208,7 @@ size_t SBThread::GetStopReasonDataCount() {
 }
 
 uint64_t SBThread::GetStopReasonDataAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(uint64_t, SBThread, GetStopReasonDataAtIndex, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -280,8 +277,7 @@ uint64_t SBThread::GetStopReasonDataAtIndex(uint32_t idx) {
 }
 
 bool SBThread::GetStopReasonExtendedInfoAsJSON(lldb::SBStream &stream) {
-  LLDB_RECORD_METHOD(bool, SBThread, GetStopReasonExtendedInfoAsJSON,
-                     (lldb::SBStream &), stream);
+  LLDB_INSTRUMENT_VA(this, stream);
 
   Stream &strm = stream.ref();
 
@@ -303,9 +299,7 @@ bool SBThread::GetStopReasonExtendedInfoAsJSON(lldb::SBStream &stream) {
 
 SBThreadCollection
 SBThread::GetStopReasonExtendedBacktraces(InstrumentationRuntimeType type) {
-  LLDB_RECORD_METHOD(lldb::SBThreadCollection, SBThread,
-                     GetStopReasonExtendedBacktraces,
-                     (lldb::InstrumentationRuntimeType), type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   SBThreadCollection threads;
 
@@ -328,8 +322,7 @@ SBThread::GetStopReasonExtendedBacktraces(InstrumentationRuntimeType type) {
 }
 
 size_t SBThread::GetStopDescription(char *dst, size_t dst_len) {
-  LLDB_RECORD_METHOD(size_t, SBThread, GetStopDescription, (char *, size_t),
-                     dst, "", dst_len);
+  LLDB_INSTRUMENT_VA(this, dst, dst_len);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -357,7 +350,7 @@ size_t SBThread::GetStopDescription(char *dst, size_t dst_len) {
 }
 
 SBValue SBThread::GetStopReturnValue() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBThread, GetStopReturnValue);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueObjectSP return_valobj_sp;
   std::unique_lock<std::recursive_mutex> lock;
@@ -381,7 +374,7 @@ void SBThread::SetThread(const ThreadSP &lldb_object_sp) {
 }
 
 lldb::tid_t SBThread::GetThreadID() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::tid_t, SBThread, GetThreadID);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadSP thread_sp(m_opaque_sp->GetThreadSP());
   if (thread_sp)
@@ -390,7 +383,7 @@ lldb::tid_t SBThread::GetThreadID() const {
 }
 
 uint32_t SBThread::GetIndexID() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBThread, GetIndexID);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadSP thread_sp(m_opaque_sp->GetThreadSP());
   if (thread_sp)
@@ -399,7 +392,7 @@ uint32_t SBThread::GetIndexID() const {
 }
 
 const char *SBThread::GetName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBThread, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   std::unique_lock<std::recursive_mutex> lock;
@@ -416,7 +409,7 @@ const char *SBThread::GetName() const {
 }
 
 const char *SBThread::GetQueueName() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(const char *, SBThread, GetQueueName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   std::unique_lock<std::recursive_mutex> lock;
@@ -433,7 +426,7 @@ const char *SBThread::GetQueueName() const {
 }
 
 lldb::queue_id_t SBThread::GetQueueID() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::queue_id_t, SBThread, GetQueueID);
+  LLDB_INSTRUMENT_VA(this);
 
   queue_id_t id = LLDB_INVALID_QUEUE_ID;
   std::unique_lock<std::recursive_mutex> lock;
@@ -450,8 +443,7 @@ lldb::queue_id_t SBThread::GetQueueID() const {
 }
 
 bool SBThread::GetInfoItemByPathAsString(const char *path, SBStream &strm) {
-  LLDB_RECORD_METHOD(bool, SBThread, GetInfoItemByPathAsString,
-                     (const char *, lldb::SBStream &), path, strm);
+  LLDB_INSTRUMENT_VA(this, path, strm);
 
   bool success = false;
   std::unique_lock<std::recursive_mutex> lock;
@@ -532,16 +524,14 @@ SBError SBThread::ResumeNewPlan(ExecutionContext &exe_ctx,
 }
 
 void SBThread::StepOver(lldb::RunMode stop_other_threads) {
-  LLDB_RECORD_METHOD(void, SBThread, StepOver, (lldb::RunMode),
-                     stop_other_threads);
+  LLDB_INSTRUMENT_VA(this, stop_other_threads);
 
   SBError error; // Ignored
   StepOver(stop_other_threads, error);
 }
 
 void SBThread::StepOver(lldb::RunMode stop_other_threads, SBError &error) {
-  LLDB_RECORD_METHOD(void, SBThread, StepOver, (lldb::RunMode, lldb::SBError &),
-                     stop_other_threads, error);
+  LLDB_INSTRUMENT_VA(this, stop_other_threads, error);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -573,16 +563,14 @@ void SBThread::StepOver(lldb::RunMode stop_other_threads, SBError &error) {
 }
 
 void SBThread::StepInto(lldb::RunMode stop_other_threads) {
-  LLDB_RECORD_METHOD(void, SBThread, StepInto, (lldb::RunMode),
-                     stop_other_threads);
+  LLDB_INSTRUMENT_VA(this, stop_other_threads);
 
   StepInto(nullptr, stop_other_threads);
 }
 
 void SBThread::StepInto(const char *target_name,
                         lldb::RunMode stop_other_threads) {
-  LLDB_RECORD_METHOD(void, SBThread, StepInto, (const char *, lldb::RunMode),
-                     target_name, stop_other_threads);
+  LLDB_INSTRUMENT_VA(this, target_name, stop_other_threads);
 
   SBError error; // Ignored
   StepInto(target_name, LLDB_INVALID_LINE_NUMBER, error, stop_other_threads);
@@ -590,10 +578,7 @@ void SBThread::StepInto(const char *target_name,
 
 void SBThread::StepInto(const char *target_name, uint32_t end_line,
                         SBError &error, lldb::RunMode stop_other_threads) {
-  LLDB_RECORD_METHOD(void, SBThread, StepInto,
-                     (const char *, uint32_t, lldb::SBError &, lldb::RunMode),
-                     target_name, end_line, error, stop_other_threads);
-
+  LLDB_INSTRUMENT_VA(this, target_name, end_line, error, stop_other_threads);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -640,14 +625,14 @@ void SBThread::StepInto(const char *target_name, uint32_t end_line,
 }
 
 void SBThread::StepOut() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBThread, StepOut);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError error; // Ignored
   StepOut(error);
 }
 
 void SBThread::StepOut(SBError &error) {
-  LLDB_RECORD_METHOD(void, SBThread, StepOut, (lldb::SBError &), error);
+  LLDB_INSTRUMENT_VA(this, error);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -675,17 +660,14 @@ void SBThread::StepOut(SBError &error) {
 }
 
 void SBThread::StepOutOfFrame(SBFrame &sb_frame) {
-  LLDB_RECORD_METHOD(void, SBThread, StepOutOfFrame, (lldb::SBFrame &),
-                     sb_frame);
+  LLDB_INSTRUMENT_VA(this, sb_frame);
 
   SBError error; // Ignored
   StepOutOfFrame(sb_frame, error);
 }
 
 void SBThread::StepOutOfFrame(SBFrame &sb_frame, SBError &error) {
-  LLDB_RECORD_METHOD(void, SBThread, StepOutOfFrame,
-                     (lldb::SBFrame &, lldb::SBError &), sb_frame, error);
-
+  LLDB_INSTRUMENT_VA(this, sb_frame, error);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -722,15 +704,14 @@ void SBThread::StepOutOfFrame(SBFrame &sb_frame, SBError &error) {
 }
 
 void SBThread::StepInstruction(bool step_over) {
-  LLDB_RECORD_METHOD(void, SBThread, StepInstruction, (bool), step_over);
+  LLDB_INSTRUMENT_VA(this, step_over);
 
   SBError error; // Ignored
   StepInstruction(step_over, error);
 }
 
 void SBThread::StepInstruction(bool step_over, SBError &error) {
-  LLDB_RECORD_METHOD(void, SBThread, StepInstruction, (bool, lldb::SBError &),
-                     step_over, error);
+  LLDB_INSTRUMENT_VA(this, step_over, error);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -752,15 +733,14 @@ void SBThread::StepInstruction(bool step_over, SBError &error) {
 }
 
 void SBThread::RunToAddress(lldb::addr_t addr) {
-  LLDB_RECORD_METHOD(void, SBThread, RunToAddress, (lldb::addr_t), addr);
+  LLDB_INSTRUMENT_VA(this, addr);
 
   SBError error; // Ignored
   RunToAddress(addr, error);
 }
 
 void SBThread::RunToAddress(lldb::addr_t addr, SBError &error) {
-  LLDB_RECORD_METHOD(void, SBThread, RunToAddress,
-                     (lldb::addr_t, lldb::SBError &), addr, error);
+  LLDB_INSTRUMENT_VA(this, addr, error);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -789,9 +769,7 @@ void SBThread::RunToAddress(lldb::addr_t addr, SBError &error) {
 
 SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame,
                                 lldb::SBFileSpec &sb_file_spec, uint32_t line) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBThread, StepOverUntil,
-                     (lldb::SBFrame &, lldb::SBFileSpec &, uint32_t), sb_frame,
-                     sb_file_spec, line);
+  LLDB_INSTRUMENT_VA(this, sb_frame, sb_file_spec, line);
 
   SBError sb_error;
   char path[PATH_MAX];
@@ -908,17 +886,14 @@ SBError SBThread::StepOverUntil(lldb::SBFrame &sb_frame,
 }
 
 SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBThread, StepUsingScriptedThreadPlan,
-                     (const char *), script_class_name);
+  LLDB_INSTRUMENT_VA(this, script_class_name);
 
   return StepUsingScriptedThreadPlan(script_class_name, true);
 }
 
 SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name,
                                             bool resume_immediately) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBThread, StepUsingScriptedThreadPlan,
-                     (const char *, bool), script_class_name,
-                     resume_immediately);
+  LLDB_INSTRUMENT_VA(this, script_class_name, resume_immediately);
 
   lldb::SBStructuredData no_data;
   return StepUsingScriptedThreadPlan(script_class_name, no_data,
@@ -928,9 +903,7 @@ SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name,
 SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name,
                                               SBStructuredData &args_data,
                                               bool resume_immediately) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBThread, StepUsingScriptedThreadPlan,
-                     (const char *, lldb::SBStructuredData &, bool),
-                     script_class_name, args_data, resume_immediately);
+  LLDB_INSTRUMENT_VA(this, script_class_name, args_data, resume_immediately);
 
   SBError error;
 
@@ -966,8 +939,7 @@ SBError SBThread::StepUsingScriptedThreadPlan(const char *script_class_name,
 }
 
 SBError SBThread::JumpToLine(lldb::SBFileSpec &file_spec, uint32_t line) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBThread, JumpToLine,
-                     (lldb::SBFileSpec &, uint32_t), file_spec, line);
+  LLDB_INSTRUMENT_VA(this, file_spec, line);
 
   SBError sb_error;
 
@@ -987,8 +959,7 @@ SBError SBThread::JumpToLine(lldb::SBFileSpec &file_spec, uint32_t line) {
 }
 
 SBError SBThread::ReturnFromFrame(SBFrame &frame, SBValue &return_value) {
-  LLDB_RECORD_METHOD(lldb::SBError, SBThread, ReturnFromFrame,
-                     (lldb::SBFrame &, lldb::SBValue &), frame, return_value);
+  LLDB_INSTRUMENT_VA(this, frame, return_value);
 
   SBError sb_error;
 
@@ -1005,8 +976,7 @@ SBError SBThread::ReturnFromFrame(SBFrame &frame, SBValue &return_value) {
 }
 
 SBError SBThread::UnwindInnermostExpression() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBThread,
-                             UnwindInnermostExpression);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError sb_error;
 
@@ -1024,14 +994,14 @@ SBError SBThread::UnwindInnermostExpression() {
 }
 
 bool SBThread::Suspend() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, Suspend);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError error; // Ignored
   return Suspend(error);
 }
 
 bool SBThread::Suspend(SBError &error) {
-  LLDB_RECORD_METHOD(bool, SBThread, Suspend, (lldb::SBError &), error);
+  LLDB_INSTRUMENT_VA(this, error);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -1051,14 +1021,14 @@ bool SBThread::Suspend(SBError &error) {
 }
 
 bool SBThread::Resume() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, Resume);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError error; // Ignored
   return Resume(error);
 }
 
 bool SBThread::Resume(SBError &error) {
-  LLDB_RECORD_METHOD(bool, SBThread, Resume, (lldb::SBError &), error);
+  LLDB_INSTRUMENT_VA(this, error);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -1079,7 +1049,7 @@ bool SBThread::Resume(SBError &error) {
 }
 
 bool SBThread::IsSuspended() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, IsSuspended);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -1090,7 +1060,7 @@ bool SBThread::IsSuspended() {
 }
 
 bool SBThread::IsStopped() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, IsStopped);
+  LLDB_INSTRUMENT_VA(this);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -1101,7 +1071,7 @@ bool SBThread::IsStopped() {
 }
 
 SBProcess SBThread::GetProcess() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBThread, GetProcess);
+  LLDB_INSTRUMENT_VA(this);
 
   SBProcess sb_process;
   std::unique_lock<std::recursive_mutex> lock;
@@ -1117,7 +1087,7 @@ SBProcess SBThread::GetProcess() {
 }
 
 uint32_t SBThread::GetNumFrames() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBThread, GetNumFrames);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t num_frames = 0;
   std::unique_lock<std::recursive_mutex> lock;
@@ -1134,7 +1104,7 @@ uint32_t SBThread::GetNumFrames() {
 }
 
 SBFrame SBThread::GetFrameAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBFrame, SBThread, GetFrameAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBFrame sb_frame;
   StackFrameSP frame_sp;
@@ -1153,7 +1123,7 @@ SBFrame SBThread::GetFrameAtIndex(uint32_t idx) {
 }
 
 lldb::SBFrame SBThread::GetSelectedFrame() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFrame, SBThread, GetSelectedFrame);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFrame sb_frame;
   StackFrameSP frame_sp;
@@ -1172,8 +1142,7 @@ lldb::SBFrame SBThread::GetSelectedFrame() {
 }
 
 lldb::SBFrame SBThread::SetSelectedFrame(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBFrame, SBThread, SetSelectedFrame, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBFrame sb_frame;
   StackFrameSP frame_sp;
@@ -1196,45 +1165,39 @@ lldb::SBFrame SBThread::SetSelectedFrame(uint32_t idx) {
 }
 
 bool SBThread::EventIsThreadEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBThread, EventIsThreadEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Thread::ThreadEventData::GetEventDataFromEvent(event.get()) != nullptr;
 }
 
 SBFrame SBThread::GetStackFrameFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBFrame, SBThread, GetStackFrameFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Thread::ThreadEventData::GetStackFrameFromEvent(event.get());
 }
 
 SBThread SBThread::GetThreadFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBThread, SBThread, GetThreadFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Thread::ThreadEventData::GetThreadFromEvent(event.get());
 }
 
 bool SBThread::operator==(const SBThread &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBThread, operator==,(const lldb::SBThread &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_sp->GetThreadSP().get() ==
          rhs.m_opaque_sp->GetThreadSP().get();
 }
 
 bool SBThread::operator!=(const SBThread &rhs) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBThread, operator!=,(const lldb::SBThread &),
-                           rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return m_opaque_sp->GetThreadSP().get() !=
          rhs.m_opaque_sp->GetThreadSP().get();
 }
 
 bool SBThread::GetStatus(SBStream &status) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBThread, GetStatus, (lldb::SBStream &),
-                           status);
+  LLDB_INSTRUMENT_VA(this, status);
 
   Stream &strm = status.ref();
 
@@ -1250,15 +1213,13 @@ bool SBThread::GetStatus(SBStream &status) const {
 }
 
 bool SBThread::GetDescription(SBStream &description) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBThread, GetDescription, (lldb::SBStream &),
-                           description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   return GetDescription(description, false);
 }
 
 bool SBThread::GetDescription(SBStream &description, bool stop_format) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBThread, GetDescription,
-                           (lldb::SBStream &, bool), description, stop_format);
+  LLDB_INSTRUMENT_VA(this, description, stop_format);
 
   Stream &strm = description.ref();
 
@@ -1278,8 +1239,7 @@ bool SBThread::GetDescription(SBStream &description, bool stop_format) const {
 }
 
 SBThread SBThread::GetExtendedBacktraceThread(const char *type) {
-  LLDB_RECORD_METHOD(lldb::SBThread, SBThread, GetExtendedBacktraceThread,
-                     (const char *), type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   std::unique_lock<std::recursive_mutex> lock;
   ExecutionContext exe_ctx(m_opaque_sp.get(), lock);
@@ -1313,8 +1273,7 @@ SBThread SBThread::GetExtendedBacktraceThread(const char *type) {
 }
 
 uint32_t SBThread::GetExtendedBacktraceOriginatingIndexID() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBThread,
-                             GetExtendedBacktraceOriginatingIndexID);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadSP thread_sp(m_opaque_sp->GetThreadSP());
   if (thread_sp)
@@ -1323,7 +1282,7 @@ uint32_t SBThread::GetExtendedBacktraceOriginatingIndexID() {
 }
 
 SBValue SBThread::GetCurrentException() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBThread, GetCurrentException);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadSP thread_sp(m_opaque_sp->GetThreadSP());
   if (!thread_sp)
@@ -1333,8 +1292,7 @@ SBValue SBThread::GetCurrentException() {
 }
 
 SBThread SBThread::GetCurrentExceptionBacktrace() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBThread, SBThread,
-                             GetCurrentExceptionBacktrace);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadSP thread_sp(m_opaque_sp->GetThreadSP());
   if (!thread_sp)
@@ -1344,7 +1302,7 @@ SBThread SBThread::GetCurrentExceptionBacktrace() {
 }
 
 bool SBThread::SafeToCallFunctions() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThread, SafeToCallFunctions);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadSP thread_sp(m_opaque_sp->GetThreadSP());
   if (thread_sp)
diff --git a/lldb/source/API/SBThreadCollection.cpp b/lldb/source/API/SBThreadCollection.cpp
index fcf066381c98e..9d688e0122397 100644
--- a/lldb/source/API/SBThreadCollection.cpp
+++ b/lldb/source/API/SBThreadCollection.cpp
@@ -7,28 +7,23 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBThreadCollection.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBThread.h"
 #include "lldb/Target/ThreadList.h"
+#include "lldb/Utility/Instrumentation.h"
 
 using namespace lldb;
 using namespace lldb_private;
 
-SBThreadCollection::SBThreadCollection() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBThreadCollection);
-}
+SBThreadCollection::SBThreadCollection() { LLDB_INSTRUMENT_VA(this); }
 
 SBThreadCollection::SBThreadCollection(const SBThreadCollection &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBThreadCollection,
-                          (const lldb::SBThreadCollection &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBThreadCollection &SBThreadCollection::
 operator=(const SBThreadCollection &rhs) {
-  LLDB_RECORD_METHOD(
-      const lldb::SBThreadCollection &,
-      SBThreadCollection, operator=,(const lldb::SBThreadCollection &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = rhs.m_opaque_sp;
@@ -61,17 +56,17 @@ const lldb::ThreadCollectionSP &SBThreadCollection::operator*() const {
 }
 
 bool SBThreadCollection::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadCollection, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBThreadCollection::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadCollection, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 size_t SBThreadCollection::GetSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBThreadCollection, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     return m_opaque_sp->GetSize();
@@ -79,8 +74,7 @@ size_t SBThreadCollection::GetSize() {
 }
 
 SBThread SBThreadCollection::GetThreadAtIndex(size_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBThread, SBThreadCollection, GetThreadAtIndex,
-                     (size_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBThread thread;
   if (m_opaque_sp && idx < m_opaque_sp->GetSize())
diff --git a/lldb/source/API/SBThreadPlan.cpp b/lldb/source/API/SBThreadPlan.cpp
index 26fcca5c2e23f..2e66ac1208392 100644
--- a/lldb/source/API/SBThreadPlan.cpp
+++ b/lldb/source/API/SBThreadPlan.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBThread.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBFileSpec.h"
 #include "lldb/API/SBStream.h"
@@ -50,22 +50,20 @@ using namespace lldb;
 using namespace lldb_private;
 
 // Constructors
-SBThreadPlan::SBThreadPlan() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBThreadPlan); }
+SBThreadPlan::SBThreadPlan() { LLDB_INSTRUMENT_VA(this); }
 
 SBThreadPlan::SBThreadPlan(const ThreadPlanSP &lldb_object_sp)
     : m_opaque_wp(lldb_object_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBThreadPlan, (const lldb::ThreadPlanSP &),
-                          lldb_object_sp);
+  LLDB_INSTRUMENT_VA(this, lldb_object_sp);
 }
 
 SBThreadPlan::SBThreadPlan(const SBThreadPlan &rhs)
     : m_opaque_wp(rhs.m_opaque_wp) {
-  LLDB_RECORD_CONSTRUCTOR(SBThreadPlan, (const lldb::SBThreadPlan &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBThreadPlan::SBThreadPlan(lldb::SBThread &sb_thread, const char *class_name) {
-  LLDB_RECORD_CONSTRUCTOR(SBThreadPlan, (lldb::SBThread &, const char *),
-                          sb_thread, class_name);
+  LLDB_INSTRUMENT_VA(this, sb_thread, class_name);
 
   Thread *thread = sb_thread.get();
   if (thread)
@@ -75,9 +73,7 @@ SBThreadPlan::SBThreadPlan(lldb::SBThread &sb_thread, const char *class_name) {
 
 SBThreadPlan::SBThreadPlan(lldb::SBThread &sb_thread, const char *class_name,
                            lldb::SBStructuredData &args_data) {
-  LLDB_RECORD_CONSTRUCTOR(SBThreadPlan, (lldb::SBThread &, const char *,
-                                         SBStructuredData &),
-                          sb_thread, class_name, args_data);
+  LLDB_INSTRUMENT_VA(this, sb_thread, class_name, args_data);
 
   Thread *thread = sb_thread.get();
   if (thread)
@@ -88,8 +84,7 @@ SBThreadPlan::SBThreadPlan(lldb::SBThread &sb_thread, const char *class_name,
 // Assignment operator
 
 const lldb::SBThreadPlan &SBThreadPlan::operator=(const SBThreadPlan &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBThreadPlan &,
-                     SBThreadPlan, operator=,(const lldb::SBThreadPlan &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_wp = rhs.m_opaque_wp;
@@ -99,42 +94,41 @@ const lldb::SBThreadPlan &SBThreadPlan::operator=(const SBThreadPlan &rhs) {
 SBThreadPlan::~SBThreadPlan() = default;
 
 bool SBThreadPlan::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadPlan, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBThreadPlan::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBThreadPlan, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return static_cast<bool>(GetSP());
 }
 
 void SBThreadPlan::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBThreadPlan, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_wp.reset();
 }
 
 lldb::StopReason SBThreadPlan::GetStopReason() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::StopReason, SBThreadPlan, GetStopReason);
+  LLDB_INSTRUMENT_VA(this);
 
   return eStopReasonNone;
 }
 
 size_t SBThreadPlan::GetStopReasonDataCount() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBThreadPlan, GetStopReasonDataCount);
+  LLDB_INSTRUMENT_VA(this);
 
   return 0;
 }
 
 uint64_t SBThreadPlan::GetStopReasonDataAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(uint64_t, SBThreadPlan, GetStopReasonDataAtIndex,
-                     (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   return 0;
 }
 
 SBThread SBThreadPlan::GetThread() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::SBThread, SBThreadPlan, GetThread);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp) {
@@ -144,8 +138,7 @@ SBThread SBThreadPlan::GetThread() const {
 }
 
 bool SBThreadPlan::GetDescription(lldb::SBStream &description) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBThreadPlan, GetDescription,
-                           (lldb::SBStream &), description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp) {
@@ -161,7 +154,7 @@ void SBThreadPlan::SetThreadPlan(const ThreadPlanSP &lldb_object_wp) {
 }
 
 void SBThreadPlan::SetPlanComplete(bool success) {
-  LLDB_RECORD_METHOD(void, SBThreadPlan, SetPlanComplete, (bool), success);
+  LLDB_INSTRUMENT_VA(this, success);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp)
@@ -169,7 +162,7 @@ void SBThreadPlan::SetPlanComplete(bool success) {
 }
 
 bool SBThreadPlan::IsPlanComplete() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThreadPlan, IsPlanComplete);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp)
@@ -178,7 +171,7 @@ bool SBThreadPlan::IsPlanComplete() {
 }
 
 bool SBThreadPlan::IsPlanStale() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThreadPlan, IsPlanStale);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp)
@@ -187,7 +180,7 @@ bool SBThreadPlan::IsPlanStale() {
 }
 
 bool SBThreadPlan::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThreadPlan, IsValid);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp)
@@ -196,7 +189,7 @@ bool SBThreadPlan::IsValid() {
 }
 
 bool SBThreadPlan::GetStopOthers() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBThreadPlan, GetStopOthers);
+  LLDB_INSTRUMENT_VA(this);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp)
@@ -205,7 +198,7 @@ bool SBThreadPlan::GetStopOthers() {
 }
 
 void SBThreadPlan::SetStopOthers(bool stop_others) {
-  LLDB_RECORD_METHOD(void, SBThreadPlan, SetStopOthers, (bool), stop_others);
+  LLDB_INSTRUMENT_VA(this, stop_others);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp)
@@ -221,9 +214,7 @@ void SBThreadPlan::SetStopOthers(bool stop_others) {
 SBThreadPlan
 SBThreadPlan::QueueThreadPlanForStepOverRange(SBAddress &sb_start_address,
                                               lldb::addr_t size) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepOverRange,
-                     (lldb::SBAddress &, lldb::addr_t), sb_start_address, size);
+  LLDB_INSTRUMENT_VA(this, sb_start_address, size);
 
   SBError error;
   return QueueThreadPlanForStepOverRange(sb_start_address, size, error);
@@ -231,10 +222,7 @@ SBThreadPlan::QueueThreadPlanForStepOverRange(SBAddress &sb_start_address,
 
 SBThreadPlan SBThreadPlan::QueueThreadPlanForStepOverRange(
     SBAddress &sb_start_address, lldb::addr_t size, SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepOverRange,
-                     (lldb::SBAddress &, lldb::addr_t, lldb::SBError &),
-                     sb_start_address, size, error);
+  LLDB_INSTRUMENT_VA(this, sb_start_address, size, error);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp) {
@@ -265,9 +253,7 @@ SBThreadPlan SBThreadPlan::QueueThreadPlanForStepOverRange(
 SBThreadPlan
 SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address,
                                             lldb::addr_t size) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepInRange,
-                     (lldb::SBAddress &, lldb::addr_t), sb_start_address, size);
+  LLDB_INSTRUMENT_VA(this, sb_start_address, size);
 
   SBError error;
   return QueueThreadPlanForStepInRange(sb_start_address, size, error);
@@ -276,10 +262,7 @@ SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address,
 SBThreadPlan
 SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address,
                                             lldb::addr_t size, SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepInRange,
-                     (lldb::SBAddress &, lldb::addr_t, lldb::SBError &),
-                     sb_start_address, size, error);
+  LLDB_INSTRUMENT_VA(this, sb_start_address, size, error);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp) {
@@ -310,9 +293,7 @@ SBThreadPlan::QueueThreadPlanForStepInRange(SBAddress &sb_start_address,
 SBThreadPlan
 SBThreadPlan::QueueThreadPlanForStepOut(uint32_t frame_idx_to_step_to,
                                         bool first_insn) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepOut, (uint32_t, bool),
-                     frame_idx_to_step_to, first_insn);
+  LLDB_INSTRUMENT_VA(this, frame_idx_to_step_to, first_insn);
 
   SBError error;
   return QueueThreadPlanForStepOut(frame_idx_to_step_to, first_insn, error);
@@ -321,10 +302,7 @@ SBThreadPlan::QueueThreadPlanForStepOut(uint32_t frame_idx_to_step_to,
 SBThreadPlan
 SBThreadPlan::QueueThreadPlanForStepOut(uint32_t frame_idx_to_step_to,
                                         bool first_insn, SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepOut,
-                     (uint32_t, bool, lldb::SBError &), frame_idx_to_step_to,
-                     first_insn, error);
+  LLDB_INSTRUMENT_VA(this, frame_idx_to_step_to, first_insn, error);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp) {
@@ -350,9 +328,7 @@ SBThreadPlan::QueueThreadPlanForStepOut(uint32_t frame_idx_to_step_to,
 
 SBThreadPlan
 SBThreadPlan::QueueThreadPlanForRunToAddress(SBAddress sb_address) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForRunToAddress, (lldb::SBAddress),
-                     sb_address);
+  LLDB_INSTRUMENT_VA(this, sb_address);
 
   SBError error;
   return QueueThreadPlanForRunToAddress(sb_address, error);
@@ -360,9 +336,7 @@ SBThreadPlan::QueueThreadPlanForRunToAddress(SBAddress sb_address) {
 
 SBThreadPlan SBThreadPlan::QueueThreadPlanForRunToAddress(SBAddress sb_address,
                                                           SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForRunToAddress,
-                     (lldb::SBAddress, lldb::SBError &), sb_address, error);
+  LLDB_INSTRUMENT_VA(this, sb_address, error);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp) {
@@ -387,9 +361,7 @@ SBThreadPlan SBThreadPlan::QueueThreadPlanForRunToAddress(SBAddress sb_address,
 
 SBThreadPlan
 SBThreadPlan::QueueThreadPlanForStepScripted(const char *script_class_name) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepScripted, (const char *),
-                     script_class_name);
+  LLDB_INSTRUMENT_VA(this, script_class_name);
 
   SBError error;
   return QueueThreadPlanForStepScripted(script_class_name, error);
@@ -398,9 +370,7 @@ SBThreadPlan::QueueThreadPlanForStepScripted(const char *script_class_name) {
 SBThreadPlan
 SBThreadPlan::QueueThreadPlanForStepScripted(const char *script_class_name,
                                              SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepScripted,
-                     (const char *, lldb::SBError &), script_class_name, error);
+  LLDB_INSTRUMENT_VA(this, script_class_name, error);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp) {
@@ -424,10 +394,7 @@ SBThreadPlan
 SBThreadPlan::QueueThreadPlanForStepScripted(const char *script_class_name,
                                              lldb::SBStructuredData &args_data,
                                              SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBThreadPlan, SBThreadPlan,
-                     QueueThreadPlanForStepScripted,
-                     (const char *, lldb::SBStructuredData &, lldb::SBError &), 
-                     script_class_name, args_data, error);
+  LLDB_INSTRUMENT_VA(this, script_class_name, args_data, error);
 
   ThreadPlanSP thread_plan_sp(GetSP());
   if (thread_plan_sp) {
diff --git a/lldb/source/API/SBTrace.cpp b/lldb/source/API/SBTrace.cpp
index 4e13e774e366c..64a675e2e16cd 100644
--- a/lldb/source/API/SBTrace.cpp
+++ b/lldb/source/API/SBTrace.cpp
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/Target/Process.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBStructuredData.h"
 #include "lldb/API/SBThread.h"
@@ -20,20 +20,19 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBTrace::SBTrace() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTrace); }
+SBTrace::SBTrace() { LLDB_INSTRUMENT_VA(this); }
 
 SBTrace::SBTrace(const lldb::TraceSP &trace_sp) : m_opaque_sp(trace_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTrace, (const lldb::TraceSP &), trace_sp);
+  LLDB_INSTRUMENT_VA(this, trace_sp);
 }
 
 const char *SBTrace::GetStartConfigurationHelp() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTrace, GetStartConfigurationHelp);
+  LLDB_INSTRUMENT_VA(this);
   return m_opaque_sp ? m_opaque_sp->GetStartConfigurationHelp() : nullptr;
 }
 
 SBError SBTrace::Start(const SBStructuredData &configuration) {
-  LLDB_RECORD_METHOD(SBError, SBTrace, Start, (const SBStructuredData &),
-                     configuration);
+  LLDB_INSTRUMENT_VA(this, configuration);
   SBError error;
   if (!m_opaque_sp)
     error.SetErrorString("error: invalid trace");
@@ -45,9 +44,7 @@ SBError SBTrace::Start(const SBStructuredData &configuration) {
 
 SBError SBTrace::Start(const SBThread &thread,
                        const SBStructuredData &configuration) {
-  LLDB_RECORD_METHOD(SBError, SBTrace, Start,
-                     (const SBThread &, const SBStructuredData &), thread,
-                     configuration);
+  LLDB_INSTRUMENT_VA(this, thread, configuration);
 
   SBError error;
   if (!m_opaque_sp)
@@ -63,7 +60,7 @@ SBError SBTrace::Start(const SBThread &thread,
 }
 
 SBError SBTrace::Stop() {
-  LLDB_RECORD_METHOD_NO_ARGS(SBError, SBTrace, Stop);
+  LLDB_INSTRUMENT_VA(this);
   SBError error;
   if (!m_opaque_sp)
     error.SetErrorString("error: invalid trace");
@@ -73,7 +70,7 @@ SBError SBTrace::Stop() {
 }
 
 SBError SBTrace::Stop(const SBThread &thread) {
-  LLDB_RECORD_METHOD(SBError, SBTrace, Stop, (const SBThread &), thread);
+  LLDB_INSTRUMENT_VA(this, thread);
   SBError error;
   if (!m_opaque_sp)
     error.SetErrorString("error: invalid trace");
@@ -83,11 +80,11 @@ SBError SBTrace::Stop(const SBThread &thread) {
 }
 
 bool SBTrace::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTrace, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 
 SBTrace::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTrace, operator bool);
+  LLDB_INSTRUMENT_VA(this);
   return (bool)m_opaque_sp;
 }
diff --git a/lldb/source/API/SBType.cpp b/lldb/source/API/SBType.cpp
index bd759c8f6bf90..da9202bf9386b 100644
--- a/lldb/source/API/SBType.cpp
+++ b/lldb/source/API/SBType.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBType.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBDefines.h"
 #include "lldb/API/SBModule.h"
 #include "lldb/API/SBStream.h"
@@ -17,6 +16,7 @@
 #include "lldb/Symbol/Type.h"
 #include "lldb/Symbol/TypeSystem.h"
 #include "lldb/Utility/ConstString.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Stream.h"
 
 #include "llvm/ADT/APSInt.h"
@@ -26,7 +26,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBType::SBType() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBType); }
+SBType::SBType() { LLDB_INSTRUMENT_VA(this); }
 
 SBType::SBType(const CompilerType &type)
     : m_opaque_sp(new TypeImpl(
@@ -39,7 +39,7 @@ SBType::SBType(const lldb::TypeImplSP &type_impl_sp)
     : m_opaque_sp(type_impl_sp) {}
 
 SBType::SBType(const SBType &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBType, (const lldb::SBType &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -51,7 +51,7 @@ SBType::SBType(const SBType &rhs) {
 //{}
 //
 bool SBType::operator==(SBType &rhs) {
-  LLDB_RECORD_METHOD(bool, SBType, operator==,(lldb::SBType &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -63,7 +63,7 @@ bool SBType::operator==(SBType &rhs) {
 }
 
 bool SBType::operator!=(SBType &rhs) {
-  LLDB_RECORD_METHOD(bool, SBType, operator!=,(lldb::SBType &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return rhs.IsValid();
@@ -81,8 +81,7 @@ void SBType::SetSP(const lldb::TypeImplSP &type_impl_sp) {
 }
 
 SBType &SBType::operator=(const SBType &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBType &, SBType, operator=,(const lldb::SBType &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -107,11 +106,11 @@ const TypeImpl &SBType::ref() const {
 }
 
 bool SBType::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBType, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBType::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBType, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp.get() == nullptr)
     return false;
@@ -120,7 +119,7 @@ SBType::operator bool() const {
 }
 
 uint64_t SBType::GetByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBType, GetByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     if (llvm::Optional<uint64_t> size =
@@ -130,7 +129,7 @@ uint64_t SBType::GetByteSize() {
 }
 
 bool SBType::IsPointerType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsPointerType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -138,7 +137,7 @@ bool SBType::IsPointerType() {
 }
 
 bool SBType::IsArrayType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsArrayType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -147,7 +146,7 @@ bool SBType::IsArrayType() {
 }
 
 bool SBType::IsVectorType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsVectorType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -155,7 +154,7 @@ bool SBType::IsVectorType() {
 }
 
 bool SBType::IsReferenceType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsReferenceType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -163,7 +162,7 @@ bool SBType::IsReferenceType() {
 }
 
 SBType SBType::GetPointerType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetPointerType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return SBType();
@@ -172,7 +171,7 @@ SBType SBType::GetPointerType() {
 }
 
 SBType SBType::GetPointeeType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetPointeeType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return SBType();
@@ -180,7 +179,7 @@ SBType SBType::GetPointeeType() {
 }
 
 SBType SBType::GetReferenceType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetReferenceType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return SBType();
@@ -188,7 +187,7 @@ SBType SBType::GetReferenceType() {
 }
 
 SBType SBType::GetTypedefedType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetTypedefedType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return SBType();
@@ -196,7 +195,7 @@ SBType SBType::GetTypedefedType() {
 }
 
 SBType SBType::GetDereferencedType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetDereferencedType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return SBType();
@@ -204,7 +203,7 @@ SBType SBType::GetDereferencedType() {
 }
 
 SBType SBType::GetArrayElementType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetArrayElementType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return SBType();
@@ -213,7 +212,7 @@ SBType SBType::GetArrayElementType() {
 }
 
 SBType SBType::GetArrayType(uint64_t size) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBType, GetArrayType, (uint64_t), size);
+  LLDB_INSTRUMENT_VA(this, size);
 
   if (!IsValid())
     return SBType();
@@ -222,7 +221,7 @@ SBType SBType::GetArrayType(uint64_t size) {
 }
 
 SBType SBType::GetVectorElementType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetVectorElementType);
+  LLDB_INSTRUMENT_VA(this);
 
   SBType type_sb;
   if (IsValid()) {
@@ -235,7 +234,7 @@ SBType SBType::GetVectorElementType() {
 }
 
 bool SBType::IsFunctionType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsFunctionType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -243,7 +242,7 @@ bool SBType::IsFunctionType() {
 }
 
 bool SBType::IsPolymorphicClass() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsPolymorphicClass);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -251,7 +250,7 @@ bool SBType::IsPolymorphicClass() {
 }
 
 bool SBType::IsTypedefType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsTypedefType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -259,7 +258,7 @@ bool SBType::IsTypedefType() {
 }
 
 bool SBType::IsAnonymousType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsAnonymousType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -267,7 +266,7 @@ bool SBType::IsAnonymousType() {
 }
 
 bool SBType::IsScopedEnumerationType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsScopedEnumerationType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -275,7 +274,7 @@ bool SBType::IsScopedEnumerationType() {
 }
 
 lldb::SBType SBType::GetFunctionReturnType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetFunctionReturnType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid()) {
     CompilerType return_type(
@@ -287,8 +286,7 @@ lldb::SBType SBType::GetFunctionReturnType() {
 }
 
 lldb::SBTypeList SBType::GetFunctionArgumentTypes() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeList, SBType,
-                             GetFunctionArgumentTypes);
+  LLDB_INSTRUMENT_VA(this);
 
   SBTypeList sb_type_list;
   if (IsValid()) {
@@ -302,7 +300,7 @@ lldb::SBTypeList SBType::GetFunctionArgumentTypes() {
 }
 
 uint32_t SBType::GetNumberOfMemberFunctions() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfMemberFunctions);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid()) {
     return m_opaque_sp->GetCompilerType(true).GetNumMemberFunctions();
@@ -311,8 +309,7 @@ uint32_t SBType::GetNumberOfMemberFunctions() {
 }
 
 lldb::SBTypeMemberFunction SBType::GetMemberFunctionAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBTypeMemberFunction, SBType,
-                     GetMemberFunctionAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBTypeMemberFunction sb_func_type;
   if (IsValid())
@@ -322,7 +319,7 @@ lldb::SBTypeMemberFunction SBType::GetMemberFunctionAtIndex(uint32_t idx) {
 }
 
 lldb::SBType SBType::GetUnqualifiedType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetUnqualifiedType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return SBType();
@@ -330,7 +327,7 @@ lldb::SBType SBType::GetUnqualifiedType() {
 }
 
 lldb::SBType SBType::GetCanonicalType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetCanonicalType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return SBType(TypeImplSP(new TypeImpl(m_opaque_sp->GetCanonicalType())));
@@ -338,7 +335,7 @@ lldb::SBType SBType::GetCanonicalType() {
 }
 
 SBType SBType::GetEnumerationIntegerType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBType, GetEnumerationIntegerType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid()) {
     return SBType(
@@ -348,7 +345,7 @@ SBType SBType::GetEnumerationIntegerType() {
 }
 
 lldb::BasicType SBType::GetBasicType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::BasicType, SBType, GetBasicType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetCompilerType(false).GetBasicTypeEnumeration();
@@ -356,8 +353,7 @@ lldb::BasicType SBType::GetBasicType() {
 }
 
 SBType SBType::GetBasicType(lldb::BasicType basic_type) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBType, GetBasicType, (lldb::BasicType),
-                     basic_type);
+  LLDB_INSTRUMENT_VA(this, basic_type);
 
   if (IsValid() && m_opaque_sp->IsValid())
     return SBType(
@@ -366,7 +362,7 @@ SBType SBType::GetBasicType(lldb::BasicType basic_type) {
 }
 
 uint32_t SBType::GetNumberOfDirectBaseClasses() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfDirectBaseClasses);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetCompilerType(true).GetNumDirectBaseClasses();
@@ -374,7 +370,7 @@ uint32_t SBType::GetNumberOfDirectBaseClasses() {
 }
 
 uint32_t SBType::GetNumberOfVirtualBaseClasses() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfVirtualBaseClasses);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetCompilerType(true).GetNumVirtualBaseClasses();
@@ -382,7 +378,7 @@ uint32_t SBType::GetNumberOfVirtualBaseClasses() {
 }
 
 uint32_t SBType::GetNumberOfFields() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfFields);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetCompilerType(true).GetNumFields();
@@ -391,9 +387,7 @@ uint32_t SBType::GetNumberOfFields() {
 
 bool SBType::GetDescription(SBStream &description,
                             lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBType, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   Stream &strm = description.ref();
 
@@ -406,8 +400,7 @@ bool SBType::GetDescription(SBStream &description,
 }
 
 SBTypeMember SBType::GetDirectBaseClassAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBTypeMember, SBType, GetDirectBaseClassAtIndex,
-                     (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBTypeMember sb_type_member;
   if (IsValid()) {
@@ -423,8 +416,7 @@ SBTypeMember SBType::GetDirectBaseClassAtIndex(uint32_t idx) {
 }
 
 SBTypeMember SBType::GetVirtualBaseClassAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBTypeMember, SBType, GetVirtualBaseClassAtIndex,
-                     (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBTypeMember sb_type_member;
   if (IsValid()) {
@@ -440,8 +432,7 @@ SBTypeMember SBType::GetVirtualBaseClassAtIndex(uint32_t idx) {
 }
 
 SBTypeEnumMemberList SBType::GetEnumMembers() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeEnumMemberList, SBType,
-                             GetEnumMembers);
+  LLDB_INSTRUMENT_VA(this);
 
   SBTypeEnumMemberList sb_enum_member_list;
   if (IsValid()) {
@@ -463,8 +454,7 @@ SBTypeEnumMemberList SBType::GetEnumMembers() {
 }
 
 SBTypeMember SBType::GetFieldAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBTypeMember, SBType, GetFieldAtIndex, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBTypeMember sb_type_member;
   if (IsValid()) {
@@ -490,7 +480,7 @@ SBTypeMember SBType::GetFieldAtIndex(uint32_t idx) {
 }
 
 bool SBType::IsTypeComplete() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBType, IsTypeComplete);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -498,7 +488,7 @@ bool SBType::IsTypeComplete() {
 }
 
 uint32_t SBType::GetTypeFlags() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetTypeFlags);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return 0;
@@ -506,7 +496,7 @@ uint32_t SBType::GetTypeFlags() {
 }
 
 lldb::SBModule SBType::GetModule() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBModule, SBType, GetModule);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBModule sb_module;
   if (!IsValid())
@@ -517,7 +507,7 @@ lldb::SBModule SBType::GetModule() {
 }
 
 const char *SBType::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBType, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return "";
@@ -525,7 +515,7 @@ const char *SBType::GetName() {
 }
 
 const char *SBType::GetDisplayTypeName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBType, GetDisplayTypeName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return "";
@@ -533,7 +523,7 @@ const char *SBType::GetDisplayTypeName() {
 }
 
 lldb::TypeClass SBType::GetTypeClass() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::TypeClass, SBType, GetTypeClass);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetCompilerType(true).GetTypeClass();
@@ -541,7 +531,7 @@ lldb::TypeClass SBType::GetTypeClass() {
 }
 
 uint32_t SBType::GetNumberOfTemplateArguments() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBType, GetNumberOfTemplateArguments);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetCompilerType(false).GetNumTemplateArguments();
@@ -549,8 +539,7 @@ uint32_t SBType::GetNumberOfTemplateArguments() {
 }
 
 lldb::SBType SBType::GetTemplateArgumentType(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBType, GetTemplateArgumentType, (uint32_t),
-                     idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   if (!IsValid())
     return SBType();
@@ -574,8 +563,7 @@ lldb::SBType SBType::GetTemplateArgumentType(uint32_t idx) {
 }
 
 lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::TemplateArgumentKind, SBType,
-                     GetTemplateArgumentKind, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   if (IsValid())
     return m_opaque_sp->GetCompilerType(false).GetTemplateArgumentKind(idx);
@@ -583,12 +571,12 @@ lldb::TemplateArgumentKind SBType::GetTemplateArgumentKind(uint32_t idx) {
 }
 
 SBTypeList::SBTypeList() : m_opaque_up(new TypeListImpl()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeList);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBTypeList::SBTypeList(const SBTypeList &rhs)
     : m_opaque_up(new TypeListImpl()) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeList, (const lldb::SBTypeList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   for (uint32_t i = 0, rhs_size = const_cast<SBTypeList &>(rhs).GetSize();
        i < rhs_size; i++)
@@ -596,18 +584,17 @@ SBTypeList::SBTypeList(const SBTypeList &rhs)
 }
 
 bool SBTypeList::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeList, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeList::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeList, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_up != nullptr);
 }
 
 SBTypeList &SBTypeList::operator=(const SBTypeList &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBTypeList &,
-                     SBTypeList, operator=,(const lldb::SBTypeList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_up = std::make_unique<TypeListImpl>();
@@ -619,15 +606,14 @@ SBTypeList &SBTypeList::operator=(const SBTypeList &rhs) {
 }
 
 void SBTypeList::Append(SBType type) {
-  LLDB_RECORD_METHOD(void, SBTypeList, Append, (lldb::SBType), type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   if (type.IsValid())
     m_opaque_up->Append(type.m_opaque_sp);
 }
 
 SBType SBTypeList::GetTypeAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBTypeList, GetTypeAtIndex, (uint32_t),
-                     index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (m_opaque_up)
     return SBType(m_opaque_up->GetTypeAtIndex(index));
@@ -635,19 +621,19 @@ SBType SBTypeList::GetTypeAtIndex(uint32_t index) {
 }
 
 uint32_t SBTypeList::GetSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetSize();
 }
 
 SBTypeList::~SBTypeList() = default;
 
-SBTypeMember::SBTypeMember() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeMember); }
+SBTypeMember::SBTypeMember() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeMember::~SBTypeMember() = default;
 
 SBTypeMember::SBTypeMember(const SBTypeMember &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeMember, (const lldb::SBTypeMember &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     if (rhs.IsValid())
@@ -656,8 +642,7 @@ SBTypeMember::SBTypeMember(const SBTypeMember &rhs) {
 }
 
 lldb::SBTypeMember &SBTypeMember::operator=(const lldb::SBTypeMember &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBTypeMember &,
-                     SBTypeMember, operator=,(const lldb::SBTypeMember &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     if (rhs.IsValid())
@@ -667,17 +652,17 @@ lldb::SBTypeMember &SBTypeMember::operator=(const lldb::SBTypeMember &rhs) {
 }
 
 bool SBTypeMember::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeMember, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeMember::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeMember, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up.get();
 }
 
 const char *SBTypeMember::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeMember, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->GetName().GetCString();
@@ -685,7 +670,7 @@ const char *SBTypeMember::GetName() {
 }
 
 SBType SBTypeMember::GetType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeMember, GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   SBType sb_type;
   if (m_opaque_up) {
@@ -695,7 +680,7 @@ SBType SBTypeMember::GetType() {
 }
 
 uint64_t SBTypeMember::GetOffsetInBytes() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBTypeMember, GetOffsetInBytes);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->GetBitOffset() / 8u;
@@ -703,7 +688,7 @@ uint64_t SBTypeMember::GetOffsetInBytes() {
 }
 
 uint64_t SBTypeMember::GetOffsetInBits() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBTypeMember, GetOffsetInBits);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->GetBitOffset();
@@ -711,7 +696,7 @@ uint64_t SBTypeMember::GetOffsetInBits() {
 }
 
 bool SBTypeMember::IsBitfield() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeMember, IsBitfield);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->GetIsBitfield();
@@ -719,7 +704,7 @@ bool SBTypeMember::IsBitfield() {
 }
 
 uint32_t SBTypeMember::GetBitfieldSizeInBits() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeMember, GetBitfieldSizeInBits);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_up)
     return m_opaque_up->GetBitfieldBitSize();
@@ -728,9 +713,7 @@ uint32_t SBTypeMember::GetBitfieldSizeInBits() {
 
 bool SBTypeMember::GetDescription(lldb::SBStream &description,
                                   lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeMember, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   Stream &strm = description.ref();
 
@@ -771,24 +754,18 @@ TypeMemberImpl &SBTypeMember::ref() {
 
 const TypeMemberImpl &SBTypeMember::ref() const { return *m_opaque_up; }
 
-SBTypeMemberFunction::SBTypeMemberFunction() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeMemberFunction);
-}
+SBTypeMemberFunction::SBTypeMemberFunction() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeMemberFunction::~SBTypeMemberFunction() = default;
 
 SBTypeMemberFunction::SBTypeMemberFunction(const SBTypeMemberFunction &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeMemberFunction,
-                          (const lldb::SBTypeMemberFunction &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 lldb::SBTypeMemberFunction &SBTypeMemberFunction::
 operator=(const lldb::SBTypeMemberFunction &rhs) {
-  LLDB_RECORD_METHOD(
-      lldb::SBTypeMemberFunction &,
-      SBTypeMemberFunction, operator=,(const lldb::SBTypeMemberFunction &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = rhs.m_opaque_sp;
@@ -796,17 +773,17 @@ operator=(const lldb::SBTypeMemberFunction &rhs) {
 }
 
 bool SBTypeMemberFunction::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeMemberFunction, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeMemberFunction::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeMemberFunction, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get();
 }
 
 const char *SBTypeMemberFunction::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeMemberFunction, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     return m_opaque_sp->GetName().GetCString();
@@ -814,8 +791,7 @@ const char *SBTypeMemberFunction::GetName() {
 }
 
 const char *SBTypeMemberFunction::GetDemangledName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeMemberFunction,
-                             GetDemangledName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp) {
     ConstString mangled_str = m_opaque_sp->GetMangledName();
@@ -828,8 +804,7 @@ const char *SBTypeMemberFunction::GetDemangledName() {
 }
 
 const char *SBTypeMemberFunction::GetMangledName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeMemberFunction,
-                             GetMangledName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     return m_opaque_sp->GetMangledName().GetCString();
@@ -837,7 +812,7 @@ const char *SBTypeMemberFunction::GetMangledName() {
 }
 
 SBType SBTypeMemberFunction::GetType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeMemberFunction, GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   SBType sb_type;
   if (m_opaque_sp) {
@@ -847,7 +822,7 @@ SBType SBTypeMemberFunction::GetType() {
 }
 
 lldb::SBType SBTypeMemberFunction::GetReturnType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeMemberFunction, GetReturnType);
+  LLDB_INSTRUMENT_VA(this);
 
   SBType sb_type;
   if (m_opaque_sp) {
@@ -857,8 +832,7 @@ lldb::SBType SBTypeMemberFunction::GetReturnType() {
 }
 
 uint32_t SBTypeMemberFunction::GetNumberOfArguments() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeMemberFunction,
-                             GetNumberOfArguments);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     return m_opaque_sp->GetNumArguments();
@@ -866,8 +840,7 @@ uint32_t SBTypeMemberFunction::GetNumberOfArguments() {
 }
 
 lldb::SBType SBTypeMemberFunction::GetArgumentTypeAtIndex(uint32_t i) {
-  LLDB_RECORD_METHOD(lldb::SBType, SBTypeMemberFunction, GetArgumentTypeAtIndex,
-                     (uint32_t), i);
+  LLDB_INSTRUMENT_VA(this, i);
 
   SBType sb_type;
   if (m_opaque_sp) {
@@ -878,8 +851,7 @@ lldb::SBType SBTypeMemberFunction::GetArgumentTypeAtIndex(uint32_t i) {
 }
 
 lldb::MemberFunctionKind SBTypeMemberFunction::GetKind() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::MemberFunctionKind, SBTypeMemberFunction,
-                             GetKind);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp)
     return m_opaque_sp->GetKind();
@@ -888,9 +860,7 @@ lldb::MemberFunctionKind SBTypeMemberFunction::GetKind() {
 
 bool SBTypeMemberFunction::GetDescription(
     lldb::SBStream &description, lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeMemberFunction, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   Stream &strm = description.ref();
 
diff --git a/lldb/source/API/SBTypeCategory.cpp b/lldb/source/API/SBTypeCategory.cpp
index 17aca3267e676..7d929fe497954 100644
--- a/lldb/source/API/SBTypeCategory.cpp
+++ b/lldb/source/API/SBTypeCategory.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBTypeCategory.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBTypeFilter.h"
@@ -26,9 +26,7 @@ using namespace lldb_private;
 
 typedef std::pair<lldb::TypeCategoryImplSP, user_id_t> ImplType;
 
-SBTypeCategory::SBTypeCategory() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeCategory);
-}
+SBTypeCategory::SBTypeCategory() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeCategory::SBTypeCategory(const char *name) {
   DataVisualization::Categories::GetCategory(ConstString(name), m_opaque_sp);
@@ -36,23 +34,23 @@ SBTypeCategory::SBTypeCategory(const char *name) {
 
 SBTypeCategory::SBTypeCategory(const lldb::SBTypeCategory &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeCategory, (const lldb::SBTypeCategory &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBTypeCategory::~SBTypeCategory() = default;
 
 bool SBTypeCategory::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeCategory, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeCategory::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeCategory, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_sp.get() != nullptr);
 }
 
 bool SBTypeCategory::GetEnabled() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeCategory, GetEnabled);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -60,7 +58,7 @@ bool SBTypeCategory::GetEnabled() {
 }
 
 void SBTypeCategory::SetEnabled(bool enabled) {
-  LLDB_RECORD_METHOD(void, SBTypeCategory, SetEnabled, (bool), enabled);
+  LLDB_INSTRUMENT_VA(this, enabled);
 
   if (!IsValid())
     return;
@@ -71,7 +69,7 @@ void SBTypeCategory::SetEnabled(bool enabled) {
 }
 
 const char *SBTypeCategory::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeCategory, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return nullptr;
@@ -79,8 +77,7 @@ const char *SBTypeCategory::GetName() {
 }
 
 lldb::LanguageType SBTypeCategory::GetLanguageAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::LanguageType, SBTypeCategory, GetLanguageAtIndex,
-                     (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   if (IsValid())
     return m_opaque_sp->GetLanguageAtIndex(idx);
@@ -88,7 +85,7 @@ lldb::LanguageType SBTypeCategory::GetLanguageAtIndex(uint32_t idx) {
 }
 
 uint32_t SBTypeCategory::GetNumLanguages() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumLanguages);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetNumLanguages();
@@ -96,15 +93,14 @@ uint32_t SBTypeCategory::GetNumLanguages() {
 }
 
 void SBTypeCategory::AddLanguage(lldb::LanguageType language) {
-  LLDB_RECORD_METHOD(void, SBTypeCategory, AddLanguage, (lldb::LanguageType),
-                     language);
+  LLDB_INSTRUMENT_VA(this, language);
 
   if (IsValid())
     m_opaque_sp->AddLanguage(language);
 }
 
 uint32_t SBTypeCategory::GetNumFormats() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumFormats);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return 0;
@@ -114,7 +110,7 @@ uint32_t SBTypeCategory::GetNumFormats() {
 }
 
 uint32_t SBTypeCategory::GetNumSummaries() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumSummaries);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return 0;
@@ -123,7 +119,7 @@ uint32_t SBTypeCategory::GetNumSummaries() {
 }
 
 uint32_t SBTypeCategory::GetNumFilters() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumFilters);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return 0;
@@ -132,7 +128,7 @@ uint32_t SBTypeCategory::GetNumFilters() {
 }
 
 uint32_t SBTypeCategory::GetNumSynthetics() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeCategory, GetNumSynthetics);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return 0;
@@ -142,8 +138,7 @@ uint32_t SBTypeCategory::GetNumSynthetics() {
 
 lldb::SBTypeNameSpecifier
 SBTypeCategory::GetTypeNameSpecifierForFilterAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeNameSpecifier, SBTypeCategory,
-                     GetTypeNameSpecifierForFilterAtIndex, (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (!IsValid())
     return SBTypeNameSpecifier();
@@ -153,8 +148,7 @@ SBTypeCategory::GetTypeNameSpecifierForFilterAtIndex(uint32_t index) {
 
 lldb::SBTypeNameSpecifier
 SBTypeCategory::GetTypeNameSpecifierForFormatAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeNameSpecifier, SBTypeCategory,
-                     GetTypeNameSpecifierForFormatAtIndex, (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (!IsValid())
     return SBTypeNameSpecifier();
@@ -164,8 +158,7 @@ SBTypeCategory::GetTypeNameSpecifierForFormatAtIndex(uint32_t index) {
 
 lldb::SBTypeNameSpecifier
 SBTypeCategory::GetTypeNameSpecifierForSummaryAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeNameSpecifier, SBTypeCategory,
-                     GetTypeNameSpecifierForSummaryAtIndex, (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (!IsValid())
     return SBTypeNameSpecifier();
@@ -175,9 +168,7 @@ SBTypeCategory::GetTypeNameSpecifierForSummaryAtIndex(uint32_t index) {
 
 lldb::SBTypeNameSpecifier
 SBTypeCategory::GetTypeNameSpecifierForSyntheticAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeNameSpecifier, SBTypeCategory,
-                     GetTypeNameSpecifierForSyntheticAtIndex, (uint32_t),
-                     index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (!IsValid())
     return SBTypeNameSpecifier();
@@ -186,8 +177,7 @@ SBTypeCategory::GetTypeNameSpecifierForSyntheticAtIndex(uint32_t index) {
 }
 
 SBTypeFilter SBTypeCategory::GetFilterForType(SBTypeNameSpecifier spec) {
-  LLDB_RECORD_METHOD(lldb::SBTypeFilter, SBTypeCategory, GetFilterForType,
-                     (lldb::SBTypeNameSpecifier), spec);
+  LLDB_INSTRUMENT_VA(this, spec);
 
   if (!IsValid())
     return SBTypeFilter();
@@ -213,8 +203,7 @@ SBTypeFilter SBTypeCategory::GetFilterForType(SBTypeNameSpecifier spec) {
   return lldb::SBTypeFilter(filter_sp);
 }
 SBTypeFormat SBTypeCategory::GetFormatForType(SBTypeNameSpecifier spec) {
-  LLDB_RECORD_METHOD(lldb::SBTypeFormat, SBTypeCategory, GetFormatForType,
-                     (lldb::SBTypeNameSpecifier), spec);
+  LLDB_INSTRUMENT_VA(this, spec);
 
   if (!IsValid())
     return SBTypeFormat();
@@ -238,8 +227,7 @@ SBTypeFormat SBTypeCategory::GetFormatForType(SBTypeNameSpecifier spec) {
 }
 
 SBTypeSummary SBTypeCategory::GetSummaryForType(SBTypeNameSpecifier spec) {
-  LLDB_RECORD_METHOD(lldb::SBTypeSummary, SBTypeCategory, GetSummaryForType,
-                     (lldb::SBTypeNameSpecifier), spec);
+  LLDB_INSTRUMENT_VA(this, spec);
 
   if (!IsValid())
     return SBTypeSummary();
@@ -263,8 +251,7 @@ SBTypeSummary SBTypeCategory::GetSummaryForType(SBTypeNameSpecifier spec) {
 }
 
 SBTypeSynthetic SBTypeCategory::GetSyntheticForType(SBTypeNameSpecifier spec) {
-  LLDB_RECORD_METHOD(lldb::SBTypeSynthetic, SBTypeCategory, GetSyntheticForType,
-                     (lldb::SBTypeNameSpecifier), spec);
+  LLDB_INSTRUMENT_VA(this, spec);
 
   if (!IsValid())
     return SBTypeSynthetic();
@@ -291,8 +278,7 @@ SBTypeSynthetic SBTypeCategory::GetSyntheticForType(SBTypeNameSpecifier spec) {
 }
 
 SBTypeFilter SBTypeCategory::GetFilterAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeFilter, SBTypeCategory, GetFilterAtIndex,
-                     (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (!IsValid())
     return SBTypeFilter();
@@ -309,8 +295,7 @@ SBTypeFilter SBTypeCategory::GetFilterAtIndex(uint32_t index) {
 }
 
 SBTypeFormat SBTypeCategory::GetFormatAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeFormat, SBTypeCategory, GetFormatAtIndex,
-                     (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (!IsValid())
     return SBTypeFormat();
@@ -318,8 +303,7 @@ SBTypeFormat SBTypeCategory::GetFormatAtIndex(uint32_t index) {
 }
 
 SBTypeSummary SBTypeCategory::GetSummaryAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeSummary, SBTypeCategory, GetSummaryAtIndex,
-                     (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (!IsValid())
     return SBTypeSummary();
@@ -327,8 +311,7 @@ SBTypeSummary SBTypeCategory::GetSummaryAtIndex(uint32_t index) {
 }
 
 SBTypeSynthetic SBTypeCategory::GetSyntheticAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeSynthetic, SBTypeCategory, GetSyntheticAtIndex,
-                     (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (!IsValid())
     return SBTypeSynthetic();
@@ -346,9 +329,7 @@ SBTypeSynthetic SBTypeCategory::GetSyntheticAtIndex(uint32_t index) {
 
 bool SBTypeCategory::AddTypeFormat(SBTypeNameSpecifier type_name,
                                    SBTypeFormat format) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, AddTypeFormat,
-                     (lldb::SBTypeNameSpecifier, lldb::SBTypeFormat), type_name,
-                     format);
+  LLDB_INSTRUMENT_VA(this, type_name, format);
 
   if (!IsValid())
     return false;
@@ -370,8 +351,7 @@ bool SBTypeCategory::AddTypeFormat(SBTypeNameSpecifier type_name,
 }
 
 bool SBTypeCategory::DeleteTypeFormat(SBTypeNameSpecifier type_name) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, DeleteTypeFormat,
-                     (lldb::SBTypeNameSpecifier), type_name);
+  LLDB_INSTRUMENT_VA(this, type_name);
 
   if (!IsValid())
     return false;
@@ -389,9 +369,7 @@ bool SBTypeCategory::DeleteTypeFormat(SBTypeNameSpecifier type_name) {
 
 bool SBTypeCategory::AddTypeSummary(SBTypeNameSpecifier type_name,
                                     SBTypeSummary summary) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, AddTypeSummary,
-                     (lldb::SBTypeNameSpecifier, lldb::SBTypeSummary),
-                     type_name, summary);
+  LLDB_INSTRUMENT_VA(this, type_name, summary);
 
   if (!IsValid())
     return false;
@@ -447,8 +425,7 @@ bool SBTypeCategory::AddTypeSummary(SBTypeNameSpecifier type_name,
 }
 
 bool SBTypeCategory::DeleteTypeSummary(SBTypeNameSpecifier type_name) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, DeleteTypeSummary,
-                     (lldb::SBTypeNameSpecifier), type_name);
+  LLDB_INSTRUMENT_VA(this, type_name);
 
   if (!IsValid())
     return false;
@@ -466,9 +443,7 @@ bool SBTypeCategory::DeleteTypeSummary(SBTypeNameSpecifier type_name) {
 
 bool SBTypeCategory::AddTypeFilter(SBTypeNameSpecifier type_name,
                                    SBTypeFilter filter) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, AddTypeFilter,
-                     (lldb::SBTypeNameSpecifier, lldb::SBTypeFilter), type_name,
-                     filter);
+  LLDB_INSTRUMENT_VA(this, type_name, filter);
 
   if (!IsValid())
     return false;
@@ -490,8 +465,7 @@ bool SBTypeCategory::AddTypeFilter(SBTypeNameSpecifier type_name,
 }
 
 bool SBTypeCategory::DeleteTypeFilter(SBTypeNameSpecifier type_name) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, DeleteTypeFilter,
-                     (lldb::SBTypeNameSpecifier), type_name);
+  LLDB_INSTRUMENT_VA(this, type_name);
 
   if (!IsValid())
     return false;
@@ -509,9 +483,7 @@ bool SBTypeCategory::DeleteTypeFilter(SBTypeNameSpecifier type_name) {
 
 bool SBTypeCategory::AddTypeSynthetic(SBTypeNameSpecifier type_name,
                                       SBTypeSynthetic synth) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, AddTypeSynthetic,
-                     (lldb::SBTypeNameSpecifier, lldb::SBTypeSynthetic),
-                     type_name, synth);
+  LLDB_INSTRUMENT_VA(this, type_name, synth);
 
   if (!IsValid())
     return false;
@@ -567,8 +539,7 @@ bool SBTypeCategory::AddTypeSynthetic(SBTypeNameSpecifier type_name,
 }
 
 bool SBTypeCategory::DeleteTypeSynthetic(SBTypeNameSpecifier type_name) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, DeleteTypeSynthetic,
-                     (lldb::SBTypeNameSpecifier), type_name);
+  LLDB_INSTRUMENT_VA(this, type_name);
 
   if (!IsValid())
     return false;
@@ -586,9 +557,7 @@ bool SBTypeCategory::DeleteTypeSynthetic(SBTypeNameSpecifier type_name) {
 
 bool SBTypeCategory::GetDescription(lldb::SBStream &description,
                                     lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   if (!IsValid())
     return false;
@@ -598,9 +567,7 @@ bool SBTypeCategory::GetDescription(lldb::SBStream &description,
 
 lldb::SBTypeCategory &SBTypeCategory::
 operator=(const lldb::SBTypeCategory &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBTypeCategory &,
-                     SBTypeCategory, operator=,(const lldb::SBTypeCategory &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -609,8 +576,7 @@ operator=(const lldb::SBTypeCategory &rhs) {
 }
 
 bool SBTypeCategory::operator==(lldb::SBTypeCategory &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, operator==,(lldb::SBTypeCategory &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -619,8 +585,7 @@ bool SBTypeCategory::operator==(lldb::SBTypeCategory &rhs) {
 }
 
 bool SBTypeCategory::operator!=(lldb::SBTypeCategory &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeCategory, operator!=,(lldb::SBTypeCategory &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return rhs.IsValid();
diff --git a/lldb/source/API/SBTypeEnumMember.cpp b/lldb/source/API/SBTypeEnumMember.cpp
index d5fc382a35e8e..a3d99bd57e310 100644
--- a/lldb/source/API/SBTypeEnumMember.cpp
+++ b/lldb/source/API/SBTypeEnumMember.cpp
@@ -7,13 +7,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBTypeEnumMember.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBDefines.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBType.h"
 #include "lldb/Symbol/CompilerType.h"
 #include "lldb/Symbol/Type.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Stream.h"
 
 #include <memory>
@@ -21,9 +21,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBTypeEnumMember::SBTypeEnumMember() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeEnumMember);
-}
+SBTypeEnumMember::SBTypeEnumMember() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeEnumMember::~SBTypeEnumMember() = default;
 
@@ -32,16 +30,13 @@ SBTypeEnumMember::SBTypeEnumMember(
     : m_opaque_sp(enum_member_sp) {}
 
 SBTypeEnumMember::SBTypeEnumMember(const SBTypeEnumMember &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeEnumMember, (const lldb::SBTypeEnumMember &),
-                          rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_sp = clone(rhs.m_opaque_sp);
 }
 
 SBTypeEnumMember &SBTypeEnumMember::operator=(const SBTypeEnumMember &rhs) {
-  LLDB_RECORD_METHOD(
-      SBTypeEnumMember &,
-      SBTypeEnumMember, operator=,(const lldb::SBTypeEnumMember &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_sp = clone(rhs.m_opaque_sp);
@@ -49,17 +44,17 @@ SBTypeEnumMember &SBTypeEnumMember::operator=(const SBTypeEnumMember &rhs) {
 }
 
 bool SBTypeEnumMember::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeEnumMember, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeEnumMember::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeEnumMember, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get();
 }
 
 const char *SBTypeEnumMember::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeEnumMember, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp.get())
     return m_opaque_sp->GetName().GetCString();
@@ -67,7 +62,7 @@ const char *SBTypeEnumMember::GetName() {
 }
 
 int64_t SBTypeEnumMember::GetValueAsSigned() {
-  LLDB_RECORD_METHOD_NO_ARGS(int64_t, SBTypeEnumMember, GetValueAsSigned);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp.get())
     return m_opaque_sp->GetValueAsSigned();
@@ -75,7 +70,7 @@ int64_t SBTypeEnumMember::GetValueAsSigned() {
 }
 
 uint64_t SBTypeEnumMember::GetValueAsUnsigned() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint64_t, SBTypeEnumMember, GetValueAsUnsigned);
+  LLDB_INSTRUMENT_VA(this);
 
   if (m_opaque_sp.get())
     return m_opaque_sp->GetValueAsUnsigned();
@@ -83,7 +78,7 @@ uint64_t SBTypeEnumMember::GetValueAsUnsigned() {
 }
 
 SBType SBTypeEnumMember::GetType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeEnumMember, GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   SBType sb_type;
   if (m_opaque_sp.get()) {
@@ -108,13 +103,12 @@ const TypeEnumMemberImpl &SBTypeEnumMember::ref() const {
 
 SBTypeEnumMemberList::SBTypeEnumMemberList()
     : m_opaque_up(new TypeEnumMemberListImpl()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeEnumMemberList);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBTypeEnumMemberList::SBTypeEnumMemberList(const SBTypeEnumMemberList &rhs)
     : m_opaque_up(new TypeEnumMemberListImpl()) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeEnumMemberList,
-                          (const lldb::SBTypeEnumMemberList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   for (uint32_t i = 0,
                 rhs_size = const_cast<SBTypeEnumMemberList &>(rhs).GetSize();
@@ -123,21 +117,18 @@ SBTypeEnumMemberList::SBTypeEnumMemberList(const SBTypeEnumMemberList &rhs)
 }
 
 bool SBTypeEnumMemberList::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeEnumMemberList, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeEnumMemberList::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeEnumMemberList, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_up != nullptr);
 }
 
 SBTypeEnumMemberList &SBTypeEnumMemberList::
 operator=(const SBTypeEnumMemberList &rhs) {
-  LLDB_RECORD_METHOD(
-      lldb::SBTypeEnumMemberList &,
-      SBTypeEnumMemberList, operator=,(const lldb::SBTypeEnumMemberList &),
-      rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_up = std::make_unique<TypeEnumMemberListImpl>();
@@ -151,8 +142,7 @@ operator=(const SBTypeEnumMemberList &rhs) {
 }
 
 void SBTypeEnumMemberList::Append(SBTypeEnumMember enum_member) {
-  LLDB_RECORD_METHOD(void, SBTypeEnumMemberList, Append,
-                     (lldb::SBTypeEnumMember), enum_member);
+  LLDB_INSTRUMENT_VA(this, enum_member);
 
   if (enum_member.IsValid())
     m_opaque_up->Append(enum_member.m_opaque_sp);
@@ -160,8 +150,7 @@ void SBTypeEnumMemberList::Append(SBTypeEnumMember enum_member) {
 
 SBTypeEnumMember
 SBTypeEnumMemberList::GetTypeEnumMemberAtIndex(uint32_t index) {
-  LLDB_RECORD_METHOD(lldb::SBTypeEnumMember, SBTypeEnumMemberList,
-                     GetTypeEnumMemberAtIndex, (uint32_t), index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (m_opaque_up)
     return SBTypeEnumMember(m_opaque_up->GetTypeEnumMemberAtIndex(index));
@@ -169,7 +158,7 @@ SBTypeEnumMemberList::GetTypeEnumMemberAtIndex(uint32_t index) {
 }
 
 uint32_t SBTypeEnumMemberList::GetSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeEnumMemberList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetSize();
 }
@@ -178,9 +167,7 @@ SBTypeEnumMemberList::~SBTypeEnumMemberList() = default;
 
 bool SBTypeEnumMember::GetDescription(
     lldb::SBStream &description, lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeEnumMember, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   Stream &strm = description.ref();
 
diff --git a/lldb/source/API/SBTypeFilter.cpp b/lldb/source/API/SBTypeFilter.cpp
index a36d93854098f..94f222b254b22 100644
--- a/lldb/source/API/SBTypeFilter.cpp
+++ b/lldb/source/API/SBTypeFilter.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBTypeFilter.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBStream.h"
 
@@ -16,32 +16,32 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBTypeFilter::SBTypeFilter() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeFilter); }
+SBTypeFilter::SBTypeFilter() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeFilter::SBTypeFilter(uint32_t options)
     : m_opaque_sp(TypeFilterImplSP(new TypeFilterImpl(options))) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeFilter, (uint32_t), options);
+  LLDB_INSTRUMENT_VA(this, options);
 }
 
 SBTypeFilter::SBTypeFilter(const lldb::SBTypeFilter &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeFilter, (const lldb::SBTypeFilter &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBTypeFilter::~SBTypeFilter() = default;
 
 bool SBTypeFilter::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFilter, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeFilter::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFilter, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 uint32_t SBTypeFilter::GetOptions() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeFilter, GetOptions);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetOptions();
@@ -49,7 +49,7 @@ uint32_t SBTypeFilter::GetOptions() {
 }
 
 void SBTypeFilter::SetOptions(uint32_t value) {
-  LLDB_RECORD_METHOD(void, SBTypeFilter, SetOptions, (uint32_t), value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   if (CopyOnWrite_Impl())
     m_opaque_sp->SetOptions(value);
@@ -57,9 +57,7 @@ void SBTypeFilter::SetOptions(uint32_t value) {
 
 bool SBTypeFilter::GetDescription(lldb::SBStream &description,
                                   lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeFilter, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   if (!IsValid())
     return false;
@@ -70,15 +68,14 @@ bool SBTypeFilter::GetDescription(lldb::SBStream &description,
 }
 
 void SBTypeFilter::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBTypeFilter, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   if (CopyOnWrite_Impl())
     m_opaque_sp->Clear();
 }
 
 uint32_t SBTypeFilter::GetNumberOfExpressionPaths() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeFilter,
-                             GetNumberOfExpressionPaths);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetCount();
@@ -86,8 +83,7 @@ uint32_t SBTypeFilter::GetNumberOfExpressionPaths() {
 }
 
 const char *SBTypeFilter::GetExpressionPathAtIndex(uint32_t i) {
-  LLDB_RECORD_METHOD(const char *, SBTypeFilter, GetExpressionPathAtIndex,
-                     (uint32_t), i);
+  LLDB_INSTRUMENT_VA(this, i);
 
   if (IsValid()) {
     const char *item = m_opaque_sp->GetExpressionPathAtIndex(i);
@@ -99,8 +95,7 @@ const char *SBTypeFilter::GetExpressionPathAtIndex(uint32_t i) {
 }
 
 bool SBTypeFilter::ReplaceExpressionPathAtIndex(uint32_t i, const char *item) {
-  LLDB_RECORD_METHOD(bool, SBTypeFilter, ReplaceExpressionPathAtIndex,
-                     (uint32_t, const char *), i, item);
+  LLDB_INSTRUMENT_VA(this, i, item);
 
   if (CopyOnWrite_Impl())
     return m_opaque_sp->SetExpressionPathAtIndex(i, item);
@@ -109,16 +104,14 @@ bool SBTypeFilter::ReplaceExpressionPathAtIndex(uint32_t i, const char *item) {
 }
 
 void SBTypeFilter::AppendExpressionPath(const char *item) {
-  LLDB_RECORD_METHOD(void, SBTypeFilter, AppendExpressionPath, (const char *),
-                     item);
+  LLDB_INSTRUMENT_VA(this, item);
 
   if (CopyOnWrite_Impl())
     m_opaque_sp->AddExpressionPath(item);
 }
 
 lldb::SBTypeFilter &SBTypeFilter::operator=(const lldb::SBTypeFilter &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBTypeFilter &,
-                     SBTypeFilter, operator=,(const lldb::SBTypeFilter &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -127,8 +120,7 @@ lldb::SBTypeFilter &SBTypeFilter::operator=(const lldb::SBTypeFilter &rhs) {
 }
 
 bool SBTypeFilter::operator==(lldb::SBTypeFilter &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeFilter, operator==,(lldb::SBTypeFilter &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -137,8 +129,7 @@ bool SBTypeFilter::operator==(lldb::SBTypeFilter &rhs) {
 }
 
 bool SBTypeFilter::IsEqualTo(lldb::SBTypeFilter &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeFilter, IsEqualTo, (lldb::SBTypeFilter &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -155,8 +146,7 @@ bool SBTypeFilter::IsEqualTo(lldb::SBTypeFilter &rhs) {
 }
 
 bool SBTypeFilter::operator!=(lldb::SBTypeFilter &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeFilter, operator!=,(lldb::SBTypeFilter &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
diff --git a/lldb/source/API/SBTypeFormat.cpp b/lldb/source/API/SBTypeFormat.cpp
index 139bbebaa3f43..86e11e8b8fde6 100644
--- a/lldb/source/API/SBTypeFormat.cpp
+++ b/lldb/source/API/SBTypeFormat.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBTypeFormat.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBStream.h"
 
@@ -16,41 +16,39 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBTypeFormat::SBTypeFormat() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeFormat); }
+SBTypeFormat::SBTypeFormat() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeFormat::SBTypeFormat(lldb::Format format, uint32_t options)
     : m_opaque_sp(
           TypeFormatImplSP(new TypeFormatImpl_Format(format, options))) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeFormat, (lldb::Format, uint32_t), format,
-                          options);
+  LLDB_INSTRUMENT_VA(this, format, options);
 }
 
 SBTypeFormat::SBTypeFormat(const char *type, uint32_t options)
     : m_opaque_sp(TypeFormatImplSP(new TypeFormatImpl_EnumType(
           ConstString(type ? type : ""), options))) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeFormat, (const char *, uint32_t), type,
-                          options);
+  LLDB_INSTRUMENT_VA(this, type, options);
 }
 
 SBTypeFormat::SBTypeFormat(const lldb::SBTypeFormat &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeFormat, (const lldb::SBTypeFormat &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBTypeFormat::~SBTypeFormat() = default;
 
 bool SBTypeFormat::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFormat, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeFormat::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeFormat, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 lldb::Format SBTypeFormat::GetFormat() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::Format, SBTypeFormat, GetFormat);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid() && m_opaque_sp->GetType() == TypeFormatImpl::Type::eTypeFormat)
     return ((TypeFormatImpl_Format *)m_opaque_sp.get())->GetFormat();
@@ -58,7 +56,7 @@ lldb::Format SBTypeFormat::GetFormat() {
 }
 
 const char *SBTypeFormat::GetTypeName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeFormat, GetTypeName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid() && m_opaque_sp->GetType() == TypeFormatImpl::Type::eTypeEnum)
     return ((TypeFormatImpl_EnumType *)m_opaque_sp.get())
@@ -68,7 +66,7 @@ const char *SBTypeFormat::GetTypeName() {
 }
 
 uint32_t SBTypeFormat::GetOptions() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeFormat, GetOptions);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_sp->GetOptions();
@@ -76,14 +74,14 @@ uint32_t SBTypeFormat::GetOptions() {
 }
 
 void SBTypeFormat::SetFormat(lldb::Format fmt) {
-  LLDB_RECORD_METHOD(void, SBTypeFormat, SetFormat, (lldb::Format), fmt);
+  LLDB_INSTRUMENT_VA(this, fmt);
 
   if (CopyOnWrite_Impl(Type::eTypeFormat))
     ((TypeFormatImpl_Format *)m_opaque_sp.get())->SetFormat(fmt);
 }
 
 void SBTypeFormat::SetTypeName(const char *type) {
-  LLDB_RECORD_METHOD(void, SBTypeFormat, SetTypeName, (const char *), type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   if (CopyOnWrite_Impl(Type::eTypeEnum))
     ((TypeFormatImpl_EnumType *)m_opaque_sp.get())
@@ -91,7 +89,7 @@ void SBTypeFormat::SetTypeName(const char *type) {
 }
 
 void SBTypeFormat::SetOptions(uint32_t value) {
-  LLDB_RECORD_METHOD(void, SBTypeFormat, SetOptions, (uint32_t), value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   if (CopyOnWrite_Impl(Type::eTypeKeepSame))
     m_opaque_sp->SetOptions(value);
@@ -99,9 +97,7 @@ void SBTypeFormat::SetOptions(uint32_t value) {
 
 bool SBTypeFormat::GetDescription(lldb::SBStream &description,
                                   lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeFormat, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   if (!IsValid())
     return false;
@@ -112,8 +108,7 @@ bool SBTypeFormat::GetDescription(lldb::SBStream &description,
 }
 
 lldb::SBTypeFormat &SBTypeFormat::operator=(const lldb::SBTypeFormat &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBTypeFormat &,
-                     SBTypeFormat, operator=,(const lldb::SBTypeFormat &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -122,8 +117,7 @@ lldb::SBTypeFormat &SBTypeFormat::operator=(const lldb::SBTypeFormat &rhs) {
 }
 
 bool SBTypeFormat::operator==(lldb::SBTypeFormat &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeFormat, operator==,(lldb::SBTypeFormat &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -131,8 +125,7 @@ bool SBTypeFormat::operator==(lldb::SBTypeFormat &rhs) {
 }
 
 bool SBTypeFormat::IsEqualTo(lldb::SBTypeFormat &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeFormat, IsEqualTo, (lldb::SBTypeFormat &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -144,8 +137,7 @@ bool SBTypeFormat::IsEqualTo(lldb::SBTypeFormat &rhs) {
 }
 
 bool SBTypeFormat::operator!=(lldb::SBTypeFormat &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeFormat, operator!=,(lldb::SBTypeFormat &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
diff --git a/lldb/source/API/SBTypeNameSpecifier.cpp b/lldb/source/API/SBTypeNameSpecifier.cpp
index 3b9dd8300c873..bc83a1d664d0d 100644
--- a/lldb/source/API/SBTypeNameSpecifier.cpp
+++ b/lldb/source/API/SBTypeNameSpecifier.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBTypeNameSpecifier.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBType.h"
@@ -17,21 +17,18 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBTypeNameSpecifier::SBTypeNameSpecifier() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeNameSpecifier);
-}
+SBTypeNameSpecifier::SBTypeNameSpecifier() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeNameSpecifier::SBTypeNameSpecifier(const char *name, bool is_regex)
     : m_opaque_sp(new TypeNameSpecifierImpl(name, is_regex)) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeNameSpecifier, (const char *, bool), name,
-                          is_regex);
+  LLDB_INSTRUMENT_VA(this, name, is_regex);
 
   if (name == nullptr || (*name) == 0)
     m_opaque_sp.reset();
 }
 
 SBTypeNameSpecifier::SBTypeNameSpecifier(SBType type) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeNameSpecifier, (lldb::SBType), type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   if (type.IsValid())
     m_opaque_sp = TypeNameSpecifierImplSP(
@@ -40,24 +37,23 @@ SBTypeNameSpecifier::SBTypeNameSpecifier(SBType type) {
 
 SBTypeNameSpecifier::SBTypeNameSpecifier(const lldb::SBTypeNameSpecifier &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeNameSpecifier,
-                          (const lldb::SBTypeNameSpecifier &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBTypeNameSpecifier::~SBTypeNameSpecifier() = default;
 
 bool SBTypeNameSpecifier::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeNameSpecifier, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeNameSpecifier::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeNameSpecifier, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 const char *SBTypeNameSpecifier::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeNameSpecifier, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return nullptr;
@@ -66,7 +62,7 @@ const char *SBTypeNameSpecifier::GetName() {
 }
 
 SBType SBTypeNameSpecifier::GetType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBTypeNameSpecifier, GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return SBType();
@@ -77,7 +73,7 @@ SBType SBTypeNameSpecifier::GetType() {
 }
 
 bool SBTypeNameSpecifier::IsRegex() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeNameSpecifier, IsRegex);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -87,9 +83,7 @@ bool SBTypeNameSpecifier::IsRegex() {
 
 bool SBTypeNameSpecifier::GetDescription(
     lldb::SBStream &description, lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeNameSpecifier, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   if (!IsValid())
     return false;
@@ -100,9 +94,7 @@ bool SBTypeNameSpecifier::GetDescription(
 
 lldb::SBTypeNameSpecifier &SBTypeNameSpecifier::
 operator=(const lldb::SBTypeNameSpecifier &rhs) {
-  LLDB_RECORD_METHOD(
-      lldb::SBTypeNameSpecifier &,
-      SBTypeNameSpecifier, operator=,(const lldb::SBTypeNameSpecifier &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -111,8 +103,7 @@ operator=(const lldb::SBTypeNameSpecifier &rhs) {
 }
 
 bool SBTypeNameSpecifier::operator==(lldb::SBTypeNameSpecifier &rhs) {
-  LLDB_RECORD_METHOD(
-      bool, SBTypeNameSpecifier, operator==,(lldb::SBTypeNameSpecifier &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -120,8 +111,7 @@ bool SBTypeNameSpecifier::operator==(lldb::SBTypeNameSpecifier &rhs) {
 }
 
 bool SBTypeNameSpecifier::IsEqualTo(lldb::SBTypeNameSpecifier &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeNameSpecifier, IsEqualTo,
-                     (lldb::SBTypeNameSpecifier &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -135,8 +125,7 @@ bool SBTypeNameSpecifier::IsEqualTo(lldb::SBTypeNameSpecifier &rhs) {
 }
 
 bool SBTypeNameSpecifier::operator!=(lldb::SBTypeNameSpecifier &rhs) {
-  LLDB_RECORD_METHOD(
-      bool, SBTypeNameSpecifier, operator!=,(lldb::SBTypeNameSpecifier &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
diff --git a/lldb/source/API/SBTypeSummary.cpp b/lldb/source/API/SBTypeSummary.cpp
index af0888078d8fc..a65dfc987ad21 100644
--- a/lldb/source/API/SBTypeSummary.cpp
+++ b/lldb/source/API/SBTypeSummary.cpp
@@ -7,11 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBTypeSummary.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "Utils.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBValue.h"
 #include "lldb/DataFormatters/DataVisualization.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "llvm/Support/Casting.h"
 
@@ -19,15 +19,14 @@ using namespace lldb;
 using namespace lldb_private;
 
 SBTypeSummaryOptions::SBTypeSummaryOptions() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeSummaryOptions);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up = std::make_unique<TypeSummaryOptions>();
 }
 
 SBTypeSummaryOptions::SBTypeSummaryOptions(
     const lldb::SBTypeSummaryOptions &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeSummaryOptions,
-                          (const lldb::SBTypeSummaryOptions &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_up = clone(rhs.m_opaque_up);
 }
@@ -35,18 +34,17 @@ SBTypeSummaryOptions::SBTypeSummaryOptions(
 SBTypeSummaryOptions::~SBTypeSummaryOptions() = default;
 
 bool SBTypeSummaryOptions::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSummaryOptions, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeSummaryOptions::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSummaryOptions, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up.get();
 }
 
 lldb::LanguageType SBTypeSummaryOptions::GetLanguage() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::LanguageType, SBTypeSummaryOptions,
-                             GetLanguage);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_up->GetLanguage();
@@ -54,8 +52,7 @@ lldb::LanguageType SBTypeSummaryOptions::GetLanguage() {
 }
 
 lldb::TypeSummaryCapping SBTypeSummaryOptions::GetCapping() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::TypeSummaryCapping, SBTypeSummaryOptions,
-                             GetCapping);
+  LLDB_INSTRUMENT_VA(this);
 
   if (IsValid())
     return m_opaque_up->GetCapping();
@@ -63,16 +60,14 @@ lldb::TypeSummaryCapping SBTypeSummaryOptions::GetCapping() {
 }
 
 void SBTypeSummaryOptions::SetLanguage(lldb::LanguageType l) {
-  LLDB_RECORD_METHOD(void, SBTypeSummaryOptions, SetLanguage,
-                     (lldb::LanguageType), l);
+  LLDB_INSTRUMENT_VA(this, l);
 
   if (IsValid())
     m_opaque_up->SetLanguage(l);
 }
 
 void SBTypeSummaryOptions::SetCapping(lldb::TypeSummaryCapping c) {
-  LLDB_RECORD_METHOD(void, SBTypeSummaryOptions, SetCapping,
-                     (lldb::TypeSummaryCapping), c);
+  LLDB_INSTRUMENT_VA(this, c);
 
   if (IsValid())
     m_opaque_up->SetCapping(c);
@@ -102,20 +97,14 @@ const lldb_private::TypeSummaryOptions &SBTypeSummaryOptions::ref() const {
 SBTypeSummaryOptions::SBTypeSummaryOptions(
     const lldb_private::TypeSummaryOptions &lldb_object)
     : m_opaque_up(std::make_unique<TypeSummaryOptions>(lldb_object)) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeSummaryOptions,
-                          (const lldb_private::TypeSummaryOptions &),
-                          lldb_object);
+  LLDB_INSTRUMENT_VA(this, lldb_object);
 }
 
-SBTypeSummary::SBTypeSummary() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeSummary);
-}
+SBTypeSummary::SBTypeSummary() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeSummary SBTypeSummary::CreateWithSummaryString(const char *data,
                                                      uint32_t options) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSummary, SBTypeSummary,
-                            CreateWithSummaryString, (const char *, uint32_t),
-                            data, options);
+  LLDB_INSTRUMENT_VA(data, options);
 
   if (!data || data[0] == 0)
     return SBTypeSummary();
@@ -126,9 +115,7 @@ SBTypeSummary SBTypeSummary::CreateWithSummaryString(const char *data,
 
 SBTypeSummary SBTypeSummary::CreateWithFunctionName(const char *data,
                                                     uint32_t options) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSummary, SBTypeSummary,
-                            CreateWithFunctionName, (const char *, uint32_t),
-                            data, options);
+  LLDB_INSTRUMENT_VA(data, options);
 
   if (!data || data[0] == 0)
     return SBTypeSummary();
@@ -139,9 +126,7 @@ SBTypeSummary SBTypeSummary::CreateWithFunctionName(const char *data,
 
 SBTypeSummary SBTypeSummary::CreateWithScriptCode(const char *data,
                                                   uint32_t options) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSummary, SBTypeSummary,
-                            CreateWithScriptCode, (const char *, uint32_t),
-                            data, options);
+  LLDB_INSTRUMENT_VA(data, options);
 
   if (!data || data[0] == 0)
     return SBTypeSummary();
@@ -153,10 +138,7 @@ SBTypeSummary SBTypeSummary::CreateWithScriptCode(const char *data,
 SBTypeSummary SBTypeSummary::CreateWithCallback(FormatCallback cb,
                                                 uint32_t options,
                                                 const char *description) {
-  LLDB_RECORD_STATIC_METHOD(
-      lldb::SBTypeSummary, SBTypeSummary, CreateWithCallback,
-      (lldb::SBTypeSummary::FormatCallback, uint32_t, const char *), cb,
-      options, description);
+  LLDB_INSTRUMENT_VA(cb, options, description);
 
   SBTypeSummary retval;
   if (cb) {
@@ -180,23 +162,23 @@ SBTypeSummary SBTypeSummary::CreateWithCallback(FormatCallback cb,
 
 SBTypeSummary::SBTypeSummary(const lldb::SBTypeSummary &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeSummary, (const lldb::SBTypeSummary &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBTypeSummary::~SBTypeSummary() = default;
 
 bool SBTypeSummary::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSummary, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeSummary::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSummary, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 bool SBTypeSummary::IsFunctionCode() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSummary, IsFunctionCode);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -209,7 +191,7 @@ bool SBTypeSummary::IsFunctionCode() {
 }
 
 bool SBTypeSummary::IsFunctionName() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSummary, IsFunctionName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -222,7 +204,7 @@ bool SBTypeSummary::IsFunctionName() {
 }
 
 bool SBTypeSummary::IsSummaryString() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSummary, IsSummaryString);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -231,7 +213,7 @@ bool SBTypeSummary::IsSummaryString() {
 }
 
 const char *SBTypeSummary::GetData() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeSummary, GetData);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return nullptr;
@@ -249,7 +231,7 @@ const char *SBTypeSummary::GetData() {
 }
 
 uint32_t SBTypeSummary::GetOptions() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeSummary, GetOptions);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return lldb::eTypeOptionNone;
@@ -257,7 +239,7 @@ uint32_t SBTypeSummary::GetOptions() {
 }
 
 void SBTypeSummary::SetOptions(uint32_t value) {
-  LLDB_RECORD_METHOD(void, SBTypeSummary, SetOptions, (uint32_t), value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   if (!CopyOnWrite_Impl())
     return;
@@ -265,8 +247,7 @@ void SBTypeSummary::SetOptions(uint32_t value) {
 }
 
 void SBTypeSummary::SetSummaryString(const char *data) {
-  LLDB_RECORD_METHOD(void, SBTypeSummary, SetSummaryString, (const char *),
-                     data);
+  LLDB_INSTRUMENT_VA(this, data);
 
   if (!IsValid())
     return;
@@ -278,8 +259,7 @@ void SBTypeSummary::SetSummaryString(const char *data) {
 }
 
 void SBTypeSummary::SetFunctionName(const char *data) {
-  LLDB_RECORD_METHOD(void, SBTypeSummary, SetFunctionName, (const char *),
-                     data);
+  LLDB_INSTRUMENT_VA(this, data);
 
   if (!IsValid())
     return;
@@ -291,8 +271,7 @@ void SBTypeSummary::SetFunctionName(const char *data) {
 }
 
 void SBTypeSummary::SetFunctionCode(const char *data) {
-  LLDB_RECORD_METHOD(void, SBTypeSummary, SetFunctionCode, (const char *),
-                     data);
+  LLDB_INSTRUMENT_VA(this, data);
 
   if (!IsValid())
     return;
@@ -305,9 +284,7 @@ void SBTypeSummary::SetFunctionCode(const char *data) {
 
 bool SBTypeSummary::GetDescription(lldb::SBStream &description,
                                    lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeSummary, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   if (!CopyOnWrite_Impl())
     return false;
@@ -318,8 +295,7 @@ bool SBTypeSummary::GetDescription(lldb::SBStream &description,
 }
 
 bool SBTypeSummary::DoesPrintValue(lldb::SBValue value) {
-  LLDB_RECORD_METHOD(bool, SBTypeSummary, DoesPrintValue, (lldb::SBValue),
-                     value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   if (!IsValid())
     return false;
@@ -328,9 +304,7 @@ bool SBTypeSummary::DoesPrintValue(lldb::SBValue value) {
 }
 
 lldb::SBTypeSummary &SBTypeSummary::operator=(const lldb::SBTypeSummary &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBTypeSummary &,
-                     SBTypeSummary, operator=,(const lldb::SBTypeSummary &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -339,8 +313,7 @@ lldb::SBTypeSummary &SBTypeSummary::operator=(const lldb::SBTypeSummary &rhs) {
 }
 
 bool SBTypeSummary::operator==(lldb::SBTypeSummary &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeSummary, operator==,(lldb::SBTypeSummary &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -348,8 +321,7 @@ bool SBTypeSummary::operator==(lldb::SBTypeSummary &rhs) {
 }
 
 bool SBTypeSummary::IsEqualTo(lldb::SBTypeSummary &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeSummary, IsEqualTo, (lldb::SBTypeSummary &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (IsValid()) {
     // valid and invalid are different
@@ -389,8 +361,7 @@ bool SBTypeSummary::IsEqualTo(lldb::SBTypeSummary &rhs) {
 }
 
 bool SBTypeSummary::operator!=(lldb::SBTypeSummary &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeSummary, operator!=,(lldb::SBTypeSummary &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
diff --git a/lldb/source/API/SBTypeSynthetic.cpp b/lldb/source/API/SBTypeSynthetic.cpp
index 5b8a03b27077d..7258ff04745de 100644
--- a/lldb/source/API/SBTypeSynthetic.cpp
+++ b/lldb/source/API/SBTypeSynthetic.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBTypeSynthetic.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBStream.h"
 
@@ -16,15 +16,11 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBTypeSynthetic::SBTypeSynthetic() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBTypeSynthetic);
-}
+SBTypeSynthetic::SBTypeSynthetic() { LLDB_INSTRUMENT_VA(this); }
 
 SBTypeSynthetic SBTypeSynthetic::CreateWithClassName(const char *data,
                                                      uint32_t options) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSynthetic, SBTypeSynthetic,
-                            CreateWithClassName, (const char *, uint32_t), data,
-                            options);
+  LLDB_INSTRUMENT_VA(data, options);
 
   if (!data || data[0] == 0)
     return SBTypeSynthetic();
@@ -34,9 +30,7 @@ SBTypeSynthetic SBTypeSynthetic::CreateWithClassName(const char *data,
 
 SBTypeSynthetic SBTypeSynthetic::CreateWithScriptCode(const char *data,
                                                       uint32_t options) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBTypeSynthetic, SBTypeSynthetic,
-                            CreateWithScriptCode, (const char *, uint32_t),
-                            data, options);
+  LLDB_INSTRUMENT_VA(data, options);
 
   if (!data || data[0] == 0)
     return SBTypeSynthetic();
@@ -46,24 +40,23 @@ SBTypeSynthetic SBTypeSynthetic::CreateWithScriptCode(const char *data,
 
 SBTypeSynthetic::SBTypeSynthetic(const lldb::SBTypeSynthetic &rhs)
     : m_opaque_sp(rhs.m_opaque_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBTypeSynthetic, (const lldb::SBTypeSynthetic &),
-                          rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBTypeSynthetic::~SBTypeSynthetic() = default;
 
 bool SBTypeSynthetic::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSynthetic, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBTypeSynthetic::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBTypeSynthetic, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_sp.get() != nullptr;
 }
 
 bool SBTypeSynthetic::IsClassCode() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSynthetic, IsClassCode);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -72,7 +65,7 @@ bool SBTypeSynthetic::IsClassCode() {
 }
 
 bool SBTypeSynthetic::IsClassName() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBTypeSynthetic, IsClassName);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -80,7 +73,7 @@ bool SBTypeSynthetic::IsClassName() {
 }
 
 const char *SBTypeSynthetic::GetData() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBTypeSynthetic, GetData);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return nullptr;
@@ -91,21 +84,21 @@ const char *SBTypeSynthetic::GetData() {
 }
 
 void SBTypeSynthetic::SetClassName(const char *data) {
-  LLDB_RECORD_METHOD(void, SBTypeSynthetic, SetClassName, (const char *), data);
+  LLDB_INSTRUMENT_VA(this, data);
 
   if (IsValid() && data && *data)
     m_opaque_sp->SetPythonClassName(data);
 }
 
 void SBTypeSynthetic::SetClassCode(const char *data) {
-  LLDB_RECORD_METHOD(void, SBTypeSynthetic, SetClassCode, (const char *), data);
+  LLDB_INSTRUMENT_VA(this, data);
 
   if (IsValid() && data && *data)
     m_opaque_sp->SetPythonCode(data);
 }
 
 uint32_t SBTypeSynthetic::GetOptions() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBTypeSynthetic, GetOptions);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return lldb::eTypeOptionNone;
@@ -113,7 +106,7 @@ uint32_t SBTypeSynthetic::GetOptions() {
 }
 
 void SBTypeSynthetic::SetOptions(uint32_t value) {
-  LLDB_RECORD_METHOD(void, SBTypeSynthetic, SetOptions, (uint32_t), value);
+  LLDB_INSTRUMENT_VA(this, value);
 
   if (!CopyOnWrite_Impl())
     return;
@@ -122,9 +115,7 @@ void SBTypeSynthetic::SetOptions(uint32_t value) {
 
 bool SBTypeSynthetic::GetDescription(lldb::SBStream &description,
                                      lldb::DescriptionLevel description_level) {
-  LLDB_RECORD_METHOD(bool, SBTypeSynthetic, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     description_level);
+  LLDB_INSTRUMENT_VA(this, description, description_level);
 
   if (m_opaque_sp) {
     description.Printf("%s\n", m_opaque_sp->GetDescription().c_str());
@@ -135,9 +126,7 @@ bool SBTypeSynthetic::GetDescription(lldb::SBStream &description,
 
 lldb::SBTypeSynthetic &SBTypeSynthetic::
 operator=(const lldb::SBTypeSynthetic &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBTypeSynthetic &,
-                     SBTypeSynthetic, operator=,(const lldb::SBTypeSynthetic &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     m_opaque_sp = rhs.m_opaque_sp;
@@ -146,8 +135,7 @@ operator=(const lldb::SBTypeSynthetic &rhs) {
 }
 
 bool SBTypeSynthetic::operator==(lldb::SBTypeSynthetic &rhs) {
-  LLDB_RECORD_METHOD(
-      bool, SBTypeSynthetic, operator==,(lldb::SBTypeSynthetic &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -155,8 +143,7 @@ bool SBTypeSynthetic::operator==(lldb::SBTypeSynthetic &rhs) {
 }
 
 bool SBTypeSynthetic::IsEqualTo(lldb::SBTypeSynthetic &rhs) {
-  LLDB_RECORD_METHOD(bool, SBTypeSynthetic, IsEqualTo,
-                     (lldb::SBTypeSynthetic &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
@@ -174,8 +161,7 @@ bool SBTypeSynthetic::IsEqualTo(lldb::SBTypeSynthetic &rhs) {
 }
 
 bool SBTypeSynthetic::operator!=(lldb::SBTypeSynthetic &rhs) {
-  LLDB_RECORD_METHOD(
-      bool, SBTypeSynthetic, operator!=,(lldb::SBTypeSynthetic &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (!IsValid())
     return !rhs.IsValid();
diff --git a/lldb/source/API/SBUnixSignals.cpp b/lldb/source/API/SBUnixSignals.cpp
index 6ee7afb04e7b6..dc7a68255d131 100644
--- a/lldb/source/API/SBUnixSignals.cpp
+++ b/lldb/source/API/SBUnixSignals.cpp
@@ -6,10 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/Target/Platform.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/UnixSignals.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/lldb-defines.h"
 
 #include "lldb/API/SBUnixSignals.h"
@@ -17,13 +17,11 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBUnixSignals::SBUnixSignals() {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBUnixSignals);
-}
+SBUnixSignals::SBUnixSignals() { LLDB_INSTRUMENT_VA(this); }
 
 SBUnixSignals::SBUnixSignals(const SBUnixSignals &rhs)
     : m_opaque_wp(rhs.m_opaque_wp) {
-  LLDB_RECORD_CONSTRUCTOR(SBUnixSignals, (const lldb::SBUnixSignals &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 SBUnixSignals::SBUnixSignals(ProcessSP &process_sp)
@@ -33,9 +31,7 @@ SBUnixSignals::SBUnixSignals(PlatformSP &platform_sp)
     : m_opaque_wp(platform_sp ? platform_sp->GetUnixSignals() : nullptr) {}
 
 const SBUnixSignals &SBUnixSignals::operator=(const SBUnixSignals &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBUnixSignals &,
-                     SBUnixSignals, operator=,(const lldb::SBUnixSignals &),
-                     rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs)
     m_opaque_wp = rhs.m_opaque_wp;
@@ -51,24 +47,23 @@ void SBUnixSignals::SetSP(const UnixSignalsSP &signals_sp) {
 }
 
 void SBUnixSignals::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBUnixSignals, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_wp.reset();
 }
 
 bool SBUnixSignals::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBUnixSignals, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBUnixSignals::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBUnixSignals, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return static_cast<bool>(GetSP());
 }
 
 const char *SBUnixSignals::GetSignalAsCString(int32_t signo) const {
-  LLDB_RECORD_METHOD_CONST(const char *, SBUnixSignals, GetSignalAsCString,
-                           (int32_t), signo);
+  LLDB_INSTRUMENT_VA(this, signo);
 
   if (auto signals_sp = GetSP())
     return signals_sp->GetSignalAsCString(signo);
@@ -77,8 +72,7 @@ const char *SBUnixSignals::GetSignalAsCString(int32_t signo) const {
 }
 
 int32_t SBUnixSignals::GetSignalNumberFromName(const char *name) const {
-  LLDB_RECORD_METHOD_CONST(int32_t, SBUnixSignals, GetSignalNumberFromName,
-                           (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   if (auto signals_sp = GetSP())
     return signals_sp->GetSignalNumberFromName(name);
@@ -87,8 +81,7 @@ int32_t SBUnixSignals::GetSignalNumberFromName(const char *name) const {
 }
 
 bool SBUnixSignals::GetShouldSuppress(int32_t signo) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBUnixSignals, GetShouldSuppress, (int32_t),
-                           signo);
+  LLDB_INSTRUMENT_VA(this, signo);
 
   if (auto signals_sp = GetSP())
     return signals_sp->GetShouldSuppress(signo);
@@ -97,8 +90,7 @@ bool SBUnixSignals::GetShouldSuppress(int32_t signo) const {
 }
 
 bool SBUnixSignals::SetShouldSuppress(int32_t signo, bool value) {
-  LLDB_RECORD_METHOD(bool, SBUnixSignals, SetShouldSuppress, (int32_t, bool),
-                     signo, value);
+  LLDB_INSTRUMENT_VA(this, signo, value);
 
   auto signals_sp = GetSP();
 
@@ -109,8 +101,7 @@ bool SBUnixSignals::SetShouldSuppress(int32_t signo, bool value) {
 }
 
 bool SBUnixSignals::GetShouldStop(int32_t signo) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBUnixSignals, GetShouldStop, (int32_t),
-                           signo);
+  LLDB_INSTRUMENT_VA(this, signo);
 
   if (auto signals_sp = GetSP())
     return signals_sp->GetShouldStop(signo);
@@ -119,8 +110,7 @@ bool SBUnixSignals::GetShouldStop(int32_t signo) const {
 }
 
 bool SBUnixSignals::SetShouldStop(int32_t signo, bool value) {
-  LLDB_RECORD_METHOD(bool, SBUnixSignals, SetShouldStop, (int32_t, bool), signo,
-                     value);
+  LLDB_INSTRUMENT_VA(this, signo, value);
 
   auto signals_sp = GetSP();
 
@@ -131,8 +121,7 @@ bool SBUnixSignals::SetShouldStop(int32_t signo, bool value) {
 }
 
 bool SBUnixSignals::GetShouldNotify(int32_t signo) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBUnixSignals, GetShouldNotify, (int32_t),
-                           signo);
+  LLDB_INSTRUMENT_VA(this, signo);
 
   if (auto signals_sp = GetSP())
     return signals_sp->GetShouldNotify(signo);
@@ -141,8 +130,7 @@ bool SBUnixSignals::GetShouldNotify(int32_t signo) const {
 }
 
 bool SBUnixSignals::SetShouldNotify(int32_t signo, bool value) {
-  LLDB_RECORD_METHOD(bool, SBUnixSignals, SetShouldNotify, (int32_t, bool),
-                     signo, value);
+  LLDB_INSTRUMENT_VA(this, signo, value);
 
   auto signals_sp = GetSP();
 
@@ -153,7 +141,7 @@ bool SBUnixSignals::SetShouldNotify(int32_t signo, bool value) {
 }
 
 int32_t SBUnixSignals::GetNumSignals() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(int32_t, SBUnixSignals, GetNumSignals);
+  LLDB_INSTRUMENT_VA(this);
 
   if (auto signals_sp = GetSP())
     return signals_sp->GetNumSignals();
@@ -162,8 +150,7 @@ int32_t SBUnixSignals::GetNumSignals() const {
 }
 
 int32_t SBUnixSignals::GetSignalAtIndex(int32_t index) const {
-  LLDB_RECORD_METHOD_CONST(int32_t, SBUnixSignals, GetSignalAtIndex, (int32_t),
-                           index);
+  LLDB_INSTRUMENT_VA(this, index);
 
   if (auto signals_sp = GetSP())
     return signals_sp->GetSignalAtIndex(index);
diff --git a/lldb/source/API/SBValue.cpp b/lldb/source/API/SBValue.cpp
index 151ccba8381e1..20581cfabdd66 100644
--- a/lldb/source/API/SBValue.cpp
+++ b/lldb/source/API/SBValue.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBValue.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/API/SBDeclaration.h"
 #include "lldb/API/SBStream.h"
@@ -215,23 +215,22 @@ class ValueLocker {
   Status m_lock_error;
 };
 
-SBValue::SBValue() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBValue); }
+SBValue::SBValue() { LLDB_INSTRUMENT_VA(this); }
 
 SBValue::SBValue(const lldb::ValueObjectSP &value_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBValue, (const lldb::ValueObjectSP &), value_sp);
+  LLDB_INSTRUMENT_VA(this, value_sp);
 
   SetSP(value_sp);
 }
 
 SBValue::SBValue(const SBValue &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBValue, (const lldb::SBValue &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   SetSP(rhs.m_opaque_sp);
 }
 
 SBValue &SBValue::operator=(const SBValue &rhs) {
-  LLDB_RECORD_METHOD(lldb::SBValue &,
-                     SBValue, operator=,(const lldb::SBValue &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     SetSP(rhs.m_opaque_sp);
@@ -242,11 +241,11 @@ SBValue &SBValue::operator=(const SBValue &rhs) {
 SBValue::~SBValue() = default;
 
 bool SBValue::IsValid() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBValue::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBValue, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   // If this function ever changes to anything that does more than just check
   // if the opaque shared pointer is non NULL, then we need to update all "if
@@ -256,13 +255,13 @@ SBValue::operator bool() const {
 }
 
 void SBValue::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBValue, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_sp.reset();
 }
 
 SBError SBValue::GetError() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBValue, GetError);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError sb_error;
 
@@ -278,7 +277,7 @@ SBError SBValue::GetError() {
 }
 
 user_id_t SBValue::GetID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::user_id_t, SBValue, GetID);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -288,7 +287,7 @@ user_id_t SBValue::GetID() {
 }
 
 const char *SBValue::GetName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   ValueLocker locker;
@@ -300,7 +299,7 @@ const char *SBValue::GetName() {
 }
 
 const char *SBValue::GetTypeName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetTypeName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   ValueLocker locker;
@@ -313,7 +312,7 @@ const char *SBValue::GetTypeName() {
 }
 
 const char *SBValue::GetDisplayTypeName() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetDisplayTypeName);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *name = nullptr;
   ValueLocker locker;
@@ -326,7 +325,7 @@ const char *SBValue::GetDisplayTypeName() {
 }
 
 size_t SBValue::GetByteSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBValue, GetByteSize);
+  LLDB_INSTRUMENT_VA(this);
 
   size_t result = 0;
 
@@ -340,7 +339,7 @@ size_t SBValue::GetByteSize() {
 }
 
 bool SBValue::IsInScope() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsInScope);
+  LLDB_INSTRUMENT_VA(this);
 
   bool result = false;
 
@@ -354,7 +353,7 @@ bool SBValue::IsInScope() {
 }
 
 const char *SBValue::GetValue() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetValue);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *cstr = nullptr;
   ValueLocker locker;
@@ -367,7 +366,7 @@ const char *SBValue::GetValue() {
 }
 
 ValueType SBValue::GetValueType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::ValueType, SBValue, GetValueType);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueType result = eValueTypeInvalid;
   ValueLocker locker;
@@ -379,7 +378,7 @@ ValueType SBValue::GetValueType() {
 }
 
 const char *SBValue::GetObjectDescription() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetObjectDescription);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *cstr = nullptr;
   ValueLocker locker;
@@ -392,7 +391,7 @@ const char *SBValue::GetObjectDescription() {
 }
 
 SBType SBValue::GetType() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBType, SBValue, GetType);
+  LLDB_INSTRUMENT_VA(this);
 
   SBType sb_type;
   ValueLocker locker;
@@ -407,7 +406,7 @@ SBType SBValue::GetType() {
 }
 
 bool SBValue::GetValueDidChange() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, GetValueDidChange);
+  LLDB_INSTRUMENT_VA(this);
 
   bool result = false;
   ValueLocker locker;
@@ -421,7 +420,7 @@ bool SBValue::GetValueDidChange() {
 }
 
 const char *SBValue::GetSummary() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetSummary);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *cstr = nullptr;
   ValueLocker locker;
@@ -435,9 +434,7 @@ const char *SBValue::GetSummary() {
 
 const char *SBValue::GetSummary(lldb::SBStream &stream,
                                 lldb::SBTypeSummaryOptions &options) {
-  LLDB_RECORD_METHOD(const char *, SBValue, GetSummary,
-                     (lldb::SBStream &, lldb::SBTypeSummaryOptions &), stream,
-                     options);
+  LLDB_INSTRUMENT_VA(this, stream, options);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -451,7 +448,7 @@ const char *SBValue::GetSummary(lldb::SBStream &stream,
 }
 
 const char *SBValue::GetLocation() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBValue, GetLocation);
+  LLDB_INSTRUMENT_VA(this);
 
   const char *cstr = nullptr;
   ValueLocker locker;
@@ -464,16 +461,14 @@ const char *SBValue::GetLocation() {
 
 // Deprecated - use the one that takes an lldb::SBError
 bool SBValue::SetValueFromCString(const char *value_str) {
-  LLDB_RECORD_METHOD(bool, SBValue, SetValueFromCString, (const char *),
-                     value_str);
+  LLDB_INSTRUMENT_VA(this, value_str);
 
   lldb::SBError dummy;
   return SetValueFromCString(value_str, dummy);
 }
 
 bool SBValue::SetValueFromCString(const char *value_str, lldb::SBError &error) {
-  LLDB_RECORD_METHOD(bool, SBValue, SetValueFromCString,
-                     (const char *, lldb::SBError &), value_str, error);
+  LLDB_INSTRUMENT_VA(this, value_str, error);
 
   bool success = false;
   ValueLocker locker;
@@ -488,7 +483,7 @@ bool SBValue::SetValueFromCString(const char *value_str, lldb::SBError &error) {
 }
 
 lldb::SBTypeFormat SBValue::GetTypeFormat() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeFormat, SBValue, GetTypeFormat);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBTypeFormat format;
   ValueLocker locker;
@@ -504,7 +499,7 @@ lldb::SBTypeFormat SBValue::GetTypeFormat() {
 }
 
 lldb::SBTypeSummary SBValue::GetTypeSummary() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeSummary, SBValue, GetTypeSummary);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBTypeSummary summary;
   ValueLocker locker;
@@ -520,7 +515,7 @@ lldb::SBTypeSummary SBValue::GetTypeSummary() {
 }
 
 lldb::SBTypeFilter SBValue::GetTypeFilter() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeFilter, SBValue, GetTypeFilter);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBTypeFilter filter;
   ValueLocker locker;
@@ -540,7 +535,7 @@ lldb::SBTypeFilter SBValue::GetTypeFilter() {
 }
 
 lldb::SBTypeSynthetic SBValue::GetTypeSynthetic() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTypeSynthetic, SBValue, GetTypeSynthetic);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBTypeSynthetic synthetic;
   ValueLocker locker;
@@ -561,9 +556,7 @@ lldb::SBTypeSynthetic SBValue::GetTypeSynthetic() {
 
 lldb::SBValue SBValue::CreateChildAtOffset(const char *name, uint32_t offset,
                                            SBType type) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateChildAtOffset,
-                     (const char *, uint32_t, lldb::SBType), name, offset,
-                     type);
+  LLDB_INSTRUMENT_VA(this, name, offset, type);
 
   lldb::SBValue sb_value;
   ValueLocker locker;
@@ -581,7 +574,7 @@ lldb::SBValue SBValue::CreateChildAtOffset(const char *name, uint32_t offset,
 }
 
 lldb::SBValue SBValue::Cast(SBType type) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, Cast, (lldb::SBType), type);
+  LLDB_INSTRUMENT_VA(this, type);
 
   lldb::SBValue sb_value;
   ValueLocker locker;
@@ -595,8 +588,7 @@ lldb::SBValue SBValue::Cast(SBType type) {
 
 lldb::SBValue SBValue::CreateValueFromExpression(const char *name,
                                                  const char *expression) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateValueFromExpression,
-                     (const char *, const char *), name, expression);
+  LLDB_INSTRUMENT_VA(this, name, expression);
 
   SBExpressionOptions options;
   options.ref().SetKeepInMemory(true);
@@ -606,9 +598,7 @@ lldb::SBValue SBValue::CreateValueFromExpression(const char *name,
 lldb::SBValue SBValue::CreateValueFromExpression(const char *name,
                                                  const char *expression,
                                                  SBExpressionOptions &options) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateValueFromExpression,
-                     (const char *, const char *, lldb::SBExpressionOptions &),
-                     name, expression, options);
+  LLDB_INSTRUMENT_VA(this, name, expression, options);
 
   lldb::SBValue sb_value;
   ValueLocker locker;
@@ -628,9 +618,7 @@ lldb::SBValue SBValue::CreateValueFromExpression(const char *name,
 lldb::SBValue SBValue::CreateValueFromAddress(const char *name,
                                               lldb::addr_t address,
                                               SBType sb_type) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateValueFromAddress,
-                     (const char *, lldb::addr_t, lldb::SBType), name, address,
-                     sb_type);
+  LLDB_INSTRUMENT_VA(this, name, address, sb_type);
 
   lldb::SBValue sb_value;
   ValueLocker locker;
@@ -649,9 +637,7 @@ lldb::SBValue SBValue::CreateValueFromAddress(const char *name,
 
 lldb::SBValue SBValue::CreateValueFromData(const char *name, SBData data,
                                            SBType sb_type) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, CreateValueFromData,
-                     (const char *, lldb::SBData, lldb::SBType), name, data,
-                     sb_type);
+  LLDB_INSTRUMENT_VA(this, name, data, sb_type);
 
   lldb::SBValue sb_value;
   lldb::ValueObjectSP new_value_sp;
@@ -669,7 +655,7 @@ lldb::SBValue SBValue::CreateValueFromData(const char *name, SBData data,
 }
 
 SBValue SBValue::GetChildAtIndex(uint32_t idx) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetChildAtIndex, (uint32_t), idx);
+  LLDB_INSTRUMENT_VA(this, idx);
 
   const bool can_create_synthetic = false;
   lldb::DynamicValueType use_dynamic = eNoDynamicValues;
@@ -686,9 +672,7 @@ SBValue SBValue::GetChildAtIndex(uint32_t idx) {
 SBValue SBValue::GetChildAtIndex(uint32_t idx,
                                  lldb::DynamicValueType use_dynamic,
                                  bool can_create_synthetic) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetChildAtIndex,
-                     (uint32_t, lldb::DynamicValueType, bool), idx, use_dynamic,
-                     can_create_synthetic);
+  LLDB_INSTRUMENT_VA(this, idx, use_dynamic, can_create_synthetic);
 
   lldb::ValueObjectSP child_sp;
 
@@ -709,8 +693,7 @@ SBValue SBValue::GetChildAtIndex(uint32_t idx,
 }
 
 uint32_t SBValue::GetIndexOfChildWithName(const char *name) {
-  LLDB_RECORD_METHOD(uint32_t, SBValue, GetIndexOfChildWithName, (const char *),
-                     name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   uint32_t idx = UINT32_MAX;
   ValueLocker locker;
@@ -722,8 +705,7 @@ uint32_t SBValue::GetIndexOfChildWithName(const char *name) {
 }
 
 SBValue SBValue::GetChildMemberWithName(const char *name) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetChildMemberWithName,
-                     (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   lldb::DynamicValueType use_dynamic_value = eNoDynamicValues;
   TargetSP target_sp;
@@ -738,9 +720,7 @@ SBValue SBValue::GetChildMemberWithName(const char *name) {
 SBValue
 SBValue::GetChildMemberWithName(const char *name,
                                 lldb::DynamicValueType use_dynamic_value) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetChildMemberWithName,
-                     (const char *, lldb::DynamicValueType), name,
-                     use_dynamic_value);
+  LLDB_INSTRUMENT_VA(this, name, use_dynamic_value);
 
   lldb::ValueObjectSP child_sp;
   const ConstString str_name(name);
@@ -758,8 +738,7 @@ SBValue::GetChildMemberWithName(const char *name,
 }
 
 lldb::SBValue SBValue::GetDynamicValue(lldb::DynamicValueType use_dynamic) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetDynamicValue,
-                     (lldb::DynamicValueType), use_dynamic);
+  LLDB_INSTRUMENT_VA(this, use_dynamic);
 
   SBValue value_sb;
   if (IsValid()) {
@@ -771,7 +750,7 @@ lldb::SBValue SBValue::GetDynamicValue(lldb::DynamicValueType use_dynamic) {
 }
 
 lldb::SBValue SBValue::GetStaticValue() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, GetStaticValue);
+  LLDB_INSTRUMENT_VA(this);
 
   SBValue value_sb;
   if (IsValid()) {
@@ -784,7 +763,7 @@ lldb::SBValue SBValue::GetStaticValue() {
 }
 
 lldb::SBValue SBValue::GetNonSyntheticValue() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, GetNonSyntheticValue);
+  LLDB_INSTRUMENT_VA(this);
 
   SBValue value_sb;
   if (IsValid()) {
@@ -796,8 +775,7 @@ lldb::SBValue SBValue::GetNonSyntheticValue() {
 }
 
 lldb::DynamicValueType SBValue::GetPreferDynamicValue() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::DynamicValueType, SBValue,
-                             GetPreferDynamicValue);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return eNoDynamicValues;
@@ -805,15 +783,14 @@ lldb::DynamicValueType SBValue::GetPreferDynamicValue() {
 }
 
 void SBValue::SetPreferDynamicValue(lldb::DynamicValueType use_dynamic) {
-  LLDB_RECORD_METHOD(void, SBValue, SetPreferDynamicValue,
-                     (lldb::DynamicValueType), use_dynamic);
+  LLDB_INSTRUMENT_VA(this, use_dynamic);
 
   if (IsValid())
     return m_opaque_sp->SetUseDynamic(use_dynamic);
 }
 
 bool SBValue::GetPreferSyntheticValue() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, GetPreferSyntheticValue);
+  LLDB_INSTRUMENT_VA(this);
 
   if (!IsValid())
     return false;
@@ -821,15 +798,14 @@ bool SBValue::GetPreferSyntheticValue() {
 }
 
 void SBValue::SetPreferSyntheticValue(bool use_synthetic) {
-  LLDB_RECORD_METHOD(void, SBValue, SetPreferSyntheticValue, (bool),
-                     use_synthetic);
+  LLDB_INSTRUMENT_VA(this, use_synthetic);
 
   if (IsValid())
     return m_opaque_sp->SetUseSynthetic(use_synthetic);
 }
 
 bool SBValue::IsDynamic() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsDynamic);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -839,7 +815,7 @@ bool SBValue::IsDynamic() {
 }
 
 bool SBValue::IsSynthetic() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsSynthetic);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -849,7 +825,7 @@ bool SBValue::IsSynthetic() {
 }
 
 bool SBValue::IsSyntheticChildrenGenerated() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsSyntheticChildrenGenerated);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -859,7 +835,7 @@ bool SBValue::IsSyntheticChildrenGenerated() {
 }
 
 void SBValue::SetSyntheticChildrenGenerated(bool is) {
-  LLDB_RECORD_METHOD(void, SBValue, SetSyntheticChildrenGenerated, (bool), is);
+  LLDB_INSTRUMENT_VA(this, is);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -868,8 +844,7 @@ void SBValue::SetSyntheticChildrenGenerated(bool is) {
 }
 
 lldb::SBValue SBValue::GetValueForExpressionPath(const char *expr_path) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, GetValueForExpressionPath,
-                     (const char *), expr_path);
+  LLDB_INSTRUMENT_VA(this, expr_path);
 
   lldb::ValueObjectSP child_sp;
   ValueLocker locker;
@@ -886,8 +861,7 @@ lldb::SBValue SBValue::GetValueForExpressionPath(const char *expr_path) {
 }
 
 int64_t SBValue::GetValueAsSigned(SBError &error, int64_t fail_value) {
-  LLDB_RECORD_METHOD(int64_t, SBValue, GetValueAsSigned,
-                     (lldb::SBError &, int64_t), error, fail_value);
+  LLDB_INSTRUMENT_VA(this, error, fail_value);
 
   error.Clear();
   ValueLocker locker;
@@ -907,8 +881,7 @@ int64_t SBValue::GetValueAsSigned(SBError &error, int64_t fail_value) {
 }
 
 uint64_t SBValue::GetValueAsUnsigned(SBError &error, uint64_t fail_value) {
-  LLDB_RECORD_METHOD(uint64_t, SBValue, GetValueAsUnsigned,
-                     (lldb::SBError &, uint64_t), error, fail_value);
+  LLDB_INSTRUMENT_VA(this, error, fail_value);
 
   error.Clear();
   ValueLocker locker;
@@ -928,7 +901,7 @@ uint64_t SBValue::GetValueAsUnsigned(SBError &error, uint64_t fail_value) {
 }
 
 int64_t SBValue::GetValueAsSigned(int64_t fail_value) {
-  LLDB_RECORD_METHOD(int64_t, SBValue, GetValueAsSigned, (int64_t), fail_value);
+  LLDB_INSTRUMENT_VA(this, fail_value);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -939,8 +912,7 @@ int64_t SBValue::GetValueAsSigned(int64_t fail_value) {
 }
 
 uint64_t SBValue::GetValueAsUnsigned(uint64_t fail_value) {
-  LLDB_RECORD_METHOD(uint64_t, SBValue, GetValueAsUnsigned, (uint64_t),
-                     fail_value);
+  LLDB_INSTRUMENT_VA(this, fail_value);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -951,7 +923,7 @@ uint64_t SBValue::GetValueAsUnsigned(uint64_t fail_value) {
 }
 
 bool SBValue::MightHaveChildren() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, MightHaveChildren);
+  LLDB_INSTRUMENT_VA(this);
 
   bool has_children = false;
   ValueLocker locker;
@@ -963,7 +935,7 @@ bool SBValue::MightHaveChildren() {
 }
 
 bool SBValue::IsRuntimeSupportValue() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, IsRuntimeSupportValue);
+  LLDB_INSTRUMENT_VA(this);
 
   bool is_support = false;
   ValueLocker locker;
@@ -975,13 +947,13 @@ bool SBValue::IsRuntimeSupportValue() {
 }
 
 uint32_t SBValue::GetNumChildren() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBValue, GetNumChildren);
+  LLDB_INSTRUMENT_VA(this);
 
   return GetNumChildren(UINT32_MAX);
 }
 
 uint32_t SBValue::GetNumChildren(uint32_t max) {
-  LLDB_RECORD_METHOD(uint32_t, SBValue, GetNumChildren, (uint32_t), max);
+  LLDB_INSTRUMENT_VA(this, max);
 
   uint32_t num_children = 0;
 
@@ -994,7 +966,7 @@ uint32_t SBValue::GetNumChildren(uint32_t max) {
 }
 
 SBValue SBValue::Dereference() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, Dereference);
+  LLDB_INSTRUMENT_VA(this);
 
   SBValue sb_value;
   ValueLocker locker;
@@ -1009,13 +981,13 @@ SBValue SBValue::Dereference() {
 
 // Deprecated - please use GetType().IsPointerType() instead.
 bool SBValue::TypeIsPointerType() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBValue, TypeIsPointerType);
+  LLDB_INSTRUMENT_VA(this);
 
   return GetType().IsPointerType();
 }
 
 void *SBValue::GetOpaqueType() {
-  LLDB_RECORD_METHOD_NO_ARGS(void *, SBValue, GetOpaqueType);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1025,7 +997,7 @@ void *SBValue::GetOpaqueType() {
 }
 
 lldb::SBTarget SBValue::GetTarget() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBTarget, SBValue, GetTarget);
+  LLDB_INSTRUMENT_VA(this);
 
   SBTarget sb_target;
   TargetSP target_sp;
@@ -1038,7 +1010,7 @@ lldb::SBTarget SBValue::GetTarget() {
 }
 
 lldb::SBProcess SBValue::GetProcess() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBProcess, SBValue, GetProcess);
+  LLDB_INSTRUMENT_VA(this);
 
   SBProcess sb_process;
   ProcessSP process_sp;
@@ -1051,7 +1023,7 @@ lldb::SBProcess SBValue::GetProcess() {
 }
 
 lldb::SBThread SBValue::GetThread() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBThread, SBValue, GetThread);
+  LLDB_INSTRUMENT_VA(this);
 
   SBThread sb_thread;
   ThreadSP thread_sp;
@@ -1064,7 +1036,7 @@ lldb::SBThread SBValue::GetThread() {
 }
 
 lldb::SBFrame SBValue::GetFrame() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBFrame, SBValue, GetFrame);
+  LLDB_INSTRUMENT_VA(this);
 
   SBFrame sb_frame;
   StackFrameSP frame_sp;
@@ -1085,7 +1057,7 @@ lldb::ValueObjectSP SBValue::GetSP(ValueLocker &locker) const {
 }
 
 lldb::ValueObjectSP SBValue::GetSP() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::ValueObjectSP, SBValue, GetSP);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   return GetSP(locker);
@@ -1146,8 +1118,7 @@ void SBValue::SetSP(const lldb::ValueObjectSP &sp,
 }
 
 bool SBValue::GetExpressionPath(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBValue, GetExpressionPath, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1160,8 +1131,7 @@ bool SBValue::GetExpressionPath(SBStream &description) {
 
 bool SBValue::GetExpressionPath(SBStream &description,
                                 bool qualify_cxx_base_classes) {
-  LLDB_RECORD_METHOD(bool, SBValue, GetExpressionPath, (lldb::SBStream &, bool),
-                     description, qualify_cxx_base_classes);
+  LLDB_INSTRUMENT_VA(this, description, qualify_cxx_base_classes);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1173,8 +1143,7 @@ bool SBValue::GetExpressionPath(SBStream &description,
 }
 
 lldb::SBValue SBValue::EvaluateExpression(const char *expr) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBValue, SBValue, EvaluateExpression,
-                           (const char *), expr);
+  LLDB_INSTRUMENT_VA(this, expr);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1196,9 +1165,7 @@ lldb::SBValue SBValue::EvaluateExpression(const char *expr) const {
 lldb::SBValue
 SBValue::EvaluateExpression(const char *expr,
                             const SBExpressionOptions &options) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBValue, SBValue, EvaluateExpression,
-                           (const char *, const lldb::SBExpressionOptions &),
-                           expr, options);
+  LLDB_INSTRUMENT_VA(this, expr, options);
 
   return EvaluateExpression(expr, options, nullptr);
 }
@@ -1206,11 +1173,7 @@ SBValue::EvaluateExpression(const char *expr,
 lldb::SBValue SBValue::EvaluateExpression(const char *expr,
                                           const SBExpressionOptions &options,
                                           const char *name) const {
-  LLDB_RECORD_METHOD_CONST(
-      lldb::SBValue, SBValue, EvaluateExpression,
-      (const char *, const lldb::SBExpressionOptions &, const char *), expr,
-      options, name);
-
+  LLDB_INSTRUMENT_VA(this, expr, options, name);
 
   if (!expr || expr[0] == '\0') {
     return SBValue();
@@ -1249,8 +1212,7 @@ lldb::SBValue SBValue::EvaluateExpression(const char *expr,
 }
 
 bool SBValue::GetDescription(SBStream &description) {
-  LLDB_RECORD_METHOD(bool, SBValue, GetDescription, (lldb::SBStream &),
-                     description);
+  LLDB_INSTRUMENT_VA(this, description);
 
   Stream &strm = description.ref();
 
@@ -1265,7 +1227,7 @@ bool SBValue::GetDescription(SBStream &description) {
 }
 
 lldb::Format SBValue::GetFormat() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::Format, SBValue, GetFormat);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1275,7 +1237,7 @@ lldb::Format SBValue::GetFormat() {
 }
 
 void SBValue::SetFormat(lldb::Format format) {
-  LLDB_RECORD_METHOD(void, SBValue, SetFormat, (lldb::Format), format);
+  LLDB_INSTRUMENT_VA(this, format);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1284,7 +1246,7 @@ void SBValue::SetFormat(lldb::Format format) {
 }
 
 lldb::SBValue SBValue::AddressOf() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, AddressOf);
+  LLDB_INSTRUMENT_VA(this);
 
   SBValue sb_value;
   ValueLocker locker;
@@ -1299,7 +1261,7 @@ lldb::SBValue SBValue::AddressOf() {
 }
 
 lldb::addr_t SBValue::GetLoadAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBValue, GetLoadAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::addr_t value = LLDB_INVALID_ADDRESS;
   ValueLocker locker;
@@ -1329,7 +1291,7 @@ lldb::addr_t SBValue::GetLoadAddress() {
 }
 
 lldb::SBAddress SBValue::GetAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBAddress, SBValue, GetAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   Address addr;
   ValueLocker locker;
@@ -1358,8 +1320,7 @@ lldb::SBAddress SBValue::GetAddress() {
 }
 
 lldb::SBData SBValue::GetPointeeData(uint32_t item_idx, uint32_t item_count) {
-  LLDB_RECORD_METHOD(lldb::SBData, SBValue, GetPointeeData,
-                     (uint32_t, uint32_t), item_idx, item_count);
+  LLDB_INSTRUMENT_VA(this, item_idx, item_count);
 
   lldb::SBData sb_data;
   ValueLocker locker;
@@ -1378,7 +1339,7 @@ lldb::SBData SBValue::GetPointeeData(uint32_t item_idx, uint32_t item_count) {
 }
 
 lldb::SBData SBValue::GetData() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBData, SBValue, GetData);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::SBData sb_data;
   ValueLocker locker;
@@ -1395,8 +1356,7 @@ lldb::SBData SBValue::GetData() {
 }
 
 bool SBValue::SetData(lldb::SBData &data, SBError &error) {
-  LLDB_RECORD_METHOD(bool, SBValue, SetData, (lldb::SBData &, lldb::SBError &),
-                     data, error);
+  LLDB_INSTRUMENT_VA(this, data, error);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1430,7 +1390,7 @@ bool SBValue::SetData(lldb::SBData &data, SBError &error) {
 }
 
 lldb::SBValue SBValue::Clone(const char *new_name) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValue, Clone, (const char *), new_name);
+  LLDB_INSTRUMENT_VA(this, new_name);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1442,7 +1402,7 @@ lldb::SBValue SBValue::Clone(const char *new_name) {
 }
 
 lldb::SBDeclaration SBValue::GetDeclaration() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBDeclaration, SBValue, GetDeclaration);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
@@ -1457,9 +1417,7 @@ lldb::SBDeclaration SBValue::GetDeclaration() {
 
 lldb::SBWatchpoint SBValue::Watch(bool resolve_location, bool read, bool write,
                                   SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBValue, Watch,
-                     (bool, bool, bool, lldb::SBError &), resolve_location,
-                     read, write, error);
+  LLDB_INSTRUMENT_VA(this, resolve_location, read, write, error);
 
   SBWatchpoint sb_watchpoint;
 
@@ -1522,8 +1480,7 @@ lldb::SBWatchpoint SBValue::Watch(bool resolve_location, bool read, bool write,
 // Backward compatibility fix in the interim.
 lldb::SBWatchpoint SBValue::Watch(bool resolve_location, bool read,
                                   bool write) {
-  LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBValue, Watch, (bool, bool, bool),
-                     resolve_location, read, write);
+  LLDB_INSTRUMENT_VA(this, resolve_location, read, write);
 
   SBError error;
   return Watch(resolve_location, read, write, error);
@@ -1531,9 +1488,7 @@ lldb::SBWatchpoint SBValue::Watch(bool resolve_location, bool read,
 
 lldb::SBWatchpoint SBValue::WatchPointee(bool resolve_location, bool read,
                                          bool write, SBError &error) {
-  LLDB_RECORD_METHOD(lldb::SBWatchpoint, SBValue, WatchPointee,
-                     (bool, bool, bool, lldb::SBError &), resolve_location,
-                     read, write, error);
+  LLDB_INSTRUMENT_VA(this, resolve_location, read, write, error);
 
   SBWatchpoint sb_watchpoint;
   if (IsInScope() && GetType().IsPointerType())
@@ -1542,7 +1497,7 @@ lldb::SBWatchpoint SBValue::WatchPointee(bool resolve_location, bool read,
 }
 
 lldb::SBValue SBValue::Persist() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBValue, SBValue, Persist);
+  LLDB_INSTRUMENT_VA(this);
 
   ValueLocker locker;
   lldb::ValueObjectSP value_sp(GetSP(locker));
diff --git a/lldb/source/API/SBValueList.cpp b/lldb/source/API/SBValueList.cpp
index 797c615ce585a..a67030c506f41 100644
--- a/lldb/source/API/SBValueList.cpp
+++ b/lldb/source/API/SBValueList.cpp
@@ -7,10 +7,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBValueList.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBStream.h"
 #include "lldb/API/SBValue.h"
 #include "lldb/Core/ValueObjectList.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include <vector>
 
@@ -67,10 +67,10 @@ class ValueListImpl {
   std::vector<lldb::SBValue> m_values;
 };
 
-SBValueList::SBValueList() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBValueList); }
+SBValueList::SBValueList() { LLDB_INSTRUMENT_VA(this); }
 
 SBValueList::SBValueList(const SBValueList &rhs) {
-  LLDB_RECORD_CONSTRUCTOR(SBValueList, (const lldb::SBValueList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (rhs.IsValid())
     m_opaque_up = std::make_unique<ValueListImpl>(*rhs);
@@ -84,24 +84,23 @@ SBValueList::SBValueList(const ValueListImpl *lldb_object_ptr) {
 SBValueList::~SBValueList() = default;
 
 bool SBValueList::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBValueList, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBValueList::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBValueList, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return (m_opaque_up != nullptr);
 }
 
 void SBValueList::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBValueList, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_up.reset();
 }
 
 const SBValueList &SBValueList::operator=(const SBValueList &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBValueList &,
-                     SBValueList, operator=,(const lldb::SBValueList &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   if (this != &rhs) {
     if (rhs.IsValid())
@@ -123,8 +122,7 @@ const ValueListImpl *SBValueList::operator->() const {
 const ValueListImpl &SBValueList::operator*() const { return *m_opaque_up; }
 
 void SBValueList::Append(const SBValue &val_obj) {
-  LLDB_RECORD_METHOD(void, SBValueList, Append, (const lldb::SBValue &),
-                     val_obj);
+  LLDB_INSTRUMENT_VA(this, val_obj);
 
   CreateIfNeeded();
   m_opaque_up->Append(val_obj);
@@ -138,8 +136,7 @@ void SBValueList::Append(lldb::ValueObjectSP &val_obj_sp) {
 }
 
 void SBValueList::Append(const lldb::SBValueList &value_list) {
-  LLDB_RECORD_METHOD(void, SBValueList, Append, (const lldb::SBValueList &),
-                     value_list);
+  LLDB_INSTRUMENT_VA(this, value_list);
 
   if (value_list.IsValid()) {
     CreateIfNeeded();
@@ -148,9 +145,7 @@ void SBValueList::Append(const lldb::SBValueList &value_list) {
 }
 
 SBValue SBValueList::GetValueAtIndex(uint32_t idx) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBValue, SBValueList, GetValueAtIndex,
-                           (uint32_t), idx);
-
+  LLDB_INSTRUMENT_VA(this, idx);
 
   SBValue sb_value;
   if (m_opaque_up)
@@ -160,7 +155,7 @@ SBValue SBValueList::GetValueAtIndex(uint32_t idx) const {
 }
 
 uint32_t SBValueList::GetSize() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(uint32_t, SBValueList, GetSize);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t size = 0;
   if (m_opaque_up)
@@ -175,8 +170,7 @@ void SBValueList::CreateIfNeeded() {
 }
 
 SBValue SBValueList::FindValueObjectByUID(lldb::user_id_t uid) {
-  LLDB_RECORD_METHOD(lldb::SBValue, SBValueList, FindValueObjectByUID,
-                     (lldb::user_id_t), uid);
+  LLDB_INSTRUMENT_VA(this, uid);
 
   SBValue sb_value;
   if (m_opaque_up)
@@ -185,8 +179,7 @@ SBValue SBValueList::FindValueObjectByUID(lldb::user_id_t uid) {
 }
 
 SBValue SBValueList::GetFirstValueByName(const char *name) const {
-  LLDB_RECORD_METHOD_CONST(lldb::SBValue, SBValueList, GetFirstValueByName,
-                           (const char *), name);
+  LLDB_INSTRUMENT_VA(this, name);
 
   SBValue sb_value;
   if (m_opaque_up)
diff --git a/lldb/source/API/SBVariablesOptions.cpp b/lldb/source/API/SBVariablesOptions.cpp
index 4057e283e2b20..989d159139cca 100644
--- a/lldb/source/API/SBVariablesOptions.cpp
+++ b/lldb/source/API/SBVariablesOptions.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBVariablesOptions.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBTarget.h"
 #include "lldb/Target/Target.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/lldb-private.h"
 
@@ -80,21 +80,17 @@ class VariablesOptionsImpl {
 
 SBVariablesOptions::SBVariablesOptions()
     : m_opaque_up(new VariablesOptionsImpl()) {
-  LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBVariablesOptions);
+  LLDB_INSTRUMENT_VA(this);
 }
 
 SBVariablesOptions::SBVariablesOptions(const SBVariablesOptions &options)
     : m_opaque_up(new VariablesOptionsImpl(options.ref())) {
-  LLDB_RECORD_CONSTRUCTOR(SBVariablesOptions,
-                          (const lldb::SBVariablesOptions &), options);
+  LLDB_INSTRUMENT_VA(this, options);
 }
 
 SBVariablesOptions &SBVariablesOptions::
 operator=(const SBVariablesOptions &options) {
-  LLDB_RECORD_METHOD(
-      lldb::SBVariablesOptions &,
-      SBVariablesOptions, operator=,(const lldb::SBVariablesOptions &),
-      options);
+  LLDB_INSTRUMENT_VA(this, options);
 
   m_opaque_up = std::make_unique<VariablesOptionsImpl>(options.ref());
   return *this;
@@ -103,109 +99,97 @@ operator=(const SBVariablesOptions &options) {
 SBVariablesOptions::~SBVariablesOptions() = default;
 
 bool SBVariablesOptions::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBVariablesOptions::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up != nullptr;
 }
 
 bool SBVariablesOptions::GetIncludeArguments() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions,
-                                   GetIncludeArguments);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetIncludeArguments();
 }
 
 void SBVariablesOptions::SetIncludeArguments(bool arguments) {
-  LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeArguments, (bool),
-                     arguments);
+  LLDB_INSTRUMENT_VA(this, arguments);
 
   m_opaque_up->SetIncludeArguments(arguments);
 }
 
 bool SBVariablesOptions::GetIncludeRecognizedArguments(
     const lldb::SBTarget &target) const {
-  LLDB_RECORD_METHOD_CONST(bool, SBVariablesOptions,
-                           GetIncludeRecognizedArguments,
-                           (const lldb::SBTarget &), target);
+  LLDB_INSTRUMENT_VA(this, target);
 
   return m_opaque_up->GetIncludeRecognizedArguments(target.GetSP());
 }
 
 void SBVariablesOptions::SetIncludeRecognizedArguments(bool arguments) {
-  LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeRecognizedArguments,
-                     (bool), arguments);
+  LLDB_INSTRUMENT_VA(this, arguments);
 
   m_opaque_up->SetIncludeRecognizedArguments(arguments);
 }
 
 bool SBVariablesOptions::GetIncludeLocals() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, GetIncludeLocals);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetIncludeLocals();
 }
 
 void SBVariablesOptions::SetIncludeLocals(bool locals) {
-  LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeLocals, (bool),
-                     locals);
+  LLDB_INSTRUMENT_VA(this, locals);
 
   m_opaque_up->SetIncludeLocals(locals);
 }
 
 bool SBVariablesOptions::GetIncludeStatics() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, GetIncludeStatics);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetIncludeStatics();
 }
 
 void SBVariablesOptions::SetIncludeStatics(bool statics) {
-  LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeStatics, (bool),
-                     statics);
+  LLDB_INSTRUMENT_VA(this, statics);
 
   m_opaque_up->SetIncludeStatics(statics);
 }
 
 bool SBVariablesOptions::GetInScopeOnly() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions, GetInScopeOnly);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetInScopeOnly();
 }
 
 void SBVariablesOptions::SetInScopeOnly(bool in_scope_only) {
-  LLDB_RECORD_METHOD(void, SBVariablesOptions, SetInScopeOnly, (bool),
-                     in_scope_only);
+  LLDB_INSTRUMENT_VA(this, in_scope_only);
 
   m_opaque_up->SetInScopeOnly(in_scope_only);
 }
 
 bool SBVariablesOptions::GetIncludeRuntimeSupportValues() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBVariablesOptions,
-                                   GetIncludeRuntimeSupportValues);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetIncludeRuntimeSupportValues();
 }
 
 void SBVariablesOptions::SetIncludeRuntimeSupportValues(
     bool runtime_support_values) {
-  LLDB_RECORD_METHOD(void, SBVariablesOptions, SetIncludeRuntimeSupportValues,
-                     (bool), runtime_support_values);
+  LLDB_INSTRUMENT_VA(this, runtime_support_values);
 
   m_opaque_up->SetIncludeRuntimeSupportValues(runtime_support_values);
 }
 
 lldb::DynamicValueType SBVariablesOptions::GetUseDynamic() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::DynamicValueType, SBVariablesOptions,
-                                   GetUseDynamic);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_up->GetUseDynamic();
 }
 
 void SBVariablesOptions::SetUseDynamic(lldb::DynamicValueType dynamic) {
-  LLDB_RECORD_METHOD(void, SBVariablesOptions, SetUseDynamic,
-                     (lldb::DynamicValueType), dynamic);
+  LLDB_INSTRUMENT_VA(this, dynamic);
 
   m_opaque_up->SetUseDynamic(dynamic);
 }
diff --git a/lldb/source/API/SBWatchpoint.cpp b/lldb/source/API/SBWatchpoint.cpp
index 207141f5dca6d..f5bd9cd1a9467 100644
--- a/lldb/source/API/SBWatchpoint.cpp
+++ b/lldb/source/API/SBWatchpoint.cpp
@@ -7,12 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/API/SBWatchpoint.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
 #include "lldb/API/SBAddress.h"
 #include "lldb/API/SBDebugger.h"
 #include "lldb/API/SBDefines.h"
 #include "lldb/API/SBEvent.h"
 #include "lldb/API/SBStream.h"
+#include "lldb/Utility/Instrumentation.h"
 
 #include "lldb/Breakpoint/Watchpoint.h"
 #include "lldb/Breakpoint/WatchpointList.h"
@@ -26,21 +26,20 @@
 using namespace lldb;
 using namespace lldb_private;
 
-SBWatchpoint::SBWatchpoint() { LLDB_RECORD_CONSTRUCTOR_NO_ARGS(SBWatchpoint); }
+SBWatchpoint::SBWatchpoint() { LLDB_INSTRUMENT_VA(this); }
 
 SBWatchpoint::SBWatchpoint(const lldb::WatchpointSP &wp_sp)
     : m_opaque_wp(wp_sp) {
-  LLDB_RECORD_CONSTRUCTOR(SBWatchpoint, (const lldb::WatchpointSP &), wp_sp);
+  LLDB_INSTRUMENT_VA(this, wp_sp);
 }
 
 SBWatchpoint::SBWatchpoint(const SBWatchpoint &rhs)
     : m_opaque_wp(rhs.m_opaque_wp) {
-  LLDB_RECORD_CONSTRUCTOR(SBWatchpoint, (const lldb::SBWatchpoint &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 }
 
 const SBWatchpoint &SBWatchpoint::operator=(const SBWatchpoint &rhs) {
-  LLDB_RECORD_METHOD(const lldb::SBWatchpoint &,
-                     SBWatchpoint, operator=,(const lldb::SBWatchpoint &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   m_opaque_wp = rhs.m_opaque_wp;
   return *this;
@@ -49,8 +48,7 @@ const SBWatchpoint &SBWatchpoint::operator=(const SBWatchpoint &rhs) {
 SBWatchpoint::~SBWatchpoint() = default;
 
 watch_id_t SBWatchpoint::GetID() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::watch_id_t, SBWatchpoint, GetID);
-
+  LLDB_INSTRUMENT_VA(this);
 
   watch_id_t watch_id = LLDB_INVALID_WATCH_ID;
   lldb::WatchpointSP watchpoint_sp(GetSP());
@@ -61,31 +59,29 @@ watch_id_t SBWatchpoint::GetID() {
 }
 
 bool SBWatchpoint::IsValid() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBWatchpoint, IsValid);
+  LLDB_INSTRUMENT_VA(this);
   return this->operator bool();
 }
 SBWatchpoint::operator bool() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(bool, SBWatchpoint, operator bool);
+  LLDB_INSTRUMENT_VA(this);
 
   return bool(m_opaque_wp.lock());
 }
 
 bool SBWatchpoint::operator==(const SBWatchpoint &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBWatchpoint, operator==,(const SBWatchpoint &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return GetSP() == rhs.GetSP();
 }
 
 bool SBWatchpoint::operator!=(const SBWatchpoint &rhs) const {
-  LLDB_RECORD_METHOD_CONST(
-      bool, SBWatchpoint, operator!=,(const SBWatchpoint &), rhs);
+  LLDB_INSTRUMENT_VA(this, rhs);
 
   return !(*this == rhs);
 }
 
 SBError SBWatchpoint::GetError() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::SBError, SBWatchpoint, GetError);
+  LLDB_INSTRUMENT_VA(this);
 
   SBError sb_error;
   lldb::WatchpointSP watchpoint_sp(GetSP());
@@ -96,7 +92,7 @@ SBError SBWatchpoint::GetError() {
 }
 
 int32_t SBWatchpoint::GetHardwareIndex() {
-  LLDB_RECORD_METHOD_NO_ARGS(int32_t, SBWatchpoint, GetHardwareIndex);
+  LLDB_INSTRUMENT_VA(this);
 
   int32_t hw_index = -1;
 
@@ -111,7 +107,7 @@ int32_t SBWatchpoint::GetHardwareIndex() {
 }
 
 addr_t SBWatchpoint::GetWatchAddress() {
-  LLDB_RECORD_METHOD_NO_ARGS(lldb::addr_t, SBWatchpoint, GetWatchAddress);
+  LLDB_INSTRUMENT_VA(this);
 
   addr_t ret_addr = LLDB_INVALID_ADDRESS;
 
@@ -126,7 +122,7 @@ addr_t SBWatchpoint::GetWatchAddress() {
 }
 
 size_t SBWatchpoint::GetWatchSize() {
-  LLDB_RECORD_METHOD_NO_ARGS(size_t, SBWatchpoint, GetWatchSize);
+  LLDB_INSTRUMENT_VA(this);
 
   size_t watch_size = 0;
 
@@ -141,7 +137,7 @@ size_t SBWatchpoint::GetWatchSize() {
 }
 
 void SBWatchpoint::SetEnabled(bool enabled) {
-  LLDB_RECORD_METHOD(void, SBWatchpoint, SetEnabled, (bool), enabled);
+  LLDB_INSTRUMENT_VA(this, enabled);
 
   lldb::WatchpointSP watchpoint_sp(GetSP());
   if (watchpoint_sp) {
@@ -161,7 +157,7 @@ void SBWatchpoint::SetEnabled(bool enabled) {
 }
 
 bool SBWatchpoint::IsEnabled() {
-  LLDB_RECORD_METHOD_NO_ARGS(bool, SBWatchpoint, IsEnabled);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::WatchpointSP watchpoint_sp(GetSP());
   if (watchpoint_sp) {
@@ -173,7 +169,7 @@ bool SBWatchpoint::IsEnabled() {
 }
 
 uint32_t SBWatchpoint::GetHitCount() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBWatchpoint, GetHitCount);
+  LLDB_INSTRUMENT_VA(this);
 
   uint32_t count = 0;
   lldb::WatchpointSP watchpoint_sp(GetSP());
@@ -187,7 +183,7 @@ uint32_t SBWatchpoint::GetHitCount() {
 }
 
 uint32_t SBWatchpoint::GetIgnoreCount() {
-  LLDB_RECORD_METHOD_NO_ARGS(uint32_t, SBWatchpoint, GetIgnoreCount);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::WatchpointSP watchpoint_sp(GetSP());
   if (watchpoint_sp) {
@@ -199,7 +195,7 @@ uint32_t SBWatchpoint::GetIgnoreCount() {
 }
 
 void SBWatchpoint::SetIgnoreCount(uint32_t n) {
-  LLDB_RECORD_METHOD(void, SBWatchpoint, SetIgnoreCount, (uint32_t), n);
+  LLDB_INSTRUMENT_VA(this, n);
 
   lldb::WatchpointSP watchpoint_sp(GetSP());
   if (watchpoint_sp) {
@@ -210,7 +206,7 @@ void SBWatchpoint::SetIgnoreCount(uint32_t n) {
 }
 
 const char *SBWatchpoint::GetCondition() {
-  LLDB_RECORD_METHOD_NO_ARGS(const char *, SBWatchpoint, GetCondition);
+  LLDB_INSTRUMENT_VA(this);
 
   lldb::WatchpointSP watchpoint_sp(GetSP());
   if (watchpoint_sp) {
@@ -222,8 +218,7 @@ const char *SBWatchpoint::GetCondition() {
 }
 
 void SBWatchpoint::SetCondition(const char *condition) {
-  LLDB_RECORD_METHOD(void, SBWatchpoint, SetCondition, (const char *),
-                     condition);
+  LLDB_INSTRUMENT_VA(this, condition);
 
   lldb::WatchpointSP watchpoint_sp(GetSP());
   if (watchpoint_sp) {
@@ -235,9 +230,7 @@ void SBWatchpoint::SetCondition(const char *condition) {
 
 bool SBWatchpoint::GetDescription(SBStream &description,
                                   DescriptionLevel level) {
-  LLDB_RECORD_METHOD(bool, SBWatchpoint, GetDescription,
-                     (lldb::SBStream &, lldb::DescriptionLevel), description,
-                     level);
+  LLDB_INSTRUMENT_VA(this, description, level);
 
   Stream &strm = description.ref();
 
@@ -254,27 +247,25 @@ bool SBWatchpoint::GetDescription(SBStream &description,
 }
 
 void SBWatchpoint::Clear() {
-  LLDB_RECORD_METHOD_NO_ARGS(void, SBWatchpoint, Clear);
+  LLDB_INSTRUMENT_VA(this);
 
   m_opaque_wp.reset();
 }
 
 lldb::WatchpointSP SBWatchpoint::GetSP() const {
-  LLDB_RECORD_METHOD_CONST_NO_ARGS(lldb::WatchpointSP, SBWatchpoint, GetSP);
+  LLDB_INSTRUMENT_VA(this);
 
   return m_opaque_wp.lock();
 }
 
 void SBWatchpoint::SetSP(const lldb::WatchpointSP &sp) {
-  LLDB_RECORD_METHOD(void, SBWatchpoint, SetSP, (const lldb::WatchpointSP &),
-                     sp);
+  LLDB_INSTRUMENT_VA(this, sp);
 
   m_opaque_wp = sp;
 }
 
 bool SBWatchpoint::EventIsWatchpointEvent(const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(bool, SBWatchpoint, EventIsWatchpointEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   return Watchpoint::WatchpointEventData::GetEventDataFromEvent(event.get()) !=
          nullptr;
@@ -282,9 +273,7 @@ bool SBWatchpoint::EventIsWatchpointEvent(const lldb::SBEvent &event) {
 
 WatchpointEventType
 SBWatchpoint::GetWatchpointEventTypeFromEvent(const SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::WatchpointEventType, SBWatchpoint,
-                            GetWatchpointEventTypeFromEvent,
-                            (const lldb::SBEvent &), event);
+  LLDB_INSTRUMENT_VA(event);
 
   if (event.IsValid())
     return Watchpoint::WatchpointEventData::GetWatchpointEventTypeFromEvent(
@@ -293,9 +282,7 @@ SBWatchpoint::GetWatchpointEventTypeFromEvent(const SBEvent &event) {
 }
 
 SBWatchpoint SBWatchpoint::GetWatchpointFromEvent(const lldb::SBEvent &event) {
-  LLDB_RECORD_STATIC_METHOD(lldb::SBWatchpoint, SBWatchpoint,
-                            GetWatchpointFromEvent, (const lldb::SBEvent &),
-                            event);
+  LLDB_INSTRUMENT_VA(event);
 
   SBWatchpoint sb_watchpoint;
   if (event.IsValid())
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
index 80e986f18c618..1bf647e4acfc6 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptInterpreterPython.cpp
@@ -37,7 +37,7 @@
 #include "lldb/Interpreter/CommandReturnObject.h"
 #include "lldb/Target/Thread.h"
 #include "lldb/Target/ThreadPlan.h"
-#include "lldb/Utility/ReproducerInstrumentation.h"
+#include "lldb/Utility/Instrumentation.h"
 #include "lldb/Utility/Timer.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
diff --git a/lldb/source/Utility/CMakeLists.txt b/lldb/source/Utility/CMakeLists.txt
index 6790e47d69f25..cc0bf5fdb61ac 100644
--- a/lldb/source/Utility/CMakeLists.txt
+++ b/lldb/source/Utility/CMakeLists.txt
@@ -49,7 +49,7 @@ add_lldb_library(lldbUtility
   RegisterValue.cpp
   RegularExpression.cpp
   Reproducer.cpp
-  ReproducerInstrumentation.cpp
+  Instrumentation.cpp
   ReproducerProvider.cpp
   Scalar.cpp
   SelectHelper.cpp
diff --git a/lldb/source/Utility/ReproducerInstrumentation.cpp b/lldb/source/Utility/Instrumentation.cpp
similarity index 52%
rename from lldb/source/Utility/ReproducerInstrumentation.cpp
rename to lldb/source/Utility/Instrumentation.cpp
index 8365701f8e3be..d375fcea58c02 100644
--- a/lldb/source/Utility/ReproducerInstrumentation.cpp
+++ b/lldb/source/Utility/Instrumentation.cpp
@@ -1,46 +1,37 @@
-//===-- ReproducerInstrumentation.cpp -------------------------------------===//
-//
+//===-- Instrumentation.cpp -----------------------------------------------===//
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
-#include "lldb/Utility/ReproducerInstrumentation.h"
-#include "lldb/Utility/Reproducer.h"
+#include "lldb/Utility/Instrumentation.h"
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
 #include <thread>
 
 using namespace lldb_private;
-using namespace lldb_private::repro;
+using namespace lldb_private::instrumentation;
 
 // Whether we're currently across the API boundary.
 static thread_local bool g_global_boundary = false;
 
-Recorder::Recorder() {
-  if (!g_global_boundary) {
-    g_global_boundary = true;
-    m_local_boundary = true;
-  }
-}
-
-Recorder::Recorder(llvm::StringRef pretty_func, std::string &&pretty_args)
-    :  m_local_boundary(false) {
+Instrumenter::Instrumenter(llvm::StringRef pretty_func,
+                           std::string &&pretty_args)
+    : m_local_boundary(false) {
   if (!g_global_boundary) {
     g_global_boundary = true;
     m_local_boundary = true;
-    LLDB_LOG(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API), "{0} ({1})",
-             pretty_func, pretty_args);
   }
+  LLDB_LOG(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API), "[{0}] {1} ({2})",
+           m_local_boundary ? "external" : "internal", pretty_func,
+           pretty_args);
 }
 
-Recorder::~Recorder() {
-  UpdateBoundary();
-}
+Instrumenter::~Instrumenter() { UpdateBoundary(); }
 
-void Recorder::UpdateBoundary() {
+void Instrumenter::UpdateBoundary() {
   if (m_local_boundary)
     g_global_boundary = false;
 }

From 156b997251db6d87636fa300d7654989caa01dea Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 20 Jan 2022 15:50:27 -0800
Subject: [PATCH 117/946] [lldb] Instrument SB API with signposts

Instrument the SB API with signposts on Darwin. This gives us a time
profile on whose behalf LLDB spends time (particularly when run via the
SBAPI from an IDE).

Differential revision: https://reviews.llvm.org/D117632
---
 lldb/include/lldb/Utility/Instrumentation.h |  2 ++
 lldb/source/Utility/Instrumentation.cpp     | 18 ++++++++++++------
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/lldb/include/lldb/Utility/Instrumentation.h b/lldb/include/lldb/Utility/Instrumentation.h
index 6962270bb89db..ff6591d634378 100644
--- a/lldb/include/lldb/Utility/Instrumentation.h
+++ b/lldb/include/lldb/Utility/Instrumentation.h
@@ -86,6 +86,8 @@ class Instrumenter {
 private:
   void UpdateBoundary();
 
+  llvm::StringRef m_pretty_func;
+
   /// Whether this function call was the one crossing the API boundary.
   bool m_local_boundary = false;
 };
diff --git a/lldb/source/Utility/Instrumentation.cpp b/lldb/source/Utility/Instrumentation.cpp
index d375fcea58c02..861789810e1a1 100644
--- a/lldb/source/Utility/Instrumentation.cpp
+++ b/lldb/source/Utility/Instrumentation.cpp
@@ -6,6 +6,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "lldb/Utility/Instrumentation.h"
+#include "llvm/Support/Signposts.h"
+
 #include <cstdio>
 #include <cstdlib>
 #include <limits>
@@ -17,21 +19,25 @@ using namespace lldb_private::instrumentation;
 // Whether we're currently across the API boundary.
 static thread_local bool g_global_boundary = false;
 
+// Instrument SB API calls with singposts when supported.
+static llvm::ManagedStatic<llvm::SignpostEmitter> g_api_signposts;
+
 Instrumenter::Instrumenter(llvm::StringRef pretty_func,
                            std::string &&pretty_args)
-    : m_local_boundary(false) {
+    : m_pretty_func(pretty_func), m_local_boundary(false) {
   if (!g_global_boundary) {
     g_global_boundary = true;
     m_local_boundary = true;
+    g_api_signposts->startInterval(this, m_pretty_func);
   }
   LLDB_LOG(GetLogIfAllCategoriesSet(LIBLLDB_LOG_API), "[{0}] {1} ({2})",
-           m_local_boundary ? "external" : "internal", pretty_func,
+           m_local_boundary ? "external" : "internal", m_pretty_func,
            pretty_args);
 }
 
-Instrumenter::~Instrumenter() { UpdateBoundary(); }
-
-void Instrumenter::UpdateBoundary() {
-  if (m_local_boundary)
+Instrumenter::~Instrumenter() {
+  if (m_local_boundary) {
     g_global_boundary = false;
+    g_api_signposts->endInterval(this, m_pretty_func);
+  }
 }

From 0dfe953294ba1c0fc43fb710518b9b5a608b223a Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Thu, 20 Jan 2022 12:06:47 -0500
Subject: [PATCH 118/946] [OpenMP] Change default visibility to protected for
 device declarations

This patch changes the special-case handling of visibility when
compiling for an OpenMP target offloading device. This was orignally
added as a precaution against the bug encountered in PR41826 when
symbols in the device were being preempted by shared library symbols.
This should instead be done by making the visibility protected by default.
With protected visibility we are asserting that the symbols on the device
will never be preempted or preempt another symbol pending a shared library
load.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D117806
---
 clang/lib/AST/Decl.cpp                        | 13 --------
 clang/lib/Driver/ToolChains/Clang.cpp         |  6 ++++
 clang/test/OpenMP/declare_target_codegen.cpp  | 33 ++++++++++---------
 ...x_declare_target_var_ctor_dtor_codegen.cpp | 28 ++++++++--------
 .../nvptx_target_pure_deleted_codegen.cpp     | 10 +++---
 .../OpenMP/nvptx_unsupported_type_codegen.cpp | 16 ++++-----
 .../OpenMP/target_attribute_convergent.cpp    |  6 ++--
 7 files changed, 53 insertions(+), 59 deletions(-)

diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp
index 232376e4e05db..b2ee34f20cf73 100644
--- a/clang/lib/AST/Decl.cpp
+++ b/clang/lib/AST/Decl.cpp
@@ -787,10 +787,6 @@ LinkageComputer::getLVForNamespaceScopeDecl(const NamedDecl *D,
     // Note that we don't want to make the variable non-external
     // because of this, but unique-external linkage suits us.
 
-    // We need variables inside OpenMP declare target directives to be visible.
-    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(Var))
-      return LinkageInfo::external();
-
     if (Context.getLangOpts().CPlusPlus && !isFirstInExternCContext(Var) &&
         !IgnoreVarTypeLinkage) {
       LinkageInfo TypeLV = getLVForType(*Var->getType(), computation);
@@ -917,10 +913,6 @@ LinkageComputer::getLVForNamespaceScopeDecl(const NamedDecl *D,
   if (!isExternallyVisible(LV.getLinkage()))
     return LinkageInfo(LV.getLinkage(), DefaultVisibility, false);
 
-  // Mark the symbols as hidden when compiling for the device.
-  if (Context.getLangOpts().OpenMP && Context.getLangOpts().OpenMPIsDevice)
-    LV.mergeVisibility(HiddenVisibility, /*newExplicit=*/false);
-
   return LV;
 }
 
@@ -1075,11 +1067,6 @@ LinkageComputer::getLVForClassMember(const NamedDecl *D,
   // Finally, merge in information from the class.
   LV.mergeMaybeWithVisibility(classLV, considerClassVisibility);
 
-  // We need variables inside OpenMP declare target directives to be visible.
-  if (const VarDecl *VD = dyn_cast<VarDecl>(D))
-    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD))
-      return LinkageInfo(LV.getLinkage(), DefaultVisibility, false);
-
   return LV;
 }
 
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a22f03a488486..dd2570132ddf7 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5822,6 +5822,12 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
       CmdArgs.push_back("-ftype-visibility");
       CmdArgs.push_back("default");
     }
+  } else if (IsOpenMPDevice) {
+    // When compiling for the OpenMP device we want protected visibility by
+    // default. This prevents the device from accidenally preempting code on the
+    // host, makes the system more robust, and improves performance.
+    CmdArgs.push_back("-fvisibility");
+    CmdArgs.push_back("protected");
   }
 
   if (!RawTriple.isPS4())
diff --git a/clang/test/OpenMP/declare_target_codegen.cpp b/clang/test/OpenMP/declare_target_codegen.cpp
index 6ea9f181b5cbd..991d6aa67f539 100644
--- a/clang/test/OpenMP/declare_target_codegen.cpp
+++ b/clang/test/OpenMP/declare_target_codegen.cpp
@@ -1,22 +1,22 @@
 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -DLOAD
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DLOAD | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - -DLOAD | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DLOAD | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - -DLOAD | FileCheck %s
 
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -DOMP5 | FileCheck %s --check-prefix HOST5
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -DOMP5
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DOMP5 | FileCheck %s --check-prefix DEV5
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DOMP5 | FileCheck %s --check-prefix DEV5
 
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - -DOMP5 | FileCheck %s --check-prefix KMPC-ONLY
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - -DOMP5 | FileCheck %s --check-prefix SIMD-ONLY
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -DOMP5
-// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DOMP5 | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DOMP5 | FileCheck %s --check-prefix SIMD-ONLY
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=45
-// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-version=45 | FileCheck %s --check-prefix SIMD-ONLY
-// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -fopenmp-version=45
-// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -o - -fopenmp-version=45 | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-version=45 | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -fopenmp-version=45
+// RUN: %clang_cc1 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify -o - -fopenmp-version=45 | FileCheck %s --check-prefix SIMD-ONLY
 
 // expected-no-diagnostics
 
@@ -26,26 +26,26 @@
 // CHECK-NOT: define {{.*}}{{baz1|baz4|maini1|Base|virtual_}}
 // CHECK-DAG: Bake
 // CHECK-NOT: @{{hhh|ggg|fff|eee}} =
-// CHECK-DAG: @flag = global i8 undef,
+// CHECK-DAG: @flag = protected global i8 undef,
 // CHECK-DAG: @aaa = external global i32,
-// CHECK-DAG: @bbb = global i32 0,
+// CHECK-DAG: @bbb ={{ protected | }}global i32 0,
 // CHECK-DAG: weak constant %struct.__tgt_offload_entry { i8* bitcast (i32* @bbb to i8*),
 // CHECK-DAG: @ccc = external global i32,
-// CHECK-DAG: @ddd = global i32 0,
+// CHECK-DAG: @ddd ={{ protected | }}global i32 0,
 // CHECK-DAG: @hhh_decl_tgt_ref_ptr = weak global i32* null
 // CHECK-DAG: @ggg_decl_tgt_ref_ptr = weak global i32* null
 // CHECK-DAG: @fff_decl_tgt_ref_ptr = weak global i32* null
 // CHECK-DAG: @eee_decl_tgt_ref_ptr = weak global i32* null
 // CHECK-DAG: @{{.*}}maini1{{.*}}aaa = internal global i64 23,
 // CHECK-DAG: @pair = {{.*}}addrspace(3) global %struct.PAIR undef
-// CHECK-DAG: @_ZN2SS3SSSE = global i32 1,
-// CHECK-DAG: @b = global i32 15,
-// CHECK-DAG: @d = global i32 0,
+// CHECK-DAG: @_ZN2SS3SSSE ={{ protected | }}global i32 1,
+// CHECK-DAG: @b ={{ protected | }}global i32 15,
+// CHECK-DAG: @d ={{ protected | }}global i32 0,
 // CHECK-DAG: @c = external global i32,
-// CHECK-DAG: @globals = global %struct.S zeroinitializer,
+// CHECK-DAG: @globals ={{ protected | }}global %struct.S zeroinitializer,
 // CHECK-DAG: [[STAT:@.+stat]] = internal global %struct.S zeroinitializer,
 // CHECK-DAG: [[STAT_REF:@.+]] = internal constant %struct.S* [[STAT]]
-// CHECK-DAG: @out_decl_target = global i32 0,
+// CHECK-DAG: @out_decl_target ={{ protected | }}global i32 0,
 // CHECK-DAG: @llvm.used = appending global [2 x i8*] [i8* bitcast (void ()* @__omp_offloading__{{.+}}_globals_l[[@LINE+84]]_ctor to i8*), i8* bitcast (void ()* @__omp_offloading__{{.+}}_stat_l[[@LINE+85]]_ctor to i8*)],
 // CHECK-DAG: @llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (%struct.S** [[STAT_REF]] to i8*)],
 
@@ -291,4 +291,5 @@ struct SS {
 #pragma omp end declare target
 };
 int SS::SSS = 1;
+
 #endif
diff --git a/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp b/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp
index 975015cc58276..0b8c17f1d3279 100644
--- a/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_declare_target_var_ctor_dtor_codegen.cpp
@@ -1,14 +1,14 @@
 // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix HOST --check-prefix CHECK
 // RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK
-// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
-// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK
+// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix DEVICE --check-prefix CHECK
 
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o - | FileCheck %s --check-prefix SIMD-ONLY
 // RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o -| FileCheck %s --check-prefix SIMD-ONLY
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
-// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o -| FileCheck %s --check-prefix SIMD-ONLY
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - | FileCheck %s --check-prefix SIMD-ONLY
 
 #ifndef HEADER
 #define HEADER
@@ -16,9 +16,9 @@
 // SIMD-ONLY-NOT: {{__kmpc|__tgt}}
 
 // DEVICE-DAG: [[C_ADDR:.+]] = internal global i32 0,
-// DEVICE-DAG: [[CD_ADDR:@.+]] ={{ hidden | }}global %struct.S zeroinitializer,
+// DEVICE-DAG: [[CD_ADDR:@.+]] ={{ protected | }}global %struct.S zeroinitializer,
 // HOST-DAG: @[[C_ADDR:.+]] = internal global i32 0,
-// HOST-DAG: @[[CD_ADDR:.+]] ={{( hidden | dso_local)?}} global %struct.S zeroinitializer,
+// HOST-DAG: @[[CD_ADDR:.+]] ={{( protected | dso_local)?}} global %struct.S zeroinitializer,
 
 #pragma omp declare target
 int foo() { return 0; }
@@ -34,12 +34,12 @@ int car() { return 0; }
 #pragma omp declare target (bar)
 int caz() { return 0; }
 
-// DEVICE-DAG: define{{ hidden | }}noundef i32 [[FOO:@.*foo.*]]()
-// DEVICE-DAG: define{{ hidden | }}noundef i32 [[BAR:@.*bar.*]]()
-// DEVICE-DAG: define{{ hidden | }}noundef i32 [[BAZ:@.*baz.*]]()
-// DEVICE-DAG: define{{ hidden | }}noundef i32 [[DOO:@.*doo.*]]()
-// DEVICE-DAG: define{{ hidden | }}noundef i32 [[CAR:@.*car.*]]()
-// DEVICE-DAG: define{{ hidden | }}noundef i32 [[CAZ:@.*caz.*]]()
+// DEVICE-DAG: define{{ protected | }}noundef i32 [[FOO:@.*foo.*]]()
+// DEVICE-DAG: define{{ protected | }}noundef i32 [[BAR:@.*bar.*]]()
+// DEVICE-DAG: define{{ protected | }}noundef i32 [[BAZ:@.*baz.*]]()
+// DEVICE-DAG: define{{ protected | }}noundef i32 [[DOO:@.*doo.*]]()
+// DEVICE-DAG: define{{ protected | }}noundef i32 [[CAR:@.*car.*]]()
+// DEVICE-DAG: define{{ protected | }}noundef i32 [[CAZ:@.*caz.*]]()
 
 static int c = foo() + bar() + baz();
 #pragma omp declare target (c)
diff --git a/clang/test/OpenMP/nvptx_target_pure_deleted_codegen.cpp b/clang/test/OpenMP/nvptx_target_pure_deleted_codegen.cpp
index e38fcfb080007..781d0ae3d4142 100644
--- a/clang/test/OpenMP/nvptx_target_pure_deleted_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_target_pure_deleted_codegen.cpp
@@ -1,17 +1,17 @@
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fno-rtti | FileCheck %s
-// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fno-rtti | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fno-rtti | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fno-rtti | FileCheck %s
 // expected-no-diagnostics
 
 #ifndef HEADER
 #define HEADER
 
 // CHECK-NOT: class_type_info
-// CHECK-DAG: @_ZTV7Derived = linkonce_odr hidden unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%class.Derived*)* @_ZN7Derived3fooEv to i8*)] }
-// CHECK-DAG: @_ZTV4Base = linkonce_odr hidden unnamed_addr constant { [3 x i8*] } zeroinitializer
+// CHECK-DAG: @_ZTV7Derived = linkonce_odr protected unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* null, i8* bitcast (void (%class.Derived*)* @_ZN7Derived3fooEv to i8*)] }
+// CHECK-DAG: @_ZTV4Base = linkonce_odr protected unnamed_addr constant { [3 x i8*] } zeroinitializer
 // CHECK-NOT: class_type_info
 class Base {
   public:
diff --git a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
index 43c47dbc5a973..9e3f5a56bcbca 100644
--- a/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
+++ b/clang/test/OpenMP/nvptx_unsupported_type_codegen.cpp
@@ -1,8 +1,8 @@
 // Test target codegen - host bc file has to be created first.
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple x86_64-unknown-linux -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-host.bc
-// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-linux-gnu -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -fopenmp-host-ir-file-path %t-host.bc -o - | FileCheck %s
 // expected-no-diagnostics
 
 // CHECK-DAG: [[T:%.+]] = type {{.+}}, {{fp128|ppc_fp128}},
@@ -34,18 +34,18 @@ struct T1 {
 #pragma omp declare target
 T a = T();
 T f = a;
-// CHECK: define{{ hidden | }}void @{{.+}}foo{{.+}}([[T]]* noundef byval([[T]]) align {{.+}})
+// CHECK: define{{ protected | }}void @{{.+}}foo{{.+}}([[T]]* noundef byval([[T]]) align {{.+}})
 void foo(T a = T()) {
   return;
 }
-// CHECK: define{{ hidden | }}[6 x i64] @{{.+}}bar{{.+}}()
+// CHECK: define{{ protected | }}[6 x i64] @{{.+}}bar{{.+}}()
 T bar() {
 // CHECK:      bitcast [[T]]* %{{.+}} to [6 x i64]*
 // CHECK-NEXT: load [6 x i64], [6 x i64]* %{{.+}},
 // CHECK-NEXT: ret [6 x i64]
   return T();
 }
-// CHECK: define{{ hidden | }}void @{{.+}}baz{{.+}}()
+// CHECK: define{{ protected | }}void @{{.+}}baz{{.+}}()
 void baz() {
 // CHECK:      call [6 x i64] @{{.+}}bar{{.+}}()
 // CHECK-NEXT: bitcast [[T]]* %{{.+}} to [6 x i64]*
@@ -54,17 +54,17 @@ void baz() {
 }
 T1 a1 = T1();
 T1 f1 = a1;
-// CHECK: define{{ hidden | }}void @{{.+}}foo1{{.+}}([[T1]]* noundef byval([[T1]]) align {{.+}})
+// CHECK: define{{ protected | }}void @{{.+}}foo1{{.+}}([[T1]]* noundef byval([[T1]]) align {{.+}})
 void foo1(T1 a = T1()) {
   return;
 }
-// CHECK: define{{ hidden | }}[[T1]] @{{.+}}bar1{{.+}}()
+// CHECK: define{{ protected | }}[[T1]] @{{.+}}bar1{{.+}}()
 T1 bar1() {
 // CHECK:      load [[T1]], [[T1]]*
 // CHECK-NEXT: ret [[T1]]
   return T1();
 }
-// CHECK: define{{ hidden | }}void @{{.+}}baz1{{.+}}()
+// CHECK: define{{ protected | }}void @{{.+}}baz1{{.+}}()
 void baz1() {
 // CHECK: call [[T1]] @{{.+}}bar1{{.+}}()
   T1 t = bar1();
diff --git a/clang/test/OpenMP/target_attribute_convergent.cpp b/clang/test/OpenMP/target_attribute_convergent.cpp
index 932214e987c86..9bea28789d3ba 100644
--- a/clang/test/OpenMP/target_attribute_convergent.cpp
+++ b/clang/test/OpenMP/target_attribute_convergent.cpp
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s
-// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -o - | FileCheck %s
+// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -o - | FileCheck %s
+// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fvisibility protected -o - | FileCheck %s
 // expected-no-diagnostics
 
 #pragma omp declare target
@@ -9,5 +9,5 @@ void foo() {}
 #pragma omp end declare target
 
 // CHECK: Function Attrs: {{.*}}convergent{{.*}}
-// CHECK: define hidden void @_Z3foov() [[ATTRIBUTE_NUMBER:#[0-9]+]]
+// CHECK: define protected void @_Z3foov() [[ATTRIBUTE_NUMBER:#[0-9]+]]
 // CHECK: attributes [[ATTRIBUTE_NUMBER]] = { {{.*}}convergent{{.*}} }

From 26feef084616a18e9b61acb9c78dfca40f4c6f97 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Thu, 20 Jan 2022 13:06:22 -0500
Subject: [PATCH 119/946] [Libomptarget] Change visibility to hidden for device
 RTL

This patch changes the visibility for all construct in the new device
RTL to be hidden by default. This is done after the changes introduced
in D117806 changed the visibility from being hidden by default for all
device compilations. This asserts that the visibility for the device
runtime library will be hidden except for the internal environment
variable. This is done to aid optimization and linking of the device
library.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D117807
---
 openmp/libomptarget/DeviceRTL/CMakeLists.txt            | 4 ++--
 openmp/libomptarget/DeviceRTL/include/Types.h           | 3 +--
 openmp/libomptarget/DeviceRTL/src/Configuration.cpp     | 4 +++-
 openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt    | 2 +-
 openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h | 4 ++--
 openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt     | 1 +
 6 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 8185727ab84c2..2e52bdf4d90bf 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -130,7 +130,7 @@ set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}
 list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
 
 # Set flags for LLVM Bitcode compilation.
-set(bc_flags -S -x c++ -std=c++17
+set(bc_flags -S -x c++ -std=c++17 -fvisibility=hidden
               ${clang_opt_flags}
              -Xclang -emit-llvm-bc
              -Xclang -aux-triple -Xclang ${aux_triple}
@@ -231,5 +231,5 @@ foreach(sm ${nvptx_sm_list})
 endforeach()
 
 foreach(mcpu ${amdgpu_mcpus})
-  compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -fvisibility=default -nogpulib)
+  compileDeviceRTLLibrary(${mcpu} amdgpu -target amdgcn-amd-amdhsa -D__AMDGCN__ -nogpulib)
 endforeach()
diff --git a/openmp/libomptarget/DeviceRTL/include/Types.h b/openmp/libomptarget/DeviceRTL/include/Types.h
index 8d9b48a0f1352..0ff0cee66b3f3 100644
--- a/openmp/libomptarget/DeviceRTL/include/Types.h
+++ b/openmp/libomptarget/DeviceRTL/include/Types.h
@@ -193,8 +193,7 @@ enum OMPTgtExecModeFlags : int8_t {
 // TODO: clang should use address space 5 for omp_thread_mem_alloc, but right
 //       now that's not the case.
 #define THREAD_LOCAL(NAME)                                                     \
-  NAME [[clang::loader_uninitialized, clang::address_space(5),                 \
-         gnu::visibility("hidden")]]
+  NAME [[clang::loader_uninitialized, clang::address_space(5)]]
 
 // TODO: clang should use address space 4 for omp_const_mem_alloc, maybe it
 //       does?
diff --git a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
index ee8a98d703618..349f93a08701c 100644
--- a/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Configuration.cpp
@@ -23,8 +23,10 @@ using namespace _OMP;
 extern uint32_t __omp_rtl_debug_kind; // defined by CGOpenMPRuntimeGPU
 
 // TODO: We want to change the name as soon as the old runtime is gone.
+// This variable should be visibile to the plugin so we override the default
+// hidden visibility.
 DeviceEnvironmentTy CONSTANT(omptarget_device_environment)
-    __attribute__((used, retain, weak));
+    __attribute__((used, retain, weak, visibility("protected")));
 
 uint32_t config::getDebugKind() {
   return __omp_rtl_debug_kind & omptarget_device_environment.DebugKind;
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 0207da1c8b5db..5ff5dde1c45e7 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -132,7 +132,7 @@ macro(add_cuda_bc_library)
     -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device
     -D__AMDGCN__
     -Xclang -target-cpu -Xclang ${mcpu}
-    -fvisibility=default
+    -fvisibility=hidden
     -Wno-unused-value
     -nogpulib
     -O${optimization_level}
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index 1303258f17494..3ea7cdeb5b2bc 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -34,8 +34,8 @@ typedef uint64_t __kmpc_impl_lanemask_t;
   __attribute__((used))   /* Don't discard values the plugin reads */          \
   __attribute__((weak))   /* We may have multiple definitions */               \
   __attribute__((retain)) /* Also needed to keep values alive */               \
-  __attribute__((visibility("default"))) /* Access via SHT_HASH */             \
-  __attribute__((section(".data")))      /* Not .bss, can write before load */
+  __attribute__((visibility("protected"))) /* Access via SHT_HASH */           \
+  __attribute__((section(".data")))        /* Not .bss, can write before load */
 
 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 3f1c4e75cbc16..16126891b6521 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -165,6 +165,7 @@ set(bc_flags -S -x c++ -O1 -std=c++14
              -mllvm -openmp-opt-disable
              -ffreestanding
              -target nvptx64
+             -fvisibility=hidden
              -Xclang -emit-llvm-bc
              -Xclang -aux-triple -Xclang ${aux_triple}
              -fopenmp -fopenmp-cuda-mode -Xclang -fopenmp-is-device

From 58580e922a69d94859a2506c3053d8c066a1e38c Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Thu, 20 Jan 2022 22:20:24 +0000
Subject: [PATCH 120/946] [llvm][cmake] Make `llvm_install_symlink` robust to
 absolute dirs.

If `CMAKE_INSTALL_BINDIR` is a different absolute path per project, as
it is with NixOS when we install every package to its own prefix, the
old way fails when the absolute path gets prepended.

There are still some issues with dowstream packages using `LLVM_TOOLS_INSTALL_DIR` which also may be absolute and just for LLVM proper, but that will be addressed in a future commit.

Differential Revision: https://reviews.llvm.org/D101070
---
 llvm/cmake/modules/LLVMInstallSymlink.cmake | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/cmake/modules/LLVMInstallSymlink.cmake b/llvm/cmake/modules/LLVMInstallSymlink.cmake
index b5c35f706cb7e..ef7f5ab5288de 100644
--- a/llvm/cmake/modules/LLVMInstallSymlink.cmake
+++ b/llvm/cmake/modules/LLVMInstallSymlink.cmake
@@ -6,7 +6,8 @@ include(GNUInstallDirs)
 
 function(install_symlink name target outdir)
   set(DESTDIR $ENV{DESTDIR})
-  set(bindir "${DESTDIR}${CMAKE_INSTALL_PREFIX}/${outdir}")
+  GNUInstallDirs_get_absolute_install_dir(bindir "${outdir}" BINDIR)
+  set(bindir "${DESTDIR}${bindir}")
 
   message(STATUS "Creating ${name}")
 

From a65934241c22ac834578c73f528b062fc53d80bf Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Fri, 21 Jan 2022 02:11:31 +0000
Subject: [PATCH 121/946] [gn build] Port 1755f5b1d7b7

---
 llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn
index 15c1262f53ae3..7aff2ae2a22a0 100644
--- a/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Utility/BUILD.gn
@@ -22,6 +22,7 @@ static_library("Utility") {
     "FileSpec.cpp",
     "GDBRemote.cpp",
     "IOObject.cpp",
+    "Instrumentation.cpp",
     "LLDBAssert.cpp",
     "Listener.cpp",
     "Log.cpp",
@@ -31,7 +32,6 @@ static_library("Utility") {
     "RegisterValue.cpp",
     "RegularExpression.cpp",
     "Reproducer.cpp",
-    "ReproducerInstrumentation.cpp",
     "ReproducerProvider.cpp",
     "Scalar.cpp",
     "SelectHelper.cpp",

From cac164ff9cb671fae7a00d9a3adf1834ebca8bd9 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 20 Jan 2022 18:28:17 -0800
Subject: [PATCH 122/946] [lldb] Update the modulemap

---
 lldb/include/lldb/module.modulemap | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/include/lldb/module.modulemap b/lldb/include/lldb/module.modulemap
index c0d467a6505eb..303d6b15e808c 100644
--- a/lldb/include/lldb/module.modulemap
+++ b/lldb/include/lldb/module.modulemap
@@ -2,7 +2,7 @@
 module lldb_API {
   requires cplusplus
 
-  textual header "Utility/ReproducerInstrumentation.h"
+  textual header "Utility/Instrumentation.h"
 
   umbrella "API"
   module * { export * }

From d93a11c138bb57f0ecca234867dc65eed35ccba3 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Fri, 21 Jan 2022 02:48:10 +0000
Subject: [PATCH 123/946] Revert "[llvm][cmake] Make `llvm_install_symlink`
 robust to absolute dirs."

https://lab.llvm.org/buildbot/#/builders/36/builds/16668 was the sort of
thing I saw before when this was part of D99484, and it makes some sense
now this would have something to do with it.

This reverts commit 58580e922a69d94859a2506c3053d8c066a1e38c.
---
 llvm/cmake/modules/LLVMInstallSymlink.cmake | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/cmake/modules/LLVMInstallSymlink.cmake b/llvm/cmake/modules/LLVMInstallSymlink.cmake
index ef7f5ab5288de..b5c35f706cb7e 100644
--- a/llvm/cmake/modules/LLVMInstallSymlink.cmake
+++ b/llvm/cmake/modules/LLVMInstallSymlink.cmake
@@ -6,8 +6,7 @@ include(GNUInstallDirs)
 
 function(install_symlink name target outdir)
   set(DESTDIR $ENV{DESTDIR})
-  GNUInstallDirs_get_absolute_install_dir(bindir "${outdir}" BINDIR)
-  set(bindir "${DESTDIR}${bindir}")
+  set(bindir "${DESTDIR}${CMAKE_INSTALL_PREFIX}/${outdir}")
 
   message(STATUS "Creating ${name}")
 

From 82af95029ec947fed8b9c516f04d4f217bd87930 Mon Sep 17 00:00:00 2001
From: Joao Moreira <joao.moreira@intel.com>
Date: Fri, 21 Jan 2022 09:31:21 +0800
Subject: [PATCH 124/946] [X86] Enable ibt-seal optimization when LTO is used
 in Kernel

Intel's CET/IBT requires every indirect branch target to be an ENDBR instruction. Because of that, the compiler needs to correctly emit these instruction on function's prologues. Because this is a security feature, it is desirable that only actual indirect-branch-targeted functions are emitted with ENDBRs. While it is possible to identify address-taken functions through LTO, minimizing these ENDBR instructions remains a hard task for user-space binaries because exported functions may end being reachable through PLT entries, that will use an indirect branch for such. Because this cannot be determined during compilation-time, the compiler currently emits ENDBRs to every non-local-linkage function.

Despite the challenge presented for user-space, the kernel landscape is different as no PLTs are used. With the intent of providing the most fit ENDBR emission for the kernel, kernel developers proposed an optimization named "ibt-seal" which replaces the ENDBRs for NOPs directly in the binary. The discussion of this feature can be seen in [1].

This diff brings the enablement of the flag -mibt-seal, which in combination with LTO enforces a different policy for ENDBR placement in when the code-model is set to "kernel". In this scenario, the compiler will only emit ENDBRs to address taken functions, ignoring non-address taken functions that are don't have local linkage.

A comparison between an LTO-compiled kernel binaries without and with the -mibt-seal feature enabled shows that when -mibt-seal was used, the number of ENDBRs in the vmlinux.o binary patched by objtool decreased from 44383 to 33192, and that the number of superfluous ENDBR instructions nopped-out decreased from 11730 to 540.

The 540 missed superfluous ENDBRs need to be investigated further, but hypotheses are: assembly code not being taken care of by the compiler, kernel exported symbols mechanisms creating bogus address taken situations or even these being removed due to other binary optimizations like kernel's static_calls. For now, I assume that the large drop in the number of ENDBR instructions already justifies the feature being merged.

[1] - https://lkml.org/lkml/2021/11/22/591

Reviewed By: xiangzhangllvm

Differential Revision: https://reviews.llvm.org/D116070
---
 clang/include/clang/Basic/CodeGenOptions.def  |  2 +
 clang/include/clang/Driver/Options.td         |  2 +
 clang/lib/CodeGen/CodeGenModule.cpp           |  3 ++
 clang/lib/Driver/ToolChains/Clang.cpp         |  3 ++
 clang/lib/Frontend/CompilerInvocation.cpp     |  3 ++
 .../Target/X86/X86IndirectBranchTracking.cpp  | 48 ++++++++++++++-----
 llvm/test/CodeGen/X86/ibtseal-kernel.ll       | 19 ++++++++
 llvm/test/CodeGen/X86/ibtseal-large.ll        | 19 ++++++++
 llvm/test/CodeGen/X86/ibtseal-small.ll        | 19 ++++++++
 9 files changed, 107 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/ibtseal-kernel.ll
 create mode 100644 llvm/test/CodeGen/X86/ibtseal-large.ll
 create mode 100644 llvm/test/CodeGen/X86/ibtseal-small.ll

diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def
index 3526b8a4a9044..0da875525c0c4 100644
--- a/clang/include/clang/Basic/CodeGenOptions.def
+++ b/clang/include/clang/Basic/CodeGenOptions.def
@@ -107,6 +107,8 @@ CODEGENOPT(CFProtectionReturn , 1, 0) ///< if -fcf-protection is
                                       ///< set to full or return.
 CODEGENOPT(CFProtectionBranch , 1, 0) ///< if -fcf-protection is
                                       ///< set to full or branch.
+CODEGENOPT(IBTSeal, 1, 0)             ///< set to optimize CFProtectionBranch.
+
 CODEGENOPT(XRayInstrumentFunctions , 1, 0) ///< Set when -fxray-instrument is
                                            ///< enabled.
 CODEGENOPT(StackSizeSection  , 1, 0) ///< Set when -fstack-size-section is enabled.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index b66363b1d3e92..49ceebcb51cf5 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1927,6 +1927,8 @@ def fcf_protection_EQ : Joined<["-"], "fcf-protection=">, Flags<[CoreOption, CC1
 def fcf_protection : Flag<["-"], "fcf-protection">, Group<f_Group>, Flags<[CoreOption, CC1Option]>,
   Alias<fcf_protection_EQ>, AliasArgs<["full"]>,
   HelpText<"Enable cf-protection in 'full' mode">;
+def mibt_seal : Flag<["-"], "mibt-seal">, Group<m_Group>, Flags<[CoreOption, CC1Option]>,
+  HelpText<"Optimize fcf-protection=branch/full (requires LTO).">;
 
 defm xray_instrument : BoolFOption<"xray-instrument",
   LangOpts<"XRayInstrument">, DefaultFalse,
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index e91da73d2f03c..d534cf182f5a7 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -712,6 +712,9 @@ void CodeGenModule::Release() {
                               1);
   }
 
+  if (CodeGenOpts.IBTSeal)
+    getModule().addModuleFlag(llvm::Module::Override, "ibt-seal", 1);
+
   // Add module metadata for return address signing (ignoring
   // non-leaf/all) and stack tagging. These are actually turned on by function
   // attributes, but we use module metadata to emit build attributes. This is
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index dd2570132ddf7..52d576345c027 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -6166,6 +6166,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
         Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
   }
 
+  if (IsUsingLTO)
+    Args.AddLastArg(CmdArgs, options::OPT_mibt_seal);
+
   // Forward -f options with positive and negative forms; we translate these by
   // hand.  Do not propagate PGO options to the GPU-side compilations as the
   // profile info is for the host-side compilation only.
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 7727d70adfb1e..eaca1fbb9def8 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1814,6 +1814,9 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args,
       Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Name;
   }
 
+  if (Opts.PrepareForLTO && Args.hasArg(OPT_mibt_seal))
+    Opts.IBTSeal = 1;
+
   for (auto *A :
        Args.filtered(OPT_mlink_bitcode_file, OPT_mlink_builtin_bitcode)) {
     CodeGenOptions::BitcodeFileToLink F;
diff --git a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
index 6642f46e64b2f..7e751a4c8811e 100644
--- a/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
+++ b/llvm/lib/Target/X86/X86IndirectBranchTracking.cpp
@@ -95,14 +95,45 @@ static bool IsCallReturnTwice(llvm::MachineOperand &MOp) {
   return Attrs.hasFnAttr(Attribute::ReturnsTwice);
 }
 
+// Checks if function should have an ENDBR in its prologue
+static bool needsPrologueENDBR(MachineFunction &MF, const Module *M) {
+  Function &F = MF.getFunction();
+
+  if (F.doesNoCfCheck())
+    return false;
+
+  const X86TargetMachine *TM =
+      static_cast<const X86TargetMachine *>(&MF.getTarget());
+  Metadata *IBTSeal = M->getModuleFlag("ibt-seal");
+
+  switch (TM->getCodeModel()) {
+  // Large code model functions always reachable through indirect calls.
+  case CodeModel::Large:
+    return true;
+  // Only address taken functions in LTO'ed kernel are reachable indirectly.
+  // IBTSeal implies LTO, thus only check if function is address taken.
+  case CodeModel::Kernel:
+    // Check if ibt-seal was enabled (implies LTO is being used).
+    if (IBTSeal) {
+      return F.hasAddressTaken();
+    }
+    // if !IBTSeal, fall into default case.
+    LLVM_FALLTHROUGH;
+  // Address taken or externally linked functions may be reachable.
+  default:
+    return (F.hasAddressTaken() || !F.hasLocalLinkage());
+  }
+}
+
 bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
   const X86Subtarget &SubTarget = MF.getSubtarget<X86Subtarget>();
 
+  const Module *M = MF.getMMI().getModule();
   // Check that the cf-protection-branch is enabled.
-  Metadata *isCFProtectionSupported =
-      MF.getMMI().getModule()->getModuleFlag("cf-protection-branch");
-  // NB: We need to enable IBT in jitted code if JIT compiler is CET
-  // enabled.
+  Metadata *isCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
+
+  //  NB: We need to enable IBT in jitted code if JIT compiler is CET
+  //  enabled.
   const X86TargetMachine *TM =
       static_cast<const X86TargetMachine *>(&MF.getTarget());
 #ifdef __CET__
@@ -119,13 +150,8 @@ bool X86IndirectBranchTrackingPass::runOnMachineFunction(MachineFunction &MF) {
   TII = SubTarget.getInstrInfo();
   EndbrOpcode = SubTarget.is64Bit() ? X86::ENDBR64 : X86::ENDBR32;
 
-  // Large code model, non-internal function or function whose address
-  // was taken, can be accessed through indirect calls. Mark the first
-  // BB with ENDBR instruction unless nocf_check attribute is used.
-  if ((TM->getCodeModel() == CodeModel::Large ||
-       MF.getFunction().hasAddressTaken() ||
-       !MF.getFunction().hasLocalLinkage()) &&
-      !MF.getFunction().doesNoCfCheck()) {
+  // If function is reachable indirectly, mark the first BB with ENDBR.
+  if (needsPrologueENDBR(MF, M)) {
     auto MBB = MF.begin();
     Changed |= addENDBR(*MBB, MBB->begin());
   }
diff --git a/llvm/test/CodeGen/X86/ibtseal-kernel.ll b/llvm/test/CodeGen/X86/ibtseal-kernel.ll
new file mode 100644
index 0000000000000..eb98515f01f81
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ibtseal-kernel.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-linux-gnu -x86-indirect-branch-tracking --code-model=kernel | FileCheck %s --check-prefix=CHECK-KERNEL-IBTSEAL
+
+; CHECK-KERNEL-IBTSEAL: foo:
+; CHECK-KERNEL-IBTSEAL: endbr
+; CHECK-KERNEL-IBTSEAL: bar:
+; CHECK-KERNEL-IBTSEAL-NOT: endbr
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @foo() {
+  ret void
+}
+
+define dso_local i8* @bar() {
+  ret i8* bitcast (void ()* @foo to i8*)
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 4, !"ibt-seal", i32 1}
diff --git a/llvm/test/CodeGen/X86/ibtseal-large.ll b/llvm/test/CodeGen/X86/ibtseal-large.ll
new file mode 100644
index 0000000000000..e48ac8eb19a7b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ibtseal-large.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-linux-gnu -x86-indirect-branch-tracking --code-model=large | FileCheck %s --check-prefix=CHECK-LARGE-IBTSEAL
+
+; CHECK-LARGE-IBTSEAL: foo:
+; CHECK-LARGE-IBTSEAL: endbr
+; CHECK-LARGE-IBTSEAL: bar:
+; CHECK-LARGE-IBTSEAL: endbr
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @foo() {
+  ret void
+}
+
+define dso_local i8* @bar() {
+  ret i8* bitcast (void ()* @foo to i8*)
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 4, !"ibt-seal", i32 1}
diff --git a/llvm/test/CodeGen/X86/ibtseal-small.ll b/llvm/test/CodeGen/X86/ibtseal-small.ll
new file mode 100644
index 0000000000000..3d810d89a26c6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ibtseal-small.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-linux-gnu -x86-indirect-branch-tracking --code-model=small | FileCheck %s --check-prefix=CHECK-SMALL-IBTSEAL
+
+; CHECK-SMALL-IBTSEAL: foo:
+; CHECK-SMALL-IBTSEAL: endbr
+; CHECK-SMALL-IBTSEAL: bar:
+; CHECK-SMALL-IBTSEAL: endbr
+
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @foo() {
+  ret void
+}
+
+define dso_local i8* @bar() {
+  ret i8* bitcast (void ()* @foo to i8*)
+}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 4, !"ibt-seal", i32 1}

From 7ee1c162cc53d37f717f9a138276ad64fa6863bc Mon Sep 17 00:00:00 2001
From: Wu Xinlong <821408745@qq.com>
Date: Thu, 20 Jan 2022 16:35:55 +0800
Subject: [PATCH 125/946] [RISCV][RFC] add inst support of zbkb

This commit add instructions supports of `zbkb` which defined in scalar cryptography extension version v1.0.0 (has been ratified already).

Most of the zbkb directives reuse parts of the zbp and zbb directives, so this patch just modified some of the inst aliases and predicates.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117640
---
 llvm/lib/Support/RISCVISAInfo.cpp          |  2 +
 llvm/lib/Target/RISCV/RISCV.td             | 20 +++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td  | 48 +++++++++++++---------
 llvm/lib/Target/RISCV/RISCVSchedRocket.td  |  2 +-
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td |  2 +-
 llvm/lib/Target/RISCV/RISCVSubtarget.h     |  2 +
 llvm/test/CodeGen/RISCV/attributes.ll      |  4 ++
 llvm/test/CodeGen/RISCV/rv32zbp.ll         | 14 +++----
 llvm/test/CodeGen/RISCV/rv64zbp.ll         |  4 +-
 llvm/test/MC/RISCV/attribute-arch.s        |  3 ++
 llvm/test/MC/RISCV/rv32zbkb-only-valid.s   | 16 ++++++++
 llvm/test/MC/RISCV/rv32zbkb-valid.s        | 45 ++++++++++++++++++++
 llvm/test/MC/RISCV/rv32zbp-aliases-valid.s | 10 +----
 llvm/test/MC/RISCV/rv32zbp-only-valid.s    |  6 +++
 llvm/test/MC/RISCV/rv32zbp-valid.s         |  2 +-
 llvm/test/MC/RISCV/rv64-zbkb-valid.s       | 20 +++++++++
 llvm/test/MC/RISCV/rv64zbkb-only-valid.s   |  9 ++++
 llvm/test/MC/RISCV/rv64zbp-aliases-valid.s |  2 +-
 18 files changed, 170 insertions(+), 41 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv32zbkb-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zbkb-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv64-zbkb-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv64zbkb-only-valid.s

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index fc52fc6803439..0d9b6e4fa4bb5 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -55,6 +55,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
     {"zbb", RISCVExtensionVersion{1, 0}},
     {"zbc", RISCVExtensionVersion{1, 0}},
     {"zbs", RISCVExtensionVersion{1, 0}},
+
+    {"zbkb", RISCVExtensionVersion{1, 0}},
 };
 
 static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index b5d2bd01d3552..378720bc6b26d 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -143,6 +143,26 @@ def HasStdExtZbbOrZbp
                                    "'Zbb' (Base 'B' Instructions) or "
                                    "'Zbp' (Permutation 'B' Instructions)">;
 
+def FeatureStdExtZbkb
+    : SubtargetFeature<"zbkb", "HasStdExtZbkb", "true",
+                       "'Zbkb' (Bitmanip instructions for Cryptography)">;
+def HasStdExtZbkb : Predicate<"Subtarget->hasStdExtZbkb()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZbkb),
+                             "'Zbkb' (Bitmanip instructions for Cryptography)">;
+
+def HasStdExtZbpOrZbkb
+    : Predicate<"Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">,
+                AssemblerPredicate<(any_of FeatureStdExtZbp, FeatureStdExtZbkb),
+                                   "'Zbp' (Permutation 'B' Instructions) or "
+                                   "'Zbkb' (Bitmanip instructions for Cryptography)">;
+
+def HasStdExtZbbOrZbpOrZbkb
+    : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">,
+                AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp, FeatureStdExtZbkb),
+                                   "'Zbb' (Base 'B' Instructions) or "
+                                   "'Zbp' (Permutation 'B' Instructions) or "
+                                   "'Zbkb' (Bitmanip instructions for Cryptography)">;
+
 def FeatureNoRVCHints
     : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false",
                        "Disable RVC Hint Instructions.">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index f23fd2adb665b..f8030b35da3a2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -313,14 +313,14 @@ class RVBTernaryImm5<bits<2> funct2, bits<3> funct3, RISCVOpcode opcode,
 // Instructions
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
 def ANDN  : ALU_rr<0b0100000, 0b111, "andn">,
             Sched<[WriteIALU, ReadIALU, ReadIALU]>;
 def ORN   : ALU_rr<0b0100000, 0b110, "orn">,
             Sched<[WriteIALU, ReadIALU, ReadIALU]>;
 def XNOR  : ALU_rr<0b0100000, 0b100, "xnor">,
             Sched<[WriteIALU, ReadIALU, ReadIALU]>;
-} // Predicates = [HasStdExtZbbOrZbp]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
 
 let Predicates = [HasStdExtZba] in {
 def SH1ADD : ALU_rr<0b0010000, 0b010, "sh1add">,
@@ -331,12 +331,12 @@ def SH3ADD : ALU_rr<0b0010000, 0b110, "sh3add">,
              Sched<[WriteSHXADD, ReadSHXADD, ReadSHXADD]>;
 } // Predicates = [HasStdExtZba]
 
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
 def ROL   : ALU_rr<0b0110000, 0b001, "rol">,
             Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>;
 def ROR   : ALU_rr<0b0110000, 0b101, "ror">,
             Sched<[WriteRotateReg, ReadRotateReg, ReadRotateReg]>;
-} // Predicates = [HasStdExtZbbOrZbp]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
 
 let Predicates = [HasStdExtZbs] in {
 def BCLR : ALU_rr<0b0100100, 0b001, "bclr">,
@@ -360,7 +360,7 @@ def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>;
 def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
 } // Predicates = [HasStdExtZbp]
 
-let Predicates = [HasStdExtZbbOrZbp] in
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in
 def RORI  : RVBShift_ri<0b01100, 0b101, OPC_OP_IMM, "rori">,
             Sched<[WriteRotateImm, ReadRotateImm]>;
 
@@ -471,11 +471,13 @@ def BDECOMPRESS : ALU_rr<0b0100100, 0b110, "bdecompress">, Sched<[]>;
 def BCOMPRESS   : ALU_rr<0b0000100, 0b110, "bcompress">, Sched<[]>;
 } // Predicates = [HasStdExtZbe]
 
-let Predicates = [HasStdExtZbp] in {
+let Predicates = [HasStdExtZbpOrZbkb] in {
 def PACK  : ALU_rr<0b0000100, 0b100, "pack">, Sched<[]>;
-def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
 def PACKH : ALU_rr<0b0000100, 0b111, "packh">, Sched<[]>;
-} // Predicates = [HasStdExtZbp]
+} // Predicates = [HasStdExtZbpOrZbkb]
+
+let Predicates = [HasStdExtZbp] in 
+def PACKU : ALU_rr<0b0100100, 0b100, "packu">, Sched<[]>;
 
 let Predicates = [HasStdExtZbm, IsRV64] in {
 def BMATOR   : ALU_rr<0b0000100, 0b011, "bmator">, Sched<[]>;
@@ -504,7 +506,7 @@ def SH3ADDUW : ALUW_rr<0b0010000, 0b110, "sh3add.uw">,
                Sched<[WriteSHXADD32, ReadSHXADD32, ReadSHXADD32]>;
 } // Predicates = [HasStdExtZbb, IsRV64]
 
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
 def ROLW  : ALUW_rr<0b0110000, 0b001, "rolw">,
             Sched<[WriteRotateReg32, ReadRotateReg32, ReadRotateReg32]>;
 def RORW  : ALUW_rr<0b0110000, 0b101, "rorw">,
@@ -520,7 +522,7 @@ let Predicates = [HasStdExtZbp, IsRV64] in {
 def XPERMW : ALU_rr<0b0010100, 0b000, "xperm.w">, Sched<[]>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in
 def RORIW : RVBShiftW_ri<0b0110000, 0b101, OPC_OP_IMM_32, "roriw">,
             Sched<[WriteRotateImm32, ReadRotateImm32]>;
 
@@ -559,10 +561,11 @@ def BDECOMPRESSW : ALUW_rr<0b0100100, 0b110, "bdecompressw">, Sched<[]>;
 def BCOMPRESSW   : ALUW_rr<0b0000100, 0b110, "bcompressw">, Sched<[]>;
 } // Predicates = [HasStdExtZbe, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in 
 def PACKW  : ALUW_rr<0b0000100, 0b100, "packw">, Sched<[]>;
+
+let Predicates = [HasStdExtZbp, IsRV64] in 
 def PACKUW : ALUW_rr<0b0100100, 0b100, "packuw">, Sched<[]>;
-} // Predicates = [HasStdExtZbp, IsRV64]
 
 let Predicates = [HasStdExtZbf, IsRV64] in
 def BFPW : ALUW_rr<0b0100100, 0b111, "bfpw">,
@@ -593,21 +596,21 @@ def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd),
 // causes diagnostics to suggest that Zbp rather than Zbb is required for rev8
 // or gorci. Since Zbb is closer to being finalized than Zbp this will be
 // misleading to users.
-let Predicates = [HasStdExtZbbOrZbp, IsRV32] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32] in {
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def REV8_RV32 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
                         "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> {
   let imm12 = { 0b01101, 0b0011000 };
 }
-} // Predicates = [HasStdExtZbbOrZbp, IsRV32]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32]
 
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 def REV8_RV64 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
                         "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> {
   let imm12 = { 0b01101, 0b0111000 };
 }
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
 
 let Predicates = [HasStdExtZbbOrZbp] in {
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
@@ -617,6 +620,15 @@ def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
 }
 } // Predicates = [HasStdExtZbbOrZbp]
 
+let Predicates = [HasStdExtZbpOrZbkb] in 
+def BREV8 : RVBUnary<0b0110100, 0b00111, 0b101, OPC_OP_IMM, "brev8">;
+
+let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in {
+def ZIP_RV32   : RVBUnary<0b0000100, 0b01111, 0b001, OPC_OP_IMM, "zip">;
+def UNZIP_RV32 : RVBUnary<0b0000100, 0b01111, 0b101, OPC_OP_IMM, "unzip">;
+} // Predicates = [HasStdExtZbkb, IsRV32]
+
+
 //===----------------------------------------------------------------------===//
 // Pseudo Instructions
 //===----------------------------------------------------------------------===//
@@ -631,11 +643,11 @@ def : InstAlias<"rev2.n $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00010)>;
 def : InstAlias<"rev.n $rd, $rs",  (GREVI GPR:$rd, GPR:$rs, 0b00011)>;
 def : InstAlias<"rev4.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00100)>;
 def : InstAlias<"rev2.b $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b00110)>;
-def : InstAlias<"rev.b $rd, $rs",  (GREVI GPR:$rd, GPR:$rs, 0b00111)>;
 def : InstAlias<"rev8.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01000)>;
 def : InstAlias<"rev4.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01100)>;
 def : InstAlias<"rev2.h $rd, $rs", (GREVI GPR:$rd, GPR:$rs, 0b01110)>;
 def : InstAlias<"rev.h $rd, $rs",  (GREVI GPR:$rd, GPR:$rs, 0b01111)>;
+def : InstAlias<"rev.b $rd, $rs",  (BREV8 GPR:$rd, GPR:$rs)>;
 
 def : InstAlias<"zip.n $rd, $rs",    (SHFLI   GPR:$rd, GPR:$rs, 0b0001)>;
 def : InstAlias<"unzip.n $rd, $rs",  (UNSHFLI GPR:$rd, GPR:$rs, 0b0001)>;
@@ -675,8 +687,6 @@ def : InstAlias<"zip4 $rd, $rs",   (SHFLI   GPR:$rd, GPR:$rs, 0b1100)>;
 def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>;
 def : InstAlias<"zip2 $rd, $rs",   (SHFLI   GPR:$rd, GPR:$rs, 0b1110)>;
 def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>;
-def : InstAlias<"zip $rd, $rs",    (SHFLI   GPR:$rd, GPR:$rs, 0b1111)>;
-def : InstAlias<"unzip $rd, $rs",  (UNSHFLI GPR:$rd, GPR:$rs, 0b1111)>;
 
 def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>;
 def : InstAlias<"orc8 $rd, $rs",  (GORCI GPR:$rd, GPR:$rs, 0b11000)>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index b907ada3a1d5a..6cc24fa17c84a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,7 +17,7 @@ def RocketModel : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = false;
-  let UnsupportedFeatures = [HasVInstructions, HasVInstructionsI64];
+  let UnsupportedFeatures = [HasStdExtZbkb, HasVInstructions, HasVInstructionsI64];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 5672637a40cc2..2da8c14088890 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasVInstructions];
+  let UnsupportedFeatures = [HasStdExtZbkb, HasVInstructions];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 4b5958ad38d9c..141e7114b5883 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -83,6 +83,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasStdExtZve64d = false;
   bool HasStdExtZfhmin = false;
   bool HasStdExtZfh = false;
+  bool HasStdExtZbkb = false;
   bool HasRV64 = false;
   bool IsRV32E = false;
   bool EnableLinkerRelax = false;
@@ -156,6 +157,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtZvl() const { return ZvlLen != ExtZvl::NotSet; }
   bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
   bool hasStdExtZfh() const { return HasStdExtZfh; }
+  bool hasStdExtZbkb() const { return HasStdExtZbkb; }
   bool is64Bit() const { return HasRV64; }
   bool isRV32E() const { return IsRV32E; }
   bool enableLinkerRelax() const { return EnableLinkerRelax; }
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index 59790c9967111..dd4a340edeac4 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -19,6 +19,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV32ZBT %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV32V %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s
+; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s
 ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s
@@ -38,6 +39,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV64ZBT %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV64V %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s
 
 
 ; RV32M: .attribute 5, "rv32i2p0_m2p0"
@@ -59,6 +61,7 @@
 ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93"
 ; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 ; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
 ; RV64A: .attribute 5, "rv64i2p0_a2p0"
@@ -79,6 +82,7 @@
 ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93"
 ; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
 ; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0"
 
 define i32 @addi(i32 %a) {
   %1 = add i32 %a, 1
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 026a27b691196..1e2b0322b3688 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -1491,7 +1491,7 @@ define i32 @grev7_i32(i32 %a) nounwind {
 ;
 ; RV32ZBP-LABEL: grev7_i32:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    rev.b a0, a0
+; RV32ZBP-NEXT:    grevi a0, a0, 7
 ; RV32ZBP-NEXT:    ret
   %and1 = shl i32 %a, 1
   %shl1 = and i32 %and1, -1431655766
@@ -1560,8 +1560,8 @@ define i64 @grev7_i64(i64 %a) nounwind {
 ;
 ; RV32ZBP-LABEL: grev7_i64:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    rev.b a0, a0
-; RV32ZBP-NEXT:    rev.b a1, a1
+; RV32ZBP-NEXT:    grevi a0, a0, 7
+; RV32ZBP-NEXT:    grevi a1, a1, 7
 ; RV32ZBP-NEXT:    ret
   %and1 = shl i64 %a, 1
   %shl1 = and i64 %and1, -6148914691236517206
@@ -2175,7 +2175,7 @@ define zeroext i8 @bitreverse_i8(i8 zeroext %a) nounwind {
 ;
 ; RV32ZBP-LABEL: bitreverse_i8:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    rev.b a0, a0
+; RV32ZBP-NEXT:    grevi a0, a0, 7
 ; RV32ZBP-NEXT:    ret
   %1 = tail call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %1
@@ -2450,7 +2450,7 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i32:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    rev.b a0, a0
+; RV32ZBP-NEXT:    grevi a0, a0, 7
 ; RV32ZBP-NEXT:    ret
   %1 = call i32 @llvm.bitreverse.i32(i32 %a)
   %2 = call i32 @llvm.bswap.i32(i32 %1)
@@ -2539,8 +2539,8 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i64:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    rev.b a0, a0
-; RV32ZBP-NEXT:    rev.b a1, a1
+; RV32ZBP-NEXT:    grevi a0, a0, 7
+; RV32ZBP-NEXT:    grevi a1, a1, 7
 ; RV32ZBP-NEXT:    ret
   %1 = call i64 @llvm.bitreverse.i64(i64 %a)
   %2 = call i64 @llvm.bswap.i64(i64 %1)
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 674ffcff180de..5b74675cdb3ff 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -1438,7 +1438,7 @@ define i64 @grev7_i64(i64 %a) nounwind {
 ;
 ; RV64ZBP-LABEL: grev7_i64:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rev.b a0, a0
+; RV64ZBP-NEXT:    grevi a0, a0, 7
 ; RV64ZBP-NEXT:    ret
   %and1 = shl i64 %a, 1
   %shl1 = and i64 %and1, -6148914691236517206
@@ -2481,7 +2481,7 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i64:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rev.b a0, a0
+; RV64ZBP-NEXT:    grevi a0, a0, 7
 ; RV64ZBP-NEXT:    ret
   %1 = call i64 @llvm.bitreverse.i64(i64 %a)
   %2 = call i64 @llvm.bswap.i64(i64 %1)
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 3d7be42caeb56..703b6a6aa8105 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -127,3 +127,6 @@
 
 .attribute arch, "rv32ifd_zve64d0p10"
 # CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
+
+.attribute arch, "rv32i_zbkb1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkb1p0"
diff --git a/llvm/test/MC/RISCV/rv32zbkb-only-valid.s b/llvm/test/MC/RISCV/rv32zbkb-only-valid.s
new file mode 100644
index 0000000000000..a04ff04ba3f10
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zbkb-only-valid.s
@@ -0,0 +1,16 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkb -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zbkb < %s \
+# RUN:     | llvm-objdump --mattr=+zbkb -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: rev8 t0, t1
+# CHECK-ASM: encoding: [0x93,0x52,0x83,0x69]
+rev8 t0, t1
+
+# CHECK-ASM-AND-OBJ: zip t0, t1
+# CHECK-ASM: encoding: [0x93,0x12,0xf3,0x08]
+zip t0, t1
+# CHECK-S-OBJ-NOALIAS: unzip t0, t1
+# CHECK-ASM: encoding: [0x93,0x52,0xf3,0x08]
+unzip t0, t1
diff --git a/llvm/test/MC/RISCV/rv32zbkb-valid.s b/llvm/test/MC/RISCV/rv32zbkb-valid.s
new file mode 100644
index 0000000000000..36e0a27922776
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zbkb-valid.s
@@ -0,0 +1,45 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkb -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zbkb -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zbkb < %s \
+# RUN:     | llvm-objdump --mattr=+zbkb -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zbkb < %s \
+# RUN:     | llvm-objdump --mattr=+zbkb -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: ror t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x52,0x73,0x60]
+ror t0, t1, t2
+# CHECK-ASM-AND-OBJ: rol t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x12,0x73,0x60]
+rol t0, t1, t2
+# CHECK-ASM-AND-OBJ: rori t0, t1, 31
+# CHECK-ASM: encoding: [0x93,0x52,0xf3,0x61]
+rori t0, t1, 31
+# CHECK-ASM-AND-OBJ: rori t0, t1, 0
+# CHECK-ASM: encoding: [0x93,0x52,0x03,0x60]
+rori t0, t1, 0
+
+# CHECK-ASM-AND-OBJ: andn t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x72,0x73,0x40]
+andn t0, t1, t2
+# CHECK-ASM-AND-OBJ: orn t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x62,0x73,0x40]
+orn t0, t1, t2
+# CHECK-ASM-AND-OBJ: xnor t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x42,0x73,0x40]
+xnor t0, t1, t2
+
+# CHECK-ASM: pack t0, t1, zero
+# CHECK-OBJ: zext.h t0, t1
+# CHECK-ASM: encoding: [0xb3,0x42,0x03,0x08]
+pack t0, t1, x0
+# CHECK-ASM-AND-OBJ: packh t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x72,0x73,0x08]
+packh t0, t1, t2
+
+# CHECK-ASM-AND-OBJ: brev8 t0, t1
+# CHECK-ASM: encoding: [0x93,0x52,0x73,0x68]
+brev8 t0, t1
diff --git a/llvm/test/MC/RISCV/rv32zbp-aliases-valid.s b/llvm/test/MC/RISCV/rv32zbp-aliases-valid.s
index dd14f49840a06..6472394cee260 100644
--- a/llvm/test/MC/RISCV/rv32zbp-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rv32zbp-aliases-valid.s
@@ -39,7 +39,7 @@ rev4.b x5, x6
 # CHECK-S-OBJ: rev2.b t0, t1
 rev2.b x5, x6
 
-# CHECK-S-OBJ-NOALIAS: grevi t0, t1, 7
+# CHECK-S-OBJ-NOALIAS: brev8 t0, t1
 # CHECK-S-OBJ: rev.b t0, t1
 rev.b x5, x6
 
@@ -151,14 +151,6 @@ zip2 x5, x6
 # CHECK-S-OBJ: unzip2 t0, t1
 unzip2 x5, x6
 
-# CHECK-S-OBJ-NOALIAS: shfli t0, t1, 15
-# CHECK-S-OBJ: zip t0, t1
-zip x5, x6
-
-# CHECK-S-OBJ-NOALIAS: unshfli t0, t1, 15
-# CHECK-S-OBJ: unzip t0, t1
-unzip x5, x6
-
 # CHECK-S-OBJ-NOALIAS: gorci t0, t1, 1
 # CHECK-S-OBJ: orc.p t0, t1
 orc.p x5, x6
diff --git a/llvm/test/MC/RISCV/rv32zbp-only-valid.s b/llvm/test/MC/RISCV/rv32zbp-only-valid.s
index a7aba5ee258f6..42f462e98a21a 100644
--- a/llvm/test/MC/RISCV/rv32zbp-only-valid.s
+++ b/llvm/test/MC/RISCV/rv32zbp-only-valid.s
@@ -13,3 +13,9 @@ pack t0, t1, x0
 # CHECK-OBJ: rev8 t0, t1
 # CHECK-ASM: encoding: [0x93,0x52,0x83,0x69]
 grevi t0, t1, 24
+# CHECK-ASM-AND-OBJ: zip t0, t1
+# CHECK-ASM: encoding: [0x93,0x12,0xf3,0x08]
+zip x5, x6
+# CHECK-ASM-AND-OBJ: unzip t0, t1
+# CHECK-ASM: encoding: [0x93,0x52,0xf3,0x08]
+unzip x5, x6
diff --git a/llvm/test/MC/RISCV/rv32zbp-valid.s b/llvm/test/MC/RISCV/rv32zbp-valid.s
index 32474d0ead90c..5cb23fc38b242 100644
--- a/llvm/test/MC/RISCV/rv32zbp-valid.s
+++ b/llvm/test/MC/RISCV/rv32zbp-valid.s
@@ -54,5 +54,5 @@ xperm.n t0, t1, t2
 # CHECK-ASM: encoding: [0xb3,0x42,0x73,0x28]
 xperm.b t0, t1, t2
 # CHECK-ASM-AND-OBJ: xperm.h t0, t1, t2
-# CHECK-ASM: encoding: [0xb3,0x62,0x73,0x28
+# CHECK-ASM: encoding: [0xb3,0x62,0x73,0x28]
 xperm.h t0, t1, t2
diff --git a/llvm/test/MC/RISCV/rv64-zbkb-valid.s b/llvm/test/MC/RISCV/rv64-zbkb-valid.s
new file mode 100644
index 0000000000000..01e1f6a760c89
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64-zbkb-valid.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zbkb < %s \
+# RUN:     | llvm-objdump --mattr=+zbkb -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: rorw t0, t1, t2
+# CHECK-ASM: encoding: [0xbb,0x52,0x73,0x60]
+rorw t0, t1, t2
+# CHECK-ASM-AND-OBJ: rolw t0, t1, t2
+# CHECK-ASM: encoding: [0xbb,0x12,0x73,0x60]
+rolw t0, t1, t2
+# CHECK-ASM-AND-OBJ: roriw t0, t1, 31
+# CHECK-ASM: encoding: [0x9b,0x52,0xf3,0x61]
+roriw t0, t1, 31
+# CHECK-ASM-AND-OBJ: roriw t0, t1, 0
+# CHECK-ASM: encoding: [0x9b,0x52,0x03,0x60]
+roriw t0, t1, 0
+
+# CHECK-ASM-AND-OBJ: packw t0, t1, t2
+# CHECK-ASM: encoding: [0xbb,0x42,0x73,0x08]
+packw t0, t1, t2
diff --git a/llvm/test/MC/RISCV/rv64zbkb-only-valid.s b/llvm/test/MC/RISCV/rv64zbkb-only-valid.s
new file mode 100644
index 0000000000000..2d3e7381b3560
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zbkb-only-valid.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zbkb -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zbkb < %s \
+# RUN:     | llvm-objdump --mattr=+zbkb -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: rev8 t0, t1
+# CHECK-ASM: encoding: [0x93,0x52,0x83,0x6b]
+rev8 t0, t1
diff --git a/llvm/test/MC/RISCV/rv64zbp-aliases-valid.s b/llvm/test/MC/RISCV/rv64zbp-aliases-valid.s
index 6c51d96ab6b13..c1474afec5271 100644
--- a/llvm/test/MC/RISCV/rv64zbp-aliases-valid.s
+++ b/llvm/test/MC/RISCV/rv64zbp-aliases-valid.s
@@ -39,7 +39,7 @@ rev4.b x5, x6
 # CHECK-S-OBJ: rev2.b t0, t1
 rev2.b x5, x6
 
-# CHECK-S-OBJ-NOALIAS: grevi t0, t1, 7
+# CHECK-S-OBJ-NOALIAS: brev8 t0, t1
 # CHECK-S-OBJ: rev.b t0, t1
 rev.b x5, x6
 

From 7b3d30728816403d1fd73cc5082e9fb761262bce Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Thu, 20 Jan 2022 20:43:48 -0800
Subject: [PATCH 126/946] [RISCV] Add isel patterns for grevi, shfli, and
 unshfli to brev8/zip/unzip instructions.

Zbkb supports some encodings of the general grevi, shfli, and
unshfli instructions legal, so we added separate instructions for
those encodings to improve the diagnostics for assembler and
disassembler. To be consistent we should always use these separate
instructions whenever those specific encodings of grevi/shfli/unshfli
occur. So this patch adds specific isel patterns to override the generic
isel patterns for these cases. Similar was done for rev8 and zext.h
for Zbb previously.
---
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td    |  8 ++++++++
 llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll | 18 ++++++++++++++++++
 llvm/test/CodeGen/RISCV/rv32zbp.ll           | 14 +++++++-------
 llvm/test/CodeGen/RISCV/rv64zbp.ll           |  4 ++--
 4 files changed, 35 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index f8030b35da3a2..aae646f02f13f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -687,6 +687,7 @@ def : InstAlias<"zip4 $rd, $rs",   (SHFLI   GPR:$rd, GPR:$rs, 0b1100)>;
 def : InstAlias<"unzip4 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1100)>;
 def : InstAlias<"zip2 $rd, $rs",   (SHFLI   GPR:$rd, GPR:$rs, 0b1110)>;
 def : InstAlias<"unzip2 $rd, $rs", (UNSHFLI GPR:$rd, GPR:$rs, 0b1110)>;
+// zip and unzip are considered instructions rather than an alias.
 
 def : InstAlias<"orc16 $rd, $rs", (GORCI GPR:$rd, GPR:$rs, 0b10000)>;
 def : InstAlias<"orc8 $rd, $rs",  (GORCI GPR:$rd, GPR:$rs, 0b11000)>;
@@ -865,6 +866,9 @@ def : PatGprImm<riscv_shfl, SHFLI, shfl_uimm>;
 def : PatGprImm<riscv_unshfl, UNSHFLI, shfl_uimm>;
 def : PatGprImm<riscv_grev, GREVI, uimmlog2xlen>;
 def : PatGprImm<riscv_gorc, GORCI, uimmlog2xlen>;
+
+// We treat brev8 as a separate instruction, so match it directly.
+def : Pat<(riscv_grev GPR:$rs1, 7), (BREV8 GPR:$rs1)>;
 } // Predicates = [HasStdExtZbp]
 
 let Predicates = [HasStdExtZbp, IsRV64] in
@@ -876,6 +880,10 @@ def : Pat<(i32 (rotl (riscv_grev GPR:$rs1, 24), (i32 16))), (GREVI GPR:$rs1, 8)>
 
 // We treat rev8 as a separate instruction, so match it directly.
 def : Pat<(i32 (riscv_grev GPR:$rs1, 24)), (REV8_RV32 GPR:$rs1)>;
+
+// We treat zip and unzip as separate instructions, so match it directly.
+def : Pat<(i32 (riscv_shfl GPR:$rs1, 15)), (ZIP_RV32 GPR:$rs1)>;
+def : Pat<(i32 (riscv_unshfl GPR:$rs1, 15)), (UNZIP_RV32 GPR:$rs1)>;
 } // Predicates = [HasStdExtZbp, IsRV32]
 
 let Predicates = [HasStdExtZbp, IsRV64] in {
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll
index 6285804190ac2..4f1dd3d588844 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp-intrinsic.ll
@@ -92,6 +92,15 @@ define i32 @shfli32(i32 %a) nounwind {
  ret i32 %tmp
 }
 
+define i32 @zipi32(i32 %a) nounwind {
+; RV32ZBP-LABEL: zipi32:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    zip a0, a0
+; RV32ZBP-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.shfl.i32(i32 %a, i32 15)
+ ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.unshfl.i32(i32 %a, i32 %b)
 
 define i32 @unshfl32(i32 %a, i32 %b) nounwind {
@@ -122,6 +131,15 @@ define i32 @unshfli32(i32 %a) nounwind {
  ret i32 %tmp
 }
 
+define i32 @unzipi32(i32 %a) nounwind {
+; RV32ZBP-LABEL: unzipi32:
+; RV32ZBP:       # %bb.0:
+; RV32ZBP-NEXT:    unzip a0, a0
+; RV32ZBP-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.unshfl.i32(i32 %a, i32 15)
+ ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.xperm.n.i32(i32 %a, i32 %b)
 
 define i32 @xpermn32(i32 %a, i32 %b) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 1e2b0322b3688..026a27b691196 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -1491,7 +1491,7 @@ define i32 @grev7_i32(i32 %a) nounwind {
 ;
 ; RV32ZBP-LABEL: grev7_i32:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    grevi a0, a0, 7
+; RV32ZBP-NEXT:    rev.b a0, a0
 ; RV32ZBP-NEXT:    ret
   %and1 = shl i32 %a, 1
   %shl1 = and i32 %and1, -1431655766
@@ -1560,8 +1560,8 @@ define i64 @grev7_i64(i64 %a) nounwind {
 ;
 ; RV32ZBP-LABEL: grev7_i64:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    grevi a0, a0, 7
-; RV32ZBP-NEXT:    grevi a1, a1, 7
+; RV32ZBP-NEXT:    rev.b a0, a0
+; RV32ZBP-NEXT:    rev.b a1, a1
 ; RV32ZBP-NEXT:    ret
   %and1 = shl i64 %a, 1
   %shl1 = and i64 %and1, -6148914691236517206
@@ -2175,7 +2175,7 @@ define zeroext i8 @bitreverse_i8(i8 zeroext %a) nounwind {
 ;
 ; RV32ZBP-LABEL: bitreverse_i8:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    grevi a0, a0, 7
+; RV32ZBP-NEXT:    rev.b a0, a0
 ; RV32ZBP-NEXT:    ret
   %1 = tail call i8 @llvm.bitreverse.i8(i8 %a)
   ret i8 %1
@@ -2450,7 +2450,7 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i32:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    grevi a0, a0, 7
+; RV32ZBP-NEXT:    rev.b a0, a0
 ; RV32ZBP-NEXT:    ret
   %1 = call i32 @llvm.bitreverse.i32(i32 %a)
   %2 = call i32 @llvm.bswap.i32(i32 %1)
@@ -2539,8 +2539,8 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i64:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    grevi a0, a0, 7
-; RV32ZBP-NEXT:    grevi a1, a1, 7
+; RV32ZBP-NEXT:    rev.b a0, a0
+; RV32ZBP-NEXT:    rev.b a1, a1
 ; RV32ZBP-NEXT:    ret
   %1 = call i64 @llvm.bitreverse.i64(i64 %a)
   %2 = call i64 @llvm.bswap.i64(i64 %1)
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 5b74675cdb3ff..674ffcff180de 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -1438,7 +1438,7 @@ define i64 @grev7_i64(i64 %a) nounwind {
 ;
 ; RV64ZBP-LABEL: grev7_i64:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    grevi a0, a0, 7
+; RV64ZBP-NEXT:    rev.b a0, a0
 ; RV64ZBP-NEXT:    ret
   %and1 = shl i64 %a, 1
   %shl1 = and i64 %and1, -6148914691236517206
@@ -2481,7 +2481,7 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i64:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    grevi a0, a0, 7
+; RV64ZBP-NEXT:    rev.b a0, a0
 ; RV64ZBP-NEXT:    ret
   %1 = call i64 @llvm.bitreverse.i64(i64 %a)
   %2 = call i64 @llvm.bswap.i64(i64 %1)

From 26167cae45154d326aba88c8e8ca72e2bc42ea30 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Fri, 21 Jan 2022 05:15:46 +0000
Subject: [PATCH 127/946] Print the `// ----` separator between modules when
 using -split-input-file with mlir-opt

This allows to pipe sequences of `mlir-opt -split-input-file | mlir-opt -split-input-file`.

Depends On D117750

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117756
---
 mlir/lib/Support/MlirOptMain.cpp   | 10 ++++++----
 mlir/test/Dialect/OpenACC/ops.mlir |  4 ++--
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/mlir/lib/Support/MlirOptMain.cpp b/mlir/lib/Support/MlirOptMain.cpp
index 5c3f7a7732403..af4681f50ccbd 100644
--- a/mlir/lib/Support/MlirOptMain.cpp
+++ b/mlir/lib/Support/MlirOptMain.cpp
@@ -158,10 +158,12 @@ LogicalResult mlir::MlirOptMain(raw_ostream &outputStream,
     return splitAndProcessBuffer(
         std::move(buffer),
         [&](std::unique_ptr<MemoryBuffer> chunkBuffer, raw_ostream &os) {
-          return processBuffer(os, std::move(chunkBuffer), verifyDiagnostics,
-                               verifyPasses, allowUnregisteredDialects,
-                               preloadDialectsInContext, passManagerSetupFn,
-                               registry, threadPool);
+          LogicalResult result = processBuffer(
+              os, std::move(chunkBuffer), verifyDiagnostics, verifyPasses,
+              allowUnregisteredDialects, preloadDialectsInContext,
+              passManagerSetupFn, registry, threadPool);
+          os << "// -----\n";
+          return result;
         },
         outputStream);
 
diff --git a/mlir/test/Dialect/OpenACC/ops.mlir b/mlir/test/Dialect/OpenACC/ops.mlir
index 9ced56acfe74b..05e8026b554ca 100644
--- a/mlir/test/Dialect/OpenACC/ops.mlir
+++ b/mlir/test/Dialect/OpenACC/ops.mlir
@@ -1,8 +1,8 @@
 // RUN: mlir-opt -split-input-file %s | FileCheck %s
 // Verify the printed output can be parsed.
-// RUN: mlir-opt -split-input-file %s | mlir-opt -allow-unregistered-dialect  | FileCheck %s
+// RUN: mlir-opt -split-input-file %s | mlir-opt  -split-input-file  | FileCheck %s
 // Verify the generic form can be parsed.
-// RUN: mlir-opt -split-input-file -mlir-print-op-generic %s | mlir-opt -allow-unregistered-dialect | FileCheck %s
+// RUN: mlir-opt -split-input-file -mlir-print-op-generic %s | mlir-opt -split-input-file | FileCheck %s
 
 func @compute1(%A: memref<10x10xf32>, %B: memref<10x10xf32>, %C: memref<10x10xf32>) -> memref<10x10xf32> {
   %c0 = arith.constant 0 : index

From 308d8b8c6618f570166bcc7dbb87f97c04bba1b2 Mon Sep 17 00:00:00 2001
From: Rahul Kayaith <rahul@untether.ai>
Date: Fri, 21 Jan 2022 05:21:00 +0000
Subject: [PATCH 128/946] [mlir][python] 8b/16b DenseIntElements access

This extends dense attribute element access to support 8b and 16b ints.
Also extends the corresponding parts of the C api.

Reviewed By: ftynse

Differential Revision: https://reviews.llvm.org/D117731
---
 mlir/include/mlir-c/BuiltinAttributes.h   |  8 +++++
 mlir/lib/Bindings/Python/IRAttributes.cpp | 12 +++++++
 mlir/lib/CAPI/IR/BuiltinAttributes.cpp    | 16 +++++++++
 mlir/test/CAPI/ir.c                       | 11 ++++++
 mlir/test/python/ir/attributes.py         | 44 +++++++++++++++++++++++
 5 files changed, 91 insertions(+)

diff --git a/mlir/include/mlir-c/BuiltinAttributes.h b/mlir/include/mlir-c/BuiltinAttributes.h
index 5839cd3d2408a..973b7e99469c0 100644
--- a/mlir/include/mlir-c/BuiltinAttributes.h
+++ b/mlir/include/mlir-c/BuiltinAttributes.h
@@ -355,6 +355,10 @@ MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrUInt8Get(
     MlirType shapedType, intptr_t numElements, const uint8_t *elements);
 MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrInt8Get(
     MlirType shapedType, intptr_t numElements, const int8_t *elements);
+MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrUInt16Get(
+    MlirType shapedType, intptr_t numElements, const uint16_t *elements);
+MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrInt16Get(
+    MlirType shapedType, intptr_t numElements, const int16_t *elements);
 MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrUInt32Get(
     MlirType shapedType, intptr_t numElements, const uint32_t *elements);
 MLIR_CAPI_EXPORTED MlirAttribute mlirDenseElementsAttrInt32Get(
@@ -416,6 +420,10 @@ MLIR_CAPI_EXPORTED int8_t mlirDenseElementsAttrGetInt8Value(MlirAttribute attr,
                                                             intptr_t pos);
 MLIR_CAPI_EXPORTED uint8_t
 mlirDenseElementsAttrGetUInt8Value(MlirAttribute attr, intptr_t pos);
+MLIR_CAPI_EXPORTED int16_t
+mlirDenseElementsAttrGetInt16Value(MlirAttribute attr, intptr_t pos);
+MLIR_CAPI_EXPORTED uint16_t
+mlirDenseElementsAttrGetUInt16Value(MlirAttribute attr, intptr_t pos);
 MLIR_CAPI_EXPORTED int32_t
 mlirDenseElementsAttrGetInt32Value(MlirAttribute attr, intptr_t pos);
 MLIR_CAPI_EXPORTED uint32_t
diff --git a/mlir/lib/Bindings/Python/IRAttributes.cpp b/mlir/lib/Bindings/Python/IRAttributes.cpp
index fd44ffe6ba5fe..5d87641c379d8 100644
--- a/mlir/lib/Bindings/Python/IRAttributes.cpp
+++ b/mlir/lib/Bindings/Python/IRAttributes.cpp
@@ -673,6 +673,12 @@ class PyDenseIntElementsAttribute
       if (width == 1) {
         return mlirDenseElementsAttrGetBoolValue(*this, pos);
       }
+      if (width == 8) {
+        return mlirDenseElementsAttrGetUInt8Value(*this, pos);
+      }
+      if (width == 16) {
+        return mlirDenseElementsAttrGetUInt16Value(*this, pos);
+      }
       if (width == 32) {
         return mlirDenseElementsAttrGetUInt32Value(*this, pos);
       }
@@ -683,6 +689,12 @@ class PyDenseIntElementsAttribute
       if (width == 1) {
         return mlirDenseElementsAttrGetBoolValue(*this, pos);
       }
+      if (width == 8) {
+        return mlirDenseElementsAttrGetInt8Value(*this, pos);
+      }
+      if (width == 16) {
+        return mlirDenseElementsAttrGetInt16Value(*this, pos);
+      }
       if (width == 32) {
         return mlirDenseElementsAttrGetInt32Value(*this, pos);
       }
diff --git a/mlir/lib/CAPI/IR/BuiltinAttributes.cpp b/mlir/lib/CAPI/IR/BuiltinAttributes.cpp
index c20548bd47597..7b718da88ceef 100644
--- a/mlir/lib/CAPI/IR/BuiltinAttributes.cpp
+++ b/mlir/lib/CAPI/IR/BuiltinAttributes.cpp
@@ -426,6 +426,16 @@ MlirAttribute mlirDenseElementsAttrInt8Get(MlirType shapedType,
                                            const int8_t *elements) {
   return getDenseAttribute(shapedType, numElements, elements);
 }
+MlirAttribute mlirDenseElementsAttrUInt16Get(MlirType shapedType,
+                                             intptr_t numElements,
+                                             const uint16_t *elements) {
+  return getDenseAttribute(shapedType, numElements, elements);
+}
+MlirAttribute mlirDenseElementsAttrInt16Get(MlirType shapedType,
+                                            intptr_t numElements,
+                                            const int16_t *elements) {
+  return getDenseAttribute(shapedType, numElements, elements);
+}
 MlirAttribute mlirDenseElementsAttrUInt32Get(MlirType shapedType,
                                              intptr_t numElements,
                                              const uint32_t *elements) {
@@ -530,6 +540,12 @@ int8_t mlirDenseElementsAttrGetInt8Value(MlirAttribute attr, intptr_t pos) {
 uint8_t mlirDenseElementsAttrGetUInt8Value(MlirAttribute attr, intptr_t pos) {
   return unwrap(attr).cast<DenseElementsAttr>().getValues<uint8_t>()[pos];
 }
+int16_t mlirDenseElementsAttrGetInt16Value(MlirAttribute attr, intptr_t pos) {
+  return unwrap(attr).cast<DenseElementsAttr>().getValues<int16_t>()[pos];
+}
+uint16_t mlirDenseElementsAttrGetUInt16Value(MlirAttribute attr, intptr_t pos) {
+  return unwrap(attr).cast<DenseElementsAttr>().getValues<uint16_t>()[pos];
+}
 int32_t mlirDenseElementsAttrGetInt32Value(MlirAttribute attr, intptr_t pos) {
   return unwrap(attr).cast<DenseElementsAttr>().getValues<int32_t>()[pos];
 }
diff --git a/mlir/test/CAPI/ir.c b/mlir/test/CAPI/ir.c
index d01ccaeb0e93a..257d5e9b8683d 100644
--- a/mlir/test/CAPI/ir.c
+++ b/mlir/test/CAPI/ir.c
@@ -904,6 +904,8 @@ int printBuiltinAttributes(MlirContext ctx) {
   int bools[] = {0, 1};
   uint8_t uints8[] = {0u, 1u};
   int8_t ints8[] = {0, 1};
+  uint16_t uints16[] = {0u, 1u};
+  int16_t ints16[] = {0, 1};
   uint32_t uints32[] = {0u, 1u};
   int32_t ints32[] = {0, 1};
   uint64_t uints64[] = {0u, 1u};
@@ -921,6 +923,13 @@ int printBuiltinAttributes(MlirContext ctx) {
   MlirAttribute int8Elements = mlirDenseElementsAttrInt8Get(
       mlirRankedTensorTypeGet(2, shape, mlirIntegerTypeGet(ctx, 8), encoding),
       2, ints8);
+  MlirAttribute uint16Elements = mlirDenseElementsAttrUInt16Get(
+      mlirRankedTensorTypeGet(2, shape, mlirIntegerTypeUnsignedGet(ctx, 16),
+                              encoding),
+      2, uints16);
+  MlirAttribute int16Elements = mlirDenseElementsAttrInt16Get(
+      mlirRankedTensorTypeGet(2, shape, mlirIntegerTypeGet(ctx, 16), encoding),
+      2, ints16);
   MlirAttribute uint32Elements = mlirDenseElementsAttrUInt32Get(
       mlirRankedTensorTypeGet(2, shape, mlirIntegerTypeUnsignedGet(ctx, 32),
                               encoding),
@@ -956,6 +965,8 @@ int printBuiltinAttributes(MlirContext ctx) {
   if (mlirDenseElementsAttrGetBoolValue(boolElements, 1) != 1 ||
       mlirDenseElementsAttrGetUInt8Value(uint8Elements, 1) != 1 ||
       mlirDenseElementsAttrGetInt8Value(int8Elements, 1) != 1 ||
+      mlirDenseElementsAttrGetUInt16Value(uint16Elements, 1) != 1 ||
+      mlirDenseElementsAttrGetInt16Value(int16Elements, 1) != 1 ||
       mlirDenseElementsAttrGetUInt32Value(uint32Elements, 1) != 1 ||
       mlirDenseElementsAttrGetInt32Value(int32Elements, 1) != 1 ||
       mlirDenseElementsAttrGetUInt64Value(uint64Elements, 1) != 1 ||
diff --git a/mlir/test/python/ir/attributes.py b/mlir/test/python/ir/attributes.py
index 5f8dd0ad1183f..48f2d4b3df067 100644
--- a/mlir/test/python/ir/attributes.py
+++ b/mlir/test/python/ir/attributes.py
@@ -292,6 +292,50 @@ def testDenseIntAttr():
     print(ShapedType(a.type).element_type)
 
 
+# CHECK-LABEL: TEST: testDenseIntAttrGetItem
+@run
+def testDenseIntAttrGetItem():
+  def print_item(attr_asm):
+    attr = DenseIntElementsAttr(Attribute.parse(attr_asm))
+    dtype = ShapedType(attr.type).element_type
+    try:
+      item = attr[0]
+      print(f"{dtype}:", item)
+    except TypeError as e:
+      print(f"{dtype}:", e)
+
+  with Context():
+    # CHECK: i1: 1
+    print_item("dense<true> : tensor<i1>")
+    # CHECK: i8: 123
+    print_item("dense<123> : tensor<i8>")
+    # CHECK: i16: 123
+    print_item("dense<123> : tensor<i16>")
+    # CHECK: i32: 123
+    print_item("dense<123> : tensor<i32>")
+    # CHECK: i64: 123
+    print_item("dense<123> : tensor<i64>")
+    # CHECK: ui8: 123
+    print_item("dense<123> : tensor<ui8>")
+    # CHECK: ui16: 123
+    print_item("dense<123> : tensor<ui16>")
+    # CHECK: ui32: 123
+    print_item("dense<123> : tensor<ui32>")
+    # CHECK: ui64: 123
+    print_item("dense<123> : tensor<ui64>")
+    # CHECK: si8: -123
+    print_item("dense<-123> : tensor<si8>")
+    # CHECK: si16: -123
+    print_item("dense<-123> : tensor<si16>")
+    # CHECK: si32: -123
+    print_item("dense<-123> : tensor<si32>")
+    # CHECK: si64: -123
+    print_item("dense<-123> : tensor<si64>")
+
+    # CHECK: i7: Unsupported integer type
+    print_item("dense<123> : tensor<i7>")
+
+
 # CHECK-LABEL: TEST: testDenseFPAttr
 @run
 def testDenseFPAttr():

From 9006bf424847bf91f0a624ffc27ad165c7b804c4 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Fri, 21 Jan 2022 05:45:48 +0000
Subject: [PATCH 129/946] Remove obsolete `getAsmResultNames` from
 OpAsmDialectInterface

This is superseded by the same method on OpAsmOpInterface, which is
available on the Dialect through the Fallback mechanism,

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117750
---
 mlir/include/mlir/IR/OpImplementation.h    |  4 ----
 mlir/lib/IR/AsmPrinter.cpp                 | 14 ++++----------
 mlir/test/IR/parser.mlir                   |  7 ++-----
 mlir/test/lib/Dialect/Test/TestDialect.cpp |  6 ------
 4 files changed, 6 insertions(+), 25 deletions(-)

diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h
index 56e7c72093219..77e858e484be7 100644
--- a/mlir/include/mlir/IR/OpImplementation.h
+++ b/mlir/include/mlir/IR/OpImplementation.h
@@ -1350,10 +1350,6 @@ class OpAsmDialectInterface
     return AliasResult::NoAlias;
   }
 
-  /// Get a special name to use when printing the given operation. See
-  /// OpAsmInterface.td#getAsmResultNames for usage details and documentation.
-  virtual void getAsmResultNames(Operation *op,
-                                 OpAsmSetValueNameFn setNameFn) const {}
 };
 } // namespace mlir
 
diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index 6875aca46fcd2..f69d147c51b38 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -797,8 +797,7 @@ class SSANameState {
   /// A sentinel value used for values with names set.
   enum : unsigned { NameSentinel = ~0U };
 
-  SSANameState(Operation *op, const OpPrintingFlags &printerFlags,
-               DialectInterfaceCollection<OpAsmDialectInterface> &interfaces);
+  SSANameState(Operation *op, const OpPrintingFlags &printerFlags);
 
   /// Print the SSA identifier for the given value to 'stream'. If
   /// 'printResultNo' is true, it also presents the result number ('#' number)
@@ -866,15 +865,12 @@ class SSANameState {
   /// These are the printing flags.  They control, eg., whether to print in
   /// generic form.
   OpPrintingFlags printerFlags;
-
-  DialectInterfaceCollection<OpAsmDialectInterface> &interfaces;
 };
 } // namespace
 
 SSANameState::SSANameState(
-    Operation *op, const OpPrintingFlags &printerFlags,
-    DialectInterfaceCollection<OpAsmDialectInterface> &interfaces)
-    : printerFlags(printerFlags), interfaces(interfaces) {
+    Operation *op, const OpPrintingFlags &printerFlags)
+    : printerFlags(printerFlags) {
   llvm::SaveAndRestore<unsigned> valueIDSaver(nextValueID);
   llvm::SaveAndRestore<unsigned> argumentIDSaver(nextArgumentID);
   llvm::SaveAndRestore<unsigned> conflictIDSaver(nextConflictID);
@@ -1071,8 +1067,6 @@ void SSANameState::numberValuesInOp(Operation &op) {
   if (!printerFlags.shouldPrintGenericOpForm()) {
     if (OpAsmOpInterface asmInterface = dyn_cast<OpAsmOpInterface>(&op))
       asmInterface.getAsmResultNames(setResultNameFn);
-    else if (auto *asmInterface = interfaces.getInterfaceFor(op.getDialect()))
-      asmInterface->getAsmResultNames(&op, setResultNameFn);
   }
 
   // If the first result wasn't numbered, give it a default number.
@@ -1172,7 +1166,7 @@ class AsmStateImpl {
 public:
   explicit AsmStateImpl(Operation *op, const OpPrintingFlags &printerFlags,
                         AsmState::LocationMap *locationMap)
-      : interfaces(op->getContext()), nameState(op, printerFlags, interfaces),
+      : interfaces(op->getContext()), nameState(op, printerFlags),
         printerFlags(printerFlags), locationMap(locationMap) {}
 
   /// Initialize the alias state to enable the printing of aliases.
diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir
index 636f4eea0c5ff..0c7bdd1e08bae 100644
--- a/mlir/test/IR/parser.mlir
+++ b/mlir/test/IR/parser.mlir
@@ -1209,18 +1209,15 @@ func private @string_attr_name() attributes {"0 . 0", nested = {"0 . 0"}}
 func private @nested_reference() attributes {test.ref = @some_symbol::@some_nested_symbol }
 
 // CHECK-LABEL: func @custom_asm_names
-func @custom_asm_names() -> (i32, i32, i32, i32, i32, i32, i32) {
+func @custom_asm_names() -> (i32, i32, i32, i32, i32, i32) {
   // CHECK: %[[FIRST:first.*]], %[[MIDDLE:middle_results.*]]:2, %[[LAST:[0-9]+]]
   %0, %1:2, %2 = "test.asm_interface_op"() : () -> (i32, i32, i32, i32)
 
   // CHECK: %[[FIRST_2:first.*]], %[[LAST_2:[0-9]+]]
   %3, %4 = "test.asm_interface_op"() : () -> (i32, i32)
 
-  // CHECK: %[[RESULT:result.*]]
-  %5 = "test.asm_dialect_interface_op"() : () -> (i32)
-
   // CHECK: return %[[FIRST]], %[[MIDDLE]]#0, %[[MIDDLE]]#1, %[[LAST]], %[[FIRST_2]], %[[LAST_2]]
-  return %0, %1#0, %1#1, %2, %3, %4, %5 : i32, i32, i32, i32, i32, i32, i32
+  return %0, %1#0, %1#1, %2, %3, %4 : i32, i32, i32, i32, i32, i32
 }
 
 
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp
index 4d316bd58a8ad..2b915a9fd6c2a 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp
@@ -99,12 +99,6 @@ struct TestOpAsmInterface : public OpAsmDialectInterface {
     }
     return AliasResult::NoAlias;
   }
-
-  void getAsmResultNames(Operation *op,
-                         OpAsmSetValueNameFn setNameFn) const final {
-    if (auto asmOp = dyn_cast<AsmDialectInterfaceOp>(op))
-      setNameFn(asmOp, "result");
-  }
 };
 
 struct TestDialectFoldInterface : public DialectFoldInterface {

From a7f8aea71485c00db2ffb6288ae58475869048b1 Mon Sep 17 00:00:00 2001
From: Sockke <liuke.gehry@bytedance.com>
Date: Fri, 21 Jan 2022 14:23:52 +0800
Subject: [PATCH 130/946] [clang-tidy] Fix wrong FixIt in
 performance-move-const-arg

There are incorrect Fixit and missing warnings:
case :
A trivially-copyable object wrapped by std::move is passed to the function with rvalue reference parameters. Removing std::move will cause compilation errors.
```
void showInt(int&&) {}
void testInt() {
    int a = 10;
    // expect: warning + nofix
    showInt(std::move(a));  // showInt(a) <--- wrong fix
}

struct Tmp {};
void showTmp(Tmp&&) {}
void testTmp() {
    Tmp t;
    // expect: warning + nofix
    showTmp(std::move(t));  // showTmp(t) <--- wrong fix
}
```

Reviewed By: aaron.ballman, Quuxplusone

Differential Revision: https://reviews.llvm.org/D107450
---
 .../performance/MoveConstArgCheck.cpp         | 119 ++++++++++++++++--
 .../performance/MoveConstArgCheck.h           |   2 +
 clang-tools-extra/docs/ReleaseNotes.rst       |   3 +
 .../checkers/performance-move-const-arg.cpp   |  94 ++++++++++++++
 4 files changed, 205 insertions(+), 13 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp
index e946a1f39fe98..0e91451211aed 100644
--- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp
+++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp
@@ -47,17 +47,62 @@ void MoveConstArgCheck::registerMatchers(MatchFinder *Finder) {
 
   Finder->addMatcher(MoveCallMatcher, this);
 
+  auto ConstTypeParmMatcher =
+      qualType(references(isConstQualified())).bind("invocation-parm-type");
+  auto RValueTypeParmMatcher =
+      qualType(rValueReferenceType()).bind("invocation-parm-type");
+  // Matches respective ParmVarDecl for a CallExpr or CXXConstructExpr.
+  auto ArgumentWithParamMatcher = forEachArgumentWithParam(
+      MoveCallMatcher, parmVarDecl(anyOf(hasType(ConstTypeParmMatcher),
+                                         hasType(RValueTypeParmMatcher)))
+                           .bind("invocation-parm"));
+  // Matches respective types of arguments for a CallExpr or CXXConstructExpr
+  // and it works on calls through function pointers as well.
+  auto ArgumentWithParamTypeMatcher = forEachArgumentWithParamType(
+      MoveCallMatcher, anyOf(ConstTypeParmMatcher, RValueTypeParmMatcher));
+
   Finder->addMatcher(
-      invocation(forEachArgumentWithParam(
-                     MoveCallMatcher,
-                     parmVarDecl(hasType(references(isConstQualified())))))
+      invocation(anyOf(ArgumentWithParamMatcher, ArgumentWithParamTypeMatcher))
           .bind("receiving-expr"),
       this);
 }
 
+bool IsRValueReferenceParam(const Expr *Invocation,
+                            const QualType &InvocationParmType,
+                            const Expr *Arg) {
+  if (Invocation && InvocationParmType->isRValueReferenceType() &&
+      Arg->isLValue()) {
+    if (!Invocation->getType()->isRecordType())
+      return true;
+    else {
+      if (const auto *ConstructCallExpr =
+              dyn_cast<CXXConstructExpr>(Invocation)) {
+        if (const auto *ConstructorDecl = ConstructCallExpr->getConstructor()) {
+          if (!ConstructorDecl->isCopyOrMoveConstructor() &&
+              !ConstructorDecl->isDefaultConstructor())
+            return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
 void MoveConstArgCheck::check(const MatchFinder::MatchResult &Result) {
   const auto *CallMove = Result.Nodes.getNodeAs<CallExpr>("call-move");
   const auto *ReceivingExpr = Result.Nodes.getNodeAs<Expr>("receiving-expr");
+  const auto *InvocationParm =
+      Result.Nodes.getNodeAs<ParmVarDecl>("invocation-parm");
+  const auto *InvocationParmType =
+      Result.Nodes.getNodeAs<QualType>("invocation-parm-type");
+
+  // Skipping matchers which have been matched.
+  if (!ReceivingExpr && AlreadyCheckedMoves.contains(CallMove))
+    return;
+
+  if (ReceivingExpr)
+    AlreadyCheckedMoves.insert(CallMove);
+
   const Expr *Arg = CallMove->getArg(0);
   SourceManager &SM = Result.Context->getSourceManager();
 
@@ -90,20 +135,68 @@ void MoveConstArgCheck::check(const MatchFinder::MatchResult &Result) {
       return;
 
     bool IsVariable = isa<DeclRefExpr>(Arg);
+    // std::move shouldn't be removed when an lvalue wrapped by std::move is
+    // passed to the function with an rvalue reference parameter.
+    bool IsRVRefParam =
+        IsRValueReferenceParam(ReceivingExpr, *InvocationParmType, Arg);
     const auto *Var =
         IsVariable ? dyn_cast<DeclRefExpr>(Arg)->getDecl() : nullptr;
-    auto Diag = diag(FileMoveRange.getBegin(),
-                     "std::move of the %select{|const }0"
-                     "%select{expression|variable %4}1 "
-                     "%select{|of the trivially-copyable type %5 }2"
-                     "has no effect; remove std::move()"
-                     "%select{| or make the variable non-const}3")
-                << IsConstArg << IsVariable << IsTriviallyCopyable
-                << (IsConstArg && IsVariable && !IsTriviallyCopyable) << Var
-                << Arg->getType();
 
-    replaceCallWithArg(CallMove, Diag, SM, getLangOpts());
+    {
+      auto Diag = diag(FileMoveRange.getBegin(),
+                       "std::move of the %select{|const }0"
+                       "%select{expression|variable %5}1 "
+                       "%select{|of the trivially-copyable type %6 }2"
+                       "has no effect%select{; remove std::move()|}3"
+                       "%select{| or make the variable non-const}4")
+                  << IsConstArg << IsVariable << IsTriviallyCopyable
+                  << IsRVRefParam
+                  << (IsConstArg && IsVariable && !IsTriviallyCopyable) << Var
+                  << Arg->getType();
+      if (!IsRVRefParam)
+        replaceCallWithArg(CallMove, Diag, SM, getLangOpts());
+    }
+    if (IsRVRefParam) {
+      // Generate notes for an invocation with an rvalue reference parameter.
+      const auto *ReceivingCallExpr = dyn_cast<CallExpr>(ReceivingExpr);
+      const auto *ReceivingConstructExpr =
+          dyn_cast<CXXConstructExpr>(ReceivingExpr);
+      // Skipping the invocation which is a template instantiation.
+      if ((!ReceivingCallExpr || !ReceivingCallExpr->getDirectCallee() ||
+           ReceivingCallExpr->getDirectCallee()->isTemplateInstantiation()) &&
+          (!ReceivingConstructExpr ||
+           !ReceivingConstructExpr->getConstructor() ||
+           ReceivingConstructExpr->getConstructor()->isTemplateInstantiation()))
+        return;
+
+      const NamedDecl *FunctionName = nullptr;
+      FunctionName =
+          ReceivingCallExpr
+              ? ReceivingCallExpr->getDirectCallee()->getUnderlyingDecl()
+              : ReceivingConstructExpr->getConstructor()->getUnderlyingDecl();
+
+      QualType NoRefType = (*InvocationParmType)->getPointeeType();
+      PrintingPolicy PolicyWithSuppressedTag(getLangOpts());
+      PolicyWithSuppressedTag.SuppressTagKeyword = true;
+      PolicyWithSuppressedTag.SuppressUnwrittenScope = true;
+      std::string ExpectParmTypeName =
+          NoRefType.getAsString(PolicyWithSuppressedTag);
+      if (!NoRefType->isPointerType()) {
+        NoRefType.addConst();
+        ExpectParmTypeName =
+            NoRefType.getAsString(PolicyWithSuppressedTag) + " &";
+      }
+
+      diag(InvocationParm->getLocation(),
+           "consider changing the %ordinal0 parameter of %1 from %2 to '%3'",
+           DiagnosticIDs::Note)
+          << (InvocationParm->getFunctionScopeIndex() + 1) << FunctionName
+          << *InvocationParmType << ExpectParmTypeName;
+    }
   } else if (ReceivingExpr) {
+    if ((*InvocationParmType)->isRValueReferenceType())
+      return;
+
     auto Diag = diag(FileMoveRange.getBegin(),
                      "passing result of std::move() as a const reference "
                      "argument; no move will actually happen");
diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h
index 28fe8d523d943..4a93e4c306e30 100644
--- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h
+++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_MISC_MOVECONSTANTARGUMENTCHECK_H
 
 #include "../ClangTidyCheck.h"
+#include "llvm/ADT/DenseSet.h"
 
 namespace clang {
 namespace tidy {
@@ -36,6 +37,7 @@ class MoveConstArgCheck : public ClangTidyCheck {
 
 private:
   const bool CheckTriviallyCopyableMove;
+  llvm::DenseSet<const CallExpr *> AlreadyCheckedMoves;
 };
 
 } // namespace performance
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 683e914b7e863..e3c1c4b9411bb 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -175,6 +175,9 @@ Changes in existing checks
   option to control whether to warn on narrowing integer to floating-point
   conversions.
 
+- Improved :doc:`performance-move-const-arg` check.
+
+  Removed a wrong FixIt for trivially copyable objects wrapped by ``std::move()`` and passed to an rvalue reference parameter. Removal of ``std::move()`` would break the code.
 
 Removed checks
 ^^^^^^^^^^^^^^
diff --git a/clang-tools-extra/test/clang-tidy/checkers/performance-move-const-arg.cpp b/clang-tools-extra/test/clang-tidy/checkers/performance-move-const-arg.cpp
index 06ed6e0b56b1c..c1e5761c538e9 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/performance-move-const-arg.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/performance-move-const-arg.cpp
@@ -246,3 +246,97 @@ void lambda2() {
   };
   f(MoveSemantics());
 }
+
+void showInt(int &&v);
+void showInt(int v1, int &&v2);
+void showPointer(const char *&&s);
+void showPointer2(const char *const &&s);
+void showTriviallyCopyable(TriviallyCopyable &&obj);
+void showTriviallyCopyablePointer(const TriviallyCopyable *&&obj);
+void testFunctions() {
+  int a = 10;
+  showInt(std::move(a));
+  // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-10]]:20: note: consider changing the 1st parameter of 'showInt' from 'int &&' to 'const int &'
+  showInt(int());
+  showInt(a, std::move(a));
+  // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-13]]:28: note: consider changing the 2nd parameter of 'showInt' from 'int &&' to 'const int &'
+  const char* s = "";
+  showPointer(std::move(s));
+  // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: std::move of the variable 's' of the trivially-copyable type 'const char *' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-16]]:32: note: consider changing the 1st parameter of 'showPointer' from 'const char *&&' to 'const char *'
+  showPointer2(std::move(s)); 
+  // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: std::move of the variable 's' of the trivially-copyable type 'const char *' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-18]]:39: note: consider changing the 1st parameter of 'showPointer2' from 'const char *const &&' to 'const char *const'
+  TriviallyCopyable *obj = new TriviallyCopyable();
+  showTriviallyCopyable(std::move(*obj));
+  // CHECK-MESSAGES: :[[@LINE-1]]:25: warning: std::move of the expression of the trivially-copyable type 'TriviallyCopyable' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-21]]:48: note: consider changing the 1st parameter of 'showTriviallyCopyable' from 'TriviallyCopyable &&' to 'const TriviallyCopyable &'
+  showTriviallyCopyablePointer(std::move(obj));
+  // CHECK-MESSAGES: :[[@LINE-1]]:32: warning: std::move of the variable 'obj' of the trivially-copyable type 'TriviallyCopyable *' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-23]]:62: note: consider changing the 1st parameter of 'showTriviallyCopyablePointer' from 'const TriviallyCopyable *&&' to 'const TriviallyCopyable *'
+}
+template <class T>
+void forwardToShowInt(T && t) {
+  showInt(static_cast<T &&>(t));
+}
+void testTemplate() {
+  int a = 10;
+  forwardToShowInt(std::move(a));
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg]
+}
+
+struct Tmp {
+  Tmp();
+  Tmp(int &&a);
+  Tmp(int v1, int &&a);
+  Tmp(const char *&&s);
+  Tmp(TriviallyCopyable&& obj);
+  Tmp(const TriviallyCopyable *&&obj);
+  void showTmp(TriviallyCopyable&& t);
+  static void showTmpStatic(TriviallyCopyable&& t);
+};
+void testMethods() {
+  Tmp t;
+  int a = 10;
+  Tmp t1(std::move(a));
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-13]]:13: note: consider changing the 1st parameter of 'Tmp' from 'int &&' to 'const int &'
+  Tmp t2(a, std::move(a));
+  // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-15]]:21: note: consider changing the 2nd parameter of 'Tmp' from 'int &&' to 'const int &'
+  const char* s = "";
+  Tmp t3(std::move(s));
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the variable 's' of the trivially-copyable type 'const char *' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-18]]:21: note: consider changing the 1st parameter of 'Tmp' from 'const char *&&' to 'const char *'
+  TriviallyCopyable *obj = new TriviallyCopyable();
+  Tmp t4(std::move(*obj));
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the expression of the trivially-copyable type 'TriviallyCopyable' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-21]]:27: note: consider changing the 1st parameter of 'Tmp' from 'TriviallyCopyable &&' to 'const TriviallyCopyable &'
+  Tmp t5(std::move(obj));
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the variable 'obj' of the trivially-copyable type 'TriviallyCopyable *' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-23]]:34: note: consider changing the 1st parameter of 'Tmp' from 'const TriviallyCopyable *&&' to 'const TriviallyCopyable *'
+  t.showTmp(std::move(*obj));
+  // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: std::move of the expression of the trivially-copyable type 'TriviallyCopyable' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-25]]:36: note: consider changing the 1st parameter of 'showTmp' from 'TriviallyCopyable &&' to 'const TriviallyCopyable &'
+  Tmp::showTmpStatic(std::move(*obj));
+  // CHECK-MESSAGES: :[[@LINE-1]]:22: warning: std::move of the expression of the trivially-copyable type 'TriviallyCopyable' has no effect [performance-move-const-arg]
+  // CHECK-MESSAGES: :[[@LINE-27]]:49: note: consider changing the 1st parameter of 'showTmpStatic' from 'TriviallyCopyable &&' to 'const TriviallyCopyable &'
+}
+
+void showA(A &&v) {}
+void testA() {
+  A a;
+  showA(std::move(a));
+}
+
+void testFuncPointer() {
+  int a = 10;
+  void (*choice)(int, int &&);
+  choice = showInt;
+  choice(std::move(a), std::move(a));
+  // CHECK-MESSAGES: :[[@LINE-1]]:10: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect; remove std::move() [performance-move-const-arg]
+  // CHECK-FIXES: choice(a, std::move(a));
+  // CHECK-MESSAGES: :[[@LINE-3]]:24: warning: std::move of the variable 'a' of the trivially-copyable type 'int' has no effect [performance-move-const-arg]
+}

From 82bb8a588ddea104faadb54366e3d42b5857fc06 Mon Sep 17 00:00:00 2001
From: Zi Xuan Wu <zixuan.wu@linux.alibaba.com>
Date: Thu, 20 Jan 2022 17:44:53 +0800
Subject: [PATCH 131/946] [CSKY] Add codegen support of GlobalTLSAddress
 lowering

There are static and dynamic TLS address lowering in DAG stage according to different TLS model.
It needs PseudoTLSLA32 pseudo to get address of TLS-related entry which resides in constant pool.
---
 llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp   |  30 ++++
 llvm/lib/Target/CSKY/CSKYAsmPrinter.h     |   1 +
 llvm/lib/Target/CSKY/CSKYISelLowering.cpp | 115 ++++++++++++++
 llvm/lib/Target/CSKY/CSKYISelLowering.h   |   4 +
 llvm/test/CodeGen/CSKY/tls-models.ll      | 179 ++++++++++++++++++++++
 5 files changed, 329 insertions(+)
 create mode 100644 llvm/test/CodeGen/CSKY/tls-models.ll

diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
index cfe97b971c53d..c8269eeacfdb2 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp
@@ -58,6 +58,33 @@ void CSKYAsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
 // instructions) auto-generated.
 #include "CSKYGenMCPseudoLowering.inc"
 
+void CSKYAsmPrinter::expandTLSLA(const MachineInstr *MI) {
+  const CSKYInstrInfo *TII = Subtarget->getInstrInfo();
+
+  DebugLoc DL = MI->getDebugLoc();
+
+  MCSymbol *PCLabel = OutContext.getOrCreateSymbol(
+      Twine(MAI->getPrivateGlobalPrefix()) + "PC" + Twine(getFunctionNumber()) +
+      "_" + Twine(MI->getOperand(3).getImm()));
+
+  OutStreamer->emitLabel(PCLabel);
+
+  auto Instr = BuildMI(*MF, DL, TII->get(CSKY::LRW32))
+                   .add(MI->getOperand(0))
+                   .add(MI->getOperand(2));
+  MCInst LRWInst;
+  MCInstLowering.Lower(Instr, LRWInst);
+  EmitToStreamer(*OutStreamer, LRWInst);
+
+  Instr = BuildMI(*MF, DL, TII->get(CSKY::GRS32))
+              .add(MI->getOperand(1))
+              .addSym(PCLabel);
+  MCInst GRSInst;
+  MCInstLowering.Lower(Instr, GRSInst);
+  EmitToStreamer(*OutStreamer, GRSInst);
+  return;
+}
+
 void CSKYAsmPrinter::emitCustomConstantPool(const MachineInstr *MI) {
 
   // This instruction represents a floating constant pool in the function.
@@ -102,6 +129,9 @@ void CSKYAsmPrinter::emitInstruction(const MachineInstr *MI) {
     InConstantPool = false;
   }
 
+  if (MI->getOpcode() == CSKY::PseudoTLSLA32)
+    return expandTLSLA(MI);
+
   if (MI->getOpcode() == CSKY::CONSTPOOL_ENTRY)
     return emitCustomConstantPool(MI);
 
diff --git a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
index 64c94f08eae4e..04a253d349c80 100644
--- a/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
+++ b/llvm/lib/Target/CSKY/CSKYAsmPrinter.h
@@ -26,6 +26,7 @@ class LLVM_LIBRARY_VISIBILITY CSKYAsmPrinter : public AsmPrinter {
   /// MachineFunction.
   MachineConstantPool *MCP;
 
+  void expandTLSLA(const MachineInstr *MI);
   void emitCustomConstantPool(const MachineInstr *MI);
 
 public:
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index b253e10574a74..c4d5d687216d1 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -119,6 +119,8 @@ SDValue CSKYTargetLowering::LowerOperation(SDValue Op,
     return LowerGlobalAddress(Op, DAG);
   case ISD::ExternalSymbol:
     return LowerExternalSymbol(Op, DAG);
+  case ISD::GlobalTLSAddress:
+    return LowerGlobalTLSAddress(Op, DAG);
   case ISD::JumpTable:
     return LowerJumpTable(Op, DAG);
   case ISD::BlockAddress:
@@ -1005,3 +1007,116 @@ Register CSKYTargetLowering::getExceptionSelectorRegister(
     const Constant *PersonalityFn) const {
   return CSKY::R1;
 }
+
+SDValue CSKYTargetLowering::LowerGlobalTLSAddress(SDValue Op,
+                                                  SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  EVT Ty = Op.getValueType();
+  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
+  int64_t Offset = N->getOffset();
+  MVT XLenVT = MVT::i32;
+
+  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());
+  SDValue Addr;
+  switch (Model) {
+  case TLSModel::LocalExec:
+    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
+    break;
+  case TLSModel::InitialExec:
+    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
+    break;
+  case TLSModel::LocalDynamic:
+  case TLSModel::GeneralDynamic:
+    Addr = getDynamicTLSAddr(N, DAG);
+    break;
+  }
+
+  // In order to maximise the opportunity for common subexpression elimination,
+  // emit a separate ADD node for the global address offset instead of folding
+  // it in the global address node. Later peephole optimisations may choose to
+  // fold it back in when profitable.
+  if (Offset != 0)
+    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
+                       DAG.getConstant(Offset, DL, XLenVT));
+  return Addr;
+}
+
+SDValue CSKYTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
+                                             SelectionDAG &DAG,
+                                             bool UseGOT) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+
+  unsigned CSKYPCLabelIndex = CFI->createPICLabelUId();
+
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+
+  CSKYCP::CSKYCPModifier Flag = UseGOT ? CSKYCP::TLSIE : CSKYCP::TLSLE;
+  bool AddCurrentAddr = UseGOT ? true : false;
+  unsigned char PCAjust = UseGOT ? 4 : 0;
+
+  CSKYConstantPoolValue *CPV =
+      CSKYConstantPoolConstant::Create(N->getGlobal(), CSKYCP::CPValue, PCAjust,
+                                       Flag, AddCurrentAddr, CSKYPCLabelIndex);
+  SDValue CAddr = DAG.getTargetConstantPool(CPV, Ty);
+
+  SDValue Load;
+  if (UseGOT) {
+    SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32);
+    auto *LRWGRS = DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty},
+                                      {CAddr, PICLabel});
+    auto LRWADDGRS =
+        DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1));
+    Load = DAG.getLoad(Ty, DL, DAG.getEntryNode(), LRWADDGRS,
+                       MachinePointerInfo(N->getGlobal()));
+  } else {
+    Load = SDValue(DAG.getMachineNode(CSKY::LRW32, DL, Ty, CAddr), 0);
+  }
+
+  // Add the thread pointer.
+  SDValue TPReg = DAG.getRegister(CSKY::R31, MVT::i32);
+  return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
+}
+
+SDValue CSKYTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
+                                              SelectionDAG &DAG) const {
+  MachineFunction &MF = DAG.getMachineFunction();
+  CSKYMachineFunctionInfo *CFI = MF.getInfo<CSKYMachineFunctionInfo>();
+
+  unsigned CSKYPCLabelIndex = CFI->createPICLabelUId();
+
+  SDLoc DL(N);
+  EVT Ty = getPointerTy(DAG.getDataLayout());
+  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
+
+  CSKYConstantPoolValue *CPV =
+      CSKYConstantPoolConstant::Create(N->getGlobal(), CSKYCP::CPValue, 4,
+                                       CSKYCP::TLSGD, true, CSKYPCLabelIndex);
+  SDValue Addr = DAG.getTargetConstantPool(CPV, Ty);
+  SDValue PICLabel = DAG.getTargetConstant(CSKYPCLabelIndex, DL, MVT::i32);
+
+  auto *LRWGRS =
+      DAG.getMachineNode(CSKY::PseudoTLSLA32, DL, {Ty, Ty}, {Addr, PICLabel});
+
+  auto Load =
+      DAG.getNode(ISD::ADD, DL, Ty, SDValue(LRWGRS, 0), SDValue(LRWGRS, 1));
+
+  // Prepare argument list to generate call.
+  ArgListTy Args;
+  ArgListEntry Entry;
+  Entry.Node = Load;
+  Entry.Ty = CallTy;
+  Args.push_back(Entry);
+
+  // Setup call to __tls_get_addr.
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(DL)
+      .setChain(DAG.getEntryNode())
+      .setLibCallee(CallingConv::C, CallTy,
+                    DAG.getExternalSymbol("__tls_get_addr", Ty),
+                    std::move(Args));
+  SDValue V = LowerCallTo(CLI).first;
+
+  return V;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.h b/llvm/lib/Target/CSKY/CSKYISelLowering.h
index 3b3218b015e8f..e1744d5ce2203 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.h
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.h
@@ -154,6 +154,10 @@ class CSKYTargetLowering : public TargetLowering {
   SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
 
+  SDValue getStaticTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG,
+                           bool UseGOT) const;
+  SDValue getDynamicTLSAddr(GlobalAddressSDNode *N, SelectionDAG &DAG) const;
+
   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
   CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg) const;
 };
diff --git a/llvm/test/CodeGen/CSKY/tls-models.ll b/llvm/test/CodeGen/CSKY/tls-models.ll
new file mode 100644
index 0000000000000..35dca36f515b2
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/tls-models.ll
@@ -0,0 +1,179 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=csky -csky-no-aliases -relocation-model=pic -mattr=+2e3 < %s \
+; RUN:     | FileCheck -check-prefix=CSKY-PIC %s
+; RUN: llc -mtriple=csky -csky-no-aliases -mattr=+2e3 < %s | FileCheck -check-prefix=CSKY-NOPIC %s
+
+; Check that TLS symbols are lowered correctly based on the specified
+; model. Make sure they're external to avoid them all being optimised to Local
+; Exec for the executable.
+
+@unspecified = external thread_local global i32
+@ld = external thread_local(localdynamic) global i32
+@ie = external thread_local(initialexec) global i32
+@le = external thread_local(localexec) global i32
+
+
+; No model specified
+
+define i32* @f1() nounwind {
+; CSKY-PIC-LABEL: f1:
+; CSKY-PIC:       # %bb.0: # %entry
+; CSKY-PIC-NEXT:    subi16 sp, sp, 8
+; CSKY-PIC-NEXT:    st32.w rgb, (sp, 4) # 4-byte Folded Spill
+; CSKY-PIC-NEXT:    st32.w lr, (sp, 0) # 4-byte Folded Spill
+; CSKY-PIC-NEXT:    lrw32 rgb, [.LCPI0_0]
+; CSKY-PIC-NEXT:  .LPC0_1:
+; CSKY-PIC-NEXT:    lrw32 a0, [.LCPI0_1]
+; CSKY-PIC-NEXT:    grs32 a1, .LPC0_1
+; CSKY-PIC-NEXT:    addu16 a0, a1
+; CSKY-PIC-NEXT:    lrw32 a1, [.LCPI0_2]
+; CSKY-PIC-NEXT:    ldr32.w a1, (rgb, a1 << 0)
+; CSKY-PIC-NEXT:    jsr16 a1
+; CSKY-PIC-NEXT:    ld32.w lr, (sp, 0) # 4-byte Folded Reload
+; CSKY-PIC-NEXT:    ld32.w rgb, (sp, 4) # 4-byte Folded Reload
+; CSKY-PIC-NEXT:    addi16 sp, sp, 8
+; CSKY-PIC-NEXT:    rts16
+; CSKY-PIC-NEXT:    .p2align 1
+; CSKY-PIC-NEXT:  # %bb.1:
+; CSKY-PIC-NEXT:    .p2align 2
+; CSKY-PIC-NEXT:  .LCPI0_0:
+; CSKY-PIC-NEXT:    .long _GLOBAL_OFFSET_TABLE_
+; CSKY-PIC-NEXT:  .LCPI0_1:
+; CSKY-PIC-NEXT:  .Ltmp0:
+; CSKY-PIC-NEXT:    .long unspecified-(.LPC0_1-.Ltmp0)@TLSGD32
+; CSKY-PIC-NEXT:  .LCPI0_2:
+; CSKY-PIC-NEXT:    .long __tls_get_addr@PLT
+;
+; CSKY-NOPIC-LABEL: f1:
+; CSKY-NOPIC:       # %bb.0: # %entry
+; CSKY-NOPIC-NEXT:  .LPC0_1:
+; CSKY-NOPIC-NEXT:    lrw32 a0, [.LCPI0_0]
+; CSKY-NOPIC-NEXT:    grs32 a1, .LPC0_1
+; CSKY-NOPIC-NEXT:    ldr32.w a0, (a0, a1 << 0)
+; CSKY-NOPIC-NEXT:    addu32 a0, a0, tls
+; CSKY-NOPIC-NEXT:    rts16
+; CSKY-NOPIC-NEXT:    .p2align 1
+; CSKY-NOPIC-NEXT:  # %bb.1:
+; CSKY-NOPIC-NEXT:    .p2align 2
+; CSKY-NOPIC-NEXT:  .LCPI0_0:
+; CSKY-NOPIC-NEXT:  .Ltmp0:
+; CSKY-NOPIC-NEXT:    .long unspecified-(.LPC0_1-.Ltmp0)@GOTTPOFF
+entry:
+  ret i32* @unspecified
+}
+
+
+; localdynamic specified
+
+define i32* @f2() nounwind {
+; CSKY-PIC-LABEL: f2:
+; CSKY-PIC:       # %bb.0: # %entry
+; CSKY-PIC-NEXT:    subi16 sp, sp, 8
+; CSKY-PIC-NEXT:    st32.w rgb, (sp, 4) # 4-byte Folded Spill
+; CSKY-PIC-NEXT:    st32.w lr, (sp, 0) # 4-byte Folded Spill
+; CSKY-PIC-NEXT:    lrw32 rgb, [.LCPI1_0]
+; CSKY-PIC-NEXT:  .LPC1_1:
+; CSKY-PIC-NEXT:    lrw32 a0, [.LCPI1_1]
+; CSKY-PIC-NEXT:    grs32 a1, .LPC1_1
+; CSKY-PIC-NEXT:    addu16 a0, a1
+; CSKY-PIC-NEXT:    lrw32 a1, [.LCPI1_2]
+; CSKY-PIC-NEXT:    ldr32.w a1, (rgb, a1 << 0)
+; CSKY-PIC-NEXT:    jsr16 a1
+; CSKY-PIC-NEXT:    ld32.w lr, (sp, 0) # 4-byte Folded Reload
+; CSKY-PIC-NEXT:    ld32.w rgb, (sp, 4) # 4-byte Folded Reload
+; CSKY-PIC-NEXT:    addi16 sp, sp, 8
+; CSKY-PIC-NEXT:    rts16
+; CSKY-PIC-NEXT:    .p2align 1
+; CSKY-PIC-NEXT:  # %bb.1:
+; CSKY-PIC-NEXT:    .p2align 2
+; CSKY-PIC-NEXT:  .LCPI1_0:
+; CSKY-PIC-NEXT:    .long _GLOBAL_OFFSET_TABLE_
+; CSKY-PIC-NEXT:  .LCPI1_1:
+; CSKY-PIC-NEXT:  .Ltmp1:
+; CSKY-PIC-NEXT:    .long ld-(.LPC1_1-.Ltmp1)@TLSGD32
+; CSKY-PIC-NEXT:  .LCPI1_2:
+; CSKY-PIC-NEXT:    .long __tls_get_addr@PLT
+;
+; CSKY-NOPIC-LABEL: f2:
+; CSKY-NOPIC:       # %bb.0: # %entry
+; CSKY-NOPIC-NEXT:  .LPC1_1:
+; CSKY-NOPIC-NEXT:    lrw32 a0, [.LCPI1_0]
+; CSKY-NOPIC-NEXT:    grs32 a1, .LPC1_1
+; CSKY-NOPIC-NEXT:    ldr32.w a0, (a0, a1 << 0)
+; CSKY-NOPIC-NEXT:    addu32 a0, a0, tls
+; CSKY-NOPIC-NEXT:    rts16
+; CSKY-NOPIC-NEXT:    .p2align 1
+; CSKY-NOPIC-NEXT:  # %bb.1:
+; CSKY-NOPIC-NEXT:    .p2align 2
+; CSKY-NOPIC-NEXT:  .LCPI1_0:
+; CSKY-NOPIC-NEXT:  .Ltmp1:
+; CSKY-NOPIC-NEXT:    .long ld-(.LPC1_1-.Ltmp1)@GOTTPOFF
+entry:
+  ret i32* @ld
+}
+
+
+; initialexec specified
+
+define i32* @f3() nounwind {
+; CSKY-PIC-LABEL: f3:
+; CSKY-PIC:       # %bb.0: # %entry
+; CSKY-PIC-NEXT:  .LPC2_1:
+; CSKY-PIC-NEXT:    lrw32 a0, [.LCPI2_0]
+; CSKY-PIC-NEXT:    grs32 a1, .LPC2_1
+; CSKY-PIC-NEXT:    ldr32.w a0, (a0, a1 << 0)
+; CSKY-PIC-NEXT:    addu32 a0, a0, tls
+; CSKY-PIC-NEXT:    rts16
+; CSKY-PIC-NEXT:    .p2align 1
+; CSKY-PIC-NEXT:  # %bb.1:
+; CSKY-PIC-NEXT:    .p2align 2
+; CSKY-PIC-NEXT:  .LCPI2_0:
+; CSKY-PIC-NEXT:  .Ltmp2:
+; CSKY-PIC-NEXT:    .long ie-(.LPC2_1-.Ltmp2)@GOTTPOFF
+;
+; CSKY-NOPIC-LABEL: f3:
+; CSKY-NOPIC:       # %bb.0: # %entry
+; CSKY-NOPIC-NEXT:  .LPC2_1:
+; CSKY-NOPIC-NEXT:    lrw32 a0, [.LCPI2_0]
+; CSKY-NOPIC-NEXT:    grs32 a1, .LPC2_1
+; CSKY-NOPIC-NEXT:    ldr32.w a0, (a0, a1 << 0)
+; CSKY-NOPIC-NEXT:    addu32 a0, a0, tls
+; CSKY-NOPIC-NEXT:    rts16
+; CSKY-NOPIC-NEXT:    .p2align 1
+; CSKY-NOPIC-NEXT:  # %bb.1:
+; CSKY-NOPIC-NEXT:    .p2align 2
+; CSKY-NOPIC-NEXT:  .LCPI2_0:
+; CSKY-NOPIC-NEXT:  .Ltmp2:
+; CSKY-NOPIC-NEXT:    .long ie-(.LPC2_1-.Ltmp2)@GOTTPOFF
+entry:
+  ret i32* @ie
+}
+
+
+; localexec specified
+
+define i32* @f4() nounwind {
+; CSKY-PIC-LABEL: f4:
+; CSKY-PIC:       # %bb.0: # %entry
+; CSKY-PIC-NEXT:    lrw32 a0, [.LCPI3_0]
+; CSKY-PIC-NEXT:    addu32 a0, a0, tls
+; CSKY-PIC-NEXT:    rts16
+; CSKY-PIC-NEXT:    .p2align 1
+; CSKY-PIC-NEXT:  # %bb.1:
+; CSKY-PIC-NEXT:    .p2align 2
+; CSKY-PIC-NEXT:  .LCPI3_0:
+; CSKY-PIC-NEXT:    .long le@TPOFF
+;
+; CSKY-NOPIC-LABEL: f4:
+; CSKY-NOPIC:       # %bb.0: # %entry
+; CSKY-NOPIC-NEXT:    lrw32 a0, [.LCPI3_0]
+; CSKY-NOPIC-NEXT:    addu32 a0, a0, tls
+; CSKY-NOPIC-NEXT:    rts16
+; CSKY-NOPIC-NEXT:    .p2align 1
+; CSKY-NOPIC-NEXT:  # %bb.1:
+; CSKY-NOPIC-NEXT:    .p2align 2
+; CSKY-NOPIC-NEXT:  .LCPI3_0:
+; CSKY-NOPIC-NEXT:    .long le@TPOFF
+entry:
+  ret i32* @le
+}

From 8def89b5dc82efba039cca4ff9d072e169da7329 Mon Sep 17 00:00:00 2001
From: wangpc <pc.wang@linux.alibaba.com>
Date: Fri, 21 Jan 2022 13:54:09 +0800
Subject: [PATCH 132/946] [RISCV] Set CostPerUse to 1 iff RVC is enabled

After D86836, we can define multiple cost values for
different cost models. So here we set CostPerUse to
1 iff RVC is enabled to avoid potential impact on RA.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117741
---
 llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp   |    5 +
 llvm/lib/Target/RISCV/RISCVRegisterInfo.h     |    2 +
 llvm/lib/Target/RISCV/RISCVRegisterInfo.td    |    7 +-
 llvm/test/CodeGen/RISCV/add-before-shl.ll     |   20 +-
 llvm/test/CodeGen/RISCV/addcarry.ll           |   30 +-
 llvm/test/CodeGen/RISCV/atomic-rmw.ll         | 2400 ++++++++---------
 llvm/test/CodeGen/RISCV/atomic-signext.ll     |  480 ++--
 .../CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll    |   98 +-
 llvm/test/CodeGen/RISCV/callee-saved-gprs.ll  |  796 +++---
 ...calling-conv-ilp32-ilp32f-ilp32d-common.ll |   76 +-
 .../calling-conv-lp64-lp64f-lp64d-common.ll   |   38 +-
 .../test/CodeGen/RISCV/double-arith-strict.ll |   92 +-
 llvm/test/CodeGen/RISCV/double-arith.ll       |  208 +-
 llvm/test/CodeGen/RISCV/double-convert.ll     |  186 +-
 llvm/test/CodeGen/RISCV/double-fcmp-strict.ll |   64 +-
 llvm/test/CodeGen/RISCV/double-fcmp.ll        |   32 +-
 llvm/test/CodeGen/RISCV/float-arith-strict.ll |   32 +-
 llvm/test/CodeGen/RISCV/float-arith.ll        |  124 +-
 llvm/test/CodeGen/RISCV/float-convert.ll      |   88 +-
 llvm/test/CodeGen/RISCV/fp128.ll              |   32 +-
 llvm/test/CodeGen/RISCV/fpclamptosat.ll       |  414 +--
 llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll   | 1175 ++++----
 llvm/test/CodeGen/RISCV/half-arith.ll         |  708 +++--
 llvm/test/CodeGen/RISCV/half-convert.ll       |  112 +-
 llvm/test/CodeGen/RISCV/half-intrinsics.ll    |  152 +-
 llvm/test/CodeGen/RISCV/mul.ll                |  388 ++-
 llvm/test/CodeGen/RISCV/remat.ll              |   44 +-
 llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll   |   24 +-
 llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll        |   30 +-
 llvm/test/CodeGen/RISCV/rv32zbb.ll            |   96 +-
 llvm/test/CodeGen/RISCV/rv32zbp.ll            |  546 ++--
 llvm/test/CodeGen/RISCV/rv32zbt.ll            |   28 +-
 .../test/CodeGen/RISCV/rv64i-complex-float.ll |    8 +-
 llvm/test/CodeGen/RISCV/rv64zbp.ll            |   56 +-
 .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll  |   86 +-
 .../rvv/fixed-vector-strided-load-store.ll    |   88 +-
 .../RISCV/rvv/fixed-vectors-bitreverse.ll     |  174 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll  |   80 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll   |   14 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll   |   66 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-elen.ll   |   28 +-
 .../RISCV/rvv/fixed-vectors-fp-vrgather.ll    |   48 +-
 .../RISCV/rvv/fixed-vectors-unaligned.ll      |   32 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll |   18 +-
 .../CodeGen/RISCV/rvv/no-reserved-frame.ll    |    8 +-
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll  |  692 ++---
 llvm/test/CodeGen/RISCV/rvv/vpload.ll         |   50 +-
 llvm/test/CodeGen/RISCV/rvv/vpstore.ll        |   52 +-
 .../CodeGen/RISCV/rvv/vreductions-int-vp.ll   |    6 +-
 llvm/test/CodeGen/RISCV/sadd_sat.ll           |    8 +-
 llvm/test/CodeGen/RISCV/sadd_sat_plus.ll      |   12 +-
 .../CodeGen/RISCV/select-optimize-multiple.ll |   98 +-
 llvm/test/CodeGen/RISCV/sextw-removal.ll      |   12 +-
 llvm/test/CodeGen/RISCV/shadowcallstack.ll    |   16 +-
 llvm/test/CodeGen/RISCV/shifts.ll             |  484 ++--
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll |   32 +-
 llvm/test/CodeGen/RISCV/srem-vector-lkk.ll    |  948 ++++---
 llvm/test/CodeGen/RISCV/ssub_sat.ll           |   14 +-
 llvm/test/CodeGen/RISCV/ssub_sat_plus.ll      |   16 +-
 llvm/test/CodeGen/RISCV/stack-store-check.ll  |   84 +-
 llvm/test/CodeGen/RISCV/tail-calls.ll         |    8 +-
 .../RISCV/umulo-128-legalisation-lowering.ll  |  146 +-
 .../CodeGen/RISCV/urem-seteq-illegal-types.ll |   12 +-
 llvm/test/CodeGen/RISCV/urem-vector-lkk.ll    |  950 ++++---
 llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll   |   16 +-
 llvm/test/CodeGen/RISCV/xaluo.ll              |  840 +++---
 66 files changed, 6801 insertions(+), 6928 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 9094dff1dda18..35363bf37c0db 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -347,3 +347,8 @@ void RISCVRegisterInfo::getOffsetOpcodes(const StackOffset &Offset,
     Ops.push_back(dwarf::DW_OP_minus);
   }
 }
+
+unsigned
+RISCVRegisterInfo::getRegisterCostTableIndex(const MachineFunction &MF) const {
+  return MF.getSubtarget<RISCVSubtarget>().hasStdExtC() ? 1 : 0;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 2b2bbdfbdf32f..9e0ef7902210b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -66,6 +66,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
 
   void getOffsetOpcodes(const StackOffset &Offset,
                         SmallVectorImpl<uint64_t> &Ops) const override;
+
+  unsigned getRegisterCostTableIndex(const MachineFunction &MF) const override;
 };
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 79370791efa24..8c1c03b51c249 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -73,12 +73,11 @@ def sub_vrm1_7 : ComposedSubRegIndex<sub_vrm2_3, sub_vrm1_1>;
 // are not part of GPRC, the most restrictive register class used by the
 // compressed instruction set. This will influence the greedy register
 // allocator to reduce the use of registers that can't be encoded in 16 bit
-// instructions. This affects register allocation even when compressed
-// instruction isn't targeted, we see no major negative codegen impact.
+// instructions.
 
 let RegAltNameIndices = [ABIRegAltName] in {
   def X0  : RISCVReg<0, "x0", ["zero"]>, DwarfRegNum<[0]>;
-  let CostPerUse = [1] in {
+  let CostPerUse = [0, 1] in {
   def X1  : RISCVReg<1, "x1", ["ra"]>, DwarfRegNum<[1]>;
   def X2  : RISCVReg<2, "x2", ["sp"]>, DwarfRegNum<[2]>;
   def X3  : RISCVReg<3, "x3", ["gp"]>, DwarfRegNum<[3]>;
@@ -95,7 +94,7 @@ let RegAltNameIndices = [ABIRegAltName] in {
   def X13 : RISCVReg<13,"x13", ["a3"]>, DwarfRegNum<[13]>;
   def X14 : RISCVReg<14,"x14", ["a4"]>, DwarfRegNum<[14]>;
   def X15 : RISCVReg<15,"x15", ["a5"]>, DwarfRegNum<[15]>;
-  let CostPerUse = [1] in {
+  let CostPerUse = [0, 1] in {
   def X16 : RISCVReg<16,"x16", ["a6"]>, DwarfRegNum<[16]>;
   def X17 : RISCVReg<17,"x17", ["a7"]>, DwarfRegNum<[17]>;
   def X18 : RISCVReg<18,"x18", ["s2"]>, DwarfRegNum<[18]>;
diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll
index 212c74b79a343..a977cad504d3e 100644
--- a/llvm/test/CodeGen/RISCV/add-before-shl.ll
+++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll
@@ -163,23 +163,23 @@ define i128 @add_wide_operand(i128 %a) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lw a2, 0(a1)
 ; RV32I-NEXT:    lw a3, 4(a1)
-; RV32I-NEXT:    lw a6, 12(a1)
+; RV32I-NEXT:    lw a4, 12(a1)
 ; RV32I-NEXT:    lw a1, 8(a1)
 ; RV32I-NEXT:    srli a5, a2, 29
-; RV32I-NEXT:    slli a4, a3, 3
-; RV32I-NEXT:    or a4, a4, a5
+; RV32I-NEXT:    slli a6, a3, 3
+; RV32I-NEXT:    or a5, a6, a5
 ; RV32I-NEXT:    srli a3, a3, 29
-; RV32I-NEXT:    slli a5, a1, 3
-; RV32I-NEXT:    or a3, a5, a3
+; RV32I-NEXT:    slli a6, a1, 3
+; RV32I-NEXT:    or a3, a6, a3
 ; RV32I-NEXT:    srli a1, a1, 29
-; RV32I-NEXT:    slli a5, a6, 3
-; RV32I-NEXT:    or a1, a5, a1
+; RV32I-NEXT:    slli a4, a4, 3
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    slli a2, a2, 3
-; RV32I-NEXT:    lui a5, 128
-; RV32I-NEXT:    add a1, a1, a5
+; RV32I-NEXT:    lui a4, 128
+; RV32I-NEXT:    add a1, a1, a4
 ; RV32I-NEXT:    sw a2, 0(a0)
 ; RV32I-NEXT:    sw a3, 8(a0)
-; RV32I-NEXT:    sw a4, 4(a0)
+; RV32I-NEXT:    sw a5, 4(a0)
 ; RV32I-NEXT:    sw a1, 12(a0)
 ; RV32I-NEXT:    jalr zero, 0(ra)
 ;
diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll
index 71711f9eb5054..18d1364f08d98 100644
--- a/llvm/test/CodeGen/RISCV/addcarry.ll
+++ b/llvm/test/CodeGen/RISCV/addcarry.ll
@@ -10,19 +10,19 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind {
 ; RISCV32-LABEL: addcarry:
 ; RISCV32:       # %bb.0:
 ; RISCV32-NEXT:    mul a4, a0, a3
-; RISCV32-NEXT:    mulhu a7, a0, a2
-; RISCV32-NEXT:    add a4, a7, a4
-; RISCV32-NEXT:    mul a5, a1, a2
-; RISCV32-NEXT:    add a6, a4, a5
-; RISCV32-NEXT:    sltu t0, a6, a4
-; RISCV32-NEXT:    sltu a4, a4, a7
-; RISCV32-NEXT:    mulhu a5, a0, a3
-; RISCV32-NEXT:    add a4, a5, a4
-; RISCV32-NEXT:    mulhu a5, a1, a2
-; RISCV32-NEXT:    add a4, a4, a5
-; RISCV32-NEXT:    add a4, a4, t0
-; RISCV32-NEXT:    mul a5, a1, a3
-; RISCV32-NEXT:    add a5, a4, a5
+; RISCV32-NEXT:    mulhu a5, a0, a2
+; RISCV32-NEXT:    add a6, a5, a4
+; RISCV32-NEXT:    mul a4, a1, a2
+; RISCV32-NEXT:    add a4, a6, a4
+; RISCV32-NEXT:    sltu a7, a4, a6
+; RISCV32-NEXT:    sltu a5, a6, a5
+; RISCV32-NEXT:    mulhu a6, a0, a3
+; RISCV32-NEXT:    add a5, a6, a5
+; RISCV32-NEXT:    mulhu a6, a1, a2
+; RISCV32-NEXT:    add a5, a5, a6
+; RISCV32-NEXT:    add a5, a5, a7
+; RISCV32-NEXT:    mul a6, a1, a3
+; RISCV32-NEXT:    add a5, a5, a6
 ; RISCV32-NEXT:    bgez a1, .LBB0_2
 ; RISCV32-NEXT:  # %bb.1:
 ; RISCV32-NEXT:    sub a5, a5, a2
@@ -32,9 +32,9 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind {
 ; RISCV32-NEXT:    sub a5, a5, a0
 ; RISCV32-NEXT:  .LBB0_4:
 ; RISCV32-NEXT:    slli a1, a5, 30
-; RISCV32-NEXT:    srli a3, a6, 2
+; RISCV32-NEXT:    srli a3, a4, 2
 ; RISCV32-NEXT:    or a1, a1, a3
-; RISCV32-NEXT:    slli a3, a6, 30
+; RISCV32-NEXT:    slli a3, a4, 30
 ; RISCV32-NEXT:    mul a0, a0, a2
 ; RISCV32-NEXT:    srli a0, a0, 2
 ; RISCV32-NEXT:    or a0, a3, a0
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 2b3453456d6a9..62fcee85d5f99 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -2008,9 +2008,9 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB35_2
 ; RV32I-NEXT:  .LBB35_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB35_2 Depth=1
@@ -2027,10 +2027,10 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB35_1
+; RV32I-NEXT:    blt s2, a0, .LBB35_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB35_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB35_1
 ; RV32I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -2043,30 +2043,30 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB35_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB35_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB35_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB35_3: # in Loop: Header=BB35_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB35_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB35_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -2080,9 +2080,9 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB35_2
 ; RV64I-NEXT:  .LBB35_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB35_2 Depth=1
@@ -2099,10 +2099,10 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB35_1
+; RV64I-NEXT:    blt s2, a0, .LBB35_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB35_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB35_1
 ; RV64I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2115,30 +2115,30 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB35_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB35_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB35_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB35_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB35_3: # in Loop: Header=BB35_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB35_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB35_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -2156,9 +2156,9 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB36_2
 ; RV32I-NEXT:  .LBB36_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB36_2 Depth=1
@@ -2175,10 +2175,10 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB36_1
+; RV32I-NEXT:    blt s2, a0, .LBB36_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB36_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB36_1
 ; RV32I-NEXT:  .LBB36_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -2191,30 +2191,30 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i8_acquire:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB36_3
+; RV32IA-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB36_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB36_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB36_3: # in Loop: Header=BB36_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB36_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB36_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -2228,9 +2228,9 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB36_2
 ; RV64I-NEXT:  .LBB36_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB36_2 Depth=1
@@ -2247,10 +2247,10 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB36_1
+; RV64I-NEXT:    blt s2, a0, .LBB36_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB36_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB36_1
 ; RV64I-NEXT:  .LBB36_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2263,30 +2263,30 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB36_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB36_3
+; RV64IA-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB36_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB36_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB36_3: # in Loop: Header=BB36_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB36_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB36_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -2304,9 +2304,9 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB37_2
 ; RV32I-NEXT:  .LBB37_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB37_2 Depth=1
@@ -2323,10 +2323,10 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB37_1
+; RV32I-NEXT:    blt s2, a0, .LBB37_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB37_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB37_1
 ; RV32I-NEXT:  .LBB37_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -2339,30 +2339,30 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i8_release:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB37_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB37_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB37_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB37_3: # in Loop: Header=BB37_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB37_1
+; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB37_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -2376,9 +2376,9 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB37_2
 ; RV64I-NEXT:  .LBB37_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB37_2 Depth=1
@@ -2395,10 +2395,10 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB37_1
+; RV64I-NEXT:    blt s2, a0, .LBB37_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB37_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB37_1
 ; RV64I-NEXT:  .LBB37_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2411,30 +2411,30 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB37_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB37_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB37_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB37_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB37_3: # in Loop: Header=BB37_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB37_1
+; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB37_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -2452,9 +2452,9 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB38_2
 ; RV32I-NEXT:  .LBB38_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB38_2 Depth=1
@@ -2471,10 +2471,10 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB38_1
+; RV32I-NEXT:    blt s2, a0, .LBB38_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB38_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB38_1
 ; RV32I-NEXT:  .LBB38_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -2487,30 +2487,30 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i8_acq_rel:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB38_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB38_3
+; RV32IA-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB38_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB38_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB38_3: # in Loop: Header=BB38_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB38_1
+; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB38_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -2524,9 +2524,9 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB38_2
 ; RV64I-NEXT:  .LBB38_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB38_2 Depth=1
@@ -2543,10 +2543,10 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB38_1
+; RV64I-NEXT:    blt s2, a0, .LBB38_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB38_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB38_1
 ; RV64I-NEXT:  .LBB38_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2559,30 +2559,30 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB38_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB38_3
+; RV64IA-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB38_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB38_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB38_3: # in Loop: Header=BB38_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB38_1
+; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB38_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -2600,9 +2600,9 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB39_2
 ; RV32I-NEXT:  .LBB39_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB39_2 Depth=1
@@ -2619,10 +2619,10 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB39_1
+; RV32I-NEXT:    blt s2, a0, .LBB39_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB39_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB39_1
 ; RV32I-NEXT:  .LBB39_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -2635,30 +2635,30 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i8_seq_cst:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB39_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB39_3
+; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB39_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB39_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB39_3: # in Loop: Header=BB39_1 Depth=1
-; RV32IA-NEXT:    sc.w.aqrl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB39_1
+; RV32IA-NEXT:    sc.w.aqrl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB39_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -2672,9 +2672,9 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB39_2
 ; RV64I-NEXT:  .LBB39_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB39_2 Depth=1
@@ -2691,10 +2691,10 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB39_1
+; RV64I-NEXT:    blt s2, a0, .LBB39_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB39_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB39_1
 ; RV64I-NEXT:  .LBB39_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2707,30 +2707,30 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB39_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB39_3
+; RV64IA-NEXT:    lr.w.aqrl a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB39_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB39_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB39_3: # in Loop: Header=BB39_1 Depth=1
-; RV64IA-NEXT:    sc.w.aqrl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB39_1
+; RV64IA-NEXT:    sc.w.aqrl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB39_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -2748,9 +2748,9 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB40_2
 ; RV32I-NEXT:  .LBB40_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB40_2 Depth=1
@@ -2767,10 +2767,10 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB40_1
+; RV32I-NEXT:    bge s2, a0, .LBB40_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB40_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB40_1
 ; RV32I-NEXT:  .LBB40_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -2783,30 +2783,30 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB40_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB40_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB40_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB40_3: # in Loop: Header=BB40_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB40_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB40_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -2820,9 +2820,9 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB40_2
 ; RV64I-NEXT:  .LBB40_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB40_2 Depth=1
@@ -2839,10 +2839,10 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB40_1
+; RV64I-NEXT:    bge s2, a0, .LBB40_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB40_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB40_1
 ; RV64I-NEXT:  .LBB40_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2855,30 +2855,30 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB40_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB40_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB40_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB40_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB40_3: # in Loop: Header=BB40_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB40_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB40_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -2896,9 +2896,9 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB41_2
 ; RV32I-NEXT:  .LBB41_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB41_2 Depth=1
@@ -2915,10 +2915,10 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB41_1
+; RV32I-NEXT:    bge s2, a0, .LBB41_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB41_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB41_1
 ; RV32I-NEXT:  .LBB41_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -2931,30 +2931,30 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i8_acquire:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB41_3
+; RV32IA-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB41_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB41_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB41_3: # in Loop: Header=BB41_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB41_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB41_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -2968,9 +2968,9 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB41_2
 ; RV64I-NEXT:  .LBB41_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB41_2 Depth=1
@@ -2987,10 +2987,10 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB41_1
+; RV64I-NEXT:    bge s2, a0, .LBB41_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB41_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB41_1
 ; RV64I-NEXT:  .LBB41_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -3003,30 +3003,30 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB41_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB41_3
+; RV64IA-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB41_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB41_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB41_3: # in Loop: Header=BB41_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB41_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB41_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -3044,9 +3044,9 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB42_2
 ; RV32I-NEXT:  .LBB42_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB42_2 Depth=1
@@ -3063,10 +3063,10 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB42_1
+; RV32I-NEXT:    bge s2, a0, .LBB42_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB42_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB42_1
 ; RV32I-NEXT:  .LBB42_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -3079,30 +3079,30 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i8_release:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB42_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB42_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB42_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB42_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB42_3: # in Loop: Header=BB42_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB42_1
+; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB42_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -3116,9 +3116,9 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB42_2
 ; RV64I-NEXT:  .LBB42_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB42_2 Depth=1
@@ -3135,10 +3135,10 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB42_1
+; RV64I-NEXT:    bge s2, a0, .LBB42_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB42_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB42_1
 ; RV64I-NEXT:  .LBB42_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -3151,30 +3151,30 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB42_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB42_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB42_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB42_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB42_3: # in Loop: Header=BB42_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB42_1
+; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB42_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -3192,9 +3192,9 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB43_2
 ; RV32I-NEXT:  .LBB43_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB43_2 Depth=1
@@ -3211,10 +3211,10 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB43_1
+; RV32I-NEXT:    bge s2, a0, .LBB43_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB43_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB43_1
 ; RV32I-NEXT:  .LBB43_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -3227,30 +3227,30 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i8_acq_rel:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB43_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB43_3
+; RV32IA-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB43_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB43_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB43_3: # in Loop: Header=BB43_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB43_1
+; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB43_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -3264,9 +3264,9 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB43_2
 ; RV64I-NEXT:  .LBB43_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB43_2 Depth=1
@@ -3283,10 +3283,10 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB43_1
+; RV64I-NEXT:    bge s2, a0, .LBB43_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB43_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB43_1
 ; RV64I-NEXT:  .LBB43_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -3299,30 +3299,30 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB43_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB43_3
+; RV64IA-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB43_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB43_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB43_3: # in Loop: Header=BB43_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB43_1
+; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB43_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -3340,9 +3340,9 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB44_2
 ; RV32I-NEXT:  .LBB44_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB44_2 Depth=1
@@ -3359,10 +3359,10 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB44_1
+; RV32I-NEXT:    bge s2, a0, .LBB44_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB44_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB44_1
 ; RV32I-NEXT:  .LBB44_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -3375,30 +3375,30 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i8_seq_cst:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB44_3
+; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB44_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB44_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB44_3: # in Loop: Header=BB44_1 Depth=1
-; RV32IA-NEXT:    sc.w.aqrl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB44_1
+; RV32IA-NEXT:    sc.w.aqrl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB44_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -3412,9 +3412,9 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB44_2
 ; RV64I-NEXT:  .LBB44_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB44_2 Depth=1
@@ -3431,10 +3431,10 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB44_1
+; RV64I-NEXT:    bge s2, a0, .LBB44_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB44_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB44_1
 ; RV64I-NEXT:  .LBB44_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -3447,30 +3447,30 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB44_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB44_3
+; RV64IA-NEXT:    lr.w.aqrl a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB44_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB44_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB44_3: # in Loop: Header=BB44_1 Depth=1
-; RV64IA-NEXT:    sc.w.aqrl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB44_1
+; RV64IA-NEXT:    sc.w.aqrl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB44_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -3488,8 +3488,8 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB45_2
 ; RV32I-NEXT:  .LBB45_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB45_2 Depth=1
@@ -3505,10 +3505,10 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bltu s1, a0, .LBB45_1
+; RV32I-NEXT:    bltu s2, a0, .LBB45_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB45_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB45_1
 ; RV32I-NEXT:  .LBB45_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -3521,23 +3521,23 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a2, a1, .LBB45_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB45_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB45_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB45_3: # in Loop: Header=BB45_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB45_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -3552,8 +3552,8 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB45_2
 ; RV64I-NEXT:  .LBB45_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB45_2 Depth=1
@@ -3569,10 +3569,10 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a0, .LBB45_1
+; RV64I-NEXT:    bltu s2, a0, .LBB45_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB45_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB45_1
 ; RV64I-NEXT:  .LBB45_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -3585,23 +3585,23 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB45_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a2, a1, .LBB45_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB45_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB45_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB45_3: # in Loop: Header=BB45_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB45_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -3620,8 +3620,8 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB46_2
 ; RV32I-NEXT:  .LBB46_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB46_2 Depth=1
@@ -3637,10 +3637,10 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bltu s1, a0, .LBB46_1
+; RV32I-NEXT:    bltu s2, a0, .LBB46_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB46_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB46_1
 ; RV32I-NEXT:  .LBB46_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -3653,23 +3653,23 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i8_acquire:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a2, a1, .LBB46_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB46_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB46_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -3684,8 +3684,8 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB46_2
 ; RV64I-NEXT:  .LBB46_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB46_2 Depth=1
@@ -3701,10 +3701,10 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a0, .LBB46_1
+; RV64I-NEXT:    bltu s2, a0, .LBB46_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB46_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB46_1
 ; RV64I-NEXT:  .LBB46_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -3717,23 +3717,23 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a2, a1, .LBB46_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB46_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB46_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB46_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -3752,8 +3752,8 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB47_2
 ; RV32I-NEXT:  .LBB47_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB47_2 Depth=1
@@ -3769,10 +3769,10 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bltu s1, a0, .LBB47_1
+; RV32I-NEXT:    bltu s2, a0, .LBB47_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB47_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB47_1
 ; RV32I-NEXT:  .LBB47_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -3785,23 +3785,23 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i8_release:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a2, a1, .LBB47_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB47_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB47_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -3816,8 +3816,8 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB47_2
 ; RV64I-NEXT:  .LBB47_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB47_2 Depth=1
@@ -3833,10 +3833,10 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a0, .LBB47_1
+; RV64I-NEXT:    bltu s2, a0, .LBB47_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB47_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB47_1
 ; RV64I-NEXT:  .LBB47_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -3849,23 +3849,23 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a2, a1, .LBB47_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB47_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB47_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB47_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -3884,8 +3884,8 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB48_2
 ; RV32I-NEXT:  .LBB48_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB48_2 Depth=1
@@ -3901,10 +3901,10 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bltu s1, a0, .LBB48_1
+; RV32I-NEXT:    bltu s2, a0, .LBB48_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB48_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB48_1
 ; RV32I-NEXT:  .LBB48_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -3917,23 +3917,23 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i8_acq_rel:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a2, a1, .LBB48_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB48_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB48_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -3948,8 +3948,8 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB48_2
 ; RV64I-NEXT:  .LBB48_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB48_2 Depth=1
@@ -3965,10 +3965,10 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a0, .LBB48_1
+; RV64I-NEXT:    bltu s2, a0, .LBB48_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB48_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB48_1
 ; RV64I-NEXT:  .LBB48_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -3981,23 +3981,23 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a2, a1, .LBB48_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB48_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB48_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB48_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -4016,8 +4016,8 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB49_2
 ; RV32I-NEXT:  .LBB49_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB49_2 Depth=1
@@ -4033,10 +4033,10 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bltu s1, a0, .LBB49_1
+; RV32I-NEXT:    bltu s2, a0, .LBB49_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB49_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB49_1
 ; RV32I-NEXT:  .LBB49_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -4049,23 +4049,23 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i8_seq_cst:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a2, a1, .LBB49_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB49_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB49_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB49_3: # in Loop: Header=BB49_1 Depth=1
-; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB49_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -4080,8 +4080,8 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB49_2
 ; RV64I-NEXT:  .LBB49_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB49_2 Depth=1
@@ -4097,10 +4097,10 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a0, .LBB49_1
+; RV64I-NEXT:    bltu s2, a0, .LBB49_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB49_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB49_1
 ; RV64I-NEXT:  .LBB49_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -4113,23 +4113,23 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB49_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w.aqrl a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a2, a1, .LBB49_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB49_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB49_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB49_3: # in Loop: Header=BB49_1 Depth=1
-; RV64IA-NEXT:    sc.w.aqrl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.aqrl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB49_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -4148,8 +4148,8 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB50_2
 ; RV32I-NEXT:  .LBB50_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB50_2 Depth=1
@@ -4165,10 +4165,10 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bgeu s1, a0, .LBB50_1
+; RV32I-NEXT:    bgeu s2, a0, .LBB50_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB50_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB50_1
 ; RV32I-NEXT:  .LBB50_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -4181,23 +4181,23 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB50_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a2, .LBB50_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB50_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB50_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB50_3: # in Loop: Header=BB50_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB50_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -4212,8 +4212,8 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB50_2
 ; RV64I-NEXT:  .LBB50_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB50_2 Depth=1
@@ -4229,10 +4229,10 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a0, .LBB50_1
+; RV64I-NEXT:    bgeu s2, a0, .LBB50_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB50_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB50_1
 ; RV64I-NEXT:  .LBB50_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -4245,23 +4245,23 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB50_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a2, .LBB50_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB50_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB50_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB50_3: # in Loop: Header=BB50_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB50_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -4280,8 +4280,8 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB51_2
 ; RV32I-NEXT:  .LBB51_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB51_2 Depth=1
@@ -4297,10 +4297,10 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bgeu s1, a0, .LBB51_1
+; RV32I-NEXT:    bgeu s2, a0, .LBB51_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB51_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB51_1
 ; RV32I-NEXT:  .LBB51_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -4313,23 +4313,23 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i8_acquire:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a2, .LBB51_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB51_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB51_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -4344,8 +4344,8 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB51_2
 ; RV64I-NEXT:  .LBB51_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB51_2 Depth=1
@@ -4361,10 +4361,10 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a0, .LBB51_1
+; RV64I-NEXT:    bgeu s2, a0, .LBB51_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB51_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB51_1
 ; RV64I-NEXT:  .LBB51_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -4377,23 +4377,23 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a2, .LBB51_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB51_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB51_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB51_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -4412,8 +4412,8 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB52_2
 ; RV32I-NEXT:  .LBB52_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB52_2 Depth=1
@@ -4429,10 +4429,10 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bgeu s1, a0, .LBB52_1
+; RV32I-NEXT:    bgeu s2, a0, .LBB52_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB52_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB52_1
 ; RV32I-NEXT:  .LBB52_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -4445,23 +4445,23 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i8_release:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a2, .LBB52_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB52_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB52_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -4476,8 +4476,8 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB52_2
 ; RV64I-NEXT:  .LBB52_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB52_2 Depth=1
@@ -4493,10 +4493,10 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a0, .LBB52_1
+; RV64I-NEXT:    bgeu s2, a0, .LBB52_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB52_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB52_1
 ; RV64I-NEXT:  .LBB52_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -4509,23 +4509,23 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a2, .LBB52_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB52_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB52_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB52_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -4544,8 +4544,8 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB53_2
 ; RV32I-NEXT:  .LBB53_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB53_2 Depth=1
@@ -4561,10 +4561,10 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bgeu s1, a0, .LBB53_1
+; RV32I-NEXT:    bgeu s2, a0, .LBB53_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB53_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB53_1
 ; RV32I-NEXT:  .LBB53_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -4577,23 +4577,23 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i8_acq_rel:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w.aq a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a2, .LBB53_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB53_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB53_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -4608,8 +4608,8 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB53_2
 ; RV64I-NEXT:  .LBB53_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB53_2 Depth=1
@@ -4625,10 +4625,10 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a0, .LBB53_1
+; RV64I-NEXT:    bgeu s2, a0, .LBB53_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB53_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB53_1
 ; RV64I-NEXT:  .LBB53_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -4641,23 +4641,23 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w.aq a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a2, .LBB53_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB53_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB53_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB53_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -4676,8 +4676,8 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB54_2
 ; RV32I-NEXT:  .LBB54_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB54_2 Depth=1
@@ -4693,10 +4693,10 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bgeu s1, a0, .LBB54_1
+; RV32I-NEXT:    bgeu s2, a0, .LBB54_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB54_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB54_1
 ; RV32I-NEXT:  .LBB54_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -4709,23 +4709,23 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i8_seq_cst:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB54_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w.aqrl a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a2, .LBB54_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB54_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB54_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB54_3: # in Loop: Header=BB54_1 Depth=1
-; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB54_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -4740,8 +4740,8 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB54_2
 ; RV64I-NEXT:  .LBB54_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB54_2 Depth=1
@@ -4757,10 +4757,10 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a0, .LBB54_1
+; RV64I-NEXT:    bgeu s2, a0, .LBB54_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB54_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB54_1
 ; RV64I-NEXT:  .LBB54_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -4773,23 +4773,23 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB54_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w.aqrl a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a2, .LBB54_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB54_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB54_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB54_3: # in Loop: Header=BB54_1 Depth=1
-; RV64IA-NEXT:    sc.w.aqrl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.aqrl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB54_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -6868,9 +6868,9 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB90_2
 ; RV32I-NEXT:  .LBB90_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB90_2 Depth=1
@@ -6887,10 +6887,10 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB90_1
+; RV32I-NEXT:    blt s2, a0, .LBB90_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB90_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB90_1
 ; RV32I-NEXT:  .LBB90_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -6903,31 +6903,31 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB90_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB90_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB90_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB90_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB90_3: # in Loop: Header=BB90_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB90_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB90_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -6941,9 +6941,9 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB90_2
 ; RV64I-NEXT:  .LBB90_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB90_2 Depth=1
@@ -6960,10 +6960,10 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB90_1
+; RV64I-NEXT:    blt s2, a0, .LBB90_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB90_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB90_1
 ; RV64I-NEXT:  .LBB90_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -6976,31 +6976,31 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB90_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB90_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB90_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB90_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB90_3: # in Loop: Header=BB90_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB90_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB90_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -7018,9 +7018,9 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB91_2
 ; RV32I-NEXT:  .LBB91_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB91_2 Depth=1
@@ -7037,10 +7037,10 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB91_1
+; RV32I-NEXT:    blt s2, a0, .LBB91_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB91_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB91_1
 ; RV32I-NEXT:  .LBB91_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -7053,31 +7053,31 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i16_acquire:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB91_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB91_3
+; RV32IA-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB91_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB91_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB91_3: # in Loop: Header=BB91_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB91_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB91_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -7091,9 +7091,9 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB91_2
 ; RV64I-NEXT:  .LBB91_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB91_2 Depth=1
@@ -7110,10 +7110,10 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB91_1
+; RV64I-NEXT:    blt s2, a0, .LBB91_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB91_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB91_1
 ; RV64I-NEXT:  .LBB91_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -7126,31 +7126,31 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i16_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB91_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB91_3
+; RV64IA-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB91_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB91_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB91_3: # in Loop: Header=BB91_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB91_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB91_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -7168,9 +7168,9 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB92_2
 ; RV32I-NEXT:  .LBB92_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB92_2 Depth=1
@@ -7187,10 +7187,10 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB92_1
+; RV32I-NEXT:    blt s2, a0, .LBB92_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB92_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB92_1
 ; RV32I-NEXT:  .LBB92_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -7203,31 +7203,31 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i16_release:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB92_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB92_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB92_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB92_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB92_3: # in Loop: Header=BB92_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB92_1
+; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB92_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -7241,9 +7241,9 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB92_2
 ; RV64I-NEXT:  .LBB92_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB92_2 Depth=1
@@ -7260,10 +7260,10 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB92_1
+; RV64I-NEXT:    blt s2, a0, .LBB92_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB92_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB92_1
 ; RV64I-NEXT:  .LBB92_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -7276,31 +7276,31 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i16_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB92_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB92_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB92_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB92_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB92_3: # in Loop: Header=BB92_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB92_1
+; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB92_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -7318,9 +7318,9 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB93_2
 ; RV32I-NEXT:  .LBB93_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB93_2 Depth=1
@@ -7337,10 +7337,10 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB93_1
+; RV32I-NEXT:    blt s2, a0, .LBB93_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB93_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB93_1
 ; RV32I-NEXT:  .LBB93_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -7353,31 +7353,31 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i16_acq_rel:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB93_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB93_3
+; RV32IA-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB93_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB93_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB93_3: # in Loop: Header=BB93_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB93_1
+; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB93_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -7391,9 +7391,9 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB93_2
 ; RV64I-NEXT:  .LBB93_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB93_2 Depth=1
@@ -7410,10 +7410,10 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB93_1
+; RV64I-NEXT:    blt s2, a0, .LBB93_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB93_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB93_1
 ; RV64I-NEXT:  .LBB93_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -7426,31 +7426,31 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i16_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB93_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB93_3
+; RV64IA-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB93_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB93_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB93_3: # in Loop: Header=BB93_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB93_1
+; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB93_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -7468,9 +7468,9 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB94_2
 ; RV32I-NEXT:  .LBB94_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB94_2 Depth=1
@@ -7487,10 +7487,10 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB94_1
+; RV32I-NEXT:    blt s2, a0, .LBB94_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB94_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB94_1
 ; RV32I-NEXT:  .LBB94_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -7503,31 +7503,31 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i16_seq_cst:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB94_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB94_3
+; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB94_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB94_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB94_3: # in Loop: Header=BB94_1 Depth=1
-; RV32IA-NEXT:    sc.w.aqrl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB94_1
+; RV32IA-NEXT:    sc.w.aqrl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB94_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -7541,9 +7541,9 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB94_2
 ; RV64I-NEXT:  .LBB94_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB94_2 Depth=1
@@ -7560,10 +7560,10 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB94_1
+; RV64I-NEXT:    blt s2, a0, .LBB94_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB94_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB94_1
 ; RV64I-NEXT:  .LBB94_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -7576,31 +7576,31 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i16_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB94_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB94_3
+; RV64IA-NEXT:    lr.w.aqrl a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB94_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB94_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB94_3: # in Loop: Header=BB94_1 Depth=1
-; RV64IA-NEXT:    sc.w.aqrl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB94_1
+; RV64IA-NEXT:    sc.w.aqrl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB94_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -7618,9 +7618,9 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB95_2
 ; RV32I-NEXT:  .LBB95_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB95_2 Depth=1
@@ -7637,10 +7637,10 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB95_1
+; RV32I-NEXT:    bge s2, a0, .LBB95_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB95_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB95_1
 ; RV32I-NEXT:  .LBB95_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -7653,31 +7653,31 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB95_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB95_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB95_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB95_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB95_3: # in Loop: Header=BB95_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB95_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB95_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -7691,9 +7691,9 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB95_2
 ; RV64I-NEXT:  .LBB95_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB95_2 Depth=1
@@ -7710,10 +7710,10 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB95_1
+; RV64I-NEXT:    bge s2, a0, .LBB95_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB95_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB95_1
 ; RV64I-NEXT:  .LBB95_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -7726,31 +7726,31 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB95_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB95_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB95_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB95_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB95_3: # in Loop: Header=BB95_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB95_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB95_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -7768,9 +7768,9 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB96_2
 ; RV32I-NEXT:  .LBB96_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB96_2 Depth=1
@@ -7787,10 +7787,10 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB96_1
+; RV32I-NEXT:    bge s2, a0, .LBB96_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB96_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB96_1
 ; RV32I-NEXT:  .LBB96_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -7803,31 +7803,31 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i16_acquire:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB96_3
+; RV32IA-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB96_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB96_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB96_3: # in Loop: Header=BB96_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB96_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB96_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -7841,9 +7841,9 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB96_2
 ; RV64I-NEXT:  .LBB96_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB96_2 Depth=1
@@ -7860,10 +7860,10 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB96_1
+; RV64I-NEXT:    bge s2, a0, .LBB96_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB96_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB96_1
 ; RV64I-NEXT:  .LBB96_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -7876,31 +7876,31 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i16_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB96_3
+; RV64IA-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB96_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB96_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB96_3: # in Loop: Header=BB96_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB96_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB96_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -7918,9 +7918,9 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB97_2
 ; RV32I-NEXT:  .LBB97_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB97_2 Depth=1
@@ -7937,10 +7937,10 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB97_1
+; RV32I-NEXT:    bge s2, a0, .LBB97_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB97_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB97_1
 ; RV32I-NEXT:  .LBB97_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -7953,31 +7953,31 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i16_release:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB97_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB97_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB97_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB97_3: # in Loop: Header=BB97_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB97_1
+; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB97_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -7991,9 +7991,9 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB97_2
 ; RV64I-NEXT:  .LBB97_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB97_2 Depth=1
@@ -8010,10 +8010,10 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB97_1
+; RV64I-NEXT:    bge s2, a0, .LBB97_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB97_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB97_1
 ; RV64I-NEXT:  .LBB97_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -8026,31 +8026,31 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i16_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB97_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB97_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB97_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB97_3: # in Loop: Header=BB97_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB97_1
+; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB97_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -8068,9 +8068,9 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB98_2
 ; RV32I-NEXT:  .LBB98_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB98_2 Depth=1
@@ -8087,10 +8087,10 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB98_1
+; RV32I-NEXT:    bge s2, a0, .LBB98_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB98_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB98_1
 ; RV32I-NEXT:  .LBB98_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -8103,31 +8103,31 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i16_acq_rel:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB98_3
+; RV32IA-NEXT:    lr.w.aq a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB98_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB98_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB98_3: # in Loop: Header=BB98_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB98_1
+; RV32IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB98_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -8141,9 +8141,9 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB98_2
 ; RV64I-NEXT:  .LBB98_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB98_2 Depth=1
@@ -8160,10 +8160,10 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB98_1
+; RV64I-NEXT:    bge s2, a0, .LBB98_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB98_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB98_1
 ; RV64I-NEXT:  .LBB98_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -8176,31 +8176,31 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i16_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB98_3
+; RV64IA-NEXT:    lr.w.aq a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB98_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB98_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB98_3: # in Loop: Header=BB98_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB98_1
+; RV64IA-NEXT:    sc.w.rl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB98_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -8218,9 +8218,9 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB99_2
 ; RV32I-NEXT:  .LBB99_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB99_2 Depth=1
@@ -8237,10 +8237,10 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB99_1
+; RV32I-NEXT:    bge s2, a0, .LBB99_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB99_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB99_1
 ; RV32I-NEXT:  .LBB99_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
@@ -8253,31 +8253,31 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i16_seq_cst:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB99_3
+; RV32IA-NEXT:    lr.w.aqrl a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB99_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB99_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB99_3: # in Loop: Header=BB99_1 Depth=1
-; RV32IA-NEXT:    sc.w.aqrl a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB99_1
+; RV32IA-NEXT:    sc.w.aqrl a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB99_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    ret
@@ -8291,9 +8291,9 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB99_2
 ; RV64I-NEXT:  .LBB99_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB99_2 Depth=1
@@ -8310,10 +8310,10 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB99_1
+; RV64I-NEXT:    bge s2, a0, .LBB99_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB99_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB99_1
 ; RV64I-NEXT:  .LBB99_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -8326,31 +8326,31 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i16_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB99_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB99_3
+; RV64IA-NEXT:    lr.w.aqrl a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB99_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB99_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB99_3: # in Loop: Header=BB99_1 Depth=1
-; RV64IA-NEXT:    sc.w.aqrl a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB99_1
+; RV64IA-NEXT:    sc.w.aqrl a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB99_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    ret
@@ -8367,18 +8367,18 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB100_2
 ; RV32I-NEXT:  .LBB100_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB100_2 Depth=1
 ; RV32I-NEXT:    sh a1, 10(sp)
 ; RV32I-NEXT:    addi a1, sp, 10
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
@@ -8386,12 +8386,12 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    bnez a0, .LBB100_4
 ; RV32I-NEXT:  .LBB100_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bltu s1, a0, .LBB100_1
+; RV32I-NEXT:    bltu s3, a0, .LBB100_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB100_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB100_1
 ; RV32I-NEXT:  .LBB100_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -8405,7 +8405,7 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -8413,16 +8413,16 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB100_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a2, a1, .LBB100_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB100_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB100_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB100_3: # in Loop: Header=BB100_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB100_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -8436,18 +8436,18 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB100_2
 ; RV64I-NEXT:  .LBB100_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB100_2 Depth=1
 ; RV64I-NEXT:    sh a1, 6(sp)
 ; RV64I-NEXT:    addi a1, sp, 6
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a3, 0
 ; RV64I-NEXT:    li a4, 0
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
@@ -8455,12 +8455,12 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    bnez a0, .LBB100_4
 ; RV64I-NEXT:  .LBB100_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bltu s1, a0, .LBB100_1
+; RV64I-NEXT:    bltu s3, a0, .LBB100_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB100_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB100_1
 ; RV64I-NEXT:  .LBB100_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -8474,7 +8474,7 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -8482,16 +8482,16 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB100_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a2, a1, .LBB100_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB100_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB100_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB100_3: # in Loop: Header=BB100_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB100_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -8509,12 +8509,12 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB101_2
 ; RV32I-NEXT:  .LBB101_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB101_2 Depth=1
@@ -8522,18 +8522,18 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi a1, sp, 10
 ; RV32I-NEXT:    li a3, 2
 ; RV32I-NEXT:    li a4, 2
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV32I-NEXT:    lh a1, 10(sp)
 ; RV32I-NEXT:    bnez a0, .LBB101_4
 ; RV32I-NEXT:  .LBB101_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bltu s1, a0, .LBB101_1
+; RV32I-NEXT:    bltu s3, a0, .LBB101_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB101_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB101_1
 ; RV32I-NEXT:  .LBB101_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -8547,7 +8547,7 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i16_acquire:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -8555,16 +8555,16 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB101_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a2, a1, .LBB101_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB101_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB101_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB101_3: # in Loop: Header=BB101_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB101_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -8578,12 +8578,12 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB101_2
 ; RV64I-NEXT:  .LBB101_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB101_2 Depth=1
@@ -8591,18 +8591,18 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi a1, sp, 6
 ; RV64I-NEXT:    li a3, 2
 ; RV64I-NEXT:    li a4, 2
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV64I-NEXT:    lh a1, 6(sp)
 ; RV64I-NEXT:    bnez a0, .LBB101_4
 ; RV64I-NEXT:  .LBB101_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bltu s1, a0, .LBB101_1
+; RV64I-NEXT:    bltu s3, a0, .LBB101_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB101_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB101_1
 ; RV64I-NEXT:  .LBB101_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -8616,7 +8616,7 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i16_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -8624,16 +8624,16 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB101_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a2, a1, .LBB101_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB101_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB101_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB101_3: # in Loop: Header=BB101_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB101_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -8651,31 +8651,31 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB102_2
 ; RV32I-NEXT:  .LBB102_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB102_2 Depth=1
 ; RV32I-NEXT:    sh a1, 10(sp)
 ; RV32I-NEXT:    addi a1, sp, 10
 ; RV32I-NEXT:    li a3, 3
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV32I-NEXT:    lh a1, 10(sp)
 ; RV32I-NEXT:    bnez a0, .LBB102_4
 ; RV32I-NEXT:  .LBB102_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bltu s1, a0, .LBB102_1
+; RV32I-NEXT:    bltu s3, a0, .LBB102_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB102_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB102_1
 ; RV32I-NEXT:  .LBB102_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -8689,7 +8689,7 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i16_release:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -8697,16 +8697,16 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB102_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a2, a1, .LBB102_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB102_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB102_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB102_3: # in Loop: Header=BB102_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB102_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -8720,31 +8720,31 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB102_2
 ; RV64I-NEXT:  .LBB102_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB102_2 Depth=1
 ; RV64I-NEXT:    sh a1, 6(sp)
 ; RV64I-NEXT:    addi a1, sp, 6
 ; RV64I-NEXT:    li a3, 3
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a4, 0
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV64I-NEXT:    lh a1, 6(sp)
 ; RV64I-NEXT:    bnez a0, .LBB102_4
 ; RV64I-NEXT:  .LBB102_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bltu s1, a0, .LBB102_1
+; RV64I-NEXT:    bltu s3, a0, .LBB102_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB102_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB102_1
 ; RV64I-NEXT:  .LBB102_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -8758,7 +8758,7 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i16_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -8766,16 +8766,16 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB102_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a2, a1, .LBB102_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB102_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB102_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB102_3: # in Loop: Header=BB102_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB102_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -8793,12 +8793,12 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB103_2
 ; RV32I-NEXT:  .LBB103_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB103_2 Depth=1
@@ -8806,18 +8806,18 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi a1, sp, 10
 ; RV32I-NEXT:    li a3, 4
 ; RV32I-NEXT:    li a4, 2
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV32I-NEXT:    lh a1, 10(sp)
 ; RV32I-NEXT:    bnez a0, .LBB103_4
 ; RV32I-NEXT:  .LBB103_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bltu s1, a0, .LBB103_1
+; RV32I-NEXT:    bltu s3, a0, .LBB103_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB103_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB103_1
 ; RV32I-NEXT:  .LBB103_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -8831,7 +8831,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i16_acq_rel:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -8839,16 +8839,16 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB103_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a2, a1, .LBB103_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB103_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB103_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB103_3: # in Loop: Header=BB103_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB103_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -8862,12 +8862,12 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB103_2
 ; RV64I-NEXT:  .LBB103_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB103_2 Depth=1
@@ -8875,18 +8875,18 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi a1, sp, 6
 ; RV64I-NEXT:    li a3, 4
 ; RV64I-NEXT:    li a4, 2
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV64I-NEXT:    lh a1, 6(sp)
 ; RV64I-NEXT:    bnez a0, .LBB103_4
 ; RV64I-NEXT:  .LBB103_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bltu s1, a0, .LBB103_1
+; RV64I-NEXT:    bltu s3, a0, .LBB103_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB103_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB103_1
 ; RV64I-NEXT:  .LBB103_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -8900,7 +8900,7 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i16_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -8908,16 +8908,16 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB103_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a2, a1, .LBB103_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB103_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB103_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB103_3: # in Loop: Header=BB103_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB103_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -8935,12 +8935,12 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB104_2
 ; RV32I-NEXT:  .LBB104_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB104_2 Depth=1
@@ -8948,18 +8948,18 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi a1, sp, 10
 ; RV32I-NEXT:    li a3, 5
 ; RV32I-NEXT:    li a4, 5
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV32I-NEXT:    lh a1, 10(sp)
 ; RV32I-NEXT:    bnez a0, .LBB104_4
 ; RV32I-NEXT:  .LBB104_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bltu s1, a0, .LBB104_1
+; RV32I-NEXT:    bltu s3, a0, .LBB104_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB104_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB104_1
 ; RV32I-NEXT:  .LBB104_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -8973,7 +8973,7 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i16_seq_cst:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -8981,16 +8981,16 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB104_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a2, a1, .LBB104_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB104_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB104_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB104_3: # in Loop: Header=BB104_1 Depth=1
-; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB104_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -9004,12 +9004,12 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB104_2
 ; RV64I-NEXT:  .LBB104_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB104_2 Depth=1
@@ -9017,18 +9017,18 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi a1, sp, 6
 ; RV64I-NEXT:    li a3, 5
 ; RV64I-NEXT:    li a4, 5
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV64I-NEXT:    lh a1, 6(sp)
 ; RV64I-NEXT:    bnez a0, .LBB104_4
 ; RV64I-NEXT:  .LBB104_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bltu s1, a0, .LBB104_1
+; RV64I-NEXT:    bltu s3, a0, .LBB104_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB104_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB104_1
 ; RV64I-NEXT:  .LBB104_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -9042,7 +9042,7 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i16_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -9050,16 +9050,16 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB104_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w.aqrl a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a2, a1, .LBB104_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB104_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB104_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB104_3: # in Loop: Header=BB104_1 Depth=1
-; RV64IA-NEXT:    sc.w.aqrl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.aqrl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB104_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -9077,18 +9077,18 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB105_2
 ; RV32I-NEXT:  .LBB105_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB105_2 Depth=1
 ; RV32I-NEXT:    sh a1, 10(sp)
 ; RV32I-NEXT:    addi a1, sp, 10
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
@@ -9096,12 +9096,12 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    bnez a0, .LBB105_4
 ; RV32I-NEXT:  .LBB105_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bgeu s1, a0, .LBB105_1
+; RV32I-NEXT:    bgeu s3, a0, .LBB105_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB105_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB105_1
 ; RV32I-NEXT:  .LBB105_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -9115,7 +9115,7 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -9123,16 +9123,16 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB105_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a2, .LBB105_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB105_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB105_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB105_3: # in Loop: Header=BB105_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB105_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -9146,18 +9146,18 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB105_2
 ; RV64I-NEXT:  .LBB105_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB105_2 Depth=1
 ; RV64I-NEXT:    sh a1, 6(sp)
 ; RV64I-NEXT:    addi a1, sp, 6
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a3, 0
 ; RV64I-NEXT:    li a4, 0
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
@@ -9165,12 +9165,12 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    bnez a0, .LBB105_4
 ; RV64I-NEXT:  .LBB105_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bgeu s1, a0, .LBB105_1
+; RV64I-NEXT:    bgeu s3, a0, .LBB105_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB105_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB105_1
 ; RV64I-NEXT:  .LBB105_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -9184,7 +9184,7 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -9192,16 +9192,16 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB105_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a2, .LBB105_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB105_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB105_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB105_3: # in Loop: Header=BB105_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB105_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -9219,12 +9219,12 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB106_2
 ; RV32I-NEXT:  .LBB106_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB106_2 Depth=1
@@ -9232,18 +9232,18 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi a1, sp, 10
 ; RV32I-NEXT:    li a3, 2
 ; RV32I-NEXT:    li a4, 2
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV32I-NEXT:    lh a1, 10(sp)
 ; RV32I-NEXT:    bnez a0, .LBB106_4
 ; RV32I-NEXT:  .LBB106_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bgeu s1, a0, .LBB106_1
+; RV32I-NEXT:    bgeu s3, a0, .LBB106_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB106_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB106_1
 ; RV32I-NEXT:  .LBB106_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -9257,7 +9257,7 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i16_acquire:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -9265,16 +9265,16 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB106_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a2, .LBB106_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB106_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB106_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB106_3: # in Loop: Header=BB106_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB106_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -9288,12 +9288,12 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB106_2
 ; RV64I-NEXT:  .LBB106_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB106_2 Depth=1
@@ -9301,18 +9301,18 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi a1, sp, 6
 ; RV64I-NEXT:    li a3, 2
 ; RV64I-NEXT:    li a4, 2
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV64I-NEXT:    lh a1, 6(sp)
 ; RV64I-NEXT:    bnez a0, .LBB106_4
 ; RV64I-NEXT:  .LBB106_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bgeu s1, a0, .LBB106_1
+; RV64I-NEXT:    bgeu s3, a0, .LBB106_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB106_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB106_1
 ; RV64I-NEXT:  .LBB106_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -9326,7 +9326,7 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i16_acquire:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -9334,16 +9334,16 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB106_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a2, .LBB106_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB106_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB106_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB106_3: # in Loop: Header=BB106_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB106_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -9361,31 +9361,31 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB107_2
 ; RV32I-NEXT:  .LBB107_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB107_2 Depth=1
 ; RV32I-NEXT:    sh a1, 10(sp)
 ; RV32I-NEXT:    addi a1, sp, 10
 ; RV32I-NEXT:    li a3, 3
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV32I-NEXT:    lh a1, 10(sp)
 ; RV32I-NEXT:    bnez a0, .LBB107_4
 ; RV32I-NEXT:  .LBB107_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bgeu s1, a0, .LBB107_1
+; RV32I-NEXT:    bgeu s3, a0, .LBB107_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB107_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB107_1
 ; RV32I-NEXT:  .LBB107_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -9399,7 +9399,7 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i16_release:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -9407,16 +9407,16 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB107_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a2, .LBB107_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB107_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB107_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB107_3: # in Loop: Header=BB107_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB107_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -9430,31 +9430,31 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB107_2
 ; RV64I-NEXT:  .LBB107_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB107_2 Depth=1
 ; RV64I-NEXT:    sh a1, 6(sp)
 ; RV64I-NEXT:    addi a1, sp, 6
 ; RV64I-NEXT:    li a3, 3
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a4, 0
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV64I-NEXT:    lh a1, 6(sp)
 ; RV64I-NEXT:    bnez a0, .LBB107_4
 ; RV64I-NEXT:  .LBB107_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bgeu s1, a0, .LBB107_1
+; RV64I-NEXT:    bgeu s3, a0, .LBB107_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB107_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB107_1
 ; RV64I-NEXT:  .LBB107_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -9468,7 +9468,7 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i16_release:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -9476,16 +9476,16 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB107_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a2, .LBB107_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB107_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB107_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB107_3: # in Loop: Header=BB107_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB107_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -9503,12 +9503,12 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB108_2
 ; RV32I-NEXT:  .LBB108_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB108_2 Depth=1
@@ -9516,18 +9516,18 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi a1, sp, 10
 ; RV32I-NEXT:    li a3, 4
 ; RV32I-NEXT:    li a4, 2
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV32I-NEXT:    lh a1, 10(sp)
 ; RV32I-NEXT:    bnez a0, .LBB108_4
 ; RV32I-NEXT:  .LBB108_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bgeu s1, a0, .LBB108_1
+; RV32I-NEXT:    bgeu s3, a0, .LBB108_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB108_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB108_1
 ; RV32I-NEXT:  .LBB108_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -9541,7 +9541,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i16_acq_rel:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -9549,16 +9549,16 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB108_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aq a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w.aq a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a2, .LBB108_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB108_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB108_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB108_3: # in Loop: Header=BB108_1 Depth=1
-; RV32IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB108_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -9572,12 +9572,12 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB108_2
 ; RV64I-NEXT:  .LBB108_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB108_2 Depth=1
@@ -9585,18 +9585,18 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi a1, sp, 6
 ; RV64I-NEXT:    li a3, 4
 ; RV64I-NEXT:    li a4, 2
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV64I-NEXT:    lh a1, 6(sp)
 ; RV64I-NEXT:    bnez a0, .LBB108_4
 ; RV64I-NEXT:  .LBB108_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bgeu s1, a0, .LBB108_1
+; RV64I-NEXT:    bgeu s3, a0, .LBB108_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB108_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB108_1
 ; RV64I-NEXT:  .LBB108_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -9610,7 +9610,7 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i16_acq_rel:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -9618,16 +9618,16 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB108_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aq a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w.aq a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a2, .LBB108_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB108_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB108_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB108_3: # in Loop: Header=BB108_1 Depth=1
-; RV64IA-NEXT:    sc.w.rl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.rl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB108_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -9645,12 +9645,12 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB109_2
 ; RV32I-NEXT:  .LBB109_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB109_2 Depth=1
@@ -9658,18 +9658,18 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    addi a1, sp, 10
 ; RV32I-NEXT:    li a3, 5
 ; RV32I-NEXT:    li a4, 5
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV32I-NEXT:    lh a1, 10(sp)
 ; RV32I-NEXT:    bnez a0, .LBB109_4
 ; RV32I-NEXT:  .LBB109_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bgeu s1, a0, .LBB109_1
+; RV32I-NEXT:    bgeu s3, a0, .LBB109_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB109_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB109_1
 ; RV32I-NEXT:  .LBB109_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
@@ -9683,7 +9683,7 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -9691,16 +9691,16 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB109_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w.aqrl a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w.aqrl a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a2, .LBB109_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB109_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB109_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB109_3: # in Loop: Header=BB109_1 Depth=1
-; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a6)
+; RV32IA-NEXT:    sc.w.aqrl a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB109_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -9714,12 +9714,12 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB109_2
 ; RV64I-NEXT:  .LBB109_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB109_2 Depth=1
@@ -9727,18 +9727,18 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    addi a1, sp, 6
 ; RV64I-NEXT:    li a3, 5
 ; RV64I-NEXT:    li a4, 5
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
 ; RV64I-NEXT:    lh a1, 6(sp)
 ; RV64I-NEXT:    bnez a0, .LBB109_4
 ; RV64I-NEXT:  .LBB109_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bgeu s1, a0, .LBB109_1
+; RV64I-NEXT:    bgeu s3, a0, .LBB109_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB109_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB109_1
 ; RV64I-NEXT:  .LBB109_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
@@ -9752,7 +9752,7 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i16_seq_cst:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -9760,16 +9760,16 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB109_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w.aqrl a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w.aqrl a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a2, .LBB109_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB109_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB109_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB109_3: # in Loop: Header=BB109_1 Depth=1
-; RV64IA-NEXT:    sc.w.aqrl a5, a5, (a6)
+; RV64IA-NEXT:    sc.w.aqrl a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB109_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -11099,8 +11099,8 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB145_2
 ; RV64I-NEXT:  .LBB145_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB145_2 Depth=1
@@ -11115,10 +11115,10 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB145_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a3, .LBB145_1
+; RV64I-NEXT:    blt s2, a3, .LBB145_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB145_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB145_1
 ; RV64I-NEXT:  .LBB145_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11188,8 +11188,8 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB146_2
 ; RV64I-NEXT:  .LBB146_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB146_2 Depth=1
@@ -11204,10 +11204,10 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB146_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a3, .LBB146_1
+; RV64I-NEXT:    blt s2, a3, .LBB146_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB146_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB146_1
 ; RV64I-NEXT:  .LBB146_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11277,8 +11277,8 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB147_2
 ; RV64I-NEXT:  .LBB147_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB147_2 Depth=1
@@ -11293,10 +11293,10 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB147_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a3, .LBB147_1
+; RV64I-NEXT:    blt s2, a3, .LBB147_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB147_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB147_1
 ; RV64I-NEXT:  .LBB147_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11366,8 +11366,8 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB148_2
 ; RV64I-NEXT:  .LBB148_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB148_2 Depth=1
@@ -11382,10 +11382,10 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB148_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a3, .LBB148_1
+; RV64I-NEXT:    blt s2, a3, .LBB148_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB148_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB148_1
 ; RV64I-NEXT:  .LBB148_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11455,8 +11455,8 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB149_2
 ; RV64I-NEXT:  .LBB149_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB149_2 Depth=1
@@ -11471,10 +11471,10 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB149_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a3, .LBB149_1
+; RV64I-NEXT:    blt s2, a3, .LBB149_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB149_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB149_1
 ; RV64I-NEXT:  .LBB149_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11544,8 +11544,8 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB150_2
 ; RV64I-NEXT:  .LBB150_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB150_2 Depth=1
@@ -11560,10 +11560,10 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB150_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a3, .LBB150_1
+; RV64I-NEXT:    bge s2, a3, .LBB150_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB150_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB150_1
 ; RV64I-NEXT:  .LBB150_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11633,8 +11633,8 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB151_2
 ; RV64I-NEXT:  .LBB151_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB151_2 Depth=1
@@ -11649,10 +11649,10 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB151_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a3, .LBB151_1
+; RV64I-NEXT:    bge s2, a3, .LBB151_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB151_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB151_1
 ; RV64I-NEXT:  .LBB151_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11722,8 +11722,8 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB152_2
 ; RV64I-NEXT:  .LBB152_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB152_2 Depth=1
@@ -11738,10 +11738,10 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB152_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a3, .LBB152_1
+; RV64I-NEXT:    bge s2, a3, .LBB152_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB152_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB152_1
 ; RV64I-NEXT:  .LBB152_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11811,8 +11811,8 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB153_2
 ; RV64I-NEXT:  .LBB153_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB153_2 Depth=1
@@ -11827,10 +11827,10 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB153_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a3, .LBB153_1
+; RV64I-NEXT:    bge s2, a3, .LBB153_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB153_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB153_1
 ; RV64I-NEXT:  .LBB153_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11900,8 +11900,8 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB154_2
 ; RV64I-NEXT:  .LBB154_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB154_2 Depth=1
@@ -11916,10 +11916,10 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB154_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a3, .LBB154_1
+; RV64I-NEXT:    bge s2, a3, .LBB154_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB154_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB154_1
 ; RV64I-NEXT:  .LBB154_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -11989,8 +11989,8 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB155_2
 ; RV64I-NEXT:  .LBB155_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB155_2 Depth=1
@@ -12005,10 +12005,10 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB155_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a3, .LBB155_1
+; RV64I-NEXT:    bltu s2, a3, .LBB155_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB155_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB155_1
 ; RV64I-NEXT:  .LBB155_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12078,8 +12078,8 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB156_2
 ; RV64I-NEXT:  .LBB156_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB156_2 Depth=1
@@ -12094,10 +12094,10 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB156_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a3, .LBB156_1
+; RV64I-NEXT:    bltu s2, a3, .LBB156_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB156_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB156_1
 ; RV64I-NEXT:  .LBB156_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12167,8 +12167,8 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB157_2
 ; RV64I-NEXT:  .LBB157_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB157_2 Depth=1
@@ -12183,10 +12183,10 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB157_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a3, .LBB157_1
+; RV64I-NEXT:    bltu s2, a3, .LBB157_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB157_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB157_1
 ; RV64I-NEXT:  .LBB157_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12256,8 +12256,8 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB158_2
 ; RV64I-NEXT:  .LBB158_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB158_2 Depth=1
@@ -12272,10 +12272,10 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB158_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a3, .LBB158_1
+; RV64I-NEXT:    bltu s2, a3, .LBB158_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB158_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB158_1
 ; RV64I-NEXT:  .LBB158_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12345,8 +12345,8 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB159_2
 ; RV64I-NEXT:  .LBB159_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB159_2 Depth=1
@@ -12361,10 +12361,10 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB159_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a3, .LBB159_1
+; RV64I-NEXT:    bltu s2, a3, .LBB159_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB159_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB159_1
 ; RV64I-NEXT:  .LBB159_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12434,8 +12434,8 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB160_2
 ; RV64I-NEXT:  .LBB160_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB160_2 Depth=1
@@ -12450,10 +12450,10 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB160_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a3, .LBB160_1
+; RV64I-NEXT:    bgeu s2, a3, .LBB160_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB160_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB160_1
 ; RV64I-NEXT:  .LBB160_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12523,8 +12523,8 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB161_2
 ; RV64I-NEXT:  .LBB161_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB161_2 Depth=1
@@ -12539,10 +12539,10 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB161_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a3, .LBB161_1
+; RV64I-NEXT:    bgeu s2, a3, .LBB161_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB161_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB161_1
 ; RV64I-NEXT:  .LBB161_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12612,8 +12612,8 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB162_2
 ; RV64I-NEXT:  .LBB162_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB162_2 Depth=1
@@ -12628,10 +12628,10 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB162_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a3, .LBB162_1
+; RV64I-NEXT:    bgeu s2, a3, .LBB162_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB162_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB162_1
 ; RV64I-NEXT:  .LBB162_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12701,8 +12701,8 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB163_2
 ; RV64I-NEXT:  .LBB163_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB163_2 Depth=1
@@ -12717,10 +12717,10 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB163_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a3, .LBB163_1
+; RV64I-NEXT:    bgeu s2, a3, .LBB163_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB163_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB163_1
 ; RV64I-NEXT:  .LBB163_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -12790,8 +12790,8 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB164_2
 ; RV64I-NEXT:  .LBB164_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB164_2 Depth=1
@@ -12806,10 +12806,10 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB164_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a3, .LBB164_1
+; RV64I-NEXT:    bgeu s2, a3, .LBB164_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB164_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB164_1
 ; RV64I-NEXT:  .LBB164_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index a8785f95d0367..7b4d37985f988 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -584,9 +584,9 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB10_2
 ; RV32I-NEXT:  .LBB10_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB10_2 Depth=1
@@ -603,10 +603,10 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB10_1
+; RV32I-NEXT:    blt s2, a0, .LBB10_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB10_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB10_1
 ; RV32I-NEXT:  .LBB10_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 24
@@ -620,30 +620,30 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB10_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB10_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB10_3: # in Loop: Header=BB10_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB10_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB10_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    slli a0, a0, 24
@@ -659,9 +659,9 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB10_2
 ; RV64I-NEXT:  .LBB10_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB10_2 Depth=1
@@ -678,10 +678,10 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB10_1
+; RV64I-NEXT:    blt s2, a0, .LBB10_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB10_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB10_1
 ; RV64I-NEXT:  .LBB10_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 56
@@ -695,30 +695,30 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB10_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB10_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB10_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB10_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB10_3: # in Loop: Header=BB10_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB10_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB10_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    slli a0, a0, 56
@@ -738,9 +738,9 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 24
-; RV32I-NEXT:    srai s1, a0, 24
+; RV32I-NEXT:    srai s2, a0, 24
 ; RV32I-NEXT:    j .LBB11_2
 ; RV32I-NEXT:  .LBB11_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB11_2 Depth=1
@@ -757,10 +757,10 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB11_1
+; RV32I-NEXT:    bge s2, a0, .LBB11_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB11_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB11_1
 ; RV32I-NEXT:  .LBB11_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 24
@@ -774,30 +774,30 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    li a4, 255
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 24
 ; RV32IA-NEXT:    srai a1, a1, 24
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 24
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB11_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB11_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB11_3: # in Loop: Header=BB11_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB11_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB11_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    slli a0, a0, 24
@@ -813,9 +813,9 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 56
-; RV64I-NEXT:    srai s1, a0, 56
+; RV64I-NEXT:    srai s2, a0, 56
 ; RV64I-NEXT:    j .LBB11_2
 ; RV64I-NEXT:  .LBB11_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB11_2 Depth=1
@@ -832,10 +832,10 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB11_1
+; RV64I-NEXT:    bge s2, a0, .LBB11_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB11_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB11_1
 ; RV64I-NEXT:  .LBB11_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 56
@@ -849,30 +849,30 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    li a4, 255
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 56
 ; RV64IA-NEXT:    srai a1, a1, 56
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 56
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB11_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB11_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB11_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB11_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB11_3: # in Loop: Header=BB11_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB11_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB11_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    slli a0, a0, 56
@@ -892,8 +892,8 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB12_2
 ; RV32I-NEXT:  .LBB12_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB12_2 Depth=1
@@ -909,10 +909,10 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bltu s1, a0, .LBB12_1
+; RV32I-NEXT:    bltu s2, a0, .LBB12_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB12_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB12_1
 ; RV32I-NEXT:  .LBB12_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 24
@@ -926,23 +926,23 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a2, a1, .LBB12_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB12_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB12_3: # in Loop: Header=BB12_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB12_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -959,8 +959,8 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB12_2
 ; RV64I-NEXT:  .LBB12_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB12_2 Depth=1
@@ -976,10 +976,10 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a0, .LBB12_1
+; RV64I-NEXT:    bltu s2, a0, .LBB12_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB12_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB12_1
 ; RV64I-NEXT:  .LBB12_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 56
@@ -993,23 +993,23 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB12_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a2, a1, .LBB12_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB12_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB12_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB12_3: # in Loop: Header=BB12_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB12_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -1030,8 +1030,8 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lbu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    andi s1, a1, 255
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    andi s2, a1, 255
 ; RV32I-NEXT:    j .LBB13_2
 ; RV32I-NEXT:  .LBB13_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB13_2 Depth=1
@@ -1047,10 +1047,10 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV32I-NEXT:    andi a0, a3, 255
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bgeu s1, a0, .LBB13_1
+; RV32I-NEXT:    bgeu s2, a0, .LBB13_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB13_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB13_1
 ; RV32I-NEXT:  .LBB13_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 24
@@ -1064,23 +1064,23 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    li a3, 255
 ; RV32IA-NEXT:    sll a3, a3, a0
 ; RV32IA-NEXT:    andi a1, a1, 255
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a4, (a6)
-; RV32IA-NEXT:    and a2, a4, a3
+; RV32IA-NEXT:    lr.w a4, (a2)
+; RV32IA-NEXT:    and a6, a4, a3
 ; RV32IA-NEXT:    mv a5, a4
-; RV32IA-NEXT:    bgeu a1, a2, .LBB13_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB13_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a4, a1
 ; RV32IA-NEXT:    and a5, a5, a3
 ; RV32IA-NEXT:    xor a5, a4, a5
 ; RV32IA-NEXT:  .LBB13_3: # in Loop: Header=BB13_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB13_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a4, a0
@@ -1097,8 +1097,8 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lbu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    andi s1, a1, 255
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    andi s2, a1, 255
 ; RV64I-NEXT:    j .LBB13_2
 ; RV64I-NEXT:  .LBB13_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB13_2 Depth=1
@@ -1114,10 +1114,10 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    andi a0, a3, 255
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a0, .LBB13_1
+; RV64I-NEXT:    bgeu s2, a0, .LBB13_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB13_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB13_1
 ; RV64I-NEXT:  .LBB13_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 56
@@ -1131,23 +1131,23 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i8_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    li a3, 255
 ; RV64IA-NEXT:    sllw a3, a3, a0
 ; RV64IA-NEXT:    andi a1, a1, 255
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB13_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a4, (a6)
-; RV64IA-NEXT:    and a2, a4, a3
+; RV64IA-NEXT:    lr.w a4, (a2)
+; RV64IA-NEXT:    and a6, a4, a3
 ; RV64IA-NEXT:    mv a5, a4
-; RV64IA-NEXT:    bgeu a1, a2, .LBB13_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB13_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB13_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a4, a1
 ; RV64IA-NEXT:    and a5, a5, a3
 ; RV64IA-NEXT:    xor a5, a4, a5
 ; RV64IA-NEXT:  .LBB13_3: # in Loop: Header=BB13_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB13_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a4, a0
@@ -1636,9 +1636,9 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB21_2
 ; RV32I-NEXT:  .LBB21_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB21_2 Depth=1
@@ -1655,10 +1655,10 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    blt s1, a0, .LBB21_1
+; RV32I-NEXT:    blt s2, a0, .LBB21_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB21_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB21_1
 ; RV32I-NEXT:  .LBB21_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 16
@@ -1672,31 +1672,31 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_max_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a4, a1, .LBB21_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a7, a1, .LBB21_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB21_3: # in Loop: Header=BB21_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB21_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB21_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    slli a0, a0, 16
@@ -1712,9 +1712,9 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB21_2
 ; RV64I-NEXT:  .LBB21_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB21_2 Depth=1
@@ -1731,10 +1731,10 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a0, .LBB21_1
+; RV64I-NEXT:    blt s2, a0, .LBB21_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB21_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB21_1
 ; RV64I-NEXT:  .LBB21_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 48
@@ -1748,31 +1748,31 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_max_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a4, a1, .LBB21_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a7, a1, .LBB21_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB21_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB21_3: # in Loop: Header=BB21_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB21_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB21_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    slli a0, a0, 48
@@ -1792,9 +1792,9 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lhu a3, 0(a0)
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    slli a0, a1, 16
-; RV32I-NEXT:    srai s1, a0, 16
+; RV32I-NEXT:    srai s2, a0, 16
 ; RV32I-NEXT:    j .LBB22_2
 ; RV32I-NEXT:  .LBB22_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB22_2 Depth=1
@@ -1811,10 +1811,10 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bge s1, a0, .LBB22_1
+; RV32I-NEXT:    bge s2, a0, .LBB22_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB22_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    j .LBB22_1
 ; RV32I-NEXT:  .LBB22_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 16
@@ -1828,31 +1828,31 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_min_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    andi a3, a0, 24
 ; RV32IA-NEXT:    lui a4, 16
 ; RV32IA-NEXT:    addi a4, a4, -1
-; RV32IA-NEXT:    sll a7, a4, a0
+; RV32IA-NEXT:    sll a4, a4, a0
 ; RV32IA-NEXT:    slli a1, a1, 16
 ; RV32IA-NEXT:    srai a1, a1, 16
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:    li a5, 16
 ; RV32IA-NEXT:    sub a3, a5, a3
 ; RV32IA-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a5, (a6)
-; RV32IA-NEXT:    and a4, a5, a7
-; RV32IA-NEXT:    mv a2, a5
-; RV32IA-NEXT:    sll a4, a4, a3
-; RV32IA-NEXT:    sra a4, a4, a3
-; RV32IA-NEXT:    bge a1, a4, .LBB22_3
+; RV32IA-NEXT:    lr.w a5, (a2)
+; RV32IA-NEXT:    and a7, a5, a4
+; RV32IA-NEXT:    mv a6, a5
+; RV32IA-NEXT:    sll a7, a7, a3
+; RV32IA-NEXT:    sra a7, a7, a3
+; RV32IA-NEXT:    bge a1, a7, .LBB22_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
-; RV32IA-NEXT:    xor a2, a5, a1
-; RV32IA-NEXT:    and a2, a2, a7
-; RV32IA-NEXT:    xor a2, a5, a2
+; RV32IA-NEXT:    xor a6, a5, a1
+; RV32IA-NEXT:    and a6, a6, a4
+; RV32IA-NEXT:    xor a6, a5, a6
 ; RV32IA-NEXT:  .LBB22_3: # in Loop: Header=BB22_1 Depth=1
-; RV32IA-NEXT:    sc.w a2, a2, (a6)
-; RV32IA-NEXT:    bnez a2, .LBB22_1
+; RV32IA-NEXT:    sc.w a6, a6, (a2)
+; RV32IA-NEXT:    bnez a6, .LBB22_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a5, a0
 ; RV32IA-NEXT:    slli a0, a0, 16
@@ -1868,9 +1868,9 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lhu a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    slli a0, a1, 48
-; RV64I-NEXT:    srai s1, a0, 48
+; RV64I-NEXT:    srai s2, a0, 48
 ; RV64I-NEXT:    j .LBB22_2
 ; RV64I-NEXT:  .LBB22_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB22_2 Depth=1
@@ -1887,10 +1887,10 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a0, .LBB22_1
+; RV64I-NEXT:    bge s2, a0, .LBB22_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB22_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB22_1
 ; RV64I-NEXT:  .LBB22_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 48
@@ -1904,31 +1904,31 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_min_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    andi a3, a0, 24
 ; RV64IA-NEXT:    lui a4, 16
 ; RV64IA-NEXT:    addiw a4, a4, -1
-; RV64IA-NEXT:    sllw a7, a4, a0
+; RV64IA-NEXT:    sllw a4, a4, a0
 ; RV64IA-NEXT:    slli a1, a1, 48
 ; RV64IA-NEXT:    srai a1, a1, 48
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:    li a5, 48
 ; RV64IA-NEXT:    sub a3, a5, a3
 ; RV64IA-NEXT:  .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a5, (a6)
-; RV64IA-NEXT:    and a4, a5, a7
-; RV64IA-NEXT:    mv a2, a5
-; RV64IA-NEXT:    sll a4, a4, a3
-; RV64IA-NEXT:    sra a4, a4, a3
-; RV64IA-NEXT:    bge a1, a4, .LBB22_3
+; RV64IA-NEXT:    lr.w a5, (a2)
+; RV64IA-NEXT:    and a7, a5, a4
+; RV64IA-NEXT:    mv a6, a5
+; RV64IA-NEXT:    sll a7, a7, a3
+; RV64IA-NEXT:    sra a7, a7, a3
+; RV64IA-NEXT:    bge a1, a7, .LBB22_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB22_1 Depth=1
-; RV64IA-NEXT:    xor a2, a5, a1
-; RV64IA-NEXT:    and a2, a2, a7
-; RV64IA-NEXT:    xor a2, a5, a2
+; RV64IA-NEXT:    xor a6, a5, a1
+; RV64IA-NEXT:    and a6, a6, a4
+; RV64IA-NEXT:    xor a6, a5, a6
 ; RV64IA-NEXT:  .LBB22_3: # in Loop: Header=BB22_1 Depth=1
-; RV64IA-NEXT:    sc.w a2, a2, (a6)
-; RV64IA-NEXT:    bnez a2, .LBB22_1
+; RV64IA-NEXT:    sc.w a6, a6, (a2)
+; RV64IA-NEXT:    bnez a6, .LBB22_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a5, a0
 ; RV64IA-NEXT:    slli a0, a0, 48
@@ -1947,18 +1947,18 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB23_2
 ; RV32I-NEXT:  .LBB23_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB23_2 Depth=1
 ; RV32I-NEXT:    sh a1, 10(sp)
 ; RV32I-NEXT:    addi a1, sp, 10
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
@@ -1966,12 +1966,12 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    bnez a0, .LBB23_4
 ; RV32I-NEXT:  .LBB23_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bltu s1, a0, .LBB23_1
+; RV32I-NEXT:    bltu s3, a0, .LBB23_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB23_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB23_1
 ; RV32I-NEXT:  .LBB23_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a1, 16
@@ -1986,7 +1986,7 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umax_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -1994,16 +1994,16 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a2, a1, .LBB23_3
+; RV32IA-NEXT:    bgeu a6, a1, .LBB23_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB23_3: # in Loop: Header=BB23_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB23_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -2019,18 +2019,18 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB23_2
 ; RV64I-NEXT:  .LBB23_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB23_2 Depth=1
 ; RV64I-NEXT:    sh a1, 6(sp)
 ; RV64I-NEXT:    addi a1, sp, 6
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a3, 0
 ; RV64I-NEXT:    li a4, 0
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
@@ -2038,12 +2038,12 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    bnez a0, .LBB23_4
 ; RV64I-NEXT:  .LBB23_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bltu s1, a0, .LBB23_1
+; RV64I-NEXT:    bltu s3, a0, .LBB23_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB23_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB23_1
 ; RV64I-NEXT:  .LBB23_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a1, 48
@@ -2058,7 +2058,7 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umax_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -2066,16 +2066,16 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a2, a1, .LBB23_3
+; RV64IA-NEXT:    bgeu a6, a1, .LBB23_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB23_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB23_3: # in Loop: Header=BB23_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB23_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -2095,18 +2095,18 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and s1, s2, s0
+; RV32I-NEXT:    addi s2, a0, -1
+; RV32I-NEXT:    and s3, s0, s2
 ; RV32I-NEXT:    j .LBB24_2
 ; RV32I-NEXT:  .LBB24_1: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB24_2 Depth=1
 ; RV32I-NEXT:    sh a1, 10(sp)
 ; RV32I-NEXT:    addi a1, sp, 10
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:    call __atomic_compare_exchange_2@plt
@@ -2114,12 +2114,12 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    bnez a0, .LBB24_4
 ; RV32I-NEXT:  .LBB24_2: # %atomicrmw.start
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    and a0, a1, s2
 ; RV32I-NEXT:    mv a2, a1
-; RV32I-NEXT:    bgeu s1, a0, .LBB24_1
+; RV32I-NEXT:    bgeu s3, a0, .LBB24_1
 ; RV32I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV32I-NEXT:    # in Loop: Header=BB24_2 Depth=1
-; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    j .LBB24_1
 ; RV32I-NEXT:  .LBB24_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a1, 16
@@ -2134,7 +2134,7 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i16_monotonic:
 ; RV32IA:       # %bb.0:
-; RV32IA-NEXT:    andi a6, a0, -4
+; RV32IA-NEXT:    andi a2, a0, -4
 ; RV32IA-NEXT:    slli a0, a0, 3
 ; RV32IA-NEXT:    lui a3, 16
 ; RV32IA-NEXT:    addi a3, a3, -1
@@ -2142,16 +2142,16 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32IA-NEXT:    and a1, a1, a3
 ; RV32IA-NEXT:    sll a1, a1, a0
 ; RV32IA-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT:    lr.w a3, (a6)
-; RV32IA-NEXT:    and a2, a3, a4
+; RV32IA-NEXT:    lr.w a3, (a2)
+; RV32IA-NEXT:    and a6, a3, a4
 ; RV32IA-NEXT:    mv a5, a3
-; RV32IA-NEXT:    bgeu a1, a2, .LBB24_3
+; RV32IA-NEXT:    bgeu a1, a6, .LBB24_3
 ; RV32IA-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
 ; RV32IA-NEXT:    xor a5, a3, a1
 ; RV32IA-NEXT:    and a5, a5, a4
 ; RV32IA-NEXT:    xor a5, a3, a5
 ; RV32IA-NEXT:  .LBB24_3: # in Loop: Header=BB24_1 Depth=1
-; RV32IA-NEXT:    sc.w a5, a5, (a6)
+; RV32IA-NEXT:    sc.w a5, a5, (a2)
 ; RV32IA-NEXT:    bnez a5, .LBB24_1
 ; RV32IA-NEXT:  # %bb.4:
 ; RV32IA-NEXT:    srl a0, a3, a0
@@ -2167,18 +2167,18 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lhu a1, 0(a0)
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and s1, s2, s0
+; RV64I-NEXT:    addiw s2, a0, -1
+; RV64I-NEXT:    and s3, s0, s2
 ; RV64I-NEXT:    j .LBB24_2
 ; RV64I-NEXT:  .LBB24_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB24_2 Depth=1
 ; RV64I-NEXT:    sh a1, 6(sp)
 ; RV64I-NEXT:    addi a1, sp, 6
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a3, 0
 ; RV64I-NEXT:    li a4, 0
 ; RV64I-NEXT:    call __atomic_compare_exchange_2@plt
@@ -2186,12 +2186,12 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    bnez a0, .LBB24_4
 ; RV64I-NEXT:  .LBB24_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    and a0, a1, s2
 ; RV64I-NEXT:    mv a2, a1
-; RV64I-NEXT:    bgeu s1, a0, .LBB24_1
+; RV64I-NEXT:    bgeu s3, a0, .LBB24_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB24_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    j .LBB24_1
 ; RV64I-NEXT:  .LBB24_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a1, 48
@@ -2206,7 +2206,7 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ;
 ; RV64IA-LABEL: atomicrmw_umin_i16_monotonic:
 ; RV64IA:       # %bb.0:
-; RV64IA-NEXT:    andi a6, a0, -4
+; RV64IA-NEXT:    andi a2, a0, -4
 ; RV64IA-NEXT:    slliw a0, a0, 3
 ; RV64IA-NEXT:    lui a3, 16
 ; RV64IA-NEXT:    addiw a3, a3, -1
@@ -2214,16 +2214,16 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64IA-NEXT:    and a1, a1, a3
 ; RV64IA-NEXT:    sllw a1, a1, a0
 ; RV64IA-NEXT:  .LBB24_1: # =>This Inner Loop Header: Depth=1
-; RV64IA-NEXT:    lr.w a3, (a6)
-; RV64IA-NEXT:    and a2, a3, a4
+; RV64IA-NEXT:    lr.w a3, (a2)
+; RV64IA-NEXT:    and a6, a3, a4
 ; RV64IA-NEXT:    mv a5, a3
-; RV64IA-NEXT:    bgeu a1, a2, .LBB24_3
+; RV64IA-NEXT:    bgeu a1, a6, .LBB24_3
 ; RV64IA-NEXT:  # %bb.2: # in Loop: Header=BB24_1 Depth=1
 ; RV64IA-NEXT:    xor a5, a3, a1
 ; RV64IA-NEXT:    and a5, a5, a4
 ; RV64IA-NEXT:    xor a5, a3, a5
 ; RV64IA-NEXT:  .LBB24_3: # in Loop: Header=BB24_1 Depth=1
-; RV64IA-NEXT:    sc.w a5, a5, (a6)
+; RV64IA-NEXT:    sc.w a5, a5, (a2)
 ; RV64IA-NEXT:    bnez a5, .LBB24_1
 ; RV64IA-NEXT:  # %bb.4:
 ; RV64IA-NEXT:    srlw a0, a3, a0
@@ -2546,8 +2546,8 @@ define signext i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB32_2
 ; RV64I-NEXT:  .LBB32_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB32_2 Depth=1
@@ -2562,10 +2562,10 @@ define signext i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB32_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    blt s1, a3, .LBB32_1
+; RV64I-NEXT:    blt s2, a3, .LBB32_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB32_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB32_1
 ; RV64I-NEXT:  .LBB32_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2635,8 +2635,8 @@ define signext i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB33_2
 ; RV64I-NEXT:  .LBB33_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB33_2 Depth=1
@@ -2651,10 +2651,10 @@ define signext i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB33_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bge s1, a3, .LBB33_1
+; RV64I-NEXT:    bge s2, a3, .LBB33_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB33_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB33_1
 ; RV64I-NEXT:  .LBB33_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2724,8 +2724,8 @@ define signext i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB34_2
 ; RV64I-NEXT:  .LBB34_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB34_2 Depth=1
@@ -2740,10 +2740,10 @@ define signext i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB34_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bltu s1, a3, .LBB34_1
+; RV64I-NEXT:    bltu s2, a3, .LBB34_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB34_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB34_1
 ; RV64I-NEXT:  .LBB34_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
@@ -2813,8 +2813,8 @@ define signext i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lw a3, 0(a0)
-; RV64I-NEXT:    mv s2, a1
-; RV64I-NEXT:    sext.w s1, a1
+; RV64I-NEXT:    mv s1, a1
+; RV64I-NEXT:    sext.w s2, a1
 ; RV64I-NEXT:    j .LBB35_2
 ; RV64I-NEXT:  .LBB35_1: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB35_2 Depth=1
@@ -2829,10 +2829,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:  .LBB35_2: # %atomicrmw.start
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bgeu s1, a3, .LBB35_1
+; RV64I-NEXT:    bgeu s2, a3, .LBB35_1
 ; RV64I-NEXT:  # %bb.3: # %atomicrmw.start
 ; RV64I-NEXT:    # in Loop: Header=BB35_2 Depth=1
-; RV64I-NEXT:    mv a2, s2
+; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    j .LBB35_1
 ; RV64I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index 007027e5c0e58..4be19eefa948f 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -437,21 +437,21 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s3, a1
-; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    not a1, s4
+; RV32I-NEXT:    not a1, s2
 ; RV32I-NEXT:    and a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    lui a2, 349525
-; RV32I-NEXT:    addi s5, a2, 1365
-; RV32I-NEXT:    and a1, a1, s5
+; RV32I-NEXT:    addi s4, a2, 1365
+; RV32I-NEXT:    and a1, a1, s4
 ; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi s0, a1, 819
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    addi s5, a1, 819
+; RV32I-NEXT:    and a1, a0, s5
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s5
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
@@ -459,32 +459,32 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi s6, a1, -241
 ; RV32I-NEXT:    and a0, a0, s6
 ; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi s1, a1, 257
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    addi s3, a1, 257
+; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    addi a0, s3, -1
-; RV32I-NEXT:    not a1, s3
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    addi a0, s1, -1
+; RV32I-NEXT:    not a1, s1
 ; RV32I-NEXT:    and a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    and a1, a1, s5
+; RV32I-NEXT:    and a1, a1, s4
 ; RV32I-NEXT:    sub a0, a0, a1
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    and a1, a0, s5
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s5
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    and a0, a0, s6
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s4, .LBB7_2
+; RV32I-NEXT:    bnez s2, .LBB7_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    j .LBB7_3
 ; RV32I-NEXT:  .LBB7_2:
-; RV32I-NEXT:    srli a0, s2, 24
+; RV32I-NEXT:    srli a0, s0, 24
 ; RV32I-NEXT:  .LBB7_3:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -711,21 +711,21 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s3, a1
-; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    not a1, s4
+; RV32I-NEXT:    not a1, s2
 ; RV32I-NEXT:    and a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    lui a2, 349525
-; RV32I-NEXT:    addi s5, a2, 1365
-; RV32I-NEXT:    and a1, a1, s5
+; RV32I-NEXT:    addi s4, a2, 1365
+; RV32I-NEXT:    and a1, a1, s4
 ; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi s0, a1, 819
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    addi s5, a1, 819
+; RV32I-NEXT:    and a1, a0, s5
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s5
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
@@ -733,32 +733,32 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV32I-NEXT:    addi s6, a1, -241
 ; RV32I-NEXT:    and a0, a0, s6
 ; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi s1, a1, 257
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    addi s3, a1, 257
+; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    addi a0, s3, -1
-; RV32I-NEXT:    not a1, s3
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    addi a0, s1, -1
+; RV32I-NEXT:    not a1, s1
 ; RV32I-NEXT:    and a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    and a1, a1, s5
+; RV32I-NEXT:    and a1, a1, s4
 ; RV32I-NEXT:    sub a0, a0, a1
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    and a1, a0, s5
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s5
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    and a0, a0, s6
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s4, .LBB11_2
+; RV32I-NEXT:    bnez s2, .LBB11_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    j .LBB11_3
 ; RV32I-NEXT:  .LBB11_2:
-; RV32I-NEXT:    srli a0, s2, 24
+; RV32I-NEXT:    srli a0, s0, 24
 ; RV32I-NEXT:  .LBB11_3:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -877,17 +877,17 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    lui a2, 349525
-; RV32I-NEXT:    addi s3, a2, 1365
-; RV32I-NEXT:    and a0, a0, s3
+; RV32I-NEXT:    addi s2, a2, 1365
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    sub a0, a1, a0
 ; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi s0, a1, 819
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    addi s3, a1, 819
+; RV32I-NEXT:    and a1, a0, s3
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
@@ -899,12 +899,12 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __mulsi3@plt
 ; RV32I-NEXT:    srli s5, a0, 24
-; RV32I-NEXT:    srli a0, s2, 1
-; RV32I-NEXT:    and a0, a0, s3
-; RV32I-NEXT:    sub a0, s2, a0
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    srli a0, s0, 1
+; RV32I-NEXT:    and a0, a0, s2
+; RV32I-NEXT:    sub a0, s0, a0
+; RV32I-NEXT:    and a1, a0, s3
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 1387a646ce994..5e0c01ee2678c 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -56,26 +56,26 @@ define void @callee() nounwind {
 ; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a0, 20(a5)
 ; RV32I-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw t4, 24(a5)
-; RV32I-NEXT:    lw t5, 28(a5)
-; RV32I-NEXT:    lw t6, 32(a5)
-; RV32I-NEXT:    lw s2, 36(a5)
-; RV32I-NEXT:    lw s3, 40(a5)
-; RV32I-NEXT:    lw s4, 44(a5)
-; RV32I-NEXT:    lw s5, 48(a5)
-; RV32I-NEXT:    lw s6, 52(a5)
-; RV32I-NEXT:    lw s7, 56(a5)
-; RV32I-NEXT:    lw s8, 60(a5)
-; RV32I-NEXT:    lw s9, 64(a5)
-; RV32I-NEXT:    lw s10, 68(a5)
-; RV32I-NEXT:    lw s11, 72(a5)
-; RV32I-NEXT:    lw ra, 76(a5)
-; RV32I-NEXT:    lw s1, 80(a5)
-; RV32I-NEXT:    lw t3, 84(a5)
-; RV32I-NEXT:    lw t2, 88(a5)
-; RV32I-NEXT:    lw t1, 92(a5)
-; RV32I-NEXT:    lw t0, 96(a5)
-; RV32I-NEXT:    lw s0, 100(a5)
+; RV32I-NEXT:    lw t0, 24(a5)
+; RV32I-NEXT:    lw t1, 28(a5)
+; RV32I-NEXT:    lw t2, 32(a5)
+; RV32I-NEXT:    lw t3, 36(a5)
+; RV32I-NEXT:    lw t4, 40(a5)
+; RV32I-NEXT:    lw t5, 44(a5)
+; RV32I-NEXT:    lw t6, 48(a5)
+; RV32I-NEXT:    lw s0, 52(a5)
+; RV32I-NEXT:    lw s1, 56(a5)
+; RV32I-NEXT:    lw s2, 60(a5)
+; RV32I-NEXT:    lw s3, 64(a5)
+; RV32I-NEXT:    lw s4, 68(a5)
+; RV32I-NEXT:    lw s5, 72(a5)
+; RV32I-NEXT:    lw s6, 76(a5)
+; RV32I-NEXT:    lw s7, 80(a5)
+; RV32I-NEXT:    lw s8, 84(a5)
+; RV32I-NEXT:    lw s9, 88(a5)
+; RV32I-NEXT:    lw s10, 92(a5)
+; RV32I-NEXT:    lw s11, 96(a5)
+; RV32I-NEXT:    lw ra, 100(a5)
 ; RV32I-NEXT:    lw a6, 104(a5)
 ; RV32I-NEXT:    lw a4, 108(a5)
 ; RV32I-NEXT:    lw a0, 124(a5)
@@ -88,26 +88,26 @@ define void @callee() nounwind {
 ; RV32I-NEXT:    sw a3, 112(a5)
 ; RV32I-NEXT:    sw a4, 108(a5)
 ; RV32I-NEXT:    sw a6, 104(a5)
-; RV32I-NEXT:    sw s0, 100(a5)
-; RV32I-NEXT:    sw t0, 96(a5)
-; RV32I-NEXT:    sw t1, 92(a5)
-; RV32I-NEXT:    sw t2, 88(a5)
-; RV32I-NEXT:    sw t3, 84(a5)
-; RV32I-NEXT:    sw s1, 80(a5)
-; RV32I-NEXT:    sw ra, 76(a5)
-; RV32I-NEXT:    sw s11, 72(a5)
-; RV32I-NEXT:    sw s10, 68(a5)
-; RV32I-NEXT:    sw s9, 64(a5)
-; RV32I-NEXT:    sw s8, 60(a5)
-; RV32I-NEXT:    sw s7, 56(a5)
-; RV32I-NEXT:    sw s6, 52(a5)
-; RV32I-NEXT:    sw s5, 48(a5)
-; RV32I-NEXT:    sw s4, 44(a5)
-; RV32I-NEXT:    sw s3, 40(a5)
-; RV32I-NEXT:    sw s2, 36(a5)
-; RV32I-NEXT:    sw t6, 32(a5)
-; RV32I-NEXT:    sw t5, 28(a5)
-; RV32I-NEXT:    sw t4, 24(a5)
+; RV32I-NEXT:    sw ra, 100(a5)
+; RV32I-NEXT:    sw s11, 96(a5)
+; RV32I-NEXT:    sw s10, 92(a5)
+; RV32I-NEXT:    sw s9, 88(a5)
+; RV32I-NEXT:    sw s8, 84(a5)
+; RV32I-NEXT:    sw s7, 80(a5)
+; RV32I-NEXT:    sw s6, 76(a5)
+; RV32I-NEXT:    sw s5, 72(a5)
+; RV32I-NEXT:    sw s4, 68(a5)
+; RV32I-NEXT:    sw s3, 64(a5)
+; RV32I-NEXT:    sw s2, 60(a5)
+; RV32I-NEXT:    sw s1, 56(a5)
+; RV32I-NEXT:    sw s0, 52(a5)
+; RV32I-NEXT:    sw t6, 48(a5)
+; RV32I-NEXT:    sw t5, 44(a5)
+; RV32I-NEXT:    sw t4, 40(a5)
+; RV32I-NEXT:    sw t3, 36(a5)
+; RV32I-NEXT:    sw t2, 32(a5)
+; RV32I-NEXT:    sw t1, 28(a5)
+; RV32I-NEXT:    sw t0, 24(a5)
 ; RV32I-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sw a0, 20(a5)
 ; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
@@ -169,24 +169,24 @@ define void @callee() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw a0, -76(s0) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    lw a0, 24(a5)
 ; RV32I-WITH-FP-NEXT:    sw a0, -80(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw t5, 28(a5)
-; RV32I-WITH-FP-NEXT:    lw t6, 32(a5)
-; RV32I-WITH-FP-NEXT:    lw s2, 36(a5)
-; RV32I-WITH-FP-NEXT:    lw s3, 40(a5)
-; RV32I-WITH-FP-NEXT:    lw s4, 44(a5)
-; RV32I-WITH-FP-NEXT:    lw s5, 48(a5)
-; RV32I-WITH-FP-NEXT:    lw s6, 52(a5)
-; RV32I-WITH-FP-NEXT:    lw s7, 56(a5)
-; RV32I-WITH-FP-NEXT:    lw s8, 60(a5)
-; RV32I-WITH-FP-NEXT:    lw s9, 64(a5)
-; RV32I-WITH-FP-NEXT:    lw s10, 68(a5)
-; RV32I-WITH-FP-NEXT:    lw s11, 72(a5)
-; RV32I-WITH-FP-NEXT:    lw ra, 76(a5)
-; RV32I-WITH-FP-NEXT:    lw t4, 80(a5)
-; RV32I-WITH-FP-NEXT:    lw t3, 84(a5)
-; RV32I-WITH-FP-NEXT:    lw t2, 88(a5)
-; RV32I-WITH-FP-NEXT:    lw s1, 92(a5)
-; RV32I-WITH-FP-NEXT:    lw t1, 96(a5)
+; RV32I-WITH-FP-NEXT:    lw t1, 28(a5)
+; RV32I-WITH-FP-NEXT:    lw t2, 32(a5)
+; RV32I-WITH-FP-NEXT:    lw t3, 36(a5)
+; RV32I-WITH-FP-NEXT:    lw t4, 40(a5)
+; RV32I-WITH-FP-NEXT:    lw t5, 44(a5)
+; RV32I-WITH-FP-NEXT:    lw t6, 48(a5)
+; RV32I-WITH-FP-NEXT:    lw s1, 52(a5)
+; RV32I-WITH-FP-NEXT:    lw s2, 56(a5)
+; RV32I-WITH-FP-NEXT:    lw s3, 60(a5)
+; RV32I-WITH-FP-NEXT:    lw s4, 64(a5)
+; RV32I-WITH-FP-NEXT:    lw s5, 68(a5)
+; RV32I-WITH-FP-NEXT:    lw s6, 72(a5)
+; RV32I-WITH-FP-NEXT:    lw s7, 76(a5)
+; RV32I-WITH-FP-NEXT:    lw s8, 80(a5)
+; RV32I-WITH-FP-NEXT:    lw s9, 84(a5)
+; RV32I-WITH-FP-NEXT:    lw s10, 88(a5)
+; RV32I-WITH-FP-NEXT:    lw s11, 92(a5)
+; RV32I-WITH-FP-NEXT:    lw ra, 96(a5)
 ; RV32I-WITH-FP-NEXT:    lw t0, 100(a5)
 ; RV32I-WITH-FP-NEXT:    lw a6, 104(a5)
 ; RV32I-WITH-FP-NEXT:    lw a4, 108(a5)
@@ -201,24 +201,24 @@ define void @callee() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw a4, 108(a5)
 ; RV32I-WITH-FP-NEXT:    sw a6, 104(a5)
 ; RV32I-WITH-FP-NEXT:    sw t0, 100(a5)
-; RV32I-WITH-FP-NEXT:    sw t1, 96(a5)
-; RV32I-WITH-FP-NEXT:    sw s1, 92(a5)
-; RV32I-WITH-FP-NEXT:    sw t2, 88(a5)
-; RV32I-WITH-FP-NEXT:    sw t3, 84(a5)
-; RV32I-WITH-FP-NEXT:    sw t4, 80(a5)
-; RV32I-WITH-FP-NEXT:    sw ra, 76(a5)
-; RV32I-WITH-FP-NEXT:    sw s11, 72(a5)
-; RV32I-WITH-FP-NEXT:    sw s10, 68(a5)
-; RV32I-WITH-FP-NEXT:    sw s9, 64(a5)
-; RV32I-WITH-FP-NEXT:    sw s8, 60(a5)
-; RV32I-WITH-FP-NEXT:    sw s7, 56(a5)
-; RV32I-WITH-FP-NEXT:    sw s6, 52(a5)
-; RV32I-WITH-FP-NEXT:    sw s5, 48(a5)
-; RV32I-WITH-FP-NEXT:    sw s4, 44(a5)
-; RV32I-WITH-FP-NEXT:    sw s3, 40(a5)
-; RV32I-WITH-FP-NEXT:    sw s2, 36(a5)
-; RV32I-WITH-FP-NEXT:    sw t6, 32(a5)
-; RV32I-WITH-FP-NEXT:    sw t5, 28(a5)
+; RV32I-WITH-FP-NEXT:    sw ra, 96(a5)
+; RV32I-WITH-FP-NEXT:    sw s11, 92(a5)
+; RV32I-WITH-FP-NEXT:    sw s10, 88(a5)
+; RV32I-WITH-FP-NEXT:    sw s9, 84(a5)
+; RV32I-WITH-FP-NEXT:    sw s8, 80(a5)
+; RV32I-WITH-FP-NEXT:    sw s7, 76(a5)
+; RV32I-WITH-FP-NEXT:    sw s6, 72(a5)
+; RV32I-WITH-FP-NEXT:    sw s5, 68(a5)
+; RV32I-WITH-FP-NEXT:    sw s4, 64(a5)
+; RV32I-WITH-FP-NEXT:    sw s3, 60(a5)
+; RV32I-WITH-FP-NEXT:    sw s2, 56(a5)
+; RV32I-WITH-FP-NEXT:    sw s1, 52(a5)
+; RV32I-WITH-FP-NEXT:    sw t6, 48(a5)
+; RV32I-WITH-FP-NEXT:    sw t5, 44(a5)
+; RV32I-WITH-FP-NEXT:    sw t4, 40(a5)
+; RV32I-WITH-FP-NEXT:    sw t3, 36(a5)
+; RV32I-WITH-FP-NEXT:    sw t2, 32(a5)
+; RV32I-WITH-FP-NEXT:    sw t1, 28(a5)
 ; RV32I-WITH-FP-NEXT:    lw a0, -80(s0) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    sw a0, 24(a5)
 ; RV32I-WITH-FP-NEXT:    lw a0, -76(s0) # 4-byte Folded Reload
@@ -279,26 +279,26 @@ define void @callee() nounwind {
 ; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lw a0, 20(a5)
 ; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw t4, 24(a5)
-; RV64I-NEXT:    lw t5, 28(a5)
-; RV64I-NEXT:    lw t6, 32(a5)
-; RV64I-NEXT:    lw s2, 36(a5)
-; RV64I-NEXT:    lw s3, 40(a5)
-; RV64I-NEXT:    lw s4, 44(a5)
-; RV64I-NEXT:    lw s5, 48(a5)
-; RV64I-NEXT:    lw s6, 52(a5)
-; RV64I-NEXT:    lw s7, 56(a5)
-; RV64I-NEXT:    lw s8, 60(a5)
-; RV64I-NEXT:    lw s9, 64(a5)
-; RV64I-NEXT:    lw s10, 68(a5)
-; RV64I-NEXT:    lw s11, 72(a5)
-; RV64I-NEXT:    lw ra, 76(a5)
-; RV64I-NEXT:    lw s1, 80(a5)
-; RV64I-NEXT:    lw t3, 84(a5)
-; RV64I-NEXT:    lw t2, 88(a5)
-; RV64I-NEXT:    lw t1, 92(a5)
-; RV64I-NEXT:    lw t0, 96(a5)
-; RV64I-NEXT:    lw s0, 100(a5)
+; RV64I-NEXT:    lw t0, 24(a5)
+; RV64I-NEXT:    lw t1, 28(a5)
+; RV64I-NEXT:    lw t2, 32(a5)
+; RV64I-NEXT:    lw t3, 36(a5)
+; RV64I-NEXT:    lw t4, 40(a5)
+; RV64I-NEXT:    lw t5, 44(a5)
+; RV64I-NEXT:    lw t6, 48(a5)
+; RV64I-NEXT:    lw s0, 52(a5)
+; RV64I-NEXT:    lw s1, 56(a5)
+; RV64I-NEXT:    lw s2, 60(a5)
+; RV64I-NEXT:    lw s3, 64(a5)
+; RV64I-NEXT:    lw s4, 68(a5)
+; RV64I-NEXT:    lw s5, 72(a5)
+; RV64I-NEXT:    lw s6, 76(a5)
+; RV64I-NEXT:    lw s7, 80(a5)
+; RV64I-NEXT:    lw s8, 84(a5)
+; RV64I-NEXT:    lw s9, 88(a5)
+; RV64I-NEXT:    lw s10, 92(a5)
+; RV64I-NEXT:    lw s11, 96(a5)
+; RV64I-NEXT:    lw ra, 100(a5)
 ; RV64I-NEXT:    lw a6, 104(a5)
 ; RV64I-NEXT:    lw a4, 108(a5)
 ; RV64I-NEXT:    lw a0, 124(a5)
@@ -311,26 +311,26 @@ define void @callee() nounwind {
 ; RV64I-NEXT:    sw a3, 112(a5)
 ; RV64I-NEXT:    sw a4, 108(a5)
 ; RV64I-NEXT:    sw a6, 104(a5)
-; RV64I-NEXT:    sw s0, 100(a5)
-; RV64I-NEXT:    sw t0, 96(a5)
-; RV64I-NEXT:    sw t1, 92(a5)
-; RV64I-NEXT:    sw t2, 88(a5)
-; RV64I-NEXT:    sw t3, 84(a5)
-; RV64I-NEXT:    sw s1, 80(a5)
-; RV64I-NEXT:    sw ra, 76(a5)
-; RV64I-NEXT:    sw s11, 72(a5)
-; RV64I-NEXT:    sw s10, 68(a5)
-; RV64I-NEXT:    sw s9, 64(a5)
-; RV64I-NEXT:    sw s8, 60(a5)
-; RV64I-NEXT:    sw s7, 56(a5)
-; RV64I-NEXT:    sw s6, 52(a5)
-; RV64I-NEXT:    sw s5, 48(a5)
-; RV64I-NEXT:    sw s4, 44(a5)
-; RV64I-NEXT:    sw s3, 40(a5)
-; RV64I-NEXT:    sw s2, 36(a5)
-; RV64I-NEXT:    sw t6, 32(a5)
-; RV64I-NEXT:    sw t5, 28(a5)
-; RV64I-NEXT:    sw t4, 24(a5)
+; RV64I-NEXT:    sw ra, 100(a5)
+; RV64I-NEXT:    sw s11, 96(a5)
+; RV64I-NEXT:    sw s10, 92(a5)
+; RV64I-NEXT:    sw s9, 88(a5)
+; RV64I-NEXT:    sw s8, 84(a5)
+; RV64I-NEXT:    sw s7, 80(a5)
+; RV64I-NEXT:    sw s6, 76(a5)
+; RV64I-NEXT:    sw s5, 72(a5)
+; RV64I-NEXT:    sw s4, 68(a5)
+; RV64I-NEXT:    sw s3, 64(a5)
+; RV64I-NEXT:    sw s2, 60(a5)
+; RV64I-NEXT:    sw s1, 56(a5)
+; RV64I-NEXT:    sw s0, 52(a5)
+; RV64I-NEXT:    sw t6, 48(a5)
+; RV64I-NEXT:    sw t5, 44(a5)
+; RV64I-NEXT:    sw t4, 40(a5)
+; RV64I-NEXT:    sw t3, 36(a5)
+; RV64I-NEXT:    sw t2, 32(a5)
+; RV64I-NEXT:    sw t1, 28(a5)
+; RV64I-NEXT:    sw t0, 24(a5)
 ; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sw a0, 20(a5)
 ; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
@@ -392,24 +392,24 @@ define void @callee() nounwind {
 ; RV64I-WITH-FP-NEXT:    sd a0, -152(s0) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    lw a0, 24(a5)
 ; RV64I-WITH-FP-NEXT:    sd a0, -160(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw t5, 28(a5)
-; RV64I-WITH-FP-NEXT:    lw t6, 32(a5)
-; RV64I-WITH-FP-NEXT:    lw s2, 36(a5)
-; RV64I-WITH-FP-NEXT:    lw s3, 40(a5)
-; RV64I-WITH-FP-NEXT:    lw s4, 44(a5)
-; RV64I-WITH-FP-NEXT:    lw s5, 48(a5)
-; RV64I-WITH-FP-NEXT:    lw s6, 52(a5)
-; RV64I-WITH-FP-NEXT:    lw s7, 56(a5)
-; RV64I-WITH-FP-NEXT:    lw s8, 60(a5)
-; RV64I-WITH-FP-NEXT:    lw s9, 64(a5)
-; RV64I-WITH-FP-NEXT:    lw s10, 68(a5)
-; RV64I-WITH-FP-NEXT:    lw s11, 72(a5)
-; RV64I-WITH-FP-NEXT:    lw ra, 76(a5)
-; RV64I-WITH-FP-NEXT:    lw t4, 80(a5)
-; RV64I-WITH-FP-NEXT:    lw t3, 84(a5)
-; RV64I-WITH-FP-NEXT:    lw t2, 88(a5)
-; RV64I-WITH-FP-NEXT:    lw s1, 92(a5)
-; RV64I-WITH-FP-NEXT:    lw t1, 96(a5)
+; RV64I-WITH-FP-NEXT:    lw t1, 28(a5)
+; RV64I-WITH-FP-NEXT:    lw t2, 32(a5)
+; RV64I-WITH-FP-NEXT:    lw t3, 36(a5)
+; RV64I-WITH-FP-NEXT:    lw t4, 40(a5)
+; RV64I-WITH-FP-NEXT:    lw t5, 44(a5)
+; RV64I-WITH-FP-NEXT:    lw t6, 48(a5)
+; RV64I-WITH-FP-NEXT:    lw s1, 52(a5)
+; RV64I-WITH-FP-NEXT:    lw s2, 56(a5)
+; RV64I-WITH-FP-NEXT:    lw s3, 60(a5)
+; RV64I-WITH-FP-NEXT:    lw s4, 64(a5)
+; RV64I-WITH-FP-NEXT:    lw s5, 68(a5)
+; RV64I-WITH-FP-NEXT:    lw s6, 72(a5)
+; RV64I-WITH-FP-NEXT:    lw s7, 76(a5)
+; RV64I-WITH-FP-NEXT:    lw s8, 80(a5)
+; RV64I-WITH-FP-NEXT:    lw s9, 84(a5)
+; RV64I-WITH-FP-NEXT:    lw s10, 88(a5)
+; RV64I-WITH-FP-NEXT:    lw s11, 92(a5)
+; RV64I-WITH-FP-NEXT:    lw ra, 96(a5)
 ; RV64I-WITH-FP-NEXT:    lw t0, 100(a5)
 ; RV64I-WITH-FP-NEXT:    lw a6, 104(a5)
 ; RV64I-WITH-FP-NEXT:    lw a4, 108(a5)
@@ -424,24 +424,24 @@ define void @callee() nounwind {
 ; RV64I-WITH-FP-NEXT:    sw a4, 108(a5)
 ; RV64I-WITH-FP-NEXT:    sw a6, 104(a5)
 ; RV64I-WITH-FP-NEXT:    sw t0, 100(a5)
-; RV64I-WITH-FP-NEXT:    sw t1, 96(a5)
-; RV64I-WITH-FP-NEXT:    sw s1, 92(a5)
-; RV64I-WITH-FP-NEXT:    sw t2, 88(a5)
-; RV64I-WITH-FP-NEXT:    sw t3, 84(a5)
-; RV64I-WITH-FP-NEXT:    sw t4, 80(a5)
-; RV64I-WITH-FP-NEXT:    sw ra, 76(a5)
-; RV64I-WITH-FP-NEXT:    sw s11, 72(a5)
-; RV64I-WITH-FP-NEXT:    sw s10, 68(a5)
-; RV64I-WITH-FP-NEXT:    sw s9, 64(a5)
-; RV64I-WITH-FP-NEXT:    sw s8, 60(a5)
-; RV64I-WITH-FP-NEXT:    sw s7, 56(a5)
-; RV64I-WITH-FP-NEXT:    sw s6, 52(a5)
-; RV64I-WITH-FP-NEXT:    sw s5, 48(a5)
-; RV64I-WITH-FP-NEXT:    sw s4, 44(a5)
-; RV64I-WITH-FP-NEXT:    sw s3, 40(a5)
-; RV64I-WITH-FP-NEXT:    sw s2, 36(a5)
-; RV64I-WITH-FP-NEXT:    sw t6, 32(a5)
-; RV64I-WITH-FP-NEXT:    sw t5, 28(a5)
+; RV64I-WITH-FP-NEXT:    sw ra, 96(a5)
+; RV64I-WITH-FP-NEXT:    sw s11, 92(a5)
+; RV64I-WITH-FP-NEXT:    sw s10, 88(a5)
+; RV64I-WITH-FP-NEXT:    sw s9, 84(a5)
+; RV64I-WITH-FP-NEXT:    sw s8, 80(a5)
+; RV64I-WITH-FP-NEXT:    sw s7, 76(a5)
+; RV64I-WITH-FP-NEXT:    sw s6, 72(a5)
+; RV64I-WITH-FP-NEXT:    sw s5, 68(a5)
+; RV64I-WITH-FP-NEXT:    sw s4, 64(a5)
+; RV64I-WITH-FP-NEXT:    sw s3, 60(a5)
+; RV64I-WITH-FP-NEXT:    sw s2, 56(a5)
+; RV64I-WITH-FP-NEXT:    sw s1, 52(a5)
+; RV64I-WITH-FP-NEXT:    sw t6, 48(a5)
+; RV64I-WITH-FP-NEXT:    sw t5, 44(a5)
+; RV64I-WITH-FP-NEXT:    sw t4, 40(a5)
+; RV64I-WITH-FP-NEXT:    sw t3, 36(a5)
+; RV64I-WITH-FP-NEXT:    sw t2, 32(a5)
+; RV64I-WITH-FP-NEXT:    sw t1, 28(a5)
 ; RV64I-WITH-FP-NEXT:    ld a0, -160(s0) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    sw a0, 24(a5)
 ; RV64I-WITH-FP-NEXT:    ld a0, -152(s0) # 8-byte Folded Reload
@@ -505,100 +505,100 @@ define void @caller() nounwind {
 ; RV32I-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a0, %lo(var+12)(s0)
 ; RV32I-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    addi s1, s0, %lo(var)
-; RV32I-NEXT:    lw a0, 16(s1)
+; RV32I-NEXT:    addi s5, s0, %lo(var)
+; RV32I-NEXT:    lw a0, 16(s5)
 ; RV32I-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 20(s1)
+; RV32I-NEXT:    lw a0, 20(s5)
 ; RV32I-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 24(s1)
+; RV32I-NEXT:    lw a0, 24(s5)
 ; RV32I-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 28(s1)
+; RV32I-NEXT:    lw a0, 28(s5)
 ; RV32I-NEXT:    sw a0, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 32(s1)
+; RV32I-NEXT:    lw a0, 32(s5)
 ; RV32I-NEXT:    sw a0, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 36(s1)
+; RV32I-NEXT:    lw a0, 36(s5)
 ; RV32I-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 40(s1)
+; RV32I-NEXT:    lw a0, 40(s5)
 ; RV32I-NEXT:    sw a0, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 44(s1)
+; RV32I-NEXT:    lw a0, 44(s5)
 ; RV32I-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 48(s1)
+; RV32I-NEXT:    lw a0, 48(s5)
 ; RV32I-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 52(s1)
+; RV32I-NEXT:    lw a0, 52(s5)
 ; RV32I-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 56(s1)
+; RV32I-NEXT:    lw a0, 56(s5)
 ; RV32I-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 60(s1)
+; RV32I-NEXT:    lw a0, 60(s5)
 ; RV32I-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 64(s1)
+; RV32I-NEXT:    lw a0, 64(s5)
 ; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 68(s1)
+; RV32I-NEXT:    lw a0, 68(s5)
 ; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 72(s1)
+; RV32I-NEXT:    lw a0, 72(s5)
 ; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 76(s1)
+; RV32I-NEXT:    lw a0, 76(s5)
 ; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 80(s1)
+; RV32I-NEXT:    lw a0, 80(s5)
 ; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 84(s1)
+; RV32I-NEXT:    lw a0, 84(s5)
 ; RV32I-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw s4, 88(s1)
-; RV32I-NEXT:    lw s5, 92(s1)
-; RV32I-NEXT:    lw s6, 96(s1)
-; RV32I-NEXT:    lw s7, 100(s1)
-; RV32I-NEXT:    lw s8, 104(s1)
-; RV32I-NEXT:    lw s9, 108(s1)
-; RV32I-NEXT:    lw s10, 112(s1)
-; RV32I-NEXT:    lw s11, 116(s1)
-; RV32I-NEXT:    lw s2, 120(s1)
-; RV32I-NEXT:    lw s3, 124(s1)
+; RV32I-NEXT:    lw s3, 88(s5)
+; RV32I-NEXT:    lw s4, 92(s5)
+; RV32I-NEXT:    lw s6, 96(s5)
+; RV32I-NEXT:    lw s7, 100(s5)
+; RV32I-NEXT:    lw s8, 104(s5)
+; RV32I-NEXT:    lw s9, 108(s5)
+; RV32I-NEXT:    lw s10, 112(s5)
+; RV32I-NEXT:    lw s11, 116(s5)
+; RV32I-NEXT:    lw s1, 120(s5)
+; RV32I-NEXT:    lw s2, 124(s5)
 ; RV32I-NEXT:    call callee@plt
-; RV32I-NEXT:    sw s3, 124(s1)
-; RV32I-NEXT:    sw s2, 120(s1)
-; RV32I-NEXT:    sw s11, 116(s1)
-; RV32I-NEXT:    sw s10, 112(s1)
-; RV32I-NEXT:    sw s9, 108(s1)
-; RV32I-NEXT:    sw s8, 104(s1)
-; RV32I-NEXT:    sw s7, 100(s1)
-; RV32I-NEXT:    sw s6, 96(s1)
-; RV32I-NEXT:    sw s5, 92(s1)
-; RV32I-NEXT:    sw s4, 88(s1)
+; RV32I-NEXT:    sw s2, 124(s5)
+; RV32I-NEXT:    sw s1, 120(s5)
+; RV32I-NEXT:    sw s11, 116(s5)
+; RV32I-NEXT:    sw s10, 112(s5)
+; RV32I-NEXT:    sw s9, 108(s5)
+; RV32I-NEXT:    sw s8, 104(s5)
+; RV32I-NEXT:    sw s7, 100(s5)
+; RV32I-NEXT:    sw s6, 96(s5)
+; RV32I-NEXT:    sw s4, 92(s5)
+; RV32I-NEXT:    sw s3, 88(s5)
 ; RV32I-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 84(s1)
+; RV32I-NEXT:    sw a0, 84(s5)
 ; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 80(s1)
+; RV32I-NEXT:    sw a0, 80(s5)
 ; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 76(s1)
+; RV32I-NEXT:    sw a0, 76(s5)
 ; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 72(s1)
+; RV32I-NEXT:    sw a0, 72(s5)
 ; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 68(s1)
+; RV32I-NEXT:    sw a0, 68(s5)
 ; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 64(s1)
+; RV32I-NEXT:    sw a0, 64(s5)
 ; RV32I-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 60(s1)
+; RV32I-NEXT:    sw a0, 60(s5)
 ; RV32I-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 56(s1)
+; RV32I-NEXT:    sw a0, 56(s5)
 ; RV32I-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 52(s1)
+; RV32I-NEXT:    sw a0, 52(s5)
 ; RV32I-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 48(s1)
+; RV32I-NEXT:    sw a0, 48(s5)
 ; RV32I-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 44(s1)
+; RV32I-NEXT:    sw a0, 44(s5)
 ; RV32I-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 40(s1)
+; RV32I-NEXT:    sw a0, 40(s5)
 ; RV32I-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 36(s1)
+; RV32I-NEXT:    sw a0, 36(s5)
 ; RV32I-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 32(s1)
+; RV32I-NEXT:    sw a0, 32(s5)
 ; RV32I-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 28(s1)
+; RV32I-NEXT:    sw a0, 28(s5)
 ; RV32I-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 24(s1)
+; RV32I-NEXT:    sw a0, 24(s5)
 ; RV32I-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 20(s1)
+; RV32I-NEXT:    sw a0, 20(s5)
 ; RV32I-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 16(s1)
+; RV32I-NEXT:    sw a0, 16(s5)
 ; RV32I-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sw a0, %lo(var+12)(s0)
 ; RV32I-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
@@ -640,119 +640,119 @@ define void @caller() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    addi s0, sp, 144
-; RV32I-WITH-FP-NEXT:    lui s6, %hi(var)
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var)(s6)
+; RV32I-WITH-FP-NEXT:    lui s1, %hi(var)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var)(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+4)(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+4)(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -60(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+8)(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+8)(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -64(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+12)(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+12)(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -68(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    addi s1, s6, %lo(var)
-; RV32I-WITH-FP-NEXT:    lw a0, 16(s1)
+; RV32I-WITH-FP-NEXT:    addi s6, s1, %lo(var)
+; RV32I-WITH-FP-NEXT:    lw a0, 16(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -72(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 20(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 20(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -76(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 24(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 24(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -80(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 28(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 28(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -84(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 32(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 32(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -88(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 36(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 36(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -92(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 40(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 40(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -96(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 44(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 44(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -100(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 48(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 48(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -104(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 52(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 52(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -108(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 56(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 56(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -112(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 60(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 60(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -116(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 64(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 64(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -120(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 68(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 68(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -124(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 72(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 72(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -128(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 76(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 76(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -132(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 80(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 80(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -136(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 84(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 84(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -140(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 88(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 88(s6)
 ; RV32I-WITH-FP-NEXT:    sw a0, -144(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw s8, 92(s1)
-; RV32I-WITH-FP-NEXT:    lw s9, 96(s1)
-; RV32I-WITH-FP-NEXT:    lw s10, 100(s1)
-; RV32I-WITH-FP-NEXT:    lw s11, 104(s1)
-; RV32I-WITH-FP-NEXT:    lw s2, 108(s1)
-; RV32I-WITH-FP-NEXT:    lw s3, 112(s1)
-; RV32I-WITH-FP-NEXT:    lw s4, 116(s1)
-; RV32I-WITH-FP-NEXT:    lw s5, 120(s1)
-; RV32I-WITH-FP-NEXT:    lw s7, 124(s1)
+; RV32I-WITH-FP-NEXT:    lw s8, 92(s6)
+; RV32I-WITH-FP-NEXT:    lw s9, 96(s6)
+; RV32I-WITH-FP-NEXT:    lw s10, 100(s6)
+; RV32I-WITH-FP-NEXT:    lw s11, 104(s6)
+; RV32I-WITH-FP-NEXT:    lw s2, 108(s6)
+; RV32I-WITH-FP-NEXT:    lw s3, 112(s6)
+; RV32I-WITH-FP-NEXT:    lw s4, 116(s6)
+; RV32I-WITH-FP-NEXT:    lw s5, 120(s6)
+; RV32I-WITH-FP-NEXT:    lw s7, 124(s6)
 ; RV32I-WITH-FP-NEXT:    call callee@plt
-; RV32I-WITH-FP-NEXT:    sw s7, 124(s1)
-; RV32I-WITH-FP-NEXT:    sw s5, 120(s1)
-; RV32I-WITH-FP-NEXT:    sw s4, 116(s1)
-; RV32I-WITH-FP-NEXT:    sw s3, 112(s1)
-; RV32I-WITH-FP-NEXT:    sw s2, 108(s1)
-; RV32I-WITH-FP-NEXT:    sw s11, 104(s1)
-; RV32I-WITH-FP-NEXT:    sw s10, 100(s1)
-; RV32I-WITH-FP-NEXT:    sw s9, 96(s1)
-; RV32I-WITH-FP-NEXT:    sw s8, 92(s1)
+; RV32I-WITH-FP-NEXT:    sw s7, 124(s6)
+; RV32I-WITH-FP-NEXT:    sw s5, 120(s6)
+; RV32I-WITH-FP-NEXT:    sw s4, 116(s6)
+; RV32I-WITH-FP-NEXT:    sw s3, 112(s6)
+; RV32I-WITH-FP-NEXT:    sw s2, 108(s6)
+; RV32I-WITH-FP-NEXT:    sw s11, 104(s6)
+; RV32I-WITH-FP-NEXT:    sw s10, 100(s6)
+; RV32I-WITH-FP-NEXT:    sw s9, 96(s6)
+; RV32I-WITH-FP-NEXT:    sw s8, 92(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -144(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 88(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 88(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -140(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 84(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 84(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -136(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 80(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 80(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -132(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 76(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 76(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -128(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 72(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 72(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -124(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 68(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 68(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -120(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 64(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 64(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -116(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 60(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 60(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -112(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 56(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 56(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -108(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 52(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 52(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -104(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 48(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 48(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -100(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 44(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 44(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -96(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 40(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 40(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -92(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 36(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 36(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -88(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 32(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 32(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -84(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 28(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 28(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -80(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 24(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 24(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -76(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 20(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 20(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -72(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 16(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 16(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -68(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+12)(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+12)(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -64(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+8)(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+8)(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -60(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(s1)
 ; RV32I-WITH-FP-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
@@ -794,100 +794,100 @@ define void @caller() nounwind {
 ; RV64I-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lw a0, %lo(var+12)(s0)
 ; RV64I-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addi s1, s0, %lo(var)
-; RV64I-NEXT:    lw a0, 16(s1)
+; RV64I-NEXT:    addi s5, s0, %lo(var)
+; RV64I-NEXT:    lw a0, 16(s5)
 ; RV64I-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 20(s1)
+; RV64I-NEXT:    lw a0, 20(s5)
 ; RV64I-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 24(s1)
+; RV64I-NEXT:    lw a0, 24(s5)
 ; RV64I-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 28(s1)
+; RV64I-NEXT:    lw a0, 28(s5)
 ; RV64I-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 32(s1)
+; RV64I-NEXT:    lw a0, 32(s5)
 ; RV64I-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 36(s1)
+; RV64I-NEXT:    lw a0, 36(s5)
 ; RV64I-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 40(s1)
+; RV64I-NEXT:    lw a0, 40(s5)
 ; RV64I-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 44(s1)
+; RV64I-NEXT:    lw a0, 44(s5)
 ; RV64I-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 48(s1)
+; RV64I-NEXT:    lw a0, 48(s5)
 ; RV64I-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 52(s1)
+; RV64I-NEXT:    lw a0, 52(s5)
 ; RV64I-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 56(s1)
+; RV64I-NEXT:    lw a0, 56(s5)
 ; RV64I-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 60(s1)
+; RV64I-NEXT:    lw a0, 60(s5)
 ; RV64I-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 64(s1)
+; RV64I-NEXT:    lw a0, 64(s5)
 ; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 68(s1)
+; RV64I-NEXT:    lw a0, 68(s5)
 ; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 72(s1)
+; RV64I-NEXT:    lw a0, 72(s5)
 ; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 76(s1)
+; RV64I-NEXT:    lw a0, 76(s5)
 ; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 80(s1)
+; RV64I-NEXT:    lw a0, 80(s5)
 ; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 84(s1)
+; RV64I-NEXT:    lw a0, 84(s5)
 ; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw s4, 88(s1)
-; RV64I-NEXT:    lw s5, 92(s1)
-; RV64I-NEXT:    lw s6, 96(s1)
-; RV64I-NEXT:    lw s7, 100(s1)
-; RV64I-NEXT:    lw s8, 104(s1)
-; RV64I-NEXT:    lw s9, 108(s1)
-; RV64I-NEXT:    lw s10, 112(s1)
-; RV64I-NEXT:    lw s11, 116(s1)
-; RV64I-NEXT:    lw s2, 120(s1)
-; RV64I-NEXT:    lw s3, 124(s1)
+; RV64I-NEXT:    lw s3, 88(s5)
+; RV64I-NEXT:    lw s4, 92(s5)
+; RV64I-NEXT:    lw s6, 96(s5)
+; RV64I-NEXT:    lw s7, 100(s5)
+; RV64I-NEXT:    lw s8, 104(s5)
+; RV64I-NEXT:    lw s9, 108(s5)
+; RV64I-NEXT:    lw s10, 112(s5)
+; RV64I-NEXT:    lw s11, 116(s5)
+; RV64I-NEXT:    lw s1, 120(s5)
+; RV64I-NEXT:    lw s2, 124(s5)
 ; RV64I-NEXT:    call callee@plt
-; RV64I-NEXT:    sw s3, 124(s1)
-; RV64I-NEXT:    sw s2, 120(s1)
-; RV64I-NEXT:    sw s11, 116(s1)
-; RV64I-NEXT:    sw s10, 112(s1)
-; RV64I-NEXT:    sw s9, 108(s1)
-; RV64I-NEXT:    sw s8, 104(s1)
-; RV64I-NEXT:    sw s7, 100(s1)
-; RV64I-NEXT:    sw s6, 96(s1)
-; RV64I-NEXT:    sw s5, 92(s1)
-; RV64I-NEXT:    sw s4, 88(s1)
+; RV64I-NEXT:    sw s2, 124(s5)
+; RV64I-NEXT:    sw s1, 120(s5)
+; RV64I-NEXT:    sw s11, 116(s5)
+; RV64I-NEXT:    sw s10, 112(s5)
+; RV64I-NEXT:    sw s9, 108(s5)
+; RV64I-NEXT:    sw s8, 104(s5)
+; RV64I-NEXT:    sw s7, 100(s5)
+; RV64I-NEXT:    sw s6, 96(s5)
+; RV64I-NEXT:    sw s4, 92(s5)
+; RV64I-NEXT:    sw s3, 88(s5)
 ; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 84(s1)
+; RV64I-NEXT:    sw a0, 84(s5)
 ; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 80(s1)
+; RV64I-NEXT:    sw a0, 80(s5)
 ; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 76(s1)
+; RV64I-NEXT:    sw a0, 76(s5)
 ; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 72(s1)
+; RV64I-NEXT:    sw a0, 72(s5)
 ; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 68(s1)
+; RV64I-NEXT:    sw a0, 68(s5)
 ; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 64(s1)
+; RV64I-NEXT:    sw a0, 64(s5)
 ; RV64I-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 60(s1)
+; RV64I-NEXT:    sw a0, 60(s5)
 ; RV64I-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 56(s1)
+; RV64I-NEXT:    sw a0, 56(s5)
 ; RV64I-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 52(s1)
+; RV64I-NEXT:    sw a0, 52(s5)
 ; RV64I-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 48(s1)
+; RV64I-NEXT:    sw a0, 48(s5)
 ; RV64I-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 44(s1)
+; RV64I-NEXT:    sw a0, 44(s5)
 ; RV64I-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 40(s1)
+; RV64I-NEXT:    sw a0, 40(s5)
 ; RV64I-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 36(s1)
+; RV64I-NEXT:    sw a0, 36(s5)
 ; RV64I-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 32(s1)
+; RV64I-NEXT:    sw a0, 32(s5)
 ; RV64I-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 28(s1)
+; RV64I-NEXT:    sw a0, 28(s5)
 ; RV64I-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 24(s1)
+; RV64I-NEXT:    sw a0, 24(s5)
 ; RV64I-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 20(s1)
+; RV64I-NEXT:    sw a0, 20(s5)
 ; RV64I-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 16(s1)
+; RV64I-NEXT:    sw a0, 16(s5)
 ; RV64I-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sw a0, %lo(var+12)(s0)
 ; RV64I-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
@@ -929,119 +929,119 @@ define void @caller() nounwind {
 ; RV64I-WITH-FP-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    addi s0, sp, 288
-; RV64I-WITH-FP-NEXT:    lui s6, %hi(var)
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var)(s6)
+; RV64I-WITH-FP-NEXT:    lui s1, %hi(var)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var)(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+4)(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+4)(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+8)(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+8)(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+12)(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+12)(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    addi s1, s6, %lo(var)
-; RV64I-WITH-FP-NEXT:    lw a0, 16(s1)
+; RV64I-WITH-FP-NEXT:    addi s6, s1, %lo(var)
+; RV64I-WITH-FP-NEXT:    lw a0, 16(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 20(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 20(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -152(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 24(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 24(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -160(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 28(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 28(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -168(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 32(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 32(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -176(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 36(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 36(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -184(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 40(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 40(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -192(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 44(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 44(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -200(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 48(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 48(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -208(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 52(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 52(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -216(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 56(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 56(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -224(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 60(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 60(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -232(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 64(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 64(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -240(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 68(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 68(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -248(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 72(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 72(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -256(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 76(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 76(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -264(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 80(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 80(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -272(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 84(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 84(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -280(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 88(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 88(s6)
 ; RV64I-WITH-FP-NEXT:    sd a0, -288(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw s8, 92(s1)
-; RV64I-WITH-FP-NEXT:    lw s9, 96(s1)
-; RV64I-WITH-FP-NEXT:    lw s10, 100(s1)
-; RV64I-WITH-FP-NEXT:    lw s11, 104(s1)
-; RV64I-WITH-FP-NEXT:    lw s2, 108(s1)
-; RV64I-WITH-FP-NEXT:    lw s3, 112(s1)
-; RV64I-WITH-FP-NEXT:    lw s4, 116(s1)
-; RV64I-WITH-FP-NEXT:    lw s5, 120(s1)
-; RV64I-WITH-FP-NEXT:    lw s7, 124(s1)
+; RV64I-WITH-FP-NEXT:    lw s8, 92(s6)
+; RV64I-WITH-FP-NEXT:    lw s9, 96(s6)
+; RV64I-WITH-FP-NEXT:    lw s10, 100(s6)
+; RV64I-WITH-FP-NEXT:    lw s11, 104(s6)
+; RV64I-WITH-FP-NEXT:    lw s2, 108(s6)
+; RV64I-WITH-FP-NEXT:    lw s3, 112(s6)
+; RV64I-WITH-FP-NEXT:    lw s4, 116(s6)
+; RV64I-WITH-FP-NEXT:    lw s5, 120(s6)
+; RV64I-WITH-FP-NEXT:    lw s7, 124(s6)
 ; RV64I-WITH-FP-NEXT:    call callee@plt
-; RV64I-WITH-FP-NEXT:    sw s7, 124(s1)
-; RV64I-WITH-FP-NEXT:    sw s5, 120(s1)
-; RV64I-WITH-FP-NEXT:    sw s4, 116(s1)
-; RV64I-WITH-FP-NEXT:    sw s3, 112(s1)
-; RV64I-WITH-FP-NEXT:    sw s2, 108(s1)
-; RV64I-WITH-FP-NEXT:    sw s11, 104(s1)
-; RV64I-WITH-FP-NEXT:    sw s10, 100(s1)
-; RV64I-WITH-FP-NEXT:    sw s9, 96(s1)
-; RV64I-WITH-FP-NEXT:    sw s8, 92(s1)
+; RV64I-WITH-FP-NEXT:    sw s7, 124(s6)
+; RV64I-WITH-FP-NEXT:    sw s5, 120(s6)
+; RV64I-WITH-FP-NEXT:    sw s4, 116(s6)
+; RV64I-WITH-FP-NEXT:    sw s3, 112(s6)
+; RV64I-WITH-FP-NEXT:    sw s2, 108(s6)
+; RV64I-WITH-FP-NEXT:    sw s11, 104(s6)
+; RV64I-WITH-FP-NEXT:    sw s10, 100(s6)
+; RV64I-WITH-FP-NEXT:    sw s9, 96(s6)
+; RV64I-WITH-FP-NEXT:    sw s8, 92(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -288(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 88(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 88(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -280(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 84(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 84(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -272(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 80(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 80(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -264(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 76(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 76(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -256(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 72(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 72(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -248(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 68(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 68(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -240(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 64(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 64(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -232(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 60(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 60(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -224(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 56(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 56(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -216(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 52(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 52(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -208(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 48(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 48(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -200(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 44(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 44(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -192(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 40(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 40(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -184(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 36(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 36(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -176(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 32(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 32(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -168(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 28(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 28(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -160(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 24(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 24(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -152(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 20(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 20(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 16(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 16(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+12)(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+12)(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+8)(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+8)(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(s1)
 ; RV64I-WITH-FP-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 61a95f3a400a3..0b73661af5c06 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -190,21 +190,21 @@ define i32 @caller_many_scalars() nounwind {
 define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
 ; RV32I-FPELIM-LABEL: callee_large_scalars:
 ; RV32I-FPELIM:       # %bb.0:
-; RV32I-FPELIM-NEXT:    lw a6, 0(a1)
-; RV32I-FPELIM-NEXT:    lw a7, 0(a0)
+; RV32I-FPELIM-NEXT:    lw a2, 0(a1)
+; RV32I-FPELIM-NEXT:    lw a3, 0(a0)
 ; RV32I-FPELIM-NEXT:    lw a4, 4(a1)
 ; RV32I-FPELIM-NEXT:    lw a5, 12(a1)
-; RV32I-FPELIM-NEXT:    lw a2, 12(a0)
-; RV32I-FPELIM-NEXT:    lw a3, 4(a0)
+; RV32I-FPELIM-NEXT:    lw a6, 12(a0)
+; RV32I-FPELIM-NEXT:    lw a7, 4(a0)
 ; RV32I-FPELIM-NEXT:    lw a1, 8(a1)
 ; RV32I-FPELIM-NEXT:    lw a0, 8(a0)
-; RV32I-FPELIM-NEXT:    xor a2, a2, a5
-; RV32I-FPELIM-NEXT:    xor a3, a3, a4
-; RV32I-FPELIM-NEXT:    or a2, a3, a2
+; RV32I-FPELIM-NEXT:    xor a5, a6, a5
+; RV32I-FPELIM-NEXT:    xor a4, a7, a4
+; RV32I-FPELIM-NEXT:    or a4, a4, a5
 ; RV32I-FPELIM-NEXT:    xor a0, a0, a1
-; RV32I-FPELIM-NEXT:    xor a1, a7, a6
+; RV32I-FPELIM-NEXT:    xor a1, a3, a2
 ; RV32I-FPELIM-NEXT:    or a0, a1, a0
-; RV32I-FPELIM-NEXT:    or a0, a0, a2
+; RV32I-FPELIM-NEXT:    or a0, a0, a4
 ; RV32I-FPELIM-NEXT:    seqz a0, a0
 ; RV32I-FPELIM-NEXT:    ret
 ;
@@ -214,21 +214,21 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
 ; RV32I-WITHFP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
-; RV32I-WITHFP-NEXT:    lw a6, 0(a1)
-; RV32I-WITHFP-NEXT:    lw a7, 0(a0)
+; RV32I-WITHFP-NEXT:    lw a2, 0(a1)
+; RV32I-WITHFP-NEXT:    lw a3, 0(a0)
 ; RV32I-WITHFP-NEXT:    lw a4, 4(a1)
 ; RV32I-WITHFP-NEXT:    lw a5, 12(a1)
-; RV32I-WITHFP-NEXT:    lw a2, 12(a0)
-; RV32I-WITHFP-NEXT:    lw a3, 4(a0)
+; RV32I-WITHFP-NEXT:    lw a6, 12(a0)
+; RV32I-WITHFP-NEXT:    lw a7, 4(a0)
 ; RV32I-WITHFP-NEXT:    lw a1, 8(a1)
 ; RV32I-WITHFP-NEXT:    lw a0, 8(a0)
-; RV32I-WITHFP-NEXT:    xor a2, a2, a5
-; RV32I-WITHFP-NEXT:    xor a3, a3, a4
-; RV32I-WITHFP-NEXT:    or a2, a3, a2
+; RV32I-WITHFP-NEXT:    xor a5, a6, a5
+; RV32I-WITHFP-NEXT:    xor a4, a7, a4
+; RV32I-WITHFP-NEXT:    or a4, a4, a5
 ; RV32I-WITHFP-NEXT:    xor a0, a0, a1
-; RV32I-WITHFP-NEXT:    xor a1, a7, a6
+; RV32I-WITHFP-NEXT:    xor a1, a3, a2
 ; RV32I-WITHFP-NEXT:    or a0, a1, a0
-; RV32I-WITHFP-NEXT:    or a0, a0, a2
+; RV32I-WITHFP-NEXT:    or a0, a0, a4
 ; RV32I-WITHFP-NEXT:    seqz a0, a0
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -297,21 +297,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
 ; RV32I-FPELIM-LABEL: callee_large_scalars_exhausted_regs:
 ; RV32I-FPELIM:       # %bb.0:
 ; RV32I-FPELIM-NEXT:    lw a0, 4(sp)
-; RV32I-FPELIM-NEXT:    lw a6, 0(a0)
-; RV32I-FPELIM-NEXT:    lw t0, 0(a7)
+; RV32I-FPELIM-NEXT:    lw a1, 0(a0)
+; RV32I-FPELIM-NEXT:    lw a2, 0(a7)
 ; RV32I-FPELIM-NEXT:    lw a3, 4(a0)
 ; RV32I-FPELIM-NEXT:    lw a4, 12(a0)
 ; RV32I-FPELIM-NEXT:    lw a5, 12(a7)
-; RV32I-FPELIM-NEXT:    lw a1, 4(a7)
+; RV32I-FPELIM-NEXT:    lw a6, 4(a7)
 ; RV32I-FPELIM-NEXT:    lw a0, 8(a0)
-; RV32I-FPELIM-NEXT:    lw a2, 8(a7)
+; RV32I-FPELIM-NEXT:    lw a7, 8(a7)
 ; RV32I-FPELIM-NEXT:    xor a4, a5, a4
-; RV32I-FPELIM-NEXT:    xor a1, a1, a3
-; RV32I-FPELIM-NEXT:    or a1, a1, a4
-; RV32I-FPELIM-NEXT:    xor a0, a2, a0
-; RV32I-FPELIM-NEXT:    xor a2, t0, a6
-; RV32I-FPELIM-NEXT:    or a0, a2, a0
-; RV32I-FPELIM-NEXT:    or a0, a0, a1
+; RV32I-FPELIM-NEXT:    xor a3, a6, a3
+; RV32I-FPELIM-NEXT:    or a3, a3, a4
+; RV32I-FPELIM-NEXT:    xor a0, a7, a0
+; RV32I-FPELIM-NEXT:    xor a1, a2, a1
+; RV32I-FPELIM-NEXT:    or a0, a1, a0
+; RV32I-FPELIM-NEXT:    or a0, a0, a3
 ; RV32I-FPELIM-NEXT:    seqz a0, a0
 ; RV32I-FPELIM-NEXT:    ret
 ;
@@ -322,21 +322,21 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
 ; RV32I-WITHFP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    lw a0, 4(s0)
-; RV32I-WITHFP-NEXT:    lw a6, 0(a0)
-; RV32I-WITHFP-NEXT:    lw t0, 0(a7)
+; RV32I-WITHFP-NEXT:    lw a1, 0(a0)
+; RV32I-WITHFP-NEXT:    lw a2, 0(a7)
 ; RV32I-WITHFP-NEXT:    lw a3, 4(a0)
 ; RV32I-WITHFP-NEXT:    lw a4, 12(a0)
 ; RV32I-WITHFP-NEXT:    lw a5, 12(a7)
-; RV32I-WITHFP-NEXT:    lw a1, 4(a7)
+; RV32I-WITHFP-NEXT:    lw a6, 4(a7)
 ; RV32I-WITHFP-NEXT:    lw a0, 8(a0)
-; RV32I-WITHFP-NEXT:    lw a2, 8(a7)
+; RV32I-WITHFP-NEXT:    lw a7, 8(a7)
 ; RV32I-WITHFP-NEXT:    xor a4, a5, a4
-; RV32I-WITHFP-NEXT:    xor a1, a1, a3
-; RV32I-WITHFP-NEXT:    or a1, a1, a4
-; RV32I-WITHFP-NEXT:    xor a0, a2, a0
-; RV32I-WITHFP-NEXT:    xor a2, t0, a6
-; RV32I-WITHFP-NEXT:    or a0, a2, a0
-; RV32I-WITHFP-NEXT:    or a0, a0, a1
+; RV32I-WITHFP-NEXT:    xor a3, a6, a3
+; RV32I-WITHFP-NEXT:    or a3, a3, a4
+; RV32I-WITHFP-NEXT:    xor a0, a7, a0
+; RV32I-WITHFP-NEXT:    xor a1, a2, a1
+; RV32I-WITHFP-NEXT:    or a0, a1, a0
+; RV32I-WITHFP-NEXT:    or a0, a0, a3
 ; RV32I-WITHFP-NEXT:    seqz a0, a0
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
index c3c4666d0f504..f5d316e684740 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -106,21 +106,21 @@ define i32 @caller_many_scalars() nounwind {
 define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind {
 ; RV64I-LABEL: callee_large_scalars:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    ld a6, 0(a1)
-; RV64I-NEXT:    ld a7, 0(a0)
+; RV64I-NEXT:    ld a2, 0(a1)
+; RV64I-NEXT:    ld a3, 0(a0)
 ; RV64I-NEXT:    ld a4, 8(a1)
 ; RV64I-NEXT:    ld a5, 24(a1)
-; RV64I-NEXT:    ld a2, 24(a0)
-; RV64I-NEXT:    ld a3, 8(a0)
+; RV64I-NEXT:    ld a6, 24(a0)
+; RV64I-NEXT:    ld a7, 8(a0)
 ; RV64I-NEXT:    ld a1, 16(a1)
 ; RV64I-NEXT:    ld a0, 16(a0)
-; RV64I-NEXT:    xor a2, a2, a5
-; RV64I-NEXT:    xor a3, a3, a4
-; RV64I-NEXT:    or a2, a3, a2
+; RV64I-NEXT:    xor a5, a6, a5
+; RV64I-NEXT:    xor a4, a7, a4
+; RV64I-NEXT:    or a4, a4, a5
 ; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    xor a1, a7, a6
+; RV64I-NEXT:    xor a1, a3, a2
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a4
 ; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    ret
   %1 = icmp eq i256 %a, %b
@@ -161,21 +161,21 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d,
 ; RV64I-LABEL: callee_large_scalars_exhausted_regs:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    ld a0, 8(sp)
-; RV64I-NEXT:    ld a6, 0(a0)
-; RV64I-NEXT:    ld t0, 0(a7)
+; RV64I-NEXT:    ld a1, 0(a0)
+; RV64I-NEXT:    ld a2, 0(a7)
 ; RV64I-NEXT:    ld a3, 8(a0)
 ; RV64I-NEXT:    ld a4, 24(a0)
 ; RV64I-NEXT:    ld a5, 24(a7)
-; RV64I-NEXT:    ld a1, 8(a7)
+; RV64I-NEXT:    ld a6, 8(a7)
 ; RV64I-NEXT:    ld a0, 16(a0)
-; RV64I-NEXT:    ld a2, 16(a7)
+; RV64I-NEXT:    ld a7, 16(a7)
 ; RV64I-NEXT:    xor a4, a5, a4
-; RV64I-NEXT:    xor a1, a1, a3
-; RV64I-NEXT:    or a1, a1, a4
-; RV64I-NEXT:    xor a0, a2, a0
-; RV64I-NEXT:    xor a2, t0, a6
-; RV64I-NEXT:    or a0, a2, a0
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    xor a3, a6, a3
+; RV64I-NEXT:    or a3, a3, a4
+; RV64I-NEXT:    xor a0, a7, a0
+; RV64I-NEXT:    xor a1, a2, a1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    or a0, a0, a3
 ; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    ret
   %1 = icmp eq i256 %h, %j
diff --git a/llvm/test/CodeGen/RISCV/double-arith-strict.ll b/llvm/test/CodeGen/RISCV/double-arith-strict.ll
index 9f207cde78f61..915b63588d5a7 100644
--- a/llvm/test/CodeGen/RISCV/double-arith-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith-strict.ll
@@ -313,10 +313,10 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind strictfp {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
 ; RV32I-NEXT:    li a2, 0
@@ -325,10 +325,10 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind strictfp {
 ; RV32I-NEXT:    mv a4, a0
 ; RV32I-NEXT:    lui a0, 524288
 ; RV32I-NEXT:    xor a5, a1, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call fma@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -393,25 +393,25 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind strictfp {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s4, a5
-; RV32I-NEXT:    mv s5, a4
+; RV32I-NEXT:    mv s0, a5
+; RV32I-NEXT:    mv s1, a4
 ; RV32I-NEXT:    mv s2, a3
 ; RV32I-NEXT:    mv s3, a2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    mv a0, s5
-; RV32I-NEXT:    mv a1, s4
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s5, a1
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    mv a4, a0
 ; RV32I-NEXT:    lui a0, 524288
-; RV32I-NEXT:    xor a2, s1, a0
+; RV32I-NEXT:    xor a2, s5, a0
 ; RV32I-NEXT:    xor a5, a1, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, a2
 ; RV32I-NEXT:    mv a2, s3
 ; RV32I-NEXT:    mv a3, s2
@@ -434,19 +434,19 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind strictfp {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a2
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
 ; RV64I-NEXT:    li a1, -1
 ; RV64I-NEXT:    slli a2, a1, 63
-; RV64I-NEXT:    xor a1, s1, a2
+; RV64I-NEXT:    xor a1, s2, a2
 ; RV64I-NEXT:    xor a2, a0, a2
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call fma@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -489,7 +489,7 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind strictfp {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s4, a5
+; RV32I-NEXT:    mv s0, a5
 ; RV32I-NEXT:    mv s1, a4
 ; RV32I-NEXT:    mv s2, a1
 ; RV32I-NEXT:    mv s3, a0
@@ -498,20 +498,20 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind strictfp {
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    mv s5, a0
-; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s5, a1
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s4
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    mv a4, a0
 ; RV32I-NEXT:    lui a0, 524288
-; RV32I-NEXT:    xor a3, s0, a0
+; RV32I-NEXT:    xor a3, s5, a0
 ; RV32I-NEXT:    xor a5, a1, a0
 ; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    mv a2, s5
+; RV32I-NEXT:    mv a2, s4
 ; RV32I-NEXT:    call fma@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -531,19 +531,19 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind strictfp {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a2
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
 ; RV64I-NEXT:    li a1, -1
 ; RV64I-NEXT:    slli a2, a1, 63
-; RV64I-NEXT:    xor a1, s1, a2
+; RV64I-NEXT:    xor a1, s2, a2
 ; RV64I-NEXT:    xor a2, a0, a2
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call fma@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -582,19 +582,19 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind strictfp {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a5
-; RV32I-NEXT:    mv s3, a4
-; RV32I-NEXT:    mv s0, a3
-; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s0, a5
+; RV32I-NEXT:    mv s1, a4
+; RV32I-NEXT:    mv s2, a3
+; RV32I-NEXT:    mv s3, a2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    lui a2, 524288
 ; RV32I-NEXT:    xor a1, a1, a2
-; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:    mv a4, s3
-; RV32I-NEXT:    mv a5, s2
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a4, s1
+; RV32I-NEXT:    mv a5, s0
 ; RV32I-NEXT:    call fma@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -654,10 +654,10 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind strictfp {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a5
-; RV32I-NEXT:    mv s3, a4
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a5
+; RV32I-NEXT:    mv s1, a4
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    mv a1, a3
 ; RV32I-NEXT:    li a2, 0
@@ -666,10 +666,10 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind strictfp {
 ; RV32I-NEXT:    mv a2, a0
 ; RV32I-NEXT:    lui a0, 524288
 ; RV32I-NEXT:    xor a3, a1, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a4, s3
-; RV32I-NEXT:    mv a5, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a4, s1
+; RV32I-NEXT:    mv a5, s0
 ; RV32I-NEXT:    call fma@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index add838302f80c..55c710dbf95d9 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -479,10 +479,10 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
 ; RV32I-NEXT:    li a2, 0
@@ -491,10 +491,10 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a4, a0
 ; RV32I-NEXT:    lui a0, 524288
 ; RV32I-NEXT:    xor a5, a1, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call fma@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -559,25 +559,25 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s4, a5
-; RV32I-NEXT:    mv s5, a4
+; RV32I-NEXT:    mv s0, a5
+; RV32I-NEXT:    mv s1, a4
 ; RV32I-NEXT:    mv s2, a3
 ; RV32I-NEXT:    mv s3, a2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    mv a0, s5
-; RV32I-NEXT:    mv a1, s4
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s5, a1
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    mv a4, a0
 ; RV32I-NEXT:    lui a0, 524288
-; RV32I-NEXT:    xor a2, s1, a0
+; RV32I-NEXT:    xor a2, s5, a0
 ; RV32I-NEXT:    xor a5, a1, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, a2
 ; RV32I-NEXT:    mv a2, s3
 ; RV32I-NEXT:    mv a3, s2
@@ -600,19 +600,19 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a2
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
 ; RV64I-NEXT:    li a1, -1
 ; RV64I-NEXT:    slli a2, a1, 63
-; RV64I-NEXT:    xor a1, s1, a2
+; RV64I-NEXT:    xor a1, s2, a2
 ; RV64I-NEXT:    xor a2, a0, a2
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call fma@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -655,7 +655,7 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s4, a5
+; RV32I-NEXT:    mv s0, a5
 ; RV32I-NEXT:    mv s1, a4
 ; RV32I-NEXT:    mv s2, a1
 ; RV32I-NEXT:    mv s3, a0
@@ -664,20 +664,20 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    mv s5, a0
-; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s5, a1
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s4
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    mv a4, a0
 ; RV32I-NEXT:    lui a0, 524288
-; RV32I-NEXT:    xor a3, s0, a0
+; RV32I-NEXT:    xor a3, s5, a0
 ; RV32I-NEXT:    xor a5, a1, a0
 ; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    mv a2, s5
+; RV32I-NEXT:    mv a2, s4
 ; RV32I-NEXT:    call fma@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -697,19 +697,19 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a2
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
 ; RV64I-NEXT:    li a1, -1
 ; RV64I-NEXT:    slli a2, a1, 63
-; RV64I-NEXT:    xor a1, s1, a2
+; RV64I-NEXT:    xor a1, s2, a2
 ; RV64I-NEXT:    xor a2, a0, a2
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call fma@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -748,19 +748,19 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a5
-; RV32I-NEXT:    mv s3, a4
-; RV32I-NEXT:    mv s0, a3
-; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s0, a5
+; RV32I-NEXT:    mv s1, a4
+; RV32I-NEXT:    mv s2, a3
+; RV32I-NEXT:    mv s3, a2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    lui a2, 524288
 ; RV32I-NEXT:    xor a1, a1, a2
-; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s0
-; RV32I-NEXT:    mv a4, s3
-; RV32I-NEXT:    mv a5, s2
+; RV32I-NEXT:    mv a2, s3
+; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a4, s1
+; RV32I-NEXT:    mv a5, s0
 ; RV32I-NEXT:    call fma@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -820,10 +820,10 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a5
-; RV32I-NEXT:    mv s3, a4
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a5
+; RV32I-NEXT:    mv s1, a4
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    mv a1, a3
 ; RV32I-NEXT:    li a2, 0
@@ -832,10 +832,10 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a2, a0
 ; RV32I-NEXT:    lui a0, 524288
 ; RV32I-NEXT:    xor a3, a1, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a4, s3
-; RV32I-NEXT:    mv a5, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a4, s1
+; RV32I-NEXT:    mv a5, s0
 ; RV32I-NEXT:    call fma@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -945,10 +945,10 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
 ; RV32I-NEXT:    li a2, 0
@@ -956,10 +956,10 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    mv s5, a1
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __muldf3@plt
 ; RV32I-NEXT:    mv a2, s4
 ; RV32I-NEXT:    mv a3, s5
@@ -981,16 +981,16 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    mv a1, s2
-; RV64I-NEXT:    call __muldf3@plt
 ; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call __muldf3@plt
+; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call __subdf3@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -1033,22 +1033,15 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a5
-; RV32I-NEXT:    mv s3, a4
-; RV32I-NEXT:    mv s0, a3
-; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s0, a5
+; RV32I-NEXT:    mv s1, a4
+; RV32I-NEXT:    mv s2, a3
+; RV32I-NEXT:    mv s3, a2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    mv s5, a1
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    li a2, 0
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    li a2, 0
@@ -1056,15 +1049,22 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    li a2, 0
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __adddf3@plt
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s5
-; RV32I-NEXT:    mv a2, s0
-; RV32I-NEXT:    mv a3, s1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s3
 ; RV32I-NEXT:    call __muldf3@plt
 ; RV32I-NEXT:    lui a2, 524288
 ; RV32I-NEXT:    xor a1, a1, a2
-; RV32I-NEXT:    mv a2, s2
-; RV32I-NEXT:    mv a3, s3
+; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    mv a3, s1
 ; RV32I-NEXT:    call __subdf3@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -1078,26 +1078,25 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind {
 ;
 ; RV64I-LABEL: fnmadd_d_contract:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -48
-; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call __muldf3@plt
 ; RV64I-NEXT:    li a1, -1
@@ -1105,12 +1104,11 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __subdf3@plt
-; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %a_ = fadd double 0.0, %a ; avoid negation using xor
   %b_ = fadd double 0.0, %b ; avoid negation using xor
@@ -1148,29 +1146,29 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a5
-; RV32I-NEXT:    mv s3, a4
-; RV32I-NEXT:    mv s4, a3
-; RV32I-NEXT:    mv s5, a2
+; RV32I-NEXT:    mv s0, a5
+; RV32I-NEXT:    mv s1, a4
+; RV32I-NEXT:    mv s2, a3
+; RV32I-NEXT:    mv s3, a2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    mv a0, s5
-; RV32I-NEXT:    mv a1, s4
+; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s5, a1
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __adddf3@plt
 ; RV32I-NEXT:    mv a2, a0
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a0, s4
+; RV32I-NEXT:    mv a1, s5
 ; RV32I-NEXT:    call __muldf3@plt
 ; RV32I-NEXT:    mv a2, a0
 ; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:    mv a0, s3
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __subdf3@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -1189,19 +1187,19 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __adddf3@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __muldf3@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __subdf3@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index 63af598ee4653..4cae4e16b4499 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -137,15 +137,15 @@ define i32 @fcvt_w_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a3, 794112
 ; RV32I-NEXT:    li s0, 0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __fixdfsi@plt
 ; RV32I-NEXT:    lui s5, 524288
 ; RV32I-NEXT:    lui s4, 524288
@@ -156,17 +156,17 @@ define i32 @fcvt_w_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    lui a0, 269824
 ; RV32I-NEXT:    addi a3, a0, -1
 ; RV32I-NEXT:    lui a2, 1047552
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __gtdf2@plt
 ; RV32I-NEXT:    bge s0, a0, .LBB3_4
 ; RV32I-NEXT:  # %bb.3:
 ; RV32I-NEXT:    addi s4, s5, -1
 ; RV32I-NEXT:  .LBB3_4: # %start
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s1
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    bne a0, s0, .LBB3_6
 ; RV32I-NEXT:  # %bb.5: # %start
@@ -363,20 +363,20 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a0, 270080
 ; RV32I-NEXT:    addi a3, a0, -1
 ; RV32I-NEXT:    lui a2, 1048064
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __gtdf2@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __fixunsdfsi@plt
 ; RV32I-NEXT:    li a1, 0
@@ -385,7 +385,7 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:  .LBB6_2: # %start
 ; RV32I-NEXT:    li a0, -1
-; RV32I-NEXT:    bgtz s2, .LBB6_4
+; RV32I-NEXT:    bgtz s0, .LBB6_4
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:  .LBB6_4: # %start
@@ -694,7 +694,7 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lui a0, 278016
 ; RV32I-NEXT:    addi s3, a0, -1
@@ -704,67 +704,67 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    call __gtdf2@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    lui a3, 802304
-; RV32I-NEXT:    li s0, 0
+; RV32I-NEXT:    li s2, 0
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    mv s6, a0
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __fixdfdi@plt
 ; RV32I-NEXT:    mv s5, a1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    blt s6, s0, .LBB12_2
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    blt s6, s2, .LBB12_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:  .LBB12_2: # %start
 ; RV32I-NEXT:    li s6, -1
-; RV32I-NEXT:    blt s0, s4, .LBB12_4
+; RV32I-NEXT:    blt s2, s4, .LBB12_4
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    mv s6, a1
 ; RV32I-NEXT:  .LBB12_4: # %start
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __unorddf2@plt
-; RV32I-NEXT:    mv s4, s0
-; RV32I-NEXT:    bne a0, s0, .LBB12_6
+; RV32I-NEXT:    mv s4, s2
+; RV32I-NEXT:    bne a0, s2, .LBB12_6
 ; RV32I-NEXT:  # %bb.5: # %start
 ; RV32I-NEXT:    mv s4, s6
 ; RV32I-NEXT:  .LBB12_6: # %start
 ; RV32I-NEXT:    lui a3, 802304
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    lui s7, 524288
 ; RV32I-NEXT:    lui s6, 524288
-; RV32I-NEXT:    blt a0, s0, .LBB12_8
+; RV32I-NEXT:    blt a0, s2, .LBB12_8
 ; RV32I-NEXT:  # %bb.7: # %start
 ; RV32I-NEXT:    mv s6, s5
 ; RV32I-NEXT:  .LBB12_8: # %start
 ; RV32I-NEXT:    li a2, -1
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    mv a3, s3
 ; RV32I-NEXT:    call __gtdf2@plt
-; RV32I-NEXT:    bge s0, a0, .LBB12_10
+; RV32I-NEXT:    bge s2, a0, .LBB12_10
 ; RV32I-NEXT:  # %bb.9:
 ; RV32I-NEXT:    addi s6, s7, -1
 ; RV32I-NEXT:  .LBB12_10: # %start
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __unorddf2@plt
-; RV32I-NEXT:    bne a0, s0, .LBB12_12
+; RV32I-NEXT:    bne a0, s2, .LBB12_12
 ; RV32I-NEXT:  # %bb.11: # %start
-; RV32I-NEXT:    mv s0, s6
+; RV32I-NEXT:    mv s2, s6
 ; RV32I-NEXT:  .LBB12_12: # %start
 ; RV32I-NEXT:    mv a0, s4
-; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
@@ -936,22 +936,22 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a0, 278272
 ; RV32I-NEXT:    addi s3, a0, -1
 ; RV32I-NEXT:    li a2, -1
-; RV32I-NEXT:    li s2, -1
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    li s0, -1
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a3, s3
 ; RV32I-NEXT:    call __gtdf2@plt
 ; RV32I-NEXT:    mv s6, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __fixunsdfdi@plt
 ; RV32I-NEXT:    mv s5, a1
@@ -966,12 +966,12 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    mv s4, a1
 ; RV32I-NEXT:  .LBB14_4: # %start
 ; RV32I-NEXT:    li a2, -1
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    mv a3, s3
 ; RV32I-NEXT:    call __gtdf2@plt
 ; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
@@ -983,10 +983,10 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
 ; RV32I-NEXT:  .LBB14_6: # %start
 ; RV32I-NEXT:    bgtz s3, .LBB14_8
 ; RV32I-NEXT:  # %bb.7: # %start
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:  .LBB14_8: # %start
 ; RV32I-NEXT:    mv a0, s4
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -1515,15 +1515,15 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a3, 790016
 ; RV32I-NEXT:    li s0, 0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __fixdfsi@plt
 ; RV32I-NEXT:    lui s4, 1048568
 ; RV32I-NEXT:    blt s3, s0, .LBB26_2
@@ -1532,8 +1532,8 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV32I-NEXT:  .LBB26_2: # %start
 ; RV32I-NEXT:    lui a0, 265728
 ; RV32I-NEXT:    addi a3, a0, -64
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    call __gtdf2@plt
 ; RV32I-NEXT:    bge s0, a0, .LBB26_4
@@ -1541,10 +1541,10 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV32I-NEXT:    lui a0, 8
 ; RV32I-NEXT:    addi s4, a0, -1
 ; RV32I-NEXT:  .LBB26_4: # %start
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s1
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    bne a0, s0, .LBB26_6
 ; RV32I-NEXT:  # %bb.5: # %start
@@ -1574,12 +1574,12 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV64I-NEXT:    slli a1, a0, 53
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gedf2@plt
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __fixdfdi@plt
-; RV64I-NEXT:    li s1, 0
+; RV64I-NEXT:    li s2, 0
 ; RV64I-NEXT:    lui s3, 1048568
-; RV64I-NEXT:    bltz s2, .LBB26_2
+; RV64I-NEXT:    bltz s1, .LBB26_2
 ; RV64I-NEXT:  # %bb.1: # %start
 ; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:  .LBB26_2: # %start
@@ -1588,7 +1588,7 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV64I-NEXT:    slli a1, a0, 38
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gtdf2@plt
-; RV64I-NEXT:    bge s1, a0, .LBB26_4
+; RV64I-NEXT:    bge s2, a0, .LBB26_4
 ; RV64I-NEXT:  # %bb.3:
 ; RV64I-NEXT:    lui a0, 8
 ; RV64I-NEXT:    addiw s3, a0, -1
@@ -1596,11 +1596,11 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind {
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unorddf2@plt
-; RV64I-NEXT:    bne a0, s1, .LBB26_6
+; RV64I-NEXT:    bne a0, s2, .LBB26_6
 ; RV64I-NEXT:  # %bb.5: # %start
-; RV64I-NEXT:    mv s1, s3
+; RV64I-NEXT:    mv s2, s3
 ; RV64I-NEXT:  .LBB26_6: # %start
-; RV64I-NEXT:    slli a0, s1, 48
+; RV64I-NEXT:    slli a0, s2, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
@@ -1677,20 +1677,20 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a0, 265984
 ; RV32I-NEXT:    addi a3, a0, -32
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    call __gtdf2@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __fixunsdfsi@plt
 ; RV32I-NEXT:    li a1, 0
@@ -1701,7 +1701,7 @@ define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind {
 ; RV32I-NEXT:    lui a0, 16
 ; RV32I-NEXT:    addi a0, a0, -1
 ; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    bgtz s2, .LBB28_4
+; RV32I-NEXT:    bgtz s0, .LBB28_4
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    mv a2, a1
 ; RV32I-NEXT:  .LBB28_4: # %start
@@ -1833,15 +1833,15 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a3, 787968
 ; RV32I-NEXT:    li s0, 0
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __fixdfsi@plt
 ; RV32I-NEXT:    li s4, -128
 ; RV32I-NEXT:    blt s3, s0, .LBB30_2
@@ -1849,8 +1849,8 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:  .LBB30_2: # %start
 ; RV32I-NEXT:    lui a3, 263676
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    call __gtdf2@plt
 ; RV32I-NEXT:    li s3, 127
@@ -1858,10 +1858,10 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    mv s3, s4
 ; RV32I-NEXT:  .LBB30_4: # %start
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    mv a2, s1
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s2
+; RV32I-NEXT:    mv a3, s1
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    bne a0, s0, .LBB30_6
 ; RV32I-NEXT:  # %bb.5: # %start
@@ -1891,12 +1891,12 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ; RV64I-NEXT:    slli a1, a0, 53
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gedf2@plt
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __fixdfdi@plt
-; RV64I-NEXT:    li s1, 0
+; RV64I-NEXT:    li s2, 0
 ; RV64I-NEXT:    li s3, -128
-; RV64I-NEXT:    bltz s2, .LBB30_2
+; RV64I-NEXT:    bltz s1, .LBB30_2
 ; RV64I-NEXT:  # %bb.1: # %start
 ; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:  .LBB30_2: # %start
@@ -1904,19 +1904,19 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind {
 ; RV64I-NEXT:    slli a1, a0, 34
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gtdf2@plt
-; RV64I-NEXT:    li s2, 127
-; RV64I-NEXT:    blt s1, a0, .LBB30_4
+; RV64I-NEXT:    li s1, 127
+; RV64I-NEXT:    blt s2, a0, .LBB30_4
 ; RV64I-NEXT:  # %bb.3: # %start
-; RV64I-NEXT:    mv s2, s3
+; RV64I-NEXT:    mv s1, s3
 ; RV64I-NEXT:  .LBB30_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unorddf2@plt
-; RV64I-NEXT:    bne a0, s1, .LBB30_6
+; RV64I-NEXT:    bne a0, s2, .LBB30_6
 ; RV64I-NEXT:  # %bb.5: # %start
-; RV64I-NEXT:    mv s1, s2
+; RV64I-NEXT:    mv s2, s1
 ; RV64I-NEXT:  .LBB30_6: # %start
-; RV64I-NEXT:    slli a0, s1, 56
+; RV64I-NEXT:    slli a0, s2, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
@@ -1997,18 +1997,18 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a3, 263934
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    call __gtdf2@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __gedf2@plt
 ; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __fixunsdfsi@plt
 ; RV32I-NEXT:    li a1, 0
@@ -2017,7 +2017,7 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind {
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:  .LBB32_2: # %start
 ; RV32I-NEXT:    li a0, 255
-; RV32I-NEXT:    bgtz s2, .LBB32_4
+; RV32I-NEXT:    bgtz s0, .LBB32_4
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:  .LBB32_4: # %start
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
index 15cf27319e328..2af4ee69ce222 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll
@@ -250,16 +250,16 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    call __eqdf2@plt
 ; RV32I-NEXT:    snez s4, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    seqz a0, a0
 ; RV32I-NEXT:    and a0, a0, s4
@@ -378,16 +378,16 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    call __eqdf2@plt
 ; RV32I-NEXT:    seqz s4, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    or a0, a0, s4
@@ -885,16 +885,16 @@ define i32 @fcmps_one(double %a, double %b) nounwind strictfp {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    call __eqdf2@plt
 ; RV32I-NEXT:    snez s4, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    seqz a0, a0
 ; RV32I-NEXT:    and a0, a0, s4
@@ -999,16 +999,16 @@ define i32 @fcmps_ueq(double %a, double %b) nounwind strictfp {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    call __eqdf2@plt
 ; RV32I-NEXT:    seqz s4, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    or a0, a0, s4
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll
index 4f987a077b446..9b8fbc3473f41 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll
@@ -234,16 +234,16 @@ define i32 @fcmp_one(double %a, double %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    call __eqdf2@plt
 ; RV32I-NEXT:    snez s4, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    seqz a0, a0
 ; RV32I-NEXT:    and a0, a0, s4
@@ -348,16 +348,16 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a3
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s0, a1
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s0, a3
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    call __eqdf2@plt
 ; RV32I-NEXT:    seqz s4, a0
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    or a0, a0, s4
diff --git a/llvm/test/CodeGen/RISCV/float-arith-strict.ll b/llvm/test/CodeGen/RISCV/float-arith-strict.ll
index 401e0f79526f0..e58110dca92c5 100644
--- a/llvm/test/CodeGen/RISCV/float-arith-strict.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith-strict.ll
@@ -379,18 +379,18 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind strictfp {
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a2
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    lui a2, 524288
-; RV32I-NEXT:    xor a1, s1, a2
+; RV32I-NEXT:    xor a1, s2, a2
 ; RV32I-NEXT:    xor a2, a0, a2
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -407,18 +407,18 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind strictfp {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a2
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    lui a2, 524288
-; RV64I-NEXT:    xor a1, s1, a2
+; RV64I-NEXT:    xor a1, s2, a2
 ; RV64I-NEXT:    xor a2, a0, a2
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -459,18 +459,18 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind strictfp {
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a2
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    lui a2, 524288
-; RV32I-NEXT:    xor a1, s1, a2
+; RV32I-NEXT:    xor a1, s2, a2
 ; RV32I-NEXT:    xor a2, a0, a2
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -487,18 +487,18 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind strictfp {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a2
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    lui a2, 524288
-; RV64I-NEXT:    xor a1, s1, a2
+; RV64I-NEXT:    xor a1, s2, a2
 ; RV64I-NEXT:    xor a2, a0, a2
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index 3a525cf4d8feb..ba6b2eef908a4 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -557,18 +557,18 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a2
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    lui a2, 524288
-; RV32I-NEXT:    xor a1, s1, a2
+; RV32I-NEXT:    xor a1, s2, a2
 ; RV32I-NEXT:    xor a2, a0, a2
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -585,18 +585,18 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a2
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    lui a2, 524288
-; RV64I-NEXT:    xor a1, s1, a2
+; RV64I-NEXT:    xor a1, s2, a2
 ; RV64I-NEXT:    xor a2, a0, a2
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -637,18 +637,18 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a2
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    lui a2, 524288
-; RV32I-NEXT:    xor a1, s1, a2
+; RV32I-NEXT:    xor a1, s2, a2
 ; RV32I-NEXT:    xor a2, a0, a2
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -665,18 +665,18 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a2
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    lui a2, 524288
-; RV64I-NEXT:    xor a1, s1, a2
+; RV64I-NEXT:    xor a1, s2, a2
 ; RV64I-NEXT:    xor a2, a0, a2
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -883,16 +883,16 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    call __mulsf3@plt
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -908,16 +908,16 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    mv a1, s2
-; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    call __mulsf3@plt
+; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -952,74 +952,70 @@ define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind {
 ;
 ; RV32I-LABEL: fnmadd_s_contract:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi sp, sp, -32
-; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a2
 ; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    lui a1, 524288
 ; RV32I-NEXT:    xor a0, a0, a1
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __subsf3@plt
-; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: fnmadd_s_contract:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -48
-; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    mv s3, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    lui a1, 524288
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __subsf3@plt
-; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %a_ = fadd float 0.0, %a ; avoid negation using xor
   %b_ = fadd float 0.0, %b ; avoid negation using xor
@@ -1054,19 +1050,19 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s0, a2
 ; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -1082,19 +1078,19 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index e3a11088e15f3..ce97b2930e393 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -903,25 +903,25 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __gesf2@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __fixunssfdi@plt
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    li s5, 0
-; RV32I-NEXT:    bltz s1, .LBB14_2
+; RV32I-NEXT:    bltz s2, .LBB14_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    mv s5, a0
 ; RV32I-NEXT:  .LBB14_2: # %start
 ; RV32I-NEXT:    lui a0, 391168
-; RV32I-NEXT:    addi s1, a0, -1
+; RV32I-NEXT:    addi s4, a0, -1
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a1, s4
 ; RV32I-NEXT:    call __gtsf2@plt
+; RV32I-NEXT:    li s2, -1
 ; RV32I-NEXT:    li s3, -1
-; RV32I-NEXT:    li s4, -1
 ; RV32I-NEXT:    bgtz a0, .LBB14_4
 ; RV32I-NEXT:  # %bb.3: # %start
-; RV32I-NEXT:    mv s4, s5
+; RV32I-NEXT:    mv s3, s5
 ; RV32I-NEXT:  .LBB14_4: # %start
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
@@ -929,17 +929,17 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bltz a0, .LBB14_6
 ; RV32I-NEXT:  # %bb.5: # %start
-; RV32I-NEXT:    mv s5, s2
+; RV32I-NEXT:    mv s5, s1
 ; RV32I-NEXT:  .LBB14_6: # %start
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a1, s4
 ; RV32I-NEXT:    call __gtsf2@plt
 ; RV32I-NEXT:    bgtz a0, .LBB14_8
 ; RV32I-NEXT:  # %bb.7: # %start
-; RV32I-NEXT:    mv s3, s5
+; RV32I-NEXT:    mv s2, s5
 ; RV32I-NEXT:  .LBB14_8: # %start
-; RV32I-NEXT:    mv a0, s4
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -1382,12 +1382,12 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lui a1, 815104
 ; RV32I-NEXT:    call __gesf2@plt
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __fixsfsi@plt
-; RV32I-NEXT:    li s1, 0
+; RV32I-NEXT:    li s2, 0
 ; RV32I-NEXT:    lui s3, 1048568
-; RV32I-NEXT:    bltz s2, .LBB24_2
+; RV32I-NEXT:    bltz s1, .LBB24_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:  .LBB24_2: # %start
@@ -1395,7 +1395,7 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV32I-NEXT:    addi a1, a0, -512
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __gtsf2@plt
-; RV32I-NEXT:    bge s1, a0, .LBB24_4
+; RV32I-NEXT:    bge s2, a0, .LBB24_4
 ; RV32I-NEXT:  # %bb.3:
 ; RV32I-NEXT:    lui a0, 8
 ; RV32I-NEXT:    addi s3, a0, -1
@@ -1403,11 +1403,11 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __unordsf2@plt
-; RV32I-NEXT:    bne a0, s1, .LBB24_6
+; RV32I-NEXT:    bne a0, s2, .LBB24_6
 ; RV32I-NEXT:  # %bb.5: # %start
-; RV32I-NEXT:    mv s1, s3
+; RV32I-NEXT:    mv s2, s3
 ; RV32I-NEXT:  .LBB24_6: # %start
-; RV32I-NEXT:    slli a0, s1, 16
+; RV32I-NEXT:    slli a0, s2, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -1428,12 +1428,12 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lui a1, 815104
 ; RV64I-NEXT:    call __gesf2@plt
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __fixsfdi@plt
-; RV64I-NEXT:    li s1, 0
+; RV64I-NEXT:    li s2, 0
 ; RV64I-NEXT:    lui s3, 1048568
-; RV64I-NEXT:    bltz s2, .LBB24_2
+; RV64I-NEXT:    bltz s1, .LBB24_2
 ; RV64I-NEXT:  # %bb.1: # %start
 ; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:  .LBB24_2: # %start
@@ -1441,7 +1441,7 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV64I-NEXT:    addiw a1, a0, -512
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gtsf2@plt
-; RV64I-NEXT:    bge s1, a0, .LBB24_4
+; RV64I-NEXT:    bge s2, a0, .LBB24_4
 ; RV64I-NEXT:  # %bb.3:
 ; RV64I-NEXT:    lui a0, 8
 ; RV64I-NEXT:    addiw s3, a0, -1
@@ -1449,11 +1449,11 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind {
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
-; RV64I-NEXT:    bne a0, s1, .LBB24_6
+; RV64I-NEXT:    bne a0, s2, .LBB24_6
 ; RV64I-NEXT:  # %bb.5: # %start
-; RV64I-NEXT:    mv s1, s3
+; RV64I-NEXT:    mv s2, s3
 ; RV64I-NEXT:  .LBB24_6: # %start
-; RV64I-NEXT:    slli a0, s1, 48
+; RV64I-NEXT:    slli a0, s2, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
@@ -1678,31 +1678,31 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lui a1, 798720
 ; RV32I-NEXT:    call __gesf2@plt
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __fixsfsi@plt
-; RV32I-NEXT:    li s1, 0
+; RV32I-NEXT:    li s2, 0
 ; RV32I-NEXT:    li s3, -128
-; RV32I-NEXT:    bltz s2, .LBB28_2
+; RV32I-NEXT:    bltz s1, .LBB28_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:  .LBB28_2: # %start
 ; RV32I-NEXT:    lui a1, 274400
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __gtsf2@plt
-; RV32I-NEXT:    li s2, 127
-; RV32I-NEXT:    blt s1, a0, .LBB28_4
+; RV32I-NEXT:    li s1, 127
+; RV32I-NEXT:    blt s2, a0, .LBB28_4
 ; RV32I-NEXT:  # %bb.3: # %start
-; RV32I-NEXT:    mv s2, s3
+; RV32I-NEXT:    mv s1, s3
 ; RV32I-NEXT:  .LBB28_4: # %start
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __unordsf2@plt
-; RV32I-NEXT:    bne a0, s1, .LBB28_6
+; RV32I-NEXT:    bne a0, s2, .LBB28_6
 ; RV32I-NEXT:  # %bb.5: # %start
-; RV32I-NEXT:    mv s1, s2
+; RV32I-NEXT:    mv s2, s1
 ; RV32I-NEXT:  .LBB28_6: # %start
-; RV32I-NEXT:    slli a0, s1, 24
+; RV32I-NEXT:    slli a0, s2, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -1723,31 +1723,31 @@ define signext i8 @fcvt_w_s_sat_i8(float %a) nounwind {
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lui a1, 798720
 ; RV64I-NEXT:    call __gesf2@plt
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __fixsfdi@plt
-; RV64I-NEXT:    li s1, 0
+; RV64I-NEXT:    li s2, 0
 ; RV64I-NEXT:    li s3, -128
-; RV64I-NEXT:    bltz s2, .LBB28_2
+; RV64I-NEXT:    bltz s1, .LBB28_2
 ; RV64I-NEXT:  # %bb.1: # %start
 ; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:  .LBB28_2: # %start
 ; RV64I-NEXT:    lui a1, 274400
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gtsf2@plt
-; RV64I-NEXT:    li s2, 127
-; RV64I-NEXT:    blt s1, a0, .LBB28_4
+; RV64I-NEXT:    li s1, 127
+; RV64I-NEXT:    blt s2, a0, .LBB28_4
 ; RV64I-NEXT:  # %bb.3: # %start
-; RV64I-NEXT:    mv s2, s3
+; RV64I-NEXT:    mv s1, s3
 ; RV64I-NEXT:  .LBB28_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
-; RV64I-NEXT:    bne a0, s1, .LBB28_6
+; RV64I-NEXT:    bne a0, s2, .LBB28_6
 ; RV64I-NEXT:  # %bb.5: # %start
-; RV64I-NEXT:    mv s1, s2
+; RV64I-NEXT:    mv s2, s1
 ; RV64I-NEXT:  .LBB28_6: # %start
-; RV64I-NEXT:    slli a0, s1, 56
+; RV64I-NEXT:    slli a0, s2, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll
index cea4e40644523..947aa7c19a2da 100644
--- a/llvm/test/CodeGen/RISCV/fp128.ll
+++ b/llvm/test/CodeGen/RISCV/fp128.ll
@@ -14,25 +14,25 @@ define i32 @test_load_and_cmp() nounwind {
 ; RV32I-NEXT:    addi sp, sp, -48
 ; RV32I-NEXT:    sw ra, 44(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lui a0, %hi(x)
-; RV32I-NEXT:    lw a6, %lo(x)(a0)
-; RV32I-NEXT:    lw a7, %lo(x+4)(a0)
+; RV32I-NEXT:    lw a2, %lo(x)(a0)
+; RV32I-NEXT:    lw a1, %lo(x+4)(a0)
 ; RV32I-NEXT:    lw a3, %lo(x+8)(a0)
 ; RV32I-NEXT:    lw a0, %lo(x+12)(a0)
 ; RV32I-NEXT:    lui a4, %hi(y)
 ; RV32I-NEXT:    lw a5, %lo(y)(a4)
-; RV32I-NEXT:    lw a2, %lo(y+4)(a4)
-; RV32I-NEXT:    lw a1, %lo(y+8)(a4)
+; RV32I-NEXT:    lw a6, %lo(y+4)(a4)
+; RV32I-NEXT:    lw a7, %lo(y+8)(a4)
 ; RV32I-NEXT:    lw a4, %lo(y+12)(a4)
 ; RV32I-NEXT:    sw a4, 20(sp)
-; RV32I-NEXT:    sw a1, 16(sp)
-; RV32I-NEXT:    sw a2, 12(sp)
+; RV32I-NEXT:    sw a7, 16(sp)
+; RV32I-NEXT:    sw a6, 12(sp)
 ; RV32I-NEXT:    sw a5, 8(sp)
 ; RV32I-NEXT:    sw a0, 36(sp)
 ; RV32I-NEXT:    sw a3, 32(sp)
-; RV32I-NEXT:    sw a7, 28(sp)
+; RV32I-NEXT:    sw a1, 28(sp)
 ; RV32I-NEXT:    addi a0, sp, 24
 ; RV32I-NEXT:    addi a1, sp, 8
-; RV32I-NEXT:    sw a6, 24(sp)
+; RV32I-NEXT:    sw a2, 24(sp)
 ; RV32I-NEXT:    call __netf2@plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
@@ -51,26 +51,26 @@ define i32 @test_add_and_fptosi() nounwind {
 ; RV32I-NEXT:    addi sp, sp, -80
 ; RV32I-NEXT:    sw ra, 76(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lui a0, %hi(x)
-; RV32I-NEXT:    lw a6, %lo(x)(a0)
-; RV32I-NEXT:    lw a7, %lo(x+4)(a0)
+; RV32I-NEXT:    lw a3, %lo(x)(a0)
+; RV32I-NEXT:    lw a1, %lo(x+4)(a0)
 ; RV32I-NEXT:    lw a2, %lo(x+8)(a0)
 ; RV32I-NEXT:    lw a0, %lo(x+12)(a0)
 ; RV32I-NEXT:    lui a4, %hi(y)
 ; RV32I-NEXT:    lw a5, %lo(y)(a4)
-; RV32I-NEXT:    lw a3, %lo(y+4)(a4)
-; RV32I-NEXT:    lw a1, %lo(y+8)(a4)
+; RV32I-NEXT:    lw a6, %lo(y+4)(a4)
+; RV32I-NEXT:    lw a7, %lo(y+8)(a4)
 ; RV32I-NEXT:    lw a4, %lo(y+12)(a4)
 ; RV32I-NEXT:    sw a4, 36(sp)
-; RV32I-NEXT:    sw a1, 32(sp)
-; RV32I-NEXT:    sw a3, 28(sp)
+; RV32I-NEXT:    sw a7, 32(sp)
+; RV32I-NEXT:    sw a6, 28(sp)
 ; RV32I-NEXT:    sw a5, 24(sp)
 ; RV32I-NEXT:    sw a0, 52(sp)
 ; RV32I-NEXT:    sw a2, 48(sp)
-; RV32I-NEXT:    sw a7, 44(sp)
+; RV32I-NEXT:    sw a1, 44(sp)
 ; RV32I-NEXT:    addi a0, sp, 56
 ; RV32I-NEXT:    addi a1, sp, 40
 ; RV32I-NEXT:    addi a2, sp, 24
-; RV32I-NEXT:    sw a6, 40(sp)
+; RV32I-NEXT:    sw a3, 40(sp)
 ; RV32I-NEXT:    call __addtf3@plt
 ; RV32I-NEXT:    lw a1, 56(sp)
 ; RV32I-NEXT:    lw a0, 60(sp)
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 46e78e375edff..69a646db09aa3 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -1159,43 +1159,43 @@ define i64 @stest_f64i64(double %x) {
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixdfti@plt
 ; RV32-NEXT:    lw a2, 20(sp)
-; RV32-NEXT:    lw t0, 16(sp)
+; RV32-NEXT:    lw a3, 16(sp)
 ; RV32-NEXT:    lw a1, 12(sp)
 ; RV32-NEXT:    lw a0, 8(sp)
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    addi a5, a7, -1
+; RV32-NEXT:    lui a4, 524288
+; RV32-NEXT:    addi a5, a4, -1
 ; RV32-NEXT:    beq a1, a5, .LBB18_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    sltu a4, a1, a5
-; RV32-NEXT:    or a3, t0, a2
-; RV32-NEXT:    bnez a3, .LBB18_3
+; RV32-NEXT:    sltu a7, a1, a5
+; RV32-NEXT:    or a6, a3, a2
+; RV32-NEXT:    bnez a6, .LBB18_3
 ; RV32-NEXT:    j .LBB18_4
 ; RV32-NEXT:  .LBB18_2:
-; RV32-NEXT:    addi a4, a0, 1
-; RV32-NEXT:    snez a4, a4
-; RV32-NEXT:    or a3, t0, a2
-; RV32-NEXT:    beqz a3, .LBB18_4
+; RV32-NEXT:    addi a6, a0, 1
+; RV32-NEXT:    snez a7, a6
+; RV32-NEXT:    or a6, a3, a2
+; RV32-NEXT:    beqz a6, .LBB18_4
 ; RV32-NEXT:  .LBB18_3: # %entry
-; RV32-NEXT:    slti a4, a2, 0
+; RV32-NEXT:    slti a7, a2, 0
 ; RV32-NEXT:  .LBB18_4: # %entry
 ; RV32-NEXT:    li a6, -1
-; RV32-NEXT:    beqz a4, .LBB18_7
+; RV32-NEXT:    beqz a7, .LBB18_7
 ; RV32-NEXT:  # %bb.5: # %entry
-; RV32-NEXT:    beq a1, a7, .LBB18_8
+; RV32-NEXT:    beq a1, a4, .LBB18_8
 ; RV32-NEXT:  .LBB18_6: # %entry
-; RV32-NEXT:    sltu a4, a7, a1
-; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    sltu a4, a4, a1
+; RV32-NEXT:    and a3, a3, a2
 ; RV32-NEXT:    bne a3, a6, .LBB18_9
 ; RV32-NEXT:    j .LBB18_10
 ; RV32-NEXT:  .LBB18_7: # %entry
 ; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    li t0, 0
+; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    li a0, -1
 ; RV32-NEXT:    mv a1, a5
-; RV32-NEXT:    bne a1, a7, .LBB18_6
+; RV32-NEXT:    bne a1, a4, .LBB18_6
 ; RV32-NEXT:  .LBB18_8:
 ; RV32-NEXT:    snez a4, a0
-; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    and a3, a3, a2
 ; RV32-NEXT:    beq a3, a6, .LBB18_10
 ; RV32-NEXT:  .LBB18_9: # %entry
 ; RV32-NEXT:    slt a4, a6, a2
@@ -1441,43 +1441,43 @@ define i64 @stest_f32i64(float %x) {
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
 ; RV32-NEXT:    lw a2, 20(sp)
-; RV32-NEXT:    lw t0, 16(sp)
+; RV32-NEXT:    lw a3, 16(sp)
 ; RV32-NEXT:    lw a1, 12(sp)
 ; RV32-NEXT:    lw a0, 8(sp)
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    addi a5, a7, -1
+; RV32-NEXT:    lui a4, 524288
+; RV32-NEXT:    addi a5, a4, -1
 ; RV32-NEXT:    beq a1, a5, .LBB21_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    sltu a4, a1, a5
-; RV32-NEXT:    or a3, t0, a2
-; RV32-NEXT:    bnez a3, .LBB21_3
+; RV32-NEXT:    sltu a7, a1, a5
+; RV32-NEXT:    or a6, a3, a2
+; RV32-NEXT:    bnez a6, .LBB21_3
 ; RV32-NEXT:    j .LBB21_4
 ; RV32-NEXT:  .LBB21_2:
-; RV32-NEXT:    addi a4, a0, 1
-; RV32-NEXT:    snez a4, a4
-; RV32-NEXT:    or a3, t0, a2
-; RV32-NEXT:    beqz a3, .LBB21_4
+; RV32-NEXT:    addi a6, a0, 1
+; RV32-NEXT:    snez a7, a6
+; RV32-NEXT:    or a6, a3, a2
+; RV32-NEXT:    beqz a6, .LBB21_4
 ; RV32-NEXT:  .LBB21_3: # %entry
-; RV32-NEXT:    slti a4, a2, 0
+; RV32-NEXT:    slti a7, a2, 0
 ; RV32-NEXT:  .LBB21_4: # %entry
 ; RV32-NEXT:    li a6, -1
-; RV32-NEXT:    beqz a4, .LBB21_7
+; RV32-NEXT:    beqz a7, .LBB21_7
 ; RV32-NEXT:  # %bb.5: # %entry
-; RV32-NEXT:    beq a1, a7, .LBB21_8
+; RV32-NEXT:    beq a1, a4, .LBB21_8
 ; RV32-NEXT:  .LBB21_6: # %entry
-; RV32-NEXT:    sltu a4, a7, a1
-; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    sltu a4, a4, a1
+; RV32-NEXT:    and a3, a3, a2
 ; RV32-NEXT:    bne a3, a6, .LBB21_9
 ; RV32-NEXT:    j .LBB21_10
 ; RV32-NEXT:  .LBB21_7: # %entry
 ; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    li t0, 0
+; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    li a0, -1
 ; RV32-NEXT:    mv a1, a5
-; RV32-NEXT:    bne a1, a7, .LBB21_6
+; RV32-NEXT:    bne a1, a4, .LBB21_6
 ; RV32-NEXT:  .LBB21_8:
 ; RV32-NEXT:    snez a4, a0
-; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    and a3, a3, a2
 ; RV32-NEXT:    beq a3, a6, .LBB21_10
 ; RV32-NEXT:  .LBB21_9: # %entry
 ; RV32-NEXT:    slt a4, a6, a2
@@ -1685,43 +1685,43 @@ define i64 @stest_f16i64(half %x) {
 ; RV32-NEXT:    addi a0, sp, 8
 ; RV32-NEXT:    call __fixsfti@plt
 ; RV32-NEXT:    lw a2, 20(sp)
-; RV32-NEXT:    lw t0, 16(sp)
+; RV32-NEXT:    lw a3, 16(sp)
 ; RV32-NEXT:    lw a1, 12(sp)
 ; RV32-NEXT:    lw a0, 8(sp)
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    addi a5, a7, -1
+; RV32-NEXT:    lui a4, 524288
+; RV32-NEXT:    addi a5, a4, -1
 ; RV32-NEXT:    beq a1, a5, .LBB24_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    sltu a4, a1, a5
-; RV32-NEXT:    or a3, t0, a2
-; RV32-NEXT:    bnez a3, .LBB24_3
+; RV32-NEXT:    sltu a7, a1, a5
+; RV32-NEXT:    or a6, a3, a2
+; RV32-NEXT:    bnez a6, .LBB24_3
 ; RV32-NEXT:    j .LBB24_4
 ; RV32-NEXT:  .LBB24_2:
-; RV32-NEXT:    addi a4, a0, 1
-; RV32-NEXT:    snez a4, a4
-; RV32-NEXT:    or a3, t0, a2
-; RV32-NEXT:    beqz a3, .LBB24_4
+; RV32-NEXT:    addi a6, a0, 1
+; RV32-NEXT:    snez a7, a6
+; RV32-NEXT:    or a6, a3, a2
+; RV32-NEXT:    beqz a6, .LBB24_4
 ; RV32-NEXT:  .LBB24_3: # %entry
-; RV32-NEXT:    slti a4, a2, 0
+; RV32-NEXT:    slti a7, a2, 0
 ; RV32-NEXT:  .LBB24_4: # %entry
 ; RV32-NEXT:    li a6, -1
-; RV32-NEXT:    beqz a4, .LBB24_7
+; RV32-NEXT:    beqz a7, .LBB24_7
 ; RV32-NEXT:  # %bb.5: # %entry
-; RV32-NEXT:    beq a1, a7, .LBB24_8
+; RV32-NEXT:    beq a1, a4, .LBB24_8
 ; RV32-NEXT:  .LBB24_6: # %entry
-; RV32-NEXT:    sltu a4, a7, a1
-; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    sltu a4, a4, a1
+; RV32-NEXT:    and a3, a3, a2
 ; RV32-NEXT:    bne a3, a6, .LBB24_9
 ; RV32-NEXT:    j .LBB24_10
 ; RV32-NEXT:  .LBB24_7: # %entry
 ; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    li t0, 0
+; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    li a0, -1
 ; RV32-NEXT:    mv a1, a5
-; RV32-NEXT:    bne a1, a7, .LBB24_6
+; RV32-NEXT:    bne a1, a4, .LBB24_6
 ; RV32-NEXT:  .LBB24_8:
 ; RV32-NEXT:    snez a4, a0
-; RV32-NEXT:    and a3, t0, a2
+; RV32-NEXT:    and a3, a3, a2
 ; RV32-NEXT:    beq a3, a6, .LBB24_10
 ; RV32-NEXT:  .LBB24_9: # %entry
 ; RV32-NEXT:    slt a4, a6, a2
@@ -3090,109 +3090,109 @@ define i64 @stest_f64i64_mm(double %x) {
 ; RV32-NEXT:    lw a5, 8(sp)
 ; RV32-NEXT:    lw a3, 20(sp)
 ; RV32-NEXT:    lw a1, 12(sp)
-; RV32-NEXT:    li a6, -1
-; RV32-NEXT:    mv a4, a5
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    mv a7, a5
 ; RV32-NEXT:    bltz a3, .LBB45_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a4, -1
+; RV32-NEXT:    li a7, -1
 ; RV32-NEXT:  .LBB45_2: # %entry
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    addi a2, a7, -1
-; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    bgeu a1, a2, .LBB45_19
+; RV32-NEXT:    lui a4, 524288
+; RV32-NEXT:    addi a6, a4, -1
+; RV32-NEXT:    mv t0, a5
+; RV32-NEXT:    bgeu a1, a6, .LBB45_19
 ; RV32-NEXT:  # %bb.3: # %entry
-; RV32-NEXT:    lw t0, 16(sp)
-; RV32-NEXT:    bne a1, a2, .LBB45_20
+; RV32-NEXT:    lw a0, 16(sp)
+; RV32-NEXT:    bne a1, a6, .LBB45_20
 ; RV32-NEXT:  .LBB45_4: # %entry
-; RV32-NEXT:    or a0, t0, a3
-; RV32-NEXT:    bnez a0, .LBB45_21
+; RV32-NEXT:    or t0, a0, a3
+; RV32-NEXT:    bnez t0, .LBB45_21
 ; RV32-NEXT:  .LBB45_5: # %entry
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a7, a1
 ; RV32-NEXT:    bgez a3, .LBB45_22
 ; RV32-NEXT:  .LBB45_6: # %entry
-; RV32-NEXT:    bgeu a1, a2, .LBB45_23
+; RV32-NEXT:    bgeu a1, a6, .LBB45_23
 ; RV32-NEXT:  .LBB45_7: # %entry
-; RV32-NEXT:    bnez a0, .LBB45_24
+; RV32-NEXT:    bnez t0, .LBB45_24
 ; RV32-NEXT:  .LBB45_8: # %entry
-; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li a6, 0
 ; RV32-NEXT:    bnez a3, .LBB45_25
 ; RV32-NEXT:  .LBB45_9: # %entry
 ; RV32-NEXT:    bgez a3, .LBB45_26
 ; RV32-NEXT:  .LBB45_10: # %entry
-; RV32-NEXT:    mv a4, a5
-; RV32-NEXT:    bgeu a7, a1, .LBB45_27
+; RV32-NEXT:    mv a7, a5
+; RV32-NEXT:    bgeu a4, a1, .LBB45_27
 ; RV32-NEXT:  .LBB45_11: # %entry
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    bne a1, a7, .LBB45_28
+; RV32-NEXT:    bne a1, a4, .LBB45_28
 ; RV32-NEXT:  .LBB45_12: # %entry
 ; RV32-NEXT:    bltz a3, .LBB45_29
 ; RV32-NEXT:  .LBB45_13: # %entry
-; RV32-NEXT:    and a2, a2, a3
-; RV32-NEXT:    bne a2, a6, .LBB45_30
+; RV32-NEXT:    and a6, a6, a3
+; RV32-NEXT:    bne a6, a2, .LBB45_30
 ; RV32-NEXT:  .LBB45_14: # %entry
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a1
 ; RV32-NEXT:    bltz a3, .LBB45_31
 ; RV32-NEXT:  .LBB45_15: # %entry
-; RV32-NEXT:    bgeu a7, a1, .LBB45_32
+; RV32-NEXT:    bgeu a4, a1, .LBB45_32
 ; RV32-NEXT:  .LBB45_16: # %entry
-; RV32-NEXT:    beq a2, a6, .LBB45_18
+; RV32-NEXT:    beq a6, a2, .LBB45_18
 ; RV32-NEXT:  .LBB45_17: # %entry
-; RV32-NEXT:    mv a1, a4
+; RV32-NEXT:    mv a1, a5
 ; RV32-NEXT:  .LBB45_18: # %entry
 ; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB45_19: # %entry
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:    lw t0, 16(sp)
-; RV32-NEXT:    beq a1, a2, .LBB45_4
+; RV32-NEXT:    li t0, -1
+; RV32-NEXT:    lw a0, 16(sp)
+; RV32-NEXT:    beq a1, a6, .LBB45_4
 ; RV32-NEXT:  .LBB45_20: # %entry
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    or a0, t0, a3
-; RV32-NEXT:    beqz a0, .LBB45_5
+; RV32-NEXT:    mv a5, t0
+; RV32-NEXT:    or t0, a0, a3
+; RV32-NEXT:    beqz t0, .LBB45_5
 ; RV32-NEXT:  .LBB45_21: # %entry
-; RV32-NEXT:    mv a5, a4
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a7
+; RV32-NEXT:    mv a7, a1
 ; RV32-NEXT:    bltz a3, .LBB45_6
 ; RV32-NEXT:  .LBB45_22: # %entry
-; RV32-NEXT:    mv a4, a2
-; RV32-NEXT:    bltu a1, a2, .LBB45_7
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    bltu a1, a6, .LBB45_7
 ; RV32-NEXT:  .LBB45_23: # %entry
-; RV32-NEXT:    mv a1, a2
-; RV32-NEXT:    beqz a0, .LBB45_8
+; RV32-NEXT:    mv a1, a6
+; RV32-NEXT:    beqz t0, .LBB45_8
 ; RV32-NEXT:  .LBB45_24: # %entry
-; RV32-NEXT:    mv a1, a4
-; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    mv a1, a7
+; RV32-NEXT:    li a6, 0
 ; RV32-NEXT:    beqz a3, .LBB45_9
 ; RV32-NEXT:  .LBB45_25: # %entry
-; RV32-NEXT:    srai a0, a3, 31
-; RV32-NEXT:    and a2, a0, t0
+; RV32-NEXT:    srai a6, a3, 31
+; RV32-NEXT:    and a6, a6, a0
 ; RV32-NEXT:    bltz a3, .LBB45_10
 ; RV32-NEXT:  .LBB45_26: # %entry
 ; RV32-NEXT:    li a3, 0
-; RV32-NEXT:    mv a4, a5
-; RV32-NEXT:    bltu a7, a1, .LBB45_11
+; RV32-NEXT:    mv a7, a5
+; RV32-NEXT:    bltu a4, a1, .LBB45_11
 ; RV32-NEXT:  .LBB45_27: # %entry
-; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    li a7, 0
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    beq a1, a7, .LBB45_12
+; RV32-NEXT:    beq a1, a4, .LBB45_12
 ; RV32-NEXT:  .LBB45_28: # %entry
-; RV32-NEXT:    mv a0, a4
+; RV32-NEXT:    mv a0, a7
 ; RV32-NEXT:    bgez a3, .LBB45_13
 ; RV32-NEXT:  .LBB45_29: # %entry
 ; RV32-NEXT:    li a5, 0
-; RV32-NEXT:    and a2, a2, a3
-; RV32-NEXT:    beq a2, a6, .LBB45_14
+; RV32-NEXT:    and a6, a6, a3
+; RV32-NEXT:    beq a6, a2, .LBB45_14
 ; RV32-NEXT:  .LBB45_30: # %entry
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a1
 ; RV32-NEXT:    bgez a3, .LBB45_15
 ; RV32-NEXT:  .LBB45_31: # %entry
-; RV32-NEXT:    lui a4, 524288
-; RV32-NEXT:    bltu a7, a1, .LBB45_16
+; RV32-NEXT:    lui a5, 524288
+; RV32-NEXT:    bltu a4, a1, .LBB45_16
 ; RV32-NEXT:  .LBB45_32: # %entry
 ; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:    bne a2, a6, .LBB45_17
+; RV32-NEXT:    bne a6, a2, .LBB45_17
 ; RV32-NEXT:    j .LBB45_18
 ;
 ; RV64IF-LABEL: stest_f64i64_mm:
@@ -3514,109 +3514,109 @@ define i64 @stest_f32i64_mm(float %x) {
 ; RV32-NEXT:    lw a5, 8(sp)
 ; RV32-NEXT:    lw a3, 20(sp)
 ; RV32-NEXT:    lw a1, 12(sp)
-; RV32-NEXT:    li a6, -1
-; RV32-NEXT:    mv a4, a5
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    mv a7, a5
 ; RV32-NEXT:    bltz a3, .LBB48_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a4, -1
+; RV32-NEXT:    li a7, -1
 ; RV32-NEXT:  .LBB48_2: # %entry
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    addi a2, a7, -1
-; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    bgeu a1, a2, .LBB48_19
+; RV32-NEXT:    lui a4, 524288
+; RV32-NEXT:    addi a6, a4, -1
+; RV32-NEXT:    mv t0, a5
+; RV32-NEXT:    bgeu a1, a6, .LBB48_19
 ; RV32-NEXT:  # %bb.3: # %entry
-; RV32-NEXT:    lw t0, 16(sp)
-; RV32-NEXT:    bne a1, a2, .LBB48_20
+; RV32-NEXT:    lw a0, 16(sp)
+; RV32-NEXT:    bne a1, a6, .LBB48_20
 ; RV32-NEXT:  .LBB48_4: # %entry
-; RV32-NEXT:    or a0, t0, a3
-; RV32-NEXT:    bnez a0, .LBB48_21
+; RV32-NEXT:    or t0, a0, a3
+; RV32-NEXT:    bnez t0, .LBB48_21
 ; RV32-NEXT:  .LBB48_5: # %entry
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a7, a1
 ; RV32-NEXT:    bgez a3, .LBB48_22
 ; RV32-NEXT:  .LBB48_6: # %entry
-; RV32-NEXT:    bgeu a1, a2, .LBB48_23
+; RV32-NEXT:    bgeu a1, a6, .LBB48_23
 ; RV32-NEXT:  .LBB48_7: # %entry
-; RV32-NEXT:    bnez a0, .LBB48_24
+; RV32-NEXT:    bnez t0, .LBB48_24
 ; RV32-NEXT:  .LBB48_8: # %entry
-; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li a6, 0
 ; RV32-NEXT:    bnez a3, .LBB48_25
 ; RV32-NEXT:  .LBB48_9: # %entry
 ; RV32-NEXT:    bgez a3, .LBB48_26
 ; RV32-NEXT:  .LBB48_10: # %entry
-; RV32-NEXT:    mv a4, a5
-; RV32-NEXT:    bgeu a7, a1, .LBB48_27
+; RV32-NEXT:    mv a7, a5
+; RV32-NEXT:    bgeu a4, a1, .LBB48_27
 ; RV32-NEXT:  .LBB48_11: # %entry
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    bne a1, a7, .LBB48_28
+; RV32-NEXT:    bne a1, a4, .LBB48_28
 ; RV32-NEXT:  .LBB48_12: # %entry
 ; RV32-NEXT:    bltz a3, .LBB48_29
 ; RV32-NEXT:  .LBB48_13: # %entry
-; RV32-NEXT:    and a2, a2, a3
-; RV32-NEXT:    bne a2, a6, .LBB48_30
+; RV32-NEXT:    and a6, a6, a3
+; RV32-NEXT:    bne a6, a2, .LBB48_30
 ; RV32-NEXT:  .LBB48_14: # %entry
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a1
 ; RV32-NEXT:    bltz a3, .LBB48_31
 ; RV32-NEXT:  .LBB48_15: # %entry
-; RV32-NEXT:    bgeu a7, a1, .LBB48_32
+; RV32-NEXT:    bgeu a4, a1, .LBB48_32
 ; RV32-NEXT:  .LBB48_16: # %entry
-; RV32-NEXT:    beq a2, a6, .LBB48_18
+; RV32-NEXT:    beq a6, a2, .LBB48_18
 ; RV32-NEXT:  .LBB48_17: # %entry
-; RV32-NEXT:    mv a1, a4
+; RV32-NEXT:    mv a1, a5
 ; RV32-NEXT:  .LBB48_18: # %entry
 ; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB48_19: # %entry
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:    lw t0, 16(sp)
-; RV32-NEXT:    beq a1, a2, .LBB48_4
+; RV32-NEXT:    li t0, -1
+; RV32-NEXT:    lw a0, 16(sp)
+; RV32-NEXT:    beq a1, a6, .LBB48_4
 ; RV32-NEXT:  .LBB48_20: # %entry
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    or a0, t0, a3
-; RV32-NEXT:    beqz a0, .LBB48_5
+; RV32-NEXT:    mv a5, t0
+; RV32-NEXT:    or t0, a0, a3
+; RV32-NEXT:    beqz t0, .LBB48_5
 ; RV32-NEXT:  .LBB48_21: # %entry
-; RV32-NEXT:    mv a5, a4
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a7
+; RV32-NEXT:    mv a7, a1
 ; RV32-NEXT:    bltz a3, .LBB48_6
 ; RV32-NEXT:  .LBB48_22: # %entry
-; RV32-NEXT:    mv a4, a2
-; RV32-NEXT:    bltu a1, a2, .LBB48_7
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    bltu a1, a6, .LBB48_7
 ; RV32-NEXT:  .LBB48_23: # %entry
-; RV32-NEXT:    mv a1, a2
-; RV32-NEXT:    beqz a0, .LBB48_8
+; RV32-NEXT:    mv a1, a6
+; RV32-NEXT:    beqz t0, .LBB48_8
 ; RV32-NEXT:  .LBB48_24: # %entry
-; RV32-NEXT:    mv a1, a4
-; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    mv a1, a7
+; RV32-NEXT:    li a6, 0
 ; RV32-NEXT:    beqz a3, .LBB48_9
 ; RV32-NEXT:  .LBB48_25: # %entry
-; RV32-NEXT:    srai a0, a3, 31
-; RV32-NEXT:    and a2, a0, t0
+; RV32-NEXT:    srai a6, a3, 31
+; RV32-NEXT:    and a6, a6, a0
 ; RV32-NEXT:    bltz a3, .LBB48_10
 ; RV32-NEXT:  .LBB48_26: # %entry
 ; RV32-NEXT:    li a3, 0
-; RV32-NEXT:    mv a4, a5
-; RV32-NEXT:    bltu a7, a1, .LBB48_11
+; RV32-NEXT:    mv a7, a5
+; RV32-NEXT:    bltu a4, a1, .LBB48_11
 ; RV32-NEXT:  .LBB48_27: # %entry
-; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    li a7, 0
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    beq a1, a7, .LBB48_12
+; RV32-NEXT:    beq a1, a4, .LBB48_12
 ; RV32-NEXT:  .LBB48_28: # %entry
-; RV32-NEXT:    mv a0, a4
+; RV32-NEXT:    mv a0, a7
 ; RV32-NEXT:    bgez a3, .LBB48_13
 ; RV32-NEXT:  .LBB48_29: # %entry
 ; RV32-NEXT:    li a5, 0
-; RV32-NEXT:    and a2, a2, a3
-; RV32-NEXT:    beq a2, a6, .LBB48_14
+; RV32-NEXT:    and a6, a6, a3
+; RV32-NEXT:    beq a6, a2, .LBB48_14
 ; RV32-NEXT:  .LBB48_30: # %entry
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a1
 ; RV32-NEXT:    bgez a3, .LBB48_15
 ; RV32-NEXT:  .LBB48_31: # %entry
-; RV32-NEXT:    lui a4, 524288
-; RV32-NEXT:    bltu a7, a1, .LBB48_16
+; RV32-NEXT:    lui a5, 524288
+; RV32-NEXT:    bltu a4, a1, .LBB48_16
 ; RV32-NEXT:  .LBB48_32: # %entry
 ; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:    bne a2, a6, .LBB48_17
+; RV32-NEXT:    bne a6, a2, .LBB48_17
 ; RV32-NEXT:    j .LBB48_18
 ;
 ; RV64-LABEL: stest_f32i64_mm:
@@ -3886,109 +3886,109 @@ define i64 @stest_f16i64_mm(half %x) {
 ; RV32-NEXT:    lw a5, 8(sp)
 ; RV32-NEXT:    lw a3, 20(sp)
 ; RV32-NEXT:    lw a1, 12(sp)
-; RV32-NEXT:    li a6, -1
-; RV32-NEXT:    mv a4, a5
+; RV32-NEXT:    li a2, -1
+; RV32-NEXT:    mv a7, a5
 ; RV32-NEXT:    bltz a3, .LBB51_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a4, -1
+; RV32-NEXT:    li a7, -1
 ; RV32-NEXT:  .LBB51_2: # %entry
-; RV32-NEXT:    lui a7, 524288
-; RV32-NEXT:    addi a2, a7, -1
-; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    bgeu a1, a2, .LBB51_19
+; RV32-NEXT:    lui a4, 524288
+; RV32-NEXT:    addi a6, a4, -1
+; RV32-NEXT:    mv t0, a5
+; RV32-NEXT:    bgeu a1, a6, .LBB51_19
 ; RV32-NEXT:  # %bb.3: # %entry
-; RV32-NEXT:    lw t0, 16(sp)
-; RV32-NEXT:    bne a1, a2, .LBB51_20
+; RV32-NEXT:    lw a0, 16(sp)
+; RV32-NEXT:    bne a1, a6, .LBB51_20
 ; RV32-NEXT:  .LBB51_4: # %entry
-; RV32-NEXT:    or a0, t0, a3
-; RV32-NEXT:    bnez a0, .LBB51_21
+; RV32-NEXT:    or t0, a0, a3
+; RV32-NEXT:    bnez t0, .LBB51_21
 ; RV32-NEXT:  .LBB51_5: # %entry
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a7, a1
 ; RV32-NEXT:    bgez a3, .LBB51_22
 ; RV32-NEXT:  .LBB51_6: # %entry
-; RV32-NEXT:    bgeu a1, a2, .LBB51_23
+; RV32-NEXT:    bgeu a1, a6, .LBB51_23
 ; RV32-NEXT:  .LBB51_7: # %entry
-; RV32-NEXT:    bnez a0, .LBB51_24
+; RV32-NEXT:    bnez t0, .LBB51_24
 ; RV32-NEXT:  .LBB51_8: # %entry
-; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    li a6, 0
 ; RV32-NEXT:    bnez a3, .LBB51_25
 ; RV32-NEXT:  .LBB51_9: # %entry
 ; RV32-NEXT:    bgez a3, .LBB51_26
 ; RV32-NEXT:  .LBB51_10: # %entry
-; RV32-NEXT:    mv a4, a5
-; RV32-NEXT:    bgeu a7, a1, .LBB51_27
+; RV32-NEXT:    mv a7, a5
+; RV32-NEXT:    bgeu a4, a1, .LBB51_27
 ; RV32-NEXT:  .LBB51_11: # %entry
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    bne a1, a7, .LBB51_28
+; RV32-NEXT:    bne a1, a4, .LBB51_28
 ; RV32-NEXT:  .LBB51_12: # %entry
 ; RV32-NEXT:    bltz a3, .LBB51_29
 ; RV32-NEXT:  .LBB51_13: # %entry
-; RV32-NEXT:    and a2, a2, a3
-; RV32-NEXT:    bne a2, a6, .LBB51_30
+; RV32-NEXT:    and a6, a6, a3
+; RV32-NEXT:    bne a6, a2, .LBB51_30
 ; RV32-NEXT:  .LBB51_14: # %entry
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a1
 ; RV32-NEXT:    bltz a3, .LBB51_31
 ; RV32-NEXT:  .LBB51_15: # %entry
-; RV32-NEXT:    bgeu a7, a1, .LBB51_32
+; RV32-NEXT:    bgeu a4, a1, .LBB51_32
 ; RV32-NEXT:  .LBB51_16: # %entry
-; RV32-NEXT:    beq a2, a6, .LBB51_18
+; RV32-NEXT:    beq a6, a2, .LBB51_18
 ; RV32-NEXT:  .LBB51_17: # %entry
-; RV32-NEXT:    mv a1, a4
+; RV32-NEXT:    mv a1, a5
 ; RV32-NEXT:  .LBB51_18: # %entry
 ; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB51_19: # %entry
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:    lw t0, 16(sp)
-; RV32-NEXT:    beq a1, a2, .LBB51_4
+; RV32-NEXT:    li t0, -1
+; RV32-NEXT:    lw a0, 16(sp)
+; RV32-NEXT:    beq a1, a6, .LBB51_4
 ; RV32-NEXT:  .LBB51_20: # %entry
-; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    or a0, t0, a3
-; RV32-NEXT:    beqz a0, .LBB51_5
+; RV32-NEXT:    mv a5, t0
+; RV32-NEXT:    or t0, a0, a3
+; RV32-NEXT:    beqz t0, .LBB51_5
 ; RV32-NEXT:  .LBB51_21: # %entry
-; RV32-NEXT:    mv a5, a4
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a7
+; RV32-NEXT:    mv a7, a1
 ; RV32-NEXT:    bltz a3, .LBB51_6
 ; RV32-NEXT:  .LBB51_22: # %entry
-; RV32-NEXT:    mv a4, a2
-; RV32-NEXT:    bltu a1, a2, .LBB51_7
+; RV32-NEXT:    mv a7, a6
+; RV32-NEXT:    bltu a1, a6, .LBB51_7
 ; RV32-NEXT:  .LBB51_23: # %entry
-; RV32-NEXT:    mv a1, a2
-; RV32-NEXT:    beqz a0, .LBB51_8
+; RV32-NEXT:    mv a1, a6
+; RV32-NEXT:    beqz t0, .LBB51_8
 ; RV32-NEXT:  .LBB51_24: # %entry
-; RV32-NEXT:    mv a1, a4
-; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    mv a1, a7
+; RV32-NEXT:    li a6, 0
 ; RV32-NEXT:    beqz a3, .LBB51_9
 ; RV32-NEXT:  .LBB51_25: # %entry
-; RV32-NEXT:    srai a0, a3, 31
-; RV32-NEXT:    and a2, a0, t0
+; RV32-NEXT:    srai a6, a3, 31
+; RV32-NEXT:    and a6, a6, a0
 ; RV32-NEXT:    bltz a3, .LBB51_10
 ; RV32-NEXT:  .LBB51_26: # %entry
 ; RV32-NEXT:    li a3, 0
-; RV32-NEXT:    mv a4, a5
-; RV32-NEXT:    bltu a7, a1, .LBB51_11
+; RV32-NEXT:    mv a7, a5
+; RV32-NEXT:    bltu a4, a1, .LBB51_11
 ; RV32-NEXT:  .LBB51_27: # %entry
-; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    li a7, 0
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    beq a1, a7, .LBB51_12
+; RV32-NEXT:    beq a1, a4, .LBB51_12
 ; RV32-NEXT:  .LBB51_28: # %entry
-; RV32-NEXT:    mv a0, a4
+; RV32-NEXT:    mv a0, a7
 ; RV32-NEXT:    bgez a3, .LBB51_13
 ; RV32-NEXT:  .LBB51_29: # %entry
 ; RV32-NEXT:    li a5, 0
-; RV32-NEXT:    and a2, a2, a3
-; RV32-NEXT:    beq a2, a6, .LBB51_14
+; RV32-NEXT:    and a6, a6, a3
+; RV32-NEXT:    beq a6, a2, .LBB51_14
 ; RV32-NEXT:  .LBB51_30: # %entry
 ; RV32-NEXT:    mv a0, a5
-; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:    mv a5, a1
 ; RV32-NEXT:    bgez a3, .LBB51_15
 ; RV32-NEXT:  .LBB51_31: # %entry
-; RV32-NEXT:    lui a4, 524288
-; RV32-NEXT:    bltu a7, a1, .LBB51_16
+; RV32-NEXT:    lui a5, 524288
+; RV32-NEXT:    bltu a4, a1, .LBB51_16
 ; RV32-NEXT:  .LBB51_32: # %entry
 ; RV32-NEXT:    lui a1, 524288
-; RV32-NEXT:    bne a2, a6, .LBB51_17
+; RV32-NEXT:    bne a6, a2, .LBB51_17
 ; RV32-NEXT:    j .LBB51_18
 ;
 ; RV64-LABEL: stest_f16i64_mm:
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
index 4cba269baaa51..6a144032e866a 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -118,56 +118,56 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) {
 ; CHECK-NEXT:    fmv.w.x ft2, a3
 ; CHECK-NEXT:    fmv.w.x ft0, a2
 ; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    lui a6, 524288
-; CHECK-NEXT:    addiw a5, a6, -1
+; CHECK-NEXT:    lui a4, 524288
+; CHECK-NEXT:    addiw a6, a4, -1
 ; CHECK-NEXT:    fcvt.l.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a5, .LBB3_2
+; CHECK-NEXT:    blt a2, a6, .LBB3_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    mv a2, a6
 ; CHECK-NEXT:  .LBB3_2: # %entry
 ; CHECK-NEXT:    fmv.w.x ft1, a1
 ; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a5, .LBB3_11
+; CHECK-NEXT:    bge a3, a6, .LBB3_11
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    bge a1, a5, .LBB3_12
+; CHECK-NEXT:    fcvt.l.s a5, ft1, rtz
+; CHECK-NEXT:    bge a1, a6, .LBB3_12
 ; CHECK-NEXT:  .LBB3_4: # %entry
-; CHECK-NEXT:    bge a4, a5, .LBB3_13
+; CHECK-NEXT:    bge a5, a6, .LBB3_13
 ; CHECK-NEXT:  .LBB3_5: # %entry
-; CHECK-NEXT:    bge a6, a4, .LBB3_14
+; CHECK-NEXT:    bge a4, a5, .LBB3_14
 ; CHECK-NEXT:  .LBB3_6: # %entry
-; CHECK-NEXT:    bge a6, a1, .LBB3_15
+; CHECK-NEXT:    bge a4, a1, .LBB3_15
 ; CHECK-NEXT:  .LBB3_7: # %entry
-; CHECK-NEXT:    bge a6, a3, .LBB3_16
+; CHECK-NEXT:    bge a4, a3, .LBB3_16
 ; CHECK-NEXT:  .LBB3_8: # %entry
-; CHECK-NEXT:    blt a6, a2, .LBB3_10
+; CHECK-NEXT:    blt a4, a2, .LBB3_10
 ; CHECK-NEXT:  .LBB3_9: # %entry
 ; CHECK-NEXT:    lui a2, 524288
 ; CHECK-NEXT:  .LBB3_10: # %entry
 ; CHECK-NEXT:    sw a2, 12(a0)
 ; CHECK-NEXT:    sw a3, 8(a0)
 ; CHECK-NEXT:    sw a1, 4(a0)
-; CHECK-NEXT:    sw a4, 0(a0)
+; CHECK-NEXT:    sw a5, 0(a0)
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB3_11: # %entry
-; CHECK-NEXT:    mv a3, a5
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a1, a5, .LBB3_4
+; CHECK-NEXT:    mv a3, a6
+; CHECK-NEXT:    fcvt.l.s a5, ft1, rtz
+; CHECK-NEXT:    blt a1, a6, .LBB3_4
 ; CHECK-NEXT:  .LBB3_12: # %entry
-; CHECK-NEXT:    mv a1, a5
-; CHECK-NEXT:    blt a4, a5, .LBB3_5
+; CHECK-NEXT:    mv a1, a6
+; CHECK-NEXT:    blt a5, a6, .LBB3_5
 ; CHECK-NEXT:  .LBB3_13: # %entry
-; CHECK-NEXT:    mv a4, a5
-; CHECK-NEXT:    blt a6, a4, .LBB3_6
+; CHECK-NEXT:    mv a5, a6
+; CHECK-NEXT:    blt a4, a5, .LBB3_6
 ; CHECK-NEXT:  .LBB3_14: # %entry
-; CHECK-NEXT:    lui a4, 524288
-; CHECK-NEXT:    blt a6, a1, .LBB3_7
+; CHECK-NEXT:    lui a5, 524288
+; CHECK-NEXT:    blt a4, a1, .LBB3_7
 ; CHECK-NEXT:  .LBB3_15: # %entry
 ; CHECK-NEXT:    lui a1, 524288
-; CHECK-NEXT:    blt a6, a3, .LBB3_8
+; CHECK-NEXT:    blt a4, a3, .LBB3_8
 ; CHECK-NEXT:  .LBB3_16: # %entry
 ; CHECK-NEXT:    lui a3, 524288
-; CHECK-NEXT:    bge a6, a2, .LBB3_9
+; CHECK-NEXT:    bge a4, a2, .LBB3_9
 ; CHECK-NEXT:    j .LBB3_10
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
@@ -311,23 +311,23 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s3, -40
 ; CHECK-NEXT:    .cfi_offset s4, -48
 ; CHECK-NEXT:    lhu s2, 24(a1)
-; CHECK-NEXT:    lhu s4, 0(a1)
-; CHECK-NEXT:    lhu s0, 8(a1)
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s3, 8(a1)
 ; CHECK-NEXT:    lhu a1, 16(a1)
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s4
+; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s4, a0
-; CHECK-NEXT:    fmv.w.x ft0, s0
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s3
 ; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    fcvt.l.s s0, ft0, rtz
+; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    fcvt.l.s s3, ft0, rtz
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
@@ -338,10 +338,10 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    mv a0, a4
 ; CHECK-NEXT:  .LBB6_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    fmv.w.x ft0, s1
 ; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    bge s0, a4, .LBB6_11
+; CHECK-NEXT:    bge s3, a4, .LBB6_11
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
 ; CHECK-NEXT:    bge a2, a4, .LBB6_12
@@ -352,16 +352,16 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:  .LBB6_6: # %entry
 ; CHECK-NEXT:    bge a1, a2, .LBB6_15
 ; CHECK-NEXT:  .LBB6_7: # %entry
-; CHECK-NEXT:    bge a1, s0, .LBB6_16
+; CHECK-NEXT:    bge a1, s3, .LBB6_16
 ; CHECK-NEXT:  .LBB6_8: # %entry
 ; CHECK-NEXT:    blt a1, a0, .LBB6_10
 ; CHECK-NEXT:  .LBB6_9: # %entry
 ; CHECK-NEXT:    lui a0, 524288
 ; CHECK-NEXT:  .LBB6_10: # %entry
-; CHECK-NEXT:    sw a0, 12(s3)
-; CHECK-NEXT:    sw s0, 8(s3)
-; CHECK-NEXT:    sw a2, 4(s3)
-; CHECK-NEXT:    sw a3, 0(s3)
+; CHECK-NEXT:    sw a0, 12(s0)
+; CHECK-NEXT:    sw s3, 8(s0)
+; CHECK-NEXT:    sw a2, 4(s0)
+; CHECK-NEXT:    sw a3, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
@@ -371,7 +371,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB6_11: # %entry
-; CHECK-NEXT:    mv s0, a4
+; CHECK-NEXT:    mv s3, a4
 ; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
 ; CHECK-NEXT:    blt a2, a4, .LBB6_4
 ; CHECK-NEXT:  .LBB6_12: # %entry
@@ -385,9 +385,9 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    blt a1, a2, .LBB6_7
 ; CHECK-NEXT:  .LBB6_15: # %entry
 ; CHECK-NEXT:    lui a2, 524288
-; CHECK-NEXT:    blt a1, s0, .LBB6_8
+; CHECK-NEXT:    blt a1, s3, .LBB6_8
 ; CHECK-NEXT:  .LBB6_16: # %entry
-; CHECK-NEXT:    lui s0, 524288
+; CHECK-NEXT:    lui s3, 524288
 ; CHECK-NEXT:    bge a1, a0, .LBB6_9
 ; CHECK-NEXT:    j .LBB6_10
 entry:
@@ -418,23 +418,23 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s3, -40
 ; CHECK-NEXT:    .cfi_offset s4, -48
 ; CHECK-NEXT:    lhu s2, 0(a1)
-; CHECK-NEXT:    lhu s3, 24(a1)
-; CHECK-NEXT:    lhu s1, 16(a1)
+; CHECK-NEXT:    lhu s1, 24(a1)
+; CHECK-NEXT:    lhu s3, 16(a1)
 ; CHECK-NEXT:    lhu a1, 8(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s4, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s1, a0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s3, a0
-; CHECK-NEXT:    fmv.w.x ft0, s1
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __gnu_h2f_ieee@plt
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s3
 ; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
 ; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.lu.s s1, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s s3, ft0, rtz
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
@@ -445,10 +445,10 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB7_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s3
+; CHECK-NEXT:    fmv.w.x ft0, s1
 ; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.lu.s a2, ft1, rtz
-; CHECK-NEXT:    bgeu s1, a1, .LBB7_7
+; CHECK-NEXT:    bgeu s3, a1, .LBB7_7
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    fcvt.lu.s a3, ft0, rtz
 ; CHECK-NEXT:    bgeu a2, a1, .LBB7_8
@@ -459,7 +459,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:  .LBB7_6: # %entry
 ; CHECK-NEXT:    sw a3, 12(s0)
 ; CHECK-NEXT:    sw a2, 8(s0)
-; CHECK-NEXT:    sw s1, 4(s0)
+; CHECK-NEXT:    sw s3, 4(s0)
 ; CHECK-NEXT:    sw a0, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
@@ -470,7 +470,7 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB7_7: # %entry
-; CHECK-NEXT:    mv s1, a1
+; CHECK-NEXT:    mv s3, a1
 ; CHECK-NEXT:    fcvt.lu.s a3, ft0, rtz
 ; CHECK-NEXT:    bltu a2, a1, .LBB7_4
 ; CHECK-NEXT:  .LBB7_8: # %entry
@@ -503,23 +503,23 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s3, -40
 ; CHECK-NEXT:    .cfi_offset s4, -48
 ; CHECK-NEXT:    lhu s2, 24(a1)
-; CHECK-NEXT:    lhu s4, 0(a1)
-; CHECK-NEXT:    lhu s0, 8(a1)
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s3, 8(a1)
 ; CHECK-NEXT:    lhu a1, 16(a1)
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s4
+; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s4, a0
-; CHECK-NEXT:    fmv.w.x ft0, s0
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s3
 ; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    fcvt.l.s s0, ft0, rtz
+; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    fcvt.l.s s3, ft0, rtz
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
@@ -530,10 +530,10 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    mv a0, a3
 ; CHECK-NEXT:  .LBB8_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    fmv.w.x ft0, s1
 ; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.l.s a1, ft1, rtz
-; CHECK-NEXT:    bge s0, a3, .LBB8_11
+; CHECK-NEXT:    bge s3, a3, .LBB8_11
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    fcvt.l.s a2, ft0, rtz
 ; CHECK-NEXT:    bge a1, a3, .LBB8_12
@@ -544,16 +544,16 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:  .LBB8_6: # %entry
 ; CHECK-NEXT:    blez a1, .LBB8_15
 ; CHECK-NEXT:  .LBB8_7: # %entry
-; CHECK-NEXT:    blez s0, .LBB8_16
+; CHECK-NEXT:    blez s3, .LBB8_16
 ; CHECK-NEXT:  .LBB8_8: # %entry
 ; CHECK-NEXT:    bgtz a0, .LBB8_10
 ; CHECK-NEXT:  .LBB8_9: # %entry
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:  .LBB8_10: # %entry
-; CHECK-NEXT:    sw a0, 12(s3)
-; CHECK-NEXT:    sw s0, 8(s3)
-; CHECK-NEXT:    sw a1, 4(s3)
-; CHECK-NEXT:    sw a2, 0(s3)
+; CHECK-NEXT:    sw a0, 12(s0)
+; CHECK-NEXT:    sw s3, 8(s0)
+; CHECK-NEXT:    sw a1, 4(s0)
+; CHECK-NEXT:    sw a2, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
@@ -563,7 +563,7 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB8_11: # %entry
-; CHECK-NEXT:    mv s0, a3
+; CHECK-NEXT:    mv s3, a3
 ; CHECK-NEXT:    fcvt.l.s a2, ft0, rtz
 ; CHECK-NEXT:    blt a1, a3, .LBB8_4
 ; CHECK-NEXT:  .LBB8_12: # %entry
@@ -577,9 +577,9 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    bgtz a1, .LBB8_7
 ; CHECK-NEXT:  .LBB8_15: # %entry
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz s0, .LBB8_8
+; CHECK-NEXT:    bgtz s3, .LBB8_8
 ; CHECK-NEXT:  .LBB8_16: # %entry
-; CHECK-NEXT:    li s0, 0
+; CHECK-NEXT:    li s3, 0
 ; CHECK-NEXT:    blez a0, .LBB8_9
 ; CHECK-NEXT:    j .LBB8_10
 entry:
@@ -903,7 +903,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s9, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -914,23 +913,19 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
 ; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    .cfi_offset s9, -88
 ; CHECK-NEXT:    lhu s6, 56(a1)
-; CHECK-NEXT:    lhu s2, 0(a1)
-; CHECK-NEXT:    lhu s3, 8(a1)
-; CHECK-NEXT:    lhu s4, 16(a1)
-; CHECK-NEXT:    lhu s5, 24(a1)
-; CHECK-NEXT:    lhu s1, 32(a1)
-; CHECK-NEXT:    lhu s0, 40(a1)
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s2, 8(a1)
+; CHECK-NEXT:    lhu s3, 16(a1)
+; CHECK-NEXT:    lhu s4, 24(a1)
+; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    lhu s7, 40(a1)
 ; CHECK-NEXT:    lhu a1, 48(a1)
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s9, a0
-; CHECK-NEXT:    mv a0, s0
-; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s7, a0
 ; CHECK-NEXT:    mv a0, s5
@@ -945,81 +940,84 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    fmv.w.x ft0, s0
-; CHECK-NEXT:    fsw ft0, 4(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s9
-; CHECK-NEXT:    fcvt.l.s s9, ft0, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __gnu_h2f_ieee@plt
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s7
+; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    fmv.w.x ft0, s8
+; CHECK-NEXT:    fcvt.l.s s7, ft0, rtz
 ; CHECK-NEXT:    mv a0, s6
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
 ; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    addiw s1, a1, -1
-; CHECK-NEXT:    blt a0, s1, .LBB15_2
+; CHECK-NEXT:    addiw a7, a1, -1
+; CHECK-NEXT:    blt a0, a7, .LBB15_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv a0, a7
 ; CHECK-NEXT:  .LBB15_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s7
-; CHECK-NEXT:    flw ft0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    fmv.w.x ft1, s5
+; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    blt s9, s1, .LBB15_4
+; CHECK-NEXT:    blt s7, a7, .LBB15_4
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s9, s1
+; CHECK-NEXT:    mv s7, a7
 ; CHECK-NEXT:  .LBB15_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s5
+; CHECK-NEXT:    fmv.w.x ft0, s4
 ; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    blt a1, s1, .LBB15_6
+; CHECK-NEXT:    blt a1, a7, .LBB15_6
 ; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a1, s1
+; CHECK-NEXT:    mv a1, a7
 ; CHECK-NEXT:  .LBB15_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s4
+; CHECK-NEXT:    fmv.w.x ft1, s3
 ; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, s1, .LBB15_8
+; CHECK-NEXT:    blt a2, a7, .LBB15_8
 ; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a2, s1
+; CHECK-NEXT:    mv a2, a7
 ; CHECK-NEXT:  .LBB15_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s3
+; CHECK-NEXT:    fmv.w.x ft0, s2
 ; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a3, s1, .LBB15_10
+; CHECK-NEXT:    blt a3, a7, .LBB15_10
 ; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a3, s1
+; CHECK-NEXT:    mv a3, a7
 ; CHECK-NEXT:  .LBB15_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s2
+; CHECK-NEXT:    fmv.w.x ft1, s1
 ; CHECK-NEXT:    fcvt.l.s a5, ft0, rtz
-; CHECK-NEXT:    bge a4, s1, .LBB15_23
+; CHECK-NEXT:    bge a4, a7, .LBB15_23
 ; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.l.s s0, ft1, rtz
-; CHECK-NEXT:    bge a5, s1, .LBB15_24
+; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:    bge a5, a7, .LBB15_24
 ; CHECK-NEXT:  .LBB15_12: # %entry
-; CHECK-NEXT:    bge s0, s1, .LBB15_25
+; CHECK-NEXT:    bge a6, a7, .LBB15_25
 ; CHECK-NEXT:  .LBB15_13: # %entry
-; CHECK-NEXT:    lui s1, 1048568
-; CHECK-NEXT:    bge s1, s0, .LBB15_26
+; CHECK-NEXT:    lui a7, 1048568
+; CHECK-NEXT:    bge a7, a6, .LBB15_26
 ; CHECK-NEXT:  .LBB15_14: # %entry
-; CHECK-NEXT:    bge s1, a5, .LBB15_27
+; CHECK-NEXT:    bge a7, a5, .LBB15_27
 ; CHECK-NEXT:  .LBB15_15: # %entry
-; CHECK-NEXT:    bge s1, a4, .LBB15_28
+; CHECK-NEXT:    bge a7, a4, .LBB15_28
 ; CHECK-NEXT:  .LBB15_16: # %entry
-; CHECK-NEXT:    bge s1, a3, .LBB15_29
+; CHECK-NEXT:    bge a7, a3, .LBB15_29
 ; CHECK-NEXT:  .LBB15_17: # %entry
-; CHECK-NEXT:    bge s1, a2, .LBB15_30
+; CHECK-NEXT:    bge a7, a2, .LBB15_30
 ; CHECK-NEXT:  .LBB15_18: # %entry
-; CHECK-NEXT:    bge s1, a1, .LBB15_31
+; CHECK-NEXT:    bge a7, a1, .LBB15_31
 ; CHECK-NEXT:  .LBB15_19: # %entry
-; CHECK-NEXT:    bge s1, s9, .LBB15_32
+; CHECK-NEXT:    bge a7, s7, .LBB15_32
 ; CHECK-NEXT:  .LBB15_20: # %entry
-; CHECK-NEXT:    blt s1, a0, .LBB15_22
+; CHECK-NEXT:    blt a7, a0, .LBB15_22
 ; CHECK-NEXT:  .LBB15_21: # %entry
 ; CHECK-NEXT:    lui a0, 1048568
 ; CHECK-NEXT:  .LBB15_22: # %entry
-; CHECK-NEXT:    sh a0, 14(s8)
-; CHECK-NEXT:    sh s9, 12(s8)
-; CHECK-NEXT:    sh a1, 10(s8)
-; CHECK-NEXT:    sh a2, 8(s8)
-; CHECK-NEXT:    sh a3, 6(s8)
-; CHECK-NEXT:    sh a4, 4(s8)
-; CHECK-NEXT:    sh a5, 2(s8)
-; CHECK-NEXT:    sh s0, 0(s8)
+; CHECK-NEXT:    sh a0, 14(s0)
+; CHECK-NEXT:    sh s7, 12(s0)
+; CHECK-NEXT:    sh a1, 10(s0)
+; CHECK-NEXT:    sh a2, 8(s0)
+; CHECK-NEXT:    sh a3, 6(s0)
+; CHECK-NEXT:    sh a4, 4(s0)
+; CHECK-NEXT:    sh a5, 2(s0)
+; CHECK-NEXT:    sh a6, 0(s0)
 ; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
@@ -1030,41 +1028,40 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 96
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB15_23: # %entry
-; CHECK-NEXT:    mv a4, s1
-; CHECK-NEXT:    fcvt.l.s s0, ft1, rtz
-; CHECK-NEXT:    blt a5, s1, .LBB15_12
+; CHECK-NEXT:    mv a4, a7
+; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:    blt a5, a7, .LBB15_12
 ; CHECK-NEXT:  .LBB15_24: # %entry
-; CHECK-NEXT:    mv a5, s1
-; CHECK-NEXT:    blt s0, s1, .LBB15_13
+; CHECK-NEXT:    mv a5, a7
+; CHECK-NEXT:    blt a6, a7, .LBB15_13
 ; CHECK-NEXT:  .LBB15_25: # %entry
-; CHECK-NEXT:    mv s0, s1
-; CHECK-NEXT:    lui s1, 1048568
-; CHECK-NEXT:    blt s1, s0, .LBB15_14
+; CHECK-NEXT:    mv a6, a7
+; CHECK-NEXT:    lui a7, 1048568
+; CHECK-NEXT:    blt a7, a6, .LBB15_14
 ; CHECK-NEXT:  .LBB15_26: # %entry
-; CHECK-NEXT:    lui s0, 1048568
-; CHECK-NEXT:    blt s1, a5, .LBB15_15
+; CHECK-NEXT:    lui a6, 1048568
+; CHECK-NEXT:    blt a7, a5, .LBB15_15
 ; CHECK-NEXT:  .LBB15_27: # %entry
 ; CHECK-NEXT:    lui a5, 1048568
-; CHECK-NEXT:    blt s1, a4, .LBB15_16
+; CHECK-NEXT:    blt a7, a4, .LBB15_16
 ; CHECK-NEXT:  .LBB15_28: # %entry
 ; CHECK-NEXT:    lui a4, 1048568
-; CHECK-NEXT:    blt s1, a3, .LBB15_17
+; CHECK-NEXT:    blt a7, a3, .LBB15_17
 ; CHECK-NEXT:  .LBB15_29: # %entry
 ; CHECK-NEXT:    lui a3, 1048568
-; CHECK-NEXT:    blt s1, a2, .LBB15_18
+; CHECK-NEXT:    blt a7, a2, .LBB15_18
 ; CHECK-NEXT:  .LBB15_30: # %entry
 ; CHECK-NEXT:    lui a2, 1048568
-; CHECK-NEXT:    blt s1, a1, .LBB15_19
+; CHECK-NEXT:    blt a7, a1, .LBB15_19
 ; CHECK-NEXT:  .LBB15_31: # %entry
 ; CHECK-NEXT:    lui a1, 1048568
-; CHECK-NEXT:    blt s1, s9, .LBB15_20
+; CHECK-NEXT:    blt a7, s7, .LBB15_20
 ; CHECK-NEXT:  .LBB15_32: # %entry
-; CHECK-NEXT:    lui s9, 1048568
-; CHECK-NEXT:    bge s1, a0, .LBB15_21
+; CHECK-NEXT:    lui s7, 1048568
+; CHECK-NEXT:    bge a7, a0, .LBB15_21
 ; CHECK-NEXT:    j .LBB15_22
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
@@ -1102,20 +1099,17 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s7, -72
 ; CHECK-NEXT:    .cfi_offset s8, -80
 ; CHECK-NEXT:    lhu s6, 0(a1)
-; CHECK-NEXT:    lhu s2, 56(a1)
-; CHECK-NEXT:    lhu s3, 48(a1)
-; CHECK-NEXT:    lhu s4, 40(a1)
-; CHECK-NEXT:    lhu s5, 32(a1)
-; CHECK-NEXT:    lhu s7, 24(a1)
-; CHECK-NEXT:    lhu s1, 16(a1)
+; CHECK-NEXT:    lhu s1, 56(a1)
+; CHECK-NEXT:    lhu s2, 48(a1)
+; CHECK-NEXT:    lhu s3, 40(a1)
+; CHECK-NEXT:    lhu s4, 32(a1)
+; CHECK-NEXT:    lhu s5, 24(a1)
+; CHECK-NEXT:    lhu s7, 16(a1)
 ; CHECK-NEXT:    lhu a1, 8(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s8, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s1, a0
 ; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s7, a0
@@ -1131,64 +1125,67 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    fmv.w.x ft0, s1
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __gnu_h2f_ieee@plt
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s7
 ; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
 ; CHECK-NEXT:    fmv.w.x ft0, s8
-; CHECK-NEXT:    fcvt.lu.s s8, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s s7, ft0, rtz
 ; CHECK-NEXT:    mv a0, s6
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    fcvt.lu.s a6, ft0, rtz
-; CHECK-NEXT:    lui a0, 16
-; CHECK-NEXT:    addiw a1, a0, -1
-; CHECK-NEXT:    bltu a6, a1, .LBB16_2
+; CHECK-NEXT:    fcvt.lu.s a0, ft0, rtz
+; CHECK-NEXT:    lui a1, 16
+; CHECK-NEXT:    addiw a1, a1, -1
+; CHECK-NEXT:    bltu a0, a1, .LBB16_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a6, a1
+; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB16_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s7
+; CHECK-NEXT:    fmv.w.x ft1, s5
 ; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.lu.s a2, ft0, rtz
-; CHECK-NEXT:    bltu s8, a1, .LBB16_4
+; CHECK-NEXT:    bltu s7, a1, .LBB16_4
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s8, a1
+; CHECK-NEXT:    mv s7, a1
 ; CHECK-NEXT:  .LBB16_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s5
+; CHECK-NEXT:    fmv.w.x ft0, s4
 ; CHECK-NEXT:    fcvt.lu.s a3, ft1, rtz
 ; CHECK-NEXT:    bltu a2, a1, .LBB16_6
 ; CHECK-NEXT:  # %bb.5: # %entry
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB16_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s4
+; CHECK-NEXT:    fmv.w.x ft1, s3
 ; CHECK-NEXT:    fcvt.lu.s a4, ft0, rtz
 ; CHECK-NEXT:    bltu a3, a1, .LBB16_8
 ; CHECK-NEXT:  # %bb.7: # %entry
 ; CHECK-NEXT:    mv a3, a1
 ; CHECK-NEXT:  .LBB16_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s3
+; CHECK-NEXT:    fmv.w.x ft0, s2
 ; CHECK-NEXT:    fcvt.lu.s a5, ft1, rtz
 ; CHECK-NEXT:    bltu a4, a1, .LBB16_10
 ; CHECK-NEXT:  # %bb.9: # %entry
 ; CHECK-NEXT:    mv a4, a1
 ; CHECK-NEXT:  .LBB16_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s2
-; CHECK-NEXT:    fcvt.lu.s s1, ft0, rtz
+; CHECK-NEXT:    fmv.w.x ft1, s1
+; CHECK-NEXT:    fcvt.lu.s a6, ft0, rtz
 ; CHECK-NEXT:    bgeu a5, a1, .LBB16_15
 ; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.lu.s a0, ft1, rtz
-; CHECK-NEXT:    bgeu s1, a1, .LBB16_16
+; CHECK-NEXT:    fcvt.lu.s a7, ft1, rtz
+; CHECK-NEXT:    bgeu a6, a1, .LBB16_16
 ; CHECK-NEXT:  .LBB16_12: # %entry
-; CHECK-NEXT:    bltu a0, a1, .LBB16_14
+; CHECK-NEXT:    bltu a7, a1, .LBB16_14
 ; CHECK-NEXT:  .LBB16_13: # %entry
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a7, a1
 ; CHECK-NEXT:  .LBB16_14: # %entry
-; CHECK-NEXT:    sh a0, 14(s0)
-; CHECK-NEXT:    sh s1, 12(s0)
+; CHECK-NEXT:    sh a7, 14(s0)
+; CHECK-NEXT:    sh a6, 12(s0)
 ; CHECK-NEXT:    sh a5, 10(s0)
 ; CHECK-NEXT:    sh a4, 8(s0)
 ; CHECK-NEXT:    sh a3, 6(s0)
 ; CHECK-NEXT:    sh a2, 4(s0)
-; CHECK-NEXT:    sh s8, 2(s0)
-; CHECK-NEXT:    sh a6, 0(s0)
+; CHECK-NEXT:    sh s7, 2(s0)
+; CHECK-NEXT:    sh a0, 0(s0)
 ; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
@@ -1203,11 +1200,11 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB16_15: # %entry
 ; CHECK-NEXT:    mv a5, a1
-; CHECK-NEXT:    fcvt.lu.s a0, ft1, rtz
-; CHECK-NEXT:    bltu s1, a1, .LBB16_12
+; CHECK-NEXT:    fcvt.lu.s a7, ft1, rtz
+; CHECK-NEXT:    bltu a6, a1, .LBB16_12
 ; CHECK-NEXT:  .LBB16_16: # %entry
-; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    bgeu a0, a1, .LBB16_13
+; CHECK-NEXT:    mv a6, a1
+; CHECK-NEXT:    bgeu a7, a1, .LBB16_13
 ; CHECK-NEXT:    j .LBB16_14
 entry:
   %conv = fptoui <8 x half> %x to <8 x i32>
@@ -1232,7 +1229,6 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s9, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -1243,23 +1239,19 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
 ; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    .cfi_offset s9, -88
 ; CHECK-NEXT:    lhu s6, 56(a1)
-; CHECK-NEXT:    lhu s2, 0(a1)
-; CHECK-NEXT:    lhu s3, 8(a1)
-; CHECK-NEXT:    lhu s4, 16(a1)
-; CHECK-NEXT:    lhu s5, 24(a1)
-; CHECK-NEXT:    lhu s1, 32(a1)
-; CHECK-NEXT:    lhu s0, 40(a1)
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s2, 8(a1)
+; CHECK-NEXT:    lhu s3, 16(a1)
+; CHECK-NEXT:    lhu s4, 24(a1)
+; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    lhu s7, 40(a1)
 ; CHECK-NEXT:    lhu a1, 48(a1)
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s9, a0
-; CHECK-NEXT:    mv a0, s0
-; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s7, a0
 ; CHECK-NEXT:    mv a0, s5
@@ -1274,55 +1266,58 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    fmv.w.x ft0, s0
-; CHECK-NEXT:    fsw ft0, 4(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s9
-; CHECK-NEXT:    fcvt.l.s s9, ft0, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __gnu_h2f_ieee@plt
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s7
+; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    fmv.w.x ft0, s8
+; CHECK-NEXT:    fcvt.l.s s7, ft0, rtz
 ; CHECK-NEXT:    mv a0, s6
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
 ; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
 ; CHECK-NEXT:    lui a1, 16
-; CHECK-NEXT:    addiw s1, a1, -1
-; CHECK-NEXT:    blt a0, s1, .LBB17_2
+; CHECK-NEXT:    addiw a7, a1, -1
+; CHECK-NEXT:    blt a0, a7, .LBB17_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv a0, a7
 ; CHECK-NEXT:  .LBB17_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s7
-; CHECK-NEXT:    flw ft0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    fmv.w.x ft1, s5
+; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    blt s9, s1, .LBB17_4
+; CHECK-NEXT:    blt s7, a7, .LBB17_4
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s9, s1
+; CHECK-NEXT:    mv s7, a7
 ; CHECK-NEXT:  .LBB17_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s5
+; CHECK-NEXT:    fmv.w.x ft0, s4
 ; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    blt a1, s1, .LBB17_6
+; CHECK-NEXT:    blt a1, a7, .LBB17_6
 ; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a1, s1
+; CHECK-NEXT:    mv a1, a7
 ; CHECK-NEXT:  .LBB17_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s4
+; CHECK-NEXT:    fmv.w.x ft1, s3
 ; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, s1, .LBB17_8
+; CHECK-NEXT:    blt a2, a7, .LBB17_8
 ; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a2, s1
+; CHECK-NEXT:    mv a2, a7
 ; CHECK-NEXT:  .LBB17_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s3
+; CHECK-NEXT:    fmv.w.x ft0, s2
 ; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a3, s1, .LBB17_10
+; CHECK-NEXT:    blt a3, a7, .LBB17_10
 ; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a3, s1
+; CHECK-NEXT:    mv a3, a7
 ; CHECK-NEXT:  .LBB17_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s2
+; CHECK-NEXT:    fmv.w.x ft1, s1
 ; CHECK-NEXT:    fcvt.l.s a5, ft0, rtz
-; CHECK-NEXT:    bge a4, s1, .LBB17_23
+; CHECK-NEXT:    bge a4, a7, .LBB17_23
 ; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.l.s s0, ft1, rtz
-; CHECK-NEXT:    bge a5, s1, .LBB17_24
+; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:    bge a5, a7, .LBB17_24
 ; CHECK-NEXT:  .LBB17_12: # %entry
-; CHECK-NEXT:    bge s0, s1, .LBB17_25
+; CHECK-NEXT:    bge a6, a7, .LBB17_25
 ; CHECK-NEXT:  .LBB17_13: # %entry
-; CHECK-NEXT:    blez s0, .LBB17_26
+; CHECK-NEXT:    blez a6, .LBB17_26
 ; CHECK-NEXT:  .LBB17_14: # %entry
 ; CHECK-NEXT:    blez a5, .LBB17_27
 ; CHECK-NEXT:  .LBB17_15: # %entry
@@ -1334,20 +1329,20 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:  .LBB17_18: # %entry
 ; CHECK-NEXT:    blez a1, .LBB17_31
 ; CHECK-NEXT:  .LBB17_19: # %entry
-; CHECK-NEXT:    blez s9, .LBB17_32
+; CHECK-NEXT:    blez s7, .LBB17_32
 ; CHECK-NEXT:  .LBB17_20: # %entry
 ; CHECK-NEXT:    bgtz a0, .LBB17_22
 ; CHECK-NEXT:  .LBB17_21: # %entry
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:  .LBB17_22: # %entry
-; CHECK-NEXT:    sh a0, 14(s8)
-; CHECK-NEXT:    sh s9, 12(s8)
-; CHECK-NEXT:    sh a1, 10(s8)
-; CHECK-NEXT:    sh a2, 8(s8)
-; CHECK-NEXT:    sh a3, 6(s8)
-; CHECK-NEXT:    sh a4, 4(s8)
-; CHECK-NEXT:    sh a5, 2(s8)
-; CHECK-NEXT:    sh s0, 0(s8)
+; CHECK-NEXT:    sh a0, 14(s0)
+; CHECK-NEXT:    sh s7, 12(s0)
+; CHECK-NEXT:    sh a1, 10(s0)
+; CHECK-NEXT:    sh a2, 8(s0)
+; CHECK-NEXT:    sh a3, 6(s0)
+; CHECK-NEXT:    sh a4, 4(s0)
+; CHECK-NEXT:    sh a5, 2(s0)
+; CHECK-NEXT:    sh a6, 0(s0)
 ; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
@@ -1358,21 +1353,20 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 96
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB17_23: # %entry
-; CHECK-NEXT:    mv a4, s1
-; CHECK-NEXT:    fcvt.l.s s0, ft1, rtz
-; CHECK-NEXT:    blt a5, s1, .LBB17_12
+; CHECK-NEXT:    mv a4, a7
+; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:    blt a5, a7, .LBB17_12
 ; CHECK-NEXT:  .LBB17_24: # %entry
-; CHECK-NEXT:    mv a5, s1
-; CHECK-NEXT:    blt s0, s1, .LBB17_13
+; CHECK-NEXT:    mv a5, a7
+; CHECK-NEXT:    blt a6, a7, .LBB17_13
 ; CHECK-NEXT:  .LBB17_25: # %entry
-; CHECK-NEXT:    mv s0, s1
-; CHECK-NEXT:    bgtz s0, .LBB17_14
+; CHECK-NEXT:    mv a6, a7
+; CHECK-NEXT:    bgtz a6, .LBB17_14
 ; CHECK-NEXT:  .LBB17_26: # %entry
-; CHECK-NEXT:    li s0, 0
+; CHECK-NEXT:    li a6, 0
 ; CHECK-NEXT:    bgtz a5, .LBB17_15
 ; CHECK-NEXT:  .LBB17_27: # %entry
 ; CHECK-NEXT:    li a5, 0
@@ -1388,9 +1382,9 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) {
 ; CHECK-NEXT:    bgtz a1, .LBB17_19
 ; CHECK-NEXT:  .LBB17_31: # %entry
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz s9, .LBB17_20
+; CHECK-NEXT:    bgtz s7, .LBB17_20
 ; CHECK-NEXT:  .LBB17_32: # %entry
-; CHECK-NEXT:    li s9, 0
+; CHECK-NEXT:    li s7, 0
 ; CHECK-NEXT:    blez a0, .LBB17_21
 ; CHECK-NEXT:    j .LBB17_22
 entry:
@@ -1418,11 +1412,11 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a1
+; CHECK-NEXT:    mv s2, a1
 ; CHECK-NEXT:    call __fixdfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    li a0, -1
@@ -1432,7 +1426,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    slti a4, a1, 0
 ; CHECK-NEXT:    bnez s1, .LBB18_4
 ; CHECK-NEXT:  .LBB18_2:
-; CHECK-NEXT:    sltu a5, s2, a3
+; CHECK-NEXT:    sltu a5, s0, a3
 ; CHECK-NEXT:    beqz a5, .LBB18_5
 ; CHECK-NEXT:    j .LBB18_6
 ; CHECK-NEXT:  .LBB18_3:
@@ -1443,7 +1437,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    bnez a5, .LBB18_6
 ; CHECK-NEXT:  .LBB18_5: # %entry
 ; CHECK-NEXT:    li s1, 0
-; CHECK-NEXT:    mv s2, a3
+; CHECK-NEXT:    mv s0, a3
 ; CHECK-NEXT:  .LBB18_6: # %entry
 ; CHECK-NEXT:    beqz a4, .LBB18_10
 ; CHECK-NEXT:  # %bb.7: # %entry
@@ -1453,7 +1447,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    slt a1, a0, a1
 ; CHECK-NEXT:    bne s1, a0, .LBB18_12
 ; CHECK-NEXT:  .LBB18_9:
-; CHECK-NEXT:    sltu a0, a3, s2
+; CHECK-NEXT:    sltu a0, a3, s0
 ; CHECK-NEXT:    beqz a0, .LBB18_13
 ; CHECK-NEXT:    j .LBB18_14
 ; CHECK-NEXT:  .LBB18_10: # %entry
@@ -1468,13 +1462,13 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    slt a0, a0, s1
 ; CHECK-NEXT:    bnez a0, .LBB18_14
 ; CHECK-NEXT:  .LBB18_13: # %entry
-; CHECK-NEXT:    mv s2, a3
+; CHECK-NEXT:    mv s0, a3
 ; CHECK-NEXT:  .LBB18_14: # %entry
 ; CHECK-NEXT:    bnez a1, .LBB18_16
 ; CHECK-NEXT:  # %bb.15: # %entry
 ; CHECK-NEXT:    mv a2, a3
 ; CHECK-NEXT:  .LBB18_16: # %entry
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    mv a0, s0
 ; CHECK-NEXT:    mv a1, a2
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -1505,18 +1499,18 @@ define <2 x i64> @utest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __fixunsdfti@plt
 ; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv s2, a1
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv s1, a1
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixunsdfti@plt
 ; CHECK-NEXT:    beqz a1, .LBB19_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:  .LBB19_2: # %entry
-; CHECK-NEXT:    beqz s2, .LBB19_4
+; CHECK-NEXT:    beqz s1, .LBB19_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:  .LBB19_4: # %entry
@@ -1548,12 +1542,12 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __fixdfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:    bgtz a1, .LBB20_7
@@ -1584,10 +1578,10 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    blez s1, .LBB20_4
 ; CHECK-NEXT:  .LBB20_10: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:    bnez a3, .LBB20_5
 ; CHECK-NEXT:  .LBB20_11:
-; CHECK-NEXT:    snez a1, s2
+; CHECK-NEXT:    snez a1, s0
 ; CHECK-NEXT:    beqz a2, .LBB20_6
 ; CHECK-NEXT:  .LBB20_12: # %entry
 ; CHECK-NEXT:    sgtz a2, a2
@@ -1597,9 +1591,9 @@ define <2 x i64> @ustest_f64i64(<2 x double> %x) {
 ; CHECK-NEXT:  .LBB20_14: # %entry
 ; CHECK-NEXT:    bnez a1, .LBB20_16
 ; CHECK-NEXT:  # %bb.15: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:  .LBB20_16: # %entry
-; CHECK-NEXT:    mv a1, s2
+; CHECK-NEXT:    mv a1, s0
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -1629,11 +1623,11 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a1
+; CHECK-NEXT:    mv s2, a1
 ; CHECK-NEXT:    call __fixsfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    li a0, -1
@@ -1643,7 +1637,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    slti a4, a1, 0
 ; CHECK-NEXT:    bnez s1, .LBB21_4
 ; CHECK-NEXT:  .LBB21_2:
-; CHECK-NEXT:    sltu a5, s2, a3
+; CHECK-NEXT:    sltu a5, s0, a3
 ; CHECK-NEXT:    beqz a5, .LBB21_5
 ; CHECK-NEXT:    j .LBB21_6
 ; CHECK-NEXT:  .LBB21_3:
@@ -1654,7 +1648,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    bnez a5, .LBB21_6
 ; CHECK-NEXT:  .LBB21_5: # %entry
 ; CHECK-NEXT:    li s1, 0
-; CHECK-NEXT:    mv s2, a3
+; CHECK-NEXT:    mv s0, a3
 ; CHECK-NEXT:  .LBB21_6: # %entry
 ; CHECK-NEXT:    beqz a4, .LBB21_10
 ; CHECK-NEXT:  # %bb.7: # %entry
@@ -1664,7 +1658,7 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    slt a1, a0, a1
 ; CHECK-NEXT:    bne s1, a0, .LBB21_12
 ; CHECK-NEXT:  .LBB21_9:
-; CHECK-NEXT:    sltu a0, a3, s2
+; CHECK-NEXT:    sltu a0, a3, s0
 ; CHECK-NEXT:    beqz a0, .LBB21_13
 ; CHECK-NEXT:    j .LBB21_14
 ; CHECK-NEXT:  .LBB21_10: # %entry
@@ -1679,13 +1673,13 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    slt a0, a0, s1
 ; CHECK-NEXT:    bnez a0, .LBB21_14
 ; CHECK-NEXT:  .LBB21_13: # %entry
-; CHECK-NEXT:    mv s2, a3
+; CHECK-NEXT:    mv s0, a3
 ; CHECK-NEXT:  .LBB21_14: # %entry
 ; CHECK-NEXT:    bnez a1, .LBB21_16
 ; CHECK-NEXT:  # %bb.15: # %entry
 ; CHECK-NEXT:    mv a2, a3
 ; CHECK-NEXT:  .LBB21_16: # %entry
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    mv a0, s0
 ; CHECK-NEXT:    mv a1, a2
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -1716,18 +1710,18 @@ define <2 x i64> @utest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv s2, a1
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv s1, a1
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    beqz a1, .LBB22_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:  .LBB22_2: # %entry
-; CHECK-NEXT:    beqz s2, .LBB22_4
+; CHECK-NEXT:    beqz s1, .LBB22_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:  .LBB22_4: # %entry
@@ -1759,12 +1753,12 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __fixsfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:    bgtz a1, .LBB23_7
@@ -1795,10 +1789,10 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    blez s1, .LBB23_4
 ; CHECK-NEXT:  .LBB23_10: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:    bnez a3, .LBB23_5
 ; CHECK-NEXT:  .LBB23_11:
-; CHECK-NEXT:    snez a1, s2
+; CHECK-NEXT:    snez a1, s0
 ; CHECK-NEXT:    beqz a2, .LBB23_6
 ; CHECK-NEXT:  .LBB23_12: # %entry
 ; CHECK-NEXT:    sgtz a2, a2
@@ -1808,9 +1802,9 @@ define <2 x i64> @ustest_f32i64(<2 x float> %x) {
 ; CHECK-NEXT:  .LBB23_14: # %entry
 ; CHECK-NEXT:    bnez a1, .LBB23_16
 ; CHECK-NEXT:  # %bb.15: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:  .LBB23_16: # %entry
-; CHECK-NEXT:    mv a1, s2
+; CHECK-NEXT:    mv a1, s0
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -1840,12 +1834,12 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a1
+; CHECK-NEXT:    mv s2, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixsfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a0
@@ -1856,7 +1850,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    slti a4, a1, 0
 ; CHECK-NEXT:    bnez s1, .LBB24_4
 ; CHECK-NEXT:  .LBB24_2:
-; CHECK-NEXT:    sltu a5, s2, a3
+; CHECK-NEXT:    sltu a5, s0, a3
 ; CHECK-NEXT:    beqz a5, .LBB24_5
 ; CHECK-NEXT:    j .LBB24_6
 ; CHECK-NEXT:  .LBB24_3:
@@ -1867,7 +1861,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    bnez a5, .LBB24_6
 ; CHECK-NEXT:  .LBB24_5: # %entry
 ; CHECK-NEXT:    li s1, 0
-; CHECK-NEXT:    mv s2, a3
+; CHECK-NEXT:    mv s0, a3
 ; CHECK-NEXT:  .LBB24_6: # %entry
 ; CHECK-NEXT:    beqz a4, .LBB24_10
 ; CHECK-NEXT:  # %bb.7: # %entry
@@ -1877,7 +1871,7 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    slt a1, a0, a1
 ; CHECK-NEXT:    bne s1, a0, .LBB24_12
 ; CHECK-NEXT:  .LBB24_9:
-; CHECK-NEXT:    sltu a0, a3, s2
+; CHECK-NEXT:    sltu a0, a3, s0
 ; CHECK-NEXT:    beqz a0, .LBB24_13
 ; CHECK-NEXT:    j .LBB24_14
 ; CHECK-NEXT:  .LBB24_10: # %entry
@@ -1892,13 +1886,13 @@ define <2 x i64> @stest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    slt a0, a0, s1
 ; CHECK-NEXT:    bnez a0, .LBB24_14
 ; CHECK-NEXT:  .LBB24_13: # %entry
-; CHECK-NEXT:    mv s2, a3
+; CHECK-NEXT:    mv s0, a3
 ; CHECK-NEXT:  .LBB24_14: # %entry
 ; CHECK-NEXT:    bnez a1, .LBB24_16
 ; CHECK-NEXT:  # %bb.15: # %entry
 ; CHECK-NEXT:    mv a2, a3
 ; CHECK-NEXT:  .LBB24_16: # %entry
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    mv a0, s0
 ; CHECK-NEXT:    mv a1, a2
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -1929,20 +1923,20 @@ define <2 x i64> @utesth_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv s2, a1
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv s1, a1
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    beqz a1, .LBB25_2
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:  .LBB25_2: # %entry
-; CHECK-NEXT:    beqz s2, .LBB25_4
+; CHECK-NEXT:    beqz s1, .LBB25_4
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:  .LBB25_4: # %entry
@@ -1974,13 +1968,13 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixsfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a1
@@ -2012,10 +2006,10 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    blez s1, .LBB26_4
 ; CHECK-NEXT:  .LBB26_10: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:    bnez a3, .LBB26_5
 ; CHECK-NEXT:  .LBB26_11:
-; CHECK-NEXT:    snez a1, s2
+; CHECK-NEXT:    snez a1, s0
 ; CHECK-NEXT:    beqz a2, .LBB26_6
 ; CHECK-NEXT:  .LBB26_12: # %entry
 ; CHECK-NEXT:    sgtz a2, a2
@@ -2025,9 +2019,9 @@ define <2 x i64> @ustest_f16i64(<2 x half> %x) {
 ; CHECK-NEXT:  .LBB26_14: # %entry
 ; CHECK-NEXT:    bnez a1, .LBB26_16
 ; CHECK-NEXT:  # %bb.15: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:  .LBB26_16: # %entry
-; CHECK-NEXT:    mv a1, s2
+; CHECK-NEXT:    mv a1, s0
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
@@ -2158,56 +2152,56 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) {
 ; CHECK-NEXT:    fmv.w.x ft2, a3
 ; CHECK-NEXT:    fmv.w.x ft0, a2
 ; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    lui a6, 524288
-; CHECK-NEXT:    addiw a5, a6, -1
+; CHECK-NEXT:    lui a4, 524288
+; CHECK-NEXT:    addiw a6, a4, -1
 ; CHECK-NEXT:    fcvt.l.s a3, ft2, rtz
-; CHECK-NEXT:    blt a2, a5, .LBB30_2
+; CHECK-NEXT:    blt a2, a6, .LBB30_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a2, a5
+; CHECK-NEXT:    mv a2, a6
 ; CHECK-NEXT:  .LBB30_2: # %entry
 ; CHECK-NEXT:    fmv.w.x ft1, a1
 ; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    bge a3, a5, .LBB30_11
+; CHECK-NEXT:    bge a3, a6, .LBB30_11
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    bge a1, a5, .LBB30_12
+; CHECK-NEXT:    fcvt.l.s a5, ft1, rtz
+; CHECK-NEXT:    bge a1, a6, .LBB30_12
 ; CHECK-NEXT:  .LBB30_4: # %entry
-; CHECK-NEXT:    bge a4, a5, .LBB30_13
+; CHECK-NEXT:    bge a5, a6, .LBB30_13
 ; CHECK-NEXT:  .LBB30_5: # %entry
-; CHECK-NEXT:    bge a6, a4, .LBB30_14
+; CHECK-NEXT:    bge a4, a5, .LBB30_14
 ; CHECK-NEXT:  .LBB30_6: # %entry
-; CHECK-NEXT:    bge a6, a1, .LBB30_15
+; CHECK-NEXT:    bge a4, a1, .LBB30_15
 ; CHECK-NEXT:  .LBB30_7: # %entry
-; CHECK-NEXT:    bge a6, a3, .LBB30_16
+; CHECK-NEXT:    bge a4, a3, .LBB30_16
 ; CHECK-NEXT:  .LBB30_8: # %entry
-; CHECK-NEXT:    blt a6, a2, .LBB30_10
+; CHECK-NEXT:    blt a4, a2, .LBB30_10
 ; CHECK-NEXT:  .LBB30_9: # %entry
 ; CHECK-NEXT:    lui a2, 524288
 ; CHECK-NEXT:  .LBB30_10: # %entry
 ; CHECK-NEXT:    sw a2, 12(a0)
 ; CHECK-NEXT:    sw a3, 8(a0)
 ; CHECK-NEXT:    sw a1, 4(a0)
-; CHECK-NEXT:    sw a4, 0(a0)
+; CHECK-NEXT:    sw a5, 0(a0)
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB30_11: # %entry
-; CHECK-NEXT:    mv a3, a5
-; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a1, a5, .LBB30_4
+; CHECK-NEXT:    mv a3, a6
+; CHECK-NEXT:    fcvt.l.s a5, ft1, rtz
+; CHECK-NEXT:    blt a1, a6, .LBB30_4
 ; CHECK-NEXT:  .LBB30_12: # %entry
-; CHECK-NEXT:    mv a1, a5
-; CHECK-NEXT:    blt a4, a5, .LBB30_5
+; CHECK-NEXT:    mv a1, a6
+; CHECK-NEXT:    blt a5, a6, .LBB30_5
 ; CHECK-NEXT:  .LBB30_13: # %entry
-; CHECK-NEXT:    mv a4, a5
-; CHECK-NEXT:    blt a6, a4, .LBB30_6
+; CHECK-NEXT:    mv a5, a6
+; CHECK-NEXT:    blt a4, a5, .LBB30_6
 ; CHECK-NEXT:  .LBB30_14: # %entry
-; CHECK-NEXT:    lui a4, 524288
-; CHECK-NEXT:    blt a6, a1, .LBB30_7
+; CHECK-NEXT:    lui a5, 524288
+; CHECK-NEXT:    blt a4, a1, .LBB30_7
 ; CHECK-NEXT:  .LBB30_15: # %entry
 ; CHECK-NEXT:    lui a1, 524288
-; CHECK-NEXT:    blt a6, a3, .LBB30_8
+; CHECK-NEXT:    blt a4, a3, .LBB30_8
 ; CHECK-NEXT:  .LBB30_16: # %entry
 ; CHECK-NEXT:    lui a3, 524288
-; CHECK-NEXT:    bge a6, a2, .LBB30_9
+; CHECK-NEXT:    bge a4, a2, .LBB30_9
 ; CHECK-NEXT:    j .LBB30_10
 entry:
   %conv = fptosi <4 x float> %x to <4 x i64>
@@ -2346,23 +2340,23 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s3, -40
 ; CHECK-NEXT:    .cfi_offset s4, -48
 ; CHECK-NEXT:    lhu s2, 24(a1)
-; CHECK-NEXT:    lhu s4, 0(a1)
-; CHECK-NEXT:    lhu s0, 8(a1)
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s3, 8(a1)
 ; CHECK-NEXT:    lhu a1, 16(a1)
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s4
+; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s4, a0
-; CHECK-NEXT:    fmv.w.x ft0, s0
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s3
 ; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    fcvt.l.s s0, ft0, rtz
+; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    fcvt.l.s s3, ft0, rtz
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
@@ -2373,10 +2367,10 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    mv a0, a4
 ; CHECK-NEXT:  .LBB33_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    fmv.w.x ft0, s1
 ; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    bge s0, a4, .LBB33_11
+; CHECK-NEXT:    bge s3, a4, .LBB33_11
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
 ; CHECK-NEXT:    bge a2, a4, .LBB33_12
@@ -2387,16 +2381,16 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:  .LBB33_6: # %entry
 ; CHECK-NEXT:    bge a1, a2, .LBB33_15
 ; CHECK-NEXT:  .LBB33_7: # %entry
-; CHECK-NEXT:    bge a1, s0, .LBB33_16
+; CHECK-NEXT:    bge a1, s3, .LBB33_16
 ; CHECK-NEXT:  .LBB33_8: # %entry
 ; CHECK-NEXT:    blt a1, a0, .LBB33_10
 ; CHECK-NEXT:  .LBB33_9: # %entry
 ; CHECK-NEXT:    lui a0, 524288
 ; CHECK-NEXT:  .LBB33_10: # %entry
-; CHECK-NEXT:    sw a0, 12(s3)
-; CHECK-NEXT:    sw s0, 8(s3)
-; CHECK-NEXT:    sw a2, 4(s3)
-; CHECK-NEXT:    sw a3, 0(s3)
+; CHECK-NEXT:    sw a0, 12(s0)
+; CHECK-NEXT:    sw s3, 8(s0)
+; CHECK-NEXT:    sw a2, 4(s0)
+; CHECK-NEXT:    sw a3, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
@@ -2406,7 +2400,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB33_11: # %entry
-; CHECK-NEXT:    mv s0, a4
+; CHECK-NEXT:    mv s3, a4
 ; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
 ; CHECK-NEXT:    blt a2, a4, .LBB33_4
 ; CHECK-NEXT:  .LBB33_12: # %entry
@@ -2420,9 +2414,9 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    blt a1, a2, .LBB33_7
 ; CHECK-NEXT:  .LBB33_15: # %entry
 ; CHECK-NEXT:    lui a2, 524288
-; CHECK-NEXT:    blt a1, s0, .LBB33_8
+; CHECK-NEXT:    blt a1, s3, .LBB33_8
 ; CHECK-NEXT:  .LBB33_16: # %entry
-; CHECK-NEXT:    lui s0, 524288
+; CHECK-NEXT:    lui s3, 524288
 ; CHECK-NEXT:    bge a1, a0, .LBB33_9
 ; CHECK-NEXT:    j .LBB33_10
 entry:
@@ -2451,23 +2445,23 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s3, -40
 ; CHECK-NEXT:    .cfi_offset s4, -48
 ; CHECK-NEXT:    lhu s2, 0(a1)
-; CHECK-NEXT:    lhu s3, 24(a1)
-; CHECK-NEXT:    lhu s1, 16(a1)
+; CHECK-NEXT:    lhu s1, 24(a1)
+; CHECK-NEXT:    lhu s3, 16(a1)
 ; CHECK-NEXT:    lhu a1, 8(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s4, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s1, a0
 ; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s3, a0
-; CHECK-NEXT:    fmv.w.x ft0, s1
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __gnu_h2f_ieee@plt
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s3
 ; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
 ; CHECK-NEXT:    fmv.w.x ft0, s4
-; CHECK-NEXT:    fcvt.lu.s s1, ft0, rtz
+; CHECK-NEXT:    fcvt.lu.s s3, ft0, rtz
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
@@ -2478,10 +2472,10 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB34_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s3
+; CHECK-NEXT:    fmv.w.x ft0, s1
 ; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.lu.s a2, ft1, rtz
-; CHECK-NEXT:    bgeu s1, a1, .LBB34_7
+; CHECK-NEXT:    bgeu s3, a1, .LBB34_7
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    fcvt.lu.s a3, ft0, rtz
 ; CHECK-NEXT:    bgeu a2, a1, .LBB34_8
@@ -2492,7 +2486,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:  .LBB34_6: # %entry
 ; CHECK-NEXT:    sw a3, 12(s0)
 ; CHECK-NEXT:    sw a2, 8(s0)
-; CHECK-NEXT:    sw s1, 4(s0)
+; CHECK-NEXT:    sw s3, 4(s0)
 ; CHECK-NEXT:    sw a0, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
@@ -2503,7 +2497,7 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB34_7: # %entry
-; CHECK-NEXT:    mv s1, a1
+; CHECK-NEXT:    mv s3, a1
 ; CHECK-NEXT:    fcvt.lu.s a3, ft0, rtz
 ; CHECK-NEXT:    bltu a2, a1, .LBB34_4
 ; CHECK-NEXT:  .LBB34_8: # %entry
@@ -2535,23 +2529,23 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s3, -40
 ; CHECK-NEXT:    .cfi_offset s4, -48
 ; CHECK-NEXT:    lhu s2, 24(a1)
-; CHECK-NEXT:    lhu s4, 0(a1)
-; CHECK-NEXT:    lhu s0, 8(a1)
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s3, 8(a1)
 ; CHECK-NEXT:    lhu a1, 16(a1)
-; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s1, a0
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv s4, a0
+; CHECK-NEXT:    mv a0, s3
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s4
+; CHECK-NEXT:    mv s3, a0
+; CHECK-NEXT:    mv a0, s1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s4, a0
-; CHECK-NEXT:    fmv.w.x ft0, s0
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s3
 ; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    fcvt.l.s s0, ft0, rtz
+; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    fcvt.l.s s3, ft0, rtz
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
@@ -2562,10 +2556,10 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:  # %bb.1: # %entry
 ; CHECK-NEXT:    mv a0, a3
 ; CHECK-NEXT:  .LBB35_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    fmv.w.x ft0, s1
 ; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.l.s a1, ft1, rtz
-; CHECK-NEXT:    bge s0, a3, .LBB35_11
+; CHECK-NEXT:    bge s3, a3, .LBB35_11
 ; CHECK-NEXT:  # %bb.3: # %entry
 ; CHECK-NEXT:    fcvt.l.s a2, ft0, rtz
 ; CHECK-NEXT:    bge a1, a3, .LBB35_12
@@ -2576,16 +2570,16 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:  .LBB35_6: # %entry
 ; CHECK-NEXT:    blez a1, .LBB35_15
 ; CHECK-NEXT:  .LBB35_7: # %entry
-; CHECK-NEXT:    blez s0, .LBB35_16
+; CHECK-NEXT:    blez s3, .LBB35_16
 ; CHECK-NEXT:  .LBB35_8: # %entry
 ; CHECK-NEXT:    bgtz a0, .LBB35_10
 ; CHECK-NEXT:  .LBB35_9: # %entry
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:  .LBB35_10: # %entry
-; CHECK-NEXT:    sw a0, 12(s3)
-; CHECK-NEXT:    sw s0, 8(s3)
-; CHECK-NEXT:    sw a1, 4(s3)
-; CHECK-NEXT:    sw a2, 0(s3)
+; CHECK-NEXT:    sw a0, 12(s0)
+; CHECK-NEXT:    sw s3, 8(s0)
+; CHECK-NEXT:    sw a1, 4(s0)
+; CHECK-NEXT:    sw a2, 0(s0)
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
@@ -2595,7 +2589,7 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB35_11: # %entry
-; CHECK-NEXT:    mv s0, a3
+; CHECK-NEXT:    mv s3, a3
 ; CHECK-NEXT:    fcvt.l.s a2, ft0, rtz
 ; CHECK-NEXT:    blt a1, a3, .LBB35_4
 ; CHECK-NEXT:  .LBB35_12: # %entry
@@ -2609,9 +2603,9 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    bgtz a1, .LBB35_7
 ; CHECK-NEXT:  .LBB35_15: # %entry
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz s0, .LBB35_8
+; CHECK-NEXT:    bgtz s3, .LBB35_8
 ; CHECK-NEXT:  .LBB35_16: # %entry
-; CHECK-NEXT:    li s0, 0
+; CHECK-NEXT:    li s3, 0
 ; CHECK-NEXT:    blez a0, .LBB35_9
 ; CHECK-NEXT:    j .LBB35_10
 entry:
@@ -2923,7 +2917,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s9, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -2934,23 +2927,19 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
 ; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    .cfi_offset s9, -88
 ; CHECK-NEXT:    lhu s6, 56(a1)
-; CHECK-NEXT:    lhu s2, 0(a1)
-; CHECK-NEXT:    lhu s3, 8(a1)
-; CHECK-NEXT:    lhu s4, 16(a1)
-; CHECK-NEXT:    lhu s5, 24(a1)
-; CHECK-NEXT:    lhu s1, 32(a1)
-; CHECK-NEXT:    lhu s0, 40(a1)
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s2, 8(a1)
+; CHECK-NEXT:    lhu s3, 16(a1)
+; CHECK-NEXT:    lhu s4, 24(a1)
+; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    lhu s7, 40(a1)
 ; CHECK-NEXT:    lhu a1, 48(a1)
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s9, a0
-; CHECK-NEXT:    mv a0, s0
-; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s7, a0
 ; CHECK-NEXT:    mv a0, s5
@@ -2965,81 +2954,84 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    fmv.w.x ft0, s0
-; CHECK-NEXT:    fsw ft0, 4(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s9
-; CHECK-NEXT:    fcvt.l.s s9, ft0, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __gnu_h2f_ieee@plt
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s7
+; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    fmv.w.x ft0, s8
+; CHECK-NEXT:    fcvt.l.s s7, ft0, rtz
 ; CHECK-NEXT:    mv a0, s6
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
 ; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
 ; CHECK-NEXT:    lui a1, 8
-; CHECK-NEXT:    addiw s1, a1, -1
-; CHECK-NEXT:    blt a0, s1, .LBB42_2
+; CHECK-NEXT:    addiw a7, a1, -1
+; CHECK-NEXT:    blt a0, a7, .LBB42_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv a0, a7
 ; CHECK-NEXT:  .LBB42_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s7
-; CHECK-NEXT:    flw ft0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    fmv.w.x ft1, s5
+; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    blt s9, s1, .LBB42_4
+; CHECK-NEXT:    blt s7, a7, .LBB42_4
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s9, s1
+; CHECK-NEXT:    mv s7, a7
 ; CHECK-NEXT:  .LBB42_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s5
+; CHECK-NEXT:    fmv.w.x ft0, s4
 ; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    blt a1, s1, .LBB42_6
+; CHECK-NEXT:    blt a1, a7, .LBB42_6
 ; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a1, s1
+; CHECK-NEXT:    mv a1, a7
 ; CHECK-NEXT:  .LBB42_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s4
+; CHECK-NEXT:    fmv.w.x ft1, s3
 ; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, s1, .LBB42_8
+; CHECK-NEXT:    blt a2, a7, .LBB42_8
 ; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a2, s1
+; CHECK-NEXT:    mv a2, a7
 ; CHECK-NEXT:  .LBB42_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s3
+; CHECK-NEXT:    fmv.w.x ft0, s2
 ; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a3, s1, .LBB42_10
+; CHECK-NEXT:    blt a3, a7, .LBB42_10
 ; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a3, s1
+; CHECK-NEXT:    mv a3, a7
 ; CHECK-NEXT:  .LBB42_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s2
+; CHECK-NEXT:    fmv.w.x ft1, s1
 ; CHECK-NEXT:    fcvt.l.s a5, ft0, rtz
-; CHECK-NEXT:    bge a4, s1, .LBB42_23
+; CHECK-NEXT:    bge a4, a7, .LBB42_23
 ; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.l.s s0, ft1, rtz
-; CHECK-NEXT:    bge a5, s1, .LBB42_24
+; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:    bge a5, a7, .LBB42_24
 ; CHECK-NEXT:  .LBB42_12: # %entry
-; CHECK-NEXT:    bge s0, s1, .LBB42_25
+; CHECK-NEXT:    bge a6, a7, .LBB42_25
 ; CHECK-NEXT:  .LBB42_13: # %entry
-; CHECK-NEXT:    lui s1, 1048568
-; CHECK-NEXT:    bge s1, s0, .LBB42_26
+; CHECK-NEXT:    lui a7, 1048568
+; CHECK-NEXT:    bge a7, a6, .LBB42_26
 ; CHECK-NEXT:  .LBB42_14: # %entry
-; CHECK-NEXT:    bge s1, a5, .LBB42_27
+; CHECK-NEXT:    bge a7, a5, .LBB42_27
 ; CHECK-NEXT:  .LBB42_15: # %entry
-; CHECK-NEXT:    bge s1, a4, .LBB42_28
+; CHECK-NEXT:    bge a7, a4, .LBB42_28
 ; CHECK-NEXT:  .LBB42_16: # %entry
-; CHECK-NEXT:    bge s1, a3, .LBB42_29
+; CHECK-NEXT:    bge a7, a3, .LBB42_29
 ; CHECK-NEXT:  .LBB42_17: # %entry
-; CHECK-NEXT:    bge s1, a2, .LBB42_30
+; CHECK-NEXT:    bge a7, a2, .LBB42_30
 ; CHECK-NEXT:  .LBB42_18: # %entry
-; CHECK-NEXT:    bge s1, a1, .LBB42_31
+; CHECK-NEXT:    bge a7, a1, .LBB42_31
 ; CHECK-NEXT:  .LBB42_19: # %entry
-; CHECK-NEXT:    bge s1, s9, .LBB42_32
+; CHECK-NEXT:    bge a7, s7, .LBB42_32
 ; CHECK-NEXT:  .LBB42_20: # %entry
-; CHECK-NEXT:    blt s1, a0, .LBB42_22
+; CHECK-NEXT:    blt a7, a0, .LBB42_22
 ; CHECK-NEXT:  .LBB42_21: # %entry
 ; CHECK-NEXT:    lui a0, 1048568
 ; CHECK-NEXT:  .LBB42_22: # %entry
-; CHECK-NEXT:    sh a0, 14(s8)
-; CHECK-NEXT:    sh s9, 12(s8)
-; CHECK-NEXT:    sh a1, 10(s8)
-; CHECK-NEXT:    sh a2, 8(s8)
-; CHECK-NEXT:    sh a3, 6(s8)
-; CHECK-NEXT:    sh a4, 4(s8)
-; CHECK-NEXT:    sh a5, 2(s8)
-; CHECK-NEXT:    sh s0, 0(s8)
+; CHECK-NEXT:    sh a0, 14(s0)
+; CHECK-NEXT:    sh s7, 12(s0)
+; CHECK-NEXT:    sh a1, 10(s0)
+; CHECK-NEXT:    sh a2, 8(s0)
+; CHECK-NEXT:    sh a3, 6(s0)
+; CHECK-NEXT:    sh a4, 4(s0)
+; CHECK-NEXT:    sh a5, 2(s0)
+; CHECK-NEXT:    sh a6, 0(s0)
 ; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
@@ -3050,41 +3042,40 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 96
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB42_23: # %entry
-; CHECK-NEXT:    mv a4, s1
-; CHECK-NEXT:    fcvt.l.s s0, ft1, rtz
-; CHECK-NEXT:    blt a5, s1, .LBB42_12
+; CHECK-NEXT:    mv a4, a7
+; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:    blt a5, a7, .LBB42_12
 ; CHECK-NEXT:  .LBB42_24: # %entry
-; CHECK-NEXT:    mv a5, s1
-; CHECK-NEXT:    blt s0, s1, .LBB42_13
+; CHECK-NEXT:    mv a5, a7
+; CHECK-NEXT:    blt a6, a7, .LBB42_13
 ; CHECK-NEXT:  .LBB42_25: # %entry
-; CHECK-NEXT:    mv s0, s1
-; CHECK-NEXT:    lui s1, 1048568
-; CHECK-NEXT:    blt s1, s0, .LBB42_14
+; CHECK-NEXT:    mv a6, a7
+; CHECK-NEXT:    lui a7, 1048568
+; CHECK-NEXT:    blt a7, a6, .LBB42_14
 ; CHECK-NEXT:  .LBB42_26: # %entry
-; CHECK-NEXT:    lui s0, 1048568
-; CHECK-NEXT:    blt s1, a5, .LBB42_15
+; CHECK-NEXT:    lui a6, 1048568
+; CHECK-NEXT:    blt a7, a5, .LBB42_15
 ; CHECK-NEXT:  .LBB42_27: # %entry
 ; CHECK-NEXT:    lui a5, 1048568
-; CHECK-NEXT:    blt s1, a4, .LBB42_16
+; CHECK-NEXT:    blt a7, a4, .LBB42_16
 ; CHECK-NEXT:  .LBB42_28: # %entry
 ; CHECK-NEXT:    lui a4, 1048568
-; CHECK-NEXT:    blt s1, a3, .LBB42_17
+; CHECK-NEXT:    blt a7, a3, .LBB42_17
 ; CHECK-NEXT:  .LBB42_29: # %entry
 ; CHECK-NEXT:    lui a3, 1048568
-; CHECK-NEXT:    blt s1, a2, .LBB42_18
+; CHECK-NEXT:    blt a7, a2, .LBB42_18
 ; CHECK-NEXT:  .LBB42_30: # %entry
 ; CHECK-NEXT:    lui a2, 1048568
-; CHECK-NEXT:    blt s1, a1, .LBB42_19
+; CHECK-NEXT:    blt a7, a1, .LBB42_19
 ; CHECK-NEXT:  .LBB42_31: # %entry
 ; CHECK-NEXT:    lui a1, 1048568
-; CHECK-NEXT:    blt s1, s9, .LBB42_20
+; CHECK-NEXT:    blt a7, s7, .LBB42_20
 ; CHECK-NEXT:  .LBB42_32: # %entry
-; CHECK-NEXT:    lui s9, 1048568
-; CHECK-NEXT:    bge s1, a0, .LBB42_21
+; CHECK-NEXT:    lui s7, 1048568
+; CHECK-NEXT:    bge a7, a0, .LBB42_21
 ; CHECK-NEXT:    j .LBB42_22
 entry:
   %conv = fptosi <8 x half> %x to <8 x i32>
@@ -3109,7 +3100,6 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s9, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -3120,25 +3110,21 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
 ; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    .cfi_offset s9, -88
 ; CHECK-NEXT:    lhu s5, 0(a1)
-; CHECK-NEXT:    lhu s2, 56(a1)
-; CHECK-NEXT:    lhu s3, 48(a1)
-; CHECK-NEXT:    lhu s4, 40(a1)
-; CHECK-NEXT:    lhu s6, 32(a1)
-; CHECK-NEXT:    lhu s7, 24(a1)
-; CHECK-NEXT:    lhu s1, 16(a1)
+; CHECK-NEXT:    lhu s1, 56(a1)
+; CHECK-NEXT:    lhu s2, 48(a1)
+; CHECK-NEXT:    lhu s3, 40(a1)
+; CHECK-NEXT:    lhu s4, 32(a1)
+; CHECK-NEXT:    lhu s6, 24(a1)
+; CHECK-NEXT:    lhu s7, 16(a1)
 ; CHECK-NEXT:    lhu a1, 8(a1)
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s8, a0
-; CHECK-NEXT:    mv a0, s1
-; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s9, a0
 ; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    mv s7, a0
 ; CHECK-NEXT:    mv a0, s6
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s6, a0
@@ -3151,72 +3137,75 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    fmv.w.x ft0, s1
-; CHECK-NEXT:    fsw ft0, 4(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s9
-; CHECK-NEXT:    fcvt.lu.s s1, ft0, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __gnu_h2f_ieee@plt
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s6
+; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    fmv.w.x ft0, s7
+; CHECK-NEXT:    fcvt.lu.s s7, ft0, rtz
 ; CHECK-NEXT:    fmv.w.x ft0, s8
 ; CHECK-NEXT:    fcvt.lu.s a0, ft0, rtz
-; CHECK-NEXT:    sext.w s7, a0
+; CHECK-NEXT:    sext.w s6, a0
 ; CHECK-NEXT:    mv a0, s5
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
 ; CHECK-NEXT:    fcvt.lu.s a0, ft0, rtz
-; CHECK-NEXT:    sext.w a6, a0
-; CHECK-NEXT:    lui a0, 16
-; CHECK-NEXT:    addiw a1, a0, -1
-; CHECK-NEXT:    bltu a6, a1, .LBB43_2
+; CHECK-NEXT:    sext.w a0, a0
+; CHECK-NEXT:    lui a1, 16
+; CHECK-NEXT:    addiw a1, a1, -1
+; CHECK-NEXT:    bltu a0, a1, .LBB43_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a6, a1
+; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:  .LBB43_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s6
-; CHECK-NEXT:    flw ft1, 4(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    fcvt.lu.s a0, ft1, rtz
-; CHECK-NEXT:    sext.w a2, s1
-; CHECK-NEXT:    bltu s7, a1, .LBB43_4
+; CHECK-NEXT:    fmv.w.x ft0, s4
+; CHECK-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    fcvt.lu.s a3, ft1, rtz
+; CHECK-NEXT:    sext.w a2, s7
+; CHECK-NEXT:    bltu s6, a1, .LBB43_4
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s7, a1
+; CHECK-NEXT:    mv s6, a1
 ; CHECK-NEXT:  .LBB43_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s4
+; CHECK-NEXT:    fmv.w.x ft1, s3
 ; CHECK-NEXT:    fcvt.lu.s a4, ft0, rtz
-; CHECK-NEXT:    sext.w a3, a0
+; CHECK-NEXT:    sext.w a3, a3
 ; CHECK-NEXT:    bltu a2, a1, .LBB43_6
 ; CHECK-NEXT:  # %bb.5: # %entry
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:  .LBB43_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s3
-; CHECK-NEXT:    fcvt.lu.s a0, ft1, rtz
+; CHECK-NEXT:    fmv.w.x ft0, s2
+; CHECK-NEXT:    fcvt.lu.s a5, ft1, rtz
 ; CHECK-NEXT:    sext.w a4, a4
 ; CHECK-NEXT:    bltu a3, a1, .LBB43_8
 ; CHECK-NEXT:  # %bb.7: # %entry
 ; CHECK-NEXT:    mv a3, a1
 ; CHECK-NEXT:  .LBB43_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s2
-; CHECK-NEXT:    fcvt.lu.s s1, ft0, rtz
-; CHECK-NEXT:    sext.w a5, a0
+; CHECK-NEXT:    fmv.w.x ft1, s1
+; CHECK-NEXT:    fcvt.lu.s a6, ft0, rtz
+; CHECK-NEXT:    sext.w a5, a5
 ; CHECK-NEXT:    bltu a4, a1, .LBB43_10
 ; CHECK-NEXT:  # %bb.9: # %entry
 ; CHECK-NEXT:    mv a4, a1
 ; CHECK-NEXT:  .LBB43_10: # %entry
-; CHECK-NEXT:    fcvt.lu.s a0, ft1, rtz
-; CHECK-NEXT:    sext.w s1, s1
+; CHECK-NEXT:    fcvt.lu.s a7, ft1, rtz
+; CHECK-NEXT:    sext.w a6, a6
 ; CHECK-NEXT:    bgeu a5, a1, .LBB43_15
 ; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    sext.w a0, a0
-; CHECK-NEXT:    bgeu s1, a1, .LBB43_16
+; CHECK-NEXT:    sext.w a7, a7
+; CHECK-NEXT:    bgeu a6, a1, .LBB43_16
 ; CHECK-NEXT:  .LBB43_12: # %entry
-; CHECK-NEXT:    bltu a0, a1, .LBB43_14
+; CHECK-NEXT:    bltu a7, a1, .LBB43_14
 ; CHECK-NEXT:  .LBB43_13: # %entry
-; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a7, a1
 ; CHECK-NEXT:  .LBB43_14: # %entry
-; CHECK-NEXT:    sh a0, 14(s0)
-; CHECK-NEXT:    sh s1, 12(s0)
+; CHECK-NEXT:    sh a7, 14(s0)
+; CHECK-NEXT:    sh a6, 12(s0)
 ; CHECK-NEXT:    sh a5, 10(s0)
 ; CHECK-NEXT:    sh a4, 8(s0)
 ; CHECK-NEXT:    sh a3, 6(s0)
 ; CHECK-NEXT:    sh a2, 4(s0)
-; CHECK-NEXT:    sh s7, 2(s0)
-; CHECK-NEXT:    sh a6, 0(s0)
+; CHECK-NEXT:    sh s6, 2(s0)
+; CHECK-NEXT:    sh a0, 0(s0)
 ; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
@@ -3227,16 +3216,15 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 96
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB43_15: # %entry
 ; CHECK-NEXT:    mv a5, a1
-; CHECK-NEXT:    sext.w a0, a0
-; CHECK-NEXT:    bltu s1, a1, .LBB43_12
+; CHECK-NEXT:    sext.w a7, a7
+; CHECK-NEXT:    bltu a6, a1, .LBB43_12
 ; CHECK-NEXT:  .LBB43_16: # %entry
-; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    bgeu a0, a1, .LBB43_13
+; CHECK-NEXT:    mv a6, a1
+; CHECK-NEXT:    bgeu a7, a1, .LBB43_13
 ; CHECK-NEXT:    j .LBB43_14
 entry:
   %conv = fptoui <8 x half> %x to <8 x i32>
@@ -3260,7 +3248,6 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    sd s9, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    .cfi_offset ra, -8
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
@@ -3271,23 +3258,19 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s6, -64
 ; CHECK-NEXT:    .cfi_offset s7, -72
 ; CHECK-NEXT:    .cfi_offset s8, -80
-; CHECK-NEXT:    .cfi_offset s9, -88
 ; CHECK-NEXT:    lhu s6, 56(a1)
-; CHECK-NEXT:    lhu s2, 0(a1)
-; CHECK-NEXT:    lhu s3, 8(a1)
-; CHECK-NEXT:    lhu s4, 16(a1)
-; CHECK-NEXT:    lhu s5, 24(a1)
-; CHECK-NEXT:    lhu s1, 32(a1)
-; CHECK-NEXT:    lhu s0, 40(a1)
+; CHECK-NEXT:    lhu s1, 0(a1)
+; CHECK-NEXT:    lhu s2, 8(a1)
+; CHECK-NEXT:    lhu s3, 16(a1)
+; CHECK-NEXT:    lhu s4, 24(a1)
+; CHECK-NEXT:    lhu s5, 32(a1)
+; CHECK-NEXT:    lhu s7, 40(a1)
 ; CHECK-NEXT:    lhu a1, 48(a1)
-; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s9, a0
-; CHECK-NEXT:    mv a0, s0
-; CHECK-NEXT:    call __gnu_h2f_ieee@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv s8, a0
+; CHECK-NEXT:    mv a0, s7
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s7, a0
 ; CHECK-NEXT:    mv a0, s5
@@ -3302,55 +3285,58 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    mv s2, a0
-; CHECK-NEXT:    fmv.w.x ft0, s0
-; CHECK-NEXT:    fsw ft0, 4(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    fmv.w.x ft0, s9
-; CHECK-NEXT:    fcvt.l.s s9, ft0, rtz
+; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    call __gnu_h2f_ieee@plt
+; CHECK-NEXT:    mv s1, a0
+; CHECK-NEXT:    fmv.w.x ft0, s7
+; CHECK-NEXT:    fsw ft0, 12(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    fmv.w.x ft0, s8
+; CHECK-NEXT:    fcvt.l.s s7, ft0, rtz
 ; CHECK-NEXT:    mv a0, s6
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    fmv.w.x ft0, a0
 ; CHECK-NEXT:    fcvt.l.s a0, ft0, rtz
 ; CHECK-NEXT:    lui a1, 16
-; CHECK-NEXT:    addiw s1, a1, -1
-; CHECK-NEXT:    blt a0, s1, .LBB44_2
+; CHECK-NEXT:    addiw a7, a1, -1
+; CHECK-NEXT:    blt a0, a7, .LBB44_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    mv a0, s1
+; CHECK-NEXT:    mv a0, a7
 ; CHECK-NEXT:  .LBB44_2: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s7
-; CHECK-NEXT:    flw ft0, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    fmv.w.x ft1, s5
+; CHECK-NEXT:    flw ft0, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    fcvt.l.s a1, ft0, rtz
-; CHECK-NEXT:    blt s9, s1, .LBB44_4
+; CHECK-NEXT:    blt s7, a7, .LBB44_4
 ; CHECK-NEXT:  # %bb.3: # %entry
-; CHECK-NEXT:    mv s9, s1
+; CHECK-NEXT:    mv s7, a7
 ; CHECK-NEXT:  .LBB44_4: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s5
+; CHECK-NEXT:    fmv.w.x ft0, s4
 ; CHECK-NEXT:    fcvt.l.s a2, ft1, rtz
-; CHECK-NEXT:    blt a1, s1, .LBB44_6
+; CHECK-NEXT:    blt a1, a7, .LBB44_6
 ; CHECK-NEXT:  # %bb.5: # %entry
-; CHECK-NEXT:    mv a1, s1
+; CHECK-NEXT:    mv a1, a7
 ; CHECK-NEXT:  .LBB44_6: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s4
+; CHECK-NEXT:    fmv.w.x ft1, s3
 ; CHECK-NEXT:    fcvt.l.s a3, ft0, rtz
-; CHECK-NEXT:    blt a2, s1, .LBB44_8
+; CHECK-NEXT:    blt a2, a7, .LBB44_8
 ; CHECK-NEXT:  # %bb.7: # %entry
-; CHECK-NEXT:    mv a2, s1
+; CHECK-NEXT:    mv a2, a7
 ; CHECK-NEXT:  .LBB44_8: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, s3
+; CHECK-NEXT:    fmv.w.x ft0, s2
 ; CHECK-NEXT:    fcvt.l.s a4, ft1, rtz
-; CHECK-NEXT:    blt a3, s1, .LBB44_10
+; CHECK-NEXT:    blt a3, a7, .LBB44_10
 ; CHECK-NEXT:  # %bb.9: # %entry
-; CHECK-NEXT:    mv a3, s1
+; CHECK-NEXT:    mv a3, a7
 ; CHECK-NEXT:  .LBB44_10: # %entry
-; CHECK-NEXT:    fmv.w.x ft1, s2
+; CHECK-NEXT:    fmv.w.x ft1, s1
 ; CHECK-NEXT:    fcvt.l.s a5, ft0, rtz
-; CHECK-NEXT:    bge a4, s1, .LBB44_23
+; CHECK-NEXT:    bge a4, a7, .LBB44_23
 ; CHECK-NEXT:  # %bb.11: # %entry
-; CHECK-NEXT:    fcvt.l.s s0, ft1, rtz
-; CHECK-NEXT:    bge a5, s1, .LBB44_24
+; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:    bge a5, a7, .LBB44_24
 ; CHECK-NEXT:  .LBB44_12: # %entry
-; CHECK-NEXT:    bge s0, s1, .LBB44_25
+; CHECK-NEXT:    bge a6, a7, .LBB44_25
 ; CHECK-NEXT:  .LBB44_13: # %entry
-; CHECK-NEXT:    blez s0, .LBB44_26
+; CHECK-NEXT:    blez a6, .LBB44_26
 ; CHECK-NEXT:  .LBB44_14: # %entry
 ; CHECK-NEXT:    blez a5, .LBB44_27
 ; CHECK-NEXT:  .LBB44_15: # %entry
@@ -3362,20 +3348,20 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:  .LBB44_18: # %entry
 ; CHECK-NEXT:    blez a1, .LBB44_31
 ; CHECK-NEXT:  .LBB44_19: # %entry
-; CHECK-NEXT:    blez s9, .LBB44_32
+; CHECK-NEXT:    blez s7, .LBB44_32
 ; CHECK-NEXT:  .LBB44_20: # %entry
 ; CHECK-NEXT:    bgtz a0, .LBB44_22
 ; CHECK-NEXT:  .LBB44_21: # %entry
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:  .LBB44_22: # %entry
-; CHECK-NEXT:    sh a0, 14(s8)
-; CHECK-NEXT:    sh s9, 12(s8)
-; CHECK-NEXT:    sh a1, 10(s8)
-; CHECK-NEXT:    sh a2, 8(s8)
-; CHECK-NEXT:    sh a3, 6(s8)
-; CHECK-NEXT:    sh a4, 4(s8)
-; CHECK-NEXT:    sh a5, 2(s8)
-; CHECK-NEXT:    sh s0, 0(s8)
+; CHECK-NEXT:    sh a0, 14(s0)
+; CHECK-NEXT:    sh s7, 12(s0)
+; CHECK-NEXT:    sh a1, 10(s0)
+; CHECK-NEXT:    sh a2, 8(s0)
+; CHECK-NEXT:    sh a3, 6(s0)
+; CHECK-NEXT:    sh a4, 4(s0)
+; CHECK-NEXT:    sh a5, 2(s0)
+; CHECK-NEXT:    sh a6, 0(s0)
 ; CHECK-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
@@ -3386,21 +3372,20 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 96
 ; CHECK-NEXT:    ret
 ; CHECK-NEXT:  .LBB44_23: # %entry
-; CHECK-NEXT:    mv a4, s1
-; CHECK-NEXT:    fcvt.l.s s0, ft1, rtz
-; CHECK-NEXT:    blt a5, s1, .LBB44_12
+; CHECK-NEXT:    mv a4, a7
+; CHECK-NEXT:    fcvt.l.s a6, ft1, rtz
+; CHECK-NEXT:    blt a5, a7, .LBB44_12
 ; CHECK-NEXT:  .LBB44_24: # %entry
-; CHECK-NEXT:    mv a5, s1
-; CHECK-NEXT:    blt s0, s1, .LBB44_13
+; CHECK-NEXT:    mv a5, a7
+; CHECK-NEXT:    blt a6, a7, .LBB44_13
 ; CHECK-NEXT:  .LBB44_25: # %entry
-; CHECK-NEXT:    mv s0, s1
-; CHECK-NEXT:    bgtz s0, .LBB44_14
+; CHECK-NEXT:    mv a6, a7
+; CHECK-NEXT:    bgtz a6, .LBB44_14
 ; CHECK-NEXT:  .LBB44_26: # %entry
-; CHECK-NEXT:    li s0, 0
+; CHECK-NEXT:    li a6, 0
 ; CHECK-NEXT:    bgtz a5, .LBB44_15
 ; CHECK-NEXT:  .LBB44_27: # %entry
 ; CHECK-NEXT:    li a5, 0
@@ -3416,9 +3401,9 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) {
 ; CHECK-NEXT:    bgtz a1, .LBB44_19
 ; CHECK-NEXT:  .LBB44_31: # %entry
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bgtz s9, .LBB44_20
+; CHECK-NEXT:    bgtz s7, .LBB44_20
 ; CHECK-NEXT:  .LBB44_32: # %entry
-; CHECK-NEXT:    li s9, 0
+; CHECK-NEXT:    li s7, 0
 ; CHECK-NEXT:    blez a0, .LBB44_21
 ; CHECK-NEXT:    j .LBB44_22
 entry:
@@ -3559,12 +3544,12 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __fixunsdfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixunsdfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    mv a3, a1
@@ -3581,7 +3566,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:  .LBB46_4: # %entry
 ; CHECK-NEXT:    beq s1, a4, .LBB46_6
 ; CHECK-NEXT:  .LBB46_5: # %entry
-; CHECK-NEXT:    mv a1, s2
+; CHECK-NEXT:    mv a1, s0
 ; CHECK-NEXT:  .LBB46_6: # %entry
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -3593,7 +3578,7 @@ define <2 x i64> @utest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    beq s1, a1, .LBB46_4
 ; CHECK-NEXT:  .LBB46_8: # %entry
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    mv s0, a1
 ; CHECK-NEXT:    bne s1, a4, .LBB46_5
 ; CHECK-NEXT:    j .LBB46_6
 entry:
@@ -3616,11 +3601,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a1
+; CHECK-NEXT:    mv s2, a1
 ; CHECK-NEXT:    call __fixdfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixdfti@plt
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:    li a5, 1
@@ -3673,11 +3658,11 @@ define <2 x i64> @ustest_f64i64_mm(<2 x double> %x) {
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    blez s1, .LBB47_5
 ; CHECK-NEXT:  .LBB47_16: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    beq s1, a5, .LBB47_6
 ; CHECK-NEXT:  .LBB47_17: # %entry
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    mv a0, s0
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    bgtz a4, .LBB47_7
 ; CHECK-NEXT:  .LBB47_18: # %entry
@@ -3827,12 +3812,12 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __fixunssfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    mv a3, a1
@@ -3849,7 +3834,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:  .LBB49_4: # %entry
 ; CHECK-NEXT:    beq s1, a4, .LBB49_6
 ; CHECK-NEXT:  .LBB49_5: # %entry
-; CHECK-NEXT:    mv a1, s2
+; CHECK-NEXT:    mv a1, s0
 ; CHECK-NEXT:  .LBB49_6: # %entry
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -3861,7 +3846,7 @@ define <2 x i64> @utest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    beq s1, a1, .LBB49_4
 ; CHECK-NEXT:  .LBB49_8: # %entry
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    mv s0, a1
 ; CHECK-NEXT:    bne s1, a4, .LBB49_5
 ; CHECK-NEXT:    j .LBB49_6
 entry:
@@ -3884,11 +3869,11 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a1
+; CHECK-NEXT:    mv s2, a1
 ; CHECK-NEXT:    call __fixsfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a1
 ; CHECK-NEXT:    li a5, 1
@@ -3941,11 +3926,11 @@ define <2 x i64> @ustest_f32i64_mm(<2 x float> %x) {
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    blez s1, .LBB50_5
 ; CHECK-NEXT:  .LBB50_16: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    beq s1, a5, .LBB50_6
 ; CHECK-NEXT:  .LBB50_17: # %entry
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    mv a0, s0
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    bgtz a4, .LBB50_7
 ; CHECK-NEXT:  .LBB50_18: # %entry
@@ -4097,13 +4082,13 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a0
+; CHECK-NEXT:    mv s2, a0
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixunssfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixunssfti@plt
 ; CHECK-NEXT:    mv a2, a0
@@ -4121,7 +4106,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:  .LBB52_4: # %entry
 ; CHECK-NEXT:    beq s1, a4, .LBB52_6
 ; CHECK-NEXT:  .LBB52_5: # %entry
-; CHECK-NEXT:    mv a1, s2
+; CHECK-NEXT:    mv a1, s0
 ; CHECK-NEXT:  .LBB52_6: # %entry
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
@@ -4133,7 +4118,7 @@ define <2 x i64> @utesth_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    mv a0, a2
 ; CHECK-NEXT:    beq s1, a1, .LBB52_4
 ; CHECK-NEXT:  .LBB52_8: # %entry
-; CHECK-NEXT:    mv s2, a1
+; CHECK-NEXT:    mv s0, a1
 ; CHECK-NEXT:    bne s1, a4, .LBB52_5
 ; CHECK-NEXT:    j .LBB52_6
 entry:
@@ -4156,12 +4141,12 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    .cfi_offset s0, -16
 ; CHECK-NEXT:    .cfi_offset s1, -24
 ; CHECK-NEXT:    .cfi_offset s2, -32
-; CHECK-NEXT:    mv s0, a1
+; CHECK-NEXT:    mv s2, a1
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixsfti@plt
-; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    mv s1, a1
-; CHECK-NEXT:    mv a0, s0
+; CHECK-NEXT:    mv a0, s2
 ; CHECK-NEXT:    call __gnu_h2f_ieee@plt
 ; CHECK-NEXT:    call __fixsfti@plt
 ; CHECK-NEXT:    mv a2, a1
@@ -4215,11 +4200,11 @@ define <2 x i64> @ustest_f16i64_mm(<2 x half> %x) {
 ; CHECK-NEXT:    mv a1, a0
 ; CHECK-NEXT:    blez s1, .LBB53_5
 ; CHECK-NEXT:  .LBB53_16: # %entry
-; CHECK-NEXT:    li s2, 0
+; CHECK-NEXT:    li s0, 0
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    beq s1, a5, .LBB53_6
 ; CHECK-NEXT:  .LBB53_17: # %entry
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    mv a0, s0
 ; CHECK-NEXT:    mv a2, a0
 ; CHECK-NEXT:    bgtz a4, .LBB53_7
 ; CHECK-NEXT:  .LBB53_18: # %entry
diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll
index a3dd3af884554..02e87dc3883ed 100644
--- a/llvm/test/CodeGen/RISCV/half-arith.ll
+++ b/llvm/test/CodeGen/RISCV/half-arith.ll
@@ -31,13 +31,13 @@ define half @fadd_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -57,13 +57,13 @@ define half @fadd_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
@@ -97,13 +97,13 @@ define half @fsub_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -123,13 +123,13 @@ define half @fsub_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
@@ -163,13 +163,13 @@ define half @fmul_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -189,13 +189,13 @@ define half @fmul_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
@@ -229,13 +229,13 @@ define half @fdiv_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -255,13 +255,13 @@ define half @fdiv_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
@@ -461,27 +461,27 @@ define half @fsgnjn_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s3, a1
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    addi s3, a0, -1
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s3, s0
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    lui a1, 524288
 ; RV32I-NEXT:    xor a0, a0, a1
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lui a1, 1048568
 ; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    slli a1, s2, 17
+; RV32I-NEXT:    slli a1, s1, 17
 ; RV32I-NEXT:    srli a1, a1, 17
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -500,27 +500,27 @@ define half @fsgnjn_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s3, a1
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    addiw s3, a0, -1
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s3, s0
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    lui a1, 524288
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    lui a1, 1048568
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    slli a1, s2, 49
+; RV64I-NEXT:    slli a1, s1, 49
 ; RV64I-NEXT:    srli a1, a1, 49
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -562,25 +562,25 @@ define half @fabs_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s1, a1, -1
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s2, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    srli a0, a0, 1
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __addsf3@plt
@@ -599,25 +599,25 @@ define half @fabs_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s1, a1, -1
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s2, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    slli a0, a0, 33
 ; RV64I-NEXT:    srli a0, a0, 33
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __addsf3@plt
@@ -654,13 +654,13 @@ define half @fmin_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -680,13 +680,13 @@ define half @fmin_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
@@ -722,13 +722,13 @@ define half @fmax_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -748,13 +748,13 @@ define half @fmax_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
@@ -791,20 +791,20 @@ define half @fmadd_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s0, a2
 ; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s3, a1, -1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s1, s0
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
@@ -824,20 +824,20 @@ define half @fmadd_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s3, a1, -1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s1, s0
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
@@ -875,33 +875,32 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and a0, a2, s0
+; RV32I-NEXT:    addi s3, a0, -1
+; RV32I-NEXT:    and a0, a2, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    lui a1, 524288
 ; RV32I-NEXT:    xor a0, a0, a1
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    and a0, s1, s0
-; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s4, s0
+; RV32I-NEXT:    and a0, s0, s3
+; RV32I-NEXT:    call __gnu_h2f_ieee@plt
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    and a0, s2, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    mv a0, s3
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -909,7 +908,6 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -921,33 +919,32 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and a0, a2, s0
+; RV64I-NEXT:    addiw s3, a0, -1
+; RV64I-NEXT:    and a0, a2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    lui a1, 524288
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s4, a0
-; RV64I-NEXT:    and a0, s1, s0
-; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s4, s0
+; RV64I-NEXT:    and a0, s0, s3
+; RV64I-NEXT:    call __gnu_h2f_ieee@plt
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    and a0, s2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    mv a0, s3
-; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -955,7 +952,6 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %c_ = fadd half 0.0, %c ; avoid negation using xor
@@ -990,44 +986,44 @@ define half @fnmadd_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s1, a1, -1
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    addi s3, a1, -1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    and a0, s3, s1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s4, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s2, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    lui s4, 524288
 ; RV32I-NEXT:    xor a0, a0, s4
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s0, s1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    xor a0, a0, s4
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    and a0, s2, s1
-; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s3, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s4, s1
+; RV32I-NEXT:    and a0, s2, s3
+; RV32I-NEXT:    call __gnu_h2f_ieee@plt
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1048,44 +1044,44 @@ define half @fnmadd_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s3, a2
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s1, a2
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s1, a1, -1
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    addiw s3, a1, -1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s4, a0
-; RV64I-NEXT:    and a0, s3, s1
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s4, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    lui s4, 524288
 ; RV64I-NEXT:    xor a0, a0, s4
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s0, s1
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    xor a0, a0, s4
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s4, a0
-; RV64I-NEXT:    and a0, s2, s1
-; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s3, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s4, s1
+; RV64I-NEXT:    and a0, s2, s3
+; RV64I-NEXT:    call __gnu_h2f_ieee@plt
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    mv a0, s0
-; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1130,44 +1126,44 @@ define half @fnmadd_s_2(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s3, a2
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a2
+; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s1, a0, -1
-; RV32I-NEXT:    and a0, a1, s1
+; RV32I-NEXT:    addi s3, a0, -1
+; RV32I-NEXT:    and a0, a1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    and a0, s3, s1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s4, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s2, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    lui s4, 524288
 ; RV32I-NEXT:    xor a0, a0, s4
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s0, s1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    xor a0, a0, s4
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    and a0, s2, s1
-; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s3, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s4, s1
+; RV32I-NEXT:    and a0, s2, s3
+; RV32I-NEXT:    call __gnu_h2f_ieee@plt
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    mv a0, s2
-; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1188,44 +1184,44 @@ define half @fnmadd_s_2(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s3, a2
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a2
+; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s1, a0, -1
-; RV64I-NEXT:    and a0, a1, s1
+; RV64I-NEXT:    addiw s3, a0, -1
+; RV64I-NEXT:    and a0, a1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s4, a0
-; RV64I-NEXT:    and a0, s3, s1
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s4, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    lui s4, 524288
 ; RV64I-NEXT:    xor a0, a0, s4
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s0, s1
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    xor a0, a0, s4
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s4, a0
-; RV64I-NEXT:    and a0, s2, s1
-; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s3, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s4, s1
+; RV64I-NEXT:    and a0, s2, s3
+; RV64I-NEXT:    call __gnu_h2f_ieee@plt
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    mv a0, s2
-; RV64I-NEXT:    mv a1, s0
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1267,32 +1263,31 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s0, a2
 ; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s3, a1, -1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    lui a1, 524288
 ; RV32I-NEXT:    xor a0, a0, a1
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    and a0, s1, s0
-; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s4, s0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv a1, s3
-; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    and a0, s2, s3
+; RV32I-NEXT:    call __gnu_h2f_ieee@plt
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1300,7 +1295,6 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1312,32 +1306,31 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s3, a1, -1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    lui a1, 524288
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s4, a0
-; RV64I-NEXT:    and a0, s1, s0
-; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s4, s0
+; RV64I-NEXT:    and a0, s0, s3
+; RV64I-NEXT:    call __gnu_h2f_ieee@plt
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    and a0, s2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv a1, s3
-; RV64I-NEXT:    mv a2, s1
+; RV64I-NEXT:    mv a1, s1
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1345,7 +1338,6 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %a_ = fadd half 0.0, %a
@@ -1377,33 +1369,32 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s0, a2
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and a0, a1, s0
+; RV32I-NEXT:    addi s3, a0, -1
+; RV32I-NEXT:    and a0, a1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    lui a1, 524288
 ; RV32I-NEXT:    xor a0, a0, a1
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    and a0, s1, s0
-; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s4, s0
+; RV32I-NEXT:    and a0, s0, s3
+; RV32I-NEXT:    call __gnu_h2f_ieee@plt
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    and a0, s2, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s3
-; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1411,7 +1402,6 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1423,33 +1413,32 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and a0, a1, s0
+; RV64I-NEXT:    addiw s3, a0, -1
+; RV64I-NEXT:    and a0, a1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    lui a1, 524288
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s4, a0
-; RV64I-NEXT:    and a0, s1, s0
-; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s4, s0
+; RV64I-NEXT:    and a0, s0, s3
+; RV64I-NEXT:    call __gnu_h2f_ieee@plt
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    and a0, s2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s3
-; RV64I-NEXT:    mv a2, s1
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1457,7 +1446,6 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %b_ = fadd half 0.0, %b
@@ -1485,26 +1473,26 @@ define half @fmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
-; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    mv s0, a2
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s1, a1, -1
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    addi s3, a1, -1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s3, s1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s2, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s0, s1
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1523,26 +1511,26 @@ define half @fmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
-; RV64I-NEXT:    mv s3, a1
+; RV64I-NEXT:    mv s0, a2
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s1, a1, -1
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    addiw s3, a1, -1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s3, s1
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s2, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s0, s1
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1580,32 +1568,32 @@ define half @fmsub_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lui a0, 16
-; RV32I-NEXT:    addi s0, a0, -1
-; RV32I-NEXT:    and a0, a2, s0
+; RV32I-NEXT:    addi s3, a0, -1
+; RV32I-NEXT:    and a0, a2, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s1, s0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s3, s0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    and a0, s2, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1624,32 +1612,32 @@ define half @fmsub_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s3, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 16
-; RV64I-NEXT:    addiw s0, a0, -1
-; RV64I-NEXT:    and a0, a2, s0
+; RV64I-NEXT:    addiw s3, a0, -1
+; RV64I-NEXT:    and a0, a2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s1, s0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s3, s0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    and a0, s2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1692,50 +1680,49 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
-; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s0, a2
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s1, a1, -1
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    addi s3, a1, -1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s0, s1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    and a0, s2, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s3, s1
-; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s4, s1
+; RV32I-NEXT:    and a0, s2, s3
+; RV32I-NEXT:    call __gnu_h2f_ieee@plt
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    lui a1, 524288
 ; RV32I-NEXT:    xor a0, a0, a1
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s2, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s0, s1
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1743,7 +1730,6 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1755,50 +1741,49 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
-; RV64I-NEXT:    mv s0, a1
+; RV64I-NEXT:    mv s0, a2
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s1, a1, -1
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    addiw s3, a1, -1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s0, s1
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s4, a0
-; RV64I-NEXT:    and a0, s2, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s3, s1
-; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s4, s1
+; RV64I-NEXT:    and a0, s2, s3
+; RV64I-NEXT:    call __gnu_h2f_ieee@plt
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    lui a1, 524288
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s2, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s0, s1
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1806,7 +1791,6 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %a_ = fadd half 0.0, %a ; avoid negation using xor
@@ -1843,39 +1827,39 @@ define half @fnmsub_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s0, a2
 ; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s3, a1, -1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s1, s0
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s3, s0
+; RV32I-NEXT:    and a0, s2, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s1, s0
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s1, s0
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1894,39 +1878,39 @@ define half @fnmsub_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s3, a1, -1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s1, s0
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s3, s0
+; RV64I-NEXT:    and a0, s2, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s1, s0
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s1, s0
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index 5c68be119ba09..6b9a2c6feb5a6 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -336,17 +336,17 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s2, a1, -1
-; RV32I-NEXT:    and a0, a0, s2
+; RV32I-NEXT:    addi s0, a1, -1
+; RV32I-NEXT:    and a0, a0, s0
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __gesf2@plt
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __fixunssfsi@plt
 ; RV32I-NEXT:    li s3, 0
-; RV32I-NEXT:    bltz s0, .LBB3_2
+; RV32I-NEXT:    bltz s2, .LBB3_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:  .LBB3_2: # %start
@@ -356,9 +356,9 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV32I-NEXT:    call __gtsf2@plt
 ; RV32I-NEXT:    bgtz a0, .LBB3_4
 ; RV32I-NEXT:  # %bb.3: # %start
-; RV32I-NEXT:    mv s2, s3
+; RV32I-NEXT:    mv s0, s3
 ; RV32I-NEXT:  .LBB3_4: # %start
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -376,17 +376,17 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s2, a1, -1
-; RV64I-NEXT:    and a0, a0, s2
+; RV64I-NEXT:    addiw s0, a1, -1
+; RV64I-NEXT:    and a0, a0, s0
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    li a1, 0
 ; RV64I-NEXT:    call __gesf2@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __fixunssfdi@plt
 ; RV64I-NEXT:    li s3, 0
-; RV64I-NEXT:    bltz s0, .LBB3_2
+; RV64I-NEXT:    bltz s2, .LBB3_2
 ; RV64I-NEXT:  # %bb.1: # %start
 ; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:  .LBB3_2: # %start
@@ -396,9 +396,9 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV64I-NEXT:    call __gtsf2@plt
 ; RV64I-NEXT:    bgtz a0, .LBB3_4
 ; RV64I-NEXT:  # %bb.3: # %start
-; RV64I-NEXT:    mv s2, s3
+; RV64I-NEXT:    mv s0, s3
 ; RV64I-NEXT:  .LBB3_4: # %start
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
@@ -1378,25 +1378,25 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    call __gesf2@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __fixunssfdi@plt
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    li s5, 0
-; RV32I-NEXT:    bltz s1, .LBB12_2
+; RV32I-NEXT:    bltz s2, .LBB12_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    mv s5, a0
 ; RV32I-NEXT:  .LBB12_2: # %start
 ; RV32I-NEXT:    lui a0, 391168
-; RV32I-NEXT:    addi s1, a0, -1
+; RV32I-NEXT:    addi s4, a0, -1
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a1, s4
 ; RV32I-NEXT:    call __gtsf2@plt
+; RV32I-NEXT:    li s2, -1
 ; RV32I-NEXT:    li s3, -1
-; RV32I-NEXT:    li s4, -1
 ; RV32I-NEXT:    bgtz a0, .LBB12_4
 ; RV32I-NEXT:  # %bb.3: # %start
-; RV32I-NEXT:    mv s4, s5
+; RV32I-NEXT:    mv s3, s5
 ; RV32I-NEXT:  .LBB12_4: # %start
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
@@ -1404,17 +1404,17 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32I-NEXT:    li s5, 0
 ; RV32I-NEXT:    bltz a0, .LBB12_6
 ; RV32I-NEXT:  # %bb.5: # %start
-; RV32I-NEXT:    mv s5, s2
+; RV32I-NEXT:    mv s5, s1
 ; RV32I-NEXT:  .LBB12_6: # %start
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a1, s4
 ; RV32I-NEXT:    call __gtsf2@plt
 ; RV32I-NEXT:    bgtz a0, .LBB12_8
 ; RV32I-NEXT:  # %bb.7: # %start
-; RV32I-NEXT:    mv s3, s5
+; RV32I-NEXT:    mv s2, s5
 ; RV32I-NEXT:  .LBB12_8: # %start
-; RV32I-NEXT:    mv a0, s4
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -2507,12 +2507,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lui a1, 815104
 ; RV32I-NEXT:    call __gesf2@plt
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __fixsfsi@plt
-; RV32I-NEXT:    li s1, 0
+; RV32I-NEXT:    li s2, 0
 ; RV32I-NEXT:    lui s3, 1048568
-; RV32I-NEXT:    bltz s2, .LBB32_2
+; RV32I-NEXT:    bltz s1, .LBB32_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:  .LBB32_2: # %start
@@ -2520,7 +2520,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32I-NEXT:    addi a1, a0, -512
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __gtsf2@plt
-; RV32I-NEXT:    bge s1, a0, .LBB32_4
+; RV32I-NEXT:    bge s2, a0, .LBB32_4
 ; RV32I-NEXT:  # %bb.3:
 ; RV32I-NEXT:    lui a0, 8
 ; RV32I-NEXT:    addi s3, a0, -1
@@ -2528,11 +2528,11 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __unordsf2@plt
-; RV32I-NEXT:    bne a0, s1, .LBB32_6
+; RV32I-NEXT:    bne a0, s2, .LBB32_6
 ; RV32I-NEXT:  # %bb.5: # %start
-; RV32I-NEXT:    mv s1, s3
+; RV32I-NEXT:    mv s2, s3
 ; RV32I-NEXT:  .LBB32_6: # %start
-; RV32I-NEXT:    slli a0, s1, 16
+; RV32I-NEXT:    slli a0, s2, 16
 ; RV32I-NEXT:    srai a0, a0, 16
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -2556,12 +2556,12 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lui a1, 815104
 ; RV64I-NEXT:    call __gesf2@plt
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __fixsfdi@plt
-; RV64I-NEXT:    li s1, 0
+; RV64I-NEXT:    li s2, 0
 ; RV64I-NEXT:    lui s3, 1048568
-; RV64I-NEXT:    bltz s2, .LBB32_2
+; RV64I-NEXT:    bltz s1, .LBB32_2
 ; RV64I-NEXT:  # %bb.1: # %start
 ; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:  .LBB32_2: # %start
@@ -2569,7 +2569,7 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64I-NEXT:    addiw a1, a0, -512
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gtsf2@plt
-; RV64I-NEXT:    bge s1, a0, .LBB32_4
+; RV64I-NEXT:    bge s2, a0, .LBB32_4
 ; RV64I-NEXT:  # %bb.3:
 ; RV64I-NEXT:    lui a0, 8
 ; RV64I-NEXT:    addiw s3, a0, -1
@@ -2577,11 +2577,11 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind {
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
-; RV64I-NEXT:    bne a0, s1, .LBB32_6
+; RV64I-NEXT:    bne a0, s2, .LBB32_6
 ; RV64I-NEXT:  # %bb.5: # %start
-; RV64I-NEXT:    mv s1, s3
+; RV64I-NEXT:    mv s2, s3
 ; RV64I-NEXT:  .LBB32_6: # %start
-; RV64I-NEXT:    slli a0, s1, 48
+; RV64I-NEXT:    slli a0, s2, 48
 ; RV64I-NEXT:    srai a0, a0, 48
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
@@ -2909,31 +2909,31 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    lui a1, 798720
 ; RV32I-NEXT:    call __gesf2@plt
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __fixsfsi@plt
-; RV32I-NEXT:    li s1, 0
+; RV32I-NEXT:    li s2, 0
 ; RV32I-NEXT:    li s3, -128
-; RV32I-NEXT:    bltz s2, .LBB36_2
+; RV32I-NEXT:    bltz s1, .LBB36_2
 ; RV32I-NEXT:  # %bb.1: # %start
 ; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:  .LBB36_2: # %start
 ; RV32I-NEXT:    lui a1, 274400
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __gtsf2@plt
-; RV32I-NEXT:    li s2, 127
-; RV32I-NEXT:    blt s1, a0, .LBB36_4
+; RV32I-NEXT:    li s1, 127
+; RV32I-NEXT:    blt s2, a0, .LBB36_4
 ; RV32I-NEXT:  # %bb.3: # %start
-; RV32I-NEXT:    mv s2, s3
+; RV32I-NEXT:    mv s1, s3
 ; RV32I-NEXT:  .LBB36_4: # %start
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __unordsf2@plt
-; RV32I-NEXT:    bne a0, s1, .LBB36_6
+; RV32I-NEXT:    bne a0, s2, .LBB36_6
 ; RV32I-NEXT:  # %bb.5: # %start
-; RV32I-NEXT:    mv s1, s2
+; RV32I-NEXT:    mv s2, s1
 ; RV32I-NEXT:  .LBB36_6: # %start
-; RV32I-NEXT:    slli a0, s1, 24
+; RV32I-NEXT:    slli a0, s2, 24
 ; RV32I-NEXT:    srai a0, a0, 24
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -2957,31 +2957,31 @@ define signext i8 @fcvt_w_s_sat_i8(half %a) nounwind {
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    lui a1, 798720
 ; RV64I-NEXT:    call __gesf2@plt
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __fixsfdi@plt
-; RV64I-NEXT:    li s1, 0
+; RV64I-NEXT:    li s2, 0
 ; RV64I-NEXT:    li s3, -128
-; RV64I-NEXT:    bltz s2, .LBB36_2
+; RV64I-NEXT:    bltz s1, .LBB36_2
 ; RV64I-NEXT:  # %bb.1: # %start
 ; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:  .LBB36_2: # %start
 ; RV64I-NEXT:    lui a1, 274400
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __gtsf2@plt
-; RV64I-NEXT:    li s2, 127
-; RV64I-NEXT:    blt s1, a0, .LBB36_4
+; RV64I-NEXT:    li s1, 127
+; RV64I-NEXT:    blt s2, a0, .LBB36_4
 ; RV64I-NEXT:  # %bb.3: # %start
-; RV64I-NEXT:    mv s2, s3
+; RV64I-NEXT:    mv s1, s3
 ; RV64I-NEXT:  .LBB36_4: # %start
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __unordsf2@plt
-; RV64I-NEXT:    bne a0, s1, .LBB36_6
+; RV64I-NEXT:    bne a0, s2, .LBB36_6
 ; RV64I-NEXT:  # %bb.5: # %start
-; RV64I-NEXT:    mv s1, s2
+; RV64I-NEXT:    mv s2, s1
 ; RV64I-NEXT:  .LBB36_6: # %start
-; RV64I-NEXT:    slli a0, s1, 56
+; RV64I-NEXT:    slli a0, s2, 56
 ; RV64I-NEXT:    srai a0, a0, 56
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index acde022135e37..0d4e4ac322e1a 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -398,24 +398,24 @@ define half @sincos_f16(half %a) nounwind {
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s1, a1, -1
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    call sinf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call cosf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s2, s1
+; RV32I-NEXT:    and a0, s1, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s0, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
@@ -433,24 +433,24 @@ define half @sincos_f16(half %a) nounwind {
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s1, a1, -1
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    call sinf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call cosf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s2, s1
+; RV64I-NEXT:    and a0, s1, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s0, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
@@ -523,13 +523,13 @@ define half @pow_f16(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -549,13 +549,13 @@ define half @pow_f16(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
@@ -982,20 +982,20 @@ define half @fma_f16(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
+; RV32I-NEXT:    mv s0, a2
 ; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s3, a1, -1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s3, a0
-; RV32I-NEXT:    and a0, s1, s0
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a2, a0
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
@@ -1015,20 +1015,20 @@ define half @fma_f16(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
+; RV64I-NEXT:    mv s0, a2
 ; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s3, a1, -1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s3, a0
-; RV64I-NEXT:    and a0, s1, s0
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a2, a0
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
@@ -1074,26 +1074,26 @@ define half @fmuladd_f16(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a2
-; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    mv s0, a2
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s1, a1, -1
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    addi s3, a1, -1
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s3, s1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    and a0, s2, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s0, s1
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    and a0, s1, s3
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv a1, s2
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1112,26 +1112,26 @@ define half @fmuladd_f16(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a2
-; RV64I-NEXT:    mv s3, a1
+; RV64I-NEXT:    mv s0, a2
+; RV64I-NEXT:    mv s1, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s1, a1, -1
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    addiw s3, a1, -1
+; RV64I-NEXT:    and a0, a0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s3, s1
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    and a0, s2, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s0, s1
+; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    and a0, s1, s3
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv a1, s2
+; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1213,13 +1213,13 @@ define half @minnum_f16(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -1239,13 +1239,13 @@ define half @minnum_f16(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
@@ -1291,13 +1291,13 @@ define half @maxnum_f16(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s0, a1, -1
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    and a0, s2, s0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
@@ -1317,13 +1317,13 @@ define half @maxnum_f16(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    mv s2, a1
+; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s0, a1, -1
-; RV64I-NEXT:    and a0, a0, s0
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s1, a0
-; RV64I-NEXT:    and a0, s2, s0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 411369557ff34..3884c67d399da 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1141,50 +1141,50 @@ define i64 @muli64_m3840(i64 %a) nounwind {
 define i128 @muli128_m3840(i128 %a) nounwind {
 ; RV32I-LABEL: muli128_m3840:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a2, 4(a1)
+; RV32I-NEXT:    lw a4, 4(a1)
 ; RV32I-NEXT:    lw a3, 8(a1)
-; RV32I-NEXT:    lw a4, 0(a1)
-; RV32I-NEXT:    lw a1, 12(a1)
-; RV32I-NEXT:    srli a6, a2, 20
-; RV32I-NEXT:    slli a5, a3, 12
-; RV32I-NEXT:    or a6, a5, a6
-; RV32I-NEXT:    srli a7, a2, 24
-; RV32I-NEXT:    slli a5, a3, 8
-; RV32I-NEXT:    or a7, a5, a7
-; RV32I-NEXT:    sltu t0, a7, a6
-; RV32I-NEXT:    srli t1, a3, 20
-; RV32I-NEXT:    slli a5, a1, 12
-; RV32I-NEXT:    or a5, a5, t1
-; RV32I-NEXT:    srli a3, a3, 24
-; RV32I-NEXT:    slli a1, a1, 8
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    sub t2, a1, a5
+; RV32I-NEXT:    lw a6, 0(a1)
+; RV32I-NEXT:    lw a5, 12(a1)
 ; RV32I-NEXT:    srli a1, a4, 20
-; RV32I-NEXT:    slli a3, a2, 12
-; RV32I-NEXT:    or a3, a3, a1
-; RV32I-NEXT:    srli a1, a4, 24
-; RV32I-NEXT:    slli a2, a2, 8
-; RV32I-NEXT:    or a5, a2, a1
-; RV32I-NEXT:    slli t1, a4, 12
-; RV32I-NEXT:    slli t3, a4, 8
-; RV32I-NEXT:    sltu t4, t3, t1
-; RV32I-NEXT:    sub t0, t2, t0
-; RV32I-NEXT:    mv a2, t4
+; RV32I-NEXT:    slli a2, a3, 12
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a4, 24
+; RV32I-NEXT:    slli a7, a3, 8
+; RV32I-NEXT:    or a2, a7, a2
+; RV32I-NEXT:    sltu t0, a2, a1
+; RV32I-NEXT:    srli a7, a3, 20
+; RV32I-NEXT:    slli t1, a5, 12
+; RV32I-NEXT:    or a7, t1, a7
+; RV32I-NEXT:    srli a3, a3, 24
+; RV32I-NEXT:    slli a5, a5, 8
+; RV32I-NEXT:    or a3, a5, a3
+; RV32I-NEXT:    sub t1, a3, a7
+; RV32I-NEXT:    srli a3, a6, 20
+; RV32I-NEXT:    slli a5, a4, 12
+; RV32I-NEXT:    or a3, a5, a3
+; RV32I-NEXT:    srli a5, a6, 24
+; RV32I-NEXT:    slli a4, a4, 8
+; RV32I-NEXT:    or a5, a4, a5
+; RV32I-NEXT:    slli a4, a6, 12
+; RV32I-NEXT:    slli a6, a6, 8
+; RV32I-NEXT:    sltu a7, a6, a4
+; RV32I-NEXT:    sub t0, t1, t0
+; RV32I-NEXT:    mv t1, a7
 ; RV32I-NEXT:    beq a5, a3, .LBB30_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sltu a2, a5, a3
+; RV32I-NEXT:    sltu t1, a5, a3
 ; RV32I-NEXT:  .LBB30_2:
-; RV32I-NEXT:    sub a1, a7, a6
-; RV32I-NEXT:    sltu a4, a1, a2
-; RV32I-NEXT:    sub a4, t0, a4
-; RV32I-NEXT:    sub a1, a1, a2
-; RV32I-NEXT:    sub a2, a5, a3
-; RV32I-NEXT:    sub a2, a2, t4
-; RV32I-NEXT:    sub a3, t3, t1
-; RV32I-NEXT:    sw a3, 0(a0)
-; RV32I-NEXT:    sw a2, 4(a0)
+; RV32I-NEXT:    sub a1, a2, a1
+; RV32I-NEXT:    sltu a2, a1, t1
+; RV32I-NEXT:    sub a2, t0, a2
+; RV32I-NEXT:    sub a1, a1, t1
+; RV32I-NEXT:    sub a3, a5, a3
+; RV32I-NEXT:    sub a3, a3, a7
+; RV32I-NEXT:    sub a4, a6, a4
+; RV32I-NEXT:    sw a4, 0(a0)
+; RV32I-NEXT:    sw a3, 4(a0)
 ; RV32I-NEXT:    sw a1, 8(a0)
-; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    sw a2, 12(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli128_m3840:
@@ -1192,56 +1192,54 @@ define i128 @muli128_m3840(i128 %a) nounwind {
 ; RV32IM-NEXT:    addi sp, sp, -16
 ; RV32IM-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
 ; RV32IM-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a6, 12(a1)
-; RV32IM-NEXT:    lw a7, 8(a1)
+; RV32IM-NEXT:    lw a2, 12(a1)
+; RV32IM-NEXT:    lw a3, 8(a1)
 ; RV32IM-NEXT:    lw a4, 0(a1)
 ; RV32IM-NEXT:    lw a1, 4(a1)
 ; RV32IM-NEXT:    lui a5, 1048575
 ; RV32IM-NEXT:    addi a5, a5, 256
-; RV32IM-NEXT:    mulhu a2, a4, a5
-; RV32IM-NEXT:    mul a3, a1, a5
-; RV32IM-NEXT:    add a2, a3, a2
-; RV32IM-NEXT:    sltu t0, a2, a3
-; RV32IM-NEXT:    mulhu a3, a1, a5
-; RV32IM-NEXT:    add t5, a3, t0
-; RV32IM-NEXT:    sub t0, a2, a4
-; RV32IM-NEXT:    neg t4, a4
-; RV32IM-NEXT:    sltu t1, t0, t4
+; RV32IM-NEXT:    mulhu a6, a4, a5
+; RV32IM-NEXT:    mul a7, a1, a5
+; RV32IM-NEXT:    add a6, a7, a6
+; RV32IM-NEXT:    sltu a7, a6, a7
+; RV32IM-NEXT:    mulhu t0, a1, a5
+; RV32IM-NEXT:    add a7, t0, a7
+; RV32IM-NEXT:    sub a6, a6, a4
+; RV32IM-NEXT:    neg t0, a4
+; RV32IM-NEXT:    sltu t1, a6, t0
 ; RV32IM-NEXT:    li t2, -1
 ; RV32IM-NEXT:    mulhu t3, a4, t2
-; RV32IM-NEXT:    add a2, t3, t1
-; RV32IM-NEXT:    add t1, t5, a2
-; RV32IM-NEXT:    sub a3, t1, a1
-; RV32IM-NEXT:    mul a2, a7, a5
-; RV32IM-NEXT:    sub a2, a2, a4
-; RV32IM-NEXT:    add t6, a3, a2
-; RV32IM-NEXT:    sltu s2, t6, a3
+; RV32IM-NEXT:    add t1, t3, t1
+; RV32IM-NEXT:    add t1, a7, t1
+; RV32IM-NEXT:    sub t4, t1, a1
+; RV32IM-NEXT:    mul t5, a3, a5
+; RV32IM-NEXT:    sub t5, t5, a4
+; RV32IM-NEXT:    add t6, t4, t5
+; RV32IM-NEXT:    sltu s0, t6, t4
 ; RV32IM-NEXT:    neg s1, a1
-; RV32IM-NEXT:    sltu a3, a3, s1
-; RV32IM-NEXT:    sltu s1, t1, t5
-; RV32IM-NEXT:    mulhu s0, a1, t2
-; RV32IM-NEXT:    add s1, s0, s1
-; RV32IM-NEXT:    add a3, s1, a3
-; RV32IM-NEXT:    sltu a2, a2, t4
-; RV32IM-NEXT:    mul s1, a6, a5
-; RV32IM-NEXT:    mulhu s0, a7, a5
-; RV32IM-NEXT:    sub s0, s0, a7
-; RV32IM-NEXT:    add s1, s0, s1
-; RV32IM-NEXT:    sub s0, t3, a4
-; RV32IM-NEXT:    sub a1, s0, a1
-; RV32IM-NEXT:    add a1, a1, s1
+; RV32IM-NEXT:    sltu t4, t4, s1
+; RV32IM-NEXT:    sltu a7, t1, a7
+; RV32IM-NEXT:    mulhu t1, a1, t2
+; RV32IM-NEXT:    add a7, t1, a7
+; RV32IM-NEXT:    add a7, a7, t4
+; RV32IM-NEXT:    sltu t0, t5, t0
+; RV32IM-NEXT:    mul a2, a2, a5
+; RV32IM-NEXT:    mulhu t1, a3, a5
+; RV32IM-NEXT:    sub a3, t1, a3
+; RV32IM-NEXT:    add a2, a3, a2
+; RV32IM-NEXT:    sub a3, t3, a4
+; RV32IM-NEXT:    sub a1, a3, a1
 ; RV32IM-NEXT:    add a1, a1, a2
-; RV32IM-NEXT:    add a1, a3, a1
-; RV32IM-NEXT:    add a1, a1, s2
+; RV32IM-NEXT:    add a1, a1, t0
+; RV32IM-NEXT:    add a1, a7, a1
+; RV32IM-NEXT:    add a1, a1, s0
 ; RV32IM-NEXT:    mul a2, a4, a5
 ; RV32IM-NEXT:    sw a2, 0(a0)
-; RV32IM-NEXT:    sw t0, 4(a0)
+; RV32IM-NEXT:    sw a6, 4(a0)
 ; RV32IM-NEXT:    sw t6, 8(a0)
 ; RV32IM-NEXT:    sw a1, 12(a0)
 ; RV32IM-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    addi sp, sp, 16
 ; RV32IM-NEXT:    ret
 ;
@@ -1279,39 +1277,39 @@ define i128 @muli128_m63(i128 %a) nounwind {
 ; RV32I-LABEL: muli128_m63:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lw a2, 0(a1)
-; RV32I-NEXT:    lw t0, 12(a1)
-; RV32I-NEXT:    lw a4, 8(a1)
+; RV32I-NEXT:    lw a5, 12(a1)
+; RV32I-NEXT:    lw a7, 8(a1)
 ; RV32I-NEXT:    lw a3, 4(a1)
-; RV32I-NEXT:    slli a6, a2, 6
-; RV32I-NEXT:    sltu a7, a2, a6
-; RV32I-NEXT:    srli a1, a2, 26
-; RV32I-NEXT:    slli a5, a3, 6
-; RV32I-NEXT:    or t2, a5, a1
-; RV32I-NEXT:    mv t3, a7
-; RV32I-NEXT:    beq a3, t2, .LBB31_2
+; RV32I-NEXT:    slli a1, a2, 6
+; RV32I-NEXT:    sltu a4, a2, a1
+; RV32I-NEXT:    srli a6, a2, 26
+; RV32I-NEXT:    slli t0, a3, 6
+; RV32I-NEXT:    or a6, t0, a6
+; RV32I-NEXT:    mv t0, a4
+; RV32I-NEXT:    beq a3, a6, .LBB31_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sltu t3, a3, t2
+; RV32I-NEXT:    sltu t0, a3, a6
 ; RV32I-NEXT:  .LBB31_2:
 ; RV32I-NEXT:    srli t1, a3, 26
-; RV32I-NEXT:    slli a1, a4, 6
-; RV32I-NEXT:    or a1, a1, t1
-; RV32I-NEXT:    sub a5, a4, a1
-; RV32I-NEXT:    sltu t1, a5, t3
-; RV32I-NEXT:    sltu t4, a4, a1
-; RV32I-NEXT:    srli a4, a4, 26
-; RV32I-NEXT:    slli a1, t0, 6
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    sub a1, t0, a1
-; RV32I-NEXT:    sub a1, a1, t4
-; RV32I-NEXT:    sub a1, a1, t1
-; RV32I-NEXT:    sub a4, a5, t3
-; RV32I-NEXT:    sub a3, a3, t2
-; RV32I-NEXT:    sub a3, a3, a7
-; RV32I-NEXT:    sub a2, a2, a6
-; RV32I-NEXT:    sw a2, 0(a0)
+; RV32I-NEXT:    slli t2, a7, 6
+; RV32I-NEXT:    or t1, t2, t1
+; RV32I-NEXT:    sub t2, a7, t1
+; RV32I-NEXT:    sltu t3, t2, t0
+; RV32I-NEXT:    sltu t1, a7, t1
+; RV32I-NEXT:    srli a7, a7, 26
+; RV32I-NEXT:    slli t4, a5, 6
+; RV32I-NEXT:    or a7, t4, a7
+; RV32I-NEXT:    sub a5, a5, a7
+; RV32I-NEXT:    sub a5, a5, t1
+; RV32I-NEXT:    sub a5, a5, t3
+; RV32I-NEXT:    sub a7, t2, t0
+; RV32I-NEXT:    sub a3, a3, a6
+; RV32I-NEXT:    sub a3, a3, a4
+; RV32I-NEXT:    sub a1, a2, a1
+; RV32I-NEXT:    sw a1, 0(a0)
 ; RV32I-NEXT:    sw a3, 4(a0)
-; RV32I-NEXT:    sw a4, 8(a0)
-; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    sw a7, 8(a0)
+; RV32I-NEXT:    sw a5, 12(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: muli128_m63:
@@ -1319,55 +1317,55 @@ define i128 @muli128_m63(i128 %a) nounwind {
 ; RV32IM-NEXT:    addi sp, sp, -16
 ; RV32IM-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
 ; RV32IM-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw a7, 12(a1)
+; RV32IM-NEXT:    lw a2, 12(a1)
 ; RV32IM-NEXT:    lw a3, 0(a1)
 ; RV32IM-NEXT:    lw a4, 4(a1)
-; RV32IM-NEXT:    lw t5, 8(a1)
-; RV32IM-NEXT:    li a6, -63
-; RV32IM-NEXT:    mulhu a5, a3, a6
-; RV32IM-NEXT:    slli a2, a4, 6
-; RV32IM-NEXT:    sub a2, a2, a4
-; RV32IM-NEXT:    sub a5, a5, a2
-; RV32IM-NEXT:    neg a2, a2
-; RV32IM-NEXT:    sltu t0, a5, a2
-; RV32IM-NEXT:    mulhu a2, a4, a6
-; RV32IM-NEXT:    add t4, a2, t0
-; RV32IM-NEXT:    sub t0, a5, a3
-; RV32IM-NEXT:    neg t1, a3
-; RV32IM-NEXT:    sltu a5, t0, t1
+; RV32IM-NEXT:    lw a1, 8(a1)
+; RV32IM-NEXT:    li a5, -63
+; RV32IM-NEXT:    mulhu a6, a3, a5
+; RV32IM-NEXT:    slli a7, a4, 6
+; RV32IM-NEXT:    sub a7, a7, a4
+; RV32IM-NEXT:    sub a6, a6, a7
+; RV32IM-NEXT:    neg a7, a7
+; RV32IM-NEXT:    sltu a7, a6, a7
+; RV32IM-NEXT:    mulhu t0, a4, a5
+; RV32IM-NEXT:    add a7, t0, a7
+; RV32IM-NEXT:    sub a6, a6, a3
+; RV32IM-NEXT:    neg t0, a3
+; RV32IM-NEXT:    sltu t1, a6, t0
 ; RV32IM-NEXT:    li t2, -1
 ; RV32IM-NEXT:    mulhu t3, a3, t2
-; RV32IM-NEXT:    add a5, t3, a5
-; RV32IM-NEXT:    add a5, t4, a5
-; RV32IM-NEXT:    sub a2, a5, a4
-; RV32IM-NEXT:    slli a1, t5, 6
-; RV32IM-NEXT:    sub a1, a1, t5
-; RV32IM-NEXT:    add a1, a1, a3
-; RV32IM-NEXT:    sub t6, a2, a1
-; RV32IM-NEXT:    sltu s0, t6, a2
+; RV32IM-NEXT:    add t1, t3, t1
+; RV32IM-NEXT:    add t1, a7, t1
+; RV32IM-NEXT:    sub t4, t1, a4
+; RV32IM-NEXT:    slli t5, a1, 6
+; RV32IM-NEXT:    sub t5, t5, a1
+; RV32IM-NEXT:    add t5, t5, a3
+; RV32IM-NEXT:    sub t6, t4, t5
+; RV32IM-NEXT:    sltu s0, t6, t4
 ; RV32IM-NEXT:    neg s1, a4
-; RV32IM-NEXT:    sltu a2, a2, s1
-; RV32IM-NEXT:    sltu a5, a5, t4
-; RV32IM-NEXT:    mulhu s1, a4, t2
-; RV32IM-NEXT:    add a5, s1, a5
-; RV32IM-NEXT:    add a2, a5, a2
-; RV32IM-NEXT:    slli a5, a7, 6
-; RV32IM-NEXT:    sub a5, a7, a5
-; RV32IM-NEXT:    mulhu s1, t5, a6
-; RV32IM-NEXT:    sub s1, s1, t5
-; RV32IM-NEXT:    add a5, s1, a5
-; RV32IM-NEXT:    sub s1, t3, a3
-; RV32IM-NEXT:    sub a4, s1, a4
-; RV32IM-NEXT:    add a4, a4, a5
-; RV32IM-NEXT:    neg a1, a1
-; RV32IM-NEXT:    sltu a1, a1, t1
-; RV32IM-NEXT:    add a1, a4, a1
+; RV32IM-NEXT:    sltu t4, t4, s1
+; RV32IM-NEXT:    sltu a7, t1, a7
+; RV32IM-NEXT:    mulhu t1, a4, t2
+; RV32IM-NEXT:    add a7, t1, a7
+; RV32IM-NEXT:    add a7, a7, t4
+; RV32IM-NEXT:    slli t1, a2, 6
+; RV32IM-NEXT:    sub a2, a2, t1
+; RV32IM-NEXT:    mulhu a5, a1, a5
+; RV32IM-NEXT:    sub a1, a5, a1
+; RV32IM-NEXT:    add a1, a1, a2
+; RV32IM-NEXT:    sub a2, t3, a3
+; RV32IM-NEXT:    sub a2, a2, a4
 ; RV32IM-NEXT:    add a1, a2, a1
+; RV32IM-NEXT:    neg a2, t5
+; RV32IM-NEXT:    sltu a2, a2, t0
+; RV32IM-NEXT:    add a1, a1, a2
+; RV32IM-NEXT:    add a1, a7, a1
 ; RV32IM-NEXT:    add a1, a1, s0
 ; RV32IM-NEXT:    slli a2, a3, 6
 ; RV32IM-NEXT:    sub a2, a3, a2
 ; RV32IM-NEXT:    sw a2, 0(a0)
-; RV32IM-NEXT:    sw t0, 4(a0)
+; RV32IM-NEXT:    sw a6, 4(a0)
 ; RV32IM-NEXT:    sw t6, 8(a0)
 ; RV32IM-NEXT:    sw a1, 12(a0)
 ; RV32IM-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
@@ -1417,60 +1415,60 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s0, a3
-; RV32I-NEXT:    mv s5, a2
-; RV32I-NEXT:    mv s2, a1
-; RV32I-NEXT:    mv s3, a0
+; RV32I-NEXT:    mv s2, a3
+; RV32I-NEXT:    mv s3, a2
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    srai s4, a3, 31
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3@plt
-; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv s5, a1
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    mv a2, s5
+; RV32I-NEXT:    mv a2, s3
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3@plt
-; RV32I-NEXT:    add s1, a0, s1
-; RV32I-NEXT:    sltu a0, s1, a0
+; RV32I-NEXT:    add s5, a0, s5
+; RV32I-NEXT:    sltu a0, s5, a0
 ; RV32I-NEXT:    add s7, a1, a0
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3@plt
-; RV32I-NEXT:    add a2, a0, s1
+; RV32I-NEXT:    add a2, a0, s5
 ; RV32I-NEXT:    sltu a0, a2, a0
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    add s8, s7, a0
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    mv a2, s0
+; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3@plt
-; RV32I-NEXT:    mv s9, a0
+; RV32I-NEXT:    mv s5, a0
 ; RV32I-NEXT:    mv s6, a1
-; RV32I-NEXT:    add s1, a0, s8
-; RV32I-NEXT:    mv a0, s5
-; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    add s9, a0, s8
+; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    li a2, 0
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __muldi3@plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    mv s5, a1
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s3, a1
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s4
-; RV32I-NEXT:    mv a2, s3
-; RV32I-NEXT:    mv a3, s2
+; RV32I-NEXT:    mv a2, s1
+; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __muldi3@plt
-; RV32I-NEXT:    add a3, a0, s0
-; RV32I-NEXT:    add a2, s1, a3
-; RV32I-NEXT:    sltu a4, a2, s1
-; RV32I-NEXT:    sltu a5, s1, s9
-; RV32I-NEXT:    sltu s1, s8, s7
-; RV32I-NEXT:    add s1, s6, s1
-; RV32I-NEXT:    add a5, s1, a5
-; RV32I-NEXT:    add a1, a1, s5
+; RV32I-NEXT:    add a3, a0, s2
+; RV32I-NEXT:    add a2, s9, a3
+; RV32I-NEXT:    sltu a4, a2, s9
+; RV32I-NEXT:    sltu a5, s9, s5
+; RV32I-NEXT:    sltu a6, s8, s7
+; RV32I-NEXT:    add a6, s6, a6
+; RV32I-NEXT:    add a5, a6, a5
+; RV32I-NEXT:    add a1, a1, s3
 ; RV32I-NEXT:    sltu a0, a3, a0
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    add a0, a5, a0
@@ -1492,36 +1490,36 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
 ;
 ; RV32IM-LABEL: mulhsu_i64:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    srai a7, a3, 31
-; RV32IM-NEXT:    mulhu a6, a0, a2
-; RV32IM-NEXT:    mul a5, a1, a2
-; RV32IM-NEXT:    add a4, a5, a6
-; RV32IM-NEXT:    sltu a5, a4, a5
+; RV32IM-NEXT:    srai a4, a3, 31
+; RV32IM-NEXT:    mulhu a5, a0, a2
+; RV32IM-NEXT:    mul a6, a1, a2
+; RV32IM-NEXT:    add a5, a6, a5
+; RV32IM-NEXT:    sltu a6, a5, a6
 ; RV32IM-NEXT:    mulhu a2, a1, a2
-; RV32IM-NEXT:    add a6, a2, a5
+; RV32IM-NEXT:    add a6, a2, a6
 ; RV32IM-NEXT:    mul a2, a0, a3
-; RV32IM-NEXT:    add a4, a2, a4
-; RV32IM-NEXT:    sltu a2, a4, a2
-; RV32IM-NEXT:    mulhu a4, a0, a3
-; RV32IM-NEXT:    add a2, a4, a2
-; RV32IM-NEXT:    add a4, a6, a2
-; RV32IM-NEXT:    mul a5, a1, a3
-; RV32IM-NEXT:    add a2, a5, a4
-; RV32IM-NEXT:    mul t1, a7, a0
-; RV32IM-NEXT:    add t0, a2, t1
-; RV32IM-NEXT:    sltu t2, t0, a2
-; RV32IM-NEXT:    sltu a2, a2, a5
-; RV32IM-NEXT:    sltu a4, a4, a6
+; RV32IM-NEXT:    add a5, a2, a5
+; RV32IM-NEXT:    sltu a2, a5, a2
+; RV32IM-NEXT:    mulhu a5, a0, a3
+; RV32IM-NEXT:    add a2, a5, a2
+; RV32IM-NEXT:    add a5, a6, a2
+; RV32IM-NEXT:    mul a7, a1, a3
+; RV32IM-NEXT:    add t0, a7, a5
+; RV32IM-NEXT:    mul t1, a4, a0
+; RV32IM-NEXT:    add a2, t0, t1
+; RV32IM-NEXT:    sltu t2, a2, t0
+; RV32IM-NEXT:    sltu a7, t0, a7
+; RV32IM-NEXT:    sltu a5, a5, a6
 ; RV32IM-NEXT:    mulhu a3, a1, a3
-; RV32IM-NEXT:    add a3, a3, a4
-; RV32IM-NEXT:    add a2, a3, a2
-; RV32IM-NEXT:    mul a1, a7, a1
-; RV32IM-NEXT:    mulhu a0, a7, a0
+; RV32IM-NEXT:    add a3, a3, a5
+; RV32IM-NEXT:    add a3, a3, a7
+; RV32IM-NEXT:    mul a1, a4, a1
+; RV32IM-NEXT:    mulhu a0, a4, a0
 ; RV32IM-NEXT:    add a0, a0, a1
 ; RV32IM-NEXT:    add a0, a0, t1
-; RV32IM-NEXT:    add a0, a2, a0
+; RV32IM-NEXT:    add a0, a3, a0
 ; RV32IM-NEXT:    add a1, a0, t2
-; RV32IM-NEXT:    mv a0, t0
+; RV32IM-NEXT:    mv a0, a2
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: mulhsu_i64:
diff --git a/llvm/test/CodeGen/RISCV/remat.ll b/llvm/test/CodeGen/RISCV/remat.ll
index 46f7a4dbda60c..b6375a91413b8 100644
--- a/llvm/test/CodeGen/RISCV/remat.ll
+++ b/llvm/test/CodeGen/RISCV/remat.ll
@@ -37,16 +37,16 @@ define i32 @test() nounwind {
 ; RV32I-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui s6, %hi(a)
-; RV32I-NEXT:    lw a0, %lo(a)(s6)
+; RV32I-NEXT:    lui s0, %hi(a)
+; RV32I-NEXT:    lw a0, %lo(a)(s0)
 ; RV32I-NEXT:    beqz a0, .LBB0_11
 ; RV32I-NEXT:  # %bb.1: # %for.body.preheader
-; RV32I-NEXT:    lui s2, %hi(l)
-; RV32I-NEXT:    lui s3, %hi(k)
-; RV32I-NEXT:    lui s4, %hi(j)
-; RV32I-NEXT:    lui s5, %hi(i)
-; RV32I-NEXT:    lui s1, %hi(d)
-; RV32I-NEXT:    lui s0, %hi(e)
+; RV32I-NEXT:    lui s1, %hi(l)
+; RV32I-NEXT:    lui s2, %hi(k)
+; RV32I-NEXT:    lui s3, %hi(j)
+; RV32I-NEXT:    lui s4, %hi(i)
+; RV32I-NEXT:    lui s5, %hi(d)
+; RV32I-NEXT:    lui s6, %hi(e)
 ; RV32I-NEXT:    lui s7, %hi(f)
 ; RV32I-NEXT:    lui s8, %hi(g)
 ; RV32I-NEXT:    lui s9, %hi(h)
@@ -55,56 +55,56 @@ define i32 @test() nounwind {
 ; RV32I-NEXT:    j .LBB0_3
 ; RV32I-NEXT:  .LBB0_2: # %for.inc
 ; RV32I-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT:    lw a0, %lo(a)(s6)
+; RV32I-NEXT:    lw a0, %lo(a)(s0)
 ; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    sw a0, %lo(a)(s6)
+; RV32I-NEXT:    sw a0, %lo(a)(s0)
 ; RV32I-NEXT:    beqz a0, .LBB0_11
 ; RV32I-NEXT:  .LBB0_3: # %for.body
 ; RV32I-NEXT:    # =>This Inner Loop Header: Depth=1
-; RV32I-NEXT:    lw a1, %lo(l)(s2)
+; RV32I-NEXT:    lw a1, %lo(l)(s1)
 ; RV32I-NEXT:    beqz a1, .LBB0_5
 ; RV32I-NEXT:  # %bb.4: # %if.then
 ; RV32I-NEXT:    # in Loop: Header=BB0_3 Depth=1
 ; RV32I-NEXT:    lw a1, %lo(b)(s11)
 ; RV32I-NEXT:    lw a2, %lo(c)(s10)
-; RV32I-NEXT:    lw a3, %lo(d)(s1)
-; RV32I-NEXT:    lw a4, %lo(e)(s0)
+; RV32I-NEXT:    lw a3, %lo(d)(s5)
+; RV32I-NEXT:    lw a4, %lo(e)(s6)
 ; RV32I-NEXT:    li a5, 32
 ; RV32I-NEXT:    call foo@plt
 ; RV32I-NEXT:  .LBB0_5: # %if.end
 ; RV32I-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT:    lw a0, %lo(k)(s3)
+; RV32I-NEXT:    lw a0, %lo(k)(s2)
 ; RV32I-NEXT:    beqz a0, .LBB0_7
 ; RV32I-NEXT:  # %bb.6: # %if.then3
 ; RV32I-NEXT:    # in Loop: Header=BB0_3 Depth=1
 ; RV32I-NEXT:    lw a0, %lo(b)(s11)
 ; RV32I-NEXT:    lw a1, %lo(c)(s10)
-; RV32I-NEXT:    lw a2, %lo(d)(s1)
-; RV32I-NEXT:    lw a3, %lo(e)(s0)
+; RV32I-NEXT:    lw a2, %lo(d)(s5)
+; RV32I-NEXT:    lw a3, %lo(e)(s6)
 ; RV32I-NEXT:    lw a4, %lo(f)(s7)
 ; RV32I-NEXT:    li a5, 64
 ; RV32I-NEXT:    call foo@plt
 ; RV32I-NEXT:  .LBB0_7: # %if.end5
 ; RV32I-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT:    lw a0, %lo(j)(s4)
+; RV32I-NEXT:    lw a0, %lo(j)(s3)
 ; RV32I-NEXT:    beqz a0, .LBB0_9
 ; RV32I-NEXT:  # %bb.8: # %if.then7
 ; RV32I-NEXT:    # in Loop: Header=BB0_3 Depth=1
 ; RV32I-NEXT:    lw a0, %lo(c)(s10)
-; RV32I-NEXT:    lw a1, %lo(d)(s1)
-; RV32I-NEXT:    lw a2, %lo(e)(s0)
+; RV32I-NEXT:    lw a1, %lo(d)(s5)
+; RV32I-NEXT:    lw a2, %lo(e)(s6)
 ; RV32I-NEXT:    lw a3, %lo(f)(s7)
 ; RV32I-NEXT:    lw a4, %lo(g)(s8)
 ; RV32I-NEXT:    li a5, 32
 ; RV32I-NEXT:    call foo@plt
 ; RV32I-NEXT:  .LBB0_9: # %if.end9
 ; RV32I-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT:    lw a0, %lo(i)(s5)
+; RV32I-NEXT:    lw a0, %lo(i)(s4)
 ; RV32I-NEXT:    beqz a0, .LBB0_2
 ; RV32I-NEXT:  # %bb.10: # %if.then11
 ; RV32I-NEXT:    # in Loop: Header=BB0_3 Depth=1
-; RV32I-NEXT:    lw a0, %lo(d)(s1)
-; RV32I-NEXT:    lw a1, %lo(e)(s0)
+; RV32I-NEXT:    lw a0, %lo(d)(s5)
+; RV32I-NEXT:    lw a1, %lo(e)(s6)
 ; RV32I-NEXT:    lw a2, %lo(f)(s7)
 ; RV32I-NEXT:    lw a3, %lo(g)(s8)
 ; RV32I-NEXT:    lw a4, %lo(h)(s9)
diff --git a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll
index 45a6666d489c7..9659ba49bcbc6 100644
--- a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll
+++ b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll
@@ -19,18 +19,18 @@ define half @half_test(half %a, half %b) nounwind {
 ; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    mv s0, a1
 ; RV32I-NEXT:    lui a1, 16
-; RV32I-NEXT:    addi s1, a1, -1
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    addi s2, a1, -1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    and a0, s0, s1
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    and a0, s0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __divsf3@plt
@@ -51,18 +51,18 @@ define half @half_test(half %a, half %b) nounwind {
 ; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    mv s0, a1
 ; RV64I-NEXT:    lui a1, 16
-; RV64I-NEXT:    addiw s1, a1, -1
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    addiw s2, a1, -1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
-; RV64I-NEXT:    mv s2, a0
-; RV64I-NEXT:    and a0, s0, s1
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    and a0, s0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    and a0, a0, s1
+; RV64I-NEXT:    and a0, a0, s2
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __divsf3@plt
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
index aab1817aff782..68b355981e2c6 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
@@ -183,18 +183,18 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    mv a4, a0
 ; RV32I-NEXT:  .LBB7_2:
-; RV32I-NEXT:    sll a6, a4, a2
+; RV32I-NEXT:    sll a5, a4, a2
 ; RV32I-NEXT:    bnez a3, .LBB7_4
 ; RV32I-NEXT:  # %bb.3:
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:  .LBB7_4:
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    not a5, a2
-; RV32I-NEXT:    srl a1, a1, a5
-; RV32I-NEXT:    or a3, a6, a1
+; RV32I-NEXT:    not a6, a2
+; RV32I-NEXT:    srl a1, a1, a6
+; RV32I-NEXT:    or a3, a5, a1
 ; RV32I-NEXT:    sll a0, a0, a2
 ; RV32I-NEXT:    srli a1, a4, 1
-; RV32I-NEXT:    srl a1, a1, a5
+; RV32I-NEXT:    srl a1, a1, a6
 ; RV32I-NEXT:    or a1, a0, a1
 ; RV32I-NEXT:    mv a0, a3
 ; RV32I-NEXT:    ret
@@ -208,18 +208,18 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
 ; RV32ZBB-NEXT:  # %bb.1:
 ; RV32ZBB-NEXT:    mv a4, a0
 ; RV32ZBB-NEXT:  .LBB7_2:
-; RV32ZBB-NEXT:    sll a6, a4, a2
+; RV32ZBB-NEXT:    sll a5, a4, a2
 ; RV32ZBB-NEXT:    bnez a3, .LBB7_4
 ; RV32ZBB-NEXT:  # %bb.3:
 ; RV32ZBB-NEXT:    mv a0, a1
 ; RV32ZBB-NEXT:  .LBB7_4:
 ; RV32ZBB-NEXT:    srli a1, a0, 1
-; RV32ZBB-NEXT:    not a5, a2
-; RV32ZBB-NEXT:    srl a1, a1, a5
-; RV32ZBB-NEXT:    or a3, a6, a1
+; RV32ZBB-NEXT:    not a6, a2
+; RV32ZBB-NEXT:    srl a1, a1, a6
+; RV32ZBB-NEXT:    or a3, a5, a1
 ; RV32ZBB-NEXT:    sll a0, a0, a2
 ; RV32ZBB-NEXT:    srli a1, a4, 1
-; RV32ZBB-NEXT:    srl a1, a1, a5
+; RV32ZBB-NEXT:    srl a1, a1, a6
 ; RV32ZBB-NEXT:    or a1, a0, a1
 ; RV32ZBB-NEXT:    mv a0, a3
 ; RV32ZBB-NEXT:    ret
@@ -233,18 +233,18 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
 ; RV32ZBP-NEXT:  # %bb.1:
 ; RV32ZBP-NEXT:    mv a4, a0
 ; RV32ZBP-NEXT:  .LBB7_2:
-; RV32ZBP-NEXT:    sll a6, a4, a2
+; RV32ZBP-NEXT:    sll a5, a4, a2
 ; RV32ZBP-NEXT:    bnez a3, .LBB7_4
 ; RV32ZBP-NEXT:  # %bb.3:
 ; RV32ZBP-NEXT:    mv a0, a1
 ; RV32ZBP-NEXT:  .LBB7_4:
 ; RV32ZBP-NEXT:    srli a1, a0, 1
-; RV32ZBP-NEXT:    not a5, a2
-; RV32ZBP-NEXT:    srl a1, a1, a5
-; RV32ZBP-NEXT:    or a3, a6, a1
+; RV32ZBP-NEXT:    not a6, a2
+; RV32ZBP-NEXT:    srl a1, a1, a6
+; RV32ZBP-NEXT:    or a3, a5, a1
 ; RV32ZBP-NEXT:    sll a0, a0, a2
 ; RV32ZBP-NEXT:    srli a1, a4, 1
-; RV32ZBP-NEXT:    srl a1, a1, a5
+; RV32ZBP-NEXT:    srl a1, a1, a6
 ; RV32ZBP-NEXT:    or a1, a0, a1
 ; RV32ZBP-NEXT:    mv a0, a3
 ; RV32ZBP-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index f4e5efaaa36d9..d5d6b3ab0b893 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -73,8 +73,8 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s3, a1
-; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s0, a1
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
@@ -88,14 +88,14 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    not a0, a0
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    lui a2, 349525
-; RV32I-NEXT:    addi s5, a2, 1365
-; RV32I-NEXT:    and a1, a1, s5
+; RV32I-NEXT:    addi s4, a2, 1365
+; RV32I-NEXT:    and a1, a1, s4
 ; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi s1, a1, 819
-; RV32I-NEXT:    and a1, a0, s1
+; RV32I-NEXT:    addi s5, a1, 819
+; RV32I-NEXT:    and a1, a0, s5
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    and a0, a0, s5
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
@@ -103,12 +103,12 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi s6, a1, -241
 ; RV32I-NEXT:    and a0, a0, s6
 ; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi s0, a1, 257
-; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    addi s3, a1, 257
+; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    srli a0, s4, 1
-; RV32I-NEXT:    or a0, s4, a0
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    srli a0, s2, 1
+; RV32I-NEXT:    or a0, s2, a0
 ; RV32I-NEXT:    srli a1, a0, 2
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
@@ -119,24 +119,24 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    not a0, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    and a1, a1, s5
+; RV32I-NEXT:    and a1, a1, s4
 ; RV32I-NEXT:    sub a0, a0, a1
-; RV32I-NEXT:    and a1, a0, s1
+; RV32I-NEXT:    and a1, a0, s5
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s1
+; RV32I-NEXT:    and a0, a0, s5
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    and a0, a0, s6
-; RV32I-NEXT:    mv a1, s0
+; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s3, .LBB1_2
+; RV32I-NEXT:    bnez s0, .LBB1_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    j .LBB1_3
 ; RV32I-NEXT:  .LBB1_2:
-; RV32I-NEXT:    srli a0, s2, 24
+; RV32I-NEXT:    srli a0, s1, 24
 ; RV32I-NEXT:  .LBB1_3:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -227,21 +227,21 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s6, 0(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s3, a1
-; RV32I-NEXT:    mv s4, a0
+; RV32I-NEXT:    mv s1, a1
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    addi a0, a0, -1
-; RV32I-NEXT:    not a1, s4
+; RV32I-NEXT:    not a1, s2
 ; RV32I-NEXT:    and a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
 ; RV32I-NEXT:    lui a2, 349525
-; RV32I-NEXT:    addi s5, a2, 1365
-; RV32I-NEXT:    and a1, a1, s5
+; RV32I-NEXT:    addi s4, a2, 1365
+; RV32I-NEXT:    and a1, a1, s4
 ; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi s0, a1, 819
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    addi s5, a1, 819
+; RV32I-NEXT:    and a1, a0, s5
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s5
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
@@ -249,32 +249,32 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    addi s6, a1, -241
 ; RV32I-NEXT:    and a0, a0, s6
 ; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi s1, a1, 257
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    addi s3, a1, 257
+; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    mv s2, a0
-; RV32I-NEXT:    addi a0, s3, -1
-; RV32I-NEXT:    not a1, s3
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    addi a0, s1, -1
+; RV32I-NEXT:    not a1, s1
 ; RV32I-NEXT:    and a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    and a1, a1, s5
+; RV32I-NEXT:    and a1, a1, s4
 ; RV32I-NEXT:    sub a0, a0, a1
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    and a1, a0, s5
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s5
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    and a0, a0, s6
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s4, .LBB3_2
+; RV32I-NEXT:    bnez s2, .LBB3_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    addi a0, a0, 32
 ; RV32I-NEXT:    j .LBB3_3
 ; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli a0, s2, 24
+; RV32I-NEXT:    srli a0, s0, 24
 ; RV32I-NEXT:  .LBB3_3:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -356,17 +356,17 @@ define i64 @ctpop_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    srli a0, a1, 1
 ; RV32I-NEXT:    lui a2, 349525
-; RV32I-NEXT:    addi s3, a2, 1365
-; RV32I-NEXT:    and a0, a0, s3
+; RV32I-NEXT:    addi s2, a2, 1365
+; RV32I-NEXT:    and a0, a0, s2
 ; RV32I-NEXT:    sub a0, a1, a0
 ; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi s0, a1, 819
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    addi s3, a1, 819
+; RV32I-NEXT:    and a1, a0, s3
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
@@ -378,12 +378,12 @@ define i64 @ctpop_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __mulsi3@plt
 ; RV32I-NEXT:    srli s5, a0, 24
-; RV32I-NEXT:    srli a0, s2, 1
-; RV32I-NEXT:    and a0, a0, s3
-; RV32I-NEXT:    sub a0, s2, a0
-; RV32I-NEXT:    and a1, a0, s0
+; RV32I-NEXT:    srli a0, s0, 1
+; RV32I-NEXT:    and a0, a0, s2
+; RV32I-NEXT:    sub a0, s0, a0
+; RV32I-NEXT:    and a1, a0, s3
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    and a0, a0, s0
+; RV32I-NEXT:    and a0, a0, s3
 ; RV32I-NEXT:    add a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    add a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 026a27b691196..1df371c407d77 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -39,18 +39,18 @@ define i64 @gorc1_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a1, 1
 ; RV32I-NEXT:    lui a4, 699051
 ; RV32I-NEXT:    addi a4, a4, -1366
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 1
 ; RV32I-NEXT:    srli a5, a0, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 349525
+; RV32I-NEXT:    addi a6, a6, 1365
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc1_i64:
@@ -102,18 +102,18 @@ define i64 @gorc2_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a1, 2
 ; RV32I-NEXT:    lui a4, 838861
 ; RV32I-NEXT:    addi a4, a4, -820
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 2
 ; RV32I-NEXT:    srli a5, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc2_i64:
@@ -181,34 +181,34 @@ define i64 @gorc3_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a0, 1
 ; RV32I-NEXT:    lui a4, 699051
 ; RV32I-NEXT:    addi a4, a4, -1366
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a0, 1
 ; RV32I-NEXT:    srli a5, a1, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    lui a6, 349525
+; RV32I-NEXT:    addi a6, a6, 1365
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a0, a4, a0
 ; RV32I-NEXT:    or a1, a5, a1
 ; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a6
+; RV32I-NEXT:    or a0, a0, a3
 ; RV32I-NEXT:    slli a2, a0, 2
 ; RV32I-NEXT:    slli a3, a1, 2
 ; RV32I-NEXT:    lui a4, 838861
 ; RV32I-NEXT:    addi a4, a4, -820
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 2
 ; RV32I-NEXT:    srli a5, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc3_i64:
@@ -266,18 +266,18 @@ define i64 @gorc4_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a1, 4
 ; RV32I-NEXT:    lui a4, 986895
 ; RV32I-NEXT:    addi a4, a4, 240
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 4
 ; RV32I-NEXT:    srli a5, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 61681
+; RV32I-NEXT:    addi a6, a6, -241
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc4_i64:
@@ -345,34 +345,34 @@ define i64 @gorc5_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a0, 1
 ; RV32I-NEXT:    lui a4, 699051
 ; RV32I-NEXT:    addi a4, a4, -1366
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a0, 1
 ; RV32I-NEXT:    srli a5, a1, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    lui a6, 349525
+; RV32I-NEXT:    addi a6, a6, 1365
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a0, a4, a0
 ; RV32I-NEXT:    or a1, a5, a1
 ; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a6
+; RV32I-NEXT:    or a0, a0, a3
 ; RV32I-NEXT:    slli a2, a0, 4
 ; RV32I-NEXT:    slli a3, a1, 4
 ; RV32I-NEXT:    lui a4, 986895
 ; RV32I-NEXT:    addi a4, a4, 240
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 4
 ; RV32I-NEXT:    srli a5, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 61681
+; RV32I-NEXT:    addi a6, a6, -241
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc5_i64:
@@ -446,34 +446,34 @@ define i64 @gorc6_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a0, 2
 ; RV32I-NEXT:    lui a4, 838861
 ; RV32I-NEXT:    addi a4, a4, -820
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a0, 2
 ; RV32I-NEXT:    srli a5, a1, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a0, a4, a0
 ; RV32I-NEXT:    or a1, a5, a1
 ; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a6
+; RV32I-NEXT:    or a0, a0, a3
 ; RV32I-NEXT:    slli a2, a0, 4
 ; RV32I-NEXT:    slli a3, a1, 4
 ; RV32I-NEXT:    lui a4, 986895
 ; RV32I-NEXT:    addi a4, a4, 240
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 4
 ; RV32I-NEXT:    srli a5, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 61681
+; RV32I-NEXT:    addi a6, a6, -241
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc6_i64:
@@ -563,50 +563,50 @@ define i64 @gorc7_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a1, 1
 ; RV32I-NEXT:    lui a4, 699051
 ; RV32I-NEXT:    addi a4, a4, -1366
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 1
 ; RV32I-NEXT:    srli a5, a0, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 349525
+; RV32I-NEXT:    addi a6, a6, 1365
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    slli a2, a1, 2
 ; RV32I-NEXT:    slli a3, a0, 2
 ; RV32I-NEXT:    lui a4, 838861
 ; RV32I-NEXT:    addi a4, a4, -820
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a0, 2
 ; RV32I-NEXT:    srli a5, a1, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a0, a4, a0
 ; RV32I-NEXT:    or a1, a5, a1
 ; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a6
+; RV32I-NEXT:    or a0, a0, a3
 ; RV32I-NEXT:    slli a2, a0, 4
 ; RV32I-NEXT:    slli a3, a1, 4
 ; RV32I-NEXT:    lui a4, 986895
 ; RV32I-NEXT:    addi a4, a4, 240
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 4
 ; RV32I-NEXT:    srli a5, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 61681
+; RV32I-NEXT:    addi a6, a6, -241
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc7_i64:
@@ -670,18 +670,18 @@ define i64 @gorc8_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a1, 8
 ; RV32I-NEXT:    lui a4, 1044496
 ; RV32I-NEXT:    addi a4, a4, -256
-; RV32I-NEXT:    and a6, a3, a4
+; RV32I-NEXT:    and a3, a3, a4
 ; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 8
 ; RV32I-NEXT:    srli a5, a0, 8
-; RV32I-NEXT:    lui a3, 4080
-; RV32I-NEXT:    addi a3, a3, 255
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 4080
+; RV32I-NEXT:    addi a6, a6, 255
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc8_i64:
@@ -830,30 +830,30 @@ define i64 @gorc2b_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a0, 2
 ; RV32I-NEXT:    lui a4, 838861
 ; RV32I-NEXT:    addi a4, a4, -820
-; RV32I-NEXT:    and a6, a3, a4
-; RV32I-NEXT:    and a7, a2, a4
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a5, a0, 2
-; RV32I-NEXT:    srli a3, a1, 2
-; RV32I-NEXT:    lui a2, 209715
-; RV32I-NEXT:    addi a2, a2, 819
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    and a5, a5, a2
+; RV32I-NEXT:    srli a6, a1, 2
+; RV32I-NEXT:    lui a7, 209715
+; RV32I-NEXT:    addi a7, a7, 819
+; RV32I-NEXT:    and a6, a6, a7
+; RV32I-NEXT:    and a5, a5, a7
 ; RV32I-NEXT:    or a0, a5, a0
-; RV32I-NEXT:    or a1, a3, a1
-; RV32I-NEXT:    or a1, a1, a7
-; RV32I-NEXT:    or a0, a0, a6
-; RV32I-NEXT:    slli a3, a0, 2
-; RV32I-NEXT:    slli a5, a1, 2
-; RV32I-NEXT:    and a6, a5, a4
+; RV32I-NEXT:    or a1, a6, a1
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a3, a1, 2
 ; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 2
 ; RV32I-NEXT:    srli a5, a0, 2
-; RV32I-NEXT:    and a5, a5, a2
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    and a5, a5, a7
+; RV32I-NEXT:    and a4, a4, a7
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc2b_i64:
@@ -941,46 +941,46 @@ define i64 @gorc3b_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a3, a1, 1
 ; RV32I-NEXT:    lui a4, 699051
 ; RV32I-NEXT:    addi a4, a4, -1366
-; RV32I-NEXT:    and a6, a3, a4
-; RV32I-NEXT:    and a7, a2, a4
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a5, a1, 1
-; RV32I-NEXT:    srli a3, a0, 1
-; RV32I-NEXT:    lui a2, 349525
-; RV32I-NEXT:    addi a2, a2, 1365
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    and a5, a5, a2
+; RV32I-NEXT:    srli a6, a0, 1
+; RV32I-NEXT:    lui a7, 349525
+; RV32I-NEXT:    addi a7, a7, 1365
+; RV32I-NEXT:    and a6, a6, a7
+; RV32I-NEXT:    and a5, a5, a7
 ; RV32I-NEXT:    or a1, a5, a1
-; RV32I-NEXT:    or a0, a3, a0
-; RV32I-NEXT:    or a0, a0, a7
-; RV32I-NEXT:    or a1, a1, a6
-; RV32I-NEXT:    slli a6, a1, 2
-; RV32I-NEXT:    slli a5, a0, 2
-; RV32I-NEXT:    lui a3, 838861
-; RV32I-NEXT:    addi a3, a3, -820
-; RV32I-NEXT:    and a7, a5, a3
-; RV32I-NEXT:    and a6, a6, a3
-; RV32I-NEXT:    srli t0, a0, 2
-; RV32I-NEXT:    srli a3, a1, 2
-; RV32I-NEXT:    lui a5, 209715
-; RV32I-NEXT:    addi a5, a5, 819
+; RV32I-NEXT:    or a0, a6, a0
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    slli a2, a1, 2
+; RV32I-NEXT:    slli a3, a0, 2
+; RV32I-NEXT:    lui a5, 838861
+; RV32I-NEXT:    addi a5, a5, -820
 ; RV32I-NEXT:    and a3, a3, a5
-; RV32I-NEXT:    and a5, t0, a5
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    srli a5, a0, 2
+; RV32I-NEXT:    srli a6, a1, 2
+; RV32I-NEXT:    lui t0, 209715
+; RV32I-NEXT:    addi t0, t0, 819
+; RV32I-NEXT:    and a6, a6, t0
+; RV32I-NEXT:    and a5, a5, t0
 ; RV32I-NEXT:    or a0, a5, a0
-; RV32I-NEXT:    or a1, a3, a1
-; RV32I-NEXT:    or a1, a1, a6
-; RV32I-NEXT:    or a0, a0, a7
-; RV32I-NEXT:    slli a3, a0, 1
-; RV32I-NEXT:    slli a5, a1, 1
-; RV32I-NEXT:    and a6, a5, a4
+; RV32I-NEXT:    or a1, a6, a1
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    slli a2, a0, 1
+; RV32I-NEXT:    slli a3, a1, 1
 ; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a4, a1, 1
 ; RV32I-NEXT:    srli a5, a0, 1
-; RV32I-NEXT:    and a5, a5, a2
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    and a5, a5, a7
+; RV32I-NEXT:    and a4, a4, a7
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: gorc3b_i64:
@@ -1818,20 +1818,20 @@ define i64 @grev2b_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    or a0, a2, a0
 ; RV32I-NEXT:    or a1, a3, a1
-; RV32I-NEXT:    slli a6, a1, 2
+; RV32I-NEXT:    slli a2, a1, 2
 ; RV32I-NEXT:    slli a3, a0, 2
-; RV32I-NEXT:    lui a2, 838861
-; RV32I-NEXT:    addi a2, a2, -820
-; RV32I-NEXT:    and a7, a3, a2
-; RV32I-NEXT:    and a2, a6, a2
+; RV32I-NEXT:    lui a6, 838861
+; RV32I-NEXT:    addi a6, a6, -820
+; RV32I-NEXT:    and a3, a3, a6
+; RV32I-NEXT:    and a2, a2, a6
 ; RV32I-NEXT:    srli a1, a1, 2
 ; RV32I-NEXT:    srli a0, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a0, a0, a3
-; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    and a1, a1, a6
 ; RV32I-NEXT:    or a1, a2, a1
-; RV32I-NEXT:    or a0, a7, a0
+; RV32I-NEXT:    or a0, a3, a0
 ; RV32I-NEXT:    slli a2, a0, 1
 ; RV32I-NEXT:    slli a3, a1, 1
 ; RV32I-NEXT:    and a3, a3, a4
@@ -1945,40 +1945,40 @@ define i64 @grev0_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    and a1, a1, a5
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    or a0, a3, a0
-; RV32I-NEXT:    slli a6, a0, 2
+; RV32I-NEXT:    slli a2, a0, 2
 ; RV32I-NEXT:    slli a3, a1, 2
-; RV32I-NEXT:    lui a2, 838861
-; RV32I-NEXT:    addi a2, a2, -820
-; RV32I-NEXT:    and a7, a3, a2
-; RV32I-NEXT:    and a6, a6, a2
+; RV32I-NEXT:    lui a6, 838861
+; RV32I-NEXT:    addi a6, a6, -820
+; RV32I-NEXT:    and a3, a3, a6
+; RV32I-NEXT:    and a2, a2, a6
 ; RV32I-NEXT:    srli a0, a0, 2
 ; RV32I-NEXT:    srli a1, a1, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
-; RV32I-NEXT:    or t0, a6, a0
-; RV32I-NEXT:    or a1, a7, a1
-; RV32I-NEXT:    slli a6, a1, 1
-; RV32I-NEXT:    slli a0, t0, 1
-; RV32I-NEXT:    and a7, a0, a4
-; RV32I-NEXT:    and a4, a6, a4
+; RV32I-NEXT:    lui a7, 209715
+; RV32I-NEXT:    addi a7, a7, 819
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    slli a2, a1, 1
+; RV32I-NEXT:    slli a3, a0, 1
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    and a2, a2, a4
 ; RV32I-NEXT:    srli a1, a1, 1
-; RV32I-NEXT:    srli a0, t0, 1
+; RV32I-NEXT:    srli a0, a0, 1
 ; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    and a1, a1, a5
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    or a0, a7, a0
-; RV32I-NEXT:    slli a4, a0, 2
-; RV32I-NEXT:    slli a5, a1, 2
-; RV32I-NEXT:    and a5, a5, a2
-; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    slli a2, a0, 2
+; RV32I-NEXT:    slli a3, a1, 2
+; RV32I-NEXT:    and a3, a3, a6
+; RV32I-NEXT:    and a2, a2, a6
 ; RV32I-NEXT:    srli a0, a0, 2
 ; RV32I-NEXT:    srli a1, a1, 2
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    and a0, a0, a7
 ; RV32I-NEXT:    or a0, a2, a0
-; RV32I-NEXT:    or a1, a5, a1
+; RV32I-NEXT:    or a1, a3, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: grev0_i64:
@@ -2275,13 +2275,13 @@ define i64 @bitreverse_i64(i64 %a) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    srli a2, a1, 8
 ; RV32I-NEXT:    lui a3, 16
-; RV32I-NEXT:    addi a7, a3, -256
-; RV32I-NEXT:    and a2, a2, a7
+; RV32I-NEXT:    addi a3, a3, -256
+; RV32I-NEXT:    and a2, a2, a3
 ; RV32I-NEXT:    srli a4, a1, 24
 ; RV32I-NEXT:    or a2, a2, a4
 ; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    lui a6, 4080
-; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    lui a5, 4080
+; RV32I-NEXT:    and a4, a4, a5
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    or a1, a1, a4
 ; RV32I-NEXT:    or a1, a1, a2
@@ -2293,27 +2293,27 @@ define i64 @bitreverse_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a1, a1, 4
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    srli a2, a1, 2
-; RV32I-NEXT:    lui a5, 209715
-; RV32I-NEXT:    addi a5, a5, 819
-; RV32I-NEXT:    and a2, a2, a5
-; RV32I-NEXT:    and a1, a1, a5
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a2, a2, a6
+; RV32I-NEXT:    and a1, a1, a6
 ; RV32I-NEXT:    slli a1, a1, 2
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    srli a2, a1, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    lui a7, 349525
+; RV32I-NEXT:    addi a7, a7, 1365
+; RV32I-NEXT:    and a2, a2, a7
+; RV32I-NEXT:    and a1, a1, a7
 ; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or t0, a2, a1
+; RV32I-NEXT:    or a2, a2, a1
 ; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a7
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a6
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    srli a3, a0, 24
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    slli a3, a0, 8
+; RV32I-NEXT:    and a3, a3, a5
 ; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a3
 ; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    and a1, a1, a4
@@ -2321,16 +2321,16 @@ define i64 @bitreverse_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
-; RV32I-NEXT:    and a1, a1, a5
-; RV32I-NEXT:    and a0, a0, a5
+; RV32I-NEXT:    and a1, a1, a6
+; RV32I-NEXT:    and a0, a0, a6
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    and a0, a0, a7
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a1, a1, a0
-; RV32I-NEXT:    mv a0, t0
+; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: bitreverse_i64:
@@ -2462,13 +2462,13 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    srli a3, a1, 8
 ; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a7, a2, -256
-; RV32I-NEXT:    and a3, a3, a7
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a3, a3, a2
 ; RV32I-NEXT:    srli a4, a1, 24
 ; RV32I-NEXT:    or a4, a3, a4
 ; RV32I-NEXT:    slli a5, a1, 8
-; RV32I-NEXT:    lui a6, 4080
-; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    lui a3, 4080
+; RV32I-NEXT:    and a5, a5, a3
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    or a1, a1, a5
 ; RV32I-NEXT:    or a1, a1, a4
@@ -2480,58 +2480,58 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV32I-NEXT:    slli a1, a1, 4
 ; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    srli a4, a1, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a4, a4, a3
-; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    and a1, a1, a6
 ; RV32I-NEXT:    slli a1, a1, 2
 ; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    srli a4, a1, 1
-; RV32I-NEXT:    lui a2, 349525
-; RV32I-NEXT:    addi a2, a2, 1365
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    lui a7, 349525
+; RV32I-NEXT:    addi a7, a7, 1365
+; RV32I-NEXT:    and a4, a4, a7
+; RV32I-NEXT:    and a1, a1, a7
 ; RV32I-NEXT:    slli a1, a1, 1
 ; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and t0, a4, a7
-; RV32I-NEXT:    srli a4, a0, 24
-; RV32I-NEXT:    or t0, t0, a4
-; RV32I-NEXT:    slli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    and a4, a4, a2
+; RV32I-NEXT:    srli t0, a0, 24
+; RV32I-NEXT:    or a4, a4, t0
+; RV32I-NEXT:    slli t0, a0, 8
+; RV32I-NEXT:    and t0, t0, a3
 ; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a4
 ; RV32I-NEXT:    or a0, a0, t0
+; RV32I-NEXT:    or a0, a0, a4
 ; RV32I-NEXT:    srli a4, a0, 4
 ; RV32I-NEXT:    and a4, a4, a5
 ; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a4, a0
 ; RV32I-NEXT:    srli a4, a0, 2
-; RV32I-NEXT:    and a4, a4, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    and a0, a0, a6
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a3, a0, 1
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    srli a4, a0, 1
+; RV32I-NEXT:    and a4, a4, a7
+; RV32I-NEXT:    and a0, a0, a7
 ; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    or a0, a3, a0
-; RV32I-NEXT:    srli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a7
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a2, a2, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    and a3, a3, a6
+; RV32I-NEXT:    or a0, a4, a0
+; RV32I-NEXT:    srli a4, a0, 8
+; RV32I-NEXT:    and a4, a4, a2
+; RV32I-NEXT:    srli a5, a0, 24
+; RV32I-NEXT:    or a4, a4, a5
+; RV32I-NEXT:    slli a5, a0, 8
+; RV32I-NEXT:    and a5, a5, a3
 ; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    srli a2, a1, 8
-; RV32I-NEXT:    and a2, a2, a7
-; RV32I-NEXT:    srli a3, a1, 24
-; RV32I-NEXT:    or a2, a2, a3
-; RV32I-NEXT:    slli a3, a1, 8
-; RV32I-NEXT:    and a3, a3, a6
+; RV32I-NEXT:    or a0, a0, a5
+; RV32I-NEXT:    or a0, a0, a4
+; RV32I-NEXT:    srli a4, a1, 8
+; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    srli a4, a1, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    slli a4, a1, 8
+; RV32I-NEXT:    and a3, a4, a3
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    or a1, a1, a2
@@ -2584,16 +2584,16 @@ define i64 @shfl1_i64(i64 %a, i64 %b) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a2, 629146
 ; RV32I-NEXT:    addi a2, a2, -1639
-; RV32I-NEXT:    and a6, a0, a2
+; RV32I-NEXT:    and a3, a0, a2
 ; RV32I-NEXT:    and a2, a1, a2
 ; RV32I-NEXT:    slli a4, a1, 1
 ; RV32I-NEXT:    slli a5, a0, 1
-; RV32I-NEXT:    lui a3, 279620
-; RV32I-NEXT:    addi a3, a3, 1092
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a2, a2, a3
-; RV32I-NEXT:    or a3, a6, a5
+; RV32I-NEXT:    lui a6, 279620
+; RV32I-NEXT:    addi a6, a6, 1092
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    or a3, a3, a5
 ; RV32I-NEXT:    srli a0, a0, 1
 ; RV32I-NEXT:    srli a1, a1, 1
 ; RV32I-NEXT:    lui a4, 139810
@@ -2656,16 +2656,16 @@ define i64 @shfl2_i64(i64 %a, i64 %b) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a2, 801852
 ; RV32I-NEXT:    addi a2, a2, 963
-; RV32I-NEXT:    and a6, a0, a2
+; RV32I-NEXT:    and a3, a0, a2
 ; RV32I-NEXT:    and a2, a1, a2
 ; RV32I-NEXT:    slli a4, a1, 2
 ; RV32I-NEXT:    slli a5, a0, 2
-; RV32I-NEXT:    lui a3, 197379
-; RV32I-NEXT:    addi a3, a3, 48
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    or a2, a2, a3
-; RV32I-NEXT:    or a3, a6, a5
+; RV32I-NEXT:    lui a6, 197379
+; RV32I-NEXT:    addi a6, a6, 48
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    or a3, a3, a5
 ; RV32I-NEXT:    srli a0, a0, 2
 ; RV32I-NEXT:    srli a1, a1, 2
 ; RV32I-NEXT:    lui a4, 49345
@@ -2728,24 +2728,24 @@ define i64 @shfl4_i64(i64 %a, i64 %b) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a2, 983295
 ; RV32I-NEXT:    addi a2, a2, 15
-; RV32I-NEXT:    and a6, a1, a2
+; RV32I-NEXT:    and a3, a1, a2
 ; RV32I-NEXT:    and a2, a0, a2
 ; RV32I-NEXT:    slli a4, a1, 4
 ; RV32I-NEXT:    slli a5, a0, 4
-; RV32I-NEXT:    lui a3, 61441
-; RV32I-NEXT:    addi a3, a3, -256
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    lui a6, 61441
+; RV32I-NEXT:    addi a6, a6, -256
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
 ; RV32I-NEXT:    srli a1, a1, 4
 ; RV32I-NEXT:    srli a0, a0, 4
-; RV32I-NEXT:    lui a4, 3840
-; RV32I-NEXT:    addi a4, a4, 240
-; RV32I-NEXT:    and a0, a0, a4
-; RV32I-NEXT:    and a1, a1, a4
-; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    lui a6, 3840
+; RV32I-NEXT:    addi a6, a6, 240
+; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    and a1, a1, a6
+; RV32I-NEXT:    or a1, a4, a1
 ; RV32I-NEXT:    or a0, a5, a0
 ; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a1, a1, a6
+; RV32I-NEXT:    or a1, a1, a3
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: shfl4_i64:
@@ -2799,22 +2799,22 @@ define i64 @shfl8_i64(i64 %a, i64 %b) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lui a2, 1044480
 ; RV32I-NEXT:    addi a2, a2, 255
-; RV32I-NEXT:    and a6, a0, a2
+; RV32I-NEXT:    and a3, a0, a2
 ; RV32I-NEXT:    and a2, a1, a2
 ; RV32I-NEXT:    slli a4, a0, 8
 ; RV32I-NEXT:    slli a5, a1, 8
-; RV32I-NEXT:    lui a3, 4080
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    lui a6, 4080
+; RV32I-NEXT:    and a5, a5, a6
+; RV32I-NEXT:    and a4, a4, a6
 ; RV32I-NEXT:    srli a1, a1, 8
 ; RV32I-NEXT:    srli a0, a0, 8
-; RV32I-NEXT:    lui a4, 16
-; RV32I-NEXT:    addi a4, a4, -256
-; RV32I-NEXT:    and a0, a0, a4
-; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    lui a6, 16
+; RV32I-NEXT:    addi a6, a6, -256
+; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    and a1, a1, a6
 ; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    or a0, a0, a6
-; RV32I-NEXT:    or a0, a3, a0
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    or a0, a4, a0
 ; RV32I-NEXT:    or a1, a5, a1
 ; RV32I-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rv32zbt.ll b/llvm/test/CodeGen/RISCV/rv32zbt.ll
index 1582b4f62265b..2e6c7da552e8d 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbt.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbt.ll
@@ -341,14 +341,14 @@ define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV32I-LABEL: fshl_i64:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    srli a5, a4, 5
-; RV32I-NEXT:    andi a5, a5, 1
-; RV32I-NEXT:    mv a6, a3
-; RV32I-NEXT:    bnez a5, .LBB13_2
+; RV32I-NEXT:    andi a6, a5, 1
+; RV32I-NEXT:    mv a5, a3
+; RV32I-NEXT:    bnez a6, .LBB13_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a6, a0
+; RV32I-NEXT:    mv a5, a0
 ; RV32I-NEXT:  .LBB13_2:
-; RV32I-NEXT:    sll a7, a6, a4
-; RV32I-NEXT:    bnez a5, .LBB13_4
+; RV32I-NEXT:    sll a7, a5, a4
+; RV32I-NEXT:    bnez a6, .LBB13_4
 ; RV32I-NEXT:  # %bb.3:
 ; RV32I-NEXT:    mv a2, a3
 ; RV32I-NEXT:  .LBB13_4:
@@ -356,12 +356,12 @@ define i64 @fshl_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV32I-NEXT:    not a3, a4
 ; RV32I-NEXT:    srl a2, a2, a3
 ; RV32I-NEXT:    or a2, a7, a2
-; RV32I-NEXT:    bnez a5, .LBB13_6
+; RV32I-NEXT:    bnez a6, .LBB13_6
 ; RV32I-NEXT:  # %bb.5:
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:  .LBB13_6:
 ; RV32I-NEXT:    sll a0, a0, a4
-; RV32I-NEXT:    srli a1, a6, 1
+; RV32I-NEXT:    srli a1, a5, 1
 ; RV32I-NEXT:    srl a1, a1, a3
 ; RV32I-NEXT:    or a1, a0, a1
 ; RV32I-NEXT:    mv a0, a2
@@ -420,24 +420,24 @@ define i64 @fshr_i64(i64 %a, i64 %b, i64 %c) nounwind {
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    mv a2, a3
 ; RV32I-NEXT:  .LBB15_2:
-; RV32I-NEXT:    srl a6, a2, a4
+; RV32I-NEXT:    srl a2, a2, a4
 ; RV32I-NEXT:    beqz a5, .LBB15_4
 ; RV32I-NEXT:  # %bb.3:
 ; RV32I-NEXT:    mv a3, a0
 ; RV32I-NEXT:  .LBB15_4:
 ; RV32I-NEXT:    slli a7, a3, 1
-; RV32I-NEXT:    not t0, a4
-; RV32I-NEXT:    sll a2, a7, t0
-; RV32I-NEXT:    or a6, a2, a6
+; RV32I-NEXT:    not a6, a4
+; RV32I-NEXT:    sll a7, a7, a6
+; RV32I-NEXT:    or a2, a7, a2
 ; RV32I-NEXT:    srl a3, a3, a4
 ; RV32I-NEXT:    beqz a5, .LBB15_6
 ; RV32I-NEXT:  # %bb.5:
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:  .LBB15_6:
 ; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    sll a0, a0, t0
+; RV32I-NEXT:    sll a0, a0, a6
 ; RV32I-NEXT:    or a1, a0, a3
-; RV32I-NEXT:    mv a0, a6
+; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBT-LABEL: fshr_i64:
diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
index 198aea298be2b..a99bd2843f735 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
@@ -13,15 +13,15 @@ define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind {
 ; CHECK-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
 ; CHECK-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
-; CHECK-NEXT:    srli s2, a0, 32
+; CHECK-NEXT:    srli s0, a0, 32
 ; CHECK-NEXT:    srli s1, a1, 32
 ; CHECK-NEXT:    call __addsf3@plt
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    mv a0, s2
+; CHECK-NEXT:    mv s2, a0
+; CHECK-NEXT:    mv a0, s0
 ; CHECK-NEXT:    mv a1, s1
 ; CHECK-NEXT:    call __addsf3@plt
 ; CHECK-NEXT:    slli a0, a0, 32
-; CHECK-NEXT:    slli a1, s0, 32
+; CHECK-NEXT:    slli a1, s2, 32
 ; CHECK-NEXT:    srli a1, a1, 32
 ; CHECK-NEXT:    or a0, a0, a1
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 674ffcff180de..260892285643a 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2408,38 +2408,38 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a6, 4080
-; RV64I-NEXT:    and a1, a1, a6
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    and a1, a1, a2
 ; RV64I-NEXT:    srli a3, a0, 8
 ; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a7, a4, 24
-; RV64I-NEXT:    and a3, a3, a7
+; RV64I-NEXT:    slli a5, a4, 24
+; RV64I-NEXT:    and a3, a3, a5
 ; RV64I-NEXT:    or a1, a3, a1
 ; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a3, a3, a2
-; RV64I-NEXT:    srli a5, a0, 56
-; RV64I-NEXT:    or a3, a3, a5
+; RV64I-NEXT:    lui a6, 16
+; RV64I-NEXT:    addiw a6, a6, -256
+; RV64I-NEXT:    and a3, a3, a6
+; RV64I-NEXT:    srli a7, a0, 56
+; RV64I-NEXT:    or a3, a3, a7
 ; RV64I-NEXT:    or a1, a1, a3
 ; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli t0, a4, 40
-; RV64I-NEXT:    and a3, a3, t0
-; RV64I-NEXT:    srliw a5, a0, 24
-; RV64I-NEXT:    slli a5, a5, 32
-; RV64I-NEXT:    or a3, a3, a5
-; RV64I-NEXT:    slli a5, a0, 40
+; RV64I-NEXT:    slli a7, a4, 40
+; RV64I-NEXT:    and a3, a3, a7
+; RV64I-NEXT:    srliw t0, a0, 24
+; RV64I-NEXT:    slli t0, t0, 32
+; RV64I-NEXT:    or a3, a3, t0
+; RV64I-NEXT:    slli t0, a0, 40
 ; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and a5, a5, a4
+; RV64I-NEXT:    and t0, t0, a4
 ; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a5
-; RV64I-NEXT:    lui a5, %hi(.LCPI68_0)
-; RV64I-NEXT:    ld a5, %lo(.LCPI68_0)(a5)
+; RV64I-NEXT:    or a0, a0, t0
+; RV64I-NEXT:    lui t0, %hi(.LCPI68_0)
+; RV64I-NEXT:    ld t0, %lo(.LCPI68_0)(t0)
 ; RV64I-NEXT:    or a0, a0, a3
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, a5
-; RV64I-NEXT:    and a0, a0, a5
+; RV64I-NEXT:    and a1, a1, t0
+; RV64I-NEXT:    and a0, a0, t0
 ; RV64I-NEXT:    lui a3, %hi(.LCPI68_1)
 ; RV64I-NEXT:    ld a3, %lo(.LCPI68_1)(a3)
 ; RV64I-NEXT:    slli a0, a0, 4
@@ -2457,17 +2457,17 @@ define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-NEXT:    slli a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a2, a0, 56
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    srli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a6
+; RV64I-NEXT:    and a1, a1, a6
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    and a2, a3, a2
 ; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a7
+; RV64I-NEXT:    and a3, a3, a5
 ; RV64I-NEXT:    or a2, a3, a2
 ; RV64I-NEXT:    or a1, a2, a1
 ; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, t0
+; RV64I-NEXT:    and a2, a2, a7
 ; RV64I-NEXT:    srliw a3, a0, 24
 ; RV64I-NEXT:    slli a3, a3, 32
 ; RV64I-NEXT:    or a2, a2, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index 0955eba777f01..ca0abc5e40031 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -103,76 +103,76 @@ define fastcc <vscale x 128 x i32> @ret_split_nxv128i32(<vscale x 128 x i32>* %x
 ; CHECK-NEXT:    slli a2, a2, 5
 ; CHECK-NEXT:    sub sp, sp, a2
 ; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    slli a6, a2, 3
-; CHECK-NEXT:    add a4, a1, a6
+; CHECK-NEXT:    slli a3, a2, 3
+; CHECK-NEXT:    add a4, a1, a3
 ; CHECK-NEXT:    vl8re32.v v8, (a4)
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    li a4, 24
-; CHECK-NEXT:    mul a3, a3, a4
-; CHECK-NEXT:    add a3, sp, a3
-; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    slli a7, a2, 4
-; CHECK-NEXT:    add a5, a1, a7
+; CHECK-NEXT:    csrr a4, vlenb
+; CHECK-NEXT:    li a5, 24
+; CHECK-NEXT:    mul a4, a4, a5
+; CHECK-NEXT:    add a4, sp, a4
+; CHECK-NEXT:    addi a4, a4, 16
+; CHECK-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT:    slli a4, a2, 4
+; CHECK-NEXT:    add a5, a1, a4
 ; CHECK-NEXT:    vl8re32.v v8, (a5)
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 4
-; CHECK-NEXT:    add a3, sp, a3
-; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a5, vlenb
+; CHECK-NEXT:    slli a5, a5, 4
+; CHECK-NEXT:    add a5, sp, a5
+; CHECK-NEXT:    addi a5, a5, 16
+; CHECK-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
 ; CHECK-NEXT:    li a5, 24
-; CHECK-NEXT:    mul t1, a2, a5
-; CHECK-NEXT:    add a3, a1, t1
-; CHECK-NEXT:    vl8re32.v v8, (a3)
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a3, a3, 3
-; CHECK-NEXT:    add a3, sp, a3
-; CHECK-NEXT:    addi a3, a3, 16
-; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    slli t3, a2, 5
-; CHECK-NEXT:    add a4, a1, t3
-; CHECK-NEXT:    vl8re32.v v8, (a4)
-; CHECK-NEXT:    addi a3, sp, 16
-; CHECK-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; CHECK-NEXT:    li a4, 40
-; CHECK-NEXT:    mul a4, a2, a4
-; CHECK-NEXT:    add t0, a1, a4
-; CHECK-NEXT:    li a5, 48
 ; CHECK-NEXT:    mul a5, a2, a5
-; CHECK-NEXT:    add t2, a1, a5
-; CHECK-NEXT:    li a3, 56
-; CHECK-NEXT:    mul a2, a2, a3
-; CHECK-NEXT:    add a3, a1, a2
+; CHECK-NEXT:    add a6, a1, a5
+; CHECK-NEXT:    vl8re32.v v8, (a6)
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    slli a6, a6, 3
+; CHECK-NEXT:    add a6, sp, a6
+; CHECK-NEXT:    addi a6, a6, 16
+; CHECK-NEXT:    vs8r.v v8, (a6) # Unknown-size Folded Spill
+; CHECK-NEXT:    slli a6, a2, 5
+; CHECK-NEXT:    add a7, a1, a6
+; CHECK-NEXT:    vl8re32.v v8, (a7)
+; CHECK-NEXT:    addi a7, sp, 16
+; CHECK-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
+; CHECK-NEXT:    li a7, 40
+; CHECK-NEXT:    mul a7, a2, a7
+; CHECK-NEXT:    add t0, a1, a7
+; CHECK-NEXT:    li t1, 48
+; CHECK-NEXT:    mul t1, a2, t1
+; CHECK-NEXT:    add t2, a1, t1
+; CHECK-NEXT:    li t3, 56
+; CHECK-NEXT:    mul a2, a2, t3
+; CHECK-NEXT:    add t3, a1, a2
 ; CHECK-NEXT:    vl8re32.v v8, (a1)
 ; CHECK-NEXT:    vl8re32.v v0, (t0)
-; CHECK-NEXT:    vl8re32.v v16, (a3)
+; CHECK-NEXT:    vl8re32.v v16, (t3)
 ; CHECK-NEXT:    vl8re32.v v24, (t2)
 ; CHECK-NEXT:    vs8r.v v8, (a0)
 ; CHECK-NEXT:    add a1, a0, a2
 ; CHECK-NEXT:    vs8r.v v16, (a1)
-; CHECK-NEXT:    add a1, a0, a5
+; CHECK-NEXT:    add a1, a0, t1
 ; CHECK-NEXT:    vs8r.v v24, (a1)
-; CHECK-NEXT:    add a1, a0, a4
+; CHECK-NEXT:    add a1, a0, a7
 ; CHECK-NEXT:    vs8r.v v0, (a1)
-; CHECK-NEXT:    add a1, a0, t3
+; CHECK-NEXT:    add a1, a0, a6
 ; CHECK-NEXT:    addi a2, sp, 16
 ; CHECK-NEXT:    vl8re8.v v8, (a2) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vs8r.v v8, (a1)
-; CHECK-NEXT:    add a1, a0, t1
+; CHECK-NEXT:    add a1, a0, a5
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
 ; CHECK-NEXT:    vl8re8.v v8, (a2) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vs8r.v v8, (a1)
-; CHECK-NEXT:    add a1, a0, a7
+; CHECK-NEXT:    add a1, a0, a4
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 4
 ; CHECK-NEXT:    add a2, sp, a2
 ; CHECK-NEXT:    addi a2, a2, 16
 ; CHECK-NEXT:    vl8re8.v v8, (a2) # Unknown-size Folded Reload
 ; CHECK-NEXT:    vs8r.v v8, (a1)
-; CHECK-NEXT:    add a0, a0, a6
+; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    li a2, 24
 ; CHECK-NEXT:    mul a1, a1, a2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
index e563b0834d607..bde5c515a1425 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
@@ -32,17 +32,17 @@ define void @gather(i8* noalias nocapture %A, i8* noalias nocapture readonly %B)
 ; CHECK-ASM-LABEL: gather:
 ; CHECK-ASM:       # %bb.0: # %entry
 ; CHECK-ASM-NEXT:    li a2, 0
-; CHECK-ASM-NEXT:    li a6, 32
+; CHECK-ASM-NEXT:    li a3, 32
 ; CHECK-ASM-NEXT:    li a4, 5
 ; CHECK-ASM-NEXT:    li a5, 1024
 ; CHECK-ASM-NEXT:  .LBB0_1: # %vector.body
 ; CHECK-ASM-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-ASM-NEXT:    vsetvli zero, a6, e8, m1, ta, mu
+; CHECK-ASM-NEXT:    vsetvli zero, a3, e8, m1, ta, mu
 ; CHECK-ASM-NEXT:    vlse8.v v8, (a1), a4
-; CHECK-ASM-NEXT:    add a3, a0, a2
-; CHECK-ASM-NEXT:    vle8.v v9, (a3)
+; CHECK-ASM-NEXT:    add a6, a0, a2
+; CHECK-ASM-NEXT:    vle8.v v9, (a6)
 ; CHECK-ASM-NEXT:    vadd.vv v8, v9, v8
-; CHECK-ASM-NEXT:    vse8.v v8, (a3)
+; CHECK-ASM-NEXT:    vse8.v v8, (a6)
 ; CHECK-ASM-NEXT:    addi a2, a2, 32
 ; CHECK-ASM-NEXT:    addi a1, a1, 160
 ; CHECK-ASM-NEXT:    bne a2, a5, .LBB0_1
@@ -101,18 +101,18 @@ define void @gather_masked(i8* noalias nocapture %A, i8* noalias nocapture reado
 ; CHECK-ASM-NEXT:    addiw a3, a3, 873
 ; CHECK-ASM-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
 ; CHECK-ASM-NEXT:    vmv.s.x v0, a3
-; CHECK-ASM-NEXT:    li a6, 32
+; CHECK-ASM-NEXT:    li a3, 32
 ; CHECK-ASM-NEXT:    li a4, 5
 ; CHECK-ASM-NEXT:    li a5, 1024
 ; CHECK-ASM-NEXT:  .LBB1_1: # %vector.body
 ; CHECK-ASM-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-ASM-NEXT:    vsetvli zero, a6, e8, m1, ta, mu
+; CHECK-ASM-NEXT:    vsetvli zero, a3, e8, m1, ta, mu
 ; CHECK-ASM-NEXT:    vmv1r.v v9, v8
 ; CHECK-ASM-NEXT:    vlse8.v v9, (a1), a4, v0.t
-; CHECK-ASM-NEXT:    add a3, a0, a2
-; CHECK-ASM-NEXT:    vle8.v v10, (a3)
+; CHECK-ASM-NEXT:    add a6, a0, a2
+; CHECK-ASM-NEXT:    vle8.v v10, (a6)
 ; CHECK-ASM-NEXT:    vadd.vv v9, v10, v9
-; CHECK-ASM-NEXT:    vse8.v v9, (a3)
+; CHECK-ASM-NEXT:    vse8.v v9, (a6)
 ; CHECK-ASM-NEXT:    addi a2, a2, 32
 ; CHECK-ASM-NEXT:    addi a1, a1, 160
 ; CHECK-ASM-NEXT:    bne a2, a5, .LBB1_1
@@ -168,17 +168,17 @@ define void @gather_negative_stride(i8* noalias nocapture %A, i8* noalias nocapt
 ; CHECK-ASM:       # %bb.0: # %entry
 ; CHECK-ASM-NEXT:    li a2, 0
 ; CHECK-ASM-NEXT:    addi a1, a1, 155
-; CHECK-ASM-NEXT:    li a6, 32
+; CHECK-ASM-NEXT:    li a3, 32
 ; CHECK-ASM-NEXT:    li a4, -5
 ; CHECK-ASM-NEXT:    li a5, 1024
 ; CHECK-ASM-NEXT:  .LBB2_1: # %vector.body
 ; CHECK-ASM-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-ASM-NEXT:    vsetvli zero, a6, e8, m1, ta, mu
+; CHECK-ASM-NEXT:    vsetvli zero, a3, e8, m1, ta, mu
 ; CHECK-ASM-NEXT:    vlse8.v v8, (a1), a4
-; CHECK-ASM-NEXT:    add a3, a0, a2
-; CHECK-ASM-NEXT:    vle8.v v9, (a3)
+; CHECK-ASM-NEXT:    add a6, a0, a2
+; CHECK-ASM-NEXT:    vle8.v v9, (a6)
 ; CHECK-ASM-NEXT:    vadd.vv v8, v9, v8
-; CHECK-ASM-NEXT:    vse8.v v8, (a3)
+; CHECK-ASM-NEXT:    vse8.v v8, (a6)
 ; CHECK-ASM-NEXT:    addi a2, a2, 32
 ; CHECK-ASM-NEXT:    addi a1, a1, 160
 ; CHECK-ASM-NEXT:    bne a2, a5, .LBB2_1
@@ -303,14 +303,14 @@ define void @scatter(i8* noalias nocapture %A, i8* noalias nocapture readonly %B
 ; CHECK-ASM-LABEL: scatter:
 ; CHECK-ASM:       # %bb.0: # %entry
 ; CHECK-ASM-NEXT:    li a2, 0
-; CHECK-ASM-NEXT:    li a6, 32
+; CHECK-ASM-NEXT:    li a3, 32
 ; CHECK-ASM-NEXT:    li a4, 5
 ; CHECK-ASM-NEXT:    li a5, 1024
 ; CHECK-ASM-NEXT:  .LBB4_1: # %vector.body
 ; CHECK-ASM-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-ASM-NEXT:    add a3, a1, a2
-; CHECK-ASM-NEXT:    vsetvli zero, a6, e8, m1, ta, mu
-; CHECK-ASM-NEXT:    vle8.v v8, (a3)
+; CHECK-ASM-NEXT:    add a6, a1, a2
+; CHECK-ASM-NEXT:    vsetvli zero, a3, e8, m1, ta, mu
+; CHECK-ASM-NEXT:    vle8.v v8, (a6)
 ; CHECK-ASM-NEXT:    vlse8.v v9, (a0), a4
 ; CHECK-ASM-NEXT:    vadd.vv v8, v9, v8
 ; CHECK-ASM-NEXT:    vsse8.v v8, (a0), a4
@@ -369,7 +369,7 @@ define void @scatter_masked(i8* noalias nocapture %A, i8* noalias nocapture read
 ; CHECK-ASM-LABEL: scatter_masked:
 ; CHECK-ASM:       # %bb.0: # %entry
 ; CHECK-ASM-NEXT:    li a2, 0
-; CHECK-ASM-NEXT:    li a6, 32
+; CHECK-ASM-NEXT:    li a3, 32
 ; CHECK-ASM-NEXT:    lui a4, 983765
 ; CHECK-ASM-NEXT:    addiw a4, a4, 873
 ; CHECK-ASM-NEXT:    vsetivli zero, 1, e32, mf2, ta, mu
@@ -378,9 +378,9 @@ define void @scatter_masked(i8* noalias nocapture %A, i8* noalias nocapture read
 ; CHECK-ASM-NEXT:    li a5, 1024
 ; CHECK-ASM-NEXT:  .LBB5_1: # %vector.body
 ; CHECK-ASM-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-ASM-NEXT:    add a3, a1, a2
-; CHECK-ASM-NEXT:    vsetvli zero, a6, e8, m1, ta, mu
-; CHECK-ASM-NEXT:    vle8.v v9, (a3)
+; CHECK-ASM-NEXT:    add a6, a1, a2
+; CHECK-ASM-NEXT:    vsetvli zero, a3, e8, m1, ta, mu
+; CHECK-ASM-NEXT:    vle8.v v9, (a6)
 ; CHECK-ASM-NEXT:    vmv1r.v v10, v8
 ; CHECK-ASM-NEXT:    vlse8.v v10, (a0), a4, v0.t
 ; CHECK-ASM-NEXT:    vadd.vv v9, v10, v9
@@ -1047,43 +1047,43 @@ define void @strided_load_startval_add_with_splat(i8* noalias nocapture %0, i8*
 ; CHECK-ASM-NEXT:  # %bb.2:
 ; CHECK-ASM-NEXT:    slli a3, a4, 32
 ; CHECK-ASM-NEXT:    srli a3, a3, 32
-; CHECK-ASM-NEXT:    addi a6, a3, 1
-; CHECK-ASM-NEXT:    andi a7, a6, -32
-; CHECK-ASM-NEXT:    add a3, a7, a2
-; CHECK-ASM-NEXT:    slli a4, a2, 2
-; CHECK-ASM-NEXT:    add a4, a4, a2
+; CHECK-ASM-NEXT:    addi a4, a3, 1
+; CHECK-ASM-NEXT:    andi a5, a4, -32
+; CHECK-ASM-NEXT:    add a3, a5, a2
+; CHECK-ASM-NEXT:    slli a6, a2, 2
+; CHECK-ASM-NEXT:    add a6, a6, a2
 ; CHECK-ASM-NEXT:    add a2, a0, a2
-; CHECK-ASM-NEXT:    add a4, a1, a4
-; CHECK-ASM-NEXT:    li t0, 32
-; CHECK-ASM-NEXT:    li t1, 5
-; CHECK-ASM-NEXT:    mv a5, a7
+; CHECK-ASM-NEXT:    add a6, a1, a6
+; CHECK-ASM-NEXT:    li a7, 32
+; CHECK-ASM-NEXT:    li t0, 5
+; CHECK-ASM-NEXT:    mv t1, a5
 ; CHECK-ASM-NEXT:  .LBB12_3: # =>This Inner Loop Header: Depth=1
-; CHECK-ASM-NEXT:    vsetvli zero, t0, e8, m1, ta, mu
-; CHECK-ASM-NEXT:    vlse8.v v8, (a4), t1
+; CHECK-ASM-NEXT:    vsetvli zero, a7, e8, m1, ta, mu
+; CHECK-ASM-NEXT:    vlse8.v v8, (a6), t0
 ; CHECK-ASM-NEXT:    vle8.v v9, (a2)
 ; CHECK-ASM-NEXT:    vadd.vv v8, v9, v8
 ; CHECK-ASM-NEXT:    vse8.v v8, (a2)
-; CHECK-ASM-NEXT:    addi a5, a5, -32
+; CHECK-ASM-NEXT:    addi t1, t1, -32
 ; CHECK-ASM-NEXT:    addi a2, a2, 32
-; CHECK-ASM-NEXT:    addi a4, a4, 160
-; CHECK-ASM-NEXT:    bnez a5, .LBB12_3
+; CHECK-ASM-NEXT:    addi a6, a6, 160
+; CHECK-ASM-NEXT:    bnez t1, .LBB12_3
 ; CHECK-ASM-NEXT:  # %bb.4:
-; CHECK-ASM-NEXT:    beq a6, a7, .LBB12_7
+; CHECK-ASM-NEXT:    beq a4, a5, .LBB12_7
 ; CHECK-ASM-NEXT:  .LBB12_5:
 ; CHECK-ASM-NEXT:    slli a2, a3, 2
 ; CHECK-ASM-NEXT:    add a2, a2, a3
 ; CHECK-ASM-NEXT:    add a1, a1, a2
-; CHECK-ASM-NEXT:    li a6, 1024
+; CHECK-ASM-NEXT:    li a2, 1024
 ; CHECK-ASM-NEXT:  .LBB12_6: # =>This Inner Loop Header: Depth=1
 ; CHECK-ASM-NEXT:    lb a4, 0(a1)
 ; CHECK-ASM-NEXT:    add a5, a0, a3
-; CHECK-ASM-NEXT:    lb a2, 0(a5)
-; CHECK-ASM-NEXT:    addw a2, a2, a4
-; CHECK-ASM-NEXT:    sb a2, 0(a5)
-; CHECK-ASM-NEXT:    addiw a2, a3, 1
+; CHECK-ASM-NEXT:    lb a6, 0(a5)
+; CHECK-ASM-NEXT:    addw a4, a6, a4
+; CHECK-ASM-NEXT:    sb a4, 0(a5)
+; CHECK-ASM-NEXT:    addiw a4, a3, 1
 ; CHECK-ASM-NEXT:    addi a3, a3, 1
 ; CHECK-ASM-NEXT:    addi a1, a1, 5
-; CHECK-ASM-NEXT:    bne a2, a6, .LBB12_6
+; CHECK-ASM-NEXT:    bne a4, a2, .LBB12_6
 ; CHECK-ASM-NEXT:  .LBB12_7:
 ; CHECK-ASM-NEXT:    ret
   %4 = icmp eq i32 %2, 1024
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index 65b8911749b36..bde7f07d1b53b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -866,8 +866,8 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v8, 24
 ; LMULMAX1-RV32-NEXT:    vor.vv v10, v10, v11
 ; LMULMAX1-RV32-NEXT:    vsll.vi v11, v8, 8
-; LMULMAX1-RV32-NEXT:    lui a6, 4080
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a6
+; LMULMAX1-RV32-NEXT:    lui a3, 4080
+; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a3
 ; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 24
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
@@ -886,10 +886,10 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV32-NEXT:    lui a3, 349525
-; LMULMAX1-RV32-NEXT:    addi a3, a3, 1365
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a3
+; LMULMAX1-RV32-NEXT:    lui a6, 349525
+; LMULMAX1-RV32-NEXT:    addi a6, a6, 1365
+; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a6
+; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a6
 ; LMULMAX1-RV32-NEXT:    vadd.vv v8, v8, v8
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 8
@@ -897,7 +897,7 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 24
 ; LMULMAX1-RV32-NEXT:    vor.vv v10, v10, v11
 ; LMULMAX1-RV32-NEXT:    vsll.vi v11, v9, 8
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a6
+; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a3
 ; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 24
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v11
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
@@ -912,8 +912,8 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsll.vi v9, v9, 2
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a3
+; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a6
+; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a6
 ; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v9
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v10, v9
 ; LMULMAX1-RV32-NEXT:    vse32.v v9, (a0)
@@ -933,8 +933,8 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v10, v11
 ; LMULMAX1-RV64-NEXT:    vsll.vi v11, v8, 8
-; LMULMAX1-RV64-NEXT:    lui a6, 4080
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a6
+; LMULMAX1-RV64-NEXT:    lui a3, 4080
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a3
 ; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 24
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
@@ -953,10 +953,10 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
 ; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    lui a3, 349525
-; LMULMAX1-RV64-NEXT:    addiw a3, a3, 1365
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a3
+; LMULMAX1-RV64-NEXT:    lui a6, 349525
+; LMULMAX1-RV64-NEXT:    addiw a6, a6, 1365
+; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a6
+; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a6
 ; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v8
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 8
@@ -964,7 +964,7 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v9, 24
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v10, v11
 ; LMULMAX1-RV64-NEXT:    vsll.vi v11, v9, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a6
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a3
 ; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 24
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
@@ -979,8 +979,8 @@ define void @bitreverse_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
 ; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 2
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a3
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a3
+; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a6
+; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a6
 ; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v9
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
 ; LMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
@@ -1153,23 +1153,23 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vand.vx v10, v10, a4
 ; LMULMAX1-RV32-NEXT:    vor.vv v10, v10, v9
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v12, 24
-; LMULMAX1-RV32-NEXT:    lui a6, 4080
-; LMULMAX1-RV32-NEXT:    vand.vx v11, v9, a6
-; LMULMAX1-RV32-NEXT:    li a5, 5
+; LMULMAX1-RV32-NEXT:    lui a5, 4080
+; LMULMAX1-RV32-NEXT:    vand.vx v11, v9, a5
+; LMULMAX1-RV32-NEXT:    li a6, 5
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a5
+; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a6
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vmv.v.i v9, 0
-; LMULMAX1-RV32-NEXT:    lui a5, 1044480
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v9, v9, a5, v0
+; LMULMAX1-RV32-NEXT:    lui a6, 1044480
+; LMULMAX1-RV32-NEXT:    vmerge.vxm v9, v9, a6, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v13, v12, 8
 ; LMULMAX1-RV32-NEXT:    vand.vv v13, v13, v9
 ; LMULMAX1-RV32-NEXT:    vor.vv v11, v13, v11
 ; LMULMAX1-RV32-NEXT:    vor.vv v13, v11, v10
-; LMULMAX1-RV32-NEXT:    li a5, 255
+; LMULMAX1-RV32-NEXT:    li a6, 255
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a5
+; LMULMAX1-RV32-NEXT:    vmv.v.x v10, a6
 ; LMULMAX1-RV32-NEXT:    vmerge.vim v10, v10, 0, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vsll.vi v11, v12, 8
@@ -1183,7 +1183,7 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vor.vv v14, v15, v14
 ; LMULMAX1-RV32-NEXT:    vsll.vx v15, v12, a3
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v16, a6
+; LMULMAX1-RV32-NEXT:    vmv.v.x v16, a5
 ; LMULMAX1-RV32-NEXT:    vmerge.vim v16, v16, 0, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vand.vv v15, v15, v16
@@ -1192,30 +1192,30 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vor.vv v12, v12, v14
 ; LMULMAX1-RV32-NEXT:    vor.vv v12, v12, v13
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v13, v12, 4
-; LMULMAX1-RV32-NEXT:    lui a5, 61681
-; LMULMAX1-RV32-NEXT:    addi a5, a5, -241
+; LMULMAX1-RV32-NEXT:    lui a6, 61681
+; LMULMAX1-RV32-NEXT:    addi a6, a6, -241
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v14, a5
+; LMULMAX1-RV32-NEXT:    vmv.v.x v14, a6
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vand.vv v13, v13, v14
 ; LMULMAX1-RV32-NEXT:    vand.vv v12, v12, v14
 ; LMULMAX1-RV32-NEXT:    vsll.vi v12, v12, 4
 ; LMULMAX1-RV32-NEXT:    vor.vv v12, v13, v12
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v13, v12, 2
-; LMULMAX1-RV32-NEXT:    lui a5, 209715
-; LMULMAX1-RV32-NEXT:    addi a5, a5, 819
+; LMULMAX1-RV32-NEXT:    lui a6, 209715
+; LMULMAX1-RV32-NEXT:    addi a6, a6, 819
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v15, a5
+; LMULMAX1-RV32-NEXT:    vmv.v.x v15, a6
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vand.vv v13, v13, v15
 ; LMULMAX1-RV32-NEXT:    vand.vv v12, v12, v15
 ; LMULMAX1-RV32-NEXT:    vsll.vi v12, v12, 2
 ; LMULMAX1-RV32-NEXT:    vor.vv v12, v13, v12
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v13, v12, 1
-; LMULMAX1-RV32-NEXT:    lui a5, 349525
-; LMULMAX1-RV32-NEXT:    addi a5, a5, 1365
+; LMULMAX1-RV32-NEXT:    lui a6, 349525
+; LMULMAX1-RV32-NEXT:    addi a6, a6, 1365
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v17, a5
+; LMULMAX1-RV32-NEXT:    vmv.v.x v17, a6
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vand.vv v13, v13, v17
 ; LMULMAX1-RV32-NEXT:    vand.vv v12, v12, v17
@@ -1226,7 +1226,7 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vand.vx v18, v18, a4
 ; LMULMAX1-RV32-NEXT:    vor.vv v13, v18, v13
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v18, v8, 24
-; LMULMAX1-RV32-NEXT:    vand.vx v18, v18, a6
+; LMULMAX1-RV32-NEXT:    vand.vx v18, v18, a5
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v19, v8, 8
 ; LMULMAX1-RV32-NEXT:    vand.vv v9, v19, v9
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v18
@@ -1264,99 +1264,99 @@ define void @bitreverse_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-LABEL: bitreverse_v4i64:
 ; LMULMAX1-RV64:       # %bb.0:
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    addi a7, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v9, (a7)
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV64-NEXT:    vle64.v v9, (a1)
 ; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    li t0, 56
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v9, t0
-; LMULMAX1-RV64-NEXT:    li t1, 40
-; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v9, t1
+; LMULMAX1-RV64-NEXT:    li a2, 56
+; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v9, a2
+; LMULMAX1-RV64-NEXT:    li a3, 40
+; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v9, a3
 ; LMULMAX1-RV64-NEXT:    lui a4, 16
-; LMULMAX1-RV64-NEXT:    addiw t2, a4, -256
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, t2
+; LMULMAX1-RV64-NEXT:    addiw a4, a4, -256
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV64-NEXT:    lui a6, 4080
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a6
+; LMULMAX1-RV64-NEXT:    lui a5, 4080
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a5
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v12, v9, 8
-; LMULMAX1-RV64-NEXT:    li a5, 255
-; LMULMAX1-RV64-NEXT:    slli t3, a5, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, t3
+; LMULMAX1-RV64-NEXT:    li a6, 255
+; LMULMAX1-RV64-NEXT:    slli a7, a6, 24
+; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a7
 ; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV64-NEXT:    vsll.vi v11, v9, 8
-; LMULMAX1-RV64-NEXT:    slli t4, a5, 32
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, t4
+; LMULMAX1-RV64-NEXT:    slli t0, a6, 32
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, t0
 ; LMULMAX1-RV64-NEXT:    vsll.vi v12, v9, 24
-; LMULMAX1-RV64-NEXT:    slli a3, a5, 40
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a3
+; LMULMAX1-RV64-NEXT:    slli t1, a6, 40
+; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, t1
 ; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vsll.vx v12, v9, t0
-; LMULMAX1-RV64-NEXT:    vsll.vx v9, v9, t1
-; LMULMAX1-RV64-NEXT:    slli a5, a5, 48
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
+; LMULMAX1-RV64-NEXT:    vsll.vx v12, v9, a2
+; LMULMAX1-RV64-NEXT:    vsll.vx v9, v9, a3
+; LMULMAX1-RV64-NEXT:    slli a6, a6, 48
+; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a6
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v12, v9
-; LMULMAX1-RV64-NEXT:    lui a4, %hi(.LCPI5_0)
-; LMULMAX1-RV64-NEXT:    ld a4, %lo(.LCPI5_0)(a4)
+; LMULMAX1-RV64-NEXT:    lui t2, %hi(.LCPI5_0)
+; LMULMAX1-RV64-NEXT:    ld t2, %lo(.LCPI5_0)(t2)
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a4
-; LMULMAX1-RV64-NEXT:    lui a1, %hi(.LCPI5_1)
-; LMULMAX1-RV64-NEXT:    ld a1, %lo(.LCPI5_1)(a1)
+; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t2
+; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, t2
+; LMULMAX1-RV64-NEXT:    lui t3, %hi(.LCPI5_1)
+; LMULMAX1-RV64-NEXT:    ld t3, %lo(.LCPI5_1)(t3)
 ; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 4
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a1
-; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI5_2)
-; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI5_2)(a2)
+; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t3
+; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, t3
+; LMULMAX1-RV64-NEXT:    lui t4, %hi(.LCPI5_2)
+; LMULMAX1-RV64-NEXT:    ld t4, %lo(.LCPI5_2)(t4)
 ; LMULMAX1-RV64-NEXT:    vsll.vi v9, v9, 2
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a2
+; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t4
+; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, t4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v9
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, t0
-; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v8, t1
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, t2
+; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, a2
+; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v8, a3
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a6
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a5
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v12, v8, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, t3
+; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a7
 ; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV64-NEXT:    vsll.vi v11, v8, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, t4
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, t0
 ; LMULMAX1-RV64-NEXT:    vsll.vi v12, v8, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a3
+; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, t1
 ; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vsll.vx v12, v8, t0
-; LMULMAX1-RV64-NEXT:    vsll.vx v8, v8, t1
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
+; LMULMAX1-RV64-NEXT:    vsll.vx v12, v8, a2
+; LMULMAX1-RV64-NEXT:    vsll.vx v8, v8, a3
+; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a6
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v12, v8
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a4
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a4
+; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t2
+; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, t2
 ; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 4
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 2
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a1
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a1
+; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t3
+; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, t3
 ; LMULMAX1-RV64-NEXT:    vsll.vi v8, v8, 2
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
-; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, a2
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a2
+; LMULMAX1-RV64-NEXT:    vand.vx v10, v10, t4
+; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, t4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v8
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v9, (a7)
+; LMULMAX1-RV64-NEXT:    vse64.v v9, (a1)
 ; LMULMAX1-RV64-NEXT:    ret
   %a = load <4 x i64>, <4 x i64>* %x
   %b = load <4 x i64>, <4 x i64>* %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index 9441accb0631a..b27469939cf3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -595,8 +595,8 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
 ; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
-; LMULMAX1-RV32-NEXT:    li a6, 56
-; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v9, a6
+; LMULMAX1-RV32-NEXT:    li a2, 56
+; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v9, a2
 ; LMULMAX1-RV32-NEXT:    li a3, 40
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v11, v9, a3
 ; LMULMAX1-RV32-NEXT:    lui a4, 16
@@ -606,21 +606,21 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v11, v9, 24
 ; LMULMAX1-RV32-NEXT:    lui a5, 4080
 ; LMULMAX1-RV32-NEXT:    vand.vx v11, v11, a5
-; LMULMAX1-RV32-NEXT:    li a2, 5
+; LMULMAX1-RV32-NEXT:    li a6, 5
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 1, e8, mf8, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a2
+; LMULMAX1-RV32-NEXT:    vmv.s.x v0, a6
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vmv.v.i v12, 0
-; LMULMAX1-RV32-NEXT:    lui a2, 1044480
-; LMULMAX1-RV32-NEXT:    vmerge.vxm v12, v12, a2, v0
+; LMULMAX1-RV32-NEXT:    lui a6, 1044480
+; LMULMAX1-RV32-NEXT:    vmerge.vxm v12, v12, a6, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v13, v9, 8
 ; LMULMAX1-RV32-NEXT:    vand.vv v13, v13, v12
 ; LMULMAX1-RV32-NEXT:    vor.vv v11, v13, v11
 ; LMULMAX1-RV32-NEXT:    vor.vv v10, v11, v10
-; LMULMAX1-RV32-NEXT:    li a2, 255
+; LMULMAX1-RV32-NEXT:    li a6, 255
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a2
+; LMULMAX1-RV32-NEXT:    vmv.v.x v11, a6
 ; LMULMAX1-RV32-NEXT:    vmerge.vim v11, v11, 0, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vsll.vi v13, v9, 8
@@ -638,11 +638,11 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vmerge.vim v16, v16, 0, v0
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-RV32-NEXT:    vand.vv v15, v15, v16
-; LMULMAX1-RV32-NEXT:    vsll.vx v9, v9, a6
+; LMULMAX1-RV32-NEXT:    vsll.vx v9, v9, a2
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v15
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v13
 ; LMULMAX1-RV32-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v8, a6
+; LMULMAX1-RV32-NEXT:    vsrl.vx v10, v8, a2
 ; LMULMAX1-RV32-NEXT:    vsrl.vx v13, v8, a3
 ; LMULMAX1-RV32-NEXT:    vand.vx v13, v13, a4
 ; LMULMAX1-RV32-NEXT:    vor.vv v10, v13, v10
@@ -659,7 +659,7 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV32-NEXT:    vor.vv v11, v12, v11
 ; LMULMAX1-RV32-NEXT:    vsll.vx v12, v8, a3
 ; LMULMAX1-RV32-NEXT:    vand.vv v12, v12, v16
-; LMULMAX1-RV32-NEXT:    vsll.vx v8, v8, a6
+; LMULMAX1-RV32-NEXT:    vsll.vx v8, v8, a2
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v12
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v11
 ; LMULMAX1-RV32-NEXT:    vor.vv v8, v8, v10
@@ -670,63 +670,63 @@ define void @bswap_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
 ; LMULMAX1-RV64-LABEL: bswap_v4i64:
 ; LMULMAX1-RV64:       # %bb.0:
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; LMULMAX1-RV64-NEXT:    addi t1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle64.v v8, (t1)
+; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
+; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    li a7, 56
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, a7
-; LMULMAX1-RV64-NEXT:    li t0, 40
-; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v8, t0
+; LMULMAX1-RV64-NEXT:    li a2, 56
+; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, a2
+; LMULMAX1-RV64-NEXT:    li a3, 40
+; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v8, a3
 ; LMULMAX1-RV64-NEXT:    lui a4, 16
 ; LMULMAX1-RV64-NEXT:    addiw a4, a4, -256
 ; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v8, 24
-; LMULMAX1-RV64-NEXT:    lui a6, 4080
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a6
+; LMULMAX1-RV64-NEXT:    lui a5, 4080
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a5
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v12, v8, 8
-; LMULMAX1-RV64-NEXT:    li a5, 255
-; LMULMAX1-RV64-NEXT:    slli a2, a5, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a2
+; LMULMAX1-RV64-NEXT:    li a6, 255
+; LMULMAX1-RV64-NEXT:    slli a7, a6, 24
+; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a7
 ; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV64-NEXT:    vsll.vi v11, v8, 8
-; LMULMAX1-RV64-NEXT:    slli a3, a5, 32
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a3
+; LMULMAX1-RV64-NEXT:    slli t0, a6, 32
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, t0
 ; LMULMAX1-RV64-NEXT:    vsll.vi v12, v8, 24
-; LMULMAX1-RV64-NEXT:    slli a1, a5, 40
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a1
+; LMULMAX1-RV64-NEXT:    slli t1, a6, 40
+; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, t1
 ; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vsll.vx v12, v8, a7
-; LMULMAX1-RV64-NEXT:    vsll.vx v8, v8, t0
-; LMULMAX1-RV64-NEXT:    slli a5, a5, 48
-; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
+; LMULMAX1-RV64-NEXT:    vsll.vx v12, v8, a2
+; LMULMAX1-RV64-NEXT:    vsll.vx v8, v8, a3
+; LMULMAX1-RV64-NEXT:    slli a6, a6, 48
+; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a6
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v12, v8
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v9, a7
-; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v9, t0
+; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v9, a2
+; LMULMAX1-RV64-NEXT:    vsrl.vx v11, v9, a3
 ; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a4
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v11, v9, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a6
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a5
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v12, v9, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a2
+; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a7
 ; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v10, v11, v10
 ; LMULMAX1-RV64-NEXT:    vsll.vi v11, v9, 8
-; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, a3
+; LMULMAX1-RV64-NEXT:    vand.vx v11, v11, t0
 ; LMULMAX1-RV64-NEXT:    vsll.vi v12, v9, 24
-; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, a1
+; LMULMAX1-RV64-NEXT:    vand.vx v12, v12, t1
 ; LMULMAX1-RV64-NEXT:    vor.vv v11, v12, v11
-; LMULMAX1-RV64-NEXT:    vsll.vx v12, v9, a7
-; LMULMAX1-RV64-NEXT:    vsll.vx v9, v9, t0
-; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
+; LMULMAX1-RV64-NEXT:    vsll.vx v12, v9, a2
+; LMULMAX1-RV64-NEXT:    vsll.vx v9, v9, a3
+; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a6
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v12, v9
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v11
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse64.v v8, (t1)
+; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    ret
   %a = load <4 x i64>, <4 x i64>* %x
   %b = load <4 x i64>, <4 x i64>* %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 10bb341a17668..b53641f5cb33c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -1746,8 +1746,8 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 16
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT:    li a6, 32
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, a6
+; LMULMAX1-RV64-NEXT:    li a2, 32
+; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v8, a2
 ; LMULMAX1-RV64-NEXT:    vor.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vxor.vi v8, v8, -1
 ; LMULMAX1-RV64-NEXT:    lui a3, %hi(.LCPI7_0)
@@ -1763,12 +1763,12 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    lui a5, %hi(.LCPI7_2)
 ; LMULMAX1-RV64-NEXT:    ld a5, %lo(.LCPI7_2)(a5)
-; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI7_3)
-; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI7_3)(a2)
+; LMULMAX1-RV64-NEXT:    lui a6, %hi(.LCPI7_3)
+; LMULMAX1-RV64-NEXT:    ld a6, %lo(.LCPI7_3)(a6)
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a2
+; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a6
 ; LMULMAX1-RV64-NEXT:    li a7, 56
 ; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v8, a7
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
@@ -1781,7 +1781,7 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 16
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
-; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v9, a6
+; LMULMAX1-RV64-NEXT:    vsrl.vx v10, v9, a2
 ; LMULMAX1-RV64-NEXT:    vor.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vxor.vi v9, v9, -1
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
@@ -1794,7 +1794,7 @@ define void @ctlz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a2
+; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a6
 ; LMULMAX1-RV64-NEXT:    vsrl.vx v9, v9, a7
 ; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
 ; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 727e7cd63b86e..2a7680a22acab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -1030,8 +1030,8 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind {
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle16.v v8, (a1)
 ; LMULMAX1-RV32-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    li a6, 1
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v8, a6
+; LMULMAX1-RV32-NEXT:    li a2, 1
+; LMULMAX1-RV32-NEXT:    vsub.vx v10, v8, a2
 ; LMULMAX1-RV32-NEXT:    vxor.vi v8, v8, -1
 ; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
@@ -1050,10 +1050,10 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind {
 ; LMULMAX1-RV32-NEXT:    lui a5, 1
 ; LMULMAX1-RV32-NEXT:    addi a5, a5, -241
 ; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    li a2, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a2
+; LMULMAX1-RV32-NEXT:    li a6, 257
+; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a6
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v9, a6
+; LMULMAX1-RV32-NEXT:    vsub.vx v10, v9, a2
 ; LMULMAX1-RV32-NEXT:    vxor.vi v9, v9, -1
 ; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
@@ -1066,7 +1066,7 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind {
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
 ; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
 ; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a2
+; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a6
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 8
 ; LMULMAX1-RV32-NEXT:    vse16.v v9, (a0)
 ; LMULMAX1-RV32-NEXT:    vse16.v v8, (a1)
@@ -1078,8 +1078,8 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV64-NEXT:    vle16.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    vle16.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    li a6, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a6
+; LMULMAX1-RV64-NEXT:    li a2, 1
+; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a2
 ; LMULMAX1-RV64-NEXT:    vxor.vi v8, v8, -1
 ; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
@@ -1098,10 +1098,10 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    lui a5, 1
 ; LMULMAX1-RV64-NEXT:    addiw a5, a5, -241
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    li a2, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a2
+; LMULMAX1-RV64-NEXT:    li a6, 257
+; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a6
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 8
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a6
+; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a2
 ; LMULMAX1-RV64-NEXT:    vxor.vi v9, v9, -1
 ; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
@@ -1114,7 +1114,7 @@ define void @cttz_v16i16(<16 x i16>* %x, <16 x i16>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a2
+; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a6
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 8
 ; LMULMAX1-RV64-NEXT:    vse16.v v9, (a0)
 ; LMULMAX1-RV64-NEXT:    vse16.v v8, (a1)
@@ -1228,8 +1228,8 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind {
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle32.v v8, (a1)
 ; LMULMAX1-RV32-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV32-NEXT:    li a6, 1
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v8, a6
+; LMULMAX1-RV32-NEXT:    li a2, 1
+; LMULMAX1-RV32-NEXT:    vsub.vx v10, v8, a2
 ; LMULMAX1-RV32-NEXT:    vxor.vi v8, v8, -1
 ; LMULMAX1-RV32-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v8, 1
@@ -1248,11 +1248,11 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind {
 ; LMULMAX1-RV32-NEXT:    lui a5, 61681
 ; LMULMAX1-RV32-NEXT:    addi a5, a5, -241
 ; LMULMAX1-RV32-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV32-NEXT:    lui a2, 4112
-; LMULMAX1-RV32-NEXT:    addi a2, a2, 257
-; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a2
+; LMULMAX1-RV32-NEXT:    lui a6, 4112
+; LMULMAX1-RV32-NEXT:    addi a6, a6, 257
+; LMULMAX1-RV32-NEXT:    vmul.vx v8, v8, a6
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX1-RV32-NEXT:    vsub.vx v10, v9, a6
+; LMULMAX1-RV32-NEXT:    vsub.vx v10, v9, a2
 ; LMULMAX1-RV32-NEXT:    vxor.vi v9, v9, -1
 ; LMULMAX1-RV32-NEXT:    vand.vv v9, v9, v10
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 1
@@ -1265,7 +1265,7 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind {
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v10, v9, 4
 ; LMULMAX1-RV32-NEXT:    vadd.vv v9, v9, v10
 ; LMULMAX1-RV32-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a2
+; LMULMAX1-RV32-NEXT:    vmul.vx v9, v9, a6
 ; LMULMAX1-RV32-NEXT:    vsrl.vi v9, v9, 24
 ; LMULMAX1-RV32-NEXT:    vse32.v v9, (a0)
 ; LMULMAX1-RV32-NEXT:    vse32.v v8, (a1)
@@ -1277,8 +1277,8 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV64-NEXT:    vle32.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    vle32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    li a6, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a6
+; LMULMAX1-RV64-NEXT:    li a2, 1
+; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a2
 ; LMULMAX1-RV64-NEXT:    vxor.vi v8, v8, -1
 ; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 1
@@ -1297,11 +1297,11 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    lui a5, 61681
 ; LMULMAX1-RV64-NEXT:    addiw a5, a5, -241
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    lui a2, 4112
-; LMULMAX1-RV64-NEXT:    addiw a2, a2, 257
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a2
+; LMULMAX1-RV64-NEXT:    lui a6, 4112
+; LMULMAX1-RV64-NEXT:    addiw a6, a6, 257
+; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a6
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v8, v8, 24
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a6
+; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a2
 ; LMULMAX1-RV64-NEXT:    vxor.vi v9, v9, -1
 ; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
@@ -1314,7 +1314,7 @@ define void @cttz_v8i32(<8 x i32>* %x, <8 x i32>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a2
+; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a6
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v9, v9, 24
 ; LMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
 ; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
@@ -1512,8 +1512,8 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV64-NEXT:    vle64.v v8, (a1)
 ; LMULMAX1-RV64-NEXT:    vle64.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    li a6, 1
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a6
+; LMULMAX1-RV64-NEXT:    li a2, 1
+; LMULMAX1-RV64-NEXT:    vsub.vx v10, v8, a2
 ; LMULMAX1-RV64-NEXT:    vxor.vi v8, v8, -1
 ; LMULMAX1-RV64-NEXT:    vand.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    lui a3, %hi(.LCPI7_0)
@@ -1529,15 +1529,15 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vadd.vv v8, v10, v8
 ; LMULMAX1-RV64-NEXT:    lui a5, %hi(.LCPI7_2)
 ; LMULMAX1-RV64-NEXT:    ld a5, %lo(.LCPI7_2)(a5)
-; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI7_3)
-; LMULMAX1-RV64-NEXT:    ld a2, %lo(.LCPI7_3)(a2)
+; LMULMAX1-RV64-NEXT:    lui a6, %hi(.LCPI7_3)
+; LMULMAX1-RV64-NEXT:    ld a6, %lo(.LCPI7_3)(a6)
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v8, 4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v8, v8, v10
 ; LMULMAX1-RV64-NEXT:    vand.vx v8, v8, a5
-; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a2
+; LMULMAX1-RV64-NEXT:    vmul.vx v8, v8, a6
 ; LMULMAX1-RV64-NEXT:    li a7, 56
 ; LMULMAX1-RV64-NEXT:    vsrl.vx v8, v8, a7
-; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a6
+; LMULMAX1-RV64-NEXT:    vsub.vx v10, v9, a2
 ; LMULMAX1-RV64-NEXT:    vxor.vi v9, v9, -1
 ; LMULMAX1-RV64-NEXT:    vand.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 1
@@ -1550,7 +1550,7 @@ define void @cttz_v4i64(<4 x i64>* %x, <4 x i64>* %y) nounwind {
 ; LMULMAX1-RV64-NEXT:    vsrl.vi v10, v9, 4
 ; LMULMAX1-RV64-NEXT:    vadd.vv v9, v9, v10
 ; LMULMAX1-RV64-NEXT:    vand.vx v9, v9, a5
-; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a2
+; LMULMAX1-RV64-NEXT:    vmul.vx v9, v9, a6
 ; LMULMAX1-RV64-NEXT:    vsrl.vx v9, v9, a7
 ; LMULMAX1-RV64-NEXT:    vse64.v v9, (a0)
 ; LMULMAX1-RV64-NEXT:    vse64.v v8, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
index cbccf73c32ed3..d84679b8b0c45 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
@@ -29,25 +29,25 @@ define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
 ; RV32-LABEL: add_v2i64:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    lw a2, 8(a0)
-; RV32-NEXT:    lw a6, 12(a0)
+; RV32-NEXT:    lw a3, 12(a0)
 ; RV32-NEXT:    lw a4, 0(a0)
-; RV32-NEXT:    lw a7, 4(a0)
-; RV32-NEXT:    lw a3, 4(a1)
-; RV32-NEXT:    lw a5, 0(a1)
+; RV32-NEXT:    lw a5, 4(a0)
+; RV32-NEXT:    lw a6, 4(a1)
+; RV32-NEXT:    lw a7, 0(a1)
 ; RV32-NEXT:    lw t0, 8(a1)
 ; RV32-NEXT:    lw a1, 12(a1)
-; RV32-NEXT:    add a3, a7, a3
-; RV32-NEXT:    add a5, a4, a5
-; RV32-NEXT:    sltu a4, a5, a4
-; RV32-NEXT:    add a3, a3, a4
-; RV32-NEXT:    add a1, a6, a1
-; RV32-NEXT:    add a4, a2, t0
-; RV32-NEXT:    sltu a2, a4, a2
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a6, a4, a7
+; RV32-NEXT:    sltu a4, a6, a4
+; RV32-NEXT:    add a4, a5, a4
+; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    add a3, a2, t0
+; RV32-NEXT:    sltu a2, a3, a2
 ; RV32-NEXT:    add a1, a1, a2
-; RV32-NEXT:    sw a4, 8(a0)
-; RV32-NEXT:    sw a5, 0(a0)
+; RV32-NEXT:    sw a3, 8(a0)
+; RV32-NEXT:    sw a6, 0(a0)
 ; RV32-NEXT:    sw a1, 12(a0)
-; RV32-NEXT:    sw a3, 4(a0)
+; RV32-NEXT:    sw a4, 4(a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: add_v2i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
index 0697170f8d886..9e3988caba6fd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
@@ -63,24 +63,24 @@ define void @gather_const_v64f16(<64 x half>* %x) {
 ;
 ; LMULMAX1-LABEL: gather_const_v64f16:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a6, a0, 16
-; LMULMAX1-NEXT:    addi a7, a0, 48
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    addi a2, a0, 48
 ; LMULMAX1-NEXT:    addi a3, a0, 32
 ; LMULMAX1-NEXT:    addi a4, a0, 80
 ; LMULMAX1-NEXT:    addi a5, a0, 94
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
 ; LMULMAX1-NEXT:    vlse16.v v8, (a5), zero
 ; LMULMAX1-NEXT:    addi a5, a0, 64
-; LMULMAX1-NEXT:    addi a1, a0, 112
-; LMULMAX1-NEXT:    addi a2, a0, 96
-; LMULMAX1-NEXT:    vse16.v v8, (a2)
-; LMULMAX1-NEXT:    vse16.v v8, (a1)
+; LMULMAX1-NEXT:    addi a6, a0, 112
+; LMULMAX1-NEXT:    addi a7, a0, 96
+; LMULMAX1-NEXT:    vse16.v v8, (a7)
+; LMULMAX1-NEXT:    vse16.v v8, (a6)
 ; LMULMAX1-NEXT:    vse16.v v8, (a5)
 ; LMULMAX1-NEXT:    vse16.v v8, (a4)
 ; LMULMAX1-NEXT:    vse16.v v8, (a3)
-; LMULMAX1-NEXT:    vse16.v v8, (a7)
+; LMULMAX1-NEXT:    vse16.v v8, (a2)
 ; LMULMAX1-NEXT:    vse16.v v8, (a0)
-; LMULMAX1-NEXT:    vse16.v v8, (a6)
+; LMULMAX1-NEXT:    vse16.v v8, (a1)
 ; LMULMAX1-NEXT:    ret
   %a = load <64 x half>, <64 x half>* %x
   %b = extractelement <64 x half> %a, i32 47
@@ -102,24 +102,24 @@ define void @gather_const_v32f32(<32 x float>* %x) {
 ;
 ; LMULMAX1-LABEL: gather_const_v32f32:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a6, a0, 16
-; LMULMAX1-NEXT:    addi a7, a0, 48
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    addi a2, a0, 48
 ; LMULMAX1-NEXT:    addi a3, a0, 32
 ; LMULMAX1-NEXT:    addi a4, a0, 80
 ; LMULMAX1-NEXT:    addi a5, a0, 68
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; LMULMAX1-NEXT:    vlse32.v v8, (a5), zero
 ; LMULMAX1-NEXT:    addi a5, a0, 64
-; LMULMAX1-NEXT:    addi a1, a0, 112
-; LMULMAX1-NEXT:    addi a2, a0, 96
-; LMULMAX1-NEXT:    vse32.v v8, (a2)
-; LMULMAX1-NEXT:    vse32.v v8, (a1)
+; LMULMAX1-NEXT:    addi a6, a0, 112
+; LMULMAX1-NEXT:    addi a7, a0, 96
+; LMULMAX1-NEXT:    vse32.v v8, (a7)
+; LMULMAX1-NEXT:    vse32.v v8, (a6)
 ; LMULMAX1-NEXT:    vse32.v v8, (a5)
 ; LMULMAX1-NEXT:    vse32.v v8, (a4)
 ; LMULMAX1-NEXT:    vse32.v v8, (a3)
-; LMULMAX1-NEXT:    vse32.v v8, (a7)
+; LMULMAX1-NEXT:    vse32.v v8, (a2)
 ; LMULMAX1-NEXT:    vse32.v v8, (a0)
-; LMULMAX1-NEXT:    vse32.v v8, (a6)
+; LMULMAX1-NEXT:    vse32.v v8, (a1)
 ; LMULMAX1-NEXT:    ret
   %a = load <32 x float>, <32 x float>* %x
   %b = extractelement <32 x float> %a, i32 17
@@ -140,23 +140,23 @@ define void @gather_const_v16f64(<16 x double>* %x) {
 ;
 ; LMULMAX1-LABEL: gather_const_v16f64:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi a6, a0, 16
-; LMULMAX1-NEXT:    addi a7, a0, 48
+; LMULMAX1-NEXT:    addi a1, a0, 16
+; LMULMAX1-NEXT:    addi a2, a0, 48
 ; LMULMAX1-NEXT:    addi a3, a0, 32
 ; LMULMAX1-NEXT:    addi a4, a0, 80
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
 ; LMULMAX1-NEXT:    vlse64.v v8, (a4), zero
 ; LMULMAX1-NEXT:    addi a5, a0, 64
-; LMULMAX1-NEXT:    addi a1, a0, 112
-; LMULMAX1-NEXT:    addi a2, a0, 96
-; LMULMAX1-NEXT:    vse64.v v8, (a2)
-; LMULMAX1-NEXT:    vse64.v v8, (a1)
+; LMULMAX1-NEXT:    addi a6, a0, 112
+; LMULMAX1-NEXT:    addi a7, a0, 96
+; LMULMAX1-NEXT:    vse64.v v8, (a7)
+; LMULMAX1-NEXT:    vse64.v v8, (a6)
 ; LMULMAX1-NEXT:    vse64.v v8, (a5)
 ; LMULMAX1-NEXT:    vse64.v v8, (a4)
 ; LMULMAX1-NEXT:    vse64.v v8, (a3)
-; LMULMAX1-NEXT:    vse64.v v8, (a7)
+; LMULMAX1-NEXT:    vse64.v v8, (a2)
 ; LMULMAX1-NEXT:    vse64.v v8, (a0)
-; LMULMAX1-NEXT:    vse64.v v8, (a6)
+; LMULMAX1-NEXT:    vse64.v v8, (a1)
 ; LMULMAX1-NEXT:    ret
   %a = load <16 x double>, <16 x double>* %x
   %b = extractelement <16 x double> %a, i32 10
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 32c073d59241c..6402c25d068cd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -548,16 +548,16 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r
 ; RV32-NEXT:    andi a3, a2, 1
 ; RV32-NEXT:    beqz a3, .LBB8_2
 ; RV32-NEXT:  # %bb.1: # %cond.load
-; RV32-NEXT:    lbu a6, 1(a0)
-; RV32-NEXT:    lbu a7, 0(a0)
+; RV32-NEXT:    lbu a3, 1(a0)
+; RV32-NEXT:    lbu a4, 0(a0)
 ; RV32-NEXT:    lbu a5, 3(a0)
-; RV32-NEXT:    lbu a3, 2(a0)
-; RV32-NEXT:    slli a4, a6, 8
-; RV32-NEXT:    or a4, a4, a7
-; RV32-NEXT:    slli a5, a5, 8
-; RV32-NEXT:    or a3, a5, a3
-; RV32-NEXT:    slli a3, a3, 16
+; RV32-NEXT:    lbu a6, 2(a0)
+; RV32-NEXT:    slli a3, a3, 8
 ; RV32-NEXT:    or a3, a3, a4
+; RV32-NEXT:    slli a4, a5, 8
+; RV32-NEXT:    or a4, a4, a6
+; RV32-NEXT:    slli a4, a4, 16
+; RV32-NEXT:    or a3, a4, a3
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; RV32-NEXT:    vmv.v.x v8, a3
 ; RV32-NEXT:    andi a2, a2, 2
@@ -608,16 +608,16 @@ define void @masked_load_v2i32_align1(<2 x i32>* %a, <2 x i32> %m, <2 x i32>* %r
 ; RV64-NEXT:    andi a3, a2, 1
 ; RV64-NEXT:    beqz a3, .LBB8_2
 ; RV64-NEXT:  # %bb.1: # %cond.load
-; RV64-NEXT:    lbu a6, 1(a0)
-; RV64-NEXT:    lbu a7, 0(a0)
+; RV64-NEXT:    lbu a3, 1(a0)
+; RV64-NEXT:    lbu a4, 0(a0)
 ; RV64-NEXT:    lb a5, 3(a0)
-; RV64-NEXT:    lbu a3, 2(a0)
-; RV64-NEXT:    slli a4, a6, 8
-; RV64-NEXT:    or a4, a4, a7
-; RV64-NEXT:    slli a5, a5, 8
-; RV64-NEXT:    or a3, a5, a3
-; RV64-NEXT:    slli a3, a3, 16
+; RV64-NEXT:    lbu a6, 2(a0)
+; RV64-NEXT:    slli a3, a3, 8
 ; RV64-NEXT:    or a3, a3, a4
+; RV64-NEXT:    slli a4, a5, 8
+; RV64-NEXT:    or a4, a4, a6
+; RV64-NEXT:    slli a4, a4, 16
+; RV64-NEXT:    or a3, a4, a3
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; RV64-NEXT:    vmv.v.x v8, a3
 ; RV64-NEXT:    andi a2, a2, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
index 7ba839edb7527..ad21c399c15f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -421,18 +421,18 @@ define <33 x double> @vpload_v33f64(<33 x double>* %ptr, <33 x i1> %m, i32 zeroe
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a5, a4
 ; CHECK-NEXT:  .LBB32_2:
-; CHECK-NEXT:    li a6, 16
-; CHECK-NEXT:    bltu a5, a6, .LBB32_4
+; CHECK-NEXT:    li a4, 16
+; CHECK-NEXT:    bltu a5, a4, .LBB32_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    li a5, 16
 ; CHECK-NEXT:  .LBB32_4:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v0, v8, 4
-; CHECK-NEXT:    addi a4, a1, 256
+; CHECK-NEXT:    addi a6, a1, 256
 ; CHECK-NEXT:    vsetvli zero, a5, e64, m8, ta, mu
-; CHECK-NEXT:    vle64.v v16, (a4), v0.t
-; CHECK-NEXT:    li a4, 32
-; CHECK-NEXT:    bltu a2, a4, .LBB32_6
+; CHECK-NEXT:    vle64.v v16, (a6), v0.t
+; CHECK-NEXT:    li a5, 32
+; CHECK-NEXT:    bltu a2, a5, .LBB32_6
 ; CHECK-NEXT:  # %bb.5:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:  .LBB32_6:
@@ -443,10 +443,10 @@ define <33 x double> @vpload_v33f64(<33 x double>* %ptr, <33 x i1> %m, i32 zeroe
 ; CHECK-NEXT:  .LBB32_8:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v0, v8, 2
-; CHECK-NEXT:    addi a4, a1, 128
+; CHECK-NEXT:    addi a5, a1, 128
 ; CHECK-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
-; CHECK-NEXT:    vle64.v v24, (a4), v0.t
-; CHECK-NEXT:    bltu a2, a6, .LBB32_10
+; CHECK-NEXT:    vle64.v v24, (a5), v0.t
+; CHECK-NEXT:    bltu a2, a4, .LBB32_10
 ; CHECK-NEXT:  # %bb.9:
 ; CHECK-NEXT:    li a2, 16
 ; CHECK-NEXT:  .LBB32_10:
diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
index 78dc3900ebe2c..8ac3073199689 100644
--- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
@@ -27,13 +27,13 @@ define signext i32 @foo(i32 signext %aa) #0 {
 ; CHECK-NEXT:    lw a6, 24(s1)
 ; CHECK-NEXT:    lw a7, 20(s1)
 ; CHECK-NEXT:    lw t1, 16(s1)
-; CHECK-NEXT:    lw t2, 12(s1)
-; CHECK-NEXT:    lw a1, 8(s1)
+; CHECK-NEXT:    lw a1, 12(s1)
+; CHECK-NEXT:    lw t2, 8(s1)
 ; CHECK-NEXT:    sw a0, 52(s1)
 ; CHECK-NEXT:    sw a0, 48(s1)
 ; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    sd a1, 16(sp)
-; CHECK-NEXT:    sd t2, 8(sp)
+; CHECK-NEXT:    sd t2, 16(sp)
+; CHECK-NEXT:    sd a1, 8(sp)
 ; CHECK-NEXT:    addi a1, s1, 48
 ; CHECK-NEXT:    sd t1, 0(sp)
 ; CHECK-NEXT:    mv a0, t0
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 7f4d2ec0b8f1e..2acb185d59ead 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -257,33 +257,33 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_mul_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_mul_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB7_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB7_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB7_5
 ; CHECK-NEXT:  .LBB7_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB7_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vmul.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB7_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB7_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB7_7
+; CHECK-NEXT:    beqz a4, .LBB7_7
 ; CHECK-NEXT:  .LBB7_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB7_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -350,33 +350,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_add_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_add_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB8_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB8_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB8_5
 ; CHECK-NEXT:  .LBB8_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB8_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vadd.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB8_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB8_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB8_7
+; CHECK-NEXT:    beqz a4, .LBB8_7
 ; CHECK-NEXT:  .LBB8_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB8_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -443,33 +443,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_sub_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_sub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB9_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB9_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB9_5
 ; CHECK-NEXT:  .LBB9_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB9_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vsub.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB9_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB9_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB9_7
+; CHECK-NEXT:    beqz a4, .LBB9_7
 ; CHECK-NEXT:  .LBB9_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB9_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -536,33 +536,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_rsub_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_rsub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB10_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB10_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB10_5
 ; CHECK-NEXT:  .LBB10_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB10_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vrsub.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB10_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB10_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB10_7
+; CHECK-NEXT:    beqz a4, .LBB10_7
 ; CHECK-NEXT:  .LBB10_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB10_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -629,33 +629,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_and_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_and_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB11_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB11_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB11_5
 ; CHECK-NEXT:  .LBB11_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB11_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vand.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB11_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB11_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB11_7
+; CHECK-NEXT:    beqz a4, .LBB11_7
 ; CHECK-NEXT:  .LBB11_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB11_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -722,33 +722,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_or_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_or_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB12_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB12_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB12_5
 ; CHECK-NEXT:  .LBB12_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB12_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vor.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB12_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB12_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB12_7
+; CHECK-NEXT:    beqz a4, .LBB12_7
 ; CHECK-NEXT:  .LBB12_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB12_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -815,33 +815,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_xor_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_xor_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB13_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB13_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB13_5
 ; CHECK-NEXT:  .LBB13_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB13_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vxor.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB13_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB13_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB13_7
+; CHECK-NEXT:    beqz a4, .LBB13_7
 ; CHECK-NEXT:  .LBB13_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB13_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -1016,33 +1016,33 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_shl_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_shl_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB17_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB17_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB17_5
 ; CHECK-NEXT:  .LBB17_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB17_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vsll.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB17_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB17_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB17_7
+; CHECK-NEXT:    beqz a4, .LBB17_7
 ; CHECK-NEXT:  .LBB17_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB17_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -1109,33 +1109,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_lshr_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_lshr_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB18_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB18_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB18_5
 ; CHECK-NEXT:  .LBB18_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB18_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vsrl.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB18_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB18_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB18_7
+; CHECK-NEXT:    beqz a4, .LBB18_7
 ; CHECK-NEXT:  .LBB18_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB18_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -1202,30 +1202,30 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_ashr_scalable(i32* nocapture %a) {
 ; CHECK-LABEL: sink_splat_ashr_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    srli a7, a3, 1
+; CHECK-NEXT:    csrr a5, vlenb
+; CHECK-NEXT:    srli a1, a5, 1
 ; CHECK-NEXT:    li a2, 1024
-; CHECK-NEXT:    bgeu a2, a7, .LBB19_2
+; CHECK-NEXT:    bgeu a2, a1, .LBB19_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB19_5
 ; CHECK-NEXT:  .LBB19_2: # %vector.ph
 ; CHECK-NEXT:    li a4, 0
-; CHECK-NEXT:    remu a6, a2, a7
-; CHECK-NEXT:    sub a2, a2, a6
-; CHECK-NEXT:    slli a5, a3, 1
-; CHECK-NEXT:    mv a3, a0
+; CHECK-NEXT:    remu a3, a2, a1
+; CHECK-NEXT:    sub a2, a2, a3
+; CHECK-NEXT:    slli a5, a5, 1
+; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB19_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a3)
-; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a6)
+; CHECK-NEXT:    vsetvli a7, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vsra.vi v8, v8, 2
-; CHECK-NEXT:    vs2r.v v8, (a3)
-; CHECK-NEXT:    add a4, a4, a7
-; CHECK-NEXT:    add a3, a3, a5
+; CHECK-NEXT:    vs2r.v v8, (a6)
+; CHECK-NEXT:    add a4, a4, a1
+; CHECK-NEXT:    add a6, a6, a5
 ; CHECK-NEXT:    bne a4, a2, .LBB19_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB19_7
+; CHECK-NEXT:    beqz a3, .LBB19_7
 ; CHECK-NEXT:  .LBB19_5: # %for.body.preheader
 ; CHECK-NEXT:    addi a1, a2, -1024
 ; CHECK-NEXT:    slli a2, a2, 2
@@ -1517,30 +1517,30 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_fmul_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fmul_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a7, vlenb
-; CHECK-NEXT:    srli a3, a7, 2
-; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a3, a2, 2
+; CHECK-NEXT:    li a6, 1024
 ; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a4, a3, .LBB26_2
+; CHECK-NEXT:    bgeu a6, a3, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:    j .LBB26_5
 ; CHECK-NEXT:  .LBB26_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a4, a3
-; CHECK-NEXT:    sub a1, a4, a6
-; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:    remu a4, a6, a3
+; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB26_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v8, (a4)
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vl1re32.v v8, (a6)
+; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfmul.vf v8, v8, ft0
-; CHECK-NEXT:    vs1r.v v8, (a4)
+; CHECK-NEXT:    vs1r.v v8, (a6)
 ; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a4, a4, a7
+; CHECK-NEXT:    add a6, a6, a2
 ; CHECK-NEXT:    bne a5, a1, .LBB26_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB26_7
+; CHECK-NEXT:    beqz a4, .LBB26_7
 ; CHECK-NEXT:  .LBB26_5: # %for.body.preheader
 ; CHECK-NEXT:    addi a2, a1, -1024
 ; CHECK-NEXT:    slli a1, a1, 2
@@ -1610,30 +1610,30 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fdiv_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fdiv_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a7, vlenb
-; CHECK-NEXT:    srli a3, a7, 2
-; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a3, a2, 2
+; CHECK-NEXT:    li a6, 1024
 ; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a4, a3, .LBB27_2
+; CHECK-NEXT:    bgeu a6, a3, .LBB27_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:    j .LBB27_5
 ; CHECK-NEXT:  .LBB27_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a4, a3
-; CHECK-NEXT:    sub a1, a4, a6
-; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:    remu a4, a6, a3
+; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB27_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v8, (a4)
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vl1re32.v v8, (a6)
+; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
-; CHECK-NEXT:    vs1r.v v8, (a4)
+; CHECK-NEXT:    vs1r.v v8, (a6)
 ; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a4, a4, a7
+; CHECK-NEXT:    add a6, a6, a2
 ; CHECK-NEXT:    bne a5, a1, .LBB27_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB27_7
+; CHECK-NEXT:    beqz a4, .LBB27_7
 ; CHECK-NEXT:  .LBB27_5: # %for.body.preheader
 ; CHECK-NEXT:    addi a2, a1, -1024
 ; CHECK-NEXT:    slli a1, a1, 2
@@ -1703,30 +1703,30 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_frdiv_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_frdiv_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a7, vlenb
-; CHECK-NEXT:    srli a3, a7, 2
-; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a3, a2, 2
+; CHECK-NEXT:    li a6, 1024
 ; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a4, a3, .LBB28_2
+; CHECK-NEXT:    bgeu a6, a3, .LBB28_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:    j .LBB28_5
 ; CHECK-NEXT:  .LBB28_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a4, a3
-; CHECK-NEXT:    sub a1, a4, a6
-; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:    remu a4, a6, a3
+; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB28_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v8, (a4)
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vl1re32.v v8, (a6)
+; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
-; CHECK-NEXT:    vs1r.v v8, (a4)
+; CHECK-NEXT:    vs1r.v v8, (a6)
 ; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a4, a4, a7
+; CHECK-NEXT:    add a6, a6, a2
 ; CHECK-NEXT:    bne a5, a1, .LBB28_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB28_7
+; CHECK-NEXT:    beqz a4, .LBB28_7
 ; CHECK-NEXT:  .LBB28_5: # %for.body.preheader
 ; CHECK-NEXT:    addi a2, a1, -1024
 ; CHECK-NEXT:    slli a1, a1, 2
@@ -1796,30 +1796,30 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fadd_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fadd_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a7, vlenb
-; CHECK-NEXT:    srli a3, a7, 2
-; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a3, a2, 2
+; CHECK-NEXT:    li a6, 1024
 ; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a4, a3, .LBB29_2
+; CHECK-NEXT:    bgeu a6, a3, .LBB29_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:    j .LBB29_5
 ; CHECK-NEXT:  .LBB29_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a4, a3
-; CHECK-NEXT:    sub a1, a4, a6
-; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:    remu a4, a6, a3
+; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB29_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v8, (a4)
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vl1re32.v v8, (a6)
+; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfadd.vf v8, v8, ft0
-; CHECK-NEXT:    vs1r.v v8, (a4)
+; CHECK-NEXT:    vs1r.v v8, (a6)
 ; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a4, a4, a7
+; CHECK-NEXT:    add a6, a6, a2
 ; CHECK-NEXT:    bne a5, a1, .LBB29_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB29_7
+; CHECK-NEXT:    beqz a4, .LBB29_7
 ; CHECK-NEXT:  .LBB29_5: # %for.body.preheader
 ; CHECK-NEXT:    addi a2, a1, -1024
 ; CHECK-NEXT:    slli a1, a1, 2
@@ -1889,30 +1889,30 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fsub_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_fsub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a7, vlenb
-; CHECK-NEXT:    srli a3, a7, 2
-; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a3, a2, 2
+; CHECK-NEXT:    li a6, 1024
 ; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a4, a3, .LBB30_2
+; CHECK-NEXT:    bgeu a6, a3, .LBB30_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:    j .LBB30_5
 ; CHECK-NEXT:  .LBB30_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a4, a3
-; CHECK-NEXT:    sub a1, a4, a6
-; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:    remu a4, a6, a3
+; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB30_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v8, (a4)
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vl1re32.v v8, (a6)
+; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfsub.vf v8, v8, ft0
-; CHECK-NEXT:    vs1r.v v8, (a4)
+; CHECK-NEXT:    vs1r.v v8, (a6)
 ; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a4, a4, a7
+; CHECK-NEXT:    add a6, a6, a2
 ; CHECK-NEXT:    bne a5, a1, .LBB30_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB30_7
+; CHECK-NEXT:    beqz a4, .LBB30_7
 ; CHECK-NEXT:  .LBB30_5: # %for.body.preheader
 ; CHECK-NEXT:    addi a2, a1, -1024
 ; CHECK-NEXT:    slli a1, a1, 2
@@ -1982,30 +1982,30 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_frsub_scalable(float* nocapture %a, float %x) {
 ; CHECK-LABEL: sink_splat_frsub_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a7, vlenb
-; CHECK-NEXT:    srli a3, a7, 2
-; CHECK-NEXT:    li a4, 1024
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    srli a3, a2, 2
+; CHECK-NEXT:    li a6, 1024
 ; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    bgeu a4, a3, .LBB31_2
+; CHECK-NEXT:    bgeu a6, a3, .LBB31_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    li a1, 0
 ; CHECK-NEXT:    j .LBB31_5
 ; CHECK-NEXT:  .LBB31_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a4, a3
-; CHECK-NEXT:    sub a1, a4, a6
-; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:    remu a4, a6, a3
+; CHECK-NEXT:    sub a1, a6, a4
+; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB31_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl1re32.v v8, (a4)
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vl1re32.v v8, (a6)
+; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
-; CHECK-NEXT:    vs1r.v v8, (a4)
+; CHECK-NEXT:    vs1r.v v8, (a6)
 ; CHECK-NEXT:    add a5, a5, a3
-; CHECK-NEXT:    add a4, a4, a7
+; CHECK-NEXT:    add a6, a6, a2
 ; CHECK-NEXT:    bne a5, a1, .LBB31_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB31_7
+; CHECK-NEXT:    beqz a4, .LBB31_7
 ; CHECK-NEXT:  .LBB31_5: # %for.body.preheader
 ; CHECK-NEXT:    addi a2, a1, -1024
 ; CHECK-NEXT:    slli a1, a1, 2
@@ -2159,36 +2159,36 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_fma_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) {
 ; CHECK-LABEL: sink_splat_fma_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a7, vlenb
-; CHECK-NEXT:    srli t1, a7, 2
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a4, a3, 2
 ; CHECK-NEXT:    li t0, 1024
 ; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    bgeu t0, t1, .LBB34_2
+; CHECK-NEXT:    bgeu t0, a4, .LBB34_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB34_5
 ; CHECK-NEXT:  .LBB34_2: # %vector.ph
-; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    remu a6, t0, t1
-; CHECK-NEXT:    sub t0, t0, a6
+; CHECK-NEXT:    li a6, 0
+; CHECK-NEXT:    li a7, 0
+; CHECK-NEXT:    remu a5, t0, a4
+; CHECK-NEXT:    sub a2, t0, a5
 ; CHECK-NEXT:  .LBB34_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    add a2, a0, a5
-; CHECK-NEXT:    vl1re32.v v8, (a2)
-; CHECK-NEXT:    add a4, a1, a5
-; CHECK-NEXT:    vl1re32.v v9, (a4)
-; CHECK-NEXT:    vsetvli a4, zero, e32, m1, ta, mu
+; CHECK-NEXT:    add t0, a0, a6
+; CHECK-NEXT:    vl1re32.v v8, (t0)
+; CHECK-NEXT:    add t1, a1, a6
+; CHECK-NEXT:    vl1re32.v v9, (t1)
+; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfmacc.vf v9, ft0, v8
-; CHECK-NEXT:    vs1r.v v9, (a2)
-; CHECK-NEXT:    add a3, a3, t1
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    bne a3, t0, .LBB34_3
+; CHECK-NEXT:    vs1r.v v9, (t0)
+; CHECK-NEXT:    add a7, a7, a4
+; CHECK-NEXT:    add a6, a6, a3
+; CHECK-NEXT:    bne a7, a2, .LBB34_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB34_7
+; CHECK-NEXT:    beqz a5, .LBB34_7
 ; CHECK-NEXT:  .LBB34_5: # %for.body.preheader
-; CHECK-NEXT:    addi a3, t0, -1024
-; CHECK-NEXT:    slli a2, t0, 2
+; CHECK-NEXT:    addi a3, a2, -1024
+; CHECK-NEXT:    slli a2, a2, 2
 ; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:  .LBB34_6: # %for.body
@@ -2263,36 +2263,36 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_fma_commute_scalable(float* noalias nocapture %a, float* noalias nocapture readonly %b, float %x) {
 ; CHECK-LABEL: sink_splat_fma_commute_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a7, vlenb
-; CHECK-NEXT:    srli t1, a7, 2
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    srli a4, a3, 2
 ; CHECK-NEXT:    li t0, 1024
 ; CHECK-NEXT:    fmv.w.x ft0, a2
-; CHECK-NEXT:    bgeu t0, t1, .LBB35_2
+; CHECK-NEXT:    bgeu t0, a4, .LBB35_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a2, 0
 ; CHECK-NEXT:    j .LBB35_5
 ; CHECK-NEXT:  .LBB35_2: # %vector.ph
-; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    li a3, 0
-; CHECK-NEXT:    remu a6, t0, t1
-; CHECK-NEXT:    sub t0, t0, a6
+; CHECK-NEXT:    li a6, 0
+; CHECK-NEXT:    li a7, 0
+; CHECK-NEXT:    remu a5, t0, a4
+; CHECK-NEXT:    sub a2, t0, a5
 ; CHECK-NEXT:  .LBB35_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    add a2, a0, a5
-; CHECK-NEXT:    vl1re32.v v8, (a2)
-; CHECK-NEXT:    add a4, a1, a5
-; CHECK-NEXT:    vl1re32.v v9, (a4)
-; CHECK-NEXT:    vsetvli a4, zero, e32, m1, ta, mu
+; CHECK-NEXT:    add t0, a0, a6
+; CHECK-NEXT:    vl1re32.v v8, (t0)
+; CHECK-NEXT:    add t1, a1, a6
+; CHECK-NEXT:    vl1re32.v v9, (t1)
+; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfmacc.vf v9, ft0, v8
-; CHECK-NEXT:    vs1r.v v9, (a2)
-; CHECK-NEXT:    add a3, a3, t1
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    bne a3, t0, .LBB35_3
+; CHECK-NEXT:    vs1r.v v9, (t0)
+; CHECK-NEXT:    add a7, a7, a4
+; CHECK-NEXT:    add a6, a6, a3
+; CHECK-NEXT:    bne a7, a2, .LBB35_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB35_7
+; CHECK-NEXT:    beqz a5, .LBB35_7
 ; CHECK-NEXT:  .LBB35_5: # %for.body.preheader
-; CHECK-NEXT:    addi a3, t0, -1024
-; CHECK-NEXT:    slli a2, t0, 2
+; CHECK-NEXT:    addi a3, a2, -1024
+; CHECK-NEXT:    slli a2, a2, 2
 ; CHECK-NEXT:    add a1, a1, a2
 ; CHECK-NEXT:    add a0, a0, a2
 ; CHECK-NEXT:  .LBB35_6: # %for.body
@@ -2593,33 +2593,33 @@ for.cond.cleanup:                                 ; preds = %vector.body
 define void @sink_splat_udiv_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_udiv_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB42_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB42_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB42_5
 ; CHECK-NEXT:  .LBB42_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB42_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vdivu.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB42_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB42_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB42_7
+; CHECK-NEXT:    beqz a4, .LBB42_7
 ; CHECK-NEXT:  .LBB42_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB42_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -2686,33 +2686,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_sdiv_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_sdiv_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB43_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB43_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB43_5
 ; CHECK-NEXT:  .LBB43_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB43_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vdiv.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB43_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB43_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB43_7
+; CHECK-NEXT:    beqz a4, .LBB43_7
 ; CHECK-NEXT:  .LBB43_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB43_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -2779,33 +2779,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_urem_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_urem_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB44_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB44_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB44_5
 ; CHECK-NEXT:  .LBB44_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB44_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vremu.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB44_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB44_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB44_7
+; CHECK-NEXT:    beqz a4, .LBB44_7
 ; CHECK-NEXT:  .LBB44_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB44_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
@@ -2872,33 +2872,33 @@ for.body:                                         ; preds = %for.body.preheader,
 define void @sink_splat_srem_scalable(i32* nocapture %a, i32 signext %x) {
 ; CHECK-LABEL: sink_splat_srem_scalable:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    csrr a2, vlenb
-; CHECK-NEXT:    srli a7, a2, 1
+; CHECK-NEXT:    csrr a6, vlenb
+; CHECK-NEXT:    srli a2, a6, 1
 ; CHECK-NEXT:    li a3, 1024
-; CHECK-NEXT:    bgeu a3, a7, .LBB45_2
+; CHECK-NEXT:    bgeu a3, a2, .LBB45_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    li t0, 0
+; CHECK-NEXT:    li a3, 0
 ; CHECK-NEXT:    j .LBB45_5
 ; CHECK-NEXT:  .LBB45_2: # %vector.ph
 ; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    remu a6, a3, a7
-; CHECK-NEXT:    sub t0, a3, a6
-; CHECK-NEXT:    slli a4, a2, 1
-; CHECK-NEXT:    mv a2, a0
+; CHECK-NEXT:    remu a4, a3, a2
+; CHECK-NEXT:    sub a3, a3, a4
+; CHECK-NEXT:    slli a6, a6, 1
+; CHECK-NEXT:    mv a7, a0
 ; CHECK-NEXT:  .LBB45_3: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vl2re32.v v8, (a2)
-; CHECK-NEXT:    vsetvli a3, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vl2re32.v v8, (a7)
+; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vrem.vx v8, v8, a1
-; CHECK-NEXT:    vs2r.v v8, (a2)
-; CHECK-NEXT:    add a5, a5, a7
-; CHECK-NEXT:    add a2, a2, a4
-; CHECK-NEXT:    bne a5, t0, .LBB45_3
+; CHECK-NEXT:    vs2r.v v8, (a7)
+; CHECK-NEXT:    add a5, a5, a2
+; CHECK-NEXT:    add a7, a7, a6
+; CHECK-NEXT:    bne a5, a3, .LBB45_3
 ; CHECK-NEXT:  # %bb.4: # %middle.block
-; CHECK-NEXT:    beqz a6, .LBB45_7
+; CHECK-NEXT:    beqz a4, .LBB45_7
 ; CHECK-NEXT:  .LBB45_5: # %for.body.preheader
-; CHECK-NEXT:    addi a2, t0, -1024
-; CHECK-NEXT:    slli a3, t0, 2
+; CHECK-NEXT:    addi a2, a3, -1024
+; CHECK-NEXT:    slli a3, a3, 2
 ; CHECK-NEXT:    add a0, a0, a3
 ; CHECK-NEXT:  .LBB45_6: # %for.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index f5cb93cc7da40..e909ffe6a025a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -495,48 +495,48 @@ define <vscale x 16 x double> @vpload_nxv17f64(<vscale x 17 x double>* %ptr, <vs
 ; CHECK-LABEL: vpload_nxv17f64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a7, a3, 1
+; CHECK-NEXT:    slli a5, a3, 1
 ; CHECK-NEXT:    vmv1r.v v8, v0
-; CHECK-NEXT:    mv t0, a2
-; CHECK-NEXT:    bltu a2, a7, .LBB38_2
+; CHECK-NEXT:    mv a4, a2
+; CHECK-NEXT:    bltu a2, a5, .LBB38_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv t0, a7
+; CHECK-NEXT:    mv a4, a5
 ; CHECK-NEXT:  .LBB38_2:
-; CHECK-NEXT:    sub a5, t0, a3
+; CHECK-NEXT:    sub a7, a4, a3
 ; CHECK-NEXT:    li a6, 0
-; CHECK-NEXT:    bltu t0, a5, .LBB38_4
+; CHECK-NEXT:    bltu a4, a7, .LBB38_4
 ; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    mv a6, a5
+; CHECK-NEXT:    mv a6, a7
 ; CHECK-NEXT:  .LBB38_4:
-; CHECK-NEXT:    li a5, 0
-; CHECK-NEXT:    srli t1, a3, 3
-; CHECK-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vx v0, v8, t1
-; CHECK-NEXT:    slli a4, a3, 3
-; CHECK-NEXT:    add a4, a0, a4
+; CHECK-NEXT:    li a7, 0
+; CHECK-NEXT:    srli t0, a3, 3
+; CHECK-NEXT:    vsetvli t1, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vslidedown.vx v0, v8, t0
+; CHECK-NEXT:    slli t0, a3, 3
+; CHECK-NEXT:    add t0, a0, t0
 ; CHECK-NEXT:    vsetvli zero, a6, e64, m8, ta, mu
-; CHECK-NEXT:    vle64.v v16, (a4), v0.t
+; CHECK-NEXT:    vle64.v v16, (t0), v0.t
 ; CHECK-NEXT:    srli a6, a3, 2
-; CHECK-NEXT:    sub a4, a2, a7
-; CHECK-NEXT:    slli a7, a3, 4
-; CHECK-NEXT:    bltu a2, a4, .LBB38_6
+; CHECK-NEXT:    sub t0, a2, a5
+; CHECK-NEXT:    slli a5, a3, 4
+; CHECK-NEXT:    bltu a2, t0, .LBB38_6
 ; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    mv a5, a4
+; CHECK-NEXT:    mv a7, t0
 ; CHECK-NEXT:  .LBB38_6:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, mu
 ; CHECK-NEXT:    vslidedown.vx v0, v8, a6
-; CHECK-NEXT:    add a2, a0, a7
-; CHECK-NEXT:    bltu a5, a3, .LBB38_8
+; CHECK-NEXT:    add a2, a0, a5
+; CHECK-NEXT:    bltu a7, a3, .LBB38_8
 ; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mv a5, a3
+; CHECK-NEXT:    mv a7, a3
 ; CHECK-NEXT:  .LBB38_8:
-; CHECK-NEXT:    vsetvli zero, a5, e64, m8, ta, mu
+; CHECK-NEXT:    vsetvli zero, a7, e64, m8, ta, mu
 ; CHECK-NEXT:    vle64.v v24, (a2), v0.t
-; CHECK-NEXT:    bltu t0, a3, .LBB38_10
+; CHECK-NEXT:    bltu a4, a3, .LBB38_10
 ; CHECK-NEXT:  # %bb.9:
-; CHECK-NEXT:    mv t0, a3
+; CHECK-NEXT:    mv a4, a3
 ; CHECK-NEXT:  .LBB38_10:
-; CHECK-NEXT:    vsetvli zero, t0, e64, m8, ta, mu
+; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
 ; CHECK-NEXT:    vmv1r.v v0, v8
 ; CHECK-NEXT:    vle64.v v8, (a0), v0.t
 ; CHECK-NEXT:    vs1r.v v24, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
index 76985c72555c4..60d9d6c4faedc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -408,50 +408,50 @@ define void @vpstore_nxv17f64(<vscale x 17 x double> %val, <vscale x 17 x double
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    sub sp, sp, a3
 ; CHECK-NEXT:    csrr a3, vlenb
-; CHECK-NEXT:    slli a6, a3, 1
+; CHECK-NEXT:    slli a4, a3, 1
 ; CHECK-NEXT:    vmv1r.v v24, v0
-; CHECK-NEXT:    addi a4, sp, 16
-; CHECK-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a5, sp, 16
+; CHECK-NEXT:    vs8r.v v16, (a5) # Unknown-size Folded Spill
 ; CHECK-NEXT:    mv a5, a2
-; CHECK-NEXT:    bltu a2, a6, .LBB31_2
+; CHECK-NEXT:    bltu a2, a4, .LBB31_2
 ; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    mv a5, a6
+; CHECK-NEXT:    mv a5, a4
 ; CHECK-NEXT:  .LBB31_2:
-; CHECK-NEXT:    mv a4, a5
+; CHECK-NEXT:    mv a7, a5
 ; CHECK-NEXT:    bltu a5, a3, .LBB31_4
 ; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    mv a4, a3
+; CHECK-NEXT:    mv a7, a3
 ; CHECK-NEXT:  .LBB31_4:
-; CHECK-NEXT:    li a7, 0
+; CHECK-NEXT:    li a6, 0
 ; CHECK-NEXT:    vl8re64.v v16, (a0)
-; CHECK-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; CHECK-NEXT:    vsetvli zero, a7, e64, m8, ta, mu
 ; CHECK-NEXT:    sub a0, a5, a3
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vse64.v v8, (a1), v0.t
 ; CHECK-NEXT:    bltu a5, a0, .LBB31_6
 ; CHECK-NEXT:  # %bb.5:
-; CHECK-NEXT:    mv a7, a0
+; CHECK-NEXT:    mv a6, a0
 ; CHECK-NEXT:  .LBB31_6:
 ; CHECK-NEXT:    li a0, 0
-; CHECK-NEXT:    srli a4, a3, 3
-; CHECK-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vslidedown.vx v0, v24, a4
-; CHECK-NEXT:    slli a4, a3, 3
-; CHECK-NEXT:    add a4, a1, a4
-; CHECK-NEXT:    vsetvli zero, a7, e64, m8, ta, mu
-; CHECK-NEXT:    addi a5, sp, 16
-; CHECK-NEXT:    vl8re8.v v8, (a5) # Unknown-size Folded Reload
-; CHECK-NEXT:    vse64.v v8, (a4), v0.t
-; CHECK-NEXT:    srli a7, a3, 2
-; CHECK-NEXT:    sub a4, a2, a6
-; CHECK-NEXT:    slli a5, a3, 4
-; CHECK-NEXT:    bltu a2, a4, .LBB31_8
+; CHECK-NEXT:    srli a5, a3, 3
+; CHECK-NEXT:    vsetvli a7, zero, e8, mf4, ta, mu
+; CHECK-NEXT:    vslidedown.vx v0, v24, a5
+; CHECK-NEXT:    slli a5, a3, 3
+; CHECK-NEXT:    add a5, a1, a5
+; CHECK-NEXT:    vsetvli zero, a6, e64, m8, ta, mu
+; CHECK-NEXT:    addi a6, sp, 16
+; CHECK-NEXT:    vl8re8.v v8, (a6) # Unknown-size Folded Reload
+; CHECK-NEXT:    vse64.v v8, (a5), v0.t
+; CHECK-NEXT:    srli a5, a3, 2
+; CHECK-NEXT:    sub a6, a2, a4
+; CHECK-NEXT:    slli a4, a3, 4
+; CHECK-NEXT:    bltu a2, a6, .LBB31_8
 ; CHECK-NEXT:  # %bb.7:
-; CHECK-NEXT:    mv a0, a4
+; CHECK-NEXT:    mv a0, a6
 ; CHECK-NEXT:  .LBB31_8:
 ; CHECK-NEXT:    vsetvli a2, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vslidedown.vx v0, v24, a7
-; CHECK-NEXT:    add a1, a1, a5
+; CHECK-NEXT:    vslidedown.vx v0, v24, a5
+; CHECK-NEXT:    add a1, a1, a4
 ; CHECK-NEXT:    bltu a0, a3, .LBB31_10
 ; CHECK-NEXT:  # %bb.9:
 ; CHECK-NEXT:    mv a0, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
index 997a8d67de712..aa725dbca9533 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -1185,7 +1185,7 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
 ; RV64-LABEL: vpreduce_umax_nxv32i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    srli a6, a3, 2
+; RV64-NEXT:    srli a2, a3, 2
 ; RV64-NEXT:    slli a4, a0, 32
 ; RV64-NEXT:    slli a0, a3, 1
 ; RV64-NEXT:    srli a3, a4, 32
@@ -1195,8 +1195,8 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
 ; RV64-NEXT:    mv a4, a0
 ; RV64-NEXT:  .LBB67_2:
 ; RV64-NEXT:    li a5, 0
-; RV64-NEXT:    vsetvli a2, zero, e8, mf2, ta, mu
-; RV64-NEXT:    vslidedown.vx v24, v0, a6
+; RV64-NEXT:    vsetvli a6, zero, e8, mf2, ta, mu
+; RV64-NEXT:    vslidedown.vx v24, v0, a2
 ; RV64-NEXT:    vsetivli zero, 1, e32, m1, ta, mu
 ; RV64-NEXT:    vmv.s.x v25, a3
 ; RV64-NEXT:    vsetvli zero, a4, e32, m8, tu, mu
diff --git a/llvm/test/CodeGen/RISCV/sadd_sat.ll b/llvm/test/CodeGen/RISCV/sadd_sat.ll
index ff40c03c10b0f..c9da3934b010d 100644
--- a/llvm/test/CodeGen/RISCV/sadd_sat.ll
+++ b/llvm/test/CodeGen/RISCV/sadd_sat.ll
@@ -160,12 +160,12 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; RV32IZbbZbt-NEXT:    add a0, a4, a0
 ; RV32IZbbZbt-NEXT:    srai a4, a0, 31
 ; RV32IZbbZbt-NEXT:    lui a5, 524288
-; RV32IZbbZbt-NEXT:    xor a6, a4, a5
-; RV32IZbbZbt-NEXT:    xor a5, a1, a0
+; RV32IZbbZbt-NEXT:    xor a5, a4, a5
+; RV32IZbbZbt-NEXT:    xor a6, a1, a0
 ; RV32IZbbZbt-NEXT:    xor a1, a1, a3
-; RV32IZbbZbt-NEXT:    andn a1, a5, a1
+; RV32IZbbZbt-NEXT:    andn a1, a6, a1
 ; RV32IZbbZbt-NEXT:    slti a3, a1, 0
-; RV32IZbbZbt-NEXT:    cmov a1, a3, a6, a0
+; RV32IZbbZbt-NEXT:    cmov a1, a3, a5, a0
 ; RV32IZbbZbt-NEXT:    cmov a0, a3, a4, a2
 ; RV32IZbbZbt-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
index b266b22bd8012..47c904108126e 100644
--- a/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/sadd_sat_plus.ll
@@ -168,13 +168,13 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; RV32IZbbZbt-NEXT:    add a0, a2, a0
 ; RV32IZbbZbt-NEXT:    srai a2, a0, 31
 ; RV32IZbbZbt-NEXT:    lui a4, 524288
-; RV32IZbbZbt-NEXT:    xor a6, a2, a4
-; RV32IZbbZbt-NEXT:    xor a4, a1, a0
+; RV32IZbbZbt-NEXT:    xor a4, a2, a4
+; RV32IZbbZbt-NEXT:    xor a6, a1, a0
 ; RV32IZbbZbt-NEXT:    xor a1, a1, a5
-; RV32IZbbZbt-NEXT:    andn a1, a4, a1
-; RV32IZbbZbt-NEXT:    slti a4, a1, 0
-; RV32IZbbZbt-NEXT:    cmov a1, a4, a6, a0
-; RV32IZbbZbt-NEXT:    cmov a0, a4, a2, a3
+; RV32IZbbZbt-NEXT:    andn a1, a6, a1
+; RV32IZbbZbt-NEXT:    slti a5, a1, 0
+; RV32IZbbZbt-NEXT:    cmov a1, a5, a4, a0
+; RV32IZbbZbt-NEXT:    cmov a0, a5, a2, a3
 ; RV32IZbbZbt-NEXT:    ret
 ;
 ; RV64IZbbZbt-LABEL: func64:
diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
index 495555c67e877..891a08a0ed495 100644
--- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
+++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll
@@ -65,20 +65,20 @@ define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind {
 ; RV32I-NEXT:  .LBB1_2: # %entry
 ; RV32I-NEXT:    beqz a1, .LBB1_5
 ; RV32I-NEXT:  # %bb.3: # %entry
-; RV32I-NEXT:    addi a7, a4, 4
+; RV32I-NEXT:    addi a5, a4, 4
 ; RV32I-NEXT:    bnez a1, .LBB1_6
 ; RV32I-NEXT:  .LBB1_4:
-; RV32I-NEXT:    addi a5, a3, 8
+; RV32I-NEXT:    addi a6, a3, 8
 ; RV32I-NEXT:    j .LBB1_7
 ; RV32I-NEXT:  .LBB1_5:
-; RV32I-NEXT:    addi a7, a3, 4
+; RV32I-NEXT:    addi a5, a3, 4
 ; RV32I-NEXT:    beqz a1, .LBB1_4
 ; RV32I-NEXT:  .LBB1_6: # %entry
-; RV32I-NEXT:    addi a5, a4, 8
+; RV32I-NEXT:    addi a6, a4, 8
 ; RV32I-NEXT:  .LBB1_7: # %entry
-; RV32I-NEXT:    lw a6, 0(a2)
-; RV32I-NEXT:    lw a7, 0(a7)
-; RV32I-NEXT:    lw a2, 0(a5)
+; RV32I-NEXT:    lw a2, 0(a2)
+; RV32I-NEXT:    lw a5, 0(a5)
+; RV32I-NEXT:    lw a6, 0(a6)
 ; RV32I-NEXT:    beqz a1, .LBB1_9
 ; RV32I-NEXT:  # %bb.8: # %entry
 ; RV32I-NEXT:    addi a1, a4, 12
@@ -88,25 +88,25 @@ define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind {
 ; RV32I-NEXT:  .LBB1_10: # %entry
 ; RV32I-NEXT:    lw a1, 0(a1)
 ; RV32I-NEXT:    sw a1, 12(a0)
-; RV32I-NEXT:    sw a2, 8(a0)
-; RV32I-NEXT:    sw a7, 4(a0)
-; RV32I-NEXT:    sw a6, 0(a0)
+; RV32I-NEXT:    sw a6, 8(a0)
+; RV32I-NEXT:    sw a5, 4(a0)
+; RV32I-NEXT:    sw a2, 0(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV32IBT-LABEL: cmovcc128:
 ; RV32IBT:       # %bb.0: # %entry
-; RV32IBT-NEXT:    addi a6, a3, 12
-; RV32IBT-NEXT:    addi a7, a4, 12
-; RV32IBT-NEXT:    addi t0, a3, 8
-; RV32IBT-NEXT:    addi t1, a4, 8
-; RV32IBT-NEXT:    addi t2, a3, 4
-; RV32IBT-NEXT:    addi a5, a4, 4
+; RV32IBT-NEXT:    addi a5, a3, 12
+; RV32IBT-NEXT:    addi a6, a4, 12
+; RV32IBT-NEXT:    addi a7, a3, 8
+; RV32IBT-NEXT:    addi t0, a4, 8
+; RV32IBT-NEXT:    addi t1, a3, 4
+; RV32IBT-NEXT:    addi t2, a4, 4
 ; RV32IBT-NEXT:    xori a1, a1, 123
 ; RV32IBT-NEXT:    or a1, a1, a2
 ; RV32IBT-NEXT:    cmov a2, a1, a4, a3
-; RV32IBT-NEXT:    cmov a3, a1, a5, t2
-; RV32IBT-NEXT:    cmov a4, a1, t1, t0
-; RV32IBT-NEXT:    cmov a1, a1, a7, a6
+; RV32IBT-NEXT:    cmov a3, a1, t2, t1
+; RV32IBT-NEXT:    cmov a4, a1, t0, a7
+; RV32IBT-NEXT:    cmov a1, a1, a6, a5
 ; RV32IBT-NEXT:    lw a1, 0(a1)
 ; RV32IBT-NEXT:    lw a4, 0(a4)
 ; RV32IBT-NEXT:    lw a3, 0(a3)
@@ -192,20 +192,20 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind {
 ; RV32I-NEXT:  .LBB3_2: # %entry
 ; RV32I-NEXT:    bnez a1, .LBB3_5
 ; RV32I-NEXT:  # %bb.3: # %entry
-; RV32I-NEXT:    addi a7, a3, 4
+; RV32I-NEXT:    addi a5, a3, 4
 ; RV32I-NEXT:    beqz a1, .LBB3_6
 ; RV32I-NEXT:  .LBB3_4:
-; RV32I-NEXT:    addi a5, a2, 8
+; RV32I-NEXT:    addi a6, a2, 8
 ; RV32I-NEXT:    j .LBB3_7
 ; RV32I-NEXT:  .LBB3_5:
-; RV32I-NEXT:    addi a7, a2, 4
+; RV32I-NEXT:    addi a5, a2, 4
 ; RV32I-NEXT:    bnez a1, .LBB3_4
 ; RV32I-NEXT:  .LBB3_6: # %entry
-; RV32I-NEXT:    addi a5, a3, 8
+; RV32I-NEXT:    addi a6, a3, 8
 ; RV32I-NEXT:  .LBB3_7: # %entry
-; RV32I-NEXT:    lw a6, 0(a4)
-; RV32I-NEXT:    lw a7, 0(a7)
-; RV32I-NEXT:    lw a4, 0(a5)
+; RV32I-NEXT:    lw a4, 0(a4)
+; RV32I-NEXT:    lw a5, 0(a5)
+; RV32I-NEXT:    lw a6, 0(a6)
 ; RV32I-NEXT:    bnez a1, .LBB3_9
 ; RV32I-NEXT:  # %bb.8: # %entry
 ; RV32I-NEXT:    addi a1, a3, 12
@@ -215,26 +215,26 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind {
 ; RV32I-NEXT:  .LBB3_10: # %entry
 ; RV32I-NEXT:    lw a1, 0(a1)
 ; RV32I-NEXT:    sw a1, 12(a0)
-; RV32I-NEXT:    sw a4, 8(a0)
-; RV32I-NEXT:    sw a7, 4(a0)
-; RV32I-NEXT:    sw a6, 0(a0)
+; RV32I-NEXT:    sw a6, 8(a0)
+; RV32I-NEXT:    sw a5, 4(a0)
+; RV32I-NEXT:    sw a4, 0(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV32IBT-LABEL: cmov128:
 ; RV32IBT:       # %bb.0: # %entry
-; RV32IBT-NEXT:    addi a6, a3, 12
-; RV32IBT-NEXT:    addi a7, a2, 12
-; RV32IBT-NEXT:    addi t0, a3, 8
-; RV32IBT-NEXT:    addi t1, a2, 8
-; RV32IBT-NEXT:    addi a4, a3, 4
-; RV32IBT-NEXT:    addi a5, a2, 4
+; RV32IBT-NEXT:    addi a4, a3, 12
+; RV32IBT-NEXT:    addi a5, a2, 12
+; RV32IBT-NEXT:    addi a6, a3, 8
+; RV32IBT-NEXT:    addi a7, a2, 8
+; RV32IBT-NEXT:    addi t0, a3, 4
+; RV32IBT-NEXT:    addi t1, a2, 4
 ; RV32IBT-NEXT:    andi a1, a1, 1
 ; RV32IBT-NEXT:    cmov a2, a1, a2, a3
-; RV32IBT-NEXT:    cmov a3, a1, a5, a4
-; RV32IBT-NEXT:    cmov a4, a1, t1, t0
-; RV32IBT-NEXT:    cmov a1, a1, a7, a6
+; RV32IBT-NEXT:    cmov a3, a1, t1, t0
+; RV32IBT-NEXT:    cmov a6, a1, a7, a6
+; RV32IBT-NEXT:    cmov a1, a1, a5, a4
 ; RV32IBT-NEXT:    lw a1, 0(a1)
-; RV32IBT-NEXT:    lw a4, 0(a4)
+; RV32IBT-NEXT:    lw a4, 0(a6)
 ; RV32IBT-NEXT:    lw a3, 0(a3)
 ; RV32IBT-NEXT:    lw a2, 0(a2)
 ; RV32IBT-NEXT:    sw a1, 12(a0)
@@ -476,17 +476,17 @@ entry:
 define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind {
 ; RV32I-LABEL: cmovdiffcc:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    andi a0, a0, 1
-; RV32I-NEXT:    andi a1, a1, 1
-; RV32I-NEXT:    beqz a0, .LBB7_3
+; RV32I-NEXT:    andi a6, a0, 1
+; RV32I-NEXT:    andi a0, a1, 1
+; RV32I-NEXT:    beqz a6, .LBB7_3
 ; RV32I-NEXT:  # %bb.1: # %entry
-; RV32I-NEXT:    beqz a1, .LBB7_4
+; RV32I-NEXT:    beqz a0, .LBB7_4
 ; RV32I-NEXT:  .LBB7_2: # %entry
 ; RV32I-NEXT:    add a0, a2, a4
 ; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB7_3: # %entry
 ; RV32I-NEXT:    mv a2, a3
-; RV32I-NEXT:    bnez a1, .LBB7_2
+; RV32I-NEXT:    bnez a0, .LBB7_2
 ; RV32I-NEXT:  .LBB7_4: # %entry
 ; RV32I-NEXT:    mv a4, a5
 ; RV32I-NEXT:    add a0, a2, a4
@@ -503,17 +503,17 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind {
 ;
 ; RV64I-LABEL: cmovdiffcc:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    andi a0, a0, 1
-; RV64I-NEXT:    andi a1, a1, 1
-; RV64I-NEXT:    beqz a0, .LBB7_3
+; RV64I-NEXT:    andi a6, a0, 1
+; RV64I-NEXT:    andi a0, a1, 1
+; RV64I-NEXT:    beqz a6, .LBB7_3
 ; RV64I-NEXT:  # %bb.1: # %entry
-; RV64I-NEXT:    beqz a1, .LBB7_4
+; RV64I-NEXT:    beqz a0, .LBB7_4
 ; RV64I-NEXT:  .LBB7_2: # %entry
 ; RV64I-NEXT:    addw a0, a2, a4
 ; RV64I-NEXT:    ret
 ; RV64I-NEXT:  .LBB7_3: # %entry
 ; RV64I-NEXT:    mv a2, a3
-; RV64I-NEXT:    bnez a1, .LBB7_2
+; RV64I-NEXT:    bnez a0, .LBB7_2
 ; RV64I-NEXT:  .LBB7_4: # %entry
 ; RV64I-NEXT:    mv a4, a5
 ; RV64I-NEXT:    addw a0, a2, a4
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index 868ab46acedc0..c580d26695e0b 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -179,19 +179,19 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sraw a0, a0, a1
 ; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw s2, a1, 1365
+; RV64I-NEXT:    addiw s0, a1, 1365
 ; RV64I-NEXT:    lui a1, 209715
 ; RV64I-NEXT:    addiw s1, a1, 819
 ; RV64I-NEXT:    lui a1, 61681
-; RV64I-NEXT:    addiw s3, a1, -241
+; RV64I-NEXT:    addiw s2, a1, -241
 ; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw s0, a1, 257
+; RV64I-NEXT:    addiw s3, a1, 257
 ; RV64I-NEXT:  .LBB4_1: # %bb2
 ; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
 ; RV64I-NEXT:    call bar@plt
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    srli a0, a0, 1
-; RV64I-NEXT:    and a0, a0, s2
+; RV64I-NEXT:    and a0, a0, s0
 ; RV64I-NEXT:    subw a0, a1, a0
 ; RV64I-NEXT:    and a2, a0, s1
 ; RV64I-NEXT:    srli a0, a0, 2
@@ -199,8 +199,8 @@ define void @test5(i32 signext %arg, i32 signext %arg1) nounwind {
 ; RV64I-NEXT:    add a0, a2, a0
 ; RV64I-NEXT:    srli a2, a0, 4
 ; RV64I-NEXT:    add a0, a0, a2
-; RV64I-NEXT:    and a0, a0, s3
-; RV64I-NEXT:    mulw a0, a0, s0
+; RV64I-NEXT:    and a0, a0, s2
+; RV64I-NEXT:    mulw a0, a0, s3
 ; RV64I-NEXT:    srliw a0, a0, 24
 ; RV64I-NEXT:    bnez a1, .LBB4_1
 ; RV64I-NEXT:  # %bb.2: # %bb7
diff --git a/llvm/test/CodeGen/RISCV/shadowcallstack.ll b/llvm/test/CodeGen/RISCV/shadowcallstack.ll
index 188bcead5d0d4..6b25b0d910c79 100644
--- a/llvm/test/CodeGen/RISCV/shadowcallstack.ll
+++ b/llvm/test/CodeGen/RISCV/shadowcallstack.ll
@@ -82,14 +82,14 @@ define i32 @f4() shadowcallstack {
 ; RV32-NEXT:    .cfi_offset s1, -12
 ; RV32-NEXT:    .cfi_offset s3, -16
 ; RV32-NEXT:    call bar@plt
-; RV32-NEXT:    mv s3, a0
+; RV32-NEXT:    mv s0, a0
 ; RV32-NEXT:    call bar@plt
 ; RV32-NEXT:    mv s1, a0
 ; RV32-NEXT:    call bar@plt
-; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    mv s3, a0
 ; RV32-NEXT:    call bar@plt
-; RV32-NEXT:    add a1, s3, s1
-; RV32-NEXT:    add a0, s0, a0
+; RV32-NEXT:    add a1, s0, s1
+; RV32-NEXT:    add a0, s3, a0
 ; RV32-NEXT:    add a0, a1, a0
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
@@ -115,14 +115,14 @@ define i32 @f4() shadowcallstack {
 ; RV64-NEXT:    .cfi_offset s1, -24
 ; RV64-NEXT:    .cfi_offset s3, -32
 ; RV64-NEXT:    call bar@plt
-; RV64-NEXT:    mv s3, a0
+; RV64-NEXT:    mv s0, a0
 ; RV64-NEXT:    call bar@plt
 ; RV64-NEXT:    mv s1, a0
 ; RV64-NEXT:    call bar@plt
-; RV64-NEXT:    mv s0, a0
+; RV64-NEXT:    mv s3, a0
 ; RV64-NEXT:    call bar@plt
-; RV64-NEXT:    addw a1, s3, s1
-; RV64-NEXT:    addw a0, s0, a0
+; RV64-NEXT:    addw a1, s0, s1
+; RV64-NEXT:    addw a0, s3, a0
 ; RV64-NEXT:    addw a0, a1, a0
 ; RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index cdee24f11a7b5..1353f2cd8c638 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -148,120 +148,116 @@ define i64 @shl64_minsize(i64 %a, i64 %b) minsize nounwind {
 define i128 @lshr128(i128 %a, i128 %b) nounwind {
 ; RV32I-LABEL: lshr128:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi sp, sp, -16
-; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a2, 0(a2)
-; RV32I-NEXT:    lw t0, 8(a1)
-; RV32I-NEXT:    lw t4, 12(a1)
-; RV32I-NEXT:    li a6, 64
+; RV32I-NEXT:    lw a5, 8(a1)
+; RV32I-NEXT:    lw a4, 12(a1)
+; RV32I-NEXT:    li a3, 64
+; RV32I-NEXT:    sub t0, a3, a2
+; RV32I-NEXT:    li a6, 32
 ; RV32I-NEXT:    sub t1, a6, a2
-; RV32I-NEXT:    li a3, 32
-; RV32I-NEXT:    sub t5, a3, a2
 ; RV32I-NEXT:    li t2, 31
-; RV32I-NEXT:    bltz t5, .LBB6_2
+; RV32I-NEXT:    bltz t1, .LBB6_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll a3, t0, t5
+; RV32I-NEXT:    sll t6, a5, t1
 ; RV32I-NEXT:    j .LBB6_3
 ; RV32I-NEXT:  .LBB6_2:
-; RV32I-NEXT:    sll a3, t4, t1
-; RV32I-NEXT:    sub a4, t2, t1
-; RV32I-NEXT:    srli a5, t0, 1
-; RV32I-NEXT:    srl a4, a5, a4
-; RV32I-NEXT:    or a3, a3, a4
+; RV32I-NEXT:    sll a6, a4, t0
+; RV32I-NEXT:    sub a7, t2, t0
+; RV32I-NEXT:    srli t3, a5, 1
+; RV32I-NEXT:    srl a7, t3, a7
+; RV32I-NEXT:    or t6, a6, a7
 ; RV32I-NEXT:  .LBB6_3:
-; RV32I-NEXT:    lw a5, 4(a1)
-; RV32I-NEXT:    addi t6, a2, -32
-; RV32I-NEXT:    bgez t6, .LBB6_5
+; RV32I-NEXT:    lw t5, 4(a1)
+; RV32I-NEXT:    addi a6, a2, -32
+; RV32I-NEXT:    bgez a6, .LBB6_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    srl a4, a5, a2
-; RV32I-NEXT:    or a3, a3, a4
+; RV32I-NEXT:    srl a7, t5, a2
+; RV32I-NEXT:    or t6, t6, a7
 ; RV32I-NEXT:  .LBB6_5:
-; RV32I-NEXT:    addi a4, a2, -96
+; RV32I-NEXT:    addi t4, a2, -96
 ; RV32I-NEXT:    addi t3, a2, -64
-; RV32I-NEXT:    bltz a4, .LBB6_7
+; RV32I-NEXT:    bltz t4, .LBB6_7
 ; RV32I-NEXT:  # %bb.6:
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    bgeu a2, a6, .LBB6_8
+; RV32I-NEXT:    bgeu a2, a3, .LBB6_8
 ; RV32I-NEXT:    j .LBB6_9
 ; RV32I-NEXT:  .LBB6_7:
-; RV32I-NEXT:    srl a7, t4, t3
-; RV32I-NEXT:    bltu a2, a6, .LBB6_9
+; RV32I-NEXT:    srl a7, a4, t3
+; RV32I-NEXT:    bltu a2, a3, .LBB6_9
 ; RV32I-NEXT:  .LBB6_8:
-; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    mv t6, a7
 ; RV32I-NEXT:  .LBB6_9:
-; RV32I-NEXT:    mv a7, a5
+; RV32I-NEXT:    mv a7, t5
 ; RV32I-NEXT:    beqz a2, .LBB6_11
 ; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv a7, a3
+; RV32I-NEXT:    mv a7, t6
 ; RV32I-NEXT:  .LBB6_11:
-; RV32I-NEXT:    lw s0, 0(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
 ; RV32I-NEXT:    sub t2, t2, a2
-; RV32I-NEXT:    bltz t6, .LBB6_13
+; RV32I-NEXT:    bltz a6, .LBB6_13
 ; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    srl a5, a5, t6
-; RV32I-NEXT:    bltz t5, .LBB6_14
+; RV32I-NEXT:    srl t5, t5, a6
+; RV32I-NEXT:    bltz t1, .LBB6_14
 ; RV32I-NEXT:    j .LBB6_15
 ; RV32I-NEXT:  .LBB6_13:
-; RV32I-NEXT:    srl a3, s0, a2
-; RV32I-NEXT:    slli a5, a5, 1
-; RV32I-NEXT:    sll a5, a5, t2
-; RV32I-NEXT:    or a5, a3, a5
-; RV32I-NEXT:    bgez t5, .LBB6_15
+; RV32I-NEXT:    srl t6, a1, a2
+; RV32I-NEXT:    slli t5, t5, 1
+; RV32I-NEXT:    sll t5, t5, t2
+; RV32I-NEXT:    or t5, t6, t5
+; RV32I-NEXT:    bgez t1, .LBB6_15
 ; RV32I-NEXT:  .LBB6_14:
-; RV32I-NEXT:    sll a3, t0, t1
-; RV32I-NEXT:    or a5, a5, a3
+; RV32I-NEXT:    sll t0, a5, t0
+; RV32I-NEXT:    or t5, t5, t0
 ; RV32I-NEXT:  .LBB6_15:
-; RV32I-NEXT:    slli a3, t4, 1
-; RV32I-NEXT:    bltz a4, .LBB6_17
+; RV32I-NEXT:    slli t0, a4, 1
+; RV32I-NEXT:    bltz t4, .LBB6_17
 ; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    srl a4, t4, a4
-; RV32I-NEXT:    bgeu a2, a6, .LBB6_18
+; RV32I-NEXT:    srl t1, a4, t4
+; RV32I-NEXT:    bgeu a2, a3, .LBB6_18
 ; RV32I-NEXT:    j .LBB6_19
 ; RV32I-NEXT:  .LBB6_17:
-; RV32I-NEXT:    li a4, 95
-; RV32I-NEXT:    sub a4, a4, a2
-; RV32I-NEXT:    sll a4, a3, a4
-; RV32I-NEXT:    srl a1, t0, t3
-; RV32I-NEXT:    or a4, a1, a4
-; RV32I-NEXT:    bltu a2, a6, .LBB6_19
+; RV32I-NEXT:    li t1, 95
+; RV32I-NEXT:    sub t1, t1, a2
+; RV32I-NEXT:    sll t1, t0, t1
+; RV32I-NEXT:    srl t3, a5, t3
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    bltu a2, a3, .LBB6_19
 ; RV32I-NEXT:  .LBB6_18:
-; RV32I-NEXT:    mv a5, a4
+; RV32I-NEXT:    mv t5, t1
 ; RV32I-NEXT:  .LBB6_19:
 ; RV32I-NEXT:    bnez a2, .LBB6_22
 ; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    bltz t6, .LBB6_23
+; RV32I-NEXT:    bltz a6, .LBB6_23
 ; RV32I-NEXT:  .LBB6_21:
-; RV32I-NEXT:    srl a3, t4, t6
-; RV32I-NEXT:    bgeu a2, a6, .LBB6_24
+; RV32I-NEXT:    srl a5, a4, a6
+; RV32I-NEXT:    bgeu a2, a3, .LBB6_24
 ; RV32I-NEXT:    j .LBB6_25
 ; RV32I-NEXT:  .LBB6_22:
-; RV32I-NEXT:    mv s0, a5
-; RV32I-NEXT:    bgez t6, .LBB6_21
+; RV32I-NEXT:    mv a1, t5
+; RV32I-NEXT:    bgez a6, .LBB6_21
 ; RV32I-NEXT:  .LBB6_23:
-; RV32I-NEXT:    srl a1, t0, a2
-; RV32I-NEXT:    sll a3, a3, t2
-; RV32I-NEXT:    or a3, a1, a3
-; RV32I-NEXT:    bltu a2, a6, .LBB6_25
+; RV32I-NEXT:    srl a5, a5, a2
+; RV32I-NEXT:    sll t0, t0, t2
+; RV32I-NEXT:    or a5, a5, t0
+; RV32I-NEXT:    bltu a2, a3, .LBB6_25
 ; RV32I-NEXT:  .LBB6_24:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:  .LBB6_25:
-; RV32I-NEXT:    bltz t6, .LBB6_27
+; RV32I-NEXT:    bltz a6, .LBB6_27
 ; RV32I-NEXT:  # %bb.26:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bgeu a2, a6, .LBB6_28
+; RV32I-NEXT:    bgeu a2, a3, .LBB6_28
 ; RV32I-NEXT:    j .LBB6_29
 ; RV32I-NEXT:  .LBB6_27:
-; RV32I-NEXT:    srl a4, t4, a2
-; RV32I-NEXT:    bltu a2, a6, .LBB6_29
+; RV32I-NEXT:    srl a4, a4, a2
+; RV32I-NEXT:    bltu a2, a3, .LBB6_29
 ; RV32I-NEXT:  .LBB6_28:
 ; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:  .LBB6_29:
 ; RV32I-NEXT:    sw a4, 12(a0)
-; RV32I-NEXT:    sw a3, 8(a0)
-; RV32I-NEXT:    sw s0, 0(a0)
+; RV32I-NEXT:    sw a5, 8(a0)
+; RV32I-NEXT:    sw a1, 0(a0)
 ; RV32I-NEXT:    sw a7, 4(a0)
-; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: lshr128:
@@ -290,120 +286,118 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a2, 0(a2)
-; RV32I-NEXT:    lw t2, 8(a1)
-; RV32I-NEXT:    lw t5, 12(a1)
-; RV32I-NEXT:    li a6, 64
-; RV32I-NEXT:    sub t1, a6, a2
-; RV32I-NEXT:    li a3, 32
-; RV32I-NEXT:    sub t6, a3, a2
+; RV32I-NEXT:    lw a5, 8(a1)
+; RV32I-NEXT:    lw a4, 12(a1)
+; RV32I-NEXT:    li a3, 64
+; RV32I-NEXT:    sub t1, a3, a2
+; RV32I-NEXT:    li a6, 32
+; RV32I-NEXT:    sub t2, a6, a2
 ; RV32I-NEXT:    li t4, 31
-; RV32I-NEXT:    bltz t6, .LBB7_2
+; RV32I-NEXT:    bltz t2, .LBB7_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    sll s0, t2, t6
+; RV32I-NEXT:    sll s0, a5, t2
 ; RV32I-NEXT:    j .LBB7_3
 ; RV32I-NEXT:  .LBB7_2:
-; RV32I-NEXT:    sll a3, t5, t1
-; RV32I-NEXT:    sub a4, t4, t1
-; RV32I-NEXT:    srli a5, t2, 1
-; RV32I-NEXT:    srl a4, a5, a4
-; RV32I-NEXT:    or s0, a3, a4
+; RV32I-NEXT:    sll a6, a4, t1
+; RV32I-NEXT:    sub a7, t4, t1
+; RV32I-NEXT:    srli t0, a5, 1
+; RV32I-NEXT:    srl a7, t0, a7
+; RV32I-NEXT:    or s0, a6, a7
 ; RV32I-NEXT:  .LBB7_3:
-; RV32I-NEXT:    lw a5, 4(a1)
-; RV32I-NEXT:    addi a3, a2, -32
-; RV32I-NEXT:    bgez a3, .LBB7_5
+; RV32I-NEXT:    lw t6, 4(a1)
+; RV32I-NEXT:    addi a6, a2, -32
+; RV32I-NEXT:    bgez a6, .LBB7_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    srl a4, a5, a2
-; RV32I-NEXT:    or s0, s0, a4
+; RV32I-NEXT:    srl a7, t6, a2
+; RV32I-NEXT:    or s0, s0, a7
 ; RV32I-NEXT:  .LBB7_5:
 ; RV32I-NEXT:    addi t3, a2, -64
-; RV32I-NEXT:    addi a4, a2, -96
-; RV32I-NEXT:    srai a7, t5, 31
-; RV32I-NEXT:    bltz a4, .LBB7_7
+; RV32I-NEXT:    addi t5, a2, -96
+; RV32I-NEXT:    srai a7, a4, 31
+; RV32I-NEXT:    bltz t5, .LBB7_7
 ; RV32I-NEXT:  # %bb.6:
 ; RV32I-NEXT:    mv t0, a7
-; RV32I-NEXT:    bgeu a2, a6, .LBB7_8
+; RV32I-NEXT:    bgeu a2, a3, .LBB7_8
 ; RV32I-NEXT:    j .LBB7_9
 ; RV32I-NEXT:  .LBB7_7:
-; RV32I-NEXT:    sra t0, t5, t3
-; RV32I-NEXT:    bltu a2, a6, .LBB7_9
+; RV32I-NEXT:    sra t0, a4, t3
+; RV32I-NEXT:    bltu a2, a3, .LBB7_9
 ; RV32I-NEXT:  .LBB7_8:
 ; RV32I-NEXT:    mv s0, t0
 ; RV32I-NEXT:  .LBB7_9:
-; RV32I-NEXT:    mv t0, a5
+; RV32I-NEXT:    mv t0, t6
 ; RV32I-NEXT:    beqz a2, .LBB7_11
 ; RV32I-NEXT:  # %bb.10:
 ; RV32I-NEXT:    mv t0, s0
 ; RV32I-NEXT:  .LBB7_11:
-; RV32I-NEXT:    lw s1, 0(a1)
+; RV32I-NEXT:    lw a1, 0(a1)
 ; RV32I-NEXT:    sub t4, t4, a2
-; RV32I-NEXT:    bltz a3, .LBB7_13
+; RV32I-NEXT:    bltz a6, .LBB7_13
 ; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    srl a5, a5, a3
-; RV32I-NEXT:    bltz t6, .LBB7_14
+; RV32I-NEXT:    srl t6, t6, a6
+; RV32I-NEXT:    bltz t2, .LBB7_14
 ; RV32I-NEXT:    j .LBB7_15
 ; RV32I-NEXT:  .LBB7_13:
-; RV32I-NEXT:    srl s0, s1, a2
-; RV32I-NEXT:    slli a5, a5, 1
-; RV32I-NEXT:    sll a5, a5, t4
-; RV32I-NEXT:    or a5, s0, a5
-; RV32I-NEXT:    bgez t6, .LBB7_15
+; RV32I-NEXT:    srl s0, a1, a2
+; RV32I-NEXT:    slli t6, t6, 1
+; RV32I-NEXT:    sll t6, t6, t4
+; RV32I-NEXT:    or t6, s0, t6
+; RV32I-NEXT:    bgez t2, .LBB7_15
 ; RV32I-NEXT:  .LBB7_14:
-; RV32I-NEXT:    sll s0, t2, t1
-; RV32I-NEXT:    or a5, a5, s0
+; RV32I-NEXT:    sll t1, a5, t1
+; RV32I-NEXT:    or t6, t6, t1
 ; RV32I-NEXT:  .LBB7_15:
-; RV32I-NEXT:    slli s0, t5, 1
-; RV32I-NEXT:    bltz a4, .LBB7_17
+; RV32I-NEXT:    slli t1, a4, 1
+; RV32I-NEXT:    bltz t5, .LBB7_17
 ; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    sra a4, t5, a4
-; RV32I-NEXT:    bgeu a2, a6, .LBB7_18
+; RV32I-NEXT:    sra t2, a4, t5
+; RV32I-NEXT:    bgeu a2, a3, .LBB7_18
 ; RV32I-NEXT:    j .LBB7_19
 ; RV32I-NEXT:  .LBB7_17:
-; RV32I-NEXT:    li a4, 95
-; RV32I-NEXT:    sub a4, a4, a2
-; RV32I-NEXT:    sll a4, s0, a4
-; RV32I-NEXT:    srl a1, t2, t3
-; RV32I-NEXT:    or a4, a1, a4
-; RV32I-NEXT:    bltu a2, a6, .LBB7_19
+; RV32I-NEXT:    li t2, 95
+; RV32I-NEXT:    sub t2, t2, a2
+; RV32I-NEXT:    sll t2, t1, t2
+; RV32I-NEXT:    srl t3, a5, t3
+; RV32I-NEXT:    or t2, t3, t2
+; RV32I-NEXT:    bltu a2, a3, .LBB7_19
 ; RV32I-NEXT:  .LBB7_18:
-; RV32I-NEXT:    mv a5, a4
+; RV32I-NEXT:    mv t6, t2
 ; RV32I-NEXT:  .LBB7_19:
 ; RV32I-NEXT:    bnez a2, .LBB7_22
 ; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    bltz a3, .LBB7_23
+; RV32I-NEXT:    bltz a6, .LBB7_23
 ; RV32I-NEXT:  .LBB7_21:
-; RV32I-NEXT:    sra a4, t5, a3
-; RV32I-NEXT:    bgeu a2, a6, .LBB7_24
+; RV32I-NEXT:    sra a5, a4, a6
+; RV32I-NEXT:    bgeu a2, a3, .LBB7_24
 ; RV32I-NEXT:    j .LBB7_25
 ; RV32I-NEXT:  .LBB7_22:
-; RV32I-NEXT:    mv s1, a5
-; RV32I-NEXT:    bgez a3, .LBB7_21
+; RV32I-NEXT:    mv a1, t6
+; RV32I-NEXT:    bgez a6, .LBB7_21
 ; RV32I-NEXT:  .LBB7_23:
-; RV32I-NEXT:    srl a1, t2, a2
-; RV32I-NEXT:    sll a4, s0, t4
-; RV32I-NEXT:    or a4, a1, a4
-; RV32I-NEXT:    bltu a2, a6, .LBB7_25
+; RV32I-NEXT:    srl a5, a5, a2
+; RV32I-NEXT:    sll t1, t1, t4
+; RV32I-NEXT:    or a5, a5, t1
+; RV32I-NEXT:    bltu a2, a3, .LBB7_25
 ; RV32I-NEXT:  .LBB7_24:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a5, a7
 ; RV32I-NEXT:  .LBB7_25:
-; RV32I-NEXT:    bltz a3, .LBB7_27
+; RV32I-NEXT:    bltz a6, .LBB7_27
 ; RV32I-NEXT:  # %bb.26:
-; RV32I-NEXT:    mv a3, a7
-; RV32I-NEXT:    bgeu a2, a6, .LBB7_28
+; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    bgeu a2, a3, .LBB7_28
 ; RV32I-NEXT:    j .LBB7_29
 ; RV32I-NEXT:  .LBB7_27:
-; RV32I-NEXT:    sra a3, t5, a2
-; RV32I-NEXT:    bltu a2, a6, .LBB7_29
+; RV32I-NEXT:    sra a4, a4, a2
+; RV32I-NEXT:    bltu a2, a3, .LBB7_29
 ; RV32I-NEXT:  .LBB7_28:
-; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    mv a4, a7
 ; RV32I-NEXT:  .LBB7_29:
-; RV32I-NEXT:    sw a3, 12(a0)
-; RV32I-NEXT:    sw a4, 8(a0)
-; RV32I-NEXT:    sw s1, 0(a0)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    sw a5, 8(a0)
+; RV32I-NEXT:    sw a1, 0(a0)
 ; RV32I-NEXT:    sw t0, 4(a0)
 ; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -431,120 +425,116 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 define i128 @shl128(i128 %a, i128 %b) nounwind {
 ; RV32I-LABEL: shl128:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    addi sp, sp, -16
-; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a2, 0(a2)
-; RV32I-NEXT:    lw t0, 4(a1)
-; RV32I-NEXT:    lw t4, 0(a1)
-; RV32I-NEXT:    li a6, 64
+; RV32I-NEXT:    lw a5, 4(a1)
+; RV32I-NEXT:    lw a4, 0(a1)
+; RV32I-NEXT:    li a3, 64
+; RV32I-NEXT:    sub t0, a3, a2
+; RV32I-NEXT:    li a6, 32
 ; RV32I-NEXT:    sub t1, a6, a2
-; RV32I-NEXT:    li a3, 32
-; RV32I-NEXT:    sub t5, a3, a2
 ; RV32I-NEXT:    li t2, 31
-; RV32I-NEXT:    bltz t5, .LBB8_2
+; RV32I-NEXT:    bltz t1, .LBB8_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    srl a3, t0, t5
+; RV32I-NEXT:    srl t6, a5, t1
 ; RV32I-NEXT:    j .LBB8_3
 ; RV32I-NEXT:  .LBB8_2:
-; RV32I-NEXT:    srl a3, t4, t1
-; RV32I-NEXT:    sub a4, t2, t1
-; RV32I-NEXT:    slli a5, t0, 1
-; RV32I-NEXT:    sll a4, a5, a4
-; RV32I-NEXT:    or a3, a3, a4
+; RV32I-NEXT:    srl a6, a4, t0
+; RV32I-NEXT:    sub a7, t2, t0
+; RV32I-NEXT:    slli t3, a5, 1
+; RV32I-NEXT:    sll a7, t3, a7
+; RV32I-NEXT:    or t6, a6, a7
 ; RV32I-NEXT:  .LBB8_3:
-; RV32I-NEXT:    lw a5, 8(a1)
-; RV32I-NEXT:    addi t6, a2, -32
-; RV32I-NEXT:    bgez t6, .LBB8_5
+; RV32I-NEXT:    lw t5, 8(a1)
+; RV32I-NEXT:    addi a6, a2, -32
+; RV32I-NEXT:    bgez a6, .LBB8_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    sll a4, a5, a2
-; RV32I-NEXT:    or a3, a3, a4
+; RV32I-NEXT:    sll a7, t5, a2
+; RV32I-NEXT:    or t6, t6, a7
 ; RV32I-NEXT:  .LBB8_5:
-; RV32I-NEXT:    addi a4, a2, -96
+; RV32I-NEXT:    addi t4, a2, -96
 ; RV32I-NEXT:    addi t3, a2, -64
-; RV32I-NEXT:    bltz a4, .LBB8_7
+; RV32I-NEXT:    bltz t4, .LBB8_7
 ; RV32I-NEXT:  # %bb.6:
 ; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    bgeu a2, a6, .LBB8_8
+; RV32I-NEXT:    bgeu a2, a3, .LBB8_8
 ; RV32I-NEXT:    j .LBB8_9
 ; RV32I-NEXT:  .LBB8_7:
-; RV32I-NEXT:    sll a7, t4, t3
-; RV32I-NEXT:    bltu a2, a6, .LBB8_9
+; RV32I-NEXT:    sll a7, a4, t3
+; RV32I-NEXT:    bltu a2, a3, .LBB8_9
 ; RV32I-NEXT:  .LBB8_8:
-; RV32I-NEXT:    mv a3, a7
+; RV32I-NEXT:    mv t6, a7
 ; RV32I-NEXT:  .LBB8_9:
-; RV32I-NEXT:    mv a7, a5
+; RV32I-NEXT:    mv a7, t5
 ; RV32I-NEXT:    beqz a2, .LBB8_11
 ; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv a7, a3
+; RV32I-NEXT:    mv a7, t6
 ; RV32I-NEXT:  .LBB8_11:
-; RV32I-NEXT:    lw s0, 12(a1)
+; RV32I-NEXT:    lw a1, 12(a1)
 ; RV32I-NEXT:    sub t2, t2, a2
-; RV32I-NEXT:    bltz t6, .LBB8_13
+; RV32I-NEXT:    bltz a6, .LBB8_13
 ; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    sll a5, a5, t6
-; RV32I-NEXT:    bltz t5, .LBB8_14
+; RV32I-NEXT:    sll t5, t5, a6
+; RV32I-NEXT:    bltz t1, .LBB8_14
 ; RV32I-NEXT:    j .LBB8_15
 ; RV32I-NEXT:  .LBB8_13:
-; RV32I-NEXT:    sll a3, s0, a2
-; RV32I-NEXT:    srli a5, a5, 1
-; RV32I-NEXT:    srl a5, a5, t2
-; RV32I-NEXT:    or a5, a3, a5
-; RV32I-NEXT:    bgez t5, .LBB8_15
+; RV32I-NEXT:    sll t6, a1, a2
+; RV32I-NEXT:    srli t5, t5, 1
+; RV32I-NEXT:    srl t5, t5, t2
+; RV32I-NEXT:    or t5, t6, t5
+; RV32I-NEXT:    bgez t1, .LBB8_15
 ; RV32I-NEXT:  .LBB8_14:
-; RV32I-NEXT:    srl a3, t0, t1
-; RV32I-NEXT:    or a5, a5, a3
+; RV32I-NEXT:    srl t0, a5, t0
+; RV32I-NEXT:    or t5, t5, t0
 ; RV32I-NEXT:  .LBB8_15:
-; RV32I-NEXT:    srli a3, t4, 1
-; RV32I-NEXT:    bltz a4, .LBB8_17
+; RV32I-NEXT:    srli t0, a4, 1
+; RV32I-NEXT:    bltz t4, .LBB8_17
 ; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    sll a4, t4, a4
-; RV32I-NEXT:    bgeu a2, a6, .LBB8_18
+; RV32I-NEXT:    sll t1, a4, t4
+; RV32I-NEXT:    bgeu a2, a3, .LBB8_18
 ; RV32I-NEXT:    j .LBB8_19
 ; RV32I-NEXT:  .LBB8_17:
-; RV32I-NEXT:    li a4, 95
-; RV32I-NEXT:    sub a4, a4, a2
-; RV32I-NEXT:    srl a4, a3, a4
-; RV32I-NEXT:    sll a1, t0, t3
-; RV32I-NEXT:    or a4, a1, a4
-; RV32I-NEXT:    bltu a2, a6, .LBB8_19
+; RV32I-NEXT:    li t1, 95
+; RV32I-NEXT:    sub t1, t1, a2
+; RV32I-NEXT:    srl t1, t0, t1
+; RV32I-NEXT:    sll t3, a5, t3
+; RV32I-NEXT:    or t1, t3, t1
+; RV32I-NEXT:    bltu a2, a3, .LBB8_19
 ; RV32I-NEXT:  .LBB8_18:
-; RV32I-NEXT:    mv a5, a4
+; RV32I-NEXT:    mv t5, t1
 ; RV32I-NEXT:  .LBB8_19:
 ; RV32I-NEXT:    bnez a2, .LBB8_22
 ; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    bltz t6, .LBB8_23
+; RV32I-NEXT:    bltz a6, .LBB8_23
 ; RV32I-NEXT:  .LBB8_21:
-; RV32I-NEXT:    sll a3, t4, t6
-; RV32I-NEXT:    bgeu a2, a6, .LBB8_24
+; RV32I-NEXT:    sll a5, a4, a6
+; RV32I-NEXT:    bgeu a2, a3, .LBB8_24
 ; RV32I-NEXT:    j .LBB8_25
 ; RV32I-NEXT:  .LBB8_22:
-; RV32I-NEXT:    mv s0, a5
-; RV32I-NEXT:    bgez t6, .LBB8_21
+; RV32I-NEXT:    mv a1, t5
+; RV32I-NEXT:    bgez a6, .LBB8_21
 ; RV32I-NEXT:  .LBB8_23:
-; RV32I-NEXT:    sll a1, t0, a2
-; RV32I-NEXT:    srl a3, a3, t2
-; RV32I-NEXT:    or a3, a1, a3
-; RV32I-NEXT:    bltu a2, a6, .LBB8_25
+; RV32I-NEXT:    sll a5, a5, a2
+; RV32I-NEXT:    srl t0, t0, t2
+; RV32I-NEXT:    or a5, a5, t0
+; RV32I-NEXT:    bltu a2, a3, .LBB8_25
 ; RV32I-NEXT:  .LBB8_24:
-; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    li a5, 0
 ; RV32I-NEXT:  .LBB8_25:
-; RV32I-NEXT:    bltz t6, .LBB8_27
+; RV32I-NEXT:    bltz a6, .LBB8_27
 ; RV32I-NEXT:  # %bb.26:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bgeu a2, a6, .LBB8_28
+; RV32I-NEXT:    bgeu a2, a3, .LBB8_28
 ; RV32I-NEXT:    j .LBB8_29
 ; RV32I-NEXT:  .LBB8_27:
-; RV32I-NEXT:    sll a4, t4, a2
-; RV32I-NEXT:    bltu a2, a6, .LBB8_29
+; RV32I-NEXT:    sll a4, a4, a2
+; RV32I-NEXT:    bltu a2, a3, .LBB8_29
 ; RV32I-NEXT:  .LBB8_28:
 ; RV32I-NEXT:    li a4, 0
 ; RV32I-NEXT:  .LBB8_29:
 ; RV32I-NEXT:    sw a4, 0(a0)
-; RV32I-NEXT:    sw a3, 4(a0)
-; RV32I-NEXT:    sw s0, 12(a0)
+; RV32I-NEXT:    sw a5, 4(a0)
+; RV32I-NEXT:    sw a1, 12(a0)
 ; RV32I-NEXT:    sw a7, 8(a0)
-; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: shl128:
@@ -606,69 +596,69 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind {
 define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind {
 ; RV32I-LABEL: fshr128_minsize:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw t2, 8(a1)
-; RV32I-NEXT:    lw a3, 0(a1)
+; RV32I-NEXT:    lw a3, 8(a1)
+; RV32I-NEXT:    lw t2, 0(a1)
 ; RV32I-NEXT:    lw a2, 0(a2)
 ; RV32I-NEXT:    lw a7, 4(a1)
-; RV32I-NEXT:    lw t1, 12(a1)
-; RV32I-NEXT:    andi a1, a2, 64
-; RV32I-NEXT:    mv a5, a7
-; RV32I-NEXT:    mv a6, a3
-; RV32I-NEXT:    beqz a1, .LBB10_2
+; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    andi t1, a2, 64
+; RV32I-NEXT:    mv t0, a7
+; RV32I-NEXT:    mv a4, t2
+; RV32I-NEXT:    beqz t1, .LBB10_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a5, t1
-; RV32I-NEXT:    mv a6, t2
+; RV32I-NEXT:    mv t0, a1
+; RV32I-NEXT:    mv a4, a3
 ; RV32I-NEXT:  .LBB10_2:
-; RV32I-NEXT:    andi a4, a2, 32
-; RV32I-NEXT:    mv t0, a6
-; RV32I-NEXT:    bnez a4, .LBB10_13
+; RV32I-NEXT:    andi a6, a2, 32
+; RV32I-NEXT:    mv a5, a4
+; RV32I-NEXT:    bnez a6, .LBB10_13
 ; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    bnez a1, .LBB10_14
+; RV32I-NEXT:    bnez t1, .LBB10_14
 ; RV32I-NEXT:  .LBB10_4:
-; RV32I-NEXT:    beqz a4, .LBB10_6
+; RV32I-NEXT:    beqz a6, .LBB10_6
 ; RV32I-NEXT:  .LBB10_5:
-; RV32I-NEXT:    mv a5, t2
+; RV32I-NEXT:    mv t0, a3
 ; RV32I-NEXT:  .LBB10_6:
-; RV32I-NEXT:    slli t3, a5, 1
-; RV32I-NEXT:    not a3, a2
-; RV32I-NEXT:    beqz a1, .LBB10_8
+; RV32I-NEXT:    slli t3, t0, 1
+; RV32I-NEXT:    not t2, a2
+; RV32I-NEXT:    beqz t1, .LBB10_8
 ; RV32I-NEXT:  # %bb.7:
-; RV32I-NEXT:    mv t1, a7
+; RV32I-NEXT:    mv a1, a7
 ; RV32I-NEXT:  .LBB10_8:
-; RV32I-NEXT:    srl a7, t0, a2
-; RV32I-NEXT:    sll a1, t3, a3
-; RV32I-NEXT:    srl a5, a5, a2
-; RV32I-NEXT:    beqz a4, .LBB10_10
+; RV32I-NEXT:    srl a7, a5, a2
+; RV32I-NEXT:    sll t1, t3, t2
+; RV32I-NEXT:    srl t0, t0, a2
+; RV32I-NEXT:    beqz a6, .LBB10_10
 ; RV32I-NEXT:  # %bb.9:
-; RV32I-NEXT:    mv t2, t1
+; RV32I-NEXT:    mv a3, a1
 ; RV32I-NEXT:  .LBB10_10:
-; RV32I-NEXT:    or a7, a1, a7
-; RV32I-NEXT:    slli a1, t2, 1
-; RV32I-NEXT:    sll a1, a1, a3
-; RV32I-NEXT:    or a5, a1, a5
-; RV32I-NEXT:    srl a1, t2, a2
-; RV32I-NEXT:    beqz a4, .LBB10_12
+; RV32I-NEXT:    or a7, t1, a7
+; RV32I-NEXT:    slli t1, a3, 1
+; RV32I-NEXT:    sll t1, t1, t2
+; RV32I-NEXT:    or t0, t1, t0
+; RV32I-NEXT:    srl a3, a3, a2
+; RV32I-NEXT:    beqz a6, .LBB10_12
 ; RV32I-NEXT:  # %bb.11:
-; RV32I-NEXT:    mv t1, a6
+; RV32I-NEXT:    mv a1, a4
 ; RV32I-NEXT:  .LBB10_12:
-; RV32I-NEXT:    slli a4, t1, 1
-; RV32I-NEXT:    sll a4, a4, a3
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srl a2, t1, a2
-; RV32I-NEXT:    slli a4, t0, 1
-; RV32I-NEXT:    sll a3, a4, a3
-; RV32I-NEXT:    or a2, a3, a2
-; RV32I-NEXT:    sw a2, 12(a0)
-; RV32I-NEXT:    sw a1, 8(a0)
-; RV32I-NEXT:    sw a5, 4(a0)
+; RV32I-NEXT:    slli a4, a1, 1
+; RV32I-NEXT:    sll a4, a4, t2
+; RV32I-NEXT:    or a3, a4, a3
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    slli a2, a5, 1
+; RV32I-NEXT:    sll a2, a2, t2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    sw a3, 8(a0)
+; RV32I-NEXT:    sw t0, 4(a0)
 ; RV32I-NEXT:    sw a7, 0(a0)
 ; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB10_13:
-; RV32I-NEXT:    mv t0, a5
-; RV32I-NEXT:    beqz a1, .LBB10_4
+; RV32I-NEXT:    mv a5, t0
+; RV32I-NEXT:    beqz t1, .LBB10_4
 ; RV32I-NEXT:  .LBB10_14:
-; RV32I-NEXT:    mv t2, a3
-; RV32I-NEXT:    bnez a4, .LBB10_5
+; RV32I-NEXT:    mv a3, t2
+; RV32I-NEXT:    bnez a6, .LBB10_5
 ; RV32I-NEXT:    j .LBB10_6
 ;
 ; RV64I-LABEL: fshr128_minsize:
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 6bae641408887..2b7f579c9b2ce 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -306,13 +306,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32-NEXT:    lbu a1, 12(s0)
 ; RV32-NEXT:    lw a2, 8(s0)
 ; RV32-NEXT:    andi a3, a0, 1
-; RV32-NEXT:    neg s2, a3
+; RV32-NEXT:    neg s1, a3
 ; RV32-NEXT:    slli a3, a1, 30
 ; RV32-NEXT:    srli a4, a2, 2
-; RV32-NEXT:    or s3, a4, a3
+; RV32-NEXT:    or s2, a4, a3
 ; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    andi a1, a1, 1
-; RV32-NEXT:    neg s1, a1
+; RV32-NEXT:    neg s3, a1
 ; RV32-NEXT:    slli a1, a2, 31
 ; RV32-NEXT:    srli a0, a0, 1
 ; RV32-NEXT:    or a0, a0, a1
@@ -327,17 +327,17 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32-NEXT:    mv s6, a1
 ; RV32-NEXT:    li a2, -5
 ; RV32-NEXT:    li a3, -1
-; RV32-NEXT:    mv a0, s3
-; RV32-NEXT:    mv a1, s1
+; RV32-NEXT:    mv a0, s2
+; RV32-NEXT:    mv a1, s3
 ; RV32-NEXT:    call __moddi3@plt
-; RV32-NEXT:    mv s1, a0
+; RV32-NEXT:    mv s2, a0
 ; RV32-NEXT:    mv s3, a1
 ; RV32-NEXT:    li a2, 6
 ; RV32-NEXT:    mv a0, s4
-; RV32-NEXT:    mv a1, s2
+; RV32-NEXT:    mv a1, s1
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __moddi3@plt
-; RV32-NEXT:    xori a2, s1, 2
+; RV32-NEXT:    xori a2, s2, 2
 ; RV32-NEXT:    or a2, a2, s3
 ; RV32-NEXT:    snez a2, a2
 ; RV32-NEXT:    xori a3, s5, 1
@@ -460,13 +460,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32M-NEXT:    lbu a1, 12(s0)
 ; RV32M-NEXT:    lw a2, 8(s0)
 ; RV32M-NEXT:    andi a3, a0, 1
-; RV32M-NEXT:    neg s2, a3
+; RV32M-NEXT:    neg s1, a3
 ; RV32M-NEXT:    slli a3, a1, 30
 ; RV32M-NEXT:    srli a4, a2, 2
-; RV32M-NEXT:    or s3, a4, a3
+; RV32M-NEXT:    or s2, a4, a3
 ; RV32M-NEXT:    srli a1, a1, 2
 ; RV32M-NEXT:    andi a1, a1, 1
-; RV32M-NEXT:    neg s1, a1
+; RV32M-NEXT:    neg s3, a1
 ; RV32M-NEXT:    slli a1, a2, 31
 ; RV32M-NEXT:    srli a0, a0, 1
 ; RV32M-NEXT:    or a0, a0, a1
@@ -481,17 +481,17 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32M-NEXT:    mv s6, a1
 ; RV32M-NEXT:    li a2, -5
 ; RV32M-NEXT:    li a3, -1
-; RV32M-NEXT:    mv a0, s3
-; RV32M-NEXT:    mv a1, s1
+; RV32M-NEXT:    mv a0, s2
+; RV32M-NEXT:    mv a1, s3
 ; RV32M-NEXT:    call __moddi3@plt
-; RV32M-NEXT:    mv s1, a0
+; RV32M-NEXT:    mv s2, a0
 ; RV32M-NEXT:    mv s3, a1
 ; RV32M-NEXT:    li a2, 6
 ; RV32M-NEXT:    mv a0, s4
-; RV32M-NEXT:    mv a1, s2
+; RV32M-NEXT:    mv a1, s1
 ; RV32M-NEXT:    li a3, 0
 ; RV32M-NEXT:    call __moddi3@plt
-; RV32M-NEXT:    xori a2, s1, 2
+; RV32M-NEXT:    xori a2, s2, 2
 ; RV32M-NEXT:    or a2, a2, s3
 ; RV32M-NEXT:    snez a2, a2
 ; RV32M-NEXT:    xori a3, s5, 1
diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index 0295f955292b1..a45cb88adb5f8 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -18,44 +18,42 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lh s2, 12(a1)
-; RV32I-NEXT:    lh s3, 8(a1)
-; RV32I-NEXT:    lh s0, 4(a1)
+; RV32I-NEXT:    lh s0, 12(a1)
+; RV32I-NEXT:    lh s1, 8(a1)
+; RV32I-NEXT:    lh s2, 4(a1)
 ; RV32I-NEXT:    lh a2, 0(a1)
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    call __modsi3@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    li a1, -124
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __modsi3@plt
-; RV32I-NEXT:    mv s5, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    li a1, 98
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __modsi3@plt
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    li a1, -1003
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __modsi3@plt
-; RV32I-NEXT:    sh a0, 6(s1)
-; RV32I-NEXT:    sh s0, 4(s1)
-; RV32I-NEXT:    sh s5, 2(s1)
-; RV32I-NEXT:    sh s4, 0(s1)
+; RV32I-NEXT:    sh a0, 6(s3)
+; RV32I-NEXT:    sh s1, 4(s3)
+; RV32I-NEXT:    sh s2, 2(s3)
+; RV32I-NEXT:    sh s4, 0(s3)
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: fold_srem_vec_1:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lh a6, 12(a1)
+; RV32IM-NEXT:    lh a2, 12(a1)
 ; RV32IM-NEXT:    lh a3, 8(a1)
 ; RV32IM-NEXT:    lh a4, 0(a1)
 ; RV32IM-NEXT:    lh a1, 4(a1)
@@ -63,88 +61,86 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
 ; RV32IM-NEXT:    addi a5, a5, 389
 ; RV32IM-NEXT:    mulh a5, a4, a5
 ; RV32IM-NEXT:    add a5, a5, a4
-; RV32IM-NEXT:    srli a2, a5, 31
+; RV32IM-NEXT:    srli a6, a5, 31
 ; RV32IM-NEXT:    srli a5, a5, 6
-; RV32IM-NEXT:    add a2, a5, a2
-; RV32IM-NEXT:    li a5, 95
-; RV32IM-NEXT:    mul a2, a2, a5
-; RV32IM-NEXT:    sub a2, a4, a2
-; RV32IM-NEXT:    lui a4, 507375
-; RV32IM-NEXT:    addi a4, a4, 1981
-; RV32IM-NEXT:    mulh a4, a1, a4
-; RV32IM-NEXT:    sub a4, a4, a1
-; RV32IM-NEXT:    srli a5, a4, 31
-; RV32IM-NEXT:    srli a4, a4, 6
-; RV32IM-NEXT:    add a4, a4, a5
-; RV32IM-NEXT:    li a5, -124
-; RV32IM-NEXT:    mul a4, a4, a5
-; RV32IM-NEXT:    sub a1, a1, a4
-; RV32IM-NEXT:    lui a4, 342392
-; RV32IM-NEXT:    addi a4, a4, 669
-; RV32IM-NEXT:    mulh a4, a3, a4
-; RV32IM-NEXT:    srli a5, a4, 31
-; RV32IM-NEXT:    srli a4, a4, 5
-; RV32IM-NEXT:    add a4, a4, a5
-; RV32IM-NEXT:    li a5, 98
-; RV32IM-NEXT:    mul a4, a4, a5
-; RV32IM-NEXT:    sub a3, a3, a4
-; RV32IM-NEXT:    lui a4, 780943
-; RV32IM-NEXT:    addi a4, a4, 1809
-; RV32IM-NEXT:    mulh a4, a6, a4
-; RV32IM-NEXT:    srli a5, a4, 31
-; RV32IM-NEXT:    srli a4, a4, 8
-; RV32IM-NEXT:    add a4, a4, a5
-; RV32IM-NEXT:    li a5, -1003
-; RV32IM-NEXT:    mul a4, a4, a5
-; RV32IM-NEXT:    sub a4, a6, a4
-; RV32IM-NEXT:    sh a4, 6(a0)
+; RV32IM-NEXT:    add a5, a5, a6
+; RV32IM-NEXT:    li a6, 95
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a4, a4, a5
+; RV32IM-NEXT:    lui a5, 507375
+; RV32IM-NEXT:    addi a5, a5, 1981
+; RV32IM-NEXT:    mulh a5, a1, a5
+; RV32IM-NEXT:    sub a5, a5, a1
+; RV32IM-NEXT:    srli a6, a5, 31
+; RV32IM-NEXT:    srli a5, a5, 6
+; RV32IM-NEXT:    add a5, a5, a6
+; RV32IM-NEXT:    li a6, -124
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a1, a1, a5
+; RV32IM-NEXT:    lui a5, 342392
+; RV32IM-NEXT:    addi a5, a5, 669
+; RV32IM-NEXT:    mulh a5, a3, a5
+; RV32IM-NEXT:    srli a6, a5, 31
+; RV32IM-NEXT:    srli a5, a5, 5
+; RV32IM-NEXT:    add a5, a5, a6
+; RV32IM-NEXT:    li a6, 98
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a3, a3, a5
+; RV32IM-NEXT:    lui a5, 780943
+; RV32IM-NEXT:    addi a5, a5, 1809
+; RV32IM-NEXT:    mulh a5, a2, a5
+; RV32IM-NEXT:    srli a6, a5, 31
+; RV32IM-NEXT:    srli a5, a5, 8
+; RV32IM-NEXT:    add a5, a5, a6
+; RV32IM-NEXT:    li a6, -1003
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a2, a2, a5
+; RV32IM-NEXT:    sh a2, 6(a0)
 ; RV32IM-NEXT:    sh a3, 4(a0)
 ; RV32IM-NEXT:    sh a1, 2(a0)
-; RV32IM-NEXT:    sh a2, 0(a0)
+; RV32IM-NEXT:    sh a4, 0(a0)
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: fold_srem_vec_1:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -64
-; RV64I-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lh s2, 24(a1)
-; RV64I-NEXT:    lh s3, 16(a1)
-; RV64I-NEXT:    lh s0, 8(a1)
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lh s0, 24(a1)
+; RV64I-NEXT:    lh s1, 16(a1)
+; RV64I-NEXT:    lh s2, 8(a1)
 ; RV64I-NEXT:    lh a2, 0(a1)
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __moddi3@plt
 ; RV64I-NEXT:    mv s4, a0
 ; RV64I-NEXT:    li a1, -124
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    mv s5, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    li a1, 98
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    li a1, -1003
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    sh a0, 6(s1)
-; RV64I-NEXT:    sh s0, 4(s1)
-; RV64I-NEXT:    sh s5, 2(s1)
-; RV64I-NEXT:    sh s4, 0(s1)
-; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 64
+; RV64I-NEXT:    sh a0, 6(s3)
+; RV64I-NEXT:    sh s1, 4(s3)
+; RV64I-NEXT:    sh s2, 2(s3)
+; RV64I-NEXT:    sh s4, 0(s3)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: fold_srem_vec_1:
@@ -152,45 +148,45 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    lh a2, 0(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI0_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI0_0)(a3)
-; RV64IM-NEXT:    lh a6, 24(a1)
-; RV64IM-NEXT:    lh a7, 16(a1)
+; RV64IM-NEXT:    lh a4, 24(a1)
+; RV64IM-NEXT:    lh a5, 16(a1)
 ; RV64IM-NEXT:    lh a1, 8(a1)
 ; RV64IM-NEXT:    mulh a3, a2, a3
 ; RV64IM-NEXT:    add a3, a3, a2
-; RV64IM-NEXT:    srli a4, a3, 63
+; RV64IM-NEXT:    srli a6, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 6
-; RV64IM-NEXT:    addw a3, a3, a4
-; RV64IM-NEXT:    lui a4, %hi(.LCPI0_1)
-; RV64IM-NEXT:    ld a4, %lo(.LCPI0_1)(a4)
-; RV64IM-NEXT:    li a5, 95
-; RV64IM-NEXT:    mulw a3, a3, a5
+; RV64IM-NEXT:    addw a3, a3, a6
+; RV64IM-NEXT:    lui a6, %hi(.LCPI0_1)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI0_1)(a6)
+; RV64IM-NEXT:    li a7, 95
+; RV64IM-NEXT:    mulw a3, a3, a7
 ; RV64IM-NEXT:    subw a2, a2, a3
-; RV64IM-NEXT:    mulh a3, a1, a4
+; RV64IM-NEXT:    mulh a3, a1, a6
 ; RV64IM-NEXT:    sub a3, a3, a1
-; RV64IM-NEXT:    srli a4, a3, 63
+; RV64IM-NEXT:    srli a6, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 6
-; RV64IM-NEXT:    addw a3, a3, a4
-; RV64IM-NEXT:    lui a4, %hi(.LCPI0_2)
-; RV64IM-NEXT:    ld a4, %lo(.LCPI0_2)(a4)
-; RV64IM-NEXT:    li a5, -124
-; RV64IM-NEXT:    mulw a3, a3, a5
+; RV64IM-NEXT:    addw a3, a3, a6
+; RV64IM-NEXT:    lui a6, %hi(.LCPI0_2)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI0_2)(a6)
+; RV64IM-NEXT:    li a7, -124
+; RV64IM-NEXT:    mulw a3, a3, a7
 ; RV64IM-NEXT:    subw a1, a1, a3
-; RV64IM-NEXT:    mulh a3, a7, a4
-; RV64IM-NEXT:    srli a4, a3, 63
+; RV64IM-NEXT:    mulh a3, a5, a6
+; RV64IM-NEXT:    srli a6, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 5
-; RV64IM-NEXT:    addw a3, a3, a4
-; RV64IM-NEXT:    lui a4, %hi(.LCPI0_3)
-; RV64IM-NEXT:    ld a4, %lo(.LCPI0_3)(a4)
-; RV64IM-NEXT:    li a5, 98
-; RV64IM-NEXT:    mulw a3, a3, a5
-; RV64IM-NEXT:    subw a3, a7, a3
-; RV64IM-NEXT:    mulh a4, a6, a4
-; RV64IM-NEXT:    srli a5, a4, 63
-; RV64IM-NEXT:    srli a4, a4, 7
-; RV64IM-NEXT:    addw a4, a4, a5
-; RV64IM-NEXT:    li a5, -1003
-; RV64IM-NEXT:    mulw a4, a4, a5
-; RV64IM-NEXT:    subw a4, a6, a4
+; RV64IM-NEXT:    addw a3, a3, a6
+; RV64IM-NEXT:    lui a6, %hi(.LCPI0_3)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI0_3)(a6)
+; RV64IM-NEXT:    li a7, 98
+; RV64IM-NEXT:    mulw a3, a3, a7
+; RV64IM-NEXT:    subw a3, a5, a3
+; RV64IM-NEXT:    mulh a5, a4, a6
+; RV64IM-NEXT:    srli a6, a5, 63
+; RV64IM-NEXT:    srli a5, a5, 7
+; RV64IM-NEXT:    addw a5, a5, a6
+; RV64IM-NEXT:    li a6, -1003
+; RV64IM-NEXT:    mulw a5, a5, a6
+; RV64IM-NEXT:    subw a4, a4, a5
 ; RV64IM-NEXT:    sh a4, 6(a0)
 ; RV64IM-NEXT:    sh a3, 4(a0)
 ; RV64IM-NEXT:    sh a1, 2(a0)
@@ -210,126 +206,122 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lh s2, 12(a1)
-; RV32I-NEXT:    lh s3, 8(a1)
-; RV32I-NEXT:    lh s0, 4(a1)
+; RV32I-NEXT:    lh s0, 12(a1)
+; RV32I-NEXT:    lh s1, 8(a1)
+; RV32I-NEXT:    lh s2, 4(a1)
 ; RV32I-NEXT:    lh a2, 0(a1)
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    call __modsi3@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __modsi3@plt
-; RV32I-NEXT:    mv s5, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __modsi3@plt
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __modsi3@plt
-; RV32I-NEXT:    sh a0, 6(s1)
-; RV32I-NEXT:    sh s0, 4(s1)
-; RV32I-NEXT:    sh s5, 2(s1)
-; RV32I-NEXT:    sh s4, 0(s1)
+; RV32I-NEXT:    sh a0, 6(s3)
+; RV32I-NEXT:    sh s1, 4(s3)
+; RV32I-NEXT:    sh s2, 2(s3)
+; RV32I-NEXT:    sh s4, 0(s3)
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: fold_srem_vec_2:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lh a6, 12(a1)
+; RV32IM-NEXT:    lh a2, 12(a1)
 ; RV32IM-NEXT:    lh a3, 8(a1)
 ; RV32IM-NEXT:    lh a4, 0(a1)
 ; RV32IM-NEXT:    lh a1, 4(a1)
 ; RV32IM-NEXT:    lui a5, 706409
 ; RV32IM-NEXT:    addi a5, a5, 389
-; RV32IM-NEXT:    mulh a2, a4, a5
-; RV32IM-NEXT:    add a2, a2, a4
-; RV32IM-NEXT:    srli a7, a2, 31
-; RV32IM-NEXT:    srli a2, a2, 6
-; RV32IM-NEXT:    add a2, a2, a7
+; RV32IM-NEXT:    mulh a6, a4, a5
+; RV32IM-NEXT:    add a6, a6, a4
+; RV32IM-NEXT:    srli a7, a6, 31
+; RV32IM-NEXT:    srli a6, a6, 6
+; RV32IM-NEXT:    add a6, a6, a7
 ; RV32IM-NEXT:    li a7, 95
-; RV32IM-NEXT:    mul a2, a2, a7
-; RV32IM-NEXT:    sub t0, a4, a2
-; RV32IM-NEXT:    mulh a4, a1, a5
-; RV32IM-NEXT:    add a4, a4, a1
-; RV32IM-NEXT:    srli a2, a4, 31
-; RV32IM-NEXT:    srli a4, a4, 6
-; RV32IM-NEXT:    add a2, a4, a2
-; RV32IM-NEXT:    mul a2, a2, a7
-; RV32IM-NEXT:    sub a1, a1, a2
-; RV32IM-NEXT:    mulh a2, a3, a5
-; RV32IM-NEXT:    add a2, a2, a3
-; RV32IM-NEXT:    srli a4, a2, 31
-; RV32IM-NEXT:    srli a2, a2, 6
-; RV32IM-NEXT:    add a2, a2, a4
-; RV32IM-NEXT:    mul a2, a2, a7
-; RV32IM-NEXT:    sub a2, a3, a2
-; RV32IM-NEXT:    mulh a3, a6, a5
-; RV32IM-NEXT:    add a3, a3, a6
-; RV32IM-NEXT:    srli a4, a3, 31
-; RV32IM-NEXT:    srli a3, a3, 6
-; RV32IM-NEXT:    add a3, a3, a4
-; RV32IM-NEXT:    mul a3, a3, a7
-; RV32IM-NEXT:    sub a3, a6, a3
-; RV32IM-NEXT:    sh a3, 6(a0)
-; RV32IM-NEXT:    sh a2, 4(a0)
+; RV32IM-NEXT:    mul a6, a6, a7
+; RV32IM-NEXT:    sub a4, a4, a6
+; RV32IM-NEXT:    mulh a6, a1, a5
+; RV32IM-NEXT:    add a6, a6, a1
+; RV32IM-NEXT:    srli t0, a6, 31
+; RV32IM-NEXT:    srli a6, a6, 6
+; RV32IM-NEXT:    add a6, a6, t0
+; RV32IM-NEXT:    mul a6, a6, a7
+; RV32IM-NEXT:    sub a1, a1, a6
+; RV32IM-NEXT:    mulh a6, a3, a5
+; RV32IM-NEXT:    add a6, a6, a3
+; RV32IM-NEXT:    srli t0, a6, 31
+; RV32IM-NEXT:    srli a6, a6, 6
+; RV32IM-NEXT:    add a6, a6, t0
+; RV32IM-NEXT:    mul a6, a6, a7
+; RV32IM-NEXT:    sub a3, a3, a6
+; RV32IM-NEXT:    mulh a5, a2, a5
+; RV32IM-NEXT:    add a5, a5, a2
+; RV32IM-NEXT:    srli a6, a5, 31
+; RV32IM-NEXT:    srli a5, a5, 6
+; RV32IM-NEXT:    add a5, a5, a6
+; RV32IM-NEXT:    mul a5, a5, a7
+; RV32IM-NEXT:    sub a2, a2, a5
+; RV32IM-NEXT:    sh a2, 6(a0)
+; RV32IM-NEXT:    sh a3, 4(a0)
 ; RV32IM-NEXT:    sh a1, 2(a0)
-; RV32IM-NEXT:    sh t0, 0(a0)
+; RV32IM-NEXT:    sh a4, 0(a0)
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: fold_srem_vec_2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -64
-; RV64I-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lh s2, 24(a1)
-; RV64I-NEXT:    lh s3, 16(a1)
-; RV64I-NEXT:    lh s0, 8(a1)
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lh s0, 24(a1)
+; RV64I-NEXT:    lh s1, 16(a1)
+; RV64I-NEXT:    lh s2, 8(a1)
 ; RV64I-NEXT:    lh a2, 0(a1)
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __moddi3@plt
 ; RV64I-NEXT:    mv s4, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    mv s5, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    sh a0, 6(s1)
-; RV64I-NEXT:    sh s0, 4(s1)
-; RV64I-NEXT:    sh s5, 2(s1)
-; RV64I-NEXT:    sh s4, 0(s1)
-; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 64
+; RV64I-NEXT:    sh a0, 6(s3)
+; RV64I-NEXT:    sh s1, 4(s3)
+; RV64I-NEXT:    sh s2, 2(s3)
+; RV64I-NEXT:    sh s4, 0(s3)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: fold_srem_vec_2:
@@ -337,42 +329,42 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    lh a2, 0(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI1_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI1_0)(a3)
-; RV64IM-NEXT:    lh a6, 24(a1)
+; RV64IM-NEXT:    lh a4, 24(a1)
 ; RV64IM-NEXT:    lh a5, 16(a1)
 ; RV64IM-NEXT:    lh a1, 8(a1)
-; RV64IM-NEXT:    mulh a4, a2, a3
-; RV64IM-NEXT:    add a4, a4, a2
-; RV64IM-NEXT:    srli a7, a4, 63
-; RV64IM-NEXT:    srli a4, a4, 6
-; RV64IM-NEXT:    addw a4, a4, a7
+; RV64IM-NEXT:    mulh a6, a2, a3
+; RV64IM-NEXT:    add a6, a6, a2
+; RV64IM-NEXT:    srli a7, a6, 63
+; RV64IM-NEXT:    srli a6, a6, 6
+; RV64IM-NEXT:    addw a6, a6, a7
 ; RV64IM-NEXT:    li a7, 95
-; RV64IM-NEXT:    mulw a4, a4, a7
-; RV64IM-NEXT:    subw t0, a2, a4
-; RV64IM-NEXT:    mulh a4, a1, a3
-; RV64IM-NEXT:    add a4, a4, a1
-; RV64IM-NEXT:    srli a2, a4, 63
-; RV64IM-NEXT:    srli a4, a4, 6
-; RV64IM-NEXT:    addw a2, a4, a2
-; RV64IM-NEXT:    mulw a2, a2, a7
-; RV64IM-NEXT:    subw a1, a1, a2
-; RV64IM-NEXT:    mulh a2, a5, a3
-; RV64IM-NEXT:    add a2, a2, a5
-; RV64IM-NEXT:    srli a4, a2, 63
-; RV64IM-NEXT:    srli a2, a2, 6
-; RV64IM-NEXT:    addw a2, a2, a4
-; RV64IM-NEXT:    mulw a2, a2, a7
-; RV64IM-NEXT:    subw a2, a5, a2
-; RV64IM-NEXT:    mulh a3, a6, a3
-; RV64IM-NEXT:    add a3, a3, a6
-; RV64IM-NEXT:    srli a4, a3, 63
+; RV64IM-NEXT:    mulw a6, a6, a7
+; RV64IM-NEXT:    subw a2, a2, a6
+; RV64IM-NEXT:    mulh a6, a1, a3
+; RV64IM-NEXT:    add a6, a6, a1
+; RV64IM-NEXT:    srli t0, a6, 63
+; RV64IM-NEXT:    srli a6, a6, 6
+; RV64IM-NEXT:    addw a6, a6, t0
+; RV64IM-NEXT:    mulw a6, a6, a7
+; RV64IM-NEXT:    subw a1, a1, a6
+; RV64IM-NEXT:    mulh a6, a5, a3
+; RV64IM-NEXT:    add a6, a6, a5
+; RV64IM-NEXT:    srli t0, a6, 63
+; RV64IM-NEXT:    srli a6, a6, 6
+; RV64IM-NEXT:    addw a6, a6, t0
+; RV64IM-NEXT:    mulw a6, a6, a7
+; RV64IM-NEXT:    subw a5, a5, a6
+; RV64IM-NEXT:    mulh a3, a4, a3
+; RV64IM-NEXT:    add a3, a3, a4
+; RV64IM-NEXT:    srli a6, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 6
-; RV64IM-NEXT:    addw a3, a3, a4
+; RV64IM-NEXT:    addw a3, a3, a6
 ; RV64IM-NEXT:    mulw a3, a3, a7
-; RV64IM-NEXT:    subw a3, a6, a3
+; RV64IM-NEXT:    subw a3, a4, a3
 ; RV64IM-NEXT:    sh a3, 6(a0)
-; RV64IM-NEXT:    sh a2, 4(a0)
+; RV64IM-NEXT:    sh a5, 4(a0)
 ; RV64IM-NEXT:    sh a1, 2(a0)
-; RV64IM-NEXT:    sh t0, 0(a0)
+; RV64IM-NEXT:    sh a2, 0(a0)
 ; RV64IM-NEXT:    ret
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   ret <4 x i16> %1
@@ -394,47 +386,46 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lh s2, 0(a1)
-; RV32I-NEXT:    lh s3, 4(a1)
-; RV32I-NEXT:    lh s4, 8(a1)
-; RV32I-NEXT:    lh s1, 12(a1)
+; RV32I-NEXT:    lh s1, 0(a1)
+; RV32I-NEXT:    lh s2, 4(a1)
+; RV32I-NEXT:    lh s3, 8(a1)
+; RV32I-NEXT:    lh s4, 12(a1)
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    call __modsi3@plt
 ; RV32I-NEXT:    mv s5, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s4
+; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    call __modsi3@plt
 ; RV32I-NEXT:    mv s6, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __modsi3@plt
 ; RV32I-NEXT:    mv s7, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __modsi3@plt
 ; RV32I-NEXT:    mv s8, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    call __divsi3@plt
-; RV32I-NEXT:    mv s9, a0
-; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    call __divsi3@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    call __divsi3@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __divsi3@plt
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    li a1, 95
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    call __divsi3@plt
 ; RV32I-NEXT:    add a0, s8, a0
-; RV32I-NEXT:    add a1, s7, s1
-; RV32I-NEXT:    add a2, s6, s4
-; RV32I-NEXT:    add a3, s5, s9
+; RV32I-NEXT:    add a1, s7, s2
+; RV32I-NEXT:    add a2, s6, s3
+; RV32I-NEXT:    add a3, s5, s4
 ; RV32I-NEXT:    sh a3, 6(s0)
 ; RV32I-NEXT:    sh a2, 4(s0)
 ; RV32I-NEXT:    sh a1, 2(s0)
@@ -449,127 +440,124 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: combine_srem_sdiv:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lh a6, 0(a1)
+; RV32IM-NEXT:    lh a2, 0(a1)
 ; RV32IM-NEXT:    lh a3, 4(a1)
 ; RV32IM-NEXT:    lh a4, 12(a1)
 ; RV32IM-NEXT:    lh a1, 8(a1)
 ; RV32IM-NEXT:    lui a5, 706409
 ; RV32IM-NEXT:    addi a5, a5, 389
-; RV32IM-NEXT:    mulh a2, a4, a5
-; RV32IM-NEXT:    add a2, a2, a4
-; RV32IM-NEXT:    srli a7, a2, 31
-; RV32IM-NEXT:    srai a2, a2, 6
-; RV32IM-NEXT:    add t0, a2, a7
+; RV32IM-NEXT:    mulh a6, a4, a5
+; RV32IM-NEXT:    add a6, a6, a4
+; RV32IM-NEXT:    srli a7, a6, 31
+; RV32IM-NEXT:    srai a6, a6, 6
+; RV32IM-NEXT:    add a6, a6, a7
 ; RV32IM-NEXT:    li a7, 95
-; RV32IM-NEXT:    mul a2, t0, a7
-; RV32IM-NEXT:    sub t1, a4, a2
-; RV32IM-NEXT:    mulh a4, a1, a5
-; RV32IM-NEXT:    add a4, a4, a1
-; RV32IM-NEXT:    srli a2, a4, 31
-; RV32IM-NEXT:    srai a4, a4, 6
-; RV32IM-NEXT:    add a2, a4, a2
-; RV32IM-NEXT:    mul a4, a2, a7
-; RV32IM-NEXT:    sub t2, a1, a4
-; RV32IM-NEXT:    mulh a4, a3, a5
-; RV32IM-NEXT:    add a4, a4, a3
-; RV32IM-NEXT:    srli a1, a4, 31
-; RV32IM-NEXT:    srai a4, a4, 6
-; RV32IM-NEXT:    add a1, a4, a1
-; RV32IM-NEXT:    mul a4, a1, a7
-; RV32IM-NEXT:    sub a3, a3, a4
-; RV32IM-NEXT:    mulh a4, a6, a5
+; RV32IM-NEXT:    mul t0, a6, a7
+; RV32IM-NEXT:    sub a4, a4, t0
+; RV32IM-NEXT:    mulh t0, a1, a5
+; RV32IM-NEXT:    add t0, t0, a1
+; RV32IM-NEXT:    srli t1, t0, 31
+; RV32IM-NEXT:    srai t0, t0, 6
+; RV32IM-NEXT:    add t0, t0, t1
+; RV32IM-NEXT:    mul t1, t0, a7
+; RV32IM-NEXT:    sub a1, a1, t1
+; RV32IM-NEXT:    mulh t1, a3, a5
+; RV32IM-NEXT:    add t1, t1, a3
+; RV32IM-NEXT:    srli t2, t1, 31
+; RV32IM-NEXT:    srai t1, t1, 6
+; RV32IM-NEXT:    add t1, t1, t2
+; RV32IM-NEXT:    mul t2, t1, a7
+; RV32IM-NEXT:    sub a3, a3, t2
+; RV32IM-NEXT:    mulh a5, a2, a5
+; RV32IM-NEXT:    add a5, a5, a2
+; RV32IM-NEXT:    srli t2, a5, 31
+; RV32IM-NEXT:    srai a5, a5, 6
+; RV32IM-NEXT:    add a5, a5, t2
+; RV32IM-NEXT:    mul a7, a5, a7
+; RV32IM-NEXT:    sub a2, a2, a7
+; RV32IM-NEXT:    add a2, a2, a5
+; RV32IM-NEXT:    add a3, a3, t1
+; RV32IM-NEXT:    add a1, a1, t0
 ; RV32IM-NEXT:    add a4, a4, a6
-; RV32IM-NEXT:    srli a5, a4, 31
-; RV32IM-NEXT:    srai a4, a4, 6
-; RV32IM-NEXT:    add a4, a4, a5
-; RV32IM-NEXT:    mul a5, a4, a7
-; RV32IM-NEXT:    sub a5, a6, a5
-; RV32IM-NEXT:    add a4, a5, a4
-; RV32IM-NEXT:    add a1, a3, a1
-; RV32IM-NEXT:    add a2, t2, a2
-; RV32IM-NEXT:    add a3, t1, t0
-; RV32IM-NEXT:    sh a3, 6(a0)
-; RV32IM-NEXT:    sh a2, 4(a0)
-; RV32IM-NEXT:    sh a1, 2(a0)
-; RV32IM-NEXT:    sh a4, 0(a0)
+; RV32IM-NEXT:    sh a4, 6(a0)
+; RV32IM-NEXT:    sh a1, 4(a0)
+; RV32IM-NEXT:    sh a3, 2(a0)
+; RV32IM-NEXT:    sh a2, 0(a0)
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: combine_srem_sdiv:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -96
-; RV64I-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s9, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lh s2, 0(a1)
-; RV64I-NEXT:    lh s3, 8(a1)
-; RV64I-NEXT:    lh s4, 16(a1)
-; RV64I-NEXT:    lh s1, 24(a1)
+; RV64I-NEXT:    addi sp, sp, -80
+; RV64I-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lh s1, 0(a1)
+; RV64I-NEXT:    lh s2, 8(a1)
+; RV64I-NEXT:    lh s3, 16(a1)
+; RV64I-NEXT:    lh s4, 24(a1)
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    call __moddi3@plt
 ; RV64I-NEXT:    mv s5, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s4
+; RV64I-NEXT:    mv a0, s3
 ; RV64I-NEXT:    call __moddi3@plt
 ; RV64I-NEXT:    mv s6, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __moddi3@plt
 ; RV64I-NEXT:    mv s7, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __moddi3@plt
 ; RV64I-NEXT:    mv s8, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    call __divdi3@plt
-; RV64I-NEXT:    mv s9, a0
-; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    call __divdi3@plt
 ; RV64I-NEXT:    mv s4, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, s3
 ; RV64I-NEXT:    call __divdi3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __divdi3@plt
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    li a1, 95
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    call __divdi3@plt
 ; RV64I-NEXT:    addw a0, s8, a0
-; RV64I-NEXT:    addw a1, s7, s1
-; RV64I-NEXT:    addw a2, s6, s4
-; RV64I-NEXT:    addw a3, s5, s9
+; RV64I-NEXT:    addw a1, s7, s2
+; RV64I-NEXT:    addw a2, s6, s3
+; RV64I-NEXT:    addw a3, s5, s4
 ; RV64I-NEXT:    sh a3, 6(s0)
 ; RV64I-NEXT:    sh a2, 4(s0)
 ; RV64I-NEXT:    sh a1, 2(s0)
 ; RV64I-NEXT:    sh a0, 0(s0)
-; RV64I-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 96
+; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 80
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: combine_srem_sdiv:
@@ -577,45 +565,45 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    lh a2, 24(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI2_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI2_0)(a3)
-; RV64IM-NEXT:    lh a6, 0(a1)
+; RV64IM-NEXT:    lh a4, 0(a1)
 ; RV64IM-NEXT:    lh a5, 8(a1)
 ; RV64IM-NEXT:    lh a1, 16(a1)
-; RV64IM-NEXT:    mulh a4, a2, a3
-; RV64IM-NEXT:    add a4, a4, a2
-; RV64IM-NEXT:    srli a7, a4, 63
-; RV64IM-NEXT:    srai a4, a4, 6
-; RV64IM-NEXT:    addw t0, a4, a7
+; RV64IM-NEXT:    mulh a6, a2, a3
+; RV64IM-NEXT:    add a6, a6, a2
+; RV64IM-NEXT:    srli a7, a6, 63
+; RV64IM-NEXT:    srai a6, a6, 6
+; RV64IM-NEXT:    addw a6, a6, a7
 ; RV64IM-NEXT:    li a7, 95
-; RV64IM-NEXT:    mulw a4, t0, a7
-; RV64IM-NEXT:    subw t1, a2, a4
-; RV64IM-NEXT:    mulh a4, a1, a3
-; RV64IM-NEXT:    add a4, a4, a1
-; RV64IM-NEXT:    srli a2, a4, 63
-; RV64IM-NEXT:    srai a4, a4, 6
-; RV64IM-NEXT:    addw a2, a4, a2
-; RV64IM-NEXT:    mulw a4, a2, a7
-; RV64IM-NEXT:    subw t2, a1, a4
-; RV64IM-NEXT:    mulh a4, a5, a3
-; RV64IM-NEXT:    add a4, a4, a5
-; RV64IM-NEXT:    srli a1, a4, 63
-; RV64IM-NEXT:    srai a4, a4, 6
-; RV64IM-NEXT:    addw a1, a4, a1
-; RV64IM-NEXT:    mulw a4, a1, a7
-; RV64IM-NEXT:    subw a4, a5, a4
-; RV64IM-NEXT:    mulh a3, a6, a3
-; RV64IM-NEXT:    add a3, a3, a6
-; RV64IM-NEXT:    srli a5, a3, 63
+; RV64IM-NEXT:    mulw t0, a6, a7
+; RV64IM-NEXT:    subw a2, a2, t0
+; RV64IM-NEXT:    mulh t0, a1, a3
+; RV64IM-NEXT:    add t0, t0, a1
+; RV64IM-NEXT:    srli t1, t0, 63
+; RV64IM-NEXT:    srai t0, t0, 6
+; RV64IM-NEXT:    addw t0, t0, t1
+; RV64IM-NEXT:    mulw t1, t0, a7
+; RV64IM-NEXT:    subw a1, a1, t1
+; RV64IM-NEXT:    mulh t1, a5, a3
+; RV64IM-NEXT:    add t1, t1, a5
+; RV64IM-NEXT:    srli t2, t1, 63
+; RV64IM-NEXT:    srai t1, t1, 6
+; RV64IM-NEXT:    addw t1, t1, t2
+; RV64IM-NEXT:    mulw t2, t1, a7
+; RV64IM-NEXT:    subw a5, a5, t2
+; RV64IM-NEXT:    mulh a3, a4, a3
+; RV64IM-NEXT:    add a3, a3, a4
+; RV64IM-NEXT:    srli t2, a3, 63
 ; RV64IM-NEXT:    srai a3, a3, 6
-; RV64IM-NEXT:    addw a3, a3, a5
-; RV64IM-NEXT:    mulw a5, a3, a7
-; RV64IM-NEXT:    subw a5, a6, a5
-; RV64IM-NEXT:    addw a3, a5, a3
-; RV64IM-NEXT:    addw a1, a4, a1
-; RV64IM-NEXT:    addw a2, t2, a2
-; RV64IM-NEXT:    addw a4, t1, t0
-; RV64IM-NEXT:    sh a4, 6(a0)
-; RV64IM-NEXT:    sh a2, 4(a0)
-; RV64IM-NEXT:    sh a1, 2(a0)
+; RV64IM-NEXT:    addw a3, a3, t2
+; RV64IM-NEXT:    mulw a7, a3, a7
+; RV64IM-NEXT:    subw a4, a4, a7
+; RV64IM-NEXT:    addw a3, a4, a3
+; RV64IM-NEXT:    addw a4, a5, t1
+; RV64IM-NEXT:    addw a1, a1, t0
+; RV64IM-NEXT:    addw a2, a2, a6
+; RV64IM-NEXT:    sh a2, 6(a0)
+; RV64IM-NEXT:    sh a1, 4(a0)
+; RV64IM-NEXT:    sh a4, 2(a0)
 ; RV64IM-NEXT:    sh a3, 0(a0)
 ; RV64IM-NEXT:    ret
   %1 = srem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -642,21 +630,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    srli a4, a2, 26
 ; RV32I-NEXT:    add a4, a2, a4
 ; RV32I-NEXT:    andi a4, a4, -64
-; RV32I-NEXT:    sub s2, a2, a4
+; RV32I-NEXT:    sub s1, a2, a4
 ; RV32I-NEXT:    srli a2, a1, 27
 ; RV32I-NEXT:    add a2, a1, a2
 ; RV32I-NEXT:    andi a2, a2, -32
-; RV32I-NEXT:    sub s3, a1, a2
+; RV32I-NEXT:    sub s2, a1, a2
 ; RV32I-NEXT:    srli a1, a3, 29
 ; RV32I-NEXT:    add a1, a3, a1
 ; RV32I-NEXT:    andi a1, a1, -8
-; RV32I-NEXT:    sub s1, a3, a1
+; RV32I-NEXT:    sub s3, a3, a1
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    call __modsi3@plt
 ; RV32I-NEXT:    sh a0, 6(s0)
-; RV32I-NEXT:    sh s1, 4(s0)
-; RV32I-NEXT:    sh s3, 2(s0)
-; RV32I-NEXT:    sh s2, 0(s0)
+; RV32I-NEXT:    sh s3, 4(s0)
+; RV32I-NEXT:    sh s2, 2(s0)
+; RV32I-NEXT:    sh s1, 0(s0)
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -677,9 +665,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
 ; RV32IM-NEXT:    add a5, a5, a4
 ; RV32IM-NEXT:    srli a6, a5, 31
 ; RV32IM-NEXT:    srli a5, a5, 6
-; RV32IM-NEXT:    add a6, a5, a6
-; RV32IM-NEXT:    li a5, 95
-; RV32IM-NEXT:    mul a5, a6, a5
+; RV32IM-NEXT:    add a5, a5, a6
+; RV32IM-NEXT:    li a6, 95
+; RV32IM-NEXT:    mul a5, a5, a6
 ; RV32IM-NEXT:    sub a4, a4, a5
 ; RV32IM-NEXT:    srli a5, a1, 26
 ; RV32IM-NEXT:    add a5, a1, a5
@@ -715,21 +703,21 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    srli a4, a2, 58
 ; RV64I-NEXT:    add a4, a2, a4
 ; RV64I-NEXT:    andi a4, a4, -64
-; RV64I-NEXT:    subw s2, a2, a4
+; RV64I-NEXT:    subw s1, a2, a4
 ; RV64I-NEXT:    srli a2, a1, 59
 ; RV64I-NEXT:    add a2, a1, a2
 ; RV64I-NEXT:    andi a2, a2, -32
-; RV64I-NEXT:    subw s3, a1, a2
+; RV64I-NEXT:    subw s2, a1, a2
 ; RV64I-NEXT:    srli a1, a3, 61
 ; RV64I-NEXT:    add a1, a3, a1
 ; RV64I-NEXT:    andi a1, a1, -8
-; RV64I-NEXT:    subw s1, a3, a1
+; RV64I-NEXT:    subw s3, a3, a1
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    call __moddi3@plt
 ; RV64I-NEXT:    sh a0, 6(s0)
-; RV64I-NEXT:    sh s1, 4(s0)
-; RV64I-NEXT:    sh s3, 2(s0)
-; RV64I-NEXT:    sh s2, 0(s0)
+; RV64I-NEXT:    sh s3, 4(s0)
+; RV64I-NEXT:    sh s2, 2(s0)
+; RV64I-NEXT:    sh s1, 0(s0)
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
@@ -750,9 +738,9 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    add a3, a3, a2
 ; RV64IM-NEXT:    srli a6, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 6
-; RV64IM-NEXT:    addw a6, a3, a6
-; RV64IM-NEXT:    li a3, 95
-; RV64IM-NEXT:    mulw a3, a6, a3
+; RV64IM-NEXT:    addw a3, a3, a6
+; RV64IM-NEXT:    li a6, 95
+; RV64IM-NEXT:    mulw a3, a3, a6
 ; RV64IM-NEXT:    subw a2, a2, a3
 ; RV64IM-NEXT:    srli a3, a1, 58
 ; RV64IM-NEXT:    add a3, a1, a3
@@ -785,10 +773,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lh s2, 12(a1)
+; RV32I-NEXT:    lh s0, 12(a1)
 ; RV32I-NEXT:    lh s1, 8(a1)
 ; RV32I-NEXT:    lh a2, 4(a1)
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    li a1, 654
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    call __modsi3@plt
@@ -799,12 +787,12 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lui a0, 1
 ; RV32I-NEXT:    addi a1, a0, 1327
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __modsi3@plt
-; RV32I-NEXT:    sh a0, 6(s0)
-; RV32I-NEXT:    sh s1, 4(s0)
-; RV32I-NEXT:    sh s3, 2(s0)
-; RV32I-NEXT:    sh zero, 0(s0)
+; RV32I-NEXT:    sh a0, 6(s2)
+; RV32I-NEXT:    sh s1, 4(s2)
+; RV32I-NEXT:    sh s3, 2(s2)
+; RV32I-NEXT:    sh zero, 0(s2)
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -862,10 +850,10 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lh s2, 24(a1)
+; RV64I-NEXT:    lh s0, 24(a1)
 ; RV64I-NEXT:    lh s1, 16(a1)
 ; RV64I-NEXT:    lh a2, 8(a1)
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    li a1, 654
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __moddi3@plt
@@ -876,12 +864,12 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 1
 ; RV64I-NEXT:    addiw a1, a0, 1327
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    sh a0, 6(s0)
-; RV64I-NEXT:    sh s1, 4(s0)
-; RV64I-NEXT:    sh s3, 2(s0)
-; RV64I-NEXT:    sh zero, 0(s0)
+; RV64I-NEXT:    sh a0, 6(s2)
+; RV64I-NEXT:    sh s1, 4(s2)
+; RV64I-NEXT:    sh s3, 2(s2)
+; RV64I-NEXT:    sh zero, 0(s2)
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
@@ -895,7 +883,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    lh a2, 16(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI4_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI4_0)(a3)
-; RV64IM-NEXT:    lh a6, 24(a1)
+; RV64IM-NEXT:    lh a4, 24(a1)
 ; RV64IM-NEXT:    lh a1, 8(a1)
 ; RV64IM-NEXT:    mulh a3, a2, a3
 ; RV64IM-NEXT:    add a3, a3, a2
@@ -904,26 +892,26 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    addw a3, a3, a5
 ; RV64IM-NEXT:    lui a5, %hi(.LCPI4_1)
 ; RV64IM-NEXT:    ld a5, %lo(.LCPI4_1)(a5)
-; RV64IM-NEXT:    li a4, 23
-; RV64IM-NEXT:    mulw a3, a3, a4
+; RV64IM-NEXT:    li a6, 23
+; RV64IM-NEXT:    mulw a3, a3, a6
 ; RV64IM-NEXT:    subw a2, a2, a3
 ; RV64IM-NEXT:    mulh a3, a1, a5
-; RV64IM-NEXT:    srli a4, a3, 63
+; RV64IM-NEXT:    srli a5, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 8
-; RV64IM-NEXT:    addw a3, a3, a4
-; RV64IM-NEXT:    lui a4, %hi(.LCPI4_2)
-; RV64IM-NEXT:    ld a4, %lo(.LCPI4_2)(a4)
-; RV64IM-NEXT:    li a5, 654
-; RV64IM-NEXT:    mulw a3, a3, a5
+; RV64IM-NEXT:    addw a3, a3, a5
+; RV64IM-NEXT:    lui a5, %hi(.LCPI4_2)
+; RV64IM-NEXT:    ld a5, %lo(.LCPI4_2)(a5)
+; RV64IM-NEXT:    li a6, 654
+; RV64IM-NEXT:    mulw a3, a3, a6
 ; RV64IM-NEXT:    subw a1, a1, a3
-; RV64IM-NEXT:    mulh a3, a6, a4
-; RV64IM-NEXT:    srli a4, a3, 63
+; RV64IM-NEXT:    mulh a3, a4, a5
+; RV64IM-NEXT:    srli a5, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 11
-; RV64IM-NEXT:    addw a3, a3, a4
-; RV64IM-NEXT:    lui a4, 1
-; RV64IM-NEXT:    addiw a4, a4, 1327
-; RV64IM-NEXT:    mulw a3, a3, a4
-; RV64IM-NEXT:    subw a3, a6, a3
+; RV64IM-NEXT:    addw a3, a3, a5
+; RV64IM-NEXT:    lui a5, 1
+; RV64IM-NEXT:    addiw a5, a5, 1327
+; RV64IM-NEXT:    mulw a3, a3, a5
+; RV64IM-NEXT:    subw a3, a4, a3
 ; RV64IM-NEXT:    sh zero, 0(a0)
 ; RV64IM-NEXT:    sh a3, 6(a0)
 ; RV64IM-NEXT:    sh a1, 2(a0)
@@ -945,7 +933,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lh a2, 4(a1)
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    lh s2, 12(a1)
+; RV32I-NEXT:    lh s1, 12(a1)
 ; RV32I-NEXT:    lh a0, 8(a1)
 ; RV32I-NEXT:    srli a1, a2, 17
 ; RV32I-NEXT:    add a1, a2, a1
@@ -954,13 +942,13 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sub s3, a2, a1
 ; RV32I-NEXT:    li a1, 23
 ; RV32I-NEXT:    call __modsi3@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    lui a0, 1
 ; RV32I-NEXT:    addi a1, a0, 1327
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __modsi3@plt
 ; RV32I-NEXT:    sh a0, 6(s0)
-; RV32I-NEXT:    sh s1, 4(s0)
+; RV32I-NEXT:    sh s2, 4(s0)
 ; RV32I-NEXT:    sh zero, 0(s0)
 ; RV32I-NEXT:    sh s3, 2(s0)
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
@@ -1017,7 +1005,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lh a2, 8(a1)
 ; RV64I-NEXT:    mv s0, a0
-; RV64I-NEXT:    lh s2, 24(a1)
+; RV64I-NEXT:    lh s1, 24(a1)
 ; RV64I-NEXT:    lh a0, 16(a1)
 ; RV64I-NEXT:    srli a1, a2, 49
 ; RV64I-NEXT:    add a1, a2, a1
@@ -1026,13 +1014,13 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    subw s3, a2, a1
 ; RV64I-NEXT:    li a1, 23
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    lui a0, 1
 ; RV64I-NEXT:    addiw a1, a0, 1327
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __moddi3@plt
 ; RV64I-NEXT:    sh a0, 6(s0)
-; RV64I-NEXT:    sh s1, 4(s0)
+; RV64I-NEXT:    sh s2, 4(s0)
 ; RV64I-NEXT:    sh zero, 0(s0)
 ; RV64I-NEXT:    sh s3, 2(s0)
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
@@ -1054,13 +1042,13 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    srli a5, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 4
 ; RV64IM-NEXT:    addw a3, a3, a5
-; RV64IM-NEXT:    li a6, 23
-; RV64IM-NEXT:    lui a5, %hi(.LCPI5_1)
-; RV64IM-NEXT:    ld a5, %lo(.LCPI5_1)(a5)
-; RV64IM-NEXT:    mulw a3, a3, a6
+; RV64IM-NEXT:    li a5, 23
+; RV64IM-NEXT:    lui a6, %hi(.LCPI5_1)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI5_1)(a6)
+; RV64IM-NEXT:    mulw a3, a3, a5
 ; RV64IM-NEXT:    lh a1, 8(a1)
 ; RV64IM-NEXT:    subw a2, a2, a3
-; RV64IM-NEXT:    mulh a3, a4, a5
+; RV64IM-NEXT:    mulh a3, a4, a6
 ; RV64IM-NEXT:    srli a5, a3, 63
 ; RV64IM-NEXT:    srli a3, a3, 11
 ; RV64IM-NEXT:    addw a3, a3, a5
@@ -1097,16 +1085,15 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw s2, 24(a1)
-; RV32I-NEXT:    lw s3, 28(a1)
-; RV32I-NEXT:    lw s4, 16(a1)
-; RV32I-NEXT:    lw s5, 20(a1)
-; RV32I-NEXT:    lw s6, 8(a1)
-; RV32I-NEXT:    lw s1, 12(a1)
+; RV32I-NEXT:    lw s0, 24(a1)
+; RV32I-NEXT:    lw s1, 28(a1)
+; RV32I-NEXT:    lw s2, 16(a1)
+; RV32I-NEXT:    lw s3, 20(a1)
+; RV32I-NEXT:    lw s4, 8(a1)
+; RV32I-NEXT:    lw s5, 12(a1)
 ; RV32I-NEXT:    lw a3, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s6, a0
 ; RV32I-NEXT:    li a2, 1
 ; RV32I-NEXT:    mv a0, a3
 ; RV32I-NEXT:    li a3, 0
@@ -1114,33 +1101,33 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV32I-NEXT:    mv s7, a0
 ; RV32I-NEXT:    mv s8, a1
 ; RV32I-NEXT:    li a2, 654
-; RV32I-NEXT:    mv a0, s6
-; RV32I-NEXT:    mv a1, s1
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    call __moddi3@plt
-; RV32I-NEXT:    mv s6, a0
-; RV32I-NEXT:    mv s9, a1
-; RV32I-NEXT:    li a2, 23
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s5
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __moddi3@plt
 ; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a2, a0, 1327
+; RV32I-NEXT:    mv s5, a1
+; RV32I-NEXT:    li a2, 23
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __moddi3@plt
-; RV32I-NEXT:    sw a1, 28(s0)
-; RV32I-NEXT:    sw a0, 24(s0)
-; RV32I-NEXT:    sw s1, 20(s0)
-; RV32I-NEXT:    sw s4, 16(s0)
-; RV32I-NEXT:    sw s9, 12(s0)
-; RV32I-NEXT:    sw s6, 8(s0)
-; RV32I-NEXT:    sw s8, 4(s0)
-; RV32I-NEXT:    sw s7, 0(s0)
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    lui a0, 1
+; RV32I-NEXT:    addi a2, a0, 1327
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __moddi3@plt
+; RV32I-NEXT:    sw a1, 28(s6)
+; RV32I-NEXT:    sw a0, 24(s6)
+; RV32I-NEXT:    sw s3, 20(s6)
+; RV32I-NEXT:    sw s2, 16(s6)
+; RV32I-NEXT:    sw s5, 12(s6)
+; RV32I-NEXT:    sw s4, 8(s6)
+; RV32I-NEXT:    sw s8, 4(s6)
+; RV32I-NEXT:    sw s7, 0(s6)
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
@@ -1151,7 +1138,6 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
@@ -1168,16 +1154,15 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV32IM-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32IM-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
 ; RV32IM-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s9, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s2, 24(a1)
-; RV32IM-NEXT:    lw s3, 28(a1)
-; RV32IM-NEXT:    lw s4, 16(a1)
-; RV32IM-NEXT:    lw s5, 20(a1)
-; RV32IM-NEXT:    lw s6, 8(a1)
-; RV32IM-NEXT:    lw s1, 12(a1)
+; RV32IM-NEXT:    lw s0, 24(a1)
+; RV32IM-NEXT:    lw s1, 28(a1)
+; RV32IM-NEXT:    lw s2, 16(a1)
+; RV32IM-NEXT:    lw s3, 20(a1)
+; RV32IM-NEXT:    lw s4, 8(a1)
+; RV32IM-NEXT:    lw s5, 12(a1)
 ; RV32IM-NEXT:    lw a3, 0(a1)
 ; RV32IM-NEXT:    lw a1, 4(a1)
-; RV32IM-NEXT:    mv s0, a0
+; RV32IM-NEXT:    mv s6, a0
 ; RV32IM-NEXT:    li a2, 1
 ; RV32IM-NEXT:    mv a0, a3
 ; RV32IM-NEXT:    li a3, 0
@@ -1185,33 +1170,33 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV32IM-NEXT:    mv s7, a0
 ; RV32IM-NEXT:    mv s8, a1
 ; RV32IM-NEXT:    li a2, 654
-; RV32IM-NEXT:    mv a0, s6
-; RV32IM-NEXT:    mv a1, s1
-; RV32IM-NEXT:    li a3, 0
-; RV32IM-NEXT:    call __moddi3@plt
-; RV32IM-NEXT:    mv s6, a0
-; RV32IM-NEXT:    mv s9, a1
-; RV32IM-NEXT:    li a2, 23
 ; RV32IM-NEXT:    mv a0, s4
 ; RV32IM-NEXT:    mv a1, s5
 ; RV32IM-NEXT:    li a3, 0
 ; RV32IM-NEXT:    call __moddi3@plt
 ; RV32IM-NEXT:    mv s4, a0
-; RV32IM-NEXT:    mv s1, a1
-; RV32IM-NEXT:    lui a0, 1
-; RV32IM-NEXT:    addi a2, a0, 1327
+; RV32IM-NEXT:    mv s5, a1
+; RV32IM-NEXT:    li a2, 23
 ; RV32IM-NEXT:    mv a0, s2
 ; RV32IM-NEXT:    mv a1, s3
 ; RV32IM-NEXT:    li a3, 0
 ; RV32IM-NEXT:    call __moddi3@plt
-; RV32IM-NEXT:    sw a1, 28(s0)
-; RV32IM-NEXT:    sw a0, 24(s0)
-; RV32IM-NEXT:    sw s1, 20(s0)
-; RV32IM-NEXT:    sw s4, 16(s0)
-; RV32IM-NEXT:    sw s9, 12(s0)
-; RV32IM-NEXT:    sw s6, 8(s0)
-; RV32IM-NEXT:    sw s8, 4(s0)
-; RV32IM-NEXT:    sw s7, 0(s0)
+; RV32IM-NEXT:    mv s2, a0
+; RV32IM-NEXT:    mv s3, a1
+; RV32IM-NEXT:    lui a0, 1
+; RV32IM-NEXT:    addi a2, a0, 1327
+; RV32IM-NEXT:    mv a0, s0
+; RV32IM-NEXT:    mv a1, s1
+; RV32IM-NEXT:    li a3, 0
+; RV32IM-NEXT:    call __moddi3@plt
+; RV32IM-NEXT:    sw a1, 28(s6)
+; RV32IM-NEXT:    sw a0, 24(s6)
+; RV32IM-NEXT:    sw s3, 20(s6)
+; RV32IM-NEXT:    sw s2, 16(s6)
+; RV32IM-NEXT:    sw s5, 12(s6)
+; RV32IM-NEXT:    sw s4, 8(s6)
+; RV32IM-NEXT:    sw s8, 4(s6)
+; RV32IM-NEXT:    sw s7, 0(s6)
 ; RV32IM-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
@@ -1222,7 +1207,6 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV32IM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    addi sp, sp, 48
 ; RV32IM-NEXT:    ret
 ;
@@ -1234,10 +1218,10 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    ld s2, 24(a1)
+; RV64I-NEXT:    ld s0, 24(a1)
 ; RV64I-NEXT:    ld s1, 16(a1)
 ; RV64I-NEXT:    ld a2, 8(a1)
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    li a1, 654
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __moddi3@plt
@@ -1248,12 +1232,12 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 1
 ; RV64I-NEXT:    addiw a1, a0, 1327
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __moddi3@plt
-; RV64I-NEXT:    sd a0, 24(s0)
-; RV64I-NEXT:    sd s1, 16(s0)
-; RV64I-NEXT:    sd s3, 8(s0)
-; RV64I-NEXT:    sd zero, 0(s0)
+; RV64I-NEXT:    sd a0, 24(s2)
+; RV64I-NEXT:    sd s1, 16(s2)
+; RV64I-NEXT:    sd s3, 8(s2)
+; RV64I-NEXT:    sd zero, 0(s2)
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
@@ -1267,7 +1251,7 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV64IM-NEXT:    ld a2, 16(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI6_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI6_0)(a3)
-; RV64IM-NEXT:    ld a6, 24(a1)
+; RV64IM-NEXT:    ld a4, 24(a1)
 ; RV64IM-NEXT:    ld a1, 8(a1)
 ; RV64IM-NEXT:    mulh a3, a2, a3
 ; RV64IM-NEXT:    add a3, a3, a2
@@ -1276,26 +1260,26 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV64IM-NEXT:    add a3, a3, a5
 ; RV64IM-NEXT:    lui a5, %hi(.LCPI6_1)
 ; RV64IM-NEXT:    ld a5, %lo(.LCPI6_1)(a5)
-; RV64IM-NEXT:    li a4, 23
-; RV64IM-NEXT:    mul a3, a3, a4
+; RV64IM-NEXT:    li a6, 23
+; RV64IM-NEXT:    mul a3, a3, a6
 ; RV64IM-NEXT:    sub a2, a2, a3
 ; RV64IM-NEXT:    mulh a3, a1, a5
-; RV64IM-NEXT:    srli a4, a3, 63
+; RV64IM-NEXT:    srli a5, a3, 63
 ; RV64IM-NEXT:    srai a3, a3, 8
-; RV64IM-NEXT:    add a3, a3, a4
-; RV64IM-NEXT:    lui a4, %hi(.LCPI6_2)
-; RV64IM-NEXT:    ld a4, %lo(.LCPI6_2)(a4)
-; RV64IM-NEXT:    li a5, 654
-; RV64IM-NEXT:    mul a3, a3, a5
+; RV64IM-NEXT:    add a3, a3, a5
+; RV64IM-NEXT:    lui a5, %hi(.LCPI6_2)
+; RV64IM-NEXT:    ld a5, %lo(.LCPI6_2)(a5)
+; RV64IM-NEXT:    li a6, 654
+; RV64IM-NEXT:    mul a3, a3, a6
 ; RV64IM-NEXT:    sub a1, a1, a3
-; RV64IM-NEXT:    mulh a3, a6, a4
-; RV64IM-NEXT:    srli a4, a3, 63
+; RV64IM-NEXT:    mulh a3, a4, a5
+; RV64IM-NEXT:    srli a5, a3, 63
 ; RV64IM-NEXT:    srai a3, a3, 11
-; RV64IM-NEXT:    add a3, a3, a4
-; RV64IM-NEXT:    lui a4, 1
-; RV64IM-NEXT:    addiw a4, a4, 1327
-; RV64IM-NEXT:    mul a3, a3, a4
-; RV64IM-NEXT:    sub a3, a6, a3
+; RV64IM-NEXT:    add a3, a3, a5
+; RV64IM-NEXT:    lui a5, 1
+; RV64IM-NEXT:    addiw a5, a5, 1327
+; RV64IM-NEXT:    mul a3, a3, a5
+; RV64IM-NEXT:    sub a3, a4, a3
 ; RV64IM-NEXT:    sd zero, 0(a0)
 ; RV64IM-NEXT:    sd a3, 24(a0)
 ; RV64IM-NEXT:    sd a1, 8(a0)
diff --git a/llvm/test/CodeGen/RISCV/ssub_sat.ll b/llvm/test/CodeGen/RISCV/ssub_sat.ll
index 7fe062176ec4f..b250773b30978 100644
--- a/llvm/test/CodeGen/RISCV/ssub_sat.ll
+++ b/llvm/test/CodeGen/RISCV/ssub_sat.ll
@@ -156,16 +156,16 @@ define i64 @func2(i64 %x, i64 %y) nounwind {
 ; RV32IZbbZbt-NEXT:    sltu a4, a0, a2
 ; RV32IZbbZbt-NEXT:    sub a5, a1, a3
 ; RV32IZbbZbt-NEXT:    sub a4, a5, a4
-; RV32IZbbZbt-NEXT:    srai a6, a4, 31
-; RV32IZbbZbt-NEXT:    lui a5, 524288
-; RV32IZbbZbt-NEXT:    xor a7, a6, a5
-; RV32IZbbZbt-NEXT:    xor a5, a1, a4
+; RV32IZbbZbt-NEXT:    srai a5, a4, 31
+; RV32IZbbZbt-NEXT:    lui a6, 524288
+; RV32IZbbZbt-NEXT:    xor a6, a5, a6
+; RV32IZbbZbt-NEXT:    xor a7, a1, a4
 ; RV32IZbbZbt-NEXT:    xor a1, a1, a3
-; RV32IZbbZbt-NEXT:    and a1, a1, a5
+; RV32IZbbZbt-NEXT:    and a1, a1, a7
 ; RV32IZbbZbt-NEXT:    slti a3, a1, 0
-; RV32IZbbZbt-NEXT:    cmov a1, a3, a7, a4
+; RV32IZbbZbt-NEXT:    cmov a1, a3, a6, a4
 ; RV32IZbbZbt-NEXT:    sub a0, a0, a2
-; RV32IZbbZbt-NEXT:    cmov a0, a3, a6, a0
+; RV32IZbbZbt-NEXT:    cmov a0, a3, a5, a0
 ; RV32IZbbZbt-NEXT:    ret
 ;
 ; RV64IZbbZbt-LABEL: func2:
diff --git a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
index 662eacc27b6aa..a7c366ce1679f 100644
--- a/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
+++ b/llvm/test/CodeGen/RISCV/ssub_sat_plus.ll
@@ -164,16 +164,16 @@ define i64 @func64(i64 %x, i64 %y, i64 %z) nounwind {
 ; RV32IZbbZbt-NEXT:    sltu a2, a0, a4
 ; RV32IZbbZbt-NEXT:    sub a3, a1, a5
 ; RV32IZbbZbt-NEXT:    sub a2, a3, a2
-; RV32IZbbZbt-NEXT:    srai a6, a2, 31
-; RV32IZbbZbt-NEXT:    lui a3, 524288
-; RV32IZbbZbt-NEXT:    xor a7, a6, a3
-; RV32IZbbZbt-NEXT:    xor a3, a1, a2
+; RV32IZbbZbt-NEXT:    srai a3, a2, 31
+; RV32IZbbZbt-NEXT:    lui a6, 524288
+; RV32IZbbZbt-NEXT:    xor a6, a3, a6
+; RV32IZbbZbt-NEXT:    xor a7, a1, a2
 ; RV32IZbbZbt-NEXT:    xor a1, a1, a5
-; RV32IZbbZbt-NEXT:    and a1, a1, a3
-; RV32IZbbZbt-NEXT:    slti a3, a1, 0
-; RV32IZbbZbt-NEXT:    cmov a1, a3, a7, a2
+; RV32IZbbZbt-NEXT:    and a1, a1, a7
+; RV32IZbbZbt-NEXT:    slti a5, a1, 0
+; RV32IZbbZbt-NEXT:    cmov a1, a5, a6, a2
 ; RV32IZbbZbt-NEXT:    sub a0, a0, a4
-; RV32IZbbZbt-NEXT:    cmov a0, a3, a6, a0
+; RV32IZbbZbt-NEXT:    cmov a0, a5, a3, a0
 ; RV32IZbbZbt-NEXT:    ret
 ;
 ; RV64IZbbZbt-LABEL: func64:
diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll
index 218490933333e..323ee554b67e4 100644
--- a/llvm/test/CodeGen/RISCV/stack-store-check.ll
+++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll
@@ -32,12 +32,12 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    lw s6, %lo(U)(a0)
 ; CHECK-NEXT:    lw s7, %lo(U+4)(a0)
 ; CHECK-NEXT:    lw s8, %lo(U+8)(a0)
-; CHECK-NEXT:    lw s2, %lo(U+12)(a0)
+; CHECK-NEXT:    lw s0, %lo(U+12)(a0)
 ; CHECK-NEXT:    sw zero, 612(sp)
 ; CHECK-NEXT:    sw zero, 608(sp)
 ; CHECK-NEXT:    sw zero, 604(sp)
 ; CHECK-NEXT:    sw zero, 600(sp)
-; CHECK-NEXT:    sw s2, 596(sp)
+; CHECK-NEXT:    sw s0, 596(sp)
 ; CHECK-NEXT:    sw s8, 592(sp)
 ; CHECK-NEXT:    sw s7, 588(sp)
 ; CHECK-NEXT:    addi a0, sp, 616
@@ -45,21 +45,21 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    addi a2, sp, 584
 ; CHECK-NEXT:    sw s6, 584(sp)
 ; CHECK-NEXT:    call __subtf3@plt
-; CHECK-NEXT:    lw s4, 616(sp)
-; CHECK-NEXT:    lw s5, 620(sp)
+; CHECK-NEXT:    lw s9, 616(sp)
+; CHECK-NEXT:    lw s2, 620(sp)
 ; CHECK-NEXT:    lw s3, 624(sp)
-; CHECK-NEXT:    lw s11, 628(sp)
-; CHECK-NEXT:    sw s2, 548(sp)
+; CHECK-NEXT:    lw s4, 628(sp)
+; CHECK-NEXT:    sw s0, 548(sp)
 ; CHECK-NEXT:    sw s8, 544(sp)
 ; CHECK-NEXT:    sw s7, 540(sp)
 ; CHECK-NEXT:    sw s6, 536(sp)
-; CHECK-NEXT:    sw s11, 564(sp)
+; CHECK-NEXT:    sw s4, 564(sp)
 ; CHECK-NEXT:    sw s3, 560(sp)
-; CHECK-NEXT:    sw s5, 556(sp)
+; CHECK-NEXT:    sw s2, 556(sp)
 ; CHECK-NEXT:    addi a0, sp, 568
 ; CHECK-NEXT:    addi a1, sp, 552
 ; CHECK-NEXT:    addi a2, sp, 536
-; CHECK-NEXT:    sw s4, 552(sp)
+; CHECK-NEXT:    sw s9, 552(sp)
 ; CHECK-NEXT:    call __subtf3@plt
 ; CHECK-NEXT:    lw a0, 568(sp)
 ; CHECK-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
@@ -73,7 +73,7 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    sw zero, 496(sp)
 ; CHECK-NEXT:    sw zero, 492(sp)
 ; CHECK-NEXT:    sw zero, 488(sp)
-; CHECK-NEXT:    sw s2, 516(sp)
+; CHECK-NEXT:    sw s0, 516(sp)
 ; CHECK-NEXT:    sw s8, 512(sp)
 ; CHECK-NEXT:    sw s7, 508(sp)
 ; CHECK-NEXT:    addi a0, sp, 520
@@ -81,10 +81,10 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    addi a2, sp, 488
 ; CHECK-NEXT:    sw s6, 504(sp)
 ; CHECK-NEXT:    call __addtf3@plt
-; CHECK-NEXT:    lw s9, 520(sp)
+; CHECK-NEXT:    lw s11, 520(sp)
 ; CHECK-NEXT:    lw s10, 524(sp)
-; CHECK-NEXT:    lw s0, 528(sp)
-; CHECK-NEXT:    sw s0, 20(sp) # 4-byte Folded Spill
+; CHECK-NEXT:    lw s5, 528(sp)
+; CHECK-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
 ; CHECK-NEXT:    lw s1, 532(sp)
 ; CHECK-NEXT:    sw s1, 16(sp) # 4-byte Folded Spill
 ; CHECK-NEXT:    lui a0, %hi(Y1)
@@ -100,13 +100,13 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    sw a3, 304(sp)
 ; CHECK-NEXT:    sw a2, 300(sp)
 ; CHECK-NEXT:    sw a1, 296(sp)
-; CHECK-NEXT:    sw s11, 324(sp)
+; CHECK-NEXT:    sw s4, 324(sp)
 ; CHECK-NEXT:    sw s3, 320(sp)
-; CHECK-NEXT:    sw s5, 316(sp)
+; CHECK-NEXT:    sw s2, 316(sp)
 ; CHECK-NEXT:    addi a0, sp, 328
 ; CHECK-NEXT:    addi a1, sp, 312
 ; CHECK-NEXT:    addi a2, sp, 296
-; CHECK-NEXT:    sw s4, 312(sp)
+; CHECK-NEXT:    sw s9, 312(sp)
 ; CHECK-NEXT:    call __multf3@plt
 ; CHECK-NEXT:    lw a0, 328(sp)
 ; CHECK-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
@@ -114,18 +114,18 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
 ; CHECK-NEXT:    lw a0, 336(sp)
 ; CHECK-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    lw s4, 340(sp)
-; CHECK-NEXT:    sw s2, 468(sp)
+; CHECK-NEXT:    lw s9, 340(sp)
+; CHECK-NEXT:    sw s0, 468(sp)
 ; CHECK-NEXT:    sw s8, 464(sp)
 ; CHECK-NEXT:    sw s7, 460(sp)
 ; CHECK-NEXT:    sw s6, 456(sp)
 ; CHECK-NEXT:    sw s1, 452(sp)
-; CHECK-NEXT:    sw s0, 448(sp)
+; CHECK-NEXT:    sw s5, 448(sp)
 ; CHECK-NEXT:    sw s10, 444(sp)
 ; CHECK-NEXT:    addi a0, sp, 472
 ; CHECK-NEXT:    addi a1, sp, 456
 ; CHECK-NEXT:    addi a2, sp, 440
-; CHECK-NEXT:    sw s9, 440(sp)
+; CHECK-NEXT:    sw s11, 440(sp)
 ; CHECK-NEXT:    call __addtf3@plt
 ; CHECK-NEXT:    lw a3, 472(sp)
 ; CHECK-NEXT:    lw a0, 476(sp)
@@ -152,31 +152,31 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    sw a2, %lo(X+8)(a4)
 ; CHECK-NEXT:    sw a3, %lo(X+4)(a4)
 ; CHECK-NEXT:    sw a0, %lo(X)(a4)
-; CHECK-NEXT:    lw s8, 4(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    sw s8, 212(sp)
-; CHECK-NEXT:    lw s7, 8(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    sw s7, 208(sp)
-; CHECK-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    sw s11, 204(sp)
+; CHECK-NEXT:    lw s4, 4(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    sw s4, 212(sp)
+; CHECK-NEXT:    lw s3, 8(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    sw s3, 208(sp)
+; CHECK-NEXT:    lw s2, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    sw s2, 204(sp)
 ; CHECK-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    sw a0, 200(sp)
 ; CHECK-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    sw a0, 228(sp)
-; CHECK-NEXT:    lw s3, 24(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    sw s3, 224(sp)
-; CHECK-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    sw s2, 220(sp)
+; CHECK-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    sw s1, 224(sp)
+; CHECK-NEXT:    lw s0, 32(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    sw s0, 220(sp)
 ; CHECK-NEXT:    addi a0, sp, 232
 ; CHECK-NEXT:    addi a1, sp, 216
 ; CHECK-NEXT:    addi a2, sp, 200
-; CHECK-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    sw s1, 216(sp)
+; CHECK-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    sw s8, 216(sp)
 ; CHECK-NEXT:    call __multf3@plt
 ; CHECK-NEXT:    lw s5, 232(sp)
 ; CHECK-NEXT:    lw a0, 236(sp)
 ; CHECK-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
 ; CHECK-NEXT:    lw s6, 240(sp)
-; CHECK-NEXT:    lw s0, 244(sp)
+; CHECK-NEXT:    lw s7, 244(sp)
 ; CHECK-NEXT:    sw zero, 356(sp)
 ; CHECK-NEXT:    sw zero, 352(sp)
 ; CHECK-NEXT:    sw zero, 348(sp)
@@ -189,7 +189,7 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    addi a0, sp, 376
 ; CHECK-NEXT:    addi a1, sp, 360
 ; CHECK-NEXT:    addi a2, sp, 344
-; CHECK-NEXT:    sw s9, 360(sp)
+; CHECK-NEXT:    sw s11, 360(sp)
 ; CHECK-NEXT:    call __multf3@plt
 ; CHECK-NEXT:    lw a0, 376(sp)
 ; CHECK-NEXT:    lw a1, 388(sp)
@@ -202,10 +202,10 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    sw a0, %lo(S)(a4)
 ; CHECK-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    sw a0, 260(sp)
-; CHECK-NEXT:    sw s3, 256(sp)
-; CHECK-NEXT:    sw s2, 252(sp)
-; CHECK-NEXT:    sw s1, 248(sp)
-; CHECK-NEXT:    sw s4, 276(sp)
+; CHECK-NEXT:    sw s1, 256(sp)
+; CHECK-NEXT:    sw s0, 252(sp)
+; CHECK-NEXT:    sw s8, 248(sp)
+; CHECK-NEXT:    sw s9, 276(sp)
 ; CHECK-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    sw a0, 272(sp)
 ; CHECK-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
@@ -229,7 +229,7 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    sw zero, 160(sp)
 ; CHECK-NEXT:    sw zero, 156(sp)
 ; CHECK-NEXT:    sw zero, 152(sp)
-; CHECK-NEXT:    sw s0, 180(sp)
+; CHECK-NEXT:    sw s7, 180(sp)
 ; CHECK-NEXT:    sw s6, 176(sp)
 ; CHECK-NEXT:    lw a0, 0(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    sw a0, 172(sp)
@@ -251,9 +251,9 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    sw zero, 112(sp)
 ; CHECK-NEXT:    sw zero, 108(sp)
 ; CHECK-NEXT:    sw zero, 104(sp)
-; CHECK-NEXT:    sw s8, 132(sp)
-; CHECK-NEXT:    sw s7, 128(sp)
-; CHECK-NEXT:    sw s11, 124(sp)
+; CHECK-NEXT:    sw s4, 132(sp)
+; CHECK-NEXT:    sw s3, 128(sp)
+; CHECK-NEXT:    sw s2, 124(sp)
 ; CHECK-NEXT:    addi a0, sp, 136
 ; CHECK-NEXT:    addi a1, sp, 120
 ; CHECK-NEXT:    addi a2, sp, 104
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index c358b6172d468..200a8731cbd64 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -45,12 +45,12 @@ define void @caller_indirect_tail(i32 %a) nounwind {
 ; CHECK-NOT: tail callee_indirect2
 
 ; CHECK: lui a0, %hi(callee_indirect2)
-; CHECK-NEXT: addi a5, a0, %lo(callee_indirect2)
-; CHECK-NEXT: jr a5
+; CHECK-NEXT: addi t1, a0, %lo(callee_indirect2)
+; CHECK-NEXT: jr t1
 
 ; CHECK: lui a0, %hi(callee_indirect1)
-; CHECK-NEXT: addi a5, a0, %lo(callee_indirect1)
-; CHECK-NEXT: jr a5
+; CHECK-NEXT: addi t1, a0, %lo(callee_indirect1)
+; CHECK-NEXT: jr t1
 entry:
   %tobool = icmp eq i32 %a, 0
   %callee = select i1 %tobool, void ()* @callee_indirect1, void ()* @callee_indirect2
diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
index 09f74a175802d..8e2c3f350df81 100644
--- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
@@ -10,103 +10,99 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 {
 ; RISCV32-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
 ; RISCV32-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
 ; RISCV32-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
-; RISCV32-NEXT:    sw s5, 8(sp) # 4-byte Folded Spill
-; RISCV32-NEXT:    sw s6, 4(sp) # 4-byte Folded Spill
-; RISCV32-NEXT:    lw a6, 12(a1)
+; RISCV32-NEXT:    lw a3, 12(a1)
 ; RISCV32-NEXT:    lw a7, 12(a2)
-; RISCV32-NEXT:    lw t3, 8(a1)
+; RISCV32-NEXT:    lw a6, 8(a1)
 ; RISCV32-NEXT:    lw a4, 0(a2)
 ; RISCV32-NEXT:    lw a5, 0(a1)
-; RISCV32-NEXT:    lw a3, 4(a1)
-; RISCV32-NEXT:    lw s2, 8(a2)
+; RISCV32-NEXT:    lw t3, 4(a1)
+; RISCV32-NEXT:    lw t0, 8(a2)
 ; RISCV32-NEXT:    lw a2, 4(a2)
 ; RISCV32-NEXT:    mulhu a1, a5, a4
-; RISCV32-NEXT:    mul s1, a3, a4
-; RISCV32-NEXT:    add a1, s1, a1
-; RISCV32-NEXT:    sltu s1, a1, s1
-; RISCV32-NEXT:    mulhu s0, a3, a4
-; RISCV32-NEXT:    add t4, s0, s1
-; RISCV32-NEXT:    mul s0, a5, a2
-; RISCV32-NEXT:    add t0, s0, a1
-; RISCV32-NEXT:    sltu a1, t0, s0
-; RISCV32-NEXT:    mulhu s0, a5, a2
-; RISCV32-NEXT:    add a1, s0, a1
-; RISCV32-NEXT:    add a1, t4, a1
-; RISCV32-NEXT:    mul s0, a3, a2
-; RISCV32-NEXT:    add s1, s0, a1
-; RISCV32-NEXT:    mul t1, s2, a5
-; RISCV32-NEXT:    mul s3, t3, a4
+; RISCV32-NEXT:    mul t1, t3, a4
+; RISCV32-NEXT:    add a1, t1, a1
+; RISCV32-NEXT:    sltu t1, a1, t1
+; RISCV32-NEXT:    mulhu t2, t3, a4
+; RISCV32-NEXT:    add t4, t2, t1
+; RISCV32-NEXT:    mul t1, a5, a2
+; RISCV32-NEXT:    add a1, t1, a1
+; RISCV32-NEXT:    sltu t1, a1, t1
+; RISCV32-NEXT:    mulhu t2, a5, a2
+; RISCV32-NEXT:    add t1, t2, t1
+; RISCV32-NEXT:    add t5, t4, t1
+; RISCV32-NEXT:    mul t6, t3, a2
+; RISCV32-NEXT:    add s0, t6, t5
+; RISCV32-NEXT:    mul t1, t0, a5
+; RISCV32-NEXT:    mul s3, a6, a4
 ; RISCV32-NEXT:    add s4, s3, t1
-; RISCV32-NEXT:    add t1, s1, s4
-; RISCV32-NEXT:    sltu t2, t1, s1
-; RISCV32-NEXT:    sltu s1, s1, s0
-; RISCV32-NEXT:    sltu a1, a1, t4
-; RISCV32-NEXT:    mulhu s0, a3, a2
-; RISCV32-NEXT:    add a1, s0, a1
-; RISCV32-NEXT:    add s0, a1, s1
-; RISCV32-NEXT:    mul a1, a3, s2
-; RISCV32-NEXT:    mul s1, a7, a5
-; RISCV32-NEXT:    add a1, s1, a1
-; RISCV32-NEXT:    mulhu s5, s2, a5
-; RISCV32-NEXT:    add s6, s5, a1
-; RISCV32-NEXT:    mul s1, a2, t3
-; RISCV32-NEXT:    mul a1, a6, a4
-; RISCV32-NEXT:    add a1, a1, s1
-; RISCV32-NEXT:    mulhu t5, t3, a4
-; RISCV32-NEXT:    add t6, t5, a1
-; RISCV32-NEXT:    add a1, t6, s6
-; RISCV32-NEXT:    sltu s1, s4, s3
-; RISCV32-NEXT:    add a1, a1, s1
-; RISCV32-NEXT:    add a1, s0, a1
-; RISCV32-NEXT:    add t4, a1, t2
+; RISCV32-NEXT:    add t1, s0, s4
+; RISCV32-NEXT:    sltu t2, t1, s0
+; RISCV32-NEXT:    sltu t6, s0, t6
+; RISCV32-NEXT:    sltu t4, t5, t4
+; RISCV32-NEXT:    mulhu t5, t3, a2
+; RISCV32-NEXT:    add t4, t5, t4
+; RISCV32-NEXT:    add s0, t4, t6
+; RISCV32-NEXT:    mul t4, t3, t0
+; RISCV32-NEXT:    mul t5, a7, a5
+; RISCV32-NEXT:    add t4, t5, t4
+; RISCV32-NEXT:    mulhu s1, t0, a5
+; RISCV32-NEXT:    add s2, s1, t4
+; RISCV32-NEXT:    mul t4, a2, a6
+; RISCV32-NEXT:    mul t5, a3, a4
+; RISCV32-NEXT:    add t4, t5, t4
+; RISCV32-NEXT:    mulhu t5, a6, a4
+; RISCV32-NEXT:    add t6, t5, t4
+; RISCV32-NEXT:    add t4, t6, s2
+; RISCV32-NEXT:    sltu s3, s4, s3
+; RISCV32-NEXT:    add t4, t4, s3
+; RISCV32-NEXT:    add t4, s0, t4
+; RISCV32-NEXT:    add t4, t4, t2
 ; RISCV32-NEXT:    beq t4, s0, .LBB0_2
 ; RISCV32-NEXT:  # %bb.1: # %start
 ; RISCV32-NEXT:    sltu t2, t4, s0
 ; RISCV32-NEXT:  .LBB0_2: # %start
-; RISCV32-NEXT:    sltu a1, s6, s5
+; RISCV32-NEXT:    sltu s0, s2, s1
+; RISCV32-NEXT:    snez s1, t3
+; RISCV32-NEXT:    snez s2, a7
+; RISCV32-NEXT:    and s1, s2, s1
+; RISCV32-NEXT:    mulhu s2, a7, a5
+; RISCV32-NEXT:    snez s2, s2
+; RISCV32-NEXT:    or s1, s1, s2
+; RISCV32-NEXT:    mulhu t3, t3, t0
+; RISCV32-NEXT:    snez t3, t3
+; RISCV32-NEXT:    or t3, s1, t3
+; RISCV32-NEXT:    or t3, t3, s0
+; RISCV32-NEXT:    sltu t5, t6, t5
+; RISCV32-NEXT:    snez t6, a2
 ; RISCV32-NEXT:    snez s0, a3
-; RISCV32-NEXT:    snez s1, a7
-; RISCV32-NEXT:    and s0, s1, s0
-; RISCV32-NEXT:    mulhu s1, a7, a5
-; RISCV32-NEXT:    snez s1, s1
-; RISCV32-NEXT:    or s0, s0, s1
-; RISCV32-NEXT:    mulhu a3, a3, s2
-; RISCV32-NEXT:    snez a3, a3
-; RISCV32-NEXT:    or a3, s0, a3
-; RISCV32-NEXT:    or a1, a3, a1
-; RISCV32-NEXT:    sltu a3, t6, t5
-; RISCV32-NEXT:    snez s1, a2
-; RISCV32-NEXT:    snez s0, a6
-; RISCV32-NEXT:    and s1, s0, s1
-; RISCV32-NEXT:    mulhu s0, a6, a4
+; RISCV32-NEXT:    and t6, s0, t6
+; RISCV32-NEXT:    mulhu s0, a3, a4
 ; RISCV32-NEXT:    snez s0, s0
-; RISCV32-NEXT:    or s1, s1, s0
-; RISCV32-NEXT:    mulhu a2, a2, t3
+; RISCV32-NEXT:    or t6, t6, s0
+; RISCV32-NEXT:    mulhu a2, a2, a6
 ; RISCV32-NEXT:    snez a2, a2
-; RISCV32-NEXT:    or a2, s1, a2
-; RISCV32-NEXT:    or a2, a2, a3
-; RISCV32-NEXT:    or a3, s2, a7
+; RISCV32-NEXT:    or a2, t6, a2
+; RISCV32-NEXT:    or a2, a2, t5
+; RISCV32-NEXT:    or a7, t0, a7
+; RISCV32-NEXT:    snez a7, a7
+; RISCV32-NEXT:    or a3, a6, a3
 ; RISCV32-NEXT:    snez a3, a3
-; RISCV32-NEXT:    or s1, t3, a6
-; RISCV32-NEXT:    snez s1, s1
-; RISCV32-NEXT:    and a3, s1, a3
+; RISCV32-NEXT:    and a3, a3, a7
 ; RISCV32-NEXT:    or a2, a3, a2
-; RISCV32-NEXT:    or a1, a2, a1
-; RISCV32-NEXT:    or a1, a1, t2
-; RISCV32-NEXT:    mul a2, a5, a4
-; RISCV32-NEXT:    andi a1, a1, 1
-; RISCV32-NEXT:    sw a2, 0(a0)
-; RISCV32-NEXT:    sw t0, 4(a0)
+; RISCV32-NEXT:    or a2, a2, t3
+; RISCV32-NEXT:    or a2, a2, t2
+; RISCV32-NEXT:    mul a3, a5, a4
+; RISCV32-NEXT:    andi a2, a2, 1
+; RISCV32-NEXT:    sw a3, 0(a0)
+; RISCV32-NEXT:    sw a1, 4(a0)
 ; RISCV32-NEXT:    sw t1, 8(a0)
 ; RISCV32-NEXT:    sw t4, 12(a0)
-; RISCV32-NEXT:    sb a1, 16(a0)
+; RISCV32-NEXT:    sb a2, 16(a0)
 ; RISCV32-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
 ; RISCV32-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
 ; RISCV32-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
 ; RISCV32-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
 ; RISCV32-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
-; RISCV32-NEXT:    lw s5, 8(sp) # 4-byte Folded Reload
-; RISCV32-NEXT:    lw s6, 4(sp) # 4-byte Folded Reload
 ; RISCV32-NEXT:    addi sp, sp, 32
 ; RISCV32-NEXT:    ret
 start:
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 9acc1ad7e0347..e0de325fd5359 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -394,8 +394,8 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64-NEXT:    lwu a1, 0(s0)
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    or a0, a1, a0
-; RV64-NEXT:    srli s2, a0, 11
-; RV64-NEXT:    srli s1, a0, 22
+; RV64-NEXT:    srli s1, a0, 11
+; RV64-NEXT:    srli s2, a0, 22
 ; RV64-NEXT:    andi a0, a0, 2047
 ; RV64-NEXT:    li a1, 683
 ; RV64-NEXT:    call __muldi3@plt
@@ -407,14 +407,14 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64-NEXT:    li a1, 341
 ; RV64-NEXT:    sltu s3, a1, a0
 ; RV64-NEXT:    li a1, 819
-; RV64-NEXT:    mv a0, s1
+; RV64-NEXT:    mv a0, s2
 ; RV64-NEXT:    call __muldi3@plt
 ; RV64-NEXT:    addiw a0, a0, -1638
 ; RV64-NEXT:    andi a0, a0, 2047
 ; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    sltu s1, a1, a0
+; RV64-NEXT:    sltu s2, a1, a0
 ; RV64-NEXT:    li a1, 1463
-; RV64-NEXT:    mv a0, s2
+; RV64-NEXT:    mv a0, s1
 ; RV64-NEXT:    call __muldi3@plt
 ; RV64-NEXT:    addiw a0, a0, -1463
 ; RV64-NEXT:    andi a0, a0, 2047
@@ -426,7 +426,7 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64-NEXT:    andi a0, a0, 2047
 ; RV64-NEXT:    slli a0, a0, 11
 ; RV64-NEXT:    or a0, a1, a0
-; RV64-NEXT:    slli a1, s1, 22
+; RV64-NEXT:    slli a1, s2, 22
 ; RV64-NEXT:    sub a0, a0, a1
 ; RV64-NEXT:    sw a0, 0(s0)
 ; RV64-NEXT:    slli a0, a0, 31
diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index c028c7d387dcd..b804e53b6a71d 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -19,127 +19,123 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lhu s2, 12(a1)
-; RV32I-NEXT:    lhu s3, 8(a1)
-; RV32I-NEXT:    lhu s0, 4(a1)
+; RV32I-NEXT:    lhu s0, 12(a1)
+; RV32I-NEXT:    lhu s1, 8(a1)
+; RV32I-NEXT:    lhu s2, 4(a1)
 ; RV32I-NEXT:    lhu a2, 0(a1)
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    call __umodsi3@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    li a1, 124
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __umodsi3@plt
-; RV32I-NEXT:    mv s5, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    li a1, 98
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __umodsi3@plt
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    li a1, 1003
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __umodsi3@plt
-; RV32I-NEXT:    sh a0, 6(s1)
-; RV32I-NEXT:    sh s0, 4(s1)
-; RV32I-NEXT:    sh s5, 2(s1)
-; RV32I-NEXT:    sh s4, 0(s1)
+; RV32I-NEXT:    sh a0, 6(s3)
+; RV32I-NEXT:    sh s1, 4(s3)
+; RV32I-NEXT:    sh s2, 2(s3)
+; RV32I-NEXT:    sh s4, 0(s3)
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: fold_urem_vec_1:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lhu a6, 12(a1)
+; RV32IM-NEXT:    lhu a2, 12(a1)
 ; RV32IM-NEXT:    lhu a3, 8(a1)
 ; RV32IM-NEXT:    lhu a4, 0(a1)
 ; RV32IM-NEXT:    lhu a1, 4(a1)
 ; RV32IM-NEXT:    lui a5, 364242
 ; RV32IM-NEXT:    addi a5, a5, 777
 ; RV32IM-NEXT:    mulhu a5, a4, a5
-; RV32IM-NEXT:    sub a2, a4, a5
-; RV32IM-NEXT:    srli a2, a2, 1
-; RV32IM-NEXT:    add a2, a2, a5
-; RV32IM-NEXT:    srli a2, a2, 6
-; RV32IM-NEXT:    li a5, 95
-; RV32IM-NEXT:    mul a2, a2, a5
-; RV32IM-NEXT:    sub a2, a4, a2
-; RV32IM-NEXT:    srli a4, a1, 2
-; RV32IM-NEXT:    lui a5, 135300
-; RV32IM-NEXT:    addi a5, a5, 529
-; RV32IM-NEXT:    mulhu a4, a4, a5
-; RV32IM-NEXT:    srli a4, a4, 2
-; RV32IM-NEXT:    li a5, 124
-; RV32IM-NEXT:    mul a4, a4, a5
-; RV32IM-NEXT:    sub a1, a1, a4
-; RV32IM-NEXT:    lui a4, 342392
-; RV32IM-NEXT:    addi a4, a4, 669
-; RV32IM-NEXT:    mulhu a4, a3, a4
-; RV32IM-NEXT:    srli a4, a4, 5
-; RV32IM-NEXT:    li a5, 98
-; RV32IM-NEXT:    mul a4, a4, a5
-; RV32IM-NEXT:    sub a3, a3, a4
-; RV32IM-NEXT:    lui a4, 267633
-; RV32IM-NEXT:    addi a4, a4, -1809
-; RV32IM-NEXT:    mulhu a4, a6, a4
-; RV32IM-NEXT:    srli a4, a4, 8
-; RV32IM-NEXT:    li a5, 1003
-; RV32IM-NEXT:    mul a4, a4, a5
-; RV32IM-NEXT:    sub a4, a6, a4
-; RV32IM-NEXT:    sh a4, 6(a0)
+; RV32IM-NEXT:    sub a6, a4, a5
+; RV32IM-NEXT:    srli a6, a6, 1
+; RV32IM-NEXT:    add a5, a6, a5
+; RV32IM-NEXT:    srli a5, a5, 6
+; RV32IM-NEXT:    li a6, 95
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a4, a4, a5
+; RV32IM-NEXT:    srli a5, a1, 2
+; RV32IM-NEXT:    lui a6, 135300
+; RV32IM-NEXT:    addi a6, a6, 529
+; RV32IM-NEXT:    mulhu a5, a5, a6
+; RV32IM-NEXT:    srli a5, a5, 2
+; RV32IM-NEXT:    li a6, 124
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a1, a1, a5
+; RV32IM-NEXT:    lui a5, 342392
+; RV32IM-NEXT:    addi a5, a5, 669
+; RV32IM-NEXT:    mulhu a5, a3, a5
+; RV32IM-NEXT:    srli a5, a5, 5
+; RV32IM-NEXT:    li a6, 98
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a3, a3, a5
+; RV32IM-NEXT:    lui a5, 267633
+; RV32IM-NEXT:    addi a5, a5, -1809
+; RV32IM-NEXT:    mulhu a5, a2, a5
+; RV32IM-NEXT:    srli a5, a5, 8
+; RV32IM-NEXT:    li a6, 1003
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a2, a2, a5
+; RV32IM-NEXT:    sh a2, 6(a0)
 ; RV32IM-NEXT:    sh a3, 4(a0)
 ; RV32IM-NEXT:    sh a1, 2(a0)
-; RV32IM-NEXT:    sh a2, 0(a0)
+; RV32IM-NEXT:    sh a4, 0(a0)
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: fold_urem_vec_1:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -64
-; RV64I-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lhu s2, 24(a1)
-; RV64I-NEXT:    lhu s3, 16(a1)
-; RV64I-NEXT:    lhu s0, 8(a1)
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lhu s0, 24(a1)
+; RV64I-NEXT:    lhu s1, 16(a1)
+; RV64I-NEXT:    lhu s2, 8(a1)
 ; RV64I-NEXT:    lhu a2, 0(a1)
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __umoddi3@plt
 ; RV64I-NEXT:    mv s4, a0
 ; RV64I-NEXT:    li a1, 124
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    mv s5, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    li a1, 98
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    li a1, 1003
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    sh a0, 6(s1)
-; RV64I-NEXT:    sh s0, 4(s1)
-; RV64I-NEXT:    sh s5, 2(s1)
-; RV64I-NEXT:    sh s4, 0(s1)
-; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 64
+; RV64I-NEXT:    sh a0, 6(s3)
+; RV64I-NEXT:    sh s1, 4(s3)
+; RV64I-NEXT:    sh s2, 2(s3)
+; RV64I-NEXT:    sh s4, 0(s3)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: fold_urem_vec_1:
@@ -147,44 +143,44 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    lhu a2, 0(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI0_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI0_0)(a3)
-; RV64IM-NEXT:    lhu a6, 24(a1)
+; RV64IM-NEXT:    lhu a4, 24(a1)
 ; RV64IM-NEXT:    lhu a5, 16(a1)
 ; RV64IM-NEXT:    lhu a1, 8(a1)
 ; RV64IM-NEXT:    mulhu a3, a2, a3
-; RV64IM-NEXT:    sub a4, a2, a3
-; RV64IM-NEXT:    srli a4, a4, 1
-; RV64IM-NEXT:    add a3, a4, a3
+; RV64IM-NEXT:    sub a6, a2, a3
+; RV64IM-NEXT:    srli a6, a6, 1
+; RV64IM-NEXT:    add a3, a6, a3
 ; RV64IM-NEXT:    srli a3, a3, 6
-; RV64IM-NEXT:    li a7, 95
-; RV64IM-NEXT:    lui a4, %hi(.LCPI0_1)
-; RV64IM-NEXT:    ld a4, %lo(.LCPI0_1)(a4)
-; RV64IM-NEXT:    mulw a3, a3, a7
-; RV64IM-NEXT:    subw t0, a2, a3
+; RV64IM-NEXT:    li a6, 95
+; RV64IM-NEXT:    lui a7, %hi(.LCPI0_1)
+; RV64IM-NEXT:    ld a7, %lo(.LCPI0_1)(a7)
+; RV64IM-NEXT:    mulw a3, a3, a6
+; RV64IM-NEXT:    subw a2, a2, a3
 ; RV64IM-NEXT:    srli a3, a1, 2
-; RV64IM-NEXT:    mulhu a3, a3, a4
+; RV64IM-NEXT:    mulhu a3, a3, a7
 ; RV64IM-NEXT:    srli a3, a3, 3
-; RV64IM-NEXT:    li a7, 124
-; RV64IM-NEXT:    lui a4, %hi(.LCPI0_2)
-; RV64IM-NEXT:    ld a4, %lo(.LCPI0_2)(a4)
-; RV64IM-NEXT:    mulw a3, a3, a7
+; RV64IM-NEXT:    li a6, 124
+; RV64IM-NEXT:    lui a7, %hi(.LCPI0_2)
+; RV64IM-NEXT:    ld a7, %lo(.LCPI0_2)(a7)
+; RV64IM-NEXT:    mulw a3, a3, a6
 ; RV64IM-NEXT:    subw a1, a1, a3
 ; RV64IM-NEXT:    srli a3, a5, 1
-; RV64IM-NEXT:    mulhu a3, a3, a4
+; RV64IM-NEXT:    mulhu a3, a3, a7
 ; RV64IM-NEXT:    srli a3, a3, 4
-; RV64IM-NEXT:    lui a4, %hi(.LCPI0_3)
-; RV64IM-NEXT:    ld a4, %lo(.LCPI0_3)(a4)
-; RV64IM-NEXT:    li a2, 98
-; RV64IM-NEXT:    mulw a2, a3, a2
-; RV64IM-NEXT:    subw a2, a5, a2
-; RV64IM-NEXT:    mulhu a3, a6, a4
-; RV64IM-NEXT:    srli a3, a3, 7
-; RV64IM-NEXT:    li a4, 1003
-; RV64IM-NEXT:    mulw a3, a3, a4
-; RV64IM-NEXT:    subw a3, a6, a3
-; RV64IM-NEXT:    sh a3, 6(a0)
-; RV64IM-NEXT:    sh a2, 4(a0)
+; RV64IM-NEXT:    lui a6, %hi(.LCPI0_3)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI0_3)(a6)
+; RV64IM-NEXT:    li a7, 98
+; RV64IM-NEXT:    mulw a3, a3, a7
+; RV64IM-NEXT:    subw a3, a5, a3
+; RV64IM-NEXT:    mulhu a5, a4, a6
+; RV64IM-NEXT:    srli a5, a5, 7
+; RV64IM-NEXT:    li a6, 1003
+; RV64IM-NEXT:    mulw a5, a5, a6
+; RV64IM-NEXT:    subw a4, a4, a5
+; RV64IM-NEXT:    sh a4, 6(a0)
+; RV64IM-NEXT:    sh a3, 4(a0)
 ; RV64IM-NEXT:    sh a1, 2(a0)
-; RV64IM-NEXT:    sh t0, 0(a0)
+; RV64IM-NEXT:    sh a2, 0(a0)
 ; RV64IM-NEXT:    ret
   %1 = urem <4 x i16> %x, <i16 95, i16 124, i16 98, i16 1003>
   ret <4 x i16> %1
@@ -200,126 +196,122 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s5, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lhu s2, 12(a1)
-; RV32I-NEXT:    lhu s3, 8(a1)
-; RV32I-NEXT:    lhu s0, 4(a1)
+; RV32I-NEXT:    lhu s0, 12(a1)
+; RV32I-NEXT:    lhu s1, 8(a1)
+; RV32I-NEXT:    lhu s2, 4(a1)
 ; RV32I-NEXT:    lhu a2, 0(a1)
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    call __umodsi3@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __umodsi3@plt
-; RV32I-NEXT:    mv s5, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __umodsi3@plt
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __umodsi3@plt
-; RV32I-NEXT:    sh a0, 6(s1)
-; RV32I-NEXT:    sh s0, 4(s1)
-; RV32I-NEXT:    sh s5, 2(s1)
-; RV32I-NEXT:    sh s4, 0(s1)
+; RV32I-NEXT:    sh a0, 6(s3)
+; RV32I-NEXT:    sh s1, 4(s3)
+; RV32I-NEXT:    sh s2, 2(s3)
+; RV32I-NEXT:    sh s4, 0(s3)
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: fold_urem_vec_2:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lhu a6, 12(a1)
-; RV32IM-NEXT:    lhu a7, 8(a1)
+; RV32IM-NEXT:    lhu a2, 12(a1)
+; RV32IM-NEXT:    lhu a3, 8(a1)
 ; RV32IM-NEXT:    lhu a4, 0(a1)
 ; RV32IM-NEXT:    lhu a1, 4(a1)
 ; RV32IM-NEXT:    lui a5, 364242
 ; RV32IM-NEXT:    addi a5, a5, 777
-; RV32IM-NEXT:    mulhu a2, a4, a5
-; RV32IM-NEXT:    sub a3, a4, a2
-; RV32IM-NEXT:    srli a3, a3, 1
-; RV32IM-NEXT:    add a2, a3, a2
-; RV32IM-NEXT:    srli a2, a2, 6
-; RV32IM-NEXT:    li a3, 95
-; RV32IM-NEXT:    mul a2, a2, a3
-; RV32IM-NEXT:    sub t0, a4, a2
-; RV32IM-NEXT:    mulhu a4, a1, a5
-; RV32IM-NEXT:    sub a2, a1, a4
-; RV32IM-NEXT:    srli a2, a2, 1
-; RV32IM-NEXT:    add a2, a2, a4
-; RV32IM-NEXT:    srli a2, a2, 6
-; RV32IM-NEXT:    mul a2, a2, a3
-; RV32IM-NEXT:    sub a1, a1, a2
-; RV32IM-NEXT:    mulhu a2, a7, a5
-; RV32IM-NEXT:    sub a4, a7, a2
-; RV32IM-NEXT:    srli a4, a4, 1
-; RV32IM-NEXT:    add a2, a4, a2
-; RV32IM-NEXT:    srli a2, a2, 6
-; RV32IM-NEXT:    mul a2, a2, a3
-; RV32IM-NEXT:    sub a2, a7, a2
-; RV32IM-NEXT:    mulhu a4, a6, a5
-; RV32IM-NEXT:    sub a5, a6, a4
-; RV32IM-NEXT:    srli a5, a5, 1
-; RV32IM-NEXT:    add a4, a5, a4
-; RV32IM-NEXT:    srli a4, a4, 6
-; RV32IM-NEXT:    mul a3, a4, a3
-; RV32IM-NEXT:    sub a3, a6, a3
-; RV32IM-NEXT:    sh a3, 6(a0)
-; RV32IM-NEXT:    sh a2, 4(a0)
+; RV32IM-NEXT:    mulhu a6, a4, a5
+; RV32IM-NEXT:    sub a7, a4, a6
+; RV32IM-NEXT:    srli a7, a7, 1
+; RV32IM-NEXT:    add a6, a7, a6
+; RV32IM-NEXT:    srli a6, a6, 6
+; RV32IM-NEXT:    li a7, 95
+; RV32IM-NEXT:    mul a6, a6, a7
+; RV32IM-NEXT:    sub a4, a4, a6
+; RV32IM-NEXT:    mulhu a6, a1, a5
+; RV32IM-NEXT:    sub t0, a1, a6
+; RV32IM-NEXT:    srli t0, t0, 1
+; RV32IM-NEXT:    add a6, t0, a6
+; RV32IM-NEXT:    srli a6, a6, 6
+; RV32IM-NEXT:    mul a6, a6, a7
+; RV32IM-NEXT:    sub a1, a1, a6
+; RV32IM-NEXT:    mulhu a6, a3, a5
+; RV32IM-NEXT:    sub t0, a3, a6
+; RV32IM-NEXT:    srli t0, t0, 1
+; RV32IM-NEXT:    add a6, t0, a6
+; RV32IM-NEXT:    srli a6, a6, 6
+; RV32IM-NEXT:    mul a6, a6, a7
+; RV32IM-NEXT:    sub a3, a3, a6
+; RV32IM-NEXT:    mulhu a5, a2, a5
+; RV32IM-NEXT:    sub a6, a2, a5
+; RV32IM-NEXT:    srli a6, a6, 1
+; RV32IM-NEXT:    add a5, a6, a5
+; RV32IM-NEXT:    srli a5, a5, 6
+; RV32IM-NEXT:    mul a5, a5, a7
+; RV32IM-NEXT:    sub a2, a2, a5
+; RV32IM-NEXT:    sh a2, 6(a0)
+; RV32IM-NEXT:    sh a3, 4(a0)
 ; RV32IM-NEXT:    sh a1, 2(a0)
-; RV32IM-NEXT:    sh t0, 0(a0)
+; RV32IM-NEXT:    sh a4, 0(a0)
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: fold_urem_vec_2:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -64
-; RV64I-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lhu s2, 24(a1)
-; RV64I-NEXT:    lhu s3, 16(a1)
-; RV64I-NEXT:    lhu s0, 8(a1)
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lhu s0, 24(a1)
+; RV64I-NEXT:    lhu s1, 16(a1)
+; RV64I-NEXT:    lhu s2, 8(a1)
 ; RV64I-NEXT:    lhu a2, 0(a1)
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __umoddi3@plt
 ; RV64I-NEXT:    mv s4, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    mv s5, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    sh a0, 6(s1)
-; RV64I-NEXT:    sh s0, 4(s1)
-; RV64I-NEXT:    sh s5, 2(s1)
-; RV64I-NEXT:    sh s4, 0(s1)
-; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 64
+; RV64I-NEXT:    sh a0, 6(s3)
+; RV64I-NEXT:    sh s1, 4(s3)
+; RV64I-NEXT:    sh s2, 2(s3)
+; RV64I-NEXT:    sh s4, 0(s3)
+; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: fold_urem_vec_2:
@@ -327,42 +319,42 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    lhu a2, 0(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI1_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI1_0)(a3)
-; RV64IM-NEXT:    lhu a6, 24(a1)
-; RV64IM-NEXT:    lhu a7, 16(a1)
+; RV64IM-NEXT:    lhu a4, 24(a1)
+; RV64IM-NEXT:    lhu a5, 16(a1)
 ; RV64IM-NEXT:    lhu a1, 8(a1)
-; RV64IM-NEXT:    mulhu a4, a2, a3
-; RV64IM-NEXT:    sub a5, a2, a4
-; RV64IM-NEXT:    srli a5, a5, 1
-; RV64IM-NEXT:    add a4, a5, a4
-; RV64IM-NEXT:    srli a4, a4, 6
-; RV64IM-NEXT:    li a5, 95
-; RV64IM-NEXT:    mulw a4, a4, a5
-; RV64IM-NEXT:    subw t0, a2, a4
-; RV64IM-NEXT:    mulhu a4, a1, a3
-; RV64IM-NEXT:    sub a2, a1, a4
-; RV64IM-NEXT:    srli a2, a2, 1
-; RV64IM-NEXT:    add a2, a2, a4
-; RV64IM-NEXT:    srli a2, a2, 6
-; RV64IM-NEXT:    mulw a2, a2, a5
-; RV64IM-NEXT:    subw a1, a1, a2
-; RV64IM-NEXT:    mulhu a2, a7, a3
-; RV64IM-NEXT:    sub a4, a7, a2
-; RV64IM-NEXT:    srli a4, a4, 1
-; RV64IM-NEXT:    add a2, a4, a2
-; RV64IM-NEXT:    srli a2, a2, 6
-; RV64IM-NEXT:    mulw a2, a2, a5
-; RV64IM-NEXT:    subw a2, a7, a2
-; RV64IM-NEXT:    mulhu a3, a6, a3
-; RV64IM-NEXT:    sub a4, a6, a3
-; RV64IM-NEXT:    srli a4, a4, 1
-; RV64IM-NEXT:    add a3, a4, a3
+; RV64IM-NEXT:    mulhu a6, a2, a3
+; RV64IM-NEXT:    sub a7, a2, a6
+; RV64IM-NEXT:    srli a7, a7, 1
+; RV64IM-NEXT:    add a6, a7, a6
+; RV64IM-NEXT:    srli a6, a6, 6
+; RV64IM-NEXT:    li a7, 95
+; RV64IM-NEXT:    mulw a6, a6, a7
+; RV64IM-NEXT:    subw a2, a2, a6
+; RV64IM-NEXT:    mulhu a6, a1, a3
+; RV64IM-NEXT:    sub t0, a1, a6
+; RV64IM-NEXT:    srli t0, t0, 1
+; RV64IM-NEXT:    add a6, t0, a6
+; RV64IM-NEXT:    srli a6, a6, 6
+; RV64IM-NEXT:    mulw a6, a6, a7
+; RV64IM-NEXT:    subw a1, a1, a6
+; RV64IM-NEXT:    mulhu a6, a5, a3
+; RV64IM-NEXT:    sub t0, a5, a6
+; RV64IM-NEXT:    srli t0, t0, 1
+; RV64IM-NEXT:    add a6, t0, a6
+; RV64IM-NEXT:    srli a6, a6, 6
+; RV64IM-NEXT:    mulw a6, a6, a7
+; RV64IM-NEXT:    subw a5, a5, a6
+; RV64IM-NEXT:    mulhu a3, a4, a3
+; RV64IM-NEXT:    sub a6, a4, a3
+; RV64IM-NEXT:    srli a6, a6, 1
+; RV64IM-NEXT:    add a3, a6, a3
 ; RV64IM-NEXT:    srli a3, a3, 6
-; RV64IM-NEXT:    mulw a3, a3, a5
-; RV64IM-NEXT:    subw a3, a6, a3
+; RV64IM-NEXT:    mulw a3, a3, a7
+; RV64IM-NEXT:    subw a3, a4, a3
 ; RV64IM-NEXT:    sh a3, 6(a0)
-; RV64IM-NEXT:    sh a2, 4(a0)
+; RV64IM-NEXT:    sh a5, 4(a0)
 ; RV64IM-NEXT:    sh a1, 2(a0)
-; RV64IM-NEXT:    sh t0, 0(a0)
+; RV64IM-NEXT:    sh a2, 0(a0)
 ; RV64IM-NEXT:    ret
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
   ret <4 x i16> %1
@@ -384,47 +376,46 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lhu s2, 0(a1)
-; RV32I-NEXT:    lhu s3, 4(a1)
-; RV32I-NEXT:    lhu s4, 8(a1)
-; RV32I-NEXT:    lhu s1, 12(a1)
+; RV32I-NEXT:    lhu s1, 0(a1)
+; RV32I-NEXT:    lhu s2, 4(a1)
+; RV32I-NEXT:    lhu s3, 8(a1)
+; RV32I-NEXT:    lhu s4, 12(a1)
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    call __umodsi3@plt
 ; RV32I-NEXT:    mv s5, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s4
+; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    call __umodsi3@plt
 ; RV32I-NEXT:    mv s6, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s3
+; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __umodsi3@plt
 ; RV32I-NEXT:    mv s7, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __umodsi3@plt
 ; RV32I-NEXT:    mv s8, a0
 ; RV32I-NEXT:    li a1, 95
-; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    call __udivsi3@plt
-; RV32I-NEXT:    mv s9, a0
-; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    call __udivsi3@plt
 ; RV32I-NEXT:    mv s4, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    call __udivsi3@plt
-; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    mv s3, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __udivsi3@plt
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    li a1, 95
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    call __udivsi3@plt
 ; RV32I-NEXT:    add a0, s8, a0
-; RV32I-NEXT:    add a1, s7, s1
-; RV32I-NEXT:    add a2, s6, s4
-; RV32I-NEXT:    add a3, s5, s9
+; RV32I-NEXT:    add a1, s7, s2
+; RV32I-NEXT:    add a2, s6, s3
+; RV32I-NEXT:    add a3, s5, s4
 ; RV32I-NEXT:    sh a3, 6(s0)
 ; RV32I-NEXT:    sh a2, 4(s0)
 ; RV32I-NEXT:    sh a1, 2(s0)
@@ -439,127 +430,124 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
 ; RV32IM-LABEL: combine_urem_udiv:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lhu a6, 0(a1)
-; RV32IM-NEXT:    lhu a7, 4(a1)
+; RV32IM-NEXT:    lhu a2, 0(a1)
+; RV32IM-NEXT:    lhu a3, 4(a1)
 ; RV32IM-NEXT:    lhu a4, 12(a1)
 ; RV32IM-NEXT:    lhu a1, 8(a1)
 ; RV32IM-NEXT:    lui a5, 364242
 ; RV32IM-NEXT:    addi a5, a5, 777
-; RV32IM-NEXT:    mulhu a2, a4, a5
-; RV32IM-NEXT:    sub a3, a4, a2
-; RV32IM-NEXT:    srli a3, a3, 1
-; RV32IM-NEXT:    add a2, a3, a2
-; RV32IM-NEXT:    srli t3, a2, 6
-; RV32IM-NEXT:    li t0, 95
-; RV32IM-NEXT:    mul a3, t3, t0
-; RV32IM-NEXT:    sub t1, a4, a3
-; RV32IM-NEXT:    mulhu a4, a1, a5
-; RV32IM-NEXT:    sub a3, a1, a4
-; RV32IM-NEXT:    srli a3, a3, 1
-; RV32IM-NEXT:    add a3, a3, a4
-; RV32IM-NEXT:    srli a3, a3, 6
-; RV32IM-NEXT:    mul a4, a3, t0
-; RV32IM-NEXT:    sub t2, a1, a4
-; RV32IM-NEXT:    mulhu a4, a7, a5
-; RV32IM-NEXT:    sub a1, a7, a4
-; RV32IM-NEXT:    srli a1, a1, 1
-; RV32IM-NEXT:    add a1, a1, a4
-; RV32IM-NEXT:    srli a1, a1, 6
-; RV32IM-NEXT:    mul a4, a1, t0
-; RV32IM-NEXT:    sub a4, a7, a4
-; RV32IM-NEXT:    mulhu a5, a6, a5
-; RV32IM-NEXT:    sub a2, a6, a5
-; RV32IM-NEXT:    srli a2, a2, 1
+; RV32IM-NEXT:    mulhu a6, a4, a5
+; RV32IM-NEXT:    sub a7, a4, a6
+; RV32IM-NEXT:    srli a7, a7, 1
+; RV32IM-NEXT:    add a6, a7, a6
+; RV32IM-NEXT:    srli a6, a6, 6
+; RV32IM-NEXT:    li a7, 95
+; RV32IM-NEXT:    mul t0, a6, a7
+; RV32IM-NEXT:    sub a4, a4, t0
+; RV32IM-NEXT:    mulhu t0, a1, a5
+; RV32IM-NEXT:    sub t1, a1, t0
+; RV32IM-NEXT:    srli t1, t1, 1
+; RV32IM-NEXT:    add t0, t1, t0
+; RV32IM-NEXT:    srli t0, t0, 6
+; RV32IM-NEXT:    mul t1, t0, a7
+; RV32IM-NEXT:    sub a1, a1, t1
+; RV32IM-NEXT:    mulhu t1, a3, a5
+; RV32IM-NEXT:    sub t2, a3, t1
+; RV32IM-NEXT:    srli t2, t2, 1
+; RV32IM-NEXT:    add t1, t2, t1
+; RV32IM-NEXT:    srli t1, t1, 6
+; RV32IM-NEXT:    mul t2, t1, a7
+; RV32IM-NEXT:    sub a3, a3, t2
+; RV32IM-NEXT:    mulhu a5, a2, a5
+; RV32IM-NEXT:    sub t2, a2, a5
+; RV32IM-NEXT:    srli t2, t2, 1
+; RV32IM-NEXT:    add a5, t2, a5
+; RV32IM-NEXT:    srli a5, a5, 6
+; RV32IM-NEXT:    mul a7, a5, a7
+; RV32IM-NEXT:    sub a2, a2, a7
 ; RV32IM-NEXT:    add a2, a2, a5
-; RV32IM-NEXT:    srli a2, a2, 6
-; RV32IM-NEXT:    mul a5, a2, t0
-; RV32IM-NEXT:    sub a5, a6, a5
-; RV32IM-NEXT:    add a2, a5, a2
-; RV32IM-NEXT:    add a1, a4, a1
-; RV32IM-NEXT:    add a3, t2, a3
-; RV32IM-NEXT:    add a4, t1, t3
+; RV32IM-NEXT:    add a3, a3, t1
+; RV32IM-NEXT:    add a1, a1, t0
+; RV32IM-NEXT:    add a4, a4, a6
 ; RV32IM-NEXT:    sh a4, 6(a0)
-; RV32IM-NEXT:    sh a3, 4(a0)
-; RV32IM-NEXT:    sh a1, 2(a0)
+; RV32IM-NEXT:    sh a1, 4(a0)
+; RV32IM-NEXT:    sh a3, 2(a0)
 ; RV32IM-NEXT:    sh a2, 0(a0)
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: combine_urem_udiv:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -96
-; RV64I-NEXT:    sd ra, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s6, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s7, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s8, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s9, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lhu s2, 0(a1)
-; RV64I-NEXT:    lhu s3, 8(a1)
-; RV64I-NEXT:    lhu s4, 16(a1)
-; RV64I-NEXT:    lhu s1, 24(a1)
+; RV64I-NEXT:    addi sp, sp, -80
+; RV64I-NEXT:    sd ra, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lhu s1, 0(a1)
+; RV64I-NEXT:    lhu s2, 8(a1)
+; RV64I-NEXT:    lhu s3, 16(a1)
+; RV64I-NEXT:    lhu s4, 24(a1)
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    call __umoddi3@plt
 ; RV64I-NEXT:    mv s5, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s4
+; RV64I-NEXT:    mv a0, s3
 ; RV64I-NEXT:    call __umoddi3@plt
 ; RV64I-NEXT:    mv s6, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s3
+; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __umoddi3@plt
 ; RV64I-NEXT:    mv s7, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __umoddi3@plt
 ; RV64I-NEXT:    mv s8, a0
 ; RV64I-NEXT:    li a1, 95
-; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    call __udivdi3@plt
-; RV64I-NEXT:    mv s9, a0
-; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, s4
 ; RV64I-NEXT:    call __udivdi3@plt
 ; RV64I-NEXT:    mv s4, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, s3
 ; RV64I-NEXT:    call __udivdi3@plt
-; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    mv s3, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __udivdi3@plt
+; RV64I-NEXT:    mv s2, a0
+; RV64I-NEXT:    li a1, 95
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    call __udivdi3@plt
 ; RV64I-NEXT:    addw a0, s8, a0
-; RV64I-NEXT:    addw a1, s7, s1
-; RV64I-NEXT:    addw a2, s6, s4
-; RV64I-NEXT:    addw a3, s5, s9
+; RV64I-NEXT:    addw a1, s7, s2
+; RV64I-NEXT:    addw a2, s6, s3
+; RV64I-NEXT:    addw a3, s5, s4
 ; RV64I-NEXT:    sh a3, 6(s0)
 ; RV64I-NEXT:    sh a2, 4(s0)
 ; RV64I-NEXT:    sh a1, 2(s0)
 ; RV64I-NEXT:    sh a0, 0(s0)
-; RV64I-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 96
+; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 80
 ; RV64I-NEXT:    ret
 ;
 ; RV64IM-LABEL: combine_urem_udiv:
@@ -567,45 +555,45 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    lhu a2, 24(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI2_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI2_0)(a3)
-; RV64IM-NEXT:    lhu a6, 0(a1)
-; RV64IM-NEXT:    lhu a7, 8(a1)
+; RV64IM-NEXT:    lhu a4, 0(a1)
+; RV64IM-NEXT:    lhu a5, 8(a1)
 ; RV64IM-NEXT:    lhu a1, 16(a1)
-; RV64IM-NEXT:    mulhu a4, a2, a3
-; RV64IM-NEXT:    sub a5, a2, a4
-; RV64IM-NEXT:    srli a5, a5, 1
-; RV64IM-NEXT:    add a4, a5, a4
-; RV64IM-NEXT:    srli t3, a4, 6
-; RV64IM-NEXT:    li t0, 95
-; RV64IM-NEXT:    mulw a5, t3, t0
-; RV64IM-NEXT:    subw t1, a2, a5
-; RV64IM-NEXT:    mulhu a5, a1, a3
-; RV64IM-NEXT:    sub a2, a1, a5
-; RV64IM-NEXT:    srli a2, a2, 1
-; RV64IM-NEXT:    add a2, a2, a5
-; RV64IM-NEXT:    srli a2, a2, 6
-; RV64IM-NEXT:    mulw a5, a2, t0
-; RV64IM-NEXT:    subw t2, a1, a5
-; RV64IM-NEXT:    mulhu a5, a7, a3
-; RV64IM-NEXT:    sub a1, a7, a5
-; RV64IM-NEXT:    srli a1, a1, 1
-; RV64IM-NEXT:    add a1, a1, a5
-; RV64IM-NEXT:    srli a1, a1, 6
-; RV64IM-NEXT:    mulw a5, a1, t0
-; RV64IM-NEXT:    subw a5, a7, a5
-; RV64IM-NEXT:    mulhu a3, a6, a3
-; RV64IM-NEXT:    sub a4, a6, a3
-; RV64IM-NEXT:    srli a4, a4, 1
-; RV64IM-NEXT:    add a3, a4, a3
+; RV64IM-NEXT:    mulhu a6, a2, a3
+; RV64IM-NEXT:    sub a7, a2, a6
+; RV64IM-NEXT:    srli a7, a7, 1
+; RV64IM-NEXT:    add a6, a7, a6
+; RV64IM-NEXT:    srli a6, a6, 6
+; RV64IM-NEXT:    li a7, 95
+; RV64IM-NEXT:    mulw t0, a6, a7
+; RV64IM-NEXT:    subw a2, a2, t0
+; RV64IM-NEXT:    mulhu t0, a1, a3
+; RV64IM-NEXT:    sub t1, a1, t0
+; RV64IM-NEXT:    srli t1, t1, 1
+; RV64IM-NEXT:    add t0, t1, t0
+; RV64IM-NEXT:    srli t0, t0, 6
+; RV64IM-NEXT:    mulw t1, t0, a7
+; RV64IM-NEXT:    subw a1, a1, t1
+; RV64IM-NEXT:    mulhu t1, a5, a3
+; RV64IM-NEXT:    sub t2, a5, t1
+; RV64IM-NEXT:    srli t2, t2, 1
+; RV64IM-NEXT:    add t1, t2, t1
+; RV64IM-NEXT:    srli t1, t1, 6
+; RV64IM-NEXT:    mulw t2, t1, a7
+; RV64IM-NEXT:    subw a5, a5, t2
+; RV64IM-NEXT:    mulhu a3, a4, a3
+; RV64IM-NEXT:    sub t2, a4, a3
+; RV64IM-NEXT:    srli t2, t2, 1
+; RV64IM-NEXT:    add a3, t2, a3
 ; RV64IM-NEXT:    srli a3, a3, 6
-; RV64IM-NEXT:    mulw a4, a3, t0
-; RV64IM-NEXT:    subw a4, a6, a4
+; RV64IM-NEXT:    mulw a7, a3, a7
+; RV64IM-NEXT:    subw a4, a4, a7
 ; RV64IM-NEXT:    addw a3, a4, a3
-; RV64IM-NEXT:    addw a1, a5, a1
-; RV64IM-NEXT:    addw a2, t2, a2
-; RV64IM-NEXT:    addw a4, t1, t3
-; RV64IM-NEXT:    sh a4, 6(a0)
-; RV64IM-NEXT:    sh a2, 4(a0)
-; RV64IM-NEXT:    sh a1, 2(a0)
+; RV64IM-NEXT:    addw a4, a5, t1
+; RV64IM-NEXT:    addw a1, a1, t0
+; RV64IM-NEXT:    addw a2, a2, a6
+; RV64IM-NEXT:    sh a2, 6(a0)
+; RV64IM-NEXT:    sh a1, 4(a0)
+; RV64IM-NEXT:    sh a4, 2(a0)
 ; RV64IM-NEXT:    sh a3, 0(a0)
 ; RV64IM-NEXT:    ret
   %1 = urem <4 x i16> %x, <i16 95, i16 95, i16 95, i16 95>
@@ -624,17 +612,17 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lhu s2, 8(a1)
-; RV32I-NEXT:    lhu s3, 4(a1)
-; RV32I-NEXT:    lhu s1, 0(a1)
+; RV32I-NEXT:    lhu s1, 8(a1)
+; RV32I-NEXT:    lhu s2, 4(a1)
+; RV32I-NEXT:    lhu s3, 0(a1)
 ; RV32I-NEXT:    lhu a2, 12(a1)
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    li a1, 95
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    call __umodsi3@plt
-; RV32I-NEXT:    andi a1, s1, 63
-; RV32I-NEXT:    andi a2, s3, 31
-; RV32I-NEXT:    andi a3, s2, 7
+; RV32I-NEXT:    andi a1, s3, 63
+; RV32I-NEXT:    andi a2, s2, 31
+; RV32I-NEXT:    andi a3, s1, 7
 ; RV32I-NEXT:    sh a0, 6(s0)
 ; RV32I-NEXT:    sh a3, 4(s0)
 ; RV32I-NEXT:    sh a2, 2(s0)
@@ -649,27 +637,27 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
 ;
 ; RV32IM-LABEL: dont_fold_urem_power_of_two:
 ; RV32IM:       # %bb.0:
-; RV32IM-NEXT:    lhu a6, 8(a1)
+; RV32IM-NEXT:    lhu a2, 8(a1)
 ; RV32IM-NEXT:    lhu a3, 4(a1)
 ; RV32IM-NEXT:    lhu a4, 12(a1)
 ; RV32IM-NEXT:    lhu a1, 0(a1)
 ; RV32IM-NEXT:    lui a5, 364242
 ; RV32IM-NEXT:    addi a5, a5, 777
 ; RV32IM-NEXT:    mulhu a5, a4, a5
-; RV32IM-NEXT:    sub a2, a4, a5
-; RV32IM-NEXT:    srli a2, a2, 1
-; RV32IM-NEXT:    add a2, a2, a5
-; RV32IM-NEXT:    srli a2, a2, 6
-; RV32IM-NEXT:    li a5, 95
-; RV32IM-NEXT:    mul a2, a2, a5
-; RV32IM-NEXT:    sub a2, a4, a2
+; RV32IM-NEXT:    sub a6, a4, a5
+; RV32IM-NEXT:    srli a6, a6, 1
+; RV32IM-NEXT:    add a5, a6, a5
+; RV32IM-NEXT:    srli a5, a5, 6
+; RV32IM-NEXT:    li a6, 95
+; RV32IM-NEXT:    mul a5, a5, a6
+; RV32IM-NEXT:    sub a4, a4, a5
 ; RV32IM-NEXT:    andi a1, a1, 63
 ; RV32IM-NEXT:    andi a3, a3, 31
-; RV32IM-NEXT:    andi a4, a6, 7
-; RV32IM-NEXT:    sh a4, 4(a0)
+; RV32IM-NEXT:    andi a2, a2, 7
+; RV32IM-NEXT:    sh a2, 4(a0)
 ; RV32IM-NEXT:    sh a3, 2(a0)
 ; RV32IM-NEXT:    sh a1, 0(a0)
-; RV32IM-NEXT:    sh a2, 6(a0)
+; RV32IM-NEXT:    sh a4, 6(a0)
 ; RV32IM-NEXT:    ret
 ;
 ; RV64I-LABEL: dont_fold_urem_power_of_two:
@@ -680,17 +668,17 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lhu s2, 16(a1)
-; RV64I-NEXT:    lhu s3, 8(a1)
-; RV64I-NEXT:    lhu s1, 0(a1)
+; RV64I-NEXT:    lhu s1, 16(a1)
+; RV64I-NEXT:    lhu s2, 8(a1)
+; RV64I-NEXT:    lhu s3, 0(a1)
 ; RV64I-NEXT:    lhu a2, 24(a1)
 ; RV64I-NEXT:    mv s0, a0
 ; RV64I-NEXT:    li a1, 95
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    andi a1, s1, 63
-; RV64I-NEXT:    andi a2, s3, 31
-; RV64I-NEXT:    andi a3, s2, 7
+; RV64I-NEXT:    andi a1, s3, 63
+; RV64I-NEXT:    andi a2, s2, 31
+; RV64I-NEXT:    andi a3, s1, 7
 ; RV64I-NEXT:    sh a0, 6(s0)
 ; RV64I-NEXT:    sh a3, 4(s0)
 ; RV64I-NEXT:    sh a2, 2(s0)
@@ -708,20 +696,20 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    lhu a2, 24(a1)
 ; RV64IM-NEXT:    lui a3, %hi(.LCPI3_0)
 ; RV64IM-NEXT:    ld a3, %lo(.LCPI3_0)(a3)
-; RV64IM-NEXT:    lhu a6, 16(a1)
+; RV64IM-NEXT:    lhu a4, 16(a1)
 ; RV64IM-NEXT:    lhu a5, 8(a1)
 ; RV64IM-NEXT:    lhu a1, 0(a1)
 ; RV64IM-NEXT:    mulhu a3, a2, a3
-; RV64IM-NEXT:    sub a4, a2, a3
-; RV64IM-NEXT:    srli a4, a4, 1
-; RV64IM-NEXT:    add a3, a4, a3
+; RV64IM-NEXT:    sub a6, a2, a3
+; RV64IM-NEXT:    srli a6, a6, 1
+; RV64IM-NEXT:    add a3, a6, a3
 ; RV64IM-NEXT:    srli a3, a3, 6
-; RV64IM-NEXT:    li a4, 95
-; RV64IM-NEXT:    mulw a3, a3, a4
+; RV64IM-NEXT:    li a6, 95
+; RV64IM-NEXT:    mulw a3, a3, a6
 ; RV64IM-NEXT:    subw a2, a2, a3
 ; RV64IM-NEXT:    andi a1, a1, 63
 ; RV64IM-NEXT:    andi a3, a5, 31
-; RV64IM-NEXT:    andi a4, a6, 7
+; RV64IM-NEXT:    andi a4, a4, 7
 ; RV64IM-NEXT:    sh a4, 4(a0)
 ; RV64IM-NEXT:    sh a3, 2(a0)
 ; RV64IM-NEXT:    sh a1, 0(a0)
@@ -741,10 +729,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lhu s2, 12(a1)
+; RV32I-NEXT:    lhu s0, 12(a1)
 ; RV32I-NEXT:    lhu s1, 8(a1)
 ; RV32I-NEXT:    lhu a2, 4(a1)
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s2, a0
 ; RV32I-NEXT:    li a1, 654
 ; RV32I-NEXT:    mv a0, a2
 ; RV32I-NEXT:    call __umodsi3@plt
@@ -755,12 +743,12 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    mv s1, a0
 ; RV32I-NEXT:    lui a0, 1
 ; RV32I-NEXT:    addi a1, a0, 1327
-; RV32I-NEXT:    mv a0, s2
+; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __umodsi3@plt
-; RV32I-NEXT:    sh a0, 6(s0)
-; RV32I-NEXT:    sh s1, 4(s0)
-; RV32I-NEXT:    sh s3, 2(s0)
-; RV32I-NEXT:    sh zero, 0(s0)
+; RV32I-NEXT:    sh a0, 6(s2)
+; RV32I-NEXT:    sh s1, 4(s2)
+; RV32I-NEXT:    sh s3, 2(s2)
+; RV32I-NEXT:    sh zero, 0(s2)
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -811,10 +799,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lhu s2, 24(a1)
+; RV64I-NEXT:    lhu s0, 24(a1)
 ; RV64I-NEXT:    lhu s1, 16(a1)
 ; RV64I-NEXT:    lhu a2, 8(a1)
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    li a1, 654
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __umoddi3@plt
@@ -825,12 +813,12 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 1
 ; RV64I-NEXT:    addiw a1, a0, 1327
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    sh a0, 6(s0)
-; RV64I-NEXT:    sh s1, 4(s0)
-; RV64I-NEXT:    sh s3, 2(s0)
-; RV64I-NEXT:    sh zero, 0(s0)
+; RV64I-NEXT:    sh a0, 6(s2)
+; RV64I-NEXT:    sh s1, 4(s2)
+; RV64I-NEXT:    sh s3, 2(s2)
+; RV64I-NEXT:    sh zero, 0(s2)
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
@@ -851,29 +839,29 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
 ; RV64IM-NEXT:    srli a5, a5, 1
 ; RV64IM-NEXT:    add a3, a5, a3
 ; RV64IM-NEXT:    srli a3, a3, 4
-; RV64IM-NEXT:    li a6, 23
-; RV64IM-NEXT:    lui a5, %hi(.LCPI4_1)
-; RV64IM-NEXT:    ld a5, %lo(.LCPI4_1)(a5)
-; RV64IM-NEXT:    mulw a3, a3, a6
-; RV64IM-NEXT:    subw a6, a2, a3
+; RV64IM-NEXT:    li a5, 23
+; RV64IM-NEXT:    lui a6, %hi(.LCPI4_1)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI4_1)(a6)
+; RV64IM-NEXT:    mulw a3, a3, a5
+; RV64IM-NEXT:    subw a2, a2, a3
 ; RV64IM-NEXT:    srli a3, a1, 1
-; RV64IM-NEXT:    mulhu a3, a3, a5
+; RV64IM-NEXT:    mulhu a3, a3, a6
 ; RV64IM-NEXT:    srli a3, a3, 7
 ; RV64IM-NEXT:    lui a5, %hi(.LCPI4_2)
 ; RV64IM-NEXT:    ld a5, %lo(.LCPI4_2)(a5)
-; RV64IM-NEXT:    li a2, 654
-; RV64IM-NEXT:    mulw a2, a3, a2
-; RV64IM-NEXT:    subw a1, a1, a2
-; RV64IM-NEXT:    mulhu a2, a4, a5
-; RV64IM-NEXT:    srli a2, a2, 12
-; RV64IM-NEXT:    lui a3, 1
-; RV64IM-NEXT:    addiw a3, a3, 1327
-; RV64IM-NEXT:    mulw a2, a2, a3
-; RV64IM-NEXT:    subw a2, a4, a2
+; RV64IM-NEXT:    li a6, 654
+; RV64IM-NEXT:    mulw a3, a3, a6
+; RV64IM-NEXT:    subw a1, a1, a3
+; RV64IM-NEXT:    mulhu a3, a4, a5
+; RV64IM-NEXT:    srli a3, a3, 12
+; RV64IM-NEXT:    lui a5, 1
+; RV64IM-NEXT:    addiw a5, a5, 1327
+; RV64IM-NEXT:    mulw a3, a3, a5
+; RV64IM-NEXT:    subw a3, a4, a3
 ; RV64IM-NEXT:    sh zero, 0(a0)
-; RV64IM-NEXT:    sh a2, 6(a0)
+; RV64IM-NEXT:    sh a3, 6(a0)
 ; RV64IM-NEXT:    sh a1, 2(a0)
-; RV64IM-NEXT:    sh a6, 4(a0)
+; RV64IM-NEXT:    sh a2, 4(a0)
 ; RV64IM-NEXT:    ret
   %1 = urem <4 x i16> %x, <i16 1, i16 654, i16 23, i16 5423>
   ret <4 x i16> %1
@@ -903,16 +891,15 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s9, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw s2, 24(a1)
-; RV32I-NEXT:    lw s3, 28(a1)
-; RV32I-NEXT:    lw s4, 16(a1)
-; RV32I-NEXT:    lw s5, 20(a1)
-; RV32I-NEXT:    lw s6, 8(a1)
-; RV32I-NEXT:    lw s1, 12(a1)
+; RV32I-NEXT:    lw s0, 24(a1)
+; RV32I-NEXT:    lw s1, 28(a1)
+; RV32I-NEXT:    lw s2, 16(a1)
+; RV32I-NEXT:    lw s3, 20(a1)
+; RV32I-NEXT:    lw s4, 8(a1)
+; RV32I-NEXT:    lw s5, 12(a1)
 ; RV32I-NEXT:    lw a3, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
-; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    mv s6, a0
 ; RV32I-NEXT:    li a2, 1
 ; RV32I-NEXT:    mv a0, a3
 ; RV32I-NEXT:    li a3, 0
@@ -920,33 +907,33 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV32I-NEXT:    mv s7, a0
 ; RV32I-NEXT:    mv s8, a1
 ; RV32I-NEXT:    li a2, 654
-; RV32I-NEXT:    mv a0, s6
-; RV32I-NEXT:    mv a1, s1
-; RV32I-NEXT:    li a3, 0
-; RV32I-NEXT:    call __umoddi3@plt
-; RV32I-NEXT:    mv s6, a0
-; RV32I-NEXT:    mv s9, a1
-; RV32I-NEXT:    li a2, 23
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s5
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __umoddi3@plt
 ; RV32I-NEXT:    mv s4, a0
-; RV32I-NEXT:    mv s1, a1
-; RV32I-NEXT:    lui a0, 1
-; RV32I-NEXT:    addi a2, a0, 1327
+; RV32I-NEXT:    mv s5, a1
+; RV32I-NEXT:    li a2, 23
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    li a3, 0
 ; RV32I-NEXT:    call __umoddi3@plt
-; RV32I-NEXT:    sw a1, 28(s0)
-; RV32I-NEXT:    sw a0, 24(s0)
-; RV32I-NEXT:    sw s1, 20(s0)
-; RV32I-NEXT:    sw s4, 16(s0)
-; RV32I-NEXT:    sw s9, 12(s0)
-; RV32I-NEXT:    sw s6, 8(s0)
-; RV32I-NEXT:    sw s8, 4(s0)
-; RV32I-NEXT:    sw s7, 0(s0)
+; RV32I-NEXT:    mv s2, a0
+; RV32I-NEXT:    mv s3, a1
+; RV32I-NEXT:    lui a0, 1
+; RV32I-NEXT:    addi a2, a0, 1327
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    li a3, 0
+; RV32I-NEXT:    call __umoddi3@plt
+; RV32I-NEXT:    sw a1, 28(s6)
+; RV32I-NEXT:    sw a0, 24(s6)
+; RV32I-NEXT:    sw s3, 20(s6)
+; RV32I-NEXT:    sw s2, 16(s6)
+; RV32I-NEXT:    sw s5, 12(s6)
+; RV32I-NEXT:    sw s4, 8(s6)
+; RV32I-NEXT:    sw s8, 4(s6)
+; RV32I-NEXT:    sw s7, 0(s6)
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
@@ -957,7 +944,6 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
@@ -974,16 +960,15 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV32IM-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32IM-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
 ; RV32IM-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    sw s9, 4(sp) # 4-byte Folded Spill
-; RV32IM-NEXT:    lw s2, 24(a1)
-; RV32IM-NEXT:    lw s3, 28(a1)
-; RV32IM-NEXT:    lw s4, 16(a1)
-; RV32IM-NEXT:    lw s5, 20(a1)
-; RV32IM-NEXT:    lw s6, 8(a1)
-; RV32IM-NEXT:    lw s1, 12(a1)
+; RV32IM-NEXT:    lw s0, 24(a1)
+; RV32IM-NEXT:    lw s1, 28(a1)
+; RV32IM-NEXT:    lw s2, 16(a1)
+; RV32IM-NEXT:    lw s3, 20(a1)
+; RV32IM-NEXT:    lw s4, 8(a1)
+; RV32IM-NEXT:    lw s5, 12(a1)
 ; RV32IM-NEXT:    lw a3, 0(a1)
 ; RV32IM-NEXT:    lw a1, 4(a1)
-; RV32IM-NEXT:    mv s0, a0
+; RV32IM-NEXT:    mv s6, a0
 ; RV32IM-NEXT:    li a2, 1
 ; RV32IM-NEXT:    mv a0, a3
 ; RV32IM-NEXT:    li a3, 0
@@ -991,33 +976,33 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV32IM-NEXT:    mv s7, a0
 ; RV32IM-NEXT:    mv s8, a1
 ; RV32IM-NEXT:    li a2, 654
-; RV32IM-NEXT:    mv a0, s6
-; RV32IM-NEXT:    mv a1, s1
-; RV32IM-NEXT:    li a3, 0
-; RV32IM-NEXT:    call __umoddi3@plt
-; RV32IM-NEXT:    mv s6, a0
-; RV32IM-NEXT:    mv s9, a1
-; RV32IM-NEXT:    li a2, 23
 ; RV32IM-NEXT:    mv a0, s4
 ; RV32IM-NEXT:    mv a1, s5
 ; RV32IM-NEXT:    li a3, 0
 ; RV32IM-NEXT:    call __umoddi3@plt
 ; RV32IM-NEXT:    mv s4, a0
-; RV32IM-NEXT:    mv s1, a1
-; RV32IM-NEXT:    lui a0, 1
-; RV32IM-NEXT:    addi a2, a0, 1327
+; RV32IM-NEXT:    mv s5, a1
+; RV32IM-NEXT:    li a2, 23
 ; RV32IM-NEXT:    mv a0, s2
 ; RV32IM-NEXT:    mv a1, s3
 ; RV32IM-NEXT:    li a3, 0
 ; RV32IM-NEXT:    call __umoddi3@plt
-; RV32IM-NEXT:    sw a1, 28(s0)
-; RV32IM-NEXT:    sw a0, 24(s0)
-; RV32IM-NEXT:    sw s1, 20(s0)
-; RV32IM-NEXT:    sw s4, 16(s0)
-; RV32IM-NEXT:    sw s9, 12(s0)
-; RV32IM-NEXT:    sw s6, 8(s0)
-; RV32IM-NEXT:    sw s8, 4(s0)
-; RV32IM-NEXT:    sw s7, 0(s0)
+; RV32IM-NEXT:    mv s2, a0
+; RV32IM-NEXT:    mv s3, a1
+; RV32IM-NEXT:    lui a0, 1
+; RV32IM-NEXT:    addi a2, a0, 1327
+; RV32IM-NEXT:    mv a0, s0
+; RV32IM-NEXT:    mv a1, s1
+; RV32IM-NEXT:    li a3, 0
+; RV32IM-NEXT:    call __umoddi3@plt
+; RV32IM-NEXT:    sw a1, 28(s6)
+; RV32IM-NEXT:    sw a0, 24(s6)
+; RV32IM-NEXT:    sw s3, 20(s6)
+; RV32IM-NEXT:    sw s2, 16(s6)
+; RV32IM-NEXT:    sw s5, 12(s6)
+; RV32IM-NEXT:    sw s4, 8(s6)
+; RV32IM-NEXT:    sw s8, 4(s6)
+; RV32IM-NEXT:    sw s7, 0(s6)
 ; RV32IM-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
@@ -1028,7 +1013,6 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV32IM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    addi sp, sp, 48
 ; RV32IM-NEXT:    ret
 ;
@@ -1040,10 +1024,10 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    ld s2, 24(a1)
+; RV64I-NEXT:    ld s0, 24(a1)
 ; RV64I-NEXT:    ld s1, 16(a1)
 ; RV64I-NEXT:    ld a2, 8(a1)
-; RV64I-NEXT:    mv s0, a0
+; RV64I-NEXT:    mv s2, a0
 ; RV64I-NEXT:    li a1, 654
 ; RV64I-NEXT:    mv a0, a2
 ; RV64I-NEXT:    call __umoddi3@plt
@@ -1054,12 +1038,12 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV64I-NEXT:    mv s1, a0
 ; RV64I-NEXT:    lui a0, 1
 ; RV64I-NEXT:    addiw a1, a0, 1327
-; RV64I-NEXT:    mv a0, s2
+; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __umoddi3@plt
-; RV64I-NEXT:    sd a0, 24(s0)
-; RV64I-NEXT:    sd s1, 16(s0)
-; RV64I-NEXT:    sd s3, 8(s0)
-; RV64I-NEXT:    sd zero, 0(s0)
+; RV64I-NEXT:    sd a0, 24(s2)
+; RV64I-NEXT:    sd s1, 16(s2)
+; RV64I-NEXT:    sd s3, 8(s2)
+; RV64I-NEXT:    sd zero, 0(s2)
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
@@ -1080,29 +1064,29 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV64IM-NEXT:    srli a5, a5, 1
 ; RV64IM-NEXT:    add a3, a5, a3
 ; RV64IM-NEXT:    srli a3, a3, 4
-; RV64IM-NEXT:    li a6, 23
-; RV64IM-NEXT:    lui a5, %hi(.LCPI6_1)
-; RV64IM-NEXT:    ld a5, %lo(.LCPI6_1)(a5)
-; RV64IM-NEXT:    mul a3, a3, a6
-; RV64IM-NEXT:    sub a6, a2, a3
+; RV64IM-NEXT:    li a5, 23
+; RV64IM-NEXT:    lui a6, %hi(.LCPI6_1)
+; RV64IM-NEXT:    ld a6, %lo(.LCPI6_1)(a6)
+; RV64IM-NEXT:    mul a3, a3, a5
+; RV64IM-NEXT:    sub a2, a2, a3
 ; RV64IM-NEXT:    srli a3, a1, 1
-; RV64IM-NEXT:    mulhu a3, a3, a5
+; RV64IM-NEXT:    mulhu a3, a3, a6
 ; RV64IM-NEXT:    srli a3, a3, 7
 ; RV64IM-NEXT:    lui a5, %hi(.LCPI6_2)
 ; RV64IM-NEXT:    ld a5, %lo(.LCPI6_2)(a5)
-; RV64IM-NEXT:    li a2, 654
-; RV64IM-NEXT:    mul a2, a3, a2
-; RV64IM-NEXT:    sub a1, a1, a2
-; RV64IM-NEXT:    mulhu a2, a4, a5
-; RV64IM-NEXT:    srli a2, a2, 12
-; RV64IM-NEXT:    lui a3, 1
-; RV64IM-NEXT:    addiw a3, a3, 1327
-; RV64IM-NEXT:    mul a2, a2, a3
-; RV64IM-NEXT:    sub a2, a4, a2
+; RV64IM-NEXT:    li a6, 654
+; RV64IM-NEXT:    mul a3, a3, a6
+; RV64IM-NEXT:    sub a1, a1, a3
+; RV64IM-NEXT:    mulhu a3, a4, a5
+; RV64IM-NEXT:    srli a3, a3, 12
+; RV64IM-NEXT:    lui a5, 1
+; RV64IM-NEXT:    addiw a5, a5, 1327
+; RV64IM-NEXT:    mul a3, a3, a5
+; RV64IM-NEXT:    sub a3, a4, a3
 ; RV64IM-NEXT:    sd zero, 0(a0)
-; RV64IM-NEXT:    sd a2, 24(a0)
+; RV64IM-NEXT:    sd a3, 24(a0)
 ; RV64IM-NEXT:    sd a1, 8(a0)
-; RV64IM-NEXT:    sd a6, 16(a0)
+; RV64IM-NEXT:    sd a2, 16(a0)
 ; RV64IM-NEXT:    ret
   %1 = urem <4 x i64> %x, <i64 1, i64 654, i64 23, i64 5423>
   ret <4 x i64> %1
diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
index 929c154f39e89..1b81beb113ecd 100644
--- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
+++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
@@ -16,18 +16,18 @@ define void @vec3_setcc_crash(<3 x i8>* %in, <3 x i8>* %out) {
 ; RV32-NEXT:    addi a2, a2, -256
 ; RV32-NEXT:    and a2, a0, a2
 ; RV32-NEXT:    slli a3, a2, 16
-; RV32-NEXT:    srai a6, a3, 24
+; RV32-NEXT:    srai a3, a3, 24
 ; RV32-NEXT:    slli a4, a0, 24
-; RV32-NEXT:    srai a3, a4, 24
+; RV32-NEXT:    srai a6, a4, 24
 ; RV32-NEXT:    slli a4, a0, 8
 ; RV32-NEXT:    mv a5, a0
-; RV32-NEXT:    bgtz a3, .LBB0_2
+; RV32-NEXT:    bgtz a6, .LBB0_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a5, 0
 ; RV32-NEXT:  .LBB0_2:
 ; RV32-NEXT:    srai a4, a4, 24
 ; RV32-NEXT:    andi a5, a5, 255
-; RV32-NEXT:    bgtz a6, .LBB0_4
+; RV32-NEXT:    bgtz a3, .LBB0_4
 ; RV32-NEXT:  # %bb.3:
 ; RV32-NEXT:    li a2, 0
 ; RV32-NEXT:    j .LBB0_5
@@ -54,18 +54,18 @@ define void @vec3_setcc_crash(<3 x i8>* %in, <3 x i8>* %out) {
 ; RV64-NEXT:    addiw a2, a2, -256
 ; RV64-NEXT:    and a2, a0, a2
 ; RV64-NEXT:    slli a3, a2, 48
-; RV64-NEXT:    srai a6, a3, 56
+; RV64-NEXT:    srai a3, a3, 56
 ; RV64-NEXT:    slli a4, a0, 56
-; RV64-NEXT:    srai a3, a4, 56
+; RV64-NEXT:    srai a6, a4, 56
 ; RV64-NEXT:    slli a4, a0, 40
 ; RV64-NEXT:    mv a5, a0
-; RV64-NEXT:    bgtz a3, .LBB0_2
+; RV64-NEXT:    bgtz a6, .LBB0_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a5, 0
 ; RV64-NEXT:  .LBB0_2:
 ; RV64-NEXT:    srai a4, a4, 56
 ; RV64-NEXT:    andi a5, a5, 255
-; RV64-NEXT:    bgtz a6, .LBB0_4
+; RV64-NEXT:    bgtz a3, .LBB0_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    li a2, 0
 ; RV64-NEXT:    j .LBB0_5
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index a3732e1104eb8..7342cb56827c2 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -567,9 +567,9 @@ entry:
 define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
 ; RV32-LABEL: ssubo.i64:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    sltu a6, a0, a2
-; RV32-NEXT:    sub a5, a1, a3
-; RV32-NEXT:    sub a5, a5, a6
+; RV32-NEXT:    sltu a5, a0, a2
+; RV32-NEXT:    sub a6, a1, a3
+; RV32-NEXT:    sub a5, a6, a5
 ; RV32-NEXT:    xor a6, a1, a5
 ; RV32-NEXT:    xor a1, a1, a3
 ; RV32-NEXT:    and a1, a1, a6
@@ -591,9 +591,9 @@ define zeroext i1 @ssubo.i64(i64 %v1, i64 %v2, i64* %res) {
 ;
 ; RV32ZBA-LABEL: ssubo.i64:
 ; RV32ZBA:       # %bb.0: # %entry
-; RV32ZBA-NEXT:    sltu a6, a0, a2
-; RV32ZBA-NEXT:    sub a5, a1, a3
-; RV32ZBA-NEXT:    sub a5, a5, a6
+; RV32ZBA-NEXT:    sltu a5, a0, a2
+; RV32ZBA-NEXT:    sub a6, a1, a3
+; RV32ZBA-NEXT:    sub a5, a6, a5
 ; RV32ZBA-NEXT:    xor a6, a1, a5
 ; RV32ZBA-NEXT:    xor a1, a1, a3
 ; RV32ZBA-NEXT:    and a1, a1, a6
@@ -905,64 +905,58 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s3, 0(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset s0, -4
 ; RV32-NEXT:    .cfi_offset s1, -8
-; RV32-NEXT:    .cfi_offset s2, -12
-; RV32-NEXT:    .cfi_offset s3, -16
-; RV32-NEXT:    mulhu a6, a0, a2
-; RV32-NEXT:    mul a5, a1, a2
-; RV32-NEXT:    add a6, a5, a6
-; RV32-NEXT:    sltu a7, a6, a5
-; RV32-NEXT:    mulhu a5, a1, a2
-; RV32-NEXT:    add a7, a5, a7
-; RV32-NEXT:    mul a5, a0, a3
-; RV32-NEXT:    add a6, a5, a6
-; RV32-NEXT:    sltu t0, a6, a5
-; RV32-NEXT:    mulhu a5, a0, a3
-; RV32-NEXT:    add a5, a5, t0
-; RV32-NEXT:    add t0, a7, a5
-; RV32-NEXT:    mul t1, a1, a3
-; RV32-NEXT:    add a5, t1, t0
+; RV32-NEXT:    mulhu a5, a0, a2
+; RV32-NEXT:    mul a6, a1, a2
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    sltu a6, a5, a6
+; RV32-NEXT:    mulhu a7, a1, a2
+; RV32-NEXT:    add a6, a7, a6
+; RV32-NEXT:    mul a7, a0, a3
+; RV32-NEXT:    add a5, a7, a5
+; RV32-NEXT:    sltu a7, a5, a7
+; RV32-NEXT:    mulhu t0, a0, a3
+; RV32-NEXT:    add a7, t0, a7
+; RV32-NEXT:    add a7, a6, a7
+; RV32-NEXT:    mul t0, a1, a3
+; RV32-NEXT:    add t1, t0, a7
 ; RV32-NEXT:    srai t2, a1, 31
 ; RV32-NEXT:    mul t3, a2, t2
 ; RV32-NEXT:    srai t4, a3, 31
 ; RV32-NEXT:    mul t5, t4, a0
 ; RV32-NEXT:    add t6, t5, t3
-; RV32-NEXT:    add s3, a5, t6
-; RV32-NEXT:    sltu s2, s3, a5
-; RV32-NEXT:    sltu a5, a5, t1
-; RV32-NEXT:    sltu s1, t0, a7
-; RV32-NEXT:    mulhu s0, a1, a3
-; RV32-NEXT:    add s1, s0, s1
-; RV32-NEXT:    add a5, s1, a5
-; RV32-NEXT:    mulhu s1, a2, t2
-; RV32-NEXT:    add s1, s1, t3
+; RV32-NEXT:    add s0, t1, t6
+; RV32-NEXT:    sltu s1, s0, t1
+; RV32-NEXT:    sltu t0, t1, t0
+; RV32-NEXT:    sltu a6, a7, a6
+; RV32-NEXT:    mulhu a7, a1, a3
+; RV32-NEXT:    add a6, a7, a6
+; RV32-NEXT:    add a6, a6, t0
+; RV32-NEXT:    mulhu a7, a2, t2
+; RV32-NEXT:    add a7, a7, t3
 ; RV32-NEXT:    mul a3, a3, t2
-; RV32-NEXT:    add a3, s1, a3
+; RV32-NEXT:    add a3, a7, a3
 ; RV32-NEXT:    mul a1, t4, a1
-; RV32-NEXT:    mulhu s1, t4, a0
-; RV32-NEXT:    add a1, s1, a1
+; RV32-NEXT:    mulhu a7, t4, a0
+; RV32-NEXT:    add a1, a7, a1
 ; RV32-NEXT:    add a1, a1, t5
 ; RV32-NEXT:    add a1, a1, a3
 ; RV32-NEXT:    sltu a3, t6, t5
 ; RV32-NEXT:    add a1, a1, a3
-; RV32-NEXT:    add a1, a5, a1
-; RV32-NEXT:    add a1, a1, s2
-; RV32-NEXT:    srai a3, a6, 31
+; RV32-NEXT:    add a1, a6, a1
+; RV32-NEXT:    add a1, a1, s1
+; RV32-NEXT:    srai a3, a5, 31
 ; RV32-NEXT:    xor a1, a1, a3
-; RV32-NEXT:    xor a3, s3, a3
+; RV32-NEXT:    xor a3, s0, a3
 ; RV32-NEXT:    or a1, a3, a1
 ; RV32-NEXT:    snez a1, a1
 ; RV32-NEXT:    mul a0, a0, a2
 ; RV32-NEXT:    sw a0, 0(a4)
-; RV32-NEXT:    sw a6, 4(a4)
+; RV32-NEXT:    sw a5, 4(a4)
 ; RV32-NEXT:    mv a0, a1
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -982,64 +976,58 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
 ; RV32ZBA-NEXT:    .cfi_def_cfa_offset 16
 ; RV32ZBA-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
 ; RV32ZBA-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32ZBA-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
-; RV32ZBA-NEXT:    sw s3, 0(sp) # 4-byte Folded Spill
 ; RV32ZBA-NEXT:    .cfi_offset s0, -4
 ; RV32ZBA-NEXT:    .cfi_offset s1, -8
-; RV32ZBA-NEXT:    .cfi_offset s2, -12
-; RV32ZBA-NEXT:    .cfi_offset s3, -16
-; RV32ZBA-NEXT:    mulhu a6, a0, a2
-; RV32ZBA-NEXT:    mul a5, a1, a2
-; RV32ZBA-NEXT:    add a6, a5, a6
-; RV32ZBA-NEXT:    sltu a7, a6, a5
-; RV32ZBA-NEXT:    mulhu a5, a1, a2
-; RV32ZBA-NEXT:    add a7, a5, a7
-; RV32ZBA-NEXT:    mul a5, a0, a3
-; RV32ZBA-NEXT:    add a6, a5, a6
-; RV32ZBA-NEXT:    sltu t0, a6, a5
-; RV32ZBA-NEXT:    mulhu a5, a0, a3
-; RV32ZBA-NEXT:    add a5, a5, t0
-; RV32ZBA-NEXT:    add t0, a7, a5
-; RV32ZBA-NEXT:    mul t1, a1, a3
-; RV32ZBA-NEXT:    add a5, t1, t0
+; RV32ZBA-NEXT:    mulhu a5, a0, a2
+; RV32ZBA-NEXT:    mul a6, a1, a2
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    sltu a6, a5, a6
+; RV32ZBA-NEXT:    mulhu a7, a1, a2
+; RV32ZBA-NEXT:    add a6, a7, a6
+; RV32ZBA-NEXT:    mul a7, a0, a3
+; RV32ZBA-NEXT:    add a5, a7, a5
+; RV32ZBA-NEXT:    sltu a7, a5, a7
+; RV32ZBA-NEXT:    mulhu t0, a0, a3
+; RV32ZBA-NEXT:    add a7, t0, a7
+; RV32ZBA-NEXT:    add a7, a6, a7
+; RV32ZBA-NEXT:    mul t0, a1, a3
+; RV32ZBA-NEXT:    add t1, t0, a7
 ; RV32ZBA-NEXT:    srai t2, a1, 31
 ; RV32ZBA-NEXT:    mul t3, a2, t2
 ; RV32ZBA-NEXT:    srai t4, a3, 31
 ; RV32ZBA-NEXT:    mul t5, t4, a0
 ; RV32ZBA-NEXT:    add t6, t5, t3
-; RV32ZBA-NEXT:    add s3, a5, t6
-; RV32ZBA-NEXT:    sltu s2, s3, a5
-; RV32ZBA-NEXT:    sltu a5, a5, t1
-; RV32ZBA-NEXT:    sltu s1, t0, a7
-; RV32ZBA-NEXT:    mulhu s0, a1, a3
-; RV32ZBA-NEXT:    add s1, s0, s1
-; RV32ZBA-NEXT:    add a5, s1, a5
-; RV32ZBA-NEXT:    mulhu s1, a2, t2
-; RV32ZBA-NEXT:    add s1, s1, t3
+; RV32ZBA-NEXT:    add s0, t1, t6
+; RV32ZBA-NEXT:    sltu s1, s0, t1
+; RV32ZBA-NEXT:    sltu t0, t1, t0
+; RV32ZBA-NEXT:    sltu a6, a7, a6
+; RV32ZBA-NEXT:    mulhu a7, a1, a3
+; RV32ZBA-NEXT:    add a6, a7, a6
+; RV32ZBA-NEXT:    add a6, a6, t0
+; RV32ZBA-NEXT:    mulhu a7, a2, t2
+; RV32ZBA-NEXT:    add a7, a7, t3
 ; RV32ZBA-NEXT:    mul a3, a3, t2
-; RV32ZBA-NEXT:    add a3, s1, a3
+; RV32ZBA-NEXT:    add a3, a7, a3
 ; RV32ZBA-NEXT:    mul a1, t4, a1
-; RV32ZBA-NEXT:    mulhu s1, t4, a0
-; RV32ZBA-NEXT:    add a1, s1, a1
+; RV32ZBA-NEXT:    mulhu a7, t4, a0
+; RV32ZBA-NEXT:    add a1, a7, a1
 ; RV32ZBA-NEXT:    add a1, a1, t5
 ; RV32ZBA-NEXT:    add a1, a1, a3
 ; RV32ZBA-NEXT:    sltu a3, t6, t5
 ; RV32ZBA-NEXT:    add a1, a1, a3
-; RV32ZBA-NEXT:    add a1, a5, a1
-; RV32ZBA-NEXT:    add a1, a1, s2
-; RV32ZBA-NEXT:    srai a3, a6, 31
+; RV32ZBA-NEXT:    add a1, a6, a1
+; RV32ZBA-NEXT:    add a1, a1, s1
+; RV32ZBA-NEXT:    srai a3, a5, 31
 ; RV32ZBA-NEXT:    xor a1, a1, a3
-; RV32ZBA-NEXT:    xor a3, s3, a3
+; RV32ZBA-NEXT:    xor a3, s0, a3
 ; RV32ZBA-NEXT:    or a1, a3, a1
 ; RV32ZBA-NEXT:    snez a1, a1
 ; RV32ZBA-NEXT:    mul a0, a0, a2
 ; RV32ZBA-NEXT:    sw a0, 0(a4)
-; RV32ZBA-NEXT:    sw a6, 4(a4)
+; RV32ZBA-NEXT:    sw a5, 4(a4)
 ; RV32ZBA-NEXT:    mv a0, a1
 ; RV32ZBA-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    addi sp, sp, 16
 ; RV32ZBA-NEXT:    ret
 ;
@@ -1063,27 +1051,27 @@ entry:
 define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) {
 ; RV32-LABEL: smulo2.i64:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    li a7, 13
-; RV32-NEXT:    mulhu a4, a0, a7
-; RV32-NEXT:    mul a5, a1, a7
-; RV32-NEXT:    add t0, a5, a4
-; RV32-NEXT:    sltu a6, t0, a5
-; RV32-NEXT:    mulhu a5, a1, a7
-; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    li a3, 13
+; RV32-NEXT:    mulhu a4, a0, a3
+; RV32-NEXT:    mul a5, a1, a3
+; RV32-NEXT:    add a4, a5, a4
+; RV32-NEXT:    sltu a5, a4, a5
+; RV32-NEXT:    mulhu a6, a1, a3
+; RV32-NEXT:    add a5, a6, a5
 ; RV32-NEXT:    srai a1, a1, 31
-; RV32-NEXT:    mul a3, a1, a7
-; RV32-NEXT:    add a3, a5, a3
-; RV32-NEXT:    srai a4, t0, 31
-; RV32-NEXT:    xor a6, a3, a4
-; RV32-NEXT:    sltu a3, a3, a5
-; RV32-NEXT:    mulh a1, a1, a7
-; RV32-NEXT:    add a1, a1, a3
-; RV32-NEXT:    xor a1, a1, a4
-; RV32-NEXT:    or a1, a6, a1
+; RV32-NEXT:    mul a6, a1, a3
+; RV32-NEXT:    add a6, a5, a6
+; RV32-NEXT:    srai a7, a4, 31
+; RV32-NEXT:    xor t0, a6, a7
+; RV32-NEXT:    sltu a5, a6, a5
+; RV32-NEXT:    mulh a1, a1, a3
+; RV32-NEXT:    add a1, a1, a5
+; RV32-NEXT:    xor a1, a1, a7
+; RV32-NEXT:    or a1, t0, a1
 ; RV32-NEXT:    snez a1, a1
-; RV32-NEXT:    mul a0, a0, a7
+; RV32-NEXT:    mul a0, a0, a3
 ; RV32-NEXT:    sw a0, 0(a2)
-; RV32-NEXT:    sw t0, 4(a2)
+; RV32-NEXT:    sw a4, 4(a2)
 ; RV32-NEXT:    mv a0, a1
 ; RV32-NEXT:    ret
 ;
@@ -1100,27 +1088,27 @@ define zeroext i1 @smulo2.i64(i64 %v1, i64* %res) {
 ;
 ; RV32ZBA-LABEL: smulo2.i64:
 ; RV32ZBA:       # %bb.0: # %entry
-; RV32ZBA-NEXT:    li a7, 13
-; RV32ZBA-NEXT:    mulhu a4, a0, a7
-; RV32ZBA-NEXT:    mul a5, a1, a7
-; RV32ZBA-NEXT:    add t0, a5, a4
-; RV32ZBA-NEXT:    sltu a6, t0, a5
-; RV32ZBA-NEXT:    mulhu a5, a1, a7
-; RV32ZBA-NEXT:    add a5, a5, a6
+; RV32ZBA-NEXT:    li a3, 13
+; RV32ZBA-NEXT:    mulhu a4, a0, a3
+; RV32ZBA-NEXT:    mul a5, a1, a3
+; RV32ZBA-NEXT:    add a4, a5, a4
+; RV32ZBA-NEXT:    sltu a5, a4, a5
+; RV32ZBA-NEXT:    mulhu a6, a1, a3
+; RV32ZBA-NEXT:    add a5, a6, a5
 ; RV32ZBA-NEXT:    srai a1, a1, 31
-; RV32ZBA-NEXT:    mul a3, a1, a7
-; RV32ZBA-NEXT:    add a3, a5, a3
-; RV32ZBA-NEXT:    srai a4, t0, 31
-; RV32ZBA-NEXT:    xor a6, a3, a4
-; RV32ZBA-NEXT:    sltu a3, a3, a5
-; RV32ZBA-NEXT:    mulh a1, a1, a7
-; RV32ZBA-NEXT:    add a1, a1, a3
-; RV32ZBA-NEXT:    xor a1, a1, a4
-; RV32ZBA-NEXT:    or a1, a6, a1
+; RV32ZBA-NEXT:    mul a6, a1, a3
+; RV32ZBA-NEXT:    add a6, a5, a6
+; RV32ZBA-NEXT:    srai a7, a4, 31
+; RV32ZBA-NEXT:    xor t0, a6, a7
+; RV32ZBA-NEXT:    sltu a5, a6, a5
+; RV32ZBA-NEXT:    mulh a1, a1, a3
+; RV32ZBA-NEXT:    add a1, a1, a5
+; RV32ZBA-NEXT:    xor a1, a1, a7
+; RV32ZBA-NEXT:    or a1, t0, a1
 ; RV32ZBA-NEXT:    snez a1, a1
-; RV32ZBA-NEXT:    mul a0, a0, a7
+; RV32ZBA-NEXT:    mul a0, a0, a3
 ; RV32ZBA-NEXT:    sw a0, 0(a2)
-; RV32ZBA-NEXT:    sw t0, 4(a2)
+; RV32ZBA-NEXT:    sw a4, 4(a2)
 ; RV32ZBA-NEXT:    mv a0, a1
 ; RV32ZBA-NEXT:    ret
 ;
@@ -1289,25 +1277,25 @@ define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, i32* %2) {
 define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) {
 ; RV32-LABEL: umulo.i64:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    mul a6, a3, a0
-; RV32-NEXT:    mul a5, a1, a2
-; RV32-NEXT:    add a6, a5, a6
-; RV32-NEXT:    mulhu a5, a0, a2
-; RV32-NEXT:    add a6, a5, a6
-; RV32-NEXT:    sltu a7, a6, a5
-; RV32-NEXT:    snez t0, a3
-; RV32-NEXT:    snez a5, a1
-; RV32-NEXT:    and a5, a5, t0
+; RV32-NEXT:    mul a5, a3, a0
+; RV32-NEXT:    mul a6, a1, a2
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    mulhu a6, a0, a2
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    sltu a6, a5, a6
+; RV32-NEXT:    snez a7, a3
+; RV32-NEXT:    snez t0, a1
+; RV32-NEXT:    and a7, t0, a7
 ; RV32-NEXT:    mulhu a1, a1, a2
 ; RV32-NEXT:    snez a1, a1
-; RV32-NEXT:    or a1, a5, a1
+; RV32-NEXT:    or a1, a7, a1
 ; RV32-NEXT:    mulhu a3, a3, a0
 ; RV32-NEXT:    snez a3, a3
 ; RV32-NEXT:    or a1, a1, a3
-; RV32-NEXT:    or a1, a1, a7
+; RV32-NEXT:    or a1, a1, a6
 ; RV32-NEXT:    mul a0, a0, a2
 ; RV32-NEXT:    sw a0, 0(a4)
-; RV32-NEXT:    sw a6, 4(a4)
+; RV32-NEXT:    sw a5, 4(a4)
 ; RV32-NEXT:    mv a0, a1
 ; RV32-NEXT:    ret
 ;
@@ -1322,25 +1310,25 @@ define zeroext i1 @umulo.i64(i64 %v1, i64 %v2, i64* %res) {
 ;
 ; RV32ZBA-LABEL: umulo.i64:
 ; RV32ZBA:       # %bb.0: # %entry
-; RV32ZBA-NEXT:    mul a6, a3, a0
-; RV32ZBA-NEXT:    mul a5, a1, a2
-; RV32ZBA-NEXT:    add a6, a5, a6
-; RV32ZBA-NEXT:    mulhu a5, a0, a2
-; RV32ZBA-NEXT:    add a6, a5, a6
-; RV32ZBA-NEXT:    sltu a7, a6, a5
-; RV32ZBA-NEXT:    snez t0, a3
-; RV32ZBA-NEXT:    snez a5, a1
-; RV32ZBA-NEXT:    and a5, a5, t0
+; RV32ZBA-NEXT:    mul a5, a3, a0
+; RV32ZBA-NEXT:    mul a6, a1, a2
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    mulhu a6, a0, a2
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    sltu a6, a5, a6
+; RV32ZBA-NEXT:    snez a7, a3
+; RV32ZBA-NEXT:    snez t0, a1
+; RV32ZBA-NEXT:    and a7, t0, a7
 ; RV32ZBA-NEXT:    mulhu a1, a1, a2
 ; RV32ZBA-NEXT:    snez a1, a1
-; RV32ZBA-NEXT:    or a1, a5, a1
+; RV32ZBA-NEXT:    or a1, a7, a1
 ; RV32ZBA-NEXT:    mulhu a3, a3, a0
 ; RV32ZBA-NEXT:    snez a3, a3
 ; RV32ZBA-NEXT:    or a1, a1, a3
-; RV32ZBA-NEXT:    or a1, a1, a7
+; RV32ZBA-NEXT:    or a1, a1, a6
 ; RV32ZBA-NEXT:    mul a0, a0, a2
 ; RV32ZBA-NEXT:    sw a0, 0(a4)
-; RV32ZBA-NEXT:    sw a6, 4(a4)
+; RV32ZBA-NEXT:    sw a5, 4(a4)
 ; RV32ZBA-NEXT:    mv a0, a1
 ; RV32ZBA-NEXT:    ret
 ;
@@ -2340,62 +2328,56 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset s0, -4
-; RV32-NEXT:    .cfi_offset s1, -8
-; RV32-NEXT:    .cfi_offset s2, -12
 ; RV32-NEXT:    mulhu a4, a0, a2
 ; RV32-NEXT:    mul a5, a1, a2
 ; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    sltu a6, a4, a5
-; RV32-NEXT:    mulhu a5, a1, a2
+; RV32-NEXT:    sltu a5, a4, a5
+; RV32-NEXT:    mulhu a6, a1, a2
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    mul a6, a0, a3
+; RV32-NEXT:    add a4, a6, a4
+; RV32-NEXT:    sltu a6, a4, a6
+; RV32-NEXT:    mulhu a7, a0, a3
+; RV32-NEXT:    add a6, a7, a6
 ; RV32-NEXT:    add a6, a5, a6
-; RV32-NEXT:    mul a5, a0, a3
-; RV32-NEXT:    add a7, a5, a4
-; RV32-NEXT:    sltu a5, a7, a5
-; RV32-NEXT:    mulhu a4, a0, a3
-; RV32-NEXT:    add a4, a4, a5
-; RV32-NEXT:    add t0, a6, a4
-; RV32-NEXT:    mul t1, a1, a3
-; RV32-NEXT:    add a4, t1, t0
-; RV32-NEXT:    srai a5, a1, 31
-; RV32-NEXT:    mul t2, a2, a5
+; RV32-NEXT:    mul a7, a1, a3
+; RV32-NEXT:    add t0, a7, a6
+; RV32-NEXT:    srai t1, a1, 31
+; RV32-NEXT:    mul t2, a2, t1
 ; RV32-NEXT:    srai t3, a3, 31
 ; RV32-NEXT:    mul t4, t3, a0
 ; RV32-NEXT:    add t5, t4, t2
-; RV32-NEXT:    add t6, a4, t5
-; RV32-NEXT:    sltu s2, t6, a4
-; RV32-NEXT:    sltu a4, a4, t1
-; RV32-NEXT:    sltu s0, t0, a6
-; RV32-NEXT:    mulhu s1, a1, a3
-; RV32-NEXT:    add s1, s1, s0
-; RV32-NEXT:    add a4, s1, a4
-; RV32-NEXT:    mulhu s1, a2, a5
-; RV32-NEXT:    add s1, s1, t2
-; RV32-NEXT:    mul a5, a3, a5
-; RV32-NEXT:    add a5, s1, a5
-; RV32-NEXT:    mul s1, t3, a1
-; RV32-NEXT:    mulhu s0, t3, a0
-; RV32-NEXT:    add s1, s0, s1
-; RV32-NEXT:    add s1, s1, t4
-; RV32-NEXT:    add a5, s1, a5
-; RV32-NEXT:    sltu s1, t5, t4
-; RV32-NEXT:    add a5, a5, s1
-; RV32-NEXT:    add a4, a4, a5
-; RV32-NEXT:    add a4, a4, s2
-; RV32-NEXT:    srai a5, a7, 31
-; RV32-NEXT:    xor a4, a4, a5
-; RV32-NEXT:    xor a5, t6, a5
-; RV32-NEXT:    or a4, a5, a4
+; RV32-NEXT:    add t6, t0, t5
+; RV32-NEXT:    sltu s0, t6, t0
+; RV32-NEXT:    sltu a7, t0, a7
+; RV32-NEXT:    sltu a5, a6, a5
+; RV32-NEXT:    mulhu a6, a1, a3
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    add a5, a5, a7
+; RV32-NEXT:    mulhu a6, a2, t1
+; RV32-NEXT:    add a6, a6, t2
+; RV32-NEXT:    mul a7, a3, t1
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    mul a7, t3, a1
+; RV32-NEXT:    mulhu t0, t3, a0
+; RV32-NEXT:    add a7, t0, a7
+; RV32-NEXT:    add a7, a7, t4
+; RV32-NEXT:    add a6, a7, a6
+; RV32-NEXT:    sltu a7, t5, t4
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    add a5, a5, a6
+; RV32-NEXT:    add a5, a5, s0
+; RV32-NEXT:    srai a4, a4, 31
+; RV32-NEXT:    xor a5, a5, a4
+; RV32-NEXT:    xor a4, t6, a4
+; RV32-NEXT:    or a4, a4, a5
 ; RV32-NEXT:    bnez a4, .LBB44_2
 ; RV32-NEXT:  # %bb.1: # %entry
 ; RV32-NEXT:    mv a0, a2
 ; RV32-NEXT:    mv a1, a3
 ; RV32-NEXT:  .LBB44_2: # %entry
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -2415,62 +2397,56 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    addi sp, sp, -16
 ; RV32ZBA-NEXT:    .cfi_def_cfa_offset 16
 ; RV32ZBA-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
-; RV32ZBA-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32ZBA-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
 ; RV32ZBA-NEXT:    .cfi_offset s0, -4
-; RV32ZBA-NEXT:    .cfi_offset s1, -8
-; RV32ZBA-NEXT:    .cfi_offset s2, -12
 ; RV32ZBA-NEXT:    mulhu a4, a0, a2
 ; RV32ZBA-NEXT:    mul a5, a1, a2
 ; RV32ZBA-NEXT:    add a4, a5, a4
-; RV32ZBA-NEXT:    sltu a6, a4, a5
-; RV32ZBA-NEXT:    mulhu a5, a1, a2
+; RV32ZBA-NEXT:    sltu a5, a4, a5
+; RV32ZBA-NEXT:    mulhu a6, a1, a2
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    mul a6, a0, a3
+; RV32ZBA-NEXT:    add a4, a6, a4
+; RV32ZBA-NEXT:    sltu a6, a4, a6
+; RV32ZBA-NEXT:    mulhu a7, a0, a3
+; RV32ZBA-NEXT:    add a6, a7, a6
 ; RV32ZBA-NEXT:    add a6, a5, a6
-; RV32ZBA-NEXT:    mul a5, a0, a3
-; RV32ZBA-NEXT:    add a7, a5, a4
-; RV32ZBA-NEXT:    sltu a5, a7, a5
-; RV32ZBA-NEXT:    mulhu a4, a0, a3
-; RV32ZBA-NEXT:    add a4, a4, a5
-; RV32ZBA-NEXT:    add t0, a6, a4
-; RV32ZBA-NEXT:    mul t1, a1, a3
-; RV32ZBA-NEXT:    add a4, t1, t0
-; RV32ZBA-NEXT:    srai a5, a1, 31
-; RV32ZBA-NEXT:    mul t2, a2, a5
+; RV32ZBA-NEXT:    mul a7, a1, a3
+; RV32ZBA-NEXT:    add t0, a7, a6
+; RV32ZBA-NEXT:    srai t1, a1, 31
+; RV32ZBA-NEXT:    mul t2, a2, t1
 ; RV32ZBA-NEXT:    srai t3, a3, 31
 ; RV32ZBA-NEXT:    mul t4, t3, a0
 ; RV32ZBA-NEXT:    add t5, t4, t2
-; RV32ZBA-NEXT:    add t6, a4, t5
-; RV32ZBA-NEXT:    sltu s2, t6, a4
-; RV32ZBA-NEXT:    sltu a4, a4, t1
-; RV32ZBA-NEXT:    sltu s0, t0, a6
-; RV32ZBA-NEXT:    mulhu s1, a1, a3
-; RV32ZBA-NEXT:    add s1, s1, s0
-; RV32ZBA-NEXT:    add a4, s1, a4
-; RV32ZBA-NEXT:    mulhu s1, a2, a5
-; RV32ZBA-NEXT:    add s1, s1, t2
-; RV32ZBA-NEXT:    mul a5, a3, a5
-; RV32ZBA-NEXT:    add a5, s1, a5
-; RV32ZBA-NEXT:    mul s1, t3, a1
-; RV32ZBA-NEXT:    mulhu s0, t3, a0
-; RV32ZBA-NEXT:    add s1, s0, s1
-; RV32ZBA-NEXT:    add s1, s1, t4
-; RV32ZBA-NEXT:    add a5, s1, a5
-; RV32ZBA-NEXT:    sltu s1, t5, t4
-; RV32ZBA-NEXT:    add a5, a5, s1
-; RV32ZBA-NEXT:    add a4, a4, a5
-; RV32ZBA-NEXT:    add a4, a4, s2
-; RV32ZBA-NEXT:    srai a5, a7, 31
-; RV32ZBA-NEXT:    xor a4, a4, a5
-; RV32ZBA-NEXT:    xor a5, t6, a5
-; RV32ZBA-NEXT:    or a4, a5, a4
+; RV32ZBA-NEXT:    add t6, t0, t5
+; RV32ZBA-NEXT:    sltu s0, t6, t0
+; RV32ZBA-NEXT:    sltu a7, t0, a7
+; RV32ZBA-NEXT:    sltu a5, a6, a5
+; RV32ZBA-NEXT:    mulhu a6, a1, a3
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    add a5, a5, a7
+; RV32ZBA-NEXT:    mulhu a6, a2, t1
+; RV32ZBA-NEXT:    add a6, a6, t2
+; RV32ZBA-NEXT:    mul a7, a3, t1
+; RV32ZBA-NEXT:    add a6, a6, a7
+; RV32ZBA-NEXT:    mul a7, t3, a1
+; RV32ZBA-NEXT:    mulhu t0, t3, a0
+; RV32ZBA-NEXT:    add a7, t0, a7
+; RV32ZBA-NEXT:    add a7, a7, t4
+; RV32ZBA-NEXT:    add a6, a7, a6
+; RV32ZBA-NEXT:    sltu a7, t5, t4
+; RV32ZBA-NEXT:    add a6, a6, a7
+; RV32ZBA-NEXT:    add a5, a5, a6
+; RV32ZBA-NEXT:    add a5, a5, s0
+; RV32ZBA-NEXT:    srai a4, a4, 31
+; RV32ZBA-NEXT:    xor a5, a5, a4
+; RV32ZBA-NEXT:    xor a4, t6, a4
+; RV32ZBA-NEXT:    or a4, a4, a5
 ; RV32ZBA-NEXT:    bnez a4, .LBB44_2
 ; RV32ZBA-NEXT:  # %bb.1: # %entry
 ; RV32ZBA-NEXT:    mv a0, a2
 ; RV32ZBA-NEXT:    mv a1, a3
 ; RV32ZBA-NEXT:  .LBB44_2: # %entry
 ; RV32ZBA-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    addi sp, sp, 16
 ; RV32ZBA-NEXT:    ret
 ;
@@ -2497,40 +2473,36 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset s0, -4
-; RV32-NEXT:    .cfi_offset s1, -8
-; RV32-NEXT:    .cfi_offset s2, -12
 ; RV32-NEXT:    mulhu a4, a0, a2
 ; RV32-NEXT:    mul a5, a1, a2
 ; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    sltu a6, a4, a5
-; RV32-NEXT:    mulhu a5, a1, a2
+; RV32-NEXT:    sltu a5, a4, a5
+; RV32-NEXT:    mulhu a6, a1, a2
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    mul a6, a0, a3
+; RV32-NEXT:    add a4, a6, a4
+; RV32-NEXT:    sltu a6, a4, a6
+; RV32-NEXT:    mulhu a7, a0, a3
+; RV32-NEXT:    add a6, a7, a6
 ; RV32-NEXT:    add a6, a5, a6
-; RV32-NEXT:    mul a5, a0, a3
-; RV32-NEXT:    add a7, a5, a4
-; RV32-NEXT:    sltu a5, a7, a5
-; RV32-NEXT:    mulhu a4, a0, a3
-; RV32-NEXT:    add a4, a4, a5
-; RV32-NEXT:    add t0, a6, a4
-; RV32-NEXT:    mul t1, a1, a3
-; RV32-NEXT:    add a4, t1, t0
-; RV32-NEXT:    srai a5, a1, 31
-; RV32-NEXT:    mul t2, a2, a5
+; RV32-NEXT:    mul a7, a1, a3
+; RV32-NEXT:    add t0, a7, a6
+; RV32-NEXT:    srai t1, a1, 31
+; RV32-NEXT:    mul t2, a2, t1
 ; RV32-NEXT:    srai t3, a3, 31
 ; RV32-NEXT:    mul t4, t3, a0
 ; RV32-NEXT:    add t5, t4, t2
-; RV32-NEXT:    add t6, a4, t5
-; RV32-NEXT:    sltu s2, t6, a4
-; RV32-NEXT:    sltu a4, a4, t1
-; RV32-NEXT:    sltu s0, t0, a6
-; RV32-NEXT:    mulhu s1, a1, a3
-; RV32-NEXT:    add s1, s1, s0
-; RV32-NEXT:    add a4, s1, a4
-; RV32-NEXT:    mulhu a2, a2, a5
+; RV32-NEXT:    add t6, t0, t5
+; RV32-NEXT:    sltu s0, t6, t0
+; RV32-NEXT:    sltu a7, t0, a7
+; RV32-NEXT:    sltu a5, a6, a5
+; RV32-NEXT:    mulhu a6, a1, a3
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    add a5, a5, a7
+; RV32-NEXT:    mulhu a2, a2, t1
 ; RV32-NEXT:    add a2, a2, t2
-; RV32-NEXT:    mul a3, a3, a5
+; RV32-NEXT:    mul a3, a3, t1
 ; RV32-NEXT:    add a2, a2, a3
 ; RV32-NEXT:    mul a1, t3, a1
 ; RV32-NEXT:    mulhu a0, t3, a0
@@ -2539,16 +2511,14 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    add a0, a0, a2
 ; RV32-NEXT:    sltu a1, t5, t4
 ; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, a4, a0
-; RV32-NEXT:    add a0, a0, s2
-; RV32-NEXT:    srai a1, a7, 31
+; RV32-NEXT:    add a0, a5, a0
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    srai a1, a4, 31
 ; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    xor a1, t6, a1
 ; RV32-NEXT:    or a0, a1, a0
 ; RV32-NEXT:    seqz a0, a0
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -2566,40 +2536,36 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    addi sp, sp, -16
 ; RV32ZBA-NEXT:    .cfi_def_cfa_offset 16
 ; RV32ZBA-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
-; RV32ZBA-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32ZBA-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
 ; RV32ZBA-NEXT:    .cfi_offset s0, -4
-; RV32ZBA-NEXT:    .cfi_offset s1, -8
-; RV32ZBA-NEXT:    .cfi_offset s2, -12
 ; RV32ZBA-NEXT:    mulhu a4, a0, a2
 ; RV32ZBA-NEXT:    mul a5, a1, a2
 ; RV32ZBA-NEXT:    add a4, a5, a4
-; RV32ZBA-NEXT:    sltu a6, a4, a5
-; RV32ZBA-NEXT:    mulhu a5, a1, a2
+; RV32ZBA-NEXT:    sltu a5, a4, a5
+; RV32ZBA-NEXT:    mulhu a6, a1, a2
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    mul a6, a0, a3
+; RV32ZBA-NEXT:    add a4, a6, a4
+; RV32ZBA-NEXT:    sltu a6, a4, a6
+; RV32ZBA-NEXT:    mulhu a7, a0, a3
+; RV32ZBA-NEXT:    add a6, a7, a6
 ; RV32ZBA-NEXT:    add a6, a5, a6
-; RV32ZBA-NEXT:    mul a5, a0, a3
-; RV32ZBA-NEXT:    add a7, a5, a4
-; RV32ZBA-NEXT:    sltu a5, a7, a5
-; RV32ZBA-NEXT:    mulhu a4, a0, a3
-; RV32ZBA-NEXT:    add a4, a4, a5
-; RV32ZBA-NEXT:    add t0, a6, a4
-; RV32ZBA-NEXT:    mul t1, a1, a3
-; RV32ZBA-NEXT:    add a4, t1, t0
-; RV32ZBA-NEXT:    srai a5, a1, 31
-; RV32ZBA-NEXT:    mul t2, a2, a5
+; RV32ZBA-NEXT:    mul a7, a1, a3
+; RV32ZBA-NEXT:    add t0, a7, a6
+; RV32ZBA-NEXT:    srai t1, a1, 31
+; RV32ZBA-NEXT:    mul t2, a2, t1
 ; RV32ZBA-NEXT:    srai t3, a3, 31
 ; RV32ZBA-NEXT:    mul t4, t3, a0
 ; RV32ZBA-NEXT:    add t5, t4, t2
-; RV32ZBA-NEXT:    add t6, a4, t5
-; RV32ZBA-NEXT:    sltu s2, t6, a4
-; RV32ZBA-NEXT:    sltu a4, a4, t1
-; RV32ZBA-NEXT:    sltu s0, t0, a6
-; RV32ZBA-NEXT:    mulhu s1, a1, a3
-; RV32ZBA-NEXT:    add s1, s1, s0
-; RV32ZBA-NEXT:    add a4, s1, a4
-; RV32ZBA-NEXT:    mulhu a2, a2, a5
+; RV32ZBA-NEXT:    add t6, t0, t5
+; RV32ZBA-NEXT:    sltu s0, t6, t0
+; RV32ZBA-NEXT:    sltu a7, t0, a7
+; RV32ZBA-NEXT:    sltu a5, a6, a5
+; RV32ZBA-NEXT:    mulhu a6, a1, a3
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    add a5, a5, a7
+; RV32ZBA-NEXT:    mulhu a2, a2, t1
 ; RV32ZBA-NEXT:    add a2, a2, t2
-; RV32ZBA-NEXT:    mul a3, a3, a5
+; RV32ZBA-NEXT:    mul a3, a3, t1
 ; RV32ZBA-NEXT:    add a2, a2, a3
 ; RV32ZBA-NEXT:    mul a1, t3, a1
 ; RV32ZBA-NEXT:    mulhu a0, t3, a0
@@ -2608,16 +2574,14 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    add a0, a0, a2
 ; RV32ZBA-NEXT:    sltu a1, t5, t4
 ; RV32ZBA-NEXT:    add a0, a0, a1
-; RV32ZBA-NEXT:    add a0, a4, a0
-; RV32ZBA-NEXT:    add a0, a0, s2
-; RV32ZBA-NEXT:    srai a1, a7, 31
+; RV32ZBA-NEXT:    add a0, a5, a0
+; RV32ZBA-NEXT:    add a0, a0, s0
+; RV32ZBA-NEXT:    srai a1, a4, 31
 ; RV32ZBA-NEXT:    xor a0, a0, a1
 ; RV32ZBA-NEXT:    xor a1, t6, a1
 ; RV32ZBA-NEXT:    or a0, a1, a0
 ; RV32ZBA-NEXT:    seqz a0, a0
 ; RV32ZBA-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    addi sp, sp, 16
 ; RV32ZBA-NEXT:    ret
 ;
@@ -2730,17 +2694,17 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    add a4, a5, a4
 ; RV32-NEXT:    mulhu a5, a0, a2
 ; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    sltu a6, a4, a5
+; RV32-NEXT:    sltu a4, a4, a5
 ; RV32-NEXT:    snez a5, a3
-; RV32-NEXT:    snez a4, a1
-; RV32-NEXT:    and a4, a4, a5
-; RV32-NEXT:    mulhu a5, a1, a2
-; RV32-NEXT:    snez a5, a5
-; RV32-NEXT:    or a4, a4, a5
-; RV32-NEXT:    mulhu a5, a3, a0
-; RV32-NEXT:    snez a5, a5
-; RV32-NEXT:    or a4, a4, a5
-; RV32-NEXT:    or a4, a4, a6
+; RV32-NEXT:    snez a6, a1
+; RV32-NEXT:    and a5, a6, a5
+; RV32-NEXT:    mulhu a6, a1, a2
+; RV32-NEXT:    snez a6, a6
+; RV32-NEXT:    or a5, a5, a6
+; RV32-NEXT:    mulhu a6, a3, a0
+; RV32-NEXT:    snez a6, a6
+; RV32-NEXT:    or a5, a5, a6
+; RV32-NEXT:    or a4, a5, a4
 ; RV32-NEXT:    bnez a4, .LBB48_2
 ; RV32-NEXT:  # %bb.1: # %entry
 ; RV32-NEXT:    mv a0, a2
@@ -2764,17 +2728,17 @@ define i64 @umulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    add a4, a5, a4
 ; RV32ZBA-NEXT:    mulhu a5, a0, a2
 ; RV32ZBA-NEXT:    add a4, a5, a4
-; RV32ZBA-NEXT:    sltu a6, a4, a5
+; RV32ZBA-NEXT:    sltu a4, a4, a5
 ; RV32ZBA-NEXT:    snez a5, a3
-; RV32ZBA-NEXT:    snez a4, a1
-; RV32ZBA-NEXT:    and a4, a4, a5
-; RV32ZBA-NEXT:    mulhu a5, a1, a2
-; RV32ZBA-NEXT:    snez a5, a5
-; RV32ZBA-NEXT:    or a4, a4, a5
-; RV32ZBA-NEXT:    mulhu a5, a3, a0
-; RV32ZBA-NEXT:    snez a5, a5
-; RV32ZBA-NEXT:    or a4, a4, a5
-; RV32ZBA-NEXT:    or a4, a4, a6
+; RV32ZBA-NEXT:    snez a6, a1
+; RV32ZBA-NEXT:    and a5, a6, a5
+; RV32ZBA-NEXT:    mulhu a6, a1, a2
+; RV32ZBA-NEXT:    snez a6, a6
+; RV32ZBA-NEXT:    or a5, a5, a6
+; RV32ZBA-NEXT:    mulhu a6, a3, a0
+; RV32ZBA-NEXT:    snez a6, a6
+; RV32ZBA-NEXT:    or a5, a5, a6
+; RV32ZBA-NEXT:    or a4, a5, a4
 ; RV32ZBA-NEXT:    bnez a4, .LBB48_2
 ; RV32ZBA-NEXT:  # %bb.1: # %entry
 ; RV32ZBA-NEXT:    mv a0, a2
@@ -2805,17 +2769,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    add a4, a5, a4
 ; RV32-NEXT:    mulhu a5, a0, a2
 ; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    sltu a6, a4, a5
+; RV32-NEXT:    sltu a4, a4, a5
 ; RV32-NEXT:    snez a5, a3
-; RV32-NEXT:    snez a4, a1
-; RV32-NEXT:    and a4, a4, a5
+; RV32-NEXT:    snez a6, a1
+; RV32-NEXT:    and a5, a6, a5
 ; RV32-NEXT:    mulhu a1, a1, a2
 ; RV32-NEXT:    snez a1, a1
-; RV32-NEXT:    or a1, a4, a1
+; RV32-NEXT:    or a1, a5, a1
 ; RV32-NEXT:    mulhu a0, a3, a0
 ; RV32-NEXT:    snez a0, a0
 ; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:    or a0, a0, a6
+; RV32-NEXT:    or a0, a0, a4
 ; RV32-NEXT:    xori a0, a0, 1
 ; RV32-NEXT:    ret
 ;
@@ -2832,17 +2796,17 @@ define i1 @umulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    add a4, a5, a4
 ; RV32ZBA-NEXT:    mulhu a5, a0, a2
 ; RV32ZBA-NEXT:    add a4, a5, a4
-; RV32ZBA-NEXT:    sltu a6, a4, a5
+; RV32ZBA-NEXT:    sltu a4, a4, a5
 ; RV32ZBA-NEXT:    snez a5, a3
-; RV32ZBA-NEXT:    snez a4, a1
-; RV32ZBA-NEXT:    and a4, a4, a5
+; RV32ZBA-NEXT:    snez a6, a1
+; RV32ZBA-NEXT:    and a5, a6, a5
 ; RV32ZBA-NEXT:    mulhu a1, a1, a2
 ; RV32ZBA-NEXT:    snez a1, a1
-; RV32ZBA-NEXT:    or a1, a4, a1
+; RV32ZBA-NEXT:    or a1, a5, a1
 ; RV32ZBA-NEXT:    mulhu a0, a3, a0
 ; RV32ZBA-NEXT:    snez a0, a0
 ; RV32ZBA-NEXT:    or a0, a1, a0
-; RV32ZBA-NEXT:    or a0, a0, a6
+; RV32ZBA-NEXT:    or a0, a0, a4
 ; RV32ZBA-NEXT:    xori a0, a0, 1
 ; RV32ZBA-NEXT:    ret
 ;
@@ -3479,40 +3443,36 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset s0, -4
-; RV32-NEXT:    .cfi_offset s1, -8
-; RV32-NEXT:    .cfi_offset s2, -12
 ; RV32-NEXT:    mulhu a4, a0, a2
 ; RV32-NEXT:    mul a5, a1, a2
 ; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    sltu a6, a4, a5
-; RV32-NEXT:    mulhu a5, a1, a2
+; RV32-NEXT:    sltu a5, a4, a5
+; RV32-NEXT:    mulhu a6, a1, a2
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    mul a6, a0, a3
+; RV32-NEXT:    add a4, a6, a4
+; RV32-NEXT:    sltu a6, a4, a6
+; RV32-NEXT:    mulhu a7, a0, a3
+; RV32-NEXT:    add a6, a7, a6
 ; RV32-NEXT:    add a6, a5, a6
-; RV32-NEXT:    mul a5, a0, a3
-; RV32-NEXT:    add a7, a5, a4
-; RV32-NEXT:    sltu a5, a7, a5
-; RV32-NEXT:    mulhu a4, a0, a3
-; RV32-NEXT:    add a4, a4, a5
-; RV32-NEXT:    add t0, a6, a4
-; RV32-NEXT:    mul t1, a1, a3
-; RV32-NEXT:    add a4, t1, t0
-; RV32-NEXT:    srai a5, a1, 31
-; RV32-NEXT:    mul t2, a2, a5
+; RV32-NEXT:    mul a7, a1, a3
+; RV32-NEXT:    add t0, a7, a6
+; RV32-NEXT:    srai t1, a1, 31
+; RV32-NEXT:    mul t2, a2, t1
 ; RV32-NEXT:    srai t3, a3, 31
 ; RV32-NEXT:    mul t4, t3, a0
 ; RV32-NEXT:    add t5, t4, t2
-; RV32-NEXT:    add t6, a4, t5
-; RV32-NEXT:    sltu s2, t6, a4
-; RV32-NEXT:    sltu a4, a4, t1
-; RV32-NEXT:    sltu s0, t0, a6
-; RV32-NEXT:    mulhu s1, a1, a3
-; RV32-NEXT:    add s1, s1, s0
-; RV32-NEXT:    add a4, s1, a4
-; RV32-NEXT:    mulhu a2, a2, a5
+; RV32-NEXT:    add t6, t0, t5
+; RV32-NEXT:    sltu s0, t6, t0
+; RV32-NEXT:    sltu a7, t0, a7
+; RV32-NEXT:    sltu a5, a6, a5
+; RV32-NEXT:    mulhu a6, a1, a3
+; RV32-NEXT:    add a5, a6, a5
+; RV32-NEXT:    add a5, a5, a7
+; RV32-NEXT:    mulhu a2, a2, t1
 ; RV32-NEXT:    add a2, a2, t2
-; RV32-NEXT:    mul a3, a3, a5
+; RV32-NEXT:    mul a3, a3, t1
 ; RV32-NEXT:    add a2, a2, a3
 ; RV32-NEXT:    mul a1, t3, a1
 ; RV32-NEXT:    mulhu a0, t3, a0
@@ -3521,9 +3481,9 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    add a0, a0, a2
 ; RV32-NEXT:    sltu a1, t5, t4
 ; RV32-NEXT:    add a0, a0, a1
-; RV32-NEXT:    add a0, a4, a0
-; RV32-NEXT:    add a0, a0, s2
-; RV32-NEXT:    srai a1, a7, 31
+; RV32-NEXT:    add a0, a5, a0
+; RV32-NEXT:    add a0, a0, s0
+; RV32-NEXT:    srai a1, a4, 31
 ; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    xor a1, t6, a1
 ; RV32-NEXT:    or a0, a1, a0
@@ -3535,8 +3495,6 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    li a0, 1
 ; RV32-NEXT:  .LBB59_3: # %overflow
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -3558,40 +3516,36 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    addi sp, sp, -16
 ; RV32ZBA-NEXT:    .cfi_def_cfa_offset 16
 ; RV32ZBA-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
-; RV32ZBA-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
-; RV32ZBA-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
 ; RV32ZBA-NEXT:    .cfi_offset s0, -4
-; RV32ZBA-NEXT:    .cfi_offset s1, -8
-; RV32ZBA-NEXT:    .cfi_offset s2, -12
 ; RV32ZBA-NEXT:    mulhu a4, a0, a2
 ; RV32ZBA-NEXT:    mul a5, a1, a2
 ; RV32ZBA-NEXT:    add a4, a5, a4
-; RV32ZBA-NEXT:    sltu a6, a4, a5
-; RV32ZBA-NEXT:    mulhu a5, a1, a2
+; RV32ZBA-NEXT:    sltu a5, a4, a5
+; RV32ZBA-NEXT:    mulhu a6, a1, a2
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    mul a6, a0, a3
+; RV32ZBA-NEXT:    add a4, a6, a4
+; RV32ZBA-NEXT:    sltu a6, a4, a6
+; RV32ZBA-NEXT:    mulhu a7, a0, a3
+; RV32ZBA-NEXT:    add a6, a7, a6
 ; RV32ZBA-NEXT:    add a6, a5, a6
-; RV32ZBA-NEXT:    mul a5, a0, a3
-; RV32ZBA-NEXT:    add a7, a5, a4
-; RV32ZBA-NEXT:    sltu a5, a7, a5
-; RV32ZBA-NEXT:    mulhu a4, a0, a3
-; RV32ZBA-NEXT:    add a4, a4, a5
-; RV32ZBA-NEXT:    add t0, a6, a4
-; RV32ZBA-NEXT:    mul t1, a1, a3
-; RV32ZBA-NEXT:    add a4, t1, t0
-; RV32ZBA-NEXT:    srai a5, a1, 31
-; RV32ZBA-NEXT:    mul t2, a2, a5
+; RV32ZBA-NEXT:    mul a7, a1, a3
+; RV32ZBA-NEXT:    add t0, a7, a6
+; RV32ZBA-NEXT:    srai t1, a1, 31
+; RV32ZBA-NEXT:    mul t2, a2, t1
 ; RV32ZBA-NEXT:    srai t3, a3, 31
 ; RV32ZBA-NEXT:    mul t4, t3, a0
 ; RV32ZBA-NEXT:    add t5, t4, t2
-; RV32ZBA-NEXT:    add t6, a4, t5
-; RV32ZBA-NEXT:    sltu s2, t6, a4
-; RV32ZBA-NEXT:    sltu a4, a4, t1
-; RV32ZBA-NEXT:    sltu s0, t0, a6
-; RV32ZBA-NEXT:    mulhu s1, a1, a3
-; RV32ZBA-NEXT:    add s1, s1, s0
-; RV32ZBA-NEXT:    add a4, s1, a4
-; RV32ZBA-NEXT:    mulhu a2, a2, a5
+; RV32ZBA-NEXT:    add t6, t0, t5
+; RV32ZBA-NEXT:    sltu s0, t6, t0
+; RV32ZBA-NEXT:    sltu a7, t0, a7
+; RV32ZBA-NEXT:    sltu a5, a6, a5
+; RV32ZBA-NEXT:    mulhu a6, a1, a3
+; RV32ZBA-NEXT:    add a5, a6, a5
+; RV32ZBA-NEXT:    add a5, a5, a7
+; RV32ZBA-NEXT:    mulhu a2, a2, t1
 ; RV32ZBA-NEXT:    add a2, a2, t2
-; RV32ZBA-NEXT:    mul a3, a3, a5
+; RV32ZBA-NEXT:    mul a3, a3, t1
 ; RV32ZBA-NEXT:    add a2, a2, a3
 ; RV32ZBA-NEXT:    mul a1, t3, a1
 ; RV32ZBA-NEXT:    mulhu a0, t3, a0
@@ -3600,9 +3554,9 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    add a0, a0, a2
 ; RV32ZBA-NEXT:    sltu a1, t5, t4
 ; RV32ZBA-NEXT:    add a0, a0, a1
-; RV32ZBA-NEXT:    add a0, a4, a0
-; RV32ZBA-NEXT:    add a0, a0, s2
-; RV32ZBA-NEXT:    srai a1, a7, 31
+; RV32ZBA-NEXT:    add a0, a5, a0
+; RV32ZBA-NEXT:    add a0, a0, s0
+; RV32ZBA-NEXT:    srai a1, a4, 31
 ; RV32ZBA-NEXT:    xor a0, a0, a1
 ; RV32ZBA-NEXT:    xor a1, t6, a1
 ; RV32ZBA-NEXT:    or a0, a1, a0
@@ -3614,8 +3568,6 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    li a0, 1
 ; RV32ZBA-NEXT:  .LBB59_3: # %overflow
 ; RV32ZBA-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    addi sp, sp, 16
 ; RV32ZBA-NEXT:    ret
 ;
@@ -3647,41 +3599,41 @@ continue:
 define zeroext i1 @smulo2.br.i64(i64 %v1) {
 ; RV32-LABEL: smulo2.br.i64:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    li a6, -13
-; RV32-NEXT:    mulhu a3, a0, a6
-; RV32-NEXT:    mul a4, a1, a6
+; RV32-NEXT:    li a2, -13
+; RV32-NEXT:    mulhu a3, a0, a2
+; RV32-NEXT:    mul a4, a1, a2
 ; RV32-NEXT:    add a3, a4, a3
 ; RV32-NEXT:    sltu a4, a3, a4
-; RV32-NEXT:    mulhu a5, a1, a6
-; RV32-NEXT:    add t3, a5, a4
-; RV32-NEXT:    sub t0, a3, a0
-; RV32-NEXT:    neg t1, a0
-; RV32-NEXT:    sltu a2, t0, t1
+; RV32-NEXT:    mulhu a5, a1, a2
+; RV32-NEXT:    add a4, a5, a4
+; RV32-NEXT:    sub a3, a3, a0
+; RV32-NEXT:    neg a5, a0
+; RV32-NEXT:    sltu a6, a3, a5
 ; RV32-NEXT:    li a7, -1
-; RV32-NEXT:    mulhu t2, a0, a7
-; RV32-NEXT:    add a2, t2, a2
-; RV32-NEXT:    add a2, t3, a2
-; RV32-NEXT:    sub a5, a2, a1
-; RV32-NEXT:    srai t6, a1, 31
-; RV32-NEXT:    mul a4, t6, a6
-; RV32-NEXT:    sub a4, a4, a0
-; RV32-NEXT:    add t4, a5, a4
-; RV32-NEXT:    sltu t5, t4, a5
-; RV32-NEXT:    neg a3, a1
-; RV32-NEXT:    sltu a3, a5, a3
-; RV32-NEXT:    sltu a2, a2, t3
-; RV32-NEXT:    mulhu a5, a1, a7
-; RV32-NEXT:    add a2, a5, a2
-; RV32-NEXT:    add a2, a2, a3
-; RV32-NEXT:    sltu a3, a4, t1
-; RV32-NEXT:    mulh a4, t6, a6
-; RV32-NEXT:    sub a0, t2, a0
+; RV32-NEXT:    mulhu t0, a0, a7
+; RV32-NEXT:    add a6, t0, a6
+; RV32-NEXT:    add a6, a4, a6
+; RV32-NEXT:    sub t1, a6, a1
+; RV32-NEXT:    srai t2, a1, 31
+; RV32-NEXT:    mul t3, t2, a2
+; RV32-NEXT:    sub t3, t3, a0
+; RV32-NEXT:    add t4, t1, t3
+; RV32-NEXT:    sltu t5, t4, t1
+; RV32-NEXT:    neg t6, a1
+; RV32-NEXT:    sltu t1, t1, t6
+; RV32-NEXT:    sltu a4, a6, a4
+; RV32-NEXT:    mulhu a6, a1, a7
+; RV32-NEXT:    add a4, a6, a4
+; RV32-NEXT:    add a4, a4, t1
+; RV32-NEXT:    sltu a5, t3, a5
+; RV32-NEXT:    mulh a2, t2, a2
+; RV32-NEXT:    sub a0, t0, a0
 ; RV32-NEXT:    sub a0, a0, a1
-; RV32-NEXT:    add a0, a0, a4
-; RV32-NEXT:    add a0, a0, a3
-; RV32-NEXT:    add a0, a2, a0
+; RV32-NEXT:    add a0, a0, a2
+; RV32-NEXT:    add a0, a0, a5
+; RV32-NEXT:    add a0, a4, a0
 ; RV32-NEXT:    add a0, a0, t5
-; RV32-NEXT:    srai a1, t0, 31
+; RV32-NEXT:    srai a1, a3, 31
 ; RV32-NEXT:    xor a0, a0, a1
 ; RV32-NEXT:    xor a1, t4, a1
 ; RV32-NEXT:    or a0, a1, a0
@@ -3709,41 +3661,41 @@ define zeroext i1 @smulo2.br.i64(i64 %v1) {
 ;
 ; RV32ZBA-LABEL: smulo2.br.i64:
 ; RV32ZBA:       # %bb.0: # %entry
-; RV32ZBA-NEXT:    li a6, -13
-; RV32ZBA-NEXT:    mulhu a3, a0, a6
-; RV32ZBA-NEXT:    mul a4, a1, a6
+; RV32ZBA-NEXT:    li a2, -13
+; RV32ZBA-NEXT:    mulhu a3, a0, a2
+; RV32ZBA-NEXT:    mul a4, a1, a2
 ; RV32ZBA-NEXT:    add a3, a4, a3
 ; RV32ZBA-NEXT:    sltu a4, a3, a4
-; RV32ZBA-NEXT:    mulhu a5, a1, a6
-; RV32ZBA-NEXT:    add t3, a5, a4
-; RV32ZBA-NEXT:    sub t0, a3, a0
-; RV32ZBA-NEXT:    neg t1, a0
-; RV32ZBA-NEXT:    sltu a2, t0, t1
+; RV32ZBA-NEXT:    mulhu a5, a1, a2
+; RV32ZBA-NEXT:    add a4, a5, a4
+; RV32ZBA-NEXT:    sub a3, a3, a0
+; RV32ZBA-NEXT:    neg a5, a0
+; RV32ZBA-NEXT:    sltu a6, a3, a5
 ; RV32ZBA-NEXT:    li a7, -1
-; RV32ZBA-NEXT:    mulhu t2, a0, a7
-; RV32ZBA-NEXT:    add a2, t2, a2
-; RV32ZBA-NEXT:    add a2, t3, a2
-; RV32ZBA-NEXT:    sub a5, a2, a1
-; RV32ZBA-NEXT:    srai t6, a1, 31
-; RV32ZBA-NEXT:    mul a4, t6, a6
-; RV32ZBA-NEXT:    sub a4, a4, a0
-; RV32ZBA-NEXT:    add t4, a5, a4
-; RV32ZBA-NEXT:    sltu t5, t4, a5
-; RV32ZBA-NEXT:    neg a3, a1
-; RV32ZBA-NEXT:    sltu a3, a5, a3
-; RV32ZBA-NEXT:    sltu a2, a2, t3
-; RV32ZBA-NEXT:    mulhu a5, a1, a7
-; RV32ZBA-NEXT:    add a2, a5, a2
-; RV32ZBA-NEXT:    add a2, a2, a3
-; RV32ZBA-NEXT:    sltu a3, a4, t1
-; RV32ZBA-NEXT:    mulh a4, t6, a6
-; RV32ZBA-NEXT:    sub a0, t2, a0
+; RV32ZBA-NEXT:    mulhu t0, a0, a7
+; RV32ZBA-NEXT:    add a6, t0, a6
+; RV32ZBA-NEXT:    add a6, a4, a6
+; RV32ZBA-NEXT:    sub t1, a6, a1
+; RV32ZBA-NEXT:    srai t2, a1, 31
+; RV32ZBA-NEXT:    mul t3, t2, a2
+; RV32ZBA-NEXT:    sub t3, t3, a0
+; RV32ZBA-NEXT:    add t4, t1, t3
+; RV32ZBA-NEXT:    sltu t5, t4, t1
+; RV32ZBA-NEXT:    neg t6, a1
+; RV32ZBA-NEXT:    sltu t1, t1, t6
+; RV32ZBA-NEXT:    sltu a4, a6, a4
+; RV32ZBA-NEXT:    mulhu a6, a1, a7
+; RV32ZBA-NEXT:    add a4, a6, a4
+; RV32ZBA-NEXT:    add a4, a4, t1
+; RV32ZBA-NEXT:    sltu a5, t3, a5
+; RV32ZBA-NEXT:    mulh a2, t2, a2
+; RV32ZBA-NEXT:    sub a0, t0, a0
 ; RV32ZBA-NEXT:    sub a0, a0, a1
-; RV32ZBA-NEXT:    add a0, a0, a4
-; RV32ZBA-NEXT:    add a0, a0, a3
-; RV32ZBA-NEXT:    add a0, a2, a0
+; RV32ZBA-NEXT:    add a0, a0, a2
+; RV32ZBA-NEXT:    add a0, a0, a5
+; RV32ZBA-NEXT:    add a0, a4, a0
 ; RV32ZBA-NEXT:    add a0, a0, t5
-; RV32ZBA-NEXT:    srai a1, t0, 31
+; RV32ZBA-NEXT:    srai a1, a3, 31
 ; RV32ZBA-NEXT:    xor a0, a0, a1
 ; RV32ZBA-NEXT:    xor a1, t4, a1
 ; RV32ZBA-NEXT:    or a0, a1, a0
@@ -3852,17 +3804,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    add a4, a5, a4
 ; RV32-NEXT:    mulhu a5, a0, a2
 ; RV32-NEXT:    add a4, a5, a4
-; RV32-NEXT:    sltu a6, a4, a5
+; RV32-NEXT:    sltu a4, a4, a5
 ; RV32-NEXT:    snez a5, a3
-; RV32-NEXT:    snez a4, a1
-; RV32-NEXT:    and a4, a4, a5
+; RV32-NEXT:    snez a6, a1
+; RV32-NEXT:    and a5, a6, a5
 ; RV32-NEXT:    mulhu a1, a1, a2
 ; RV32-NEXT:    snez a1, a1
-; RV32-NEXT:    or a1, a4, a1
+; RV32-NEXT:    or a1, a5, a1
 ; RV32-NEXT:    mulhu a0, a3, a0
 ; RV32-NEXT:    snez a0, a0
 ; RV32-NEXT:    or a0, a1, a0
-; RV32-NEXT:    or a0, a0, a6
+; RV32-NEXT:    or a0, a0, a4
 ; RV32-NEXT:    beqz a0, .LBB62_2
 ; RV32-NEXT:  # %bb.1: # %overflow
 ; RV32-NEXT:    li a0, 0
@@ -3889,17 +3841,17 @@ define zeroext i1 @umulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    add a4, a5, a4
 ; RV32ZBA-NEXT:    mulhu a5, a0, a2
 ; RV32ZBA-NEXT:    add a4, a5, a4
-; RV32ZBA-NEXT:    sltu a6, a4, a5
+; RV32ZBA-NEXT:    sltu a4, a4, a5
 ; RV32ZBA-NEXT:    snez a5, a3
-; RV32ZBA-NEXT:    snez a4, a1
-; RV32ZBA-NEXT:    and a4, a4, a5
+; RV32ZBA-NEXT:    snez a6, a1
+; RV32ZBA-NEXT:    and a5, a6, a5
 ; RV32ZBA-NEXT:    mulhu a1, a1, a2
 ; RV32ZBA-NEXT:    snez a1, a1
-; RV32ZBA-NEXT:    or a1, a4, a1
+; RV32ZBA-NEXT:    or a1, a5, a1
 ; RV32ZBA-NEXT:    mulhu a0, a3, a0
 ; RV32ZBA-NEXT:    snez a0, a0
 ; RV32ZBA-NEXT:    or a0, a1, a0
-; RV32ZBA-NEXT:    or a0, a0, a6
+; RV32ZBA-NEXT:    or a0, a0, a4
 ; RV32ZBA-NEXT:    beqz a0, .LBB62_2
 ; RV32ZBA-NEXT:  # %bb.1: # %overflow
 ; RV32ZBA-NEXT:    li a0, 0

From 75184f14aecd8147a02189a843789a4eb5e5b571 Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin@accesssoftek.com>
Date: Fri, 21 Jan 2022 13:04:32 +0700
Subject: [PATCH 133/946] [DebugInfo] Fix handling '# line "file"' for DWARFv5

`CppHashInfo.Filename` is a `StringRef` that references a part of the
source file and it is not null-terminated at the end of the file name.
`AsmParser::parseAndMatchAndEmitTargetInstruction()` passes it to
`getStreamer().emitDwarfFileDirective()`, and it eventually comes to
`isRootFile()`. The comparison fails because `FileName.data()` is not
properly terminated.

In addition, the old code might cause a significant speed degradation
for long source files. The `operator!=()` for `std::string` and
`const char *` can be implemented in a way that it finds the length of
the second argument first, which slows the comparison for long data.
`parseAndMatchAndEmitTargetInstruction()` calls
`emitDwarfFileDirective()` every time if `CppHashInfo.Filename` is not
empty. As a result, the longer the source file is, the slower the
compilation wend, and for a very long file, it might take hours instead
of a couple of seconds normally.

Differential Revision: https://reviews.llvm.org/D117785
---
 llvm/lib/MC/MCDwarf.cpp            |  2 +-
 llvm/test/MC/ELF/debug-hash-file.s | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 1c9cfb9042e28..15cfdba5d7fde 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -561,7 +561,7 @@ Expected<unsigned> MCDwarfLineTable::tryGetFile(StringRef &Directory,
 
 static bool isRootFile(const MCDwarfFile &RootFile, StringRef &Directory,
                        StringRef &FileName, Optional<MD5::MD5Result> Checksum) {
-  if (RootFile.Name.empty() || RootFile.Name != FileName.data())
+  if (RootFile.Name.empty() || StringRef(RootFile.Name) != FileName)
     return false;
   return RootFile.Checksum == Checksum;
 }
diff --git a/llvm/test/MC/ELF/debug-hash-file.s b/llvm/test/MC/ELF/debug-hash-file.s
index a72e5d6aa8d42..9d4cf6ae68bdc 100644
--- a/llvm/test/MC/ELF/debug-hash-file.s
+++ b/llvm/test/MC/ELF/debug-hash-file.s
@@ -1,5 +1,8 @@
 // RUN: llvm-mc -triple x86_64-unknown-linux-gnu -filetype obj -g -dwarf-version 4 -o %t %s
-// RUN: llvm-dwarfdump -debug-info -debug-line %t | FileCheck %s
+// RUN: llvm-dwarfdump -debug-info -debug-line %t | FileCheck %s --check-prefixes=CHECK,DWARF4
+
+// RUN: llvm-mc -triple x86_64-unknown-linux-gnu -filetype obj -g -dwarf-version 5 -o %t %s
+// RUN: llvm-dwarfdump -debug-info -debug-line %t | FileCheck %s --check-prefixes=CHECK,DWARF5
 
 // CHECK: DW_TAG_compile_unit
 // CHECK-NOT: DW_TAG_
@@ -8,10 +11,17 @@
 // CHECK-NOT: DW_TAG_
 // CHECK: DW_AT_decl_file ("/MyTest/Inputs{{(/|\\)+}}other.S")
 
-// CHECK: include_directories[ 1] = "/MyTest/Inputs"
-// CHECK: file_names[ 1]:
-// CHECK-NEXT: name: "other.S"
-// CHECK-NEXT: dir_index: 1
+// DWARF4: include_directories[ 1] = "/MyTest/Inputs"
+// DWARF4: file_names[ 1]:
+// DWARF4-NEXT: name: "other.S"
+// DWARF4-NEXT: dir_index: 1
+
+// DWARF5:      include_directories[ 0] =
+// DWARF5-NOT:  include_directories[ 1] =
+// DWARF5:      file_names[ 0]:
+// DWARF5-NEXT:           name: "/MyTest/Inputs/other.S"
+// DWARF5-NEXT:      dir_index: 0
+// DWARF5-NOT:  file_names[ 1]:
 
 # 1 "/MyTest/Inputs/other.S"
 

From 86b08ed6bb16a96e921445299858f3cabd6f9e45 Mon Sep 17 00:00:00 2001
From: Igor Kudrin <ikudrin@accesssoftek.com>
Date: Fri, 21 Jan 2022 13:04:44 +0700
Subject: [PATCH 134/946] [DebugInfo][NFC] Do not call 'isRootFile' for DWARF
 Version < 5

A quicker comparison should be done first.

Differential Revision: https://reviews.llvm.org/D117786
---
 llvm/lib/MC/MCDwarf.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 15cfdba5d7fde..2cb5a000f88a7 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -586,7 +586,7 @@ MCDwarfLineTableHeader::tryGetFile(StringRef &Directory,
     trackMD5Usage(Checksum.hasValue());
     HasSource = (Source != None);
   }
-  if (isRootFile(RootFile, Directory, FileName, Checksum) && DwarfVersion >= 5)
+  if (DwarfVersion >= 5 && isRootFile(RootFile, Directory, FileName, Checksum))
     return 0;
   if (FileNumber == 0) {
     // File numbers start with 1 and/or after any file numbers

From 7f0f4cab18a9e3abf8d0583c1a87e352cd5577a6 Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy <sivachandra@google.com>
Date: Fri, 21 Jan 2022 06:28:42 +0000
Subject: [PATCH 135/946] [libc][NFC] Add 'struct_' prefix to type headers
 defining struct types.

This allows header generator to generate type inclusion boiler plate in
a straightforward manner.
---
 libc/config/linux/api.td                                      | 4 ++--
 libc/include/CMakeLists.txt                                   | 4 ++--
 libc/include/llvm-libc-types/CMakeLists.txt                   | 4 ++--
 .../llvm-libc-types/{__sigaction.h => struct_sigaction.h}     | 0
 libc/include/llvm-libc-types/{tm.h => struct_tm.h}            | 0
 5 files changed, 6 insertions(+), 6 deletions(-)
 rename libc/include/llvm-libc-types/{__sigaction.h => struct_sigaction.h} (100%)
 rename libc/include/llvm-libc-types/{tm.h => struct_tm.h} (100%)

diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index f882ec304f192..48cd1c1113485 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -24,7 +24,7 @@ def SSizeT : TypeDecl<"ssize_t"> {
 
 def StructTm: TypeDecl<"struct tm"> {
   let Decl = [{
-    #include <llvm-libc-types/tm.h>
+    #include <llvm-libc-types/struct_tm.h>
   }];
 }
 
@@ -334,7 +334,7 @@ def SysMManAPI : PublicAPI<"sys/mman.h"> {
 
 def StructSigactionDefn : TypeDecl<"struct sigaction"> {
   let Decl = [{
-    #include <llvm-libc-types/__sigaction.h>
+    #include <llvm-libc-types/struct_sigaction.h>
   }];
 }
 
diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt
index 0b62563f3170a..d3b813b87b49e 100644
--- a/libc/include/CMakeLists.txt
+++ b/libc/include/CMakeLists.txt
@@ -67,7 +67,7 @@ add_gen_header(
   DEPENDS
     .llvm_libc_common_h
     .llvm-libc-types.time_t
-    .llvm-libc-types.tm
+    .llvm-libc-types.struct_tm
 )
 
 add_gen_header(
@@ -103,7 +103,7 @@ add_gen_header(
   DATA_FILES
     ../config/${LIBC_TARGET_OS}/signal.h.in
   DEPENDS
-    .llvm-libc-types.__sigaction
+    .llvm-libc-types.struct_sigaction
     .llvm-libc-types.__sighandler_t
 )
 
diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt
index 73c295843e2a4..3c9bed7183b5d 100644
--- a/libc/include/llvm-libc-types/CMakeLists.txt
+++ b/libc/include/llvm-libc-types/CMakeLists.txt
@@ -1,7 +1,6 @@
 add_header(__bsearchcompare_t HDR __bsearchcompare_t.h)
 add_header(__call_once_func_t HDR __call_once_func_t.h)
 add_header(__qsortcompare_t HDR __qsortcompare_t.h)
-add_header(__sigaction HDR __sigaction.h)
 add_header(__sighandler_t HDR __sighandler_t.h)
 add_header(cnd_t HDR cnd_t.h)
 add_header(double_t HDR double_t.h)
@@ -18,7 +17,8 @@ add_header(off_t HDR off_t.h)
 add_header(once_flag HDR once_flag.h)
 add_header(size_t HDR size_t.h)
 add_header(ssize_t HDR ssize_t.h)
+add_header(struct_sigaction HDR struct_sigaction.h)
+add_header(struct_tm HDR struct_tm.h)
 add_header(thrd_start_t HDR thrd_start_t.h)
 add_header(thrd_t HDR thrd_t.h)
 add_header(time_t HDR time_t.h)
-add_header(tm HDR tm.h)
diff --git a/libc/include/llvm-libc-types/__sigaction.h b/libc/include/llvm-libc-types/struct_sigaction.h
similarity index 100%
rename from libc/include/llvm-libc-types/__sigaction.h
rename to libc/include/llvm-libc-types/struct_sigaction.h
diff --git a/libc/include/llvm-libc-types/tm.h b/libc/include/llvm-libc-types/struct_tm.h
similarity index 100%
rename from libc/include/llvm-libc-types/tm.h
rename to libc/include/llvm-libc-types/struct_tm.h

From e6de53b4de4aecca4ac892500a0907805896ed27 Mon Sep 17 00:00:00 2001
From: eopXD <eop.chen@sifive.com>
Date: Thu, 20 Jan 2022 02:24:10 -0800
Subject: [PATCH 136/946] [RISCV] Bump rvv-related extensions from 0.10 to 1.0

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D112987
---
 clang/test/Driver/riscv-arch.c                | 12 +--
 .../test/Preprocessor/riscv-target-features.c | 64 ++++++++--------
 llvm/lib/Support/RISCVISAInfo.cpp             | 36 ++++-----
 llvm/test/CodeGen/RISCV/attributes.ll         |  8 +-
 llvm/test/MC/RISCV/attribute-arch.s           | 76 +++++++++----------
 5 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index 656abde4f75e4..36043124e4565 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -412,20 +412,20 @@
 // RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1'
 // RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for experimental extension 'v'
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10 -menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 -menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s
 // RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v"
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvl32b0p10 -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b1p0 -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-NOFLAG %s
-// RV32-EXPERIMENTAL-ZVL-NOFLAG: error: invalid arch name 'rv32iv0p10_zvl32b0p10'
+// RV32-EXPERIMENTAL-ZVL-NOFLAG: error: invalid arch name 'rv32iv1p0_zvl32b1p0'
 // RV32-EXPERIMENTAL-ZVL-NOFLAG: requires '-menable-experimental-extensions'
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvl32b0p1 -menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b0p1 -menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-BADVERS %s
-// RV32-EXPERIMENTAL-ZVL-BADVERS: error: invalid arch name 'rv32iv0p10_zvl32b0p1'
+// RV32-EXPERIMENTAL-ZVL-BADVERS: error: invalid arch name 'rv32iv1p0_zvl32b0p1'
 // RV32-EXPERIMENTAL-ZVL-BADVERS: unsupported version number 0.1 for experimental extension
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p10_zvl32b0p10 -menable-experimental-extensions -### %s -c 2>&1 | \
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b1p0 -menable-experimental-extensions -### %s -c 2>&1 | \
 // RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-GOODVERS %s
 // RV32-EXPERIMENTAL-ZVL-GOODVERS: "-target-feature" "+experimental-zvl32b"
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index c69285f6e3996..ec356cee7426e 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -212,12 +212,12 @@
 // CHECK-ZBT-EXT: __riscv_zbt 93000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv32iv0p10 -x c -E -dM %s \
+// RUN: -march=rv32iv1p0 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10 -x c -E -dM %s \
+// RUN: -march=rv64iv1p0 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// CHECK-V-EXT: __riscv_v 10000{{$}}
+// CHECK-V-EXT: __riscv_v 1000000{{$}}
 // CHECK-V-EXT: __riscv_vector 1
 
 // RUN: %clang -target riscv32-unknown-linux-gnu \
@@ -237,107 +237,107 @@
 // CHECK-ZFH-EXT: __riscv_zfh 1000000{{$}}
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-V-MINVLEN %s
 // CHECK-V-MINVLEN: __riscv_v_elen 64
 // CHECK-V-MINVLEN: __riscv_v_elen_fp 64
 // CHECK-V-MINVLEN: __riscv_v_min_vlen 128
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl256b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl256b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL256b %s
 // CHECK-ZVL256b: __riscv_v_min_vlen 256
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl512b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl512b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL512b %s
 // CHECK-ZVL512b: __riscv_v_min_vlen 512
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl1024b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl1024b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL1024b %s
 // CHECK-ZVL1024b: __riscv_v_min_vlen 1024
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl2048b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl2048b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL2048b %s
 // CHECK-ZVL2048b: __riscv_v_min_vlen 2048
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl4096b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl4096b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL4096b %s
 // CHECK-ZVL4096b: __riscv_v_min_vlen 4096
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl8192b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl8192b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL8192b %s
 // CHECK-ZVL8192b: __riscv_v_min_vlen 8192
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl16384b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl16384b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL16384b %s
 // CHECK-ZVL16384b: __riscv_v_min_vlen 16384
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl32768b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl32768b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL32768b %s
 // CHECK-ZVL32768b: __riscv_v_min_vlen 32768
 
 // RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64iv0p10_zvl65536b0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64iv1p0_zvl65536b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL65536b %s
 // CHECK-ZVL65536b: __riscv_v_min_vlen 65536
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64ifdzve64d0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64ifdzve64d1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE64D-EXT %s
 // CHECK-ZVE64D-EXT: __riscv_v_elen 64
 // CHECK-ZVE64D-EXT: __riscv_v_elen_fp 64
 // CHECK-ZVE64D-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64D-EXT: __riscv_vector 1
-// CHECK-ZVE64D-EXT: __riscv_zve32f 10000{{$}}
-// CHECK-ZVE64D-EXT: __riscv_zve32x 10000{{$}}
-// CHECK-ZVE64D-EXT: __riscv_zve64d 10000{{$}}
-// CHECK-ZVE64D-EXT: __riscv_zve64f 10000{{$}}
-// CHECK-ZVE64D-EXT: __riscv_zve64x 10000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve32f 1000000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve32x 1000000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64d 1000000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64f 1000000{{$}}
+// CHECK-ZVE64D-EXT: __riscv_zve64x 1000000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64ifzve64f0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64ifzve64f1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE64F-EXT %s
 // CHECK-ZVE64F-EXT: __riscv_v_elen 64
 // CHECK-ZVE64F-EXT: __riscv_v_elen_fp 32
 // CHECK-ZVE64F-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64F-EXT: __riscv_vector 1
-// CHECK-ZVE64F-EXT: __riscv_zve32f 10000{{$}}
-// CHECK-ZVE64F-EXT: __riscv_zve32x 10000{{$}}
-// CHECK-ZVE64F-EXT: __riscv_zve64f 10000{{$}}
-// CHECK-ZVE64F-EXT: __riscv_zve64x 10000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve32f 1000000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve32x 1000000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve64f 1000000{{$}}
+// CHECK-ZVE64F-EXT: __riscv_zve64x 1000000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64izve64x0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64izve64x1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE64X-EXT %s
 // CHECK-ZVE64X-EXT: __riscv_v_elen 64
 // CHECK-ZVE64X-EXT: __riscv_v_elen_fp 0
 // CHECK-ZVE64X-EXT: __riscv_v_min_vlen 64
 // CHECK-ZVE64X-EXT: __riscv_vector 1
-// CHECK-ZVE64X-EXT: __riscv_zve32x 10000{{$}}
-// CHECK-ZVE64X-EXT: __riscv_zve64x 10000{{$}}
+// CHECK-ZVE64X-EXT: __riscv_zve32x 1000000{{$}}
+// CHECK-ZVE64X-EXT: __riscv_zve64x 1000000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64ifzve32f0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64ifzve32f1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE32F-EXT %s
 // CHECK-ZVE32F-EXT: __riscv_v_elen 32
 // CHECK-ZVE32F-EXT: __riscv_v_elen_fp 32
 // CHECK-ZVE32F-EXT: __riscv_v_min_vlen 32
 // CHECK-ZVE32F-EXT: __riscv_vector 1
-// CHECK-ZVE32F-EXT: __riscv_zve32f 10000{{$}}
-// CHECK-ZVE32F-EXT: __riscv_zve32x 10000{{$}}
+// CHECK-ZVE32F-EXT: __riscv_zve32f 1000000{{$}}
+// CHECK-ZVE32F-EXT: __riscv_zve32x 1000000{{$}}
 
 // RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
-// RUN: -march=rv64izve32x0p10 -x c -E -dM %s -o - \
+// RUN: -march=rv64izve32x1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE32X-EXT %s
 // CHECK-ZVE32X-EXT: __riscv_v_elen 32
 // CHECK-ZVE32X-EXT: __riscv_v_elen_fp 0
 // CHECK-ZVE32X-EXT: __riscv_v_min_vlen 32
 // CHECK-ZVE32X-EXT: __riscv_vector 1
-// CHECK-ZVE32X-EXT: __riscv_zve32x 10000{{$}}
+// CHECK-ZVE32X-EXT: __riscv_zve32x 1000000{{$}}
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 0d9b6e4fa4bb5..80fae5510326d 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -60,7 +60,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
 };
 
 static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
-    {"v", RISCVExtensionVersion{0, 10}},
+    {"v", RISCVExtensionVersion{1, 0}},
     {"zbe", RISCVExtensionVersion{0, 93}},
     {"zbf", RISCVExtensionVersion{0, 93}},
     {"zbm", RISCVExtensionVersion{0, 93}},
@@ -68,23 +68,23 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
     {"zbr", RISCVExtensionVersion{0, 93}},
     {"zbt", RISCVExtensionVersion{0, 93}},
 
-    {"zvl32b", RISCVExtensionVersion{0, 10}},
-    {"zvl64b", RISCVExtensionVersion{0, 10}},
-    {"zvl128b", RISCVExtensionVersion{0, 10}},
-    {"zvl256b", RISCVExtensionVersion{0, 10}},
-    {"zvl512b", RISCVExtensionVersion{0, 10}},
-    {"zvl1024b", RISCVExtensionVersion{0, 10}},
-    {"zvl2048b", RISCVExtensionVersion{0, 10}},
-    {"zvl4096b", RISCVExtensionVersion{0, 10}},
-    {"zvl8192b", RISCVExtensionVersion{0, 10}},
-    {"zvl16384b", RISCVExtensionVersion{0, 10}},
-    {"zvl32768b", RISCVExtensionVersion{0, 10}},
-    {"zvl65536b", RISCVExtensionVersion{0, 10}},
-    {"zve32x", RISCVExtensionVersion{0, 10}},
-    {"zve32f", RISCVExtensionVersion{0, 10}},
-    {"zve64x", RISCVExtensionVersion{0, 10}},
-    {"zve64f", RISCVExtensionVersion{0, 10}},
-    {"zve64d", RISCVExtensionVersion{0, 10}},
+    {"zvl32b", RISCVExtensionVersion{1, 0}},
+    {"zvl64b", RISCVExtensionVersion{1, 0}},
+    {"zvl128b", RISCVExtensionVersion{1, 0}},
+    {"zvl256b", RISCVExtensionVersion{1, 0}},
+    {"zvl512b", RISCVExtensionVersion{1, 0}},
+    {"zvl1024b", RISCVExtensionVersion{1, 0}},
+    {"zvl2048b", RISCVExtensionVersion{1, 0}},
+    {"zvl4096b", RISCVExtensionVersion{1, 0}},
+    {"zvl8192b", RISCVExtensionVersion{1, 0}},
+    {"zvl16384b", RISCVExtensionVersion{1, 0}},
+    {"zvl32768b", RISCVExtensionVersion{1, 0}},
+    {"zvl65536b", RISCVExtensionVersion{1, 0}},
+    {"zve32x", RISCVExtensionVersion{1, 0}},
+    {"zve32f", RISCVExtensionVersion{1, 0}},
+    {"zve64x", RISCVExtensionVersion{1, 0}},
+    {"zve64f", RISCVExtensionVersion{1, 0}},
+    {"zve64d", RISCVExtensionVersion{1, 0}},
 };
 
 static bool stripExperimentalPrefix(StringRef &Ext) {
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index dd4a340edeac4..fa02d72797350 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -59,8 +59,8 @@
 ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93"
 ; RV32ZBS: .attribute 5, "rv32i2p0_zbs1p0"
 ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93"
-; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
-; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
@@ -80,8 +80,8 @@
 ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93"
 ; RV64ZBS: .attribute 5, "rv64i2p0_zbs1p0"
 ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93"
-; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
-; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v0p10_zfh1p0_zfhmin1p0_zbb1p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0"
 
 define i32 @addi(i32 %a) {
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 703b6a6aa8105..d95e99348e434 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -35,8 +35,8 @@
 
 ## Experimental extensions require version string to be explicitly specified
 
-.attribute arch, "rv32iv0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
 .attribute arch, "rv32izba1p0"
 # CHECK: attribute      5, "rv32i2p0_zba1p0"
@@ -74,59 +74,59 @@
 .attribute arch, "rv32ifzfh1p0"
 # CHECK: attribute      5, "rv32i2p0_f2p0_zfh1p0_zfhmin1p0"
 
-.attribute arch, "rv32iv0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl32b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0zvl32b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl64b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl128b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0zvl128b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl256b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0zvl256b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl512b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0zvl512b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl1024b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0zvl1024b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl2048b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl512b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0zvl2048b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl4096b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10"
+.attribute arch, "rv32iv1p0zvl4096b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv0p10zvl8192b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
+.attribute arch, "rv32iv1p0zvl8192b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
 
-.attribute arch, "rv32iv0p10zvl16384b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
+.attribute arch, "rv32iv1p0zvl16384b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
 
-.attribute arch, "rv32iv0p10zvl32768b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl8192b0p10"
+.attribute arch, "rv32iv1p0zvl32768b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
 
-.attribute arch, "rv32iv0p10zvl65536b0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v0p10_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl1024b0p10_zvl128b0p10_zvl16384b0p10_zvl2048b0p10_zvl256b0p10_zvl32768b0p10_zvl32b0p10_zvl4096b0p10_zvl512b0p10_zvl64b0p10_zvl65536b0p10_zvl8192b0p10"
+.attribute arch, "rv32iv1p0zvl65536b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0"
 
-.attribute arch, "rv32i_zve32x0p10"
-# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zvl32b0p10"
+.attribute arch, "rv32i_zve32x1p0"
+# CHECK: attribute      5, "rv32i2p0_zve32x1p0_zvl32b1p0"
 
-.attribute arch, "rv32if_zve32f0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zvl32b0p10"
+.attribute arch, "rv32if_zve32f1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zvl32b1p0"
 
-.attribute arch, "rv32i_zve64x0p10"
-# CHECK: attribute      5, "rv32i2p0_zve32x0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32i_zve64x1p0"
+# CHECK: attribute      5, "rv32i2p0_zve32x1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
 
-.attribute arch, "rv32if_zve64f0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f0p10_zve32x0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32if_zve64f1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
 
-.attribute arch, "rv32ifd_zve64d0p10"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f0p10_zve32x0p10_zve64d0p10_zve64f0p10_zve64x0p10_zvl32b0p10_zvl64b0p10"
+.attribute arch, "rv32ifd_zve64d1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
 
 .attribute arch, "rv32i_zbkb1p0"
 # CHECK: attribute      5, "rv32i2p0_zbkb1p0"

From a99e06aa869b44588a18a423f58e0ab30c292d8e Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <ravishankarm@google.com>
Date: Thu, 20 Jan 2022 22:27:06 -0800
Subject: [PATCH 137/946] [mlir][Linalg] Avoid generating illegal operations
 during elementwise fusion.

In some cases, fusion can produce illegal operations if after fusion
the range of some of the loops cannot be computed from shapes of its
operands. Check for this case and abort the fusion if this happens.

Differential Revision: https://reviews.llvm.org/D117602
---
 .../Linalg/Transforms/ElementwiseOpFusion.cpp |  7 +++++
 .../Linalg/fusion-elementwise-ops.mlir        | 30 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
index 33286258543e5..be34ef8bbd625 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -318,6 +318,13 @@ fuseElementwiseOpsImpl(GenericOp producer, OpOperand *consumerOpOperand,
       consumer.iterator_types(),
       /*doc=*/nullptr,
       /*library_call=*/nullptr);
+  if (!fusedOp.getShapesToLoopsMap()) {
+    // Fused op has invalid indexing maps. Typically this means something is off
+    // in the input, but going ahead here would result in verification errors.
+    // So cleanup and abort.
+    rewriter.eraseOp(fusedOp);
+    return llvm::None;
+  }
 
   // Construct an AffineMap from consumer loops to producer loops.
   // consumer loop -> tensor index
diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
index 6ae9e15543e1c..3f68820b18cc7 100644
--- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
@@ -945,3 +945,33 @@ func @no_fusion_missing_reduction_shape(%arg0: tensor<f32>, %arg1: index) -> ten
   } -> tensor<?xf32>
   return %8 : tensor<?xf32>
 }
+
+// -----
+
+func @illegal_fusion(%arg0 : tensor<5000xi64>, %arg1 : tensor<5000xi32>) -> tensor<5000xi32> {
+  %c1_i32 = arith.constant 1 : i32
+  %0 = linalg.generic {
+        indexing_maps = [affine_map<(d0) -> (d0)>],
+        iterator_types = ["parallel"]}
+        outs(%arg0 : tensor<5000xi64>) {
+        ^bb0(%arg3: i64):  // no predecessors
+          %22 = linalg.index 0 : index
+          %23 = arith.index_cast %22 : index to i64
+          linalg.yield %23 : i64
+        } -> tensor<5000xi64>
+  %1 = linalg.init_tensor [5000] : tensor<5000xi32>
+  %2 = linalg.generic {
+        indexing_maps = [affine_map<(d0, d1) -> (d0)>, affine_map<(d0, d1) -> (d1)>],
+        iterator_types = ["parallel", "parallel"]}
+        ins(%0 : tensor<5000xi64>) outs(%1 : tensor<5000xi32>) {
+        ^bb0(%arg3: i64, %arg5: i32):  // no predecessors
+          %22 = arith.index_cast %arg3 : i64 to index
+          %23 = tensor.extract %arg1[%22] : tensor<5000xi32>
+          linalg.yield %23 : i32
+        } -> tensor<5000xi32>
+  return %2 : tensor<5000xi32>
+}
+// CHECK-LABEL: func @illegal_fusion(
+//       CHECK:   %[[PRODUCER:.+]] = linalg.generic
+//       CHECK:    linalg.generic
+//   CHECK-SAME:       ins(%[[PRODUCER]]

From 05cd9a0596d8d2cc4fdb1d1dfa0957968aceaf92 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 09:06:35 +0100
Subject: [PATCH 138/946] [ConstantFold] Simplify type check in reinterpret
 load folding (NFC)

Keep a list of allowed types, but then always construct the map
type the same way. We need an integer with the same width as the
original type.
---
 llvm/lib/Analysis/ConstantFolding.cpp | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 253a7243bcf59..d42086a10ee14 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -553,23 +553,16 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
 
   // If this isn't an integer load we can't fold it directly.
   if (!IntType) {
-    // If this is a float/double load, we can try folding it as an int32/64 load
-    // and then bitcast the result.  This can be useful for union cases.  Note
+    // If this is a non-integer load, we can try folding it as an int load and
+    // then bitcast the result.  This can be useful for union cases.  Note
     // that address spaces don't matter here since we're not going to result in
     // an actual new load.
-    Type *MapTy;
-    if (LoadTy->isHalfTy())
-      MapTy = Type::getInt16Ty(C->getContext());
-    else if (LoadTy->isFloatTy())
-      MapTy = Type::getInt32Ty(C->getContext());
-    else if (LoadTy->isDoubleTy())
-      MapTy = Type::getInt64Ty(C->getContext());
-    else if (LoadTy->isVectorTy()) {
-      MapTy = PointerType::getIntNTy(
-          C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize());
-    } else
+    if (!LoadTy->isHalfTy() && !LoadTy->isFloatTy() && !LoadTy->isDoubleTy() &&
+        !LoadTy->isVectorTy())
       return nullptr;
 
+    Type *MapTy = Type::getIntNTy(
+          C->getContext(), DL.getTypeSizeInBits(LoadTy).getFixedSize());
     if (Constant *Res = FoldReinterpretLoadFromConst(C, MapTy, Offset, DL)) {
       if (Res->isNullValue() && !LoadTy->isX86_MMXTy() &&
           !LoadTy->isX86_AMXTy())

From 6a19cb837c9b2ca14642bb0a8f1234903e4430d0 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 20 Jan 2022 16:48:19 +0100
Subject: [PATCH 139/946] [ConstantFold] Support pointers in reinterpret load
 folding

Peculiarly, the necessary code to handle pointers (including the
check for non-integral address spaces) is already in place,
because we were already allowing vectors of pointers here, just
not plain pointers.
---
 llvm/lib/Analysis/ConstantFolding.cpp                | 2 +-
 llvm/test/Transforms/InstSimplify/ConstProp/loads.ll | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index d42086a10ee14..c834ecf107915 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -558,7 +558,7 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
     // that address spaces don't matter here since we're not going to result in
     // an actual new load.
     if (!LoadTy->isHalfTy() && !LoadTy->isFloatTy() && !LoadTy->isDoubleTy() &&
-        !LoadTy->isVectorTy())
+        !LoadTy->isPointerTy() && !LoadTy->isVectorTy())
       return nullptr;
 
     Type *MapTy = Type::getIntNTy(
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
index 65daa324f7468..990063be0468e 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
@@ -340,9 +340,11 @@ define i32 @load_all_undef() {
 @g_i8_data = constant [16 x i8] c"\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00"
 
 define i64* @load_ptr_from_i8_data() {
-; CHECK-LABEL: @load_ptr_from_i8_data(
-; CHECK-NEXT:    [[V:%.*]] = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**), align 8
-; CHECK-NEXT:    ret i64* [[V]]
+; LE-LABEL: @load_ptr_from_i8_data(
+; LE-NEXT:    ret i64* inttoptr (i64 1 to i64*)
+;
+; BE-LABEL: @load_ptr_from_i8_data(
+; BE-NEXT:    ret i64* inttoptr (i64 72057594037927936 to i64*)
 ;
   %v = load i64*, i64** bitcast ([16 x i8]* @g_i8_data to i64**)
   ret i64* %v

From 7950010e4983a58d19a5d8a831f4c2467c04c56d Mon Sep 17 00:00:00 2001
From: Simon Moll <simon.moll@emea.nec.com>
Date: Fri, 21 Jan 2022 09:15:50 +0100
Subject: [PATCH 140/946] [VE][NFC] Factor out helper functions

Factor out some helper functions to cleanup VEISelLowering.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D117683
---
 llvm/lib/Target/VE/VECustomDAG.cpp    | 30 ++++++++++++++++++++++
 llvm/lib/Target/VE/VECustomDAG.h      |  6 +++++
 llvm/lib/Target/VE/VEISelLowering.cpp | 37 ++++-----------------------
 3 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
index 98348c504990d..2f9976e426129 100644
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -19,9 +19,39 @@
 
 namespace llvm {
 
+/// \returns the VVP_* SDNode opcode corresponsing to \p OC.
+Optional<unsigned> getVVPOpcode(unsigned Opcode) {
+  switch (Opcode) {
+#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME)                                       \
+  case ISD::VPOPC:                                                             \
+    return VEISD::VVPNAME;
+#define ADD_VVP_OP(VVPNAME, SDNAME)                                            \
+  case VEISD::VVPNAME:                                                         \
+  case ISD::SDNAME:                                                            \
+    return VEISD::VVPNAME;
+#include "VVPNodes.def"
+  }
+  return None;
+}
+
+bool isVVPBinaryOp(unsigned VVPOpcode) {
+  switch (VVPOpcode) {
+#define ADD_BINARY_VVP_OP(VVPNAME, ...)                                        \
+  case VEISD::VVPNAME:                                                         \
+    return true;
+#include "VVPNodes.def"
+  }
+  return false;
+}
+
 SDValue VECustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget,
                                  bool IsOpaque) const {
   return DAG.getConstant(Val, DL, VT, IsTarget, IsOpaque);
 }
 
+SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar,
+                                  SDValue AVL) const {
+  return getNode(VEISD::VEC_BROADCAST, ResultVT, {Scalar, AVL});
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
index 05c4c603c2fa9..e78b5dda6828c 100644
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -21,6 +21,10 @@
 
 namespace llvm {
 
+Optional<unsigned> getVVPOpcode(unsigned Opcode);
+
+bool isVVPBinaryOp(unsigned Opcode);
+
 class VECustomDAG {
   SelectionDAG &DAG;
   SDLoc DL;
@@ -64,6 +68,8 @@ class VECustomDAG {
 
   SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
                       bool IsOpaque = false) const;
+
+  SDValue getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const;
 };
 
 } // namespace llvm
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index abedee4788d9d..3ab876aa05c99 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1661,8 +1661,7 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
     MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
 
     auto AVL = CDAG.getConstant(NumEls, MVT::i32);
-    return CDAG.getNode(VEISD::VEC_BROADCAST, LegalResVT,
-                        {Op.getOperand(0), AVL});
+    return CDAG.getBroadcast(LegalResVT, Op.getOperand(0), AVL);
   }
 
   // Expand
@@ -2667,21 +2666,6 @@ bool VETargetLowering::hasAndNot(SDValue Y) const {
   return true;
 }
 
-/// \returns the VVP_* SDNode opcode corresponsing to \p OC.
-static Optional<unsigned> getVVPOpcode(unsigned Opcode) {
-  switch (Opcode) {
-#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME)                                       \
-  case ISD::VPOPC:                                                             \
-    return VEISD::VVPNAME;
-#define ADD_VVP_OP(VVPNAME, SDNAME)                                            \
-  case VEISD::VVPNAME:                                                         \
-  case ISD::SDNAME:                                                            \
-    return VEISD::VVPNAME;
-#include "VVPNodes.def"
-  }
-  return None;
-}
-
 SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
   // Can we represent this as a VVP node.
   const unsigned Opcode = Op->getOpcode();
@@ -2711,26 +2695,15 @@ SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
     // Materialize the VL parameter.
     AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32);
     SDValue ConstTrue = CDAG.getConstant(1, MVT::i32);
-    Mask = CDAG.getNode(VEISD::VEC_BROADCAST, MaskVT,
-                        ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+    Mask = CDAG.getBroadcast(MaskVT, ConstTrue, AVL);
   }
 
-  // Categories we are interested in.
-  bool IsBinaryOp = false;
-
-  switch (VVPOpcode) {
-#define ADD_BINARY_VVP_OP(VVPNAME, ...)                                        \
-  case VEISD::VVPNAME:                                                         \
-    IsBinaryOp = true;                                                         \
-    break;
-#include "VVPNodes.def"
-  }
-
-  if (IsBinaryOp) {
+  if (isVVPBinaryOp(VVPOpcode)) {
     assert(LegalVecVT.isSimple());
     return CDAG.getNode(VVPOpcode, LegalVecVT,
                         {Op->getOperand(0), Op->getOperand(1), Mask, AVL});
-  } else if (VVPOpcode == VEISD::VVP_SELECT) {
+  }
+  if (VVPOpcode == VEISD::VVP_SELECT) {
     auto Mask = Op->getOperand(0);
     auto OnTrue = Op->getOperand(1);
     auto OnFalse = Op->getOperand(2);

From 3f9d1f516e19cc9548cf17ec60b73300d4ab8360 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 09:20:54 +0100
Subject: [PATCH 141/946] [InstSimplify] Add tests for reinterpret load of
 floats (NFC)

Add tests for currently unsupported float types.
---
 .../InstSimplify/ConstProp/loads.ll           | 42 +++++++++++++++++--
 1 file changed, 39 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
index 990063be0468e..43038bcbdbfe7 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
@@ -100,17 +100,53 @@ define double @test8() {
 
 
 ; i128 load.
-define i128 @test9() {
-; LE-LABEL: @test9(
+define i128 @test_i128() {
+; LE-LABEL: @test_i128(
 ; LE-NEXT:    ret i128 2071796475790618158476296315
 ;
-; BE-LABEL: @test9(
+; BE-LABEL: @test_i128(
 ; BE-NEXT:    ret i128 2268949521066387161080
 ;
   %r = load i128, i128* bitcast({i64, i64}* @g3 to i128*)
   ret i128 %r
 }
 
+define fp128 @test_fp128() {
+; CHECK-LABEL: @test_fp128(
+; CHECK-NEXT:    [[R:%.*]] = load fp128, fp128* bitcast ({ i64, i64 }* @g3 to fp128*), align 16
+; CHECK-NEXT:    ret fp128 [[R]]
+;
+  %r = load fp128, fp128* bitcast({i64, i64}* @g3 to fp128*)
+  ret fp128 %r
+}
+
+define ppc_fp128 @test_ppc_fp128() {
+; CHECK-LABEL: @test_ppc_fp128(
+; CHECK-NEXT:    [[R:%.*]] = load ppc_fp128, ppc_fp128* bitcast ({ i64, i64 }* @g3 to ppc_fp128*), align 16
+; CHECK-NEXT:    ret ppc_fp128 [[R]]
+;
+  %r = load ppc_fp128, ppc_fp128* bitcast({i64, i64}* @g3 to ppc_fp128*)
+  ret ppc_fp128 %r
+}
+
+define x86_fp80 @test_x86_fp80() {
+; CHECK-LABEL: @test_x86_fp80(
+; CHECK-NEXT:    [[R:%.*]] = load x86_fp80, x86_fp80* bitcast ({ i64, i64 }* @g3 to x86_fp80*), align 16
+; CHECK-NEXT:    ret x86_fp80 [[R]]
+;
+  %r = load x86_fp80, x86_fp80* bitcast({i64, i64}* @g3 to x86_fp80*)
+  ret x86_fp80 %r
+}
+
+define bfloat @test_bfloat() {
+; CHECK-LABEL: @test_bfloat(
+; CHECK-NEXT:    [[R:%.*]] = load bfloat, bfloat* bitcast ({ i64, i64 }* @g3 to bfloat*), align 2
+; CHECK-NEXT:    ret bfloat [[R]]
+;
+  %r = load bfloat, bfloat* bitcast({i64, i64}* @g3 to bfloat*)
+  ret bfloat %r
+}
+
 ; vector load.
 define <2 x i64> @test10() {
 ; CHECK-LABEL: @test10(

From b4900296e4a51b0076bac69b31871c7a29efa90f Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 09:23:41 +0100
Subject: [PATCH 142/946] [ConstantFold] Allow all float types in reinterpret
 load folding

Rather than hardcoding just half, float and double, allow all
floating point types.
---
 llvm/lib/Analysis/ConstantFolding.cpp         |  4 +--
 .../InstSimplify/ConstProp/loads.ll           | 32 ++++++++++++-------
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index c834ecf107915..38c9cc7b9df29 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -557,8 +557,8 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
     // then bitcast the result.  This can be useful for union cases.  Note
     // that address spaces don't matter here since we're not going to result in
     // an actual new load.
-    if (!LoadTy->isHalfTy() && !LoadTy->isFloatTy() && !LoadTy->isDoubleTy() &&
-        !LoadTy->isPointerTy() && !LoadTy->isVectorTy())
+    if (!LoadTy->isFloatingPointTy() && !LoadTy->isPointerTy() &&
+        !LoadTy->isVectorTy())
       return nullptr;
 
     Type *MapTy = Type::getIntNTy(
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
index 43038bcbdbfe7..5e28f166b9ebf 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/loads.ll
@@ -112,36 +112,44 @@ define i128 @test_i128() {
 }
 
 define fp128 @test_fp128() {
-; CHECK-LABEL: @test_fp128(
-; CHECK-NEXT:    [[R:%.*]] = load fp128, fp128* bitcast ({ i64, i64 }* @g3 to fp128*), align 16
-; CHECK-NEXT:    ret fp128 [[R]]
+; LE-LABEL: @test_fp128(
+; LE-NEXT:    ret fp128 0xL000000000000007B0000000006B1BFF8
+;
+; BE-LABEL: @test_fp128(
+; BE-NEXT:    ret fp128 0xL0000000006B1BFF8000000000000007B
 ;
   %r = load fp128, fp128* bitcast({i64, i64}* @g3 to fp128*)
   ret fp128 %r
 }
 
 define ppc_fp128 @test_ppc_fp128() {
-; CHECK-LABEL: @test_ppc_fp128(
-; CHECK-NEXT:    [[R:%.*]] = load ppc_fp128, ppc_fp128* bitcast ({ i64, i64 }* @g3 to ppc_fp128*), align 16
-; CHECK-NEXT:    ret ppc_fp128 [[R]]
+; LE-LABEL: @test_ppc_fp128(
+; LE-NEXT:    ret ppc_fp128 bitcast (i128 2071796475790618158476296315 to ppc_fp128)
+;
+; BE-LABEL: @test_ppc_fp128(
+; BE-NEXT:    ret ppc_fp128 bitcast (i128 2268949521066387161080 to ppc_fp128)
 ;
   %r = load ppc_fp128, ppc_fp128* bitcast({i64, i64}* @g3 to ppc_fp128*)
   ret ppc_fp128 %r
 }
 
 define x86_fp80 @test_x86_fp80() {
-; CHECK-LABEL: @test_x86_fp80(
-; CHECK-NEXT:    [[R:%.*]] = load x86_fp80, x86_fp80* bitcast ({ i64, i64 }* @g3 to x86_fp80*), align 16
-; CHECK-NEXT:    ret x86_fp80 [[R]]
+; LE-LABEL: @test_x86_fp80(
+; LE-NEXT:    ret x86_fp80 0xKFFFF000000000000007B
+;
+; BE-LABEL: @test_x86_fp80(
+; BE-NEXT:    ret x86_fp80 0xK000000000000007B0000
 ;
   %r = load x86_fp80, x86_fp80* bitcast({i64, i64}* @g3 to x86_fp80*)
   ret x86_fp80 %r
 }
 
 define bfloat @test_bfloat() {
-; CHECK-LABEL: @test_bfloat(
-; CHECK-NEXT:    [[R:%.*]] = load bfloat, bfloat* bitcast ({ i64, i64 }* @g3 to bfloat*), align 2
-; CHECK-NEXT:    ret bfloat [[R]]
+; LE-LABEL: @test_bfloat(
+; LE-NEXT:    ret bfloat 0xR007B
+;
+; BE-LABEL: @test_bfloat(
+; BE-NEXT:    ret bfloat 0xR0000
 ;
   %r = load bfloat, bfloat* bitcast({i64, i64}* @g3 to bfloat*)
   ret bfloat %r

From 99b5a8049be49563bd7541dd4ea93ad2f6516299 Mon Sep 17 00:00:00 2001
From: Tres Popp <tpopp@google.com>
Date: Fri, 21 Jan 2022 09:49:55 +0100
Subject: [PATCH 143/946] Match bazel config with cmake after f29256a64

---
 utils/bazel/llvm_configs/llvm-config.h.cmake | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/utils/bazel/llvm_configs/llvm-config.h.cmake b/utils/bazel/llvm_configs/llvm-config.h.cmake
index a5edc2084a8a5..ec18b40fe04d9 100644
--- a/utils/bazel/llvm_configs/llvm-config.h.cmake
+++ b/utils/bazel/llvm_configs/llvm-config.h.cmake
@@ -91,9 +91,6 @@
 /* Define if LLVM was built with a dependency to the libtensorflow dynamic library */
 #cmakedefine LLVM_HAVE_TF_API
 
-/* Define if LLVM was built with a dependency to the tensorflow compiler */
-#cmakedefine LLVM_HAVE_TF_AOT
-
 /* Define to 1 if you have the <sysexits.h> header file. */
 #cmakedefine HAVE_SYSEXITS_H ${HAVE_SYSEXITS_H}
 

From 69825f369302184aecb1ee53d9224e49ba15d9ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?=
 =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?=
 =?UTF-8?q?=E3=83=B3=29?= <clementval@gmail.com>
Date: Fri, 21 Jan 2022 09:56:26 +0100
Subject: [PATCH 144/946] [fir] Add array operations documentation

This patch adds documentation on FIR array operations
and their usage.

Reviewed By: schweitz

Differential Revision: https://reviews.llvm.org/D115077
---
 flang/docs/FIRArrayOperations.md | 342 +++++++++++++++++++++++++++++++
 1 file changed, 342 insertions(+)
 create mode 100644 flang/docs/FIRArrayOperations.md

diff --git a/flang/docs/FIRArrayOperations.md b/flang/docs/FIRArrayOperations.md
new file mode 100644
index 0000000000000..822bafca84eb5
--- /dev/null
+++ b/flang/docs/FIRArrayOperations.md
@@ -0,0 +1,342 @@
+<!--===- docs/FIRArrayOperations.md 
+  
+   Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+   See https://llvm.org/LICENSE.txt for license information.
+   SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+  
+-->
+
+# Design: FIR Array operations
+
+```eval_rst
+.. contents::
+   :local:
+```
+
+## General
+
+The array operations in FIR model the copy-in/copy-out semantics over Fortran
+statements.
+
+Fortran language semantics sometimes require the compiler to make a temporary 
+copy of an array or array slice. Situations where this can occur include:
+
+* Passing a non-contiguous array to a procedure that does not declare it as
+  assumed-shape.
+* Array expressions, especially those involving `RESHAPE`, `PACK`, and `MERGE`.
+* Assignments of arrays where the array appears on both the left and right-hand
+  sides of the assignment.
+* Assignments of `POINTER` arrays.
+
+There are currently the following operations:
+- `fir.array_load`
+- `fir.array_merge_store`
+- `fir.array_fetch`
+- `fir.array_update`
+- `fir.array_access`
+- `fir.array_amend`
+
+`array_load`(s) and `array_merge_store` are a pairing that brackets the lifetime
+of the array copies.
+
+`array_fetch` and `array_update` are defined to work as getter/setter pairs on 
+values of elements from loaded array copies. These have "GEP-like" syntax and
+semantics.
+
+Fortran arrays are implicitly memory bound as are some other Fortran type/kind
+entities. For entities that can be atomically promoted to the value domain,
+we use `array_fetch` and `array_update`.
+
+`array_access` and `array_amend` are defined to work as getter/setter pairs on
+references to elements in loaded array copies. `array_access` has "GEP-like"
+syntax. `array_amend` annotates which loaded array copy is being written to.
+It is invalid to update an array copy without `array_amend`; doing so will
+result in undefined behavior.
+For those type/kinds that cannot be promoted to values, we must leave them in a
+memory reference domain, and we use `array_access` and `array_amend`.
+
+## array_load
+
+This operation taken with `array_merge_store` captures Fortran's
+copy-in/copy-out semantics. One way to think of this is that array_load
+creates a snapshot copy of the entire array. This copy can then be used
+as the "original value" of the array while the array's new value is
+computed. The `array_merge_store` operation is the copy-out semantics, which
+merge the updates with the original array value to produce the final array
+result. This abstracts the copy operations as opposed to always creating
+copies or requiring dependence analysis be performed on the syntax trees
+and before lowering to the IR.
+
+Load an entire array as a single SSA value.
+
+```fortran
+  real :: a(o:n,p:m)
+  ...
+  ... = ... a ...
+```
+
+One can use `fir.array_load` to produce an ssa-value that captures an
+immutable value of the entire array `a`, as in the Fortran array expression
+shown above. Subsequent changes to the memory containing the array do not
+alter its composite value. This operation lets one load an array as a
+value while applying a runtime shape, shift, or slice to the memory
+reference, and its semantics guarantee immutability.
+
+```mlir
+%s = fir.shape_shift %lb1, %ex1, %lb2, %ex2 : (index, index, index, index) -> !fir.shape<2>
+// load the entire array 'a'
+%v = fir.array_load %a(%s) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+// a fir.store here into array %a does not change %v
+```
+
+# array_merge_store
+
+The `array_merge_store` operation stores a merged array value to memory. 
+
+
+```fortran
+  real :: a(n,m)
+  ...
+  a = ...
+```
+
+One can use `fir.array_merge_store` to merge/copy the value of `a` in an
+array expression as shown above.
+
+```mlir
+  %v = fir.array_load %a(%shape) : ...
+  %r = fir.array_update %v, %f, %i, %j : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
+  fir.array_merge_store %v, %r to %a : !fir.ref<!fir.array<?x?xf32>>
+```
+
+This operation merges the original loaded array value, `%v`, with the
+chained updates, `%r`, and stores the result to the array at address, `%a`.
+
+This operation taken with `array_load`'s captures Fortran's
+copy-in/copy-out semantics. The first operands of `array_merge_store` is the
+result of the initial `array_load` operation. While this value could be
+retrieved by reference chasiing through the different array operations it is
+useful to have it on hand directly for analysis passes since this directly
+defines the "bounds" of the Fortran statement represented by these operations.
+The intention is to allow copy-in/copy-out regions to be easily delineated,
+analyzed, and optimized.
+
+## array_fetch
+
+The `array_fetch` operation fetches the value of an element in an array value.
+
+```fortran
+  real :: a(n,m)
+  ...
+  ... a ...
+  ... a(r,s+1) ...
+```
+
+One can use `fir.array_fetch` to fetch the (implied) value of `a(i,j)` in
+an array expression as shown above. It can also be used to extract the
+element `a(r,s+1)` in the second expression.
+
+```mlir
+  %s = fir.shape %n, %m : (index, index) -> !fir.shape<2>
+  // load the entire array 'a'
+  %v = fir.array_load %a(%s) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+  // fetch the value of one of the array value's elements
+  %1 = fir.array_fetch %v, %i, %j : (!fir.array<?x?xf32>, index, index) -> f32
+```
+
+It is only possible to use `array_fetch` on an `array_load` result value or a
+value that can be trace back transitively to an `array_load` as the dominating
+source. Other array operation such as `array_update` can be in between.
+
+## array_update
+
+The `array_update` operation is used to update the value of an element in an
+array value. A new array value is returned where all element values of the input
+array are identical except for the selected element which is the value passed in
+the update.
+
+```fortran
+  real :: a(n,m)
+  ...
+  a = ...
+```
+
+One can use `fir.array_update` to update the (implied) value of `a(i,j)`
+in an array expression as shown above.
+
+```mlir
+  %s = fir.shape %n, %m : (index, index) -> !fir.shape<2>
+  // load the entire array 'a'
+  %v = fir.array_load %a(%s) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+  // update the value of one of the array value's elements
+  // %r_{ij} = %f  if (i,j) = (%i,%j),   %v_{ij} otherwise
+  %r = fir.array_update %v, %f, %i, %j : (!fir.array<?x?xf32>, f32, index, index) -> !fir.array<?x?xf32>
+  fir.array_merge_store %v, %r to %a : !fir.ref<!fir.array<?x?xf32>>
+```
+
+An array value update behaves as if a mapping function from the indices
+to the new value has been added, replacing the previous mapping. These
+mappings can be added to the ssa-value, but will not be materialized in
+memory until the `fir.array_merge_store` is performed.
+`fir.array_update` can be seen as an array access with a notion that the array
+will be changed at the accessed position when `fir.array_merge_store` is
+performed.
+
+## array_access
+
+The `array_access` provides a reference to a single element from an array value.
+This is *not* a view in the immutable array, otherwise it couldn't be stored to.
+It can be see as a logical copy of the element and its position in the array.
+Tis reference can be written to and modified withoiut changing the original
+array.
+
+The `array_access` operation is used to fetch the memory reference of an element
+in an array value.
+
+```fortran
+  real :: a(n,m)
+  ...
+  ... a ...
+  ... a(r,s+1) ...
+```
+
+One can use `fir.array_access` to recover the implied memory reference to
+the element `a(i,j)` in an array expression `a` as shown above. It can also
+be used to recover the reference element `a(r,s+1)` in the second
+expression.
+
+```mlir
+  %s = fir.shape %n, %m : (index, index) -> !fir.shape<2>
+  // load the entire array 'a'
+  %v = fir.array_load %a(%s) : (!fir.ref<!fir.array<?x?xf32>>, !fir.shape<2>) -> !fir.array<?x?xf32>
+  // fetch the value of one of the array value's elements
+  %1 = fir.array_access %v, %i, %j : (!fir.array<?x?xf32>, index, index) -> !fir.ref<f32>
+```
+
+It is only possible to use `array_access` on an `array_load` result value or a
+value that can be trace back transitively to an `array_load` as the dominating
+source. Other array operation such as `array_amend` can be in between.
+
+`array_access` if mainly used with `character`'s arrays and arrays of derived
+types where because they might have a non-compile time sizes that would be
+useless too load entirely or too big to load. 
+
+Here is a simple example with a `character` array assignment. 
+
+Fortran
+```
+subroutine foo(c1, c2, n)
+  integer(8) :: n
+  character(n) :: c1(:), c2(:)
+  c1 = c2
+end subroutine
+```
+
+It results in this cleaned-up FIR:
+```
+func @_QPfoo(%arg0: !fir.box<!fir.array<?x!fir.char<1,?>>>, %arg1: !fir.box<!fir.array<?x!fir.char<1,?>>>, %arg2: !fir.ref<i64>) {
+    %0 = fir.load %arg2 : !fir.ref<i64>
+    %c0 = arith.constant 0 : index
+    %1:3 = fir.box_dims %arg0, %c0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>, index) -> (index, index, index)
+    %2 = fir.array_load %arg0 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.array<?x!fir.char<1,?>>
+    %3 = fir.array_load %arg1 : (!fir.box<!fir.array<?x!fir.char<1,?>>>) -> !fir.array<?x!fir.char<1,?>>
+    %c1 = arith.constant 1 : index
+    %4 = arith.subi %1#1, %c1 : index
+    %5 = fir.do_loop %arg3 = %c0 to %4 step %c1 unordered iter_args(%arg4 = %2) -> (!fir.array<?x!fir.char<1,?>>) {
+      %6 = fir.array_access %3, %arg3 : (!fir.array<?x!fir.char<1,?>>, index) -> !fir.ref<!fir.char<1,?>>
+      %7 = fir.array_access %arg4, %arg3 : (!fir.array<?x!fir.char<1,?>>, index) -> !fir.ref<!fir.char<1,?>>
+      %false = arith.constant false
+      %8 = fir.convert %7 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+      %9 = fir.convert %6 : (!fir.ref<!fir.char<1,?>>) -> !fir.ref<i8>
+      fir.call @llvm.memmove.p0i8.p0i8.i64(%8, %9, %0, %false) : (!fir.ref<i8>, !fir.ref<i8>, i64, i1) -> ()
+      %10 = fir.array_amend %arg4, %7 : (!fir.array<?x!fir.char<1,?>>, !fir.ref<!fir.char<1,?>>) -> !fir.array<?x!fir.char<1,?>>
+      fir.result %10 : !fir.array<?x!fir.char<1,?>>
+    }
+    fir.array_merge_store %2, %5 to %arg0 : !fir.array<?x!fir.char<1,?>>, !fir.array<?x!fir.char<1,?>>, !fir.box<!fir.array<?x!fir.char<1,?>>>
+    return
+  }
+  func private @llvm.memmove.p0i8.p0i8.i64(!fir.ref<i8>, !fir.ref<i8>, i64, i1)
+}
+```
+
+`fir.array_access` and `fir.array_amend` split the two purposes of
+`fir.array_update` into two distinct operations to work on type/kind that must
+reside in the memory reference domain. `fir.array_access` captures the array
+access semantics and `fir.array_amend` denotes which `fir.array_access` is the
+lhs.
+
+We do not want to start loading the entire `!fir.ref<!fir.char<1,?>>` here since
+it has dynamic length, and even if constant, could be too long to do so.
+
+## array_amend
+
+The `array_amend` operation marks an array value as having been changed via a 
+reference obtain by an `array_access`. It acts as a logical transaction log
+that is used to merge the final result back with an `array_merge_store`
+operation.
+
+```mlir
+  // fetch the value of one of the array value's elements
+  %1 = fir.array_access %v, %i, %j : (!fir.array<?x?xT>, index, index) -> !fir.ref<T>
+  // modify the element by storing data using %1 as a reference
+  %2 = ... %1 ...
+  // mark the array value
+  %new_v = fir.array_amend %v, %2 : (!fir.array<?x?xT>, !fir.ref<T>) -> !fir.array<?x?xT>
+```
+
+## Example
+
+Here is an example of a FIR code using several array operations together. The
+example below is a simplified version of the FIR code comiing from the
+following Fortran code snippet.
+
+```fortran
+subroutine s(a,l,u)
+  type t
+    integer m
+  end type t
+  type(t) :: a(:)
+  integer :: l, u
+  forall (i=l:u)
+    a(i) = a(u-i+1)
+  end forall
+end
+```
+
+```
+func @_QPs(%arg0: !fir.box<!fir.array<?x!fir.type<_QFsTt{m:i32}>>>, %arg1: !fir.ref<i32>, %arg2: !fir.ref<i32>) {
+  %l = fir.load %arg1 : !fir.ref<i32>
+  %l_index = fir.convert %l : (i32) -> index
+  %u = fir.load %arg2 : !fir.ref<i32>
+  %u_index = fir.convert %u : (i32) -> index
+  %c1 = arith.constant 1 : index
+  // This is the "copy-in" array used on the RHS of the expression. It will be indexed into and loaded at each iteration.
+  %array_a_src = fir.array_load %arg0 : (!fir.box<!fir.array<?x!fir.type<_QFsTt{m:i32}>>>) -> !fir.array<?x!fir.type<_QFsTt{m:i32}>>
+
+  // This is the "seed" for the "copy-out" array on the LHS. It'll flow from iteration to iteration and gets
+  // updated at each iteration.
+  %array_a_dest_init = fir.array_load %arg0 : (!fir.box<!fir.array<?x!fir.type<_QFsTt{m:i32}>>>) -> !fir.array<?x!fir.type<_QFsTt{m:i32}>>
+  
+  %array_a_final = fir.do_loop %i = %l_index to %u_index step %c1 unordered iter_args(%array_a_dest = %array_a_dest_init) -> (!fir.array<?x!fir.type<_QFsTt{m:i32}>>) {
+    // Compute indexing for the RHS and array the element.
+    %u_minus_i = arith.subi %u_index, %i : index // u-i
+    %u_minus_i_plus_one = arith.addi %u_minus_i, %c1: index // u-i+1
+    %a_src_ref = fir.array_access %array_a_src, %u_minus_i_plus_one {Fortran.offsets} : (!fir.array<?x!fir.type<_QFsTt{m:i32}>>, index) -> !fir.ref<!fir.type<_QFsTt{m:i32}>>
+    %a_src_elt = fir.load %a_src_ref : !fir.ref<!fir.type<_QFsTt{m:i32}>>
+
+    // Get the reference to the element in the array on the LHS
+    %a_dst_ref = fir.array_access %array_a_dest, %i {Fortran.offsets} : (!fir.array<?x!fir.type<_QFsTt{m:i32}>>, index) -> !fir.ref<!fir.type<_QFsTt{m:i32}>>
+
+    // Store the value, and update the array
+    fir.store %a_src_elt to %a_dst_ref : !fir.ref<!fir.type<_QFsTt{m:i32}>>
+    %updated_array_a = fir.array_amend %array_a_dest, %a_dst_ref : (!fir.array<?x!fir.type<_QFsTt{m:i32}>>, !fir.ref<!fir.type<_QFsTt{m:i32}>>) -> !fir.array<?x!fir.type<_QFsTt{m:i32}>>
+
+    // Forward the current updated array to the next iteration.
+    fir.result %updated_array_a : !fir.array<?x!fir.type<_QFsTt{m:i32}>>
+  }
+  // Store back the result by merging the initial value loaded before the loop
+  // with the final one produced by the loop.
+  fir.array_merge_store %array_a_dest_init, %array_a_final to %arg0 : !fir.array<?x!fir.type<_QFsTt{m:i32}>>, !fir.array<?x!fir.type<_QFsTt{m:i32}>>, !fir.box<!fir.array<?x!fir.type<_QFsTt{m:i32}>>>
+  return
+}
+```

From d03c5bc8d437e47ae424ac9611ae441cd5225526 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Markus=20B=C3=B6ck?= <markus.boeck02@gmail.com>
Date: Fri, 21 Jan 2022 10:03:48 +0100
Subject: [PATCH 145/946] [mlir] Fully qualify return types in
 OpAsmInterface.td and FunctionInterfaces.td

---
 mlir/include/mlir/IR/FunctionInterfaces.td | 6 +++---
 mlir/include/mlir/IR/OpAsmInterface.td     | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/IR/FunctionInterfaces.td b/mlir/include/mlir/IR/FunctionInterfaces.td
index b6993ff6ffc76..20c7d7bbd51b7 100644
--- a/mlir/include/mlir/IR/FunctionInterfaces.td
+++ b/mlir/include/mlir/IR/FunctionInterfaces.td
@@ -55,13 +55,13 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
       the type (to allow for this method may be called on function
       declarations).
     }],
-    "ArrayRef<Type>", "getArgumentTypes">,
+    "::llvm::ArrayRef<::mlir::Type>", "getArgumentTypes">,
     InterfaceMethod<[{
       Returns the function result types based exclusively on
       the type (to allow for this method may be called on function
       declarations).
     }],
-    "ArrayRef<Type>", "getResultTypes">,
+    "::llvm::ArrayRef<::mlir::Type>", "getResultTypes">,
     InterfaceMethod<[{
       Returns a clone of the function type with the given argument and
       result types.
@@ -70,7 +70,7 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
             an appropriate clone method:
               `Type clone(ArrayRef<Type> inputs, ArrayRef<Type> results)`
     }],
-    "Type", "cloneTypeWith", (ins
+    "::mlir::Type", "cloneTypeWith", (ins
       "::mlir::TypeRange":$inputs, "::mlir::TypeRange":$results
     ), /*methodBody=*/[{}], /*defaultImplementation=*/[{
       return $_op.getType().clone(inputs, results);
diff --git a/mlir/include/mlir/IR/OpAsmInterface.td b/mlir/include/mlir/IR/OpAsmInterface.td
index b49e12ea9a85e..c13e59fb1b466 100644
--- a/mlir/include/mlir/IR/OpAsmInterface.td
+++ b/mlir/include/mlir/IR/OpAsmInterface.td
@@ -70,7 +70,7 @@ def OpAsmOpInterface : OpInterface<"OpAsmOpInterface"> {
       returned `spv`. The default implementation returns an empty string which
       is ignored.
       }],
-      "StringRef", "getDefaultDialect", (ins), "", "return \"\";"
+      "::llvm::StringRef", "getDefaultDialect", (ins), "", "return \"\";"
     >,
   ];
 }

From a2f6921ef2a1564a52aa3ecd7e30697250ccaf2e Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Thu, 20 Jan 2022 11:08:24 +0100
Subject: [PATCH 146/946] [llvm] Remove unused headers in LLVMDemangle

As an hint to the impact of the cleanup, running

clang++ -E  -Iinclude -I../llvm/include ../llvm/lib/Demangle/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l

before: 208053 lines
after:  203965 lines
---
 llvm/include/llvm/Demangle/MicrosoftDemangle.h      | 2 --
 llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h | 1 -
 llvm/lib/Demangle/ItaniumDemangle.cpp               | 2 --
 llvm/lib/Demangle/MicrosoftDemangleNodes.cpp        | 1 -
 4 files changed, 6 deletions(-)

diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangle.h b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
index 0403136616019..6f2d0416901ec 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangle.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangle.h
@@ -9,10 +9,8 @@
 #ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
 #define LLVM_DEMANGLE_MICROSOFTDEMANGLE_H
 
-#include "llvm/Demangle/DemangleConfig.h"
 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
 #include "llvm/Demangle/StringView.h"
-#include "llvm/Demangle/Utility.h"
 
 #include <utility>
 
diff --git a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
index 1455dad3d1b2b..8ad2472364b4c 100644
--- a/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
+++ b/llvm/include/llvm/Demangle/MicrosoftDemangleNodes.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_DEMANGLE_MICROSOFTDEMANGLENODES_H
 #define LLVM_DEMANGLE_MICROSOFTDEMANGLENODES_H
 
-#include "llvm/Demangle/DemangleConfig.h"
 #include "llvm/Demangle/StringView.h"
 #include <array>
 #include <cstdint>
diff --git a/llvm/lib/Demangle/ItaniumDemangle.cpp b/llvm/lib/Demangle/ItaniumDemangle.cpp
index 3f68f76761ce0..1a5db755e37b5 100644
--- a/llvm/lib/Demangle/ItaniumDemangle.cpp
+++ b/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -19,9 +19,7 @@
 #include <cstdlib>
 #include <cstring>
 #include <functional>
-#include <numeric>
 #include <utility>
-#include <vector>
 
 using namespace llvm;
 using namespace llvm::itanium_demangle;
diff --git a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
index 32d8dff66c3f8..d07d05a08c556 100644
--- a/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
+++ b/llvm/lib/Demangle/MicrosoftDemangleNodes.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Demangle/MicrosoftDemangleNodes.h"
-#include "llvm/Demangle/DemangleConfig.h"
 #include "llvm/Demangle/Utility.h"
 #include <cctype>
 #include <string>

From c0cf209076a29076ebf43c59dff3cc3c8400e4d7 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Fri, 21 Jan 2022 09:34:38 +0000
Subject: [PATCH 147/946] [VPlan] Add
 VPWidenIntOrFpInductionRecipe::isCanonical, use it (NFCI).

This patch adds VPWidenIntOrFpInductionRecipe::isCanonical to check if
an induction recipe is canonical. The code is also updated to use it
instead of isCanonicalID.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D117551
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 24 +++++++------------
 llvm/lib/Transforms/Vectorize/VPlan.cpp       |  6 +++++
 llvm/lib/Transforms/Vectorize/VPlan.h         |  5 ++++
 llvm/lib/Transforms/Vectorize/VPlanValue.h    |  6 +++++
 4 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 56b8bd8564f2a..d186ae59a74a2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -510,8 +510,7 @@ class InnerLoopVectorizer {
   /// is provided, the integer induction variable will first be truncated to
   /// the corresponding type. \p CanonicalIV is the scalar value generated for
   /// the canonical induction variable.
-  void widenIntOrFpInduction(PHINode *IV, const InductionDescriptor &ID,
-                             Value *Start, TruncInst *Trunc, VPValue *Def,
+  void widenIntOrFpInduction(PHINode *IV, VPWidenIntOrFpInductionRecipe *Def,
                              VPTransformState &State, Value *CanonicalIV);
 
   /// Construct the vector value of a scalarized value \p V one lane at a time.
@@ -2478,17 +2477,12 @@ bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
   return llvm::any_of(IV->users(), isScalarInst);
 }
 
-/// Returns true if \p ID starts at 0 and has a step of 1.
-static bool isCanonicalID(const InductionDescriptor &ID) {
-  if (!ID.getConstIntStepValue() || !ID.getConstIntStepValue()->isOne())
-    return false;
-  auto *StartC = dyn_cast<ConstantInt>(ID.getStartValue());
-  return StartC && StartC->isZero();
-}
-
 void InnerLoopVectorizer::widenIntOrFpInduction(
-    PHINode *IV, const InductionDescriptor &ID, Value *Start, TruncInst *Trunc,
-    VPValue *Def, VPTransformState &State, Value *CanonicalIV) {
+    PHINode *IV, VPWidenIntOrFpInductionRecipe *Def, VPTransformState &State,
+    Value *CanonicalIV) {
+  Value *Start = Def->getStartValue()->getLiveInIRValue();
+  const InductionDescriptor &ID = Def->getInductionDescriptor();
+  TruncInst *Trunc = Def->getTruncInst();
   IRBuilder<> &Builder = State.Builder;
   assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
   assert(!State.VF.isZero() && "VF must be non-zero");
@@ -2519,7 +2513,7 @@ void InnerLoopVectorizer::widenIntOrFpInduction(
   auto CreateScalarIV = [&](Value *&Step) -> Value * {
     Value *ScalarIV = CanonicalIV;
     Type *NeededType = IV->getType();
-    if (!isCanonicalID(ID) || ScalarIV->getType() != NeededType) {
+    if (!Def->isCanonical() || ScalarIV->getType() != NeededType) {
       ScalarIV =
           NeededType->isIntegerTy()
               ? Builder.CreateSExtOrTrunc(ScalarIV, NeededType)
@@ -9702,9 +9696,7 @@ void VPWidenGEPRecipe::execute(VPTransformState &State) {
 void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
   assert(!State.Instance && "Int or FP induction being replicated.");
   auto *CanonicalIV = State.get(getParent()->getPlan()->getCanonicalIV(), 0);
-  State.ILV->widenIntOrFpInduction(IV, getInductionDescriptor(),
-                                   getStartValue()->getLiveInIRValue(),
-                                   getTruncInst(), this, State, CanonicalIV);
+  State.ILV->widenIntOrFpInduction(IV, this, State, CanonicalIV);
 }
 
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 04c8e399c5438..0ec2361390590 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1263,6 +1263,12 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
     O << " " << VPlanIngredient(IV);
 }
 
+bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
+  auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
+  auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep());
+  return StartC && StartC->isZero() && StepC && StepC->isOne();
+}
+
 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
   O << Indent << "WIDEN-GEP ";
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c7946b6d2adef..10d5c1b3409a5 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1059,6 +1059,7 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
 
   /// Returns the start value of the induction.
   VPValue *getStartValue() { return getOperand(0); }
+  const VPValue *getStartValue() const { return getOperand(0); }
 
   /// Returns the first defined value as TruncInst, if it is one or nullptr
   /// otherwise.
@@ -1071,6 +1072,10 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
 
   /// Returns the induction descriptor for the recipe.
   const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }
+
+  /// Returns true if the induction is canonical, i.e. starting at 0 and
+  /// incremented by UF * VF (= the original IV is incremented by 1).
+  bool isCanonical() const;
 };
 
 /// A pure virtual base class for all recipes modeling header phis, including
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 2df547e960ba9..5296d2b9485cc 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -178,6 +178,7 @@ class VPValue {
   void replaceAllUsesWith(VPValue *New);
 
   VPDef *getDef() { return Def; }
+  const VPDef *getDef() const { return Def; }
 
   /// Returns the underlying IR value, if this VPValue is defined outside the
   /// scope of VPlan. Returns nullptr if the VPValue is defined by a VPDef
@@ -187,6 +188,11 @@ class VPValue {
            "VPValue is not a live-in; it is defined by a VPDef inside a VPlan");
     return getUnderlyingValue();
   }
+  const Value *getLiveInIRValue() const {
+    assert(!getDef() &&
+           "VPValue is not a live-in; it is defined by a VPDef inside a VPlan");
+    return getUnderlyingValue();
+  }
 };
 
 typedef DenseMap<Value *, VPValue *> Value2VPValueTy;

From 55689904d2e5afcc5309f7234d6369307ee305d0 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Fri, 21 Jan 2022 09:44:31 +0000
Subject: [PATCH 148/946] [VPlan] Move ::isCanonical outside ifdef.

This fixes a build failure with assertions disabled.
---
 llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 0ec2361390590..a96c122db2a97 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -1262,6 +1262,7 @@ void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
   } else
     O << " " << VPlanIngredient(IV);
 }
+#endif
 
 bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
   auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue());
@@ -1269,6 +1270,7 @@ bool VPWidenIntOrFpInductionRecipe::isCanonical() const {
   return StartC && StartC->isZero() && StepC && StepC->isOne();
 }
 
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
                              VPSlotTracker &SlotTracker) const {
   O << Indent << "WIDEN-GEP ";

From 1f9e18b6565fd1bb69c4b649b9efd3467b3c7c7d Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Thu, 20 Jan 2022 11:21:47 +0100
Subject: [PATCH 149/946] [llvm] Remove (some) LLVMDemangle header dependencies

- Avoid using <iterator> for std::end on a plain array (using <array> instead)
- Avoid using <algorithm> for std::min and std::equal (using alternate logic and std::strcmp instead)

As an hint to the impact of the cleanup, running

clang++ -E  -Iinclude -I../llvm/include ../llvm/lib/Demangle/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l

before:  203965 lines
after:   169704 lines
---
 llvm/include/llvm/Demangle/ItaniumDemangle.h |  3 ++-
 llvm/include/llvm/Demangle/StringView.h      | 14 +++++++-------
 llvm/include/llvm/Demangle/Utility.h         |  9 +++++----
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index b25139d8a72ba..01f414a7257bf 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -21,12 +21,13 @@
 #include "DemangleConfig.h"
 #include "StringView.h"
 #include "Utility.h"
+#include <algorithm>
 #include <cassert>
 #include <cctype>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <numeric>
+#include <limits>
 #include <utility>
 
 #define FOR_EACH_NODE_KIND(X) \
diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h
index 1e4d3803f06cd..7c8cb482ae1c1 100644
--- a/llvm/include/llvm/Demangle/StringView.h
+++ b/llvm/include/llvm/Demangle/StringView.h
@@ -14,7 +14,6 @@
 #define DEMANGLE_STRINGVIEW_H
 
 #include "DemangleConfig.h"
-#include <algorithm>
 #include <cassert>
 #include <cstring>
 
@@ -38,15 +37,16 @@ class StringView {
 
   StringView substr(size_t Pos, size_t Len = npos) const {
     assert(Pos <= size());
-    return StringView(begin() + Pos, std::min(Len, size() - Pos));
+    if (Len > size() - Pos)
+      Len = size() - Pos;
+    return StringView(begin() + Pos, Len);
   }
 
   size_t find(char C, size_t From = 0) const {
-    size_t FindBegin = std::min(From, size());
     // Avoid calling memchr with nullptr.
-    if (FindBegin < size()) {
+    if (From < size()) {
       // Just forward to memchr, which is faster than a hand-rolled loop.
-      if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin))
+      if (const void *P = ::memchr(First + From, C, size() - From))
         return size_t(static_cast<const char *>(P) - First);
     }
     return npos;
@@ -98,7 +98,7 @@ class StringView {
   bool startsWith(StringView Str) const {
     if (Str.size() > size())
       return false;
-    return std::equal(Str.begin(), Str.end(), begin());
+    return std::strncmp(Str.begin(), begin(), Str.size()) == 0;
   }
 
   const char &operator[](size_t Idx) const { return *(begin() + Idx); }
@@ -111,7 +111,7 @@ class StringView {
 
 inline bool operator==(const StringView &LHS, const StringView &RHS) {
   return LHS.size() == RHS.size() &&
-         std::equal(LHS.begin(), LHS.end(), RHS.begin());
+         std::strncmp(LHS.begin(), RHS.begin(), LHS.size()) == 0;
 }
 
 DEMANGLE_NAMESPACE_END
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 733d83ad1b6ba..b816aa22c24e0 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -14,10 +14,11 @@
 #define DEMANGLE_UTILITY_H
 
 #include "StringView.h"
+#include <array>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
-#include <iterator>
+#include <exception>
 #include <limits>
 
 DEMANGLE_NAMESPACE_BEGIN
@@ -48,8 +49,8 @@ class OutputBuffer {
       return;
     }
 
-    char Temp[21];
-    char *TempPtr = std::end(Temp);
+    std::array<char, 21> Temp;
+    char *TempPtr = Temp.end();
 
     while (N) {
       *--TempPtr = char('0' + N % 10);
@@ -59,7 +60,7 @@ class OutputBuffer {
     // Add negative sign...
     if (isNeg)
       *--TempPtr = '-';
-    this->operator<<(StringView(TempPtr, std::end(Temp)));
+    this->operator<<(StringView(TempPtr, Temp.end()));
   }
 
 public:

From e7762653d3b071dfb86d4e2d3bcf7c1455683d37 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 11:19:54 +0100
Subject: [PATCH 150/946] [Attributor] Avoid some pointer element type accesses

---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 0723402c19ee0..d0e13dc269385 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1216,7 +1216,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
         }
         UsrOI.Offset = PtrOI.Offset +
                        DL.getIndexedOffsetInType(
-                           CurPtr->getType()->getPointerElementType(), Indices);
+                           GEP->getSourceElementType(), Indices);
         Follow = true;
         return true;
       }
@@ -6650,9 +6650,10 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
     IRBuilder<NoFolder> IRB(IP);
     const DataLayout &DL = IP->getModule()->getDataLayout();
 
-    if (Base->getType()->getPointerElementType() != PrivType)
-      Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
-                                                 "", ACS.getInstruction());
+    Type *PrivPtrType = PrivType->getPointerTo();
+    if (Base->getType() != PrivPtrType)
+      Base = BitCastInst::CreateBitOrPointerCast(Base, PrivPtrType, "",
+                                                 ACS.getInstruction());
 
     // Traverse the type, build GEPs and loads.
     if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
@@ -6794,7 +6795,7 @@ struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
     if (auto *AI = dyn_cast<AllocaInst>(Obj))
       if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
         if (CI->isOne())
-          return Obj->getType()->getPointerElementType();
+          return AI->getAllocatedType();
     if (auto *Arg = dyn_cast<Argument>(Obj)) {
       auto &PrivArgAA = A.getAAFor<AAPrivatizablePtr>(
           *this, IRPosition::argument(*Arg), DepClassTy::REQUIRED);

From 065044c443f4041f32e0a8d6e633f9d92580fbca Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Fri, 21 Jan 2022 11:56:32 +0100
Subject: [PATCH 151/946] Fix 1f9e18b6565fd1bb69c4b649b9efd3467b3c7c7d

Don't assume iterator on std::array<char, ...> are char*, use .data() instead
---
 llvm/include/llvm/Demangle/Utility.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index b816aa22c24e0..989b41701e4c9 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -50,7 +50,7 @@ class OutputBuffer {
     }
 
     std::array<char, 21> Temp;
-    char *TempPtr = Temp.end();
+    char *TempPtr = Temp.data() + Temp.size();
 
     while (N) {
       *--TempPtr = char('0' + N % 10);

From 329feeb938ac63602136bcb3c5ec3a64109be94c Mon Sep 17 00:00:00 2001
From: Sameer Rahmani <lxsameer@gnu.org>
Date: Fri, 21 Jan 2022 21:27:55 +1100
Subject: [PATCH 152/946] [ORC][docs] Describe removing JITDylibs, using custom
 program representations.

Add documentation around:
* Removing JITDylib from the session
* Add support for custom program representation

Reviewed By: lhames

Differential Revision: https://reviews.llvm.org/D116476
---
 llvm/docs/ORCv2.rst | 92 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 92 insertions(+)

diff --git a/llvm/docs/ORCv2.rst b/llvm/docs/ORCv2.rst
index ec372f575b195..8012820d60e52 100644
--- a/llvm/docs/ORCv2.rst
+++ b/llvm/docs/ORCv2.rst
@@ -579,6 +579,98 @@ calling the ``ExecutionSession::createJITDylib`` method with a unique name:
 The JITDylib is owned by the ``ExecutionEngine`` instance and will be freed
 when it is destroyed.
 
+How to remove a JITDylib
+------------------------
+JITDylibs can be removed completely by calling  ``ExecutionSession::removeJITDylib``.
+Calling that function will close the give JITDylib and clear all the resources held for
+it. No code can be added to a closed JITDylib.
+
+Please note that closing a JITDylib won't update any pointers, you are responsible for
+ensuring that any code/data contained in the JITDylib is no longer in use.
+
+Also You can use a custom resource tracker to remove individual modules from a JITDylib.
+
+How to add the support for custom program representation
+--------------------------------------------------------
+In order to add the support for a custom program representation, a custom ``MaterializationUnit``
+for the program representation, and a custom ``Layer`` are needed. The Layer will have two
+operations: ``add`` and ``emit``. The ``add`` operation takes an instance of your program
+representation, builds one of your custom ``MaterializationUnits`` to hold it, then adds it
+to a ``JITDylib``. The emit operation takes a ``MaterializationResponsibility`` object and an
+instance of your program representation and materializes it, usually by compiling it and handing
+the resulting object off to an ``ObjectLinkingLayer``.
+
+Your custom ``MaterializationUnit`` will have two operations: ``materialize`` and ``discard``. The
+``materialize`` function will be called for you when any symbol provided by the unit is looked up,
+and it should just call the ``emit`` function on your layer, passing in the given
+``MaterializationResponsibility`` and the wrapped program representation. The ``discard`` function
+will be called if some weak symbol provided by your unit is not needed (because the JIT found an
+overriding definition). You can use this to drop your definition early, or just ignore it and let
+the linker drops the definition later.
+
+Here is an example of an ASTLayer:
+
+  .. code-block:: c++
+
+    // ... In you JIT class
+    AstLayer astLayer;
+    // ...
+
+
+    class AstMaterializationUnit : public orc::MaterializationUnit {
+    public:
+      AstMaterializationUnit(AstLayer &l, Ast &ast)
+      : llvm::orc::MaterializationUnit(l.getInterface(ast)), astLayer(l),
+      ast(ast) {};
+
+      llvm::StringRef getName() const override {
+        return "AstMaterializationUnit";
+      }
+
+      void materialize(std::unique_ptr<orc::MaterializationResponsibility> r) override {
+        astLayer.emit(std::move(r), ast);
+      };
+
+    private:
+      void discard(const llvm::orc::JITDylib &jd, const llvm::orc::SymbolStringPtr &sym) override {
+        llvm_unreachable("functions are not overridable");
+      }
+
+
+      AstLayer &astLayer;
+      Ast &ast;
+    };
+
+    class AstLayer {
+      llvhm::orc::IRLayer &baseLayer;
+      llvhm::orc::MangleAndInterner &mangler;
+
+    public:
+      AstLayer(llvm::orc::IRLayer &baseLayer, llvm::orc::MangleAndInterner &mangler)
+      : baseLayer(baseLayer), mangler(mangler){};
+
+      llvm::Error add(llvm::orc::ResourceTrackerSP &rt, Ast &ast) {
+        return rt->getJITDylib().define(std::make_unique<AstMaterializationUnit>(*this, ast), rt);
+      }
+
+      void emit(std::unique_ptr<orc::MaterializationResponsibility> mr, Ast &ast) {
+        // compileAst is just function that compiles the given AST and returns
+        // a `llvm::orc::ThreadSafeModule`
+        baseLayer.emit(std::move(mr), compileAst(ast));
+      }
+
+      llvm::orc::MaterializationUnit::Interface getInterface(Ast &ast) {
+          SymbolFlagsMap Symbols;
+          // Find all the symbols in the AST and for each of them
+          // add it to the Symbols map.
+          Symbols[mangler(someNameFromAST)] =
+            JITSymbolFlags(JITSymbolFlags::Exported | JITSymbolFlags::Callable);
+          return MaterializationUnit::Interface(std::move(Symbols), nullptr);
+      }
+    };
+
+Take look at the source code of `Building A JIT's Chapter 4 <tutorial/BuildingAJIT4.html>`_ for a complete example.
+
 How to use ThreadSafeModule and ThreadSafeContext
 -------------------------------------------------
 

From b351ac3873db15b16c2aa6d1e0e08ff9fab44f1f Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Mon, 20 Dec 2021 14:25:16 +0100
Subject: [PATCH 153/946] [AMDGPU][NFC] Regenerate InstCombine test

---
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 306 +++++++++---------
 1 file changed, 153 insertions(+), 153 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index b9fba55e5026b..a6ddcdd0a4a0e 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -66,7 +66,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind {
 
 define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) [[ATTR11:#.*]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR12:[0-9]+]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
@@ -1662,7 +1662,7 @@ define i64 @icmp_constant_inputs_false() {
 
 define i64 @icmp_constant_inputs_true() {
 ; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12:#.*]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR13:[0-9]+]]
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
@@ -2369,7 +2369,7 @@ define i64 @fcmp_constant_inputs_false() {
 
 define i64 @fcmp_constant_inputs_true() {
 ; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]]
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
@@ -2411,7 +2411,7 @@ define i64 @ballot_zero_64() {
 
 define i64 @ballot_one_64() {
 ; CHECK-LABEL: @ballot_one_64(
-; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.read_register.i64(metadata !0) [[ATTR12]]
+; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]]
 ; CHECK-NEXT:    ret i64 [[B]]
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
@@ -2437,7 +2437,7 @@ define i32 @ballot_zero_32() {
 
 define i32 @ballot_one_32() {
 ; CHECK-LABEL: @ballot_one_32(
-; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.read_register.i32(metadata !1) [[ATTR12]]
+; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR13]]
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
@@ -2861,8 +2861,8 @@ declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i
 
 define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2873,8 +2873,8 @@ define amdgpu_kernel void @image_sample_a16_1d(<4 x float> addrspace(1)* %out, <
 
 define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2886,8 +2886,8 @@ define amdgpu_kernel void @image_sample_a16_2d(<4 x float> addrspace(1)* %out, <
 
 define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
 ; CHECK-LABEL: @image_sample_a16_3d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2901,8 +2901,8 @@ define amdgpu_kernel void @image_sample_a16_3d(<4 x float> addrspace(1)* %out, <
 define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
 ;
 ; CHECK-LABEL: @image_sample_a16_cube(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2915,8 +2915,8 @@ define amdgpu_kernel void @image_sample_a16_cube(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_1darray(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2928,8 +2928,8 @@ define amdgpu_kernel void @image_sample_a16_1darray(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_2darray(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2942,8 +2942,8 @@ define amdgpu_kernel void @image_sample_a16_2darray(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_c_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2954,8 +2954,8 @@ define amdgpu_kernel void @image_sample_a16_c_1d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_c_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2967,8 +2967,8 @@ define amdgpu_kernel void @image_sample_a16_c_2d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2980,8 +2980,8 @@ define amdgpu_kernel void @image_sample_a16_cl_1d(<4 x float> addrspace(1)* %out
 
 define amdgpu_kernel void @image_sample_a16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -2994,8 +2994,8 @@ define amdgpu_kernel void @image_sample_a16_cl_2d(<4 x float> addrspace(1)* %out
 
 define amdgpu_kernel void @image_sample_a16_c_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3007,8 +3007,8 @@ define amdgpu_kernel void @image_sample_a16_c_cl_1d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3021,8 +3021,8 @@ define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
 ; CHECK-LABEL: @image_sample_a16_b_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3033,8 +3033,8 @@ define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_b_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3046,8 +3046,8 @@ define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_b_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3058,8 +3058,8 @@ define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %ou
 
 define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_b_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3071,8 +3071,8 @@ define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %ou
 
 define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_b_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3084,8 +3084,8 @@ define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_b_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3098,8 +3098,8 @@ define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_b_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3111,8 +3111,8 @@ define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_b_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3125,8 +3125,8 @@ define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_a16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
 ; CHECK-LABEL: @image_sample_a16_d_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3139,8 +3139,8 @@ define amdgpu_kernel void @image_sample_a16_d_1d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_d_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3156,8 +3156,8 @@ define amdgpu_kernel void @image_sample_a16_d_2d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
 ; CHECK-LABEL: @image_sample_a16_d_3d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3176,8 +3176,8 @@ define amdgpu_kernel void @image_sample_a16_d_3d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_d_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3190,8 +3190,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_1d(<4 x float> addrspace(1)* %ou
 
 define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_d_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3207,8 +3207,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_2d(<4 x float> addrspace(1)* %ou
 
 define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_d_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3222,8 +3222,8 @@ define amdgpu_kernel void @image_sample_a16_d_cl_1d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_d_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3240,8 +3240,8 @@ define amdgpu_kernel void @image_sample_a16_d_cl_2d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_d_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3255,8 +3255,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_cl_1d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_d_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3273,8 +3273,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_cl_2d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_a16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
 ; CHECK-LABEL: @image_sample_a16_cd_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3287,8 +3287,8 @@ define amdgpu_kernel void @image_sample_a16_cd_1d(<4 x float> addrspace(1)* %out
 
 define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_cd_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3304,8 +3304,8 @@ define amdgpu_kernel void @image_sample_a16_cd_2d(<4 x float> addrspace(1)* %out
 
 define amdgpu_kernel void @image_sample_a16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_cd_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3318,8 +3318,8 @@ define amdgpu_kernel void @image_sample_a16_c_cd_1d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_cd_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3335,8 +3335,8 @@ define amdgpu_kernel void @image_sample_a16_c_cd_2d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cd_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3350,8 +3350,8 @@ define amdgpu_kernel void @image_sample_a16_cd_cl_1d(<4 x float> addrspace(1)* %
 
 define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_cd_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3368,8 +3368,8 @@ define amdgpu_kernel void @image_sample_a16_cd_cl_2d(<4 x float> addrspace(1)* %
 
 define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cd_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3383,8 +3383,8 @@ define amdgpu_kernel void @image_sample_a16_c_cd_cl_1d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
 ; CHECK-LABEL: @image_sample_a16_c_cd_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f16(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3401,8 +3401,8 @@ define amdgpu_kernel void @image_sample_a16_c_cd_cl_2d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_a16_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
 ; CHECK-LABEL: @image_sample_a16_l_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3414,8 +3414,8 @@ define amdgpu_kernel void @image_sample_a16_l_1d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
 ; CHECK-LABEL: @image_sample_a16_l_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3428,8 +3428,8 @@ define amdgpu_kernel void @image_sample_a16_l_2d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_a16_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
 ; CHECK-LABEL: @image_sample_a16_c_l_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3441,8 +3441,8 @@ define amdgpu_kernel void @image_sample_a16_c_l_1d(<4 x float> addrspace(1)* %ou
 
 define amdgpu_kernel void @image_sample_a16_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
 ; CHECK-LABEL: @image_sample_a16_c_l_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3455,8 +3455,8 @@ define amdgpu_kernel void @image_sample_a16_c_l_2d(<4 x float> addrspace(1)* %ou
 
 define amdgpu_kernel void @image_sample_a16_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_lz_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3467,8 +3467,8 @@ define amdgpu_kernel void @image_sample_a16_lz_1d(<4 x float> addrspace(1)* %out
 
 define amdgpu_kernel void @image_sample_a16_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_lz_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3480,8 +3480,8 @@ define amdgpu_kernel void @image_sample_a16_lz_2d(<4 x float> addrspace(1)* %out
 
 define amdgpu_kernel void @image_sample_a16_c_lz_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s) {
 ; CHECK-LABEL: @image_sample_a16_c_lz_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3492,8 +3492,8 @@ define amdgpu_kernel void @image_sample_a16_c_lz_1d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_c_lz_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_c_lz_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f16(i32 15, float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3505,8 +3505,8 @@ define amdgpu_kernel void @image_sample_a16_c_lz_2d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V1(
-; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f16(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store float [[RES]], float addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3523,8 +3523,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V1(float addrspace(1)*
 
 define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_V2(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3545,8 +3545,8 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspa
 
 define amdgpu_kernel void @image_sample_g16_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
 ; CHECK-LABEL: @image_sample_g16_d_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3558,8 +3558,8 @@ define amdgpu_kernel void @image_sample_g16_d_1d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
 ; CHECK-LABEL: @image_sample_g16_d_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3573,8 +3573,8 @@ define amdgpu_kernel void @image_sample_g16_d_2d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
 ; CHECK-LABEL: @image_sample_g16_d_3d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DRDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[DRDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3590,8 +3590,8 @@ define amdgpu_kernel void @image_sample_g16_d_3d(<4 x float> addrspace(1)* %out,
 
 define amdgpu_kernel void @image_sample_g16_c_d_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
 ; CHECK-LABEL: @image_sample_g16_c_d_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3603,8 +3603,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_1d(<4 x float> addrspace(1)* %ou
 
 define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
 ; CHECK-LABEL: @image_sample_g16_c_d_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3618,8 +3618,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_2d(<4 x float> addrspace(1)* %ou
 
 define amdgpu_kernel void @image_sample_g16_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_d_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3631,8 +3631,8 @@ define amdgpu_kernel void @image_sample_g16_d_cl_1d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_d_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3646,8 +3646,8 @@ define amdgpu_kernel void @image_sample_g16_d_cl_2d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_c_d_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3659,8 +3659,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_cl_1d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_c_d_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3674,8 +3674,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_cl_2d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_g16_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
 ; CHECK-LABEL: @image_sample_g16_cd_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3687,8 +3687,8 @@ define amdgpu_kernel void @image_sample_g16_cd_1d(<4 x float> addrspace(1)* %out
 
 define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
 ; CHECK-LABEL: @image_sample_g16_cd_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3702,8 +3702,8 @@ define amdgpu_kernel void @image_sample_g16_cd_2d(<4 x float> addrspace(1)* %out
 
 define amdgpu_kernel void @image_sample_g16_c_cd_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
 ; CHECK-LABEL: @image_sample_g16_c_cd_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3715,8 +3715,8 @@ define amdgpu_kernel void @image_sample_g16_c_cd_1d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
 ; CHECK-LABEL: @image_sample_g16_c_cd_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3730,8 +3730,8 @@ define amdgpu_kernel void @image_sample_g16_c_cd_2d(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_g16_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_cd_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3743,8 +3743,8 @@ define amdgpu_kernel void @image_sample_g16_cd_cl_1d(<4 x float> addrspace(1)* %
 
 define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_cd_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3758,8 +3758,8 @@ define amdgpu_kernel void @image_sample_g16_cd_cl_2d(<4 x float> addrspace(1)* %
 
 define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_c_cd_cl_1d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3771,8 +3771,8 @@ define amdgpu_kernel void @image_sample_g16_c_cd_cl_1d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
 ; CHECK-LABEL: @image_sample_g16_c_cd_cl_2d(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3786,8 +3786,8 @@ define amdgpu_kernel void @image_sample_g16_c_cd_cl_2d(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(float addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
 ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V1(
-; CHECK-NEXT:    [[TMP1:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store float [[TMP1]], float addrspace(1)* [[OUT:%.*]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store float [[RES]], float addrspace(1)* [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3801,8 +3801,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V1(float addrspace(1)*
 
 define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
 ; CHECK-LABEL: @image_sample_g16_c_d_o_2darray_V2(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <2 x float> [[TMP1]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
 ; CHECK-NEXT:    ret void
 ;
   %dsdh32 = fpext half %dsdh to float
@@ -3820,8 +3820,8 @@ define amdgpu_kernel void @image_sample_g16_c_d_o_2darray_V2(<2 x float> addrspa
 
 define amdgpu_kernel void @image_sample_a16_1d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d_nnan(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3832,8 +3832,8 @@ define amdgpu_kernel void @image_sample_a16_1d_nnan(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d_nnan_ninf_nsz(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call nnan ninf nsz <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3844,8 +3844,8 @@ define amdgpu_kernel void @image_sample_a16_1d_nnan_ninf_nsz(<4 x float> addrspa
 
 define amdgpu_kernel void @image_sample_a16_1d_fast(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
 ; CHECK-LABEL: @image_sample_a16_1d_fast(
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call fast <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3856,8 +3856,8 @@ define amdgpu_kernel void @image_sample_a16_1d_fast(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_2d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t) {
 ; CHECK-LABEL: @image_sample_a16_2d_nnan(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3869,8 +3869,8 @@ define amdgpu_kernel void @image_sample_a16_2d_nnan(<4 x float> addrspace(1)* %o
 
 define amdgpu_kernel void @image_sample_a16_3d_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %r) {
 ; CHECK-LABEL: @image_sample_a16_3d_nnan(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[R:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3884,8 +3884,8 @@ define amdgpu_kernel void @image_sample_a16_3d_nnan(<4 x float> addrspace(1)* %o
 define amdgpu_kernel void @image_sample_a16_cube_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %face) {
 ;
 ; CHECK-LABEL: @image_sample_a16_cube_nnan(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[FACE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3898,8 +3898,8 @@ define amdgpu_kernel void @image_sample_a16_cube_nnan(<4 x float> addrspace(1)*
 
 define amdgpu_kernel void @image_sample_a16_1darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_1darray_nnan(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f16(i32 15, half [[S:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float
@@ -3911,8 +3911,8 @@ define amdgpu_kernel void @image_sample_a16_1darray_nnan(<4 x float> addrspace(1
 
 define amdgpu_kernel void @image_sample_a16_2darray_nnan(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %slice) {
 ; CHECK-LABEL: @image_sample_a16_2darray_nnan(
-; CHECK-NEXT:    [[TMP1:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
-; CHECK-NEXT:    store <4 x float> [[TMP1]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    [[RES:%.*]] = call nnan <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f16(i32 15, half [[S:%.*]], half [[T:%.*]], half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
   %s32 = fpext half %s to float

From 0530fdbbbb84ea3024a4a8f7156ff716f00ffd48 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Mon, 20 Dec 2021 15:11:01 +0100
Subject: [PATCH 154/946] [AMDGPU] Fix LOD bias in A16 combine

As the codegen fix in D111754, the LOD bias needs to be converted to 16
bits. Fix this in the combine.

Differential Revision: https://reviews.llvm.org/D116038
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  23 ++-
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     |  31 ++-
 llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h      |   7 +
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |   5 +-
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    |  14 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  12 +-
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 176 +++++++++++++++---
 7 files changed, 229 insertions(+), 39 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 861545b445a33..c5d266eb57ecf 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -558,6 +558,9 @@ class AMDGPUSampleVariant<string ucmod, string lcmod, list<AMDGPUArg> extra_addr
 
   // {offset} {bias} {z-compare}
   list<AMDGPUArg> ExtraAddrArgs = extra_addr;
+  bit Offset = false;
+  bit Bias = false;
+  bit ZCompare = false;
   bit Gradients = false;
 
   // Name of the {lod} or {clamp} argument that is appended to the coordinates,
@@ -571,6 +574,7 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
   multiclass AMDGPUSampleHelper_Offset<string ucmod, string lcmod,
                                        list<AMDGPUArg> extra_addr> {
     def NAME#lcmod : AMDGPUSampleVariant<ucmod, lcmod, extra_addr>;
+    let Offset = true in
     def NAME#lcmod#_o : AMDGPUSampleVariant<
         ucmod#"_O", lcmod#"_o", !listconcat([AMDGPUArg<llvm_i32_ty, "offset">], extra_addr)>;
   }
@@ -578,6 +582,7 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
   multiclass AMDGPUSampleHelper_Compare<string ucmod, string lcmod,
                                         list<AMDGPUArg> extra_addr> {
     defm NAME : AMDGPUSampleHelper_Offset<ucmod, lcmod, extra_addr>;
+    let ZCompare = true in
     defm NAME : AMDGPUSampleHelper_Offset<
         "_C"#ucmod, "_c"#lcmod, !listconcat(extra_addr, [AMDGPUArg<llvm_float_ty, "zcompare">])>;
   }
@@ -591,6 +596,7 @@ defset list<AMDGPUSampleVariant> AMDGPUSampleVariants = {
 
   defset list<AMDGPUSampleVariant> AMDGPUSampleVariantsNoGradients = {
     defm AMDGPUSample : AMDGPUSampleHelper_Clamp<"", "", []>;
+    let Bias = true in
     defm AMDGPUSample : AMDGPUSampleHelper_Clamp<
         "_B", "_b", [AMDGPUArg<llvm_anyfloat_ty, "bias">]>;
     let LodOrClamp = "lod" in
@@ -618,6 +624,9 @@ class AMDGPUDimProfile<string opmod,
   list<LLVMType> RetTypes = [];
   list<AMDGPUArg> DataArgs = [];
   list<AMDGPUArg> ExtraAddrArgs = [];
+  bit Offset = false;
+  bit Bias = false;
+  bit ZCompare = false;
   bit Gradients = false;
   string LodClampMip = "";
 
@@ -652,6 +661,9 @@ class AMDGPUDimProfileCopy<AMDGPUDimProfile base> : AMDGPUDimProfile<base.OpMod,
   let RetTypes = base.RetTypes;
   let DataArgs = base.DataArgs;
   let ExtraAddrArgs = base.ExtraAddrArgs;
+  let Offset = base.Offset;
+  let Bias = base.Bias;
+  let ZCompare = base.ZCompare;
   let Gradients = base.Gradients;
   let LodClampMip = base.LodClampMip;
 }
@@ -662,6 +674,9 @@ class AMDGPUDimSampleProfile<string opmod,
   let IsSample = true;
   let RetTypes = [llvm_any_ty];
   let ExtraAddrArgs = sample.ExtraAddrArgs;
+  let Offset = sample.Offset;
+  let Bias = sample.Bias;
+  let ZCompare = sample.ZCompare;
   let Gradients = sample.Gradients;
   let LodClampMip = sample.LodOrClamp;
 }
@@ -702,7 +717,10 @@ class AMDGPUDimGetResInfoProfile<AMDGPUDimProps dim>
 class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
   int NumDataArgs = !size(P_.DataArgs);
   int NumDmaskArgs = !not(P_.IsAtomic);
-  int NumExtraAddrArgs = !size(P_.ExtraAddrArgs);
+  int NumOffsetArgs = !if(P_.Offset, 1, 0);
+  int NumBiasArgs = !if(P_.Bias, 1, 0);
+  int NumZCompareArgs = !if(P_.ZCompare, 1, 0);
+  int NumExtraAddrArgs = !add(NumOffsetArgs, NumBiasArgs, NumZCompareArgs);
   int NumVAddrArgs = !size(P_.AddrArgs);
   int NumGradientArgs = !if(P_.Gradients, !size(P_.Dim.GradientArgs), 0);
   int NumCoordArgs = !if(P_.IsSample, !size(P_.Dim.CoordSliceArgs), !size(P_.Dim.CoordSliceIntArgs));
@@ -710,6 +728,9 @@ class AMDGPUImageDimIntrinsicEval<AMDGPUDimProfile P_> {
   int NumSampArgs = !if(P_.IsSample, 2, 0);
   int DmaskArgIndex = NumDataArgs;
   int VAddrArgIndex = !add(DmaskArgIndex, NumDmaskArgs);
+  int OffsetArgIndex = VAddrArgIndex;
+  int BiasArgIndex = !add(VAddrArgIndex, NumOffsetArgs);
+  int ZCompareArgIndex = !add(BiasArgIndex, NumBiasArgs);
   int GradientArgIndex = !add(VAddrArgIndex, NumExtraAddrArgs);
   int CoordArgIndex = !add(GradientArgIndex, NumGradientArgs);
   int LodArgIndex = !add(VAddrArgIndex, NumVAddrArgs, -1);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index db84b87669241..5eb7cf89abb24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -127,14 +127,20 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
     FloatCoord = Coord->getType()->isFloatingPointTy();
   }
 
-  if (OnlyDerivatives) {
-    if (!ST->hasG16())
-      return None;
-  } else {
-    if (!ST->hasA16())
-      OnlyDerivatives = true; // Only supports G16
+  if (!OnlyDerivatives && !ST->hasA16())
+    OnlyDerivatives = true; // Only supports G16
+
+  // Check if there is a bias parameter and if it can be converted to f16
+  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
+    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
+    if (!canSafelyConvertTo16Bit(*Bias))
+      OnlyDerivatives = true;
   }
 
+  if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
+                                               ImageDimIntr->CoordStart))
+    return None;
+
   Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                                : Type::getInt16Ty(II.getContext());
 
@@ -143,8 +149,13 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
     return None;
 
   ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
-  if (!OnlyDerivatives)
+  if (!OnlyDerivatives) {
     ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
+
+    // Change the bias type
+    if (ImageDimIntr->NumBiasArgs != 0)
+      ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
+  }
   Function *I =
       Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
 
@@ -158,6 +169,12 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
         convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
   }
 
+  // Convert the bias
+  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
+    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
+    Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
+  }
+
   CallInst *NewCall = IC.Builder.CreateCall(I, Args);
   NewCall->takeName(&II);
   NewCall->copyMetadata(II);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index 673011f48289e..e7ee364476824 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -49,6 +49,9 @@ struct ImageDimIntrinsicInfo {
   unsigned BaseOpcode;
   MIMGDim Dim;
 
+  uint8_t NumOffsetArgs;
+  uint8_t NumBiasArgs;
+  uint8_t NumZCompareArgs;
   uint8_t NumGradients;
   uint8_t NumDmask;
   uint8_t NumData;
@@ -57,6 +60,9 @@ struct ImageDimIntrinsicInfo {
 
   uint8_t DMaskIndex;
   uint8_t VAddrStart;
+  uint8_t OffsetIndex;
+  uint8_t BiasIndex;
+  uint8_t ZCompareIndex;
   uint8_t GradientStart;
   uint8_t CoordStart;
   uint8_t LodIndex;
@@ -68,6 +74,7 @@ struct ImageDimIntrinsicInfo {
   uint8_t TexFailCtrlIndex;
   uint8_t CachePolicyIndex;
 
+  uint8_t BiasTyArg;
   uint8_t GradientTyArg;
   uint8_t CoordTyArg;
 };
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 20b2b0f1be0ce..5092e0f553e2a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4273,15 +4273,18 @@ static void packImage16bitOpsToDwords(MachineIRBuilder &B, MachineInstr &MI,
     if ((I < Intr->GradientStart) ||
         (I >= Intr->GradientStart && I < Intr->CoordStart && !IsG16) ||
         (I >= Intr->CoordStart && !IsA16)) {
-      // Handle any gradient or coordinate operands that should not be packed
       if ((I < Intr->GradientStart) && IsA16 &&
           (B.getMRI()->getType(AddrReg) == S16)) {
+        assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument");
         // Special handling of bias when A16 is on. Bias is of type half but
         // occupies full 32-bit.
         PackedAddrs.push_back(
             B.buildBuildVector(V2S16, {AddrReg, B.buildUndef(S16).getReg(0)})
                 .getReg(0));
       } else {
+        assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) &&
+               "Bias needs to be converted to 16 bit in A16 mode");
+        // Handle any gradient or coordinate operands that should not be packed
         AddrReg = B.buildBitcast(V2S16, AddrReg).getReg(0);
         PackedAddrs.push_back(AddrReg);
       }
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 6dd886367302a..1d8a558359378 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -1070,6 +1070,9 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
   AMDGPUDimProps Dim = I.P.Dim;
   AMDGPUImageDimIntrinsicEval DimEval = AMDGPUImageDimIntrinsicEval<I.P>;
 
+  bits<8> NumOffsetArgs = DimEval.NumOffsetArgs;
+  bits<8> NumBiasArgs = DimEval.NumBiasArgs;
+  bits<8> NumZCompareArgs = DimEval.NumZCompareArgs;
   bits<8> NumGradients = DimEval.NumGradientArgs;
   bits<8> NumDmask = DimEval.NumDmaskArgs;
   bits<8> NumData = DimEval.NumDataArgs;
@@ -1078,6 +1081,9 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
 
   bits<8> DMaskIndex = DimEval.DmaskArgIndex;
   bits<8> VAddrStart = DimEval.VAddrArgIndex;
+  bits<8> OffsetIndex = DimEval.OffsetArgIndex;
+  bits<8> BiasIndex = DimEval.BiasArgIndex;
+  bits<8> ZCompareIndex = DimEval.ZCompareArgIndex;
   bits<8> GradientStart = DimEval.GradientArgIndex;
   bits<8> CoordStart = DimEval.CoordArgIndex;
   bits<8> LodIndex = DimEval.LodArgIndex;
@@ -1089,6 +1095,8 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
   bits<8> TexFailCtrlIndex = DimEval.TexFailCtrlArgIndex;
   bits<8> CachePolicyIndex = DimEval.CachePolicyArgIndex;
 
+  bits<8> BiasTyArg = !add(I.P.NumRetAndDataAnyTypes,
+    !if(!eq(NumOffsetArgs, 0), 0, I.P.ExtraAddrArgs[0].Type.isAny));
   bits<8> GradientTyArg = !add(I.P.NumRetAndDataAnyTypes,
     !foldl(0, I.P.ExtraAddrArgs, cnt, arg, !add(cnt, arg.Type.isAny)));
   bits<8> CoordTyArg = !add(GradientTyArg, !if(I.P.Gradients, 1, 0));
@@ -1096,10 +1104,10 @@ class ImageDimIntrinsicInfo<AMDGPUImageDimIntrinsic I> {
 
 def ImageDimIntrinsicTable : GenericTable {
   let FilterClass = "ImageDimIntrinsicInfo";
-  let Fields = ["Intr", "BaseOpcode", "Dim", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs",
-    "DMaskIndex", "VAddrStart", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd",
+  let Fields = ["Intr", "BaseOpcode", "Dim", "NumOffsetArgs", "NumBiasArgs", "NumZCompareArgs", "NumGradients", "NumDmask", "NumData", "NumVAddrs", "NumArgs",
+    "DMaskIndex", "VAddrStart", "OffsetIndex", "BiasIndex", "ZCompareIndex", "GradientStart", "CoordStart", "LodIndex", "MipIndex", "VAddrEnd",
     "RsrcIndex", "SampIndex", "UnormIndex", "TexFailCtrlIndex", "CachePolicyIndex",
-    "GradientTyArg", "CoordTyArg"];
+    "BiasTyArg", "GradientTyArg", "CoordTyArg"];
   string TypeOf_BaseOpcode = "MIMGBaseOpcode";
   string TypeOf_Dim = "MIMGDim";
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 5176ba44afad6..26229b40f4dc5 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6316,12 +6316,18 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   // Push back extra arguments.
   for (unsigned I = Intr->VAddrStart; I < Intr->GradientStart; I++) {
     if (IsA16 && (Op.getOperand(ArgOffset + I).getValueType() == MVT::f16)) {
+      assert(I == Intr->BiasIndex && "Got unexpected 16-bit extra argument");
       // Special handling of bias when A16 is on. Bias is of type half but
       // occupies full 32-bit.
-      SDValue bias = DAG.getBuildVector( MVT::v2f16, DL, {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
-      VAddrs.push_back(bias);
-    } else
+      SDValue Bias = DAG.getBuildVector(
+          MVT::v2f16, DL,
+          {Op.getOperand(ArgOffset + I), DAG.getUNDEF(MVT::f16)});
+      VAddrs.push_back(Bias);
+    } else {
+      assert((!IsA16 || Intr->NumBiasArgs == 0 || I != Intr->BiasIndex) &&
+             "Bias needs to be converted to 16 bit in A16 mode");
       VAddrs.push_back(Op.getOperand(ArgOffset + I));
+    }
   }
 
   if (BaseOpcode->Gradients && !ST->hasG16() && (IsA16 != IsG16)) {
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index a6ddcdd0a4a0e..9607fe63f4637 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -3019,9 +3019,23 @@ define amdgpu_kernel void @image_sample_a16_c_cl_2d(<4 x float> addrspace(1)* %o
   ret void
 }
 
-define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
-; CHECK-LABEL: @image_sample_a16_b_1d(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+define amdgpu_kernel void @image_sample_a16_b16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s) {
+; CHECK-LABEL: @image_sample_a16_b16_1d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %bias32 = fpext half %bias to float
+  %s32 = fpext half %s to float
+  %res = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_b32_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s) {
+; CHECK-LABEL: @image_sample_a16_b32_1d(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -3031,9 +3045,25 @@ define amdgpu_kernel void @image_sample_a16_b_1d(<4 x float> addrspace(1)* %out,
   ret void
 }
 
-define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
-; CHECK-LABEL: @image_sample_a16_b_2d(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+define amdgpu_kernel void @image_sample_a16_b16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t) {
+; CHECK-LABEL: @image_sample_a16_b16_2d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %bias32 = fpext half %bias to float
+  %s32 = fpext half %s to float
+  %t32 = fpext half %t to float
+  %res = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_b32_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t) {
+; CHECK-LABEL: @image_sample_a16_b32_2d(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -3044,9 +3074,23 @@ define amdgpu_kernel void @image_sample_a16_b_2d(<4 x float> addrspace(1)* %out,
   ret void
 }
 
-define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
-; CHECK-LABEL: @image_sample_a16_c_b_1d(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+define amdgpu_kernel void @image_sample_a16_c_b16_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s) {
+; CHECK-LABEL: @image_sample_a16_c_b16_1d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %bias32 = fpext half %bias to float
+  %s32 = fpext half %s to float
+  %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_c_b32_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s) {
+; CHECK-LABEL: @image_sample_a16_c_b32_1d(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -3056,9 +3100,25 @@ define amdgpu_kernel void @image_sample_a16_c_b_1d(<4 x float> addrspace(1)* %ou
   ret void
 }
 
-define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
-; CHECK-LABEL: @image_sample_a16_c_b_2d(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+define amdgpu_kernel void @image_sample_a16_c_b16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t) {
+; CHECK-LABEL: @image_sample_a16_c_b16_2d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %bias32 = fpext half %bias to float
+  %s32 = fpext half %s to float
+  %t32 = fpext half %t to float
+  %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_c_b32_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t) {
+; CHECK-LABEL: @image_sample_a16_c_b32_2d(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -3069,9 +3129,25 @@ define amdgpu_kernel void @image_sample_a16_c_b_2d(<4 x float> addrspace(1)* %ou
   ret void
 }
 
-define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
-; CHECK-LABEL: @image_sample_a16_b_cl_1d(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+define amdgpu_kernel void @image_sample_a16_b16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %clamp) {
+; CHECK-LABEL: @image_sample_a16_b16_cl_1d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %bias32 = fpext half %bias to float
+  %s32 = fpext half %s to float
+  %clamp32 = fpext half %clamp to float
+  %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_b32_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %clamp) {
+; CHECK-LABEL: @image_sample_a16_b32_cl_1d(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -3082,9 +3158,27 @@ define amdgpu_kernel void @image_sample_a16_b_cl_1d(<4 x float> addrspace(1)* %o
   ret void
 }
 
-define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
-; CHECK-LABEL: @image_sample_a16_b_cl_2d(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+define amdgpu_kernel void @image_sample_a16_b16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, half %s, half %t, half %clamp) {
+; CHECK-LABEL: @image_sample_a16_b16_cl_2d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %bias32 = fpext half %bias to float
+  %s32 = fpext half %s to float
+  %t32 = fpext half %t to float
+  %clamp32 = fpext half %clamp to float
+  %res = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_b32_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, half %s, half %t, half %clamp) {
+; CHECK-LABEL: @image_sample_a16_b32_cl_2d(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
+; CHECK-NEXT:    [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -3096,9 +3190,25 @@ define amdgpu_kernel void @image_sample_a16_b_cl_2d(<4 x float> addrspace(1)* %o
   ret void
 }
 
-define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
-; CHECK-LABEL: @image_sample_a16_c_b_cl_1d(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+define amdgpu_kernel void @image_sample_a16_c_b16_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %clamp) {
+; CHECK-LABEL: @image_sample_a16_c_b16_cl_1d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %bias32 = fpext half %bias to float
+  %s32 = fpext half %s to float
+  %clamp32 = fpext half %clamp to float
+  %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_c_b32_cl_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %clamp) {
+; CHECK-LABEL: @image_sample_a16_c_b32_cl_1d(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -3109,9 +3219,27 @@ define amdgpu_kernel void @image_sample_a16_c_b_cl_1d(<4 x float> addrspace(1)*
   ret void
 }
 
-define amdgpu_kernel void @image_sample_a16_c_b_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
-; CHECK-LABEL: @image_sample_a16_c_b_cl_2d(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f16(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+define amdgpu_kernel void @image_sample_a16_c_b16_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %bias, float %zcompare, half %s, half %t, half %clamp) {
+; CHECK-LABEL: @image_sample_a16_c_b16_cl_2d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f16.f16(i32 15, half [[BIAS:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], half [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %bias32 = fpext half %bias to float
+  %s32 = fpext half %s to float
+  %t32 = fpext half %t to float
+  %clamp32 = fpext half %clamp to float
+  %res = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias32, float %zcompare, float %s32, float %t32, float %clamp32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_c_b32_cl_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, half %s, half %t, half %clamp) {
+; CHECK-LABEL: @image_sample_a16_c_b32_cl_2d(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[T32:%.*]] = fpext half [[T:%.*]] to float
+; CHECK-NEXT:    [[CLAMP32:%.*]] = fpext half [[CLAMP:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S32]], float [[T32]], float [[CLAMP32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
 ; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
 ; CHECK-NEXT:    ret void
 ;

From 603d18033c510c99ad84f26b6603db1ca68a500f Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Tue, 21 Dec 2021 17:27:14 +0100
Subject: [PATCH 155/946] [AMDGPU][InstCombine] Remove zero LOD bias

If the bias is zero, we can remove it from the image instruction.
Also copy other image optimizations (l->lz, mip->nomip) to IR combines.

Differential Revision: https://reviews.llvm.org/D116042
---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 138 +++-
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    |  34 +
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |   1 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   9 +
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 589 +++++++++++++++++-
 5 files changed, 732 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 5eb7cf89abb24..84363d3c6aa1a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -97,10 +97,92 @@ static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
   llvm_unreachable("Should never be called!");
 }
 
+/// Applies Function(II.Args, II.ArgTys) and replaces the intrinsic call with
+/// the modified arguments.
+static Optional<Instruction *> modifyIntrinsicCall(
+    IntrinsicInst &II, unsigned NewIntr, InstCombiner &IC,
+    std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
+        Func) {
+  SmallVector<Type *, 4> ArgTys;
+  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
+    return None;
+
+  SmallVector<Value *, 8> Args(II.args());
+
+  // Modify arguments and types
+  Func(Args, ArgTys);
+
+  Function *I = Intrinsic::getDeclaration(II.getModule(), NewIntr, ArgTys);
+
+  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
+  NewCall->takeName(&II);
+  NewCall->copyMetadata(II);
+  if (isa<FPMathOperator>(NewCall))
+    NewCall->copyFastMathFlags(&II);
+
+  // Erase and replace uses
+  if (!II.getType()->isVoidTy())
+    IC.replaceInstUsesWith(II, NewCall);
+  return IC.eraseInstFromFunction(II);
+}
+
 static Optional<Instruction *>
 simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
                              const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
                              IntrinsicInst &II, InstCombiner &IC) {
+  // Optimize _L to _LZ when _L is zero
+  if (const auto *LZMappingInfo =
+          AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
+    if (auto *ConstantLod =
+            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
+      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
+                                                     ImageDimIntr->Dim);
+        return modifyIntrinsicCall(
+            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+              Args.erase(Args.begin() + ImageDimIntr->LodIndex);
+            });
+      }
+    }
+  }
+
+  // Optimize _mip away, when 'lod' is zero
+  if (const auto *MIPMappingInfo =
+          AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
+    if (auto *ConstantMip =
+            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
+      if (ConstantMip->isZero()) {
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
+                                                     ImageDimIntr->Dim);
+        return modifyIntrinsicCall(
+            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+              Args.erase(Args.begin() + ImageDimIntr->MipIndex);
+            });
+      }
+    }
+  }
+
+  // Optimize _bias away when 'bias' is zero
+  if (const auto *BiasMappingInfo =
+          AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
+    if (auto *ConstantBias =
+            dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
+      if (ConstantBias->isZero()) {
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
+                                                     ImageDimIntr->Dim);
+        return modifyIntrinsicCall(
+            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+              Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
+              ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
+            });
+      }
+    }
+  }
+
+  // Try to use A16 or G16
   if (!ST->hasA16() && !ST->hasG16())
     return None;
 
@@ -144,43 +226,31 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
   Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
                                : Type::getInt16Ty(II.getContext());
 
-  SmallVector<Type *, 4> ArgTys;
-  if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
-    return None;
-
-  ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
-  if (!OnlyDerivatives) {
-    ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
-
-    // Change the bias type
-    if (ImageDimIntr->NumBiasArgs != 0)
-      ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
-  }
-  Function *I =
-      Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
-
-  SmallVector<Value *, 8> Args(II.args());
+  return modifyIntrinsicCall(
+      II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
+        ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
+        if (!OnlyDerivatives) {
+          ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
 
-  unsigned EndIndex =
-      OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
-  for (unsigned OperandIndex = ImageDimIntr->GradientStart;
-       OperandIndex < EndIndex; OperandIndex++) {
-    Args[OperandIndex] =
-        convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
-  }
+          // Change the bias type
+          if (ImageDimIntr->NumBiasArgs != 0)
+            ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
+        }
 
-  // Convert the bias
-  if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
-    Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
-    Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
-  }
+        unsigned EndIndex =
+            OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
+        for (unsigned OperandIndex = ImageDimIntr->GradientStart;
+             OperandIndex < EndIndex; OperandIndex++) {
+          Args[OperandIndex] =
+              convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
+        }
 
-  CallInst *NewCall = IC.Builder.CreateCall(I, Args);
-  NewCall->takeName(&II);
-  NewCall->copyMetadata(II);
-  if (isa<FPMathOperator>(NewCall))
-    NewCall->copyFastMathFlags(&II);
-  return IC.replaceInstUsesWith(II, NewCall);
+        // Convert the bias
+        if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
+          Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
+          Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
+        }
+      });
 }
 
 bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 1d8a558359378..49eaa1499bb76 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -131,6 +131,22 @@ def MIMGMIPMappingTable : GenericTable {
   let PrimaryKeyName = "getMIMGMIPMappingInfo";
 }
 
+class MIMGBiasMapping<MIMGBaseOpcode bias, MIMGBaseOpcode nobias> {
+  MIMGBaseOpcode Bias = bias;
+  MIMGBaseOpcode NoBias = nobias;
+}
+
+def MIMGBiasMappingTable : GenericTable {
+  let FilterClass = "MIMGBiasMapping";
+  let CppTypeName = "MIMGBiasMappingInfo";
+  let Fields = ["Bias", "NoBias"];
+  string TypeOf_Bias = "MIMGBaseOpcode";
+  string TypeOf_NoBias = "MIMGBaseOpcode";
+
+  let PrimaryKey = ["Bias"];
+  let PrimaryKeyName = "getMIMGBiasMappingInfo";
+}
+
 class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
   MIMGBaseOpcode G = g;
   MIMGBaseOpcode G16 = g16;
@@ -1140,6 +1156,24 @@ def : MIMGLZMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_LZ_O>;
 def : MIMGMIPMapping<IMAGE_LOAD_MIP, IMAGE_LOAD>;
 def : MIMGMIPMapping<IMAGE_STORE_MIP, IMAGE_STORE>;
 
+// Bias to NoBias Optimization Mapping
+def : MIMGBiasMapping<IMAGE_SAMPLE_B, IMAGE_SAMPLE>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL, IMAGE_SAMPLE_CL>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B, IMAGE_SAMPLE_C>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL, IMAGE_SAMPLE_C_CL>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_CL_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_O>;
+def : MIMGBiasMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_CL_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B, IMAGE_GATHER4>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_CL, IMAGE_GATHER4_CL>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B, IMAGE_GATHER4_C>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL, IMAGE_GATHER4_C_CL>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>;
+def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>;
+
 // G to G16 Optimization Mapping
 def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
 def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 6c7d73aebe0c4..fa1fa5b850d57 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -132,6 +132,7 @@ bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
 #define GET_MIMGInfoTable_IMPL
 #define GET_MIMGLZMappingTable_IMPL
 #define GET_MIMGMIPMappingTable_IMPL
+#define GET_MIMGBiasMappingTable_IMPL
 #define GET_MIMGG16MappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 061c74c0ace69..cabae3d1ab7e1 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -64,6 +64,7 @@ struct GcnBufferFormatInfo {
 #define GET_MIMGEncoding_DECL
 #define GET_MIMGLZMapping_DECL
 #define GET_MIMGMIPMapping_DECL
+#define GET_MIMGBiASMapping_DECL
 #include "AMDGPUGenSearchableTables.inc"
 
 namespace IsaInfo {
@@ -330,6 +331,11 @@ struct MIMGMIPMappingInfo {
   MIMGBaseOpcode NONMIP;
 };
 
+struct MIMGBiasMappingInfo {
+  MIMGBaseOpcode Bias;
+  MIMGBaseOpcode NoBias;
+};
+
 struct MIMGG16MappingInfo {
   MIMGBaseOpcode G;
   MIMGBaseOpcode G16;
@@ -341,6 +347,9 @@ const MIMGLZMappingInfo *getMIMGLZMappingInfo(unsigned L);
 LLVM_READONLY
 const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
 
+LLVM_READONLY
+const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
+
 LLVM_READONLY
 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
 
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 9607fe63f4637..bac4c7826a4fe 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -66,7 +66,7 @@ define double @test_constant_fold_rcp_f64_43() nounwind {
 
 define float @test_constant_fold_rcp_f32_43_strictfp() nounwind strictfp {
 ; CHECK-LABEL: @test_constant_fold_rcp_f32_43_strictfp(
-; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR12:[0-9]+]]
+; CHECK-NEXT:    [[VAL:%.*]] = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) #[[ATTR14:[0-9]+]]
 ; CHECK-NEXT:    ret float [[VAL]]
 ;
   %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) strictfp nounwind readnone
@@ -1662,7 +1662,7 @@ define i64 @icmp_constant_inputs_false() {
 
 define i64 @icmp_constant_inputs_true() {
 ; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR13:[0-9]+]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0:![0-9]+]]) #[[ATTR15:[0-9]+]]
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.icmp.i64.i32(i32 9, i32 8, i32 34)
@@ -2369,7 +2369,7 @@ define i64 @fcmp_constant_inputs_false() {
 
 define i64 @fcmp_constant_inputs_true() {
 ; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]]
+; CHECK-NEXT:    [[RESULT:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR15]]
 ; CHECK-NEXT:    ret i64 [[RESULT]]
 ;
   %result = call i64 @llvm.amdgcn.fcmp.i64.f32(float 2.0, float 4.0, i32 4)
@@ -2411,7 +2411,7 @@ define i64 @ballot_zero_64() {
 
 define i64 @ballot_one_64() {
 ; CHECK-LABEL: @ballot_one_64(
-; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR13]]
+; CHECK-NEXT:    [[B:%.*]] = call i64 @llvm.read_register.i64(metadata [[META0]]) #[[ATTR15]]
 ; CHECK-NEXT:    ret i64 [[B]]
 ;
   %b = call i64 @llvm.amdgcn.ballot.i64(i1 1)
@@ -2437,7 +2437,7 @@ define i32 @ballot_zero_32() {
 
 define i32 @ballot_one_32() {
 ; CHECK-LABEL: @ballot_one_32(
-; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR13]]
+; CHECK-NEXT:    [[B:%.*]] = call i32 @llvm.read_register.i32(metadata [[META1:![0-9]+]]) #[[ATTR15]]
 ; CHECK-NEXT:    ret i32 [[B]]
 ;
   %b = call i32 @llvm.amdgcn.ballot.i32(i1 1)
@@ -4051,6 +4051,585 @@ define amdgpu_kernel void @image_sample_a16_2darray_nnan(<4 x float> addrspace(1
   ret void
 }
 
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample l to lz
+; --------------------------------------------------------------------
+
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+define amdgpu_kernel void @sample_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
+; CHECK-LABEL: @sample_l_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+; CHECK-LABEL: @sample_l_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_l_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
+; CHECK-LABEL: @sample_c_l_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+; CHECK-LABEL: @sample_c_l_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
+; CHECK-LABEL: @sample_l_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
+; CHECK-LABEL: @sample_l_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
+; CHECK-LABEL: @sample_c_l_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
+; CHECK-LABEL: @sample_c_l_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+; CHECK-LABEL: @gather4_l_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_c_l_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+; CHECK-LABEL: @gather4_c_l_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
+; CHECK-LABEL: @gather4_l_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
+; CHECK-LABEL: @gather4_c_l_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_c_l_o_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %slice, float %lod) {
+; CHECK-LABEL: @gather4_c_l_o_2darray(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2darray.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2darray.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float %slice, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample mipmap zero
+; --------------------------------------------------------------------
+
+define amdgpu_kernel void @load_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s) {
+; CHECK-LABEL: @load_mip_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @load_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; CHECK-LABEL: @load_mip_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @load_mip_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+; CHECK-LABEL: @load_mip_3d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @load_mip_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; CHECK-LABEL: @load_mip_1darray(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @load_mip_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+; CHECK-LABEL: @load_mip_2darray(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @load_mip_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
+; CHECK-LABEL: @load_mip_cube(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+
+define amdgpu_kernel void @store_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
+; CHECK-LABEL: @store_mip_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    ret void
+;
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @store_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+; CHECK-LABEL: @store_mip_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    ret void
+;
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @store_mip_3d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+; CHECK-LABEL: @store_mip_3d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    ret void
+;
+main_body:
+  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @store_mip_1darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
+; CHECK-LABEL: @store_mip_1darray(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    ret void
+;
+main_body:
+  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @store_mip_2darray(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+; CHECK-LABEL: @store_mip_2darray(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    ret void
+;
+main_body:
+  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @store_mip_cube(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
+; CHECK-LABEL: @store_mip_cube(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> [[VDATA:%.*]], i32 15, i32 [[S:%.*]], i32 [[T:%.*]], i32 [[U:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    ret void
+;
+main_body:
+  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
+
+
+declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample bias zero
+; --------------------------------------------------------------------
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32, i32, half, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+define amdgpu_kernel void @sample_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; CHECK-LABEL: @sample_b_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; CHECK-LABEL: @sample_b_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32(i32 15, float -0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_b_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; CHECK-LABEL: @sample_c_b_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32(i32 15, float -0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @sample_c_b_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32(i32 15, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s) {
+; CHECK-LABEL: @sample_b_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
+; CHECK-LABEL: @sample_b_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s) {
+; CHECK-LABEL: @sample_c_b_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @sample_c_b_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; CHECK-LABEL: @gather4_b_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32(i32 15, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_c_b_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @gather4_c_b_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32(i32 15,  float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
+; CHECK-LABEL: @gather4_b_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32(i32 15, i32 %offset, float 0.0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @gather4_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @gather4_c_b_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32(i32 15, i32 %offset,  float 0.0, float %zcompare,float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @sample_c_b_o_a16_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t) {
+; CHECK-LABEL: @sample_c_b_o_a16_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f16(i32 15, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[S:%.*]], half [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f16(i32 15, i32 %offset, half 0.0, float %zcompare, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+; Check that bias is not optimized away if > 0
+define amdgpu_kernel void @sample_b_1d_pos(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; CHECK-LABEL: @sample_b_1d_pos(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float 1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float 1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+; Check that bias is not optimized away if < 0
+define amdgpu_kernel void @sample_b_1d_neg(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; CHECK-LABEL: @sample_b_1d_neg(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float -1.000000e+00, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -1.0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+; Zero bias + A16
+define amdgpu_kernel void @sample_b_1d_a16(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s) {
+; CHECK-LABEL: @sample_b_1d_a16(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f16(i32 15, half [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %s32 = fpext half %s to float
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32(i32 15, float -0.0, float %s32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.is.shared
 ; --------------------------------------------------------------------

From ae2f9c8be89768086b9f335d60bbe8312b212f95 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Tue, 21 Dec 2021 17:31:24 +0100
Subject: [PATCH 156/946] [AMDGPU] Remove lz and nomip combine from codegen

These combines have been moved into the IR combiner in D116042.

Differential Revision: https://reviews.llvm.org/D116116
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |  24 -
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |  38 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  26 -
 .../GlobalISel/image_ls_mipmap_zero.a16.ll    | 667 ------------------
 .../AMDGPU/GlobalISel/image_ls_mipmap_zero.ll | 403 -----------
 .../llvm.amdgcn.image.sample.ltolz.a16.ll     | 565 ---------------
 .../llvm.amdgcn.image.sample.ltolz.ll         | 293 --------
 llvm/test/CodeGen/AMDGPU/cluster_stores.ll    |  19 +-
 .../CodeGen/AMDGPU/dagcombine-fma-fmad.ll     |  28 +-
 .../CodeGen/AMDGPU/image_ls_mipmap_zero.ll    | 132 ----
 .../AMDGPU/llvm.amdgcn.image.sample.ltolz.ll  | 113 ---
 llvm/test/CodeGen/AMDGPU/skip-if-dead.ll      |   9 +-
 12 files changed, 31 insertions(+), 2286 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.ll
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll
 delete mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll
 delete mode 100644 llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll
 delete mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6ab2cb6df3bda..e48dca3cc9572 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1510,10 +1510,6 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
     AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
 
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
-  const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
-      AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
-  const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
-      AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
   unsigned IntrOpcode = Intr->BaseOpcode;
   const bool IsGFX10Plus = AMDGPU::isGFX10Plus(STI);
 
@@ -1586,26 +1582,6 @@ bool AMDGPUInstructionSelector::selectImageIntrinsic(
     }
   }
 
-  // Optimize _L to _LZ when _L is zero
-  if (LZMappingInfo) {
-    // The legalizer replaced the register with an immediate 0 if we need to
-    // change the opcode.
-    const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->LodIndex);
-    if (Lod.isImm()) {
-      assert(Lod.getImm() == 0);
-      IntrOpcode = LZMappingInfo->LZ;  // set new opcode to _lz variant of _l
-    }
-  }
-
-  // Optimize _mip away, when 'lod' is zero
-  if (MIPMappingInfo) {
-    const MachineOperand &Lod = MI.getOperand(ArgOffset + Intr->MipIndex);
-    if (Lod.isImm()) {
-      assert(Lod.getImm() == 0);
-      IntrOpcode = MIPMappingInfo->NONMIP;  // set new opcode to variant without _mip
-    }
-  }
-
   // Set G16 opcode
   if (IsG16 && !IsA16) {
     const AMDGPU::MIMGG16MappingInfo *G16MappingInfo =
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 5092e0f553e2a..04c6f67ed3390 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -4450,44 +4450,6 @@ bool AMDGPULegalizerInfo::legalizeImageIntrinsic(
 
   unsigned CorrectedNumVAddrs = Intr->NumVAddrs;
 
-  // Optimize _L to _LZ when _L is zero
-  if (const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
-          AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode)) {
-    const ConstantFP *ConstantLod;
-
-    if (mi_match(MI.getOperand(ArgOffset + Intr->LodIndex).getReg(), *MRI,
-                 m_GFCst(ConstantLod))) {
-      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
-        // Set new opcode to _lz variant of _l, and change the intrinsic ID.
-        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
-            AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
-                                                     Intr->Dim);
-
-        // The starting indexes should remain in the same place.
-        --CorrectedNumVAddrs;
-
-        MI.getOperand(NumDefs).setIntrinsicID(
-            static_cast<Intrinsic::ID>(NewImageDimIntr->Intr));
-        MI.RemoveOperand(ArgOffset + Intr->LodIndex);
-        Intr = NewImageDimIntr;
-      }
-    }
-  }
-
-  // Optimize _mip away, when 'lod' is zero
-  if (AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode)) {
-    int64_t ConstantLod;
-    if (mi_match(MI.getOperand(ArgOffset + Intr->MipIndex).getReg(), *MRI,
-                 m_ICst(ConstantLod))) {
-      if (ConstantLod == 0) {
-        // TODO: Change intrinsic opcode and remove operand instead or replacing
-        // it with 0, as the _L to _LZ handling is done above.
-        MI.getOperand(ArgOffset + Intr->MipIndex).ChangeToImmediate(0);
-        --CorrectedNumVAddrs;
-      }
-    }
-  }
-
   // Rewrite the addressing register layout before doing anything else.
   if (BaseOpcode->Gradients && !ST.hasG16() && (IsA16 != IsG16)) {
     // 16 bit gradients are supported, but are tied to the A16 control
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 26229b40f4dc5..4008c7e36e4f4 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6188,10 +6188,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
       AMDGPU::getMIMGBaseOpcodeInfo(Intr->BaseOpcode);
   const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfo(Intr->Dim);
-  const AMDGPU::MIMGLZMappingInfo *LZMappingInfo =
-      AMDGPU::getMIMGLZMappingInfo(Intr->BaseOpcode);
-  const AMDGPU::MIMGMIPMappingInfo *MIPMappingInfo =
-      AMDGPU::getMIMGMIPMappingInfo(Intr->BaseOpcode);
   unsigned IntrOpcode = Intr->BaseOpcode;
   bool IsGFX10Plus = AMDGPU::isGFX10Plus(*Subtarget);
 
@@ -6279,28 +6275,6 @@ SDValue SITargetLowering::lowerImage(SDValue Op,
   unsigned VAddrEnd = ArgOffset + Intr->VAddrEnd;
   SmallVector<SDValue, 4> VAddrs;
 
-  // Optimize _L to _LZ when _L is zero
-  if (LZMappingInfo) {
-    if (auto *ConstantLod = dyn_cast<ConstantFPSDNode>(
-            Op.getOperand(ArgOffset + Intr->LodIndex))) {
-      if (ConstantLod->isZero() || ConstantLod->isNegative()) {
-        IntrOpcode = LZMappingInfo->LZ;  // set new opcode to _lz variant of _l
-        VAddrEnd--;                      // remove 'lod'
-      }
-    }
-  }
-
-  // Optimize _mip away, when 'lod' is zero
-  if (MIPMappingInfo) {
-    if (auto *ConstantLod = dyn_cast<ConstantSDNode>(
-            Op.getOperand(ArgOffset + Intr->MipIndex))) {
-      if (ConstantLod->isZero()) {
-        IntrOpcode = MIPMappingInfo->NONMIP;  // set new opcode to variant without _mip
-        VAddrEnd--;                           // remove 'mip'
-      }
-    }
-  }
-
   // Check for 16 bit addresses or derivatives and pack if true.
   MVT VAddrVT =
       Op.getOperand(ArgOffset + Intr->GradientStart).getSimpleValueType();
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll
deleted file mode 100644
index af7509fd0897d..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.a16.ll
+++ /dev/null
@@ -1,667 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=legalizer -o - %s | FileCheck -check-prefix=GFX10 %s
-
-define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i16 %s) {
-  ; GFX9-LABEL: name: load_mip_1d
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[DEF]](s32)
-  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-  ; GFX10-LABEL: name: load_mip_1d
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[DEF]](s32)
-  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 15, i16 %s, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i16 %s, i16 %t) {
-  ; GFX9-LABEL: name: load_mip_2d
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-  ; GFX10-LABEL: name: load_mip_2d
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2d), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %u) {
-  ; GFX9-LABEL: name: load_mip_3d
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32)
-  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-  ; GFX10-LABEL: name: load_mip_3d
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32)
-  ; GFX10:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.3d), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i16 %s, i16 %t) {
-  ; GFX9-LABEL: name: load_mip_1darray
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-  ; GFX10-LABEL: name: load_mip_1darray
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.1darray), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %u) {
-  ; GFX9-LABEL: name: load_mip_2darray
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32)
-  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-  ; GFX10-LABEL: name: load_mip_2darray
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32)
-  ; GFX10:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.2darray), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %u) {
-  ; GFX9-LABEL: name: load_mip_cube
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32)
-  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX9:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX9:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX9:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX9:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX9:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX9:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX9:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-  ; GFX10-LABEL: name: load_mip_cube
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY8]](s32), [[COPY9]](s32)
-  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY10]](s32), [[DEF]](s32)
-  ; GFX10:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX10:   [[AMDGPU_INTRIN_IMAGE_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.load.mip.cube), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable load (<4 x s32>) from custom "ImageResource")
-  ; GFX10:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[AMDGPU_INTRIN_IMAGE_LOAD]](<4 x s32>)
-  ; GFX10:   $vgpr0 = COPY [[UV]](s32)
-  ; GFX10:   $vgpr1 = COPY [[UV1]](s32)
-  ; GFX10:   $vgpr2 = COPY [[UV2]](s32)
-  ; GFX10:   $vgpr3 = COPY [[UV3]](s32)
-  ; GFX10:   SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s) {
-  ; GFX9-LABEL: name: store_mip_1d
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
-  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX9:   S_ENDPGM 0
-  ; GFX10-LABEL: name: store_mip_1d
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[DEF]](s32)
-  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX10:   S_ENDPGM 0
-main_body:
-  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t) {
-  ; GFX9-LABEL: name: store_mip_2d
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX9:   S_ENDPGM 0
-  ; GFX10-LABEL: name: store_mip_2d
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX10:   S_ENDPGM 0
-main_body:
-  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t, i16 %u) {
-  ; GFX9-LABEL: name: store_mip_3d
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
-  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX9:   S_ENDPGM 0
-  ; GFX10-LABEL: name: store_mip_3d
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
-  ; GFX10:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.3d), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX10:   S_ENDPGM 0
-main_body:
-  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t) {
-  ; GFX9-LABEL: name: store_mip_1darray
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX9:   S_ENDPGM 0
-  ; GFX10-LABEL: name: store_mip_1darray
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.1darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[BUILD_VECTOR_TRUNC]](<2 x s16>), $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX10:   S_ENDPGM 0
-main_body:
-  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t, i16 %u) {
-  ; GFX9-LABEL: name: store_mip_2darray
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
-  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX9:   S_ENDPGM 0
-  ; GFX10-LABEL: name: store_mip_2darray
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
-  ; GFX10:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.2darray), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX10:   S_ENDPGM 0
-main_body:
-  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i16 %s, i16 %t, i16 %u) {
-  ; GFX9-LABEL: name: store_mip_cube
-  ; GFX9: bb.1.main_body:
-  ; GFX9:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
-  ; GFX9:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX9:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX9:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX9:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX9:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX9:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX9:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX9:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX9:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX9:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX9:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX9:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX9:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX9:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX9:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX9:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX9:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GFX9:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX9:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX9:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
-  ; GFX9:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX9:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX9:   S_ENDPGM 0
-  ; GFX10-LABEL: name: store_mip_cube
-  ; GFX10: bb.1.main_body:
-  ; GFX10:   liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6
-  ; GFX10:   [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
-  ; GFX10:   [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr3
-  ; GFX10:   [[COPY2:%[0-9]+]]:_(s32) = COPY $sgpr4
-  ; GFX10:   [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr5
-  ; GFX10:   [[COPY4:%[0-9]+]]:_(s32) = COPY $sgpr6
-  ; GFX10:   [[COPY5:%[0-9]+]]:_(s32) = COPY $sgpr7
-  ; GFX10:   [[COPY6:%[0-9]+]]:_(s32) = COPY $sgpr8
-  ; GFX10:   [[COPY7:%[0-9]+]]:_(s32) = COPY $sgpr9
-  ; GFX10:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
-  ; GFX10:   [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr0
-  ; GFX10:   [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr1
-  ; GFX10:   [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr2
-  ; GFX10:   [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr3
-  ; GFX10:   [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
-  ; GFX10:   [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr4
-  ; GFX10:   [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr5
-  ; GFX10:   [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr6
-  ; GFX10:   [[BUILD_VECTOR_TRUNC:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY12]](s32), [[COPY13]](s32)
-  ; GFX10:   [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10:   [[BUILD_VECTOR_TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[COPY14]](s32), [[DEF]](s32)
-  ; GFX10:   [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR_TRUNC]](<2 x s16>), [[BUILD_VECTOR_TRUNC1]](<2 x s16>)
-  ; GFX10:   G_AMDGPU_INTRIN_IMAGE_STORE intrinsic(@llvm.amdgcn.image.store.mip.cube), [[BUILD_VECTOR1]](<4 x s32>), 15, [[CONCAT_VECTORS]](<4 x s16>), $noreg, $noreg, 0, [[BUILD_VECTOR]](<8 x s32>), 0, 0, 3 :: (dereferenceable store (<4 x s32>) into custom "ImageResource")
-  ; GFX10:   S_ENDPGM 0
-main_body:
-  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float> %vdata, i32 15, i16 %s, i16 %t, i16 %u, i16 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i16(i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i16(i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i16(<4 x float>, i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #1
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.ll
deleted file mode 100644
index a5bf0dbf6c8b3..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/image_ls_mipmap_zero.ll
+++ /dev/null
@@ -1,403 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
-
-define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) {
-; GFX9-LABEL: load_mip_1d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf unorm
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: load_mip_1d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
-; GFX9-LABEL: load_mip_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: load_mip_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
-; GFX9-LABEL: load_mip_3d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: load_mip_3d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
-; GFX9-LABEL: load_mip_1darray:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: load_mip_1darray:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
-; GFX9-LABEL: load_mip_2darray:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: load_mip_2darray:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
-; GFX9-LABEL: load_mip_cube:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: load_mip_cube:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
-; GFX9-LABEL: store_mip_1d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf unorm
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: store_mip_1d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_store v[0:3], v4, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
-; GFX10-NEXT:    s_endpgm
-main_body:
-  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
-; GFX9-LABEL: store_mip_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: store_mip_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
-; GFX10-NEXT:    s_endpgm
-main_body:
-  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
-; GFX9-LABEL: store_mip_3d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: store_mip_3d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm
-; GFX10-NEXT:    s_endpgm
-main_body:
-  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
-; GFX9-LABEL: store_mip_1darray:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: store_mip_1darray:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_store v[0:3], v[4:5], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY unorm
-; GFX10-NEXT:    s_endpgm
-main_body:
-  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
-; GFX9-LABEL: store_mip_2darray:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: store_mip_2darray:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY unorm
-; GFX10-NEXT:    s_endpgm
-main_body:
-  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
-; GFX9-LABEL: store_mip_cube:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da
-; GFX9-NEXT:    s_endpgm
-;
-; GFX10-LABEL: store_mip_cube:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    image_store v[0:3], v[4:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_CUBE unorm
-; GFX10-NEXT:    s_endpgm
-main_body:
-  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #1
-
-attributes #0 = { nounwind readonly }
-attributes #1 = { nounwind writeonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll
deleted file mode 100644
index 48a027502360c..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.a16.ll
+++ /dev/null
@@ -1,565 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
-
-define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %lod) {
-; GFX9-LABEL: sample_l_1d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: sample_l_1d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 15, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
-; GFX9-LABEL: sample_l_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
-; GFX9-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: sample_l_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 15, half %s, half %t, half -0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %lod) {
-; GFX9-LABEL: sample_c_l_1d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s12
-; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: sample_c_l_1d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    v_and_or_b32 v1, 0xffff, v1, s12
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 15, float %zcompare, half %s, half -2.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
-; GFX9-LABEL: sample_c_l_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
-; GFX9-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: sample_c_l_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    v_and_or_b32 v1, 0xffff, v1, v2
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %lod) {
-; GFX9-LABEL: sample_l_o_1d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v1, v1, v2, s12
-; GFX9-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: sample_l_o_1d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    v_and_or_b32 v1, 0xffff, v1, s12
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f16(i32 15, i32 %offset, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %t, half %lod) {
-; GFX9-LABEL: sample_l_o_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
-; GFX9-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: sample_l_o_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    v_and_or_b32 v1, 0xffff, v1, v2
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f16(i32 15, i32 %offset, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %lod) {
-; GFX9-LABEL: sample_c_l_o_1d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
-; GFX9-NEXT:    s_lshl_b32 s12, s0, 16
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v2, v2, v3, s12
-; GFX9-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: sample_c_l_o_1d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_lshl_b32 s12, s0, 16
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    v_and_or_b32 v2, 0xffff, v2, s12
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t, half %lod) {
-; GFX9-LABEL: sample_c_l_o_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
-; GFX9-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: sample_c_l_o_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    v_and_or_b32 v2, 0xffff, v2, v3
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %s, half %t, half %lod) {
-; GFX9-LABEL: gather4_l_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
-; GFX9-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: gather4_l_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_gather4_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 15, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %s, half %t, half %lod) {
-; GFX9-LABEL: gather4_c_l_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
-; GFX9-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: gather4_c_l_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    v_and_or_b32 v1, 0xffff, v1, v2
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_gather4_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 15, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, half %s, half %t, half %lod) {
-; GFX9-LABEL: gather4_l_o_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    v_mov_b32_e32 v3, 0xffff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v1, v1, v3, v2
-; GFX9-NEXT:    image_gather4_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: gather4_l_o_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    v_and_or_b32 v1, 0xffff, v1, v2
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_gather4_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f16(i32 15, i32 %offset, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %s, half %t, half %lod) {
-; GFX9-LABEL: gather4_c_l_o_2d:
-; GFX9:       ; %bb.0: ; %main_body
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
-; GFX9-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX9-NEXT:    s_mov_b32 s0, s2
-; GFX9-NEXT:    s_mov_b32 s1, s3
-; GFX9-NEXT:    s_mov_b32 s2, s4
-; GFX9-NEXT:    s_mov_b32 s3, s5
-; GFX9-NEXT:    s_mov_b32 s4, s6
-; GFX9-NEXT:    s_mov_b32 s5, s7
-; GFX9-NEXT:    s_mov_b32 s6, s8
-; GFX9-NEXT:    s_mov_b32 s7, s9
-; GFX9-NEXT:    s_mov_b32 s8, s10
-; GFX9-NEXT:    s_mov_b32 s9, s11
-; GFX9-NEXT:    s_mov_b32 s10, s12
-; GFX9-NEXT:    s_mov_b32 s11, s13
-; GFX9-NEXT:    v_and_or_b32 v2, v2, v4, v3
-; GFX9-NEXT:    image_gather4_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
-; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    ; return to shader part epilog
-;
-; GFX10-LABEL: gather4_c_l_o_2d:
-; GFX10:       ; %bb.0: ; %main_body
-; GFX10-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
-; GFX10-NEXT:    s_mov_b32 s0, s2
-; GFX10-NEXT:    s_mov_b32 s1, s3
-; GFX10-NEXT:    s_mov_b32 s2, s4
-; GFX10-NEXT:    s_mov_b32 s3, s5
-; GFX10-NEXT:    v_and_or_b32 v2, 0xffff, v2, v3
-; GFX10-NEXT:    s_mov_b32 s4, s6
-; GFX10-NEXT:    s_mov_b32 s5, s7
-; GFX10-NEXT:    s_mov_b32 s6, s8
-; GFX10-NEXT:    s_mov_b32 s7, s9
-; GFX10-NEXT:    s_mov_b32 s8, s10
-; GFX10-NEXT:    s_mov_b32 s9, s11
-; GFX10-NEXT:    s_mov_b32 s10, s12
-; GFX10-NEXT:    s_mov_b32 s11, s13
-; GFX10-NEXT:    image_gather4_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
-; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f16(i32 15, i32 %offset, float %zcompare, half %s, half %t, half 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f16(i32 immarg, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f16(i32 immarg, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f16(i32 immarg, i32, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f16(i32 immarg, i32, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f16(i32 immarg, i32, float, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f16(i32 immarg, i32, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f16(i32 immarg, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f16(i32 immarg, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f16(i32 immarg, i32, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f16(i32 immarg, i32, float, half, half, half, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll
deleted file mode 100644
index 516e92e08b16e..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.sample.ltolz.ll
+++ /dev/null
@@ -1,293 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GCN %s
-
-define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
-; GCN-LABEL: sample_l_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
-; GCN-LABEL: sample_l_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
-; GCN-LABEL: sample_c_l_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
-; GCN-LABEL: sample_c_l_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
-; GCN-LABEL: sample_l_o_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
-; GCN-LABEL: sample_l_o_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
-; GCN-LABEL: sample_c_l_o_1d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
-; GCN-LABEL: sample_c_l_o_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
-; GCN-LABEL: gather4_l_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
-; GCN-LABEL: gather4_c_l_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
-; GCN-LABEL: gather4_l_o_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
-; GCN-LABEL: gather4_c_l_o_2d:
-; GCN:       ; %bb.0: ; %main_body
-; GCN-NEXT:    s_mov_b32 s0, s2
-; GCN-NEXT:    s_mov_b32 s1, s3
-; GCN-NEXT:    s_mov_b32 s2, s4
-; GCN-NEXT:    s_mov_b32 s3, s5
-; GCN-NEXT:    s_mov_b32 s4, s6
-; GCN-NEXT:    s_mov_b32 s5, s7
-; GCN-NEXT:    s_mov_b32 s6, s8
-; GCN-NEXT:    s_mov_b32 s7, s9
-; GCN-NEXT:    s_mov_b32 s8, s10
-; GCN-NEXT:    s_mov_b32 s9, s11
-; GCN-NEXT:    s_mov_b32 s10, s12
-; GCN-NEXT:    s_mov_b32 s11, s13
-; GCN-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    ; return to shader part epilog
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.000000e+00, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
-
-attributes #0 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
index 8fab3d392c98f..763ead034f612 100644
--- a/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
+++ b/llvm/test/CodeGen/AMDGPU/cluster_stores.ll
@@ -269,10 +269,10 @@ define amdgpu_ps void @cluster_image_load(<8 x i32> inreg %src, <8 x i32> inreg
 entry:
   %x1 = add i32 %x, 1
   %y1 = add i32 %y, 1
-  %val1 = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %x1, i32 %y1, i32 0, <8 x i32> %src, i32 0, i32 0)
+  %val1 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %x1, i32 %y1, <8 x i32> %src, i32 0, i32 0)
   %x2 = add i32 %x, 2
   %y2 = add i32 %y, 2
-  %val2 = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %x2, i32 %y2, i32 0, <8 x i32> %src, i32 0, i32 0)
+  %val2 = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %x2, i32 %y2, <8 x i32> %src, i32 0, i32 0)
   %val = fadd fast <4 x float> %val1, %val2
   call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %val, i32 15, i32 %x, i32 %y, <8 x i32> %dst, i32 0, i32 0)
   ret void
@@ -286,20 +286,22 @@ entry:
 define amdgpu_ps void @no_cluster_image_load(<8 x i32> inreg %src1, <8 x i32> inreg %src2, <8 x i32> inreg %dst, i32 %x, i32 %y) {
 ; GFX9-LABEL: no_cluster_image_load:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    image_load v[2:5], v[0:1], s[0:7] dmask:0xf unorm
-; GFX9-NEXT:    image_load v[6:9], v[0:1], s[8:15] dmask:0xf unorm
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    image_load_mip v[3:6], v[0:2], s[0:7] dmask:0xf unorm
+; GFX9-NEXT:    image_load_mip v[7:10], v[0:2], s[8:15] dmask:0xf unorm
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_add_f32_e32 v6, v6, v10
 ; GFX9-NEXT:    v_add_f32_e32 v5, v5, v9
 ; GFX9-NEXT:    v_add_f32_e32 v4, v4, v8
 ; GFX9-NEXT:    v_add_f32_e32 v3, v3, v7
-; GFX9-NEXT:    v_add_f32_e32 v2, v2, v6
-; GFX9-NEXT:    image_store v[2:5], v[0:1], s[16:23] dmask:0xf unorm
+; GFX9-NEXT:    image_store v[3:6], v[0:1], s[16:23] dmask:0xf unorm
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: no_cluster_image_load:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    image_load v[2:5], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
-; GFX10-NEXT:    image_load v[6:9], v[0:1], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX10-NEXT:    v_mov_b32_e32 v10, 0
+; GFX10-NEXT:    image_load_mip v[2:5], [v0, v1, v10], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
+; GFX10-NEXT:    image_load_mip v[6:9], [v0, v1, v10], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_add_f32_e32 v5, v5, v9
 ; GFX10-NEXT:    v_add_f32_e32 v4, v4, v8
@@ -389,6 +391,7 @@ entry:
   ret void
 }
 
+declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
 declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
 declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
 declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index 661b1ac056e8f..67c11cc5beaef 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -5,6 +5,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GCN-LABEL: _amdgpu_ps_main:
 ; GCN:       ; %bb.0: ; %.entry
 ; GCN-NEXT:    s_mov_b32 s0, 0
+; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    s_mov_b32 s1, s0
 ; GCN-NEXT:    s_mov_b32 s2, s0
 ; GCN-NEXT:    s_mov_b32 s3, s0
@@ -14,10 +15,11 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GCN-NEXT:    s_mov_b32 s7, s0
 ; GCN-NEXT:    image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    s_clause 0x2
+; GCN-NEXT:    s_clause 0x1
 ; GCN-NEXT:    image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
 ; GCN-NEXT:    image_sample v3, v[0:1], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D
-; GCN-NEXT:    image_load v4, v[0:1], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    image_load_mip v4, v[2:4], s[0:7] dmask:0x4 dim:SQ_RSRC_IMG_2D unorm
 ; GCN-NEXT:    s_clause 0x3
 ; GCN-NEXT:    s_buffer_load_dword s24, s[0:3], 0x5c
 ; GCN-NEXT:    s_buffer_load_dword s28, s[0:3], 0x7c
@@ -44,33 +46,31 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GCN-NEXT:    v_sub_f32_e32 v8, s0, v1
 ; GCN-NEXT:    v_fma_f32 v7, -s2, v6, s6
 ; GCN-NEXT:    v_fma_f32 v5, v6, v5, 1.0
+; GCN-NEXT:    v_mad_f32 v10, s2, v6, v2
 ; GCN-NEXT:    s_mov_b32 s0, 0x3c23d70a
 ; GCN-NEXT:    v_fmac_f32_e32 v1, v6, v8
+; GCN-NEXT:    v_mac_f32_e32 v10, v7, v6
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_mul_f32_e32 v9, s10, v0
 ; GCN-NEXT:    v_fma_f32 v0, -v0, s10, s14
-; GCN-NEXT:    v_fmac_f32_e32 v9, v0, v6
-; GCN-NEXT:    v_sub_f32_e32 v0, v1, v5
-; GCN-NEXT:    v_fmac_f32_e32 v5, v0, v6
-; GCN-NEXT:    s_waitcnt vmcnt(2)
-; GCN-NEXT:    v_mad_f32 v10, s2, v6, v2
 ; GCN-NEXT:    v_mul_f32_e32 v8, s18, v2
-; GCN-NEXT:    s_waitcnt vmcnt(1)
 ; GCN-NEXT:    v_mul_f32_e32 v3, s22, v3
-; GCN-NEXT:    v_mac_f32_e32 v10, v7, v6
+; GCN-NEXT:    v_fmac_f32_e32 v9, v0, v6
+; GCN-NEXT:    v_sub_f32_e32 v0, v1, v5
 ; GCN-NEXT:    v_mul_f32_e32 v1, v8, v6
 ; GCN-NEXT:    v_mul_f32_e32 v7, v6, v3
 ; GCN-NEXT:    v_fma_f32 v3, -v6, v3, v9
-; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_add_f32_e32 v4, v4, v10
+; GCN-NEXT:    v_fmac_f32_e32 v5, v0, v6
 ; GCN-NEXT:    v_fma_f32 v0, v2, s26, -v1
 ; GCN-NEXT:    v_fmac_f32_e32 v7, v3, v6
-; GCN-NEXT:    v_mul_f32_e32 v3, v4, v6
-; GCN-NEXT:    v_fma_f32 v4, v5, s0, 0x3ca3d70a
 ; GCN-NEXT:    v_fmac_f32_e32 v1, v0, v6
 ; GCN-NEXT:    v_mul_f32_e32 v0, v2, v6
-; GCN-NEXT:    v_mul_f32_e32 v2, v7, v4
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    v_add_f32_e32 v4, v4, v10
+; GCN-NEXT:    v_mul_f32_e32 v3, v4, v6
+; GCN-NEXT:    v_fma_f32 v4, v5, s0, 0x3ca3d70a
 ; GCN-NEXT:    v_mul_f32_e32 v1, v3, v1
+; GCN-NEXT:    v_mul_f32_e32 v2, v7, v4
 ; GCN-NEXT:    v_fmac_f32_e32 v1, v2, v0
 ; GCN-NEXT:    v_max_f32_e32 v0, 0, v1
 ; GCN-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll b/llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll
deleted file mode 100644
index 7e3270fa288ca..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/image_ls_mipmap_zero.ll
+++ /dev/null
@@ -1,132 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-
-
-; GCN-LABEL: {{^}}load_mip_1d:
-; GCN-NOT: image_load_mip
-; GCN: image_load
-define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_mip_2d:
-; GCN-NOT: image_load_mip
-; GCN: image_load
-define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_mip_3d:
-; GCN-NOT: image_load_mip
-; GCN: image_load
-define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_mip_1darray:
-; GCN-NOT: image_load_mip
-; GCN: image_load
-define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_mip_2darray:
-; GCN-NOT: image_load_mip
-; GCN: image_load
-define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}load_mip_cube:
-; GCN-NOT: image_load_mip
-; GCN: image_load
-define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %u) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-
-
-; GCN-LABEL: {{^}}store_mip_1d:
-; GCN-NOT: image_store_mip
-; GCN: image_store
-define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
-main_body:
-  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-; GCN-LABEL: {{^}}store_mip_2d:
-; GCN-NOT: image_store_mip
-; GCN: image_store
-define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
-main_body:
-  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-; GCN-LABEL: {{^}}store_mip_3d:
-; GCN-NOT: image_store_mip
-; GCN: image_store
-define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
-main_body:
-  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-; GCN-LABEL: {{^}}store_mip_1darray:
-; GCN-NOT: image_store_mip
-; GCN: image_store
-define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
-main_body:
-  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-; GCN-LABEL: {{^}}store_mip_2darray:
-; GCN-NOT: image_store_mip
-; GCN: image_store
-define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
-main_body:
-  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-; GCN-LABEL: {{^}}store_mip_cube:
-; GCN-NOT: image_store_mip
-; GCN: image_store
-define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %u) {
-main_body:
-  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %u, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
-  ret void
-}
-
-declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
-
-
-declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
deleted file mode 100644
index 330e2f4ce1496..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.ltolz.ll
+++ /dev/null
@@ -1,113 +0,0 @@
-; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
-
-
-; GCN-LABEL: {{^}}sample_l_1d:
-; GCN: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}sample_l_2d:
-; GCN: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float -0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}sample_c_l_1d:
-; GCN: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float -2.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}sample_c_l_2d:
-; GCN: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}sample_l_o_1d:
-; GCN: image_sample_lz_o v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @sample_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}sample_l_o_2d:
-; GCN: image_sample_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @sample_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}sample_c_l_o_1d:
-; GCN: image_sample_c_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @sample_c_l_o_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}sample_c_l_o_2d:
-; GCN: image_sample_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @sample_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}gather4_l_2d:
-; GCN: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 15, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}gather4_c_l_2d:
-; GCN: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}gather4_l_o_2d:
-; GCN: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-; GCN-LABEL: {{^}}gather4_c_l_o_2d:
-; GCN: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf{{$}}
-define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
-main_body:
-  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 15, i32 %offset, float %zcompare, float %s, float %t, float 0.0, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
-  ret <4 x float> %v
-}
-
-declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-
-declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
-declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
index 84cec8366259d..f212b46432ffb 100644
--- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -1235,6 +1235,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; SI:       ; %bb.0: ; %.entry
 ; SI-NEXT:    s_mov_b32 s4, 0
 ; SI-NEXT:    s_mov_b64 s[0:1], exec
+; SI-NEXT:    v_mov_b32_e32 v4, 0
 ; SI-NEXT:    v_mov_b32_e32 v2, v1
 ; SI-NEXT:    v_mov_b32_e32 v3, v1
 ; SI-NEXT:    s_mov_b32 s5, s4
@@ -1244,7 +1245,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; SI-NEXT:    s_mov_b32 s9, s4
 ; SI-NEXT:    s_mov_b32 s10, s4
 ; SI-NEXT:    s_mov_b32 s11, s4
-; SI-NEXT:    image_sample_lz v1, v[1:3], s[4:11], s[0:3] dmask:0x1 da
+; SI-NEXT:    image_sample_l v1, v[1:4], s[4:11], s[0:3] dmask:0x1 da
 ; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
 ; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
@@ -1274,6 +1275,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ;
 ; GFX10-WAVE64-LABEL: cbranch_kill:
 ; GFX10-WAVE64:       ; %bb.0: ; %.entry
+; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-WAVE64-NEXT:    s_mov_b32 s4, 0
 ; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
 ; GFX10-WAVE64-NEXT:    s_mov_b32 s5, s4
@@ -1283,7 +1285,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; GFX10-WAVE64-NEXT:    s_mov_b32 s9, s4
 ; GFX10-WAVE64-NEXT:    s_mov_b32 s10, s4
 ; GFX10-WAVE64-NEXT:    s_mov_b32 s11, s4
-; GFX10-WAVE64-NEXT:    image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-WAVE64-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
 ; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-WAVE64-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
 ; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
@@ -1313,6 +1315,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ;
 ; GFX10-WAVE32-LABEL: cbranch_kill:
 ; GFX10-WAVE32:       ; %bb.0: ; %.entry
+; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-WAVE32-NEXT:    s_mov_b32 s4, 0
 ; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
 ; GFX10-WAVE32-NEXT:    s_mov_b32 s5, s4
@@ -1322,7 +1325,7 @@ define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
 ; GFX10-WAVE32-NEXT:    s_mov_b32 s9, s4
 ; GFX10-WAVE32-NEXT:    s_mov_b32 s10, s4
 ; GFX10-WAVE32-NEXT:    s_mov_b32 s11, s4
-; GFX10-WAVE32-NEXT:    image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
+; GFX10-WAVE32-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
 ; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-WAVE32-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 0, v1
 ; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo

From f53d359816e66a107195e1e4b581e2a33bbafaa4 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Fri, 21 Jan 2022 12:12:16 +0100
Subject: [PATCH 157/946] Fix 1f9e18b6565fd1bb69c4b649b9efd3467b3c7c7d

Part 2
---
 llvm/include/llvm/Demangle/Utility.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 989b41701e4c9..587c0e4bec36d 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -60,7 +60,7 @@ class OutputBuffer {
     // Add negative sign...
     if (isNeg)
       *--TempPtr = '-';
-    this->operator<<(StringView(TempPtr, Temp.end()));
+    this->operator<<(StringView(TempPtr, Temp.data() + Temp.size()));
   }
 
 public:

From 9c5b856dac5c0ccbe755410b826f683ef01d7f08 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 12:20:23 +0100
Subject: [PATCH 158/946] [CoroSplit] Avoid pointer element type accesses

Use isOpaqueOrPointeeTypeMatches() for the assertions instead.
---
 llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 2e4661fa1d0a1..b5129809c6a6a 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -618,7 +618,8 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
   Value *CachedSlot = nullptr;
   auto getSwiftErrorSlot = [&](Type *ValueTy) -> Value * {
     if (CachedSlot) {
-      assert(CachedSlot->getType()->getPointerElementType() == ValueTy &&
+      assert(cast<PointerType>(CachedSlot->getType())
+                 ->isOpaqueOrPointeeTypeMatches(ValueTy) &&
              "multiple swifterror slots in function with different types");
       return CachedSlot;
     }
@@ -627,7 +628,8 @@ static void replaceSwiftErrorOps(Function &F, coro::Shape &Shape,
     for (auto &Arg : F.args()) {
       if (Arg.isSwiftError()) {
         CachedSlot = &Arg;
-        assert(Arg.getType()->getPointerElementType() == ValueTy &&
+        assert(cast<PointerType>(Arg.getType())
+                   ->isOpaqueOrPointeeTypeMatches(ValueTy) &&
                "swifterror argument does not have expected type");
         return &Arg;
       }

From 0ca426d6ac65b84c70ac7fd9511628ce5115423e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 21 Jan 2022 11:22:36 +0000
Subject: [PATCH 159/946] [llvm-mca] Improve barriers for strict region marking
 (PR52198)

As suggested on the bug, to help (but not completely....) stop folded instructions crossing the inline asm barriers used for llvm-mca analysis, we should recommend tagging with memory captures/attributes.

Differential Revision: https://reviews.llvm.org/D117788
---
 llvm/docs/CommandGuide/llvm-mca.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst
index b08f088762799..fdb45783ea663 100644
--- a/llvm/docs/CommandGuide/llvm-mca.rst
+++ b/llvm/docs/CommandGuide/llvm-mca.rst
@@ -299,9 +299,9 @@ C++. As a workaround, inline assembly directives may be used:
 .. code-block:: c++
 
   int foo(int a, int b) {
-    __asm volatile("# LLVM-MCA-BEGIN foo");
+    __asm volatile("# LLVM-MCA-BEGIN foo":::"memory");
     a += 42;
-    __asm volatile("# LLVM-MCA-END");
+    __asm volatile("# LLVM-MCA-END":::"memory");
     a *= b;
     return a;
   }

From bfbdb5e43e50484e179122bfb66cff8165e4b084 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 12:34:35 +0100
Subject: [PATCH 160/946] [Coroutines] Avoid some pointer element type accesses

These are just verifying that pointer types are correct, which is
no longer relevant under opaque pointers.
---
 llvm/lib/Transforms/Coroutines/Coroutines.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index 1078aac257d72..a8123aee319ef 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -676,6 +676,9 @@ static void checkAsyncFuncPointer(const Instruction *I, Value *V) {
   if (!AsyncFuncPtrAddr)
     fail(I, "llvm.coro.id.async async function pointer not a global", V);
 
+  if (AsyncFuncPtrAddr->getType()->isOpaquePointerTy())
+    return;
+
   auto *StructTy =
       cast<StructType>(AsyncFuncPtrAddr->getType()->getPointerElementType());
   if (StructTy->isOpaque() || !StructTy->isPacked() ||
@@ -701,14 +704,16 @@ void CoroIdAsyncInst::checkWellFormed() const {
 static void checkAsyncContextProjectFunction(const Instruction *I,
                                              Function *F) {
   auto *FunTy = cast<FunctionType>(F->getValueType());
-  if (!FunTy->getReturnType()->isPointerTy() ||
-      !FunTy->getReturnType()->getPointerElementType()->isIntegerTy(8))
+  Type *Int8Ty = Type::getInt8Ty(F->getContext());
+  auto *RetPtrTy = dyn_cast<PointerType>(FunTy->getReturnType());
+  if (!RetPtrTy || !RetPtrTy->isOpaqueOrPointeeTypeMatches(Int8Ty))
     fail(I,
          "llvm.coro.suspend.async resume function projection function must "
          "return an i8* type",
          F);
   if (FunTy->getNumParams() != 1 || !FunTy->getParamType(0)->isPointerTy() ||
-      !FunTy->getParamType(0)->getPointerElementType()->isIntegerTy(8))
+      !cast<PointerType>(FunTy->getParamType(0))
+           ->isOpaqueOrPointeeTypeMatches(Int8Ty))
     fail(I,
          "llvm.coro.suspend.async resume function projection function must "
          "take one i8* type as parameter",
@@ -723,8 +728,7 @@ void CoroAsyncEndInst::checkWellFormed() const {
   auto *MustTailCallFunc = getMustTailCallFunction();
   if (!MustTailCallFunc)
     return;
-  auto *FnTy =
-      cast<FunctionType>(MustTailCallFunc->getType()->getPointerElementType());
+  auto *FnTy = MustTailCallFunc->getFunctionType();
   if (FnTy->getNumParams() != (arg_size() - 3))
     fail(this,
          "llvm.coro.end.async must tail call function argument type must "

From 597eae998a874a872b67d1a22a04d7c45d2ef94b Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Thu, 20 Jan 2022 17:22:09 +0100
Subject: [PATCH 161/946] [clangd][Background] Make index validation logs
 verbose

These errors are non-harmful and should be transient. They either
imply:
- compilation database returned stale results for TUs and it'll be fixed once
  it's updated to match project state.
- a TUs dependencies has changed and some headers no longer exist. this should
  be fixed with the next indexing cycle.

In either case the user will have some stale symbols in their index until clangd
restarts and the underlying issue is resolved. On the downside these logs are
confusing users when there's another issue.

Differential Revision: https://reviews.llvm.org/D117792
---
 clang-tools-extra/clangd/index/Background.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/index/Background.cpp b/clang-tools-extra/clangd/index/Background.cpp
index ddfe962d31890..a6ee1d980e049 100644
--- a/clang-tools-extra/clangd/index/Background.cpp
+++ b/clang-tools-extra/clangd/index/Background.cpp
@@ -82,7 +82,7 @@ llvm::SmallString<128> getAbsolutePath(const tooling::CompileCommand &Cmd) {
 bool shardIsStale(const LoadedShard &LS, llvm::vfs::FileSystem *FS) {
   auto Buf = FS->getBufferForFile(LS.AbsolutePath);
   if (!Buf) {
-    elog("Background-index: Couldn't read {0} to validate stored index: {1}",
+    vlog("Background-index: Couldn't read {0} to validate stored index: {1}",
          LS.AbsolutePath, Buf.getError().message());
     // There is no point in indexing an unreadable file.
     return false;

From b6a41fddcfd375ce30487ef87ca2cd65a6be0bcc Mon Sep 17 00:00:00 2001
From: OCHyams <orlando.hyams@sony.com>
Date: Fri, 21 Jan 2022 10:54:53 +0000
Subject: [PATCH 162/946] [DWARF][DebugInfo] Fix off-by-one error in size of
 DW_TAG_base_type types

Fix PR53163 by rounding the byte size of DW_TAG_base_type types up. Without
this fix we risk emitting types with a truncated size (including rounding
less-than-byte-sized types' sizes down to zero).

Reviewed By: probinson

Differential Revision: https://reviews.llvm.org/D117124
---
 .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp   |  3 +-
 llvm/test/DebugInfo/X86/base-type-size.ll     | 50 +++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/DebugInfo/X86/base-type-size.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 3ab73d128aed1..ab3c9f486670e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1581,7 +1581,8 @@ void DwarfCompileUnit::createBaseTypeDIEs() {
               Twine(dwarf::AttributeEncodingString(Btr.Encoding) +
                     "_" + Twine(Btr.BitSize)).toStringRef(Str));
     addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
-    addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8);
+    // Round up to smallest number of bytes that contains this number of bits.
+    addUInt(Die, dwarf::DW_AT_byte_size, None, divideCeil(Btr.BitSize, 8));
 
     Btr.Die = &Die;
   }
diff --git a/llvm/test/DebugInfo/X86/base-type-size.ll b/llvm/test/DebugInfo/X86/base-type-size.ll
new file mode 100644
index 0000000000000..3a8dc37bdc65f
--- /dev/null
+++ b/llvm/test/DebugInfo/X86/base-type-size.ll
@@ -0,0 +1,50 @@
+; RUN: llc %s --filetype=obj -o - | llvm-dwarfdump - -o - | FileCheck %s
+
+;; cat test.cpp
+;; void ext(bool);
+;; void fun(bool b) { ext(b); }
+;; $ clang++ test.cpp -o - -emit-llvm -S -O2 -gdwarf-5
+;;
+;; Check that the DW_TAG_base_type DIE for the 1u conversion in the DIExpression
+;; has a non-zero DW_AT_byte_size attribute.
+
+; CHECK: DW_TAG_base_type
+; CHECK-NEXT: DW_AT_name      ("DW_ATE_unsigned_1")
+; CHECK-NEXT: DW_AT_encoding  (DW_ATE_unsigned)
+; CHECK-NEXT: DW_AT_byte_size (0x01)
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local void @_Z3funb(i1 zeroext %b) local_unnamed_addr #0 !dbg !7 {
+entry:
+  call void @llvm.dbg.value(metadata i1 %b, metadata !12, metadata !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !13
+  tail call void @_Z3extb(i1 zeroext %b), !dbg !14
+  ret void, !dbg !15
+}
+
+declare !dbg !16 dso_local void @_Z3extb(i1 zeroext) local_unnamed_addr
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5}
+!llvm.ident = !{!6}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 14.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.cpp", directory: "/")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 7, !"uwtable", i32 1}
+!6 = !{!"clang version 14.0.0"}
+!7 = distinct !DISubprogram(name: "fun", linkageName: "_Z3funb", scope: !1, file: !1, line: 2, type: !8, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11)
+!8 = !DISubroutineType(types: !9)
+!9 = !{null, !10}
+!10 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
+!11 = !{!12}
+!12 = !DILocalVariable(name: "b", arg: 1, scope: !7, file: !1, line: 2, type: !10)
+!13 = !DILocation(line: 0, scope: !7)
+!14 = !DILocation(line: 2, column: 20, scope: !7)
+!15 = !DILocation(line: 2, column: 28, scope: !7)
+!16 = !DISubprogram(name: "ext", linkageName: "_Z3extb", scope: !1, file: !1, line: 1, type: !8, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !17)
+!17 = !{}

From 357f2d9ccf204981fd90481ba5ebb942ea46d7f9 Mon Sep 17 00:00:00 2001
From: Siddharth Bhat <siddu.druid@gmail.com>
Date: Fri, 21 Jan 2022 17:02:39 +0530
Subject: [PATCH 163/946] [mlir][LangRef] Add top-level production to the MLIR
 grammar

The LangRef currently lacks a top-level production, leaving the productions attribute-alias-def and type-alias-defunused. Clarify the situation by declaring what is to be parsed by an MLIR parser at the toplevel.

Reviewed By: mehdi_amini

Differential Revision: https://reviews.llvm.org/D117668
---
 mlir/docs/LangRef.md | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/mlir/docs/LangRef.md b/mlir/docs/LangRef.md
index 357778a54f087..4b956c581cb55 100644
--- a/mlir/docs/LangRef.md
+++ b/mlir/docs/LangRef.md
@@ -179,6 +179,19 @@ string-literal  ::= `"` [^"\n\f\v\r]* `"`   TODO: define escaping rules
 Not listed here, but MLIR does support comments. They use standard BCPL syntax,
 starting with a `//` and going until the end of the line.
 
+
+### Top level Productions
+
+```
+// Top level production
+toplevel := (operation | attribute-alias-def | type-alias-def)*
+```
+
+The production `toplevel` is the top level production that is parsed by any parsing
+consuming the MLIR syntax. [Operations](#operations),
+[Attribute alises](#attribute-value-aliases), and [Type aliases](#type-aliases)
+can be declared on the toplevel.
+
 ### Identifiers and keywords
 
 Syntax:

From 4d268dc94a6bef0221c94d4b7e4c2b112e75fe1b Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Wed, 19 Jan 2022 17:49:33 +0000
Subject: [PATCH 164/946] [RISCV] Enable CGP to sink splat operands of VP
 intrinsics

This patch brings better splat-matching to our VP support, by sinking
splat operands of VP intrinsics back into the same block as the VP
operation. The list of VP intrinsics we are interested in matches that
of the regular instructions.

Some optimization is still lacking. For instance, our VL nodes aren't
recognized as commutative, so splats must be on the RHS. Because of
this, we limit our sinking of splats to just the RHS operand for now.
Improvement in this regard can come in another patch.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117703
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  24 +
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll  | 771 ++++++++++++++++++
 2 files changed, 795 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 507a21b16e4eb..4aba42b014d17 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1260,6 +1260,30 @@ bool RISCVTargetLowering::shouldSinkOperands(
         switch (II->getIntrinsicID()) {
         case Intrinsic::fma:
           return Operand == 0 || Operand == 1;
+        // FIXME: Our patterns can only match vx/vf instructions when the splat
+        // it on the RHS, because TableGen doesn't recognize our VP operations
+        // as commutative.
+        case Intrinsic::vp_add:
+        case Intrinsic::vp_mul:
+        case Intrinsic::vp_and:
+        case Intrinsic::vp_or:
+        case Intrinsic::vp_xor:
+        case Intrinsic::vp_fadd:
+        case Intrinsic::vp_fsub:
+        case Intrinsic::vp_fmul:
+        case Intrinsic::vp_fdiv:
+        case Intrinsic::vp_shl:
+        case Intrinsic::vp_lshr:
+        case Intrinsic::vp_ashr:
+        case Intrinsic::vp_udiv:
+        case Intrinsic::vp_sdiv:
+        case Intrinsic::vp_urem:
+        case Intrinsic::vp_srem:
+          return Operand == 1;
+        // ... the one exception is vp.sub which has explicit patterns for both
+        // LHS and RHS (as vrsub).
+        case Intrinsic::vp_sub:
+          return Operand == 0 || Operand == 1;
         default:
           return false;
         }
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index 2acb185d59ead..f6a348418f5b2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -2961,3 +2961,774 @@ for.body:                                         ; preds = %for.body.preheader,
   %cmp.not = icmp eq i64 %indvars.iv.next, 1024
   br i1 %cmp.not, label %for.cond.cleanup, label %for.body
 }
+
+declare <4 x i32> @llvm.vp.mul.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_mul(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_mul:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB46_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vmul.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB46_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_add(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_add:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB47_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB47_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+; FIXME: This doesn't match against vadd.vx because our patterns aren't
+; commutative.
+
+define void @sink_splat_vp_add_commute(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_add_commute:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:  .LBB48_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vadd.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a1, .LBB48_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_sub(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_sub:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB49_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB49_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+define void @sink_splat_vp_rsub(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_rsub:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB50_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vrsub.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB50_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.shl.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_shl(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_shl:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB51_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vsll.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB51_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_lshr(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_lshr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB52_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB52_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_ashr(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_ashr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB53_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vsra.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB53_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x float> @llvm.vp.fmul.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @sink_splat_vp_fmul(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_fmul:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fmv.w.x ft0, a1
+; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:  .LBB54_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a1, .LBB54_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float, float* %a, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.vp.fmul.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast float* %0 to <4 x float>*
+  store <4 x float> %2, <4 x float>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x float> @llvm.vp.fdiv.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @sink_splat_vp_fdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_fdiv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fmv.w.x ft0, a1
+; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:  .LBB55_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a1, .LBB55_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float, float* %a, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast float* %0 to <4 x float>*
+  store <4 x float> %2, <4 x float>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+; FIXME: vfrdiv.vf doesn't match against masked instructions
+
+define void @sink_splat_vp_frdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_frdiv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fmv.w.x ft0, a1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:  .LBB56_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a1, .LBB56_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float, float* %a, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl)
+  %3 = bitcast float* %0 to <4 x float>*
+  store <4 x float> %2, <4 x float>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x float> @llvm.vp.fadd.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @sink_splat_vp_fadd(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_fadd:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fmv.w.x ft0, a1
+; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:  .LBB57_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a1, .LBB57_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float, float* %a, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.vp.fadd.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast float* %0 to <4 x float>*
+  store <4 x float> %2, <4 x float>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x float> @llvm.vp.fsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+define void @sink_splat_vp_fsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_fsub:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fmv.w.x ft0, a1
+; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:  .LBB58_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a1, .LBB58_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float, float* %a, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast float* %0 to <4 x float>*
+  store <4 x float> %2, <4 x float>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x float> @llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
+
+; FIXME: vfrsub.vf doesn't match against masked instructions
+
+define void @sink_splat_vp_frsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_frsub:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    fmv.w.x ft0, a1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v8, ft0
+; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:  .LBB59_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vfsub.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a1, .LBB59_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
+  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds float, float* %a, i64 %index
+  %1 = bitcast float* %0 to <4 x float>*
+  %wide.load = load <4 x float>, <4 x float>* %1, align 4
+  %2 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl)
+  %3 = bitcast float* %0 to <4 x float>*
+  store <4 x float> %2, <4 x float>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_udiv(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_udiv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB60_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vdivu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB60_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_sdiv(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_sdiv:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB61_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vdiv.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB61_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.urem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_urem(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_urem:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB62_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vremu.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB62_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+declare <4 x i32> @llvm.vp.srem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
+
+define void @sink_splat_vp_srem(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_srem:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    li a3, 1024
+; CHECK-NEXT:  .LBB63_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vrem.vx v8, v8, a1, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v8, (a0)
+; CHECK-NEXT:    addi a3, a3, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a3, .LBB63_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}
+
+; Check that we don't sink a splat operand that has no chance of being folded.
+
+define void @sink_splat_vp_srem_commute(i32* nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: sink_splat_vp_srem_commute:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vmv.v.x v8, a1
+; CHECK-NEXT:    li a1, 1024
+; CHECK-NEXT:  .LBB64_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
+; CHECK-NEXT:    vrem.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    addi a1, a1, -4
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    bnez a1, .LBB64_1
+; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT:    ret
+entry:
+  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
+  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
+  %0 = getelementptr inbounds i32, i32* %a, i64 %index
+  %1 = bitcast i32* %0 to <4 x i32>*
+  %wide.load = load <4 x i32>, <4 x i32>* %1, align 4
+  %2 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
+  %3 = bitcast i32* %0 to <4 x i32>*
+  store <4 x i32> %2, <4 x i32>* %3, align 4
+  %index.next = add nuw i64 %index, 4
+  %4 = icmp eq i64 %index.next, 1024
+  br i1 %4, label %for.cond.cleanup, label %vector.body
+
+for.cond.cleanup:                                 ; preds = %vector.body
+  ret void
+}

From 825a3cd6b6972b6a50b80bed7d951d7ea7f90669 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Thu, 20 Jan 2022 14:11:15 +0100
Subject: [PATCH 165/946] [clangd] Fail inlayHints requests on content changes

This should improve the overall UX by making the labels less jumpy.

Differential Revision: https://reviews.llvm.org/D117776
---
 clang-tools-extra/clangd/ClangdServer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/ClangdServer.cpp b/clang-tools-extra/clangd/ClangdServer.cpp
index 0760ed5317be8..a7210e0526a41 100644
--- a/clang-tools-extra/clangd/ClangdServer.cpp
+++ b/clang-tools-extra/clangd/ClangdServer.cpp
@@ -775,7 +775,7 @@ void ClangdServer::inlayHints(PathRef File, llvm::Optional<Range> RestrictRange,
       return CB(InpAST.takeError());
     CB(clangd::inlayHints(InpAST->AST, std::move(RestrictRange)));
   };
-  WorkScheduler->runWithAST("InlayHints", File, std::move(Action));
+  WorkScheduler->runWithAST("InlayHints", File, std::move(Action), Transient);
 }
 
 void ClangdServer::onFileEvent(const DidChangeWatchedFilesParams &Params) {

From 4727d29d908f9dd608dd97a58c0af1ad579fd3ca Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 21 Jan 2022 11:55:40 +0000
Subject: [PATCH 166/946] [X86] Remove __builtin_ia32_pabs intrinsics and use
 generic __builtin_elementwise_abs

D111986 added the generic `__builtin_elementwise_abs()` intrinsic with the same integer absolute behaviour as the SSE/AVX instructions (abs(INT_MIN) == INT_MIN)

This patch removes the `__builtin_ia32_pabs*` intrinsics and just uses `__builtin_elementwise_abs` - the existing tests see no changes:
```
__m256i test_mm256_abs_epi8(__m256i a) {
  // CHECK-LABEL: test_mm256_abs_epi8
  // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false)
  return _mm256_abs_epi8(a);
}
```
This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`).

Differential Revision: https://reviews.llvm.org/D117791
---
 clang/include/clang/Basic/BuiltinsX86.def | 12 ------------
 clang/lib/CodeGen/CGBuiltin.cpp           | 15 ---------------
 clang/lib/Headers/avx2intrin.h            |  6 +++---
 clang/lib/Headers/avx512bwintrin.h        |  4 ++--
 clang/lib/Headers/avx512fintrin.h         |  8 ++++++--
 clang/lib/Headers/avx512vlintrin.h        |  4 ++--
 clang/lib/Headers/tmmintrin.h             |  6 +++---
 clang/test/CodeGen/builtins-x86.c         |  3 ---
 8 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index bc6208be45606..9b7c763b0c6c7 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -296,9 +296,6 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3")
 
 TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse")
 TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse")
@@ -558,9 +555,6 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx")
 
 // AVX2
 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
@@ -927,8 +921,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:",
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
@@ -1045,8 +1037,6 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5
 TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
 
-TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
@@ -1198,8 +1188,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a49c035002786..49f054ec1a982 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14285,21 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       return Builder.CreateCall(F, Ops[0]);
     }
   }
-  case X86::BI__builtin_ia32_pabsb128:
-  case X86::BI__builtin_ia32_pabsw128:
-  case X86::BI__builtin_ia32_pabsd128:
-  case X86::BI__builtin_ia32_pabsb256:
-  case X86::BI__builtin_ia32_pabsw256:
-  case X86::BI__builtin_ia32_pabsd256:
-  case X86::BI__builtin_ia32_pabsq128:
-  case X86::BI__builtin_ia32_pabsq256:
-  case X86::BI__builtin_ia32_pabsb512:
-  case X86::BI__builtin_ia32_pabsw512:
-  case X86::BI__builtin_ia32_pabsd512:
-  case X86::BI__builtin_ia32_pabsq512: {
-    Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
-  }
   case X86::BI__builtin_ia32_pmaxsb128:
   case X86::BI__builtin_ia32_pmaxsw128:
   case X86::BI__builtin_ia32_pmaxsd128:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 5064c87c2bb19..c9ad74ce3fa42 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -26,19 +26,19 @@
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi8(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
+    return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi16(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
+    return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi32(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
+    return (__m256i)__builtin_elementwise_abs((__v8si)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 6aee8aed84871..53319eb23011d 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi8 (__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
+  return (__m512i)__builtin_elementwise_abs((__v64qs)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi16 (__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
+  return (__m512i)__builtin_elementwise_abs((__v32hi)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index df298640523b7..9b02a7cffc64d 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -26,6 +26,10 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
 
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v64qs __attribute__((__vector_size__(64)));
+
 typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
 typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
 typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
@@ -1846,7 +1850,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi64(__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
+  return (__m512i)__builtin_elementwise_abs((__v8di)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1868,7 +1872,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi32(__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
+  return (__m512i)__builtin_elementwise_abs((__v16si) __A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 0519dba59081a..eddb99902e3d5 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_abs_epi64 (__m128i __A) {
-  return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
+  return (__m128i)__builtin_elementwise_abs((__v2di)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi64 (__m256i __A) {
-  return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
+  return (__m256i)__builtin_elementwise_abs((__v4di)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index bcffa8187801c..cb9be2349de5a 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -53,7 +53,7 @@ _mm_abs_pi8(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi8(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
+    return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
 }
 
 /// Computes the absolute value of each of the packed 16-bit signed
@@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi16(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
+    return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
 }
 
 /// Computes the absolute value of each of the packed 32-bit signed
@@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi32(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
+    return (__m128i)__builtin_elementwise_abs((__v4si)__a);
 }
 
 /// Concatenates the two 128-bit integer vector operands, and
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 61b9d53c74f9d..bfcd30072fc1f 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -259,11 +259,8 @@ void f0() {
   tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s);
   tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i);
   tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i);
-  tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c);
   tmp_V8c = __builtin_ia32_pabsb(tmp_V8c);
-  tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s);
   tmp_V4s = __builtin_ia32_pabsw(tmp_V4s);
-  tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i);
   tmp_V2i = __builtin_ia32_pabsd(tmp_V2i);
   tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi);
   tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi);

From ced077e1ba52ec2937aab538e9b6fa5149f8c567 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Fri, 21 Jan 2022 10:54:27 +0100
Subject: [PATCH 167/946] [clang][deps] NFC: Simplify handling of cached FS
 errors

The return types of some `CachedFileSystemEntry` member function are needlessly complex.

This patch attempts to simplify the code by unwrapping cached entries that represent errors early, and then asserting `!isError()`.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D115935
---
 .../DependencyScanningFilesystem.h            | 61 ++++++++++++-------
 .../DependencyScanningFilesystem.cpp          | 13 ++--
 2 files changed, 45 insertions(+), 29 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index 1358950b437c8..08a60fe780f50 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -57,25 +57,26 @@ class CachedFileSystemEntry {
     return !MaybeStat || MaybeStat->isStatusKnown();
   }
 
+  /// \returns True if the entry is a filesystem error.
+  bool isError() const { return !MaybeStat; }
+
   /// \returns True if the current entry points to a directory.
-  bool isDirectory() const { return MaybeStat && MaybeStat->isDirectory(); }
+  bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
 
-  /// \returns The error or the file's original contents.
-  llvm::ErrorOr<StringRef> getOriginalContents() const {
-    if (!MaybeStat)
-      return MaybeStat.getError();
-    assert(!MaybeStat->isDirectory() && "not a file");
+  /// \returns Original contents of the file.
+  StringRef getOriginalContents() const {
     assert(isInitialized() && "not initialized");
+    assert(!isError() && "error");
+    assert(!MaybeStat->isDirectory() && "not a file");
     assert(OriginalContents && "not read");
     return OriginalContents->getBuffer();
   }
 
-  /// \returns The error or the file's minimized contents.
-  llvm::ErrorOr<StringRef> getMinimizedContents() const {
-    if (!MaybeStat)
-      return MaybeStat.getError();
-    assert(!MaybeStat->isDirectory() && "not a file");
+  /// \returns Minimized contents of the file.
+  StringRef getMinimizedContents() const {
     assert(isInitialized() && "not initialized");
+    assert(!isError() && "error");
+    assert(!isDirectory() && "not a file");
     llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load();
     assert(Buffer && "not minimized");
     return Buffer->getBuffer();
@@ -94,21 +95,31 @@ class CachedFileSystemEntry {
     return ShouldBeMinimized && !MinimizedContentsAccess.load();
   }
 
-  /// \returns The error or the status of the entry.
-  llvm::ErrorOr<llvm::vfs::Status> getStatus() const {
+  /// \returns The error.
+  std::error_code getError() const {
+    assert(isInitialized() && "not initialized");
+    return MaybeStat.getError();
+  }
+
+  /// \returns The entry status.
+  llvm::vfs::Status getStatus() const {
     assert(isInitialized() && "not initialized");
-    return MaybeStat;
+    assert(!isError() && "error");
+    return *MaybeStat;
   }
 
   /// \returns the name of the file.
   StringRef getName() const {
     assert(isInitialized() && "not initialized");
+    assert(!isError() && "error");
     return MaybeStat->getName();
   }
 
   /// Return the mapping between location -> distance that is used to speed up
   /// the block skipping in the preprocessor.
   const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const {
+    assert(!isError() && "error");
+    assert(!isDirectory() && "not a file");
     return PPSkippedRangeMapping;
   }
 
@@ -183,19 +194,25 @@ class EntryRef {
   EntryRef(bool Minimized, const CachedFileSystemEntry &Entry)
       : Minimized(Minimized), Entry(Entry) {}
 
-  llvm::ErrorOr<llvm::vfs::Status> getStatus() const {
-    auto MaybeStat = Entry.getStatus();
-    if (!MaybeStat || MaybeStat->isDirectory())
-      return MaybeStat;
-    return llvm::vfs::Status::copyWithNewSize(*MaybeStat,
-                                              getContents()->size());
+  llvm::vfs::Status getStatus() const {
+    llvm::vfs::Status Stat = Entry.getStatus();
+    if (Stat.isDirectory())
+      return Stat;
+    return llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
   }
 
+  bool isError() const { return Entry.isError(); }
   bool isDirectory() const { return Entry.isDirectory(); }
-
   StringRef getName() const { return Entry.getName(); }
 
-  llvm::ErrorOr<StringRef> getContents() const {
+  /// If the cached entry represents an error, promotes it into `ErrorOr`.
+  llvm::ErrorOr<EntryRef> unwrapError() const {
+    if (isError())
+      return Entry.getError();
+    return *this;
+  }
+
+  StringRef getContents() const {
     return Minimized ? Entry.getMinimizedContents()
                      : Entry.getOriginalContents();
   }
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index acceec690c11e..6b8c692335bd6 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -164,7 +164,7 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
 
   const auto *Entry = LocalCache.getCachedEntry(Filename);
   if (Entry && !Entry->needsUpdate(ShouldBeMinimized))
-    return EntryRef(ShouldBeMinimized, *Entry);
+    return EntryRef(ShouldBeMinimized, *Entry).unwrapError();
 
   // FIXME: Handle PCM/PCH files.
   // FIXME: Handle module map files.
@@ -194,7 +194,7 @@ DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
 
   // Store the result in the local cache.
   Entry = &SharedCacheEntry.Value;
-  return EntryRef(ShouldBeMinimized, *Entry);
+  return EntryRef(ShouldBeMinimized, *Entry).unwrapError();
 }
 
 llvm::ErrorOr<llvm::vfs::Status>
@@ -241,16 +241,15 @@ class MinimizedVFSFile final : public llvm::vfs::File {
 
 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
     EntryRef Entry, ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings) {
+  assert(!Entry.isError() && "error");
+
   if (Entry.isDirectory())
     return std::make_error_code(std::errc::is_a_directory);
 
-  llvm::ErrorOr<StringRef> Contents = Entry.getContents();
-  if (!Contents)
-    return Contents.getError();
   auto Result = std::make_unique<MinimizedVFSFile>(
-      llvm::MemoryBuffer::getMemBuffer(*Contents, Entry.getName(),
+      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getName(),
                                        /*RequiresNullTerminator=*/false),
-      *Entry.getStatus());
+      Entry.getStatus());
 
   const auto *EntrySkipMappings = Entry.getPPSkippedRangeMapping();
   if (EntrySkipMappings && !EntrySkipMappings->empty() && PPSkipMappings)

From 5daeada33051aa85777593d3f69eb29f26e7fb2f Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Fri, 21 Jan 2022 10:55:34 +0100
Subject: [PATCH 168/946] [clang][deps] Ensure filesystem cache consistency

The minimizing filesystem used by the dependency scanner isn't great when it comes to the consistency of its caches. There are two problems that can be exposed by a filesystem that changes during dependency scan:
1. In-memory cache entries for original and minimized files are distinct, populated at different times using separate stat/open syscalls. This means that when a file is read with minimization disabled, its contents might be inconsistent when the same file is read with minimization enabled at later point (and vice versa).
2. In-memory cache entries are indexed by filename. This is problematic for symlinks, where the contents of the symlink might be inconsistent with contents of the original file (for the same reason as in problem 1).

This patch ensures consistency by always stating/reading a file exactly once. The original contents are always cached and minimized contents are derived from that on demand. The cache entries are now indexed by their `UniqueID` ensuring consistency for symlinks too. Moreover, the stat/read syscalls are now issued outside of critical section.

Depends on D115935.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D114966
---
 .../DependencyScanningFilesystem.h            | 302 +++++++++++++-----
 .../DependencyScanningFilesystem.cpp          | 224 +++++++++----
 .../Tooling/DependencyScannerTest.cpp         |   4 +
 3 files changed, 379 insertions(+), 151 deletions(-)

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index 08a60fe780f50..70e9c4a3ffea2 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -22,6 +22,26 @@ namespace clang {
 namespace tooling {
 namespace dependencies {
 
+/// Original and minimized contents of a cached file entry. Single instance can
+/// be shared between multiple entries.
+struct CachedFileContents {
+  CachedFileContents(std::unique_ptr<llvm::MemoryBuffer> Original)
+      : Original(std::move(Original)), MinimizedAccess(nullptr) {}
+
+  /// Owning storage for the minimized contents.
+  std::unique_ptr<llvm::MemoryBuffer> Original;
+
+  /// The mutex that must be locked before mutating minimized contents.
+  std::mutex ValueLock;
+  /// Owning storage for the minimized contents.
+  std::unique_ptr<llvm::MemoryBuffer> MinimizedStorage;
+  /// Accessor to the minimized contents that's atomic to avoid data races.
+  std::atomic<llvm::MemoryBuffer *> MinimizedAccess;
+  /// Skipped range mapping of the minimized contents.
+  /// This is initialized iff `MinimizedAccess != nullptr`.
+  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+};
+
 /// An in-memory representation of a file system entity that is of interest to
 /// the dependency scanning filesystem.
 ///
@@ -29,111 +49,99 @@ namespace dependencies {
 /// - opened file with original contents and a stat value,
 /// - opened file with original contents, minimized contents and a stat value,
 /// - directory entry with its stat value,
-/// - filesystem error,
-/// - uninitialized entry with unknown status.
+/// - filesystem error.
+///
+/// Single instance of this class can be shared across different filenames (e.g.
+/// a regular file and a symlink). For this reason the status filename is empty
+/// and is only materialized by \c EntryRef that knows the requested filename.
 class CachedFileSystemEntry {
 public:
-  /// Creates an uninitialized entry.
-  CachedFileSystemEntry()
-      : MaybeStat(llvm::vfs::Status()), MinimizedContentsAccess(nullptr) {}
-
-  /// Initialize the cached file system entry.
-  void init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus, StringRef Filename,
-            llvm::vfs::FileSystem &FS);
+  /// Creates an entry without contents: either a filesystem error or
+  /// a directory with stat value.
+  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat)
+      : MaybeStat(std::move(Stat)), Contents(nullptr) {
+    clearStatName();
+  }
 
-  /// Initialize the entry as file with minimized or original contents.
-  ///
-  /// The filesystem opens the file even for `stat` calls open to avoid the
-  /// issues with stat + open of minimized files that might lead to a
-  /// mismatching size of the file.
-  llvm::ErrorOr<llvm::vfs::Status> initFile(StringRef Filename,
-                                            llvm::vfs::FileSystem &FS);
-
-  /// Minimize contents of the file.
-  void minimizeFile();
-
-  /// \returns True if the entry is initialized.
-  bool isInitialized() const {
-    return !MaybeStat || MaybeStat->isStatusKnown();
+  /// Creates an entry representing a file with contents.
+  CachedFileSystemEntry(llvm::ErrorOr<llvm::vfs::Status> Stat,
+                        CachedFileContents *Contents)
+      : MaybeStat(std::move(Stat)), Contents(std::move(Contents)) {
+    clearStatName();
   }
 
   /// \returns True if the entry is a filesystem error.
   bool isError() const { return !MaybeStat; }
 
-  /// \returns True if the current entry points to a directory.
+  /// \returns True if the current entry represents a directory.
   bool isDirectory() const { return !isError() && MaybeStat->isDirectory(); }
 
   /// \returns Original contents of the file.
   StringRef getOriginalContents() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
     assert(!MaybeStat->isDirectory() && "not a file");
-    assert(OriginalContents && "not read");
-    return OriginalContents->getBuffer();
+    assert(Contents && "contents not initialized");
+    return Contents->Original->getBuffer();
   }
 
   /// \returns Minimized contents of the file.
   StringRef getMinimizedContents() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
-    assert(!isDirectory() && "not a file");
-    llvm::MemoryBuffer *Buffer = MinimizedContentsAccess.load();
+    assert(!MaybeStat->isDirectory() && "not a file");
+    assert(Contents && "contents not initialized");
+    llvm::MemoryBuffer *Buffer = Contents->MinimizedAccess.load();
     assert(Buffer && "not minimized");
     return Buffer->getBuffer();
   }
 
-  /// \returns True if this entry represents a file that can be read.
-  bool isReadable() const { return MaybeStat && !MaybeStat->isDirectory(); }
-
-  /// \returns True if this cached entry needs to be updated.
-  bool needsUpdate(bool ShouldBeMinimized) const {
-    return isReadable() && needsMinimization(ShouldBeMinimized);
-  }
-
-  /// \returns True if the contents of this entry need to be minimized.
-  bool needsMinimization(bool ShouldBeMinimized) const {
-    return ShouldBeMinimized && !MinimizedContentsAccess.load();
-  }
-
   /// \returns The error.
-  std::error_code getError() const {
-    assert(isInitialized() && "not initialized");
-    return MaybeStat.getError();
-  }
+  std::error_code getError() const { return MaybeStat.getError(); }
 
-  /// \returns The entry status.
+  /// \returns The entry status with empty filename.
   llvm::vfs::Status getStatus() const {
-    assert(isInitialized() && "not initialized");
     assert(!isError() && "error");
+    assert(MaybeStat->getName().empty() && "stat name must be empty");
     return *MaybeStat;
   }
 
-  /// \returns the name of the file.
-  StringRef getName() const {
-    assert(isInitialized() && "not initialized");
+  /// \returns The unique ID of the entry.
+  llvm::sys::fs::UniqueID getUniqueID() const {
     assert(!isError() && "error");
-    return MaybeStat->getName();
+    return MaybeStat->getUniqueID();
   }
 
-  /// Return the mapping between location -> distance that is used to speed up
+  /// \returns The mapping between location -> distance that is used to speed up
   /// the block skipping in the preprocessor.
   const PreprocessorSkippedRangeMapping &getPPSkippedRangeMapping() const {
     assert(!isError() && "error");
     assert(!isDirectory() && "not a file");
-    return PPSkippedRangeMapping;
+    assert(Contents && "contents not initialized");
+    return Contents->PPSkippedRangeMapping;
+  }
+
+  /// \returns The data structure holding both original and minimized contents.
+  CachedFileContents *getContents() const {
+    assert(!isError() && "error");
+    assert(!isDirectory() && "not a file");
+    return Contents;
   }
 
 private:
-  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
-  std::unique_ptr<llvm::MemoryBuffer> OriginalContents;
+  void clearStatName() {
+    if (MaybeStat)
+      MaybeStat = llvm::vfs::Status::copyWithNewName(*MaybeStat, "");
+  }
 
-  /// Owning storage for the minimized file contents.
-  std::unique_ptr<llvm::MemoryBuffer> MinimizedContentsStorage;
-  /// Atomic view of the minimized file contents.
-  /// This prevents data races when multiple threads call `needsMinimization`.
-  std::atomic<llvm::MemoryBuffer *> MinimizedContentsAccess;
+  /// Either the filesystem error or status of the entry.
+  /// The filename is empty and only materialized by \c EntryRef.
+  llvm::ErrorOr<llvm::vfs::Status> MaybeStat;
 
-  PreprocessorSkippedRangeMapping PPSkippedRangeMapping;
+  /// Non-owning pointer to the file contents.
+  ///
+  /// We're using pointer here to keep the size of this class small. Instances
+  /// representing directories and filesystem errors don't hold any contents
+  /// anyway.
+  CachedFileContents *Contents;
 };
 
 /// This class is a shared cache, that caches the 'stat' and 'open' calls to the
@@ -144,24 +152,59 @@ class CachedFileSystemEntry {
 /// the worker threads.
 class DependencyScanningFilesystemSharedCache {
 public:
-  struct SharedFileSystemEntry {
-    std::mutex ValueLock;
-    CachedFileSystemEntry Value;
+  struct CacheShard {
+    /// The mutex that needs to be locked before mutation of any member.
+    mutable std::mutex CacheLock;
+
+    /// Map from filenames to cached entries.
+    llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
+        EntriesByFilename;
+
+    /// Map from unique IDs to cached entries.
+    llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
+        EntriesByUID;
+
+    /// The backing storage for cached entries.
+    llvm::SpecificBumpPtrAllocator<CachedFileSystemEntry> EntryStorage;
+
+    /// The backing storage for cached contents.
+    llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
+
+    /// Returns entry associated with the filename or nullptr if none is found.
+    const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
+
+    /// Returns entry associated with the unique ID or nullptr if none is found.
+    const CachedFileSystemEntry *
+    findEntryByUID(llvm::sys::fs::UniqueID UID) const;
+
+    /// Returns entry associated with the filename if there is some. Otherwise,
+    /// constructs new one with the given status, associates it with the
+    /// filename and returns the result.
+    const CachedFileSystemEntry &
+    getOrEmplaceEntryForFilename(StringRef Filename,
+                                 llvm::ErrorOr<llvm::vfs::Status> Stat);
+
+    /// Returns entry associated with the unique ID if there is some. Otherwise,
+    /// constructs new one with the given status and contents, associates it
+    /// with the unique ID and returns the result.
+    const CachedFileSystemEntry &
+    getOrEmplaceEntryForUID(llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
+                            std::unique_ptr<llvm::MemoryBuffer> Contents);
+
+    /// Returns entry associated with the filename if there is some. Otherwise,
+    /// associates the given entry with the filename and returns it.
+    const CachedFileSystemEntry &
+    getOrInsertEntryForFilename(StringRef Filename,
+                                const CachedFileSystemEntry &Entry);
   };
 
   DependencyScanningFilesystemSharedCache();
 
-  /// Returns a cache entry for the corresponding key.
-  ///
-  /// A new cache entry is created if the key is not in the cache. This is a
-  /// thread safe call.
-  SharedFileSystemEntry &get(StringRef Key);
+  /// Returns shard for the given key.
+  CacheShard &getShardForFilename(StringRef Filename) const;
+  CacheShard &getShardForUID(llvm::sys::fs::UniqueID UID) const;
 
 private:
-  struct CacheShard {
-    std::mutex CacheLock;
-    llvm::StringMap<SharedFileSystemEntry, llvm::BumpPtrAllocator> Cache;
-  };
   std::unique_ptr<CacheShard[]> CacheShards;
   unsigned NumShards;
 };
@@ -173,8 +216,20 @@ class DependencyScanningFilesystemLocalCache {
   llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
 
 public:
-  const CachedFileSystemEntry *getCachedEntry(StringRef Filename) {
-    return Cache[Filename];
+  /// Returns entry associated with the filename or nullptr if none is found.
+  const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
+    auto It = Cache.find(Filename);
+    return It == Cache.end() ? nullptr : It->getValue();
+  }
+
+  /// Associates the given entry with the filename and returns the given entry
+  /// pointer (for convenience).
+  const CachedFileSystemEntry &
+  insertEntryForFilename(StringRef Filename,
+                         const CachedFileSystemEntry &Entry) {
+    const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
+    assert(InsertedEntry == &Entry && "entry already present");
+    return *InsertedEntry;
   }
 };
 
@@ -187,23 +242,25 @@ class EntryRef {
   /// are minimized.
   bool Minimized;
 
+  /// The filename used to access this entry.
+  std::string Filename;
+
   /// The underlying cached entry.
   const CachedFileSystemEntry &Entry;
 
 public:
-  EntryRef(bool Minimized, const CachedFileSystemEntry &Entry)
-      : Minimized(Minimized), Entry(Entry) {}
+  EntryRef(bool Minimized, StringRef Name, const CachedFileSystemEntry &Entry)
+      : Minimized(Minimized), Filename(Name), Entry(Entry) {}
 
   llvm::vfs::Status getStatus() const {
     llvm::vfs::Status Stat = Entry.getStatus();
-    if (Stat.isDirectory())
-      return Stat;
-    return llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
+    if (!Stat.isDirectory())
+      Stat = llvm::vfs::Status::copyWithNewSize(Stat, getContents().size());
+    return llvm::vfs::Status::copyWithNewName(Stat, Filename);
   }
 
   bool isError() const { return Entry.isError(); }
   bool isDirectory() const { return Entry.isDirectory(); }
-  StringRef getName() const { return Entry.getName(); }
 
   /// If the cached entry represents an error, promotes it into `ErrorOr`.
   llvm::ErrorOr<EntryRef> unwrapError() const {
@@ -253,8 +310,87 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// Check whether the file should be minimized.
   bool shouldMinimize(StringRef Filename);
 
+  /// Returns entry for the given filename.
+  ///
+  /// Attempts to use the local and shared caches first, then falls back to
+  /// using the underlying filesystem.
   llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
 
+  /// For a filename that's not yet associated with any entry in the caches,
+  /// uses the underlying filesystem to either look up the entry based in the
+  /// shared cache indexed by unique ID, or creates new entry from scratch.
+  llvm::ErrorOr<const CachedFileSystemEntry &>
+  computeAndStoreResult(StringRef Filename);
+
+  /// Minimizes the given entry if necessary and returns a wrapper object with
+  /// reference semantics.
+  EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry,
+                               StringRef Filename);
+
+  /// Represents a filesystem entry that has been stat-ed (and potentially read)
+  /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
+  struct TentativeEntry {
+    llvm::vfs::Status Status;
+    std::unique_ptr<llvm::MemoryBuffer> Contents;
+
+    TentativeEntry(llvm::vfs::Status Status,
+                   std::unique_ptr<llvm::MemoryBuffer> Contents = nullptr)
+        : Status(std::move(Status)), Contents(std::move(Contents)) {}
+  };
+
+  /// Reads file at the given path. Enforces consistency between the file size
+  /// in status and size of read contents.
+  llvm::ErrorOr<TentativeEntry> readFile(StringRef Filename);
+
+  /// Returns entry associated with the unique ID of the given tentative entry
+  /// if there is some in the shared cache. Otherwise, constructs new one,
+  /// associates it with the unique ID and returns the result.
+  const CachedFileSystemEntry &
+  getOrEmplaceSharedEntryForUID(TentativeEntry TEntry);
+
+  /// Returns entry associated with the filename or nullptr if none is found.
+  ///
+  /// Returns entry from local cache if there is some. Otherwise, if the entry
+  /// is found in the shared cache, writes it through the local cache and
+  /// returns it. Otherwise returns nullptr.
+  const CachedFileSystemEntry *
+  findEntryByFilenameWithWriteThrough(StringRef Filename);
+
+  /// Returns entry associated with the unique ID in the shared cache or nullptr
+  /// if none is found.
+  const CachedFileSystemEntry *
+  findSharedEntryByUID(llvm::vfs::Status Stat) const {
+    return SharedCache.getShardForUID(Stat.getUniqueID())
+        .findEntryByUID(Stat.getUniqueID());
+  }
+
+  /// Associates the given entry with the filename in the local cache and
+  /// returns it.
+  const CachedFileSystemEntry &
+  insertLocalEntryForFilename(StringRef Filename,
+                              const CachedFileSystemEntry &Entry) {
+    return LocalCache.insertEntryForFilename(Filename, Entry);
+  }
+
+  /// Returns entry associated with the filename in the shared cache if there is
+  /// some. Otherwise, constructs new one with the given error code, associates
+  /// it with the filename and returns the result.
+  const CachedFileSystemEntry &
+  getOrEmplaceSharedEntryForFilename(StringRef Filename, std::error_code EC) {
+    return SharedCache.getShardForFilename(Filename)
+        .getOrEmplaceEntryForFilename(Filename, EC);
+  }
+
+  /// Returns entry associated with the filename in the shared cache if there is
+  /// some. Otherwise, associates the given entry with the filename and returns
+  /// it.
+  const CachedFileSystemEntry &
+  getOrInsertSharedEntryForFilename(StringRef Filename,
+                                    const CachedFileSystemEntry &Entry) {
+    return SharedCache.getShardForFilename(Filename)
+        .getOrInsertEntryForFilename(Filename, Entry);
+  }
+
   /// The global cache shared between worker threads.
   DependencyScanningFilesystemSharedCache &SharedCache;
   /// The local cache is used by the worker thread to cache file system queries
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index 6b8c692335bd6..cc8968a7b680f 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -16,10 +16,10 @@ using namespace clang;
 using namespace tooling;
 using namespace dependencies;
 
-llvm::ErrorOr<llvm::vfs::Status>
-CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) {
+llvm::ErrorOr<DependencyScanningWorkerFilesystem::TentativeEntry>
+DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
   // Load the file and its content from the file system.
-  auto MaybeFile = FS.openFileForRead(Filename);
+  auto MaybeFile = getUnderlyingFS().openFileForRead(Filename);
   if (!MaybeFile)
     return MaybeFile.getError();
   auto File = std::move(*MaybeFile);
@@ -34,24 +34,42 @@ CachedFileSystemEntry::initFile(StringRef Filename, llvm::vfs::FileSystem &FS) {
     return MaybeBuffer.getError();
   auto Buffer = std::move(*MaybeBuffer);
 
-  OriginalContents = std::move(Buffer);
-  return Stat;
+  // If the file size changed between read and stat, pretend it didn't.
+  if (Stat.getSize() != Buffer->getBufferSize())
+    Stat = llvm::vfs::Status::copyWithNewSize(Stat, Buffer->getBufferSize());
+
+  return TentativeEntry(Stat, std::move(Buffer));
 }
 
-void CachedFileSystemEntry::minimizeFile() {
-  assert(OriginalContents && "minimizing missing contents");
+EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary(
+    const CachedFileSystemEntry &Entry, StringRef Filename) {
+  if (Entry.isError() || Entry.isDirectory() || !shouldMinimize(Filename))
+    return EntryRef(/*Minimized=*/false, Filename, Entry);
+
+  CachedFileContents *Contents = Entry.getContents();
+  assert(Contents && "contents not initialized");
+
+  // Double-checked locking.
+  if (Contents->MinimizedAccess.load())
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
+
+  std::lock_guard<std::mutex> GuardLock(Contents->ValueLock);
+
+  // Double-checked locking.
+  if (Contents->MinimizedAccess.load())
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
 
   llvm::SmallString<1024> MinimizedFileContents;
   // Minimize the file down to directives that might affect the dependencies.
   SmallVector<minimize_source_to_dependency_directives::Token, 64> Tokens;
-  if (minimizeSourceToDependencyDirectives(OriginalContents->getBuffer(),
+  if (minimizeSourceToDependencyDirectives(Contents->Original->getBuffer(),
                                            MinimizedFileContents, Tokens)) {
     // FIXME: Propagate the diagnostic if desired by the client.
     // Use the original file if the minimization failed.
-    MinimizedContentsStorage =
-        llvm::MemoryBuffer::getMemBuffer(*OriginalContents);
-    MinimizedContentsAccess.store(MinimizedContentsStorage.get());
-    return;
+    Contents->MinimizedStorage =
+        llvm::MemoryBuffer::getMemBuffer(*Contents->Original);
+    Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
+    return EntryRef(/*Minimized=*/true, Filename, Entry);
   }
 
   // The contents produced by the minimizer must be null terminated.
@@ -74,16 +92,17 @@ void CachedFileSystemEntry::minimizeFile() {
     }
     Mapping[Range.Offset] = Range.Length;
   }
-  PPSkippedRangeMapping = std::move(Mapping);
+  Contents->PPSkippedRangeMapping = std::move(Mapping);
 
-  MinimizedContentsStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
+  Contents->MinimizedStorage = std::make_unique<llvm::SmallVectorMemoryBuffer>(
       std::move(MinimizedFileContents));
-  // The algorithm in `getOrCreateFileSystemEntry` uses the presence of
-  // minimized contents to decide whether an entry is up-to-date or not.
-  // If it is up-to-date, the skipped range mappings must be already computed.
-  // This is why we need to store the minimized contents **after** storing the
-  // skipped range mappings. Failing to do so would lead to a data race.
-  MinimizedContentsAccess.store(MinimizedContentsStorage.get());
+  // This function performed double-checked locking using `MinimizedAccess`.
+  // Assigning it must be the last thing this function does. If we were to
+  // assign it before `PPSkippedRangeMapping`, other threads may skip the
+  // critical section (`MinimizedAccess != nullptr`) and access the mappings
+  // that are about to be initialized, leading to a data race.
+  Contents->MinimizedAccess.store(Contents->MinimizedStorage.get());
+  return EntryRef(/*Minimized=*/true, Filename, Entry);
 }
 
 DependencyScanningFilesystemSharedCache::
@@ -98,12 +117,70 @@ DependencyScanningFilesystemSharedCache::
   CacheShards = std::make_unique<CacheShard[]>(NumShards);
 }
 
-DependencyScanningFilesystemSharedCache::SharedFileSystemEntry &
-DependencyScanningFilesystemSharedCache::get(StringRef Key) {
-  CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
-  std::lock_guard<std::mutex> LockGuard(Shard.CacheLock);
-  auto It = Shard.Cache.try_emplace(Key);
-  return It.first->getValue();
+DependencyScanningFilesystemSharedCache::CacheShard &
+DependencyScanningFilesystemSharedCache::getShardForFilename(
+    StringRef Filename) const {
+  return CacheShards[llvm::hash_value(Filename) % NumShards];
+}
+
+DependencyScanningFilesystemSharedCache::CacheShard &
+DependencyScanningFilesystemSharedCache::getShardForUID(
+    llvm::sys::fs::UniqueID UID) const {
+  auto Hash = llvm::hash_combine(UID.getDevice(), UID.getFile());
+  return CacheShards[Hash % NumShards];
+}
+
+const CachedFileSystemEntry *
+DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
+    StringRef Filename) const {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = EntriesByFilename.find(Filename);
+  return It == EntriesByFilename.end() ? nullptr : It->getValue();
+}
+
+const CachedFileSystemEntry *
+DependencyScanningFilesystemSharedCache::CacheShard::findEntryByUID(
+    llvm::sys::fs::UniqueID UID) const {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto It = EntriesByUID.find(UID);
+  return It == EntriesByUID.end() ? nullptr : It->getSecond();
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrEmplaceEntryForFilename(StringRef Filename,
+                                 llvm::ErrorOr<llvm::vfs::Status> Stat) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto Insertion = EntriesByFilename.insert({Filename, nullptr});
+  if (Insertion.second)
+    Insertion.first->second =
+        new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
+  return *Insertion.first->second;
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
+    llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
+    std::unique_ptr<llvm::MemoryBuffer> Contents) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  auto Insertion = EntriesByUID.insert({UID, nullptr});
+  if (Insertion.second) {
+    CachedFileContents *StoredContents = nullptr;
+    if (Contents)
+      StoredContents = new (ContentsStorage.Allocate())
+          CachedFileContents(std::move(Contents));
+    Insertion.first->second = new (EntryStorage.Allocate())
+        CachedFileSystemEntry(std::move(Stat), StoredContents);
+  }
+  return *Insertion.first->second;
+}
+
+const CachedFileSystemEntry &
+DependencyScanningFilesystemSharedCache::CacheShard::
+    getOrInsertEntryForFilename(StringRef Filename,
+                                const CachedFileSystemEntry &Entry) {
+  std::lock_guard<std::mutex> LockGuard(CacheLock);
+  return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
 }
 
 /// Whitelist file extensions that should be minimized, treating no extension as
@@ -148,53 +225,63 @@ bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
   return !NotToBeMinimized.contains(Filename);
 }
 
-void CachedFileSystemEntry::init(llvm::ErrorOr<llvm::vfs::Status> &&MaybeStatus,
-                                 StringRef Filename,
-                                 llvm::vfs::FileSystem &FS) {
-  if (!MaybeStatus || MaybeStatus->isDirectory())
-    MaybeStat = std::move(MaybeStatus);
-  else
-    MaybeStat = initFile(Filename, FS);
+const CachedFileSystemEntry &
+DependencyScanningWorkerFilesystem::getOrEmplaceSharedEntryForUID(
+    TentativeEntry TEntry) {
+  auto &Shard = SharedCache.getShardForUID(TEntry.Status.getUniqueID());
+  return Shard.getOrEmplaceEntryForUID(TEntry.Status.getUniqueID(),
+                                       std::move(TEntry.Status),
+                                       std::move(TEntry.Contents));
 }
 
-llvm::ErrorOr<EntryRef>
-DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
+const CachedFileSystemEntry *
+DependencyScanningWorkerFilesystem::findEntryByFilenameWithWriteThrough(
     StringRef Filename) {
-  bool ShouldBeMinimized = shouldMinimize(Filename);
-
-  const auto *Entry = LocalCache.getCachedEntry(Filename);
-  if (Entry && !Entry->needsUpdate(ShouldBeMinimized))
-    return EntryRef(ShouldBeMinimized, *Entry).unwrapError();
-
-  // FIXME: Handle PCM/PCH files.
-  // FIXME: Handle module map files.
-
-  auto &SharedCacheEntry = SharedCache.get(Filename);
-  {
-    std::lock_guard<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
-    CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
-
-    if (!CacheEntry.isInitialized()) {
-      auto MaybeStatus = getUnderlyingFS().status(Filename);
-      if (!MaybeStatus && !shouldCacheStatFailures(Filename))
-        // HACK: We need to always restat non source files if the stat fails.
-        //   This is because Clang first looks up the module cache and module
-        //   files before building them, and then looks for them again. If we
-        //   cache the stat failure, it won't see them the second time.
-        return MaybeStatus.getError();
-      CacheEntry.init(std::move(MaybeStatus), Filename, getUnderlyingFS());
-    }
+  if (const auto *Entry = LocalCache.findEntryByFilename(Filename))
+    return Entry;
+  auto &Shard = SharedCache.getShardForFilename(Filename);
+  if (const auto *Entry = Shard.findEntryByFilename(Filename))
+    return &LocalCache.insertEntryForFilename(Filename, *Entry);
+  return nullptr;
+}
 
-    // Checking `needsUpdate` verifies the entry represents an opened file.
-    // Only checking `needsMinimization` could lead to minimization of files
-    // that we failed to load (such files don't have `OriginalContents`).
-    if (CacheEntry.needsUpdate(ShouldBeMinimized))
-      CacheEntry.minimizeFile();
+llvm::ErrorOr<const CachedFileSystemEntry &>
+DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
+  llvm::ErrorOr<llvm::vfs::Status> Stat = getUnderlyingFS().status(Filename);
+  if (!Stat) {
+    if (!shouldCacheStatFailures(Filename))
+      return Stat.getError();
+    const auto &Entry =
+        getOrEmplaceSharedEntryForFilename(Filename, Stat.getError());
+    return insertLocalEntryForFilename(Filename, Entry);
   }
 
-  // Store the result in the local cache.
-  Entry = &SharedCacheEntry.Value;
-  return EntryRef(ShouldBeMinimized, *Entry).unwrapError();
+  if (const auto *Entry = findSharedEntryByUID(*Stat))
+    return insertLocalEntryForFilename(Filename, *Entry);
+
+  auto TEntry =
+      Stat->isDirectory() ? TentativeEntry(*Stat) : readFile(Filename);
+
+  const CachedFileSystemEntry *SharedEntry = [&]() {
+    if (TEntry) {
+      const auto &UIDEntry = getOrEmplaceSharedEntryForUID(std::move(*TEntry));
+      return &getOrInsertSharedEntryForFilename(Filename, UIDEntry);
+    }
+    return &getOrEmplaceSharedEntryForFilename(Filename, TEntry.getError());
+  }();
+
+  return insertLocalEntryForFilename(Filename, *SharedEntry);
+}
+
+llvm::ErrorOr<EntryRef>
+DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
+    StringRef Filename) {
+  if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
+    return minimizeIfNecessary(*Entry, Filename).unwrapError();
+  auto MaybeEntry = computeAndStoreResult(Filename);
+  if (!MaybeEntry)
+    return MaybeEntry.getError();
+  return minimizeIfNecessary(*MaybeEntry, Filename).unwrapError();
 }
 
 llvm::ErrorOr<llvm::vfs::Status>
@@ -247,7 +334,8 @@ llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MinimizedVFSFile::create(
     return std::make_error_code(std::errc::is_a_directory);
 
   auto Result = std::make_unique<MinimizedVFSFile>(
-      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(), Entry.getName(),
+      llvm::MemoryBuffer::getMemBuffer(Entry.getContents(),
+                                       Entry.getStatus().getName(),
                                        /*RequiresNullTerminator=*/false),
       Entry.getStatus());
 
diff --git a/clang/unittests/Tooling/DependencyScannerTest.cpp b/clang/unittests/Tooling/DependencyScannerTest.cpp
index 90ab1df267530..784d759986375 100644
--- a/clang/unittests/Tooling/DependencyScannerTest.cpp
+++ b/clang/unittests/Tooling/DependencyScannerTest.cpp
@@ -224,6 +224,8 @@ TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately1) {
   EXPECT_TRUE(StatusFull1);
   EXPECT_EQ(StatusMinimized0->getSize(), 17u);
   EXPECT_EQ(StatusFull1->getSize(), 30u);
+  EXPECT_EQ(StatusMinimized0->getName(), StringRef("/mod.h"));
+  EXPECT_EQ(StatusFull1->getName(), StringRef("/mod.h"));
 }
 
 TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) {
@@ -245,6 +247,8 @@ TEST(DependencyScanningFilesystem, IgnoredFilesAreCachedSeparately2) {
   EXPECT_TRUE(StatusMinimized1);
   EXPECT_EQ(StatusFull0->getSize(), 30u);
   EXPECT_EQ(StatusMinimized1->getSize(), 17u);
+  EXPECT_EQ(StatusFull0->getName(), StringRef("/mod.h"));
+  EXPECT_EQ(StatusMinimized1->getName(), StringRef("/mod.h"));
 }
 
 } // end namespace dependencies

From 8cc2a137270462bc191377dbab97c739583814dd Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Fri, 21 Jan 2022 11:18:22 +0100
Subject: [PATCH 169/946] [clang][deps] Handle symlinks in minimizing FS

The minimizing and caching filesystem used by the dependency scanner can be configured to **not** minimize some files. That's necessary when scanning a TU with prebuilt inputs (i.e. PCH) that refer to the original (non-minimized) files. Minimizing such files in the dependency scanner would cause discrepancy between the current perceived state of the filesystem and the file sizes stored in the AST file. By not minimizing such files, we avoid creating the discrepancy.

The problem with the current approach is that files that should not be minimized are identified by their path. This breaks down when the prebuilt input (PCH) and the current TU refer to the same file via different paths (i.e. symlinks). This patch switches from paths to `llvm::sys::fs::UniqueID` when identifying ignored files. This is consistent with how the rest of Clang treats files.

Depends on D114966.

Reviewed By: dexonsmith, arphaman

Differential Revision: https://reviews.llvm.org/D114971
---
 .../DependencyScanningFilesystem.h            | 12 +++--
 .../DependencyScanningFilesystem.cpp          | 34 ++++++------
 clang/test/ClangScanDeps/modules-symlink.c    | 54 +++++++++++++++++++
 3 files changed, 79 insertions(+), 21 deletions(-)
 create mode 100644 clang/test/ClangScanDeps/modules-symlink.c

diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
index 70e9c4a3ffea2..7c830d3f27333 100644
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -11,8 +11,8 @@
 
 #include "clang/Basic/LLVM.h"
 #include "clang/Lex/PreprocessorExcludedConditionalDirectiveSkipMapping.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/VirtualFileSystem.h"
@@ -308,13 +308,15 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
 
 private:
   /// Check whether the file should be minimized.
-  bool shouldMinimize(StringRef Filename);
+  bool shouldMinimize(StringRef Filename, llvm::sys::fs::UniqueID UID);
 
   /// Returns entry for the given filename.
   ///
   /// Attempts to use the local and shared caches first, then falls back to
   /// using the underlying filesystem.
-  llvm::ErrorOr<EntryRef> getOrCreateFileSystemEntry(StringRef Filename);
+  llvm::ErrorOr<EntryRef>
+  getOrCreateFileSystemEntry(StringRef Filename,
+                             bool DisableMinimization = false);
 
   /// For a filename that's not yet associated with any entry in the caches,
   /// uses the underlying filesystem to either look up the entry based in the
@@ -325,7 +327,7 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// Minimizes the given entry if necessary and returns a wrapper object with
   /// reference semantics.
   EntryRef minimizeIfNecessary(const CachedFileSystemEntry &Entry,
-                               StringRef Filename);
+                               StringRef Filename, bool Disable);
 
   /// Represents a filesystem entry that has been stat-ed (and potentially read)
   /// and that's about to be inserted into the cache as `CachedFileSystemEntry`.
@@ -401,7 +403,7 @@ class DependencyScanningWorkerFilesystem : public llvm::vfs::ProxyFileSystem {
   /// currently active preprocessor.
   ExcludedPreprocessorDirectiveSkipMapping *PPSkipMappings;
   /// The set of files that should not be minimized.
-  llvm::StringSet<> NotToBeMinimized;
+  llvm::DenseSet<llvm::sys::fs::UniqueID> NotToBeMinimized;
 };
 
 } // end namespace dependencies
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
index cc8968a7b680f..80a70252721d8 100644
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
@@ -42,8 +42,9 @@ DependencyScanningWorkerFilesystem::readFile(StringRef Filename) {
 }
 
 EntryRef DependencyScanningWorkerFilesystem::minimizeIfNecessary(
-    const CachedFileSystemEntry &Entry, StringRef Filename) {
-  if (Entry.isError() || Entry.isDirectory() || !shouldMinimize(Filename))
+    const CachedFileSystemEntry &Entry, StringRef Filename, bool Disable) {
+  if (Entry.isError() || Entry.isDirectory() || Disable ||
+      !shouldMinimize(Filename, Entry.getUniqueID()))
     return EntryRef(/*Minimized=*/false, Filename, Entry);
 
   CachedFileContents *Contents = Entry.getContents();
@@ -210,19 +211,18 @@ static bool shouldCacheStatFailures(StringRef Filename) {
 }
 
 void DependencyScanningWorkerFilesystem::disableMinimization(
-    StringRef RawFilename) {
-  llvm::SmallString<256> Filename;
-  llvm::sys::path::native(RawFilename, Filename);
-  NotToBeMinimized.insert(Filename);
+    StringRef Filename) {
+  // Since we're not done setting up `NotToBeMinimized` yet, we need to disable
+  // minimization explicitly.
+  if (llvm::ErrorOr<EntryRef> Result =
+          getOrCreateFileSystemEntry(Filename, /*DisableMinimization=*/true))
+    NotToBeMinimized.insert(Result->getStatus().getUniqueID());
 }
 
-bool DependencyScanningWorkerFilesystem::shouldMinimize(StringRef RawFilename) {
-  if (!shouldMinimizeBasedOnExtension(RawFilename))
-    return false;
-
-  llvm::SmallString<256> Filename;
-  llvm::sys::path::native(RawFilename, Filename);
-  return !NotToBeMinimized.contains(Filename);
+bool DependencyScanningWorkerFilesystem::shouldMinimize(
+    StringRef Filename, llvm::sys::fs::UniqueID UID) {
+  return shouldMinimizeBasedOnExtension(Filename) &&
+         !NotToBeMinimized.contains(UID);
 }
 
 const CachedFileSystemEntry &
@@ -275,13 +275,15 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(StringRef Filename) {
 
 llvm::ErrorOr<EntryRef>
 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
-    StringRef Filename) {
+    StringRef Filename, bool DisableMinimization) {
   if (const auto *Entry = findEntryByFilenameWithWriteThrough(Filename))
-    return minimizeIfNecessary(*Entry, Filename).unwrapError();
+    return minimizeIfNecessary(*Entry, Filename, DisableMinimization)
+        .unwrapError();
   auto MaybeEntry = computeAndStoreResult(Filename);
   if (!MaybeEntry)
     return MaybeEntry.getError();
-  return minimizeIfNecessary(*MaybeEntry, Filename).unwrapError();
+  return minimizeIfNecessary(*MaybeEntry, Filename, DisableMinimization)
+      .unwrapError();
 }
 
 llvm::ErrorOr<llvm::vfs::Status>
diff --git a/clang/test/ClangScanDeps/modules-symlink.c b/clang/test/ClangScanDeps/modules-symlink.c
new file mode 100644
index 0000000000000..1a2fe2d9f5123
--- /dev/null
+++ b/clang/test/ClangScanDeps/modules-symlink.c
@@ -0,0 +1,54 @@
+// RUN: rm -rf %t
+// RUN: split-file %s %t
+
+//--- cdb_pch.json
+[
+  {
+    "directory": "DIR",
+    "command": "clang -x c-header DIR/pch.h -fmodules -gmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -o DIR/pch.h.gch",
+    "file": "DIR/pch.h"
+  }
+]
+
+//--- cdb_tu.json
+[
+  {
+    "directory": "DIR",
+    "command": "clang -c DIR/tu.c -fmodules -gmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -include DIR/pch.h -o DIR/tu.o",
+    "file": "DIR/tu.c"
+  }
+]
+
+//--- module.modulemap
+module mod { header "symlink.h" }
+
+//--- pch.h
+#include "symlink.h"
+
+//--- original.h
+// Comment that will be stripped by the minimizer.
+#define MACRO 1
+
+//--- tu.c
+#include "original.h"
+static int foo = MACRO; // Macro usage that will trigger
+                        // input file consistency checks.
+
+// RUN: ln -s %t/original.h %t/symlink.h
+
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb_pch.json > %t/cdb.json
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full \
+// RUN:   -generate-modules-path-args -module-files-dir %t/build > %t/result_pch.json
+//
+// RUN: %python %S/../../utils/module-deps-to-rsp.py %t/result_pch.json \
+// RUN:   --module-name=mod > %t/mod.cc1.rsp
+// RUN: %python %S/../../utils/module-deps-to-rsp.py %t/result_pch.json \
+// RUN:   --tu-index=0 > %t/pch.rsp
+//
+// RUN: %clang @%t/mod.cc1.rsp
+// RUN: %clang -x c-header %t/pch.h -fmodules -gmodules -fimplicit-module-maps \
+// RUN:   -fmodules-cache-path=%t/cache -o %t/pch.h.gch -I %t @%t/pch.rsp
+
+// RUN: sed -e "s|DIR|%/t|g" %t/cdb_tu.json > %t/cdb.json
+// RUN: clang-scan-deps -compilation-database %t/cdb.json -format experimental-full \
+// RUN:   -generate-modules-path-args -module-files-dir %t/build > %t/result_tu.json

From 68db0e25df4b1edaa2c6080eb88453ab01ea01d3 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Fri, 21 Jan 2022 13:14:39 +0100
Subject: [PATCH 170/946] [flang] Update tco tool pipline and add translation
 to LLVM IR

tco is a tool to test the FIR to LLVM IR pipeline of the Flang compiler.

This patch update tco pipelines and adds the translation to LLVM IR.

A simple test is added to make sure the tool is working with a simple
FIR program.
More tests will be upstream in follow up patch from the fir-dev branch.

This patch is part of the upstreaming effort from fir-dev branch.

Reviewed By: schweitz, mehdi_amini

Differential Revision: https://reviews.llvm.org/D117781

Co-authored-by: Eric Schweitz <eschweitz@nvidia.com>
Co-authored-by: Jean Perier <jperier@nvidia.com>
Co-authored-by: Andrzej Warzynski <andrzej.warzynski@arm.com>
---
 .../include/flang/Optimizer/CodeGen/CodeGen.h |  10 +-
 .../include/flang/Optimizer/Support/InitFIR.h |  12 +-
 flang/include/flang/Tools/CLOptions.inc       | 160 ++++++++++++++++++
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       |  37 ++++
 flang/lib/Optimizer/Support/CMakeLists.txt    |   1 +
 flang/lib/Optimizer/Support/InitFIR.cpp       |  20 +++
 flang/test/Fir/basic-program.fir              |  11 ++
 flang/tools/tco/CMakeLists.txt                |  20 ++-
 flang/tools/tco/tco.cpp                       |  45 ++++-
 9 files changed, 301 insertions(+), 15 deletions(-)
 create mode 100644 flang/include/flang/Tools/CLOptions.inc
 create mode 100644 flang/lib/Optimizer/Support/InitFIR.cpp
 create mode 100644 flang/test/Fir/basic-program.fir

diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index 1bd31b207859a..939d6aebb524d 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -12,6 +12,8 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassRegistry.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
 #include <memory>
 
 namespace fir {
@@ -36,9 +38,13 @@ std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>> createFirTargetRewritePass(
 /// Convert FIR to the LLVM IR dialect
 std::unique_ptr<mlir::Pass> createFIRToLLVMPass();
 
+using LLVMIRLoweringPrinter =
+    std::function<void(llvm::Module &, llvm::raw_ostream &)>;
 /// Convert the LLVM IR dialect to LLVM-IR proper
-std::unique_ptr<mlir::Pass>
-createLLVMDialectToLLVMPass(llvm::raw_ostream &output);
+std::unique_ptr<mlir::Pass> createLLVMDialectToLLVMPass(
+    llvm::raw_ostream &output,
+    LLVMIRLoweringPrinter printer =
+        [](llvm::Module &m, llvm::raw_ostream &out) { m.print(out, nullptr); });
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index e78967de2a383..2e8c1685a06f7 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -13,7 +13,6 @@
 #ifndef FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
 #define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
 
-#include "flang/Optimizer/CodeGen/CodeGen.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "mlir/Conversion/Passes.h"
 #include "mlir/Dialect/Affine/Passes.h"
@@ -35,11 +34,19 @@ namespace fir::support {
 #define FLANG_DIALECT_LIST                                                     \
   FLANG_NONCODEGEN_DIALECT_LIST, FIRCodeGenDialect, mlir::LLVM::LLVMDialect
 
+inline void registerNonCodegenDialects(mlir::DialectRegistry &registry) {
+  registry.insert<FLANG_NONCODEGEN_DIALECT_LIST>();
+}
+
 /// Register all the dialects used by flang.
 inline void registerDialects(mlir::DialectRegistry &registry) {
   registry.insert<FLANG_DIALECT_LIST>();
 }
 
+inline void loadNonCodegenDialects(mlir::MLIRContext &context) {
+  context.loadDialect<FLANG_NONCODEGEN_DIALECT_LIST>();
+}
+
 /// Forced load of all the dialects used by flang.  Lowering is not an MLIR
 /// pass, but a producer of FIR and MLIR. It is therefore a requirement that the
 /// dialects be preloaded to be able to build the IR.
@@ -75,6 +82,9 @@ inline void registerMLIRPassesForFortranTools() {
   mlir::registerConvertAffineToStandardPass();
 }
 
+/// Register the interfaces needed to lower to LLVM IR.
+void registerLLVMTranslation(mlir::MLIRContext &context);
+
 } // namespace fir::support
 
 #endif // FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
new file mode 100644
index 0000000000000..1c85075d5cc17
--- /dev/null
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -0,0 +1,160 @@
+//===-- CLOptions.inc -- command line options -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// This file defines some shared command-line options that can be used when
+/// debugging the test tools. This file must be included into the tool.
+
+#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/Passes.h"
+#include "flang/Optimizer/CodeGen/CodeGen.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "llvm/Support/CommandLine.h"
+
+#define DisableOption(DOName, DOOption, DODescription) \
+  static llvm::cl::opt<bool> disable##DOName("disable-" DOOption, \
+      llvm::cl::desc("disable " DODescription " pass"), llvm::cl::init(false), \
+      llvm::cl::Hidden)
+
+/// Shared option in tools to control whether dynamically sized array
+/// allocations should always be on the heap.
+static llvm::cl::opt<bool> dynamicArrayStackToHeapAllocation(
+    "fdynamic-heap-array",
+    llvm::cl::desc("place all array allocations of dynamic size on the heap"),
+    llvm::cl::init(false), llvm::cl::Hidden);
+
+/// Shared option in tools to set a maximum value for the number of elements in
+/// a compile-time sized array that can be allocated on the stack.
+static llvm::cl::opt<std::size_t> arrayStackAllocationThreshold(
+    "fstack-array-size",
+    llvm::cl::desc(
+        "place all array allocations more than <size> elements on the heap"),
+    llvm::cl::init(~static_cast<std::size_t>(0)), llvm::cl::Hidden);
+
+namespace {
+/// Optimizer Passes
+DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass");
+DisableOption(FirAvc, "avc", "array value copy analysis and transformation");
+DisableOption(
+    FirMao, "memory-allocation-opt", "memory allocation optimization");
+
+/// CodeGen Passes
+#if !defined(FLANG_EXCLUDE_CODEGEN)
+DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen");
+DisableOption(TargetRewrite, "target-rewrite", "rewrite FIR for target");
+DisableOption(FirToLlvmIr, "fir-to-llvmir", "FIR to LLVM-IR dialect");
+DisableOption(LlvmIrToLlvm, "llvm", "conversion to LLVM");
+#endif
+
+/// Generic for adding a pass to the pass manager if it is not disabled.
+template <typename F>
+void addPassConditionally(
+    mlir::PassManager &pm, llvm::cl::opt<bool> &disabled, F ctor) {
+  if (!disabled)
+    pm.addPass(ctor());
+}
+
+template <typename OP, typename F>
+void addNestedPassConditionally(
+    mlir::PassManager &pm, llvm::cl::opt<bool> &disabled, F ctor) {
+  if (!disabled)
+    pm.addNestedPass<OP>(ctor());
+}
+
+} // namespace
+
+namespace fir {
+
+static void defaultFlangInlinerOptPipeline(mlir::OpPassManager &pm) {
+  mlir::GreedyRewriteConfig config;
+  config.enableRegionSimplification = false;
+  pm.addPass(mlir::createCanonicalizerPass(config));
+}
+
+inline void addCfgConversionPass(mlir::PassManager &pm) {
+  addNestedPassConditionally<mlir::FuncOp>(
+      pm, disableCfgConversion, fir::createFirToCfgPass);
+}
+
+inline void addAVC(mlir::PassManager &pm) {
+  addNestedPassConditionally<mlir::FuncOp>(
+      pm, disableFirAvc, fir::createArrayValueCopyPass);
+}
+
+#if !defined(FLANG_EXCLUDE_CODEGEN)
+inline void addCodeGenRewritePass(mlir::PassManager &pm) {
+  addPassConditionally(
+      pm, disableCodeGenRewrite, fir::createFirCodeGenRewritePass);
+}
+
+inline void addTargetRewritePass(mlir::PassManager &pm) {
+  addPassConditionally(pm, disableTargetRewrite, []() {
+    return fir::createFirTargetRewritePass(fir::TargetRewriteOptions{});
+  });
+}
+
+inline void addFIRToLLVMPass(mlir::PassManager &pm) {
+  addPassConditionally(pm, disableFirToLlvmIr, fir::createFIRToLLVMPass);
+}
+
+inline void addLLVMDialectToLLVMPass(
+    mlir::PassManager &pm, llvm::raw_ostream &output) {
+  addPassConditionally(pm, disableLlvmIrToLlvm,
+      [&]() { return fir::createLLVMDialectToLLVMPass(output); });
+}
+#endif
+
+/// Create a pass pipeline for running default optimization passes for
+/// incremental conversion of FIR.
+///
+/// \param pm - MLIR pass manager that will hold the pipeline definition
+inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm) {
+  // simplify the IR
+  mlir::GreedyRewriteConfig config;
+  config.enableRegionSimplification = false;
+  fir::addAVC(pm);
+  pm.addNestedPass<mlir::FuncOp>(fir::createCharacterConversionPass());
+  pm.addPass(mlir::createCanonicalizerPass(config));
+
+  // The default inliner pass adds the canonicalizer pass with the default
+  // configuration. Create the inliner pass with tco config.
+  llvm::StringMap<mlir::OpPassManager> pipelines;
+  pm.addPass(
+      mlir::createInlinerPass(pipelines, defaultFlangInlinerOptPipeline));
+  pm.addPass(mlir::createCSEPass());
+
+  // convert control flow to CFG form
+  fir::addCfgConversionPass(pm);
+  pm.addPass(mlir::createLowerToCFGPass());
+
+  pm.addPass(mlir::createCanonicalizerPass(config));
+}
+
+#if !defined(FLANG_EXCLUDE_CODEGEN)
+inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm) {
+  pm.addNestedPass<mlir::FuncOp>(fir::createAbstractResultOptPass());
+  fir::addCodeGenRewritePass(pm);
+  fir::addTargetRewritePass(pm);
+  fir::addFIRToLLVMPass(pm);
+}
+
+/// Create a pass pipeline for lowering from MLIR to LLVM IR
+///
+/// \param pm - MLIR pass manager that will hold the pipeline definition
+inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm) {
+  // Add default optimizer pass pipeline.
+  fir::createDefaultFIROptimizerPassPipeline(pm);
+
+  // Add codegen pass pipeline.
+  fir::createDefaultFIRCodeGenPassPipeline(pm);
+}
+#undef FLANG_EXCLUDE_CODEGEN
+#endif
+
+} // namespace fir
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index be2e7cde916df..40d6d2017b2fa 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
 #include "llvm/ADT/ArrayRef.h"
 
 #define DEBUG_TYPE "flang-codegen"
@@ -3305,8 +3306,44 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase<FIRToLLVMLowering> {
     }
   }
 };
+
+/// Lower from LLVM IR dialect to proper LLVM-IR and dump the module
+struct LLVMIRLoweringPass
+    : public mlir::PassWrapper<LLVMIRLoweringPass,
+                               mlir::OperationPass<mlir::ModuleOp>> {
+  using Printer = fir::LLVMIRLoweringPrinter;
+  LLVMIRLoweringPass(raw_ostream &output, Printer p)
+      : output{output}, printer{p} {}
+
+  mlir::ModuleOp getModule() { return getOperation(); }
+
+  void runOnOperation() override final {
+    auto *ctx = getModule().getContext();
+    auto optName = getModule().getName();
+    llvm::LLVMContext llvmCtx;
+    if (auto llvmModule = mlir::translateModuleToLLVMIR(
+            getModule(), llvmCtx, optName ? *optName : "FIRModule")) {
+      printer(*llvmModule, output);
+      return;
+    }
+
+    mlir::emitError(mlir::UnknownLoc::get(ctx), "could not emit LLVM-IR\n");
+    signalPassFailure();
+  }
+
+private:
+  raw_ostream &output;
+  Printer printer;
+};
+
 } // namespace
 
 std::unique_ptr<mlir::Pass> fir::createFIRToLLVMPass() {
   return std::make_unique<FIRToLLVMLowering>();
 }
+
+std::unique_ptr<mlir::Pass>
+fir::createLLVMDialectToLLVMPass(raw_ostream &output,
+                                 fir::LLVMIRLoweringPrinter printer) {
+  return std::make_unique<LLVMIRLoweringPass>(output, printer);
+}
diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt
index 30a163de9ccaf..779e20711513e 100644
--- a/flang/lib/Optimizer/Support/CMakeLists.txt
+++ b/flang/lib/Optimizer/Support/CMakeLists.txt
@@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 
 add_flang_library(FIRSupport
   FIRContext.cpp
+  InitFIR.cpp
   InternalNames.cpp
   KindMapping.cpp
 
diff --git a/flang/lib/Optimizer/Support/InitFIR.cpp b/flang/lib/Optimizer/Support/InitFIR.cpp
new file mode 100644
index 0000000000000..baa1336d9ca02
--- /dev/null
+++ b/flang/lib/Optimizer/Support/InitFIR.cpp
@@ -0,0 +1,20 @@
+//===-- Optimizer/Support/InitFIR.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Support/InitFIR.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
+
+void fir::support::registerLLVMTranslation(mlir::MLIRContext &context) {
+  mlir::DialectRegistry registry;
+  // Register OpenMP dialect interface here as well.
+  mlir::registerOpenMPDialectTranslation(registry);
+  // Register LLVM-IR dialect interface.
+  registerLLVMDialectTranslation(registry);
+  context.appendDialectRegistry(registry);
+}
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
new file mode 100644
index 0000000000000..02463bef99496
--- /dev/null
+++ b/flang/test/Fir/basic-program.fir
@@ -0,0 +1,11 @@
+// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s
+
+// Check that tco is working with a basic test.
+
+func @_QQmain() {
+  return
+}
+
+// CHECK: ; ModuleID = 'FIRModule'
+// CHECK-LABEL: define void @_QQmain()
+// CHECK:       ret void
diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt
index 1a9c5ac72f153..a64b9c59bd02a 100644
--- a/flang/tools/tco/CMakeLists.txt
+++ b/flang/tools/tco/CMakeLists.txt
@@ -1,13 +1,25 @@
-get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+set(LLVM_LINK_COMPONENTS
+  AllTargetsAsmParsers
+  AllTargetsCodeGens
+  AllTargetsDescs
+  AllTargetsInfos
+)
+llvm_map_components_to_libnames(llvm_libs ${LLVM_LINK_COMPONENTS})
 
-set(LIBS
+add_flang_tool(tco tco.cpp)
+llvm_update_compile_flags(tco)
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+target_link_libraries(tco PRIVATE
   FIRCodeGen
   FIRDialect
   FIRSupport
   FIRTransforms
+  FIRBuilder
   ${dialect_libs}
   MLIRIR
   MLIRLLVMIR
+  MLIRLLVMToLLVMIRTranslation
+  MLIRTargetLLVMIRExport
   MLIRPass
   MLIRStandardToLLVM
   MLIRTransforms
@@ -18,7 +30,5 @@ set(LIBS
   MLIRStandardToLLVM
   MLIRSupport
   MLIRVectorToLLVM
+  ${llvm_libs}
 )
-
-add_flang_tool(tco tco.cpp)
-target_link_libraries(tco PRIVATE ${LIBS})
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index 8f2c283bc82f9..2bb3b27e7eb63 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -11,8 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "flang/Optimizer/CodeGen/CodeGen.h"
+#include "flang/Optimizer/Support/FIRContext.h"
 #include "flang/Optimizer/Support/InitFIR.h"
+#include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/KindMapping.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/Parser.h"
@@ -25,11 +31,13 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
+/// list of program return codes
 static cl::opt<std::string>
     inputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
 
@@ -42,8 +50,14 @@ static cl::opt<bool> emitFir("emit-fir",
                              cl::desc("Parse and pretty-print the input"),
                              cl::init(false));
 
+static cl::opt<std::string> targetTriple("target",
+                                         cl::desc("specify a target triple"),
+                                         cl::init("native"));
+
+#include "flang/Tools/CLOptions.inc"
+
 static void printModuleBody(mlir::ModuleOp mod, raw_ostream &output) {
-  for (auto &op : mod.getBody()->without_terminator())
+  for (auto &op : *mod.getBody())
     output << op << '\n';
 }
 
@@ -65,6 +79,8 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
   mlir::DialectRegistry registry;
   fir::support::registerDialects(registry);
   mlir::MLIRContext context(registry);
+  fir::support::loadDialects(context);
+  fir::support::registerLLVMTranslation(context);
   auto owningRef = mlir::parseSourceFile(sourceMgr, &context);
 
   if (!owningRef) {
@@ -80,21 +96,31 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
   ToolOutputFile out(outputFilename, ec, sys::fs::OF_None);
 
   // run passes
-  mlir::PassManager pm{&context};
+  fir::KindMapping kindMap{&context};
+  fir::setTargetTriple(*owningRef, targetTriple);
+  fir::setKindMapping(*owningRef, kindMap);
+  mlir::PassManager pm(&context, mlir::OpPassManager::Nesting::Implicit);
+  pm.enableVerifier(/*verifyPasses=*/true);
   mlir::applyPassManagerCLOptions(pm);
   if (emitFir) {
     // parse the input and pretty-print it back out
     // -emit-fir intentionally disables all the passes
+  } else if (passPipeline.hasAnyOccurrences()) {
+    auto errorHandler = [&](const Twine &msg) {
+      mlir::emitError(mlir::UnknownLoc::get(pm.getContext())) << msg;
+      return mlir::failure();
+    };
+    if (mlir::failed(passPipeline.addToPipeline(pm, errorHandler)))
+      return mlir::failure();
   } else {
-    // TODO: Actually add passes when added to FIR code base
-    // add all the passes
-    // the user can disable them individually
+    fir::createMLIRToLLVMPassPipeline(pm);
+    fir::addLLVMDialectToLLVMPass(pm, out.os());
   }
 
   // run the pass manager
   if (mlir::succeeded(pm.run(*owningRef))) {
     // passes ran successfully, so keep the output
-    if (emitFir)
+    if (emitFir || passPipeline.hasAnyOccurrences())
       printModuleBody(*owningRef, out.os());
     out.keep();
     return mlir::success();
@@ -107,8 +133,13 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
 }
 
 int main(int argc, char **argv) {
-  fir::support::registerMLIRPassesForFortranTools();
   [[maybe_unused]] InitLLVM y(argc, argv);
+  fir::support::registerMLIRPassesForFortranTools();
+  fir::registerOptCodeGenPasses();
+  fir::registerOptTransformPasses();
+  InitializeAllTargets();
+  mlir::registerAsmPrinterCLOptions();
+  mlir::registerMLIRContextCLOptions();
   mlir::registerPassManagerCLOptions();
   mlir::PassPipelineCLParser passPipe("", "Compiler passes to run");
   cl::ParseCommandLineOptions(argc, argv, "Tilikum Crossing Optimizer\n");

From 8ee135dcf8ff060656ad481c3e980fe8763576f5 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 21 Jan 2022 12:24:32 +0000
Subject: [PATCH 171/946] [X86] Remove `__builtin_ia32_pmax/min` intrinsics and
 use generic `__builtin_elementwise_max/min`

D111985 added the generic `__builtin_elementwise_max` and `__builtin_elementwise_min` intrinsics with the same integer behaviour as the SSE/AVX instructions

This patch removes the `__builtin_ia32_pmax/min` intrinsics and just uses `__builtin_elementwise_max/min` - the existing tests see no changes:
```
__m256i test_mm256_max_epu32(__m256i a, __m256i b) {
  // CHECK-LABEL: test_mm256_max_epu32
  // CHECK: call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
  return _mm256_max_epu32(a, b);
}
```
This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`).

Sibling patch to D117791

Differential Revision: https://reviews.llvm.org/D117798
---
 clang/include/clang/Basic/BuiltinsX86.def | 48 ---------------------
 clang/lib/CodeGen/CGBuiltin.cpp           | 52 -----------------------
 clang/lib/Headers/avx2intrin.h            | 24 +++++------
 clang/lib/Headers/avx512bwintrin.h        | 16 +++----
 clang/lib/Headers/avx512fintrin.h         | 16 +++----
 clang/lib/Headers/avx512vlintrin.h        | 16 +++----
 clang/lib/Headers/emmintrin.h             |  8 ++--
 clang/lib/Headers/smmintrin.h             | 16 +++----
 clang/test/CodeGen/builtins-x86.c         | 12 ------
 9 files changed, 48 insertions(+), 160 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 9b7c763b0c6c7..a8f5567248624 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -265,10 +265,6 @@ TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2")
@@ -377,14 +373,6 @@ TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1")
 
-TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
@@ -580,18 +568,6 @@ TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4OiV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2")
@@ -921,14 +897,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:",
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f")
@@ -1047,14 +1015,6 @@ TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw"
 TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
@@ -1188,14 +1148,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 49f054ec1a982..4c68b20067b99 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14285,58 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       return Builder.CreateCall(F, Ops[0]);
     }
   }
-  case X86::BI__builtin_ia32_pmaxsb128:
-  case X86::BI__builtin_ia32_pmaxsw128:
-  case X86::BI__builtin_ia32_pmaxsd128:
-  case X86::BI__builtin_ia32_pmaxsq128:
-  case X86::BI__builtin_ia32_pmaxsb256:
-  case X86::BI__builtin_ia32_pmaxsw256:
-  case X86::BI__builtin_ia32_pmaxsd256:
-  case X86::BI__builtin_ia32_pmaxsq256:
-  case X86::BI__builtin_ia32_pmaxsb512:
-  case X86::BI__builtin_ia32_pmaxsw512:
-  case X86::BI__builtin_ia32_pmaxsd512:
-  case X86::BI__builtin_ia32_pmaxsq512:
-    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax);
-  case X86::BI__builtin_ia32_pmaxub128:
-  case X86::BI__builtin_ia32_pmaxuw128:
-  case X86::BI__builtin_ia32_pmaxud128:
-  case X86::BI__builtin_ia32_pmaxuq128:
-  case X86::BI__builtin_ia32_pmaxub256:
-  case X86::BI__builtin_ia32_pmaxuw256:
-  case X86::BI__builtin_ia32_pmaxud256:
-  case X86::BI__builtin_ia32_pmaxuq256:
-  case X86::BI__builtin_ia32_pmaxub512:
-  case X86::BI__builtin_ia32_pmaxuw512:
-  case X86::BI__builtin_ia32_pmaxud512:
-  case X86::BI__builtin_ia32_pmaxuq512:
-    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax);
-  case X86::BI__builtin_ia32_pminsb128:
-  case X86::BI__builtin_ia32_pminsw128:
-  case X86::BI__builtin_ia32_pminsd128:
-  case X86::BI__builtin_ia32_pminsq128:
-  case X86::BI__builtin_ia32_pminsb256:
-  case X86::BI__builtin_ia32_pminsw256:
-  case X86::BI__builtin_ia32_pminsd256:
-  case X86::BI__builtin_ia32_pminsq256:
-  case X86::BI__builtin_ia32_pminsb512:
-  case X86::BI__builtin_ia32_pminsw512:
-  case X86::BI__builtin_ia32_pminsd512:
-  case X86::BI__builtin_ia32_pminsq512:
-    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin);
-  case X86::BI__builtin_ia32_pminub128:
-  case X86::BI__builtin_ia32_pminuw128:
-  case X86::BI__builtin_ia32_pminud128:
-  case X86::BI__builtin_ia32_pminuq128:
-  case X86::BI__builtin_ia32_pminub256:
-  case X86::BI__builtin_ia32_pminuw256:
-  case X86::BI__builtin_ia32_pminud256:
-  case X86::BI__builtin_ia32_pminuq256:
-  case X86::BI__builtin_ia32_pminub512:
-  case X86::BI__builtin_ia32_pminuw512:
-  case X86::BI__builtin_ia32_pminud512:
-  case X86::BI__builtin_ia32_pminuq512:
-    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin);
 
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index c9ad74ce3fa42..e33514a60ff3e 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -253,73 +253,73 @@ _mm256_madd_epi16(__m256i __a, __m256i __b)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
+  return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
+  return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
+  return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
+  return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 53319eb23011d..522ef100bab1a 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -751,7 +751,7 @@ _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
+  return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -773,7 +773,7 @@ _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
+  return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -796,7 +796,7 @@ _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
+  return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -818,7 +818,7 @@ _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
+  return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -840,7 +840,7 @@ _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
+  return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -862,7 +862,7 @@ _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
+  return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -884,7 +884,7 @@ _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
+  return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -906,7 +906,7 @@ _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
+  return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9b02a7cffc64d..8695aeb94de24 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1090,7 +1090,7 @@ static __inline __m512i
 __DEFAULT_FN_ATTRS512
 _mm512_max_epi32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1112,7 +1112,7 @@ _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1134,7 +1134,7 @@ _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1156,7 +1156,7 @@ _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1325,7 +1325,7 @@ static __inline __m512i
 __DEFAULT_FN_ATTRS512
 _mm512_min_epi32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1347,7 +1347,7 @@ _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1369,7 +1369,7 @@ _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1391,7 +1391,7 @@ _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index eddb99902e3d5..178c9dbc0e6ea 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -3054,7 +3054,7 @@ _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_max_epi64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3073,7 +3073,7 @@ _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3120,7 +3120,7 @@ _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_max_epu64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3139,7 +3139,7 @@ _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3186,7 +3186,7 @@ _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_min_epi64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3205,7 +3205,7 @@ _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3252,7 +3252,7 @@ _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_min_epu64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3271,7 +3271,7 @@ _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 6e9c3032c21f7..4618b808efc48 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2375,7 +2375,7 @@ _mm_madd_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
+  return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
@@ -2395,7 +2395,7 @@ _mm_max_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
+  return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b);
 }
 
 /// Compares corresponding elements of two 128-bit signed [8 x i16]
@@ -2415,7 +2415,7 @@ _mm_max_epu8(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
+  return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
@@ -2435,7 +2435,7 @@ _mm_min_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
+  return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b);
 }
 
 /// Multiplies the corresponding elements of two signed [8 x i16]
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 710e55aaa1203..0df59c5fcc592 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -668,7 +668,7 @@ _mm_stream_load_si128 (__m128i const *__V)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi8 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
+  return (__m128i) __builtin_elementwise_min((__v16qs) __V1, (__v16qs) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -687,7 +687,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi8 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
+  return (__m128i) __builtin_elementwise_max((__v16qs) __V1, (__v16qs) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -706,7 +706,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu16 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
+  return (__m128i) __builtin_elementwise_min((__v8hu) __V1, (__v8hu) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -725,7 +725,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu16 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
+  return (__m128i) __builtin_elementwise_max((__v8hu) __V1, (__v8hu) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -744,7 +744,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_elementwise_min((__v4si) __V1, (__v4si) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -763,7 +763,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_elementwise_max((__v4si) __V1, (__v4si) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -782,7 +782,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_elementwise_min((__v4su) __V1, (__v4su) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -801,7 +801,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_elementwise_max((__v4su) __V1, (__v4su) __V2);
 }
 
 /* SSE4 Insertion and Extraction from XMM Register Instructions.  */
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index bfcd30072fc1f..9eb5f2f5d149e 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -221,10 +221,6 @@ void f0() {
   tmp_V16c = __builtin_ia32_psubusb128(tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_psubusw128(tmp_V8s, tmp_V8s);
   tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s);
-  tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c);
-  tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
-  tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);
-  tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s);
   tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
   tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
   tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
@@ -455,14 +451,6 @@ void f0() {
   tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);
   tmp_V4f = __builtin_ia32_blendvps(tmp_V4f, tmp_V4f, tmp_V4f);
   tmp_V8s = __builtin_ia32_packusdw128(tmp_V4i, tmp_V4i);
-  tmp_V16c = __builtin_ia32_pmaxsb128(tmp_V16c, tmp_V16c);
-  tmp_V4i = __builtin_ia32_pmaxsd128(tmp_V4i, tmp_V4i);
-  tmp_V4i = __builtin_ia32_pmaxud128(tmp_V4i, tmp_V4i);
-  tmp_V8s = __builtin_ia32_pmaxuw128(tmp_V8s, tmp_V8s);
-  tmp_V16c = __builtin_ia32_pminsb128(tmp_V16c, tmp_V16c);
-  tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i);
-  tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i);
-  tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s);
   tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i);
   tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16);
   tmp_V4f = __builtin_ia32_roundss(tmp_V4f, tmp_V4f, imm_i_0_16);

From 3ef88b31843e040c95f23ff2c3c206f1fa399c05 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 21 Jan 2022 12:34:19 +0000
Subject: [PATCH 172/946] Revert rG8ee135dcf8ff060656ad481c3e980fe8763576f5
 "[X86] Remove `__builtin_ia32_pmax/min` intrinsics and use generic
 `__builtin_elementwise_max/min`"

Some build bots are referencing the `__builtin_ia32_pmax/min` intrinsics via alternative headers
---
 clang/include/clang/Basic/BuiltinsX86.def | 48 +++++++++++++++++++++
 clang/lib/CodeGen/CGBuiltin.cpp           | 52 +++++++++++++++++++++++
 clang/lib/Headers/avx2intrin.h            | 24 +++++------
 clang/lib/Headers/avx512bwintrin.h        | 16 +++----
 clang/lib/Headers/avx512fintrin.h         | 16 +++----
 clang/lib/Headers/avx512vlintrin.h        | 16 +++----
 clang/lib/Headers/emmintrin.h             |  8 ++--
 clang/lib/Headers/smmintrin.h             | 16 +++----
 clang/test/CodeGen/builtins-x86.c         | 12 ++++++
 9 files changed, 160 insertions(+), 48 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index a8f5567248624..9b7c763b0c6c7 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -265,6 +265,10 @@ TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2")
@@ -373,6 +377,14 @@ TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1")
 
+TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
@@ -568,6 +580,18 @@ TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4OiV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2")
@@ -897,6 +921,14 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:",
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f")
@@ -1015,6 +1047,14 @@ TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw"
 TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
@@ -1148,6 +1188,14 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4c68b20067b99..49f054ec1a982 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14285,6 +14285,58 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       return Builder.CreateCall(F, Ops[0]);
     }
   }
+  case X86::BI__builtin_ia32_pmaxsb128:
+  case X86::BI__builtin_ia32_pmaxsw128:
+  case X86::BI__builtin_ia32_pmaxsd128:
+  case X86::BI__builtin_ia32_pmaxsq128:
+  case X86::BI__builtin_ia32_pmaxsb256:
+  case X86::BI__builtin_ia32_pmaxsw256:
+  case X86::BI__builtin_ia32_pmaxsd256:
+  case X86::BI__builtin_ia32_pmaxsq256:
+  case X86::BI__builtin_ia32_pmaxsb512:
+  case X86::BI__builtin_ia32_pmaxsw512:
+  case X86::BI__builtin_ia32_pmaxsd512:
+  case X86::BI__builtin_ia32_pmaxsq512:
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax);
+  case X86::BI__builtin_ia32_pmaxub128:
+  case X86::BI__builtin_ia32_pmaxuw128:
+  case X86::BI__builtin_ia32_pmaxud128:
+  case X86::BI__builtin_ia32_pmaxuq128:
+  case X86::BI__builtin_ia32_pmaxub256:
+  case X86::BI__builtin_ia32_pmaxuw256:
+  case X86::BI__builtin_ia32_pmaxud256:
+  case X86::BI__builtin_ia32_pmaxuq256:
+  case X86::BI__builtin_ia32_pmaxub512:
+  case X86::BI__builtin_ia32_pmaxuw512:
+  case X86::BI__builtin_ia32_pmaxud512:
+  case X86::BI__builtin_ia32_pmaxuq512:
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax);
+  case X86::BI__builtin_ia32_pminsb128:
+  case X86::BI__builtin_ia32_pminsw128:
+  case X86::BI__builtin_ia32_pminsd128:
+  case X86::BI__builtin_ia32_pminsq128:
+  case X86::BI__builtin_ia32_pminsb256:
+  case X86::BI__builtin_ia32_pminsw256:
+  case X86::BI__builtin_ia32_pminsd256:
+  case X86::BI__builtin_ia32_pminsq256:
+  case X86::BI__builtin_ia32_pminsb512:
+  case X86::BI__builtin_ia32_pminsw512:
+  case X86::BI__builtin_ia32_pminsd512:
+  case X86::BI__builtin_ia32_pminsq512:
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin);
+  case X86::BI__builtin_ia32_pminub128:
+  case X86::BI__builtin_ia32_pminuw128:
+  case X86::BI__builtin_ia32_pminud128:
+  case X86::BI__builtin_ia32_pminuq128:
+  case X86::BI__builtin_ia32_pminub256:
+  case X86::BI__builtin_ia32_pminuw256:
+  case X86::BI__builtin_ia32_pminud256:
+  case X86::BI__builtin_ia32_pminuq256:
+  case X86::BI__builtin_ia32_pminub512:
+  case X86::BI__builtin_ia32_pminuw512:
+  case X86::BI__builtin_ia32_pminud512:
+  case X86::BI__builtin_ia32_pminuq512:
+    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin);
 
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index e33514a60ff3e..c9ad74ce3fa42 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -253,73 +253,73 @@ _mm256_madd_epi16(__m256i __a, __m256i __b)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b);
+  return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b);
+  return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b);
+  return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b);
+  return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b);
+  return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b);
+  return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b);
+  return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
+  return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 522ef100bab1a..53319eb23011d 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -751,7 +751,7 @@ _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B);
+  return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -773,7 +773,7 @@ _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B);
+  return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -796,7 +796,7 @@ _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B);
+  return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -818,7 +818,7 @@ _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B);
+  return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -840,7 +840,7 @@ _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B);
+  return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -862,7 +862,7 @@ _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B);
+  return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -884,7 +884,7 @@ _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B);
+  return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -906,7 +906,7 @@ _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B);
+  return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 8695aeb94de24..9b02a7cffc64d 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1090,7 +1090,7 @@ static __inline __m512i
 __DEFAULT_FN_ATTRS512
 _mm512_max_epi32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1112,7 +1112,7 @@ _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
+  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1134,7 +1134,7 @@ _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1156,7 +1156,7 @@ _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
+  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1325,7 +1325,7 @@ static __inline __m512i
 __DEFAULT_FN_ATTRS512
 _mm512_min_epi32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1347,7 +1347,7 @@ _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
+  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1369,7 +1369,7 @@ _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1391,7 +1391,7 @@ _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
+  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 178c9dbc0e6ea..eddb99902e3d5 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -3054,7 +3054,7 @@ _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_max_epi64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3073,7 +3073,7 @@ _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3120,7 +3120,7 @@ _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_max_epu64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
+  return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3139,7 +3139,7 @@ _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
+  return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3186,7 +3186,7 @@ _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_min_epi64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3205,7 +3205,7 @@ _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3252,7 +3252,7 @@ _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_min_epu64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
+  return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3271,7 +3271,7 @@ _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
+  return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 4618b808efc48..6e9c3032c21f7 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2375,7 +2375,7 @@ _mm_madd_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b);
+  return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
@@ -2395,7 +2395,7 @@ _mm_max_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b);
+  return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Compares corresponding elements of two 128-bit signed [8 x i16]
@@ -2415,7 +2415,7 @@ _mm_max_epu8(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b);
+  return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
@@ -2435,7 +2435,7 @@ _mm_min_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b);
+  return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
 }
 
 /// Multiplies the corresponding elements of two signed [8 x i16]
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 0df59c5fcc592..710e55aaa1203 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -668,7 +668,7 @@ _mm_stream_load_si128 (__m128i const *__V)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi8 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_elementwise_min((__v16qs) __V1, (__v16qs) __V2);
+  return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -687,7 +687,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi8 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_elementwise_max((__v16qs) __V1, (__v16qs) __V2);
+  return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -706,7 +706,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu16 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_elementwise_min((__v8hu) __V1, (__v8hu) __V2);
+  return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -725,7 +725,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu16 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_elementwise_max((__v8hu) __V1, (__v8hu) __V2);
+  return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -744,7 +744,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_elementwise_min((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -763,7 +763,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_elementwise_max((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -782,7 +782,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_elementwise_min((__v4su) __V1, (__v4su) __V2);
+  return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -801,7 +801,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_elementwise_max((__v4su) __V1, (__v4su) __V2);
+  return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);
 }
 
 /* SSE4 Insertion and Extraction from XMM Register Instructions.  */
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 9eb5f2f5d149e..bfcd30072fc1f 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -221,6 +221,10 @@ void f0() {
   tmp_V16c = __builtin_ia32_psubusb128(tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_psubusw128(tmp_V8s, tmp_V8s);
   tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s);
+  tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c);
+  tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
+  tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);
+  tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s);
   tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
   tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
   tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
@@ -451,6 +455,14 @@ void f0() {
   tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);
   tmp_V4f = __builtin_ia32_blendvps(tmp_V4f, tmp_V4f, tmp_V4f);
   tmp_V8s = __builtin_ia32_packusdw128(tmp_V4i, tmp_V4i);
+  tmp_V16c = __builtin_ia32_pmaxsb128(tmp_V16c, tmp_V16c);
+  tmp_V4i = __builtin_ia32_pmaxsd128(tmp_V4i, tmp_V4i);
+  tmp_V4i = __builtin_ia32_pmaxud128(tmp_V4i, tmp_V4i);
+  tmp_V8s = __builtin_ia32_pmaxuw128(tmp_V8s, tmp_V8s);
+  tmp_V16c = __builtin_ia32_pminsb128(tmp_V16c, tmp_V16c);
+  tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i);
+  tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i);
+  tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s);
   tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i);
   tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16);
   tmp_V4f = __builtin_ia32_roundss(tmp_V4f, tmp_V4f, imm_i_0_16);

From 0abaf64580921e31983e355972b91c83fd7521f2 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Fri, 21 Jan 2022 12:35:36 +0000
Subject: [PATCH 173/946] Revert rG4727d29d908f9dd608dd97a58c0af1ad579fd3ca
 "[X86] Remove __builtin_ia32_pabs intrinsics and use generic
 __builtin_elementwise_abs"

Some build bots are referencing the `__builtin_ia32_pabs` intrinsics via alternative headers
---
 clang/include/clang/Basic/BuiltinsX86.def | 12 ++++++++++++
 clang/lib/CodeGen/CGBuiltin.cpp           | 15 +++++++++++++++
 clang/lib/Headers/avx2intrin.h            |  6 +++---
 clang/lib/Headers/avx512bwintrin.h        |  4 ++--
 clang/lib/Headers/avx512fintrin.h         |  8 ++------
 clang/lib/Headers/avx512vlintrin.h        |  4 ++--
 clang/lib/Headers/tmmintrin.h             |  6 +++---
 clang/test/CodeGen/builtins-x86.c         |  3 +++
 8 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 9b7c763b0c6c7..bc6208be45606 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -296,6 +296,9 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3")
 
 TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse")
 TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse")
@@ -555,6 +558,9 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx")
 
 // AVX2
 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
@@ -921,6 +927,8 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:",
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
@@ -1037,6 +1045,8 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5
 TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
 
+TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
@@ -1188,6 +1198,8 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 49f054ec1a982..a49c035002786 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14285,6 +14285,21 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       return Builder.CreateCall(F, Ops[0]);
     }
   }
+  case X86::BI__builtin_ia32_pabsb128:
+  case X86::BI__builtin_ia32_pabsw128:
+  case X86::BI__builtin_ia32_pabsd128:
+  case X86::BI__builtin_ia32_pabsb256:
+  case X86::BI__builtin_ia32_pabsw256:
+  case X86::BI__builtin_ia32_pabsd256:
+  case X86::BI__builtin_ia32_pabsq128:
+  case X86::BI__builtin_ia32_pabsq256:
+  case X86::BI__builtin_ia32_pabsb512:
+  case X86::BI__builtin_ia32_pabsw512:
+  case X86::BI__builtin_ia32_pabsd512:
+  case X86::BI__builtin_ia32_pabsq512: {
+    Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType());
+    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
+  }
   case X86::BI__builtin_ia32_pmaxsb128:
   case X86::BI__builtin_ia32_pmaxsw128:
   case X86::BI__builtin_ia32_pmaxsd128:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index c9ad74ce3fa42..5064c87c2bb19 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -26,19 +26,19 @@
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi8(__m256i __a)
 {
-    return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
+    return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi16(__m256i __a)
 {
-    return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
+    return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi32(__m256i __a)
 {
-    return (__m256i)__builtin_elementwise_abs((__v8si)__a);
+    return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 53319eb23011d..6aee8aed84871 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi8 (__m512i __A)
 {
-  return (__m512i)__builtin_elementwise_abs((__v64qs)__A);
+  return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi16 (__m512i __A)
 {
-  return (__m512i)__builtin_elementwise_abs((__v32hi)__A);
+  return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9b02a7cffc64d..df298640523b7 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -26,10 +26,6 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
 
-/* We need an explicitly signed variant for char. Note that this shouldn't
- * appear in the interface though. */
-typedef signed char __v64qs __attribute__((__vector_size__(64)));
-
 typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
 typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
 typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
@@ -1850,7 +1846,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi64(__m512i __A)
 {
-  return (__m512i)__builtin_elementwise_abs((__v8di)__A);
+  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1872,7 +1868,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi32(__m512i __A)
 {
-  return (__m512i)__builtin_elementwise_abs((__v16si) __A);
+  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index eddb99902e3d5..0519dba59081a 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_abs_epi64 (__m128i __A) {
-  return (__m128i)__builtin_elementwise_abs((__v2di)__A);
+  return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi64 (__m256i __A) {
-  return (__m256i)__builtin_elementwise_abs((__v4di)__A);
+  return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index cb9be2349de5a..bcffa8187801c 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -53,7 +53,7 @@ _mm_abs_pi8(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi8(__m128i __a)
 {
-    return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
+    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
 }
 
 /// Computes the absolute value of each of the packed 16-bit signed
@@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi16(__m128i __a)
 {
-    return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
+    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
 }
 
 /// Computes the absolute value of each of the packed 32-bit signed
@@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi32(__m128i __a)
 {
-    return (__m128i)__builtin_elementwise_abs((__v4si)__a);
+    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
 }
 
 /// Concatenates the two 128-bit integer vector operands, and
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index bfcd30072fc1f..61b9d53c74f9d 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -259,8 +259,11 @@ void f0() {
   tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s);
   tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i);
   tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i);
+  tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c);
   tmp_V8c = __builtin_ia32_pabsb(tmp_V8c);
+  tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s);
   tmp_V4s = __builtin_ia32_pabsw(tmp_V4s);
+  tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i);
   tmp_V2i = __builtin_ia32_pabsd(tmp_V2i);
   tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi);
   tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi);

From 75e164f61d391979b4829bf2746a5d74b94e95f2 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Thu, 20 Jan 2022 12:55:14 +0100
Subject: [PATCH 174/946] [llvm] Cleanup header dependencies in ADT and Support

The cleanup was manual, but assisted by "include-what-you-use". It consists in

1. Removing unused forward declaration. No impact expected.
2. Removing unused headers in .cpp files. No impact expected.
3. Removing unused headers in .h files. This removes implicit dependencies and
   is generally considered a good thing, but this may break downstream builds.
   I've updated llvm, clang, lld, lldb and mlir deps, and included a list of the
   modification in the second part of the commit.
4. Replacing header inclusion by forward declaration. This has the same impact
   as 3.

Notable changes:

- llvm/Support/TargetParser.h no longer includes llvm/Support/AArch64TargetParser.h nor llvm/Support/ARMTargetParser.h
- llvm/Support/TypeSize.h no longer includes llvm/Support/WithColor.h
- llvm/Support/YAMLTraits.h no longer includes llvm/Support/Regex.h
- llvm/ADT/SmallVector.h no longer includes llvm/Support/MemAlloc.h nor llvm/Support/ErrorHandling.h

You may need to add some of these headers in your compilation units, if needs be.

As an hint to the impact of the cleanup, running

clang++ -E  -Iinclude -I../llvm/include ../llvm/lib/Support/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l

before: 8000919 lines
after:  7917500 lines

Reduced dependencies also helps incremental rebuilds and is more ccache
friendly, something not shown by the above metric :-)

Discourse thread on the topic: https://llvm.discourse.group/t/include-what-you-use-include-cleanup/5831
---
 clang/lib/Basic/Targets/AArch64.h                        | 1 +
 clang/lib/Basic/Targets/ARM.h                            | 1 +
 clang/lib/Driver/SanitizerArgs.cpp                       | 1 +
 clang/lib/Driver/ToolChains/Arch/AArch64.cpp             | 1 +
 clang/lib/Driver/ToolChains/Arch/ARM.cpp                 | 1 +
 clang/lib/Driver/ToolChains/Arch/ARM.h                   | 1 +
 clang/tools/libclang/BuildSystem.cpp                     | 1 +
 lldb/source/Host/common/Socket.cpp                       | 1 +
 llvm/include/llvm/ADT/Optional.h                         | 1 -
 llvm/include/llvm/ADT/SmallVector.h                      | 2 --
 llvm/include/llvm/MC/MCStreamer.h                        | 1 +
 llvm/include/llvm/Object/ELFObjectFile.h                 | 1 +
 llvm/include/llvm/Support/AArch64TargetParser.h          | 1 -
 llvm/include/llvm/Support/ARMAttributeParser.h           | 7 +++----
 llvm/include/llvm/Support/Allocator.h                    | 4 ----
 llvm/include/llvm/Support/BinaryStreamReader.h           | 2 --
 llvm/include/llvm/Support/BinaryStreamRef.h              | 1 -
 llvm/include/llvm/Support/BinaryStreamWriter.h           | 1 -
 llvm/include/llvm/Support/BlockFrequency.h               | 5 ++---
 llvm/include/llvm/Support/BranchProbability.h            | 1 -
 llvm/include/llvm/Support/ConvertUTF.h                   | 1 -
 llvm/include/llvm/Support/ELFAttributeParser.h           | 3 ++-
 llvm/include/llvm/Support/Error.h                        | 1 -
 llvm/include/llvm/Support/ExtensibleRTTI.h               | 2 --
 llvm/include/llvm/Support/FileCollector.h                | 1 -
 llvm/include/llvm/Support/FileUtilities.h                | 6 +++---
 llvm/include/llvm/Support/FormatVariadic.h               | 3 ++-
 llvm/include/llvm/Support/GraphWriter.h                  | 2 --
 llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h | 1 -
 llvm/include/llvm/Support/RISCVISAInfo.h                 | 2 --
 llvm/include/llvm/Support/SymbolRemappingReader.h        | 3 ++-
 llvm/include/llvm/Support/TargetParser.h                 | 2 --
 llvm/include/llvm/Support/TimeProfiler.h                 | 3 ++-
 llvm/include/llvm/Support/Timer.h                        | 2 --
 llvm/include/llvm/Support/TrigramIndex.h                 | 1 -
 llvm/include/llvm/Support/TypeSize.h                     | 2 +-
 llvm/include/llvm/Support/YAMLTraits.h                   | 4 ----
 llvm/lib/Debuginfod/Debuginfod.cpp                       | 2 ++
 llvm/lib/Object/Object.cpp                               | 1 +
 llvm/lib/Support/APInt.cpp                               | 2 --
 llvm/lib/Support/ARMAttributeParser.cpp                  | 2 --
 llvm/lib/Support/ARMWinEH.cpp                            | 1 -
 llvm/lib/Support/BlockFrequency.cpp                      | 1 +
 llvm/lib/Support/DAGDeltaAlgorithm.cpp                   | 1 -
 llvm/lib/Support/DataExtractor.cpp                       | 1 -
 llvm/lib/Support/ELFAttributeParser.cpp                  | 2 --
 llvm/lib/Support/FileOutputBuffer.cpp                    | 2 --
 llvm/lib/Support/FileUtilities.cpp                       | 3 ---
 llvm/lib/Support/GraphWriter.cpp                         | 2 --
 llvm/lib/Support/InitLLVM.cpp                            | 6 +++---
 llvm/lib/Support/JSON.cpp                                | 1 +
 llvm/lib/Support/MSP430AttributeParser.cpp               | 3 ++-
 llvm/lib/Support/MemoryBuffer.cpp                        | 5 ++---
 llvm/lib/Support/NativeFormatting.cpp                    | 1 -
 llvm/lib/Support/PrettyStackTrace.cpp                    | 1 -
 llvm/lib/Support/ScopedPrinter.cpp                       | 1 -
 llvm/lib/Support/Signals.cpp                             | 1 +
 llvm/lib/Support/Signposts.cpp                           | 1 -
 llvm/lib/Support/SmallPtrSet.cpp                         | 1 -
 llvm/lib/Support/SmallVector.cpp                         | 1 +
 llvm/lib/Support/SpecialCaseList.cpp                     | 1 -
 llvm/lib/Support/StringMap.cpp                           | 1 -
 llvm/lib/Support/SymbolRemappingReader.cpp               | 1 +
 llvm/lib/Support/TargetParser.cpp                        | 2 --
 llvm/lib/Support/ThreadPool.cpp                          | 1 -
 llvm/lib/Support/TimeProfiler.cpp                        | 2 +-
 llvm/lib/Support/ToolOutputFile.cpp                      | 1 -
 llvm/lib/Support/Triple.cpp                              | 2 +-
 llvm/lib/Support/TypeSize.cpp                            | 1 +
 llvm/lib/Support/VirtualFileSystem.cpp                   | 3 ---
 llvm/lib/Support/X86TargetParser.cpp                     | 1 -
 llvm/lib/Support/YAMLParser.cpp                          | 1 -
 llvm/lib/Support/YAMLTraits.cpp                          | 2 --
 llvm/lib/Support/raw_ostream.cpp                         | 2 --
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp             | 1 +
 llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp   | 1 +
 llvm/lib/Target/ARM/ARMSubtarget.cpp                     | 1 +
 llvm/lib/Target/ARM/ARMTargetMachine.cpp                 | 1 +
 llvm/tools/llvm-diff/llvm-diff.cpp                       | 1 +
 llvm/tools/llvm-lto/llvm-lto.cpp                         | 1 +
 llvm/tools/llvm-modextract/llvm-modextract.cpp           | 1 +
 llvm/tools/llvm-reduce/llvm-reduce.cpp                   | 1 +
 llvm/tools/llvm-split/llvm-split.cpp                     | 1 +
 llvm/tools/llvm-stress/llvm-stress.cpp                   | 1 +
 llvm/unittests/Support/TargetParserTest.cpp              | 3 ++-
 llvm/unittests/Support/raw_ostream_test.cpp              | 1 +
 mlir/lib/TableGen/Format.cpp                             | 2 ++
 mlir/lib/TableGen/Predicate.cpp                          | 1 +
 mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp           | 1 +
 mlir/tools/mlir-tblgen/OpFormatGen.cpp                   | 1 +
 90 files changed, 63 insertions(+), 93 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index b9e6e3214c44d..ebddce0c1c73e 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -15,6 +15,7 @@
 
 #include "OSTargets.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "llvm/Support/AArch64TargetParser.h"
 #include "llvm/Support/TargetParser.h"
 
 namespace clang {
diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h
index 40c658f3f40e2..f074dac57f9b3 100644
--- a/clang/lib/Basic/Targets/ARM.h
+++ b/clang/lib/Basic/Targets/ARM.h
@@ -18,6 +18,7 @@
 #include "clang/Basic/TargetOptions.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/TargetParser.h"
 
 namespace clang {
diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp
index 34505319af1bc..403fac76f0602 100644
--- a/clang/lib/Driver/SanitizerArgs.cpp
+++ b/clang/lib/Driver/SanitizerArgs.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Support/AArch64TargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Transforms/Instrumentation/AddressSanitizerOptions.h"
diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
index 89a77a368ef02..ca0ca4bf4eeac 100644
--- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp
@@ -11,6 +11,7 @@
 #include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
 #include "llvm/Option/ArgList.h"
+#include "llvm/Support/AArch64TargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Support/Host.h"
 
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.cpp b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
index 1055d7800b63e..16af9f6d71295 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -12,6 +12,7 @@
 #include "clang/Driver/Options.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Option/ArgList.h"
+#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Support/Host.h"
 
diff --git a/clang/lib/Driver/ToolChains/Arch/ARM.h b/clang/lib/Driver/ToolChains/Arch/ARM.h
index 881b63bd36b9c..862a2f2796be5 100644
--- a/clang/lib/Driver/ToolChains/Arch/ARM.h
+++ b/clang/lib/Driver/ToolChains/Arch/ARM.h
@@ -13,6 +13,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/Option/Option.h"
+#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include <string>
 #include <vector>
diff --git a/clang/tools/libclang/BuildSystem.cpp b/clang/tools/libclang/BuildSystem.cpp
index 0d69dcf1725e1..2f638ee8700d9 100644
--- a/clang/tools/libclang/BuildSystem.cpp
+++ b/clang/tools/libclang/BuildSystem.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Support/CBindingWrapping.h"
 #include "llvm/Support/Chrono.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/lldb/source/Host/common/Socket.cpp b/lldb/source/Host/common/Socket.cpp
index 1c74a8fb59029..d8b8f54a6468d 100644
--- a/lldb/source/Host/common/Socket.cpp
+++ b/lldb/source/Host/common/Socket.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Errno.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/WindowsError.h"
 
 #if LLDB_ENABLE_POSIX
diff --git a/llvm/include/llvm/ADT/Optional.h b/llvm/include/llvm/ADT/Optional.h
index 2af59865b8ffe..7d6b3e92f6b27 100644
--- a/llvm/include/llvm/ADT/Optional.h
+++ b/llvm/include/llvm/ADT/Optional.h
@@ -21,7 +21,6 @@
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/type_traits.h"
 #include <cassert>
-#include <memory>
 #include <new>
 #include <utility>
 
diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 321546fec0130..9347f01a4191f 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -15,8 +15,6 @@
 
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemAlloc.h"
 #include "llvm/Support/type_traits.h"
 #include <algorithm>
 #include <cassert>
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index e19d03705acdd..3d6c512bfe73d 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -27,6 +27,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/SMLoc.h"
+#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Support/VersionTuple.h"
 #include <cassert>
diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h
index 716b94d92d032..e2d2784d4f238 100644
--- a/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/llvm/include/llvm/Object/ELFObjectFile.h
@@ -34,6 +34,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include <cassert>
 #include <cstdint>
 #include <system_error>
diff --git a/llvm/include/llvm/Support/AArch64TargetParser.h b/llvm/include/llvm/Support/AArch64TargetParser.h
index e53f16a8dd425..d094c704d291c 100644
--- a/llvm/include/llvm/Support/AArch64TargetParser.h
+++ b/llvm/include/llvm/Support/AArch64TargetParser.h
@@ -14,7 +14,6 @@
 #ifndef LLVM_SUPPORT_AARCH64TARGETPARSER_H
 #define LLVM_SUPPORT_AARCH64TARGETPARSER_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ARMTargetParser.h"
 #include <vector>
diff --git a/llvm/include/llvm/Support/ARMAttributeParser.h b/llvm/include/llvm/Support/ARMAttributeParser.h
index 1cc30af5eaf03..cbb5701540e14 100644
--- a/llvm/include/llvm/Support/ARMAttributeParser.h
+++ b/llvm/include/llvm/Support/ARMAttributeParser.h
@@ -11,14 +11,13 @@
 
 #include "ARMBuildAttributes.h"
 #include "ELFAttributeParser.h"
-#include "ScopedPrinter.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
 
 namespace llvm {
 
+class ScopedPrinter;
+
 class ARMAttributeParser : public ELFAttributeParser {
   struct DisplayHandler {
     ARMBuildAttrs::AttrType attribute;
diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h
index 3b1d11e3f75f8..ec5ed06b7fa4b 100644
--- a/llvm/include/llvm/Support/Allocator.h
+++ b/llvm/include/llvm/Support/Allocator.h
@@ -22,16 +22,12 @@
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/AllocatorBase.h"
 #include "llvm/Support/Compiler.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemAlloc.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <cstdlib>
 #include <iterator>
-#include <type_traits>
 #include <utility>
 
 namespace llvm {
diff --git a/llvm/include/llvm/Support/BinaryStreamReader.h b/llvm/include/llvm/Support/BinaryStreamReader.h
index 29b4b09b848c3..c664ac48daad1 100644
--- a/llvm/include/llvm/Support/BinaryStreamReader.h
+++ b/llvm/include/llvm/Support/BinaryStreamReader.h
@@ -10,7 +10,6 @@
 #define LLVM_SUPPORT_BINARYSTREAMREADER_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Alignment.h"
 #include "llvm/Support/BinaryStreamArray.h"
@@ -18,7 +17,6 @@
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
-#include "llvm/Support/type_traits.h"
 #include <type_traits>
 
 namespace llvm {
diff --git a/llvm/include/llvm/Support/BinaryStreamRef.h b/llvm/include/llvm/Support/BinaryStreamRef.h
index e0aaab82ffab5..bc8c6a496ecf7 100644
--- a/llvm/include/llvm/Support/BinaryStreamRef.h
+++ b/llvm/include/llvm/Support/BinaryStreamRef.h
@@ -14,7 +14,6 @@
 #include "llvm/Support/BinaryStream.h"
 #include "llvm/Support/BinaryStreamError.h"
 #include "llvm/Support/Error.h"
-#include <algorithm>
 #include <cstdint>
 #include <memory>
 
diff --git a/llvm/include/llvm/Support/BinaryStreamWriter.h b/llvm/include/llvm/Support/BinaryStreamWriter.h
index 3054f4ac7ef00..c05b0420aaa33 100644
--- a/llvm/include/llvm/Support/BinaryStreamWriter.h
+++ b/llvm/include/llvm/Support/BinaryStreamWriter.h
@@ -10,7 +10,6 @@
 #define LLVM_SUPPORT_BINARYSTREAMWRITER_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/BinaryStreamArray.h"
 #include "llvm/Support/BinaryStreamError.h"
diff --git a/llvm/include/llvm/Support/BlockFrequency.h b/llvm/include/llvm/Support/BlockFrequency.h
index 18fb60e1904b3..bf0ad46ab4994 100644
--- a/llvm/include/llvm/Support/BlockFrequency.h
+++ b/llvm/include/llvm/Support/BlockFrequency.h
@@ -13,12 +13,11 @@
 #ifndef LLVM_SUPPORT_BLOCKFREQUENCY_H
 #define LLVM_SUPPORT_BLOCKFREQUENCY_H
 
-#include "llvm/Support/BranchProbability.h"
-#include "llvm/Support/DataTypes.h"
+#include <cstdint>
 
 namespace llvm {
 
-class raw_ostream;
+class BranchProbability;
 
 // This class represents Block Frequency as a 64-bit value.
 class BlockFrequency {
diff --git a/llvm/include/llvm/Support/BranchProbability.h b/llvm/include/llvm/Support/BranchProbability.h
index 6c7ad1fe2a52c..6f071c15421f1 100644
--- a/llvm/include/llvm/Support/BranchProbability.h
+++ b/llvm/include/llvm/Support/BranchProbability.h
@@ -16,7 +16,6 @@
 #include "llvm/Support/DataTypes.h"
 #include <algorithm>
 #include <cassert>
-#include <climits>
 #include <numeric>
 
 namespace llvm {
diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h
index 1add185330fa0..a23aad6884d03 100644
--- a/llvm/include/llvm/Support/ConvertUTF.h
+++ b/llvm/include/llvm/Support/ConvertUTF.h
@@ -91,7 +91,6 @@
 
 #include <cstddef>
 #include <string>
-#include <system_error>
 
 // Wrap everything in namespace llvm so that programs can link with llvm and
 // their own version of the unicode libraries.
diff --git a/llvm/include/llvm/Support/ELFAttributeParser.h b/llvm/include/llvm/Support/ELFAttributeParser.h
index 8bf87b2d84f05..3062dfffff68a 100644
--- a/llvm/include/llvm/Support/ELFAttributeParser.h
+++ b/llvm/include/llvm/Support/ELFAttributeParser.h
@@ -10,15 +10,16 @@
 #define LLVM_SUPPORT_ELFATTRIBUTEPARSER_H
 
 #include "ELFAttributes.h"
-#include "ScopedPrinter.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
 
 #include <unordered_map>
 
 namespace llvm {
 class StringRef;
+class ScopedPrinter;
 
 class ELFAttributeParser {
   StringRef vendor;
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index e2002b89ada26..3997f0ea6db79 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -14,7 +14,6 @@
 #define LLVM_SUPPORT_ERROR_H
 
 #include "llvm-c/Error.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Twine.h"
diff --git a/llvm/include/llvm/Support/ExtensibleRTTI.h b/llvm/include/llvm/Support/ExtensibleRTTI.h
index 21055247e9327..d3193be6f529e 100644
--- a/llvm/include/llvm/Support/ExtensibleRTTI.h
+++ b/llvm/include/llvm/Support/ExtensibleRTTI.h
@@ -62,8 +62,6 @@
 
 namespace llvm {
 
-template <typename ThisT, typename ParentT> class RTTIExtends;
-
 /// Base class for the extensible RTTI hierarchy.
 ///
 /// This class defines virtual methods, dynamicClassID and isA, that enable
diff --git a/llvm/include/llvm/Support/FileCollector.h b/llvm/include/llvm/Support/FileCollector.h
index 264fb55c9dba7..232dc8658aa38 100644
--- a/llvm/include/llvm/Support/FileCollector.h
+++ b/llvm/include/llvm/Support/FileCollector.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_SUPPORT_FILECOLLECTOR_H
 #define LLVM_SUPPORT_FILECOLLECTOR_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/Support/VirtualFileSystem.h"
diff --git a/llvm/include/llvm/Support/FileUtilities.h b/llvm/include/llvm/Support/FileUtilities.h
index 04efdced32a4a..f8a37fe1177d9 100644
--- a/llvm/include/llvm/Support/FileUtilities.h
+++ b/llvm/include/llvm/Support/FileUtilities.h
@@ -15,10 +15,10 @@
 #define LLVM_SUPPORT_FILEUTILITIES_H
 
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Errc.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
+
+#include <system_error>
 
 namespace llvm {
 
diff --git a/llvm/include/llvm/Support/FormatVariadic.h b/llvm/include/llvm/Support/FormatVariadic.h
index 89575f01b7171..a872afb5e45e5 100644
--- a/llvm/include/llvm/Support/FormatVariadic.h
+++ b/llvm/include/llvm/Support/FormatVariadic.h
@@ -29,16 +29,17 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/FormatCommon.h"
 #include "llvm/Support/FormatProviders.h"
 #include "llvm/Support/FormatVariadicDetails.h"
 #include "llvm/Support/raw_ostream.h"
+#include <array>
 #include <cstddef>
 #include <string>
 #include <tuple>
 #include <utility>
-#include <vector>
 
 namespace llvm {
 
diff --git a/llvm/include/llvm/Support/GraphWriter.h b/llvm/include/llvm/Support/GraphWriter.h
index 1c0f5f702c6d9..515057e7e312f 100644
--- a/llvm/include/llvm/Support/GraphWriter.h
+++ b/llvm/include/llvm/Support/GraphWriter.h
@@ -28,8 +28,6 @@
 #include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-#include <cstddef>
 #include <iterator>
 #include <string>
 #include <type_traits>
diff --git a/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h b/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h
index 8e1b3d631983d..aa7997a0228ba 100644
--- a/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h
+++ b/llvm/include/llvm/Support/ItaniumManglingCanonicalizer.h
@@ -14,7 +14,6 @@
 #ifndef LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
 #define LLVM_SUPPORT_ITANIUMMANGLINGCANONICALIZER_H
 
-#include <cstddef>
 #include <cstdint>
 
 namespace llvm {
diff --git a/llvm/include/llvm/Support/RISCVISAInfo.h b/llvm/include/llvm/Support/RISCVISAInfo.h
index 93aaa82f4c0b1..b450c1df3558d 100644
--- a/llvm/include/llvm/Support/RISCVISAInfo.h
+++ b/llvm/include/llvm/Support/RISCVISAInfo.h
@@ -9,8 +9,6 @@
 #ifndef LLVM_SUPPORT_RISCVISAINFO_H
 #define LLVM_SUPPORT_RISCVISAINFO_H
 
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
 
diff --git a/llvm/include/llvm/Support/SymbolRemappingReader.h b/llvm/include/llvm/Support/SymbolRemappingReader.h
index 820cf9e021920..4fdaf87be082a 100644
--- a/llvm/include/llvm/Support/SymbolRemappingReader.h
+++ b/llvm/include/llvm/Support/SymbolRemappingReader.h
@@ -62,10 +62,11 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ItaniumManglingCanonicalizer.h"
-#include "llvm/Support/MemoryBuffer.h"
 
 namespace llvm {
 
+class MemoryBuffer;
+
 class SymbolRemappingParseError : public ErrorInfo<SymbolRemappingParseError> {
 public:
   SymbolRemappingParseError(StringRef File, int64_t Line, const Twine &Message)
diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h
index 01e25a0ea857c..1d7594ebedc22 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@@ -17,8 +17,6 @@
 // FIXME: vector is used because that's what clang uses for subtarget feature
 // lists, but SmallVector would probably be better
 #include "llvm/ADT/Triple.h"
-#include "llvm/Support/AArch64TargetParser.h"
-#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/RISCVISAInfo.h"
 #include <vector>
 
diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h
index 84794a25f78e5..6141acc99db28 100644
--- a/llvm/include/llvm/Support/TimeProfiler.h
+++ b/llvm/include/llvm/Support/TimeProfiler.h
@@ -10,10 +10,11 @@
 #define LLVM_SUPPORT_TIMEPROFILER_H
 
 #include "llvm/Support/Error.h"
-#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 
+class raw_pwrite_stream;
+
 struct TimeTraceProfiler;
 TimeTraceProfiler *getTimeTraceProfilerInstance();
 
diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h
index c5874ed356988..5a55491b3276a 100644
--- a/llvm/include/llvm/Support/Timer.h
+++ b/llvm/include/llvm/Support/Timer.h
@@ -14,12 +14,10 @@
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <string>
-#include <utility>
 #include <vector>
 
 namespace llvm {
 
-class Timer;
 class TimerGroup;
 class raw_ostream;
 
diff --git a/llvm/include/llvm/Support/TrigramIndex.h b/llvm/include/llvm/Support/TrigramIndex.h
index 0be6a1012718b..f772deca03014 100644
--- a/llvm/include/llvm/Support/TrigramIndex.h
+++ b/llvm/include/llvm/Support/TrigramIndex.h
@@ -33,7 +33,6 @@
 #include <vector>
 
 namespace llvm {
-class StringRef;
 
 class TrigramIndex {
  public:
diff --git a/llvm/include/llvm/Support/TypeSize.h b/llvm/include/llvm/Support/TypeSize.h
index 7d1274735a373..6bddb602e8c19 100644
--- a/llvm/include/llvm/Support/TypeSize.h
+++ b/llvm/include/llvm/Support/TypeSize.h
@@ -17,7 +17,7 @@
 
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/WithColor.h"
+#include "llvm/Support/raw_ostream.h"
 
 #include <algorithm>
 #include <array>
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index bea232e6e0000..66529075e2e71 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -18,7 +18,6 @@
 #include "llvm/Support/AlignOf.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Endian.h"
-#include "llvm/Support/Regex.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/VersionTuple.h"
@@ -26,9 +25,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <cctype>
-#include <cstddef>
-#include <cstdint>
-#include <iterator>
 #include <map>
 #include <memory>
 #include <new>
diff --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
index debee6e52b720..27614572766d4 100644
--- a/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -21,8 +21,10 @@
 #include "llvm/Debuginfod/HTTPClient.h"
 #include "llvm/Support/CachePruning.h"
 #include "llvm/Support/Caching.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/xxhash.h"
 
 namespace llvm {
diff --git a/llvm/lib/Object/Object.cpp b/llvm/lib/Object/Object.cpp
index 0659cf6a2d41e..576eb8d069d62 100644
--- a/llvm/lib/Object/Object.cpp
+++ b/llvm/lib/Object/Object.cpp
@@ -16,6 +16,7 @@
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Object/MachOUniversal.h"
+#include "llvm/Support/MemAlloc.h"
 
 using namespace llvm;
 using namespace object;
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index 4940b61602d19..b536e9a9a6d02 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -24,9 +24,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include <climits>
 #include <cmath>
-#include <cstdlib>
 #include <cstring>
 using namespace llvm;
 
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index 4b07fb7c87140..908e56319025d 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -9,8 +9,6 @@
 #include "llvm/Support/ARMAttributeParser.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Errc.h"
-#include "llvm/Support/LEB128.h"
 #include "llvm/Support/ScopedPrinter.h"
 
 using namespace llvm;
diff --git a/llvm/lib/Support/ARMWinEH.cpp b/llvm/lib/Support/ARMWinEH.cpp
index 2e2fcf28451ff..8e7fa1149082f 100644
--- a/llvm/lib/Support/ARMWinEH.cpp
+++ b/llvm/lib/Support/ARMWinEH.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/ARMWinEH.h"
-#include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
 namespace ARM {
diff --git a/llvm/lib/Support/BlockFrequency.cpp b/llvm/lib/Support/BlockFrequency.cpp
index 2b63294f3789e..702165ac480b4 100644
--- a/llvm/lib/Support/BlockFrequency.cpp
+++ b/llvm/lib/Support/BlockFrequency.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
 #include <cassert>
 
 using namespace llvm;
diff --git a/llvm/lib/Support/DAGDeltaAlgorithm.cpp b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
index a6daee00bd431..f1b730e2b58c4 100644
--- a/llvm/lib/Support/DAGDeltaAlgorithm.cpp
+++ b/llvm/lib/Support/DAGDeltaAlgorithm.cpp
@@ -37,7 +37,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
-#include <iterator>
 #include <map>
 using namespace llvm;
 
diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp
index 133d674275e8c..8cf3121911539 100644
--- a/llvm/lib/Support/DataExtractor.cpp
+++ b/llvm/lib/Support/DataExtractor.cpp
@@ -9,7 +9,6 @@
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Host.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/SwapByteOrder.h"
 
diff --git a/llvm/lib/Support/ELFAttributeParser.cpp b/llvm/lib/Support/ELFAttributeParser.cpp
index 1206553343efe..cf8a666e92bc0 100644
--- a/llvm/lib/Support/ELFAttributeParser.cpp
+++ b/llvm/lib/Support/ELFAttributeParser.cpp
@@ -7,10 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/ELFAttributeParser.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Errc.h"
-#include "llvm/Support/LEB128.h"
 #include "llvm/Support/ScopedPrinter.h"
 
 using namespace llvm;
diff --git a/llvm/lib/Support/FileOutputBuffer.cpp b/llvm/lib/Support/FileOutputBuffer.cpp
index 4b4406c4c9f4b..c11ee59da0dda 100644
--- a/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/llvm/lib/Support/FileOutputBuffer.cpp
@@ -11,11 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Memory.h"
-#include "llvm/Support/Path.h"
 #include <system_error>
 
 #if !defined(_MSC_VER) && !defined(__MINGW32__)
diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp
index dbe28e56b2c37..1a14ed2673606 100644
--- a/llvm/lib/Support/FileUtilities.cpp
+++ b/llvm/lib/Support/FileUtilities.cpp
@@ -12,15 +12,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/FileUtilities.h"
-#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cctype>
 #include <cmath>
 #include <cstdint>
 #include <cstdlib>
diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp
index 696e6b7a99d8c..6e6d79b225ac8 100644
--- a/llvm/lib/Support/GraphWriter.cpp
+++ b/llvm/lib/Support/GraphWriter.cpp
@@ -18,7 +18,6 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Config/config.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
@@ -26,7 +25,6 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cassert>
 #include <string>
 #include <system_error>
 #include <vector>
diff --git a/llvm/lib/Support/InitLLVM.cpp b/llvm/lib/Support/InitLLVM.cpp
index 152de6ebae0ac..8c6f86f68fa24 100644
--- a/llvm/lib/Support/InitLLVM.cpp
+++ b/llvm/lib/Support/InitLLVM.cpp
@@ -7,12 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/InitLLVM.h"
-#include "llvm/Support/Error.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/PrettyStackTrace.h"
-#include "llvm/Support/Process.h"
 #include "llvm/Support/Signals.h"
-#include <string>
+#include "llvm/Support/SwapByteOrder.h"
 
 #ifdef _WIN32
 #include "llvm/Support/Windows/WindowsSupport.h"
diff --git a/llvm/lib/Support/JSON.cpp b/llvm/lib/Support/JSON.cpp
index 17b36ed51850d..20babbe56d861 100644
--- a/llvm/lib/Support/JSON.cpp
+++ b/llvm/lib/Support/JSON.cpp
@@ -12,6 +12,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/NativeFormatting.h"
 #include <cctype>
 
 namespace llvm {
diff --git a/llvm/lib/Support/MSP430AttributeParser.cpp b/llvm/lib/Support/MSP430AttributeParser.cpp
index a9948a158fc01..a230a3a70adb6 100644
--- a/llvm/lib/Support/MSP430AttributeParser.cpp
+++ b/llvm/lib/Support/MSP430AttributeParser.cpp
@@ -7,7 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/MSP430AttributeParser.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
 using namespace llvm::MSP430Attrs;
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 1bbdafd082a45..7192fb1321cb6 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -13,9 +13,9 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/config.h"
-#include "llvm/Support/AutoConvert.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Errc.h"
-#include "llvm/Support/Errno.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Path.h"
@@ -23,7 +23,6 @@
 #include "llvm/Support/Program.h"
 #include "llvm/Support/SmallVectorMemoryBuffer.h"
 #include <cassert>
-#include <cerrno>
 #include <cstring>
 #include <new>
 #include <sys/types.h>
diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp
index 254d18d797b3e..0a797046bb684 100644
--- a/llvm/lib/Support/NativeFormatting.cpp
+++ b/llvm/lib/Support/NativeFormatting.cpp
@@ -13,7 +13,6 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
-#include <float.h>
 
 using namespace llvm;
 
diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp
index 0d07057f1df05..5d3335d001f31 100644
--- a/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/llvm/lib/Support/PrettyStackTrace.cpp
@@ -13,7 +13,6 @@
 
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm-c/ErrorHandling.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/SaveAndRestore.h"
diff --git a/llvm/lib/Support/ScopedPrinter.cpp b/llvm/lib/Support/ScopedPrinter.cpp
index ea90a24eaceda..a434e50e8c1fb 100644
--- a/llvm/lib/Support/ScopedPrinter.cpp
+++ b/llvm/lib/Support/ScopedPrinter.cpp
@@ -1,7 +1,6 @@
 #include "llvm/Support/ScopedPrinter.h"
 
 #include "llvm/Support/Format.h"
-#include <cctype>
 
 using namespace llvm::support;
 
diff --git a/llvm/lib/Support/Signals.cpp b/llvm/lib/Support/Signals.cpp
index c018dc92bf408..5ce41c9870299 100644
--- a/llvm/lib/Support/Signals.cpp
+++ b/llvm/lib/Support/Signals.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Mutex.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/StringSaver.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/llvm/lib/Support/Signposts.cpp b/llvm/lib/Support/Signposts.cpp
index 58fafb26cdf3a..074dddc81c808 100644
--- a/llvm/lib/Support/Signposts.cpp
+++ b/llvm/lib/Support/Signposts.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/Signposts.h"
-#include "llvm/Support/Timer.h"
 
 #include "llvm/Config/config.h"
 #if LLVM_SUPPORT_XCODE_SIGNPOSTS
diff --git a/llvm/lib/Support/SmallPtrSet.cpp b/llvm/lib/Support/SmallPtrSet.cpp
index f6e2dfb8a6c91..cbb87ea8717cf 100644
--- a/llvm/lib/Support/SmallPtrSet.cpp
+++ b/llvm/lib/Support/SmallPtrSet.cpp
@@ -13,7 +13,6 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/DenseMapInfo.h"
-#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/MemAlloc.h"
 #include <algorithm>
diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp
index 2d7721e4e1fb6..8cafbc7fad0de 100644
--- a/llvm/lib/Support/SmallVector.cpp
+++ b/llvm/lib/Support/SmallVector.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/MemAlloc.h"
 #include <cstdint>
 #ifdef LLVM_ENABLE_EXCEPTIONS
 #include <stdexcept>
diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp
index 1939ed9e9547b..137b37f2b1c3c 100644
--- a/llvm/lib/Support/SpecialCaseList.cpp
+++ b/llvm/lib/Support/SpecialCaseList.cpp
@@ -15,7 +15,6 @@
 
 #include "llvm/Support/SpecialCaseList.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/VirtualFileSystem.h"
diff --git a/llvm/lib/Support/StringMap.cpp b/llvm/lib/Support/StringMap.cpp
index f65d3846623c8..012c785b4351d 100644
--- a/llvm/lib/Support/StringMap.cpp
+++ b/llvm/lib/Support/StringMap.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/DJB.h"
 #include "llvm/Support/MathExtras.h"
 
diff --git a/llvm/lib/Support/SymbolRemappingReader.cpp b/llvm/lib/Support/SymbolRemappingReader.cpp
index 1caf0947216ea..90997ab0a6cea 100644
--- a/llvm/lib/Support/SymbolRemappingReader.cpp
+++ b/llvm/lib/Support/SymbolRemappingReader.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/LineIterator.h"
+#include "llvm/Support/MemoryBuffer.h"
 
 using namespace llvm;
 
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp
index bc60bdea5f62e..c14ddbeea5c39 100644
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@@ -13,9 +13,7 @@
 
 #include "llvm/Support/TargetParser.h"
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Twine.h"
 #include "llvm/Support/ARMBuildAttributes.h"
 
 using namespace llvm;
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index 6eec368e626ff..bf2584950c4ac 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -14,7 +14,6 @@
 
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Threading.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp
index 2b094a4983a08..a727bfa51731d 100644
--- a/llvm/lib/Support/TimeProfiler.cpp
+++ b/llvm/lib/Support/TimeProfiler.cpp
@@ -13,8 +13,8 @@
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringMap.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/JSON.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Threading.h"
diff --git a/llvm/lib/Support/ToolOutputFile.cpp b/llvm/lib/Support/ToolOutputFile.cpp
index c192ce60f31c9..c2ca97a59c620 100644
--- a/llvm/lib/Support/ToolOutputFile.cpp
+++ b/llvm/lib/Support/ToolOutputFile.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/ToolOutputFile.h"
-#include "llvm/ADT/Triple.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Signals.h"
 using namespace llvm;
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index 1452fa62f5fdc..20dea8c302a5e 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -14,7 +14,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Host.h"
 #include "llvm/Support/SwapByteOrder.h"
-#include "llvm/Support/TargetParser.h"
+#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/VersionTuple.h"
 #include <cassert>
 #include <cstring>
diff --git a/llvm/lib/Support/TypeSize.cpp b/llvm/lib/Support/TypeSize.cpp
index abb81016a0bad..a80fde83e3bc7 100644
--- a/llvm/lib/Support/TypeSize.cpp
+++ b/llvm/lib/Support/TypeSize.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/Support/TypeSize.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/WithColor.h"
 
 #include "DebugOptions.h"
 
diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp
index a963beb180bae..f15e301874c44 100644
--- a/llvm/lib/Support/VirtualFileSystem.cpp
+++ b/llvm/lib/Support/VirtualFileSystem.cpp
@@ -35,7 +35,6 @@
 #include "llvm/Support/FileSystem/UniqueID.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
-#include "llvm/Support/Process.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/YAMLParser.h"
@@ -46,9 +45,7 @@
 #include <cstdint>
 #include <iterator>
 #include <limits>
-#include <map>
 #include <memory>
-#include <mutex>
 #include <string>
 #include <system_error>
 #include <utility>
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index ab49ac548f89a..10f9692d217e9 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -12,7 +12,6 @@
 
 #include "llvm/Support/X86TargetParser.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/ADT/Triple.h"
 #include <numeric>
 
 using namespace llvm;
diff --git a/llvm/lib/Support/YAMLParser.cpp b/llvm/lib/Support/YAMLParser.cpp
index 2adf37a511d15..200261d3ed5c0 100644
--- a/llvm/lib/Support/YAMLParser.cpp
+++ b/llvm/lib/Support/YAMLParser.cpp
@@ -27,7 +27,6 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/Unicode.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 416423298bc6d..b79d520900d2c 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -18,13 +18,11 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Unicode.h"
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
-#include <cstdlib>
 #include <cstring>
 #include <string>
 #include <vector>
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 073f370ba34e3..1b1b0af79ae8d 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -24,10 +24,8 @@
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 #include <algorithm>
-#include <cctype>
 #include <cerrno>
 #include <cstdio>
-#include <iterator>
 #include <sys/stat.h>
 
 // <fcntl.h> may provide O_BINARY.
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index df8f1091bc452..f4d046078d68e 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
 #include "llvm/CodeGen/MachineScheduler.h"
 #include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/AArch64TargetParser.h"
 #include "llvm/Support/TargetParser.h"
 
 using namespace llvm;
diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index 2928b6c299168..33ed7ae9780e6 100644
--- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SMLoc.h"
+#include "llvm/Support/AArch64TargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 36c4bbaafcbf8..16befa4fff040 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -35,6 +35,7 @@
 #include "llvm/MC/MCTargetOptions.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Target/TargetOptions.h"
 
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 0b314ac2a41e1..c38970f8e3414 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ARMTargetParser.h"
 #include "llvm/Support/TargetParser.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
 #include "llvm/Target/TargetOptions.h"
diff --git a/llvm/tools/llvm-diff/llvm-diff.cpp b/llvm/tools/llvm-diff/llvm-diff.cpp
index d9d19f35ffee8..7349469c80d6d 100644
--- a/llvm/tools/llvm-diff/llvm-diff.cpp
+++ b/llvm/tools/llvm-diff/llvm-diff.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/WithColor.h"
 #include <string>
 #include <utility>
 
diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp
index c9021b135c6cf..d78c4dff7db48 100644
--- a/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -46,6 +46,7 @@
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/WithColor.h"
 #include "llvm/Target/TargetOptions.h"
 #include <algorithm>
 #include <cassert>
diff --git a/llvm/tools/llvm-modextract/llvm-modextract.cpp b/llvm/tools/llvm-modextract/llvm-modextract.cpp
index 9a44cbf68d0de..b1d6bfb790ec0 100644
--- a/llvm/tools/llvm-modextract/llvm-modextract.cpp
+++ b/llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/WithColor.h"
 
 using namespace llvm;
 
diff --git a/llvm/tools/llvm-reduce/llvm-reduce.cpp b/llvm/tools/llvm-reduce/llvm-reduce.cpp
index e07351aaa385b..59cc055a0870e 100644
--- a/llvm/tools/llvm-reduce/llvm-reduce.cpp
+++ b/llvm/tools/llvm-reduce/llvm-reduce.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/WithColor.h"
 #include "llvm/Target/TargetMachine.h"
 #include <system_error>
 #include <vector>
diff --git a/llvm/tools/llvm-split/llvm-split.cpp b/llvm/tools/llvm-split/llvm-split.cpp
index 6de28dc611ecb..c6e20e0373c71 100644
--- a/llvm/tools/llvm-split/llvm-split.cpp
+++ b/llvm/tools/llvm-split/llvm-split.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/WithColor.h"
 #include "llvm/Transforms/Utils/SplitModule.h"
 
 using namespace llvm;
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index f2be4e7d0712c..bb11c18b57fa4 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -41,6 +41,7 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/WithColor.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index 8fe13caab0ca7..768ec83a0e126 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -6,11 +6,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Support/TargetParser.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/AArch64TargetParser.h"
 #include "llvm/Support/ARMBuildAttributes.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/TargetParser.h"
 #include "gtest/gtest.h"
 #include <string>
 
diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp
index 74587c76236a6..5125af8ce8a4b 100644
--- a/llvm/unittests/Support/raw_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_ostream_test.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
 #include "llvm/Support/Format.h"
diff --git a/mlir/lib/TableGen/Format.cpp b/mlir/lib/TableGen/Format.cpp
index 917d1f5b50fff..7209cafcab7db 100644
--- a/mlir/lib/TableGen/Format.cpp
+++ b/mlir/lib/TableGen/Format.cpp
@@ -13,6 +13,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/TableGen/Format.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
 #include <cctype>
 
 using namespace mlir;
diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp
index f9b6f6719efef..3492c2f89077f 100644
--- a/mlir/lib/TableGen/Predicate.cpp
+++ b/mlir/lib/TableGen/Predicate.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/TableGen/Error.h"
 #include "llvm/TableGen/Record.h"
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
index e5b75561e7f8a..0dc1ef426d02a 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
@@ -14,6 +14,7 @@
 #include "mlir/TableGen/Format.h"
 #include "mlir/TableGen/GenInfo.h"
 #include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
index 8a61e3795b5b4..7a722305d0c31 100644
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/TableGen/Error.h"

From 2b8e4c6e5fbd5ec3bf7b75fd6b1e11d66fde78a9 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Fri, 21 Jan 2022 14:01:51 +0100
Subject: [PATCH 175/946] Add missing header in Support/ConvertUTF.h

---
 llvm/include/llvm/Support/ConvertUTF.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h
index a23aad6884d03..1add185330fa0 100644
--- a/llvm/include/llvm/Support/ConvertUTF.h
+++ b/llvm/include/llvm/Support/ConvertUTF.h
@@ -91,6 +91,7 @@
 
 #include <cstddef>
 #include <string>
+#include <system_error>
 
 // Wrap everything in namespace llvm so that programs can link with llvm and
 // their own version of the unicode libraries.

From 38ac4093d9d2ae28d631ca1cc5802533989165c5 Mon Sep 17 00:00:00 2001
From: Archibald Elliott <archibald.elliott@arm.com>
Date: Fri, 21 Jan 2022 13:14:58 +0000
Subject: [PATCH 176/946] [NFCI][Support] Avoid ASSERT_/EXPECT_TRUE(A <op> B)

The error messages in tests are far better when a test fails if the test
is written using ASSERT_/EXPECT_<operator>(A, B) rather than
ASSERT_/EXPECT_TRUE(A <operator> B).

This commit updates all of llvm/unittests/Support to use these macros
where possible.

This change has not been possible in:
- llvm/unittests/Support/FSUniqueIDTest.cpp - due to not overloading
  operators beyond ==, != and <.
- llvm/unittests/Support/BranchProbabilityTest.cpp - where the unchanged
  tests are of the operator overloads themselves.

There are other possibilities of this conversion not being valid, which
have not applied in these tests, as they do not use NULL (they use
nullptr), and they do not use const char* (they use std::string or
StringRef).

Reviewed By: mubashar_

Differential Revision: https://reviews.llvm.org/D117319
---
 llvm/unittests/Support/Casting.cpp            | 34 +++----
 llvm/unittests/Support/CommandLineTest.cpp    | 89 +++++++++----------
 .../DynamicLibrary/DynamicLibraryTest.cpp     | 42 +++++----
 llvm/unittests/Support/ErrorTest.cpp          | 23 +++--
 llvm/unittests/Support/FSUniqueIDTest.cpp     |  6 +-
 .../unittests/Support/IndexedAccessorTest.cpp |  2 +-
 llvm/unittests/Support/JSONTest.cpp           |  4 +-
 llvm/unittests/Support/MemoryBufferTest.cpp   | 20 ++---
 llvm/unittests/Support/Path.cpp               |  2 +-
 llvm/unittests/Support/ProgramTest.cpp        |  4 +-
 llvm/unittests/Support/TarWriterTest.cpp      |  4 +-
 llvm/unittests/Support/TargetParserTest.cpp   | 26 +++---
 llvm/unittests/Support/TimerTest.cpp          |  2 +-
 llvm/unittests/Support/UnicodeTest.cpp        |  6 +-
 .../Support/VirtualFileSystemTest.cpp         | 76 ++++++++--------
 llvm/unittests/Support/YAMLIOTest.cpp         | 86 +++++++++---------
 llvm/unittests/Support/YAMLParserTest.cpp     | 24 ++---
 llvm/unittests/Support/raw_ostream_test.cpp   |  2 +-
 18 files changed, 235 insertions(+), 217 deletions(-)

diff --git a/llvm/unittests/Support/Casting.cpp b/llvm/unittests/Support/Casting.cpp
index a196fc2ec5ed3..e99c0d4860315 100644
--- a/llvm/unittests/Support/Casting.cpp
+++ b/llvm/unittests/Support/Casting.cpp
@@ -283,7 +283,7 @@ TEST(CastingTest, UpcastIsInferred) {
   Derived D;
   EXPECT_TRUE(isa<Base>(D));
   Base *BP = dyn_cast<Base>(&D);
-  EXPECT_TRUE(BP != nullptr);
+  EXPECT_NE(BP, nullptr);
 }
 
 
@@ -379,31 +379,31 @@ TEST(CastingTest, smart_isa) {
 }
 
 TEST(CastingTest, smart_cast) {
-  EXPECT_TRUE(cast<pointer_wrappers::Derived>(MD) == &D);
-  EXPECT_TRUE(cast<pointer_wrappers::Derived>(CD) == &D);
+  EXPECT_EQ(cast<pointer_wrappers::Derived>(MD), &D);
+  EXPECT_EQ(cast<pointer_wrappers::Derived>(CD), &D);
 }
 
 TEST(CastingTest, smart_cast_or_null) {
-  EXPECT_TRUE(cast_or_null<pointer_wrappers::Derived>(MN) == nullptr);
-  EXPECT_TRUE(cast_or_null<pointer_wrappers::Derived>(CN) == nullptr);
-  EXPECT_TRUE(cast_or_null<pointer_wrappers::Derived>(MD) == &D);
-  EXPECT_TRUE(cast_or_null<pointer_wrappers::Derived>(CD) == &D);
+  EXPECT_EQ(cast_or_null<pointer_wrappers::Derived>(MN), nullptr);
+  EXPECT_EQ(cast_or_null<pointer_wrappers::Derived>(CN), nullptr);
+  EXPECT_EQ(cast_or_null<pointer_wrappers::Derived>(MD), &D);
+  EXPECT_EQ(cast_or_null<pointer_wrappers::Derived>(CD), &D);
 }
 
 TEST(CastingTest, smart_dyn_cast) {
-  EXPECT_TRUE(dyn_cast<pointer_wrappers::Derived>(MB) == nullptr);
-  EXPECT_TRUE(dyn_cast<pointer_wrappers::Derived>(CB) == nullptr);
-  EXPECT_TRUE(dyn_cast<pointer_wrappers::Derived>(MD) == &D);
-  EXPECT_TRUE(dyn_cast<pointer_wrappers::Derived>(CD) == &D);
+  EXPECT_EQ(dyn_cast<pointer_wrappers::Derived>(MB), nullptr);
+  EXPECT_EQ(dyn_cast<pointer_wrappers::Derived>(CB), nullptr);
+  EXPECT_EQ(dyn_cast<pointer_wrappers::Derived>(MD), &D);
+  EXPECT_EQ(dyn_cast<pointer_wrappers::Derived>(CD), &D);
 }
 
 TEST(CastingTest, smart_dyn_cast_or_null) {
-  EXPECT_TRUE(dyn_cast_or_null<pointer_wrappers::Derived>(MN) == nullptr);
-  EXPECT_TRUE(dyn_cast_or_null<pointer_wrappers::Derived>(CN) == nullptr);
-  EXPECT_TRUE(dyn_cast_or_null<pointer_wrappers::Derived>(MB) == nullptr);
-  EXPECT_TRUE(dyn_cast_or_null<pointer_wrappers::Derived>(CB) == nullptr);
-  EXPECT_TRUE(dyn_cast_or_null<pointer_wrappers::Derived>(MD) == &D);
-  EXPECT_TRUE(dyn_cast_or_null<pointer_wrappers::Derived>(CD) == &D);
+  EXPECT_EQ(dyn_cast_or_null<pointer_wrappers::Derived>(MN), nullptr);
+  EXPECT_EQ(dyn_cast_or_null<pointer_wrappers::Derived>(CN), nullptr);
+  EXPECT_EQ(dyn_cast_or_null<pointer_wrappers::Derived>(MB), nullptr);
+  EXPECT_EQ(dyn_cast_or_null<pointer_wrappers::Derived>(CB), nullptr);
+  EXPECT_EQ(dyn_cast_or_null<pointer_wrappers::Derived>(MD), &D);
+  EXPECT_EQ(dyn_cast_or_null<pointer_wrappers::Derived>(CD), &D);
 }
 
 } // end namespace pointer_wrappers
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index 4e1160fe2dbc5..8032d0e7bf403 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -101,8 +101,7 @@ TEST(CommandLineTest, ModifyExisitingOption) {
   StringMap<cl::Option *> &Map =
       cl::getRegisteredOptions(*cl::TopLevelSubCommand);
 
-  ASSERT_TRUE(Map.count("test-option") == 1) <<
-    "Could not find option in map.";
+  ASSERT_EQ(Map.count("test-option"), 1u) << "Could not find option in map.";
 
   cl::Option *Retrieved = Map["test-option"];
   ASSERT_EQ(&TestOption, Retrieved) << "Retrieved wrong option.";
@@ -701,7 +700,7 @@ TEST(CommandLineTest, DefaultOptions) {
   const char *args0[] = {"prog", "-b", "args0 bar string", "-f"};
   EXPECT_TRUE(cl::ParseCommandLineOptions(sizeof(args0) / sizeof(char *), args0,
                                           StringRef(), &llvm::nulls()));
-  EXPECT_TRUE(Bar == "args0 bar string");
+  EXPECT_EQ(Bar, "args0 bar string");
   EXPECT_TRUE(Foo);
   EXPECT_FALSE(SC1_B);
   EXPECT_TRUE(SC2_Foo.empty());
@@ -711,7 +710,7 @@ TEST(CommandLineTest, DefaultOptions) {
   const char *args1[] = {"prog", "sc1", "-b", "-bar", "args1 bar string", "-f"};
   EXPECT_TRUE(cl::ParseCommandLineOptions(sizeof(args1) / sizeof(char *), args1,
                                           StringRef(), &llvm::nulls()));
-  EXPECT_TRUE(Bar == "args1 bar string");
+  EXPECT_EQ(Bar, "args1 bar string");
   EXPECT_TRUE(Foo);
   EXPECT_TRUE(SC1_B);
   EXPECT_TRUE(SC2_Foo.empty());
@@ -727,10 +726,10 @@ TEST(CommandLineTest, DefaultOptions) {
                          "-f", "-foo", "foo string"};
   EXPECT_TRUE(cl::ParseCommandLineOptions(sizeof(args2) / sizeof(char *), args2,
                                           StringRef(), &llvm::nulls()));
-  EXPECT_TRUE(Bar == "args2 bar string");
+  EXPECT_EQ(Bar, "args2 bar string");
   EXPECT_TRUE(Foo);
   EXPECT_FALSE(SC1_B);
-  EXPECT_TRUE(SC2_Foo == "foo string");
+  EXPECT_EQ(SC2_Foo, "foo string");
   for (auto *S : cl::getRegisteredSubcommands()) {
     if (*S) {
       EXPECT_EQ("sc2", S->getName());
@@ -777,8 +776,8 @@ TEST(CommandLineTest, ResponseFileWindows) {
   EXPECT_TRUE(
       cl::ParseCommandLineOptions(2, args, StringRef(), &llvm::nulls()));
   EXPECT_TRUE(TopLevelOpt);
-  EXPECT_TRUE(InputFilenames[0] == "path\\dir\\file1");
-  EXPECT_TRUE(InputFilenames[1] == "path/dir/file2");
+  EXPECT_EQ(InputFilenames[0], "path\\dir\\file1");
+  EXPECT_EQ(InputFilenames[1], "path/dir/file2");
 }
 
 TEST(CommandLineTest, ResponseFiles) {
@@ -1011,9 +1010,9 @@ TEST(CommandLineTest, SetDefautValue) {
   EXPECT_TRUE(
     cl::ParseCommandLineOptions(2, args, StringRef(), &llvm::nulls()));
 
-  EXPECT_TRUE(Opt1 == "false");
+  EXPECT_EQ(Opt1, "false");
   EXPECT_TRUE(Opt2);
-  EXPECT_TRUE(Opt3 == 3);
+  EXPECT_EQ(Opt3, 3);
 
   Opt2 = false;
   Opt3 = 1;
@@ -1028,9 +1027,9 @@ TEST(CommandLineTest, SetDefautValue) {
     O->setDefault();
   }
 
-  EXPECT_TRUE(Opt1 == "true");
+  EXPECT_EQ(Opt1, "true");
   EXPECT_TRUE(Opt2);
-  EXPECT_TRUE(Opt3 == 3);
+  EXPECT_EQ(Opt3, 3);
   Alias.removeArgument();
 }
 
@@ -1135,8 +1134,8 @@ TEST(CommandLineTest, PositionalEatArgsError) {
 
   cl::ResetAllOptionOccurrences();
   EXPECT_TRUE(cl::ParseCommandLineOptions(6, args4, StringRef(), &OS)); OS.flush();
-  EXPECT_TRUE(PosEatArgs.size() == 1);
-  EXPECT_TRUE(PosEatArgs2.size() == 2);
+  EXPECT_EQ(PosEatArgs.size(), 1u);
+  EXPECT_EQ(PosEatArgs2.size(), 2u);
   EXPECT_TRUE(Errs.empty());
 }
 
@@ -1412,8 +1411,8 @@ TEST(CommandLineTest, PrefixOptions) {
   const char *args[] = {"prog", "-I=/usr/include"};
   EXPECT_TRUE(
       cl::ParseCommandLineOptions(2, args, StringRef(), &llvm::nulls()));
-  EXPECT_TRUE(IncludeDirs.size() == 1);
-  EXPECT_TRUE(IncludeDirs.front().compare("/usr/include") == 0);
+  EXPECT_EQ(IncludeDirs.size(), 1u);
+  EXPECT_EQ(IncludeDirs.front().compare("/usr/include"), 0);
 
   IncludeDirs.erase(IncludeDirs.begin());
   cl::ResetAllOptionOccurrences();
@@ -1424,8 +1423,8 @@ TEST(CommandLineTest, PrefixOptions) {
   const char *args2[] = {"prog", "-I", "/usr/include"};
   EXPECT_TRUE(
       cl::ParseCommandLineOptions(3, args2, StringRef(), &llvm::nulls()));
-  EXPECT_TRUE(IncludeDirs.size() == 1);
-  EXPECT_TRUE(IncludeDirs.front().compare("/usr/include") == 0);
+  EXPECT_EQ(IncludeDirs.size(), 1u);
+  EXPECT_EQ(IncludeDirs.front().compare("/usr/include"), 0);
 
   IncludeDirs.erase(IncludeDirs.begin());
   cl::ResetAllOptionOccurrences();
@@ -1435,8 +1434,8 @@ TEST(CommandLineTest, PrefixOptions) {
   const char *args3[] = {"prog", "-I/usr/include"};
   EXPECT_TRUE(
       cl::ParseCommandLineOptions(2, args3, StringRef(), &llvm::nulls()));
-  EXPECT_TRUE(IncludeDirs.size() == 1);
-  EXPECT_TRUE(IncludeDirs.front().compare("/usr/include") == 0);
+  EXPECT_EQ(IncludeDirs.size(), 1u);
+  EXPECT_EQ(IncludeDirs.front().compare("/usr/include"), 0);
 
   StackOption<std::string, cl::list<std::string>> MacroDefs(
       "D", cl::AlwaysPrefix, cl::desc("Define a macro"),
@@ -1450,8 +1449,8 @@ TEST(CommandLineTest, PrefixOptions) {
   const char *args4[] = {"prog", "-D=HAVE_FOO"};
   EXPECT_TRUE(
       cl::ParseCommandLineOptions(2, args4, StringRef(), &llvm::nulls()));
-  EXPECT_TRUE(MacroDefs.size() == 1);
-  EXPECT_TRUE(MacroDefs.front().compare("=HAVE_FOO") == 0);
+  EXPECT_EQ(MacroDefs.size(), 1u);
+  EXPECT_EQ(MacroDefs.front().compare("=HAVE_FOO"), 0);
 
   MacroDefs.erase(MacroDefs.begin());
   cl::ResetAllOptionOccurrences();
@@ -1471,8 +1470,8 @@ TEST(CommandLineTest, PrefixOptions) {
   const char *args6[] = {"prog", "-DHAVE_FOO"};
   EXPECT_TRUE(
       cl::ParseCommandLineOptions(2, args6, StringRef(), &llvm::nulls()));
-  EXPECT_TRUE(MacroDefs.size() == 1);
-  EXPECT_TRUE(MacroDefs.front().compare("HAVE_FOO") == 0);
+  EXPECT_EQ(MacroDefs.size(), 1u);
+  EXPECT_EQ(MacroDefs.front().compare("HAVE_FOO"), 0);
 }
 
 TEST(CommandLineTest, GroupingWithValue) {
@@ -1757,12 +1756,12 @@ TEST(CommandLineTest, OptionErrorMessage) {
 
   OptA.error("custom error", OS);
   OS.flush();
-  EXPECT_FALSE(Errs.find("for the -a option:") == std::string::npos);
+  EXPECT_NE(Errs.find("for the -a option:"), std::string::npos);
   Errs.clear();
 
   OptLong.error("custom error", OS);
   OS.flush();
-  EXPECT_FALSE(Errs.find("for the --long option:") == std::string::npos);
+  EXPECT_NE(Errs.find("for the --long option:"), std::string::npos);
   Errs.clear();
 
   cl::ResetAllOptionOccurrences();
@@ -1785,8 +1784,8 @@ TEST(CommandLineTest, OptionErrorMessageSuggest) {
 
   EXPECT_FALSE(cl::ParseCommandLineOptions(2, args, StringRef(), &OS));
   OS.flush();
-  EXPECT_FALSE(Errs.find("prog: Did you mean '--aluminium'?\n") ==
-               std::string::npos);
+  EXPECT_NE(Errs.find("prog: Did you mean '--aluminium'?\n"),
+            std::string::npos);
   Errs.clear();
 
   cl::ResetAllOptionOccurrences();
@@ -1808,8 +1807,8 @@ TEST(CommandLineTest, OptionErrorMessageSuggestNoHidden) {
 
   EXPECT_FALSE(cl::ParseCommandLineOptions(2, args, StringRef(), &OS));
   OS.flush();
-  EXPECT_FALSE(Errs.find("prog: Did you mean '--aluminium'?\n") ==
-               std::string::npos);
+  EXPECT_NE(Errs.find("prog: Did you mean '--aluminium'?\n"),
+            std::string::npos);
   Errs.clear();
 
   cl::ResetAllOptionOccurrences();
@@ -1840,7 +1839,7 @@ TEST(CommandLineTest, Callback) {
   EXPECT_TRUE(OptA);
   EXPECT_FALSE(OptB);
   EXPECT_FALSE(OptC);
-  EXPECT_TRUE(List.size() == 0);
+  EXPECT_EQ(List.size(), 0u);
   cl::ResetAllOptionOccurrences();
 
   const char *args2[] = {"prog", "-b"};
@@ -1848,7 +1847,7 @@ TEST(CommandLineTest, Callback) {
   EXPECT_TRUE(OptA);
   EXPECT_TRUE(OptB);
   EXPECT_FALSE(OptC);
-  EXPECT_TRUE(List.size() == 0);
+  EXPECT_EQ(List.size(), 0u);
   cl::ResetAllOptionOccurrences();
 
   const char *args3[] = {"prog", "-c"};
@@ -1856,7 +1855,7 @@ TEST(CommandLineTest, Callback) {
   EXPECT_TRUE(OptA);
   EXPECT_TRUE(OptB);
   EXPECT_TRUE(OptC);
-  EXPECT_TRUE(List.size() == 0);
+  EXPECT_EQ(List.size(), 0u);
   cl::ResetAllOptionOccurrences();
 
   const char *args4[] = {"prog", "--list=foo,bar"};
@@ -1864,7 +1863,7 @@ TEST(CommandLineTest, Callback) {
   EXPECT_TRUE(OptA);
   EXPECT_TRUE(OptB);
   EXPECT_TRUE(OptC);
-  EXPECT_TRUE(List.size() == 2);
+  EXPECT_EQ(List.size(), 2u);
   cl::ResetAllOptionOccurrences();
 
   const char *args5[] = {"prog", "--list=bar"};
@@ -1872,7 +1871,7 @@ TEST(CommandLineTest, Callback) {
   EXPECT_FALSE(OptA);
   EXPECT_FALSE(OptB);
   EXPECT_FALSE(OptC);
-  EXPECT_TRUE(List.size() == 1);
+  EXPECT_EQ(List.size(), 1u);
 
   cl::ResetAllOptionOccurrences();
 }
@@ -1899,9 +1898,9 @@ TEST(CommandLineTest, ConsumeAfterOnePositional) {
   EXPECT_TRUE(cl::ParseCommandLineOptions(4, Args, StringRef(), &OS));
   OS.flush();
   EXPECT_EQ("input", Input);
-  EXPECT_TRUE(ExtraArgs.size() == 2);
-  EXPECT_TRUE(ExtraArgs[0] == "arg1");
-  EXPECT_TRUE(ExtraArgs[1] == "arg2");
+  EXPECT_EQ(ExtraArgs.size(), 2u);
+  EXPECT_EQ(ExtraArgs[0], "arg1");
+  EXPECT_EQ(ExtraArgs[1], "arg2");
   EXPECT_TRUE(Errs.empty());
 }
 
@@ -1923,9 +1922,9 @@ TEST(CommandLineTest, ConsumeAfterTwoPositionals) {
   OS.flush();
   EXPECT_EQ("input1", Input1);
   EXPECT_EQ("input2", Input2);
-  EXPECT_TRUE(ExtraArgs.size() == 2);
-  EXPECT_TRUE(ExtraArgs[0] == "arg1");
-  EXPECT_TRUE(ExtraArgs[1] == "arg2");
+  EXPECT_EQ(ExtraArgs.size(), 2u);
+  EXPECT_EQ(ExtraArgs[0], "arg1");
+  EXPECT_EQ(ExtraArgs[1], "arg2");
   EXPECT_TRUE(Errs.empty());
 }
 
@@ -1946,17 +1945,17 @@ TEST(CommandLineTest, ResetAllOptionOccurrences) {
   EXPECT_TRUE(OS.str().empty());
 
   EXPECT_TRUE(Option);
-  EXPECT_EQ(1, (int)Sink.size());
+  EXPECT_EQ(1u, Sink.size());
   EXPECT_EQ("-unknown", Sink[0]);
   EXPECT_EQ("input", Input);
-  EXPECT_EQ(1, (int)ExtraArgs.size());
+  EXPECT_EQ(1u, ExtraArgs.size());
   EXPECT_EQ("-arg", ExtraArgs[0]);
 
   cl::ResetAllOptionOccurrences();
   EXPECT_FALSE(Option);
-  EXPECT_EQ(0, (int)Sink.size());
+  EXPECT_EQ(0u, Sink.size());
   EXPECT_EQ(0, Input.getNumOccurrences());
-  EXPECT_EQ(0, (int)ExtraArgs.size());
+  EXPECT_EQ(0u, ExtraArgs.size());
 }
 
 } // anonymous namespace
diff --git a/llvm/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp b/llvm/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp
index 6c9063775a59d..784b9c1bb2d27 100644
--- a/llvm/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp
+++ b/llvm/unittests/Support/DynamicLibrary/DynamicLibraryTest.cpp
@@ -66,11 +66,13 @@ TEST(DynamicLibrary, Overload) {
     EXPECT_TRUE(Err.empty());
 
     GetString GS = FuncPtr<GetString>(DL.getAddressOfSymbol("TestA"));
-    EXPECT_TRUE(GS != nullptr && GS != &TestA);
+    EXPECT_NE(GS, nullptr);
+    EXPECT_NE(GS, &TestA);
     EXPECT_EQ(StdString(GS()), "LibCall");
 
     GS = FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol("TestA"));
-    EXPECT_TRUE(GS != nullptr && GS != &TestA);
+    EXPECT_NE(GS, nullptr);
+    EXPECT_NE(GS, &TestA);
     EXPECT_EQ(StdString(GS()), "LibCall");
 
     DL = DynamicLibrary::getPermanentLibrary(nullptr, &Err);
@@ -79,32 +81,37 @@ TEST(DynamicLibrary, Overload) {
 
     // Test overloading local symbols does not occur by default
     GS = FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol("TestA"));
-    EXPECT_TRUE(GS != nullptr && GS == &TestA);
+    EXPECT_NE(GS, nullptr);
+    EXPECT_EQ(GS, &TestA);
     EXPECT_EQ(StdString(GS()), "ProcessCall");
 
     GS = FuncPtr<GetString>(DL.getAddressOfSymbol("TestA"));
-    EXPECT_TRUE(GS != nullptr && GS == &TestA);
+    EXPECT_NE(GS, nullptr);
+    EXPECT_EQ(GS, &TestA);
     EXPECT_EQ(StdString(GS()), "ProcessCall");
 
     // Test overloading by forcing library priority when searching for a symbol
     DynamicLibrary::SearchOrder = DynamicLibrary::SO_LoadedFirst;
     GS = FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol("TestA"));
-    EXPECT_TRUE(GS != nullptr && GS != &TestA);
+    EXPECT_NE(GS, nullptr);
+    EXPECT_NE(GS, &TestA);
     EXPECT_EQ(StdString(GS()), "LibCall");
 
     DynamicLibrary::AddSymbol("TestA", PtrFunc(&OverloadTestA));
     GS = FuncPtr<GetString>(DL.getAddressOfSymbol("TestA"));
-    EXPECT_TRUE(GS != nullptr && GS != &OverloadTestA);
+    EXPECT_NE(GS, nullptr);
+    EXPECT_NE(GS, &OverloadTestA);
 
     GS = FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol("TestA"));
-    EXPECT_TRUE(GS != nullptr && GS == &OverloadTestA);
+    EXPECT_NE(GS, nullptr);
+    EXPECT_EQ(GS, &OverloadTestA);
     EXPECT_EQ(StdString(GS()), "OverloadCall");
   }
   EXPECT_TRUE(FuncPtr<GetString>(DynamicLibrary::SearchForAddressOfSymbol(
                   "TestA")) == nullptr);
 
   // Check serach ordering is reset to default after call to llvm_shutdown
-  EXPECT_TRUE(DynamicLibrary::SearchOrder == DynamicLibrary::SO_Linker);
+  EXPECT_EQ(DynamicLibrary::SearchOrder, DynamicLibrary::SO_Linker);
 }
 
 TEST(DynamicLibrary, Shutdown) {
@@ -120,15 +127,15 @@ TEST(DynamicLibrary, Shutdown) {
 
     SetStrings SS_0 = FuncPtr<SetStrings>(
         DynamicLibrary::SearchForAddressOfSymbol("SetStrings"));
-    EXPECT_TRUE(SS_0 != nullptr);
+    EXPECT_NE(SS_0, nullptr);
 
     SS_0(A, B);
     EXPECT_EQ(B, "Local::Local(PipSqueak)");
 
     TestOrder TO_0 = FuncPtr<TestOrder>(
         DynamicLibrary::SearchForAddressOfSymbol("TestOrder"));
-    EXPECT_TRUE(TO_0 != nullptr);
-    
+    EXPECT_NE(TO_0, nullptr);
+
     DynamicLibrary DL2 =
         DynamicLibrary::getPermanentLibrary(LibPath(C).c_str(), &Err);
     EXPECT_TRUE(DL2.isValid());
@@ -137,13 +144,13 @@ TEST(DynamicLibrary, Shutdown) {
     // Should find latest version of symbols in SecondLib
     SetStrings SS_1 = FuncPtr<SetStrings>(
         DynamicLibrary::SearchForAddressOfSymbol("SetStrings"));
-    EXPECT_TRUE(SS_1 != nullptr);
-    EXPECT_TRUE(SS_0 != SS_1);
+    EXPECT_NE(SS_1, nullptr);
+    EXPECT_NE(SS_0, SS_1);
 
     TestOrder TO_1 = FuncPtr<TestOrder>(
         DynamicLibrary::SearchForAddressOfSymbol("TestOrder"));
-    EXPECT_TRUE(TO_1 != nullptr);
-    EXPECT_TRUE(TO_0 != TO_1);
+    EXPECT_NE(TO_1, nullptr);
+    EXPECT_NE(TO_0, TO_1);
 
     B.clear();
     SS_1(C, B);
@@ -154,8 +161,9 @@ TEST(DynamicLibrary, Shutdown) {
   }
   EXPECT_EQ(A, "Global::~Global");
   EXPECT_EQ(B, "Local::~Local");
-  EXPECT_TRUE(FuncPtr<SetStrings>(DynamicLibrary::SearchForAddressOfSymbol(
-                  "SetStrings")) == nullptr);
+  EXPECT_EQ(FuncPtr<SetStrings>(
+                DynamicLibrary::SearchForAddressOfSymbol("SetStrings")),
+            nullptr);
 
   // Test unload/destruction ordering
   EXPECT_EQ(Order.size(), 2UL);
diff --git a/llvm/unittests/Support/ErrorTest.cpp b/llvm/unittests/Support/ErrorTest.cpp
index 3ca94b2a2aef9..d4daceda2b14a 100644
--- a/llvm/unittests/Support/ErrorTest.cpp
+++ b/llvm/unittests/Support/ErrorTest.cpp
@@ -179,7 +179,7 @@ TEST(Error, HandleCustomError) {
     CaughtErrorInfo = CE.getInfo();
   });
 
-  EXPECT_TRUE(CaughtErrorInfo == 42) << "Wrong result from CustomError handler";
+  EXPECT_EQ(CaughtErrorInfo, 42) << "Wrong result from CustomError handler";
 }
 
 // Check that handler type deduction also works for handlers
@@ -253,7 +253,8 @@ TEST(Error, HandleCustomErrorWithCustomBaseClass) {
                     CaughtErrorExtraInfo = SE.getExtraInfo();
                   });
 
-  EXPECT_TRUE(CaughtErrorInfo == 42 && CaughtErrorExtraInfo == 7)
+  EXPECT_EQ(CaughtErrorInfo, 42) << "Wrong result from CustomSubError handler";
+  EXPECT_EQ(CaughtErrorExtraInfo, 7)
       << "Wrong result from CustomSubError handler";
 }
 
@@ -270,9 +271,9 @@ TEST(Error, FirstHandlerOnly) {
                   },
                   [&](const CustomError &CE) { DummyInfo = CE.getInfo(); });
 
-  EXPECT_TRUE(CaughtErrorInfo == 42 && CaughtErrorExtraInfo == 7 &&
-              DummyInfo == 0)
-      << "Activated the wrong Error handler(s)";
+  EXPECT_EQ(CaughtErrorInfo, 42) << "Activated the wrong Error handler(s)";
+  EXPECT_EQ(CaughtErrorExtraInfo, 7) << "Activated the wrong Error handler(s)";
+  EXPECT_EQ(DummyInfo, 0) << "Activated the wrong Error handler(s)";
 }
 
 // Check that general handlers shadow specific ones.
@@ -289,7 +290,11 @@ TEST(Error, HandlerShadowing) {
         DummyExtraInfo = SE.getExtraInfo();
       });
 
-  EXPECT_TRUE(CaughtErrorInfo == 42 && DummyInfo == 0 && DummyExtraInfo == 0)
+  EXPECT_EQ(CaughtErrorInfo, 42)
+      << "General Error handler did not shadow specific handler";
+  EXPECT_EQ(DummyInfo, 0)
+      << "General Error handler did not shadow specific handler";
+  EXPECT_EQ(DummyExtraInfo, 0)
       << "General Error handler did not shadow specific handler";
 }
 
@@ -317,9 +322,9 @@ TEST(Error, CheckJoinErrors) {
                     CustomErrorInfo1 = CE.getInfo();
                   });
 
-  EXPECT_TRUE(CustomErrorInfo1 == 7 && CustomErrorInfo2 == 42 &&
-              CustomErrorExtraInfo == 7)
-      << "Failed handling compound Error.";
+  EXPECT_EQ(CustomErrorInfo1, 7) << "Failed handling compound Error.";
+  EXPECT_EQ(CustomErrorInfo2, 42) << "Failed handling compound Error.";
+  EXPECT_EQ(CustomErrorExtraInfo, 7) << "Failed handling compound Error.";
 
   // Test appending a single item to a list.
   {
diff --git a/llvm/unittests/Support/FSUniqueIDTest.cpp b/llvm/unittests/Support/FSUniqueIDTest.cpp
index 6c794730e39dc..81e5088bf24cb 100644
--- a/llvm/unittests/Support/FSUniqueIDTest.cpp
+++ b/llvm/unittests/Support/FSUniqueIDTest.cpp
@@ -20,9 +20,9 @@ TEST(FSUniqueIDTest, construct) {
 }
 
 TEST(FSUniqueIDTest, equals) {
-  EXPECT_TRUE(UniqueID(20, 10) == UniqueID(20, 10));
-  EXPECT_FALSE(UniqueID(20, 20) == UniqueID(20, 10));
-  EXPECT_FALSE(UniqueID(10, 10) == UniqueID(20, 10));
+  EXPECT_EQ(UniqueID(20, 10), UniqueID(20, 10));
+  EXPECT_NE(UniqueID(20, 20), UniqueID(20, 10));
+  EXPECT_NE(UniqueID(10, 10), UniqueID(20, 10));
 }
 
 TEST(FSUniqueIDTest, less) {
diff --git a/llvm/unittests/Support/IndexedAccessorTest.cpp b/llvm/unittests/Support/IndexedAccessorTest.cpp
index 9981e91df100e..501d7a6ea2ec0 100644
--- a/llvm/unittests/Support/IndexedAccessorTest.cpp
+++ b/llvm/unittests/Support/IndexedAccessorTest.cpp
@@ -32,7 +32,7 @@ struct ArrayIndexedAccessorRange
 template <typename T>
 static void compareData(ArrayIndexedAccessorRange<T> range,
                         ArrayRef<T> referenceData) {
-  ASSERT_TRUE(referenceData.size() == range.size());
+  ASSERT_EQ(referenceData.size(), range.size());
   ASSERT_TRUE(std::equal(range.begin(), range.end(), referenceData.begin()));
 }
 
diff --git a/llvm/unittests/Support/JSONTest.cpp b/llvm/unittests/Support/JSONTest.cpp
index f28c99819d053..ecfd2a5fe1a09 100644
--- a/llvm/unittests/Support/JSONTest.cpp
+++ b/llvm/unittests/Support/JSONTest.cpp
@@ -138,9 +138,9 @@ TEST(JSONTest, Object) {
   EXPECT_FALSE(O.try_emplace("a", 4).second);
 
   auto D = O.find("d");
-  EXPECT_FALSE(D == O.end());
+  EXPECT_NE(D, O.end());
   auto E = O.find("e");
-  EXPECT_TRUE(E == O.end());
+  EXPECT_EQ(E, O.end());
 
   O.erase("b");
   O.erase(D);
diff --git a/llvm/unittests/Support/MemoryBufferTest.cpp b/llvm/unittests/Support/MemoryBufferTest.cpp
index c3e7b3c926a6c..bcd25021b5635 100644
--- a/llvm/unittests/Support/MemoryBufferTest.cpp
+++ b/llvm/unittests/Support/MemoryBufferTest.cpp
@@ -75,15 +75,15 @@ class MemoryBufferTest : public testing::Test {
 TEST_F(MemoryBufferTest, get) {
   // Default name and null-terminator flag
   OwningBuffer MB1(MemoryBuffer::getMemBuffer(data));
-  EXPECT_TRUE(nullptr != MB1.get());
+  EXPECT_NE(nullptr, MB1.get());
 
   // RequiresNullTerminator = false
   OwningBuffer MB2(MemoryBuffer::getMemBuffer(data, "one", false));
-  EXPECT_TRUE(nullptr != MB2.get());
+  EXPECT_NE(nullptr, MB2.get());
 
   // RequiresNullTerminator = true
   OwningBuffer MB3(MemoryBuffer::getMemBuffer(data, "two", true));
-  EXPECT_TRUE(nullptr != MB3.get());
+  EXPECT_NE(nullptr, MB3.get());
 
   // verify all 3 buffers point to the same address
   EXPECT_EQ(MB1->getBufferStart(), MB2->getBufferStart());
@@ -153,11 +153,11 @@ TEST_F(MemoryBufferTest, NullTerminator4K) {
 TEST_F(MemoryBufferTest, copy) {
   // copy with no name
   OwningBuffer MBC1(MemoryBuffer::getMemBufferCopy(data));
-  EXPECT_TRUE(nullptr != MBC1.get());
+  EXPECT_NE(nullptr, MBC1.get());
 
   // copy with a name
   OwningBuffer MBC2(MemoryBuffer::getMemBufferCopy(data, "copy"));
-  EXPECT_TRUE(nullptr != MBC2.get());
+  EXPECT_NE(nullptr, MBC2.get());
 
   // verify the two copies do not point to the same place
   EXPECT_NE(MBC1->getBufferStart(), MBC2->getBufferStart());
@@ -198,25 +198,25 @@ TEST_F(MemoryBufferTest, createFromPipe) {
 TEST_F(MemoryBufferTest, make_new) {
   // 0-sized buffer
   OwningBuffer Zero(WritableMemoryBuffer::getNewUninitMemBuffer(0));
-  EXPECT_TRUE(nullptr != Zero.get());
+  EXPECT_NE(nullptr, Zero.get());
 
   // uninitialized buffer with no name
   OwningBuffer One(WritableMemoryBuffer::getNewUninitMemBuffer(321));
-  EXPECT_TRUE(nullptr != One.get());
+  EXPECT_NE(nullptr, One.get());
 
   // uninitialized buffer with name
   OwningBuffer Two(WritableMemoryBuffer::getNewUninitMemBuffer(123, "bla"));
-  EXPECT_TRUE(nullptr != Two.get());
+  EXPECT_NE(nullptr, Two.get());
 
   // 0-initialized buffer with no name
   OwningBuffer Three(WritableMemoryBuffer::getNewMemBuffer(321, data));
-  EXPECT_TRUE(nullptr != Three.get());
+  EXPECT_NE(nullptr, Three.get());
   for (size_t i = 0; i < 321; ++i)
     EXPECT_EQ(0, Three->getBufferStart()[0]);
 
   // 0-initialized buffer with name
   OwningBuffer Four(WritableMemoryBuffer::getNewMemBuffer(123, "zeros"));
-  EXPECT_TRUE(nullptr != Four.get());
+  EXPECT_NE(nullptr, Four.get());
   for (size_t i = 0; i < 123; ++i)
     EXPECT_EQ(0, Four->getBufferStart()[0]);
 }
diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp
index e3fa5d0b2c491..b749448141f79 100644
--- a/llvm/unittests/Support/Path.cpp
+++ b/llvm/unittests/Support/Path.cpp
@@ -2315,7 +2315,7 @@ TEST_F(FileSystemTest, widenPath) {
   for (size_t i = 0; i < NumChars; ++i)
     Input += Pi;
   // Check that UTF-8 length already exceeds MAX_PATH.
-  EXPECT_TRUE(Input.size() > MAX_PATH);
+  EXPECT_GT(Input.size(), MAX_PATH);
   SmallVector<wchar_t, MAX_PATH + 16> Result;
   ASSERT_NO_ERROR(windows::widenPath(Input, Result));
   // Result should not start with the long path prefix.
diff --git a/llvm/unittests/Support/ProgramTest.cpp b/llvm/unittests/Support/ProgramTest.cpp
index d899026a358a0..fbbcd847e3c7a 100644
--- a/llvm/unittests/Support/ProgramTest.cpp
+++ b/llvm/unittests/Support/ProgramTest.cpp
@@ -287,8 +287,8 @@ TEST(ProgramTest, TestExecuteNegative) {
     bool ExecutionFailed;
     int RetCode = ExecuteAndWait(Executable, argv, llvm::None, {}, 0, 0, &Error,
                                  &ExecutionFailed);
-    ASSERT_TRUE(RetCode < 0) << "On error ExecuteAndWait should return 0 or "
-                                "positive value indicating the result code";
+    ASSERT_LT(RetCode, 0) << "On error ExecuteAndWait should return 0 or "
+                             "positive value indicating the result code";
     ASSERT_TRUE(ExecutionFailed);
     ASSERT_FALSE(Error.empty());
   }
diff --git a/llvm/unittests/Support/TarWriterTest.cpp b/llvm/unittests/Support/TarWriterTest.cpp
index 5a7d901a2e9d0..b5f072431fd15 100644
--- a/llvm/unittests/Support/TarWriterTest.cpp
+++ b/llvm/unittests/Support/TarWriterTest.cpp
@@ -68,7 +68,7 @@ static std::vector<uint8_t> createTar(StringRef Base, StringRef Filename) {
 
 static UstarHeader createUstar(StringRef Base, StringRef Filename) {
   std::vector<uint8_t> Buf = createTar(Base, Filename);
-  EXPECT_TRUE(Buf.size() >= sizeof(UstarHeader));
+  EXPECT_GE(Buf.size(), sizeof(UstarHeader));
   return *reinterpret_cast<const UstarHeader *>(Buf.data());
 }
 
@@ -112,7 +112,7 @@ TEST_F(TarWriterTest, LongFilename) {
 
 TEST_F(TarWriterTest, Pax) {
   std::vector<uint8_t> Buf = createTar("", std::string(200, 'x'));
-  EXPECT_TRUE(Buf.size() >= 1024);
+  EXPECT_GE(Buf.size(), 1024u);
 
   auto *Hdr = reinterpret_cast<const UstarHeader *>(Buf.data());
   EXPECT_EQ("", StringRef(Hdr->Prefix));
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index 768ec83a0e126..a9a8de6d291a3 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -687,13 +687,13 @@ TEST(TargetParserTest, ARMExtensionFeatures) {
     Features.clear();
     ARM::getExtensionFeatures(E.first, Features);
     EXPECT_TRUE(llvm::is_contained(Features, E.second.at(0)));
-    EXPECT_TRUE(Extensions.size() == Features.size());
+    EXPECT_EQ(Extensions.size(), Features.size());
 
     // test -extension
     Features.clear();
     ARM::getExtensionFeatures(~E.first, Features);
     EXPECT_TRUE(llvm::is_contained(Features, E.second.at(1)));
-    EXPECT_TRUE(Extensions.size() == Features.size());
+    EXPECT_EQ(Extensions.size(), Features.size());
   }
 }
 
@@ -701,10 +701,12 @@ TEST(TargetParserTest, ARMFPUFeatures) {
   std::vector<StringRef> Features;
   for (ARM::FPUKind FK = static_cast<ARM::FPUKind>(0);
        FK <= ARM::FPUKind::FK_LAST;
-       FK = static_cast<ARM::FPUKind>(static_cast<unsigned>(FK) + 1))
-    EXPECT_TRUE((FK == ARM::FK_INVALID || FK >= ARM::FK_LAST)
-                    ? !ARM::getFPUFeatures(FK, Features)
-                    : ARM::getFPUFeatures(FK, Features));
+       FK = static_cast<ARM::FPUKind>(static_cast<unsigned>(FK) + 1)) {
+    if (FK == ARM::FK_INVALID || FK >= ARM::FK_LAST)
+      EXPECT_FALSE(ARM::getFPUFeatures(FK, Features));
+    else
+      EXPECT_TRUE(ARM::getFPUFeatures(FK, Features));
+  }
 }
 
 TEST(TargetParserTest, ARMArchExtFeature) {
@@ -1448,7 +1450,7 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
   EXPECT_TRUE(!Features.size());
 
   AArch64::getExtensionFeatures(ExtVal, Features);
-  EXPECT_TRUE(Extensions.size() == Features.size());
+  EXPECT_EQ(Extensions.size(), Features.size());
 
   EXPECT_TRUE(llvm::is_contained(Features, "+crc"));
   EXPECT_TRUE(llvm::is_contained(Features, "+crypto"));
@@ -1477,10 +1479,12 @@ TEST(TargetParserTest, AArch64ExtensionFeatures) {
 TEST(TargetParserTest, AArch64ArchFeatures) {
   std::vector<StringRef> Features;
 
-  for (auto AK : AArch64::ArchKinds)
-    EXPECT_TRUE((AK == AArch64::ArchKind::INVALID)
-                    ? !AArch64::getArchFeatures(AK, Features)
-                    : AArch64::getArchFeatures(AK, Features));
+  for (auto AK : AArch64::ArchKinds) {
+    if (AK == AArch64::ArchKind::INVALID)
+      EXPECT_FALSE(AArch64::getArchFeatures(AK, Features));
+    else
+      EXPECT_TRUE(AArch64::getArchFeatures(AK, Features));
+  }
 }
 
 TEST(TargetParserTest, AArch64ArchExtFeature) {
diff --git a/llvm/unittests/Support/TimerTest.cpp b/llvm/unittests/Support/TimerTest.cpp
index 9a9b67eaa9f78..09545eb6939ae 100644
--- a/llvm/unittests/Support/TimerTest.cpp
+++ b/llvm/unittests/Support/TimerTest.cpp
@@ -45,7 +45,7 @@ TEST(Timer, Additivity) {
   T1.stopTimer();
   auto TR2 = T1.getTotalTime();
 
-  EXPECT_TRUE(TR1 < TR2);
+  EXPECT_LT(TR1, TR2);
 }
 
 TEST(Timer, CheckIfTriggered) {
diff --git a/llvm/unittests/Support/UnicodeTest.cpp b/llvm/unittests/Support/UnicodeTest.cpp
index 6ce323dc8f380..09f1cb3e1ff66 100644
--- a/llvm/unittests/Support/UnicodeTest.cpp
+++ b/llvm/unittests/Support/UnicodeTest.cpp
@@ -95,9 +95,9 @@ TEST(Unicode, isPrintable) {
     UTF32 *Target32 = &buf32[0];
     auto status = ConvertUTF8toUTF32(&Target8, Target8 + 1, &Target32,
                                      Target32 + 1, strictConversion);
-    EXPECT_TRUE(status == conversionOK);
-    EXPECT_TRUE((columnWidthUTF8(reinterpret_cast<const char *>(buf8)) == 1) ==
-                (bool)isPrintable(buf32[0]));
+    EXPECT_EQ(status, conversionOK);
+    EXPECT_EQ((columnWidthUTF8(reinterpret_cast<const char *>(buf8)) == 1),
+              (bool)isPrintable(buf32[0]));
   }
 }
 
diff --git a/llvm/unittests/Support/VirtualFileSystemTest.cpp b/llvm/unittests/Support/VirtualFileSystemTest.cpp
index caae58f74f636..6b191c6401685 100644
--- a/llvm/unittests/Support/VirtualFileSystemTest.cpp
+++ b/llvm/unittests/Support/VirtualFileSystemTest.cpp
@@ -567,7 +567,8 @@ TEST(VirtualFileSystemTest, BasicRealFSRecursiveIteration) {
   for (const std::string &Name : Contents) {
     ASSERT_FALSE(Name.empty());
     int Index = Name[Name.size() - 1] - 'a';
-    ASSERT_TRUE(Index >= 0 && Index < 4);
+    ASSERT_GE(Index, 0);
+    ASSERT_LT(Index, 4);
     Counts[Index]++;
   }
   EXPECT_EQ(1, Counts[0]); // a
@@ -644,7 +645,8 @@ TEST(VirtualFileSystemTest, BasicRealFSRecursiveIterationNoPush) {
     for (const std::string &Name : Contents) {
       ASSERT_FALSE(Name.empty());
       int Index = Name[Name.size() - 1] - 'a';
-      ASSERT_TRUE(Index >= 0 && Index < 7);
+      ASSERT_GE(Index, 0);
+      ASSERT_LT(Index, 7);
       Counts[Index]++;
     }
     EXPECT_EQ(1, Counts[0]); // a
@@ -1183,9 +1185,9 @@ TEST_F(InMemoryFileSystemTest, AddHardLinkToFile) {
   FS.addFile(Target, 0, MemoryBuffer::getMemBuffer("content of target"));
   EXPECT_TRUE(FS.addHardLink(FromLink, Target));
   EXPECT_THAT(FromLink, IsHardLinkTo(&FS, Target));
-  EXPECT_TRUE(FS.status(FromLink)->getSize() == FS.status(Target)->getSize());
-  EXPECT_TRUE(FS.getBufferForFile(FromLink)->get()->getBuffer() ==
-              FS.getBufferForFile(Target)->get()->getBuffer());
+  EXPECT_EQ(FS.status(FromLink)->getSize(), FS.status(Target)->getSize());
+  EXPECT_EQ(FS.getBufferForFile(FromLink)->get()->getBuffer(),
+            FS.getBufferForFile(Target)->get()->getBuffer());
 }
 
 TEST_F(InMemoryFileSystemTest, AddHardLinkInChainPattern) {
@@ -1381,7 +1383,7 @@ TEST_F(VFSFromYAMLTest, MappedFiles) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   IntrusiveRefCntPtr<vfs::OverlayFileSystem> O(
       new vfs::OverlayFileSystem(Lower));
@@ -1474,7 +1476,7 @@ TEST_F(VFSFromYAMLTest, MappedRoot) {
                         "]\n"
                         "}",
                         Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   IntrusiveRefCntPtr<vfs::OverlayFileSystem> O(
       new vfs::OverlayFileSystem(Lower));
@@ -1522,7 +1524,7 @@ TEST_F(VFSFromYAMLTest, RemappedDirectoryOverlay) {
                         "              ]\n"
                         "}]}",
                         Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   IntrusiveRefCntPtr<vfs::OverlayFileSystem> O(
       new vfs::OverlayFileSystem(Lower));
@@ -1567,7 +1569,7 @@ TEST_F(VFSFromYAMLTest, RemappedDirectoryOverlayNoExternalNames) {
                         "              ]\n"
                         "}]}",
                         Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   ErrorOr<vfs::Status> S = FS->status("//root/foo");
   ASSERT_FALSE(S.getError());
@@ -1608,7 +1610,7 @@ TEST_F(VFSFromYAMLTest, RemappedDirectoryOverlayNoFallthrough) {
                         "              ]\n"
                         "}]}",
                         Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   ErrorOr<vfs::Status> S = Lower->status("//root/foo");
   ASSERT_FALSE(S.getError());
@@ -1752,7 +1754,7 @@ TEST_F(VFSFromYAMLTest, CaseInsensitive) {
       "              ]\n"
       "}]}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   IntrusiveRefCntPtr<vfs::OverlayFileSystem> O(
       new vfs::OverlayFileSystem(Lower));
@@ -1788,7 +1790,7 @@ TEST_F(VFSFromYAMLTest, CaseSensitive) {
       "              ]\n"
       "}]}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   IntrusiveRefCntPtr<vfs::OverlayFileSystem> O(
       new vfs::OverlayFileSystem(Lower));
@@ -1930,7 +1932,7 @@ TEST_F(VFSFromYAMLTest, UseExternalName) {
                         "  }\n"
                         "] }",
                         Lower);
-  ASSERT_TRUE(nullptr != FS.get());
+  ASSERT_NE(nullptr, FS.get());
 
   // default true
   EXPECT_EQ("//root/external/file", FS->status("//root/A")->getName());
@@ -1954,7 +1956,7 @@ TEST_F(VFSFromYAMLTest, UseExternalName) {
                          "  }\n"
                          "] }",
                          Lower);
-  ASSERT_TRUE(nullptr != FS.get());
+  ASSERT_NE(nullptr, FS.get());
 
   // default
   EXPECT_EQ("//root/A", FS->status("//root/A")->getName());
@@ -1974,7 +1976,7 @@ TEST_F(VFSFromYAMLTest, MultiComponentPath) {
                         "    'external-contents': '//root/other' }]\n"
                         "}",
                         Lower);
-  ASSERT_TRUE(nullptr != FS.get());
+  ASSERT_NE(nullptr, FS.get());
   EXPECT_FALSE(FS->status("//root/path/to/file").getError());
   EXPECT_FALSE(FS->status("//root/path/to").getError());
   EXPECT_FALSE(FS->status("//root/path").getError());
@@ -1988,7 +1990,7 @@ TEST_F(VFSFromYAMLTest, MultiComponentPath) {
       "                    'external-contents': '//root/other' }]}]\n"
       "}",
       Lower);
-  ASSERT_TRUE(nullptr != FS.get());
+  ASSERT_NE(nullptr, FS.get());
   EXPECT_FALSE(FS->status("//root/path/to/file").getError());
   EXPECT_FALSE(FS->status("//root/path/to").getError());
   EXPECT_FALSE(FS->status("//root/path").getError());
@@ -2002,7 +2004,7 @@ TEST_F(VFSFromYAMLTest, MultiComponentPath) {
       "                    'external-contents': '//root/other' }]}]\n"
       "}",
       Lower);
-  ASSERT_TRUE(nullptr != FS.get());
+  ASSERT_NE(nullptr, FS.get());
   EXPECT_FALSE(FS->status("//root/path/to/file").getError());
   EXPECT_FALSE(FS->status("//root/path/to").getError());
   EXPECT_FALSE(FS->status("//root/path").getError());
@@ -2021,7 +2023,7 @@ TEST_F(VFSFromYAMLTest, TrailingSlashes) {
       "                    'external-contents': '//root/other' }]}]\n"
       "}",
       Lower);
-  ASSERT_TRUE(nullptr != FS.get());
+  ASSERT_NE(nullptr, FS.get());
   EXPECT_FALSE(FS->status("//root/path/to/file").getError());
   EXPECT_FALSE(FS->status("//root/path/to").getError());
   EXPECT_FALSE(FS->status("//root/path").getError());
@@ -2057,7 +2059,7 @@ TEST_F(VFSFromYAMLTest, DirectoryIteration) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   IntrusiveRefCntPtr<vfs::OverlayFileSystem> O(
       new vfs::OverlayFileSystem(Lower));
@@ -2107,7 +2109,7 @@ TEST_F(VFSFromYAMLTest, DirectoryIterationSameDirMultipleEntries) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   IntrusiveRefCntPtr<vfs::OverlayFileSystem> O(
       new vfs::OverlayFileSystem(Lower));
@@ -2142,7 +2144,7 @@ TEST_F(VFSFromYAMLTest, RecursiveDirectoryIterationLevel) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   IntrusiveRefCntPtr<vfs::OverlayFileSystem> O(
       new vfs::OverlayFileSystem(Lower));
@@ -2238,7 +2240,7 @@ TEST_F(VFSFromYAMLTest, NonFallthroughDirectoryIteration) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   std::error_code EC;
   checkContents(FS->dir_begin("//root/", EC),
@@ -2266,7 +2268,7 @@ TEST_F(VFSFromYAMLTest, DirectoryIterationWithDuplicates) {
       "]\n"
       "}",
 	  Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   std::error_code EC;
   checkContents(FS->dir_begin("//root/", EC),
@@ -2295,7 +2297,7 @@ TEST_F(VFSFromYAMLTest, DirectoryIterationErrorInVFSLayer) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   std::error_code EC;
   checkContents(FS->dir_begin("//root/foo", EC),
@@ -2328,7 +2330,7 @@ TEST_F(VFSFromYAMLTest, GetRealPath) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   // Regular file present in underlying file system.
   SmallString<16> RealPath;
@@ -2370,7 +2372,7 @@ TEST_F(VFSFromYAMLTest, WorkingDirectory) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
   std::error_code EC = FS->setCurrentWorkingDirectory("//root/bar");
   ASSERT_FALSE(EC);
 
@@ -2439,10 +2441,10 @@ TEST_F(VFSFromYAMLTest, WorkingDirectoryFallthrough) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
   std::error_code EC = FS->setCurrentWorkingDirectory("//root/");
   ASSERT_FALSE(EC);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   llvm::ErrorOr<vfs::Status> Status = FS->status("bar/a");
   ASSERT_FALSE(Status.getError());
@@ -2511,10 +2513,10 @@ TEST_F(VFSFromYAMLTest, WorkingDirectoryFallthroughInvalid) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
   std::error_code EC = FS->setCurrentWorkingDirectory("//root/");
   ASSERT_FALSE(EC);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   llvm::ErrorOr<vfs::Status> Status = FS->status("bar/a");
   ASSERT_FALSE(Status.getError());
@@ -2548,10 +2550,10 @@ TEST_F(VFSFromYAMLTest, VirtualWorkingDirectory) {
       "]\n"
       "}",
       Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
   std::error_code EC = FS->setCurrentWorkingDirectory("//root/bar");
   ASSERT_FALSE(EC);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   llvm::ErrorOr<vfs::Status> Status = FS->status("a");
   ASSERT_FALSE(Status.getError());
@@ -2596,7 +2598,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTest) {
   Lower->addDirectory("//root/h");
 
   IntrusiveRefCntPtr<vfs::FileSystem> FS = getFromYAMLRawString(Buffer, Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   EXPECT_TRUE(FS->exists(_a.path()));
   EXPECT_TRUE(FS->exists(_ab.path()));
@@ -2636,7 +2638,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTest2) {
 
   IntrusiveRefCntPtr<ErrorDummyFileSystem> Lower(new ErrorDummyFileSystem());
   IntrusiveRefCntPtr<vfs::FileSystem> FS = getFromYAMLRawString(Buffer, Lower);
-  EXPECT_TRUE(FS.get() != nullptr);
+  EXPECT_NE(FS.get(), nullptr);
 }
 
 TEST_F(VFSFromYAMLTest, YAMLVFSWriterTest3) {
@@ -2669,7 +2671,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTest3) {
 
   IntrusiveRefCntPtr<ErrorDummyFileSystem> Lower(new ErrorDummyFileSystem());
   IntrusiveRefCntPtr<vfs::FileSystem> FS = getFromYAMLRawString(Buffer, Lower);
-  EXPECT_TRUE(FS.get() != nullptr);
+  EXPECT_NE(FS.get(), nullptr);
 }
 
 TEST_F(VFSFromYAMLTest, YAMLVFSWriterTestHandleDirs) {
@@ -2689,7 +2691,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTestHandleDirs) {
   OS.flush();
 
   // We didn't add a single file - only directories.
-  EXPECT_TRUE(Buffer.find("'type': 'file'") == std::string::npos);
+  EXPECT_EQ(Buffer.find("'type': 'file'"), std::string::npos);
 
   IntrusiveRefCntPtr<ErrorDummyFileSystem> Lower(new ErrorDummyFileSystem());
   Lower->addDirectory("//root/a");
@@ -2701,7 +2703,7 @@ TEST_F(VFSFromYAMLTest, YAMLVFSWriterTestHandleDirs) {
   Lower->addRegularFile("//root/c/c");
 
   IntrusiveRefCntPtr<vfs::FileSystem> FS = getFromYAMLRawString(Buffer, Lower);
-  ASSERT_TRUE(FS.get() != nullptr);
+  ASSERT_NE(FS.get(), nullptr);
 
   EXPECT_FALSE(FS->exists(_a.path("a")));
   EXPECT_FALSE(FS->exists(_b.path("b")));
diff --git a/llvm/unittests/Support/YAMLIOTest.cpp b/llvm/unittests/Support/YAMLIOTest.cpp
index 3e1efbc755011..a9230c2ca4767 100644
--- a/llvm/unittests/Support/YAMLIOTest.cpp
+++ b/llvm/unittests/Support/YAMLIOTest.cpp
@@ -398,8 +398,8 @@ TEST(YAMLIO, TestReadBuiltInTypes) {
   yin >> map;
 
   EXPECT_FALSE(yin.error());
-  EXPECT_TRUE(map.str.equals("hello there"));
-  EXPECT_TRUE(map.stdstr == "hello where?");
+  EXPECT_EQ(map.str, "hello there");
+  EXPECT_EQ(map.stdstr, "hello where?");
   EXPECT_EQ(map.u64, 5000000000ULL);
   EXPECT_EQ(map.u32, 4000000000U);
   EXPECT_EQ(map.u16, 65000);
@@ -454,23 +454,23 @@ TEST(YAMLIO, TestReadWriteBuiltInTypes) {
     yin >> map;
 
     EXPECT_FALSE(yin.error());
-    EXPECT_TRUE(map.str.equals("one two"));
-    EXPECT_TRUE(map.stdstr == "three four");
-    EXPECT_EQ(map.u64,      6000000000ULL);
-    EXPECT_EQ(map.u32,      3000000000U);
-    EXPECT_EQ(map.u16,      50000);
-    EXPECT_EQ(map.u8,       254);
-    EXPECT_EQ(map.b,        true);
-    EXPECT_EQ(map.s64,      -6000000000LL);
-    EXPECT_EQ(map.s32,      -2000000000L);
-    EXPECT_EQ(map.s16,      -32000);
-    EXPECT_EQ(map.s8,       -128);
-    EXPECT_EQ(map.f,        3.25);
-    EXPECT_EQ(map.d,        -2.8625);
-    EXPECT_EQ(map.h8,       Hex8(254));
-    EXPECT_EQ(map.h16,      Hex16(50000));
-    EXPECT_EQ(map.h32,      Hex32(3000000000U));
-    EXPECT_EQ(map.h64,      Hex64(6000000000LL));
+    EXPECT_EQ(map.str, "one two");
+    EXPECT_EQ(map.stdstr, "three four");
+    EXPECT_EQ(map.u64, 6000000000ULL);
+    EXPECT_EQ(map.u32, 3000000000U);
+    EXPECT_EQ(map.u16, 50000);
+    EXPECT_EQ(map.u8, 254);
+    EXPECT_EQ(map.b, true);
+    EXPECT_EQ(map.s64, -6000000000LL);
+    EXPECT_EQ(map.s32, -2000000000L);
+    EXPECT_EQ(map.s16, -32000);
+    EXPECT_EQ(map.s8, -128);
+    EXPECT_EQ(map.f, 3.25);
+    EXPECT_EQ(map.d, -2.8625);
+    EXPECT_EQ(map.h8, Hex8(254));
+    EXPECT_EQ(map.h16, Hex16(50000));
+    EXPECT_EQ(map.h32, Hex32(3000000000U));
+    EXPECT_EQ(map.h64, Hex64(6000000000LL));
   }
 }
 
@@ -785,18 +785,18 @@ TEST(YAMLIO, TestReadWriteStringTypes) {
     yin >> map;
 
     EXPECT_FALSE(yin.error());
-    EXPECT_TRUE(map.str1.equals("'aaa"));
-    EXPECT_TRUE(map.str2.equals("\"bbb"));
-    EXPECT_TRUE(map.str3.equals("`ccc"));
-    EXPECT_TRUE(map.str4.equals("@ddd"));
-    EXPECT_TRUE(map.str5.equals(""));
-    EXPECT_TRUE(map.str6.equals("0000000004000000"));
-    EXPECT_TRUE(map.stdstr1 == "'eee");
-    EXPECT_TRUE(map.stdstr2 == "\"fff");
-    EXPECT_TRUE(map.stdstr3 == "`ggg");
-    EXPECT_TRUE(map.stdstr4 == "@hhh");
-    EXPECT_TRUE(map.stdstr5 == "");
-    EXPECT_TRUE(map.stdstr6 == "0000000004000000");
+    EXPECT_EQ(map.str1, "'aaa");
+    EXPECT_EQ(map.str2, "\"bbb");
+    EXPECT_EQ(map.str3, "`ccc");
+    EXPECT_EQ(map.str4, "@ddd");
+    EXPECT_EQ(map.str5, "");
+    EXPECT_EQ(map.str6, "0000000004000000");
+    EXPECT_EQ(map.stdstr1, "'eee");
+    EXPECT_EQ(map.stdstr2, "\"fff");
+    EXPECT_EQ(map.stdstr3, "`ggg");
+    EXPECT_EQ(map.stdstr4, "@hhh");
+    EXPECT_EQ(map.stdstr5, "");
+    EXPECT_EQ(map.stdstr6, "0000000004000000");
     EXPECT_EQ(std::string("\0a\0b\0", 5), map.stdstr13);
   }
 }
@@ -2208,10 +2208,10 @@ TEST(YAMLIO, TestReadBuiltInTypesHex8Error) {
   yin2 >> seq2;
   EXPECT_TRUE(!!yin2.error());
 
-  EXPECT_TRUE(seq.size() == 3);
-  EXPECT_TRUE(seq.size() == seq2.size());
+  EXPECT_EQ(seq.size(), 3u);
+  EXPECT_EQ(seq.size(), seq2.size());
   for (size_t i = 0; i < seq.size(); ++i)
-    EXPECT_TRUE(seq[i] == seq2[i]);
+    EXPECT_EQ(seq[i], seq2[i]);
 }
 
 
@@ -2238,10 +2238,10 @@ TEST(YAMLIO, TestReadBuiltInTypesHex16Error) {
   yin2 >> seq2;
   EXPECT_TRUE(!!yin2.error());
 
-  EXPECT_TRUE(seq.size() == 3);
-  EXPECT_TRUE(seq.size() == seq2.size());
+  EXPECT_EQ(seq.size(), 3u);
+  EXPECT_EQ(seq.size(), seq2.size());
   for (size_t i = 0; i < seq.size(); ++i)
-    EXPECT_TRUE(seq[i] == seq2[i]);
+    EXPECT_EQ(seq[i], seq2[i]);
 }
 
 //
@@ -2268,10 +2268,10 @@ TEST(YAMLIO, TestReadBuiltInTypesHex32Error) {
   yin2 >> seq2;
   EXPECT_TRUE(!!yin2.error());
 
-  EXPECT_TRUE(seq.size() == 3);
-  EXPECT_TRUE(seq.size() == seq2.size());
+  EXPECT_EQ(seq.size(), 3u);
+  EXPECT_EQ(seq.size(), seq2.size());
   for (size_t i = 0; i < seq.size(); ++i)
-    EXPECT_TRUE(seq[i] == seq2[i]);
+    EXPECT_EQ(seq[i], seq2[i]);
 }
 
 //
@@ -2297,10 +2297,10 @@ TEST(YAMLIO, TestReadBuiltInTypesHex64Error) {
   yin2 >> seq2;
   EXPECT_TRUE(!!yin2.error());
 
-  EXPECT_TRUE(seq.size() == 3);
-  EXPECT_TRUE(seq.size() == seq2.size());
+  EXPECT_EQ(seq.size(), 3u);
+  EXPECT_EQ(seq.size(), seq2.size());
   for (size_t i = 0; i < seq.size(); ++i)
-    EXPECT_TRUE(seq[i] == seq2[i]);
+    EXPECT_EQ(seq[i], seq2[i]);
 }
 
 TEST(YAMLIO, TestMalformedMapFailsGracefully) {
diff --git a/llvm/unittests/Support/YAMLParserTest.cpp b/llvm/unittests/Support/YAMLParserTest.cpp
index adda6ce9f5c30..692d963828318 100644
--- a/llvm/unittests/Support/YAMLParserTest.cpp
+++ b/llvm/unittests/Support/YAMLParserTest.cpp
@@ -269,9 +269,9 @@ TEST(YAMLParser, SameNodeIteratorOperatorNotEquals) {
   auto Begin = Node->begin();
   auto End = Node->end();
 
-  EXPECT_TRUE(Begin != End);
-  EXPECT_FALSE(Begin != Begin);
-  EXPECT_FALSE(End != End);
+  EXPECT_NE(Begin, End);
+  EXPECT_EQ(Begin, Begin);
+  EXPECT_EQ(End, End);
 }
 
 TEST(YAMLParser, SameNodeIteratorOperatorEquals) {
@@ -284,9 +284,9 @@ TEST(YAMLParser, SameNodeIteratorOperatorEquals) {
   auto Begin = Node->begin();
   auto End = Node->end();
 
-  EXPECT_FALSE(Begin == End);
-  EXPECT_TRUE(Begin == Begin);
-  EXPECT_TRUE(End == End);
+  EXPECT_NE(Begin, End);
+  EXPECT_EQ(Begin, Begin);
+  EXPECT_EQ(End, End);
 }
 
 TEST(YAMLParser, DifferentNodesIteratorOperatorNotEquals) {
@@ -305,9 +305,9 @@ TEST(YAMLParser, DifferentNodesIteratorOperatorNotEquals) {
   auto AnotherBegin = AnotherNode->begin();
   auto AnotherEnd = AnotherNode->end();
 
-  EXPECT_TRUE(Begin != AnotherBegin);
-  EXPECT_TRUE(Begin != AnotherEnd);
-  EXPECT_FALSE(End != AnotherEnd);
+  EXPECT_NE(Begin, AnotherBegin);
+  EXPECT_NE(Begin, AnotherEnd);
+  EXPECT_EQ(End, AnotherEnd);
 }
 
 TEST(YAMLParser, DifferentNodesIteratorOperatorEquals) {
@@ -326,9 +326,9 @@ TEST(YAMLParser, DifferentNodesIteratorOperatorEquals) {
   auto AnotherBegin = AnotherNode->begin();
   auto AnotherEnd = AnotherNode->end();
 
-  EXPECT_FALSE(Begin == AnotherBegin);
-  EXPECT_FALSE(Begin == AnotherEnd);
-  EXPECT_TRUE(End == AnotherEnd);
+  EXPECT_NE(Begin, AnotherBegin);
+  EXPECT_NE(Begin, AnotherEnd);
+  EXPECT_EQ(End, AnotherEnd);
 }
 
 TEST(YAMLParser, FlowSequenceTokensOutsideFlowSequence) {
diff --git a/llvm/unittests/Support/raw_ostream_test.cpp b/llvm/unittests/Support/raw_ostream_test.cpp
index 5125af8ce8a4b..8c695300b745d 100644
--- a/llvm/unittests/Support/raw_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_ostream_test.cpp
@@ -469,7 +469,7 @@ TEST(raw_ostreamTest, reserve_stream) {
   OS << "11111111111111111111";
   uint64_t CurrentPos = OS.tell();
   OS.reserveExtraSpace(1000);
-  EXPECT_TRUE(Str.capacity() >= CurrentPos + 1000);
+  EXPECT_GE(Str.capacity(), CurrentPos + 1000);
   OS << "hello";
   OS << 1;
   OS << 'w' << 'o' << 'r' << 'l' << 'd';

From 51c53a0791cd2794365cab9917922ce1e324b379 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Fri, 21 Jan 2022 14:18:17 +0100
Subject: [PATCH 177/946] Add apple-specific missing include

---
 llvm/lib/Support/PrettyStackTrace.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Support/PrettyStackTrace.cpp b/llvm/lib/Support/PrettyStackTrace.cpp
index 5d3335d001f31..fa91405fee10a 100644
--- a/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/llvm/lib/Support/PrettyStackTrace.cpp
@@ -20,6 +20,10 @@
 #include "llvm/Support/Watchdog.h"
 #include "llvm/Support/raw_ostream.h"
 
+#ifdef __APPLE__
+#include "llvm/ADT/SmallString.h"
+#endif
+
 #include <atomic>
 #include <cassert>
 #include <cstdarg>

From 2a9e33db4f0a558572309b29a7d247185b4c21d1 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Fri, 21 Jan 2022 14:28:47 +0100
Subject: [PATCH 178/946] Add ms-specific missing header in
 Support/InitLLVM.cpp

---
 llvm/lib/Support/InitLLVM.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/InitLLVM.cpp b/llvm/lib/Support/InitLLVM.cpp
index 8c6f86f68fa24..2b7173b289403 100644
--- a/llvm/lib/Support/InitLLVM.cpp
+++ b/llvm/lib/Support/InitLLVM.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Support/SwapByteOrder.h"
 
 #ifdef _WIN32
+#include "llvm/Support/Error.h"
 #include "llvm/Support/Windows/WindowsSupport.h"
 #endif
 

From 622354a522073b0a048a88c957b161fb376a40eb Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Wed, 12 Jan 2022 16:09:58 +0100
Subject: [PATCH 179/946] [llvm][ADT] Implement `BitVector::{pop_,}back`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LLVM Programmer’s Manual strongly discourages the use of `std::vector<bool>` and suggests `llvm::BitVector` as a possible replacement.

Currently, some users of `std::vector<bool>` cannot switch to `llvm::BitVector` because it doesn't implement the `pop_back()` and `back()` functions.

To enable easy transition of `std::vector<bool>` users, this patch implements `llvm::BitVector::pop_back()` and `llvm::BitVector::back()`.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D117115
---
 .../readability/FunctionSizeCheck.cpp         |  3 ++-
 clang-tools-extra/clangd/SourceCode.cpp       |  3 ++-
 clang/lib/Format/UnwrappedLineParser.cpp      |  4 +--
 clang/lib/Format/UnwrappedLineParser.h        |  3 ++-
 llvm/include/llvm/ADT/BitVector.h             | 12 +++++++++
 llvm/include/llvm/ADT/SmallBitVector.h        | 12 +++++++++
 llvm/lib/MC/MCParser/MasmParser.cpp           |  3 ++-
 llvm/unittests/ADT/BitVectorTest.cpp          | 26 +++++++++++++++++++
 8 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp b/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp
index d98bec2ebdf1f..9d3300d2787ca 100644
--- a/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/FunctionSizeCheck.cpp
@@ -9,6 +9,7 @@
 #include "FunctionSizeCheck.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "llvm/ADT/BitVector.h"
 
 using namespace clang::ast_matchers;
 
@@ -118,7 +119,7 @@ class FunctionASTVisitor : public RecursiveASTVisitor<FunctionASTVisitor> {
     std::vector<SourceLocation> NestingThresholders;
   };
   FunctionInfo Info;
-  std::vector<bool> TrackedParent;
+  llvm::BitVector TrackedParent;
   unsigned StructNesting = 0;
   unsigned CurrentNestingLevel = 0;
 };
diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp
index 6f6d936ac3a7e..e005fe4b37361 100644
--- a/clang-tools-extra/clangd/SourceCode.cpp
+++ b/clang-tools-extra/clangd/SourceCode.cpp
@@ -27,6 +27,7 @@
 #include "clang/Tooling/Core/Replacement.h"
 #include "clang/Tooling/Syntax/Tokens.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
@@ -663,7 +664,7 @@ void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts,
   // Stack of enclosing namespaces, e.g. {"clang", "clangd"}
   std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
   // Stack counts open braces. true if the brace opened a namespace.
-  std::vector<bool> BraceStack;
+  llvm::BitVector BraceStack;
 
   enum {
     Default,
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index f466111260962..67b7b3937b07e 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -58,7 +58,7 @@ namespace {
 
 class ScopedDeclarationState {
 public:
-  ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
+  ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
                          bool MustBeDeclaration)
       : Line(Line), Stack(Stack) {
     Line.MustBeDeclaration = MustBeDeclaration;
@@ -74,7 +74,7 @@ class ScopedDeclarationState {
 
 private:
   UnwrappedLine &Line;
-  std::vector<bool> &Stack;
+  llvm::BitVector &Stack;
 };
 
 static bool isLineComment(const FormatToken &FormatTok) {
diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h
index 8d4d4dca7633f..3f64d57c7bff7 100644
--- a/clang/lib/Format/UnwrappedLineParser.h
+++ b/clang/lib/Format/UnwrappedLineParser.h
@@ -18,6 +18,7 @@
 #include "FormatToken.h"
 #include "clang/Basic/IdentifierTable.h"
 #include "clang/Format/Format.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/Support/Regex.h"
 #include <stack>
 #include <vector>
@@ -231,7 +232,7 @@ class UnwrappedLineParser {
 
   // We store for each line whether it must be a declaration depending on
   // whether we are in a compound statement or not.
-  std::vector<bool> DeclarationScopeStack;
+  llvm::BitVector DeclarationScopeStack;
 
   const FormatStyle &Style;
   const AdditionalKeywords &Keywords;
diff --git a/llvm/include/llvm/ADT/BitVector.h b/llvm/include/llvm/ADT/BitVector.h
index cd1964cbdd98d..fff4a8f578d2a 100644
--- a/llvm/include/llvm/ADT/BitVector.h
+++ b/llvm/include/llvm/ADT/BitVector.h
@@ -444,6 +444,12 @@ class BitVector {
     return (Bits[Idx / BITWORD_SIZE] & Mask) != 0;
   }
 
+  /// Return the last element in the vector.
+  bool back() const {
+    assert(!empty() && "Getting last element of empty vector.");
+    return (*this)[size() - 1];
+  }
+
   bool test(unsigned Idx) const {
     return (*this)[Idx];
   }
@@ -465,6 +471,12 @@ class BitVector {
       set(OldSize);
   }
 
+  /// Pop one bit from the end of the vector.
+  void pop_back() {
+    assert(!empty() && "Empty vector has no element to pop.");
+    resize(size() - 1);
+  }
+
   /// Test if any common bits are set.
   bool anyCommon(const BitVector &RHS) const {
     unsigned ThisWords = Bits.size();
diff --git a/llvm/include/llvm/ADT/SmallBitVector.h b/llvm/include/llvm/ADT/SmallBitVector.h
index 51ee5dbbce05a..17be317a10d72 100644
--- a/llvm/include/llvm/ADT/SmallBitVector.h
+++ b/llvm/include/llvm/ADT/SmallBitVector.h
@@ -462,6 +462,12 @@ class SmallBitVector {
     return getPointer()->operator[](Idx);
   }
 
+  /// Return the last element in the vector.
+  bool back() const {
+    assert(!empty() && "Getting last element of empty vector.");
+    return (*this)[size() - 1];
+  }
+
   bool test(unsigned Idx) const {
     return (*this)[Idx];
   }
@@ -471,6 +477,12 @@ class SmallBitVector {
     resize(size() + 1, Val);
   }
 
+  /// Pop one bit from the end of the vector.
+  void pop_back() {
+    assert(!empty() && "Empty vector has no element to pop.");
+    resize(size() - 1);
+  }
+
   /// Test if any common bits are set.
   bool anyCommon(const SmallBitVector &RHS) const {
     if (isSmall() && RHS.isSmall())
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index e2dfd339e93e2..a888e830182fb 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
@@ -379,7 +380,7 @@ class MasmParser : public MCAsmParser {
   /// time of assembly
   struct tm TM;
 
-  std::vector<bool> EndStatementAtEOFStack;
+  BitVector EndStatementAtEOFStack;
 
   AsmCond TheCondState;
   std::vector<AsmCond> TheCondStack;
diff --git a/llvm/unittests/ADT/BitVectorTest.cpp b/llvm/unittests/ADT/BitVectorTest.cpp
index 7ab4ab9a9d06e..ce043d47b97f1 100644
--- a/llvm/unittests/ADT/BitVectorTest.cpp
+++ b/llvm/unittests/ADT/BitVectorTest.cpp
@@ -1171,21 +1171,25 @@ TYPED_TEST(BitVectorTest, PushBack) {
   EXPECT_EQ(-1, Vec.find_first());
   EXPECT_EQ(10U, Vec.size());
   EXPECT_EQ(0U, Vec.count());
+  EXPECT_EQ(false, Vec.back());
 
   Vec.push_back(true);
   EXPECT_EQ(10, Vec.find_first());
   EXPECT_EQ(11U, Vec.size());
   EXPECT_EQ(1U, Vec.count());
+  EXPECT_EQ(true, Vec.back());
 
   Vec.push_back(false);
   EXPECT_EQ(10, Vec.find_first());
   EXPECT_EQ(12U, Vec.size());
   EXPECT_EQ(1U, Vec.count());
+  EXPECT_EQ(false, Vec.back());
 
   Vec.push_back(true);
   EXPECT_EQ(10, Vec.find_first());
   EXPECT_EQ(13U, Vec.size());
   EXPECT_EQ(2U, Vec.count());
+  EXPECT_EQ(true, Vec.back());
 
   // Add a lot of values to cause reallocation.
   for (int i = 0; i != 100; ++i) {
@@ -1197,6 +1201,28 @@ TYPED_TEST(BitVectorTest, PushBack) {
   EXPECT_EQ(102U, Vec.count());
 }
 
+TYPED_TEST(BitVectorTest, PopBack) {
+  TypeParam Vec(10, true);
+  EXPECT_EQ(10U, Vec.size());
+  EXPECT_EQ(10U, Vec.count());
+  EXPECT_EQ(true, Vec.back());
+
+  Vec.pop_back();
+  EXPECT_EQ(9U, Vec.size());
+  EXPECT_EQ(9U, Vec.count());
+  EXPECT_EQ(true, Vec.back());
+
+  Vec.push_back(false);
+  EXPECT_EQ(10U, Vec.size());
+  EXPECT_EQ(9U, Vec.count());
+  EXPECT_EQ(false, Vec.back());
+
+  Vec.pop_back();
+  EXPECT_EQ(9U, Vec.size());
+  EXPECT_EQ(9U, Vec.count());
+  EXPECT_EQ(true, Vec.back());
+}
+
 TYPED_TEST(BitVectorTest, DenseSet) {
   DenseSet<TypeParam> Set;
   TypeParam A(10, true);

From 7e3bcae5069fdb13ba6241b726d3a3912287784e Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Fri, 21 Jan 2022 14:59:11 +0100
Subject: [PATCH 180/946] Add apple-specific missing header in
 Support/GraphWriter.cpp

---
 llvm/lib/Support/GraphWriter.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/Support/GraphWriter.cpp b/llvm/lib/Support/GraphWriter.cpp
index 6e6d79b225ac8..e875e18a7e92e 100644
--- a/llvm/lib/Support/GraphWriter.cpp
+++ b/llvm/lib/Support/GraphWriter.cpp
@@ -25,6 +25,11 @@
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/raw_ostream.h"
+
+#ifdef __APPLE__
+#include "llvm/Support/CommandLine.h"
+#endif
+
 #include <string>
 #include <system_error>
 #include <vector>

From d5ae039ed7b84bf767d15417a3e9bf61f982257b Mon Sep 17 00:00:00 2001
From: Kai Nacke <kai.nacke@de.ibm.com>
Date: Thu, 20 Jan 2022 14:28:18 -0500
Subject: [PATCH 181/946] [SystemZ] Properly register machine passes.

Registering the passes enables use of -stop-before=/-stop-after
options.

Reviewed By: uweigand

Differential Revision: https://reviews.llvm.org/D117823
---
 llvm/lib/Target/SystemZ/SystemZ.h               | 10 ++++++++++
 llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp | 10 +---------
 llvm/lib/Target/SystemZ/SystemZElimCompare.cpp  | 12 ++++++------
 llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp    | 12 ++++++------
 llvm/lib/Target/SystemZ/SystemZLongBranch.cpp   | 12 +++++++-----
 llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp  | 10 +---------
 llvm/lib/Target/SystemZ/SystemZShortenInst.cpp  | 17 +++++++++--------
 llvm/lib/Target/SystemZ/SystemZTDC.cpp          |  4 ----
 .../lib/Target/SystemZ/SystemZTargetMachine.cpp |  8 ++++++++
 9 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZ.h b/llvm/lib/Target/SystemZ/SystemZ.h
index bedbd061ea5c1..5be19f0e3b467 100644
--- a/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/llvm/lib/Target/SystemZ/SystemZ.h
@@ -20,6 +20,7 @@
 namespace llvm {
 class SystemZTargetMachine;
 class FunctionPass;
+class PassRegistry;
 
 namespace SystemZ {
 // Condition-code mask values.
@@ -196,6 +197,15 @@ FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZPostRewritePass(SystemZTargetMachine &TM);
 FunctionPass *createSystemZTDCPass();
+
+void initializeSystemZElimComparePass(PassRegistry &);
+void initializeSystemZShortenInstPass(PassRegistry &);
+void initializeSystemZLongBranchPass(PassRegistry &);
+void initializeSystemZLDCleanupPass(PassRegistry &);
+void initializeSystemZCopyPhysRegsPass(PassRegistry &);
+void initializeSystemZPostRewritePass(PassRegistry &);
+void initializeSystemZTDCPassPass(PassRegistry &);
+
 } // end namespace llvm
 
 #endif
diff --git a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
index 7d21d29d270e3..763aa8c0e41fd 100644
--- a/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZCopyPhysRegs.cpp
@@ -25,12 +25,6 @@
 
 using namespace llvm;
 
-#define SYSTEMZ_COPYPHYSREGS_NAME "SystemZ Copy Physregs"
-
-namespace llvm {
-  void initializeSystemZCopyPhysRegsPass(PassRegistry&);
-}
-
 namespace {
 
 class SystemZCopyPhysRegs : public MachineFunctionPass {
@@ -41,8 +35,6 @@ class SystemZCopyPhysRegs : public MachineFunctionPass {
     initializeSystemZCopyPhysRegsPass(*PassRegistry::getPassRegistry());
   }
 
-  StringRef getPassName() const override { return SYSTEMZ_COPYPHYSREGS_NAME; }
-
   bool runOnMachineFunction(MachineFunction &MF) override;
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
@@ -59,7 +51,7 @@ char SystemZCopyPhysRegs::ID = 0;
 } // end anonymous namespace
 
 INITIALIZE_PASS(SystemZCopyPhysRegs, "systemz-copy-physregs",
-                SYSTEMZ_COPYPHYSREGS_NAME, false, false)
+                "SystemZ Copy Physregs", false, false)
 
 FunctionPass *llvm::createSystemZCopyPhysRegsPass(SystemZTargetMachine &TM) {
   return new SystemZCopyPhysRegs();
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 631cbff303e83..4893acc813352 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -65,11 +65,8 @@ class SystemZElimCompare : public MachineFunctionPass {
 public:
   static char ID;
 
-  SystemZElimCompare(const SystemZTargetMachine &tm)
-    : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-    return "SystemZ Comparison Elimination";
+  SystemZElimCompare() : MachineFunctionPass(ID) {
+    initializeSystemZElimComparePass(*PassRegistry::getPassRegistry());
   }
 
   bool processBlock(MachineBasicBlock &MBB);
@@ -106,6 +103,9 @@ char SystemZElimCompare::ID = 0;
 
 } // end anonymous namespace
 
+INITIALIZE_PASS(SystemZElimCompare, DEBUG_TYPE,
+                "SystemZ Comparison Elimination", false, false)
+
 // Returns true if MI is an instruction whose output equals the value in Reg.
 static bool preservesValueOf(MachineInstr &MI, unsigned Reg) {
   switch (MI.getOpcode()) {
@@ -746,5 +746,5 @@ bool SystemZElimCompare::runOnMachineFunction(MachineFunction &F) {
 }
 
 FunctionPass *llvm::createSystemZElimComparePass(SystemZTargetMachine &TM) {
-  return new SystemZElimCompare(TM);
+  return new SystemZElimCompare();
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
index 06d893d043e9d..d6c7959854485 100644
--- a/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -29,11 +29,8 @@ namespace {
 class SystemZLDCleanup : public MachineFunctionPass {
 public:
   static char ID;
-  SystemZLDCleanup(const SystemZTargetMachine &tm)
-    : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
-
-  StringRef getPassName() const override {
-    return "SystemZ Local Dynamic TLS Access Clean-up";
+  SystemZLDCleanup() : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {
+    initializeSystemZLDCleanupPass(*PassRegistry::getPassRegistry());
   }
 
   bool runOnMachineFunction(MachineFunction &MF) override;
@@ -52,8 +49,11 @@ char SystemZLDCleanup::ID = 0;
 
 } // end anonymous namespace
 
+INITIALIZE_PASS(SystemZLDCleanup, "systemz-ld-cleanup",
+                "SystemZ Local Dynamic TLS Access Clean-up", false, false)
+
 FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
-  return new SystemZLDCleanup(TM);
+  return new SystemZLDCleanup();
 }
 
 void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
diff --git a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index 9c985c16f0829..d53693154d404 100644
--- a/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -135,10 +135,9 @@ class SystemZLongBranch : public MachineFunctionPass {
 public:
   static char ID;
 
-  SystemZLongBranch(const SystemZTargetMachine &tm)
-    : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override { return "SystemZ Long Branch"; }
+  SystemZLongBranch() : MachineFunctionPass(ID) {
+    initializeSystemZLongBranchPass(*PassRegistry::getPassRegistry());
+  }
 
   bool runOnMachineFunction(MachineFunction &F) override;
 
@@ -174,6 +173,9 @@ const uint64_t MaxForwardRange = 0xfffe;
 
 } // end anonymous namespace
 
+INITIALIZE_PASS(SystemZLongBranch, DEBUG_TYPE, "SystemZ Long Branch", false,
+                false)
+
 // Position describes the state immediately before Block.  Update Block
 // accordingly and move Position to the end of the block's non-terminator
 // instructions.
@@ -481,5 +483,5 @@ bool SystemZLongBranch::runOnMachineFunction(MachineFunction &F) {
 }
 
 FunctionPass *llvm::createSystemZLongBranchPass(SystemZTargetMachine &TM) {
-  return new SystemZLongBranch(TM);
+  return new SystemZLongBranch();
 }
diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
index aaa7f8fc88f50..4b95d0d67389d 100644
--- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
@@ -21,16 +21,10 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 using namespace llvm;
 
-#define SYSTEMZ_POSTREWRITE_NAME "SystemZ Post Rewrite pass"
-
 #define DEBUG_TYPE "systemz-postrewrite"
 STATISTIC(MemFoldCopies, "Number of copies inserted before folded mem ops.");
 STATISTIC(LOCRMuxJumps, "Number of LOCRMux jump-sequences (lower is better)");
 
-namespace llvm {
-  void initializeSystemZPostRewritePass(PassRegistry&);
-}
-
 namespace {
 
 class SystemZPostRewrite : public MachineFunctionPass {
@@ -44,8 +38,6 @@ class SystemZPostRewrite : public MachineFunctionPass {
 
   bool runOnMachineFunction(MachineFunction &Fn) override;
 
-  StringRef getPassName() const override { return SYSTEMZ_POSTREWRITE_NAME; }
-
 private:
   void selectLOCRMux(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator MBBI,
@@ -70,7 +62,7 @@ char SystemZPostRewrite::ID = 0;
 } // end anonymous namespace
 
 INITIALIZE_PASS(SystemZPostRewrite, "systemz-post-rewrite",
-                SYSTEMZ_POSTREWRITE_NAME, false, false)
+                "SystemZ Post Rewrite pass", false, false)
 
 /// Returns an instance of the Post Rewrite pass.
 FunctionPass *llvm::createSystemZPostRewritePass(SystemZTargetMachine &TM) {
diff --git a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 254e5e92449b2..92930dad80ef8 100644
--- a/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -26,11 +26,7 @@ namespace {
 class SystemZShortenInst : public MachineFunctionPass {
 public:
   static char ID;
-  SystemZShortenInst(const SystemZTargetMachine &tm);
-
-  StringRef getPassName() const override {
-    return "SystemZ Instruction Shortening";
-  }
+  SystemZShortenInst();
 
   bool processBlock(MachineBasicBlock &MBB);
   bool runOnMachineFunction(MachineFunction &F) override;
@@ -56,12 +52,17 @@ class SystemZShortenInst : public MachineFunctionPass {
 char SystemZShortenInst::ID = 0;
 } // end anonymous namespace
 
+INITIALIZE_PASS(SystemZShortenInst, DEBUG_TYPE,
+                "SystemZ Instruction Shortening", false, false)
+
 FunctionPass *llvm::createSystemZShortenInstPass(SystemZTargetMachine &TM) {
-  return new SystemZShortenInst(TM);
+  return new SystemZShortenInst();
 }
 
-SystemZShortenInst::SystemZShortenInst(const SystemZTargetMachine &tm)
-  : MachineFunctionPass(ID), TII(nullptr) {}
+SystemZShortenInst::SystemZShortenInst()
+    : MachineFunctionPass(ID), TII(nullptr) {
+  initializeSystemZShortenInstPass(*PassRegistry::getPassRegistry());
+}
 
 // Tie operands if MI has become a two-address instruction.
 static void tieOpsIfNeeded(MachineInstr &MI) {
diff --git a/llvm/lib/Target/SystemZ/SystemZTDC.cpp b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
index 7cb7dca2ea28b..f62afb8ddfcfa 100644
--- a/llvm/lib/Target/SystemZ/SystemZTDC.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTDC.cpp
@@ -61,10 +61,6 @@
 
 using namespace llvm;
 
-namespace llvm {
-  void initializeSystemZTDCPassPass(PassRegistry&);
-}
-
 namespace {
 
 class SystemZTDCPass : public FunctionPass {
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index deb3358102ede..f1469fe8f56b7 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -32,6 +32,14 @@ using namespace llvm;
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeSystemZTarget() {
   // Register the target.
   RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget());
+  auto &PR = *PassRegistry::getPassRegistry();
+  initializeSystemZElimComparePass(PR);
+  initializeSystemZShortenInstPass(PR);
+  initializeSystemZLongBranchPass(PR);
+  initializeSystemZLDCleanupPass(PR);
+  initializeSystemZShortenInstPass(PR);
+  initializeSystemZPostRewritePass(PR);
+  initializeSystemZTDCPassPass(PR);
 }
 
 // Determine whether we use the vector ABI.

From e9211e03937751ab75bbb34e38acc330b85fb0d8 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Thu, 20 Jan 2022 12:04:42 +0100
Subject: [PATCH 182/946] Remove dependency from raw_ostream on <chrono>

The tryLockFor method from raw_fd_sotreamis the sole user of that
header, and it's not referenced in the mono repo. I still chose to keep
it (may be useful for downstream user) but added a transient type that's
forward declared to hold the duration parameter.

Notable changes:

- "llvm/Support/Duration.h" must be included in order to use tryLockFor.
- "llvm/Support/raw_ostream.h" no longer includes <chrono>

This sole change has an interesting impact on the number of processed
line, as measured by:

clang++ -E  -Iinclude -I../llvm/include ../llvm/lib/Support/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l

before: 7917500
after:  7835142

Discourse thread on the topic: https://llvm.discourse.group/t/include-what-you-use-include-cleanup/5831
---
 lldb/tools/lldb-vscode/FifoFiles.h        |  2 ++
 llvm/include/llvm/Debuginfod/Debuginfod.h |  2 ++
 llvm/include/llvm/Debuginfod/HTTPClient.h |  2 ++
 llvm/include/llvm/Support/Duration.h      | 28 +++++++++++++++++++++++
 llvm/include/llvm/Support/raw_ostream.h   |  4 ++--
 llvm/lib/Support/raw_ostream.cpp          |  5 ++--
 llvm/unittests/Support/Path.cpp           |  1 +
 7 files changed, 40 insertions(+), 4 deletions(-)
 create mode 100644 llvm/include/llvm/Support/Duration.h

diff --git a/lldb/tools/lldb-vscode/FifoFiles.h b/lldb/tools/lldb-vscode/FifoFiles.h
index f186f65e86c43..a0c4562b5a6b7 100644
--- a/lldb/tools/lldb-vscode/FifoFiles.h
+++ b/lldb/tools/lldb-vscode/FifoFiles.h
@@ -14,6 +14,8 @@
 
 #include "JSONUtils.h"
 
+#include <chrono>
+
 namespace lldb_vscode {
 
 /// Struct that controls the life of a fifo file in the filesystem.
diff --git a/llvm/include/llvm/Debuginfod/Debuginfod.h b/llvm/include/llvm/Debuginfod/Debuginfod.h
index fcb8ed3a9222b..064cfa75b1a1b 100644
--- a/llvm/include/llvm/Debuginfod/Debuginfod.h
+++ b/llvm/include/llvm/Debuginfod/Debuginfod.h
@@ -23,6 +23,8 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
 
+#include <chrono>
+
 namespace llvm {
 
 typedef ArrayRef<uint8_t> BuildIDRef;
diff --git a/llvm/include/llvm/Debuginfod/HTTPClient.h b/llvm/include/llvm/Debuginfod/HTTPClient.h
index e8f0e7ef8f786..ca3b76ca9f3f4 100644
--- a/llvm/include/llvm/Debuginfod/HTTPClient.h
+++ b/llvm/include/llvm/Debuginfod/HTTPClient.h
@@ -19,6 +19,8 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
 
+#include <chrono>
+
 namespace llvm {
 
 enum class HTTPMethod { GET };
diff --git a/llvm/include/llvm/Support/Duration.h b/llvm/include/llvm/Support/Duration.h
new file mode 100644
index 0000000000000..a5a0e2a3357aa
--- /dev/null
+++ b/llvm/include/llvm/Support/Duration.h
@@ -0,0 +1,28 @@
+//===--- Duration.h - wrapper around std::chrono::Duration ------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//  The sole purpose of this file is to avoid the dependency on <chrono> in
+//  raw_ostream.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_DURATION_H
+#define LLVM_SUPPORT_DURATION_H
+
+#include <chrono>
+
+namespace llvm {
+class Duration {
+  std::chrono::milliseconds Value;
+  public:
+  Duration(std::chrono::milliseconds Value) : Value(Value) {}
+  std::chrono::milliseconds getDuration() const { return Value; }
+};
+}
+
+#endif
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index fc46ec0d74564..e288ac27e804d 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -17,7 +17,6 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
-#include <chrono>
 #include <cstddef>
 #include <cstdint>
 #include <cstring>
@@ -30,6 +29,7 @@
 
 namespace llvm {
 
+class Duration;
 class formatv_object_base;
 class format_object_base;
 class FormattedString;
@@ -574,7 +574,7 @@ class raw_fd_ostream : public raw_pwrite_stream {
   ///
   /// It is used as @ref lock.
   LLVM_NODISCARD
-  Expected<sys::fs::FileLocker> tryLockFor(std::chrono::milliseconds Timeout);
+  Expected<sys::fs::FileLocker> tryLockFor(Duration const& Timeout);
 };
 
 /// This returns a reference to a raw_fd_ostream for standard output. Use it
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 1b1b0af79ae8d..e4b747b68beaa 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
+#include "llvm/Support/Duration.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Format.h"
@@ -868,8 +869,8 @@ Expected<sys::fs::FileLocker> raw_fd_ostream::lock() {
 }
 
 Expected<sys::fs::FileLocker>
-raw_fd_ostream::tryLockFor(std::chrono::milliseconds Timeout) {
-  std::error_code EC = sys::fs::tryLockFile(FD, Timeout);
+raw_fd_ostream::tryLockFor(Duration const& Timeout) {
+  std::error_code EC = sys::fs::tryLockFile(FD, Timeout.getDuration());
   if (!EC)
     return sys::fs::FileLocker(FD);
   return errorCodeToError(EC);
diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp
index b749448141f79..c3ba0b51f06f7 100644
--- a/llvm/unittests/Support/Path.cpp
+++ b/llvm/unittests/Support/Path.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Duration.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"

From ad43217a046634be24174299beec3a28018ec3c0 Mon Sep 17 00:00:00 2001
From: Caroline Concatto <caroline.concatto@arm.com>
Date: Mon, 13 Dec 2021 17:20:16 +0000
Subject: [PATCH 183/946] [InstCombine] Fold for masked gather when loading the
 same value each time.

This patch checks in the masked gather when the first operand value is a
splat and the mask is all one, because the masked gather is reloading the
same value each time. This patch replaces this pattern of masked gather by
a scalar load of the value and splats it in a vector.

Differential Revision: https://reviews.llvm.org/D115726
---
 .../InstCombine/InstCombineCalls.cpp          | 20 +++++-
 .../InstCombine/masked_intrinsics.ll          | 62 +++++++++++++++++++
 2 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e3a9e806abdba..f63a186166ecc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -352,9 +352,27 @@ Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
 // * Dereferenceable address & few lanes -> scalarize speculative load/selects
 // * Adjacent vector addresses -> masked.load
 // * Narrow width by halfs excluding zero/undef lanes
-// * Vector splat address w/known mask -> scalar load
 // * Vector incrementing address -> vector masked load
 Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
+  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
+  if (!ConstMask)
+    return nullptr;
+
+  // Vector splat address w/known mask -> scalar load
+  // Fold the gather to load the source vector first lane
+  // because it is reloading the same value each time
+  if (ConstMask->isAllOnesValue())
+    if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
+      auto *VecTy = cast<VectorType>(II.getType());
+      const Align Alignment =
+          cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+      LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
+                                              Alignment, "load.scalar");
+      Value *Shuf =
+          Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
+      return replaceInstUsesWith(II, cast<Instruction>(Shuf));
+    }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index a82aebd738fea..5ba559fd35f9c 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -376,3 +376,65 @@ define void @negative_scatter_v4i16_no_uniform_vals_no_uniform_ptrs_all_active_m
 ; Function Attrs:
 declare void @llvm.masked.scatter.v4i16.v4p0i16(<4 x i16>, <4 x i16*>, i32 immarg, <4 x i1>)
 declare void @llvm.masked.scatter.nxv4i16.nxv4p0i16(<vscale x 4 x i16>, <vscale x 4 x i16*>, i32 immarg, <vscale x 4 x i1>)
+
+; Test gathers that can be simplified to scalar load + splat
+
+;; Splat address and all active mask
+define <vscale x 2 x i64> @gather_nxv2i64_uniform_ptrs_all_active_mask(i64* %src) {
+; CHECK-LABEL: @gather_nxv2i64_uniform_ptrs_all_active_mask(
+; CHECK-NEXT:    [[LOAD_SCALAR:%.*]] = load i64, i64* [[SRC:%.*]], align 8
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[LOAD_SCALAR]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[BROADCAST_SPLAT2]]
+;
+  %broadcast.splatinsert = insertelement <vscale x 2 x i64*> poison, i64 *%src, i32 0
+  %broadcast.splat = shufflevector <vscale x 2 x i64*> %broadcast.splatinsert, <vscale x 2 x i64*> poison, <vscale x 2 x i32> zeroinitializer
+  %res = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*> %broadcast.splat, i32 8, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i32 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i64> undef)
+  ret <vscale x 2 x i64> %res
+}
+
+define <2 x i64> @gather_v2i64_uniform_ptrs_all_active_mask(i64* %src) {
+; CHECK-LABEL: @gather_v2i64_uniform_ptrs_all_active_mask(
+; CHECK-NEXT:    [[LOAD_SCALAR:%.*]] = load i64, i64* [[SRC:%.*]], align 8
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[LOAD_SCALAR]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    ret <2 x i64> [[BROADCAST_SPLAT2]]
+;
+  %broadcast.splatinsert = insertelement <2 x i64*> poison, i64 *%src, i32 0
+  %broadcast.splat = shufflevector <2 x i64*> %broadcast.splatinsert, <2 x i64*> poison, <2 x i32> zeroinitializer
+  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %broadcast.splat, i32 8, <2 x i1> <i1 1, i1 1>, <2 x i64> undef)
+  ret <2 x i64> %res
+}
+
+; Negative gather tests
+
+;; Vector of pointers is not a splat.
+define <2 x i64> @negative_gather_v2i64_non_uniform_ptrs_all_active_mask(<2 x i64*> %inVal, i64* %src ) {
+; CHECK-LABEL: @negative_gather_v2i64_non_uniform_ptrs_all_active_mask(
+; CHECK-NEXT:    [[INSERT_VALUE:%.*]] = insertelement <2 x i64*> [[INVAL:%.*]], i64* [[SRC:%.*]], i64 1
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> [[INSERT_VALUE]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
+; CHECK-NEXT:    ret <2 x i64> [[RES]]
+;
+  %insert.value = insertelement <2 x i64*> %inVal, i64 *%src, i32 1
+  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %insert.value, i32 8, <2 x i1><i1 1, i1 1>, <2 x i64> undef)
+  ret <2 x i64> %res
+}
+
+;; Unknown mask value
+define <2 x i64> @negative_gather_v2i64_uniform_ptrs_no_all_active_mask(i64* %src, <2 x i1> %mask) {
+; CHECK-LABEL: @negative_gather_v2i64_uniform_ptrs_no_all_active_mask(
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64*> poison, i64* [[SRC:%.*]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64*> [[BROADCAST_SPLATINSERT]], <2 x i64*> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> [[BROADCAST_SPLAT]], i32 8, <2 x i1> [[MASK:%.*]], <2 x i64> undef)
+; CHECK-NEXT:    ret <2 x i64> [[RES]]
+;
+  %broadcast.splatinsert = insertelement <2 x i64*> poison, i64 *%src, i32 0
+  %broadcast.splat = shufflevector <2 x i64*> %broadcast.splatinsert, <2 x i64*> poison, <2 x i32> zeroinitializer
+  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %broadcast.splat, i32 8, <2 x i1> %mask, <2 x i64> undef)
+  ret <2 x i64> %res
+}
+
+; Function Attrs:
+declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x i64*>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
+declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
+

From 4d82ae67b20826d97471c1ea76e8db3b054398f9 Mon Sep 17 00:00:00 2001
From: Kristof Beyls <kristof.beyls@arm.com>
Date: Wed, 12 Jan 2022 14:40:14 +0100
Subject: [PATCH 184/946] Add security group 2021 transparency report.

Differential Revision:  https://reviews.llvm.org/D117872
---
 llvm/docs/Reference.rst                   |  1 +
 llvm/docs/Security.rst                    |  2 ++
 llvm/docs/SecurityTransparencyReports.rst | 44 +++++++++++++++++++++++
 3 files changed, 47 insertions(+)
 create mode 100644 llvm/docs/SecurityTransparencyReports.rst

diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.rst
index d10fc8f23f735..0a2a84b31c4e2 100644
--- a/llvm/docs/Reference.rst
+++ b/llvm/docs/Reference.rst
@@ -38,6 +38,7 @@ LLVM and API reference documentation.
    ScudoHardenedAllocator
    MemTagSanitizer
    Security
+   SecurityTransparencyReports
    SegmentedStacks
    StackMaps
    SpeculativeLoadHardening
diff --git a/llvm/docs/Security.rst b/llvm/docs/Security.rst
index 19ce13b04babc..04cf5cabf8793 100644
--- a/llvm/docs/Security.rst
+++ b/llvm/docs/Security.rst
@@ -116,6 +116,8 @@ Transparency Report
 
 Every year, the LLVM Security Group must publish a transparency report. The intent of this report is to keep the community informed by summarizing the disclosures that have been made public in the last year. It shall contain a list of all public disclosures, as well as statistics on time to fix issues, length of embargo periods, and so on.
 
+The transparency reports are published at :doc:`SecurityTransparencyReports`.
+
 
 Privileges and Responsibilities of LLVM Security Group Members
 ==============================================================
diff --git a/llvm/docs/SecurityTransparencyReports.rst b/llvm/docs/SecurityTransparencyReports.rst
new file mode 100644
index 0000000000000..bcc28d8a9624f
--- /dev/null
+++ b/llvm/docs/SecurityTransparencyReports.rst
@@ -0,0 +1,44 @@
+========================================
+LLVM Security Group Transparency Reports
+========================================
+
+This page lists the yearly LLVM Security group transparency reports.
+
+2021
+----
+
+The :doc:`LLVM security group <Security>` was established on the 10th of July
+2020 by the act of the `initial
+commit <https://github.com/llvm/llvm-project/commit/7bf73bcf6d93>`_ describing
+the purpose of the group and the processes it follows.  Many of the group's
+processes were still not well-defined enough for the group to operate well.
+Over the course of 2021, the key processes were defined well enough to enable
+the group to operate reasonably well:
+
+* We defined details on how to report security issues, see `this commit on
+  20th of May 2021 <https://github.com/llvm/llvm-project/commit/c9dbaa4c86d2>`_
+* We refined the nomination process for new group members, see `this
+  commit on 30th of July 2021 <https://github.com/llvm/llvm-project/commit/4c98e9455aad>`_
+* We started writing an annual transparency report (you're reading the 2021
+  report here).
+
+Over the course of 2021, we had 2 people leave the LLVM Security group and 4
+people join.
+
+In 2021, the security group received 13 issue reports that were made publicly
+visible before 31st of December 2021.  The security group judged 2 of these
+reports to be security issues:
+
+* https://bugs.chromium.org/p/llvm/issues/detail?id=5
+* https://bugs.chromium.org/p/llvm/issues/detail?id=11
+
+Both issues were addressed with source changes: #5 in clangd/vscode-clangd, and
+#11 in llvm-project.  No dedicated LLVM release was made for either.
+
+We believe that with the publishing of this first annual transparency report,
+the security group now has implemented all necessary processes for the group to
+operate as promised. The group's processes can be improved further, and we do
+expect further improvements to get implemented in 2022. Many of the potential
+improvements end up being discussed on the `monthly public call on LLVM's
+security group <https://llvm.org/docs/GettingInvolved.html#online-sync-ups>`_.
+

From b8102449a72c5144cb75cfca46e78b517d7f6606 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Fri, 21 Jan 2022 15:54:33 +0100
Subject: [PATCH 185/946] [clang-tidy] Avoid binding nullptr to a reference

That's undefined behavior. Found by -fsanitize=null.
---
 .../clang-tidy/performance/MoveConstArgCheck.cpp            | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp
index 0e91451211aed..6e7d28b2974f7 100644
--- a/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp
+++ b/clang-tools-extra/clang-tidy/performance/MoveConstArgCheck.cpp
@@ -68,9 +68,9 @@ void MoveConstArgCheck::registerMatchers(MatchFinder *Finder) {
 }
 
 bool IsRValueReferenceParam(const Expr *Invocation,
-                            const QualType &InvocationParmType,
+                            const QualType *InvocationParmType,
                             const Expr *Arg) {
-  if (Invocation && InvocationParmType->isRValueReferenceType() &&
+  if (Invocation && (*InvocationParmType)->isRValueReferenceType() &&
       Arg->isLValue()) {
     if (!Invocation->getType()->isRecordType())
       return true;
@@ -138,7 +138,7 @@ void MoveConstArgCheck::check(const MatchFinder::MatchResult &Result) {
     // std::move shouldn't be removed when an lvalue wrapped by std::move is
     // passed to the function with an rvalue reference parameter.
     bool IsRVRefParam =
-        IsRValueReferenceParam(ReceivingExpr, *InvocationParmType, Arg);
+        IsRValueReferenceParam(ReceivingExpr, InvocationParmType, Arg);
     const auto *Var =
         IsVariable ? dyn_cast<DeclRefExpr>(Arg)->getDecl() : nullptr;
 

From e5fd3a7df9170cc69c88881e06fbd33c9cbd633d Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Fri, 21 Jan 2022 09:59:22 -0500
Subject: [PATCH 186/946] Try to unbreak build on Windows after e9211e03937

---
 llvm/tools/llvm-readobj/COFFDumper.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/tools/llvm-readobj/COFFDumper.cpp b/llvm/tools/llvm-readobj/COFFDumper.cpp
index e1b28e3ce7451..caeb49af24be8 100644
--- a/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -52,6 +52,7 @@
 #include "llvm/Support/ScopedPrinter.h"
 #include "llvm/Support/Win64EH.h"
 #include "llvm/Support/raw_ostream.h"
+#include <ctime>
 
 using namespace llvm;
 using namespace llvm::object;

From 8bc66189429919d86d3ff3b4b5a3f63cfa6c35ca Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Fri, 21 Jan 2022 16:04:19 +0100
Subject: [PATCH 187/946] Add missing llvm/support/Regex.h include in
 polly/lib/Analysis/ScopDetection.cpp

---
 polly/lib/Analysis/ScopDetection.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp
index f0be6d5ff1ae0..8b3af67a596b3 100644
--- a/polly/lib/Analysis/ScopDetection.cpp
+++ b/polly/lib/Analysis/ScopDetection.cpp
@@ -78,6 +78,7 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/Regex.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>

From 3c9e3dada916f33e8a4c62629f1954a7a5cdbf71 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Fri, 21 Jan 2022 10:04:41 -0500
Subject: [PATCH 188/946] Try to unbreak build on Windows more after
 e9211e03937

---
 llvm/tools/llvm-readobj/XCOFFDumper.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/tools/llvm-readobj/XCOFFDumper.cpp b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
index e34b105cfbf0a..6e778d558d4fb 100644
--- a/llvm/tools/llvm-readobj/XCOFFDumper.cpp
+++ b/llvm/tools/llvm-readobj/XCOFFDumper.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/ScopedPrinter.h"
 
+#include <ctime>
 #include <stddef.h>
 
 using namespace llvm;

From 9900acacfb3ff2bc5c957aeb84225c29aa1b74fc Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng@google.com>
Date: Fri, 21 Jan 2022 10:17:28 -0500
Subject: [PATCH 189/946] [libcxx][doc][nfc] Fixed typo in doc

---
 libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst b/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst
index a0f260a44e815..1af0b34263662 100644
--- a/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst
+++ b/libcxx/docs/DesignDocs/UniquePtrTrivialAbi.rst
@@ -36,7 +36,7 @@ Design
 ======
 
 * Annotate the two definitions of ``std::unique_ptr``  with ``clang::trivial_abi`` attribute.
-* Put the attribuate behind a flag because this change has potential compilation and runtime breakages.
+* Put the attribute behind a flag because this change has potential compilation and runtime breakages.
 
 
 This comes with some side effects:

From 9d3437fbf3419502351d41ff9e28f06b0c3f06e8 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <beanz@abolishcrlf.org>
Date: Tue, 18 Jan 2022 18:20:14 -0600
Subject: [PATCH 190/946] [ADT] [NFC] Add StringRef::detectEOL

This change moves EOL detection out of the clang::InclusionRewriter into
llvm::StringRef so that it can be easily reused elsewhere. It also adds
additional explicit test cases to verify the correct and expected return
results.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D117626
---
 .../Frontend/Rewrite/InclusionRewriter.cpp    | 20 ++-----------
 llvm/include/llvm/ADT/StringRef.h             | 19 ++++++++++++
 llvm/unittests/ADT/StringRefTest.cpp          | 30 +++++++++++++++++++
 3 files changed, 51 insertions(+), 18 deletions(-)

diff --git a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
index 931f3a24c5888..3e8d582f90c27 100644
--- a/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
+++ b/clang/lib/Frontend/Rewrite/InclusionRewriter.cpp
@@ -251,28 +251,12 @@ bool InclusionRewriter::IsIfAtLocationTrue(SourceLocation Loc) const {
   return false;
 }
 
-/// Detect the likely line ending style of \p FromFile by examining the first
-/// newline found within it.
-static StringRef DetectEOL(const MemoryBufferRef &FromFile) {
-  // Detect what line endings the file uses, so that added content does not mix
-  // the style. We need to check for "\r\n" first because "\n\r" will match
-  // "\r\n\r\n".
-  const char *Pos = strchr(FromFile.getBufferStart(), '\n');
-  if (!Pos)
-    return "\n";
-  if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
-    return "\r\n";
-  if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
-    return "\n\r";
-  return "\n";
-}
-
 void InclusionRewriter::detectMainFileEOL() {
   Optional<MemoryBufferRef> FromFile = *SM.getBufferOrNone(SM.getMainFileID());
   assert(FromFile);
   if (!FromFile)
     return; // Should never happen, but whatever.
-  MainEOL = DetectEOL(*FromFile);
+  MainEOL = FromFile->getBuffer().detectEOL();
 }
 
 /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
@@ -378,7 +362,7 @@ void InclusionRewriter::Process(FileID FileId,
   Lexer RawLex(FileId, FromFile, PP.getSourceManager(), PP.getLangOpts());
   RawLex.SetCommentRetentionState(false);
 
-  StringRef LocalEOL = DetectEOL(FromFile);
+  StringRef LocalEOL = FromFile.getBuffer().detectEOL();
 
   // Per the GNU docs: "1" indicates entering a new file.
   if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 3950910f0635a..9f64250c58a36 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -877,6 +877,25 @@ namespace llvm {
       return ltrim(Chars).rtrim(Chars);
     }
 
+    /// Detect the line ending style of the string.
+    ///
+    /// If the string contains a line ending, return the line ending character
+    /// sequence that is detected. Otherwise return '\n' for unix line endings.
+    ///
+    /// \return - The line ending character sequence.
+    LLVM_NODISCARD
+    StringRef detectEOL() const {
+      size_t Pos = find('\r');
+      if (Pos == npos) {
+        // If there is no carriage return, assume unix
+        return "\n";
+      }
+      if (Pos + 1 < Length && Data[Pos + 1] == '\n')
+        return "\r\n"; // Windows
+      if (Pos > 0 && Data[Pos - 1] == '\n')
+        return "\n\r"; // You monster!
+      return "\r";     // Classic Mac
+    }
     /// @}
   };
 
diff --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp
index 41c35804f1226..e80a25a19969c 100644
--- a/llvm/unittests/ADT/StringRefTest.cpp
+++ b/llvm/unittests/ADT/StringRefTest.cpp
@@ -1109,6 +1109,36 @@ TEST(StringRefTest, GTestPrinter) {
   EXPECT_EQ(R"("foo")", ::testing::PrintToString(StringRef("foo")));
 }
 
+TEST(StringRefTest, LFLineEnding) {
+  constexpr StringRef Cases[] = {"\nDoggo\nPupper", "Floofer\n", "Woofer"};
+  EXPECT_EQ(StringRef("\n"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\n"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\n"), Cases[2].detectEOL());
+}
+
+TEST(StringRefTest, CRLineEnding) {
+  constexpr StringRef Cases[] = {"\rDoggo\rPupper", "Floofer\r", "Woo\rfer\n"};
+  EXPECT_EQ(StringRef("\r"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\r"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\r"), Cases[2].detectEOL());
+}
+
+TEST(StringRefTest, CRLFLineEnding) {
+  constexpr StringRef Cases[] = {"\r\nDoggo\r\nPupper", "Floofer\r\n",
+                                 "Woofer\r\nSubWoofer\n"};
+  EXPECT_EQ(StringRef("\r\n"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\r\n"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\r\n"), Cases[2].detectEOL());
+}
+
+TEST(StringRefTest, LFCRLineEnding) {
+  constexpr StringRef Cases[] = {"\n\rDoggo\n\rPupper", "Floofer\n\r",
+                                 "Woofer\n\rSubWoofer\n"};
+  EXPECT_EQ(StringRef("\n\r"), Cases[0].detectEOL());
+  EXPECT_EQ(StringRef("\n\r"), Cases[1].detectEOL());
+  EXPECT_EQ(StringRef("\n\r"), Cases[2].detectEOL());
+}
+
 static_assert(std::is_trivially_copyable<StringRef>::value,
               "trivially copyable");
 

From 5597ec2dc4f888bf9704626c798ec6e50e5d1384 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Fri, 21 Jan 2022 17:05:36 +0100
Subject: [PATCH 191/946] Include missing "llvm/Support/Path.h" in
 "flang/lib/Frontend/CompilerInvocation.cpp"

---
 flang/lib/Frontend/CompilerInvocation.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index 159c5632c0e48..4525130e5c2e7 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Option/OptTable.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/raw_ostream.h"
 #include <memory>

From f24fe96f469b8ccdb8504f998d1107d79c8fd363 Mon Sep 17 00:00:00 2001
From: Jake Egan <jakeegan10@gmail.com>
Date: Fri, 21 Jan 2022 11:18:48 -0500
Subject: [PATCH 192/946] [ifs] Use a tmp file instead of "-"

Currently, Clang on AIX uses the system assembler to generate object files from assembly. The use of `-o -` results in a file named `-` instead of output to stdout. This patch uses a temporary object file instead.

Reviewed By: DiggerLin, hubert.reinterpretcast

Differential Revision: https://reviews.llvm.org/D117587
---
 clang/test/InterfaceStubs/object.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/test/InterfaceStubs/object.c b/clang/test/InterfaceStubs/object.c
index 45e2d38ba3e9c..a7609ff40af72 100644
--- a/clang/test/InterfaceStubs/object.c
+++ b/clang/test/InterfaceStubs/object.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fvisibility default -o - -emit-interface-stubs %s | FileCheck -check-prefix=CHECK-TAPI %s
-// RUN: %clang -fvisibility=default -c -o - %s | llvm-nm - 2>&1 | FileCheck -check-prefix=CHECK-SYMBOLS %s
+// RUN: %clang -fvisibility=default -c -o %t.o %s
+// RUN: llvm-nm %t.o 2>&1 | FileCheck -check-prefix=CHECK-SYMBOLS %s
 
 // CHECK-TAPI: data", Type: Object, Size: 4 }
 // CHECK-SYMBOLS: data

From cab96169380296a496614f433507d86b743f0d02 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Sun, 7 Nov 2021 19:44:59 +0100
Subject: [PATCH 193/946] [libc++] Use addressof in unordered_map.

This addresses the usage of `operator&` in `<unordered_map>`.

(Note there are still more headers with the same issue.)

Reviewed By: #libc, Quuxplusone, ldionne

Differential Revision: https://reviews.llvm.org/D117393
---
 libcxx/include/__hash_table                   | 40 +++++++--------
 libcxx/include/unordered_map                  | 21 ++++----
 ...rator.operators.addressof.compile.pass.cpp | 49 +++++++++++++++++++
 .../assign_move.addressof.compile.pass.cpp    | 42 ++++++++++++++++
 .../move.addressof.compile.pass.cpp           | 33 +++++++++++++
 .../move_alloc.addressof.compile.pass.cpp     | 36 ++++++++++++++
 .../emplace_hint.addressof.compile.pass.cpp   | 30 ++++++++++++
 ...rase_const_iter.addressof.compile.pass.cpp | 27 ++++++++++
 .../erase_range.addressof.compile.pass.cpp    | 27 ++++++++++
 ...nt_const_lvalue.addressof.compile.pass.cpp | 28 +++++++++++
 ...ible_value_type.addressof.compile.pass.cpp | 28 +++++++++++
 ...alue_value_type.addressof.compile.pass.cpp | 28 +++++++++++
 ...ry_emplace_hint.addressof.compile.pass.cpp | 40 +++++++++++++++
 .../swap.addressof.compile.pass.cpp           | 29 +++++++++++
 .../move.addressof.compile.pass.cpp           | 33 +++++++++++++
 .../move_alloc.addressof.compile.pass.cpp     | 36 ++++++++++++++
 .../emplace_hint.addressof.compile.pass.cpp   | 30 ++++++++++++
 17 files changed, 527 insertions(+), 30 deletions(-)
 create mode 100644 libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp

diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index 6b682ab27c6c3..adc732cffb015 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -308,9 +308,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_iterator& operator=(const __hash_iterator& __i)
     {
-        if (this != &__i)
+        if (this != _VSTD::addressof(__i))
         {
-            __get_db()->__iterator_copy(this, &__i);
+            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
             __node_ = __i.__node_;
         }
         return *this;
@@ -406,7 +406,7 @@ public:
         : __node_(__x.__node_)
     {
 #if _LIBCPP_DEBUG_LEVEL == 2
-        __get_db()->__iterator_copy(this, &__x);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__x));
 #endif
     }
 
@@ -415,7 +415,7 @@ public:
     __hash_const_iterator(const __hash_const_iterator& __i)
         : __node_(__i.__node_)
     {
-        __get_db()->__iterator_copy(this, &__i);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -427,9 +427,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_const_iterator& operator=(const __hash_const_iterator& __i)
     {
-        if (this != &__i)
+        if (this != _VSTD::addressof(__i))
         {
-            __get_db()->__iterator_copy(this, &__i);
+            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
             __node_ = __i.__node_;
         }
         return *this;
@@ -523,7 +523,7 @@ public:
           __bucket_(__i.__bucket_),
           __bucket_count_(__i.__bucket_count_)
     {
-        __get_db()->__iterator_copy(this, &__i);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -535,9 +535,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_local_iterator& operator=(const __hash_local_iterator& __i)
     {
-        if (this != &__i)
+        if (this != _VSTD::addressof(__i))
         {
-            __get_db()->__iterator_copy(this, &__i);
+            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
             __node_ = __i.__node_;
             __bucket_ = __i.__bucket_;
             __bucket_count_ = __i.__bucket_count_;
@@ -655,7 +655,7 @@ public:
           __bucket_count_(__x.__bucket_count_)
     {
 #if _LIBCPP_DEBUG_LEVEL == 2
-        __get_db()->__iterator_copy(this, &__x);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__x));
 #endif
     }
 
@@ -666,7 +666,7 @@ public:
           __bucket_(__i.__bucket_),
           __bucket_count_(__i.__bucket_count_)
     {
-        __get_db()->__iterator_copy(this, &__i);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -678,9 +678,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_const_local_iterator& operator=(const __hash_const_local_iterator& __i)
     {
-        if (this != &__i)
+        if (this != _VSTD::addressof(__i))
         {
-            __get_db()->__iterator_copy(this, &__i);
+            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
             __node_ = __i.__node_;
             __bucket_ = __i.__bucket_;
             __bucket_count_ = __i.__bucket_count_;
@@ -1615,7 +1615,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(
         __u.size() = 0;
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -2021,7 +2021,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(
         const_iterator __p, __node_pointer __cp)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                          "unordered container::emplace_hint(const_iterator, args...) called with an iterator not"
                          " referring to this unordered container");
     if (__p != end() && key_eq()(*__p, __cp->__value_))
@@ -2148,7 +2148,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi(
         const_iterator __p, _Args&&... __args)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                          "unordered container::emplace_hint(const_iterator, args...) called with an iterator not"
                          " referring to this unordered container");
     __node_holder __h = __construct_node(_VSTD::forward<_Args>(__args)...);
@@ -2472,7 +2472,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p)
 {
     __next_pointer __np = __p.__node_;
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                          "unordered container erase(iterator) called with an iterator not"
                          " referring to this container");
     _LIBCPP_DEBUG_ASSERT(__p != end(),
@@ -2492,10 +2492,10 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first,
                                                 const_iterator __last)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__first)) == this,
                          "unordered container::erase(iterator, iterator) called with an iterator not"
                          " referring to this container");
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__last)) == this,
                          "unordered container::erase(iterator, iterator) called with an iterator not"
                          " referring to this container");
     for (const_iterator __p = __first; __first != __last; __p = __first)
@@ -2727,7 +2727,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u)
         __u.__bucket_list_[__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] =
             __u.__p1_.first().__ptr();
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index 73edadab20990..accab28a99592 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -519,6 +519,7 @@ template <class Key, class T, class Hash, class Pred, class Alloc>
 #include <__functional/is_transparent.h>
 #include <__hash_table>
 #include <__iterator/iterator_traits.h>
+#include <__memory/addressof.h>
 #include <__node_handle>
 #include <__utility/forward.h>
 #include <compare>
@@ -1186,7 +1187,7 @@ public:
         {return __table_.__insert_unique(__x);}
 
     iterator insert(const_iterator __p, const value_type& __x) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i()_VSTD::addressof(__p) == this,
                              "unordered_map::insert(const_iterator, const value_type&) called with an iterator not "
                              "referring to this unordered_map");
         ((void)__p);
@@ -1207,7 +1208,7 @@ public:
         {return __table_.__insert_unique(_VSTD::move(__x));}
 
     iterator insert(const_iterator __p, value_type&& __x) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                              "unordered_map::insert(const_iterator, const value_type&) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__p);
@@ -1225,7 +1226,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator insert(const_iterator __p, _Pp&& __x)
         {
-            _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+            _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                                  "unordered_map::insert(const_iterator, value_type&&) called with an iterator not"
                                  " referring to this unordered_map");
           ((void)__p);
@@ -1241,7 +1242,7 @@ public:
     template <class... _Args>
     _LIBCPP_INLINE_VISIBILITY
     iterator emplace_hint(const_iterator __p, _Args&&... __args) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                              "unordered_map::emplace_hint(const_iterator, args...) called with an iterator not"
                              " referring to this unordered_map");
           ((void)__p);
@@ -1273,7 +1274,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator try_emplace(const_iterator __h, const key_type& __k, _Args&&... __args)
     {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this,
                              "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__h);
@@ -1284,7 +1285,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator try_emplace(const_iterator __h, key_type&& __k, _Args&&... __args)
     {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i()_VSTD::addressof(__h) == this,
                              "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__h);
@@ -1692,7 +1693,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
 {
     _VSTD::__debug_db_insert_c(this);
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -1712,7 +1713,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
     else
-        __get_db()->swap(this, &__u);
+        __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -2468,7 +2469,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
 {
     _VSTD::__debug_db_insert_c(this);
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -2489,7 +2490,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
     else
-        __get_db()->swap(this, &__u);
+        __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
diff --git a/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..856b78293a107
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// Validate the constructors of the (const)(_local)_iterator classes to be
+// properly guarded against ADL-hijacking operator&.
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+template <class ToIterator, class FromIterator>
+void test() {
+  FromIterator from;
+  ToIterator copy(from);
+  copy = from;
+
+  ToIterator move(std::move(from));
+  from = FromIterator();
+  move = std::move(from);
+}
+
+void test() {
+  {
+    using I = std::unordered_map<operator_hijacker, operator_hijacker>::iterator;
+    using CI = std::unordered_map<operator_hijacker, operator_hijacker>::const_iterator;
+    test<I, I>();
+    test<CI, I>();
+    test<CI, CI>();
+  }
+  {
+    using IL = std::unordered_map<operator_hijacker, operator_hijacker>::local_iterator;
+    using CIL = std::unordered_map<operator_hijacker, operator_hijacker>::const_local_iterator;
+    test<IL, IL>();
+    test<CIL, IL>();
+    test<CIL, CIL>();
+  }
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..6dbd7aaea2a8e
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
@@ -0,0 +1,42 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// unordered_map& operator=(unordered_map&&)
+//     noexcept(
+//         allocator_type::propagate_on_container_move_assignment::value &&
+//         is_nothrow_move_assignable<allocator_type>::value &&
+//         is_nothrow_move_assignable<hasher>::value &&
+//         is_nothrow_move_assignable<key_equal>::value);
+
+// Validate whether the container can be move-assigned with an ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  {
+    std::unordered_map<int, operator_hijacker> mo;
+    std::unordered_map<int, operator_hijacker> m;
+    m = std::move(mo);
+  }
+  {
+    std::unordered_map<operator_hijacker, int> mo;
+    std::unordered_map<operator_hijacker, int> m;
+    m = std::move(mo);
+  }
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..e36c6525d631b
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// unordered_map(unordered_map&& u)
+//   noexcept(
+//     is_nothrow_move_constructible<hasher>::value &&
+//     is_nothrow_move_constructible<key_equal>::value &&
+//     is_nothrow_move_constructible<allocator_type>::value);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> mo;
+  std::unordered_map<operator_hijacker, operator_hijacker> m(std::move(mo));
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..1fec0ee5d0f4b
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// unordered_map(unordered_map&& u, const allocator_type& a);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+void test() {
+  using A = test_allocator<std::pair<const operator_hijacker, operator_hijacker>>;
+  using C = std::unordered_map<operator_hijacker, operator_hijacker, std::hash<operator_hijacker>,
+                               std::equal_to<operator_hijacker>, A>;
+
+  C mo;
+  C m(std::move(mo), A());
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..58ddefd8cfbfc
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// template <class... Args>
+//     iterator emplace_hint(const_iterator position, Args&&... args);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  m.emplace_hint(m.cbegin());
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..1461f2499baad
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// iterator erase(const_iterator p)
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  m.erase(m.cbegin());
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..5f342f7b2152f
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// iterator erase(const_iterator first, const_iterator last)
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  m.erase(m.cbegin(), m.cend());
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..db1805e7d7e63
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// iterator insert(const_iterator p, const value_type& x);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  const std::pair<operator_hijacker, operator_hijacker> v;
+  m.insert(m.cend(), v);
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..530b826b61e78
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// template <class P,
+//           class = typename enable_if<is_convertible<P, value_type>::value>::type>
+//     pair<iterator, bool> insert(P&& x);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test(std::unordered_map<operator_hijacker, operator_hijacker>& m) { m.insert(m.cend(), *m.begin()); }
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..80219cb193edd
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// iterator insert(const_iterator hint, value_type&& obj);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test(std::unordered_map<operator_hijacker, operator_hijacker>& m) {
+  m.insert(m.cend(), std::pair<operator_hijacker, operator_hijacker>{});
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..2c667374d4fe8
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// template <class... Args>
+//     iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args);
+// template <class... Args>
+//     iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args);
+// template <class M>
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  {
+    const operator_hijacker k;
+    m.try_emplace(m.cend(), k);
+  }
+  {
+    operator_hijacker k;
+    m.try_emplace(m.cend(), std::move(k));
+  }
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..f5b5f516d42b5
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// void swap(unordered_map& c)
+//     noexcept(allocator_traits<Allocator>::is_always_equal::value &&
+//               noexcept(swap(declval<Hash&>(), declval<Hash&>())) &&
+//               noexcept(swap(declval<Pred&>(), declval<Pred&>())));
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m1;
+  std::unordered_map<operator_hijacker, operator_hijacker> m2;
+  std::swap(m1, m2);
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..73b19f35e2048
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_multimap
+
+// unordered_multimap(unordered_multimap&&)
+//   noexcept(
+//     is_nothrow_move_constructible<hasher>::value &&
+//     is_nothrow_move_constructible<key_equal>::value &&
+//     is_nothrow_move_constructible<allocator_type>::value);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_multimap<operator_hijacker, operator_hijacker> mo;
+  std::unordered_multimap<operator_hijacker, operator_hijacker> m(std::move(mo));
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..6419a03666d65
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_multimap
+
+// unordered_multimap(unordered_map&& u, const allocator_type& a);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+void test() {
+  using A = test_allocator<std::pair<const operator_hijacker, operator_hijacker>>;
+  using C = std::unordered_multimap<operator_hijacker, operator_hijacker, std::hash<operator_hijacker>,
+                                    std::equal_to<operator_hijacker>, A>;
+
+  C mo;
+  C m(std::move(mo), A());
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..5e23b73cf34b3
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_multimap
+
+// template <class... Args>
+//     iterator emplace_hint(const_iterator position, Args&&... args);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_multimap<operator_hijacker, operator_hijacker> m;
+  m.emplace_hint(m.cbegin());
+}

From b7fd91c84b4eea5324d9757243387280f4284236 Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Thu, 13 Jan 2022 16:27:28 -0800
Subject: [PATCH 194/946] Upstream MLIR PyTACO implementation.

Add TACO tests to test/Integration/Dialect/SparseTensor/taco. Add the MLIR
PyTACO implementation as tools under the directory.

Reviewed By: aartbik, mehdi_amini

Differential Revision: https://reviews.llvm.org/D117260
---
 mlir/python/requirements.txt                  |    1 +
 .../Dialect/SparseTensor/taco/README.md       |   27 +
 .../Dialect/SparseTensor/taco/data/gold_A.tns |   50 +
 .../Dialect/SparseTensor/taco/data/gold_y.tns |    4 +
 .../Dialect/SparseTensor/taco/data/nell-2.tns |    5 +
 .../Dialect/SparseTensor/taco/data/pwtk.mtx   |   11 +
 .../Dialect/SparseTensor/taco/test_MTTKRP.py  |   53 +
 .../Dialect/SparseTensor/taco/test_SpMV.py    |   54 +
 .../taco/test_simple_tensor_algebra.py        |   30 +
 .../SparseTensor/taco/tools/lit.local.cfg     |    2 +
 .../SparseTensor/taco/tools/mlir_pytaco.py    | 1768 +++++++++++++++++
 .../taco/tools/mlir_pytaco_api.py             |   47 +
 .../SparseTensor/taco/tools/mlir_pytaco_io.py |  206 ++
 .../taco/tools/mlir_pytaco_utils.py           |  121 ++
 14 files changed, 2379 insertions(+)
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/README.md
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py

diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt
index 0cc86af2c9cfb..991e8eb243358 100644
--- a/mlir/python/requirements.txt
+++ b/mlir/python/requirements.txt
@@ -1,3 +1,4 @@
 numpy
 pybind11>=2.8.0
 PyYAML
+dataclasses
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/README.md b/mlir/test/Integration/Dialect/SparseTensor/taco/README.md
new file mode 100644
index 0000000000000..88a8ce2581962
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/README.md
@@ -0,0 +1,27 @@
+# MLIR-PyTACO: Implementing PyTACO with MLIR
+
+TACO (http://tensor-compiler.org/) is a tensor algebra compiler. TACO defines
+PyTACO, a domain specific language in Python, for writing tensor algebra
+applications.
+
+This directory contains the implementation of PyTACO using MLIR. In particular,
+we implement a Python layer that accepts the PyTACO language, generates MLIR
+linalg.generic OPs with sparse tensor annotation to represent the tensor
+computation, and invokes the MLIR sparse tensor code generator
+(https://mlir.llvm.org/docs/Dialects/SparseTensorOps/) as well as other MLIR
+compilation passes to generate an executable. Then, we invoke the MLIR execution
+engine to execute the program and pass the result back to the Python layer.
+
+As can be seen from the tests in this directory, in order to port a PyTACO
+program to MLIR-PyTACO, we basically only need to replace this line that imports
+PyTACO:
+
+```python
+import pytaco as pt
+```
+
+with this line to import MLIR-PyTACO:
+
+```python
+from tools import mlir_pytaco_api as pt
+```
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns
new file mode 100644
index 0000000000000..b66caa12106a9
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_A.tns
@@ -0,0 +1,50 @@
+1 1 12
+1 2 12
+1 3 12
+1 4 12
+1 5 12
+1 6 12
+1 7 12
+1 8 12
+1 9 12
+1 10 12
+1 11 12
+1 12 12
+1 13 12
+1 14 12
+1 15 12
+1 16 12
+1 17 12
+1 18 12
+1 19 12
+1 20 12
+1 21 12
+1 22 12
+1 23 12
+1 24 12
+1 25 12
+2 1 6
+2 2 6
+2 3 6
+2 4 6
+2 5 6
+2 6 6
+2 7 6
+2 8 6
+2 9 6
+2 10 6
+2 11 6
+2 12 6
+2 13 6
+2 14 6
+2 15 6
+2 16 6
+2 17 6
+2 18 6
+2 19 6
+2 20 6
+2 21 6
+2 22 6
+2 23 6
+2 24 6
+2 25 6
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns
new file mode 100644
index 0000000000000..a9eab90a0627a
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/data/gold_y.tns
@@ -0,0 +1,4 @@
+# See http://frostt.io/tensors/file-formats.html for FROSTT (.tns) format
+1 37102
+2 -20.4138
+3 804927
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns b/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns
new file mode 100644
index 0000000000000..a6c570c3c7d8f
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/data/nell-2.tns
@@ -0,0 +1,5 @@
+1 1 1 1.0
+1 2 2 2.0
+1 3 4 3.0
+2 1 1 1.0
+2 4 3 2.0
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx b/mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx
new file mode 100644
index 0000000000000..ec1cebc1c8f82
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/data/pwtk.mtx
@@ -0,0 +1,11 @@
+%%MatrixMarket matrix coordinate real symmetric
+%-------------------------------------------------------------------------------
+% To download a matrix for a real world application
+% https://math.nist.gov/MatrixMarket/
+%-------------------------------------------------------------------------------
+3 3 5
+1 1 37423.0879671
+2 1 -22.4050781162
+3 1 -300.654980157
+3 2 -.00869762944058
+3 3 805225.750212
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
new file mode 100644
index 0000000000000..1fda4f4406393
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_MTTKRP.py
@@ -0,0 +1,53 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+import numpy as np
+import os
+import sys
+import tempfile
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco_api as pt
+
+###### This PyTACO part is taken from the TACO open-source project. ######
+# See http://tensor-compiler.org/docs/data_analytics/index.html.
+
+compressed = pt.compressed
+dense = pt.dense
+
+# Define formats for storing the sparse tensor and dense matrices.
+csf = pt.format([compressed, compressed, compressed])
+rm = pt.format([dense, dense])
+
+# Load a sparse three-dimensional tensor from file (stored in the FROSTT
+# format) and store it as a compressed sparse fiber tensor. We use a small
+# tensor for the purpose of testing. To run the program using the data from
+# the real application, please download the data from:
+# http://frostt.io/tensors/nell-2/
+B = pt.read(os.path.join(_SCRIPT_PATH, "data/nell-2.tns"), csf)
+
+# These two lines have been modified from the original program to use static
+# data to support result comparison.
+C = pt.from_array(np.full((B.shape[1], 25), 1, dtype=np.float64))
+D = pt.from_array(np.full((B.shape[2], 25), 2, dtype=np.float64))
+
+# Declare the result to be a dense matrix.
+A = pt.tensor([B.shape[0], 25], rm)
+
+# Declare index vars.
+i, j, k, l = pt.get_index_vars(4)
+
+# Define the MTTKRP computation.
+A[i, j] = B[i, k, l] * D[l, j] * C[k, j]
+
+##########################################################################
+
+# CHECK: Compare result True
+# Perform the MTTKRP computation and write the result to file.
+with tempfile.TemporaryDirectory() as test_dir:
+  actual_file = os.path.join(test_dir, "A.tns")
+  pt.write(actual_file, A)
+  actual = np.loadtxt(actual_file, np.float64)
+  expected = np.loadtxt(
+      os.path.join(_SCRIPT_PATH, "data/gold_A.tns"), np.float64)
+  print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}")
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
new file mode 100644
index 0000000000000..80bb023360ff8
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_SpMV.py
@@ -0,0 +1,54 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+import numpy as np
+import os
+import sys
+import tempfile
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco_api as pt
+
+###### This PyTACO part is taken from the TACO open-source project. ######
+# See http://tensor-compiler.org/docs/scientific_computing/index.html.
+
+compressed = pt.compressed
+dense = pt.dense
+
+# Define formats for storing the sparse matrix and dense vectors.
+csr = pt.format([dense, compressed])
+dv = pt.format([dense])
+
+# Load a sparse matrix stored in the matrix market format) and store it
+# as a CSR matrix.  The matrix in this test is a reduced version of the data
+# downloaded from here:
+# https://www.cise.ufl.edu/research/sparse/MM/Boeing/pwtk.tar.gz
+# In order to run the program using the matrix above, you can download the
+# matrix and replace this path to the actual path to the file.
+A = pt.read(os.path.join(_SCRIPT_PATH, "data/pwtk.mtx"), csr)
+
+# These two lines have been modified from the original program to use static
+# data to support result comparison.
+x = pt.from_array(np.full((A.shape[1],), 1, dtype=np.float64))
+z = pt.from_array(np.full((A.shape[0],), 2, dtype=np.float64))
+
+# Declare the result to be a dense vector
+y = pt.tensor([A.shape[0]], dv)
+
+# Declare index vars
+i, j = pt.get_index_vars(2)
+
+# Define the SpMV computation
+y[i] = A[i, j] * x[j] + z[i]
+
+##########################################################################
+
+# CHECK: Compare result True
+# Perform the SpMV computation and write the result to file
+with tempfile.TemporaryDirectory() as test_dir:
+  actual_file = os.path.join(test_dir, "y.tns")
+  pt.write(actual_file, y)
+  actual = np.loadtxt(actual_file, np.float64)
+  expected = np.loadtxt(
+      os.path.join(_SCRIPT_PATH, "data/gold_y.tns"), np.float64)
+  print(f"Compare result {np.allclose(actual, expected, rtol=0.01)}")
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py
new file mode 100644
index 0000000000000..021519028496c
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/test_simple_tensor_algebra.py
@@ -0,0 +1,30 @@
+# RUN: SUPPORTLIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext %PYTHON %s | FileCheck %s
+
+import os
+import sys
+
+_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(_SCRIPT_PATH)
+from tools import mlir_pytaco_api as pt
+
+compressed = pt.compressed
+dense = pt.dense
+
+# Ensure that we can run an unmodified PyTACO program with a simple tensor
+# algebra expression using tensor index notation, and produce the expected
+# result.
+i, j = pt.get_index_vars(2)
+A = pt.tensor([2, 3])
+B = pt.tensor([2, 3])
+C = pt.tensor([2, 3])
+D = pt.tensor([2, 3], dense)
+A.insert([0, 1], 10)
+A.insert([1, 2], 40)
+B.insert([0, 0], 20)
+B.insert([1, 2], 30)
+C.insert([0, 1], 5)
+C.insert([1, 2], 7)
+D[i, j] = A[i, j] + B[i, j] - C[i, j]
+
+# CHECK: [20. 5. 0. 0. 0. 63.]
+print(D.to_array().reshape(6))
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg
new file mode 100644
index 0000000000000..650ca33613cc6
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/lit.local.cfg
@@ -0,0 +1,2 @@
+# Files in this directory are tools, not tests.
+config.unsupported = True
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
new file mode 100644
index 0000000000000..f64d34037eabd
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco.py
@@ -0,0 +1,1768 @@
+#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+#  See https://llvm.org/LICENSE.txt for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Experimental MLIR-PyTACO with sparse tensor support.
+
+See http://tensor-compiler.org/ for TACO tensor compiler.
+
+This module implements the Python classes for PyTACO index notation. These
+include classes for data types, tensor dimension formats (aka mode formats),
+tensor dimension orderings (aka mode ordering), tensor storage formats, and
+tensors.
+
+The PyTACO API doesn't follow the naming conversion required by the style guide
+for this module. As such, we first implement the supporting classes and routines
+following the style guide, and then define the type aliases and constants to
+support the PyTACO API in the pytaco_api module.
+"""
+
+from typing import Any, Callable, Dict, Iterable, List, Optional, Set, Tuple, Union
+
+import abc
+import ctypes
+import dataclasses
+import enum
+import numpy as np
+import functools
+import operator
+import os
+import threading
+
+# Import MLIR related modules.
+from mlir import all_passes_registration  # Register MLIR compiler passes.
+from mlir import execution_engine
+from mlir import ir
+from mlir import runtime
+from mlir.dialects import arith
+from mlir.dialects import builtin
+from mlir.dialects import linalg
+from mlir.dialects import std
+from mlir.dialects import sparse_tensor
+from mlir.dialects.linalg.opdsl import lang
+from mlir.passmanager import PassManager
+
+from . import mlir_pytaco_utils as utils
+
+# TACO naming prefixes.
+_TACO_INDEX_PREFIX = "i"
+_TACO_TENSOR_PREFIX = "A"
+
+# Bitwidths for pointers and indices.
+_POINTER_BIT_WIDTH = 0
+_INDEX_BIT_WIDTH = 0
+# The name for the environment variable that provides the full path for the
+# supporting library.
+_SUPPORTLIB_ENV_VAR = "SUPPORTLIB"
+# The default supporting library if the environment variable is not provided.
+_DEFAULT_SUPPORTLIB = "libmlir_c_runner_utils.so"
+# The JIT compiler optimization level.
+_OPT_LEVEL = 2
+# The entry point to the JIT compiled program.
+_ENTRY_NAME = "main"
+
+# Type aliases for type annotation.
+_BinaryOp = Callable[[Any, Any], Any]
+_ExprVisitor = Callable[..., None]
+_ExprInfoDict = Dict["IndexExpr", "_ExprInfo"]
+_LogicalOp = Callable[[bool, bool], bool]
+_ModeFormatOp = Callable[["ModeFormat", "ModeFormat"], "ModeFormat"]
+_SubtreeLeafChecker = Optional[Callable[..., bool]]
+
+
+class Type(enum.Enum):
+  """The data types supported by TACO.
+
+  We use numpy data types to implement the enum data types.
+  """
+  INT16 = np.int16
+  INT32 = np.int32
+  INT64 = np.int64
+  # numpy _ctype_from_dtype_scalar can't handle np.float16 yet.
+  FLOAT32 = np.float32
+  FLOAT64 = np.float64
+
+
+# All floating point type enums.
+_FLOAT_TYPES = (Type.FLOAT32, Type.FLOAT64)
+# All integral type enums.
+_INT_TYPES = (Type.INT16, Type.INT32, Type.INT64)
+# Type alias for any numpy type used to implement the runtime support for the
+# enum data types.
+_AnyRuntimeType = Union[np.int16, np.int32, np.int64, np.float32, np.float64]
+
+
+@dataclasses.dataclass(frozen=True)
+class DType:
+  """The data type class.
+
+  We support the TACO API dtype class with an alias of this class.
+
+  The following methods are defined by the TACO API:
+    is_float: Returns whether the data type represents a floating point value.
+    is_int:   Returns whether the data type represents an integral value.
+
+  Attributes:
+    kind: A Type enum representing the data type.
+    value: The numpy data type for the TACO data type.
+  """
+  kind: Type = Type.FLOAT64
+
+  def is_float(self) -> bool:
+    """Returns whether the data type represents a floating point value."""
+    return self.kind in _FLOAT_TYPES
+
+  def is_int(self) -> bool:
+    """Returns whether the data type represents an integral value."""
+    return self.kind in _INT_TYPES
+
+  @property
+  def value(self) -> _AnyRuntimeType:
+    """Returns the numpy dtype for the data type."""
+    return self.kind.value
+
+
+def _mlir_type_from_taco_type(dtype: DType) -> ir.Type:
+  """Returns the MLIR type corresponding to the given TACO type."""
+  dtype_to_irtype = {
+      Type.INT16: ir.IntegerType.get_signless(16),
+      Type.INT32: ir.IntegerType.get_signless(32),
+      Type.INT64: ir.IntegerType.get_signless(64),
+      Type.FLOAT32: ir.F32Type.get(),
+      Type.FLOAT64: ir.F64Type.get()
+  }
+  return dtype_to_irtype[dtype.kind]
+
+
+def _compile_mlir(module: ir.Module) -> ir.Module:
+  """Compiles an MLIR module and returns the compiled module."""
+  # TODO: Replace this with a pipeline implemented for 
+  #   https://github.com/llvm/llvm-project/issues/51751.
+  pipeline = (
+      f"sparsification,"
+      f"sparse-tensor-conversion,"
+      f"builtin.func(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf),"
+      f"convert-scf-to-std,"
+      f"func-bufferize,"
+      f"tensor-constant-bufferize,"
+      f"builtin.func(tensor-bufferize,std-bufferize,finalizing-bufferize),"
+      f"convert-vector-to-llvm{{reassociate-fp-reductions=1 enable-index-optimizations=1}},"
+      f"lower-affine,"
+      f"convert-memref-to-llvm,"
+      f"convert-std-to-llvm,"
+      f"reconcile-unrealized-casts")
+  PassManager.parse(pipeline).run(module)
+  return module
+
+
+@functools.lru_cache()
+def _get_support_lib_name() -> str:
+  """Returns the string for the supporting C shared library."""
+  return os.getenv(_SUPPORTLIB_ENV_VAR, _DEFAULT_SUPPORTLIB)
+
+
+def _ctype_pointer_from_array(array: np.ndarray) -> ctypes.pointer:
+  """Returns the ctype pointer for the given numpy array."""
+  return ctypes.pointer(
+      ctypes.pointer(runtime.get_ranked_memref_descriptor(array)))
+
+
+class ModeFormat(enum.Enum):
+  """The tensor dimension storage format class.
+
+  We support the TACO API mode_format class with an alias of this class.
+
+  In TACO, a tensor dimension is called a mode and the storage format for a
+  tensor dimension is called a mode format.
+  """
+  DENSE = sparse_tensor.DimLevelType.dense
+  COMPRESSED = sparse_tensor.DimLevelType.compressed
+
+
+def _mode_format_operation(a: ModeFormat, b: ModeFormat,
+                           op: _LogicalOp) -> ModeFormat:
+  """Implements the given operator on ModeFormat."""
+  return (ModeFormat.COMPRESSED
+          if op(a == ModeFormat.COMPRESSED, b == ModeFormat.COMPRESSED) else
+          ModeFormat.DENSE)
+
+
+def _mode_format_estimator(op: _BinaryOp) -> _ModeFormatOp:
+  """Produces a ModeFormat operator for the given binary operator.
+
+  The ModeFormat operator is used as a heuristic to derive the destination
+  dimension sparsity from the source dimension sparsity. In particular, if the
+  binary operator produces a disjunction of the zero values from its source
+  operands, such as the MUL operator, we return a ModeFormat operator that
+  uses operator.or_. That is, we estimate that a dimension for the MUL
+  operation result to be sparse if either of its source operands is sparse.
+
+  On the other hand, if the binary operator produces a conjunction of the
+  zero values from its source operands, such as the ADD operator, we return
+  a ModeFormat operator that uses operator.and_. In this case, we estimate
+  that a dimension for the ADD operation result to be sparse if both of its
+  source operands are sparse.
+
+  Args:
+    op: A _BinaryOp object representing a supporting operator on tensors.
+
+  Returns:
+    A ModeFormatOp for estimating the destination dimension sparsity from
+    the source dimension sparsity.
+  """
+  conjunction = functools.partial(_mode_format_operation, op=operator.and_)
+  disjunction = functools.partial(_mode_format_operation, op=operator.or_)
+  return conjunction if op(0, 1) != 0 else disjunction
+
+
+def _all_instance_of(collection: Iterable, cls: Any) -> bool:
+  """Returns true if all elements of the iterable is an instance of cls."""
+  return all(isinstance(e, cls) for e in collection)
+
+
+def _identity_ordering(rank: int) -> List[int]:
+  """Returns the identity ordering for tensor of given rank."""
+  return list(range(rank))
+
+
+@dataclasses.dataclass(frozen=True)
+class ModeOrdering:
+  """The tensor dimension ordering class.
+
+  We support the TACO API mode_ordering class with an alias of this class.
+
+  Attributes:
+    ordering: A list of integers representing the ordering of the tensor
+      dimensions.
+  """
+  ordering: List[int]
+
+  def __post_init__(self) -> None:
+    """Verifies the value in ordering.
+
+    Raises:
+       ValueError: If ordering is not a list of integers.
+    """
+    if (not isinstance(self.ordering, list) or
+        not _all_instance_of(self.ordering, int)):
+      raise ValueError("Ordering must be a list of integers: "
+                       f"{self.ordering}")
+    # Check that ordering is a permutation of the dimension numbers.
+    if sorted(self.ordering) != _identity_ordering(self.rank()):
+      raise ValueError(f"Invalid ordering: {self.ordering} != "
+                       f"permutation{_identity_ordering(self.rank())}.")
+
+  def rank(self) -> int:
+    """Returns the number of dimensions represented by the ordering."""
+    return len(self.ordering)
+
+
+@dataclasses.dataclass(frozen=True)
+class ModeFormatPack:
+  """The tensor dimension format class.
+
+  We support the TACO API mode_format_pack class with an alias of this class.
+
+  The storage format of a tensor contains one mode_format for each tensor
+  dimension.
+
+  Attributes:
+    formats: A list of ModeFormat representing the storage format for each of
+      the tensor dimension.
+  """
+  formats: List[ModeFormat]
+
+  def __post_init__(self) -> None:
+    """Verifies the value in formats.
+
+    Raises:
+       ValueError: If formats is not a list of ModeFormats.
+    """
+    if (not isinstance(self.formats, list) or
+        not _all_instance_of(self.formats, ModeFormat)):
+      raise ValueError("Formats must be a list of ModeFormat: "
+                       f"{self.formats}")
+
+  def rank(self) -> int:
+    """Returns the number of dimensions represented by the format pack."""
+    return len(self.formats)
+
+
+@dataclasses.dataclass
+class Format:
+  """The tensor format class defined by the TACO API.
+
+  Attributes:
+    format_pack: A ModeFormatPack representing the storage format for the tensor
+      dimensions.
+    ordering: A ModeOrdering representing the tensor dimension ordering in the
+      storage.
+  """
+  format_pack: ModeFormatPack
+  ordering: Optional[ModeOrdering] = None
+
+  def __post_init__(self) -> None:
+    """Verifies and fixes up the values in format_pack and ordering.
+
+    Verifies and fixes up the values in format_pack and ordering to supports the
+    initializer syntax defined by the TACO API. If format_pack is a list of
+    ModeFormat, replaces it with ModeFormatPack constructed from the list. If
+    ordering is not provided, set ordering to the natural ordering for the rank
+    corresponding to format_pack.
+
+    Raises:
+       ValueError: If format_pack is not an instance of ModeFormatPack or if
+         ordering is not an instance of ModeOrdering.
+    """
+    if isinstance(self.format_pack, list):
+      if not _all_instance_of(self.format_pack, ModeFormat):
+        raise ValueError(f"Expected a list of ModeFormat: {self.format_pack}")
+      self.format_pack = ModeFormatPack(self.format_pack)
+    if not isinstance(self.format_pack, ModeFormatPack):
+      raise ValueError(f"Expected ModeFormatpack: {self.format_pack}")
+
+    if self.ordering is None:
+      self.ordering = ModeOrdering(list(range(self.rank())))
+    if not isinstance(self.ordering, ModeOrdering):
+      raise ValueError(f"Expected ModeOrdering: {self.ordering}")
+
+    if self.format_pack.rank() != self.ordering.rank():
+      raise ValueError("Inconsistent ModeFormatPack and ModeOrdering: "
+                       f"len({self.format_pack}) != "
+                       f"len({self.ordering})")
+
+  def is_dense(self) -> bool:
+    """Returns true if all the Tensor dimensions have a dense format."""
+    return all([f == ModeFormat.DENSE for f in self.format_pack.formats])
+
+  def rank(self) -> int:
+    """Returns the number of dimensions represented by the format."""
+    return self.format_pack.rank()
+
+  def mlir_tensor_attr(self) -> Optional[sparse_tensor.EncodingAttr]:
+    """Constructs the MLIR attributes for the tensor format."""
+    if self.is_dense():
+      return None
+
+    order = (
+        range(self.rank()) if
+        (self.ordering is None) else self.ordering.ordering)
+    mlir_storage_format = [f.value for f in self.format_pack.formats]
+    return sparse_tensor.EncodingAttr.get(mlir_storage_format,
+                                          ir.AffineMap.get_permutation(order),
+                                          _POINTER_BIT_WIDTH, _INDEX_BIT_WIDTH)
+
+
+def _make_format(formats: List[ModeFormat],
+                 ordering: Optional[List[int]] = None) -> Format:
+  """Constructs a format from a list of ModeFormat and an optional ordering.
+
+  Args:
+    formats: A list of ModeFormat, one for each dimension of a tensor.
+    ordering: An optional list of integer, for the ordering of the tensor
+      dimensions. When an ordering is not given, the identity ordering is used.
+
+  Returns:
+    A tensor format object.
+
+  Raises:
+    ValueError: If formats is not a list of ModeFormat or the length of formats
+      is not consistent with the len of ordering.
+  """
+  ordering = ordering or _identity_ordering(len(formats))
+  return Format(ModeFormatPack(formats), ModeOrdering(ordering))
+
+
+class _AtomicCounter:
+  """An atomic counter."""
+
+  def __init__(self):
+    self._counter = 0
+    self._counter_lock = threading.Lock()
+
+  def increment(self) -> int:
+    """Increments the counter by one and returns the old value."""
+    old_value = self._counter
+    with self._counter_lock:
+      self._counter = self._counter + 1
+    return old_value
+
+
+class IndexVar:
+  """The tensor index class.
+
+  We support the TACO API index_var class with an alias of this class.
+
+  An IndexVar object represents an index variable in tensor index notation.
+
+  Attributes:
+    name: A unique string name of the IndexVar.
+  """
+  _counter = _AtomicCounter()
+
+  def __init__(self):
+    id = self._counter.increment()
+    self._name = f"{_TACO_INDEX_PREFIX}{id}"
+
+  def __repr__(self) -> str:
+    return f"IndexVar(name={repr(self._name)})"
+
+  @property
+  def name(self) -> str:
+    """Returns the name of the IndexVar."""
+    return self._name
+
+
+def get_index_vars(n: int) -> List[IndexVar]:
+  """Returns a list of n IndexVar.
+
+  This routine is defined by the TACO API.
+
+  Args:
+    n: An interger representing the number of IndexVar to get.
+
+  Returns:
+    A list of IndexVar.
+
+  Raises:
+    ValueError: if n is not a positive integer.
+  """
+  if not isinstance(n, int) or n <= 0:
+    raise ValueError(f"Expected an integer: {n}.")
+  # If lock contention ever becomes an issue, we could implement a bulk getter
+  # that returns a range by only claiming the lock once.
+  return [IndexVar() for i in range(n)]
+
+
+def _mlir_symbols_from_index_vars(
+    index_vars: Tuple[IndexVar, ...]) -> Tuple[lang.SymbolDef, ...]:
+  """Returns a tuple of MLIR symbols for the given tuple of index_var."""
+  return tuple(getattr(lang.S, i.name) for i in index_vars)
+
+
+def _mlir_dimensions_from_index_vars(
+    index_vars: Tuple[IndexVar, ...]) -> Tuple[lang.DimDef, ...]:
+  """Returns a tuple of MLIR dimensions for the given tuple of index_var."""
+  return tuple(getattr(lang.D, i.name) for i in index_vars)
+
+
+def _mlir_tensor_type(
+    dtype: DType, shape: Tuple[int, ...],
+    attr: Optional[sparse_tensor.EncodingAttr]) -> ir.RankedTensorType:
+  """Returns an MLIR tensor type.
+
+  Args:
+    dtype: An DType object for the element data type of the tensor.
+    shape: A tuple of integer for the shape of the tensor.
+    attr: An optional MLIR sparse tensor attribute, only provided if the tensor
+      is a sparse tensor.
+
+  Returns:
+    An MLIR ranked tensor type.
+  """
+  ir_type = _mlir_type_from_taco_type(dtype)
+  return ir.RankedTensorType.get(shape, ir_type, attr)
+
+
+def _verify_and_normalize_indices(indices) -> Tuple[IndexVar, ...]:
+  """Verifies and normalizes the indices for a tensor access.
+
+  Args:
+    indices: The index expression used to access a tensor, which could be any
+      Python object from user inputs.
+
+  Returns:
+    A tuple of IndexVar.
+
+  Raises:
+    ValueError: If indices is not an IndexVar or a tuple of IndexVar.
+  """
+  if isinstance(indices, IndexVar):
+    return (indices,)
+  elif isinstance(indices, tuple) and _all_instance_of(indices, IndexVar):
+    return indices
+
+  raise ValueError(f"Expected IndexVars: {indices}")
+
+
+@dataclasses.dataclass(frozen=True)
+class _StructOpInfo:
+  """Information for generating a structured op in the linalg dialect.
+
+  This information is associated with an expression node that serves as the
+  root for an expression subtree implemented with a structured op.
+
+  Attributes:
+    dst_indices: A tuple of IndexVar, representing the result dimensions of the
+      structured op. This is used to construct the temporary variable for the
+      tensor to hold the structured op result.
+    dst_dims: A tuple of int, representing the result shape of the structured
+      op.
+    dst_dtype: A DType representing the data type of the structured op result.
+    dst_name: A string representing the name of the structured op result.
+    dst_format: A Format object representing the destination tensor format.
+  """
+  dst_indices: Tuple[IndexVar, ...]
+  dst_dims: Tuple[int, ...]
+  dst_dtype: DType
+  dst_name: str
+  dst_format: Format
+
+  def __post_init__(self) -> None:
+    """Verifies the integrity of the attribute values."""
+    assert len(self.dst_indices) == len(self.dst_dims)
+    assert self.dst_format is not None
+
+  def emit_tensor_init(self) -> ir.RankedTensorType:
+    """Returns an initialization for the destination tensor."""
+    if self.dst_format.is_dense():
+      # Initialize the dense tensor.
+      ir_type = _mlir_type_from_taco_type(self.dst_dtype)
+      tensor = linalg.InitTensorOp(self.dst_dims, ir_type).result
+      zero = arith.ConstantOp(ir_type, 0.0)
+      return linalg.FillOp(output=tensor, value=zero).results[0]
+
+    # Initialize the sparse tensor.
+    mlir_type = _mlir_tensor_type(self.dst_dtype, self.dst_dims,
+                                  self.dst_format.mlir_tensor_attr())
+    index_type = ir.IndexType.get()
+    dims = [arith.ConstantOp(index_type, d).result for d in mlir_type.shape]
+    return sparse_tensor.InitOp(mlir_type, dims)
+
+
+class _Stats:
+  """Information to describe how a tensor expression is implemented.
+
+  Currently, we only record the temporary tensors introduced for splitting the
+  original expression.
+  """
+
+  def __init__(self):
+    self._temps = []
+
+  def __repr__(self) -> str:
+    return f"_Stats({repr(self._temps)})"
+
+  def add_element(self, structop: _StructOpInfo):
+    """Adds a temporary tensor."""
+    self._temps.append(structop)
+
+  def get_total(self) -> int:
+    """Gets the total number of temporary tensors."""
+    return len(self._temps)
+
+  def _get_element(self, idx: int) -> _StructOpInfo:
+    """Gets the ith temporary tensor."""
+    assert idx < self.get_total()
+    return self._temps[idx]
+
+  def get_dimensions(self, idx: int) -> Tuple[int]:
+    """Gets the dimensions for the ith temporary tensor."""
+    return self._get_element(idx).dst_dims
+
+  def get_formats(self, idx: int) -> Tuple[ModeFormat]:
+    """Gets the ModeFormats for the ith temporary tensor."""
+    return tuple(self._get_element(idx).dst_format.format_pack.formats)
+
+
+class Tensor:
+  """The tensor class.
+
+  We support the TACO API tensor class with an alias of this class.
+
+  This class is part of the TACO API with the following methods:
+    insert: Inserts a value to the given coordinate in the tensor.
+    to_array: Returns a numpy ndarray for the tensor.
+
+  TACO API also defines the following arrtibutes for the class:
+    dtype: A dtype object representing the data type of the tensor.
+    format: A format object representing the storage format of the tensor.
+    name: A string object representing the name of the tensor.
+    order: An integral rank of the tensor.
+    shape: A list of integers representing the shape of the tensor.
+
+  We currently ignore the tensor dimension ordering for dense tensor.
+  """
+  _counter = _AtomicCounter()
+
+  def _get_unique_name(self) -> str:
+    """Returns a unique name for creating a new Tensor."""
+    return f"{_TACO_TENSOR_PREFIX}{self._counter.increment()}"
+
+  def _init_format(self, fmt: Union[ModeFormat, List[ModeFormat],
+                                    Format]) -> None:
+    """Process the fmt argument for the Tensor constructor.
+
+    Args:
+      fmt: This argument can be a ModeFormat, List[ModeFormat], or format. If
+        this argument is a ModeFormat, uses this ModeFormat for all the tensor
+        dimensions. If this argument is a list of ModeFormat, the len of the
+        list should equal to the rank of the tensor. If this argument is a
+        format, uses it for the format of the tensor.
+
+    Raises:
+      ValueError: If fmt is not one of the expected type or is inconsistent
+        with the rank of the tensor. This is because fmt could be an users
+        input.
+    """
+    if isinstance(fmt, ModeFormat):
+      self._format = _make_format([fmt] * self.order)
+    elif isinstance(fmt, list):
+      if len(fmt) == self.order and isinstance(fmt[0], ModeFormat):
+        self._format = _make_format(fmt)
+      else:
+        raise ValueError("Inconsistent shape and format: "
+                         f"{self._shape}, {fmt}.")
+    elif isinstance(fmt, Format):
+      if fmt.rank() != self.order:
+        raise ValueError("Inconsistent shape and format: "
+                         f"{self._shape}, {fmt}.")
+      else:
+        self._format = fmt
+    else:
+      raise ValueError(f"Invalid format argument: {fmt}.")
+
+  def __init__(self,
+               value_or_shape: Optional[Union[List[int], Tuple[int, ...], float,
+                                              int]] = None,
+               fmt: Optional[Union[ModeFormat, List[ModeFormat],
+                                   Format]] = None,
+               dtype: Optional[DType] = None,
+               name: Optional[str] = None):
+    """The tensor constructor interface defined by TACO API.
+
+    Args:
+      value_or_shape: This argument is optional and can be int, float,
+        List[int], or Tuple[int, ...]. If this argument is an int or float,
+        creates a scalar tensor and initializes it with the value. If this
+        argument is a list or tuple of int, uses it as the shape to create a
+        tensor.
+      fmt: This argument can be a ModeFormat, List[ModeFormat], or format. If
+        this argument is a ModeFormat, uses this ModeFormat for all the tensor
+        dimensions. If this argument is a list of ModeFormat, the len of the
+        list should equal to the rank of the tensor. If this argument is a
+        format, uses it for the format of the tensor.
+      dtype: An object of dtype, representing the data type of the tensor.
+      name: A string name of the tensor. If a name is not given, creates a
+        unique name for the tensor.
+
+    Raises:
+      ValueError: If there is any inconsistency among the input arguments.
+    """
+    # Take care of the argument default values.
+    fmt = fmt or ModeFormat.COMPRESSED
+    dtype = dtype or DType(Type.FLOAT64)
+    self._name = name or self._get_unique_name()
+
+    self._dtype = dtype
+    # We currently use _coords and _values to host the sparse tensor value with
+    # COO format, and _dense_storage to host the dense tensor value. We haven't
+    # implement the conversion between the two storages yet. This will be
+    # improved in a follow up CL.
+    self._coords = []
+    self._values = []
+    self._dense_storage = None
+    self._stats = _Stats()
+    if value_or_shape is None or isinstance(value_or_shape, int) or isinstance(
+        value_or_shape, float):
+      # Create a scalar tensor and ignore the fmt parameter.
+      self._shape = []
+      self._format = _make_format([], [])
+      if value_or_shape is not None:
+        self._dense_storage = np.array(value_or_shape, dtype=self._dtype.value)
+    elif (isinstance(value_or_shape, tuple) or isinstance(
+        value_or_shape, list)) and _all_instance_of(value_or_shape, int):
+      # Create a tensor with the specified shape and format.
+      self._shape = list(value_or_shape)
+      self._init_format(fmt)
+    else:
+      raise ValueError("Invalid first argument. "
+                       "Must be a tuple or list for a shape or a single value"
+                       f"if initializing a scalar tensor: {value_or_shape}.")
+
+  def __repr__(self) -> str:
+    value_str = (f"{repr(self._dense_storage)})" if self.is_dense() else
+                 f"{repr(self._coords)} {repr(self._values)})")
+    return (f"Tensor(_name={repr(self._name)} "
+            f"_dtype={repr(self._dtype)} : ") + value_str
+
+  def insert(self, coords: List[int], val: Union[float, int]) -> None:
+    """Inserts a value to the given coordinate.
+
+    Args:
+      coords: A list of integer coordinates. The length of the list must be the
+        same as the rank of the tensor.
+      val: A value being inserted. It is either an integral or a floating point
+        value. This value will be converted to the data type of the tensor.
+
+    Raises:
+      ValueError: When there is any problem in the parameters.
+    """
+    if not isinstance(coords, list):
+      raise ValueError(f"Non list coordinate detected: {coords}.")
+    if not _all_instance_of(coords, int):
+      raise ValueError(f"Non integer coordinate detected: {coords}.")
+    if (len(coords) != self.order or
+        any([c < 0 or c >= self._shape[i] for i, c in enumerate(coords)])):
+      raise ValueError("Invalid coordinate for rank: "
+                       f"{self.order}, {coords}.")
+
+    if not isinstance(val, int) and not isinstance(val, float):
+      raise ValueError(f"Value is neither int nor float: {val}.")
+
+    self._coords.append(tuple(coords))
+    self._values.append(self._dtype.value(val))
+
+  def is_dense(self) -> bool:
+    """Returns true if all the Tensor dimensions have a dense format."""
+    return self._format.is_dense()
+
+  def to_array(self) -> np.ndarray:
+    """Returns the numpy array for the Tensor.
+
+    This is currenly only implemented for dense Tensor.
+    """
+    if not self.is_dense():
+      raise ValueError("Conversion from non-dense Tensor "
+                       "to numpy array not supported yet.")
+    return self._dense_storage
+
+  @staticmethod
+  def from_array(array: np.ndarray) -> "Tensor":
+    """Returns a dense tensor with the value copied from the input array.
+
+    We currently only support the conversion of float64 numpy arrays to Tensor.
+
+    Args:
+      array: The numpy array that provides the data type, shape and value for
+        the tensor.
+
+    Returns:
+      A Tensor object.
+
+    Raises:
+      ValueError if the data type of the numpy array is not float64.
+    """
+    if array.dtype != np.float64:
+      raise ValueError(f"Expected float64 value type: {array.dtype}.")
+    tensor = Tensor(array.shape, ModeFormat.DENSE)
+    tensor._dense_storage = np.copy(array)
+    return tensor
+
+  @staticmethod
+  def from_coo(
+      coordinates: List[Tuple[int, ...]],
+      values: List[_AnyRuntimeType],
+      fmt: Format,
+      dtype: DType,
+  ) -> "Tensor":
+    """Converts coordinates and values to a sparse tensor representation.
+
+    Args:
+      coordinates: A list of coordinates with non-zero values.
+      values: The non-zero values.
+      fmt: The tensor storage format.
+      dtype: The tensor element data type.
+
+    Returns:
+      A tensor with the given non-zero values and storage format. The shape of
+      the tensor has the minimum size for each dimension to make the given
+      coordinates valid.
+    """
+    assert (isinstance(coordinates, List) and
+            _all_instance_of(coordinates, Tuple))
+    assert (isinstance(values, List) and _all_instance_of(values, dtype.value))
+    assert isinstance(fmt, Format)
+
+    rank = fmt.rank()
+    assert all(len(c) == rank and _all_instance_of(c, int) for c in coordinates)
+
+    # Find the maximum coordinate value for each dimension.
+    max_coordinate = list(map(max, zip(*coordinates)))
+    # The size of each dimension is one more that such a maximum coordinate
+    # value.
+    shape = [c + 1 for c in max_coordinate]
+    tensor = Tensor(shape, fmt)
+    tensor._coords = coordinates
+    tensor._values = values
+
+    return tensor
+
+  @property
+  def dtype(self) -> DType:
+    """Returns the data type for the Tensor."""
+    return self._dtype
+
+  @property
+  def format(self) -> Format:
+    """Returns the storage format for the Tensor."""
+    return self._format
+
+  @property
+  def name(self) -> str:
+    """Returns the name for the Tensor."""
+    return self._name
+
+  @property
+  def order(self) -> int:
+    """Returns the rank of the Tensor."""
+    return len(self._shape)
+
+  @property
+  def shape(self) -> List[int]:
+    """Returns the shape of the Tensor."""
+    return self._shape
+
+  def __getitem__(self, key) -> "Access":
+    """Verifies and processes a tensor access.
+
+    In the tensor index notation, a tensor access T[i, j] is represented as
+    retrieving a value with key (i, j) from the tensor object T in Python. This
+    routine verifies the key for the tensor access and returns a tensor access
+    object.
+
+    Args:
+      key: The key used to access the tensor, which could be any Python object
+        from user inputs.
+
+    Returns:
+      The corresponding tensor access object.
+
+    Raises:
+      ValueError: If key is not an IndexVar or a tuple of IndexVar.
+    """
+    indices = _verify_and_normalize_indices(key)
+    return Access(self, indices)
+
+  def __setitem__(self, key, value) -> None:
+    """Verifies and processes a tensor assignment.
+
+    In the tensor index notation, a tensor assignment "T[i, j] = ..." is
+    represented as setting a value for a tensor object T via key (i, j) in
+    Python. This routine verifies the key, evaluates the value, and assigns the
+    value to the tensor.
+
+    We only support assignment of dense tensor currently.
+
+    Args:
+      key: The key used to access the tensor, which could be any Python object
+        from user inputs.
+      value: The value assigned to the tensor, which could be any Python object
+        from user inputs.
+
+    Raises:
+      ValueError: If tensor is not a dense tensor, or the key is not an IndexVar
+        or a tuple of IndexVar, or the length of the indices is not the same as
+        the rank of the tensor.
+    """
+    indices = _verify_and_normalize_indices(key)
+    if len(indices) != self.order:
+      raise ValueError("Mismatch between indices and tensor rank: "
+                       f"len({indices}) != {self.order}.")
+
+    result = value.evaluate(self, indices)
+    if self.is_dense():
+      assert isinstance(result, np.ndarray)
+      self._dense_storage = result
+    else:
+      assert _all_instance_of(result, np.ndarray) and len(result) == 2
+      assert (result[0].ndim, result[1].ndim) == (1, 2)
+      (self._values, self._coords) = result
+
+  def mlir_tensor_type(self) -> ir.RankedTensorType:
+    """Returns the MLIR type for the tensor."""
+    return _mlir_tensor_type(self._dtype, tuple(self._shape),
+                             self._format.mlir_tensor_attr())
+
+  def dense_dst_ctype_pointer(self) -> ctypes.pointer:
+    """Returns the ctypes pointer for the pointer to an MemRefDescriptor.
+
+    For a dense tensor output, the MLIR compiler allocates the storage for
+    the tensor. This routine returns the pointer to an MLIR MemRefDescriptor for
+    receiving the tensor.
+    """
+    assert self.is_dense()
+    mem_ref_desc = runtime.make_nd_memref_descriptor(
+        self.order, np.ctypeslib.as_ctypes_type(self.dtype.value))()
+    return ctypes.pointer(ctypes.pointer(mem_ref_desc))
+
+  def ctype_pointer(self) -> ctypes.pointer:
+    """Returns the ctypes pointer for the pointer to the input tensor."""
+    if self.is_dense():
+      if self._dense_storage is None:
+        self._dense_storage = np.zeros(self._shape, self._dtype.value)
+      return _ctype_pointer_from_array(self._dense_storage)
+
+    shape = np.array(self._shape, np.int64)
+    indices = np.array(self._coords, np.int64)
+    values = np.array(self._values, self._dtype.value)
+    ptr = utils.coo_tensor_to_sparse_tensor(_get_support_lib_name(), shape,
+                                            values, indices)
+    return ctypes.pointer(ctypes.cast(ptr, ctypes.c_void_p))
+
+  def get_coordinates_and_values(
+      self) -> Tuple[List[Tuple[int, ...]], List[_AnyRuntimeType]]:
+    """Returns the coordinates and values for the non-zero elements."""
+    if not self.is_dense():
+      return (self._coords, self._values)
+
+    # Coordinates for non-zero elements, grouped by dimensions.
+    coords_by_dims = self._dense_storage.nonzero()
+    # Coordinates for non-zero elements, grouped by elements.
+    coords = np.transpose(coords_by_dims)
+    values = self._dense_storage[coords_by_dims]
+    return (coords, values)
+
+  def _record_stats(self, structop: "_StructOpInfo"):
+    """Collects information for temporary tensors."""
+    # Exclude user specified destination tensors.
+    if structop.dst_name == self.name:
+      return
+
+    self._stats.add_element(structop)
+
+
+def _emit_operand(op_def: lang.LinalgOpDef, indices: Tuple[IndexVar, ...],
+                  name: str, kind: lang.OperandKind) -> lang.OperandDef:
+  """Emits an operand for a tensor access in the current linalg operation.
+
+  Args:
+    op_def: A LinalgOpDef representing the current linalg dialect operation.
+    indices: A tuple of IndexVar used to access the tensor.
+    name: A unique string name of the tensor.
+    kind: An OperandKind for the operand.
+
+  Returns:
+    An OperandDef representing the operand.
+  """
+  dim_sym = _mlir_symbols_from_index_vars(indices)
+  opnd = lang.OperandDef(kind, lang.T, dim_sym)
+  op_def.add_operand(name, opnd)
+  return opnd
+
+
+@dataclasses.dataclass(frozen=True)
+class _DimInfo:
+  """Information for an operand dimension.
+
+  Attributes:
+    dim: An integer for the size of the dimension.
+    mode_format: A ModeFormat for the dimension sparsity.
+  """
+  dim: int
+  mode_format: ModeFormat
+
+
+@dataclasses.dataclass()
+class _ExprInfo:
+  """Expression information for validation and code generation.
+
+  Attributes:
+    src_indices: A tuple of IndexVar for the indices used by the tensors in the
+      expression tree.
+    dim_infos: A tuple of _DimInfo, representing the dimension information
+      corresponding to the src_indices.
+    reduce_indices: A set of IndexVar for the indices reduced by the expression.
+    acc_reduce_indices: An accumulated set of IndexVar for the indices reduced
+      by the expression and its children.
+    structop_info: Information to support the code generation for a structured
+      op in the linalg dialect, if the corresponding expression node is the root
+      of a subtree for a structured op.
+    mlir_value: The MLIR value generated for the structured op.
+  """
+  src_indices: Tuple[IndexVar, ...]
+  dim_infos: Tuple[_DimInfo, ...]
+  reduce_indices: Optional[Set[IndexVar]] = None
+  acc_reduce_indices: Optional[Set[IndexVar]] = None
+  structop_info: Optional[_StructOpInfo] = None
+  mlir_value: Optional[ir.Value] = None
+
+  def __post_init__(self) -> None:
+    """Verifies and fix up attribute values.
+
+    Verifies the consistency of the attributes and modifies the default values
+    to support convenient initializer syntax.
+    """
+    assert len(self.src_indices) == len(self.dim_infos)
+    self.reduce_indices = self.reduce_indices or set()
+    self.acc_reduce_indices = self.acc_reduce_indices or set()
+
+
+class IndexExpr(abc.ABC):
+  """The index notation base class.
+
+  We support the TACO API index_expression class with an alias of this class.
+  """
+
+  def _verify_operand_and_build_expr(self, rhs, op: _BinaryOp) -> "_BinaryExpr":
+    """Verifies the RHS operand and returns a binary expression.
+
+    Args:
+      rhs: The RHS of the binary operation, which could be any Python object
+        from user inputs.
+      op: A _BinaryOp object representing the binary operator.
+
+    Raises:
+      ValueError: If rhs is not an IndexExpr.
+    """
+    if not isinstance(rhs, IndexExpr):
+      raise ValueError(f"Expected IndexExpr: {rhs}")
+    return _BinaryExpr(op, self, rhs)
+
+  def __add__(self, rhs) -> "_BinaryExpr":
+    """Defines the operator +.
+
+    Args:
+      rhs: The value being added, which could be any Python object from user
+        inputs.
+
+    Returns:
+      A _BinaryExpr object representing the operation.
+
+    Raises:
+      ValueError: If rhs is not an IndexExpr.
+    """
+    return self._verify_operand_and_build_expr(rhs, operator.add)
+
+  def __mul__(self, rhs) -> "_BinaryExpr":
+    """Defines the operator *.
+
+    Args:
+      rhs: The value being multiplied, which could be any Python object from
+        user inputs.
+
+    Returns:
+      A _BinaryExpr object representing the operation.
+
+    Raises:
+      ValueError: If rhs is not an IndexExpr.
+    """
+    return self._verify_operand_and_build_expr(rhs, operator.mul)
+
+  def __sub__(self, rhs) -> "_BinaryExpr":
+    """Defines the operator -.
+
+    Args:
+      rhs: The value being subtracted, which could be any Python object from
+        user inputs.
+
+    Returns:
+      A _BinaryExpr object representing the operation.
+
+    Raises:
+      ValueError: If rhs is not an IndexExpr.
+    """
+    return self._verify_operand_and_build_expr(rhs, operator.sub)
+
+  @abc.abstractmethod
+  def _visit(self,
+             func: _ExprVisitor,
+             args,
+             *,
+             leaf_checker: _SubtreeLeafChecker = None) -> None:
+    """A post-order visitor.
+
+    Args:
+      func: A callable applied to each node in the expression tree.
+      args: The variable-length arguments passed to the callable. These
+        arguments are grouped as an iterable and will be unpacked before passing
+        to the callable. This is to enable the keyword argument only syntax
+        after this argument.
+      leaf_checker: A callable object to identify nodes that should be treated
+        as leaf nodes to support partial tree visiting.
+    """
+    pass
+
+  @abc.abstractmethod
+  def _emit_expression(
+      self,
+      expr_to_opnd: Dict["IndexExpr", lang.OperandDef],
+      expr_to_info: _ExprInfoDict,
+  ) -> lang.ScalarExpression:
+    """Emits MLIR for the expression tree.
+
+    Args:
+      expr_to_opnd: A dictionary for looking up structured op input operands for
+        the input nodes of the structured op.
+      expr_to_info: A dictionary for looking up code generation information for
+        expressions.
+
+    Returns:
+      A linalg dialect ScalarExpression for the expression.
+    """
+    pass
+
+  @abc.abstractmethod
+  def dtype(self) -> DType:
+    """Returns the data type for the result of the expression."""
+    pass
+
+  def _emit_structured_op(self, expr_to_info: _ExprInfoDict) -> None:
+    """Emits a structured op in the linalg dialect for the expression tree.
+
+    We define a DefineOpcallable in the domain specific language for the linalg
+    dialect and execute the callable to generate the structured op. Self is the
+    root of the expression tree for the structured op.
+
+    Args:
+      expr_to_info: A dictionary for looking up code generation information for
+        expressions.
+    """
+    op_info = expr_to_info[self].structop_info
+    op_name = op_info.dst_name
+    op_def = lang.LinalgOpDef(name=op_name)
+    op_callable = lang.DefinedOpCallable(op_name, op_def)
+
+    # Collect the input expression nodes for the structured op.
+    expr_inputs = []
+    self._visit(
+        _gather_structured_op_input,
+        (self, expr_to_info, expr_inputs),
+        leaf_checker=_is_structured_op_leaf,
+    )
+
+    # Create a linalg structured op operand for each input expression node and
+    # build a dictionary for looking up the information.
+    expr_to_input_opnd = {
+        e: _emit_structured_op_input(e, expr_to_info, op_def)
+        for e in expr_inputs
+    }
+
+    # Emit the expression tree, which produces the value assigned to the
+    # destination tensor.
+    value = self._emit_expression(expr_to_input_opnd, expr_to_info)
+    # Emit the structured op representation for the destination tensor.
+    dst_opnd = _emit_operand(op_def, op_info.dst_indices, op_info.dst_name,
+                             lang.OperandKind.OutputTensor)
+    dst_dim_syms = _mlir_dimensions_from_index_vars(op_info.dst_indices)
+    dst_use = lang.TensorUse(dst_opnd, dst_dim_syms)
+
+    expr_info = expr_to_info[self]
+    # If the structured op reduces some indices, explicitly represent the
+    # reduction. This is done by generating a ReduceFn for the dimensions being
+    # reduced in the linalg dialect and calling the function with the value
+    # being reduced. We only support add reduction currently.
+    if expr_info.reduce_indices:
+      reduce_dims = _mlir_dimensions_from_index_vars(expr_info.reduce_indices)
+      value = lang.ReduceFn.add[reduce_dims](value)
+
+    # Emit the assignment as a comprehension in the linalg dialect.
+    comp = lang.Comprehension((dst_use, value))
+    op_def.comprehensions.append(comp)
+
+    # The structured op in the linalg dialect requires an explicit
+    # initialization for the destination tensor. Emit MLIR to initialize the
+    # destination tensor.
+    init = op_info.emit_tensor_init()
+
+    # Collect MLIR values for the linalg input operands, with the assumption
+    # that dictionary preserves the insertion order.
+    args = [
+        expr_to_info[expr].mlir_value
+        for expr, opnd in expr_to_input_opnd.items()
+    ]
+    # Execute the DefineOpcallable object for the linalg dialect operation to
+    # emit MLIR for the linalg structured op.
+    expr_info.mlir_value = op_callable(*args, outs=[init])
+
+  def _identify_structured_ops(
+      self,
+      expr_to_info: _ExprInfoDict,
+      dst: Tensor,
+      dst_indices: Tuple[IndexVar, ...],
+  ) -> List["IndexExpr"]:
+    """Returns expression nodes for the roots of the identified structured ops.
+
+    A structured op in the linalg dialect only supports reduction performed on
+    the whole expression. If the expression tree contains reduction that are
+    performed on part of the expression tree, the expression tree needs to be
+    implemented with multiple structured ops. This routine identifies all the
+    expression nodes that contain reduction as the root of structured ops in the
+    linalg dialect.
+
+    Args:
+      expr_to_info: A dictionary for looking up code generation information for
+        expressions.
+      dst: A destination Tensor that accepts the value of the expression tree.
+      dst_indices: The indices used by the destination index expression.
+
+    Returns:
+      An ordered list of IndexExpr for the root expressions of the structured
+      ops, where child expressions go before parent expressions that use their
+      results.
+    """
+    reduce_indices = tuple(
+        set(expr_to_info[self].src_indices) - set(dst_indices))
+    for reduce_index in reduce_indices:
+      _mark_structured_op_root(self, reduce_index, expr_to_info)
+
+    self._visit(_accumulate_reduce_indices, (expr_to_info,))
+    structop_roots = []
+    self._visit(_gather_structured_op, (expr_to_info, structop_roots))
+
+    # Handle the root of the top level expression.
+    if not structop_roots or structop_roots[-1] != self:
+      # The top level expression is not a reduction. Add the top level
+      # expression as a structured op root.
+      structop_roots.append(self)
+
+    # Use user specified information for the destination tensor to build an
+    # _StructOpInfo for the top level expression.
+    expr_to_info[self].structop_info = _StructOpInfo(dst_indices,
+                                                     tuple(dst.shape),
+                                                     self.dtype(), dst.name,
+                                                     dst.format)
+
+    return structop_roots
+
+  def _validate_and_collect_expr_info(
+      self,
+      dst: Tensor,
+      dst_indices: Tuple[IndexVar, ...],
+  ) -> _ExprInfoDict:
+    """Propagates expression information for validation.
+
+    Propagates the indices used by child expression nodes to parent expression
+    nodes. Also collects and validates the sizes for the dimensions
+    corresponding to the indices.
+
+    Args:
+      dst: A destination Tensor that accepts the value of the expression tree.
+      dst_indices: The indices used by the destination index expression.
+
+    Raises:
+      ValueError if there is any inconsistency in indices or dimensional
+      values.
+
+    Returns:
+      A dictionary of (IndexExpr, _ExprInfo).
+    """
+    expr_to_info = {}
+    # Validate the expression tree and construct expression information.
+    self._visit(_validate_and_collect_expr_info, (expr_to_info,))
+
+    # Validate the destination dimension information.
+    info = expr_to_info[self]
+    index_to_dim_info = {i: d for i, d in zip(info.src_indices, info.dim_infos)}
+    for i, d, in zip(dst_indices, dst.shape):
+      if i not in index_to_dim_info:
+        raise ValueError("Destination IndexVar not used in the "
+                         f"source expression: {i}")
+      else:
+        if d != index_to_dim_info[i].dim:
+          raise ValueError(f"Inconsistent destination dimension for {i}: "
+                           f"{d} vs {index_to_dim_info[i].dim}")
+
+    return expr_to_info
+
+  def _emit_assignment(
+      self,
+      module: ir.Module,
+      dst: Tensor,
+      dst_indices: Tuple[IndexVar, ...],
+      expr_to_info: _ExprInfoDict,
+      input_accesses: List["Access"],
+  ) -> None:
+    """Emits an MLIR function for assigning the expression to a tensor."""
+    input_types = [a.tensor.mlir_tensor_type() for a in input_accesses]
+
+    # Build the kernel for the operations.
+    with ir.InsertionPoint(module.body):
+
+      @builtin.FuncOp.from_py_func(*input_types, name=_ENTRY_NAME)
+      def linalg_funcop(*args):
+        # Set up the mapping from the Access nodes to their MLIR values.
+        for e, mlir in zip(input_accesses, args):
+          expr_to_info[e].mlir_value = mlir
+
+        # Emit structured ops in the linalg dialect to implement the assignment.
+        for structop_root in self._identify_structured_ops(
+            expr_to_info, dst, dst_indices):
+          structop_root._emit_structured_op(expr_to_info)
+          dst._record_stats(expr_to_info[structop_root].structop_info)
+
+        # The function returns the MLIR value of the root expression.
+        return expr_to_info[self].mlir_value
+
+      linalg_funcop.func_op.attributes[
+          "llvm.emit_c_interface"] = ir.UnitAttr.get()
+
+  def evaluate(
+      self,
+      dst: Tensor,
+      dst_indices: Tuple[IndexVar, ...],
+  ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
+    """Evaluates tensor assignment dst[dst_indices] = expression.
+
+    Args:
+      dst: The destination tensor.
+      dst_indices: The tuple of IndexVar used to access the destination tensor.
+
+    Returns:
+      The result of the dense tensor represented in numpy ndarray or the sparse
+      tensor represented by two numpy ndarray for its non-zero values and
+      indices.
+
+    Raises:
+      ValueError: If the expression is not proper or not supported.
+    """
+    expr_to_info = self._validate_and_collect_expr_info(dst, dst_indices)
+
+    # Compute a list of input accesses.
+    input_accesses = []
+    self._visit(_gather_input_accesses_index_vars, (input_accesses,))
+
+    support_lib = _get_support_lib_name()
+    # Build and compile the module to produce the execution engine.
+    with ir.Context(), ir.Location.unknown():
+      module = ir.Module.create()
+      self._emit_assignment(module, dst, dst_indices, expr_to_info,
+                            input_accesses)
+      compiled_module = _compile_mlir(module)
+
+      # We currently rely on an environment to pass in the full path of a
+      # supporting library for the execution engine.
+      engine = execution_engine.ExecutionEngine(
+          compiled_module, opt_level=_OPT_LEVEL, shared_libs=[support_lib])
+
+    # Gather the pointers for the input buffers.
+    input_pointers = [a.tensor.ctype_pointer() for a in input_accesses]
+    if dst.is_dense():
+      # The pointer to receive dense output is the first argument to the
+      # execution engine.
+      arg_pointers = [dst.dense_dst_ctype_pointer()] + input_pointers
+    else:
+      # The pointer to receive sparse output is the last argument to the
+      # execution engine. The pointer to receive a sparse tensor output is a
+      # pointer to pointer of char.
+      arg_pointers = input_pointers + [
+          ctypes.pointer(ctypes.pointer(ctypes.c_char(0)))
+      ]
+
+    # Invoke the execution engine to run the module and return the result.
+    engine.invoke(_ENTRY_NAME, *arg_pointers)
+
+    if dst.is_dense():
+      return runtime.ranked_memref_to_numpy(arg_pointers[0][0])
+
+    # Check and return the sparse tensor output.
+    rank, nse, shape, values, indices = utils.sparse_tensor_to_coo_tensor(
+        support_lib,
+        ctypes.cast(arg_pointers[-1][0], ctypes.c_void_p),
+        np.float64,
+    )
+    assert (np.equal(rank, dst.order)
+            and np.array_equal(shape, np.array(dst.shape)) and
+            np.equal(values.ndim, 1) and np.equal(values.shape[0], nse) and
+            np.equal(indices.ndim, 2) and np.equal(indices.shape[0], nse) and
+            np.equal(indices.shape[1], rank))
+    return (values, indices)
+
+
+@dataclasses.dataclass(frozen=True)
+class Access(IndexExpr):
+  """The tensor access class.
+
+  We support the TACO API access class with an alias of this class.
+
+  Attributes:
+    tensor: A Tensor being accessed.
+    indices: A tuple of IndexVar, representing the indices used to access the
+      Tensor.
+  """
+  tensor: Tensor
+  indices: Tuple[IndexVar, ...]
+
+  def __post_init__(self) -> None:
+    """Verifies the tensor and indices for a tensor access.
+
+    Raises:
+       ValueError: If indices is not a list of IndexVar or the len of indices
+       doesn't equal to the rank of the tensor.
+    """
+    if (not isinstance(self.indices, tuple) or
+        not _all_instance_of(self.indices, IndexVar)):
+      raise ValueError(f"Indices contain non IndexVar: {str(self.indices)}.")
+    if self.tensor.order != len(self.indices):
+      raise ValueError("Invalid indices for rank: "
+                       f"str{self.tensor.order} != len({str(self.indices)}).")
+
+  def _emit_expression(
+      self,
+      expr_to_opnd: Dict[IndexExpr, lang.OperandDef],
+      expr_to_info: _ExprInfoDict,
+  ) -> lang.ScalarExpression:
+    """Emits a linalg dialect TensorUse expression for the tensor access."""
+    assert self in expr_to_opnd
+    dims = _mlir_dimensions_from_index_vars(self.indices)
+    return lang.TensorUse(expr_to_opnd[self], dims)
+
+  def _visit(self,
+             func: _ExprVisitor,
+             args,
+             *,
+             leaf_checker: _SubtreeLeafChecker = None) -> None:
+    if leaf_checker:
+      assert leaf_checker(self, *args)
+    func(self, *args)
+
+  def dtype(self) -> DType:
+    return self.tensor.dtype
+
+
+def _gather_input_accesses_index_vars(
+    expr: IndexExpr,
+    input_accesses: List[Access],
+) -> None:
+  """Collects Access nodes."""
+  if isinstance(expr, Access) and expr not in input_accesses:
+    input_accesses.append(expr)
+
+
+def _op_to_callable(op: _BinaryOp) -> lang.ArithFnType:
+  """Returns the linalg dialect function object for the given operation."""
+  op_to_callable = {
+      operator.add: lang.ArithFn.add,
+      operator.sub: lang.ArithFn.sub,
+      operator.mul: lang.ArithFn.mul,
+  }
+  return op_to_callable[op]
+
+
+@dataclasses.dataclass(frozen=True)
+class _BinaryExpr(IndexExpr):
+  """The representation for a binary operation.
+
+  Attributes:
+  op: A _BinaryOp representing the binary operation.
+  a: An IndexExpr representing the first operand of the operation.
+  b: An IndexExpr representing the second operand of the operation.
+  """
+  op: _BinaryOp
+  a: IndexExpr
+  b: IndexExpr
+
+  def __post_init__(self) -> None:
+    """Verifies that the operands being added are IndexExpr."""
+    assert isinstance(self.a, IndexExpr) and isinstance(self.b, IndexExpr)
+
+  def _emit_expression(
+      self,
+      expr_to_opnd: Dict[IndexExpr, lang.OperandDef],
+      expr_to_info: _ExprInfoDict,
+  ) -> lang.ScalarExpression:
+    """Emits the expression tree and returns the expression."""
+    # The current expression node is an internal node of the structured op.
+    if self not in expr_to_opnd:
+      a = self.a._emit_expression(expr_to_opnd, expr_to_info)
+      b = self.b._emit_expression(expr_to_opnd, expr_to_info)
+      return _op_to_callable(self.op)(a, b)
+
+    # The current expression is a leaf node of the structured op. That is, it is
+    # a temporary tensor generated by its child structured op.
+    op_info = expr_to_info[self].structop_info
+    assert op_info is not None
+    dims = _mlir_dimensions_from_index_vars(op_info.dst_indices)
+    return lang.TensorUse(expr_to_opnd[self], dims)
+
+  def _visit(self,
+             func: _ExprVisitor,
+             args,
+             *,
+             leaf_checker: _SubtreeLeafChecker = None) -> None:
+    """A post-order visitor."""
+    if leaf_checker is None or not leaf_checker(self, *args):
+      self.a._visit(func, args, leaf_checker=leaf_checker)
+      self.b._visit(func, args, leaf_checker=leaf_checker)
+    func(self, *args)
+
+  def dtype(self) -> DType:
+    """Returns the data type of the binary operation."""
+    return self.a.dtype()
+
+
+def _validate_and_collect_dim_info(
+    index_to_dim_info: Dict[IndexVar, _DimInfo],
+    indices: Tuple[IndexVar, ...],
+    dim_infos: Tuple[_DimInfo, ...],
+    expr: _BinaryExpr,
+) -> None:
+  """Validates and collects the dimension information for an index notation.
+
+  Validates (indices, dim_infos) against the information collected from other
+  source operands and is represented by index_to_dim_info. In particular, we
+  ensure that each IndexVar corresponds to only one dimension size. We also
+  aggregate the new information represented in (indices, dim_infos) to
+  index_to_dim_info.
+
+  Args:
+    index_to_dim: A dictionary of (IndexVar, _DimInfo) collected from the
+      previous operands.
+    indices: The IndexVars to be validated.
+    dim_infos: The dimension information for the IndexVars to be validated.
+    expr: The binary expression where (indices, dim_infos) is used.
+
+  Raises:
+    ValueError if there is any problem in the IndexVars or dimensional values.
+  """
+  assert len(indices) == len(dim_infos)
+  for i, d in zip(indices, dim_infos):
+    if i not in index_to_dim_info:
+      index_to_dim_info[i] = d
+    else:
+      if d.dim != index_to_dim_info[i].dim:
+        raise ValueError(f"Inconsistent source dimension for {i}: "
+                         f"{d.dim} vs {index_to_dim_info[i].dim}")
+      mode_format = _mode_format_estimator(expr.op)(
+          index_to_dim_info[i].mode_format, d.mode_format)
+      index_to_dim_info[i] = _DimInfo(d.dim, mode_format)
+
+
+def _validate_and_collect_expr_info(
+    expr: IndexExpr,
+    expr_to_info: _ExprInfoDict,
+) -> None:
+  """Validates dimension information and constructs _ExprInfo.
+
+  Validates that dimensional values for the same IndexVar are the same. Collects
+  a list of IndexVar used by the expression and their corresponding dimensional
+  values. Constructs an _ExprInfo object to record the information for the
+  IndexExpr.
+
+  This routine is passed to the post-order visitor as an _ExprVisitor object.
+
+  Args:
+    expr: The IndexExpr being validated.
+    expr_to_info: The dictionary of (IndexExpr, _ExprInfo) for recording the
+      expression information.
+
+  Raises:
+    ValueError if there is any problem in the IndexVars or dimensional values.
+  """
+  # Objects of class Access can be shared by different expressions. Avoid
+  # processing Access objects multiple times by skipping the processing if expr
+  # is already in the dictionary.
+  if expr in expr_to_info:
+    return
+
+  if isinstance(expr, Access):
+    src_indices = expr.indices
+    src_dims = tuple(expr.tensor.shape)
+    mode_formats = tuple(expr.tensor.format.format_pack.formats)
+    assert len(src_dims) == len(mode_formats)
+    dim_infos = tuple([_DimInfo(d, m) for d, m in zip(src_dims, mode_formats)])
+  else:
+    assert isinstance(expr, _BinaryExpr)
+    a_info = expr_to_info[expr.a]
+    index_to_dim_info = {
+        i: d for i, d in zip(a_info.src_indices, a_info.dim_infos)
+    }
+    b_info = expr_to_info[expr.b]
+    _validate_and_collect_dim_info(index_to_dim_info, b_info.src_indices,
+                                   b_info.dim_infos, expr)
+    # Here we rely on the fact that dictionaries keep the insertion order for
+    # keys and values.
+    src_indices = tuple(index_to_dim_info.keys())
+    dim_infos = tuple(index_to_dim_info.values())
+
+  expr_to_info[expr] = _ExprInfo(src_indices, dim_infos)
+
+
+def _mark_structured_op_root(
+    expr: IndexExpr,
+    reduce_index: IndexVar,
+    expr_to_info: _ExprInfoDict,
+) -> None:
+  """Identifies the root expression for a structured op in the linalg dialect.
+
+  An linalg structured op can only perform reduction on the whole expression.
+  For a TACO tensor algebra expression, the reduction on an IndexVar is done at
+  the smallest expression that contains all the uses of the IndexVar. If such an
+  expression is only part of the whole expression, we need to split this
+  sub-expression tree out from its parent and implement the sub-expression as a
+  structured op.
+
+  This routine identifies the root expression node for performing a reduction on
+  the given IndexVar. If the reduction of the given IndexVar should be performed
+  on expression X, then the IndexVar is added to expr_to_info[X].reduce_indices
+
+  Args:
+    expr: The root IndexExpr for the tensor algebra expression.
+    reduce_index: The IndexVar which we want to find out the proper expression
+      to perform a reduction.
+    expr_to_info: The dictionary to look up _ExprInfo for IndexExpr.
+  """
+  assert (isinstance(expr, _BinaryExpr))
+  a_info = expr_to_info[expr.a]
+  b_info = expr_to_info[expr.b]
+  expr_info = expr_to_info[expr]
+
+  if reduce_index in a_info.src_indices and reduce_index in b_info.src_indices:
+    expr_info.reduce_indices.add(reduce_index)
+    return
+
+  if reduce_index in a_info.src_indices:
+    _mark_structured_op_root(expr.a, reduce_index, expr_to_info)
+  elif reduce_index in b_info.src_indices:
+    _mark_structured_op_root(expr.b, reduce_index, expr_to_info)
+  else:
+    assert False, "Unreachable path"
+
+
+def _accumulate_reduce_indices(
+    expr: IndexExpr,
+    expr_to_info: _ExprInfoDict,
+) -> None:
+  """Propagates reduction indices from child expressions to parent expressions.
+
+  This routine is passed to the post-order visitor as an _ExprVisitor object.
+
+  Args:
+    expr: The IndexExpr being visited.
+    expr_to_info: The dictionary of (IndexExpr, _ExprInfo) for recording the
+      expression information.
+  """
+  assert expr in expr_to_info
+  expr_info = expr_to_info[expr]
+
+  if isinstance(expr, _BinaryExpr):
+    a_info = expr_to_info[expr.a]
+    b_info = expr_to_info[expr.b]
+    expr_info.acc_reduce_indices = (
+        a_info.acc_reduce_indices | b_info.acc_reduce_indices
+        | expr_info.reduce_indices)
+  else:
+    assert isinstance(expr, Access)
+
+
+def _gather_structured_op(
+    expr: IndexExpr,
+    expr_to_info: _ExprInfoDict,
+    structop_roots: List[IndexExpr],
+) -> None:
+  """Adds structured op root expression information to structop_roots.
+
+  This routine is passed to the post-order visitor as an _ExprVisitor object.
+
+  Args:
+    expr: The IndexExpr being visited.
+    expr_to_info: The dictionary to look up _ExprInfo for IndexExpr.
+    structop_roots: The resulting list of IndexExpr that are the roots for
+      linalg structured ops.
+  """
+  if not expr_to_info[expr].reduce_indices:
+    return
+
+  # If the expression is the root for reducing some indices, collect the indices
+  # and dimensions for the reduction result.
+  dst_indices = []
+  dst_dims = []
+  mode_fmts = []
+  for i, d in zip(expr_to_info[expr].src_indices, expr_to_info[expr].dim_infos):
+    if i not in expr_to_info[expr].acc_reduce_indices:
+      dst_indices.append(i)
+      dst_dims.append(d.dim)
+      mode_fmts.append(d.mode_format)
+
+  # Add the information to the dictionary.
+  op_info = _StructOpInfo(
+      tuple(dst_indices),
+      tuple(dst_dims),
+      expr.dtype(),
+      f"temp{len(structop_roots)}",
+      _make_format(mode_fmts),
+  )
+  expr_to_info[expr].structop_info = op_info
+
+  # Add the expression to the list of structured op roots.
+  structop_roots.append(expr)
+
+
+def _is_structured_op_leaf(
+    expr: IndexExpr,
+    root: IndexExpr,
+    expr_to_info: _ExprInfoDict,
+    *unused_args,
+) -> bool:
+  """Returns true iff the expression is a leaf node for a structured op.
+
+  The root of a structured op is a leaf of its parent structured op that uses
+  its result. An expression node is a leaf node for the current structured op if
+  it is an Access node or the root for a structured op that is not the current
+  structured op.
+
+  This routine is passed to the post-order visitor as a _SubtreeLeafChecker
+  object. Because the post-order visitor pass the same parameters to both
+  _SubtreeLeafChecker and _ExprVisitor, this routine may received unused
+  parameters.
+
+  Args:
+    expr: The IndexExpr being visited.
+    root: The root of the current structured op.
+    expr_to_info: The dictionary to look up _ExprInfo for IndexExpr.
+
+  Returns:
+    True if the current IndexExpr is a leaf for the current structured op.
+  """
+  return (expr != root and
+          expr_to_info[expr].structop_info is not None) or isinstance(
+              expr, Access)
+
+
+def _gather_structured_op_input(
+    expr: IndexExpr,
+    root: IndexExpr,
+    expr_to_info: _ExprInfoDict,
+    structop_inputs: List[IndexExpr],
+) -> None:
+  """Adds the IndexExpr to structop_inputs if it is an input.
+
+  If the current IndexExpr is an input for the current structured op, adds it to
+  structop_inputs. The current IndexExpr is an input if it is an Access node or
+  if it is the root for a structured op that is not the current structured op.
+
+  This routine is passed to the post-order visitor as an _ExprVisitor object.
+
+  Args:
+    expr: The IndexExpr being visited.
+    root: The root of the current structured op.
+    expr_to_info: The dictionary to look up _ExprInfo for IndexExpr.
+    structop_inputs: The resulting list of IndexExpr that provide input to the
+      current structured op.
+  """
+  if (expr != root and expr not in structop_inputs) and (
+      isinstance(expr, Access) or
+      (expr in expr_to_info and expr_to_info[expr].structop_info)):
+    structop_inputs.append(expr)
+
+
+def _emit_structured_op_input(
+    expr: IndexExpr,
+    expr_to_info: _ExprInfoDict,
+    op_def: lang.LinalgOpDef,
+) -> lang.OperandDef:
+  """Emits OperandDef in the linalg dialect for the input IndexExpr.
+
+  Args:
+    expr: The input IndexExpr for the current structured op.
+    expr_to_info: The dictionary to look up _ExprInfo for IndexExpr.
+    op_def: The linalg operation for the current structured op.
+
+  Returns:
+    An OperandDef in the linalg dialect for the input IndexExpr.
+  """
+  op_info = expr_to_info[expr].structop_info
+  if op_info:
+    # The input is a temporary tensor produced by another structured op.
+    indices = op_info.dst_indices
+    name = op_info.dst_name
+  else:
+    # The input is a user provided tensor.
+    assert isinstance(expr, Access)
+    indices = expr.indices
+    name = expr.tensor.name
+
+  dim_sym = _mlir_symbols_from_index_vars(indices)
+  opnd = lang.OperandDef(lang.OperandKind.InputTensor, lang.T, dim_sym)
+  op_def.add_operand(name, opnd)
+  return opnd
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py
new file mode 100644
index 0000000000000..05704b92f6a84
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_api.py
@@ -0,0 +1,47 @@
+#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+#  See https://llvm.org/LICENSE.txt for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Supports the PyTACO API with the MLIR-PyTACO implementation.
+
+See http://tensor-compiler.org/ for TACO tensor compiler.
+
+This module exports the MLIR-PyTACO implementation through the language defined
+by PyTACO. In particular, it defines the function and type aliases and constants
+needed for the PyTACO API to support the execution of PyTACO programs using the
+MLIR-PyTACO implementation.
+"""
+
+from . import mlir_pytaco
+from . import mlir_pytaco_io
+
+# Functions defined by PyTACO API.
+get_index_vars = mlir_pytaco.get_index_vars
+from_array = mlir_pytaco.Tensor.from_array
+read = mlir_pytaco_io.read
+write = mlir_pytaco_io.write
+
+# Classes defined by PyTACO API.
+dtype = mlir_pytaco.DType
+mode_format = mlir_pytaco.ModeFormat
+mode_ordering = mlir_pytaco.ModeOrdering
+mode_format_pack = mlir_pytaco.ModeFormatPack
+format = mlir_pytaco.Format
+index_var = mlir_pytaco.IndexVar
+tensor = mlir_pytaco.Tensor
+index_expression = mlir_pytaco.IndexExpr
+access = mlir_pytaco.Access
+
+# Data type constants defined by PyTACO API.
+int16 = mlir_pytaco.DType(mlir_pytaco.Type.INT16)
+int32 = mlir_pytaco.DType(mlir_pytaco.Type.INT32)
+int64 = mlir_pytaco.DType(mlir_pytaco.Type.INT64)
+float32 = mlir_pytaco.DType(mlir_pytaco.Type.FLOAT32)
+float64 = mlir_pytaco.DType(mlir_pytaco.Type.FLOAT64)
+
+# Storage format constants defined by the PyTACO API. In PyTACO, each storage
+# format constant has two aliasing names.
+compressed = mlir_pytaco.ModeFormat.COMPRESSED
+Compressed = mlir_pytaco.ModeFormat.COMPRESSED
+dense = mlir_pytaco.ModeFormat.DENSE
+Dense = mlir_pytaco.ModeFormat.DENSE
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
new file mode 100644
index 0000000000000..0ee69c78da37a
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_io.py
@@ -0,0 +1,206 @@
+#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+#  See https://llvm.org/LICENSE.txt for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+"""Experimental MLIR-PyTACO with sparse tensor support.
+
+See http://tensor-compiler.org/ for TACO tensor compiler.
+
+This module implements the PyTACO API for writing a tensor to a file or reading
+a tensor from a file.
+
+See the following links for Matrix Market Exchange (.mtx) format and FROSTT
+(.tns) format:
+  https://math.nist.gov/MatrixMarket/formats.html
+  http://frostt.io/tensors/file-formats.html
+"""
+
+from typing import List, TextIO
+
+from . import mlir_pytaco
+
+# Define the type aliases so that we can write the implementation here as if
+# it were part of mlir_pytaco.py.
+Tensor = mlir_pytaco.Tensor
+Format = mlir_pytaco.Format
+DType = mlir_pytaco.DType
+Type = mlir_pytaco.Type
+
+# Constants used in the implementation.
+_MTX_FILENAME_SUFFIX = ".mtx"
+_TNS_FILENAME_SUFFIX = ".tns"
+
+_MTX_HEAD = "%%MatrixMarket"
+_MTX_MATRIX = "matrix"
+_MTX_COORDINATE = "coordinate"
+_MTX_REAL = "real"
+_MTX_SYMMETRY = "symmetric"
+_MTX_GENERAL = "general"
+_SYMMETRY_FIELD_ID = 4
+
+# The TACO supported header for .mtx has the following five fields:
+# . %%MatrixMarket
+# . matrix | tensor
+# . coordinate | array
+# . real
+# . symmetric | general
+#
+# This is what we support currently.
+_SUPPORTED_HEADER_FIELDS = ((_MTX_HEAD,), (_MTX_MATRIX,), (_MTX_COORDINATE,),
+                            (_MTX_REAL,), (_MTX_GENERAL, _MTX_SYMMETRY))
+
+_A_SPACE = " "
+_MTX_COMMENT = "%"
+_TNS_COMMENT = "#"
+
+
+def _coordinate_from_strings(strings: List[str]) -> List[int]:
+  """"Return the coordinate represented by the input strings."""
+  # Coordinates are 1-based in the text file and 0-based in memory.
+  return [int(s) - 1 for s in strings]
+
+
+def _read_coordinate_format(file: TextIO, tensor: Tensor,
+                            is_symmetric: bool) -> None:
+  """Reads tensor values in coordinate format."""
+  rank = tensor.order
+  # Process the data for the tensor.
+  for line in file:
+    if not line:
+      continue
+
+    fields = line.split(_A_SPACE)
+    if rank != len(fields) - 1:
+      raise ValueError("The format and data have mismatched ranks: "
+                       f"{rank} vs {len(fields)-1}.")
+    coordinate = _coordinate_from_strings(fields[:-1])
+    value = float(fields[-1])
+    tensor.insert(coordinate, value)
+    if is_symmetric and coordinate[0] != coordinate[-1]:
+      coordinate.reverse()
+      tensor.insert(coordinate, value)
+
+
+def _read_mtx(file: TextIO, fmt: Format) -> Tensor:
+  """Inputs tensor from a text file with .mtx format."""
+  # The first line should have this five fields:
+  #   head tensor-kind format data-type symmetry
+  fields = file.readline().rstrip("\n").split(_A_SPACE)
+  tuple_to_str = lambda x: "|".join(x)
+  if len(fields) != len(_SUPPORTED_HEADER_FIELDS):
+    raise ValueError(
+        "Expected first line with theses fields "
+        f"{' '.join(map(tuple_to_str, _SUPPORTED_HEADER_FIELDS))}: "
+        f"{' '.join(fields)}")
+
+  for i, values in enumerate(_SUPPORTED_HEADER_FIELDS):
+    if fields[i] not in values:
+      raise ValueError(f"The {i}th field can only be one of these values "
+                       f"{tuple_to_str(values)}: {fields[i]}")
+
+  is_symmetric = (fields[_SYMMETRY_FIELD_ID] == _MTX_SYMMETRY)
+  # Skip leading empty lines or comment lines.
+  line = file.readline()
+  while not line or line[0] == _MTX_COMMENT:
+    line = file.readline()
+
+  # Process the first data line with dimensions and number of non-zero values.
+  fields = line.split(_A_SPACE)
+  rank = fmt.rank()
+  if rank != len(fields) - 1:
+    raise ValueError("The format and data have mismatched ranks: "
+                     f"{rank} vs {len(fields)-1}.")
+  shape = fields[:-1]
+  shape = [int(s) for s in shape]
+  num_non_zero = float(fields[-1])
+
+  # Read the tensor values in coordinate format.
+  tensor = Tensor(shape, fmt)
+  _read_coordinate_format(file, tensor, is_symmetric)
+  return tensor
+
+
+def _read_tns(file: TextIO, fmt: Format) -> Tensor:
+  """Inputs tensor from a text file with .tns format."""
+  rank = fmt.rank()
+  coordinates = []
+  values = []
+  dtype = DType(Type.FLOAT64)
+
+  for line in file:
+    # Skip empty lines and comment lines.
+    if not line or line[0] == _TNS_COMMENT:
+      continue
+
+    # Process each line with a coordinate and the value at the coordinate.
+    fields = line.split(_A_SPACE)
+    if rank != len(fields) - 1:
+      raise ValueError("The format and data have mismatched ranks: "
+                       f"{rank} vs {len(fields)-1}.")
+    coordinates.append(tuple(_coordinate_from_strings(fields[:-1])))
+    values.append(dtype.value(fields[-1]))
+
+  return Tensor.from_coo(coordinates, values, fmt, dtype)
+
+
+def _write_tns(file: TextIO, tensor: Tensor) -> None:
+  """Outputs a tensor to a file using .tns format."""
+  coords, non_zeros = tensor.get_coordinates_and_values()
+  assert len(coords) == len(non_zeros)
+  # Output a coordinate and the corresponding value in a line.
+  for c, v in zip(coords, non_zeros):
+    # The coordinates are 1-based in the text file and 0-based in memory.
+    plus_one_to_str = lambda x: str(x + 1)
+    file.write(f"{' '.join(map(plus_one_to_str,c))} {v}\n")
+
+
+def read(filename: str, fmt: Format) -> Tensor:
+  """Inputs a tensor from a given file.
+
+  The name suffix of the file specifies the format of the input tensor. We
+  currently only support .mtx format for support sparse tensors.
+
+  Args:
+    filename: A string input filename.
+    fmt: The storage format of the tensor.
+
+  Raises:
+    ValueError: If filename doesn't end with .mtx or .tns, or fmt is not an
+    instance of Format or fmt is not a sparse tensor.
+  """
+  if (not isinstance(filename, str) or
+      (not filename.endswith(_MTX_FILENAME_SUFFIX) and
+       not filename.endswith(_TNS_FILENAME_SUFFIX))):
+    raise ValueError("Expected string filename ends with "
+                     f"{_MTX_FILENAME_SUFFIX} or {_TNS_FILENAME_SUFFIX}: "
+                     f"{filename}.")
+  if not isinstance(fmt, Format) or fmt.is_dense():
+    raise ValueError(f"Expected a sparse Format object: {fmt}.")
+
+  with open(filename, "r") as file:
+    return (_read_mtx(file, fmt) if filename.endswith(_MTX_FILENAME_SUFFIX) else
+            _read_tns(file, fmt))
+
+
+def write(filename: str, tensor: Tensor) -> None:
+  """Outputs a tensor to a given file.
+
+  The name suffix of the file specifies the format of the output. We currently
+  only support .tns format.
+
+  Args:
+    filename: A string output filename.
+    tensor: The tensor to output.
+
+  Raises:
+    ValueError: If filename doesn't end with .tns or tensor is not a Tensor.
+  """
+  if (not isinstance(filename, str) or
+      not filename.endswith(_TNS_FILENAME_SUFFIX)):
+    raise ValueError("Expected string filename ends with"
+                     f" {_TNS_FILENAME_SUFFIX}: {filename}.")
+  if not isinstance(tensor, Tensor):
+    raise ValueError(f"Expected a Tensor object: {tensor}.")
+
+  with open(filename, "w") as file:
+    return _write_tns(file, tensor)
diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
new file mode 100644
index 0000000000000..867a129e9a09b
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/tools/mlir_pytaco_utils.py
@@ -0,0 +1,121 @@
+#  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+#  See https://llvm.org/LICENSE.txt for license information.
+#  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# This file contains the utilities to process sparse tensor outputs.
+
+from typing import Tuple
+import ctypes
+import functools
+import numpy as np
+
+
+@functools.lru_cache()
+def _get_c_shared_lib(lib_name: str) -> ctypes.CDLL:
+  """Loads and returns the requested C shared library.
+
+  Args:
+    lib_name: A string representing the C shared library.
+
+  Returns:
+    The C shared library.
+
+  Raises:
+    OSError: If there is any problem in loading the shared library.
+    ValueError: If the shared library doesn't contain the needed routines.
+  """
+  # This raises OSError exception if there is any problem in loading the shared
+  # library.
+  c_lib = ctypes.CDLL(lib_name)
+
+  try:
+    c_lib.convertToMLIRSparseTensor.restype = ctypes.c_void_p
+  except Exception as e:
+    raise ValueError("Missing function convertToMLIRSparseTensor from "
+                     f"the supporting C shared library: {e} ") from e
+
+  try:
+    c_lib.convertFromMLIRSparseTensor.restype = ctypes.c_void_p
+  except Exception as e:
+    raise ValueError("Missing function convertFromMLIRSparseTensor from "
+                     f"the C shared library: {e} ") from e
+
+  return c_lib
+
+
+def sparse_tensor_to_coo_tensor(
+    lib_name: str,
+    sparse_tensor: ctypes.c_void_p,
+    dtype: np.dtype,
+) -> Tuple[int, int, np.ndarray, np.ndarray, np.ndarray]:
+  """Converts an MLIR sparse tensor to a COO-flavored format tensor.
+
+  Args:
+     lib_name: A string for the supporting C shared library.
+     sparse_tensor: A ctypes.c_void_p to the MLIR sparse tensor descriptor.
+     dtype: The numpy data type for the tensor elements.
+
+  Returns:
+    A tuple that contains the following values for the COO-flavored format
+    tensor:
+    rank: An integer for the rank of the tensor.
+    nse: An interger for the number of non-zero values in the tensor.
+    shape: A 1D numpy array of integers, for the shape of the tensor.
+    values: A 1D numpy array, for the non-zero values in the tensor.
+    indices: A 2D numpy array of integers, representing the indices for the
+      non-zero values in the tensor.
+
+  Raises:
+    OSError: If there is any problem in loading the shared library.
+    ValueError: If the shared library doesn't contain the needed routines.
+  """
+  c_lib = _get_c_shared_lib(lib_name)
+
+  rank = ctypes.c_ulonglong(0)
+  nse = ctypes.c_ulonglong(0)
+  shape = ctypes.POINTER(ctypes.c_ulonglong)()
+  values = ctypes.POINTER(np.ctypeslib.as_ctypes_type(dtype))()
+  indices = ctypes.POINTER(ctypes.c_ulonglong)()
+  c_lib.convertFromMLIRSparseTensor(sparse_tensor, ctypes.byref(rank),
+                                    ctypes.byref(nse), ctypes.byref(shape),
+                                    ctypes.byref(values), ctypes.byref(indices))
+
+  # Convert the returned values to the corresponding numpy types.
+  shape = np.ctypeslib.as_array(shape, shape=[rank.value])
+  values = np.ctypeslib.as_array(values, shape=[nse.value])
+  indices = np.ctypeslib.as_array(indices, shape=[nse.value, rank.value])
+  return rank, nse, shape, values, indices
+
+
+def coo_tensor_to_sparse_tensor(lib_name: str, np_shape: np.ndarray,
+                                np_values: np.ndarray,
+                                np_indices: np.ndarray) -> int:
+  """Converts a COO-flavored format sparse tensor to an MLIR sparse tensor.
+
+  Args:
+     lib_name: A string for the supporting C shared library.
+     np_shape: A 1D numpy array of integers, for the shape of the tensor.
+     np_values: A 1D numpy array, for the non-zero values in the tensor.
+     np_indices: A 2D numpy array of integers, representing the indices for the
+       non-zero values in the tensor.
+
+  Returns:
+     An integer for the non-null ctypes.c_void_p to the MLIR sparse tensor
+     descriptor.
+
+  Raises:
+    OSError: If there is any problem in loading the shared library.
+    ValueError: If the shared library doesn't contain the needed routines.
+  """
+
+  rank = ctypes.c_ulonglong(len(np_shape))
+  nse = ctypes.c_ulonglong(len(np_values))
+  shape = np_shape.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
+  values = np_values.ctypes.data_as(
+      ctypes.POINTER(np.ctypeslib.as_ctypes_type(np_values.dtype)))
+  indices = np_indices.ctypes.data_as(ctypes.POINTER(ctypes.c_ulonglong))
+
+  c_lib = _get_c_shared_lib(lib_name)
+  ptr = c_lib.convertToMLIRSparseTensor(rank, nse, shape, values, indices)
+  assert ptr is not None, "Problem with calling convertToMLIRSparseTensor"
+  return ptr

From e4a556268ea97320ba96de6e2f26235b4046c994 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Fri, 21 Jan 2022 17:59:07 +0100
Subject: [PATCH 195/946] Revert "[libc++] Use addressof in unordered_map."

This reverts commit cab96169380296a496614f433507d86b743f0d02.

This breaks the CI.
---
 libcxx/include/__hash_table                   | 40 +++++++--------
 libcxx/include/unordered_map                  | 21 ++++----
 ...rator.operators.addressof.compile.pass.cpp | 49 -------------------
 .../assign_move.addressof.compile.pass.cpp    | 42 ----------------
 .../move.addressof.compile.pass.cpp           | 33 -------------
 .../move_alloc.addressof.compile.pass.cpp     | 36 --------------
 .../emplace_hint.addressof.compile.pass.cpp   | 30 ------------
 ...rase_const_iter.addressof.compile.pass.cpp | 27 ----------
 .../erase_range.addressof.compile.pass.cpp    | 27 ----------
 ...nt_const_lvalue.addressof.compile.pass.cpp | 28 -----------
 ...ible_value_type.addressof.compile.pass.cpp | 28 -----------
 ...alue_value_type.addressof.compile.pass.cpp | 28 -----------
 ...ry_emplace_hint.addressof.compile.pass.cpp | 40 ---------------
 .../swap.addressof.compile.pass.cpp           | 29 -----------
 .../move.addressof.compile.pass.cpp           | 33 -------------
 .../move_alloc.addressof.compile.pass.cpp     | 36 --------------
 .../emplace_hint.addressof.compile.pass.cpp   | 30 ------------
 17 files changed, 30 insertions(+), 527 deletions(-)
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
 delete mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp

diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index adc732cffb015..6b682ab27c6c3 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -308,9 +308,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_iterator& operator=(const __hash_iterator& __i)
     {
-        if (this != _VSTD::addressof(__i))
+        if (this != &__i)
         {
-            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
+            __get_db()->__iterator_copy(this, &__i);
             __node_ = __i.__node_;
         }
         return *this;
@@ -406,7 +406,7 @@ public:
         : __node_(__x.__node_)
     {
 #if _LIBCPP_DEBUG_LEVEL == 2
-        __get_db()->__iterator_copy(this, _VSTD::addressof(__x));
+        __get_db()->__iterator_copy(this, &__x);
 #endif
     }
 
@@ -415,7 +415,7 @@ public:
     __hash_const_iterator(const __hash_const_iterator& __i)
         : __node_(__i.__node_)
     {
-        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
+        __get_db()->__iterator_copy(this, &__i);
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -427,9 +427,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_const_iterator& operator=(const __hash_const_iterator& __i)
     {
-        if (this != _VSTD::addressof(__i))
+        if (this != &__i)
         {
-            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
+            __get_db()->__iterator_copy(this, &__i);
             __node_ = __i.__node_;
         }
         return *this;
@@ -523,7 +523,7 @@ public:
           __bucket_(__i.__bucket_),
           __bucket_count_(__i.__bucket_count_)
     {
-        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
+        __get_db()->__iterator_copy(this, &__i);
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -535,9 +535,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_local_iterator& operator=(const __hash_local_iterator& __i)
     {
-        if (this != _VSTD::addressof(__i))
+        if (this != &__i)
         {
-            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
+            __get_db()->__iterator_copy(this, &__i);
             __node_ = __i.__node_;
             __bucket_ = __i.__bucket_;
             __bucket_count_ = __i.__bucket_count_;
@@ -655,7 +655,7 @@ public:
           __bucket_count_(__x.__bucket_count_)
     {
 #if _LIBCPP_DEBUG_LEVEL == 2
-        __get_db()->__iterator_copy(this, _VSTD::addressof(__x));
+        __get_db()->__iterator_copy(this, &__x);
 #endif
     }
 
@@ -666,7 +666,7 @@ public:
           __bucket_(__i.__bucket_),
           __bucket_count_(__i.__bucket_count_)
     {
-        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
+        __get_db()->__iterator_copy(this, &__i);
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -678,9 +678,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_const_local_iterator& operator=(const __hash_const_local_iterator& __i)
     {
-        if (this != _VSTD::addressof(__i))
+        if (this != &__i)
         {
-            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
+            __get_db()->__iterator_copy(this, &__i);
             __node_ = __i.__node_;
             __bucket_ = __i.__bucket_;
             __bucket_count_ = __i.__bucket_count_;
@@ -1615,7 +1615,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(
         __u.size() = 0;
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, _VSTD::addressof(__u));
+    __get_db()->swap(this, &__u);
 #endif
 }
 
@@ -2021,7 +2021,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(
         const_iterator __p, __node_pointer __cp)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
                          "unordered container::emplace_hint(const_iterator, args...) called with an iterator not"
                          " referring to this unordered container");
     if (__p != end() && key_eq()(*__p, __cp->__value_))
@@ -2148,7 +2148,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi(
         const_iterator __p, _Args&&... __args)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
                          "unordered container::emplace_hint(const_iterator, args...) called with an iterator not"
                          " referring to this unordered container");
     __node_holder __h = __construct_node(_VSTD::forward<_Args>(__args)...);
@@ -2472,7 +2472,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p)
 {
     __next_pointer __np = __p.__node_;
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
                          "unordered container erase(iterator) called with an iterator not"
                          " referring to this container");
     _LIBCPP_DEBUG_ASSERT(__p != end(),
@@ -2492,10 +2492,10 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first,
                                                 const_iterator __last)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__first)) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this,
                          "unordered container::erase(iterator, iterator) called with an iterator not"
                          " referring to this container");
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__last)) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this,
                          "unordered container::erase(iterator, iterator) called with an iterator not"
                          " referring to this container");
     for (const_iterator __p = __first; __first != __last; __p = __first)
@@ -2727,7 +2727,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u)
         __u.__bucket_list_[__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] =
             __u.__p1_.first().__ptr();
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, _VSTD::addressof(__u));
+    __get_db()->swap(this, &__u);
 #endif
 }
 
diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index accab28a99592..73edadab20990 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -519,7 +519,6 @@ template <class Key, class T, class Hash, class Pred, class Alloc>
 #include <__functional/is_transparent.h>
 #include <__hash_table>
 #include <__iterator/iterator_traits.h>
-#include <__memory/addressof.h>
 #include <__node_handle>
 #include <__utility/forward.h>
 #include <compare>
@@ -1187,7 +1186,7 @@ public:
         {return __table_.__insert_unique(__x);}
 
     iterator insert(const_iterator __p, const value_type& __x) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i()_VSTD::addressof(__p) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
                              "unordered_map::insert(const_iterator, const value_type&) called with an iterator not "
                              "referring to this unordered_map");
         ((void)__p);
@@ -1208,7 +1207,7 @@ public:
         {return __table_.__insert_unique(_VSTD::move(__x));}
 
     iterator insert(const_iterator __p, value_type&& __x) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
                              "unordered_map::insert(const_iterator, const value_type&) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__p);
@@ -1226,7 +1225,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator insert(const_iterator __p, _Pp&& __x)
         {
-            _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
+            _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
                                  "unordered_map::insert(const_iterator, value_type&&) called with an iterator not"
                                  " referring to this unordered_map");
           ((void)__p);
@@ -1242,7 +1241,7 @@ public:
     template <class... _Args>
     _LIBCPP_INLINE_VISIBILITY
     iterator emplace_hint(const_iterator __p, _Args&&... __args) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
                              "unordered_map::emplace_hint(const_iterator, args...) called with an iterator not"
                              " referring to this unordered_map");
           ((void)__p);
@@ -1274,7 +1273,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator try_emplace(const_iterator __h, const key_type& __k, _Args&&... __args)
     {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this,
                              "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__h);
@@ -1285,7 +1284,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator try_emplace(const_iterator __h, key_type&& __k, _Args&&... __args)
     {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i()_VSTD::addressof(__h) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this,
                              "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__h);
@@ -1693,7 +1692,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
 {
     _VSTD::__debug_db_insert_c(this);
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, _VSTD::addressof(__u));
+    __get_db()->swap(this, &__u);
 #endif
 }
 
@@ -1713,7 +1712,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
     else
-        __get_db()->swap(this, _VSTD::addressof(__u));
+        __get_db()->swap(this, &__u);
 #endif
 }
 
@@ -2469,7 +2468,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
 {
     _VSTD::__debug_db_insert_c(this);
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, _VSTD::addressof(__u));
+    __get_db()->swap(this, &__u);
 #endif
 }
 
@@ -2490,7 +2489,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
     else
-        __get_db()->swap(this, _VSTD::addressof(__u));
+        __get_db()->swap(this, &__u);
 #endif
 }
 
diff --git a/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
deleted file mode 100644
index 856b78293a107..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// Validate the constructors of the (const)(_local)_iterator classes to be
-// properly guarded against ADL-hijacking operator&.
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-template <class ToIterator, class FromIterator>
-void test() {
-  FromIterator from;
-  ToIterator copy(from);
-  copy = from;
-
-  ToIterator move(std::move(from));
-  from = FromIterator();
-  move = std::move(from);
-}
-
-void test() {
-  {
-    using I = std::unordered_map<operator_hijacker, operator_hijacker>::iterator;
-    using CI = std::unordered_map<operator_hijacker, operator_hijacker>::const_iterator;
-    test<I, I>();
-    test<CI, I>();
-    test<CI, CI>();
-  }
-  {
-    using IL = std::unordered_map<operator_hijacker, operator_hijacker>::local_iterator;
-    using CIL = std::unordered_map<operator_hijacker, operator_hijacker>::const_local_iterator;
-    test<IL, IL>();
-    test<CIL, IL>();
-    test<CIL, CIL>();
-  }
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
deleted file mode 100644
index 6dbd7aaea2a8e..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// unordered_map& operator=(unordered_map&&)
-//     noexcept(
-//         allocator_type::propagate_on_container_move_assignment::value &&
-//         is_nothrow_move_assignable<allocator_type>::value &&
-//         is_nothrow_move_assignable<hasher>::value &&
-//         is_nothrow_move_assignable<key_equal>::value);
-
-// Validate whether the container can be move-assigned with an ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  {
-    std::unordered_map<int, operator_hijacker> mo;
-    std::unordered_map<int, operator_hijacker> m;
-    m = std::move(mo);
-  }
-  {
-    std::unordered_map<operator_hijacker, int> mo;
-    std::unordered_map<operator_hijacker, int> m;
-    m = std::move(mo);
-  }
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
deleted file mode 100644
index e36c6525d631b..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// unordered_map(unordered_map&& u)
-//   noexcept(
-//     is_nothrow_move_constructible<hasher>::value &&
-//     is_nothrow_move_constructible<key_equal>::value &&
-//     is_nothrow_move_constructible<allocator_type>::value);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_map<operator_hijacker, operator_hijacker> mo;
-  std::unordered_map<operator_hijacker, operator_hijacker> m(std::move(mo));
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
deleted file mode 100644
index 1fec0ee5d0f4b..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// unordered_map(unordered_map&& u, const allocator_type& a);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-#include "test_allocator.h"
-#include "min_allocator.h"
-
-void test() {
-  using A = test_allocator<std::pair<const operator_hijacker, operator_hijacker>>;
-  using C = std::unordered_map<operator_hijacker, operator_hijacker, std::hash<operator_hijacker>,
-                               std::equal_to<operator_hijacker>, A>;
-
-  C mo;
-  C m(std::move(mo), A());
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
deleted file mode 100644
index 58ddefd8cfbfc..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// template <class... Args>
-//     iterator emplace_hint(const_iterator position, Args&&... args);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_map<operator_hijacker, operator_hijacker> m;
-  m.emplace_hint(m.cbegin());
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
deleted file mode 100644
index 1461f2499baad..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// iterator erase(const_iterator p)
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_map<operator_hijacker, operator_hijacker> m;
-  m.erase(m.cbegin());
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
deleted file mode 100644
index 5f342f7b2152f..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,27 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// iterator erase(const_iterator first, const_iterator last)
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_map<operator_hijacker, operator_hijacker> m;
-  m.erase(m.cbegin(), m.cend());
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
deleted file mode 100644
index db1805e7d7e63..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// iterator insert(const_iterator p, const value_type& x);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_map<operator_hijacker, operator_hijacker> m;
-  const std::pair<operator_hijacker, operator_hijacker> v;
-  m.insert(m.cend(), v);
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
deleted file mode 100644
index 530b826b61e78..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// template <class P,
-//           class = typename enable_if<is_convertible<P, value_type>::value>::type>
-//     pair<iterator, bool> insert(P&& x);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test(std::unordered_map<operator_hijacker, operator_hijacker>& m) { m.insert(m.cend(), *m.begin()); }
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
deleted file mode 100644
index 80219cb193edd..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,28 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// iterator insert(const_iterator hint, value_type&& obj);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test(std::unordered_map<operator_hijacker, operator_hijacker>& m) {
-  m.insert(m.cend(), std::pair<operator_hijacker, operator_hijacker>{});
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
deleted file mode 100644
index 2c667374d4fe8..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_map
-
-// template <class... Args>
-//     iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args);
-// template <class... Args>
-//     iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args);
-// template <class M>
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_map<operator_hijacker, operator_hijacker> m;
-  {
-    const operator_hijacker k;
-    m.try_emplace(m.cend(), k);
-  }
-  {
-    operator_hijacker k;
-    m.try_emplace(m.cend(), std::move(k));
-  }
-}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
deleted file mode 100644
index f5b5f516d42b5..0000000000000
--- a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// void swap(unordered_map& c)
-//     noexcept(allocator_traits<Allocator>::is_always_equal::value &&
-//               noexcept(swap(declval<Hash&>(), declval<Hash&>())) &&
-//               noexcept(swap(declval<Pred&>(), declval<Pred&>())));
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_map<operator_hijacker, operator_hijacker> m1;
-  std::unordered_map<operator_hijacker, operator_hijacker> m2;
-  std::swap(m1, m2);
-}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
deleted file mode 100644
index 73b19f35e2048..0000000000000
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_multimap
-
-// unordered_multimap(unordered_multimap&&)
-//   noexcept(
-//     is_nothrow_move_constructible<hasher>::value &&
-//     is_nothrow_move_constructible<key_equal>::value &&
-//     is_nothrow_move_constructible<allocator_type>::value);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_multimap<operator_hijacker, operator_hijacker> mo;
-  std::unordered_multimap<operator_hijacker, operator_hijacker> m(std::move(mo));
-}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
deleted file mode 100644
index 6419a03666d65..0000000000000
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,36 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_multimap
-
-// unordered_multimap(unordered_map&& u, const allocator_type& a);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-#include "test_allocator.h"
-#include "min_allocator.h"
-
-void test() {
-  using A = test_allocator<std::pair<const operator_hijacker, operator_hijacker>>;
-  using C = std::unordered_multimap<operator_hijacker, operator_hijacker, std::hash<operator_hijacker>,
-                                    std::equal_to<operator_hijacker>, A>;
-
-  C mo;
-  C m(std::move(mo), A());
-}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp
deleted file mode 100644
index 5e23b73cf34b3..0000000000000
--- a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp
+++ /dev/null
@@ -1,30 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03
-
-// <unordered_map>
-
-// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
-//           class Alloc = allocator<pair<const Key, T>>>
-// class unordered_multimap
-
-// template <class... Args>
-//     iterator emplace_hint(const_iterator position, Args&&... args);
-
-// Validate whether the operation properly guards against ADL-hijacking operator&
-
-#include <unordered_map>
-
-#include "test_macros.h"
-#include "operator_hijacker.h"
-
-void test() {
-  std::unordered_multimap<operator_hijacker, operator_hijacker> m;
-  m.emplace_hint(m.cbegin());
-}

From 23a7bb541dae47e691be5380e96ca63f04d6d194 Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Fri, 21 Jan 2022 18:17:55 +0100
Subject: [PATCH 196/946] [clang-format] Fix comment in spaceRequiredBefore.
 NFC.

---
 clang/lib/Format/TokenAnnotator.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 7fe0d319e5703..3ba81dfed38c2 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3323,7 +3323,7 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     // or import .....;
     if (Left.is(Keywords.kw_import) && Right.isOneOf(tok::less, tok::ellipsis))
       return true;
-    // No space between module :.
+    // Space between `module :` and `import :`.
     if (Left.isOneOf(Keywords.kw_module, Keywords.kw_import) &&
         Right.is(TT_ModulePartitionColon))
       return true;

From 5659638418808697061f59b78e2f93fc15f2d7cd Mon Sep 17 00:00:00 2001
From: David Tenty <daltenty@ibm.com>
Date: Fri, 21 Jan 2022 12:20:28 -0500
Subject: [PATCH 197/946] Revert "[compiler-rt][cmake] Use HandleOutOfTreeLLVM
 like libcxx and friends"

This reverts commit 8c9f62ea90c70d538766a81ef5980c9223b8566b, which is causing build failures on
the bots because it inadvertently changes the output directory of the compiler-rt libs when
built as a runtime.

Differential Revision: https://reviews.llvm.org/D117815
---
 compiler-rt/CMakeLists.txt | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 974e2333c7abd..12946d74c797b 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -5,6 +5,13 @@
 
 cmake_minimum_required(VERSION 3.13.4)
 
+# Check if compiler-rt is built as a standalone project.
+if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
+  project(CompilerRT C CXX ASM)
+  set(COMPILER_RT_STANDALONE_BUILD TRUE)
+  set_property(GLOBAL PROPERTY USE_FOLDERS ON)
+endif()
+
 set(LLVM_COMMON_CMAKE_UTILS "${CMAKE_CURRENT_SOURCE_DIR}/../cmake")
 
 # Add path for custom compiler-rt modules.
@@ -15,16 +22,6 @@ list(INSERT CMAKE_MODULE_PATH 0
   "${LLVM_COMMON_CMAKE_UTILS}/Modules"
   )
 
-# Check if compiler-rt is built as a standalone project.
-if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
-  project(CompilerRT C CXX ASM)
-  set(COMPILER_RT_STANDALONE_BUILD TRUE)
-  set_property(GLOBAL PROPERTY USE_FOLDERS ON)
-
-  # Find the LLVM sources and simulate LLVM CMake options.
-  include(HandleOutOfTreeLLVM)
-endif()
-
 if(CMAKE_CONFIGURATION_TYPES)
   set(CMAKE_CFG_RESOLVED_INTDIR "${CMAKE_CFG_INTDIR}/")
 else()

From 26cbc430197a3432075c7c5dfec41765f92b97ed Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Fri, 21 Jan 2022 18:30:34 +0100
Subject: [PATCH 198/946] [flang] Remove target and require shell

Fix failure from 68db0e25df4b1edaa2c6080eb88453ab01ea01d3 on
arm buildbot.
---
 flang/test/Fir/basic-program.fir | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
index 02463bef99496..b417a6148d39b 100644
--- a/flang/test/Fir/basic-program.fir
+++ b/flang/test/Fir/basic-program.fir
@@ -1,4 +1,5 @@
-// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s
+// RUN: tco %s | FileCheck %s
+// REQUIRES: shell
 
 // Check that tco is working with a basic test.
 

From 10e5c513b59bff0a049cc2a24bb7d41cd874ad7a Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Fri, 21 Jan 2022 09:53:14 -0800
Subject: [PATCH 199/946] Revert "[cmake] Duplicate
 `{llvm,compiler_rt}_check_linker_flag` for runtime libs and llvm"

This reverts commit 4af11272f57a4a6fed2932e9e0857b2c1a707c51.
---
 cmake/Modules/CheckLinkerFlag.cmake          | 17 +++++++++++++++++
 compiler-rt/cmake/config-ix.cmake            | 20 +++++++++++++-------
 libcxx/cmake/config-ix.cmake                 |  4 ++--
 libunwind/cmake/config-ix.cmake              |  8 ++++----
 llvm/cmake/modules/LLVMCheckLinkerFlag.cmake | 12 ++----------
 runtimes/CMakeLists.txt                      |  6 +++---
 6 files changed, 41 insertions(+), 26 deletions(-)
 create mode 100644 cmake/Modules/CheckLinkerFlag.cmake

diff --git a/cmake/Modules/CheckLinkerFlag.cmake b/cmake/Modules/CheckLinkerFlag.cmake
new file mode 100644
index 0000000000000..722fe5b1b8ead
--- /dev/null
+++ b/cmake/Modules/CheckLinkerFlag.cmake
@@ -0,0 +1,17 @@
+include(CMakePushCheckState)
+include(CheckCCompilerFlag)
+
+function(llvm_check_linker_flag flag dest)
+  # If testing a flag with check_c_compiler_flag, it gets added to the compile
+  # command only, but not to the linker command in that test. If the flag
+  # is vital for linking to succeed, the test would fail even if it would
+  # have succeeded if it was included on both commands.
+  #
+  # Therefore, try adding the flag to CMAKE_REQUIRED_FLAGS, which gets
+  # added to both compiling and linking commands in the tests.
+
+  cmake_push_check_state()
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}")
+  check_c_compiler_flag("" ${dest})
+  cmake_pop_check_state()
+endfunction()
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index 596f61e8c82ec..33693ce60321d 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -1,5 +1,4 @@
 include(CMakePushCheckState)
-include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckIncludeFiles)
@@ -7,6 +6,13 @@ include(CheckLibraryExists)
 include(CheckSymbolExists)
 include(TestBigEndian)
 
+function(compiler_rt_check_linker_flag flag out_var)
+  cmake_push_check_state()
+  set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} ${flag}")
+  check_cxx_compiler_flag("" ${out_var})
+  cmake_pop_check_state()
+endfunction()
+
 check_library_exists(c fopen "" COMPILER_RT_HAS_LIBC)
 if (COMPILER_RT_USE_BUILTINS_LIBRARY)
   include(HandleCompilerRT)
@@ -165,12 +171,12 @@ check_library_exists(c++ __cxa_throw "" COMPILER_RT_HAS_LIBCXX)
 check_library_exists(stdc++ __cxa_throw "" COMPILER_RT_HAS_LIBSTDCXX)
 
 # Linker flags.
-llvm_check_linker_flag(CXX "-Wl,-z,text" COMPILER_RT_HAS_Z_TEXT)
-llvm_check_linker_flag(CXX "-fuse-ld=lld" COMPILER_RT_HAS_FUSE_LD_LLD_FLAG)
+compiler_rt_check_linker_flag("-Wl,-z,text" COMPILER_RT_HAS_Z_TEXT)
+compiler_rt_check_linker_flag("-fuse-ld=lld" COMPILER_RT_HAS_FUSE_LD_LLD_FLAG)
 
 if(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
   set(VERS_COMPAT_OPTION "-Wl,-z,gnu-version-script-compat")
-  llvm_check_linker_flag(CXX "${VERS_COMPAT_OPTION}" COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
+  compiler_rt_check_linker_flag("${VERS_COMPAT_OPTION}" COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
 endif()
 
 set(DUMMY_VERS ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/dummy.vers)
@@ -181,10 +187,10 @@ if(COMPILER_RT_HAS_GNU_VERSION_SCRIPT_COMPAT)
   # -z gnu-version-script-compat.
   string(APPEND VERS_OPTION " ${VERS_COMPAT_OPTION}")
 endif()
-llvm_check_linker_flag(CXX "${VERS_OPTION}" COMPILER_RT_HAS_VERSION_SCRIPT)
+compiler_rt_check_linker_flag("${VERS_OPTION}" COMPILER_RT_HAS_VERSION_SCRIPT)
 
 if(ANDROID)
-  llvm_check_linker_flag(CXX "-Wl,-z,global" COMPILER_RT_HAS_Z_GLOBAL)
+  compiler_rt_check_linker_flag("-Wl,-z,global" COMPILER_RT_HAS_Z_GLOBAL)
   check_library_exists(log __android_log_write "" COMPILER_RT_HAS_LIBLOG)
 endif()
 
@@ -430,7 +436,7 @@ if(APPLE)
     -lc++
     -lc++abi)
 
-  llvm_check_linker_flag(CXX "-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION)
+  compiler_rt_check_linker_flag("-fapplication-extension" COMPILER_RT_HAS_APP_EXTENSION)
   if(COMPILER_RT_HAS_APP_EXTENSION)
     list(APPEND DARWIN_COMMON_LINK_FLAGS "-fapplication-extension")
   endif()
diff --git a/libcxx/cmake/config-ix.cmake b/libcxx/cmake/config-ix.cmake
index e114337f081a3..689a9d09c0179 100644
--- a/libcxx/cmake/config-ix.cmake
+++ b/libcxx/cmake/config-ix.cmake
@@ -1,6 +1,6 @@
 include(CMakePushCheckState)
 include(CheckLibraryExists)
-include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
+include(CheckLinkerFlag)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckCSourceCompiles)
@@ -12,7 +12,7 @@ include(CheckCSourceCompiles)
 # libunwind (and the compiler implicit -lunwind wouldn't succeed as the newly
 # built libunwind isn't installed yet). For those cases, it'd be good to
 # link with --uwnindlib=none. Check if that option works.
-llvm_check_linker_flag(C "--unwindlib=none" LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
+llvm_check_linker_flag("--unwindlib=none" LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
 if (LIBCXX_SUPPORTS_UNWINDLIB_NONE_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
 endif()
diff --git a/libunwind/cmake/config-ix.cmake b/libunwind/cmake/config-ix.cmake
index c1e1b4631abfe..c814611d43785 100644
--- a/libunwind/cmake/config-ix.cmake
+++ b/libunwind/cmake/config-ix.cmake
@@ -2,14 +2,14 @@ include(CMakePushCheckState)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 include(CheckLibraryExists)
-include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
+include(CheckLinkerFlag)
 include(CheckSymbolExists)
 include(CheckCSourceCompiles)
 
 # The compiler driver may be implicitly trying to link against libunwind, which
 # might not work if libunwind doesn't exist yet. Try to check if
 # --unwindlib=none is supported, and use that if possible.
-llvm_check_linker_flag(C "--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
+llvm_check_linker_flag("--unwindlib=none" LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
 if (LIBUNWIND_SUPPORTS_UNWINDLIB_NONE_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
 endif()
@@ -34,11 +34,11 @@ endif()
 # required for the link to go through. We remove sanitizers from the
 # configuration checks to avoid spurious link errors.
 
-llvm_check_linker_flag(C "-nostdlib++" LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
+llvm_check_linker_flag(-nostdlib++ LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
 if (LIBUNWIND_SUPPORTS_NOSTDLIBXX_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++")
 else()
-  llvm_check_linker_flag(C "-nodefaultlibs" LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
+  llvm_check_linker_flag(-nodefaultlibs LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
   if (LIBUNWIND_SUPPORTS_NODEFAULTLIBS_FLAG)
     set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nodefaultlibs")
   endif()
diff --git a/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake b/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
index 79c4e2cb4c2cd..253dd768654a2 100644
--- a/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
+++ b/llvm/cmake/modules/LLVMCheckLinkerFlag.cmake
@@ -5,22 +5,14 @@ if (COMMAND check_linker_flag)
     check_linker_flag(${ARGN})
   endmacro()
 else()
-  # Until the minimum CMAKE version is 3.18
-
   include(CheckCXXCompilerFlag)
   include(CMakePushCheckState)
 
-  # cmake builtin compatible, except we assume lang is C or CXX
+  # cmake builtin compatible, except we assume lang is CXX
   function(llvm_check_linker_flag lang flag out_var)
     cmake_push_check_state()
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${flag}")
-    if("${lang}" STREQUAL "C")
-      check_c_compiler_flag("" ${out_var})
-    elseif("${lang}" STREQUAL "CXX")
-      check_cxx_compiler_flag("" ${out_var})
-    else()
-      message(FATAL_ERROR "\"${lang}\" is not C or CXX")
-    endif()
+    check_cxx_compiler_flag("" ${out_var})
     cmake_pop_check_state()
   endfunction()
 endif()
diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt
index 1a50d9e8c98b3..cedce7b3541e5 100644
--- a/runtimes/CMakeLists.txt
+++ b/runtimes/CMakeLists.txt
@@ -88,7 +88,7 @@ set(LLVM_CMAKE_DIR ${LLVM_MAIN_SRC_DIR}/cmake/modules)
 set(LLVM_PATH ${CMAKE_CURRENT_SOURCE_DIR}/../llvm)
 
 include(CheckLibraryExists)
-include(LLVMCheckLinkerFlag) # Compat until CMake 3.18
+include(CheckLinkerFlag)
 include(CheckCCompilerFlag)
 include(CheckCXXCompilerFlag)
 
@@ -100,7 +100,7 @@ if (NOT LLVM_RUNTIMES_LINKING_WORKS)
   # --unwindlib=none is supported, and use that if possible.
   # Don't add this if not necessary to fix linking, as it can break using
   # e.g. ASAN/TSAN.
-  llvm_check_linker_flag(C "--unwindlib=none" LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
+  llvm_check_linker_flag("--unwindlib=none" LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
   if (LLVM_RUNTIMES_SUPPORT_UNWINDLIB_NONE_FLAG)
     set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} --unwindlib=none")
   endif()
@@ -110,7 +110,7 @@ endif()
 # Check for -nostdlib++ first; if there's no C++ standard library yet,
 # all check_cxx_compiler_flag commands will fail until we add -nostdlib++
 # (or -nodefaultlibs).
-llvm_check_linker_flag(C "-nostdlib++" LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
+llvm_check_linker_flag(-nostdlib++ LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
 if (LLVM_RUNTIMES_SUPPORT_NOSTDLIBXX_FLAG)
   set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -nostdlib++")
 endif()

From 5061eb6b0121af11a784d92e2dee5996858d04cd Mon Sep 17 00:00:00 2001
From: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
Date: Fri, 21 Jan 2022 09:57:17 -0800
Subject: [PATCH 200/946] [Sparc] Don't define __sparcv9 and __sparcv9__ when
 targeting V8+

Currently, clang defines the three macros __sparcv9, __sparcv9__
and __sparc_v9__ when targeting the V8+ baseline, i.e. using the
V9 instruction set on a 32-bit target.

Since neither gcc nor SolarisStudio define __sparcv9 and __sparcv9__
when targeting V8+, some existing code such as the glibc breaks when
defining either of these two macros on a 32-bit target as they are
used to detect a 64-bit target. Update the tests accordingly.

Fixes PR49562.

Reviewed By: jrtc27, MaskRay, hvdijk

Differential Revision: https://reviews.llvm.org/D98574
---
 clang/lib/Basic/Targets/Sparc.cpp                | 2 --
 clang/test/Preprocessor/predefined-arch-macros.c | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Basic/Targets/Sparc.cpp b/clang/lib/Basic/Targets/Sparc.cpp
index 5eeb77406c342..9321024348014 100644
--- a/clang/lib/Basic/Targets/Sparc.cpp
+++ b/clang/lib/Basic/Targets/Sparc.cpp
@@ -156,8 +156,6 @@ void SparcV8TargetInfo::getTargetDefines(const LangOptions &Opts,
       Builder.defineMacro("__sparcv8__");
       break;
     case CG_V9:
-      Builder.defineMacro("__sparcv9");
-      Builder.defineMacro("__sparcv9__");
       Builder.defineMacro("__sparc_v9__");
       break;
     }
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 757008005ebaf..f0604de684fbe 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -3479,8 +3479,8 @@
 // CHECK_SPARC-V9-NOT: #define __sparcv8 1
 // CHECK_SPARC-V9-NOT: #define __sparcv8__ 1
 // CHECK_SPARC-V9: #define __sparc_v9__ 1
-// CHECK_SPARC-V9: #define __sparcv9 1
-// CHECK_SPARC-V9: #define __sparcv9__ 1
+// CHECK_SPARC-V9-NOT: #define __sparcv9 1
+// CHECK_SPARC-V9-NOT: #define __sparcv9__ 1
 
 // RUN: %clang -E -dM %s -o - 2>&1 \
 // RUN:     -target sparc-sun-solaris \

From e6ceec9c1d190cdd465548161df6c8ebbb327739 Mon Sep 17 00:00:00 2001
From: eopXD <eop.chen@sifive.com>
Date: Thu, 20 Jan 2022 10:16:00 -0800
Subject: [PATCH 201/946] [Clang][RISCV] Restrict rvv builtins with zve macros

The `zve` extension specifies the maximum ELEN for both integer and floating
point mode - defined by macro `__riscv_v_elen` and `__riscv_v_elen_fp`.
This commit restricts the functions in riscv_vector.h by the zve defined
macro-s.

Change enum `RISCVExtension` to `RISCVPredefinedMacro` since now it
contains not only extensions. Also added type alignment to it.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D112986
---
 clang/utils/TableGen/RISCVVEmitter.cpp | 72 +++++++++++++++-----------
 1 file changed, 43 insertions(+), 29 deletions(-)

diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 84da6a5901a43..ea2d0b8d2f2f8 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -100,6 +100,9 @@ class RVVType {
   bool isValid() const { return Valid; }
   bool isScalar() const { return Scale.hasValue() && Scale.getValue() == 0; }
   bool isVector() const { return Scale.hasValue() && Scale.getValue() != 0; }
+  bool isVector(unsigned Width) const {
+    return isVector() && ElementBitwidth == Width;
+  }
   bool isFloat() const { return ScalarType == ScalarTypeKind::Float; }
   bool isSignedInteger() const {
     return ScalarType == ScalarTypeKind::SignedInteger;
@@ -134,13 +137,15 @@ class RVVType {
 
 using RVVTypePtr = RVVType *;
 using RVVTypes = std::vector<RVVTypePtr>;
+using RISCVPredefinedMacroT = uint8_t;
 
-enum RISCVExtension : uint8_t {
+enum RISCVPredefinedMacro : RISCVPredefinedMacroT {
   Basic = 0,
-  F = 1 << 1,
-  D = 1 << 2,
-  Zfh = 1 << 3,
-  RV64 = 1 << 4,
+  Zfh = 1 << 1,
+  RV64 = 1 << 2,
+  VectorMaxELen64 = 1 << 3,
+  VectorMaxELenFp32 = 1 << 4,
+  VectorMaxELenFp64 = 1 << 5,
 };
 
 // TODO refactor RVVIntrinsic class design after support all intrinsic
@@ -164,7 +169,7 @@ class RVVIntrinsic {
   // The types we use to obtain the specific LLVM intrinsic. They are index of
   // InputTypes. -1 means the return type.
   std::vector<int64_t> IntrinsicTypes;
-  uint8_t RISCVExtensions = 0;
+  RISCVPredefinedMacroT RISCVPredefinedMacros = 0;
   unsigned NF = 1;
 
 public:
@@ -188,7 +193,9 @@ class RVVIntrinsic {
   bool isMask() const { return IsMask; }
   StringRef getIRName() const { return IRName; }
   StringRef getManualCodegen() const { return ManualCodegen; }
-  uint8_t getRISCVExtensions() const { return RISCVExtensions; }
+  RISCVPredefinedMacroT getRISCVPredefinedMacros() const {
+    return RISCVPredefinedMacros;
+  }
   unsigned getNF() const { return NF; }
   const std::vector<int64_t> &getIntrinsicTypes() const {
     return IntrinsicTypes;
@@ -251,7 +258,8 @@ class RVVEmitter {
 
   // Emit the architecture preprocessor definitions. Return true when emits
   // non-empty string.
-  bool emitExtDefStr(uint8_t Extensions, raw_ostream &o);
+  bool emitMacroRestrictionStr(RISCVPredefinedMacroT PredefinedMacros,
+                               raw_ostream &o);
   // Slice Prototypes string into sub prototype string and process each sub
   // prototype string individually in the Handler.
   void parsePrototypes(StringRef Prototypes,
@@ -789,15 +797,17 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
   // Init RISC-V extensions
   for (const auto &T : OutInTypes) {
     if (T->isFloatVector(16) || T->isFloat(16))
-      RISCVExtensions |= RISCVExtension::Zfh;
-    else if (T->isFloatVector(32) || T->isFloat(32))
-      RISCVExtensions |= RISCVExtension::F;
-    else if (T->isFloatVector(64) || T->isFloat(64))
-      RISCVExtensions |= RISCVExtension::D;
+      RISCVPredefinedMacros |= RISCVPredefinedMacro::Zfh;
+    if (T->isFloatVector(32))
+      RISCVPredefinedMacros |= RISCVPredefinedMacro::VectorMaxELenFp32;
+    if (T->isFloatVector(64))
+      RISCVPredefinedMacros |= RISCVPredefinedMacro::VectorMaxELenFp64;
+    if (T->isVector(64))
+      RISCVPredefinedMacros |= RISCVPredefinedMacro::VectorMaxELen64;
   }
   for (auto Extension : RequiredExtensions) {
     if (Extension == "RV64")
-      RISCVExtensions |= RISCVExtension::RV64;
+      RISCVPredefinedMacros |= RISCVPredefinedMacro::RV64;
   }
 
   // Init OutputType and InputTypes
@@ -981,7 +991,7 @@ void RVVEmitter::createHeader(raw_ostream &OS) {
   // The same extension include in the same arch guard marco.
   llvm::stable_sort(Defs, [](const std::unique_ptr<RVVIntrinsic> &A,
                              const std::unique_ptr<RVVIntrinsic> &B) {
-    return A->getRISCVExtensions() < B->getRISCVExtensions();
+    return A->getRISCVPredefinedMacros() < B->getRISCVPredefinedMacros();
   });
 
   OS << "#define __rvv_ai static __inline__\n";
@@ -1280,15 +1290,16 @@ Optional<RVVTypePtr> RVVEmitter::computeType(BasicType BT, int Log2LMUL,
 void RVVEmitter::emitArchMacroAndBody(
     std::vector<std::unique_ptr<RVVIntrinsic>> &Defs, raw_ostream &OS,
     std::function<void(raw_ostream &, const RVVIntrinsic &)> PrintBody) {
-  uint8_t PrevExt = (*Defs.begin())->getRISCVExtensions();
-  bool NeedEndif = emitExtDefStr(PrevExt, OS);
+  RISCVPredefinedMacroT PrevMacros =
+      (*Defs.begin())->getRISCVPredefinedMacros();
+  bool NeedEndif = emitMacroRestrictionStr(PrevMacros, OS);
   for (auto &Def : Defs) {
-    uint8_t CurExt = Def->getRISCVExtensions();
-    if (CurExt != PrevExt) {
+    RISCVPredefinedMacroT CurMacros = Def->getRISCVPredefinedMacros();
+    if (CurMacros != PrevMacros) {
       if (NeedEndif)
         OS << "#endif\n\n";
-      NeedEndif = emitExtDefStr(CurExt, OS);
-      PrevExt = CurExt;
+      NeedEndif = emitMacroRestrictionStr(CurMacros, OS);
+      PrevMacros = CurMacros;
     }
     if (Def->hasAutoDef())
       PrintBody(OS, *Def);
@@ -1297,19 +1308,22 @@ void RVVEmitter::emitArchMacroAndBody(
     OS << "#endif\n\n";
 }
 
-bool RVVEmitter::emitExtDefStr(uint8_t Extents, raw_ostream &OS) {
-  if (Extents == RISCVExtension::Basic)
+bool RVVEmitter::emitMacroRestrictionStr(RISCVPredefinedMacroT PredefinedMacros,
+                                         raw_ostream &OS) {
+  if (PredefinedMacros == RISCVPredefinedMacro::Basic)
     return false;
   OS << "#if ";
   ListSeparator LS(" && ");
-  if (Extents & RISCVExtension::F)
-    OS << LS << "defined(__riscv_f)";
-  if (Extents & RISCVExtension::D)
-    OS << LS << "defined(__riscv_d)";
-  if (Extents & RISCVExtension::Zfh)
+  if (PredefinedMacros & RISCVPredefinedMacro::Zfh)
     OS << LS << "defined(__riscv_zfh)";
-  if (Extents & RISCVExtension::RV64)
+  if (PredefinedMacros & RISCVPredefinedMacro::RV64)
     OS << LS << "(__riscv_xlen == 64)";
+  if (PredefinedMacros & RISCVPredefinedMacro::VectorMaxELen64)
+    OS << LS << "(__riscv_v_elen >= 64)";
+  if (PredefinedMacros & RISCVPredefinedMacro::VectorMaxELenFp32)
+    OS << LS << "(__riscv_v_elen_fp >= 32)";
+  if (PredefinedMacros & RISCVPredefinedMacro::VectorMaxELenFp64)
+    OS << LS << "(__riscv_v_elen_fp >= 64)";
   OS << "\n";
   return true;
 }

From 754d6af7c35983612241b9a077722f4471bbd683 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer@google.com>
Date: Wed, 5 Jan 2022 16:27:23 -0800
Subject: [PATCH 202/946] [NFC] Improve code reuse.

Reviewed By: eugenis

Differential Revision: https://reviews.llvm.org/D116711
---
 .../Instrumentation/HWAddressSanitizer.cpp      | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
index 8d3bc1383e96d..fb10a99d13382 100644
--- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp
@@ -1403,16 +1403,16 @@ bool HWAddressSanitizer::instrumentStack(
 
     size_t Size = getAllocaSizeInBytes(*AI);
     size_t AlignedSize = alignTo(Size, Mapping.getObjectAlignment());
+    auto TagEnd = [&](Instruction *Node) {
+      IRB.SetInsertPoint(Node);
+      Value *UARTag = getUARTag(IRB, StackTag);
+      tagAlloca(IRB, AI, UARTag, AlignedSize);
+    };
     bool StandardLifetime =
         UnrecognizedLifetimes.empty() && isStandardLifetime(Info, GetDT());
     if (DetectUseAfterScope && StandardLifetime) {
       IntrinsicInst *Start = Info.LifetimeStart[0];
       IRB.SetInsertPoint(Start->getNextNode());
-      auto TagEnd = [&](Instruction *Node) {
-        IRB.SetInsertPoint(Node);
-        Value *UARTag = getUARTag(IRB, StackTag);
-        tagAlloca(IRB, AI, UARTag, AlignedSize);
-      };
       tagAlloca(IRB, AI, Tag, Size);
       if (!forAllReachableExits(GetDT(), GetPDT(), Start, Info.LifetimeEnd,
                                 RetVec, TagEnd)) {
@@ -1421,11 +1421,8 @@ bool HWAddressSanitizer::instrumentStack(
       }
     } else {
       tagAlloca(IRB, AI, Tag, Size);
-      for (auto *RI : RetVec) {
-        IRB.SetInsertPoint(RI);
-        Value *UARTag = getUARTag(IRB, StackTag);
-        tagAlloca(IRB, AI, UARTag, AlignedSize);
-      }
+      for (auto *RI : RetVec)
+        TagEnd(RI);
       if (!StandardLifetime) {
         for (auto &II : Info.LifetimeStart)
           II->eraseFromParent();

From 11754a4dbbad2c3be803d1a7366b861941550a6c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 21 Jan 2022 10:49:50 -0800
Subject: [PATCH 203/946] [RISCV] Use RVBUnary in more places to simplify some
 tablegen declarations. NFCI

---
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index aae646f02f13f..a8fffb2d7d792 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -597,27 +597,18 @@ def ZEXTH_RV64 : RVInstR<0b0000100, 0b100, OPC_OP_32, (outs GPR:$rd),
 // or gorci. Since Zbb is closer to being finalized than Zbp this will be
 // misleading to users.
 let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def REV8_RV32 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
-                        "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> {
-  let imm12 = { 0b01101, 0b0011000 };
-}
+def REV8_RV32 : RVBUnary<0b0110100, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
+                Sched<[WriteREV8, ReadREV8]>;
 } // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV32]
 
 let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def REV8_RV64 : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
-                        "rev8", "$rd, $rs1">, Sched<[WriteREV8, ReadREV8]> {
-  let imm12 = { 0b01101, 0b0111000 };
-}
+def REV8_RV64 : RVBUnary<0b0110101, 0b11000, 0b101, OPC_OP_IMM, "rev8">,
+                Sched<[WriteREV8, ReadREV8]>;
 } // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
 
 let Predicates = [HasStdExtZbbOrZbp] in {
-let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
-def ORCB : RVInstI<0b101, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
-                   "orc.b", "$rd, $rs1">, Sched<[WriteORCB, ReadORCB]> {
-  let imm12 = { 0b00101, 0b0000111 };
-}
+def ORCB : RVBUnary<0b0010100, 0b00111, 0b101, OPC_OP_IMM, "orc.b">,
+           Sched<[WriteORCB, ReadORCB]>;
 } // Predicates = [HasStdExtZbbOrZbp]
 
 let Predicates = [HasStdExtZbpOrZbkb] in 

From 4710750854cee1fdadf5f3381e9431655056b646 Mon Sep 17 00:00:00 2001
From: Lei Zhang <antiagainst@google.com>
Date: Fri, 7 Jan 2022 17:19:41 -0500
Subject: [PATCH 204/946] [mlir][spirv] Support size-1 vector inserts during
 conversion

Differential Revision: https://reviews.llvm.org/D115517
---
 .../VectorToSPIRV/VectorToSPIRV.cpp           | 22 ++++++++++++++-----
 .../test/Conversion/VectorToSPIRV/simple.mlir | 22 +++++++++++++++++++
 2 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp
index 27037cb4b6f2a..051b691011d8a 100644
--- a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp
+++ b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp
@@ -157,6 +157,13 @@ struct VectorInsertOpConvert final
   LogicalResult
   matchAndRewrite(vector::InsertOp insertOp, OpAdaptor adaptor,
                   ConversionPatternRewriter &rewriter) const override {
+    // Special case for inserting scalar values into size-1 vectors.
+    if (insertOp.getSourceType().isIntOrFloat() &&
+        insertOp.getDestVectorType().getNumElements() == 1) {
+      rewriter.replaceOp(insertOp, adaptor.source());
+      return success();
+    }
+
     if (insertOp.getSourceType().isa<VectorType>() ||
         !spirv::CompositeType::isValid(insertOp.getDestVectorType()))
       return failure();
@@ -209,20 +216,23 @@ struct VectorInsertStridedSliceOpConvert final
     Value srcVector = adaptor.getOperands().front();
     Value dstVector = adaptor.getOperands().back();
 
-    // Insert scalar values not supported yet.
-    if (srcVector.getType().isa<spirv::ScalarType>() ||
-        dstVector.getType().isa<spirv::ScalarType>())
-      return failure();
-
     uint64_t stride = getFirstIntValue(insertOp.strides());
     if (stride != 1)
       return failure();
+    uint64_t offset = getFirstIntValue(insertOp.offsets());
+
+    if (srcVector.getType().isa<spirv::ScalarType>()) {
+      assert(!dstVector.getType().isa<spirv::ScalarType>());
+      rewriter.replaceOpWithNewOp<spirv::CompositeInsertOp>(
+          insertOp, dstVector.getType(), srcVector, dstVector,
+          rewriter.getI32ArrayAttr(offset));
+      return success();
+    }
 
     uint64_t totalSize =
         dstVector.getType().cast<VectorType>().getNumElements();
     uint64_t insertSize =
         srcVector.getType().cast<VectorType>().getNumElements();
-    uint64_t offset = getFirstIntValue(insertOp.offsets());
 
     SmallVector<int32_t, 2> indices(totalSize);
     std::iota(indices.begin(), indices.end(), 0);
diff --git a/mlir/test/Conversion/VectorToSPIRV/simple.mlir b/mlir/test/Conversion/VectorToSPIRV/simple.mlir
index 8f5cf197713d2..7a3e4b3289729 100644
--- a/mlir/test/Conversion/VectorToSPIRV/simple.mlir
+++ b/mlir/test/Conversion/VectorToSPIRV/simple.mlir
@@ -61,6 +61,17 @@ func @insert(%arg0 : vector<4xf32>, %arg1: f32) -> vector<4xf32> {
 
 // -----
 
+// CHECK-LABEL: @insert_size1_vector
+//  CHECK-SAME: %[[V:.*]]: vector<1xf32>, %[[S:.*]]: f32
+//       CHECK:   %[[R:.+]] = builtin.unrealized_conversion_cast %[[S]]
+//       CHECK:   return %[[R]]
+func @insert_size1_vector(%arg0 : vector<1xf32>, %arg1: f32) -> vector<1xf32> {
+  %1 = vector.insert %arg1, %arg0[0] : f32 into vector<1xf32>
+	return %1 : vector<1xf32>
+}
+
+// -----
+
 // CHECK-LABEL: @extract_element
 //  CHECK-SAME: %[[V:.*]]: vector<4xf32>, %[[ID:.*]]: i32
 //       CHECK:   spv.VectorExtractDynamic %[[V]][%[[ID]]] : vector<4xf32>, i32
@@ -139,6 +150,17 @@ func @insert_strided_slice(%arg0: vector<2xf32>, %arg1: vector<4xf32>) -> vector
 
 // -----
 
+// CHECK-LABEL: @insert_size1_vector
+//  CHECK-SAME: %[[SUB:.*]]: vector<1xf32>, %[[FULL:.*]]: vector<3xf32>
+//       CHECK:   %[[S:.+]] = builtin.unrealized_conversion_cast %[[SUB]]
+//       CHECK:   spv.CompositeInsert %[[S]], %[[FULL]][2 : i32] : f32 into vector<3xf32>
+func @insert_size1_vector(%arg0 : vector<1xf32>, %arg1: vector<3xf32>) -> vector<3xf32> {
+  %1 = vector.insert_strided_slice %arg0, %arg1 {offsets = [2], strides = [1]} : vector<1xf32> into vector<3xf32>
+	return %1 : vector<3xf32>
+}
+
+// -----
+
 // CHECK-LABEL: @fma
 //  CHECK-SAME: %[[A:.*]]: vector<4xf32>, %[[B:.*]]: vector<4xf32>, %[[C:.*]]: vector<4xf32>
 //       CHECK:   spv.GLSL.Fma %[[A]], %[[B]], %[[C]] : vector<4xf32>

From fd0c6f53913f272ddd88948644fae36e63db120c Mon Sep 17 00:00:00 2001
From: Alexander Belyaev <pifon@google.com>
Date: Fri, 21 Jan 2022 19:29:08 +0100
Subject: [PATCH 205/946] [mlir] Move linalg::PadTensorOp to tensor::PadOp.

RFC: https://llvm.discourse.group/t/rfc-move-linalg-padtensorop-to-tensor-padop/5785

Differential Revision: https://reviews.llvm.org/D117892
---
 .../mlir/Dialect/Linalg/IR/LinalgOps.td       | 202 -------
 .../Dialect/Linalg/Transforms/HoistPadding.h  |  11 +-
 .../Dialect/Linalg/Transforms/Transforms.h    |  36 +-
 .../include/mlir/Dialect/Linalg/Utils/Utils.h |   6 +-
 mlir/include/mlir/Dialect/Tensor/IR/Tensor.h  |   1 +
 .../mlir/Dialect/Tensor/IR/TensorOps.td       | 190 +++++-
 .../Tensor/IR/TensorTilingInterfaceImpl.h     |  36 ++
 .../include/mlir/Dialect/Tensor/Utils/Utils.h |  34 ++
 mlir/include/mlir/InitAllDialects.h           |   2 +
 .../Conversion/TosaToLinalg/TosaToLinalg.cpp  |   3 +-
 .../TosaToLinalg/TosaToLinalgNamed.cpp        |   7 +-
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp      | 565 ------------------
 .../Dialect/Linalg/Transforms/Bufferize.cpp   |   4 +-
 .../Transforms/ComprehensiveBufferizePass.cpp |   2 +-
 .../Linalg/Transforms/HoistPadding.cpp        |  27 +-
 .../Transforms/LinalgStrategyPasses.cpp       |   6 +-
 .../Linalg/Transforms/PadOpInterchange.cpp    |  13 +-
 mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp |  47 +-
 .../Dialect/Linalg/Transforms/Transforms.cpp  |  48 +-
 .../Linalg/Transforms/Vectorization.cpp       |  75 ++-
 mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt  |   1 +
 mlir/lib/Dialect/Linalg/Utils/Utils.cpp       |  19 +-
 mlir/lib/Dialect/Tensor/CMakeLists.txt        |   1 +
 mlir/lib/Dialect/Tensor/IR/CMakeLists.txt     |  18 +
 .../IR/TensorInferTypeOpInterfaceImpl.cpp     |  43 ++
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp      | 253 ++++++++
 .../Tensor/IR/TensorTilingInterfaceImpl.cpp   | 279 +++++++++
 mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt  |  12 +
 mlir/lib/Dialect/Tensor/Utils/Utils.cpp       |  54 ++
 .../TosaToLinalg/tosa-to-linalg-named.mlir    |  20 +-
 .../TosaToLinalg/tosa-to-linalg.mlir          |  16 +-
 mlir/test/Dialect/Linalg/bufferize.mlir       |   4 +-
 mlir/test/Dialect/Linalg/canonicalize.mlir    | 204 +------
 .../test/Dialect/Linalg/codegen-strategy.mlir |   2 +-
 .../Dialect/Linalg/generalize-pad-tensor.mlir |   8 +-
 mlir/test/Dialect/Linalg/hoist-padding.mlir   |  80 +--
 mlir/test/Dialect/Linalg/invalid.mlir         |  65 --
 .../test/Dialect/Linalg/lower-pad-tensor.mlir |  12 +-
 mlir/test/Dialect/Linalg/pad.mlir             |  58 +-
 mlir/test/Dialect/Linalg/pad_fusion.mlir      |   8 +-
 .../resolve-shaped-type-result-dims.mlir      |   4 +-
 mlir/test/Dialect/Linalg/roundtrip.mlir       |  71 ---
 .../Linalg/subtensor-of-padtensor.mlir        |  80 +--
 .../Dialect/Linalg/tile-and-fuse-tensors.mlir |   6 +-
 .../Dialect/Linalg/tile-pad-tensor-op.mlir    |  22 +-
 mlir/test/Dialect/Linalg/vectorization.mlir   |  52 +-
 mlir/test/Dialect/Tensor/canonicalize.mlir    | 196 ++++++
 mlir/test/Dialect/Tensor/invalid.mlir         |  55 ++
 mlir/test/Dialect/Tensor/ops.mlir             |  74 +++
 .../CPU/test-comprehensive-bufferize.mlir     |   8 +-
 .../Dialect/Linalg/CPU/test-padtensor.mlir    |   4 +-
 .../Dialect/Linalg/TestLinalgTransforms.cpp   |   7 +-
 .../llvm-project-overlay/mlir/BUILD.bazel     |  38 +-
 53 files changed, 1639 insertions(+), 1450 deletions(-)
 create mode 100644 mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h
 create mode 100644 mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
 create mode 100644 mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
 create mode 100644 mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt
 create mode 100644 mlir/lib/Dialect/Tensor/Utils/Utils.cpp

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
index 4b8ae4985ca53..4150dee567fc7 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgOps.td
@@ -18,7 +18,6 @@ include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/LoopLikeInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
-include "mlir/Interfaces/TilingInterface.td"
 include "mlir/Interfaces/ViewLikeInterface.td"
 
 // Base class for Linalg dialect ops that do not correspond to library calls.
@@ -130,207 +129,6 @@ def Linalg_InitTensorOp : Linalg_Op<"init_tensor",
   let hasCanonicalizer = 1;
 }
 
-def Linalg_PadTensorOp : Linalg_Op<"pad_tensor",
-    [AttrSizedOperandSegments, NoSideEffect,
-     DeclareOpInterfaceMethods<ReifyRankedShapedTypeOpInterface>,
-     DeclareOpInterfaceMethods<TilingInterface,
-         ["getDestinationOperands", "getLoopIteratorTypes", "getIterationDomain",
-          "getTiledImplementation"]>]> {
-  let summary = "tensor pad operation";
-  let description = [{
-    `linalg.pad_tensor` is an operation that pads the `source` tensor
-    with given `low` and `high` padding config.
-
-    The PadTensor operation supports the following arguments:
-
-    * source: the "base" tensor on which to pad.
-    * low: A list contains the padding along the start of each
-           dimension, i.e `low`.
-    * high: A list contains the padding along the end of each
-            dimension, i.e. `high`.
-    * nofold: indicates that the operation should not be folded when source and
-              result types are equal.
-
-    The result tensor dimensions are `low` + `dim` + `high` along that
-    dimension. The number of elements of `low` and `high` must match
-    the rank of the input tensor. They can be either a constant or a
-    dynamic value.
-
-    The region of the `pad_tensor` operation returns the value to use
-    for the padding. The arguments of the region represent the index
-    of the source being accessed. There should be as many arguments as
-    the rank of the `source` tensor. The value `yield`-ed by the
-    region is used as the value of the view at the given position.
-
-    If `nofold` is set, the padding operation will not be folded away even
-    if the source type and the padded type have the same static shape. This can
-    be used, e.g., for packing or promotion to faster memory.
-
-    Example 1:
-
-    ```mlir
-      %pad_value = ... : f32
-      %0 = linalg.pad_tensor %0 low[1, 2] high[2, 3] {
-      ^bb0(%arg0 : index, %arg1 : index):
-        linalg.yield %pad_value : f32
-      } : tensor<?x?xf32> to tensor<?x?xf32>
-    ```
-
-    Example 2:
-
-    ```mlir
-      %pad_value = ... : f32
-      %0 = linalg.pad_tensor %arg0 low[2, %arg1, 3, 3] high[3, 3, %arg1, 2] {
-      ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index):
-          linalg.yield %pad_value : f32
-      } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
-    ```
-
-    Example 3:
-
-    ```mlir
-      %pad_value = ... : f32
-      %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] {
-      ^bb0(%arg1: index, %arg2: index):
-        linalg.yield %pad_value : f32
-      } : tensor<2x3xf32> to tensor<?x?xf32>
-    ```
-
-    Example 4:
-
-    ```mlir
-      // Force a padded value to be always exist with `nofold`.
-      %pad_value = ... : f32
-      %0 = linalg.pad_tensor %arg0 nofold low[0, 0] high[0, 0] {
-      ^bb0(%arg1: index, %arg2: index):
-        linalg.yield %pad_value : f32
-      } : tensor<2x3xf32> to tensor<2x3xf32>
-    ```
-  }];
-
-  let arguments = (ins
-    AnyTensor:$source,
-    Variadic<Index>:$low,
-    Variadic<Index>:$high,
-    I64ArrayAttr:$static_low,
-    I64ArrayAttr:$static_high,
-    UnitAttr:$nofold);
-
-  let regions = (region SizedRegion<1>:$region);
-
-  let results = (outs AnyTensor:$result);
-
-  // TODO: Remove custom<InferType> when AllTypesMatch supports opt. operands.
-  let assemblyFormat = [{
-    $source
-    (`nofold` $nofold^)?
-    `low` `` custom<OperandsOrIntegersSizesList>($low, $static_low)
-    `high` `` custom<OperandsOrIntegersSizesList>($high, $static_high)
-    $region attr-dict `:` type($source) `to` type($result)
-  }];
-
-  let extraClassDeclaration = [{
-    static StringRef getStaticLowAttrName() {
-      return "static_low";
-    }
-
-    static StringRef getStaticHighAttrName() {
-      return "static_high";
-    }
-
-    RankedTensorType getSourceType() {
-      return source().getType().cast<RankedTensorType>();
-    }
-    RankedTensorType getResultType() {
-      return getResult().getType().cast<RankedTensorType>();
-    }
-
-    // Infer the shape of the result tensor given the type of the source tensor
-    // and paddings. Known result dimensions that cannot necessarily be inferred
-    // from low/high padding sizes can be optionally specified. Those will be
-    // considered when computing the result type.
-    static RankedTensorType inferResultType(
-                                RankedTensorType sourceType,
-                                ArrayRef<int64_t> staticLow,
-                                ArrayRef<int64_t> staticHigh,
-                                ArrayRef<int64_t> resultShape = {});
-
-    // Return a PadTensorOp that pads `source` to `type` size where the static
-    // sizes are assumed to be greater than the dynamic sizes. The op performs
-    // "high" padding (i.e. it adds trailing padding values until the desired
-    // size is met).
-    static linalg::PadTensorOp createPadHighOp(
-        Type type, Value source, Value pad, bool nofold, Location loc,
-        OpBuilder & builder);
-
-    // Return a PadTensorOp that pads `source to `type` size with `pad` value.
-    // I.e., a block will be created and the `pad` value will be yielded
-    // directly. If the type passed is nullptr, it is inferred.
-    static linalg::PadTensorOp createPadScalarOp(
-        Type type, Value source, Value pad, ArrayRef<OpFoldResult> low,
-        ArrayRef<OpFoldResult> high, bool nofold, Location loc,
-        OpBuilder & builder);
-
-    // Return the pad value if it is a constant. Return null value otherwise.
-    Value getConstantPaddingValue();
-
-    // Return a vector of all the static or dynamic values (low/high padding) of
-    // the op.
-    inline SmallVector<OpFoldResult> getMixedPadImpl(ArrayAttr staticAttrs,
-                                                     ValueRange values) {
-      SmallVector<OpFoldResult> res;
-      unsigned numDynamic = 0;
-      unsigned count = staticAttrs.size();
-      for (unsigned idx = 0; idx < count; ++idx) {
-        if (ShapedType::isDynamic(staticAttrs[idx].cast<IntegerAttr>().getInt()))
-          res.push_back(values[numDynamic++]);
-        else
-          res.push_back(staticAttrs[idx]);
-      }
-      return res;
-    }
-    SmallVector<OpFoldResult> getMixedLowPad() {
-      return getMixedPadImpl(static_low(), low());
-    }
-    SmallVector<OpFoldResult> getMixedHighPad() {
-      return getMixedPadImpl(static_high(), high());
-    }
-    // Return true if low padding is guaranteed to be 0.
-    bool hasZeroLowPad() {
-      return llvm::all_of(getMixedLowPad(), [](OpFoldResult ofr) {
-        return getConstantIntValue(ofr) == static_cast<int64_t>(0);
-      });
-    }
-    // Return true if high padding is guaranteed to be 0.
-    bool hasZeroHighPad() {
-      return llvm::all_of(getMixedHighPad(), [](OpFoldResult ofr) {
-        return getConstantIntValue(ofr) == static_cast<int64_t>(0);
-      });
-    }
-  }];
-
-  let builders = [
-    // Build a PadTensorOp with mixed static and dynamic entries.
-    OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$staticLow,
-      "ArrayRef<int64_t>":$staticHigh, "ValueRange":$low, "ValueRange":$high,
-      CArg<"bool", "false">:$nofold,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-    // Build a PadTensorOp with all dynamic entries.
-    OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high,
-      CArg<"bool", "false">:$nofold,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-    // Build a PadTensorOp with mixed static and dynamic entries and custom
-    // result type. If the type passed is nullptr, it is inferred.
-    OpBuilder<(ins "Type":$resultType, "Value":$source,
-      "ArrayRef<OpFoldResult>":$low, "ArrayRef<OpFoldResult>":$high,
-      CArg<"bool", "false">:$nofold,
-      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
-  ];
-
-  let hasCanonicalizer = 1;
-  let hasFolder = 1;
-}
-
 def Linalg_YieldOp : Linalg_Op<"yield", [NoSideEffect, ReturnLike, Terminator]>,
     Arguments<(ins Variadic<AnyType>:$values)> {
   let summary = "Linalg yield operation";
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
index 90e78ca0e274e..8d3315d5ea971 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
@@ -1,4 +1,4 @@
-//===- HoistPadding.h - Hoisting transformation for PadTensorOp -*- C++ -*-===//
+//===- HoistPadding.h - Hoisting for tensor::PadOp -*- C++ --------------*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -14,8 +14,11 @@
 namespace mlir {
 class Value;
 
+namespace tensor {
+class PadOp;
+} // namespace tensor
+
 namespace linalg {
-class PadTensorOp;
 
 /// Mechanically hoist padding operations on tensors by `numLoops` into a new,
 /// generally larger tensor. This achieves packing of multiple padding ops into
@@ -59,8 +62,8 @@ class PadTensorOp;
 ///      }
 ///    }
 /// ```
-FailureOr<Value> hoistPaddingOnTensors(PadTensorOp opToHoist, int numLoops,
-                                       PadTensorOp &hoistedOp);
+FailureOr<Value> hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops,
+                                       tensor::PadOp &hoistedOp);
 
 } // namespace linalg
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index f52276e8e9e69..f5e99d5afe83e 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1132,18 +1132,18 @@ void populateLinalgDistributeTiledLoopPattern(
 // Op-specific patterns.
 //===----------------------------------------------------------------------===//
 
-/// PadTensorOp is not canonicalized away yet, so we provide a transformation to
-/// `linalg.generic`.
-struct PadTensorOpTransformationPattern : public OpRewritePattern<PadTensorOp> {
-  using OpRewritePattern<PadTensorOp>::OpRewritePattern;
+/// tensor::PadOp is not canonicalized away yet, so we provide a transformation
+/// to `linalg.generic`.
+struct PadOpTransformationPattern : public OpRewritePattern<tensor::PadOp> {
+  using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(PadTensorOp padOp,
+  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                 PatternRewriter &rewriter) const override;
 };
 
 /// Pad the operands of `opToPad` to a static bounding box. Use `paddingFunc`
 /// and `nofoldFunc` to set the padding value and the nofold attribute of the
-/// introduced PadTensorOps, respectively. Update `paddedOp` to the cloned
+/// introduced tensor::PadOps, respectively. Update `paddedOp` to the cloned
 /// statically shaped operation and return the extracted dynamically shaped
 /// results. If padding fails, return failure.
 FailureOr<SmallVector<Value>>
@@ -1153,23 +1153,23 @@ rewriteAsPaddedOp(OpBuilder &b, LinalgOp opToPad,
                   LinalgOp &paddedOp);
 
 using OptimizeCopyFn =
-    std::function<LogicalResult(PatternRewriter &, PadTensorOp, Value)>;
+    std::function<LogicalResult(PatternRewriter &, tensor::PadOp, Value)>;
 
-/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and
+/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and
 /// InsertSliceOp. For now, only constant padding values are supported.
 /// `OptimizeCopyFn` can be used to customize copying step optimization.
-struct GeneralizePadTensorOpPattern : public OpRewritePattern<PadTensorOp> {
-  GeneralizePadTensorOpPattern(MLIRContext *context,
-                               OptimizeCopyFn optimizeCopyFn = nullptr,
-                               PatternBenefit benefit = 1)
-      : OpRewritePattern<PadTensorOp>(context, benefit),
+struct GeneralizePadOpPattern : public OpRewritePattern<tensor::PadOp> {
+  GeneralizePadOpPattern(MLIRContext *context,
+                         OptimizeCopyFn optimizeCopyFn = nullptr,
+                         PatternBenefit benefit = 1)
+      : OpRewritePattern<tensor::PadOp>(context, benefit),
         optimizeCopyFn(std::move(optimizeCopyFn)) {}
-  LogicalResult matchAndRewrite(PadTensorOp padOp,
+  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                 PatternRewriter &rewriter) const override;
 
 protected:
   OptimizeCopyFn optimizeCopyFn;
-  Value createFillOrGenerateOp(PatternRewriter &rewriter, PadTensorOp padOp,
+  Value createFillOrGenerateOp(PatternRewriter &rewriter, tensor::PadOp padOp,
                                Value dest,
                                const SmallVector<Value> &dynSizes) const;
 };
@@ -1179,9 +1179,9 @@ struct GeneralizePadTensorOpPattern : public OpRewritePattern<PadTensorOp> {
 /// are used to encode a certain ordering of pattern application. To avoid
 /// scattering magic constants throughout the code base, the patterns must be
 /// added with this function. `baseBenefit` can be used to offset the benefit
-/// of all PadTensorOp vectorization patterns by a certain value.
-void populatePadTensorOpVectorizationPatterns(RewritePatternSet &patterns,
-                                              PatternBenefit baseBenefit = 1);
+/// of all tensor::PadOp vectorization patterns by a certain value.
+void populatePadOpVectorizationPatterns(RewritePatternSet &patterns,
+                                        PatternBenefit baseBenefit = 1);
 
 /// Match and rewrite for the pattern:
 /// ```
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 7646ef2b5df67..b466d7726f502 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -107,12 +107,12 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp(
     OpBuilder &b, Location loc, Value source, ArrayRef<OpFoldResult> offsets,
     ArrayRef<OpFoldResult> sizes, ArrayRef<OpFoldResult> strides);
 
-/// Create a PadTensorOp that pads `source` to the size of the statically sized
-/// `type` whose static sizes are assumed to be greater than the dynamic
+/// Create a tensor::PadOp that pads `source` to the size of the statically
+/// sized `type` whose static sizes are assumed to be greater than the dynamic
 /// `source` size. The padding introduces trailing `pad` values until the target
 /// size is met. If `source` is defined by one or more LinalgOps that have been
 /// padded with the same value and sizes, return their padded result instead of
-/// creating a PadTensorOp.
+/// creating a tensor::PadOp.
 ///
 /// Example:
 /// ```
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
index db8a07a689ee2..cfec22be37e5f 100644
--- a/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
+++ b/mlir/include/mlir/Dialect/Tensor/IR/Tensor.h
@@ -19,6 +19,7 @@
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "mlir/Interfaces/TilingInterface.h"
 #include "mlir/Interfaces/ViewLikeInterface.h"
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
index 1a95d921fee22..05cb41d791d35 100644
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -14,6 +14,7 @@ include "mlir/Interfaces/CastInterfaces.td"
 include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
+include "mlir/Interfaces/TilingInterface.td"
 include "mlir/Interfaces/ViewLikeInterface.td"
 
 class Tensor_Op<string mnemonic, list<OpTrait> traits = []>
@@ -777,6 +778,190 @@ def Tensor_CollapseShapeOp : Tensor_ReassociativeReshapeOp<"collapse_shape"> {
   let extraClassDeclaration = commonExtraClassDeclaration;
 }
 
+//===----------------------------------------------------------------------===//
+// PadOp
+//===----------------------------------------------------------------------===//
+
+def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect]> {
+  let summary = "tensor pad operation";
+  let description = [{
+    `tensor.pad` is an operation that pads the `source` tensor
+    with given `low` and `high` padding config.
+
+    The PadTensor operation supports the following arguments:
+
+    * source: the "base" tensor on which to pad.
+    * low: A list contains the padding along the start of each
+           dimension, i.e `low`.
+    * high: A list contains the padding along the end of each
+            dimension, i.e. `high`.
+    * nofold: indicates that the operation should not be folded when source and
+              result types are equal.
+
+    The result tensor dimensions are `low` + `dim` + `high` along that
+    dimension. The number of elements of `low` and `high` must match
+    the rank of the input tensor. They can be either a constant or a
+    dynamic value.
+
+    The region of the `tensor.pad` operation returns the value to use
+    for the padding. The arguments of the region represent the index
+    of the source being accessed. There should be as many arguments as
+    the rank of the `source` tensor. The value `yield`-ed by the
+    region is used as the value of the view at the given position.
+
+    If `nofold` is set, the padding operation will not be folded away even
+    if the source type and the padded type have the same static shape. This can
+    be used, e.g., for packing or promotion to faster memory.
+
+    Example 1:
+
+    ```mlir
+      %pad_value = ... : f32
+      %0 = tensor.pad %0 low[1, 2] high[2, 3] {
+      ^bb0(%arg0 : index, %arg1 : index):
+        tensor.yield %pad_value : f32
+      } : tensor<?x?xf32> to tensor<?x?xf32>
+    ```
+
+    Example 2:
+
+    ```mlir
+      %pad_value = ... : f32
+      %0 = tensor.pad %arg0 low[2, %arg1, 3, 3] high[3, 3, %arg1, 2] {
+      ^bb0(%arg2: index, %arg3: index, %arg4: index, %arg5: index):
+          tensor.yield %pad_value : f32
+      } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+    ```
+
+    Example 3:
+
+    ```mlir
+      %pad_value = ... : f32
+      %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+      ^bb0(%arg1: index, %arg2: index):
+        tensor.yield %pad_value : f32
+      } : tensor<2x3xf32> to tensor<?x?xf32>
+    ```
+
+    Example 4:
+
+    ```mlir
+      // Force a padded value to be always exist with `nofold`.
+      %pad_value = ... : f32
+      %0 = tensor.pad %arg0 nofold low[0, 0] high[0, 0] {
+      ^bb0(%arg1: index, %arg2: index):
+        tensor.yield %pad_value : f32
+      } : tensor<2x3xf32> to tensor<2x3xf32>
+    ```
+  }];
+
+  let arguments = (ins
+    AnyTensor:$source,
+    Variadic<Index>:$low,
+    Variadic<Index>:$high,
+    I64ArrayAttr:$static_low,
+    I64ArrayAttr:$static_high,
+    UnitAttr:$nofold);
+
+  let regions = (region SizedRegion<1>:$region);
+
+  let results = (outs AnyTensor:$result);
+
+  // TODO: Remove custom<InferType> when AllTypesMatch supports opt. operands.
+  let assemblyFormat = [{
+    $source
+    (`nofold` $nofold^)?
+    `low` `` custom<OperandsOrIntegersSizesList>($low, $static_low)
+    `high` `` custom<OperandsOrIntegersSizesList>($high, $static_high)
+    $region attr-dict `:` type($source) `to` type($result)
+  }];
+
+  let extraClassDeclaration = [{
+    static StringRef getStaticLowAttrName() {
+      return "static_low";
+    }
+
+    static StringRef getStaticHighAttrName() {
+      return "static_high";
+    }
+
+    RankedTensorType getSourceType() {
+      return source().getType().cast<RankedTensorType>();
+    }
+    RankedTensorType getResultType() {
+      return getResult().getType().cast<RankedTensorType>();
+    }
+
+    // Infer the shape of the result tensor given the type of the source tensor
+    // and paddings. Known result dimensions that cannot necessarily be inferred
+    // from low/high padding sizes can be optionally specified. Those will be
+    // considered when computing the result type.
+    static RankedTensorType inferResultType(
+                                RankedTensorType sourceType,
+                                ArrayRef<int64_t> staticLow,
+                                ArrayRef<int64_t> staticHigh,
+                                ArrayRef<int64_t> resultShape = {});
+
+    // Return the pad value if it is a constant. Return null value otherwise.
+    Value getConstantPaddingValue();
+
+    // Return a vector of all the static or dynamic values (low/high padding) of
+    // the op.
+    inline SmallVector<OpFoldResult> getMixedPadImpl(ArrayAttr staticAttrs,
+                                                     ValueRange values) {
+      SmallVector<OpFoldResult> res;
+      unsigned numDynamic = 0;
+      unsigned count = staticAttrs.size();
+      for (unsigned idx = 0; idx < count; ++idx) {
+        if (ShapedType::isDynamic(staticAttrs[idx].cast<IntegerAttr>().getInt()))
+          res.push_back(values[numDynamic++]);
+        else
+          res.push_back(staticAttrs[idx]);
+      }
+      return res;
+    }
+    SmallVector<OpFoldResult> getMixedLowPad() {
+      return getMixedPadImpl(static_low(), low());
+    }
+    SmallVector<OpFoldResult> getMixedHighPad() {
+      return getMixedPadImpl(static_high(), high());
+    }
+    // Return true if low padding is guaranteed to be 0.
+    bool hasZeroLowPad() {
+      return llvm::all_of(getMixedLowPad(), [](OpFoldResult ofr) {
+        return getConstantIntValue(ofr) == static_cast<int64_t>(0);
+      });
+    }
+    // Return true if high padding is guaranteed to be 0.
+    bool hasZeroHighPad() {
+      return llvm::all_of(getMixedHighPad(), [](OpFoldResult ofr) {
+        return getConstantIntValue(ofr) == static_cast<int64_t>(0);
+      });
+    }
+  }];
+
+  let builders = [
+    // Build a PadOp with mixed static and dynamic entries.
+    OpBuilder<(ins "Value":$source, "ArrayRef<int64_t>":$staticLow,
+      "ArrayRef<int64_t>":$staticHigh, "ValueRange":$low, "ValueRange":$high,
+      CArg<"bool", "false">:$nofold,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+    // Build a PadOp with all dynamic entries.
+    OpBuilder<(ins "Value":$source, "ValueRange":$low, "ValueRange":$high,
+      CArg<"bool", "false">:$nofold,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+    // Build a PadOp with mixed static and dynamic entries and custom
+    // result type. If the type passed is nullptr, it is inferred.
+    OpBuilder<(ins "Type":$resultType, "Value":$source,
+      "ArrayRef<OpFoldResult>":$low, "ArrayRef<OpFoldResult>":$high,
+      CArg<"bool", "false">:$nofold,
+      CArg<"ArrayRef<NamedAttribute>", "{}">:$attrs)>,
+  ];
+
+  let hasCanonicalizer = 1;
+  let hasFolder = 1;
+}
+
 
 //===----------------------------------------------------------------------===//
 // YieldOp
@@ -784,16 +969,17 @@ def Tensor_CollapseShapeOp : Tensor_ReassociativeReshapeOp<"collapse_shape"> {
 
 def Tensor_YieldOp : Tensor_Op<"yield",
     [NoSideEffect, ReturnLike, Terminator,
-     HasParent<"::mlir::tensor::GenerateOp">]> {
+     HasParent<"::mlir::tensor::GenerateOp, ::mlir::tensor::PadOp">]> {
   let summary = "Yield a value from a region";
   let description = [{
      This operation is used to yield a single value from a within a region. It
      is used to create dynamically sized tensors
-     (see `tensor.generate` op).
+     (see `tensor.generate` and `tensor.pad` ops).
   }];
 
   let arguments = (ins AnyType:$value);
   let assemblyFormat = "$value attr-dict `:` type($value)";
+
   // Dummy builder to appease code in templated ensureTerminator that
   // GenerateOp's auto-generated parser calls.
   let builders = [OpBuilder<(ins), [{ /* nothing to do */ }]>];
diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h b/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h
new file mode 100644
index 0000000000000..6cd819758eab7
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h
@@ -0,0 +1,36 @@
+//===- TensorTilingOpInterfaceImpl.h - ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements Tiling interface for TensorOps with ExternalModel.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_
+#define MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_
+
+#include "mlir/IR/Dialect.h"
+
+namespace mlir {
+namespace tensor {
+
+/// Registers external models for Tiling interface for tensor ops.
+/// Currently, it registers:
+///
+/// * TilingInterface for `tensor.pad`.
+///
+/// Unfortunately, a "normal" internal registration is not possible at the
+/// moment, because of the dependency of the interface implementation for these
+/// ops on `affine.apply` and Affine dialect already depends on TensorOps. In
+/// order to break the cyclic dependency (TensorOps->AffineOps->TensorOps) the
+/// implementation is moved to a separate library.
+void registerTilingOpInterfaceExternalModels(mlir::DialectRegistry &registry);
+
+} // namespace tensor
+} // namespace mlir
+
+#endif // MLIR_DIALECT_TENSOR_IR_TENSORTILINGINTERFACEIMPL_H_
diff --git a/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
new file mode 100644
index 0000000000000..4b4c53896b7d8
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/Utils/Utils.h
@@ -0,0 +1,34 @@
+//===- Utils.h -  Utilities to support the Tensor dialect -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_UTILS_UTILS_H_
+#define MLIR_DIALECT_TENSOR_UTILS_UTILS_H_
+
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+
+namespace mlir {
+namespace tensor {
+
+// Return a PadOp that pads `source` to `type` size where the static
+// sizes are assumed to be greater than the dynamic sizes. The op performs
+// "high" padding (i.e. it adds trailing padding values until the desired
+// size is met).
+PadOp createPadHighOp(Type type, Value source, Value pad, bool nofold,
+                      Location loc, OpBuilder &builder);
+
+// Return a PadOp that pads `source to `type` size with `pad` value.
+// I.e., a block will be created and the `pad` value will be yielded
+// directly. If the type passed is nullptr, it is inferred.
+PadOp createPadScalarOp(Type type, Value source, Value pad,
+                        ArrayRef<OpFoldResult> low, ArrayRef<OpFoldResult> high,
+                        bool nofold, Location loc, OpBuilder &builder);
+
+} // namespace tensor
+} // namespace mlir
+
+#endif // MLIR_DIALECT_TENSOR_UTILS_UTILS_H_
diff --git a/mlir/include/mlir/InitAllDialects.h b/mlir/include/mlir/InitAllDialects.h
index 40e9781edcc90..86f4e1db7a284 100644
--- a/mlir/include/mlir/InitAllDialects.h
+++ b/mlir/include/mlir/InitAllDialects.h
@@ -43,6 +43,7 @@
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Dialect/X86Vector/X86VectorDialect.h"
@@ -86,6 +87,7 @@ inline void registerAllDialects(DialectRegistry &registry) {
                   x86vector::X86VectorDialect>();
   // clang-format on
   tensor::registerInferTypeOpInterfaceExternalModels(registry);
+  tensor::registerTilingOpInterfaceExternalModels(registry);
 }
 
 /// Append all the MLIR dialects to the registry contained in the given context.
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
index 0f53b30125625..6833a0c2d72cb 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Tosa/Utils/CoversionUtils.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
@@ -1932,7 +1933,7 @@ class PadConverter : public OpRewritePattern<tosa::PadOp> {
       highValues.push_back(highVal);
     }
 
-    auto newPadOp = linalg::PadTensorOp::createPadScalarOp(
+    auto newPadOp = tensor::createPadScalarOp(
         padOp.getType(), input, padConstant, lowValues, highValues,
         /*nofold=*/false, loc, rewriter);
 
diff --git a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
index 54012c9760eb2..4fcd2cf56c35c 100644
--- a/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
+++ b/mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp
@@ -17,6 +17,7 @@
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
 #include "mlir/Dialect/Tosa/IR/TosaOps.h"
 #include "mlir/Dialect/Tosa/Utils/CoversionUtils.h"
 #include "mlir/Dialect/Utils/ReshapeOpsUtils.h"
@@ -55,9 +56,9 @@ static mlir::Value applyPad(Location loc, Value input, ArrayRef<int64_t> pad,
 
   Value padValue = rewriter.create<arith::ConstantOp>(loc, padAttr);
 
-  return linalg::PadTensorOp::createPadScalarOp(
-             RankedTensorType::get(paddedShape, inputETy), input, padValue,
-             lowIndices, highIndices, /*nofold=*/false, loc, rewriter)
+  return tensor::createPadScalarOp(RankedTensorType::get(paddedShape, inputETy),
+                                   input, padValue, lowIndices, highIndices,
+                                   /*nofold=*/false, loc, rewriter)
       .result();
 }
 
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index f41e8487858f3..3ca3932a44eec 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1079,561 +1079,6 @@ LogicalResult InitTensorOp::reifyResultShapes(
   return success();
 }
 
-//===----------------------------------------------------------------------===//
-// PadTensorOp
-//===----------------------------------------------------------------------===//
-
-// TODO: Replace custom<InferType> directive with AllTypesMatch as soon as it
-// supports optional types.
-void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand,
-                    Type typeToInfer, Type typeToInferFrom) {}
-
-ParseResult parseInferType(OpAsmParser &parser,
-                           Optional<OpAsmParser::OperandType> optOperand,
-                           Type &typeToInfer, Type typeToInferFrom) {
-  if (optOperand)
-    typeToInfer = typeToInferFrom;
-  return success();
-}
-
-static LogicalResult verify(PadTensorOp op) {
-  auto sourceType = op.source().getType().cast<RankedTensorType>();
-  auto resultType = op.result().getType().cast<RankedTensorType>();
-  auto expectedType = PadTensorOp::inferResultType(
-      sourceType, extractFromI64ArrayAttr(op.static_low()),
-      extractFromI64ArrayAttr(op.static_high()));
-  for (int i = 0, e = sourceType.getRank(); i < e; ++i) {
-    if (resultType.getDimSize(i) == expectedType.getDimSize(i))
-      continue;
-    if (expectedType.isDynamicDim(i))
-      continue;
-    return op.emitError("specified type ")
-           << resultType << " does not match the inferred type "
-           << expectedType;
-  }
-
-  auto &region = op.region();
-  unsigned rank = resultType.getRank();
-  Block &block = region.front();
-  if (block.getNumArguments() != rank)
-    return op.emitError("expected the block to have ") << rank << " arguments";
-
-  // Note: the number and type of yield values are checked in the YieldOp.
-  for (const auto &en : llvm::enumerate(block.getArgumentTypes())) {
-    if (!en.value().isIndex())
-      return op.emitOpError("expected block argument ")
-             << (en.index() + 1) << " to be an index";
-  }
-
-  return success();
-}
-
-RankedTensorType PadTensorOp::inferResultType(RankedTensorType sourceType,
-                                              ArrayRef<int64_t> staticLow,
-                                              ArrayRef<int64_t> staticHigh,
-                                              ArrayRef<int64_t> resultShape) {
-  unsigned rank = sourceType.getRank();
-  assert(staticLow.size() == rank && "unexpected staticLow size mismatch");
-  assert(staticHigh.size() == rank && "unexpected staticHigh size mismatch");
-  assert((resultShape.empty() || resultShape.size() == rank) &&
-         "unexpected resultShape size mismatch");
-
-  SmallVector<int64_t, 4> inferredShape;
-  for (auto i : llvm::seq<unsigned>(0, rank)) {
-    if (sourceType.isDynamicDim(i) ||
-        staticLow[i] == ShapedType::kDynamicSize ||
-        staticHigh[i] == ShapedType::kDynamicSize) {
-      inferredShape.push_back(resultShape.empty() ? ShapedType::kDynamicSize
-                                                  : resultShape[i]);
-    } else {
-      int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i];
-      assert((resultShape.empty() || size == resultShape[i] ||
-              resultShape[i] == ShapedType::kDynamicSize) &&
-             "mismatch between inferred shape and result shape");
-      inferredShape.push_back(size);
-    }
-  }
-
-  return RankedTensorType::get(inferredShape, sourceType.getElementType());
-}
-
-void PadTensorOp::build(OpBuilder &b, OperationState &result, Value source,
-                        ArrayRef<int64_t> staticLow,
-                        ArrayRef<int64_t> staticHigh, ValueRange low,
-                        ValueRange high, bool nofold,
-                        ArrayRef<NamedAttribute> attrs) {
-  auto sourceType = source.getType().cast<RankedTensorType>();
-  auto resultType = inferResultType(sourceType, staticLow, staticHigh);
-  build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow),
-        b.getI64ArrayAttr(staticHigh), nofold ? b.getUnitAttr() : UnitAttr());
-  result.addAttributes(attrs);
-}
-
-void PadTensorOp::build(OpBuilder &b, OperationState &result, Value source,
-                        ValueRange low, ValueRange high, bool nofold,
-                        ArrayRef<NamedAttribute> attrs) {
-  auto sourceType = source.getType().cast<RankedTensorType>();
-  unsigned rank = sourceType.getRank();
-  SmallVector<int64_t, 4> staticVector(rank, ShapedType::kDynamicSize);
-  build(b, result, source, staticVector, staticVector, low, high, nofold,
-        attrs);
-}
-
-void PadTensorOp::build(OpBuilder &b, OperationState &result, Type resultType,
-                        Value source, ArrayRef<OpFoldResult> low,
-                        ArrayRef<OpFoldResult> high, bool nofold,
-                        ArrayRef<NamedAttribute> attrs) {
-  assert(resultType.isa<RankedTensorType>());
-  auto sourceType = source.getType().cast<RankedTensorType>();
-  SmallVector<Value, 4> dynamicLow, dynamicHigh;
-  SmallVector<int64_t, 4> staticLow, staticHigh;
-  // staticLow and staticHigh have full information of the padding config.
-  // This will grow staticLow and staticHigh with 1 value. If the config is
-  // dynamic (ie not a constant), dynamicLow and dynamicHigh will grow with 1
-  // value as well.
-  dispatchIndexOpFoldResults(low, dynamicLow, staticLow,
-                             ShapedType::kDynamicSize);
-  dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh,
-                             ShapedType::kDynamicSize);
-  if (!resultType) {
-    resultType =
-        PadTensorOp::inferResultType(sourceType, staticLow, staticHigh);
-  }
-  build(b, result, resultType, source, dynamicLow, dynamicHigh,
-        b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh),
-        nofold ? b.getUnitAttr() : UnitAttr());
-  result.addAttributes(attrs);
-}
-
-PadTensorOp PadTensorOp::createPadScalarOp(Type type, Value source, Value pad,
-                                           ArrayRef<OpFoldResult> low,
-                                           ArrayRef<OpFoldResult> high,
-                                           bool nofold, Location loc,
-                                           OpBuilder &builder) {
-  auto padTensorOp =
-      builder.create<linalg::PadTensorOp>(loc, type, source, low, high, nofold);
-  int rank = padTensorOp.getResultType().getRank();
-  SmallVector<Type, 4> blockArgTypes(rank, builder.getIndexType());
-  SmallVector<Location, 4> blockArgLocs(rank, loc);
-  auto &region = padTensorOp.region();
-  // `builder.createBlock` changes the insertion point within the block. Create
-  // a guard to reset the insertion point of the builder after it is destroyed.
-  OpBuilder::InsertionGuard guard(builder);
-  builder.createBlock(&region, region.end(), blockArgTypes, blockArgLocs);
-  builder.create<linalg::YieldOp>(loc, pad);
-  return padTensorOp;
-}
-
-PadTensorOp PadTensorOp::createPadHighOp(Type type, Value source, Value pad,
-                                         bool nofold, Location loc,
-                                         OpBuilder &b) {
-  SmallVector<OpFoldResult, 4> low, high;
-  auto rankedTensorType = type.cast<RankedTensorType>();
-  assert(rankedTensorType.hasStaticShape());
-  for (const auto &en : enumerate(rankedTensorType.getShape())) {
-    AffineExpr d0;
-    bindDims(b.getContext(), d0);
-    auto dimOp = b.createOrFold<tensor::DimOp>(loc, source, en.index());
-    Value paddingWidth =
-        makeComposedAffineApply(b, loc, en.value() - d0, {dimOp});
-    high.push_back(paddingWidth);
-    low.push_back(b.createOrFold<arith::ConstantIndexOp>(loc, 0));
-  }
-  return PadTensorOp::createPadScalarOp(type, source, pad, low, high, nofold,
-                                        loc, b);
-}
-
-LogicalResult PadTensorOp::reifyResultShapes(
-    OpBuilder &b, ReifiedRankedShapedTypeDims &reifiedReturnShapes) {
-  Location loc = getLoc();
-  auto lowPad = getMixedLowPad();
-  auto highPad = getMixedHighPad();
-  SmallVector<Value> shapes;
-  for (auto dim : llvm::seq<int64_t>(0, getSourceType().getRank())) {
-    // Shape along each dimension is source dim + low pad + high pad.
-    SmallVector<Value> mapOperands;
-    mapOperands.push_back(b.createOrFold<tensor::DimOp>(loc, source(), dim));
-    AffineExpr expr = b.getAffineDimExpr(0);
-    unsigned numSymbols = 0;
-    auto addOpFoldResult = [&](OpFoldResult valueOrAttr) {
-      if (Value v = valueOrAttr.dyn_cast<Value>()) {
-        expr = expr + b.getAffineSymbolExpr(numSymbols++);
-        mapOperands.push_back(v);
-        return;
-      }
-      int64_t staticValue =
-          valueOrAttr.get<Attribute>().cast<IntegerAttr>().getInt();
-      expr = expr + staticValue;
-    };
-    addOpFoldResult(lowPad[dim]);
-    addOpFoldResult(highPad[dim]);
-    shapes.push_back(applyMapToValues(
-        b, loc, AffineMap::get(1, numSymbols, expr), mapOperands)[0]);
-  }
-  reifiedReturnShapes.emplace_back(std::move(shapes));
-  return success();
-}
-
-//===----------------------------------------------------------------------===//
-// Methods related to PadTensor tiling.
-//===----------------------------------------------------------------------===//
-
-SmallVector<Value> PadTensorOp::getDestinationOperands(OpBuilder &b) {
-  ReifiedRankedShapedTypeDims reifiedShapes;
-  (void)reifyResultShapes(b, reifiedShapes);
-  SmallVector<OpFoldResult> mixedSizes = getAsOpFoldResult(reifiedShapes[0]);
-  Value initTensor = b.create<InitTensorOp>(getLoc(), mixedSizes,
-                                            getResultType().getElementType());
-  return {initTensor};
-}
-
-SmallVector<StringRef> PadTensorOp::getLoopIteratorTypes() {
-  SmallVector<StringRef> iteratorTypes(getResultType().getRank(),
-                                       getParallelIteratorTypeName());
-  return iteratorTypes;
-}
-
-SmallVector<Range> PadTensorOp::getIterationDomain(OpBuilder &b) {
-  ReifiedRankedShapedTypeDims reifiedShapes;
-  (void)reifyResultShapes(b, reifiedShapes);
-  Value zero = b.create<arith::ConstantIndexOp>(getLoc(), 0);
-  Value one = b.create<arith::ConstantIndexOp>(getLoc(), 1);
-  // Initialize all the ranges to {zero, one, one}. All the `ub`s are
-  // overwritten.
-  SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
-  for (const auto &ub : enumerate(reifiedShapes[0]))
-    loopRanges[ub.index()].size = ub.value();
-  return loopRanges;
-}
-
-SmallVector<Operation *> PadTensorOp::getTiledImplementation(
-    OpBuilder &b, ValueRange dest, ArrayRef<OpFoldResult> offsets,
-    ArrayRef<OpFoldResult> sizes, bool /*tileDestOperands*/) {
-  // Only constant padding value supported.
-  Value padValue = getConstantPaddingValue();
-  if (!padValue)
-    return {};
-
-  // Helper variables and functions for various arithmetic operations. These are
-  // used extensively for computing new offset/length and padding values.
-  Location loc = getLoc();
-  AffineExpr dim0, dim1;
-  bindDims(b.getContext(), dim0, dim1);
-  // Add two integers.
-  auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
-  auto add = [&](Value v1, Value v2) {
-    return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
-  };
-  // Subtract two integers.
-  auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
-  auto sub = [&](Value v1, Value v2) {
-    return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
-  };
-  // Take the minimum of two integers.
-  auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
-  auto min = [&](Value v1, Value v2) {
-    return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
-  };
-  // Take the maximum of two integers.
-  auto max = [&](Value v1, Value v2) {
-    return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
-  };
-  // Zero index-typed integer.
-  auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
-
-  // Helper function for filling static/dynamic low/high padding indices vectors
-  // of PadTensorOp.
-  auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
-                         SmallVector<int64_t> &staticIndices) {
-    if (auto constInt = getConstantIntValue(val)) {
-      staticIndices.push_back(*constInt);
-    } else {
-      staticIndices.push_back(ShapedType::kDynamicSize);
-      dynIndices.push_back(val);
-    }
-  };
-
-  // Compute new offsets, lengths, low padding, high padding.
-  SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
-  SmallVector<Value> newLows, newHighs;
-  SmallVector<int64_t> staticNewLows, staticNewHighs;
-  // Set to true if the original data source is not read at all.
-  bool hasZeroLen = false;
-  // Same as hasZeroLen, but for dynamic dimension sizes. This condition
-  // is true if the original data source turns out to be unused at runtime.
-  Value dynHasZeroLenCond;
-
-  int64_t rank = getSourceType().getRank();
-  for (unsigned dim = 0; dim < rank; ++dim) {
-    auto low = getValueOrCreateConstantIndexOp(b, loc, getMixedLowPad()[dim]);
-    bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
-    auto high = getValueOrCreateConstantIndexOp(b, loc, getMixedHighPad()[dim]);
-    bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
-    auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]);
-    auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]);
-    auto srcSize = b.createOrFold<tensor::DimOp>(loc, source(), dim);
-
-    // The new amount of low padding is `low - offset`. Except for the case
-    // where none of the low padding is read. In that case, the new amount of
-    // low padding is zero.
-    //
-    // Optimization: If low = 0, then newLow = 0.
-    Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
-    appendIndex(newLow, newLows, staticNewLows);
-
-    // Start reading the data from position `offset - low`. Since the original
-    // read may have started in the low padding zone, this value could be
-    // negative. Therefore, start reading from:
-    //
-    // max(offset - low, 0)
-    //
-    // The original read could also have started in the high padding zone.
-    // In that case, set the offset to the end of source tensor. The new
-    // ExtractSliceOp length will be zero in that case. (Effectively reading no
-    // data from the source.)
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
-                                : min(offset, srcSize);
-    newOffsets.push_back(getAsOpFoldResult(newOffset));
-
-    // The original ExtractSliceOp was reading until position `offset + length`.
-    // Therefore, the corresponding position within the source tensor is:
-    //
-    // offset + length - low
-    //
-    // In case the original ExtractSliceOp stopped reading within the low
-    // padding zone, this value can be negative. In that case, the end position
-    // of the read should be zero. (Similar to newOffset.)
-    //
-    // The original read could also have stopped in the high padding zone.
-    // In that case, set the end positition of the read should be the end of the
-    // source tensor. (Similar to newOffset.)
-    //
-    // endLoc = min(max(offset - low + length, 0), srcSize)
-    //
-    // The new ExtractSliceOp length is `endLoc - newOffset`.
-    //
-    // Optimization: If low = 0, then the formula can be simplified.
-    Value endLoc = hasLowPad
-                       ? min(max(add(sub(offset, low), length), zero), srcSize)
-                       : min(add(offset, length), srcSize);
-    Value newLength = sub(endLoc, newOffset);
-    newLengths.push_back(getAsOpFoldResult(newLength));
-
-    // Check if newLength is zero. In that case, no SubTensorOp should be
-    // executed.
-    if (auto newLengthInt = getConstantIntValue(newLength)) {
-      hasZeroLen |= *newLengthInt == 0;
-    } else {
-      Value check = b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
-                                            newLength, zero);
-      dynHasZeroLenCond =
-          dynHasZeroLenCond
-              ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
-              : check;
-    }
-
-    // The amount of high padding is simply the number of elements remaining,
-    // so that the result has the same length as the original ExtractSliceOp.
-    // As an optimization, if the original high padding is zero, then the new
-    // high padding must also be zero.
-    Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
-    appendIndex(newHigh, newHighs, staticNewHighs);
-
-    // Only unit stride supported.
-    newStrides.push_back(b.getIndexAttr(1));
-  }
-
-  // The shape of the result can be obtained from the sizes passed in.
-  SmallVector<Value> dynDims;
-  SmallVector<int64_t> shape;
-  dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize);
-  RankedTensorType resultType =
-      RankedTensorType::get(shape, getResultType().getElementType());
-
-  // Insert cast to ensure that types match. (May be folded away.)
-  auto castResult = [&](Value val) -> Operation * {
-    auto castOp = b.create<tensor::CastOp>(loc, resultType, val);
-    return castOp;
-  };
-
-  // In cases where the original data source is unused: Emit a GenerateOp and
-  // do not generate a SliceOp. (The result shape of the SliceOp would
-  // have a dimension of size 0, the semantics of which is unclear.)
-  auto createGenerateOp = [&]() {
-    // Create GenerateOp.
-    auto generateOp = b.create<tensor::GenerateOp>(
-        loc, resultType, dynDims,
-        [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
-          builder.create<tensor::YieldOp>(gLoc, padValue);
-        });
-    return castResult(generateOp);
-  };
-
-  // Emit a SliceOp and a PadTensorOp. Should not be used in cases where
-  // the result shape of the new SliceOp has a zero dimension.
-  auto createPadTensorOfSubTensor = [&]() {
-    // Create pad_tensor(subtensor(x)).
-    auto newSliceOp = b.create<tensor::ExtractSliceOp>(
-        loc, source(), newOffsets, newLengths, newStrides);
-    auto newPadTensorOp = b.create<PadTensorOp>(
-        loc, newSliceOp, staticNewLows, staticNewHighs, newLows, newHighs);
-
-    // Copy region to new PadTensorOp.
-    BlockAndValueMapping bvm;
-    region().cloneInto(&newPadTensorOp.getRegion(), bvm);
-
-    // Cast result and return.
-    return castResult(newPadTensorOp);
-  };
-
-  // Rewrite subtensor(pad_tensor(x)) into a GenerateOp it is statically known
-  // that the original data source x is not used.
-  if (hasZeroLen) {
-    return {createGenerateOp()};
-  }
-
-  // If there are dynamic dimensions: Generate an scf.if check to avoid creating
-  // SliceOps with result dimensions of size 0 at runtime.
-  if (dynHasZeroLenCond) {
-    auto result = b.create<scf::IfOp>(
-        loc, resultType, dynHasZeroLenCond,
-        /*thenBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          b.create<scf::YieldOp>(loc, createGenerateOp()->getResult(0));
-        },
-        /*elseBuilder=*/
-        [&](OpBuilder &b, Location loc) {
-          b.create<scf::YieldOp>(loc,
-                                 createPadTensorOfSubTensor()->getResult(0));
-        });
-    return {result};
-  }
-  return {createPadTensorOfSubTensor()};
-}
-
-namespace {
-// Folds linalg.pad_tensor when padding is static zeros and the attribute
-// doesn't request otherwise.
-struct FoldStaticZeroPadding : public OpRewritePattern<PadTensorOp> {
-  using OpRewritePattern<PadTensorOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(PadTensorOp padTensorOp,
-                                PatternRewriter &rewriter) const override {
-    if (!padTensorOp.hasZeroLowPad() || !padTensorOp.hasZeroHighPad())
-      return failure();
-    if (padTensorOp.nofold())
-      return failure();
-    rewriter.replaceOpWithNewOp<tensor::CastOp>(
-        padTensorOp, padTensorOp.result().getType(), padTensorOp.source());
-    return success();
-  }
-};
-
-// Fold CastOp into PadTensorOp when adding static information.
-struct FoldSourceTensorCast : public OpRewritePattern<PadTensorOp> {
-  using OpRewritePattern<PadTensorOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(PadTensorOp padTensorOp,
-                                PatternRewriter &rewriter) const override {
-    auto castOp = padTensorOp.source().getDefiningOp<tensor::CastOp>();
-    if (!tensor::canFoldIntoConsumerOp(castOp))
-      return failure();
-
-    auto newResultType = PadTensorOp::inferResultType(
-        castOp.source().getType().cast<RankedTensorType>(),
-        extractFromI64ArrayAttr(padTensorOp.static_low()),
-        extractFromI64ArrayAttr(padTensorOp.static_high()),
-        padTensorOp.getResultType().getShape());
-
-    if (newResultType == padTensorOp.getResultType()) {
-      rewriter.updateRootInPlace(padTensorOp, [&]() {
-        padTensorOp.sourceMutable().assign(castOp.source());
-      });
-    } else {
-      auto newOp = rewriter.create<PadTensorOp>(
-          padTensorOp->getLoc(), newResultType, padTensorOp.source(),
-          padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(),
-          padTensorOp.static_high(), padTensorOp.nofold());
-      BlockAndValueMapping mapper;
-      padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper);
-
-      rewriter.replaceOpWithNewOp<tensor::CastOp>(
-          padTensorOp, padTensorOp.getResultType(), newOp);
-    }
-    return success();
-  }
-};
-
-// Fold CastOp using the result of PadTensorOp back into the latter if it adds
-// static information.
-struct FoldTargetTensorCast : public OpRewritePattern<PadTensorOp> {
-  using OpRewritePattern<PadTensorOp>::OpRewritePattern;
-
-  LogicalResult matchAndRewrite(PadTensorOp padTensorOp,
-                                PatternRewriter &rewriter) const override {
-    if (!padTensorOp.result().hasOneUse())
-      return failure();
-    auto tensorCastOp =
-        dyn_cast<tensor::CastOp>(*padTensorOp->getUsers().begin());
-    if (!tensorCastOp)
-      return failure();
-    if (!tensor::preservesStaticInformation(padTensorOp.result().getType(),
-                                            tensorCastOp.dest().getType()))
-      return failure();
-
-    auto replacementOp = rewriter.create<PadTensorOp>(
-        padTensorOp.getLoc(), tensorCastOp.dest().getType(),
-        padTensorOp.source(), padTensorOp.low(), padTensorOp.high(),
-        padTensorOp.static_low(), padTensorOp.static_high(),
-        padTensorOp.nofold());
-    replacementOp.region().takeBody(padTensorOp.region());
-
-    rewriter.replaceOp(padTensorOp, replacementOp.result());
-    rewriter.replaceOp(tensorCastOp, replacementOp.result());
-    return success();
-  }
-};
-} // namespace
-
-void PadTensorOp::getCanonicalizationPatterns(RewritePatternSet &results,
-                                              MLIRContext *context) {
-  results.add<FoldStaticZeroPadding, FoldSourceTensorCast>(context);
-  results.add<FoldTargetTensorCast>(context);
-}
-
-/// Return the padding value of the PadTensorOp if it constant. In this context,
-/// "constant" means an actual constant or "defined outside of the block".
-///
-/// Values are considered constant in three cases:
-///  - A ConstantLike value.
-///  - A basic block argument from a different block.
-///  - A value defined outside of the block.
-///
-/// If the padding value is not constant, an empty Value is returned.
-Value PadTensorOp::getConstantPaddingValue() {
-  auto yieldOp = dyn_cast<YieldOp>(getRegion().front().getTerminator());
-  if (!yieldOp || yieldOp.values().size() != 1)
-    return {};
-  Value padValue = yieldOp.values().front();
-  // Check if yield value is a constant.
-  if (matchPattern(padValue, m_Constant()))
-    return padValue;
-  // Check if yield value is defined inside the PadTensorOp block.
-  if (padValue.getParentBlock() == &getRegion().front())
-    return {};
-  // Else: Yield value defined outside of the PadTensorOp block.
-  return padValue;
-}
-
-OpFoldResult PadTensorOp::fold(ArrayRef<Attribute>) {
-  if (getResultType().hasStaticShape() && getResultType() == getSourceType() &&
-      !nofold())
-    return source();
-  return {};
-}
-
 //===----------------------------------------------------------------------===//
 // YieldOp
 //===----------------------------------------------------------------------===//
@@ -1687,16 +1132,6 @@ static LogicalResult verify(linalg::YieldOp op) {
   if (auto linalgOp = dyn_cast<LinalgOp>(parentOp))
     return verifyYield(op, cast<LinalgOp>(parentOp));
 
-  if (auto padTensorOp = dyn_cast<linalg::PadTensorOp>(parentOp)) {
-    if (op.getNumOperands() != 1)
-      return op.emitOpError("expected single yield operand (got ")
-             << op->getNumOperands() << ")";
-    if (op.getOperand(0).getType() !=
-        padTensorOp.getType().cast<ShapedType>().getElementType())
-      return op.emitOpError("expected yield type to match shape element type");
-    return success();
-  }
-
   if (auto tiledLoopOp = dyn_cast<linalg::TiledLoopOp>(parentOp)) {
     // Check if output args with tensor types match results types.
     SmallVector<Value, 2> tensorOuts;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
index 1283dec2a0a5b..9e45ed2a8fbf0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp
@@ -320,7 +320,7 @@ struct LinalgBufferizePass : public LinalgBufferizeBase<LinalgBufferizePass> {
     target.addLegalDialect<arith::ArithmeticDialect, AffineDialect,
                            memref::MemRefDialect, StandardOpsDialect,
                            tensor::TensorDialect>();
-    target.addIllegalOp<InitTensorOp, PadTensorOp, tensor::CollapseShapeOp,
+    target.addIllegalOp<InitTensorOp, tensor::PadOp, tensor::CollapseShapeOp,
                         tensor::ExpandShapeOp, tensor::ExtractSliceOp,
                         tensor::InsertSliceOp>();
 
@@ -363,5 +363,5 @@ void mlir::linalg::populateLinalgBufferizePatterns(
       VectorTransferWriteOpConverter
     >(typeConverter, patterns.getContext());
   // clang-format on
-  patterns.add<GeneralizePadTensorOpPattern>(patterns.getContext());
+  patterns.add<GeneralizePadOpPattern>(patterns.getContext());
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 5ea77641b7f06..3c8b9c9606952 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -67,7 +67,7 @@ struct LinalgComprehensiveModuleBufferize
 
 static void applyEnablingTransformations(ModuleOp moduleOp) {
   RewritePatternSet patterns(moduleOp.getContext());
-  patterns.add<GeneralizePadTensorOpPattern>(moduleOp.getContext());
+  patterns.add<GeneralizePadOpPattern>(moduleOp.getContext());
   (void)applyPatternsAndFoldGreedily(moduleOp, std::move(patterns));
 }
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index a4edd8a87bba7..21c92ee304dfa 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -1,4 +1,4 @@
-//===- HoistPadding.cpp - Hoisting transformation for PadTensorOp ---------===//
+//===- HoistPadding.cpp - Hoisting for tensor::PadOp ----------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -37,7 +37,7 @@ using llvm::dbgs;
 using namespace mlir;
 using namespace mlir::linalg;
 
-/// Analysis class to support PadTensorOp hoisting across multiple enclosing
+/// Analysis class to support tensor::PadOp hoisting across multiple enclosing
 /// loops. The failure conditions are:
 ///   1. Pad op has a use that is not an input of a LinalgOp.
 ///   2. Pad op does not have a constant padding value.
@@ -53,7 +53,7 @@ using namespace mlir::linalg;
 ///   8. There is no enclosing scf::ForOp that indexes the padded data.
 /// Other cases succeed and will trigger hoisting of the pad op.
 struct HoistingAnalysis {
-  HoistingAnalysis(PadTensorOp padTensorOp, int numLoops);
+  HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops);
 
   bool isValid() { return valid; }
 
@@ -98,7 +98,7 @@ struct HoistingAnalysis {
   /// ```
   /// dropNonIndexDependencies(%padded_slice, %slice)
   /// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice.
-  LogicalResult dropNonIndexDependencies(PadTensorOp padTensorOp,
+  LogicalResult dropNonIndexDependencies(tensor::PadOp padTensorOp,
                                          tensor::ExtractSliceOp sliceOp);
 
   /// Encodes whether the analysis is valid and hoisting can proceed.
@@ -107,7 +107,7 @@ struct HoistingAnalysis {
 
 /// Return true if all uses of `padTensorOp` are an input tensor of some
 /// LinalgOp.
-static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) {
+static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padTensorOp) {
   for (OpOperand &use : padTensorOp.result().getUses()) {
     auto linalgUser = dyn_cast<linalg::LinalgOp>(use.getOwner());
     if (!linalgUser || !linalgUser.isInputTensor(&use)) {
@@ -126,7 +126,7 @@ static bool isOnlyUsedAsInputOfLinalgOp(PadTensorOp padTensorOp) {
 /// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm.
 /// Control-flow and other containing ops with regions are not modeled atm.
 static void
-getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
+getAtMostNEnclosingLoops(tensor::PadOp padTensorOp, int nLevels,
                          SmallVector<scf::ForOp> &reverseEnclosingLoops) {
   AsmState state(padTensorOp->getParentOfType<mlir::FuncOp>());
   (void)state;
@@ -143,7 +143,7 @@ getAtMostNEnclosingLoops(PadTensorOp padTensorOp, int nLevels,
   }
 }
 
-HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
+HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
   valid = false;
 
   // Bail on any use that isn't an input of a Linalg op.
@@ -232,7 +232,7 @@ HoistingAnalysis::HoistingAnalysis(PadTensorOp padTensorOp, int numLoops) {
 }
 
 LogicalResult
-HoistingAnalysis::dropNonIndexDependencies(PadTensorOp padTensorOp,
+HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
                                            tensor::ExtractSliceOp sliceOp) {
   // Set of all values used for index computation.
   SetVector<Value> indexEdges;
@@ -373,9 +373,9 @@ static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer,
                                        ValueRange{ivVal, lbVal, stepVal});
 }
 
-FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
+FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
                                                      int numLoops,
-                                                     PadTensorOp &hoistedOp) {
+                                                     tensor::PadOp &hoistedOp) {
   LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops
                     << " loops\n");
   HoistingAnalysis analysis(opToHoist, numLoops);
@@ -399,7 +399,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
   // Create the packed tensor<?x?x..?xpadded_shape> into which we amortize
   // padding.
   SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamicSize);
-  // TODO: go grab dims when necessary, for now PadTensorOp returns a static
+  // TODO: go grab dims when necessary, for now tensor::PadOp returns a static
   // tensor.
   llvm::append_range(packedShape, paddedTensorType.getShape());
   auto packedTensorType =
@@ -463,7 +463,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
   // sizes = [1 .. 1, paddedShape].
   SmallVector<OpFoldResult> sizes(nPackedLoops, b.getIndexAttr(1));
   for (int64_t sz : paddedTensorType.getShape()) {
-    // TODO: go grab dims when necessary, for now PadTensorOp returns a static
+    // TODO: go grab dims when necessary, for now tensor::PadOp returns a static
     // tensor.
     assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes");
     sizes.push_back(b.getIndexAttr(sz));
@@ -506,6 +506,7 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(PadTensorOp opToHoist,
       loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides);
 
   // Make the newly cloned `opToHoist` available to the caller.
-  hoistedOp = cast<PadTensorOp>(bvm.lookup(opToHoist.result()).getDefiningOp());
+  hoistedOp =
+      cast<tensor::PadOp>(bvm.lookup(opToHoist.result()).getDefiningOp());
   return newResult;
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
index 31f8fa5b369e3..025adc2c56b2b 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -100,7 +100,7 @@ struct LinalgStrategyTilePass
                                              filter);
     else
       tilingPattern.add<LinalgTilingPattern>(ctx, options, filter);
-    if (anchorOpName == linalg::PadTensorOp::getOperationName())
+    if (anchorOpName == tensor::PadOp::getOperationName())
       populatePadTensorTilingPatterns(tilingPattern, options);
     (void)applyPatternsAndFoldGreedily(funcOp, std::move(tilingPattern));
   }
@@ -302,12 +302,12 @@ struct LinalgStrategyVectorizePass
                                        std::move(vectorizationPatterns));
 
     // Apply the pad tensor op vectorization separately to avoid running the
-    // GenericPadTensorOpVectorizationPattern too early.
+    // GenericPadOpVectorizationPattern too early.
     // TODO: Improve once we have better infrastructure to control pattern
     // application.
     if (vectorizePadding) {
       RewritePatternSet patterns(funcOp.getContext());
-      linalg::populatePadTensorOpVectorizationPatterns(patterns);
+      linalg::populatePadOpVectorizationPatterns(patterns);
       (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
     }
   }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp b/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp
index 64de5197266be..78b5305c8a1ec 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp
@@ -38,9 +38,9 @@ namespace {
 /// ```
 ///
 /// if the `linalg.generic` has all parallel iterator types.
-struct FusePadTensorOp : OpRewritePattern<PadTensorOp> {
-  using OpRewritePattern<PadTensorOp>::OpRewritePattern;
-  LogicalResult matchAndRewrite(PadTensorOp padOp,
+struct FusePadOp : OpRewritePattern<tensor::PadOp> {
+  using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                 PatternRewriter &rewriter) const override {
     // Only works on padding op that sets the padded value to a constant.
     Value padValue = padOp.getConstantPaddingValue();
@@ -61,7 +61,10 @@ struct FusePadTensorOp : OpRewritePattern<PadTensorOp> {
           padOp, "only supported for ops with all parallel iterator types");
     }
     ReifiedRankedShapedTypeDims resultShape;
-    if (failed(padOp.reifyResultShapes(rewriter, resultShape)) ||
+    ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+        dyn_cast<ReifyRankedShapedTypeOpInterface>(padOp.getOperation());
+    if (failed(reifyShapedTypeInterface.reifyResultShapes(rewriter,
+                                                          resultShape)) ||
         resultShape.size() != 1) {
       return rewriter.notifyMatchFailure(
           padOp, "failed to get shape of pad op result");
@@ -118,5 +121,5 @@ struct FusePadTensorOp : OpRewritePattern<PadTensorOp> {
 
 void mlir::linalg::populateFusePadTensorWithProducerLinalgOpPatterns(
     RewritePatternSet &patterns) {
-  patterns.add<FusePadTensorOp>(patterns.getContext());
+  patterns.add<FusePadOp>(patterns.getContext());
 }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
index f005846ef4667..22fc7df2d69aa 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -338,18 +338,18 @@ mlir::linalg::tileLinalgOp(RewriterBase &b, LinalgOp op,
   return failure();
 }
 
-/// Generate a loop nest around a given PadTensorOp (for tiling). `newPadOp`
-/// and `loopNest` are output parameters that return the new (tiled) PadTensorOp
-/// and the loop nest.
-static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
-                                     PadTensorOp &newPadOp, LoopNest &loopNest,
-                                     const LinalgTilingOptions &options) {
+/// Generate a loop nest around a given tensor::PadOp (for tiling). `newPadOp`
+/// and `loopNest` are output parameters that return the new (tiled)
+/// tensor::PadOp and the loop nest.
+static LogicalResult tilePadOp(RewriterBase &builder, tensor::PadOp op,
+                               tensor::PadOp &newPadOp, LoopNest &loopNest,
+                               const LinalgTilingOptions &options) {
   Location loc = op.getLoc();
   OpBuilder::InsertionGuard g(builder);
   builder.setInsertionPoint(op);
 
-  // Clone PadTensorOp so that the existing op can be replaced more easily.
-  newPadOp = cast<PadTensorOp>(builder.clone(*op.getOperation()));
+  // Clone tensor::PadOp so that the existing op can be replaced more easily.
+  newPadOp = cast<tensor::PadOp>(builder.clone(*op.getOperation()));
   // Get rank and tile sizes.
   int64_t rank = op.getResultType().getRank();
   SmallVector<Value> tileSizes =
@@ -358,7 +358,9 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
   Value zero = builder.create<arith::ConstantIndexOp>(loc, 0);
   tileSizes.append(rank - tileSizes.size(), zero);
   // Compute lower and upper bounds of the loop nest.
-  SmallVector<Range> ranges = op.getIterationDomain(builder);
+  TilingInterface tilingInterface =
+      dyn_cast<TilingInterface>(op.getOperation());
+  SmallVector<Range> ranges = tilingInterface.getIterationDomain(builder);
   SmallVector<Value> lbs, dims, allDims, steps;
   for (int64_t i = 0; i < rank; ++i) {
     allDims.push_back(ranges[i].size);
@@ -369,7 +371,8 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
     }
   }
   // Generate loop nest: One loop per dimension.
-  SmallVector<Value> destOperand = op.getDestinationOperands(builder);
+  SmallVector<Value> destOperand =
+      tilingInterface.getDestinationOperands(builder);
   loopNest = mlir::scf::buildLoopNest(
       builder, loc, lbs, /*ubs=*/dims, steps, ValueRange(destOperand),
       [&](OpBuilder &b, Location loc, ValueRange localIvs,
@@ -379,8 +382,8 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
             computeTileOffsets(b, loc, localIvs, tileSizes);
         SmallVector<Value> sizes =
             computeTileSizes(b, loc, localIvs, tileSizes, allDims);
-        // Create ExtractSliceOp: Extract a tile from the PadTensorOp.
-        // Note: The PadTensorOp is located outside of the loop nest. It is
+        // Create ExtractSliceOp: Extract a tile from the tensor::PadOp.
+        // Note: The tensor::PadOp is located outside of the loop nest. It is
         // later moved inside by ExtractSliceOfPadTensorSwapPattern.
         auto map = AffineMap::getMultiDimIdentityMap(rank, b.getContext());
         Value tiledOutput =
@@ -399,21 +402,21 @@ static LogicalResult tilePadTensorOp(RewriterBase &builder, PadTensorOp op,
 }
 
 namespace {
-struct PadTensorOpTilingPattern : public OpRewritePattern<PadTensorOp> {
-  PadTensorOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt)
-      : OpRewritePattern<PadTensorOp>(ctx), options(std::move(opt)) {}
+struct PadOpTilingPattern : public OpRewritePattern<tensor::PadOp> {
+  PadOpTilingPattern(MLIRContext *ctx, LinalgTilingOptions opt)
+      : OpRewritePattern<tensor::PadOp>(ctx), options(std::move(opt)) {}
 
-  LogicalResult matchAndRewrite(PadTensorOp op,
+  LogicalResult matchAndRewrite(tensor::PadOp op,
                                 PatternRewriter &rewriter) const override {
     if (op->hasAttr(LinalgTransforms::kLinalgTransformMarker))
       return failure();
-    PadTensorOp newPadOp;
+    tensor::PadOp newPadOp;
     LoopNest loopNest;
-    if (failed(tilePadTensorOp(rewriter, op, newPadOp, loopNest, options)))
+    if (failed(tilePadOp(rewriter, op, newPadOp, loopNest, options)))
       return failure();
     newPadOp->setAttr(LinalgTransforms::kLinalgTransformMarker,
                       rewriter.getUnitAttr());
-    // Replace all uses of the original PadTensorOp.
+    // Replace all uses of the original tensor::PadOp.
     rewriter.replaceOp(op, loopNest.getResults()[0]);
     return success();
   }
@@ -470,7 +473,7 @@ void mlir::linalg::populateLinalgTilingCanonicalizationPatterns(
   tensor::InsertSliceOp::getCanonicalizationPatterns(patterns, ctx);
 
   InitTensorOp::getCanonicalizationPatterns(patterns, ctx);
-  PadTensorOp::getCanonicalizationPatterns(patterns, ctx);
+  tensor::PadOp::getCanonicalizationPatterns(patterns, ctx);
   ctx->getLoadedDialect<LinalgDialect>()->getCanonicalizationPatterns(patterns);
 
   CanonicalizationPatternList<
@@ -489,13 +492,13 @@ static void insertTilingPatterns(RewritePatternSet &patterns,
 #define GET_OP_LIST
 #include "mlir/Dialect/Linalg/IR/LinalgStructuredOps.cpp.inc"
                  >::insert(patterns, options, f);
-  patterns.add<PadTensorOpTilingPattern>(ctx, options);
+  patterns.add<PadOpTilingPattern>(ctx, options);
 }
 
 void mlir::linalg::populatePadTensorTilingPatterns(
     RewritePatternSet &patterns, const LinalgTilingOptions &options) {
   auto *ctx = patterns.getContext();
-  patterns.add<PadTensorOpTilingPattern>(ctx, options);
+  patterns.add<PadOpTilingPattern>(ctx, options);
 }
 
 static void applyExtractSliceOfPadTensorSwapPattern(FuncOp funcOp) {
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 462f3c668f5c3..9eb7b7cfe751c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -160,9 +160,9 @@ LinalgTilingOptions &mlir::linalg::LinalgTilingOptions::scalarizeDynamicDims() {
 /// Helper function that tries to pad `opOperand`. Exit early for scalar
 /// operands, if `paddingFunc` returns failure, or if `opOperand` is not defined
 /// by an ExtractSliceOp. Otherwise, try to pad the operand even if it already
-/// has a static shape. Set `result` to the result of the created PadTensorOp or
-/// and return success if the operand either has been padded to a static shape
-/// or already had a static shape and failure otherwise.
+/// has a static shape. Set `result` to the result of the created tensor::PadOp
+/// or and return success if the operand either has been padded to a static
+/// shape or already had a static shape and failure otherwise.
 static LogicalResult padOperandToSmallestStaticBoundingBox(
     OpBuilder &b, linalg::LinalgOp opToPad, OpOperand *opOperand,
     const PaddingValueComputationFunction &paddingFunc,
@@ -528,10 +528,10 @@ mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite(
   // Hoist the padding.
   for (const auto &en : enumerate(depths)) {
     OpOperand &opOperand = paddedOp->getOpOperand(en.index());
-    auto padTensorOp = opOperand.get().getDefiningOp<PadTensorOp>();
+    auto padTensorOp = opOperand.get().getDefiningOp<tensor::PadOp>();
     if (!padTensorOp || en.value() == 0)
       continue;
-    PadTensorOp hoistedOp;
+    tensor::PadOp hoistedOp;
     FailureOr<Value> newResult =
         hoistPaddingOnTensors(padTensorOp, en.value(), hoistedOp);
     if (failed(newResult))
@@ -749,10 +749,11 @@ static SmallVector<StringRef> getNParallelLoopsAttrs(unsigned nParallelLoops) {
   return SmallVector<StringRef>(nParallelLoops, getParallelIteratorTypeName());
 }
 
-/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp (to
+/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp (to
 /// initialize with pad_val) and GenericOp (to copy contents).
-LogicalResult PadTensorOpTransformationPattern::matchAndRewrite(
-    linalg::PadTensorOp padOp, PatternRewriter &rewriter) const {
+LogicalResult
+PadOpTransformationPattern::matchAndRewrite(tensor::PadOp padOp,
+                                            PatternRewriter &rewriter) const {
 
   auto inputShapedType = padOp.source().getType().cast<ShapedType>();
   auto resultShapedType = padOp.result().getType().cast<ShapedType>();
@@ -767,9 +768,8 @@ LogicalResult PadTensorOpTransformationPattern::matchAndRewrite(
   //   1. A BBarg from a different block.
   //   2. A value defined outside of the current block.
   Block &block = padOp.region().front();
-  auto yieldOp = cast<YieldOp>(block.getTerminator());
-  assert(yieldOp.getNumOperands() == 1 && "expected single operand yield");
-  Value padValue = yieldOp.values().front();
+  auto yieldOp = cast<tensor::YieldOp>(block.getTerminator());
+  Value padValue = yieldOp.value();
   Operation *definingOp = padValue.getDefiningOp();
   if (definingOp && definingOp->getBlock() == &block)
     return failure();
@@ -812,8 +812,8 @@ LogicalResult PadTensorOpTransformationPattern::matchAndRewrite(
 
 /// Filling `dest` using FillOp constant padding value if possible.
 /// Otherwise, generate a tensor::GenerateOp.
-Value GeneralizePadTensorOpPattern::createFillOrGenerateOp(
-    PatternRewriter &rewriter, PadTensorOp padOp, Value dest,
+Value GeneralizePadOpPattern::createFillOrGenerateOp(
+    PatternRewriter &rewriter, tensor::PadOp padOp, Value dest,
     const SmallVector<Value> &dynSizes) const {
   auto padValue = padOp.getConstantPaddingValue();
   if (padValue)
@@ -825,20 +825,12 @@ Value GeneralizePadTensorOpPattern::createFillOrGenerateOp(
   // Copy region to new op.
   BlockAndValueMapping bvm;
   padOp.region().cloneInto(&generateOp.getRegion(), bvm);
-  // Rewrite linalg::YieldOp to tensor::YieldOp.
-  OpBuilder::InsertionGuard guard(rewriter);
-  auto yieldOp =
-      dyn_cast<linalg::YieldOp>(generateOp.getRegion().front().getTerminator());
-  assert(yieldOp && "malformed PadTensorOp: expected YieldOp terminator");
-  assert(yieldOp.values().size() == 1);
-  rewriter.setInsertionPoint(yieldOp);
-  rewriter.replaceOpWithNewOp<tensor::YieldOp>(yieldOp, yieldOp.values()[0]);
   return generateOp;
 }
 
 LogicalResult
-GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp,
-                                              PatternRewriter &rewriter) const {
+GeneralizePadOpPattern::matchAndRewrite(tensor::PadOp padOp,
+                                        PatternRewriter &rewriter) const {
   // Given an OpFoldResult, return an index-typed value.
   auto getIdxValue = [&](OpFoldResult ofr) {
     if (auto val = ofr.dyn_cast<Value>())
@@ -877,10 +869,10 @@ GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp,
   if (optimizeCopyFn && optimizeCopyFn(rewriter, padOp, fill).succeeded())
     return success();
 
-  // PadTensorOps cannot be optimized. Generate a InsertSliceOp instead
+  // tensor::PadOps cannot be optimized. Generate a InsertSliceOp instead
   // for copying the PadOp source.
   auto sourceType = padOp.getSourceType();
-  // Compute size of source of PadTensorOp.
+  // Compute size of source of tensor::PadOp.
   SmallVector<OpFoldResult> srcSizes;
   for (unsigned dim = 0; dim < sourceType.getRank(); ++dim) {
     if (sourceType.isDynamicDim(dim)) {
@@ -901,15 +893,17 @@ GeneralizePadTensorOpPattern::matchAndRewrite(PadTensorOp padOp,
 
 LogicalResult ExtractSliceOfPadTensorSwapPattern::matchAndRewrite(
     tensor::ExtractSliceOp sliceOp, PatternRewriter &rewriter) const {
-  auto padOp = sliceOp.source().getDefiningOp<PadTensorOp>();
+  auto padOp = sliceOp.source().getDefiningOp<tensor::PadOp>();
   if (!padOp)
     return failure();
   // Only unit stride supported.
   if (!sliceOp.hasUnitStride())
     return failure();
 
+  TilingInterface tilingInterface =
+      dyn_cast<TilingInterface>(padOp.getOperation());
   Operation *tiledPadOp =
-      padOp
+      tilingInterface
           .getTiledImplementation(
               rewriter, /*dest=*/ValueRange{}, sliceOp.getMixedOffsets(),
               sliceOp.getMixedSizes(), /*tileDestOperands=*/false)
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
index 7472d9ee20898..e0b2c64056674 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp
@@ -682,20 +682,19 @@ static SmallVector<Value> ofrToIndexValues(OpBuilder &builder, Location loc,
   return result;
 }
 
-/// Rewrite a PadTensorOp into a sequence of InitTensorOp, FillOp and
+/// Rewrite a tensor::PadOp into a sequence of InitTensorOp, FillOp and
 /// InsertSliceOp. For now, only constant padding values are supported.
 /// If there is enough static type information, TransferReadOps and
 /// TransferWriteOps may be generated instead of InsertSliceOps.
-struct GenericPadTensorOpVectorizationPattern
-    : public GeneralizePadTensorOpPattern {
-  GenericPadTensorOpVectorizationPattern(MLIRContext *context,
-                                         PatternBenefit benefit = 1)
-      : GeneralizePadTensorOpPattern(context, tryVectorizeCopy, benefit) {}
-  /// Vectorize the copying of a PadTensorOp's source. This is possible if
+struct GenericPadOpVectorizationPattern : public GeneralizePadOpPattern {
+  GenericPadOpVectorizationPattern(MLIRContext *context,
+                                   PatternBenefit benefit = 1)
+      : GeneralizePadOpPattern(context, tryVectorizeCopy, benefit) {}
+  /// Vectorize the copying of a tensor::PadOp's source. This is possible if
   /// each dimension size is statically know in the source type or the result
   /// type (or both).
   static LogicalResult tryVectorizeCopy(PatternRewriter &rewriter,
-                                        PadTensorOp padOp, Value dest) {
+                                        tensor::PadOp padOp, Value dest) {
     auto sourceType = padOp.getSourceType();
     auto resultType = padOp.getResultType();
 
@@ -767,13 +766,13 @@ struct GenericPadTensorOpVectorizationPattern
   }
 };
 
-/// Base pattern for rewriting PadTensorOps whose result is consumed by a
+/// Base pattern for rewriting tensor::PadOps whose result is consumed by a
 /// given operation type OpTy.
 template <typename OpTy>
-struct VectorizePadTensorOpUserPattern : public OpRewritePattern<PadTensorOp> {
-  using OpRewritePattern<PadTensorOp>::OpRewritePattern;
+struct VectorizePadOpUserPattern : public OpRewritePattern<tensor::PadOp> {
+  using OpRewritePattern<tensor::PadOp>::OpRewritePattern;
 
-  LogicalResult matchAndRewrite(PadTensorOp padOp,
+  LogicalResult matchAndRewrite(tensor::PadOp padOp,
                                 PatternRewriter &rewriter) const final {
     bool changed = false;
     // Insert users in vector, because some users may be replaced/removed.
@@ -785,10 +784,10 @@ struct VectorizePadTensorOpUserPattern : public OpRewritePattern<PadTensorOp> {
 
 protected:
   virtual LogicalResult rewriteUser(PatternRewriter &rewriter,
-                                    PadTensorOp padOp, OpTy op) const = 0;
+                                    tensor::PadOp padOp, OpTy op) const = 0;
 };
 
-/// Rewrite use of PadTensorOp result in TransferReadOp. E.g.:
+/// Rewrite use of tensor::PadOp result in TransferReadOp. E.g.:
 /// ```
 /// %0 = linalg.pad_tensor %src ... : tensor<?x?xf32> to tensor<17x5xf32>
 /// %r = vector.transfer_read %0[%c0, %c0], %cst
@@ -807,12 +806,12 @@ struct VectorizePadTensorOpUserPattern : public OpRewritePattern<PadTensorOp> {
 /// - `xferOp` has no out-of-bounds dims or mask.
 /// - Low padding is static 0.
 /// - Single, scalar padding value.
-struct PadTensorOpVectorizationWithTransferReadPattern
-    : public VectorizePadTensorOpUserPattern<vector::TransferReadOp> {
-  using VectorizePadTensorOpUserPattern<
-      vector::TransferReadOp>::VectorizePadTensorOpUserPattern;
+struct PadOpVectorizationWithTransferReadPattern
+    : public VectorizePadOpUserPattern<vector::TransferReadOp> {
+  using VectorizePadOpUserPattern<
+      vector::TransferReadOp>::VectorizePadOpUserPattern;
 
-  LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp,
+  LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp,
                             vector::TransferReadOp xferOp) const override {
     // Low padding must be static 0.
     if (!padOp.hasZeroLowPad())
@@ -837,7 +836,7 @@ struct PadTensorOpVectorizationWithTransferReadPattern
   }
 };
 
-/// Rewrite use of PadTensorOp result in TransferWriteOp.
+/// Rewrite use of tensor::PadOp result in TransferWriteOp.
 /// This pattern rewrites TransferWriteOps that write to a padded tensor
 /// value, where the same amount of padding is immediately removed again after
 /// the write. In such cases, the TransferWriteOp can write to the non-padded
@@ -869,12 +868,12 @@ struct PadTensorOpVectorizationWithTransferReadPattern
 ///   ExtractSliceOp trims the same amount of padding that was added
 ///   beforehand.
 /// - Single, scalar padding value.
-struct PadTensorOpVectorizationWithTransferWritePattern
-    : public VectorizePadTensorOpUserPattern<vector::TransferWriteOp> {
-  using VectorizePadTensorOpUserPattern<
-      vector::TransferWriteOp>::VectorizePadTensorOpUserPattern;
+struct PadOpVectorizationWithTransferWritePattern
+    : public VectorizePadOpUserPattern<vector::TransferWriteOp> {
+  using VectorizePadOpUserPattern<
+      vector::TransferWriteOp>::VectorizePadOpUserPattern;
 
-  LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp,
+  LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp,
                             vector::TransferWriteOp xferOp) const override {
     // TODO: support 0-d corner case.
     if (xferOp.getTransferRank() == 0)
@@ -925,7 +924,7 @@ struct PadTensorOpVectorizationWithTransferWritePattern
   /// sizes may turn out to be equal at runtime.
   bool hasSameTensorSize(Value beforePadding,
                          tensor::ExtractSliceOp afterTrimming) const {
-    // If the input to PadTensorOp is a CastOp, try with with both CastOp
+    // If the input to tensor::PadOp is a CastOp, try with with both CastOp
     // result and CastOp operand.
     if (auto castOp = beforePadding.getDefiningOp<tensor::CastOp>())
       if (hasSameTensorSize(castOp.source(), afterTrimming))
@@ -1000,7 +999,7 @@ struct PadTensorOpVectorizationWithTransferWritePattern
   }
 };
 
-/// Rewrite use of PadTensorOp result in InsertSliceOp. E.g.:
+/// Rewrite use of tensor::PadOp result in InsertSliceOp. E.g.:
 /// ```
 /// %0 = linalg.pad_tensor %src ... : tensor<?x?xf32> to tensor<17x5xf32>
 /// %r = tensor.insert_slice %0
@@ -1023,12 +1022,12 @@ struct PadTensorOpVectorizationWithTransferWritePattern
 /// - Only unit strides in `insertOp`.
 /// - Single, scalar padding value.
 /// - `padOp` result not used as destination.
-struct PadTensorOpVectorizationWithInsertSlicePattern
-    : public VectorizePadTensorOpUserPattern<tensor::InsertSliceOp> {
-  using VectorizePadTensorOpUserPattern<
-      tensor::InsertSliceOp>::VectorizePadTensorOpUserPattern;
+struct PadOpVectorizationWithInsertSlicePattern
+    : public VectorizePadOpUserPattern<tensor::InsertSliceOp> {
+  using VectorizePadOpUserPattern<
+      tensor::InsertSliceOp>::VectorizePadOpUserPattern;
 
-  LogicalResult rewriteUser(PatternRewriter &rewriter, PadTensorOp padOp,
+  LogicalResult rewriteUser(PatternRewriter &rewriter, tensor::PadOp padOp,
                             tensor::InsertSliceOp insertOp) const override {
     // Low padding must be static 0.
     if (!padOp.hasZeroLowPad())
@@ -1087,14 +1086,14 @@ struct PadTensorOpVectorizationWithInsertSlicePattern
   }
 };
 
-void mlir::linalg::populatePadTensorOpVectorizationPatterns(
+void mlir::linalg::populatePadOpVectorizationPatterns(
     RewritePatternSet &patterns, PatternBenefit baseBenefit) {
-  patterns.add<GenericPadTensorOpVectorizationPattern>(patterns.getContext(),
-                                                       baseBenefit);
+  patterns.add<GenericPadOpVectorizationPattern>(patterns.getContext(),
+                                                 baseBenefit);
   // Try these specialized patterns first before resorting to the generic one.
-  patterns.add<PadTensorOpVectorizationWithTransferReadPattern,
-               PadTensorOpVectorizationWithTransferWritePattern,
-               PadTensorOpVectorizationWithInsertSlicePattern>(
+  patterns.add<PadOpVectorizationWithTransferReadPattern,
+               PadOpVectorizationWithTransferWritePattern,
+               PadOpVectorizationWithInsertSlicePattern>(
       patterns.getContext(), baseBenefit.getBenefit() + 1);
 }
 
diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
index a55955654dafe..1231f378a306d 100644
--- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
@@ -12,5 +12,6 @@ add_mlir_dialect_library(MLIRLinalgUtils
   MLIRSCF
   MLIRPass
   MLIRStandard
+  MLIRTensorUtils
   MLIRTransformUtils
   )
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 61be7bc6c6461..bf37719325ccb 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -23,6 +23,7 @@
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/StandardOps/Utils/Utils.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineExprVisitor.h"
@@ -328,7 +329,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
   // Exit if `source` is not defined by an ExtractSliceOp.
   auto sliceOp = source.getDefiningOp<tensor::ExtractSliceOp>();
   if (!sliceOp)
-    return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Search the `source` use-def chain for padded LinalgOps.
   Value current = sliceOp.source();
@@ -339,22 +340,22 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
     OpResult opResult = current.cast<OpResult>();
     current = linalgOp.getOutputOperand(opResult.getResultNumber())->get();
   }
-  auto padTensorOp = current ? current.getDefiningOp<PadTensorOp>() : nullptr;
+  auto padTensorOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
 
-  // Exit if the search fails to match a PadTensorOp at the end of the matched
+  // Exit if the search fails to match a tensor::PadOp at the end of the matched
   // LinalgOp sequence.
   if (!padTensorOp)
-    return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the padded result type does not match.
   if (sliceOp.source().getType() != type)
-    return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the LinalgOps are not high padded.
   if (llvm::any_of(padTensorOp.getMixedLowPad(), [](OpFoldResult ofr) {
         return getConstantIntValue(ofr) != static_cast<int64_t>(0);
       }))
-    return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if `padTensorOpSliceOp`, which defines the slice used by
   // `padTensorOp`, is rank-reducing.
@@ -362,7 +363,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
       padTensorOp.source().getDefiningOp<tensor::ExtractSliceOp>();
   if (!padTensorOpSliceOp || sliceOp.getMixedSizes().size() !=
                                  padTensorOpSliceOp.getMixedSizes().size())
-    return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the sizes of the dynamic sizes of `sliceOp` do not match the size
   // of the slice padded by `padTensorOp`.
@@ -372,7 +373,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                      return !isEqualConstantIntOrValue(std::get<0>(it),
                                                        std::get<1>(it));
                    }))
-    return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the padding values do not match.
   Attribute padTensorOpPadAttr, padAttr;
@@ -380,7 +381,7 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
   if (!padTensorOpPad ||
       !matchPattern(padTensorOpPad, m_Constant(&padTensorOpPadAttr)) ||
       !matchPattern(pad, m_Constant(&padAttr)) || padTensorOpPadAttr != padAttr)
-    return PadTensorOp::createPadHighOp(type, source, pad, nofold, loc, b);
+    return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Return the padded result if the padding values and sizes match.
   return sliceOp.source();
diff --git a/mlir/lib/Dialect/Tensor/CMakeLists.txt b/mlir/lib/Dialect/Tensor/CMakeLists.txt
index 9f57627c321fb..31167e6af908b 100644
--- a/mlir/lib/Dialect/Tensor/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/CMakeLists.txt
@@ -1,2 +1,3 @@
 add_subdirectory(IR)
 add_subdirectory(Transforms)
+add_subdirectory(Utils)
diff --git a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
index 87aeaab6ca976..df2807f318e04 100644
--- a/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/IR/CMakeLists.txt
@@ -2,6 +2,7 @@ set(LLVM_OPTIONAL_SOURCES
   TensorDialect.cpp
   TensorInferTypeOpInterfaceImpl.cpp
   TensorOps.cpp
+  TensorTilingInterfaceImpl.cpp
 )
 
 add_mlir_dialect_library(MLIRTensor
@@ -43,3 +44,20 @@ add_mlir_dialect_library(MLIRTensorInferTypeOpInterfaceImpl
   MLIRSupport
   MLIRTensor
   )
+
+add_mlir_dialect_library(MLIRTensorTilingInterfaceImpl
+  TensorTilingInterfaceImpl.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor
+
+  LINK_LIBS PUBLIC
+  MLIRAffine
+  MLIRIR
+  MLIRLinalg
+  MLIRSCF
+  MLIRStandard
+  MLIRSupport
+  MLIRTensor
+  MLIRTilingInterface
+  )
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp
index 588b635805893..bb7bd82f40a68 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp
@@ -161,6 +161,48 @@ struct ReifyExpandOrCollapseShapeOp
   }
 };
 
+namespace {
+
+struct ReifyPadOp
+    : public ReifyRankedShapedTypeOpInterface::ExternalModel<ReifyPadOp,
+                                                             PadOp> {
+  LogicalResult
+  reifyResultShapes(Operation *op, OpBuilder &b,
+                    ReifiedRankedShapedTypeDims &reifiedReturnShapes) const {
+    auto padOp = cast<PadOp>(op);
+    Location loc = padOp.getLoc();
+    auto lowPad = padOp.getMixedLowPad();
+    auto highPad = padOp.getMixedHighPad();
+    SmallVector<Value> shapes;
+    for (auto dim : llvm::seq<int64_t>(0, padOp.getSourceType().getRank())) {
+      // Shape along each dimension is source dim + low pad + high pad.
+      SmallVector<Value> mapOperands;
+      mapOperands.push_back(
+          b.createOrFold<tensor::DimOp>(loc, padOp.source(), dim));
+      AffineExpr expr = b.getAffineDimExpr(0);
+      unsigned numSymbols = 0;
+      auto addOpFoldResult = [&](OpFoldResult valueOrAttr) {
+        if (Value v = valueOrAttr.dyn_cast<Value>()) {
+          expr = expr + b.getAffineSymbolExpr(numSymbols++);
+          mapOperands.push_back(v);
+          return;
+        }
+        int64_t staticValue =
+            valueOrAttr.get<Attribute>().cast<IntegerAttr>().getInt();
+        expr = expr + staticValue;
+      };
+      addOpFoldResult(lowPad[dim]);
+      addOpFoldResult(highPad[dim]);
+      shapes.push_back(applyMapToValues(
+          b, loc, AffineMap::get(1, numSymbols, expr), mapOperands)[0]);
+    }
+    reifiedReturnShapes.emplace_back(std::move(shapes));
+    return success();
+  }
+};
+
+} // namespace
+
 void mlir::tensor::registerInferTypeOpInterfaceExternalModels(
     DialectRegistry &registry) {
   registry
@@ -169,4 +211,5 @@ void mlir::tensor::registerInferTypeOpInterfaceExternalModels(
   registry
       .addOpInterface<tensor::CollapseShapeOp,
                       ReifyExpandOrCollapseShapeOp<tensor::CollapseShapeOp>>();
+  registry.addOpInterface<tensor::PadOp, ReifyPadOp>();
 }
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 613edde638683..42f57a9cf99bd 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -476,6 +476,7 @@ static LogicalResult verify(GenerateOp op) {
   // Ensure that the region yields an element of the right type.
   auto yieldOp =
       llvm::cast<YieldOp>(op.body().getBlocks().front().getTerminator());
+
   if (yieldOp.value().getType() != resultTy.getElementType())
     return op.emitOpError(
         "body must be terminated with a `yield` operation of the tensor "
@@ -1482,6 +1483,258 @@ Value mlir::tensor::createCanonicalRankReducingInsertSliceOp(OpBuilder &b,
                                                sizes, strides);
 }
 
+//===----------------------------------------------------------------------===//
+// PadOp
+//===----------------------------------------------------------------------===//
+
+// TODO: Replace custom<InferType> directive with AllTypesMatch as soon as it
+// supports optional types.
+void printInferType(OpAsmPrinter &printer, Operation *op, Value optOperand,
+                    Type typeToInfer, Type typeToInferFrom) {}
+
+ParseResult parseInferType(OpAsmParser &parser,
+                           Optional<OpAsmParser::OperandType> optOperand,
+                           Type &typeToInfer, Type typeToInferFrom) {
+  if (optOperand)
+    typeToInfer = typeToInferFrom;
+  return success();
+}
+
+static LogicalResult verify(PadOp op) {
+  auto sourceType = op.source().getType().cast<RankedTensorType>();
+  auto resultType = op.result().getType().cast<RankedTensorType>();
+  auto expectedType = PadOp::inferResultType(
+      sourceType, extractFromI64ArrayAttr(op.static_low()),
+      extractFromI64ArrayAttr(op.static_high()));
+  for (int i = 0, e = sourceType.getRank(); i < e; ++i) {
+    if (resultType.getDimSize(i) == expectedType.getDimSize(i))
+      continue;
+    if (expectedType.isDynamicDim(i))
+      continue;
+    return op.emitError("specified type ")
+           << resultType << " does not match the inferred type "
+           << expectedType;
+  }
+
+  auto &region = op.region();
+  unsigned rank = resultType.getRank();
+  Block &block = region.front();
+  if (block.getNumArguments() != rank)
+    return op.emitError("expected the block to have ") << rank << " arguments";
+
+  // Note: the number and type of yield values are checked in the YieldOp.
+  for (const auto &en : llvm::enumerate(block.getArgumentTypes())) {
+    if (!en.value().isIndex())
+      return op.emitOpError("expected block argument ")
+             << (en.index() + 1) << " to be an index";
+  }
+
+  // Ensure that the region yields an element of the right type.
+  auto yieldOp = llvm::cast<YieldOp>(block.getTerminator());
+  if (yieldOp.value().getType() !=
+      op.getType().cast<ShapedType>().getElementType())
+    return op.emitOpError("expected yield type to match shape element type");
+
+  return success();
+}
+
+RankedTensorType PadOp::inferResultType(RankedTensorType sourceType,
+                                        ArrayRef<int64_t> staticLow,
+                                        ArrayRef<int64_t> staticHigh,
+                                        ArrayRef<int64_t> resultShape) {
+  unsigned rank = sourceType.getRank();
+  assert(staticLow.size() == rank && "unexpected staticLow size mismatch");
+  assert(staticHigh.size() == rank && "unexpected staticHigh size mismatch");
+  assert((resultShape.empty() || resultShape.size() == rank) &&
+         "unexpected resultShape size mismatch");
+
+  SmallVector<int64_t, 4> inferredShape;
+  for (auto i : llvm::seq<unsigned>(0, rank)) {
+    if (sourceType.isDynamicDim(i) ||
+        staticLow[i] == ShapedType::kDynamicSize ||
+        staticHigh[i] == ShapedType::kDynamicSize) {
+      inferredShape.push_back(resultShape.empty() ? ShapedType::kDynamicSize
+                                                  : resultShape[i]);
+    } else {
+      int64_t size = sourceType.getDimSize(i) + staticLow[i] + staticHigh[i];
+      assert((resultShape.empty() || size == resultShape[i] ||
+              resultShape[i] == ShapedType::kDynamicSize) &&
+             "mismatch between inferred shape and result shape");
+      inferredShape.push_back(size);
+    }
+  }
+
+  return RankedTensorType::get(inferredShape, sourceType.getElementType());
+}
+
+void PadOp::build(OpBuilder &b, OperationState &result, Value source,
+                  ArrayRef<int64_t> staticLow, ArrayRef<int64_t> staticHigh,
+                  ValueRange low, ValueRange high, bool nofold,
+                  ArrayRef<NamedAttribute> attrs) {
+  auto sourceType = source.getType().cast<RankedTensorType>();
+  auto resultType = inferResultType(sourceType, staticLow, staticHigh);
+  build(b, result, resultType, source, low, high, b.getI64ArrayAttr(staticLow),
+        b.getI64ArrayAttr(staticHigh), nofold ? b.getUnitAttr() : UnitAttr());
+  result.addAttributes(attrs);
+}
+
+void PadOp::build(OpBuilder &b, OperationState &result, Value source,
+                  ValueRange low, ValueRange high, bool nofold,
+                  ArrayRef<NamedAttribute> attrs) {
+  auto sourceType = source.getType().cast<RankedTensorType>();
+  unsigned rank = sourceType.getRank();
+  SmallVector<int64_t, 4> staticVector(rank, ShapedType::kDynamicSize);
+  build(b, result, source, staticVector, staticVector, low, high, nofold,
+        attrs);
+}
+
+void PadOp::build(OpBuilder &b, OperationState &result, Type resultType,
+                  Value source, ArrayRef<OpFoldResult> low,
+                  ArrayRef<OpFoldResult> high, bool nofold,
+                  ArrayRef<NamedAttribute> attrs) {
+  assert(resultType.isa<RankedTensorType>());
+  auto sourceType = source.getType().cast<RankedTensorType>();
+  SmallVector<Value, 4> dynamicLow, dynamicHigh;
+  SmallVector<int64_t, 4> staticLow, staticHigh;
+  // staticLow and staticHigh have full information of the padding config.
+  // This will grow staticLow and staticHigh with 1 value. If the config is
+  // dynamic (ie not a constant), dynamicLow and dynamicHigh will grow with 1
+  // value as well.
+  dispatchIndexOpFoldResults(low, dynamicLow, staticLow,
+                             ShapedType::kDynamicSize);
+  dispatchIndexOpFoldResults(high, dynamicHigh, staticHigh,
+                             ShapedType::kDynamicSize);
+  if (!resultType) {
+    resultType = PadOp::inferResultType(sourceType, staticLow, staticHigh);
+  }
+  build(b, result, resultType, source, dynamicLow, dynamicHigh,
+        b.getI64ArrayAttr(staticLow), b.getI64ArrayAttr(staticHigh),
+        nofold ? b.getUnitAttr() : UnitAttr());
+  result.addAttributes(attrs);
+}
+
+namespace {
+// Folds tensor.pad when padding is static zeros and the attribute
+// doesn't request otherwise.
+struct FoldStaticZeroPadding : public OpRewritePattern<PadOp> {
+  using OpRewritePattern<PadOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(PadOp padTensorOp,
+                                PatternRewriter &rewriter) const override {
+    if (!padTensorOp.hasZeroLowPad() || !padTensorOp.hasZeroHighPad())
+      return failure();
+    if (padTensorOp.nofold())
+      return failure();
+    rewriter.replaceOpWithNewOp<tensor::CastOp>(
+        padTensorOp, padTensorOp.result().getType(), padTensorOp.source());
+    return success();
+  }
+};
+
+// Fold CastOp into PadOp when adding static information.
+struct FoldSourceTensorCast : public OpRewritePattern<PadOp> {
+  using OpRewritePattern<PadOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(PadOp padTensorOp,
+                                PatternRewriter &rewriter) const override {
+    auto castOp = padTensorOp.source().getDefiningOp<tensor::CastOp>();
+    if (!tensor::canFoldIntoConsumerOp(castOp))
+      return failure();
+
+    auto newResultType = PadOp::inferResultType(
+        castOp.source().getType().cast<RankedTensorType>(),
+        extractFromI64ArrayAttr(padTensorOp.static_low()),
+        extractFromI64ArrayAttr(padTensorOp.static_high()),
+        padTensorOp.getResultType().getShape());
+
+    if (newResultType == padTensorOp.getResultType()) {
+      rewriter.updateRootInPlace(padTensorOp, [&]() {
+        padTensorOp.sourceMutable().assign(castOp.source());
+      });
+    } else {
+      auto newOp = rewriter.create<PadOp>(
+          padTensorOp->getLoc(), newResultType, padTensorOp.source(),
+          padTensorOp.low(), padTensorOp.high(), padTensorOp.static_low(),
+          padTensorOp.static_high(), padTensorOp.nofold());
+      BlockAndValueMapping mapper;
+      padTensorOp.getRegion().cloneInto(&newOp.getRegion(), mapper);
+
+      rewriter.replaceOpWithNewOp<tensor::CastOp>(
+          padTensorOp, padTensorOp.getResultType(), newOp);
+    }
+    return success();
+  }
+};
+
+// Fold CastOp using the result of PadOp back into the latter if it adds
+// static information.
+struct FoldTargetTensorCast : public OpRewritePattern<PadOp> {
+  using OpRewritePattern<PadOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(PadOp padTensorOp,
+                                PatternRewriter &rewriter) const override {
+    if (!padTensorOp.result().hasOneUse())
+      return failure();
+    auto tensorCastOp =
+        dyn_cast<tensor::CastOp>(*padTensorOp->getUsers().begin());
+    if (!tensorCastOp)
+      return failure();
+    if (!tensor::preservesStaticInformation(padTensorOp.result().getType(),
+                                            tensorCastOp.dest().getType()))
+      return failure();
+
+    auto replacementOp = rewriter.create<PadOp>(
+        padTensorOp.getLoc(), tensorCastOp.dest().getType(),
+        padTensorOp.source(), padTensorOp.low(), padTensorOp.high(),
+        padTensorOp.static_low(), padTensorOp.static_high(),
+        padTensorOp.nofold());
+    replacementOp.region().takeBody(padTensorOp.region());
+
+    rewriter.replaceOp(padTensorOp, replacementOp.result());
+    rewriter.replaceOp(tensorCastOp, replacementOp.result());
+    return success();
+  }
+};
+} // namespace
+
+void PadOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                        MLIRContext *context) {
+  results
+      .add<FoldStaticZeroPadding, FoldSourceTensorCast, FoldTargetTensorCast>(
+          context);
+}
+
+/// Return the padding value of the PadOp if it constant. In this context,
+/// "constant" means an actual constant or "defined outside of the block".
+///
+/// Values are considered constant in three cases:
+///  - A ConstantLike value.
+///  - A basic block argument from a different block.
+///  - A value defined outside of the block.
+///
+/// If the padding value is not constant, an empty Value is returned.
+Value PadOp::getConstantPaddingValue() {
+  auto yieldOp = dyn_cast<YieldOp>(getRegion().front().getTerminator());
+  if (!yieldOp)
+    return {};
+  Value padValue = yieldOp.value();
+  // Check if yield value is a constant.
+  if (matchPattern(padValue, m_Constant()))
+    return padValue;
+  // Check if yield value is defined inside the PadOp block.
+  if (padValue.getParentBlock() == &getRegion().front())
+    return {};
+  // Else: Yield value defined outside of the PadOp block.
+  return padValue;
+}
+
+OpFoldResult PadOp::fold(ArrayRef<Attribute>) {
+  if (getResultType().hasStaticShape() && getResultType() == getSourceType() &&
+      !nofold())
+    return source();
+  return {};
+}
+
 //===----------------------------------------------------------------------===//
 // TableGen'd op method definitions
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
new file mode 100644
index 0000000000000..d206dc2ce9e90
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp
@@ -0,0 +1,279 @@
+//===- TensorTilingInterface.cpp - Tiling Interface  models *- C++ ------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Linalg/IR/Linalg.h"
+#include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/StandardOps/Utils/Utils.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Interfaces/TilingInterface.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+namespace {
+
+struct PadOpTiling : public TilingInterface::ExternalModel<PadOpTiling, PadOp> {
+
+  SmallVector<Value> getDestinationOperands(Operation *op, OpBuilder &b) const {
+    ReifiedRankedShapedTypeDims reifiedShapes;
+    ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+        dyn_cast<ReifyRankedShapedTypeOpInterface>(op);
+    (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes);
+
+    auto padOp = cast<PadOp>(op);
+    SmallVector<OpFoldResult> mixedSizes = getAsOpFoldResult(reifiedShapes[0]);
+    Value initTensor = b.create<linalg::InitTensorOp>(
+        op->getLoc(), mixedSizes, padOp.getResultType().getElementType());
+    return {initTensor};
+  }
+
+  SmallVector<StringRef> getLoopIteratorTypes(Operation *op) const {
+    auto padOp = cast<PadOp>(op);
+    SmallVector<StringRef> iteratorTypes(padOp.getResultType().getRank(),
+                                         getParallelIteratorTypeName());
+    return iteratorTypes;
+  }
+
+  SmallVector<Range> getIterationDomain(Operation *op, OpBuilder &b) const {
+    ReifiedRankedShapedTypeDims reifiedShapes;
+    ReifyRankedShapedTypeOpInterface reifyShapedTypeInterface =
+        dyn_cast<ReifyRankedShapedTypeOpInterface>(op);
+    (void)reifyShapedTypeInterface.reifyResultShapes(b, reifiedShapes);
+
+    Location loc = op->getLoc();
+    Value zero = b.create<arith::ConstantIndexOp>(loc, 0);
+    Value one = b.create<arith::ConstantIndexOp>(loc, 1);
+    // Initialize all the ranges to {zero, one, one}. All the `ub`s are
+    // overwritten.
+    SmallVector<Range> loopRanges(reifiedShapes[0].size(), {zero, one, one});
+    for (const auto &ub : enumerate(reifiedShapes[0]))
+      loopRanges[ub.index()].size = ub.value();
+    return loopRanges;
+  }
+
+  SmallVector<Operation *>
+  getTiledImplementation(Operation *op, OpBuilder &b, ValueRange dest,
+                         ArrayRef<OpFoldResult> offsets,
+                         ArrayRef<OpFoldResult> sizes,
+                         bool /*tileDestOperands*/) const {
+    auto padOp = cast<PadOp>(op);
+    // Only constant padding value supported.
+    Value padValue = padOp.getConstantPaddingValue();
+    if (!padValue)
+      return {};
+
+    // Helper variables and functions for various arithmetic operations. These
+    // are used extensively for computing new offset/length and padding values.
+    Location loc = op->getLoc();
+    AffineExpr dim0, dim1;
+    bindDims(b.getContext(), dim0, dim1);
+    // Add two integers.
+    auto addMap = AffineMap::get(2, 0, {dim0 + dim1});
+    auto add = [&](Value v1, Value v2) {
+      return b.createOrFold<AffineApplyOp>(loc, addMap, ValueRange{v1, v2});
+    };
+    // Subtract two integers.
+    auto subMap = AffineMap::get(2, 0, {dim0 - dim1});
+    auto sub = [&](Value v1, Value v2) {
+      return b.createOrFold<AffineApplyOp>(loc, subMap, ValueRange{v1, v2});
+    };
+    // Take the minimum of two integers.
+    auto idMap = AffineMap::getMultiDimIdentityMap(2, b.getContext());
+    auto min = [&](Value v1, Value v2) {
+      return b.createOrFold<AffineMinOp>(loc, idMap, ValueRange{v1, v2});
+    };
+    // Take the maximum of two integers.
+    auto max = [&](Value v1, Value v2) {
+      return b.createOrFold<AffineMaxOp>(loc, idMap, ValueRange{v1, v2});
+    };
+    // Zero index-typed integer.
+    auto zero = b.create<arith::ConstantIndexOp>(loc, 0);
+
+    // Helper function for filling static/dynamic low/high padding indices
+    // vectors of PadOp.
+    auto appendIndex = [&](Value val, SmallVector<Value> &dynIndices,
+                           SmallVector<int64_t> &staticIndices) {
+      if (auto constInt = getConstantIntValue(val)) {
+        staticIndices.push_back(*constInt);
+      } else {
+        staticIndices.push_back(ShapedType::kDynamicSize);
+        dynIndices.push_back(val);
+      }
+    };
+
+    // Compute new offsets, lengths, low padding, high padding.
+    SmallVector<OpFoldResult> newOffsets, newLengths, newStrides;
+    SmallVector<Value> newLows, newHighs;
+    SmallVector<int64_t> staticNewLows, staticNewHighs;
+    // Set to true if the original data source is not read at all.
+    bool hasZeroLen = false;
+    // Same as hasZeroLen, but for dynamic dimension sizes. This condition
+    // is true if the original data source turns out to be unused at runtime.
+    Value dynHasZeroLenCond;
+
+    int64_t rank = padOp.getSourceType().getRank();
+    for (unsigned dim = 0; dim < rank; ++dim) {
+      auto low =
+          getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedLowPad()[dim]);
+      bool hasLowPad = getConstantIntValue(low) != static_cast<int64_t>(0);
+      auto high =
+          getValueOrCreateConstantIndexOp(b, loc, padOp.getMixedHighPad()[dim]);
+      bool hasHighPad = getConstantIntValue(high) != static_cast<int64_t>(0);
+      auto offset = getValueOrCreateConstantIndexOp(b, loc, offsets[dim]);
+      auto length = getValueOrCreateConstantIndexOp(b, loc, sizes[dim]);
+      auto srcSize = b.createOrFold<tensor::DimOp>(loc, padOp.source(), dim);
+
+      // The new amount of low padding is `low - offset`. Except for the case
+      // where none of the low padding is read. In that case, the new amount of
+      // low padding is zero.
+      //
+      // Optimization: If low = 0, then newLow = 0.
+      Value newLow = hasLowPad ? max(zero, sub(low, offset)) : zero;
+      appendIndex(newLow, newLows, staticNewLows);
+
+      // Start reading the data from position `offset - low`. Since the original
+      // read may have started in the low padding zone, this value could be
+      // negative. Therefore, start reading from:
+      //
+      // max(offset - low, 0)
+      //
+      // The original read could also have started in the high padding zone.
+      // In that case, set the offset to the end of source tensor. The new
+      // ExtractSliceOp length will be zero in that case. (Effectively reading
+      // no data from the source.)
+      //
+      // Optimization: If low = 0, then the formula can be simplified.
+      Value newOffset = hasLowPad ? min(max(sub(offset, low), zero), srcSize)
+                                  : min(offset, srcSize);
+      newOffsets.push_back(getAsOpFoldResult(newOffset));
+
+      // The original ExtractSliceOp was reading until position `offset +
+      // length`. Therefore, the corresponding position within the source tensor
+      // is:
+      //
+      // offset + length - low
+      //
+      // In case the original ExtractSliceOp stopped reading within the low
+      // padding zone, this value can be negative. In that case, the end
+      // position of the read should be zero. (Similar to newOffset.)
+      //
+      // The original read could also have stopped in the high padding zone.
+      // In that case, set the end positition of the read should be the end of
+      // the source tensor. (Similar to newOffset.)
+      //
+      // endLoc = min(max(offset - low + length, 0), srcSize)
+      //
+      // The new ExtractSliceOp length is `endLoc - newOffset`.
+      //
+      // Optimization: If low = 0, then the formula can be simplified.
+      Value endLoc =
+          hasLowPad ? min(max(add(sub(offset, low), length), zero), srcSize)
+                    : min(add(offset, length), srcSize);
+      Value newLength = sub(endLoc, newOffset);
+      newLengths.push_back(getAsOpFoldResult(newLength));
+
+      // Check if newLength is zero. In that case, no SubTensorOp should be
+      // executed.
+      if (auto newLengthInt = getConstantIntValue(newLength)) {
+        hasZeroLen |= *newLengthInt == 0;
+      } else {
+        Value check = b.create<arith::CmpIOp>(loc, arith::CmpIPredicate::eq,
+                                              newLength, zero);
+        dynHasZeroLenCond =
+            dynHasZeroLenCond
+                ? b.create<arith::OrIOp>(loc, check, dynHasZeroLenCond)
+                : check;
+      }
+
+      // The amount of high padding is simply the number of elements remaining,
+      // so that the result has the same length as the original ExtractSliceOp.
+      // As an optimization, if the original high padding is zero, then the new
+      // high padding must also be zero.
+      Value newHigh = hasHighPad ? sub(sub(length, newLength), newLow) : zero;
+      appendIndex(newHigh, newHighs, staticNewHighs);
+
+      // Only unit stride supported.
+      newStrides.push_back(b.getIndexAttr(1));
+    }
+
+    // The shape of the result can be obtained from the sizes passed in.
+    SmallVector<Value> dynDims;
+    SmallVector<int64_t> shape;
+    dispatchIndexOpFoldResults(sizes, dynDims, shape, ShapedType::kDynamicSize);
+    RankedTensorType resultType =
+        RankedTensorType::get(shape, padOp.getResultType().getElementType());
+
+    // Insert cast to ensure that types match. (May be folded away.)
+    auto castResult = [&](Value val) -> Operation * {
+      auto castOp = b.create<tensor::CastOp>(loc, resultType, val);
+      return castOp;
+    };
+
+    // In cases where the original data source is unused: Emit a GenerateOp and
+    // do not generate a SliceOp. (The result shape of the SliceOp would
+    // have a dimension of size 0, the semantics of which is unclear.)
+    auto createGenerateOp = [&]() {
+      // Create GenerateOp.
+      auto generateOp = b.create<tensor::GenerateOp>(
+          loc, resultType, dynDims,
+          [&](OpBuilder &builder, Location gLoc, ValueRange indices) {
+            builder.create<tensor::YieldOp>(gLoc, padValue);
+          });
+      return castResult(generateOp);
+    };
+
+    // Emit a SliceOp and a PadOp. Should not be used in cases where
+    // the result shape of the new SliceOp has a zero dimension.
+    auto createPadTensorOfSubTensor = [&]() {
+      // Create pad_tensor(subtensor(x)).
+      auto newSliceOp = b.create<tensor::ExtractSliceOp>(
+          loc, padOp.source(), newOffsets, newLengths, newStrides);
+      auto newPadOp = b.create<PadOp>(loc, newSliceOp, staticNewLows,
+                                      staticNewHighs, newLows, newHighs);
+
+      // Copy region to new PadOp.
+      BlockAndValueMapping bvm;
+      padOp.region().cloneInto(&newPadOp.getRegion(), bvm);
+
+      // Cast result and return.
+      return castResult(newPadOp);
+    };
+
+    // Rewrite subtensor(pad_tensor(x)) into a GenerateOp it is statically known
+    // that the original data source x is not used.
+    if (hasZeroLen)
+      return {createGenerateOp()};
+
+    // If there are dynamic dimensions: Generate an scf.if check to avoid
+    // creating SliceOps with result dimensions of size 0 at runtime.
+    if (dynHasZeroLenCond) {
+      auto result = b.create<scf::IfOp>(
+          loc, resultType, dynHasZeroLenCond,
+          /*thenBuilder=*/
+          [&](OpBuilder &b, Location loc) {
+            b.create<scf::YieldOp>(loc, createGenerateOp()->getResult(0));
+          },
+          /*elseBuilder=*/
+          [&](OpBuilder &b, Location loc) {
+            b.create<scf::YieldOp>(loc,
+                                   createPadTensorOfSubTensor()->getResult(0));
+          });
+      return {result};
+    }
+    return {createPadTensorOfSubTensor()};
+  }
+};
+
+} // namespace
+
+void mlir::tensor::registerTilingOpInterfaceExternalModels(
+    DialectRegistry &registry) {
+  registry.addOpInterface<tensor::PadOp, PadOpTiling>();
+}
diff --git a/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt
new file mode 100644
index 0000000000000..19a00b5bc6eb9
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Utils/CMakeLists.txt
@@ -0,0 +1,12 @@
+add_mlir_dialect_library(MLIRTensorUtils
+  Utils.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Tensor
+
+  LINK_LIBS PUBLIC
+  MLIRAffine
+  MLIRArithmetic
+  MLIRIR
+  MLIRTensor
+)
diff --git a/mlir/lib/Dialect/Tensor/Utils/Utils.cpp b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
new file mode 100644
index 0000000000000..c7054cf50d060
--- /dev/null
+++ b/mlir/lib/Dialect/Tensor/Utils/Utils.cpp
@@ -0,0 +1,54 @@
+//===- Utils.cpp - Utilities to support the Tensor dialect ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for the Tensor dialect.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/Tensor/Utils/Utils.h"
+
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+
+using namespace mlir;
+using namespace mlir::tensor;
+
+PadOp mlir::tensor::createPadScalarOp(Type type, Value source, Value pad,
+                                      ArrayRef<OpFoldResult> low,
+                                      ArrayRef<OpFoldResult> high, bool nofold,
+                                      Location loc, OpBuilder &builder) {
+  auto padTensorOp =
+      builder.create<PadOp>(loc, type, source, low, high, nofold);
+  int rank = padTensorOp.getResultType().getRank();
+  SmallVector<Type, 4> blockArgTypes(rank, builder.getIndexType());
+  SmallVector<Location, 4> blockArgLocs(rank, loc);
+  auto &region = padTensorOp.region();
+  // `builder.createBlock` changes the insertion point within the block. Create
+  // a guard to reset the insertion point of the builder after it is destroyed.
+  OpBuilder::InsertionGuard guard(builder);
+  builder.createBlock(&region, region.end(), blockArgTypes, blockArgLocs);
+  builder.create<YieldOp>(loc, pad);
+  return padTensorOp;
+}
+
+PadOp mlir::tensor::createPadHighOp(Type type, Value source, Value pad,
+                                    bool nofold, Location loc, OpBuilder &b) {
+  SmallVector<OpFoldResult, 4> low, high;
+  auto rankedTensorType = type.cast<RankedTensorType>();
+  assert(rankedTensorType.hasStaticShape());
+  for (const auto &en : enumerate(rankedTensorType.getShape())) {
+    AffineExpr d0;
+    bindDims(b.getContext(), d0);
+    auto dimOp = b.createOrFold<tensor::DimOp>(loc, source, en.index());
+    Value paddingWidth =
+        makeComposedAffineApply(b, loc, en.value() - d0, {dimOp});
+    high.push_back(paddingWidth);
+    low.push_back(b.createOrFold<arith::ConstantIndexOp>(loc, 0));
+  }
+  return createPadScalarOp(type, source, pad, low, high, nofold, loc, b);
+}
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
index cac5cb5d7eb22..bd08b1ae2be4d 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-named.mlir
@@ -153,8 +153,8 @@ func @max_pool(%arg0: tensor<1x6x34x62xf32>) -> () {
 // CHECK-LABEL: @max_pool_padded
 func @max_pool_padded(%arg0: tensor<1x6x34x62xf32>) -> () {
   // CHECK-DAG: [[CONST:%.+]] = arith.constant -3.40282347E+38 : f32
-  // CHECK-DAG: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
-  // CHECK-DAG:   linalg.yield [[CONST]]
+  // CHECK-DAG: [[PAD:%.+]] = tensor.pad %arg0 low[0, 0, 0, 0] high[0, 0, 1, 0]
+  // CHECK-DAG:   tensor.yield [[CONST]]
   // CHECK-DAG: [[INITVAL:%.+]] = arith.constant -3.40282347E+38 : f32
   // CHECK-DAG: [[INIT:%.+]] = linalg.init_tensor [1, 4, 33, 62]
   // CHECK-DAG: [[FILL:%.+]] = linalg.fill([[INITVAL]], [[INIT]])
@@ -206,7 +206,7 @@ func @max_pool_i32(%arg0: tensor<1x6x34x62xi32>) -> () {
 func @avg_pool(%arg0: tensor<1x6x34x62xf32>) -> (tensor<1x5x33x62xf32>) {
   // Initial piece computes the sum of the pooling region, with appropriate padding.
   // CHECK: [[CONST:%.+]] = arith.constant 0
-  // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+  // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
   // CHECK: [[CONST:%.+]] = arith.constant 0
   // CHECK: [[POOLINIT:%.+]] = linalg.init_tensor [1, 5, 33, 62]
   // CHECK: [[FILL:%.+]] = linalg.fill([[CONST]], [[POOLINIT]])
@@ -268,7 +268,7 @@ func @avg_pool_dyn(%arg0: tensor<?x6x34x62xf32>) -> (tensor<?x5x33x62xf32>) {
   // The calculations remain the same as above, only testing for dyn behavior
   // CHECK: %[[C0:.+]] = arith.constant 0
   // CHECK: %[[BATCH:.+]] = tensor.dim %arg0, %[[C0]]
-  // CHECK: %[[PAD:.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+  // CHECK: %[[PAD:.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
   // CHECK: %[[POOLINIT:.+]] = linalg.init_tensor [%[[BATCH]], 5, 33, 62]
   // CHECK: %[[FILL:.+]] = linalg.fill
   // CHECK: %[[KERNEL:.+]] = linalg.init_tensor [4, 4]
@@ -386,8 +386,8 @@ func @conv2d_dyn(%input: tensor<?x49x42x27xf32>, %weights: tensor<28x3x3x27xf32>
 // CHECK-LABEL: @conv2d_padded_f32
 func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x28xf32>, %bias: tensor<28xf32>) -> () {
   // CHECK: %[[C0:.+]] = arith.constant 0
-  // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
-  // CHECK:   linalg.yield %[[C0]]
+  // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+  // CHECK:   tensor.yield %[[C0]]
   // CHECK: linalg.conv_2d_nhwc_hwcf
   %0 = "tosa.conv2d"(%input, %weights, %bias) {pad = [1, 1, 1, 1], stride = [1, 1], dilation = [2, 1]} : (tensor<1x47x40x28xf32>, tensor<28x3x3x28xf32>, tensor<28xf32>)  -> (tensor<1x45x40x28xf32>)
   return
@@ -398,8 +398,8 @@ func @conv2d_padded_f32(%input: tensor<1x47x40x28xf32>, %weights: tensor<28x3x3x
 // CHECK-LABEL: @conv2d_quant
 func @conv2d_quant(%arg0 : tensor<1x12x12x1xi8>, %arg1 : tensor<1024x3x3x1xi8>, %arg2 : tensor<1024xi32>) -> () {
   // CHECK:   %[[C22:.+]] = arith.constant -22
-  // CHECK: linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
-  // CHECK:   linalg.yield %[[C22]]
+  // CHECK: tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+  // CHECK:   tensor.yield %[[C22]]
   // CHECK: linalg.conv_2d_nhwc_hwcf_q
   %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = [1, 1], pad = [1, 1, 1, 1], quantization_info = {input_zp = -22 : i32, weight_zp = 42 : i32}, stride = [1, 1]} : (tensor<1x12x12x1xi8>, tensor<1024x3x3x1xi8>, tensor<1024xi32>) -> tensor<1x12x12x1024xi32>
   return
@@ -481,8 +481,8 @@ func @depthwise_conv_strides(%arg0 : tensor<1x11x9x3xf32>, %arg1 : tensor<3x1x3x
 // CHECK-LABEL: @depthwise_conv_quant
 func @depthwise_conv_quant(%arg0 : tensor<1x12x12x4xi8>, %arg1 : tensor<3x3x4x128xi8>, %arg2 : tensor<512xi32>) -> () {
   // CHECK: [[PADV:%.+]] = arith.constant -128
-  // CHECK: [[PAD:%.+]] = linalg.pad_tensor %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
-  // CHECK:   linalg.yield [[PADV]]
+  // CHECK: [[PAD:%.+]] = tensor.pad %arg0 low[0, 1, 1, 0] high[0, 1, 1, 0]
+  // CHECK:   tensor.yield [[PADV]]
 
   // CHECK: [[INIT:%.+]] = linalg.init_tensor [1, 12, 12, 4, 128]
   // CHECK: [[CST0:%.+]] = arith.constant 0
diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
index 452c04b3489cd..55b8bce54b1a2 100644
--- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
+++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg.mlir
@@ -1158,9 +1158,9 @@ func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
   // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index
   // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
   // CHECK-DAG: [[CST:%.+]] = arith.constant 0.000000e+00 : f32
-  // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
+  // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
   // CHECK: ^bb0(%arg1: index, %arg2: index):  
-  // CHECK:   linalg.yield [[CST]]
+  // CHECK:   tensor.yield [[CST]]
   // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
   %1 = "tosa.pad"(%arg0, %0)  : (tensor<1x2xf32>, tensor<2x2xi32>)  -> (tensor<4x9xf32>)
   return %1 : tensor<4x9xf32>
@@ -1169,8 +1169,8 @@ func @pad_float(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
 func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
   %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
   // CHECK: [[CST:%.+]] = arith.constant 0 : i32
-  // CHECK: linalg.pad_tensor
-  // CHECK:   linalg.yield [[CST]]
+  // CHECK: tensor.pad
+  // CHECK:   tensor.yield [[CST]]
   %1 = "tosa.pad"(%arg0, %0)  : (tensor<1x2xi32>, tensor<2x2xi32>)  -> (tensor<4x9xi32>)
   return %1 : tensor<4x9xi32>
 }
@@ -1178,8 +1178,8 @@ func @pad_int(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
 func @pad_quant(%arg0 : tensor<1x2xi32>) -> (tensor<4x9xi32>) {
   %0 = arith.constant dense<[[1, 2], [3, 4]]> : tensor<2x2xi32>
   // CHECK: [[CST:%.+]] = arith.constant 42 : i32
-  // CHECK: linalg.pad_tensor
-  // CHECK:   linalg.yield [[CST]]
+  // CHECK: tensor.pad
+  // CHECK:   tensor.yield [[CST]]
   %1 = "tosa.pad"(%arg0, %0) { quantization_info = { input_zp = 42 : i32}} : (tensor<1x2xi32>, tensor<2x2xi32>)  -> (tensor<4x9xi32>)
   return %1 : tensor<4x9xi32>
 }
@@ -1194,9 +1194,9 @@ func @pad_float_explicit(%arg0 : tensor<1x2xf32>) -> (tensor<4x9xf32>) {
   // CHECK-DAG: [[INDEX3:%.+]] = arith.constant 3 : index
   // CHECK-DAG: [[INDEX4:%.+]] = arith.constant 4 : index
   // CHECK-DAG: [[CST:%.+]] = arith.constant 4.200000e+01 : f32
-  // CHECK: linalg.pad_tensor %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
+  // CHECK: tensor.pad %arg0 low{{\[}}%{{.*}}, [[INDEX3]]] high{{\[}}[[INDEX2]], [[INDEX4]]]  {
   // CHECK: ^bb0(%arg1: index, %arg2: index):  
-  // CHECK:   linalg.yield [[CST]]
+  // CHECK:   tensor.yield [[CST]]
   // CHECK: } : tensor<1x2xf32> to tensor<4x9xf32>
   %1 = arith.constant dense<42.0> : tensor<f32>
   %2 = "tosa.pad"(%arg0, %0, %1)  : (tensor<1x2xf32>, tensor<2x2xi32>, tensor<f32>)  -> (tensor<4x9xf32>)
diff --git a/mlir/test/Dialect/Linalg/bufferize.mlir b/mlir/test/Dialect/Linalg/bufferize.mlir
index 45e722d9f74b6..80bd5f8363e3e 100644
--- a/mlir/test/Dialect/Linalg/bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/bufferize.mlir
@@ -277,9 +277,9 @@ func @bufferize_tensor_collapse_shape(%arg0: tensor<4x5xf32>) -> tensor<20xf32>
 func @pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tensor<4x?x?x?xf32> {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1]  {
+  %out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1]  {
   ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index):  
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32>
   return %out : tensor<4x?x?x?xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 4d844b3035261..44cb18f11d152 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -282,7 +282,7 @@ func @fold_init_tensor_with_slice
 //   CHECK-NOT:   linalg.fill
 //   CHECK-NOT:   linalg.matmul
 //   CHECK-NOT:   linalg.generic
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //       CHECK:   return
 func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>,
                          %arg2: tensor<?x?xf32>, %high : index) {
@@ -296,146 +296,15 @@ func @dead_linalg_tensor(%arg0 : tensor<7x7xi32>, %arg1 : tensor<7x7xf32>,
   ^bb(%3: i32) :
     linalg.yield %3 : i32
   } -> tensor<7x7xi32>
-  %3 = linalg.pad_tensor %arg2 low[%c0, %c0] high[%high, %high] {
-        ^bb0(%arg9: index, %arg10: index):  
-          linalg.yield %cst : f32
+  %3 = tensor.pad %arg2 low[%c0, %c0] high[%high, %high] {
+        ^bb0(%arg9: index, %arg10: index):
+          tensor.yield %cst : f32
   } : tensor<?x?xf32> to tensor<2x4xf32>
   return
 }
 
 // -----
 
-// CHECK-LABEL: func @pad_tensor_same_static_shape(
-//  CHECK-SAME:   %[[ARG0:.*]]: tensor<5x6xf32>
-//   CHECK-NOT:   linalg.pad_tensor
-//       CHECK:   return %[[ARG0]]
-func @pad_tensor_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
-    -> tensor<5x6xf32> {
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = linalg.pad_tensor %arg0 low[%a, 0] high[0, %a] {
-        ^bb0(%arg1: index, %arg2: index):
-          linalg.yield %cst : f32
-  } : tensor<5x6xf32> to tensor<5x6xf32>
-  return %0 : tensor<5x6xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_tensor_nofold_same_static_shape(
-//  CHECK-SAME:   %[[ARG0:.*]]: tensor<5x6xf32>
-//       CHECK:   %[[PAD:.*]] = linalg.pad_tensor
-//       CHECK:   return %[[PAD]]
-func @pad_tensor_nofold_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
-    -> tensor<5x6xf32> {
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = linalg.pad_tensor %arg0 nofold low[%a, 0] high[0, %a] {
-        ^bb0(%arg1: index, %arg2: index):
-          linalg.yield %cst : f32
-  } : tensor<5x6xf32> to tensor<5x6xf32>
-  return %0 : tensor<5x6xf32>
-}
-
-// -----
-
-// CHECK-LABEL:   func @pad_tensor_after_cast_different_shape(
-// CHECK-SAME:      %[[INPUT:.*]]: tensor<?x64x?x?xf32>) -> tensor<?x?x?x?xf32> {
-// CHECK:           %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK:           %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]]
-// CHECK-SAME:        low[0, 0, 1, 1] high[0, 0, 1, 1]  {
-// CHECK:           ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
-// CHECK:             linalg.yield %[[CST]] : f32
-// CHECK:           } : tensor<?x64x?x?xf32> to tensor<?x64x?x?xf32>
-// CHECK:           %[[DYNAMIC:.*]] = tensor.cast %[[PADDED:.*]] :
-// CHECK-SAME:         tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
-// CHECK:           return %[[DYNAMIC]] : tensor<?x?x?x?xf32>
-// CHECK:         }
-func @pad_tensor_after_cast_different_shape(%arg0: tensor<?x64x?x?xf32>)
-    -> tensor<?x?x?x?xf32> {
-  %cst = arith.constant 0.000000e+00 : f32
-  %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
-  %padded = linalg.pad_tensor %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1]  {
-    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  
-    linalg.yield %cst: f32
-  } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
-  return %padded: tensor<?x?x?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL:   func @pad_tensor_after_cast_same_shape(
-// CHECK-SAME:      %[[INPUT:.*]]: tensor<?x64x?x?xf32>,
-// CHECK-SAME:      %[[PADDING:.*]]: index) -> tensor<?x?x?x?xf32> {
-// CHECK:           %[[CST:.*]] = arith.constant 0.000000e+00 : f32
-// CHECK:           %[[PADDED:.*]] = linalg.pad_tensor %[[INPUT]]
-// CHECK-SAME:        low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1]  {
-// CHECK:           ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
-// CHECK:             linalg.yield %[[CST]] : f32
-// CHECK:           } : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
-// CHECK:           return %[[PADDED:.*]] : tensor<?x?x?x?xf32>
-// CHECK:         }
-func @pad_tensor_after_cast_same_shape(%arg0: tensor<?x64x?x?xf32>, %padding : index)
-    -> tensor<?x?x?x?xf32> {
-  %cst = arith.constant 0.000000e+00 : f32
-  %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
-  %padded = linalg.pad_tensor %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1]  {
-    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  
-    linalg.yield %cst: f32
-  } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
-  return %padded: tensor<?x?x?x?xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_tensor_of_cast(
-// CHECK-NOT:     tensor.cast
-// CHECK:         linalg.pad_tensor
-// CHECK:         tensor<8x?xf32> to tensor<8x32xf32>
-func @pad_tensor_of_cast(%t: tensor<8x?xf32>, %s: index) -> tensor<8x32xf32> {
-  %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.000000e+00 : f32
-  %0 = tensor.cast %t : tensor<8x?xf32> to tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %s]  {
-  ^bb0(%arg9: index, %arg10: index):  
-    linalg.yield %cst : f32
-  } : tensor<?x?xf32> to tensor<8x32xf32>
-  return %1 : tensor<8x32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @cast_of_pad_more_static
-func @cast_of_pad_more_static(%arg0: tensor<?x?xf32>, %padding: index) -> tensor<32x32xf32> {
-  %cst = arith.constant 0.000000e+00 : f32
-  // CHECK: %[[PAD:.*]] = linalg.pad_tensor
-  // CHECK: tensor<?x?xf32> to tensor<32x32xf32>
-  %padded = linalg.pad_tensor %arg0 low[%padding, %padding] high[0, 0] {
-  ^bb0(%arg1: index, %arg2: index):
-    linalg.yield %cst : f32
-  } : tensor<?x?xf32> to tensor<?x?xf32>
-  // CHECK-NOT: tensor.cast
-  %casted = tensor.cast %padded : tensor<?x?xf32> to tensor<32x32xf32>
-  // CHECK: return %[[PAD]]
-  return %casted : tensor<32x32xf32>
-}
-
-// -----
-
-// CHECK-LABEL: @cast_of_pad_less_static
-func @cast_of_pad_less_static(%arg0: tensor<32x?x?xf32>, %padding: index) -> tensor<?x32x32xf32> {
-  %cst = arith.constant 0.000000e+00 : f32
-  // CHECK: linalg.pad_tensor
-  %padded = linalg.pad_tensor %arg0 low[%padding, %padding, %padding] high[0, 0, 0] {
-  ^bb0(%arg1: index, %arg2: index, %arg3: index):
-    linalg.yield %cst : f32
-  } : tensor<32x?x?xf32> to tensor<32x?x?xf32>
-  // CHECK: %[[CAST:.*]] = tensor.cast
-  %casted = tensor.cast %padded : tensor<32x?x?xf32> to tensor<?x32x32xf32>
-  // CHECK: return %[[CAST]]
-  return %casted : tensor<?x32x32xf32>
-}
-
-// -----
-
 func @propogate_casts(%arg0 : tensor<?x?xf32>, %arg1 : f32, %arg2 : index,
     %arg3 : index) -> tensor<?x?xf32> {
   %c0 = arith.constant 0 : index
@@ -579,71 +448,6 @@ func @fold_tiled_loop_inputs(%A: memref<192xf32>, %A_tensor: tensor<192xf32>,
 
 // -----
 
-func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
-  %c0 = arith.constant 0 : index
-  %cst = arith.constant 0.0 : f32
-  %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low[%c0, %c0] high[%c0, %c0]  {
-    ^bb0(%arg1: index, %arg2: index):  
-      linalg.yield %cst : f32
-  } : tensor<?x?xf32> to tensor<4x4xf32>
-  return %1 : tensor<4x4xf32>
-}
-// CHECK-LABEL: @tensor_pad_cast
-// CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32>
-// CHECK: return %[[ARG0]]
-
-// -----
-
-// CHECK-LABEL: func @fold_pad_tensor_source_cast(
-//  CHECK-SAME:                  %[[ARG0:.*]]: tensor<4x?xf32>
-//   CHECK-NOT:   tensor.cast
-//       CHECK:   %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]]
-func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> {
-  %cst = arith.constant 0.0 : f32
-  %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low[0, 0] high[0, 1]  {
-    ^bb0(%arg1: index, %arg2: index):  
-      linalg.yield %cst : f32
-  } : tensor<?x?xf32> to tensor<4x4xf32>
-  return %1 : tensor<4x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_static_zero_cast(
-//  CHECK-SAME:                  %[[ARG0:.*]]: tensor<?x?x?xf32>
-//   CHECK-NOT:   linalg.pad_tensor
-//       CHECK:   %[[RESULT:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
-//       CHECK:   return %[[RESULT]]
-func @pad_static_zero_cast(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
-  %c0 = arith.constant 0 : index
-  %0 = linalg.pad_tensor %arg0 low[0, %c0, 0] high[0, 0, %c0] {
-    ^bb0(%arg1: index, %arg2: index, %arg3: index):
-      linalg.yield %pad_value : f32
-    } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
-
-  return %0 : tensor<2x3x4xf32>
-}
-
-// -----
-
-// CHECK-LABEL: func @pad_nofold_static_zero(
-//  CHECK-SAME:                  %[[ARG0:.*]]: tensor<?x?x?xf32>
-//       CHECK:   %[[PAD:.*]] = linalg.pad_tensor
-//       CHECK:   return %[[PAD]]
-func @pad_nofold_static_zero(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
-  %c0 = arith.constant 0 : index
-  %0 = linalg.pad_tensor %arg0 nofold low[0, %c0, 0] high[0, 0, %c0] {
-    ^bb0(%arg1: index, %arg2: index, %arg3: index):
-      linalg.yield %pad_value : f32
-    } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
-
-  return %0 : tensor<2x3x4xf32>
-}
-
-// -----
-
 func private @some_use(%i : index, %j : index)
 
 // CHECK-LABEL: func @init_canonicalize
diff --git a/mlir/test/Dialect/Linalg/codegen-strategy.mlir b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
index 7119db8f0ccd1..d698d63758e4c 100644
--- a/mlir/test/Dialect/Linalg/codegen-strategy.mlir
+++ b/mlir/test/Dialect/Linalg/codegen-strategy.mlir
@@ -48,7 +48,7 @@ func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<7
 func @matmul(%arg0: tensor<72x72xf32>, %arg1: tensor<72x72xf32>, %arg2: tensor<72x72xf32>) -> tensor<72x72xf32> {
 
   // Check the padding of the input operands has been hoisted out of the tile loop nest.
-  //      CHECK-PAD-COUNT=2: linalg.pad_tensor %{{.*}} nofold
+  //      CHECK-PAD-COUNT=2: tensor.pad %{{.*}} nofold
   //              CHECK-PAD: scf.for
   // Check CSE eliminates the duplicate min operations introduced by tiling.
   //              CHECK-PAD: affine.min #[[MAP0]]
diff --git a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
index 9dd1c1e1ef967..8b5e2a313d5a5 100644
--- a/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/generalize-pad-tensor.mlir
@@ -9,9 +9,9 @@
 // CHECK:           return %[[PADDED]] : tensor<1x32x32x1xf32>
 func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
   %cst = arith.constant 0.000000e+00 : f32
-  %0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0]  {
+  %0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0]  {
   ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
   return %0 : tensor<1x32x32x1xf32>
 }
@@ -38,9 +38,9 @@ func @generalize_pad_tensor_static_shape(%arg0: tensor<1x28x28x1xf32>) -> tensor
 func @generalize_pad_tensor_dynamic_shape(%arg0: tensor<4x?x2x?xf32>, %arg1: index) -> tensor<4x?x?x?xf32> {
   %c0 = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  %out = linalg.pad_tensor %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1]  {
+  %out = tensor.pad %arg0 low[%c0, %c0, %arg1, %c0] high[%c0, %c0, %c0, %arg1]  {
   ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index, %gen_arg4: index):  
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<4x?x2x?xf32> to tensor<4x?x?x?xf32>
   return %out : tensor<4x?x?x?xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir
index 566abcfbc39ec..416dfe37e93d0 100644
--- a/mlir/test/Dialect/Linalg/hoist-padding.mlir
+++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir
@@ -18,7 +18,7 @@ func @static_size_divisible(%arg0: tensor<24x12xf32>,
   //      MATVEC:  %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] =
   //        MATVEC:   %[[PIDX0:.*]] = affine.apply #[[DIV4]](%[[PIV0]])
   //        MATVEC:   %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [4]
-  //        MATVEC:   %[[T2:.*]] = linalg.pad_tensor %[[T1]]
+  //        MATVEC:   %[[T2:.*]] = tensor.pad %[[T1]]
   //        MATVEC:   %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]]
 
   //      MATVEC:  scf.for %[[IV0:[0-9a-zA-Z]*]] =
@@ -29,9 +29,9 @@ func @static_size_divisible(%arg0: tensor<24x12xf32>,
     //  MATVEC-DAG:   %[[IDX0:.*]] = affine.apply #[[DIV4]](%[[IV0]])
     //  MATVEC-DAG:   %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
-    %3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0]  {
+    %3 = tensor.pad %2 nofold low[%c0] high[%c0]  {
     ^bb0(%arg5: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
     // Check matvec uses the packed input vector.
@@ -67,7 +67,7 @@ func @static_size_not_divisible(%arg0: tensor<24x12xf32>,
   //        MATVEC:   %[[TS0:.*]] = affine.min #[[MAP0]](%[[PIV0]])
   //        MATVEC:   %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [%[[TS0]]]
   //        MATVEC:   %[[HPD0:.*]] = affine.apply #[[MAP1]](%[[TS0]])
-  //        MATVEC:   %[[T2:.*]] = linalg.pad_tensor %[[T1]]{{.*}}high[%[[HPD0]]
+  //        MATVEC:   %[[T2:.*]] = tensor.pad %[[T1]]{{.*}}high[%[[HPD0]]
   //        MATVEC:   %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]]
 
   //      MATVEC:  scf.for %[[IV0:[0-9a-zA-Z]*]] =
@@ -80,13 +80,13 @@ func @static_size_not_divisible(%arg0: tensor<24x12xf32>,
     //  MATVEC-DAG:   %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
     %3 = tensor.extract_slice %arg1[%arg3] [%1] [1] : tensor<12xf32> to tensor<?xf32>
     %4 = affine.apply #map1(%1)
-    %5 = linalg.pad_tensor %2 low[%c0, %c0] high[%c0, %4]  {
+    %5 = tensor.pad %2 low[%c0, %c0] high[%c0, %4]  {
     ^bb0(%arg5: index, %arg6: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<24x?xf32> to tensor<24x5xf32>
-    %6 = linalg.pad_tensor %3 low[%c0] high[%4]  {
+    %6 = tensor.pad %3 low[%c0] high[%4]  {
     ^bb0(%arg5: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<?xf32> to tensor<5xf32>
 
     // Check matvec uses the packed input vector.
@@ -127,7 +127,7 @@ func @dynamic_size(%arg0: tensor<24x?xf32>,
   //        MATVEC:   %[[TS0:.*]] = affine.min #[[MAP0]](%[[PIV0]])[%[[D0]]]
   //        MATVEC:   %[[T1:.*]] = tensor.extract_slice %[[ARG1]][%[[PIV0]]] [%[[TS0]]]
   //        MATVEC:   %[[HPD0:.*]] = affine.apply #[[MAP1]](%[[TS0]])
-  //        MATVEC:   %[[T2:.*]] = linalg.pad_tensor %[[T1]]{{.*}}high[%[[HPD0]]
+  //        MATVEC:   %[[T2:.*]] = tensor.pad %[[T1]]{{.*}}high[%[[HPD0]]
   //        MATVEC:   %[[T3:.*]] = tensor.insert_slice %[[T1:.*]]{{.*}}[%[[PIDX0]]
 
   //      MATVEC:  scf.for %[[IV0:[0-9a-zA-Z]*]] =
@@ -140,13 +140,13 @@ func @dynamic_size(%arg0: tensor<24x?xf32>,
     //  MATVEC-DAG:   %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
     %4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor<?xf32> to tensor<?xf32>
     %5 = affine.apply #map1(%2)
-    %6 = linalg.pad_tensor %3 low[%c0, %c0] high[%c0, %5]  {
+    %6 = tensor.pad %3 low[%c0, %c0] high[%c0, %5]  {
     ^bb0(%arg5: index, %arg6: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<24x?xf32> to tensor<24x4xf32>
-    %7 = linalg.pad_tensor %4 nofold low[%c0] high[%5]  {
+    %7 = tensor.pad %4 nofold low[%c0] high[%5]  {
     ^bb0(%arg5: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<?xf32> to tensor<4xf32>
 
     // Check matvec uses the packed input vector.
@@ -174,13 +174,13 @@ func @non_constant_padding(%arg0: tensor<24x12xf32>,
 
     // Check the non constant padding is not hoisted.
     //      MATVEC:  %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
-    //      MATVEC:  %[[T1:.*]] = linalg.pad_tensor %[[T0]]
+    //      MATVEC:  %[[T1:.*]] = tensor.pad %[[T0]]
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
-    %3 = linalg.pad_tensor %2 nofold low[%c0] high[%c0]  {
+    %3 = tensor.pad %2 nofold low[%c0] high[%c0]  {
     ^bb0(%arg5: index):  
       %5 = arith.index_cast %arg3 : index to i32
       %6 = arith.sitofp %5 : i32 to f32
-      linalg.yield %6 : f32
+      tensor.yield %6 : f32
     } : tensor<4xf32> to tensor<4xf32>
 
     // Check matvec uses the padded input vector.
@@ -209,13 +209,13 @@ func @non_constant_op_padding(%arg0: tensor<24x12xf32>,
     // Check the non constant op padding is not hoisted.
     //      MATVEC:  %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
     //      MATVEC:  %[[V0:.*]] = tensor.extract %[[ARG1]][%[[IV0]]
-    //      MATVEC:  %[[T1:.*]] = linalg.pad_tensor %[[T0]]
-    //        MATVEC:  linalg.yield %[[V0]]
+    //      MATVEC:  %[[T1:.*]] = tensor.pad %[[T0]]
+    //        MATVEC:  tensor.yield %[[V0]]
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = tensor.extract %arg1[%arg3] : tensor<12xf32>
-    %4 = linalg.pad_tensor %2 nofold low[%c0] high[%c0]  {
+    %4 = tensor.pad %2 nofold low[%c0] high[%c0]  {
     ^bb0(%arg5: index):  
-      linalg.yield %3 : f32
+      tensor.yield %3 : f32
     } : tensor<4xf32> to tensor<4xf32>
 
     // Check matvec uses the padded input vector.
@@ -247,12 +247,12 @@ func @non_index_operand(%arg0: tensor<24x12xf32>,
     // Check the index_cast prevents hoisting due to its non index operand.
     //      MATVEC:  %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
     //      MATVEC:  %[[IDX0:.*]] = arith.index_cast %[[ARG3]]
-    //      MATVEC:  %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]]
+    //      MATVEC:  %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]]
     %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = arith.index_cast %arg3 : i32 to index
-    %4 = linalg.pad_tensor %2 nofold low[%3] high[%3]  {
+    %4 = tensor.pad %2 nofold low[%3] high[%3]  {
     ^bb0(%arg6: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
     // Check matvec uses the padded input vector.
@@ -284,12 +284,12 @@ func @memory_effect(%arg0: tensor<24x12xf32>,
     // Check the load prevents hoisting due to its memory effect.
     //      MATVEC:  %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
     //      MATVEC:  %[[IDX0:.*]] = memref.load %[[ARG3]]
-    //      MATVEC:  %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]]
+    //      MATVEC:  %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]]
     %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = memref.load %arg3[%c0] : memref<?xindex>
-    %4 = linalg.pad_tensor %2 nofold low[%3] high[%3]  {
+    %4 = tensor.pad %2 nofold low[%3] high[%3]  {
     ^bb0(%arg6: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
     // Check matvec uses the padded input vector.
@@ -321,15 +321,15 @@ func @index_result_loop(%arg0: tensor<24x12xf32>,
     // Check the unexpected operation with a region prevents hoisting.
     //      MATVEC:  %[[T0:.*]] = tensor.extract_slice %[[ARG1]][%[[IV0]]
     //      MATVEC:  %[[IDX0:.*]] = scf.for {{.*}} step %[[ARG3]]
-    //      MATVEC:  %[[T1:.*]] = linalg.pad_tensor %[[T0]]{{.*}}%[[IDX0]]
+    //      MATVEC:  %[[T1:.*]] = tensor.pad %[[T0]]{{.*}}%[[IDX0]]
     %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = scf.for %arg6 = %c0 to %c12 step %arg3 iter_args(%arg7 = %c0) -> (index) {
       %6 = arith.addi %arg3, %arg7 : index
       scf.yield %6 : index
     }
-    %4 = linalg.pad_tensor %2 nofold low[%3] high[%3]  {
+    %4 = tensor.pad %2 nofold low[%3] high[%3]  {
     ^bb0(%arg6: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
     // Check matvec uses the padded input vector.
@@ -361,7 +361,7 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
   // Check the second input operand is hoisted by two loop nests.
   //      MATMUL:  %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] =
   //        MATMUL:   %[[T1:.*]] = tensor.extract_slice %[[ARG1]]
-  //        MATMUL:   %[[T2:.*]] = linalg.pad_tensor %[[T1]]
+  //        MATMUL:   %[[T2:.*]] = tensor.pad %[[T1]]
 
   //      MATMUL:  scf.for %[[IV0:[0-9a-zA-Z]*]] =
   %0 = scf.for %arg3 = %c0 to %c12 step %c5 iter_args(%arg4 = %arg2) -> (tensor<12x24xf32>) {
@@ -372,9 +372,9 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
     %3 = affine.apply #map1(%1)
 
     // Check the fused and padded fill op does not prevent hoisting.
-    %4 = linalg.pad_tensor %2 nofold low[%c0, %c0] high[%3, %c0]  {
+    %4 = tensor.pad %2 nofold low[%c0, %c0] high[%3, %c0]  {
     ^bb0(%arg5: index, %arg6: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<?x24xf32> to tensor<5x24xf32>
     %5 = linalg.fill(%cst, %4) : f32, tensor<5x24xf32> -> tensor<5x24xf32>
     %6 = tensor.extract_slice %5[0, 0] [%1, 24] [1, 1] : tensor<5x24xf32> to tensor<?x24xf32>
@@ -382,7 +382,7 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
     // Check the first input operand is hoisted by one loop nest.
     //      MATMUL:  %[[T3:.*]] = scf.for %[[PIV1:[0-9a-z]+]] =
     //        MATMUL:   %[[T4:.*]] = tensor.extract_slice %[[ARG0]]
-    //        MATMUL:   %[[T5:.*]] = linalg.pad_tensor %[[T4]]
+    //        MATMUL:   %[[T5:.*]] = tensor.pad %[[T4]]
 
     //      MATMUL:  scf.for %[[IV1:[0-9a-zA-Z]*]] =
     %7 = scf.for %arg5 = %c0 to %c6 step %c3 iter_args(%arg6 = %6) -> (tensor<?x24xf32>) {
@@ -393,20 +393,20 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
       %9 = tensor.extract_slice %arg0[%arg3, %arg5] [%1, 3] [1, 1] : tensor<12x6xf32> to tensor<?x3xf32>
       %10 = tensor.extract_slice %arg1[%arg5, 0] [3, 24] [1, 1] : tensor<6x24xf32> to tensor<3x24xf32>
       %11 = tensor.extract_slice %arg6[0, 0] [%1, 24] [1, 1] : tensor<?x24xf32> to tensor<?x24xf32>
-      %12 = linalg.pad_tensor %9 nofold low[%c0, %c0] high[%3, %c0]  {
+      %12 = tensor.pad %9 nofold low[%c0, %c0] high[%3, %c0]  {
       ^bb0(%arg7: index, %arg8: index):  
-        linalg.yield %cst : f32
+        tensor.yield %cst : f32
       } : tensor<?x3xf32> to tensor<5x3xf32>
-      %13 = linalg.pad_tensor %10 nofold low[%c0, %c0] high[%c0, %c0]  {
+      %13 = tensor.pad %10 nofold low[%c0, %c0] high[%c0, %c0]  {
       ^bb0(%arg7: index, %arg8: index):  
-        linalg.yield %cst : f32
+        tensor.yield %cst : f32
       } : tensor<3x24xf32> to tensor<3x24xf32>
 
       // Check the output padding is not hoisted.
-      //      MATMUL:   %[[T8:.*]] = linalg.pad_tensor
-      %14 = linalg.pad_tensor %11 nofold low[%c0, %c0] high[%3, %c0]  {
+      //      MATMUL:   %[[T8:.*]] = tensor.pad
+      %14 = tensor.pad %11 nofold low[%c0, %c0] high[%3, %c0]  {
       ^bb0(%arg7: index, %arg8: index):  
-        linalg.yield %cst : f32
+        tensor.yield %cst : f32
       } : tensor<?x24xf32> to tensor<5x24xf32>
 
       // Check matmul uses the padded operands.
diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir
index b205e30213498..40defe47a1cb6 100644
--- a/mlir/test/Dialect/Linalg/invalid.mlir
+++ b/mlir/test/Dialect/Linalg/invalid.mlir
@@ -353,71 +353,6 @@ func @init_tensor_err(%arg0 : index)
 
 // -----
 
-
-func @pad_result_type(%arg0: tensor<?x2x3x4xi32>, %arg1: index, %arg2: i32) -> tensor<?x?x?x8xf32> {
-  // expected-error @+1 {{specified type 'tensor<?x?x?x8xf32>' does not match the inferred type 'tensor<?x?x?x9xi32>}}
-  %0 = linalg.pad_tensor %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] {
-  ^bb0(%arg3: index, %arg4: index):  
-    linalg.yield %arg2 : i32
-  } : tensor<?x2x3x4xi32> to tensor<?x?x?x8xf32>
-  return %0 : tensor<?x?x?x8xf32>
-}
-
-// -----
-
-func @pad_number_of_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
-  // expected-error @+1 {{expected the block to have 2 arguments}}
-  %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
-  ^bb0(%arg2: index, %arg3: index, %arg4: index):  
-    linalg.yield %arg1 : i32
-  } : tensor<?x4xi32> to tensor<?x9xi32>
-  return %0 : tensor<?x9xi32>
-}
-
-// -----
-
-func @pad_no_block(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
-  // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}}
-  %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
-  } : tensor<?x4xi32> to tensor<?x9xi32>
-  return %0 : tensor<?x9xi32>
-}
-
-// -----
-
-func @pad_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
-  // expected-error @+1 {{op expected block argument 1 to be an index}}
-  %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
-  ^bb0(%arg2: i32, %arg3: i32):  
-    linalg.yield %arg1 : i32
-  } : tensor<?x4xi32> to tensor<?x9xi32>
-  return %0 : tensor<?x9xi32>
-}
-
-// -----
-
-func @pad_num_yields(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
-  // expected-error @+3 {{op expected single yield operand (got 2)}}
-  %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
-  ^bb0(%arg2: index, %arg3: index):  
-    linalg.yield %arg1, %arg1 : i32, i32
-  } : tensor<?x4xi32> to tensor<?x9xi32>
-  return %0 : tensor<?x9xi32>
-}
-
-// -----
-
-func @pad_yield_type(%arg0: tensor<?x4xi32>, %arg1: i8) -> tensor<?x9xi32> {
-  // expected-error @+3 {{op expected yield type to match shape element type}}
-  %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
-  ^bb0(%arg2: index, %arg3: index):  
-    linalg.yield %arg1 : i8
-  } : tensor<?x4xi32> to tensor<?x9xi32>
-  return %0 : tensor<?x9xi32>
-}
-
-// -----
-
 func @illegal_fill_tensor_no_return(%arg0 : index, %arg1 : index, %arg2 : f32)
 {
   %0 = linalg.init_tensor [%arg0, %arg1] : tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
index 0e4c62447e507..c6a3b1eed30f1 100644
--- a/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
+++ b/mlir/test/Dialect/Linalg/lower-pad-tensor.mlir
@@ -6,9 +6,9 @@
 func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3xf32> {
   %cst = arith.constant 0.000000e+00 : f32
   %0 = bufferization.to_tensor %arg0 : memref<1x28x28x1xf32>
-  %1 = linalg.pad_tensor %0 low[1, 1, 1, 2] high[0, 2, 2, 0]  {
+  %1 = tensor.pad %0 low[1, 1, 1, 2] high[0, 2, 2, 0]  {
   ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<1x28x28x1xf32> to tensor<2x31x31x3xf32>
   %2 = bufferization.to_memref %1 : memref<2x31x31x3xf32>
   return %2 : memref<2x31x31x3xf32>
@@ -25,9 +25,9 @@ func @pad_tensor_with_memrefs(%arg0: memref<1x28x28x1xf32>) -> memref<2x31x31x3x
 // CHECK-LABEL: func @pad_tensor_no_memrefs
 func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> {
   %cst = arith.constant 0.000000e+00 : f32
-  %0 = linalg.pad_tensor %arg0 low[1, 2, 2] high[0, 2, 2]  {
+  %0 = tensor.pad %arg0 low[1, 2, 2] high[0, 2, 2]  {
   ^bb0(%arg1: index, %arg2: index, %arg3: index):  
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<1x28x28xf32> to tensor<2x32x32xf32>
   return %0 : tensor<2x32x32xf32>
 }
@@ -43,9 +43,9 @@ func @pad_tensor_no_memrefs(%arg0: tensor<1x28x28xf32>) -> tensor<2x32x32xf32> {
 // CHECK-LABEL: func @pad_tensor_detailed
 func @pad_tensor_detailed(%arg0: tensor<1x28x28x1xf32>) -> tensor<1x32x32x1xf32> {
   %cst = arith.constant 0.000000e+00 : f32
-  %0 = linalg.pad_tensor %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0]  {
+  %0 = tensor.pad %arg0 low[0, 2, 2, 0] high[0, 2, 2, 0]  {
   ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):  
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<1x28x28x1xf32> to tensor<1x32x32x1xf32>
   return %0 : tensor<1x32x32x1xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/pad.mlir b/mlir/test/Dialect/Linalg/pad.mlir
index 31163f5a6be8c..36879b7254a7e 100644
--- a/mlir/test/Dialect/Linalg/pad.mlir
+++ b/mlir/test/Dialect/Linalg/pad.mlir
@@ -31,10 +31,10 @@ func @static_sizes_output_divisible(%arg0: tensor<24x12xf32>,
 
   // Check statically sized matmul inputs with partially divisible sizes are padded.
   //      MATMUL:   %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS2]]]
-  //      MATMUL:   %[[T3:.*]] = linalg.pad_tensor %[[T0]] nofold
+  //      MATMUL:   %[[T3:.*]] = tensor.pad %[[T0]] nofold
   // MATMUL-SAME:                  [%[[C0]], %[[C0]]]
   // MATMUL-SAME:                  [%[[C0]], %[[V0]]
-  //      MATMUL:   %[[T4:.*]] = linalg.pad_tensor %[[T1]] nofold
+  //      MATMUL:   %[[T4:.*]] = tensor.pad %[[T1]] nofold
 
   // Check the statically sized matmul output with fully divisible sizes is not padded.
   //      MATMUL:   %[[T5:.*]] = linalg.matmul
@@ -74,7 +74,7 @@ func @static_sizes_input_divisible(%arg0: tensor<24x12xf32>,
 
   // Check the statically sized matmul output with partially divisible sizes is padded.
   //      MATMUL:   %[[V0:.*]] = affine.apply #[[MAP1]]()[%[[TS1]]]
-  //      MATMUL:   %[[T1:.*]] = linalg.pad_tensor %[[T0]] low
+  //      MATMUL:   %[[T1:.*]] = tensor.pad %[[T0]] low
   // MATMUL-SAME:                  [%[[C0]], %[[C0]]]
   // MATMUL-SAME:                  [%[[C0]], %[[V0]]
 
@@ -137,11 +137,11 @@ func @dynamic_sizes(%arg0: tensor<?x?xf32>,
   // Check all matmul operands are padded.
   //      MATMUL:   %[[V0:.*]] = affine.apply #[[MAP3]]()[%[[TS0]]]
   //      MATMUL:   %[[V1:.*]] = affine.apply #[[MAP4]]()[%[[TS2]]]
-  //      MATMUL:   %[[T3:.*]] = linalg.pad_tensor %{{.*}} nofold
+  //      MATMUL:   %[[T3:.*]] = tensor.pad %{{.*}} nofold
   // MATMUL-SAME:                  [%[[C0]], %[[C0]]]
   // MATMUL-SAME:                  [%[[V0]], %[[V1]]
-  //      MATMUL:   %[[T4:.*]] = linalg.pad_tensor %{{.*}} nofold
-  //      MATMUL:   %[[T5:.*]] = linalg.pad_tensor %{{.*}} low
+  //      MATMUL:   %[[T4:.*]] = tensor.pad %{{.*}} nofold
+  //      MATMUL:   %[[T5:.*]] = tensor.pad %{{.*}} low
 
   // Check the dynamic matmul has been erased.
   //  MATMUL-NOT:   = linalg.matmul {{.*}} tensor<?x?xf32>
@@ -172,7 +172,7 @@ func @pad_multiple(%arg0: tensor<64x64xf32>,
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
 
   // Check both fill operations are padded by the same pad tensor operation.
-  //      FILL:  %[[T0:.*]] = linalg.pad_tensor
+  //      FILL:  %[[T0:.*]] = tensor.pad
   //      FILL:  %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
   //      FILL:  %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]])
   //      FILL:  = tensor.extract_slice %[[T2]]
@@ -197,20 +197,20 @@ func @compose_padding(%arg0: tensor<64x64xf32>,
   //      MATMUL:  %[[T0:.*]] = tensor.extract_slice %[[ARG0]]
   // MATMUL-SAME:                                     [0, 0]
   // MATMUL-SAME:                                     [%[[SIZE]], %[[SIZE]]]
-  //      MATMUL:  %[[T1:.*]] = linalg.pad_tensor %[[T0]]
+  //      MATMUL:  %[[T1:.*]] = tensor.pad %[[T0]]
   //      MATMUL:  %[[T2:.*]] = linalg.fill(%{{.*}}, %[[T1]]
   //      MATMUL:  %[[T3:.*]] = linalg.fill(%{{.*}}, %[[T2]]
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0]  {
+  %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0]  {
     ^bb0(%arg3: index, %arg4: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
   } : tensor<?x?xf32> to tensor<64x64xf32>
   %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
   %3 = linalg.fill(%cst, %2) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
   %4 = tensor.extract_slice %3[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
 
   // Check there are no additional pad tensor operations.
-  //  MATMUL-NOT:  linalg.pad_tensor
+  //  MATMUL-NOT:  tensor.pad
 
   // Check the matmul directly uses the result of the fill operation.
   //      MATMUL:  %[[T4:.*]] = linalg.matmul ins(%[[T3]]
@@ -233,16 +233,16 @@ func @different_padding_values(%arg0: tensor<64x64xf32>,
   %cst = arith.constant 42.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0]  {
+  %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0]  {
     ^bb0(%arg3: index, %arg4: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
   } : tensor<?x?xf32> to tensor<64x64xf32>
   %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
   %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
 
   // Different padding values prevent composing the paddings (42.0 vs. 0.0).
   //      MATMUL:  = linalg.fill
-  //      MATMUL:  = linalg.pad_tensor
+  //      MATMUL:  = tensor.pad
   //      MATMUL:  = linalg.matmul
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
   return %5 : tensor<?x?xf32>
@@ -258,16 +258,16 @@ func @different_padding_dynamic_sizes(%arg0: tensor<64x64xf32>,
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0] [%iv0, %iv0] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0]  {
+  %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0]  {
     ^bb0(%arg3: index, %arg4: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
   } : tensor<?x?xf32> to tensor<64x64xf32>
   %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
   %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
 
   // Different dynamic sizes prevent composing the paddings (%iv0 vs %size).
   //      MATMUL:  = linalg.fill
-  //      MATMUL:  = linalg.pad_tensor
+  //      MATMUL:  = tensor.pad
   //      MATMUL:  = linalg.matmul
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
   return %5 : tensor<?x?xf32>
@@ -283,16 +283,16 @@ func @different_padding_dynamic_rank(%arg0: tensor<64x64x1xf32>,
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0, 0] [%size, %size, 1] [1, 1, 1] : tensor<64x64x1xf32> to tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0]  {
+  %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0]  {
     ^bb0(%arg3: index, %arg4: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
   } : tensor<?x?xf32> to tensor<64x64xf32>
   %2 = linalg.fill(%cst, %1) : f32, tensor<64x64xf32> -> tensor<64x64xf32>
   %3 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<64x64xf32> to tensor<?x?xf32>
 
   // Different dynamic ranks prevent composing the paddings ([%size, %size, 1] vs [%size, %size]).
   //      MATMUL:  = linalg.fill
-  //      MATMUL:  = linalg.pad_tensor
+  //      MATMUL:  = tensor.pad
   //      MATMUL:  = linalg.matmul
   %4 = linalg.matmul ins(%3, %3 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%3 : tensor<?x?xf32>) -> tensor<?x?xf32>
   return %4 : tensor<?x?xf32>
@@ -308,16 +308,16 @@ func @different_padding_static_sizes(%arg0: tensor<62x62xf32>,
   %cst = arith.constant 0.0 : f32
   %size = affine.min #map0()[%iv0]
   %0 = tensor.extract_slice %arg0[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low[0, 0] high[%iv0, %iv0]  {
+  %1 = tensor.pad %0 low[0, 0] high[%iv0, %iv0]  {
     ^bb0(%arg3: index, %arg4: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
   } : tensor<?x?xf32> to tensor<62x62xf32>
   %2 = linalg.fill(%cst, %1) : f32, tensor<62x62xf32> -> tensor<62x62xf32>
   %4 = tensor.extract_slice %2[0, 0] [%size, %size] [1, 1] : tensor<62x62xf32> to tensor<?x?xf32>
 
   // Different static sizes prevent composing the paddings (62 vs 64 derived from #map0).
   //      MATMUL:  = linalg.fill
-  //      MATMUL:  = linalg.pad_tensor
+  //      MATMUL:  = tensor.pad
   //      MATMUL:  = linalg.matmul
   %5 = linalg.matmul ins(%4, %4 : tensor<?x?xf32>, tensor<?x?xf32>) outs(%4 : tensor<?x?xf32>) -> tensor<?x?xf32>
   return %5 : tensor<?x?xf32>
@@ -336,7 +336,7 @@ func @scalar_operand(%arg0: f32,
   %0 = affine.min #map0()[%iv0]
 
   //      FILL:   %[[T0:.*]] = tensor.extract_slice %[[ARG1]]
-  //      FILL:   %[[T1:.*]] = linalg.pad_tensor %[[T0]] nofold
+  //      FILL:   %[[T1:.*]] = tensor.pad %[[T0]] nofold
   %1 = tensor.extract_slice %arg1[0, 0] [4, %0] [1, 1] : tensor<24x12xf32> to tensor<4x?xf32>
 
   // Check only the fill output operand is padded.
@@ -361,8 +361,8 @@ func @static_extract_slice_missing(%arg0: tensor<24x12xf32>,
   %2 = tensor.extract_slice %arg1[%iv2, %iv1] [%0, 5] [1, 1] : tensor<12x25xf32> to tensor<?x5xf32>
 
   // Check the matmul inputs are padded despite the missing slice for the static output.
-  //      MATMUL:  %[[T0:.*]] = linalg.pad_tensor
-  //      MATMUL:  %[[T1:.*]] = linalg.pad_tensor
+  //      MATMUL:  %[[T0:.*]] = tensor.pad
+  //      MATMUL:  %[[T1:.*]] = tensor.pad
   //      MATMUL:  = linalg.matmul ins(%[[T0]], %[[T1]]
   // MATMUL-SAME:                 outs(%[[ARG2]]
   %3 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%arg2 : tensor<4x5xf32>) -> tensor<4x5xf32>
@@ -414,8 +414,8 @@ func @static_input_padding_only(%arg0: tensor<24x12xf32>,
   %3 = tensor.extract_slice %arg2[%iv0, %iv1] [4, 5] [1, 1] : tensor<24x25xf32> to tensor<4x5xf32>
 
   // Check the matmul inputs are padded despite the failure to compute a padding value for the static output.
-  // INPUTS-ONLY:  %[[T1:.*]] = linalg.pad_tensor
-  // INPUTS-ONLY:  %[[T2:.*]] = linalg.pad_tensor
+  // INPUTS-ONLY:  %[[T1:.*]] = tensor.pad
+  // INPUTS-ONLY:  %[[T2:.*]] = tensor.pad
   // INPUTS-ONLY:  = linalg.matmul ins(%[[T1]], %[[T2]]
   // INPUTS-ONLY-SAME:             outs(%[[T0]]
   %4 = linalg.matmul ins(%1, %2 : tensor<4x?xf32>, tensor<?x5xf32>) outs(%3 : tensor<4x5xf32>) -> tensor<4x5xf32>
@@ -465,7 +465,7 @@ func @rank_reducing(%arg0: tensor<1x64x1x64xf32>,
   %0 = tensor.extract_slice %arg0[0, 0, 0, 0] [1, %size, 1, %size] [1, 1, 1, 1] : tensor<1x64x1x64xf32> to tensor<1x?x?xf32>
 
   // Check the fill is padded despite the rank-reducing slice operation.
-  //      FILL:  %[[T0:.*]] = linalg.pad_tensor
+  //      FILL:  %[[T0:.*]] = tensor.pad
   //      FILL:  %[[T1:.*]] = linalg.fill(%{{.*}}, %[[T0]])
   // FILL-SAME:    tensor<1x64x64xf32>
   //      FILL:  = tensor.extract_slice %[[T1]]
diff --git a/mlir/test/Dialect/Linalg/pad_fusion.mlir b/mlir/test/Dialect/Linalg/pad_fusion.mlir
index 7f6bd150f3de9..90e6381f6f16a 100644
--- a/mlir/test/Dialect/Linalg/pad_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/pad_fusion.mlir
@@ -15,9 +15,9 @@ func @dynamic_pad_fusion(%arg0 : tensor<?x?xf32>, %arg1 : index, %arg2 : index,
       %1 = arith.mulf %arg6, %arg6 : f32
       linalg.yield %1 : f32
     } -> tensor<?x?xf32>
-  %1 = linalg.pad_tensor %0 low [%arg1, %arg2] high [%arg3, %arg4] {
+  %1 = tensor.pad %0 low [%arg1, %arg2] high [%arg3, %arg4] {
     ^bb0(%arg6: index, %arg7 : index):
-      linalg.yield %arg5 : f32
+      tensor.yield %arg5 : f32
     } : tensor<?x?xf32> to tensor<?x?xf32>
   return %1 : tensor<?x?xf32>
 }
@@ -64,9 +64,9 @@ func @mixed_pad_fusion(%arg0 : tensor<?x42xf32>, %arg1 : index, %arg2 : index,
       %1 = arith.mulf %arg4, %arg4 : f32
       linalg.yield %1 : f32
     } -> tensor<42x?xf32>
-  %1 = linalg.pad_tensor %0 low [3, %arg1] high [4, %arg2] {
+  %1 = tensor.pad %0 low [3, %arg1] high [4, %arg2] {
     ^bb0(%arg4: index, %arg5 : index):
-      linalg.yield %arg3 : f32
+      tensor.yield %arg3 : f32
     } : tensor<42x?xf32> to tensor<49x?xf32>
   return %1 : tensor<49x?xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
index 9e0a672252104..27f014ed66147 100644
--- a/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
+++ b/mlir/test/Dialect/Linalg/resolve-shaped-type-result-dims.mlir
@@ -253,9 +253,9 @@ func @dim_of_pad_op(%arg0 : tensor<2x?x?xf32>, %arg1 : index, %arg2 : index,
    %c3 = arith.constant 3 : index
    %c4 = arith.constant 4 : index
    %c5 = arith.constant 5 : index
-   %0 = linalg.pad_tensor %arg0 low[%c3, %arg1, %c4] high[7, %c5, %arg2] {
+   %0 = tensor.pad %arg0 low[%c3, %arg1, %c4] high[7, %c5, %arg2] {
      ^bb0(%arg4: index, %arg5: index, %arg6: index):
-       linalg.yield %arg3 : f32
+       tensor.yield %arg3 : f32
    } : tensor<2x?x?xf32> to tensor<?x?x?xf32>
    %1 = tensor.dim %0, %c0 : tensor<?x?x?xf32>
    %2 = tensor.dim %0, %c1 : tensor<?x?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir
index 9e8dfb292032f..337b7c0ad2b7e 100644
--- a/mlir/test/Dialect/Linalg/roundtrip.mlir
+++ b/mlir/test/Dialect/Linalg/roundtrip.mlir
@@ -15,77 +15,6 @@
 // CHECK-DAG: #[[$strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)>
 // CHECK-DAG: #[[$strided3DT:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d2 * s1 + s0 + d1 * s2 + d0)>
 
-func @pad_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
-                  %pad_value: f32) -> tensor<6x?x?x?xf32> {
-  %0 = linalg.pad_tensor %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
-    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
-      linalg.yield %pad_value : f32
-    } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
-  return %0 : tensor<6x?x?x?xf32>
-}
-// CHECK-LABEL: func @pad_dynamic
-//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
-//  CHECK-SAME: %[[LOW:[a-zA-Z0-9_]*]]
-//  CHECK-SAME: %[[HIGH:[a-zA-Z0-9_]*]]
-//       CHECK:   linalg.pad_tensor %[[ARG0]]
-//  CHECK-SAME:     low[2, %[[LOW]], 3, 3]
-//  CHECK-SAME:     high[3, 3, %[[HIGH]], 2]
-//       CHECK:    : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
-
-// -----
-
-func @pad_static(%arg0: tensor<3x4xf32>, %pad_value: f32) -> tensor<6x9xf32> {
-  %0 = linalg.pad_tensor %arg0 low[1, 2] high[2, 3] {
-    ^bb0(%arg1 : index, %arg2 : index):
-      linalg.yield %pad_value : f32
-    } : tensor<3x4xf32> to tensor<6x9xf32>
-  return %0 : tensor<6x9xf32>
-}
-// CHECK-LABEL: func @pad_static
-//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
-//       CHECK:   linalg.pad_tensor %[[ARG0]] low[1, 2] high[2, 3]
-//       CHECK:    : tensor<3x4xf32> to tensor<6x9xf32>
-
-// -----
-
-func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index,
-                       %pad_value: f32) -> tensor<?x?xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] {
-    ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad_value : f32
-    } : tensor<2x3xf32> to tensor<?x?xf32>
-  return %0 : tensor<?x?xf32>
-}
-// CHECK-LABEL: func @pad_asymmetrical
-//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
-//  CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
-//  CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
-//       CHECK:   linalg.pad_tensor %[[ARG0]]
-//  CHECK-SAME:     low[0, 0]
-//  CHECK-SAME:     high[%[[UB0]], %[[UB1]]]
-//       CHECK:    : tensor<2x3xf32> to tensor<?x?xf32>
-
-// -----
-
-func @pad_to_static_size(%arg0: tensor<?x?xf32>, %ub0: index, %ub1: index,
-                         %pad_value: f32) -> tensor<2x3xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[%ub0, %ub1] {
-    ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad_value : f32
-    } : tensor<?x?xf32> to tensor<2x3xf32>
-  return %0 : tensor<2x3xf32>
-}
-// CHECK-LABEL: func @pad_to_static_size
-//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
-//  CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
-//  CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
-//       CHECK:   linalg.pad_tensor %[[ARG0]]
-//  CHECK-SAME:     low[0, 0]
-//  CHECK-SAME:     high[%[[UB0]], %[[UB1]]]
-//       CHECK:    : tensor<?x?xf32> to tensor<2x3xf32>
-
-// -----
-
 func @views(%arg0: index) {
   %c0 = arith.constant 0 : index
   %0 = arith.muli %arg0, %arg0 : index
diff --git a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
index a8e26baa2bded..64bb9d1ea9eff 100644
--- a/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
+++ b/mlir/test/Dialect/Linalg/subtensor-of-padtensor.mlir
@@ -6,9 +6,9 @@
 //       CHECK:   return %[[RESULT]]
 func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32)
     -> tensor<2x1xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+  %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<4x5xf32> to tensor<11x13xf32>
   %1 = tensor.extract_slice %0[1, 2] [2, 1] [1, 1] : tensor<11x13xf32> to tensor<2x1xf32>
   return %1 : tensor<2x1xf32>
@@ -18,16 +18,16 @@ func @static_data_only(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // CHECK-LABEL: @static_high_pad_only
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-NOT:   tensor.extract_slice
 //       CHECK:   %[[RESULT:.*]] = tensor.generate
 //       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<2x4xf32>
 func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
     -> tensor<2x4xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+  %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<4x5xf32> to tensor<11x13xf32>
   %1 = tensor.extract_slice %0[4, 5] [2, 4] [1, 1] : tensor<11x13xf32> to tensor<2x4xf32>
   return %1 : tensor<2x4xf32>
@@ -37,16 +37,16 @@ func @static_high_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // CHECK-LABEL: @static_low_pad_only
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-NOT:   tensor.extract_slice
 //       CHECK:   %[[RESULT:.*]] = tensor.generate
 //       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<2x3xf32>
 func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
     -> tensor<2x3xf32> {
-  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+  %0 = tensor.pad %arg0 low[3, 7] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<4x5xf32> to tensor<14x20xf32>
   %1 = tensor.extract_slice %0[1, 3] [2, 3] [1, 1] : tensor<14x20xf32> to tensor<2x3xf32>
   return %1 : tensor<2x3xf32>
@@ -56,16 +56,16 @@ func @static_low_pad_only(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // CHECK-LABEL: @static_low_pad_only_2
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-NOT:   tensor.extract_slice
 //       CHECK:   %[[RESULT:.*]] = tensor.generate
 //       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<1x3xf32>
 func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
     -> tensor<1x3xf32> {
-  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+  %0 = tensor.pad %arg0 low[3, 7] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<4x5xf32> to tensor<14x20xf32>
   %1 = tensor.extract_slice %0[1, 3] [1, 3] [1, 1] : tensor<14x20xf32> to tensor<1x3xf32>
   return %1 : tensor<1x3xf32>
@@ -75,16 +75,16 @@ func @static_low_pad_only_2(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // CHECK-LABEL: @static_mixed_data_high_pad
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //       CHECK:   %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][2, 4] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
-//       CHECK:   %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[1, 3]
-//       CHECK:     linalg.yield %[[PAD]]
+//       CHECK:   %[[RESULT:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[1, 3]
+//       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<3x4xf32>
 func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
     -> tensor<3x4xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+  %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<4x5xf32> to tensor<11x13xf32>
   %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<11x13xf32> to tensor<3x4xf32>
   return %1 : tensor<3x4xf32>
@@ -94,16 +94,16 @@ func @static_mixed_data_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // CHECK-LABEL: @static_mixed_data_low_pad
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //       CHECK:   %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [2, 1] [1, 1] : tensor<4x5xf32> to tensor<2x1xf32>
-//       CHECK:   %[[RESULT:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[1, 3] high[0, 0]
-//       CHECK:     linalg.yield %[[PAD]]
+//       CHECK:   %[[RESULT:.*]] = tensor.pad %[[SUBTENSOR]] low[1, 3] high[0, 0]
+//       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<3x4xf32>
 func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
     -> tensor<3x4xf32> {
-  %0 = linalg.pad_tensor %arg0 low[3, 7] high[7, 8] {
+  %0 = tensor.pad %arg0 low[3, 7] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<4x5xf32> to tensor<14x20xf32>
   %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<14x20xf32> to tensor<3x4xf32>
   return %1 : tensor<3x4xf32>
@@ -113,15 +113,15 @@ func @static_mixed_data_low_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // CHECK-LABEL: @static_mixed_data_low_high_pad
 //  CHECK-SAME:   %[[ARG0:.*]]: tensor<4x5xf32>, %[[PAD:.*]]: f32
-//   CHECK-NOT:   linalg.pad_tensor
-//       CHECK:   %[[RESULT:.*]] = linalg.pad_tensor %[[ARG0]] low[1, 1] high[2, 3]
-//       CHECK:     linalg.yield %[[PAD]]
+//   CHECK-NOT:   tensor.pad
+//       CHECK:   %[[RESULT:.*]] = tensor.pad %[[ARG0]] low[1, 1] high[2, 3]
+//       CHECK:     tensor.yield %[[PAD]]
 //       CHECK:   return %[[RESULT]] : tensor<7x9xf32>
 func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
     -> tensor<7x9xf32> {
-  %0 = linalg.pad_tensor %arg0 low[2, 3] high[7, 8] {
+  %0 = tensor.pad %arg0 low[2, 3] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<4x5xf32> to tensor<13x16xf32>
   %1 = tensor.extract_slice %0[1, 2] [7, 9] [1, 1] : tensor<13x16xf32> to tensor<7x9xf32>
   return %1 : tensor<7x9xf32>
@@ -131,7 +131,7 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
 
 // CHECK-LABEL: @dynamic_high_pad
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<?x5xf32>
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
 //       CHECK:   tensor.dim %[[ARG0]], %[[C0]]
 //       CHECK:   %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<3x4xf32>) {
@@ -139,14 +139,14 @@ func @static_mixed_data_low_high_pad(%arg0 : tensor<4x5xf32>, %pad : f32)
 //       CHECK:     scf.yield %[[GEN]]
 //       CHECK:   } else {
 //       CHECK:     %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor<?x5xf32> to tensor<?x1xf32>
-//       CHECK:     %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
+//       CHECK:     %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
 //       CHECK:     scf.yield %[[PADTENSOR]]
 //       CHECK:   }
 //       CHECK:   return %[[RESULT]]
 func @dynamic_high_pad(%arg0 : tensor<?x5xf32>, %h1: index, %pad : f32) -> tensor<3x4xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[%h1, 8] {
+  %0 = tensor.pad %arg0 low[0, 0] high[%h1, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<?x5xf32> to tensor<?x13xf32>
   %1 = tensor.extract_slice %0[2, 4] [3, 4] [1, 1] : tensor<?x13xf32> to tensor<3x4xf32>
   return %1 : tensor<3x4xf32>
@@ -156,7 +156,7 @@ func @dynamic_high_pad(%arg0 : tensor<?x5xf32>, %h1: index, %pad : f32) -> tenso
 
 // CHECK-LABEL: @dynamic_extract_size
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<?x5xf32>, %[[ARG1:.*]]: index
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
 //       CHECK:   tensor.dim %[[ARG0]], %[[C0]]
 //       CHECK:   %[[RESULT:.*]] = scf.if %{{.*}} -> (tensor<?x4xf32>) {
@@ -164,14 +164,14 @@ func @dynamic_high_pad(%arg0 : tensor<?x5xf32>, %h1: index, %pad : f32) -> tenso
 //       CHECK:     scf.yield %[[GEN]]
 //       CHECK:   } else {
 //       CHECK:     %[[SUBTENSOR:.*]] = tensor.extract_slice %[[ARG0]][%{{.*}}, 4] [%{{.*}}, 1] [1, 1] : tensor<?x5xf32> to tensor<?x1xf32>
-//       CHECK:     %[[PADTENSOR:.*]] = linalg.pad_tensor %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
+//       CHECK:     %[[PADTENSOR:.*]] = tensor.pad %[[SUBTENSOR]] low[0, 0] high[%{{.*}}, 3]
 //       CHECK:     scf.yield %[[PADTENSOR]]
 //       CHECK:   }
 //       CHECK:   return %[[RESULT]]
 func @dynamic_extract_size(%arg0 : tensor<?x5xf32>, %s1: index, %pad : f32) -> tensor<?x4xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+  %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<?x5xf32> to tensor<?x13xf32>
   %1 = tensor.extract_slice %0[2, 4] [%s1, 4] [1, 1] : tensor<?x13xf32> to tensor<?x4xf32>
   return %1 : tensor<?x4xf32>
@@ -184,14 +184,14 @@ func @dynamic_extract_size(%arg0 : tensor<?x5xf32>, %s1: index, %pad : f32) -> t
 //       CHECK:     tensor.generate
 //       CHECK:   else
 //       CHECK:     %[[SLICE:.*]] = tensor.extract_slice
-//       CHECK:     linalg.pad_tensor %[[SLICE]] low[0, 0]
+//       CHECK:     tensor.pad %[[SLICE]] low[0, 0]
 func @dynamic_zero_low_padding(%arg0 : tensor<?x?xf32>, %pad : f32,
                                %o1 : index, %o2 : index,
                                %s1 : index, %s2 : index)
     -> tensor<?x?xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[7, 8] {
+  %0 = tensor.pad %arg0 low[0, 0] high[7, 8] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<?x?xf32> to tensor<?x?xf32>
   %1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   return %1 : tensor<?x?xf32>
@@ -204,14 +204,14 @@ func @dynamic_zero_low_padding(%arg0 : tensor<?x?xf32>, %pad : f32,
 //       CHECK:     tensor.generate
 //       CHECK:   else
 //       CHECK:     %[[SLICE:.*]] = tensor.extract_slice
-//       CHECK:     linalg.pad_tensor %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0]
+//       CHECK:     tensor.pad %[[SLICE]] low[%{{.*}}, %{{.*}}] high[0, 0]
 func @dynamic_zero_high_padding(%arg0 : tensor<?x?xf32>, %pad : f32,
                                 %o1 : index, %o2 : index,
                                 %s1 : index, %s2 : index)
     -> tensor<?x?xf32> {
-  %0 = linalg.pad_tensor %arg0 low[7, 8] high[0, 0] {
+  %0 = tensor.pad %arg0 low[7, 8] high[0, 0] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad : f32
+      tensor.yield %pad : f32
     } : tensor<?x?xf32> to tensor<?x?xf32>
   %1 = tensor.extract_slice %0[%o1, %o2] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
   return %1 : tensor<?x?xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
index b60c8c466f154..ac94261a153f0 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -288,7 +288,7 @@ func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?x?x?x
 //     CHECK:         tensor.generate
 //     CHECK:       else
 //     CHECK:         tensor.extract_slice
-//     CHECK:         linalg.pad_tensor
+//     CHECK:         tensor.pad
 //     CHECK:       tensor.extract_slice
 //     CHECK:       tensor.extract_slice
 //     CHECK:       linalg.generic
@@ -303,9 +303,9 @@ func @pad_generic_static(%small_input: tensor<58x1xf32>, %large_input: tensor<64
   %d0 = tensor.dim %large_input, %c0 : tensor<64x128xf32>
   %d1 = tensor.dim %large_input, %c1 : tensor<64x128xf32>
 
-  %pad = linalg.pad_tensor %small_input low[4, 60] high[2, 67] {
+  %pad = tensor.pad %small_input low[4, 60] high[2, 67] {
   ^bb0(%arg0: index, %arg1: index):
-    linalg.yield %zero : f32
+    tensor.yield %zero : f32
   } : tensor<58x1xf32> to tensor<64x128xf32>
 
   %fill = linalg.fill(%zero, %large_input) : f32, tensor<64x128xf32> -> tensor<64x128xf32>
diff --git a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
index a83793544078c..a8dfdd940673a 100644
--- a/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
+++ b/mlir/test/Dialect/Linalg/tile-pad-tensor-op.mlir
@@ -23,7 +23,7 @@
 //       TILE2:         tensor.generate
 //       TILE2:       else
 //       TILE2:         %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-//       TILE2:         %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]]
+//       TILE2:         %[[PAD:.*]] = tensor.pad %[[SLICE]]
 //       TILE2:       tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 //       TILE2:   return %[[RESULT]]
 
@@ -43,15 +43,15 @@
 //       TILE1:       tensor.generate
 //       TILE1:     else
 //       TILE1:       %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-//       TILE1:       %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
+//       TILE1:       %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[{{.*}}, {{.*}}]
 //       TILE1:     tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [%[[DIM0]], {{.*}}] [1, 1]
 //       TILE1:   return %[[RESULT]]
 
 func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
                          %pad_value: f32) -> tensor<?x?xf32> {
-  %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] {
+  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad_value : f32
+      tensor.yield %pad_value : f32
     } : tensor<?x?xf32> to tensor<?x?xf32>
   return %0 : tensor<?x?xf32>
 }
@@ -71,7 +71,7 @@ func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
 //       TILE2:         tensor.generate
 //       TILE2:       else
 //       TILE2:         %[[SLICE:.*]] = tensor.extract_slice %[[IN]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
-//       TILE2:         %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]]
+//       TILE2:         %[[PAD:.*]] = tensor.pad %[[SLICE]]
 //       TILE2:       tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][{{.*}}, {{.*}}] [{{.*}}, {{.*}}] [1, 1]
 //       TILE2:   return %[[RESULT]]
 
@@ -86,15 +86,15 @@ func @dynamic_pad_tensor(%input_tensor: tensor<?x?xf32>,
 //       TILE1:       tensor.generate
 //       TILE1:     else
 //       TILE1:       %[[SLICE:.*]] = tensor.extract_slice %[[IN]][0, {{.*}}] [7, {{.*}}] [1, 1]
-//       TILE1:       %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
+//       TILE1:       %[[PAD:.*]] = tensor.pad %[[SLICE]] low[3, %{{.*}}] high[5, {{.*}}]
 //       TILE1:     tensor.insert_slice %[[SWAP_RESULT]] into %[[INNER_OUT]][0, {{.*}}] [15, {{.*}}] [1, 1]
 //       TILE1:   return %[[RESULT]]
 
 func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
                         %pad_value: f32) -> tensor<15x16xf32> {
-  %0 = linalg.pad_tensor %input_tensor low[3, 4] high[5, 3] {
+  %0 = tensor.pad %input_tensor low[3, 4] high[5, 3] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad_value : f32
+      tensor.yield %pad_value : f32
     } : tensor<7x9xf32> to tensor<15x16xf32>
   return %0 : tensor<15x16xf32>
 }
@@ -112,7 +112,7 @@ func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
 //       TILE1:       scf.yield %[[GEN]] : tensor<14x3xf32>
 //       TILE1:     else
 //       TILE1:       %[[SLICE:.*]] = tensor.extract_slice %arg0[0, %{{.*}}] [7, %{{.*}}] [1, 1] : tensor<7x9xf32> to tensor<7x?xf32>
-//       TILE1:       %[[PAD:.*]] = linalg.pad_tensor %[[SLICE]] low[0, 0] high[7, %{{.*}}]
+//       TILE1:       %[[PAD:.*]] = tensor.pad %[[SLICE]] low[0, 0] high[7, %{{.*}}]
 //       TILE1:       scf.yield %[[PAD]] : tensor<14x3xf32>
 //       TILE1:     %[[R3:.*]] = tensor.insert_slice %[[R2]] into %[[INNER_OUT]][0, %[[IV]]] [14, 3] [1, 1] : tensor<14x3xf32> into tensor<14x15xf32>
 //       TILE1:     scf.yield %[[R3]] : tensor<14x15xf32>
@@ -120,9 +120,9 @@ func @static_pad_tensor(%input_tensor: tensor<7x9xf32>,
 func @static_pad_tile_evenly(%input_tensor: tensor<7x9xf32>,
                              %output_tensor: tensor<14x15xf32>,
                              %pad_value: f32) -> tensor<14x15xf32> {
-  %0 = linalg.pad_tensor %input_tensor low[0, 0] high[7, 6] {
+  %0 = tensor.pad %input_tensor low[0, 0] high[7, 6] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %pad_value : f32
+      tensor.yield %pad_value : f32
     } : tensor<7x9xf32> to tensor<14x15xf32>
   return %0 : tensor<14x15xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/vectorization.mlir b/mlir/test/Dialect/Linalg/vectorization.mlir
index ee3ec0019a840..c9f50af28ef27 100644
--- a/mlir/test/Dialect/Linalg/vectorization.mlir
+++ b/mlir/test/Dialect/Linalg/vectorization.mlir
@@ -537,7 +537,7 @@ func @matmul_tensors(
 
 // CHECK-LABEL: func @pad_static(
 //  CHECK-SAME:                  %[[ARG0:.*]]: tensor<2x?x2xf32>, %[[PAD:.*]]: f32
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //   CHECK-DAG:   %[[C2:.*]] = arith.constant 2 : index
 //   CHECK-DAG:   %[[INIT:.*]] = linalg.init_tensor [2, 3, 4] : tensor<2x3x4xf32>
@@ -547,9 +547,9 @@ func @matmul_tensors(
 //       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x3x2xf32>, tensor<2x3x4xf32>
 //       CHECK:   return %[[RESULT]]
 func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {
+  %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] {
     ^bb0(%arg1: index, %arg2: index, %arg3: index):
-      linalg.yield %pad_value : f32
+      tensor.yield %pad_value : f32
     } : tensor<2x?x2xf32> to tensor<2x3x4xf32>
   return %0 : tensor<2x3x4xf32>
 }
@@ -558,7 +558,7 @@ func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32>
 
 // CHECK-LABEL: func @pad_static_source(
 //  CHECK-SAME:                  %[[ARG0:.*]]: tensor<2x5x2xf32>, %[[PAD:.*]]: f32
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //   CHECK-DAG:   %[[C2:.*]] = arith.constant 2 : index
 //       CHECK:   %[[INIT:.*]] = linalg.init_tensor [2, 6, 4] : tensor<2x6x4xf32>
@@ -568,9 +568,9 @@ func @pad_static(%arg0: tensor<2x?x2xf32>, %pad_value: f32) -> tensor<2x3x4xf32>
 //       CHECK:   %[[WRITE:.*]] = vector.transfer_write %[[READ]], %[[FILL]][%[[C0]], %[[C0]], %[[C2]]] {in_bounds = [true, true, true]} : vector<2x5x2xf32>, tensor<2x6x4xf32>
 //       CHECK:   return %[[WRITE]]
 func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6x4xf32> {
-  %0 = linalg.pad_tensor %arg0 low[0, 0, 2] high[0, 1, 0] {
+  %0 = tensor.pad %arg0 low[0, 0, 2] high[0, 1, 0] {
     ^bb0(%arg1: index, %arg2: index, %arg3: index):
-      linalg.yield %pad_value : f32
+      tensor.yield %pad_value : f32
     } : tensor<2x5x2xf32> to tensor<2x6x4xf32>
   return %0 : tensor<2x6x4xf32>
 }
@@ -579,7 +579,7 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6
 
 // CHECK-LABEL: func @pad_static_dynamic(
 //  CHECK-SAME:                          %[[SRC:.*]]: tensor<1x2x2x?xf32>, %[[LOW:.*]]: index, %[[HIGH:.*]]: index
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C2:.*]] = arith.constant 2 : index
 //   CHECK-DAG:   %[[C3:.*]] = arith.constant 3 : index
 //   CHECK-DAG:   %[[C5:.*]] = arith.constant 5 : index
@@ -596,9 +596,9 @@ func @pad_static_source(%arg0: tensor<2x5x2xf32>, %pad_value: f32) -> tensor<2x6
 //       CHECK:   return %[[RESULT]]
 func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
                   %pad_value: f32) -> tensor<6x?x?x?xf32> {
-  %0 = linalg.pad_tensor %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
+  %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
     ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
-      linalg.yield %pad_value : f32
+      tensor.yield %pad_value : f32
     } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
   return %0 : tensor<6x?x?x?xf32>
 }
@@ -607,7 +607,7 @@ func @pad_static_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
 
 // CHECK-LABEL: func @pad_and_transfer_read
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //   CHECK-DAG:   %[[C5:.*]] = arith.constant 5.0
 //       CHECK:   %[[RESULT:.*]] = vector.transfer_read %[[ARG0]][%[[C0]], %[[C0]]], %[[C5]] : tensor<5x6xf32>, vector<7x9xf32>
@@ -616,9 +616,9 @@ func @pad_and_transfer_read(%arg0: tensor<5x6xf32>) -> vector<7x9xf32> {
   %c0 = arith.constant 0 : index
   %c5 = arith.constant 5.0 : f32
   %c6 = arith.constant 6.0 : f32
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[5, 7] {
+  %0 = tensor.pad %arg0 low[0, 0] high[5, 7] {
     ^bb0(%arg1: index, %arg2: index):
-      linalg.yield %c5 : f32
+      tensor.yield %c5 : f32
   } : tensor<5x6xf32> to tensor<10x13xf32>
   %1 = vector.transfer_read %0[%c0, %c0], %c6
       : tensor<10x13xf32>, vector<7x9xf32>
@@ -631,7 +631,7 @@ func private @make_vector() -> vector<7x9xf32>
 
 // CHECK-LABEL: func @pad_and_transfer_write_static
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
 //       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
 //       CHECK:   %[[RESULT:.*]] = vector.transfer_write %[[VEC0]], %[[ARG0]][%[[C0]], %[[C0]]] : vector<7x9xf32>, tensor<5x6xf32>
@@ -640,9 +640,9 @@ func @pad_and_transfer_write_static(
     %arg0: tensor<5x6xf32>) -> tensor<5x6xf32> {
   %c0 = arith.constant 0 : index
   %c5 = arith.constant 5.0 : f32
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[5, 7] {
+  %0 = tensor.pad %arg0 low[0, 0] high[5, 7] {
     ^bb0(%arg2: index, %arg3: index):
-      linalg.yield %c5 : f32
+      tensor.yield %c5 : f32
   } : tensor<5x6xf32> to tensor<10x13xf32>
   %1 = call @make_vector() : () -> vector<7x9xf32>
   %2 = vector.transfer_write %1, %0[%c0, %c0]
@@ -657,7 +657,7 @@ func private @make_vector() -> vector<7x9xf32>
 
 // CHECK-LABEL: func @pad_and_transfer_write_dynamic_static
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<?x?xf32>, %[[SIZE:.*]]: index, %[[PADDING:.*]]: index
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //       CHECK:   %[[C0:.*]] = arith.constant 0 : index
 //       CHECK:   %[[SUB:.*]] = tensor.extract_slice %[[ARG0]][0, 0] [%[[SIZE]], 6] [1, 1] : tensor<?x?xf32> to tensor<?x6xf32>
 //       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> vector<7x9xf32>
@@ -669,9 +669,9 @@ func @pad_and_transfer_write_dynamic_static(
   %c5 = arith.constant 5.0 : f32
   %s = tensor.extract_slice %arg0[0, 0] [%size, 6] [1, 1]
       : tensor<?x?xf32> to tensor<?x6xf32>
-  %0 = linalg.pad_tensor %s low[0, 0] high[%padding, 7] {
+  %0 = tensor.pad %s low[0, 0] high[%padding, 7] {
     ^bb0(%arg2: index, %arg3: index):
-      linalg.yield %c5 : f32
+      tensor.yield %c5 : f32
   } : tensor<?x6xf32> to tensor<?x13xf32>
   %1 = call @make_vector() : () -> vector<7x9xf32>
   %2 = vector.transfer_write %1, %0[%c0, %c0]
@@ -686,7 +686,7 @@ func private @make_vector() -> tensor<12x13xf32>
 
 // CHECK-LABEL: func @pad_and_insert_slice_source
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //   CHECK-DAG:   %[[C5:.*]] = arith.constant 5.0
 //       CHECK:   %[[VEC0:.*]] = call @make_vector() : () -> tensor<12x13xf32>
@@ -697,9 +697,9 @@ func @pad_and_insert_slice_source(
     %arg0: tensor<5x6xf32>) -> tensor<12x13xf32> {
   %c0 = arith.constant 0 : index
   %c5 = arith.constant 5.0 : f32
-  %0 = linalg.pad_tensor %arg0 low[0, 0] high[2, 3] {
+  %0 = tensor.pad %arg0 low[0, 0] high[2, 3] {
     ^bb0(%arg2: index, %arg3: index):
-      linalg.yield %c5 : f32
+      tensor.yield %c5 : f32
   } : tensor<5x6xf32> to tensor<7x9xf32>
   %1 = call @make_vector() : () -> tensor<12x13xf32>
   %r = tensor.insert_slice %0 into %1[0, 0][7, 9][1, 1] : tensor<7x9xf32> into tensor<12x13xf32>
@@ -717,9 +717,9 @@ func private @make_vector() -> tensor<12x13xf32>
 func @pad_and_insert_slice_dest(
     %arg0: tensor<1x5x6xf32>) -> tensor<1x12x13xf32> {
   %c5 = arith.constant 5.0 : f32
-  %0 = linalg.pad_tensor %arg0 low[0, 0, 0] high[0, 7, 7] {
+  %0 = tensor.pad %arg0 low[0, 0, 0] high[0, 7, 7] {
     ^bb0(%arg2: index, %arg3: index, %arg4: index):
-      linalg.yield %c5 : f32
+      tensor.yield %c5 : f32
   } : tensor<1x5x6xf32> to tensor<1x12x13xf32>
   %1 = call @make_vector() : () -> tensor<12x13xf32>
   %r = tensor.insert_slice %1 into %0[0, 0, 0][1, 12, 13][1, 1, 1] : tensor<12x13xf32> into tensor<1x12x13xf32>
@@ -730,7 +730,7 @@ func @pad_and_insert_slice_dest(
 
 // CHECK-LABEL: func @pad_tensor_non_const_pad_value
 //  CHECK-SAME:     %[[ARG0:.*]]: tensor<5x6xf32>
-//   CHECK-NOT:   linalg.pad_tensor
+//   CHECK-NOT:   tensor.pad
 //   CHECK-DAG:   %[[C0:.*]] = arith.constant 0 : index
 //   CHECK-DAG:   %[[C3:.*]] = arith.constant 3 : index
 //   CHECK-DAG:   %[[C4:.*]] = arith.constant 4 : index
@@ -743,14 +743,14 @@ func @pad_and_insert_slice_dest(
 func @pad_tensor_non_const_pad_value(%arg0: tensor<5x6xf32>) -> tensor<12x13xf32> {
   %c0 = arith.constant 0 : index
   %c5 = arith.constant 5.0 : f32
-  %0 = linalg.pad_tensor %arg0 low[3, 4] high[4, 3] {
+  %0 = tensor.pad %arg0 low[3, 4] high[4, 3] {
     ^bb0(%arg1: index, %arg2: index):
       %i1 = arith.index_cast %arg1 : index to i32
       %i2 = arith.index_cast %arg2 : index to i32
       %f1 = arith.sitofp %i1 : i32 to f32
       %f2 = arith.sitofp %i2 : i32 to f32
       %m = arith.mulf %f1, %f2 : f32
-      linalg.yield %m : f32
+      tensor.yield %m : f32
   } : tensor<5x6xf32> to tensor<12x13xf32>
   return %0 : tensor<12x13xf32>
 }
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
index 82f880d098fdc..10d39132a1126 100644
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -982,3 +982,199 @@ func @fold_rank() -> (index) {
   // CHECK-NEXT: return [[C3]]
   return %rank_0 : index
 }
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_same_static_shape(
+//  CHECK-SAME:   %[[ARG0:.*]]: tensor<5x6xf32>
+//   CHECK-NOT:   tensor.pad
+//       CHECK:   return %[[ARG0]]
+func @pad_tensor_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
+    -> tensor<5x6xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %0 = tensor.pad %arg0 low[%a, 0] high[0, %a] {
+        ^bb0(%arg1: index, %arg2: index):
+          tensor.yield %cst : f32
+  } : tensor<5x6xf32> to tensor<5x6xf32>
+  return %0 : tensor<5x6xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_nofold_same_static_shape(
+//  CHECK-SAME:   %[[ARG0:.*]]: tensor<5x6xf32>
+//       CHECK:   %[[PAD:.*]] = tensor.pad
+//       CHECK:   return %[[PAD]]
+func @pad_tensor_nofold_same_static_shape(%arg0: tensor<5x6xf32>, %a: index)
+    -> tensor<5x6xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %0 = tensor.pad %arg0 nofold low[%a, 0] high[0, %a] {
+        ^bb0(%arg1: index, %arg2: index):
+          tensor.yield %cst : f32
+  } : tensor<5x6xf32> to tensor<5x6xf32>
+  return %0 : tensor<5x6xf32>
+}
+
+// -----
+
+// CHECK-LABEL:   func @pad_tensor_after_cast_different_shape(
+// CHECK-SAME:      %[[INPUT:.*]]: tensor<?x64x?x?xf32>) -> tensor<?x?x?x?xf32> {
+// CHECK:           %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[PADDED:.*]] = tensor.pad %[[INPUT]]
+// CHECK-SAME:        low[0, 0, 1, 1] high[0, 0, 1, 1]  {
+// CHECK:           ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
+// CHECK:             tensor.yield %[[CST]] : f32
+// CHECK:           } : tensor<?x64x?x?xf32> to tensor<?x64x?x?xf32>
+// CHECK:           %[[DYNAMIC:.*]] = tensor.cast %[[PADDED:.*]] :
+// CHECK-SAME:         tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
+// CHECK:           return %[[DYNAMIC]] : tensor<?x?x?x?xf32>
+// CHECK:         }
+func @pad_tensor_after_cast_different_shape(%arg0: tensor<?x64x?x?xf32>)
+    -> tensor<?x?x?x?xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
+  %padded = tensor.pad %dynamic low[0, 0, 1, 1] high[0, 0, 1, 1]  {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+    tensor.yield %cst: f32
+  } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
+  return %padded: tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL:   func @pad_tensor_after_cast_same_shape(
+// CHECK-SAME:      %[[INPUT:.*]]: tensor<?x64x?x?xf32>,
+// CHECK-SAME:      %[[PADDING:.*]]: index) -> tensor<?x?x?x?xf32> {
+// CHECK:           %[[CST:.*]] = arith.constant 0.000000e+00 : f32
+// CHECK:           %[[PADDED:.*]] = tensor.pad %[[INPUT]]
+// CHECK-SAME:        low[0, %[[PADDING]], 1, 1] high[0, %[[PADDING]], 1, 1]  {
+// CHECK:           ^bb0(%[[ARG1:.*]]: index, %[[ARG2:.*]]: index, %[[ARG3:.*]]: index, %[[ARG4:.*]]: index):
+// CHECK:             tensor.yield %[[CST]] : f32
+// CHECK:           } : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
+// CHECK:           return %[[PADDED:.*]] : tensor<?x?x?x?xf32>
+// CHECK:         }
+func @pad_tensor_after_cast_same_shape(%arg0: tensor<?x64x?x?xf32>, %padding : index)
+    -> tensor<?x?x?x?xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %dynamic = tensor.cast %arg0 : tensor<?x64x?x?xf32> to tensor<?x?x?x?xf32>
+  %padded = tensor.pad %dynamic low[0, %padding, 1, 1] high[0, %padding, 1, 1]  {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+    tensor.yield %cst: f32
+  } : tensor<?x?x?x?xf32> to tensor<?x?x?x?xf32>
+  return %padded: tensor<?x?x?x?xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_tensor_of_cast(
+// CHECK-NOT:     tensor.cast
+// CHECK:         tensor.pad
+// CHECK:         tensor<8x?xf32> to tensor<8x32xf32>
+func @pad_tensor_of_cast(%t: tensor<8x?xf32>, %s: index) -> tensor<8x32xf32> {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.000000e+00 : f32
+  %0 = tensor.cast %t : tensor<8x?xf32> to tensor<?x?xf32>
+  %1 = tensor.pad %0 low[%c0, %c0] high[%c0, %s]  {
+  ^bb0(%arg9: index, %arg10: index):
+    tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<8x32xf32>
+  return %1 : tensor<8x32xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @cast_of_pad_more_static
+func @cast_of_pad_more_static(%arg0: tensor<?x?xf32>, %padding: index) -> tensor<32x32xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  // CHECK: %[[PAD:.*]] = tensor.pad
+  // CHECK: tensor<?x?xf32> to tensor<32x32xf32>
+  %padded = tensor.pad %arg0 low[%padding, %padding] high[0, 0] {
+  ^bb0(%arg1: index, %arg2: index):
+    tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<?x?xf32>
+  // CHECK-NOT: tensor.cast
+  %casted = tensor.cast %padded : tensor<?x?xf32> to tensor<32x32xf32>
+  // CHECK: return %[[PAD]]
+  return %casted : tensor<32x32xf32>
+}
+
+// -----
+
+// CHECK-LABEL: @cast_of_pad_less_static
+func @cast_of_pad_less_static(%arg0: tensor<32x?x?xf32>, %padding: index) -> tensor<?x32x32xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  // CHECK: tensor.pad
+  %padded = tensor.pad %arg0 low[%padding, %padding, %padding] high[0, 0, 0] {
+  ^bb0(%arg1: index, %arg2: index, %arg3: index):
+    tensor.yield %cst : f32
+  } : tensor<32x?x?xf32> to tensor<32x?x?xf32>
+  // CHECK: %[[CAST:.*]] = tensor.cast
+  %casted = tensor.cast %padded : tensor<32x?x?xf32> to tensor<?x32x32xf32>
+  // CHECK: return %[[CAST]]
+  return %casted : tensor<?x32x32xf32>
+}
+
+// -----
+
+func @tensor_pad_cast_fold(%arg0: tensor<4x4xf32>) -> tensor<4x4xf32> {
+  %c0 = arith.constant 0 : index
+  %cst = arith.constant 0.0 : f32
+  %0 = tensor.cast %arg0 : tensor<4x4xf32> to tensor<?x?xf32>
+  %1 = tensor.pad %0 low[%c0, %c0] high[%c0, %c0]  {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<4x4xf32>
+  return %1 : tensor<4x4xf32>
+}
+// CHECK-LABEL: @tensor_pad_cast
+// CHECK-SAME: %[[ARG0:.+]]: tensor<4x4xf32>
+// CHECK: return %[[ARG0]]
+
+// -----
+
+// CHECK-LABEL: func @fold_pad_tensor_source_cast(
+//  CHECK-SAME:                  %[[ARG0:.*]]: tensor<4x?xf32>
+//   CHECK-NOT:   tensor.cast
+//       CHECK:   %[[RESULT:.*]] = tensor.pad %[[ARG0]]
+func @fold_pad_tensor_source_cast(%arg0: tensor<4x?xf32>) -> tensor<4x4xf32> {
+  %cst = arith.constant 0.0 : f32
+  %0 = tensor.cast %arg0 : tensor<4x?xf32> to tensor<?x?xf32>
+  %1 = tensor.pad %0 low[0, 0] high[0, 1]  {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %cst : f32
+  } : tensor<?x?xf32> to tensor<4x4xf32>
+  return %1 : tensor<4x4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_static_zero_cast(
+//  CHECK-SAME:                  %[[ARG0:.*]]: tensor<?x?x?xf32>
+//   CHECK-NOT:   tensor.pad
+//       CHECK:   %[[RESULT:.*]] = tensor.cast %[[ARG0]] : tensor<?x?x?xf32> to tensor<2x3x4xf32>
+//       CHECK:   return %[[RESULT]]
+func @pad_static_zero_cast(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
+  %c0 = arith.constant 0 : index
+  %0 = tensor.pad %arg0 low[0, %c0, 0] high[0, 0, %c0] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index):
+      tensor.yield %pad_value : f32
+    } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
+
+  return %0 : tensor<2x3x4xf32>
+}
+
+// -----
+
+// CHECK-LABEL: func @pad_nofold_static_zero(
+//  CHECK-SAME:                  %[[ARG0:.*]]: tensor<?x?x?xf32>
+//       CHECK:   %[[PAD:.*]] = tensor.pad
+//       CHECK:   return %[[PAD]]
+func @pad_nofold_static_zero(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tensor<2x3x4xf32> {
+  %c0 = arith.constant 0 : index
+  %0 = tensor.pad %arg0 nofold low[0, %c0, 0] high[0, 0, %c0] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index):
+      tensor.yield %pad_value : f32
+    } : tensor<?x?x?xf32> to tensor<2x3x4xf32>
+
+  return %0 : tensor<2x3x4xf32>
+}
diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
index 8cdab35fb5e20..cec4718595a45 100644
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -317,3 +317,58 @@ func @illegal_num_offsets(%arg0 : tensor<?x?xf32>, %arg1 : tensor<?x?x?xf32>,
   %0 = tensor.insert_slice %arg0 into %arg1[0, 0] [%arg2, %arg3] [1, 1] : tensor<?x?xf32> into tensor<?x?x?xf32>
   return
 }
+
+// -----
+
+
+func @pad_result_type(%arg0: tensor<?x2x3x4xi32>, %arg1: index, %arg2: i32) -> tensor<?x?x?x8xf32> {
+  // expected-error @+1 {{specified type 'tensor<?x?x?x8xf32>' does not match the inferred type 'tensor<?x?x?x9xi32>}}
+  %0 = tensor.pad %arg0 low[1, %arg1, 2, 2] high[1, 2, %arg1, 3] {
+  ^bb0(%arg3: index, %arg4: index):
+    tensor.yield %arg2 : i32
+  } : tensor<?x2x3x4xi32> to tensor<?x?x?x8xf32>
+  return %0 : tensor<?x?x?x8xf32>
+}
+
+// -----
+
+func @pad_number_of_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
+  // expected-error @+1 {{expected the block to have 2 arguments}}
+  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+  ^bb0(%arg2: index, %arg3: index, %arg4: index):
+    tensor.yield %arg1 : i32
+  } : tensor<?x4xi32> to tensor<?x9xi32>
+  return %0 : tensor<?x9xi32>
+}
+
+// -----
+
+func @pad_no_block(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
+  // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}}
+  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+  } : tensor<?x4xi32> to tensor<?x9xi32>
+  return %0 : tensor<?x9xi32>
+}
+
+// -----
+
+func @pad_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
+  // expected-error @+1 {{op expected block argument 1 to be an index}}
+  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+  ^bb0(%arg2: i32, %arg3: i32):
+    tensor.yield %arg1 : i32
+  } : tensor<?x4xi32> to tensor<?x9xi32>
+  return %0 : tensor<?x9xi32>
+}
+
+// -----
+
+func @pad_yield_type(%arg0: tensor<?x4xi32>, %arg1: i8) -> tensor<?x9xi32> {
+  // expected-error @+1 {{op expected yield type to match shape element type}}
+  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+  ^bb0(%arg2: index, %arg3: index):
+    tensor.yield %arg1 : i8
+  } : tensor<?x4xi32> to tensor<?x9xi32>
+  return %0 : tensor<?x9xi32>
+}
+
diff --git a/mlir/test/Dialect/Tensor/ops.mlir b/mlir/test/Dialect/Tensor/ops.mlir
index d461dffeb6d5b..a76a18d190b57 100644
--- a/mlir/test/Dialect/Tensor/ops.mlir
+++ b/mlir/test/Dialect/Tensor/ops.mlir
@@ -176,3 +176,77 @@ func @rank(%t : tensor<4x4x?xf32>) {
   %1 = tensor.rank %t : tensor<4x4x?xf32>
   return
 }
+
+// -----
+
+func @pad_dynamic(%arg0: tensor<1x2x2x?xf32>, %low: index, %high: index,
+                  %pad_value: f32) -> tensor<6x?x?x?xf32> {
+  %0 = tensor.pad %arg0 low[2, %low, 3, 3] high[3, 3, %high, 2] {
+    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
+      tensor.yield %pad_value : f32
+    } : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+  return %0 : tensor<6x?x?x?xf32>
+}
+// CHECK-LABEL: func @pad_dynamic
+//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+//  CHECK-SAME: %[[LOW:[a-zA-Z0-9_]*]]
+//  CHECK-SAME: %[[HIGH:[a-zA-Z0-9_]*]]
+//       CHECK:   tensor.pad %[[ARG0]]
+//  CHECK-SAME:     low[2, %[[LOW]], 3, 3]
+//  CHECK-SAME:     high[3, 3, %[[HIGH]], 2]
+//       CHECK:    : tensor<1x2x2x?xf32> to tensor<6x?x?x?xf32>
+
+// -----
+
+func @pad_static(%arg0: tensor<3x4xf32>, %pad_value: f32) -> tensor<6x9xf32> {
+  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
+    ^bb0(%arg1 : index, %arg2 : index):
+      tensor.yield %pad_value : f32
+    } : tensor<3x4xf32> to tensor<6x9xf32>
+  return %0 : tensor<6x9xf32>
+}
+// CHECK-LABEL: func @pad_static
+//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+//       CHECK:   tensor.pad %[[ARG0]] low[1, 2] high[2, 3]
+//       CHECK:    : tensor<3x4xf32> to tensor<6x9xf32>
+
+// -----
+
+func @pad_asymmetrical(%arg0: tensor<2x3xf32>, %ub0: index, %ub1: index,
+                       %pad_value: f32) -> tensor<?x?xf32> {
+  %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : f32
+    } : tensor<2x3xf32> to tensor<?x?xf32>
+  return %0 : tensor<?x?xf32>
+}
+// CHECK-LABEL: func @pad_asymmetrical
+//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+//  CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
+//  CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
+//       CHECK:   tensor.pad %[[ARG0]]
+//  CHECK-SAME:     low[0, 0]
+//  CHECK-SAME:     high[%[[UB0]], %[[UB1]]]
+//       CHECK:    : tensor<2x3xf32> to tensor<?x?xf32>
+
+// -----
+
+func @pad_to_static_size(%arg0: tensor<?x?xf32>, %ub0: index, %ub1: index,
+                         %pad_value: f32) -> tensor<2x3xf32> {
+  %0 = tensor.pad %arg0 low[0, 0] high[%ub0, %ub1] {
+    ^bb0(%arg1: index, %arg2: index):
+      tensor.yield %pad_value : f32
+    } : tensor<?x?xf32> to tensor<2x3xf32>
+  return %0 : tensor<2x3xf32>
+}
+// CHECK-LABEL: func @pad_to_static_size
+//  CHECK-SAME: %[[ARG0:[a-zA-Z0-9_]*]]
+//  CHECK-SAME: %[[UB0:[a-zA-Z0-9_]*]]
+//  CHECK-SAME: %[[UB1:[a-zA-Z0-9_]*]]
+//       CHECK:   tensor.pad %[[ARG0]]
+//  CHECK-SAME:     low[0, 0]
+//  CHECK-SAME:     high[%[[UB0]], %[[UB1]]]
+//       CHECK:    : tensor<?x?xf32> to tensor<2x3xf32>
+
+// -----
+
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
index 58a4b7630dcc1..d840491b89984 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-comprehensive-bufferize.mlir
@@ -21,9 +21,9 @@ func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: tensor<f
     %8 = affine.apply #map1(%arg3, %c0)[%c2]
     %9 = tensor.extract_slice %arg1[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32>
     %10 = tensor.cast %9 : tensor<2xf32> to tensor<?xf32>
-    %11 = linalg.pad_tensor %10 low[%c0] high[%c0]  {
+    %11 = tensor.pad %10 low[%c0] high[%c0]  {
     ^bb0(%arg5: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<?xf32> to tensor<2xf32>
     %12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor<?x2xf32>
     scf.yield %12 : tensor<?x2xf32>
@@ -38,9 +38,9 @@ func @init_and_dot(%arg0: tensor<64xf32>, %arg1: tensor<64xf32>, %arg2: tensor<f
     %8 = affine.apply #map1(%arg3, %c0)[%c2]
     %9 = tensor.extract_slice %arg0[%arg3] [2] [1] : tensor<64xf32> to tensor<2xf32>
     %10 = tensor.cast %9 : tensor<2xf32> to tensor<?xf32>
-    %11 = linalg.pad_tensor %10 low[%c0] high[%c0]  {
+    %11 = tensor.pad %10 low[%c0] high[%c0]  {
     ^bb0(%arg5: index):  
-      linalg.yield %cst : f32
+      tensor.yield %cst : f32
     } : tensor<?xf32> to tensor<2xf32>
     %12 = tensor.insert_slice %11 into %arg4[%8, 0] [1, 2] [1, 1] : tensor<2xf32> into tensor<?x2xf32>
     scf.yield %12 : tensor<?x2xf32>
diff --git a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
index 05b7e1a7d2cac..ced7a49073b37 100644
--- a/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
+++ b/mlir/test/Integration/Dialect/Linalg/CPU/test-padtensor.mlir
@@ -13,9 +13,9 @@ func @main() {
   %offset = arith.constant 2 : index
   %cst = arith.constant 2.3 : f32
   %c0 = arith.constant 0 : index
-  %out = linalg.pad_tensor %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset]  {
+  %out = tensor.pad %dynamic low[%c0, %offset, %c0] high[%c0, %c0, %offset]  {
   ^bb0(%gen_arg1: index, %gen_arg2: index, %gen_arg3: index):
-    linalg.yield %cst : f32
+    tensor.yield %cst : f32
   } : tensor<1x?x3xf32> to tensor<1x?x?xf32>
   %unranked = tensor.cast %out: tensor<1x?x?xf32> to tensor<*xf32>
   call @print_memref_f32(%unranked) : (tensor<*xf32>) -> ()
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
index 76d983d3892e4..eef99f82fcb3f 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgTransforms.cpp
@@ -42,6 +42,7 @@ struct TestLinalgTransforms
                     memref::MemRefDialect,
                     scf::SCFDialect,
                     StandardOpsDialect,
+                    linalg::LinalgDialect,
                     vector::VectorDialect,
                     gpu::GPUDialect>();
     // clang-format on
@@ -549,20 +550,20 @@ static void applyLinalgToVectorPatterns(FuncOp funcOp) {
       funcOp.getContext(),
       LinalgTransformationFilter()
           .addOpFilter<ContractionOpInterface, FillOp, CopyOp, GenericOp>());
-  populatePadTensorOpVectorizationPatterns(patterns);
+  populatePadOpVectorizationPatterns(patterns);
   populateConvolutionVectorizationPatterns(patterns);
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
 static void applyPadTensorToGenericPatterns(FuncOp funcOp) {
   RewritePatternSet patterns(funcOp.getContext());
-  patterns.add<PadTensorOpTransformationPattern>(funcOp.getContext());
+  patterns.add<PadOpTransformationPattern>(funcOp.getContext());
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
 static void applyGeneralizePadTensorPatterns(FuncOp funcOp) {
   RewritePatternSet patterns(funcOp.getContext());
-  patterns.add<GeneralizePadTensorOpPattern>(funcOp.getContext());
+  patterns.add<GeneralizePadOpPattern>(funcOp.getContext());
   (void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));
 }
 
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index cd4d2e195d03a..e6b4654097d2c 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -4280,6 +4280,7 @@ td_library(
         ":InferTypeOpInterfaceTdFiles",
         ":OpBaseTdFiles",
         ":SideEffectInterfacesTdFiles",
+        ":TilingInterfaceTdFiles",
         ":ViewLikeInterfaceTdFiles",
     ],
 )
@@ -4336,6 +4337,7 @@ cc_library(
         ":StandardOps",
         ":Support",
         ":TensorOpsIncGen",
+        ":TilingInterface",
         ":ViewLikeInterface",
         "//llvm:Support",
     ],
@@ -4356,6 +4358,38 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "TensorTilingInterfaceImpl",
+    srcs = ["lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp"],
+    hdrs = ["include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h"],
+    includes = ["include"],
+    deps = [
+        ":Affine",
+        ":IR",
+        ":LinalgOps",
+        ":SCFDialect",
+        ":StandardOps",
+        ":TensorDialect",
+        ":TilingInterface",
+        "//llvm:Support",
+    ],
+)
+
+cc_library(
+    name = "TensorUtils",
+    srcs = ["lib/Dialect/Tensor/Utils/Utils.cpp"],
+    hdrs = ["include/mlir/Dialect/Tensor/Utils/Utils.h"],
+    includes = ["include"],
+    deps = [
+        ":Affine",
+        ":ArithmeticDialect",
+        ":IR",
+        ":Support",
+        ":TensorDialect",
+        "//llvm:Support",
+    ],
+)
+
 gentbl_cc_library(
     name = "TensorPassIncGen",
     strip_include_prefix = "include",
@@ -5634,6 +5668,7 @@ cc_library(
         ":StandardToSPIRV",
         ":TensorDialect",
         ":TensorInferTypeOpInterfaceImpl",
+        ":TensorTilingInterfaceImpl",
         ":TensorTransforms",
         ":TosaDialect",
         ":TosaToLinalg",
@@ -6913,6 +6948,7 @@ cc_library(
         ":Support",
         ":TensorBufferizableOpInterfaceImpl",
         ":TensorDialect",
+        ":TensorUtils",
         ":TransformUtils",
         ":VectorBufferizableOpInterfaceImpl",
         ":VectorOps",
@@ -6952,7 +6988,6 @@ cc_library(
     deps = [
         ":IR",
         ":Support",
-        ":TensorDialect",
         ":TilingInterfaceIncGen",
         ":ViewLikeInterface",
         "//llvm:Support",
@@ -7255,6 +7290,7 @@ cc_library(
         ":SCFDialect",
         ":StandardOps",
         ":TensorDialect",
+        ":TensorUtils",
         ":TosaDialect",
         ":Transforms",
     ],

From 3c90ae5d0b7137315e1c60734bc32b4356f987d4 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Fri, 21 Jan 2022 20:34:17 +0100
Subject: [PATCH 206/946] Revert "[flang] Update tco tool pipline and add
 translation to LLVM IR"

This reverts commit 68db0e25df4b1edaa2c6080eb88453ab01ea01d3.
---
 .../include/flang/Optimizer/CodeGen/CodeGen.h |  10 +-
 .../include/flang/Optimizer/Support/InitFIR.h |  12 +-
 flang/include/flang/Tools/CLOptions.inc       | 160 ------------------
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       |  37 ----
 flang/lib/Optimizer/Support/CMakeLists.txt    |   1 -
 flang/lib/Optimizer/Support/InitFIR.cpp       |  20 ---
 flang/test/Fir/basic-program.fir              |  12 --
 flang/tools/tco/CMakeLists.txt                |  20 +--
 flang/tools/tco/tco.cpp                       |  45 +----
 9 files changed, 15 insertions(+), 302 deletions(-)
 delete mode 100644 flang/include/flang/Tools/CLOptions.inc
 delete mode 100644 flang/lib/Optimizer/Support/InitFIR.cpp
 delete mode 100644 flang/test/Fir/basic-program.fir

diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index 939d6aebb524d..1bd31b207859a 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -12,8 +12,6 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassRegistry.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
 #include <memory>
 
 namespace fir {
@@ -38,13 +36,9 @@ std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>> createFirTargetRewritePass(
 /// Convert FIR to the LLVM IR dialect
 std::unique_ptr<mlir::Pass> createFIRToLLVMPass();
 
-using LLVMIRLoweringPrinter =
-    std::function<void(llvm::Module &, llvm::raw_ostream &)>;
 /// Convert the LLVM IR dialect to LLVM-IR proper
-std::unique_ptr<mlir::Pass> createLLVMDialectToLLVMPass(
-    llvm::raw_ostream &output,
-    LLVMIRLoweringPrinter printer =
-        [](llvm::Module &m, llvm::raw_ostream &out) { m.print(out, nullptr); });
+std::unique_ptr<mlir::Pass>
+createLLVMDialectToLLVMPass(llvm::raw_ostream &output);
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index 2e8c1685a06f7..e78967de2a383 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -13,6 +13,7 @@
 #ifndef FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
 #define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
 
+#include "flang/Optimizer/CodeGen/CodeGen.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "mlir/Conversion/Passes.h"
 #include "mlir/Dialect/Affine/Passes.h"
@@ -34,19 +35,11 @@ namespace fir::support {
 #define FLANG_DIALECT_LIST                                                     \
   FLANG_NONCODEGEN_DIALECT_LIST, FIRCodeGenDialect, mlir::LLVM::LLVMDialect
 
-inline void registerNonCodegenDialects(mlir::DialectRegistry &registry) {
-  registry.insert<FLANG_NONCODEGEN_DIALECT_LIST>();
-}
-
 /// Register all the dialects used by flang.
 inline void registerDialects(mlir::DialectRegistry &registry) {
   registry.insert<FLANG_DIALECT_LIST>();
 }
 
-inline void loadNonCodegenDialects(mlir::MLIRContext &context) {
-  context.loadDialect<FLANG_NONCODEGEN_DIALECT_LIST>();
-}
-
 /// Forced load of all the dialects used by flang.  Lowering is not an MLIR
 /// pass, but a producer of FIR and MLIR. It is therefore a requirement that the
 /// dialects be preloaded to be able to build the IR.
@@ -82,9 +75,6 @@ inline void registerMLIRPassesForFortranTools() {
   mlir::registerConvertAffineToStandardPass();
 }
 
-/// Register the interfaces needed to lower to LLVM IR.
-void registerLLVMTranslation(mlir::MLIRContext &context);
-
 } // namespace fir::support
 
 #endif // FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
deleted file mode 100644
index 1c85075d5cc17..0000000000000
--- a/flang/include/flang/Tools/CLOptions.inc
+++ /dev/null
@@ -1,160 +0,0 @@
-//===-- CLOptions.inc -- command line options -------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-/// This file defines some shared command-line options that can be used when
-/// debugging the test tools. This file must be included into the tool.
-
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/Passes.h"
-#include "flang/Optimizer/CodeGen/CodeGen.h"
-#include "flang/Optimizer/Transforms/Passes.h"
-#include "llvm/Support/CommandLine.h"
-
-#define DisableOption(DOName, DOOption, DODescription) \
-  static llvm::cl::opt<bool> disable##DOName("disable-" DOOption, \
-      llvm::cl::desc("disable " DODescription " pass"), llvm::cl::init(false), \
-      llvm::cl::Hidden)
-
-/// Shared option in tools to control whether dynamically sized array
-/// allocations should always be on the heap.
-static llvm::cl::opt<bool> dynamicArrayStackToHeapAllocation(
-    "fdynamic-heap-array",
-    llvm::cl::desc("place all array allocations of dynamic size on the heap"),
-    llvm::cl::init(false), llvm::cl::Hidden);
-
-/// Shared option in tools to set a maximum value for the number of elements in
-/// a compile-time sized array that can be allocated on the stack.
-static llvm::cl::opt<std::size_t> arrayStackAllocationThreshold(
-    "fstack-array-size",
-    llvm::cl::desc(
-        "place all array allocations more than <size> elements on the heap"),
-    llvm::cl::init(~static_cast<std::size_t>(0)), llvm::cl::Hidden);
-
-namespace {
-/// Optimizer Passes
-DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass");
-DisableOption(FirAvc, "avc", "array value copy analysis and transformation");
-DisableOption(
-    FirMao, "memory-allocation-opt", "memory allocation optimization");
-
-/// CodeGen Passes
-#if !defined(FLANG_EXCLUDE_CODEGEN)
-DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen");
-DisableOption(TargetRewrite, "target-rewrite", "rewrite FIR for target");
-DisableOption(FirToLlvmIr, "fir-to-llvmir", "FIR to LLVM-IR dialect");
-DisableOption(LlvmIrToLlvm, "llvm", "conversion to LLVM");
-#endif
-
-/// Generic for adding a pass to the pass manager if it is not disabled.
-template <typename F>
-void addPassConditionally(
-    mlir::PassManager &pm, llvm::cl::opt<bool> &disabled, F ctor) {
-  if (!disabled)
-    pm.addPass(ctor());
-}
-
-template <typename OP, typename F>
-void addNestedPassConditionally(
-    mlir::PassManager &pm, llvm::cl::opt<bool> &disabled, F ctor) {
-  if (!disabled)
-    pm.addNestedPass<OP>(ctor());
-}
-
-} // namespace
-
-namespace fir {
-
-static void defaultFlangInlinerOptPipeline(mlir::OpPassManager &pm) {
-  mlir::GreedyRewriteConfig config;
-  config.enableRegionSimplification = false;
-  pm.addPass(mlir::createCanonicalizerPass(config));
-}
-
-inline void addCfgConversionPass(mlir::PassManager &pm) {
-  addNestedPassConditionally<mlir::FuncOp>(
-      pm, disableCfgConversion, fir::createFirToCfgPass);
-}
-
-inline void addAVC(mlir::PassManager &pm) {
-  addNestedPassConditionally<mlir::FuncOp>(
-      pm, disableFirAvc, fir::createArrayValueCopyPass);
-}
-
-#if !defined(FLANG_EXCLUDE_CODEGEN)
-inline void addCodeGenRewritePass(mlir::PassManager &pm) {
-  addPassConditionally(
-      pm, disableCodeGenRewrite, fir::createFirCodeGenRewritePass);
-}
-
-inline void addTargetRewritePass(mlir::PassManager &pm) {
-  addPassConditionally(pm, disableTargetRewrite, []() {
-    return fir::createFirTargetRewritePass(fir::TargetRewriteOptions{});
-  });
-}
-
-inline void addFIRToLLVMPass(mlir::PassManager &pm) {
-  addPassConditionally(pm, disableFirToLlvmIr, fir::createFIRToLLVMPass);
-}
-
-inline void addLLVMDialectToLLVMPass(
-    mlir::PassManager &pm, llvm::raw_ostream &output) {
-  addPassConditionally(pm, disableLlvmIrToLlvm,
-      [&]() { return fir::createLLVMDialectToLLVMPass(output); });
-}
-#endif
-
-/// Create a pass pipeline for running default optimization passes for
-/// incremental conversion of FIR.
-///
-/// \param pm - MLIR pass manager that will hold the pipeline definition
-inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm) {
-  // simplify the IR
-  mlir::GreedyRewriteConfig config;
-  config.enableRegionSimplification = false;
-  fir::addAVC(pm);
-  pm.addNestedPass<mlir::FuncOp>(fir::createCharacterConversionPass());
-  pm.addPass(mlir::createCanonicalizerPass(config));
-
-  // The default inliner pass adds the canonicalizer pass with the default
-  // configuration. Create the inliner pass with tco config.
-  llvm::StringMap<mlir::OpPassManager> pipelines;
-  pm.addPass(
-      mlir::createInlinerPass(pipelines, defaultFlangInlinerOptPipeline));
-  pm.addPass(mlir::createCSEPass());
-
-  // convert control flow to CFG form
-  fir::addCfgConversionPass(pm);
-  pm.addPass(mlir::createLowerToCFGPass());
-
-  pm.addPass(mlir::createCanonicalizerPass(config));
-}
-
-#if !defined(FLANG_EXCLUDE_CODEGEN)
-inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm) {
-  pm.addNestedPass<mlir::FuncOp>(fir::createAbstractResultOptPass());
-  fir::addCodeGenRewritePass(pm);
-  fir::addTargetRewritePass(pm);
-  fir::addFIRToLLVMPass(pm);
-}
-
-/// Create a pass pipeline for lowering from MLIR to LLVM IR
-///
-/// \param pm - MLIR pass manager that will hold the pipeline definition
-inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm) {
-  // Add default optimizer pass pipeline.
-  fir::createDefaultFIROptimizerPassPipeline(pm);
-
-  // Add codegen pass pipeline.
-  fir::createDefaultFIRCodeGenPassPipeline(pm);
-}
-#undef FLANG_EXCLUDE_CODEGEN
-#endif
-
-} // namespace fir
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 40d6d2017b2fa..be2e7cde916df 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -23,7 +23,6 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Target/LLVMIR/ModuleTranslation.h"
 #include "llvm/ADT/ArrayRef.h"
 
 #define DEBUG_TYPE "flang-codegen"
@@ -3306,44 +3305,8 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase<FIRToLLVMLowering> {
     }
   }
 };
-
-/// Lower from LLVM IR dialect to proper LLVM-IR and dump the module
-struct LLVMIRLoweringPass
-    : public mlir::PassWrapper<LLVMIRLoweringPass,
-                               mlir::OperationPass<mlir::ModuleOp>> {
-  using Printer = fir::LLVMIRLoweringPrinter;
-  LLVMIRLoweringPass(raw_ostream &output, Printer p)
-      : output{output}, printer{p} {}
-
-  mlir::ModuleOp getModule() { return getOperation(); }
-
-  void runOnOperation() override final {
-    auto *ctx = getModule().getContext();
-    auto optName = getModule().getName();
-    llvm::LLVMContext llvmCtx;
-    if (auto llvmModule = mlir::translateModuleToLLVMIR(
-            getModule(), llvmCtx, optName ? *optName : "FIRModule")) {
-      printer(*llvmModule, output);
-      return;
-    }
-
-    mlir::emitError(mlir::UnknownLoc::get(ctx), "could not emit LLVM-IR\n");
-    signalPassFailure();
-  }
-
-private:
-  raw_ostream &output;
-  Printer printer;
-};
-
 } // namespace
 
 std::unique_ptr<mlir::Pass> fir::createFIRToLLVMPass() {
   return std::make_unique<FIRToLLVMLowering>();
 }
-
-std::unique_ptr<mlir::Pass>
-fir::createLLVMDialectToLLVMPass(raw_ostream &output,
-                                 fir::LLVMIRLoweringPrinter printer) {
-  return std::make_unique<LLVMIRLoweringPass>(output, printer);
-}
diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt
index 779e20711513e..30a163de9ccaf 100644
--- a/flang/lib/Optimizer/Support/CMakeLists.txt
+++ b/flang/lib/Optimizer/Support/CMakeLists.txt
@@ -2,7 +2,6 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 
 add_flang_library(FIRSupport
   FIRContext.cpp
-  InitFIR.cpp
   InternalNames.cpp
   KindMapping.cpp
 
diff --git a/flang/lib/Optimizer/Support/InitFIR.cpp b/flang/lib/Optimizer/Support/InitFIR.cpp
deleted file mode 100644
index baa1336d9ca02..0000000000000
--- a/flang/lib/Optimizer/Support/InitFIR.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-//===-- Optimizer/Support/InitFIR.cpp -------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "flang/Optimizer/Support/InitFIR.h"
-#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
-#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
-
-void fir::support::registerLLVMTranslation(mlir::MLIRContext &context) {
-  mlir::DialectRegistry registry;
-  // Register OpenMP dialect interface here as well.
-  mlir::registerOpenMPDialectTranslation(registry);
-  // Register LLVM-IR dialect interface.
-  registerLLVMDialectTranslation(registry);
-  context.appendDialectRegistry(registry);
-}
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
deleted file mode 100644
index b417a6148d39b..0000000000000
--- a/flang/test/Fir/basic-program.fir
+++ /dev/null
@@ -1,12 +0,0 @@
-// RUN: tco %s | FileCheck %s
-// REQUIRES: shell
-
-// Check that tco is working with a basic test.
-
-func @_QQmain() {
-  return
-}
-
-// CHECK: ; ModuleID = 'FIRModule'
-// CHECK-LABEL: define void @_QQmain()
-// CHECK:       ret void
diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt
index a64b9c59bd02a..1a9c5ac72f153 100644
--- a/flang/tools/tco/CMakeLists.txt
+++ b/flang/tools/tco/CMakeLists.txt
@@ -1,25 +1,13 @@
-set(LLVM_LINK_COMPONENTS
-  AllTargetsAsmParsers
-  AllTargetsCodeGens
-  AllTargetsDescs
-  AllTargetsInfos
-)
-llvm_map_components_to_libnames(llvm_libs ${LLVM_LINK_COMPONENTS})
-
-add_flang_tool(tco tco.cpp)
-llvm_update_compile_flags(tco)
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
-target_link_libraries(tco PRIVATE
+
+set(LIBS
   FIRCodeGen
   FIRDialect
   FIRSupport
   FIRTransforms
-  FIRBuilder
   ${dialect_libs}
   MLIRIR
   MLIRLLVMIR
-  MLIRLLVMToLLVMIRTranslation
-  MLIRTargetLLVMIRExport
   MLIRPass
   MLIRStandardToLLVM
   MLIRTransforms
@@ -30,5 +18,7 @@ target_link_libraries(tco PRIVATE
   MLIRStandardToLLVM
   MLIRSupport
   MLIRVectorToLLVM
-  ${llvm_libs}
 )
+
+add_flang_tool(tco tco.cpp)
+target_link_libraries(tco PRIVATE ${LIBS})
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index 2bb3b27e7eb63..8f2c283bc82f9 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -11,14 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "flang/Optimizer/CodeGen/CodeGen.h"
-#include "flang/Optimizer/Support/FIRContext.h"
 #include "flang/Optimizer/Support/InitFIR.h"
-#include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/KindMapping.h"
-#include "flang/Optimizer/Transforms/Passes.h"
-#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
-#include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/Parser.h"
@@ -31,13 +25,11 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
-/// list of program return codes
 static cl::opt<std::string>
     inputFilename(cl::Positional, cl::desc("<input file>"), cl::init("-"));
 
@@ -50,14 +42,8 @@ static cl::opt<bool> emitFir("emit-fir",
                              cl::desc("Parse and pretty-print the input"),
                              cl::init(false));
 
-static cl::opt<std::string> targetTriple("target",
-                                         cl::desc("specify a target triple"),
-                                         cl::init("native"));
-
-#include "flang/Tools/CLOptions.inc"
-
 static void printModuleBody(mlir::ModuleOp mod, raw_ostream &output) {
-  for (auto &op : *mod.getBody())
+  for (auto &op : mod.getBody()->without_terminator())
     output << op << '\n';
 }
 
@@ -79,8 +65,6 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
   mlir::DialectRegistry registry;
   fir::support::registerDialects(registry);
   mlir::MLIRContext context(registry);
-  fir::support::loadDialects(context);
-  fir::support::registerLLVMTranslation(context);
   auto owningRef = mlir::parseSourceFile(sourceMgr, &context);
 
   if (!owningRef) {
@@ -96,31 +80,21 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
   ToolOutputFile out(outputFilename, ec, sys::fs::OF_None);
 
   // run passes
-  fir::KindMapping kindMap{&context};
-  fir::setTargetTriple(*owningRef, targetTriple);
-  fir::setKindMapping(*owningRef, kindMap);
-  mlir::PassManager pm(&context, mlir::OpPassManager::Nesting::Implicit);
-  pm.enableVerifier(/*verifyPasses=*/true);
+  mlir::PassManager pm{&context};
   mlir::applyPassManagerCLOptions(pm);
   if (emitFir) {
     // parse the input and pretty-print it back out
     // -emit-fir intentionally disables all the passes
-  } else if (passPipeline.hasAnyOccurrences()) {
-    auto errorHandler = [&](const Twine &msg) {
-      mlir::emitError(mlir::UnknownLoc::get(pm.getContext())) << msg;
-      return mlir::failure();
-    };
-    if (mlir::failed(passPipeline.addToPipeline(pm, errorHandler)))
-      return mlir::failure();
   } else {
-    fir::createMLIRToLLVMPassPipeline(pm);
-    fir::addLLVMDialectToLLVMPass(pm, out.os());
+    // TODO: Actually add passes when added to FIR code base
+    // add all the passes
+    // the user can disable them individually
   }
 
   // run the pass manager
   if (mlir::succeeded(pm.run(*owningRef))) {
     // passes ran successfully, so keep the output
-    if (emitFir || passPipeline.hasAnyOccurrences())
+    if (emitFir)
       printModuleBody(*owningRef, out.os());
     out.keep();
     return mlir::success();
@@ -133,13 +107,8 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
 }
 
 int main(int argc, char **argv) {
-  [[maybe_unused]] InitLLVM y(argc, argv);
   fir::support::registerMLIRPassesForFortranTools();
-  fir::registerOptCodeGenPasses();
-  fir::registerOptTransformPasses();
-  InitializeAllTargets();
-  mlir::registerAsmPrinterCLOptions();
-  mlir::registerMLIRContextCLOptions();
+  [[maybe_unused]] InitLLVM y(argc, argv);
   mlir::registerPassManagerCLOptions();
   mlir::PassPipelineCLParser passPipe("", "Compiler passes to run");
   cl::ParseCommandLineOptions(argc, argv, "Tilikum Crossing Optimizer\n");

From 48132bb1e437f213e7a183a6a69e7589abe33af6 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 21 Jan 2022 11:30:49 -0800
Subject: [PATCH 207/946] [RISCV] Simplify interface to combineMUL_VLToVWMUL.
 NFC

Instead of passing the both the SDNode* and 2 of the operands
in two different orders, just pass the SDNode * and a bool to
indicate which operand order to test.

While there rename to combineMUL_VLToVWMUL_VL.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4aba42b014d17..e1f1c49094424 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7242,9 +7242,14 @@ static SDValue performANY_EXTENDCombine(SDNode *N,
 
 // Try to form VWMUL or VWMULU.
 // FIXME: Support VWMULSU.
-static SDValue combineMUL_VLToVWMUL(SDNode *N, SDValue Op0, SDValue Op1,
-                                    SelectionDAG &DAG) {
+static SDValue combineMUL_VLToVWMUL_VL(SDNode *N, SelectionDAG &DAG,
+                                       bool Commute) {
   assert(N->getOpcode() == RISCVISD::MUL_VL && "Unexpected opcode");
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  if (Commute)
+    std::swap(Op0, Op1);
+
   bool IsSignExt = Op0.getOpcode() == RISCVISD::VSEXT_VL;
   bool IsZeroExt = Op0.getOpcode() == RISCVISD::VZEXT_VL;
   if ((!IsSignExt && !IsZeroExt) || !Op0.hasOneUse())
@@ -7887,15 +7892,11 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
     }
     break;
   }
-  case RISCVISD::MUL_VL: {
-    SDValue Op0 = N->getOperand(0);
-    SDValue Op1 = N->getOperand(1);
-    if (SDValue V = combineMUL_VLToVWMUL(N, Op0, Op1, DAG))
+  case RISCVISD::MUL_VL:
+    if (SDValue V = combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ false))
       return V;
-    if (SDValue V = combineMUL_VLToVWMUL(N, Op1, Op0, DAG))
-      return V;
-    return SDValue();
-  }
+    // Mul is commutative.
+    return combineMUL_VLToVWMUL_VL(N, DAG, /*Commute*/ true);
   case ISD::STORE: {
     auto *Store = cast<StoreSDNode>(N);
     SDValue Val = Store->getValue();

From d6e2c95d2252f479d5bc7a74df70f90eba94b34d Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Fri, 21 Jan 2022 18:00:33 +0100
Subject: [PATCH 208/946] [libc++] Use addressof in unordered_map.

This addresses the usage of `operator&` in `<unordered_map>`.

(Note there are still more headers with the same issue.)

Reviewed By: #libc, Quuxplusone, ldionne

Differential Revision: https://reviews.llvm.org/D117393
---
 libcxx/include/__hash_table                   | 40 +++++++--------
 libcxx/include/unordered_map                  | 21 ++++----
 ...rator.operators.addressof.compile.pass.cpp | 49 +++++++++++++++++++
 .../assign_move.addressof.compile.pass.cpp    | 42 ++++++++++++++++
 .../move.addressof.compile.pass.cpp           | 33 +++++++++++++
 .../move_alloc.addressof.compile.pass.cpp     | 36 ++++++++++++++
 .../emplace_hint.addressof.compile.pass.cpp   | 30 ++++++++++++
 ...rase_const_iter.addressof.compile.pass.cpp | 27 ++++++++++
 .../erase_range.addressof.compile.pass.cpp    | 27 ++++++++++
 ...nt_const_lvalue.addressof.compile.pass.cpp | 28 +++++++++++
 ...ible_value_type.addressof.compile.pass.cpp | 28 +++++++++++
 ...alue_value_type.addressof.compile.pass.cpp | 28 +++++++++++
 ...ry_emplace_hint.addressof.compile.pass.cpp | 40 +++++++++++++++
 .../swap.addressof.compile.pass.cpp           | 29 +++++++++++
 .../move.addressof.compile.pass.cpp           | 33 +++++++++++++
 .../move_alloc.addressof.compile.pass.cpp     | 36 ++++++++++++++
 .../emplace_hint.addressof.compile.pass.cpp   | 30 ++++++++++++
 17 files changed, 527 insertions(+), 30 deletions(-)
 create mode 100644 libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp

diff --git a/libcxx/include/__hash_table b/libcxx/include/__hash_table
index 6b682ab27c6c3..adc732cffb015 100644
--- a/libcxx/include/__hash_table
+++ b/libcxx/include/__hash_table
@@ -308,9 +308,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_iterator& operator=(const __hash_iterator& __i)
     {
-        if (this != &__i)
+        if (this != _VSTD::addressof(__i))
         {
-            __get_db()->__iterator_copy(this, &__i);
+            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
             __node_ = __i.__node_;
         }
         return *this;
@@ -406,7 +406,7 @@ public:
         : __node_(__x.__node_)
     {
 #if _LIBCPP_DEBUG_LEVEL == 2
-        __get_db()->__iterator_copy(this, &__x);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__x));
 #endif
     }
 
@@ -415,7 +415,7 @@ public:
     __hash_const_iterator(const __hash_const_iterator& __i)
         : __node_(__i.__node_)
     {
-        __get_db()->__iterator_copy(this, &__i);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -427,9 +427,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_const_iterator& operator=(const __hash_const_iterator& __i)
     {
-        if (this != &__i)
+        if (this != _VSTD::addressof(__i))
         {
-            __get_db()->__iterator_copy(this, &__i);
+            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
             __node_ = __i.__node_;
         }
         return *this;
@@ -523,7 +523,7 @@ public:
           __bucket_(__i.__bucket_),
           __bucket_count_(__i.__bucket_count_)
     {
-        __get_db()->__iterator_copy(this, &__i);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -535,9 +535,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_local_iterator& operator=(const __hash_local_iterator& __i)
     {
-        if (this != &__i)
+        if (this != _VSTD::addressof(__i))
         {
-            __get_db()->__iterator_copy(this, &__i);
+            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
             __node_ = __i.__node_;
             __bucket_ = __i.__bucket_;
             __bucket_count_ = __i.__bucket_count_;
@@ -655,7 +655,7 @@ public:
           __bucket_count_(__x.__bucket_count_)
     {
 #if _LIBCPP_DEBUG_LEVEL == 2
-        __get_db()->__iterator_copy(this, &__x);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__x));
 #endif
     }
 
@@ -666,7 +666,7 @@ public:
           __bucket_(__i.__bucket_),
           __bucket_count_(__i.__bucket_count_)
     {
-        __get_db()->__iterator_copy(this, &__i);
+        __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
     }
 
     _LIBCPP_INLINE_VISIBILITY
@@ -678,9 +678,9 @@ public:
     _LIBCPP_INLINE_VISIBILITY
     __hash_const_local_iterator& operator=(const __hash_const_local_iterator& __i)
     {
-        if (this != &__i)
+        if (this != _VSTD::addressof(__i))
         {
-            __get_db()->__iterator_copy(this, &__i);
+            __get_db()->__iterator_copy(this, _VSTD::addressof(__i));
             __node_ = __i.__node_;
             __bucket_ = __i.__bucket_;
             __bucket_count_ = __i.__bucket_count_;
@@ -1615,7 +1615,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign(
         __u.size() = 0;
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -2021,7 +2021,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(
         const_iterator __p, __node_pointer __cp)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                          "unordered container::emplace_hint(const_iterator, args...) called with an iterator not"
                          " referring to this unordered container");
     if (__p != end() && key_eq()(*__p, __cp->__value_))
@@ -2148,7 +2148,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi(
         const_iterator __p, _Args&&... __args)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                          "unordered container::emplace_hint(const_iterator, args...) called with an iterator not"
                          " referring to this unordered container");
     __node_holder __h = __construct_node(_VSTD::forward<_Args>(__args)...);
@@ -2472,7 +2472,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p)
 {
     __next_pointer __np = __p.__node_;
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                          "unordered container erase(iterator) called with an iterator not"
                          " referring to this container");
     _LIBCPP_DEBUG_ASSERT(__p != end(),
@@ -2492,10 +2492,10 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator
 __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first,
                                                 const_iterator __last)
 {
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__first)) == this,
                          "unordered container::erase(iterator, iterator) called with an iterator not"
                          " referring to this container");
-    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__last) == this,
+    _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__last)) == this,
                          "unordered container::erase(iterator, iterator) called with an iterator not"
                          " referring to this container");
     for (const_iterator __p = __first; __first != __last; __p = __first)
@@ -2727,7 +2727,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u)
         __u.__bucket_list_[__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] =
             __u.__p1_.first().__ptr();
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index 73edadab20990..361db707d2461 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -519,6 +519,7 @@ template <class Key, class T, class Hash, class Pred, class Alloc>
 #include <__functional/is_transparent.h>
 #include <__hash_table>
 #include <__iterator/iterator_traits.h>
+#include <__memory/addressof.h>
 #include <__node_handle>
 #include <__utility/forward.h>
 #include <compare>
@@ -1186,7 +1187,7 @@ public:
         {return __table_.__insert_unique(__x);}
 
     iterator insert(const_iterator __p, const value_type& __x) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                              "unordered_map::insert(const_iterator, const value_type&) called with an iterator not "
                              "referring to this unordered_map");
         ((void)__p);
@@ -1207,7 +1208,7 @@ public:
         {return __table_.__insert_unique(_VSTD::move(__x));}
 
     iterator insert(const_iterator __p, value_type&& __x) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                              "unordered_map::insert(const_iterator, const value_type&) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__p);
@@ -1225,7 +1226,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator insert(const_iterator __p, _Pp&& __x)
         {
-            _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+            _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                                  "unordered_map::insert(const_iterator, value_type&&) called with an iterator not"
                                  " referring to this unordered_map");
           ((void)__p);
@@ -1241,7 +1242,7 @@ public:
     template <class... _Args>
     _LIBCPP_INLINE_VISIBILITY
     iterator emplace_hint(const_iterator __p, _Args&&... __args) {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                              "unordered_map::emplace_hint(const_iterator, args...) called with an iterator not"
                              " referring to this unordered_map");
           ((void)__p);
@@ -1273,7 +1274,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator try_emplace(const_iterator __h, const key_type& __k, _Args&&... __args)
     {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this,
                              "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__h);
@@ -1284,7 +1285,7 @@ public:
         _LIBCPP_INLINE_VISIBILITY
         iterator try_emplace(const_iterator __h, key_type&& __k, _Args&&... __args)
     {
-        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(&__h) == this,
+        _LIBCPP_DEBUG_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__h)) == this,
                              "unordered_map::try_emplace(const_iterator, key, args...) called with an iterator not"
                              " referring to this unordered_map");
         ((void)__h);
@@ -1692,7 +1693,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
 {
     _VSTD::__debug_db_insert_c(this);
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -1712,7 +1713,7 @@ unordered_map<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_map(
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
     else
-        __get_db()->swap(this, &__u);
+        __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -2468,7 +2469,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
 {
     _VSTD::__debug_db_insert_c(this);
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -2489,7 +2490,7 @@ unordered_multimap<_Key, _Tp, _Hash, _Pred, _Alloc>::unordered_multimap(
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
     else
-        __get_db()->swap(this, &__u);
+        __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
diff --git a/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..856b78293a107
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/iterator.operators.addressof.compile.pass.cpp
@@ -0,0 +1,49 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// Validate the constructors of the (const)(_local)_iterator classes to be
+// properly guarded against ADL-hijacking operator&.
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+template <class ToIterator, class FromIterator>
+void test() {
+  FromIterator from;
+  ToIterator copy(from);
+  copy = from;
+
+  ToIterator move(std::move(from));
+  from = FromIterator();
+  move = std::move(from);
+}
+
+void test() {
+  {
+    using I = std::unordered_map<operator_hijacker, operator_hijacker>::iterator;
+    using CI = std::unordered_map<operator_hijacker, operator_hijacker>::const_iterator;
+    test<I, I>();
+    test<CI, I>();
+    test<CI, CI>();
+  }
+  {
+    using IL = std::unordered_map<operator_hijacker, operator_hijacker>::local_iterator;
+    using CIL = std::unordered_map<operator_hijacker, operator_hijacker>::const_local_iterator;
+    test<IL, IL>();
+    test<CIL, IL>();
+    test<CIL, CIL>();
+  }
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..6dbd7aaea2a8e
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/assign_move.addressof.compile.pass.cpp
@@ -0,0 +1,42 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// unordered_map& operator=(unordered_map&&)
+//     noexcept(
+//         allocator_type::propagate_on_container_move_assignment::value &&
+//         is_nothrow_move_assignable<allocator_type>::value &&
+//         is_nothrow_move_assignable<hasher>::value &&
+//         is_nothrow_move_assignable<key_equal>::value);
+
+// Validate whether the container can be move-assigned with an ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  {
+    std::unordered_map<int, operator_hijacker> mo;
+    std::unordered_map<int, operator_hijacker> m;
+    m = std::move(mo);
+  }
+  {
+    std::unordered_map<operator_hijacker, int> mo;
+    std::unordered_map<operator_hijacker, int> m;
+    m = std::move(mo);
+  }
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..e36c6525d631b
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move.addressof.compile.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// unordered_map(unordered_map&& u)
+//   noexcept(
+//     is_nothrow_move_constructible<hasher>::value &&
+//     is_nothrow_move_constructible<key_equal>::value &&
+//     is_nothrow_move_constructible<allocator_type>::value);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> mo;
+  std::unordered_map<operator_hijacker, operator_hijacker> m(std::move(mo));
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..1fec0ee5d0f4b
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.cnstr/move_alloc.addressof.compile.pass.cpp
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// unordered_map(unordered_map&& u, const allocator_type& a);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+void test() {
+  using A = test_allocator<std::pair<const operator_hijacker, operator_hijacker>>;
+  using C = std::unordered_map<operator_hijacker, operator_hijacker, std::hash<operator_hijacker>,
+                               std::equal_to<operator_hijacker>, A>;
+
+  C mo;
+  C m(std::move(mo), A());
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..58ddefd8cfbfc
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/emplace_hint.addressof.compile.pass.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// template <class... Args>
+//     iterator emplace_hint(const_iterator position, Args&&... args);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  m.emplace_hint(m.cbegin());
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..1461f2499baad
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_const_iter.addressof.compile.pass.cpp
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// iterator erase(const_iterator p)
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  m.erase(m.cbegin());
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..5f342f7b2152f
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/erase_range.addressof.compile.pass.cpp
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// iterator erase(const_iterator first, const_iterator last)
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  m.erase(m.cbegin(), m.cend());
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..db1805e7d7e63
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_hint_const_lvalue.addressof.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// iterator insert(const_iterator p, const value_type& x);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  const std::pair<operator_hijacker, operator_hijacker> v;
+  m.insert(m.cend(), v);
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..530b826b61e78
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_constructible_value_type.addressof.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// template <class P,
+//           class = typename enable_if<is_convertible<P, value_type>::value>::type>
+//     pair<iterator, bool> insert(P&& x);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test(std::unordered_map<operator_hijacker, operator_hijacker>& m) { m.insert(m.cend(), *m.begin()); }
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..80219cb193edd
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/insert_rvalue_value_type.addressof.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// iterator insert(const_iterator hint, value_type&& obj);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test(std::unordered_map<operator_hijacker, operator_hijacker>& m) {
+  m.insert(m.cend(), std::pair<operator_hijacker, operator_hijacker>{});
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..2c667374d4fe8
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.modifiers/try_emplace_hint.addressof.compile.pass.cpp
@@ -0,0 +1,40 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_map
+
+// template <class... Args>
+//     iterator try_emplace(const_iterator hint, const key_type& k, Args&&... args);
+// template <class... Args>
+//     iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args);
+// template <class M>
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m;
+  {
+    const operator_hijacker k;
+    m.try_emplace(m.cend(), k);
+  }
+  {
+    operator_hijacker k;
+    m.try_emplace(m.cend(), std::move(k));
+  }
+}
diff --git a/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..f5b5f516d42b5
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.map/unord.map.swap/swap.addressof.compile.pass.cpp
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// void swap(unordered_map& c)
+//     noexcept(allocator_traits<Allocator>::is_always_equal::value &&
+//               noexcept(swap(declval<Hash&>(), declval<Hash&>())) &&
+//               noexcept(swap(declval<Pred&>(), declval<Pred&>())));
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_map<operator_hijacker, operator_hijacker> m1;
+  std::unordered_map<operator_hijacker, operator_hijacker> m2;
+  std::swap(m1, m2);
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..73b19f35e2048
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move.addressof.compile.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_multimap
+
+// unordered_multimap(unordered_multimap&&)
+//   noexcept(
+//     is_nothrow_move_constructible<hasher>::value &&
+//     is_nothrow_move_constructible<key_equal>::value &&
+//     is_nothrow_move_constructible<allocator_type>::value);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_multimap<operator_hijacker, operator_hijacker> mo;
+  std::unordered_multimap<operator_hijacker, operator_hijacker> m(std::move(mo));
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..6419a03666d65
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.cnstr/move_alloc.addressof.compile.pass.cpp
@@ -0,0 +1,36 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_multimap
+
+// unordered_multimap(unordered_map&& u, const allocator_type& a);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+#include "test_allocator.h"
+#include "min_allocator.h"
+
+void test() {
+  using A = test_allocator<std::pair<const operator_hijacker, operator_hijacker>>;
+  using C = std::unordered_multimap<operator_hijacker, operator_hijacker, std::hash<operator_hijacker>,
+                                    std::equal_to<operator_hijacker>, A>;
+
+  C mo;
+  C m(std::move(mo), A());
+}
diff --git a/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..5e23b73cf34b3
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multimap/unord.multimap.modifiers/emplace_hint.addressof.compile.pass.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_map>
+
+// template <class Key, class T, class Hash = hash<Key>, class Pred = equal_to<Key>,
+//           class Alloc = allocator<pair<const Key, T>>>
+// class unordered_multimap
+
+// template <class... Args>
+//     iterator emplace_hint(const_iterator position, Args&&... args);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_map>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_multimap<operator_hijacker, operator_hijacker> m;
+  m.emplace_hint(m.cbegin());
+}

From 4d0a18d06e8ea6ee38efd53e9febf7cefe7d3925 Mon Sep 17 00:00:00 2001
From: wren romano <2998727+wrengr@users.noreply.github.com>
Date: Thu, 20 Jan 2022 12:56:25 -0800
Subject: [PATCH 209/946] [mlir][sparse] Adding assertions for overhead storage
 types

Fixes https://bugs.llvm.org/show_bug.cgi?id=52314 aka https://github.com/llvm/llvm-project/issues/51656

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D117597
---
 .../lib/ExecutionEngine/SparseTensorUtils.cpp | 50 +++++++++++++++----
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
index 9087ed1885bfa..20cd1b53d31b4 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -26,6 +26,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <limits>
 #include <numeric>
 #include <vector>
 
@@ -254,25 +255,28 @@ class SparseTensorStorage : public SparseTensorStorageBase {
     bool allDense = true;
     uint64_t sz = 1;
     for (uint64_t r = 0; r < rank; r++) {
+      assert(sizes[r] > 0 && "Dimension size zero has trivial storage");
       sz *= sizes[r];
       if (sparsity[r] == DimLevelType::kCompressed) {
         pointers[r].reserve(sz + 1);
         indices[r].reserve(sz);
         sz = 1;
         allDense = false;
+        // Prepare the pointer structure.  We cannot use `addPointer`
+        // here, because `isCompressedDim` won't work until after this
+        // preparation has been done.
+        pointers[r].push_back(0);
       } else {
         assert(sparsity[r] == DimLevelType::kDense &&
                "singleton not yet supported");
       }
     }
-    // Prepare sparse pointer structures for all dimensions.
-    for (uint64_t r = 0; r < rank; r++)
-      if (sparsity[r] == DimLevelType::kCompressed)
-        pointers[r].push_back(0);
     // Then assign contents from coordinate scheme tensor if provided.
     if (tensor) {
-      // Lexicographically sort the tensor, to ensure precondition of `fromCOO`.
+      // Ensure both preconditions of `fromCOO`.
+      assert(tensor->getSizes() == sizes && "Tensor size mismatch");
       tensor->sort();
+      // Now actually insert the `elements`.
       const std::vector<Element<V>> &elements = tensor->getElements();
       uint64_t nnz = elements.size();
       values.reserve(nnz);
@@ -403,10 +407,33 @@ class SparseTensorStorage : public SparseTensorStorageBase {
   }
 
 private:
+  /// Appends the next free position of `indices[d]` to `pointers[d]`.
+  /// Thus, when called after inserting the last element of a segment,
+  /// it will append the position where the next segment begins.
+  inline void addPointer(uint64_t d) {
+    assert(isCompressedDim(d)); // Entails `d < getRank()`.
+    uint64_t p = indices[d].size();
+    assert(p <= std::numeric_limits<P>::max() &&
+           "Pointer value is too large for the P-type");
+    pointers[d].push_back(p); // Here is where we convert to `P`.
+  }
+
+  /// Appends the given index to `indices[d]`.
+  inline void addIndex(uint64_t d, uint64_t i) {
+    assert(isCompressedDim(d)); // Entails `d < getRank()`.
+    assert(i <= std::numeric_limits<I>::max() &&
+           "Index value is too large for the I-type");
+    indices[d].push_back(i); // Here is where we convert to `I`.
+  }
+
   /// Initializes sparse tensor storage scheme from a memory-resident sparse
   /// tensor in coordinate scheme. This method prepares the pointers and
   /// indices arrays under the given per-dimension dense/sparse annotations.
-  /// Precondition: the `elements` must be lexicographically sorted.
+  ///
+  /// Preconditions:
+  /// (1) the `elements` must be lexicographically sorted.
+  /// (2) the indices of every element are valid for `sizes` (equal rank
+  ///     and pointwise less-than).
   void fromCOO(const std::vector<Element<V>> &elements, uint64_t lo,
                uint64_t hi, uint64_t d) {
     // Once dimensions are exhausted, insert the numerical values.
@@ -426,7 +453,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
         seg++;
       // Handle segment in interval for sparse or dense dimension.
       if (isCompressedDim(d)) {
-        indices[d].push_back(i);
+        addIndex(d, i);
       } else {
         // For dense storage we must fill in all the zero values between
         // the previous element (when last we ran this for-loop) and the
@@ -441,7 +468,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
     }
     // Finalize the sparse pointer structure at this dimension.
     if (isCompressedDim(d)) {
-      pointers[d].push_back(indices[d].size());
+      addPointer(d);
     } else {
       // For dense storage we must fill in all the zero values after
       // the last element.
@@ -479,7 +506,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
     if (d == getRank()) {
       values.push_back(0);
     } else if (isCompressedDim(d)) {
-      pointers[d].push_back(indices[d].size());
+      addPointer(d);
     } else {
       for (uint64_t full = 0, sz = sizes[d]; full < sz; full++)
         endDim(d + 1);
@@ -493,7 +520,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
     for (uint64_t i = 0; i < rank - diff; i++) {
       uint64_t d = rank - i - 1;
       if (isCompressedDim(d)) {
-        pointers[d].push_back(indices[d].size());
+        addPointer(d);
       } else {
         for (uint64_t full = idx[d] + 1, sz = sizes[d]; full < sz; full++)
           endDim(d + 1);
@@ -508,7 +535,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
     for (uint64_t d = diff; d < rank; d++) {
       uint64_t i = cursor[d];
       if (isCompressedDim(d)) {
-        indices[d].push_back(i);
+        addIndex(d, i);
       } else {
         for (uint64_t full = top; full < i; full++)
           endDim(d + 1);
@@ -532,6 +559,7 @@ class SparseTensorStorage : public SparseTensorStorageBase {
 
   /// Returns true if dimension is compressed.
   inline bool isCompressedDim(uint64_t d) const {
+    assert(d < getRank());
     return (!pointers[d].empty());
   }
 

From cb8b94f6efa9a1b434afd9906e87e918ffe762dd Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Fri, 21 Jan 2022 20:01:06 +0000
Subject: [PATCH 210/946] [AArch64] Add extra tests useful in testing hadd. NFC

---
 llvm/test/CodeGen/AArch64/arm64-vhadd.ll | 427 +++++++++++++++++++++++
 1 file changed, 427 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
index 712bd16870237..d692d6b2f8bea 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll
@@ -817,6 +817,433 @@ define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) nounwind {
 }
 
 
+define <4 x i16> @hadd8_sext_asr(<4 x i8> %src1, <4 x i8> %src2) nounwind {
+; CHECK-LABEL: hadd8_sext_asr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl.4h v0, v0, #8
+; CHECK-NEXT:    shl.4h v1, v1, #8
+; CHECK-NEXT:    sshr.4h v0, v0, #8
+; CHECK-NEXT:    ssra.4h v0, v1, #8
+; CHECK-NEXT:    sshr.4h v0, v0, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
+  %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
+  %add = add <4 x i16> %zextsrc1, %zextsrc2
+  %resulti8 = ashr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %resulti8
+}
+
+define <4 x i16> @hadd8_zext_asr(<4 x i8> %src1, <4 x i8> %src2) nounwind {
+; CHECK-LABEL: hadd8_zext_asr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic.4h v0, #255, lsl #8
+; CHECK-NEXT:    bic.4h v1, #255, lsl #8
+; CHECK-NEXT:    add.4h v0, v0, v1
+; CHECK-NEXT:    ushr.4h v0, v0, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
+  %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
+  %add = add <4 x i16> %zextsrc1, %zextsrc2
+  %resulti8 = ashr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %resulti8
+}
+
+define <4 x i16> @hadd8_sext_lsr(<4 x i8> %src1, <4 x i8> %src2) nounwind {
+; CHECK-LABEL: hadd8_sext_lsr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl.4h v0, v0, #8
+; CHECK-NEXT:    shl.4h v1, v1, #8
+; CHECK-NEXT:    sshr.4h v0, v0, #8
+; CHECK-NEXT:    ssra.4h v0, v1, #8
+; CHECK-NEXT:    ushr.4h v0, v0, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = sext <4 x i8> %src1 to <4 x i16>
+  %zextsrc2 = sext <4 x i8> %src2 to <4 x i16>
+  %add = add <4 x i16> %zextsrc1, %zextsrc2
+  %resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %resulti8
+}
+
+define <4 x i16> @hadd8_zext_lsr(<4 x i8> %src1, <4 x i8> %src2) nounwind {
+; CHECK-LABEL: hadd8_zext_lsr:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic.4h v0, #255, lsl #8
+; CHECK-NEXT:    bic.4h v1, #255, lsl #8
+; CHECK-NEXT:    add.4h v0, v0, v1
+; CHECK-NEXT:    ushr.4h v0, v0, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <4 x i8> %src1 to <4 x i16>
+  %zextsrc2 = zext <4 x i8> %src2 to <4 x i16>
+  %add = add <4 x i16> %zextsrc1, %zextsrc2
+  %resulti8 = lshr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
+  ret <4 x i16> %resulti8
+}
+
+
+
+define void @testLowerToSHADD8b_c(<8 x i8> %src1, <8 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSHADD8b_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v1, #10
+; CHECK-NEXT:    saddw.8h v0, v1, v0
+; CHECK-NEXT:    shrn.8b v0, v0, #1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <8 x i8> %src1 to <8 x i16>
+  %add = add <8 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+  %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  store <8 x i8> %result, <8 x i8>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToSHADD4h_c(<4 x i16> %src1, <4 x i16>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSHADD4h_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.4s v1, #10
+; CHECK-NEXT:    saddw.4s v0, v1, v0
+; CHECK-NEXT:    shrn.4h v0, v0, #1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <4 x i16> %src1 to <4 x i32>
+  %add = add <4 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10>
+  %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+  %result = trunc <4 x i32> %resulti16 to <4 x i16>
+  store <4 x i16> %result, <4 x i16>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToSHADD2s_c(<2 x i32> %src1, <2 x i32>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSHADD2s_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    dup.2d v1, x8
+; CHECK-NEXT:    saddw.2d v0, v1, v0
+; CHECK-NEXT:    shrn.2s v0, v0, #1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <2 x i32> %src1 to <2 x i64>
+  %add = add <2 x i64> %sextsrc1, <i64 10, i64 10>
+  %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
+  %result = trunc <2 x i64> %resulti16 to <2 x i32>
+  store <2 x i32> %result, <2 x i32>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToSHADD16b_c(<16 x i8> %src1, <16 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSHADD16b_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v1, #10
+; CHECK-NEXT:    saddw.8h v2, v1, v0
+; CHECK-NEXT:    saddw2.8h v0, v1, v0
+; CHECK-NEXT:    shrn.8b v1, v2, #1
+; CHECK-NEXT:    shrn2.16b v1, v0, #1
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <16 x i8> %src1 to <16 x i16>
+  %add = add <16 x i16> %sextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+  %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <16 x i16> %resulti16 to <16 x i8>
+  store <16 x i8> %result, <16 x i8>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToSHADD8h_c(<8 x i16> %src1, <8 x i16>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSHADD8h_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.4s v1, #10
+; CHECK-NEXT:    saddw.4s v2, v1, v0
+; CHECK-NEXT:    saddw2.4s v0, v1, v0
+; CHECK-NEXT:    shrn.4h v1, v2, #1
+; CHECK-NEXT:    shrn2.8h v1, v0, #1
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <8 x i16> %src1 to <8 x i32>
+  %add = add <8 x i32> %sextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %result = trunc <8 x i32> %resulti16 to <8 x i16>
+  store <8 x i16> %result, <8 x i16>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToSHADD4s_c(<4 x i32> %src1, <4 x i32>* %dest) nounwind {
+; CHECK-LABEL: testLowerToSHADD4s_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    dup.2d v1, x8
+; CHECK-NEXT:    saddw.2d v2, v1, v0
+; CHECK-NEXT:    saddw2.2d v0, v1, v0
+; CHECK-NEXT:    shrn.2s v1, v2, #1
+; CHECK-NEXT:    shrn2.4s v1, v0, #1
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %sextsrc1 = sext <4 x i32> %src1 to <4 x i64>
+  %add = add <4 x i64> %sextsrc1, <i64 10, i64 10, i64 10, i64 10>
+  %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+  %result = trunc <4 x i64> %resulti16 to <4 x i32>
+  store <4 x i32> %result, <4 x i32>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToUHADD8b_c(<8 x i8> %src1, <8 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLowerToUHADD8b_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v1, #10
+; CHECK-NEXT:    uaddw.8h v0, v1, v0
+; CHECK-NEXT:    shrn.8b v0, v0, #1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i8> %src1 to <8 x i16>
+  %add = add <8 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+  %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  store <8 x i8> %result, <8 x i8>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToUHADD4h_c(<4 x i16> %src1, <4 x i16>* %dest) nounwind {
+; CHECK-LABEL: testLowerToUHADD4h_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.4s v1, #10
+; CHECK-NEXT:    uaddw.4s v0, v1, v0
+; CHECK-NEXT:    shrn.4h v0, v0, #1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <4 x i16> %src1 to <4 x i32>
+  %add = add <4 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10>
+  %resulti16 = lshr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+  %result = trunc <4 x i32> %resulti16 to <4 x i16>
+  store <4 x i16> %result, <4 x i16>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToUHADD2s_c(<2 x i32> %src1, <2 x i32>* %dest) nounwind {
+; CHECK-LABEL: testLowerToUHADD2s_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    dup.2d v1, x8
+; CHECK-NEXT:    uaddw.2d v0, v1, v0
+; CHECK-NEXT:    shrn.2s v0, v0, #1
+; CHECK-NEXT:    str d0, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <2 x i32> %src1 to <2 x i64>
+  %add = add <2 x i64> %zextsrc1, <i64 10, i64 10>
+  %resulti16 = lshr <2 x i64> %add, <i64 1, i64 1>
+  %result = trunc <2 x i64> %resulti16 to <2 x i32>
+  store <2 x i32> %result, <2 x i32>* %dest, align 8
+  ret void
+}
+
+define void @testLowerToUHADD16b_c(<16 x i8> %src1, <16 x i8>* %dest) nounwind {
+; CHECK-LABEL: testLowerToUHADD16b_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v1, #10
+; CHECK-NEXT:    uaddw.8h v2, v1, v0
+; CHECK-NEXT:    uaddw2.8h v0, v1, v0
+; CHECK-NEXT:    shrn.8b v1, v2, #1
+; CHECK-NEXT:    shrn2.16b v1, v0, #1
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <16 x i8> %src1 to <16 x i16>
+  %add = add <16 x i16> %zextsrc1, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
+  %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <16 x i16> %resulti16 to <16 x i8>
+  store <16 x i8> %result, <16 x i8>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToUHADD8h_c(<8 x i16> %src1, <8 x i16>* %dest) nounwind {
+; CHECK-LABEL: testLowerToUHADD8h_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.4s v1, #10
+; CHECK-NEXT:    uaddw.4s v2, v1, v0
+; CHECK-NEXT:    uaddw2.4s v0, v1, v0
+; CHECK-NEXT:    shrn.4h v1, v2, #1
+; CHECK-NEXT:    shrn2.8h v1, v0, #1
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
+  %add = add <8 x i32> %zextsrc1, <i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
+  %resulti16 = lshr <8 x i32> %add, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
+  %result = trunc <8 x i32> %resulti16 to <8 x i16>
+  store <8 x i16> %result, <8 x i16>* %dest, align 16
+  ret void
+}
+
+define void @testLowerToUHADD4s_c(<4 x i32> %src1, <4 x i32>* %dest) nounwind {
+; CHECK-LABEL: testLowerToUHADD4s_c:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #10
+; CHECK-NEXT:    dup.2d v1, x8
+; CHECK-NEXT:    uaddw.2d v2, v1, v0
+; CHECK-NEXT:    uaddw2.2d v0, v1, v0
+; CHECK-NEXT:    shrn.2s v1, v2, #1
+; CHECK-NEXT:    shrn2.4s v1, v0, #1
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %zextsrc1 = zext <4 x i32> %src1 to <4 x i64>
+  %add = add <4 x i64> %zextsrc1, <i64 10, i64 10, i64 10, i64 10>
+  %resulti16 = lshr <4 x i64> %add, <i64 1, i64 1, i64 1, i64 1>
+  %result = trunc <4 x i64> %resulti16 to <4 x i32>
+  store <4 x i32> %result, <4 x i32>* %dest, align 16
+  ret void
+}
+
+
+define <8 x i8> @andmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind {
+; CHECK-LABEL: andmaskv8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v2, #7
+; CHECK-NEXT:    and.16b v0, v0, v2
+; CHECK-NEXT:    uaddw.8h v0, v0, v1
+; CHECK-NEXT:    shrn.8b v0, v0, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %zextsrc2 = zext <8 x i8> %src2 to <8 x i16>
+  %add = add <8 x i16> %zextsrc1, %zextsrc2
+  %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  ret <8 x i8> %result
+}
+
+define <16 x i8> @andmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) nounwind {
+; CHECK-LABEL: andmaskv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v3, #7
+; CHECK-NEXT:    and.16b v0, v0, v3
+; CHECK-NEXT:    and.16b v1, v1, v3
+; CHECK-NEXT:    uaddw.8h v0, v0, v2
+; CHECK-NEXT:    uaddw2.8h v1, v1, v2
+; CHECK-NEXT:    shrn.8b v0, v0, #1
+; CHECK-NEXT:    shrn2.16b v0, v1, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %zextsrc2 = zext <16 x i8> %src2 to <16 x i16>
+  %add = add <16 x i16> %zextsrc1, %zextsrc2
+  %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <16 x i16> %resulti16 to <16 x i8>
+  ret <16 x i8> %result
+}
+
+define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) nounwind {
+; CHECK-LABEL: andmask2v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v4, #7
+; CHECK-NEXT:    movi.8h v5, #3
+; CHECK-NEXT:    and.16b v0, v0, v4
+; CHECK-NEXT:    and.16b v2, v2, v5
+; CHECK-NEXT:    and.16b v1, v1, v4
+; CHECK-NEXT:    and.16b v3, v3, v5
+; CHECK-NEXT:    add.8h v0, v0, v2
+; CHECK-NEXT:    add.8h v1, v1, v3
+; CHECK-NEXT:    shrn.8b v0, v0, #1
+; CHECK-NEXT:    shrn2.16b v0, v1, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = and <16 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %zextsrc2 = and <16 x i16> %src2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  %add = add <16 x i16> %zextsrc1, %zextsrc2
+  %resulti16 = lshr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <16 x i16> %resulti16 to <16 x i8>
+  ret <16 x i8> %result
+}
+
+define <8 x i8> @andmask2v8i8(<8 x i16> %src1, <8 x i16> %src2) nounwind {
+; CHECK-LABEL: andmask2v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v2, #7
+; CHECK-NEXT:    bic.8h v1, #255, lsl #8
+; CHECK-NEXT:    and.16b v0, v0, v2
+; CHECK-NEXT:    add.8h v0, v0, v1
+; CHECK-NEXT:    shrn.8b v0, v0, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %zextsrc2 = and <8 x i16> %src2, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
+  %add = add <8 x i16> %zextsrc1, %zextsrc2
+  %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  ret <8 x i8> %result
+}
+
+define <8 x i16> @andmask3v8i8(<8 x i16> %src1, <8 x i16> %src2) nounwind {
+; CHECK-LABEL: andmask3v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.8h v2, #7
+; CHECK-NEXT:    bic.8h v1, #254, lsl #8
+; CHECK-NEXT:    and.16b v0, v0, v2
+; CHECK-NEXT:    add.8h v0, v0, v1
+; CHECK-NEXT:    ushr.8h v0, v0, #1
+; CHECK-NEXT:    ret
+  %zextsrc1 = and <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %zextsrc2 = and <8 x i16> %src2, <i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511, i16 511>
+  %add = add <8 x i16> %zextsrc1, %zextsrc2
+  %resulti16 = lshr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %resulti16
+}
+
+define <16 x i8> @sextmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) nounwind {
+; CHECK-LABEL: sextmaskv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll.8h v3, v2, #0
+; CHECK-NEXT:    sshr.8h v1, v1, #11
+; CHECK-NEXT:    ssra.8h v3, v0, #11
+; CHECK-NEXT:    saddw2.8h v1, v1, v2
+; CHECK-NEXT:    shrn.8b v0, v3, #1
+; CHECK-NEXT:    shrn2.16b v0, v1, #1
+; CHECK-NEXT:    ret
+  %sextsrc1 = ashr <16 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %sextsrc2 = sext <16 x i8> %src2 to <16 x i16>
+  %add = add <16 x i16> %sextsrc1, %sextsrc2
+  %resulti16 = ashr <16 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <16 x i16> %resulti16 to <16 x i8>
+  ret <16 x i8> %result
+}
+
+define <8 x i8> @sextmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind {
+; CHECK-LABEL: sextmaskv8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll.8h v1, v1, #0
+; CHECK-NEXT:    ssra.8h v1, v0, #11
+; CHECK-NEXT:    shrn.8b v0, v1, #1
+; CHECK-NEXT:    ret
+  %sextsrc1 = ashr <8 x i16> %src1, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11>
+  %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
+  %add = add <8 x i16> %sextsrc1, %sextsrc2
+  %resulti16 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  ret <8 x i8> %result
+}
+
+define <8 x i8> @sextmask2v8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind {
+; CHECK-LABEL: sextmask2v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll.8h v1, v1, #0
+; CHECK-NEXT:    ssra.8h v1, v0, #8
+; CHECK-NEXT:    shrn.8b v0, v1, #1
+; CHECK-NEXT:    ret
+  %sextsrc1 = ashr <8 x i16> %src1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+  %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
+  %add = add <8 x i16> %sextsrc1, %sextsrc2
+  %resulti16 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  ret <8 x i8> %result
+}
+
+define <8 x i8> @sextmask3v8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind {
+; CHECK-LABEL: sextmask3v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sshll.8h v1, v1, #0
+; CHECK-NEXT:    usra.8h v1, v0, #7
+; CHECK-NEXT:    shrn.8b v0, v1, #1
+; CHECK-NEXT:    ret
+  %sextsrc1 = ashr <8 x i16> %src1, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
+  %sextsrc2 = sext <8 x i8> %src2 to <8 x i16>
+  %add = add <8 x i16> %sextsrc1, %sextsrc2
+  %resulti16 = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %result = trunc <8 x i16> %resulti16 to <8 x i8>
+  ret <8 x i8> %result
+}
+
+
 declare <8 x i8>  @llvm.aarch64.neon.srhadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
 declare <4 x i16> @llvm.aarch64.neon.srhadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.aarch64.neon.srhadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone

From f18fcdabda7271d386d24de71302907cc3f0fe4b Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov@fb.com>
Date: Thu, 8 Apr 2021 23:31:12 -0700
Subject: [PATCH 211/946] [BOLT][NFC] Expand auto types pt.2

Summary: Expand autos where it may lead to differences in the BOLT binary.

Test Plan: NFC

Reviewers: maksfb

Reviewed By: maks

FBD27673231
---
 bolt/lib/Core/BinaryFunction.cpp          |  4 ++--
 bolt/lib/Passes/IndirectCallPromotion.cpp | 12 ++++++------
 bolt/lib/Profile/YAMLProfileWriter.cpp    |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 9d55a837f2a3c..18f521edff05f 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -3613,13 +3613,13 @@ void BinaryFunction::insertBasicBlocks(
     std::vector<std::unique_ptr<BinaryBasicBlock>> &&NewBBs,
     const bool UpdateLayout, const bool UpdateCFIState,
     const bool RecomputeLandingPads) {
-  const auto StartIndex = Start ? getIndex(Start) : -1;
+  const int64_t StartIndex = Start ? getIndex(Start) : -1LL;
   const size_t NumNewBlocks = NewBBs.size();
 
   BasicBlocks.insert(BasicBlocks.begin() + (StartIndex + 1), NumNewBlocks,
                      nullptr);
 
-  auto I = StartIndex + 1;
+  int64_t I = StartIndex + 1;
   for (std::unique_ptr<BinaryBasicBlock> &BB : NewBBs) {
     assert(!BasicBlocks[I]);
     BasicBlocks[I++] = BB.release();
diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 188655b8b7004..4f1604177ab8d 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -281,7 +281,7 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
         Inst.getOperand(0).getReg() == BC.MRI->getProgramCounter())
       return Targets;
 
-    auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
+    const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
         Inst, "CallProfile");
     if (ICSP) {
       for (const IndirectCallProfile &CSP : ICSP.get()) {
@@ -938,7 +938,7 @@ size_t IndirectCallPromotion::canPromoteCallsite(
   // If we have no targets (or no calls), skip this callsite.
   if (Targets.empty() || !NumCalls) {
     if (opts::Verbosity >= 1) {
-      const auto InstIdx = &Inst - &(*BB.begin());
+      const ptrdiff_t InstIdx = &Inst - &(*BB.begin());
       outs() << "BOLT-INFO: ICP failed in " << *BB.getFunction() << " @ "
              << InstIdx << " in " << BB.getName() << ", calls = " << NumCalls
              << ", targets empty or NumCalls == 0.\n";
@@ -985,7 +985,7 @@ size_t IndirectCallPromotion::canPromoteCallsite(
     if (TopNFrequency == 0 ||
         TopNFrequency < opts::IndirectCallPromotionMispredictThreshold) {
       if (opts::Verbosity >= 1) {
-        const auto InstIdx = &Inst - &(*BB.begin());
+        const ptrdiff_t InstIdx = &Inst - &(*BB.begin());
         outs() << "BOLT-INFO: ICP failed in " << *BB.getFunction() << " @ "
                << InstIdx << " in " << BB.getName() << ", calls = " << NumCalls
                << ", top N mis. frequency " << format("%.1f", TopNFrequency)
@@ -1034,7 +1034,7 @@ size_t IndirectCallPromotion::canPromoteCallsite(
       if (TopNMispredictFrequency <
           opts::IndirectCallPromotionMispredictThreshold) {
         if (opts::Verbosity >= 1) {
-          const auto InstIdx = &Inst - &(*BB.begin());
+          const ptrdiff_t InstIdx = &Inst - &(*BB.begin());
           outs() << "BOLT-INFO: ICP failed in " << *BB.getFunction() << " @ "
                  << InstIdx << " in " << BB.getName()
                  << ", calls = " << NumCalls << ", top N mispredict frequency "
@@ -1064,7 +1064,7 @@ void IndirectCallPromotion::printCallsiteInfo(
   BinaryContext &BC = BB.getFunction()->getBinaryContext();
   const bool IsTailCall = BC.MIB->isTailCall(Inst);
   const bool IsJumpTable = BB.getFunction()->getJumpTable(Inst);
-  const auto InstIdx = &Inst - &(*BB.begin());
+  const ptrdiff_t InstIdx = &Inst - &(*BB.begin());
 
   outs() << "BOLT-INFO: ICP candidate branch info: " << *BB.getFunction()
          << " @ " << InstIdx << " in " << BB.getName()
@@ -1219,7 +1219,7 @@ void IndirectCallPromotion::runOnFunctions(BinaryContext &BC) {
 
       for (unsigned Idx = 0; Idx < BB->size(); ++Idx) {
         MCInst &Inst = BB->getInstructionAtIndex(Idx);
-        const auto InstIdx = &Inst - &(*BB->begin());
+        const ptrdiff_t InstIdx = &Inst - &(*BB->begin());
         const bool IsTailCall = BC.MIB->isTailCall(Inst);
         const bool HasIndirectCallProfile =
             BC.MIB->hasAnnotation(Inst, "CallProfile");
diff --git a/bolt/lib/Profile/YAMLProfileWriter.cpp b/bolt/lib/Profile/YAMLProfileWriter.cpp
index d8d69759c22bf..ddbcd5939adaf 100644
--- a/bolt/lib/Profile/YAMLProfileWriter.cpp
+++ b/bolt/lib/Profile/YAMLProfileWriter.cpp
@@ -59,7 +59,7 @@ void convert(const BinaryFunction &BF,
       CSI.Offset = *Offset - BB->getInputOffset();
 
       if (BC.MIB->isIndirectCall(Instr) || BC.MIB->isIndirectBranch(Instr)) {
-        auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
+        const auto ICSP = BC.MIB->tryGetAnnotationAs<IndirectCallSiteProfile>(
             Instr, "CallProfile");
         if (!ICSP)
           continue;

From 5a654b01133fe5f5b2b4088495f829bb8c34e12d Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov@fb.com>
Date: Wed, 19 Jan 2022 20:20:55 -0800
Subject: [PATCH 212/946] [BOLT] Make ICP target selection (more) deterministic

Summary: Break ties by selecting targets with lower addresses.

Reviewers: maksfb

FBD33677001
---
 bolt/lib/Passes/IndirectCallPromotion.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 4f1604177ab8d..4104cc9ce3078 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -292,17 +292,18 @@ IndirectCallPromotion::getCallTargets(BinaryBasicBlock &BB,
     }
   }
 
-  // Sort by target count, number of indices in case of jump table,  and
-  // mispredicts. We prioritize targets with high count, small number of
-  // indices and high mispredicts
+  // Sort by target count, number of indices in case of jump table, and
+  // mispredicts. We prioritize targets with high count, small number of indices
+  // and high mispredicts. Break ties by selecting targets with lower addresses.
   std::stable_sort(Targets.begin(), Targets.end(),
                    [](const Callsite &A, const Callsite &B) {
                      if (A.Branches != B.Branches)
                        return A.Branches > B.Branches;
-                     else if (A.JTIndices.size() != B.JTIndices.size())
+                     if (A.JTIndices.size() != B.JTIndices.size())
                        return A.JTIndices.size() < B.JTIndices.size();
-                     else
+                     if (A.Mispreds != B.Mispreds)
                        return A.Mispreds > B.Mispreds;
+                     return A.To.Addr < B.To.Addr;
                    });
 
   // Remove non-symbol targets

From f8c7fb499be6c2712be828ffe7378802dfd645a0 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <aaupov@fb.com>
Date: Fri, 29 Oct 2021 17:27:38 -0700
Subject: [PATCH 213/946] [BOLT][NFC] Reduce includes with include-what-you-use

Summary: Removed redundant includes with IWYU

Test Plan: ninja bolt

Reviewers: maksfb

FBD32043568
---
 bolt/include/bolt/Core/BinaryContext.h    | 3 +--
 bolt/include/bolt/Core/BinaryData.h       | 1 -
 bolt/include/bolt/Core/DebugData.h        | 3 ---
 bolt/include/bolt/Passes/AllocCombiner.h  | 2 --
 bolt/include/bolt/Rewrite/DWARFRewriter.h | 3 +--
 bolt/lib/Core/BinaryFunction.cpp          | 1 -
 bolt/lib/Core/DebugData.cpp               | 7 ++++---
 bolt/lib/Core/JumpTable.cpp               | 1 -
 bolt/lib/Passes/IndirectCallPromotion.cpp | 1 -
 bolt/lib/Passes/ThreeWayBranch.cpp        | 2 --
 bolt/lib/Rewrite/DWARFRewriter.cpp        | 4 +---
 bolt/lib/Rewrite/MachORewriteInstance.cpp | 2 --
 bolt/lib/Rewrite/RewriteInstance.cpp      | 2 --
 bolt/lib/Utils/Utils.cpp                  | 1 -
 14 files changed, 7 insertions(+), 26 deletions(-)

diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h
index abcd2c3692055..c626af3a897d6 100644
--- a/bolt/include/bolt/Core/BinaryContext.h
+++ b/bolt/include/bolt/Core/BinaryContext.h
@@ -28,13 +28,12 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCInstrInfo.h"
 #include "llvm/MC/MCObjectFileInfo.h"
 #include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCPseudoProbe.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Support/ErrorOr.h"
diff --git a/bolt/include/bolt/Core/BinaryData.h b/bolt/include/bolt/Core/BinaryData.h
index 831f968bbfaea..01e1538f8a95e 100644
--- a/bolt/include/bolt/Core/BinaryData.h
+++ b/bolt/include/bolt/Core/BinaryData.h
@@ -16,7 +16,6 @@
 #define BOLT_CORE_BINARY_DATA_H
 
 #include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
diff --git a/bolt/include/bolt/Core/DebugData.h b/bolt/include/bolt/Core/DebugData.h
index 832123fde38a0..761614c00f619 100644
--- a/bolt/include/bolt/Core/DebugData.h
+++ b/bolt/include/bolt/Core/DebugData.h
@@ -14,7 +14,6 @@
 #ifndef BOLT_CORE_DEBUG_DATA_H
 #define BOLT_CORE_DEBUG_DATA_H
 
-#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
 #include "llvm/MC/MCDwarf.h"
@@ -33,8 +32,6 @@
 
 namespace llvm {
 
-class DWARFAbbreviationDeclarationSet;
-
 namespace bolt {
 
 class BinaryContext;
diff --git a/bolt/include/bolt/Passes/AllocCombiner.h b/bolt/include/bolt/Passes/AllocCombiner.h
index 810c265714193..21f1aa73d343e 100644
--- a/bolt/include/bolt/Passes/AllocCombiner.h
+++ b/bolt/include/bolt/Passes/AllocCombiner.h
@@ -13,8 +13,6 @@
 
 namespace llvm {
 namespace bolt {
-class DataflowInfoManager;
-class FrameAnalysis;
 
 class AllocCombinerPass : public BinaryFunctionPass {
   /// Stats aggregating variables
diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h
index 55dda5d71fc63..d75ae8ade8516 100644
--- a/bolt/include/bolt/Rewrite/DWARFRewriter.h
+++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h
@@ -10,11 +10,10 @@
 #define BOLT_REWRITE_DWARF_REWRITER_H
 
 #include "bolt/Core/DebugData.h"
-#include "bolt/Rewrite/RewriteInstance.h"
 #include <cstdint>
-#include <map>
 #include <memory>
 #include <mutex>
+#include <set>
 #include <unordered_map>
 #include <vector>
 
diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp
index 18f521edff05f..0b175a8b47e9b 100644
--- a/bolt/lib/Core/BinaryFunction.cpp
+++ b/bolt/lib/Core/BinaryFunction.cpp
@@ -28,7 +28,6 @@
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
 #include "llvm/MC/MCInstPrinter.h"
-#include "llvm/MC/MCStreamer.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
index 396bf8aebf52f..fde6227a02d6c 100644
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -11,11 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "bolt/Core/DebugData.h"
-#include "bolt/Core/BinaryBasicBlock.h"
-#include "bolt/Core/BinaryFunction.h"
+#include "bolt/Core/BinaryContext.h"
 #include "bolt/Utils/Utils.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/EndianStream.h"
 #include "llvm/Support/LEB128.h"
@@ -32,6 +31,8 @@ extern llvm::cl::opt<unsigned> Verbosity;
 } // namespace opts
 
 namespace llvm {
+class MCSymbol;
+
 namespace bolt {
 
 const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0};
diff --git a/bolt/lib/Core/JumpTable.cpp b/bolt/lib/Core/JumpTable.cpp
index a3c273b890da3..91c73f67b87ee 100644
--- a/bolt/lib/Core/JumpTable.cpp
+++ b/bolt/lib/Core/JumpTable.cpp
@@ -14,7 +14,6 @@
 #include "bolt/Core/BinaryFunction.h"
 #include "bolt/Core/BinarySection.h"
 #include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "bolt"
 
diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp
index 4104cc9ce3078..dd1cc2024e8de 100644
--- a/bolt/lib/Passes/IndirectCallPromotion.cpp
+++ b/bolt/lib/Passes/IndirectCallPromotion.cpp
@@ -13,7 +13,6 @@
 #include "bolt/Passes/IndirectCallPromotion.h"
 #include "bolt/Passes/BinaryFunctionCallGraph.h"
 #include "bolt/Passes/DataflowInfoManager.h"
-#include "bolt/Utils/CommandLineOpts.h"
 #include "llvm/Support/CommandLine.h"
 
 #define DEBUG_TYPE "ICP"
diff --git a/bolt/lib/Passes/ThreeWayBranch.cpp b/bolt/lib/Passes/ThreeWayBranch.cpp
index 5ee2b66a9fd5c..445faba888ae5 100644
--- a/bolt/lib/Passes/ThreeWayBranch.cpp
+++ b/bolt/lib/Passes/ThreeWayBranch.cpp
@@ -12,8 +12,6 @@
 
 #include "bolt/Passes/ThreeWayBranch.h"
 
-#include <numeric>
-
 using namespace llvm;
 
 namespace llvm {
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 531f82499fc09..c44e548463cf7 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -11,6 +11,7 @@
 #include "bolt/Core/BinaryFunction.h"
 #include "bolt/Core/DebugData.h"
 #include "bolt/Core/ParallelUtilities.h"
+#include "bolt/Rewrite/RewriteInstance.h"
 #include "bolt/Utils/Utils.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/BinaryFormat/Dwarf.h"
@@ -20,11 +21,8 @@
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCDwarf.h"
 #include "llvm/MC/MCObjectWriter.h"
-#include "llvm/MC/MCSection.h"
 #include "llvm/MC/MCStreamer.h"
-#include "llvm/MC/MCSymbol.h"
 #include "llvm/Object/ObjectFile.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp
index e04c22282104c..c823349d8fd18 100644
--- a/bolt/lib/Rewrite/MachORewriteInstance.cpp
+++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp
@@ -19,10 +19,8 @@
 #include "bolt/Rewrite/ExecutableFileMemoryManager.h"
 #include "bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h"
 #include "bolt/Utils/Utils.h"
-#include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmLayout.h"
 #include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/ToolOutputFile.h"
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 73288a32315be..934f79749e88d 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -35,10 +35,8 @@
 #include "llvm/MC/MCAsmBackend.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAsmLayout.h"
-#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
 #include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCObjectWriter.h"
 #include "llvm/MC/MCStreamer.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/MC/TargetRegistry.h"
diff --git a/bolt/lib/Utils/Utils.cpp b/bolt/lib/Utils/Utils.cpp
index f94d286bc87b8..3d18da5d836d7 100644
--- a/bolt/lib/Utils/Utils.cpp
+++ b/bolt/lib/Utils/Utils.cpp
@@ -13,7 +13,6 @@
 #include "bolt/Utils/Utils.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/MC/MCDwarf.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/LEB128.h"
 #include "llvm/Support/raw_ostream.h"
 

From 2f9f9afa4e1281b4ac7c8ad36860a4e35e6f5070 Mon Sep 17 00:00:00 2001
From: Rob Suderman <rob.suderman@gmail.com>
Date: Fri, 21 Jan 2022 12:22:50 -0800
Subject: [PATCH 214/946] [mlir] Add polynomial approximation for atan and
 atan2

Implement a taylor series approximation for atan and add an atan2 lowering
that uses atan's appromation. This includes tests for edge cases and tests
for each quadrant.

Reviewed By: NatashaKnk

Differential Revision: https://reviews.llvm.org/D115682
---
 .../Transforms/PolynomialApproximation.cpp    | 134 +++++++++++++++++-
 .../Math/polynomial-approximation.mlir        |  82 +++++++++++
 .../math-polynomial-approx.mlir               | 118 +++++++++++++++
 3 files changed, 331 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
index 9931e89647bcf..7d04ae7e3d34f 100644
--- a/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
+++ b/mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp
@@ -278,6 +278,133 @@ Value makePolynomialCalculation(ImplicitLocOpBuilder &builder,
 }
 } // namespace
 
+//----------------------------------------------------------------------------//
+// AtanOp approximation.
+//----------------------------------------------------------------------------//
+
+namespace {
+struct AtanApproximation : public OpRewritePattern<math::AtanOp> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(math::AtanOp op,
+                                PatternRewriter &rewriter) const final;
+};
+} // namespace
+
+LogicalResult
+AtanApproximation::matchAndRewrite(math::AtanOp op,
+                                   PatternRewriter &rewriter) const {
+  auto operand = op.getOperand();
+  if (!getElementTypeOrSelf(operand).isF32())
+    return rewriter.notifyMatchFailure(op, "unsupported operand type");
+
+  ArrayRef<int64_t> shape = vectorShape(op.getOperand());
+
+  ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
+  auto one = broadcast(builder, f32Cst(builder, 1.0f), shape);
+
+  // Remap the problem over [0.0, 1.0] by looking at the absolute value and the
+  // handling symmetry.
+  Value abs = builder.create<math::AbsOp>(operand);
+  Value reciprocal = builder.create<arith::DivFOp>(one, abs);
+  Value compare =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, abs, reciprocal);
+  Value x = builder.create<SelectOp>(compare, abs, reciprocal);
+
+  // Perform the Taylor series approximation for atan over the range
+  // [-1.0, 1.0].
+  auto n1 = broadcast(builder, f32Cst(builder, 0.14418283), shape);
+  auto n2 = broadcast(builder, f32Cst(builder, -0.34999234), shape);
+  auto n3 = broadcast(builder, f32Cst(builder, -0.01067831), shape);
+  auto n4 = broadcast(builder, f32Cst(builder, 1.00209986), shape);
+
+  Value p = builder.create<math::FmaOp>(x, n1, n2);
+  p = builder.create<math::FmaOp>(x, p, n3);
+  p = builder.create<math::FmaOp>(x, p, n4);
+  p = builder.create<arith::MulFOp>(x, p);
+
+  // Remap the solution for over [0.0, 1.0] to [0.0, inf]
+  auto half_pi = broadcast(builder, f32Cst(builder, 1.57079632679f), shape);
+  Value sub = builder.create<arith::SubFOp>(half_pi, p);
+  Value select = builder.create<SelectOp>(compare, p, sub);
+
+  // Correct for signing of the input.
+  rewriter.replaceOpWithNewOp<math::CopySignOp>(op, select, operand);
+  return success();
+}
+
+//----------------------------------------------------------------------------//
+// AtanOp approximation.
+//----------------------------------------------------------------------------//
+
+namespace {
+struct Atan2Approximation : public OpRewritePattern<math::Atan2Op> {
+public:
+  using OpRewritePattern::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(math::Atan2Op op,
+                                PatternRewriter &rewriter) const final;
+};
+} // namespace
+
+LogicalResult
+Atan2Approximation::matchAndRewrite(math::Atan2Op op,
+                                    PatternRewriter &rewriter) const {
+  auto y = op.getOperand(0);
+  auto x = op.getOperand(1);
+  if (!getElementTypeOrSelf(x).isF32())
+    return rewriter.notifyMatchFailure(op, "unsupported operand type");
+
+  ImplicitLocOpBuilder builder(op->getLoc(), rewriter);
+  ArrayRef<int64_t> shape = vectorShape(op.getResult());
+
+  // Compute atan in the valid range.
+  auto div = builder.create<arith::DivFOp>(y, x);
+  auto atan = builder.create<math::AtanOp>(div);
+
+  // Determine what the atan would be for a 180 degree rotation.
+  auto zero = broadcast(builder, f32Cst(builder, 0.0f), shape);
+  auto pi = broadcast(builder, f32Cst(builder, 3.14159265359f), shape);
+  auto add_pi = builder.create<arith::AddFOp>(atan, pi);
+  auto sub_pi = builder.create<arith::SubFOp>(atan, pi);
+  auto atan_gt =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, atan, zero);
+  auto flipped_atan = builder.create<SelectOp>(atan_gt, sub_pi, add_pi);
+
+  // Determine whether to directly use atan or use the 180 degree flip
+  auto x_gt = builder.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, x, zero);
+  Value result = builder.create<SelectOp>(x_gt, atan, flipped_atan);
+
+  // Handle x = 0, y > 0
+  Value x_zero =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, x, zero);
+  Value y_gt =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OGT, y, zero);
+  Value is_half_pi = builder.create<arith::AndIOp>(x_zero, y_gt);
+  auto half_pi = broadcast(builder, f32Cst(builder, 1.57079632679f), shape);
+  result = builder.create<SelectOp>(is_half_pi, half_pi, result);
+
+  // Handle x = 0, y < 0
+  Value y_lt =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OLT, y, zero);
+  Value is_negative_half_pi_pi = builder.create<arith::AndIOp>(x_zero, y_lt);
+  auto negative_half_pi_pi =
+      broadcast(builder, f32Cst(builder, -1.57079632679), shape);
+  result = builder.create<SelectOp>(is_negative_half_pi_pi, negative_half_pi_pi,
+                                    result);
+
+  // Handle x = 0, y = 0;
+  Value y_zero =
+      builder.create<arith::CmpFOp>(arith::CmpFPredicate::OEQ, y, zero);
+  Value is_nan = builder.create<arith::AndIOp>(x_zero, y_zero);
+  Value cst_nan = broadcast(builder, f32FromBits(builder, 0x7fc00000), shape);
+  result = builder.create<SelectOp>(is_nan, cst_nan, result);
+
+  rewriter.replaceOp(op, result);
+  return success();
+}
+
 //----------------------------------------------------------------------------//
 // TanhOp approximation.
 //----------------------------------------------------------------------------//
@@ -1074,9 +1201,10 @@ RsqrtApproximation::matchAndRewrite(math::RsqrtOp op,
 void mlir::populateMathPolynomialApproximationPatterns(
     RewritePatternSet &patterns,
     const MathPolynomialApproximationOptions &options) {
-  patterns.add<TanhApproximation, LogApproximation, Log2Approximation,
-               Log1pApproximation, ErfPolynomialApproximation, ExpApproximation,
-               ExpM1Approximation, SinAndCosApproximation<true, math::SinOp>,
+  patterns.add<AtanApproximation, Atan2Approximation, TanhApproximation,
+               LogApproximation, Log2Approximation, Log1pApproximation,
+               ErfPolynomialApproximation, ExpApproximation, ExpM1Approximation,
+               SinAndCosApproximation<true, math::SinOp>,
                SinAndCosApproximation<false, math::CosOp>>(
       patterns.getContext());
   if (options.enableAvx2)
diff --git a/mlir/test/Dialect/Math/polynomial-approximation.mlir b/mlir/test/Dialect/Math/polynomial-approximation.mlir
index f388b84d83c8e..a40cc9c6f037a 100644
--- a/mlir/test/Dialect/Math/polynomial-approximation.mlir
+++ b/mlir/test/Dialect/Math/polynomial-approximation.mlir
@@ -507,3 +507,85 @@ func @rsqrt_vector_2x16xf32(%arg0: vector<2x16xf32>) -> vector<2x16xf32> {
   %0 = math.rsqrt %arg0 : vector<2x16xf32>
   return %0 : vector<2x16xf32>
 }
+
+// CHECK-LABEL: @atan_scalar
+// CHECK-DAG:  %[[ONE:.+]] = arith.constant 1.000000e+00
+// CHECK-DAG:  %[[N1:.+]] = arith.constant 0.144182831
+// CHECK-DAG:  %[[N2:.+]] = arith.constant -0.349992335
+// CHECK-DAG:  %[[N3:.+]] = arith.constant -0.0106783099
+// CHECK-DAG:  %[[N4:.+]] = arith.constant 1.00209987
+// CHECK-DAG:  %[[HALF_PI:.+]] = arith.constant 1.57079637
+// CHECK-DAG:  %[[ABS:.+]] = math.abs %arg0
+// CHECK-DAG:  %[[DIV:.+]] = arith.divf %cst, %[[ABS]]
+// CHECK-DAG:  %[[CMP:.+]] = arith.cmpf olt, %[[ABS]], %[[DIV]]
+// CHECK-DAG:  %[[SEL:.+]] = select %[[CMP]], %[[ABS]], %[[DIV]]
+// CHECK-DAG:  %[[P0:.+]] = math.fma %[[SEL]], %[[N1]], %[[N2]]
+// CHECK-DAG:  %[[P1:.+]] = math.fma %[[SEL]], %[[P0]], %[[N3]]
+// CHECK-DAG:  %[[P2:.+]] = math.fma %[[SEL]], %[[P1]], %[[N4]]
+// CHECK-DAG:  %[[P3:.+]] = arith.mulf %[[SEL]], %[[P2]]
+// CHECK-DAG:  %[[SUB:.+]] = arith.subf %[[HALF_PI]], %[[P3]]
+// CHECK-DAG:  %[[EST:.+]] = select %[[CMP]], %[[P3]], %[[SUB]]
+// CHECK-DAG:  %[[RES:.+]] = math.copysign %[[EST]], %arg0
+// CHECK:  return %[[RES]]
+func @atan_scalar(%arg0: f32) -> f32 {
+  %0 = math.atan %arg0 : f32
+  return %0 : f32
+}
+
+
+// CHECK-LABEL: @atan2_scalar
+
+// ATan approximation:
+// CHECK-DAG:  %[[ONE:.+]] = arith.constant 1.000000e+00
+// CHECK-DAG:  %[[N1:.+]] = arith.constant 0.144182831
+// CHECK-DAG:  %[[N2:.+]] = arith.constant -0.349992335
+// CHECK-DAG:  %[[N3:.+]] = arith.constant -0.0106783099
+// CHECK-DAG:  %[[N4:.+]] = arith.constant 1.00209987
+// CHECK-DAG:  %[[HALF_PI:.+]] = arith.constant 1.57079637
+// CHECK-DAG:  %[[RATIO:.+]] = arith.divf %arg0, %arg1
+// CHECK-DAG:  %[[ABS:.+]] = math.abs %[[RATIO]]
+// CHECK-DAG:  %[[DIV:.+]] = arith.divf %cst, %[[ABS]]
+// CHECK-DAG:  %[[CMP:.+]] = arith.cmpf olt, %[[ABS]], %[[DIV]]
+// CHECK-DAG:  %[[SEL:.+]] = select %[[CMP]], %[[ABS]], %[[DIV]]
+// CHECK-DAG:  %[[P0:.+]] = math.fma %[[SEL]], %[[N1]], %[[N2]]
+// CHECK-DAG:  %[[P1:.+]] = math.fma %[[SEL]], %[[P0]], %[[N3]]
+// CHECK-DAG:  %[[P2:.+]] = math.fma %[[SEL]], %[[P1]], %[[N4]]
+// CHECK-DAG:  %[[P3:.+]] = arith.mulf %[[SEL]], %[[P2]]
+// CHECK-DAG:  %[[SUB:.+]] = arith.subf %[[HALF_PI]], %[[P3]]
+// CHECK-DAG:  %[[EST:.+]] = select %[[CMP]], %[[P3]], %[[SUB]]
+// CHECK-DAG:  %[[ATAN:.+]] = math.copysign %[[EST]], %[[RATIO]]
+
+// Handle the case of x < 0:
+// CHECK-DAG: %[[ZERO:.+]] = arith.constant 0.000000e+00
+// CHECK-DAG: %[[PI:.+]] = arith.constant 3.14159274
+// CHECK-DAG: %[[ADD_PI:.+]] = arith.addf %[[ATAN]], %[[PI]]
+// CHECK-DAG: %[[SUB_PI:.+]] = arith.subf %[[ATAN]], %[[PI]]
+// CHECK-DAG: %[[CMP_ATAN:.+]] = arith.cmpf ogt, %[[ATAN]], %[[ZERO]]
+// CHECK-DAG: %[[ATAN_ADJUST:.+]] = select %[[CMP_ATAN]], %[[SUB_PI]], %[[ADD_PI]]
+// CHECK-DAG: %[[X_NEG:.+]] = arith.cmpf ogt, %arg1, %[[ZERO]]
+// CHECK-DAG: %[[ATAN_EST:.+]] = select %[[X_NEG]], %[[ATAN]], %[[ATAN_ADJUST]]
+
+// Handle PI / 2 edge case:
+// CHECK-DAG: %[[X_ZERO:.+]] = arith.cmpf oeq, %arg1, %[[ZERO]]
+// CHECK-DAG: %[[Y_POS:.+]] = arith.cmpf ogt, %arg0, %[[ZERO]]
+// CHECK-DAG: %[[IS_HALF_PI:.+]] = arith.andi %[[X_ZERO]], %[[Y_POS]]
+// CHECK-DAG: %[[EDGE1:.+]] = select %[[IS_HALF_PI]], %[[HALF_PI]], %[[ATAN_EST]]
+
+// Handle -PI / 2 edge case:
+// CHECK-DAG: %[[NEG_HALF_PI:.+]] = arith.constant -1.57079637
+// CHECK-DAG: %[[Y_NEG:.+]] = arith.cmpf olt, %arg0, %[[ZERO]]
+// CHECK-DAG: %[[IS_NEG_HALF_PI:.+]] = arith.andi %[[X_ZERO]], %[[Y_NEG]]
+// CHECK-DAG: %[[EDGE2:.+]] = select %[[IS_NEG_HALF_PI]], %[[NEG_HALF_PI]], %[[EDGE1]]
+
+// Handle Nan edgecase:
+// CHECK-DAG: %[[Y_ZERO:.+]] = arith.cmpf oeq, %arg0, %[[ZERO]]
+// CHECK-DAG: %[[X_Y_ZERO:.+]] = arith.andi %[[X_ZERO]], %[[Y_ZERO]]
+// CHECK-DAG: %[[NAN:.+]] = arith.constant 0x7FC00000
+// CHECK-DAG: %[[EDGE3:.+]] = select %[[X_Y_ZERO]], %[[NAN]], %[[EDGE2]]
+// CHECK: return %[[EDGE3]]
+
+func @atan2_scalar(%arg0: f32, %arg1: f32) -> f32 {
+  %0 = math.atan2 %arg0, %arg1 : f32
+  return %0 : f32
+}
+
diff --git a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
index b3c41057fa302..5a41d56dd42bd 100644
--- a/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
+++ b/mlir/test/mlir-cpu-runner/math-polynomial-approx.mlir
@@ -371,6 +371,122 @@ func @cos() {
   return
 }
 
+// -------------------------------------------------------------------------- //
+// Atan.
+// -------------------------------------------------------------------------- //
+
+func @atan() {
+  // CHECK: -0.785184
+  %0 = arith.constant -1.0 : f32
+  %atan_0 = math.atan %0 : f32
+  vector.print %atan_0 : f32
+
+  // CHECK: 0.785184
+  %1 = arith.constant 1.0 : f32
+  %atan_1 = math.atan %1 : f32
+  vector.print %atan_1 : f32
+
+  // CHECK: -0.463643
+  %2 = arith.constant -0.5 : f32
+  %atan_2 = math.atan %2 : f32
+  vector.print %atan_2 : f32
+
+  // CHECK: 0.463643
+  %3 = arith.constant 0.5 : f32
+  %atan_3 = math.atan %3 : f32
+  vector.print %atan_3 : f32
+
+  // CHECK: 0
+  %4 = arith.constant 0.0 : f32
+  %atan_4 = math.atan %4 : f32
+  vector.print %atan_4 : f32
+
+  // CHECK: -1.10715
+  %5 = arith.constant -2.0 : f32
+  %atan_5 = math.atan %5 : f32
+  vector.print %atan_5 : f32
+
+  // CHECK: 1.10715
+  %6 = arith.constant 2.0 : f32
+  %atan_6 = math.atan %6 : f32
+  vector.print %atan_6 : f32
+
+  return
+}
+
+
+// -------------------------------------------------------------------------- //
+// Atan2.
+// -------------------------------------------------------------------------- //
+
+func @atan2() {
+  %zero = arith.constant 0.0 : f32
+  %one = arith.constant 1.0 : f32
+  %two = arith.constant 2.0 : f32
+  %neg_one = arith.constant -1.0 : f32
+  %neg_two = arith.constant -2.0 : f32
+
+  // CHECK: 0
+  %atan2_0 = math.atan2 %zero, %one : f32
+  vector.print %atan2_0 : f32
+
+  // CHECK: 1.5708
+  %atan2_1 = math.atan2 %one, %zero : f32
+  vector.print %atan2_1 : f32
+
+  // CHECK: 3.14159
+  %atan2_2 = math.atan2 %zero, %neg_one : f32
+  vector.print %atan2_2 : f32
+
+  // CHECK: -1.5708
+  %atan2_3 = math.atan2 %neg_one, %zero : f32
+  vector.print %atan2_3 : f32
+
+  // CHECK: nan
+  %atan2_4 = math.atan2 %zero, %zero : f32
+  vector.print %atan2_4 : f32
+
+  // CHECK: 1.10715
+  %atan2_5 = math.atan2 %two, %one : f32
+  vector.print %atan2_5 : f32
+
+  // CHECK: 2.03444
+  %x6 = arith.constant -1.0 : f32
+  %y6 = arith.constant 2.0 : f32
+  %atan2_6 = math.atan2 %two, %neg_one : f32
+  vector.print %atan2_6 : f32
+
+  // CHECK: -2.03444
+  %atan2_7 = math.atan2 %neg_two, %neg_one : f32
+  vector.print %atan2_7 : f32
+
+  // CHECK: -1.10715
+  %atan2_8 = math.atan2 %neg_two, %one : f32
+  vector.print %atan2_8 : f32
+
+  // CHECK: 0.463643
+  %atan2_9 = math.atan2 %one, %two : f32
+  vector.print %atan2_9 : f32
+
+  // CHECK: 2.67795
+  %x10 = arith.constant -2.0 : f32
+  %y10 = arith.constant 1.0 : f32
+  %atan2_10 = math.atan2 %one, %neg_two : f32
+  vector.print %atan2_10 : f32
+
+  // CHECK: -2.67795
+  %x11 = arith.constant -2.0 : f32
+  %y11 = arith.constant -1.0 : f32
+  %atan2_11 = math.atan2 %neg_one, %neg_two : f32
+  vector.print %atan2_11 : f32
+
+  // CHECK: -0.463643
+  %atan2_12 = math.atan2 %neg_one, %two : f32
+  vector.print %atan2_12 : f32
+
+  return
+}
+
 
 func @main() {
   call @tanh(): () -> ()
@@ -382,5 +498,7 @@ func @main() {
   call @expm1(): () -> ()
   call @sin(): () -> ()
   call @cos(): () -> ()
+  call @atan() : () -> ()
+  call @atan2() : () -> ()
   return
 }

From 0d9cc6995401e629f63b1e43e2e6e8bf73c4edd7 Mon Sep 17 00:00:00 2001
From: Daniel Thornburgh <dthorn@google.com>
Date: Fri, 21 Jan 2022 19:26:52 +0000
Subject: [PATCH 215/946] [Support] Update missed tests with lazy caching
 behavior.

Fixes test failures created by https://reviews.llvm.org/D117589.

Reviewed By: zhuhan0

Differential Revision: https://reviews.llvm.org/D117915
---
 llvm/test/tools/gold/X86/cache.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/tools/gold/X86/cache.ll b/llvm/test/tools/gold/X86/cache.ll
index 5ab5563025587..fd3e7ba876830 100644
--- a/llvm/test/tools/gold/X86/cache.ll
+++ b/llvm/test/tools/gold/X86/cache.ll
@@ -8,8 +8,8 @@
 ; RUN:     --plugin-opt=cache-dir=%t.cache \
 ; RUN:     -o %t3.o %t2.o %t.o
 
-; We should just get the timestamp file
-; RUN: ls %t.cache | count 1
+; Since nothing was added to the cache, there shouldn't be a timestamp file yet.
+; RUN: not ls %t.cache
 
 
 ; Verify that enabling caching is working with module with hash.

From cd4e600f5f5cedd092c8ff19c208897034494f3d Mon Sep 17 00:00:00 2001
From: Alex Brachet <abrachet@google.com>
Date: Fri, 21 Jan 2022 21:00:39 +0000
Subject: [PATCH 216/946] [Sema] Warn about printf %n on Android and Fuchsia

The `printf` specifier `%n` is not supported on Android's libc and will soon be removed from Fuchsia's

Reviewed By: enh

Differential Revision: https://reviews.llvm.org/D117611
---
 clang/include/clang/AST/FormatString.h        |  3 +-
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +
 clang/lib/AST/OSLog.cpp                       |  4 +-
 clang/lib/AST/PrintfFormatString.cpp          |  2 +-
 clang/lib/Sema/SemaChecking.cpp               | 24 ++++--
 clang/test/FixIt/format.m                     | 16 +++-
 clang/test/Sema/format-strings.c              | 84 ++++++++++++++++++-
 7 files changed, 122 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/AST/FormatString.h b/clang/include/clang/AST/FormatString.h
index 5a407b9261922..d7933382f13d6 100644
--- a/clang/include/clang/AST/FormatString.h
+++ b/clang/include/clang/AST/FormatString.h
@@ -726,7 +726,8 @@ class FormatStringHandler {
 
   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
                                      const char *startSpecifier,
-                                     unsigned specifierLen) {
+                                     unsigned specifierLen,
+                                     const TargetInfo &Target) {
     return true;
   }
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 19ce0ffcec51d..88e430d8eb09f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9369,6 +9369,9 @@ def warn_printf_ObjCflags_without_ObjCConversion: Warning<
 def warn_printf_invalid_objc_flag: Warning<
     "'%0' is not a valid object format flag">,
     InGroup<Format>;
+def warn_printf_narg_not_supported : Warning<
+    "'%%n' specifier not supported on this platform">,
+    InGroup<Format>;
 def warn_scanf_scanlist_incomplete : Warning<
   "no closing ']' for '%%[' in scanf format string">,
   InGroup<Format>;
diff --git a/clang/lib/AST/OSLog.cpp b/clang/lib/AST/OSLog.cpp
index 094c0102854b1..4cc5def0651f7 100644
--- a/clang/lib/AST/OSLog.cpp
+++ b/clang/lib/AST/OSLog.cpp
@@ -56,8 +56,8 @@ class OSLogFormatStringHandler
   }
 
   bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
-                             const char *StartSpecifier,
-                             unsigned SpecifierLen) override {
+                             const char *StartSpecifier, unsigned SpecifierLen,
+                             const TargetInfo &) override {
     if (!FS.consumesDataArgument() &&
         FS.getConversionSpecifier().getKind() !=
             clang::analyze_format_string::ConversionSpecifier::PrintErrno)
diff --git a/clang/lib/AST/PrintfFormatString.cpp b/clang/lib/AST/PrintfFormatString.cpp
index a286db3b9b9f1..c6c41abc7e9a5 100644
--- a/clang/lib/AST/PrintfFormatString.cpp
+++ b/clang/lib/AST/PrintfFormatString.cpp
@@ -428,7 +428,7 @@ bool clang::analyze_format_string::ParsePrintfString(FormatStringHandler &H,
       continue;
     // We have a format specifier.  Pass it to the callback.
     if (!H.HandlePrintfSpecifier(FSR.getValue(), FSR.getStart(),
-                                 I - FSR.getStart()))
+                                 I - FSR.getStart(), Target))
       return true;
   }
   assert(I == E && "Format string not exhausted");
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 27653464110aa..e2b78fa212b81 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -499,7 +499,8 @@ class EstimateSizeFormatHandler
              1 /* null byte always written by sprintf */) {}
 
   bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
-                             const char *, unsigned SpecifierLen) override {
+                             const char *, unsigned SpecifierLen,
+                             const TargetInfo &) override {
 
     const size_t FieldWidth = computeFieldWidth(FS);
     const size_t Precision = computePrecision(FS);
@@ -8909,8 +8910,8 @@ class CheckPrintfHandler : public CheckFormatHandler {
   void handleInvalidMaskType(StringRef MaskType) override;
 
   bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
-                             const char *startSpecifier,
-                             unsigned specifierLen) override;
+                             const char *startSpecifier, unsigned specifierLen,
+                             const TargetInfo &Target) override;
   bool checkFormatExpr(const analyze_printf::PrintfSpecifier &FS,
                        const char *StartSpecifier,
                        unsigned SpecifierLen,
@@ -9169,11 +9170,9 @@ bool CheckPrintfHandler::checkForCStrMembers(
   return false;
 }
 
-bool
-CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
-                                            &FS,
-                                          const char *startSpecifier,
-                                          unsigned specifierLen) {
+bool CheckPrintfHandler::HandlePrintfSpecifier(
+    const analyze_printf::PrintfSpecifier &FS, const char *startSpecifier,
+    unsigned specifierLen, const TargetInfo &Target) {
   using namespace analyze_format_string;
   using namespace analyze_printf;
 
@@ -9305,6 +9304,15 @@ CheckPrintfHandler::HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier
     }
   }
 
+  const llvm::Triple &Triple = Target.getTriple();
+  if (CS.getKind() == ConversionSpecifier::nArg &&
+      (Triple.isAndroid() || Triple.isOSFuchsia())) {
+    EmitFormatDiagnostic(S.PDiag(diag::warn_printf_narg_not_supported),
+                         getLocationOfByte(CS.getStart()),
+                         /*IsStringLocation*/ false,
+                         getSpecifierRange(startSpecifier, specifierLen));
+  }
+
   // Check for invalid use of field width
   if (!FS.hasValidFieldWidth()) {
     HandleInvalidAmount(FS, FS.getFieldWidth(), /* field width */ 0,
diff --git a/clang/test/FixIt/format.m b/clang/test/FixIt/format.m
index 0d173846d0ada..af2d2ce797a49 100644
--- a/clang/test/FixIt/format.m
+++ b/clang/test/FixIt/format.m
@@ -241,8 +241,13 @@ void testSizeTypes() {
   // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:14}:"%f"
   
   short x;
+#if !defined(__ANDROID__) && !defined(__Fuchsia__)
   printf("%zn", &x); // expected-warning-re{{format specifies type 'ssize_t *' (aka '{{.+}}') but the argument has type 'short *'}}
-  // PrintfSpecifier::fixType doesn't handle %n, so a fix-it is not emitted, 
+#else
+  printf("%zn", &x); // expected-warning-re{{format specifies type 'ssize_t *' (aka '{{.+}}') but the argument has type 'short *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+#endif // !defined(__ANDROID__) && !defined(__Fuchsia__)
+  // PrintfSpecifier::fixType doesn't handle %n, so a fix-it is not emitted,
   // see the comment in PrintfSpecifier::fixType in PrintfFormatString.cpp.
 }
 
@@ -269,12 +274,21 @@ void testPtrDiffTypes() {
   // CHECK: fix-it:"{{.*}}":{[[@LINE-1]]:11-[[@LINE-1]]:14}:"%f"
 
   ptrdiff_t p3 = 0;
+#if !defined(__ANDROID__) && !defined(__Fuchsia__)
   printf("%tn", &p3); // No warning.
+#else
+  printf("%tn", &p3); // expected-warning{{'%n' specifier not supported on this platform}}
+#endif // !defined(__ANDROID__) && !defined(__Fuchsia__)
 
   short x;
+#if !defined(__ANDROID__) && !defined(__Fuchsia__)
   printf("%tn", &x); // expected-warning-re{{format specifies type 'ptrdiff_t *' (aka '{{.+}}') but the argument has type 'short *'}}
   // PrintfSpecifier::fixType doesn't handle %n, so a fix-it is not emitted,
   // see the comment in PrintfSpecifier::fixType in PrintfFormatString.cpp.
+#else
+  printf("%tn", &x); // expected-warning-re{{format specifies type 'ptrdiff_t *' (aka '{{.+}}') but the argument has type 'short *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+#endif // !defined(__ANDROID__) && !defined(__Fuchsia__)
 }
 
 void testEnum() {
diff --git a/clang/test/Sema/format-strings.c b/clang/test/Sema/format-strings.c
index bbe47636ebb7d..bb5c4c4d1de7f 100644
--- a/clang/test/Sema/format-strings.c
+++ b/clang/test/Sema/format-strings.c
@@ -1,5 +1,7 @@
 // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs %s
 // RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -fno-signed-char %s
+// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-unknown-fuchsia %s
+// RUN: %clang_cc1 -fblocks -fsyntax-only -verify -Wformat-nonliteral -isystem %S/Inputs -triple=x86_64-linux-android %s
 
 #include <stdarg.h>
 #include <stddef.h>
@@ -118,6 +120,8 @@ void check_conditional_literal(const char* s, int i) {
   printf(i ? "%i\n" : "%i %s %s\n", i, s); // expected-warning{{more '%' conversions than data arguments}}
 }
 
+#if !defined(__ANDROID__) && !defined(__Fuchsia__)
+
 void check_writeback_specifier()
 {
   int x;
@@ -154,6 +158,45 @@ void check_writeback_specifier()
   // expected-note@-1{{did you mean to use 'll'?}}
 }
 
+#else
+
+void check_writeback_specifier()
+{
+  int x;
+  printf("%n", &x); // expected-warning{{'%n' specifier not supported on this platform}}
+
+  printf("%hhn", (signed char*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%hhn", (char*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%hhn", (unsigned char*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%hhn", (int*)0); // expected-warning{{format specifies type 'signed char *' but the argument has type 'int *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+
+  printf("%hn", (short*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%hn", (unsigned short*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%hn", (int*)0); // expected-warning{{format specifies type 'short *' but the argument has type 'int *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+
+  printf("%n", (int*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%n", (unsigned int*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%n", (char*)0); // expected-warning{{format specifies type 'int *' but the argument has type 'char *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+
+  printf("%ln", (long*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%ln", (unsigned long*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%ln", (int*)0); // expected-warning{{format specifies type 'long *' but the argument has type 'int *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+
+  printf("%lln", (long long*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%lln", (unsigned long long*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%lln", (int*)0); // expected-warning{{format specifies type 'long long *' but the argument has type 'int *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+
+  printf("%qn", (long long*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%qn", (unsigned long long*)0); // expected-warning{{'%n' specifier not supported on this platform}}
+}
+
+#endif // !defined(__ANDROID__) && !defined(__Fuchsia__)
+
 void check_invalid_specifier(FILE* fp, char *buf)
 {
   printf("%s%lb%d","unix",10,20); // expected-warning {{invalid conversion specifier 'b'}} expected-warning {{data argument not used by format string}}
@@ -386,14 +429,28 @@ void bug7377_bad_length_mod_usage() {
   // Bad flag usage
   printf("%#p", (void *) 0); // expected-warning{{flag '#' results in undefined behavior with 'p' conversion specifier}}
   printf("%0d", -1); // no-warning
+  printf("%-p", (void *) 0); // no-warning
+#if !defined(__ANDROID__) && !defined(__Fuchsia__)
   printf("%#n", (int *) 0); // expected-warning{{flag '#' results in undefined behavior with 'n' conversion specifier}}
   printf("%-n", (int *) 0); // expected-warning{{flag '-' results in undefined behavior with 'n' conversion specifier}}
-  printf("%-p", (void *) 0); // no-warning
+#else
+  printf("%#n", (int *) 0); // expected-warning{{flag '#' results in undefined behavior with 'n' conversion specifier}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+  printf("%-n", (int *) 0); // expected-warning{{flag '-' results in undefined behavior with 'n' conversion specifier}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+#endif // !defined(__ANDROID__) && !defined(__Fuchsia__)
 
   // Bad optional amount use
   printf("%.2c", 'a'); // expected-warning{{precision used with 'c' conversion specifier, resulting in undefined behavior}}
+#if !defined(__ANDROID__) && !defined(__Fuchsia__)
+  printf("%1n", (int *) 0); // expected-warning{{field width used with 'n' conversion specifier, resulting in undefined behavior}}
+  printf("%.9n", (int *) 0); // expected-warning{{precision used with 'n' conversion specifier, resulting in undefined behavior}}
+#else
   printf("%1n", (int *) 0); // expected-warning{{field width used with 'n' conversion specifier, resulting in undefined behavior}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
   printf("%.9n", (int *) 0); // expected-warning{{precision used with 'n' conversion specifier, resulting in undefined behavior}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+#endif // #if !defined(__ANDROID__) && !defined(__Fuchsia__)
 
   // Ignored flags
   printf("% +f", 1.23); // expected-warning{{flag ' ' is ignored when flag '+' is present}}
@@ -644,6 +701,8 @@ void test14_zed(int *p) {
   test14_bar("%", "%d", p); // expected-warning{{incomplete format specifier}}
 }
 
+#if !defined(__ANDROID__) && !defined(__Fuchsia__)
+
 void test_qualifiers(volatile int *vip, const int *cip,
                      const volatile int *cvip) {
   printf("%n", cip); // expected-warning{{format specifies type 'int *' but the argument has type 'const int *'}}
@@ -660,6 +719,29 @@ void test_qualifiers(volatile int *vip, const int *cip,
   printf("%n", (cip_t)0); // expected-warning{{format specifies type 'int *' but the argument has type 'cip_t' (aka 'const int *')}}
 }
 
+#else
+
+void test_qualifiers(volatile int *vip, const int *cip,
+                     const volatile int *cvip) {
+  printf("%n", cip); // expected-warning{{format specifies type 'int *' but the argument has type 'const int *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+  printf("%n", cvip); // expected-warning{{format specifies type 'int *' but the argument has type 'const volatile int *'}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+
+  printf("%n", vip); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%p", cip); // No warning.
+  printf("%p", cvip); // No warning.
+
+
+  typedef int* ip_t;
+  typedef const int* cip_t;
+  printf("%n", (ip_t)0); // expected-warning{{'%n' specifier not supported on this platform}}
+  printf("%n", (cip_t)0); // expected-warning{{format specifies type 'int *' but the argument has type 'cip_t' (aka 'const int *')}}
+  // expected-warning@-1 {{'%n' specifier not supported on this platform}}
+}
+
+#endif // #if !defined(__ANDROID__) && !defined(__Fuchsia__)
+
 #pragma GCC diagnostic ignored "-Wformat-nonliteral"
 #pragma GCC diagnostic warning "-Wformat-security"
 // <rdar://problem/14178260>

From 0379459fc5860a9c34d5592d30f5834afbcd6b75 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Fri, 21 Jan 2022 12:31:41 -0800
Subject: [PATCH 217/946] [RISCV] Strengthen a SDTypeProfile. Fix formatting.

---
 llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 9745c13863823..28cb8fc413793 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -216,17 +216,17 @@ def riscv_zext_vl : SDNode<"RISCVISD::VZEXT_VL", SDT_RISCVVEXTEND_VL>;
 
 def riscv_trunc_vector_vl : SDNode<"RISCVISD::TRUNCATE_VECTOR_VL",
                                    SDTypeProfile<1, 3, [SDTCisVec<0>,
-                                                        SDTCisVec<1>,
+                                                        SDTCisSameNumEltsAs<0, 1>,
                                                         SDTCisSameNumEltsAs<0, 2>,
                                                         SDTCVecEltisVT<2, i1>,
                                                         SDTCisVT<3, XLenVT>]>>;
 
 def SDT_RISCVVWBinOp_VL : SDTypeProfile<1, 4, [SDTCisVec<0>,
-                                             SDTCisSameNumEltsAs<0, 1>,
-                                             SDTCisSameAs<1, 2>,
-                                             SDTCisSameNumEltsAs<1, 3>,
-                                             SDTCVecEltisVT<3, i1>,
-                                             SDTCisVT<4, XLenVT>]>;
+                                               SDTCisSameNumEltsAs<0, 1>,
+                                               SDTCisSameAs<1, 2>,
+                                               SDTCisSameNumEltsAs<1, 3>,
+                                               SDTCVecEltisVT<3, i1>,
+                                               SDTCisVT<4, XLenVT>]>;
 def riscv_vwmul_vl  : SDNode<"RISCVISD::VWMUL_VL",  SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
 def riscv_vwmulu_vl : SDNode<"RISCVISD::VWMULU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;
 def riscv_vwaddu_vl : SDNode<"RISCVISD::VWADDU_VL", SDT_RISCVVWBinOp_VL, [SDNPCommutative]>;

From 4f8ea3c84f3de6ddb754ad339c4672c7e3b7fc74 Mon Sep 17 00:00:00 2001
From: Muiez Ahmed <muiez@ibm.com>
Date: Fri, 21 Jan 2022 16:18:46 -0500
Subject: [PATCH 218/946] [SystemZ][z/OS][NFC] Remove extra symbol

---
 libcxx/include/__locale | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/include/__locale b/libcxx/include/__locale
index 6181f2539475d..98445bd2d8f40 100644
--- a/libcxx/include/__locale
+++ b/libcxx/include/__locale
@@ -512,8 +512,7 @@ public:
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA
 # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT
 #elif defined(__MVS__)
-    static const mask __regex_word = 0x8000;
-# if defined(__NATIVE_ASCII_F)`
+# if defined(__NATIVE_ASCII_F)
     typedef unsigned int mask;
     static const mask space  = _ISSPACE_A;
     static const mask print  = _ISPRINT_A;
@@ -538,6 +537,7 @@ public:
     static const mask xdigit = __ISXDIGIT;
     static const mask blank  = __ISBLANK;
 # endif
+    static const mask __regex_word = 0x8000;
 #else
 # error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE?
 #endif

From d84d1135d80c1dead6564347943ba56eed5aac3b Mon Sep 17 00:00:00 2001
From: Shubham Sandeep Rastogi <srastogi22@apple.com>
Date: Wed, 17 Nov 2021 15:05:58 -0800
Subject: [PATCH 219/946] Emit swift5 reflection section data in dsym bundle
 generated by dsymutil in the Dwarf section.

Add support for Swift reflection metadata to dsymutil.

This patch adds support for copying Swift reflection metadata (__swift5_.* sections) from .o files to into the symbol-rich binary in the output .dSYM. The functionality is automatically enabled only if a .o file has reflection metadata sections and the binary doesn't. When copying dsymutil moves the section from the __TEXT segment to the __DWARF segment.

rdar://76973336

https://reviews.llvm.org/D115007
---
 llvm/include/llvm/BinaryFormat/Swift.def      |  26 +
 llvm/include/llvm/BinaryFormat/Swift.h        |  24 +
 llvm/include/llvm/DWARFLinker/DWARFStreamer.h |   8 +-
 llvm/include/llvm/MC/MCContext.h              |  10 +-
 llvm/include/llvm/MC/MCObjectFileInfo.h       |  13 +
 llvm/include/llvm/Object/MachO.h              |   4 +
 llvm/include/llvm/Object/ObjectFile.h         |   6 +
 llvm/lib/DWARFLinker/DWARFStreamer.cpp        |  20 +-
 llvm/lib/MC/MCContext.cpp                     |   8 +-
 llvm/lib/MC/MCObjectFileInfo.cpp              |  11 +
 llvm/lib/Object/MachOObjectFile.cpp           |  12 +
 llvm/test/tools/dsymutil/Inputs/main.yaml     | 886 ++++++++++++++++++
 .../dsymutil/Inputs/reflection_metadata.yaml  | 436 +++++++++
 llvm/test/tools/dsymutil/Inputs/test.yaml     | 254 +++++
 llvm/test/tools/dsymutil/reflection-dump.test |  42 +
 llvm/tools/dsymutil/DwarfLinkerForBinary.cpp  |  86 +-
 16 files changed, 1835 insertions(+), 11 deletions(-)
 create mode 100644 llvm/include/llvm/BinaryFormat/Swift.def
 create mode 100644 llvm/include/llvm/BinaryFormat/Swift.h
 create mode 100644 llvm/test/tools/dsymutil/Inputs/main.yaml
 create mode 100644 llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
 create mode 100644 llvm/test/tools/dsymutil/Inputs/test.yaml
 create mode 100644 llvm/test/tools/dsymutil/reflection-dump.test

diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def
new file mode 100644
index 0000000000000..39931bec70e57
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/Swift.def
@@ -0,0 +1,26 @@
+//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Macros for running through Swift enumerators.
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined HANDLE_SWIFT_SECTION)
+#error "Missing macro definition of HANDLE_SWIFT_SECTION"
+#endif
+
+#ifndef HANDLE_SWIFT_SECTION
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)
+#endif
+
+HANDLE_SWIFT_SECTION(Fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd")
+HANDLE_SWIFT_SECTION(Assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty")
+HANDLE_SWIFT_SECTION(Builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn")
+HANDLE_SWIFT_SECTION(Capture, "__swift5_capture", "swift5_capture", ".sw5cptr")
+HANDLE_SWIFT_SECTION(Typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf")
+HANDLE_SWIFT_SECTION(Reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst")
diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h
new file mode 100644
index 0000000000000..63bbfe08c86d4
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/Swift.h
@@ -0,0 +1,24 @@
+//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+
+#ifndef LLVM_BINARYFORMAT_SWIFT_H
+#define LLVM_BINARYFORMAT_SWIFT_H
+
+namespace llvm {
+namespace swift {
+
+enum Swift5ReflectionSectionKind {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND,
+#include "llvm/BinaryFormat/Swift.def"
+#undef HANDLE_SWIFT_SECTION
+  Unknown,
+  Last = Unknown
+};
+} // end of namespace swift
+} // end of namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index 9a5c6bcaf83f3..8e845ee91b9f7 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H
 #define LLVM_DWARFLINKER_DWARFSTREAMER_H
 
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/DWARFLinker/DWARFLinker.h"
@@ -48,7 +49,7 @@ class DwarfStreamer : public DwarfEmitter {
       : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
         ErrorHandler(Error), WarningHandler(Warning) {}
 
-  bool init(Triple TheTriple);
+  bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
 
   /// Dump the file to the disk.
   void finish();
@@ -85,6 +86,11 @@ class DwarfStreamer : public DwarfEmitter {
   /// Emit the swift_ast section stored in \p Buffer.
   void emitSwiftAST(StringRef Buffer);
 
+  /// Emit the swift reflection section stored in \p Buffer.
+  void emitSwiftReflectionSection(
+      llvm::swift::Swift5ReflectionSectionKind ReflSectionKind,
+      StringRef Buffer, uint32_t Alignment, uint32_t Size);
+
   /// Emit debug_ranges for \p FuncRange by translating the
   /// original \p Entries.
   void emitRangesEntries(
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 88d86d5b675ac..d2307d6922780 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -80,6 +80,10 @@ namespace llvm {
   private:
     Environment Env;
 
+    /// The name of the Segment where Swift5 Reflection Section data will be
+    /// outputted
+    StringRef Swift5ReflectionSegmentName;
+
     /// The triple for this object.
     Triple TT;
 
@@ -399,13 +403,17 @@ namespace llvm {
                        const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI,
                        const SourceMgr *Mgr = nullptr,
                        MCTargetOptions const *TargetOpts = nullptr,
-                       bool DoAutoReset = true);
+                       bool DoAutoReset = true,
+                       StringRef Swift5ReflSegmentName = {});
     MCContext(const MCContext &) = delete;
     MCContext &operator=(const MCContext &) = delete;
     ~MCContext();
 
     Environment getObjectFileType() const { return Env; }
 
+    const StringRef &getSwift5ReflectionSegmentName() const {
+      return Swift5ReflectionSegmentName;
+    }
     const Triple &getTargetTriple() const { return TT; }
     const SourceMgr *getSourceManager() const { return SrcMgr; }
 
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 5e0cccaba77fa..1b4804db783b4 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -15,6 +15,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/VersionTuple.h"
@@ -228,6 +229,10 @@ class MCObjectFileInfo {
   MCSection *ReadOnly8Section = nullptr;
   MCSection *ReadOnly16Section = nullptr;
 
+  // Swift5 Reflection Data Sections
+  std::array<MCSection *, swift::Swift5ReflectionSectionKind::Last>
+      Swift5ReflectionSections = {};
+
 public:
   void initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
                             bool LargeCodeModel = false);
@@ -423,6 +428,14 @@ class MCObjectFileInfo {
 
   bool isPositionIndependent() const { return PositionIndependent; }
 
+  // Swift5 Reflection Data Sections
+  MCSection *getSwift5ReflectionSection(
+      llvm::swift::Swift5ReflectionSectionKind ReflSectionKind) {
+    return ReflSectionKind != llvm::swift::Swift5ReflectionSectionKind::Unknown
+               ? Swift5ReflectionSections[ReflSectionKind]
+               : nullptr;
+  }
+
 private:
   bool PositionIndependent = false;
   MCContext *Ctx = nullptr;
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index ede742c47f971..09b6454bb0c14 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
@@ -583,6 +584,9 @@ class MachOObjectFile : public ObjectFile {
 
   StringRef mapDebugSectionName(StringRef Name) const override;
 
+  llvm::swift::Swift5ReflectionSectionKind
+  mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
+
   bool hasPageZeroSegment() const { return HasPageZeroSegment; }
 
   static bool classof(const Binary *v) {
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 12704b1fc88e7..29919db772f0e 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/SymbolicFile.h"
@@ -290,6 +291,11 @@ class ObjectFile : public SymbolicFile {
   virtual void getRelocationTypeName(DataRefImpl Rel,
                                      SmallVectorImpl<char> &Result) const = 0;
 
+  virtual llvm::swift::Swift5ReflectionSectionKind
+  mapReflectionSectionNameToEnumValue(StringRef SectionName) const {
+    return llvm::swift::Swift5ReflectionSectionKind::Unknown;
+  };
+
   Expected<uint64_t> getSymbolValue(DataRefImpl Symb) const;
 
 public:
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 1ab6ead3b5f66..7f9b9a9bc793c 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -27,7 +27,8 @@
 
 namespace llvm {
 
-bool DwarfStreamer::init(Triple TheTriple) {
+bool DwarfStreamer::init(Triple TheTriple,
+                         StringRef Swift5ReflectionSegmentName) {
   std::string ErrorStr;
   std::string TripleName;
   StringRef Context = "dwarf streamer init";
@@ -54,8 +55,9 @@ bool DwarfStreamer::init(Triple TheTriple) {
   if (!MSTI)
     return error("no subtarget info for target " + TripleName, Context), false;
 
-  MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get()));
-  MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false));
+  MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
+                         nullptr, true, Swift5ReflectionSegmentName));
+  MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false));
   MC->setObjectFileInfo(MOFI.get());
 
   MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions);
@@ -302,6 +304,18 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) {
   MS->emitBytes(Buffer);
 }
 
+void DwarfStreamer::emitSwiftReflectionSection(
+    llvm::swift::Swift5ReflectionSectionKind ReflSectionKind, StringRef Buffer,
+    uint32_t Alignment, uint32_t Size) {
+  MCSection *ReflectionSection =
+      MOFI->getSwift5ReflectionSection(ReflSectionKind);
+  if (ReflectionSection == nullptr)
+    return;
+  ReflectionSection->setAlignment(Align(Alignment));
+  MS->SwitchSection(ReflectionSection);
+  MS->emitBytes(Buffer);
+}
+
 /// Emit the debug_range section contents for \p FuncRange by
 /// translating the original \p Entries. The debug_range section
 /// format is totally trivial, consisting just of pairs of address
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 7f639e9c408fe..eafcee1e0607b 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &,
 MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
                      const MCRegisterInfo *mri, const MCSubtargetInfo *msti,
                      const SourceMgr *mgr, MCTargetOptions const *TargetOpts,
-                     bool DoAutoReset)
-    : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr),
-      DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti),
-      Symbols(Allocator), UsedNames(Allocator),
+                     bool DoAutoReset, StringRef Swift5ReflSegmentName)
+    : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple),
+      SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler),
+      MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator),
       InlineAsmUsedLabelNames(Allocator),
       CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
       AutoReset(DoAutoReset), TargetOptions(TargetOpts) {
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index d7f85f793c55f..77b0b0ee687cb 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -299,6 +299,17 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
   RemarksSection = Ctx->getMachOSection(
       "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata());
 
+  // The architecture of dsymutil makes it very difficult to copy the Swift
+  // reflection metadata sections into the __TEXT segment, so dsymutil creates
+  // these sections in the __DWARF segment instead.
+  if (!Ctx->getSwift5ReflectionSegmentName().empty()) {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
+  Swift5ReflectionSections[llvm::swift::Swift5ReflectionSectionKind::KIND] =   \
+      Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(),       \
+                           MACHO, 0, SectionKind::getMetadata());
+#include "llvm/BinaryFormat/Swift.def"
+  }
+
   TLSExtraDataSection = TLSTLVSection;
 }
 
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 42e257516f4e0..83bc74ff31c40 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
@@ -4765,3 +4766,14 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) {
                              Path.str().c_str());
   return ObjectPaths;
 }
+
+llvm::swift::Swift5ReflectionSectionKind
+MachOObjectFile::mapReflectionSectionNameToEnumValue(
+    StringRef SectionName) const {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
+  .Case(MACHO, llvm::swift::Swift5ReflectionSectionKind::KIND)
+  return StringSwitch<llvm::swift::Swift5ReflectionSectionKind>(SectionName)
+#include "llvm/BinaryFormat/Swift.def"
+      .Default(llvm::swift::Swift5ReflectionSectionKind::Unknown);
+#undef HANDLE_SWIFT_SECTION
+}
diff --git a/llvm/test/tools/dsymutil/Inputs/main.yaml b/llvm/test/tools/dsymutil/Inputs/main.yaml
new file mode 100644
index 0000000000000..d81931d7861c3
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/main.yaml
@@ -0,0 +1,886 @@
+# How to generate this file:
+# 1. First take a swift file and run xcrun swiftc -g -v test.swift 
+# reflection_metadata.swift, make sure the two swift files are in a short path 
+# like /tmp/
+
+# 2. Now you can see what the driver does, generate the object files in the 
+# tmp directory and link them to create the input binary
+
+# 3. Run obj2yaml on the input binary to create a yaml file and strip out the 
+# swift5 reflection sections from the load commands in the text segment
+
+# 4. I ran delta to reduce this file.
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x1000007
+  cpusubtype:      0x3
+  filetype:        0x2
+  ncmds:           18
+  sizeofcmds:      2848
+  flags:           0x200085
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __PAGEZERO
+    vmaddr:          0
+    vmsize:          4294967296
+    fileoff:         0
+    filesize:        0
+    maxprot:         0
+    initprot:        0
+    nsects:          0
+    flags:           0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         952
+    segname:         __TEXT
+    vmaddr:          4294967296
+    vmsize:          16384
+    fileoff:         0
+    filesize:        16384
+    maxprot:         5
+    initprot:        5
+    nsects:          11
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x100003EB0
+        size:            336
+        offset:          0x3EB0
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         392
+    segname:         __DATA_CONST
+    vmaddr:          4294983680
+    vmsize:          16384
+    fileoff:         16384
+    filesize:        16384
+    maxprot:         3
+    initprot:        3
+    nsects:          4
+    flags:           16
+    Sections:
+      - sectname:        __got
+        segname:         __DATA_CONST
+        addr:            0x100004000
+        size:            48
+        offset:          0x4000
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x6
+        reserved1:       0x11
+        reserved2:       0x0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         392
+    segname:         __DATA
+    vmaddr:          4295000064
+    vmsize:          16384
+    fileoff:         32768
+    filesize:        16384
+    maxprot:         3
+    initprot:        3
+    nsects:          4
+    flags:           0
+    Sections:
+      - sectname:        __la_symbol_ptr
+        segname:         __DATA
+        addr:            0x100008000
+        size:            384
+        offset:          0x8088
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __LINKEDIT
+    vmaddr:          4295016448
+    vmsize:          32768
+    fileoff:         49152
+    filesize:        23584
+    maxprot:         1
+    initprot:        1
+    nsects:          0
+    flags:           0
+  - cmd:             LC_DYLD_INFO_ONLY
+    cmdsize:         48
+    rebase_off:      49152
+    rebase_size:     64
+    bind_off:        49216
+    bind_size:       216
+    weak_bind_off:   0
+    weak_bind_size:  0
+    lazy_bind_off:   49432
+    lazy_bind_size:  600
+    export_off:      50032
+    export_size:     1000
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          51136
+    nsyms:           638
+    stroff:          61504
+    strsize:         11232
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       560
+    iextdefsym:      560
+    nextdefsym:      52
+    iundefsym:       612
+    nundefsym:       26
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  61344
+    nindirectsyms:   40
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_LOAD_DYLINKER
+    cmdsize:         32
+    name:            12
+  - cmd:             LC_UUID
+    cmdsize:         24
+    uuid:            AA0A51FA-8B29-3A7B-85AA-FA6A457B2211
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         32
+    platform:        1
+    minos:           786432
+    sdk:             786688
+    ntools:          1
+  - cmd:             LC_SOURCE_VERSION
+    cmdsize:         16
+    version:         0
+  - cmd:             LC_MAIN
+    cmdsize:         24
+    entryoff:        9376
+    stacksize:       0
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 14942208
+      compatibility_version: 65536
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 85917696
+      compatibility_version: 65536
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         64
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 85196845
+      compatibility_version: 65536
+  - cmd:             LC_FUNCTION_STARTS
+    cmdsize:         16
+    dataoff:         51032
+    datasize:        104
+  - cmd:             LC_DATA_IN_CODE
+    cmdsize:         16
+    dataoff:         51136
+    datasize:        0
+LinkEditData:
+  NameList:
+    - n_strx:          2355
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976208
+    - n_strx:          2398
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976224
+    - n_strx:          2440
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976240
+    - n_strx:          2479
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976256
+    - n_strx:          2509
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976272
+    - n_strx:          2570
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976320
+    - n_strx:          2590
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976512
+    - n_strx:          2635
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976576
+    - n_strx:          2683
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976608
+    - n_strx:          2731
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976640
+    - n_strx:          2751
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976656
+    - n_strx:          2775
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976704
+    - n_strx:          2791
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976720
+    - n_strx:          2814
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976752
+    - n_strx:          2838
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976768
+    - n_strx:          2873
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976784
+    - n_strx:          2906
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976832
+    - n_strx:          2926
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294977104
+    - n_strx:          2946
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294977200
+    - n_strx:          2966
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977376
+    - n_strx:          3008
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977392
+    - n_strx:          3049
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977408
+    - n_strx:          3087
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977424
+    - n_strx:          3116
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294977440
+    - n_strx:          3176
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977488
+    - n_strx:          3201
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977504
+    - n_strx:          3232
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977552
+    - n_strx:          3270
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977648
+    - n_strx:          3318
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977664
+    - n_strx:          3364
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294978352
+    - n_strx:          3411
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294978464
+    - n_strx:          3447
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294978688
+    - n_strx:          3506
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294978832
+    - n_strx:          3567
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294978944
+    - n_strx:          3587
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979024
+    - n_strx:          3607
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979056
+    - n_strx:          3627
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979136
+    - n_strx:          3647
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294979232
+    - n_strx:          3666
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979264
+    - n_strx:          3686
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979328
+    - n_strx:          3706
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979536
+    - n_strx:          3726
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979856
+    - n_strx:          3746
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979872
+    - n_strx:          3766
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979888
+    - n_strx:          3786
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979920
+    - n_strx:          3814
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979936
+    - n_strx:          3842
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294980240
+    - n_strx:          3871
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294980288
+    - n_strx:          3898
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980320
+    - n_strx:          3927
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980368
+    - n_strx:          3951
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980384
+    - n_strx:          3982
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980448
+    - n_strx:          4001
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980464
+    - n_strx:          4032
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294980512
+    - n_strx:          4060
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294980800
+    - n_strx:          4088
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981120
+    - n_strx:          4116
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981136
+    - n_strx:          4144
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981152
+    - n_strx:          4172
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294981184
+    - n_strx:          4208
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294981248
+    - n_strx:          4225
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294981280
+    - n_strx:          4253
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981328
+    - n_strx:          4276
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981376
+    - n_strx:          4294
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          128
+      n_value:         4294981764
+    - n_strx:          4306
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294981824
+    - n_strx:          4322
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294981952
+    - n_strx:          4349
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294981960
+    - n_strx:          4387
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294981968
+    - n_strx:          4423
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294982160
+    - n_strx:          4474
+      n_type:          0xE
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294982352
+    - n_strx:          4503
+      n_type:          0xE
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294982448
+    - n_strx:          4530
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          128
+      n_value:         4294982464
+    - n_strx:          4558
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982466
+    - n_strx:          4571
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982470
+    - n_strx:          4608
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982476
+    - n_strx:          4639
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982498
+    - n_strx:          4666
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982506
+    - n_strx:          4691
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982510
+    - n_strx:          4727
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982516
+    - n_strx:          4758
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982522
+    - n_strx:          4790
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982528
+    - n_strx:          4820
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982534
+    - n_strx:          4859
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982540
+    - n_strx:          4902
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982554
+    - n_strx:          4945
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982564
+    - n_strx:          4986
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         0
+    - n_strx:          5987
+      n_type:          0x66
+      n_sect:          3
+      n_desc:          1
+      n_value:         1638431181
+    - n_strx:          7104
+      n_type:          0x66
+      n_sect:          3
+      n_desc:          1
+      n_value:         1638431191
+  StringTable:
+    - ' '
+    - '_$s4main10MyProtocolMp'
+    - '_$s4main10MyProtocolTL'
+    - '_$s4main11ConformanceV5innerSivM'
+    - '_$s4main11ConformanceV5innerSivg'
+    - '_$s4main11ConformanceV5innerSivpMV'
+    - '_$s4main11ConformanceV5innerSivpfi'
+    - '_$s4main11ConformanceV5innerSivs'
+    - '_$s4main11ConformanceVAA10MyProtocolAAMc'
+    - '_$s4main11ConformanceVAA10MyProtocolAAWP'
+    - '_$s4main11ConformanceVMa'
+    - '_$s4main11ConformanceVMn'
+    - '_$s4main11ConformanceVN'
+    - '_$s4main12Conformance2V5innerSivM'
+    - '_$s4main12Conformance2V5innerSivg'
+    - '_$s4main12Conformance2V5innerSivpMV'
+    - '_$s4main12Conformance2V5innerSivpfi'
+    - '_$s4main12Conformance2V5innerSivs'
+    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
+    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
+    - '_$s4main12Conformance2VMa'
+    - '_$s4main12Conformance2VMn'
+    - '_$s4main12Conformance2VN'
+    - '_$s4main13MyGenericEnumOMa'
+    - '_$s4main13MyGenericEnumOMn'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc'
+    - '_$s4main14MyGenericClassCMa'
+    - '_$s4main14MyGenericClassCMn'
+    - '_$s4main14MyGenericClassCfD'
+    - '_$s4main14MyGenericClassCfd'
+    - '_$s4main15MyGenericStructVMa'
+    - '_$s4main15MyGenericStructVMn'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF'
+    - '_$s4main6MyEnumOMa'
+    - '_$s4main6MyEnumOMn'
+    - '_$s4main6MyEnumON'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc'
+    - '_$s4main7MyClassCMa'
+    - '_$s4main7MyClassCMm'
+    - '_$s4main7MyClassCMn'
+    - '_$s4main7MyClassCN'
+    - '_$s4main7MyClassCfD'
+    - '_$s4main7MyClassCfd'
+    - '_$s4main8MyStructVMa'
+    - '_$s4main8MyStructVMn'
+    - '_$s4main8MyStructVN'
+    - '_$s5Inner4main10MyProtocolPTl'
+    - __mh_execute_header
+    - _main
+    - '_$sBi64_WV'
+    - '_$sBoWV'
+    - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC'
+    - '_$sSSN'
+    - '_$sSaMa'
+    - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF'
+    - '_$ss5print_9separator10terminatoryypd_S2StF'
+    - '_$sypN'
+    - '_$sytWV'
+    - '_OBJC_CLASS_$__TtCs12_SwiftObject'
+    - '_OBJC_METACLASS_$__TtCs12_SwiftObject'
+    - __objc_empty_cache
+    - _objc_opt_self
+    - _swift_allocObject
+    - _swift_allocateGenericClassMetadata
+    - _swift_allocateGenericValueMetadata
+    - _swift_bridgeObjectRelease
+    - _swift_checkMetadataState
+    - _swift_deallocClassInstance
+    - _swift_deallocObject
+    - _swift_getAssociatedTypeWitness
+    - _swift_getGenericMetadata
+    - _swift_initClassMetadata2
+    - _swift_release
+    - _swift_retain
+    - dyld_stub_binder
+    - '_$s4main12Conformance2V5innerSivM.resume.0'
+    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
+    - '_$s4main12Conformance2V5innerACSi_tcfC'
+    - '_$s4main12Conformance2VACycfC'
+    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - '_$s4main3AppVAAyyFZ'
+    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
+    - '_$s4main3AppVACycfC'
+    - '_$s4main3AppV5$mainyyFZ'
+    - '_$s4main3AppVMa'
+    - '_$sSa12_endMutationyyF'
+    - '_$s4main7MyClassC1iSivg'
+    - '_$s4main7MyClassC2msAA0B6StructVvg'
+    - '_$s4main7MyClassC2meAA0B4EnumOvg'
+    - '_$s4main6MyEnumOWOy'
+    - '_$s4main6MyEnumOWOe'
+    - '_$s4main6MyEnumOWOh'
+    - '_$s4main11ConformanceV5innerSivM.resume.0'
+    - '_$s4main11ConformanceV5innerACSi_tcfcfA_'
+    - '_$s4main11ConformanceV5innerACSi_tcfC'
+    - '_$s4main11ConformanceVACycfC'
+    - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - '_$s4main8MyStructVACycfC'
+    - '_$s4main14MyGenericClassC1txvg'
+    - '_$s4main14MyGenericClassC1i5InnerQzvg'
+    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg'
+    - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg'
+    - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh'
+    - '_$s4main15MyGenericStructVACyxGycfC'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA'
+    - '_$s4main6MyEnumOwCP'
+    - '_$s4main6MyEnumOwxx'
+    - '_$s4main6MyEnumOwcp'
+    - '_$s4main6MyEnumOwca'
+    - ___swift_memcpy9_8
+    - '_$s4main6MyEnumOwta'
+    - '_$s4main6MyEnumOwet'
+    - '_$s4main6MyEnumOwst'
+    - '_$s4main6MyEnumOwug'
+    - '_$s4main6MyEnumOwup'
+    - '_$s4main6MyEnumOwui'
+    - '_$s4main14MyGenericClassCMi'
+    - '_$s4main14MyGenericClassCMr'
+    - '_$s4main15MyGenericStructVMi'
+    - '_$s4main13MyGenericEnumOMi'
+    - ___swift_initWithCopy_strong
+    - ___swift_destroy_strong
+    - ___swift_assignWithCopy_strong
+    - ___swift_memcpy8_8
+    - ___swift_assignWithTake_strong
+    - '_$s4main13MyGenericEnumOwet'
+    - '_$s4main13MyGenericEnumOwst'
+    - '_$s4main13MyGenericEnumOwug'
+    - '_$s4main13MyGenericEnumOwup'
+    - '_$s4main13MyGenericEnumOwui'
+    - ___swift_instantiateGenericMetadata
+    - ___chkstk_darwin
+    - ___chkstk_darwin_llvm_probe
+    - ___chkstk_darwin_probe
+    - ____chkstk_darwin
+    - '_$s4mainMXM'
+    - '_$s4main3AppVMn'
+    - '_$s4main7MyClassC1iSivpWvd'
+    - '_$s4main7MyClassC2msAA0B6StructVvpWvd'
+    - '_$s4main7MyClassC2meAA0B4EnumOvpWvd'
+    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd'
+    - '_$s4main15MyGenericStructVMP'
+    - '_$s4main13MyGenericEnumOMP'
+    - ___swift_reflection_version
+    - _symbolic Si
+    - _symbolic _____ 4main12Conformance2V
+    - '_symbolic $s4main10MyProtocolP'
+    - _symbolic _____ 4main3AppV
+    - _symbolic x
+    - _symbolic B0
+    - _symbolic _____ 4main11ConformanceV
+    - _symbolic _____ 4main7MyClassC
+    - _symbolic _____ 4main8MyStructV
+    - _symbolic _____ 4main6MyEnumO
+    - _symbolic _____ 4main14MyGenericClassC
+    - _symbolic 5Inner_____Qz 4main10MyProtocolP
+    - _symbolic _____yxG 4main15MyGenericStructV
+    - _symbolic _____yxG 4main13MyGenericEnumO
+    - _symbolic _____ 4main15MyGenericStructV
+    - _symbolic _____ 4main13MyGenericEnumO
+    - _symbolic _____yxG 4main14MyGenericClassC
+    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
+    - '_$s4main11ConformanceVAA10MyProtocolAAMA'
+    - '_$s4main12Conformance2VMF'
+    - '_$s4main3AppVMF'
+    - '_$s4main10MyProtocol_pMF'
+    - '_$s4main7MyClassCMF'
+    - '_$s4main11ConformanceVMF'
+    - '_$s4main8MyStructVMF'
+    - '_$s4main6MyEnumOMF'
+    - '_$s4main14MyGenericClassCMF'
+    - '_$s4main15MyGenericStructVMF'
+    - '_$s4main13MyGenericEnumOMF'
+    - '_$s4main6MyEnumOMB'
+    - '_$s4main12Conformance2VMf'
+    - '_$s4main3AppVMf'
+    - '_$s4main3AppVN'
+    - '_$s4main11ConformanceVMf'
+    - '_$s4main8MyStructVMf'
+    - '_$s4main6MyEnumOWV'
+    - '_$s4main6MyEnumOMf'
+    - ___unnamed_23
+    - '_$s4main14MyGenericClassCMP'
+    - '_$s4main13MyGenericEnumOWV'
+    - __METACLASS_DATA__TtC4main7MyClass
+    - __IVARS__TtC4main7MyClass
+    - __DATA__TtC4main7MyClass
+    - __IVARS__TtC4main14MyGenericClass
+    - __dyld_private
+    - '_$s4main7MyClassCMf'
+    - '_$s4main14MyGenericClassCMI'
+    - '_$s4main15MyGenericStructVMI'
+    - '_$s4main13MyGenericEnumOMI'
+    - '/tmp/main-1.swiftmodule'
+    - '/Users/shubham/Development/test76973336/final2objfiletest/'
+    - test.swift
+    - '/tmp/test-1.o'
+    - '_$s4main12Conformance2V5innerSivpfi'
+    - '_$s4main12Conformance2V5innerSivg'
+    - '_$s4main12Conformance2V5innerSivs'
+    - '_$s4main12Conformance2V5innerSivM'
+    - '_$s4main12Conformance2V5innerSivM.resume.0'
+    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
+    - '_$s4main12Conformance2V5innerACSi_tcfC'
+    - '_$s4main12Conformance2VACycfC'
+    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - '_$s4main3AppVAAyyFZ'
+    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
+    - '_$s4main3AppVACycfC'
+    - '_$s4main3AppV5$mainyyFZ'
+    - _main
+    - '_$s4main12Conformance2VMa'
+    - '_$s4main3AppVMa'
+    - '_$sSa12_endMutationyyF'
+    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
+    - '_$s4main12Conformance2V5innerSivpMV'
+    - '_$s4mainMXM'
+    - '_$s4main12Conformance2VMn'
+    - '_$s4main3AppVMn'
+    - _symbolic Si
+    - _symbolic _____ 4main12Conformance2V
+    - '_symbolic $s4main10MyProtocolP'
+    - _symbolic _____ 4main3AppV
+    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
+    - '_$s4main12Conformance2VMF'
+    - '_$s4main3AppVMF'
+    - '_$s4main12Conformance2VMf'
+    - '_$s4main12Conformance2VN'
+    - '_$s4main3AppVMf'
+    - '_$s4main3AppVN'
+    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
+    - reflection_metadata.swift
+    - '/tmp/reflection_metadata-1.o'
diff --git a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
new file mode 100644
index 0000000000000..b2179a23bf28d
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
@@ -0,0 +1,436 @@
+# How to generate this file:
+# 1. First take a swift file and run xcrun swiftc -g -v file.swift 
+# secondfile.swift, make sure the two swift files are in a short path like /tmp/
+
+# 2. Now you can see what the driver does, generate the object files in the 
+# tmp directory
+
+# 3. Run obj2yaml on object file to create a yaml file
+
+# 4. I ran delta to reduce this file.
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x1000007
+  cpusubtype:      0x3
+  filetype:        0x1
+  ncmds:           8
+  sizeofcmds:      2800
+  flags:           0x2000
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         2552
+    segname:         ''
+    vmaddr:          0
+    vmsize:          21352
+    fileoff:         2832
+    filesize:        20967
+    maxprot:         7
+    initprot:        7
+    nsects:          31
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0
+        size:            4571
+        offset:          0xB10
+        align:           4
+        reloff:          0x5CF8
+        nreloc:          74
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        relocations:
+          - address:         0x11A1
+            symbolnum:       142
+            pcrel:           true
+            length:          2
+            extern:          true
+            type:            1
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_typeref
+        segname:         __TEXT
+        addr:            0x11DC
+        size:            117
+        offset:          0x1CEC
+        align:           1
+        reloff:          0x5F48
+        nreloc:          22
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         7800423000005369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0035496E6E657201F9FFFFFF517A0001FFFFFFFF797847000001FFFFFFFF797847000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF79784700
+        relocations:
+          - address:         0x6D
+            symbolnum:       163
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_capture
+        segname:         __TEXT
+        addr:            0x1254
+        size:            24
+        offset:          0x1D64
+        align:           2
+        reloff:          0x5FF8
+        nreloc:          6
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         010000000100000002000000F4FFFFFFF0FFFFFFECFFFFFF
+        relocations:
+          - address:         0x14
+            symbolnum:       29
+            pcrel:           false
+            length:          3
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_reflstr
+        segname:         __TEXT
+        addr:            0x17D8
+        size:            37
+        offset:          0x22E8
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         496E6E65720069006D73006D6500696E6E6572004300490074006D6773006D676500474300
+      - sectname:        __swift5_assocty
+        segname:         __TEXT
+        addr:            0x1800
+        size:            24
+        offset:          0x2310
+        align:           2
+        reloff:          0x6530
+        nreloc:          8
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF
+        relocations:
+          - address:         0x14
+            symbolnum:       31
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            5
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_fieldmd
+        segname:         __TEXT
+        addr:            0x1818
+        size:            260
+        offset:          0x2328
+        align:           2
+        reloff:          0x6570
+        nreloc:          60
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         000000000000000004000C0000000000000000000000000001000C000300000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000000000000000000003000C000200000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF000000000000000001000C000400000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF00000000C8FFFFFFC4FFFFFF000000000000000000000C0000000000000000000000000002000C000100000000000000ECFFFFFFE8FFFFFF
+        relocations:
+          - address:         0x100
+            symbolnum:       71
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_builtin
+        segname:         __TEXT
+        addr:            0x1AC8
+        size:            20
+        offset:          0x25D8
+        align:           2
+        reloff:          0x67F8
+        nreloc:          2
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         00000000090000000800010010000000FE000000
+        relocations:
+          - address:         0x0
+            symbolnum:       52
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            5
+            scattered:       false
+            value:           0
+      - sectname:        __bss
+        segname:         __DATA
+        addr:            0x3372
+        size:            2084
+        offset:          0x50B0
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x6800000B
+        reserved1:       0x0
+        reserved2:       0x0
+        relocations:
+          - address:         0x56
+            symbolnum:       1
+            pcrel:           false
+            length:          3
+            extern:          false
+            type:            0
+            scattered:       false
+            value:           0
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         24
+    platform:        1
+    minos:           786432
+    sdk:             786688
+    ntools:          0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          27888
+    nsyms:           185
+    stroff:          30848
+    strsize:         5056
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       79
+    iextdefsym:      79
+    nextdefsym:      87
+    iundefsym:       166
+    nundefsym:       19
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         40
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, 
+                       0x0, 0x0, 0x0, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         24
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, 
+                       0x6F, 0x72, 0x65, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         32
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, 
+                       0x6E, 0x63, 0x79, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         24
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, 
+                       0x0, 0x0, 0x0 ]
+LinkEditData:
+  NameList:
+    - n_strx:          5014
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         5600
+  StringTable:
+    - ''
+    - l_objectdestroy
+    - '_$s4main6MyEnumOWOy'
+    - '_$s4main6MyEnumOwxx'
+    - _symbolic x
+    - '_$s4main6MyEnumOwst'
+    - '_$s4main13MyGenericEnumOwst'
+    - '_$s4main6MyEnumOwet'
+    - '_$s4main13MyGenericEnumOwet'
+    - '_OBJC_CLASS_$__TtCs12_SwiftObject'
+    - '_OBJC_METACLASS_$__TtCs12_SwiftObject'
+    - _swift_deallocObject
+    - _swift_allocObject
+    - '_$s4main11ConformanceV5innerSivs'
+    - _swift_getAssociatedTypeWitness
+    - __IVARS__TtC4main7MyClass
+    - __DATA__TtC4main7MyClass
+    - __METACLASS_DATA__TtC4main7MyClass
+    - __IVARS__TtC4main14MyGenericClass
+    - l_protocols
+    - _objc_classes
+    - l_protocol_conformances
+    - l__swift5_reflection_descriptor
+    - l_coro.devirt.trigger
+    - '_$s4main14MyGenericClassCMr'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq'
+    - '_$s4main6MyEnumOwup'
+    - '_$s4main13MyGenericEnumOwup'
+    - '_$s4main6MyEnumOwcp'
+    - '_$s4main10MyProtocolMp'
+    - ___swift_reflection_version
+    - ____chkstk_darwin
+    - _swift_retain
+    - '_$s4main8MyStructVMn'
+    - '_$s4main15MyGenericStructVMn'
+    - '_$s4main11ConformanceVMn'
+    - '_$s4main6MyEnumOMn'
+    - '_$s4main13MyGenericEnumOMn'
+    - '_$s4main7MyClassCMn'
+    - '_$s4main14MyGenericClassCMn'
+    - '_$s4main7MyClassCMm'
+    - '_$s5Inner4main10MyProtocolPTl'
+    - '_$s4main6MyEnumOwui'
+    - '_$s4main13MyGenericEnumOwui'
+    - '_$s4main11ConformanceV5innerSivpfi'
+    - _symbolic Si
+    - '_$s4main15MyGenericStructVMi'
+    - '_$s4main13MyGenericEnumOMi'
+    - '_$s4main14MyGenericClassCMi'
+    - l_llvm.swift_module_hash
+    - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh'
+    - '_$s4main6MyEnumOWOh'
+    - '_$s4main14MyGenericClassC1i5InnerQzvg'
+    - '_$s4main14MyGenericClassC1txvg'
+    - '_$s4main11ConformanceV5innerSivg'
+    - '_$s4main7MyClassC1iSivg'
+    - '_$s4main7MyClassC2msAA0B6StructVvg'
+    - '_$s4main7MyClassC2meAA0B4EnumOvg'
+    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg'
+    - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg'
+    - '_$s4main6MyEnumOwug'
+    - '_$s4main13MyGenericEnumOwug'
+    - ___swift_initWithCopy_strong
+    - ___swift_assignWithCopy_strong
+    - ___swift_destroy_strong
+    - ___swift_assignWithTake_strong
+    - _objc_opt_self
+    - '_$s4main8MyStructVMf'
+    - '_$s4main11ConformanceVMf'
+    - '_$s4main6MyEnumOMf'
+    - '_$s4main7MyClassCMf'
+    - _swift_checkMetadataState
+    - _swift_release
+    - l_type_metadata_table
+    - __objc_empty_cache
+    - _swift_deallocClassInstance
+    - ___chkstk_darwin_llvm_probe
+    - '_$s4main6MyEnumOWOe'
+    - '_$s4main7MyClassC1iSivpWvd'
+    - '_$s4main7MyClassC2msAA0B6StructVvpWvd'
+    - '_$s4main7MyClassC2meAA0B4EnumOvpWvd'
+    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd'
+    - '_$s4main7MyClassCfd'
+    - '_$s4main14MyGenericClassCfd'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc'
+    - '_$s4main11ConformanceVAA10MyProtocolAAMc'
+    - '_$s4main6MyEnumOwta'
+    - l_metadata
+    - _swift_allocateGenericClassMetadata
+    - _swift_allocateGenericValueMetadata
+    - _swift_getGenericMetadata
+    - ___swift_instantiateGenericMetadata
+    - '_$s4main6MyEnumOwca'
+    - '_$s4main8MyStructVMa'
+    - '_$s4main15MyGenericStructVMa'
+    - '_$s4main11ConformanceVMa'
+    - '_$s4main6MyEnumOMa'
+    - '_$s4main13MyGenericEnumOMa'
+    - '_$s4main7MyClassCMa'
+    - '_$s4main14MyGenericClassCMa'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_'
+    - '_$s4main11ConformanceV5innerACSi_tcfcfA_'
+    - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - _symbolic _____ 4main8MyStructV
+    - _symbolic _____ 4main15MyGenericStructV
+    - _symbolic _____yxG 4main15MyGenericStructV
+    - _symbolic _____ 4main11ConformanceV
+    - '_$sytWV'
+    - '_$sBoWV'
+    - '_$sBi64_WV'
+    - '_$s4main6MyEnumOWV'
+    - '_$s4main13MyGenericEnumOWV'
+    - '_$s4main11ConformanceV5innerSivpMV'
+    - '_symbolic $s4main10MyProtocolP'
+    - _symbolic 5Inner_____Qz 4main10MyProtocolP
+    - '_$s4main11ConformanceVAA10MyProtocolAAWP'
+    - '_$s4main15MyGenericStructVMP'
+    - '_$s4main13MyGenericEnumOMP'
+    - '_$s4main14MyGenericClassCMP'
+    - '_$s4main6MyEnumOwCP'
+    - _symbolic _____ 4main6MyEnumO
+    - _symbolic _____ 4main13MyGenericEnumO
+    - _symbolic _____yxG 4main13MyGenericEnumO
+    - '_$s4main8MyStructVN'
+    - '_$s4main11ConformanceVN'
+    - '_$s4main6MyEnumON'
+    - '_$s4main7MyClassCN'
+    - '_$s4main11ConformanceV5innerSivM'
+    - '_$s4mainMXM'
+    - '_$s4main10MyProtocolTL'
+    - '_$s4main15MyGenericStructVMI'
+    - '_$s4main13MyGenericEnumOMI'
+    - '_$s4main14MyGenericClassCMI'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF'
+    - '_$s4main10MyProtocol_pMF'
+    - '_$s4main8MyStructVMF'
+    - '_$s4main15MyGenericStructVMF'
+    - '_$s4main11ConformanceVMF'
+    - '_$s4main6MyEnumOMF'
+    - '_$s4main13MyGenericEnumOMF'
+    - '_$s4main7MyClassCMF'
+    - '_$s4main14MyGenericClassCMF'
+    - '_$s4main7MyClassCfD'
+    - '_$s4main14MyGenericClassCfD'
+    - _symbolic _____ 4main7MyClassC
+    - _symbolic _____ 4main14MyGenericClassC
+    - _symbolic _____yxG 4main14MyGenericClassC
+    - '_$s4main15MyGenericStructVACyxGycfC'
+    - '_$s4main8MyStructVACycfC'
+    - '_$s4main11ConformanceVACycfC'
+    - '_$s4main11ConformanceV5innerACSi_tcfC'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC'
+    - '_$s4main6MyEnumOMB'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA'
+    - '_$s4main11ConformanceVAA10MyProtocolAAMA'
+    - l___unnamed_29
+    - l___unnamed_19
+    - ___swift_memcpy9_8
+    - ___swift_memcpy8_8
+    - l___unnamed_28
+    - l___unnamed_18
+    - l___unnamed_27
+    - l___unnamed_17
+    - l___unnamed_26
+    - l___unnamed_16
+    - l___unnamed_25
+    - l___unnamed_15
+    - l___unnamed_4
+    - l___unnamed_24
+    - l___unnamed_14
+    - l___unnamed_3
+    - ___unnamed_23
+    - l___unnamed_13
+    - _swift_initClassMetadata2
+    - l___unnamed_2
+    - l___unnamed_12
+    - l___unnamed_1
+    - l___unnamed_11
+    - _symbolic B0
+    - l___unnamed_30
+    - l___unnamed_10
+    - '_$s4main11ConformanceV5innerSivM.resume.0'
diff --git a/llvm/test/tools/dsymutil/Inputs/test.yaml b/llvm/test/tools/dsymutil/Inputs/test.yaml
new file mode 100644
index 0000000000000..da3aa9a8aaf34
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/test.yaml
@@ -0,0 +1,254 @@
+# How to generate this file:
+# 1. First take a swift file and run xcrun swiftc -g -v file.swift 
+# secondfile.swift, make sure the two swift files are in a short path like /tmp/
+
+# 2. Now you can see what the driver does, generate the object files in the 
+# tmp directory
+
+# 3. Run obj2yaml on object file to create a yaml file
+
+# 4. I ran delta to reduce this file.
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x1000007
+  cpusubtype:      0x3
+  filetype:        0x1
+  ncmds:           8
+  sizeofcmds:      2240
+  flags:           0x2000
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         1992
+    segname:         ''
+    vmaddr:          0
+    vmsize:          6592
+    fileoff:         2272
+    filesize:        6592
+    maxprot:         7
+    initprot:        7
+    nsects:          24
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0
+        size:            593
+        offset:          0x8E0
+        align:           4
+        reloff:          0x22A0
+        nreloc:          24
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        relocations:
+          - address:         0x233
+            symbolnum:       2
+            pcrel:           true
+            length:          2
+            extern:          true
+            type:            4
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_typeref
+        segname:         __TEXT
+        addr:            0x2D6
+        size:            38
+        offset:          0xBB6
+        align:           1
+        reloff:          0x2418
+        nreloc:          4
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         5369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF00
+        relocations:
+          - address:         0x21
+            symbolnum:       46
+            pcrel:           false
+            length:          3
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_reflstr
+        segname:         __TEXT
+        addr:            0x318
+        size:            12
+        offset:          0xBF8
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         496E6E657200696E6E657200
+      - sectname:        __swift5_assocty
+        segname:         __TEXT
+        addr:            0x324
+        size:            24
+        offset:          0xC04
+        align:           2
+        reloff:          0x2450
+        nreloc:          8
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF
+        relocations:
+          - address:         0x14
+            symbolnum:       5
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_fieldmd
+        segname:         __TEXT
+        addr:            0x378
+        size:            44
+        offset:          0xC58
+        align:           2
+        reloff:          0x24C0
+        nreloc:          8
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000
+        relocations:
+          - address:         0x1C
+            symbolnum:       12
+            pcrel:           false
+            length:          3
+            extern:          false
+            type:            0
+            scattered:       false
+            value:           0
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         24
+    platform:        1
+    minos:           786432
+    sdk:             786688
+    ntools:          0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          9824
+    nsyms:           57
+    stroff:          10736
+    strsize:         1544
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       16
+    iextdefsym:      16
+    nextdefsym:      31
+    iundefsym:       47
+    nundefsym:       10
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         40
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, 
+                       0x0, 0x0, 0x0, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         24
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, 
+                       0x6F, 0x72, 0x65, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         32
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, 
+                       0x6E, 0x63, 0x79, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         24
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, 
+                       0x0, 0x0, 0x0 ]
+LinkEditData:
+  NameList:
+    - n_strx:          1494
+      n_type:          0xE
+      n_sect:          9
+      n_desc:          0
+      n_value:         0
+  StringTable:
+    - ''
+    - l_entry_point
+    - '_$s4main12Conformance2V5innerSivs'
+    - l_protocol_conformances
+    - l_coro.devirt.trigger
+    - '_$s4main10MyProtocolMp'
+    - ___swift_reflection_version
+    - _main
+    - '_$s4main3AppVMn'
+    - '_$s4main12Conformance2VMn'
+    - '_$s4main12Conformance2V5innerSivpfi'
+    - _symbolic Si
+    - l_llvm.swift_module_hash
+    - '_$s4main12Conformance2V5innerSivg'
+    - '_$s4main3AppVMf'
+    - '_$s4main12Conformance2VMf'
+    - _swift_bridgeObjectRelease
+    - l_type_metadata_table
+    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
+    - '_$sSaMa'
+    - '_$s4main3AppVMa'
+    - '_$s4main12Conformance2VMa'
+    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
+    - '_$s4main3AppV5$mainyyFZ'
+    - '_$s4main3AppVAAyyFZ'
+    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - _symbolic _____ 4main3AppV
+    - '_$sytWV'
+    - '_$sBi64_WV'
+    - '_$s4main12Conformance2V5innerSivpMV'
+    - _symbolic _____ 4main12Conformance2V
+    - '_symbolic $s4main10MyProtocolP'
+    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
+    - '_$sypN'
+    - '_$s4main3AppVN'
+    - '_$s4main12Conformance2VN'
+    - '_$sSSN'
+    - '_$s4main12Conformance2V5innerSivM'
+    - '_$s4mainMXM'
+    - '_$sSa12_endMutationyyF'
+    - '_$ss5print_9separator10terminatoryypd_S2StF'
+    - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF'
+    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
+    - '_$s4main3AppVMF'
+    - '_$s4main12Conformance2VMF'
+    - '_$s4main3AppVACycfC'
+    - '_$s4main12Conformance2VACycfC'
+    - '_$s4main12Conformance2V5innerACSi_tcfC'
+    - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC'
+    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
+    - l___unnamed_8
+    - l___unnamed_7
+    - l___unnamed_6
+    - l___unnamed_5
+    - l___unnamed_4
+    - '_$s4main12Conformance2V5innerSivM.resume.0'
+    - ''
+    - ''
+    - ''
+    - ''
+    - ''
+    - ''
+    - ''
diff --git a/llvm/test/tools/dsymutil/reflection-dump.test b/llvm/test/tools/dsymutil/reflection-dump.test
new file mode 100644
index 0000000000000..3fea11ee57adc
--- /dev/null
+++ b/llvm/test/tools/dsymutil/reflection-dump.test
@@ -0,0 +1,42 @@
+RUN: rm -rf %t.dir && mkdir -p %t.dir/tmp
+RUN: cp %p/Inputs/main.yaml %t.dir
+RUN: cp %p/Inputs/test.yaml %t.dir
+RUN: cp %p/Inputs/reflection_metadata.yaml %t.dir
+RUN: yaml2obj %p/Inputs/main.yaml -o %t.dir/main
+RUN: yaml2obj %p/Inputs/test.yaml -o %t.dir/tmp/test-1.o
+RUN: yaml2obj %p/Inputs/reflection_metadata.yaml -o %t.dir/tmp/reflection_metadata-1.o
+
+RUN: dsymutil -oso-prepend-path=%t.dir %t.dir/main -o %t.dir/main.dSYM
+RUN: llvm-objdump -s %t.dir/main.dSYM/Contents/Resources/DWARF/main | FileCheck %s
+
+CHECK: Contents of section __DWARF,__swift5_typeref:
+CHECK-NEXT:  10000e000 53690000 01ffffff ff002473 346d6169  Si........$s4mai
+CHECK-NEXT:  10000e010 6e31304d 7950726f 746f636f 6c500000  n10MyProtocolP..
+CHECK-NEXT:  10000e020 01ffffff ff007800 42300000 53690000  ......x.B0..Si..
+CHECK-NEXT:  10000e030 01ffffff ff002473 346d6169 6e31304d  ......$s4main10M
+CHECK-NEXT:  10000e040 7950726f 746f636f 6c500000 01ffffff  yProtocolP......
+
+CHECK: Contents of section __DWARF,__swift5_reflstr:
+CHECK-NEXT:  10000e09b 496e6e65 7200696e 6e657200 496e6e65  Inner.inner.Inne
+CHECK-NEXT:  10000e0ab 72006900 6d73006d 6500696e 6e657200  r.i.ms.me.inner.
+CHECK-NEXT:  10000e0bb 43004900 74006d67 73006d67 65004743  C.I.t.mgs.mge.GC
+CHECK-NEXT:  10000e0cb 00
+
+CHECK: Contents of section __DWARF,__swift5_assocty:
+CHECK-NEXT:  10000e0cc 00000000 fcffffff 01000000 08000000  ................
+CHECK-NEXT:  10000e0dc f0ffffff ecffffff 00000000 fcffffff  ................
+CHECK-NEXT:  10000e0ec 01000000 08000000 f0ffffff ecffffff  ................
+
+CHECK: Contents of section __DWARF,__swift5_fieldmd:
+CHECK-NEXT:  10000e0fc 00000000 00000000 00000c00 01000000  ................
+CHECK-NEXT:  10000e10c 02000000 ecffffff e8ffffff 00000000  ................
+CHECK-NEXT:  10000e11c 00000000 00000c00 00000000 00000000  ................
+CHECK-NEXT:  10000e12c 00000000 04000c00 00000000 00000000  ................
+
+CHECK: Contents of section __DWARF,__swift5_capture:
+CHECK-NEXT:  10000e22c 01000000 01000000 02000000 f4ffffff  ................
+CHECK-NEXT:	 10000e23c f0ffffff ecffffff                    ........
+
+CHECK: Contents of section __DWARF,__swift5_builtin:
+CHECK-NEXT:  10000e244 00000000 09000000 08000100 10000000  ................
+CHECK-NEXT:  10000e254 fe000000                             ....
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index a8dfde0865377..b9682b83ac67a 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
@@ -174,7 +175,7 @@ bool DwarfLinkerForBinary::createStreamer(const Triple &TheTriple,
       [&](const Twine &Warning, StringRef Context, const DWARFDie *) {
         warn(Warning, Context);
       });
-  return Streamer->init(TheTriple);
+  return Streamer->init(TheTriple, "__DWARF");
 }
 
 ErrorOr<const object::ObjectFile &>
@@ -295,6 +296,77 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj,
   return ErrorOrObj.getError();
 }
 
+static bool binaryHasSwiftReflectionSections(const DebugMap &Map,
+                                             const LinkOptions &Options,
+                                             BinaryHolder &BinHolder) {
+  // If the input binary has swift5 reflection sections, there is no need to
+  // copy them to the .dSYM. Only copy them for binaries where the linker
+  // omitted the reflection metadata.
+  if (!Map.getBinaryPath().empty() &&
+      Options.FileType == OutputFileType::Object) {
+
+    auto ObjectEntry = BinHolder.getObjectEntry(Map.getBinaryPath());
+    // If ObjectEntry or Object has an error, no binary exists, therefore no
+    // reflection sections exist.
+    if (!ObjectEntry) {
+      // Any errors will be diagnosed later in the main loop, ignore them here.
+      llvm::consumeError(ObjectEntry.takeError());
+      return false;
+    }
+
+    auto Object =
+        ObjectEntry->getObjectAs<object::MachOObjectFile>(Map.getTriple());
+    if (!Object) {
+      // Any errors will be diagnosed later in the main loop, ignore them here.
+      llvm::consumeError(Object.takeError());
+      return false;
+    }
+
+    for (auto &Section : Object->sections()) {
+      llvm::Expected<llvm::StringRef> NameOrErr =
+          Object->getSectionName(Section.getRawDataRefImpl());
+      if (!NameOrErr) {
+        llvm::consumeError(NameOrErr.takeError());
+        continue;
+      }
+      NameOrErr->consume_back("__TEXT");
+      if (Object->mapReflectionSectionNameToEnumValue(*NameOrErr) !=
+          llvm::swift::Swift5ReflectionSectionKind::Unknown) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+static void
+copySwiftReflectionMetadata(const llvm::dsymutil::DebugMapObject *Obj,
+                            DwarfStreamer *Streamer) {
+  auto OF =
+      llvm::object::ObjectFile::createObjectFile(Obj->getObjectFilename());
+  if (!OF) {
+    llvm::consumeError(OF.takeError());
+    return;
+  } else if (auto *MO =
+                 dyn_cast<llvm::object::MachOObjectFile>(OF->getBinary())) {
+    for (auto &Section : OF->getBinary()->sections()) {
+      llvm::Expected<llvm::StringRef> NameOrErr =
+          MO->getSectionName(Section.getRawDataRefImpl());
+      if (!NameOrErr) {
+        llvm::consumeError(NameOrErr.takeError());
+        continue;
+      }
+      llvm::Expected<llvm::StringRef> SectionContents = Section.getContents();
+      if (SectionContents) {
+        NameOrErr->consume_back("__TEXT");
+        Streamer->emitSwiftReflectionSection(
+            MO->mapReflectionSectionNameToEnumValue(*NameOrErr),
+            *SectionContents, Section.getAlignment(), Section.getSize());
+      }
+    }
+  }
+}
+
 bool DwarfLinkerForBinary::link(const DebugMap &Map) {
   if (!createStreamer(Map.getTriple(), OutFile))
     return false;
@@ -389,8 +461,19 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
         llvm_unreachable("Unhandled DebugMap object");
       });
   GeneralLinker.setSwiftInterfacesMap(&ParseableSwiftInterfaces);
+  bool ReflectionSectionsPresentInBinary = false;
+  // If there is no output specified, no point in checking the binary for swift5
+  // reflection sections.
+  if (!Options.NoOutput) {
+    ReflectionSectionsPresentInBinary =
+        binaryHasSwiftReflectionSections(Map, Options, BinHolder);
+  }
 
   for (const auto &Obj : Map.objects()) {
+
+    if (!ReflectionSectionsPresentInBinary)
+      copySwiftReflectionMetadata(Obj.get(), Streamer.get());
+
     // N_AST objects (swiftmodule files) should get dumped directly into the
     // appropriate DWARF section.
     if (Obj->getType() == MachO::N_AST) {
@@ -431,7 +514,6 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
 
       continue;
     }
-
     if (auto ErrorOrObj = loadObject(*Obj, Map, RL))
       GeneralLinker.addObjectFile(*ErrorOrObj);
     else {

From 9f4cc5a6bb56b42bb90ea31f10ecf8fed8f07653 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Fri, 21 Jan 2022 11:55:17 -0800
Subject: [PATCH 220/946] [gn build] Set HAVE_MALLINFO2=1

I'm seeing deprecated warnings due to using mallinfo() instead of
mallinfo2().

../../llvm/lib/Support/Unix/Process.inc:98:10: warning: 'mallinfo' is deprecated [-Wdeprecated-declarations]
  mi = ::mallinfo();

mallinfo2() is part of glibc 2.33 which was released in Feb 2021, which
is fairly recent but I think gn users should be using fairly up to date
glibcs.

If this breaks people we could make this a gn arg instead.

Differential Revision: https://reviews.llvm.org/D117916
---
 llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
index 3efbdde7d85b4..3c882a80fee19 100644
--- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
@@ -89,7 +89,7 @@ write_cmake_config("config") {
     "HAVE_LIBPFM=",
     "HAVE_LIBPSAPI=",
     "HAVE_MALLCTL=",
-    "HAVE_MALLINFO2=",
+    "HAVE_MALLINFO2=1",
     "HAVE_SIGNAL_H=1",
     "HAVE_STD_IS_TRIVIALLY_COPYABLE=1",
     "HAVE_STRERROR=1",

From 6103b2d45bfb43bb403d289d80c137d5155b0778 Mon Sep 17 00:00:00 2001
From: Shubham Sandeep Rastogi <srastogi22@apple.com>
Date: Fri, 21 Jan 2022 13:43:51 -0800
Subject: [PATCH 221/946] Revert "Emit swift5 reflection section data in dsym
 bundle generated by dsymutil in the Dwarf section."

This reverts commit d84d1135d80c1dead6564347943ba56eed5aac3b. to investigate buildbot failures
---
 llvm/include/llvm/BinaryFormat/Swift.def      |  26 -
 llvm/include/llvm/BinaryFormat/Swift.h        |  24 -
 llvm/include/llvm/DWARFLinker/DWARFStreamer.h |   8 +-
 llvm/include/llvm/MC/MCContext.h              |  10 +-
 llvm/include/llvm/MC/MCObjectFileInfo.h       |  13 -
 llvm/include/llvm/Object/MachO.h              |   4 -
 llvm/include/llvm/Object/ObjectFile.h         |   6 -
 llvm/lib/DWARFLinker/DWARFStreamer.cpp        |  20 +-
 llvm/lib/MC/MCContext.cpp                     |   8 +-
 llvm/lib/MC/MCObjectFileInfo.cpp              |  11 -
 llvm/lib/Object/MachOObjectFile.cpp           |  12 -
 llvm/test/tools/dsymutil/Inputs/main.yaml     | 886 ------------------
 .../dsymutil/Inputs/reflection_metadata.yaml  | 436 ---------
 llvm/test/tools/dsymutil/Inputs/test.yaml     | 254 -----
 llvm/test/tools/dsymutil/reflection-dump.test |  42 -
 llvm/tools/dsymutil/DwarfLinkerForBinary.cpp  |  86 +-
 16 files changed, 11 insertions(+), 1835 deletions(-)
 delete mode 100644 llvm/include/llvm/BinaryFormat/Swift.def
 delete mode 100644 llvm/include/llvm/BinaryFormat/Swift.h
 delete mode 100644 llvm/test/tools/dsymutil/Inputs/main.yaml
 delete mode 100644 llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
 delete mode 100644 llvm/test/tools/dsymutil/Inputs/test.yaml
 delete mode 100644 llvm/test/tools/dsymutil/reflection-dump.test

diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def
deleted file mode 100644
index 39931bec70e57..0000000000000
--- a/llvm/include/llvm/BinaryFormat/Swift.def
+++ /dev/null
@@ -1,26 +0,0 @@
-//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Macros for running through Swift enumerators.
-//
-//===----------------------------------------------------------------------===//
-
-#if !(defined HANDLE_SWIFT_SECTION)
-#error "Missing macro definition of HANDLE_SWIFT_SECTION"
-#endif
-
-#ifndef HANDLE_SWIFT_SECTION
-#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)
-#endif
-
-HANDLE_SWIFT_SECTION(Fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd")
-HANDLE_SWIFT_SECTION(Assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty")
-HANDLE_SWIFT_SECTION(Builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn")
-HANDLE_SWIFT_SECTION(Capture, "__swift5_capture", "swift5_capture", ".sw5cptr")
-HANDLE_SWIFT_SECTION(Typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf")
-HANDLE_SWIFT_SECTION(Reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst")
diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h
deleted file mode 100644
index 63bbfe08c86d4..0000000000000
--- a/llvm/include/llvm/BinaryFormat/Swift.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-
-#ifndef LLVM_BINARYFORMAT_SWIFT_H
-#define LLVM_BINARYFORMAT_SWIFT_H
-
-namespace llvm {
-namespace swift {
-
-enum Swift5ReflectionSectionKind {
-#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND,
-#include "llvm/BinaryFormat/Swift.def"
-#undef HANDLE_SWIFT_SECTION
-  Unknown,
-  Last = Unknown
-};
-} // end of namespace swift
-} // end of namespace llvm
-
-#endif
diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index 8e845ee91b9f7..9a5c6bcaf83f3 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H
 #define LLVM_DWARFLINKER_DWARFSTREAMER_H
 
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/DWARFLinker/DWARFLinker.h"
@@ -49,7 +48,7 @@ class DwarfStreamer : public DwarfEmitter {
       : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
         ErrorHandler(Error), WarningHandler(Warning) {}
 
-  bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
+  bool init(Triple TheTriple);
 
   /// Dump the file to the disk.
   void finish();
@@ -86,11 +85,6 @@ class DwarfStreamer : public DwarfEmitter {
   /// Emit the swift_ast section stored in \p Buffer.
   void emitSwiftAST(StringRef Buffer);
 
-  /// Emit the swift reflection section stored in \p Buffer.
-  void emitSwiftReflectionSection(
-      llvm::swift::Swift5ReflectionSectionKind ReflSectionKind,
-      StringRef Buffer, uint32_t Alignment, uint32_t Size);
-
   /// Emit debug_ranges for \p FuncRange by translating the
   /// original \p Entries.
   void emitRangesEntries(
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index d2307d6922780..88d86d5b675ac 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -80,10 +80,6 @@ namespace llvm {
   private:
     Environment Env;
 
-    /// The name of the Segment where Swift5 Reflection Section data will be
-    /// outputted
-    StringRef Swift5ReflectionSegmentName;
-
     /// The triple for this object.
     Triple TT;
 
@@ -403,17 +399,13 @@ namespace llvm {
                        const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI,
                        const SourceMgr *Mgr = nullptr,
                        MCTargetOptions const *TargetOpts = nullptr,
-                       bool DoAutoReset = true,
-                       StringRef Swift5ReflSegmentName = {});
+                       bool DoAutoReset = true);
     MCContext(const MCContext &) = delete;
     MCContext &operator=(const MCContext &) = delete;
     ~MCContext();
 
     Environment getObjectFileType() const { return Env; }
 
-    const StringRef &getSwift5ReflectionSegmentName() const {
-      return Swift5ReflectionSegmentName;
-    }
     const Triple &getTargetTriple() const { return TT; }
     const SourceMgr *getSourceManager() const { return SrcMgr; }
 
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 1b4804db783b4..5e0cccaba77fa 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -15,7 +15,6 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/VersionTuple.h"
@@ -229,10 +228,6 @@ class MCObjectFileInfo {
   MCSection *ReadOnly8Section = nullptr;
   MCSection *ReadOnly16Section = nullptr;
 
-  // Swift5 Reflection Data Sections
-  std::array<MCSection *, swift::Swift5ReflectionSectionKind::Last>
-      Swift5ReflectionSections = {};
-
 public:
   void initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
                             bool LargeCodeModel = false);
@@ -428,14 +423,6 @@ class MCObjectFileInfo {
 
   bool isPositionIndependent() const { return PositionIndependent; }
 
-  // Swift5 Reflection Data Sections
-  MCSection *getSwift5ReflectionSection(
-      llvm::swift::Swift5ReflectionSectionKind ReflSectionKind) {
-    return ReflSectionKind != llvm::swift::Swift5ReflectionSectionKind::Unknown
-               ? Swift5ReflectionSections[ReflSectionKind]
-               : nullptr;
-  }
-
 private:
   bool PositionIndependent = false;
   MCContext *Ctx = nullptr;
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index 09b6454bb0c14..ede742c47f971 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -22,7 +22,6 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/MachO.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
@@ -584,9 +583,6 @@ class MachOObjectFile : public ObjectFile {
 
   StringRef mapDebugSectionName(StringRef Name) const override;
 
-  llvm::swift::Swift5ReflectionSectionKind
-  mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
-
   bool hasPageZeroSegment() const { return HasPageZeroSegment; }
 
   static bool classof(const Binary *v) {
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 29919db772f0e..12704b1fc88e7 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -18,7 +18,6 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/Magic.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/SymbolicFile.h"
@@ -291,11 +290,6 @@ class ObjectFile : public SymbolicFile {
   virtual void getRelocationTypeName(DataRefImpl Rel,
                                      SmallVectorImpl<char> &Result) const = 0;
 
-  virtual llvm::swift::Swift5ReflectionSectionKind
-  mapReflectionSectionNameToEnumValue(StringRef SectionName) const {
-    return llvm::swift::Swift5ReflectionSectionKind::Unknown;
-  };
-
   Expected<uint64_t> getSymbolValue(DataRefImpl Symb) const;
 
 public:
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 7f9b9a9bc793c..1ab6ead3b5f66 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -27,8 +27,7 @@
 
 namespace llvm {
 
-bool DwarfStreamer::init(Triple TheTriple,
-                         StringRef Swift5ReflectionSegmentName) {
+bool DwarfStreamer::init(Triple TheTriple) {
   std::string ErrorStr;
   std::string TripleName;
   StringRef Context = "dwarf streamer init";
@@ -55,9 +54,8 @@ bool DwarfStreamer::init(Triple TheTriple,
   if (!MSTI)
     return error("no subtarget info for target " + TripleName, Context), false;
 
-  MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
-                         nullptr, true, Swift5ReflectionSegmentName));
-  MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false));
+  MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get()));
+  MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false));
   MC->setObjectFileInfo(MOFI.get());
 
   MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions);
@@ -304,18 +302,6 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) {
   MS->emitBytes(Buffer);
 }
 
-void DwarfStreamer::emitSwiftReflectionSection(
-    llvm::swift::Swift5ReflectionSectionKind ReflSectionKind, StringRef Buffer,
-    uint32_t Alignment, uint32_t Size) {
-  MCSection *ReflectionSection =
-      MOFI->getSwift5ReflectionSection(ReflSectionKind);
-  if (ReflectionSection == nullptr)
-    return;
-  ReflectionSection->setAlignment(Align(Alignment));
-  MS->SwitchSection(ReflectionSection);
-  MS->emitBytes(Buffer);
-}
-
 /// Emit the debug_range section contents for \p FuncRange by
 /// translating the original \p Entries. The debug_range section
 /// format is totally trivial, consisting just of pairs of address
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index eafcee1e0607b..7f639e9c408fe 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &,
 MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
                      const MCRegisterInfo *mri, const MCSubtargetInfo *msti,
                      const SourceMgr *mgr, MCTargetOptions const *TargetOpts,
-                     bool DoAutoReset, StringRef Swift5ReflSegmentName)
-    : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple),
-      SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler),
-      MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator),
+                     bool DoAutoReset)
+    : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr),
+      DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti),
+      Symbols(Allocator), UsedNames(Allocator),
       InlineAsmUsedLabelNames(Allocator),
       CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
       AutoReset(DoAutoReset), TargetOptions(TargetOpts) {
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 77b0b0ee687cb..d7f85f793c55f 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -299,17 +299,6 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
   RemarksSection = Ctx->getMachOSection(
       "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata());
 
-  // The architecture of dsymutil makes it very difficult to copy the Swift
-  // reflection metadata sections into the __TEXT segment, so dsymutil creates
-  // these sections in the __DWARF segment instead.
-  if (!Ctx->getSwift5ReflectionSegmentName().empty()) {
-#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
-  Swift5ReflectionSections[llvm::swift::Swift5ReflectionSectionKind::KIND] =   \
-      Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(),       \
-                           MACHO, 0, SectionKind::getMetadata());
-#include "llvm/BinaryFormat/Swift.def"
-  }
-
   TLSExtraDataSection = TLSTLVSection;
 }
 
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 83bc74ff31c40..42e257516f4e0 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -20,7 +20,6 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/MachO.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
@@ -4766,14 +4765,3 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) {
                              Path.str().c_str());
   return ObjectPaths;
 }
-
-llvm::swift::Swift5ReflectionSectionKind
-MachOObjectFile::mapReflectionSectionNameToEnumValue(
-    StringRef SectionName) const {
-#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
-  .Case(MACHO, llvm::swift::Swift5ReflectionSectionKind::KIND)
-  return StringSwitch<llvm::swift::Swift5ReflectionSectionKind>(SectionName)
-#include "llvm/BinaryFormat/Swift.def"
-      .Default(llvm::swift::Swift5ReflectionSectionKind::Unknown);
-#undef HANDLE_SWIFT_SECTION
-}
diff --git a/llvm/test/tools/dsymutil/Inputs/main.yaml b/llvm/test/tools/dsymutil/Inputs/main.yaml
deleted file mode 100644
index d81931d7861c3..0000000000000
--- a/llvm/test/tools/dsymutil/Inputs/main.yaml
+++ /dev/null
@@ -1,886 +0,0 @@
-# How to generate this file:
-# 1. First take a swift file and run xcrun swiftc -g -v test.swift 
-# reflection_metadata.swift, make sure the two swift files are in a short path 
-# like /tmp/
-
-# 2. Now you can see what the driver does, generate the object files in the 
-# tmp directory and link them to create the input binary
-
-# 3. Run obj2yaml on the input binary to create a yaml file and strip out the 
-# swift5 reflection sections from the load commands in the text segment
-
-# 4. I ran delta to reduce this file.
-
---- !mach-o
-FileHeader:
-  magic:           0xFEEDFACF
-  cputype:         0x1000007
-  cpusubtype:      0x3
-  filetype:        0x2
-  ncmds:           18
-  sizeofcmds:      2848
-  flags:           0x200085
-  reserved:        0x0
-LoadCommands:
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         72
-    segname:         __PAGEZERO
-    vmaddr:          0
-    vmsize:          4294967296
-    fileoff:         0
-    filesize:        0
-    maxprot:         0
-    initprot:        0
-    nsects:          0
-    flags:           0
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         952
-    segname:         __TEXT
-    vmaddr:          4294967296
-    vmsize:          16384
-    fileoff:         0
-    filesize:        16384
-    maxprot:         5
-    initprot:        5
-    nsects:          11
-    flags:           0
-    Sections:
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x100003EB0
-        size:            336
-        offset:          0x3EB0
-        align:           3
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x0
-        reserved1:       0x0
-        reserved2:       0x0
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         392
-    segname:         __DATA_CONST
-    vmaddr:          4294983680
-    vmsize:          16384
-    fileoff:         16384
-    filesize:        16384
-    maxprot:         3
-    initprot:        3
-    nsects:          4
-    flags:           16
-    Sections:
-      - sectname:        __got
-        segname:         __DATA_CONST
-        addr:            0x100004000
-        size:            48
-        offset:          0x4000
-        align:           3
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x6
-        reserved1:       0x11
-        reserved2:       0x0
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         392
-    segname:         __DATA
-    vmaddr:          4295000064
-    vmsize:          16384
-    fileoff:         32768
-    filesize:        16384
-    maxprot:         3
-    initprot:        3
-    nsects:          4
-    flags:           0
-    Sections:
-      - sectname:        __la_symbol_ptr
-        segname:         __DATA
-        addr:            0x100008000
-        size:            384
-        offset:          0x8088
-        align:           3
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x0
-        reserved1:       0x0
-        reserved2:       0x0
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         72
-    segname:         __LINKEDIT
-    vmaddr:          4295016448
-    vmsize:          32768
-    fileoff:         49152
-    filesize:        23584
-    maxprot:         1
-    initprot:        1
-    nsects:          0
-    flags:           0
-  - cmd:             LC_DYLD_INFO_ONLY
-    cmdsize:         48
-    rebase_off:      49152
-    rebase_size:     64
-    bind_off:        49216
-    bind_size:       216
-    weak_bind_off:   0
-    weak_bind_size:  0
-    lazy_bind_off:   49432
-    lazy_bind_size:  600
-    export_off:      50032
-    export_size:     1000
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          51136
-    nsyms:           638
-    stroff:          61504
-    strsize:         11232
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       560
-    iextdefsym:      560
-    nextdefsym:      52
-    iundefsym:       612
-    nundefsym:       26
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  61344
-    nindirectsyms:   40
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-  - cmd:             LC_LOAD_DYLINKER
-    cmdsize:         32
-    name:            12
-  - cmd:             LC_UUID
-    cmdsize:         24
-    uuid:            AA0A51FA-8B29-3A7B-85AA-FA6A457B2211
-  - cmd:             LC_BUILD_VERSION
-    cmdsize:         32
-    platform:        1
-    minos:           786432
-    sdk:             786688
-    ntools:          1
-  - cmd:             LC_SOURCE_VERSION
-    cmdsize:         16
-    version:         0
-  - cmd:             LC_MAIN
-    cmdsize:         24
-    entryoff:        9376
-    stacksize:       0
-  - cmd:             LC_LOAD_DYLIB
-    cmdsize:         56
-    dylib:
-      name:            24
-      timestamp:       2
-      current_version: 14942208
-      compatibility_version: 65536
-  - cmd:             LC_LOAD_DYLIB
-    cmdsize:         56
-    dylib:
-      name:            24
-      timestamp:       2
-      current_version: 85917696
-      compatibility_version: 65536
-  - cmd:             LC_LOAD_DYLIB
-    cmdsize:         64
-    dylib:
-      name:            24
-      timestamp:       2
-      current_version: 85196845
-      compatibility_version: 65536
-  - cmd:             LC_FUNCTION_STARTS
-    cmdsize:         16
-    dataoff:         51032
-    datasize:        104
-  - cmd:             LC_DATA_IN_CODE
-    cmdsize:         16
-    dataoff:         51136
-    datasize:        0
-LinkEditData:
-  NameList:
-    - n_strx:          2355
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976208
-    - n_strx:          2398
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976224
-    - n_strx:          2440
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976240
-    - n_strx:          2479
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976256
-    - n_strx:          2509
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976272
-    - n_strx:          2570
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976320
-    - n_strx:          2590
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976512
-    - n_strx:          2635
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976576
-    - n_strx:          2683
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976608
-    - n_strx:          2731
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976640
-    - n_strx:          2751
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976656
-    - n_strx:          2775
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976704
-    - n_strx:          2791
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976720
-    - n_strx:          2814
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976752
-    - n_strx:          2838
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976768
-    - n_strx:          2873
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976784
-    - n_strx:          2906
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976832
-    - n_strx:          2926
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294977104
-    - n_strx:          2946
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294977200
-    - n_strx:          2966
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977376
-    - n_strx:          3008
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977392
-    - n_strx:          3049
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977408
-    - n_strx:          3087
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977424
-    - n_strx:          3116
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294977440
-    - n_strx:          3176
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977488
-    - n_strx:          3201
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977504
-    - n_strx:          3232
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977552
-    - n_strx:          3270
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977648
-    - n_strx:          3318
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977664
-    - n_strx:          3364
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294978352
-    - n_strx:          3411
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294978464
-    - n_strx:          3447
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294978688
-    - n_strx:          3506
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294978832
-    - n_strx:          3567
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294978944
-    - n_strx:          3587
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979024
-    - n_strx:          3607
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979056
-    - n_strx:          3627
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979136
-    - n_strx:          3647
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294979232
-    - n_strx:          3666
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979264
-    - n_strx:          3686
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979328
-    - n_strx:          3706
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979536
-    - n_strx:          3726
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979856
-    - n_strx:          3746
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979872
-    - n_strx:          3766
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979888
-    - n_strx:          3786
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979920
-    - n_strx:          3814
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979936
-    - n_strx:          3842
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294980240
-    - n_strx:          3871
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294980288
-    - n_strx:          3898
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980320
-    - n_strx:          3927
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980368
-    - n_strx:          3951
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980384
-    - n_strx:          3982
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980448
-    - n_strx:          4001
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980464
-    - n_strx:          4032
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294980512
-    - n_strx:          4060
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294980800
-    - n_strx:          4088
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981120
-    - n_strx:          4116
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981136
-    - n_strx:          4144
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981152
-    - n_strx:          4172
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294981184
-    - n_strx:          4208
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294981248
-    - n_strx:          4225
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294981280
-    - n_strx:          4253
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981328
-    - n_strx:          4276
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981376
-    - n_strx:          4294
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          128
-      n_value:         4294981764
-    - n_strx:          4306
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294981824
-    - n_strx:          4322
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294981952
-    - n_strx:          4349
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294981960
-    - n_strx:          4387
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294981968
-    - n_strx:          4423
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294982160
-    - n_strx:          4474
-      n_type:          0xE
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294982352
-    - n_strx:          4503
-      n_type:          0xE
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294982448
-    - n_strx:          4530
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          128
-      n_value:         4294982464
-    - n_strx:          4558
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982466
-    - n_strx:          4571
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982470
-    - n_strx:          4608
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982476
-    - n_strx:          4639
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982498
-    - n_strx:          4666
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982506
-    - n_strx:          4691
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982510
-    - n_strx:          4727
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982516
-    - n_strx:          4758
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982522
-    - n_strx:          4790
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982528
-    - n_strx:          4820
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982534
-    - n_strx:          4859
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982540
-    - n_strx:          4902
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982554
-    - n_strx:          4945
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982564
-    - n_strx:          4986
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         0
-    - n_strx:          5987
-      n_type:          0x66
-      n_sect:          3
-      n_desc:          1
-      n_value:         1638431181
-    - n_strx:          7104
-      n_type:          0x66
-      n_sect:          3
-      n_desc:          1
-      n_value:         1638431191
-  StringTable:
-    - ' '
-    - '_$s4main10MyProtocolMp'
-    - '_$s4main10MyProtocolTL'
-    - '_$s4main11ConformanceV5innerSivM'
-    - '_$s4main11ConformanceV5innerSivg'
-    - '_$s4main11ConformanceV5innerSivpMV'
-    - '_$s4main11ConformanceV5innerSivpfi'
-    - '_$s4main11ConformanceV5innerSivs'
-    - '_$s4main11ConformanceVAA10MyProtocolAAMc'
-    - '_$s4main11ConformanceVAA10MyProtocolAAWP'
-    - '_$s4main11ConformanceVMa'
-    - '_$s4main11ConformanceVMn'
-    - '_$s4main11ConformanceVN'
-    - '_$s4main12Conformance2V5innerSivM'
-    - '_$s4main12Conformance2V5innerSivg'
-    - '_$s4main12Conformance2V5innerSivpMV'
-    - '_$s4main12Conformance2V5innerSivpfi'
-    - '_$s4main12Conformance2V5innerSivs'
-    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
-    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
-    - '_$s4main12Conformance2VMa'
-    - '_$s4main12Conformance2VMn'
-    - '_$s4main12Conformance2VN'
-    - '_$s4main13MyGenericEnumOMa'
-    - '_$s4main13MyGenericEnumOMn'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc'
-    - '_$s4main14MyGenericClassCMa'
-    - '_$s4main14MyGenericClassCMn'
-    - '_$s4main14MyGenericClassCfD'
-    - '_$s4main14MyGenericClassCfd'
-    - '_$s4main15MyGenericStructVMa'
-    - '_$s4main15MyGenericStructVMn'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF'
-    - '_$s4main6MyEnumOMa'
-    - '_$s4main6MyEnumOMn'
-    - '_$s4main6MyEnumON'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc'
-    - '_$s4main7MyClassCMa'
-    - '_$s4main7MyClassCMm'
-    - '_$s4main7MyClassCMn'
-    - '_$s4main7MyClassCN'
-    - '_$s4main7MyClassCfD'
-    - '_$s4main7MyClassCfd'
-    - '_$s4main8MyStructVMa'
-    - '_$s4main8MyStructVMn'
-    - '_$s4main8MyStructVN'
-    - '_$s5Inner4main10MyProtocolPTl'
-    - __mh_execute_header
-    - _main
-    - '_$sBi64_WV'
-    - '_$sBoWV'
-    - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC'
-    - '_$sSSN'
-    - '_$sSaMa'
-    - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF'
-    - '_$ss5print_9separator10terminatoryypd_S2StF'
-    - '_$sypN'
-    - '_$sytWV'
-    - '_OBJC_CLASS_$__TtCs12_SwiftObject'
-    - '_OBJC_METACLASS_$__TtCs12_SwiftObject'
-    - __objc_empty_cache
-    - _objc_opt_self
-    - _swift_allocObject
-    - _swift_allocateGenericClassMetadata
-    - _swift_allocateGenericValueMetadata
-    - _swift_bridgeObjectRelease
-    - _swift_checkMetadataState
-    - _swift_deallocClassInstance
-    - _swift_deallocObject
-    - _swift_getAssociatedTypeWitness
-    - _swift_getGenericMetadata
-    - _swift_initClassMetadata2
-    - _swift_release
-    - _swift_retain
-    - dyld_stub_binder
-    - '_$s4main12Conformance2V5innerSivM.resume.0'
-    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
-    - '_$s4main12Conformance2V5innerACSi_tcfC'
-    - '_$s4main12Conformance2VACycfC'
-    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - '_$s4main3AppVAAyyFZ'
-    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
-    - '_$s4main3AppVACycfC'
-    - '_$s4main3AppV5$mainyyFZ'
-    - '_$s4main3AppVMa'
-    - '_$sSa12_endMutationyyF'
-    - '_$s4main7MyClassC1iSivg'
-    - '_$s4main7MyClassC2msAA0B6StructVvg'
-    - '_$s4main7MyClassC2meAA0B4EnumOvg'
-    - '_$s4main6MyEnumOWOy'
-    - '_$s4main6MyEnumOWOe'
-    - '_$s4main6MyEnumOWOh'
-    - '_$s4main11ConformanceV5innerSivM.resume.0'
-    - '_$s4main11ConformanceV5innerACSi_tcfcfA_'
-    - '_$s4main11ConformanceV5innerACSi_tcfC'
-    - '_$s4main11ConformanceVACycfC'
-    - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - '_$s4main8MyStructVACycfC'
-    - '_$s4main14MyGenericClassC1txvg'
-    - '_$s4main14MyGenericClassC1i5InnerQzvg'
-    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg'
-    - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg'
-    - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh'
-    - '_$s4main15MyGenericStructVACyxGycfC'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA'
-    - '_$s4main6MyEnumOwCP'
-    - '_$s4main6MyEnumOwxx'
-    - '_$s4main6MyEnumOwcp'
-    - '_$s4main6MyEnumOwca'
-    - ___swift_memcpy9_8
-    - '_$s4main6MyEnumOwta'
-    - '_$s4main6MyEnumOwet'
-    - '_$s4main6MyEnumOwst'
-    - '_$s4main6MyEnumOwug'
-    - '_$s4main6MyEnumOwup'
-    - '_$s4main6MyEnumOwui'
-    - '_$s4main14MyGenericClassCMi'
-    - '_$s4main14MyGenericClassCMr'
-    - '_$s4main15MyGenericStructVMi'
-    - '_$s4main13MyGenericEnumOMi'
-    - ___swift_initWithCopy_strong
-    - ___swift_destroy_strong
-    - ___swift_assignWithCopy_strong
-    - ___swift_memcpy8_8
-    - ___swift_assignWithTake_strong
-    - '_$s4main13MyGenericEnumOwet'
-    - '_$s4main13MyGenericEnumOwst'
-    - '_$s4main13MyGenericEnumOwug'
-    - '_$s4main13MyGenericEnumOwup'
-    - '_$s4main13MyGenericEnumOwui'
-    - ___swift_instantiateGenericMetadata
-    - ___chkstk_darwin
-    - ___chkstk_darwin_llvm_probe
-    - ___chkstk_darwin_probe
-    - ____chkstk_darwin
-    - '_$s4mainMXM'
-    - '_$s4main3AppVMn'
-    - '_$s4main7MyClassC1iSivpWvd'
-    - '_$s4main7MyClassC2msAA0B6StructVvpWvd'
-    - '_$s4main7MyClassC2meAA0B4EnumOvpWvd'
-    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd'
-    - '_$s4main15MyGenericStructVMP'
-    - '_$s4main13MyGenericEnumOMP'
-    - ___swift_reflection_version
-    - _symbolic Si
-    - _symbolic _____ 4main12Conformance2V
-    - '_symbolic $s4main10MyProtocolP'
-    - _symbolic _____ 4main3AppV
-    - _symbolic x
-    - _symbolic B0
-    - _symbolic _____ 4main11ConformanceV
-    - _symbolic _____ 4main7MyClassC
-    - _symbolic _____ 4main8MyStructV
-    - _symbolic _____ 4main6MyEnumO
-    - _symbolic _____ 4main14MyGenericClassC
-    - _symbolic 5Inner_____Qz 4main10MyProtocolP
-    - _symbolic _____yxG 4main15MyGenericStructV
-    - _symbolic _____yxG 4main13MyGenericEnumO
-    - _symbolic _____ 4main15MyGenericStructV
-    - _symbolic _____ 4main13MyGenericEnumO
-    - _symbolic _____yxG 4main14MyGenericClassC
-    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
-    - '_$s4main11ConformanceVAA10MyProtocolAAMA'
-    - '_$s4main12Conformance2VMF'
-    - '_$s4main3AppVMF'
-    - '_$s4main10MyProtocol_pMF'
-    - '_$s4main7MyClassCMF'
-    - '_$s4main11ConformanceVMF'
-    - '_$s4main8MyStructVMF'
-    - '_$s4main6MyEnumOMF'
-    - '_$s4main14MyGenericClassCMF'
-    - '_$s4main15MyGenericStructVMF'
-    - '_$s4main13MyGenericEnumOMF'
-    - '_$s4main6MyEnumOMB'
-    - '_$s4main12Conformance2VMf'
-    - '_$s4main3AppVMf'
-    - '_$s4main3AppVN'
-    - '_$s4main11ConformanceVMf'
-    - '_$s4main8MyStructVMf'
-    - '_$s4main6MyEnumOWV'
-    - '_$s4main6MyEnumOMf'
-    - ___unnamed_23
-    - '_$s4main14MyGenericClassCMP'
-    - '_$s4main13MyGenericEnumOWV'
-    - __METACLASS_DATA__TtC4main7MyClass
-    - __IVARS__TtC4main7MyClass
-    - __DATA__TtC4main7MyClass
-    - __IVARS__TtC4main14MyGenericClass
-    - __dyld_private
-    - '_$s4main7MyClassCMf'
-    - '_$s4main14MyGenericClassCMI'
-    - '_$s4main15MyGenericStructVMI'
-    - '_$s4main13MyGenericEnumOMI'
-    - '/tmp/main-1.swiftmodule'
-    - '/Users/shubham/Development/test76973336/final2objfiletest/'
-    - test.swift
-    - '/tmp/test-1.o'
-    - '_$s4main12Conformance2V5innerSivpfi'
-    - '_$s4main12Conformance2V5innerSivg'
-    - '_$s4main12Conformance2V5innerSivs'
-    - '_$s4main12Conformance2V5innerSivM'
-    - '_$s4main12Conformance2V5innerSivM.resume.0'
-    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
-    - '_$s4main12Conformance2V5innerACSi_tcfC'
-    - '_$s4main12Conformance2VACycfC'
-    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - '_$s4main3AppVAAyyFZ'
-    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
-    - '_$s4main3AppVACycfC'
-    - '_$s4main3AppV5$mainyyFZ'
-    - _main
-    - '_$s4main12Conformance2VMa'
-    - '_$s4main3AppVMa'
-    - '_$sSa12_endMutationyyF'
-    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
-    - '_$s4main12Conformance2V5innerSivpMV'
-    - '_$s4mainMXM'
-    - '_$s4main12Conformance2VMn'
-    - '_$s4main3AppVMn'
-    - _symbolic Si
-    - _symbolic _____ 4main12Conformance2V
-    - '_symbolic $s4main10MyProtocolP'
-    - _symbolic _____ 4main3AppV
-    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
-    - '_$s4main12Conformance2VMF'
-    - '_$s4main3AppVMF'
-    - '_$s4main12Conformance2VMf'
-    - '_$s4main12Conformance2VN'
-    - '_$s4main3AppVMf'
-    - '_$s4main3AppVN'
-    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
-    - reflection_metadata.swift
-    - '/tmp/reflection_metadata-1.o'
diff --git a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
deleted file mode 100644
index b2179a23bf28d..0000000000000
--- a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
+++ /dev/null
@@ -1,436 +0,0 @@
-# How to generate this file:
-# 1. First take a swift file and run xcrun swiftc -g -v file.swift 
-# secondfile.swift, make sure the two swift files are in a short path like /tmp/
-
-# 2. Now you can see what the driver does, generate the object files in the 
-# tmp directory
-
-# 3. Run obj2yaml on object file to create a yaml file
-
-# 4. I ran delta to reduce this file.
-
---- !mach-o
-FileHeader:
-  magic:           0xFEEDFACF
-  cputype:         0x1000007
-  cpusubtype:      0x3
-  filetype:        0x1
-  ncmds:           8
-  sizeofcmds:      2800
-  flags:           0x2000
-  reserved:        0x0
-LoadCommands:
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         2552
-    segname:         ''
-    vmaddr:          0
-    vmsize:          21352
-    fileoff:         2832
-    filesize:        20967
-    maxprot:         7
-    initprot:        7
-    nsects:          31
-    flags:           0
-    Sections:
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x0
-        size:            4571
-        offset:          0xB10
-        align:           4
-        reloff:          0x5CF8
-        nreloc:          74
-        flags:           0x80000400
-        reserved1:       0x0
-        reserved2:       0x0
-        relocations:
-          - address:         0x11A1
-            symbolnum:       142
-            pcrel:           true
-            length:          2
-            extern:          true
-            type:            1
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_typeref
-        segname:         __TEXT
-        addr:            0x11DC
-        size:            117
-        offset:          0x1CEC
-        align:           1
-        reloff:          0x5F48
-        nreloc:          22
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         7800423000005369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0035496E6E657201F9FFFFFF517A0001FFFFFFFF797847000001FFFFFFFF797847000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF79784700
-        relocations:
-          - address:         0x6D
-            symbolnum:       163
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_capture
-        segname:         __TEXT
-        addr:            0x1254
-        size:            24
-        offset:          0x1D64
-        align:           2
-        reloff:          0x5FF8
-        nreloc:          6
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         010000000100000002000000F4FFFFFFF0FFFFFFECFFFFFF
-        relocations:
-          - address:         0x14
-            symbolnum:       29
-            pcrel:           false
-            length:          3
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_reflstr
-        segname:         __TEXT
-        addr:            0x17D8
-        size:            37
-        offset:          0x22E8
-        align:           0
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         496E6E65720069006D73006D6500696E6E6572004300490074006D6773006D676500474300
-      - sectname:        __swift5_assocty
-        segname:         __TEXT
-        addr:            0x1800
-        size:            24
-        offset:          0x2310
-        align:           2
-        reloff:          0x6530
-        nreloc:          8
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF
-        relocations:
-          - address:         0x14
-            symbolnum:       31
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            5
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_fieldmd
-        segname:         __TEXT
-        addr:            0x1818
-        size:            260
-        offset:          0x2328
-        align:           2
-        reloff:          0x6570
-        nreloc:          60
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         000000000000000004000C0000000000000000000000000001000C000300000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000000000000000000003000C000200000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF000000000000000001000C000400000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF00000000C8FFFFFFC4FFFFFF000000000000000000000C0000000000000000000000000002000C000100000000000000ECFFFFFFE8FFFFFF
-        relocations:
-          - address:         0x100
-            symbolnum:       71
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_builtin
-        segname:         __TEXT
-        addr:            0x1AC8
-        size:            20
-        offset:          0x25D8
-        align:           2
-        reloff:          0x67F8
-        nreloc:          2
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         00000000090000000800010010000000FE000000
-        relocations:
-          - address:         0x0
-            symbolnum:       52
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            5
-            scattered:       false
-            value:           0
-      - sectname:        __bss
-        segname:         __DATA
-        addr:            0x3372
-        size:            2084
-        offset:          0x50B0
-        align:           3
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x6800000B
-        reserved1:       0x0
-        reserved2:       0x0
-        relocations:
-          - address:         0x56
-            symbolnum:       1
-            pcrel:           false
-            length:          3
-            extern:          false
-            type:            0
-            scattered:       false
-            value:           0
-  - cmd:             LC_BUILD_VERSION
-    cmdsize:         24
-    platform:        1
-    minos:           786432
-    sdk:             786688
-    ntools:          0
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          27888
-    nsyms:           185
-    stroff:          30848
-    strsize:         5056
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       79
-    iextdefsym:      79
-    nextdefsym:      87
-    iundefsym:       166
-    nundefsym:       19
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  0
-    nindirectsyms:   0
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         40
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, 
-                       0x0, 0x0, 0x0, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         24
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, 
-                       0x6F, 0x72, 0x65, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         32
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, 
-                       0x6E, 0x63, 0x79, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         24
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, 
-                       0x0, 0x0, 0x0 ]
-LinkEditData:
-  NameList:
-    - n_strx:          5014
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         5600
-  StringTable:
-    - ''
-    - l_objectdestroy
-    - '_$s4main6MyEnumOWOy'
-    - '_$s4main6MyEnumOwxx'
-    - _symbolic x
-    - '_$s4main6MyEnumOwst'
-    - '_$s4main13MyGenericEnumOwst'
-    - '_$s4main6MyEnumOwet'
-    - '_$s4main13MyGenericEnumOwet'
-    - '_OBJC_CLASS_$__TtCs12_SwiftObject'
-    - '_OBJC_METACLASS_$__TtCs12_SwiftObject'
-    - _swift_deallocObject
-    - _swift_allocObject
-    - '_$s4main11ConformanceV5innerSivs'
-    - _swift_getAssociatedTypeWitness
-    - __IVARS__TtC4main7MyClass
-    - __DATA__TtC4main7MyClass
-    - __METACLASS_DATA__TtC4main7MyClass
-    - __IVARS__TtC4main14MyGenericClass
-    - l_protocols
-    - _objc_classes
-    - l_protocol_conformances
-    - l__swift5_reflection_descriptor
-    - l_coro.devirt.trigger
-    - '_$s4main14MyGenericClassCMr'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq'
-    - '_$s4main6MyEnumOwup'
-    - '_$s4main13MyGenericEnumOwup'
-    - '_$s4main6MyEnumOwcp'
-    - '_$s4main10MyProtocolMp'
-    - ___swift_reflection_version
-    - ____chkstk_darwin
-    - _swift_retain
-    - '_$s4main8MyStructVMn'
-    - '_$s4main15MyGenericStructVMn'
-    - '_$s4main11ConformanceVMn'
-    - '_$s4main6MyEnumOMn'
-    - '_$s4main13MyGenericEnumOMn'
-    - '_$s4main7MyClassCMn'
-    - '_$s4main14MyGenericClassCMn'
-    - '_$s4main7MyClassCMm'
-    - '_$s5Inner4main10MyProtocolPTl'
-    - '_$s4main6MyEnumOwui'
-    - '_$s4main13MyGenericEnumOwui'
-    - '_$s4main11ConformanceV5innerSivpfi'
-    - _symbolic Si
-    - '_$s4main15MyGenericStructVMi'
-    - '_$s4main13MyGenericEnumOMi'
-    - '_$s4main14MyGenericClassCMi'
-    - l_llvm.swift_module_hash
-    - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh'
-    - '_$s4main6MyEnumOWOh'
-    - '_$s4main14MyGenericClassC1i5InnerQzvg'
-    - '_$s4main14MyGenericClassC1txvg'
-    - '_$s4main11ConformanceV5innerSivg'
-    - '_$s4main7MyClassC1iSivg'
-    - '_$s4main7MyClassC2msAA0B6StructVvg'
-    - '_$s4main7MyClassC2meAA0B4EnumOvg'
-    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg'
-    - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg'
-    - '_$s4main6MyEnumOwug'
-    - '_$s4main13MyGenericEnumOwug'
-    - ___swift_initWithCopy_strong
-    - ___swift_assignWithCopy_strong
-    - ___swift_destroy_strong
-    - ___swift_assignWithTake_strong
-    - _objc_opt_self
-    - '_$s4main8MyStructVMf'
-    - '_$s4main11ConformanceVMf'
-    - '_$s4main6MyEnumOMf'
-    - '_$s4main7MyClassCMf'
-    - _swift_checkMetadataState
-    - _swift_release
-    - l_type_metadata_table
-    - __objc_empty_cache
-    - _swift_deallocClassInstance
-    - ___chkstk_darwin_llvm_probe
-    - '_$s4main6MyEnumOWOe'
-    - '_$s4main7MyClassC1iSivpWvd'
-    - '_$s4main7MyClassC2msAA0B6StructVvpWvd'
-    - '_$s4main7MyClassC2meAA0B4EnumOvpWvd'
-    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd'
-    - '_$s4main7MyClassCfd'
-    - '_$s4main14MyGenericClassCfd'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc'
-    - '_$s4main11ConformanceVAA10MyProtocolAAMc'
-    - '_$s4main6MyEnumOwta'
-    - l_metadata
-    - _swift_allocateGenericClassMetadata
-    - _swift_allocateGenericValueMetadata
-    - _swift_getGenericMetadata
-    - ___swift_instantiateGenericMetadata
-    - '_$s4main6MyEnumOwca'
-    - '_$s4main8MyStructVMa'
-    - '_$s4main15MyGenericStructVMa'
-    - '_$s4main11ConformanceVMa'
-    - '_$s4main6MyEnumOMa'
-    - '_$s4main13MyGenericEnumOMa'
-    - '_$s4main7MyClassCMa'
-    - '_$s4main14MyGenericClassCMa'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_'
-    - '_$s4main11ConformanceV5innerACSi_tcfcfA_'
-    - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - _symbolic _____ 4main8MyStructV
-    - _symbolic _____ 4main15MyGenericStructV
-    - _symbolic _____yxG 4main15MyGenericStructV
-    - _symbolic _____ 4main11ConformanceV
-    - '_$sytWV'
-    - '_$sBoWV'
-    - '_$sBi64_WV'
-    - '_$s4main6MyEnumOWV'
-    - '_$s4main13MyGenericEnumOWV'
-    - '_$s4main11ConformanceV5innerSivpMV'
-    - '_symbolic $s4main10MyProtocolP'
-    - _symbolic 5Inner_____Qz 4main10MyProtocolP
-    - '_$s4main11ConformanceVAA10MyProtocolAAWP'
-    - '_$s4main15MyGenericStructVMP'
-    - '_$s4main13MyGenericEnumOMP'
-    - '_$s4main14MyGenericClassCMP'
-    - '_$s4main6MyEnumOwCP'
-    - _symbolic _____ 4main6MyEnumO
-    - _symbolic _____ 4main13MyGenericEnumO
-    - _symbolic _____yxG 4main13MyGenericEnumO
-    - '_$s4main8MyStructVN'
-    - '_$s4main11ConformanceVN'
-    - '_$s4main6MyEnumON'
-    - '_$s4main7MyClassCN'
-    - '_$s4main11ConformanceV5innerSivM'
-    - '_$s4mainMXM'
-    - '_$s4main10MyProtocolTL'
-    - '_$s4main15MyGenericStructVMI'
-    - '_$s4main13MyGenericEnumOMI'
-    - '_$s4main14MyGenericClassCMI'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF'
-    - '_$s4main10MyProtocol_pMF'
-    - '_$s4main8MyStructVMF'
-    - '_$s4main15MyGenericStructVMF'
-    - '_$s4main11ConformanceVMF'
-    - '_$s4main6MyEnumOMF'
-    - '_$s4main13MyGenericEnumOMF'
-    - '_$s4main7MyClassCMF'
-    - '_$s4main14MyGenericClassCMF'
-    - '_$s4main7MyClassCfD'
-    - '_$s4main14MyGenericClassCfD'
-    - _symbolic _____ 4main7MyClassC
-    - _symbolic _____ 4main14MyGenericClassC
-    - _symbolic _____yxG 4main14MyGenericClassC
-    - '_$s4main15MyGenericStructVACyxGycfC'
-    - '_$s4main8MyStructVACycfC'
-    - '_$s4main11ConformanceVACycfC'
-    - '_$s4main11ConformanceV5innerACSi_tcfC'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC'
-    - '_$s4main6MyEnumOMB'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA'
-    - '_$s4main11ConformanceVAA10MyProtocolAAMA'
-    - l___unnamed_29
-    - l___unnamed_19
-    - ___swift_memcpy9_8
-    - ___swift_memcpy8_8
-    - l___unnamed_28
-    - l___unnamed_18
-    - l___unnamed_27
-    - l___unnamed_17
-    - l___unnamed_26
-    - l___unnamed_16
-    - l___unnamed_25
-    - l___unnamed_15
-    - l___unnamed_4
-    - l___unnamed_24
-    - l___unnamed_14
-    - l___unnamed_3
-    - ___unnamed_23
-    - l___unnamed_13
-    - _swift_initClassMetadata2
-    - l___unnamed_2
-    - l___unnamed_12
-    - l___unnamed_1
-    - l___unnamed_11
-    - _symbolic B0
-    - l___unnamed_30
-    - l___unnamed_10
-    - '_$s4main11ConformanceV5innerSivM.resume.0'
diff --git a/llvm/test/tools/dsymutil/Inputs/test.yaml b/llvm/test/tools/dsymutil/Inputs/test.yaml
deleted file mode 100644
index da3aa9a8aaf34..0000000000000
--- a/llvm/test/tools/dsymutil/Inputs/test.yaml
+++ /dev/null
@@ -1,254 +0,0 @@
-# How to generate this file:
-# 1. First take a swift file and run xcrun swiftc -g -v file.swift 
-# secondfile.swift, make sure the two swift files are in a short path like /tmp/
-
-# 2. Now you can see what the driver does, generate the object files in the 
-# tmp directory
-
-# 3. Run obj2yaml on object file to create a yaml file
-
-# 4. I ran delta to reduce this file.
-
---- !mach-o
-FileHeader:
-  magic:           0xFEEDFACF
-  cputype:         0x1000007
-  cpusubtype:      0x3
-  filetype:        0x1
-  ncmds:           8
-  sizeofcmds:      2240
-  flags:           0x2000
-  reserved:        0x0
-LoadCommands:
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         1992
-    segname:         ''
-    vmaddr:          0
-    vmsize:          6592
-    fileoff:         2272
-    filesize:        6592
-    maxprot:         7
-    initprot:        7
-    nsects:          24
-    flags:           0
-    Sections:
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x0
-        size:            593
-        offset:          0x8E0
-        align:           4
-        reloff:          0x22A0
-        nreloc:          24
-        flags:           0x80000400
-        reserved1:       0x0
-        reserved2:       0x0
-        relocations:
-          - address:         0x233
-            symbolnum:       2
-            pcrel:           true
-            length:          2
-            extern:          true
-            type:            4
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_typeref
-        segname:         __TEXT
-        addr:            0x2D6
-        size:            38
-        offset:          0xBB6
-        align:           1
-        reloff:          0x2418
-        nreloc:          4
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         5369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF00
-        relocations:
-          - address:         0x21
-            symbolnum:       46
-            pcrel:           false
-            length:          3
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_reflstr
-        segname:         __TEXT
-        addr:            0x318
-        size:            12
-        offset:          0xBF8
-        align:           0
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         496E6E657200696E6E657200
-      - sectname:        __swift5_assocty
-        segname:         __TEXT
-        addr:            0x324
-        size:            24
-        offset:          0xC04
-        align:           2
-        reloff:          0x2450
-        nreloc:          8
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF
-        relocations:
-          - address:         0x14
-            symbolnum:       5
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_fieldmd
-        segname:         __TEXT
-        addr:            0x378
-        size:            44
-        offset:          0xC58
-        align:           2
-        reloff:          0x24C0
-        nreloc:          8
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000
-        relocations:
-          - address:         0x1C
-            symbolnum:       12
-            pcrel:           false
-            length:          3
-            extern:          false
-            type:            0
-            scattered:       false
-            value:           0
-  - cmd:             LC_BUILD_VERSION
-    cmdsize:         24
-    platform:        1
-    minos:           786432
-    sdk:             786688
-    ntools:          0
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          9824
-    nsyms:           57
-    stroff:          10736
-    strsize:         1544
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       16
-    iextdefsym:      16
-    nextdefsym:      31
-    iundefsym:       47
-    nundefsym:       10
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  0
-    nindirectsyms:   0
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         40
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, 
-                       0x0, 0x0, 0x0, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         24
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, 
-                       0x6F, 0x72, 0x65, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         32
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, 
-                       0x6E, 0x63, 0x79, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         24
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, 
-                       0x0, 0x0, 0x0 ]
-LinkEditData:
-  NameList:
-    - n_strx:          1494
-      n_type:          0xE
-      n_sect:          9
-      n_desc:          0
-      n_value:         0
-  StringTable:
-    - ''
-    - l_entry_point
-    - '_$s4main12Conformance2V5innerSivs'
-    - l_protocol_conformances
-    - l_coro.devirt.trigger
-    - '_$s4main10MyProtocolMp'
-    - ___swift_reflection_version
-    - _main
-    - '_$s4main3AppVMn'
-    - '_$s4main12Conformance2VMn'
-    - '_$s4main12Conformance2V5innerSivpfi'
-    - _symbolic Si
-    - l_llvm.swift_module_hash
-    - '_$s4main12Conformance2V5innerSivg'
-    - '_$s4main3AppVMf'
-    - '_$s4main12Conformance2VMf'
-    - _swift_bridgeObjectRelease
-    - l_type_metadata_table
-    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
-    - '_$sSaMa'
-    - '_$s4main3AppVMa'
-    - '_$s4main12Conformance2VMa'
-    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
-    - '_$s4main3AppV5$mainyyFZ'
-    - '_$s4main3AppVAAyyFZ'
-    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - _symbolic _____ 4main3AppV
-    - '_$sytWV'
-    - '_$sBi64_WV'
-    - '_$s4main12Conformance2V5innerSivpMV'
-    - _symbolic _____ 4main12Conformance2V
-    - '_symbolic $s4main10MyProtocolP'
-    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
-    - '_$sypN'
-    - '_$s4main3AppVN'
-    - '_$s4main12Conformance2VN'
-    - '_$sSSN'
-    - '_$s4main12Conformance2V5innerSivM'
-    - '_$s4mainMXM'
-    - '_$sSa12_endMutationyyF'
-    - '_$ss5print_9separator10terminatoryypd_S2StF'
-    - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF'
-    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
-    - '_$s4main3AppVMF'
-    - '_$s4main12Conformance2VMF'
-    - '_$s4main3AppVACycfC'
-    - '_$s4main12Conformance2VACycfC'
-    - '_$s4main12Conformance2V5innerACSi_tcfC'
-    - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC'
-    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
-    - l___unnamed_8
-    - l___unnamed_7
-    - l___unnamed_6
-    - l___unnamed_5
-    - l___unnamed_4
-    - '_$s4main12Conformance2V5innerSivM.resume.0'
-    - ''
-    - ''
-    - ''
-    - ''
-    - ''
-    - ''
-    - ''
diff --git a/llvm/test/tools/dsymutil/reflection-dump.test b/llvm/test/tools/dsymutil/reflection-dump.test
deleted file mode 100644
index 3fea11ee57adc..0000000000000
--- a/llvm/test/tools/dsymutil/reflection-dump.test
+++ /dev/null
@@ -1,42 +0,0 @@
-RUN: rm -rf %t.dir && mkdir -p %t.dir/tmp
-RUN: cp %p/Inputs/main.yaml %t.dir
-RUN: cp %p/Inputs/test.yaml %t.dir
-RUN: cp %p/Inputs/reflection_metadata.yaml %t.dir
-RUN: yaml2obj %p/Inputs/main.yaml -o %t.dir/main
-RUN: yaml2obj %p/Inputs/test.yaml -o %t.dir/tmp/test-1.o
-RUN: yaml2obj %p/Inputs/reflection_metadata.yaml -o %t.dir/tmp/reflection_metadata-1.o
-
-RUN: dsymutil -oso-prepend-path=%t.dir %t.dir/main -o %t.dir/main.dSYM
-RUN: llvm-objdump -s %t.dir/main.dSYM/Contents/Resources/DWARF/main | FileCheck %s
-
-CHECK: Contents of section __DWARF,__swift5_typeref:
-CHECK-NEXT:  10000e000 53690000 01ffffff ff002473 346d6169  Si........$s4mai
-CHECK-NEXT:  10000e010 6e31304d 7950726f 746f636f 6c500000  n10MyProtocolP..
-CHECK-NEXT:  10000e020 01ffffff ff007800 42300000 53690000  ......x.B0..Si..
-CHECK-NEXT:  10000e030 01ffffff ff002473 346d6169 6e31304d  ......$s4main10M
-CHECK-NEXT:  10000e040 7950726f 746f636f 6c500000 01ffffff  yProtocolP......
-
-CHECK: Contents of section __DWARF,__swift5_reflstr:
-CHECK-NEXT:  10000e09b 496e6e65 7200696e 6e657200 496e6e65  Inner.inner.Inne
-CHECK-NEXT:  10000e0ab 72006900 6d73006d 6500696e 6e657200  r.i.ms.me.inner.
-CHECK-NEXT:  10000e0bb 43004900 74006d67 73006d67 65004743  C.I.t.mgs.mge.GC
-CHECK-NEXT:  10000e0cb 00
-
-CHECK: Contents of section __DWARF,__swift5_assocty:
-CHECK-NEXT:  10000e0cc 00000000 fcffffff 01000000 08000000  ................
-CHECK-NEXT:  10000e0dc f0ffffff ecffffff 00000000 fcffffff  ................
-CHECK-NEXT:  10000e0ec 01000000 08000000 f0ffffff ecffffff  ................
-
-CHECK: Contents of section __DWARF,__swift5_fieldmd:
-CHECK-NEXT:  10000e0fc 00000000 00000000 00000c00 01000000  ................
-CHECK-NEXT:  10000e10c 02000000 ecffffff e8ffffff 00000000  ................
-CHECK-NEXT:  10000e11c 00000000 00000c00 00000000 00000000  ................
-CHECK-NEXT:  10000e12c 00000000 04000c00 00000000 00000000  ................
-
-CHECK: Contents of section __DWARF,__swift5_capture:
-CHECK-NEXT:  10000e22c 01000000 01000000 02000000 f4ffffff  ................
-CHECK-NEXT:	 10000e23c f0ffffff ecffffff                    ........
-
-CHECK: Contents of section __DWARF,__swift5_builtin:
-CHECK-NEXT:  10000e244 00000000 09000000 08000100 10000000  ................
-CHECK-NEXT:  10000e254 fe000000                             ....
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index b9682b83ac67a..a8dfde0865377 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -30,7 +30,6 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/MachO.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
@@ -175,7 +174,7 @@ bool DwarfLinkerForBinary::createStreamer(const Triple &TheTriple,
       [&](const Twine &Warning, StringRef Context, const DWARFDie *) {
         warn(Warning, Context);
       });
-  return Streamer->init(TheTriple, "__DWARF");
+  return Streamer->init(TheTriple);
 }
 
 ErrorOr<const object::ObjectFile &>
@@ -296,77 +295,6 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj,
   return ErrorOrObj.getError();
 }
 
-static bool binaryHasSwiftReflectionSections(const DebugMap &Map,
-                                             const LinkOptions &Options,
-                                             BinaryHolder &BinHolder) {
-  // If the input binary has swift5 reflection sections, there is no need to
-  // copy them to the .dSYM. Only copy them for binaries where the linker
-  // omitted the reflection metadata.
-  if (!Map.getBinaryPath().empty() &&
-      Options.FileType == OutputFileType::Object) {
-
-    auto ObjectEntry = BinHolder.getObjectEntry(Map.getBinaryPath());
-    // If ObjectEntry or Object has an error, no binary exists, therefore no
-    // reflection sections exist.
-    if (!ObjectEntry) {
-      // Any errors will be diagnosed later in the main loop, ignore them here.
-      llvm::consumeError(ObjectEntry.takeError());
-      return false;
-    }
-
-    auto Object =
-        ObjectEntry->getObjectAs<object::MachOObjectFile>(Map.getTriple());
-    if (!Object) {
-      // Any errors will be diagnosed later in the main loop, ignore them here.
-      llvm::consumeError(Object.takeError());
-      return false;
-    }
-
-    for (auto &Section : Object->sections()) {
-      llvm::Expected<llvm::StringRef> NameOrErr =
-          Object->getSectionName(Section.getRawDataRefImpl());
-      if (!NameOrErr) {
-        llvm::consumeError(NameOrErr.takeError());
-        continue;
-      }
-      NameOrErr->consume_back("__TEXT");
-      if (Object->mapReflectionSectionNameToEnumValue(*NameOrErr) !=
-          llvm::swift::Swift5ReflectionSectionKind::Unknown) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-static void
-copySwiftReflectionMetadata(const llvm::dsymutil::DebugMapObject *Obj,
-                            DwarfStreamer *Streamer) {
-  auto OF =
-      llvm::object::ObjectFile::createObjectFile(Obj->getObjectFilename());
-  if (!OF) {
-    llvm::consumeError(OF.takeError());
-    return;
-  } else if (auto *MO =
-                 dyn_cast<llvm::object::MachOObjectFile>(OF->getBinary())) {
-    for (auto &Section : OF->getBinary()->sections()) {
-      llvm::Expected<llvm::StringRef> NameOrErr =
-          MO->getSectionName(Section.getRawDataRefImpl());
-      if (!NameOrErr) {
-        llvm::consumeError(NameOrErr.takeError());
-        continue;
-      }
-      llvm::Expected<llvm::StringRef> SectionContents = Section.getContents();
-      if (SectionContents) {
-        NameOrErr->consume_back("__TEXT");
-        Streamer->emitSwiftReflectionSection(
-            MO->mapReflectionSectionNameToEnumValue(*NameOrErr),
-            *SectionContents, Section.getAlignment(), Section.getSize());
-      }
-    }
-  }
-}
-
 bool DwarfLinkerForBinary::link(const DebugMap &Map) {
   if (!createStreamer(Map.getTriple(), OutFile))
     return false;
@@ -461,19 +389,8 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
         llvm_unreachable("Unhandled DebugMap object");
       });
   GeneralLinker.setSwiftInterfacesMap(&ParseableSwiftInterfaces);
-  bool ReflectionSectionsPresentInBinary = false;
-  // If there is no output specified, no point in checking the binary for swift5
-  // reflection sections.
-  if (!Options.NoOutput) {
-    ReflectionSectionsPresentInBinary =
-        binaryHasSwiftReflectionSections(Map, Options, BinHolder);
-  }
 
   for (const auto &Obj : Map.objects()) {
-
-    if (!ReflectionSectionsPresentInBinary)
-      copySwiftReflectionMetadata(Obj.get(), Streamer.get());
-
     // N_AST objects (swiftmodule files) should get dumped directly into the
     // appropriate DWARF section.
     if (Obj->getType() == MachO::N_AST) {
@@ -514,6 +431,7 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
 
       continue;
     }
+
     if (auto ErrorOrObj = loadObject(*Obj, Map, RL))
       GeneralLinker.addObjectFile(*ErrorOrObj);
     else {

From e39c262979e6870fcd74ae8ac7428b940f3b6c07 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Fri, 21 Jan 2022 13:53:03 -0800
Subject: [PATCH 222/946] Revert "[gn build] Set HAVE_MALLINFO2=1"

This reverts commit 9f4cc5a6bb56b42bb90ea31f10ecf8fed8f07653.

Breaks http://45.33.8.238/macm1/26108/step_4.txt.
---
 llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
index 3c882a80fee19..3efbdde7d85b4 100644
--- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
@@ -89,7 +89,7 @@ write_cmake_config("config") {
     "HAVE_LIBPFM=",
     "HAVE_LIBPSAPI=",
     "HAVE_MALLCTL=",
-    "HAVE_MALLINFO2=1",
+    "HAVE_MALLINFO2=",
     "HAVE_SIGNAL_H=1",
     "HAVE_STD_IS_TRIVIALLY_COPYABLE=1",
     "HAVE_STRERROR=1",

From 705d8c49f9be82415a9cdd793cda405333fba71e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Fri, 21 Jan 2022 22:59:14 +0100
Subject: [PATCH 223/946] [x86] regenerate smul-with-overflow.ll; add test
 which failed with llvm 13 and lower (NFC)

---
 llvm/test/CodeGen/X86/smul-with-overflow.ll | 734 +++++++++++++++++++-
 1 file changed, 716 insertions(+), 18 deletions(-)

diff --git a/llvm/test/CodeGen/X86/smul-with-overflow.ll b/llvm/test/CodeGen/X86/smul-with-overflow.ll
index 7154a896a359d..1feb6f4026299 100644
--- a/llvm/test/CodeGen/X86/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/X86/smul-with-overflow.ll
@@ -1,9 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686-- | FileCheck %s
 
 @ok = internal constant [4 x i8] c"%d\0A\00"
 @no = internal constant [4 x i8] c"no\0A\00"
 
 define i1 @test1(i32 %v1, i32 %v2) nounwind {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    jno .LBB0_1
+; CHECK-NEXT:  # %bb.2: # %overflow
+; CHECK-NEXT:    pushl $no
+; CHECK-NEXT:    calll printf@PLT
+; CHECK-NEXT:    addl $4, %esp
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB0_1: # %normal
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    pushl $ok
+; CHECK-NEXT:    calll printf@PLT
+; CHECK-NEXT:    addl $8, %esp
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    retl
 entry:
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %sum = extractvalue {i32, i1} %t, 0
@@ -17,12 +36,27 @@ normal:
 overflow:
   %t2 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @no, i32 0, i32 0) ) nounwind
   ret i1 false
-; CHECK-LABEL: test1:
-; CHECK: imull
-; CHECK-NEXT: jno
 }
 
 define i1 @test2(i32 %v1, i32 %v2) nounwind {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    jno .LBB1_2
+; CHECK-NEXT:  # %bb.1: # %overflow
+; CHECK-NEXT:    pushl $no
+; CHECK-NEXT:    calll printf@PLT
+; CHECK-NEXT:    addl $4, %esp
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    retl
+; CHECK-NEXT:  .LBB1_2: # %normal
+; CHECK-NEXT:    pushl %eax
+; CHECK-NEXT:    pushl $ok
+; CHECK-NEXT:    calll printf@PLT
+; CHECK-NEXT:    addl $8, %esp
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    retl
 entry:
   %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2)
   %sum = extractvalue {i32, i1} %t, 0
@@ -36,48 +70,712 @@ overflow:
 normal:
   %t1 = tail call i32 (i8*, ...) @printf( i8* getelementptr ([4 x i8], [4 x i8]* @ok, i32 0, i32 0), i32 %sum ) nounwind
   ret i1 true
-; CHECK-LABEL: test2:
-; CHECK: imull
-; CHECK-NEXT: jno
 }
 
 declare i32 @printf(i8*, ...) nounwind
 declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
 
 define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: test3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    addl %eax, %eax
+; CHECK-NEXT:    retl
 entry:
 	%tmp0 = add i32 %b, %a
 	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 2)
 	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
 	ret i32 %tmp2
-; CHECK-LABEL: test3:
-; CHECK: addl
-; CHECK-NEXT: addl
-; CHECK-NEXT: ret
 }
 
 define i32 @test4(i32 %a, i32 %b) nounwind readnone {
+; CHECK-LABEL: test4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    imull $4, %eax, %eax
+; CHECK-NEXT:    retl
 entry:
 	%tmp0 = add i32 %b, %a
 	%tmp1 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %tmp0, i32 4)
 	%tmp2 = extractvalue { i32, i1 } %tmp1, 0
 	ret i32 %tmp2
-; CHECK-LABEL: test4:
-; CHECK: addl
-; CHECK: mull
-; CHECK-NEXT: ret
 }
 
 declare { i63, i1 } @llvm.smul.with.overflow.i63(i63, i63) nounwind readnone
 
+; Was returning false, should return true (not constant folded yet though).
+; PR13991
 define i1 @test5() nounwind {
+; CHECK-LABEL: test5:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    retl
 entry:
   %res = call { i63, i1 } @llvm.smul.with.overflow.i63(i63 4, i63 4611686018427387903)
   %sum = extractvalue { i63, i1 } %res, 0
   %overflow = extractvalue { i63, i1 } %res, 1
   ret i1 %overflow
-; Was returning false, should return true (not constant folded yet though).
-; PR13991
-; CHECK-LABEL: test5:
-; CHECK-NOT: xorb
+}
+
+
+
+declare { i129, i1 } @llvm.smul.with.overflow.i129(i129, i129)
+
+define { i129, i1 } @smul_ovf(i129 %x, i129 %y) nounwind {
+; CHECK-LABEL: smul_ovf:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    pushl %ebx
+; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %esi
+; CHECK-NEXT:    subl $164, %esp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    negl %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    andl $1, %ebp
+; CHECK-NEXT:    negl %ebp
+; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %edx, %ebx
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    addl %edx, %ecx
+; CHECK-NEXT:    adcl $0, %ebx
+; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    addl %eax, %ecx
+; CHECK-NEXT:    movl %ecx, %edi
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %edx, %ebx
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    addl %eax, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    adcl %edx, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    addl %ebx, %esi
+; CHECK-NEXT:    adcl %eax, %edi
+; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    movl %ebp, %ebx
+; CHECK-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    movl %eax, %ebp
+; CHECK-NEXT:    addl %edx, %ebp
+; CHECK-NEXT:    adcl $0, %ecx
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    addl %eax, %ebp
+; CHECK-NEXT:    adcl %edx, %ecx
+; CHECK-NEXT:    setb %bl
+; CHECK-NEXT:    addl %eax, %ecx
+; CHECK-NEXT:    movzbl %bl, %eax
+; CHECK-NEXT:    adcl %edx, %eax
+; CHECK-NEXT:    addl %edi, %esi
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    movl %ecx, %ebx
+; CHECK-NEXT:    adcl $0, %ebx
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    addl %edi, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %ebp, %esi
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    adcl %ecx, %eax
+; CHECK-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %edx
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    mull %esi
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    mull %esi
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    addl %ecx, %ebx
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    mull %edi
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    addl %ebx, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %esi, %ecx
+; CHECK-NEXT:    setb %bl
+; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    mull %edi
+; CHECK-NEXT:    addl %ecx, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movzbl %bl, %eax
+; CHECK-NEXT:    adcl %eax, %edx
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    mull %esi
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %esi
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    movl %eax, %ebp
+; CHECK-NEXT:    addl %ecx, %ebp
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    mull %ebx
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    addl %ebp, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %esi, %edi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %ebx
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    addl %edi, %esi
+; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    adcl %eax, %edx
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    mull %ebx
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %ebx
+; CHECK-NEXT:    movl %edx, %ebp
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    addl %edi, %ebx
+; CHECK-NEXT:    adcl $0, %ebp
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    mull %edi
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    addl %ebx, %eax
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    adcl %ebp, %ecx
+; CHECK-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %edi
+; CHECK-NEXT:    movl %edx, %ebp
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    addl %ecx, %edi
+; CHECK-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-NEXT:    adcl %eax, %ebp
+; CHECK-NEXT:    addl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %edi
+; CHECK-NEXT:    adcl $0, %ebp
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    mull %esi
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %esi
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    addl %ecx, %ebx
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    mull %ecx
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    addl %ebx, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %esi, %ecx
+; CHECK-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    addl %ecx, %ebx
+; CHECK-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-NEXT:    adcl %eax, %edx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    addl %edi, %ecx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT:    adcl %ebp, %esi
+; CHECK-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-NEXT:    adcl %eax, %ebx
+; CHECK-NEXT:    adcl $0, %edx
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl $0, (%esp) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    mull %ecx
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %ecx
+; CHECK-NEXT:    movl %eax, %ebp
+; CHECK-NEXT:    addl %esi, %ebp
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    adcl $0, %edi
+; CHECK-NEXT:    addl %ebx, %ebp
+; CHECK-NEXT:    adcl %esi, %edi
+; CHECK-NEXT:    setb %bl
+; CHECK-NEXT:    addl %eax, %edi
+; CHECK-NEXT:    movzbl %bl, %eax
+; CHECK-NEXT:    adcl %edx, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %ecx
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %ecx
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    addl %eax, %ecx
+; CHECK-NEXT:    adcl $0, %edx
+; CHECK-NEXT:    addl %ebx, %ecx
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %eax, %edx
+; CHECK-NEXT:    setb %bl
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movzbl %bl, %eax
+; CHECK-NEXT:    adcl %esi, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT:    addl %edx, %esi
+; CHECK-NEXT:    movl %ebp, %ebx
+; CHECK-NEXT:    adcl %eax, %ebx
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    adcl $0, %ecx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT:    adcl $0, %edx
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    addl %ecx, %esi
+; CHECK-NEXT:    adcl %edx, %eax
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:    adcl %ebp, %eax
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    movzbl %cl, %ecx
+; CHECK-NEXT:    adcl %edi, %ecx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; CHECK-NEXT:    adcl $0, %edi
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %edx
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    adcl $0, %ecx
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl (%esp), %ecx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    mull {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    addl %edx, %ebx
+; CHECK-NEXT:    movl %edx, %ebp
+; CHECK-NEXT:    adcl $0, %ebp
+; CHECK-NEXT:    addl %eax, %ebx
+; CHECK-NEXT:    movl %ebx, %esi
+; CHECK-NEXT:    adcl %edx, %ebp
+; CHECK-NEXT:    setb %ch
+; CHECK-NEXT:    addl %eax, %ebp
+; CHECK-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movzbl %ch, %ecx
+; CHECK-NEXT:    adcl %edx, %ecx
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    addl %ebp, %ebx
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %ecx, %esi
+; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    movl %ecx, %ebp
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %ecx
+; CHECK-NEXT:    addl %edi, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %edx, %esi
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; CHECK-NEXT:    movl %edi, %ebx
+; CHECK-NEXT:    adcl $0, %ebx
+; CHECK-NEXT:    movl %ebp, %edx
+; CHECK-NEXT:    adcl $0, %edx
+; CHECK-NEXT:    addl %eax, %ebx
+; CHECK-NEXT:    adcl %ecx, %edx
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    addl %ecx, %ebx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; CHECK-NEXT:    adcl %ebp, %edx
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    adcl %edi, %eax
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    addl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    movl (%esp), %eax # 4-byte Reload
+; CHECK-NEXT:    adcl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; CHECK-NEXT:    adcl %eax, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %edx
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %edi
+; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; CHECK-NEXT:    mull %ebp
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %ebp
+; CHECK-NEXT:    movl %ebp, %edi
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    addl %esi, %ebx
+; CHECK-NEXT:    movl %edx, %ebp
+; CHECK-NEXT:    adcl $0, %ebp
+; CHECK-NEXT:    addl %ecx, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %esi, %ebp
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    addl %eax, %ebp
+; CHECK-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movzbl %cl, %eax
+; CHECK-NEXT:    adcl %edx, %eax
+; CHECK-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %edi
+; CHECK-NEXT:    movl %edx, %esi
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    mull %edi
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    addl %esi, %ecx
+; CHECK-NEXT:    movl %edi, %esi
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    addl %ebx, %ecx
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl %edx, %esi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    addl %eax, %esi
+; CHECK-NEXT:    movzbl %cl, %edx
+; CHECK-NEXT:    adcl %edi, %edx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    addl %esi, %ecx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    adcl %edx, %eax
+; CHECK-NEXT:    movl %ebp, %edi
+; CHECK-NEXT:    adcl $0, %edi
+; CHECK-NEXT:    movl (%esp), %ebx # 4-byte Reload
+; CHECK-NEXT:    movl %ebx, %ebp
+; CHECK-NEXT:    adcl $0, %ebp
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    adcl $0, %edx
+; CHECK-NEXT:    addl %edi, %esi
+; CHECK-NEXT:    adcl %ebp, %edx
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; CHECK-NEXT:    addl %edi, %esi
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movzbl %al, %eax
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %ebx
+; CHECK-NEXT:    movl %ebx, (%esp) # 4-byte Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    imull %edx, %eax
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    imull %edx, %eax
+; CHECK-NEXT:    addl %ecx, %eax
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    imull %edx, %eax
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, %ebp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    imull %edx, %eax
+; CHECK-NEXT:    addl %ebp, %eax
+; CHECK-NEXT:    movl %edi, %ebp
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    adcl %ecx, %eax
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    imull %edx
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    addl %eax, %eax
+; CHECK-NEXT:    adcl %edx, %ecx
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    adcl %eax, %ebp
+; CHECK-NEXT:    adcl %ecx, %edi
+; CHECK-NEXT:    addl %esi, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl (%esp), %edi # 4-byte Folded Reload
+; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %eax, %esi
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %edx, %edi
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    addl %edx, %esi
+; CHECK-NEXT:    adcl $0, %edi
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    movl %ecx, %ebp
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    addl %eax, %esi
+; CHECK-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; CHECK-NEXT:    adcl %edx, %edi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    addl %eax, %edi
+; CHECK-NEXT:    movzbl %cl, %ecx
+; CHECK-NEXT:    adcl %edx, %ecx
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    addl %edi, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    adcl %ecx, %eax
+; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %edi
+; CHECK-NEXT:    adcl $0, %ecx
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl %ebp, %esi
+; CHECK-NEXT:    movl %ebp, %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    movl %eax, %ebp
+; CHECK-NEXT:    movl %eax, %ebx
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    addl %edx, %ebp
+; CHECK-NEXT:    adcl $0, %ecx
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    mull {{[0-9]+}}(%esp)
+; CHECK-NEXT:    addl %eax, %ebp
+; CHECK-NEXT:    adcl %edx, %ecx
+; CHECK-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; CHECK-NEXT:    addl %eax, %ecx
+; CHECK-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 1-byte Folded Reload
+; CHECK-NEXT:    adcl %edx, %esi
+; CHECK-NEXT:    movl %ebx, %edx
+; CHECK-NEXT:    addl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    adcl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; CHECK-NEXT:    movl %ecx, %ebx
+; CHECK-NEXT:    adcl $0, %ebx
+; CHECK-NEXT:    movl %esi, %eax
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    addl %edi, %ebx
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; CHECK-NEXT:    addl %edx, %ebx
+; CHECK-NEXT:    adcl %ebp, %eax
+; CHECK-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 1-byte Folded Reload
+; CHECK-NEXT:    adcl %ecx, %edi
+; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl $0, %esi
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    imull %ecx, %edi
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; CHECK-NEXT:    movl %edi, %ebp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; CHECK-NEXT:    imull %ecx, %edi
+; CHECK-NEXT:    addl %ebp, %edi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    imull %ecx, %ebp
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    addl %ebp, %ecx
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    adcl %edi, %ecx
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; CHECK-NEXT:    addl %ebx, %ebp
+; CHECK-NEXT:    movl %ebp, %ebx
+; CHECK-NEXT:    adcl %eax, %edi
+; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    adcl %esi, %ecx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; CHECK-NEXT:    movl (%esp), %eax # 4-byte Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    movl %ebp, %ebx
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; CHECK-NEXT:    adcl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; CHECK-NEXT:    sarl $31, %eax
+; CHECK-NEXT:    xorl %eax, %ebx
+; CHECK-NEXT:    xorl %eax, %edx
+; CHECK-NEXT:    orl %ebx, %edx
+; CHECK-NEXT:    xorl %eax, %ebp
+; CHECK-NEXT:    orl %edx, %ebp
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; CHECK-NEXT:    xorl %eax, %ebx
+; CHECK-NEXT:    xorl %eax, %ecx
+; CHECK-NEXT:    orl %ebx, %ecx
+; CHECK-NEXT:    xorl %eax, %esi
+; CHECK-NEXT:    orl %ecx, %esi
+; CHECK-NEXT:    xorl %eax, %edi
+; CHECK-NEXT:    xorl (%esp), %eax # 4-byte Folded Reload
+; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    orl %ebp, %eax
+; CHECK-NEXT:    orl %edi, %eax
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    andl $1, %ecx
+; CHECK-NEXT:    movl %ecx, %edx
+; CHECK-NEXT:    negl %edx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; CHECK-NEXT:    xorl %edx, %ebx
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; CHECK-NEXT:    xorl %edx, %esi
+; CHECK-NEXT:    orl %ebx, %esi
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; CHECK-NEXT:    xorl %edx, %ebx
+; CHECK-NEXT:    orl %esi, %ebx
+; CHECK-NEXT:    xorl %edi, %edx
+; CHECK-NEXT:    orl %ebx, %edx
+; CHECK-NEXT:    orl %eax, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT:    movl %edx, 4(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT:    movl %edx, (%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT:    movl %edx, 8(%eax)
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; CHECK-NEXT:    movl %edx, 12(%eax)
+; CHECK-NEXT:    movb %cl, 16(%eax)
+; CHECK-NEXT:    setne 20(%eax)
+; CHECK-NEXT:    addl $164, %esp
+; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    popl %ebx
+; CHECK-NEXT:    popl %ebp
+; CHECK-NEXT:    retl $4
+  %r = tail call { i129, i1 } @llvm.smul.with.overflow.i129(i129 %x, i129 %y)
+  ret { i129, i1 } %r
 }

From 6df05697ca1d3e691f674014f7728ff71147bbe7 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Fri, 21 Jan 2022 11:55:17 -0800
Subject: [PATCH 224/946] [gn build] Set HAVE_MALLINFO2=1

I'm seeing deprecated warnings due to using mallinfo() instead of
mallinfo2().

  ../../llvm/lib/Support/Unix/Process.inc:98:10: warning: 'mallinfo' is deprecated [-Wdeprecated-declarations]
    mi = ::mallinfo();

mallinfo2() is part of glibc 2.33 which was released in Feb 2021, which
is fairly recent but I think gn users should be using fairly up to date
glibcs.

If this breaks people we could make this a gn arg instead.

Differential Revision: https://reviews.llvm.org/D117916
---
 llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
index 3efbdde7d85b4..07b6453ea9b8a 100644
--- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
@@ -89,7 +89,6 @@ write_cmake_config("config") {
     "HAVE_LIBPFM=",
     "HAVE_LIBPSAPI=",
     "HAVE_MALLCTL=",
-    "HAVE_MALLINFO2=",
     "HAVE_SIGNAL_H=1",
     "HAVE_STD_IS_TRIVIALLY_COPYABLE=1",
     "HAVE_STRERROR=1",
@@ -143,6 +142,7 @@ write_cmake_config("config") {
       "HAVE_LINK_H=1",
       "HAVE_LSEEK64=1",
       "HAVE_MALLINFO=1",
+      "HAVE_MALLINFO2=1",
       "HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1",
     ]
   } else {
@@ -151,6 +151,7 @@ write_cmake_config("config") {
       "HAVE_LINK_H=",
       "HAVE_LSEEK64=",
       "HAVE_MALLINFO=",
+      "HAVE_MALLINFO2=",
       "HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=",
     ]
   }

From 653b007dc186845699d330c66dc9dfb3aaf396df Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Fri, 21 Jan 2022 23:18:23 +0100
Subject: [PATCH 225/946] [CodeComplete] fix nullptr crash in  612f5ed8823120

---
 clang/lib/Sema/SemaCodeComplete.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index cc08dee266136..b86bfe869c69d 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -5519,9 +5519,10 @@ QualType getApproximateType(const Expr *E) {
                         : getApproximateType(CDSME->getBase());
     if (CDSME->isArrow() && !Base.isNull())
       Base = Base->getPointeeType(); // could handle unique_ptr etc here?
-    auto *RD = Base.isNull()
-                   ? nullptr
-                   : llvm::dyn_cast<CXXRecordDecl>(getAsRecordDecl(Base));
+    auto *RD =
+        Base.isNull()
+            ? nullptr
+            : llvm::dyn_cast_or_null<CXXRecordDecl>(getAsRecordDecl(Base));
     if (RD && RD->isCompleteDefinition()) {
       // Look up member heuristically, including in bases.
       for (const auto *Member : RD->lookupDependentName(

From b796709a62da2a09ab753236deb3c9f2fc14cf47 Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Fri, 21 Jan 2022 14:40:36 -0800
Subject: [PATCH 226/946] Only run MLIR PyTACO tests when python bindings are
 enabled.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D117930
---
 .../test/Integration/Dialect/SparseTensor/taco/lit.local.cfg | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg

diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg
new file mode 100644
index 0000000000000..cf04454dea6ef
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg
@@ -0,0 +1,5 @@
+# Disable ASAN's leak detection for python OpsDSL tests.
+config.environment['ASAN_OPTIONS'] = 'detect_leaks=0'
+# Only run when python bindings are enabled.
+if not config.enable_bindings_python:
+  config.unsupported = True

From ba093fe58b152e12049663e87536ea15a6de638d Mon Sep 17 00:00:00 2001
From: Bixia Zheng <bixia@google.com>
Date: Fri, 21 Jan 2022 15:15:06 -0800
Subject: [PATCH 227/946] Fix a commit.

Reviewed By: aartbik

Differential Revision: https://reviews.llvm.org/D117932
---
 mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg b/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg
index cf04454dea6ef..7137d0fba95f8 100644
--- a/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg
+++ b/mlir/test/Integration/Dialect/SparseTensor/taco/lit.local.cfg
@@ -1,4 +1,4 @@
-# Disable ASAN's leak detection for python OpsDSL tests.
+# Disable ASAN's leak detection for python taco tests.
 config.environment['ASAN_OPTIONS'] = 'detect_leaks=0'
 # Only run when python bindings are enabled.
 if not config.enable_bindings_python:

From 6ba1fb04214bad08b8b19afba91798818ea276e5 Mon Sep 17 00:00:00 2001
From: Zequan Wu <zequanwu@google.com>
Date: Fri, 21 Jan 2022 15:09:42 -0800
Subject: [PATCH 228/946] [llvm-pdbutil] Fix gaps ouput.

---
 llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
index f284b87128640..e6b5d21f36e5f 100644
--- a/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
+++ b/llvm/tools/llvm-pdbutil/MinimalSymbolDumper.cpp
@@ -586,8 +586,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   AutoIndent Indent(P, 7);
   P.formatLine("offset = {0}, range = {1}", Def.Hdr.Offset,
                formatRange(Def.Range));
-  P.formatLine("gaps = {2}", Def.Hdr.Offset,
-               formatGaps(P.getIndentLevel() + 9, Def.Gaps));
+  P.formatLine("gaps = [{0}]", formatGaps(P.getIndentLevel() + 9, Def.Gaps));
   return Error::success();
 }
 
@@ -599,7 +598,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
                formatRegisterId(Def.Hdr.Register, CompilationCPU),
                int32_t(Def.Hdr.BasePointerOffset), Def.offsetInParent(),
                Def.hasSpilledUDTMember());
-  P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
+  P.formatLine("range = {0}, gaps = [{1}]", formatRange(Def.Range),
                formatGaps(P.getIndentLevel() + 9, Def.Gaps));
   return Error::success();
 }
@@ -626,7 +625,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   P.formatLine("register = {0}, may have no name = {1}, offset in parent = {2}",
                formatRegisterId(Def.Hdr.Register, CompilationCPU), NoName,
                uint32_t(Def.Hdr.OffsetInParent));
-  P.formatLine("range = {0}, gaps = {1}", formatRange(Def.Range),
+  P.formatLine("range = {0}, gaps = [{1}]", formatRange(Def.Range),
                formatGaps(P.getIndentLevel() + 9, Def.Gaps));
   return Error::success();
 }
@@ -636,7 +635,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR,
   AutoIndent Indent(P, 7);
   P.formatLine("program = {0}, offset in parent = {1}, range = {2}",
                Def.Program, Def.OffsetInParent, formatRange(Def.Range));
-  P.formatLine("gaps = {0}", formatGaps(P.getIndentLevel() + 9, Def.Gaps));
+  P.formatLine("gaps = [{0}]", formatGaps(P.getIndentLevel() + 9, Def.Gaps));
   return Error::success();
 }
 
@@ -644,7 +643,7 @@ Error MinimalSymbolDumper::visitKnownRecord(CVSymbol &CVR, DefRangeSym &Def) {
   AutoIndent Indent(P, 7);
   P.formatLine("program = {0}, range = {1}", Def.Program,
                formatRange(Def.Range));
-  P.formatLine("gaps = {0}", formatGaps(P.getIndentLevel() + 9, Def.Gaps));
+  P.formatLine("gaps = [{0}]", formatGaps(P.getIndentLevel() + 9, Def.Gaps));
   return Error::success();
 }
 

From 58ee14e29e98384bba6d2e1c1789b7f8e3060d24 Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com@gmail.com>
Date: Thu, 20 Jan 2022 14:18:20 -0800
Subject: [PATCH 229/946] [lldb] Fix timer logging inverted quiet condition

The logic of `g_quiet` was inverted in D26243. This corrects the issue.

Without this, running `log timers enable` produces a high volume of incremental
timer output.

Differential Revision: https://reviews.llvm.org/D117837
---
 lldb/source/Utility/Timer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Utility/Timer.cpp b/lldb/source/Utility/Timer.cpp
index 2f3afe4c87037..b190f35007d50 100644
--- a/lldb/source/Utility/Timer.cpp
+++ b/lldb/source/Utility/Timer.cpp
@@ -63,7 +63,7 @@ Timer::Timer(Timer::Category &category, const char *format, ...)
   TimerStack &stack = GetTimerStackForCurrentThread();
 
   stack.push_back(this);
-  if (g_quiet && stack.size() <= g_display_depth) {
+  if (!g_quiet && stack.size() <= g_display_depth) {
     std::lock_guard<std::mutex> lock(GetFileMutex());
 
     // Indent
@@ -89,7 +89,7 @@ Timer::~Timer() {
   Signposts->endInterval(this, m_category.GetName());
 
   TimerStack &stack = GetTimerStackForCurrentThread();
-  if (g_quiet && stack.size() <= g_display_depth) {
+  if (!g_quiet && stack.size() <= g_display_depth) {
     std::lock_guard<std::mutex> lock(GetFileMutex());
     ::fprintf(stdout, "%*s%.9f sec (%.9f sec)\n",
               int(stack.size() - 1) * TIMER_INDENT_AMOUNT, "",

From efa15f417847439eb8884f1ef47e163fa450ac7c Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik@google.com>
Date: Thu, 20 Jan 2022 17:27:23 -0800
Subject: [PATCH 230/946] [mlir][sparse] add ability for sparse tensor output

Rationale:
Although file I/O is a bit alien to MLIR itself, we provide two convenient ways
for sparse tensor I/O. The input part was already there (behind the swiss army
knife sparse_tensor.new). Now we have a sparse_tensor.out to write out data. As
before, the ops are kept vague and may change in the future. For now this
allows us to compare TACO vs MLIR very easily.

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D117850
---
 .../SparseTensor/IR/SparseTensorOps.td        | 20 ++++++
 .../SparseTensor/IR/SparseTensorDialect.cpp   |  6 ++
 .../Transforms/SparseTensorConversion.cpp     | 70 +++++++++++++++----
 .../lib/ExecutionEngine/SparseTensorUtils.cpp | 48 +++++++++++++
 .../test/Dialect/SparseTensor/conversion.mlir | 24 +++++++
 mlir/test/Dialect/SparseTensor/invalid.mlir   |  8 +++
 mlir/test/Dialect/SparseTensor/roundtrip.mlir | 14 ++++
 7 files changed, 177 insertions(+), 13 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
index b7fce5b3137c1..1209a70a72c22 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
+++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td
@@ -351,4 +351,24 @@ def SparseTensor_ReleaseOp : SparseTensor_Op<"release", []>,
   let assemblyFormat = "$tensor attr-dict `:` type($tensor)";
 }
 
+def SparseTensor_OutOp : SparseTensor_Op<"out", []>,
+    Arguments<(ins AnyType:$tensor, AnyType:$dest)> {
+  string summary = "Outputs a sparse tensor to the given destination";
+  string description = [{
+    Outputs the contents of a sparse tensor to the destination defined by an
+    opaque pointer provided by `dest`. For targets that have access to a file
+    system, for example, this pointer may specify a filename (or file) for output.
+    The form of the operation is kept deliberately very general to allow for
+    alternative implementations in the future, such as sending the contents to
+    a buffer defined by a pointer.
+
+    Example:
+
+    ```mlir
+    sparse_tensor.out %t, %dest : tensor<1024x1024xf64, #CSR>, !Dest
+    ```
+  }];
+  let assemblyFormat = "$tensor `,` $dest attr-dict `:` type($tensor) `,` type($dest)";
+}
+
 #endif // SPARSETENSOR_OPS
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 8f44b5b1b9e22..8a7942c8d666a 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -329,6 +329,12 @@ static LogicalResult verify(ReleaseOp op) {
   return success();
 }
 
+static LogicalResult verify(OutOp op) {
+  if (!getSparseTensorEncoding(op.tensor().getType()))
+    return op.emitError("expected a sparse tensor for output");
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // TensorDialect Methods.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
index a28f9ac70b318..94e87b3b79b7f 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp
@@ -15,6 +15,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "CodegenUtils.h"
+
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/Linalg/Utils/Utils.h"
@@ -189,8 +190,8 @@ static Value genBuffer(ConversionPatternRewriter &rewriter, Location loc,
 /// computation.
 static void newParams(ConversionPatternRewriter &rewriter,
                       SmallVector<Value, 8> &params, Operation *op,
-                      SparseTensorEncodingAttr &enc, Action action,
-                      ValueRange szs, Value ptr = Value()) {
+                      ShapedType stp, SparseTensorEncodingAttr &enc,
+                      Action action, ValueRange szs, Value ptr = Value()) {
   Location loc = op->getLoc();
   ArrayRef<SparseTensorEncodingAttr::DimLevelType> dlt = enc.getDimLevelType();
   unsigned sz = dlt.size();
@@ -218,7 +219,7 @@ static void newParams(ConversionPatternRewriter &rewriter,
   }
   params.push_back(genBuffer(rewriter, loc, rev));
   // Secondary and primary types encoding.
-  Type elemTp = op->getResult(0).getType().cast<ShapedType>().getElementType();
+  Type elemTp = stp.getElementType();
   params.push_back(constantPointerTypeEncoding(rewriter, loc, enc));
   params.push_back(constantIndexTypeEncoding(rewriter, loc, enc));
   params.push_back(constantPrimaryTypeEncoding(rewriter, loc, elemTp));
@@ -420,9 +421,10 @@ class SparseTensorNewConverter : public OpConversionPattern<NewOp> {
     // inferred from the result type of the new operator.
     SmallVector<Value, 4> sizes;
     SmallVector<Value, 8> params;
-    sizesFromType(rewriter, sizes, op.getLoc(), resType.cast<ShapedType>());
+    ShapedType stp = resType.cast<ShapedType>();
+    sizesFromType(rewriter, sizes, op.getLoc(), stp);
     Value ptr = adaptor.getOperands()[0];
-    newParams(rewriter, params, op, enc, Action::kFromFile, sizes, ptr);
+    newParams(rewriter, params, op, stp, enc, Action::kFromFile, sizes, ptr);
     rewriter.replaceOp(op, genNewCall(rewriter, op, params));
     return success();
   }
@@ -441,7 +443,9 @@ class SparseTensorInitConverter : public OpConversionPattern<InitOp> {
     // Generate the call to construct empty tensor. The sizes are
     // explicitly defined by the arguments to the init operator.
     SmallVector<Value, 8> params;
-    newParams(rewriter, params, op, enc, Action::kEmpty, adaptor.getOperands());
+    ShapedType stp = resType.cast<ShapedType>();
+    newParams(rewriter, params, op, stp, enc, Action::kEmpty,
+              adaptor.getOperands());
     rewriter.replaceOp(op, genNewCall(rewriter, op, params));
     return success();
   }
@@ -472,15 +476,15 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
       }
       SmallVector<Value, 4> sizes;
       SmallVector<Value, 8> params;
-      sizesFromPtr(rewriter, sizes, op, encSrc, srcType.cast<ShapedType>(),
-                   src);
+      ShapedType stp = srcType.cast<ShapedType>();
+      sizesFromPtr(rewriter, sizes, op, encSrc, stp, src);
       // Set up encoding with right mix of src and dst so that the two
       // method calls can share most parameters, while still providing
       // the correct sparsity information to either of them.
       auto enc = SparseTensorEncodingAttr::get(
           op->getContext(), encDst.getDimLevelType(), encDst.getDimOrdering(),
           encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth());
-      newParams(rewriter, params, op, enc, Action::kToCOO, sizes, src);
+      newParams(rewriter, params, op, stp, enc, Action::kToCOO, sizes, src);
       Value coo = genNewCall(rewriter, op, params);
       params[3] = constantPointerTypeEncoding(rewriter, loc, encDst);
       params[4] = constantIndexTypeEncoding(rewriter, loc, encDst);
@@ -512,7 +516,8 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
       SmallVector<Value, 4> sizes;
       SmallVector<Value, 8> params;
       sizesFromPtr(rewriter, sizes, op, encSrc, srcTensorTp, src);
-      newParams(rewriter, params, op, encDst, Action::kToIterator, sizes, src);
+      newParams(rewriter, params, op, dstTensorTp, encDst, Action::kToIterator,
+                sizes, src);
       Value iter = genNewCall(rewriter, op, params);
       Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
       Value elemPtr = genAllocaScalar(rewriter, loc, elemTp);
@@ -567,7 +572,7 @@ class SparseTensorConvertConverter : public OpConversionPattern<ConvertOp> {
     SmallVector<Value, 4> sizes;
     SmallVector<Value, 8> params;
     sizesFromSrc(rewriter, sizes, loc, src);
-    newParams(rewriter, params, op, encDst, Action::kEmptyCOO, sizes);
+    newParams(rewriter, params, op, stp, encDst, Action::kEmptyCOO, sizes);
     Value ptr = genNewCall(rewriter, op, params);
     Value ind = genAlloca(rewriter, loc, rank, rewriter.getIndexType());
     Value perm = params[2];
@@ -771,6 +776,45 @@ class SparseTensorCompressConverter : public OpConversionPattern<CompressOp> {
   }
 };
 
+class SparseTensorOutConverter : public OpConversionPattern<OutOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+  LogicalResult
+  matchAndRewrite(OutOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    Location loc = op->getLoc();
+    ShapedType srcType = op.tensor().getType().cast<ShapedType>();
+    // Convert to default permuted COO.
+    Value src = adaptor.getOperands()[0];
+    auto encSrc = getSparseTensorEncoding(srcType);
+    SmallVector<Value, 4> sizes;
+    SmallVector<Value, 8> params;
+    sizesFromPtr(rewriter, sizes, op, encSrc, srcType, src);
+    auto enc = SparseTensorEncodingAttr::get(
+        op->getContext(), encSrc.getDimLevelType(), AffineMap(),
+        encSrc.getPointerBitWidth(), encSrc.getIndexBitWidth());
+    newParams(rewriter, params, op, srcType, enc, Action::kToCOO, sizes, src);
+    Value coo = genNewCall(rewriter, op, params);
+    // Then output the tensor to external file with indices in the externally
+    // visible lexicographic index order. A sort is required if the source was
+    // not in that order yet (note that the sort can be dropped altogether if
+    // external format does not care about the order at all, but here we assume
+    // it does).
+    bool sort =
+        encSrc.getDimOrdering() && !encSrc.getDimOrdering().isIdentity();
+    params.clear();
+    params.push_back(coo);
+    params.push_back(adaptor.getOperands()[1]);
+    params.push_back(constantI1(rewriter, loc, sort));
+    Type eltType = srcType.getElementType();
+    SmallString<18> name{"outSparseTensor", primaryTypeFunctionSuffix(eltType)};
+    TypeRange noTp;
+    replaceOpWithFuncCall(rewriter, op, name, noTp, params,
+                          EmitCInterface::Off);
+    return success();
+  }
+};
+
 } // namespace
 
 //===----------------------------------------------------------------------===//
@@ -787,6 +831,6 @@ void mlir::populateSparseTensorConversionPatterns(TypeConverter &typeConverter,
                SparseTensorReleaseConverter, SparseTensorToPointersConverter,
                SparseTensorToIndicesConverter, SparseTensorToValuesConverter,
                SparseTensorLoadConverter, SparseTensorLexInsertConverter,
-               SparseTensorExpandConverter, SparseTensorCompressConverter>(
-      typeConverter, patterns.getContext());
+               SparseTensorExpandConverter, SparseTensorCompressConverter,
+               SparseTensorOutConverter>(typeConverter, patterns.getContext());
 }
diff --git a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
index 20cd1b53d31b4..605e17764773f 100644
--- a/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
+++ b/mlir/lib/ExecutionEngine/SparseTensorUtils.cpp
@@ -26,6 +26,8 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <fstream>
+#include <iostream>
 #include <limits>
 #include <numeric>
 #include <vector>
@@ -713,6 +715,31 @@ static SparseTensorCOO<V> *openSparseTensorCOO(char *filename, uint64_t rank,
   return tensor;
 }
 
+/// Writes the sparse tensor to extended FROSTT format.
+template <typename V>
+void outSparseTensor(const SparseTensorCOO<V> &tensor, char *filename) {
+  auto &sizes = tensor.getSizes();
+  auto &elements = tensor.getElements();
+  uint64_t rank = tensor.getRank();
+  uint64_t nnz = elements.size();
+  std::fstream file;
+  file.open(filename, std::ios_base::out | std::ios_base::trunc);
+  assert(file.is_open());
+  file << "; extended FROSTT format\n" << rank << " " << nnz << std::endl;
+  for (uint64_t r = 0; r < rank - 1; r++)
+    file << sizes[r] << " ";
+  file << sizes[rank - 1] << std::endl;
+  for (uint64_t i = 0; i < nnz; i++) {
+    auto &idx = elements[i].indices;
+    for (uint64_t r = 0; r < rank; r++)
+      file << (idx[r] + 1) << " ";
+    file << elements[i].value << std::endl;
+  }
+  file.flush();
+  file.close();
+  assert(file.good());
+}
+
 } // namespace
 
 extern "C" {
@@ -845,6 +872,17 @@ extern "C" {
         cursor, values, filled, added, count);                                 \
   }
 
+#define IMPL_OUT(NAME, V)                                                      \
+  void NAME(void *tensor, void *dest, bool sort) {                             \
+    assert(tensor &&dest);                                                     \
+    auto coo = static_cast<SparseTensorCOO<V> *>(tensor);                      \
+    if (sort)                                                                  \
+      coo->sort();                                                             \
+    char *filename = static_cast<char *>(dest);                                \
+    outSparseTensor<V>(*coo, filename);                                        \
+    delete coo;                                                                \
+  }
+
 // Assume index_t is in fact uint64_t, so that _mlir_ciface_newSparseTensor
 // can safely rewrite kIndex to kU64.  We make this assertion to guarantee
 // that this file cannot get out of sync with its header.
@@ -1026,6 +1064,14 @@ IMPL_EXPINSERT(expInsertI32, int32_t)
 IMPL_EXPINSERT(expInsertI16, int16_t)
 IMPL_EXPINSERT(expInsertI8, int8_t)
 
+/// Helper to output a sparse tensor, one per value type.
+IMPL_OUT(outSparseTensorF64, double)
+IMPL_OUT(outSparseTensorF32, float)
+IMPL_OUT(outSparseTensorI64, int64_t)
+IMPL_OUT(outSparseTensorI32, int32_t)
+IMPL_OUT(outSparseTensorI16, int16_t)
+IMPL_OUT(outSparseTensorI8, int8_t)
+
 #undef CASE
 #undef IMPL_SPARSEVALUES
 #undef IMPL_GETOVERHEAD
@@ -1033,6 +1079,7 @@ IMPL_EXPINSERT(expInsertI8, int8_t)
 #undef IMPL_GETNEXT
 #undef IMPL_LEXINSERT
 #undef IMPL_EXPINSERT
+#undef IMPL_OUT
 
 //===----------------------------------------------------------------------===//
 //
@@ -1162,6 +1209,7 @@ void convertFromMLIRSparseTensor(void *tensor, uint64_t *pRank, uint64_t *pNse,
   *pValues = values;
   *pIndices = indices;
 }
+
 } // extern "C"
 
 #endif // MLIR_CRUNNERUTILS_DEFINE_FUNCTIONS
diff --git a/mlir/test/Dialect/SparseTensor/conversion.mlir b/mlir/test/Dialect/SparseTensor/conversion.mlir
index 89ee0d5b7c816..04c8a51817813 100644
--- a/mlir/test/Dialect/SparseTensor/conversion.mlir
+++ b/mlir/test/Dialect/SparseTensor/conversion.mlir
@@ -468,3 +468,27 @@ func @sparse_compression(%arg0: tensor<8x8xf64, #SparseMatrix>,
     : tensor<8x8xf64, #SparseMatrix>, memref<?xindex>, memref<?xf64>, memref<?xi1>, memref<?xindex>, index
   return
 }
+
+// CHECK-LABEL: func @sparse_out1(
+//  CHECK-SAME: %[[A:.*]]: !llvm.ptr<i8>,
+//  CHECK-SAME: %[[B:.*]]: !llvm.ptr<i8>)
+//  CHECK-DAG:  %[[C:.*]] = arith.constant false
+//       CHECK: %[[T:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[A]])
+//       CHECK: call @outSparseTensorF64(%[[T]], %[[B]], %[[C]]) : (!llvm.ptr<i8>, !llvm.ptr<i8>, i1) -> ()
+//       CHECK: return
+func @sparse_out1(%arg0: tensor<?x?xf64, #SparseMatrix>, %arg1: !llvm.ptr<i8>) {
+  sparse_tensor.out %arg0, %arg1 : tensor<?x?xf64, #SparseMatrix>, !llvm.ptr<i8>
+  return
+}
+
+// CHECK-LABEL: func @sparse_out2(
+//  CHECK-SAME: %[[A:.*]]: !llvm.ptr<i8>,
+//  CHECK-SAME: %[[B:.*]]: !llvm.ptr<i8>)
+//  CHECK-DAG:  %[[C:.*]] = arith.constant true
+//       CHECK: %[[T:.*]] = call @newSparseTensor(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[A]])
+//       CHECK: call @outSparseTensorF32(%[[T]], %[[B]], %[[C]]) : (!llvm.ptr<i8>, !llvm.ptr<i8>, i1) -> ()
+//       CHECK: return
+func @sparse_out2(%arg0: tensor<?x?x?xf32, #SparseTensor>, %arg1: !llvm.ptr<i8>) {
+  sparse_tensor.out %arg0, %arg1 : tensor<?x?x?xf32, #SparseTensor>, !llvm.ptr<i8>
+  return
+}
diff --git a/mlir/test/Dialect/SparseTensor/invalid.mlir b/mlir/test/Dialect/SparseTensor/invalid.mlir
index 06d662127174c..84990221e4df4 100644
--- a/mlir/test/Dialect/SparseTensor/invalid.mlir
+++ b/mlir/test/Dialect/SparseTensor/invalid.mlir
@@ -204,3 +204,11 @@ func @sparse_convert_dim_mismatch(%arg0: tensor<10x?xf32>) -> tensor<10x10xf32,
   %0 = sparse_tensor.convert %arg0 : tensor<10x?xf32> to tensor<10x10xf32, #CSR>
   return %0 : tensor<10x10xf32, #CSR>
 }
+
+// -----
+
+func @invalid_out_dense(%arg0: tensor<10xf64>, %arg1: !llvm.ptr<i8>) {
+  // expected-error@+1 {{expected a sparse tensor for output}}
+  sparse_tensor.out %arg0, %arg1 : tensor<10xf64>, !llvm.ptr<i8>
+  return
+}
diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
index 853befc1cdef4..5457e55f57e6a 100644
--- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir
+++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir
@@ -179,3 +179,17 @@ func @sparse_compression(%arg0: tensor<8x8xf64, #SparseMatrix>,
     : tensor<8x8xf64, #SparseMatrix>, memref<?xindex>, memref<?xf64>, memref<?xi1>, memref<?xindex>, index
   return
 }
+
+// -----
+
+#SparseMatrix = #sparse_tensor.encoding<{dimLevelType = ["compressed", "compressed"]}>
+
+// CHECK-LABEL: func @sparse_out(
+//  CHECK-SAME: %[[A:.*]]: tensor<?x?xf64, #sparse_tensor.encoding<{{.*}}>>,
+//  CHECK-SAME: %[[B:.*]]: !llvm.ptr<i8>)
+//       CHECK: sparse_tensor.out %[[A]], %[[B]] : tensor<?x?xf64, #sparse_tensor.encoding<{{.*}}>>, !llvm.ptr<i8>
+//       CHECK: return
+func @sparse_out(%arg0: tensor<?x?xf64, #SparseMatrix>, %arg1: !llvm.ptr<i8>) {
+  sparse_tensor.out %arg0, %arg1 : tensor<?x?xf64, #SparseMatrix>, !llvm.ptr<i8>
+  return
+}

From 10d0d8c0c1db57b7ff465df7ced78a42a20d592d Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Sun, 16 Jan 2022 06:14:24 +0000
Subject: [PATCH 231/946] [clang][cmake] Use `GNUInstallDirs` to support custom
 installation dirs

I am breaking apart D99484 so the cause of build failures is easier to
understand.

Differential Revision: https://reviews.llvm.org/D117419
---
 clang/CMakeLists.txt                            | 14 +++++++++-----
 clang/cmake/modules/AddClang.cmake              |  5 +++--
 clang/cmake/modules/CMakeLists.txt              |  4 +++-
 clang/tools/c-index-test/CMakeLists.txt         |  2 +-
 clang/tools/clang-format/CMakeLists.txt         | 12 ++++++------
 clang/tools/clang-nvlink-wrapper/CMakeLists.txt |  2 +-
 clang/tools/clang-rename/CMakeLists.txt         |  4 ++--
 clang/tools/libclang/CMakeLists.txt             |  2 +-
 clang/tools/scan-build-py/CMakeLists.txt        |  6 +++---
 clang/tools/scan-build/CMakeLists.txt           |  6 +++---
 clang/tools/scan-view/CMakeLists.txt            |  4 ++--
 clang/utils/hmaptool/CMakeLists.txt             |  2 +-
 12 files changed, 35 insertions(+), 28 deletions(-)

diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 7ea37850ad609..49150fa7c5612 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -4,7 +4,13 @@ cmake_minimum_required(VERSION 3.13.4)
 # standalone project, using LLVM as an external library:
 if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
   project(Clang)
+  set(CLANG_BUILT_STANDALONE TRUE)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
 
+if(CLANG_BUILT_STANDALONE)
   set(CMAKE_CXX_STANDARD 14 CACHE STRING "C++ standard to conform to")
   set(CMAKE_CXX_STANDARD_REQUIRED YES)
   set(CMAKE_CXX_EXTENSIONS NO)
@@ -185,8 +191,6 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
     endif()
   endif()
 
-  set(CLANG_BUILT_STANDALONE TRUE)
-
   set(BACKEND_PACKAGE_STRING "LLVM ${LLVM_PACKAGE_VERSION}")
 else()
   set(BACKEND_PACKAGE_STRING "${PACKAGE_STRING}")
@@ -424,7 +428,7 @@ include_directories(BEFORE
 
 if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
   install(DIRECTORY include/clang include/clang-c
-    DESTINATION include
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
     COMPONENT clang-headers
     FILES_MATCHING
     PATTERN "*.def"
@@ -433,7 +437,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
     )
 
   install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/include/clang
-    DESTINATION include
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
     COMPONENT clang-headers
     FILES_MATCHING
     PATTERN "CMakeFiles" EXCLUDE
@@ -453,7 +457,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
 
   add_custom_target(bash-autocomplete DEPENDS utils/bash-autocomplete.sh)
   install(PROGRAMS utils/bash-autocomplete.sh
-          DESTINATION share/clang
+          DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
           COMPONENT bash-autocomplete)
   if(NOT LLVM_ENABLE_IDE)
     add_llvm_install_targets(install-bash-autocomplete
diff --git a/clang/cmake/modules/AddClang.cmake b/clang/cmake/modules/AddClang.cmake
index 5752f4277444e..9bbbfc032b7df 100644
--- a/clang/cmake/modules/AddClang.cmake
+++ b/clang/cmake/modules/AddClang.cmake
@@ -1,3 +1,4 @@
+include(GNUInstallDirs)
 include(LLVMDistributionSupport)
 
 function(clang_tablegen)
@@ -120,7 +121,7 @@ macro(add_clang_library name)
           ${export_to_clangtargets}
           LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX}
           ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}
-          RUNTIME DESTINATION bin)
+          RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
 
         if (NOT LLVM_ENABLE_IDE)
           add_llvm_install_targets(install-${lib}
@@ -159,7 +160,7 @@ macro(add_clang_tool name)
     get_target_export_arg(${name} Clang export_to_clangtargets)
     install(TARGETS ${name}
       ${export_to_clangtargets}
-      RUNTIME DESTINATION bin
+      RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
       COMPONENT ${name})
 
     if(NOT LLVM_ENABLE_IDE)
diff --git a/clang/cmake/modules/CMakeLists.txt b/clang/cmake/modules/CMakeLists.txt
index e9cc1240dafb7..c6f6ce9fe5d69 100644
--- a/clang/cmake/modules/CMakeLists.txt
+++ b/clang/cmake/modules/CMakeLists.txt
@@ -1,3 +1,4 @@
+include(ExtendPath)
 include(LLVMDistributionSupport)
 include(FindPrefixFromConfig)
 
@@ -42,8 +43,9 @@ find_prefix_from_config(CLANG_CONFIG_CODE CLANG_INSTALL_PREFIX "${CLANG_INSTALL_
 set(CLANG_CONFIG_CMAKE_DIR "\${CLANG_INSTALL_PREFIX}/${CLANG_INSTALL_PACKAGE_DIR}")
 set(CLANG_CONFIG_LLVM_CMAKE_DIR "\${CLANG_INSTALL_PREFIX}/${LLVM_INSTALL_PACKAGE_DIR}")
 get_config_exports_includes(Clang CLANG_CONFIG_INCLUDE_EXPORTS)
+extend_path(base_includedir "\${CLANG_INSTALL_PREFIX}" "${CMAKE_INSTALL_INCLUDEDIR}")
 set(CLANG_CONFIG_INCLUDE_DIRS
-  "\${CLANG_INSTALL_PREFIX}/include"
+  "${base_includedir}"
   )
 configure_file(
   ${CMAKE_CURRENT_SOURCE_DIR}/ClangConfig.cmake.in
diff --git a/clang/tools/c-index-test/CMakeLists.txt b/clang/tools/c-index-test/CMakeLists.txt
index 99c6081db2d63..0ae1b4e55244e 100644
--- a/clang/tools/c-index-test/CMakeLists.txt
+++ b/clang/tools/c-index-test/CMakeLists.txt
@@ -49,7 +49,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
     set_property(TARGET c-index-test APPEND PROPERTY INSTALL_RPATH
        "@executable_path/../../lib")
   else()
-    set(INSTALL_DESTINATION bin)
+    set(INSTALL_DESTINATION "${CMAKE_INSTALL_BINDIR}")
   endif()
 
   install(TARGETS c-index-test
diff --git a/clang/tools/clang-format/CMakeLists.txt b/clang/tools/clang-format/CMakeLists.txt
index 35ecdb11253ce..bbdef93b576b8 100644
--- a/clang/tools/clang-format/CMakeLists.txt
+++ b/clang/tools/clang-format/CMakeLists.txt
@@ -21,20 +21,20 @@ if( LLVM_LIB_FUZZING_ENGINE OR LLVM_USE_SANITIZE_COVERAGE )
 endif()
 
 install(PROGRAMS clang-format-bbedit.applescript
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-format)
 install(PROGRAMS clang-format-diff.py
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-format)
 install(PROGRAMS clang-format-sublime.py
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-format)
 install(PROGRAMS clang-format.el
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-format)
 install(PROGRAMS clang-format.py
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-format)
 install(PROGRAMS git-clang-format
-  DESTINATION bin
+  DESTINATION "${CMAKE_INSTALL_BINDIR}"
   COMPONENT clang-format)
diff --git a/clang/tools/clang-nvlink-wrapper/CMakeLists.txt b/clang/tools/clang-nvlink-wrapper/CMakeLists.txt
index 033392f1c2bdc..2c979e5097958 100644
--- a/clang/tools/clang-nvlink-wrapper/CMakeLists.txt
+++ b/clang/tools/clang-nvlink-wrapper/CMakeLists.txt
@@ -22,4 +22,4 @@ target_link_libraries(clang-nvlink-wrapper
   ${CLANG_NVLINK_WRAPPER_LIB_DEPS}
   )
 
-install(TARGETS clang-nvlink-wrapper RUNTIME DESTINATION bin)
+install(TARGETS clang-nvlink-wrapper RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
diff --git a/clang/tools/clang-rename/CMakeLists.txt b/clang/tools/clang-rename/CMakeLists.txt
index cda8e29ec5b18..58da000272f6a 100644
--- a/clang/tools/clang-rename/CMakeLists.txt
+++ b/clang/tools/clang-rename/CMakeLists.txt
@@ -19,8 +19,8 @@ clang_target_link_libraries(clang-rename
   )
 
 install(PROGRAMS clang-rename.py
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-rename)
 install(PROGRAMS clang-rename.el
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-rename)
diff --git a/clang/tools/libclang/CMakeLists.txt b/clang/tools/libclang/CMakeLists.txt
index bf88dca0a34b1..4e0647971ab46 100644
--- a/clang/tools/libclang/CMakeLists.txt
+++ b/clang/tools/libclang/CMakeLists.txt
@@ -186,7 +186,7 @@ endif()
 if(INTERNAL_INSTALL_PREFIX)
   set(LIBCLANG_HEADERS_INSTALL_DESTINATION "${INTERNAL_INSTALL_PREFIX}/include")
 else()
-  set(LIBCLANG_HEADERS_INSTALL_DESTINATION include)
+  set(LIBCLANG_HEADERS_INSTALL_DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
 endif()
 
 install(DIRECTORY ../../include/clang-c
diff --git a/clang/tools/scan-build-py/CMakeLists.txt b/clang/tools/scan-build-py/CMakeLists.txt
index c9f1cb7d6b2a7..061dc7ef4dd9e 100644
--- a/clang/tools/scan-build-py/CMakeLists.txt
+++ b/clang/tools/scan-build-py/CMakeLists.txt
@@ -43,7 +43,7 @@ foreach(BinFile ${BinFiles})
                          ${CMAKE_BINARY_DIR}/bin/scan-build-py
                        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/scan-build)
     install (PROGRAMS "bin/scan-build"
-             DESTINATION bin
+             DESTINATION "${CMAKE_INSTALL_BINDIR}"
              RENAME scan-build-py
              COMPONENT scan-build-py)
     list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/scan-build-py)
@@ -56,7 +56,7 @@ foreach(BinFile ${BinFiles})
                          ${CMAKE_BINARY_DIR}/bin/
                        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile})
     install(PROGRAMS bin/${BinFile}
-            DESTINATION bin
+            DESTINATION "${CMAKE_INSTALL_BINDIR}"
             COMPONENT scan-build-py)
     list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile})
   endif()
@@ -72,7 +72,7 @@ foreach(lib ${LibExecs})
                      DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libexec/${lib})
   list(APPEND Depends ${CMAKE_BINARY_DIR}/libexec/${lib})
   install(PROGRAMS libexec/${lib}
-          DESTINATION libexec
+          DESTINATION "${CMAKE_INSTALL_LIBEXECDIR}"
           COMPONENT scan-build-py)
 endforeach()
 
diff --git a/clang/tools/scan-build/CMakeLists.txt b/clang/tools/scan-build/CMakeLists.txt
index 74334e53c9b18..4a578b4c6f3ed 100644
--- a/clang/tools/scan-build/CMakeLists.txt
+++ b/clang/tools/scan-build/CMakeLists.txt
@@ -47,7 +47,7 @@ if(CLANG_INSTALL_SCANBUILD)
                        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile})
     list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile})
     install(PROGRAMS bin/${BinFile}
-            DESTINATION bin
+            DESTINATION "${CMAKE_INSTALL_BINDIR}"
             COMPONENT scan-build)
   endforeach()
 
@@ -61,7 +61,7 @@ if(CLANG_INSTALL_SCANBUILD)
                        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libexec/${LibexecFile})
     list(APPEND Depends ${CMAKE_BINARY_DIR}/libexec/${LibexecFile})
     install(PROGRAMS libexec/${LibexecFile}
-            DESTINATION libexec
+            DESTINATION "${CMAKE_INSTALL_LIBEXECDIR}"
             COMPONENT scan-build)
   endforeach()
 
@@ -89,7 +89,7 @@ if(CLANG_INSTALL_SCANBUILD)
                        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/share/scan-build/${ShareFile})
     list(APPEND Depends ${CMAKE_BINARY_DIR}/share/scan-build/${ShareFile})
     install(FILES share/scan-build/${ShareFile}
-            DESTINATION share/scan-build
+            DESTINATION "${CMAKE_INSTALL_DATADIR}/scan-build"
             COMPONENT scan-build)
   endforeach()
 
diff --git a/clang/tools/scan-view/CMakeLists.txt b/clang/tools/scan-view/CMakeLists.txt
index eccc6b83195b6..07aec76ee66f5 100644
--- a/clang/tools/scan-view/CMakeLists.txt
+++ b/clang/tools/scan-view/CMakeLists.txt
@@ -20,7 +20,7 @@ if(CLANG_INSTALL_SCANVIEW)
                        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile})
     list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile})
     install(PROGRAMS bin/${BinFile}
-            DESTINATION bin
+            DESTINATION "${CMAKE_INSTALL_BINDIR}"
             COMPONENT scan-view)
   endforeach()
 
@@ -34,7 +34,7 @@ if(CLANG_INSTALL_SCANVIEW)
                        DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/share/${ShareFile})
     list(APPEND Depends ${CMAKE_BINARY_DIR}/share/scan-view/${ShareFile})
     install(FILES share/${ShareFile}
-            DESTINATION share/scan-view
+            DESTINATION "${CMAKE_INSTALL_DATADIR}/scan-view"
             COMPONENT scan-view)
   endforeach()
 
diff --git a/clang/utils/hmaptool/CMakeLists.txt b/clang/utils/hmaptool/CMakeLists.txt
index 62f2de0cb15ce..f0d9866782b88 100644
--- a/clang/utils/hmaptool/CMakeLists.txt
+++ b/clang/utils/hmaptool/CMakeLists.txt
@@ -10,7 +10,7 @@ add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/${CLANG_HM
 
 list(APPEND Depends ${CMAKE_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/${CLANG_HMAPTOOL})
 install(PROGRAMS ${CLANG_HMAPTOOL}
-        DESTINATION bin
+        DESTINATION "${CMAKE_INSTALL_BINDIR}"
         COMPONENT hmaptool)
 
 add_custom_target(hmaptool ALL DEPENDS ${Depends})

From 1613f8b8d7d5fc155e1a0ce2f88c4be6b115936e Mon Sep 17 00:00:00 2001
From: Mitch Phillips <31459023+hctim@users.noreply.github.com>
Date: Fri, 21 Jan 2022 16:22:29 -0800
Subject: [PATCH 232/946] NFC (build fix): Add header for llvm::errs().

Looks like e9211e039377 unfortunately broke the sanitizer build bots,
because those bots compile the symbolizer with DLLVM_ENABLE_THREADS=Off.
Likely, before the patch, this header was transitively included.
---
 llvm/lib/Support/ThreadPool.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index bf2584950c4ac..6eec368e626ff 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -14,6 +14,7 @@
 
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Threading.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 

From 08574ce4d6254edf857cd374b54eca71dd2710f6 Mon Sep 17 00:00:00 2001
From: not-jenni <jennik@google.com>
Date: Fri, 21 Jan 2022 16:16:29 -0800
Subject: [PATCH 233/946] [mlir][tosa] Add clamp + clamp as single clamp
 canonicalization

When 2 clamp ops are in a row, they can be canonicalized into a single clamp
that uses the most constrained range

Reviewed By: rsuderman

Differential Revision: https://reviews.llvm.org/D117934
---
 mlir/lib/Dialect/Tosa/IR/TosaOps.cpp     | 31 ++++++++++++++++++++++++
 mlir/test/Dialect/Tosa/canonicalize.mlir | 10 ++++++++
 2 files changed, 41 insertions(+)

diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index f93e5b2052b9d..af8fa306d7651 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -526,9 +526,40 @@ struct ClampIsNoOp : public OpRewritePattern<tosa::ClampOp> {
   }
 };
 
+struct ClampClampOptimization : public OpRewritePattern<tosa::ClampOp> {
+  using OpRewritePattern<tosa::ClampOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(tosa::ClampOp op,
+                                PatternRewriter &rewriter) const override {
+    Value input = op.input();
+
+    Operation *definingOp = input.getDefiningOp();
+    if (!definingOp)
+      return failure();
+
+    if (tosa::ClampOp clampOp = dyn_cast<tosa::ClampOp>(definingOp)) {
+      auto min_fp = std::max(op.min_fp(), clampOp.min_fp()).convertToFloat();
+      auto max_fp = std::min(op.max_fp(), clampOp.max_fp()).convertToFloat();
+
+      auto min_int = std::max(op.min_int(), clampOp.min_int());
+      auto max_int = std::min(op.max_int(), clampOp.max_int());
+
+      rewriter.replaceOpWithNewOp<tosa::ClampOp>(
+          op, op.getType(), clampOp.input(),
+          rewriter.getI64IntegerAttr(min_int),
+          rewriter.getI64IntegerAttr(max_int), rewriter.getF32FloatAttr(min_fp),
+          rewriter.getF32FloatAttr(max_fp));
+      return success();
+    }
+
+    return failure();
+  }
+};
+
 void ClampOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
                                           MLIRContext *context) {
   results.insert<ClampIsNoOp>(context);
+  results.insert<ClampClampOptimization>(context);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir
index c1e828cc7fca4..41303eebc0693 100644
--- a/mlir/test/Dialect/Tosa/canonicalize.mlir
+++ b/mlir/test/Dialect/Tosa/canonicalize.mlir
@@ -98,6 +98,16 @@ func @clamp_uint8_is_noop(%arg0: tensor<4xui8>) -> tensor<4xui8> {
 
 // -----
 
+// CHECK-LABEL: @clamp_twice_is_single_clamp
+func @clamp_twice_is_single_clamp(%arg0: tensor<4xi8>) -> tensor<4xi8> {
+  // CHECK: "tosa.clamp"(%arg0) {max_fp = 3.000000e+00 : f32, max_int = 2 : i64, min_fp = -3.000000e+00 : f32, min_int = -2 : i64}
+  %0 = "tosa.clamp"(%arg0) {max_fp = 3.0 : f32, max_int = 4 : i64, min_fp = -5.0 : f32, min_int = -2 : i64} :  (tensor<4xi8>) -> tensor<4xi8>
+  %1 = "tosa.clamp"(%0) {max_fp = 5.0 : f32, max_int = 2 : i64, min_fp = -3.0 : f32, min_int = -4 : i64} :  (tensor<4xi8>) -> tensor<4xi8>
+  return %1 : tensor<4xi8>
+}
+
+// -----
+
 // CHECK-LABEL: @concat_fold
 func @concat_fold(%arg0: tensor<?x1xf32>) -> tensor<?x1xf32> {
   // CHECK: return %arg0

From 13fa17db3a720d149bcd0783856347a4f09cf634 Mon Sep 17 00:00:00 2001
From: Chris Bieneman <beanz@abolishcrlf.org>
Date: Fri, 21 Jan 2022 10:47:15 -0600
Subject: [PATCH 234/946] [split-file] Respect input file's line endings

This change adds support for split-file to respect the line ending style
of the input file. This enables split-file to work as expected on
Windows with input files containing CRLF line endings.

The test files added along with this change mirror the existing basic
tests, but are forced to contain CRLF line endings via git attributes.
This will result in the tests always containing CRLF line endings when
checked out regardless of the user's OS.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D117897
---
 llvm/.gitattributes                             |  2 ++
 llvm/test/tools/split-file/Inputs/basic-aa.crlf |  2 ++
 llvm/test/tools/split-file/Inputs/basic-bb.crlf |  4 ++++
 llvm/test/tools/split-file/basic.crlf.test      | 10 ++++++++++
 llvm/tools/split-file/split-file.cpp            |  3 ++-
 5 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/split-file/Inputs/basic-aa.crlf
 create mode 100644 llvm/test/tools/split-file/Inputs/basic-bb.crlf
 create mode 100644 llvm/test/tools/split-file/basic.crlf.test

diff --git a/llvm/.gitattributes b/llvm/.gitattributes
index 710defda24b62..b07b652eee02e 100644
--- a/llvm/.gitattributes
+++ b/llvm/.gitattributes
@@ -25,3 +25,5 @@ test/tools/llvm-mca/X86/directives-handle-crlf.s text eol=crlf
 test/tools/llvm-strings/radix.test text eol=lf
 test/tools/split-file/basic.test text eol=lf
 test/tools/split-file/Inputs/basic-*.txt eol=lf
+test/tools/split-file/basic.crlf.test text eol=crlf
+test/tools/split-file/Inputs/basic-*.crlf eol=crlf
diff --git a/llvm/test/tools/split-file/Inputs/basic-aa.crlf b/llvm/test/tools/split-file/Inputs/basic-aa.crlf
new file mode 100644
index 0000000000000..0b9ddeb2fc12a
--- /dev/null
+++ b/llvm/test/tools/split-file/Inputs/basic-aa.crlf
@@ -0,0 +1,2 @@
+
+aa
diff --git a/llvm/test/tools/split-file/Inputs/basic-bb.crlf b/llvm/test/tools/split-file/Inputs/basic-bb.crlf
new file mode 100644
index 0000000000000..b6c3c808ec62f
--- /dev/null
+++ b/llvm/test/tools/split-file/Inputs/basic-bb.crlf
@@ -0,0 +1,4 @@
+
+
+
+bb
diff --git a/llvm/test/tools/split-file/basic.crlf.test b/llvm/test/tools/split-file/basic.crlf.test
new file mode 100644
index 0000000000000..f01074a879630
--- /dev/null
+++ b/llvm/test/tools/split-file/basic.crlf.test
@@ -0,0 +1,10 @@
+#--- aa
+aa
+;--- bb
+bb
+;--- end
+
+# RUN: rm -rf %t
+# RUN: split-file --leading-lines %s %t
+# RUN: diff %S/Inputs/basic-aa.crlf %t/aa
+# RUN: diff %S/Inputs/basic-bb.crlf %t/bb
diff --git a/llvm/tools/split-file/split-file.cpp b/llvm/tools/split-file/split-file.cpp
index bde7d21a51e9a..4a92c1be78a2b 100644
--- a/llvm/tools/split-file/split-file.cpp
+++ b/llvm/tools/split-file/split-file.cpp
@@ -71,6 +71,7 @@ struct Part {
 static int handle(MemoryBuffer &inputBuf, StringRef input) {
   DenseMap<StringRef, Part> partToBegin;
   StringRef lastPart, separator;
+  StringRef EOL = inputBuf.getBuffer().detectEOL();
   for (line_iterator i(inputBuf, /*SkipBlanks=*/false, '\0'); !i.is_at_eof();) {
     const int64_t lineNo = i.line_number();
     const StringRef line = *i++;
@@ -128,7 +129,7 @@ static int handle(MemoryBuffer &inputBuf, StringRef input) {
 
     Part &part = keyValue.second;
     for (int64_t i = 0; i != part.leadingLines; ++i)
-      (*f).os().write('\n');
+      (*f).os() << EOL;
     if (part.begin)
       (*f).os().write(part.begin, part.end - part.begin);
     outputFiles.push_back(std::move(f));

From 4f547ee8b8a7d3e298c6cd95e58dd8916883e2d5 Mon Sep 17 00:00:00 2001
From: Joe Loser <joeloser93@gmail.com>
Date: Fri, 21 Jan 2022 15:01:34 -0500
Subject: [PATCH 235/946] [libc++][test] Add const and reference tests for
 enable_view. NFC.

As discussed in https://reviews.llvm.org/D117714, there is missing test coverage
for the behavior of `enable_view` when given a const or reference qualified
type. Add such tests showing the current behavior.

Differential Revision: https://reviews.llvm.org/D117918
---
 .../range.view/enable_view.compile.pass.cpp   | 65 +++++++++++++++++--
 1 file changed, 61 insertions(+), 4 deletions(-)

diff --git a/libcxx/test/std/ranges/range.req/range.view/enable_view.compile.pass.cpp b/libcxx/test/std/ranges/range.req/range.view/enable_view.compile.pass.cpp
index 19ea867f3773a..7a12ccfc51e7f 100644
--- a/libcxx/test/std/ranges/range.req/range.view/enable_view.compile.pass.cpp
+++ b/libcxx/test/std/ranges/range.req/range.view/enable_view.compile.pass.cpp
@@ -22,27 +22,84 @@
 // Doesn't derive from view_base
 struct Empty { };
 static_assert(!std::ranges::enable_view<Empty>);
+static_assert(!std::ranges::enable_view<Empty&>);
+static_assert(!std::ranges::enable_view<Empty&&>);
+static_assert(!std::ranges::enable_view<const Empty>);
+static_assert(!std::ranges::enable_view<const Empty&>);
+static_assert(!std::ranges::enable_view<const Empty&&>);
 
 // Derives from view_base, but privately
 struct PrivateViewBase : private std::ranges::view_base { };
 static_assert(!std::ranges::enable_view<PrivateViewBase>);
+static_assert(!std::ranges::enable_view<PrivateViewBase&>);
+static_assert(!std::ranges::enable_view<PrivateViewBase&&>);
+static_assert(!std::ranges::enable_view<const PrivateViewBase>);
+static_assert(!std::ranges::enable_view<const PrivateViewBase&>);
+static_assert(!std::ranges::enable_view<const PrivateViewBase&&>);
 
 // Derives from view_base, but specializes enable_view to false
 struct EnableViewFalse : std::ranges::view_base { };
-namespace std::ranges { template <> constexpr bool enable_view<EnableViewFalse> = false; }
+template <> constexpr bool std::ranges::enable_view<EnableViewFalse> = false;
 static_assert(!std::ranges::enable_view<EnableViewFalse>);
-
+static_assert(!std::ranges::enable_view<EnableViewFalse&>);
+static_assert(!std::ranges::enable_view<EnableViewFalse&&>);
+static_assert(std::ranges::enable_view<const EnableViewFalse>);
+static_assert(!std::ranges::enable_view<const EnableViewFalse&>);
+static_assert(!std::ranges::enable_view<const EnableViewFalse&&>);
 
 // Derives from view_base
 struct PublicViewBase : std::ranges::view_base { };
 static_assert(std::ranges::enable_view<PublicViewBase>);
+static_assert(!std::ranges::enable_view<PublicViewBase&>);
+static_assert(!std::ranges::enable_view<PublicViewBase&&>);
+static_assert(std::ranges::enable_view<const PublicViewBase>);
+static_assert(!std::ranges::enable_view<const PublicViewBase&>);
+static_assert(!std::ranges::enable_view<const PublicViewBase&&>);
 
 // Does not derive from view_base, but specializes enable_view to true
 struct EnableViewTrue { };
-namespace std::ranges { template <> constexpr bool enable_view<EnableViewTrue> = true; }
+template <> constexpr bool std::ranges::enable_view<EnableViewTrue> = true;
 static_assert(std::ranges::enable_view<EnableViewTrue>);
-
+static_assert(!std::ranges::enable_view<EnableViewTrue&>);
+static_assert(!std::ranges::enable_view<EnableViewTrue&&>);
+static_assert(!std::ranges::enable_view<const EnableViewTrue>);
+static_assert(!std::ranges::enable_view<const EnableViewTrue&>);
+static_assert(!std::ranges::enable_view<const EnableViewTrue&&>);
 
 // Make sure that enable_view is a bool, not some other contextually-convertible-to-bool type.
 ASSERT_SAME_TYPE(decltype(std::ranges::enable_view<Empty>), const bool);
 ASSERT_SAME_TYPE(decltype(std::ranges::enable_view<PublicViewBase>), const bool);
+
+struct V1 : std::ranges::view_interface<V1> {};
+static_assert(std::ranges::enable_view<V1>);
+static_assert(!std::ranges::enable_view<V1&>);
+static_assert(!std::ranges::enable_view<V1&&>);
+static_assert(std::ranges::enable_view<const V1>);
+static_assert(!std::ranges::enable_view<const V1&>);
+static_assert(!std::ranges::enable_view<const V1&&>);
+
+struct V2 : std::ranges::view_interface<V1>, std::ranges::view_interface<V2> {};
+static_assert(!std::ranges::enable_view<V2>);
+static_assert(!std::ranges::enable_view<V2&>);
+static_assert(!std::ranges::enable_view<V2&&>);
+static_assert(!std::ranges::enable_view<const V2>);
+static_assert(!std::ranges::enable_view<const V2&>);
+static_assert(!std::ranges::enable_view<const V2&&>);
+
+struct V3 : std::ranges::view_interface<V1> {};
+static_assert(std::ranges::enable_view<V3>);
+static_assert(!std::ranges::enable_view<V3&>);
+static_assert(!std::ranges::enable_view<V3&&>);
+static_assert(std::ranges::enable_view<const V3>);
+static_assert(!std::ranges::enable_view<const V3&>);
+static_assert(!std::ranges::enable_view<const V3&&>);
+
+struct PrivateInherit : private std::ranges::view_interface<PrivateInherit> {};
+static_assert(!std::ranges::enable_view<PrivateInherit>);
+
+// ADL-proof
+struct Incomplete;
+template<class T> struct Holder { T t; };
+static_assert(!std::ranges::enable_view<Holder<Incomplete>*>);
+
+static_assert(!std::ranges::enable_view<void>);

From 9cddfe3085c4c500e64350b56c37ae2ed1cbe3f6 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Mon, 10 Jan 2022 14:51:37 -0800
Subject: [PATCH 236/946] [CMake] Passthrough OSX CMake options to builtins and
 runtimes

When using the default target, there's no other way to pass these
into the builtins and runtimes subbuilds.

Differential Revision: https://reviews.llvm.org/D116976
---
 llvm/runtimes/CMakeLists.txt | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt
index ba78f466ab3a2..05567b5234eea 100644
--- a/llvm/runtimes/CMakeLists.txt
+++ b/llvm/runtimes/CMakeLists.txt
@@ -90,7 +90,10 @@ function(builtin_default_target compiler_rt_path)
                                       -DCMAKE_ASM_COMPILER_WORKS=ON
                                       ${COMMON_CMAKE_ARGS}
                                       ${BUILTINS_CMAKE_ARGS}
-                           PASSTHROUGH_PREFIXES COMPILER_RT
+                           PASSTHROUGH_PREFIXES CMAKE_OSX
+                                                COMPILER_RT
+                                                DARWIN
+                                                SANITIZER
                            USE_TOOLCHAIN
                            TARGET_TRIPLE ${TARGET_TRIPLE}
                            ${EXTRA_ARGS})
@@ -181,10 +184,10 @@ foreach(entry ${runtimes})
   if (${canon_name} STREQUAL "OPENMP")
     list(APPEND prefixes "LIBOMP" "LIBOMPTARGET")
   endif()
-  # Many compiler-rt options start with SANITIZER_ rather than COMPILER_RT_,
-  # so when compiler-rt is enabled, consider both.
+  # Many compiler-rt options start with SANITIZER_ and DARWIN_ rather than
+  # COMPILER_RT_, so when compiler-rt is enabled, consider both.
   if(canon_name STREQUAL "COMPILER_RT")
-    list(APPEND prefixes SANITIZER)
+    list(APPEND prefixes SANITIZER DARWIN)
   endif()
 
   string(FIND ${projName} "lib" LIB_IDX)
@@ -241,7 +244,8 @@ function(runtime_default_target)
                                       -DCMAKE_ASM_COMPILER_WORKS=ON
                                       ${COMMON_CMAKE_ARGS}
                                       ${RUNTIMES_CMAKE_ARGS}
-                           PASSTHROUGH_PREFIXES LLVM_ENABLE_RUNTIMES
+                           PASSTHROUGH_PREFIXES CMAKE_OSX
+                                                LLVM_ENABLE_RUNTIMES
                                                 LLVM_USE_LINKER
                                                 ${ARG_PREFIXES}
                            EXTRA_TARGETS ${extra_targets}

From e6cdef187ed37468c14c53723c58cbde3e1341db Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Fri, 21 Jan 2022 17:00:03 -0800
Subject: [PATCH 237/946] [XRay][test] Clean up llc RUN lines

---
 .../CodeGen/AArch64/xray-attribute-instrumentation.ll     | 2 +-
 llvm/test/CodeGen/AArch64/xray-omit-function-index.ll     | 4 ++--
 .../AArch64/xray-partial-instrumentation-skip-entry.ll    | 2 +-
 .../AArch64/xray-partial-instrumentation-skip-exit.ll     | 2 +-
 llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll          | 2 +-
 .../CodeGen/ARM/xray-armv6-attribute-instrumentation.ll   | 4 ++--
 .../CodeGen/ARM/xray-armv7-attribute-instrumentation.ll   | 4 ++--
 llvm/test/CodeGen/ARM/xray-tail-call-sled.ll              | 4 ++--
 llvm/test/CodeGen/Hexagon/xray-pred-ret.ll                | 2 +-
 llvm/test/CodeGen/Hexagon/xray.ll                         | 4 ++--
 .../CodeGen/Mips/xray-mips-attribute-instrumentation.ll   | 8 ++++----
 llvm/test/CodeGen/Mips/xray-section-group.ll              | 8 ++++----
 .../CodeGen/PowerPC/xray-attribute-instrumentation.ll     | 5 ++---
 llvm/test/CodeGen/PowerPC/xray-conditional-return.ll      | 2 +-
 llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll       | 2 +-
 llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll        | 2 +-
 llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll          | 2 +-
 llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll   | 7 +++----
 llvm/test/CodeGen/X86/xray-custom-log.ll                  | 4 ++--
 llvm/test/CodeGen/X86/xray-empty-firstmbb.mir             | 2 +-
 llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll       | 4 ++--
 llvm/test/CodeGen/X86/xray-log-args.ll                    | 4 ++--
 llvm/test/CodeGen/X86/xray-loop-detection.ll              | 4 ++--
 llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir     | 2 +-
 .../X86/xray-partial-instrumentation-skip-entry.ll        | 7 +++----
 .../CodeGen/X86/xray-partial-instrumentation-skip-exit.ll | 6 +++---
 llvm/test/CodeGen/X86/xray-section-group.ll               | 6 +++---
 llvm/test/CodeGen/X86/xray-selective-instrumentation.ll   | 2 +-
 llvm/test/CodeGen/X86/xray-tail-call-sled.ll              | 4 ++--
 29 files changed, 54 insertions(+), 57 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/AArch64/xray-attribute-instrumentation.ll
index b14463ed32a89..5ca170ac0a2b9 100644
--- a/llvm/test/CodeGen/AArch64/xray-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/AArch64/xray-attribute-instrumentation.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK-LABEL: foo:
diff --git a/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll b/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll
index 385298387b6b4..4b2e6b72c02f2 100644
--- a/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll
+++ b/llvm/test/CodeGen/AArch64/xray-omit-function-index.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -no-xray-index -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -no-xray-index -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK-LABEL: Lxray_sled_0:
@@ -30,4 +30,4 @@ define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always"
 ; CHECK:       .xword .Lxray_sled_1
 ; CHECK-LABEL: Lxray_sleds_end0
 
-; CHECK-NOT: xray_fn_idx
\ No newline at end of file
+; CHECK-NOT: xray_fn_idx
diff --git a/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-entry.ll b/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-entry.ll
index 43e1dfd51740c..a28d780bf4975 100644
--- a/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-entry.ll
+++ b/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-entry.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" "xray-skip-entry" {
 ; CHECK-NOT: Lxray_sled_0:
diff --git a/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-exit.ll b/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-exit.ll
index 4a74e9d19b4c3..ecfa6ac29f62b 100644
--- a/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-exit.ll
+++ b/llvm/test/CodeGen/AArch64/xray-partial-instrumentation-skip-exit.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -o - -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" "xray-skip-exit" {
 ; CHECK-LABEL: Lxray_sled_0:
diff --git a/llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll b/llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll
index b6f7a4edbed5b..b4a541bca3284 100644
--- a/llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll
+++ b/llvm/test/CodeGen/AArch64/xray-tail-call-sled.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -o - -mtriple=aarch64-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s
 
 define i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK:       .p2align	2
diff --git a/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll b/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll
index 3cec7cd699ad9..53bc8d62fd833 100644
--- a/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/ARM/xray-armv6-attribute-instrumentation.ll
@@ -1,5 +1,5 @@
-; RUN: llc -filetype=asm -o - -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -filetype=asm -o - -mtriple=armv6-apple-ios6.0.0  < %s | FileCheck %s
+; RUN: llc -mtriple=armv6-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=armv6-apple-ios6.0.0  < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK-LABEL: Lxray_sled_0:
diff --git a/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll b/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll
index e10746d33001f..98dbabcb6aba6 100644
--- a/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/ARM/xray-armv7-attribute-instrumentation.ll
@@ -1,5 +1,5 @@
-; RUN: llc -filetype=asm -o - -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -filetype=asm -o - -mtriple=armv7-apple-ios6.0.0  < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios6.0.0  < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK-LABEL: Lxray_sled_0:
diff --git a/llvm/test/CodeGen/ARM/xray-tail-call-sled.ll b/llvm/test/CodeGen/ARM/xray-tail-call-sled.ll
index 2d3af5595f13f..93b9e2f3387a1 100644
--- a/llvm/test/CodeGen/ARM/xray-tail-call-sled.ll
+++ b/llvm/test/CodeGen/ARM/xray-tail-call-sled.ll
@@ -1,5 +1,5 @@
-; RUN: llc -filetype=asm -o - -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -filetype=asm -o - -mtriple=armv7-apple-ios6.0.0    < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=armv7-apple-ios6.0.0    < %s | FileCheck %s
 
 define i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK:       .p2align	2
diff --git a/llvm/test/CodeGen/Hexagon/xray-pred-ret.ll b/llvm/test/CodeGen/Hexagon/xray-pred-ret.ll
index c7d5333059253..306a00fc298e0 100644
--- a/llvm/test/CodeGen/Hexagon/xray-pred-ret.ll
+++ b/llvm/test/CodeGen/Hexagon/xray-pred-ret.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -o - -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon-unknown-linux-musl < %s | FileCheck %s
 
 define void @Foo(i32 signext %a, i32 signext %b) #0 {
 ; CHECK-LABEL: @Foo
diff --git a/llvm/test/CodeGen/Hexagon/xray.ll b/llvm/test/CodeGen/Hexagon/xray.ll
index ba5913a12de39..b9b25b80ef00f 100644
--- a/llvm/test/CodeGen/Hexagon/xray.ll
+++ b/llvm/test/CodeGen/Hexagon/xray.ll
@@ -1,5 +1,5 @@
-; RUN: llc -filetype=asm -o - -mtriple=hexagon-unknown-elf < %s | FileCheck %s
-; RUN: llc -filetype=asm -o - -mtriple=hexagon-unknown-linux-musl  < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon-unknown-elf < %s | FileCheck %s
+; RUN: llc -mtriple=hexagon-unknown-linux-musl  < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK-LABEL: .Lxray_sled_0:
diff --git a/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll b/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll
index 2b28fae57dbf5..ae542146a7997 100644
--- a/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/Mips/xray-mips-attribute-instrumentation.ll
@@ -1,7 +1,7 @@
-; RUN: llc -filetype=asm -o - -mtriple=mips-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS32 %s
-; RUN: llc -filetype=asm -o - -mtriple=mipsel-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS32 %s
-; RUN: llc -filetype=asm -o - -mtriple=mips64-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS64 %s
-; RUN: llc -filetype=asm -o - -mtriple=mips64el-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS64 %s
+; RUN: llc -mtriple=mips-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS32 %s
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS32 %s
+; RUN: llc -mtriple=mips64-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS64 %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu < %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-MIPS64 %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK:       .p2align 2
diff --git a/llvm/test/CodeGen/Mips/xray-section-group.ll b/llvm/test/CodeGen/Mips/xray-section-group.ll
index 218516dd189ca..5a208217092dd 100644
--- a/llvm/test/CodeGen/Mips/xray-section-group.ll
+++ b/llvm/test/CodeGen/Mips/xray-section-group.ll
@@ -1,11 +1,11 @@
-; RUN: llc -filetype=asm -o - -mtriple=mips-unknown-linux-gnu -function-sections < %s | FileCheck %s
-; RUN: llc -filetype=asm -o - -mtriple=mipsel-unknown-linux-gnu -function-sections < %s | FileCheck %s
+; RUN: llc -mtriple=mips-unknown-linux-gnu -function-sections < %s | FileCheck %s
+; RUN: llc -mtriple=mipsel-unknown-linux-gnu -function-sections < %s | FileCheck %s
 ; RUN: llc -filetype=obj -o %t -mtriple=mips-unknown-linux-gnu -function-sections < %s
 ; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=CHECK-OBJ
 ; RUN: llc -filetype=obj -o %t -mtriple=mipsel-unknown-linux-gnu -function-sections < %s
 ; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=CHECK-OBJ
-; RUN: llc -filetype=asm -o - -mtriple=mips64-unknown-linux-gnu -function-sections < %s | FileCheck %s
-; RUN: llc -filetype=asm -o - -mtriple=mips64el-unknown-linux-gnu -function-sections < %s | FileCheck %s
+; RUN: llc -mtriple=mips64-unknown-linux-gnu -function-sections < %s | FileCheck %s
+; RUN: llc -mtriple=mips64el-unknown-linux-gnu -function-sections < %s | FileCheck %s
 ; RUN: llc -filetype=obj -o %t -mtriple=mips64-unknown-linux-gnu -function-sections < %s
 ; RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix=CHECK-OBJ
 ; RUN: llc -filetype=obj -o %t -mtriple=mips64el-unknown-linux-gnu -function-sections < %s
diff --git a/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll
index f73679001158d..fcebe37753127 100644
--- a/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/PowerPC/xray-attribute-instrumentation.ll
@@ -1,6 +1,5 @@
-; RUN: llc -filetype=asm -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -filetype=asm -o - -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:    -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK-LABEL: foo:
diff --git a/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll b/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll
index c0e8c1c3c20ca..5851e92f78938 100644
--- a/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll
+++ b/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
 
 define void @Foo(i32 signext %a, i32 signext %b) #0 {
 ; CHECK-LABEL: @Foo
diff --git a/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll b/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll
index 1f176f6f36676..3f15d46290a1e 100644
--- a/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll
+++ b/llvm/test/CodeGen/PowerPC/xray-ret-is-terminator.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
 
 define void @ILLBeBack() #0 {
 ; CHECK-LABEL: @ILLBeBack
diff --git a/llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll b/llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll
index d427dbb4238e1..949b2837fd5c2 100644
--- a/llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll
+++ b/llvm/test/CodeGen/PowerPC/xray-tail-call-hidden.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
 
 declare hidden i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always"
 
diff --git a/llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll b/llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll
index e071e8ae40133..186ec53f88a95 100644
--- a/llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll
+++ b/llvm/test/CodeGen/PowerPC/xray-tail-call-sled.ll
@@ -1,4 +1,4 @@
-; RUN: llc -filetype=asm -relocation-model=pic -o - -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -relocation-model=pic -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
 
 define i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK-LABEL: .Ltmp0:
diff --git a/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll b/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll
index 8d1d2bc77c646..585c28ea12549 100644
--- a/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll
+++ b/llvm/test/CodeGen/X86/xray-attribute-instrumentation.ll
@@ -1,7 +1,6 @@
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - \
-; RUN:     -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK:       .p2align 1, 0x90
diff --git a/llvm/test/CodeGen/X86/xray-custom-log.ll b/llvm/test/CodeGen/X86/xray-custom-log.ll
index f0d882ddbac67..1579e4b909de7 100644
--- a/llvm/test/CodeGen/X86/xray-custom-log.ll
+++ b/llvm/test/CodeGen/X86/xray-custom-log.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -mtriple=x86_64 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=x86_64 -relocation-model=pic < %s | FileCheck %s --check-prefix=PIC
+; RUN: llc -mtriple=x86_64 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64 -relocation-model=pic < %s | FileCheck %s --check-prefix=PIC
 
 ; RUN: llc -mtriple=x86_64 -filetype=obj %s -o %t
 ; RUN: llvm-dwarfdump %t | FileCheck %s --check-prefix=DBG
diff --git a/llvm/test/CodeGen/X86/xray-empty-firstmbb.mir b/llvm/test/CodeGen/X86/xray-empty-firstmbb.mir
index e87c86591ce8d..df5dc7b28ec1a 100644
--- a/llvm/test/CodeGen/X86/xray-empty-firstmbb.mir
+++ b/llvm/test/CodeGen/X86/xray-empty-firstmbb.mir
@@ -1,4 +1,4 @@
-# RUN: llc -run-pass xray-instrumentation -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s
+# RUN: llc -run-pass=xray-instrumentation -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s
 #
 # Make sure we can handle empty first basic blocks.
 
diff --git a/llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll b/llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll
index 2450d991e3aa2..29c9bea7509c8 100644
--- a/llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll
+++ b/llvm/test/CodeGen/X86/xray-ignore-loop-detection.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
 
 define i32 @foo(i32 %i) nounwind noinline uwtable "xray-instruction-threshold"="10" "xray-ignore-loops" {
 entry:
diff --git a/llvm/test/CodeGen/X86/xray-log-args.ll b/llvm/test/CodeGen/X86/xray-log-args.ll
index 812e04a483fb7..1aac51f42c75a 100644
--- a/llvm/test/CodeGen/X86/xray-log-args.ll
+++ b/llvm/test/CodeGen/X86/xray-log-args.ll
@@ -1,7 +1,7 @@
 ; When logging arguments is specified, emit the entry sled accordingly.
 
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s
 
 define i32 @callee(i32 %arg) nounwind noinline uwtable "function-instrument"="xray-always" "xray-log-args"="1" {
   ret i32 %arg
diff --git a/llvm/test/CodeGen/X86/xray-loop-detection.ll b/llvm/test/CodeGen/X86/xray-loop-detection.ll
index 4acb22983be3d..81450da7408ef 100644
--- a/llvm/test/CodeGen/X86/xray-loop-detection.ll
+++ b/llvm/test/CodeGen/X86/xray-loop-detection.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
 
 define i32 @foo(i32 %i) nounwind noinline uwtable "xray-instruction-threshold"="10" {
 entry:
diff --git a/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir b/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir
index 69e8c6bfda4be..60a33b95f1412 100644
--- a/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir
+++ b/llvm/test/CodeGen/X86/xray-multiplerets-in-blocks.mir
@@ -1,4 +1,4 @@
-# RUN: llc -verify-machineinstrs -run-pass xray-instrumentation -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s
+# RUN: llc -run-pass=xray-instrumentation -mtriple=x86_64-unknown-linux-gnu -o - %s | FileCheck %s
 #
 # Make sure we can handle multiple ret instructions in a single basic block for
 # XRay.
diff --git a/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-entry.ll b/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-entry.ll
index e0beb2e9082d1..83c254a0d8877 100644
--- a/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-entry.ll
+++ b/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-entry.ll
@@ -1,7 +1,6 @@
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - \
-; RUN:     -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" "xray-skip-entry" {
 ; CHECK-NOT: Lxray_sled_0:
diff --git a/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-exit.ll b/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-exit.ll
index 9c370e541219d..a7afad5980196 100644
--- a/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-exit.ll
+++ b/llvm/test/CodeGen/X86/xray-partial-instrumentation-skip-exit.ll
@@ -1,7 +1,7 @@
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - \
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc \
 ; RUN:     -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" "xray-skip-exit" {
 ; CHECK:       .p2align 1, 0x90
diff --git a/llvm/test/CodeGen/X86/xray-section-group.ll b/llvm/test/CodeGen/X86/xray-section-group.ll
index 9bfe82d400c29..c05520adf8997 100644
--- a/llvm/test/CodeGen/X86/xray-section-group.ll
+++ b/llvm/test/CodeGen/X86/xray-section-group.ll
@@ -1,6 +1,6 @@
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu -function-sections < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=obj -o %t -mtriple=x86_64-unknown-linux-gnu -function-sections < %s
-; RUN: llvm-objdump --triple=x86_64-unknown-linux-gnu --disassemble-all %t | FileCheck %s --check-prefix=CHECK-OBJ
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -function-sections < %s | FileCheck %s
+; RUN: llc -filetype=obj -o %t -mtriple=x86_64-unknown-linux-gnu -function-sections < %s
+; RUN: llvm-objdump --disassemble-all %t | FileCheck %s --check-prefix=CHECK-OBJ
 
 define i32 @foo() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK: .section .text.foo,"ax",@progbits
diff --git a/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll b/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll
index 8e3e0be412bcb..7bf47ea2894c0 100644
--- a/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll
+++ b/llvm/test/CodeGen/X86/xray-selective-instrumentation.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mcpu=nehalem | FileCheck %s
+; RUN: llc < %s -mcpu=nehalem | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
diff --git a/llvm/test/CodeGen/X86/xray-tail-call-sled.ll b/llvm/test/CodeGen/X86/xray-tail-call-sled.ll
index d109cf1c3dea0..b89f833abb3b8 100644
--- a/llvm/test/CodeGen/X86/xray-tail-call-sled.ll
+++ b/llvm/test/CodeGen/X86/xray-tail-call-sled.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -filetype=asm -o - -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-darwin-unknown    < %s | FileCheck %s
 
 define dso_local i32 @callee() nounwind noinline uwtable "function-instrument"="xray-always" {
 ; CHECK:       .p2align 1, 0x90

From 04eb93b1d559b40ffd6e8f3146cfb2ade6bb49d0 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Wed, 19 Jan 2022 09:16:07 -0800
Subject: [PATCH 238/946] [flang] Fix repeated "DT" editing

User-defined derived type editing in formatted I/O wasn't
working with repeat counts; e.g., "2DT(10)".  The solution required
some code to be moved from GetNextDataEdit() to CueUpNextDataEdit() so
that a stack entry for a nonparenthesized repeated data edit
descriptor would work correctly -- all other data edit descriptors
are capable of dealing with repetition in their callees, so the bug
hadn't been exposed before.

Debugging this problem led to some improvements in error messages
for bad format strings, and those changes have been retained; also,
a dead member function was discovered and expunged.

Differential Revision: https://reviews.llvm.org/D117904
---
 flang/runtime/descriptor-io.cpp       |   2 +-
 flang/runtime/format-implementation.h | 135 ++++++++------------------
 flang/runtime/format.h                |  14 ++-
 flang/runtime/io-stmt.h               |   5 +-
 4 files changed, 55 insertions(+), 101 deletions(-)

diff --git a/flang/runtime/descriptor-io.cpp b/flang/runtime/descriptor-io.cpp
index 20828a6d9a84e..d34ac68c8a533 100644
--- a/flang/runtime/descriptor-io.cpp
+++ b/flang/runtime/descriptor-io.cpp
@@ -19,7 +19,7 @@ std::optional<bool> DefinedFormattedIo(IoStatementState &io,
           peek->descriptor == DataEdit::ListDirected)) {
     // User-defined derived type formatting
     IoErrorHandler &handler{io.GetIoErrorHandler()};
-    DataEdit edit{*io.GetNextDataEdit()}; // consume it this time
+    DataEdit edit{*io.GetNextDataEdit(1)}; // now consume it; no repeats
     RUNTIME_CHECK(handler, edit.descriptor == peek->descriptor);
     char ioType[2 + edit.maxIoTypeChars];
     auto ioTypeLen{std::size_t{2} /*"DT"*/ + edit.ioTypeChars};
diff --git a/flang/runtime/format-implementation.h b/flang/runtime/format-implementation.h
index a32bb8e928fd7..b9c1b8427afe3 100644
--- a/flang/runtime/format-implementation.h
+++ b/flang/runtime/format-implementation.h
@@ -33,59 +33,6 @@ FormatControl<CONTEXT>::FormatControl(const Terminator &terminator,
   stack_[0].remaining = Iteration::unlimited; // 13.4(8)
 }
 
-template <typename CONTEXT>
-int FormatControl<CONTEXT>::GetMaxParenthesisNesting(
-    IoErrorHandler &handler, const CharType *format, std::size_t formatLength) {
-  int maxNesting{0};
-  int nesting{0};
-  const CharType *end{format + formatLength};
-  std::optional<CharType> quote;
-  int repeat{0};
-  for (const CharType *p{format}; p < end; ++p) {
-    if (quote) {
-      if (*p == *quote) {
-        quote.reset();
-      }
-    } else if (*p >= '0' && *p <= '9') {
-      repeat = 10 * repeat + *p - '0';
-    } else if (*p != ' ') {
-      switch (*p) {
-      case '\'':
-      case '"':
-        quote = *p;
-        break;
-      case 'h':
-      case 'H': // 9HHOLLERITH
-        p += repeat;
-        if (p >= end) {
-          handler.SignalError(IostatErrorInFormat,
-              "Hollerith (%dH) too long in FORMAT", repeat);
-          return maxNesting;
-        }
-        break;
-      case ' ':
-        break;
-      case '(':
-        ++nesting;
-        maxNesting = std::max(nesting, maxNesting);
-        break;
-      case ')':
-        nesting = std::max(nesting - 1, 0);
-        break;
-      }
-      repeat = 0;
-    }
-  }
-  if (quote) {
-    handler.SignalError(
-        IostatErrorInFormat, "Unbalanced quotation marks in FORMAT string");
-  } else if (nesting) {
-    handler.SignalError(
-        IostatErrorInFormat, "Unbalanced parentheses in FORMAT string");
-  }
-  return maxNesting;
-}
-
 template <typename CONTEXT>
 int FormatControl<CONTEXT>::GetIntField(
     IoErrorHandler &handler, CharType firstCh) {
@@ -98,7 +45,11 @@ int FormatControl<CONTEXT>::GetIntField(
   int result{0};
   bool negate{ch == '-'};
   if (negate || ch == '+') {
-    firstCh = '\0';
+    if (firstCh) {
+      firstCh = '\0';
+    } else {
+      ++offset_;
+    }
     ch = PeekNext();
   }
   while (ch >= '0' && ch <= '9') {
@@ -222,6 +173,15 @@ static void HandleControl(CONTEXT &context, char ch, char next, int n) {
 template <typename CONTEXT>
 int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
   int unlimitedLoopCheck{-1};
+  // Do repetitions remain on an unparenthesized data edit?
+  while (height_ > 1 && format_[stack_[height_ - 1].start] != '(') {
+    offset_ = stack_[height_ - 1].start;
+    int repeat{stack_[height_ - 1].remaining};
+    --height_;
+    if (repeat > 0) {
+      return repeat;
+    }
+  }
   while (true) {
     std::optional<int> repeat;
     bool unlimited{false};
@@ -242,16 +202,18 @@ int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
       unlimited = true;
       ch = GetNextChar(context);
       if (ch != '(') {
-        context.SignalError(IostatErrorInFormat,
-            "Invalid FORMAT: '*' may appear only before '('");
+        ReportBadFormat(context,
+            "Invalid FORMAT: '*' may appear only before '('",
+            maybeReversionPoint);
         return 0;
       }
     }
     ch = Capitalize(ch);
     if (ch == '(') {
       if (height_ >= maxHeight_) {
-        context.SignalError(IostatErrorInFormat,
-            "FORMAT stack overflow: too many nested parentheses");
+        ReportBadFormat(context,
+            "FORMAT stack overflow: too many nested parentheses",
+            maybeReversionPoint);
         return 0;
       }
       stack_[height_].start = offset_ - 1; // the '('
@@ -271,11 +233,11 @@ int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
         // Subtle point (F'2018 13.4 para 9): tha last parenthesized group
         // at height 1 becomes the restart point after control reaches the
         // end of the format, including its repeat count.
-        stack_[0].start = maybeReversionPoint - 1;
+        stack_[0].start = maybeReversionPoint;
       }
       ++height_;
     } else if (height_ == 0) {
-      context.SignalError(IostatErrorInFormat, "FORMAT lacks initial '('");
+      ReportBadFormat(context, "FORMAT lacks initial '('", maybeReversionPoint);
       return 0;
     } else if (ch == ')') {
       if (height_ == 1) {
@@ -284,12 +246,16 @@ int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
         }
         context.AdvanceRecord(); // implied / before rightmost )
       }
-      auto restart{stack_[height_ - 1].start + 1};
+      auto restart{stack_[height_ - 1].start};
+      if (format_[restart] == '(') {
+        ++restart;
+      }
       if (stack_[height_ - 1].remaining == Iteration::unlimited) {
         offset_ = restart;
         if (offset_ == unlimitedLoopCheck) {
-          context.SignalError(IostatErrorInFormat,
-              "Unlimited repetition in FORMAT lacks data edit descriptors");
+          ReportBadFormat(context,
+              "Unlimited repetition in FORMAT lacks data edit descriptors",
+              restart);
         }
       } else if (stack_[height_ - 1].remaining-- > 0) {
         offset_ = restart;
@@ -304,8 +270,9 @@ int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
         ++offset_;
       }
       if (offset_ >= formatLength_) {
-        context.SignalError(IostatErrorInFormat,
-            "FORMAT missing closing quote on character literal");
+        ReportBadFormat(context,
+            "FORMAT missing closing quote on character literal",
+            maybeReversionPoint);
         return 0;
       }
       ++offset_;
@@ -322,8 +289,8 @@ int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
     } else if (ch == 'H') {
       // 9HHOLLERITH
       if (!repeat || *repeat < 1 || offset_ + *repeat > formatLength_) {
-        context.SignalError(
-            IostatErrorInFormat, "Invalid width on Hollerith in FORMAT");
+        ReportBadFormat(context, "Invalid width on Hollerith in FORMAT",
+            maybeReversionPoint);
         return 0;
       }
       context.Emit(format_ + offset_, static_cast<std::size_t>(*repeat));
@@ -364,8 +331,8 @@ int FormatControl<CONTEXT>::CueUpNextDataEdit(Context &context, bool stop) {
       // TODO: any other raw characters?
       context.Emit(format_ + offset_ - 1, 1);
     } else {
-      context.SignalError(IostatErrorInFormat,
-          "Invalid character '%c' in FORMAT", static_cast<char>(ch));
+      ReportBadFormat(
+          context, "Invalid character in FORMAT", maybeReversionPoint);
       return 0;
     }
   }
@@ -410,11 +377,9 @@ DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
         }
       }
       if (!ok) {
-        context.SignalError(
-            IostatErrorInFormat, "Unclosed DT'iotype' in FORMAT");
+        ReportBadFormat(context, "Unclosed DT'iotype' in FORMAT", start);
       } else if (tooLong) {
-        context.SignalError(
-            IostatErrorInFormat, "Excessive DT'iotype' in FORMAT");
+        ReportBadFormat(context, "Excessive DT'iotype' in FORMAT", start);
       }
     }
     if (PeekNext() == '(') {
@@ -434,11 +399,9 @@ DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
         }
       }
       if (!ok) {
-        context.SignalError(
-            IostatErrorInFormat, "Unclosed DT(v_list) in FORMAT");
+        ReportBadFormat(context, "Unclosed DT(v_list) in FORMAT", start);
       } else if (tooLong) {
-        context.SignalError(
-            IostatErrorInFormat, "Excessive DT(v_list) in FORMAT");
+        ReportBadFormat(context, "Excessive DT(v_list) in FORMAT", start);
       }
     }
   }
@@ -460,27 +423,13 @@ DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
     }
   }
   edit.modes = context.mutableModes();
-
   // Handle repeated nonparenthesized edit descriptors
+  edit.repeat = std::min(repeat, maxRepeat); // 0 if maxRepeat==0
   if (repeat > maxRepeat) {
     stack_[height_].start = start; // after repeat count
-    stack_[height_].remaining = repeat; // full count
+    stack_[height_].remaining = repeat - edit.repeat;
     ++height_;
   }
-  edit.repeat = std::min(1, maxRepeat); // 0 if maxRepeat==0
-  if (height_ > 1) { // Subtle: stack_[0].start doesn't necessarily point to '('
-    int start{stack_[height_ - 1].start};
-    if (format_[start] != '(') {
-      if (stack_[height_ - 1].remaining > maxRepeat) {
-        edit.repeat = maxRepeat;
-        stack_[height_ - 1].remaining -= maxRepeat;
-        offset_ = start; // repeat same edit descriptor next time
-      } else {
-        edit.repeat = stack_[height_ - 1].remaining;
-        --height_;
-      }
-    }
-  }
   return edit;
 }
 
diff --git a/flang/runtime/format.h b/flang/runtime/format.h
index 8dccaab969a6a..98c1136b0f519 100644
--- a/flang/runtime/format.h
+++ b/flang/runtime/format.h
@@ -86,11 +86,6 @@ template <typename CONTEXT> class FormatControl {
   FormatControl(const Terminator &, const CharType *format,
       std::size_t formatLength, int maxHeight = maxMaxHeight);
 
-  // Determines the max parenthesis nesting level by scanning and validating
-  // the FORMAT string.
-  static int GetMaxParenthesisNesting(
-      IoErrorHandler &, const CharType *format, std::size_t formatLength);
-
   // For attempting to allocate in a user-supplied stack area
   static std::size_t GetNeededSize(int maxHeight) {
     return sizeof(FormatControl) -
@@ -146,6 +141,15 @@ template <typename CONTEXT> class FormatControl {
     return ch >= 'a' && ch <= 'z' ? ch + 'A' - 'a' : ch;
   }
 
+  void ReportBadFormat(Context &context, const char *msg, int offset) const {
+    if constexpr (std::is_same_v<CharType, char>) {
+      context.SignalError(IostatErrorInFormat,
+          "%s; at offset %d in format '%s'", msg, offset, format_);
+    } else {
+      context.SignalError(IostatErrorInFormat, "%s; at offset %d", msg, offset);
+    }
+  }
+
   // Data members are arranged and typed so as to reduce size.
   // This structure may be allocated in stack space loaned by the
   // user program for internal I/O.
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index 8327326c7f9ef..baf038dbdd245 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -87,7 +87,7 @@ class IoStatementState {
   void BackspaceRecord();
   void HandleRelativePosition(std::int64_t);
   void HandleAbsolutePosition(std::int64_t); // for r* in list I/O
-  std::optional<DataEdit> GetNextDataEdit(int = 1);
+  std::optional<DataEdit> GetNextDataEdit(int maxRepeat = 1);
   ExternalFileUnit *GetExternalFileUnit() const; // null if internal unit
   bool BeginReadingRecord();
   void FinishReadingRecord();
@@ -287,7 +287,8 @@ struct IoStatementBase : public IoErrorHandler {
   void BackspaceRecord();
   void HandleRelativePosition(std::int64_t);
   void HandleAbsolutePosition(std::int64_t);
-  std::optional<DataEdit> GetNextDataEdit(IoStatementState &, int = 1);
+  std::optional<DataEdit> GetNextDataEdit(
+      IoStatementState &, int maxRepeat = 1);
   ExternalFileUnit *GetExternalFileUnit() const;
   bool BeginReadingRecord();
   void FinishReadingRecord();

From db07e082abafb1494ba674047645e6113316673c Mon Sep 17 00:00:00 2001
From: Julian Lettner <julian.lettner@apple.com>
Date: Fri, 21 Jan 2022 16:58:06 -0800
Subject: [PATCH 239/946] [TSan] Omit vfork interceptor iOS simulator runtime

`_vfork` moved from libsystem_kernel.dylib to libsystem_c.dylib as part
of the below changes.  The iOS simulator does not actually have
libsystem_kernel.dylib of its own, it only has the host Mac's.  The
umbrella-nature of Libsystem makes this movement transparent to
everyone; except the simulator! So when we "back deploy", i.e., use the
current version of TSan with an older simulator runtime then this symbol
is now missing, when we run on the latest OS (but an older simulator
runtime).

Note we use `SANITIZER_IOS` because usage of vfork is forbidden on iOS
and the API is completely unavailable on watchOS and tvOS, even if this
problem is specific to the iOS simulator.

Caused by:
rdar://74818691 (Shim vfork() to fork syscall on iOS)
rdar://76762076 (Shim vfork() to fork syscall on macOS)

Radar-Id: rdar://8634734
---
 compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index c4f43d8171abb..056bd15e0907a 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -2204,6 +2204,7 @@ void atfork_child() {
   FdOnFork(thr, pc);
 }
 
+#if !SANITIZER_IOS
 TSAN_INTERCEPTOR(int, vfork, int fake) {
   // Some programs (e.g. openjdk) call close for all file descriptors
   // in the child process. Under tsan it leads to false positives, because
@@ -2220,6 +2221,7 @@ TSAN_INTERCEPTOR(int, vfork, int fake) {
   // Instead we simply turn vfork into fork.
   return WRAP(fork)(fake);
 }
+#endif
 
 #if SANITIZER_LINUX
 TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags,

From 3726626a26ec7bfccfd526e02f89c1ac5fe3520a Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Tue, 18 Jan 2022 15:17:15 -0800
Subject: [PATCH 240/946] [flang] Fix crash from USE-associated defined I/O
 subprograms

User-defined derived type I/O implementation subroutines and
generic interfaces may be USE-associated, but the code that builds
the type description table wasn't allowing for that possibility.
Add a call to GetUltimate() to cope.

Differential Revision: https://reviews.llvm.org/D117902
---
 flang/lib/Semantics/runtime-type-info.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flang/lib/Semantics/runtime-type-info.cpp b/flang/lib/Semantics/runtime-type-info.cpp
index 4c53df09ee637..250223bfafab1 100644
--- a/flang/lib/Semantics/runtime-type-info.cpp
+++ b/flang/lib/Semantics/runtime-type-info.cpp
@@ -1065,7 +1065,7 @@ void RuntimeTableBuilder::IncorporateDefinedIoGenericInterfaces(
     GenericKind::DefinedIo definedIo, const Scope *scope) {
   for (; !scope->IsGlobal(); scope = &scope->parent()) {
     if (auto asst{scope->find(name)}; asst != scope->end()) {
-      const Symbol &generic{*asst->second};
+      const Symbol &generic{asst->second->GetUltimate()};
       const auto &genericDetails{generic.get<GenericDetails>()};
       CHECK(std::holds_alternative<GenericKind::DefinedIo>(
           genericDetails.kind().u));

From b95150418fb6e2d22a0bd84abcdc1f3cc7e5a0bf Mon Sep 17 00:00:00 2001
From: Dave Lee <davelee.com@gmail.com>
Date: Thu, 13 Jan 2022 16:02:45 -0800
Subject: [PATCH 241/946] [lldb] Allow aliases to aliases of raw input commands

Allow users to create aliases for aliases to raw input commands. That probably
sounds convoluted, so here's an example:

```
command alias some-setup env SOMEVAR=SOMEVALUE
```

This an alias based on `env`, which itself is an alias for `_regex-env`.
`_regex-env` is a `command regex` command, which takes raw input.

The above `some-setup` alias fails with:

```
error: Unable to create requested alias.
```

This change allows such aliases to be created. lldb already supports aliases to
aliases for parsed commands.

Differential Revision: https://reviews.llvm.org/D117259
---
 lldb/source/Commands/CommandObjectCommands.cpp             | 5 +++--
 .../API/commands/command/nested_alias/TestNestedAlias.py   | 7 +++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp
index 1ec54cf7ededa..defa21af7c170 100644
--- a/lldb/source/Commands/CommandObjectCommands.cpp
+++ b/lldb/source/Commands/CommandObjectCommands.cpp
@@ -485,8 +485,9 @@ rather than using a positional placeholder:"
     OptionArgVectorSP option_arg_vector_sp =
         OptionArgVectorSP(new OptionArgVector);
 
-    if (CommandObjectSP cmd_obj_sp =
-            m_interpreter.GetCommandSPExact(cmd_obj.GetCommandName())) {
+    const bool include_aliases = true;
+    if (CommandObjectSP cmd_obj_sp = m_interpreter.GetCommandSPExact(
+            cmd_obj.GetCommandName(), include_aliases)) {
       if (m_interpreter.AliasExists(alias_command) ||
           m_interpreter.UserCommandExists(alias_command)) {
         result.AppendWarningWithFormat(
diff --git a/lldb/test/API/commands/command/nested_alias/TestNestedAlias.py b/lldb/test/API/commands/command/nested_alias/TestNestedAlias.py
index d4fc99492a698..bbe9c14f69f6d 100644
--- a/lldb/test/API/commands/command/nested_alias/TestNestedAlias.py
+++ b/lldb/test/API/commands/command/nested_alias/TestNestedAlias.py
@@ -46,6 +46,8 @@ def cleanup():
             self.runCmd('command unalias rd', check=False)
             self.runCmd('command unalias fo', check=False)
             self.runCmd('command unalias foself', check=False)
+            self.runCmd('command unalias add_two', check=False)
+            self.runCmd('command unalias two', check=False)
 
         # Execute the cleanup function during test case tear down.
         self.addTearDownHook(cleanup)
@@ -96,3 +98,8 @@ def cleanup():
                 'Show variables for the current',
                 'stack frame.'],
             matching=True)
+
+        # Check that aliases can be created for raw input commands.
+        self.expect('command alias two expr -- 2')
+        self.expect('command alias add_two two +')
+        self.expect('add_two 3', patterns=[' = 5$'])

From e796eaf2af65d5b7f09d7024a545ab0c61b832ac Mon Sep 17 00:00:00 2001
From: Alex Fan <alex.fan.q@gmail.com>
Date: Fri, 21 Jan 2022 19:51:09 +0800
Subject: [PATCH 242/946] [RISCV][RFC] add MC support for zbkc subextension

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117874
---
 llvm/lib/Support/RISCVISAInfo.cpp          |  1 +
 llvm/lib/Target/RISCV/RISCV.td             | 15 ++++++++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td  |  9 ++++++---
 llvm/lib/Target/RISCV/RISCVSchedRocket.td  |  2 +-
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td |  2 +-
 llvm/lib/Target/RISCV/RISCVSubtarget.h     |  2 ++
 llvm/test/CodeGen/RISCV/attributes.ll      |  4 ++++
 llvm/test/MC/RISCV/attribute-arch.s        |  3 +++
 llvm/test/MC/RISCV/rv32zbkc-invalid.s      |  9 +++++++++
 llvm/test/MC/RISCV/rv32zbkc-valid.s        | 23 ++++++++++++++++++++++
 10 files changed, 65 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv32zbkc-invalid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zbkc-valid.s

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 80fae5510326d..c42f3604d67ff 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -57,6 +57,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
     {"zbs", RISCVExtensionVersion{1, 0}},
 
     {"zbkb", RISCVExtensionVersion{1, 0}},
+    {"zbkc", RISCVExtensionVersion{1, 0}},
 };
 
 static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 378720bc6b26d..2b6ea4067a8ea 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -163,6 +163,21 @@ def HasStdExtZbbOrZbpOrZbkb
                                    "'Zbp' (Permutation 'B' Instructions) or "
                                    "'Zbkb' (Bitmanip instructions for Cryptography)">;
 
+// The Carry-less multiply subextension for cryptography is a subset of basic carry-less multiply subextension. The former should be enabled if the latter is enabled.
+def FeatureStdExtZbkc
+    : SubtargetFeature<"zbkc", "HasStdExtZbkc", "true",
+                       "'Zbkc' (Carry-less multiply instructions for Cryptography)">;
+def HasStdExtZbkc
+    : Predicate<"Subtarget->hasStdExtZbkc()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZbkc),
+                             "'Zbkc' (Carry-less multiply instructions for Cryptography)">;
+
+def HasStdExtZbcOrZbkc
+    : Predicate<"Subtarget->hasStdExtZbc() || Subtarget->hasStdExtZbkc()">,
+                AssemblerPredicate<(any_of FeatureStdExtZbc, FeatureStdExtZbkc),
+                                   "'Zbc' (Carry-Less 'B' Instructions) or "
+                                   "'Zbkc' (Carry-less multiply instructions for Cryptography)">;
+
 def FeatureNoRVCHints
     : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false",
                        "Disable RVC Hint Instructions.">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index a8fffb2d7d792..560ebb4eb08c6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -440,13 +440,16 @@ def CRC32CD : RVBUnary<0b0110000, 0b11011, 0b001, OPC_OP_IMM, "crc32c.d">,
               Sched<[]>;
 
 let Predicates = [HasStdExtZbc] in {
-def CLMUL  : ALU_rr<0b0000101, 0b001, "clmul">,
-             Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
 def CLMULR : ALU_rr<0b0000101, 0b010, "clmulr">,
              Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
+} // Predicates = [HasStdExtZbc]
+
+let Predicates = [HasStdExtZbcOrZbkc] in {
+def CLMUL  : ALU_rr<0b0000101, 0b001, "clmul">,
+             Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
 def CLMULH : ALU_rr<0b0000101, 0b011, "clmulh">,
              Sched<[WriteCLMUL, ReadCLMUL, ReadCLMUL]>;
-} // Predicates = [HasStdExtZbc]
+} // Predicates = [HasStdExtZbcOrZbkc]
 
 let Predicates = [HasStdExtZbb] in {
 def MIN  : ALU_rr<0b0000101, 0b100, "min">,
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 6cc24fa17c84a..783e65c1aa185 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,7 +17,7 @@ def RocketModel : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = false;
-  let UnsupportedFeatures = [HasStdExtZbkb, HasVInstructions, HasVInstructionsI64];
+  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasVInstructions, HasVInstructionsI64];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 2da8c14088890..d164514ce70f0 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,7 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasStdExtZbkb, HasVInstructions];
+  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasVInstructions];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 141e7114b5883..bacb8fae37941 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -84,6 +84,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasStdExtZfhmin = false;
   bool HasStdExtZfh = false;
   bool HasStdExtZbkb = false;
+  bool HasStdExtZbkc = false;
   bool HasRV64 = false;
   bool IsRV32E = false;
   bool EnableLinkerRelax = false;
@@ -158,6 +159,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtZfhmin() const { return HasStdExtZfhmin; }
   bool hasStdExtZfh() const { return HasStdExtZfh; }
   bool hasStdExtZbkb() const { return HasStdExtZbkb; }
+  bool hasStdExtZbkc() const { return HasStdExtZbkc; }
   bool is64Bit() const { return HasRV64; }
   bool isRV32E() const { return IsRV32E; }
   bool enableLinkerRelax() const { return EnableLinkerRelax; }
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index fa02d72797350..19ba02d531567 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -20,6 +20,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV32V %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s
+; RUN: llc -mtriple=riscv32 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV32ZBKC %s
 ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s
@@ -40,6 +41,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV64V %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s
+; RUN: llc -mtriple=riscv64 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV64ZBKC %s
 
 
 ; RV32M: .attribute 5, "rv32i2p0_m2p0"
@@ -62,6 +64,7 @@
 ; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0"
+; RV32ZBKC: .attribute 5, "rv32i2p0_zbkc1p0"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
 ; RV64A: .attribute 5, "rv64i2p0_a2p0"
@@ -83,6 +86,7 @@
 ; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0"
+; RV64ZBKC: .attribute 5, "rv64i2p0_zbkc1p0"
 
 define i32 @addi(i32 %a) {
   %1 = add i32 %a, 1
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index d95e99348e434..65d4008a5869e 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -130,3 +130,6 @@
 
 .attribute arch, "rv32i_zbkb1p0"
 # CHECK: attribute      5, "rv32i2p0_zbkb1p0"
+
+.attribute arch, "rv32i_zbkc1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkc1p0"
diff --git a/llvm/test/MC/RISCV/rv32zbkc-invalid.s b/llvm/test/MC/RISCV/rv32zbkc-invalid.s
new file mode 100644
index 0000000000000..dba625fe20c2a
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zbkc-invalid.s
@@ -0,0 +1,9 @@
+# RUN: not llvm-mc -triple riscv32 -mattr=+zbkc < %s 2>&1 | FileCheck %s
+
+# Too few operands
+clmul t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
+# Too few operands
+clmulh t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
+
+# Undefined zbc instruction in zbkc
+clmulr t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbc' (Carry-Less 'B' Instructions)
diff --git a/llvm/test/MC/RISCV/rv32zbkc-valid.s b/llvm/test/MC/RISCV/rv32zbkc-valid.s
new file mode 100644
index 0000000000000..10a62d9d2ce2d
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zbkc-valid.s
@@ -0,0 +1,23 @@
+# With Bitmanip carry-less multiply extension:
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkc -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zbkc -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zbkc < %s \
+# RUN:     | llvm-objdump --mattr=+zbkc -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zbkc < %s \
+# RUN:     | llvm-objdump --mattr=+zbkc -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkc,+zbc -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zbkc,+zbc < %s \
+# RUN:     | llvm-objdump --mattr=+zbkc,+zbc -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: clmul t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x12,0x73,0x0a]
+clmul t0, t1, t2
+# CHECK-ASM-AND-OBJ: clmulh t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x32,0x73,0x0a]
+clmulh t0, t1, t2

From b1856009fbc1fd594e283460a781f2b34fbd7d55 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Tue, 18 Jan 2022 10:40:10 -0800
Subject: [PATCH 243/946] [flang] Allow INQUIRE() on a child unit in
 user-defined I/O procedure

A procedure that implements a user-defined derived type I/O operation
is allowed to perform an INQUIRE statement on its unit.

Differential Revision: https://reviews.llvm.org/D117905https://reviews.llvm.org/D117905
---
 flang/runtime/io-api.cpp | 49 ++++++++++++++++++++++------------------
 flang/runtime/unit.h     |  2 +-
 2 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index 03c878d1a8506..64c798d7ff8ae 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -156,6 +156,13 @@ Cookie BeginExternalListIO(const char *what, int unitNumber,
   }
   ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous(
       unitNumber, DIR, false /*!unformatted*/, terminator)};
+  if (!unit.isUnformatted.has_value()) {
+    unit.isUnformatted = false;
+  }
+  if (*unit.isUnformatted) {
+    terminator.Crash("%s attempted on unformatted file", what);
+    return nullptr;
+  }
   if (ChildIo * child{unit.GetChildIo()}) {
     return child->CheckFormattingAndDirection(terminator, what, false, DIR)
         ? &child->BeginIoStatement<ChildListIoStatementState<DIR>>(
@@ -166,13 +173,6 @@ Cookie BeginExternalListIO(const char *what, int unitNumber,
       terminator.Crash("%s attempted on direct access file", what);
       return nullptr;
     }
-    if (!unit.isUnformatted.has_value()) {
-      unit.isUnformatted = false;
-    }
-    if (*unit.isUnformatted) {
-      terminator.Crash("%s attempted on unformatted file", what);
-      return nullptr;
-    }
     IoErrorHandler handler{terminator};
     unit.SetDirection(DIR, handler);
     IoStatementState &io{unit.BeginIoStatement<STATE<DIR>>(
@@ -202,6 +202,13 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength,
   }
   ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous(
       unitNumber, DIR, false /*!unformatted*/, terminator)};
+  if (!unit.isUnformatted.has_value()) {
+    unit.isUnformatted = false;
+  }
+  if (*unit.isUnformatted) {
+    terminator.Crash("Formatted I/O attempted on unformatted file");
+    return nullptr;
+  }
   if (ChildIo * child{unit.GetChildIo()}) {
     return child->CheckFormattingAndDirection(terminator,
                DIR == Direction::Output ? "formatted output"
@@ -211,13 +218,6 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength,
               *child, sourceFile, sourceLine)
         : nullptr;
   } else {
-    if (!unit.isUnformatted.has_value()) {
-      unit.isUnformatted = false;
-    }
-    if (*unit.isUnformatted) {
-      terminator.Crash("Formatted I/O attempted on unformatted file");
-      return nullptr;
-    }
     IoErrorHandler handler{terminator};
     unit.SetDirection(DIR, handler);
     IoStatementState &io{
@@ -247,6 +247,12 @@ Cookie BeginUnformattedIO(
   Terminator terminator{sourceFile, sourceLine};
   ExternalFileUnit &unit{ExternalFileUnit::LookUpOrCreateAnonymous(
       unitNumber, DIR, true /*unformatted*/, terminator)};
+  if (!unit.isUnformatted.has_value()) {
+    unit.isUnformatted = true;
+  }
+  if (!*unit.isUnformatted) {
+    terminator.Crash("Unformatted I/O attempted on formatted file");
+  }
   if (ChildIo * child{unit.GetChildIo()}) {
     return child->CheckFormattingAndDirection(terminator,
                DIR == Direction::Output ? "unformatted output"
@@ -256,12 +262,6 @@ Cookie BeginUnformattedIO(
               *child, sourceFile, sourceLine)
         : nullptr;
   } else {
-    if (!unit.isUnformatted.has_value()) {
-      unit.isUnformatted = true;
-    }
-    if (!*unit.isUnformatted) {
-      terminator.Crash("Unformatted I/O attempted on formatted file");
-    }
     IoStatementState &io{
         unit.BeginIoStatement<ExternalUnformattedIoStatementState<DIR>>(
             unit, sourceFile, sourceLine)};
@@ -367,8 +367,13 @@ Cookie IONAME(BeginRewind)(
 Cookie IONAME(BeginInquireUnit)(
     ExternalUnit unitNumber, const char *sourceFile, int sourceLine) {
   if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) {
-    return &unit->BeginIoStatement<InquireUnitState>(
-        *unit, sourceFile, sourceLine);
+    if (ChildIo * child{unit->GetChildIo()}) {
+      return &child->BeginIoStatement<InquireUnitState>(
+          *unit, sourceFile, sourceLine);
+    } else {
+      return &unit->BeginIoStatement<InquireUnitState>(
+          *unit, sourceFile, sourceLine);
+    }
   } else {
     // INQUIRE(UNIT=unrecognized unit)
     Terminator oom{sourceFile, sourceLine};
diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h
index a63921c9db75d..eaaccd578d8b8 100644
--- a/flang/runtime/unit.h
+++ b/flang/runtime/unit.h
@@ -183,7 +183,7 @@ class ChildIo {
       ChildListIoStatementState<Direction::Output>,
       ChildListIoStatementState<Direction::Input>,
       ChildUnformattedIoStatementState<Direction::Output>,
-      ChildUnformattedIoStatementState<Direction::Input>>
+      ChildUnformattedIoStatementState<Direction::Input>, InquireUnitState>
       u_;
   std::optional<IoStatementState> io_;
 };

From 55d887b833646baeea0e3371fd2cbbd7550a8d4d Mon Sep 17 00:00:00 2001
From: Wei Wang <apollo.mobility@gmail.com>
Date: Tue, 18 Jan 2022 14:08:48 -0800
Subject: [PATCH 244/946] [time-trace] Add optimizer and codegen regions to NPM

Optimizer and codegen regions were only added to legacy PM. Add
them to NPM as well.

Differential Revision: https://reviews.llvm.org/D117605
---
 clang/lib/CodeGen/BackendUtil.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 6b8e052305b49..9ae5c870afc81 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1492,8 +1492,11 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
   }
 
   // Now that we have all of the passes ready, run them.
-  PrettyStackTraceString CrashInfo("Optimizer");
-  MPM.run(*TheModule, MAM);
+  {
+    PrettyStackTraceString CrashInfo("Optimizer");
+    llvm::TimeTraceScope TimeScope("Optimizer");
+    MPM.run(*TheModule, MAM);
+  }
 }
 
 void EmitAssemblyHelper::RunCodegenPipeline(
@@ -1525,8 +1528,11 @@ void EmitAssemblyHelper::RunCodegenPipeline(
     return;
   }
 
-  PrettyStackTraceString CrashInfo("Code generation");
-  CodeGenPasses.run(*TheModule);
+  {
+    PrettyStackTraceString CrashInfo("Code generation");
+    llvm::TimeTraceScope TimeScope("CodeGenPasses");
+    CodeGenPasses.run(*TheModule);
+  }
 }
 
 /// A clean version of `EmitAssembly` that uses the new pass manager.

From b6098c07cb2076e53b4251df9edfc0a01d75ee4c Mon Sep 17 00:00:00 2001
From: Prashant Kumar <pk5561@gmail.com>
Date: Fri, 21 Jan 2022 23:53:22 +0530
Subject: [PATCH 245/946] [MLIR] Fix negative gcd in `normalizeDivisionByGCD`
 function.

When the coefficients of dividend are negative, the gcd may be negative
which will change the sign of dividend and overflow denominator.

Reviewed By: Groverkss

Differential Revision: https://reviews.llvm.org/D117911
---
 mlir/lib/Analysis/Presburger/Utils.cpp        |  7 ++++--
 .../Presburger/IntegerPolyhedronTest.cpp      | 24 +++++++++++++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp
index 765d4fb6a8dfd..7b8fe49b23c83 100644
--- a/mlir/lib/Analysis/Presburger/Utils.cpp
+++ b/mlir/lib/Analysis/Presburger/Utils.cpp
@@ -25,7 +25,10 @@ static void normalizeDivisionByGCD(SmallVectorImpl<int64_t> &dividend,
                                    unsigned &divisor) {
   if (divisor == 0 || dividend.empty())
     return;
-  int64_t gcd = llvm::greatestCommonDivisor(dividend.front(), int64_t(divisor));
+  // We take the absolute value of dividend's coefficients to make sure that
+  // `gcd` is positive.
+  int64_t gcd =
+      llvm::greatestCommonDivisor(std::abs(dividend.front()), int64_t(divisor));
 
   // The reason for ignoring the constant term is as follows.
   // For a division:
@@ -35,7 +38,7 @@ static void normalizeDivisionByGCD(SmallVectorImpl<int64_t> &dividend,
   // Since `{a/m}/d` in the dividend satisfies 0 <= {a/m}/d < 1/d, it will not
   // influence the result of the floor division and thus, can be ignored.
   for (size_t i = 1, m = dividend.size() - 1; i < m; i++) {
-    gcd = llvm::greatestCommonDivisor(dividend[i], gcd);
+    gcd = llvm::greatestCommonDivisor(std::abs(dividend[i]), gcd);
     if (gcd == 1)
       return;
   }
diff --git a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
index 2e4c135770431..5d1cfb4c6e781 100644
--- a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
+++ b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
@@ -969,4 +969,28 @@ TEST(IntegerPolyhedronTest, mergeDivisionsConstants) {
   }
 }
 
+TEST(IntegerPolyhedronTest, negativeDividends) {
+  // (x) : (exists y = [-x + 1 / 2], z = [-x - 2 / 3]: y + z >= x).
+  IntegerPolyhedron poly1(1);
+  poly1.addLocalFloorDiv({-1, 1}, 2); // y = [x + 1 / 2].
+  // Normalization test with negative dividends
+  poly1.addLocalFloorDiv({-3, 0, -6}, 9); // z = [3x + 6 / 9] -> [x + 2 / 3].
+  poly1.addInequality({-1, 1, 1, 0});     // y + z >= x.
+
+  // (x) : (exists y = [x + 1 / 3], z = [x + 2 / 3]: y + z <= x).
+  IntegerPolyhedron poly2(1);
+  // Normalization test.
+  poly2.addLocalFloorDiv({-2, 2}, 4);     // y = [-2x + 2 / 4] -> [-x + 1 / 2].
+  poly2.addLocalFloorDiv({-1, 0, -2}, 3); // z = [-x - 2 / 3].
+  poly2.addInequality({1, -1, -1, 0});    // y + z <= x.
+
+  poly1.mergeLocalIds(poly2);
+
+  // Merging triggers normalization.
+  std::vector<SmallVector<int64_t, 8>> divisions = {{-1, 0, 0, 1},
+                                                    {-1, 0, 0, -2}};
+  SmallVector<unsigned, 8> denoms = {2, 3};
+  checkDivisionRepresentation(poly1, divisions, denoms);
+}
+
 } // namespace mlir

From de872382951572b70dfaefe8d77eb98d15586115 Mon Sep 17 00:00:00 2001
From: luxufan <932494295@qq.com>
Date: Sat, 22 Jan 2022 16:07:17 +0800
Subject: [PATCH 246/946] [JITLink] Add anonymous symbols in LinkGraph for
 unnamed temporary symbols

In RISCV, temporary symbols will be used to generate dwarf, eh_frame sections..., and will be placed in object code's symbol table. However, LLVM does not use names on these temporary symbols. This patch add anonymous symbols in LinkGraph for these temporary symbols.

Reviewed By: lhames

Differential Revision: https://reviews.llvm.org/D116475
---
 .../llvm/ExecutionEngine/JITLink/riscv.h      |  8 +++++++-
 .../JITLink/ELFLinkGraphBuilder.h             | 12 +++++++----
 .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp |  8 ++++++++
 llvm/lib/ExecutionEngine/JITLink/riscv.cpp    |  2 ++
 .../JITLink/RISCV/anonymous_symbol.s          | 20 +++++++++++++++++++
 5 files changed, 45 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s

diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
index 3c5cdfcfdba40..4d045a23c012b 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
@@ -79,6 +79,12 @@ enum EdgeKind_riscv : Edge::Kind {
   ///   Fixup <- (Target - Fixup + Addend)
   R_RISCV_CALL,
 
+  /// 32 bits PC relative relocation
+  ///
+  /// Fixup expression:
+  ///   Fixup <- (Target - Fixup + Addend)
+  R_RISCV_32_PCREL,
+
   /// PC relative GOT offset
   ///
   /// Fixup expression:
@@ -137,7 +143,7 @@ enum EdgeKind_riscv : Edge::Kind {
   ///
   /// Fixup expression
   ///   Fixup <- (Target - *{1}Fixup - Addend)
-  R_RISCV_SUB8
+  R_RISCV_SUB8,
 };
 
 /// Returns a string name for the given riscv edge. For debugging purposes
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index 931a60224ee2f..2ab7ed61f71b4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -441,11 +441,15 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
                  << "\"\n";
         });
 
-        // Model the section symbols as anonymous symbol.
+        // In RISCV, temporary symbols (Used to generate dwarf, eh_frame
+        // sections...) will appear in object code's symbol table, and LLVM does
+        // not use names on these temporary symbols (RISCV gnu toolchain uses
+        // names on these temporary symbols). If the symbol is unnamed, add an
+        // anonymous symbol.
         auto &GSym =
-            Sym.getType() == ELF::STT_SECTION
-                ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, false,
-                                        false)
+            Name->empty()
+                ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size,
+                                        false, false)
                 : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L,
                                       S, Sym.getType() == ELF::STT_FUNC, false);
         setGraphSymbol(SymIndex, GSym);
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index 4483147c1b1d7..b3bc5cdb13f1d 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -359,6 +359,12 @@ class ELFJITLinker_riscv : public JITLinker<ELFJITLinker_riscv> {
       *FixupPtr = static_cast<uint8_t>(Value);
       break;
     }
+    case R_RISCV_32_PCREL: {
+      // FIXME: It seems that R_RISCV_32_PCREL relocation will only appear in debug sections
+      // like eh_frame section. Currently, because of eh_frame will not be processed in JITLink's RISCV
+      // backend, test this relocation is difficult, so here report error if needs to fixup this relocation
+      return make_error<JITLinkError>("Fixup of relocation type R_RISCV_32_PCREL is not supportted");
+    }
     }
     return Error::success();
   }
@@ -409,6 +415,8 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder<ELFT> {
       return EdgeKind_riscv::R_RISCV_SUB16;
     case ELF::R_RISCV_SUB8:
       return EdgeKind_riscv::R_RISCV_SUB8;
+    case ELF::R_RISCV_32_PCREL:
+      return EdgeKind_riscv::R_RISCV_32_PCREL;
     }
 
     return make_error<JITLinkError>("Unsupported riscv relocation:" +
diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
index 4d1ace73a04e3..1dcc3ebb8e97b 100644
--- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
@@ -38,6 +38,8 @@ const char *getEdgeKindName(Edge::Kind K) {
     return "R_RISCV_PCREL_LO12_S";
   case R_RISCV_CALL:
     return "R_RISCV_CALL";
+  case R_RISCV_32_PCREL:
+    return "R_RISCV_32_PCREL";
   case R_RISCV_ADD64:
     return "R_RISCV_ADD64";
   case R_RISCV_ADD32:
diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s
new file mode 100644
index 0000000000000..cb4c59e196dc9
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s
@@ -0,0 +1,20 @@
+# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t %s
+# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s
+#
+# Because of the exist of cfi directive, sections like eh_frame section will be emitted
+# in llvm's object code emission phase. Anonymous symbols will also be emitted to indicate
+# the section start and section end. So that by relocating these symbol, the section length
+# can be calculated.
+#
+# CHECK: Creating defined graph symbol for ELF symbol ""
+# CHECK: Creating defined graph symbol for ELF symbol ""
+        .text
+        .globl main
+        .p2align 2
+        .type main,@function
+main:
+        .cfi_startproc
+        ret
+        .Lfunc_end0:
+        .size main, .Lfunc_end0-main
+        .cfi_endproc

From fdb6578514dd3799ad23c8bbb7699577c0fb414d Mon Sep 17 00:00:00 2001
From: luxufan <932494295@qq.com>
Date: Sat, 22 Jan 2022 17:26:54 +0800
Subject: [PATCH 247/946] Revert "[JITLink] Add anonymous symbols in LinkGraph
 for unnamed temporary symbols"

This reverts commit de872382951572b70dfaefe8d77eb98d15586115.

Buildbot check error
---
 .../llvm/ExecutionEngine/JITLink/riscv.h      |  8 +-------
 .../JITLink/ELFLinkGraphBuilder.h             | 12 ++++-------
 .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp |  8 --------
 llvm/lib/ExecutionEngine/JITLink/riscv.cpp    |  2 --
 .../JITLink/RISCV/anonymous_symbol.s          | 20 -------------------
 5 files changed, 5 insertions(+), 45 deletions(-)
 delete mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s

diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
index 4d045a23c012b..3c5cdfcfdba40 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
@@ -79,12 +79,6 @@ enum EdgeKind_riscv : Edge::Kind {
   ///   Fixup <- (Target - Fixup + Addend)
   R_RISCV_CALL,
 
-  /// 32 bits PC relative relocation
-  ///
-  /// Fixup expression:
-  ///   Fixup <- (Target - Fixup + Addend)
-  R_RISCV_32_PCREL,
-
   /// PC relative GOT offset
   ///
   /// Fixup expression:
@@ -143,7 +137,7 @@ enum EdgeKind_riscv : Edge::Kind {
   ///
   /// Fixup expression
   ///   Fixup <- (Target - *{1}Fixup - Addend)
-  R_RISCV_SUB8,
+  R_RISCV_SUB8
 };
 
 /// Returns a string name for the given riscv edge. For debugging purposes
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index 2ab7ed61f71b4..931a60224ee2f 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -441,15 +441,11 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
                  << "\"\n";
         });
 
-        // In RISCV, temporary symbols (Used to generate dwarf, eh_frame
-        // sections...) will appear in object code's symbol table, and LLVM does
-        // not use names on these temporary symbols (RISCV gnu toolchain uses
-        // names on these temporary symbols). If the symbol is unnamed, add an
-        // anonymous symbol.
+        // Model the section symbols as anonymous symbol.
         auto &GSym =
-            Name->empty()
-                ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size,
-                                        false, false)
+            Sym.getType() == ELF::STT_SECTION
+                ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, false,
+                                        false)
                 : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L,
                                       S, Sym.getType() == ELF::STT_FUNC, false);
         setGraphSymbol(SymIndex, GSym);
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index b3bc5cdb13f1d..4483147c1b1d7 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -359,12 +359,6 @@ class ELFJITLinker_riscv : public JITLinker<ELFJITLinker_riscv> {
       *FixupPtr = static_cast<uint8_t>(Value);
       break;
     }
-    case R_RISCV_32_PCREL: {
-      // FIXME: It seems that R_RISCV_32_PCREL relocation will only appear in debug sections
-      // like eh_frame section. Currently, because of eh_frame will not be processed in JITLink's RISCV
-      // backend, test this relocation is difficult, so here report error if needs to fixup this relocation
-      return make_error<JITLinkError>("Fixup of relocation type R_RISCV_32_PCREL is not supportted");
-    }
     }
     return Error::success();
   }
@@ -415,8 +409,6 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder<ELFT> {
       return EdgeKind_riscv::R_RISCV_SUB16;
     case ELF::R_RISCV_SUB8:
       return EdgeKind_riscv::R_RISCV_SUB8;
-    case ELF::R_RISCV_32_PCREL:
-      return EdgeKind_riscv::R_RISCV_32_PCREL;
     }
 
     return make_error<JITLinkError>("Unsupported riscv relocation:" +
diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
index 1dcc3ebb8e97b..4d1ace73a04e3 100644
--- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
@@ -38,8 +38,6 @@ const char *getEdgeKindName(Edge::Kind K) {
     return "R_RISCV_PCREL_LO12_S";
   case R_RISCV_CALL:
     return "R_RISCV_CALL";
-  case R_RISCV_32_PCREL:
-    return "R_RISCV_32_PCREL";
   case R_RISCV_ADD64:
     return "R_RISCV_ADD64";
   case R_RISCV_ADD32:
diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s
deleted file mode 100644
index cb4c59e196dc9..0000000000000
--- a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s
+++ /dev/null
@@ -1,20 +0,0 @@
-# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t %s
-# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s
-#
-# Because of the exist of cfi directive, sections like eh_frame section will be emitted
-# in llvm's object code emission phase. Anonymous symbols will also be emitted to indicate
-# the section start and section end. So that by relocating these symbol, the section length
-# can be calculated.
-#
-# CHECK: Creating defined graph symbol for ELF symbol ""
-# CHECK: Creating defined graph symbol for ELF symbol ""
-        .text
-        .globl main
-        .p2align 2
-        .type main,@function
-main:
-        .cfi_startproc
-        ret
-        .Lfunc_end0:
-        .size main, .Lfunc_end0-main
-        .cfi_endproc

From f7d4cafe5a6a51ccc6072c9dd304ced4f8e96aa7 Mon Sep 17 00:00:00 2001
From: fourdim <fourdim@foxmail.com>
Date: Sat, 22 Jan 2022 03:34:13 +0800
Subject: [PATCH 248/946] [JITLink][RISCV] Support R_RISCV_SET* and
 R_RISCV_32_PCREL relocations

This patch supports R_RISCV_SET* and R_RISCV_32_PCREL relocations in JITLink.

Reviewed By: StephenFan

Differential Revision: https://reviews.llvm.org/D117082
---
 .../llvm/ExecutionEngine/JITLink/riscv.h      | 32 +++++++++++++-
 .../lib/ExecutionEngine/JITLink/ELF_riscv.cpp | 43 +++++++++++++++++++
 llvm/lib/ExecutionEngine/JITLink/riscv.cpp    | 10 +++++
 .../JITLink/RISCV/ELF_pc_relative.s           | 19 ++++++++
 .../JITLink/RISCV/ELF_reloc_set.s             | 31 +++++++++++++
 5 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_relative.s
 create mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/ELF_reloc_set.s

diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
index 3c5cdfcfdba40..ed874c53d269b 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
@@ -137,7 +137,37 @@ enum EdgeKind_riscv : Edge::Kind {
   ///
   /// Fixup expression
   ///   Fixup <- (Target - *{1}Fixup - Addend)
-  R_RISCV_SUB8
+  R_RISCV_SUB8,
+
+  /// Local label assignment
+  ///
+  /// Fixup expression:
+  ///   Fixup <- (Target + Addend)
+  R_RISCV_SET6,
+
+  /// Local label assignment
+  ///
+  /// Fixup expression:
+  ///   Fixup <- (Target + Addend)
+  R_RISCV_SET8,
+
+  /// Local label assignment
+  ///
+  /// Fixup expression:
+  ///   Fixup <- (Target + Addend)
+  R_RISCV_SET16,
+
+  /// Local label assignment
+  ///
+  /// Fixup expression:
+  ///   Fixup <- (Target + Addend)
+  R_RISCV_SET32,
+
+  /// Local label assignment
+  ///
+  /// Fixup expression:
+  ///   Fixup <- (Target - Fixup + Addend)
+  R_RISCV_32_PCREL,
 };
 
 /// Returns a string name for the given riscv edge. For debugging purposes
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
index 4483147c1b1d7..f83001417e946 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_riscv.cpp
@@ -359,6 +359,39 @@ class ELFJITLinker_riscv : public JITLinker<ELFJITLinker_riscv> {
       *FixupPtr = static_cast<uint8_t>(Value);
       break;
     }
+    case R_RISCV_SET6: {
+      int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue();
+      uint32_t RawData = *(little32_t *)FixupPtr;
+      int64_t Word6 = Value & 0x3f;
+      *(little32_t *)FixupPtr = (RawData & 0xffffffc0) | Word6;
+      break;
+    }
+    case R_RISCV_SET8: {
+      int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue();
+      uint32_t RawData = *(little32_t *)FixupPtr;
+      int64_t Word8 = Value & 0xff;
+      *(little32_t *)FixupPtr = (RawData & 0xffffff00) | Word8;
+      break;
+    }
+    case R_RISCV_SET16: {
+      int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue();
+      uint32_t RawData = *(little32_t *)FixupPtr;
+      int64_t Word16 = Value & 0xffff;
+      *(little32_t *)FixupPtr = (RawData & 0xffff0000) | Word16;
+      break;
+    }
+    case R_RISCV_SET32: {
+      int64_t Value = (E.getTarget().getAddress() + E.getAddend()).getValue();
+      int64_t Word32 = Value & 0xffffffff;
+      *(little32_t *)FixupPtr = Word32;
+      break;
+    }
+    case R_RISCV_32_PCREL: {
+      int64_t Value = E.getTarget().getAddress() + E.getAddend() - FixupAddress;
+      int64_t Word32 = Value & 0xffffffff;
+      *(little32_t *)FixupPtr = Word32;
+      break;
+    }
     }
     return Error::success();
   }
@@ -409,6 +442,16 @@ class ELFLinkGraphBuilder_riscv : public ELFLinkGraphBuilder<ELFT> {
       return EdgeKind_riscv::R_RISCV_SUB16;
     case ELF::R_RISCV_SUB8:
       return EdgeKind_riscv::R_RISCV_SUB8;
+    case ELF::R_RISCV_SET6:
+      return EdgeKind_riscv::R_RISCV_SET6;
+    case ELF::R_RISCV_SET8:
+      return EdgeKind_riscv::R_RISCV_SET8;
+    case ELF::R_RISCV_SET16:
+      return EdgeKind_riscv::R_RISCV_SET16;
+    case ELF::R_RISCV_SET32:
+      return EdgeKind_riscv::R_RISCV_SET32;
+    case ELF::R_RISCV_32_PCREL:
+      return EdgeKind_riscv::R_RISCV_32_PCREL;
     }
 
     return make_error<JITLinkError>("Unsupported riscv relocation:" +
diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
index 4d1ace73a04e3..6efd7abd85ddf 100644
--- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
@@ -54,6 +54,16 @@ const char *getEdgeKindName(Edge::Kind K) {
     return "R_RISCV_SUB16";
   case R_RISCV_SUB8:
     return "R_RISCV_SUB8";
+  case R_RISCV_SET6:
+    return "R_RISCV_SET6";
+  case R_RISCV_SET8:
+    return "R_RISCV_SET8";
+  case R_RISCV_SET16:
+    return "R_RISCV_SET16";
+  case R_RISCV_SET32:
+    return "R_RISCV_SET32";
+  case R_RISCV_32_PCREL:
+    return "R_RISCV_32_PCREL";
   }
   return getGenericEdgeKindName(K);
 }
diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_relative.s b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_relative.s
new file mode 100644
index 0000000000000..b4dd87a16ffc6
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_pc_relative.s
@@ -0,0 +1,19 @@
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t/riscv64_pc_relative.o %s
+# RUN: llvm-mc -triple=riscv32 -filetype=obj -o %t/riscv32_pc_relative.o %s
+# RUN: llvm-jitlink -noexec -check %s %t/riscv64_pc_relative.o
+# RUN: llvm-jitlink -noexec -check %s %t/riscv32_pc_relative.o
+
+# jitlink-check: *{4}(foo) = 0x4
+
+.global main
+main:
+  lw a0, foo
+
+.section ".text","",@progbits
+.type foo,@function
+foo:
+  nop
+  nop
+  .reloc foo, R_RISCV_32_PCREL, foo+4
+  .size foo, 8
diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_reloc_set.s b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_reloc_set.s
new file mode 100644
index 0000000000000..edd617b70c8f0
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/RISCV/ELF_reloc_set.s
@@ -0,0 +1,31 @@
+# RUN: rm -rf %t && mkdir -p %t
+# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t/riscv64_reloc_set.o %s
+# RUN: llvm-mc -triple=riscv32 -filetype=obj -o %t/riscv32_reloc_set.o %s
+# RUN: llvm-jitlink -noexec \
+# RUN:  -slab-allocate 100Kb -slab-address 0xfff0f0f0 -slab-page-size 4096 \
+# RUN:  -check %s %t/riscv64_reloc_set.o
+# RUN: llvm-jitlink -noexec \
+# RUN:  -slab-allocate 100Kb -slab-address 0xfff0f0f0 -slab-page-size 4096 \
+# RUN:  -check %s %t/riscv32_reloc_set.o
+
+# jitlink-check: *{4}(foo) = foo
+# jitlink-check: *{2}(foo+4) = foo[15:0]
+# jitlink-check: *{1}(foo+6) = foo[7:0]
+# jitlink-check: *{1}(foo+7) = foo[5:0]
+
+.global main
+main:
+  lw a0, foo
+
+.section ".rodata","",@progbits
+.type foo,@object
+foo:
+  .reloc foo, R_RISCV_SET32, foo
+  .reloc foo+4, R_RISCV_SET16, foo
+  .reloc foo+6, R_RISCV_SET8, foo
+  .reloc foo+7, R_RISCV_SET6, foo
+  .word 0
+  .half 0
+  .byte 0
+  .byte 0
+  .size foo, 8

From 26544b98f7bf744d2ccd29cc6559db24bc1a4e50 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Fri, 21 Jan 2022 20:08:57 +0100
Subject: [PATCH 249/946] [libc++] Use addressof in unordered_set.

This addresses the usage of `operator&` in `<unordered_set>`.

(Note there are still more headers with the same issue.)

Reviewed By: #libc, philnik, Quuxplusone

Differential Revision: https://reviews.llvm.org/D117917
---
 libcxx/include/unordered_set                  | 15 +++---
 .../move.addressof.compile.pass.cpp           | 29 ++++++++++++
 .../move_alloc.addressof.compile.pass.cpp     | 33 +++++++++++++
 .../emplace_hint.addressof.compile.pass.cpp   | 30 ++++++++++++
 ...nt_const_lvalue.addressof.compile.pass.cpp | 28 +++++++++++
 ...ert_hint_rvalue.addressof.compile.pass.cpp | 27 +++++++++++
 ...rator.operators.addressof.compile.pass.cpp | 47 +++++++++++++++++++
 .../move.addressof.compile.pass.cpp           | 29 ++++++++++++
 .../move_alloc.addressof.compile.pass.cpp     | 35 ++++++++++++++
 9 files changed, 266 insertions(+), 7 deletions(-)
 create mode 100644 libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.set/emplace_hint.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.set/insert_hint_rvalue.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.set/iterator.operators.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.addressof.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.addressof.compile.pass.cpp

diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set
index ad58fda24f87e..29a19f2f0cb5b 100644
--- a/libcxx/include/unordered_set
+++ b/libcxx/include/unordered_set
@@ -463,6 +463,7 @@ template <class Value, class Hash, class Pred, class Alloc>
 #include <__debug>
 #include <__functional/is_transparent.h>
 #include <__hash_table>
+#include <__memory/addressof.h>
 #include <__node_handle>
 #include <__utility/forward.h>
 #include <compare>
@@ -640,7 +641,7 @@ public:
 #if _LIBCPP_DEBUG_LEVEL == 2
         iterator emplace_hint(const_iterator __p, _Args&&... __args)
         {
-            _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+            _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                 "unordered_set::emplace_hint(const_iterator, args...) called with an iterator not"
                 " referring to this unordered_set");
             return __table_.__emplace_unique(_VSTD::forward<_Args>(__args)...).first;
@@ -657,7 +658,7 @@ public:
 #if _LIBCPP_DEBUG_LEVEL == 2
     iterator insert(const_iterator __p, value_type&& __x)
         {
-            _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+            _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                 "unordered_set::insert(const_iterator, value_type&&) called with an iterator not"
                 " referring to this unordered_set");
             return insert(_VSTD::move(__x)).first;
@@ -678,7 +679,7 @@ public:
 #if _LIBCPP_DEBUG_LEVEL == 2
     iterator insert(const_iterator __p, const value_type& __x)
         {
-            _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this,
+            _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(_VSTD::addressof(__p)) == this,
                 "unordered_set::insert(const_iterator, const value_type&) called with an iterator not"
                 " referring to this unordered_set");
             return insert(__x).first;
@@ -1019,7 +1020,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(
 {
     _VSTD::__debug_db_insert_c(this);
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -1037,7 +1038,7 @@ unordered_set<_Value, _Hash, _Pred, _Alloc>::unordered_set(
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
     else
-        __get_db()->swap(this, &__u);
+        __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -1660,7 +1661,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
 {
     _VSTD::__debug_db_insert_c(this);
 #if _LIBCPP_DEBUG_LEVEL == 2
-    __get_db()->swap(this, &__u);
+    __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
@@ -1678,7 +1679,7 @@ unordered_multiset<_Value, _Hash, _Pred, _Alloc>::unordered_multiset(
     }
 #if _LIBCPP_DEBUG_LEVEL == 2
     else
-        __get_db()->swap(this, &__u);
+        __get_db()->swap(this, _VSTD::addressof(__u));
 #endif
 }
 
diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..9df029e61341a
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move.addressof.compile.pass.cpp
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_set>
+
+// template <class Value, class Hash = hash<Value>, class Pred = equal_to<Value>,
+//           class Alloc = allocator<Value>>
+// class unordered_multiset
+
+// unordered_multiset(unordered_multiset&& u);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_set>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_multiset<operator_hijacker> so;
+  std::unordered_multiset<operator_hijacker> s(std::move(so));
+}
diff --git a/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..cd1f70eab9e68
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.multiset/unord.multiset.cnstr/move_alloc.addressof.compile.pass.cpp
@@ -0,0 +1,33 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_set>
+
+// template <class Value, class Hash = hash<Value>, class Pred = equal_to<Value>,
+//           class Alloc = allocator<Value>>
+// class unordered_multiset
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_set>
+
+#include "test_allocator.h"
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  using A = test_allocator<operator_hijacker>;
+  using H = std::hash<operator_hijacker>;
+  using P = std::equal_to<operator_hijacker>;
+
+  const A a;
+  std::unordered_multiset<operator_hijacker, H, P, A> so;
+  std::unordered_multiset<operator_hijacker, H, P, A> s(std::move(so), a);
+}
diff --git a/libcxx/test/std/containers/unord/unord.set/emplace_hint.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/emplace_hint.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..af6e37eaef960
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.set/emplace_hint.addressof.compile.pass.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_set>
+
+// template <class Value, class Hash = hash<Value>, class Pred = equal_to<Value>,
+//           class Alloc = allocator<Value>>
+// class unordered_set
+
+// template <class... Args>
+//     iterator emplace_hint(const_iterator p, Args&&... args);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_set>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_set<operator_hijacker> s;
+  s.emplace_hint(s.cbegin());
+}
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..f83f28fc923fc
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.set/insert_hint_const_lvalue.addressof.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_set>
+
+// template <class Value, class Hash = hash<Value>, class Pred = equal_to<Value>,
+//           class Alloc = allocator<Value>>
+// class unordered_set
+
+// iterator insert(const_iterator p, const value_type& x);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_set>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_set<operator_hijacker> s;
+  const operator_hijacker v;
+  s.insert(s.cbegin(), v);
+}
diff --git a/libcxx/test/std/containers/unord/unord.set/insert_hint_rvalue.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/insert_hint_rvalue.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..333c673e30b9a
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.set/insert_hint_rvalue.addressof.compile.pass.cpp
@@ -0,0 +1,27 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_set>
+
+// template <class Value, class Hash = hash<Value>, class Pred = equal_to<Value>,
+//           class Alloc = allocator<Value>>
+// class unordered_set
+
+// iterator insert(const_iterator p, value_type&& x);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_set>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_set<operator_hijacker> s;
+  s.insert(s.cbegin(), operator_hijacker());
+}
diff --git a/libcxx/test/std/containers/unord/unord.set/iterator.operators.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/iterator.operators.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..457172d11f6bd
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.set/iterator.operators.addressof.compile.pass.cpp
@@ -0,0 +1,47 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <unordered_set>
+
+// template <class Value, class Hash = hash<Value>, class Pred = equal_to<Value>,
+//           class Alloc = allocator<Value>>
+
+// class unordered_set
+
+#include <unordered_set>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+template <class ToIterator, class FromIterator>
+void test() {
+  FromIterator from;
+  ToIterator copy(from);
+  copy = from;
+
+  ToIterator move(std::move(from));
+  from = FromIterator();
+  move = std::move(from);
+}
+
+void test() {
+  {
+    using I = std::unordered_set<operator_hijacker>::iterator;
+    using CI = std::unordered_set<operator_hijacker>::const_iterator;
+    test<I, I>();
+    test<CI, I>();
+    test<CI, CI>();
+  }
+  {
+    using IL = std::unordered_set<operator_hijacker>::local_iterator;
+    using CIL = std::unordered_set<operator_hijacker>::const_local_iterator;
+    test<IL, IL>();
+    test<CIL, IL>();
+    test<CIL, CIL>();
+  }
+}
diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..f7e9eb444bd58
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move.addressof.compile.pass.cpp
@@ -0,0 +1,29 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_set>
+
+// template <class Value, class Hash = hash<Value>, class Pred = equal_to<Value>,
+//           class Alloc = allocator<Value>>
+// class unordered_set
+
+// unordered_set(unordered_set&& u);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_set>
+
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  std::unordered_set<operator_hijacker> so;
+  std::unordered_set<operator_hijacker> s(std::move(so));
+}
diff --git a/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.addressof.compile.pass.cpp b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.addressof.compile.pass.cpp
new file mode 100644
index 0000000000000..8e9d8cb8d87b2
--- /dev/null
+++ b/libcxx/test/std/containers/unord/unord.set/unord.set.cnstr/move_alloc.addressof.compile.pass.cpp
@@ -0,0 +1,35 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03
+
+// <unordered_set>
+
+// template <class Value, class Hash = hash<Value>, class Pred = equal_to<Value>,
+//           class Alloc = allocator<Value>>
+// class unordered_set
+
+// unordered_set(unordered_set&& u, const allocator_type& a);
+
+// Validate whether the operation properly guards against ADL-hijacking operator&
+
+#include <unordered_set>
+
+#include "test_allocator.h"
+#include "test_macros.h"
+#include "operator_hijacker.h"
+
+void test() {
+  using A = test_allocator<operator_hijacker>;
+  using H = std::hash<operator_hijacker>;
+  using P = std::equal_to<operator_hijacker>;
+
+  const A a;
+  std::unordered_set<operator_hijacker, H, P, A> so;
+  std::unordered_set<operator_hijacker, H, P, A> s(std::move(so), a);
+}

From 4041354b4c12fb4329853b67f61b8617252188d6 Mon Sep 17 00:00:00 2001
From: Alexander Belyaev <pifon@google.com>
Date: Sat, 22 Jan 2022 11:42:47 +0100
Subject: [PATCH 250/946] [mlir] Add
 SingleBlockImplicitTerminator<"tensor::YieldOp"> to PadOp.

---
 mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td | 5 +++--
 mlir/test/Dialect/Tensor/invalid.mlir            | 9 ---------
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
index 05cb41d791d35..882ea33b9c03b 100644
--- a/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
+++ b/mlir/include/mlir/Dialect/Tensor/IR/TensorOps.td
@@ -782,13 +782,14 @@ def Tensor_CollapseShapeOp : Tensor_ReassociativeReshapeOp<"collapse_shape"> {
 // PadOp
 //===----------------------------------------------------------------------===//
 
-def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect]> {
+def Tensor_PadOp : Tensor_Op<"pad", [AttrSizedOperandSegments, NoSideEffect,
+    SingleBlockImplicitTerminator<"mlir::tensor::YieldOp">]> {
   let summary = "tensor pad operation";
   let description = [{
     `tensor.pad` is an operation that pads the `source` tensor
     with given `low` and `high` padding config.
 
-    The PadTensor operation supports the following arguments:
+    The PadOp operation supports the following arguments:
 
     * source: the "base" tensor on which to pad.
     * low: A list contains the padding along the start of each
diff --git a/mlir/test/Dialect/Tensor/invalid.mlir b/mlir/test/Dialect/Tensor/invalid.mlir
index cec4718595a45..7dbf662fd6fe3 100644
--- a/mlir/test/Dialect/Tensor/invalid.mlir
+++ b/mlir/test/Dialect/Tensor/invalid.mlir
@@ -343,15 +343,6 @@ func @pad_number_of_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9
 
 // -----
 
-func @pad_no_block(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
-  // expected-error @+1 {{op region #0 ('region') failed to verify constraint: region with 1 blocks}}
-  %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {
-  } : tensor<?x4xi32> to tensor<?x9xi32>
-  return %0 : tensor<?x9xi32>
-}
-
-// -----
-
 func @pad_block_args(%arg0: tensor<?x4xi32>, %arg1: i32) -> tensor<?x9xi32> {
   // expected-error @+1 {{op expected block argument 1 to be an index}}
   %0 = tensor.pad %arg0 low[1, 2] high[2, 3] {

From 93deac2e2ba96248c05441a0bfa8385a73d78acb Mon Sep 17 00:00:00 2001
From: Micah Weston <micahsweston@gmail.com>
Date: Sat, 22 Jan 2022 12:39:22 +0000
Subject: [PATCH 251/946] [AArch64] Optimize add/sub with immediate through
 MIPeepholeOpt

Fixes the build issue with D111034, whose goal was to optimize
add/sub with long immediates.

Optimize ([add|sub] r, imm) -> ([ADD|SUB] ([ADD|SUB] r, #imm0, lsl #12), #imm1),
if imm == (imm0<<12)+imm1. and both imm0 and imm1 are non-zero 12-bit unsigned
integers.

Optimize ([add|sub] r, imm) -> ([SUB|ADD] ([SUB|ADD] r, #imm0, lsl #12), #imm1),
if imm == -(imm0<<12)-imm1, and both imm0 and imm1 are non-zero 12-bit unsigned
integers.

The change which fixed the build issue in D111034 was the use of new virtual
registers so that SSA form is maintained until deleting MI.

Differential Revision: https://reviews.llvm.org/D117429
---
 .../Target/AArch64/AArch64MIPeepholeOpt.cpp   | 204 +++++++++++++++---
 .../test/CodeGen/AArch64/addsub-24bit-imm.mir |  63 ++++++
 llvm/test/CodeGen/AArch64/addsub.ll           |  96 ++++++---
 .../AArch64/large-offset-gep.ll               |   5 +-
 4 files changed, 302 insertions(+), 66 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir

diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 3e1306eb32972..0ddfc717e47f3 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -11,12 +11,19 @@
 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
 //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
 //
+// 2. MOVi32imm + ADDWrr ==> ADDWRi + ADDWRi
+//    MOVi64imm + ADDXrr ==> ANDXri + ANDXri
+//
+// 3. MOVi32imm + SUBWrr ==> SUBWRi + SUBWRi
+//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+//
 //    The mov pseudo instruction could be expanded to multiple mov instructions
 //    later. In this case, we could try to split the constant  operand of mov
-//    instruction into two bitmask immediates. It makes two AND instructions
-//    intead of multiple `mov` + `and` instructions.
+//    instruction into two immediates which can be directly encoded into
+//    *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
+//    multiple `mov` + `and/add/sub` instructions.
 //
-// 2. Remove redundant ORRWrs which is generated by zero-extend.
+// 4. Remove redundant ORRWrs which is generated by zero-extend.
 //
 //    %3:gpr32 = ORRWrs $wzr, %2, 0
 //    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
@@ -51,6 +58,12 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   MachineLoopInfo *MLI;
   MachineRegisterInfo *MRI;
 
+  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
+                        MachineInstr *&SubregToRegMI);
+
+  template <typename T>
+  bool visitADDSUB(MachineInstr &MI,
+                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved, bool IsAdd);
   template <typename T>
   bool visitAND(MachineInstr &MI,
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
@@ -131,36 +144,9 @@ bool AArch64MIPeepholeOpt::visitAND(
   assert((RegSize == 32 || RegSize == 64) &&
          "Invalid RegSize for AND bitmask peephole optimization");
 
-  // Check whether AND's MBB is in loop and the AND is loop invariant.
-  MachineBasicBlock *MBB = MI.getParent();
-  MachineLoop *L = MLI->getLoopFor(MBB);
-  if (L && !L->isLoopInvariant(MI))
-    return false;
-
-  // Check whether AND's operand is MOV with immediate.
-  MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
-  if (!MovMI)
-    return false;
-
-  MachineInstr *SubregToRegMI = nullptr;
-  // If it is SUBREG_TO_REG, check its operand.
-  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
-    SubregToRegMI = MovMI;
-    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
-    if (!MovMI)
-      return false;
-  }
-
-  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
-      MovMI->getOpcode() != AArch64::MOVi64imm)
-    return false;
-
-  // If the MOV has multiple uses, do not split the immediate because it causes
-  // more instructions.
-  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
-    return false;
-
-  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
+  // Perform several essential checks against current MI.
+  MachineInstr *MovMI = nullptr, *SubregToRegMI = nullptr;
+  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
     return false;
 
   // Split the bitmask immediate into two.
@@ -177,6 +163,7 @@ bool AArch64MIPeepholeOpt::visitAND(
 
   // Create new AND MIs.
   DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock *MBB = MI.getParent();
   const TargetRegisterClass *ANDImmRC =
       (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
   Register DstReg = MI.getOperand(0).getReg();
@@ -251,6 +238,145 @@ bool AArch64MIPeepholeOpt::visitORR(
   return true;
 }
 
+template <typename T>
+static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
+  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
+  // imm0 and imm1 are non-zero 12-bit unsigned int.
+  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
+      (Imm & ~static_cast<T>(0xffffff)) != 0)
+    return false;
+
+  // The immediate can not be composed via a single instruction.
+  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
+  if (Insn.size() == 1)
+    return false;
+
+  // Split Imm into (Imm0 << 12) + Imm1;
+  Imm0 = (Imm >> 12) & 0xfff;
+  Imm1 = Imm & 0xfff;
+  return true;
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::visitADDSUB(
+    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+    bool IsAdd) {
+  // Try below transformation.
+  //
+  // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
+  // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
+  //
+  // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
+  // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+  //
+  // The mov pseudo instruction could be expanded to multiple mov instructions
+  // later. Let's try to split the constant operand of mov instruction into two
+  // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
+  // multiple `mov` + `and/sub` instructions.
+
+  unsigned RegSize = sizeof(T) * 8;
+  assert((RegSize == 32 || RegSize == 64) &&
+         "Invalid RegSize for legal add/sub immediate peephole optimization");
+
+  // Perform several essential checks against current MI.
+  MachineInstr *MovMI, *SubregToRegMI;
+  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
+    return false;
+
+  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
+  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
+  unsigned Opcode;
+  if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) {
+    if (IsAdd)
+      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
+    else
+      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
+  } else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) {
+    if (IsAdd)
+      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
+    else
+      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
+  } else {
+    return false;
+  }
+
+  // Create new ADD/SUB MIs.
+  DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock *MBB = MI.getParent();
+  const TargetRegisterClass *RC =
+      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register NewTmpReg = MRI->createVirtualRegister(RC);
+  Register NewDstReg = MRI->createVirtualRegister(RC);
+
+  MRI->constrainRegClass(SrcReg, RC);
+  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+      .addReg(SrcReg)
+      .addImm(Imm0)
+      .addImm(12);
+
+  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
+  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+      .addReg(NewTmpReg)
+      .addImm(Imm1)
+      .addImm(0);
+
+  MRI->replaceRegWith(DstReg, NewDstReg);
+  // replaceRegWith changes MI's definition register. Keep it for SSA form until
+  // deleting MI.
+  MI.getOperand(0).setReg(DstReg);
+
+  // Record the MIs need to be removed.
+  ToBeRemoved.insert(&MI);
+  if (SubregToRegMI)
+    ToBeRemoved.insert(SubregToRegMI);
+  ToBeRemoved.insert(MovMI);
+
+  return true;
+}
+
+// Checks if the corresponding MOV immediate instruction is applicable for
+// this peephole optimization.
+bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
+                                            MachineInstr *&MovMI,
+                                            MachineInstr *&SubregToRegMI) {
+  // Check whether current MBB is in loop and the AND is loop invariant.
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineLoop *L = MLI->getLoopFor(MBB);
+  if (L && !L->isLoopInvariant(MI))
+    return false;
+
+  // Check whether current MI's operand is MOV with immediate.
+  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+  if (!MovMI)
+    return false;
+
+  // If it is SUBREG_TO_REG, check its operand.
+  SubregToRegMI = nullptr;
+  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
+    SubregToRegMI = MovMI;
+    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
+    if (!MovMI)
+      return false;
+  }
+
+  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
+      MovMI->getOpcode() != AArch64::MOVi64imm)
+    return false;
+
+  // If the MOV has multiple uses, do not split the immediate because it causes
+  // more instructions.
+  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
+    return false;
+  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
+    return false;
+
+  // It is OK to perform this peephole optimization.
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -278,6 +404,18 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       case AArch64::ORRWrs:
         Changed = visitORR(MI, ToBeRemoved);
         break;
+      case AArch64::ADDWrr:
+        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, true);
+        break;
+      case AArch64::SUBWrr:
+        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, false);
+        break;
+      case AArch64::ADDXrr:
+        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, true);
+        break;
+      case AArch64::SUBXrr:
+        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, false);
+        break;
       }
     }
   }
diff --git a/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir b/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir
new file mode 100644
index 0000000000000..b114e617cbd87
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/addsub-24bit-imm.mir
@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=aarch64-mi-peephole-opt -o - -mtriple=aarch64-unknown-linux -verify-machineinstrs %s | FileCheck %s
+
+# Main intention is to verify machine instructions have valid register classes.
+# Use of UBFM[W|X]ri is used as an arbitrary instruction that requires GPR[32|64]RegClass.
+# If the ADD/SUB optimization generates invalid register classes, this test will fail.
+---
+name: addi
+body: |
+  bb.0.entry:
+    liveins: $w0
+    ; CHECK-LABEL: name: addi
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY $w0
+    ; CHECK-NEXT: [[ADDWri:%[0-9]+]]:gpr32sp = ADDWri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDWri1:%[0-9]+]]:gpr32common = ADDWri [[ADDWri]], 3549, 0
+    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32 = UBFMWri [[ADDWri1]], 28, 31
+    ; CHECK-NEXT: $w0 = COPY [[UBFMWri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %0:gpr32 = COPY $w0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr32 = ADDWrr %0, %1
+    %3:gpr32 = UBFMWri %2, 28, 31
+    $w0 = COPY %3
+    RET_ReallyLR implicit $w0
+...
+---
+name: addl
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: addl
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[ADDXri1:%[0-9]+]]:gpr64common = ADDXri [[ADDXri]], 3549, 0
+    ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[ADDXri1]], 28, 31
+    ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr32 = MOVi32imm 1121757
+    %2:gpr64 = SUBREG_TO_REG 0, %1, %subreg.sub_32
+    %3:gpr64 = ADDXrr %0, killed %2
+    %4:gpr64 = UBFMXri %3, 28, 31
+    $x0 = COPY %4
+    RET_ReallyLR implicit $x0
+...
+---
+name: addl_negate
+body: |
+  bb.0.entry:
+    liveins: $x0
+    ; CHECK-LABEL: name: addl_negate
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY $x0
+    ; CHECK-NEXT: [[SUBXri:%[0-9]+]]:gpr64sp = SUBXri [[COPY]], 273, 12
+    ; CHECK-NEXT: [[SUBXri1:%[0-9]+]]:gpr64common = SUBXri [[SUBXri]], 3549, 0
+    ; CHECK-NEXT: [[UBFMXri:%[0-9]+]]:gpr64 = UBFMXri [[SUBXri1]], 28, 31
+    ; CHECK-NEXT: $x0 = COPY [[UBFMXri]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %0:gpr64 = COPY $x0
+    %1:gpr64 = MOVi64imm -1121757
+    %2:gpr64 = ADDXrr %0, killed %1
+    %3:gpr64 = UBFMXri %2, 28, 31
+    $x0 = COPY %3
+    RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index f0857fe2d9660..37c9e4c5c6fe1 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -152,9 +152,8 @@ define void @sub_med() {
 define i64 @add_two_parts_imm_i64(i64 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    add x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = add i64 %a, 11183445
   ret i64 %b
@@ -163,9 +162,8 @@ define i64 @add_two_parts_imm_i64(i64 %a) {
 define i32 @add_two_parts_imm_i32(i32 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    add w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = add i32 %a, 11183445
   ret i32 %b
@@ -174,9 +172,8 @@ define i32 @add_two_parts_imm_i32(i32 %a) {
 define i64 @add_two_parts_imm_i64_neg(i64 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i64_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-42325
-; CHECK-NEXT:    movk x8, #65365, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    sub x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = add i64 %a, -11183445
   ret i64 %b
@@ -185,9 +182,8 @@ define i64 @add_two_parts_imm_i64_neg(i64 %a) {
 define i32 @add_two_parts_imm_i32_neg(i32 %a) {
 ; CHECK-LABEL: add_two_parts_imm_i32_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23211
-; CHECK-NEXT:    movk w8, #65365, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    sub w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = add i32 %a, -11183445
   ret i32 %b
@@ -196,9 +192,8 @@ define i32 @add_two_parts_imm_i32_neg(i32 %a) {
 define i64 @sub_two_parts_imm_i64(i64 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov x8, #-42325
-; CHECK-NEXT:    movk x8, #65365, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    sub x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i64 %a, 11183445
   ret i64 %b
@@ -207,9 +202,8 @@ define i64 @sub_two_parts_imm_i64(i64 %a) {
 define i32 @sub_two_parts_imm_i32(i32 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23211
-; CHECK-NEXT:    movk w8, #65365, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    sub w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    sub w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i32 %a, 11183445
   ret i32 %b
@@ -218,9 +212,8 @@ define i32 @sub_two_parts_imm_i32(i32 %a) {
 define i64 @sub_two_parts_imm_i64_neg(i64 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i64_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    add x8, x0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add x0, x8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i64 %a, -11183445
   ret i64 %b
@@ -229,14 +222,57 @@ define i64 @sub_two_parts_imm_i64_neg(i64 %a) {
 define i32 @sub_two_parts_imm_i32_neg(i32 %a) {
 ; CHECK-LABEL: sub_two_parts_imm_i32_neg:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #42325
-; CHECK-NEXT:    movk w8, #170, lsl #16
-; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    add w8, w0, #2730, lsl #12 // =11182080
+; CHECK-NEXT:    add w0, w8, #1365
 ; CHECK-NEXT:    ret
   %b = sub i32 %a, -11183445
   ret i32 %b
 }
 
+define i32 @add_27962026(i32 %a) {
+; CHECK-LABEL: add_27962026:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43690
+; CHECK-NEXT:    movk w8, #426, lsl #16
+; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    ret
+  %b = add i32 %a, 27962026
+  ret i32 %b
+}
+
+define i32 @add_65534(i32 %a) {
+; CHECK-LABEL: add_65534:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #65534
+; CHECK-NEXT:    add w0, w0, w8
+; CHECK-NEXT:    ret
+  %b = add i32 %a, 65534
+  ret i32 %b
+}
+
+declare i32 @foox(i32)
+
+define void @add_in_loop(i32 %0) {
+; CHECK-LABEL: add_in_loop:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    mov w19, #43690
+; CHECK-NEXT:    movk w19, #170, lsl #16
+; CHECK-NEXT:  .LBB15_1: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    add w0, w0, w19
+; CHECK-NEXT:    bl foox
+; CHECK-NEXT:    b .LBB15_1
+  br label %2
+2:
+  %3 = phi i32 [ %0, %1 ], [ %5, %2 ]
+  %4 = add nsw i32 %3, 11184810
+  %5 = tail call i32 @foox(i32 %4) #2
+  br label %2
+}
+
 define void @testing() {
 ; CHECK-LABEL: testing:
 ; CHECK:       // %bb.0:
@@ -244,7 +280,7 @@ define void @testing() {
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:var_i32]
 ; CHECK-NEXT:    ldr w9, [x8]
 ; CHECK-NEXT:    cmp w9, #4095
-; CHECK-NEXT:    b.ne .LBB13_6
+; CHECK-NEXT:    b.ne .LBB16_6
 ; CHECK-NEXT:  // %bb.1: // %test2
 ; CHECK-NEXT:    adrp x10, :got:var2_i32
 ; CHECK-NEXT:    add w11, w9, #1
@@ -252,26 +288,26 @@ define void @testing() {
 ; CHECK-NEXT:    str w11, [x8]
 ; CHECK-NEXT:    ldr w10, [x10]
 ; CHECK-NEXT:    cmp w10, #3567, lsl #12 // =14610432
-; CHECK-NEXT:    b.lo .LBB13_6
+; CHECK-NEXT:    b.lo .LBB16_6
 ; CHECK-NEXT:  // %bb.2: // %test3
 ; CHECK-NEXT:    add w11, w9, #2
 ; CHECK-NEXT:    cmp w9, #123
 ; CHECK-NEXT:    str w11, [x8]
-; CHECK-NEXT:    b.lt .LBB13_6
+; CHECK-NEXT:    b.lt .LBB16_6
 ; CHECK-NEXT:  // %bb.3: // %test4
 ; CHECK-NEXT:    add w11, w9, #3
 ; CHECK-NEXT:    cmp w10, #321
 ; CHECK-NEXT:    str w11, [x8]
-; CHECK-NEXT:    b.gt .LBB13_6
+; CHECK-NEXT:    b.gt .LBB16_6
 ; CHECK-NEXT:  // %bb.4: // %test5
 ; CHECK-NEXT:    add w11, w9, #4
 ; CHECK-NEXT:    cmn w10, #443
 ; CHECK-NEXT:    str w11, [x8]
-; CHECK-NEXT:    b.ge .LBB13_6
+; CHECK-NEXT:    b.ge .LBB16_6
 ; CHECK-NEXT:  // %bb.5: // %test6
 ; CHECK-NEXT:    add w9, w9, #5
 ; CHECK-NEXT:    str w9, [x8]
-; CHECK-NEXT:  .LBB13_6: // %common.ret
+; CHECK-NEXT:  .LBB16_6: // %common.ret
 ; CHECK-NEXT:    ret
   %val = load i32, i32* @var_i32
   %val2 = load i32, i32* @var2_i32
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
index 1c587080f4b68..97e877211b120 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -214,10 +214,9 @@ define void @test5([65536 x i32]** %s, i32 %n) {
 ; CHECK-LABEL: test5:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    ldr x9, [x0]
-; CHECK-NEXT:    mov w10, #14464
-; CHECK-NEXT:    movk w10, #1, lsl #16
 ; CHECK-NEXT:    mov w8, wzr
-; CHECK-NEXT:    add x9, x9, x10
+; CHECK-NEXT:    add x9, x9, #19, lsl #12 // =77824
+; CHECK-NEXT:    add x9, x9, #2176
 ; CHECK-NEXT:    cmp w8, w1
 ; CHECK-NEXT:    b.ge .LBB4_2
 ; CHECK-NEXT:  .LBB4_1: // %while_body

From 0283b07746e879961fd9361579e0da2a62430696 Mon Sep 17 00:00:00 2001
From: luxufan <932494295@qq.com>
Date: Sat, 22 Jan 2022 16:07:17 +0800
Subject: [PATCH 252/946] reapply de872382951 "[JITLink] Add anonymous symbols
 in LinkGraph..." with fixes

This reapply `de872382951572b70dfaefe8d77eb98d15586115`, which was
reverted in `fdb6578514dd3799ad23c8bbb7699577c0fb414d`

Add `# REQUIRES: asserts` in test file `anonymous_symbol.s` to disable
this test for non-debug build
---
 .../llvm/ExecutionEngine/JITLink/riscv.h      | 12 +++++------
 .../JITLink/ELFLinkGraphBuilder.h             | 12 +++++++----
 llvm/lib/ExecutionEngine/JITLink/riscv.cpp    |  4 ++--
 .../JITLink/RISCV/anonymous_symbol.s          | 21 +++++++++++++++++++
 4 files changed, 37 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s

diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
index ed874c53d269b..5abd4cf11deaf 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/riscv.h
@@ -79,6 +79,12 @@ enum EdgeKind_riscv : Edge::Kind {
   ///   Fixup <- (Target - Fixup + Addend)
   R_RISCV_CALL,
 
+  /// 32 bits PC relative relocation
+  ///
+  /// Fixup expression:
+  ///   Fixup <- (Target - Fixup + Addend)
+  R_RISCV_32_PCREL,
+
   /// PC relative GOT offset
   ///
   /// Fixup expression:
@@ -162,12 +168,6 @@ enum EdgeKind_riscv : Edge::Kind {
   /// Fixup expression:
   ///   Fixup <- (Target + Addend)
   R_RISCV_SET32,
-
-  /// Local label assignment
-  ///
-  /// Fixup expression:
-  ///   Fixup <- (Target - Fixup + Addend)
-  R_RISCV_32_PCREL,
 };
 
 /// Returns a string name for the given riscv edge. For debugging purposes
diff --git a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
index 931a60224ee2f..2ab7ed61f71b4 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
+++ b/llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h
@@ -441,11 +441,15 @@ template <typename ELFT> Error ELFLinkGraphBuilder<ELFT>::graphifySymbols() {
                  << "\"\n";
         });
 
-        // Model the section symbols as anonymous symbol.
+        // In RISCV, temporary symbols (Used to generate dwarf, eh_frame
+        // sections...) will appear in object code's symbol table, and LLVM does
+        // not use names on these temporary symbols (RISCV gnu toolchain uses
+        // names on these temporary symbols). If the symbol is unnamed, add an
+        // anonymous symbol.
         auto &GSym =
-            Sym.getType() == ELF::STT_SECTION
-                ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size, false,
-                                        false)
+            Name->empty()
+                ? G->addAnonymousSymbol(*B, Sym.getValue(), Sym.st_size,
+                                        false, false)
                 : G->addDefinedSymbol(*B, Sym.getValue(), *Name, Sym.st_size, L,
                                       S, Sym.getType() == ELF::STT_FUNC, false);
         setGraphSymbol(SymIndex, GSym);
diff --git a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
index 6efd7abd85ddf..3ce2cf10a24cb 100644
--- a/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/riscv.cpp
@@ -38,6 +38,8 @@ const char *getEdgeKindName(Edge::Kind K) {
     return "R_RISCV_PCREL_LO12_S";
   case R_RISCV_CALL:
     return "R_RISCV_CALL";
+  case R_RISCV_32_PCREL:
+    return "R_RISCV_32_PCREL";
   case R_RISCV_ADD64:
     return "R_RISCV_ADD64";
   case R_RISCV_ADD32:
@@ -62,8 +64,6 @@ const char *getEdgeKindName(Edge::Kind K) {
     return "R_RISCV_SET16";
   case R_RISCV_SET32:
     return "R_RISCV_SET32";
-  case R_RISCV_32_PCREL:
-    return "R_RISCV_32_PCREL";
   }
   return getGenericEdgeKindName(K);
 }
diff --git a/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s
new file mode 100644
index 0000000000000..fc1c006095444
--- /dev/null
+++ b/llvm/test/ExecutionEngine/JITLink/RISCV/anonymous_symbol.s
@@ -0,0 +1,21 @@
+# REQUIRES: asserts
+# RUN: llvm-mc -triple=riscv64 -filetype=obj -o %t %s
+# RUN: llvm-jitlink -debug-only=jitlink -noexec %t 2>&1 | FileCheck %s
+#
+# Because of the exist of cfi directive, sections like eh_frame section will be emitted
+# in llvm's object code emission phase. Anonymous symbols will also be emitted to indicate
+# the section start and section end. So that by relocating these symbol, the section length
+# can be calculated.
+#
+# CHECK: Creating defined graph symbol for ELF symbol ""
+# CHECK: Creating defined graph symbol for ELF symbol ""
+        .text
+        .globl main
+        .p2align 2
+        .type main,@function
+main:
+        .cfi_startproc
+        ret
+        .Lfunc_end0:
+        .size main, .Lfunc_end0-main
+        .cfi_endproc

From b27e5459d51fd5ba80a1182e5bd8c0fd5e2e6a49 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Sat, 22 Jan 2022 13:20:36 +0000
Subject: [PATCH 253/946] [DAG] Convert truncstore(extend(x)) back to store(x)

Pulled out of D106237, this folds truncstore(extend(x)) back to store(x)
if the original store was legal. This can come up due to the order we
fold nodes. A fold from X86 needs to be adjusted to prevent infinite
loops, to have it pick the operand of a trunc more directly.

Differential Revision: https://reviews.llvm.org/D117901
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  9 +++++++++
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  3 ++-
 llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll | 15 +++++++--------
 3 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index d1f75b40e79db..861beee6386bf 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -18396,6 +18396,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       Value.getValueType().isInteger() &&
       (!isa<ConstantSDNode>(Value) ||
        !cast<ConstantSDNode>(Value)->isOpaque())) {
+    // Convert a truncating store of a extension into a standard store.
+    if ((Value.getOpcode() == ISD::ZERO_EXTEND ||
+         Value.getOpcode() == ISD::SIGN_EXTEND ||
+         Value.getOpcode() == ISD::ANY_EXTEND) &&
+        Value.getOperand(0).getValueType() == ST->getMemoryVT() &&
+        TLI.isOperationLegalOrCustom(ISD::STORE, ST->getMemoryVT()))
+      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+                          ST->getMemOperand());
+
     APInt TruncDemandedBits =
         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                              ST->getMemoryVT().getScalarSizeInBits());
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 71c80d518f998..17079116a6ae1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48170,7 +48170,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
       St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
       TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
       St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
-    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
+    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32,
+                              St->getValue().getOperand(0));
     return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
                              MVT::v16i8, St->getMemOperand());
   }
diff --git a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
index 80802658b2a04..2660c3162b107 100644
--- a/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll
@@ -543,17 +543,16 @@ define amdgpu_kernel void @test_copy_v3i8_align1(<3 x i8> addrspace(1)* %out, <3
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_mov_b32 s8, s2
 ; VI-NEXT:    s_mov_b32 s9, s3
-; VI-NEXT:    buffer_load_ubyte v0, off, s[8:11], 0
-; VI-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0 offset:1
-; VI-NEXT:    buffer_load_ubyte v2, off, s[8:11], 0 offset:2
+; VI-NEXT:    buffer_load_ushort v0, off, s[8:11], 0
+; VI-NEXT:    buffer_load_ubyte v1, off, s[8:11], 0 offset:2
 ; VI-NEXT:    s_mov_b32 s4, s0
 ; VI-NEXT:    s_mov_b32 s5, s1
-; VI-NEXT:    s_waitcnt vmcnt(2)
+; VI-NEXT:    s_waitcnt vmcnt(1)
 ; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0
-; VI-NEXT:    s_waitcnt vmcnt(2)
-; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0 offset:1
-; VI-NEXT:    s_waitcnt vmcnt(2)
-; VI-NEXT:    buffer_store_byte v2, off, s[4:7], 0 offset:2
+; VI-NEXT:    s_waitcnt vmcnt(1)
+; VI-NEXT:    buffer_store_byte v1, off, s[4:7], 0 offset:2
+; VI-NEXT:    v_lshrrev_b16_e32 v0, 8, v0
+; VI-NEXT:    buffer_store_byte v0, off, s[4:7], 0 offset:1
 ; VI-NEXT:    s_endpgm
   %val = load <3 x i8>, <3 x i8> addrspace(1)* %in, align 1
   store <3 x i8> %val, <3 x i8> addrspace(1)* %out, align 1

From 8dedf9b58bff3589bff8cb422e449c4ee7f11499 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Sat, 22 Jan 2022 23:20:14 +0800
Subject: [PATCH 254/946] [PowerPC] Change CTR clobber estimation for 128-bit
 floating types

Reviewed By: shchenz

Differential Revision: https://reviews.llvm.org/D117459
---
 .../Target/PowerPC/PPCTargetTransformInfo.cpp | 10 +++-
 llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll    | 52 ++++++++++++++++++-
 2 files changed, 58 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index ed28731b8ef20..707c1396e5728 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -653,11 +653,17 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
       }
 
       return true;
-    } else if (isa<BinaryOperator>(J) &&
-               (J->getType()->getScalarType()->isFP128Ty() ||
+    } else if ((J->getType()->getScalarType()->isFP128Ty() ||
                 J->getType()->getScalarType()->isPPC_FP128Ty())) {
       // Most operations on f128 or ppc_f128 values become calls.
       return true;
+    } else if (isa<FCmpInst>(J) &&
+               J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) {
+      return true;
+    } else if ((isa<FPTruncInst>(J) || isa<FPExtInst>(J)) &&
+               (cast<CastInst>(J)->getSrcTy()->getScalarType()->isFP128Ty() ||
+                cast<CastInst>(J)->getDestTy()->getScalarType()->isFP128Ty())) {
+      return true;
     } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
                isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
       CastInst *CI = cast<CastInst>(J);
diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
index 57095413cdb26..fde8e20212c03 100644
--- a/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
+++ b/llvm/test/CodeGen/PowerPC/ctrloop-fp128.ll
@@ -1,5 +1,7 @@
-; RUN: llc -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \
-; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr9 \
+; RUN:   -mtriple=powerpc64le-unknown-unknown | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -stop-after=hardware-loops -mcpu=pwr8 \
+; RUN:   -mtriple=powerpc64le-unknown-unknown | FileCheck %s
 
 @a = internal global fp128 0xL00000000000000000000000000000000, align 16
 @x = internal global [4 x fp128] zeroinitializer, align 16
@@ -29,4 +31,50 @@ for.end:                                          ; preds = %for.body
 ; CHECK-NOT:     call i1 @llvm.loop.decrement.i64(i64 1)
 }
 
+define void @fpext_ctrloop_fp128(double* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds double, double* %a, i64 %i.06
+  %0 = load double, double* %arrayidx, align 8
+  %ext = fpext double %0 to fp128
+  %arrayidx1 = getelementptr inbounds [4 x fp128], [4 x fp128]* @y, i64 0, i64 %i.06
+  store fp128 %ext, fp128* %arrayidx1, align 16
+  %inc = add nuw nsw i64 %i.06, 1
+  %exitcond = icmp eq i64 %inc, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+
+; CHECK-LABEL: fpext_ctrloop_fp128
+; CHECK-NOT: call void @llvm.set.loop.iterations.i64(i64 4)
+; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1)
+}
+
+define void @fptrunc_ctrloop_fp128(double* %a) {
+entry:
+  br label %for.body
+
+for.body:
+  %i.06 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds [4 x fp128], [4 x fp128]* @x, i64 0, i64 %i.06
+  %0 = load fp128, fp128* %arrayidx, align 16
+  %trunc = fptrunc fp128 %0 to double
+  %arrayidx1 = getelementptr inbounds double, double* %a, i64 %i.06
+  store double %trunc, double* %arrayidx1, align 16
+  %inc = add nuw nsw i64 %i.06, 1
+  %exitcond = icmp eq i64 %inc, 4
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+
+; CHECK-LABEL: fptrunc_ctrloop_fp128
+; CHECK-NOT: call void @llvm.set.loop.iterations.i64(i64 4)
+; CHECK-NOT: call i1 @llvm.loop.decrement.i64(i64 1)
+}
+
 declare void @obfuscate(i8*, ...) local_unnamed_addr #2

From 00d68c3824bfcb103783b94c4cd8df353e4ee85d Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Sat, 22 Jan 2022 23:29:34 +0800
Subject: [PATCH 255/946] [PowerPC] Support parsing GNU attributes in MC

This patch is the first step to enable support of GNU attribute in LLVM
PowerPC, enabling it for PowerPC targets, otherwise llvm-mc raises error
when seeing the attribute section.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D115854
---
 llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 12 ++++++++++++
 llvm/test/MC/PowerPC/gnu-attribute.s               | 11 +++++++++++
 2 files changed, 23 insertions(+)
 create mode 100644 llvm/test/MC/PowerPC/gnu-attribute.s

diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index a640e63b5df84..715cff72dcab4 100644
--- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -121,6 +121,7 @@ class PPCAsmParser : public MCTargetAsmParser {
   bool ParseDirectiveMachine(SMLoc L);
   bool ParseDirectiveAbiVersion(SMLoc L);
   bool ParseDirectiveLocalEntry(SMLoc L);
+  bool ParseGNUAttribute(SMLoc L);
 
   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                OperandVector &Operands, MCStreamer &Out,
@@ -1605,6 +1606,8 @@ bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
     ParseDirectiveAbiVersion(DirectiveID.getLoc());
   else if (IDVal == ".localentry")
     ParseDirectiveLocalEntry(DirectiveID.getLoc());
+  else if (IDVal.startswith(".gnu_attribute"))
+    ParseGNUAttribute(DirectiveID.getLoc());
   else
     return true;
   return false;
@@ -1720,7 +1723,16 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
   return false;
 }
 
+bool PPCAsmParser::ParseGNUAttribute(SMLoc L) {
+  int64_t Tag;
+  int64_t IntegerValue;
+  if (!getParser().parseGNUAttribute(L, Tag, IntegerValue))
+    return false;
+
+  getParser().getStreamer().emitGNUAttribute(Tag, IntegerValue);
 
+  return true;
+}
 
 /// Force static initialization.
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCAsmParser() {
diff --git a/llvm/test/MC/PowerPC/gnu-attribute.s b/llvm/test/MC/PowerPC/gnu-attribute.s
new file mode 100644
index 0000000000000..98a558d52c6fe
--- /dev/null
+++ b/llvm/test/MC/PowerPC/gnu-attribute.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc -triple powerpc64-unknown-linux-gnu < %s | FileCheck %s
+# RUN: llvm-mc -triple powerpc64le-unknown-linux-gnu < %s | FileCheck %s
+
+	.text
+add:
+	add 3, 4, 3
+	blr
+	.gnu_attribute 4, 13
+
+# CHECK-LABEL: add:
+# CHECK: .gnu_attribute 4, 13

From 5f2854f1daa79373ef67211bebce5e08f087c3b3 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Sat, 22 Jan 2022 15:34:10 +0000
Subject: [PATCH 256/946] [LV] Always create VPWidenCanonicalIVRecipe, optimize
 away later.

This patch updates createBlockInMask to always generate
VPWidenCanonicalIVRecipe and adds a transform to optimize it away later,
if it is not needed.

This is a step towards breaking up VPWidenIntOrFpInductionRecipe and
explicitly distinguishing between vector phis and scalarizing.

Split off from D116123.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D117140
---
 .../Transforms/Vectorize/LoopVectorize.cpp    | 12 +++------
 llvm/lib/Transforms/Vectorize/VPlan.h         | 27 +++++++++++++++++++
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 27 +++++++++++++++++++
 .../Transforms/Vectorize/VPlanTransforms.h    |  4 +++
 4 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d186ae59a74a2..7b90dcff7bc1e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8418,15 +8418,8 @@ VPValue *VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlanPtr &Plan) {
     assert(CM.foldTailByMasking() && "must fold the tail");
     VPBasicBlock *HeaderVPBB = Plan->getEntry()->getEntryBasicBlock();
     auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
-
-    VPValue *IV = nullptr;
-    if (Legal->getPrimaryInduction())
-      IV = Plan->getOrAddVPValue(Legal->getPrimaryInduction());
-    else {
-      auto *IVRecipe = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV());
-      HeaderVPBB->insert(IVRecipe, NewInsertionPoint);
-      IV = IVRecipe;
-    }
+    auto *IV = new VPWidenCanonicalIVRecipe(Plan->getCanonicalIV());
+    HeaderVPBB->insert(IV, HeaderVPBB->getFirstNonPhi());
 
     VPBuilder::InsertPointGuard Guard(Builder);
     Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
@@ -9201,6 +9194,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlanWithVPRecipes(
     }
   }
 
+  VPlanTransforms::removeRedundantCanonicalIVs(*Plan);
   VPlanTransforms::removeRedundantInductionCasts(*Plan);
 
   // Now that sink-after is done, move induction recipes for optimized truncates
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 10d5c1b3409a5..824440f98a8b4 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1076,6 +1076,12 @@ class VPWidenIntOrFpInductionRecipe : public VPRecipeBase, public VPValue {
   /// Returns true if the induction is canonical, i.e. starting at 0 and
   /// incremented by UF * VF (= the original IV is incremented by 1).
   bool isCanonical() const;
+
+  /// Returns the scalar type of the induction.
+  const Type *getScalarType() const {
+    const TruncInst *TruncI = getTruncInst();
+    return TruncI ? TruncI->getType() : IV->getType();
+  }
 };
 
 /// A pure virtual base class for all recipes modeling header phis, including
@@ -1675,6 +1681,11 @@ class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
   void print(raw_ostream &O, const Twine &Indent,
              VPSlotTracker &SlotTracker) const override;
 #endif
+
+  /// Returns the scalar type of the induction.
+  const Type *getScalarType() const {
+    return getOperand(0)->getLiveInIRValue()->getType();
+  }
 };
 
 /// A Recipe for widening the canonical induction variable of the vector loop.
@@ -1691,6 +1702,16 @@ class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
     return D->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
   }
 
+  /// Extra classof implementations to allow directly casting from VPUser ->
+  /// VPWidenCanonicalIVRecipe.
+  static inline bool classof(const VPUser *U) {
+    auto *R = dyn_cast<VPRecipeBase>(U);
+    return R && R->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
+  }
+  static inline bool classof(const VPRecipeBase *R) {
+    return R->getVPDefID() == VPRecipeBase::VPWidenCanonicalIVSC;
+  }
+
   /// Generate a canonical vector induction variable of the vector loop, with
   /// start = {<Part*VF, Part*VF+1, ..., Part*VF+VF-1> for 0 <= Part < UF}, and
   /// step = <VF*UF, VF*UF, ..., VF*UF>.
@@ -1701,6 +1722,12 @@ class VPWidenCanonicalIVRecipe : public VPRecipeBase, public VPValue {
   void print(raw_ostream &O, const Twine &Indent,
              VPSlotTracker &SlotTracker) const override;
 #endif
+
+  /// Returns the scalar type of the induction.
+  const Type *getScalarType() const {
+    return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDef())
+        ->getScalarType();
+  }
 };
 
 /// VPBasicBlock serves as the leaf of the Hierarchical Control-Flow Graph. It
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d2daf558c2c56..fb5f3d4281896 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -324,3 +324,30 @@ void VPlanTransforms::removeRedundantInductionCasts(VPlan &Plan) {
     E.first->eraseFromParent();
   }
 }
+
+void VPlanTransforms::removeRedundantCanonicalIVs(VPlan &Plan) {
+  VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
+  VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
+  for (VPUser *U : CanonicalIV->users()) {
+    WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
+    if (WidenNewIV)
+      break;
+  }
+
+  if (!WidenNewIV)
+    return;
+
+  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
+    auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
+
+    // If the induction recipe is canonical and the types match, use it
+    // directly.
+    if (WidenOriginalIV && WidenOriginalIV->isCanonical() &&
+        WidenOriginalIV->getScalarType() == WidenNewIV->getScalarType()) {
+      WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
+      WidenNewIV->eraseFromParent();
+      return;
+    }
+  }
+}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index a82a562d5e353..e74409a86466f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -45,6 +45,10 @@ struct VPlanTransforms {
   /// in the vectorized loop. There is no need to vectorize the cast - the same
   /// value can be used for both the phi and casts in the vector loop.
   static void removeRedundantInductionCasts(VPlan &Plan);
+
+  /// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
+  /// recipe, if it exists.
+  static void removeRedundantCanonicalIVs(VPlan &Plan);
 };
 
 } // namespace llvm

From 26fffc1b8e755c60727d206a1f07a1ca1d299f48 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 22 Jan 2022 11:53:00 -0500
Subject: [PATCH 257/946] [libc++] [test] {cpo,niebloid}.compile.pass.cpp: Also
 test their constness.

This will detect if someone writes `inline auto cpo =` instead of
`inline constexpr auto cpo =`. I don't know how that'd be possible,
but it's easy to test, so let's test it.
---
 .../customization.point.object/cpo.compile.pass.cpp            | 3 +++
 .../customization.point.object/niebloid.compile.pass.cpp       | 1 +
 2 files changed, 4 insertions(+)

diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp
index 4cd461631838d..819aeb454a45c 100644
--- a/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp
+++ b/libcxx/test/std/library/description/conventions/customization.point.object/cpo.compile.pass.cpp
@@ -13,13 +13,16 @@
 // [range.adaptor.object] "A range adaptor object is a customization point object..."
 
 #include <compare>
+#include <concepts>
 #include <iterator>
 #include <ranges>
+#include <type_traits>
 #include <utility>
 
 // Test for basic properties of C++20 16.3.3.3.6 [customization.point.object].
 template <class CPO, class... Args>
 constexpr bool test(CPO& o, Args&&...) {
+  static_assert(std::is_const_v<CPO>);
   static_assert(std::is_class_v<CPO>);
   static_assert(std::is_trivial_v<CPO>);
 
diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
index fcca5813dcb31..532fa5786f8e1 100644
--- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
+++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp
@@ -32,6 +32,7 @@
 
 template <class CPO, class... Args>
 constexpr bool test(CPO& o, Args&&...) {
+  static_assert(std::is_const_v<CPO>);
   static_assert(std::is_class_v<CPO>);
   static_assert(std::is_trivial_v<CPO>);
 

From e9d0f8baf2361b190b0ffde67cad62828fda8ce6 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Fri, 14 Jan 2022 13:34:10 -0800
Subject: [PATCH 258/946] [flang] Don't drop format string for external child
 I/O

In user-defined derived type I/O to an external unit, don't
omit the format string from the constructor of ChildFormattedIoStatement.
And include any user IOMSG text in the crash message of the
parent, if it doesn't catch errors.

Differential Revision: https://reviews.llvm.org/D117903
---
 flang/runtime/format-implementation.h | 2 +-
 flang/runtime/io-api.cpp              | 2 +-
 flang/runtime/io-error.cpp            | 8 ++++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/flang/runtime/format-implementation.h b/flang/runtime/format-implementation.h
index b9c1b8427afe3..6b1a64b96a851 100644
--- a/flang/runtime/format-implementation.h
+++ b/flang/runtime/format-implementation.h
@@ -353,7 +353,7 @@ DataEdit FormatControl<CONTEXT>::GetNextDataEdit(
       ++offset_;
     }
   } else if (edit.descriptor == 'D' && Capitalize(PeekNext()) == 'T') {
-    // DT'iotype'(v_list) user-defined derived type I/O
+    // DT['iotype'][(v_list)] user-defined derived type I/O
     edit.descriptor = DataEdit::DefinedDerivedType;
     ++offset_;
     if (auto quote{static_cast<char>(PeekNext())};
diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp
index 64c798d7ff8ae..9c50adf184898 100644
--- a/flang/runtime/io-api.cpp
+++ b/flang/runtime/io-api.cpp
@@ -215,7 +215,7 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength,
                                         : "formatted input",
                false, DIR)
         ? &child->BeginIoStatement<ChildFormattedIoStatementState<DIR>>(
-              *child, sourceFile, sourceLine)
+              *child, format, formatLength, sourceFile, sourceLine)
         : nullptr;
   } else {
     IoErrorHandler handler{terminator};
diff --git a/flang/runtime/io-error.cpp b/flang/runtime/io-error.cpp
index 48647ec8805e7..e139e0649e503 100644
--- a/flang/runtime/io-error.cpp
+++ b/flang/runtime/io-error.cpp
@@ -59,10 +59,14 @@ void IoErrorHandler::SignalError(int iostatOrErrno) {
 
 void IoErrorHandler::Forward(
     int ioStatOrErrno, const char *msg, std::size_t length) {
-  SignalError(ioStatOrErrno);
-  if (ioStat_ != IostatOk && (flags_ & hasIoMsg)) {
+  if (ioStat_ != IostatOk && msg && (flags_ & hasIoMsg)) {
     ioMsg_ = SaveDefaultCharacter(msg, length, *this);
   }
+  if (ioStatOrErrno != IostatOk && msg) {
+    SignalError(ioStatOrErrno, "%.*s", static_cast<int>(length), msg);
+  } else {
+    SignalError(ioStatOrErrno);
+  }
 }
 
 void IoErrorHandler::SignalErrno() { SignalError(errno); }

From 896a543e72fd3ab044e64d1140a37c7f4876fc71 Mon Sep 17 00:00:00 2001
From: Peter Klausler <pklausler@nvidia.com>
Date: Wed, 19 Jan 2022 16:25:41 -0800
Subject: [PATCH 259/946] [flang] Support DECIMAL='COMMA' mode in namelist I/O

DECIMAL='COMMA' mode affects item separators, real editing, and
complex editing.

Differential Revision: https://reviews.llvm.org/D117906
---
 flang/runtime/descriptor-io.h        |  1 +
 flang/runtime/edit-input.cpp         | 10 ++++++---
 flang/runtime/io-stmt.cpp            |  5 +++--
 flang/runtime/io-stmt.h              |  7 ++++++-
 flang/runtime/namelist.cpp           | 17 +++++++++++----
 flang/unittests/Runtime/Namelist.cpp | 31 ++++++++++++++++++++++++++++
 6 files changed, 61 insertions(+), 10 deletions(-)

diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h
index 03b7e798af431..1e78a826f04db 100644
--- a/flang/runtime/descriptor-io.h
+++ b/flang/runtime/descriptor-io.h
@@ -124,6 +124,7 @@ inline bool FormattedComplexIO(
       DataEdit rEdit, iEdit;
       rEdit.descriptor = DataEdit::ListDirectedRealPart;
       iEdit.descriptor = DataEdit::ListDirectedImaginaryPart;
+      rEdit.modes = iEdit.modes = io.mutableModes();
       if (!RealOutputEditing<KIND>{io, x[0]}.Edit(rEdit) ||
           !RealOutputEditing<KIND>{io, x[1]}.Edit(iEdit)) {
         return false;
diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp
index 7fe4bfaf9a89e..dff79841cf18c 100644
--- a/flang/runtime/edit-input.cpp
+++ b/flang/runtime/edit-input.cpp
@@ -48,6 +48,10 @@ static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n,
   return true;
 }
 
+static inline char32_t GetDecimalPoint(const DataEdit &edit) {
+  return edit.modes.editingFlags & decimalComma ? char32_t{','} : char32_t{'.'};
+}
+
 // Prepares input from a field, and consumes the sign, if any.
 // Returns true if there's a '-' sign.
 static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
@@ -59,7 +63,7 @@ static bool ScanNumericPrefix(IoStatementState &io, const DataEdit &edit,
     if (negative || *next == '+') {
       io.GotChar();
       io.SkipSpaces(remaining);
-      next = io.NextInField(remaining);
+      next = io.NextInField(remaining, GetDecimalPoint(edit));
     }
   }
   return negative;
@@ -154,7 +158,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
     Put('0');
     return got;
   }
-  char32_t decimal = edit.modes.editingFlags & decimalComma ? ',' : '.';
+  char32_t decimal{GetDecimalPoint(edit)};
   char32_t first{*next >= 'a' && *next <= 'z' ? *next + 'A' - 'a' : *next};
   if (first == 'N' || first == 'I') {
     // NaN or infinity - convert to upper case
@@ -179,7 +183,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io,
     Put('.'); // input field is normalized to a fraction
     auto start{got};
     bool bzMode{(edit.modes.editingFlags & blankZero) != 0};
-    for (; next; next = io.NextInField(remaining)) {
+    for (; next; next = io.NextInField(remaining, decimal)) {
       char32_t ch{*next};
       if (ch == ' ' || ch == '\t') {
         if (bzMode) {
diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp
index 0a544d69958bf..52d0a1ebe6a39 100644
--- a/flang/runtime/io-stmt.cpp
+++ b/flang/runtime/io-stmt.cpp
@@ -580,13 +580,13 @@ ListDirectedStatementState<Direction::Input>::GetNextDataEdit(
   DataEdit edit;
   edit.descriptor = DataEdit::ListDirected;
   edit.repeat = 1; // may be overridden below
-  edit.modes = connection.modes;
+  edit.modes = io.mutableModes();
   if (hitSlash_) { // everything after '/' is nullified
     edit.descriptor = DataEdit::ListDirectedNullValue;
     return edit;
   }
   char32_t comma{','};
-  if (io.mutableModes().editingFlags & decimalComma) {
+  if (edit.modes.editingFlags & decimalComma) {
     comma = ';';
   }
   if (remaining_ > 0 && !realPart_) { // "r*c" repetition in progress
@@ -619,6 +619,7 @@ ListDirectedStatementState<Direction::Input>::GetNextDataEdit(
     // Consume comma & whitespace after previous item.
     // This includes the comma between real and imaginary components
     // in list-directed/NAMELIST complex input.
+    // (When DECIMAL='COMMA', the comma is actually a semicolon.)
     io.HandleRelativePosition(1);
     ch = io.GetNextNonBlank();
   }
diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h
index baf038dbdd245..32e546e5c082d 100644
--- a/flang/runtime/io-stmt.h
+++ b/flang/runtime/io-stmt.h
@@ -163,9 +163,14 @@ class IoStatementState {
     return std::nullopt;
   }
 
-  std::optional<char32_t> NextInField(std::optional<int> &remaining) {
+  std::optional<char32_t> NextInField(
+      std::optional<int> &remaining, char32_t decimal = '.') {
     if (!remaining) { // list-directed or NAMELIST: check for separators
       if (auto next{GetCurrentChar()}) {
+        if (*next == decimal) { // can be ','
+          HandleRelativePosition(1);
+          return next;
+        }
         switch (*next) {
         case ' ':
         case '\t':
diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp
index fde828fddf443..8d291619b8f5c 100644
--- a/flang/runtime/namelist.cpp
+++ b/flang/runtime/namelist.cpp
@@ -20,11 +20,17 @@ namespace Fortran::runtime::io {
 // NAMELIST input, plus a byte for NUL termination.
 static constexpr std::size_t nameBufferSize{201};
 
+static inline char32_t GetComma(IoStatementState &io) {
+  return io.mutableModes().editingFlags & decimalComma ? char32_t{';'}
+                                                       : char32_t{','};
+}
+
 bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) {
   IoStatementState &io{*cookie};
   io.CheckFormattedStmtType<Direction::Output>("OutputNamelist");
   ConnectionState &connection{io.GetConnectionState()};
   connection.modes.inNamelist = true;
+  char comma{static_cast<char>(GetComma(io))};
   // Internal functions to advance records and convert case
   const auto EmitWithAdvance{[&](char ch) -> bool {
     return (!connection.NeedAdvance(1) || io.AdvanceRecord()) &&
@@ -51,7 +57,7 @@ bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) {
   for (std::size_t j{0}; j < group.items; ++j) {
     // [,]ITEM=...
     const NamelistGroup::Item &item{group.item[j]};
-    if (!(EmitWithAdvance(j == 0 ? ' ' : ',') && EmitUpperCase(item.name) &&
+    if (!(EmitWithAdvance(j == 0 ? ' ' : comma) && EmitUpperCase(item.name) &&
             EmitWithAdvance('=') &&
             descr::DescriptorIO<Direction::Output>(io, item.descriptor))) {
       return false;
@@ -137,6 +143,7 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc,
   std::size_t contiguousStride{source.ElementBytes()};
   bool ok{true};
   std::optional<char32_t> ch{io.GetNextNonBlank()};
+  char32_t comma{GetComma(io)};
   for (; ch && *ch != ')'; ++j) {
     SubscriptValue dimLower{0}, dimUpper{0}, dimStride{0};
     if (j < maxRank && j < source.rank()) {
@@ -197,7 +204,7 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc,
       dimUpper = dimLower;
       dimStride = 0;
     }
-    if (ch && *ch == ',') {
+    if (ch && *ch == comma) {
       io.HandleRelativePosition(1);
       ch = io.GetNextNonBlank();
     }
@@ -358,6 +365,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
   std::optional<char32_t> next;
   char name[nameBufferSize];
   RUNTIME_CHECK(handler, group.groupName != nullptr);
+  char32_t comma{GetComma(io)};
   while (true) {
     next = io.GetNextNonBlank();
     while (next && *next != '&') {
@@ -391,7 +399,8 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
     }
     if (!GetLowerCaseName(io, name, sizeof name)) {
       handler.SignalError(
-          "NAMELIST input group '%s' was not terminated", group.groupName);
+          "NAMELIST input group '%s' was not terminated at '%c'",
+          group.groupName, static_cast<char>(*next));
       return false;
     }
     std::size_t itemIndex{0};
@@ -461,7 +470,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) {
       return false;
     }
     next = io.GetNextNonBlank();
-    if (next && *next == ',') {
+    if (next && *next == comma) {
       io.HandleRelativePosition(1);
     }
   }
diff --git a/flang/unittests/Runtime/Namelist.cpp b/flang/unittests/Runtime/Namelist.cpp
index 38305f729b145..ba0bae6468a03 100644
--- a/flang/unittests/Runtime/Namelist.cpp
+++ b/flang/unittests/Runtime/Namelist.cpp
@@ -274,4 +274,35 @@ TEST(NamelistTests, Skip) {
   EXPECT_EQ(got, expect);
 }
 
+// Tests DECIMAL=COMMA mode
+TEST(NamelistTests, Comma) {
+  OwningPtr<Descriptor> scDesc{
+      MakeArray<TypeCategory::Complex, static_cast<int>(sizeof(float))>(
+          std::vector<int>{2}, std::vector<std::complex<float>>{{}, {}})};
+  const NamelistGroup::Item items[]{{"z", *scDesc}};
+  const NamelistGroup group{"nml", 1, items};
+  static char t1[]{"&nml z=(-1,0;2,0);(-3,0;0,5)/"};
+  StaticDescriptor<1, true> statDesc;
+  Descriptor &internalDesc{statDesc.descriptor()};
+  internalDesc.Establish(TypeCode{CFI_type_char},
+      /*elementBytes=*/std::strlen(t1), t1, 0, nullptr, CFI_attribute_pointer);
+  auto inCookie{IONAME(BeginInternalArrayListInput)(
+      internalDesc, nullptr, 0, __FILE__, __LINE__)};
+  ASSERT_TRUE(IONAME(SetDecimal)(inCookie, "COMMA", 5));
+  ASSERT_TRUE(IONAME(InputNamelist)(inCookie, group));
+  ASSERT_EQ(IONAME(EndIoStatement)(inCookie), IostatOk)
+      << "namelist input with skipping";
+  char out[30];
+  internalDesc.Establish(TypeCode{CFI_type_char}, /*elementBytes=*/sizeof out,
+      out, 0, nullptr, CFI_attribute_pointer);
+  auto outCookie{IONAME(BeginInternalArrayListOutput)(
+      internalDesc, nullptr, 0, __FILE__, __LINE__)};
+  ASSERT_TRUE(IONAME(SetDecimal)(outCookie, "COMMA", 5));
+  ASSERT_TRUE(IONAME(OutputNamelist)(outCookie, group));
+  ASSERT_EQ(IONAME(EndIoStatement)(outCookie), IostatOk) << "namelist output";
+  std::string got{out, sizeof out};
+  static const std::string expect{"&NML Z= (-1,;2,) (-3,;,5)/    "};
+  EXPECT_EQ(got, expect);
+}
+
 // TODO: Internal NAMELIST error tests

From 0a6b4258ab0e773b7b2b06db1317faf556931481 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Sat, 22 Jan 2022 01:33:49 -0500
Subject: [PATCH 260/946] [openmp][cmake] Use `GNUInstallDirs` to support
 custom installation dirs

I am breaking apart D99484 so the cause of build failures is easier to
understand.

Differential Revision: https://reviews.llvm.org/D117945
---
 openmp/CMakeLists.txt                            | 5 +++++
 openmp/libompd/src/CMakeLists.txt                | 2 +-
 openmp/runtime/cmake/LibompCheckLinkerFlag.cmake | 2 ++
 openmp/runtime/src/CMakeLists.txt                | 6 +++---
 openmp/tools/multiplex/CMakeLists.txt            | 2 +-
 5 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/openmp/CMakeLists.txt b/openmp/CMakeLists.txt
index 7f11a05f56227..4530281aff268 100644
--- a/openmp/CMakeLists.txt
+++ b/openmp/CMakeLists.txt
@@ -7,7 +7,12 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH})
 if (OPENMP_STANDALONE_BUILD OR "${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}")
   set(OPENMP_STANDALONE_BUILD TRUE)
   project(openmp C CXX)
+endif()
+
+# Must go below project(..)
+include(GNUInstallDirs)
 
+if (OPENMP_STANDALONE_BUILD)
   # CMAKE_BUILD_TYPE was not set, default to Release.
   if (NOT CMAKE_BUILD_TYPE)
     set(CMAKE_BUILD_TYPE Release)
diff --git a/openmp/libompd/src/CMakeLists.txt b/openmp/libompd/src/CMakeLists.txt
index 9c203bdd6b4fc..25a850ed457d7 100644
--- a/openmp/libompd/src/CMakeLists.txt
+++ b/openmp/libompd/src/CMakeLists.txt
@@ -47,4 +47,4 @@ include_directories (
 INSTALL( TARGETS ompd
         LIBRARY DESTINATION ${OPENMP_INSTALL_LIBDIR}
         ARCHIVE DESTINATION ${OPENMP_INSTALL_LIBDIR}
-        RUNTIME DESTINATION bin )
+        RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}" )
diff --git a/openmp/runtime/cmake/LibompCheckLinkerFlag.cmake b/openmp/runtime/cmake/LibompCheckLinkerFlag.cmake
index fb284599ca386..4c30514af88e5 100644
--- a/openmp/runtime/cmake/LibompCheckLinkerFlag.cmake
+++ b/openmp/runtime/cmake/LibompCheckLinkerFlag.cmake
@@ -8,6 +8,8 @@
 #//===----------------------------------------------------------------------===//
 #
 
+include(GNUInstallDirs)
+
 # Checking a linker flag to build a shared library
 # There is no real trivial way to do this in CMake, so we implement it here
 # this will have ${boolean} = TRUE if the flag succeeds, otherwise FALSE.
diff --git a/openmp/runtime/src/CMakeLists.txt b/openmp/runtime/src/CMakeLists.txt
index e4f4e6e1e73ff..27e748c4d6dc1 100644
--- a/openmp/runtime/src/CMakeLists.txt
+++ b/openmp/runtime/src/CMakeLists.txt
@@ -346,19 +346,19 @@ add_dependencies(libomp-micro-tests libomp-test-deps)
 # We want to install libomp in DESTDIR/CMAKE_INSTALL_PREFIX/lib
 # We want to install headers in DESTDIR/CMAKE_INSTALL_PREFIX/include
 if(${OPENMP_STANDALONE_BUILD})
-  set(LIBOMP_HEADERS_INSTALL_PATH include)
+  set(LIBOMP_HEADERS_INSTALL_PATH "${CMAKE_INSTALL_INCLUDEDIR}")
 else()
   string(REGEX MATCH "[0-9]+\\.[0-9]+(\\.[0-9]+)?" CLANG_VERSION ${PACKAGE_VERSION})
   set(LIBOMP_HEADERS_INSTALL_PATH "${OPENMP_INSTALL_LIBDIR}/clang/${CLANG_VERSION}/include")
 endif()
 if(WIN32)
-  install(TARGETS omp RUNTIME DESTINATION bin)
+  install(TARGETS omp RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
   install(TARGETS ${LIBOMP_IMP_LIB_TARGET} ARCHIVE DESTINATION "${OPENMP_INSTALL_LIBDIR}")
   # Create aliases (regular copies) of the library for backwards compatibility
   set(LIBOMP_ALIASES "libiomp5md")
   foreach(alias IN LISTS LIBOMP_ALIASES)
     install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E copy \"${LIBOMP_LIB_FILE}\"
-      \"${alias}${LIBOMP_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"\${CMAKE_INSTALL_PREFIX}/bin\")")
+      \"${alias}${LIBOMP_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"\${CMAKE_INSTALL_BINDIR}\")")
     install(CODE "execute_process(COMMAND \"\${CMAKE_COMMAND}\" -E copy \"${LIBOMP_IMP_LIB_FILE}\"
       \"${alias}${CMAKE_STATIC_LIBRARY_SUFFIX}\" WORKING_DIRECTORY \"\${CMAKE_INSTALL_PREFIX}/${OPENMP_INSTALL_LIBDIR}\")")
   endforeach()
diff --git a/openmp/tools/multiplex/CMakeLists.txt b/openmp/tools/multiplex/CMakeLists.txt
index 64317c112176c..8b50e95899cc4 100644
--- a/openmp/tools/multiplex/CMakeLists.txt
+++ b/openmp/tools/multiplex/CMakeLists.txt
@@ -4,7 +4,7 @@ if(LIBOMP_OMPT_SUPPORT)
   add_library(ompt-multiplex INTERFACE)
   target_include_directories(ompt-multiplex INTERFACE ${CMAKE_CURRENT_SOURCE_DIR})
 
-  install(FILES ompt-multiplex.h DESTINATION include)
+  install(FILES ompt-multiplex.h DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
 
   add_subdirectory(tests)
 endif()

From d44b6be6eaa8c165d3526d61dcc0f1c459e5722f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sat, 22 Jan 2022 11:55:13 -0800
Subject: [PATCH 261/946] [RISCV] Don't Custom legalize f16/f32/f64 bitcasts if
 those types aren't Legal.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index e1f1c49094424..411191343cf04 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1025,9 +1025,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::BITCAST, MVT::i16, Custom);
       setOperationAction(ISD::BITCAST, MVT::i32, Custom);
       setOperationAction(ISD::BITCAST, MVT::i64, Custom);
-      setOperationAction(ISD::BITCAST, MVT::f16, Custom);
-      setOperationAction(ISD::BITCAST, MVT::f32, Custom);
-      setOperationAction(ISD::BITCAST, MVT::f64, Custom);
+      if (Subtarget.hasStdExtZfh())
+        setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+      if (Subtarget.hasStdExtF())
+        setOperationAction(ISD::BITCAST, MVT::f32, Custom);
+      if (Subtarget.hasStdExtD())
+        setOperationAction(ISD::BITCAST, MVT::f64, Custom);
     }
   }
 

From 39e602b6c4335b2572c74eaf9a666bebde7fd8b5 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sat, 22 Jan 2022 12:36:12 -0500
Subject: [PATCH 262/946] [InstCombine] try to fold binop with phi operands

This is an alternate version of D115914 that handles/tests all binary opcodes.

I suspect that we don't see these patterns too often because -simplifycfg
would convert the minimal cases into selects rather than leave them in phi form
(note: instcombine has logic holes for combining the select patterns too though,
so that's another potential patch).

We only create a new binop in a predecessor that unconditionally branches to
the final block.
https://alive2.llvm.org/ce/z/C57M2F
https://alive2.llvm.org/ce/z/WHwAoU (not safe to speculate an sdiv for example)
https://alive2.llvm.org/ce/z/rdVUvW (but it is ok on this path)

Differential Revision: https://reviews.llvm.org/D117110
---
 .../InstCombine/InstCombineAddSub.cpp         |  12 ++
 .../InstCombine/InstCombineAndOrXor.cpp       |   9 ++
 .../InstCombine/InstCombineInternal.h         |  10 ++
 .../InstCombine/InstCombineMulDivRem.cpp      |  18 +++
 .../InstCombine/InstCombineShifts.cpp         |   3 +
 .../InstCombine/InstructionCombining.cpp      |  64 +++++++++
 .../InstCombine/binop-phi-operands.ll         | 122 ++++++++----------
 .../Transforms/InstCombine/zext-or-icmp.ll    |  12 +-
 8 files changed, 171 insertions(+), 79 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index a2e875638d7d6..0598f751febe2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1288,6 +1288,9 @@ Instruction *InstCombinerImpl::visitAdd(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   // (A*B)+(A*C) -> A*(B+C) etc
   if (Value *V = SimplifyUsingDistributiveLaws(I))
     return replaceInstUsesWith(I, V);
@@ -1536,6 +1539,9 @@ Instruction *InstCombinerImpl::visitFAdd(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   if (Instruction *FoldedFAdd = foldBinOpIntoSelectOrPhi(I))
     return FoldedFAdd;
 
@@ -1751,6 +1757,9 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // If this is a 'B = x-(-A)', change to B = x+A.
@@ -2313,6 +2322,9 @@ Instruction *InstCombinerImpl::visitFSub(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   // Subtraction from -0.0 is the canonical form of fneg.
   // fsub -0.0, X ==> fneg X
   // fsub nsz 0.0, X ==> fneg nsz X
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index fe6a6c1203fd2..6bbb0251f2bc0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1872,6 +1872,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
@@ -2665,6 +2668,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
@@ -3553,6 +3559,9 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   if (Instruction *NewXor = foldXorToXor(I, Builder))
     return NewXor;
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index f92ee31a3de26..a7d1ff202e5e7 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -608,6 +608,16 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   /// only possible if all operands to the PHI are constants).
   Instruction *foldOpIntoPhi(Instruction &I, PHINode *PN);
 
+  /// For a binary operator with 2 phi operands, try to hoist the binary
+  /// operation before the phi. This can result in fewer instructions in
+  /// patterns where at least one set of phi operands simplifies.
+  /// Example:
+  /// BB3: binop (phi [X, BB1], [C1, BB2]), (phi [Y, BB1], [C2, BB2])
+  /// -->
+  /// BB1: BO = binop X, Y
+  /// BB3: phi [BO, BB1], [(binop C1, C2), BB2]
+  Instruction *foldBinopWithPhiOperands(BinaryOperator &BO);
+
   /// Given an instruction with a select as one operand and a constant as the
   /// other operand, try to fold the binary operator into the select arguments.
   /// This also works for Cast instructions, which obviously do not have a
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 076c3134d0782..1aa10b550fc40 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -155,6 +155,9 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   if (Value *V = SimplifyUsingDistributiveLaws(I))
     return replaceInstUsesWith(I, V);
 
@@ -450,6 +453,9 @@ Instruction *InstCombinerImpl::visitFMul(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   if (Instruction *FoldedMul = foldBinOpIntoSelectOrPhi(I))
     return FoldedMul;
 
@@ -750,6 +756,9 @@ static bool isMultiple(const APInt &C1, const APInt &C2, APInt &Quotient,
 /// division instructions.
 /// Common integer divide transforms
 Instruction *InstCombinerImpl::commonIDivTransforms(BinaryOperator &I) {
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   bool IsSigned = I.getOpcode() == Instruction::SDiv;
   Type *Ty = I.getType();
@@ -1367,6 +1376,9 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   if (Instruction *R = foldFDivConstantDivisor(I))
     return R;
 
@@ -1468,6 +1480,9 @@ Instruction *InstCombinerImpl::visitFDiv(BinaryOperator &I) {
 /// remainder instructions.
 /// Common integer remainder transforms
 Instruction *InstCombinerImpl::commonIRemTransforms(BinaryOperator &I) {
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
 
   // The RHS is known non-zero.
@@ -1646,5 +1661,8 @@ Instruction *InstCombinerImpl::visitFRem(BinaryOperator &I) {
   if (Instruction *X = foldVectorBinop(I))
     return X;
 
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   return nullptr;
 }
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 9acad19df9df5..17f0c5c4cff0e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -369,6 +369,9 @@ static Instruction *foldShiftOfShiftedLogic(BinaryOperator &I,
 }
 
 Instruction *InstCombinerImpl::commonShiftTransforms(BinaryOperator &I) {
+  if (Instruction *Phi = foldBinopWithPhiOperands(I))
+    return Phi;
+
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
   assert(Op0->getType() == Op1->getType());
 
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 89c5fef18eca8..32f6a980afa8d 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1287,6 +1287,70 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
   return replaceInstUsesWith(I, NewPN);
 }
 
+Instruction *InstCombinerImpl::foldBinopWithPhiOperands(BinaryOperator &BO) {
+  // TODO: This should be similar to the incoming values check in foldOpIntoPhi:
+  //       we are guarding against replicating the binop in >1 predecessor.
+  //       This could miss matching a phi with 2 constant incoming values.
+  auto *Phi0 = dyn_cast<PHINode>(BO.getOperand(0));
+  auto *Phi1 = dyn_cast<PHINode>(BO.getOperand(1));
+  if (!Phi0 || !Phi1 || !Phi0->hasOneUse() || !Phi1->hasOneUse() ||
+      Phi0->getNumOperands() != 2 || Phi1->getNumOperands() != 2)
+    return nullptr;
+
+  // TODO: Remove the restriction for binop being in the same block as the phis.
+  if (BO.getParent() != Phi0->getParent() ||
+      BO.getParent() != Phi1->getParent())
+    return nullptr;
+
+  // Match a pair of incoming constants for one of the predecessor blocks.
+  BasicBlock *ConstBB, *OtherBB;
+  Constant *C0, *C1;
+  if (match(Phi0->getIncomingValue(0), m_ImmConstant(C0))) {
+    ConstBB = Phi0->getIncomingBlock(0);
+    OtherBB = Phi0->getIncomingBlock(1);
+  } else if (match(Phi0->getIncomingValue(1), m_ImmConstant(C0))) {
+    ConstBB = Phi0->getIncomingBlock(1);
+    OtherBB = Phi0->getIncomingBlock(0);
+  } else {
+    return nullptr;
+  }
+  if (!match(Phi1->getIncomingValueForBlock(ConstBB), m_ImmConstant(C1)))
+    return nullptr;
+
+  // The block that we are hoisting to must reach here unconditionally.
+  // Otherwise, we could be speculatively executing an expensive or
+  // non-speculative op.
+  auto *PredBlockBranch = dyn_cast<BranchInst>(OtherBB->getTerminator());
+  if (!PredBlockBranch || PredBlockBranch->isConditional() ||
+      !DT.isReachableFromEntry(OtherBB))
+    return nullptr;
+
+  // TODO: This check could be tightened to only apply to binops (div/rem) that
+  //       are not safe to speculatively execute. But that could allow hoisting
+  //       potentially expensive instructions (fdiv for example).
+  for (auto BBIter = BO.getParent()->begin(); &*BBIter != &BO; ++BBIter)
+    if (!isGuaranteedToTransferExecutionToSuccessor(&*BBIter))
+      return nullptr;
+
+  // Make a new binop in the predecessor block with the non-constant incoming
+  // values.
+  Builder.SetInsertPoint(PredBlockBranch);
+  Value *NewBO = Builder.CreateBinOp(BO.getOpcode(),
+                                     Phi0->getIncomingValueForBlock(OtherBB),
+                                     Phi1->getIncomingValueForBlock(OtherBB));
+  if (auto *NotFoldedNewBO = dyn_cast<BinaryOperator>(NewBO))
+    NotFoldedNewBO->copyIRFlags(&BO);
+
+  // Fold constants for the predecessor block with constant incoming values.
+  Constant *NewC = ConstantExpr::get(BO.getOpcode(), C0, C1);
+
+  // Replace the binop with a phi of the new values. The old phis are dead.
+  PHINode *NewPhi = PHINode::Create(BO.getType(), 2);
+  NewPhi->addIncoming(NewBO, OtherBB);
+  NewPhi->addIncoming(NewC, ConstBB);
+  return NewPhi;
+}
+
 Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
   if (!isa<Constant>(I.getOperand(1)))
     return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/binop-phi-operands.ll b/llvm/test/Transforms/InstCombine/binop-phi-operands.ll
index 81428e898eda2..b43081383156f 100644
--- a/llvm/test/Transforms/InstCombine/binop-phi-operands.ll
+++ b/llvm/test/Transforms/InstCombine/binop-phi-operands.ll
@@ -4,6 +4,8 @@
 declare void @use(i32)
 declare void @sideeffect()
 
+; negative test (but we could allow this?) - don't hoist to conditional predecessor block
+
 define i32 @add_const_incoming0_speculative(i1 %b, i32 %x, i32 %y) {
 ; CHECK-LABEL: @add_const_incoming0_speculative(
 ; CHECK-NEXT:  entry:
@@ -34,11 +36,10 @@ define i32 @add_const_incoming0_nonspeculative(i1 %b, i32 %x, i32 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[X:%.*]], [[IF]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i32 [ 17, [[ENTRY]] ], [ [[Y:%.*]], [[IF]] ]
-; CHECK-NEXT:    [[R:%.*]] = add i32 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP0]], [[IF]] ], [ 59, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -54,6 +55,8 @@ then:
   ret i32 %r
 }
 
+; negative test (but we could allow this?) - don't hoist to conditional predecessor block
+
 define i32 @sub_const_incoming0(i1 %b, i32 %x, i32 %y) {
 ; CHECK-LABEL: @sub_const_incoming0(
 ; CHECK-NEXT:  entry:
@@ -84,11 +87,10 @@ define i32 @sub_const_incoming1(i1 %b, i32 %x, i32 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i32 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i32 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = sub i32 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[TMP0]], [[IF]] ], [ 25, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i32 [[R]]
 ;
 entry:
@@ -109,11 +111,10 @@ define i8 @mul_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = mul i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = mul i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ -54, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -134,11 +135,10 @@ define i8 @and_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = and i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -159,11 +159,10 @@ define i8 @xor_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = xor i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 59, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -184,11 +183,10 @@ define i64 @or_const_incoming1(i1 %b, i64 %x, i64 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i64 [ [[X:%.*]], [[IF]] ], [ 3, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i64 [ [[Y:%.*]], [[IF]] ], [ 16, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = or i64 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i64 [ [[TMP0]], [[IF]] ], [ 19, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
 entry:
@@ -209,11 +207,10 @@ define i64 @or_const_incoming01(i1 %b, i64 %x, i64 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i64 [ 3, [[ENTRY:%.*]] ], [ [[X:%.*]], [[IF]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i64 [ 16, [[ENTRY]] ], [ [[Y:%.*]], [[IF]] ]
-; CHECK-NEXT:    [[R:%.*]] = or i64 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i64 [ [[TMP0]], [[IF]] ], [ 19, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
 entry:
@@ -234,11 +231,10 @@ define i64 @or_const_incoming10(i1 %b, i64 %x, i64 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[Y:%.*]], [[X:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i64 [ [[Y:%.*]], [[IF]] ], [ 16, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i64 [ [[X:%.*]], [[IF]] ], [ 3, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = or i64 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i64 [ [[TMP0]], [[IF]] ], [ 19, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i64 [[R]]
 ;
 entry:
@@ -254,6 +250,8 @@ then:
   ret i64 %r
 }
 
+; negative test (but we could allow this?) - don't hoist to conditional predecessor block
+
 define i8 @ashr_const_incoming0_speculative(i1 %b, i8 %x, i8 %y) {
 ; CHECK-LABEL: @ashr_const_incoming0_speculative(
 ; CHECK-NEXT:  entry:
@@ -284,11 +282,10 @@ define i8 @ashr_const_incoming0(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ 42, [[ENTRY:%.*]] ], [ [[X:%.*]], [[IF]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ 3, [[ENTRY]] ], [ [[Y:%.*]], [[IF]] ]
-; CHECK-NEXT:    [[R:%.*]] = ashr i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 5, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -309,11 +306,10 @@ define i8 @lshr_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = lshr i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 3, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = lshr i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 5, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -334,11 +330,10 @@ define i8 @shl_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = shl nuw nsw i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 3, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = shl nuw nsw i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 80, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -384,11 +379,10 @@ define i8 @sdiv_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = sdiv i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ -42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ -2, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -409,11 +403,10 @@ define i8 @udiv_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = udiv i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ -42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ 17, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = udiv i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 12, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -434,11 +427,10 @@ define i8 @srem_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = srem i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ -17, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = srem i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 8, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -459,11 +451,10 @@ define i8 @urem_const_incoming1(i1 %b, i8 %x, i8 %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = urem i8 [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi i8 [ [[X:%.*]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi i8 [ [[Y:%.*]], [[IF]] ], [ -17, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = urem i8 [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi i8 [ [[TMP0]], [[IF]] ], [ 42, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
 entry:
@@ -484,11 +475,10 @@ define float @fmul_const_incoming1(i1 %b, float %x, float %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = fmul float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi float [ [[X:%.*]], [[IF]] ], [ 4.200000e+01, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi float [ [[Y:%.*]], [[IF]] ], [ 1.700000e+01, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = fmul float [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi float [ [[TMP0]], [[IF]] ], [ 7.140000e+02, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret float [[R]]
 ;
 entry:
@@ -509,11 +499,10 @@ define float @fadd_const_incoming1(i1 %b, float %x, float %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi float [ [[X:%.*]], [[IF]] ], [ 4.200000e+01, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi float [ [[Y:%.*]], [[IF]] ], [ 1.700000e+01, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = fadd fast float [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi float [ [[TMP0]], [[IF]] ], [ 5.900000e+01, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret float [[R]]
 ;
 entry:
@@ -534,11 +523,10 @@ define float @fsub_const_incoming1(i1 %b, float %x, float %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = fsub nnan ninf float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi float [ [[X:%.*]], [[IF]] ], [ 4.200000e+01, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi float [ [[Y:%.*]], [[IF]] ], [ 1.700000e+01, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = fsub nnan ninf float [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi float [ [[TMP0]], [[IF]] ], [ 2.500000e+01, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret float [[R]]
 ;
 entry:
@@ -559,11 +547,10 @@ define float @frem_const_incoming1(i1 %b, float %x, float %y) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 [[B:%.*]], label [[IF:%.*]], label [[THEN:%.*]]
 ; CHECK:       if:
+; CHECK-NEXT:    [[TMP0:%.*]] = frem nsz float [[X:%.*]], [[Y:%.*]]
 ; CHECK-NEXT:    br label [[THEN]]
 ; CHECK:       then:
-; CHECK-NEXT:    [[P0:%.*]] = phi float [ [[X:%.*]], [[IF]] ], [ 4.200000e+01, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[P1:%.*]] = phi float [ [[Y:%.*]], [[IF]] ], [ 1.700000e+01, [[ENTRY]] ]
-; CHECK-NEXT:    [[R:%.*]] = frem nsz float [[P0]], [[P1]]
+; CHECK-NEXT:    [[R:%.*]] = phi float [ [[TMP0]], [[IF]] ], [ 8.000000e+00, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret float [[R]]
 ;
 entry:
@@ -658,6 +645,9 @@ then:
   ret i64 %r
 }
 
+; The mul could be hoisted before the call that may not return
+; if we are ok with speculating a potentially expensive op.
+
 define i8 @mul_const_incoming0_speculatable(i1 %b, i8 %x, i8 %y) {
 ; CHECK-LABEL: @mul_const_incoming0_speculatable(
 ; CHECK-NEXT:  entry:
@@ -685,6 +675,8 @@ then:
   ret i8 %r
 }
 
+; The udiv should never be hoisted before the call that may not return.
+
 define i8 @udiv_const_incoming0_not_speculatable(i1 %b, i8 %x, i8 %y) {
 ; CHECK-LABEL: @udiv_const_incoming0_not_speculatable(
 ; CHECK-NEXT:  entry:
@@ -712,6 +704,8 @@ then:
   ret i8 %r
 }
 
+; TODO: It is ok to hoist the udiv even though it is not in the same block as the phis.
+
 define i8 @udiv_const_incoming0_different_block(i1 %b, i8 %x, i8 %y) {
 ; CHECK-LABEL: @udiv_const_incoming0_different_block(
 ; CHECK-NEXT:  entry:
@@ -750,20 +744,10 @@ define { i64, i32 } @ParseRetVal(i1 %b, { i64, i32 } ()* %x) {
 ; CHECK-NEXT:    [[T4:%.*]] = tail call { i64, i32 } [[X:%.*]]()
 ; CHECK-NEXT:    [[T5:%.*]] = extractvalue { i64, i32 } [[T4]], 0
 ; CHECK-NEXT:    [[T6:%.*]] = extractvalue { i64, i32 } [[T4]], 1
-; CHECK-NEXT:    [[T7:%.*]] = and i64 [[T5]], -4294967296
-; CHECK-NEXT:    [[T8:%.*]] = and i64 [[T5]], 4294901760
-; CHECK-NEXT:    [[T9:%.*]] = and i64 [[T5]], 65280
-; CHECK-NEXT:    [[T10:%.*]] = and i64 [[T5]], 255
 ; CHECK-NEXT:    br label [[F]]
 ; CHECK:       f:
-; CHECK-NEXT:    [[T12:%.*]] = phi i64 [ [[T10]], [[T]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[T13:%.*]] = phi i64 [ [[T9]], [[T]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[T14:%.*]] = phi i64 [ [[T8]], [[T]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[T15:%.*]] = phi i64 [ [[T7]], [[T]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[T16:%.*]] = phi i32 [ [[T6]], [[T]] ], [ 0, [[ENTRY]] ]
-; CHECK-NEXT:    [[T17:%.*]] = or i64 [[T13]], [[T12]]
-; CHECK-NEXT:    [[T18:%.*]] = or i64 [[T17]], [[T14]]
-; CHECK-NEXT:    [[T19:%.*]] = or i64 [[T18]], [[T15]]
+; CHECK-NEXT:    [[T16:%.*]] = phi i32 [ [[T6]], [[T]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[T19:%.*]] = phi i64 [ [[T5]], [[T]] ], [ 0, [[ENTRY]] ]
 ; CHECK-NEXT:    [[T20:%.*]] = insertvalue { i64, i32 } poison, i64 [[T19]], 0
 ; CHECK-NEXT:    [[T21:%.*]] = insertvalue { i64, i32 } [[T20]], i32 [[T16]], 1
 ; CHECK-NEXT:    ret { i64, i32 } [[T21]]
diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
index 6b7f1d029cb67..68d1e8addd35a 100644
--- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -46,11 +46,7 @@ define i32 @dont_widen_undef() {
 ; CHECK:       block1:
 ; CHECK-NEXT:    br label [[BLOCK2]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ true, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[CMP115:%.*]] = phi i1 [ true, [[BLOCK1]] ], [ false, [[ENTRY]] ]
-; CHECK-NEXT:    [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]]
-; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[CMP1]] to i32
-; CHECK-NEXT:    ret i32 [[CONV2]]
+; CHECK-NEXT:    ret i32 1
 ;
 entry:
   br label %block2
@@ -76,11 +72,7 @@ define i32 @dont_widen_undef_logical() {
 ; CHECK:       block1:
 ; CHECK-NEXT:    br label [[BLOCK2]]
 ; CHECK:       block2:
-; CHECK-NEXT:    [[CMP_I:%.*]] = phi i1 [ false, [[BLOCK1:%.*]] ], [ true, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[CMP115:%.*]] = phi i1 [ true, [[BLOCK1]] ], [ false, [[ENTRY]] ]
-; CHECK-NEXT:    [[CMP1:%.*]] = or i1 [[CMP_I]], [[CMP115]]
-; CHECK-NEXT:    [[CONV2:%.*]] = zext i1 [[CMP1]] to i32
-; CHECK-NEXT:    ret i32 [[CONV2]]
+; CHECK-NEXT:    ret i32 1
 ;
 entry:
   br label %block2

From 7c16647c3676587391f6bb80ec87d9621ca9472f Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Sun, 16 Jan 2022 05:52:22 +0000
Subject: [PATCH 263/946] [clang-tools-extra][cmake] Use `GNUInstallDirs` to
 support custom installation dirs.

This is the original patch in my GNUInstallDirs series, now last to merge as the final piece!

It arose as a new draft of D28234. I initially did the unorthodox thing of pushing to that when I wasn't the original author, but since I ended up

 - Using `GNUInstallDirs`, rather than mimicking it, as the original author was hesitant to do but others requested.

 - Converting all the packages, not just LLVM, effecting many more projects than LLVM itself.

I figured it was time to make a new revision.

I have used this patch series (and many back-ports) as the basis of https://github.com/NixOS/nixpkgs/pull/111487 for my distro (NixOS), which was merged last spring (2021). It looked like people were generally on board in D28234, but I make note of this here in case extra motivation is useful.

---

As pointed out in the original issue, a central tension is that LLVM already has some partial support for these sorts of things. Variables like `COMPILER_RT_INSTALL_PATH` have already been dealt with. Variables like `LLVM_LIBDIR_SUFFIX` however, will require further work, so that we may use `CMAKE_INSTALL_LIBDIR`.

These remaining items will be addressed in further patches. What is here is now rote and so we should get it out of the way before dealing more intricately with the remainder.

Reviewed By: #libunwind, #libc, #libc_abi, compnerd

Differential Revision: https://reviews.llvm.org/D99484
---
 clang-tools-extra/CMakeLists.txt                              | 1 +
 clang-tools-extra/clang-doc/tool/CMakeLists.txt               | 4 ++--
 .../clang-include-fixer/find-all-symbols/tool/CMakeLists.txt  | 2 +-
 clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt     | 4 ++--
 clang-tools-extra/clang-tidy/CMakeLists.txt                   | 2 +-
 clang-tools-extra/clang-tidy/tool/CMakeLists.txt              | 4 ++--
 clang-tools-extra/modularize/CMakeLists.txt                   | 2 +-
 7 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/CMakeLists.txt b/clang-tools-extra/CMakeLists.txt
index 2e73b6ba81d2e..7b8274a97336b 100644
--- a/clang-tools-extra/CMakeLists.txt
+++ b/clang-tools-extra/CMakeLists.txt
@@ -1,4 +1,5 @@
 include(CMakeDependentOption)
+include(GNUInstallDirs)
 
 option(CLANG_TIDY_ENABLE_STATIC_ANALYZER
   "Include static analyzer checks in clang-tidy" ON)
diff --git a/clang-tools-extra/clang-doc/tool/CMakeLists.txt b/clang-tools-extra/clang-doc/tool/CMakeLists.txt
index 7e71478869160..fb8317b272932 100644
--- a/clang-tools-extra/clang-doc/tool/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/tool/CMakeLists.txt
@@ -19,9 +19,9 @@ target_link_libraries(clang-doc
   )
 
 install(FILES ../assets/clang-doc-default-stylesheet.css
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-doc)
 
 install(FILES ../assets/index.js
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-doc)
diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt b/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt
index 8f5509d22e24a..e6926a0d5bd10 100644
--- a/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt
+++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/tool/CMakeLists.txt
@@ -20,5 +20,5 @@ target_link_libraries(find-all-symbols
   )
 
 install(PROGRAMS run-find-all-symbols.py
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT find-all-symbols)
diff --git a/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt b/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt
index 3936ac1e8a5a5..5b9e00ab87cd8 100644
--- a/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt
+++ b/clang-tools-extra/clang-include-fixer/tool/CMakeLists.txt
@@ -21,8 +21,8 @@ target_link_libraries(clang-include-fixer
   )
 
 install(PROGRAMS clang-include-fixer.el
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-include-fixer)
 install(PROGRAMS clang-include-fixer.py
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-include-fixer)
diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt
index 455645050d93d..075e9f9909d65 100644
--- a/clang-tools-extra/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/CMakeLists.txt
@@ -113,7 +113,7 @@ add_subdirectory(utils)
 
 if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
   install(DIRECTORY .
-    DESTINATION include/clang-tidy
+    DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/clang-tidy"
     COMPONENT clang-tidy-headers
     FILES_MATCHING
     PATTERN "*.h"
diff --git a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt
index ad3255b024fc6..4b8c93801501a 100644
--- a/clang-tools-extra/clang-tidy/tool/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/tool/CMakeLists.txt
@@ -52,9 +52,9 @@ target_link_libraries(clang-tidy
 
 
 install(PROGRAMS clang-tidy-diff.py
-  DESTINATION share/clang
+  DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
   COMPONENT clang-tidy)
 install(PROGRAMS run-clang-tidy.py
-  DESTINATION bin
+  DESTINATION "${CMAKE_INSTALL_BINDIR}"
   COMPONENT clang-tidy
   RENAME run-clang-tidy)
diff --git a/clang-tools-extra/modularize/CMakeLists.txt b/clang-tools-extra/modularize/CMakeLists.txt
index 4caae81c49b62..fb17e353c39fd 100644
--- a/clang-tools-extra/modularize/CMakeLists.txt
+++ b/clang-tools-extra/modularize/CMakeLists.txt
@@ -23,5 +23,5 @@ clang_target_link_libraries(modularize
   )
 
 install(TARGETS modularize
-        RUNTIME DESTINATION bin
+        RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}"
         COMPONENT clang-extras)

From c1988dbf2d191d771e3a396eaa6396500965787d Mon Sep 17 00:00:00 2001
From: Malhar Jajoo <malhar.jajoo@arm.com>
Date: Sat, 22 Jan 2022 22:05:38 +0000
Subject: [PATCH 264/946] [openmp] Allow x87 fp functions only in Openmp
 runtime for x86.

This patch allows Openmp runtime atomic functions operating on x87 high-precision
to be present only in Openmp runtime for x86 architectures

The functions affected are:

__kmpc_atomic_10
__kmpc_atomic_20
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_float10_add
__kmpc_atomic_float10_div
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_sub

__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_max
__kmpc_atomic_float10_min

Differential Revision: https://reviews.llvm.org/D117473
---
 openmp/runtime/src/kmp_atomic.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp
index 83d646054f633..0bd7b1a41ac46 100644
--- a/openmp/runtime/src/kmp_atomic.cpp
+++ b/openmp/runtime/src/kmp_atomic.cpp
@@ -1235,10 +1235,12 @@ MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 MIN_MAX_CRITICAL(float10, max, long double, <, 10r,
                  1) // __kmpc_atomic_float10_max
 MIN_MAX_CRITICAL(float10, min, long double, >, 10r,
                  1) // __kmpc_atomic_float10_min
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
 #if KMP_HAVE_QUAD
 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                  1) // __kmpc_atomic_float16_max
@@ -1317,6 +1319,7 @@ ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
   }
 
 /* ------------------------------------------------------------------------- */
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 // routines for long double type
 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                 1) // __kmpc_atomic_float10_add
@@ -1326,6 +1329,7 @@ ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                 1) // __kmpc_atomic_float10_mul
 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                 1) // __kmpc_atomic_float10_div
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
 #if KMP_HAVE_QUAD
 // routines for _Quad type
 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
@@ -1371,6 +1375,7 @@ ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                 1) // __kmpc_atomic_cmplx10_add
 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
@@ -1379,6 +1384,7 @@ ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                 1) // __kmpc_atomic_cmplx10_mul
 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                 1) // __kmpc_atomic_cmplx10_div
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
 #if KMP_HAVE_QUAD
 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                 1) // __kmpc_atomic_cmplx16_add
@@ -1797,6 +1803,7 @@ ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
 
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                    1) // __kmpc_atomic_float10_add_fp
 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
@@ -1806,7 +1813,6 @@ ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                    1) // __kmpc_atomic_float10_div_fp
 
-#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 // Reverse operations
 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
@@ -3594,7 +3600,7 @@ void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
       __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
   }
 }
-
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                       void (*f)(void *, void *, void *)) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
@@ -3615,6 +3621,7 @@ void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
 #endif /* KMP_GOMP_COMPAT */
     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
 }
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
 
 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                       void (*f)(void *, void *, void *)) {
@@ -3636,7 +3643,7 @@ void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
 #endif /* KMP_GOMP_COMPAT */
     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
 }
-
+#if KMP_ARCH_X86 || KMP_ARCH_X86_64
 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                       void (*f)(void *, void *, void *)) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);
@@ -3657,7 +3664,7 @@ void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
 #endif /* KMP_GOMP_COMPAT */
     __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
 }
-
+#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                       void (*f)(void *, void *, void *)) {
   KMP_DEBUG_ASSERT(__kmp_init_serial);

From b8467952404c3598c9c901332607eb1886e1721c Mon Sep 17 00:00:00 2001
From: Dave <croepha@gmail.com>
Date: Sat, 22 Jan 2022 17:30:27 -0800
Subject: [PATCH 265/946] [docs] [clang] Small documentation change for
 compilation databases

We have an page dedicated to compliation databases including
various ways to generate them, but we don't mention that clang
has a built in method to do this.  This addresses that.

Reviewed By: joerg

Differential Revision: https://reviews.llvm.org/D116882
---
 clang/docs/JSONCompilationDatabase.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/docs/JSONCompilationDatabase.rst b/clang/docs/JSONCompilationDatabase.rst
index 3595cf452f4ca..6fd17fe440add 100644
--- a/clang/docs/JSONCompilationDatabase.rst
+++ b/clang/docs/JSONCompilationDatabase.rst
@@ -29,6 +29,10 @@ system is not necessarily the best solution:
 Supported Systems
 =================
 
+Clang has the ablity to generate compilation database fragments via
+the :option:`-MJ argument <clang -MJ\<arg>>`. You can concatenate those
+fragments together between ``[`` and ``]`` to create a compilation database.
+
 Currently `CMake <https://cmake.org>`_ (since 2.8.5) supports generation
 of compilation databases for Unix Makefile builds (Ninja builds in the
 works) with the option ``CMAKE_EXPORT_COMPILE_COMMANDS``.

From 37d1d02200b9472082304c191f396f0489d00e05 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang@intel.com>
Date: Sun, 23 Jan 2022 09:14:58 +0800
Subject: [PATCH 266/946] [X86][MS] Change the alignment of f80 to 16 bytes on
 Windows 32bits to match with ICC

MSVC currently doesn't support 80 bits long double. ICC supports it when
the option `/Qlong-double` is specified. Changing the alignment of f80
to 16 bytes so that we can be compatible with ICC's option.

Reviewed By: rnk, craig.topper

Differential Revision: https://reviews.llvm.org/D115942
---
 clang/lib/Basic/Targets/X86.h                 |  11 +-
 clang/test/CodeGen/target-data.c              |   2 +-
 llvm/lib/IR/AutoUpgrade.cpp                   |  31 ++-
 llvm/lib/Target/X86/X86TargetMachine.cpp      |   2 +-
 llvm/test/Bitcode/upgrade-datalayout3.ll      |   2 +-
 llvm/test/Bitcode/upgrade-datalayout4.ll      |   8 +
 .../test/CodeGen/X86/long-double-abi-align.ll |   4 +-
 llvm/test/CodeGen/X86/scalar-fp-to-i32.ll     | 219 ++++++++++++------
 llvm/test/CodeGen/X86/scalar-fp-to-i64.ll     |  32 +--
 .../Bitcode/DataLayoutUpgradeTest.cpp         |   2 +-
 10 files changed, 209 insertions(+), 104 deletions(-)
 create mode 100644 llvm/test/Bitcode/upgrade-datalayout4.ll

diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index c952b8c9a3369..d1b66432e38b4 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -533,11 +533,12 @@ class LLVM_LIBRARY_VISIBILITY WindowsX86_32TargetInfo
     DoubleAlign = LongLongAlign = 64;
     bool IsWinCOFF =
         getTriple().isOSWindows() && getTriple().isOSBinFormatCOFF();
-    resetDataLayout(IsWinCOFF ? "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:"
-                                "64-i64:64-f80:32-n8:16:32-a:0:32-S32"
-                              : "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:"
-                                "64-i64:64-f80:32-n8:16:32-a:0:32-S32",
-                    IsWinCOFF ? "_" : "");
+    bool IsMSVC = getTriple().isWindowsMSVCEnvironment();
+    std::string Layout = IsWinCOFF ? "e-m:x" : "e-m:e";
+    Layout += "-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-";
+    Layout += IsMSVC ? "f80:128" : "f80:32";
+    Layout += "-n8:16:32-a:0:32-S32";
+    resetDataLayout(Layout, IsWinCOFF ? "_" : "");
   }
 };
 
diff --git a/clang/test/CodeGen/target-data.c b/clang/test/CodeGen/target-data.c
index d702f845112bd..e4150837279ce 100644
--- a/clang/test/CodeGen/target-data.c
+++ b/clang/test/CodeGen/target-data.c
@@ -8,7 +8,7 @@
 
 // RUN: %clang_cc1 -triple i686-unknown-win32 -emit-llvm -o - %s | \
 // RUN:     FileCheck --check-prefix=I686-WIN32 %s
-// I686-WIN32: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
+// I686-WIN32: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
 
 // RUN: %clang_cc1 -triple i686-unknown-cygwin -emit-llvm -o - %s | \
 // RUN:     FileCheck --check-prefix=I686-CYGWIN %s
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 5aa3b3008581f..b820eabf173d7 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -4576,18 +4576,31 @@ std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
     return DL.empty() ? std::string("G1") : (DL + "-G1").str();
   }
 
+  std::string Res = DL.str();
+  if (!T.isX86())
+    return Res;
+
+  // If the datalayout matches the expected format, add pointer size address
+  // spaces to the datalayout.
   std::string AddrSpaces = "-p270:32:32-p271:32:32-p272:64:64";
-  // If X86, and the datalayout matches the expected format, add pointer size
-  // address spaces to the datalayout.
-  if (!T.isX86() || DL.contains(AddrSpaces))
-    return std::string(DL);
+  if (!DL.contains(AddrSpaces)) {
+    SmallVector<StringRef, 4> Groups;
+    Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
+    if (R.match(DL, &Groups))
+      Res = (Groups[1] + AddrSpaces + Groups[3]).str();
+  }
 
-  SmallVector<StringRef, 4> Groups;
-  Regex R("(e-m:[a-z](-p:32:32)?)(-[if]64:.*$)");
-  if (!R.match(DL, &Groups))
-    return std::string(DL);
+  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
+  // Raising the alignment is safe because Clang did not produce f80 values in
+  // the MSVC environment before this upgrade was added.
+  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
+    StringRef Ref = Res;
+    auto I = Ref.find("-f80:32-");
+    if (I != StringRef::npos)
+      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
+  }
 
-  return (Groups[1] + AddrSpaces + Groups[3]).str();
+  return Res;
 }
 
 void llvm::UpgradeAttributes(AttrBuilder &B) {
diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp
index 78bc5519c23ff..e3d0128dd73da 100644
--- a/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -127,7 +127,7 @@ static std::string computeDataLayout(const Triple &TT) {
   // Some ABIs align long double to 128 bits, others to 32.
   if (TT.isOSNaCl() || TT.isOSIAMCU())
     ; // No f80
-  else if (TT.isArch64Bit() || TT.isOSDarwin())
+  else if (TT.isArch64Bit() || TT.isOSDarwin() || TT.isWindowsMSVCEnvironment())
     Ret += "-f80:128";
   else
     Ret += "-f80:32";
diff --git a/llvm/test/Bitcode/upgrade-datalayout3.ll b/llvm/test/Bitcode/upgrade-datalayout3.ll
index 526ba6069dc6f..6d95f2407acf4 100644
--- a/llvm/test/Bitcode/upgrade-datalayout3.ll
+++ b/llvm/test/Bitcode/upgrade-datalayout3.ll
@@ -5,4 +5,4 @@
 target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
 target triple = "i686-pc-windows-msvc"
 
-; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-S32"
+; CHECK: target datalayout = "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-S32"
diff --git a/llvm/test/Bitcode/upgrade-datalayout4.ll b/llvm/test/Bitcode/upgrade-datalayout4.ll
new file mode 100644
index 0000000000000..ee0e5fe3bf6fa
--- /dev/null
+++ b/llvm/test/Bitcode/upgrade-datalayout4.ll
@@ -0,0 +1,8 @@
+; Test to make sure datalayout is automatically upgraded.
+;
+; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+; CHECK: target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32-a:0:32-S32"
diff --git a/llvm/test/CodeGen/X86/long-double-abi-align.ll b/llvm/test/CodeGen/X86/long-double-abi-align.ll
index ca45886db5705..6a409f1b1dce1 100644
--- a/llvm/test/CodeGen/X86/long-double-abi-align.ll
+++ b/llvm/test/CodeGen/X86/long-double-abi-align.ll
@@ -11,7 +11,7 @@ define void @foo(i32 %0, x86_fp80 %1, i32 %2) nounwind {
 ; MSVC-NEXT:    movl %esp, %ebp
 ; MSVC-NEXT:    andl $-16, %esp
 ; MSVC-NEXT:    subl $32, %esp
-; MSVC-NEXT:    fldt 12(%ebp)
+; MSVC-NEXT:    fldt 24(%ebp)
 ; MSVC-NEXT:    fstpt (%esp)
 ; MSVC-NEXT:    leal 8(%ebp), %eax
 ; MSVC-NEXT:    pushl %eax
@@ -21,7 +21,7 @@ define void @foo(i32 %0, x86_fp80 %1, i32 %2) nounwind {
 ; MSVC-NEXT:    pushl %eax
 ; MSVC-NEXT:    calll _escape
 ; MSVC-NEXT:    addl $4, %esp
-; MSVC-NEXT:    leal 24(%ebp), %eax
+; MSVC-NEXT:    leal 40(%ebp), %eax
 ; MSVC-NEXT:    pushl %eax
 ; MSVC-NEXT:    calll _escape
 ; MSVC-NEXT:    addl $4, %esp
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
index 469c05d44813f..b22533a8c8ee2 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i32.ll
@@ -344,8 +344,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
 ; X86-AVX512-WIN:       # %bb.0:
 ; X86-AVX512-WIN-NEXT:    pushl %ebp
 ; X86-AVX512-WIN-NEXT:    movl %esp, %ebp
-; X86-AVX512-WIN-NEXT:    andl $-8, %esp
-; X86-AVX512-WIN-NEXT:    subl $8, %esp
+; X86-AVX512-WIN-NEXT:    andl $-16, %esp
+; X86-AVX512-WIN-NEXT:    subl $16, %esp
 ; X86-AVX512-WIN-NEXT:    fldt 8(%ebp)
 ; X86-AVX512-WIN-NEXT:    fisttpll (%esp)
 ; X86-AVX512-WIN-NEXT:    movl (%esp), %eax
@@ -382,8 +382,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
 ; X86-SSE3-WIN:       # %bb.0:
 ; X86-SSE3-WIN-NEXT:    pushl %ebp
 ; X86-SSE3-WIN-NEXT:    movl %esp, %ebp
-; X86-SSE3-WIN-NEXT:    andl $-8, %esp
-; X86-SSE3-WIN-NEXT:    subl $8, %esp
+; X86-SSE3-WIN-NEXT:    andl $-16, %esp
+; X86-SSE3-WIN-NEXT:    subl $16, %esp
 ; X86-SSE3-WIN-NEXT:    fldt 8(%ebp)
 ; X86-SSE3-WIN-NEXT:    fisttpll (%esp)
 ; X86-SSE3-WIN-NEXT:    movl (%esp), %eax
@@ -420,8 +420,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
 ; X86-SSE2-WIN:       # %bb.0:
 ; X86-SSE2-WIN-NEXT:    pushl %ebp
 ; X86-SSE2-WIN-NEXT:    movl %esp, %ebp
-; X86-SSE2-WIN-NEXT:    andl $-8, %esp
-; X86-SSE2-WIN-NEXT:    subl $16, %esp
+; X86-SSE2-WIN-NEXT:    andl $-16, %esp
+; X86-SSE2-WIN-NEXT:    subl $32, %esp
 ; X86-SSE2-WIN-NEXT:    fldt 8(%ebp)
 ; X86-SSE2-WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; X86-SSE2-WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
@@ -482,8 +482,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
 ; X86-SSE1-WIN:       # %bb.0:
 ; X86-SSE1-WIN-NEXT:    pushl %ebp
 ; X86-SSE1-WIN-NEXT:    movl %esp, %ebp
-; X86-SSE1-WIN-NEXT:    andl $-8, %esp
-; X86-SSE1-WIN-NEXT:    subl $16, %esp
+; X86-SSE1-WIN-NEXT:    andl $-16, %esp
+; X86-SSE1-WIN-NEXT:    subl $32, %esp
 ; X86-SSE1-WIN-NEXT:    fldt 8(%ebp)
 ; X86-SSE1-WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; X86-SSE1-WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
@@ -516,8 +516,8 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
 ; X87-WIN:       # %bb.0:
 ; X87-WIN-NEXT:    pushl %ebp
 ; X87-WIN-NEXT:    movl %esp, %ebp
-; X87-WIN-NEXT:    andl $-8, %esp
-; X87-WIN-NEXT:    subl $16, %esp
+; X87-WIN-NEXT:    andl $-16, %esp
+; X87-WIN-NEXT:    subl $32, %esp
 ; X87-WIN-NEXT:    fldt 8(%ebp)
 ; X87-WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; X87-WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
@@ -550,14 +550,27 @@ define i32 @x_to_u32(x86_fp80 %a) nounwind {
 }
 
 define i32 @x_to_s32(x86_fp80 %a) nounwind {
-; X86-AVX512-LABEL: x_to_s32:
-; X86-AVX512:       # %bb.0:
-; X86-AVX512-NEXT:    pushl %eax
-; X86-AVX512-NEXT:    fldt {{[0-9]+}}(%esp)
-; X86-AVX512-NEXT:    fisttpl (%esp)
-; X86-AVX512-NEXT:    movl (%esp), %eax
-; X86-AVX512-NEXT:    popl %ecx
-; X86-AVX512-NEXT:    retl
+; X86-AVX512-WIN-LABEL: x_to_s32:
+; X86-AVX512-WIN:       # %bb.0:
+; X86-AVX512-WIN-NEXT:    pushl %ebp
+; X86-AVX512-WIN-NEXT:    movl %esp, %ebp
+; X86-AVX512-WIN-NEXT:    andl $-16, %esp
+; X86-AVX512-WIN-NEXT:    subl $16, %esp
+; X86-AVX512-WIN-NEXT:    fldt 8(%ebp)
+; X86-AVX512-WIN-NEXT:    fisttpl {{[0-9]+}}(%esp)
+; X86-AVX512-WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-AVX512-WIN-NEXT:    movl %ebp, %esp
+; X86-AVX512-WIN-NEXT:    popl %ebp
+; X86-AVX512-WIN-NEXT:    retl
+;
+; X86-AVX512-LIN-LABEL: x_to_s32:
+; X86-AVX512-LIN:       # %bb.0:
+; X86-AVX512-LIN-NEXT:    pushl %eax
+; X86-AVX512-LIN-NEXT:    fldt {{[0-9]+}}(%esp)
+; X86-AVX512-LIN-NEXT:    fisttpl (%esp)
+; X86-AVX512-LIN-NEXT:    movl (%esp), %eax
+; X86-AVX512-LIN-NEXT:    popl %ecx
+; X86-AVX512-LIN-NEXT:    retl
 ;
 ; X64-AVX512-WIN-LABEL: x_to_s32:
 ; X64-AVX512-WIN:       # %bb.0:
@@ -575,14 +588,27 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind {
 ; X64-AVX512-LIN-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
 ; X64-AVX512-LIN-NEXT:    retq
 ;
-; X86-SSE3-LABEL: x_to_s32:
-; X86-SSE3:       # %bb.0:
-; X86-SSE3-NEXT:    pushl %eax
-; X86-SSE3-NEXT:    fldt {{[0-9]+}}(%esp)
-; X86-SSE3-NEXT:    fisttpl (%esp)
-; X86-SSE3-NEXT:    movl (%esp), %eax
-; X86-SSE3-NEXT:    popl %ecx
-; X86-SSE3-NEXT:    retl
+; X86-SSE3-WIN-LABEL: x_to_s32:
+; X86-SSE3-WIN:       # %bb.0:
+; X86-SSE3-WIN-NEXT:    pushl %ebp
+; X86-SSE3-WIN-NEXT:    movl %esp, %ebp
+; X86-SSE3-WIN-NEXT:    andl $-16, %esp
+; X86-SSE3-WIN-NEXT:    subl $16, %esp
+; X86-SSE3-WIN-NEXT:    fldt 8(%ebp)
+; X86-SSE3-WIN-NEXT:    fisttpl {{[0-9]+}}(%esp)
+; X86-SSE3-WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE3-WIN-NEXT:    movl %ebp, %esp
+; X86-SSE3-WIN-NEXT:    popl %ebp
+; X86-SSE3-WIN-NEXT:    retl
+;
+; X86-SSE3-LIN-LABEL: x_to_s32:
+; X86-SSE3-LIN:       # %bb.0:
+; X86-SSE3-LIN-NEXT:    pushl %eax
+; X86-SSE3-LIN-NEXT:    fldt {{[0-9]+}}(%esp)
+; X86-SSE3-LIN-NEXT:    fisttpl (%esp)
+; X86-SSE3-LIN-NEXT:    movl (%esp), %eax
+; X86-SSE3-LIN-NEXT:    popl %ecx
+; X86-SSE3-LIN-NEXT:    retl
 ;
 ; X64-SSE3-WIN-LABEL: x_to_s32:
 ; X64-SSE3-WIN:       # %bb.0:
@@ -600,20 +626,39 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind {
 ; X64-SSE3-LIN-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
 ; X64-SSE3-LIN-NEXT:    retq
 ;
-; X86-SSE2-LABEL: x_to_s32:
-; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    subl $8, %esp
-; X86-SSE2-NEXT:    fldt {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fnstcw (%esp)
-; X86-SSE2-NEXT:    movzwl (%esp), %eax
-; X86-SSE2-NEXT:    orl $3072, %eax # imm = 0xC00
-; X86-SSE2-NEXT:    movw %ax, {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fistpl {{[0-9]+}}(%esp)
-; X86-SSE2-NEXT:    fldcw (%esp)
-; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE2-NEXT:    addl $8, %esp
-; X86-SSE2-NEXT:    retl
+; X86-SSE2-WIN-LABEL: x_to_s32:
+; X86-SSE2-WIN:       # %bb.0:
+; X86-SSE2-WIN-NEXT:    pushl %ebp
+; X86-SSE2-WIN-NEXT:    movl %esp, %ebp
+; X86-SSE2-WIN-NEXT:    andl $-16, %esp
+; X86-SSE2-WIN-NEXT:    subl $16, %esp
+; X86-SSE2-WIN-NEXT:    fldt 8(%ebp)
+; X86-SSE2-WIN-NEXT:    fnstcw (%esp)
+; X86-SSE2-WIN-NEXT:    movzwl (%esp), %eax
+; X86-SSE2-WIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; X86-SSE2-WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X86-SSE2-WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-SSE2-WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-SSE2-WIN-NEXT:    fldcw (%esp)
+; X86-SSE2-WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-WIN-NEXT:    movl %ebp, %esp
+; X86-SSE2-WIN-NEXT:    popl %ebp
+; X86-SSE2-WIN-NEXT:    retl
+;
+; X86-SSE2-LIN-LABEL: x_to_s32:
+; X86-SSE2-LIN:       # %bb.0:
+; X86-SSE2-LIN-NEXT:    subl $8, %esp
+; X86-SSE2-LIN-NEXT:    fldt {{[0-9]+}}(%esp)
+; X86-SSE2-LIN-NEXT:    fnstcw (%esp)
+; X86-SSE2-LIN-NEXT:    movzwl (%esp), %eax
+; X86-SSE2-LIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; X86-SSE2-LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X86-SSE2-LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-SSE2-LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-SSE2-LIN-NEXT:    fldcw (%esp)
+; X86-SSE2-LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-LIN-NEXT:    addl $8, %esp
+; X86-SSE2-LIN-NEXT:    retl
 ;
 ; X64-SSE2-WIN-LABEL: x_to_s32:
 ; X64-SSE2-WIN:       # %bb.0:
@@ -643,35 +688,73 @@ define i32 @x_to_s32(x86_fp80 %a) nounwind {
 ; X64-SSE2-LIN-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
 ; X64-SSE2-LIN-NEXT:    retq
 ;
-; X86-SSE1-LABEL: x_to_s32:
-; X86-SSE1:       # %bb.0:
-; X86-SSE1-NEXT:    subl $8, %esp
-; X86-SSE1-NEXT:    fldt {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    fnstcw (%esp)
-; X86-SSE1-NEXT:    movzwl (%esp), %eax
-; X86-SSE1-NEXT:    orl $3072, %eax # imm = 0xC00
-; X86-SSE1-NEXT:    movw %ax, {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    fistpl {{[0-9]+}}(%esp)
-; X86-SSE1-NEXT:    fldcw (%esp)
-; X86-SSE1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-SSE1-NEXT:    addl $8, %esp
-; X86-SSE1-NEXT:    retl
+; X86-SSE1-WIN-LABEL: x_to_s32:
+; X86-SSE1-WIN:       # %bb.0:
+; X86-SSE1-WIN-NEXT:    pushl %ebp
+; X86-SSE1-WIN-NEXT:    movl %esp, %ebp
+; X86-SSE1-WIN-NEXT:    andl $-16, %esp
+; X86-SSE1-WIN-NEXT:    subl $16, %esp
+; X86-SSE1-WIN-NEXT:    fldt 8(%ebp)
+; X86-SSE1-WIN-NEXT:    fnstcw (%esp)
+; X86-SSE1-WIN-NEXT:    movzwl (%esp), %eax
+; X86-SSE1-WIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; X86-SSE1-WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X86-SSE1-WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-SSE1-WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-SSE1-WIN-NEXT:    fldcw (%esp)
+; X86-SSE1-WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-WIN-NEXT:    movl %ebp, %esp
+; X86-SSE1-WIN-NEXT:    popl %ebp
+; X86-SSE1-WIN-NEXT:    retl
 ;
-; X87-LABEL: x_to_s32:
-; X87:       # %bb.0:
-; X87-NEXT:    subl $8, %esp
-; X87-NEXT:    fldt {{[0-9]+}}(%esp)
-; X87-NEXT:    fnstcw (%esp)
-; X87-NEXT:    movzwl (%esp), %eax
-; X87-NEXT:    orl $3072, %eax # imm = 0xC00
-; X87-NEXT:    movw %ax, {{[0-9]+}}(%esp)
-; X87-NEXT:    fldcw {{[0-9]+}}(%esp)
-; X87-NEXT:    fistpl {{[0-9]+}}(%esp)
-; X87-NEXT:    fldcw (%esp)
-; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X87-NEXT:    addl $8, %esp
-; X87-NEXT:    retl
+; X86-SSE1-LIN-LABEL: x_to_s32:
+; X86-SSE1-LIN:       # %bb.0:
+; X86-SSE1-LIN-NEXT:    subl $8, %esp
+; X86-SSE1-LIN-NEXT:    fldt {{[0-9]+}}(%esp)
+; X86-SSE1-LIN-NEXT:    fnstcw (%esp)
+; X86-SSE1-LIN-NEXT:    movzwl (%esp), %eax
+; X86-SSE1-LIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; X86-SSE1-LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X86-SSE1-LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X86-SSE1-LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X86-SSE1-LIN-NEXT:    fldcw (%esp)
+; X86-SSE1-LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SSE1-LIN-NEXT:    addl $8, %esp
+; X86-SSE1-LIN-NEXT:    retl
+;
+; X87-WIN-LABEL: x_to_s32:
+; X87-WIN:       # %bb.0:
+; X87-WIN-NEXT:    pushl %ebp
+; X87-WIN-NEXT:    movl %esp, %ebp
+; X87-WIN-NEXT:    andl $-16, %esp
+; X87-WIN-NEXT:    subl $16, %esp
+; X87-WIN-NEXT:    fldt 8(%ebp)
+; X87-WIN-NEXT:    fnstcw (%esp)
+; X87-WIN-NEXT:    movzwl (%esp), %eax
+; X87-WIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; X87-WIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-WIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X87-WIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X87-WIN-NEXT:    fldcw (%esp)
+; X87-WIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X87-WIN-NEXT:    movl %ebp, %esp
+; X87-WIN-NEXT:    popl %ebp
+; X87-WIN-NEXT:    retl
+;
+; X87-LIN-LABEL: x_to_s32:
+; X87-LIN:       # %bb.0:
+; X87-LIN-NEXT:    subl $8, %esp
+; X87-LIN-NEXT:    fldt {{[0-9]+}}(%esp)
+; X87-LIN-NEXT:    fnstcw (%esp)
+; X87-LIN-NEXT:    movzwl (%esp), %eax
+; X87-LIN-NEXT:    orl $3072, %eax # imm = 0xC00
+; X87-LIN-NEXT:    movw %ax, {{[0-9]+}}(%esp)
+; X87-LIN-NEXT:    fldcw {{[0-9]+}}(%esp)
+; X87-LIN-NEXT:    fistpl {{[0-9]+}}(%esp)
+; X87-LIN-NEXT:    fldcw (%esp)
+; X87-LIN-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X87-LIN-NEXT:    addl $8, %esp
+; X87-LIN-NEXT:    retl
   %r = fptosi x86_fp80 %a to i32
   ret i32 %r
 }
diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
index 0ce9c87057467..718b8b558c9b2 100644
--- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -909,8 +909,8 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; X86-AVX512-WIN:       # %bb.0:
 ; X86-AVX512-WIN-NEXT:    pushl %ebp
 ; X86-AVX512-WIN-NEXT:    movl %esp, %ebp
-; X86-AVX512-WIN-NEXT:    andl $-8, %esp
-; X86-AVX512-WIN-NEXT:    subl $8, %esp
+; X86-AVX512-WIN-NEXT:    andl $-16, %esp
+; X86-AVX512-WIN-NEXT:    subl $16, %esp
 ; X86-AVX512-WIN-NEXT:    fldt 8(%ebp)
 ; X86-AVX512-WIN-NEXT:    flds __real@5f000000
 ; X86-AVX512-WIN-NEXT:    xorl %edx, %edx
@@ -985,8 +985,8 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; X86-SSE3-WIN:       # %bb.0:
 ; X86-SSE3-WIN-NEXT:    pushl %ebp
 ; X86-SSE3-WIN-NEXT:    movl %esp, %ebp
-; X86-SSE3-WIN-NEXT:    andl $-8, %esp
-; X86-SSE3-WIN-NEXT:    subl $8, %esp
+; X86-SSE3-WIN-NEXT:    andl $-16, %esp
+; X86-SSE3-WIN-NEXT:    subl $16, %esp
 ; X86-SSE3-WIN-NEXT:    fldt 8(%ebp)
 ; X86-SSE3-WIN-NEXT:    flds __real@5f000000
 ; X86-SSE3-WIN-NEXT:    xorl %edx, %edx
@@ -1061,8 +1061,8 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; X86-SSE2-WIN:       # %bb.0:
 ; X86-SSE2-WIN-NEXT:    pushl %ebp
 ; X86-SSE2-WIN-NEXT:    movl %esp, %ebp
-; X86-SSE2-WIN-NEXT:    andl $-8, %esp
-; X86-SSE2-WIN-NEXT:    subl $16, %esp
+; X86-SSE2-WIN-NEXT:    andl $-16, %esp
+; X86-SSE2-WIN-NEXT:    subl $32, %esp
 ; X86-SSE2-WIN-NEXT:    fldt 8(%ebp)
 ; X86-SSE2-WIN-NEXT:    flds __real@5f000000
 ; X86-SSE2-WIN-NEXT:    xorl %edx, %edx
@@ -1161,8 +1161,8 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; X87-WIN:       # %bb.0:
 ; X87-WIN-NEXT:    pushl %ebp
 ; X87-WIN-NEXT:    movl %esp, %ebp
-; X87-WIN-NEXT:    andl $-8, %esp
-; X87-WIN-NEXT:    subl $16, %esp
+; X87-WIN-NEXT:    andl $-16, %esp
+; X87-WIN-NEXT:    subl $32, %esp
 ; X87-WIN-NEXT:    fldt 8(%ebp)
 ; X87-WIN-NEXT:    flds __real@5f000000
 ; X87-WIN-NEXT:    fucom %st(1)
@@ -1235,8 +1235,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind {
 ; X86-AVX512-WIN:       # %bb.0:
 ; X86-AVX512-WIN-NEXT:    pushl %ebp
 ; X86-AVX512-WIN-NEXT:    movl %esp, %ebp
-; X86-AVX512-WIN-NEXT:    andl $-8, %esp
-; X86-AVX512-WIN-NEXT:    subl $8, %esp
+; X86-AVX512-WIN-NEXT:    andl $-16, %esp
+; X86-AVX512-WIN-NEXT:    subl $16, %esp
 ; X86-AVX512-WIN-NEXT:    fldt 8(%ebp)
 ; X86-AVX512-WIN-NEXT:    fisttpll (%esp)
 ; X86-AVX512-WIN-NEXT:    movl (%esp), %eax
@@ -1275,8 +1275,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind {
 ; X86-SSE3-WIN:       # %bb.0:
 ; X86-SSE3-WIN-NEXT:    pushl %ebp
 ; X86-SSE3-WIN-NEXT:    movl %esp, %ebp
-; X86-SSE3-WIN-NEXT:    andl $-8, %esp
-; X86-SSE3-WIN-NEXT:    subl $8, %esp
+; X86-SSE3-WIN-NEXT:    andl $-16, %esp
+; X86-SSE3-WIN-NEXT:    subl $16, %esp
 ; X86-SSE3-WIN-NEXT:    fldt 8(%ebp)
 ; X86-SSE3-WIN-NEXT:    fisttpll (%esp)
 ; X86-SSE3-WIN-NEXT:    movl (%esp), %eax
@@ -1315,8 +1315,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind {
 ; X86-SSE2-WIN:       # %bb.0:
 ; X86-SSE2-WIN-NEXT:    pushl %ebp
 ; X86-SSE2-WIN-NEXT:    movl %esp, %ebp
-; X86-SSE2-WIN-NEXT:    andl $-8, %esp
-; X86-SSE2-WIN-NEXT:    subl $16, %esp
+; X86-SSE2-WIN-NEXT:    andl $-16, %esp
+; X86-SSE2-WIN-NEXT:    subl $32, %esp
 ; X86-SSE2-WIN-NEXT:    fldt 8(%ebp)
 ; X86-SSE2-WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; X86-SSE2-WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
@@ -1379,8 +1379,8 @@ define i64 @x_to_s64(x86_fp80 %a) nounwind {
 ; X87-WIN:       # %bb.0:
 ; X87-WIN-NEXT:    pushl %ebp
 ; X87-WIN-NEXT:    movl %esp, %ebp
-; X87-WIN-NEXT:    andl $-8, %esp
-; X87-WIN-NEXT:    subl $16, %esp
+; X87-WIN-NEXT:    andl $-16, %esp
+; X87-WIN-NEXT:    subl $32, %esp
 ; X87-WIN-NEXT:    fldt 8(%ebp)
 ; X87-WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
 ; X87-WIN-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
diff --git a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
index ec900471a833e..0c835744e4fa7 100644
--- a/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
+++ b/llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp
@@ -24,7 +24,7 @@ TEST(DataLayoutUpgradeTest, ValidDataLayoutUpgrade) {
   EXPECT_EQ(DL1, "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64"
                  "-f80:128-n8:16:32:64-S128");
   EXPECT_EQ(DL2, "e-m:w-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64"
-                 "-f80:32-n8:16:32-S32");
+                 "-f80:128-n8:16:32-S32");
   EXPECT_EQ(DL3, "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128"
                  "-n32:64-S128");
 

From 2513b79030636e62a785019bb9477920c8140d85 Mon Sep 17 00:00:00 2001
From: Joe Loser <joeloser93@gmail.com>
Date: Wed, 19 Jan 2022 15:16:15 -0500
Subject: [PATCH 267/946] [libc++] Implement LWG3549: view_interface need not
 inherit from view_base

Implement LWG3549 by making `view_interface` not inherit from `view_base`. Types
are still views if they have a public and unambiguous derivation from
`view_interface`, so adjust the `enable_view` machinery as such to account for
that.

Differential Revision: https://reviews.llvm.org/D117714
---
 libcxx/docs/Status/Cxx2bIssues.csv                   |  2 +-
 libcxx/include/__ranges/enable_view.h                | 12 +++++++++++-
 libcxx/include/__ranges/view_interface.h             |  3 +--
 .../view.interface/view.interface.pass.cpp           |  9 +++++++++
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 726668f3bdb2a..a91e3b52a1705 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -91,7 +91,7 @@
 `3544 <https://wg21.link/LWG3544>`__,"``format-arg-store::args`` is unintentionally not exposition-only","June 2021","","","|format|"
 `3546 <https://wg21.link/LWG3546>`__,"``common_iterator``'s postfix-proxy is not quite right","June 2021","","","|ranges|"
 `3548 <https://wg21.link/LWG3548>`__,"``shared_ptr`` construction from ``unique_ptr`` should move (not copy) the deleter","June 2021","",""
-`3549 <https://wg21.link/LWG3549>`__,"``view_interface`` is overspecified to derive from ``view_base``","June 2021","","","|ranges|"
+`3549 <https://wg21.link/LWG3549>`__,"``view_interface`` is overspecified to derive from ``view_base``","June 2021","|Complete|","14.0","|ranges|"
 `3551 <https://wg21.link/LWG3551>`__,"``borrowed_{iterator,subrange}_t`` are overspecified","June 2021","","","|ranges|"
 `3552 <https://wg21.link/LWG3552>`__,"Parallel specialized memory algorithms should require forward iterators","June 2021","",""
 `3553 <https://wg21.link/LWG3553>`__,"Useless constraint in ``split_view::outer-iterator::value_type::begin()``","June 2021","","","|ranges|"
diff --git a/libcxx/include/__ranges/enable_view.h b/libcxx/include/__ranges/enable_view.h
index a09de11da81ed..e1daec046fc0c 100644
--- a/libcxx/include/__ranges/enable_view.h
+++ b/libcxx/include/__ranges/enable_view.h
@@ -12,6 +12,7 @@
 
 #include <__config>
 #include <concepts>
+#include <type_traits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
 #pragma GCC system_header
@@ -25,8 +26,17 @@ namespace ranges {
 
 struct view_base { };
 
+template<class _Derived>
+  requires is_class_v<_Derived> && same_as<_Derived, remove_cv_t<_Derived>>
+class view_interface;
+
+template<class _Op, class _Yp>
+  requires is_convertible_v<_Op*, view_interface<_Yp>*>
+void __is_derived_from_view_interface(const _Op*, const view_interface<_Yp>*);
+
 template <class _Tp>
-inline constexpr bool enable_view = derived_from<_Tp, view_base>;
+inline constexpr bool enable_view = derived_from<_Tp, view_base> ||
+  requires { ranges::__is_derived_from_view_interface((_Tp*)nullptr, (_Tp*)nullptr); };
 
 } // end namespace ranges
 
diff --git a/libcxx/include/__ranges/view_interface.h b/libcxx/include/__ranges/view_interface.h
index 8a1f5d8c9251c..c5215cbcb8e39 100644
--- a/libcxx/include/__ranges/view_interface.h
+++ b/libcxx/include/__ranges/view_interface.h
@@ -18,7 +18,6 @@
 #include <__ranges/access.h>
 #include <__ranges/concepts.h>
 #include <__ranges/empty.h>
-#include <__ranges/enable_view.h>
 #include <concepts>
 #include <type_traits>
 
@@ -40,7 +39,7 @@ void __implicitly_convert_to(type_identity_t<_Tp>) noexcept;
 
 template<class _Derived>
   requires is_class_v<_Derived> && same_as<_Derived, remove_cv_t<_Derived>>
-class view_interface : public view_base {
+class view_interface {
   _LIBCPP_HIDE_FROM_ABI
   constexpr _Derived& __derived() noexcept {
     static_assert(sizeof(_Derived) && derived_from<_Derived, view_interface> && view<_Derived>);
diff --git a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
index ebc443ebe08d3..b3f9b972a0cb2 100644
--- a/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
+++ b/libcxx/test/std/ranges/range.utility/view.interface/view.interface.pass.cpp
@@ -79,6 +79,11 @@ struct MoveOnlyForwardRange : std::ranges::view_interface<MoveOnlyForwardRange>
 };
 static_assert(std::ranges::view<MoveOnlyForwardRange>);
 
+struct MI : std::ranges::view_interface<InputRange>,
+            std::ranges::view_interface<MoveOnlyForwardRange> {
+};
+static_assert(!std::ranges::view<MI>);
+
 struct EmptyIsTrue : std::ranges::view_interface<EmptyIsTrue> {
   int buff[8] = {0, 1, 2, 3, 4, 5, 6, 7};
   constexpr ForwardIter begin() const { return ForwardIter(const_cast<int*>(buff)); }
@@ -300,6 +305,10 @@ constexpr bool testFrontBack() {
   return true;
 }
 
+struct V1 : std::ranges::view_interface<V1> { };
+struct V2 : std::ranges::view_interface<V2> { V1 base_; };
+static_assert(sizeof(V2) == sizeof(V1));
+
 int main(int, char**) {
   testEmpty();
   static_assert(testEmpty());

From 3cf15af2daa9177a5604d122a9c5cbcf86f7fe33 Mon Sep 17 00:00:00 2001
From: eopXD <eop.chen@sifive.com>
Date: Fri, 21 Jan 2022 00:39:23 -0800
Subject: [PATCH 268/946] [RISCV] Remove experimental prefix from rvv-related
 extensions.

Extensions affected: +v, +zve*, +zvl*

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117860
---
 .../RISCV/riscv-attr-builtin-alias-err.c      |   2 +-
 .../CodeGen/RISCV/riscv-attr-builtin-alias.c  |   2 +-
 .../test/CodeGen/RISCV/riscv-inline-asm-rvv.c |   4 +-
 clang/test/CodeGen/RISCV/riscv-v-debuginfo.c  |   2 +-
 clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vaadd.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vadc.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vadd.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vand.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vasub.c   |   2 +-
 .../rvv-intrinsics-overloaded/vcompress.c     |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vcpop.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vdiv.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfabs.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfadd.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfclass.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfcvt.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfdiv.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfirst.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmacc.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmadd.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmax.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmerge.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmin.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmsac.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmsub.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmul.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfmv.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfncvt.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfneg.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfnmacc.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfnmadd.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfnmsac.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfnmsub.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfrdiv.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfrec7.c  |   2 +-
 .../rvv-intrinsics-overloaded/vfredmax.c      |   2 +-
 .../rvv-intrinsics-overloaded/vfredmin.c      |   2 +-
 .../rvv-intrinsics-overloaded/vfredsum.c      |   2 +-
 .../rvv-intrinsics-overloaded/vfrsqrt7.c      |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfrsub.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfsgnj.c  |   2 +-
 .../rvv-intrinsics-overloaded/vfslide1down.c  |   2 +-
 .../rvv-intrinsics-overloaded/vfslide1up.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfsqrt.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfsub.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfwadd.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfwcvt.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfwmacc.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfwmsac.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfwmul.c  |   2 +-
 .../rvv-intrinsics-overloaded/vfwnmacc.c      |   2 +-
 .../rvv-intrinsics-overloaded/vfwnmsac.c      |   2 +-
 .../rvv-intrinsics-overloaded/vfwredsum.c     |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vfwsub.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vget.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vid.c     |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/viota.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vle.c     |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vlmul.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vloxei.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vloxseg.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vlse.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vlseg.c   |   4 +-
 .../RISCV/rvv-intrinsics-overloaded/vlsegff.c |   4 +-
 .../RISCV/rvv-intrinsics-overloaded/vlsseg.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vluxei.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vluxseg.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmacc.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmadc.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmadd.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmand.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmax.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmerge.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmfeq.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmfge.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmfgt.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmfle.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmflt.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmfne.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmin.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmmv.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmnand.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmnor.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmnot.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmor.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmsbc.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmsbf.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmseq.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmsge.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmsgt.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmsif.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmsle.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmslt.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmsne.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmsof.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmul.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmv.c     |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmxnor.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vmxor.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vnclip.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vncvt.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vneg.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vnmsac.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vnmsub.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vnot.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vnsra.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vnsrl.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vor.c     |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vredand.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vredmax.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vredmin.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vredor.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vredsum.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vredxor.c |   2 +-
 .../rvv-intrinsics-overloaded/vreinterpret.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vrem.c    |   2 +-
 .../rvv-intrinsics-overloaded/vrgather.c      |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vrsub.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsadd.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsbc.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vse.c     |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vset.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsext.c   |   2 +-
 .../rvv-intrinsics-overloaded/vslide1down.c   |   2 +-
 .../rvv-intrinsics-overloaded/vslide1up.c     |   2 +-
 .../rvv-intrinsics-overloaded/vslidedown.c    |   2 +-
 .../rvv-intrinsics-overloaded/vslideup.c      |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsll.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsmul.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsoxei.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsoxseg.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsra.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsrl.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsse.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsseg.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vssra.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vssrl.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vssseg.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vssub.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsub.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsuxei.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vsuxseg.c |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vwadd.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vwcvt.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vwmacc.c  |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vwmul.c   |   2 +-
 .../rvv-intrinsics-overloaded/vwredsum.c      |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vwsub.c   |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vxor.c    |   2 +-
 .../RISCV/rvv-intrinsics-overloaded/vzext.c   |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vaadd.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vadc.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vadd.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vand.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vasub.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vcompress.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vcpop.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vdiv.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfabs.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfadd.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfclass.c    |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfirst.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfmacc.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfmadd.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfmax.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfmerge.c    |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfmin.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfmsac.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfmsub.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfmul.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfmv.c  |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfncvt.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfneg.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfnmacc.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfnmadd.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfnmsac.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfnmsub.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfrdiv.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfrec7.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfredmax.c   |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfredmin.c   |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfredsum.c   |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c   |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfrsub.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfsgnj.c     |   2 +-
 .../RISCV/rvv-intrinsics/vfslide1down.c       |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfslide1up.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfsqrt.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vfsub.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwadd.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwcvt.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwmacc.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwmsac.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwmul.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c   |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c   |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwredsum.c  |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vfwsub.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vget.c  |   2 +-
 clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/viota.c |   2 +-
 clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vleff.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vlmul.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vloxei.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vloxseg.c    |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vlse.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vlseg.c |   4 +-
 .../CodeGen/RISCV/rvv-intrinsics/vlsegff.c    |   4 +-
 .../CodeGen/RISCV/rvv-intrinsics/vlsseg.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vluxei.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vluxseg.c    |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmacc.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmadc.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmadd.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmand.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmax.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmclr.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vmerge.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmfge.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmfle.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmflt.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmfne.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmin.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmmv.c  |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vmnand.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmnor.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmnot.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmor.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmseq.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmset.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmsge.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmsif.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmsle.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmslt.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmsne.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmsof.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmul.c  |   2 +-
 clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vmxnor.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vmxor.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vnclip.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vncvt.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vneg.c  |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vnmsac.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vnmsub.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vnot.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vnsra.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c |   2 +-
 clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vredand.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vredmax.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vredmin.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vredor.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vredsum.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vredxor.c    |   2 +-
 .../RISCV/rvv-intrinsics/vreinterpret.c       |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vrem.c  |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vrgather.c   |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vrsub.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsadd.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsbc.c  |   2 +-
 clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vset.c  |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsetvl.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsext.c |   2 +-
 .../RISCV/rvv-intrinsics/vslide1down.c        |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vslide1up.c  |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vslidedown.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vslideup.c   |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsll.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsmul.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsoxei.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsoxseg.c    |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsra.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsrl.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsse.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsseg.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vssra.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vssrl.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vssseg.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vssub.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vsub.c  |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsuxei.c     |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vsuxseg.c    |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vundefined.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vwadd.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vwmacc.c     |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vwmul.c |   2 +-
 .../CodeGen/RISCV/rvv-intrinsics/vwredsum.c   |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vwsub.c |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vxor.c  |   2 +-
 .../test/CodeGen/RISCV/rvv-intrinsics/vzext.c |   2 +-
 clang/test/CodeGen/RISCV/rvv_errors.c         |   2 +-
 clang/test/Driver/riscv-arch.c                |  41 +++----
 clang/test/Headers/riscv-vector-header.c      |   2 +-
 .../test/Preprocessor/riscv-target-features.c |  34 +++---
 clang/test/Sema/riscv-types.c                 |   2 +-
 clang/utils/TableGen/RISCVVEmitter.cpp        |   2 +-
 llvm/lib/Support/RISCVISAInfo.cpp             |  18 +--
 llvm/lib/Target/RISCV/RISCV.td                |  16 +--
 .../CostModel/RISCV/fixed-vector-gather.ll    |   2 +-
 .../CostModel/RISCV/fixed-vector-scatter.ll   |   2 +-
 .../Analysis/CostModel/RISCV/rvv-shuffle.ll   |   2 +-
 llvm/test/CodeGen/RISCV/attributes.ll         |   8 +-
 llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll     |   4 +-
 .../RISCV/rvv/access-fixed-objects-by-rvv.ll  |   2 +-
 .../RISCV/rvv/addi-scalable-offset.mir        |   2 +-
 .../CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll  |   2 +-
 .../CodeGen/RISCV/rvv/bitreverse-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll   |   4 +-
 .../CodeGen/RISCV/rvv/calling-conv-fastcc.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/calling-conv.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/combine-sats.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/combine-splats.ll |   4 +-
 .../CodeGen/RISCV/rvv/combine-store-fp.ll     |   4 +-
 .../RISCV/rvv/common-shuffle-patterns.ll      |   2 +-
 .../rvv/commuted-op-indices-regression.mir    |   2 +-
 .../CodeGen/RISCV/rvv/constant-folding.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll    |   8 +-
 llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll    |   8 +-
 .../RISCV/rvv/debug-info-rvv-dbg-value.mir    |   4 +-
 .../test/CodeGen/RISCV/rvv/emergency-slot.mir |   2 +-
 .../CodeGen/RISCV/rvv/extload-truncstore.ll   |   4 +-
 .../CodeGen/RISCV/rvv/extract-subvector.ll    |   4 +-
 .../CodeGen/RISCV/rvv/extractelt-fp-rv32.ll   |   2 +-
 .../CodeGen/RISCV/rvv/extractelt-fp-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll  |   4 +-
 .../CodeGen/RISCV/rvv/extractelt-int-rv32.ll  |   2 +-
 .../CodeGen/RISCV/rvv/extractelt-int-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll  |   4 +-
 ...ixed-vector-strided-load-store-negative.ll |   2 +-
 .../rvv/fixed-vector-strided-load-store.ll    |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-abs.ll    |   8 +-
 .../rvv/fixed-vectors-bitcast-large-vector.ll |   6 +-
 .../RISCV/rvv/fixed-vectors-bitcast.ll        |   8 +-
 .../RISCV/rvv/fixed-vectors-bitreverse.ll     |   8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-bswap.ll  |   8 +-
 .../rvv/fixed-vectors-calling-conv-fastcc.ll  |   4 +-
 .../RISCV/rvv/fixed-vectors-calling-conv.ll   |   8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll   |  20 ++--
 .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll  |   8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll   |  20 ++--
 .../CodeGen/RISCV/rvv/fixed-vectors-elen.ll   |   8 +-
 .../rvv/fixed-vectors-emergency-slot.mir      |   2 +-
 .../rvv/fixed-vectors-extload-truncstore.ll   |   8 +-
 .../RISCV/rvv/fixed-vectors-extract-i1.ll     |   4 +-
 .../rvv/fixed-vectors-extract-subvector.ll    |   4 +-
 .../RISCV/rvv/fixed-vectors-extract.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-fp-bitcast.ll     |   4 +-
 .../RISCV/rvv/fixed-vectors-fp-buildvec.ll    |   8 +-
 .../RISCV/rvv/fixed-vectors-fp-conv.ll        |   8 +-
 .../RISCV/rvv/fixed-vectors-fp-interleave.ll  |   8 +-
 .../RISCV/rvv/fixed-vectors-fp-setcc.ll       |   4 +-
 .../RISCV/rvv/fixed-vectors-fp-shuffles.ll    |   4 +-
 .../RISCV/rvv/fixed-vectors-fp-splat.ll       |   8 +-
 .../RISCV/rvv/fixed-vectors-fp-vrgather.ll    |   8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp.ll     |   8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll   |   8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll   |   8 +-
 .../RISCV/rvv/fixed-vectors-insert-i1.ll      |   4 +-
 .../rvv/fixed-vectors-insert-subvector.ll     |  18 +--
 .../CodeGen/RISCV/rvv/fixed-vectors-insert.ll |   4 +-
 .../RISCV/rvv/fixed-vectors-int-buildvec.ll   |   4 +-
 .../RISCV/rvv/fixed-vectors-int-exttrunc.ll   |  12 +-
 .../RISCV/rvv/fixed-vectors-int-interleave.ll |   8 +-
 .../RISCV/rvv/fixed-vectors-int-setcc.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll   |   4 +-
 .../RISCV/rvv/fixed-vectors-int-splat.ll      |  12 +-
 .../RISCV/rvv/fixed-vectors-int-vrgather.ll   |   8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll    |   8 +-
 .../RISCV/rvv/fixed-vectors-marith-vp.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-mask-buildvec.ll  |  28 ++---
 .../rvv/fixed-vectors-mask-load-store.ll      |   8 +-
 .../RISCV/rvv/fixed-vectors-mask-logic.ll     |   8 +-
 .../RISCV/rvv/fixed-vectors-mask-splat.ll     |   8 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  |   4 +-
 .../RISCV/rvv/fixed-vectors-masked-load-fp.ll |   4 +-
 .../rvv/fixed-vectors-masked-load-int.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-masked-scatter.ll |   4 +-
 .../rvv/fixed-vectors-masked-store-fp.ll      |   4 +-
 .../rvv/fixed-vectors-masked-store-int.ll     |   4 +-
 .../rvv/fixed-vectors-reduction-fp-vp.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-reduction-fp.ll   |   4 +-
 .../rvv/fixed-vectors-reduction-int-vp.ll     |   4 +-
 .../RISCV/rvv/fixed-vectors-reduction-int.ll  |   4 +-
 .../rvv/fixed-vectors-reduction-mask-vp.ll    |   4 +-
 .../RISCV/rvv/fixed-vectors-select-fp.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-select-int.ll     |   4 +-
 .../rvv/fixed-vectors-stepvector-rv32.ll      |   4 +-
 .../rvv/fixed-vectors-stepvector-rv64.ll      |   4 +-
 .../rvv/fixed-vectors-store-merge-crash.ll    |   2 +-
 .../RISCV/rvv/fixed-vectors-unaligned.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-vadd-vp.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-vand-vp.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-vdiv-vp.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-vdivu-vp.ll       |   4 +-
 .../RISCV/rvv/fixed-vectors-vfadd-vp.ll       |   4 +-
 .../RISCV/rvv/fixed-vectors-vfdiv-vp.ll       |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll  |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll  |   4 +-
 .../RISCV/rvv/fixed-vectors-vfmul-vp.ll       |   4 +-
 .../RISCV/rvv/fixed-vectors-vfrdiv-vp.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-vfrsub-vp.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-vfsub-vp.ll       |   4 +-
 .../RISCV/rvv/fixed-vectors-vmul-vp.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll    |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll |   4 +-
 .../RISCV/rvv/fixed-vectors-vpgather.ll       |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vpload.ll |   4 +-
 .../RISCV/rvv/fixed-vectors-vpscatter.ll      |   4 +-
 .../RISCV/rvv/fixed-vectors-vpstore.ll        |   4 +-
 .../rvv/fixed-vectors-vreductions-mask.ll     |   8 +-
 .../RISCV/rvv/fixed-vectors-vrem-vp.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-vremu-vp.ll       |   4 +-
 .../RISCV/rvv/fixed-vectors-vrsub-vp.ll       |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll  |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll |   4 +-
 .../RISCV/rvv/fixed-vectors-vselect-vp.ll     |   4 +-
 .../RISCV/rvv/fixed-vectors-vselect.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-vshl-vp.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-vsra-vp.ll        |   4 +-
 .../RISCV/rvv/fixed-vectors-vsrl-vp.ll        |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vssub.ll  |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll |   4 +-
 .../RISCV/rvv/fixed-vectors-vsub-vp.ll        |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll |   4 +-
 .../RISCV/rvv/fixed-vectors-vwmaccu.ll        |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll  |   4 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll |   4 +-
 .../RISCV/rvv/fixed-vectors-vxor-vp.ll        |   4 +-
 .../test/CodeGen/RISCV/rvv/frameindex-addr.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll  |   4 +-
 .../CodeGen/RISCV/rvv/get-vlen-debugloc.mir   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/inline-asm.ll     |   2 +-
 .../CodeGen/RISCV/rvv/insert-subvector.ll     |   4 +-
 .../CodeGen/RISCV/rvv/insertelt-fp-rv32.ll    |   2 +-
 .../CodeGen/RISCV/rvv/insertelt-fp-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll   |   4 +-
 .../CodeGen/RISCV/rvv/insertelt-int-rv32.ll   |   2 +-
 .../CodeGen/RISCV/rvv/insertelt-int-rv64.ll   |   2 +-
 .../CodeGen/RISCV/rvv/interleave-crash.ll     |   4 +-
 .../RISCV/rvv/large-rvv-stack-size.mir        |   2 +-
 .../CodeGen/RISCV/rvv/legalize-load-sdnode.ll |   4 +-
 .../RISCV/rvv/legalize-scalable-vectortype.ll |   4 +-
 .../RISCV/rvv/legalize-store-sdnode.ll        |   4 +-
 .../CodeGen/RISCV/rvv/load-add-store-16.ll    |   4 +-
 .../CodeGen/RISCV/rvv/load-add-store-32.ll    |   4 +-
 .../CodeGen/RISCV/rvv/load-add-store-64.ll    |   4 +-
 .../CodeGen/RISCV/rvv/load-add-store-8.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/load-mask.ll      |   4 +-
 llvm/test/CodeGen/RISCV/rvv/localvar.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/marith-vp.ll      |   4 +-
 .../RISCV/rvv/mask-exts-truncs-rv32.ll        |   2 +-
 .../RISCV/rvv/mask-exts-truncs-rv64.ll        |   2 +-
 .../test/CodeGen/RISCV/rvv/mask-reg-alloc.mir |   2 +-
 llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll |   4 +-
 .../test/CodeGen/RISCV/rvv/masked-load-int.ll |   4 +-
 .../test/CodeGen/RISCV/rvv/masked-store-fp.ll |   4 +-
 .../CodeGen/RISCV/rvv/masked-store-int.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/memory-args.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll |   4 +-
 .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll |   4 +-
 .../RISCV/rvv/named-vector-shuffle-reverse.ll |  12 +-
 .../CodeGen/RISCV/rvv/no-reserved-frame.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/pr52475.ll        |   4 +-
 .../CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/reg-coalescing.mir |   2 +-
 .../CodeGen/RISCV/rvv/regalloc-fast-crash.ll  |   2 +-
 .../RISCV/rvv/rv32-spill-vector-csr.ll        |   4 +-
 .../CodeGen/RISCV/rvv/rv32-spill-vector.ll    |   4 +-
 .../CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll   |   4 +-
 .../RISCV/rvv/rv32-vsetvli-intrinsics.ll      |   2 +-
 .../RISCV/rvv/rv64-spill-vector-csr.ll        |   4 +-
 .../CodeGen/RISCV/rvv/rv64-spill-vector.ll    |   4 +-
 .../CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll   |   4 +-
 .../RISCV/rvv/rv64-vsetvli-intrinsics.ll      |   2 +-
 .../test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/rvv-framelayout.ll |   2 +-
 .../CodeGen/RISCV/rvv/rvv-out-arguments.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll |   4 +-
 llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/select-fp.ll      |   4 +-
 llvm/test/CodeGen/RISCV/rvv/select-int.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/select-sra.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll  |   2 +-
 .../CodeGen/RISCV/rvv/setcc-integer-rv32.ll   |   2 +-
 .../CodeGen/RISCV/rvv/setcc-integer-rv64.ll   |   2 +-
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/stepvector.ll     |   4 +-
 .../RISCV/rvv/tail-agnostic-impdef-copy.mir   |   6 +-
 llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll   |   2 +-
 .../RISCV/rvv/unaligned-loads-stores.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vand-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll   |   2 +-
 .../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll |   2 +-
 .../CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll   |   2 +-
 .../CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll   |   2 +-
 .../CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll  |   2 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll  |   4 +-
 .../test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll      |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll      |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll   |   2 +-
 .../CodeGen/RISCV/rvv/vfslide1down-rv32.ll    |   2 +-
 .../CodeGen/RISCV/rvv/vfslide1down-rv64.ll    |   2 +-
 .../test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll  |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll |   2 +-
 .../CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll |   2 +-
 .../CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll |   2 +-
 .../CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll  |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll   |   2 +-
 .../CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll |   2 +-
 .../CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vor-vp.ll         |   4 +-
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vpload.ll         |   4 +-
 .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vpstore.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll   |   2 +-
 .../RISCV/rvv/vreductions-fp-sdnode.ll        |   4 +-
 .../CodeGen/RISCV/rvv/vreductions-fp-vp.ll    |   4 +-
 .../CodeGen/RISCV/rvv/vreductions-int-rv32.ll |   2 +-
 .../CodeGen/RISCV/rvv/vreductions-int-rv64.ll |   2 +-
 .../CodeGen/RISCV/rvv/vreductions-int-vp.ll   |   4 +-
 .../CodeGen/RISCV/rvv/vreductions-mask-vp.ll  |   4 +-
 .../CodeGen/RISCV/rvv/vreductions-mask.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll  |   2 +-
 .../CodeGen/RISCV/rvv/vrgatherei16-rv32.ll    |   2 +-
 .../CodeGen/RISCV/rvv/vrgatherei16-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll       |   2 +-
 .../test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll |   2 +-
 .../CodeGen/RISCV/rvv/vselect-int-rv32.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vselect-int-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll     |   2 +-
 .../RISCV/rvv/vsetvli-insert-crossbb.ll       |   2 +-
 .../RISCV/rvv/vsetvli-insert-crossbb.mir      |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vsetvli-insert.mir |   4 +-
 .../CodeGen/RISCV/rvv/vsetvli-regression.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll        |   4 +-
 .../CodeGen/RISCV/rvv/vslide1down-rv32.ll     |   2 +-
 .../CodeGen/RISCV/rvv/vslide1down-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vslidedown-rv32.ll |   2 +-
 .../test/CodeGen/RISCV/rvv/vslidedown-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll     |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll  |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll   |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll   |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll    |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll      |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll        |   4 +-
 llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll     |   2 +-
 llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll     |   2 +-
 .../RISCV/rvv/wrong-stack-slot-rv32.mir       |   2 +-
 .../RISCV/rvv/wrong-stack-slot-rv64.mir       |   2 +-
 llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir  |   2 +-
 llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir |   2 +-
 .../test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll |   2 +-
 .../CodeGen/RISCV/scalable-vector-struct.ll   |   2 +-
 llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll   |   2 +-
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll |   4 +-
 .../CodeGen/RISCV/urem-seteq-illegal-types.ll |   4 +-
 llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll   |   4 +-
 llvm/test/MC/RISCV/attribute-arch-invalid.s   |   6 -
 llvm/test/MC/RISCV/attribute-arch.s           | 109 +++++++++---------
 llvm/test/MC/RISCV/rvv/add.s                  |   8 +-
 llvm/test/MC/RISCV/rvv/aliases.s              |   4 +-
 llvm/test/MC/RISCV/rvv/and.s                  |   8 +-
 llvm/test/MC/RISCV/rvv/clip.s                 |   8 +-
 llvm/test/MC/RISCV/rvv/compare.s              |   8 +-
 llvm/test/MC/RISCV/rvv/convert.s              |   8 +-
 llvm/test/MC/RISCV/rvv/div.s                  |   8 +-
 llvm/test/MC/RISCV/rvv/ext.s                  |   8 +-
 llvm/test/MC/RISCV/rvv/fadd.s                 |   8 +-
 llvm/test/MC/RISCV/rvv/fcompare.s             |   8 +-
 llvm/test/MC/RISCV/rvv/fdiv.s                 |   8 +-
 llvm/test/MC/RISCV/rvv/fmacc.s                |   8 +-
 llvm/test/MC/RISCV/rvv/fminmax.s              |   8 +-
 llvm/test/MC/RISCV/rvv/fmul.s                 |   8 +-
 llvm/test/MC/RISCV/rvv/fmv.s                  |   8 +-
 llvm/test/MC/RISCV/rvv/fothers.s              |   8 +-
 llvm/test/MC/RISCV/rvv/freduction.s           |   8 +-
 llvm/test/MC/RISCV/rvv/fsub.s                 |   8 +-
 llvm/test/MC/RISCV/rvv/invalid-eew.s          |   2 +-
 llvm/test/MC/RISCV/rvv/invalid.s              |   2 +-
 llvm/test/MC/RISCV/rvv/load.s                 |   8 +-
 llvm/test/MC/RISCV/rvv/macc.s                 |   8 +-
 llvm/test/MC/RISCV/rvv/mask.s                 |   8 +-
 llvm/test/MC/RISCV/rvv/minmax.s               |   8 +-
 llvm/test/MC/RISCV/rvv/mul.s                  |   8 +-
 llvm/test/MC/RISCV/rvv/mv.s                   |   8 +-
 llvm/test/MC/RISCV/rvv/or.s                   |   8 +-
 llvm/test/MC/RISCV/rvv/others.s               |   8 +-
 llvm/test/MC/RISCV/rvv/reduction.s            |   8 +-
 llvm/test/MC/RISCV/rvv/shift.s                |   8 +-
 llvm/test/MC/RISCV/rvv/sign-injection.s       |   8 +-
 llvm/test/MC/RISCV/rvv/snippet.s              |   4 +-
 llvm/test/MC/RISCV/rvv/store.s                |   8 +-
 llvm/test/MC/RISCV/rvv/sub.s                  |   8 +-
 llvm/test/MC/RISCV/rvv/vsetvl-invalid.s       |   4 +-
 llvm/test/MC/RISCV/rvv/vsetvl.s               |   8 +-
 llvm/test/MC/RISCV/rvv/xor.s                  |   8 +-
 llvm/test/MC/RISCV/rvv/zvlsseg.s              |   8 +-
 .../RISCV/masked_gather_scatter.ll            |   4 +-
 .../LoopVectorize/RISCV/reg-usage.ll          |   8 +-
 .../LoopVectorize/RISCV/riscv-interleaved.ll  |   2 +-
 .../LoopVectorize/RISCV/riscv-unroll.ll       |   8 +-
 .../RISCV/scalable-reductions.ll              |   2 +-
 .../LoopVectorize/RISCV/scalable-vf-hint.ll   |   2 +-
 .../RISCV/unroll-in-loop-vectorizer.ll        |   2 +-
 .../RISCV/rvv-min-vector-size.ll              |   6 +-
 1078 files changed, 1638 insertions(+), 1662 deletions(-)

diff --git a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias-err.c b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias-err.c
index 9ee8ce68ebc56..e80ac5e26e8cf 100644
--- a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias-err.c
+++ b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias-err.c
@@ -1,6 +1,6 @@
 // REQUIRES: riscv-registered-target
 // RUN: not %clang_cc1 -triple riscv64 -fsyntax-only -verify \
-// RUN:   -target-feature +experimental-v %s 2>&1 \
+// RUN:   -target-feature +v %s 2>&1 \
 // RUN: | FileCheck %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
index 9db7eaa3bcd11..dab41f1695e2e 100644
--- a/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
+++ b/clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -emit-llvm -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -emit-llvm -target-feature +v \
 // RUN:   %s -o - \
 // RUN:   | FileCheck %s
 
diff --git a/clang/test/CodeGen/RISCV/riscv-inline-asm-rvv.c b/clang/test/CodeGen/RISCV/riscv-inline-asm-rvv.c
index 146ed23386028..59adc5c45521a 100644
--- a/clang/test/CodeGen/RISCV/riscv-inline-asm-rvv.c
+++ b/clang/test/CodeGen/RISCV/riscv-inline-asm-rvv.c
@@ -1,9 +1,9 @@
 // REQUIRES: riscv-registered-target
 
-// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv32 -target-feature +v \
 // RUN:     -O2 -emit-llvm %s -o - \
 // RUN:     | FileCheck %s
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v \
 // RUN:     -O2 -emit-llvm %s -o - \
 // RUN:     | FileCheck %s
 
diff --git a/clang/test/CodeGen/RISCV/riscv-v-debuginfo.c b/clang/test/CodeGen/RISCV/riscv-v-debuginfo.c
index f0e405aa79c66..2217e6ea8d07e 100644
--- a/clang/test/CodeGen/RISCV/riscv-v-debuginfo.c
+++ b/clang/test/CodeGen/RISCV/riscv-v-debuginfo.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v \
 // RUN:   -dwarf-version=4 -debug-info-kind=limited -emit-llvm -o - %s \
 // RUN:   | FileCheck --check-prefix=DEBUGINFO %s
 #include <stdint.h>
diff --git a/clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp b/clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp
index f82389487f582..c6aa2a56133f2 100644
--- a/clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp
+++ b/clang/test/CodeGen/RISCV/riscv-v-lifetime.cpp
@@ -1,5 +1,5 @@
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -std=c++11 -triple riscv64 -target-feature +experimental-v \
+// RUN: %clang_cc1 -std=c++11 -triple riscv64 -target-feature +v \
 // RUN:   -O1 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vaadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vaadd.c
index b3912101ddde9..9b6b966723e39 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vaadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vaadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadc.c
index 5c90d3e970b5b..db3ea976f4803 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c
index 9f436c5001200..e3ff19a319483 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -target-feature +zfh -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vand.c
index 41dfe0bd7a1c8..909da393f7665 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vand.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vasub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vasub.c
index e1fbd75e0f4e6..8def45998e1b8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vasub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vasub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c
index cd2c0b04288ef..bd787d9b1263f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcompress.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c
index ba29104b15afc..f134a7784b759 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vcpop.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vdiv.c
index 7c1e7607c8d06..e5f8d3f645c04 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vdiv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vdiv.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfabs.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfabs.c
index 3afe6a35edf17..f784db297373a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfabs.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfabs.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfadd.c
index 0d0649afcee51..0d7c709183905 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -target-feature +zfh -disable-O0-optnone -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfclass.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfclass.c
index 7df3f86f4b0d5..3b172d9261eaf 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfclass.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfclass.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfcvt.c
index 8fad4dfc00f9a..b763497c6cf10 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfcvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfcvt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfdiv.c
index 5431a0d53c6bf..f8c882ee23f31 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfdiv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfdiv.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfirst.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfirst.c
index 4fbd882f805a8..9a222f9ff7c35 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfirst.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfirst.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmacc.c
index b7b5ab2b1f825..4c44b69ca654a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmacc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmadd.c
index 4fe2854f28e14..1a81935f88490 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmax.c
index 58577091d3563..1ae5f289ca667 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmax.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmerge.c
index a4284be12b05c..a945c0bafdfa5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmerge.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmin.c
index 5435e9441ab40..560d03030b8ba 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmin.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmin.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsac.c
index 60d839b982241..e42979c08f349 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsac.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsub.c
index 814989fea75dd..b3dace5e66bd4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmul.c
index a8efe8da248b2..6051fcf8dceec 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmv.c
index 5b8935e1e230d..3702b7c7b63e0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfmv.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfncvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfncvt.c
index 22b72bbb1e702..b6c28840157a0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfncvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfncvt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfneg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfneg.c
index a3b7cdc97f117..b2f4fb383db44 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfneg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfneg.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmacc.c
index cf04131ef16ba..c4a534d11ad41 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmacc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmadd.c
index 28bd617ba1567..42a257ad9c0d1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsac.c
index 6ccdd00b41837..8360fa5fa3006 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsac.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsub.c
index 6f64cbaeb9f7c..b75872317f473 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfnmsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrdiv.c
index 0810b8204e2cc..8c19f7bd147f9 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrdiv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrdiv.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrec7.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrec7.c
index cb65d923b4f4a..bff86dec3b518 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrec7.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrec7.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmax.c
index cf09fa1e44492..c61e6ade90eb8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmax.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmin.c
index bb66a84074138..735e0f620aa2e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmin.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredmin.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredsum.c
index 4ecd8afc87de1..e5d3da2c8e913 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredsum.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfredsum.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsqrt7.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsqrt7.c
index 403a83c95486e..6486ded04427a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsqrt7.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsqrt7.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsub.c
index f350d56cfe1b3..e407104b992a2 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfrsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsgnj.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsgnj.c
index 1165b32360014..e5fa14a92604b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsgnj.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsgnj.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1down.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1down.c
index d6e6a29cbc65e..e70c63f05770a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1down.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1down.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1up.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1up.c
index df8a60cc2eda0..989fd283b920f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1up.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfslide1up.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsqrt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsqrt.c
index 3c42e76eba88f..17615694da722 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsqrt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsqrt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsub.c
index 449985b16cb52..2df9853fdfe54 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwadd.c
index 7bd209b1c968b..2a5bf9f06f114 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwcvt.c
index 624235efb8950..ef65d95f6556a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwcvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwcvt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmacc.c
index aeb2a27df958a..4e55316a3eeca 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmacc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmsac.c
index e550c07b0dec5..c4086f06beb53 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmsac.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmul.c
index 03f4275e358bc..8d5396c32933e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmacc.c
index d714fb520a826..4b5ccba44758f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmacc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmsac.c
index 1f8160fec2c3f..16895d5f4b48d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwnmsac.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwredsum.c
index c185fadb650f0..b3eb1b5408bc8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwredsum.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwredsum.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwsub.c
index 150ffa14ca803..2dd4350fa9c7a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vfwsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c
index bf8d1e46b0353..09728162391c4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vget.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vid.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vid.c
index 061096c9f9a0b..f5d01e1de1240 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vid.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vid.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/viota.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/viota.c
index ec51c9169854f..ff2acb39fd91d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/viota.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/viota.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c
index b9839ef6752f7..60a3b74918b86 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vle.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c
index 3d27463efbfa4..b6fa36f8416b6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c
index 8b69c187e0c1a..395c22636a51f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
index 0df229dc803da..f40a89bfe45dc 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v \
+// RUN:   -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlse.c
index c2d2838027ec3..a5ef9176120be 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlse.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlse.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
index ac49608830570..bf13040961424 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlseg.c
@@ -1,12 +1,12 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
index 22df2b948634b..1a2a49f60228d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsegff.c
@@ -1,12 +1,12 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
index 4f79022bcfd92..ac1bbb169033f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vlsseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v \
+// RUN:   -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c
index aec874cb20a25..f1a1081aacfb7 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
index 8db5e8039cafe..72068e7a822ad 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v \
+// RUN:   -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmacc.c
index 7c6c217a8f6c9..b42dd449964f4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmacc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadc.c
index 1c24617f7d91b..c138e62df3083 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadd.c
index 80cb56780f5f8..197fdce0d4f11 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c
index 6e966a8040c8c..f03ae337bc52d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmand.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmax.c
index 744ea0fb1e32e..ee0948836af2a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmax.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
index 7fd693a4dd924..b36e8f8a153a5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmerge.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfeq.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfeq.c
index 62d75c3d53b4d..1d1b50e60dd58 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfeq.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfeq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfge.c
index 325f527dd6756..ed49ac9a0b698 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfge.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfgt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfgt.c
index a0bf92a539ef1..8ac544f5caceb 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfgt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfgt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfle.c
index cad932776fe24..5ae7ade4e5df7 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfle.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfle.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmflt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmflt.c
index 955cec4918891..74aaf22d9031e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmflt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmflt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfne.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfne.c
index 890682f03535d..efeacf95a675a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfne.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmfne.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmin.c
index 4f6a5d68bae05..5c269c17f1ea5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmin.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmin.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmmv.c
index b4d949babf665..604763c410444 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmmv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmmv.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnand.c
index fea9524ec11a1..f6adc78cd8d3e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnand.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnor.c
index 073d0319a0f4a..ed28a2ccf1950 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnot.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnot.c
index 5bfe530f254d7..28a2020c525a0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnot.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmnot.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c
index ecb662c329265..baecf279a07ec 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbc.c
index e642a96bc8f91..33a228ef83ee1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbf.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbf.c
index 7270fd8b4fc56..7012f0ef5b822 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbf.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsbf.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmseq.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmseq.c
index a20b73278dedd..b8657ae500932 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmseq.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmseq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsge.c
index 3f7f4449cbeea..9ae2f3a7f34ab 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsge.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsgt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsgt.c
index 6cba226fa9a4b..db63eef64203d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsgt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsgt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsif.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsif.c
index cade3990280db..5e16eb0a3497a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsif.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsif.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsle.c
index 39fb95f7dc8cc..2e4ccae0ca44f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsle.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsle.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmslt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmslt.c
index 0ebee8ee46cc7..00fb31ecf1477 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmslt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmslt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsne.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsne.c
index 9916376f7bb30..ef05f1e825eef 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsne.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsne.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsof.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsof.c
index ed3ef05a7e3b6..7330930687e68 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsof.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmsof.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
index d871e7a63c9c0..7d633ecde4265 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmv.c
index ed00804c40cf9..b6890fd8829d3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmv.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxnor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxnor.c
index 886d1874a6f84..d1ca74c3da0c3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxnor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxnor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxor.c
index 89403a714b779..4b414bee7b912 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmxor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnclip.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnclip.c
index b5bf86b55dea5..29ccbe114c8b6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnclip.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnclip.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vncvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vncvt.c
index b547decc6124f..d25b1b4c414a3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vncvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vncvt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vneg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vneg.c
index 43c1db05aab72..a52e5677e01fc 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vneg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vneg.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsac.c
index b97e838ff1ec1..9c0683af889dc 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsac.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsub.c
index ee2803020f0b0..9d3b6a0021b46 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnmsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnot.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnot.c
index fbe40dc59ec07..642055f09fab4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnot.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnot.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -target-feature +zfh -disable-O0-optnone -fallow-half-arguments-and-returns -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsra.c
index 4501e37e27fa9..c911a400be40a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsra.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsra.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsrl.c
index 9ef1e0a81c3bc..315a6802b8ba0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsrl.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vnsrl.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vor.c
index ef455106e560e..526f78ad58630 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredand.c
index e1897f20e4245..1199d8fe4f79b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredand.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmax.c
index 0796133bc67eb..2b91901e0898c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmax.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmin.c
index 23767a815f6fe..ae1041584a91f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmin.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredmin.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredor.c
index ad8c736225267..d5ba033a74e4a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredsum.c
index 96c3e938164e9..25b7de13fa1e0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredsum.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredsum.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredxor.c
index 247f67d69f018..d4cafe7c2b134 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredxor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vredxor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vreinterpret.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vreinterpret.c
index 760e697d9398a..2308dc466cea6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vreinterpret.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vreinterpret.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrem.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrem.c
index 3d3dc87974809..ddff28ea9d6c1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrem.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrem.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrgather.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrgather.c
index 314c7eb81b35c..7e4ef5e0ba950 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrgather.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrgather.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrsub.c
index fb8df2ec96217..d313ab0d0e0dc 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vrsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsadd.c
index 046cc4324b9e9..a5418573ac47c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsbc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsbc.c
index 5c90af66cb0cd..79c0453a514ca 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsbc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsbc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c
index 20a68c8ac4659..81f14546fd3fd 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vse.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c
index 5db2ff82f5984..efc4ee494f7ec 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vset.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsext.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsext.c
index 714724e7ac683..573d929a62249 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsext.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsext.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1down.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1down.c
index 9bed5a4860723..0e200dc4cb546 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1down.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1down.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1up.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1up.c
index 03b3436b834ae..7d8056e097bbf 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1up.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslide1up.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslidedown.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslidedown.c
index 01088ee621947..c07a80a1bc776 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslidedown.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslidedown.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslideup.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslideup.c
index 93de0f4c364dc..415934c6e2e64 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslideup.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vslideup.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsll.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsll.c
index 4cb7f1fb0fd95..5101560b0e4e0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsll.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsll.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
index ecafc5576da7a..6c1479515c36f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxei.c
index ba4149a34de6b..04ba0e688014a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxei.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
index d3454be64ca57..e11d591fc87cf 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsoxseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v \
+// RUN:   -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsra.c
index 750bb06e301c4..c7855bb0d02d0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsra.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsra.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsrl.c
index 6b4fc7c0245a5..bf0e538fc331b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsrl.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsrl.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsse.c
index 10097d0bbba6a..026cd287e05be 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsse.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsse.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
index b72721984afa8..bae79c0c7b246 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v \
+// RUN:   -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssra.c
index accf4688d452a..2427f374cdb39 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssra.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssra.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssrl.c
index 0f1803a7eeb30..d393fa1214fc5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssrl.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssrl.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
index 2a95d46f3ae92..1b7f8eb453c92 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v \
+// RUN:   -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssub.c
index cb7ed7338fc62..04b91eb785da2 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vssub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsub.c
index ae661230493e6..720ddadf5123b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxei.c
index 77980ee4cac55..3ade610fb7092 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxei.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
index 3a338ad821e27..597f4442f94b6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsuxseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v \
+// RUN:   -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwadd.c
index 74316ae7e7fc9..7d66d0940473e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwcvt.c
index 79a8c84ca0139..172ea75b07b42 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwcvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwcvt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmacc.c
index 1c245443da00c..05edc0718edb3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmacc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmul.c
index 88e5f6ca4c52a..3e6eaaa02da9f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwredsum.c
index f06985ca8bc88..78b5ac82be4ea 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwredsum.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwredsum.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwsub.c
index 8ce7f077471df..9080a62a47bad 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vwsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vxor.c
index 2f919f342b935..52d0d079453a1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vxor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vxor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vzext.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vzext.c
index c333b8a831517..80afbddc8d921 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vzext.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vzext.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vaadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vaadd.c
index 597db722f6141..cf6281261b421 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vaadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vaadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadc.c
index 03e4c29b552ec..c9134fc0d2bb7 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
index 131c168d76d57..f8070ce5a8312 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -target-feature +zfh -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vand.c
index 65878d6e535b2..54f63cd2bf58d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vand.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vasub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vasub.c
index 6a87d21e76e02..311881eb70192 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vasub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vasub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c
index 9dbb59289abee..5e940fec0f9db 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcompress.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c
index 78a8f11c5aaff..f9b18186ac52b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vcpop.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vdiv.c
index 1934d1780e3a1..7d18db0cad686 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vdiv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vdiv.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfabs.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfabs.c
index a97646011f4b0..18495dc881ca3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfabs.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfabs.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfadd.c
index 138d2590f54ca..4757cd9827aa6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -target-feature +zfh -disable-O0-optnone -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfclass.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfclass.c
index e22d4a0e78c51..a3fe942c8e875 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfclass.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfclass.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c
index d0a7280f2f3f9..9369c03987613 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfcvt.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c
index 8c2923167a5f7..c2c5b12aeae09 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfdiv.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfirst.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfirst.c
index 74a2b7ce000b2..84d3df136c29c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfirst.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfirst.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmacc.c
index 17e00d7b3300b..5f01c59536145 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmacc.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmadd.c
index 1042f89f17059..79e8d32e85d9f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmadd.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmax.c
index 1eeef95ba1b7e..052c02faa5897 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmax.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmerge.c
index 350f0955822ed..48f0506267435 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmerge.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmin.c
index af8b1ce8a9659..29c2b4c676021 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmin.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmin.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsac.c
index f72986fadb64c..d8e380ec8b20e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsac.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsub.c
index 8b42a34f92b8e..529cb80d3b8f8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmsub.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmul.c
index 9b98bb5eb966b..0be537645af31 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmul.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmv.c
index acd398a4e501a..74148c846c436 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfmv.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfncvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfncvt.c
index d5940d540effb..694f18bf80b87 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfncvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfncvt.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfneg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfneg.c
index 6cf510fa781ce..50dc403299221 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfneg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfneg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmacc.c
index fe5f3d104f0f6..192a5619332f2 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmacc.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmadd.c
index 34def944086b4..0e0ae186b87dd 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmadd.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsac.c
index 09f3ddaf8302b..11d3b9816df4d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsac.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsub.c
index 7ae33a87f434c..bdb90cb40e609 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfnmsub.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrdiv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrdiv.c
index 13c01502b1b26..c201f27cc32f3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrdiv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrdiv.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrec7.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrec7.c
index ca509be0c6ebf..5c1e1c8ea9341 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrec7.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrec7.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmax.c
index 57fcd59d12b46..1f76f073ce36b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmax.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmin.c
index a0bbb655037d7..42717e110bd8b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmin.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredmin.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredsum.c
index 79f127753704d..c8de86b2b89a4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredsum.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfredsum.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c
index b7667e4f48195..1469b6addc7f2 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsqrt7.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsub.c
index 76fd1e08a8713..7428f43db11b7 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfrsub.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsgnj.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsgnj.c
index 2c297bf07f299..277b803b9d687 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsgnj.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsgnj.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1down.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1down.c
index e5846973e155a..9c01c32bbf96b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1down.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1down.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1up.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1up.c
index 9e5bac3347fdc..389c762266d88 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1up.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfslide1up.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsqrt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsqrt.c
index 13ab2517acbe2..9d4b8d9242253 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsqrt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsqrt.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsub.c
index 97e3995012692..88a4c9aadb856 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfsub.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwadd.c
index d871740de6f47..87f59f8483305 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwadd.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwcvt.c
index a5967cc844914..3cd938292e055 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwcvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwcvt.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmacc.c
index cc32dd19d5793..51c3a6edaaa07 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmacc.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmsac.c
index 4ed20bd75ac0a..f247017eb3ebc 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmsac.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmul.c
index e9e86a8ea38a8..0254680b38d2e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwmul.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c
index 0ba22fcf44cbf..f9d4cb6b60b84 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmacc.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c
index 7bd147760fd47..afe3e32691feb 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwnmsac.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c
index eaeac9cde2a47..9db0ff78f301d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwredsum.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwsub.c
index 30407af51d753..410b3c89738b1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vfwsub.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c
index 43721e970d0e4..dfced599a8e0f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c
index 7c1f89b88daf1..be2734e47b235 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vid.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/viota.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/viota.c
index cf983abbbaf8b..8b0efa87d6d5c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/viota.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/viota.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c
index 69d569a7972c8..ca7ab81f4ec8a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone  -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c
index 01bd48544a73b..d88c6b40aedcb 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c
index 5f5517299f701..8858fb4d862a4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlmul.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c
index 61c4df541d4bf..328b3241ec2e5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
index f278bb76ae630..e027e4fbe1ac6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +zfh -target-feature +experimental-v \
+// RUN:   -target-feature +zfh -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c
index addb7e99637b1..e35a9e8168bd0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
index deaaf0ed5ff93..d749bb880a743 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlseg.c
@@ -1,12 +1,12 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
index fba2c179a4446..0b226fd42f8d1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsegff.c
@@ -1,12 +1,12 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv32 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone \
 // RUN:   -fallow-half-arguments-and-returns -emit-llvm %s -o - \
 // RUN:   | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
index cfdf8275483f8..eadd275e76f33 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlsseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +zfh -target-feature +experimental-v \
+// RUN:   -target-feature +zfh -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c
index 8988dd7a6c4d7..d24890ff9c7a4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
index b83ff9833e7b9..5b39bc2957d48 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +zfh -target-feature +experimental-v \
+// RUN:   -target-feature +zfh -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmacc.c
index 2b729b22ceb22..478a67a16a5a0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmacc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadc.c
index cdfc612234c3f..59ba187f6a1d4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadd.c
index 7157c8e45003a..de7241136561b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c
index 402280d7a8ca2..cda59a4232c54 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmand.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmax.c
index 17ace86d76fed..17a4c55b87bab 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmax.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmclr.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmclr.c
index 885c7cebdc76d..2b02c5f8928e5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmclr.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmclr.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
index 20ed58adb500e..a512ad402edb6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmerge.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c
index 89d83e30dac06..7f2c12cb1c9fa 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfeq.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfge.c
index 0573895b22872..03fbd2d9aacc5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfge.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c
index 3c098a1d3fd44..6e8d324174695 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfgt.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfle.c
index d0c62282d5a7c..7ccbd7031a21b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfle.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfle.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmflt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmflt.c
index 8a0c632d10a37..7700c7edf3875 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmflt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmflt.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfne.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfne.c
index 44c07f8395b5a..277d463df6c6c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfne.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmfne.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmin.c
index 3bf525539d4f9..bb31db3465e65 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmin.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmin.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmmv.c
index 68b01c5829754..72537647164ef 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmmv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmmv.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnand.c
index 67429dd14cf58..87b603119bdfd 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnand.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnor.c
index 2cbad2dd5cce9..74f23a17aebd3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnot.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnot.c
index 4e4456400139a..d6b8ee2d05edc 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnot.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmnot.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c
index 416de0408ffa1..a6a9553fadc9e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c
index 5f4947ae98b32..e6615766b0393 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c
index 736045c1aa6f7..059e4fef4524a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsbf.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmseq.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmseq.c
index 80d25a68af06e..f8e7800128fbc 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmseq.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmseq.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmset.c
index 6f2f03940c232..42252132b8c3b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmset.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmset.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsge.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsge.c
index 04f1151644ac3..dcf5c30d25cf2 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsge.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsge.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c
index 8eeacfa1572c2..9e7892cd9bc90 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsgt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsif.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsif.c
index a0f3baefcd469..8cbb7b282b1a3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsif.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsif.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsle.c
index b481452032972..63d72e45a7b54 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsle.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsle.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmslt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmslt.c
index 267e3671bd60b..486a15e0e3cc6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmslt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmslt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsne.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsne.c
index 7b830321cae81..5d3db18e50463 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsne.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsne.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsof.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsof.c
index ac6e551cf883c..70b142302d432 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsof.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmsof.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
index 8f49944ed4ddc..5d7f7e504290e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c
index 53836fbde0257..0f8517f447a13 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmv.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxnor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxnor.c
index e64dd8e9224d8..4ffb4dc2efa02 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxnor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxnor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxor.c
index 1f32736c1c637..36a543923e5a8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmxor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnclip.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnclip.c
index 60ed6a2c9dec1..81dc0a7ee0d8f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnclip.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnclip.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vncvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vncvt.c
index f87538e3c3334..6bdb8baec8ab8 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vncvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vncvt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vneg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vneg.c
index 674fe2ce90819..e83e29b7244c5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vneg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vneg.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsac.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsac.c
index 52aea8a954556..65b97bdeccd40 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsac.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsac.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsub.c
index e8fae27f84a15..6bc003f150cda 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnmsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnot.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnot.c
index 8e843863aaae0..0736b3c35f461 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnot.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnot.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +v \
 // RUN:   -target-feature +zfh -disable-O0-optnone -fallow-half-arguments-and-returns -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsra.c
index 9fe59bbdc5604..f86d5353acd91 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsra.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsra.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c
index 57e89636d702e..33d3479ec7abe 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vnsrl.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c
index 45ab3783fae5e..95630044f5e3b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredand.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredand.c
index 2da8f15cdad75..a7c1db6c00472 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredand.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredand.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmax.c
index c886985708326..8663c837bc156 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmax.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmin.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmin.c
index 8cb4711e6b823..2f14a9c36ea2f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmin.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredmin.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredor.c
index 45b23ac73553c..c262ed25567c4 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredsum.c
index 004bbeca5533b..6e246ad7251e1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredsum.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredsum.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredxor.c
index e3b2f72e5ecb4..65ab95bdf91c7 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vredxor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vredxor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vreinterpret.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vreinterpret.c
index 8f56bb5aff277..52097da1e6446 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vreinterpret.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vreinterpret.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrem.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrem.c
index aba47d40277af..01024f14a0407 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrem.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrem.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrgather.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrgather.c
index 246d62b30167b..afc7638555f82 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrgather.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrgather.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrsub.c
index 2e277602f0fae..d7688d2642c4d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vrsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vrsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsadd.c
index 9ee0cf06efcac..abfe106708dcd 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsbc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsbc.c
index 00b3660c19a75..b3cac73c30f7e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsbc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsbc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c
index 960a84d3ec614..10853e25db05c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vse.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone  -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c
index 7c7f778d68db3..f76a5a327d0d2 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvl.c
index 30ef1987507a0..29b59da5240e5 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvl.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvl.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -emit-llvm -o - %s \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -emit-llvm -o - %s \
 // RUN:       | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c
index 0aef38ccfa018..d569eb425aa26 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsetvlmax.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -emit-llvm -o - %s \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -emit-llvm -o - %s \
 // RUN:       | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsext.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsext.c
index af844dcc44430..f8561f7d11c12 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsext.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsext.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1down.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1down.c
index a6b1b1948f98b..867fe098640d7 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1down.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1down.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1up.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1up.c
index 2709781a586f2..61d7023eb431c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1up.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslide1up.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslidedown.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslidedown.c
index 9089a9ac9428a..86b7f9f0ff4a3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslidedown.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslidedown.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslideup.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslideup.c
index 3e5e9e0c53950..ca7cada4e51b6 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vslideup.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vslideup.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsll.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsll.c
index ae3fa6b3fc0ff..a7f2dec736893 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsll.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsll.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
index 60c8465e9bac3..dd0ba266080a1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxei.c
index cba2fda30a4a3..642adfbf31c71 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxei.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
index ac8c4a412e7d0..1d077cb181647 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsoxseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +zfh -target-feature +experimental-v \
+// RUN:   -target-feature +zfh -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsra.c
index ee18e20b3236e..ddf1f8bce3c97 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsra.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsra.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsrl.c
index b620f8812a38c..3150746b841dc 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsrl.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsrl.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsse.c
index dc2bcdd4d9241..48cd6f4ed0653 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsse.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsse.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
index 0829100f58ca4..47d256dc655f1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +zfh -target-feature +experimental-v \
+// RUN:   -target-feature +zfh -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssra.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssra.c
index 67e727107ba69..95e9cf843fc66 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssra.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssra.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssrl.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssrl.c
index 2067fbba6144b..fccf57aecfb1d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssrl.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssrl.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
index b91c023aeede7..73af7a3061c8e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +zfh -target-feature +experimental-v \
+// RUN:   -target-feature +zfh -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssub.c
index f29d895b48c4e..dc16819e04db3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vssub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vssub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsub.c
index 4e8bdb3a7a80b..4275d13f9d714 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxei.c
index 9da42956d516e..0b8b23159954a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxei.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
index a6bdc1ad883f3..cb4630dbb92ea 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsuxseg.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +zfh -target-feature +experimental-v \
+// RUN:   -target-feature +zfh -target-feature +v \
 // RUN:   -disable-O0-optnone -emit-llvm %s \
 // RUN:   -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vundefined.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vundefined.c
index 8815848b15186..a69c8e90952b0 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vundefined.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vundefined.c
@@ -1,7 +1,7 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d \
-// RUN:   -target-feature +experimental-v -target-feature +zfh \
+// RUN:   -target-feature +v -target-feature +zfh \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwadd.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwadd.c
index d5bef6564e0e3..77036c4d6e752 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwadd.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwadd.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c
index 65f41d555698e..4c4c6a26861b3 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwcvt.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmacc.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmacc.c
index 8c74789447bd0..a0f993feb7c4d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmacc.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmacc.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmul.c
index 58a92db75c825..023133b8f331d 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwredsum.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwredsum.c
index 8b0ff2bcc5f63..dd2295fccfdf9 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwredsum.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwredsum.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwsub.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwsub.c
index de13edb21caba..9422cd4d1b691 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vwsub.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vwsub.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vxor.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vxor.c
index 34ebdf43c7618..be076ea0b5039 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vxor.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vxor.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vzext.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vzext.c
index 29ea205de76c5..c4aad3d8d0491 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vzext.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vzext.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
diff --git a/clang/test/CodeGen/RISCV/rvv_errors.c b/clang/test/CodeGen/RISCV/rvv_errors.c
index 40ec544d0b76b..8316a4313274a 100644
--- a/clang/test/CodeGen/RISCV/rvv_errors.c
+++ b/clang/test/CodeGen/RISCV/rvv_errors.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 %s -triple=riscv64 -target-feature +experimental-v -fsyntax-only -verify
+// RUN: %clang_cc1 %s -triple=riscv64 -target-feature +v -fsyntax-only -verify
 
 void test() {
   __builtin_rvv_vsetvli(1, 7, 0); // expected-error {{argument value 7 is outside the valid range [0, 3]}}
diff --git a/clang/test/Driver/riscv-arch.c b/clang/test/Driver/riscv-arch.c
index 36043124e4565..e81dc7b700ada 100644
--- a/clang/test/Driver/riscv-arch.c
+++ b/clang/test/Driver/riscv-arch.c
@@ -397,35 +397,20 @@
 // RUN: -fsyntax-only 2>&1 | FileCheck -check-prefix=RV32-ZBA %s
 // RV32-ZBA: "-target-feature" "+zba"
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-NOFLAG %s
-// RV32-EXPERIMENTAL-V-NOFLAG: error: invalid arch name 'rv32iv'
-// RV32-EXPERIMENTAL-V-NOFLAG: requires '-menable-experimental-extensions'
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p1 -### %s -c 2>&1 | \
+// RUN:   FileCheck -check-prefix=RV32-V-BADVERS %s
+// RV32-V-BADVERS: error: invalid arch name 'rv32iv0p1'
+// RV32-V-BADVERS: unsupported version number 0.1 for extension 'v'
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-NOVERS %s
-// RV32-EXPERIMENTAL-V-NOVERS: error: invalid arch name 'rv32iv'
-// RV32-EXPERIMENTAL-V-NOVERS: experimental extension requires explicit version number
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 -### %s -c 2>&1 | \
+// RUN:   FileCheck -check-prefix=RV32-V-GOODVERS %s
+// RV32-V-GOODVERS: "-target-feature" "+v"
 
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv0p1 -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-BADVERS %s
-// RV32-EXPERIMENTAL-V-BADVERS: error: invalid arch name 'rv32iv0p1'
-// RV32-EXPERIMENTAL-V-BADVERS: unsupported version number 0.1 for experimental extension 'v'
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0 -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-V-GOODVERS %s
-// RV32-EXPERIMENTAL-V-GOODVERS: "-target-feature" "+experimental-v"
+// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b0p1 -### %s -c 2>&1 | \
+// RUN:   FileCheck -check-prefix=RV32-ZVL-BADVERS %s
+// RV32-ZVL-BADVERS: error: invalid arch name 'rv32iv1p0_zvl32b0p1'
+// RV32-ZVL-BADVERS: unsupported version number 0.1 for extension 'zvl32b'
 
 // RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b1p0 -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-NOFLAG %s
-// RV32-EXPERIMENTAL-ZVL-NOFLAG: error: invalid arch name 'rv32iv1p0_zvl32b1p0'
-// RV32-EXPERIMENTAL-ZVL-NOFLAG: requires '-menable-experimental-extensions'
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b0p1 -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-BADVERS %s
-// RV32-EXPERIMENTAL-ZVL-BADVERS: error: invalid arch name 'rv32iv1p0_zvl32b0p1'
-// RV32-EXPERIMENTAL-ZVL-BADVERS: unsupported version number 0.1 for experimental extension
-
-// RUN: %clang -target riscv32-unknown-elf -march=rv32iv1p0_zvl32b1p0 -menable-experimental-extensions -### %s -c 2>&1 | \
-// RUN:   FileCheck -check-prefix=RV32-EXPERIMENTAL-ZVL-GOODVERS %s
-// RV32-EXPERIMENTAL-ZVL-GOODVERS: "-target-feature" "+experimental-zvl32b"
+// RUN:   FileCheck -check-prefix=RV32-ZVL-GOODVERS %s
+// RV32-ZVL-GOODVERS: "-target-feature" "+zvl32b"
diff --git a/clang/test/Headers/riscv-vector-header.c b/clang/test/Headers/riscv-vector-header.c
index ce618b5a717e1..70db4d63c276e 100644
--- a/clang/test/Headers/riscv-vector-header.c
+++ b/clang/test/Headers/riscv-vector-header.c
@@ -2,7 +2,7 @@
 
 // RUN: %clang_cc1 -triple riscv64 -fsyntax-only \
 // RUN:   -target-feature +m -target-feature +a -target-feature +f \
-// RUN:   -target-feature +d -target-feature +experimental-v %s
+// RUN:   -target-feature +d -target-feature +v %s
 // expected-no-diagnostics
 
 #include <riscv_vector.h>
diff --git a/clang/test/Preprocessor/riscv-target-features.c b/clang/test/Preprocessor/riscv-target-features.c
index ec356cee7426e..73cb0b1a71c6f 100644
--- a/clang/test/Preprocessor/riscv-target-features.c
+++ b/clang/test/Preprocessor/riscv-target-features.c
@@ -211,10 +211,10 @@
 // CHECK-ZBT-NOT: __riscv_b
 // CHECK-ZBT-EXT: __riscv_zbt 93000{{$}}
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv32iv1p0 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
-// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0 -x c -E -dM %s \
 // RUN: -o - | FileCheck --check-prefix=CHECK-V-EXT %s
 // CHECK-V-EXT: __riscv_v 1000000{{$}}
@@ -236,59 +236,59 @@
 // RUN: -o - | FileCheck --check-prefix=CHECK-ZFH-EXT %s
 // CHECK-ZFH-EXT: __riscv_zfh 1000000{{$}}
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-V-MINVLEN %s
 // CHECK-V-MINVLEN: __riscv_v_elen 64
 // CHECK-V-MINVLEN: __riscv_v_elen_fp 64
 // CHECK-V-MINVLEN: __riscv_v_min_vlen 128
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl256b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL256b %s
 // CHECK-ZVL256b: __riscv_v_min_vlen 256
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl512b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL512b %s
 // CHECK-ZVL512b: __riscv_v_min_vlen 512
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl1024b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL1024b %s
 // CHECK-ZVL1024b: __riscv_v_min_vlen 1024
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl2048b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL2048b %s
 // CHECK-ZVL2048b: __riscv_v_min_vlen 2048
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl4096b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL4096b %s
 // CHECK-ZVL4096b: __riscv_v_min_vlen 4096
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl8192b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL8192b %s
 // CHECK-ZVL8192b: __riscv_v_min_vlen 8192
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl16384b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL16384b %s
 // CHECK-ZVL16384b: __riscv_v_min_vlen 16384
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl32768b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL32768b %s
 // CHECK-ZVL32768b: __riscv_v_min_vlen 32768
 
-// RUN: %clang -target riscv32-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv32-unknown-linux-gnu \
 // RUN: -march=rv64iv1p0_zvl65536b1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVL65536b %s
 // CHECK-ZVL65536b: __riscv_v_min_vlen 65536
 
-// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64ifdzve64d1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE64D-EXT %s
 // CHECK-ZVE64D-EXT: __riscv_v_elen 64
@@ -301,7 +301,7 @@
 // CHECK-ZVE64D-EXT: __riscv_zve64f 1000000{{$}}
 // CHECK-ZVE64D-EXT: __riscv_zve64x 1000000{{$}}
 
-// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64ifzve64f1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE64F-EXT %s
 // CHECK-ZVE64F-EXT: __riscv_v_elen 64
@@ -313,7 +313,7 @@
 // CHECK-ZVE64F-EXT: __riscv_zve64f 1000000{{$}}
 // CHECK-ZVE64F-EXT: __riscv_zve64x 1000000{{$}}
 
-// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64izve64x1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE64X-EXT %s
 // CHECK-ZVE64X-EXT: __riscv_v_elen 64
@@ -323,7 +323,7 @@
 // CHECK-ZVE64X-EXT: __riscv_zve32x 1000000{{$}}
 // CHECK-ZVE64X-EXT: __riscv_zve64x 1000000{{$}}
 
-// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64ifzve32f1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE32F-EXT %s
 // CHECK-ZVE32F-EXT: __riscv_v_elen 32
@@ -333,7 +333,7 @@
 // CHECK-ZVE32F-EXT: __riscv_zve32f 1000000{{$}}
 // CHECK-ZVE32F-EXT: __riscv_zve32x 1000000{{$}}
 
-// RUN: %clang -target riscv64-unknown-linux-gnu -menable-experimental-extensions \
+// RUN: %clang -target riscv64-unknown-linux-gnu \
 // RUN: -march=rv64izve32x1p0 -x c -E -dM %s -o - \
 // RUN: | FileCheck --check-prefix=CHECK-ZVE32X-EXT %s
 // CHECK-ZVE32X-EXT: __riscv_v_elen 32
diff --git a/clang/test/Sema/riscv-types.c b/clang/test/Sema/riscv-types.c
index 0d09546603b66..1be20688cc7d5 100644
--- a/clang/test/Sema/riscv-types.c
+++ b/clang/test/Sema/riscv-types.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-v -ast-print %s \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -ast-print %s \
 // RUN:    | FileCheck %s
 
 void bar(void) {
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index ea2d0b8d2f2f8..837226f4e2a54 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -1034,7 +1034,7 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) {
 
   OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n";
   OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, "
-        "ATTRS, \"experimental-zve32x\")\n";
+        "ATTRS, \"zve32x\")\n";
   OS << "#endif\n";
   for (auto &Def : Defs) {
     auto P =
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index c42f3604d67ff..f8a17f7440f6a 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -58,17 +58,8 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
 
     {"zbkb", RISCVExtensionVersion{1, 0}},
     {"zbkc", RISCVExtensionVersion{1, 0}},
-};
 
-static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
     {"v", RISCVExtensionVersion{1, 0}},
-    {"zbe", RISCVExtensionVersion{0, 93}},
-    {"zbf", RISCVExtensionVersion{0, 93}},
-    {"zbm", RISCVExtensionVersion{0, 93}},
-    {"zbp", RISCVExtensionVersion{0, 93}},
-    {"zbr", RISCVExtensionVersion{0, 93}},
-    {"zbt", RISCVExtensionVersion{0, 93}},
-
     {"zvl32b", RISCVExtensionVersion{1, 0}},
     {"zvl64b", RISCVExtensionVersion{1, 0}},
     {"zvl128b", RISCVExtensionVersion{1, 0}},
@@ -88,6 +79,15 @@ static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
     {"zve64d", RISCVExtensionVersion{1, 0}},
 };
 
+static const RISCVSupportedExtension SupportedExperimentalExtensions[] = {
+    {"zbe", RISCVExtensionVersion{0, 93}},
+    {"zbf", RISCVExtensionVersion{0, 93}},
+    {"zbm", RISCVExtensionVersion{0, 93}},
+    {"zbp", RISCVExtensionVersion{0, 93}},
+    {"zbr", RISCVExtensionVersion{0, 93}},
+    {"zbt", RISCVExtensionVersion{0, 93}},
+};
+
 static bool stripExperimentalPrefix(StringRef &Ext) {
   return Ext.consume_front("experimental-");
 }
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 2b6ea4067a8ea..dea042348d4db 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -185,48 +185,48 @@ def HasRVCHints : Predicate<"Subtarget->enableRVCHintInstrs()">,
                   AssemblerPredicate<(all_of(not FeatureNoRVCHints)),
                                       "RVC Hint Instructions">;
 
-def FeatureStdExtZvl32b : SubtargetFeature<"experimental-zvl32b", "ZvlLen", "ExtZvl::Zvl32b",
+def FeatureStdExtZvl32b : SubtargetFeature<"zvl32b", "ZvlLen", "ExtZvl::Zvl32b",
                        "'Zvl' (Minimum Vector Length) 32">;
 
 foreach i = { 6-15 } in {
     defvar I = !shl(1, i);
     def FeatureStdExtZvl#I#b :
-        SubtargetFeature<"experimental-zvl"#I#"b", "ZvlLen", "ExtZvl::Zvl"#I#"b",
+        SubtargetFeature<"zvl"#I#"b", "ZvlLen", "ExtZvl::Zvl"#I#"b",
                         "'Zvl' (Minimum Vector Length) "#I,
                         [!cast<SubtargetFeature>("FeatureStdExtZvl"#!srl(I, 1)#"b")]>;
 }
 
 def FeatureStdExtZve32x
-    : SubtargetFeature<"experimental-zve32x", "HasStdExtZve32x", "true",
+    : SubtargetFeature<"zve32x", "HasStdExtZve32x", "true",
                        "'Zve32x' (Vector Extensions for Embedded Processors "
                        "with maximal 32 EEW)",
                        [FeatureStdExtZvl32b]>;
 
 def FeatureStdExtZve32f
-    : SubtargetFeature<"experimental-zve32f", "HasStdExtZve32f", "true",
+    : SubtargetFeature<"zve32f", "HasStdExtZve32f", "true",
                        "'Zve32f' (Vector Extensions for Embedded Processors "
                        "with maximal 32 EEW and F extension)",
                        [FeatureStdExtZve32x]>;
 
 def FeatureStdExtZve64x
-    : SubtargetFeature<"experimental-zve64x", "HasStdExtZve64x", "true",
+    : SubtargetFeature<"zve64x", "HasStdExtZve64x", "true",
                        "'Zve64x' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW)", [FeatureStdExtZve32x, FeatureStdExtZvl64b]>;
 
 def FeatureStdExtZve64f
-    : SubtargetFeature<"experimental-zve64f", "HasStdExtZve64f", "true",
+    : SubtargetFeature<"zve64f", "HasStdExtZve64f", "true",
                        "'Zve64f' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW and F extension)",
                        [FeatureStdExtZve32f, FeatureStdExtZve64x]>;
 
 def FeatureStdExtZve64d
-    : SubtargetFeature<"experimental-zve64d", "HasStdExtZve64d", "true",
+    : SubtargetFeature<"zve64d", "HasStdExtZve64d", "true",
                        "'Zve64d' (Vector Extensions for Embedded Processors "
                        "with maximal 64 EEW, F and D extension)",
                        [FeatureStdExtZve64f]>;
 
 def FeatureStdExtV
-    : SubtargetFeature<"experimental-v", "HasStdExtV", "true",
+    : SubtargetFeature<"v", "HasStdExtV", "true",
                        "'V' (Vector Extension for Application Processors)",
                        [FeatureStdExtZvl128b, FeatureStdExtZve64d, FeatureStdExtF, FeatureStdExtD]>;
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll
index 012a02ff881a9..baa1d412abf26 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-gather.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s 2>%t | FileCheck %s
+; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s 2>%t | FileCheck %s
 ; Check that we don't crash querying costs when vectors are not enabled.
 ; RUN: opt -cost-model -analyze -mtriple=riscv64
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll
index fcb576c5ed385..d27712d155ef0 100644
--- a/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/fixed-vector-scatter.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s 2>%t | FileCheck %s
+; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s 2>%t | FileCheck %s
 ; Check that we don't crash querying costs when vectors are not enabled.
 ; RUN: opt -cost-model -analyze -mtriple=riscv64
 
diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
index 56c175e38e9fd..497c16d2e2a6e 100644
--- a/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/rvv-shuffle.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; Check getShuffleCost for scalable vector
 
-; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s
+; RUN: opt -cost-model -analyze -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s
 
 define void  @vector_broadcast() {
 ; CHECK-LABEL: 'vector_broadcast'
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index 19ba02d531567..d37d86fc2f04d 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -17,8 +17,8 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbr %s -o - | FileCheck --check-prefix=RV32ZBR %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbs %s -o - | FileCheck --check-prefix=RV32ZBS %s
 ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV32ZBT %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV32V %s
-; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s
+; RUN: llc -mtriple=riscv32 -mattr=+v %s -o - | FileCheck --check-prefix=RV32V %s
+; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV32ZBKC %s
 ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s
@@ -38,8 +38,8 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbr %s -o - | FileCheck --check-prefix=RV64ZBR %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbs %s -o - | FileCheck --check-prefix=RV64ZBS %s
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbt %s -o - | FileCheck --check-prefix=RV64ZBT %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v %s -o - | FileCheck --check-prefix=RV64V %s
-; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+experimental-v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s
+; RUN: llc -mtriple=riscv64 -mattr=+v %s -o - | FileCheck --check-prefix=RV64V %s
+; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV64ZBKC %s
 
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
index 6a144032e866a..33dd10ad52013 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat_vec.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -verify-machineinstrs < %s | FileCheck %s
 
 ; i32 saturate
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll
index 575a09b7922b9..c00df3ff4ff3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/abs-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i16> @llvm.abs.nxv1i16(<vscale x 1 x i16>, i1)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
index e46879448d6c2..4216c5952eae8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/access-fixed-objects-by-rvv.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -O2 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64IV
 
 define <vscale x 1 x i64> @access_fixed_object(i64 *%val) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
index b9be04a3306d3..8bef6530a0081 100644
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
+# RUN: llc -march=riscv64 -mattr=+v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
 
 --- |
   define void @add_scalable_offset(
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 065e5dee2f930..ae64fbc93e237 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \
 ; RUN:    | FileCheck %s
 
 define void @lmul1() nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
index d9ef5cc385f93..8451aeec34c67 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) {
 ; CHECK-LABEL: bitreverse_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
index 8c1fa06861681..2b81cac3d67ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i16> @bswap_nxv1i16(<vscale x 1 x i16> %va) {
 ; CHECK-LABEL: bswap_nxv1i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index ca0abc5e40031..37fd1a7424c3e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 define fastcc <vscale x 4 x i8> @ret_nxv4i8(<vscale x 4 x i8>* %p) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
index 009b71437bf9c..39fbdf32e6a52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefix=RV64
 
 ; Check that we correctly scale the split part indirect offsets by VSCALE.
 define <vscale x 32 x i32> @callee_scalable_vector_split_indirect(<vscale x 32 x i32> %x, <vscale x 32 x i32> %y) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
index 5400f86e75752..8f362b6f4d19d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-sats.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; fold (add (umax X, C), -C) --> (usubsat X, C)
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll
index 1e0180f9ad97e..969f37b338d53 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s
 
 ; fold (and (or x, C), D) -> D if (C & D) == D
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/combine-store-fp.ll
index b7ef676a67258..fdbdc37df47c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/combine-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-store-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
 
 define void @combine_fp_zero_stores_crash(float* %ptr)  {
 ; CHECK-LABEL: combine_fp_zero_stores_crash:
diff --git a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
index 2b8accc38454c..daad5e051c9d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/common-shuffle-patterns.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s
 
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
 target triple = "riscv64-unknown-unknown-elf"
diff --git a/llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir b/llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir
index c319ba59c2967..870e99f1290b5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -mattr=+experimental-v -run-pass=simple-register-coalescing %s -o - 2>&1 | FileCheck %s
+# RUN: llc -march=riscv64 -mattr=+v -run-pass=simple-register-coalescing %s -o - 2>&1 | FileCheck %s
 
 # This test used to crash in the register coalescer when the target would
 # return the out-of-bounds CommuteAnyOperandIndex for one of its commutable
diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
index 31bde5a895315..5850c7c1937e3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; These tests check that the scalable-vector version of this series of
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index b399de99ccd92..7fb412094054c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D
 
 define <vscale x 1 x i8> @ctlz_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV32I-LABEL: ctlz_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
index 0a5b2bf3caa01..21de10f5f87ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @ctpop_nxv1i8(<vscale x 1 x i8> %va) {
 ; CHECK-LABEL: ctpop_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index f5148450e1c23..3326407581171 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64D
 
 define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV32I-LABEL: cttz_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/debug-info-rvv-dbg-value.mir b/llvm/test/CodeGen/RISCV/rvv/debug-info-rvv-dbg-value.mir
index 574905662f8b5..67865ce5f433f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/debug-info-rvv-dbg-value.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/debug-info-rvv-dbg-value.mir
@@ -1,6 +1,6 @@
-# RUN: llc -march=riscv64 -mattr=+experimental-v -o %t0 -filetype=obj \
+# RUN: llc -march=riscv64 -mattr=+v -o %t0 -filetype=obj \
 # RUN:   -start-before=prologepilog %s
-# RUN: llc -march=riscv64 -mattr=+experimental-v -o %t1 -filetype=obj \
+# RUN: llc -march=riscv64 -mattr=+v -o %t1 -filetype=obj \
 # RUN:   -frame-pointer=all -start-before=prologepilog %s
 # RUN: llvm-dwarfdump --name="value0" %t0 | FileCheck %s --check-prefix=CHECK0-PLUS
 # RUN: llvm-dwarfdump --name="value1" %t0 | FileCheck %s --check-prefix=CHECK1-PLUS
diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
index 8b33e981854d3..9c283bc3d6830 100644
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v -run-pass=prologepilog -o - \
+# RUN: llc -mtriple riscv64 -mattr=+m,+v -run-pass=prologepilog -o - \
 # RUN:     -verify-machineinstrs %s | FileCheck %s
 --- |
   target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
diff --git a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
index 596cc0a7681d4..5ecfae181a707 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @sextload_nxv1i1_nxv1i8(<vscale x 1 x i1>* %x) {
 ; CHECK-LABEL: sextload_nxv1i1_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
index e99b7b0f79043..d8736937e6eba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extract-subvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 4 x i32> @extract_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec) {
 ; CHECK-LABEL: extract_nxv8i32_nxv4i32_0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll
index 2964d2aea09b9..950ca3bde2c88 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define half @extractelt_nxv1f16_0(<vscale x 1 x half> %v) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll
index 1690f42ff036f..b5525d2337047 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define half @extractelt_nxv1f16_0(<vscale x 1 x half> %v) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
index 27d609e6f8a7c..d712ced930116 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define i1 @extractelt_nxv1i1(<vscale x 1 x i8>* %x, i64 %idx) nounwind {
 ; CHECK-LABEL: extractelt_nxv1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
index c50881a4b63bf..2062aaa184125 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define signext i8 @extractelt_nxv1i8_0(<vscale x 1 x i8> %v) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
index 0c0685697eb11..281182ba33f09 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define signext i8 @extractelt_nxv1i8_0(<vscale x 1 x i8> %v) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
index d8f16d90d5353..ccdbf4c08f484 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @ceil_nxv1f16(<vscale x 1 x half> %x) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
index 210136734821b..f3e59dc4281fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @floor_nxv1f16(<vscale x 1 x half> %x) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-negative.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-negative.ll
index 1bc2adb71c720..376f87345494f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-negative.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store-negative.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=256 | FileCheck %s
+; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s
 
 ; This contains negative tests for the strided load/store recognition in
 ; RISCVGatherScatterLowering.cpp
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
index bde5c515a1425..bddbcdb5fb84d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-strided-load-store.ll
@@ -1,5 +1,5 @@
-; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=256 | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefix=CHECK-ASM
+; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=256 | FileCheck %s --check-prefix=CHECK-ASM
 
 %struct.foo = type { i32, i32, i32, i32 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
index 519f042fd92a0..fee2145d9c44a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
 
 define void @abs_v16i8(<16 x i8>* %x) {
 ; CHECK-LABEL: abs_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
index ba251f9fd3352..bbb2c2e6ae280 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=VLEN1024
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=VLEN1024
 
 define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) {
 ; VLEN256-LABEL: bitcast_1024B:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
index 3043745e9f927..11952a6e959a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:     -riscv-v-vector-bits-min=128 -target-abi=ilp32d < %s \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:     -riscv-v-vector-bits-min=128 -target-abi=lp64d < %s \
 ; RUN:     | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:     -riscv-v-vector-bits-min=128 \
 ; RUN:     -riscv-v-fixed-length-vector-elen-max=32 -target-abi=ilp32d < %s \
 ; RUN:     | FileCheck %s --check-prefixes=ELEN32,RV32ELEN32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:     -riscv-v-vector-bits-min=128 \
 ; RUN:     -riscv-v-fixed-length-vector-elen-max=32 -target-abi=lp64d < %s \
 ; RUN:     | FileCheck %s --check-prefixes=ELEN32,RV64ELEN32
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
index bde7f07d1b53b..e97a7c98f4dcd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
 
 define void @bitreverse_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
 ; LMULMAX2-RV32-LABEL: bitreverse_v8i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
index b27469939cf3f..b02c4f6383710 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
 
 define void @bswap_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
 ; LMULMAX2-RV32-LABEL: bswap_v8i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
index 1afa7b94221ae..90900c60ee44a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
 
 define fastcc <4 x i8> @ret_v4i8(<4 x i8>* %p) {
 ; CHECK-LABEL: ret_v4i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
index 13b856e00a7c5..bc6599de8e676 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define <4 x i8> @ret_v4i8(<4 x i8>* %p) {
 ; CHECK-LABEL: ret_v4i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index b53641f5cb33c..a67d54f8e7275 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64
 
 define void @ctlz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind {
 ; LMULMAX2-RV32-LABEL: ctlz_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index 24afb1d6ee5c6..afa391bc83e39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
 
 define void @ctpop_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
 ; CHECK-LABEL: ctpop_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 2a7680a22acab..3059d1777c362 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -1,14 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV32,LMULMAX2-RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX2-RV64,LMULMAX2-RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LMULMAX8-RV64
 
 define void @cttz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind {
 ; LMULMAX2-RV32-LABEL: cttz_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
index d84679b8b0c45..99f64e73af2cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+d,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zve32f -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; Test that limiting ELEN, either through the command line or zve32, scalarizes
 ; elements larger than that and disables some fractional LMULs.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir
index 21c01a82dba4e..c2fece9dbf6ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-emergency-slot.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-# RUN: llc -mtriple riscv64 -mattr=+experimental-v -start-before=prologepilog -o - \
+# RUN: llc -mtriple riscv64 -mattr=+v -start-before=prologepilog -o - \
 # RUN:     -verify-machineinstrs %s | FileCheck %s
 --- |
   target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
index 063d4f380faf3..df657d83d5e7f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
 
 define <2 x i16> @sextload_v2i1_v2i16(<2 x i1>* %x) {
 ; CHECK-LABEL: sextload_v2i1_v2i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index d9ade0f525469..e2236b3249d1f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define i1 @extractelt_v1i1(<1 x i8>* %x, i64 %idx) nounwind {
 ; CHECK-LABEL: extractelt_v1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
index 633251e96362c..7eb5bd53df45c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define void @extract_v2i8_v4i8_0(<4 x i8>* %x, <2 x i8>* %y) {
 ; CHECK-LABEL: extract_v2i8_v4i8_0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index be851b3c2458f..124552d37a126 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define i8 @extractelt_v16i8(<16 x i8>* %x) nounwind {
 ; CHECK-LABEL: extractelt_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
index 33923f299a940..bc829dd463404 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-bitcast.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32-FP
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64-FP
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV32-FP
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=CHECK,RV64-FP
 
 define i16 @bitcast_v1f16_i16(<1 x half> %a) {
 ; CHECK-LABEL: bitcast_v1f16_i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index fb752e82a4ed6..d1a5b516d7f9c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
 
 ; Tests that a floating-point build_vector doesn't try and generate a VID
 ; instruction
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
index 5050af573bfd5..a87128bb9b8de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-conv.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define void @fpext_v2f16_v2f32(<2 x half>* %x, <2 x float>* %y) {
 ; CHECK-LABEL: fpext_v2f16_v2f32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index 3f57fcea6acc5..14ec588a640b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
 
 ; Test optimizing interleaves to widening arithmetic.
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index f31cfe1bd5518..32d272512c0b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
 
 define void @fcmp_oeq_vv_v8f16(<8 x half>* %x, <8 x half>* %y, <8 x i1>* %z) {
 ; CHECK-LABEL: fcmp_oeq_vv_v8f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index 25a078cd5239b..ecd4c85b4f56f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <4 x half> @shuffle_v4f16(<4 x half> %x, <4 x half> %y) {
 ; CHECK-LABEL: shuffle_v4f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
index c6070436767a3..9fb7b4927f71f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-splat.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define void @splat_v8f16(<8 x half>* %x, half %y) {
 ; CHECK-LABEL: splat_v8f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
index 9e3988caba6fd..1478249d2ec9c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define void @gather_const_v8f16(<8 x half>* %x) {
 ; CHECK-LABEL: gather_const_v8f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index fd1a5cb54192e..cb316141e5b72 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
 
 define void @fadd_v8f16(<8 x half>* %x, <8 x half>* %y) {
 ; CHECK-LABEL: fadd_v8f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index f125dfd67c048..0f3efd2daaacd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define void @fp2si_v2f32_v2i32(<2 x float>* %x, <2 x i32>* %y) {
 ; CHECK-LABEL: fp2si_v2f32_v2i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 8d496e40a466a..9d0a00df4744d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define void @si2fp_v2i32_v2f32(<2 x i32>* %x, <2 x float>* %y) {
 ; CHECK-LABEL: si2fp_v2i32_v2f32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
index b375720256434..4bc46c60a87f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <1 x i1> @insertelt_v1i1(<1 x i1> %x, i1 %elt) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 01a8870ab2216..6bee4f83a47d2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v --riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -early-live-intervals -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, <2 x i32>* %svp) {
 ; CHECK-LABEL: insert_nxv8i32_v2i32_0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 79195533f43c7..af2737a765a79 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; FIXME: This codegen needs to be improved. These tests previously asserted
 ; type legalizing the i64 type on RV32.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index 5a6b1f2126edb..f4bb61440f57d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @buildvec_vid_v16i8(<16 x i8>* %x) {
 ; CHECK-LABEL: buildvec_vid_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
index 89dd2bded9402..4482c653ad4d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define void @sext_v4i8_v4i32(<4 x i8>* %x, <4 x i32>* %z) {
 ; CHECK-LABEL: sext_v4i8_v4i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index a17a83169373b..f42e327942f1d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV32-V128
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V128,RV64-V128
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV32-V512
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,V512,RV64-V512
 
 ; Test optimizing interleaves to widening arithmetic.
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
index 40479b0fb343b..9d0fb925a46e2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
 
 ; FIXME: We use exclusively byte types here because the MVT we use for the
 ; stores is calculated assuming byte elements. We need to deal with mismatched
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 1b8f1d246cd20..b727a27d53a2b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <4 x i16> @shuffle_v4i16(<4 x i16> %x, <4 x i16> %y) {
 ; CHECK-LABEL: shuffle_v4i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
index 9e3c5ff0e4854..194bf63ccc553 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV32
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV32
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8,LMULMAX8-RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2,LMULMAX2-RV64
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1,LMULMAX1-RV64
 
 define void @splat_v16i8(<16 x i8>* %x, i8 %y) {
 ; CHECK-LABEL: splat_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll
index 7cd9807bff727..576de147329cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
 
 define void @gather_const_v16i8(<16 x i8>* %x) {
 ; CHECK-LABEL: gather_const_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index faa8ff5cd2e95..2b3138cc76e73 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64
 
 define void @add_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
 ; CHECK-LABEL: add_v16i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll
index 632fe1929512d..1b8da18a3d140 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-marith-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s
 
 declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index 56b3e3d89cb72..6dffc1bd6fd4d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -1,19 +1,19 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX4
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX4
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX4
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX4
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32,RV32-LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64,RV64-LMULMAX8
 ; Test with ELEN limited
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN32
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=16 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN16
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=16 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN16
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN8
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN8
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN32
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=16 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN16
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=16 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN16
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32-ELEN,RV32-ELEN8
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-elen-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64-ELEN,RV64-ELEN8
 
 define <1 x i1> @buildvec_mask_nonconst_v1i1(i1 %x) {
 ; CHECK-LABEL: buildvec_mask_nonconst_v1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
index d7ff4203017c4..924d20e8aff80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
 
 define void @load_store_v1i1(<1 x i1>* %x, <1 x i1>* %y) {
 ; CHECK-LABEL: load_store_v1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll
index 73968f06a963a..c37e4f8d882b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-logic.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
 
 define void @and_v8i1(<8 x i1>* %x, <8 x i1>* %y) {
 ; CHECK-LABEL: and_v8i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 644a39aada178..21cb21cae7671 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1-RV64
 
 define void @splat_ones_v1i1(<1 x i1>* %x) {
 ; CHECK-LABEL: splat_ones_v1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index 4af789ee9963d..c3f7772878d44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
 
 declare <1 x i8> @llvm.masked.gather.v1i8.v1p0i8(<1 x i8*>, i32, <1 x i1>, <1 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
index d75f1e3530b04..49569bbd23822 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @masked_load_v1f16(<1 x half>* %a, <1 x half>* %m_ptr, <1 x half>* %res_ptr) nounwind {
 ; CHECK-LABEL: masked_load_v1f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
index 55edce865c5fb..39bcc2dc83d41 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @masked_load_v1i8(<1 x i8>* %a, <1 x i8>* %m_ptr, <1 x i8>* %res_ptr) nounwind {
 ; CHECK-LABEL: masked_load_v1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index 5ffb77f09e2b8..f357ae3575122 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
 
 declare void @llvm.masked.scatter.v1i8.v1p0i8(<1 x i8>, <1 x i8*>, i32, <1 x i1>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
index 33824b182ab91..79d9943422b6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @masked_store_v1f16(<1 x half>* %val_ptr, <1 x half>* %a, <1 x half>* %m_ptr) nounwind {
 ; CHECK-LABEL: masked_store_v1f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
index 035f9021fd856..01492be088f06 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define void @masked_store_v1i8(<1 x i8>* %val_ptr, <1 x i8>* %a, <1 x i8>* %m_ptr) nounwind {
 ; CHECK-LABEL: masked_store_v1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
index 3410a1a836167..54fae0066599a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare half @llvm.vp.reduce.fadd.v2f16(half, <2 x half>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
index e37227ef22261..a2727051affec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128  -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128  -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare half @llvm.vector.reduce.fadd.v1f16(half, <1 x half>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index 405d7aa6b98f9..2ec06885496e9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index 8ee206dd8e164..4844db616b6fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128  -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128  -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare i8 @llvm.vector.reduce.add.v1i8(<1 x i8>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
index 59ebdcdbc0485..7577eaaba491d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-mask-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s
 
 declare i1 @llvm.vp.reduce.and.v1i1(i1, <1 x i1>, <1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
index 59c6197aadd97..994d2ace14b64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
index d452e525c6788..c38023a8d6488 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <1 x i1> @select_v1i1(i1 zeroext %c, <1 x i1> %a, <1 x i1> %b) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll
index 129cbd1060beb..bc9a583dbbbe7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
 
 declare <2 x i8> @llvm.experimental.stepvector.v2i8()
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll
index 3c3e70c6b49d3..b1cfc28e9ed3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2
 
 declare <2 x i8> @llvm.experimental.stepvector.v2i8()
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll
index 97a2e85f4f46e..b6b64d84bb3b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+experimental-v \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb,+v \
 ; RUN:     -riscv-v-vector-bits-min=128 | FileCheck %s
 
 ; This test loads to values and stores them in reversed order. This previously
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
index 6402c25d068cd..d6d1e20b7013e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=RV64
 
 define <4 x i32> @load_v4i32_align1(<4 x i32>* %ptr) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
index bd29f51a93919..6d451865124d9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.add.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
index 419a9d4b5b1bc..01a146d155b13 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.and.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
index b7dac4e289044..8605aa1f2114e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdiv-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.sdiv.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
index 246dd3100cb1f..443af68b3eee7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vdivu-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.udiv.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
index 215daf4f92f51..86aa970c865f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfadd-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x half> @llvm.vp.fadd.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
index 2135fdfe96dbc..ad5e4a6031125 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfdiv-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x half> @llvm.vp.fdiv.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
index ea76152cc2b74..b7d76815e7d32 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
index 67e07b5b9f137..dfdfd8b42010f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
index 06c0619a80544..45274f8568aaa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmul-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x half> @llvm.vp.fmul.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll
index 836ccd5581d14..c7a4364b8d497 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x half> @llvm.vp.fdiv.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll
index 60027d26be99a..bdf7c6601d8d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x half> @llvm.vp.fsub.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
index cfcf8c18f7a29..3d9a771958f20 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsub-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x half> @llvm.vp.fsub.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
index 976d12eab3d72..787e0c2426457 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmul-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.mul.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
index ad00d1e4e0abe..84f03cc78e601 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vnsra-vnsrl.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 
 define <8 x i8> @vnsra_v8i16_v8i8_scalar(<8 x i16> %x, i16 %y) {
 ; CHECK-LABEL: vnsra_v8i16_v8i8_scalar:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
index b1807c1b29de6..00604c38a6bae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vor-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.or.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index d41fd7bb068c5..7584a1507e706 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare <2 x i8> @llvm.vp.gather.v2i8.v2p0i8(<2 x i8*>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
index ad21c399c15f6..cf858875d8285 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <2 x i8> @llvm.vp.load.v2i8.p0v2i8(<2 x i8>*, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
index 1e5db6e8ae907..1d5cad5d40212 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare void @llvm.vp.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
index 9781843701362..08f659c2ffea2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.vp.store.v2i8.p0v2i8(<2 x i8>, <2 x i8>*, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
index f519a81965aee..8bee712ce4782 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vreductions-mask.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8
 
 declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
index 9d58cb6bc49a5..380e0096b3f04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrem-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.srem.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
index 30be5a51002e6..4585a8841008b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vremu-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.urem.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
index 3bb9b61b949e3..f17feb93b84d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vrsub-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll
index 293a94ff1e422..dac996a262594 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8>, <2 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
index 057687fc41a52..f3a288c4b2aad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8>, <2 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
index f57d1ee56f059..a28bfa17144e5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 154629079ce7e..fc5a136633b4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+experimental-v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+f,+d -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
 
 define void @vselect_vv_v8i32(<8 x i32>* %a, <8 x i32>* %b, <8 x i1>* %cc, <8 x i32>* %z) {
 ; CHECK-LABEL: vselect_vv_v8i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll
index 2c288ecc9480c..ecbdaef193f96 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vshl-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.shl.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll
index 84f7a2e349033..54a4de66668ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsra-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.ashr.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll
index 265acfbf82c0e..75fc113854fbc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsrl-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.lshr.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
index f439cefb128b8..e9ef8b6971acb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8>, <2 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll
index 6eaadbd4c79e1..1a7cba285437f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.usub.sat.v2i8(<2 x i8>, <2 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
index e4d100c44407f..3f039c61f6230 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsub-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.sub.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll
index 3bcd29572a494..86dd1a216cc53 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmacc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 
 define <2 x i16> @vwmacc_v2i16(<2 x i8>* %x, <2 x i8>* %y, <2 x i16> %z) {
 ; CHECK-LABEL: vwmacc_v2i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll
index fe97f7894cfd7..358f7eb7b992a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmaccu.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 
 define <2 x i16> @vwmaccu_v2i16(<2 x i8>* %x, <2 x i8>* %y, <2 x i16> %z) {
 ; CHECK-LABEL: vwmaccu_v2i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
index d2553b8accb20..e7180587f9779 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <2 x i16> @vwmul_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
 ; CHECK-LABEL: vwmul_v2i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
index 4251922284900..8e9a6598acd1c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 
 define <2 x i16> @vwmulu_v2i16(<2 x i8>* %x, <2 x i8>* %y) {
 ; CHECK-LABEL: vwmulu_v2i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
index 207f12ff3822e..bd4a53f7b8d7e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vxor-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.xor.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll
index 31614ed25bbe1..dc2bbe8bfbb25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/frameindex-addr.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -stop-after=finalize-isel < %s | FileCheck %s
 
 ; This test makes sure we match FrameIndex into the base address.
 ; Done as a MIR test because eliminateFrameIndex will likely turn it
diff --git a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
index dfc698ded242c..cdfdd889ed01c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ftrunc-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @trunc_nxv1f16(<vscale x 1 x half> %x) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/get-vlen-debugloc.mir b/llvm/test/CodeGen/RISCV/rvv/get-vlen-debugloc.mir
index 8da8785956668..04ec1f3a24f48 100644
--- a/llvm/test/CodeGen/RISCV/rvv/get-vlen-debugloc.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/get-vlen-debugloc.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -mattr=+experimental-v -o - %s \
+# RUN: llc -march=riscv64 -mattr=+v -o - %s \
 # RUN:   -stop-after=prologepilog | FileCheck %s
 
 --- |
diff --git a/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll b/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll
index ea0d18ebdf46e..6dbebb656b66b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v < %s \
 ; RUN:     --verify-machineinstrs | FileCheck %s
 
 define <vscale x 1 x i1> @test_1xi1(<vscale x 1 x i1> %in, <vscale x 1 x i1> %in2) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index 5f495e7f4bab4..08c9fcdead77c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple riscv32 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple riscv64 -mattr=+m,+d,+zfh,+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 8 x i32> @insert_nxv8i32_nxv4i32_0(<vscale x 8 x i32> %vec, <vscale x 4 x i32> %subvec) {
 ; CHECK-LABEL: insert_nxv8i32_nxv4i32_0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll
index 2fa9efbfbf51c..4c6ae4eac3b37 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %elt) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll
index 402e566f573dd..baa9c1532071a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @insertelt_nxv1f16_0(<vscale x 1 x half> %v, half %elt) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll
index cbe815d3b31bc..da0f5c06c63ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i1> @insertelt_nxv1i1(<vscale x 1 x i1> %x, i1 %elt) {
 ; CHECK-LABEL: insertelt_nxv1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
index 99dbe7e23c0da..e2045ce254a1f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @insertelt_nxv1i8_0(<vscale x 1 x i8> %v, i8 signext %elt) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
index 19253cf635ec8..5c147c60404ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @insertelt_nxv1i8_0(<vscale x 1 x i8> %v, i8 signext %elt) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
index 5a0a91d3ef3c6..e4dd3b8a6c9ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/interleave-crash.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=RV64-1024
-; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=2048 < %s | FileCheck %s --check-prefix=RV64-2048
+; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+v -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=RV64-1024
+; RUN: llc -mtriple=riscv64 -O1 -mattr=+m,+v -riscv-v-vector-bits-min=2048 < %s | FileCheck %s --check-prefix=RV64-2048
 
 define void @interleave256(<256 x i16>* %agg.result, <128 x i16>* %0, <128 x i16>* %1) {
 ; RV64-1024-LABEL: interleave256:
diff --git a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
index 0a23c11b54a15..1ef8f19c8b267 100644
--- a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-# RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v -start-before=prologepilog -o - \
+# RUN: llc -mtriple riscv64 -mattr=+m,+v -start-before=prologepilog -o - \
 # RUN:     -verify-machineinstrs %s | FileCheck %s
 --- |
   target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
index 5ce9ee9589983..29428288b551c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-load-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
 
 ; Check that we are able to legalize scalable-vector loads that require widening.
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
index 46e1c0a4d8a5a..7c423a20a0629 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 4 x i5> @trunc_nxv4i32_to_nxv4i5(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: trunc_nxv4i32_to_nxv4i5:
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
index dfb58e2f6ab78..213fe1095515d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-store-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+f,+d -verify-machineinstrs < %s | FileCheck %s
 
 ; Check that we are able to legalize scalable-vector stores that require widening.
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll
index 4d0cf0fae6100..bfbceb4c04ca4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-16.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
 
 define void @vadd_vint16m1(<vscale x 4 x i16> *%pc, <vscale x 4 x i16> *%pa, <vscale x 4 x i16> *%pb) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll
index 73874e2782c16..22a59b3ade70e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-32.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
 
 define void @vadd_vint32m1(<vscale x 2 x i32> *%pc, <vscale x 2 x i32> *%pa, <vscale x 2 x i32> *%pb) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-64.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-64.ll
index 4edad9c1742e3..f091027e6bb7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/load-add-store-64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-64.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
 
 define void @vadd_vint64m1(<vscale x 1 x i64> *%pc, <vscale x 1 x i64> *%pa, <vscale x 1 x i64> *%pb) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-add-store-8.ll b/llvm/test/CodeGen/RISCV/rvv/load-add-store-8.ll
index 793c2dc8cdcb5..25131a2909ca9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/load-add-store-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/load-add-store-8.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
 
 define void @vadd_vint8m1(<vscale x 8 x i8> *%pc, <vscale x 8 x i8> *%pa, <vscale x 8 x i8> *%pb) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-mask.ll b/llvm/test/CodeGen/RISCV/rvv/load-mask.ll
index 4026061158943..6d73a2d9615ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/load-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/load-mask.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv32 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+experimental-v %s -o - \
+; RUN: llc -mtriple riscv64 -mattr=+v %s -o - \
 ; RUN:     -verify-machineinstrs | FileCheck %s
 
 define void @test_load_mask_64(<vscale x 64 x i1>* %pa, <vscale x 64 x i1>* %pb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll
index 48f9bed5b6b59..0f7fa0b9a44ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64IV
 
 define void @local_var_mf8() {
diff --git a/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll
index afd62aebf0a48..5ac40ad8a9235 100644
--- a/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/marith-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s
 
 declare <1 x i1> @llvm.vp.and.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv32.ll
index 886f2c32b66ca..a123b2ee6be32 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @sext_nxv1i1_nxv1i8(<vscale x 1 x i1> %v) {
 ; CHECK-LABEL: sext_nxv1i1_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv64.ll
index c6801feaf5345..6a337320d04c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mask-exts-truncs-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @sext_nxv1i1_nxv1i8(<vscale x 1 x i1> %v) {
 ; CHECK-LABEL: sext_nxv1i1_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir
index 9cce462522d6b..60f02d0b139ed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+# RUN: llc -march=riscv64 -mattr=+v -verify-machineinstrs \
 # RUN:     -start-after finalize-isel -stop-after prologepilog -o - %s | FileCheck %s
 
 --- |
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
index 39a9aad1b8931..3e79ae9dc3e75 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @masked_load_nxv1f16(<vscale x 1 x half>* %a, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv1f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
index 3574e4312ed33..6e6d920a0723e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-load-int.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @masked_load_nxv1i8(<vscale x 1 x i8>* %a, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_load_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll
index aad12b1c3d079..de21fcf167505 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-fp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
 
 define void @masked_store_nxv1f16(<vscale x 1 x half> %val, <vscale x 1 x half>* %a, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv1f16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll
index 184b3af497525..8dde101486448 100644
--- a/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/masked-store-int.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define void @masked_store_nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8>* %a, <vscale x 1 x i1> %mask) nounwind {
 ; CHECK-LABEL: masked_store_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
index 97d1956ce1270..c8d55a2e65c86 100644
--- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -O2 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64IV
 
 declare <vscale x 64 x i8> @llvm.riscv.vmacc.nxv64i8.nxv64i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index b1e55a3f2390b..31731af18b6b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
 
 declare <vscale x 1 x i8> @llvm.masked.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*>, i32, <vscale x 1 x i1>, <vscale x 1 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index a4f7dbd8e748f..f82dcca4ce65b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
 
 declare void @llvm.masked.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, i32, <vscale x 1 x i1>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index b017d9a3b68be..40ede841d9391 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -1,10 +1,10 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-UNKNOWN
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-256
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-512
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-UNKNOWN
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-256
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v,+f,+d,+zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-512
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-UNKNOWN
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-256
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-512
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-UNKNOWN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-256
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-512
 
 ;
 ; VECTOR_REVERSE - masks
diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
index 8ac3073199689..de2bc6353c820 100644
--- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 define signext i32 @foo(i32 signext %aa) #0 {
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr52475.ll b/llvm/test/CodeGen/RISCV/rvv/pr52475.ll
index 5e1e94965f1fa..e2199a6ae9366 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr52475.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr52475.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \
 ; RUN:   -pre-RA-sched=list-burr -disable-machine-cse -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 \
 ; RUN:   -pre-RA-sched=list-burr -disable-machine-cse -verify-machineinstrs < %s | FileCheck %s
 
 define <128 x i32> @ret_split_v128i32(<128 x i32>* %x) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
index 5f926efd9012c..bad1af3c0ac36 100644
--- a/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/reg-alloc-reserve-bp.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+f -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+f -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 define void @foo(i32* nocapture noundef %p1) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir b/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir
index 504dafc50d8e0..e7e0ec614172a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc %s -mtriple=riscv64 -mattr=+experimental-v -run-pass=simple-register-coalescing -o - | FileCheck %s
+# RUN: llc %s -mtriple=riscv64 -mattr=+v -run-pass=simple-register-coalescing -o - | FileCheck %s
 ---
 # Make sure that SrcReg & DstReg of PseudoVRGATHER are not coalesced
 name:            test_earlyclobber
diff --git a/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll b/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll
index 3c1c5d372eef8..41275917bfefb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/regalloc-fast-crash.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh,+m \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+m \
 ; RUN:     -regalloc=fast -verify-machineinstrs < %s | FileCheck %s
 
 ; This test previously crashed with an error "ran out of registers during register allocation"
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
index b4db000233f34..314c41296c72a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -O0 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d -O0 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d -O2 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
 
 @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
index 96699bb541209..cb21513f702c5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -O0 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -O0 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -O2 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
 
 define <vscale x 1 x i32> @spill_lmul_mf2(<vscale x 1 x i32> %va) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
index 07ac76efd8693..2aa051522fa9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O0 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+m -O0 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O2 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+m -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
 
 define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(i32* %base, i32 %vl) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
index 67c5fccd71b68..081743b31b701 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i32 @llvm.riscv.vsetvli.i32(i32, i32, i32)
 declare i32 @llvm.riscv.vsetvlimax.i32(i32, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index 46c263b6c4fad..c2e44e7e4936f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -mattr=+d -O0 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -O0 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -mattr=+d -O2 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
 
 @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
index a3254901bc6d9..74defdfe62b00 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -O0 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -O0 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -O2 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
 
 define <vscale x 1 x i64> @spill_lmul_1(<vscale x 1 x i64> %va) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
index 8388c4160bef4..a7e83ed72453d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O0 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+m -O0 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O2 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+m -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
 
 define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(i32* %base, i64 %vl) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
index a37cb4d33e708..2b745cb5eddaa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
 declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
index af0e5a07862fe..35f7411a921ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v < %s 2>&1 | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s
 
 define <vscale x 16 x i32> @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, <vscale x 16 x i32> %w, <vscale x 16 x i32> %x, <vscale x 16 x i32> %y, <vscale x 16 x i32> %z) {
 ; CHECK-LABEL: bar:
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
index 03d95ed99db99..858ce54610cf7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+m -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s
 
 define void @rvv_vla(i64 %n, i64 %i) nounwind {
 ; CHECK-LABEL: rvv_vla:
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
index df561fe6c7fde..8b75cc3981a63 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 define dso_local void @lots_args(i32 signext %x0, i32 signext %x1, <vscale x 16 x i32> %v0, i32 signext %x2, i32 signext %x3, i32 signext %x4, i32 signext %x5, i32 signext %x6, i32 %x7, i32 %x8, i32 %x9) #0 {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll
index 32f05623149ef..838cd82156875 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \
+; RUN: llc -mtriple riscv32 -mattr=+m,+v < %s \
 ; RUN:    | FileCheck %s
 
 define i32 @vscale_zero() nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll
index d233e14ba7672..27d55aed5ad2e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vscale.i64.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv64 -mattr=+m,+experimental-v < %s \
+; RUN: llc -mtriple riscv64 -mattr=+m,+v < %s \
 ; RUN:    | FileCheck %s -check-prefix=RV64
-; RUN: llc -mtriple riscv32 -mattr=+m,+experimental-v < %s \
+; RUN: llc -mtriple riscv32 -mattr=+m,+v < %s \
 ; RUN:    | FileCheck %s -check-prefix=RV32
 
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll
index e645593810308..1455703483acb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/saddo-sdnode.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare { <vscale x 2 x i32>, <vscale x 2 x i1> } @llvm.sadd.with.overflow.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
index 79d638b46a269..0796a10de07db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @select_nxv1f16(i1 zeroext %c, <vscale x 1 x half> %a, <vscale x 1 x half> %b) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll
index 9ad7d41483987..a0945e96ca5fe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i1> @select_nxv1i1(i1 zeroext %c, <vscale x 1 x i1> %a, <vscale x 1 x i1> %b) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
index f1c4bddb26cd7..c7d5846f97034 100644
--- a/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/select-sra.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
 
 ; This test checks a regression in the select-to-sra transform, which was
 ; asserting (without a precondition) when the vector constants implicitly
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll
index daf5743c269cc..77e9baf86f37b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; FIXME: The scalar/vector operations ('fv' tests) should swap operands and
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll
index 6adc6db413344..348a7cd661c07 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; FIXME: The scalar/vector operations ('fv' tests) should swap operands and
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
index e3ff3ed0e0437..374df4eceedd7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 8 x i1> @icmp_eq_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: icmp_eq_vv_nxv8i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll
index 803ecdda84a7e..36d96ecec950e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 8 x i1> @icmp_eq_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb) {
 ; CHECK-LABEL: icmp_eq_vv_nxv8i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index f6a348418f5b2..e902e1cd577a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+experimental-v,+f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f \
 ; RUN:     -riscv-v-vector-bits-min=128 | FileCheck %s
 
 define void @sink_splat_mul(i32* nocapture %a, i32 signext %x) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll
index f79480992fc32..0ea491c788f76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare { <vscale x 1 x i8>, <vscale x 1 x i1> } @llvm.smul.with.overflow.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
index 9f4d075d63452..4dcf779e2e499 100644
--- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.experimental.stepvector.nxv1i8()
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir
index afe2bc56c776a..4b13dfe2a78f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/tail-agnostic-impdef-copy.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc %s -mtriple=riscv64 -mattr=experimental-v -riscv-v-vector-bits-min=128 -run-pass=finalize-isel -o - | FileCheck %s
+# RUN: llc %s -mtriple=riscv64 -mattr=v -riscv-v-vector-bits-min=128 -run-pass=finalize-isel -o - | FileCheck %s
 
 # This test makes sure we peak through the COPY instruction between the
 # IMPLICIT_DEF and PseudoVLE64_V_M8_MASK in order to select the tail agnostic
@@ -21,8 +21,8 @@
   ; Function Attrs: argmemonly nofree nosync nounwind readonly willreturn
   declare <vscale x 8 x i64> @llvm.masked.load.nxv8i64.p0nxv8i64(<vscale x 8 x i64>*, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i64>) #1
 
-  attributes #0 = { nounwind "target-features"="+experimental-v" }
-  attributes #1 = { argmemonly nofree nosync nounwind readonly willreturn "target-features"="+experimental-v" }
+  attributes #0 = { nounwind "target-features"="+v" }
+  attributes #1 = { argmemonly nofree nosync nounwind readonly willreturn "target-features"="+v" }
 
 ...
 ---
diff --git a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll
index ddd9ffbd02de6..8344124edb02e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare { <vscale x 1 x i8>, <vscale x 1 x i1> } @llvm.umul.with.overflow.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
index 72fa5f41de282..a8059db69be8b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/unaligned-loads-stores.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+experimental-v < %s \
+; RUN: llc -mtriple riscv32 -mattr=+d,+zfh,+v < %s \
 ; RUN:    -verify-machineinstrs | FileCheck %s
-; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+experimental-v < %s \
+; RUN: llc -mtriple riscv64 -mattr=+d,+zfh,+v < %s \
 ; RUN:    -verify-machineinstrs | FileCheck %s
 
 define <vscale x 1 x i32> @unaligned_load_nxv1i32_a1(<vscale x 1 x i32>* %ptr) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll b/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll
index 1796cbcde8755..9dfd10b712769 100644
--- a/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/undef-vp-ops.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; Test that we can remove trivially-undef VP operations of various kinds.
diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
index 9defe1460df39..f6d5f71344565 100644
--- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v < %s | FileCheck %s --check-prefix=RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s --check-prefix=RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s --check-prefix=RV64
 
 define <vscale x 1 x i16> @test_urem_vec_even_divisor_eq0(<vscale x 1 x i16> %x) nounwind {
 ; RV32-LABEL: test_urem_vec_even_divisor_eq0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
index 516476877b712..13eb36dd1a251 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll
index d1724cf22079e..4d692eb089c3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
index 210abf8809624..4e796ebd4a32b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vaaddu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll
index 8b97aa55c09b3..3f9ed340dede9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vaaddu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll
index d46ec9cda5ac8..94c1f4dd52cc9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vadc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll
index 1206136e5575a..40c4fe51c1e9b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vadc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll
index 2a4bc62aaa3c3..57db5175df548 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-policy.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   --riscv-no-aliases < %s | FileCheck %s
 
 declare <vscale x 8 x i8> @llvm.riscv.vadd.nxv8i8.nxv8i8(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
index a10c0c7d38598..4e7b12753aeb2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll
index 35f8d0c44bd21..a56bf1bf97e6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vadd.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
index 3ad55f6fbc0fa..e6a4da4552874 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vadd_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vadd_vx_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
index 4206edadadb85..1190f85852cd9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.add.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
index e3239bd8b2c31..89b9dbcb57c0b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vand.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll
index feb5bc829a1aa..39a1d9bc4a081 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vand.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
index bea39f9740ca9..3175d4765f771 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vand_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vand_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
index b9782bb4b60db..49c64bb688509 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vand-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.and.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
index 24cbd43726c49..d1cfc9704c1e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll
index 4668e2a096caf..6ad70daafb25d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vasub.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
index aa5895ffff549..4707aff206d82 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vasubu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll
index 552ecf1720614..74d4690a8c8db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vasubu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vasubu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
index d8862c9c8e99d..5a55268171f23 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vcompress.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll
index c03b3d1ee28b6..d77f0da267e55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcompress-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vcompress.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll
index d21064962c665..fe5bba14eb6d2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare i32 @llvm.riscv.vcpop.i32.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll
index b42d679b52b6e..8d583b8df634a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare i64 @llvm.riscv.vcpop.i64.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll
index b8bbb3d6024d2..e062069311956 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vdiv.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll
index 102bce0efb78b..01f1100045567 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vdiv.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
index 6adaa2476659e..16fe495b981cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vdiv_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vdiv_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
index 97160b8e93457..3a1b0981b0d95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.sdiv.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll
index e80d871c2485f..b86b902962ea5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vdivu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll
index 9af9d2ad6e419..7efea7cfe3fbf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vdivu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
index 5c3bcd40f4cf5..5bf80bbc34859 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vdivu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vdivu_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
index 97e45f2f4e6ec..2f56348950f14 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.udiv.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
index 74e9553d0916f..e3d0654b60820 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vexts-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i16> @vsext_nxv1i8_nxv1i16(<vscale x 1 x i8> %va) {
 ; CHECK-LABEL: vsext_nxv1i8_nxv1i16:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
index 0a607d3af630a..e5b1e8dcd2cb1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll
index 3a34f6ba27fdf..cfdeba067718a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
index 03ff955623617..8e43295bf7ee3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
index 3fc160103aa39..002fcf15e1d76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @vfadd_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
index 019a72b02be72..f5ab4df3992e9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.vp.fadd.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll
index 35cb662fa6750..ae9df2aefa4d9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll
index 7ccfcb8ca7e3b..c86e1f334712a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index 6e3802b6d3d9e..4c5fc2f32d7ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll
index f55c9908a4539..5549960bb7736 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll
index 93977cf6c7f06..65270dc06336d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll
index 22c09b22d71be..1c8c2a80c90db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll
index 914926eb1a6bb..6fc87d15dac18 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll
index d439b9d909fdc..75c0a7ff62a47 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll
index 2d181fbecc41e..9c3e2a03f1141 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll
index 98f9e330d446d..966a5d6f85a0f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll
index e52f1548e5963..3a309eea35dbd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll
index cdc40c9c45968..26632717dfa9f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll
index 50bfe8b7ecc06..f5984a512e0fe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll
index 4316de95a1ceb..e76b0db05446e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll
index 801d7b0d03f19..8fb44d2d1ecf0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll
index 3f5ee058c566d..2f2bc40a0e6fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
index 97bc6adfb8c14..dc6760900bafb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
index a0293b624eefc..92d84e7b6f070 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @vfdiv_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
index 4e6c840077401..f785903d06786 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll
index dab1544783435..6d6d3a8c74616 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare i32 @llvm.riscv.vfirst.i32.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll
index 005d023207bfe..0e20516a69c79 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare i64 @llvm.riscv.vfirst.i64.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
index b3a99448173d7..207c7ecf5a3db 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll
index f77030c1dac0b..f636fa36eb7c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll
index de7d9ed40414b..dd12115fbb95b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
index 632fdf2f3f877..ae39ef8dd3260 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
index 46735fddc56e5..0c1a79804e167 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; This tests a mix of vfmacc and vfmadd by using different operand orders to
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
index 197b564d058b2..d6a880c743dd2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll
index c11b123fff6ff..aea557735e932 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
index 57f6f0f2d9381..ffa6c42ee795d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.maxnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
index ffa4c3bceac7d..c1d91e208aaa1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll
index 62187750e4525..cf4ad020e4cf1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
index 2b935c0b1744c..2b2d954a7cc1d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll
index e0af688748318..3f1ffd3e81a64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
index 41f4fe4fa7b37..86d5b76a7c6af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.minnum.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll
index 519ae7e204da9..ff51e6dab20a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
index ce757f11d02d6..08666a5dd51c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll
index f5534895fe6f6..8a10800c748ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
index 2b6e35cc92161..eb0733b6e5cc0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
index 9e3cfa87072f5..dd4fb4a321530 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; This tests a mix of vfmsac and vfmsub by using different operand orders to
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll
index e0757776acb22..9cab6de9e0b46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
index a584ad412124b..f260499700e6f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
index 3ad4bf8210069..4aad6cd2c7c2a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @vfmul_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
index 2bff7a44c5213..6223c8d823b27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.vp.fmul.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll
index 9f09ff5f8ca1f..56fe5c3891097 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.f.s.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s
 
 declare half @llvm.riscv.vfmv.f.s.nxv1f16(<vscale x 1 x half>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll
index 5c07fb600b662..4d47c000788fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+experimental-v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.riscv.vfmv.s.f.nxv1f16(<vscale x 1 x half>, half, i32)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll
index db83f2df724ef..ee619309b9e3c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.riscv.vfmv.s.f.nxv1f16(<vscale x 1 x half>, half, i64)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll
index eee0699137ad3..b4acc57dcd81e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -target-abi ilp32d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -target-abi ilp32d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
   half,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll
index 6172df66388a1..1c4d9f6e89dc4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -target-abi lp64d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -target-abi lp64d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
   half,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll
index c79f6e0a9f8de..014ff81bada19 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll
index 9eaff968ae960..2e35f75eb89c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll
index 1f74edd54038c..ab5f66bf692fa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32(
   <vscale x 1 x i32>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll
index 5c8f94ac66ac0..d8d55df8e4589 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32(
   <vscale x 1 x i32>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll
index 1ac2a36048763..4835d4e5c5916 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32(
   <vscale x 1 x i32>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll
index b8928d0a568b2..32a23932b074b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32(
   <vscale x 1 x i32>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll
index 351fea0637eba..b464fdde6db25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll
index af53b02a8a666..4020c1d5d1a34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll
index b4fb1c2dd06e1..227210e6f2f01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll
index 6bdd5208ea831..ad695704aae0a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll
index 4a26e7d89b8b7..4bfe331db7e00 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll
index c875cbd31995f..f7f873dd05155 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll
index c521dc30681e9..6c97455f1992e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll
index 547f307332cba..d78d695f3df20 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll
index 0b960a127ae82..4981f8b16d741 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll
index 7817455e14d68..c7bb913f3797f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
index 14318b8602f33..46014911449b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll
index 704cd9ee01060..956ac22ede348 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
index 3a98ab5ace053..6d302b7f49d5c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll
index b4b0f17ba2f67..9d5d70498ab3c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
index 1f2813400c554..13c9484ad907f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll
index c15245db3a167..e83a13b4221af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; This tests a mix of vfnmacc and vfnmadd by using different operand orders to
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll
index 357a871d2af7f..385b3397ab28b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
index 3f881110a0d8b..2a7a29a742f92 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
index 559a243b3d5c6..35e687c4fcbc0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll
index 7ddf5f79d5b25..977163f1383f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll
index 0c8ab026dd978..d8cbbf1518cc2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; This tests a mix of vfnmsac and vfnmsub by using different operand orders to
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
index ce9636fd7272a..b98eadbde1e5c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
 
 define <vscale x 1 x float> @vfpext_nxv1f16_nxv1f32(<vscale x 1 x half> %va) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index 8c78a870f1d3e..6884ebc92565c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i1> @vfptosi_nxv1f16_nxv1i1(<vscale x 1 x half> %va) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
index f308f1a292c8c..0fb253dfe1861 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefix=RV64
 
 define <vscale x 1 x half> @vfptrunc_nxv1f32_nxv1f16(<vscale x 1 x float> %va) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll
index 6bd205a022e74..b117a120b04d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
index 5fa7b453e583b..6cc0b53443a8f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll
index 56e4c91313dcd..3006acf443206 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.vp.fdiv.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll
index 9dfbde0d35f5a..30897b95deea2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
index c0f6302b31e47..4e0fe7b9fc62f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll
index 10ea4493d5653..25ed3f1ab3676 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll
index 7c862c31193cf..bb173e91ecf31 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll
index 21b4d8c636e7f..9561be7b6fc09 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll
index b31eeb2d99c93..d04ef7a6707de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll
index 252b6c13765c6..1f1e68e0dbc9b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll
index 411c99111b474..8c42e43d9094d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll
index e16a55c6a6942..7b3691c74887c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll
index 896dfbabffd85..9264397afa0e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll
index c423723e44fc6..d0198a85b0c52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
index 190aa57cee66d..cf9d7a4c0af5a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll
index bd8e12488526f..8bbf0aac2b339 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
index 77929c60d2614..85b7148c3c830 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll
index 5a8f76f864077..4f6a1612fb4f4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll
index e6a08cc076b7d..a1dcc1fa26bdc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
index 9943b5dcf3cca..8ff4b4ba03e2a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll
index 6e54a35a8ae2e..ac708fa2d9918 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
index 9272119d955ea..9ff02249165bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll
index 29b38b6449cb2..8f4fcaa73c42c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
index 92b17a254f012..77ebd259c6896 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll
index 3ca377023594a..a4833408acfff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
index 9a2adbc6e0a79..84572f5dec0d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll
index 9517a4a255af8..0e5b566812011 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
index 2c5ed6e5871df..37afacc6c7f27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
index a9f7415c15c6c..3b86fd763f3c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
index 2e95f82bf9eab..c810a516f3b35 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
index 5acac37ac2c70..4b5a337414942 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.sqrt.nxv1f16(<vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll
index fd64fb3da6944..99e45214deea0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
index 15c008f28a424..c972ed358bf9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.nxv1f16(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
index 7ac9df394bf2a..e127406aacf3a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @vfsub_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
index ca52d476e247d..3f5dbc7384ece 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.vp.fsub.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll
index 20aa40dda0ee5..02ce6a1c85f90 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
index 4f829606d14d3..135b56dff1b47 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
index 486d51182b65c..e7c93d765770a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x double> @vfwadd_vv_nxv1f64(<vscale x 1 x float> %va, <vscale x 1 x float> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
index 2c64eb26d074e..fde2d9f89d045 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
index bea142a2ba9a5..9235c3f251d41 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll
index 21d990ed987fc..460488388b5ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll
index 4637218cbd3b8..e050090b7761c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll
index e6dbecbe229c8..467b64c20fedd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll
index 262a0f8788661..e294cbe085f73 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll
index f95130cd1f5e7..fc0af066c39ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll
index f0016e980c317..107813b7879bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll
index c70fb1df935e9..d6cf1b3563105 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll
index 317ac39ede925..4d551f62ec52f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll
index 31cda39c700b9..c419e08471ca1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll
index 971ed34a6895b..6b881aba8f652 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll
index 1d2a542bb51d2..b8d88e1c64e55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll
index b433cd9b66a70..fd01c64df0d36 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll
index 54ace8228ee78..c2b0a222709e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll
index 4bcc8a93ad976..dc461d60b0be9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll
index d3485e58e3a76..ffcfd186c8e10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
index 6ec1b3fe99dc6..bd2dac9493022 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll
index 9b66cfaf15258..8035ec77c03ff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
index a4877537cbb06..01d2ce92c916c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
index ed3b540b2c69d..8e40ddc009af6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll
index 6d36dcfb20caf..9b5a7b0641806 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
index ba29aacf595f3..bb36d96bd9668 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x double> @vfwmul_vv_nxv1f64(<vscale x 1 x float> %va, <vscale x 1 x float> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll
index af468ffdc095a..8a5db70f5baf7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
index fd0f6a2272444..0b21a7aa9b395 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll
index 0bde93d0497c4..554234bb68a61 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
index c8e8f80302a82..44f2e66113939 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll
index 73780360e9551..37240159bd908 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv1f16(
   <vscale x 2 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll
index 837b04fd29501..2282bd5fbc860 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv1f16(
   <vscale x 2 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll
index 175d073fc9442..fe56d0c6bd0d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv1f16(
   <vscale x 2 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll
index 77d901fa04dfe..52bde877eb7d1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv1f16(
   <vscale x 2 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll
index 01c65629b4f5c..0d7c2e7fe0867 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
index fac3479c106c6..cdc77ffc55f8a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
index 8e2ef3f76256f..a8b81fe8feb23 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x double> @vfwsub_vv_nxv1f64(<vscale x 1 x float> %va, <vscale x 1 x float> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
index 762ee60fb6eca..3234dec9db144 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
index 3127e5c5af694..545bc4e4a53de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll
index 4884ccebad49f..58ed5cfb7620d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vid.nxv1i8(
   i32);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll
index 28b5809d5851a..1413c0ec4b443 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vid.nxv1i8(
   i64);
diff --git a/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll
index bf6b235eeab40..6b3838cd5898c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.viota.nxv1i8(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll
index 8f795bc056007..0e37ff483fb4f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.viota.nxv1i8(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
index b0c77864f4fc2..7145cb55d8f84 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vitofp-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @vsitofp_nxv1i1_nxv1f16(<vscale x 1 x i1> %va) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll
index 4b0843094e8bc..51521f31726da 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll
index ba2ebb676129d..78e005648d746 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
index f016f0e23bead..e5ae807e6ff22 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare { <vscale x 1 x i64>, i32 } @llvm.riscv.vleff.nxv1i64(
   <vscale x 1 x i64>*,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
index 381e4c5c59b42..d588205f18702 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64(
   <vscale x 1 x i64>*,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll
index b7933dd4c9b93..72cac45129035 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(<vscale x 1 x i1>*, i32);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll
index 2a969a7ae445a..21b3f7b0b0ad2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(<vscale x 1 x i1>*, i64);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
index d987fe0f7437f..e3275457d2be5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
index 99132b8762347..138916c8bcb16 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64(
   <vscale x 1 x i8>*,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
index a2f1d1f190784..f702d9f5620bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.nxv16i16.nxv16i16(i16*, <vscale x 16 x i16>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
index ec06679f5ba2e..ce63a313cfec5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.nxv16i16.nxv16i16(i16*, <vscale x 16 x i16>, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll
index b3ecb80d7a7ad..8b68a7f94cf94 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
   <vscale x 1 x i64>*,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll
index 32b5acb681818..62fa095e3a14b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
   <vscale x 1 x i64>*,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
index 03ba5f763340d..21ae83e985d05 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.nxv16i16(i16* , i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
index c385e412bc966..25a3d43b68061 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.nxv16i16(i16* , i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
index 1825c90057a53..28729e179ab40 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>, i32} @llvm.riscv.vlseg2ff.nxv16i16(i16* , i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
index 71ff17ff0fb84..86da8a6ab1652 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>, i32} @llvm.riscv.vlseg2ff.nxv16i16(i16* , i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
index 1aa3aaf579ada..af7e678205a5e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.nxv16i16(i16* , i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
index 4b66af34e776c..db03fbed879f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>, i64} @llvm.riscv.vlseg2ff.nxv16i16(i16* , i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll
index 422e64e23622e..069113cd15d58 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlsseg2.nxv16i16(i16*, i32, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll
index 69dd8a988602f..96f1f34c28c24 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlsseg2.nxv16i16(i16*, i64, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
index 9637e0f327fd8..64d73a94e0042 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
index 168c7b2874cdf..efec1362cb9f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64(
   <vscale x 1 x i8>*,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll
index 4232d1c2d11c8..54bf9d2a9ab56 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vluxseg2.nxv16i16.nxv16i16(i16*, <vscale x 16 x i16>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
index 7094d305c8bd8..e14587c1d9b3e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vluxseg2.nxv16i16.nxv16i16(i16*, <vscale x 16 x i16>, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll
index 0ecbc734de624..3aeaa11d2ff94 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll
index 9ce992e1b8487..cfb3a432b92f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmacc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll
index b9d8ac8e3841c..e64a836313d3e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmadc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll
index 4df8881059e31..3739ad3adf406 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmadc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll
index b178d910fa984..a924d04c902f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll
index 0cafb81d85b48..9fdaf473312a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmadc.carry.in.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll
index 8deb23a234760..fa46e6e8932a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll
index b62e0ad82e7d6..fc401f3cba819 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmadd.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
index b8b5c38605ffa..727cf1503b6c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \
+; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \
+; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; This tests a mix of vmacc and vmadd by using different operand orders to
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll
index 16c1398f29115..4d0a65d0f892b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll
index 835c30b50d36f..12107a960f87d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll
index 8743551c89cd6..90d6cba3592d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll
index 3cbd68f5e39ba..5ad6aa2ee4d8f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll
index bc7699e5a902c..e12c10ae36daa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmarith-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i1> @vmand_vv_nxv1i1(<vscale x 1 x i1> %va, <vscale x 1 x i1> %vb) {
 ; CHECK-LABEL: vmand_vv_nxv1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll
index adcefbbc1088b..1d51eb6549cff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmax.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll
index b05f95b256225..da40c60ff8a96 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmax.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll
index 3cdda837dacce..c12259cb3de44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmax-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vmax_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vmax_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll
index 65287ee1ca82f..af080efa3358b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmaxu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll
index a952e8b36bff8..bc63daea21f69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmaxu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll
index 772ede13f50a0..26629320b90c8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vmax_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vmax_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll
index 886bfebb80f06..49fb55c901b2b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmclr.nxv1i1(
   i32);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll
index 2468d7b1c2c1f..50516db35a4fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmclr.nxv1i1(
   i64);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
index 970045f09e781..25ca3b0631c08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
index 23140c523eafc..2360985310ec8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmerge-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmerge.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
index 232294d3496fa..553e1b6930f65 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
index ea61a95f3d96f..6d4c472a5295d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfeq-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfeq.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
index be8799de1f7f2..e3afd15c6df74 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
index 2d6c6a8bc4fd4..ea8e2fcf19af9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfge-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfge.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
index 942de0218179f..a8246870ac786 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
index 18e1136778e09..457ea76560ddd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfgt-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfgt.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
index 128cdf6ce59f6..803eb1ceaab13 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
index fc59c864bdcd7..d28d8d73b1960 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfle-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfle.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
index 09577ac427d13..9485c178df504 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
index a05b50120896e..b55f491944323 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmflt-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmflt.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
index 7a15fa7ba66e1..30c8be8e6fe0c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
index ad572467abdb6..e0d9c231516b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmfne-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmfne.nxv1f16(
   <vscale x 1 x half>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll
index 1a08e35eca440..ebc304b18f760 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmin.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll
index 46726121b21e7..febe87403cdda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmin.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll
index 15d2a2e0c0ae7..bb3a4b149a714 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vmin_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vmin_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll
index 8add74b9114e1..ec3cc011bd6e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vminu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll
index 02040135fde8a..f3b9100c38a62 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vminu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
index e289491af5677..2ee5b370f07a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vmin_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vmin_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll
index 60144b7239719..958d38f0023f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmnand.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll
index 8c95c93e3f008..b4397d512e34b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmnand.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll
index c772ab9562f8a..ca5bcf5463fa5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmnor.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll
index a14d0e7ffd1b9..5beaeadcdcab2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmnor.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll
index 0f43644d2a241..117d152f7248c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmor.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll
index 689c1400b07ec..81fa40663fc1a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmor.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll
index 15fc0c3c33706..8509603acd2f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmorn.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll
index d0358ead0012f..89de2b78e94f5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmorn.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll
index 1ca8c1f0fe2bd..fb05c89fd11b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsbc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll
index 2e3b6bd2d49a4..02e1a4541a30b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsbc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll
index 142c0eeb303e1..f9b46796d6423 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll
index 9d9ec65c9f4af..3b4e2619937ca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll
index d79b3d2d1c03e..b8f27529534da 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsbf.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll
index 4d8246d187af9..bf773b7c45502 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbf-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsbf.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll
index c93e1aa867e1e..6c9536a0d65fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll
index ee35bfd4003fe..300deebbbbf3a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmseq-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll
index 5b8a4f1102d6d..68a6719c76580 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmset.nxv1i1(
   i32);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll
index 22c0f7813dfad..f34157dfd3797 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmset.nxv1i1(
   i64);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll
index 29a2aaf6f192d..2924ca51b4255 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll
index 5062948a6c839..cf14df3631b9b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsge-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsge.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll
index fc9043d633dd2..02750e7e4fff6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll
index 638140176d7f8..c6c2daf8ed9c6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsgeu.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
index f11aabd622479..604f2775277f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll
index 7299b3d2132f5..3082564243bb0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgt-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsgt.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll
index f7ccc30165644..209b714eb7948 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll
index e75132618467e..510fc0664d6bd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsgtu.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll
index abf13933c87d0..888b6ebbbc3f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsif.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll
index 5a0b49f27a469..0f776c83e0129 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsif.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll
index d072e50d12ee5..c5810dacd0e2e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsle.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll
index 88e3ca1efb501..ec74926c8087b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsle-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsle.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll
index cca51cae50ffa..f85b2d16b7cf3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsleu.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll
index b2b859448d59f..74a9516c0ea04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsleu.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll
index c0b0faa8b4d9b..2b70fb0a5bff7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmslt.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll
index 3afdc6193c11f..711ac89528aba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmslt-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmslt.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
index 9652a76bdb821..d20313188376f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsltu.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll
index 82491fd181e19..456a37a3f71bc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsltu.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll
index 1865ea6e47850..534b895d00978 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsne.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll
index cd705033921f5..06a51aa27f2a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsne-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsne.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll
index a8f41ad67800f..b5db2a4b284ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsof.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll
index 588c95315d2c8..8fba91102f814 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsof.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll
index a0ec4a11c625d..cf0c1da69743c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmul.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll
index b24dcf139260e..adf746657527d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmul.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
index 8ecad6004b9bb..9ebda9dd30eb7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vmul_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vmul_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
index ecf33e3c2ca20..488fc89203a3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.mul.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll
index 2a7789fd61f10..51e254f5fb352 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmulh.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll
index 4fd892986d871..9242b6220237d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmulh.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
index 076f726e78359..a65642d05f6ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; Test that the prepareSREMEqFold optimization doesn't crash on scalable
 ; vector types.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll
index fe7c0b23c4788..068a53f994fe1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmulhsu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll
index 09c8be1ee63f6..328c9098fb4d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhsu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmulhsu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll
index 67327a9f8914a..8c04d78a90602 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmulhu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll
index ea7091333b387..48ce2b21aacb6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmulhu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
index 54cb5bfaeef5f..08b0d35a9a7b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i32> @vmulhu_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
 ; CHECK-LABEL: vmulhu_vv_nxv1i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
index fa001c38ef796..44dbe3c095c3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.riscv.vmv.s.x.nxv1i8(<vscale x 1 x i8>, i8, i32)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll
index 45d607f2a15dd..f324d4a45e474 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.riscv.vmv.s.x.nxv1i8(<vscale x 1 x i8>, i8, i64);
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll
index af3dfa4cb4987..d4cebd0ab615e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmv.v.v.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll
index aa4ab67be8285..7038f655e5bfa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.v-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmv.v.v.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll
index dab05270b0039..320fa626e7aad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmv.v.x.nxv1i8(
   i8,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll
index 399e8b53a932c..6bfc5ace93717 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.v.x-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vmv.v.x.nxv1i8(
   i8,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll
index 190cb0ba1c4de..86a687f65c808 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i8 @llvm.riscv.vmv.x.s.nxv1i8(<vscale x 1 x i8>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll
index b6aff9d0ebb97..5ac41b3558f9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv.x.s-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i8 @llvm.riscv.vmv.x.s.nxv1i8(<vscale x 1 x i8>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll
index d923de1200851..82759bc9ea0b5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmxnor.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll
index 48425887bf69b..eb31e34b9c4be 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmxnor.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll
index 2eb3bc6d3de47..15cb88f17599a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmxor.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll
index 2058bdab69b3d..0e8d66b558315 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmxor.nxv1i1(
   <vscale x 1 x i1>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll
index 5301ad154fe3c..3dfadcc3f6c46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll
index 9c47b5f66f4f3..dc0f3f879e370 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclip-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnclip.nxv1i8.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll
index ce705c2d686f6..1fb5c651ebe0f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll
index 00ef63edd25d2..0fd55a75c5cf5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnclipu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnclipu.nxv1i8.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll
index 5ba99c50dc74f..7c1c3e6fcc4f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll
index 79862351375b7..f9ba1e3016eab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnmsac.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll
index 372ba28bd459b..3cf95ae313257 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll
index b5383a9f7325e..6519e590aed38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnmsub.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
index 075c0a35f05b7..3e2ff603401a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \
+; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \
+; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; This tests a mix of vmacc and vmsub by using different operand orders to
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll
index 2cd8e67c7cf22..123dfbe13d77a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll
index 51849e008e94c..b6ad65065adbd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnsra.nxv1i8.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
index 88d790c99b778..9336bb1c6dc1f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \
+; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \
+; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 
 define <vscale x 1 x i32> @vnsra_wv_nxv1i32_sext(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll
index 20cfb1ee1e552..065203a3fa0d4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll
index e11eec3c83f88..8f8a89a85e821 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vnsrl.nxv1i8.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
index fe243983f4ef2..26f32edfc8b7d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \
+; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \
+; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 
 define <vscale x 1 x i32> @vnsrl_wv_nxv1i32_sext(<vscale x 1 x i64> %va, <vscale x 1 x i32> %vb) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll
index 9514f246eb7eb..ab10d1ce9c3ad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vor.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll
index a7b031a79ba0d..d30bf628ad030 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vor.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
index 4a3768c0fd5dc..bef35df66dc62 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vor_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vor_vx_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
index 469d0489c5d83..50023f8ccefad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vor-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.or.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index 2291a9acd4bf4..dc38a17e10a25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare <vscale x 1 x i8> @llvm.vp.gather.nxv1i8.nxv1p0i8(<vscale x 1 x i8*>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
index e909ffe6a025a..ade29d29364a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0nxv1i8(<vscale x 1 x i8>*, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index 92706f7c162f5..62a7244de1b3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare void @llvm.vp.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
index 60d9d6c4faedc..2e5cb42e80ba2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.vp.store.nxv1i8.p0nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>*, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll
index e07c51aa26883..322cce33df0a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredand-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredand.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll
index bd6fd729dda6d..0f92380beb5c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredand-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredand.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll
index fb64bd0df1243..8bb16465d9fa0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredmax-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredmax.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll
index 8b3c8374a0da0..f02fdcf277b38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredmax-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredmax.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll
index 4d2aeae714e75..833ba0bb5723d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredmaxu.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll
index 2415dbb83ad43..df869a03f1a04 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredmaxu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredmaxu.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll
index 129f80531fe1f..e7e147bf8f39f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredmin-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredmin.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll
index fa0976a5353ee..cdaa99581731d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredmin-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredmin.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll
index bbfcd210e7986..8231eaa631fe7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredminu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredminu.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll
index 7a97c6550b4c0..3082c309719e7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredminu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredminu.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll
index f453673238152..61663a336d818 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredor-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredor.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll
index d8e6be8c8e4e1..355e4ee240bbb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredor-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredor.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll
index 50c730f16b3d7..73de97e1a1f02 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredsum-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredsum.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll
index 37aa891f4d6a2..691a521757664 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredsum-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredsum.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
index ee6eeb9275760..bcd803fe6902f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare half @llvm.vector.reduce.fadd.nxv1f16(half, <vscale x 1 x half>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
index 6cd3478cae769..916863d14e6f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare half @llvm.vp.reduce.fadd.nxv1f16(half, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
index 6ef7129bea8e9..b9e60a32cb926 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
index f1bc225da2fec..04e8ddf72cc40 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i8 @llvm.vector.reduce.add.nxv1i8(<vscale x 1 x i8>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
index aa725dbca9533..9f57f9770753d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare i8 @llvm.vp.reduce.add.nxv1i8(i8, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
index bd4a0b038724b..e204b1be0e359 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask-vp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i1 @llvm.vp.reduce.and.nxv1i1(i1, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
index f499922b24a33..9b51b2534dfe2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-mask.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
 
 declare i1 @llvm.vector.reduce.or.nxv1i1(<vscale x 1 x i1>)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll
index 4acd94eb2d872..4959f922ddeb4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredxor-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredxor.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll
index d96536c30cafd..717dd8e51e62a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vredxor-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 8 x i8> @llvm.riscv.vredxor.nxv8i8.nxv1i8(
   <vscale x 8 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll
index 2a17d9f5fbc80..e9a24bcee7d69 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrem.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll
index 7e4400f7623cd..100b79bde6258 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrem.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
index c58c3025050ee..f6e47b8272b41 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vrem_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vrem_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
index 5b299da083174..5cb0cd623caf3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.srem.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll
index dbc2c55e3eb81..6b3cf9df77ca6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vremu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll
index 4f88f35b269fb..9b5c03b51f0b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vremu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
index 792ed45d6ab66..e67f25b6b5c7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vremu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vremu_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
index dededca7bb720..ccc9af287965d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.urem.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll
index 239e70e49cefb..75e05a90a06cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrgather.vv.nxv1i8.i32(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll
index dc3f02c2501a8..a15e7ba69e385 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrgather.vv.nxv1i8.i64(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll
index 3c8e8035e6cb3..720d6d8585fdd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrgatherei16.vv.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll
index 4a656ec40fe22..9592630c19781 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrgatherei16.vv.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
index 54ac860a6cc41..bd00557309afd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrsub.nxv1i8.i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll
index cead8fe1784fe..fe930ea9dcce7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrsub.nxv1i8.i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll
index 352b8b4d27adb..824c34a632a39 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vrsub_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vrsub_vx_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
index 99bf38d64c92b..f3b360261a226 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.vp.sub.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
index ee57455925000..968fbf120aa3c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsadd.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll
index c360fc5a4d9b4..b1cf6b22b0f97 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsadd.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll
index ab07fd7d20bf1..14728e92d7b76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.sadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
index c743ac9bbceec..c57a590eb3888 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsaddu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll
index 9b683baf1590f..991d230237757 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsaddu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll
index 8eec3839a37c6..2ca5a68b41d77 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.uadd.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
index c15ad9f2f3a1b..2c5ef91cf9c20 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsbc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll
index 0140009a5a5b4..555c74c918551 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsbc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsbc.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll
index c42261a048558..c01318e733036 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vse-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare void @llvm.riscv.vse.nxv1i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll
index 6d5ef4155c6c3..7be33d5a8050d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vse-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare void @llvm.riscv.vse.nxv1i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll
index 4f766ebfb441b..16ede807a4143 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @vfmerge_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %cond) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll
index cbc714e15a4b1..3d1106a5ec94c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x half> @vfmerge_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %cond) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll
index 7df7112bb43be..3d8a951ee52d2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @vmerge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %cond) {
 ; CHECK-LABEL: vmerge_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv64.ll
index 5c79b20c439b0..0a3467c7a05d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-int-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @vmerge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %cond) {
 ; CHECK-LABEL: vmerge_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll
index bc8e96ec31258..f8227fb538254 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-mask.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i1> @vselect_nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1> %b, <vscale x 1 x i1> %cc) {
 ; CHECK-LABEL: vselect_nxv1i1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
index ddb97fc7d40c8..bdd180c0e3a9e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+experimental-v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+v -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+experimental-v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+v -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x i1> @llvm.vp.select.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll
index 632ca9a968ea4..64b73fc19c549 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvl-ext.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+experimental-v | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
 
 declare i64 @llvm.riscv.vsetvli(
   i64, i64, i64);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index e506a623b6fe5..d6ea430c36ca6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
 ; RUN:    -verify-machineinstrs -O2 < %s | FileCheck %s
 
 ; The following tests check whether inserting VSETVLI avoids inserting
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
index f3f5e5034a824..4429e0db4ecd9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=v \
 # RUN:     -run-pass=riscv-insert-vsetvli | FileCheck %s
 
 --- |
@@ -116,7 +116,7 @@
   ; Function Attrs: nounwind readnone
   declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
 
-  attributes #0 = { "target-features"="+experimental-v" }
+  attributes #0 = { "target-features"="+v" }
   attributes #1 = { nounwind readnone }
   attributes #2 = { nounwind }
   attributes #3 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 39a533ceedc6b..66878967ccb95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+experimental-v \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+a,+c,+v \
 ; RUN:    -verify-machineinstrs -O2 < %s | FileCheck %s
 
 declare i64 @llvm.riscv.vsetvli(i64, i64, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index 719a4844152a7..e5c85d1908c1d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc %s -o - -mtriple=riscv64 -mattr=experimental-v \
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=v \
 # RUN:     -run-pass=riscv-insert-vsetvli | FileCheck %s
 
 --- |
@@ -84,7 +84,7 @@
   ; Function Attrs: nounwind readnone
   declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1
 
-  attributes #0 = { "target-features"="+experimental-v" }
+  attributes #0 = { "target-features"="+v" }
   attributes #1 = { nounwind readnone }
   attributes #2 = { nofree nosync nounwind readnone willreturn }
   attributes #3 = { nounwind }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll
index 3c8e5d9c0d5b0..c49c69b180409 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv64 -mattr=+experimental-v | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s
 
 ; This test checks a regression in the vsetvli insertion pass. We used to
 ; prserve the VL on the second vsetvli with ratio e32/m1, when the the last
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll
index fae6c4a486e3a..72e3958f7e30e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsext-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll
index 74bca169184e3..e63fff73f538f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsext-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vsext.nxv1i64.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll
index 096944c6a219b..f4c63a2b83c08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vshl-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @vshl_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vshl_vx_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
index cdbeab6a1f707..c7d0732df226f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vshl-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.shl.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll
index 9674d9d711c9a..a9a9ea8019b0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vslide1down.nxv1i8.i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll
index 677c0925c5079..2110f21bf13d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslide1down-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vslide1down.nxv1i8.i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll
index b48f1814a5411..c5b0bb3c07a29 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vslide1up.nxv1i8.i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll
index 8be9b2e7657a6..81c16b9c315cd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vslide1up.nxv1i8.i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv32.ll
index b78473e5e8b1c..4cd2fd28752ed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vslidedown.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv64.ll
index 694dea11dd8c6..863c955753152 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslidedown-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vslidedown.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll
index 9d0f5de309977..3cdb4afac9692 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslideup-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vslideup.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll
index 3e3bc67f4d303..a9a166c1056e9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vslideup-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vslideup.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
index 437dcf6e3b322..3f555dba39c5f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
index b63d8e31db694..8aa798d3b9e67 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll
index 2736ac8d2cc6e..3285cdbfe2741 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 declare void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>*, i32);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll
index 2547cf3386f18..7fd84ccf35efa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 
 declare void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>*, i64);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
index d5ed31dbf536e..be3a442cc61b7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsmul.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll
index 4018735e756b0..3a5eb3c18f2b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsmul.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll
index 3c589746ae044..2100af514fa3c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i32(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll
index c352ab4c96df5..a3f6637cfde47 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsoxei-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare void @llvm.riscv.vsoxei.nxv1i8.nxv1i64(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll
index a1d1170caf31c..9556358394554 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.riscv.vsoxseg2.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll
index 053a3b011e9a6..289825be3546d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsoxseg-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.riscv.vsoxseg2.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
index 07dcb97573d12..d6a9d7dcb213d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-fp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+experimental-v -target-abi ilp32d -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+zfh,+v -target-abi ilp32d -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=RV32V
-; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+experimental-v -target-abi lp64d -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+zfh,+v -target-abi lp64d -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=RV64V
 
 define <vscale x 8 x half> @vsplat_nxv8f16(half %f) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
index 29073eb2c8fd5..c98500a501e54 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i1.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i1> @vsplat_nxv1i1_0() {
 ; CHECK-LABEL: vsplat_nxv1i1_0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
index 724ffb2f65f29..14947011213f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-i64.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=RV32V
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=RV64V
 
 define <vscale x 8 x i64> @vsplat_nxv8i64_1() {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll
index 72803a5218646..78eac5fcc9ec7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsra.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll
index a0c463ea0fe93..4841ca02e382b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsra.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
index 3c6cf9e089c7b..312f1fde4936d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @vsra_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vsra_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
index 676c1631c2ad9..323368986a1a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsra-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.ashr.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll
index 25932f4e185fd..8ae5ec19ccda0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsrl.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll
index e23dec5652f1d..054d81a54db17 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsrl.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll
index 2bf34ea07b987..d38da572389ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @vsrl_vx_nxv1i8(<vscale x 1 x i8> %va, i8 signext %b) {
 ; CHECK-LABEL: vsrl_vx_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
index 663a1aaef6f11..1361891667d32 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsrl-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.lshr.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll
index d26ab423eab4c..d8441bcd89cbc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsse-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare void @llvm.riscv.vsse.nxv1i64(
   <vscale x 1 x i64>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll
index f3b6bb8095067..31aebfadf3daa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsse-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare void @llvm.riscv.vsse.nxv1i64(
   <vscale x 1 x i64>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll
index 9e556fc69e874..130e84e003ad8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.riscv.vsseg2.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16* , i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll
index a58979e050471..779848c14865e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsseg-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.riscv.vsseg2.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16* , i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll
index 8a22fd3f01806..9cb0c4803370c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssra-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vssra.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll
index 04cd8f2bddfad..b39bdb4bdda64 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssra-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vssra.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll
index 6d0f202499b10..173f2f863d55b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vssrl.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll
index f8fd647088592..3bc2f68ebb94a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssrl-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vssrl.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll
index 5749bea3ae4b8..34d4482e382da 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssseg-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.riscv.vssseg2.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, i32, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll
index dd35ac4c4c7d0..c90d4140221cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssseg-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.riscv.vssseg2.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, i64, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
index 7360929828013..c695923f56a06 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vssub.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll
index 02fc13ce15e81..bff302a14b36b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vssub.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll
index bf8c0fd77304f..0ac720d1ca43f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssub-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.ssub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
index 7f2debfec993b..2b0fcf54963c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vssubu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll
index 923a1d32e74af..0169d78b4a798 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vssubu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll
index 20be3f2b29c94..341ec7ae580f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.usub.sat.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
index a49968a40720d..a9896a305516f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsub.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll
index ad0280ddb48b9..4840fa6030363 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vsub.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
index f9fe816659300..4744b8e2c33d7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vsub_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vsub_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
index d4818c7514049..9af879ef4fde1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsub-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.sub.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll
index e6c209074e923..bcfa09f17f8d2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+d,+zfh,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i32(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll
index 8f5798e624865..614471ef46429 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsuxei-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare void @llvm.riscv.vsuxei.nxv1i8.nxv1i64(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll
index 13e4d9781b77a..f2cfbe5837b05 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.riscv.vsuxseg2.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll
index 6f3c5d384a979..fbe64219a8878 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsuxseg-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve64d,+f,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zfh \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.riscv.vsuxseg2.nxv16i16.nxv16i16(<vscale x 16 x i16>,<vscale x 16 x i16>, i16*, <vscale x 16 x i16>, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
index 628fd4554885a..9a4b78a009e34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i8> @vtrunc_nxv1i16_nxv1i8(<vscale x 1 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i16_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll
index f57bf32b789cd..72f559952e294 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwadd.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll
index 7ec1fa4e4a950..348d2b32b65c7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwadd.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
index 54dbaf8a52d35..a0e609c832f75 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i64> @vwadd_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
 ; CHECK-LABEL: vwadd_vv_nxv1i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll
index 6f711bc8b348d..6fab280f8f9b1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs -early-live-intervals < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs -early-live-intervals < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll
index 441432c5e74d9..65481d304ddf4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs -early-live-intervals < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -early-live-intervals < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll
index e42ccf233604b..f37d6ed898679 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwaddu.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll
index f9ad3eb2a24ab..6c546a504b017 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwaddu.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll
index 0ffeabaa38290..1128135e33ab4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll
index 044737e1e3007..e3840e33f81a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll
index 6f5aafc378350..a68c26ba9180c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll
index 261386ba8a87c..b3922ccb1ce43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmacc.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll
index fdc1bd7364120..5cb8ce11c317b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmacc-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -target-abi=ilp32 \
+; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -target-abi=lp64 \
+; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK
 
 define <vscale x 1 x i64> @vwmacc_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i64> %vc) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll
index 5014a750305b8..3b3ed3ce683ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll
index 250df8d77149c..07f11b1ef1615 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccsu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmaccsu.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll
index a8dc1c6087b8e..4d7057a8f81a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll
index 66f2d7fc9fc65..623e00c03aadb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmaccu.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll
index 171e0d17fccc2..6a3aabba1be27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmaccus.nxv1i16.i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll
index 40e78df76bf19..6786754645fdc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmaccus-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmaccus.nxv1i16.i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll
index 47ecba522f2fe..338e4b352ddf3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmul.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll
index 05bdcf70f5480..da5accc0f1bd4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmul-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmul.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll
index 0d91f668b792d..4f480ebea8726 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmulsu.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll
index 4ac17a317522a..da7d9b97bc0ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmulsu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmulsu.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll
index 9f46f19b99386..11ac17450a4cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmulu.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll
index 99a0d2183133a..e9adaec0bdc8c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwmulu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwmulu.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll
index 3b849d9cbbe9f..13e130b7821af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x i16> @llvm.riscv.vwredsum.nxv4i16.nxv1i8(
   <vscale x 4 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll
index 09dc8c97aac88..1ea1418cbad06 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwredsum-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x i16> @llvm.riscv.vwredsum.nxv4i16.nxv1i8(
   <vscale x 4 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll
index c3ad4998d6492..f6cde08be1175 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x i16> @llvm.riscv.vwredsumu.nxv4i16.nxv1i8(
   <vscale x 4 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll
index 62232b11f8c4c..c1776201926cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwredsumu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 4 x i16> @llvm.riscv.vwredsumu.nxv4i16.nxv1i8(
   <vscale x 4 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll
index 82b59dc788f5a..ef27184103d60 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwsub.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll
index 9aac53ae44b5c..2de365ecb1218 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwsub.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
index 36f9305504aa8..a2fef3b541d56 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 define <vscale x 1 x i64> @vwsub_vv_nxv1i64(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb) {
 ; CHECK-LABEL: vwsub_vv_nxv1i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll
index 42c6411fa4d65..213cb69a9a6a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll
index 1257f5a546307..6e8aff5e81593 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll
index acee77f9bbd01..603f685d4c7e6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwsubu.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll
index 65a7c23e6225c..3c4cc0283cc0d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwsubu.nxv1i16.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll
index 7a4ac1c97e4be..1406f14de4422 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll
index 4de4be003f58a..d504b680f2a6f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
   <vscale x 1 x i16>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
index 1af276b68441b..8da7f7acb2e62 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vxor.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll
index f7f5819bad2d9..08c47cd6c17fe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vxor.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
index 5afb940d9d075..be0f5d96ce3a4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-sdnode.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <vscale x 1 x i8> @vxor_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vxor_vv_nxv1i8:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
index 975c4e926fbd1..800da27d71f0e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 8 x i7> @llvm.vp.xor.nxv8i7(<vscale x 8 x i7>, <vscale x 8 x i7>, <vscale x 8 x i1>, i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll
index 4e826daa382be..d84527c161c97 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll
index f48d607d984e0..dbd58c5650a2b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext-rv64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i8(
   <vscale x 1 x i8>,
diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir
index 08a3df1f0b44b..21fef81e60d9e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv32.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-# RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v  -o - %s \
+# RUN: llc -mtriple=riscv32 -mattr=+m,+v  -o - %s \
 # RUN:   -start-before=prologepilog | FileCheck %s
 #
 # This test checks that we are assigning the right stack slot to GPRs and to
diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir
index ce0497adaa39f..994857ba81cb3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/wrong-stack-slot-rv64.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-# RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v  -o - %s \
+# RUN: llc -mtriple=riscv64 -mattr=+m,+v  -o - %s \
 # RUN:   -start-before=prologepilog | FileCheck %s
 #
 # This test checks that we are assigning the right stack slot to GPRs and to
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir
index 2bc83469896cc..66b320a42b02e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple riscv64 -mattr=+experimental-v -verify-machineinstrs -run-pass=postrapseudos %s -o - | FileCheck %s
+# RUN: llc -mtriple riscv64 -mattr=+v -verify-machineinstrs -run-pass=postrapseudos %s -o - | FileCheck %s
 
 ...
 ---
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index 09b8eaccdab0c..8ed9afd836c9a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=riscv64 -mattr=+experimental-v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
+# RUN: llc -march=riscv64 -mattr=+v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
 
 --- |
   target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll
index a96de294b270d..0420f8bf77240 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-zve32x \
+; RUN: llc -mtriple=riscv64 -mattr=+zve32x \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s
 
 ; Make sure we don't select a 0 vl to X0 in the custom isel handlers we use
diff --git a/llvm/test/CodeGen/RISCV/scalable-vector-struct.ll b/llvm/test/CodeGen/RISCV/scalable-vector-struct.ll
index ca71df0713cb1..030555ca76c1f 100644
--- a/llvm/test/CodeGen/RISCV/scalable-vector-struct.ll
+++ b/llvm/test/CodeGen/RISCV/scalable-vector-struct.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
 
 ; This demonstrates that we can pass a struct containing scalable vectors across
 ; a basic block.
diff --git a/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll b/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll
index 4c3f58432c134..fcf6a8c2b0e0a 100644
--- a/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll
+++ b/llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+zfh \
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh \
 ; RUN:   -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s
 
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 2b7f579c9b2ce..d103ca03cbf45 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -3,8 +3,8 @@
 ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M
 ; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV
 
 define i1 @test_srem_odd(i29 %X) nounwind {
 ; RV32-LABEL: test_srem_odd:
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index e0de325fd5359..c7e93bea08b9b 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -3,8 +3,8 @@
 ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s --check-prefixes=RV64
 ; RUN: llc -mtriple=riscv32 -mattr=+m < %s | FileCheck %s --check-prefixes=RV32M
 ; RUN: llc -mtriple=riscv64 -mattr=+m < %s | FileCheck %s --check-prefixes=RV64M
-; RUN: llc -mtriple=riscv32 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV
-; RUN: llc -mtriple=riscv64 -mattr=+m,+experimental-v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV32MV
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 < %s | FileCheck %s --check-prefixes=RV64MV
 
 define i1 @test_urem_odd(i13 %X) nounwind {
 ; RV32-LABEL: test_urem_odd:
diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
index 1b81beb113ecd..e161e5bb92e1a 100644
--- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
+++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:     | FileCheck %s --check-prefix=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
 ; RUN:     | FileCheck %s --check-prefix=RV64
 
 ; This test would lead one of the DAGCombiner's visitVSELECT optimizations to
diff --git a/llvm/test/MC/RISCV/attribute-arch-invalid.s b/llvm/test/MC/RISCV/attribute-arch-invalid.s
index 1dd5621d128ba..d1dc71d4982f3 100644
--- a/llvm/test/MC/RISCV/attribute-arch-invalid.s
+++ b/llvm/test/MC/RISCV/attribute-arch-invalid.s
@@ -5,9 +5,6 @@
 
 ## Version strings are required for experimental extensions
 
-.attribute arch, "rv32iv"
-# CHECK: error: invalid arch name 'rv32iv', experimental extension requires explicit version number `v`
-
 .attribute arch, "rv32izbe"
 # CHECK:  error: invalid arch name 'rv32izbe', experimental extension requires explicit version number `zbe`
 
@@ -25,6 +22,3 @@
 
 .attribute arch, "rv32izbt"
 # CHECK: error: invalid arch name 'rv32izbt', experimental extension requires explicit version number `zbt`
-
-.attribute arch, "rv32iv"
-# CHECK: error: invalid arch name 'rv32iv', experimental extension requires explicit version number `v`
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 65d4008a5869e..15ee933dcdd1d 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -33,11 +33,62 @@
 .attribute arch, "rv32ima2p0_fdc"
 # CHECK: attribute      5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
 
-## Experimental extensions require version string to be explicitly specified
+.attribute arch, "rv32iv"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
-.attribute arch, "rv32iv1p0"
+.attribute arch, "rv32ivzvl32b"
 # CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
+.attribute arch, "rv32ivzvl64b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ivzvl128b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ivzvl256b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ivzvl512b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ivzvl1024b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ivzvl2048b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ivzvl4096b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ivzvl8192b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
+
+.attribute arch, "rv32ivzvl16384b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
+
+.attribute arch, "rv32ivzvl32768b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
+
+.attribute arch, "rv32ivzvl65536b"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0"
+
+.attribute arch, "rv32izve32x"
+# CHECK: attribute      5, "rv32i2p0_zve32x1p0_zvl32b1p0"
+
+.attribute arch, "rv32ifzve32f"
+# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zvl32b1p0"
+
+.attribute arch, "rv32izve64x"
+# CHECK: attribute      5, "rv32i2p0_zve32x1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ifzve64f"
+# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
+
+.attribute arch, "rv32ifdzve64d"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
+
+## Experimental extensions require version string to be explicitly specified
+
 .attribute arch, "rv32izba1p0"
 # CHECK: attribute      5, "rv32i2p0_zba1p0"
 
@@ -74,60 +125,6 @@
 .attribute arch, "rv32ifzfh1p0"
 # CHECK: attribute      5, "rv32i2p0_f2p0_zfh1p0_zfhmin1p0"
 
-.attribute arch, "rv32iv1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl32b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl64b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl128b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl256b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl512b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl1024b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl2048b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl4096b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0"
-
-.attribute arch, "rv32iv1p0zvl8192b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
-
-.attribute arch, "rv32iv1p0zvl16384b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
-
-.attribute arch, "rv32iv1p0zvl32768b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
-
-.attribute arch, "rv32iv1p0zvl65536b1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0"
-
-.attribute arch, "rv32i_zve32x1p0"
-# CHECK: attribute      5, "rv32i2p0_zve32x1p0_zvl32b1p0"
-
-.attribute arch, "rv32if_zve32f1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zvl32b1p0"
-
-.attribute arch, "rv32i_zve64x1p0"
-# CHECK: attribute      5, "rv32i2p0_zve32x1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
-
-.attribute arch, "rv32if_zve64f1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_zve32f1p0_zve32x1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
-
-.attribute arch, "rv32ifd_zve64d1p0"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl32b1p0_zvl64b1p0"
-
 .attribute arch, "rv32i_zbkb1p0"
 # CHECK: attribute      5, "rv32i2p0_zbkb1p0"
 
diff --git a/llvm/test/MC/RISCV/rvv/add.s b/llvm/test/MC/RISCV/rvv/add.s
index 18143ff0d738c..17e84b00cce3d 100644
--- a/llvm/test/MC/RISCV/rvv/add.s
+++ b/llvm/test/MC/RISCV/rvv/add.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vadd.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/aliases.s b/llvm/test/MC/RISCV/rvv/aliases.s
index 3bf55fa405fe3..0dadeb1b1db19 100644
--- a/llvm/test/MC/RISCV/rvv/aliases.s
+++ b/llvm/test/MC/RISCV/rvv/aliases.s
@@ -1,6 +1,6 @@
-# RUN: llvm-mc --triple=riscv64 -mattr +experimental-v < %s --show-encoding 2>&1 \
+# RUN: llvm-mc --triple=riscv64 -mattr +v < %s --show-encoding 2>&1 \
 # RUN:   -mattr +d | FileCheck --check-prefix=ALIAS %s
-# RUN: llvm-mc --triple=riscv64 -mattr=+experimental-v --riscv-no-aliases < %s \
+# RUN: llvm-mc --triple=riscv64 -mattr=+v --riscv-no-aliases < %s \
 # RUN:   -mattr +d --show-encoding 2>&1 | FileCheck --check-prefix=NO-ALIAS %s
 
 # ALIAS:    vwcvt.x.x.v     v2, v1, v0.t    # encoding: [0x57,0x61,0x10,0xc4]
diff --git a/llvm/test/MC/RISCV/rvv/and.s b/llvm/test/MC/RISCV/rvv/and.s
index e098ff1f2f8f3..c6f3565042662 100644
--- a/llvm/test/MC/RISCV/rvv/and.s
+++ b/llvm/test/MC/RISCV/rvv/and.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vand.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/clip.s b/llvm/test/MC/RISCV/rvv/clip.s
index 04cbe5eab49a2..5ec62dafae08c 100644
--- a/llvm/test/MC/RISCV/rvv/clip.s
+++ b/llvm/test/MC/RISCV/rvv/clip.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vnclipu.wv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/compare.s b/llvm/test/MC/RISCV/rvv/compare.s
index 6258c46d05ea1..89f19c5af2550 100644
--- a/llvm/test/MC/RISCV/rvv/compare.s
+++ b/llvm/test/MC/RISCV/rvv/compare.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vmslt.vv v0, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/convert.s b/llvm/test/MC/RISCV/rvv/convert.s
index 3c45e79acf155..0a2d113b238fd 100644
--- a/llvm/test/MC/RISCV/rvv/convert.s
+++ b/llvm/test/MC/RISCV/rvv/convert.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/div.s b/llvm/test/MC/RISCV/rvv/div.s
index 47d88daace2b7..6aed20f03d24c 100644
--- a/llvm/test/MC/RISCV/rvv/div.s
+++ b/llvm/test/MC/RISCV/rvv/div.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vdivu.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/ext.s b/llvm/test/MC/RISCV/rvv/ext.s
index e095a1c3edb2c..c0cd7341fef2d 100644
--- a/llvm/test/MC/RISCV/rvv/ext.s
+++ b/llvm/test/MC/RISCV/rvv/ext.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vzext.vf2 v8, v4, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/fadd.s b/llvm/test/MC/RISCV/rvv/fadd.s
index fbb8850768ca3..acab959b3ade3 100644
--- a/llvm/test/MC/RISCV/rvv/fadd.s
+++ b/llvm/test/MC/RISCV/rvv/fadd.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/fcompare.s b/llvm/test/MC/RISCV/rvv/fcompare.s
index e1c89969bfc56..5716736041c78 100644
--- a/llvm/test/MC/RISCV/rvv/fcompare.s
+++ b/llvm/test/MC/RISCV/rvv/fcompare.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/fdiv.s b/llvm/test/MC/RISCV/rvv/fdiv.s
index ea14ef03e054b..a23e4820179f9 100644
--- a/llvm/test/MC/RISCV/rvv/fdiv.s
+++ b/llvm/test/MC/RISCV/rvv/fdiv.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/fmacc.s b/llvm/test/MC/RISCV/rvv/fmacc.s
index 032b79ee6ce5c..b59da7a72525f 100644
--- a/llvm/test/MC/RISCV/rvv/fmacc.s
+++ b/llvm/test/MC/RISCV/rvv/fmacc.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/fminmax.s b/llvm/test/MC/RISCV/rvv/fminmax.s
index 6734b2a14a7a7..c7fe5c4926db8 100644
--- a/llvm/test/MC/RISCV/rvv/fminmax.s
+++ b/llvm/test/MC/RISCV/rvv/fminmax.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/fmul.s b/llvm/test/MC/RISCV/rvv/fmul.s
index 68d9610f67f39..4907fa0b25d52 100644
--- a/llvm/test/MC/RISCV/rvv/fmul.s
+++ b/llvm/test/MC/RISCV/rvv/fmul.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/fmv.s b/llvm/test/MC/RISCV/rvv/fmv.s
index a2d915a71a365..f8a1e3e7111b6 100644
--- a/llvm/test/MC/RISCV/rvv/fmv.s
+++ b/llvm/test/MC/RISCV/rvv/fmv.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/fothers.s b/llvm/test/MC/RISCV/rvv/fothers.s
index abed79070f867..214d03030cca2 100644
--- a/llvm/test/MC/RISCV/rvv/fothers.s
+++ b/llvm/test/MC/RISCV/rvv/fothers.s
@@ -1,12 +1,12 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:   --mattr=+f --riscv-no-aliases \
 # RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:   --mattr=+f | llvm-objdump -d --mattr=+experimental-v --mattr=+f -M no-aliases - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:   --mattr=+f | llvm-objdump -d --mattr=+v --mattr=+f -M no-aliases - \
 # RUN:   | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:   --mattr=+f | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vfsqrt.v v8, v4, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/freduction.s b/llvm/test/MC/RISCV/rvv/freduction.s
index f093fef0a2e3f..09e074743df26 100644
--- a/llvm/test/MC/RISCV/rvv/freduction.s
+++ b/llvm/test/MC/RISCV/rvv/freduction.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f --riscv-no-aliases \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f \
 # RUN:        -M no-aliases - | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/fsub.s b/llvm/test/MC/RISCV/rvv/fsub.s
index 82f4babb4d7e9..dca6d8d7d65ba 100644
--- a/llvm/test/MC/RISCV/rvv/fsub.s
+++ b/llvm/test/MC/RISCV/rvv/fsub.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/invalid-eew.s b/llvm/test/MC/RISCV/rvv/invalid-eew.s
index 65894831006e8..df336ca3b3e23 100644
--- a/llvm/test/MC/RISCV/rvv/invalid-eew.s
+++ b/llvm/test/MC/RISCV/rvv/invalid-eew.s
@@ -1,4 +1,4 @@
-# RUN: not llvm-mc -triple=riscv32 --mattr=+experimental-zve32x %s 2>&1 \
+# RUN: not llvm-mc -triple=riscv32 --mattr=+zve32x %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
 
 vluxei64.v v8, (a0), v4, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/invalid.s b/llvm/test/MC/RISCV/rvv/invalid.s
index 29da69fabaa92..41f870963ef8d 100644
--- a/llvm/test/MC/RISCV/rvv/invalid.s
+++ b/llvm/test/MC/RISCV/rvv/invalid.s
@@ -1,4 +1,4 @@
-# RUN: not llvm-mc -triple=riscv64 --mattr=+experimental-v --mattr=+f %s 2>&1 \
+# RUN: not llvm-mc -triple=riscv64 --mattr=+v --mattr=+f %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
 
 vsetivli a2, 32, e8,m1
diff --git a/llvm/test/MC/RISCV/rvv/load.s b/llvm/test/MC/RISCV/rvv/load.s
index e0991eec87874..689396b55c8a2 100644
--- a/llvm/test/MC/RISCV/rvv/load.s
+++ b/llvm/test/MC/RISCV/rvv/load.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:   --riscv-no-aliases | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:   | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:   | llvm-objdump -d --mattr=+v -M no-aliases - \
 # RUN:   | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:   | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vlm.v v0, (a0)
diff --git a/llvm/test/MC/RISCV/rvv/macc.s b/llvm/test/MC/RISCV/rvv/macc.s
index e7cd4e6502aad..6cc2219220902 100644
--- a/llvm/test/MC/RISCV/rvv/macc.s
+++ b/llvm/test/MC/RISCV/rvv/macc.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vmacc.vv v8, v20, v4, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/mask.s b/llvm/test/MC/RISCV/rvv/mask.s
index 797679154bb37..8f4b4ef64b2e9 100644
--- a/llvm/test/MC/RISCV/rvv/mask.s
+++ b/llvm/test/MC/RISCV/rvv/mask.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vmand.mm v8, v4, v20
diff --git a/llvm/test/MC/RISCV/rvv/minmax.s b/llvm/test/MC/RISCV/rvv/minmax.s
index 9829fce067e1f..dce30d189c6c7 100644
--- a/llvm/test/MC/RISCV/rvv/minmax.s
+++ b/llvm/test/MC/RISCV/rvv/minmax.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vminu.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/mul.s b/llvm/test/MC/RISCV/rvv/mul.s
index 9ee35101d2dad..b5294867de248 100644
--- a/llvm/test/MC/RISCV/rvv/mul.s
+++ b/llvm/test/MC/RISCV/rvv/mul.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vmul.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/mv.s b/llvm/test/MC/RISCV/rvv/mv.s
index 05f7e1df7ae2a..62e1c7119033e 100644
--- a/llvm/test/MC/RISCV/rvv/mv.s
+++ b/llvm/test/MC/RISCV/rvv/mv.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vmv.v.v v8, v20
diff --git a/llvm/test/MC/RISCV/rvv/or.s b/llvm/test/MC/RISCV/rvv/or.s
index ffcf0c0ad39fe..891c313bd90d0 100644
--- a/llvm/test/MC/RISCV/rvv/or.s
+++ b/llvm/test/MC/RISCV/rvv/or.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vor.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/others.s b/llvm/test/MC/RISCV/rvv/others.s
index bbd797299f366..06c62a700bc2d 100644
--- a/llvm/test/MC/RISCV/rvv/others.s
+++ b/llvm/test/MC/RISCV/rvv/others.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:   --riscv-no-aliases | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:   | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:   | llvm-objdump -d --mattr=+v -M no-aliases - \
 # RUN:   | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:   | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vmerge.vvm v8, v4, v20, v0
diff --git a/llvm/test/MC/RISCV/rvv/reduction.s b/llvm/test/MC/RISCV/rvv/reduction.s
index 1c4ccac75a026..bacb7e6274504 100644
--- a/llvm/test/MC/RISCV/rvv/reduction.s
+++ b/llvm/test/MC/RISCV/rvv/reduction.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vredsum.vs v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/shift.s b/llvm/test/MC/RISCV/rvv/shift.s
index 64126367f469b..9b5b77e99ed1a 100644
--- a/llvm/test/MC/RISCV/rvv/shift.s
+++ b/llvm/test/MC/RISCV/rvv/shift.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vsll.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/sign-injection.s b/llvm/test/MC/RISCV/rvv/sign-injection.s
index 0b3df8c21d06d..9607d5936d42b 100644
--- a/llvm/test/MC/RISCV/rvv/sign-injection.s
+++ b/llvm/test/MC/RISCV/rvv/sign-injection.s
@@ -1,13 +1,13 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v --mattr=+f - \
+# RUN:        | llvm-objdump -d --mattr=+v --mattr=+f - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:         --mattr=+f \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
diff --git a/llvm/test/MC/RISCV/rvv/snippet.s b/llvm/test/MC/RISCV/rvv/snippet.s
index a5c19b0f18b5a..0f43f8b6e8897 100644
--- a/llvm/test/MC/RISCV/rvv/snippet.s
+++ b/llvm/test/MC/RISCV/rvv/snippet.s
@@ -1,7 +1,7 @@
 ## A snippet from https://github.com/riscv/riscv-v-spec.
 
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v < %s \
-# RUN:   | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v < %s \
+# RUN:   | llvm-objdump -d --mattr=+v - \
 # RUN:   | FileCheck %s --check-prefix=CHECK-INST
 
 loop:
diff --git a/llvm/test/MC/RISCV/rvv/store.s b/llvm/test/MC/RISCV/rvv/store.s
index 2e502d3fbec88..aa8c9e2e38d41 100644
--- a/llvm/test/MC/RISCV/rvv/store.s
+++ b/llvm/test/MC/RISCV/rvv/store.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:   --riscv-no-aliases | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:   | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:   | llvm-objdump -d --mattr=+v -M no-aliases - \
 # RUN:   | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:   | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vsm.v v24, (a0)
diff --git a/llvm/test/MC/RISCV/rvv/sub.s b/llvm/test/MC/RISCV/rvv/sub.s
index 67f10b2f16298..a1d4a5563fded 100644
--- a/llvm/test/MC/RISCV/rvv/sub.s
+++ b/llvm/test/MC/RISCV/rvv/sub.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vsub.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s b/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s
index d0b401da98b37..50ed09ba9629d 100644
--- a/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s
+++ b/llvm/test/MC/RISCV/rvv/vsetvl-invalid.s
@@ -1,7 +1,7 @@
 # RUN: llvm-mc -filetype=obj -triple=riscv32 %s \
-# RUN:     | llvm-objdump -d --mattr=+experimental-v - | FileCheck %s
+# RUN:     | llvm-objdump -d --mattr=+v - | FileCheck %s
 # RUN: llvm-mc -filetype=obj -triple=riscv64 %s \
-# RUN:     | llvm-objdump -d --mattr=+experimental-v - | FileCheck %s
+# RUN:     | llvm-objdump -d --mattr=+v - | FileCheck %s
 
 # CHECK: vsetvli a1, a0, e64, m1, tu, mu
 .word 0x018575d7
diff --git a/llvm/test/MC/RISCV/rvv/vsetvl.s b/llvm/test/MC/RISCV/rvv/vsetvl.s
index 853b12285e725..0d0233f148ba2 100644
--- a/llvm/test/MC/RISCV/rvv/vsetvl.s
+++ b/llvm/test/MC/RISCV/rvv/vsetvl.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 # reserved filed: vlmul[2:0]=4, vsew[2:0]=0b1xx, non-zero bits 8/9/10.
diff --git a/llvm/test/MC/RISCV/rvv/xor.s b/llvm/test/MC/RISCV/rvv/xor.s
index b08fb1b662335..436d4505d7e99 100644
--- a/llvm/test/MC/RISCV/rvv/xor.s
+++ b/llvm/test/MC/RISCV/rvv/xor.s
@@ -1,11 +1,11 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:        | llvm-objdump -d --mattr=+experimental-v - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:        | llvm-objdump -d --mattr=+v - \
 # RUN:        | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vxor.vv v8, v4, v20, v0.t
diff --git a/llvm/test/MC/RISCV/rvv/zvlsseg.s b/llvm/test/MC/RISCV/rvv/zvlsseg.s
index 6845839fcaa33..608646164a2c1 100644
--- a/llvm/test/MC/RISCV/rvv/zvlsseg.s
+++ b/llvm/test/MC/RISCV/rvv/zvlsseg.s
@@ -1,12 +1,12 @@
-# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v %s \
 # RUN:   --riscv-no-aliases \
 # RUN:   | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
 # RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
 # RUN:   | FileCheck %s --check-prefix=CHECK-ERROR
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
-# RUN:   | llvm-objdump -d --mattr=+experimental-v -M no-aliases - \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
+# RUN:   | llvm-objdump -d --mattr=+v -M no-aliases - \
 # RUN:   | FileCheck %s --check-prefix=CHECK-INST
-# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+experimental-v %s \
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v %s \
 # RUN:   | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
 
 vlseg2e8.v v8, (a0), v0.t
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
index 802b251d59ed0..05c270c5f7744 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-vectorize -mtriple=riscv32 -mattr=+experimental-v,+d -riscv-v-vector-bits-min=256 -S | FileCheck %s -check-prefixes=RV32
-; RUN: opt < %s -loop-vectorize -mtriple=riscv64 -mattr=+experimental-v,+d -riscv-v-vector-bits-min=256 -S | FileCheck %s -check-prefixes=RV64
+; RUN: opt < %s -loop-vectorize -mtriple=riscv32 -mattr=+v,+d -riscv-v-vector-bits-min=256 -S | FileCheck %s -check-prefixes=RV32
+; RUN: opt < %s -loop-vectorize -mtriple=riscv64 -mattr=+v,+d -riscv-v-vector-bits-min=256 -S | FileCheck %s -check-prefixes=RV64
 
 ; The source code:
 ;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index 4d6508b683770..fec0b66127039 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -1,18 +1,18 @@
 ; REQUIRES: asserts
 ; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN:   -mattr=+experimental-v,+d -debug-only=loop-vectorize \
+; RUN:   -mattr=+v,+d -debug-only=loop-vectorize \
 ; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=1 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1
 ; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN:   -mattr=+experimental-v,+d -debug-only=loop-vectorize \
+; RUN:   -mattr=+v,+d -debug-only=loop-vectorize \
 ; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2
 ; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN:   -mattr=+experimental-v,+d -debug-only=loop-vectorize \
+; RUN:   -mattr=+v,+d -debug-only=loop-vectorize \
 ; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=4 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4
 ; RUN: opt -loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN:   -mattr=+experimental-v,+d -debug-only=loop-vectorize \
+; RUN:   -mattr=+v,+d -debug-only=loop-vectorize \
 ; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=8 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8
 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll
index 0eaa879fb8ff4..325983fb6036a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-interleaved.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: opt -loop-vectorize -dce -instcombine -mtriple riscv64-linux-gnu \
-; RUN:   -mattr=+experimental-v -debug-only=loop-vectorize \
+; RUN:   -mattr=+v -debug-only=loop-vectorize \
 ; RUN:   -riscv-v-vector-bits-min=128 -S < %s 2>&1 | FileCheck %s
 
 ; CHECK-LABEL: foo
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll
index 49007beb5714a..e26638adc3ff6 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-unroll.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefix=LMUL1
-; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefix=LMUL1
-; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv64 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 -S | FileCheck %s --check-prefix=LMUL2
-; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv32 -mattr=+experimental-v -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 -S | FileCheck %s --check-prefix=LMUL2
+; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefix=LMUL1
+; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefix=LMUL1
+; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 -S | FileCheck %s --check-prefix=LMUL2
+; RUN: opt < %s -loop-vectorize -force-target-max-vector-interleave=1 -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-register-bit-width-lmul=2 -S | FileCheck %s --check-prefix=LMUL2
 
 ; Function Attrs: nounwind
 define i32* @array_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* %c, i32 %size) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
index 75ebccc46a74b..2a529cc653d6f 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-reductions.ll
@@ -2,7 +2,7 @@
 ; RUN:   -riscv-v-vector-bits-min=128 -riscv-v-vector-bits-max=128 \
 ; RUN:   -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize \
 ; RUN:   -pass-remarks-missed=loop-vectorize -mtriple riscv64-linux-gnu \
-; RUN:   -mattr=+experimental-v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
+; RUN:   -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=CHECK
 ; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
 
 ; Reduction can be vectorized
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
index 57c612e813897..0c32391a93ad3 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/scalable-vf-hint.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=riscv64 -mattr=+m,+experimental-v -loop-vectorize \
+; RUN: opt -mtriple=riscv64 -mattr=+m,+v -loop-vectorize \
 ; RUN:   -riscv-v-vector-bits-max=512 -S -scalable-vectorization=on < %s 2>&1 \
 ; RUN:   | FileCheck %s
 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
index 3349d4554491f..6bd40976e71cd 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/unroll-in-loop-vectorizer.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -mtriple=riscv64 -mattr=+experimental-v -loop-vectorize < %s | FileCheck %s
+; RUN: opt -S -mtriple=riscv64 -mattr=+v -loop-vectorize < %s | FileCheck %s
 
 ; Make sure we don't unroll scalar loops in the loop vectorizer.
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
index b40562f8f5768..4711e590a0004 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rvv-min-vector-size.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+experimental-v \
+; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+v \
 ; RUN: -riscv-v-vector-bits-min=128 -S | FileCheck %s --check-prefixes=CHECK,CHECK-128
-; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+experimental-v \
+; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+v \
 ; RUN: -riscv-v-vector-bits-min=256 -S | FileCheck %s --check-prefixes=CHECK,CHECK-256
-; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+experimental-v \
+; RUN: opt < %s -slp-vectorizer -mtriple=riscv64 -mattr=+v \
 ; RUN: -riscv-v-vector-bits-min=512 -S | FileCheck %s --check-prefixes=CHECK,CHECK-512
 
 target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"

From 85e42db1b6db16a2dab4405604971d84899612bf Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sat, 22 Jan 2022 12:10:57 -0800
Subject: [PATCH 269/946] [RISCV] Merge some rvv intrinsic test cases that only
 differ by XLen type.

Instead of having a test for i32 XLen and i64 XLen, use sed to
replace iXLen with i32/i64 before running llc.

This change updates tests for intrinsics that operate exclusively
on mask values. It removes over 4000 lines worth of test content.
More merging will come in future changes.

Differential Revision: https://reviews.llvm.org/D117968
---
 llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll     | 282 ------
 llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll     | 282 ------
 llvm/test/CodeGen/RISCV/rvv/vcpop.ll          | 284 ++++++
 llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll    | 282 ------
 llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll    | 282 ------
 llvm/test/CodeGen/RISCV/rvv/vfirst.ll         | 284 ++++++
 llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll       | 758 ---------------
 .../CodeGen/RISCV/rvv/{vid-rv32.ll => vid.ll} | 258 ++---
 llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll     | 882 ------------------
 .../RISCV/rvv/{viota-rv32.ll => viota.ll}     | 270 +++---
 llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll       |  94 --
 .../CodeGen/RISCV/rvv/{vlm-rv32.ll => vlm.ll} |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll     | 142 ---
 .../RISCV/rvv/{vmand-rv32.ll => vmand.ll}     |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll    | 142 ---
 .../RISCV/rvv/{vmandn-rv32.ll => vmandn.ll}   |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll     | 114 ---
 .../RISCV/rvv/{vmclr-rv32.ll => vmclr.ll}     |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll    | 142 ---
 .../RISCV/rvv/{vmnand-rv32.ll => vmnand.ll}   |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll     | 142 ---
 .../RISCV/rvv/{vmnor-rv32.ll => vmnor.ll}     |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll      | 142 ---
 .../RISCV/rvv/{vmor-rv32.ll => vmor.ll}       |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll     | 142 ---
 .../RISCV/rvv/{vmorn-rv32.ll => vmorn.ll}     |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll     | 114 ---
 .../RISCV/rvv/{vmset-rv32.ll => vmset.ll}     |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll     | 296 ------
 .../RISCV/rvv/{vmsif-rv32.ll => vmsif.ll}     |  90 +-
 llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll     | 296 ------
 .../RISCV/rvv/{vmsof-rv32.ll => vmsof.ll}     |  90 +-
 llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll    | 142 ---
 .../RISCV/rvv/{vmxnor-rv32.ll => vmxnor.ll}   |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll     | 142 ---
 .../RISCV/rvv/{vmxor-rv32.ll => vmxor.ll}     |  48 +-
 llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll       | 137 ---
 .../CodeGen/RISCV/rvv/{vsm-rv32.ll => vsm.ll} |  64 +-
 38 files changed, 1234 insertions(+), 5589 deletions(-)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vcpop.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vfirst.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vid-rv32.ll => vid.ll} (81%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{viota-rv32.ll => viota.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vlm-rv32.ll => vlm.ll} (80%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmand-rv32.ll => vmand.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmandn-rv32.ll => vmandn.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmclr-rv32.ll => vmclr.ll} (72%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmnand-rv32.ll => vmnand.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmnor-rv32.ll => vmnor.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmor-rv32.ll => vmor.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmorn-rv32.ll => vmorn.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmset-rv32.ll => vmset.ll} (72%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmsif-rv32.ll => vmsif.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmsof-rv32.ll => vmsof.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmxnor-rv32.ll => vmxnor.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vmxor-rv32.ll => vmxor.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vsm-rv32.ll => vsm.ll} (79%)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll
deleted file mode 100644
index fe5bba14eb6d2..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv32.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare i32 @llvm.riscv.vcpop.i32.nxv1i1(
-  <vscale x 1 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_m_i32_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.i32.nxv1i1(
-    <vscale x 1 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.mask.i32.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_mask_m_i32_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.i32.nxv2i1(
-  <vscale x 2 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_m_i32_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.i32.nxv2i1(
-    <vscale x 2 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.mask.i32.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_mask_m_i32_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.i32.nxv4i1(
-  <vscale x 4 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_m_i32_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.i32.nxv4i1(
-    <vscale x 4 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.mask.i32.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_mask_m_i32_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.i32.nxv8i1(
-  <vscale x 8 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_m_i32_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.i32.nxv8i1(
-    <vscale x 8 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.mask.i32.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_mask_m_i32_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.i32.nxv16i1(
-  <vscale x 16 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_m_i32_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.i32.nxv16i1(
-    <vscale x 16 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.mask.i32.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_mask_m_i32_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.i32.nxv32i1(
-  <vscale x 32 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_m_i32_nxv32i1(<vscale x 32 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.i32.nxv32i1(
-    <vscale x 32 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.mask.i32.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_mask_m_i32_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.i32.nxv64i1(
-  <vscale x 64 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_m_i32_nxv64i1(<vscale x 64 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i32_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.i32.nxv64i1(
-    <vscale x 64 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vcpop.mask.i32.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i32);
-
-define i32 @intrinsic_vcpop_mask_m_i32_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i32_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vcpop.mask.i32.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll
deleted file mode 100644
index 8d583b8df634a..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vcpop-rv64.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare i64 @llvm.riscv.vcpop.i64.nxv1i1(
-  <vscale x 1 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_m_i64_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.i64.nxv1i1(
-    <vscale x 1 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.mask.i64.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_mask_m_i64_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.i64.nxv2i1(
-  <vscale x 2 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_m_i64_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.i64.nxv2i1(
-    <vscale x 2 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.mask.i64.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_mask_m_i64_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.i64.nxv4i1(
-  <vscale x 4 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_m_i64_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.i64.nxv4i1(
-    <vscale x 4 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.mask.i64.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_mask_m_i64_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.i64.nxv8i1(
-  <vscale x 8 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_m_i64_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.i64.nxv8i1(
-    <vscale x 8 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.mask.i64.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_mask_m_i64_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.i64.nxv16i1(
-  <vscale x 16 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_m_i64_nxv16i1(<vscale x 16 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.i64.nxv16i1(
-    <vscale x 16 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.mask.i64.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_mask_m_i64_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.i64.nxv32i1(
-  <vscale x 32 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_m_i64_nxv32i1(<vscale x 32 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.i64.nxv32i1(
-    <vscale x 32 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.mask.i64.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_mask_m_i64_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.i64.nxv64i1(
-  <vscale x 64 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_m_i64_nxv64i1(<vscale x 64 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_m_i64_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vcpop.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.i64.nxv64i1(
-    <vscale x 64 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vcpop.mask.i64.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define i64 @intrinsic_vcpop_mask_m_i64_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vcpop_mask_m_i64_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vcpop.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vcpop.mask.i64.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll
new file mode 100644
index 0000000000000..1b77ec8dd82e0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vcpop.ll
@@ -0,0 +1,284 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+declare iXLen @llvm.riscv.vcpop.iXLen.nxv1i1(
+  <vscale x 1 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_m_nxv1i1(<vscale x 1 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_m_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv1i1(
+    <vscale x 1 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv1i1(
+  <vscale x 1 x i1>,
+  <vscale x 1 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_mask_m_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vcpop.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv1i1(
+    <vscale x 1 x i1> %0,
+    <vscale x 1 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.iXLen.nxv2i1(
+  <vscale x 2 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_m_nxv2i1(<vscale x 2 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_m_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv2i1(
+    <vscale x 2 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv2i1(
+  <vscale x 2 x i1>,
+  <vscale x 2 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_mask_m_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vcpop.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv2i1(
+    <vscale x 2 x i1> %0,
+    <vscale x 2 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.iXLen.nxv4i1(
+  <vscale x 4 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_m_nxv4i1(<vscale x 4 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_m_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv4i1(
+    <vscale x 4 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv4i1(
+  <vscale x 4 x i1>,
+  <vscale x 4 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_mask_m_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vcpop.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv4i1(
+    <vscale x 4 x i1> %0,
+    <vscale x 4 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.iXLen.nxv8i1(
+  <vscale x 8 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_m_nxv8i1(<vscale x 8 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_m_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv8i1(
+    <vscale x 8 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv8i1(
+  <vscale x 8 x i1>,
+  <vscale x 8 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_mask_m_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vcpop.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv8i1(
+    <vscale x 8 x i1> %0,
+    <vscale x 8 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.iXLen.nxv16i1(
+  <vscale x 16 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_m_nxv16i1(<vscale x 16 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_m_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv16i1(
+    <vscale x 16 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv16i1(
+  <vscale x 16 x i1>,
+  <vscale x 16 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_mask_m_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vcpop.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv16i1(
+    <vscale x 16 x i1> %0,
+    <vscale x 16 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.iXLen.nxv32i1(
+  <vscale x 32 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_m_nxv32i1(<vscale x 32 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_m_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv32i1(
+    <vscale x 32 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv32i1(
+  <vscale x 32 x i1>,
+  <vscale x 32 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_mask_m_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vcpop.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv32i1(
+    <vscale x 32 x i1> %0,
+    <vscale x 32 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.iXLen.nxv64i1(
+  <vscale x 64 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_m_nxv64i1(<vscale x 64 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_m_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.iXLen.nxv64i1(
+    <vscale x 64 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vcpop.mask.iXLen.nxv64i1(
+  <vscale x 64 x i1>,
+  <vscale x 64 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vcpop_mask_m_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vcpop_mask_m_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vcpop.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vcpop.mask.iXLen.nxv64i1(
+    <vscale x 64 x i1> %0,
+    <vscale x 64 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll
deleted file mode 100644
index 6d6d3a8c74616..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv32.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare i32 @llvm.riscv.vfirst.i32.nxv1i1(
-  <vscale x 1 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_m_i32_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.i32.nxv1i1(
-    <vscale x 1 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.mask.i32.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_mask_m_i32_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.i32.nxv2i1(
-  <vscale x 2 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_m_i32_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.i32.nxv2i1(
-    <vscale x 2 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.mask.i32.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_mask_m_i32_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.i32.nxv4i1(
-  <vscale x 4 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_m_i32_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.i32.nxv4i1(
-    <vscale x 4 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.mask.i32.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_mask_m_i32_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.i32.nxv8i1(
-  <vscale x 8 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_m_i32_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.i32.nxv8i1(
-    <vscale x 8 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.mask.i32.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_mask_m_i32_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.i32.nxv16i1(
-  <vscale x 16 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_m_i32_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.i32.nxv16i1(
-    <vscale x 16 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.mask.i32.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_mask_m_i32_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.i32.nxv32i1(
-  <vscale x 32 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_m_i32_nxv32i1(<vscale x 32 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.i32.nxv32i1(
-    <vscale x 32 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.mask.i32.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_mask_m_i32_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.i32.nxv64i1(
-  <vscale x 64 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_m_i32_nxv64i1(<vscale x 64 x i1> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i32_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.i32.nxv64i1(
-    <vscale x 64 x i1> %0,
-    i32 %1)
-
-  ret i32 %a
-}
-
-declare i32 @llvm.riscv.vfirst.mask.i32.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i32);
-
-define i32 @intrinsic_vfirst_mask_m_i32_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i32_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i32 @llvm.riscv.vfirst.mask.i32.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i32 %2)
-
-  ret i32 %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll
deleted file mode 100644
index 0e20516a69c79..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfirst-rv64.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare i64 @llvm.riscv.vfirst.i64.nxv1i1(
-  <vscale x 1 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_m_i64_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.i64.nxv1i1(
-    <vscale x 1 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.mask.i64.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_mask_m_i64_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.i64.nxv2i1(
-  <vscale x 2 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_m_i64_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.i64.nxv2i1(
-    <vscale x 2 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.mask.i64.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_mask_m_i64_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.i64.nxv4i1(
-  <vscale x 4 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_m_i64_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.i64.nxv4i1(
-    <vscale x 4 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.mask.i64.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_mask_m_i64_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.i64.nxv8i1(
-  <vscale x 8 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_m_i64_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.i64.nxv8i1(
-    <vscale x 8 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.mask.i64.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_mask_m_i64_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.i64.nxv16i1(
-  <vscale x 16 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_m_i64_nxv16i1(<vscale x 16 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.i64.nxv16i1(
-    <vscale x 16 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.mask.i64.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_mask_m_i64_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.i64.nxv32i1(
-  <vscale x 32 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_m_i64_nxv32i1(<vscale x 32 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.i64.nxv32i1(
-    <vscale x 32 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.mask.i64.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_mask_m_i64_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.i64.nxv64i1(
-  <vscale x 64 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_m_i64_nxv64i1(<vscale x 64 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_m_i64_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vfirst.m a0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.i64.nxv64i1(
-    <vscale x 64 x i1> %0,
-    i64 %1)
-
-  ret i64 %a
-}
-
-declare i64 @llvm.riscv.vfirst.mask.i64.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define i64 @intrinsic_vfirst_mask_m_i64_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfirst_mask_m_i64_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v9, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vfirst.m a0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call i64 @llvm.riscv.vfirst.mask.i64.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret i64 %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfirst.ll b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll
new file mode 100644
index 0000000000000..5d71c17db6c29
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfirst.ll
@@ -0,0 +1,284 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+declare iXLen @llvm.riscv.vfirst.iXLen.nxv1i1(
+  <vscale x 1 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_m_nxv1i1(<vscale x 1 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_m_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv1i1(
+    <vscale x 1 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv1i1(
+  <vscale x 1 x i1>,
+  <vscale x 1 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_mask_m_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv1i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfirst.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv1i1(
+    <vscale x 1 x i1> %0,
+    <vscale x 1 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.iXLen.nxv2i1(
+  <vscale x 2 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_m_nxv2i1(<vscale x 2 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_m_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv2i1(
+    <vscale x 2 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv2i1(
+  <vscale x 2 x i1>,
+  <vscale x 2 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_mask_m_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv2i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfirst.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv2i1(
+    <vscale x 2 x i1> %0,
+    <vscale x 2 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.iXLen.nxv4i1(
+  <vscale x 4 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_m_nxv4i1(<vscale x 4 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_m_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv4i1(
+    <vscale x 4 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv4i1(
+  <vscale x 4 x i1>,
+  <vscale x 4 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_mask_m_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv4i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfirst.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv4i1(
+    <vscale x 4 x i1> %0,
+    <vscale x 4 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.iXLen.nxv8i1(
+  <vscale x 8 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_m_nxv8i1(<vscale x 8 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_m_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv8i1(
+    <vscale x 8 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv8i1(
+  <vscale x 8 x i1>,
+  <vscale x 8 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_mask_m_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv8i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfirst.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv8i1(
+    <vscale x 8 x i1> %0,
+    <vscale x 8 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.iXLen.nxv16i1(
+  <vscale x 16 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_m_nxv16i1(<vscale x 16 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_m_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv16i1(
+    <vscale x 16 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv16i1(
+  <vscale x 16 x i1>,
+  <vscale x 16 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_mask_m_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv16i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfirst.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv16i1(
+    <vscale x 16 x i1> %0,
+    <vscale x 16 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.iXLen.nxv32i1(
+  <vscale x 32 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_m_nxv32i1(<vscale x 32 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_m_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv32i1(
+    <vscale x 32 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv32i1(
+  <vscale x 32 x i1>,
+  <vscale x 32 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_mask_m_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv32i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfirst.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv32i1(
+    <vscale x 32 x i1> %0,
+    <vscale x 32 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.iXLen.nxv64i1(
+  <vscale x 64 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_m_nxv64i1(<vscale x 64 x i1> %0, iXLen %1) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_m_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vfirst.m a0, v0
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.iXLen.nxv64i1(
+    <vscale x 64 x i1> %0,
+    iXLen %1)
+
+  ret iXLen %a
+}
+
+declare iXLen @llvm.riscv.vfirst.mask.iXLen.nxv64i1(
+  <vscale x 64 x i1>,
+  <vscale x 64 x i1>,
+  iXLen);
+
+define iXLen @intrinsic_vfirst_mask_m_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
+; CHECK-LABEL: intrinsic_vfirst_mask_m_nxv64i1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v0
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
+; CHECK-NEXT:    vmv1r.v v0, v8
+; CHECK-NEXT:    vfirst.m a0, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call iXLen @llvm.riscv.vfirst.mask.iXLen.nxv64i1(
+    <vscale x 64 x i1> %0,
+    <vscale x 64 x i1> %1,
+    iXLen %2)
+
+  ret iXLen %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll
deleted file mode 100644
index 1413c0ec4b443..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vid-rv64.ll
+++ /dev/null
@@ -1,758 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i8> @llvm.riscv.vid.nxv1i8(
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vid_v_nxv1i8(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv1i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vid.nxv1i8(
-    i64 %0)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 1 x i8> @llvm.riscv.vid.mask.nxv1i8(
-  <vscale x 1 x i8>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vid_mask_v_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vid.mask.nxv1i8(
-    <vscale x 1 x i8> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vid.nxv2i8(
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vid_v_nxv2i8(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv2i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vid.nxv2i8(
-    i64 %0)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vid.mask.nxv2i8(
-  <vscale x 2 x i8>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vid_mask_v_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vid.mask.nxv2i8(
-    <vscale x 2 x i8> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vid.nxv4i8(
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vid_v_nxv4i8(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv4i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vid.nxv4i8(
-    i64 %0)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vid.mask.nxv4i8(
-  <vscale x 4 x i8>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vid_mask_v_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vid.mask.nxv4i8(
-    <vscale x 4 x i8> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vid.nxv8i8(
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vid_v_nxv8i8(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv8i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vid.nxv8i8(
-    i64 %0)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vid.mask.nxv8i8(
-  <vscale x 8 x i8>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vid_mask_v_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vid.mask.nxv8i8(
-    <vscale x 8 x i8> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vid.nxv16i8(
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vid_v_nxv16i8(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv16i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vid.nxv16i8(
-    i64 %0)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vid.mask.nxv16i8(
-  <vscale x 16 x i8>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vid_mask_v_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vid.mask.nxv16i8(
-    <vscale x 16 x i8> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vid.nxv32i8(
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vid_v_nxv32i8(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv32i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vid.nxv32i8(
-    i64 %0)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vid.mask.nxv32i8(
-  <vscale x 32 x i8>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vid_mask_v_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv32i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vid.mask.nxv32i8(
-    <vscale x 32 x i8> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vid.nxv1i16(
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vid_v_nxv1i16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vid.nxv1i16(
-    i64 %0)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vid.mask.nxv1i16(
-  <vscale x 1 x i16>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vid_mask_v_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vid.mask.nxv1i16(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vid.nxv2i16(
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vid_v_nxv2i16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vid.nxv2i16(
-    i64 %0)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vid.mask.nxv2i16(
-  <vscale x 2 x i16>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vid_mask_v_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vid.mask.nxv2i16(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vid.nxv4i16(
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vid_v_nxv4i16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vid.nxv4i16(
-    i64 %0)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vid.mask.nxv4i16(
-  <vscale x 4 x i16>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vid_mask_v_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vid.mask.nxv4i16(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vid.nxv8i16(
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vid_v_nxv8i16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vid.nxv8i16(
-    i64 %0)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vid.mask.nxv8i16(
-  <vscale x 8 x i16>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vid_mask_v_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vid.mask.nxv8i16(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vid.nxv16i16(
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vid_v_nxv16i16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vid.nxv16i16(
-    i64 %0)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vid.mask.nxv16i16(
-  <vscale x 16 x i16>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vid_mask_v_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vid.mask.nxv16i16(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vid.nxv32i16(
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vid_v_nxv32i16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv32i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vid.nxv32i16(
-    i64 %0)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vid.mask.nxv32i16(
-  <vscale x 32 x i16>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vid_mask_v_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv32i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vid.mask.nxv32i16(
-    <vscale x 32 x i16> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vid.nxv1i32(
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vid_v_nxv1i32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vid.nxv1i32(
-    i64 %0)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vid.mask.nxv1i32(
-  <vscale x 1 x i32>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vid_mask_v_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vid.mask.nxv1i32(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vid_v_nxv2i32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(
-    i64 %0)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vid.mask.nxv2i32(
-  <vscale x 2 x i32>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vid_mask_v_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vid.mask.nxv2i32(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vid.nxv4i32(
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vid_v_nxv4i32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vid.nxv4i32(
-    i64 %0)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vid.mask.nxv4i32(
-  <vscale x 4 x i32>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vid_mask_v_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vid.mask.nxv4i32(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vid.nxv8i32(
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vid_v_nxv8i32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vid.nxv8i32(
-    i64 %0)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vid.mask.nxv8i32(
-  <vscale x 8 x i32>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vid_mask_v_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vid.mask.nxv8i32(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vid.nxv16i32(
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vid_v_nxv16i32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vid.nxv16i32(
-    i64 %0)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vid.mask.nxv16i32(
-  <vscale x 16 x i32>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vid_mask_v_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vid.mask.nxv16i32(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vid.nxv1i64(
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vid_v_nxv1i64(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vid.nxv1i64(
-    i64 %0)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vid.mask.nxv1i64(
-  <vscale x 1 x i64>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vid_mask_v_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vid.mask.nxv1i64(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vid.nxv2i64(
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vid_v_nxv2i64(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vid.nxv2i64(
-    i64 %0)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vid.mask.nxv2i64(
-  <vscale x 2 x i64>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vid_mask_v_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vid.mask.nxv2i64(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vid.nxv4i64(
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vid_v_nxv4i64(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vid.nxv4i64(
-    i64 %0)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vid.mask.nxv4i64(
-  <vscale x 4 x i64>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vid_mask_v_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vid.mask.nxv4i64(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vid_v_nxv8i64(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vid_v_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(
-    i64 %0)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vid.mask.nxv8i64(
-  <vscale x 8 x i64>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vid_mask_v_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vid.v v8, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vid.mask.nxv8i64(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vid.ll
similarity index 81%
rename from llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vid.ll
index 58ed5cfb7620d..1888074767cf8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vid-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vid.ll
@@ -1,10 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vid.nxv1i8(
-  i32);
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vid_v_nxv1i8(i32 %0) nounwind {
+define <vscale x 1 x i8> @intrinsic_vid_v_nxv1i8(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -12,7 +14,7 @@ define <vscale x 1 x i8> @intrinsic_vid_v_nxv1i8(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vid.nxv1i8(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x i8> %a
 }
@@ -20,9 +22,9 @@ entry:
 declare <vscale x 1 x i8> @llvm.riscv.vid.mask.nxv1i8(
   <vscale x 1 x i8>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vid_mask_v_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i8> @intrinsic_vid_mask_v_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
@@ -32,15 +34,15 @@ entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vid.mask.nxv1i8(
     <vscale x 1 x i8> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i8> %a
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vid.nxv2i8(
-  i32);
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vid_v_nxv2i8(i32 %0) nounwind {
+define <vscale x 2 x i8> @intrinsic_vid_v_nxv2i8(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -48,7 +50,7 @@ define <vscale x 2 x i8> @intrinsic_vid_v_nxv2i8(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vid.nxv2i8(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x i8> %a
 }
@@ -56,9 +58,9 @@ entry:
 declare <vscale x 2 x i8> @llvm.riscv.vid.mask.nxv2i8(
   <vscale x 2 x i8>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vid_mask_v_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i8> @intrinsic_vid_mask_v_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
@@ -68,15 +70,15 @@ entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vid.mask.nxv2i8(
     <vscale x 2 x i8> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i8> %a
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vid.nxv4i8(
-  i32);
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vid_v_nxv4i8(i32 %0) nounwind {
+define <vscale x 4 x i8> @intrinsic_vid_v_nxv4i8(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -84,7 +86,7 @@ define <vscale x 4 x i8> @intrinsic_vid_v_nxv4i8(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vid.nxv4i8(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x i8> %a
 }
@@ -92,9 +94,9 @@ entry:
 declare <vscale x 4 x i8> @llvm.riscv.vid.mask.nxv4i8(
   <vscale x 4 x i8>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vid_mask_v_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i8> @intrinsic_vid_mask_v_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
@@ -104,15 +106,15 @@ entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vid.mask.nxv4i8(
     <vscale x 4 x i8> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i8> %a
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vid.nxv8i8(
-  i32);
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vid_v_nxv8i8(i32 %0) nounwind {
+define <vscale x 8 x i8> @intrinsic_vid_v_nxv8i8(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -120,7 +122,7 @@ define <vscale x 8 x i8> @intrinsic_vid_v_nxv8i8(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vid.nxv8i8(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x i8> %a
 }
@@ -128,9 +130,9 @@ entry:
 declare <vscale x 8 x i8> @llvm.riscv.vid.mask.nxv8i8(
   <vscale x 8 x i8>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vid_mask_v_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i8> @intrinsic_vid_mask_v_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
@@ -140,15 +142,15 @@ entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vid.mask.nxv8i8(
     <vscale x 8 x i8> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i8> %a
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vid.nxv16i8(
-  i32);
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vid_v_nxv16i8(i32 %0) nounwind {
+define <vscale x 16 x i8> @intrinsic_vid_v_nxv16i8(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -156,7 +158,7 @@ define <vscale x 16 x i8> @intrinsic_vid_v_nxv16i8(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vid.nxv16i8(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 16 x i8> %a
 }
@@ -164,9 +166,9 @@ entry:
 declare <vscale x 16 x i8> @llvm.riscv.vid.mask.nxv16i8(
   <vscale x 16 x i8>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vid_mask_v_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i8> @intrinsic_vid_mask_v_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
@@ -176,15 +178,15 @@ entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vid.mask.nxv16i8(
     <vscale x 16 x i8> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i8> %a
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vid.nxv32i8(
-  i32);
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vid_v_nxv32i8(i32 %0) nounwind {
+define <vscale x 32 x i8> @intrinsic_vid_v_nxv32i8(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -192,7 +194,7 @@ define <vscale x 32 x i8> @intrinsic_vid_v_nxv32i8(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vid.nxv32i8(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 32 x i8> %a
 }
@@ -200,9 +202,9 @@ entry:
 declare <vscale x 32 x i8> @llvm.riscv.vid.mask.nxv32i8(
   <vscale x 32 x i8>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vid_mask_v_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i8> @intrinsic_vid_mask_v_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
@@ -212,15 +214,15 @@ entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vid.mask.nxv32i8(
     <vscale x 32 x i8> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i8> %a
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vid.nxv1i16(
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vid_v_nxv1i16(i32 %0) nounwind {
+define <vscale x 1 x i16> @intrinsic_vid_v_nxv1i16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -228,7 +230,7 @@ define <vscale x 1 x i16> @intrinsic_vid_v_nxv1i16(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vid.nxv1i16(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x i16> %a
 }
@@ -236,9 +238,9 @@ entry:
 declare <vscale x 1 x i16> @llvm.riscv.vid.mask.nxv1i16(
   <vscale x 1 x i16>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vid_mask_v_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i16> @intrinsic_vid_mask_v_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -248,15 +250,15 @@ entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vid.mask.nxv1i16(
     <vscale x 1 x i16> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vid.nxv2i16(
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vid_v_nxv2i16(i32 %0) nounwind {
+define <vscale x 2 x i16> @intrinsic_vid_v_nxv2i16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -264,7 +266,7 @@ define <vscale x 2 x i16> @intrinsic_vid_v_nxv2i16(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vid.nxv2i16(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x i16> %a
 }
@@ -272,9 +274,9 @@ entry:
 declare <vscale x 2 x i16> @llvm.riscv.vid.mask.nxv2i16(
   <vscale x 2 x i16>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vid_mask_v_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i16> @intrinsic_vid_mask_v_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -284,15 +286,15 @@ entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vid.mask.nxv2i16(
     <vscale x 2 x i16> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vid.nxv4i16(
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vid_v_nxv4i16(i32 %0) nounwind {
+define <vscale x 4 x i16> @intrinsic_vid_v_nxv4i16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -300,7 +302,7 @@ define <vscale x 4 x i16> @intrinsic_vid_v_nxv4i16(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vid.nxv4i16(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x i16> %a
 }
@@ -308,9 +310,9 @@ entry:
 declare <vscale x 4 x i16> @llvm.riscv.vid.mask.nxv4i16(
   <vscale x 4 x i16>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vid_mask_v_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i16> @intrinsic_vid_mask_v_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -320,15 +322,15 @@ entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vid.mask.nxv4i16(
     <vscale x 4 x i16> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vid.nxv8i16(
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vid_v_nxv8i16(i32 %0) nounwind {
+define <vscale x 8 x i16> @intrinsic_vid_v_nxv8i16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -336,7 +338,7 @@ define <vscale x 8 x i16> @intrinsic_vid_v_nxv8i16(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vid.nxv8i16(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x i16> %a
 }
@@ -344,9 +346,9 @@ entry:
 declare <vscale x 8 x i16> @llvm.riscv.vid.mask.nxv8i16(
   <vscale x 8 x i16>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vid_mask_v_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i16> @intrinsic_vid_mask_v_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -356,15 +358,15 @@ entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vid.mask.nxv8i16(
     <vscale x 8 x i16> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vid.nxv16i16(
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vid_v_nxv16i16(i32 %0) nounwind {
+define <vscale x 16 x i16> @intrinsic_vid_v_nxv16i16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -372,7 +374,7 @@ define <vscale x 16 x i16> @intrinsic_vid_v_nxv16i16(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vid.nxv16i16(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 16 x i16> %a
 }
@@ -380,9 +382,9 @@ entry:
 declare <vscale x 16 x i16> @llvm.riscv.vid.mask.nxv16i16(
   <vscale x 16 x i16>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vid_mask_v_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i16> @intrinsic_vid_mask_v_nxv16i16(<vscale x 16 x i16> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -392,15 +394,15 @@ entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vid.mask.nxv16i16(
     <vscale x 16 x i16> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vid.nxv32i16(
-  i32);
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vid_v_nxv32i16(i32 %0) nounwind {
+define <vscale x 32 x i16> @intrinsic_vid_v_nxv32i16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -408,7 +410,7 @@ define <vscale x 32 x i16> @intrinsic_vid_v_nxv32i16(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vid.nxv32i16(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 32 x i16> %a
 }
@@ -416,9 +418,9 @@ entry:
 declare <vscale x 32 x i16> @llvm.riscv.vid.mask.nxv32i16(
   <vscale x 32 x i16>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vid_mask_v_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i16> @intrinsic_vid_mask_v_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -428,15 +430,15 @@ entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vid.mask.nxv32i16(
     <vscale x 32 x i16> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vid.nxv1i32(
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vid_v_nxv1i32(i32 %0) nounwind {
+define <vscale x 1 x i32> @intrinsic_vid_v_nxv1i32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -444,7 +446,7 @@ define <vscale x 1 x i32> @intrinsic_vid_v_nxv1i32(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vid.nxv1i32(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x i32> %a
 }
@@ -452,9 +454,9 @@ entry:
 declare <vscale x 1 x i32> @llvm.riscv.vid.mask.nxv1i32(
   <vscale x 1 x i32>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vid_mask_v_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i32> @intrinsic_vid_mask_v_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -464,15 +466,15 @@ entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vid.mask.nxv1i32(
     <vscale x 1 x i32> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vid_v_nxv2i32(i32 %0) nounwind {
+define <vscale x 2 x i32> @intrinsic_vid_v_nxv2i32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -480,7 +482,7 @@ define <vscale x 2 x i32> @intrinsic_vid_v_nxv2i32(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vid.nxv2i32(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x i32> %a
 }
@@ -488,9 +490,9 @@ entry:
 declare <vscale x 2 x i32> @llvm.riscv.vid.mask.nxv2i32(
   <vscale x 2 x i32>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vid_mask_v_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i32> @intrinsic_vid_mask_v_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -500,15 +502,15 @@ entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vid.mask.nxv2i32(
     <vscale x 2 x i32> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vid.nxv4i32(
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vid_v_nxv4i32(i32 %0) nounwind {
+define <vscale x 4 x i32> @intrinsic_vid_v_nxv4i32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -516,7 +518,7 @@ define <vscale x 4 x i32> @intrinsic_vid_v_nxv4i32(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vid.nxv4i32(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x i32> %a
 }
@@ -524,9 +526,9 @@ entry:
 declare <vscale x 4 x i32> @llvm.riscv.vid.mask.nxv4i32(
   <vscale x 4 x i32>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vid_mask_v_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i32> @intrinsic_vid_mask_v_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -536,15 +538,15 @@ entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vid.mask.nxv4i32(
     <vscale x 4 x i32> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vid.nxv8i32(
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vid_v_nxv8i32(i32 %0) nounwind {
+define <vscale x 8 x i32> @intrinsic_vid_v_nxv8i32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -552,7 +554,7 @@ define <vscale x 8 x i32> @intrinsic_vid_v_nxv8i32(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vid.nxv8i32(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x i32> %a
 }
@@ -560,9 +562,9 @@ entry:
 declare <vscale x 8 x i32> @llvm.riscv.vid.mask.nxv8i32(
   <vscale x 8 x i32>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vid_mask_v_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i32> @intrinsic_vid_mask_v_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -572,15 +574,15 @@ entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vid.mask.nxv8i32(
     <vscale x 8 x i32> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vid.nxv16i32(
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vid_v_nxv16i32(i32 %0) nounwind {
+define <vscale x 16 x i32> @intrinsic_vid_v_nxv16i32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 16 x i32> @intrinsic_vid_v_nxv16i32(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vid.nxv16i32(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 16 x i32> %a
 }
@@ -596,9 +598,9 @@ entry:
 declare <vscale x 16 x i32> @llvm.riscv.vid.mask.nxv16i32(
   <vscale x 16 x i32>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vid_mask_v_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i32> @intrinsic_vid_mask_v_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -608,15 +610,15 @@ entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vid.mask.nxv16i32(
     <vscale x 16 x i32> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vid.nxv1i64(
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vid_v_nxv1i64(i32 %0) nounwind {
+define <vscale x 1 x i64> @intrinsic_vid_v_nxv1i64(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -624,7 +626,7 @@ define <vscale x 1 x i64> @intrinsic_vid_v_nxv1i64(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vid.nxv1i64(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x i64> %a
 }
@@ -632,9 +634,9 @@ entry:
 declare <vscale x 1 x i64> @llvm.riscv.vid.mask.nxv1i64(
   <vscale x 1 x i64>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vid_mask_v_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i64> @intrinsic_vid_mask_v_nxv1i64(<vscale x 1 x i64> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -644,15 +646,15 @@ entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vid.mask.nxv1i64(
     <vscale x 1 x i64> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vid.nxv2i64(
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vid_v_nxv2i64(i32 %0) nounwind {
+define <vscale x 2 x i64> @intrinsic_vid_v_nxv2i64(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -660,7 +662,7 @@ define <vscale x 2 x i64> @intrinsic_vid_v_nxv2i64(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vid.nxv2i64(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x i64> %a
 }
@@ -668,9 +670,9 @@ entry:
 declare <vscale x 2 x i64> @llvm.riscv.vid.mask.nxv2i64(
   <vscale x 2 x i64>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vid_mask_v_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i64> @intrinsic_vid_mask_v_nxv2i64(<vscale x 2 x i64> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -680,15 +682,15 @@ entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vid.mask.nxv2i64(
     <vscale x 2 x i64> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vid.nxv4i64(
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vid_v_nxv4i64(i32 %0) nounwind {
+define <vscale x 4 x i64> @intrinsic_vid_v_nxv4i64(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -696,7 +698,7 @@ define <vscale x 4 x i64> @intrinsic_vid_v_nxv4i64(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vid.nxv4i64(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x i64> %a
 }
@@ -704,9 +706,9 @@ entry:
 declare <vscale x 4 x i64> @llvm.riscv.vid.mask.nxv4i64(
   <vscale x 4 x i64>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vid_mask_v_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i64> @intrinsic_vid_mask_v_nxv4i64(<vscale x 4 x i64> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -716,15 +718,15 @@ entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vid.mask.nxv4i64(
     <vscale x 4 x i64> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vid_v_nxv8i64(i32 %0) nounwind {
+define <vscale x 8 x i64> @intrinsic_vid_v_nxv8i64(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vid_v_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -732,7 +734,7 @@ define <vscale x 8 x i64> @intrinsic_vid_v_nxv8i64(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vid.nxv8i64(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x i64> %a
 }
@@ -740,9 +742,9 @@ entry:
 declare <vscale x 8 x i64> @llvm.riscv.vid.mask.nxv8i64(
   <vscale x 8 x i64>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vid_mask_v_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i64> @intrinsic_vid_mask_v_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vid_mask_v_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -752,7 +754,7 @@ entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vid.mask.nxv8i64(
     <vscale x 8 x i64> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll
deleted file mode 100644
index 0e37ff483fb4f..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/viota-rv64.ll
+++ /dev/null
@@ -1,882 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i8> @llvm.riscv.viota.nxv1i8(
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_viota_m_nxv1i8_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv1i8_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.viota.nxv1i8(
-    <vscale x 1 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 1 x i8> @llvm.riscv.viota.mask.nxv1i8(
-  <vscale x 1 x i8>,
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_viota_mask_m_nxv1i8_nxv1i1(<vscale x 1 x i8> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i8_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.viota.mask.nxv1i8(
-    <vscale x 1 x i8> %0,
-    <vscale x 1 x i1> %1,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.viota.nxv2i8(
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_viota_m_nxv2i8_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv2i8_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.viota.nxv2i8(
-    <vscale x 2 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.viota.mask.nxv2i8(
-  <vscale x 2 x i8>,
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_viota_mask_m_nxv2i8_nxv2i1(<vscale x 2 x i8> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i8_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.viota.mask.nxv2i8(
-    <vscale x 2 x i8> %0,
-    <vscale x 2 x i1> %1,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.viota.nxv4i8(
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_viota_m_nxv4i8_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv4i8_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.viota.nxv4i8(
-    <vscale x 4 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.viota.mask.nxv4i8(
-  <vscale x 4 x i8>,
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_viota_mask_m_nxv4i8_nxv4i1(<vscale x 4 x i8> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i8_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.viota.mask.nxv4i8(
-    <vscale x 4 x i8> %0,
-    <vscale x 4 x i1> %1,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.viota.nxv8i8(
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_viota_m_nxv8i8_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv8i8_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.viota.nxv8i8(
-    <vscale x 8 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.viota.mask.nxv8i8(
-  <vscale x 8 x i8>,
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_viota_mask_m_nxv8i8_nxv8i1(<vscale x 8 x i8> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i8_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.viota.mask.nxv8i8(
-    <vscale x 8 x i8> %0,
-    <vscale x 8 x i1> %1,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.viota.nxv16i8(
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_viota_m_nxv16i8_nxv16i1(<vscale x 16 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv16i8_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.viota.nxv16i8(
-    <vscale x 16 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.viota.mask.nxv16i8(
-  <vscale x 16 x i8>,
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_viota_mask_m_nxv16i8_nxv16i1(<vscale x 16 x i8> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i8_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.viota.mask.nxv16i8(
-    <vscale x 16 x i8> %0,
-    <vscale x 16 x i1> %1,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.viota.nxv32i8(
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_viota_m_nxv32i8_nxv32i1(<vscale x 32 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv32i8_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.viota.nxv32i8(
-    <vscale x 32 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.viota.mask.nxv32i8(
-  <vscale x 32 x i8>,
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_viota_mask_m_nxv32i8_nxv32i1(<vscale x 32 x i8> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv32i8_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.viota.mask.nxv32i8(
-    <vscale x 32 x i8> %0,
-    <vscale x 32 x i1> %1,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 64 x i8> @llvm.riscv.viota.nxv64i8(
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i8> @intrinsic_viota_m_nxv64i8_nxv64i1(<vscale x 64 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv64i8_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i8> @llvm.riscv.viota.nxv64i8(
-    <vscale x 64 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 64 x i8> %a
-}
-
-declare <vscale x 64 x i8> @llvm.riscv.viota.mask.nxv64i8(
-  <vscale x 64 x i8>,
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i8> @intrinsic_viota_mask_m_nxv64i8_nxv64i1(<vscale x 64 x i8> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv64i8_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i8> @llvm.riscv.viota.mask.nxv64i8(
-    <vscale x 64 x i8> %0,
-    <vscale x 64 x i1> %1,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i8> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.viota.nxv1i16(
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_viota_m_nxv1i16_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv1i16_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.viota.nxv1i16(
-    <vscale x 1 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.viota.mask.nxv1i16(
-  <vscale x 1 x i16>,
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_viota_mask_m_nxv1i16_nxv1i1(<vscale x 1 x i16> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i16_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.viota.mask.nxv1i16(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x i1> %1,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.viota.nxv2i16(
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_viota_m_nxv2i16_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv2i16_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.viota.nxv2i16(
-    <vscale x 2 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.viota.mask.nxv2i16(
-  <vscale x 2 x i16>,
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_viota_mask_m_nxv2i16_nxv2i1(<vscale x 2 x i16> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i16_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.viota.mask.nxv2i16(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x i1> %1,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.viota.nxv4i16(
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_viota_m_nxv4i16_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv4i16_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.viota.nxv4i16(
-    <vscale x 4 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.viota.mask.nxv4i16(
-  <vscale x 4 x i16>,
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_viota_mask_m_nxv4i16_nxv4i1(<vscale x 4 x i16> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i16_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.viota.mask.nxv4i16(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x i1> %1,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.viota.nxv8i16(
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_viota_m_nxv8i16_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv8i16_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.viota.nxv8i16(
-    <vscale x 8 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.viota.mask.nxv8i16(
-  <vscale x 8 x i16>,
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_viota_mask_m_nxv8i16_nxv8i1(<vscale x 8 x i16> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i16_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.viota.mask.nxv8i16(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x i1> %1,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.viota.nxv16i16(
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_viota_m_nxv16i16_nxv16i1(<vscale x 16 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv16i16_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.viota.nxv16i16(
-    <vscale x 16 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.viota.mask.nxv16i16(
-  <vscale x 16 x i16>,
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_viota_mask_m_nxv16i16_nxv16i1(<vscale x 16 x i16> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i16_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.viota.mask.nxv16i16(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x i1> %1,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.viota.nxv32i16(
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_viota_m_nxv32i16_nxv32i1(<vscale x 32 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv32i16_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.viota.nxv32i16(
-    <vscale x 32 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.viota.mask.nxv32i16(
-  <vscale x 32 x i16>,
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_viota_mask_m_nxv32i16_nxv32i1(<vscale x 32 x i16> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv32i16_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.viota.mask.nxv32i16(
-    <vscale x 32 x i16> %0,
-    <vscale x 32 x i1> %1,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.viota.nxv1i32(
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_viota_m_nxv1i32_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv1i32_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.viota.nxv1i32(
-    <vscale x 1 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.viota.mask.nxv1i32(
-  <vscale x 1 x i32>,
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_viota_mask_m_nxv1i32_nxv1i1(<vscale x 1 x i32> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i32_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.viota.mask.nxv1i32(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x i1> %1,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_viota_m_nxv2i32_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv2i32_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(
-    <vscale x 2 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(
-  <vscale x 2 x i32>,
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_viota_mask_m_nxv2i32_nxv2i1(<vscale x 2 x i32> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i32_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x i1> %1,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.viota.nxv4i32(
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_viota_m_nxv4i32_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv4i32_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.viota.nxv4i32(
-    <vscale x 4 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.viota.mask.nxv4i32(
-  <vscale x 4 x i32>,
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_viota_mask_m_nxv4i32_nxv4i1(<vscale x 4 x i32> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i32_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.viota.mask.nxv4i32(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x i1> %1,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.viota.nxv8i32(
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_viota_m_nxv8i32_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv8i32_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.viota.nxv8i32(
-    <vscale x 8 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.viota.mask.nxv8i32(
-  <vscale x 8 x i32>,
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_viota_mask_m_nxv8i32_nxv8i1(<vscale x 8 x i32> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i32_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.viota.mask.nxv8i32(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x i1> %1,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.viota.nxv16i32(
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_viota_m_nxv16i32_nxv16i1(<vscale x 16 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv16i32_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.viota.nxv16i32(
-    <vscale x 16 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.viota.mask.nxv16i32(
-  <vscale x 16 x i32>,
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_viota_mask_m_nxv16i32_nxv16i1(<vscale x 16 x i32> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i32_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.viota.mask.nxv16i32(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x i1> %1,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.viota.nxv1i64(
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_viota_m_nxv1i64_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv1i64_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.viota.nxv1i64(
-    <vscale x 1 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.viota.mask.nxv1i64(
-  <vscale x 1 x i64>,
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_viota_mask_m_nxv1i64_nxv1i1(<vscale x 1 x i64> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i64_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.viota.mask.nxv1i64(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x i1> %1,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.viota.nxv2i64(
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_viota_m_nxv2i64_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv2i64_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.viota.nxv2i64(
-    <vscale x 2 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.viota.mask.nxv2i64(
-  <vscale x 2 x i64>,
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_viota_mask_m_nxv2i64_nxv2i1(<vscale x 2 x i64> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i64_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.viota.mask.nxv2i64(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x i1> %1,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.viota.nxv4i64(
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_viota_m_nxv4i64_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv4i64_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.viota.nxv4i64(
-    <vscale x 4 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.viota.mask.nxv4i64(
-  <vscale x 4 x i64>,
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_viota_mask_m_nxv4i64_nxv4i1(<vscale x 4 x i64> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i64_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.viota.mask.nxv4i64(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x i1> %1,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.viota.nxv8i64(
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_viota_m_nxv8i64_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_viota_m_nxv8i64_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    viota.m v8, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.viota.nxv8i64(
-    <vscale x 8 x i1> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.viota.mask.nxv8i64(
-  <vscale x 8 x i64>,
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_viota_mask_m_nxv8i64_nxv8i1(<vscale x 8 x i64> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i64_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    viota.m v8, v0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.viota.mask.nxv8i64(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x i1> %1,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/viota.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/viota.ll
index 6b3838cd5898c..acec6f7c8a08f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/viota-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/viota.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.viota.nxv1i8(
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_viota_m_nxv1i8_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind {
+define <vscale x 1 x i8> @intrinsic_viota_m_nxv1i8_nxv1i1(<vscale x 1 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv1i8_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x i8> @intrinsic_viota_m_nxv1i8_nxv1i1(<vscale x 1 x i1> %0,
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.viota.nxv1i8(
     <vscale x 1 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i8> %a
 }
@@ -23,9 +25,9 @@ declare <vscale x 1 x i8> @llvm.riscv.viota.mask.nxv1i8(
   <vscale x 1 x i8>,
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_viota_mask_m_nxv1i8_nxv1i1(<vscale x 1 x i8> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i8> @intrinsic_viota_mask_m_nxv1i8_nxv1i1(<vscale x 1 x i8> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i8_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
@@ -36,16 +38,16 @@ entry:
     <vscale x 1 x i8> %0,
     <vscale x 1 x i1> %1,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i8> %a
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.viota.nxv2i8(
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_viota_m_nxv2i8_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind {
+define <vscale x 2 x i8> @intrinsic_viota_m_nxv2i8_nxv2i1(<vscale x 2 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv2i8_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -54,7 +56,7 @@ define <vscale x 2 x i8> @intrinsic_viota_m_nxv2i8_nxv2i1(<vscale x 2 x i1> %0,
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.viota.nxv2i8(
     <vscale x 2 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i8> %a
 }
@@ -63,9 +65,9 @@ declare <vscale x 2 x i8> @llvm.riscv.viota.mask.nxv2i8(
   <vscale x 2 x i8>,
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_viota_mask_m_nxv2i8_nxv2i1(<vscale x 2 x i8> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i8> @intrinsic_viota_mask_m_nxv2i8_nxv2i1(<vscale x 2 x i8> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i8_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
@@ -76,16 +78,16 @@ entry:
     <vscale x 2 x i8> %0,
     <vscale x 2 x i1> %1,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i8> %a
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.viota.nxv4i8(
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_viota_m_nxv4i8_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind {
+define <vscale x 4 x i8> @intrinsic_viota_m_nxv4i8_nxv4i1(<vscale x 4 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv4i8_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -94,7 +96,7 @@ define <vscale x 4 x i8> @intrinsic_viota_m_nxv4i8_nxv4i1(<vscale x 4 x i1> %0,
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.viota.nxv4i8(
     <vscale x 4 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i8> %a
 }
@@ -103,9 +105,9 @@ declare <vscale x 4 x i8> @llvm.riscv.viota.mask.nxv4i8(
   <vscale x 4 x i8>,
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_viota_mask_m_nxv4i8_nxv4i1(<vscale x 4 x i8> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i8> @intrinsic_viota_mask_m_nxv4i8_nxv4i1(<vscale x 4 x i8> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i8_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
@@ -116,16 +118,16 @@ entry:
     <vscale x 4 x i8> %0,
     <vscale x 4 x i1> %1,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i8> %a
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.viota.nxv8i8(
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_viota_m_nxv8i8_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind {
+define <vscale x 8 x i8> @intrinsic_viota_m_nxv8i8_nxv8i1(<vscale x 8 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv8i8_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -134,7 +136,7 @@ define <vscale x 8 x i8> @intrinsic_viota_m_nxv8i8_nxv8i1(<vscale x 8 x i1> %0,
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.viota.nxv8i8(
     <vscale x 8 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i8> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x i8> @llvm.riscv.viota.mask.nxv8i8(
   <vscale x 8 x i8>,
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_viota_mask_m_nxv8i8_nxv8i1(<vscale x 8 x i8> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i8> @intrinsic_viota_mask_m_nxv8i8_nxv8i1(<vscale x 8 x i8> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i8_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
@@ -156,16 +158,16 @@ entry:
     <vscale x 8 x i8> %0,
     <vscale x 8 x i1> %1,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i8> %a
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.viota.nxv16i8(
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_viota_m_nxv16i8_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind {
+define <vscale x 16 x i8> @intrinsic_viota_m_nxv16i8_nxv16i1(<vscale x 16 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv16i8_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -174,7 +176,7 @@ define <vscale x 16 x i8> @intrinsic_viota_m_nxv16i8_nxv16i1(<vscale x 16 x i1>
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.viota.nxv16i8(
     <vscale x 16 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i8> %a
 }
@@ -183,9 +185,9 @@ declare <vscale x 16 x i8> @llvm.riscv.viota.mask.nxv16i8(
   <vscale x 16 x i8>,
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_viota_mask_m_nxv16i8_nxv16i1(<vscale x 16 x i8> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i8> @intrinsic_viota_mask_m_nxv16i8_nxv16i1(<vscale x 16 x i8> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i8_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
@@ -196,16 +198,16 @@ entry:
     <vscale x 16 x i8> %0,
     <vscale x 16 x i1> %1,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i8> %a
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.viota.nxv32i8(
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_viota_m_nxv32i8_nxv32i1(<vscale x 32 x i1> %0, i32 %1) nounwind {
+define <vscale x 32 x i8> @intrinsic_viota_m_nxv32i8_nxv32i1(<vscale x 32 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv32i8_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -214,7 +216,7 @@ define <vscale x 32 x i8> @intrinsic_viota_m_nxv32i8_nxv32i1(<vscale x 32 x i1>
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.viota.nxv32i8(
     <vscale x 32 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i8> %a
 }
@@ -223,9 +225,9 @@ declare <vscale x 32 x i8> @llvm.riscv.viota.mask.nxv32i8(
   <vscale x 32 x i8>,
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_viota_mask_m_nxv32i8_nxv32i1(<vscale x 32 x i8> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i8> @intrinsic_viota_mask_m_nxv32i8_nxv32i1(<vscale x 32 x i8> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv32i8_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
@@ -236,16 +238,16 @@ entry:
     <vscale x 32 x i8> %0,
     <vscale x 32 x i1> %1,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i8> %a
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.viota.nxv64i8(
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i8> @intrinsic_viota_m_nxv64i8_nxv64i1(<vscale x 64 x i1> %0, i32 %1) nounwind {
+define <vscale x 64 x i8> @intrinsic_viota_m_nxv64i8_nxv64i1(<vscale x 64 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv64i8_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -254,7 +256,7 @@ define <vscale x 64 x i8> @intrinsic_viota_m_nxv64i8_nxv64i1(<vscale x 64 x i1>
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.viota.nxv64i8(
     <vscale x 64 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 64 x i8> %a
 }
@@ -263,9 +265,9 @@ declare <vscale x 64 x i8> @llvm.riscv.viota.mask.nxv64i8(
   <vscale x 64 x i8>,
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i8> @intrinsic_viota_mask_m_nxv64i8_nxv64i1(<vscale x 64 x i8> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i8> @intrinsic_viota_mask_m_nxv64i8_nxv64i1(<vscale x 64 x i8> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv64i8_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, mu
@@ -276,16 +278,16 @@ entry:
     <vscale x 64 x i8> %0,
     <vscale x 64 x i1> %1,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i8> %a
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.viota.nxv1i16(
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_viota_m_nxv1i16_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_viota_m_nxv1i16_nxv1i1(<vscale x 1 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv1i16_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -294,7 +296,7 @@ define <vscale x 1 x i16> @intrinsic_viota_m_nxv1i16_nxv1i1(<vscale x 1 x i1> %0
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.viota.nxv1i16(
     <vscale x 1 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -303,9 +305,9 @@ declare <vscale x 1 x i16> @llvm.riscv.viota.mask.nxv1i16(
   <vscale x 1 x i16>,
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_viota_mask_m_nxv1i16_nxv1i1(<vscale x 1 x i16> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i16> @intrinsic_viota_mask_m_nxv1i16_nxv1i1(<vscale x 1 x i16> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i16_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -316,16 +318,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x i1> %1,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.viota.nxv2i16(
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_viota_m_nxv2i16_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_viota_m_nxv2i16_nxv2i1(<vscale x 2 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv2i16_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -334,7 +336,7 @@ define <vscale x 2 x i16> @intrinsic_viota_m_nxv2i16_nxv2i1(<vscale x 2 x i1> %0
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.viota.nxv2i16(
     <vscale x 2 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -343,9 +345,9 @@ declare <vscale x 2 x i16> @llvm.riscv.viota.mask.nxv2i16(
   <vscale x 2 x i16>,
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_viota_mask_m_nxv2i16_nxv2i1(<vscale x 2 x i16> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i16> @intrinsic_viota_mask_m_nxv2i16_nxv2i1(<vscale x 2 x i16> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i16_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -356,16 +358,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x i1> %1,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.viota.nxv4i16(
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_viota_m_nxv4i16_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_viota_m_nxv4i16_nxv4i1(<vscale x 4 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv4i16_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -374,7 +376,7 @@ define <vscale x 4 x i16> @intrinsic_viota_m_nxv4i16_nxv4i1(<vscale x 4 x i1> %0
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.viota.nxv4i16(
     <vscale x 4 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -383,9 +385,9 @@ declare <vscale x 4 x i16> @llvm.riscv.viota.mask.nxv4i16(
   <vscale x 4 x i16>,
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_viota_mask_m_nxv4i16_nxv4i1(<vscale x 4 x i16> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i16> @intrinsic_viota_mask_m_nxv4i16_nxv4i1(<vscale x 4 x i16> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i16_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -396,16 +398,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x i1> %1,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.viota.nxv8i16(
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_viota_m_nxv8i16_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_viota_m_nxv8i16_nxv8i1(<vscale x 8 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv8i16_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -414,7 +416,7 @@ define <vscale x 8 x i16> @intrinsic_viota_m_nxv8i16_nxv8i1(<vscale x 8 x i1> %0
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.viota.nxv8i16(
     <vscale x 8 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -423,9 +425,9 @@ declare <vscale x 8 x i16> @llvm.riscv.viota.mask.nxv8i16(
   <vscale x 8 x i16>,
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_viota_mask_m_nxv8i16_nxv8i1(<vscale x 8 x i16> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i16> @intrinsic_viota_mask_m_nxv8i16_nxv8i1(<vscale x 8 x i16> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i16_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -436,16 +438,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x i1> %1,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.viota.nxv16i16(
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_viota_m_nxv16i16_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_viota_m_nxv16i16_nxv16i1(<vscale x 16 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv16i16_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -454,7 +456,7 @@ define <vscale x 16 x i16> @intrinsic_viota_m_nxv16i16_nxv16i1(<vscale x 16 x i1
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.viota.nxv16i16(
     <vscale x 16 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -463,9 +465,9 @@ declare <vscale x 16 x i16> @llvm.riscv.viota.mask.nxv16i16(
   <vscale x 16 x i16>,
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_viota_mask_m_nxv16i16_nxv16i1(<vscale x 16 x i16> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i16> @intrinsic_viota_mask_m_nxv16i16_nxv16i1(<vscale x 16 x i16> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i16_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -476,16 +478,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x i1> %1,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.viota.nxv32i16(
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_viota_m_nxv32i16_nxv32i1(<vscale x 32 x i1> %0, i32 %1) nounwind {
+define <vscale x 32 x i16> @intrinsic_viota_m_nxv32i16_nxv32i1(<vscale x 32 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv32i16_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -494,7 +496,7 @@ define <vscale x 32 x i16> @intrinsic_viota_m_nxv32i16_nxv32i1(<vscale x 32 x i1
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.viota.nxv32i16(
     <vscale x 32 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i16> %a
 }
@@ -503,9 +505,9 @@ declare <vscale x 32 x i16> @llvm.riscv.viota.mask.nxv32i16(
   <vscale x 32 x i16>,
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_viota_mask_m_nxv32i16_nxv32i1(<vscale x 32 x i16> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i16> @intrinsic_viota_mask_m_nxv32i16_nxv32i1(<vscale x 32 x i16> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv32i16_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -516,16 +518,16 @@ entry:
     <vscale x 32 x i16> %0,
     <vscale x 32 x i1> %1,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.viota.nxv1i32(
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_viota_m_nxv1i32_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_viota_m_nxv1i32_nxv1i1(<vscale x 1 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv1i32_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -534,7 +536,7 @@ define <vscale x 1 x i32> @intrinsic_viota_m_nxv1i32_nxv1i1(<vscale x 1 x i1> %0
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.viota.nxv1i32(
     <vscale x 1 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -543,9 +545,9 @@ declare <vscale x 1 x i32> @llvm.riscv.viota.mask.nxv1i32(
   <vscale x 1 x i32>,
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_viota_mask_m_nxv1i32_nxv1i1(<vscale x 1 x i32> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i32> @intrinsic_viota_mask_m_nxv1i32_nxv1i1(<vscale x 1 x i32> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i32_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -556,16 +558,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x i1> %1,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_viota_m_nxv2i32_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_viota_m_nxv2i32_nxv2i1(<vscale x 2 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv2i32_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -574,7 +576,7 @@ define <vscale x 2 x i32> @intrinsic_viota_m_nxv2i32_nxv2i1(<vscale x 2 x i1> %0
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.viota.nxv2i32(
     <vscale x 2 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -583,9 +585,9 @@ declare <vscale x 2 x i32> @llvm.riscv.viota.mask.nxv2i32(
   <vscale x 2 x i32>,
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_viota_mask_m_nxv2i32_nxv2i1(<vscale x 2 x i32> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i32> @intrinsic_viota_mask_m_nxv2i32_nxv2i1(<vscale x 2 x i32> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i32_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -596,16 +598,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x i1> %1,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.viota.nxv4i32(
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_viota_m_nxv4i32_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_viota_m_nxv4i32_nxv4i1(<vscale x 4 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv4i32_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -614,7 +616,7 @@ define <vscale x 4 x i32> @intrinsic_viota_m_nxv4i32_nxv4i1(<vscale x 4 x i1> %0
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.viota.nxv4i32(
     <vscale x 4 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -623,9 +625,9 @@ declare <vscale x 4 x i32> @llvm.riscv.viota.mask.nxv4i32(
   <vscale x 4 x i32>,
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_viota_mask_m_nxv4i32_nxv4i1(<vscale x 4 x i32> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i32> @intrinsic_viota_mask_m_nxv4i32_nxv4i1(<vscale x 4 x i32> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i32_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -636,16 +638,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x i1> %1,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.viota.nxv8i32(
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_viota_m_nxv8i32_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_viota_m_nxv8i32_nxv8i1(<vscale x 8 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv8i32_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -654,7 +656,7 @@ define <vscale x 8 x i32> @intrinsic_viota_m_nxv8i32_nxv8i1(<vscale x 8 x i1> %0
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.viota.nxv8i32(
     <vscale x 8 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -663,9 +665,9 @@ declare <vscale x 8 x i32> @llvm.riscv.viota.mask.nxv8i32(
   <vscale x 8 x i32>,
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_viota_mask_m_nxv8i32_nxv8i1(<vscale x 8 x i32> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i32> @intrinsic_viota_mask_m_nxv8i32_nxv8i1(<vscale x 8 x i32> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i32_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -676,16 +678,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x i1> %1,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.viota.nxv16i32(
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_viota_m_nxv16i32_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_viota_m_nxv16i32_nxv16i1(<vscale x 16 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv16i32_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -694,7 +696,7 @@ define <vscale x 16 x i32> @intrinsic_viota_m_nxv16i32_nxv16i1(<vscale x 16 x i1
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.viota.nxv16i32(
     <vscale x 16 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -703,9 +705,9 @@ declare <vscale x 16 x i32> @llvm.riscv.viota.mask.nxv16i32(
   <vscale x 16 x i32>,
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_viota_mask_m_nxv16i32_nxv16i1(<vscale x 16 x i32> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i32> @intrinsic_viota_mask_m_nxv16i32_nxv16i1(<vscale x 16 x i32> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv16i32_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -716,16 +718,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x i1> %1,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.viota.nxv1i64(
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_viota_m_nxv1i64_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_viota_m_nxv1i64_nxv1i1(<vscale x 1 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv1i64_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -734,7 +736,7 @@ define <vscale x 1 x i64> @intrinsic_viota_m_nxv1i64_nxv1i1(<vscale x 1 x i1> %0
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.viota.nxv1i64(
     <vscale x 1 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -743,9 +745,9 @@ declare <vscale x 1 x i64> @llvm.riscv.viota.mask.nxv1i64(
   <vscale x 1 x i64>,
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_viota_mask_m_nxv1i64_nxv1i1(<vscale x 1 x i64> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i64> @intrinsic_viota_mask_m_nxv1i64_nxv1i1(<vscale x 1 x i64> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv1i64_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -756,16 +758,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x i1> %1,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.viota.nxv2i64(
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_viota_m_nxv2i64_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_viota_m_nxv2i64_nxv2i1(<vscale x 2 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv2i64_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -774,7 +776,7 @@ define <vscale x 2 x i64> @intrinsic_viota_m_nxv2i64_nxv2i1(<vscale x 2 x i1> %0
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.viota.nxv2i64(
     <vscale x 2 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -783,9 +785,9 @@ declare <vscale x 2 x i64> @llvm.riscv.viota.mask.nxv2i64(
   <vscale x 2 x i64>,
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_viota_mask_m_nxv2i64_nxv2i1(<vscale x 2 x i64> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i64> @intrinsic_viota_mask_m_nxv2i64_nxv2i1(<vscale x 2 x i64> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv2i64_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -796,16 +798,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x i1> %1,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.viota.nxv4i64(
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_viota_m_nxv4i64_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_viota_m_nxv4i64_nxv4i1(<vscale x 4 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv4i64_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -814,7 +816,7 @@ define <vscale x 4 x i64> @intrinsic_viota_m_nxv4i64_nxv4i1(<vscale x 4 x i1> %0
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.viota.nxv4i64(
     <vscale x 4 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -823,9 +825,9 @@ declare <vscale x 4 x i64> @llvm.riscv.viota.mask.nxv4i64(
   <vscale x 4 x i64>,
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_viota_mask_m_nxv4i64_nxv4i1(<vscale x 4 x i64> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i64> @intrinsic_viota_mask_m_nxv4i64_nxv4i1(<vscale x 4 x i64> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv4i64_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -836,16 +838,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x i1> %1,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.viota.nxv8i64(
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_viota_m_nxv8i64_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_viota_m_nxv8i64_nxv8i1(<vscale x 8 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_viota_m_nxv8i64_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -854,7 +856,7 @@ define <vscale x 8 x i64> @intrinsic_viota_m_nxv8i64_nxv8i1(<vscale x 8 x i1> %0
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.viota.nxv8i64(
     <vscale x 8 x i1> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -863,9 +865,9 @@ declare <vscale x 8 x i64> @llvm.riscv.viota.mask.nxv8i64(
   <vscale x 8 x i64>,
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_viota_mask_m_nxv8i64_nxv8i1(<vscale x 8 x i64> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i64> @intrinsic_viota_mask_m_nxv8i64_nxv8i1(<vscale x 8 x i64> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_viota_mask_m_nxv8i64_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -876,7 +878,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x i1> %1,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll
deleted file mode 100644
index 21b3f7b0b0ad2..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vlm-rv64.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-
-declare <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(<vscale x 1 x i1>*, i64);
-
-define <vscale x 1 x i1> @intrinsic_vlm_v_nxv1i1(<vscale x 1 x i1>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vlm_v_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
-; CHECK-NEXT:    vlm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(<vscale x 1 x i1>* %0, i64 %1)
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1(<vscale x 2 x i1>*, i64);
-
-define <vscale x 2 x i1> @intrinsic_vlm_v_nxv2i1(<vscale x 2 x i1>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vlm_v_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
-; CHECK-NEXT:    vlm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1(<vscale x 2 x i1>* %0, i64 %1)
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vlm.nxv4i1(<vscale x 4 x i1>*, i64);
-
-define <vscale x 4 x i1> @intrinsic_vlm_v_nxv4i1(<vscale x 4 x i1>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vlm_v_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
-; CHECK-NEXT:    vlm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vlm.nxv4i1(<vscale x 4 x i1>* %0, i64 %1)
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vlm.nxv8i1(<vscale x 8 x i1>*, i64);
-
-define <vscale x 8 x i1> @intrinsic_vlm_v_nxv8i1(<vscale x 8 x i1>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vlm_v_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
-; CHECK-NEXT:    vlm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vlm.nxv8i1(<vscale x 8 x i1>* %0, i64 %1)
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vlm.nxv16i1(<vscale x 16 x i1>*, i64);
-
-define <vscale x 16 x i1> @intrinsic_vlm_v_nxv16i1(<vscale x 16 x i1>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vlm_v_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; CHECK-NEXT:    vlm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vlm.nxv16i1(<vscale x 16 x i1>* %0, i64 %1)
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vlm.nxv32i1(<vscale x 32 x i1>*, i64);
-
-define <vscale x 32 x i1> @intrinsic_vlm_v_nxv32i1(<vscale x 32 x i1>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vlm_v_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
-; CHECK-NEXT:    vlm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vlm.nxv32i1(<vscale x 32 x i1>* %0, i64 %1)
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vlm.nxv64i1(<vscale x 64 x i1>*, i64);
-
-define <vscale x 64 x i1> @intrinsic_vlm_v_nxv64i1(<vscale x 64 x i1>* %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vlm_v_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
-; CHECK-NEXT:    vlm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vlm.nxv64i1(<vscale x 64 x i1>* %0, i64 %1)
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlm.ll
similarity index 80%
rename from llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vlm.ll
index 72cac45129035..765db6d47d7a5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlm-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlm.ll
@@ -1,94 +1,96 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 
-declare <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(<vscale x 1 x i1>*, i32);
+declare <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(<vscale x 1 x i1>*, iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vlm_v_nxv1i1(<vscale x 1 x i1>* %0, i32 %1) nounwind {
+define <vscale x 1 x i1> @intrinsic_vlm_v_nxv1i1(<vscale x 1 x i1>* %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vlm_v_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT:    vlm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(<vscale x 1 x i1>* %0, i32 %1)
+  %a = call <vscale x 1 x i1> @llvm.riscv.vlm.nxv1i1(<vscale x 1 x i1>* %0, iXLen %1)
   ret <vscale x 1 x i1> %a
 }
 
-declare <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1(<vscale x 2 x i1>*, i32);
+declare <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1(<vscale x 2 x i1>*, iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vlm_v_nxv2i1(<vscale x 2 x i1>* %0, i32 %1) nounwind {
+define <vscale x 2 x i1> @intrinsic_vlm_v_nxv2i1(<vscale x 2 x i1>* %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vlm_v_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT:    vlm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1(<vscale x 2 x i1>* %0, i32 %1)
+  %a = call <vscale x 2 x i1> @llvm.riscv.vlm.nxv2i1(<vscale x 2 x i1>* %0, iXLen %1)
   ret <vscale x 2 x i1> %a
 }
 
-declare <vscale x 4 x i1> @llvm.riscv.vlm.nxv4i1(<vscale x 4 x i1>*, i32);
+declare <vscale x 4 x i1> @llvm.riscv.vlm.nxv4i1(<vscale x 4 x i1>*, iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vlm_v_nxv4i1(<vscale x 4 x i1>* %0, i32 %1) nounwind {
+define <vscale x 4 x i1> @intrinsic_vlm_v_nxv4i1(<vscale x 4 x i1>* %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vlm_v_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vlm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vlm.nxv4i1(<vscale x 4 x i1>* %0, i32 %1)
+  %a = call <vscale x 4 x i1> @llvm.riscv.vlm.nxv4i1(<vscale x 4 x i1>* %0, iXLen %1)
   ret <vscale x 4 x i1> %a
 }
 
-declare <vscale x 8 x i1> @llvm.riscv.vlm.nxv8i1(<vscale x 8 x i1>*, i32);
+declare <vscale x 8 x i1> @llvm.riscv.vlm.nxv8i1(<vscale x 8 x i1>*, iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vlm_v_nxv8i1(<vscale x 8 x i1>* %0, i32 %1) nounwind {
+define <vscale x 8 x i1> @intrinsic_vlm_v_nxv8i1(<vscale x 8 x i1>* %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vlm_v_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT:    vlm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vlm.nxv8i1(<vscale x 8 x i1>* %0, i32 %1)
+  %a = call <vscale x 8 x i1> @llvm.riscv.vlm.nxv8i1(<vscale x 8 x i1>* %0, iXLen %1)
   ret <vscale x 8 x i1> %a
 }
 
-declare <vscale x 16 x i1> @llvm.riscv.vlm.nxv16i1(<vscale x 16 x i1>*, i32);
+declare <vscale x 16 x i1> @llvm.riscv.vlm.nxv16i1(<vscale x 16 x i1>*, iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vlm_v_nxv16i1(<vscale x 16 x i1>* %0, i32 %1) nounwind {
+define <vscale x 16 x i1> @intrinsic_vlm_v_nxv16i1(<vscale x 16 x i1>* %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vlm_v_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vlm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vlm.nxv16i1(<vscale x 16 x i1>* %0, i32 %1)
+  %a = call <vscale x 16 x i1> @llvm.riscv.vlm.nxv16i1(<vscale x 16 x i1>* %0, iXLen %1)
   ret <vscale x 16 x i1> %a
 }
 
-declare <vscale x 32 x i1> @llvm.riscv.vlm.nxv32i1(<vscale x 32 x i1>*, i32);
+declare <vscale x 32 x i1> @llvm.riscv.vlm.nxv32i1(<vscale x 32 x i1>*, iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vlm_v_nxv32i1(<vscale x 32 x i1>* %0, i32 %1) nounwind {
+define <vscale x 32 x i1> @intrinsic_vlm_v_nxv32i1(<vscale x 32 x i1>* %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vlm_v_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vlm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vlm.nxv32i1(<vscale x 32 x i1>* %0, i32 %1)
+  %a = call <vscale x 32 x i1> @llvm.riscv.vlm.nxv32i1(<vscale x 32 x i1>* %0, iXLen %1)
   ret <vscale x 32 x i1> %a
 }
 
-declare <vscale x 64 x i1> @llvm.riscv.vlm.nxv64i1(<vscale x 64 x i1>*, i32);
+declare <vscale x 64 x i1> @llvm.riscv.vlm.nxv64i1(<vscale x 64 x i1>*, iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vlm_v_nxv64i1(<vscale x 64 x i1>* %0, i32 %1) nounwind {
+define <vscale x 64 x i1> @intrinsic_vlm_v_nxv64i1(<vscale x 64 x i1>* %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vlm_v_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vlm.nxv64i1(<vscale x 64 x i1>* %0, i32 %1)
+  %a = call <vscale x 64 x i1> @llvm.riscv.vlm.nxv64i1(<vscale x 64 x i1>* %0, iXLen %1)
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll
deleted file mode 100644
index 12107a960f87d..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmand-rv64.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmand_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmand_mm_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmand_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmand_mm_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmand_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmand_mm_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmand.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmand_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmand_mm_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmand.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmand.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmand_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmand_mm_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmand.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmand.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmand_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmand_mm_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmand.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmand_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmand_mm_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmand.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmand.ll
index 4d0a65d0f892b..2743f9deab803 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmand-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmand.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmand_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmand_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmand_mm_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmand.nxv1i1(
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i1> %a
 }
@@ -24,9 +26,9 @@ entry:
 declare <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmand_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmand_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmand_mm_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -36,7 +38,7 @@ entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmand.nxv2i1(
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i1> %a
 }
@@ -44,9 +46,9 @@ entry:
 declare <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmand_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmand_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmand_mm_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -56,7 +58,7 @@ entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmand.nxv4i1(
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i1> %a
 }
@@ -64,9 +66,9 @@ entry:
 declare <vscale x 8 x i1> @llvm.riscv.vmand.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmand_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmand_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmand_mm_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -76,7 +78,7 @@ entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmand.nxv8i1(
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i1> %a
 }
@@ -84,9 +86,9 @@ entry:
 declare <vscale x 16 x i1> @llvm.riscv.vmand.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmand_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmand_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmand_mm_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -96,7 +98,7 @@ entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmand.nxv16i1(
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i1> %a
 }
@@ -104,9 +106,9 @@ entry:
 declare <vscale x 32 x i1> @llvm.riscv.vmand.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmand_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmand_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmand_mm_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -116,7 +118,7 @@ entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmand.nxv32i1(
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i1> %a
 }
@@ -124,9 +126,9 @@ entry:
 declare <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmand_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmand_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmand_mm_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -136,7 +138,7 @@ entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmand.nxv64i1(
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll
deleted file mode 100644
index 5ad6aa2ee4d8f..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv64.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmandn_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmandn_mm_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmandn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmandn.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmandn_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmandn_mm_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmandn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmandn.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmandn.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmandn_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmandn_mm_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmandn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmandn.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmandn.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmandn_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmandn_mm_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmandn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmandn.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmandn.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmandn_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmandn_mm_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmandn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmandn.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmandn.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmandn_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmandn_mm_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmandn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmandn.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmandn.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmandn_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmandn_mm_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmandn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmandn.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmandn.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmandn.ll
index 90d6cba3592d0..c7977e9855ea5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmandn-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmandn.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmandn_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmandn_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmandn_mm_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmandn.nxv1i1(
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i1> %a
 }
@@ -24,9 +26,9 @@ entry:
 declare <vscale x 2 x i1> @llvm.riscv.vmandn.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmandn_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmandn_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmandn_mm_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -36,7 +38,7 @@ entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmandn.nxv2i1(
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i1> %a
 }
@@ -44,9 +46,9 @@ entry:
 declare <vscale x 4 x i1> @llvm.riscv.vmandn.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmandn_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmandn_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmandn_mm_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -56,7 +58,7 @@ entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmandn.nxv4i1(
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i1> %a
 }
@@ -64,9 +66,9 @@ entry:
 declare <vscale x 8 x i1> @llvm.riscv.vmandn.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmandn_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmandn_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmandn_mm_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -76,7 +78,7 @@ entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmandn.nxv8i1(
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i1> %a
 }
@@ -84,9 +86,9 @@ entry:
 declare <vscale x 16 x i1> @llvm.riscv.vmandn.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmandn_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmandn_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmandn_mm_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -96,7 +98,7 @@ entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmandn.nxv16i1(
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i1> %a
 }
@@ -104,9 +106,9 @@ entry:
 declare <vscale x 32 x i1> @llvm.riscv.vmandn.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmandn_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmandn_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmandn_mm_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -116,7 +118,7 @@ entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmandn.nxv32i1(
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i1> %a
 }
@@ -124,9 +126,9 @@ entry:
 declare <vscale x 64 x i1> @llvm.riscv.vmandn.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmandn_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmandn_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmandn_mm_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -136,7 +138,7 @@ entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmandn.nxv64i1(
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll
deleted file mode 100644
index 50516db35a4fc..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv64.ll
+++ /dev/null
@@ -1,114 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmclr.nxv1i1(
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmclr_m_pseudo_nxv1i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmclr.nxv1i1(
-    i64 %0)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmclr.nxv2i1(
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmclr_m_pseudo_nxv2i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmclr.nxv2i1(
-    i64 %0)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmclr.nxv4i1(
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmclr_m_pseudo_nxv4i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmclr.nxv4i1(
-    i64 %0)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmclr.nxv8i1(
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmclr_m_pseudo_nxv8i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmclr.nxv8i1(
-    i64 %0)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmclr.nxv16i1(
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmclr_m_pseudo_nxv16i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmclr.nxv16i1(
-    i64 %0)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmclr.nxv32i1(
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmclr_m_pseudo_nxv32i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmclr.nxv32i1(
-    i64 %0)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmclr.nxv64i1(
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmclr_m_pseudo_nxv64i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmclr.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmclr.nxv64i1(
-    i64 %0)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmclr.ll
similarity index 72%
rename from llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmclr.ll
index 49fb55c901b2b..f786f033db2ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmclr-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmclr.ll
@@ -1,10 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmclr.nxv1i1(
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmclr_m_pseudo_nxv1i1(i32 %0) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmclr_m_pseudo_nxv1i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -12,15 +14,15 @@ define <vscale x 1 x i1> @intrinsic_vmclr_m_pseudo_nxv1i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmclr.nxv1i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x i1> %a
 }
 
 declare <vscale x 2 x i1> @llvm.riscv.vmclr.nxv2i1(
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmclr_m_pseudo_nxv2i1(i32 %0) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmclr_m_pseudo_nxv2i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -28,15 +30,15 @@ define <vscale x 2 x i1> @intrinsic_vmclr_m_pseudo_nxv2i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmclr.nxv2i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x i1> %a
 }
 
 declare <vscale x 4 x i1> @llvm.riscv.vmclr.nxv4i1(
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmclr_m_pseudo_nxv4i1(i32 %0) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmclr_m_pseudo_nxv4i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -44,15 +46,15 @@ define <vscale x 4 x i1> @intrinsic_vmclr_m_pseudo_nxv4i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmclr.nxv4i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x i1> %a
 }
 
 declare <vscale x 8 x i1> @llvm.riscv.vmclr.nxv8i1(
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmclr_m_pseudo_nxv8i1(i32 %0) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmclr_m_pseudo_nxv8i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -60,15 +62,15 @@ define <vscale x 8 x i1> @intrinsic_vmclr_m_pseudo_nxv8i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmclr.nxv8i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x i1> %a
 }
 
 declare <vscale x 16 x i1> @llvm.riscv.vmclr.nxv16i1(
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmclr_m_pseudo_nxv16i1(i32 %0) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmclr_m_pseudo_nxv16i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -76,15 +78,15 @@ define <vscale x 16 x i1> @intrinsic_vmclr_m_pseudo_nxv16i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmclr.nxv16i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 16 x i1> %a
 }
 
 declare <vscale x 32 x i1> @llvm.riscv.vmclr.nxv32i1(
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmclr_m_pseudo_nxv32i1(i32 %0) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmclr_m_pseudo_nxv32i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -92,15 +94,15 @@ define <vscale x 32 x i1> @intrinsic_vmclr_m_pseudo_nxv32i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmclr.nxv32i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 32 x i1> %a
 }
 
 declare <vscale x 64 x i1> @llvm.riscv.vmclr.nxv64i1(
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmclr_m_pseudo_nxv64i1(i32 %0) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmclr_m_pseudo_nxv64i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmclr_m_pseudo_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -108,7 +110,7 @@ define <vscale x 64 x i1> @intrinsic_vmclr_m_pseudo_nxv64i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmclr.nxv64i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll
deleted file mode 100644
index b4397d512e34b..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv64.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmnand.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmnand_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnand_mm_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmnand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmnand.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmnand.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmnand_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnand_mm_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmnand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmnand.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmnand.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmnand_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnand_mm_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmnand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmnand.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmnand.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmnand_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnand_mm_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmnand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmnand.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmnand.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmnand_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnand_mm_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmnand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmnand.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmnand.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmnand_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnand_mm_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmnand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmnand.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmnand.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmnand_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnand_mm_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmnand.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmnand.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmnand.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmnand.ll
index 958d38f0023f6..ee128c178c552 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmnand-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmnand.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmnand.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmnand_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmnand_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnand_mm_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmnand.nxv1i1(
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i1> %a
 }
@@ -24,9 +26,9 @@ entry:
 declare <vscale x 2 x i1> @llvm.riscv.vmnand.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmnand_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmnand_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnand_mm_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -36,7 +38,7 @@ entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmnand.nxv2i1(
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i1> %a
 }
@@ -44,9 +46,9 @@ entry:
 declare <vscale x 4 x i1> @llvm.riscv.vmnand.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmnand_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmnand_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnand_mm_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -56,7 +58,7 @@ entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmnand.nxv4i1(
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i1> %a
 }
@@ -64,9 +66,9 @@ entry:
 declare <vscale x 8 x i1> @llvm.riscv.vmnand.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmnand_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmnand_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnand_mm_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -76,7 +78,7 @@ entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmnand.nxv8i1(
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i1> %a
 }
@@ -84,9 +86,9 @@ entry:
 declare <vscale x 16 x i1> @llvm.riscv.vmnand.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmnand_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmnand_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnand_mm_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -96,7 +98,7 @@ entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmnand.nxv16i1(
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i1> %a
 }
@@ -104,9 +106,9 @@ entry:
 declare <vscale x 32 x i1> @llvm.riscv.vmnand.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmnand_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmnand_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnand_mm_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -116,7 +118,7 @@ entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmnand.nxv32i1(
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i1> %a
 }
@@ -124,9 +126,9 @@ entry:
 declare <vscale x 64 x i1> @llvm.riscv.vmnand.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmnand_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmnand_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnand_mm_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -136,7 +138,7 @@ entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmnand.nxv64i1(
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll
deleted file mode 100644
index 5beaeadcdcab2..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv64.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmnor.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmnor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnor_mm_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmnor.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmnor.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmnor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnor_mm_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmnor.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmnor.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmnor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnor_mm_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmnor.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmnor.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmnor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnor_mm_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmnor.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmnor.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmnor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnor_mm_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmnor.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmnor.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmnor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnor_mm_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmnor.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmnor.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmnor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmnor_mm_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmnor.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmnor.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmnor.ll
index ca5bcf5463fa5..9eb829964b0f1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmnor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmnor.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmnor.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmnor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmnor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnor_mm_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmnor.nxv1i1(
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i1> %a
 }
@@ -24,9 +26,9 @@ entry:
 declare <vscale x 2 x i1> @llvm.riscv.vmnor.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmnor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmnor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnor_mm_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -36,7 +38,7 @@ entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmnor.nxv2i1(
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i1> %a
 }
@@ -44,9 +46,9 @@ entry:
 declare <vscale x 4 x i1> @llvm.riscv.vmnor.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmnor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmnor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnor_mm_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -56,7 +58,7 @@ entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmnor.nxv4i1(
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i1> %a
 }
@@ -64,9 +66,9 @@ entry:
 declare <vscale x 8 x i1> @llvm.riscv.vmnor.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmnor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmnor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnor_mm_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -76,7 +78,7 @@ entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmnor.nxv8i1(
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i1> %a
 }
@@ -84,9 +86,9 @@ entry:
 declare <vscale x 16 x i1> @llvm.riscv.vmnor.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmnor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmnor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnor_mm_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -96,7 +98,7 @@ entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmnor.nxv16i1(
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i1> %a
 }
@@ -104,9 +106,9 @@ entry:
 declare <vscale x 32 x i1> @llvm.riscv.vmnor.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmnor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmnor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnor_mm_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -116,7 +118,7 @@ entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmnor.nxv32i1(
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i1> %a
 }
@@ -124,9 +126,9 @@ entry:
 declare <vscale x 64 x i1> @llvm.riscv.vmnor.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmnor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmnor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmnor_mm_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -136,7 +138,7 @@ entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmnor.nxv64i1(
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll
deleted file mode 100644
index 81fa40663fc1a..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmor-rv64.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmor.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmor_mm_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmor.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmor.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmor_mm_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmor.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmor.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmor_mm_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmor.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmor.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmor_mm_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmor.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmor.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmor_mm_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmor.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmor.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmor_mm_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmor.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmor.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmor_mm_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmor.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmor.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmor.ll
index 117d152f7248c..a5bcc135150dc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmor.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmor.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmor_mm_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmor.nxv1i1(
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i1> %a
 }
@@ -24,9 +26,9 @@ entry:
 declare <vscale x 2 x i1> @llvm.riscv.vmor.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmor_mm_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -36,7 +38,7 @@ entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmor.nxv2i1(
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i1> %a
 }
@@ -44,9 +46,9 @@ entry:
 declare <vscale x 4 x i1> @llvm.riscv.vmor.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmor_mm_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -56,7 +58,7 @@ entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmor.nxv4i1(
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i1> %a
 }
@@ -64,9 +66,9 @@ entry:
 declare <vscale x 8 x i1> @llvm.riscv.vmor.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmor_mm_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -76,7 +78,7 @@ entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmor.nxv8i1(
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i1> %a
 }
@@ -84,9 +86,9 @@ entry:
 declare <vscale x 16 x i1> @llvm.riscv.vmor.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmor_mm_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -96,7 +98,7 @@ entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmor.nxv16i1(
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i1> %a
 }
@@ -104,9 +106,9 @@ entry:
 declare <vscale x 32 x i1> @llvm.riscv.vmor.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmor_mm_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -116,7 +118,7 @@ entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmor.nxv32i1(
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i1> %a
 }
@@ -124,9 +126,9 @@ entry:
 declare <vscale x 64 x i1> @llvm.riscv.vmor.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmor_mm_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -136,7 +138,7 @@ entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmor.nxv64i1(
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll
deleted file mode 100644
index 89de2b78e94f5..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv64.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmorn.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmorn_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmorn_mm_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmorn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmorn.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmorn.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmorn_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmorn_mm_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmorn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmorn.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmorn.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmorn_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmorn_mm_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmorn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmorn.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmorn.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmorn_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmorn_mm_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmorn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmorn.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmorn.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmorn_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmorn_mm_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmorn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmorn.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmorn.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmorn_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmorn_mm_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmorn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmorn.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmorn.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmorn_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmorn_mm_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmorn.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmorn.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmorn.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmorn.ll
index 8509603acd2f5..eddfe24493f70 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmorn-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmorn.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmorn.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmorn_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmorn_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmorn_mm_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmorn.nxv1i1(
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i1> %a
 }
@@ -24,9 +26,9 @@ entry:
 declare <vscale x 2 x i1> @llvm.riscv.vmorn.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmorn_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmorn_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmorn_mm_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -36,7 +38,7 @@ entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmorn.nxv2i1(
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i1> %a
 }
@@ -44,9 +46,9 @@ entry:
 declare <vscale x 4 x i1> @llvm.riscv.vmorn.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmorn_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmorn_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmorn_mm_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -56,7 +58,7 @@ entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmorn.nxv4i1(
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i1> %a
 }
@@ -64,9 +66,9 @@ entry:
 declare <vscale x 8 x i1> @llvm.riscv.vmorn.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmorn_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmorn_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmorn_mm_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -76,7 +78,7 @@ entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmorn.nxv8i1(
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i1> %a
 }
@@ -84,9 +86,9 @@ entry:
 declare <vscale x 16 x i1> @llvm.riscv.vmorn.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmorn_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmorn_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmorn_mm_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -96,7 +98,7 @@ entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmorn.nxv16i1(
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i1> %a
 }
@@ -104,9 +106,9 @@ entry:
 declare <vscale x 32 x i1> @llvm.riscv.vmorn.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmorn_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmorn_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmorn_mm_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -116,7 +118,7 @@ entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmorn.nxv32i1(
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i1> %a
 }
@@ -124,9 +126,9 @@ entry:
 declare <vscale x 64 x i1> @llvm.riscv.vmorn.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmorn_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmorn_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmorn_mm_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -136,7 +138,7 @@ entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmorn.nxv64i1(
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll
deleted file mode 100644
index f34157dfd3797..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmset-rv64.ll
+++ /dev/null
@@ -1,114 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmset.nxv1i1(
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmset_m_pseudo_nxv1i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmset.nxv1i1(
-    i64 %0)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmset.nxv2i1(
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmset_m_pseudo_nxv2i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmset.nxv2i1(
-    i64 %0)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmset.nxv4i1(
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmset_m_pseudo_nxv4i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmset.nxv4i1(
-    i64 %0)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmset.nxv8i1(
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmset_m_pseudo_nxv8i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmset.nxv8i1(
-    i64 %0)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmset.nxv16i1(
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmset_m_pseudo_nxv16i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmset.nxv16i1(
-    i64 %0)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmset.nxv32i1(
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmset_m_pseudo_nxv32i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmset.nxv32i1(
-    i64 %0)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmset.nxv64i1(
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmset_m_pseudo_nxv64i1(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmset.nxv64i1(
-    i64 %0)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmset.ll
similarity index 72%
rename from llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmset.ll
index 68a6719c76580..19b05954e243f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmset-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmset.ll
@@ -1,10 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmset.nxv1i1(
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmset_m_pseudo_nxv1i1(i32 %0) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmset_m_pseudo_nxv1i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -12,15 +14,15 @@ define <vscale x 1 x i1> @intrinsic_vmset_m_pseudo_nxv1i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmset.nxv1i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x i1> %a
 }
 
 declare <vscale x 2 x i1> @llvm.riscv.vmset.nxv2i1(
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmset_m_pseudo_nxv2i1(i32 %0) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmset_m_pseudo_nxv2i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -28,15 +30,15 @@ define <vscale x 2 x i1> @intrinsic_vmset_m_pseudo_nxv2i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmset.nxv2i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x i1> %a
 }
 
 declare <vscale x 4 x i1> @llvm.riscv.vmset.nxv4i1(
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmset_m_pseudo_nxv4i1(i32 %0) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmset_m_pseudo_nxv4i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -44,15 +46,15 @@ define <vscale x 4 x i1> @intrinsic_vmset_m_pseudo_nxv4i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmset.nxv4i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x i1> %a
 }
 
 declare <vscale x 8 x i1> @llvm.riscv.vmset.nxv8i1(
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmset_m_pseudo_nxv8i1(i32 %0) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmset_m_pseudo_nxv8i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -60,15 +62,15 @@ define <vscale x 8 x i1> @intrinsic_vmset_m_pseudo_nxv8i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmset.nxv8i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x i1> %a
 }
 
 declare <vscale x 16 x i1> @llvm.riscv.vmset.nxv16i1(
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmset_m_pseudo_nxv16i1(i32 %0) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmset_m_pseudo_nxv16i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -76,15 +78,15 @@ define <vscale x 16 x i1> @intrinsic_vmset_m_pseudo_nxv16i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmset.nxv16i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 16 x i1> %a
 }
 
 declare <vscale x 32 x i1> @llvm.riscv.vmset.nxv32i1(
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmset_m_pseudo_nxv32i1(i32 %0) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmset_m_pseudo_nxv32i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -92,15 +94,15 @@ define <vscale x 32 x i1> @intrinsic_vmset_m_pseudo_nxv32i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmset.nxv32i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 32 x i1> %a
 }
 
 declare <vscale x 64 x i1> @llvm.riscv.vmset.nxv64i1(
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmset_m_pseudo_nxv64i1(i32 %0) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmset_m_pseudo_nxv64i1(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmset_m_pseudo_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -108,7 +110,7 @@ define <vscale x 64 x i1> @intrinsic_vmset_m_pseudo_nxv64i1(i32 %0) nounwind {
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmset.nxv64i1(
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll
deleted file mode 100644
index 0f776c83e0129..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv64.ll
+++ /dev/null
@@ -1,296 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmsif.nxv1i1(
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmsif_m_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_m_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmsif.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmsif.nxv1i1(
-    <vscale x 1 x i1> %0,
-    i64 %1)
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 1 x i1> @llvm.riscv.vmsif.mask.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv1i1_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsif.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmsif.mask.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3)
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmsif.nxv2i1(
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmsif_m_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_m_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmsif.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmsif.nxv2i1(
-    <vscale x 2 x i1> %0,
-    i64 %1)
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmsif.mask.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv2i1_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsif.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmsif.mask.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3)
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmsif.nxv4i1(
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmsif_m_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_m_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmsif.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmsif.nxv4i1(
-    <vscale x 4 x i1> %0,
-    i64 %1)
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmsif.mask.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv4i1_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsif.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmsif.mask.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3)
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmsif.nxv8i1(
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmsif_m_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_m_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmsif.m v8, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmsif.nxv8i1(
-    <vscale x 8 x i1> %0,
-    i64 %1)
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmsif.mask.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv8i1_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsif.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmsif.mask.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3)
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmsif.nxv16i1(
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmsif_m_nxv16i1(<vscale x 16 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_m_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsif.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmsif.nxv16i1(
-    <vscale x 16 x i1> %0,
-    i64 %1)
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmsif.mask.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv16i1_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsif.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmsif.mask.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3)
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmsif.nxv32i1(
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmsif_m_nxv32i1(<vscale x 32 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_m_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsif.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmsif.nxv32i1(
-    <vscale x 32 x i1> %0,
-    i64 %1)
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmsif.mask.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv32i1_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsif.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmsif.mask.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3)
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmsif.nxv64i1(
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmsif_m_nxv64i1(<vscale x 64 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_m_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmsif.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmsif.nxv64i1(
-    <vscale x 64 x i1> %0,
-    i64 %1)
-  ret <vscale x 64 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmsif.mask.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, <vscale x 64 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv64i1_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsif.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmsif.mask.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    <vscale x 64 x i1> %2,
-    i64 %3)
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmsif.ll
index 888b6ebbbc3f8..5b76892e5a3fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsif-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsif.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsif.nxv1i1(
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmsif_m_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmsif_m_nxv1i1(<vscale x 1 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_m_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i1> @intrinsic_vmsif_m_nxv1i1(<vscale x 1 x i1> %0, i32 %1)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsif.nxv1i1(
     <vscale x 1 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 1 x i1> %a
 }
 
@@ -23,9 +25,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsif.mask.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmsif_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv1i1_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -39,15 +41,15 @@ entry:
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
     <vscale x 1 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 1 x i1> %a
 }
 
 declare <vscale x 2 x i1> @llvm.riscv.vmsif.nxv2i1(
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmsif_m_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmsif_m_nxv2i1(<vscale x 2 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_m_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i1> @intrinsic_vmsif_m_nxv2i1(<vscale x 2 x i1> %0, i32 %1)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsif.nxv2i1(
     <vscale x 2 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 2 x i1> %a
 }
 
@@ -65,9 +67,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsif.mask.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmsif_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv2i1_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -81,15 +83,15 @@ entry:
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
     <vscale x 2 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 2 x i1> %a
 }
 
 declare <vscale x 4 x i1> @llvm.riscv.vmsif.nxv4i1(
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmsif_m_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmsif_m_nxv4i1(<vscale x 4 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_m_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i1> @intrinsic_vmsif_m_nxv4i1(<vscale x 4 x i1> %0, i32 %1)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsif.nxv4i1(
     <vscale x 4 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 4 x i1> %a
 }
 
@@ -107,9 +109,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsif.mask.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmsif_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv4i1_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -123,15 +125,15 @@ entry:
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
     <vscale x 4 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 4 x i1> %a
 }
 
 declare <vscale x 8 x i1> @llvm.riscv.vmsif.nxv8i1(
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmsif_m_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmsif_m_nxv8i1(<vscale x 8 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_m_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i1> @intrinsic_vmsif_m_nxv8i1(<vscale x 8 x i1> %0, i32 %1)
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsif.nxv8i1(
     <vscale x 8 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 8 x i1> %a
 }
 
@@ -149,9 +151,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsif.mask.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmsif_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv8i1_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -165,15 +167,15 @@ entry:
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
     <vscale x 8 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 8 x i1> %a
 }
 
 declare <vscale x 16 x i1> @llvm.riscv.vmsif.nxv16i1(
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmsif_m_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmsif_m_nxv16i1(<vscale x 16 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_m_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i1> @intrinsic_vmsif_m_nxv16i1(<vscale x 16 x i1> %0, i32
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsif.nxv16i1(
     <vscale x 16 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 16 x i1> %a
 }
 
@@ -191,9 +193,9 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsif.mask.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmsif_mask_m_nxv16i1_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv16i1_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -207,15 +209,15 @@ entry:
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
     <vscale x 16 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 16 x i1> %a
 }
 
 declare <vscale x 32 x i1> @llvm.riscv.vmsif.nxv32i1(
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmsif_m_nxv32i1(<vscale x 32 x i1> %0, i32 %1) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmsif_m_nxv32i1(<vscale x 32 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_m_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 32 x i1> @intrinsic_vmsif_m_nxv32i1(<vscale x 32 x i1> %0, i32
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsif.nxv32i1(
     <vscale x 32 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 32 x i1> %a
 }
 
@@ -233,9 +235,9 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsif.mask.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmsif_mask_m_nxv32i1_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv32i1_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -249,15 +251,15 @@ entry:
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
     <vscale x 32 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 32 x i1> %a
 }
 
 declare <vscale x 64 x i1> @llvm.riscv.vmsif.nxv64i1(
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmsif_m_nxv64i1(<vscale x 64 x i1> %0, i32 %1) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmsif_m_nxv64i1(<vscale x 64 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_m_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 64 x i1> @intrinsic_vmsif_m_nxv64i1(<vscale x 64 x i1> %0, i32
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmsif.nxv64i1(
     <vscale x 64 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 64 x i1> %a
 }
 
@@ -275,9 +277,9 @@ declare <vscale x 64 x i1> @llvm.riscv.vmsif.mask.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, <vscale x 64 x i1> %2, i32 %3) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmsif_mask_m_nxv64i1_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, <vscale x 64 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsif_mask_m_nxv64i1_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -291,6 +293,6 @@ entry:
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
     <vscale x 64 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll
deleted file mode 100644
index 8fba91102f814..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv64.ll
+++ /dev/null
@@ -1,296 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmsof.nxv1i1(
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmsof_m_nxv1i1(<vscale x 1 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_m_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmsof.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmsof.nxv1i1(
-    <vscale x 1 x i1> %0,
-    i64 %1)
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 1 x i1> @llvm.riscv.vmsof.mask.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv1i1_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsof.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmsof.mask.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3)
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmsof.nxv2i1(
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmsof_m_nxv2i1(<vscale x 2 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_m_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmsof.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmsof.nxv2i1(
-    <vscale x 2 x i1> %0,
-    i64 %1)
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmsof.mask.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv2i1_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsof.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmsof.mask.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3)
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmsof.nxv4i1(
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmsof_m_nxv4i1(<vscale x 4 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_m_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmsof.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmsof.nxv4i1(
-    <vscale x 4 x i1> %0,
-    i64 %1)
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmsof.mask.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv4i1_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsof.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmsof.mask.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3)
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmsof.nxv8i1(
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmsof_m_nxv8i1(<vscale x 8 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_m_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmsof.m v8, v0
-; CHECK-NEXT:    vmv.v.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmsof.nxv8i1(
-    <vscale x 8 x i1> %0,
-    i64 %1)
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmsof.mask.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv8i1_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsof.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmsof.mask.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3)
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmsof.nxv16i1(
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmsof_m_nxv16i1(<vscale x 16 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_m_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmsof.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmsof.nxv16i1(
-    <vscale x 16 x i1> %0,
-    i64 %1)
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmsof.mask.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv16i1_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsof.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmsof.mask.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3)
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmsof.nxv32i1(
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmsof_m_nxv32i1(<vscale x 32 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_m_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmsof.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmsof.nxv32i1(
-    <vscale x 32 x i1> %0,
-    i64 %1)
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmsof.mask.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv32i1_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsof.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmsof.mask.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3)
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmsof.nxv64i1(
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmsof_m_nxv64i1(<vscale x 64 x i1> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_m_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmsof.m v8, v0
-; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmsof.nxv64i1(
-    <vscale x 64 x i1> %0,
-    i64 %1)
-  ret <vscale x 64 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmsof.mask.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, <vscale x 64 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv64i1_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vmv1r.v v10, v0
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, mu
-; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vmsof.m v10, v8, v0.t
-; CHECK-NEXT:    vmv1r.v v0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmsof.mask.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    <vscale x 64 x i1> %2,
-    i64 %3)
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmsof.ll
index b5db2a4b284ad..290206244f3e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmsof-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsof.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmsof.nxv1i1(
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmsof_m_nxv1i1(<vscale x 1 x i1> %0, i32 %1) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmsof_m_nxv1i1(<vscale x 1 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_m_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i1> @intrinsic_vmsof_m_nxv1i1(<vscale x 1 x i1> %0, i32 %1)
 entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmsof.nxv1i1(
     <vscale x 1 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 1 x i1> %a
 }
 
@@ -23,9 +25,9 @@ declare <vscale x 1 x i1> @llvm.riscv.vmsof.mask.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmsof_mask_m_nxv1i1_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv1i1_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -39,15 +41,15 @@ entry:
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
     <vscale x 1 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 1 x i1> %a
 }
 
 declare <vscale x 2 x i1> @llvm.riscv.vmsof.nxv2i1(
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmsof_m_nxv2i1(<vscale x 2 x i1> %0, i32 %1) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmsof_m_nxv2i1(<vscale x 2 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_m_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i1> @intrinsic_vmsof_m_nxv2i1(<vscale x 2 x i1> %0, i32 %1)
 entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmsof.nxv2i1(
     <vscale x 2 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 2 x i1> %a
 }
 
@@ -65,9 +67,9 @@ declare <vscale x 2 x i1> @llvm.riscv.vmsof.mask.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmsof_mask_m_nxv2i1_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv2i1_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -81,15 +83,15 @@ entry:
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
     <vscale x 2 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 2 x i1> %a
 }
 
 declare <vscale x 4 x i1> @llvm.riscv.vmsof.nxv4i1(
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmsof_m_nxv4i1(<vscale x 4 x i1> %0, i32 %1) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmsof_m_nxv4i1(<vscale x 4 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_m_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i1> @intrinsic_vmsof_m_nxv4i1(<vscale x 4 x i1> %0, i32 %1)
 entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmsof.nxv4i1(
     <vscale x 4 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 4 x i1> %a
 }
 
@@ -107,9 +109,9 @@ declare <vscale x 4 x i1> @llvm.riscv.vmsof.mask.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmsof_mask_m_nxv4i1_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv4i1_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -123,15 +125,15 @@ entry:
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
     <vscale x 4 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 4 x i1> %a
 }
 
 declare <vscale x 8 x i1> @llvm.riscv.vmsof.nxv8i1(
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmsof_m_nxv8i1(<vscale x 8 x i1> %0, i32 %1) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmsof_m_nxv8i1(<vscale x 8 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_m_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i1> @intrinsic_vmsof_m_nxv8i1(<vscale x 8 x i1> %0, i32 %1)
 entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmsof.nxv8i1(
     <vscale x 8 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 8 x i1> %a
 }
 
@@ -149,9 +151,9 @@ declare <vscale x 8 x i1> @llvm.riscv.vmsof.mask.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmsof_mask_m_nxv8i1_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv8i1_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -165,15 +167,15 @@ entry:
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
     <vscale x 8 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 8 x i1> %a
 }
 
 declare <vscale x 16 x i1> @llvm.riscv.vmsof.nxv16i1(
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmsof_m_nxv16i1(<vscale x 16 x i1> %0, i32 %1) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmsof_m_nxv16i1(<vscale x 16 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_m_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i1> @intrinsic_vmsof_m_nxv16i1(<vscale x 16 x i1> %0, i32
 entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmsof.nxv16i1(
     <vscale x 16 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 16 x i1> %a
 }
 
@@ -191,9 +193,9 @@ declare <vscale x 16 x i1> @llvm.riscv.vmsof.mask.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmsof_mask_m_nxv16i1_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv16i1_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -207,15 +209,15 @@ entry:
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
     <vscale x 16 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 16 x i1> %a
 }
 
 declare <vscale x 32 x i1> @llvm.riscv.vmsof.nxv32i1(
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmsof_m_nxv32i1(<vscale x 32 x i1> %0, i32 %1) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmsof_m_nxv32i1(<vscale x 32 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_m_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 32 x i1> @intrinsic_vmsof_m_nxv32i1(<vscale x 32 x i1> %0, i32
 entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmsof.nxv32i1(
     <vscale x 32 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 32 x i1> %a
 }
 
@@ -233,9 +235,9 @@ declare <vscale x 32 x i1> @llvm.riscv.vmsof.mask.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmsof_mask_m_nxv32i1_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv32i1_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -249,15 +251,15 @@ entry:
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
     <vscale x 32 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 32 x i1> %a
 }
 
 declare <vscale x 64 x i1> @llvm.riscv.vmsof.nxv64i1(
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmsof_m_nxv64i1(<vscale x 64 x i1> %0, i32 %1) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmsof_m_nxv64i1(<vscale x 64 x i1> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_m_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 64 x i1> @intrinsic_vmsof_m_nxv64i1(<vscale x 64 x i1> %0, i32
 entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmsof.nxv64i1(
     <vscale x 64 x i1> %0,
-    i32 %1)
+    iXLen %1)
   ret <vscale x 64 x i1> %a
 }
 
@@ -275,9 +277,9 @@ declare <vscale x 64 x i1> @llvm.riscv.vmsof.mask.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, <vscale x 64 x i1> %2, i32 %3) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmsof_mask_m_nxv64i1_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, <vscale x 64 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsof_mask_m_nxv64i1_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vmv1r.v v10, v0
@@ -291,6 +293,6 @@ entry:
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
     <vscale x 64 x i1> %2,
-    i32 %3)
+    iXLen %3)
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll
deleted file mode 100644
index eb31e34b9c4be..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv64.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmxnor.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmxnor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxnor_mm_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmxnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmxnor.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmxnor.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmxnor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxnor_mm_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmxnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmxnor.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmxnor.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmxnor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxnor_mm_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmxnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmxnor.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmxnor.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmxnor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxnor_mm_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmxnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmxnor.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmxnor.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmxnor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxnor_mm_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmxnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmxnor.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmxnor.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmxnor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxnor_mm_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmxnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmxnor.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmxnor.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmxnor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxnor_mm_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmxnor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmxnor.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmxnor.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmxnor.ll
index 82759bc9ea0b5..fbadb42494837 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmxnor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmxnor.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmxnor.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmxnor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmxnor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmxnor.nxv1i1(
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i1> %a
 }
@@ -24,9 +26,9 @@ entry:
 declare <vscale x 2 x i1> @llvm.riscv.vmxnor.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmxnor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmxnor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -36,7 +38,7 @@ entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmxnor.nxv2i1(
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i1> %a
 }
@@ -44,9 +46,9 @@ entry:
 declare <vscale x 4 x i1> @llvm.riscv.vmxnor.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmxnor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmxnor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -56,7 +58,7 @@ entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmxnor.nxv4i1(
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i1> %a
 }
@@ -64,9 +66,9 @@ entry:
 declare <vscale x 8 x i1> @llvm.riscv.vmxnor.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmxnor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmxnor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -76,7 +78,7 @@ entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmxnor.nxv8i1(
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i1> %a
 }
@@ -84,9 +86,9 @@ entry:
 declare <vscale x 16 x i1> @llvm.riscv.vmxnor.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmxnor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmxnor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -96,7 +98,7 @@ entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmxnor.nxv16i1(
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i1> %a
 }
@@ -104,9 +106,9 @@ entry:
 declare <vscale x 32 x i1> @llvm.riscv.vmxnor.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmxnor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmxnor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -116,7 +118,7 @@ entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmxnor.nxv32i1(
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i1> %a
 }
@@ -124,9 +126,9 @@ entry:
 declare <vscale x 64 x i1> @llvm.riscv.vmxnor.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmxnor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmxnor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxnor_mm_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -136,7 +138,7 @@ entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmxnor.nxv64i1(
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll
deleted file mode 100644
index 0e8d66b558315..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv64.ll
+++ /dev/null
@@ -1,142 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i1> @llvm.riscv.vmxor.nxv1i1(
-  <vscale x 1 x i1>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i1> @intrinsic_vmxor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxor_mm_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vmxor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmxor.nxv1i1(
-    <vscale x 1 x i1> %0,
-    <vscale x 1 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 1 x i1> %a
-}
-
-declare <vscale x 2 x i1> @llvm.riscv.vmxor.nxv2i1(
-  <vscale x 2 x i1>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i1> @intrinsic_vmxor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxor_mm_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vmxor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i1> @llvm.riscv.vmxor.nxv2i1(
-    <vscale x 2 x i1> %0,
-    <vscale x 2 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 2 x i1> %a
-}
-
-declare <vscale x 4 x i1> @llvm.riscv.vmxor.nxv4i1(
-  <vscale x 4 x i1>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i1> @intrinsic_vmxor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxor_mm_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vmxor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i1> @llvm.riscv.vmxor.nxv4i1(
-    <vscale x 4 x i1> %0,
-    <vscale x 4 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 4 x i1> %a
-}
-
-declare <vscale x 8 x i1> @llvm.riscv.vmxor.nxv8i1(
-  <vscale x 8 x i1>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i1> @intrinsic_vmxor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxor_mm_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vmxor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i1> @llvm.riscv.vmxor.nxv8i1(
-    <vscale x 8 x i1> %0,
-    <vscale x 8 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 8 x i1> %a
-}
-
-declare <vscale x 16 x i1> @llvm.riscv.vmxor.nxv16i1(
-  <vscale x 16 x i1>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i1> @intrinsic_vmxor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxor_mm_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vmxor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i1> @llvm.riscv.vmxor.nxv16i1(
-    <vscale x 16 x i1> %0,
-    <vscale x 16 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 16 x i1> %a
-}
-
-declare <vscale x 32 x i1> @llvm.riscv.vmxor.nxv32i1(
-  <vscale x 32 x i1>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i1> @intrinsic_vmxor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxor_mm_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vmxor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i1> @llvm.riscv.vmxor.nxv32i1(
-    <vscale x 32 x i1> %0,
-    <vscale x 32 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 32 x i1> %a
-}
-
-declare <vscale x 64 x i1> @llvm.riscv.vmxor.nxv64i1(
-  <vscale x 64 x i1>,
-  <vscale x 64 x i1>,
-  i64);
-
-define <vscale x 64 x i1> @intrinsic_vmxor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vmxor_mm_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
-; CHECK-NEXT:    vmxor.mm v0, v0, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 64 x i1> @llvm.riscv.vmxor.nxv64i1(
-    <vscale x 64 x i1> %0,
-    <vscale x 64 x i1> %1,
-    i64 %2)
-
-  ret <vscale x 64 x i1> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmxor.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vmxor.ll
index 15cb88f17599a..4a1900ed1538f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmxor-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmxor.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 declare <vscale x 1 x i1> @llvm.riscv.vmxor.nxv1i1(
   <vscale x 1 x i1>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i1> @intrinsic_vmxor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
+define <vscale x 1 x i1> @intrinsic_vmxor_mm_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxor_mm_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmxor.nxv1i1(
     <vscale x 1 x i1> %0,
     <vscale x 1 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x i1> %a
 }
@@ -24,9 +26,9 @@ entry:
 declare <vscale x 2 x i1> @llvm.riscv.vmxor.nxv2i1(
   <vscale x 2 x i1>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i1> @intrinsic_vmxor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
+define <vscale x 2 x i1> @intrinsic_vmxor_mm_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxor_mm_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -36,7 +38,7 @@ entry:
   %a = call <vscale x 2 x i1> @llvm.riscv.vmxor.nxv2i1(
     <vscale x 2 x i1> %0,
     <vscale x 2 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x i1> %a
 }
@@ -44,9 +46,9 @@ entry:
 declare <vscale x 4 x i1> @llvm.riscv.vmxor.nxv4i1(
   <vscale x 4 x i1>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i1> @intrinsic_vmxor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
+define <vscale x 4 x i1> @intrinsic_vmxor_mm_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxor_mm_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -56,7 +58,7 @@ entry:
   %a = call <vscale x 4 x i1> @llvm.riscv.vmxor.nxv4i1(
     <vscale x 4 x i1> %0,
     <vscale x 4 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x i1> %a
 }
@@ -64,9 +66,9 @@ entry:
 declare <vscale x 8 x i1> @llvm.riscv.vmxor.nxv8i1(
   <vscale x 8 x i1>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i1> @intrinsic_vmxor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
+define <vscale x 8 x i1> @intrinsic_vmxor_mm_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxor_mm_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -76,7 +78,7 @@ entry:
   %a = call <vscale x 8 x i1> @llvm.riscv.vmxor.nxv8i1(
     <vscale x 8 x i1> %0,
     <vscale x 8 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x i1> %a
 }
@@ -84,9 +86,9 @@ entry:
 declare <vscale x 16 x i1> @llvm.riscv.vmxor.nxv16i1(
   <vscale x 16 x i1>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i1> @intrinsic_vmxor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
+define <vscale x 16 x i1> @intrinsic_vmxor_mm_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxor_mm_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -96,7 +98,7 @@ entry:
   %a = call <vscale x 16 x i1> @llvm.riscv.vmxor.nxv16i1(
     <vscale x 16 x i1> %0,
     <vscale x 16 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x i1> %a
 }
@@ -104,9 +106,9 @@ entry:
 declare <vscale x 32 x i1> @llvm.riscv.vmxor.nxv32i1(
   <vscale x 32 x i1>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i1> @intrinsic_vmxor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
+define <vscale x 32 x i1> @intrinsic_vmxor_mm_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxor_mm_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -116,7 +118,7 @@ entry:
   %a = call <vscale x 32 x i1> @llvm.riscv.vmxor.nxv32i1(
     <vscale x 32 x i1> %0,
     <vscale x 32 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x i1> %a
 }
@@ -124,9 +126,9 @@ entry:
 declare <vscale x 64 x i1> @llvm.riscv.vmxor.nxv64i1(
   <vscale x 64 x i1>,
   <vscale x 64 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 64 x i1> @intrinsic_vmxor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, i32 %2) nounwind {
+define <vscale x 64 x i1> @intrinsic_vmxor_mm_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vmxor_mm_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, mu
@@ -136,7 +138,7 @@ entry:
   %a = call <vscale x 64 x i1> @llvm.riscv.vmxor.nxv64i1(
     <vscale x 64 x i1> %0,
     <vscale x 64 x i1> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 64 x i1> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll
deleted file mode 100644
index 7fd84ccf35efa..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vsm-rv64.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-
-declare void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>*, i64);
-
-define void @intrinsic_vsm_v_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vsm_v_nxv1i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
-; CHECK-NEXT:    vsm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, i64 %2)
-  ret void
-}
-
-declare void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>*, i64);
-
-define void @intrinsic_vsm_v_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vsm_v_nxv2i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
-; CHECK-NEXT:    vsm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  call void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, i64 %2)
-  ret void
-}
-
-declare void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>*, i64);
-
-define void @intrinsic_vsm_v_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vsm_v_nxv4i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
-; CHECK-NEXT:    vsm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  call void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, i64 %2)
-  ret void
-}
-
-declare void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>*, i64);
-
-define void @intrinsic_vsm_v_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vsm_v_nxv8i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
-; CHECK-NEXT:    vsm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  call void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, i64 %2)
-  ret void
-}
-
-declare void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>*, i64);
-
-define void @intrinsic_vsm_v_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vsm_v_nxv16i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; CHECK-NEXT:    vsm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  call void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, i64 %2)
-  ret void
-}
-
-declare void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>*, i64);
-
-define void @intrinsic_vsm_v_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vsm_v_nxv32i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
-; CHECK-NEXT:    vsm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  call void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, i64 %2)
-  ret void
-}
-
-declare void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>*, i64);
-
-define void @intrinsic_vsm_v_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vsm_v_nxv64i1:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
-; CHECK-NEXT:    vsm.v v0, (a0)
-; CHECK-NEXT:    ret
-entry:
-  call void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, i64 %2)
-  ret void
-}
-
-declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16(
-  <vscale x 1 x i16>,
-  <vscale x 1 x i16>,
-  i64);
-
-; Make sure we can use the vsetvli from the producing instruction.
-define void @test_vsetvli_i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1>* %2, i64 %3) nounwind {
-; CHECK-LABEL: test_vsetvli_i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vmseq.vv v8, v8, v9
-; CHECK-NEXT:    vsm.v v8, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x i16> %1,
-    i64 %3)
-  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1>* %2, i64 %3)
-  ret void
-}
-
-declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32(
-  <vscale x 1 x i32>,
-  <vscale x 1 x i32>,
-  i64);
-
-define void @test_vsetvli_i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1>* %2, i64 %3) nounwind {
-; CHECK-LABEL: test_vsetvli_i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vmseq.vv v8, v8, v9
-; CHECK-NEXT:    vsm.v v8, (a0)
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x i32> %1,
-    i64 %3)
-  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1>* %2, i64 %3)
-  ret void
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsm.ll
similarity index 79%
rename from llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vsm.ll
index 3285cdbfe2741..acbaa7a6d84b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsm-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsm.ll
@@ -1,105 +1,107 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s
 
-declare void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>*, i32);
+declare void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>*, iXLen);
 
-define void @intrinsic_vsm_v_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, i32 %2) nounwind {
+define void @intrinsic_vsm_v_nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsm_v_nxv1i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, i32 %2)
+  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %0, <vscale x 1 x i1>* %1, iXLen %2)
   ret void
 }
 
-declare void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>*, i32);
+declare void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1>, <vscale x 2 x i1>*, iXLen);
 
-define void @intrinsic_vsm_v_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, i32 %2) nounwind {
+define void @intrinsic_vsm_v_nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsm_v_nxv2i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  call void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, i32 %2)
+  call void @llvm.riscv.vsm.nxv2i1(<vscale x 2 x i1> %0, <vscale x 2 x i1>* %1, iXLen %2)
   ret void
 }
 
-declare void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>*, i32);
+declare void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1>, <vscale x 4 x i1>*, iXLen);
 
-define void @intrinsic_vsm_v_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, i32 %2) nounwind {
+define void @intrinsic_vsm_v_nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsm_v_nxv4i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  call void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, i32 %2)
+  call void @llvm.riscv.vsm.nxv4i1(<vscale x 4 x i1> %0, <vscale x 4 x i1>* %1, iXLen %2)
   ret void
 }
 
-declare void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>*, i32);
+declare void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1>, <vscale x 8 x i1>*, iXLen);
 
-define void @intrinsic_vsm_v_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, i32 %2) nounwind {
+define void @intrinsic_vsm_v_nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsm_v_nxv8i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  call void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, i32 %2)
+  call void @llvm.riscv.vsm.nxv8i1(<vscale x 8 x i1> %0, <vscale x 8 x i1>* %1, iXLen %2)
   ret void
 }
 
-declare void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>*, i32);
+declare void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1>, <vscale x 16 x i1>*, iXLen);
 
-define void @intrinsic_vsm_v_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, i32 %2) nounwind {
+define void @intrinsic_vsm_v_nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsm_v_nxv16i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  call void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, i32 %2)
+  call void @llvm.riscv.vsm.nxv16i1(<vscale x 16 x i1> %0, <vscale x 16 x i1>* %1, iXLen %2)
   ret void
 }
 
-declare void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>*, i32);
+declare void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1>, <vscale x 32 x i1>*, iXLen);
 
-define void @intrinsic_vsm_v_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, i32 %2) nounwind {
+define void @intrinsic_vsm_v_nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsm_v_nxv32i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  call void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, i32 %2)
+  call void @llvm.riscv.vsm.nxv32i1(<vscale x 32 x i1> %0, <vscale x 32 x i1>* %1, iXLen %2)
   ret void
 }
 
-declare void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>*, i32);
+declare void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1>, <vscale x 64 x i1>*, iXLen);
 
-define void @intrinsic_vsm_v_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, i32 %2) nounwind {
+define void @intrinsic_vsm_v_nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vsm_v_nxv64i1:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT:    vsm.v v0, (a0)
 ; CHECK-NEXT:    ret
 entry:
-  call void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, i32 %2)
+  call void @llvm.riscv.vsm.nxv64i1(<vscale x 64 x i1> %0, <vscale x 64 x i1>* %1, iXLen %2)
   ret void
 }
 
 declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16(
   <vscale x 1 x i16>,
   <vscale x 1 x i16>,
-  i32);
+  iXLen);
 
 ; Make sure we can use the vsetvli from the producing instruction.
-define void @test_vsetvli_i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1>* %2, i32 %3) nounwind {
+define void @test_vsetvli_i16(<vscale x 1 x i16> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1>* %2, iXLen %3) nounwind {
 ; CHECK-LABEL: test_vsetvli_i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
@@ -110,17 +112,17 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i16(
     <vscale x 1 x i16> %0,
     <vscale x 1 x i16> %1,
-    i32 %3)
-  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1>* %2, i32 %3)
+    iXLen %3)
+  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1>* %2, iXLen %3)
   ret void
 }
 
 declare <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32(
   <vscale x 1 x i32>,
   <vscale x 1 x i32>,
-  i32);
+  iXLen);
 
-define void @test_vsetvli_i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1>* %2, i32 %3) nounwind {
+define void @test_vsetvli_i32(<vscale x 1 x i32> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1>* %2, iXLen %3) nounwind {
 ; CHECK-LABEL: test_vsetvli_i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
@@ -131,7 +133,7 @@ entry:
   %a = call <vscale x 1 x i1> @llvm.riscv.vmseq.nxv1i32(
     <vscale x 1 x i32> %0,
     <vscale x 1 x i32> %1,
-    i32 %3)
-  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1>* %2, i32 %3)
+    iXLen %3)
+  call void @llvm.riscv.vsm.nxv1i1(<vscale x 1 x i1> %a, <vscale x 1 x i1>* %2, iXLen %3)
   ret void
 }

From be6070c290e23d659c6374284a632442e2360967 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sat, 22 Jan 2022 13:41:30 -0800
Subject: [PATCH 270/946] [RISCV] Use FP ABI for some RVV intrinsic tests. NFC

Removes moves from GPR to FPR and improves f64 tests on RV32.

Differential Revision: https://reviews.llvm.org/D117969
---
 llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll     | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll     | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll     | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll     | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll    | 146 +++++---------
 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll    | 122 +++++-------
 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll    | 146 +++++---------
 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll    | 122 +++++-------
 llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll     | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll     | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll   |  93 +++------
 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll   |  77 +++-----
 llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll     | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll     | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll    | 146 +++++---------
 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll    | 122 +++++-------
 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll    | 146 +++++---------
 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll    | 122 +++++-------
 llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll     | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll     | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll   | 146 +++++---------
 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll   | 122 +++++-------
 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll   | 146 +++++---------
 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll   | 122 +++++-------
 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll   | 146 +++++---------
 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll   | 122 +++++-------
 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll   | 146 +++++---------
 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll   | 122 +++++-------
 llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll    | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll    | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll    | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll    | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll    | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll    | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll   | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll   | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll   | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll   | 152 ++++++---------
 .../CodeGen/RISCV/rvv/vfslide1down-rv32.ll    | 184 ++++++------------
 .../CodeGen/RISCV/rvv/vfslide1down-rv64.ll    | 152 ++++++---------
 .../test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll | 184 ++++++------------
 .../test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll     | 184 ++++++------------
 llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll     | 152 ++++++---------
 llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll    |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll    |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll  | 137 ++++++-------
 llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll  | 137 ++++++-------
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll   |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll   |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll   |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll   |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll    |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll    |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll  |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll  |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll  |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll  |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll    |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll    |  92 ++++-----
 llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll  | 137 ++++++-------
 llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll  | 137 ++++++-------
 62 files changed, 3170 insertions(+), 5348 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll
index cfdeba067718a..5df1881bffb23 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
index 8e43295bf7ee3..b91d86befbc87 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -688,9 +688,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.f16(
@@ -712,9 +711,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.f16(
@@ -735,9 +733,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.f16(
@@ -759,9 +756,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.f16(
@@ -782,9 +778,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.f16(
@@ -806,9 +801,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.f16(
@@ -829,9 +823,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.f16(
@@ -853,9 +846,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.f16(
@@ -876,9 +868,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.f16(
@@ -900,9 +891,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.f16(
@@ -923,9 +913,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.f16(
@@ -947,9 +936,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.f16(
@@ -970,9 +958,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.f32(
@@ -994,9 +981,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.f32(
@@ -1017,9 +1003,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.f32(
@@ -1041,9 +1026,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.f32(
@@ -1064,9 +1048,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.f32(
@@ -1088,9 +1071,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.f32(
@@ -1111,9 +1093,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.f32(
@@ -1135,9 +1116,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.f32(
@@ -1158,9 +1138,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.f32(
@@ -1182,9 +1161,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.f32(
@@ -1205,9 +1183,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.f64(
@@ -1229,9 +1206,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
@@ -1252,9 +1228,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.f64(
@@ -1276,9 +1251,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.f64(
@@ -1299,9 +1273,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.f64(
@@ -1323,9 +1296,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.f64(
@@ -1346,9 +1318,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.f64(
@@ -1370,9 +1341,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll
index 2f2bc40a0e6fc..01bfb50ed9b49 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
index dc6760900bafb..2d4a16e1bf4e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfdiv_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfdiv_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfdiv_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfdiv_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfdiv_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfdiv_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfdiv_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfdiv_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfdiv_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfdiv_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfdiv_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfdiv_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfdiv_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfdiv_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfdiv_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfdiv_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfdiv_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfdiv_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfdiv_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfdiv_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfdiv_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfdiv_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.f32(
@@ -1204,9 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfdiv_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.f64(
@@ -1228,9 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfdiv_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64.f64(
@@ -1251,9 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfdiv_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.f64(
@@ -1275,9 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfdiv_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64.f64(
@@ -1298,9 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfdiv_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.f64(
@@ -1322,9 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfdiv_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64.f64(
@@ -1345,9 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfdiv_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.f64(
@@ -1369,9 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfdiv_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
index 207c7ecf5a3db..16d305bad846c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.f32(
@@ -994,13 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64(
@@ -1022,13 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.f64(
@@ -1050,13 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64(
@@ -1078,13 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.f64(
@@ -1106,13 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64(
@@ -1134,13 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll
index f636fa36eb7c0..c5809888ff17b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.f32(
@@ -994,9 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64(
@@ -1018,9 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.f64(
@@ -1042,9 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64(
@@ -1066,9 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.f64(
@@ -1090,9 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64(
@@ -1114,9 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll
index dd12115fbb95b..cfb32cfab4cdc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.f32(
@@ -994,13 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64(
@@ -1022,13 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.f64(
@@ -1050,13 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64(
@@ -1078,13 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.f64(
@@ -1106,13 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64(
@@ -1134,13 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
index ae39ef8dd3260..afd41bd8a2122 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.f32(
@@ -994,9 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64(
@@ -1018,9 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.f64(
@@ -1042,9 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64(
@@ -1066,9 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.f64(
@@ -1090,9 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64(
@@ -1114,9 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
index d6a880c743dd2..98b4cf71da14d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll
index aea557735e932..4fc7319fb2b29 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.f32(
@@ -1204,9 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.f64(
@@ -1228,9 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.f64(
@@ -1251,9 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.f64(
@@ -1275,9 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.f64(
@@ -1298,9 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.f64(
@@ -1322,9 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.f64(
@@ -1345,9 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.f64(
@@ -1369,9 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
index c1d91e208aaa1..3dc1240d0f7b6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -32,9 +32,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.f16(
@@ -77,9 +76,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.f16(
@@ -122,9 +120,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.f16(
@@ -167,9 +164,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.f16(
@@ -212,9 +208,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.f16(
@@ -257,9 +252,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.f16(
@@ -302,9 +296,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.f32(
@@ -347,9 +340,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.f32(
@@ -392,9 +384,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.f32(
@@ -437,9 +428,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.f32(
@@ -482,9 +472,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.f32(
@@ -527,13 +516,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.f64(
@@ -576,13 +560,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.f64(
@@ -625,13 +604,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.f64(
@@ -674,13 +648,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll
index cf4ad020e4cf1..b23d908c7edda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -32,9 +32,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.f16(
@@ -77,9 +76,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.f16(
@@ -122,9 +120,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.f16(
@@ -167,9 +164,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.f16(
@@ -212,9 +208,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.f16(
@@ -257,9 +252,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.f16(
@@ -302,9 +296,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.f32(
@@ -347,9 +340,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.f32(
@@ -392,9 +384,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.f32(
@@ -437,9 +428,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.f32(
@@ -482,9 +472,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.f32(
@@ -527,9 +516,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.f64(
@@ -572,9 +560,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.f64(
@@ -617,9 +604,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.f64(
@@ -662,9 +648,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, ft0, v0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
index 2b2d954a7cc1d..0861a787440e4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll
index 3f1ffd3e81a64..e647fe51ffb17 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.f32(
@@ -1204,9 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.f64(
@@ -1228,9 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.f64(
@@ -1251,9 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.f64(
@@ -1275,9 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.f64(
@@ -1298,9 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.f64(
@@ -1322,9 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.f64(
@@ -1345,9 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.f64(
@@ -1369,9 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll
index ff51e6dab20a9..c8407dfe64730 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.f32(
@@ -994,13 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64(
@@ -1022,13 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.f64(
@@ -1050,13 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64(
@@ -1078,13 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.f64(
@@ -1106,13 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64(
@@ -1134,13 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
index 08666a5dd51c8..2a5fb2896aa56 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.f32(
@@ -994,9 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64(
@@ -1018,9 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.f64(
@@ -1042,9 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64(
@@ -1066,9 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.f64(
@@ -1090,9 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64(
@@ -1114,9 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll
index 8a10800c748ad..620c3dcb1025a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.f32(
@@ -994,13 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64(
@@ -1022,13 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.f64(
@@ -1050,13 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64(
@@ -1078,13 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.f64(
@@ -1106,13 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64(
@@ -1134,13 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
index eb0733b6e5cc0..70efc0da21f5a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.f32(
@@ -994,9 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64(
@@ -1018,9 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.f64(
@@ -1042,9 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64(
@@ -1066,9 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.f64(
@@ -1090,9 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64(
@@ -1114,9 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll
index 9cab6de9e0b46..50ebccd92e64c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
index f260499700e6f..08aa64b6de7fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmul_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfmul_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmul_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfmul_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmul_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfmul_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmul_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfmul_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmul_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfmul_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmul_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfmul_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmul_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfmul_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmul_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfmul_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmul_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfmul_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmul_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfmul_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmul_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfmul_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32.f32(
@@ -1204,9 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmul_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.f64(
@@ -1228,9 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfmul_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64.f64(
@@ -1251,9 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmul_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.f64(
@@ -1275,9 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfmul_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64.f64(
@@ -1298,9 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmul_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.f64(
@@ -1322,9 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfmul_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64.f64(
@@ -1345,9 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmul_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.f64(
@@ -1369,9 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfmul_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll
index 956ac22ede348..fa1767202c126 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.f32(
@@ -994,13 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64(
@@ -1022,13 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.f64(
@@ -1050,13 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64(
@@ -1078,13 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.f64(
@@ -1106,13 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64(
@@ -1134,13 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
index 6d302b7f49d5c..f8419e81f7d06 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.f32(
@@ -994,9 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64(
@@ -1018,9 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.f64(
@@ -1042,9 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64(
@@ -1066,9 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.f64(
@@ -1090,9 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64(
@@ -1114,9 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll
index 9d5d70498ab3c..e0d33062322b8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.f32(
@@ -994,13 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64(
@@ -1022,13 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.f64(
@@ -1050,13 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64(
@@ -1078,13 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.f64(
@@ -1106,13 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64(
@@ -1134,13 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
index 13c9484ad907f..ab407427952ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.f32(
@@ -994,9 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64(
@@ -1018,9 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.f64(
@@ -1042,9 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64(
@@ -1066,9 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.f64(
@@ -1090,9 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64(
@@ -1114,9 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll
index 385b3397ab28b..834938c7d6e74 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.f32(
@@ -994,13 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64(
@@ -1022,13 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.f64(
@@ -1050,13 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64(
@@ -1078,13 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.f64(
@@ -1106,13 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64(
@@ -1134,13 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
index 2a7a29a742f92..58e489618bc4e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.f32(
@@ -994,9 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64(
@@ -1018,9 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.f64(
@@ -1042,9 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64(
@@ -1066,9 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.f64(
@@ -1090,9 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64(
@@ -1114,9 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
index 35e687c4fcbc0..67dbb5a92dfa3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.f32(
@@ -994,13 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64(
@@ -1022,13 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.f64(
@@ -1050,13 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64(
@@ -1078,13 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.f64(
@@ -1106,13 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64(
@@ -1134,13 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll
index 977163f1383f0..07b23dbfb066d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -562,9 +562,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16(
 define <vscale x 1 x half>  @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16(
@@ -586,9 +585,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.f16(
@@ -610,9 +608,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16(
 define <vscale x 2 x half>  @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16(
@@ -634,9 +631,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.f16(
@@ -658,9 +654,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16(
 define <vscale x 4 x half>  @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16(
@@ -682,9 +677,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.f16(
@@ -706,9 +700,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16(
 define <vscale x 8 x half>  @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16(
@@ -730,9 +723,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.f16(
@@ -754,9 +746,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16(
 define <vscale x 16 x half>  @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16(
@@ -778,9 +769,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.f16(
@@ -802,9 +792,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32(
 define <vscale x 1 x float>  @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32(
@@ -826,9 +815,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.f32(
@@ -850,9 +838,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32(
 define <vscale x 2 x float>  @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32(
@@ -874,9 +861,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.f32(
@@ -898,9 +884,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32(
 define <vscale x 4 x float>  @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32(
@@ -922,9 +907,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.f32(
@@ -946,9 +930,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32(
 define <vscale x 8 x float>  @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32(
@@ -970,9 +953,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.f32(
@@ -994,9 +976,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64(
 define <vscale x 1 x double>  @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64(
@@ -1018,9 +999,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.f64(
@@ -1042,9 +1022,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64(
 define <vscale x 2 x double>  @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64(
@@ -1066,9 +1045,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.f64(
@@ -1090,9 +1068,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64(
 define <vscale x 4 x double>  @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64(
@@ -1114,9 +1091,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll
index b117a120b04d1..1d502c84b1981 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
@@ -9,9 +9,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
@@ -33,9 +32,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfrdiv.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrdiv.mask.nxv1f16.f16(
@@ -56,9 +54,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfrdiv.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrdiv.nxv2f16.f16(
@@ -80,9 +77,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfrdiv.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrdiv.mask.nxv2f16.f16(
@@ -103,9 +99,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfrdiv.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrdiv.nxv4f16.f16(
@@ -127,9 +122,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfrdiv.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrdiv.mask.nxv4f16.f16(
@@ -150,9 +144,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfrdiv.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrdiv.nxv8f16.f16(
@@ -174,9 +167,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfrdiv.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrdiv.mask.nxv8f16.f16(
@@ -197,9 +189,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfrdiv.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrdiv.nxv16f16.f16(
@@ -221,9 +212,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfrdiv.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrdiv.mask.nxv16f16.f16(
@@ -244,9 +234,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfrdiv.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrdiv.nxv32f16.f16(
@@ -268,9 +257,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfrdiv.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrdiv.mask.nxv32f16.f16(
@@ -291,9 +279,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfrdiv.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrdiv.nxv1f32.f32(
@@ -315,9 +302,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfrdiv.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrdiv.mask.nxv1f32.f32(
@@ -338,9 +324,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfrdiv.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrdiv.nxv2f32.f32(
@@ -362,9 +347,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfrdiv.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrdiv.mask.nxv2f32.f32(
@@ -385,9 +369,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.f32(
@@ -409,9 +392,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfrdiv.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrdiv.mask.nxv4f32.f32(
@@ -432,9 +414,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfrdiv.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrdiv.nxv8f32.f32(
@@ -456,9 +437,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfrdiv.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrdiv.mask.nxv8f32.f32(
@@ -479,9 +459,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfrdiv.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrdiv.nxv16f32.f32(
@@ -503,9 +482,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfrdiv.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrdiv.mask.nxv16f32.f32(
@@ -526,13 +504,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfrdiv.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrdiv.nxv1f64.f64(
@@ -554,13 +527,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfrdiv.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrdiv.mask.nxv1f64.f64(
@@ -581,13 +549,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfrdiv.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrdiv.nxv2f64.f64(
@@ -609,13 +572,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfrdiv.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrdiv.mask.nxv2f64.f64(
@@ -636,13 +594,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfrdiv.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrdiv.nxv4f64.f64(
@@ -664,13 +617,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfrdiv.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrdiv.mask.nxv4f64.f64(
@@ -691,13 +639,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfrdiv.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrdiv.nxv8f64.f64(
@@ -719,13 +662,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfrdiv.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrdiv.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
index 6cc0b53443a8f..ccdd6ad371896 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
@@ -9,9 +9,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfrdiv_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
@@ -33,9 +32,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfrdiv.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfrdiv_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrdiv.mask.nxv1f16.f16(
@@ -56,9 +54,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfrdiv.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfrdiv_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrdiv.nxv2f16.f16(
@@ -80,9 +77,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfrdiv.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfrdiv_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrdiv.mask.nxv2f16.f16(
@@ -103,9 +99,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfrdiv.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfrdiv_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrdiv.nxv4f16.f16(
@@ -127,9 +122,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfrdiv.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfrdiv_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrdiv.mask.nxv4f16.f16(
@@ -150,9 +144,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfrdiv.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfrdiv_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrdiv.nxv8f16.f16(
@@ -174,9 +167,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfrdiv.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfrdiv_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrdiv.mask.nxv8f16.f16(
@@ -197,9 +189,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfrdiv.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfrdiv_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrdiv.nxv16f16.f16(
@@ -221,9 +212,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfrdiv.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfrdiv_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrdiv.mask.nxv16f16.f16(
@@ -244,9 +234,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfrdiv.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfrdiv_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrdiv.nxv32f16.f16(
@@ -268,9 +257,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfrdiv.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfrdiv_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrdiv.mask.nxv32f16.f16(
@@ -291,9 +279,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfrdiv.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfrdiv_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrdiv.nxv1f32.f32(
@@ -315,9 +302,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfrdiv.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfrdiv_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrdiv.mask.nxv1f32.f32(
@@ -338,9 +324,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfrdiv.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfrdiv_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrdiv.nxv2f32.f32(
@@ -362,9 +347,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfrdiv.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfrdiv_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrdiv.mask.nxv2f32.f32(
@@ -385,9 +369,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfrdiv_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.f32(
@@ -409,9 +392,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfrdiv.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfrdiv_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrdiv.mask.nxv4f32.f32(
@@ -432,9 +414,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfrdiv.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfrdiv_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrdiv.nxv8f32.f32(
@@ -456,9 +437,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfrdiv.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfrdiv_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrdiv.mask.nxv8f32.f32(
@@ -479,9 +459,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfrdiv.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfrdiv_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrdiv.nxv16f32.f32(
@@ -503,9 +482,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfrdiv.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfrdiv_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrdiv.mask.nxv16f32.f32(
@@ -526,9 +504,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfrdiv.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfrdiv_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrdiv.nxv1f64.f64(
@@ -550,9 +527,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfrdiv.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfrdiv_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrdiv.mask.nxv1f64.f64(
@@ -573,9 +549,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfrdiv.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfrdiv_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrdiv.nxv2f64.f64(
@@ -597,9 +572,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfrdiv.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfrdiv_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrdiv.mask.nxv2f64.f64(
@@ -620,9 +594,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfrdiv.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfrdiv_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrdiv.nxv4f64.f64(
@@ -644,9 +617,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfrdiv.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfrdiv_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrdiv.mask.nxv4f64.f64(
@@ -667,9 +639,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfrdiv.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfrdiv_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrdiv.nxv8f64.f64(
@@ -691,9 +662,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfrdiv.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfrdiv_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrdiv.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll
index 8bbf0aac2b339..eab5b2a414e18 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
@@ -9,9 +9,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
@@ -33,9 +32,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfrsub.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrsub.mask.nxv1f16.f16(
@@ -56,9 +54,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfrsub.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrsub.nxv2f16.f16(
@@ -80,9 +77,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfrsub.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrsub.mask.nxv2f16.f16(
@@ -103,9 +99,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfrsub.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrsub.nxv4f16.f16(
@@ -127,9 +122,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfrsub.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrsub.mask.nxv4f16.f16(
@@ -150,9 +144,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfrsub.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrsub.nxv8f16.f16(
@@ -174,9 +167,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfrsub.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrsub.mask.nxv8f16.f16(
@@ -197,9 +189,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfrsub.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrsub.nxv16f16.f16(
@@ -221,9 +212,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfrsub.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrsub.mask.nxv16f16.f16(
@@ -244,9 +234,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfrsub.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrsub.nxv32f16.f16(
@@ -268,9 +257,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfrsub.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrsub.mask.nxv32f16.f16(
@@ -291,9 +279,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfrsub.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrsub.nxv1f32.f32(
@@ -315,9 +302,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfrsub.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrsub.mask.nxv1f32.f32(
@@ -338,9 +324,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfrsub.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrsub.nxv2f32.f32(
@@ -362,9 +347,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfrsub.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrsub.mask.nxv2f32.f32(
@@ -385,9 +369,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.f32(
@@ -409,9 +392,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfrsub.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrsub.mask.nxv4f32.f32(
@@ -432,9 +414,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfrsub.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrsub.nxv8f32.f32(
@@ -456,9 +437,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfrsub.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrsub.mask.nxv8f32.f32(
@@ -479,9 +459,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfrsub.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrsub.nxv16f32.f32(
@@ -503,9 +482,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfrsub.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrsub.mask.nxv16f32.f32(
@@ -526,13 +504,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfrsub.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrsub.nxv1f64.f64(
@@ -554,13 +527,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfrsub.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrsub.mask.nxv1f64.f64(
@@ -581,13 +549,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfrsub.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrsub.nxv2f64.f64(
@@ -609,13 +572,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfrsub.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrsub.mask.nxv2f64.f64(
@@ -636,13 +594,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfrsub.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrsub.nxv4f64.f64(
@@ -664,13 +617,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfrsub.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrsub.mask.nxv4f64.f64(
@@ -691,13 +639,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfrsub.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrsub.nxv8f64.f64(
@@ -719,13 +662,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfrsub.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrsub.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
index 85b7148c3c830..0477554c91419 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
@@ -10,9 +10,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
@@ -34,9 +33,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfrsub.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrsub.mask.nxv1f16.f16(
@@ -57,9 +55,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfrsub.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrsub.nxv2f16.f16(
@@ -81,9 +78,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfrsub.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrsub.mask.nxv2f16.f16(
@@ -104,9 +100,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfrsub.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrsub.nxv4f16.f16(
@@ -128,9 +123,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfrsub.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrsub.mask.nxv4f16.f16(
@@ -151,9 +145,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfrsub.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrsub.nxv8f16.f16(
@@ -175,9 +168,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfrsub.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrsub.mask.nxv8f16.f16(
@@ -198,9 +190,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfrsub.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrsub.nxv16f16.f16(
@@ -222,9 +213,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfrsub.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrsub.mask.nxv16f16.f16(
@@ -245,9 +235,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfrsub.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrsub.nxv32f16.f16(
@@ -269,9 +258,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfrsub.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrsub.mask.nxv32f16.f16(
@@ -292,9 +280,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfrsub.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrsub.nxv1f32.f32(
@@ -316,9 +303,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfrsub.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrsub.mask.nxv1f32.f32(
@@ -339,9 +325,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfrsub.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrsub.nxv2f32.f32(
@@ -363,9 +348,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfrsub.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrsub.mask.nxv2f32.f32(
@@ -386,9 +370,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.f32(
@@ -410,9 +393,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfrsub.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrsub.mask.nxv4f32.f32(
@@ -433,9 +415,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfrsub.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrsub.nxv8f32.f32(
@@ -457,9 +438,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfrsub.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrsub.mask.nxv8f32.f32(
@@ -480,9 +460,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfrsub.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrsub.nxv16f32.f32(
@@ -504,9 +483,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfrsub.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrsub.mask.nxv16f32.f32(
@@ -527,9 +505,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfrsub.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrsub.nxv1f64.f64(
@@ -551,9 +528,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfrsub.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrsub.mask.nxv1f64.f64(
@@ -574,9 +550,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfrsub.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrsub.nxv2f64.f64(
@@ -598,9 +573,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfrsub.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrsub.mask.nxv2f64.f64(
@@ -621,9 +595,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfrsub.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrsub.nxv4f64.f64(
@@ -645,9 +618,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfrsub.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrsub.mask.nxv4f64.f64(
@@ -668,9 +640,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfrsub.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrsub.nxv8f64.f64(
@@ -692,9 +663,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfrsub.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrsub.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll
index a1dcc1fa26bdc..c0e999a2433d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
index 8ff4b4ba03e2a..d71fb8fb25352 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnj_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnj_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnj_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnj_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnj_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnj_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnj_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnj_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnj_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnj_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnj_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnj_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnj_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnj_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnj_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnj_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnj_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnj_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnj_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnj_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.f32(
@@ -1204,9 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnj_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.f64(
@@ -1228,9 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnj_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64.f64(
@@ -1251,9 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnj_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.f64(
@@ -1275,9 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnj_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64.f64(
@@ -1298,9 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnj_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.f64(
@@ -1322,9 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnj_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64.f64(
@@ -1345,9 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnj_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.f64(
@@ -1369,9 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll
index ac708fa2d9918..0287f9ea2cbf3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
index 9ff02249165bc..f751fd7406389 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnjn_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnjn_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnjn_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnjn_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnjn_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnjn_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnjn_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnjn_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnjn_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnjn_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnjn_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnjn_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnjn_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnjn_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnjn_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnjn_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnjn_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnjn_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnjn_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnjn_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnjn_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnjn_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32.f32(
@@ -1204,9 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnjn_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.f64(
@@ -1228,9 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnjn_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64.f64(
@@ -1251,9 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnjn_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.f64(
@@ -1275,9 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnjn_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64.f64(
@@ -1298,9 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnjn_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.f64(
@@ -1322,9 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnjn_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64.f64(
@@ -1345,9 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnjn_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.f64(
@@ -1369,9 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnjn_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll
index 8f4fcaa73c42c..fba4eceb23fb6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
index 77ebd259c6896..4ae69f0d4f613 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnjx_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsgnjx_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnjx_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsgnjx_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnjx_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsgnjx_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnjx_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsgnjx_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnjx_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsgnjx_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnjx_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsgnjx_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnjx_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsgnjx_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnjx_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsgnjx_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnjx_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsgnjx_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnjx_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsgnjx_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnjx_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsgnjx_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32.f32(
@@ -1204,9 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnjx_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.f64(
@@ -1228,9 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsgnjx_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64.f64(
@@ -1251,9 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnjx_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.f64(
@@ -1275,9 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsgnjx_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64.f64(
@@ -1298,9 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnjx_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.f64(
@@ -1322,9 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsgnjx_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64.f64(
@@ -1345,9 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnjx_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.f64(
@@ -1369,9 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsgnjx_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll
index a4833408acfff..5baadc48d857e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
@@ -9,9 +9,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
@@ -33,9 +32,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1down.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfslide1down.mask.nxv1f16.f16(
@@ -56,9 +54,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1down.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfslide1down.nxv2f16.f16(
@@ -80,9 +77,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1down.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfslide1down.mask.nxv2f16.f16(
@@ -103,9 +99,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1down.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfslide1down.nxv4f16.f16(
@@ -127,9 +122,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1down.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfslide1down.mask.nxv4f16.f16(
@@ -150,9 +144,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1down.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfslide1down.nxv8f16.f16(
@@ -174,9 +167,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1down.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfslide1down.mask.nxv8f16.f16(
@@ -197,9 +189,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1down.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfslide1down.nxv16f16.f16(
@@ -221,9 +212,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1down.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfslide1down.mask.nxv16f16.f16(
@@ -244,9 +234,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1down.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfslide1down.nxv32f16.f16(
@@ -268,9 +257,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1down.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfslide1down.mask.nxv32f16.f16(
@@ -291,9 +279,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1down.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfslide1down.nxv1f32.f32(
@@ -315,9 +302,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1down.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfslide1down.mask.nxv1f32.f32(
@@ -338,9 +324,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1down.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfslide1down.nxv2f32.f32(
@@ -362,9 +347,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1down.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfslide1down.mask.nxv2f32.f32(
@@ -385,9 +369,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(
@@ -409,9 +392,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1down.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.mask.nxv4f32.f32(
@@ -432,9 +414,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1down.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfslide1down.nxv8f32.f32(
@@ -456,9 +437,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1down.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfslide1down.mask.nxv8f32.f32(
@@ -479,9 +459,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1down.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfslide1down.nxv16f32.f32(
@@ -503,9 +482,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1down.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfslide1down.mask.nxv16f32.f32(
@@ -526,13 +504,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1down.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1down.nxv1f64.f64(
@@ -554,13 +527,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1down.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1down.mask.nxv1f64.f64(
@@ -581,13 +549,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1down.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1down.nxv2f64.f64(
@@ -609,13 +572,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1down.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1down.mask.nxv2f64.f64(
@@ -636,13 +594,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1down.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1down.nxv4f64.f64(
@@ -664,13 +617,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1down.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1down.mask.nxv4f64.f64(
@@ -691,13 +639,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1down.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1down.nxv8f64.f64(
@@ -719,13 +662,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1down.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1down.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
index 84572f5dec0d1..a0ba31ea26686 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
@@ -9,9 +9,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
@@ -33,9 +32,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1down.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfslide1down.mask.nxv1f16.f16(
@@ -56,9 +54,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1down.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfslide1down.nxv2f16.f16(
@@ -80,9 +77,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1down.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfslide1down.mask.nxv2f16.f16(
@@ -103,9 +99,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1down.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfslide1down.nxv4f16.f16(
@@ -127,9 +122,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1down.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfslide1down.mask.nxv4f16.f16(
@@ -150,9 +144,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1down.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfslide1down.nxv8f16.f16(
@@ -174,9 +167,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1down.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfslide1down.mask.nxv8f16.f16(
@@ -197,9 +189,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1down.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfslide1down.nxv16f16.f16(
@@ -221,9 +212,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1down.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfslide1down.mask.nxv16f16.f16(
@@ -244,9 +234,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1down.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfslide1down.nxv32f16.f16(
@@ -268,9 +257,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1down.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfslide1down.mask.nxv32f16.f16(
@@ -291,9 +279,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1down.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfslide1down.nxv1f32.f32(
@@ -315,9 +302,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1down.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfslide1down.mask.nxv1f32.f32(
@@ -338,9 +324,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1down.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfslide1down.nxv2f32.f32(
@@ -362,9 +347,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1down.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfslide1down.mask.nxv2f32.f32(
@@ -385,9 +369,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(
@@ -409,9 +392,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1down.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.mask.nxv4f32.f32(
@@ -432,9 +414,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1down.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfslide1down.nxv8f32.f32(
@@ -456,9 +437,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1down.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfslide1down.mask.nxv8f32.f32(
@@ -479,9 +459,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1down.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfslide1down.nxv16f32.f32(
@@ -503,9 +482,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1down.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfslide1down.mask.nxv16f32.f32(
@@ -526,9 +504,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1down.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1down.nxv1f64.f64(
@@ -550,9 +527,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1down.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1down.mask.nxv1f64.f64(
@@ -573,9 +549,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1down.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1down.nxv2f64.f64(
@@ -597,9 +572,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1down.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1down.mask.nxv2f64.f64(
@@ -620,9 +594,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1down.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1down.nxv4f64.f64(
@@ -644,9 +617,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1down.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1down.mask.nxv4f64.f64(
@@ -667,9 +639,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1down.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1down.nxv8f64.f64(
@@ -691,9 +662,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1down.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1down.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll
index 0e5b566812011..271bf70522bfa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
@@ -9,9 +9,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -34,9 +33,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1up.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfslide1up.mask.nxv1f16.f16(
@@ -57,9 +55,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1up.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -82,9 +79,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1up.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfslide1up.mask.nxv2f16.f16(
@@ -105,9 +101,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1up.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -130,9 +125,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1up.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfslide1up.mask.nxv4f16.f16(
@@ -153,9 +147,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1up.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -178,9 +171,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1up.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfslide1up.mask.nxv8f16.f16(
@@ -201,9 +193,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1up.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -226,9 +217,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1up.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfslide1up.mask.nxv16f16.f16(
@@ -249,9 +239,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1up.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -274,9 +263,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1up.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfslide1up.mask.nxv32f16.f16(
@@ -297,9 +285,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1up.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -322,9 +309,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1up.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfslide1up.mask.nxv1f32.f32(
@@ -345,9 +331,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1up.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -370,9 +355,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1up.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfslide1up.mask.nxv2f32.f32(
@@ -393,9 +377,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1up.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -418,9 +401,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1up.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1up.mask.nxv4f32.f32(
@@ -441,9 +423,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1up.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -466,9 +447,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1up.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfslide1up.mask.nxv8f32.f32(
@@ -489,9 +469,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1up.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -514,9 +493,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1up.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfslide1up.mask.nxv16f32.f32(
@@ -537,14 +515,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1up.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v9
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1up.nxv1f64.f64(
@@ -566,13 +539,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1up.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1up.mask.nxv1f64.f64(
@@ -593,14 +561,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1up.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1up.nxv2f64.f64(
@@ -622,13 +585,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1up.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1up.mask.nxv2f64.f64(
@@ -649,14 +607,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1up.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1up.nxv4f64.f64(
@@ -678,13 +631,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1up.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1up.mask.nxv4f64.f64(
@@ -705,14 +653,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1up.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1up.nxv8f64.f64(
@@ -734,13 +677,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1up.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1up.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
index 37afacc6c7f27..4b7d1fe55e244 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
@@ -9,9 +9,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -34,9 +33,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1up.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfslide1up.mask.nxv1f16.f16(
@@ -57,9 +55,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1up.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -82,9 +79,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1up.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfslide1up.mask.nxv2f16.f16(
@@ -105,9 +101,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1up.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -130,9 +125,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1up.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfslide1up.mask.nxv4f16.f16(
@@ -153,9 +147,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1up.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -178,9 +171,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1up.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfslide1up.mask.nxv8f16.f16(
@@ -201,9 +193,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1up.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -226,9 +217,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1up.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfslide1up.mask.nxv16f16.f16(
@@ -249,9 +239,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1up.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -274,9 +263,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1up.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfslide1up.mask.nxv32f16.f16(
@@ -297,9 +285,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1up.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -322,9 +309,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1up.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfslide1up.mask.nxv1f32.f32(
@@ -345,9 +331,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1up.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -370,9 +355,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1up.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfslide1up.mask.nxv2f32.f32(
@@ -393,9 +377,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1up.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -418,9 +401,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1up.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1up.mask.nxv4f32.f32(
@@ -441,9 +423,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1up.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -466,9 +447,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1up.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfslide1up.mask.nxv8f32.f32(
@@ -489,9 +469,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1up.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -514,9 +493,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1up.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfslide1up.mask.nxv16f32.f32(
@@ -537,9 +515,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1up.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -562,9 +539,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1up.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1up.mask.nxv1f64.f64(
@@ -585,9 +561,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1up.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -610,9 +585,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1up.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1up.mask.nxv2f64.f64(
@@ -633,9 +607,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1up.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -658,9 +631,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1up.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1up.mask.nxv4f64.f64(
@@ -681,9 +653,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1up.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -706,9 +677,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1up.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1up.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll
index 99e45214deea0..86371c1685fc0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -687,9 +687,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.f16(
@@ -711,9 +710,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.f16(
@@ -734,9 +732,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.f16(
@@ -758,9 +755,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.f16(
@@ -781,9 +777,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.f16(
@@ -805,9 +800,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.f16(
@@ -828,9 +822,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.f16(
@@ -852,9 +845,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.f16(
@@ -875,9 +867,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.f16(
@@ -899,9 +890,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.f16(
@@ -922,9 +912,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.f16(
@@ -946,9 +935,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.f16(
@@ -969,9 +957,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.f32(
@@ -993,9 +980,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.f32(
@@ -1016,9 +1002,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.f32(
@@ -1040,9 +1025,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.f32(
@@ -1063,9 +1047,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.f32(
@@ -1087,9 +1070,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.f32(
@@ -1110,9 +1092,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.f32(
@@ -1134,9 +1115,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.f32(
@@ -1157,9 +1137,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.f32(
@@ -1181,9 +1160,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.f32(
@@ -1204,13 +1182,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.f64(
@@ -1232,13 +1205,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.f64(
@@ -1259,13 +1227,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.f64(
@@ -1287,13 +1250,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.f64(
@@ -1314,13 +1272,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.f64(
@@ -1342,13 +1295,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.f64(
@@ -1369,13 +1317,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.f64(
@@ -1397,13 +1340,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw a0, 8(sp)
-; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    fld ft0, 8(sp)
-; CHECK-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, ft0, v0.t
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
index c972ed358bf9a..7445cfb806d43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
 ; RUN:   -mattr=+d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -688,9 +688,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.f16(
@@ -712,9 +711,8 @@ declare <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.f16(
 define <vscale x 1 x half> @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.f16(
@@ -735,9 +733,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.f16(
@@ -759,9 +756,8 @@ declare <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.f16(
 define <vscale x 2 x half> @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.f16(
@@ -782,9 +778,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.f16(
@@ -806,9 +801,8 @@ declare <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.f16(
 define <vscale x 4 x half> @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.f16(
@@ -829,9 +823,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.f16(
@@ -853,9 +846,8 @@ declare <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.f16(
 define <vscale x 8 x half> @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.f16(
@@ -876,9 +868,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.f16(
@@ -900,9 +891,8 @@ declare <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.f16(
 define <vscale x 16 x half> @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.f16(
@@ -923,9 +913,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.f16(
@@ -947,9 +936,8 @@ declare <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.f16(
 define <vscale x 32 x half> @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.f16(
@@ -970,9 +958,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.f32(
@@ -994,9 +981,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.f32(
 define <vscale x 1 x float> @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.f32(
@@ -1017,9 +1003,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.f32(
@@ -1041,9 +1026,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.f32(
 define <vscale x 2 x float> @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.f32(
@@ -1064,9 +1048,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.f32(
@@ -1088,9 +1071,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.f32(
 define <vscale x 4 x float> @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.f32(
@@ -1111,9 +1093,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.f32(
@@ -1135,9 +1116,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.f32(
 define <vscale x 8 x float> @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.f32(
@@ -1158,9 +1138,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.f32(
@@ -1182,9 +1161,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.f32(
 define <vscale x 16 x float> @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.f32(
@@ -1205,9 +1183,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.f64(
@@ -1229,9 +1206,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.f64(
 define <vscale x 1 x double> @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.f64(
@@ -1252,9 +1228,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.f64(
@@ -1276,9 +1251,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.f64(
 define <vscale x 2 x double> @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.f64(
@@ -1299,9 +1273,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.f64(
@@ -1323,9 +1296,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.f64(
 define <vscale x 4 x double> @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.f64(
@@ -1346,9 +1318,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.f64(
@@ -1370,9 +1341,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.f64(
 define <vscale x 8 x double> @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.d.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll
index 02ce6a1c85f90..a3bdcc4573287 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -423,9 +423,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16(
@@ -471,9 +469,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16(
@@ -519,9 +515,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16(
@@ -567,9 +561,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16(
@@ -615,9 +607,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16(
@@ -663,9 +653,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32(
@@ -711,9 +699,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32(
@@ -759,9 +745,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32(
@@ -807,9 +791,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
index 135b56dff1b47..a04b9a54b9306 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -423,9 +423,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16(
@@ -471,9 +469,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16(
@@ -519,9 +515,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16(
@@ -567,9 +561,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16(
@@ -615,9 +607,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16(
@@ -663,9 +653,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32(
@@ -711,9 +699,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32(
@@ -759,9 +745,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32(
@@ -807,9 +791,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
index fde2d9f89d045..3d046d2ba8057 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -416,9 +416,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.f16(
@@ -440,9 +439,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
@@ -463,9 +461,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.f16(
@@ -487,9 +484,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
@@ -510,9 +506,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.f16(
@@ -534,9 +529,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
@@ -557,9 +551,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.f16(
@@ -581,9 +574,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
@@ -604,9 +596,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.f16(
@@ -628,9 +619,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
@@ -651,9 +641,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.f32(
@@ -675,9 +664,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
@@ -698,9 +686,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.f32(
@@ -722,9 +709,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
@@ -745,9 +731,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.f32(
@@ -769,9 +754,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
@@ -792,9 +776,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.f32(
@@ -816,9 +799,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
@@ -987,9 +969,8 @@ entry:
 define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
@@ -1005,9 +986,8 @@ entry:
 define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
@@ -1023,9 +1003,8 @@ entry:
 define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
@@ -1041,9 +1020,8 @@ entry:
 define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
@@ -1059,9 +1037,8 @@ entry:
 define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
@@ -1077,9 +1054,8 @@ entry:
 define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
@@ -1095,9 +1071,8 @@ entry:
 define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
@@ -1113,9 +1088,8 @@ entry:
 define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
@@ -1131,9 +1105,8 @@ entry:
 define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
index 9235c3f251d41..3586ec64e9dfe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -416,9 +416,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.f16(
@@ -440,9 +439,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
@@ -463,9 +461,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.f16(
@@ -487,9 +484,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
@@ -510,9 +506,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.f16(
@@ -534,9 +529,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
@@ -557,9 +551,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.f16(
@@ -581,9 +574,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
@@ -604,9 +596,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.f16(
@@ -628,9 +619,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
@@ -651,9 +641,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.f32(
@@ -675,9 +664,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
@@ -698,9 +686,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.f32(
@@ -722,9 +709,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
@@ -745,9 +731,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.f32(
@@ -769,9 +754,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
@@ -792,9 +776,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.f32(
@@ -816,9 +799,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
@@ -987,9 +969,8 @@ entry:
 define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
@@ -1005,9 +986,8 @@ entry:
 define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
@@ -1023,9 +1003,8 @@ entry:
 define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
@@ -1041,9 +1020,8 @@ entry:
 define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
@@ -1059,9 +1037,8 @@ entry:
 define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
@@ -1077,9 +1054,8 @@ entry:
 define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
@@ -1095,9 +1071,8 @@ entry:
 define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
@@ -1113,9 +1088,8 @@ entry:
 define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
@@ -1131,9 +1105,8 @@ entry:
 define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll
index ffcfd186c8e10..0de121cb3f002 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -424,9 +424,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16(
 define <vscale x 1 x float>  @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16(
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.f16(
@@ -472,9 +470,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16(
 define <vscale x 2 x float>  @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16(
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.f16(
@@ -520,9 +516,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16(
 define <vscale x 4 x float>  @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16(
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.f16(
@@ -568,9 +562,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16(
 define <vscale x 8 x float>  @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16(
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.f16(
@@ -616,9 +608,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16(
 define <vscale x 16 x float>  @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16(
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.f16(
@@ -664,9 +654,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32(
 define <vscale x 1 x double>  @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32(
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.f32(
@@ -712,9 +700,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32(
 define <vscale x 2 x double>  @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32(
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.f32(
@@ -760,9 +746,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32(
 define <vscale x 4 x double>  @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32(
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.f32(
@@ -808,9 +792,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32(
 define <vscale x 8 x double>  @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32(
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
index bd2dac9493022..eb21c54c18e90 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -424,9 +424,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16(
 define <vscale x 1 x float>  @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16(
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.f16(
@@ -472,9 +470,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16(
 define <vscale x 2 x float>  @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16(
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.f16(
@@ -520,9 +516,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16(
 define <vscale x 4 x float>  @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16(
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.f16(
@@ -568,9 +562,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16(
 define <vscale x 8 x float>  @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16(
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.f16(
@@ -616,9 +608,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16(
 define <vscale x 16 x float>  @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16(
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.f16(
@@ -664,9 +654,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32(
 define <vscale x 1 x double>  @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32(
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.f32(
@@ -712,9 +700,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32(
 define <vscale x 2 x double>  @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32(
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.f32(
@@ -760,9 +746,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32(
 define <vscale x 4 x double>  @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32(
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.f32(
@@ -808,9 +792,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32(
 define <vscale x 8 x double>  @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32(
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll
index 8035ec77c03ff..82c4fad996e71 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -424,9 +424,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16(
 define <vscale x 1 x float>  @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16(
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.f16(
@@ -472,9 +470,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16(
 define <vscale x 2 x float>  @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16(
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.f16(
@@ -520,9 +516,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16(
 define <vscale x 4 x float>  @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16(
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.f16(
@@ -568,9 +562,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16(
 define <vscale x 8 x float>  @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16(
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.f16(
@@ -616,9 +608,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16(
 define <vscale x 16 x float>  @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16(
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.f16(
@@ -664,9 +654,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32(
 define <vscale x 1 x double>  @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32(
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.f32(
@@ -712,9 +700,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32(
 define <vscale x 2 x double>  @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32(
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.f32(
@@ -760,9 +746,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32(
 define <vscale x 4 x double>  @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32(
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.f32(
@@ -808,9 +792,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32(
 define <vscale x 8 x double>  @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32(
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
index 01d2ce92c916c..b2e1e235e9695 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -424,9 +424,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16(
 define <vscale x 1 x float>  @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16(
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.f16(
@@ -472,9 +470,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16(
 define <vscale x 2 x float>  @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16(
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.f16(
@@ -520,9 +516,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16(
 define <vscale x 4 x float>  @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16(
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.f16(
@@ -568,9 +562,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16(
 define <vscale x 8 x float>  @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16(
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.f16(
@@ -616,9 +608,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16(
 define <vscale x 16 x float>  @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16(
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.f16(
@@ -664,9 +654,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32(
 define <vscale x 1 x double>  @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32(
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.f32(
@@ -712,9 +700,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32(
 define <vscale x 2 x double>  @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32(
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.f32(
@@ -760,9 +746,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32(
 define <vscale x 4 x double>  @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32(
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.f32(
@@ -808,9 +792,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32(
 define <vscale x 8 x double>  @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32(
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
index 8e40ddc009af6..670c79975a2e0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -423,9 +423,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16(
@@ -471,9 +469,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16(
@@ -519,9 +515,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwmul.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16(
@@ -567,9 +561,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16(
@@ -615,9 +607,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16(
@@ -663,9 +653,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32(
@@ -711,9 +699,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwmul.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32(
@@ -759,9 +745,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32(
@@ -807,9 +791,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll
index 9b5a7b0641806..fc7d8dcb59e31 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -423,9 +423,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16(
@@ -471,9 +469,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16(
@@ -519,9 +515,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwmul.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16(
@@ -567,9 +561,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16(
@@ -615,9 +607,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16(
@@ -663,9 +653,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32(
@@ -711,9 +699,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwmul.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32(
@@ -759,9 +745,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32(
@@ -807,9 +791,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwmul.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll
index 8a5db70f5baf7..02842609f5685 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -424,9 +424,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16(
 define <vscale x 1 x float>  @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16(
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.f16(
@@ -472,9 +470,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16(
 define <vscale x 2 x float>  @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16(
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.f16(
@@ -520,9 +516,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16(
 define <vscale x 4 x float>  @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16(
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.f16(
@@ -568,9 +562,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16(
 define <vscale x 8 x float>  @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16(
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.f16(
@@ -616,9 +608,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16(
 define <vscale x 16 x float>  @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16(
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.f16(
@@ -664,9 +654,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32(
 define <vscale x 1 x double>  @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32(
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.f32(
@@ -712,9 +700,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32(
 define <vscale x 2 x double>  @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32(
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.f32(
@@ -760,9 +746,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32(
 define <vscale x 4 x double>  @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32(
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.f32(
@@ -808,9 +792,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32(
 define <vscale x 8 x double>  @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32(
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
index 0b21a7aa9b395..ff2b40cfac2cd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -424,9 +424,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16(
 define <vscale x 1 x float>  @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16(
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.f16(
@@ -472,9 +470,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16(
 define <vscale x 2 x float>  @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16(
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.f16(
@@ -520,9 +516,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16(
 define <vscale x 4 x float>  @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16(
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.f16(
@@ -568,9 +562,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16(
 define <vscale x 8 x float>  @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16(
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.f16(
@@ -616,9 +608,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16(
 define <vscale x 16 x float>  @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16(
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.f16(
@@ -664,9 +654,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32(
 define <vscale x 1 x double>  @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32(
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.f32(
@@ -712,9 +700,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32(
 define <vscale x 2 x double>  @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32(
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.f32(
@@ -760,9 +746,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32(
 define <vscale x 4 x double>  @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32(
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.f32(
@@ -808,9 +792,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32(
 define <vscale x 8 x double>  @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32(
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll
index 554234bb68a61..fe9683ed15adb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -424,9 +424,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16(
 define <vscale x 1 x float>  @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16(
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.f16(
@@ -472,9 +470,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16(
 define <vscale x 2 x float>  @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16(
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.f16(
@@ -520,9 +516,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16(
 define <vscale x 4 x float>  @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16(
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.f16(
@@ -568,9 +562,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16(
 define <vscale x 8 x float>  @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16(
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.f16(
@@ -616,9 +608,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16(
 define <vscale x 16 x float>  @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16(
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.f16(
@@ -664,9 +654,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32(
 define <vscale x 1 x double>  @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32(
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.f32(
@@ -712,9 +700,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32(
 define <vscale x 2 x double>  @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32(
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.f32(
@@ -760,9 +746,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32(
 define <vscale x 4 x double>  @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32(
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.f32(
@@ -808,9 +792,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32(
 define <vscale x 8 x double>  @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32(
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
index 44f2e66113939..2fe370bb1d82f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -424,9 +424,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16(
 define <vscale x 1 x float>  @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16(
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.f16(
@@ -472,9 +470,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16(
 define <vscale x 2 x float>  @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16(
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.f16(
@@ -520,9 +516,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16(
 define <vscale x 4 x float>  @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16(
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.f16(
@@ -568,9 +562,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16(
 define <vscale x 8 x float>  @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16(
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.f16(
@@ -616,9 +608,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16(
 define <vscale x 16 x float>  @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16(
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.f16(
@@ -664,9 +654,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32(
 define <vscale x 1 x double>  @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32(
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v9, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.f32(
@@ -712,9 +700,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32(
 define <vscale x 2 x double>  @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v10
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32(
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v10, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.f32(
@@ -760,9 +746,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32(
 define <vscale x 4 x double>  @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v12
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32(
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v12, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.f32(
@@ -808,9 +792,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32(
 define <vscale x 8 x double>  @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v16
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32(
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, ft0, v16, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll
index 0d7c2e7fe0867..e81121f848ddb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -423,9 +423,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16(
@@ -471,9 +469,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16(
@@ -519,9 +515,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16(
@@ -567,9 +561,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16(
@@ -615,9 +607,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16(
@@ -663,9 +653,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32(
@@ -711,9 +699,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32(
@@ -759,9 +745,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32(
@@ -807,9 +791,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
index cdc77ffc55f8a..d4b0780f03c57 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
@@ -423,9 +423,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -448,9 +447,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16(
 define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16(
@@ -471,9 +469,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -496,9 +493,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16(
 define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16(
@@ -519,9 +515,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -544,9 +539,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16(
 define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16(
@@ -567,9 +561,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -592,9 +585,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16(
 define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16(
@@ -615,9 +607,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -640,9 +631,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16(
 define <vscale x 16 x float> @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16(
@@ -663,9 +653,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
 ; CHECK-NEXT:    vmv1r.v v8, v9
 ; CHECK-NEXT:    ret
 entry:
@@ -688,9 +677,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32(
 define <vscale x 1 x double> @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32(
@@ -711,9 +699,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v10, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v10, v8, fa0
 ; CHECK-NEXT:    vmv2r.v v8, v10
 ; CHECK-NEXT:    ret
 entry:
@@ -736,9 +723,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32(
 define <vscale x 2 x double> @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32(
@@ -759,9 +745,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v12, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v12, v8, fa0
 ; CHECK-NEXT:    vmv4r.v v8, v12
 ; CHECK-NEXT:    ret
 entry:
@@ -784,9 +769,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32(
 define <vscale x 4 x double> @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32(
@@ -807,9 +791,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v16, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v16, v8, fa0
 ; CHECK-NEXT:    vmv8r.v v8, v16
 ; CHECK-NEXT:    ret
 entry:
@@ -832,9 +815,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32(
 define <vscale x 8 x double> @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
index 3234dec9db144..da2290be93d27 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=ilp32d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -416,9 +416,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.f16(
@@ -440,9 +439,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
@@ -463,9 +461,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.f16(
@@ -487,9 +484,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
@@ -510,9 +506,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.f16(
@@ -534,9 +529,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
@@ -557,9 +551,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.f16(
@@ -581,9 +574,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
@@ -604,9 +596,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.f16(
@@ -628,9 +619,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
@@ -651,9 +641,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.f32(
@@ -675,9 +664,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
@@ -698,9 +686,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.f32(
@@ -722,9 +709,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
@@ -745,9 +731,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.f32(
@@ -769,9 +754,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
@@ -792,9 +776,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.f32(
@@ -816,9 +799,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
@@ -987,9 +969,8 @@ entry:
 define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
@@ -1005,9 +986,8 @@ entry:
 define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
@@ -1023,9 +1003,8 @@ entry:
 define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
@@ -1041,9 +1020,8 @@ entry:
 define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
@@ -1059,9 +1037,8 @@ entry:
 define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
@@ -1077,9 +1054,8 @@ entry:
 define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
@@ -1095,9 +1071,8 @@ entry:
 define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
@@ -1113,9 +1088,8 @@ entry:
 define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
@@ -1131,9 +1105,8 @@ entry:
 define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
index 545bc4e4a53de..ec0bd527dafe6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN:   -target-abi=lp64d < %s | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
@@ -416,9 +416,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.f16(
@@ -440,9 +439,8 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
 define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
@@ -463,9 +461,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.f16(
@@ -487,9 +484,8 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
 define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
@@ -510,9 +506,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.f16(
@@ -534,9 +529,8 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
 define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
@@ -557,9 +551,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.f16(
@@ -581,9 +574,8 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
 define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
@@ -604,9 +596,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.f16(
@@ -628,9 +619,8 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
 define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
@@ -651,9 +641,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.f32(
@@ -675,9 +664,8 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
 define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
@@ -698,9 +686,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.f32(
@@ -722,9 +709,8 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
 define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v10, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v10, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
@@ -745,9 +731,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.f32(
@@ -769,9 +754,8 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
 define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v12, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v12, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
@@ -792,9 +776,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.f32(
@@ -816,9 +799,8 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
 define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v16, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v16, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
@@ -987,9 +969,8 @@ entry:
 define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
@@ -1005,9 +986,8 @@ entry:
 define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
@@ -1023,9 +1003,8 @@ entry:
 define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
@@ -1041,9 +1020,8 @@ entry:
 define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
@@ -1059,9 +1037,8 @@ entry:
 define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.h.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
@@ -1077,9 +1054,8 @@ entry:
 define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
@@ -1095,9 +1071,8 @@ entry:
 define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
@@ -1113,9 +1088,8 @@ entry:
 define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
@@ -1131,9 +1105,8 @@ entry:
 define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    fmv.w.x ft0, a0
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, ft0, v0.t
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
+; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(

From 3dc6fd5151355c309f0c4595b63268138ac57910 Mon Sep 17 00:00:00 2001
From: Alex Brachet <abrachet@google.com>
Date: Sun, 23 Jan 2022 09:44:09 +0000
Subject: [PATCH 271/946] [llvm-objcopy][MachO] Implement --update-section

Implements `--update-section` which is currently supported for ELF for Mach-O as well

Reviewed By: alexander-shaposhnikov

Differential Revision: https://reviews.llvm.org/D117281
---
 .../llvm-objcopy/MachO/update-section.test    | 115 ++++++++++++++++++
 .../tools/llvm-objcopy/MachO/MachOObjcopy.cpp |  56 +++++++++
 2 files changed, 171 insertions(+)
 create mode 100644 llvm/test/tools/llvm-objcopy/MachO/update-section.test

diff --git a/llvm/test/tools/llvm-objcopy/MachO/update-section.test b/llvm/test/tools/llvm-objcopy/MachO/update-section.test
new file mode 100644
index 0000000000000..a4fa5423ee52b
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/MachO/update-section.test
@@ -0,0 +1,115 @@
+# RUN: echo -n AAAB > %t.diff
+# RUN: echo -n AAA > %t.smaller
+# RUN: echo -n AAAAAAAAA > %t.larger
+
+# RUN: yaml2obj --docnum=1 %s -o %t
+
+# RUN: llvm-objcopy --update-section __TEXT,__text=%t.diff %t - | obj2yaml | FileCheck %s
+# CHECK: content: '41414142'
+
+# RUN: llvm-objcopy --update-section __TEXT,__text=%t.smaller %t - | obj2yaml | FileCheck %s --check-prefix=SMALLER
+# SMALLER: content: '414141'
+
+# RUN: not llvm-objcopy --update-section __TEXT,__text=%t.larger %t /dev/null 2>&1 | FileCheck %s --check-prefix=TOO-LARGE
+# TOO-LARGE: error: {{.*}}new section cannot be larger than previous section
+
+# RUN: not llvm-objcopy --update-section __TEXT,__text=%t.noexist %t /dev/null
+
+# RUN: not llvm-objcopy --update-section __NOEXIST,__text=%t.diff %t /dev/null 2>&1 | FileCheck %s --check-prefix=NO-SEGMENT
+# NO-SEGMENT: error: {{.*}}could not find segment with name '__NOEXIST'
+
+# RUN: not llvm-objcopy --update-section __TEXT,__noexist=%t.diff %t /dev/null 2>&1 | FileCheck %s --check-prefix=NO-SECTION
+# NO-SECTION: error: {{.*}}could not find section with name '__noexist'
+
+# RUN: yaml2obj --docnum=2 %s -o %t
+
+# RUN: llvm-objcopy --update-section __TEXT,__text=%t.diff %t - | obj2yaml | FileCheck %s --check-prefix=FULL-SECNAME
+# FULL-SECNAME: content: '41414142'
+
+# RUN: not llvm-objcopy --update-section __text=%t.dff %t /dev/null 2>&1 | FileCheck %s --check-prefix=NON-CANONICAL-SECNAME
+# NON-CANONICAL-SECNAME: error: {{.*}}invalid section name '__text' (should be formatted as '<segment name>,<section name>')
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x01000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000001
+  ncmds:           1
+  sizeofcmds:      152
+  flags:           0x00002000
+  reserved:        0x00000000
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         152
+    segname:         __TEXT
+    vmaddr:          0
+    vmsize:          4
+    fileoff:         184
+    filesize:        4
+    maxprot:         7
+    initprot:        7
+    nsects:          1
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000000000000
+        content:         '41414141'
+        size:            4
+        offset:          184
+        align:           0
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+        reserved3:       0x00000000
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x01000007
+  cpusubtype:      0x00000003
+  filetype:        0x00000001
+  ncmds:           1
+  sizeofcmds:      312
+  flags:           0x00002000
+  reserved:        0x00000000
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         312
+    segname:         '__TEXT'
+    vmaddr:          0
+    vmsize:          12
+    fileoff:         344
+    filesize:        12
+    maxprot:         7
+    initprot:        7
+    nsects:          3
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0000000000000000
+        content:         'AABBCCDD'
+        size:            4
+        offset:          344
+        align:           0
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x80000400
+        reserved1:       0x00000000
+        reserved2:       0x00000000
+      - sectname:        __text
+        segname:         __TEXT2
+        addr:            0x0000000000000004
+        content:         ''
+        size:            0
+        offset:          348
+        align:           0
+        reloff:          0x00000000
+        nreloc:          0
+        flags:           0x00000000
+        reserved1:       0x00000000
+        reserved2:       0x00000000
diff --git a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
index 915394b65b12d..0f92ca516bef7 100644
--- a/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
+++ b/llvm/tools/llvm-objcopy/MachO/MachOObjcopy.cpp
@@ -317,6 +317,52 @@ static Error addSection(StringRef SecName, StringRef Filename, Object &Obj) {
   return Error::success();
 }
 
+static Expected<Section &> findSection(StringRef SecName, Object &O) {
+  StringRef SegName;
+  std::tie(SegName, SecName) = SecName.split(",");
+  auto FoundSeg =
+      llvm::find_if(O.LoadCommands, [SegName](const LoadCommand &LC) {
+        return LC.getSegmentName() == SegName;
+      });
+  if (FoundSeg == O.LoadCommands.end())
+    return createStringError(errc::invalid_argument,
+                             "could not find segment with name '%s'",
+                             SegName.str().c_str());
+  auto FoundSec = llvm::find_if(FoundSeg->Sections,
+                                [SecName](const std::unique_ptr<Section> &Sec) {
+                                  return Sec->Sectname == SecName;
+                                });
+  if (FoundSec == FoundSeg->Sections.end())
+    return createStringError(errc::invalid_argument,
+                             "could not find section with name '%s'",
+                             SecName.str().c_str());
+
+  assert(FoundSec->get()->CanonicalName == (SegName + "," + SecName).str());
+  return *FoundSec->get();
+}
+
+static Error updateSection(StringRef SecName, StringRef Filename, Object &O) {
+  Expected<Section &> SecToUpdateOrErr = findSection(SecName, O);
+
+  if (!SecToUpdateOrErr)
+    return SecToUpdateOrErr.takeError();
+  Section &Sec = *SecToUpdateOrErr;
+
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+      MemoryBuffer::getFile(Filename);
+  if (!BufOrErr)
+    return createFileError(Filename, errorCodeToError(BufOrErr.getError()));
+  std::unique_ptr<MemoryBuffer> Buf = std::move(*BufOrErr);
+
+  if (Buf->getBufferSize() > Sec.Size)
+    return createStringError(
+        errc::invalid_argument,
+        "new section cannot be larger than previous section");
+  Sec.Content = O.NewSectionsContents.save(Buf->getBuffer());
+  Sec.Size = Sec.Content.size();
+  return Error::success();
+}
+
 // isValidMachOCannonicalName returns success if Name is a MachO cannonical name
 // ("<segment>,<section>") and lengths of both segment and section names are
 // valid.
@@ -374,6 +420,16 @@ static Error handleArgs(const CommonConfig &Config,
       return E;
   }
 
+  for (const auto &Flag : Config.UpdateSection) {
+    StringRef SectionName;
+    StringRef FileName;
+    std::tie(SectionName, FileName) = Flag.split('=');
+    if (Error E = isValidMachOCannonicalName(SectionName))
+      return E;
+    if (Error E = updateSection(SectionName, FileName, Obj))
+      return E;
+  }
+
   if (Error E = processLoadCommands(MachOConfig, Obj))
     return E;
 

From a4f202549208b543ec4475c3210ecdbf84128776 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 11:34:25 +0000
Subject: [PATCH 272/946] [X86] Regenerate avx512-mask-op.ll

Noticed on D86578 - several of the test cases were missing checks as they didn't start on a newline so the update script couldn't see them
---
 llvm/test/CodeGen/X86/avx512-mask-op.ll | 112 ++++++++++++++++++++++--
 1 file changed, 106 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index 959ee566cfcfc..b93fcce26fbc6 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512BW
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq  | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512DQ
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,KNL
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefixes=CHECK,SKX
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512bw  | FileCheck %s --check-prefixes=CHECK,AVX512BW
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=x86_64-apple-darwin -mattr=+avx512dq  | FileCheck %s --check-prefixes=CHECK,AVX512DQ
 ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=X86
 
 
@@ -596,7 +596,22 @@ define <2 x i64> @test5(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64> %y1
   %res = icmp slt <2 x i1>%x_gt_y, %x1_gt_y1
   %resse = sext <2 x i1>%res to <2 x i64>
   ret <2 x i64> %resse
-}define void @test6(<16 x i1> %mask)  {
+}
+
+define void @test6(<16 x i1> %mask)  {
+; CHECK-LABEL: test6:
+; CHECK:       ## %bb.0: ## %allocas
+; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
+; CHECK-NEXT:    vpmovmskb %xmm0, %eax
+; CHECK-NEXT:    testl $21845, %eax ## imm = 0x5555
+; CHECK-NEXT:    retq
+;
+; X86-LABEL: test6:
+; X86:       ## %bb.0: ## %allocas
+; X86-NEXT:    vpsllw $7, %xmm0, %xmm0
+; X86-NEXT:    vpmovmskb %xmm0, %eax
+; X86-NEXT:    testl $21845, %eax ## imm = 0x5555
+; X86-NEXT:    retl
 allocas:
   %a= and <16 x i1> %mask, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
   %b = bitcast <16 x i1> %a to i16
@@ -609,6 +624,7 @@ true:
 false:
   ret void
 }
+
 define void @test7(<8 x i1> %mask)  {
 ; KNL-LABEL: test7:
 ; KNL:       ## %bb.0: ## %allocas
@@ -666,6 +682,7 @@ true:
 false:
   ret void
 }
+
 define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
 ; KNL-LABEL: test8:
 ; KNL:       ## %bb.0:
@@ -756,6 +773,7 @@ define <16 x i8> @test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
   %res = sext <16 x i1> %mix to <16 x i8>
   ret <16 x i8> %res
 }
+
 define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
 ; KNL-LABEL: test9:
 ; KNL:       ## %bb.0:
@@ -838,7 +856,89 @@ define <16 x i1> @test9(<16 x i1>%a, <16 x i1>%b, i32 %a1, i32 %b1) {
   %mask = icmp sgt i32 %a1, %b1
   %c = select i1 %mask, <16 x i1>%a, <16 x i1>%b
   ret <16 x i1>%c
-}define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
+}
+
+define <8 x i1> @test10(<8 x i1>%a, <8 x i1>%b, i32 %a1, i32 %b1) {
+; KNL-LABEL: test10:
+; KNL:       ## %bb.0:
+; KNL-NEXT:    cmpl %esi, %edi
+; KNL-NEXT:    jg LBB19_1
+; KNL-NEXT:  ## %bb.2:
+; KNL-NEXT:    vpmovsxwq %xmm1, %zmm0
+; KNL-NEXT:    jmp LBB19_3
+; KNL-NEXT:  LBB19_1:
+; KNL-NEXT:    vpmovsxwq %xmm0, %zmm0
+; KNL-NEXT:  LBB19_3:
+; KNL-NEXT:    vpsllq $63, %zmm0, %zmm0
+; KNL-NEXT:    vptestmq %zmm0, %zmm0, %k1
+; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    vpmovdw %zmm0, %ymm0
+; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
+; KNL-NEXT:    vzeroupper
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test10:
+; SKX:       ## %bb.0:
+; SKX-NEXT:    cmpl %esi, %edi
+; SKX-NEXT:    jg LBB19_1
+; SKX-NEXT:  ## %bb.2:
+; SKX-NEXT:    vpsllw $15, %xmm1, %xmm0
+; SKX-NEXT:    jmp LBB19_3
+; SKX-NEXT:  LBB19_1:
+; SKX-NEXT:    vpsllw $15, %xmm0, %xmm0
+; SKX-NEXT:  LBB19_3:
+; SKX-NEXT:    vpmovw2m %xmm0, %k0
+; SKX-NEXT:    vpmovm2w %k0, %xmm0
+; SKX-NEXT:    retq
+;
+; AVX512BW-LABEL: test10:
+; AVX512BW:       ## %bb.0:
+; AVX512BW-NEXT:    cmpl %esi, %edi
+; AVX512BW-NEXT:    jg LBB19_1
+; AVX512BW-NEXT:  ## %bb.2:
+; AVX512BW-NEXT:    vpsllw $15, %xmm1, %xmm0
+; AVX512BW-NEXT:    jmp LBB19_3
+; AVX512BW-NEXT:  LBB19_1:
+; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512BW-NEXT:  LBB19_3:
+; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
+; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0
+; AVX512BW-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512DQ-LABEL: test10:
+; AVX512DQ:       ## %bb.0:
+; AVX512DQ-NEXT:    cmpl %esi, %edi
+; AVX512DQ-NEXT:    jg LBB19_1
+; AVX512DQ-NEXT:  ## %bb.2:
+; AVX512DQ-NEXT:    vpmovsxwq %xmm1, %zmm0
+; AVX512DQ-NEXT:    jmp LBB19_3
+; AVX512DQ-NEXT:  LBB19_1:
+; AVX512DQ-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512DQ-NEXT:  LBB19_3:
+; AVX512DQ-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512DQ-NEXT:    vpmovq2m %zmm0, %k0
+; AVX512DQ-NEXT:    vpmovm2d %k0, %zmm0
+; AVX512DQ-NEXT:    vpmovdw %zmm0, %ymm0
+; AVX512DQ-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512DQ-NEXT:    vzeroupper
+; AVX512DQ-NEXT:    retq
+;
+; X86-LABEL: test10:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    jg LBB19_1
+; X86-NEXT:  ## %bb.2:
+; X86-NEXT:    vpsllw $15, %xmm1, %xmm0
+; X86-NEXT:    jmp LBB19_3
+; X86-NEXT:  LBB19_1:
+; X86-NEXT:    vpsllw $15, %xmm0, %xmm0
+; X86-NEXT:  LBB19_3:
+; X86-NEXT:    vpmovw2m %xmm0, %k0
+; X86-NEXT:    vpmovm2w %k0, %xmm0
+; X86-NEXT:    retl
   %mask = icmp sgt i32 %a1, %b1
   %c = select i1 %mask, <8 x i1>%a, <8 x i1>%b
   ret <8 x i1>%c

From ff05b93a02d18c3b388658de2186f570d725ec71 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 12:45:12 +0000
Subject: [PATCH 273/946] [llvm-objdump] Use cast<> instead of dyn_cast<> to
 avoid dereference of nullptr

The pointers are always dereferenced immediately, so assert the cast is correct instead of returning nullptr
---
 llvm/tools/llvm-objdump/MachODump.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp
index 193ba2da19410..31867625f0e5e 100644
--- a/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/llvm/tools/llvm-objdump/MachODump.cpp
@@ -10231,12 +10231,12 @@ static void PrintMachHeader(const MachOObjectFile *Obj, bool verbose) {
 }
 
 void objdump::printMachOFileHeader(const object::ObjectFile *Obj) {
-  const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
+  const MachOObjectFile *file = cast<const MachOObjectFile>(Obj);
   PrintMachHeader(file, Verbose);
 }
 
 void objdump::printMachOLoadCommands(const object::ObjectFile *Obj) {
-  const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
+  const MachOObjectFile *file = cast<const MachOObjectFile>(Obj);
   uint32_t filetype = 0;
   uint32_t cputype = 0;
   if (file->is64Bit()) {

From 20d46fbd4a51a0b80731268f8d72b62e87ead915 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 12:47:52 +0000
Subject: [PATCH 274/946] [CodeGenPrepare] Use dyn_cast result to check for
 null pointers

Simplifies logic and helps the static analyzer correctly check for nullptr dereferences
---
 llvm/lib/CodeGen/CodeGenPrepare.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 747f4e4fdecca..28f24e5ea9088 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -4168,11 +4168,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
 
   // We can get through binary operator, if it is legal. In other words, the
   // binary operator must have a nuw or nsw flag.
-  const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
-  if (isa_and_nonnull<OverflowingBinaryOperator>(BinOp) &&
-      ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
-       (IsSExt && BinOp->hasNoSignedWrap())))
-    return true;
+  if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
+    if (isa<OverflowingBinaryOperator>(BinOp) &&
+        ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
+         (IsSExt && BinOp->hasNoSignedWrap())))
+      return true;
 
   // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
   if ((Inst->getOpcode() == Instruction::And ||
@@ -4181,10 +4181,10 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
 
   // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
   if (Inst->getOpcode() == Instruction::Xor) {
-    const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
     // Make sure it is not a NOT.
-    if (Cst && !Cst->getValue().isAllOnes())
-      return true;
+    if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
+      if (!Cst->getValue().isAllOnes())
+        return true;
   }
 
   // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))

From 946f29028e063c768dc9b8c6e87c2eb584993df6 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 12:50:12 +0000
Subject: [PATCH 275/946] [llvm-objdump] Use cast<> instead of dyn_cast<> to
 avoid dereference of nullptr

The pointer is dereferenced immediately, so assert the cast is correct instead of returning nullptr
---
 llvm/tools/llvm-objdump/WasmDump.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/tools/llvm-objdump/WasmDump.cpp b/llvm/tools/llvm-objdump/WasmDump.cpp
index 28311361d97e9..df0a08e5b1dd0 100644
--- a/llvm/tools/llvm-objdump/WasmDump.cpp
+++ b/llvm/tools/llvm-objdump/WasmDump.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
 using namespace llvm::object;
 
 void objdump::printWasmFileHeader(const object::ObjectFile *Obj) {
-  const auto *File = dyn_cast<const WasmObjectFile>(Obj);
+  const auto *File = cast<const WasmObjectFile>(Obj);
 
   outs() << "Program Header:\n";
   outs() << "Version: 0x";

From 86497026a266f153d1c2b823fe7758acc4ab959d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 12:57:12 +0000
Subject: [PATCH 276/946] [clang-tidy] Use cast<>/castAs<> instead of
 dyn_cast<>/getAs<> to avoid dereference of nullptr

The pointer is dereferenced immediately, so assert the cast is correct instead of returning nullptr
---
 .../clang-tidy/abseil/DurationFactoryScaleCheck.cpp           | 2 +-
 .../clang-tidy/readability/SuspiciousCallArgumentCheck.cpp    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp
index c9f3a7db03461..dbc3cf2e6128f 100644
--- a/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp
+++ b/clang-tools-extra/clang-tidy/abseil/DurationFactoryScaleCheck.cpp
@@ -192,7 +192,7 @@ void DurationFactoryScaleCheck::check(const MatchFinder::MatchResult &Result) {
                  Result.Nodes.getNodeAs<BinaryOperator>("div_binop")) {
     // We next handle division.
     // For division, we only check the RHS.
-    const auto *FloatLit = llvm::dyn_cast<FloatingLiteral>(DivBinOp->getRHS());
+    const auto *FloatLit = llvm::cast<FloatingLiteral>(DivBinOp->getRHS());
 
     llvm::Optional<DurationScale> NewScale =
         getNewScale(Scale, 1.0 / FloatLit->getValueAsApproximateDouble());
diff --git a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp
index 6ef10925c1336..4d7c3451acc7a 100644
--- a/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/SuspiciousCallArgumentCheck.cpp
@@ -413,9 +413,9 @@ static bool areTypesCompatible(QualType ArgType, QualType ParamType,
   // Arithmetic types are interconvertible, except scoped enums.
   if (ParamType->isArithmeticType() && ArgType->isArithmeticType()) {
     if ((ParamType->isEnumeralType() &&
-         ParamType->getAs<EnumType>()->getDecl()->isScoped()) ||
+         ParamType->castAs<EnumType>()->getDecl()->isScoped()) ||
         (ArgType->isEnumeralType() &&
-         ArgType->getAs<EnumType>()->getDecl()->isScoped()))
+         ArgType->castAs<EnumType>()->getDecl()->isScoped()))
       return false;
 
     return true;

From df0fd1c301d6a17c1cdeea1f19154e60a5b29f47 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 13:24:36 +0000
Subject: [PATCH 277/946] [clangd] Use castAs<> instead of getAs<> to avoid
 dereference of nullptr

The pointer is dereferenced immediately, so assert the cast is correct instead of returning nullptr
---
 clang-tools-extra/clangd/HeuristicResolver.cpp | 5 ++---
 clang-tools-extra/clangd/Hover.cpp             | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clangd/HeuristicResolver.cpp b/clang-tools-extra/clangd/HeuristicResolver.cpp
index 2505280ffa9aa..37e8f134efdfc 100644
--- a/clang-tools-extra/clangd/HeuristicResolver.cpp
+++ b/clang-tools-extra/clangd/HeuristicResolver.cpp
@@ -59,9 +59,8 @@ const Type *HeuristicResolver::getPointeeType(const Type *T) const {
   if (!T)
     return nullptr;
 
-  if (T->isPointerType()) {
-    return T->getAs<PointerType>()->getPointeeType().getTypePtrOrNull();
-  }
+  if (T->isPointerType())
+    return T->castAs<PointerType>()->getPointeeType().getTypePtrOrNull();
 
   // Try to handle smart pointer types.
 
diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index 1449faec559cd..58ef2e3feb99d 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -147,7 +147,7 @@ HoverInfo::PrintedType printType(QualType QT, ASTContext &ASTCtx,
   // FIXME: This doesn't handle composite types that contain a decltype in them.
   // We should rather have a printing policy for that.
   while (!QT.isNull() && QT->isDecltypeType())
-    QT = QT->getAs<DecltypeType>()->getUnderlyingType();
+    QT = QT->castAs<DecltypeType>()->getUnderlyingType();
   HoverInfo::PrintedType Result;
   llvm::raw_string_ostream OS(Result.Type);
   // Special case: if the outer type is a tag type without qualifiers, then

From c93491352cf3146559de7755283f0dd259392126 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 15:10:33 +0000
Subject: [PATCH 278/946] [lldb] CxxModuleHandler - use cast<> instead of
 dyn_cast<> to avoid dereference of nullptr

The pointer is dereferenced immediately, so assert the cast is correct instead of returning nullptr
---
 lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
index 74dd04600b4be..fecffd1183f89 100644
--- a/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
+++ b/lldb/source/Plugins/ExpressionParser/Clang/CxxModuleHandler.cpp
@@ -138,7 +138,7 @@ getEqualLocalDeclContext(Sema &sema, DeclContext *foreign_ctxt) {
 
   // We currently only support building namespaces.
   if (foreign_ctxt->isNamespace()) {
-    NamedDecl *ns = llvm::dyn_cast<NamedDecl>(foreign_ctxt);
+    NamedDecl *ns = llvm::cast<NamedDecl>(foreign_ctxt);
     llvm::StringRef ns_name = ns->getName();
 
     auto lookup_result = emulateLookupInCtxt(sema, ns_name, *parent);

From d7aa402b4b8a325a68c20d0300ac6bc664766be0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 15:11:08 +0000
Subject: [PATCH 279/946] [lldb] PdbAstBuilder - use cast<> instead of
 dyn_cast<> to avoid dereference of nullptr

The pointers are dereferenced immediately, so assert the cast is correct instead of returning nullptr
---
 lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
index 9473befa6cc34..dc0969a0ce7c6 100644
--- a/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
+++ b/lldb/source/Plugins/SymbolFile/NativePDB/PdbAstBuilder.cpp
@@ -1081,7 +1081,7 @@ PdbAstBuilder::GetOrCreateFunctionDecl(PdbCompilandSymId func_id) {
 
   clang::FunctionDecl *function_decl = nullptr;
   if (parent->isRecord()) {
-    clang::QualType parent_qt = llvm::dyn_cast<clang::TypeDecl>(parent)
+    clang::QualType parent_qt = llvm::cast<clang::TypeDecl>(parent)
                                     ->getTypeForDecl()
                                     ->getCanonicalTypeInternal();
     lldb::opaque_compiler_type_t parent_opaque_ty =
@@ -1318,7 +1318,7 @@ void PdbAstBuilder::ParseAllNamespacesPlusChildrenOf(
     if (!context->isNamespace())
       continue;
 
-    clang::NamespaceDecl *ns = llvm::dyn_cast<clang::NamespaceDecl>(context);
+    clang::NamespaceDecl *ns = llvm::cast<clang::NamespaceDecl>(context);
     std::string actual_ns = ns->getQualifiedNameAsString();
     if (llvm::StringRef(actual_ns).startswith(*parent)) {
       clang::QualType qt = GetOrCreateType(tid);

From d13847bbe5e632ec8f62abc81f74b9351a56d28c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 15:12:44 +0000
Subject: [PATCH 280/946] [lldb] TerminalState::Save - fix unused variable
 warning

Non-POSIX target builds don't use the file descriptor
---
 lldb/source/Host/common/Terminal.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/source/Host/common/Terminal.cpp b/lldb/source/Host/common/Terminal.cpp
index 2a1c12e667bcf..831e9dff4eb18 100644
--- a/lldb/source/Host/common/Terminal.cpp
+++ b/lldb/source/Host/common/Terminal.cpp
@@ -417,8 +417,8 @@ bool TerminalState::Save(Terminal term, bool save_process_group) {
   Clear();
   m_tty = term;
   if (m_tty.IsATerminal()) {
-    int fd = m_tty.GetFileDescriptor();
 #if LLDB_ENABLE_POSIX
+    int fd = m_tty.GetFileDescriptor();
     m_tflags = ::fcntl(fd, F_GETFL, 0);
 #if LLDB_ENABLE_TERMIOS
     std::unique_ptr<Terminal::Data> new_data{new Terminal::Data()};

From 49d38b1d618c02964af93068ee8e1ac753722104 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 15:14:10 +0000
Subject: [PATCH 281/946] Fix "not all control paths return a value" warning.
 NFC.

---
 lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
index c8063915b178d..d1d844bb4ca41 100644
--- a/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
+++ b/lldb/source/Plugins/Language/CPlusPlus/GenericBitset.cpp
@@ -72,6 +72,7 @@ ConstString GenericBitsetFrontEnd::GetDataContainerMemberName() {
   case StdLib::LibStdcpp:
     return ConstString("_M_w");
   }
+  llvm_unreachable("Unknown StdLib enum");
 }
 
 bool GenericBitsetFrontEnd::Update() {

From 938944445a1bad0f4467528015c8737227bbc9a7 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 22 Jan 2022 13:06:05 -0500
Subject: [PATCH 282/946] [libc++] Mark LWG3541 as "Complete". NFC.

Differential Revision: https://reviews.llvm.org/D117956
---
 libcxx/docs/Status/Cxx2bIssues.csv          |  4 ++--
 libcxx/include/__iterator/readable_traits.h | 10 +++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index a91e3b52a1705..27b109eeb5dd9 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -85,7 +85,7 @@
 `3536 <https://wg21.link/LWG3536>`__,"Should ``chrono::from_stream()`` assign zero to duration for failure?","June 2021","","","|chrono|"
 `3539 <https://wg21.link/LWG3539>`__,"``format_to`` must not copy models of ``output_iterator<const charT&>``","June 2021","","","|format|"
 `3540 <https://wg21.link/LWG3540>`__,"§[format.arg] There should be no const in ``basic_format_arg(const T* p)``","June 2021","","","|format|"
-`3541 <https://wg21.link/LWG3541>`__,"``indirectly_readable_traits`` should be SFINAE-friendly for all types","June 2021","","","|ranges|"
+`3541 <https://wg21.link/LWG3541>`__,"``indirectly_readable_traits`` should be SFINAE-friendly for all types","June 2021","|Complete|","14.0","|ranges|"
 `3542 <https://wg21.link/LWG3542>`__,"``basic_format_arg`` mishandles ``basic_string_view`` with custom traits","June 2021","|Complete|","14.0","|format|"
 `3543 <https://wg21.link/LWG3543>`__,"Definition of when ``counted_iterators`` refer to the same sequence isn't quite right","June 2021","","","|ranges|"
 `3544 <https://wg21.link/LWG3544>`__,"``format-arg-store::args`` is unintentionally not exposition-only","June 2021","","","|format|"
@@ -138,4 +138,4 @@
 `3595 <https://wg21.link/LWG3595>`__,"Exposition-only classes proxy and postfix-proxy for ``common_iterator`` should be fully ``constexpr``","October 2021","","","|ranges|"
 "","","","",""
 `3645 <https://wg21.link/LWG3645>`__,"``resize_and_overwrite`` is overspecified to call its callback with lvalues", "Not voted in","|Complete|","14.0",""
-"","","","",""
\ No newline at end of file
+"","","","",""
diff --git a/libcxx/include/__iterator/readable_traits.h b/libcxx/include/__iterator/readable_traits.h
index 90121bea80736..13f323e295ba1 100644
--- a/libcxx/include/__iterator/readable_traits.h
+++ b/libcxx/include/__iterator/readable_traits.h
@@ -57,14 +57,14 @@ template<__has_member_element_type _Tp>
 struct indirectly_readable_traits<_Tp>
   : __cond_value_type<typename _Tp::element_type> {};
 
-// Pre-emptively applies LWG3541
 template<__has_member_value_type _Tp>
-requires __has_member_element_type<_Tp>
+  requires __has_member_element_type<_Tp>
 struct indirectly_readable_traits<_Tp> {};
+
 template<__has_member_value_type _Tp>
-requires __has_member_element_type<_Tp> &&
-         same_as<remove_cv_t<typename _Tp::element_type>,
-                 remove_cv_t<typename _Tp::value_type>>
+  requires __has_member_element_type<_Tp> &&
+           same_as<remove_cv_t<typename _Tp::element_type>,
+                   remove_cv_t<typename _Tp::value_type>>
 struct indirectly_readable_traits<_Tp>
   : __cond_value_type<typename _Tp::value_type> {};
 

From 5d78fef6db15f7ba6642431fa3d07ddeda98d4f5 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 22 Jan 2022 15:24:53 -0500
Subject: [PATCH 283/946] [libc++] Fix LWG3437 "__cpp_lib_polymorphic_allocator
 is in the wrong header"

https://cplusplus.github.io/LWG/issue3437

Differential Revision: https://reviews.llvm.org/D117963
---
 libcxx/docs/Status/Cxx2bIssues.csv            |  2 +-
 libcxx/include/version                        |  2 +-
 .../memory.version.pass.cpp                   | 39 -------------------
 .../generate_feature_test_macro_components.py |  2 +-
 4 files changed, 3 insertions(+), 42 deletions(-)

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 27b109eeb5dd9..27bc1e5832967 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -31,7 +31,7 @@
 "`3427 <https://wg21.link/LWG3427>`__","``operator<=>(const shared_ptr<T>&, nullptr_t)`` definition ill-formed","November 2020","","","|spaceship|"
 "`3428 <https://wg21.link/LWG3428>`__","``single_view``'s in place constructor should be explicit","November 2020","","","|ranges|"
 "`3434 <https://wg21.link/LWG3434>`__","``ios_base`` never reclaims memory for iarray and parray","November 2020","",""
-"`3437 <https://wg21.link/LWG3437>`__","``__cpp_lib_polymorphic_allocator`` is in the wrong header","November 2020","",""
+"`3437 <https://wg21.link/LWG3437>`__","``__cpp_lib_polymorphic_allocator`` is in the wrong header","November 2020","|Complete|","14.0"
 "`3446 <https://wg21.link/LWG3446>`__","``indirectly_readable_traits`` ambiguity for types with both ``value_type`` and ``element_type``","November 2020","","","|ranges|"
 "`3448 <https://wg21.link/LWG3448>`__","``transform_view``'s ``sentinel<false>`` not comparable with ``iterator<true>``","November 2020","","","|ranges|"
 "`3449 <https://wg21.link/LWG3449>`__","``take_view`` and ``take_while_view``'s ``sentinel<false>`` not comparable with their ``const iterator``","November 2020","","","|ranges|"
diff --git a/libcxx/include/version b/libcxx/include/version
index 39f1b8eb19ef1..0c23656128536 100644
--- a/libcxx/include/version
+++ b/libcxx/include/version
@@ -128,7 +128,7 @@ __cpp_lib_null_iterators                                201304L <iterator>
 __cpp_lib_optional                                      201606L <optional>
 __cpp_lib_out_ptr                                       202106L <memory>
 __cpp_lib_parallel_algorithm                            201603L <algorithm> <numeric>
-__cpp_lib_polymorphic_allocator                         201902L <memory>
+__cpp_lib_polymorphic_allocator                         201902L <memory_resource>
 __cpp_lib_quoted_string_io                              201304L <iomanip>
 __cpp_lib_ranges                                        201811L <algorithm> <functional> <iterator>
                                                                 <memory> <ranges>
diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp
index 415eaa385767e..20fe2514b4eff 100644
--- a/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/support.limits.general/memory.version.pass.cpp
@@ -26,7 +26,6 @@
     __cpp_lib_enable_shared_from_this             201603L [C++17]
     __cpp_lib_make_unique                         201304L [C++14]
     __cpp_lib_out_ptr                             202106L [C++2b]
-    __cpp_lib_polymorphic_allocator               201902L [C++20]
     __cpp_lib_ranges                              201811L [C++20]
     __cpp_lib_raw_memory_algorithms               201606L [C++17]
     __cpp_lib_shared_ptr_arrays                   201611L [C++17]
@@ -82,10 +81,6 @@
 #   error "__cpp_lib_out_ptr should not be defined before c++2b"
 # endif
 
-# ifdef __cpp_lib_polymorphic_allocator
-#   error "__cpp_lib_polymorphic_allocator should not be defined before c++20"
-# endif
-
 # ifdef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should not be defined before c++20"
 # endif
@@ -159,10 +154,6 @@
 #   error "__cpp_lib_out_ptr should not be defined before c++2b"
 # endif
 
-# ifdef __cpp_lib_polymorphic_allocator
-#   error "__cpp_lib_polymorphic_allocator should not be defined before c++20"
-# endif
-
 # ifdef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should not be defined before c++20"
 # endif
@@ -248,10 +239,6 @@
 #   error "__cpp_lib_out_ptr should not be defined before c++2b"
 # endif
 
-# ifdef __cpp_lib_polymorphic_allocator
-#   error "__cpp_lib_polymorphic_allocator should not be defined before c++20"
-# endif
-
 # ifdef __cpp_lib_ranges
 #   error "__cpp_lib_ranges should not be defined before c++20"
 # endif
@@ -364,19 +351,6 @@
 #   error "__cpp_lib_out_ptr should not be defined before c++2b"
 # endif
 
-# if !defined(_LIBCPP_VERSION)
-#   ifndef __cpp_lib_polymorphic_allocator
-#     error "__cpp_lib_polymorphic_allocator should be defined in c++20"
-#   endif
-#   if __cpp_lib_polymorphic_allocator != 201902L
-#     error "__cpp_lib_polymorphic_allocator should have the value 201902L in c++20"
-#   endif
-# else // _LIBCPP_VERSION
-#   ifdef __cpp_lib_polymorphic_allocator
-#     error "__cpp_lib_polymorphic_allocator should not be defined because it is unimplemented in libc++!"
-#   endif
-# endif
-
 # if !defined(_LIBCPP_VERSION)
 #   ifndef __cpp_lib_ranges
 #     error "__cpp_lib_ranges should be defined in c++20"
@@ -528,19 +502,6 @@
 #   endif
 # endif
 
-# if !defined(_LIBCPP_VERSION)
-#   ifndef __cpp_lib_polymorphic_allocator
-#     error "__cpp_lib_polymorphic_allocator should be defined in c++2b"
-#   endif
-#   if __cpp_lib_polymorphic_allocator != 201902L
-#     error "__cpp_lib_polymorphic_allocator should have the value 201902L in c++2b"
-#   endif
-# else // _LIBCPP_VERSION
-#   ifdef __cpp_lib_polymorphic_allocator
-#     error "__cpp_lib_polymorphic_allocator should not be defined because it is unimplemented in libc++!"
-#   endif
-# endif
-
 # if !defined(_LIBCPP_VERSION)
 #   ifndef __cpp_lib_ranges
 #     error "__cpp_lib_ranges should be defined in c++2b"
diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py
index 7047f3f4b0cc1..7b2fc0b621940 100755
--- a/libcxx/utils/generate_feature_test_macro_components.py
+++ b/libcxx/utils/generate_feature_test_macro_components.py
@@ -508,7 +508,7 @@ def add_version_header(tc):
   }, {
     "name": "__cpp_lib_polymorphic_allocator",
     "values": { "c++20": 201902 },
-    "headers": ["memory"],
+    "headers": ["memory_resource"],
     "unimplemented": True,
   }, {
     "name": "__cpp_lib_quoted_string_io",

From d4ed3eff9f9c4d00689e34712db8ac0ca65ddb26 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 15:34:41 +0000
Subject: [PATCH 284/946] [X86] Add vector signbit parity checks for non-popcnt
 targets

Noticed while looking at D117983 - we miss some parity patterns with/without popcnt
---
 llvm/test/CodeGen/X86/parity-vec.ll | 165 +++++++++++++++++++---------
 1 file changed, 115 insertions(+), 50 deletions(-)

diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll
index e52a32d261ed2..60d9d4be68e79 100644
--- a/llvm/test/CodeGen/X86/parity-vec.ll
+++ b/llvm/test/CodeGen/X86/parity-vec.ll
@@ -1,27 +1,44 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+popcnt,+sse2 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=-popcnt,+sse2 | FileCheck %s --check-prefix=NOPOPCNT
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+popcnt,+sse2 | FileCheck %s --check-prefix=POPCNT
 
 define i1 @noncanonical_parity(<16 x i1> %x) {
-; CHECK-LABEL: noncanonical_parity:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    psllw $7, %xmm0
-; CHECK-NEXT:    pmovmskb %xmm0, %eax
-; CHECK-NEXT:    popcntl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
-; CHECK-NEXT:    retq
+; NOPOPCNT-LABEL: noncanonical_parity:
+; NOPOPCNT:       # %bb.0:
+; NOPOPCNT-NEXT:    psllw $7, %xmm0
+; NOPOPCNT-NEXT:    pmovmskb %xmm0, %eax
+; NOPOPCNT-NEXT:    xorb %ah, %al
+; NOPOPCNT-NEXT:    setnp %al
+; NOPOPCNT-NEXT:    retq
+;
+; POPCNT-LABEL: noncanonical_parity:
+; POPCNT:       # %bb.0:
+; POPCNT-NEXT:    psllw $7, %xmm0
+; POPCNT-NEXT:    pmovmskb %xmm0, %eax
+; POPCNT-NEXT:    popcntl %eax, %eax
+; POPCNT-NEXT:    andl $1, %eax
+; POPCNT-NEXT:    # kill: def $al killed $al killed $eax
+; POPCNT-NEXT:    retq
   %r = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %x)
   ret i1 %r
 }
 define i1 @canonical_parity(<16 x i1> %x) {
-; CHECK-LABEL: canonical_parity:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    psllw $7, %xmm0
-; CHECK-NEXT:    pmovmskb %xmm0, %eax
-; CHECK-NEXT:    popcntl %eax, %eax
-; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    setne %al
-; CHECK-NEXT:    retq
+; NOPOPCNT-LABEL: canonical_parity:
+; NOPOPCNT:       # %bb.0:
+; NOPOPCNT-NEXT:    psllw $7, %xmm0
+; NOPOPCNT-NEXT:    pmovmskb %xmm0, %eax
+; NOPOPCNT-NEXT:    xorb %ah, %al
+; NOPOPCNT-NEXT:    setnp %al
+; NOPOPCNT-NEXT:    retq
+;
+; POPCNT-LABEL: canonical_parity:
+; POPCNT:       # %bb.0:
+; POPCNT-NEXT:    psllw $7, %xmm0
+; POPCNT-NEXT:    pmovmskb %xmm0, %eax
+; POPCNT-NEXT:    popcntl %eax, %eax
+; POPCNT-NEXT:    testb $1, %al
+; POPCNT-NEXT:    setne %al
+; POPCNT-NEXT:    retq
   %i1 = bitcast <16 x i1> %x to i16
   %i2 = call i16 @llvm.ctpop.i16(i16 %i1)
   %i3 = and i16 %i2, 1
@@ -29,13 +46,37 @@ define i1 @canonical_parity(<16 x i1> %x) {
   ret i1 %i4
 }
 define i1 @canonical_parity_noncanonical_pred(<16 x i1> %x) {
-; CHECK-LABEL: canonical_parity_noncanonical_pred:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    psllw $7, %xmm0
-; CHECK-NEXT:    pmovmskb %xmm0, %eax
-; CHECK-NEXT:    popcntl %eax, %eax
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
-; CHECK-NEXT:    retq
+; NOPOPCNT-LABEL: canonical_parity_noncanonical_pred:
+; NOPOPCNT:       # %bb.0:
+; NOPOPCNT-NEXT:    psllw $7, %xmm0
+; NOPOPCNT-NEXT:    pmovmskb %xmm0, %eax
+; NOPOPCNT-NEXT:    movl %eax, %ecx
+; NOPOPCNT-NEXT:    shrl %ecx
+; NOPOPCNT-NEXT:    andl $21845, %ecx # imm = 0x5555
+; NOPOPCNT-NEXT:    subl %ecx, %eax
+; NOPOPCNT-NEXT:    movl %eax, %ecx
+; NOPOPCNT-NEXT:    andl $13107, %ecx # imm = 0x3333
+; NOPOPCNT-NEXT:    shrl $2, %eax
+; NOPOPCNT-NEXT:    andl $13107, %eax # imm = 0x3333
+; NOPOPCNT-NEXT:    addl %ecx, %eax
+; NOPOPCNT-NEXT:    movl %eax, %ecx
+; NOPOPCNT-NEXT:    shrl $4, %ecx
+; NOPOPCNT-NEXT:    addl %eax, %ecx
+; NOPOPCNT-NEXT:    andl $3855, %ecx # imm = 0xF0F
+; NOPOPCNT-NEXT:    movl %ecx, %eax
+; NOPOPCNT-NEXT:    shll $8, %eax
+; NOPOPCNT-NEXT:    addl %ecx, %eax
+; NOPOPCNT-NEXT:    shrl $8, %eax
+; NOPOPCNT-NEXT:    # kill: def $al killed $al killed $eax
+; NOPOPCNT-NEXT:    retq
+;
+; POPCNT-LABEL: canonical_parity_noncanonical_pred:
+; POPCNT:       # %bb.0:
+; POPCNT-NEXT:    psllw $7, %xmm0
+; POPCNT-NEXT:    pmovmskb %xmm0, %eax
+; POPCNT-NEXT:    popcntl %eax, %eax
+; POPCNT-NEXT:    # kill: def $al killed $al killed $eax
+; POPCNT-NEXT:    retq
   %i1 = bitcast <16 x i1> %x to i16
   %i2 = call i16 @llvm.ctpop.i16(i16 %i1)
   %i3 = and i16 %i2, 1
@@ -44,28 +85,44 @@ define i1 @canonical_parity_noncanonical_pred(<16 x i1> %x) {
 }
 
 define i1 @noncanonical_nonparity(<16 x i1> %x) {
-; CHECK-LABEL: noncanonical_nonparity:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    psllw $7, %xmm0
-; CHECK-NEXT:    pmovmskb %xmm0, %eax
-; CHECK-NEXT:    popcntl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    xorb $1, %al
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
-; CHECK-NEXT:    retq
+; NOPOPCNT-LABEL: noncanonical_nonparity:
+; NOPOPCNT:       # %bb.0:
+; NOPOPCNT-NEXT:    psllw $7, %xmm0
+; NOPOPCNT-NEXT:    pmovmskb %xmm0, %eax
+; NOPOPCNT-NEXT:    xorb %ah, %al
+; NOPOPCNT-NEXT:    setp %al
+; NOPOPCNT-NEXT:    retq
+;
+; POPCNT-LABEL: noncanonical_nonparity:
+; POPCNT:       # %bb.0:
+; POPCNT-NEXT:    psllw $7, %xmm0
+; POPCNT-NEXT:    pmovmskb %xmm0, %eax
+; POPCNT-NEXT:    popcntl %eax, %eax
+; POPCNT-NEXT:    andl $1, %eax
+; POPCNT-NEXT:    xorb $1, %al
+; POPCNT-NEXT:    # kill: def $al killed $al killed $eax
+; POPCNT-NEXT:    retq
   %r.inv = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %x)
   %r = xor i1 %r.inv, -1
   ret i1 %r
 }
 define i1 @canonical_nonparity(<16 x i1> %x) {
-; CHECK-LABEL: canonical_nonparity:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    psllw $7, %xmm0
-; CHECK-NEXT:    pmovmskb %xmm0, %eax
-; CHECK-NEXT:    popcntl %eax, %eax
-; CHECK-NEXT:    testb $1, %al
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    retq
+; NOPOPCNT-LABEL: canonical_nonparity:
+; NOPOPCNT:       # %bb.0:
+; NOPOPCNT-NEXT:    psllw $7, %xmm0
+; NOPOPCNT-NEXT:    pmovmskb %xmm0, %eax
+; NOPOPCNT-NEXT:    xorb %ah, %al
+; NOPOPCNT-NEXT:    setp %al
+; NOPOPCNT-NEXT:    retq
+;
+; POPCNT-LABEL: canonical_nonparity:
+; POPCNT:       # %bb.0:
+; POPCNT-NEXT:    psllw $7, %xmm0
+; POPCNT-NEXT:    pmovmskb %xmm0, %eax
+; POPCNT-NEXT:    popcntl %eax, %eax
+; POPCNT-NEXT:    testb $1, %al
+; POPCNT-NEXT:    sete %al
+; POPCNT-NEXT:    retq
   %i1 = bitcast <16 x i1> %x to i16
   %i2 = call i16 @llvm.ctpop.i16(i16 %i1)
   %i3 = and i16 %i2, 1
@@ -73,15 +130,23 @@ define i1 @canonical_nonparity(<16 x i1> %x) {
   ret i1 %i4
 }
 define i1 @canonical_nonparity_noncanonical_pred(<16 x i1> %x) {
-; CHECK-LABEL: canonical_nonparity_noncanonical_pred:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    psllw $7, %xmm0
-; CHECK-NEXT:    pmovmskb %xmm0, %eax
-; CHECK-NEXT:    popcntl %eax, %eax
-; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    xorb $1, %al
-; CHECK-NEXT:    # kill: def $al killed $al killed $eax
-; CHECK-NEXT:    retq
+; NOPOPCNT-LABEL: canonical_nonparity_noncanonical_pred:
+; NOPOPCNT:       # %bb.0:
+; NOPOPCNT-NEXT:    psllw $7, %xmm0
+; NOPOPCNT-NEXT:    pmovmskb %xmm0, %eax
+; NOPOPCNT-NEXT:    xorb %ah, %al
+; NOPOPCNT-NEXT:    setp %al
+; NOPOPCNT-NEXT:    retq
+;
+; POPCNT-LABEL: canonical_nonparity_noncanonical_pred:
+; POPCNT:       # %bb.0:
+; POPCNT-NEXT:    psllw $7, %xmm0
+; POPCNT-NEXT:    pmovmskb %xmm0, %eax
+; POPCNT-NEXT:    popcntl %eax, %eax
+; POPCNT-NEXT:    andl $1, %eax
+; POPCNT-NEXT:    xorb $1, %al
+; POPCNT-NEXT:    # kill: def $al killed $al killed $eax
+; POPCNT-NEXT:    retq
   %i1 = bitcast <16 x i1> %x to i16
   %i2 = call i16 @llvm.ctpop.i16(i16 %i1)
   %i3 = and i16 %i2, 1

From eb3f20e8fa4b76e0103f15623a2fc3b27fb03f85 Mon Sep 17 00:00:00 2001
From: Carlos Galvez <carlosgalvezp@gmail.com>
Date: Fri, 21 Jan 2022 07:59:27 +0000
Subject: [PATCH 285/946] [clang-tidy] Remove gsl::at suggestion from
 cppcoreguidelines-pro-bounds-constant-array-index

Currently the fix hint is hardcoded to gsl::at(). This poses
a problem for people who, for a number of reasons, don't want
or cannot use the GSL library (introducing a new third-party
dependency into a project is not a minor task).

In these situations, the fix hint does more harm than good
as it creates confusion as to what the fix should be. People
can even misinterpret the fix "gsl::at" as e.g. "std::array::at",
which can lead to even more trouble (e.g. when having guidelines
that disallow exceptions).

Furthermore, this is not a requirement from the C++ Core Guidelines.
simply that array indexing needs to be safe. Each project should
be able to decide upon a strategy for safe indexing.

The fix-it is kept for people who want to use the GSL library.

Differential Revision: https://reviews.llvm.org/D117857
---
 .../cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp  | 3 +--
 clang-tools-extra/docs/ReleaseNotes.rst                     | 6 ++++++
 .../cppcoreguidelines-pro-bounds-constant-array-index.rst   | 2 ++
 ...guidelines-pro-bounds-constant-array-index-gslheader.cpp | 6 +++---
 .../cppcoreguidelines-pro-bounds-constant-array-index.cpp   | 6 +++---
 5 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp
index 9cbe1fa65c139..59886ee4a3ebc 100644
--- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp
+++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp
@@ -76,8 +76,7 @@ void ProBoundsConstantArrayIndexCheck::check(
 
     auto Diag = diag(Matched->getExprLoc(),
                      "do not use array subscript when the index is "
-                     "not an integer constant expression; use gsl::at() "
-                     "instead");
+                     "not an integer constant expression");
     if (!GslHeader.empty()) {
       Diag << FixItHint::CreateInsertion(BaseRange.getBegin(), "gsl::at(")
            << FixItHint::CreateReplacement(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index e3c1c4b9411bb..1f7228d5732bd 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -159,6 +159,12 @@ Changes in existing checks
 - Removed default setting ``cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors = "true"``,
   to match the current state of the C++ Core Guidelines.
 
+- Removed suggestion ``use gsl::at`` from warning message in the
+  ``cppcoreguidelines-pro-bounds-constant-array-index`` check, since that is not
+  a requirement from the C++ Core Guidelines. This allows people to choose
+  their own safe indexing strategy. The fix-it is kept for those who want to
+  use the GSL library.
+
 - Updated :doc:`google-readability-casting
   <clang-tidy/checks/google-readability-casting>` to diagnose and fix functional
   casts, to achieve feature parity with the corresponding ``cpplint.py`` check.
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-bounds-constant-array-index.rst b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-bounds-constant-array-index.rst
index 4528f2ac6bef6..2a598f2592019 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-bounds-constant-array-index.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/cppcoreguidelines-pro-bounds-constant-array-index.rst
@@ -11,6 +11,8 @@ arrays, see the `-Warray-bounds` Clang diagnostic.
 This rule is part of the "Bounds safety" profile of the C++ Core Guidelines, see
 https://github.com/isocpp/CppCoreGuidelines/blob/master/CppCoreGuidelines.md#Pro-bounds-arrayindex.
 
+Optionally, this check can generate fixes using ``gsl::at`` for indexing.
+
 Options
 -------
 
diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index-gslheader.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index-gslheader.cpp
index 71b957d84740b..87550cbe84335 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index-gslheader.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index-gslheader.cpp
@@ -27,10 +27,10 @@ constexpr int const_index(int base) {
 
 void f(std::array<int, 10> a, int pos) {
   a [ pos / 2 /*comment*/] = 1;
-  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead [cppcoreguidelines-pro-bounds-constant-array-index]
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use array subscript when the index is not an integer constant expression [cppcoreguidelines-pro-bounds-constant-array-index]
   // CHECK-FIXES: gsl::at(a,  pos / 2 /*comment*/) = 1;
   int j = a[pos - 1];
-  // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead
+  // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: do not use array subscript when the index is not an integer constant expression
   // CHECK-FIXES: int j = gsl::at(a, pos - 1);
 
   a.at(pos-1) = 2; // OK, at() instead of []
@@ -54,7 +54,7 @@ void g() {
   int a[10];
   for (int i = 0; i < 10; ++i) {
     a[i] = i;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not use array subscript when the index is not an integer constant expression
     // CHECK-FIXES: gsl::at(a, i) = i;
     gsl::at(a, i) = i; // OK, gsl::at() instead of []
   }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index.cpp b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index.cpp
index 351941099343a..7d5390ddecfb9 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/cppcoreguidelines-pro-bounds-constant-array-index.cpp
@@ -25,9 +25,9 @@ constexpr int const_index(int base) {
 
 void f(std::array<int, 10> a, int pos) {
   a [ pos / 2 /*comment*/] = 1;
-  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead [cppcoreguidelines-pro-bounds-constant-array-index]
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: do not use array subscript when the index is not an integer constant expression [cppcoreguidelines-pro-bounds-constant-array-index]
   int j = a[pos - 1];
-  // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead
+  // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: do not use array subscript when the index is not an integer constant expression
 
   a.at(pos-1) = 2; // OK, at() instead of []
   gsl::at(a, pos-1) = 2; // OK, gsl::at() instead of []
@@ -50,7 +50,7 @@ void g() {
   int a[10];
   for (int i = 0; i < 10; ++i) {
     a[i] = i;
-    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not use array subscript when the index is not an integer constant expression; use gsl::at() instead
+    // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: do not use array subscript when the index is not an integer constant expression
     // CHECK-FIXES: gsl::at(a, i) = i;
     gsl::at(a, i) = i; // OK, gsl::at() instead of []
   }

From 153359180a9d5a6cea3985db5d3396218dc6252d Mon Sep 17 00:00:00 2001
From: Ayke van Laethem <aykevanlaethem@gmail.com>
Date: Thu, 20 Jan 2022 21:50:26 +0100
Subject: [PATCH 286/946] [AVR] Remove regalloc workaround for LDDWRdPtrQ

Background: https://github.com/avr-rust/rust-legacy-fork/issues/126

In short, this workaround was introduced to fix a "ran out of registers
during regalloc" issue. The root cause has since been fixed in
https://reviews.llvm.org/D54218 so this workaround can be removed.

There is one test that changes a little bit, removing a single
instruction. I also compiled compiler-rt before and after this patch but
didn't see a difference. So presumably the impact is very low. Still,
it's nice to be able to remove such a workaround.

Differential Revision: https://reviews.llvm.org/D117831
---
 llvm/lib/Target/AVR/AVRInstrInfo.td    |  2 +-
 llvm/lib/Target/AVR/AVRRegisterInfo.td | 20 --------------------
 llvm/test/CodeGen/AVR/lpmx.ll          | 22 ++++++++++------------
 3 files changed, 11 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index a365bc868683d..5cbf3baef5466 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -1394,7 +1394,7 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
   // ldd Rd,   P+q
   // ldd Rd+1, P+q+1
   let Constraints = "@earlyclobber $dst" in def LDDWRdPtrQ
-      : Pseudo<(outs DREGS_WITHOUT_YZ_WORKAROUND
+      : Pseudo<(outs DREGS
                 : $dst),
                (ins memri
                 : $memri),
diff --git a/llvm/lib/Target/AVR/AVRRegisterInfo.td b/llvm/lib/Target/AVR/AVRRegisterInfo.td
index bb4e86ca0536f..c5fda788fe4d7 100644
--- a/llvm/lib/Target/AVR/AVRRegisterInfo.td
+++ b/llvm/lib/Target/AVR/AVRRegisterInfo.td
@@ -178,26 +178,6 @@ def DREGSMOVW : RegisterClass<"AVR", [i16], 8,
                                   R29R28, R17R16, R15R14, R13R12, R11R10, R9R8,
                                   R7R6, R5R4, R3R2, R1R0)>;
 
-// The 16-bit DREGS register class, excluding the Z pointer register.
-//
-// This is used by instructions which cause high pointer register
-// contention which leads to an assertion in the register allocator.
-//
-// There is no technical reason why instructions that use this class
-// cannot use Z; it's simply a workaround a regalloc bug.
-//
-// More information can be found in PR39553.
-def DREGS_WITHOUT_YZ_WORKAROUND
-    : RegisterClass<"AVR", [i16], 8,
-                    (
-                        // Return value and arguments.
-                        add R25R24, R19R18, R21R20, R23R22,
-                        // Scratch registers.
-                        R27R26,
-                        // Callee saved registers.
-                        R17R16, R15R14, R13R12, R11R10, R9R8, R7R6, R5R4, R3R2,
-                        R1R0)>;
-
 // 16-bit register class for immediate instructions.
 def DLDREGS : RegisterClass<"AVR", [i16], 8,
                             (
diff --git a/llvm/test/CodeGen/AVR/lpmx.ll b/llvm/test/CodeGen/AVR/lpmx.ll
index 4a78ce705e7e5..c5162e2efb503 100644
--- a/llvm/test/CodeGen/AVR/lpmx.ll
+++ b/llvm/test/CodeGen/AVR/lpmx.ll
@@ -22,13 +22,12 @@ define i16 @foo0(i16 %a) addrspace(1) {
 ; CHECK-O0-NEXT:    out 61, r28
 ; CHECK-O0-NEXT:    std Y+1, r24
 ; CHECK-O0-NEXT:    std Y+2, r25
-; CHECK-O0-NEXT:    ldd r24, Y+1
-; CHECK-O0-NEXT:    ldd r25, Y+2
-; CHECK-O0-NEXT:    lsl r24
-; CHECK-O0-NEXT:    rol r25
-; CHECK-O0-NEXT:    subi r24, -lo8(arr0)
-; CHECK-O0-NEXT:    sbci r25, -hi8(arr0)
-; CHECK-O0-NEXT:    movw r30, r24
+; CHECK-O0-NEXT:    ldd r30, Y+1
+; CHECK-O0-NEXT:    ldd r31, Y+2
+; CHECK-O0-NEXT:    lsl r30
+; CHECK-O0-NEXT:    rol r31
+; CHECK-O0-NEXT:    subi r30, -lo8(arr0)
+; CHECK-O0-NEXT:    sbci r31, -hi8(arr0)
 ; CHECK-O0-NEXT:    lpm r24, Z+
 ; CHECK-O0-NEXT:    lpm r25, Z
 ; CHECK-O0-NEXT:    adiw r28, 2
@@ -95,11 +94,10 @@ define i8 @foo1(i16 %a) addrspace(1) {
 ; CHECK-O0-NEXT:    out 61, r28
 ; CHECK-O0-NEXT:    std Y+1, r24
 ; CHECK-O0-NEXT:    std Y+2, r25
-; CHECK-O0-NEXT:    ldd r24, Y+1
-; CHECK-O0-NEXT:    ldd r25, Y+2
-; CHECK-O0-NEXT:    subi r24, -lo8(arr1)
-; CHECK-O0-NEXT:    sbci r25, -hi8(arr1)
-; CHECK-O0-NEXT:    movw r30, r24
+; CHECK-O0-NEXT:    ldd r30, Y+1
+; CHECK-O0-NEXT:    ldd r31, Y+2
+; CHECK-O0-NEXT:    subi r30, -lo8(arr1)
+; CHECK-O0-NEXT:    sbci r31, -hi8(arr1)
 ; CHECK-O0-NEXT:    lpm r24, Z
 ; CHECK-O0-NEXT:    adiw r28, 2
 ; CHECK-O0-NEXT:    in r0, 63

From 116ab78694dd2ad903c3fb101d48e01855282bf8 Mon Sep 17 00:00:00 2001
From: Ayke van Laethem <aykevanlaethem@gmail.com>
Date: Sun, 16 Jan 2022 08:42:48 +0100
Subject: [PATCH 287/946] [AVR] Make use of the constant value 0 in R1

The register R1 is defined to have the constant value 0 in the avr-gcc
calling convention (which we follow). Unfortunately, we don't really
make use of it. This patch replaces `LDI 0` instructions with a copy
from R1.

This reduces code size: my AVR build of compiler-rt goes from 50660 to
50240 bytes of code size, which is a 0.8% reduction. Presumably it will
also improve execution speed, although I didn't measure this.

Differential Revision: https://reviews.llvm.org/D117425
---
 llvm/lib/Target/AVR/AVRISelLowering.cpp     | 14 ++++++++++++++
 llvm/lib/Target/AVR/AVRISelLowering.h       |  2 ++
 llvm/lib/Target/AVR/AVRInstrInfo.td         |  4 ++++
 llvm/test/CodeGen/AVR/smul-with-overflow.ll |  2 +-
 llvm/test/CodeGen/AVR/store-undef.ll        |  3 +--
 llvm/test/CodeGen/AVR/umul-with-overflow.ll |  2 +-
 6 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 894f4829fe3f1..a58fedf6cd364 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -1695,6 +1695,18 @@ MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
   return BB;
 }
 
+// Insert a read from R1, which almost always contains the value 0.
+MachineBasicBlock *
+AVRTargetLowering::insertCopyR1(MachineInstr &MI, MachineBasicBlock *BB) const {
+  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
+  MachineBasicBlock::iterator I(MI);
+  BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::COPY))
+      .add(MI.getOperand(0))
+      .addReg(AVR::R1);
+  MI.eraseFromParent();
+  return BB;
+}
+
 MachineBasicBlock *
 AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *MBB) const {
@@ -1717,6 +1729,8 @@ AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case AVR::MULRdRr:
   case AVR::MULSRdRr:
     return insertMul(MI, MBB);
+  case AVR::CopyR1:
+    return insertCopyR1(MI, MBB);
   }
 
   assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.h b/llvm/lib/Target/AVR/AVRISelLowering.h
index 223a47372ef7b..116417b615669 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -187,6 +187,8 @@ class AVRTargetLowering : public TargetLowering {
 private:
   MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
   MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
+  MachineBasicBlock *insertCopyR1(MachineInstr &MI,
+                                  MachineBasicBlock *BB) const;
 };
 
 } // end namespace llvm
diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td
index 5cbf3baef5466..2b96dc0b833ad 100644
--- a/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -2390,6 +2390,10 @@ def Asr16 : ShiftPseudo<(outs DREGS
                                                      : $src, i8
                                                      : $cnt))]>;
 
+// lowered to a copy from R1, which contains the value zero.
+let usesCustomInserter=1 in
+def CopyR1 : Pseudo<(outs GPR8:$rd), (ins), "clrz\t$rd", [(set i8:$rd, 0)]>;
+
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AVR/smul-with-overflow.ll b/llvm/test/CodeGen/AVR/smul-with-overflow.ll
index f2d29161b4d32..cffb84fa0f212 100644
--- a/llvm/test/CodeGen/AVR/smul-with-overflow.ll
+++ b/llvm/test/CodeGen/AVR/smul-with-overflow.ll
@@ -18,7 +18,7 @@ entry-block:
 ; CHECK: ldi    [[RET:r[0-9]+]], 1
 ; CHECK: cp     {{.*}}[[HIGH]], {{.*}}[[LOW]]
 ; CHECK: brne   [[LABEL:.LBB[_0-9]+]]
-; CHECK: ldi    {{.*}}[[RET]], 0
+; CHECK: mov    {{.*}}[[RET]], r1
 ; CHECK: {{.*}}[[LABEL]]
 ; CHECK: ret
 }
diff --git a/llvm/test/CodeGen/AVR/store-undef.ll b/llvm/test/CodeGen/AVR/store-undef.ll
index 1f395b331ca26..4ea1a572a02d5 100644
--- a/llvm/test/CodeGen/AVR/store-undef.ll
+++ b/llvm/test/CodeGen/AVR/store-undef.ll
@@ -6,8 +6,7 @@
 ; CHECK-LABEL: foo
 define void @foo() {
 
-  ; CHECK:      ldi [[SRC:r[0-9]+]], 0
-  ; CHECK-NEXT: st [[PTRREG:X|Y|Z]], [[SRC]]
+  ; CHECK: st [[PTRREG:X|Y|Z]], r1
   store i8 0, i8* undef, align 4
   ret void
 }
diff --git a/llvm/test/CodeGen/AVR/umul-with-overflow.ll b/llvm/test/CodeGen/AVR/umul-with-overflow.ll
index c3c4a30f87ccf..6df0738151543 100644
--- a/llvm/test/CodeGen/AVR/umul-with-overflow.ll
+++ b/llvm/test/CodeGen/AVR/umul-with-overflow.ll
@@ -14,7 +14,7 @@ entry-block:
 ; CHECK: ldi    [[RET:r[0-9]+]], 1
 ; CHECK: cpi    {{.*}}[[HIGH]], 0
 ; CHECK: brne   [[LABEL:.LBB[_0-9]+]]
-; CHECK: ldi    {{.*}}[[RET]], 0
+; CHECK: mov    {{.*}}[[RET]], r1
 ; CHECK: {{.*}}[[LABEL]]
 ; CHECK: ret
 }

From 7c66aaddb128dc0f342830c1efaeb7a278bfc48c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 16:20:34 +0000
Subject: [PATCH 288/946] [DAG] Fold (X & Y) != 0 --> zextOrTrunc(X & Y) iff
 everything but LSB is known zero (PR51312)

Fixes parity codegen issue where we know all but the lowest bit is zero, we can replace the ICMPNE with 0 comparison with a ext/trunc

Differential Revision: https://reviews.llvm.org/D117983
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 19 +++++++++++++++----
 llvm/test/CodeGen/X86/parity-vec.ll           |  4 ++--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3b53a5b8b7532..269c332ae28a1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3254,17 +3254,29 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                          ISD::CondCode Cond, const SDLoc &DL,
                                          DAGCombinerInfo &DCI) const {
-  // Match these patterns in any of their permutations:
-  // (X & Y) == Y
-  // (X & Y) != Y
   if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
     std::swap(N0, N1);
 
+  SelectionDAG &DAG = DCI.DAG;
   EVT OpVT = N0.getValueType();
   if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
       (Cond != ISD::SETEQ && Cond != ISD::SETNE))
     return SDValue();
 
+  // (X & Y) != 0 --> zextOrTrunc(X & Y)
+  // iff everything but LSB is known zero:
+  if (Cond == ISD::SETNE && isNullConstant(N1) &&
+      (getBooleanContents(VT) == TargetLowering::UndefinedBooleanContent ||
+       getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent)) {
+    unsigned NumEltBits = OpVT.getScalarSizeInBits();
+    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
+    if (DAG.MaskedValueIsZero(N0, UpperBits))
+      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
+  }
+
+  // Match these patterns in any of their permutations:
+  // (X & Y) == Y
+  // (X & Y) != Y
   SDValue X, Y;
   if (N0.getOperand(0) == N1) {
     X = N0.getOperand(1);
@@ -3276,7 +3288,6 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
     return SDValue();
   }
 
-  SelectionDAG &DAG = DCI.DAG;
   SDValue Zero = DAG.getConstant(0, DL, OpVT);
   if (DAG.isKnownToBeAPowerOfTwo(Y)) {
     // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll
index 60d9d4be68e79..ed64bb5eddf49 100644
--- a/llvm/test/CodeGen/X86/parity-vec.ll
+++ b/llvm/test/CodeGen/X86/parity-vec.ll
@@ -36,8 +36,8 @@ define i1 @canonical_parity(<16 x i1> %x) {
 ; POPCNT-NEXT:    psllw $7, %xmm0
 ; POPCNT-NEXT:    pmovmskb %xmm0, %eax
 ; POPCNT-NEXT:    popcntl %eax, %eax
-; POPCNT-NEXT:    testb $1, %al
-; POPCNT-NEXT:    setne %al
+; POPCNT-NEXT:    andl $1, %eax
+; POPCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; POPCNT-NEXT:    retq
   %i1 = bitcast <16 x i1> %x to i16
   %i2 = call i16 @llvm.ctpop.i16(i16 %i1)

From 2e26633af0c88ea23e3e8783ef60e621f282d3fb Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Sun, 23 Jan 2022 11:11:26 -0500
Subject: [PATCH 289/946] [IR] document and update ctlz/cttz intrinsics to
 optionally return poison rather than undef

The behavior in Analysis (knownbits) implements poison semantics already,
and we expect the transforms (for example, in instcombine) derived from
those semantics, so this patch changes the LangRef and remaining code to
be consistent. This is one more step in removing "undef" from LLVM.

Without this, I think https://github.com/llvm/llvm-project/issues/53330
has a legitimate complaint because that report wants to allow subsequent
code to mask off bits, and that is allowed with undef values. The clang
builtins are not actually documented anywhere AFAICT, but we might want
to add that to remove more uncertainty.

Differential Revision: https://reviews.llvm.org/D117912
---
 llvm/docs/LangRef.rst                         | 42 ++++++--------
 llvm/lib/Analysis/ConstantFolding.cpp         |  4 +-
 llvm/lib/Analysis/ValueTracking.cpp           |  4 +-
 .../InstCombine/InstCombineCalls.cpp          |  6 +-
 .../InstCombine/InstCombineSelect.cpp         |  6 +-
 .../InstCombine/intrinsic-select.ll           |  2 +-
 .../test/Transforms/InstCombine/intrinsics.ll | 56 +++++++++----------
 .../InstSimplify/ConstProp/bitcount.ll        | 36 ++++++------
 8 files changed, 75 insertions(+), 81 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index f748fc5d4d213..aa010193d114b 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -14958,12 +14958,8 @@ targets support all bit widths or vector types, however.
 
 ::
 
-      declare i8   @llvm.ctlz.i8  (i8   <src>, i1 <is_zero_undef>)
-      declare i16  @llvm.ctlz.i16 (i16  <src>, i1 <is_zero_undef>)
-      declare i32  @llvm.ctlz.i32 (i32  <src>, i1 <is_zero_undef>)
-      declare i64  @llvm.ctlz.i64 (i64  <src>, i1 <is_zero_undef>)
-      declare i256 @llvm.ctlz.i256(i256 <src>, i1 <is_zero_undef>)
-      declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
+      declare i8   @llvm.ctlz.i8  (i8   <src>, i1 <is_zero_poison>)
+      declare <2 x i37> @llvm.ctlz.v2i37(<2 x i37> <src>, i1 <is_zero_poison>)
 
 Overview:
 """""""""
@@ -14978,11 +14974,12 @@ The first argument is the value to be counted. This argument may be of
 any integer type, or a vector with integer element type. The return
 type must match the first argument type.
 
-The second argument must be a constant and is a flag to indicate whether
-the intrinsic should ensure that a zero as the first argument produces a
-defined result. Historically some architectures did not provide a
-defined result for zero values as efficiently, and many algorithms are
-now predicated on avoiding zero-value inputs.
+The second argument is a constant flag that indicates whether the intrinsic
+returns a valid result if the first argument is zero. If the first
+argument is zero and the second argument is true, the result is poison.
+Historically some architectures did not provide a defined result for zero
+values as efficiently, and many algorithms are now predicated on avoiding
+zero-value inputs.
 
 Semantics:
 """"""""""
@@ -14990,7 +14987,7 @@ Semantics:
 The '``llvm.ctlz``' intrinsic counts the leading (most significant)
 zeros in a variable, or within each element of the vector. If
 ``src == 0`` then the result is the size in bits of the type of ``src``
-if ``is_zero_undef == 0`` and ``undef`` otherwise. For example,
+if ``is_zero_poison == 0`` and ``poison`` otherwise. For example,
 ``llvm.ctlz(i32 2) = 30``.
 
 '``llvm.cttz.*``' Intrinsic
@@ -15005,12 +15002,8 @@ support all bit widths or vector types, however.
 
 ::
 
-      declare i8   @llvm.cttz.i8  (i8   <src>, i1 <is_zero_undef>)
-      declare i16  @llvm.cttz.i16 (i16  <src>, i1 <is_zero_undef>)
-      declare i32  @llvm.cttz.i32 (i32  <src>, i1 <is_zero_undef>)
-      declare i64  @llvm.cttz.i64 (i64  <src>, i1 <is_zero_undef>)
-      declare i256 @llvm.cttz.i256(i256 <src>, i1 <is_zero_undef>)
-      declare <2 x i32> @llvm.cttz.v2i32(<2 x i32> <src>, i1 <is_zero_undef>)
+      declare i42   @llvm.cttz.i42  (i42   <src>, i1 <is_zero_poison>)
+      declare <2 x i32> @llvm.cttz.v2i32(<2 x i32> <src>, i1 <is_zero_poison>)
 
 Overview:
 """""""""
@@ -15025,11 +15018,12 @@ The first argument is the value to be counted. This argument may be of
 any integer type, or a vector with integer element type. The return
 type must match the first argument type.
 
-The second argument must be a constant and is a flag to indicate whether
-the intrinsic should ensure that a zero as the first argument produces a
-defined result. Historically some architectures did not provide a
-defined result for zero values as efficiently, and many algorithms are
-now predicated on avoiding zero-value inputs.
+The second argument is a constant flag that indicates whether the intrinsic
+returns a valid result if the first argument is zero. If the first
+argument is zero and the second argument is true, the result is poison.
+Historically some architectures did not provide a defined result for zero
+values as efficiently, and many algorithms are now predicated on avoiding
+zero-value inputs.
 
 Semantics:
 """"""""""
@@ -15037,7 +15031,7 @@ Semantics:
 The '``llvm.cttz``' intrinsic counts the trailing (least significant)
 zeros in a variable, or within each element of a vector. If ``src == 0``
 then the result is the size in bits of the type of ``src`` if
-``is_zero_undef == 0`` and ``undef`` otherwise. For example,
+``is_zero_poison == 0`` and ``poison`` otherwise. For example,
 ``llvm.cttz(2) = 1``.
 
 .. _int_overflow:
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 38c9cc7b9df29..772316e7469d9 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -2572,9 +2572,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
     case Intrinsic::ctlz:
       assert(C1 && "Must be constant int");
 
-      // cttz(0, 1) and ctlz(0, 1) are undef.
+      // cttz(0, 1) and ctlz(0, 1) are poison.
       if (C1->isOne() && (!C0 || C0->isZero()))
-        return UndefValue::get(Ty);
+        return PoisonValue::get(Ty);
       if (!C0)
         return Constant::getNullValue(Ty);
       if (IntrinsicID == Intrinsic::cttz)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 942dbe043ab3f..34358739f9a85 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1593,7 +1593,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
         // If we have a known 1, its position is our upper bound.
         unsigned PossibleLZ = Known2.countMaxLeadingZeros();
-        // If this call is undefined for 0, the result will be less than 2^n.
+        // If this call is poison for 0 input, the result will be less than 2^n.
         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
           PossibleLZ = std::min(PossibleLZ, BitWidth - 1);
         unsigned LowBits = Log2_32(PossibleLZ)+1;
@@ -1604,7 +1604,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
         computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q);
         // If we have a known 1, its position is our upper bound.
         unsigned PossibleTZ = Known2.countMaxTrailingZeros();
-        // If this call is undefined for 0, the result will be less than 2^n.
+        // If this call is poison for 0 input, the result will be less than 2^n.
         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
           PossibleTZ = std::min(PossibleTZ, BitWidth - 1);
         unsigned LowBits = Log2_32(PossibleTZ)+1;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index f63a186166ecc..3a3f169d2f516 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -494,7 +494,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
     // ctlz/cttz i1 Op0 --> not Op0
     if (match(Op1, m_Zero()))
       return BinaryOperator::CreateNot(Op0);
-    // If zero is undef, then the input can be assumed to be "true", so the
+    // If zero is poison, then the input can be assumed to be "true", so the
     // instruction simplifies to "false".
     assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
     return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
@@ -519,7 +519,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
     }
 
     // Zext doesn't change the number of trailing zeros, so narrow:
-    // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsUndef' parameter is 'true'.
+    // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
     if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
       auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
                                                     IC.Builder.getTrue());
@@ -556,7 +556,7 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
   }
 
   // If the input to cttz/ctlz is known to be non-zero,
-  // then change the 'ZeroIsUndef' parameter to 'true'
+  // then change the 'ZeroIsPoison' parameter to 'true'
   // because we know the zero behavior can't affect the result.
   if (!Known.One.isZero() ||
       isKnownNonZero(Op0, IC.getDataLayout(), 0, &IC.getAssumptionCache(), &II,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index cbdf04572042c..65e60498ff954 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -943,7 +943,7 @@ static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal,
 }
 
 /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
-/// call to cttz/ctlz with flag 'is_zero_undef' cleared.
+/// call to cttz/ctlz with flag 'is_zero_poison' cleared.
 ///
 /// For example, we can fold the following code sequence:
 /// \code
@@ -987,7 +987,7 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
   // sizeof in bits of 'Count'.
   unsigned SizeOfInBits = Count->getType()->getScalarSizeInBits();
   if (match(ValueOnZero, m_SpecificInt(SizeOfInBits))) {
-    // Explicitly clear the 'undef_on_zero' flag. It's always valid to go from
+    // Explicitly clear the 'is_zero_poison' flag. It's always valid to go from
     // true to false on this flag, so we can replace it for all users.
     II->setArgOperand(1, ConstantInt::getFalse(II->getContext()));
     return SelectArg;
@@ -995,7 +995,7 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
 
   // The ValueOnZero is not the bitwidth. But if the cttz/ctlz (and optional
   // zext/trunc) have one use (ending at the select), the cttz/ctlz result will
-  // not be used if the input is zero. Relax to 'undef_on_zero' for that case.
+  // not be used if the input is zero. Relax to 'zero is poison' for that case.
   if (II->hasOneUse() && SelectArg->hasOneUse() &&
       !match(II->getArgOperand(1), m_One()))
     II->setArgOperand(1, ConstantInt::getTrue(II->getContext()));
diff --git a/llvm/test/Transforms/InstCombine/intrinsic-select.ll b/llvm/test/Transforms/InstCombine/intrinsic-select.ll
index 92c5e3b38bd76..cbabbb8b4456f 100644
--- a/llvm/test/Transforms/InstCombine/intrinsic-select.ll
+++ b/llvm/test/Transforms/InstCombine/intrinsic-select.ll
@@ -36,7 +36,7 @@ define i32 @ctlz_sel_const_true(i1 %b, i32 %x) {
 define <3 x i17> @ctlz_sel_const_false(<3 x i1> %b, <3 x i17> %x) {
 ; CHECK-LABEL: @ctlz_sel_const_false(
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i17> @llvm.ctlz.v3i17(<3 x i17> [[X:%.*]], i1 true)
-; CHECK-NEXT:    [[C:%.*]] = select <3 x i1> [[B:%.*]], <3 x i17> [[TMP1]], <3 x i17> <i17 14, i17 0, i17 undef>
+; CHECK-NEXT:    [[C:%.*]] = select <3 x i1> [[B:%.*]], <3 x i17> [[TMP1]], <3 x i17> <i17 14, i17 0, i17 poison>
 ; CHECK-NEXT:    ret <3 x i17> [[C]]
 ;
   %s = select <3 x i1> %b, <3 x i17> %x, <3 x i17> <i17 7, i17 -1, i17 0>
diff --git a/llvm/test/Transforms/InstCombine/intrinsics.ll b/llvm/test/Transforms/InstCombine/intrinsics.ll
index cabd82c3044eb..fc1c1e5aed3a7 100644
--- a/llvm/test/Transforms/InstCombine/intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/intrinsics.ll
@@ -79,8 +79,8 @@ define i1 @cttz_i1(i1 %arg) {
   ret i1 %cnt
 }
 
-define i1 @cttz_i1_zero_is_undef(i1 %arg) {
-; CHECK-LABEL: @cttz_i1_zero_is_undef(
+define i1 @cttz_i1_zero_is_poison(i1 %arg) {
+; CHECK-LABEL: @cttz_i1_zero_is_poison(
 ; CHECK-NEXT:    ret i1 false
 ;
   %cnt = call i1 @llvm.cttz.i1(i1 %arg, i1 true) nounwind readnone
@@ -96,8 +96,8 @@ define <2 x i1> @cttz_v2i1(<2 x i1> %arg) {
   ret <2 x i1> %cnt
 }
 
-define <2 x i1> @cttz_v2i1_zero_is_undef(<2 x i1> %arg) {
-; CHECK-LABEL: @cttz_v2i1_zero_is_undef(
+define <2 x i1> @cttz_v2i1_zero_is_poison(<2 x i1> %arg) {
+; CHECK-LABEL: @cttz_v2i1_zero_is_poison(
 ; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %cnt = call <2 x i1> @llvm.cttz.v2i1(<2 x i1> %arg, i1 true) nounwind readnone
@@ -196,8 +196,8 @@ define i1 @ctlz_i1(i1 %arg) {
   ret i1 %cnt
 }
 
-define i1 @ctlz_i1_zero_is_undef(i1 %arg) {
-; CHECK-LABEL: @ctlz_i1_zero_is_undef(
+define i1 @ctlz_i1_zero_is_poison(i1 %arg) {
+; CHECK-LABEL: @ctlz_i1_zero_is_poison(
 ; CHECK-NEXT:    ret i1 false
 ;
   %cnt = call i1 @llvm.ctlz.i1(i1 %arg, i1 true) nounwind readnone
@@ -213,8 +213,8 @@ define <2 x i1> @ctlz_v2i1(<2 x i1> %arg) {
   ret <2 x i1> %cnt
 }
 
-define <2 x i1> @ctlz_v2i1_zero_is_undef(<2 x i1> %arg) {
-; CHECK-LABEL: @ctlz_v2i1_zero_is_undef(
+define <2 x i1> @ctlz_v2i1_zero_is_poison(<2 x i1> %arg) {
+; CHECK-LABEL: @ctlz_v2i1_zero_is_poison(
 ; CHECK-NEXT:    ret <2 x i1> zeroinitializer
 ;
   %cnt = call <2 x i1> @llvm.ctlz.v2i1(<2 x i1> %arg, i1 true) nounwind readnone
@@ -283,24 +283,24 @@ define <2 x i1> @ctlz_knownbits3_vec(<2 x i8> %arg) {
   ret <2 x i1> %res
 }
 
-define i32 @ctlz_undef(i32 %Value) {
-; CHECK-LABEL: @ctlz_undef(
-; CHECK-NEXT:    ret i32 undef
+define i32 @ctlz_poison(i32 %Value) {
+; CHECK-LABEL: @ctlz_poison(
+; CHECK-NEXT:    ret i32 poison
 ;
   %ctlz = call i32 @llvm.ctlz.i32(i32 0, i1 true)
   ret i32 %ctlz
 }
 
-define <2 x i32> @ctlz_undef_vec(<2 x i32> %Value) {
-; CHECK-LABEL: @ctlz_undef_vec(
-; CHECK-NEXT:    ret <2 x i32> undef
+define <2 x i32> @ctlz_poison_vec(<2 x i32> %Value) {
+; CHECK-LABEL: @ctlz_poison_vec(
+; CHECK-NEXT:    ret <2 x i32> poison
 ;
   %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> zeroinitializer, i1 true)
   ret <2 x i32> %ctlz
 }
 
-define i32 @ctlz_make_undef(i32 %a) {
-; CHECK-LABEL: @ctlz_make_undef(
+define i32 @ctlz_no_zero(i32 %a) {
+; CHECK-LABEL: @ctlz_no_zero(
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[A:%.*]], 8
 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[OR]], i1 true), !range [[RNG2:![0-9]+]]
 ; CHECK-NEXT:    ret i32 [[CTLZ]]
@@ -310,8 +310,8 @@ define i32 @ctlz_make_undef(i32 %a) {
   ret i32 %ctlz
 }
 
-define <2 x i32> @ctlz_make_undef_vec(<2 x i32> %a) {
-; CHECK-LABEL: @ctlz_make_undef_vec(
+define <2 x i32> @ctlz_no_zero_vec(<2 x i32> %a) {
+; CHECK-LABEL: @ctlz_no_zero_vec(
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[OR]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[CTLZ]]
@@ -321,24 +321,24 @@ define <2 x i32> @ctlz_make_undef_vec(<2 x i32> %a) {
   ret <2 x i32> %ctlz
 }
 
-define i32 @cttz_undef(i32 %Value) nounwind {
-; CHECK-LABEL: @cttz_undef(
-; CHECK-NEXT:    ret i32 undef
+define i32 @cttz_poison(i32 %Value) {
+; CHECK-LABEL: @cttz_poison(
+; CHECK-NEXT:    ret i32 poison
 ;
   %cttz = call i32 @llvm.cttz.i32(i32 0, i1 true)
   ret i32 %cttz
 }
 
-define <2 x i32> @cttz_undef_vec(<2 x i32> %Value) nounwind {
-; CHECK-LABEL: @cttz_undef_vec(
-; CHECK-NEXT:    ret <2 x i32> undef
+define <2 x i32> @cttz_poison_vec(<2 x i32> %Value) {
+; CHECK-LABEL: @cttz_poison_vec(
+; CHECK-NEXT:    ret <2 x i32> poison
 ;
   %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> zeroinitializer, i1 true)
   ret <2 x i32> %cttz
 }
 
-define i32 @cttz_make_undef(i32 %a) {
-; CHECK-LABEL: @cttz_make_undef(
+define i32 @cttz_no_zero(i32 %a) {
+; CHECK-LABEL: @cttz_no_zero(
 ; CHECK-NEXT:    [[OR:%.*]] = or i32 [[A:%.*]], 8
 ; CHECK-NEXT:    [[CTTZ:%.*]] = tail call i32 @llvm.cttz.i32(i32 [[OR]], i1 true), !range [[RNG3:![0-9]+]]
 ; CHECK-NEXT:    ret i32 [[CTTZ]]
@@ -348,8 +348,8 @@ define i32 @cttz_make_undef(i32 %a) {
   ret i32 %cttz
 }
 
-define <2 x i32> @cttz_make_undef_vec(<2 x i32> %a) {
-; CHECK-LABEL: @cttz_make_undef_vec(
+define <2 x i32> @cttz_no_zero_vec(<2 x i32> %a) {
+; CHECK-LABEL: @cttz_no_zero_vec(
 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[A:%.*]], <i32 8, i32 8>
 ; CHECK-NEXT:    [[CTTZ:%.*]] = tail call <2 x i32> @llvm.cttz.v2i32(<2 x i32> [[OR]], i1 true)
 ; CHECK-NEXT:    ret <2 x i32> [[CTTZ]]
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/bitcount.ll b/llvm/test/Transforms/InstSimplify/ConstProp/bitcount.ll
index f737653a6f3bc..5d020e9677e3b 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/bitcount.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/bitcount.ll
@@ -48,9 +48,9 @@ define i32 @cttz_zero_defined() {
   ret i32 %x
 }
 
-define i32 @cttz_zero_undefined() {
-; CHECK-LABEL: @cttz_zero_undefined(
-; CHECK-NEXT:    ret i32 undef
+define i32 @cttz_zero_is_poison() {
+; CHECK-LABEL: @cttz_zero_is_poison(
+; CHECK-NEXT:    ret i32 poison
 ;
   %x = call i32 @llvm.cttz.i32(i32 0, i1 true)
   ret i32 %x
@@ -64,9 +64,9 @@ define i33 @ctlz_zero_defined() {
   ret i33 %x
 }
 
-define i33 @ctlz_zero_undefined() {
-; CHECK-LABEL: @ctlz_zero_undefined(
-; CHECK-NEXT:    ret i33 undef
+define i33 @ctlz_zero_is_poison() {
+; CHECK-LABEL: @ctlz_zero_is_poison(
+; CHECK-NEXT:    ret i33 poison
 ;
   %x = call i33 @llvm.ctlz.i33(i33 0, i1 true)
   ret i33 %x
@@ -88,9 +88,9 @@ define i32 @cttz_undef_defined() {
   ret i32 %x
 }
 
-define i32 @cttz_undef_undefined() {
-; CHECK-LABEL: @cttz_undef_undefined(
-; CHECK-NEXT:    ret i32 undef
+define i32 @cttz_undef_zero_is_poison() {
+; CHECK-LABEL: @cttz_undef_zero_is_poison(
+; CHECK-NEXT:    ret i32 poison
 ;
   %x = call i32 @llvm.cttz.i32(i32 undef, i1 true)
   ret i32 %x
@@ -104,9 +104,9 @@ define i33 @ctlz_undef_defined() {
   ret i33 %x
 }
 
-define i33 @ctlz_undef_undefined() {
-; CHECK-LABEL: @ctlz_undef_undefined(
-; CHECK-NEXT:    ret i33 undef
+define i33 @ctlz_undef_zero_is_poison() {
+; CHECK-LABEL: @ctlz_undef_zero_is_poison(
+; CHECK-NEXT:    ret i33 poison
 ;
   %x = call i33 @llvm.ctlz.i33(i33 undef, i1 true)
   ret i33 %x
@@ -144,9 +144,9 @@ define <2 x i32> @cttz_vector_undef_defined() {
   ret <2 x i32> %x
 }
 
-define <2 x i32> @cttz_vector_undef_undefined() {
-; CHECK-LABEL: @cttz_vector_undef_undefined(
-; CHECK-NEXT:    ret <2 x i32> undef
+define <2 x i32> @cttz_vector_undef_zero_is_poison() {
+; CHECK-LABEL: @cttz_vector_undef_zero_is_poison(
+; CHECK-NEXT:    ret <2 x i32> poison
 ;
   %x = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> <i32 0, i32 undef>, i1 true)
   ret <2 x i32> %x
@@ -168,9 +168,9 @@ define <2 x i33> @ctlz_vector_undef_defined() {
   ret <2 x i33> %x
 }
 
-define <2 x i33> @ctlz_vector_undef_undefined() {
-; CHECK-LABEL: @ctlz_vector_undef_undefined(
-; CHECK-NEXT:    ret <2 x i33> undef
+define <2 x i33> @ctlz_vector_undef_zero_is_poison() {
+; CHECK-LABEL: @ctlz_vector_undef_zero_is_poison(
+; CHECK-NEXT:    ret <2 x i33> poison
 ;
   %x = call <2 x i33> @llvm.ctlz.v2i33(<2 x i33> <i33 0, i33 undef>, i1 true)
   ret <2 x i33> %x

From d2e8fb331835fcc565929720781a5fd64e66fc17 Mon Sep 17 00:00:00 2001
From: Richard <legalize@xmission.com>
Date: Sat, 1 Jan 2022 22:47:22 -0700
Subject: [PATCH 290/946] [clang-tidy] Add readability-duplicate-include check

Looks for duplicate includes and removes them.

Every time an include directive is processed, check a vector of filenames
to see if the included file has already been included.  If so, it issues
a warning and a replacement to remove the entire line containing the
duplicated include directive.

When a macro is defined or undefined, the vector of filenames is cleared.
This enables including the same file multiple times, but getting
different expansions based on the set of active macros at the time of
inclusion.  For example:

  #undef NDEBUG
  #include "assertion.h"
  // ...code with assertions enabled

  #define NDEBUG
  #include "assertion.h"
  // ...code with assertions disabled

Since macros are redefined between the inclusion of assertion.h,
they are not flagged as redundant.

Differential Revision: https://reviews.llvm.org/D7982
---
 .../clang-tidy/readability/CMakeLists.txt     |   1 +
 .../readability/DuplicateIncludeCheck.cpp     | 116 ++++++++++++++++++
 .../readability/DuplicateIncludeCheck.h       |  35 ++++++
 .../readability/ReadabilityTidyModule.cpp     |   3 +
 clang-tools-extra/docs/ReleaseNotes.rst       |   5 +
 .../docs/clang-tidy/checks/list.rst           |   1 +
 .../checks/readability-duplicate-include.rst  |  35 ++++++
 .../readability-duplicate-include.h           |  15 +++
 .../readability-duplicate-include2.h          |   1 +
 .../system/iostream                           |   1 +
 .../system/string.h                           |   1 +
 .../system/sys/types.h                        |   1 +
 .../system/types.h                            |   1 +
 .../readability-duplicate-include.cpp         |  72 +++++++++++
 14 files changed, 288 insertions(+)
 create mode 100644 clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
 create mode 100644 clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h
 create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability-duplicate-include.rst
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include2.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/iostream
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/string.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/sys/types.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/types.h
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability-duplicate-include.cpp

diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
index eba0ab98cb37a..22ce8f62751ec 100644
--- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
@@ -11,6 +11,7 @@ add_clang_library(clangTidyReadabilityModule
   ContainerSizeEmptyCheck.cpp
   ConvertMemberFunctionsToStatic.cpp
   DeleteNullPointerCheck.cpp
+  DuplicateIncludeCheck.cpp
   ElseAfterReturnCheck.cpp
   FunctionCognitiveComplexityCheck.cpp
   FunctionSizeCheck.cpp
diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
new file mode 100644
index 0000000000000..681b8399154a7
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
@@ -0,0 +1,116 @@
+//===--- DuplicateIncludeCheck.cpp - clang-tidy ---------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "DuplicateIncludeCheck.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Lex/Preprocessor.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include <memory>
+
+namespace clang {
+namespace tidy {
+namespace readability {
+
+static SourceLocation advanceBeyondCurrentLine(const SourceManager &SM,
+                                               SourceLocation Start,
+                                               int Offset) {
+  const FileID Id = SM.getFileID(Start);
+  const unsigned LineNumber = SM.getSpellingLineNumber(Start);
+  while (SM.getFileID(Start) == Id &&
+         SM.getSpellingLineNumber(Start.getLocWithOffset(Offset)) == LineNumber)
+    Start = Start.getLocWithOffset(Offset);
+  return Start;
+}
+
+namespace {
+
+using FileList = SmallVector<StringRef>;
+
+class DuplicateIncludeCallbacks : public PPCallbacks {
+public:
+  DuplicateIncludeCallbacks(DuplicateIncludeCheck &Check,
+                            const SourceManager &SM)
+      : Check(Check), SM(SM) {
+    // The main file doesn't participate in the FileChanged notification.
+    Files.emplace_back();
+  }
+
+  void FileChanged(SourceLocation Loc, FileChangeReason Reason,
+                   SrcMgr::CharacteristicKind FileType,
+                   FileID PrevFID) override;
+
+  void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
+                          StringRef FileName, bool IsAngled,
+                          CharSourceRange FilenameRange, const FileEntry *File,
+                          StringRef SearchPath, StringRef RelativePath,
+                          const Module *Imported,
+                          SrcMgr::CharacteristicKind FileType) override;
+
+  void MacroDefined(const Token &MacroNameTok,
+                    const MacroDirective *MD) override;
+
+  void MacroUndefined(const Token &MacroNameTok, const MacroDefinition &MD,
+                      const MacroDirective *Undef) override;
+
+private:
+  // A list of included files is kept for each file we enter.
+  SmallVector<FileList> Files;
+  DuplicateIncludeCheck &Check;
+  const SourceManager &SM;
+};
+
+void DuplicateIncludeCallbacks::FileChanged(SourceLocation Loc,
+                                            FileChangeReason Reason,
+                                            SrcMgr::CharacteristicKind FileType,
+                                            FileID PrevFID) {
+  if (Reason == EnterFile)
+    Files.emplace_back();
+  else
+    Files.pop_back();
+}
+
+void DuplicateIncludeCallbacks::InclusionDirective(
+    SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName,
+    bool IsAngled, CharSourceRange FilenameRange, const FileEntry *File,
+    StringRef SearchPath, StringRef RelativePath, const Module *Imported,
+    SrcMgr::CharacteristicKind FileType) {
+  if (llvm::find(Files.back(), FileName) != Files.back().end()) {
+    // We want to delete the entire line, so make sure that [Start,End] covers
+    // everything.
+    SourceLocation Start =
+        advanceBeyondCurrentLine(SM, HashLoc, -1).getLocWithOffset(-1);
+    SourceLocation End =
+        advanceBeyondCurrentLine(SM, FilenameRange.getEnd(), 1);
+    Check.diag(HashLoc, "duplicate include")
+        << FixItHint::CreateRemoval(SourceRange{Start, End});
+  } else
+    Files.back().push_back(FileName);
+}
+
+void DuplicateIncludeCallbacks::MacroDefined(const Token &MacroNameTok,
+                                             const MacroDirective *MD) {
+  Files.back().clear();
+}
+
+void DuplicateIncludeCallbacks::MacroUndefined(const Token &MacroNameTok,
+                                               const MacroDefinition &MD,
+                                               const MacroDirective *Undef) {
+  Files.back().clear();
+}
+
+} // namespace
+
+void DuplicateIncludeCheck::registerPPCallbacks(
+    const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
+  PP->addPPCallbacks(std::make_unique<DuplicateIncludeCallbacks>(*this, SM));
+}
+
+} // namespace readability
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h
new file mode 100644
index 0000000000000..b213e3a4b73cc
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h
@@ -0,0 +1,35 @@
+//===--- DuplicateIncludeCheck.h - clang-tidy -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace readability {
+
+/// \brief Find and remove duplicate #include directives.
+///
+/// Only consecutive include directives without any other preprocessor
+/// directives between them are analyzed.
+class DuplicateIncludeCheck : public ClangTidyCheck {
+public:
+  DuplicateIncludeCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+
+  void registerPPCallbacks(const SourceManager &SM, Preprocessor *PP,
+                           Preprocessor *ModuleExpanderPP) override;
+};
+
+} // namespace readability
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_DUPLICATE_INCLUDE_CHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
index 2d6540283ded5..b0493d43ff318 100644
--- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
@@ -16,6 +16,7 @@
 #include "ContainerSizeEmptyCheck.h"
 #include "ConvertMemberFunctionsToStatic.h"
 #include "DeleteNullPointerCheck.h"
+#include "DuplicateIncludeCheck.h"
 #include "ElseAfterReturnCheck.h"
 #include "FunctionCognitiveComplexityCheck.h"
 #include "FunctionSizeCheck.h"
@@ -71,6 +72,8 @@ class ReadabilityModule : public ClangTidyModule {
         "readability-convert-member-functions-to-static");
     CheckFactories.registerCheck<DeleteNullPointerCheck>(
         "readability-delete-null-pointer");
+    CheckFactories.registerCheck<DuplicateIncludeCheck>(
+        "readability-duplicate-include");
     CheckFactories.registerCheck<ElseAfterReturnCheck>(
         "readability-else-after-return");
     CheckFactories.registerCheck<FunctionCognitiveComplexityCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 1f7228d5732bd..060af42521552 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -124,6 +124,11 @@ New checks
   Finds cases where code could use ``data()`` rather than the address of the
   element at index 0 in a container.
 
+- New :doc:`readability-duplicate-include
+  <clang-tidy/checks/readability-duplicate-include>` check.
+
+  Looks for duplicate includes and removes them.  
+
 - New :doc:`readability-identifier-length
   <clang-tidy/checks/readability-identifier-length>` check.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 9785c9f43a4b4..5878345bdfcfd 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -294,6 +294,7 @@ Clang-Tidy Checks
    `readability-container-size-empty <readability-container-size-empty.html>`_, "Yes"
    `readability-convert-member-functions-to-static <readability-convert-member-functions-to-static.html>`_, "Yes"
    `readability-delete-null-pointer <readability-delete-null-pointer.html>`_, "Yes"
+   `readability-duplicate-include <readability-duplicate-include.html>`_, "Yes"
    `readability-else-after-return <readability-else-after-return.html>`_, "Yes"
    `readability-function-cognitive-complexity <readability-function-cognitive-complexity.html>`_,
    `readability-function-size <readability-function-size.html>`_,
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-duplicate-include.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-duplicate-include.rst
new file mode 100644
index 0000000000000..45df7e1b84f3f
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability-duplicate-include.rst
@@ -0,0 +1,35 @@
+.. title:: clang-tidy - readability-duplicate-include
+
+readability-duplicate-include
+=============================
+
+Looks for duplicate includes and removes them.  The check maintains a list of
+included files and looks for duplicates.  If a macro is defined or undefined
+then the list of included files is cleared.
+
+Examples:
+
+.. code-block:: c++
+
+  #include <memory>
+  #include <vector>
+  #include <memory>
+
+becomes
+
+.. code-block:: c++
+
+  #include <memory>
+  #include <vector>
+
+Because of the intervening macro definitions, this code remains unchanged:
+
+.. code-block:: c++
+
+  #undef NDEBUG
+  #include "assertion.h"
+  // ...code with assertions enabled
+
+  #define NDEBUG
+  #include "assertion.h"
+  // ...code with assertions disabled
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include.h
new file mode 100644
index 0000000000000..7d84adb816622
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include.h
@@ -0,0 +1,15 @@
+#ifndef READABILITY_DUPLICATE_INCLUDE_H
+#define READABILITY_DUPLICATE_INCLUDE_H
+
+extern int g;
+#include "readability-duplicate-include2.h"
+extern int h;
+#include "readability-duplicate-include2.h"
+extern int i;
+// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include
+// CHECK-FIXES:      {{^extern int g;$}}
+// CHECK-FIXES-NEXT: {{^#include "readability-duplicate-include2.h"$}}
+// CHECK-FIXES-NEXT: {{^extern int h;$}}
+// CHECK-FIXES-NEXT: {{^extern int i;$}}
+
+#endif
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include2.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include2.h
new file mode 100644
index 0000000000000..58dfa757ee7ae
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/readability-duplicate-include2.h
@@ -0,0 +1 @@
+// This file is intentionally empty.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/iostream b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/iostream
new file mode 100644
index 0000000000000..fcbabe12fc378
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/iostream
@@ -0,0 +1 @@
+// This file is intentionally empty.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/string.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/string.h
new file mode 100644
index 0000000000000..fcbabe12fc378
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/string.h
@@ -0,0 +1 @@
+// This file is intentionally empty.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/sys/types.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/sys/types.h
new file mode 100644
index 0000000000000..58dfa757ee7ae
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/sys/types.h
@@ -0,0 +1 @@
+// This file is intentionally empty.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/types.h b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/types.h
new file mode 100644
index 0000000000000..fcbabe12fc378
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/Inputs/readability-duplicate-include/system/types.h
@@ -0,0 +1 @@
+// This file is intentionally empty.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-duplicate-include.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-duplicate-include.cpp
new file mode 100644
index 0000000000000..f9a3c70ef86f4
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability-duplicate-include.cpp
@@ -0,0 +1,72 @@
+// RUN: %check_clang_tidy %s readability-duplicate-include %t -- -- -isystem %S/Inputs/readability-duplicate-include/system -I %S/Inputs/readability-duplicate-include
+
+int a;
+#include <string.h>
+int b;
+#include <string.h>
+int c;
+// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include [readability-duplicate-include]
+// CHECK-FIXES:      {{^int a;$}}
+// CHECK-FIXES-NEXT: {{^#include <string.h>$}}
+// CHECK-FIXES-NEXT: {{^int b;$}}
+// CHECK-FIXES-NEXT: {{^int c;$}}
+
+int d;
+#include <iostream>
+int e;
+#include <iostream> // extra stuff that will also be removed
+int f;
+// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include
+// CHECK-FIXES:      {{^int d;$}}
+// CHECK-FIXES-NEXT: {{^#include <iostream>$}}
+// CHECK-FIXES-NEXT: {{^int e;$}}
+// CHECK-FIXES-NEXT: {{^int f;$}}
+
+int g;
+#include "readability-duplicate-include.h"
+int h;
+#include "readability-duplicate-include.h"
+int i;
+// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include
+// CHECK-FIXES:      {{^int g;$}}
+// CHECK-FIXES-NEXT: {{^#include "readability-duplicate-include.h"$}}
+// CHECK-FIXES-NEXT: {{^int h;$}}
+// CHECK-FIXES-NEXT: {{^int i;$}}
+
+#include <types.h>
+
+int j;
+#include <sys/types.h>
+int k;
+#include <sys/types.h>
+int l;
+// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: duplicate include
+// CHECK-FIXES:      {{^int j;$}}
+// CHECK-FIXES-NEXT: {{^#include <sys/types.h>$}}
+// CHECK-FIXES-NEXT: {{^int k;$}}
+// CHECK-FIXES-NEXT: {{^int l;$}}
+
+int m;
+        #          include             <string.h>  // lots of space
+int n;
+// CHECK-MESSAGES: :[[@LINE-2]]:9: warning: duplicate include
+// CHECK-FIXES:      {{^int m;$}}
+// CHECK-FIXES-NEXT: {{^int n;$}}
+
+// defining a macro in the main file resets the included file cache
+#define ARBITRARY_MACRO
+int o;
+#include <sys/types.h>
+int p;
+// CHECK-FIXES:      {{^int o;$}}
+// CHECK-FIXES-NEXT: {{^#include <sys/types.h>$}}
+// CHECK-FIXES-NEXT: {{^int p;$}}
+
+// undefining a macro resets the cache
+#undef ARBITRARY_MACRO
+int q;
+#include <sys/types.h>
+int r;
+// CHECK-FIXES:      {{^int q;$}}
+// CHECK-FIXES-NEXT: {{^#include <sys/types.h>$}}
+// CHECK-FIXES-NEXT: {{^int r;$}}

From 6605057992b15183663f0d918b7707f371862fd7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 16:28:38 +0000
Subject: [PATCH 291/946] Revert rG7c66aaddb128dc0f342830c1efaeb7a278bfc48c
 "[DAG] Fold (X & Y) != 0 --> zextOrTrunc(X & Y) iff everything but LSB is
 known zero (PR51312)"

Noticed a typo in the getBooleanContents call just after I pressed commit :(
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 19 ++++---------------
 llvm/test/CodeGen/X86/parity-vec.ll           |  4 ++--
 2 files changed, 6 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 269c332ae28a1..3b53a5b8b7532 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3254,29 +3254,17 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                          ISD::CondCode Cond, const SDLoc &DL,
                                          DAGCombinerInfo &DCI) const {
+  // Match these patterns in any of their permutations:
+  // (X & Y) == Y
+  // (X & Y) != Y
   if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
     std::swap(N0, N1);
 
-  SelectionDAG &DAG = DCI.DAG;
   EVT OpVT = N0.getValueType();
   if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
       (Cond != ISD::SETEQ && Cond != ISD::SETNE))
     return SDValue();
 
-  // (X & Y) != 0 --> zextOrTrunc(X & Y)
-  // iff everything but LSB is known zero:
-  if (Cond == ISD::SETNE && isNullConstant(N1) &&
-      (getBooleanContents(VT) == TargetLowering::UndefinedBooleanContent ||
-       getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent)) {
-    unsigned NumEltBits = OpVT.getScalarSizeInBits();
-    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
-    if (DAG.MaskedValueIsZero(N0, UpperBits))
-      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
-  }
-
-  // Match these patterns in any of their permutations:
-  // (X & Y) == Y
-  // (X & Y) != Y
   SDValue X, Y;
   if (N0.getOperand(0) == N1) {
     X = N0.getOperand(1);
@@ -3288,6 +3276,7 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
     return SDValue();
   }
 
+  SelectionDAG &DAG = DCI.DAG;
   SDValue Zero = DAG.getConstant(0, DL, OpVT);
   if (DAG.isKnownToBeAPowerOfTwo(Y)) {
     // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll
index ed64bb5eddf49..60d9d4be68e79 100644
--- a/llvm/test/CodeGen/X86/parity-vec.ll
+++ b/llvm/test/CodeGen/X86/parity-vec.ll
@@ -36,8 +36,8 @@ define i1 @canonical_parity(<16 x i1> %x) {
 ; POPCNT-NEXT:    psllw $7, %xmm0
 ; POPCNT-NEXT:    pmovmskb %xmm0, %eax
 ; POPCNT-NEXT:    popcntl %eax, %eax
-; POPCNT-NEXT:    andl $1, %eax
-; POPCNT-NEXT:    # kill: def $al killed $al killed $eax
+; POPCNT-NEXT:    testb $1, %al
+; POPCNT-NEXT:    setne %al
 ; POPCNT-NEXT:    retq
   %i1 = bitcast <16 x i1> %x to i16
   %i2 = call i16 @llvm.ctpop.i16(i16 %i1)

From 631f3e621586d1d9a1e57ba0698e7eb35496d9fe Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Sun, 23 Jan 2022 16:30:34 +0000
Subject: [PATCH 292/946] [gn build] Port d2e8fb331835

---
 .../secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
index 660fcb70eba0a..1e24a52be4725 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
@@ -19,6 +19,7 @@ static_library("readability") {
     "ContainerSizeEmptyCheck.cpp",
     "ConvertMemberFunctionsToStatic.cpp",
     "DeleteNullPointerCheck.cpp",
+    "DuplicateIncludeCheck.cpp",
     "ElseAfterReturnCheck.cpp",
     "FunctionCognitiveComplexityCheck.cpp",
     "FunctionSizeCheck.cpp",

From accc07e65465094dc5e12e78bee45b4d459c4ccd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 16:36:18 +0000
Subject: [PATCH 293/946] [DAG] Fold (X & Y) != 0 --> zextOrTrunc(X & Y) iff
 everything but LSB is known zero (PR51312)

Fixes parity codegen issue where we know all but the lowest bit is zero, we can replace the ICMPNE with 0 comparison with a ext/trunc

Differential Revision: https://reviews.llvm.org/D117983
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 19 +++++++++++++++----
 llvm/test/CodeGen/X86/parity-vec.ll           |  4 ++--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 3b53a5b8b7532..a98c21f16c712 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3254,17 +3254,29 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
 SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                          ISD::CondCode Cond, const SDLoc &DL,
                                          DAGCombinerInfo &DCI) const {
-  // Match these patterns in any of their permutations:
-  // (X & Y) == Y
-  // (X & Y) != Y
   if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
     std::swap(N0, N1);
 
+  SelectionDAG &DAG = DCI.DAG;
   EVT OpVT = N0.getValueType();
   if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
       (Cond != ISD::SETEQ && Cond != ISD::SETNE))
     return SDValue();
 
+  // (X & Y) != 0 --> zextOrTrunc(X & Y)
+  // iff everything but LSB is known zero:
+  if (Cond == ISD::SETNE && isNullConstant(N1) &&
+      (getBooleanContents(OpVT) == TargetLowering::UndefinedBooleanContent ||
+       getBooleanContents(OpVT) == TargetLowering::ZeroOrOneBooleanContent)) {
+    unsigned NumEltBits = OpVT.getScalarSizeInBits();
+    APInt UpperBits = APInt::getHighBitsSet(NumEltBits, NumEltBits - 1);
+    if (DAG.MaskedValueIsZero(N0, UpperBits))
+      return DAG.getBoolExtOrTrunc(N0, DL, VT, OpVT);
+  }
+
+  // Match these patterns in any of their permutations:
+  // (X & Y) == Y
+  // (X & Y) != Y
   SDValue X, Y;
   if (N0.getOperand(0) == N1) {
     X = N0.getOperand(1);
@@ -3276,7 +3288,6 @@ SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
     return SDValue();
   }
 
-  SelectionDAG &DAG = DCI.DAG;
   SDValue Zero = DAG.getConstant(0, DL, OpVT);
   if (DAG.isKnownToBeAPowerOfTwo(Y)) {
     // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
diff --git a/llvm/test/CodeGen/X86/parity-vec.ll b/llvm/test/CodeGen/X86/parity-vec.ll
index 60d9d4be68e79..ed64bb5eddf49 100644
--- a/llvm/test/CodeGen/X86/parity-vec.ll
+++ b/llvm/test/CodeGen/X86/parity-vec.ll
@@ -36,8 +36,8 @@ define i1 @canonical_parity(<16 x i1> %x) {
 ; POPCNT-NEXT:    psllw $7, %xmm0
 ; POPCNT-NEXT:    pmovmskb %xmm0, %eax
 ; POPCNT-NEXT:    popcntl %eax, %eax
-; POPCNT-NEXT:    testb $1, %al
-; POPCNT-NEXT:    setne %al
+; POPCNT-NEXT:    andl $1, %eax
+; POPCNT-NEXT:    # kill: def $al killed $al killed $eax
 ; POPCNT-NEXT:    retq
   %i1 = bitcast <16 x i1> %x to i16
   %i2 = call i16 @llvm.ctpop.i16(i16 %i1)

From 0b799791807e6b23a568526484f6cdaf0984cf02 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sat, 22 Jan 2022 23:05:19 -0800
Subject: [PATCH 294/946] [RISCV] Merge some rvv intrinsic test cases that only
 differ by XLen type.

Instead of having a test for i32 XLen and i64 XLen, use sed to
replace iXLen with i32/i64 before running llc.

This change covers all of the floating point tests.
---
 llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll     | 1356 -----------------
 .../RISCV/rvv/{vfadd-rv32.ll => vfadd.ll}     |  426 +++---
 llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll   |  692 ---------
 .../RISCV/rvv/{vfclass-rv32.ll => vfclass.ll} |  186 +--
 llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll |  617 --------
 .../rvv/{vfcvt-f-x-rv32.ll => vfcvt-f-x.ll}   |  216 +--
 .../test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll |  617 --------
 .../rvv/{vfcvt-f-xu-rv32.ll => vfcvt-f-xu.ll} |  216 +--
 .../CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll   |  617 --------
 ...vfcvt-rtz-x-f-rv64.ll => vfcvt-rtz-x-f.ll} |  216 +--
 .../CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll  |  617 --------
 ...cvt-rtz-xu-f-rv32.ll => vfcvt-rtz-xu-f.ll} |  216 +--
 llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll |  617 --------
 .../rvv/{vfcvt-x-f-rv32.ll => vfcvt-x-f.ll}   |  216 +--
 .../test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll |  617 --------
 .../rvv/{vfcvt-xu-f-rv32.ll => vfcvt-xu-f.ll} |  216 +--
 llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll     | 1355 ----------------
 .../RISCV/rvv/{vfdiv-rv32.ll => vfdiv.ll}     |  426 +++---
 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll    | 1106 --------------
 .../RISCV/rvv/{vfmacc-rv64.ll => vfmacc.ll}   |  294 ++--
 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll    | 1106 --------------
 .../RISCV/rvv/{vfmadd-rv32.ll => vfmadd.ll}   |  294 ++--
 llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll     | 1355 ----------------
 .../RISCV/rvv/{vfmax-rv64.ll => vfmax.ll}     |  426 +++---
 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll   |  902 -----------
 .../RISCV/rvv/{vfmerge-rv64.ll => vfmerge.ll} |  246 +--
 llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll     | 1355 ----------------
 .../RISCV/rvv/{vfmin-rv64.ll => vfmin.ll}     |  426 +++---
 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll    | 1106 --------------
 .../RISCV/rvv/{vfmsac-rv32.ll => vfmsac.ll}   |  294 ++--
 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll    | 1106 --------------
 .../RISCV/rvv/{vfmsub-rv32.ll => vfmsub.ll}   |  294 ++--
 llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll     | 1355 ----------------
 .../RISCV/rvv/{vfmul-rv32.ll => vfmul.ll}     |  426 +++---
 llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll  |  197 ---
 .../rvv/{vfmv.s.f-rv32.ll => vfmv.s.f.ll}     |   95 +-
 llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll  |  482 ------
 .../rvv/{vfmv.v.f-rv32.ll => vfmv.v.f.ll}     |  156 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll |  380 -----
 .../rvv/{vfncvt-f-f-rv32.ll => vfncvt-f-f.ll} |  132 +-
 .../test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll |  380 -----
 .../rvv/{vfncvt-f-x-rv32.ll => vfncvt-f-x.ll} |  132 +-
 .../CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll     |  380 -----
 .../{vfncvt-f-xu-rv32.ll => vfncvt-f-xu.ll}   |  132 +-
 .../CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll  |  380 -----
 ...ncvt-rod-f-f-rv32.ll => vfncvt-rod-f-f.ll} |  132 +-
 .../CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll  |  632 --------
 ...ncvt-rtz-x-f-rv32.ll => vfncvt-rtz-x-f.ll} |  216 +--
 .../CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll |  632 --------
 ...vt-rtz-xu-f-rv32.ll => vfncvt-rtz-xu-f.ll} |  216 +--
 .../test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll |  632 --------
 .../rvv/{vfncvt-x-f-rv32.ll => vfncvt-x-f.ll} |  216 +--
 .../CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll     |  632 --------
 .../{vfncvt-xu-f-rv32.ll => vfncvt-xu-f.ll}   |  216 +--
 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll   | 1106 --------------
 .../RISCV/rvv/{vfnmacc-rv32.ll => vfnmacc.ll} |  294 ++--
 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll   | 1106 --------------
 .../RISCV/rvv/{vfnmadd-rv32.ll => vfnmadd.ll} |  294 ++--
 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll   | 1106 --------------
 .../RISCV/rvv/{vfnmsac-rv32.ll => vfnmsac.ll} |  294 ++--
 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll   | 1106 --------------
 .../RISCV/rvv/{vfnmsub-rv64.ll => vfnmsub.ll} |  294 ++--
 llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll    |  677 --------
 .../RISCV/rvv/{vfrdiv-rv32.ll => vfrdiv.ll}   |  216 +--
 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll    |  617 --------
 .../RISCV/rvv/{vfrec7-rv32.ll => vfrec7.ll}   |  216 +--
 llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll  |  692 ---------
 .../rvv/{vfredmax-rv32.ll => vfredmax.ll}     |  186 +--
 llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll  |  692 ---------
 .../rvv/{vfredmin-rv32.ll => vfredmin.ll}     |  186 +--
 llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll |  692 ---------
 .../rvv/{vfredosum-rv32.ll => vfredosum.ll}   |  186 +--
 llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll |  692 ---------
 .../rvv/{vfredusum-rv32.ll => vfredusum.ll}   |  186 +--
 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll  |  617 --------
 .../rvv/{vfrsqrt7-rv32.ll => vfrsqrt7.ll}     |  216 +--
 llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll    |  678 ---------
 .../RISCV/rvv/{vfrsub-rv32.ll => vfrsub.ll}   |  216 +--
 llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll    | 1355 ----------------
 .../RISCV/rvv/{vfsgnj-rv32.ll => vfsgnj.ll}   |  426 +++---
 llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll   | 1355 ----------------
 .../RISCV/rvv/{vfsgnjn-rv32.ll => vfsgnjn.ll} |  426 +++---
 llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll   | 1355 ----------------
 .../RISCV/rvv/{vfsgnjx-rv32.ll => vfsgnjx.ll} |  426 +++---
 .../CodeGen/RISCV/rvv/vfslide1down-rv64.ll    |  677 --------
 .../{vfslide1down-rv32.ll => vfslide1down.ll} |  216 +--
 .../test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll |  692 ---------
 .../rvv/{vfslide1up-rv32.ll => vfslide1up.ll} |  216 +--
 llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll    |  548 -------
 .../RISCV/rvv/{vfsqrt-rv64.ll => vfsqrt.ll}   |  306 ++--
 llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll     | 1356 -----------------
 .../RISCV/rvv/{vfsub-rv32.ll => vfsub.ll}     |  426 +++---
 llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll    |  830 ----------
 .../RISCV/rvv/{vfwadd-rv32.ll => vfwadd.ll}   |  258 ++--
 llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll  | 1248 ---------------
 .../rvv/{vfwadd.w-rv32.ll => vfwadd.w.ll}     |  362 ++---
 .../test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll |  380 -----
 .../rvv/{vfwcvt-f-f-rv32.ll => vfwcvt-f-f.ll} |  132 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll |  632 --------
 .../rvv/{vfwcvt-f-x-rv32.ll => vfwcvt-f-x.ll} |  216 +--
 .../CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll     |  632 --------
 .../{vfwcvt-f-xu-rv32.ll => vfwcvt-f-xu.ll}   |  216 +--
 .../CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll  |  380 -----
 ...wcvt-rtz-x-f-rv32.ll => vfwcvt-rtz-x-f.ll} |  132 +-
 .../CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll |  380 -----
 ...vt-rtz-xu-f-rv64.ll => vfwcvt-rtz-xu-f.ll} |  132 +-
 .../test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll |  380 -----
 .../rvv/{vfwcvt-x-f-rv32.ll => vfwcvt-x-f.ll} |  132 +-
 .../CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll     |  380 -----
 .../{vfwcvt-xu-f-rv32.ll => vfwcvt-xu-f.ll}   |  132 +-
 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll   |  830 ----------
 .../RISCV/rvv/{vfwmacc-rv32.ll => vfwmacc.ll} |  222 +--
 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll   |  830 ----------
 .../RISCV/rvv/{vfwmsac-rv32.ll => vfwmsac.ll} |  222 +--
 llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll    |  830 ----------
 .../RISCV/rvv/{vfwmul-rv64.ll => vfwmul.ll}   |  258 ++--
 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll  |  830 ----------
 .../rvv/{vfwnmacc-rv32.ll => vfwnmacc.ll}     |  222 +--
 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll  |  830 ----------
 .../rvv/{vfwnmsac-rv32.ll => vfwnmsac.ll}     |  222 +--
 .../test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll |  508 ------
 .../rvv/{vfwredosum-rv32.ll => vfwredosum.ll} |  138 +-
 .../test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll |  508 ------
 .../rvv/{vfwredusum-rv32.ll => vfwredusum.ll} |  138 +-
 llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll    |  830 ----------
 .../RISCV/rvv/{vfwsub-rv32.ll => vfwsub.ll}   |  258 ++--
 llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll  | 1248 ---------------
 .../rvv/{vfwsub.w-rv64.ll => vfwsub.w.ll}     |  362 ++---
 128 files changed, 7848 insertions(+), 58637 deletions(-)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfadd-rv32.ll => vfadd.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfclass-rv32.ll => vfclass.ll} (91%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-f-x-rv32.ll => vfcvt-f-x.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-f-xu-rv32.ll => vfcvt-f-xu.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-rtz-x-f-rv64.ll => vfcvt-rtz-x-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-rtz-xu-f-rv32.ll => vfcvt-rtz-xu-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-x-f-rv32.ll => vfcvt-x-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfcvt-xu-f-rv32.ll => vfcvt-xu-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfdiv-rv32.ll => vfdiv.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmacc-rv64.ll => vfmacc.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmadd-rv32.ll => vfmadd.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmax-rv64.ll => vfmax.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmerge-rv64.ll => vfmerge.ll} (88%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmin-rv64.ll => vfmin.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmsac-rv32.ll => vfmsac.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmsub-rv32.ll => vfmsub.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmul-rv32.ll => vfmul.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmv.s.f-rv32.ll => vfmv.s.f.ll} (74%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfmv.v.f-rv32.ll => vfmv.v.f.ll} (82%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-f-f-rv32.ll => vfncvt-f-f.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-f-x-rv32.ll => vfncvt-f-x.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-f-xu-rv32.ll => vfncvt-f-xu.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-rod-f-f-rv32.ll => vfncvt-rod-f-f.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-rtz-x-f-rv32.ll => vfncvt-rtz-x-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-rtz-xu-f-rv32.ll => vfncvt-rtz-xu-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-x-f-rv32.ll => vfncvt-x-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfncvt-xu-f-rv32.ll => vfncvt-xu-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfnmacc-rv32.ll => vfnmacc.ll} (90%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfnmadd-rv32.ll => vfnmadd.ll} (90%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfnmsac-rv32.ll => vfnmsac.ll} (90%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfnmsub-rv64.ll => vfnmsub.ll} (90%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfrdiv-rv32.ll => vfrdiv.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfrec7-rv32.ll => vfrec7.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfredmax-rv32.ll => vfredmax.ll} (88%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfredmin-rv32.ll => vfredmin.ll} (88%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfredosum-rv32.ll => vfredosum.ll} (88%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfredusum-rv32.ll => vfredusum.ll} (88%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfrsqrt7-rv32.ll => vfrsqrt7.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfrsub-rv32.ll => vfrsub.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfsgnj-rv32.ll => vfsgnj.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfsgnjn-rv32.ll => vfsgnjn.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfsgnjx-rv32.ll => vfsgnjx.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfslide1down-rv32.ll => vfslide1down.ll} (85%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfslide1up-rv32.ll => vfslide1up.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfsqrt-rv64.ll => vfsqrt.ll} (80%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfsub-rv32.ll => vfsub.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwadd-rv32.ll => vfwadd.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwadd.w-rv32.ll => vfwadd.w.ll} (88%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-f-f-rv32.ll => vfwcvt-f-f.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-f-x-rv32.ll => vfwcvt-f-x.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-f-xu-rv32.ll => vfwcvt-f-xu.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-rtz-x-f-rv32.ll => vfwcvt-rtz-x-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-rtz-xu-f-rv64.ll => vfwcvt-rtz-xu-f.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-x-f-rv32.ll => vfwcvt-x-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwcvt-xu-f-rv32.ll => vfwcvt-xu-f.ll} (87%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwmacc-rv32.ll => vfwmacc.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwmsac-rv32.ll => vfwmsac.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwmul-rv64.ll => vfwmul.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwnmacc-rv32.ll => vfwnmacc.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwnmsac-rv32.ll => vfwnmsac.ll} (89%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwredosum-rv32.ll => vfwredosum.ll} (88%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwredusum-rv32.ll => vfwredusum.ll} (88%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwsub-rv32.ll => vfwsub.ll} (86%)
 delete mode 100644 llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
 rename llvm/test/CodeGen/RISCV/rvv/{vfwsub.w-rv64.ll => vfwsub.w.ll} (88%)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
deleted file mode 100644
index b91d86befbc87..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv64.ll
+++ /dev/null
@@ -1,1356 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
-; RUN:   -mattr=+d -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfadd.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfadd.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfadd.ll
index 5df1881bffb23..041580b2b49d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfadd.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfadd.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfadd.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfadd.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfadd.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfadd.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfadd.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfadd.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfadd.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfadd.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfadd.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfadd.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfadd.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfadd.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfadd.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfadd.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfadd.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfadd.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfadd.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfadd.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfadd.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfadd.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfadd.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfadd.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfadd.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfadd.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfadd.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfadd.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfadd.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfadd_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll
deleted file mode 100644
index c86e1f334712a..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv64.ll
+++ /dev/null
@@ -1,692 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfclass_v_nxv1i16_nxv1f16(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 1 x half> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16(
-  <vscale x 1 x i16>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfclass_mask_v_nxv1i16_nxv1f16(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 1 x i16> %0,
-  <vscale x 1 x half> %1,
-  <vscale x 1 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfclass_v_nxv2i16_nxv2f16(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 2 x half> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16(
-  <vscale x 2 x i16>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfclass_mask_v_nxv2i16_nxv2f16(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 2 x i16> %0,
-  <vscale x 2 x half> %1,
-  <vscale x 2 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfclass_v_nxv4i16_nxv4f16(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 4 x half> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16(
-  <vscale x 4 x i16>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfclass_mask_v_nxv4i16_nxv4f16(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 4 x i16> %0,
-  <vscale x 4 x half> %1,
-  <vscale x 4 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfclass_v_nxv8i16_nxv8f16(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 8 x half> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16(
-  <vscale x 8 x i16>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfclass_mask_v_nxv8i16_nxv8f16(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 8 x i16> %0,
-  <vscale x 8 x half> %1,
-  <vscale x 8 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfclass_v_nxv16i16_nxv16f16(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 16 x half> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16(
-  <vscale x 16 x i16>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfclass_mask_v_nxv16i16_nxv16f16(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 16 x i16> %0,
-  <vscale x 16 x half> %1,
-  <vscale x 16 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vfclass_v_nxv32i16_nxv32f16(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 32 x half> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16(
-  <vscale x 32 x i16>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vfclass_mask_v_nxv32i16_nxv32f16(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 32 x i16> %0,
-  <vscale x 32 x half> %1,
-  <vscale x 32 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16(
-    <vscale x 32 x i16> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfclass.nxv1i32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfclass_v_nxv1i32_nxv1f32(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 1 x float> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfclass.nxv1i32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfclass.mask.nxv1i32(
-  <vscale x 1 x i32>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfclass_mask_v_nxv1i32_nxv1f32(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 1 x i32> %0,
-  <vscale x 1 x float> %1,
-  <vscale x 1 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfclass.mask.nxv1i32(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfclass_v_nxv2i32_nxv2f32(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 2 x float> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfclass.mask.nxv2i32(
-  <vscale x 2 x i32>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfclass_mask_v_nxv2i32_nxv2f32(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 2 x i32> %0,
-  <vscale x 2 x float> %1,
-  <vscale x 2 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.mask.nxv2i32(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfclass.nxv4i32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfclass_v_nxv4i32_nxv4f32(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 4 x float> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfclass.nxv4i32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfclass.mask.nxv4i32(
-  <vscale x 4 x i32>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfclass_mask_v_nxv4i32_nxv4f32(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 4 x i32> %0,
-  <vscale x 4 x float> %1,
-  <vscale x 4 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfclass.mask.nxv4i32(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfclass.nxv8i32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfclass_v_nxv8i32_nxv8f32(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 8 x float> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfclass.nxv8i32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfclass.mask.nxv8i32(
-  <vscale x 8 x i32>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfclass_mask_v_nxv8i32_nxv8f32(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 8 x i32> %0,
-  <vscale x 8 x float> %1,
-  <vscale x 8 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfclass.mask.nxv8i32(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfclass.nxv16i32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfclass_v_nxv16i32_nxv16f32(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 16 x float> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfclass.nxv16i32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfclass.mask.nxv16i32(
-  <vscale x 16 x i32>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfclass_mask_v_nxv16i32_nxv16f32(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 16 x i32> %0,
-  <vscale x 16 x float> %1,
-  <vscale x 16 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfclass.mask.nxv16i32(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfclass.nxv1i64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfclass_v_nxv1i64_nxv1f64(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 1 x double> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfclass.nxv1i64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfclass.mask.nxv1i64(
-  <vscale x 1 x i64>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfclass_mask_v_nxv1i64_nxv1f64(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 1 x i64> %0,
-  <vscale x 1 x double> %1,
-  <vscale x 1 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfclass.mask.nxv1i64(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfclass.nxv2i64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfclass_v_nxv2i64_nxv2f64(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 2 x double> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfclass.nxv2i64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfclass.mask.nxv2i64(
-  <vscale x 2 x i64>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfclass_mask_v_nxv2i64_nxv2f64(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 2 x i64> %0,
-  <vscale x 2 x double> %1,
-  <vscale x 2 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfclass.mask.nxv2i64(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfclass.nxv4i64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfclass_v_nxv4i64_nxv4f64(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 4 x double> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfclass.nxv4i64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfclass.mask.nxv4i64(
-  <vscale x 4 x i64>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfclass_mask_v_nxv4i64_nxv4f64(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 4 x i64> %0,
-  <vscale x 4 x double> %1,
-  <vscale x 4 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfclass.mask.nxv4i64(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfclass.nxv8i64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfclass_v_nxv8i64_nxv8f64(
-; CHECK-LABEL: intrinsic_vfclass_v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfclass.v v8, v8
-; CHECK-NEXT:    ret
-  <vscale x 8 x double> %0,
-  i64 %1) nounwind {
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfclass.nxv8i64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfclass.mask.nxv8i64(
-  <vscale x 8 x i64>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfclass_mask_v_nxv8i64_nxv8f64(
-; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfclass.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-  <vscale x 8 x i64> %0,
-  <vscale x 8 x double> %1,
-  <vscale x 8 x i1> %2,
-  i64 %3) nounwind {
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfclass.mask.nxv8i64(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfclass.ll
similarity index 91%
rename from llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfclass.ll
index ae9df2aefa4d9..e6aa398310956 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfclass-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfclass.ll
@@ -1,9 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
 define <vscale x 1 x i16> @intrinsic_vfclass_v_nxv1i16_nxv1f16(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv1i16_nxv1f16:
@@ -12,11 +14,11 @@ define <vscale x 1 x i16> @intrinsic_vfclass_v_nxv1i16_nxv1f16(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 1 x half> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.nxv1i16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -25,7 +27,7 @@ declare <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16(
   <vscale x 1 x i16>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 1 x i16> @intrinsic_vfclass_mask_v_nxv1i16_nxv1f16(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i16_nxv1f16:
@@ -36,20 +38,20 @@ define <vscale x 1 x i16> @intrinsic_vfclass_mask_v_nxv1i16_nxv1f16(
   <vscale x 1 x i16> %0,
   <vscale x 1 x half> %1,
   <vscale x 1 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfclass.mask.nxv1i16(
     <vscale x 1 x i16> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
 define <vscale x 2 x i16> @intrinsic_vfclass_v_nxv2i16_nxv2f16(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv2i16_nxv2f16:
@@ -58,11 +60,11 @@ define <vscale x 2 x i16> @intrinsic_vfclass_v_nxv2i16_nxv2f16(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 2 x half> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.nxv2i16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -71,7 +73,7 @@ declare <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16(
   <vscale x 2 x i16>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 2 x i16> @intrinsic_vfclass_mask_v_nxv2i16_nxv2f16(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i16_nxv2f16:
@@ -82,20 +84,20 @@ define <vscale x 2 x i16> @intrinsic_vfclass_mask_v_nxv2i16_nxv2f16(
   <vscale x 2 x i16> %0,
   <vscale x 2 x half> %1,
   <vscale x 2 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfclass.mask.nxv2i16(
     <vscale x 2 x i16> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
 define <vscale x 4 x i16> @intrinsic_vfclass_v_nxv4i16_nxv4f16(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv4i16_nxv4f16:
@@ -104,11 +106,11 @@ define <vscale x 4 x i16> @intrinsic_vfclass_v_nxv4i16_nxv4f16(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 4 x half> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.nxv4i16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -117,7 +119,7 @@ declare <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16(
   <vscale x 4 x i16>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 4 x i16> @intrinsic_vfclass_mask_v_nxv4i16_nxv4f16(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i16_nxv4f16:
@@ -128,20 +130,20 @@ define <vscale x 4 x i16> @intrinsic_vfclass_mask_v_nxv4i16_nxv4f16(
   <vscale x 4 x i16> %0,
   <vscale x 4 x half> %1,
   <vscale x 4 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfclass.mask.nxv4i16(
     <vscale x 4 x i16> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
 define <vscale x 8 x i16> @intrinsic_vfclass_v_nxv8i16_nxv8f16(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv8i16_nxv8f16:
@@ -150,11 +152,11 @@ define <vscale x 8 x i16> @intrinsic_vfclass_v_nxv8i16_nxv8f16(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 8 x half> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.nxv8i16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -163,7 +165,7 @@ declare <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16(
   <vscale x 8 x i16>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 8 x i16> @intrinsic_vfclass_mask_v_nxv8i16_nxv8f16(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i16_nxv8f16:
@@ -174,20 +176,20 @@ define <vscale x 8 x i16> @intrinsic_vfclass_mask_v_nxv8i16_nxv8f16(
   <vscale x 8 x i16> %0,
   <vscale x 8 x half> %1,
   <vscale x 8 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfclass.mask.nxv8i16(
     <vscale x 8 x i16> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
 define <vscale x 16 x i16> @intrinsic_vfclass_v_nxv16i16_nxv16f16(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv16i16_nxv16f16:
@@ -196,11 +198,11 @@ define <vscale x 16 x i16> @intrinsic_vfclass_v_nxv16i16_nxv16f16(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 16 x half> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.nxv16i16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -209,7 +211,7 @@ declare <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16(
   <vscale x 16 x i16>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 16 x i16> @intrinsic_vfclass_mask_v_nxv16i16_nxv16f16(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i16_nxv16f16:
@@ -220,20 +222,20 @@ define <vscale x 16 x i16> @intrinsic_vfclass_mask_v_nxv16i16_nxv16f16(
   <vscale x 16 x i16> %0,
   <vscale x 16 x half> %1,
   <vscale x 16 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfclass.mask.nxv16i16(
     <vscale x 16 x i16> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
 define <vscale x 32 x i16> @intrinsic_vfclass_v_nxv32i16_nxv32f16(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv32i16_nxv32f16:
@@ -242,11 +244,11 @@ define <vscale x 32 x i16> @intrinsic_vfclass_v_nxv32i16_nxv32f16(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 32 x half> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.nxv32i16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i16> %a
 }
@@ -255,7 +257,7 @@ declare <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16(
   <vscale x 32 x i16>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 32 x i16> @intrinsic_vfclass_mask_v_nxv32i16_nxv32f16(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv32i16_nxv32f16:
@@ -266,20 +268,20 @@ define <vscale x 32 x i16> @intrinsic_vfclass_mask_v_nxv32i16_nxv32f16(
   <vscale x 32 x i16> %0,
   <vscale x 32 x half> %1,
   <vscale x 32 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vfclass.mask.nxv32i16(
     <vscale x 32 x i16> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 32 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfclass.nxv1i32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
 define <vscale x 1 x i32> @intrinsic_vfclass_v_nxv1i32_nxv1f32(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv1i32_nxv1f32:
@@ -288,11 +290,11 @@ define <vscale x 1 x i32> @intrinsic_vfclass_v_nxv1i32_nxv1f32(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 1 x float> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfclass.nxv1i32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -301,7 +303,7 @@ declare <vscale x 1 x i32> @llvm.riscv.vfclass.mask.nxv1i32(
   <vscale x 1 x i32>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 1 x i32> @intrinsic_vfclass_mask_v_nxv1i32_nxv1f32(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i32_nxv1f32:
@@ -312,20 +314,20 @@ define <vscale x 1 x i32> @intrinsic_vfclass_mask_v_nxv1i32_nxv1f32(
   <vscale x 1 x i32> %0,
   <vscale x 1 x float> %1,
   <vscale x 1 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfclass.mask.nxv1i32(
     <vscale x 1 x i32> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
 define <vscale x 2 x i32> @intrinsic_vfclass_v_nxv2i32_nxv2f32(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv2i32_nxv2f32:
@@ -334,11 +336,11 @@ define <vscale x 2 x i32> @intrinsic_vfclass_v_nxv2i32_nxv2f32(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 2 x float> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.nxv2i32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -347,7 +349,7 @@ declare <vscale x 2 x i32> @llvm.riscv.vfclass.mask.nxv2i32(
   <vscale x 2 x i32>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 2 x i32> @intrinsic_vfclass_mask_v_nxv2i32_nxv2f32(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i32_nxv2f32:
@@ -358,20 +360,20 @@ define <vscale x 2 x i32> @intrinsic_vfclass_mask_v_nxv2i32_nxv2f32(
   <vscale x 2 x i32> %0,
   <vscale x 2 x float> %1,
   <vscale x 2 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfclass.mask.nxv2i32(
     <vscale x 2 x i32> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfclass.nxv4i32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
 define <vscale x 4 x i32> @intrinsic_vfclass_v_nxv4i32_nxv4f32(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv4i32_nxv4f32:
@@ -380,11 +382,11 @@ define <vscale x 4 x i32> @intrinsic_vfclass_v_nxv4i32_nxv4f32(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 4 x float> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfclass.nxv4i32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -393,7 +395,7 @@ declare <vscale x 4 x i32> @llvm.riscv.vfclass.mask.nxv4i32(
   <vscale x 4 x i32>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 4 x i32> @intrinsic_vfclass_mask_v_nxv4i32_nxv4f32(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i32_nxv4f32:
@@ -404,20 +406,20 @@ define <vscale x 4 x i32> @intrinsic_vfclass_mask_v_nxv4i32_nxv4f32(
   <vscale x 4 x i32> %0,
   <vscale x 4 x float> %1,
   <vscale x 4 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfclass.mask.nxv4i32(
     <vscale x 4 x i32> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfclass.nxv8i32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
 define <vscale x 8 x i32> @intrinsic_vfclass_v_nxv8i32_nxv8f32(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv8i32_nxv8f32:
@@ -426,11 +428,11 @@ define <vscale x 8 x i32> @intrinsic_vfclass_v_nxv8i32_nxv8f32(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 8 x float> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfclass.nxv8i32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -439,7 +441,7 @@ declare <vscale x 8 x i32> @llvm.riscv.vfclass.mask.nxv8i32(
   <vscale x 8 x i32>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 8 x i32> @intrinsic_vfclass_mask_v_nxv8i32_nxv8f32(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i32_nxv8f32:
@@ -450,20 +452,20 @@ define <vscale x 8 x i32> @intrinsic_vfclass_mask_v_nxv8i32_nxv8f32(
   <vscale x 8 x i32> %0,
   <vscale x 8 x float> %1,
   <vscale x 8 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfclass.mask.nxv8i32(
     <vscale x 8 x i32> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfclass.nxv16i32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
 define <vscale x 16 x i32> @intrinsic_vfclass_v_nxv16i32_nxv16f32(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv16i32_nxv16f32:
@@ -472,11 +474,11 @@ define <vscale x 16 x i32> @intrinsic_vfclass_v_nxv16i32_nxv16f32(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 16 x float> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfclass.nxv16i32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -485,7 +487,7 @@ declare <vscale x 16 x i32> @llvm.riscv.vfclass.mask.nxv16i32(
   <vscale x 16 x i32>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 16 x i32> @intrinsic_vfclass_mask_v_nxv16i32_nxv16f32(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv16i32_nxv16f32:
@@ -496,20 +498,20 @@ define <vscale x 16 x i32> @intrinsic_vfclass_mask_v_nxv16i32_nxv16f32(
   <vscale x 16 x i32> %0,
   <vscale x 16 x float> %1,
   <vscale x 16 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfclass.mask.nxv16i32(
     <vscale x 16 x i32> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfclass.nxv1i64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
 define <vscale x 1 x i64> @intrinsic_vfclass_v_nxv1i64_nxv1f64(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv1i64_nxv1f64:
@@ -518,11 +520,11 @@ define <vscale x 1 x i64> @intrinsic_vfclass_v_nxv1i64_nxv1f64(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 1 x double> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfclass.nxv1i64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -531,7 +533,7 @@ declare <vscale x 1 x i64> @llvm.riscv.vfclass.mask.nxv1i64(
   <vscale x 1 x i64>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 1 x i64> @intrinsic_vfclass_mask_v_nxv1i64_nxv1f64(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv1i64_nxv1f64:
@@ -542,20 +544,20 @@ define <vscale x 1 x i64> @intrinsic_vfclass_mask_v_nxv1i64_nxv1f64(
   <vscale x 1 x i64> %0,
   <vscale x 1 x double> %1,
   <vscale x 1 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfclass.mask.nxv1i64(
     <vscale x 1 x i64> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfclass.nxv2i64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
 define <vscale x 2 x i64> @intrinsic_vfclass_v_nxv2i64_nxv2f64(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv2i64_nxv2f64:
@@ -564,11 +566,11 @@ define <vscale x 2 x i64> @intrinsic_vfclass_v_nxv2i64_nxv2f64(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 2 x double> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfclass.nxv2i64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -577,7 +579,7 @@ declare <vscale x 2 x i64> @llvm.riscv.vfclass.mask.nxv2i64(
   <vscale x 2 x i64>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 2 x i64> @intrinsic_vfclass_mask_v_nxv2i64_nxv2f64(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv2i64_nxv2f64:
@@ -588,20 +590,20 @@ define <vscale x 2 x i64> @intrinsic_vfclass_mask_v_nxv2i64_nxv2f64(
   <vscale x 2 x i64> %0,
   <vscale x 2 x double> %1,
   <vscale x 2 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfclass.mask.nxv2i64(
     <vscale x 2 x i64> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfclass.nxv4i64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
 define <vscale x 4 x i64> @intrinsic_vfclass_v_nxv4i64_nxv4f64(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv4i64_nxv4f64:
@@ -610,11 +612,11 @@ define <vscale x 4 x i64> @intrinsic_vfclass_v_nxv4i64_nxv4f64(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 4 x double> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfclass.nxv4i64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -623,7 +625,7 @@ declare <vscale x 4 x i64> @llvm.riscv.vfclass.mask.nxv4i64(
   <vscale x 4 x i64>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 4 x i64> @intrinsic_vfclass_mask_v_nxv4i64_nxv4f64(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv4i64_nxv4f64:
@@ -634,20 +636,20 @@ define <vscale x 4 x i64> @intrinsic_vfclass_mask_v_nxv4i64_nxv4f64(
   <vscale x 4 x i64> %0,
   <vscale x 4 x double> %1,
   <vscale x 4 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfclass.mask.nxv4i64(
     <vscale x 4 x i64> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfclass.nxv8i64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
 define <vscale x 8 x i64> @intrinsic_vfclass_v_nxv8i64_nxv8f64(
 ; CHECK-LABEL: intrinsic_vfclass_v_nxv8i64_nxv8f64:
@@ -656,11 +658,11 @@ define <vscale x 8 x i64> @intrinsic_vfclass_v_nxv8i64_nxv8f64(
 ; CHECK-NEXT:    vfclass.v v8, v8
 ; CHECK-NEXT:    ret
   <vscale x 8 x double> %0,
-  i32 %1) nounwind {
+  iXLen %1) nounwind {
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfclass.nxv8i64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -669,7 +671,7 @@ declare <vscale x 8 x i64> @llvm.riscv.vfclass.mask.nxv8i64(
   <vscale x 8 x i64>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
 define <vscale x 8 x i64> @intrinsic_vfclass_mask_v_nxv8i64_nxv8f64(
 ; CHECK-LABEL: intrinsic_vfclass_mask_v_nxv8i64_nxv8f64:
@@ -680,13 +682,13 @@ define <vscale x 8 x i64> @intrinsic_vfclass_mask_v_nxv8i64_nxv8f64(
   <vscale x 8 x i64> %0,
   <vscale x 8 x double> %1,
   <vscale x 8 x i1> %2,
-  i32 %3) nounwind {
+  iXLen %3) nounwind {
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfclass.mask.nxv8i64(
     <vscale x 8 x i64> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll
deleted file mode 100644
index 65270dc06336d..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv64.ll
+++ /dev/null
@@ -1,617 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16(
-  <vscale x 1 x i16>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16(<vscale x 1 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16(
-    <vscale x 1 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f16.nxv1i16(
-  <vscale x 1 x half>,
-  <vscale x 1 x i16>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16(<vscale x 1 x half> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f16.nxv1i16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x i16> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfcvt.f.x.v.nxv2f16.nxv2i16(
-  <vscale x 2 x i16>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16(<vscale x 2 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfcvt.f.x.v.nxv2f16.nxv2i16(
-    <vscale x 2 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f16.nxv2i16(
-  <vscale x 2 x half>,
-  <vscale x 2 x i16>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16(<vscale x 2 x half> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f16.nxv2i16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x i16> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfcvt.f.x.v.nxv4f16.nxv4i16(
-  <vscale x 4 x i16>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16(<vscale x 4 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfcvt.f.x.v.nxv4f16.nxv4i16(
-    <vscale x 4 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f16.nxv4i16(
-  <vscale x 4 x half>,
-  <vscale x 4 x i16>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16(<vscale x 4 x half> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f16.nxv4i16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x i16> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfcvt.f.x.v.nxv8f16.nxv8i16(
-  <vscale x 8 x i16>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16(<vscale x 8 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfcvt.f.x.v.nxv8f16.nxv8i16(
-    <vscale x 8 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f16.nxv8i16(
-  <vscale x 8 x half>,
-  <vscale x 8 x i16>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16(<vscale x 8 x half> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f16.nxv8i16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x i16> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfcvt.f.x.v.nxv16f16.nxv16i16(
-  <vscale x 16 x i16>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16(<vscale x 16 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfcvt.f.x.v.nxv16f16.nxv16i16(
-    <vscale x 16 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv16f16.nxv16i16(
-  <vscale x 16 x half>,
-  <vscale x 16 x i16>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16(<vscale x 16 x half> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv16f16.nxv16i16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x i16> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfcvt.f.x.v.nxv32f16.nxv32i16(
-  <vscale x 32 x i16>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16(<vscale x 32 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfcvt.f.x.v.nxv32f16.nxv32i16(
-    <vscale x 32 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv32f16.nxv32i16(
-  <vscale x 32 x half>,
-  <vscale x 32 x i16>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16(<vscale x 32 x half> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv32f16.nxv32i16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x i16> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfcvt.f.x.v.nxv1f32.nxv1i32(
-  <vscale x 1 x i32>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32(<vscale x 1 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfcvt.f.x.v.nxv1f32.nxv1i32(
-    <vscale x 1 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f32.nxv1i32(
-  <vscale x 1 x float>,
-  <vscale x 1 x i32>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32(<vscale x 1 x float> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f32.nxv1i32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x i32> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfcvt.f.x.v.nxv2f32.nxv2i32(
-  <vscale x 2 x i32>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32(<vscale x 2 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfcvt.f.x.v.nxv2f32.nxv2i32(
-    <vscale x 2 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f32.nxv2i32(
-  <vscale x 2 x float>,
-  <vscale x 2 x i32>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32(<vscale x 2 x float> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f32.nxv2i32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x i32> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfcvt.f.x.v.nxv4f32.nxv4i32(
-  <vscale x 4 x i32>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32(<vscale x 4 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfcvt.f.x.v.nxv4f32.nxv4i32(
-    <vscale x 4 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f32.nxv4i32(
-  <vscale x 4 x float>,
-  <vscale x 4 x i32>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32(<vscale x 4 x float> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f32.nxv4i32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x i32> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfcvt.f.x.v.nxv8f32.nxv8i32(
-  <vscale x 8 x i32>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32(<vscale x 8 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfcvt.f.x.v.nxv8f32.nxv8i32(
-    <vscale x 8 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f32.nxv8i32(
-  <vscale x 8 x float>,
-  <vscale x 8 x i32>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32(<vscale x 8 x float> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f32.nxv8i32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x i32> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfcvt.f.x.v.nxv16f32.nxv16i32(
-  <vscale x 16 x i32>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32(<vscale x 16 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfcvt.f.x.v.nxv16f32.nxv16i32(
-    <vscale x 16 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv16f32.nxv16i32(
-  <vscale x 16 x float>,
-  <vscale x 16 x i32>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32(<vscale x 16 x float> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv16f32.nxv16i32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x i32> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64(
-  <vscale x 1 x i64>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64(<vscale x 1 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64(
-    <vscale x 1 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f64.nxv1i64(
-  <vscale x 1 x double>,
-  <vscale x 1 x i64>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64(<vscale x 1 x double> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f64.nxv1i64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x i64> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfcvt.f.x.v.nxv2f64.nxv2i64(
-  <vscale x 2 x i64>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64(<vscale x 2 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfcvt.f.x.v.nxv2f64.nxv2i64(
-    <vscale x 2 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f64.nxv2i64(
-  <vscale x 2 x double>,
-  <vscale x 2 x i64>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64(<vscale x 2 x double> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f64.nxv2i64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x i64> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfcvt.f.x.v.nxv4f64.nxv4i64(
-  <vscale x 4 x i64>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64(<vscale x 4 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfcvt.f.x.v.nxv4f64.nxv4i64(
-    <vscale x 4 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f64.nxv4i64(
-  <vscale x 4 x double>,
-  <vscale x 4 x i64>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64(<vscale x 4 x double> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f64.nxv4i64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x i64> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfcvt.f.x.v.nxv8f64.nxv8i64(
-  <vscale x 8 x i64>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64(<vscale x 8 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfcvt.f.x.v.nxv8f64.nxv8i64(
-    <vscale x 8 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f64.nxv8i64(
-  <vscale x 8 x double>,
-  <vscale x 8 x i64>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64(<vscale x 8 x double> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.x.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f64.nxv8i64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x i64> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x.ll
index 5549960bb7736..e8d6257f1e652 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-x.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16(
   <vscale x 1 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16(<vscale x 1 x i16> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16(<vscale x 1 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x half> @intrinsic_vfcvt_f.x.v_nxv1f16_nxv1i16(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.nxv1f16.nxv1i16(
     <vscale x 1 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -23,10 +25,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f16.nxv1i16(
   <vscale x 1 x half>,
   <vscale x 1 x i16>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16(<vscale x 1 x half> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16(<vscale x 1 x half> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -37,16 +39,16 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x i16> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfcvt.f.x.v.nxv2f16.nxv2i16(
   <vscale x 2 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16(<vscale x 2 x i16> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16(<vscale x 2 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -55,7 +57,7 @@ define <vscale x 2 x half> @intrinsic_vfcvt_f.x.v_nxv2f16_nxv2i16(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfcvt.f.x.v.nxv2f16.nxv2i16(
     <vscale x 2 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -64,10 +66,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f16.nxv2i16(
   <vscale x 2 x half>,
   <vscale x 2 x i16>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16(<vscale x 2 x half> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16(<vscale x 2 x half> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -78,16 +80,16 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x i16> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfcvt.f.x.v.nxv4f16.nxv4i16(
   <vscale x 4 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16(<vscale x 4 x i16> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16(<vscale x 4 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -96,7 +98,7 @@ define <vscale x 4 x half> @intrinsic_vfcvt_f.x.v_nxv4f16_nxv4i16(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfcvt.f.x.v.nxv4f16.nxv4i16(
     <vscale x 4 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -105,10 +107,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f16.nxv4i16(
   <vscale x 4 x half>,
   <vscale x 4 x i16>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16(<vscale x 4 x half> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16(<vscale x 4 x half> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -119,16 +121,16 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x i16> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfcvt.f.x.v.nxv8f16.nxv8i16(
   <vscale x 8 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16(<vscale x 8 x i16> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16(<vscale x 8 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -137,7 +139,7 @@ define <vscale x 8 x half> @intrinsic_vfcvt_f.x.v_nxv8f16_nxv8i16(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfcvt.f.x.v.nxv8f16.nxv8i16(
     <vscale x 8 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -146,10 +148,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f16.nxv8i16(
   <vscale x 8 x half>,
   <vscale x 8 x i16>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16(<vscale x 8 x half> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16(<vscale x 8 x half> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -160,16 +162,16 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x i16> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfcvt.f.x.v.nxv16f16.nxv16i16(
   <vscale x 16 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16(<vscale x 16 x i16> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16(<vscale x 16 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -178,7 +180,7 @@ define <vscale x 16 x half> @intrinsic_vfcvt_f.x.v_nxv16f16_nxv16i16(<vscale x 1
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfcvt.f.x.v.nxv16f16.nxv16i16(
     <vscale x 16 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -187,10 +189,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv16f16.nxv16i16(
   <vscale x 16 x half>,
   <vscale x 16 x i16>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16(<vscale x 16 x half> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16(<vscale x 16 x half> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv16f16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,16 +203,16 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x i16> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vfcvt.f.x.v.nxv32f16.nxv32i16(
   <vscale x 32 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16(<vscale x 32 x i16> %0, i32 %1) nounwind {
+define <vscale x 32 x half> @intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16(<vscale x 32 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -219,7 +221,7 @@ define <vscale x 32 x half> @intrinsic_vfcvt_f.x.v_nxv32f16_nxv32i16(<vscale x 3
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfcvt.f.x.v.nxv32f16.nxv32i16(
     <vscale x 32 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x half> %a
 }
@@ -228,10 +230,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfcvt.f.x.v.mask.nxv32f16.nxv32i16(
   <vscale x 32 x half>,
   <vscale x 32 x i16>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16(<vscale x 32 x half> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x half> @intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16(<vscale x 32 x half> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv32f16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -242,16 +244,16 @@ entry:
     <vscale x 32 x half> %0,
     <vscale x 32 x i16> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfcvt.f.x.v.nxv1f32.nxv1i32(
   <vscale x 1 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32(<vscale x 1 x i32> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32(<vscale x 1 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -260,7 +262,7 @@ define <vscale x 1 x float> @intrinsic_vfcvt_f.x.v_nxv1f32_nxv1i32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfcvt.f.x.v.nxv1f32.nxv1i32(
     <vscale x 1 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -269,10 +271,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f32.nxv1i32(
   <vscale x 1 x float>,
   <vscale x 1 x i32>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32(<vscale x 1 x float> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32(<vscale x 1 x float> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -283,16 +285,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x i32> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfcvt.f.x.v.nxv2f32.nxv2i32(
   <vscale x 2 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32(<vscale x 2 x i32> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32(<vscale x 2 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -301,7 +303,7 @@ define <vscale x 2 x float> @intrinsic_vfcvt_f.x.v_nxv2f32_nxv2i32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfcvt.f.x.v.nxv2f32.nxv2i32(
     <vscale x 2 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -310,10 +312,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f32.nxv2i32(
   <vscale x 2 x float>,
   <vscale x 2 x i32>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32(<vscale x 2 x float> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32(<vscale x 2 x float> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -324,16 +326,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x i32> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfcvt.f.x.v.nxv4f32.nxv4i32(
   <vscale x 4 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32(<vscale x 4 x i32> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32(<vscale x 4 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -342,7 +344,7 @@ define <vscale x 4 x float> @intrinsic_vfcvt_f.x.v_nxv4f32_nxv4i32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfcvt.f.x.v.nxv4f32.nxv4i32(
     <vscale x 4 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -351,10 +353,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f32.nxv4i32(
   <vscale x 4 x float>,
   <vscale x 4 x i32>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32(<vscale x 4 x float> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32(<vscale x 4 x float> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -365,16 +367,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x i32> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfcvt.f.x.v.nxv8f32.nxv8i32(
   <vscale x 8 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32(<vscale x 8 x i32> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32(<vscale x 8 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -383,7 +385,7 @@ define <vscale x 8 x float> @intrinsic_vfcvt_f.x.v_nxv8f32_nxv8i32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfcvt.f.x.v.nxv8f32.nxv8i32(
     <vscale x 8 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -392,10 +394,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f32.nxv8i32(
   <vscale x 8 x float>,
   <vscale x 8 x i32>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32(<vscale x 8 x float> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32(<vscale x 8 x float> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,16 +408,16 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x i32> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfcvt.f.x.v.nxv16f32.nxv16i32(
   <vscale x 16 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32(<vscale x 16 x i32> %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32(<vscale x 16 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -424,7 +426,7 @@ define <vscale x 16 x float> @intrinsic_vfcvt_f.x.v_nxv16f32_nxv16i32(<vscale x
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfcvt.f.x.v.nxv16f32.nxv16i32(
     <vscale x 16 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfcvt.f.x.v.mask.nxv16f32.nxv16i32(
   <vscale x 16 x float>,
   <vscale x 16 x i32>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32(<vscale x 16 x float> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32(<vscale x 16 x float> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv16f32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -447,16 +449,16 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x i32> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64(
   <vscale x 1 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64(<vscale x 1 x i64> %0, i32 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64(<vscale x 1 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -465,7 +467,7 @@ define <vscale x 1 x double> @intrinsic_vfcvt_f.x.v_nxv1f64_nxv1i64(<vscale x 1
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfcvt.f.x.v.nxv1f64.nxv1i64(
     <vscale x 1 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
@@ -474,10 +476,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv1f64.nxv1i64(
   <vscale x 1 x double>,
   <vscale x 1 x i64>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64(<vscale x 1 x double> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64(<vscale x 1 x double> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv1f64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -488,16 +490,16 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x i64> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfcvt.f.x.v.nxv2f64.nxv2i64(
   <vscale x 2 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64(<vscale x 2 x i64> %0, i32 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64(<vscale x 2 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -506,7 +508,7 @@ define <vscale x 2 x double> @intrinsic_vfcvt_f.x.v_nxv2f64_nxv2i64(<vscale x 2
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfcvt.f.x.v.nxv2f64.nxv2i64(
     <vscale x 2 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
@@ -515,10 +517,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv2f64.nxv2i64(
   <vscale x 2 x double>,
   <vscale x 2 x i64>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64(<vscale x 2 x double> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64(<vscale x 2 x double> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv2f64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -529,16 +531,16 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x i64> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfcvt.f.x.v.nxv4f64.nxv4i64(
   <vscale x 4 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64(<vscale x 4 x i64> %0, i32 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64(<vscale x 4 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -547,7 +549,7 @@ define <vscale x 4 x double> @intrinsic_vfcvt_f.x.v_nxv4f64_nxv4i64(<vscale x 4
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfcvt.f.x.v.nxv4f64.nxv4i64(
     <vscale x 4 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
@@ -556,10 +558,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv4f64.nxv4i64(
   <vscale x 4 x double>,
   <vscale x 4 x i64>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64(<vscale x 4 x double> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64(<vscale x 4 x double> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv4f64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -570,16 +572,16 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x i64> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfcvt.f.x.v.nxv8f64.nxv8i64(
   <vscale x 8 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64(<vscale x 8 x i64> %0, i32 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64(<vscale x 8 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 8 x double> @intrinsic_vfcvt_f.x.v_nxv8f64_nxv8i64(<vscale x 8
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfcvt.f.x.v.nxv8f64.nxv8i64(
     <vscale x 8 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
@@ -597,10 +599,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfcvt.f.x.v.mask.nxv8f64.nxv8i64(
   <vscale x 8 x double>,
   <vscale x 8 x i64>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64(<vscale x 8 x double> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64(<vscale x 8 x double> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.x.v_nxv8f64_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -611,7 +613,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x i64> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll
deleted file mode 100644
index 6fc87d15dac18..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv64.ll
+++ /dev/null
@@ -1,617 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16(
-  <vscale x 1 x i16>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16(<vscale x 1 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16(
-    <vscale x 1 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f16.nxv1i16(
-  <vscale x 1 x half>,
-  <vscale x 1 x i16>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16(<vscale x 1 x half> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f16.nxv1i16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x i16> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfcvt.f.xu.v.nxv2f16.nxv2i16(
-  <vscale x 2 x i16>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16(<vscale x 2 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfcvt.f.xu.v.nxv2f16.nxv2i16(
-    <vscale x 2 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f16.nxv2i16(
-  <vscale x 2 x half>,
-  <vscale x 2 x i16>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16(<vscale x 2 x half> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f16.nxv2i16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x i16> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfcvt.f.xu.v.nxv4f16.nxv4i16(
-  <vscale x 4 x i16>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16(<vscale x 4 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfcvt.f.xu.v.nxv4f16.nxv4i16(
-    <vscale x 4 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f16.nxv4i16(
-  <vscale x 4 x half>,
-  <vscale x 4 x i16>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16(<vscale x 4 x half> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f16.nxv4i16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x i16> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfcvt.f.xu.v.nxv8f16.nxv8i16(
-  <vscale x 8 x i16>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16(<vscale x 8 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfcvt.f.xu.v.nxv8f16.nxv8i16(
-    <vscale x 8 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f16.nxv8i16(
-  <vscale x 8 x half>,
-  <vscale x 8 x i16>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16(<vscale x 8 x half> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f16.nxv8i16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x i16> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfcvt.f.xu.v.nxv16f16.nxv16i16(
-  <vscale x 16 x i16>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16(<vscale x 16 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfcvt.f.xu.v.nxv16f16.nxv16i16(
-    <vscale x 16 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f16.nxv16i16(
-  <vscale x 16 x half>,
-  <vscale x 16 x i16>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16(<vscale x 16 x half> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f16.nxv16i16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x i16> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfcvt.f.xu.v.nxv32f16.nxv32i16(
-  <vscale x 32 x i16>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16(<vscale x 32 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfcvt.f.xu.v.nxv32f16.nxv32i16(
-    <vscale x 32 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv32f16.nxv32i16(
-  <vscale x 32 x half>,
-  <vscale x 32 x i16>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16(<vscale x 32 x half> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv32f16.nxv32i16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x i16> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfcvt.f.xu.v.nxv1f32.nxv1i32(
-  <vscale x 1 x i32>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32(<vscale x 1 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfcvt.f.xu.v.nxv1f32.nxv1i32(
-    <vscale x 1 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f32.nxv1i32(
-  <vscale x 1 x float>,
-  <vscale x 1 x i32>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32(<vscale x 1 x float> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f32.nxv1i32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x i32> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfcvt.f.xu.v.nxv2f32.nxv2i32(
-  <vscale x 2 x i32>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32(<vscale x 2 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfcvt.f.xu.v.nxv2f32.nxv2i32(
-    <vscale x 2 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f32.nxv2i32(
-  <vscale x 2 x float>,
-  <vscale x 2 x i32>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32(<vscale x 2 x float> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f32.nxv2i32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x i32> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfcvt.f.xu.v.nxv4f32.nxv4i32(
-  <vscale x 4 x i32>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32(<vscale x 4 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfcvt.f.xu.v.nxv4f32.nxv4i32(
-    <vscale x 4 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f32.nxv4i32(
-  <vscale x 4 x float>,
-  <vscale x 4 x i32>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32(<vscale x 4 x float> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f32.nxv4i32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x i32> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfcvt.f.xu.v.nxv8f32.nxv8i32(
-  <vscale x 8 x i32>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32(<vscale x 8 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfcvt.f.xu.v.nxv8f32.nxv8i32(
-    <vscale x 8 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f32.nxv8i32(
-  <vscale x 8 x float>,
-  <vscale x 8 x i32>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32(<vscale x 8 x float> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f32.nxv8i32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x i32> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfcvt.f.xu.v.nxv16f32.nxv16i32(
-  <vscale x 16 x i32>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32(<vscale x 16 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfcvt.f.xu.v.nxv16f32.nxv16i32(
-    <vscale x 16 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f32.nxv16i32(
-  <vscale x 16 x float>,
-  <vscale x 16 x i32>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32(<vscale x 16 x float> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f32.nxv16i32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x i32> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfcvt.f.xu.v.nxv1f64.nxv1i64(
-  <vscale x 1 x i64>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64(<vscale x 1 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfcvt.f.xu.v.nxv1f64.nxv1i64(
-    <vscale x 1 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f64.nxv1i64(
-  <vscale x 1 x double>,
-  <vscale x 1 x i64>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64(<vscale x 1 x double> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f64.nxv1i64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x i64> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfcvt.f.xu.v.nxv2f64.nxv2i64(
-  <vscale x 2 x i64>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64(<vscale x 2 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfcvt.f.xu.v.nxv2f64.nxv2i64(
-    <vscale x 2 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f64.nxv2i64(
-  <vscale x 2 x double>,
-  <vscale x 2 x i64>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64(<vscale x 2 x double> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f64.nxv2i64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x i64> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfcvt.f.xu.v.nxv4f64.nxv4i64(
-  <vscale x 4 x i64>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64(<vscale x 4 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfcvt.f.xu.v.nxv4f64.nxv4i64(
-    <vscale x 4 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f64.nxv4i64(
-  <vscale x 4 x double>,
-  <vscale x 4 x i64>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64(<vscale x 4 x double> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f64.nxv4i64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x i64> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfcvt.f.xu.v.nxv8f64.nxv8i64(
-  <vscale x 8 x i64>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64(<vscale x 8 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfcvt.f.xu.v.nxv8f64.nxv8i64(
-    <vscale x 8 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f64.nxv8i64(
-  <vscale x 8 x double>,
-  <vscale x 8 x i64>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64(<vscale x 8 x double> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.f.xu.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f64.nxv8i64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x i64> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu.ll
index 1c8c2a80c90db..82ec8ca7cc74d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-f-xu.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16(
   <vscale x 1 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16(<vscale x 1 x i16> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16(<vscale x 1 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x half> @intrinsic_vfcvt_f.xu.v_nxv1f16_nxv1i16(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.nxv1f16.nxv1i16(
     <vscale x 1 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -23,10 +25,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f16.nxv1i16(
   <vscale x 1 x half>,
   <vscale x 1 x i16>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16(<vscale x 1 x half> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16(<vscale x 1 x half> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -37,16 +39,16 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x i16> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfcvt.f.xu.v.nxv2f16.nxv2i16(
   <vscale x 2 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16(<vscale x 2 x i16> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16(<vscale x 2 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -55,7 +57,7 @@ define <vscale x 2 x half> @intrinsic_vfcvt_f.xu.v_nxv2f16_nxv2i16(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfcvt.f.xu.v.nxv2f16.nxv2i16(
     <vscale x 2 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -64,10 +66,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f16.nxv2i16(
   <vscale x 2 x half>,
   <vscale x 2 x i16>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16(<vscale x 2 x half> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16(<vscale x 2 x half> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f16_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -78,16 +80,16 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x i16> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfcvt.f.xu.v.nxv4f16.nxv4i16(
   <vscale x 4 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16(<vscale x 4 x i16> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16(<vscale x 4 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -96,7 +98,7 @@ define <vscale x 4 x half> @intrinsic_vfcvt_f.xu.v_nxv4f16_nxv4i16(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfcvt.f.xu.v.nxv4f16.nxv4i16(
     <vscale x 4 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -105,10 +107,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f16.nxv4i16(
   <vscale x 4 x half>,
   <vscale x 4 x i16>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16(<vscale x 4 x half> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16(<vscale x 4 x half> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f16_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -119,16 +121,16 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x i16> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfcvt.f.xu.v.nxv8f16.nxv8i16(
   <vscale x 8 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16(<vscale x 8 x i16> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16(<vscale x 8 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -137,7 +139,7 @@ define <vscale x 8 x half> @intrinsic_vfcvt_f.xu.v_nxv8f16_nxv8i16(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfcvt.f.xu.v.nxv8f16.nxv8i16(
     <vscale x 8 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -146,10 +148,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f16.nxv8i16(
   <vscale x 8 x half>,
   <vscale x 8 x i16>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16(<vscale x 8 x half> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16(<vscale x 8 x half> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -160,16 +162,16 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x i16> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfcvt.f.xu.v.nxv16f16.nxv16i16(
   <vscale x 16 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16(<vscale x 16 x i16> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16(<vscale x 16 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -178,7 +180,7 @@ define <vscale x 16 x half> @intrinsic_vfcvt_f.xu.v_nxv16f16_nxv16i16(<vscale x
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfcvt.f.xu.v.nxv16f16.nxv16i16(
     <vscale x 16 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -187,10 +189,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f16.nxv16i16(
   <vscale x 16 x half>,
   <vscale x 16 x i16>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16(<vscale x 16 x half> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16(<vscale x 16 x half> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv16f16_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,16 +203,16 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x i16> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vfcvt.f.xu.v.nxv32f16.nxv32i16(
   <vscale x 32 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16(<vscale x 32 x i16> %0, i32 %1) nounwind {
+define <vscale x 32 x half> @intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16(<vscale x 32 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -219,7 +221,7 @@ define <vscale x 32 x half> @intrinsic_vfcvt_f.xu.v_nxv32f16_nxv32i16(<vscale x
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfcvt.f.xu.v.nxv32f16.nxv32i16(
     <vscale x 32 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x half> %a
 }
@@ -228,10 +230,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfcvt.f.xu.v.mask.nxv32f16.nxv32i16(
   <vscale x 32 x half>,
   <vscale x 32 x i16>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16(<vscale x 32 x half> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x half> @intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16(<vscale x 32 x half> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv32f16_nxv32i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -242,16 +244,16 @@ entry:
     <vscale x 32 x half> %0,
     <vscale x 32 x i16> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfcvt.f.xu.v.nxv1f32.nxv1i32(
   <vscale x 1 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32(<vscale x 1 x i32> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32(<vscale x 1 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -260,7 +262,7 @@ define <vscale x 1 x float> @intrinsic_vfcvt_f.xu.v_nxv1f32_nxv1i32(<vscale x 1
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfcvt.f.xu.v.nxv1f32.nxv1i32(
     <vscale x 1 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -269,10 +271,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f32.nxv1i32(
   <vscale x 1 x float>,
   <vscale x 1 x i32>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32(<vscale x 1 x float> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32(<vscale x 1 x float> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -283,16 +285,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x i32> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfcvt.f.xu.v.nxv2f32.nxv2i32(
   <vscale x 2 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32(<vscale x 2 x i32> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32(<vscale x 2 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -301,7 +303,7 @@ define <vscale x 2 x float> @intrinsic_vfcvt_f.xu.v_nxv2f32_nxv2i32(<vscale x 2
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfcvt.f.xu.v.nxv2f32.nxv2i32(
     <vscale x 2 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -310,10 +312,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f32.nxv2i32(
   <vscale x 2 x float>,
   <vscale x 2 x i32>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32(<vscale x 2 x float> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32(<vscale x 2 x float> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f32_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -324,16 +326,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x i32> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfcvt.f.xu.v.nxv4f32.nxv4i32(
   <vscale x 4 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32(<vscale x 4 x i32> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32(<vscale x 4 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -342,7 +344,7 @@ define <vscale x 4 x float> @intrinsic_vfcvt_f.xu.v_nxv4f32_nxv4i32(<vscale x 4
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfcvt.f.xu.v.nxv4f32.nxv4i32(
     <vscale x 4 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -351,10 +353,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f32.nxv4i32(
   <vscale x 4 x float>,
   <vscale x 4 x i32>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32(<vscale x 4 x float> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32(<vscale x 4 x float> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f32_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -365,16 +367,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x i32> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfcvt.f.xu.v.nxv8f32.nxv8i32(
   <vscale x 8 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32(<vscale x 8 x i32> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32(<vscale x 8 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -383,7 +385,7 @@ define <vscale x 8 x float> @intrinsic_vfcvt_f.xu.v_nxv8f32_nxv8i32(<vscale x 8
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfcvt.f.xu.v.nxv8f32.nxv8i32(
     <vscale x 8 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -392,10 +394,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f32.nxv8i32(
   <vscale x 8 x float>,
   <vscale x 8 x i32>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32(<vscale x 8 x float> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32(<vscale x 8 x float> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f32_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,16 +408,16 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x i32> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfcvt.f.xu.v.nxv16f32.nxv16i32(
   <vscale x 16 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32(<vscale x 16 x i32> %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32(<vscale x 16 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -424,7 +426,7 @@ define <vscale x 16 x float> @intrinsic_vfcvt_f.xu.v_nxv16f32_nxv16i32(<vscale x
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfcvt.f.xu.v.nxv16f32.nxv16i32(
     <vscale x 16 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfcvt.f.xu.v.mask.nxv16f32.nxv16i32(
   <vscale x 16 x float>,
   <vscale x 16 x i32>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32(<vscale x 16 x float> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32(<vscale x 16 x float> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv16f32_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -447,16 +449,16 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x i32> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfcvt.f.xu.v.nxv1f64.nxv1i64(
   <vscale x 1 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64(<vscale x 1 x i64> %0, i32 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64(<vscale x 1 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -465,7 +467,7 @@ define <vscale x 1 x double> @intrinsic_vfcvt_f.xu.v_nxv1f64_nxv1i64(<vscale x 1
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfcvt.f.xu.v.nxv1f64.nxv1i64(
     <vscale x 1 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
@@ -474,10 +476,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv1f64.nxv1i64(
   <vscale x 1 x double>,
   <vscale x 1 x i64>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64(<vscale x 1 x double> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64(<vscale x 1 x double> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv1f64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -488,16 +490,16 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x i64> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfcvt.f.xu.v.nxv2f64.nxv2i64(
   <vscale x 2 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64(<vscale x 2 x i64> %0, i32 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64(<vscale x 2 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -506,7 +508,7 @@ define <vscale x 2 x double> @intrinsic_vfcvt_f.xu.v_nxv2f64_nxv2i64(<vscale x 2
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfcvt.f.xu.v.nxv2f64.nxv2i64(
     <vscale x 2 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
@@ -515,10 +517,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv2f64.nxv2i64(
   <vscale x 2 x double>,
   <vscale x 2 x i64>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64(<vscale x 2 x double> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64(<vscale x 2 x double> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv2f64_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -529,16 +531,16 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x i64> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfcvt.f.xu.v.nxv4f64.nxv4i64(
   <vscale x 4 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64(<vscale x 4 x i64> %0, i32 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64(<vscale x 4 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -547,7 +549,7 @@ define <vscale x 4 x double> @intrinsic_vfcvt_f.xu.v_nxv4f64_nxv4i64(<vscale x 4
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfcvt.f.xu.v.nxv4f64.nxv4i64(
     <vscale x 4 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
@@ -556,10 +558,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv4f64.nxv4i64(
   <vscale x 4 x double>,
   <vscale x 4 x i64>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64(<vscale x 4 x double> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64(<vscale x 4 x double> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv4f64_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -570,16 +572,16 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x i64> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfcvt.f.xu.v.nxv8f64.nxv8i64(
   <vscale x 8 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64(<vscale x 8 x i64> %0, i32 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64(<vscale x 8 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 8 x double> @intrinsic_vfcvt_f.xu.v_nxv8f64_nxv8i64(<vscale x 8
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfcvt.f.xu.v.nxv8f64.nxv8i64(
     <vscale x 8 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
@@ -597,10 +599,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfcvt.f.xu.v.mask.nxv8f64.nxv8i64(
   <vscale x 8 x double>,
   <vscale x 8 x i64>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64(<vscale x 8 x double> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64(<vscale x 8 x double> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_f.xu.v_nxv8f64_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -611,7 +613,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x i64> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll
deleted file mode 100644
index 75c0a7ff62a47..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv32.ll
+++ /dev/null
@@ -1,617 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16(
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16(
-    <vscale x 1 x half> %0,
-    i32 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i16.nxv1f16(
-  <vscale x 1 x i16>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i16.nxv1f16(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i16.nxv2f16(
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i16.nxv2f16(
-    <vscale x 2 x half> %0,
-    i32 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i16.nxv2f16(
-  <vscale x 2 x i16>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i16.nxv2f16(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i16.nxv4f16(
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i16.nxv4f16(
-    <vscale x 4 x half> %0,
-    i32 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i16.nxv4f16(
-  <vscale x 4 x i16>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i16.nxv4f16(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i16.nxv8f16(
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i16.nxv8f16(
-    <vscale x 8 x half> %0,
-    i32 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i16.nxv8f16(
-  <vscale x 8 x i16>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i16.nxv8f16(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i16.nxv16f16(
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i16.nxv16f16(
-    <vscale x 16 x half> %0,
-    i32 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i16.nxv16f16(
-  <vscale x 16 x i16>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i16.nxv16f16(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv32i16.nxv32f16(
-  <vscale x 32 x half>,
-  i32);
-
-define <vscale x 32 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv32i16.nxv32f16(
-    <vscale x 32 x half> %0,
-    i32 %1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv32i16.nxv32f16(
-  <vscale x 32 x i16>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i32,
-  i32);
-
-define <vscale x 32 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv32i16.nxv32f16(
-    <vscale x 32 x i16> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i32.nxv1f32(
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i32.nxv1f32(
-    <vscale x 1 x float> %0,
-    i32 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i32.nxv1f32(
-  <vscale x 1 x i32>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i32.nxv1f32(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i32.nxv2f32(
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i32.nxv2f32(
-    <vscale x 2 x float> %0,
-    i32 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i32.nxv2f32(
-  <vscale x 2 x i32>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i32.nxv2f32(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i32.nxv4f32(
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i32.nxv4f32(
-    <vscale x 4 x float> %0,
-    i32 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i32.nxv4f32(
-  <vscale x 4 x i32>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i32.nxv4f32(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i32.nxv8f32(
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i32.nxv8f32(
-    <vscale x 8 x float> %0,
-    i32 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i32.nxv8f32(
-  <vscale x 8 x i32>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i32.nxv8f32(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i32.nxv16f32(
-  <vscale x 16 x float>,
-  i32);
-
-define <vscale x 16 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i32.nxv16f32(
-    <vscale x 16 x float> %0,
-    i32 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i32.nxv16f32(
-  <vscale x 16 x i32>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i32.nxv16f32(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64(
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64(
-    <vscale x 1 x double> %0,
-    i32 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i64.nxv1f64(
-  <vscale x 1 x i64>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i64.nxv1f64(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i64.nxv2f64(
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i64.nxv2f64(
-    <vscale x 2 x double> %0,
-    i32 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i64.nxv2f64(
-  <vscale x 2 x i64>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i64.nxv2f64(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i64.nxv4f64(
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i64.nxv4f64(
-    <vscale x 4 x double> %0,
-    i32 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i64.nxv4f64(
-  <vscale x 4 x i64>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i64.nxv4f64(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i64.nxv8f64(
-  <vscale x 8 x double>,
-  i32);
-
-define <vscale x 8 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i64.nxv8f64(
-    <vscale x 8 x double> %0,
-    i32 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i64.nxv8f64(
-  <vscale x 8 x i64>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i64.nxv8f64(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f.ll
index 9c3e2a03f1141..0dbc0f221e7e6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-x-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv1i16_nxv1f16(<vscale x 1
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i16.nxv1f16(
     <vscale x 1 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -23,10 +25,10 @@ declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i16.nxv1f16(
   <vscale x 1 x i16>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -37,16 +39,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i16.nxv2f16(
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -55,7 +57,7 @@ define <vscale x 2 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv2i16_nxv2f16(<vscale x 2
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i16.nxv2f16(
     <vscale x 2 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -64,10 +66,10 @@ declare <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i16.nxv2f16(
   <vscale x 2 x i16>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -78,16 +80,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i16.nxv4f16(
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -96,7 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv4i16_nxv4f16(<vscale x 4
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i16.nxv4f16(
     <vscale x 4 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -105,10 +107,10 @@ declare <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i16.nxv4f16(
   <vscale x 4 x i16>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -119,16 +121,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i16.nxv8f16(
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -137,7 +139,7 @@ define <vscale x 8 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv8i16_nxv8f16(<vscale x 8
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i16.nxv8f16(
     <vscale x 8 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -146,10 +148,10 @@ declare <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i16.nxv8f16(
   <vscale x 8 x i16>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -160,16 +162,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i16.nxv16f16(
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -178,7 +180,7 @@ define <vscale x 16 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv16i16_nxv16f16(<vscale
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i16.nxv16f16(
     <vscale x 16 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -187,10 +189,10 @@ declare <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i16.nxv16f16(
   <vscale x 16 x i16>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,16 +203,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv32i16.nxv32f16(
   <vscale x 32 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
+define <vscale x 32 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -219,7 +221,7 @@ define <vscale x 32 x i16> @intrinsic_vfcvt_rtz.x.f.v_nxv32i16_nxv32f16(<vscale
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.nxv32i16.nxv32f16(
     <vscale x 32 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i16> %a
 }
@@ -228,10 +230,10 @@ declare <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv32i16.nxv32f16(
   <vscale x 32 x i16>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+define <vscale x 32 x i16> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv32i16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -242,16 +244,16 @@ entry:
     <vscale x 32 x i16> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i32.nxv1f32(
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -260,7 +262,7 @@ define <vscale x 1 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv1i32_nxv1f32(<vscale x 1
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i32.nxv1f32(
     <vscale x 1 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -269,10 +271,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i32.nxv1f32(
   <vscale x 1 x i32>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -283,16 +285,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i32.nxv2f32(
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -301,7 +303,7 @@ define <vscale x 2 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv2i32_nxv2f32(<vscale x 2
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i32.nxv2f32(
     <vscale x 2 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -310,10 +312,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i32.nxv2f32(
   <vscale x 2 x i32>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -324,16 +326,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i32.nxv4f32(
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -342,7 +344,7 @@ define <vscale x 4 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv4i32_nxv4f32(<vscale x 4
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i32.nxv4f32(
     <vscale x 4 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -351,10 +353,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i32.nxv4f32(
   <vscale x 4 x i32>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -365,16 +367,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i32.nxv8f32(
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -383,7 +385,7 @@ define <vscale x 8 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv8i32_nxv8f32(<vscale x 8
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i32.nxv8f32(
     <vscale x 8 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -392,10 +394,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i32.nxv8f32(
   <vscale x 8 x i32>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,16 +408,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i32.nxv16f32(
   <vscale x 16 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -424,7 +426,7 @@ define <vscale x 16 x i32> @intrinsic_vfcvt_rtz.x.f.v_nxv16i32_nxv16f32(<vscale
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.nxv16i32.nxv16f32(
     <vscale x 16 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv16i32.nxv16f32(
   <vscale x 16 x i32>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv16i32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -447,16 +449,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64(
   <vscale x 1 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -465,7 +467,7 @@ define <vscale x 1 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv1i64_nxv1f64(<vscale x 1
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv1i64.nxv1f64(
     <vscale x 1 x double> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -474,10 +476,10 @@ declare <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv1i64.nxv1f64(
   <vscale x 1 x i64>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv1i64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -488,16 +490,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i64.nxv2f64(
   <vscale x 2 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -506,7 +508,7 @@ define <vscale x 2 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv2i64_nxv2f64(<vscale x 2
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv2i64.nxv2f64(
     <vscale x 2 x double> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -515,10 +517,10 @@ declare <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv2i64.nxv2f64(
   <vscale x 2 x i64>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv2i64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -529,16 +531,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i64.nxv4f64(
   <vscale x 4 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -547,7 +549,7 @@ define <vscale x 4 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv4i64_nxv4f64(<vscale x 4
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv4i64.nxv4f64(
     <vscale x 4 x double> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -556,10 +558,10 @@ declare <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv4i64.nxv4f64(
   <vscale x 4 x i64>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv4i64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -570,16 +572,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i64.nxv8f64(
   <vscale x 8 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 8 x i64> @intrinsic_vfcvt_rtz.x.f.v_nxv8i64_nxv8f64(<vscale x 8
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.nxv8i64.nxv8f64(
     <vscale x 8 x double> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -597,10 +599,10 @@ declare <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.x.f.v.mask.nxv8i64.nxv8f64(
   <vscale x 8 x i64>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.x.f.v_nxv8i64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -611,7 +613,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll
deleted file mode 100644
index 3a309eea35dbd..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv64.ll
+++ /dev/null
@@ -1,617 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i16.nxv1f16(
-  <vscale x 1 x i16>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i16.nxv1f16(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i16.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i16.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i16.nxv2f16(
-  <vscale x 2 x i16>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i16.nxv2f16(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i16.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i16.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i16.nxv4f16(
-  <vscale x 4 x i16>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i16.nxv4f16(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i16.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i16.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i16.nxv8f16(
-  <vscale x 8 x i16>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i16.nxv8f16(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i16.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i16.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i16.nxv16f16(
-  <vscale x 16 x i16>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i16.nxv16f16(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv32i16.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv32i16.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv32i16.nxv32f16(
-  <vscale x 32 x i16>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv32i16.nxv32f16(
-    <vscale x 32 x i16> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i32.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i32.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f32(
-  <vscale x 1 x i32>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f32(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i32.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i32.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f32(
-  <vscale x 2 x i32>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f32(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i32.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i32.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f32(
-  <vscale x 4 x i32>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f32(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i32.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i32.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f32(
-  <vscale x 8 x i32>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f32(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i32.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i32.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f32(
-  <vscale x 16 x i32>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f32(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i64.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i64.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f64(
-  <vscale x 1 x i64>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f64(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i64.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i64.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f64(
-  <vscale x 2 x i64>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f64(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i64.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i64.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f64(
-  <vscale x 4 x i64>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f64(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i64.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i64.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f64(
-  <vscale x 8 x i64>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.rtz.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f64(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f.ll
index 966a5d6f85a0f..457a93587ec2b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-rtz-xu-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i16_nxv1f16(<vscale x
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i16.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -23,10 +25,10 @@ declare <vscale x 1 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i16.nxv1f16(
   <vscale x 1 x i16>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -37,16 +39,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i16.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -55,7 +57,7 @@ define <vscale x 2 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i16_nxv2f16(<vscale x
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i16.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -64,10 +66,10 @@ declare <vscale x 2 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i16.nxv2f16(
   <vscale x 2 x i16>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -78,16 +80,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i16.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -96,7 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i16_nxv4f16(<vscale x
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i16.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -105,10 +107,10 @@ declare <vscale x 4 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i16.nxv4f16(
   <vscale x 4 x i16>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -119,16 +121,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i16.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -137,7 +139,7 @@ define <vscale x 8 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i16_nxv8f16(<vscale x
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i16.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -146,10 +148,10 @@ declare <vscale x 8 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i16.nxv8f16(
   <vscale x 8 x i16>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -160,16 +162,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i16.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -178,7 +180,7 @@ define <vscale x 16 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv16i16_nxv16f16(<vscale
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i16.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -187,10 +189,10 @@ declare <vscale x 16 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i16.nxv16f16(
   <vscale x 16 x i16>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,16 +203,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv32i16.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -219,7 +221,7 @@ define <vscale x 32 x i16> @intrinsic_vfcvt_rtz.xu.f.v_nxv32i16_nxv32f16(<vscale
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv32i16.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i16> %a
 }
@@ -228,10 +230,10 @@ declare <vscale x 32 x i16> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv32i16.nxv32f16(
   <vscale x 32 x i16>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i16> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv32i16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -242,16 +244,16 @@ entry:
     <vscale x 32 x i16> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i32.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -260,7 +262,7 @@ define <vscale x 1 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i32_nxv1f32(<vscale x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i32.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -269,10 +271,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f32(
   <vscale x 1 x i32>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -283,16 +285,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i32.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -301,7 +303,7 @@ define <vscale x 2 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i32_nxv2f32(<vscale x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i32.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -310,10 +312,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f32(
   <vscale x 2 x i32>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -324,16 +326,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i32.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -342,7 +344,7 @@ define <vscale x 4 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i32_nxv4f32(<vscale x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i32.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -351,10 +353,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f32(
   <vscale x 4 x i32>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -365,16 +367,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i32.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -383,7 +385,7 @@ define <vscale x 8 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i32_nxv8f32(<vscale x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i32.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -392,10 +394,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f32(
   <vscale x 8 x i32>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,16 +408,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i32.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -424,7 +426,7 @@ define <vscale x 16 x i32> @intrinsic_vfcvt_rtz.xu.f.v_nxv16i32_nxv16f32(<vscale
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv16i32.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 16 x i32> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f32(
   <vscale x 16 x i32>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -447,16 +449,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i64.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -465,7 +467,7 @@ define <vscale x 1 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv1i64_nxv1f64(<vscale x
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv1i64.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -474,10 +476,10 @@ declare <vscale x 1 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f64(
   <vscale x 1 x i64>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -488,16 +490,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i64.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -506,7 +508,7 @@ define <vscale x 2 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv2i64_nxv2f64(<vscale x
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv2i64.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -515,10 +517,10 @@ declare <vscale x 2 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f64(
   <vscale x 2 x i64>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -529,16 +531,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i64.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -547,7 +549,7 @@ define <vscale x 4 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv4i64_nxv4f64(<vscale x
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv4i64.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -556,10 +558,10 @@ declare <vscale x 4 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f64(
   <vscale x 4 x i64>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -570,16 +572,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i64.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 8 x i64> @intrinsic_vfcvt_rtz.xu.f.v_nxv8i64_nxv8f64(<vscale x
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.nxv8i64.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -597,10 +599,10 @@ declare <vscale x 8 x i64> @llvm.riscv.vfcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f64(
   <vscale x 8 x i64>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -611,7 +613,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll
deleted file mode 100644
index f5984a512e0fe..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv64.ll
+++ /dev/null
@@ -1,617 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i16.nxv1f16(
-  <vscale x 1 x i16>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i16.nxv1f16(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfcvt.x.f.v.nxv2i16.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.x.f.v.nxv2i16.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i16.nxv2f16(
-  <vscale x 2 x i16>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i16.nxv2f16(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfcvt.x.f.v.nxv4i16.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.x.f.v.nxv4i16.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i16.nxv4f16(
-  <vscale x 4 x i16>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i16.nxv4f16(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfcvt.x.f.v.nxv8i16.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.x.f.v.nxv8i16.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i16.nxv8f16(
-  <vscale x 8 x i16>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i16.nxv8f16(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfcvt.x.f.v.nxv16i16.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.x.f.v.nxv16i16.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv16i16.nxv16f16(
-  <vscale x 16 x i16>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv16i16.nxv16f16(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfcvt.x.f.v.nxv32i16.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.x.f.v.nxv32i16.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv32i16.nxv32f16(
-  <vscale x 32 x i16>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv32i16.nxv32f16(
-    <vscale x 32 x i16> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfcvt.x.f.v.nxv1i32.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.x.f.v.nxv1i32.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i32.nxv1f32(
-  <vscale x 1 x i32>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i32.nxv1f32(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfcvt.x.f.v.nxv2i32.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.x.f.v.nxv2i32.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i32.nxv2f32(
-  <vscale x 2 x i32>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i32.nxv2f32(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfcvt.x.f.v.nxv4i32.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.x.f.v.nxv4i32.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i32.nxv4f32(
-  <vscale x 4 x i32>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i32.nxv4f32(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfcvt.x.f.v.nxv8i32.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.x.f.v.nxv8i32.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i32.nxv8f32(
-  <vscale x 8 x i32>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i32.nxv8f32(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfcvt.x.f.v.nxv16i32.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.x.f.v.nxv16i32.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv16i32.nxv16f32(
-  <vscale x 16 x i32>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv16i32.nxv16f32(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfcvt.x.f.v.nxv1i64.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.x.f.v.nxv1i64.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i64.nxv1f64(
-  <vscale x 1 x i64>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i64.nxv1f64(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfcvt.x.f.v.nxv2i64.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.x.f.v.nxv2i64.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i64.nxv2f64(
-  <vscale x 2 x i64>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i64.nxv2f64(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfcvt.x.f.v.nxv4i64.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.x.f.v.nxv4i64.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i64.nxv4f64(
-  <vscale x 4 x i64>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i64.nxv4f64(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfcvt.x.f.v.nxv8i64.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.x.f.v.nxv8i64.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i64.nxv8f64(
-  <vscale x 8 x i64>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i64.nxv8f64(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll
index 26632717dfa9f..88205bb75ce3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-x-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x i16> @intrinsic_vfcvt_x.f.v_nxv1i16_nxv1f16(<vscale x 1 x h
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.nxv1i16.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -23,10 +25,10 @@ declare <vscale x 1 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i16.nxv1f16(
   <vscale x 1 x i16>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -37,16 +39,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfcvt.x.f.v.nxv2i16.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -55,7 +57,7 @@ define <vscale x 2 x i16> @intrinsic_vfcvt_x.f.v_nxv2i16_nxv2f16(<vscale x 2 x h
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.x.f.v.nxv2i16.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -64,10 +66,10 @@ declare <vscale x 2 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i16.nxv2f16(
   <vscale x 2 x i16>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -78,16 +80,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfcvt.x.f.v.nxv4i16.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -96,7 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vfcvt_x.f.v_nxv4i16_nxv4f16(<vscale x 4 x h
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.x.f.v.nxv4i16.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -105,10 +107,10 @@ declare <vscale x 4 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i16.nxv4f16(
   <vscale x 4 x i16>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -119,16 +121,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfcvt.x.f.v.nxv8i16.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -137,7 +139,7 @@ define <vscale x 8 x i16> @intrinsic_vfcvt_x.f.v_nxv8i16_nxv8f16(<vscale x 8 x h
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.x.f.v.nxv8i16.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -146,10 +148,10 @@ declare <vscale x 8 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i16.nxv8f16(
   <vscale x 8 x i16>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -160,16 +162,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfcvt.x.f.v.nxv16i16.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -178,7 +180,7 @@ define <vscale x 16 x i16> @intrinsic_vfcvt_x.f.v_nxv16i16_nxv16f16(<vscale x 16
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.x.f.v.nxv16i16.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -187,10 +189,10 @@ declare <vscale x 16 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv16i16.nxv16f16(
   <vscale x 16 x i16>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv16i16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,16 +203,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vfcvt.x.f.v.nxv32i16.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x i16> @intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -219,7 +221,7 @@ define <vscale x 32 x i16> @intrinsic_vfcvt_x.f.v_nxv32i16_nxv32f16(<vscale x 32
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.x.f.v.nxv32i16.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i16> %a
 }
@@ -228,10 +230,10 @@ declare <vscale x 32 x i16> @llvm.riscv.vfcvt.x.f.v.mask.nxv32i16.nxv32f16(
   <vscale x 32 x i16>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i16> @intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv32i16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -242,16 +244,16 @@ entry:
     <vscale x 32 x i16> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfcvt.x.f.v.nxv1i32.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -260,7 +262,7 @@ define <vscale x 1 x i32> @intrinsic_vfcvt_x.f.v_nxv1i32_nxv1f32(<vscale x 1 x f
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.x.f.v.nxv1i32.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -269,10 +271,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i32.nxv1f32(
   <vscale x 1 x i32>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -283,16 +285,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfcvt.x.f.v.nxv2i32.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -301,7 +303,7 @@ define <vscale x 2 x i32> @intrinsic_vfcvt_x.f.v_nxv2i32_nxv2f32(<vscale x 2 x f
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.x.f.v.nxv2i32.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -310,10 +312,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i32.nxv2f32(
   <vscale x 2 x i32>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -324,16 +326,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfcvt.x.f.v.nxv4i32.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -342,7 +344,7 @@ define <vscale x 4 x i32> @intrinsic_vfcvt_x.f.v_nxv4i32_nxv4f32(<vscale x 4 x f
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.x.f.v.nxv4i32.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -351,10 +353,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i32.nxv4f32(
   <vscale x 4 x i32>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -365,16 +367,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfcvt.x.f.v.nxv8i32.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -383,7 +385,7 @@ define <vscale x 8 x i32> @intrinsic_vfcvt_x.f.v_nxv8i32_nxv8f32(<vscale x 8 x f
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.x.f.v.nxv8i32.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -392,10 +394,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i32.nxv8f32(
   <vscale x 8 x i32>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,16 +408,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfcvt.x.f.v.nxv16i32.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -424,7 +426,7 @@ define <vscale x 16 x i32> @intrinsic_vfcvt_x.f.v_nxv16i32_nxv16f32(<vscale x 16
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.x.f.v.nxv16i32.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 16 x i32> @llvm.riscv.vfcvt.x.f.v.mask.nxv16i32.nxv16f32(
   <vscale x 16 x i32>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv16i32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -447,16 +449,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfcvt.x.f.v.nxv1i64.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -465,7 +467,7 @@ define <vscale x 1 x i64> @intrinsic_vfcvt_x.f.v_nxv1i64_nxv1f64(<vscale x 1 x d
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.x.f.v.nxv1i64.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -474,10 +476,10 @@ declare <vscale x 1 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv1i64.nxv1f64(
   <vscale x 1 x i64>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv1i64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -488,16 +490,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfcvt.x.f.v.nxv2i64.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -506,7 +508,7 @@ define <vscale x 2 x i64> @intrinsic_vfcvt_x.f.v_nxv2i64_nxv2f64(<vscale x 2 x d
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.x.f.v.nxv2i64.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -515,10 +517,10 @@ declare <vscale x 2 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv2i64.nxv2f64(
   <vscale x 2 x i64>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv2i64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -529,16 +531,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfcvt.x.f.v.nxv4i64.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -547,7 +549,7 @@ define <vscale x 4 x i64> @intrinsic_vfcvt_x.f.v_nxv4i64_nxv4f64(<vscale x 4 x d
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.x.f.v.nxv4i64.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -556,10 +558,10 @@ declare <vscale x 4 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv4i64.nxv4f64(
   <vscale x 4 x i64>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv4i64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -570,16 +572,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfcvt.x.f.v.nxv8i64.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 8 x i64> @intrinsic_vfcvt_x.f.v_nxv8i64_nxv8f64(<vscale x 8 x d
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.x.f.v.nxv8i64.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -597,10 +599,10 @@ declare <vscale x 8 x i64> @llvm.riscv.vfcvt.x.f.v.mask.nxv8i64.nxv8f64(
   <vscale x 8 x i64>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_x.f.v_nxv8i64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -611,7 +613,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll
deleted file mode 100644
index 8fb44d2d1ecf0..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv64.ll
+++ /dev/null
@@ -1,617 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i16.nxv1f16(
-  <vscale x 1 x i16>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i16.nxv1f16(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv2i16.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv2i16.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i16.nxv2f16(
-  <vscale x 2 x i16>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i16.nxv2f16(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv4i16.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv4i16.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i16.nxv4f16(
-  <vscale x 4 x i16>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i16.nxv4f16(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv8i16.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv8i16.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i16.nxv8f16(
-  <vscale x 8 x i16>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i16.nxv8f16(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv16i16.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv16i16.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i16.nxv16f16(
-  <vscale x 16 x i16>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i16.nxv16f16(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv32i16.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv32i16.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 32 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv32i16.nxv32f16(
-  <vscale x 32 x i16>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv32i16.nxv32f16(
-    <vscale x 32 x i16> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv1i32.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv1i32.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i32.nxv1f32(
-  <vscale x 1 x i32>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i32.nxv1f32(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv2i32.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv2i32.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i32.nxv2f32(
-  <vscale x 2 x i32>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i32.nxv2f32(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv4i32.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv4i32.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i32.nxv4f32(
-  <vscale x 4 x i32>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i32.nxv4f32(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv8i32.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv8i32.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i32.nxv8f32(
-  <vscale x 8 x i32>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i32.nxv8f32(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv16i32.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv16i32.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i32.nxv16f32(
-  <vscale x 16 x i32>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i32.nxv16f32(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv1i64.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv1i64.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i64.nxv1f64(
-  <vscale x 1 x i64>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i64.nxv1f64(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv2i64.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv2i64.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i64.nxv2f64(
-  <vscale x 2 x i64>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i64.nxv2f64(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv4i64.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv4i64.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i64.nxv4f64(
-  <vscale x 4 x i64>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i64.nxv4f64(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv8i64.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv8i64.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i64.nxv8f64(
-  <vscale x 8 x i64>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfcvt.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i64.nxv8f64(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll
index e76b0db05446e..fb8a4797f0ae8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcvt-xu-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x i16> @intrinsic_vfcvt_xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv1i16.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -23,10 +25,10 @@ declare <vscale x 1 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i16.nxv1f16(
   <vscale x 1 x i16>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16(<vscale x 1 x i16> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -37,16 +39,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv2i16.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -55,7 +57,7 @@ define <vscale x 2 x i16> @intrinsic_vfcvt_xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv2i16.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -64,10 +66,10 @@ declare <vscale x 2 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i16.nxv2f16(
   <vscale x 2 x i16>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16(<vscale x 2 x i16> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -78,16 +80,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv4i16.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -96,7 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vfcvt_xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv4i16.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -105,10 +107,10 @@ declare <vscale x 4 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i16.nxv4f16(
   <vscale x 4 x i16>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16(<vscale x 4 x i16> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -119,16 +121,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv8i16.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -137,7 +139,7 @@ define <vscale x 8 x i16> @intrinsic_vfcvt_xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv8i16.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -146,10 +148,10 @@ declare <vscale x 8 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i16.nxv8f16(
   <vscale x 8 x i16>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16(<vscale x 8 x i16> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -160,16 +162,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv16i16.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -178,7 +180,7 @@ define <vscale x 16 x i16> @intrinsic_vfcvt_xu.f.v_nxv16i16_nxv16f16(<vscale x 1
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv16i16.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -187,10 +189,10 @@ declare <vscale x 16 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i16.nxv16f16(
   <vscale x 16 x i16>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16(<vscale x 16 x i16> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv16i16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,16 +203,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv32i16.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x i16> @intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -219,7 +221,7 @@ define <vscale x 32 x i16> @intrinsic_vfcvt_xu.f.v_nxv32i16_nxv32f16(<vscale x 3
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vfcvt.xu.f.v.nxv32i16.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i16> %a
 }
@@ -228,10 +230,10 @@ declare <vscale x 32 x i16> @llvm.riscv.vfcvt.xu.f.v.mask.nxv32i16.nxv32f16(
   <vscale x 32 x i16>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i16> @intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16(<vscale x 32 x i16> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv32i16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -242,16 +244,16 @@ entry:
     <vscale x 32 x i16> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv1i32.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -260,7 +262,7 @@ define <vscale x 1 x i32> @intrinsic_vfcvt_xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv1i32.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -269,10 +271,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i32.nxv1f32(
   <vscale x 1 x i32>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32(<vscale x 1 x i32> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -283,16 +285,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv2i32.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -301,7 +303,7 @@ define <vscale x 2 x i32> @intrinsic_vfcvt_xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv2i32.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -310,10 +312,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i32.nxv2f32(
   <vscale x 2 x i32>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32(<vscale x 2 x i32> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -324,16 +326,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv4i32.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -342,7 +344,7 @@ define <vscale x 4 x i32> @intrinsic_vfcvt_xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv4i32.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -351,10 +353,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i32.nxv4f32(
   <vscale x 4 x i32>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32(<vscale x 4 x i32> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -365,16 +367,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv8i32.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -383,7 +385,7 @@ define <vscale x 8 x i32> @intrinsic_vfcvt_xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv8i32.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -392,10 +394,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i32.nxv8f32(
   <vscale x 8 x i32>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32(<vscale x 8 x i32> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,16 +408,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv16i32.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -424,7 +426,7 @@ define <vscale x 16 x i32> @intrinsic_vfcvt_xu.f.v_nxv16i32_nxv16f32(<vscale x 1
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfcvt.xu.f.v.nxv16i32.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 16 x i32> @llvm.riscv.vfcvt.xu.f.v.mask.nxv16i32.nxv16f32(
   <vscale x 16 x i32>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32(<vscale x 16 x i32> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv16i32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -447,16 +449,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv1i64.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -465,7 +467,7 @@ define <vscale x 1 x i64> @intrinsic_vfcvt_xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv1i64.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -474,10 +476,10 @@ declare <vscale x 1 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv1i64.nxv1f64(
   <vscale x 1 x i64>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64(<vscale x 1 x i64> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv1i64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -488,16 +490,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv2i64.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -506,7 +508,7 @@ define <vscale x 2 x i64> @intrinsic_vfcvt_xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv2i64.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -515,10 +517,10 @@ declare <vscale x 2 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv2i64.nxv2f64(
   <vscale x 2 x i64>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64(<vscale x 2 x i64> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv2i64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -529,16 +531,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv4i64.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -547,7 +549,7 @@ define <vscale x 4 x i64> @intrinsic_vfcvt_xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv4i64.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -556,10 +558,10 @@ declare <vscale x 4 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv4i64.nxv4f64(
   <vscale x 4 x i64>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64(<vscale x 4 x i64> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv4i64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -570,16 +572,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv8i64.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 8 x i64> @intrinsic_vfcvt_xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfcvt.xu.f.v.nxv8i64.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -597,10 +599,10 @@ declare <vscale x 8 x i64> @llvm.riscv.vfcvt.xu.f.v.mask.nxv8i64.nxv8f64(
   <vscale x 8 x i64>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64(<vscale x 8 x i64> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfcvt_mask_xu.f.v_nxv8i64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -611,7 +613,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
deleted file mode 100644
index 2d4a16e1bf4e8..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv64.ll
+++ /dev/null
@@ -1,1355 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfdiv_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfdiv_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfdiv_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfdiv_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfdiv_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfdiv_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfdiv_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfdiv_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfdiv_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfdiv_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfdiv_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfdiv_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfdiv_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfdiv_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfdiv_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfdiv_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfdiv_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfdiv_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfdiv_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfdiv_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfdiv_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfdiv_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfdiv_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfdiv_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfdiv_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfdiv_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfdiv_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfdiv_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfdiv_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfdiv_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfdiv_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfdiv_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfdiv_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfdiv_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfdiv_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfdiv_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfdiv_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfdiv_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfdiv_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfdiv_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfdiv_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfdiv_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfdiv_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfdiv_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfdiv_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfdiv.ll
index 01bfb50ed9b49..0145f2ad764ec 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfdiv-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfdiv_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfdiv_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfdiv_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfdiv_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfdiv_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfdiv_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfdiv_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfdiv_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfdiv_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfdiv_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfdiv_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfdiv_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfdiv_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfdiv_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfdiv_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfdiv_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfdiv_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfdiv_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfdiv_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfdiv_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfdiv_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfdiv_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfdiv_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfdiv_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfdiv_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfdiv_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfdiv_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfdiv_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfdiv.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfdiv.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfdiv.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfdiv.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfdiv.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfdiv.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfdiv.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfdiv.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfdiv.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfdiv.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfdiv.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfdiv.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfdiv.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfdiv.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfdiv.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfdiv.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfdiv.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfdiv.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfdiv.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfdiv.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfdiv.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfdiv.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfdiv.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfdiv.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfdiv.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfdiv.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfdiv.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfdiv.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfdiv.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfdiv.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfdiv_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
deleted file mode 100644
index 16d305bad846c..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv32.ll
+++ /dev/null
@@ -1,1106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half>  @intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i32 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x half>  @intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half>  @intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i32 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x half>  @intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half>  @intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i32 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x half>  @intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half>  @intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i32 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x half>  @intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half>  @intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i32 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x half>  @intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float>  @intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i32 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x float>  @intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float>  @intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i32 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x float>  @intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float>  @intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i32 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x float>  @intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float>  @intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i32 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x float>  @intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double>  @intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i32 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x double>  @intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double>  @intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    i32 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x double>  @intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double>  @intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    i32 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x double>  @intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half>  @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i32 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half>  @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i32 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half>  @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i32 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half>  @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i32 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half>  @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i32 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float>  @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i32 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float>  @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i32 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float>  @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i32 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float>  @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i32 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double>  @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    i32 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double>  @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    i32 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double>  @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    i32 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmacc.ll
index c5809888ff17b..5115a7548e2ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmacc.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmacc.mask.nxv1f16.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmacc.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmacc.mask.nxv2f16.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmacc.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmacc.mask.nxv4f16.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -686,7 +688,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmacc.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmacc.mask.nxv8f16.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -732,7 +734,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmacc.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmacc.mask.nxv16f16.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -778,7 +780,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmacc.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmacc.mask.nxv1f32.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -833,9 +835,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmacc.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -846,7 +848,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -856,9 +858,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmacc.mask.nxv2f32.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -870,7 +872,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -879,9 +881,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmacc.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -892,7 +894,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -902,9 +904,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmacc.mask.nxv4f32.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -916,7 +918,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -925,9 +927,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmacc.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -938,7 +940,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -948,9 +950,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmacc.mask.nxv8f32.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -962,7 +964,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -971,9 +973,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmacc.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -984,7 +986,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -994,9 +996,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmacc.mask.nxv1f64.f64(
   double,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -1008,7 +1010,7 @@ entry:
     double %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -1017,9 +1019,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmacc.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1030,7 +1032,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -1040,9 +1042,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmacc.mask.nxv2f64.f64(
   double,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1054,7 +1056,7 @@ entry:
     double %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -1063,9 +1065,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmacc.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1076,7 +1078,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -1086,9 +1088,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmacc.mask.nxv4f64.f64(
   double,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmacc_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1100,7 +1102,7 @@ entry:
     double %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
deleted file mode 100644
index afd41bd8a2122..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv64.ll
+++ /dev/null
@@ -1,1106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmadd.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmadd.ll
index cfb32cfab4cdc..9313e440e500f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmadd.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmadd.mask.nxv1f16.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmadd.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmadd.mask.nxv2f16.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmadd.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmadd.mask.nxv4f16.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -686,7 +688,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmadd.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmadd.mask.nxv8f16.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -732,7 +734,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmadd.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmadd.mask.nxv16f16.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -778,7 +780,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmadd.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmadd.mask.nxv1f32.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -833,9 +835,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmadd.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -846,7 +848,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -856,9 +858,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmadd.mask.nxv2f32.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -870,7 +872,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -879,9 +881,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmadd.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -892,7 +894,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -902,9 +904,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmadd.mask.nxv4f32.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -916,7 +918,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -925,9 +927,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmadd.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -938,7 +940,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -948,9 +950,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmadd.mask.nxv8f32.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -962,7 +964,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -971,9 +973,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmadd.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -984,7 +986,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -994,9 +996,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmadd.mask.nxv1f64.f64(
   double,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -1008,7 +1010,7 @@ entry:
     double %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -1017,9 +1019,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmadd.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1030,7 +1032,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -1040,9 +1042,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmadd.mask.nxv2f64.f64(
   double,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1054,7 +1056,7 @@ entry:
     double %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -1063,9 +1065,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmadd.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1076,7 +1078,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -1086,9 +1088,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmadd.mask.nxv4f64.f64(
   double,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmadd_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1100,7 +1102,7 @@ entry:
     double %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
deleted file mode 100644
index 98b4cf71da14d..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv32.ll
+++ /dev/null
@@ -1,1355 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i32 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i32 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i32 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i32 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i32 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i32 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i32,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmax.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i32,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmax.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmax.ll
index 4fc7319fb2b29..446981928b6cf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfmax.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmax.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfmax.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmax.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfmax.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmax.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfmax.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmax.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfmax.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmax.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfmax.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmax.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfmax.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmax.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfmax.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmax.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfmax.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmax.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfmax.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmax.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfmax.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmax.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfmax.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmax.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfmax.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmax.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfmax.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmax.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
deleted file mode 100644
index 3dc1240d0f7b6..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv32.ll
+++ /dev/null
@@ -1,902 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    <vscale x 32 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    <vscale x 16 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    <vscale x 8 x i1> %2,
-    i32 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-define <vscale x 1 x half> @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half zeroinitializer,
-    <vscale x 1 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-define <vscale x 2 x half> @intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half zeroinitializer,
-    <vscale x 2 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-define <vscale x 4 x half> @intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half zeroinitializer,
-    <vscale x 4 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-define <vscale x 8 x half> @intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half zeroinitializer,
-    <vscale x 8 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-define <vscale x 16 x half> @intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half zeroinitializer,
-    <vscale x 16 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-define <vscale x 32 x half> @intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half zeroinitializer,
-    <vscale x 32 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-define <vscale x 1 x float> @intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float zeroinitializer,
-    <vscale x 1 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-define <vscale x 2 x float> @intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float zeroinitializer,
-    <vscale x 2 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-define <vscale x 4 x float> @intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float zeroinitializer,
-    <vscale x 4 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-define <vscale x 8 x float> @intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float zeroinitializer,
-    <vscale x 8 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-define <vscale x 16 x float> @intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float zeroinitializer,
-    <vscale x 16 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-define <vscale x 1 x double> @intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double zeroinitializer,
-    <vscale x 1 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-define <vscale x 2 x double> @intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double zeroinitializer,
-    <vscale x 2 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-define <vscale x 4 x double> @intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double zeroinitializer,
-    <vscale x 4 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-define <vscale x 8 x double> @intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x i1> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double zeroinitializer,
-    <vscale x 8 x i1> %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmerge.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmerge.ll
index b23d908c7edda..eb3efd1fa0373 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmerge-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmerge.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -27,9 +29,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmerge.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -40,7 +42,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -62,7 +64,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,9 +73,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmerge.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -84,7 +86,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -93,9 +95,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -115,9 +117,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmerge.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -128,7 +130,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -137,9 +139,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -150,7 +152,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -159,9 +161,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmerge.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -172,7 +174,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -181,9 +183,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -194,7 +196,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -203,9 +205,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmerge.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -216,7 +218,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -225,9 +227,9 @@ declare <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -238,7 +240,7 @@ entry:
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 32 x half> %a
 }
@@ -247,9 +249,9 @@ declare <vscale x 32 x half> @llvm.riscv.vfmerge.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -260,7 +262,7 @@ entry:
     <vscale x 32 x half> %0,
     half %1,
     <vscale x 32 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 32 x half> %a
 }
@@ -269,9 +271,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -282,7 +284,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -291,9 +293,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmerge.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -304,7 +306,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -313,9 +315,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -326,7 +328,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -335,9 +337,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmerge.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -348,7 +350,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -357,9 +359,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -370,7 +372,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -379,9 +381,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmerge.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -392,7 +394,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -401,9 +403,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -414,7 +416,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -423,9 +425,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmerge.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -436,7 +438,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -445,9 +447,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -458,7 +460,7 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -467,9 +469,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfmerge.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -480,7 +482,7 @@ entry:
     <vscale x 16 x float> %0,
     float %1,
     <vscale x 16 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -489,9 +491,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmerge.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -533,9 +535,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -546,7 +548,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -555,9 +557,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmerge.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -568,7 +570,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -577,9 +579,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -590,7 +592,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -599,9 +601,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmerge.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -612,7 +614,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -621,9 +623,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vvm_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -634,7 +636,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -643,9 +645,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfmerge.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vfm_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -656,12 +658,12 @@ entry:
     <vscale x 8 x double> %0,
     double %1,
     <vscale x 8 x i1> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
 
-define <vscale x 1 x half> @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -672,12 +674,12 @@ entry:
     <vscale x 1 x half> %0,
     half zeroinitializer,
     <vscale x 1 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
 
-define <vscale x 2 x half> @intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -688,12 +690,12 @@ entry:
     <vscale x 2 x half> %0,
     half zeroinitializer,
     <vscale x 2 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
 
-define <vscale x 4 x half> @intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -704,12 +706,12 @@ entry:
     <vscale x 4 x half> %0,
     half zeroinitializer,
     <vscale x 4 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
 
-define <vscale x 8 x half> @intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -720,12 +722,12 @@ entry:
     <vscale x 8 x half> %0,
     half zeroinitializer,
     <vscale x 8 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
 
-define <vscale x 16 x half> @intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -736,12 +738,12 @@ entry:
     <vscale x 16 x half> %0,
     half zeroinitializer,
     <vscale x 16 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
 
-define <vscale x 32 x half> @intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x i1> %1, i64 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -752,12 +754,12 @@ entry:
     <vscale x 32 x half> %0,
     half zeroinitializer,
     <vscale x 32 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
 
-define <vscale x 1 x float> @intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -768,12 +770,12 @@ entry:
     <vscale x 1 x float> %0,
     float zeroinitializer,
     <vscale x 1 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
 
-define <vscale x 2 x float> @intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -784,12 +786,12 @@ entry:
     <vscale x 2 x float> %0,
     float zeroinitializer,
     <vscale x 2 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
 
-define <vscale x 4 x float> @intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -800,12 +802,12 @@ entry:
     <vscale x 4 x float> %0,
     float zeroinitializer,
     <vscale x 4 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
 
-define <vscale x 8 x float> @intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -816,12 +818,12 @@ entry:
     <vscale x 8 x float> %0,
     float zeroinitializer,
     <vscale x 8 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
 
-define <vscale x 16 x float> @intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x i1> %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -832,12 +834,12 @@ entry:
     <vscale x 16 x float> %0,
     float zeroinitializer,
     <vscale x 16 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
 
-define <vscale x 1 x double> @intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x i1> %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -848,12 +850,12 @@ entry:
     <vscale x 1 x double> %0,
     double zeroinitializer,
     <vscale x 1 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
 
-define <vscale x 2 x double> @intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x i1> %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -864,12 +866,12 @@ entry:
     <vscale x 2 x double> %0,
     double zeroinitializer,
     <vscale x 2 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
 
-define <vscale x 4 x double> @intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x i1> %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -880,12 +882,12 @@ entry:
     <vscale x 4 x double> %0,
     double zeroinitializer,
     <vscale x 4 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
 
-define <vscale x 8 x double> @intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x i1> %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x i1> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmerge_vzm_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -896,7 +898,7 @@ entry:
     <vscale x 8 x double> %0,
     double zeroinitializer,
     <vscale x 8 x i1> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
deleted file mode 100644
index 0861a787440e4..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv32.ll
+++ /dev/null
@@ -1,1355 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i32 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i32 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i32 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i32 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i32 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i32 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i32,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmin.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i32,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmin.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmin.ll
index e647fe51ffb17..e151e9fb695de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfmin.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmin.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfmin.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmin.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfmin.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmin.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfmin.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmin.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfmin.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmin.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfmin.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmin.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfmin.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmin.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfmin.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmin.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfmin.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmin.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfmin.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmin.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmin.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfmin.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmin.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfmin.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmin.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfmin.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmin.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
deleted file mode 100644
index 2a5fb2896aa56..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv64.ll
+++ /dev/null
@@ -1,1106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmsac.ll
index c8407dfe64730..cf9df7550fcc7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsac.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsac.mask.nxv1f16.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsac.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsac.mask.nxv2f16.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsac.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsac.mask.nxv4f16.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -686,7 +688,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsac.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsac.mask.nxv8f16.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -732,7 +734,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsac.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsac.mask.nxv16f16.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -778,7 +780,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsac.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsac.mask.nxv1f32.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -833,9 +835,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsac.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -846,7 +848,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -856,9 +858,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsac.mask.nxv2f32.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -870,7 +872,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -879,9 +881,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsac.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -892,7 +894,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -902,9 +904,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsac.mask.nxv4f32.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -916,7 +918,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -925,9 +927,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsac.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -938,7 +940,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -948,9 +950,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsac.mask.nxv8f32.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -962,7 +964,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -971,9 +973,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsac.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -984,7 +986,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -994,9 +996,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsac.mask.nxv1f64.f64(
   double,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -1008,7 +1010,7 @@ entry:
     double %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -1017,9 +1019,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsac.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1030,7 +1032,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -1040,9 +1042,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsac.mask.nxv2f64.f64(
   double,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1054,7 +1056,7 @@ entry:
     double %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -1063,9 +1065,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsac.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1076,7 +1078,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -1086,9 +1088,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsac.mask.nxv4f64.f64(
   double,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsac_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1100,7 +1102,7 @@ entry:
     double %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
deleted file mode 100644
index 70efc0da21f5a..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv64.ll
+++ /dev/null
@@ -1,1106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmsub.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmsub.ll
index 620c3dcb1025a..d071893ceb085 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsub.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfmsub.mask.nxv1f16.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsub.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfmsub.mask.nxv2f16.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsub.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfmsub.mask.nxv4f16.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -686,7 +688,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsub.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfmsub.mask.nxv8f16.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -732,7 +734,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsub.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfmsub.mask.nxv16f16.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -778,7 +780,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsub.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfmsub.mask.nxv1f32.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -833,9 +835,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsub.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -846,7 +848,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -856,9 +858,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfmsub.mask.nxv2f32.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -870,7 +872,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -879,9 +881,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsub.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -892,7 +894,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -902,9 +904,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfmsub.mask.nxv4f32.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -916,7 +918,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -925,9 +927,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsub.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -938,7 +940,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -948,9 +950,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfmsub.mask.nxv8f32.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -962,7 +964,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -971,9 +973,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsub.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -984,7 +986,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -994,9 +996,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfmsub.mask.nxv1f64.f64(
   double,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -1008,7 +1010,7 @@ entry:
     double %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -1017,9 +1019,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsub.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1030,7 +1032,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -1040,9 +1042,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfmsub.mask.nxv2f64.f64(
   double,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1054,7 +1056,7 @@ entry:
     double %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -1063,9 +1065,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsub.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1076,7 +1078,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -1086,9 +1088,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfmsub.mask.nxv4f64.f64(
   double,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmsub_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1100,7 +1102,7 @@ entry:
     double %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
deleted file mode 100644
index 08aa64b6de7fc..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv64.ll
+++ /dev/null
@@ -1,1355 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfmul_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfmul_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfmul_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfmul_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfmul_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfmul_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfmul_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfmul_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfmul_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfmul_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfmul_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfmul_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfmul_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfmul_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfmul_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfmul.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfmul_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfmul_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfmul_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfmul_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfmul_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfmul_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfmul_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfmul_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfmul_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfmul_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfmul_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfmul_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfmul_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfmul_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfmul_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfmul_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfmul_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfmul_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfmul_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfmul_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfmul_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfmul_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfmul_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfmul_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfmul_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfmul_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfmul_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfmul_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfmul_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfmul_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmul.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmul.ll
index 50ebccd92e64c..0f4c738025617 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmul-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmul.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmul_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmul_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmul_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmul_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmul_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmul_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmul_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmul_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmul_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmul_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmul_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmul_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmul_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmul_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmul_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmul_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmul_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmul_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmul_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmul_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmul_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmul_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmul_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmul_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmul_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmul_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmul_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmul_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfmul.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmul.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfmul.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmul.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfmul.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmul.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfmul.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmul.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfmul.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmul.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfmul.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmul.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfmul.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmul.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfmul.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmul.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfmul.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmul.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfmul.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmul.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfmul.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmul.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfmul.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmul.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfmul.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmul.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfmul.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmul.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfmul.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll
deleted file mode 100644
index ee619309b9e3c..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv64.ll
+++ /dev/null
@@ -1,197 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+zfh -target-abi lp64d -verify-machineinstrs < %s | FileCheck %s
-
-declare <vscale x 1 x half> @llvm.riscv.vfmv.s.f.nxv1f16(<vscale x 1 x half>, half, i64)
-
-define <vscale x 1 x half> @intrinsic_vfmv.s.f_f_nxv1f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmv.s.f.nxv1f16(<vscale x 1 x half> %0, half %1, i64 %2)
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmv.s.f.nxv2f16(<vscale x 2 x half>, half, i64)
-
-define <vscale x 2 x half> @intrinsic_vfmv.s.f_f_nxv2f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmv.s.f.nxv2f16(<vscale x 2 x half> %0, half %1, i64 %2)
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmv.s.f.nxv4f16(<vscale x 4 x half>, half, i64)
-
-define <vscale x 4 x half> @intrinsic_vfmv.s.f_f_nxv4f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmv.s.f.nxv4f16(<vscale x 4 x half> %0, half %1, i64 %2)
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmv.s.f.nxv8f16(<vscale x 8 x half>, half, i64)
-
-define <vscale x 8 x half> @intrinsic_vfmv.s.f_f_nxv8f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmv.s.f.nxv8f16(<vscale x 8 x half> %0, half %1, i64 %2)
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmv.s.f.nxv16f16(<vscale x 16 x half>, half, i64)
-
-define <vscale x 16 x half> @intrinsic_vfmv.s.f_f_nxv16f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmv.s.f.nxv16f16(<vscale x 16 x half> %0, half %1, i64 %2)
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmv.s.f.nxv32f16(<vscale x 32 x half>, half, i64)
-
-define <vscale x 32 x half> @intrinsic_vfmv.s.f_f_nxv32f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmv.s.f.nxv32f16(<vscale x 32 x half> %0, half %1, i64 %2)
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmv.s.f.nxv1f32(<vscale x 1 x float>, float, i64)
-
-define <vscale x 1 x float> @intrinsic_vfmv.s.f_f_nxv1f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmv.s.f.nxv1f32(<vscale x 1 x float> %0, float %1, i64 %2)
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmv.s.f.nxv2f32(<vscale x 2 x float>, float, i64)
-
-define <vscale x 2 x float> @intrinsic_vfmv.s.f_f_nxv2f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmv.s.f.nxv2f32(<vscale x 2 x float> %0, float %1, i64 %2)
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmv.s.f.nxv4f32(<vscale x 4 x float>, float, i64)
-
-define <vscale x 4 x float> @intrinsic_vfmv.s.f_f_nxv4f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmv.s.f.nxv4f32(<vscale x 4 x float> %0, float %1, i64 %2)
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmv.s.f.nxv8f32(<vscale x 8 x float>, float, i64)
-
-define <vscale x 8 x float> @intrinsic_vfmv.s.f_f_nxv8f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmv.s.f.nxv8f32(<vscale x 8 x float> %0, float %1, i64 %2)
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmv.s.f.nxv16f32(<vscale x 16 x float>, float, i64)
-
-define <vscale x 16 x float> @intrinsic_vfmv.s.f_f_nxv16f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmv.s.f.nxv16f32(<vscale x 16 x float> %0, float %1, i64 %2)
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(<vscale x 1 x double>, double, i64)
-
-define <vscale x 1 x double> @intrinsic_vfmv.s.f_f_nxv1f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(<vscale x 1 x double> %0, double %1, i64 %2)
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmv.s.f.nxv2f64(<vscale x 2 x double>, double, i64)
-
-define <vscale x 2 x double> @intrinsic_vfmv.s.f_f_nxv2f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmv.s.f.nxv2f64(<vscale x 2 x double> %0, double %1, i64 %2)
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmv.s.f.nxv4f64(<vscale x 4 x double>, double, i64)
-
-define <vscale x 4 x double> @intrinsic_vfmv.s.f_f_nxv4f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmv.s.f.nxv4f64(<vscale x 4 x double> %0, double %1, i64 %2)
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double>, double, i64)
-
-define <vscale x 8 x double> @intrinsic_vfmv.s.f_f_nxv8f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfmv.s.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double> %0, double %1, i64 %2)
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll
similarity index 74%
rename from llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll
index 4d47c000788fb..8464dc2f62997 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll
@@ -1,197 +1,200 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zfh -target-abi ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 
-declare <vscale x 1 x half> @llvm.riscv.vfmv.s.f.nxv1f16(<vscale x 1 x half>, half, i32)
+declare <vscale x 1 x half> @llvm.riscv.vfmv.s.f.nxv1f16(<vscale x 1 x half>, half, iXLen)
 
-define <vscale x 1 x half> @intrinsic_vfmv.s.f_f_nxv1f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmv.s.f_f_nxv1f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmv.s.f.nxv1f16(<vscale x 1 x half> %0, half %1, i32 %2)
+  %a = call <vscale x 1 x half> @llvm.riscv.vfmv.s.f.nxv1f16(<vscale x 1 x half> %0, half %1, iXLen %2)
   ret <vscale x 1 x half> %a
 }
 
-declare <vscale x 2 x half> @llvm.riscv.vfmv.s.f.nxv2f16(<vscale x 2 x half>, half, i32)
+declare <vscale x 2 x half> @llvm.riscv.vfmv.s.f.nxv2f16(<vscale x 2 x half>, half, iXLen)
 
-define <vscale x 2 x half> @intrinsic_vfmv.s.f_f_nxv2f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmv.s.f_f_nxv2f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmv.s.f.nxv2f16(<vscale x 2 x half> %0, half %1, i32 %2)
+  %a = call <vscale x 2 x half> @llvm.riscv.vfmv.s.f.nxv2f16(<vscale x 2 x half> %0, half %1, iXLen %2)
   ret <vscale x 2 x half> %a
 }
 
-declare <vscale x 4 x half> @llvm.riscv.vfmv.s.f.nxv4f16(<vscale x 4 x half>, half, i32)
+declare <vscale x 4 x half> @llvm.riscv.vfmv.s.f.nxv4f16(<vscale x 4 x half>, half, iXLen)
 
-define <vscale x 4 x half> @intrinsic_vfmv.s.f_f_nxv4f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmv.s.f_f_nxv4f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmv.s.f.nxv4f16(<vscale x 4 x half> %0, half %1, i32 %2)
+  %a = call <vscale x 4 x half> @llvm.riscv.vfmv.s.f.nxv4f16(<vscale x 4 x half> %0, half %1, iXLen %2)
   ret <vscale x 4 x half> %a
 }
 
-declare <vscale x 8 x half> @llvm.riscv.vfmv.s.f.nxv8f16(<vscale x 8 x half>, half, i32)
+declare <vscale x 8 x half> @llvm.riscv.vfmv.s.f.nxv8f16(<vscale x 8 x half>, half, iXLen)
 
-define <vscale x 8 x half> @intrinsic_vfmv.s.f_f_nxv8f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmv.s.f_f_nxv8f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmv.s.f.nxv8f16(<vscale x 8 x half> %0, half %1, i32 %2)
+  %a = call <vscale x 8 x half> @llvm.riscv.vfmv.s.f.nxv8f16(<vscale x 8 x half> %0, half %1, iXLen %2)
   ret <vscale x 8 x half> %a
 }
 
-declare <vscale x 16 x half> @llvm.riscv.vfmv.s.f.nxv16f16(<vscale x 16 x half>, half, i32)
+declare <vscale x 16 x half> @llvm.riscv.vfmv.s.f.nxv16f16(<vscale x 16 x half>, half, iXLen)
 
-define <vscale x 16 x half> @intrinsic_vfmv.s.f_f_nxv16f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmv.s.f_f_nxv16f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmv.s.f.nxv16f16(<vscale x 16 x half> %0, half %1, i32 %2)
+  %a = call <vscale x 16 x half> @llvm.riscv.vfmv.s.f.nxv16f16(<vscale x 16 x half> %0, half %1, iXLen %2)
   ret <vscale x 16 x half> %a
 }
 
-declare <vscale x 32 x half> @llvm.riscv.vfmv.s.f.nxv32f16(<vscale x 32 x half>, half, i32)
+declare <vscale x 32 x half> @llvm.riscv.vfmv.s.f.nxv32f16(<vscale x 32 x half>, half, iXLen)
 
-define <vscale x 32 x half> @intrinsic_vfmv.s.f_f_nxv32f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmv.s.f_f_nxv32f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmv.s.f.nxv32f16(<vscale x 32 x half> %0, half %1, i32 %2)
+  %a = call <vscale x 32 x half> @llvm.riscv.vfmv.s.f.nxv32f16(<vscale x 32 x half> %0, half %1, iXLen %2)
   ret <vscale x 32 x half> %a
 }
 
-declare <vscale x 1 x float> @llvm.riscv.vfmv.s.f.nxv1f32(<vscale x 1 x float>, float, i32)
+declare <vscale x 1 x float> @llvm.riscv.vfmv.s.f.nxv1f32(<vscale x 1 x float>, float, iXLen)
 
-define <vscale x 1 x float> @intrinsic_vfmv.s.f_f_nxv1f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmv.s.f_f_nxv1f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmv.s.f.nxv1f32(<vscale x 1 x float> %0, float %1, i32 %2)
+  %a = call <vscale x 1 x float> @llvm.riscv.vfmv.s.f.nxv1f32(<vscale x 1 x float> %0, float %1, iXLen %2)
   ret <vscale x 1 x float> %a
 }
 
-declare <vscale x 2 x float> @llvm.riscv.vfmv.s.f.nxv2f32(<vscale x 2 x float>, float, i32)
+declare <vscale x 2 x float> @llvm.riscv.vfmv.s.f.nxv2f32(<vscale x 2 x float>, float, iXLen)
 
-define <vscale x 2 x float> @intrinsic_vfmv.s.f_f_nxv2f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmv.s.f_f_nxv2f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmv.s.f.nxv2f32(<vscale x 2 x float> %0, float %1, i32 %2)
+  %a = call <vscale x 2 x float> @llvm.riscv.vfmv.s.f.nxv2f32(<vscale x 2 x float> %0, float %1, iXLen %2)
   ret <vscale x 2 x float> %a
 }
 
-declare <vscale x 4 x float> @llvm.riscv.vfmv.s.f.nxv4f32(<vscale x 4 x float>, float, i32)
+declare <vscale x 4 x float> @llvm.riscv.vfmv.s.f.nxv4f32(<vscale x 4 x float>, float, iXLen)
 
-define <vscale x 4 x float> @intrinsic_vfmv.s.f_f_nxv4f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmv.s.f_f_nxv4f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmv.s.f.nxv4f32(<vscale x 4 x float> %0, float %1, i32 %2)
+  %a = call <vscale x 4 x float> @llvm.riscv.vfmv.s.f.nxv4f32(<vscale x 4 x float> %0, float %1, iXLen %2)
   ret <vscale x 4 x float> %a
 }
 
-declare <vscale x 8 x float> @llvm.riscv.vfmv.s.f.nxv8f32(<vscale x 8 x float>, float, i32)
+declare <vscale x 8 x float> @llvm.riscv.vfmv.s.f.nxv8f32(<vscale x 8 x float>, float, iXLen)
 
-define <vscale x 8 x float> @intrinsic_vfmv.s.f_f_nxv8f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmv.s.f_f_nxv8f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmv.s.f.nxv8f32(<vscale x 8 x float> %0, float %1, i32 %2)
+  %a = call <vscale x 8 x float> @llvm.riscv.vfmv.s.f.nxv8f32(<vscale x 8 x float> %0, float %1, iXLen %2)
   ret <vscale x 8 x float> %a
 }
 
-declare <vscale x 16 x float> @llvm.riscv.vfmv.s.f.nxv16f32(<vscale x 16 x float>, float, i32)
+declare <vscale x 16 x float> @llvm.riscv.vfmv.s.f.nxv16f32(<vscale x 16 x float>, float, iXLen)
 
-define <vscale x 16 x float> @intrinsic_vfmv.s.f_f_nxv16f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmv.s.f_f_nxv16f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmv.s.f.nxv16f32(<vscale x 16 x float> %0, float %1, i32 %2)
+  %a = call <vscale x 16 x float> @llvm.riscv.vfmv.s.f.nxv16f32(<vscale x 16 x float> %0, float %1, iXLen %2)
   ret <vscale x 16 x float> %a
 }
 
-declare <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(<vscale x 1 x double>, double, i32)
+declare <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(<vscale x 1 x double>, double, iXLen)
 
-define <vscale x 1 x double> @intrinsic_vfmv.s.f_f_nxv1f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmv.s.f_f_nxv1f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(<vscale x 1 x double> %0, double %1, i32 %2)
+  %a = call <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64(<vscale x 1 x double> %0, double %1, iXLen %2)
   ret <vscale x 1 x double> %a
 }
 
-declare <vscale x 2 x double> @llvm.riscv.vfmv.s.f.nxv2f64(<vscale x 2 x double>, double, i32)
+declare <vscale x 2 x double> @llvm.riscv.vfmv.s.f.nxv2f64(<vscale x 2 x double>, double, iXLen)
 
-define <vscale x 2 x double> @intrinsic_vfmv.s.f_f_nxv2f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmv.s.f_f_nxv2f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmv.s.f.nxv2f64(<vscale x 2 x double> %0, double %1, i32 %2)
+  %a = call <vscale x 2 x double> @llvm.riscv.vfmv.s.f.nxv2f64(<vscale x 2 x double> %0, double %1, iXLen %2)
   ret <vscale x 2 x double> %a
 }
 
-declare <vscale x 4 x double> @llvm.riscv.vfmv.s.f.nxv4f64(<vscale x 4 x double>, double, i32)
+declare <vscale x 4 x double> @llvm.riscv.vfmv.s.f.nxv4f64(<vscale x 4 x double>, double, iXLen)
 
-define <vscale x 4 x double> @intrinsic_vfmv.s.f_f_nxv4f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmv.s.f_f_nxv4f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmv.s.f.nxv4f64(<vscale x 4 x double> %0, double %1, i32 %2)
+  %a = call <vscale x 4 x double> @llvm.riscv.vfmv.s.f.nxv4f64(<vscale x 4 x double> %0, double %1, iXLen %2)
   ret <vscale x 4 x double> %a
 }
 
-declare <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double>, double, i32)
+declare <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double>, double, iXLen)
 
-define <vscale x 8 x double> @intrinsic_vfmv.s.f_f_nxv8f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmv.s.f_f_nxv8f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
 ; CHECK-NEXT:    vfmv.s.f v8, fa0
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double> %0, double %1, i32 %2)
+  %a = call <vscale x 8 x double> @llvm.riscv.vfmv.s.f.nxv8f64(<vscale x 8 x double> %0, double %1, iXLen %2)
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll
deleted file mode 100644
index 1c4d9f6e89dc4..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv64.ll
+++ /dev/null
@@ -1,482 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -target-abi lp64d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfmv.v.f_f_nxv1f16(half %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
-    half %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfmv.v.f.nxv2f16(
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfmv.v.f_f_nxv2f16(half %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmv.v.f.nxv2f16(
-    half %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfmv.v.f.nxv4f16(
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfmv.v.f_f_nxv4f16(half %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmv.v.f.nxv4f16(
-    half %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfmv.v.f.nxv8f16(
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfmv.v.f_f_nxv8f16(half %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmv.v.f.nxv8f16(
-    half %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfmv.v.f.nxv16f16(
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfmv.v.f_f_nxv16f16(half %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmv.v.f.nxv16f16(
-    half %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfmv.v.f.nxv32f16(
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfmv.v.f_f_nxv32f16(half %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmv.v.f.nxv32f16(
-    half %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfmv.v.f_f_nxv1f32(float %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(
-    float %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32(
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfmv.v.f_f_nxv2f32(float %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32(
-    float %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfmv.v.f.nxv4f32(
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfmv.v.f_f_nxv4f32(float %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmv.v.f.nxv4f32(
-    float %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfmv.v.f.nxv8f32(
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfmv.v.f_f_nxv8f32(float %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmv.v.f.nxv8f32(
-    float %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfmv.v.f.nxv16f32(
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfmv.v.f_f_nxv16f32(float %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmv.v.f.nxv16f32(
-    float %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfmv.v.f_f_nxv1f64(double %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(
-    double %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfmv.v.f.nxv2f64(
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfmv.v.f_f_nxv2f64(double %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmv.v.f.nxv2f64(
-    double %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfmv.v.f.nxv4f64(
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfmv.v.f_f_nxv4f64(double %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmv.v.f.nxv4f64(
-    double %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfmv.v.f_f_nxv8f64(double %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(
-    double %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-define <vscale x 1 x half> @intrinsic_vfmv.v.f_zero_nxv1f16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vfmv.v.f_zero_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
-    half 0.0,
-    i64 %0)
-
-  ret <vscale x 1 x half> %a
-}
-
-define <vscale x 2 x half> @intrinsic_vmv.v.i_zero_nxv2f16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfmv.v.f.nxv2f16(
-    half 0.0,
-    i64 %0)
-
-  ret <vscale x 2 x half> %a
-}
-
-define <vscale x 4 x half> @intrinsic_vmv.v.i_zero_nxv4f16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfmv.v.f.nxv4f16(
-    half 0.0,
-    i64 %0)
-
-  ret <vscale x 4 x half> %a
-}
-
-define <vscale x 8 x half> @intrinsic_vmv.v.i_zero_nxv8f16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfmv.v.f.nxv8f16(
-    half 0.0,
-    i64 %0)
-
-  ret <vscale x 8 x half> %a
-}
-
-define <vscale x 16 x half> @intrinsic_vmv.v.i_zero_nxv16f16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfmv.v.f.nxv16f16(
-    half 0.0,
-    i64 %0)
-
-  ret <vscale x 16 x half> %a
-}
-
-define <vscale x 32 x half> @intrinsic_vmv.v.i_zero_nxv32f16(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfmv.v.f.nxv32f16(
-    half 0.0,
-    i64 %0)
-
-  ret <vscale x 32 x half> %a
-}
-
-define <vscale x 1 x float> @intrinsic_vmv.v.i_zero_nxv1f32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(
-    float 0.0,
-    i64 %0)
-
-  ret <vscale x 1 x float> %a
-}
-
-define <vscale x 2 x float> @intrinsic_vmv.v.i_zero_nxv2f32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32(
-    float 0.0,
-    i64 %0)
-
-  ret <vscale x 2 x float> %a
-}
-
-define <vscale x 4 x float> @intrinsic_vmv.v.i_zero_nxv4f32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfmv.v.f.nxv4f32(
-    float 0.0,
-    i64 %0)
-
-  ret <vscale x 4 x float> %a
-}
-
-define <vscale x 8 x float> @intrinsic_vmv.v.i_zero_nxv8f32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfmv.v.f.nxv8f32(
-    float 0.0,
-    i64 %0)
-
-  ret <vscale x 8 x float> %a
-}
-
-define <vscale x 16 x float> @intrinsic_vmv.v.i_zero_nxv16f32(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfmv.v.f.nxv16f32(
-    float 0.0,
-    i64 %0)
-
-  ret <vscale x 16 x float> %a
-}
-
-define <vscale x 1 x double> @intrinsic_vmv.v.i_zero_nxv1f64(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(
-    double 0.0,
-    i64 %0)
-
-  ret <vscale x 1 x double> %a
-}
-
-define <vscale x 2 x double> @intrinsic_vmv.v.i_zero_nxv2f64(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfmv.v.f.nxv2f64(
-    double 0.0,
-    i64 %0)
-
-  ret <vscale x 2 x double> %a
-}
-
-define <vscale x 4 x double> @intrinsic_vmv.v.i_zero_nxv4f64(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfmv.v.f.nxv4f64(
-    double 0.0,
-    i64 %0)
-
-  ret <vscale x 4 x double> %a
-}
-
-define <vscale x 8 x double> @intrinsic_vmv.v.i_zero_nxv8f64(i64 %0) nounwind {
-; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(
-    double 0.0,
-    i64 %0)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll
similarity index 82%
rename from llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll
index b4acc57dcd81e..6e0613e3e49ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -target-abi ilp32d -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfmv.v.f_f_nxv1f16(half %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmv.v.f_f_nxv1f16(half %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,16 +16,16 @@ define <vscale x 1 x half> @intrinsic_vfmv.v.f_f_nxv1f16(half %0, i32 %1) nounwi
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
     half %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfmv.v.f.nxv2f16(
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfmv.v.f_f_nxv2f16(half %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfmv.v.f_f_nxv2f16(half %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -32,16 +34,16 @@ define <vscale x 2 x half> @intrinsic_vfmv.v.f_f_nxv2f16(half %0, i32 %1) nounwi
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmv.v.f.nxv2f16(
     half %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfmv.v.f.nxv4f16(
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfmv.v.f_f_nxv4f16(half %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfmv.v.f_f_nxv4f16(half %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -50,16 +52,16 @@ define <vscale x 4 x half> @intrinsic_vfmv.v.f_f_nxv4f16(half %0, i32 %1) nounwi
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmv.v.f.nxv4f16(
     half %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfmv.v.f.nxv8f16(
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfmv.v.f_f_nxv8f16(half %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfmv.v.f_f_nxv8f16(half %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -68,16 +70,16 @@ define <vscale x 8 x half> @intrinsic_vfmv.v.f_f_nxv8f16(half %0, i32 %1) nounwi
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmv.v.f.nxv8f16(
     half %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfmv.v.f.nxv16f16(
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfmv.v.f_f_nxv16f16(half %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfmv.v.f_f_nxv16f16(half %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -86,16 +88,16 @@ define <vscale x 16 x half> @intrinsic_vfmv.v.f_f_nxv16f16(half %0, i32 %1) noun
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmv.v.f.nxv16f16(
     half %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vfmv.v.f.nxv32f16(
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfmv.v.f_f_nxv32f16(half %0, i32 %1) nounwind {
+define <vscale x 32 x half> @intrinsic_vfmv.v.f_f_nxv32f16(half %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -104,16 +106,16 @@ define <vscale x 32 x half> @intrinsic_vfmv.v.f_f_nxv32f16(half %0, i32 %1) noun
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmv.v.f.nxv32f16(
     half %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfmv.v.f_f_nxv1f32(float %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfmv.v.f_f_nxv1f32(float %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -122,16 +124,16 @@ define <vscale x 1 x float> @intrinsic_vfmv.v.f_f_nxv1f32(float %0, i32 %1) noun
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(
     float %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32(
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfmv.v.f_f_nxv2f32(float %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfmv.v.f_f_nxv2f32(float %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -140,16 +142,16 @@ define <vscale x 2 x float> @intrinsic_vfmv.v.f_f_nxv2f32(float %0, i32 %1) noun
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32(
     float %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfmv.v.f.nxv4f32(
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfmv.v.f_f_nxv4f32(float %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfmv.v.f_f_nxv4f32(float %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -158,16 +160,16 @@ define <vscale x 4 x float> @intrinsic_vfmv.v.f_f_nxv4f32(float %0, i32 %1) noun
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmv.v.f.nxv4f32(
     float %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfmv.v.f.nxv8f32(
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfmv.v.f_f_nxv8f32(float %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfmv.v.f_f_nxv8f32(float %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -176,16 +178,16 @@ define <vscale x 8 x float> @intrinsic_vfmv.v.f_f_nxv8f32(float %0, i32 %1) noun
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmv.v.f.nxv8f32(
     float %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfmv.v.f.nxv16f32(
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfmv.v.f_f_nxv16f32(float %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vfmv.v.f_f_nxv16f32(float %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -194,16 +196,16 @@ define <vscale x 16 x float> @intrinsic_vfmv.v.f_f_nxv16f32(float %0, i32 %1) no
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmv.v.f.nxv16f32(
     float %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfmv.v.f_f_nxv1f64(double %0, i32 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vfmv.v.f_f_nxv1f64(double %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -212,16 +214,16 @@ define <vscale x 1 x double> @intrinsic_vfmv.v.f_f_nxv1f64(double %0, i32 %1) no
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(
     double %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfmv.v.f.nxv2f64(
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfmv.v.f_f_nxv2f64(double %0, i32 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vfmv.v.f_f_nxv2f64(double %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -230,16 +232,16 @@ define <vscale x 2 x double> @intrinsic_vfmv.v.f_f_nxv2f64(double %0, i32 %1) no
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmv.v.f.nxv2f64(
     double %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfmv.v.f.nxv4f64(
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfmv.v.f_f_nxv4f64(double %0, i32 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vfmv.v.f_f_nxv4f64(double %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -248,16 +250,16 @@ define <vscale x 4 x double> @intrinsic_vfmv.v.f_f_nxv4f64(double %0, i32 %1) no
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmv.v.f.nxv4f64(
     double %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfmv.v.f_f_nxv8f64(double %0, i32 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vfmv.v.f_f_nxv8f64(double %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_f_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -266,12 +268,12 @@ define <vscale x 8 x double> @intrinsic_vfmv.v.f_f_nxv8f64(double %0, i32 %1) no
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(
     double %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
 
-define <vscale x 1 x half> @intrinsic_vfmv.v.f_zero_nxv1f16(i32 %0) nounwind {
+define <vscale x 1 x half> @intrinsic_vfmv.v.f_zero_nxv1f16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vfmv.v.f_zero_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -280,12 +282,12 @@ define <vscale x 1 x half> @intrinsic_vfmv.v.f_zero_nxv1f16(i32 %0) nounwind {
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfmv.v.f.nxv1f16(
     half 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x half> %a
 }
 
-define <vscale x 2 x half> @intrinsic_vmv.v.i_zero_nxv2f16(i32 %0) nounwind {
+define <vscale x 2 x half> @intrinsic_vmv.v.i_zero_nxv2f16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -294,12 +296,12 @@ define <vscale x 2 x half> @intrinsic_vmv.v.i_zero_nxv2f16(i32 %0) nounwind {
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfmv.v.f.nxv2f16(
     half 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x half> %a
 }
 
-define <vscale x 4 x half> @intrinsic_vmv.v.i_zero_nxv4f16(i32 %0) nounwind {
+define <vscale x 4 x half> @intrinsic_vmv.v.i_zero_nxv4f16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -308,12 +310,12 @@ define <vscale x 4 x half> @intrinsic_vmv.v.i_zero_nxv4f16(i32 %0) nounwind {
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfmv.v.f.nxv4f16(
     half 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x half> %a
 }
 
-define <vscale x 8 x half> @intrinsic_vmv.v.i_zero_nxv8f16(i32 %0) nounwind {
+define <vscale x 8 x half> @intrinsic_vmv.v.i_zero_nxv8f16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -322,12 +324,12 @@ define <vscale x 8 x half> @intrinsic_vmv.v.i_zero_nxv8f16(i32 %0) nounwind {
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfmv.v.f.nxv8f16(
     half 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x half> %a
 }
 
-define <vscale x 16 x half> @intrinsic_vmv.v.i_zero_nxv16f16(i32 %0) nounwind {
+define <vscale x 16 x half> @intrinsic_vmv.v.i_zero_nxv16f16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -336,12 +338,12 @@ define <vscale x 16 x half> @intrinsic_vmv.v.i_zero_nxv16f16(i32 %0) nounwind {
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfmv.v.f.nxv16f16(
     half 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 16 x half> %a
 }
 
-define <vscale x 32 x half> @intrinsic_vmv.v.i_zero_nxv32f16(i32 %0) nounwind {
+define <vscale x 32 x half> @intrinsic_vmv.v.i_zero_nxv32f16(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -350,12 +352,12 @@ define <vscale x 32 x half> @intrinsic_vmv.v.i_zero_nxv32f16(i32 %0) nounwind {
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfmv.v.f.nxv32f16(
     half 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 32 x half> %a
 }
 
-define <vscale x 1 x float> @intrinsic_vmv.v.i_zero_nxv1f32(i32 %0) nounwind {
+define <vscale x 1 x float> @intrinsic_vmv.v.i_zero_nxv1f32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -364,12 +366,12 @@ define <vscale x 1 x float> @intrinsic_vmv.v.i_zero_nxv1f32(i32 %0) nounwind {
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfmv.v.f.nxv1f32(
     float 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x float> %a
 }
 
-define <vscale x 2 x float> @intrinsic_vmv.v.i_zero_nxv2f32(i32 %0) nounwind {
+define <vscale x 2 x float> @intrinsic_vmv.v.i_zero_nxv2f32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -378,12 +380,12 @@ define <vscale x 2 x float> @intrinsic_vmv.v.i_zero_nxv2f32(i32 %0) nounwind {
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfmv.v.f.nxv2f32(
     float 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x float> %a
 }
 
-define <vscale x 4 x float> @intrinsic_vmv.v.i_zero_nxv4f32(i32 %0) nounwind {
+define <vscale x 4 x float> @intrinsic_vmv.v.i_zero_nxv4f32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -392,12 +394,12 @@ define <vscale x 4 x float> @intrinsic_vmv.v.i_zero_nxv4f32(i32 %0) nounwind {
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfmv.v.f.nxv4f32(
     float 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x float> %a
 }
 
-define <vscale x 8 x float> @intrinsic_vmv.v.i_zero_nxv8f32(i32 %0) nounwind {
+define <vscale x 8 x float> @intrinsic_vmv.v.i_zero_nxv8f32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,12 +408,12 @@ define <vscale x 8 x float> @intrinsic_vmv.v.i_zero_nxv8f32(i32 %0) nounwind {
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfmv.v.f.nxv8f32(
     float 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x float> %a
 }
 
-define <vscale x 16 x float> @intrinsic_vmv.v.i_zero_nxv16f32(i32 %0) nounwind {
+define <vscale x 16 x float> @intrinsic_vmv.v.i_zero_nxv16f32(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -420,12 +422,12 @@ define <vscale x 16 x float> @intrinsic_vmv.v.i_zero_nxv16f32(i32 %0) nounwind {
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmv.v.f.nxv16f32(
     float 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 16 x float> %a
 }
 
-define <vscale x 1 x double> @intrinsic_vmv.v.i_zero_nxv1f64(i32 %0) nounwind {
+define <vscale x 1 x double> @intrinsic_vmv.v.i_zero_nxv1f64(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -434,12 +436,12 @@ define <vscale x 1 x double> @intrinsic_vmv.v.i_zero_nxv1f64(i32 %0) nounwind {
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfmv.v.f.nxv1f64(
     double 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 1 x double> %a
 }
 
-define <vscale x 2 x double> @intrinsic_vmv.v.i_zero_nxv2f64(i32 %0) nounwind {
+define <vscale x 2 x double> @intrinsic_vmv.v.i_zero_nxv2f64(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -448,12 +450,12 @@ define <vscale x 2 x double> @intrinsic_vmv.v.i_zero_nxv2f64(i32 %0) nounwind {
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfmv.v.f.nxv2f64(
     double 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 2 x double> %a
 }
 
-define <vscale x 4 x double> @intrinsic_vmv.v.i_zero_nxv4f64(i32 %0) nounwind {
+define <vscale x 4 x double> @intrinsic_vmv.v.i_zero_nxv4f64(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -462,12 +464,12 @@ define <vscale x 4 x double> @intrinsic_vmv.v.i_zero_nxv4f64(i32 %0) nounwind {
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfmv.v.f.nxv4f64(
     double 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 4 x double> %a
 }
 
-define <vscale x 8 x double> @intrinsic_vmv.v.i_zero_nxv8f64(i32 %0) nounwind {
+define <vscale x 8 x double> @intrinsic_vmv.v.i_zero_nxv8f64(iXLen %0) nounwind {
 ; CHECK-LABEL: intrinsic_vmv.v.i_zero_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -476,7 +478,7 @@ define <vscale x 8 x double> @intrinsic_vmv.v.i_zero_nxv8f64(i32 %0) nounwind {
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmv.v.f.nxv8f64(
     double 0.0,
-    i32 %0)
+    iXLen %0)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll
deleted file mode 100644
index 2e35f75eb89c0..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv64.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv1f16.nxv1f32(
-  <vscale x 1 x half>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32(<vscale x 1 x half> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv1f16.nxv1f32(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv2f16.nxv2f32(
-  <vscale x 2 x half>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32(<vscale x 2 x half> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv2f16.nxv2f32(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv4f16.nxv4f32(
-  <vscale x 4 x half>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32(<vscale x 4 x half> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv4f16.nxv4f32(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv8f16.nxv8f32(
-  <vscale x 8 x half>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32(<vscale x 8 x half> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv8f16.nxv8f32(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv16f16.nxv16f32(
-  <vscale x 16 x half>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32(<vscale x 16 x half> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv16f16.nxv16f32(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv1f32.nxv1f64(
-  <vscale x 1 x float>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64(<vscale x 1 x float> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv1f32.nxv1f64(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv2f32.nxv2f64(
-  <vscale x 2 x float>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64(<vscale x 2 x float> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv2f32.nxv2f64(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv4f32.nxv4f64(
-  <vscale x 4 x float>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64(<vscale x 4 x float> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv4f32.nxv4f64(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv8f32.nxv8f64(
-  <vscale x 8 x float>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64(<vscale x 8 x float> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv8f32.nxv8f64(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll
index 014ff81bada19..e757261e7363d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x half> @intrinsic_vfncvt_f.f.w_nxv1f16_nxv1f32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.nxv1f16.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv1f16.nxv1f32(
   <vscale x 1 x half>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32(<vscale x 1 x half> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32(<vscale x 1 x half> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x half> @intrinsic_vfncvt_f.f.w_nxv2f16_nxv2f32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.f.w.nxv2f16.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv2f16.nxv2f32(
   <vscale x 2 x half>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32(<vscale x 2 x half> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32(<vscale x 2 x half> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x half> @intrinsic_vfncvt_f.f.w_nxv4f16_nxv4f32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.nxv4f16.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv4f16.nxv4f32(
   <vscale x 4 x half>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32(<vscale x 4 x half> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32(<vscale x 4 x half> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv4f16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x half> @intrinsic_vfncvt_f.f.w_nxv8f16_nxv8f32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.f.w.nxv8f16.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv8f16.nxv8f32(
   <vscale x 8 x half>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32(<vscale x 8 x half> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32(<vscale x 8 x half> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv8f16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x half> @intrinsic_vfncvt_f.f.w_nxv16f16_nxv16f32(<vscale x
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.f.w.nxv16f16.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.f.w.mask.nxv16f16.nxv16f32(
   <vscale x 16 x half>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32(<vscale x 16 x half> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32(<vscale x 16 x half> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv16f16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x float> @intrinsic_vfncvt_f.f.w_nxv1f32_nxv1f64(<vscale x 1
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.f.w.nxv1f32.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv1f32.nxv1f64(
   <vscale x 1 x float>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64(<vscale x 1 x float> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64(<vscale x 1 x float> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x float> @intrinsic_vfncvt_f.f.w_nxv2f32_nxv2f64(<vscale x 2
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.f.w.nxv2f32.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv2f32.nxv2f64(
   <vscale x 2 x float>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64(<vscale x 2 x float> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64(<vscale x 2 x float> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x float> @intrinsic_vfncvt_f.f.w_nxv4f32_nxv4f64(<vscale x 4
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.f.w.nxv4f32.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv4f32.nxv4f64(
   <vscale x 4 x float>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64(<vscale x 4 x float> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64(<vscale x 4 x float> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv4f32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x float> @intrinsic_vfncvt_f.f.w_nxv8f32_nxv8f64(<vscale x 8
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.f.w.nxv8f32.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.f.w.mask.nxv8f32.nxv8f64(
   <vscale x 8 x float>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64(<vscale x 8 x float> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64(<vscale x 8 x float> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.f.w_nxv8f32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll
deleted file mode 100644
index d8d55df8e4589..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv64.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32(
-  <vscale x 1 x i32>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32(<vscale x 1 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32(
-    <vscale x 1 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv1f16.nxv1i32(
-  <vscale x 1 x half>,
-  <vscale x 1 x i32>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32(<vscale x 1 x half> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv1f16.nxv1i32(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x i32> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32(
-  <vscale x 2 x i32>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32(<vscale x 2 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32(
-    <vscale x 2 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv2f16.nxv2i32(
-  <vscale x 2 x half>,
-  <vscale x 2 x i32>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32(<vscale x 2 x half> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv2f16.nxv2i32(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x i32> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32(
-  <vscale x 4 x i32>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32(<vscale x 4 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32(
-    <vscale x 4 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv4f16.nxv4i32(
-  <vscale x 4 x half>,
-  <vscale x 4 x i32>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32(<vscale x 4 x half> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv4f16.nxv4i32(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x i32> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32(
-  <vscale x 8 x i32>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32(<vscale x 8 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32(
-    <vscale x 8 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv8f16.nxv8i32(
-  <vscale x 8 x half>,
-  <vscale x 8 x i32>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32(<vscale x 8 x half> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv8f16.nxv8i32(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x i32> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32(
-  <vscale x 16 x i32>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32(<vscale x 16 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32(
-    <vscale x 16 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv16f16.nxv16i32(
-  <vscale x 16 x half>,
-  <vscale x 16 x i32>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32(<vscale x 16 x half> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv16f16.nxv16i32(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x i32> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64(
-  <vscale x 1 x i64>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64(<vscale x 1 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64(
-    <vscale x 1 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv1f32.nxv1i64(
-  <vscale x 1 x float>,
-  <vscale x 1 x i64>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64(<vscale x 1 x float> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv1f32.nxv1i64(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x i64> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64(
-  <vscale x 2 x i64>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64(<vscale x 2 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64(
-    <vscale x 2 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv2f32.nxv2i64(
-  <vscale x 2 x float>,
-  <vscale x 2 x i64>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64(<vscale x 2 x float> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv2f32.nxv2i64(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x i64> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64(
-  <vscale x 4 x i64>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64(<vscale x 4 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64(
-    <vscale x 4 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv4f32.nxv4i64(
-  <vscale x 4 x float>,
-  <vscale x 4 x i64>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64(<vscale x 4 x float> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv4f32.nxv4i64(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x i64> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64(
-  <vscale x 8 x i64>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64(<vscale x 8 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64(
-    <vscale x 8 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv8f32.nxv8i64(
-  <vscale x 8 x float>,
-  <vscale x 8 x i64>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64(<vscale x 8 x float> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.x.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv8f32.nxv8i64(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x i64> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll
index ab5f66bf692fa..eedc7c1633999 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-x.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32(
   <vscale x 1 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32(<vscale x 1 x i32> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32(<vscale x 1 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x half> @intrinsic_vfncvt_f.x.w_nxv1f16_nxv1i32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.nxv1f16.nxv1i32(
     <vscale x 1 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv1f16.nxv1i32(
   <vscale x 1 x half>,
   <vscale x 1 x i32>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32(<vscale x 1 x half> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32(<vscale x 1 x half> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x i32> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32(
   <vscale x 2 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32(<vscale x 2 x i32> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32(<vscale x 2 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x half> @intrinsic_vfncvt_f.x.w_nxv2f16_nxv2i32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.x.w.nxv2f16.nxv2i32(
     <vscale x 2 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv2f16.nxv2i32(
   <vscale x 2 x half>,
   <vscale x 2 x i32>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32(<vscale x 2 x half> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32(<vscale x 2 x half> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x i32> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32(
   <vscale x 4 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32(<vscale x 4 x i32> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32(<vscale x 4 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x half> @intrinsic_vfncvt_f.x.w_nxv4f16_nxv4i32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.x.w.nxv4f16.nxv4i32(
     <vscale x 4 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv4f16.nxv4i32(
   <vscale x 4 x half>,
   <vscale x 4 x i32>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32(<vscale x 4 x half> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32(<vscale x 4 x half> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x i32> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32(
   <vscale x 8 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32(<vscale x 8 x i32> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32(<vscale x 8 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x half> @intrinsic_vfncvt_f.x.w_nxv8f16_nxv8i32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.x.w.nxv8f16.nxv8i32(
     <vscale x 8 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv8f16.nxv8i32(
   <vscale x 8 x half>,
   <vscale x 8 x i32>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32(<vscale x 8 x half> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32(<vscale x 8 x half> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x i32> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32(
   <vscale x 16 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32(<vscale x 16 x i32> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32(<vscale x 16 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x half> @intrinsic_vfncvt_f.x.w_nxv16f16_nxv16i32(<vscale x
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.x.w.nxv16f16.nxv16i32(
     <vscale x 16 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.x.w.mask.nxv16f16.nxv16i32(
   <vscale x 16 x half>,
   <vscale x 16 x i32>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32(<vscale x 16 x half> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32(<vscale x 16 x half> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x i32> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64(
   <vscale x 1 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64(<vscale x 1 x i64> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64(<vscale x 1 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x float> @intrinsic_vfncvt_f.x.w_nxv1f32_nxv1i64(<vscale x 1
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.x.w.nxv1f32.nxv1i64(
     <vscale x 1 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv1f32.nxv1i64(
   <vscale x 1 x float>,
   <vscale x 1 x i64>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64(<vscale x 1 x float> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64(<vscale x 1 x float> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv1f32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x i64> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64(
   <vscale x 2 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64(<vscale x 2 x i64> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64(<vscale x 2 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x float> @intrinsic_vfncvt_f.x.w_nxv2f32_nxv2i64(<vscale x 2
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.x.w.nxv2f32.nxv2i64(
     <vscale x 2 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv2f32.nxv2i64(
   <vscale x 2 x float>,
   <vscale x 2 x i64>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64(<vscale x 2 x float> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64(<vscale x 2 x float> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x i64> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64(
   <vscale x 4 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64(<vscale x 4 x i64> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64(<vscale x 4 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x float> @intrinsic_vfncvt_f.x.w_nxv4f32_nxv4i64(<vscale x 4
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.x.w.nxv4f32.nxv4i64(
     <vscale x 4 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv4f32.nxv4i64(
   <vscale x 4 x float>,
   <vscale x 4 x i64>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64(<vscale x 4 x float> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64(<vscale x 4 x float> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x i64> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64(
   <vscale x 8 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64(<vscale x 8 x i64> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64(<vscale x 8 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x float> @intrinsic_vfncvt_f.x.w_nxv8f32_nxv8i64(<vscale x 8
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.x.w.nxv8f32.nxv8i64(
     <vscale x 8 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.x.w.mask.nxv8f32.nxv8i64(
   <vscale x 8 x float>,
   <vscale x 8 x i64>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64(<vscale x 8 x float> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64(<vscale x 8 x float> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.x.w_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x i64> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll
deleted file mode 100644
index 32a23932b074b..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv64.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32(
-  <vscale x 1 x i32>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32(<vscale x 1 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32(
-    <vscale x 1 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f16.nxv1i32(
-  <vscale x 1 x half>,
-  <vscale x 1 x i32>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32(<vscale x 1 x half> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f16.nxv1i32(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x i32> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32(
-  <vscale x 2 x i32>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32(<vscale x 2 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32(
-    <vscale x 2 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f16.nxv2i32(
-  <vscale x 2 x half>,
-  <vscale x 2 x i32>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32(<vscale x 2 x half> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f16.nxv2i32(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x i32> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32(
-  <vscale x 4 x i32>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32(<vscale x 4 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32(
-    <vscale x 4 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f16.nxv4i32(
-  <vscale x 4 x half>,
-  <vscale x 4 x i32>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32(<vscale x 4 x half> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f16.nxv4i32(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x i32> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32(
-  <vscale x 8 x i32>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32(<vscale x 8 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32(
-    <vscale x 8 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f16.nxv8i32(
-  <vscale x 8 x half>,
-  <vscale x 8 x i32>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32(<vscale x 8 x half> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f16.nxv8i32(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x i32> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32(
-  <vscale x 16 x i32>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32(<vscale x 16 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32(
-    <vscale x 16 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv16f16.nxv16i32(
-  <vscale x 16 x half>,
-  <vscale x 16 x i32>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32(<vscale x 16 x half> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv16f16.nxv16i32(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x i32> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64(
-  <vscale x 1 x i64>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64(<vscale x 1 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64(
-    <vscale x 1 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f32.nxv1i64(
-  <vscale x 1 x float>,
-  <vscale x 1 x i64>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64(<vscale x 1 x float> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f32.nxv1i64(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x i64> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64(
-  <vscale x 2 x i64>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64(<vscale x 2 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64(
-    <vscale x 2 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f32.nxv2i64(
-  <vscale x 2 x float>,
-  <vscale x 2 x i64>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64(<vscale x 2 x float> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f32.nxv2i64(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x i64> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64(
-  <vscale x 4 x i64>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64(<vscale x 4 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64(
-    <vscale x 4 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f32.nxv4i64(
-  <vscale x 4 x float>,
-  <vscale x 4 x i64>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64(<vscale x 4 x float> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f32.nxv4i64(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x i64> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64(
-  <vscale x 8 x i64>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64(<vscale x 8 x i64> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64(
-    <vscale x 8 x i64> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f32.nxv8i64(
-  <vscale x 8 x float>,
-  <vscale x 8 x i64>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64(<vscale x 8 x float> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.f.xu.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f32.nxv8i64(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x i64> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll
index 4835d4e5c5916..e6842b749492c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-f-xu.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32(
   <vscale x 1 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32(<vscale x 1 x i32> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32(<vscale x 1 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x half> @intrinsic_vfncvt_f.xu.w_nxv1f16_nxv1i32(<vscale x 1
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.nxv1f16.nxv1i32(
     <vscale x 1 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f16.nxv1i32(
   <vscale x 1 x half>,
   <vscale x 1 x i32>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32(<vscale x 1 x half> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32(<vscale x 1 x half> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv1f16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x i32> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32(
   <vscale x 2 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32(<vscale x 2 x i32> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32(<vscale x 2 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x half> @intrinsic_vfncvt_f.xu.w_nxv2f16_nxv2i32(<vscale x 2
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.f.xu.w.nxv2f16.nxv2i32(
     <vscale x 2 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f16.nxv2i32(
   <vscale x 2 x half>,
   <vscale x 2 x i32>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32(<vscale x 2 x half> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32(<vscale x 2 x half> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv2f16_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x i32> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32(
   <vscale x 4 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32(<vscale x 4 x i32> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32(<vscale x 4 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x half> @intrinsic_vfncvt_f.xu.w_nxv4f16_nxv4i32(<vscale x 4
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.f.xu.w.nxv4f16.nxv4i32(
     <vscale x 4 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f16.nxv4i32(
   <vscale x 4 x half>,
   <vscale x 4 x i32>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32(<vscale x 4 x half> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32(<vscale x 4 x half> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv4f16_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x i32> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32(
   <vscale x 8 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32(<vscale x 8 x i32> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32(<vscale x 8 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x half> @intrinsic_vfncvt_f.xu.w_nxv8f16_nxv8i32(<vscale x 8
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.f.xu.w.nxv8f16.nxv8i32(
     <vscale x 8 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f16.nxv8i32(
   <vscale x 8 x half>,
   <vscale x 8 x i32>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32(<vscale x 8 x half> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32(<vscale x 8 x half> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv8f16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x i32> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32(
   <vscale x 16 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32(<vscale x 16 x i32> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32(<vscale x 16 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x half> @intrinsic_vfncvt_f.xu.w_nxv16f16_nxv16i32(<vscale x
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.f.xu.w.nxv16f16.nxv16i32(
     <vscale x 16 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfncvt.f.xu.w.mask.nxv16f16.nxv16i32(
   <vscale x 16 x half>,
   <vscale x 16 x i32>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32(<vscale x 16 x half> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32(<vscale x 16 x half> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv16f16_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x i32> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64(
   <vscale x 1 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64(<vscale x 1 x i64> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64(<vscale x 1 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x float> @intrinsic_vfncvt_f.xu.w_nxv1f32_nxv1i64(<vscale x 1
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.f.xu.w.nxv1f32.nxv1i64(
     <vscale x 1 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv1f32.nxv1i64(
   <vscale x 1 x float>,
   <vscale x 1 x i64>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64(<vscale x 1 x float> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64(<vscale x 1 x float> %0, <vscale x 1 x i64> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv1f32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x i64> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64(
   <vscale x 2 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64(<vscale x 2 x i64> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64(<vscale x 2 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x float> @intrinsic_vfncvt_f.xu.w_nxv2f32_nxv2i64(<vscale x 2
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.f.xu.w.nxv2f32.nxv2i64(
     <vscale x 2 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv2f32.nxv2i64(
   <vscale x 2 x float>,
   <vscale x 2 x i64>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64(<vscale x 2 x float> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64(<vscale x 2 x float> %0, <vscale x 2 x i64> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv2f32_nxv2i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x i64> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64(
   <vscale x 4 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64(<vscale x 4 x i64> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64(<vscale x 4 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x float> @intrinsic_vfncvt_f.xu.w_nxv4f32_nxv4i64(<vscale x 4
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.f.xu.w.nxv4f32.nxv4i64(
     <vscale x 4 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv4f32.nxv4i64(
   <vscale x 4 x float>,
   <vscale x 4 x i64>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64(<vscale x 4 x float> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64(<vscale x 4 x float> %0, <vscale x 4 x i64> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv4f32_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x i64> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64(
   <vscale x 8 x i64>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64(<vscale x 8 x i64> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64(<vscale x 8 x i64> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x float> @intrinsic_vfncvt_f.xu.w_nxv8f32_nxv8i64(<vscale x 8
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.f.xu.w.nxv8f32.nxv8i64(
     <vscale x 8 x i64> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfncvt.f.xu.w.mask.nxv8f32.nxv8i64(
   <vscale x 8 x float>,
   <vscale x 8 x i64>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64(<vscale x 8 x float> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64(<vscale x 8 x float> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_f.xu.w_nxv8f32_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x i64> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll
deleted file mode 100644
index 4020c1d5d1a34..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv64.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f16.nxv1f32(
-  <vscale x 1 x half>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32(<vscale x 1 x half> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f16.nxv1f32(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f16.nxv2f32(
-  <vscale x 2 x half>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32(<vscale x 2 x half> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f16.nxv2f32(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f16.nxv4f32(
-  <vscale x 4 x half>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32(<vscale x 4 x half> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f16.nxv4f32(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f16.nxv8f32(
-  <vscale x 8 x half>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32(<vscale x 8 x half> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f16.nxv8f32(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16f16.nxv16f32(
-  <vscale x 16 x half>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32(<vscale x 16 x half> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16f16.nxv16f32(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f32.nxv1f64(
-  <vscale x 1 x float>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64(<vscale x 1 x float> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f32.nxv1f64(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f32.nxv2f64(
-  <vscale x 2 x float>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64(<vscale x 2 x float> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f32.nxv2f64(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f32.nxv4f64(
-  <vscale x 4 x float>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64(<vscale x 4 x float> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f32.nxv4f64(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f32.nxv8f64(
-  <vscale x 8 x float>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64(<vscale x 8 x float> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rod.f.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f32.nxv8f64(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll
index b464fdde6db25..2a7c30939f108 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rod-f-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x half> @intrinsic_vfncvt_rod.f.f.w_nxv1f16_nxv1f32(<vscale x
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f16.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f16.nxv1f32(
   <vscale x 1 x half>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32(<vscale x 1 x half> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32(<vscale x 1 x half> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1f16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x half> @intrinsic_vfncvt_rod.f.f.w_nxv2f16_nxv2f32(<vscale x
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f16.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f16.nxv2f32(
   <vscale x 2 x half>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32(<vscale x 2 x half> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32(<vscale x 2 x half> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x half> @intrinsic_vfncvt_rod.f.f.w_nxv4f16_nxv4f32(<vscale x
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f16.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f16.nxv4f32(
   <vscale x 4 x half>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32(<vscale x 4 x half> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32(<vscale x 4 x half> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4f16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x half> @intrinsic_vfncvt_rod.f.f.w_nxv8f16_nxv8f32(<vscale x
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f16.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f16.nxv8f32(
   <vscale x 8 x half>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32(<vscale x 8 x half> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32(<vscale x 8 x half> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8f16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x half> @intrinsic_vfncvt_rod.f.f.w_nxv16f16_nxv16f32(<vscal
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfncvt.rod.f.f.w.nxv16f16.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv16f16.nxv16f32
   <vscale x 16 x half>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32(<vscale x 16 x half> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32(<vscale x 16 x half> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv16f16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x float> @intrinsic_vfncvt_rod.f.f.w_nxv1f32_nxv1f64(<vscale
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv1f32.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv1f32.nxv1f64(
   <vscale x 1 x float>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64(<vscale x 1 x float> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64(<vscale x 1 x float> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x float> @intrinsic_vfncvt_rod.f.f.w_nxv2f32_nxv2f64(<vscale
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv2f32.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv2f32.nxv2f64(
   <vscale x 2 x float>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64(<vscale x 2 x float> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64(<vscale x 2 x float> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv2f32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x float> @intrinsic_vfncvt_rod.f.f.w_nxv4f32_nxv4f64(<vscale
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv4f32.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv4f32.nxv4f64(
   <vscale x 4 x float>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64(<vscale x 4 x float> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64(<vscale x 4 x float> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv4f32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x float> @intrinsic_vfncvt_rod.f.f.w_nxv8f32_nxv8f64(<vscale
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfncvt.rod.f.f.w.nxv8f32.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfncvt.rod.f.f.w.mask.nxv8f32.nxv8f64(
   <vscale x 8 x float>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64(<vscale x 8 x float> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64(<vscale x 8 x float> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rod.f.f.w_nxv8f32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll
deleted file mode 100644
index ad695704aae0a..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv64.ll
+++ /dev/null
@@ -1,632 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1f16(
-  <vscale x 1 x i8>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1f16(
-    <vscale x 1 x i8> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2f16(
-  <vscale x 2 x i8>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2f16(
-    <vscale x 2 x i8> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4f16(
-  <vscale x 4 x i8>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4f16(
-    <vscale x 4 x i8> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8f16(
-  <vscale x 8 x i8>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8f16(
-    <vscale x 8 x i8> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16f16(
-  <vscale x 16 x i8>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16f16(
-    <vscale x 16 x i8> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32f16(
-  <vscale x 32 x i8>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32f16(
-    <vscale x 32 x i8> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i16.nxv1f32(
-  <vscale x 1 x i16>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i16.nxv1f32(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i16.nxv2f32(
-  <vscale x 2 x i16>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i16.nxv2f32(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i16.nxv4f32(
-  <vscale x 4 x i16>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i16.nxv4f32(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i16.nxv8f32(
-  <vscale x 8 x i16>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i16.nxv8f32(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i16.nxv16f32(
-  <vscale x 16 x i16>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i16.nxv16f32(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i32.nxv1f64(
-  <vscale x 1 x i32>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i32.nxv1f64(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i32.nxv2f64(
-  <vscale x 2 x i32>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i32.nxv2f64(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i32.nxv4f64(
-  <vscale x 4 x i32>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i32.nxv4f64(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i32.nxv8f64(
-  <vscale x 8 x i32>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.x.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i32.nxv8f64(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll
index 227210e6f2f01..9a14df186dd51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-x-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv1i8_nxv1f16(<vscale x 1
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i8.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i8> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i8.nxv1f16(
   <vscale x 1 x i8>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x i8> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i8> %a
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv2i8_nxv2f16(<vscale x 2
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i8.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i8> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i8.nxv2f16(
   <vscale x 2 x i8>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x i8> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i8> %a
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv4i8_nxv4f16(<vscale x 4
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i8.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i8> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i8.nxv4f16(
   <vscale x 4 x i8>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x i8> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i8> %a
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv8i8_nxv8f16(<vscale x 8
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i8.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i8> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i8.nxv8f16(
   <vscale x 8 x i8>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x i8> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i8> %a
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv16i8_nxv16f16(<vscale x
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i8.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i8> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i8.nxv16f16(
   <vscale x 16 x i8>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x i8> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i8> %a
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.x.f.w_nxv32i8_nxv32f16(<vscale x
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.nxv32i8.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i8> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv32i8.nxv32f16(
   <vscale x 32 x i8>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 32 x i8> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x i8> %a
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv1i16_nxv1f32(<vscale x
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i16.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i16.nxv1f32(
   <vscale x 1 x i16>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv2i16_nxv2f32(<vscale x
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i16.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i16.nxv2f32(
   <vscale x 2 x i16>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv4i16_nxv4f32(<vscale x
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i16.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i16.nxv4f32(
   <vscale x 4 x i16>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -374,16 +376,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -393,7 +395,7 @@ define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv8i16_nxv8f32(<vscale x
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i16.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -402,10 +404,10 @@ declare <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i16.nxv8f32(
   <vscale x 8 x i16>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -416,16 +418,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -435,7 +437,7 @@ define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.x.f.w_nxv16i16_nxv16f32(<vscale
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.nxv16i16.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -444,10 +446,10 @@ declare <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv16i16.nxv16f32(
   <vscale x 16 x i16>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -458,16 +460,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -477,7 +479,7 @@ define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv1i32_nxv1f64(<vscale x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv1i32.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -486,10 +488,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv1i32.nxv1f64(
   <vscale x 1 x i32>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -500,16 +502,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -519,7 +521,7 @@ define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv2i32_nxv2f64(<vscale x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv2i32.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -528,10 +530,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv2i32.nxv2f64(
   <vscale x 2 x i32>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -542,16 +544,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -561,7 +563,7 @@ define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv4i32_nxv4f64(<vscale x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv4i32.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -570,10 +572,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv4i32.nxv4f64(
   <vscale x 4 x i32>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -584,16 +586,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -603,7 +605,7 @@ define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.x.f.w_nxv8i32_nxv8f64(<vscale x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.nxv8i32.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -612,10 +614,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.x.f.w.mask.nxv8i32.nxv8f64(
   <vscale x 8 x i32>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.x.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll
deleted file mode 100644
index f7f873dd05155..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv64.ll
+++ /dev/null
@@ -1,632 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1f16(
-  <vscale x 1 x i8>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1f16(
-    <vscale x 1 x i8> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2f16(
-  <vscale x 2 x i8>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2f16(
-    <vscale x 2 x i8> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4f16(
-  <vscale x 4 x i8>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4f16(
-    <vscale x 4 x i8> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8f16(
-  <vscale x 8 x i8>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8f16(
-    <vscale x 8 x i8> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16f16(
-  <vscale x 16 x i8>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16f16(
-    <vscale x 16 x i8> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32f16(
-  <vscale x 32 x i8>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32f16(
-    <vscale x 32 x i8> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i16.nxv1f32(
-  <vscale x 1 x i16>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i16.nxv1f32(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i16.nxv2f32(
-  <vscale x 2 x i16>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i16.nxv2f32(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i16.nxv4f32(
-  <vscale x 4 x i16>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i16.nxv4f32(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i16.nxv8f32(
-  <vscale x 8 x i16>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i16.nxv8f32(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i16.nxv16f32(
-  <vscale x 16 x i16>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i16.nxv16f32(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i32.nxv1f64(
-  <vscale x 1 x i32>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i32.nxv1f64(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i32.nxv2f64(
-  <vscale x 2 x i32>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i32.nxv2f64(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i32.nxv4f64(
-  <vscale x 4 x i32>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i32.nxv4f64(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i32.nxv8f64(
-  <vscale x 8 x i32>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.rtz.xu.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i32.nxv8f64(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll
index 4bfe331db7e00..24d75d6d09b19 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-rtz-xu-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i8_nxv1f16(<vscale x 1
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i8.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i8> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i8.nxv1f16(
   <vscale x 1 x i8>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x i8> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i8> %a
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i8_nxv2f16(<vscale x 2
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i8.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i8> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i8.nxv2f16(
   <vscale x 2 x i8>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x i8> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i8> %a
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i8_nxv4f16(<vscale x 4
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i8.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i8> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i8.nxv4f16(
   <vscale x 4 x i8>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x i8> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i8> %a
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i8_nxv8f16(<vscale x 8
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i8.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i8> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i8.nxv8f16(
   <vscale x 8 x i8>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x i8> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i8> %a
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i8_nxv16f16(<vscale
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i8.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i8> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i8.nxv16f16(
   <vscale x 16 x i8>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x i8> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i8> %a
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 32 x i8> @intrinsic_vfncvt_rtz.xu.f.w_nxv32i8_nxv32f16(<vscale
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv32i8.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i8> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 32 x i8> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv32i8.nxv32f16(
   <vscale x 32 x i8>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 32 x i8> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x i8> %a
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 1 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i16_nxv1f32(<vscale x
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i16.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 1 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i16.nxv1f32(
   <vscale x 1 x i16>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 2 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i16_nxv2f32(<vscale x
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i16.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 2 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i16.nxv2f32(
   <vscale x 2 x i16>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 4 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i16_nxv4f32(<vscale x
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i16.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 4 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i16.nxv4f32(
   <vscale x 4 x i16>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -374,16 +376,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -393,7 +395,7 @@ define <vscale x 8 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i16_nxv8f32(<vscale x
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i16.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -402,10 +404,10 @@ declare <vscale x 8 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i16.nxv8f32(
   <vscale x 8 x i16>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -416,16 +418,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -435,7 +437,7 @@ define <vscale x 16 x i16> @intrinsic_vfncvt_rtz.xu.f.w_nxv16i16_nxv16f32(<vscal
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv16i16.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -444,10 +446,10 @@ declare <vscale x 16 x i16> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv16i16.nxv16f32
   <vscale x 16 x i16>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -458,16 +460,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -477,7 +479,7 @@ define <vscale x 1 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv1i32_nxv1f64(<vscale x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv1i32.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -486,10 +488,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv1i32.nxv1f64(
   <vscale x 1 x i32>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -500,16 +502,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -519,7 +521,7 @@ define <vscale x 2 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv2i32_nxv2f64(<vscale x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv2i32.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -528,10 +530,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv2i32.nxv2f64(
   <vscale x 2 x i32>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -542,16 +544,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -561,7 +563,7 @@ define <vscale x 4 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv4i32_nxv4f64(<vscale x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv4i32.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -570,10 +572,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv4i32.nxv4f64(
   <vscale x 4 x i32>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -584,16 +586,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -603,7 +605,7 @@ define <vscale x 8 x i32> @intrinsic_vfncvt_rtz.xu.f.w_nxv8i32_nxv8f64(<vscale x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.nxv8i32.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -612,10 +614,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfncvt.rtz.xu.f.w.mask.nxv8i32.nxv8f64(
   <vscale x 8 x i32>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_rtz.xu.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll
deleted file mode 100644
index d78d695f3df20..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv64.ll
+++ /dev/null
@@ -1,632 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1f16(
-  <vscale x 1 x i8>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1f16(
-    <vscale x 1 x i8> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2f16(
-  <vscale x 2 x i8>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2f16(
-    <vscale x 2 x i8> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4f16(
-  <vscale x 4 x i8>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4f16(
-    <vscale x 4 x i8> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8f16(
-  <vscale x 8 x i8>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8f16(
-    <vscale x 8 x i8> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16f16(
-  <vscale x 16 x i8>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16f16(
-    <vscale x 16 x i8> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32f16(
-  <vscale x 32 x i8>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32f16(
-    <vscale x 32 x i8> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i16.nxv1f32(
-  <vscale x 1 x i16>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i16.nxv1f32(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i16.nxv2f32(
-  <vscale x 2 x i16>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i16.nxv2f32(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i16.nxv4f32(
-  <vscale x 4 x i16>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i16.nxv4f32(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i16.nxv8f32(
-  <vscale x 8 x i16>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i16.nxv8f32(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i16.nxv16f32(
-  <vscale x 16 x i16>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i16.nxv16f32(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i32.nxv1f64(
-  <vscale x 1 x i32>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i32.nxv1f64(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i32.nxv2f64(
-  <vscale x 2 x i32>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i32.nxv2f64(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i32.nxv4f64(
-  <vscale x 4 x i32>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i32.nxv4f64(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i32.nxv8f64(
-  <vscale x 8 x i32>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.x.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i32.nxv8f64(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
index 6c97455f1992e..0b8f9c62e50ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-x-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_x.f.w_nxv1i8_nxv1f16(<vscale x 1 x ha
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.nxv1i8.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i8> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i8.nxv1f16(
   <vscale x 1 x i8>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x i8> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i8> %a
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i8> @intrinsic_vfncvt_x.f.w_nxv2i8_nxv2f16(<vscale x 2 x ha
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.nxv2i8.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i8> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i8.nxv2f16(
   <vscale x 2 x i8>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x i8> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i8> %a
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i8> @intrinsic_vfncvt_x.f.w_nxv4i8_nxv4f16(<vscale x 4 x ha
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.nxv4i8.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i8> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i8.nxv4f16(
   <vscale x 4 x i8>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x i8> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i8> %a
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i8> @intrinsic_vfncvt_x.f.w_nxv8i8_nxv8f16(<vscale x 8 x ha
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.nxv8i8.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i8> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i8.nxv8f16(
   <vscale x 8 x i8>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x i8> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i8> %a
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i8> @intrinsic_vfncvt_x.f.w_nxv16i8_nxv16f16(<vscale x 16
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.nxv16i8.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i8> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i8.nxv16f16(
   <vscale x 16 x i8>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x i8> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i8> %a
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 32 x i8> @intrinsic_vfncvt_x.f.w_nxv32i8_nxv32f16(<vscale x 32
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.nxv32i8.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i8> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 32 x i8> @llvm.riscv.vfncvt.x.f.w.mask.nxv32i8.nxv32f16(
   <vscale x 32 x i8>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 32 x i8> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x i8> %a
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 1 x i16> @intrinsic_vfncvt_x.f.w_nxv1i16_nxv1f32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.x.f.w.nxv1i16.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 1 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i16.nxv1f32(
   <vscale x 1 x i16>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 2 x i16> @intrinsic_vfncvt_x.f.w_nxv2i16_nxv2f32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.x.f.w.nxv2i16.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 2 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i16.nxv2f32(
   <vscale x 2 x i16>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 4 x i16> @intrinsic_vfncvt_x.f.w_nxv4i16_nxv4f32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.x.f.w.nxv4i16.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 4 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i16.nxv4f32(
   <vscale x 4 x i16>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -374,16 +376,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -393,7 +395,7 @@ define <vscale x 8 x i16> @intrinsic_vfncvt_x.f.w_nxv8i16_nxv8f32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.x.f.w.nxv8i16.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -402,10 +404,10 @@ declare <vscale x 8 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i16.nxv8f32(
   <vscale x 8 x i16>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -416,16 +418,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -435,7 +437,7 @@ define <vscale x 16 x i16> @intrinsic_vfncvt_x.f.w_nxv16i16_nxv16f32(<vscale x 1
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.x.f.w.nxv16i16.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -444,10 +446,10 @@ declare <vscale x 16 x i16> @llvm.riscv.vfncvt.x.f.w.mask.nxv16i16.nxv16f32(
   <vscale x 16 x i16>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -458,16 +460,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -477,7 +479,7 @@ define <vscale x 1 x i32> @intrinsic_vfncvt_x.f.w_nxv1i32_nxv1f64(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.x.f.w.nxv1i32.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -486,10 +488,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv1i32.nxv1f64(
   <vscale x 1 x i32>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -500,16 +502,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -519,7 +521,7 @@ define <vscale x 2 x i32> @intrinsic_vfncvt_x.f.w_nxv2i32_nxv2f64(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.x.f.w.nxv2i32.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -528,10 +530,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv2i32.nxv2f64(
   <vscale x 2 x i32>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -542,16 +544,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -561,7 +563,7 @@ define <vscale x 4 x i32> @intrinsic_vfncvt_x.f.w_nxv4i32_nxv4f64(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.x.f.w.nxv4i32.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -570,10 +572,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv4i32.nxv4f64(
   <vscale x 4 x i32>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -584,16 +586,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -603,7 +605,7 @@ define <vscale x 8 x i32> @intrinsic_vfncvt_x.f.w_nxv8i32_nxv8f64(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.nxv8i32.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -612,10 +614,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfncvt.x.f.w.mask.nxv8i32.nxv8f64(
   <vscale x 8 x i32>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_x.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll
deleted file mode 100644
index c7bb913f3797f..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv64.ll
+++ /dev/null
@@ -1,632 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1f16(
-  <vscale x 1 x i8>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1f16(
-    <vscale x 1 x i8> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2f16(
-  <vscale x 2 x i8>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2f16(
-    <vscale x 2 x i8> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4f16(
-  <vscale x 4 x i8>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4f16(
-    <vscale x 4 x i8> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8f16(
-  <vscale x 8 x i8>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8f16(
-    <vscale x 8 x i8> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16f16(
-  <vscale x 16 x i8>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16f16(
-    <vscale x 16 x i8> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32f16(
-  <vscale x 32 x i8>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32f16(
-    <vscale x 32 x i8> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x i8> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 1 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i16.nxv1f32(
-  <vscale x 1 x i16>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i16.nxv1f32(
-    <vscale x 1 x i16> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 2 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i16.nxv2f32(
-  <vscale x 2 x i16>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i16.nxv2f32(
-    <vscale x 2 x i16> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 4 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i16.nxv4f32(
-  <vscale x 4 x i16>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i16.nxv4f32(
-    <vscale x 4 x i16> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 8 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i16.nxv8f32(
-  <vscale x 8 x i16>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i16.nxv8f32(
-    <vscale x 8 x i16> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 16 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i16.nxv16f32(
-  <vscale x 16 x i16>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i16.nxv16f32(
-    <vscale x 16 x i16> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i16> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i32.nxv1f64(
-  <vscale x 1 x i32>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i32.nxv1f64(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v10, v8
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i32.nxv2f64(
-  <vscale x 2 x i32>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i32.nxv2f64(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v12, v8
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i32.nxv4f64(
-  <vscale x 4 x i32>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i32.nxv4f64(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v16, v8
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i32.nxv8f64(
-  <vscale x 8 x i32>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfncvt.xu.f.w v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i32.nxv8f64(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
index 4981f8b16d741..7d802cabd3f77 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfncvt-xu-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i8> @intrinsic_vfncvt_xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x h
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv1i8.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i8> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i8.nxv1f16(
   <vscale x 1 x i8>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16(<vscale x 1 x i8> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i8_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x i8> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i8> %a
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i8> @intrinsic_vfncvt_xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x h
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv2i8.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i8> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i8.nxv2f16(
   <vscale x 2 x i8>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16(<vscale x 2 x i8> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i8_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x i8> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i8> %a
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i8> @intrinsic_vfncvt_xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x h
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv4i8.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i8> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i8.nxv4f16(
   <vscale x 4 x i8>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16(<vscale x 4 x i8> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i8_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x i8> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i8> %a
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i8> @intrinsic_vfncvt_xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x h
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv8i8.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i8> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i8.nxv8f16(
   <vscale x 8 x i8>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16(<vscale x 8 x i8> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i8_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x i8> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i8> %a
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i8> @intrinsic_vfncvt_xu.f.w_nxv16i8_nxv16f16(<vscale x 16
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv16i8.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i8> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i8.nxv16f16(
   <vscale x 16 x i8>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16(<vscale x 16 x i8> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i8_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x i8> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i8> %a
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 32 x i8> @intrinsic_vfncvt_xu.f.w_nxv32i8_nxv32f16(<vscale x 32
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.nxv32i8.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x i8> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 32 x i8> @llvm.riscv.vfncvt.xu.f.w.mask.nxv32i8.nxv32f16(
   <vscale x 32 x i8>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x i8> @intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16(<vscale x 32 x i8> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv32i8_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 32 x i8> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x i8> %a
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 1 x i16> @intrinsic_vfncvt_xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv1i16.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i16> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 1 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i16.nxv1f32(
   <vscale x 1 x i16>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32(<vscale x 1 x i16> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i16_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 1 x i16> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i16> %a
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 2 x i16> @intrinsic_vfncvt_xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv2i16.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i16> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 2 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i16.nxv2f32(
   <vscale x 2 x i16>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32(<vscale x 2 x i16> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 2 x i16> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i16> %a
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 4 x i16> @intrinsic_vfncvt_xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv4i16.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i16> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 4 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i16.nxv4f32(
   <vscale x 4 x i16>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32(<vscale x 4 x i16> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i16_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -374,16 +376,16 @@ entry:
     <vscale x 4 x i16> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i16> %a
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -393,7 +395,7 @@ define <vscale x 8 x i16> @intrinsic_vfncvt_xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv8i16.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i16> %a
 }
@@ -402,10 +404,10 @@ declare <vscale x 8 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i16.nxv8f32(
   <vscale x 8 x i16>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32(<vscale x 8 x i16> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i16_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -416,16 +418,16 @@ entry:
     <vscale x 8 x i16> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i16> %a
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -435,7 +437,7 @@ define <vscale x 16 x i16> @intrinsic_vfncvt_xu.f.w_nxv16i16_nxv16f32(<vscale x
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vfncvt.xu.f.w.nxv16i16.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i16> %a
 }
@@ -444,10 +446,10 @@ declare <vscale x 16 x i16> @llvm.riscv.vfncvt.xu.f.w.mask.nxv16i16.nxv16f32(
   <vscale x 16 x i16>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i16> @intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32(<vscale x 16 x i16> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv16i16_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -458,16 +460,16 @@ entry:
     <vscale x 16 x i16> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i16> %a
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -477,7 +479,7 @@ define <vscale x 1 x i32> @intrinsic_vfncvt_xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv1i32.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -486,10 +488,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv1i32.nxv1f64(
   <vscale x 1 x i32>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64(<vscale x 1 x i32> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv1i32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -500,16 +502,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -519,7 +521,7 @@ define <vscale x 2 x i32> @intrinsic_vfncvt_xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv2i32.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -528,10 +530,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv2i32.nxv2f64(
   <vscale x 2 x i32>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64(<vscale x 2 x i32> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv2i32_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -542,16 +544,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -561,7 +563,7 @@ define <vscale x 4 x i32> @intrinsic_vfncvt_xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv4i32.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -570,10 +572,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv4i32.nxv4f64(
   <vscale x 4 x i32>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64(<vscale x 4 x i32> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv4i32_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -584,16 +586,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -603,7 +605,7 @@ define <vscale x 8 x i32> @intrinsic_vfncvt_xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.nxv8i32.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -612,10 +614,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfncvt.xu.f.w.mask.nxv8i32.nxv8f64(
   <vscale x 8 x i32>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64(<vscale x 8 x i32> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfncvt_mask_xu.f.w_nxv8i32_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
deleted file mode 100644
index f8419e81f7d06..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv64.ll
+++ /dev/null
@@ -1,1106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc.ll
similarity index 90%
rename from llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfnmacc.ll
index fa1767202c126..d46c29f3be78b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmacc.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmacc.mask.nxv1f16.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmacc.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmacc.mask.nxv2f16.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmacc.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmacc.mask.nxv4f16.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -686,7 +688,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmacc.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmacc.mask.nxv8f16.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -732,7 +734,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmacc.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmacc.mask.nxv16f16.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -778,7 +780,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmacc.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmacc.mask.nxv1f32.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -833,9 +835,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmacc.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -846,7 +848,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -856,9 +858,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmacc.mask.nxv2f32.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -870,7 +872,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -879,9 +881,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmacc.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -892,7 +894,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -902,9 +904,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmacc.mask.nxv4f32.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -916,7 +918,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -925,9 +927,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmacc.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -938,7 +940,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -948,9 +950,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmacc.mask.nxv8f32.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -962,7 +964,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -971,9 +973,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmacc.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -984,7 +986,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -994,9 +996,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmacc.mask.nxv1f64.f64(
   double,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -1008,7 +1010,7 @@ entry:
     double %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -1017,9 +1019,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmacc.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1030,7 +1032,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -1040,9 +1042,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmacc.mask.nxv2f64.f64(
   double,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1054,7 +1056,7 @@ entry:
     double %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -1063,9 +1065,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmacc.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1076,7 +1078,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -1086,9 +1088,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmacc.mask.nxv4f64.f64(
   double,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmacc_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1100,7 +1102,7 @@ entry:
     double %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
deleted file mode 100644
index ab407427952ac..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv64.ll
+++ /dev/null
@@ -1,1106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmadd.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd.ll
similarity index 90%
rename from llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfnmadd.ll
index e0d33062322b8..44810af5ab31b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmadd.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmadd.mask.nxv1f16.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmadd.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmadd.mask.nxv2f16.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmadd.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmadd.mask.nxv4f16.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -686,7 +688,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmadd.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmadd.mask.nxv8f16.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -732,7 +734,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmadd.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmadd.mask.nxv16f16.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -778,7 +780,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmadd.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmadd.mask.nxv1f32.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -833,9 +835,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmadd.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -846,7 +848,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -856,9 +858,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmadd.mask.nxv2f32.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -870,7 +872,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -879,9 +881,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmadd.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -892,7 +894,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -902,9 +904,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmadd.mask.nxv4f32.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -916,7 +918,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -925,9 +927,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmadd.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -938,7 +940,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -948,9 +950,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmadd.mask.nxv8f32.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -962,7 +964,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -971,9 +973,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmadd.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -984,7 +986,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -994,9 +996,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmadd.mask.nxv1f64.f64(
   double,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -1008,7 +1010,7 @@ entry:
     double %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -1017,9 +1019,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmadd.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1030,7 +1032,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -1040,9 +1042,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmadd.mask.nxv2f64.f64(
   double,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1054,7 +1056,7 @@ entry:
     double %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -1063,9 +1065,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmadd.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1076,7 +1078,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -1086,9 +1088,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmadd.mask.nxv4f64.f64(
   double,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmadd_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1100,7 +1102,7 @@ entry:
     double %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
deleted file mode 100644
index 58e489618bc4e..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv64.ll
+++ /dev/null
@@ -1,1106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half>  @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half>  @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half>  @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half>  @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half>  @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac.ll
similarity index 90%
rename from llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfnmsac.ll
index 834938c7d6e74..ff1bcfa86d3a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsac.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsac.mask.nxv1f16.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsac.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsac.mask.nxv2f16.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsac.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsac.mask.nxv4f16.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -686,7 +688,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsac.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsac.mask.nxv8f16.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -732,7 +734,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsac.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsac.mask.nxv16f16.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -778,7 +780,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsac.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsac.mask.nxv1f32.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -833,9 +835,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsac.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -846,7 +848,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -856,9 +858,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsac.mask.nxv2f32.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -870,7 +872,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -879,9 +881,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsac.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -892,7 +894,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -902,9 +904,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsac.mask.nxv4f32.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -916,7 +918,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -925,9 +927,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsac.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -938,7 +940,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -948,9 +950,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsac.mask.nxv8f32.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -962,7 +964,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -971,9 +973,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsac.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -984,7 +986,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -994,9 +996,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsac.mask.nxv1f64.f64(
   double,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -1008,7 +1010,7 @@ entry:
     double %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -1017,9 +1019,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsac.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1030,7 +1032,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -1040,9 +1042,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsac.mask.nxv2f64.f64(
   double,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1054,7 +1056,7 @@ entry:
     double %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -1063,9 +1065,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsac.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1076,7 +1078,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -1086,9 +1088,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsac.mask.nxv4f64.f64(
   double,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsac_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1100,7 +1102,7 @@ entry:
     double %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
deleted file mode 100644
index 67dbb5a92dfa3..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv32.ll
+++ /dev/null
@@ -1,1106 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half>  @intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i32 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x half>  @intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half>  @intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i32 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x half>  @intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half>  @intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i32 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x half>  @intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half>  @intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i32 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x half>  @intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half>  @intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i32 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x half>  @intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float>  @intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i32 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x float>  @intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float>  @intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i32 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x float>  @intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float>  @intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i32 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x float>  @intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float>  @intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i32 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x float>  @intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double>  @intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i32 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x double>  @intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double>  @intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v10, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    i32 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x double>  @intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double>  @intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v12, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    i32 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x double>  @intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half>  @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i32 %3)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half>  @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i32 %3)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half>  @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i32 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half>  @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i32 %3)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half>  @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i32 %3)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float>  @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i32 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float>  @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i32 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float>  @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i32 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float>  @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i32 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double>  @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    i32 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double>  @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    i32 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double>  @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    i32 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfnmsub.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4)
-
-  ret <vscale x 4 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub.ll
similarity index 90%
rename from llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfnmsub.ll
index 07b23dbfb066d..e6ca32f34752c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsub.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half>  @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
+define <vscale x 1 x half>  @intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x half> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x half> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x half> @llvm.riscv.vfnmsub.mask.nxv1f16.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16(<vscale x 1 x half> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f16_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x half> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsub.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half>  @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
+define <vscale x 2 x half>  @intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 2 x half> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x half> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 2 x half> @llvm.riscv.vfnmsub.mask.nxv2f16.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16(<vscale x 2 x half> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f16_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x half> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsub.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half>  @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
+define <vscale x 4 x half>  @intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 4 x half> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfnmsub.mask.nxv4f16.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16(<vscale x 4 x half> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f16_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -686,7 +688,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsub.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half>  @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
+define <vscale x 8 x half>  @intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 8 x half> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x half> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 8 x half> @llvm.riscv.vfnmsub.mask.nxv8f16.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16(<vscale x 8 x half> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f16_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -732,7 +734,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 8 x half> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsub.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half>  @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
+define <vscale x 16 x half>  @intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 16 x half> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 16 x half> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 16 x half> @llvm.riscv.vfnmsub.mask.nxv16f16.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16(<vscale x 16 x half> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv16f16_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -778,7 +780,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 16 x half> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsub.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 1 x float> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfnmsub.mask.nxv1f32.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32(<vscale x 1 x float> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f32_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -833,9 +835,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsub.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -846,7 +848,7 @@ entry:
     <vscale x 2 x float> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -856,9 +858,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfnmsub.mask.nxv2f32.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32(<vscale x 2 x float> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f32_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -870,7 +872,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -879,9 +881,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsub.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -892,7 +894,7 @@ entry:
     <vscale x 4 x float> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -902,9 +904,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfnmsub.mask.nxv4f32.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32(<vscale x 4 x float> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f32_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -916,7 +918,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -925,9 +927,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsub.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -938,7 +940,7 @@ entry:
     <vscale x 8 x float> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -948,9 +950,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfnmsub.mask.nxv8f32.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32(<vscale x 8 x float> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv8f32_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -962,7 +964,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -971,9 +973,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsub.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, i64 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -984,7 +986,7 @@ entry:
     <vscale x 1 x double> %0,
     double %1,
     <vscale x 1 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -994,9 +996,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfnmsub.mask.nxv1f64.f64(
   double,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64(<vscale x 1 x double> %0, double %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv1f64_f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -1008,7 +1010,7 @@ entry:
     double %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -1017,9 +1019,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsub.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, i64 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1030,7 +1032,7 @@ entry:
     <vscale x 2 x double> %0,
     double %1,
     <vscale x 2 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -1040,9 +1042,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfnmsub.mask.nxv2f64.f64(
   double,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64(<vscale x 2 x double> %0, double %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv2f64_f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -1054,7 +1056,7 @@ entry:
     double %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -1063,9 +1065,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsub.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, i64 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1076,7 +1078,7 @@ entry:
     <vscale x 4 x double> %0,
     double %1,
     <vscale x 4 x double> %2,
-    i64 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -1086,9 +1088,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfnmsub.mask.nxv4f64.f64(
   double,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64(<vscale x 4 x double> %0, double %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfnmsub_mask_vf_nxv4f64_f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -1100,7 +1102,7 @@ entry:
     double %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i64 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
deleted file mode 100644
index ccdd6ad371896..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv64.ll
+++ /dev/null
@@ -1,677 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrdiv_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfrdiv.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrdiv_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrdiv.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrdiv.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrdiv_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrdiv.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrdiv.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrdiv_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrdiv.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrdiv.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrdiv_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrdiv.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrdiv.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrdiv_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrdiv.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrdiv.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrdiv_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrdiv.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrdiv.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrdiv_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrdiv.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrdiv.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrdiv_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrdiv.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrdiv.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrdiv_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrdiv.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrdiv.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrdiv_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrdiv.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrdiv.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrdiv_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrdiv.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrdiv.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrdiv_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrdiv.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrdiv.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrdiv_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrdiv.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrdiv.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrdiv_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrdiv.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrdiv.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrdiv_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrdiv.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrdiv_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrdiv.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrdiv_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrdiv.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrdiv.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrdiv_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrdiv.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrdiv.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrdiv_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrdiv.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrdiv.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrdiv_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrdiv.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrdiv.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrdiv_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrdiv.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrdiv.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrdiv_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrdiv.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrdiv.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrdiv_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrdiv.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrdiv.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrdiv_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrdiv.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrdiv.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrdiv_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrdiv.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrdiv.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrdiv_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrdiv.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrdiv.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrdiv_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrdiv.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrdiv.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrdiv_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrdiv.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrdiv.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrdiv_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfrdiv.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrdiv.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrdiv.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfrdiv.ll
index 1d502c84b1981..58dc39b995055 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrdiv-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrdiv.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrdiv.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfrdiv.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfrdiv.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrdiv.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfrdiv.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfrdiv.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrdiv.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfrdiv.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfrdiv.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrdiv.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfrdiv.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfrdiv.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrdiv.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfrdiv.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfrdiv.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrdiv.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfrdiv.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -266,7 +268,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -274,9 +276,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfrdiv.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -286,7 +288,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrdiv.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -296,10 +298,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfrdiv.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -311,7 +313,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -319,9 +321,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfrdiv.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -331,7 +333,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrdiv.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -341,10 +343,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfrdiv.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -356,7 +358,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -364,9 +366,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -376,7 +378,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrdiv.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -386,10 +388,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfrdiv.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -401,7 +403,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -409,9 +411,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfrdiv.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -421,7 +423,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrdiv.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -431,10 +433,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfrdiv.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -446,7 +448,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -454,9 +456,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfrdiv.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -466,7 +468,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrdiv.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -476,10 +478,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfrdiv.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -491,7 +493,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -499,9 +501,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfrdiv.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -511,7 +513,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrdiv.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -521,10 +523,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfrdiv.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -536,7 +538,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -544,9 +546,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfrdiv.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -556,7 +558,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrdiv.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -566,10 +568,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfrdiv.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -581,7 +583,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -589,9 +591,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfrdiv.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -601,7 +603,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrdiv.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -611,10 +613,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfrdiv.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -634,9 +636,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfrdiv.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -646,7 +648,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrdiv.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -656,10 +658,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfrdiv.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrdiv_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -671,7 +673,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
deleted file mode 100644
index 4e0fe7b9fc62f..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv64.ll
+++ /dev/null
@@ -1,617 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrec7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfrec7.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrec7.mask.nxv1f16(
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrec7.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrec7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrec7.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrec7.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrec7.mask.nxv2f16(
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrec7.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrec7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrec7.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrec7.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrec7.mask.nxv4f16(
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrec7.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrec7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrec7.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrec7.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrec7.mask.nxv8f16(
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrec7.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrec7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrec7.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrec7.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrec7.mask.nxv16f16(
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrec7.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrec7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrec7.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrec7.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrec7.mask.nxv32f16(
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrec7.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrec7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrec7.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrec7.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrec7.mask.nxv1f32(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrec7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrec7.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.mask.nxv2f32(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrec7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrec7.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrec7.mask.nxv4f32(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrec7.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrec7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrec7.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrec7.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrec7.mask.nxv8f32(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrec7.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrec7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrec7.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrec7.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrec7.mask.nxv16f32(
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrec7.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrec7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrec7.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrec7.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrec7.mask.nxv1f64(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrec7.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrec7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrec7.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrec7.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrec7.mask.nxv2f64(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrec7.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrec7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrec7.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrec7.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrec7.mask.nxv4f64(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrec7.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrec7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrec7.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrec7.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfrec7.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrec7.mask.nxv8f64(
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrec7.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfrec7.ll
index 30897b95deea2..3be9f912d7f2a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrec7-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrec7.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfrec7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfrec7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x half> @intrinsic_vfrec7_v_nxv1f16_nxv1f16(<vscale x 1 x hal
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrec7.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -23,10 +25,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfrec7.mask.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -37,16 +39,16 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfrec7.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfrec7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfrec7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -55,7 +57,7 @@ define <vscale x 2 x half> @intrinsic_vfrec7_v_nxv2f16_nxv2f16(<vscale x 2 x hal
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrec7.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -64,10 +66,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfrec7.mask.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -78,16 +80,16 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfrec7.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfrec7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfrec7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -96,7 +98,7 @@ define <vscale x 4 x half> @intrinsic_vfrec7_v_nxv4f16_nxv4f16(<vscale x 4 x hal
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrec7.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -105,10 +107,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfrec7.mask.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -119,16 +121,16 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfrec7.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfrec7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfrec7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -137,7 +139,7 @@ define <vscale x 8 x half> @intrinsic_vfrec7_v_nxv8f16_nxv8f16(<vscale x 8 x hal
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrec7.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -146,10 +148,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfrec7.mask.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -160,16 +162,16 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfrec7.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfrec7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfrec7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -178,7 +180,7 @@ define <vscale x 16 x half> @intrinsic_vfrec7_v_nxv16f16_nxv16f16(<vscale x 16 x
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrec7.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -187,10 +189,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfrec7.mask.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,16 +203,16 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vfrec7.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfrec7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x half> @intrinsic_vfrec7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -219,7 +221,7 @@ define <vscale x 32 x half> @intrinsic_vfrec7_v_nxv32f16_nxv32f16(<vscale x 32 x
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrec7.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x half> %a
 }
@@ -228,10 +230,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfrec7.mask.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i32 %3) nounwind {
+define <vscale x 32 x half> @intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -242,16 +244,16 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfrec7.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfrec7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfrec7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -260,7 +262,7 @@ define <vscale x 1 x float> @intrinsic_vfrec7_v_nxv1f32_nxv1f32(<vscale x 1 x fl
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrec7.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -269,10 +271,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfrec7.mask.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -283,16 +285,16 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfrec7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfrec7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -301,7 +303,7 @@ define <vscale x 2 x float> @intrinsic_vfrec7_v_nxv2f32_nxv2f32(<vscale x 2 x fl
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrec7.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -310,10 +312,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfrec7.mask.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -324,16 +326,16 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfrec7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfrec7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -342,7 +344,7 @@ define <vscale x 4 x float> @intrinsic_vfrec7_v_nxv4f32_nxv4f32(<vscale x 4 x fl
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrec7.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -351,10 +353,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfrec7.mask.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -365,16 +367,16 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfrec7.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfrec7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfrec7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -383,7 +385,7 @@ define <vscale x 8 x float> @intrinsic_vfrec7_v_nxv8f32_nxv8f32(<vscale x 8 x fl
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrec7.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -392,10 +394,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfrec7.mask.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,16 +408,16 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfrec7.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfrec7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vfrec7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -424,7 +426,7 @@ define <vscale x 16 x float> @intrinsic_vfrec7_v_nxv16f32_nxv16f32(<vscale x 16
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrec7.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfrec7.mask.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -447,16 +449,16 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfrec7.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfrec7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vfrec7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -465,7 +467,7 @@ define <vscale x 1 x double> @intrinsic_vfrec7_v_nxv1f64_nxv1f64(<vscale x 1 x d
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrec7.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
@@ -474,10 +476,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfrec7.mask.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -488,16 +490,16 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfrec7.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfrec7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vfrec7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -506,7 +508,7 @@ define <vscale x 2 x double> @intrinsic_vfrec7_v_nxv2f64_nxv2f64(<vscale x 2 x d
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrec7.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
@@ -515,10 +517,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfrec7.mask.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -529,16 +531,16 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfrec7.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfrec7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vfrec7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -547,7 +549,7 @@ define <vscale x 4 x double> @intrinsic_vfrec7_v_nxv4f64_nxv4f64(<vscale x 4 x d
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrec7.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
@@ -556,10 +558,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfrec7.mask.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -570,16 +572,16 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfrec7.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfrec7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vfrec7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_v_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 8 x double> @intrinsic_vfrec7_v_nxv8f64_nxv8f64(<vscale x 8 x d
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrec7.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
@@ -597,10 +599,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfrec7.mask.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrec7_mask_v_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -611,7 +613,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll
deleted file mode 100644
index bb173e91ecf31..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv64.ll
+++ /dev/null
@@ -1,692 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv1f16(
-  <vscale x 4 x half>,
-  <vscale x 1 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv1f16(
-    <vscale x 4 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv1f16(
-  <vscale x 4 x half>,
-  <vscale x 1 x half>,
-  <vscale x 4 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv1f16(
-    <vscale x 4 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv2f16(
-  <vscale x 4 x half>,
-  <vscale x 2 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv2f16(
-    <vscale x 4 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv2f16(
-  <vscale x 4 x half>,
-  <vscale x 2 x half>,
-  <vscale x 4 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv2f16(
-    <vscale x 4 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv8f16(
-  <vscale x 4 x half>,
-  <vscale x 8 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv8f16(
-    <vscale x 4 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv8f16(
-  <vscale x 4 x half>,
-  <vscale x 8 x half>,
-  <vscale x 4 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv8f16(
-    <vscale x 4 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv16f16(
-  <vscale x 4 x half>,
-  <vscale x 16 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv16f16(
-    <vscale x 4 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv16f16(
-  <vscale x 4 x half>,
-  <vscale x 16 x half>,
-  <vscale x 4 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv16f16(
-    <vscale x 4 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv32f16(
-  <vscale x 4 x half>,
-  <vscale x 32 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv32f16(
-    <vscale x 4 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv32f16(
-  <vscale x 4 x half>,
-  <vscale x 32 x half>,
-  <vscale x 4 x half>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv32f16(
-    <vscale x 4 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv1f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv1f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv1f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x float>,
-  <vscale x 2 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv1f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv4f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv4f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv4f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x float>,
-  <vscale x 2 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv4f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv8f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv8f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv8f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x float>,
-  <vscale x 2 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv8f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv16f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv16f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv16f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x float>,
-  <vscale x 2 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv16f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv2f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv2f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv2f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x double>,
-  <vscale x 1 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv2f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv4f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv4f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv4f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x double>,
-  <vscale x 1 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv4f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv8f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv8f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv8f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x double>,
-  <vscale x 1 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfredmax.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv8f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmax.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfredmax.ll
index 25ed3f1ab3676..0a2d72bf382aa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredmax-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredmax.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
   <vscale x 1 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv1f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv1f16.nxv1i1(
   <vscale x 1 x half>,
   <vscale x 4 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv1f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv2f16(
   <vscale x 4 x half>,
   <vscale x 2 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv2f16.nxv2i1(
   <vscale x 2 x half>,
   <vscale x 4 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv4f16.nxv4i1(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv8f16(
   <vscale x 4 x half>,
   <vscale x 8 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv8f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv8f16.nxv8i1(
   <vscale x 8 x half>,
   <vscale x 4 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv8f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv16f16(
   <vscale x 4 x half>,
   <vscale x 16 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv16f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv16f16.nxv16i1(
   <vscale x 16 x half>,
   <vscale x 4 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv16f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.nxv4f16.nxv32f16(
   <vscale x 4 x half>,
   <vscale x 32 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv4f16_nxv32f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 32 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmax.mask.nxv4f16.nxv32f16.nxv32i1(
   <vscale x 32 x half>,
   <vscale x 4 x half>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv4f16_nxv32f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv1f32(
   <vscale x 2 x float>,
   <vscale x 1 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv1f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv1f32.nxv1i1(
   <vscale x 1 x float>,
   <vscale x 2 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv1f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv2f32.nxv2i1(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv4f32(
   <vscale x 2 x float>,
   <vscale x 4 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv4f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv4f32.nxv4i1(
   <vscale x 4 x float>,
   <vscale x 2 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv4f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv8f32(
   <vscale x 2 x float>,
   <vscale x 8 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv8f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv8f32.nxv8i1(
   <vscale x 8 x float>,
   <vscale x 2 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv8f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.nxv2f32.nxv16f32(
   <vscale x 2 x float>,
   <vscale x 16 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv2f32_nxv16f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 16 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmax.mask.nxv2f32.nxv16f32.nxv16i1(
   <vscale x 16 x float>,
   <vscale x 2 x float>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv2f32_nxv16f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv1f64.nxv1i1(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv2f64(
   <vscale x 1 x double>,
   <vscale x 2 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv2f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv2f64.nxv2i1(
   <vscale x 2 x double>,
   <vscale x 1 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv2f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -594,7 +596,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv4f64(
   <vscale x 1 x double>,
   <vscale x 4 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv4f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv4f64.nxv4i1(
   <vscale x 4 x double>,
   <vscale x 1 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv4f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -640,7 +642,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmax.nxv1f64.nxv8f64(
   <vscale x 1 x double>,
   <vscale x 8 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_vs_nxv1f64_nxv8f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 8 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmax.mask.nxv1f64.nxv8f64.nxv8i1(
   <vscale x 8 x double>,
   <vscale x 1 x double>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmax_mask_vs_nxv1f64_nxv8f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -686,7 +688,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll
deleted file mode 100644
index d04ef7a6707de..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv64.ll
+++ /dev/null
@@ -1,692 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv1f16(
-  <vscale x 4 x half>,
-  <vscale x 1 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv1f16(
-    <vscale x 4 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv1f16(
-  <vscale x 4 x half>,
-  <vscale x 1 x half>,
-  <vscale x 4 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv1f16(
-    <vscale x 4 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv2f16(
-  <vscale x 4 x half>,
-  <vscale x 2 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv2f16(
-    <vscale x 4 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv2f16(
-  <vscale x 4 x half>,
-  <vscale x 2 x half>,
-  <vscale x 4 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv2f16(
-    <vscale x 4 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv8f16(
-  <vscale x 4 x half>,
-  <vscale x 8 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv8f16(
-    <vscale x 4 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv8f16(
-  <vscale x 4 x half>,
-  <vscale x 8 x half>,
-  <vscale x 4 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv8f16(
-    <vscale x 4 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv16f16(
-  <vscale x 4 x half>,
-  <vscale x 16 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv16f16(
-    <vscale x 4 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv16f16(
-  <vscale x 4 x half>,
-  <vscale x 16 x half>,
-  <vscale x 4 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv16f16(
-    <vscale x 4 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv32f16(
-  <vscale x 4 x half>,
-  <vscale x 32 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv32f16(
-    <vscale x 4 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv32f16(
-  <vscale x 4 x half>,
-  <vscale x 32 x half>,
-  <vscale x 4 x half>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv32f16(
-    <vscale x 4 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv1f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv1f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv1f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x float>,
-  <vscale x 2 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv1f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv4f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv4f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv4f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x float>,
-  <vscale x 2 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv4f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv8f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv8f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv8f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x float>,
-  <vscale x 2 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv8f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv16f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv16f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv16f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x float>,
-  <vscale x 2 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv16f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv2f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv2f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv2f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x double>,
-  <vscale x 1 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv2f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv4f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv4f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv4f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x double>,
-  <vscale x 1 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv4f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv8f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv8f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv8f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x double>,
-  <vscale x 1 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfredmin.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv8f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredmin.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfredmin.ll
index 9561be7b6fc09..4d0301d3485ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredmin-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredmin.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
   <vscale x 1 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv1f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv1f16.nxv1i1(
   <vscale x 1 x half>,
   <vscale x 4 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv1f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv2f16(
   <vscale x 4 x half>,
   <vscale x 2 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv2f16.nxv2i1(
   <vscale x 2 x half>,
   <vscale x 4 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv4f16.nxv4i1(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv8f16(
   <vscale x 4 x half>,
   <vscale x 8 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv8f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv8f16.nxv8i1(
   <vscale x 8 x half>,
   <vscale x 4 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv8f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv16f16(
   <vscale x 4 x half>,
   <vscale x 16 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv16f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv16f16.nxv16i1(
   <vscale x 16 x half>,
   <vscale x 4 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv16f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.nxv4f16.nxv32f16(
   <vscale x 4 x half>,
   <vscale x 32 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv4f16_nxv32f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 32 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredmin.mask.nxv4f16.nxv32f16.nxv32i1(
   <vscale x 32 x half>,
   <vscale x 4 x half>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv4f16_nxv32f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv1f32(
   <vscale x 2 x float>,
   <vscale x 1 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv1f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv1f32.nxv1i1(
   <vscale x 1 x float>,
   <vscale x 2 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv1f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv2f32.nxv2i1(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv4f32(
   <vscale x 2 x float>,
   <vscale x 4 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv4f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv4f32.nxv4i1(
   <vscale x 4 x float>,
   <vscale x 2 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv4f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv8f32(
   <vscale x 2 x float>,
   <vscale x 8 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv8f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv8f32.nxv8i1(
   <vscale x 8 x float>,
   <vscale x 2 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv8f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.nxv2f32.nxv16f32(
   <vscale x 2 x float>,
   <vscale x 16 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv2f32_nxv16f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 16 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredmin.mask.nxv2f32.nxv16f32.nxv16i1(
   <vscale x 16 x float>,
   <vscale x 2 x float>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv2f32_nxv16f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv1f64.nxv1i1(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv2f64(
   <vscale x 1 x double>,
   <vscale x 2 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv2f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv2f64.nxv2i1(
   <vscale x 2 x double>,
   <vscale x 1 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv2f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -594,7 +596,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv4f64(
   <vscale x 1 x double>,
   <vscale x 4 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv4f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv4f64.nxv4i1(
   <vscale x 4 x double>,
   <vscale x 1 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv4f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -640,7 +642,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmin.nxv1f64.nxv8f64(
   <vscale x 1 x double>,
   <vscale x 8 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_vs_nxv1f64_nxv8f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 8 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredmin.mask.nxv1f64.nxv8f64.nxv8i1(
   <vscale x 8 x double>,
   <vscale x 1 x double>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredmin_mask_vs_nxv1f64_nxv8f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -686,7 +688,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll
deleted file mode 100644
index 8c42e43d9094d..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv64.ll
+++ /dev/null
@@ -1,692 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv1f16(
-  <vscale x 4 x half>,
-  <vscale x 1 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv1f16(
-    <vscale x 4 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv1f16(
-  <vscale x 4 x half>,
-  <vscale x 1 x half>,
-  <vscale x 4 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv1f16(
-    <vscale x 4 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv2f16(
-  <vscale x 4 x half>,
-  <vscale x 2 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv2f16(
-    <vscale x 4 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv2f16(
-  <vscale x 4 x half>,
-  <vscale x 2 x half>,
-  <vscale x 4 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv2f16(
-    <vscale x 4 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv8f16(
-  <vscale x 4 x half>,
-  <vscale x 8 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv8f16(
-    <vscale x 4 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv8f16(
-  <vscale x 4 x half>,
-  <vscale x 8 x half>,
-  <vscale x 4 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv8f16(
-    <vscale x 4 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv16f16(
-  <vscale x 4 x half>,
-  <vscale x 16 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv16f16(
-    <vscale x 4 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv16f16(
-  <vscale x 4 x half>,
-  <vscale x 16 x half>,
-  <vscale x 4 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv16f16(
-    <vscale x 4 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv32f16(
-  <vscale x 4 x half>,
-  <vscale x 32 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv32f16(
-    <vscale x 4 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv32f16(
-  <vscale x 4 x half>,
-  <vscale x 32 x half>,
-  <vscale x 4 x half>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv32f16(
-    <vscale x 4 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv1f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv1f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv1f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x float>,
-  <vscale x 2 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv1f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv4f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv4f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv4f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x float>,
-  <vscale x 2 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv4f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv8f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv8f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv8f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x float>,
-  <vscale x 2 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv8f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv16f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv16f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv16f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x float>,
-  <vscale x 2 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv16f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv2f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv2f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv2f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x double>,
-  <vscale x 1 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv2f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv4f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv4f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv4f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x double>,
-  <vscale x 1 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv4f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv8f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv8f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv8f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x double>,
-  <vscale x 1 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfredosum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv8f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredosum.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfredosum.ll
index 1f1e68e0dbc9b..b814315d90cd1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredosum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredosum.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
   <vscale x 1 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv1f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv1f16.nxv1i1(
   <vscale x 1 x half>,
   <vscale x 4 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv1f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv2f16(
   <vscale x 4 x half>,
   <vscale x 2 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv2f16.nxv2i1(
   <vscale x 2 x half>,
   <vscale x 4 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv4f16.nxv4i1(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv8f16(
   <vscale x 4 x half>,
   <vscale x 8 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv8f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv8f16.nxv8i1(
   <vscale x 8 x half>,
   <vscale x 4 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv8f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv16f16(
   <vscale x 4 x half>,
   <vscale x 16 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv16f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv16f16.nxv16i1(
   <vscale x 16 x half>,
   <vscale x 4 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv16f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.nxv4f16.nxv32f16(
   <vscale x 4 x half>,
   <vscale x 32 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv4f16_nxv32f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 32 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredosum.mask.nxv4f16.nxv32f16.nxv32i1(
   <vscale x 32 x half>,
   <vscale x 4 x half>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv4f16_nxv32f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv1f32(
   <vscale x 2 x float>,
   <vscale x 1 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv1f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv1f32.nxv1i1(
   <vscale x 1 x float>,
   <vscale x 2 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv1f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv2f32.nxv2i1(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv4f32(
   <vscale x 2 x float>,
   <vscale x 4 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv4f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv4f32.nxv4i1(
   <vscale x 4 x float>,
   <vscale x 2 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv4f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv8f32(
   <vscale x 2 x float>,
   <vscale x 8 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv8f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv8f32.nxv8i1(
   <vscale x 8 x float>,
   <vscale x 2 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv8f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.nxv2f32.nxv16f32(
   <vscale x 2 x float>,
   <vscale x 16 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv2f32_nxv16f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 16 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredosum.mask.nxv2f32.nxv16f32.nxv16i1
   <vscale x 16 x float>,
   <vscale x 2 x float>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv2f32_nxv16f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv1f64.nxv1i1(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv2f64(
   <vscale x 1 x double>,
   <vscale x 2 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv2f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv2f64.nxv2i1(
   <vscale x 2 x double>,
   <vscale x 1 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv2f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -594,7 +596,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv4f64(
   <vscale x 1 x double>,
   <vscale x 4 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv4f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv4f64.nxv4i1(
   <vscale x 4 x double>,
   <vscale x 1 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv4f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -640,7 +642,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredosum.nxv1f64.nxv8f64(
   <vscale x 1 x double>,
   <vscale x 8 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_vs_nxv1f64_nxv8f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 8 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredosum.mask.nxv1f64.nxv8f64.nxv8i1(
   <vscale x 8 x double>,
   <vscale x 1 x double>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredosum_mask_vs_nxv1f64_nxv8f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -686,7 +688,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll
deleted file mode 100644
index 9264397afa0e7..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv64.ll
+++ /dev/null
@@ -1,692 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv1f16(
-  <vscale x 4 x half>,
-  <vscale x 1 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv1f16(
-    <vscale x 4 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16(
-  <vscale x 4 x half>,
-  <vscale x 1 x half>,
-  <vscale x 4 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16(
-    <vscale x 4 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv2f16(
-  <vscale x 4 x half>,
-  <vscale x 2 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv2f16(
-    <vscale x 4 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16(
-  <vscale x 4 x half>,
-  <vscale x 2 x half>,
-  <vscale x 4 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16(
-    <vscale x 4 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv8f16(
-  <vscale x 4 x half>,
-  <vscale x 8 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv8f16(
-    <vscale x 4 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16(
-  <vscale x 4 x half>,
-  <vscale x 8 x half>,
-  <vscale x 4 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16(
-    <vscale x 4 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv16f16(
-  <vscale x 4 x half>,
-  <vscale x 16 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv16f16(
-    <vscale x 4 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16(
-  <vscale x 4 x half>,
-  <vscale x 16 x half>,
-  <vscale x 4 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16(
-    <vscale x 4 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv32f16(
-  <vscale x 4 x half>,
-  <vscale x 32 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv32f16(
-    <vscale x 4 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16(
-  <vscale x 4 x half>,
-  <vscale x 32 x half>,
-  <vscale x 4 x half>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16(
-    <vscale x 4 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv1f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv1f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x float>,
-  <vscale x 2 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv4f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv4f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x float>,
-  <vscale x 2 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv8f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv8f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x float>,
-  <vscale x 2 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv16f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv16f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x float>,
-  <vscale x 2 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv2f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv2f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x double>,
-  <vscale x 1 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv4f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv4f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x double>,
-  <vscale x 1 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv8f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv8f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x double>,
-  <vscale x 1 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
-; CHECK-NEXT:    vfredusum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfredusum.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfredusum.ll
index 7b3691c74887c..e6ff649ab398c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfredusum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfredusum.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv1f16(
   <vscale x 4 x half>,
   <vscale x 1 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv1f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv1f16.nxv1i1(
   <vscale x 1 x half>,
   <vscale x 4 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 1 x half> %1, <vscale x 4 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv1f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv2f16(
   <vscale x 4 x half>,
   <vscale x 2 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv2f16.nxv2i1(
   <vscale x 2 x half>,
   <vscale x 4 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 2 x half> %1, <vscale x 4 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv2f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv4f16.nxv4i1(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv8f16(
   <vscale x 4 x half>,
   <vscale x 8 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv8f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv8f16.nxv8i1(
   <vscale x 8 x half>,
   <vscale x 4 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 8 x half> %1, <vscale x 4 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv8f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv16f16(
   <vscale x 4 x half>,
   <vscale x 16 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv16f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv16f16.nxv16i1(
   <vscale x 16 x half>,
   <vscale x 4 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 16 x half> %1, <vscale x 4 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv16f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.nxv4f16.nxv32f16(
   <vscale x 4 x half>,
   <vscale x 32 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv4f16_nxv32f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 32 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x half> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 4 x half> @llvm.riscv.vfredusum.mask.nxv4f16.nxv32f16.nxv32i1(
   <vscale x 32 x half>,
   <vscale x 4 x half>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 32 x half> %1, <vscale x 4 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv4f16_nxv32f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x half> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv1f32(
   <vscale x 2 x float>,
   <vscale x 1 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv1f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv1f32.nxv1i1(
   <vscale x 1 x float>,
   <vscale x 2 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x float> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv1f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv2f32.nxv2i1(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv4f32(
   <vscale x 2 x float>,
   <vscale x 4 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv4f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv4f32.nxv4i1(
   <vscale x 4 x float>,
   <vscale x 2 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x float> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv4f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv8f32(
   <vscale x 2 x float>,
   <vscale x 8 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv8f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv8f32.nxv8i1(
   <vscale x 8 x float>,
   <vscale x 2 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x float> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv8f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.nxv2f32.nxv16f32(
   <vscale x 2 x float>,
   <vscale x 16 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv2f32_nxv16f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 16 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfredusum.mask.nxv2f32.nxv16f32.nxv16i1
   <vscale x 16 x float>,
   <vscale x 2 x float>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x float> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv2f32_nxv16f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv1f64.nxv1i1(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv2f64(
   <vscale x 1 x double>,
   <vscale x 2 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv2f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv2f64.nxv2i1(
   <vscale x 2 x double>,
   <vscale x 1 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x double> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv2f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
@@ -594,7 +596,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv4f64(
   <vscale x 1 x double>,
   <vscale x 4 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv4f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv4f64.nxv4i1(
   <vscale x 4 x double>,
   <vscale x 1 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x double> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv4f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
@@ -640,7 +642,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredusum.nxv1f64.nxv8f64(
   <vscale x 1 x double>,
   <vscale x 8 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_vs_nxv1f64_nxv8f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 8 x double> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfredusum.mask.nxv1f64.nxv8f64.nxv8i1(
   <vscale x 8 x double>,
   <vscale x 1 x double>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x double> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfredusum_mask_vs_nxv1f64_nxv8f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
@@ -686,7 +688,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
deleted file mode 100644
index cf9d7a4c0af5a..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv64.ll
+++ /dev/null
@@ -1,617 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrt7.mask.nxv1f16(
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrsqrt7.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrt7.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrsqrt7.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrt7.mask.nxv2f16(
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrsqrt7.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrt7.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrsqrt7.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrt7.mask.nxv4f16(
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrsqrt7.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrt7.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrsqrt7.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrt7.mask.nxv8f16(
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrsqrt7.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrt7.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrsqrt7.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrt7.mask.nxv16f16(
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrsqrt7.nxv32f16(
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrt7.nxv32f16(
-    <vscale x 32 x half> %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrsqrt7.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrt7.mask.nxv32f16(
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrsqrt7.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrt7.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrsqrt7.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrt7.mask.nxv1f32(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrsqrt7.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrt7.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrsqrt7.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrt7.mask.nxv2f32(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrsqrt7.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.mask.nxv4f32(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrsqrt7.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrt7.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrsqrt7.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrt7.mask.nxv8f32(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrsqrt7.nxv16f32(
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrt7.nxv16f32(
-    <vscale x 16 x float> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrsqrt7.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrt7.mask.nxv16f32(
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrsqrt7.nxv1f64(
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrt7.nxv1f64(
-    <vscale x 1 x double> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrsqrt7.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrt7.mask.nxv1f64(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrsqrt7.nxv2f64(
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrt7.nxv2f64(
-    <vscale x 2 x double> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrsqrt7.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrt7.mask.nxv2f64(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrsqrt7.nxv4f64(
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrt7.nxv4f64(
-    <vscale x 4 x double> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrsqrt7.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrt7.mask.nxv4f64(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrsqrt7.nxv8f64(
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrt7.nxv8f64(
-    <vscale x 8 x double> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrsqrt7.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfrsqrt7.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrt7.mask.nxv8f64(
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %0,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfrsqrt7.ll
index d0198a85b0c52..a521b6c2f2b57 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsqrt7.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -14,7 +16,7 @@ define <vscale x 1 x half> @intrinsic_vfrsqrt7_v_nxv1f16_nxv1f16(<vscale x 1 x h
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrsqrt7.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -23,10 +25,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfrsqrt7.mask.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -37,16 +39,16 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfrsqrt7.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -55,7 +57,7 @@ define <vscale x 2 x half> @intrinsic_vfrsqrt7_v_nxv2f16_nxv2f16(<vscale x 2 x h
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrsqrt7.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -64,10 +66,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfrsqrt7.mask.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -78,16 +80,16 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfrsqrt7.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -96,7 +98,7 @@ define <vscale x 4 x half> @intrinsic_vfrsqrt7_v_nxv4f16_nxv4f16(<vscale x 4 x h
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrsqrt7.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -105,10 +107,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfrsqrt7.mask.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -119,16 +121,16 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfrsqrt7.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -137,7 +139,7 @@ define <vscale x 8 x half> @intrinsic_vfrsqrt7_v_nxv8f16_nxv8f16(<vscale x 8 x h
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrsqrt7.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -146,10 +148,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfrsqrt7.mask.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -160,16 +162,16 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfrsqrt7.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -178,7 +180,7 @@ define <vscale x 16 x half> @intrinsic_vfrsqrt7_v_nxv16f16_nxv16f16(<vscale x 16
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrsqrt7.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -187,10 +189,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfrsqrt7.mask.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,16 +203,16 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vfrsqrt7.nxv32f16(
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
+define <vscale x 32 x half> @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -219,7 +221,7 @@ define <vscale x 32 x half> @intrinsic_vfrsqrt7_v_nxv32f16_nxv32f16(<vscale x 32
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrsqrt7.nxv32f16(
     <vscale x 32 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x half> %a
 }
@@ -228,10 +230,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfrsqrt7.mask.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, i32 %3) nounwind {
+define <vscale x 32 x half> @intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16(<vscale x 32 x i1> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -242,16 +244,16 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfrsqrt7.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -260,7 +262,7 @@ define <vscale x 1 x float> @intrinsic_vfrsqrt7_v_nxv1f32_nxv1f32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrsqrt7.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -269,10 +271,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfrsqrt7.mask.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -283,16 +285,16 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfrsqrt7.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -301,7 +303,7 @@ define <vscale x 2 x float> @intrinsic_vfrsqrt7_v_nxv2f32_nxv2f32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrsqrt7.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -310,10 +312,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfrsqrt7.mask.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -324,16 +326,16 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -342,7 +344,7 @@ define <vscale x 4 x float> @intrinsic_vfrsqrt7_v_nxv4f32_nxv4f32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrsqrt7.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -351,10 +353,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfrsqrt7.mask.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -365,16 +367,16 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfrsqrt7.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -383,7 +385,7 @@ define <vscale x 8 x float> @intrinsic_vfrsqrt7_v_nxv8f32_nxv8f32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrsqrt7.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -392,10 +394,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfrsqrt7.mask.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -406,16 +408,16 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfrsqrt7.nxv16f32(
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -424,7 +426,7 @@ define <vscale x 16 x float> @intrinsic_vfrsqrt7_v_nxv16f32_nxv16f32(<vscale x 1
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrsqrt7.nxv16f32(
     <vscale x 16 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfrsqrt7.mask.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32(<vscale x 16 x i1> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -447,16 +449,16 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfrsqrt7.nxv1f64(
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -465,7 +467,7 @@ define <vscale x 1 x double> @intrinsic_vfrsqrt7_v_nxv1f64_nxv1f64(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrsqrt7.nxv1f64(
     <vscale x 1 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
@@ -474,10 +476,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfrsqrt7.mask.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -488,16 +490,16 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfrsqrt7.nxv2f64(
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -506,7 +508,7 @@ define <vscale x 2 x double> @intrinsic_vfrsqrt7_v_nxv2f64_nxv2f64(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrsqrt7.nxv2f64(
     <vscale x 2 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
@@ -515,10 +517,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfrsqrt7.mask.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -529,16 +531,16 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfrsqrt7.nxv4f64(
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -547,7 +549,7 @@ define <vscale x 4 x double> @intrinsic_vfrsqrt7_v_nxv4f64_nxv4f64(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrsqrt7.nxv4f64(
     <vscale x 4 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
@@ -556,10 +558,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfrsqrt7.mask.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -570,16 +572,16 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfrsqrt7.nxv8f64(
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -588,7 +590,7 @@ define <vscale x 8 x double> @intrinsic_vfrsqrt7_v_nxv8f64_nxv8f64(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrsqrt7.nxv8f64(
     <vscale x 8 x double> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
@@ -597,10 +599,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfrsqrt7.mask.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64(<vscale x 8 x i1> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsqrt7_mask_v_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -611,7 +613,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %0,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
deleted file mode 100644
index 0477554c91419..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv64.ll
+++ /dev/null
@@ -1,678 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
-; RUN:   -mattr=+d -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfrsub.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfrsub.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrsub.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrsub.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfrsub.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfrsub.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrsub.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrsub.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfrsub.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfrsub.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrsub.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrsub.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfrsub.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfrsub.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrsub.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrsub.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfrsub.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfrsub.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrsub.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrsub.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfrsub.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfrsub.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrsub.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrsub.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfrsub.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfrsub.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrsub.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrsub.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfrsub.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfrsub.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfrsub.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfrsub.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrsub.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrsub.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfrsub.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfrsub.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrsub.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrsub.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfrsub.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfrsub.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrsub.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrsub.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfrsub.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfrsub.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrsub.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrsub.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfrsub.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfrsub.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrsub.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrsub.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfrsub.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfrsub.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrsub.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrsub.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfrsub.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfrsub.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfrsub.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfrsub.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfrsub.ll
index eab5b2a414e18..3fb281562088c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfrsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfrsub.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfrsub.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfrsub.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfrsub.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfrsub.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfrsub.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfrsub.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfrsub.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfrsub.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfrsub.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfrsub.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfrsub.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfrsub.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfrsub.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfrsub.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfrsub.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfrsub.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfrsub.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -266,7 +268,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -274,9 +276,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfrsub.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -286,7 +288,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfrsub.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -296,10 +298,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfrsub.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -311,7 +313,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -319,9 +321,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfrsub.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -331,7 +333,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfrsub.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -341,10 +343,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfrsub.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -356,7 +358,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -364,9 +366,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -376,7 +378,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfrsub.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -386,10 +388,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfrsub.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -401,7 +403,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -409,9 +411,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfrsub.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -421,7 +423,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfrsub.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -431,10 +433,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfrsub.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -446,7 +448,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -454,9 +456,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfrsub.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -466,7 +468,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfrsub.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -476,10 +478,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfrsub.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -491,7 +493,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -499,9 +501,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfrsub.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -511,7 +513,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfrsub.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -521,10 +523,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfrsub.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -536,7 +538,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -544,9 +546,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfrsub.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -556,7 +558,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfrsub.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -566,10 +568,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfrsub.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -581,7 +583,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -589,9 +591,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfrsub.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -601,7 +603,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfrsub.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -611,10 +613,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfrsub.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -634,9 +636,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfrsub.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -646,7 +648,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfrsub.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -656,10 +658,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfrsub.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfrsub_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -671,7 +673,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
deleted file mode 100644
index d71fb8fb25352..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv64.ll
+++ /dev/null
@@ -1,1355 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnj_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnj_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnj_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnj_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnj_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnj_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnj_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnj_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnj_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnj_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnj_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnj_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnj_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnj_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnj_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnj_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnj_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnj_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnj_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnj_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnj_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnj_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnj_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnj_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnj_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnj_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnj_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnj_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnj_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnj_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnj_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnj_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnj_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnj_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnj_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnj_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnj_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnj_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnj_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnj_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnj_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnj_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnj.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll
index c0e999a2433d0..65a5592775cb5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnj_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnj_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnj_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnj_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnj_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnj_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnj_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnj_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnj_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnj_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnj_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnj_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnj_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnj_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnj_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnj_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnj_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnj_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnj_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnj_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnj_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnj_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnj_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnj_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnj_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnj_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnj_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnj_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnj.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnj.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnj.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnj.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnj.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnj.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnj.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnj.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnj.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnj.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnj.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnj.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnj.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnj.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnj.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnj.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnj.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnj.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnj.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnj.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnj.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnj.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnj.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnj.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnj.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
deleted file mode 100644
index f751fd7406389..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv64.ll
+++ /dev/null
@@ -1,1355 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnjn_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnjn_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnjn_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnjn_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnjn_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnjn_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnjn_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnjn_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnjn_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnjn_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnjn_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnjn_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnjn_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnjn_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnjn_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnjn_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnjn_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnjn_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnjn_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnjn_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnjn_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnjn_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnjn_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnjn_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnjn_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnjn_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnjn_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnjn_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnjn_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnjn_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjn.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll
index 0287f9ea2cbf3..f16c8a6db1fb9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjn.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjn.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjn.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjn.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjn.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjn.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjn.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjn.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjn.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjn.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjn.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjn.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjn.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjn.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjn.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjn.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjn.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjn.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjn.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjn.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjn.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjn.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjn.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjn.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjn.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjn.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjn.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjn.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjn.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjn.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
deleted file mode 100644
index 4ae69f0d4f613..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv64.ll
+++ /dev/null
@@ -1,1355 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnjx_vf_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsgnjx_mask_vf_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnjx_vf_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsgnjx_mask_vf_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnjx_vf_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsgnjx_mask_vf_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnjx_vf_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsgnjx_mask_vf_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnjx_vf_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsgnjx_mask_vf_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnjx_vf_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsgnjx_mask_vf_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnjx_vf_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsgnjx_mask_vf_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnjx_vf_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsgnjx_mask_vf_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnjx_vf_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsgnjx_mask_vf_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnjx_vf_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsgnjx_mask_vf_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnjx_vf_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsgnjx_mask_vf_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnjx_vf_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsgnjx_mask_vf_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnjx_vf_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsgnjx_mask_vf_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnjx_vf_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsgnjx_mask_vf_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnjx_vf_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsgnjx_mask_vf_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsgnjx.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll
index fba4eceb23fb6..edfd578ce8aa0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsgnjx.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfsgnjx.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsgnjx.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfsgnjx.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsgnjx.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfsgnjx.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsgnjx.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfsgnjx.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsgnjx.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfsgnjx.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnjx.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfsgnjx.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsgnjx.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfsgnjx.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsgnjx.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfsgnjx.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsgnjx.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfsgnjx.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsgnjx.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfsgnjx.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnjx.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfsgnjx.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsgnjx.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfsgnjx.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsgnjx.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfsgnjx.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsgnjx.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfsgnjx.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsgnjx.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfsgnjx.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
deleted file mode 100644
index a0ba31ea26686..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv64.ll
+++ /dev/null
@@ -1,677 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfslide1down.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfslide1down.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfslide1down.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfslide1down.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfslide1down.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfslide1down.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfslide1down.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfslide1down.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfslide1down.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfslide1down.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfslide1down.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfslide1down.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfslide1down.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfslide1down.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfslide1down.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfslide1down.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfslide1down.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfslide1down.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfslide1down.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfslide1down.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfslide1down.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfslide1down.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfslide1down.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfslide1down.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfslide1down.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfslide1down.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfslide1down.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfslide1down.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfslide1down.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfslide1down.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfslide1down.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfslide1down.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfslide1down.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfslide1down.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfslide1down.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfslide1down.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfslide1down.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfslide1down.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfslide1down.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfslide1down.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfslide1down.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfslide1down.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfslide1down.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfslide1down.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfslide1down.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfslide1down.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfslide1down.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfslide1down.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfslide1down.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfslide1down.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfslide1down.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfslide1down.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfslide1down.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfslide1down.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfslide1down.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1down.ll
similarity index 85%
rename from llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfslide1down.ll
index 5baadc48d857e..6cbba483a8d9f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1down-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1down.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfslide1down.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1down.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfslide1down.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfslide1down.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1down.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfslide1down.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfslide1down.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1down.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfslide1down.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfslide1down.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1down.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfslide1down.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfslide1down.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1down.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfslide1down.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfslide1down.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1down.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -266,7 +268,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -274,9 +276,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfslide1down.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -286,7 +288,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfslide1down.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -296,10 +298,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1down.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -311,7 +313,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -319,9 +321,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfslide1down.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -331,7 +333,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfslide1down.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -341,10 +343,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1down.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -356,7 +358,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -364,9 +366,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -376,7 +378,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -386,10 +388,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1down.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -401,7 +403,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -409,9 +411,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfslide1down.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -421,7 +423,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfslide1down.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -431,10 +433,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1down.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -446,7 +448,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -454,9 +456,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfslide1down.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -466,7 +468,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfslide1down.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -476,10 +478,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1down.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -491,7 +493,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -499,9 +501,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfslide1down.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -511,7 +513,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1down.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -521,10 +523,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1down.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -536,7 +538,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -544,9 +546,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfslide1down.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -556,7 +558,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1down.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -566,10 +568,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1down.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -581,7 +583,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -589,9 +591,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfslide1down.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -601,7 +603,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1down.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -611,10 +613,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1down.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -634,9 +636,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfslide1down.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -646,7 +648,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1down.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -656,10 +658,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1down.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1down_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -671,7 +673,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
deleted file mode 100644
index 4b7d1fe55e244..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv64.ll
+++ /dev/null
@@ -1,692 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfslide1up.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfslide1up.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfslide1up.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfslide1up.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfslide1up.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfslide1up.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfslide1up.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfslide1up.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfslide1up.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfslide1up.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfslide1up.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfslide1up.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfslide1up.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfslide1up.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfslide1up.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfslide1up.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfslide1up.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfslide1up.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfslide1up.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfslide1up.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfslide1up.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfslide1up.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfslide1up.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfslide1up.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfslide1up.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfslide1up.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfslide1up.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfslide1up.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfslide1up.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfslide1up.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfslide1up.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfslide1up.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfslide1up.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfslide1up.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfslide1up.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfslide1up.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfslide1up.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfslide1up.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfslide1up.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfslide1up.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfslide1up.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfslide1up.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfslide1up.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v9, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfslide1up.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfslide1up.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfslide1up.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfslide1up.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v10, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfslide1up.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfslide1up.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfslide1up.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfslide1up.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v12, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfslide1up.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfslide1up.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfslide1up.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfslide1up.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v16, v8, fa0
-; CHECK-NEXT:    vmv.v.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfslide1up.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfslide1up.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfslide1up.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfslide1up.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfslide1up.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfslide1up.ll
index 271bf70522bfa..695cf7aab3f6c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfslide1up-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfslide1up.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -17,7 +19,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfslide1up.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -27,10 +29,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfslide1up.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -50,9 +52,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfslide1up.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -63,7 +65,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfslide1up.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -73,10 +75,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfslide1up.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -96,9 +98,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfslide1up.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -109,7 +111,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfslide1up.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -119,10 +121,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfslide1up.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -142,9 +144,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfslide1up.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -155,7 +157,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfslide1up.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -165,10 +167,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfslide1up.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -188,9 +190,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfslide1up.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,7 +203,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfslide1up.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -211,10 +213,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfslide1up.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -234,9 +236,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfslide1up.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -247,7 +249,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfslide1up.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -257,10 +259,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfslide1up.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -280,9 +282,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfslide1up.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -293,7 +295,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfslide1up.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -303,10 +305,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfslide1up.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -326,9 +328,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfslide1up.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -339,7 +341,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfslide1up.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -349,10 +351,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfslide1up.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -372,9 +374,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfslide1up.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -385,7 +387,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1up.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -395,10 +397,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfslide1up.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -418,9 +420,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfslide1up.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -431,7 +433,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfslide1up.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -441,10 +443,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfslide1up.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -464,9 +466,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfslide1up.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -477,7 +479,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfslide1up.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -487,10 +489,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfslide1up.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -510,9 +512,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfslide1up.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -523,7 +525,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfslide1up.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -533,10 +535,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfslide1up.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -556,9 +558,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfslide1up.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -569,7 +571,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfslide1up.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -579,10 +581,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfslide1up.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -594,7 +596,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -602,9 +604,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfslide1up.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -615,7 +617,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfslide1up.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -625,10 +627,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfslide1up.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -640,7 +642,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -648,9 +650,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfslide1up.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -661,7 +663,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfslide1up.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -671,10 +673,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfslide1up.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfslide1up_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -686,7 +688,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
deleted file mode 100644
index 3b86fd763f3c9..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
+++ /dev/null
@@ -1,548 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfsqrt_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
-    <vscale x 1 x half> %0,
-    i32 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x half> @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16(<vscale x 1 x i1> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfsqrt_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
-    <vscale x 2 x half> %0,
-    i32 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x half> @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16(<vscale x 2 x i1> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfsqrt_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
-    <vscale x 4 x half> %0,
-    i32 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x half> @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16(<vscale x 4 x i1> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfsqrt_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
-    <vscale x 8 x half> %0,
-    i32 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x half> @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16(<vscale x 8 x i1> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfsqrt_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
-    <vscale x 16 x half> %0,
-    i32 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x half> @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16(<vscale x 16 x i1> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
-  <vscale x 32 x half>,
-  i32);
-
-define <vscale x 32 x half> @intrinsic_vfsqrt_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
-    <vscale x 32 x half> %0,
-    i32 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfsqrt_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
-    <vscale x 1 x float> %0,
-    i32 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32(<vscale x 1 x i1> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfsqrt_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
-    <vscale x 2 x float> %0,
-    i32 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32(<vscale x 2 x i1> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfsqrt_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
-    <vscale x 4 x float> %0,
-    i32 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32(<vscale x 4 x i1> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfsqrt_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
-    <vscale x 8 x float> %0,
-    i32 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32(<vscale x 8 x i1> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
-  <vscale x 16 x float>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfsqrt_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
-    <vscale x 16 x float> %0,
-    i32 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsqrt.nxv1f64(
-  <vscale x 1 x double>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfsqrt_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsqrt.nxv1f64(
-    <vscale x 1 x double> %0,
-    i32 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsqrt.mask.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64(<vscale x 1 x i1> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsqrt.mask.nxv1f64(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsqrt.nxv2f64(
-  <vscale x 2 x double>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfsqrt_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsqrt.nxv2f64(
-    <vscale x 2 x double> %0,
-    i32 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsqrt.mask.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64(<vscale x 2 x i1> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsqrt.mask.nxv2f64(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsqrt.nxv4f64(
-  <vscale x 4 x double>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfsqrt_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsqrt.nxv4f64(
-    <vscale x 4 x double> %0,
-    i32 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsqrt.mask.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64(<vscale x 4 x i1> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsqrt.mask.nxv4f64(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %0,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsqrt.nxv8f64(
-  <vscale x 8 x double>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfsqrt_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsqrt.v v8, v8
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsqrt.nxv8f64(
-    <vscale x 8 x double> %0,
-    i32 %1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt.ll
similarity index 80%
rename from llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfsqrt.ll
index c810a516f3b35..d944375d645cb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt.ll
@@ -1,22 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsqrt_v_nxv1f16_nxv1f16(
+define <vscale x 1 x half> @intrinsic_vfsqrt_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 1 x half> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
     <vscale x 1 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -25,45 +25,39 @@ declare <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16(
+define <vscale x 1 x half> @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 1 x half> %0,
-  <vscale x 1 x half> %1,
-  <vscale x 1 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsqrt_v_nxv2f16_nxv2f16(
+define <vscale x 2 x half> @intrinsic_vfsqrt_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 2 x half> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
     <vscale x 2 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -72,45 +66,39 @@ declare <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16(
+define <vscale x 2 x half> @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 2 x half> %0,
-  <vscale x 2 x half> %1,
-  <vscale x 2 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsqrt_v_nxv4f16_nxv4f16(
+define <vscale x 4 x half> @intrinsic_vfsqrt_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 4 x half> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
     <vscale x 4 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -119,45 +107,39 @@ declare <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16(
+define <vscale x 4 x half> @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 4 x half> %0,
-  <vscale x 4 x half> %1,
-  <vscale x 4 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsqrt_v_nxv8f16_nxv8f16(
+define <vscale x 8 x half> @intrinsic_vfsqrt_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 8 x half> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
     <vscale x 8 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -166,45 +148,39 @@ declare <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16(
+define <vscale x 8 x half> @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v10, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 8 x half> %0,
-  <vscale x 8 x half> %1,
-  <vscale x 8 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsqrt_v_nxv16f16_nxv16f16(
+define <vscale x 16 x half> @intrinsic_vfsqrt_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 16 x half> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
     <vscale x 16 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -213,45 +189,39 @@ declare <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16(
+define <vscale x 16 x half> @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v12, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 16 x half> %0,
-  <vscale x 16 x half> %1,
-  <vscale x 16 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
   <vscale x 32 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsqrt_v_nxv32f16_nxv32f16(
+define <vscale x 32 x half> @intrinsic_vfsqrt_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 32 x half> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
     <vscale x 32 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 32 x half> %a
 }
@@ -260,45 +230,39 @@ declare <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16(
+define <vscale x 32 x half> @intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v16, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 32 x half> %0,
-  <vscale x 32 x half> %1,
-  <vscale x 32 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
     <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsqrt_v_nxv1f32_nxv1f32(
+define <vscale x 1 x float> @intrinsic_vfsqrt_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 1 x float> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
     <vscale x 1 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -307,45 +271,39 @@ declare <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32(
+define <vscale x 1 x float> @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 1 x float> %0,
-  <vscale x 1 x float> %1,
-  <vscale x 1 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsqrt_v_nxv2f32_nxv2f32(
+define <vscale x 2 x float> @intrinsic_vfsqrt_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 2 x float> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
     <vscale x 2 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -354,45 +312,39 @@ declare <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32(
+define <vscale x 2 x float> @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 2 x float> %0,
-  <vscale x 2 x float> %1,
-  <vscale x 2 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsqrt_v_nxv4f32_nxv4f32(
+define <vscale x 4 x float> @intrinsic_vfsqrt_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 4 x float> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
     <vscale x 4 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -401,45 +353,39 @@ declare <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32(
+define <vscale x 4 x float> @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v10, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 4 x float> %0,
-  <vscale x 4 x float> %1,
-  <vscale x 4 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsqrt_v_nxv8f32_nxv8f32(
+define <vscale x 8 x float> @intrinsic_vfsqrt_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 8 x float> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
     <vscale x 8 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -448,45 +394,39 @@ declare <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32(
+define <vscale x 8 x float> @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v12, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 8 x float> %0,
-  <vscale x 8 x float> %1,
-  <vscale x 8 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
   <vscale x 16 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsqrt_v_nxv16f32_nxv16f32(
+define <vscale x 16 x float> @intrinsic_vfsqrt_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 16 x float> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
     <vscale x 16 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
@@ -495,45 +435,39 @@ declare <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32(
+define <vscale x 16 x float> @intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v16, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 16 x float> %0,
-  <vscale x 16 x float> %1,
-  <vscale x 16 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
     <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfsqrt.nxv1f64(
   <vscale x 1 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsqrt_v_nxv1f64_nxv1f64(
+define <vscale x 1 x double> @intrinsic_vfsqrt_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 1 x double> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsqrt.nxv1f64(
     <vscale x 1 x double> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
@@ -542,45 +476,39 @@ declare <vscale x 1 x double> @llvm.riscv.vfsqrt.mask.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64(
+define <vscale x 1 x double> @intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v9, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 1 x double> %0,
-  <vscale x 1 x double> %1,
-  <vscale x 1 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsqrt.mask.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfsqrt.nxv2f64(
   <vscale x 2 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsqrt_v_nxv2f64_nxv2f64(
+define <vscale x 2 x double> @intrinsic_vfsqrt_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 2 x double> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsqrt.nxv2f64(
     <vscale x 2 x double> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
@@ -589,45 +517,39 @@ declare <vscale x 2 x double> @llvm.riscv.vfsqrt.mask.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64(
+define <vscale x 2 x double> @intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v10, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 2 x double> %0,
-  <vscale x 2 x double> %1,
-  <vscale x 2 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsqrt.mask.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfsqrt.nxv4f64(
   <vscale x 4 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsqrt_v_nxv4f64_nxv4f64(
+define <vscale x 4 x double> @intrinsic_vfsqrt_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 4 x double> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsqrt.nxv4f64(
     <vscale x 4 x double> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,45 +558,39 @@ declare <vscale x 4 x double> @llvm.riscv.vfsqrt.mask.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64(
+define <vscale x 4 x double> @intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v12, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 4 x double> %0,
-  <vscale x 4 x double> %1,
-  <vscale x 4 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsqrt.mask.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfsqrt.nxv8f64(
   <vscale x 8 x double>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsqrt_v_nxv8f64_nxv8f64(
+define <vscale x 8 x double> @intrinsic_vfsqrt_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v8
 ; CHECK-NEXT:    ret
-  <vscale x 8 x double> %0,
-  i64 %1) nounwind {
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsqrt.nxv8f64(
     <vscale x 8 x double> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
@@ -683,25 +599,21 @@ declare <vscale x 8 x double> @llvm.riscv.vfsqrt.mask.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64(
+define <vscale x 8 x double> @intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
 ; CHECK-NEXT:    vfsqrt.v v8, v16, v0.t
 ; CHECK-NEXT:    ret
-  <vscale x 8 x double> %0,
-  <vscale x 8 x double> %1,
-  <vscale x 8 x i1> %2,
-  i64 %3) nounwind {
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsqrt.mask.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
deleted file mode 100644
index 7445cfb806d43..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv64.ll
+++ /dev/null
@@ -1,1356 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -mattr=+zfh \
-; RUN:   -mattr=+d -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.nxv32f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.nxv32f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 32 x half> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.nxv16f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.nxv16f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.nxv2f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.nxv2f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.nxv4f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.nxv4f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.nxv8f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl8re64.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.nxv8f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.f16(
-  <vscale x 32 x half>,
-  half,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.f16(
-  <vscale x 32 x half>,
-  <vscale x 32 x half>,
-  half,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.f16(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x half> %1,
-    half %2,
-    <vscale x 32 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.f32(
-  <vscale x 16 x float>,
-  float,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.f32(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  float,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.f32(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    float %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.f64(
-  <vscale x 1 x double>,
-  double,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  double,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    double %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.f64(
-  <vscale x 2 x double>,
-  double,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.f64(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  double,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.f64(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    double %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.f64(
-  <vscale x 4 x double>,
-  double,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.f64(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  double,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.f64(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    double %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.f64(
-  <vscale x 8 x double>,
-  double,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    double %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.f64(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  double,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.f64(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    double %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfsub.ll
index 86371c1685fc0..645fb340ffa30 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsub.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f16_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f16_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f16_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f16_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f16_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -221,7 +223,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -229,9 +231,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -241,7 +243,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.nxv32f16(
     <vscale x 32 x half> %0,
     <vscale x 32 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -251,10 +253,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 32 x half>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re16.v v24, (a0)
@@ -267,7 +269,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 32 x half> %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f32_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f32_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f32_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -402,7 +404,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -410,9 +412,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -422,7 +424,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -432,10 +434,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f32_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -447,7 +449,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -455,9 +457,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -467,7 +469,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.nxv16f32(
     <vscale x 16 x float> %0,
     <vscale x 16 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -477,10 +479,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.nxv16f32(
   <vscale x 16 x float>,
   <vscale x 16 x float>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re32.v v24, (a0)
@@ -493,7 +495,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.nxv1f64(
     <vscale x 1 x double> %0,
     <vscale x 1 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.nxv1f64(
   <vscale x 1 x double>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv1f64_nxv1f64_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.nxv2f64(
     <vscale x 2 x double> %0,
     <vscale x 2 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.nxv2f64(
   <vscale x 2 x double>,
   <vscale x 2 x double>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv2f64_nxv2f64_nxv2f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.nxv4f64(
     <vscale x 4 x double> %0,
     <vscale x 4 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.nxv4f64(
   <vscale x 4 x double>,
   <vscale x 4 x double>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv4f64_nxv4f64_nxv4f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.nxv8f64(
     <vscale x 8 x double> %0,
     <vscale x 8 x double> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.nxv8f64(
   <vscale x 8 x double>,
   <vscale x 8 x double>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl8re64.v v24, (a0)
@@ -674,7 +676,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -682,9 +684,9 @@ entry:
 declare <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -694,7 +696,7 @@ entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfsub.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x half> %a
 }
@@ -704,10 +706,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfsub.mask.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x half> @intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f16_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -719,7 +721,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
@@ -727,9 +729,9 @@ entry:
 declare <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -739,7 +741,7 @@ entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfsub.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x half> %a
 }
@@ -749,10 +751,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfsub.mask.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x half> @intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f16_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -764,7 +766,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
@@ -772,9 +774,9 @@ entry:
 declare <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -784,7 +786,7 @@ entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfsub.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x half> %a
 }
@@ -794,10 +796,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfsub.mask.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x half> @intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f16_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -809,7 +811,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
@@ -817,9 +819,9 @@ entry:
 declare <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -829,7 +831,7 @@ entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfsub.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x half> %a
 }
@@ -839,10 +841,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfsub.mask.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x half> @intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f16_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -854,7 +856,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
@@ -862,9 +864,9 @@ entry:
 declare <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -874,7 +876,7 @@ entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfsub.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x half> %a
 }
@@ -884,10 +886,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfsub.mask.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x half> @intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f16_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -899,7 +901,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
@@ -907,9 +909,9 @@ entry:
 declare <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -919,7 +921,7 @@ entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsub.nxv32f16.f16(
     <vscale x 32 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 32 x half> %a
 }
@@ -929,10 +931,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfsub.mask.nxv32f16.f16(
   <vscale x 32 x half>,
   half,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 32 x half> @intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, half %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv32f16_nxv32f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, ta, mu
@@ -944,7 +946,7 @@ entry:
     <vscale x 32 x half> %1,
     half %2,
     <vscale x 32 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
@@ -952,9 +954,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -964,7 +966,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfsub.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -974,10 +976,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfsub.mask.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f32_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -989,7 +991,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -997,9 +999,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1009,7 +1011,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfsub.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -1019,10 +1021,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfsub.mask.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f32_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1034,7 +1036,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -1042,9 +1044,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1054,7 +1056,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfsub.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -1064,10 +1066,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfsub.mask.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1079,7 +1081,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -1087,9 +1089,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1099,7 +1101,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfsub.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -1109,10 +1111,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfsub.mask.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f32_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1124,7 +1126,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -1132,9 +1134,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1144,7 +1146,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsub.nxv16f32.f32(
     <vscale x 16 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -1154,10 +1156,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfsub.mask.nxv16f32.f32(
   <vscale x 16 x float>,
   float,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, float %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv16f32_nxv16f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, mu
@@ -1169,7 +1171,7 @@ entry:
     <vscale x 16 x float> %1,
     float %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -1177,9 +1179,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1189,7 +1191,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfsub.nxv1f64.f64(
     <vscale x 1 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -1199,10 +1201,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfsub.mask.nxv1f64.f64(
   <vscale x 1 x double>,
   double,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64(<vscale x 1 x double> %0, <vscale x 1 x double> %1, double %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv1f64_nxv1f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
@@ -1214,7 +1216,7 @@ entry:
     <vscale x 1 x double> %1,
     double %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -1222,9 +1224,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1234,7 +1236,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfsub.nxv2f64.f64(
     <vscale x 2 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -1244,10 +1246,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfsub.mask.nxv2f64.f64(
   <vscale x 2 x double>,
   double,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64(<vscale x 2 x double> %0, <vscale x 2 x double> %1, double %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv2f64_nxv2f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
@@ -1259,7 +1261,7 @@ entry:
     <vscale x 2 x double> %1,
     double %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -1267,9 +1269,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1279,7 +1281,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfsub.nxv4f64.f64(
     <vscale x 4 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -1289,10 +1291,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfsub.mask.nxv4f64.f64(
   <vscale x 4 x double>,
   double,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64(<vscale x 4 x double> %0, <vscale x 4 x double> %1, double %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv4f64_nxv4f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
@@ -1304,7 +1306,7 @@ entry:
     <vscale x 4 x double> %1,
     double %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -1312,9 +1314,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, double %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1324,7 +1326,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfsub.nxv8f64.f64(
     <vscale x 8 x double> %0,
     double %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -1334,10 +1336,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfsub.mask.nxv8f64.f64(
   <vscale x 8 x double>,
   double,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, double %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsub_mask_vf_nxv8f64_nxv8f64_f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
@@ -1349,7 +1351,7 @@ entry:
     <vscale x 8 x double> %1,
     double %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
deleted file mode 100644
index a04b9a54b9306..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv64.ll
+++ /dev/null
@@ -1,830 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vv v10, v8, v9
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vv v12, v8, v10
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vv v16, v8, v12
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vv v10, v8, v9
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vv v12, v8, v10
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vv v16, v8, v12
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v10, v8, fa0
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v12, v8, fa0
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v16, v8, fa0
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v10, v8, fa0
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v12, v8, fa0
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v16, v8, fa0
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
index a3bdcc4573287..541f2b8564f28 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -17,7 +19,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -27,10 +29,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -50,9 +52,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -63,7 +65,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -73,10 +75,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -96,9 +98,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -109,7 +111,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -119,10 +121,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -142,9 +144,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -155,7 +157,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -165,10 +167,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -188,9 +190,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,7 +203,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -211,10 +213,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.nxv16f16
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -234,9 +236,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -247,7 +249,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -257,10 +259,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -280,9 +282,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -293,7 +295,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -303,10 +305,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -326,9 +328,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -339,7 +341,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -349,10 +351,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -372,9 +374,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -385,7 +387,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -395,10 +397,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -418,9 +420,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -431,7 +433,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.nxv1f32.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -441,10 +443,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.mask.nxv1f32.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -464,9 +466,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -477,7 +479,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.nxv2f32.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -487,10 +489,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.mask.nxv2f32.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -510,9 +512,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -523,7 +525,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.nxv4f32.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -533,10 +535,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.mask.nxv4f32.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -556,9 +558,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -569,7 +571,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.nxv8f32.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -579,10 +581,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.mask.nxv8f32.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -594,7 +596,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -602,9 +604,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -615,7 +617,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.nxv16f32.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -625,10 +627,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.mask.nxv16f32.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -640,7 +642,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -648,9 +650,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -661,7 +663,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.nxv1f64.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -671,10 +673,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.mask.nxv1f64.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -686,7 +688,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -694,9 +696,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -707,7 +709,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.nxv2f64.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -717,10 +719,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.mask.nxv2f64.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -732,7 +734,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -740,9 +742,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -753,7 +755,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.nxv4f64.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -763,10 +765,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.mask.nxv4f64.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -778,7 +780,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -786,9 +788,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -799,7 +801,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.nxv8f64.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -809,10 +811,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.mask.nxv8f64.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -824,7 +826,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
deleted file mode 100644
index 3586ec64e9dfe..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
+++ /dev/null
@@ -1,1248 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl4re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl4re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v8, v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v10, v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x half> %0,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v10, v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x half> %0,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wv v12, v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x half> %0,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v16, v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x half> %0,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v10, v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x float> %0,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwadd.wv v12, v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x float> %0,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwadd.wv v16, v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x float> %0,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwadd.wv v24, v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v24
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %1,
-    <vscale x 8 x float> %0,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
index 3d046d2ba8057..28cdfbf621b3d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f32_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f32_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16(
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl4re16.v v24, (a0)
@@ -222,7 +224,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -230,9 +232,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -242,7 +244,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -252,10 +254,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv1f64_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -267,7 +269,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl4re32.v v24, (a0)
@@ -403,7 +405,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -411,9 +413,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.f16(
   <vscale x 1 x float>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -423,7 +425,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.f16(
     <vscale x 1 x float> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwadd.w.mask.nxv1f32.f16(
   <vscale x 1 x float>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -448,7 +450,7 @@ entry:
     <vscale x 1 x float> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -456,9 +458,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.f16(
   <vscale x 2 x float>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -468,7 +470,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.f16(
     <vscale x 2 x float> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -478,10 +480,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwadd.w.mask.nxv2f32.f16(
   <vscale x 2 x float>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -493,7 +495,7 @@ entry:
     <vscale x 2 x float> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.f16(
   <vscale x 4 x float>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.f16(
     <vscale x 4 x float> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwadd.w.mask.nxv4f32.f16(
   <vscale x 4 x float>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 4 x float> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.f16(
   <vscale x 8 x float>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.f16(
     <vscale x 8 x float> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwadd.w.mask.nxv8f32.f16(
   <vscale x 8 x float>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 8 x float> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.f16(
   <vscale x 16 x float>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.f16(
     <vscale x 16 x float> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwadd.w.mask.nxv16f32.f16(
   <vscale x 16 x float>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 16 x float> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.f32(
   <vscale x 1 x double>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.f32(
     <vscale x 1 x double> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwadd.w.mask.nxv1f64.f32(
   <vscale x 1 x double>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -673,7 +675,7 @@ entry:
     <vscale x 1 x double> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -681,9 +683,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.f32(
   <vscale x 2 x double>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -693,7 +695,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.f32(
     <vscale x 2 x double> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -703,10 +705,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwadd.w.mask.nxv2f64.f32(
   <vscale x 2 x double>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -718,7 +720,7 @@ entry:
     <vscale x 2 x double> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -726,9 +728,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.f32(
   <vscale x 4 x double>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -738,7 +740,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.f32(
     <vscale x 4 x double> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -748,10 +750,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwadd.w.mask.nxv4f64.f32(
   <vscale x 4 x double>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -763,7 +765,7 @@ entry:
     <vscale x 4 x double> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -771,9 +773,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.f32(
   <vscale x 8 x double>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -783,7 +785,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.f32(
     <vscale x 8 x double> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -793,10 +795,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwadd.w.mask.nxv8f64.f32(
   <vscale x 8 x double>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -808,12 +810,12 @@ entry:
     <vscale x 8 x double> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
 
-define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -825,12 +827,12 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
-define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -842,12 +844,12 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
-define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -859,12 +861,12 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
-define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -876,12 +878,12 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
-define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -893,12 +895,12 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
-define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -910,12 +912,12 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
-define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -927,12 +929,12 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
-define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -944,12 +946,12 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
-define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -961,12 +963,12 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
 
-define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -978,12 +980,12 @@ entry:
     <vscale x 1 x float> %0,
     half %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
-define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -995,12 +997,12 @@ entry:
     <vscale x 2 x float> %0,
     half %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
-define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -1012,12 +1014,12 @@ entry:
     <vscale x 4 x float> %0,
     half %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
-define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -1029,12 +1031,12 @@ entry:
     <vscale x 8 x float> %0,
     half %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
-define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -1046,12 +1048,12 @@ entry:
     <vscale x 16 x float> %0,
     half %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
-define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -1063,12 +1065,12 @@ entry:
     <vscale x 1 x double> %0,
     float %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
-define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1080,12 +1082,12 @@ entry:
     <vscale x 2 x double> %0,
     float %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
-define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1097,12 +1099,12 @@ entry:
     <vscale x 4 x double> %0,
     float %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
-define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wf_tie_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1114,12 +1116,12 @@ entry:
     <vscale x 8 x double> %0,
     float %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
 
-define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -1130,12 +1132,12 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
     <vscale x 1 x float> %1,
     <vscale x 1 x half> %0,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
 
-define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -1146,12 +1148,12 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
     <vscale x 2 x float> %1,
     <vscale x 2 x half> %0,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
 
-define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -1162,12 +1164,12 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
     <vscale x 4 x float> %1,
     <vscale x 4 x half> %0,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
 
-define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -1178,12 +1180,12 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
     <vscale x 8 x float> %1,
     <vscale x 8 x half> %0,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
 
-define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -1194,12 +1196,12 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
     <vscale x 1 x double> %1,
     <vscale x 1 x float> %0,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
 
-define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1210,12 +1212,12 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
     <vscale x 2 x double> %1,
     <vscale x 2 x float> %0,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
 
-define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1226,12 +1228,12 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
     <vscale x 4 x double> %1,
     <vscale x 4 x float> %0,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
 
-define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1242,7 +1244,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
     <vscale x 8 x double> %1,
     <vscale x 8 x float> %0,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll
deleted file mode 100644
index e050090b7761c..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv64.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.f.v.nxv2f32.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.f.v.nxv2f32.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.f.v.nxv1f64.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.f.v.nxv1f64.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
index 460488388b5ea..386fc9a4822ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x float> @intrinsic_vfwcvt_f.f.v_nxv1f32_nxv1f16(<vscale x 1
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.nxv1f32.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.f.v.nxv2f32.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x float> @intrinsic_vfwcvt_f.f.v_nxv2f32_nxv2f16(<vscale x 2
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.f.v.nxv2f32.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.f.v_nxv4f32_nxv4f16(<vscale x 4
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.nxv4f32.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.f.v_nxv8f32_nxv8f16(<vscale x 8
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.nxv8f32.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.f.v_nxv16f32_nxv16f16(<vscale x
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.nxv16f32.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.f.v.mask.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.f.v.nxv1f64.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x double> @intrinsic_vfwcvt_f.f.v_nxv1f64_nxv1f32(<vscale x 1
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.f.v.nxv1f64.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.f.v_nxv2f64_nxv2f32(<vscale x 2
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.nxv2f64.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.f.v_nxv4f64_nxv4f32(<vscale x 4
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.nxv4f64.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.f.v_nxv8f64_nxv8f32(<vscale x 8
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.nxv8f64.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.f.v.mask.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.f.v_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll
deleted file mode 100644
index e294cbe085f73..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv64.ll
+++ /dev/null
@@ -1,632 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8(
-  <vscale x 1 x i8>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8(<vscale x 1 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8(
-    <vscale x 1 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f16.nxv1i8(
-  <vscale x 1 x half>,
-  <vscale x 1 x i8>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8(<vscale x 1 x half> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f16.nxv1i8(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x i8> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfwcvt.f.x.v.nxv2f16.nxv2i8(
-  <vscale x 2 x i8>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8(<vscale x 2 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfwcvt.f.x.v.nxv2f16.nxv2i8(
-    <vscale x 2 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f16.nxv2i8(
-  <vscale x 2 x half>,
-  <vscale x 2 x i8>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8(<vscale x 2 x half> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f16.nxv2i8(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x i8> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfwcvt.f.x.v.nxv4f16.nxv4i8(
-  <vscale x 4 x i8>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8(<vscale x 4 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfwcvt.f.x.v.nxv4f16.nxv4i8(
-    <vscale x 4 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f16.nxv4i8(
-  <vscale x 4 x half>,
-  <vscale x 4 x i8>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8(<vscale x 4 x half> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f16.nxv4i8(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x i8> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8(
-  <vscale x 8 x i8>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8(
-    <vscale x 8 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f16.nxv8i8(
-  <vscale x 8 x half>,
-  <vscale x 8 x i8>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x half> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f16.nxv8i8(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x i8> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8(
-  <vscale x 16 x i8>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8(<vscale x 16 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8(
-    <vscale x 16 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f16.nxv16i8(
-  <vscale x 16 x half>,
-  <vscale x 16 x i8>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8(<vscale x 16 x half> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f16.nxv16i8(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x i8> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8(
-  <vscale x 32 x i8>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8(<vscale x 32 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8(
-    <vscale x 32 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32f16.nxv32i8(
-  <vscale x 32 x half>,
-  <vscale x 32 x i8>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8(<vscale x 32 x half> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32f16.nxv32i8(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x i8> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.x.v.nxv1f32.nxv1i16(
-  <vscale x 1 x i16>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16(<vscale x 1 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.x.v.nxv1f32.nxv1i16(
-    <vscale x 1 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f32.nxv1i16(
-  <vscale x 1 x float>,
-  <vscale x 1 x i16>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16(<vscale x 1 x float> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f32.nxv1i16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x i16> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.x.v.nxv2f32.nxv2i16(
-  <vscale x 2 x i16>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16(<vscale x 2 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.x.v.nxv2f32.nxv2i16(
-    <vscale x 2 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f32.nxv2i16(
-  <vscale x 2 x float>,
-  <vscale x 2 x i16>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16(<vscale x 2 x float> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f32.nxv2i16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x i16> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16(
-  <vscale x 4 x i16>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16(<vscale x 4 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16(
-    <vscale x 4 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f32.nxv4i16(
-  <vscale x 4 x float>,
-  <vscale x 4 x i16>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16(<vscale x 4 x float> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f32.nxv4i16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x i16> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16(
-  <vscale x 8 x i16>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16(<vscale x 8 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16(
-    <vscale x 8 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f32.nxv8i16(
-  <vscale x 8 x float>,
-  <vscale x 8 x i16>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16(<vscale x 8 x float> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f32.nxv8i16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x i16> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16(
-  <vscale x 16 x i16>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16(<vscale x 16 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16(
-    <vscale x 16 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f32.nxv16i16(
-  <vscale x 16 x float>,
-  <vscale x 16 x i16>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16(<vscale x 16 x float> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f32.nxv16i16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x i16> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.x.v.nxv1f64.nxv1i32(
-  <vscale x 1 x i32>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32(<vscale x 1 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.x.v.nxv1f64.nxv1i32(
-    <vscale x 1 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f64.nxv1i32(
-  <vscale x 1 x double>,
-  <vscale x 1 x i32>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32(<vscale x 1 x double> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f64.nxv1i32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x i32> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32(
-  <vscale x 2 x i32>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32(<vscale x 2 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32(
-    <vscale x 2 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f64.nxv2i32(
-  <vscale x 2 x double>,
-  <vscale x 2 x i32>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32(<vscale x 2 x double> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f64.nxv2i32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x i32> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32(
-  <vscale x 4 x i32>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32(<vscale x 4 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32(
-    <vscale x 4 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f64.nxv4i32(
-  <vscale x 4 x double>,
-  <vscale x 4 x i32>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32(<vscale x 4 x double> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f64.nxv4i32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x i32> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32(
-  <vscale x 8 x i32>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32(<vscale x 8 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32(
-    <vscale x 8 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f64.nxv8i32(
-  <vscale x 8 x double>,
-  <vscale x 8 x i32>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32(<vscale x 8 x double> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.x.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f64.nxv8i32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x i32> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
index 467b64c20fedd..ad4a3a4a5eb6a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-x.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8(
   <vscale x 1 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8(<vscale x 1 x i8> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x half> @intrinsic_vfwcvt_f.x.v_nxv1f16_nxv1i8(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.nxv1f16.nxv1i8(
     <vscale x 1 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f16.nxv1i8(
   <vscale x 1 x half>,
   <vscale x 1 x i8>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8(<vscale x 1 x half> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8(<vscale x 1 x half> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x i8> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfwcvt.f.x.v.nxv2f16.nxv2i8(
   <vscale x 2 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8(<vscale x 2 x i8> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x half> @intrinsic_vfwcvt_f.x.v_nxv2f16_nxv2i8(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfwcvt.f.x.v.nxv2f16.nxv2i8(
     <vscale x 2 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f16.nxv2i8(
   <vscale x 2 x half>,
   <vscale x 2 x i8>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8(<vscale x 2 x half> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8(<vscale x 2 x half> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x i8> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfwcvt.f.x.v.nxv4f16.nxv4i8(
   <vscale x 4 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8(<vscale x 4 x i8> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x half> @intrinsic_vfwcvt_f.x.v_nxv4f16_nxv4i8(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfwcvt.f.x.v.nxv4f16.nxv4i8(
     <vscale x 4 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f16.nxv4i8(
   <vscale x 4 x half>,
   <vscale x 4 x i8>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8(<vscale x 4 x half> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8(<vscale x 4 x half> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x i8> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8(
   <vscale x 8 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x i8> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x half> @intrinsic_vfwcvt_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.nxv8f16.nxv8i8(
     <vscale x 8 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f16.nxv8i8(
   <vscale x 8 x half>,
   <vscale x 8 x i8>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x half> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8(<vscale x 8 x half> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x i8> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8(
   <vscale x 16 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8(<vscale x 16 x i8> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x half> @intrinsic_vfwcvt_f.x.v_nxv16f16_nxv16i8(<vscale x 1
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.nxv16f16.nxv16i8(
     <vscale x 16 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f16.nxv16i8(
   <vscale x 16 x half>,
   <vscale x 16 x i8>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8(<vscale x 16 x half> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8(<vscale x 16 x half> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x i8> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8(
   <vscale x 32 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8(<vscale x 32 x i8> %0, i32 %1) nounwind {
+define <vscale x 32 x half> @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 32 x half> @intrinsic_vfwcvt_f.x.v_nxv32f16_nxv32i8(<vscale x 3
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.nxv32f16.nxv32i8(
     <vscale x 32 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x half> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfwcvt.f.x.v.mask.nxv32f16.nxv32i8(
   <vscale x 32 x half>,
   <vscale x 32 x i8>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8(<vscale x 32 x half> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x half> @intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8(<vscale x 32 x half> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 32 x half> %0,
     <vscale x 32 x i8> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.x.v.nxv1f32.nxv1i16(
   <vscale x 1 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16(<vscale x 1 x i16> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16(<vscale x 1 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 1 x float> @intrinsic_vfwcvt_f.x.v_nxv1f32_nxv1i16(<vscale x 1
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.x.v.nxv1f32.nxv1i16(
     <vscale x 1 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f32.nxv1i16(
   <vscale x 1 x float>,
   <vscale x 1 x i16>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16(<vscale x 1 x float> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16(<vscale x 1 x float> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x i16> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.x.v.nxv2f32.nxv2i16(
   <vscale x 2 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16(<vscale x 2 x i16> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16(<vscale x 2 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 2 x float> @intrinsic_vfwcvt_f.x.v_nxv2f32_nxv2i16(<vscale x 2
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.x.v.nxv2f32.nxv2i16(
     <vscale x 2 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f32.nxv2i16(
   <vscale x 2 x float>,
   <vscale x 2 x i16>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16(<vscale x 2 x float> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16(<vscale x 2 x float> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x i16> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16(
   <vscale x 4 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16(<vscale x 4 x i16> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16(<vscale x 4 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.x.v_nxv4f32_nxv4i16(<vscale x 4
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.nxv4f32.nxv4i16(
     <vscale x 4 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f32.nxv4i16(
   <vscale x 4 x float>,
   <vscale x 4 x i16>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16(<vscale x 4 x float> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16(<vscale x 4 x float> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -374,16 +376,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x i16> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16(
   <vscale x 8 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16(<vscale x 8 x i16> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16(<vscale x 8 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -393,7 +395,7 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.x.v_nxv8f32_nxv8i16(<vscale x 8
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.nxv8f32.nxv8i16(
     <vscale x 8 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -402,10 +404,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f32.nxv8i16(
   <vscale x 8 x float>,
   <vscale x 8 x i16>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16(<vscale x 8 x float> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16(<vscale x 8 x float> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -416,16 +418,16 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x i16> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16(
   <vscale x 16 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16(<vscale x 16 x i16> %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16(<vscale x 16 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -435,7 +437,7 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.x.v_nxv16f32_nxv16i16(<vscale x
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.nxv16f32.nxv16i16(
     <vscale x 16 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
@@ -444,10 +446,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.x.v.mask.nxv16f32.nxv16i16(
   <vscale x 16 x float>,
   <vscale x 16 x i16>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16(<vscale x 16 x float> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16(<vscale x 16 x float> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -458,16 +460,16 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x i16> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.x.v.nxv1f64.nxv1i32(
   <vscale x 1 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32(<vscale x 1 x i32> %0, i32 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32(<vscale x 1 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -477,7 +479,7 @@ define <vscale x 1 x double> @intrinsic_vfwcvt_f.x.v_nxv1f64_nxv1i32(<vscale x 1
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.x.v.nxv1f64.nxv1i32(
     <vscale x 1 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
@@ -486,10 +488,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv1f64.nxv1i32(
   <vscale x 1 x double>,
   <vscale x 1 x i32>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32(<vscale x 1 x double> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32(<vscale x 1 x double> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -500,16 +502,16 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x i32> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32(
   <vscale x 2 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32(<vscale x 2 x i32> %0, i32 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32(<vscale x 2 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -519,7 +521,7 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.x.v_nxv2f64_nxv2i32(<vscale x 2
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.nxv2f64.nxv2i32(
     <vscale x 2 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
@@ -528,10 +530,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv2f64.nxv2i32(
   <vscale x 2 x double>,
   <vscale x 2 x i32>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32(<vscale x 2 x double> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32(<vscale x 2 x double> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -542,16 +544,16 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x i32> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32(
   <vscale x 4 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32(<vscale x 4 x i32> %0, i32 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32(<vscale x 4 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -561,7 +563,7 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.x.v_nxv4f64_nxv4i32(<vscale x 4
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.nxv4f64.nxv4i32(
     <vscale x 4 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
@@ -570,10 +572,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv4f64.nxv4i32(
   <vscale x 4 x double>,
   <vscale x 4 x i32>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32(<vscale x 4 x double> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32(<vscale x 4 x double> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -584,16 +586,16 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x i32> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32(
   <vscale x 8 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32(<vscale x 8 x i32> %0, i32 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32(<vscale x 8 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -603,7 +605,7 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.x.v_nxv8f64_nxv8i32(<vscale x 8
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.nxv8f64.nxv8i32(
     <vscale x 8 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
@@ -612,10 +614,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.x.v.mask.nxv8f64.nxv8i32(
   <vscale x 8 x double>,
   <vscale x 8 x i32>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32(<vscale x 8 x double> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32(<vscale x 8 x double> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.x.v_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x i32> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll
deleted file mode 100644
index 107813b7879bd..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv64.ll
+++ /dev/null
@@ -1,632 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8(
-  <vscale x 1 x i8>,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8(<vscale x 1 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8(
-    <vscale x 1 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f16.nxv1i8(
-  <vscale x 1 x half>,
-  <vscale x 1 x i8>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8(<vscale x 1 x half> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f16.nxv1i8(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x i8> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv2f16.nxv2i8(
-  <vscale x 2 x i8>,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8(<vscale x 2 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv2f16.nxv2i8(
-    <vscale x 2 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 2 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f16.nxv2i8(
-  <vscale x 2 x half>,
-  <vscale x 2 x i8>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8(<vscale x 2 x half> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f16.nxv2i8(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x i8> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv4f16.nxv4i8(
-  <vscale x 4 x i8>,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8(<vscale x 4 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv4f16.nxv4i8(
-    <vscale x 4 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 4 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f16.nxv4i8(
-  <vscale x 4 x half>,
-  <vscale x 4 x i8>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8(<vscale x 4 x half> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f16.nxv4i8(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x i8> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8(
-  <vscale x 8 x i8>,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8(
-    <vscale x 8 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f16.nxv8i8(
-  <vscale x 8 x half>,
-  <vscale x 8 x i8>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x half> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f16.nxv8i8(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x i8> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8(
-  <vscale x 16 x i8>,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8(<vscale x 16 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8(
-    <vscale x 16 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f16.nxv16i8(
-  <vscale x 16 x half>,
-  <vscale x 16 x i8>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8(<vscale x 16 x half> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f16.nxv16i8(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x i8> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8(
-  <vscale x 32 x i8>,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8(<vscale x 32 x i8> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8(
-    <vscale x 32 x i8> %0,
-    i64 %1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32f16.nxv32i8(
-  <vscale x 32 x half>,
-  <vscale x 32 x i8>,
-  <vscale x 32 x i1>,
-  i64,
-  i64);
-
-define <vscale x 32 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8(<vscale x 32 x half> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32f16.nxv32i8(
-    <vscale x 32 x half> %0,
-    <vscale x 32 x i8> %1,
-    <vscale x 32 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 32 x half> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv1f32.nxv1i16(
-  <vscale x 1 x i16>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16(<vscale x 1 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv1f32.nxv1i16(
-    <vscale x 1 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f32.nxv1i16(
-  <vscale x 1 x float>,
-  <vscale x 1 x i16>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16(<vscale x 1 x float> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f32.nxv1i16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x i16> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv2f32.nxv2i16(
-  <vscale x 2 x i16>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16(<vscale x 2 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv2f32.nxv2i16(
-    <vscale x 2 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f32.nxv2i16(
-  <vscale x 2 x float>,
-  <vscale x 2 x i16>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16(<vscale x 2 x float> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f32.nxv2i16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x i16> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16(
-  <vscale x 4 x i16>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16(<vscale x 4 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16(
-    <vscale x 4 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f32.nxv4i16(
-  <vscale x 4 x float>,
-  <vscale x 4 x i16>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16(<vscale x 4 x float> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f32.nxv4i16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x i16> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16(
-  <vscale x 8 x i16>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16(<vscale x 8 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16(
-    <vscale x 8 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f32.nxv8i16(
-  <vscale x 8 x float>,
-  <vscale x 8 x i16>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16(<vscale x 8 x float> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f32.nxv8i16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x i16> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16(
-  <vscale x 16 x i16>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16(<vscale x 16 x i16> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16(
-    <vscale x 16 x i16> %0,
-    i64 %1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f32.nxv16i16(
-  <vscale x 16 x float>,
-  <vscale x 16 x i16>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16(<vscale x 16 x float> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f32.nxv16i16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x i16> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv1f64.nxv1i32(
-  <vscale x 1 x i32>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32(<vscale x 1 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv1f64.nxv1i32(
-    <vscale x 1 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f64.nxv1i32(
-  <vscale x 1 x double>,
-  <vscale x 1 x i32>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32(<vscale x 1 x double> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f64.nxv1i32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x i32> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32(
-  <vscale x 2 x i32>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32(<vscale x 2 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32(
-    <vscale x 2 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f64.nxv2i32(
-  <vscale x 2 x double>,
-  <vscale x 2 x i32>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32(<vscale x 2 x double> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f64.nxv2i32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x i32> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32(
-  <vscale x 4 x i32>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32(<vscale x 4 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32(
-    <vscale x 4 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f64.nxv4i32(
-  <vscale x 4 x double>,
-  <vscale x 4 x i32>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32(<vscale x 4 x double> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f64.nxv4i32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x i32> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32(
-  <vscale x 8 x i32>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32(<vscale x 8 x i32> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32(
-    <vscale x 8 x i32> %0,
-    i64 %1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f64.nxv8i32(
-  <vscale x 8 x double>,
-  <vscale x 8 x i32>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32(<vscale x 8 x double> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.f.xu.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f64.nxv8i32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x i32> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
index fc0af066c39ec..9eef34d4de1e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-f-xu.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8(
   <vscale x 1 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8(<vscale x 1 x i8> %0, i32 %1) nounwind {
+define <vscale x 1 x half> @intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8(<vscale x 1 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x half> @intrinsic_vfwcvt_f.xu.v_nxv1f16_nxv1i8(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv1f16.nxv1i8(
     <vscale x 1 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x half> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f16.nxv1i8(
   <vscale x 1 x half>,
   <vscale x 1 x i8>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8(<vscale x 1 x half> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8(<vscale x 1 x half> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x half> %0,
     <vscale x 1 x i8> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x half> %a
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv2f16.nxv2i8(
   <vscale x 2 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8(<vscale x 2 x i8> %0, i32 %1) nounwind {
+define <vscale x 2 x half> @intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8(<vscale x 2 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x half> @intrinsic_vfwcvt_f.xu.v_nxv2f16_nxv2i8(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv2f16.nxv2i8(
     <vscale x 2 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x half> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f16.nxv2i8(
   <vscale x 2 x half>,
   <vscale x 2 x i8>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8(<vscale x 2 x half> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8(<vscale x 2 x half> %0, <vscale x 2 x i8> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f16_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x half> %0,
     <vscale x 2 x i8> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x half> %a
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv4f16.nxv4i8(
   <vscale x 4 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8(<vscale x 4 x i8> %0, i32 %1) nounwind {
+define <vscale x 4 x half> @intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8(<vscale x 4 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x half> @intrinsic_vfwcvt_f.xu.v_nxv4f16_nxv4i8(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv4f16.nxv4i8(
     <vscale x 4 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x half> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f16.nxv4i8(
   <vscale x 4 x half>,
   <vscale x 4 x i8>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8(<vscale x 4 x half> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8(<vscale x 4 x half> %0, <vscale x 4 x i8> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f16_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x half> %0,
     <vscale x 4 x i8> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x half> %a
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8(
   <vscale x 8 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x i8> %0, i32 %1) nounwind {
+define <vscale x 8 x half> @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x half> @intrinsic_vfwcvt_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv8f16.nxv8i8(
     <vscale x 8 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x half> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f16.nxv8i8(
   <vscale x 8 x half>,
   <vscale x 8 x i8>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x half> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8(<vscale x 8 x half> %0, <vscale x 8 x i8> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m1, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x half> %0,
     <vscale x 8 x i8> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x half> %a
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8(
   <vscale x 16 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8(<vscale x 16 x i8> %0, i32 %1) nounwind {
+define <vscale x 16 x half> @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8(<vscale x 16 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x half> @intrinsic_vfwcvt_f.xu.v_nxv16f16_nxv16i8(<vscale x
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv16f16.nxv16i8(
     <vscale x 16 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x half> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f16.nxv16i8(
   <vscale x 16 x half>,
   <vscale x 16 x i8>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8(<vscale x 16 x half> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8(<vscale x 16 x half> %0, <vscale x 16 x i8> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16f16_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x half> %0,
     <vscale x 16 x i8> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x half> %a
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8(
   <vscale x 32 x i8>,
-  i32);
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8(<vscale x 32 x i8> %0, i32 %1) nounwind {
+define <vscale x 32 x half> @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8(<vscale x 32 x i8> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 32 x half> @intrinsic_vfwcvt_f.xu.v_nxv32f16_nxv32i8(<vscale x
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.nxv32f16.nxv32i8(
     <vscale x 32 x i8> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 32 x half> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 32 x half> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv32f16.nxv32i8(
   <vscale x 32 x half>,
   <vscale x 32 x i8>,
   <vscale x 32 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 32 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8(<vscale x 32 x half> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, i32 %3) nounwind {
+define <vscale x 32 x half> @intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8(<vscale x 32 x half> %0, <vscale x 32 x i8> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv32f16_nxv32i8:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 32 x half> %0,
     <vscale x 32 x i8> %1,
     <vscale x 32 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 32 x half> %a
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv1f32.nxv1i16(
   <vscale x 1 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16(<vscale x 1 x i16> %0, i32 %1) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16(<vscale x 1 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 1 x float> @intrinsic_vfwcvt_f.xu.v_nxv1f32_nxv1i16(<vscale x 1
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv1f32.nxv1i16(
     <vscale x 1 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x float> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f32.nxv1i16(
   <vscale x 1 x float>,
   <vscale x 1 x i16>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16(<vscale x 1 x float> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16(<vscale x 1 x float> %0, <vscale x 1 x i16> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x i16> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv2f32.nxv2i16(
   <vscale x 2 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16(<vscale x 2 x i16> %0, i32 %1) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16(<vscale x 2 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 2 x float> @intrinsic_vfwcvt_f.xu.v_nxv2f32_nxv2i16(<vscale x 2
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv2f32.nxv2i16(
     <vscale x 2 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x float> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f32.nxv2i16(
   <vscale x 2 x float>,
   <vscale x 2 x i16>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16(<vscale x 2 x float> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16(<vscale x 2 x float> %0, <vscale x 2 x i16> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f32_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x i16> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16(
   <vscale x 4 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16(<vscale x 4 x i16> %0, i32 %1) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16(<vscale x 4 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 4 x float> @intrinsic_vfwcvt_f.xu.v_nxv4f32_nxv4i16(<vscale x 4
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv4f32.nxv4i16(
     <vscale x 4 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x float> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f32.nxv4i16(
   <vscale x 4 x float>,
   <vscale x 4 x i16>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16(<vscale x 4 x float> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16(<vscale x 4 x float> %0, <vscale x 4 x i16> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f32_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -374,16 +376,16 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x i16> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16(
   <vscale x 8 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16(<vscale x 8 x i16> %0, i32 %1) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16(<vscale x 8 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -393,7 +395,7 @@ define <vscale x 8 x float> @intrinsic_vfwcvt_f.xu.v_nxv8f32_nxv8i16(<vscale x 8
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv8f32.nxv8i16(
     <vscale x 8 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x float> %a
 }
@@ -402,10 +404,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f32.nxv8i16(
   <vscale x 8 x float>,
   <vscale x 8 x i16>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16(<vscale x 8 x float> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16(<vscale x 8 x float> %0, <vscale x 8 x i16> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f32_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -416,16 +418,16 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x i16> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16(
   <vscale x 16 x i16>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16(<vscale x 16 x i16> %0, i32 %1) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16(<vscale x 16 x i16> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -435,7 +437,7 @@ define <vscale x 16 x float> @intrinsic_vfwcvt_f.xu.v_nxv16f32_nxv16i16(<vscale
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.nxv16f32.nxv16i16(
     <vscale x 16 x i16> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x float> %a
 }
@@ -444,10 +446,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv16f32.nxv16i16(
   <vscale x 16 x float>,
   <vscale x 16 x i16>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16(<vscale x 16 x float> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16(<vscale x 16 x float> %0, <vscale x 16 x i16> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv16f32_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -458,16 +460,16 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x i16> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv1f64.nxv1i32(
   <vscale x 1 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32(<vscale x 1 x i32> %0, i32 %1) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32(<vscale x 1 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -477,7 +479,7 @@ define <vscale x 1 x double> @intrinsic_vfwcvt_f.xu.v_nxv1f64_nxv1i32(<vscale x
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv1f64.nxv1i32(
     <vscale x 1 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x double> %a
 }
@@ -486,10 +488,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv1f64.nxv1i32(
   <vscale x 1 x double>,
   <vscale x 1 x i32>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32(<vscale x 1 x double> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32(<vscale x 1 x double> %0, <vscale x 1 x i32> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv1f64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -500,16 +502,16 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x i32> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32(
   <vscale x 2 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32(<vscale x 2 x i32> %0, i32 %1) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32(<vscale x 2 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -519,7 +521,7 @@ define <vscale x 2 x double> @intrinsic_vfwcvt_f.xu.v_nxv2f64_nxv2i32(<vscale x
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv2f64.nxv2i32(
     <vscale x 2 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x double> %a
 }
@@ -528,10 +530,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv2f64.nxv2i32(
   <vscale x 2 x double>,
   <vscale x 2 x i32>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32(<vscale x 2 x double> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32(<vscale x 2 x double> %0, <vscale x 2 x i32> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv2f64_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -542,16 +544,16 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x i32> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32(
   <vscale x 4 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32(<vscale x 4 x i32> %0, i32 %1) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32(<vscale x 4 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -561,7 +563,7 @@ define <vscale x 4 x double> @intrinsic_vfwcvt_f.xu.v_nxv4f64_nxv4i32(<vscale x
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv4f64.nxv4i32(
     <vscale x 4 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x double> %a
 }
@@ -570,10 +572,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv4f64.nxv4i32(
   <vscale x 4 x double>,
   <vscale x 4 x i32>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32(<vscale x 4 x double> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32(<vscale x 4 x double> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv4f64_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -584,16 +586,16 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x i32> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32(
   <vscale x 8 x i32>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32(<vscale x 8 x i32> %0, i32 %1) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32(<vscale x 8 x i32> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -603,7 +605,7 @@ define <vscale x 8 x double> @intrinsic_vfwcvt_f.xu.v_nxv8f64_nxv8i32(<vscale x
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.nxv8f64.nxv8i32(
     <vscale x 8 x i32> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x double> %a
 }
@@ -612,10 +614,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwcvt.f.xu.v.mask.nxv8f64.nxv8i32(
   <vscale x 8 x double>,
   <vscale x 8 x i32>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32(<vscale x 8 x double> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32(<vscale x 8 x double> %0, <vscale x 8 x i32> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_f.xu.v_nxv8f64_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -626,7 +628,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x i32> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll
deleted file mode 100644
index 4d551f62ec52f..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv64.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i32.nxv1f16(
-  <vscale x 1 x i32>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i32.nxv1f16(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i32.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i32.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i32.nxv2f16(
-  <vscale x 2 x i32>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i32.nxv2f16(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i32.nxv4f16(
-  <vscale x 4 x i32>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i32.nxv4f16(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i32.nxv8f16(
-  <vscale x 8 x i32>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i32.nxv8f16(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv16i32.nxv16f16(
-  <vscale x 16 x i32>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv16i32.nxv16f16(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i64.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i64.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i64.nxv1f32(
-  <vscale x 1 x i64>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i64.nxv1f32(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i64.nxv2f32(
-  <vscale x 2 x i64>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i64.nxv2f32(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i64.nxv4f32(
-  <vscale x 4 x i64>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i64.nxv4f32(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i64.nxv8f32(
-  <vscale x 8 x i64>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i64.nxv8f32(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
index d6cf1b3563105..0f7a46aadfd15 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-x-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv1i32_nxv1f16(<vscale x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i32.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i32.nxv1f16(
   <vscale x 1 x i32>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i32.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i32_nxv2f16(<vscale x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i32.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i32.nxv2f16(
   <vscale x 2 x i32>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i32.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i32.nxv4f16(
   <vscale x 4 x i32>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i32.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i32.nxv8f16(
   <vscale x 8 x i32>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.x.f.v_nxv16i32_nxv16f16(<vscale
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv16i32.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv16i32.nxv16f16(
   <vscale x 16 x i32>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i64.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv1i64_nxv1f32(<vscale x
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv1i64.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv1i64.nxv1f32(
   <vscale x 1 x i64>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv2i64.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv2i64.nxv2f32(
   <vscale x 2 x i64>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv4i64.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv4i64.nxv4f32(
   <vscale x 4 x i64>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.nxv8i64.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.x.f.v.mask.nxv8i64.nxv8f32(
   <vscale x 8 x i64>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.x.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll
deleted file mode 100644
index c419e08471ca1..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv32.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16(
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16(
-    <vscale x 1 x half> %0,
-    i32 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f16(
-  <vscale x 1 x i32>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f16(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i32.nxv2f16(
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i32.nxv2f16(
-    <vscale x 2 x half> %0,
-    i32 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f16(
-  <vscale x 2 x i32>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f16(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16(
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16(
-    <vscale x 4 x half> %0,
-    i32 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f16(
-  <vscale x 4 x i32>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f16(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16(
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16(
-    <vscale x 8 x half> %0,
-    i32 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f16(
-  <vscale x 8 x i32>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f16(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16(
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16(
-    <vscale x 16 x half> %0,
-    i32 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f16(
-  <vscale x 16 x i32>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f16(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i64.nxv1f32(
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i64.nxv1f32(
-    <vscale x 1 x float> %0,
-    i32 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f32(
-  <vscale x 1 x i64>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f32(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32(
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32(
-    <vscale x 2 x float> %0,
-    i32 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f32(
-  <vscale x 2 x i64>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f32(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32(
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32(
-    <vscale x 4 x float> %0,
-    i32 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f32(
-  <vscale x 4 x i64>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f32(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32(
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32(
-    <vscale x 8 x float> %0,
-    i32 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f32(
-  <vscale x 8 x i64>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.rtz.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f32(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
index 6b881aba8f652..f3d786a37fbe4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-rtz-xu-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i32_nxv1f16(<vscale x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i32.nxv1f16(
     <vscale x 1 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i32.nxv1f16(
   <vscale x 1 x i32>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i32.nxv2f16(
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i32_nxv2f16(<vscale x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i32.nxv2f16(
     <vscale x 2 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i32.nxv2f16(
   <vscale x 2 x i32>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16(
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i32.nxv4f16(
     <vscale x 4 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i32.nxv4f16(
   <vscale x 4 x i32>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16(
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i32.nxv8f16(
     <vscale x 8 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i32.nxv8f16(
   <vscale x 8 x i32>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16(
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_rtz.xu.f.v_nxv16i32_nxv16f16(<vscal
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv16i32.nxv16f16(
     <vscale x 16 x half> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv16i32.nxv16f16
   <vscale x 16 x i32>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i64.nxv1f32(
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv1i64_nxv1f32(<vscale x
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv1i64.nxv1f32(
     <vscale x 1 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv1i64.nxv1f32(
   <vscale x 1 x i64>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32(
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv2i64.nxv2f32(
     <vscale x 2 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv2i64.nxv2f32(
   <vscale x 2 x i64>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32(
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv4i64.nxv4f32(
     <vscale x 4 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv4i64.nxv4f32(
   <vscale x 4 x i64>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32(
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.nxv8i64.nxv8f32(
     <vscale x 8 x float> %0,
-    i64 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.rtz.xu.f.v.mask.nxv8i64.nxv8f32(
   <vscale x 8 x i64>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_rtz.xu.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll
deleted file mode 100644
index fd01c64df0d36..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv64.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i32.nxv1f16(
-  <vscale x 1 x i32>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i32.nxv1f16(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv2i32.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv2i32.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i32.nxv2f16(
-  <vscale x 2 x i32>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i32.nxv2f16(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i32.nxv4f16(
-  <vscale x 4 x i32>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i32.nxv4f16(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i32.nxv8f16(
-  <vscale x 8 x i32>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i32.nxv8f16(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv16i32.nxv16f16(
-  <vscale x 16 x i32>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv16i32.nxv16f16(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv1i64.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv1i64.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i64.nxv1f32(
-  <vscale x 1 x i64>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i64.nxv1f32(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i64.nxv2f32(
-  <vscale x 2 x i64>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i64.nxv2f32(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i64.nxv4f32(
-  <vscale x 4 x i64>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i64.nxv4f32(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i64.nxv8f32(
-  <vscale x 8 x i64>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.x.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i64.nxv8f32(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
index b8d88e1c64e55..6c3c2d7702eb1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-x-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i32> @intrinsic_vfwcvt_x.f.v_nxv1i32_nxv1f16(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv1i32.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i32.nxv1f16(
   <vscale x 1 x i32>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv2i32.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i32> @intrinsic_vfwcvt_x.f.v_nxv2i32_nxv2f16(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv2i32.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i32.nxv2f16(
   <vscale x 2 x i32>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv4i32.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i32.nxv4f16(
   <vscale x 4 x i32>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv8i32.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i32.nxv8f16(
   <vscale x 8 x i32>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_x.f.v_nxv16i32_nxv16f16(<vscale x 1
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.nxv16i32.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.x.f.v.mask.nxv16i32.nxv16f16(
   <vscale x 16 x i32>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv1i64.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x i64> @intrinsic_vfwcvt_x.f.v_nxv1i64_nxv1f32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv1i64.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv1i64.nxv1f32(
   <vscale x 1 x i64>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv2i64.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv2i64.nxv2f32(
   <vscale x 2 x i64>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv4i64.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv4i64.nxv4f32(
   <vscale x 4 x i64>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.nxv8i64.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.x.f.v.mask.nxv8i64.nxv8f32(
   <vscale x 8 x i64>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_x.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll
deleted file mode 100644
index dc461d60b0be9..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv64.ll
+++ /dev/null
@@ -1,380 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16(
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16(
-    <vscale x 1 x half> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i32.nxv1f16(
-  <vscale x 1 x i32>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i32.nxv1f16(
-    <vscale x 1 x i32> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv2i32.nxv2f16(
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv2i32.nxv2f16(
-    <vscale x 2 x half> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i32.nxv2f16(
-  <vscale x 2 x i32>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i32.nxv2f16(
-    <vscale x 2 x i32> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16(
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16(
-    <vscale x 4 x half> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i32.nxv4f16(
-  <vscale x 4 x i32>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i32.nxv4f16(
-    <vscale x 4 x i32> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16(
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16(
-    <vscale x 8 x half> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i32.nxv8f16(
-  <vscale x 8 x i32>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i32.nxv8f16(
-    <vscale x 8 x i32> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16(
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16(
-    <vscale x 16 x half> %0,
-    i64 %1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv16i32.nxv16f16(
-  <vscale x 16 x i32>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv16i32.nxv16f16(
-    <vscale x 16 x i32> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 16 x i32> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv1i64.nxv1f32(
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv1i64.nxv1f32(
-    <vscale x 1 x float> %0,
-    i64 %1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i64.nxv1f32(
-  <vscale x 1 x i64>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i64.nxv1f32(
-    <vscale x 1 x i64> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 1 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32(
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32(
-    <vscale x 2 x float> %0,
-    i64 %1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i64.nxv2f32(
-  <vscale x 2 x i64>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i64.nxv2f32(
-    <vscale x 2 x i64> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 2 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32(
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32(
-    <vscale x 4 x float> %0,
-    i64 %1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i64.nxv4f32(
-  <vscale x 4 x i64>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i64.nxv4f32(
-    <vscale x 4 x i64> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 4 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32(
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, i64 %1) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32(
-    <vscale x 8 x float> %0,
-    i64 %1)
-
-  ret <vscale x 8 x i64> %a
-}
-
-declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i64.nxv8f32(
-  <vscale x 8 x i64>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwcvt.xu.f.v v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i64.nxv8f32(
-    <vscale x 8 x i64> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
-
-  ret <vscale x 8 x i64> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
similarity index 87%
rename from llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
index c2b0a222709e4..10bd22304ed83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwcvt-xu-f.ll
@@ -1,11 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16(
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, i32 %1) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -15,7 +17,7 @@ define <vscale x 1 x i32> @intrinsic_vfwcvt_xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv1i32.nxv1f16(
     <vscale x 1 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i32> %a
 }
@@ -24,10 +26,10 @@ declare <vscale x 1 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i32.nxv1f16(
   <vscale x 1 x i32>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16(<vscale x 1 x i32> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv1i32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -38,16 +40,16 @@ entry:
     <vscale x 1 x i32> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i32> %a
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv2i32.nxv2f16(
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, i32 %1) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -57,7 +59,7 @@ define <vscale x 2 x i32> @intrinsic_vfwcvt_xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv2i32.nxv2f16(
     <vscale x 2 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i32> %a
 }
@@ -66,10 +68,10 @@ declare <vscale x 2 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i32.nxv2f16(
   <vscale x 2 x i32>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16(<vscale x 2 x i32> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv2i32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -80,16 +82,16 @@ entry:
     <vscale x 2 x i32> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i32> %a
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16(
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, i32 %1) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -99,7 +101,7 @@ define <vscale x 4 x i32> @intrinsic_vfwcvt_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv4i32.nxv4f16(
     <vscale x 4 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i32> %a
 }
@@ -108,10 +110,10 @@ declare <vscale x 4 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i32.nxv4f16(
   <vscale x 4 x i32>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16(<vscale x 4 x i32> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv4i32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -122,16 +124,16 @@ entry:
     <vscale x 4 x i32> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i32> %a
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16(
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, i32 %1) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -141,7 +143,7 @@ define <vscale x 8 x i32> @intrinsic_vfwcvt_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv8i32.nxv8f16(
     <vscale x 8 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i32> %a
 }
@@ -150,10 +152,10 @@ declare <vscale x 8 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i32.nxv8f16(
   <vscale x 8 x i32>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16(<vscale x 8 x i32> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv8i32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -164,16 +166,16 @@ entry:
     <vscale x 8 x i32> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i32> %a
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16(
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, i32 %1) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x half> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -183,7 +185,7 @@ define <vscale x 16 x i32> @intrinsic_vfwcvt_xu.f.v_nxv16i32_nxv16f16(<vscale x
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.nxv16i32.nxv16f16(
     <vscale x 16 x half> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 16 x i32> %a
 }
@@ -192,10 +194,10 @@ declare <vscale x 16 x i32> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv16i32.nxv16f16(
   <vscale x 16 x i32>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
+define <vscale x 16 x i32> @intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16(<vscale x 16 x i32> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv16i32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -206,16 +208,16 @@ entry:
     <vscale x 16 x i32> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x i32> %a
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv1i64.nxv1f32(
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, i32 %1) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -225,7 +227,7 @@ define <vscale x 1 x i64> @intrinsic_vfwcvt_xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv1i64.nxv1f32(
     <vscale x 1 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 1 x i64> %a
 }
@@ -234,10 +236,10 @@ declare <vscale x 1 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv1i64.nxv1f32(
   <vscale x 1 x i64>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+define <vscale x 1 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32(<vscale x 1 x i64> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv1i64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -248,16 +250,16 @@ entry:
     <vscale x 1 x i64> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x i64> %a
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32(
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, i32 %1) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -267,7 +269,7 @@ define <vscale x 2 x i64> @intrinsic_vfwcvt_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv2i64.nxv2f32(
     <vscale x 2 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 2 x i64> %a
 }
@@ -276,10 +278,10 @@ declare <vscale x 2 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv2i64.nxv2f32(
   <vscale x 2 x i64>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
+define <vscale x 2 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32(<vscale x 2 x i64> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv2i64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -290,16 +292,16 @@ entry:
     <vscale x 2 x i64> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x i64> %a
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32(
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, i32 %1) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -309,7 +311,7 @@ define <vscale x 4 x i64> @intrinsic_vfwcvt_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv4i64.nxv4f32(
     <vscale x 4 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 4 x i64> %a
 }
@@ -318,10 +320,10 @@ declare <vscale x 4 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv4i64.nxv4f32(
   <vscale x 4 x i64>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
+define <vscale x 4 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32(<vscale x 4 x i64> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv4i64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,16 +334,16 @@ entry:
     <vscale x 4 x i64> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x i64> %a
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32(
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, i32 %1) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x float> %0, iXLen %1) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -351,7 +353,7 @@ define <vscale x 8 x i64> @intrinsic_vfwcvt_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.nxv8i64.nxv8f32(
     <vscale x 8 x float> %0,
-    i32 %1)
+    iXLen %1)
 
   ret <vscale x 8 x i64> %a
 }
@@ -360,10 +362,10 @@ declare <vscale x 8 x i64> @llvm.riscv.vfwcvt.xu.f.v.mask.nxv8i64.nxv8f32(
   <vscale x 8 x i64>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
+define <vscale x 8 x i64> @intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32(<vscale x 8 x i64> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwcvt_mask_xu.f.v_nxv8i64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -374,7 +376,7 @@ entry:
     <vscale x 8 x i64> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x i64> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
deleted file mode 100644
index eb21c54c18e90..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv64.ll
+++ /dev/null
@@ -1,830 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v10, v11
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v12, v14
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v16, v20
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v10, v11
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v12, v14
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v16, v20
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmacc.vf v8, fa0, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwmacc.ll
index 0de121cb3f002..f5db61b5e8c7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x float> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x double> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmacc.nxv1f32.f16(
   <vscale x 1 x float>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x float> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmacc.mask.nxv1f32.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -456,7 +458,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmacc.nxv2f32.f16(
   <vscale x 2 x float>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x float> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmacc.mask.nxv2f32.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -502,7 +504,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmacc.nxv4f32.f16(
   <vscale x 4 x float>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x float> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmacc.mask.nxv4f32.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -548,7 +550,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmacc.nxv8f32.f16(
   <vscale x 8 x float>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 8 x float> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmacc.mask.nxv8f32.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmacc.nxv16f32.f16(
   <vscale x 16 x float>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 16 x float> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmacc.mask.nxv16f32.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x float> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmacc.nxv1f64.f32(
   <vscale x 1 x double>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 1 x double> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmacc.mask.nxv1f64.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -686,7 +688,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmacc.nxv2f64.f32(
   <vscale x 2 x double>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 2 x double> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmacc.mask.nxv2f64.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -732,7 +734,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmacc.nxv4f64.f32(
   <vscale x 4 x double>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 4 x double> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmacc.mask.nxv4f64.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -778,7 +780,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmacc.nxv8f64.f32(
   <vscale x 8 x double>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 8 x double> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmacc.mask.nxv8f64.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmacc_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
deleted file mode 100644
index b2e1e235e9695..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv64.ll
+++ /dev/null
@@ -1,830 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v10, v11
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v12, v14
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v16, v20
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v10, v11
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v12, v14
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v16, v20
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwmsac.vf v8, fa0, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmsac.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwmsac.ll
index 82c4fad996e71..884ee36575b4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmsac.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x float> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x double> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmsac.nxv1f32.f16(
   <vscale x 1 x float>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x float> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmsac.mask.nxv1f32.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -456,7 +458,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmsac.nxv2f32.f16(
   <vscale x 2 x float>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x float> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmsac.mask.nxv2f32.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -502,7 +504,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmsac.nxv4f32.f16(
   <vscale x 4 x float>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x float> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmsac.mask.nxv4f32.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -548,7 +550,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmsac.nxv8f32.f16(
   <vscale x 8 x float>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 8 x float> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmsac.mask.nxv8f32.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmsac.nxv16f32.f16(
   <vscale x 16 x float>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 16 x float> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmsac.mask.nxv16f32.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x float> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmsac.nxv1f64.f32(
   <vscale x 1 x double>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 1 x double> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmsac.mask.nxv1f64.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -686,7 +688,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmsac.nxv2f64.f32(
   <vscale x 2 x double>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 2 x double> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmsac.mask.nxv2f64.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -732,7 +734,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmsac.nxv4f64.f32(
   <vscale x 4 x double>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 4 x double> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmsac.mask.nxv4f64.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -778,7 +780,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmsac.nxv8f64.f32(
   <vscale x 8 x double>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 8 x double> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmsac.mask.nxv8f64.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmsac_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
deleted file mode 100644
index 670c79975a2e0..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv32.ll
+++ /dev/null
@@ -1,830 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwmul.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vv v10, v8, v9
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vv v12, v8, v10
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vv v16, v8, v12
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vv v10, v8, v9
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vv v12, v8, v10
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vv v16, v8, v12
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v10, v8, fa0
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v12, v8, fa0
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v10, v8, fa0
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v12, v8, fa0
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v16, v8, fa0
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwmul.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
index fc7d8dcb59e31..b1ec8464047e1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwmul-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwmul.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -17,7 +19,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -27,10 +29,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -50,9 +52,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -63,7 +65,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -73,10 +75,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -96,9 +98,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -109,7 +111,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -119,10 +121,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -142,9 +144,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -155,7 +157,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -165,10 +167,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -188,9 +190,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,7 +203,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -211,10 +213,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.nxv16f16
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -234,9 +236,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -247,7 +249,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -257,10 +259,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -280,9 +282,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -293,7 +295,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -303,10 +305,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -326,9 +328,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -339,7 +341,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -349,10 +351,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -372,9 +374,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -385,7 +387,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -395,10 +397,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -418,9 +420,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -431,7 +433,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwmul.nxv1f32.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -441,10 +443,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwmul.mask.nxv1f32.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -464,9 +466,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -477,7 +479,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwmul.nxv2f32.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -487,10 +489,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwmul.mask.nxv2f32.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -510,9 +512,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -523,7 +525,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwmul.nxv4f32.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -533,10 +535,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwmul.mask.nxv4f32.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -556,9 +558,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -569,7 +571,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwmul.nxv8f32.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -579,10 +581,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwmul.mask.nxv8f32.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -594,7 +596,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -602,9 +604,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -615,7 +617,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwmul.nxv16f32.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -625,10 +627,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwmul.mask.nxv16f32.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -640,7 +642,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -648,9 +650,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -661,7 +663,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwmul.nxv1f64.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -671,10 +673,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwmul.mask.nxv1f64.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -686,7 +688,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -694,9 +696,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -707,7 +709,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwmul.nxv2f64.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -717,10 +719,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwmul.mask.nxv2f64.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -732,7 +734,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -740,9 +742,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -753,7 +755,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwmul.nxv4f64.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -763,10 +765,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwmul.mask.nxv4f64.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -778,7 +780,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -786,9 +788,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -799,7 +801,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwmul.nxv8f64.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -809,10 +811,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwmul.mask.nxv8f64.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwmul_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -824,7 +826,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
deleted file mode 100644
index ff2b40cfac2cd..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv64.ll
+++ /dev/null
@@ -1,830 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v10, v11
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v12, v14
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v16, v20
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v10, v11
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v12, v14
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v16, v20
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmacc.vf v8, fa0, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwnmacc.ll
index 02842609f5685..4ccd0f8c55835 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x float> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x double> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.nxv1f32.f16(
   <vscale x 1 x float>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x float> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmacc.mask.nxv1f32.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -456,7 +458,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.nxv2f32.f16(
   <vscale x 2 x float>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x float> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmacc.mask.nxv2f32.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -502,7 +504,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.nxv4f32.f16(
   <vscale x 4 x float>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x float> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmacc.mask.nxv4f32.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -548,7 +550,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.nxv8f32.f16(
   <vscale x 8 x float>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 8 x float> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmacc.mask.nxv8f32.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.nxv16f32.f16(
   <vscale x 16 x float>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 16 x float> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmacc.mask.nxv16f32.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x float> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.nxv1f64.f32(
   <vscale x 1 x double>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 1 x double> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmacc.mask.nxv1f64.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -686,7 +688,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.nxv2f64.f32(
   <vscale x 2 x double>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 2 x double> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmacc.mask.nxv2f64.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -732,7 +734,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.nxv4f64.f32(
   <vscale x 4 x double>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 4 x double> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmacc.mask.nxv4f64.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -778,7 +780,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.nxv8f64.f32(
   <vscale x 8 x double>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 8 x double> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmacc.mask.nxv8f64.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmacc_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
deleted file mode 100644
index 2fe370bb1d82f..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv64.ll
+++ /dev/null
@@ -1,830 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v10, v11
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v12, v14
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v16, v20
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v10, v11
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v12, v14
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v16, v20
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float>  @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    i64 %3)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float>  @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float>  @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    i64 %3)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float>  @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    i64 %3)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float>  @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    i64 %3)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double>  @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double>  @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double>  @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    i64 %3)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double>  @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    i64 %3)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwnmsac.vf v8, fa0, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac.ll
similarity index 89%
rename from llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwnmsac.ll
index fe9683ed15adb..26fcb06d89167 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwnmsac-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmsac.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x float> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x double> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.nxv1f32.f16(
   <vscale x 1 x float>,
   half,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float>  @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, i32 %3) nounwind {
+define <vscale x 1 x float>  @intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x float> %0,
     half %1,
     <vscale x 1 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x float> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x float> @llvm.riscv.vfwnmsac.mask.nxv1f32.f16(
   half,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f32_f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -456,7 +458,7 @@ entry:
     half %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x float> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.nxv2f32.f16(
   <vscale x 2 x float>,
   half,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float>  @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, i32 %3) nounwind {
+define <vscale x 2 x float>  @intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 2 x float> %0,
     half %1,
     <vscale x 2 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwnmsac.mask.nxv2f32.f16(
   half,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f32_f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -502,7 +504,7 @@ entry:
     half %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -511,9 +513,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.nxv4f32.f16(
   <vscale x 4 x float>,
   half,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float>  @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, i32 %3) nounwind {
+define <vscale x 4 x float>  @intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -524,7 +526,7 @@ entry:
     <vscale x 4 x float> %0,
     half %1,
     <vscale x 4 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x float> %a
 }
@@ -534,9 +536,9 @@ declare <vscale x 4 x float> @llvm.riscv.vfwnmsac.mask.nxv4f32.f16(
   half,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f32_f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -548,7 +550,7 @@ entry:
     half %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x float> %a
 }
@@ -557,9 +559,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.nxv8f32.f16(
   <vscale x 8 x float>,
   half,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float>  @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, i32 %3) nounwind {
+define <vscale x 8 x float>  @intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -570,7 +572,7 @@ entry:
     <vscale x 8 x float> %0,
     half %1,
     <vscale x 8 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x float> %a
 }
@@ -580,9 +582,9 @@ declare <vscale x 8 x float> @llvm.riscv.vfwnmsac.mask.nxv8f32.f16(
   half,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f32_f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -594,7 +596,7 @@ entry:
     half %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x float> %a
 }
@@ -603,9 +605,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.nxv16f32.f16(
   <vscale x 16 x float>,
   half,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float>  @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, i32 %3) nounwind {
+define <vscale x 16 x float>  @intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -616,7 +618,7 @@ entry:
     <vscale x 16 x float> %0,
     half %1,
     <vscale x 16 x half> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 16 x float> %a
 }
@@ -626,9 +628,9 @@ declare <vscale x 16 x float> @llvm.riscv.vfwnmsac.mask.nxv16f32.f16(
   half,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv16f32_f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -640,7 +642,7 @@ entry:
     half %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 16 x float> %a
 }
@@ -649,9 +651,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.nxv1f64.f32(
   <vscale x 1 x double>,
   float,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double>  @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, i32 %3) nounwind {
+define <vscale x 1 x double>  @intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -662,7 +664,7 @@ entry:
     <vscale x 1 x double> %0,
     float %1,
     <vscale x 1 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -672,9 +674,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwnmsac.mask.nxv1f64.f32(
   float,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv1f64_f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -686,7 +688,7 @@ entry:
     float %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -695,9 +697,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.nxv2f64.f32(
   <vscale x 2 x double>,
   float,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double>  @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x double>  @intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -708,7 +710,7 @@ entry:
     <vscale x 2 x double> %0,
     float %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x double> %a
 }
@@ -718,9 +720,9 @@ declare <vscale x 2 x double> @llvm.riscv.vfwnmsac.mask.nxv2f64.f32(
   float,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv2f64_f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -732,7 +734,7 @@ entry:
     float %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x double> %a
 }
@@ -741,9 +743,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.nxv4f64.f32(
   <vscale x 4 x double>,
   float,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double>  @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, i32 %3) nounwind {
+define <vscale x 4 x double>  @intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -754,7 +756,7 @@ entry:
     <vscale x 4 x double> %0,
     float %1,
     <vscale x 4 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 4 x double> %a
 }
@@ -764,9 +766,9 @@ declare <vscale x 4 x double> @llvm.riscv.vfwnmsac.mask.nxv4f64.f32(
   float,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv4f64_f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -778,7 +780,7 @@ entry:
     float %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 4 x double> %a
 }
@@ -787,9 +789,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.nxv8f64.f32(
   <vscale x 8 x double>,
   float,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double>  @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, i32 %3) nounwind {
+define <vscale x 8 x double>  @intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -800,7 +802,7 @@ entry:
     <vscale x 8 x double> %0,
     float %1,
     <vscale x 8 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 8 x double> %a
 }
@@ -810,9 +812,9 @@ declare <vscale x 8 x double> @llvm.riscv.vfwnmsac.mask.nxv8f64.f32(
   float,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwnmsac_mask_vf_nxv8f64_f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -824,7 +826,7 @@ entry:
     float %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll
deleted file mode 100644
index 2282bd5fbc860..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv64.ll
+++ /dev/null
@@ -1,508 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv1f16(
-  <vscale x 2 x float>,
-  <vscale x 1 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv1f16(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv1f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x half>,
-  <vscale x 2 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv1f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv2f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv2f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv4f16(
-  <vscale x 2 x float>,
-  <vscale x 4 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv4f16(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv4f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x half>,
-  <vscale x 2 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv4f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv8f16(
-  <vscale x 2 x float>,
-  <vscale x 8 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv8f16(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv8f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x half>,
-  <vscale x 2 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv8f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv16f16(
-  <vscale x 2 x float>,
-  <vscale x 16 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv16f16(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv16f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x half>,
-  <vscale x 2 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv16f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
-  <vscale x 2 x float>,
-  <vscale x 32 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
-    <vscale x 2 x float> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16(
-  <vscale x 2 x float>,
-  <vscale x 32 x half>,
-  <vscale x 2 x float>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16(
-    <vscale x 2 x float> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv1f32.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv1f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv2f32(
-  <vscale x 1 x double>,
-  <vscale x 2 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv2f32(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv2f32.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x float>,
-  <vscale x 1 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv2f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv4f32(
-  <vscale x 1 x double>,
-  <vscale x 4 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv4f32(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv4f32.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x float>,
-  <vscale x 1 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv4f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv8f32(
-  <vscale x 1 x double>,
-  <vscale x 8 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv8f32(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv8f32.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x float>,
-  <vscale x 1 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv8f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32(
-  <vscale x 1 x double>,
-  <vscale x 16 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32(
-    <vscale x 1 x double> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32(
-  <vscale x 1 x double>,
-  <vscale x 16 x float>,
-  <vscale x 1 x double>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfwredosum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32(
-    <vscale x 1 x double> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredosum.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwredosum.ll
index 37240159bd908..7eb16dd4b8a80 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwredosum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwredosum.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv1f16(
   <vscale x 2 x float>,
   <vscale x 1 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv1f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv1f16.nxv2f32
   <vscale x 1 x half>,
   <vscale x 2 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv1f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv2f16.nxv2f32
   <vscale x 2 x half>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv4f16(
   <vscale x 2 x float>,
   <vscale x 4 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv4f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv4f16.nxv2f32
   <vscale x 4 x half>,
   <vscale x 2 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv4f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv8f16(
   <vscale x 2 x float>,
   <vscale x 8 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv8f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv8f16.nxv2f32
   <vscale x 8 x half>,
   <vscale x 2 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv8f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv16f16(
   <vscale x 2 x float>,
   <vscale x 16 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv16f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv16f16.nxv2f3
   <vscale x 16 x half>,
   <vscale x 2 x float>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv16f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.nxv2f32.nxv32f16(
   <vscale x 2 x float>,
   <vscale x 32 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv2f32_nxv32f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 32 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredosum.mask.nxv2f32.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 2 x float>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv2f32_nxv32f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 32 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv1f32.nxv1f6
   <vscale x 1 x float>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv2f32(
   <vscale x 1 x double>,
   <vscale x 2 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv2f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv2f32.nxv1f6
   <vscale x 2 x float>,
   <vscale x 1 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv2f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv4f32(
   <vscale x 1 x double>,
   <vscale x 4 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv4f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv4f32.nxv1f6
   <vscale x 4 x float>,
   <vscale x 1 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv4f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv8f32(
   <vscale x 1 x double>,
   <vscale x 8 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv8f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv8f32.nxv1f6
   <vscale x 8 x float>,
   <vscale x 1 x double>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv8f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.nxv1f64.nxv16f32(
   <vscale x 1 x double>,
   <vscale x 16 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_vs_nxv1f64_nxv16f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 16 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredosum.mask.nxv1f64.nxv16f32.nxv1f
   <vscale x 16 x float>,
   <vscale x 1 x double>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredosum_mask_vs_nxv1f64_nxv16f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll
deleted file mode 100644
index 52bde877eb7d1..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv64.ll
+++ /dev/null
@@ -1,508 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv1f16(
-  <vscale x 2 x float>,
-  <vscale x 1 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv1f16(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 1 x half>,
-  <vscale x 2 x float>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv4f16(
-  <vscale x 2 x float>,
-  <vscale x 4 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv4f16(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 4 x half>,
-  <vscale x 2 x float>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv8f16(
-  <vscale x 2 x float>,
-  <vscale x 8 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv8f16(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 8 x half>,
-  <vscale x 2 x float>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv16f16(
-  <vscale x 2 x float>,
-  <vscale x 16 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv16f16(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 16 x half>,
-  <vscale x 2 x float>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv32f16(
-  <vscale x 2 x float>,
-  <vscale x 32 x half>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv32f16(
-    <vscale x 2 x float> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 2 x float> %2,
-    i64 %3)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16(
-  <vscale x 2 x float>,
-  <vscale x 32 x half>,
-  <vscale x 2 x float>,
-  <vscale x 32 x i1>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16(
-    <vscale x 2 x float> %0,
-    <vscale x 32 x half> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 32 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x double>,
-  <vscale x 1 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv2f32(
-  <vscale x 1 x double>,
-  <vscale x 2 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv2f32(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 2 x float>,
-  <vscale x 1 x double>,
-  <vscale x 2 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv4f32(
-  <vscale x 1 x double>,
-  <vscale x 4 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v10, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv4f32(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 4 x float>,
-  <vscale x 1 x double>,
-  <vscale x 4 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v10, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv8f32(
-  <vscale x 1 x double>,
-  <vscale x 8 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v12, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv8f32(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f64(
-  <vscale x 1 x double>,
-  <vscale x 8 x float>,
-  <vscale x 1 x double>,
-  <vscale x 8 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v12, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f64(
-    <vscale x 1 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv16f32(
-  <vscale x 1 x double>,
-  <vscale x 16 x float>,
-  <vscale x 1 x double>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i64 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v16, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv16f32(
-    <vscale x 1 x double> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 1 x double> %2,
-    i64 %3)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32(
-  <vscale x 1 x double>,
-  <vscale x 16 x float>,
-  <vscale x 1 x double>,
-  <vscale x 16 x i1>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
-; CHECK-NEXT:    vfwredusum.vs v8, v16, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32(
-    <vscale x 1 x double> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 1 x double> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4)
-
-  ret <vscale x 1 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwredusum.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwredusum.ll
index fe56d0c6bd0d0..897cd61fb437a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwredusum-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwredusum.ll
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv1f16(
   <vscale x 2 x float>,
   <vscale x 1 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv1f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -18,7 +20,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -28,9 +30,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv1f16.nxv2f32
   <vscale x 1 x half>,
   <vscale x 2 x float>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 1 x half> %1, <vscale x 2 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv1f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -51,9 +53,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -64,7 +66,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -74,9 +76,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv2f16.nxv2f32
   <vscale x 2 x half>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv2f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -97,9 +99,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv4f16(
   <vscale x 2 x float>,
   <vscale x 4 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv4f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -110,7 +112,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -120,9 +122,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv4f16.nxv2f32
   <vscale x 4 x half>,
   <vscale x 2 x float>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 4 x half> %1, <vscale x 2 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv4f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -143,9 +145,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv8f16(
   <vscale x 2 x float>,
   <vscale x 8 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv8f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -156,7 +158,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -166,9 +168,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv8f16.nxv2f32
   <vscale x 8 x half>,
   <vscale x 2 x float>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 8 x half> %1, <vscale x 2 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv8f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -189,9 +191,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv16f16(
   <vscale x 2 x float>,
   <vscale x 16 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv16f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -202,7 +204,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -212,9 +214,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv16f16.nxv2f3
   <vscale x 16 x half>,
   <vscale x 2 x float>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 16 x half> %1, <vscale x 2 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv16f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -235,9 +237,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.nxv2f32.nxv32f16(
   <vscale x 2 x float>,
   <vscale x 32 x half>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, i32 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv2f32_nxv32f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -248,7 +250,7 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 32 x half> %1,
     <vscale x 2 x float> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 2 x float> %a
 }
@@ -258,9 +260,9 @@ declare <vscale x 2 x float> @llvm.riscv.vfwredusum.mask.nxv2f32.nxv32f16(
   <vscale x 32 x half>,
   <vscale x 2 x float>,
   <vscale x 32 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32(<vscale x 2 x float> %0, <vscale x 32 x half> %1, <vscale x 2 x float> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv2f32_nxv32f16_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 32 x half> %1,
     <vscale x 2 x float> %2,
     <vscale x 32 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 2 x float> %a
 }
@@ -281,9 +283,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -294,7 +296,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -304,9 +306,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv1f32.nxv1f6
   <vscale x 1 x float>,
   <vscale x 1 x double>,
   <vscale x 1 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x double> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv1f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 1 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -327,9 +329,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv2f32(
   <vscale x 1 x double>,
   <vscale x 2 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv2f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -340,7 +342,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -350,9 +352,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv2f32.nxv1f6
   <vscale x 2 x float>,
   <vscale x 1 x double>,
   <vscale x 2 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 2 x float> %1, <vscale x 1 x double> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv2f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 2 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -373,9 +375,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv4f32(
   <vscale x 1 x double>,
   <vscale x 4 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv4f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -386,7 +388,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -396,9 +398,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv4f32.nxv1f6
   <vscale x 4 x float>,
   <vscale x 1 x double>,
   <vscale x 4 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 4 x float> %1, <vscale x 1 x double> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv4f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 4 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -419,9 +421,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv8f32(
   <vscale x 1 x double>,
   <vscale x 8 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv8f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -432,7 +434,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -442,9 +444,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv8f32.nxv1f6
   <vscale x 8 x float>,
   <vscale x 1 x double>,
   <vscale x 8 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 8 x float> %1, <vscale x 1 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv8f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 8 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
@@ -465,9 +467,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.nxv1f64.nxv16f32(
   <vscale x 1 x double>,
   <vscale x 16 x float>,
   <vscale x 1 x double>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, i32 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_vs_nxv1f64_nxv16f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -478,7 +480,7 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 16 x float> %1,
     <vscale x 1 x double> %2,
-    i32 %3)
+    iXLen %3)
 
   ret <vscale x 1 x double> %a
 }
@@ -488,9 +490,9 @@ declare <vscale x 1 x double> @llvm.riscv.vfwredusum.mask.nxv1f64.nxv16f32.nxv1f
   <vscale x 16 x float>,
   <vscale x 1 x double>,
   <vscale x 16 x i1>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64(<vscale x 1 x double> %0, <vscale x 16 x float> %1, <vscale x 1 x double> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwredusum_mask_vs_nxv1f64_nxv16f32_nxv1f64:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 1 x double> %2,
     <vscale x 16 x i1> %3,
-    i32 %4)
+    iXLen %4)
 
   ret <vscale x 1 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
deleted file mode 100644
index d4b0780f03c57..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv64.ll
+++ /dev/null
@@ -1,830 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16(
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16(
-    <vscale x 1 x half> %0,
-    <vscale x 1 x half> %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.nxv2f16(
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.nxv2f16(
-    <vscale x 2 x half> %0,
-    <vscale x 2 x half> %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16(
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vv v10, v8, v9
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16(
-    <vscale x 4 x half> %0,
-    <vscale x 4 x half> %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16(
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vv v12, v8, v10
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16(
-    <vscale x 8 x half> %0,
-    <vscale x 8 x half> %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16(
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vv v16, v8, v12
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16(
-    <vscale x 16 x half> %0,
-    <vscale x 16 x half> %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.nxv1f32(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vv v10, v8, v9
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.nxv1f32(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vv v10, v8, v9
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v10, v11, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vv v12, v8, v10
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v12, v14, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vv v16, v8, v12
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vv v8, v16, v20, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16(
-  <vscale x 1 x half>,
-  half,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16(
-    <vscale x 1 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  half,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16(
-  <vscale x 2 x half>,
-  half,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16(
-    <vscale x 2 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  half,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16(
-  <vscale x 4 x half>,
-  half,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v10, v8, fa0
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16(
-    <vscale x 4 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  half,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16(
-  <vscale x 8 x half>,
-  half,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v12, v8, fa0
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16(
-    <vscale x 8 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  half,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16(
-  <vscale x 16 x half>,
-  half,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v16, v8, fa0
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16(
-    <vscale x 16 x half> %0,
-    half %1,
-    i64 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  half,
-  <vscale x 16 x i1>,
-  i64,
-  i64);
-
-define <vscale x 16 x float> @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32(
-  <vscale x 1 x float>,
-  float,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v9, v8, fa0
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32(
-    <vscale x 1 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  float,
-  <vscale x 1 x i1>,
-  i64,
-  i64);
-
-define <vscale x 1 x double> @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32(
-  <vscale x 2 x float>,
-  float,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v10, v8, fa0
-; CHECK-NEXT:    vmv2r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32(
-    <vscale x 2 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  float,
-  <vscale x 2 x i1>,
-  i64,
-  i64);
-
-define <vscale x 2 x double> @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(
-  <vscale x 4 x float>,
-  float,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v12, v8, fa0
-; CHECK-NEXT:    vmv4r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(
-    <vscale x 4 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  float,
-  <vscale x 4 x i1>,
-  i64,
-  i64);
-
-define <vscale x 4 x double> @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32(
-  <vscale x 8 x float>,
-  float,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i64 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v16, v8, fa0
-; CHECK-NEXT:    vmv8r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32(
-    <vscale x 8 x float> %0,
-    float %1,
-    i64 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  float,
-  <vscale x 8 x i1>,
-  i64,
-  i64);
-
-define <vscale x 8 x double> @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.vf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
similarity index 86%
rename from llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
index e81121f848ddb..916abcae0de0d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -17,7 +19,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.nxv1f16(
     <vscale x 1 x half> %0,
     <vscale x 1 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -27,10 +29,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f32_nxv1f16_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -42,7 +44,7 @@ entry:
     <vscale x 1 x half> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -50,9 +52,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -63,7 +65,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.nxv2f16(
     <vscale x 2 x half> %0,
     <vscale x 2 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -73,10 +75,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.nxv2f16(
   <vscale x 2 x half>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f32_nxv2f16_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -88,7 +90,7 @@ entry:
     <vscale x 2 x half> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -96,9 +98,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -109,7 +111,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.nxv4f16(
     <vscale x 4 x half> %0,
     <vscale x 4 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -119,10 +121,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.nxv4f16(
   <vscale x 4 x half>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f32_nxv4f16_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -134,7 +136,7 @@ entry:
     <vscale x 4 x half> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -142,9 +144,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -155,7 +157,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.nxv8f16(
     <vscale x 8 x half> %0,
     <vscale x 8 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -165,10 +167,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.nxv8f16(
   <vscale x 8 x half>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f32_nxv8f16_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -180,7 +182,7 @@ entry:
     <vscale x 8 x half> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -188,9 +190,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16(
   <vscale x 16 x half>,
   <vscale x 16 x half>,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x half> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -201,7 +203,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.nxv16f16(
     <vscale x 16 x half> %0,
     <vscale x 16 x half> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -211,10 +213,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.nxv16f16
   <vscale x 16 x half>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv16f32_nxv16f16_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -226,7 +228,7 @@ entry:
     <vscale x 16 x half> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -234,9 +236,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -247,7 +249,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.nxv1f32(
     <vscale x 1 x float> %0,
     <vscale x 1 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -257,10 +259,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.nxv1f32(
   <vscale x 1 x float>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv1f64_nxv1f32_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -272,7 +274,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -280,9 +282,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -293,7 +295,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.nxv2f32(
     <vscale x 2 x float> %0,
     <vscale x 2 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -303,10 +305,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.nxv2f32(
   <vscale x 2 x float>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv2f64_nxv2f32_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -318,7 +320,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -326,9 +328,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -339,7 +341,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.nxv4f32(
     <vscale x 4 x float> %0,
     <vscale x 4 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -349,10 +351,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.nxv4f32(
   <vscale x 4 x float>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv4f64_nxv4f32_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -364,7 +366,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -372,9 +374,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -385,7 +387,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.nxv8f32(
     <vscale x 8 x float> %0,
     <vscale x 8 x float> %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -395,10 +397,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.nxv8f32(
   <vscale x 8 x float>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vv_nxv8f64_nxv8f32_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -410,7 +412,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -418,9 +420,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -431,7 +433,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.nxv1f32.nxv1f16.f16(
     <vscale x 1 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -441,10 +443,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.mask.nxv1f32.nxv1f16.f16(
   <vscale x 1 x half>,
   half,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f32_nxv1f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -456,7 +458,7 @@ entry:
     <vscale x 1 x half> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -464,9 +466,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -477,7 +479,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.nxv2f32.nxv2f16.f16(
     <vscale x 2 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -487,10 +489,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.mask.nxv2f32.nxv2f16.f16(
   <vscale x 2 x half>,
   half,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f32_nxv2f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -502,7 +504,7 @@ entry:
     <vscale x 2 x half> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -510,9 +512,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -523,7 +525,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.nxv4f32.nxv4f16.f16(
     <vscale x 4 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -533,10 +535,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.mask.nxv4f32.nxv4f16.f16(
   <vscale x 4 x half>,
   half,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f32_nxv4f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -548,7 +550,7 @@ entry:
     <vscale x 4 x half> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -556,9 +558,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -569,7 +571,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.nxv8f32.nxv8f16.f16(
     <vscale x 8 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -579,10 +581,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.mask.nxv8f32.nxv8f16.f16(
   <vscale x 8 x half>,
   half,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f32_nxv8f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -594,7 +596,7 @@ entry:
     <vscale x 8 x half> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -602,9 +604,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
-  i32);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, i32 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x half> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -615,7 +617,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.nxv16f32.nxv16f16.f16(
     <vscale x 16 x half> %0,
     half %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -625,10 +627,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.mask.nxv16f32.nxv16f16.f16(
   <vscale x 16 x half>,
   half,
   <vscale x 16 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv16f32_nxv16f16_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -640,7 +642,7 @@ entry:
     <vscale x 16 x half> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -648,9 +650,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -661,7 +663,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.nxv1f64.nxv1f32.f32(
     <vscale x 1 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -671,10 +673,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.mask.nxv1f64.nxv1f32.f32(
   <vscale x 1 x float>,
   float,
   <vscale x 1 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv1f64_nxv1f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -686,7 +688,7 @@ entry:
     <vscale x 1 x float> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -694,9 +696,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -707,7 +709,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.nxv2f64.nxv2f32.f32(
     <vscale x 2 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -717,10 +719,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.mask.nxv2f64.nxv2f32.f32(
   <vscale x 2 x float>,
   float,
   <vscale x 2 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv2f64_nxv2f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -732,7 +734,7 @@ entry:
     <vscale x 2 x float> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -740,9 +742,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -753,7 +755,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.nxv4f64.nxv4f32.f32(
     <vscale x 4 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -763,10 +765,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.mask.nxv4f64.nxv4f32.f32(
   <vscale x 4 x float>,
   float,
   <vscale x 4 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv4f64_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -778,7 +780,7 @@ entry:
     <vscale x 4 x float> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -786,9 +788,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
-  i32);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, i32 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x float> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -799,7 +801,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.nxv8f64.nxv8f32.f32(
     <vscale x 8 x float> %0,
     float %1,
-    i32 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -809,10 +811,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.mask.nxv8f64.nxv8f32.f32(
   <vscale x 8 x float>,
   float,
   <vscale x 8 x i1>,
-  i32,
-  i32);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub_mask_vf_nxv8f64_nxv8f32_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -824,7 +826,7 @@ entry:
     <vscale x 8 x float> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
deleted file mode 100644
index da2290be93d27..0000000000000
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
+++ /dev/null
@@ -1,1248 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=ilp32d < %s | FileCheck %s
-declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  <vscale x 1 x half>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x half> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  <vscale x 2 x half>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x half> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  <vscale x 4 x half>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x half> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  <vscale x 8 x half>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x half> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  <vscale x 16 x half>,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl4re16.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    <vscale x 16 x half> %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v9
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.nxv1f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  <vscale x 1 x float>,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v9, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    <vscale x 1 x float> %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.nxv2f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  <vscale x 2 x float>,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v10, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    <vscale x 2 x float> %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.nxv4f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  <vscale x 4 x float>,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v12, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    <vscale x 4 x float> %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  <vscale x 8 x float>,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vl4re32.v v24, (a0)
-; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v16, v24, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    <vscale x 8 x float> %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.f16(
-  <vscale x 1 x float>,
-  half,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
-  <vscale x 1 x float>,
-  <vscale x 1 x float>,
-  half,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %1,
-    half %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.f16(
-  <vscale x 2 x float>,
-  half,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
-  <vscale x 2 x float>,
-  <vscale x 2 x float>,
-  half,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %1,
-    half %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.f16(
-  <vscale x 4 x float>,
-  half,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
-  <vscale x 4 x float>,
-  <vscale x 4 x float>,
-  half,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %1,
-    half %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.f16(
-  <vscale x 8 x float>,
-  half,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
-  <vscale x 8 x float>,
-  <vscale x 8 x float>,
-  half,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %1,
-    half %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.f16(
-  <vscale x 16 x float>,
-  half,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    half %1,
-    i32 %2)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
-  <vscale x 16 x float>,
-  <vscale x 16 x float>,
-  half,
-  <vscale x 16 x i1>,
-  i32,
-  i32);
-
-define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %1,
-    half %2,
-    <vscale x 16 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.f32(
-  <vscale x 1 x double>,
-  float,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
-  <vscale x 1 x double>,
-  <vscale x 1 x double>,
-  float,
-  <vscale x 1 x i1>,
-  i32,
-  i32);
-
-define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v9, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %1,
-    float %2,
-    <vscale x 1 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.f32(
-  <vscale x 2 x double>,
-  float,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
-  <vscale x 2 x double>,
-  <vscale x 2 x double>,
-  float,
-  <vscale x 2 x i1>,
-  i32,
-  i32);
-
-define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v10, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %1,
-    float %2,
-    <vscale x 2 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.f32(
-  <vscale x 4 x double>,
-  float,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
-  <vscale x 4 x double>,
-  <vscale x 4 x double>,
-  float,
-  <vscale x 4 x i1>,
-  i32,
-  i32);
-
-define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v12, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %1,
-    float %2,
-    <vscale x 4 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.f32(
-  <vscale x 8 x double>,
-  float,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    float %1,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
-
-declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
-  <vscale x 8 x double>,
-  <vscale x 8 x double>,
-  float,
-  <vscale x 8 x i1>,
-  i32,
-  i32);
-
-define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, i32 %4) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v16, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %1,
-    float %2,
-    <vscale x 8 x i1> %3,
-    i32 %4, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %0,
-    <vscale x 1 x half> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %0,
-    <vscale x 2 x half> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %0,
-    <vscale x 4 x half> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %0,
-    <vscale x 8 x half> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %0,
-    <vscale x 16 x half> %1,
-    <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v9, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %0,
-    <vscale x 1 x float> %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v10, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %0,
-    <vscale x 2 x float> %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v12, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %0,
-    <vscale x 4 x float> %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v8, v8, v16, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %0,
-    <vscale x 8 x float> %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
-    <vscale x 1 x float> %0,
-    <vscale x 1 x float> %0,
-    half %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x float> %a
-}
-
-define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
-    <vscale x 2 x float> %0,
-    <vscale x 2 x float> %0,
-    half %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x float> %a
-}
-
-define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
-    <vscale x 4 x float> %0,
-    <vscale x 4 x float> %0,
-    half %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x float> %a
-}
-
-define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
-    <vscale x 8 x float> %0,
-    <vscale x 8 x float> %0,
-    half %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x float> %a
-}
-
-define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
-    <vscale x 16 x float> %0,
-    <vscale x 16 x float> %0,
-    half %1,
-    <vscale x 16 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 16 x float> %a
-}
-
-define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
-    <vscale x 1 x double> %0,
-    <vscale x 1 x double> %0,
-    float %1,
-    <vscale x 1 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 1 x double> %a
-}
-
-define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
-    <vscale x 2 x double> %0,
-    <vscale x 2 x double> %0,
-    float %1,
-    <vscale x 2 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 2 x double> %a
-}
-
-define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
-    <vscale x 4 x double> %0,
-    <vscale x 4 x double> %0,
-    float %1,
-    <vscale x 4 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 4 x double> %a
-}
-
-define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, i32 %3) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wf v8, v8, fa0, v0.t
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
-    <vscale x 8 x double> %0,
-    <vscale x 8 x double> %0,
-    float %1,
-    <vscale x 8 x i1> %2,
-    i32 %3, i32 1)
-
-  ret <vscale x 8 x double> %a
-}
-
-define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v10, v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
-    <vscale x 1 x float> %1,
-    <vscale x 1 x half> %0,
-    i32 %2)
-
-  ret <vscale x 1 x float> %a
-}
-
-define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v10, v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
-    <vscale x 2 x float> %1,
-    <vscale x 2 x half> %0,
-    i32 %2)
-
-  ret <vscale x 2 x float> %a
-}
-
-define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wv v12, v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
-    <vscale x 4 x float> %1,
-    <vscale x 4 x half> %0,
-    i32 %2)
-
-  ret <vscale x 4 x float> %a
-}
-
-define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v16, v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
-    <vscale x 8 x float> %1,
-    <vscale x 8 x half> %0,
-    i32 %2)
-
-  ret <vscale x 8 x float> %a
-}
-
-define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v10, v9, v8
-; CHECK-NEXT:    vmv1r.v v8, v10
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
-    <vscale x 1 x double> %1,
-    <vscale x 1 x float> %0,
-    i32 %2)
-
-  ret <vscale x 1 x double> %a
-}
-
-define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfwsub.wv v12, v10, v8
-; CHECK-NEXT:    vmv2r.v v8, v12
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
-    <vscale x 2 x double> %1,
-    <vscale x 2 x float> %0,
-    i32 %2)
-
-  ret <vscale x 2 x double> %a
-}
-
-define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfwsub.wv v16, v12, v8
-; CHECK-NEXT:    vmv4r.v v8, v16
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
-    <vscale x 4 x double> %1,
-    <vscale x 4 x float> %0,
-    i32 %2)
-
-  ret <vscale x 4 x double> %a
-}
-
-define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
-; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfwsub.wv v24, v16, v8
-; CHECK-NEXT:    vmv8r.v v8, v24
-; CHECK-NEXT:    ret
-entry:
-  %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
-    <vscale x 8 x double> %1,
-    <vscale x 8 x float> %0,
-    i32 %2)
-
-  ret <vscale x 8 x double> %a
-}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
similarity index 88%
rename from llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
rename to llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
index ec0bd527dafe6..b5d008c3e1ed5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll
@@ -1,12 +1,14 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
-; RUN:   -target-abi=lp64d < %s | FileCheck %s
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh \
+; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -16,7 +18,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -26,10 +28,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.nxv1f16(
   <vscale x 1 x float>,
   <vscale x 1 x half>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, <vscale x 1 x half> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f32_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -41,7 +43,7 @@ entry:
     <vscale x 1 x float> %1,
     <vscale x 1 x half> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -49,9 +51,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -61,7 +63,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -71,10 +73,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.nxv2f16(
   <vscale x 2 x float>,
   <vscale x 2 x half>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, <vscale x 2 x half> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f32_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -86,7 +88,7 @@ entry:
     <vscale x 2 x float> %1,
     <vscale x 2 x half> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -94,9 +96,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -106,7 +108,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -116,10 +118,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.nxv4f16(
   <vscale x 4 x float>,
   <vscale x 4 x half>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, <vscale x 4 x half> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -131,7 +133,7 @@ entry:
     <vscale x 4 x float> %1,
     <vscale x 4 x half> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -139,9 +141,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -151,7 +153,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -161,10 +163,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.nxv8f16(
   <vscale x 8 x float>,
   <vscale x 8 x half>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, <vscale x 8 x half> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -176,7 +178,7 @@ entry:
     <vscale x 8 x float> %1,
     <vscale x 8 x half> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -184,9 +186,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -196,7 +198,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -206,10 +208,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16(
   <vscale x 16 x float>,
   <vscale x 16 x half>,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x half> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl4re16.v v24, (a0)
@@ -222,7 +224,7 @@ entry:
     <vscale x 16 x float> %1,
     <vscale x 16 x half> %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -230,9 +232,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -242,7 +244,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -252,10 +254,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.nxv1f32(
   <vscale x 1 x double>,
   <vscale x 1 x float>,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, <vscale x 1 x float> %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv1f64_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -267,7 +269,7 @@ entry:
     <vscale x 1 x double> %1,
     <vscale x 1 x float> %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -275,9 +277,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -287,7 +289,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -297,10 +299,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.nxv2f32(
   <vscale x 2 x double>,
   <vscale x 2 x float>,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, <vscale x 2 x float> %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -312,7 +314,7 @@ entry:
     <vscale x 2 x double> %1,
     <vscale x 2 x float> %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -320,9 +322,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -332,7 +334,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -342,10 +344,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.nxv4f32(
   <vscale x 4 x double>,
   <vscale x 4 x float>,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, <vscale x 4 x float> %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -357,7 +359,7 @@ entry:
     <vscale x 4 x double> %1,
     <vscale x 4 x float> %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -365,9 +367,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -377,7 +379,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -387,10 +389,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32(
   <vscale x 8 x double>,
   <vscale x 8 x float>,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x float> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vl4re32.v v24, (a0)
@@ -403,7 +405,7 @@ entry:
     <vscale x 8 x double> %1,
     <vscale x 8 x float> %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
@@ -411,9 +413,9 @@ entry:
 declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.f16(
   <vscale x 1 x float>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -423,7 +425,7 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.f16(
     <vscale x 1 x float> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
@@ -433,10 +435,10 @@ declare <vscale x 1 x float> @llvm.riscv.vfwsub.w.mask.nxv1f32.f16(
   <vscale x 1 x float>,
   half,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, <vscale x 1 x float> %1, half %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -448,7 +450,7 @@ entry:
     <vscale x 1 x float> %1,
     half %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
@@ -456,9 +458,9 @@ entry:
 declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.f16(
   <vscale x 2 x float>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -468,7 +470,7 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.f16(
     <vscale x 2 x float> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
@@ -478,10 +480,10 @@ declare <vscale x 2 x float> @llvm.riscv.vfwsub.w.mask.nxv2f32.f16(
   <vscale x 2 x float>,
   half,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, <vscale x 2 x float> %1, half %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -493,7 +495,7 @@ entry:
     <vscale x 2 x float> %1,
     half %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
@@ -501,9 +503,9 @@ entry:
 declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.f16(
   <vscale x 4 x float>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -513,7 +515,7 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.f16(
     <vscale x 4 x float> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
@@ -523,10 +525,10 @@ declare <vscale x 4 x float> @llvm.riscv.vfwsub.w.mask.nxv4f32.f16(
   <vscale x 4 x float>,
   half,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, <vscale x 4 x float> %1, half %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -538,7 +540,7 @@ entry:
     <vscale x 4 x float> %1,
     half %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
@@ -546,9 +548,9 @@ entry:
 declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.f16(
   <vscale x 8 x float>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -558,7 +560,7 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.f16(
     <vscale x 8 x float> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
@@ -568,10 +570,10 @@ declare <vscale x 8 x float> @llvm.riscv.vfwsub.w.mask.nxv8f32.f16(
   <vscale x 8 x float>,
   half,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, <vscale x 8 x float> %1, half %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -583,7 +585,7 @@ entry:
     <vscale x 8 x float> %1,
     half %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
@@ -591,9 +593,9 @@ entry:
 declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.f16(
   <vscale x 16 x float>,
   half,
-  i64);
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, i64 %2) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -603,7 +605,7 @@ entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.f16(
     <vscale x 16 x float> %0,
     half %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 16 x float> %a
 }
@@ -613,10 +615,10 @@ declare <vscale x 16 x float> @llvm.riscv.vfwsub.w.mask.nxv16f32.f16(
   <vscale x 16 x float>,
   half,
   <vscale x 16 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, half %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -628,7 +630,7 @@ entry:
     <vscale x 16 x float> %1,
     half %2,
     <vscale x 16 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
@@ -636,9 +638,9 @@ entry:
 declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.f32(
   <vscale x 1 x double>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -648,7 +650,7 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.f32(
     <vscale x 1 x double> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
@@ -658,10 +660,10 @@ declare <vscale x 1 x double> @llvm.riscv.vfwsub.w.mask.nxv1f64.f32(
   <vscale x 1 x double>,
   float,
   <vscale x 1 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, i64 %4) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, <vscale x 1 x double> %1, float %2, <vscale x 1 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -673,7 +675,7 @@ entry:
     <vscale x 1 x double> %1,
     float %2,
     <vscale x 1 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
@@ -681,9 +683,9 @@ entry:
 declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.f32(
   <vscale x 2 x double>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -693,7 +695,7 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.f32(
     <vscale x 2 x double> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
@@ -703,10 +705,10 @@ declare <vscale x 2 x double> @llvm.riscv.vfwsub.w.mask.nxv2f64.f32(
   <vscale x 2 x double>,
   float,
   <vscale x 2 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, i64 %4) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, <vscale x 2 x double> %1, float %2, <vscale x 2 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -718,7 +720,7 @@ entry:
     <vscale x 2 x double> %1,
     float %2,
     <vscale x 2 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
@@ -726,9 +728,9 @@ entry:
 declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.f32(
   <vscale x 4 x double>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -738,7 +740,7 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.f32(
     <vscale x 4 x double> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
@@ -748,10 +750,10 @@ declare <vscale x 4 x double> @llvm.riscv.vfwsub.w.mask.nxv4f64.f32(
   <vscale x 4 x double>,
   float,
   <vscale x 4 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, i64 %4) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, <vscale x 4 x double> %1, float %2, <vscale x 4 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -763,7 +765,7 @@ entry:
     <vscale x 4 x double> %1,
     float %2,
     <vscale x 4 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
@@ -771,9 +773,9 @@ entry:
 declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.f32(
   <vscale x 8 x double>,
   float,
-  i64);
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -783,7 +785,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.f32(
     <vscale x 8 x double> %0,
     float %1,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }
@@ -793,10 +795,10 @@ declare <vscale x 8 x double> @llvm.riscv.vfwsub.w.mask.nxv8f64.f32(
   <vscale x 8 x double>,
   float,
   <vscale x 8 x i1>,
-  i64,
-  i64);
+  iXLen,
+  iXLen);
 
-define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, <vscale x 8 x double> %1, float %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -808,12 +810,12 @@ entry:
     <vscale x 8 x double> %1,
     float %2,
     <vscale x 8 x i1> %3,
-    i64 %4, i64 1)
+    iXLen %4, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
 
-define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x float> %0, <vscale x 1 x half> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f32_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -825,12 +827,12 @@ entry:
     <vscale x 1 x float> %0,
     <vscale x 1 x half> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
-define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x float> %0, <vscale x 2 x half> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f32_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -842,12 +844,12 @@ entry:
     <vscale x 2 x float> %0,
     <vscale x 2 x half> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
-define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x float> %0, <vscale x 4 x half> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -859,12 +861,12 @@ entry:
     <vscale x 4 x float> %0,
     <vscale x 4 x half> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
-define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x float> %0, <vscale x 8 x half> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -876,12 +878,12 @@ entry:
     <vscale x 8 x float> %0,
     <vscale x 8 x half> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
-define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x half> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv16f32_nxv16f32_nxv16f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -893,12 +895,12 @@ entry:
     <vscale x 16 x float> %0,
     <vscale x 16 x half> %1,
     <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
-define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x double> %0, <vscale x 1 x float> %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv1f64_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -910,12 +912,12 @@ entry:
     <vscale x 1 x double> %0,
     <vscale x 1 x float> %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
-define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x double> %0, <vscale x 2 x float> %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -927,12 +929,12 @@ entry:
     <vscale x 2 x double> %0,
     <vscale x 2 x float> %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
-define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x double> %0, <vscale x 4 x float> %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -944,12 +946,12 @@ entry:
     <vscale x 4 x double> %0,
     <vscale x 4 x float> %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
-define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x double> %0, <vscale x 8 x float> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_tie_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -961,12 +963,12 @@ entry:
     <vscale x 8 x double> %0,
     <vscale x 8 x float> %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
 
-define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16(<vscale x 1 x float> %0, half %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f32_nxv1f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -978,12 +980,12 @@ entry:
     <vscale x 1 x float> %0,
     half %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x float> %a
 }
 
-define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16(<vscale x 2 x float> %0, half %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f32_nxv2f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -995,12 +997,12 @@ entry:
     <vscale x 2 x float> %0,
     half %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x float> %a
 }
 
-define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16(<vscale x 4 x float> %0, half %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f32_nxv4f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -1012,12 +1014,12 @@ entry:
     <vscale x 4 x float> %0,
     half %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x float> %a
 }
 
-define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16(<vscale x 8 x float> %0, half %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f32_nxv8f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -1029,12 +1031,12 @@ entry:
     <vscale x 8 x float> %0,
     half %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x float> %a
 }
 
-define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, i64 %3) nounwind {
+define <vscale x 16 x float> @intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16(<vscale x 16 x float> %0, half %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv16f32_nxv16f32_f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, mu
@@ -1046,12 +1048,12 @@ entry:
     <vscale x 16 x float> %0,
     half %1,
     <vscale x 16 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 16 x float> %a
 }
 
-define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32(<vscale x 1 x double> %0, float %1, <vscale x 1 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv1f64_nxv1f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -1063,12 +1065,12 @@ entry:
     <vscale x 1 x double> %0,
     float %1,
     <vscale x 1 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 1 x double> %a
 }
 
-define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, i64 %3) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32(<vscale x 2 x double> %0, float %1, <vscale x 2 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv2f64_nxv2f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1080,12 +1082,12 @@ entry:
     <vscale x 2 x double> %0,
     float %1,
     <vscale x 2 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 2 x double> %a
 }
 
-define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, i64 %3) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32(<vscale x 4 x double> %0, float %1, <vscale x 4 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv4f64_nxv4f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1097,12 +1099,12 @@ entry:
     <vscale x 4 x double> %0,
     float %1,
     <vscale x 4 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 4 x double> %a
 }
 
-define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, i64 %3) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32(<vscale x 8 x double> %0, float %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wf_tie_nxv8f64_nxv8f64_f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1114,12 +1116,12 @@ entry:
     <vscale x 8 x double> %0,
     float %1,
     <vscale x 8 x i1> %2,
-    i64 %3, i64 1)
+    iXLen %3, iXLen 1)
 
   ret <vscale x 8 x double> %a
 }
 
-define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
@@ -1130,12 +1132,12 @@ entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
     <vscale x 1 x float> %1,
     <vscale x 1 x half> %0,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x float> %a
 }
 
-define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
@@ -1146,12 +1148,12 @@ entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
     <vscale x 2 x float> %1,
     <vscale x 2 x half> %0,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x float> %a
 }
 
-define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
@@ -1162,12 +1164,12 @@ entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
     <vscale x 4 x float> %1,
     <vscale x 4 x half> %0,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x float> %a
 }
 
-define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
@@ -1178,12 +1180,12 @@ entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
     <vscale x 8 x float> %1,
     <vscale x 8 x half> %0,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x float> %a
 }
 
-define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
+define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
@@ -1194,12 +1196,12 @@ entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
     <vscale x 1 x double> %1,
     <vscale x 1 x float> %0,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 1 x double> %a
 }
 
-define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
+define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
@@ -1210,12 +1212,12 @@ entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
     <vscale x 2 x double> %1,
     <vscale x 2 x float> %0,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 2 x double> %a
 }
 
-define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
+define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
@@ -1226,12 +1228,12 @@ entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
     <vscale x 4 x double> %1,
     <vscale x 4 x float> %0,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 4 x double> %a
 }
 
-define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
+define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
@@ -1242,7 +1244,7 @@ entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
     <vscale x 8 x double> %1,
     <vscale x 8 x float> %0,
-    i64 %2)
+    iXLen %2)
 
   ret <vscale x 8 x double> %a
 }

From f69379d0a43bbe14e58e45286de3ae1cf8a58147 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 18:03:12 +0000
Subject: [PATCH 295/946] [InstCombine] Add test coverage for PR48683

D108992 added self-multiply handling to KnownBits::mul but we don't use it yet..
---
 .../Transforms/InstCombine/mul-masked-bits.ll | 47 +++++++++++++++++--
 1 file changed, 44 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
index fcff725cdf6f1..4886cd581a284 100644
--- a/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
+++ b/llvm/test/Transforms/InstCombine/mul-masked-bits.ll
@@ -1,10 +1,10 @@
-; NOTE: Assertions have been autogenerated by update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @foo(i32 %x, i32 %y) {
 ; CHECK-LABEL: @foo(
-; CHECK-NEXT:    [[A:%.*]] = and i32 %x, 7
-; CHECK-NEXT:    [[B:%.*]] = and i32 %y, 7
+; CHECK-NEXT:    [[A:%.*]] = and i32 [[X:%.*]], 7
+; CHECK-NEXT:    [[B:%.*]] = and i32 [[Y:%.*]], 7
 ; CHECK-NEXT:    [[C:%.*]] = mul nuw nsw i32 [[A]], [[B]]
 ; CHECK-NEXT:    [[D:%.*]] = shl nuw i32 [[C]], 26
 ; CHECK-NEXT:    [[E:%.*]] = ashr exact i32 [[D]], 26
@@ -17,3 +17,44 @@ define i32 @foo(i32 %x, i32 %y) {
   %e = ashr i32 %d, 26
   ret i32 %e
 }
+
+; PR48683 'Quadratic Reciprocity' - and(mul(x,x),2) -> 0
+
+define i1 @PR48683(i32 %x) {
+; CHECK-LABEL: @PR48683(
+; CHECK-NEXT:    [[A:%.*]] = mul i32 [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = and i32 [[A]], 2
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[B]], 0
+; CHECK-NEXT:    ret i1 [[C]]
+;
+  %a = mul i32 %x, %x
+  %b = and i32 %a, 2
+  %c = icmp ne i32 %b, 0
+  ret i1 %c
+}
+
+define <4 x i1> @PR48683_vec(<4 x i32> %x) {
+; CHECK-LABEL: @PR48683_vec(
+; CHECK-NEXT:    [[A:%.*]] = mul <4 x i32> [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> [[A]], <i32 2, i32 2, i32 2, i32 2>
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <4 x i32> [[B]], zeroinitializer
+; CHECK-NEXT:    ret <4 x i1> [[C]]
+;
+  %a = mul <4 x i32> %x, %x
+  %b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 2>
+  %c = icmp ne <4 x i32> %b, zeroinitializer
+  ret <4 x i1> %c
+}
+
+define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) {
+; CHECK-LABEL: @PR48683_vec_undef(
+; CHECK-NEXT:    [[A:%.*]] = mul <4 x i32> [[X:%.*]], [[X]]
+; CHECK-NEXT:    [[B:%.*]] = and <4 x i32> [[A]], <i32 2, i32 2, i32 2, i32 undef>
+; CHECK-NEXT:    [[C:%.*]] = icmp ne <4 x i32> [[B]], zeroinitializer
+; CHECK-NEXT:    ret <4 x i1> [[C]]
+;
+  %a = mul <4 x i32> %x, %x
+  %b = and <4 x i32> %a, <i32 2, i32 2, i32 2, i32 undef>
+  %c = icmp ne <4 x i32> %b, zeroinitializer
+  ret <4 x i1> %c
+}

From 818cfb10c57487cd9c8b57d8136d9e42b31a50aa Mon Sep 17 00:00:00 2001
From: Casey Carter <Casey@Carter.net>
Date: Wed, 29 Dec 2021 16:02:00 -0800
Subject: [PATCH 296/946] [libcxx][test] Make MSVC `<charconv>` test compile
 when testing MSVC

<meme>How many layers of irony are you on?</meme>

Differential Revision: https://reviews.llvm.org/D117967
---
 .../test/std/utilities/charconv/charconv.msvc/test.pass.cpp  | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp
index 66854205937a2..1c4cdb9558b38 100644
--- a/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp
+++ b/libcxx/test/std/utilities/charconv/charconv.msvc/test.pass.cpp
@@ -31,6 +31,10 @@
 #  define sprintf_s snprintf
 #endif
 
+#ifdef _MSVC_STL_VERSION
+#include <xutility>
+using std::_Bit_cast;
+#else
 // FUNCTION TEMPLATE _Bit_cast
 template <class _To, class _From,
           std::enable_if_t<sizeof(_To) == sizeof(_From) && std::is_trivially_copyable_v<_To> &&
@@ -39,6 +43,7 @@ template <class _To, class _From,
 [[nodiscard]] constexpr _To _Bit_cast(const _From& _From_obj) noexcept {
   return __builtin_bit_cast(_To, _From_obj);
 }
+#endif
 
 // Includes Microsoft's test that tests the entire header.
 

From 8e382ae91b97161930a128e56774d6e1242b6514 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Sun, 23 Jan 2022 10:35:44 -0800
Subject: [PATCH 297/946] [Support] Simplify parallelForEach{,N}

* Merge parallel_for_each into parallelForEach (this removes 1 `Fn(...)` call)
* Change parallelForEach to use parallelForEachN
* Move parallelForEachN into Parallel.cpp

My x86-64 `lld` executable is 100KiB smaller.
No noticeable difference in performance.

Reviewed By: lattner

Differential Revision: https://reviews.llvm.org/D117510
---
 llvm/include/llvm/Support/Parallel.h | 80 ++--------------------------
 llvm/lib/Support/Parallel.cpp        | 32 +++++++++++
 2 files changed, 35 insertions(+), 77 deletions(-)

diff --git a/llvm/include/llvm/Support/Parallel.h b/llvm/include/llvm/Support/Parallel.h
index 5c3b26d5754c2..04caf5eac961d 100644
--- a/llvm/include/llvm/Support/Parallel.h
+++ b/llvm/include/llvm/Support/Parallel.h
@@ -130,64 +130,6 @@ void parallel_sort(RandomAccessIterator Start, RandomAccessIterator End,
 // improving to take the number of available cores into account.)
 enum { MaxTasksPerGroup = 1024 };
 
-template <class IterTy, class FuncTy>
-void parallel_for_each(IterTy Begin, IterTy End, FuncTy Fn) {
-  // If we have zero or one items, then do not incur the overhead of spinning up
-  // a task group.  They are surprisingly expensive, and because they do not
-  // support nested parallelism, a single entry task group can block parallel
-  // execution underneath them.
-  auto NumItems = std::distance(Begin, End);
-  if (NumItems <= 1) {
-    if (NumItems)
-      Fn(*Begin);
-    return;
-  }
-
-  // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
-  // overhead on large inputs.
-  ptrdiff_t TaskSize = NumItems / MaxTasksPerGroup;
-  if (TaskSize == 0)
-    TaskSize = 1;
-
-  TaskGroup TG;
-  while (TaskSize < std::distance(Begin, End)) {
-    TG.spawn([=, &Fn] { std::for_each(Begin, Begin + TaskSize, Fn); });
-    Begin += TaskSize;
-  }
-  std::for_each(Begin, End, Fn);
-}
-
-template <class IndexTy, class FuncTy>
-void parallel_for_each_n(IndexTy Begin, IndexTy End, FuncTy Fn) {
-  // If we have zero or one items, then do not incur the overhead of spinning up
-  // a task group.  They are surprisingly expensive, and because they do not
-  // support nested parallelism, a single entry task group can block parallel
-  // execution underneath them.
-  auto NumItems = End - Begin;
-  if (NumItems <= 1) {
-    if (NumItems)
-      Fn(Begin);
-    return;
-  }
-
-  // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
-  // overhead on large inputs.
-  ptrdiff_t TaskSize = NumItems / MaxTasksPerGroup;
-  if (TaskSize == 0)
-    TaskSize = 1;
-
-  TaskGroup TG;
-  IndexTy I = Begin;
-  for (; I + TaskSize < End; I += TaskSize) {
-    TG.spawn([=, &Fn] {
-      for (IndexTy J = I, E = I + TaskSize; J != E; ++J)
-        Fn(J);
-    });
-  }
-  for (IndexTy J = I; J < End; ++J)
-    Fn(J);
-}
-
 template <class IterTy, class ResultTy, class ReduceFuncTy,
           class TransformFuncTy>
 ResultTy parallel_transform_reduce(IterTy Begin, IterTy End, ResultTy Init,
@@ -251,27 +193,11 @@ void parallelSort(RandomAccessIterator Start, RandomAccessIterator End,
   llvm::sort(Start, End, Comp);
 }
 
+void parallelForEachN(size_t Begin, size_t End, function_ref<void(size_t)> Fn);
+
 template <class IterTy, class FuncTy>
 void parallelForEach(IterTy Begin, IterTy End, FuncTy Fn) {
-#if LLVM_ENABLE_THREADS
-  if (parallel::strategy.ThreadsRequested != 1) {
-    parallel::detail::parallel_for_each(Begin, End, Fn);
-    return;
-  }
-#endif
-  std::for_each(Begin, End, Fn);
-}
-
-template <class FuncTy>
-void parallelForEachN(size_t Begin, size_t End, FuncTy Fn) {
-#if LLVM_ENABLE_THREADS
-  if (parallel::strategy.ThreadsRequested != 1) {
-    parallel::detail::parallel_for_each_n(Begin, End, Fn);
-    return;
-  }
-#endif
-  for (size_t I = Begin; I != End; ++I)
-    Fn(I);
+  parallelForEachN(0, End - Begin, [&](size_t I) { Fn(Begin[I]); });
 }
 
 template <class IterTy, class ResultTy, class ReduceFuncTy,
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index 71e3a1362f7eb..4977c188f934f 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -174,3 +174,35 @@ void TaskGroup::spawn(std::function<void()> F) {
 } // namespace parallel
 } // namespace llvm
 #endif // LLVM_ENABLE_THREADS
+
+void llvm::parallelForEachN(size_t Begin, size_t End,
+                            llvm::function_ref<void(size_t)> Fn) {
+  // If we have zero or one items, then do not incur the overhead of spinning up
+  // a task group.  They are surprisingly expensive, and because they do not
+  // support nested parallelism, a single entry task group can block parallel
+  // execution underneath them.
+#if LLVM_ENABLE_THREADS
+  auto NumItems = End - Begin;
+  if (NumItems > 1 && parallel::strategy.ThreadsRequested != 1) {
+    // Limit the number of tasks to MaxTasksPerGroup to limit job scheduling
+    // overhead on large inputs.
+    auto TaskSize = NumItems / parallel::detail::MaxTasksPerGroup;
+    if (TaskSize == 0)
+      TaskSize = 1;
+
+    parallel::detail::TaskGroup TG;
+    for (; Begin + TaskSize < End; Begin += TaskSize) {
+      TG.spawn([=, &Fn] {
+        for (size_t I = Begin, E = Begin + TaskSize; I != E; ++I)
+          Fn(I);
+      });
+    }
+    for (; Begin != End; ++Begin)
+      Fn(Begin);
+    return;
+  }
+#endif
+
+  for (; Begin != End; ++Begin)
+    Fn(Begin);
+}

From 1a5dea9e2b97a74a277e82bfe010d521f1690eea Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes@tecnico.ulisboa.pt>
Date: Sun, 23 Jan 2022 19:06:21 +0000
Subject: [PATCH 298/946] [NewGVN][NFC] precommit tests for PR53277

---
 .../Transforms/NewGVN/phi-of-ops-loads.ll     | 148 ++++++++++++++++++
 1 file changed, 148 insertions(+)
 create mode 100644 llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll

diff --git a/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll b/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll
new file mode 100644
index 0000000000000..63a16d60dfc78
--- /dev/null
+++ b/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll
@@ -0,0 +1,148 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=newgvn -enable-phi-of-ops=true -S -o - %s | FileCheck %s
+
+define void @store-in-loop(i8* %p, i8* %q) {
+; CHECK-LABEL: @store-in-loop(
+; CHECK-NEXT:  bb56:
+; CHECK-NEXT:    br label [[BB57:%.*]]
+; CHECK:       bb57:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ]
+; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ 0, [[BB56]] ], [ [[INC:%.*]], [[BB229]] ]
+; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT:    [[N62]] = icmp ne i8 [[N60]], 2
+; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]]
+; CHECK:       bb229:
+; CHECK-NEXT:    [[INC]] = add i8 [[IDX]], 1
+; CHECK-NEXT:    store i8 [[INC]], i8* [[Q:%.*]], align 1
+; CHECK-NEXT:    br label [[BB57]]
+; CHECK:       bb237:
+; CHECK-NEXT:    ret void
+;
+bb56:
+  br label %bb57
+
+bb57:
+  %n59 = phi i1 [ false, %bb229 ], [ true, %bb56 ]
+  %idx = phi i8 [0, %bb56], [%inc, %bb229]
+  %n60 = load i8, i8* %p
+  %n62 = icmp ne i8 %n60, 2
+  %n63 = or i1 %n59, %n62
+  br i1 %n63, label %bb229, label %bb237
+
+bb229:
+  %inc = add i8 %idx, 1
+  store i8 %inc, i8* %q
+  br label %bb57
+
+bb237:
+  ret void
+}
+
+define void @no-alias-store-in-loop(i8* noalias %p, i8* noalias %q) {
+; CHECK-LABEL: @no-alias-store-in-loop(
+; CHECK-NEXT:  bb56:
+; CHECK-NEXT:    br label [[BB57:%.*]]
+; CHECK:       bb57:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ]
+; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ 0, [[BB56]] ], [ [[INC:%.*]], [[BB229]] ]
+; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT:    [[N62]] = icmp ne i8 [[N60]], 2
+; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]]
+; CHECK:       bb229:
+; CHECK-NEXT:    [[INC]] = add i8 [[IDX]], 1
+; CHECK-NEXT:    store i8 [[INC]], i8* [[Q:%.*]], align 1
+; CHECK-NEXT:    br label [[BB57]]
+; CHECK:       bb237:
+; CHECK-NEXT:    ret void
+;
+bb56:
+  br label %bb57
+
+bb57:
+  %n59 = phi i1 [ false, %bb229 ], [ true, %bb56 ]
+  %idx = phi i8 [0, %bb56], [%inc, %bb229]
+  %n60 = load i8, i8* %p
+  %n62 = icmp ne i8 %n60, 2
+  %n63 = or i1 %n59, %n62
+  br i1 %n63, label %bb229, label %bb237
+
+bb229:
+  %inc = add i8 %idx, 1
+  store i8 %inc, i8* %q
+  br label %bb57
+
+bb237:
+  ret void
+}
+
+define void @function-in-loop(i8* %p) {
+; CHECK-LABEL: @function-in-loop(
+; CHECK-NEXT:  bb56:
+; CHECK-NEXT:    br label [[BB57:%.*]]
+; CHECK:       bb57:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ]
+; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ]
+; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT:    [[N62]] = icmp ne i8 [[N60]], 2
+; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]]
+; CHECK:       bb229:
+; CHECK-NEXT:    call void @f()
+; CHECK-NEXT:    br label [[BB57]]
+; CHECK:       bb237:
+; CHECK-NEXT:    ret void
+;
+bb56:
+  br label %bb57
+
+bb57:
+  %n59 = phi i1 [ false, %bb229 ], [ true, %bb56 ]
+  %n60 = load i8, i8* %p
+  %n62 = icmp ne i8 %n60, 2
+  %n63 = or i1 %n59, %n62
+  br i1 %n63, label %bb229, label %bb237
+
+bb229:
+  call void @f()
+  br label %bb57
+
+bb237:
+  ret void
+}
+
+define void @nowrite-function-in-loop(i8* %p) {
+; CHECK-LABEL: @nowrite-function-in-loop(
+; CHECK-NEXT:  bb56:
+; CHECK-NEXT:    br label [[BB57:%.*]]
+; CHECK:       bb57:
+; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ]
+; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ]
+; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT:    [[N62]] = icmp ne i8 [[N60]], 2
+; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]]
+; CHECK:       bb229:
+; CHECK-NEXT:    call void @f() #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    br label [[BB57]]
+; CHECK:       bb237:
+; CHECK-NEXT:    ret void
+;
+bb56:
+  br label %bb57
+
+bb57:
+  %n59 = phi i1 [ false, %bb229 ], [ true, %bb56 ]
+  %n60 = load i8, i8* %p
+  %n62 = icmp ne i8 %n60, 2
+  %n63 = or i1 %n59, %n62
+  br i1 %n63, label %bb229, label %bb237
+
+bb229:
+  call void @f() inaccessiblememonly
+  br label %bb57
+
+bb237:
+  ret void
+}
+
+declare void @f()

From 7a29b0b58383e8ceb751144fe638c46cacc6fe40 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 11:07:12 -0800
Subject: [PATCH 299/946] [llvm] Fix header guards (NFC)

Identified with llvm-header-guard.
---
 llvm/include/llvm/Analysis/NoInferenceModelRunner.h | 2 +-
 llvm/include/llvm/Analysis/ReleaseModeModelRunner.h | 6 ++++++
 llvm/include/llvm/Demangle/ItaniumDemangle.h        | 6 +++---
 llvm/include/llvm/Demangle/StringView.h             | 4 ++--
 llvm/include/llvm/Demangle/Utility.h                | 4 ++--
 llvm/include/llvm/ProfileData/MemProfData.inc       | 4 ++--
 6 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/Analysis/NoInferenceModelRunner.h b/llvm/include/llvm/Analysis/NoInferenceModelRunner.h
index 1e8cb26cc3c14..5bcedf98865ca 100644
--- a/llvm/include/llvm/Analysis/NoInferenceModelRunner.h
+++ b/llvm/include/llvm/Analysis/NoInferenceModelRunner.h
@@ -40,4 +40,4 @@ class NoInferenceModelRunner : public MLModelRunner {
 };
 } // namespace llvm
 #endif // defined(LLVM_HAVE_TF_API)
-#endif // defined(LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H)
+#endif // LLVM_ANALYSIS_NOINFERENCEMODELRUNNER_H
diff --git a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
index bb19d2c7d9260..1bf2e853980c7 100644
--- a/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
+++ b/llvm/include/llvm/Analysis/ReleaseModeModelRunner.h
@@ -10,6 +10,10 @@
 // Only inference is supported.
 //
 //===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
+#define LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
+
 #include "llvm/Analysis/MLModelRunner.h"
 
 #include <memory>
@@ -70,3 +74,5 @@ class ReleaseModeModelRunner final : public MLModelRunner {
   std::unique_ptr<TGen> CompiledModel;
 };
 } // namespace llvm
+
+#endif // LLVM_ANALYSIS_RELEASEMODEMODELRUNNER_H
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 01f414a7257bf..4df092c70ee1f 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef DEMANGLE_ITANIUMDEMANGLE_H
-#define DEMANGLE_ITANIUMDEMANGLE_H
+#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H
 
 // FIXME: (possibly) incomplete list of features that clang mangles that this
 // file does not yet support:
@@ -5749,4 +5749,4 @@ struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> {
 
 DEMANGLE_NAMESPACE_END
 
-#endif // DEMANGLE_ITANIUMDEMANGLE_H
+#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h
index 7c8cb482ae1c1..6b5d01c09fe5f 100644
--- a/llvm/include/llvm/Demangle/StringView.h
+++ b/llvm/include/llvm/Demangle/StringView.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef DEMANGLE_STRINGVIEW_H
-#define DEMANGLE_STRINGVIEW_H
+#ifndef LLVM_DEMANGLE_STRINGVIEW_H
+#define LLVM_DEMANGLE_STRINGVIEW_H
 
 #include "DemangleConfig.h"
 #include <cassert>
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index 587c0e4bec36d..dcd12b0daa88b 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -10,8 +10,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef DEMANGLE_UTILITY_H
-#define DEMANGLE_UTILITY_H
+#ifndef LLVM_DEMANGLE_UTILITY_H
+#define LLVM_DEMANGLE_UTILITY_H
 
 #include "StringView.h"
 #include <array>
diff --git a/llvm/include/llvm/ProfileData/MemProfData.inc b/llvm/include/llvm/ProfileData/MemProfData.inc
index d64227e4ba31d..f2cb3738f0531 100644
--- a/llvm/include/llvm/ProfileData/MemProfData.inc
+++ b/llvm/include/llvm/ProfileData/MemProfData.inc
@@ -1,5 +1,5 @@
-#ifndef MEMPROF_DATA_INC
-#define MEMPROF_DATA_INC
+#ifndef LLVM_PROFILEDATA_MEMPROFDATA_INC
+#define LLVM_PROFILEDATA_MEMPROFDATA_INC
 /*===-- MemProfData.inc - MemProf profiling runtime structures -*- C++ -*-=== *\
 |*
 |* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

From abb0ed44957cb4ba1bc94d19202860f10369cea1 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 11:07:14 -0800
Subject: [PATCH 300/946] [Commands] Remove redundant member initialization
 (NFC)

Identified with readability-redundant-member-init.
---
 lldb/source/Commands/CommandCompletions.cpp   |  2 +-
 .../Commands/CommandObjectBreakpoint.cpp      | 56 +++++++------------
 .../CommandObjectBreakpointCommand.cpp        |  9 ++-
 .../source/Commands/CommandObjectCommands.cpp | 29 ++++------
 .../Commands/CommandObjectDisassemble.cpp     |  3 +-
 .../Commands/CommandObjectExpression.cpp      |  6 +-
 lldb/source/Commands/CommandObjectFrame.cpp   | 18 +++---
 lldb/source/Commands/CommandObjectHelp.cpp    |  3 +-
 lldb/source/Commands/CommandObjectHelp.h      |  2 +-
 lldb/source/Commands/CommandObjectLog.cpp     |  5 +-
 lldb/source/Commands/CommandObjectMemory.cpp  | 25 ++++-----
 .../Commands/CommandObjectMemoryTag.cpp       |  5 +-
 .../source/Commands/CommandObjectPlatform.cpp | 36 +++++-------
 lldb/source/Commands/CommandObjectProcess.cpp | 45 ++++++---------
 .../Commands/CommandObjectRegexCommand.cpp    |  2 +-
 .../source/Commands/CommandObjectRegister.cpp |  6 +-
 .../Commands/CommandObjectReproducer.cpp      |  6 +-
 lldb/source/Commands/CommandObjectScript.h    |  2 +-
 lldb/source/Commands/CommandObjectSession.cpp |  6 +-
 .../source/Commands/CommandObjectSettings.cpp | 15 ++---
 lldb/source/Commands/CommandObjectSource.cpp  | 12 ++--
 lldb/source/Commands/CommandObjectStats.cpp   |  2 +-
 lldb/source/Commands/CommandObjectTarget.cpp  | 50 +++++++----------
 lldb/source/Commands/CommandObjectThread.cpp  | 43 ++++++--------
 lldb/source/Commands/CommandObjectTrace.cpp   | 15 ++---
 lldb/source/Commands/CommandObjectType.cpp    | 44 +++++++--------
 .../Commands/CommandObjectWatchpoint.cpp      | 26 ++++-----
 .../CommandObjectWatchpointCommand.cpp        |  5 +-
 .../Commands/CommandOptionsProcessLaunch.h    |  2 +-
 29 files changed, 193 insertions(+), 287 deletions(-)

diff --git a/lldb/source/Commands/CommandCompletions.cpp b/lldb/source/Commands/CommandCompletions.cpp
index ff825cce813ec..ae1ee1fdd30b8 100644
--- a/lldb/source/Commands/CommandCompletions.cpp
+++ b/lldb/source/Commands/CommandCompletions.cpp
@@ -129,7 +129,7 @@ class SourceFileCompleter : public Completer {
 public:
   SourceFileCompleter(CommandInterpreter &interpreter,
                       CompletionRequest &request)
-      : Completer(interpreter, request), m_matching_files() {
+      : Completer(interpreter, request) {
     FileSpec partial_spec(m_request.GetCursorArgumentPrefix());
     m_file_name = partial_spec.GetFilename().GetCString();
     m_dir_name = partial_spec.GetDirectory().GetCString();
diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp
index 3f88a2fa63780..c4e55fdb3b9c0 100644
--- a/lldb/source/Commands/CommandObjectBreakpoint.cpp
+++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp
@@ -49,7 +49,7 @@ static void AddBreakpointDescription(Stream *s, Breakpoint *bp,
 
 class lldb_private::BreakpointOptionGroup : public OptionGroup {
 public:
-  BreakpointOptionGroup() : OptionGroup(), m_bp_opts(false) {}
+  BreakpointOptionGroup() : m_bp_opts(false) {}
 
   ~BreakpointOptionGroup() override = default;
 
@@ -179,7 +179,7 @@ class lldb_private::BreakpointOptionGroup : public OptionGroup {
 
 class BreakpointDummyOptionGroup : public OptionGroup {
 public:
-  BreakpointDummyOptionGroup() : OptionGroup() {}
+  BreakpointDummyOptionGroup() {}
 
   ~BreakpointDummyOptionGroup() override = default;
 
@@ -234,8 +234,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed {
             interpreter, "breakpoint set",
             "Sets a breakpoint or set of breakpoints in the executable.",
             "breakpoint set <cmd-options>"),
-        m_bp_opts(), m_python_class_options("scripted breakpoint", true, 'P'),
-        m_options() {
+        m_python_class_options("scripted breakpoint", true, 'P') {
     // We're picking up all the normal options, commands and disable.
     m_all_options.Append(&m_python_class_options,
                          LLDB_OPT_SET_1 | LLDB_OPT_SET_2, LLDB_OPT_SET_11);
@@ -253,9 +252,7 @@ class CommandObjectBreakpointSet : public CommandObjectParsed {
 
   class CommandOptions : public OptionGroup {
   public:
-    CommandOptions()
-        : OptionGroup(), m_condition(), m_filenames(), m_func_names(),
-          m_func_regexp(), m_source_text_regexp(), m_modules() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -809,8 +806,7 @@ class CommandObjectBreakpointModify : public CommandObjectParsed {
                             "created breakpoint.  "
                             "With the exception of -e, -d and -i, passing an "
                             "empty argument clears the modification.",
-                            nullptr),
-        m_options() {
+                            nullptr) {
     CommandArgumentEntry arg;
     CommandObject::AddIDsArgumentData(arg, eArgTypeBreakpointID,
                                       eArgTypeBreakpointIDRange);
@@ -1100,8 +1096,7 @@ class CommandObjectBreakpointList : public CommandObjectParsed {
       : CommandObjectParsed(
             interpreter, "breakpoint list",
             "List some or all breakpoints at configurable levels of detail.",
-            nullptr),
-        m_options() {
+            nullptr) {
     CommandArgumentEntry arg;
     CommandArgumentData bp_id_arg;
 
@@ -1123,7 +1118,7 @@ class CommandObjectBreakpointList : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1246,8 +1241,7 @@ class CommandObjectBreakpointClear : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "breakpoint clear",
                             "Delete or disable breakpoints matching the "
                             "specified source file and line.",
-                            "breakpoint clear <cmd-options>"),
-        m_options() {}
+                            "breakpoint clear <cmd-options>") {}
 
   ~CommandObjectBreakpointClear() override = default;
 
@@ -1255,7 +1249,7 @@ class CommandObjectBreakpointClear : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), m_filename() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1384,8 +1378,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "breakpoint delete",
                             "Delete the specified breakpoint(s).  If no "
                             "breakpoints are specified, delete them all.",
-                            nullptr),
-        m_options() {
+                            nullptr) {
     CommandArgumentEntry arg;
     CommandObject::AddIDsArgumentData(arg, eArgTypeBreakpointID,
                                       eArgTypeBreakpointIDRange);
@@ -1408,7 +1401,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1565,8 +1558,7 @@ class CommandObjectBreakpointDelete : public CommandObjectParsed {
 class BreakpointNameOptionGroup : public OptionGroup {
 public:
   BreakpointNameOptionGroup()
-      : OptionGroup(), m_breakpoint(LLDB_INVALID_BREAK_ID), m_use_dummy(false) {
-  }
+      : m_breakpoint(LLDB_INVALID_BREAK_ID), m_use_dummy(false) {}
 
   ~BreakpointNameOptionGroup() override = default;
 
@@ -1626,7 +1618,7 @@ class BreakpointNameOptionGroup : public OptionGroup {
 
 class BreakpointAccessOptionGroup : public OptionGroup {
 public:
-  BreakpointAccessOptionGroup() : OptionGroup() {}
+  BreakpointAccessOptionGroup() {}
 
   ~BreakpointAccessOptionGroup() override = default;
 
@@ -1696,8 +1688,7 @@ class CommandObjectBreakpointNameConfigure : public CommandObjectParsed {
             "the breakpoint, otherwise only the options specified will be set "
             "on the name.",
             "breakpoint name configure <command-options> "
-            "<breakpoint-name-list>"),
-        m_bp_opts(), m_option_group() {
+            "<breakpoint-name-list>") {
     // Create the first variant for the first (and only) argument for this
     // command.
     CommandArgumentEntry arg1;
@@ -1787,8 +1778,7 @@ class CommandObjectBreakpointNameAdd : public CommandObjectParsed {
   CommandObjectBreakpointNameAdd(CommandInterpreter &interpreter)
       : CommandObjectParsed(
             interpreter, "add", "Add a name to the breakpoints provided.",
-            "breakpoint name add <command-options> <breakpoint-id-list>"),
-        m_name_options(), m_option_group() {
+            "breakpoint name add <command-options> <breakpoint-id-list>") {
     // Create the first variant for the first (and only) argument for this
     // command.
     CommandArgumentEntry arg1;
@@ -1872,8 +1862,7 @@ class CommandObjectBreakpointNameDelete : public CommandObjectParsed {
       : CommandObjectParsed(
             interpreter, "delete",
             "Delete a name from the breakpoints provided.",
-            "breakpoint name delete <command-options> <breakpoint-id-list>"),
-        m_name_options(), m_option_group() {
+            "breakpoint name delete <command-options> <breakpoint-id-list>") {
     // Create the first variant for the first (and only) argument for this
     // command.
     CommandArgumentEntry arg1;
@@ -1956,8 +1945,7 @@ class CommandObjectBreakpointNameList : public CommandObjectParsed {
                             "List either the names for a breakpoint or info "
                             "about a given name.  With no arguments, lists all "
                             "names",
-                            "breakpoint name list <command-options>"),
-        m_name_options(), m_option_group() {
+                            "breakpoint name list <command-options>") {
     m_option_group.Append(&m_name_options, LLDB_OPT_SET_3, LLDB_OPT_SET_ALL);
     m_option_group.Finalize();
   }
@@ -2063,8 +2051,7 @@ class CommandObjectBreakpointRead : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "breakpoint read",
                             "Read and set the breakpoints previously saved to "
                             "a file with \"breakpoint write\".  ",
-                            nullptr),
-        m_options() {}
+                            nullptr) {}
 
   ~CommandObjectBreakpointRead() override = default;
 
@@ -2072,7 +2059,7 @@ class CommandObjectBreakpointRead : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -2245,8 +2232,7 @@ class CommandObjectBreakpointWrite : public CommandObjectParsed {
                             "Write the breakpoints listed to a file that can "
                             "be read in with \"breakpoint read\".  "
                             "If given no arguments, writes all breakpoints.",
-                            nullptr),
-        m_options() {
+                            nullptr) {
     CommandArgumentEntry arg;
     CommandObject::AddIDsArgumentData(arg, eArgTypeBreakpointID,
                                       eArgTypeBreakpointIDRange);
@@ -2269,7 +2255,7 @@ class CommandObjectBreakpointWrite : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp
index 26d35c82f57d0..637e8b8bd5783 100644
--- a/lldb/source/Commands/CommandObjectBreakpointCommand.cpp
+++ b/lldb/source/Commands/CommandObjectBreakpointCommand.cpp
@@ -69,7 +69,7 @@ class CommandObjectBreakpointCommandAdd : public CommandObjectParsed,
                             nullptr),
         IOHandlerDelegateMultiline("DONE",
                                    IOHandlerDelegate::Completion::LLDBCommand),
-        m_options(), m_func_options("breakpoint command", false, 'F') {
+        m_func_options("breakpoint command", false, 'F') {
     SetHelpLong(
         R"(
 General information about entering breakpoint commands
@@ -281,7 +281,7 @@ are no syntax errors may indicate that a function was declared but never called.
 
   class CommandOptions : public OptionGroup {
   public:
-    CommandOptions() : OptionGroup(), m_one_liner() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -479,8 +479,7 @@ class CommandObjectBreakpointCommandDelete : public CommandObjectParsed {
   CommandObjectBreakpointCommandDelete(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "delete",
                             "Delete the set of commands from a breakpoint.",
-                            nullptr),
-        m_options() {
+                            nullptr) {
     CommandArgumentEntry arg;
     CommandArgumentData bp_id_arg;
 
@@ -502,7 +501,7 @@ class CommandObjectBreakpointCommandDelete : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp
index defa21af7c170..4b4932dd367ba 100644
--- a/lldb/source/Commands/CommandObjectCommands.cpp
+++ b/lldb/source/Commands/CommandObjectCommands.cpp
@@ -38,8 +38,7 @@ class CommandObjectCommandsSource : public CommandObjectParsed {
       : CommandObjectParsed(
             interpreter, "command source",
             "Read and execute LLDB commands from the file <filename>.",
-            nullptr),
-        m_options() {
+            nullptr) {
     CommandArgumentEntry arg;
     CommandArgumentData file_arg;
 
@@ -76,8 +75,8 @@ class CommandObjectCommandsSource : public CommandObjectParsed {
   class CommandOptions : public Options {
   public:
     CommandOptions()
-        : Options(), m_stop_on_error(true), m_silent_run(false),
-          m_stop_on_continue(true), m_cmd_relative_to_command_file(false) {}
+        : m_stop_on_error(true), m_silent_run(false), m_stop_on_continue(true),
+          m_cmd_relative_to_command_file(false) {}
 
     ~CommandOptions() override = default;
 
@@ -207,7 +206,7 @@ class CommandObjectCommandsAlias : public CommandObjectRaw {
 protected:
   class CommandOptions : public OptionGroup {
   public:
-    CommandOptions() : OptionGroup(), m_help(), m_long_help() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -258,8 +257,7 @@ class CommandObjectCommandsAlias : public CommandObjectRaw {
   CommandObjectCommandsAlias(CommandInterpreter &interpreter)
       : CommandObjectRaw(
             interpreter, "command alias",
-            "Define a custom command in terms of an existing command."),
-        m_option_group(), m_command_options() {
+            "Define a custom command in terms of an existing command.") {
     m_option_group.Append(&m_command_options);
     m_option_group.Finalize();
 
@@ -793,8 +791,7 @@ class CommandObjectCommandsAddRegex : public CommandObjectParsed,
             "regular expressions.",
             "command regex <cmd-name> [s/<regex>/<subst>/ ...]"),
         IOHandlerDelegateMultiline("",
-                                   IOHandlerDelegate::Completion::LLDBCommand),
-        m_options() {
+                                   IOHandlerDelegate::Completion::LLDBCommand) {
     SetHelpLong(
         R"(
 )"
@@ -1025,7 +1022,7 @@ a number follows 'f':"
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1239,8 +1236,7 @@ class CommandObjectCommandsScriptImport : public CommandObjectParsed {
 public:
   CommandObjectCommandsScriptImport(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "command script import",
-                            "Import a scripting module in LLDB.", nullptr),
-        m_options() {
+                            "Import a scripting module in LLDB.", nullptr) {
     CommandArgumentEntry arg1;
     CommandArgumentData cmd_arg;
 
@@ -1271,7 +1267,7 @@ class CommandObjectCommandsScriptImport : public CommandObjectParsed {
 protected:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1395,7 +1391,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed,
                             "must be a path to a user-added container "
                             "command, and the last element will be the new "
                             "command name."),
-        IOHandlerDelegateMultiline("DONE"), m_options() {
+        IOHandlerDelegateMultiline("DONE") {
     CommandArgumentEntry arg1;
     CommandArgumentData cmd_arg;
 
@@ -1426,8 +1422,7 @@ class CommandObjectCommandsScriptAdd : public CommandObjectParsed,
 protected:
   class CommandOptions : public Options {
   public:
-    CommandOptions()
-        : Options(), m_class_name(), m_funct_name(), m_short_help() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1884,7 +1879,7 @@ class CommandObjectCommandsContainerAdd : public CommandObjectParsed {
 protected:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), m_short_help(), m_long_help() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectDisassemble.cpp b/lldb/source/Commands/CommandObjectDisassemble.cpp
index 02a16622c76b9..e3c40ed73cf63 100644
--- a/lldb/source/Commands/CommandObjectDisassemble.cpp
+++ b/lldb/source/Commands/CommandObjectDisassemble.cpp
@@ -30,8 +30,7 @@ using namespace lldb_private;
 #define LLDB_OPTIONS_disassemble
 #include "CommandOptions.inc"
 
-CommandObjectDisassemble::CommandOptions::CommandOptions()
-    : Options(), func_name(), plugin_name(), flavor_string(), arch() {
+CommandObjectDisassemble::CommandOptions::CommandOptions() {
   OptionParsingStarting(nullptr);
 }
 
diff --git a/lldb/source/Commands/CommandObjectExpression.cpp b/lldb/source/Commands/CommandObjectExpression.cpp
index 9d13ccab6d3ec..e1a289b219c3c 100644
--- a/lldb/source/Commands/CommandObjectExpression.cpp
+++ b/lldb/source/Commands/CommandObjectExpression.cpp
@@ -24,7 +24,7 @@
 using namespace lldb;
 using namespace lldb_private;
 
-CommandObjectExpression::CommandOptions::CommandOptions() : OptionGroup() {}
+CommandObjectExpression::CommandOptions::CommandOptions() {}
 
 CommandObjectExpression::CommandOptions::~CommandOptions() = default;
 
@@ -200,10 +200,10 @@ CommandObjectExpression::CommandObjectExpression(
                        "",
                        eCommandProcessMustBePaused | eCommandTryTargetAPILock),
       IOHandlerDelegate(IOHandlerDelegate::Completion::Expression),
-      m_option_group(), m_format_options(eFormatDefault),
+      m_format_options(eFormatDefault),
       m_repl_option(LLDB_OPT_SET_1, false, "repl", 'r', "Drop into REPL", false,
                     true),
-      m_command_options(), m_expr_line_count(0), m_expr_lines() {
+      m_command_options(), m_expr_line_count(0) {
   SetHelpLong(
       R"(
 Single and multi-line expressions:
diff --git a/lldb/source/Commands/CommandObjectFrame.cpp b/lldb/source/Commands/CommandObjectFrame.cpp
index 8dd1a79d38959..70881f2d00612 100644
--- a/lldb/source/Commands/CommandObjectFrame.cpp
+++ b/lldb/source/Commands/CommandObjectFrame.cpp
@@ -49,7 +49,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -110,8 +110,7 @@ class CommandObjectFrameDiagnose : public CommandObjectParsed {
                             nullptr,
                             eCommandRequiresThread | eCommandTryTargetAPILock |
                                 eCommandProcessMustBeLaunched |
-                                eCommandProcessMustBePaused),
-        m_options() {
+                                eCommandProcessMustBePaused) {
     CommandArgumentEntry arg;
     CommandArgumentData index_arg;
 
@@ -222,7 +221,7 @@ class CommandObjectFrameSelect : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -267,8 +266,7 @@ class CommandObjectFrameSelect : public CommandObjectParsed {
                             nullptr,
                             eCommandRequiresThread | eCommandTryTargetAPILock |
                                 eCommandProcessMustBeLaunched |
-                                eCommandProcessMustBePaused),
-        m_options() {
+                                eCommandProcessMustBePaused) {
     CommandArgumentEntry arg;
     CommandArgumentData index_arg;
 
@@ -399,10 +397,9 @@ class CommandObjectFrameVariable : public CommandObjectParsed {
             eCommandRequiresFrame | eCommandTryTargetAPILock |
                 eCommandProcessMustBeLaunched | eCommandProcessMustBePaused |
                 eCommandRequiresProcess),
-        m_option_group(),
         m_option_variable(
             true), // Include the frame specific options by passing "true"
-        m_option_format(eFormatDefault), m_varobj_options() {
+        m_option_format(eFormatDefault) {
     SetHelpLong(R"(
 Children of aggregate variables can be specified such as 'var->child.x'.  In
 'frame variable', the operators -> and [] do not invoke operator overloads if
@@ -729,7 +726,7 @@ class CommandObjectFrameRecognizerAdd : public CommandObjectParsed {
 private:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
     ~CommandOptions() override = default;
 
     Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
@@ -798,8 +795,7 @@ class CommandObjectFrameRecognizerAdd : public CommandObjectParsed {
 public:
   CommandObjectFrameRecognizerAdd(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "frame recognizer add",
-                            "Add a new frame recognizer.", nullptr),
-        m_options() {
+                            "Add a new frame recognizer.", nullptr) {
     SetHelpLong(R"(
 Frame recognizers allow for retrieving information about special frames based on
 ABI, arguments or other special properties of that frame, even without source
diff --git a/lldb/source/Commands/CommandObjectHelp.cpp b/lldb/source/Commands/CommandObjectHelp.cpp
index 8c24efaa08ee3..a2f682049ae03 100644
--- a/lldb/source/Commands/CommandObjectHelp.cpp
+++ b/lldb/source/Commands/CommandObjectHelp.cpp
@@ -46,8 +46,7 @@ CommandObjectHelp::CommandObjectHelp(CommandInterpreter &interpreter)
                           "Show a list of all debugger "
                           "commands, or give details "
                           "about a specific command.",
-                          "help [<cmd-name>]"),
-      m_options() {
+                          "help [<cmd-name>]") {
   CommandArgumentEntry arg;
   CommandArgumentData command_arg;
 
diff --git a/lldb/source/Commands/CommandObjectHelp.h b/lldb/source/Commands/CommandObjectHelp.h
index c924dda7c6d41..71799ebb31217 100644
--- a/lldb/source/Commands/CommandObjectHelp.h
+++ b/lldb/source/Commands/CommandObjectHelp.h
@@ -32,7 +32,7 @@ class CommandObjectHelp : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectLog.cpp b/lldb/source/Commands/CommandObjectLog.cpp
index 05ffba27e65fd..d432ab244805f 100644
--- a/lldb/source/Commands/CommandObjectLog.cpp
+++ b/lldb/source/Commands/CommandObjectLog.cpp
@@ -45,8 +45,7 @@ class CommandObjectLogEnable : public CommandObjectParsed {
   CommandObjectLogEnable(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "log enable",
                             "Enable logging for a single log channel.",
-                            nullptr),
-        m_options() {
+                            nullptr) {
     CommandArgumentEntry arg1;
     CommandArgumentEntry arg2;
     CommandArgumentData channel_arg;
@@ -76,7 +75,7 @@ class CommandObjectLogEnable : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), log_file() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
index e59cd8028998a..0b5f39bc7a8f4 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -48,7 +48,7 @@ using namespace lldb_private;
 class OptionGroupReadMemory : public OptionGroup {
 public:
   OptionGroupReadMemory()
-      : m_num_per_line(1, 1), m_view_as_type(), m_offset(0, 0),
+      : m_num_per_line(1, 1), m_offset(0, 0),
         m_language_for_type(eLanguageTypeUnknown) {}
 
   ~OptionGroupReadMemory() override = default;
@@ -287,12 +287,10 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
             interpreter, "memory read",
             "Read from the memory of the current target process.", nullptr,
             eCommandRequiresTarget | eCommandProcessMustBePaused),
-        m_option_group(), m_format_options(eFormatBytesWithASCII, 1, 8),
-        m_memory_options(), m_outfile_options(), m_varobj_options(),
+        m_format_options(eFormatBytesWithASCII, 1, 8),
+
         m_next_addr(LLDB_INVALID_ADDRESS), m_prev_byte_size(0),
-        m_prev_format_options(eFormatBytesWithASCII, 1, 8),
-        m_prev_memory_options(), m_prev_outfile_options(),
-        m_prev_varobj_options() {
+        m_prev_format_options(eFormatBytesWithASCII, 1, 8) {
     CommandArgumentEntry arg1;
     CommandArgumentEntry arg2;
     CommandArgumentData start_addr_arg;
@@ -890,7 +888,7 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
 public:
   class OptionGroupFindMemory : public OptionGroup {
   public:
-    OptionGroupFindMemory() : OptionGroup(), m_count(1), m_offset(0) {}
+    OptionGroupFindMemory() : m_count(1), m_offset(0) {}
 
     ~OptionGroupFindMemory() override = default;
 
@@ -944,8 +942,7 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
       : CommandObjectParsed(
             interpreter, "memory find",
             "Find a value in the memory of the current target process.",
-            nullptr, eCommandRequiresProcess | eCommandProcessMustBeLaunched),
-        m_option_group(), m_memory_options() {
+            nullptr, eCommandRequiresProcess | eCommandProcessMustBeLaunched) {
     CommandArgumentEntry arg1;
     CommandArgumentEntry arg2;
     CommandArgumentData addr_arg;
@@ -1178,7 +1175,7 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
 public:
   class OptionGroupWriteMemory : public OptionGroup {
   public:
-    OptionGroupWriteMemory() : OptionGroup() {}
+    OptionGroupWriteMemory() {}
 
     ~OptionGroupWriteMemory() override = default;
 
@@ -1230,16 +1227,14 @@ class CommandObjectMemoryWrite : public CommandObjectParsed {
             interpreter, "memory write",
             "Write to the memory of the current target process.", nullptr,
             eCommandRequiresProcess | eCommandProcessMustBeLaunched),
-        m_option_group(),
         m_format_options(
             eFormatBytes, 1, UINT64_MAX,
             {std::make_tuple(
                  eArgTypeFormat,
                  "The format to use for each of the value to be written."),
-             std::make_tuple(
-                 eArgTypeByteSize,
-                 "The size in bytes to write from input file or each value.")}),
-        m_memory_options() {
+             std::make_tuple(eArgTypeByteSize,
+                             "The size in bytes to write from input file or "
+                             "each value.")}) {
     CommandArgumentEntry arg1;
     CommandArgumentEntry arg2;
     CommandArgumentData addr_arg;
diff --git a/lldb/source/Commands/CommandObjectMemoryTag.cpp b/lldb/source/Commands/CommandObjectMemoryTag.cpp
index 666d5c21b8714..d108cf58b18c0 100644
--- a/lldb/source/Commands/CommandObjectMemoryTag.cpp
+++ b/lldb/source/Commands/CommandObjectMemoryTag.cpp
@@ -138,7 +138,7 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed {
 public:
   class OptionGroupTagWrite : public OptionGroup {
   public:
-    OptionGroupTagWrite() : OptionGroup(), m_end_addr(LLDB_INVALID_ADDRESS) {}
+    OptionGroupTagWrite() : m_end_addr(LLDB_INVALID_ADDRESS) {}
 
     ~OptionGroupTagWrite() override = default;
 
@@ -177,8 +177,7 @@ class CommandObjectMemoryTagWrite : public CommandObjectParsed {
                             "contains the given address.",
                             nullptr,
                             eCommandRequiresTarget | eCommandRequiresProcess |
-                                eCommandProcessMustBePaused),
-        m_option_group(), m_tag_write_options() {
+                                eCommandProcessMustBePaused) {
     // Address
     m_arguments.push_back(
         CommandArgumentEntry{CommandArgumentData(eArgTypeAddressOrExpression)});
diff --git a/lldb/source/Commands/CommandObjectPlatform.cpp b/lldb/source/Commands/CommandObjectPlatform.cpp
index 10dd878249116..4c18465c868a0 100644
--- a/lldb/source/Commands/CommandObjectPlatform.cpp
+++ b/lldb/source/Commands/CommandObjectPlatform.cpp
@@ -145,7 +145,6 @@ class CommandObjectPlatformSelect : public CommandObjectParsed {
                             "Create a platform if needed and select it as the "
                             "current platform.",
                             "platform select <platform-name>", 0),
-        m_option_group(),
         m_platform_options(
             false) // Don't include the "--platform" option by passing false
   {
@@ -377,7 +376,6 @@ class CommandObjectPlatformSettings : public CommandObjectParsed {
                             "Set settings for the current target's platform, "
                             "or for a platform by name.",
                             "platform settings", 0),
-        m_options(),
         m_option_working_dir(LLDB_OPT_SET_1, false, "working-dir", 'w',
                              CommandCompletions::eRemoteDiskDirectoryCompletion,
                              eArgTypePath,
@@ -417,8 +415,7 @@ class CommandObjectPlatformMkDir : public CommandObjectParsed {
   CommandObjectPlatformMkDir(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "platform mkdir",
                             "Make a new directory on the remote end.", nullptr,
-                            0),
-        m_options() {}
+                            0) {}
 
   ~CommandObjectPlatformMkDir() override = default;
 
@@ -464,8 +461,7 @@ class CommandObjectPlatformFOpen : public CommandObjectParsed {
 public:
   CommandObjectPlatformFOpen(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "platform file open",
-                            "Open a file on the remote end.", nullptr, 0),
-        m_options() {}
+                            "Open a file on the remote end.", nullptr, 0) {}
 
   ~CommandObjectPlatformFOpen() override = default;
 
@@ -566,8 +562,7 @@ class CommandObjectPlatformFRead : public CommandObjectParsed {
   CommandObjectPlatformFRead(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "platform file read",
                             "Read data from a file on the remote end.", nullptr,
-                            0),
-        m_options() {}
+                            0) {}
 
   ~CommandObjectPlatformFRead() override = default;
 
@@ -605,7 +600,7 @@ class CommandObjectPlatformFRead : public CommandObjectParsed {
 protected:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -660,8 +655,7 @@ class CommandObjectPlatformFWrite : public CommandObjectParsed {
   CommandObjectPlatformFWrite(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "platform file write",
                             "Write data to a file on the remote end.", nullptr,
-                            0),
-        m_options() {}
+                            0) {}
 
   ~CommandObjectPlatformFWrite() override = default;
 
@@ -698,7 +692,7 @@ class CommandObjectPlatformFWrite : public CommandObjectParsed {
 protected:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1124,8 +1118,7 @@ class CommandObjectPlatformProcessLaunch : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "platform process launch",
                             "Launch a new process on a remote platform.",
                             "platform process launch program",
-                            eCommandRequiresTarget | eCommandTryTargetAPILock),
-        m_options(), m_all_options() {
+                            eCommandRequiresTarget | eCommandTryTargetAPILock) {
     m_all_options.Append(&m_options);
     m_all_options.Finalize();
   }
@@ -1217,8 +1210,7 @@ class CommandObjectPlatformProcessList : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "platform process list",
                             "List processes on a remote platform by name, pid, "
                             "or many other matching attributes.",
-                            "platform process list", 0),
-        m_options() {}
+                            "platform process list", 0) {}
 
   ~CommandObjectPlatformProcessList() override = default;
 
@@ -1324,7 +1316,7 @@ class CommandObjectPlatformProcessList : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), match_info() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1560,7 +1552,7 @@ class CommandObjectPlatformProcessAttach : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -1622,8 +1614,7 @@ class CommandObjectPlatformProcessAttach : public CommandObjectParsed {
   CommandObjectPlatformProcessAttach(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "platform process attach",
                             "Attach to a process.",
-                            "platform process attach <cmd-options>"),
-        m_options() {}
+                            "platform process attach <cmd-options>") {}
 
   ~CommandObjectPlatformProcessAttach() override = default;
 
@@ -1689,7 +1680,7 @@ class CommandObjectPlatformShell : public CommandObjectRaw {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1747,8 +1738,7 @@ class CommandObjectPlatformShell : public CommandObjectRaw {
   CommandObjectPlatformShell(CommandInterpreter &interpreter)
       : CommandObjectRaw(interpreter, "platform shell",
                          "Run a shell command on the current platform.",
-                         "platform shell <shell-command>", 0),
-        m_options() {}
+                         "platform shell <shell-command>", 0) {}
 
   ~CommandObjectPlatformShell() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectProcess.cpp b/lldb/source/Commands/CommandObjectProcess.cpp
index 5fd1718e84840..c73f0df0aaf25 100644
--- a/lldb/source/Commands/CommandObjectProcess.cpp
+++ b/lldb/source/Commands/CommandObjectProcess.cpp
@@ -110,9 +110,8 @@ class CommandObjectProcessLaunch : public CommandObjectProcessLaunchOrAttach {
             interpreter, "process launch",
             "Launch the executable in the debugger.", nullptr,
             eCommandRequiresTarget, "restart"),
-        m_options(),
-        m_class_options("scripted process", true, 'C', 'k', 'v', 0),
-        m_all_options() {
+
+        m_class_options("scripted process", true, 'C', 'k', 'v', 0) {
     m_all_options.Append(&m_options);
     m_all_options.Append(&m_class_options, LLDB_OPT_SET_1 | LLDB_OPT_SET_2,
                          LLDB_OPT_SET_ALL);
@@ -300,7 +299,7 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -364,8 +363,7 @@ class CommandObjectProcessAttach : public CommandObjectProcessLaunchOrAttach {
   CommandObjectProcessAttach(CommandInterpreter &interpreter)
       : CommandObjectProcessLaunchOrAttach(
             interpreter, "process attach", "Attach to a process.",
-            "process attach <cmd-options>", 0, "attach"),
-        m_options() {}
+            "process attach <cmd-options>", 0, "attach") {}
 
   ~CommandObjectProcessAttach() override = default;
 
@@ -502,15 +500,14 @@ class CommandObjectProcessContinue : public CommandObjectParsed {
             "Continue execution of all threads in the current process.",
             "process continue",
             eCommandRequiresProcess | eCommandTryTargetAPILock |
-                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused),
-        m_options() {}
+                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {}
 
   ~CommandObjectProcessContinue() override = default;
 
 protected:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -651,7 +648,7 @@ class CommandObjectProcessDetach : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -698,8 +695,7 @@ class CommandObjectProcessDetach : public CommandObjectParsed {
                             "Detach from the current target process.",
                             "process detach",
                             eCommandRequiresProcess | eCommandTryTargetAPILock |
-                                eCommandProcessMustBeLaunched),
-        m_options() {}
+                                eCommandProcessMustBeLaunched) {}
 
   ~CommandObjectProcessDetach() override = default;
 
@@ -741,7 +737,7 @@ class CommandObjectProcessConnect : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -781,8 +777,7 @@ class CommandObjectProcessConnect : public CommandObjectParsed {
   CommandObjectProcessConnect(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "process connect",
                             "Connect to a remote debug service.",
-                            "process connect <remote-url>", 0),
-        m_options() {}
+                            "process connect <remote-url>", 0) {}
 
   ~CommandObjectProcessConnect() override = default;
 
@@ -863,7 +858,7 @@ class CommandObjectProcessLoad : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -907,8 +902,7 @@ class CommandObjectProcessLoad : public CommandObjectParsed {
                             "process load <filename> [<filename> ...]",
                             eCommandRequiresProcess | eCommandTryTargetAPILock |
                                 eCommandProcessMustBeLaunched |
-                                eCommandProcessMustBePaused),
-        m_options() {}
+                                eCommandProcessMustBePaused) {}
 
   ~CommandObjectProcessLoad() override = default;
 
@@ -1220,8 +1214,7 @@ class CommandObjectProcessSaveCore : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions()
-        : Options(), m_requested_save_core_style(eSaveCoreUnspecified) {}
+    CommandOptions() : m_requested_save_core_style(eSaveCoreUnspecified) {}
 
     ~CommandOptions() override = default;
 
@@ -1316,8 +1309,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed {
             interpreter, "process status",
             "Show status and stop location for the current target process.",
             "process status",
-            eCommandRequiresProcess | eCommandTryTargetAPILock),
-        m_options() {}
+            eCommandRequiresProcess | eCommandTryTargetAPILock) {}
 
   ~CommandObjectProcessStatus() override = default;
 
@@ -1325,7 +1317,7 @@ class CommandObjectProcessStatus : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1430,7 +1422,7 @@ class CommandObjectProcessHandle : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -1477,8 +1469,7 @@ class CommandObjectProcessHandle : public CommandObjectParsed {
                             "Manage LLDB handling of OS signals for the "
                             "current target process.  Defaults to showing "
                             "current policy.",
-                            nullptr, eCommandRequiresTarget),
-        m_options() {
+                            nullptr, eCommandRequiresTarget) {
     SetHelpLong("\nIf no signals are specified, update them all.  If no update "
                 "option is specified, list the current values.");
     CommandArgumentEntry arg;
@@ -1687,7 +1678,7 @@ class CommandObjectProcessTraceSave : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
                           ExecutionContext *execution_context) override {
diff --git a/lldb/source/Commands/CommandObjectRegexCommand.cpp b/lldb/source/Commands/CommandObjectRegexCommand.cpp
index 46295421834a8..7ddc5c0c7e083 100644
--- a/lldb/source/Commands/CommandObjectRegexCommand.cpp
+++ b/lldb/source/Commands/CommandObjectRegexCommand.cpp
@@ -20,7 +20,7 @@ CommandObjectRegexCommand::CommandObjectRegexCommand(
     bool is_removable)
     : CommandObjectRaw(interpreter, name, help, syntax),
       m_max_matches(max_matches), m_completion_type_mask(completion_type_mask),
-      m_entries(), m_is_removable(is_removable) {}
+      m_is_removable(is_removable) {}
 
 // Destructor
 CommandObjectRegexCommand::~CommandObjectRegexCommand() = default;
diff --git a/lldb/source/Commands/CommandObjectRegister.cpp b/lldb/source/Commands/CommandObjectRegister.cpp
index 6fd71c90c327f..933c243dedd51 100644
--- a/lldb/source/Commands/CommandObjectRegister.cpp
+++ b/lldb/source/Commands/CommandObjectRegister.cpp
@@ -43,8 +43,7 @@ class CommandObjectRegisterRead : public CommandObjectParsed {
             nullptr,
             eCommandRequiresFrame | eCommandRequiresRegContext |
                 eCommandProcessMustBeLaunched | eCommandProcessMustBePaused),
-        m_option_group(), m_format_options(eFormatDefault),
-        m_command_options() {
+        m_format_options(eFormatDefault) {
     CommandArgumentEntry arg;
     CommandArgumentData register_arg;
 
@@ -232,8 +231,7 @@ class CommandObjectRegisterRead : public CommandObjectParsed {
   class CommandOptions : public OptionGroup {
   public:
     CommandOptions()
-        : OptionGroup(),
-          set_indexes(OptionValue::ConvertTypeToMask(OptionValue::eTypeUInt64)),
+        : set_indexes(OptionValue::ConvertTypeToMask(OptionValue::eTypeUInt64)),
           dump_all_sets(false, false), // Initial and default values are false
           alternate_name(false, false) {}
 
diff --git a/lldb/source/Commands/CommandObjectReproducer.cpp b/lldb/source/Commands/CommandObjectReproducer.cpp
index 4db3e070df3c1..7e0ea65e148ee 100644
--- a/lldb/source/Commands/CommandObjectReproducer.cpp
+++ b/lldb/source/Commands/CommandObjectReproducer.cpp
@@ -227,7 +227,7 @@ class CommandObjectReproducerXCrash : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -355,7 +355,7 @@ class CommandObjectReproducerDump : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), file() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -602,7 +602,7 @@ class CommandObjectReproducerVerify : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), file() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectScript.h b/lldb/source/Commands/CommandObjectScript.h
index b9fee7124818a..97fc05421bd06 100644
--- a/lldb/source/Commands/CommandObjectScript.h
+++ b/lldb/source/Commands/CommandObjectScript.h
@@ -21,7 +21,7 @@ class CommandObjectScript : public CommandObjectRaw {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
     ~CommandOptions() override = default;
     Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
                           ExecutionContext *execution_context) override;
diff --git a/lldb/source/Commands/CommandObjectSession.cpp b/lldb/source/Commands/CommandObjectSession.cpp
index c2cdfa29a3f64..c11839a48de0a 100644
--- a/lldb/source/Commands/CommandObjectSession.cpp
+++ b/lldb/source/Commands/CommandObjectSession.cpp
@@ -62,8 +62,7 @@ class CommandObjectSessionHistory : public CommandObjectParsed {
                             "using \"!<INDEX>\".   \"!-<OFFSET>\" will re-run "
                             "the command that is <OFFSET> commands from the end"
                             " of the list (counting the current command).",
-                            nullptr),
-        m_options() {}
+                            nullptr) {}
 
   ~CommandObjectSessionHistory() override = default;
 
@@ -73,8 +72,7 @@ class CommandObjectSessionHistory : public CommandObjectParsed {
   class CommandOptions : public Options {
   public:
     CommandOptions()
-        : Options(), m_start_idx(0), m_stop_idx(0), m_count(0), m_clear(false) {
-    }
+        : m_start_idx(0), m_stop_idx(0), m_count(0), m_clear(false) {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectSettings.cpp b/lldb/source/Commands/CommandObjectSettings.cpp
index 13ff27c78deaa..391e728d9d8a4 100644
--- a/lldb/source/Commands/CommandObjectSettings.cpp
+++ b/lldb/source/Commands/CommandObjectSettings.cpp
@@ -27,8 +27,7 @@ class CommandObjectSettingsSet : public CommandObjectRaw {
 public:
   CommandObjectSettingsSet(CommandInterpreter &interpreter)
       : CommandObjectRaw(interpreter, "settings set",
-                         "Set the value of the specified debugger setting."),
-        m_options() {
+                         "Set the value of the specified debugger setting.") {
     CommandArgumentEntry arg1;
     CommandArgumentEntry arg2;
     CommandArgumentData var_name_arg;
@@ -87,7 +86,7 @@ insert-before or insert-after.");
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -304,8 +303,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed {
             "Write matching debugger settings and their "
             "current values to a file that can be read in with "
             "\"settings read\". Defaults to writing all settings.",
-            nullptr),
-        m_options() {
+            nullptr) {
     CommandArgumentEntry arg1;
     CommandArgumentData var_name_arg;
 
@@ -327,7 +325,7 @@ class CommandObjectSettingsWrite : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -417,8 +415,7 @@ class CommandObjectSettingsRead : public CommandObjectParsed {
       : CommandObjectParsed(
             interpreter, "settings read",
             "Read settings previously saved to a file with \"settings write\".",
-            nullptr),
-        m_options() {}
+            nullptr) {}
 
   ~CommandObjectSettingsRead() override = default;
 
@@ -426,7 +423,7 @@ class CommandObjectSettingsRead : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectSource.cpp b/lldb/source/Commands/CommandObjectSource.cpp
index fb33f41b8ef96..6c6706f4a98b0 100644
--- a/lldb/source/Commands/CommandObjectSource.cpp
+++ b/lldb/source/Commands/CommandObjectSource.cpp
@@ -36,7 +36,7 @@ using namespace lldb_private;
 class CommandObjectSourceInfo : public CommandObjectParsed {
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -118,8 +118,7 @@ class CommandObjectSourceInfo : public CommandObjectParsed {
             "Display source line information for the current target "
             "process.  Defaults to instruction pointer in current stack "
             "frame.",
-            nullptr, eCommandRequiresTarget),
-        m_options() {}
+            nullptr, eCommandRequiresTarget) {}
 
   ~CommandObjectSourceInfo() override = default;
 
@@ -624,7 +623,7 @@ class CommandObjectSourceInfo : public CommandObjectParsed {
 class CommandObjectSourceList : public CommandObjectParsed {
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -723,8 +722,7 @@ class CommandObjectSourceList : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "source list",
                             "Display source code for the current target "
                             "process as specified by options.",
-                            nullptr, eCommandRequiresTarget),
-        m_options() {}
+                            nullptr, eCommandRequiresTarget) {}
 
   ~CommandObjectSourceList() override = default;
 
@@ -757,7 +755,7 @@ class CommandObjectSourceList : public CommandObjectParsed {
     SourceInfo(ConstString name, const LineEntry &line_entry)
         : function(name), line_entry(line_entry) {}
 
-    SourceInfo() : function(), line_entry() {}
+    SourceInfo() {}
 
     bool IsValid() const { return (bool)function && line_entry.IsValid(); }
 
diff --git a/lldb/source/Commands/CommandObjectStats.cpp b/lldb/source/Commands/CommandObjectStats.cpp
index f32d559ca039f..63aa36b39f4d3 100644
--- a/lldb/source/Commands/CommandObjectStats.cpp
+++ b/lldb/source/Commands/CommandObjectStats.cpp
@@ -65,7 +65,7 @@ class CommandObjectStatsDisable : public CommandObjectParsed {
 class CommandObjectStatsDump : public CommandObjectParsed {
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
                           ExecutionContext *execution_context) override {
diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp
index bd19ac513d017..157065bde10e6 100644
--- a/lldb/source/Commands/CommandObjectTarget.cpp
+++ b/lldb/source/Commands/CommandObjectTarget.cpp
@@ -216,7 +216,6 @@ class CommandObjectTargetCreate : public CommandObjectParsed {
             interpreter, "target create",
             "Create a target using the argument as the main executable.",
             nullptr),
-        m_option_group(), m_arch_option(),
         m_platform_options(true), // Include the --platform option.
         m_core_file(LLDB_OPT_SET_1, false, "core", 'c', 0, eArgTypeFilename,
                     "Fullpath to a core file to use for this target."),
@@ -227,8 +226,7 @@ class CommandObjectTargetCreate : public CommandObjectParsed {
                       "are not in the executable."),
         m_remote_file(
             LLDB_OPT_SET_1, false, "remote-file", 'r', 0, eArgTypeFilename,
-            "Fullpath to the file on the remote host if debugging remotely."),
-        m_add_dependents() {
+            "Fullpath to the file on the remote host if debugging remotely.") {
     CommandArgumentEntry arg;
     CommandArgumentData file_arg;
 
@@ -534,8 +532,8 @@ class CommandObjectTargetDelete : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "target delete",
                             "Delete one or more targets by target index.",
                             nullptr),
-        m_option_group(), m_all_option(LLDB_OPT_SET_1, false, "all", 'a',
-                                       "Delete all targets.", false, true),
+        m_all_option(LLDB_OPT_SET_1, false, "all", 'a', "Delete all targets.",
+                     false, true),
         m_cleanup_option(
             LLDB_OPT_SET_1, false, "clean", 'c',
             "Perform extra cleanup to minimize memory consumption after "
@@ -678,7 +676,6 @@ class CommandObjectTargetVariable : public CommandObjectParsed {
                             "Read global variables for the current target, "
                             "before or while running a process.",
                             nullptr, eCommandRequiresTarget),
-        m_option_group(),
         m_option_variable(false), // Don't include frame options
         m_option_format(eFormatDefault),
         m_option_compile_units(LLDB_OPT_SET_1, false, "file", SHORT_OPTION_FILE,
@@ -691,8 +688,7 @@ class CommandObjectTargetVariable : public CommandObjectParsed {
             eArgTypeFilename,
             "A basename or fullpath to a shared library to use in the search "
             "for global "
-            "variables. This option can be specified multiple times."),
-        m_varobj_options() {
+            "variables. This option can be specified multiple times.") {
     CommandArgumentEntry arg;
     CommandArgumentData var_name_arg;
 
@@ -1928,8 +1924,7 @@ class CommandObjectTargetModulesDumpSymtab
       : CommandObjectTargetModulesModuleAutoComplete(
             interpreter, "target modules dump symtab",
             "Dump the symbol table from one or more target modules.", nullptr,
-            eCommandRequiresTarget),
-        m_options() {}
+            eCommandRequiresTarget) {}
 
   ~CommandObjectTargetModulesDumpSymtab() override = default;
 
@@ -1937,7 +1932,7 @@ class CommandObjectTargetModulesDumpSymtab
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -2354,7 +2349,7 @@ class CommandObjectTargetModulesDumpLineTable
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     Status SetOptionValue(uint32_t option_idx, llvm::StringRef option_arg,
                           ExecutionContext *execution_context) override {
@@ -2423,11 +2418,11 @@ class CommandObjectTargetModulesAdd : public CommandObjectParsed {
                             "Add a new module to the current target's modules.",
                             "target modules add [<module>]",
                             eCommandRequiresTarget),
-        m_option_group(), m_symbol_file(LLDB_OPT_SET_1, false, "symfile", 's',
-                                        0, eArgTypeFilename,
-                                        "Fullpath to a stand alone debug "
-                                        "symbols file for when debug symbols "
-                                        "are not in the executable.") {
+        m_symbol_file(LLDB_OPT_SET_1, false, "symfile", 's', 0,
+                      eArgTypeFilename,
+                      "Fullpath to a stand alone debug "
+                      "symbols file for when debug symbols "
+                      "are not in the executable.") {
     m_option_group.Append(&m_uuid_option_group, LLDB_OPT_SET_ALL,
                           LLDB_OPT_SET_1);
     m_option_group.Append(&m_symbol_file, LLDB_OPT_SET_ALL, LLDB_OPT_SET_1);
@@ -2575,7 +2570,6 @@ class CommandObjectTargetModulesLoad
             "target modules load [--file <module> --uuid <uuid>] <sect-name> "
             "<address> [<sect-name> <address> ....]",
             eCommandRequiresTarget),
-        m_option_group(),
         m_file_option(LLDB_OPT_SET_1, false, "file", 'f', 0, eArgTypeName,
                       "Fullpath or basename for module to load.", ""),
         m_load_option(LLDB_OPT_SET_1, false, "load", 'l',
@@ -2843,7 +2837,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), m_format_array() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -2886,8 +2880,7 @@ class CommandObjectTargetModulesList : public CommandObjectParsed {
       : CommandObjectParsed(
             interpreter, "target modules list",
             "List current executable and dependent shared library images.",
-            "target modules list [<cmd-options>]"),
-        m_options() {}
+            "target modules list [<cmd-options>]") {}
 
   ~CommandObjectTargetModulesList() override = default;
 
@@ -3186,7 +3179,7 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), m_str() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -3243,8 +3236,7 @@ class CommandObjectTargetModulesShowUnwind : public CommandObjectParsed {
             interpreter, "target modules show-unwind",
             "Show synthesized unwind instructions for a function.", nullptr,
             eCommandRequiresTarget | eCommandRequiresProcess |
-                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused),
-        m_options() {}
+                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {}
 
   ~CommandObjectTargetModulesShowUnwind() override = default;
 
@@ -3533,7 +3525,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -3648,8 +3640,7 @@ class CommandObjectTargetModulesLookup : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "target modules lookup",
                             "Look up information within executable and "
                             "dependent shared library images.",
-                            nullptr, eCommandRequiresTarget),
-        m_options() {
+                            nullptr, eCommandRequiresTarget) {
     CommandArgumentEntry arg;
     CommandArgumentData file_arg;
 
@@ -3955,7 +3946,6 @@ class CommandObjectTargetSymbolsAdd : public CommandObjectParsed {
             "to specify a module.",
             "target symbols add <cmd-options> [<symfile>]",
             eCommandRequiresTarget),
-        m_option_group(),
         m_file_option(
             LLDB_OPT_SET_1, false, "shlib", 's',
             CommandCompletions::eModuleCompletion, eArgTypeShlibName,
@@ -4442,7 +4432,7 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed,
 public:
   class CommandOptions : public OptionGroup {
   public:
-    CommandOptions() : OptionGroup(), m_line_end(UINT_MAX), m_one_liner() {}
+    CommandOptions() : m_line_end(UINT_MAX) {}
 
     ~CommandOptions() override = default;
 
@@ -4599,7 +4589,7 @@ class CommandObjectTargetStopHookAdd : public CommandObjectParsed,
                             "target stop-hook add"),
         IOHandlerDelegateMultiline("DONE",
                                    IOHandlerDelegate::Completion::LLDBCommand),
-        m_options(), m_python_class_options("scripted stop-hook", true, 'P') {
+        m_python_class_options("scripted stop-hook", true, 'P') {
     SetHelpLong(
         R"(
 Command Based stop-hooks:
diff --git a/lldb/source/Commands/CommandObjectThread.cpp b/lldb/source/Commands/CommandObjectThread.cpp
index 71e67f6ba2087..137aaa81c61a0 100644
--- a/lldb/source/Commands/CommandObjectThread.cpp
+++ b/lldb/source/Commands/CommandObjectThread.cpp
@@ -47,7 +47,7 @@ class CommandObjectThreadBacktrace : public CommandObjectIterateOverThreads {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -119,8 +119,7 @@ class CommandObjectThreadBacktrace : public CommandObjectIterateOverThreads {
             nullptr,
             eCommandRequiresProcess | eCommandRequiresThread |
                 eCommandTryTargetAPILock | eCommandProcessMustBeLaunched |
-                eCommandProcessMustBePaused),
-        m_options() {}
+                eCommandProcessMustBePaused) {}
 
   ~CommandObjectThreadBacktrace() override = default;
 
@@ -203,7 +202,7 @@ static constexpr OptionEnumValues TriRunningModes() {
 
 class ThreadStepScopeOptionGroup : public OptionGroup {
 public:
-  ThreadStepScopeOptionGroup() : OptionGroup() {
+  ThreadStepScopeOptionGroup() {
     // Keep default values of all options in one place: OptionParsingStarting
     // ()
     OptionParsingStarting(nullptr);
@@ -327,7 +326,7 @@ class CommandObjectThreadStepWithTypeAndScope : public CommandObjectParsed {
                                 eCommandTryTargetAPILock |
                                 eCommandProcessMustBeLaunched |
                                 eCommandProcessMustBePaused),
-        m_step_type(step_type), m_step_scope(step_scope), m_options(),
+        m_step_type(step_type), m_step_scope(step_scope),
         m_class_options("scripted step") {
     CommandArgumentEntry arg;
     CommandArgumentData thread_id_arg;
@@ -780,7 +779,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
     uint32_t m_thread_idx = LLDB_INVALID_THREAD_ID;
     uint32_t m_frame_idx = LLDB_INVALID_FRAME_ID;
 
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -861,8 +860,7 @@ class CommandObjectThreadUntil : public CommandObjectParsed {
             " is provided, stepping will stop when the first one is hit.",
             nullptr,
             eCommandRequiresThread | eCommandTryTargetAPILock |
-                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused),
-        m_options() {
+                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {
     CommandArgumentEntry arg;
     CommandArgumentData line_num_arg;
 
@@ -1186,7 +1184,7 @@ class CommandObjectThreadInfo : public CommandObjectIterateOverThreads {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -1231,8 +1229,7 @@ class CommandObjectThreadInfo : public CommandObjectIterateOverThreads {
             "current thread.",
             "thread info",
             eCommandRequiresProcess | eCommandTryTargetAPILock |
-                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused),
-        m_options() {
+                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {
     m_add_return = false;
   }
 
@@ -1331,7 +1328,7 @@ class CommandObjectThreadReturn : public CommandObjectRaw {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -1386,8 +1383,7 @@ class CommandObjectThreadReturn : public CommandObjectRaw {
                          "thread return",
                          eCommandRequiresFrame | eCommandTryTargetAPILock |
                              eCommandProcessMustBeLaunched |
-                             eCommandProcessMustBePaused),
-        m_options() {
+                             eCommandProcessMustBePaused) {
     CommandArgumentEntry arg;
     CommandArgumentData expression_arg;
 
@@ -1496,7 +1492,7 @@ class CommandObjectThreadJump : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -1556,8 +1552,7 @@ class CommandObjectThreadJump : public CommandObjectParsed {
             interpreter, "thread jump",
             "Sets the program counter to a new address.", "thread jump",
             eCommandRequiresFrame | eCommandTryTargetAPILock |
-                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused),
-        m_options() {}
+                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {}
 
   ~CommandObjectThreadJump() override = default;
 
@@ -1633,7 +1628,7 @@ class CommandObjectThreadPlanList : public CommandObjectIterateOverThreads {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {
+    CommandOptions() {
       // Keep default values of all options in one place: OptionParsingStarting
       // ()
       OptionParsingStarting(nullptr);
@@ -1695,8 +1690,7 @@ class CommandObjectThreadPlanList : public CommandObjectIterateOverThreads {
             nullptr,
             eCommandRequiresProcess | eCommandRequiresThread |
                 eCommandTryTargetAPILock | eCommandProcessMustBeLaunched |
-                eCommandProcessMustBePaused),
-        m_options() {}
+                eCommandProcessMustBePaused) {}
 
   ~CommandObjectThreadPlanList() override = default;
 
@@ -2004,7 +1998,7 @@ class CommandObjectTraceDumpInstructions
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -2085,7 +2079,7 @@ class CommandObjectTraceDumpInstructions
             eCommandRequiresProcess | eCommandTryTargetAPILock |
                 eCommandProcessMustBeLaunched | eCommandProcessMustBePaused |
                 eCommandProcessMustBeTraced),
-        m_options(), m_create_repeat_command_just_invoked(false) {}
+        m_create_repeat_command_just_invoked(false) {}
 
   ~CommandObjectTraceDumpInstructions() override = default;
 
@@ -2165,7 +2159,7 @@ class CommandObjectTraceDumpInfo : public CommandObjectIterateOverThreads {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -2213,8 +2207,7 @@ class CommandObjectTraceDumpInfo : public CommandObjectIterateOverThreads {
             nullptr,
             eCommandRequiresProcess | eCommandTryTargetAPILock |
                 eCommandProcessMustBeLaunched | eCommandProcessMustBePaused |
-                eCommandProcessMustBeTraced),
-        m_options() {}
+                eCommandProcessMustBeTraced) {}
 
   ~CommandObjectTraceDumpInfo() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectTrace.cpp b/lldb/source/Commands/CommandObjectTrace.cpp
index 62ee48ca05469..53f1b0a32e607 100644
--- a/lldb/source/Commands/CommandObjectTrace.cpp
+++ b/lldb/source/Commands/CommandObjectTrace.cpp
@@ -40,7 +40,7 @@ class CommandObjectTraceLoad : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -74,8 +74,7 @@ class CommandObjectTraceLoad : public CommandObjectParsed {
   CommandObjectTraceLoad(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "trace load",
                             "Load a processor trace session from a JSON file.",
-                            "trace load"),
-        m_options() {}
+                            "trace load") {}
 
   ~CommandObjectTraceLoad() override = default;
 
@@ -139,7 +138,7 @@ class CommandObjectTraceDump : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -173,8 +172,7 @@ class CommandObjectTraceDump : public CommandObjectParsed {
   CommandObjectTraceDump(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "trace dump",
                             "Dump the loaded processor trace data.",
-                            "trace dump"),
-        m_options() {}
+                            "trace dump") {}
 
   ~CommandObjectTraceDump() override = default;
 
@@ -205,7 +203,7 @@ class CommandObjectTraceSchema : public CommandObjectParsed {
 public:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() { OptionParsingStarting(nullptr); }
+    CommandOptions() { OptionParsingStarting(nullptr); }
 
     ~CommandOptions() override = default;
 
@@ -240,8 +238,7 @@ class CommandObjectTraceSchema : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "trace schema",
                             "Show the schema of the given trace plugin.",
                             "trace schema <plug-in>. Use the plug-in name "
-                            "\"all\" to see all schemas.\n"),
-        m_options() {}
+                            "\"all\" to see all schemas.\n") {}
 
   ~CommandObjectTraceSchema() override = default;
 
diff --git a/lldb/source/Commands/CommandObjectType.cpp b/lldb/source/Commands/CommandObjectType.cpp
index 0562b6be3cb5e..f9e1d0f91fb76 100644
--- a/lldb/source/Commands/CommandObjectType.cpp
+++ b/lldb/source/Commands/CommandObjectType.cpp
@@ -69,7 +69,7 @@ class SynthAddOptions {
 
   SynthAddOptions(bool sptr, bool sref, bool casc, bool regx, std::string catg)
       : m_skip_pointers(sptr), m_skip_references(sref), m_cascade(casc),
-        m_regex(regx), m_target_types(), m_category(catg) {}
+        m_regex(regx), m_category(catg) {}
 
   typedef std::shared_ptr<SynthAddOptions> SharedPointer;
 };
@@ -103,7 +103,7 @@ class CommandObjectTypeSummaryAdd : public CommandObjectParsed,
 private:
   class CommandOptions : public Options {
   public:
-    CommandOptions(CommandInterpreter &interpreter) : Options() {}
+    CommandOptions(CommandInterpreter &interpreter) {}
 
     ~CommandOptions() override = default;
 
@@ -286,7 +286,7 @@ class CommandObjectTypeSynthAdd : public CommandObjectParsed,
 private:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -498,7 +498,7 @@ class CommandObjectTypeFormatAdd : public CommandObjectParsed {
 private:
   class CommandOptions : public OptionGroup {
   public:
-    CommandOptions() : OptionGroup() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -571,8 +571,7 @@ class CommandObjectTypeFormatAdd : public CommandObjectParsed {
   CommandObjectTypeFormatAdd(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "type format add",
                             "Add a new formatting style for a type.", nullptr),
-        m_option_group(), m_format_options(eFormatInvalid),
-        m_command_options() {
+        m_format_options(eFormatInvalid) {
     CommandArgumentEntry type_arg;
     CommandArgumentData type_style_arg;
 
@@ -708,7 +707,7 @@ class CommandObjectTypeFormatterDelete : public CommandObjectParsed {
 protected:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -760,7 +759,7 @@ class CommandObjectTypeFormatterDelete : public CommandObjectParsed {
   CommandObjectTypeFormatterDelete(CommandInterpreter &interpreter,
                                    uint32_t formatter_kind_mask,
                                    const char *name, const char *help)
-      : CommandObjectParsed(interpreter, name, help, nullptr), m_options(),
+      : CommandObjectParsed(interpreter, name, help, nullptr),
         m_formatter_kind_mask(formatter_kind_mask) {
     CommandArgumentEntry type_arg;
     CommandArgumentData type_style_arg;
@@ -873,7 +872,7 @@ class CommandObjectTypeFormatterClear : public CommandObjectParsed {
 private:
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -914,7 +913,7 @@ class CommandObjectTypeFormatterClear : public CommandObjectParsed {
   CommandObjectTypeFormatterClear(CommandInterpreter &interpreter,
                                   uint32_t formatter_kind_mask,
                                   const char *name, const char *help)
-      : CommandObjectParsed(interpreter, name, help, nullptr), m_options(),
+      : CommandObjectParsed(interpreter, name, help, nullptr),
         m_formatter_kind_mask(formatter_kind_mask) {}
 
   ~CommandObjectTypeFormatterClear() override = default;
@@ -1713,7 +1712,7 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed {
   class CommandOptions : public Options {
   public:
     CommandOptions()
-        : Options(), m_define_enabled(false, false),
+        : m_define_enabled(false, false),
           m_cate_language(eLanguageTypeUnknown, eLanguageTypeUnknown) {}
 
     ~CommandOptions() override = default;
@@ -1760,8 +1759,7 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed {
   CommandObjectTypeCategoryDefine(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "type category define",
                             "Define a new category as a source of formatters.",
-                            nullptr),
-        m_options() {
+                            nullptr) {
     CommandArgumentEntry type_arg;
     CommandArgumentData type_style_arg;
 
@@ -1817,7 +1815,7 @@ class CommandObjectTypeCategoryDefine : public CommandObjectParsed {
 class CommandObjectTypeCategoryEnable : public CommandObjectParsed {
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -1863,8 +1861,7 @@ class CommandObjectTypeCategoryEnable : public CommandObjectParsed {
   CommandObjectTypeCategoryEnable(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "type category enable",
                             "Enable a category as a source of formatters.",
-                            nullptr),
-        m_options() {
+                            nullptr) {
     CommandArgumentEntry type_arg;
     CommandArgumentData type_style_arg;
 
@@ -1995,7 +1992,7 @@ class CommandObjectTypeCategoryDelete : public CommandObjectParsed {
 class CommandObjectTypeCategoryDisable : public CommandObjectParsed {
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -2041,8 +2038,7 @@ class CommandObjectTypeCategoryDisable : public CommandObjectParsed {
   CommandObjectTypeCategoryDisable(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "type category disable",
                             "Disable a category as a source of formatters.",
-                            nullptr),
-        m_options() {
+                            nullptr) {
     CommandArgumentEntry type_arg;
     CommandArgumentData type_style_arg;
 
@@ -2409,7 +2405,7 @@ class CommandObjectTypeFilterAdd : public CommandObjectParsed {
     typedef std::vector<std::string> option_vector;
 
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -2528,8 +2524,7 @@ class CommandObjectTypeFilterAdd : public CommandObjectParsed {
 public:
   CommandObjectTypeFilterAdd(CommandInterpreter &interpreter)
       : CommandObjectParsed(interpreter, "type filter add",
-                            "Add a new filter for a type.", nullptr),
-        m_options() {
+                            "Add a new filter for a type.", nullptr) {
     CommandArgumentEntry type_arg;
     CommandArgumentData type_style_arg;
 
@@ -2666,7 +2661,7 @@ class CommandObjectTypeLookup : public CommandObjectRaw {
 
   class CommandOptions : public OptionGroup {
   public:
-    CommandOptions() : OptionGroup() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -2716,8 +2711,7 @@ class CommandObjectTypeLookup : public CommandObjectRaw {
                          "Lookup types and declarations in the current target, "
                          "following language-specific naming conventions.",
                          "type lookup <type-specifier>",
-                         eCommandRequiresTarget),
-        m_option_group(), m_command_options() {
+                         eCommandRequiresTarget) {
     m_option_group.Append(&m_command_options);
     m_option_group.Finalize();
   }
diff --git a/lldb/source/Commands/CommandObjectWatchpoint.cpp b/lldb/source/Commands/CommandObjectWatchpoint.cpp
index 9fbf036a19d1a..9701553bdda9f 100644
--- a/lldb/source/Commands/CommandObjectWatchpoint.cpp
+++ b/lldb/source/Commands/CommandObjectWatchpoint.cpp
@@ -149,8 +149,7 @@ class CommandObjectWatchpointList : public CommandObjectParsed {
       : CommandObjectParsed(
             interpreter, "watchpoint list",
             "List all watchpoints at configurable levels of detail.", nullptr,
-            eCommandRequiresTarget),
-        m_options() {
+            eCommandRequiresTarget) {
     CommandArgumentEntry arg;
     CommandObject::AddIDsArgumentData(arg, eArgTypeWatchpointID,
                                       eArgTypeWatchpointIDRange);
@@ -165,7 +164,7 @@ class CommandObjectWatchpointList : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -432,8 +431,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "watchpoint delete",
                             "Delete the specified watchpoint(s).  If no "
                             "watchpoints are specified, delete them all.",
-                            nullptr, eCommandRequiresTarget),
-        m_options() {
+                            nullptr, eCommandRequiresTarget) {
     CommandArgumentEntry arg;
     CommandObject::AddIDsArgumentData(arg, eArgTypeWatchpointID,
                                       eArgTypeWatchpointIDRange);
@@ -456,7 +454,7 @@ class CommandObjectWatchpointDelete : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -556,8 +554,7 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed {
       : CommandObjectParsed(interpreter, "watchpoint ignore",
                             "Set ignore count on the specified watchpoint(s).  "
                             "If no watchpoints are specified, set them all.",
-                            nullptr, eCommandRequiresTarget),
-        m_options() {
+                            nullptr, eCommandRequiresTarget) {
     CommandArgumentEntry arg;
     CommandObject::AddIDsArgumentData(arg, eArgTypeWatchpointID,
                                       eArgTypeWatchpointIDRange);
@@ -580,7 +577,7 @@ class CommandObjectWatchpointIgnore : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -682,8 +679,7 @@ class CommandObjectWatchpointModify : public CommandObjectParsed {
             "If no watchpoint is specified, act on the last created "
             "watchpoint.  "
             "Passing an empty argument clears the modification.",
-            nullptr, eCommandRequiresTarget),
-        m_options() {
+            nullptr, eCommandRequiresTarget) {
     CommandArgumentEntry arg;
     CommandObject::AddIDsArgumentData(arg, eArgTypeWatchpointID,
                                       eArgTypeWatchpointIDRange);
@@ -706,7 +702,7 @@ class CommandObjectWatchpointModify : public CommandObjectParsed {
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), m_condition() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
@@ -813,8 +809,7 @@ class CommandObjectWatchpointSetVariable : public CommandObjectParsed {
             "to free up resources.",
             nullptr,
             eCommandRequiresFrame | eCommandTryTargetAPILock |
-                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused),
-        m_option_group(), m_option_watchpoint() {
+                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {
     SetHelpLong(
         R"(
 Examples:
@@ -1006,8 +1001,7 @@ class CommandObjectWatchpointSetExpression : public CommandObjectRaw {
             "to free up resources.",
             "",
             eCommandRequiresFrame | eCommandTryTargetAPILock |
-                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused),
-        m_option_group(), m_option_watchpoint() {
+                eCommandProcessMustBeLaunched | eCommandProcessMustBePaused) {
     SetHelpLong(
         R"(
 Examples:
diff --git a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
index 1f4e953663857..a429e568c61ae 100644
--- a/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
+++ b/lldb/source/Commands/CommandObjectWatchpointCommand.cpp
@@ -66,8 +66,7 @@ class CommandObjectWatchpointCommandAdd : public CommandObjectParsed,
                             "commands previously added to it.",
                             nullptr, eCommandRequiresTarget),
         IOHandlerDelegateMultiline("DONE",
-                                   IOHandlerDelegate::Completion::LLDBCommand),
-        m_options() {
+                                   IOHandlerDelegate::Completion::LLDBCommand) {
     SetHelpLong(
         R"(
 General information about entering watchpoint commands
@@ -314,7 +313,7 @@ are no syntax errors may indicate that a function was declared but never called.
 
   class CommandOptions : public Options {
   public:
-    CommandOptions() : Options(), m_one_liner(), m_function_name() {}
+    CommandOptions() {}
 
     ~CommandOptions() override = default;
 
diff --git a/lldb/source/Commands/CommandOptionsProcessLaunch.h b/lldb/source/Commands/CommandOptionsProcessLaunch.h
index d18a23245080d..7ab7fabe10503 100644
--- a/lldb/source/Commands/CommandOptionsProcessLaunch.h
+++ b/lldb/source/Commands/CommandOptionsProcessLaunch.h
@@ -18,7 +18,7 @@ namespace lldb_private {
 
 class CommandOptionsProcessLaunch : public lldb_private::OptionGroup {
 public:
-  CommandOptionsProcessLaunch() : lldb_private::OptionGroup() {
+  CommandOptionsProcessLaunch() {
     // Keep default values of all options in one place: OptionParsingStarting
     // ()
     OptionParsingStarting(nullptr);

From f8ddcb4131256fcb1777cba617d3c5277024f9ec Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 11:07:16 -0800
Subject: [PATCH 301/946] [Object] Remove a redundant return statement (NFC)

Identified with readability-redundant-control-flow.
---
 llvm/lib/Object/Archive.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Object/Archive.cpp b/llvm/lib/Object/Archive.cpp
index 91d8a0bf69aec..9a4ef055faa4d 100644
--- a/llvm/lib/Object/Archive.cpp
+++ b/llvm/lib/Object/Archive.cpp
@@ -1174,5 +1174,4 @@ BigArchive::BigArchive(MemoryBufferRef Source, Error &Err)
   }
   setFirstRegular(*I);
   Err = Error::success();
-  return;
 }

From ad36f37ce2b4ab6b1aadf318456fb2b8bb141d71 Mon Sep 17 00:00:00 2001
From: Groverkss <groverkss@gmail.com>
Date: Mon, 24 Jan 2022 01:08:54 +0530
Subject: [PATCH 302/946] [MLIR][Presburger] Clean PresburgerSet identifier
 interface to match IntegerPolyhedron's interface

This patch changes names of identifiers and their corresponding getters in
PresburgerSet to match those of IntegerPolyhedron.

Reviewed By: arjunp

Differential Revision: https://reviews.llvm.org/D117998
---
 .../mlir/Analysis/Presburger/PresburgerSet.h  | 18 +++++-----
 .../lib/Analysis/Presburger/PresburgerSet.cpp | 35 ++++++++++---------
 2 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/mlir/include/mlir/Analysis/Presburger/PresburgerSet.h b/mlir/include/mlir/Analysis/Presburger/PresburgerSet.h
index 5963e60071cca..e6be939027596 100644
--- a/mlir/include/mlir/Analysis/Presburger/PresburgerSet.h
+++ b/mlir/include/mlir/Analysis/Presburger/PresburgerSet.h
@@ -36,10 +36,10 @@ class PresburgerSet {
   unsigned getNumPolys() const;
 
   /// Return the number of real dimensions.
-  unsigned getNumDims() const;
+  unsigned getNumDimIds() const;
 
   /// Return the number of symbolic dimensions.
-  unsigned getNumSyms() const;
+  unsigned getNumSymbolIds() const;
 
   /// Return a reference to the list of IntegerPolyhedrons.
   ArrayRef<IntegerPolyhedron> getAllIntegerPolyhedron() const;
@@ -82,9 +82,11 @@ class PresburgerSet {
   bool isEqual(const PresburgerSet &set) const;
 
   /// Return a universe set of the specified type that contains all points.
-  static PresburgerSet getUniverse(unsigned nDim = 0, unsigned nSym = 0);
+  static PresburgerSet getUniverse(unsigned numDims = 0,
+                                   unsigned numSymbols = 0);
   /// Return an empty set of the specified type that contains no points.
-  static PresburgerSet getEmptySet(unsigned nDim = 0, unsigned nSym = 0);
+  static PresburgerSet getEmptySet(unsigned numDims = 0,
+                                   unsigned numSymbols = 0);
 
   /// Return true if all the sets in the union are known to be integer empty
   /// false otherwise.
@@ -102,19 +104,19 @@ class PresburgerSet {
 
 private:
   /// Construct an empty PresburgerSet.
-  PresburgerSet(unsigned nDim = 0, unsigned nSym = 0)
-      : nDim(nDim), nSym(nSym) {}
+  PresburgerSet(unsigned numDims = 0, unsigned numSymbols = 0)
+      : numDims(numDims), numSymbols(numSymbols) {}
 
   /// Return the set difference poly \ set.
   static PresburgerSet getSetDifference(IntegerPolyhedron poly,
                                         const PresburgerSet &set);
 
   /// Number of identifiers corresponding to real dimensions.
-  unsigned nDim;
+  unsigned numDims;
 
   /// Number of symbolic dimensions, unknown but constant for analysis, as in
   /// IntegerPolyhedron.
-  unsigned nSym;
+  unsigned numSymbols;
 
   /// The list of integerPolyhedrons that this set is the union of.
   SmallVector<IntegerPolyhedron, 2> integerPolyhedrons;
diff --git a/mlir/lib/Analysis/Presburger/PresburgerSet.cpp b/mlir/lib/Analysis/Presburger/PresburgerSet.cpp
index 829d786c8883d..08d15bc88c74e 100644
--- a/mlir/lib/Analysis/Presburger/PresburgerSet.cpp
+++ b/mlir/lib/Analysis/Presburger/PresburgerSet.cpp
@@ -16,7 +16,7 @@ using namespace mlir;
 using namespace presburger_utils;
 
 PresburgerSet::PresburgerSet(const IntegerPolyhedron &poly)
-    : nDim(poly.getNumDimIds()), nSym(poly.getNumSymbolIds()) {
+    : numDims(poly.getNumDimIds()), numSymbols(poly.getNumSymbolIds()) {
   unionPolyInPlace(poly);
 }
 
@@ -24,9 +24,9 @@ unsigned PresburgerSet::getNumPolys() const {
   return integerPolyhedrons.size();
 }
 
-unsigned PresburgerSet::getNumDims() const { return nDim; }
+unsigned PresburgerSet::getNumDimIds() const { return numDims; }
 
-unsigned PresburgerSet::getNumSyms() const { return nSym; }
+unsigned PresburgerSet::getNumSymbolIds() const { return numSymbols; }
 
 ArrayRef<IntegerPolyhedron> PresburgerSet::getAllIntegerPolyhedron() const {
   return integerPolyhedrons;
@@ -42,10 +42,10 @@ PresburgerSet::getIntegerPolyhedron(unsigned index) const {
 /// compatible spaces.
 static void assertDimensionsCompatible(const IntegerPolyhedron &poly,
                                        const PresburgerSet &set) {
-  assert(poly.getNumDimIds() == set.getNumDims() &&
+  assert(poly.getNumDimIds() == set.getNumDimIds() &&
          "Number of dimensions of the IntegerPolyhedron and PresburgerSet"
          "do not match!");
-  assert(poly.getNumSymbolIds() == set.getNumSyms() &&
+  assert(poly.getNumSymbolIds() == set.getNumSymbolIds() &&
          "Number of symbols of the IntegerPolyhedron and PresburgerSet"
          "do not match!");
 }
@@ -53,9 +53,9 @@ static void assertDimensionsCompatible(const IntegerPolyhedron &poly,
 /// Assert that the two PresburgerSets live in compatible spaces.
 static void assertDimensionsCompatible(const PresburgerSet &setA,
                                        const PresburgerSet &setB) {
-  assert(setA.getNumDims() == setB.getNumDims() &&
+  assert(setA.getNumDimIds() == setB.getNumDimIds() &&
          "Number of dimensions of the PresburgerSets do not match!");
-  assert(setA.getNumSyms() == setB.getNumSyms() &&
+  assert(setA.getNumSymbolIds() == setB.getNumSymbolIds() &&
          "Number of symbols of the PresburgerSets do not match!");
 }
 
@@ -91,14 +91,16 @@ bool PresburgerSet::containsPoint(ArrayRef<int64_t> point) const {
   });
 }
 
-PresburgerSet PresburgerSet::getUniverse(unsigned nDim, unsigned nSym) {
-  PresburgerSet result(nDim, nSym);
-  result.unionPolyInPlace(IntegerPolyhedron::getUniverse(nDim, nSym));
+PresburgerSet PresburgerSet::getUniverse(unsigned numDims,
+                                         unsigned numSymbols) {
+  PresburgerSet result(numDims, numSymbols);
+  result.unionPolyInPlace(IntegerPolyhedron::getUniverse(numDims, numSymbols));
   return result;
 }
 
-PresburgerSet PresburgerSet::getEmptySet(unsigned nDim, unsigned nSym) {
-  return PresburgerSet(nDim, nSym);
+PresburgerSet PresburgerSet::getEmptySet(unsigned numDims,
+                                         unsigned numSymbols) {
+  return PresburgerSet(numDims, numSymbols);
 }
 
 // Return the intersection of this set with the given set.
@@ -111,7 +113,7 @@ PresburgerSet PresburgerSet::getEmptySet(unsigned nDim, unsigned nSym) {
 PresburgerSet PresburgerSet::intersect(const PresburgerSet &set) const {
   assertDimensionsCompatible(set, *this);
 
-  PresburgerSet result(nDim, nSym);
+  PresburgerSet result(getNumDimIds(), getNumSymbolIds());
   for (const IntegerPolyhedron &csA : integerPolyhedrons) {
     for (const IntegerPolyhedron &csB : set.integerPolyhedrons) {
       IntegerPolyhedron csACopy = csA, csBCopy = csB;
@@ -336,14 +338,14 @@ PresburgerSet PresburgerSet::getSetDifference(IntegerPolyhedron poly,
 /// Return the complement of this set.
 PresburgerSet PresburgerSet::complement() const {
   return getSetDifference(
-      IntegerPolyhedron::getUniverse(getNumDims(), getNumSyms()), *this);
+      IntegerPolyhedron::getUniverse(getNumDimIds(), getNumSymbolIds()), *this);
 }
 
 /// Return the result of subtract the given set from this set, i.e.,
 /// return `this \ set`.
 PresburgerSet PresburgerSet::subtract(const PresburgerSet &set) const {
   assertDimensionsCompatible(set, *this);
-  PresburgerSet result(nDim, nSym);
+  PresburgerSet result(getNumDimIds(), getNumSymbolIds());
   // We compute (U_i t_i) \ (U_i set_i) as U_i (t_i \ V_i set_i).
   for (const IntegerPolyhedron &poly : integerPolyhedrons)
     result.unionSetInPlace(getSetDifference(poly, set));
@@ -386,7 +388,8 @@ bool PresburgerSet::findIntegerSample(SmallVectorImpl<int64_t> &sample) {
 }
 
 PresburgerSet PresburgerSet::coalesce() const {
-  PresburgerSet newSet = PresburgerSet::getEmptySet(getNumDims(), getNumSyms());
+  PresburgerSet newSet =
+      PresburgerSet::getEmptySet(getNumDimIds(), getNumSymbolIds());
   llvm::SmallBitVector isRedundant(getNumPolys());
 
   for (unsigned i = 0, e = integerPolyhedrons.size(); i < e; ++i) {

From 413684313d9dd7c83ab0c40830cccbd31a94bd7c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sun, 23 Jan 2022 09:42:18 -0800
Subject: [PATCH 303/946] [RISCV] Adjust the header comment in
 RISCVInstrInfoZb.td to better integrate Zbk* extensions.

The Zbk* extensions have some overlap with Zb so have been placed in this file.

Reviewed By: VincentWu

Differential Revision: https://reviews.llvm.org/D117958
---
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 560ebb4eb08c6..25c5c35c03509 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -12,16 +12,21 @@
 //   Zbb - 1.0
 //   Zbc - 1.0
 //   Zbs - 1.0
-//   Zbe - 0.93
-//   Zbf - 0.93
-//   Zbm - 0.93
-//   Zbp - 0.93
-//   Zbr - 0.93
-//   Zbt - 0.93
+//   Zbe - 0.93 *experimental
+//   Zbf - 0.93 *experimental
+//   Zbm - 0.93 *experimental
+//   Zbp - 0.93 *experimental
+//   Zbr - 0.93 *experimental
+//   Zbt - 0.93 *experimental
 //
-// Zba, Zbb, Zbc, and Zbs have been ratified and are considered stable. The
-// other extensions are experimental as they have not yet been ratiied and are
-// subject to change.
+// The experimental extensions appeared in an earlier draft of the Bitmanip
+// extensions. They are not ratified and subject to change.
+//
+// This file also describes RISC-V instructions from the Zbk* extensions in
+// Cryptography Extensions Volume I: Scalar & Entropy Source Instructions,
+// versions:
+//   Zbkb - 1.0
+//   Zbkc - 1.0
 //
 //===----------------------------------------------------------------------===//
 

From 32dc14f876c4b196dccb5b8db56510e401fa91ab Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 21:13:58 +0000
Subject: [PATCH 304/946] [X86] LowerFunnelShift - use
 supportedVectorShiftWithBaseAmnt to check for supported scalar shifts

Allows us to reuse the ISD shift opcode instead of a mixture of ISD/X86ISD variants
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 17079116a6ae1..b3672abfef957 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29832,6 +29832,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
     SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
     SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
 
+    unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL;
     unsigned NumElts = VT.getVectorNumElements();
     MVT ExtSVT = MVT::getIntegerVT(2 * EltSizeInBits);
     MVT ExtVT = MVT::getVectorVT(ExtSVT, NumElts / 2);
@@ -29848,20 +29849,19 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
     }
 
     // Attempt to fold scalar shift as unpack(y,x) << zext(splat(z))
-    if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) {
-      unsigned ShiftX86Opc = IsFSHR ? X86ISD::VSRLI : X86ISD::VSHLI;
-      SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
-      SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
-      ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32);
-      Lo = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Lo, ScalarAmt, Subtarget,
-                               DAG);
-      Hi = getTargetVShiftNode(ShiftX86Opc, DL, ExtVT, Hi, ScalarAmt, Subtarget,
-                               DAG);
-      return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);
+    if (supportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, ShiftOpc)) {
+      if (SDValue ScalarAmt = DAG.getSplatValue(AmtMod)) {
+        SDValue Lo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
+        SDValue Hi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
+        ScalarAmt = DAG.getZExtOrTrunc(ScalarAmt, DL, MVT::i32);
+        Lo = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Lo, ScalarAmt, Subtarget,
+                                 DAG);
+        Hi = getTargetVShiftNode(ShiftOpc, DL, ExtVT, Hi, ScalarAmt, Subtarget,
+                                 DAG);
+        return getPack(DAG, Subtarget, DL, VT, Lo, Hi, !IsFSHR);
+      }
     }
 
-    unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL;
-
     MVT WideSVT = MVT::getIntegerVT(
         std::min<unsigned>(EltSizeInBits * 2, Subtarget.hasBWI() ? 16 : 32));
     MVT WideVT = MVT::getVectorVT(WideSVT, NumElts);

From ab1add6adc444371268ddbcb169a509559abd9dc Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 13:28:02 -0800
Subject: [PATCH 305/946] [clang] Move the definition of ASTDiff (NFC)

This patch moves the definition of ASTDiff later within the header
file.

Without this patch, the header depends on the forward decalrations of
SyntaxTree and ComparisonOptions from another header file, which is
not desirable.  Since SyntaxTree and ComparisonOptions are defined in
ASTDiff.h, we can move the definition of ASTDiff later and stop
relying on the forward declarations from another header file.
---
 clang/include/clang/Tooling/ASTDiff/ASTDiff.h | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/clang/include/clang/Tooling/ASTDiff/ASTDiff.h b/clang/include/clang/Tooling/ASTDiff/ASTDiff.h
index c772ad84c1390..5fe6db6cb133b 100644
--- a/clang/include/clang/Tooling/ASTDiff/ASTDiff.h
+++ b/clang/include/clang/Tooling/ASTDiff/ASTDiff.h
@@ -48,20 +48,6 @@ struct Node {
   llvm::Optional<std::string> getQualifiedIdentifier() const;
 };
 
-class ASTDiff {
-public:
-  ASTDiff(SyntaxTree &Src, SyntaxTree &Dst, const ComparisonOptions &Options);
-  ~ASTDiff();
-
-  // Returns the ID of the node that is mapped to the given node in SourceTree.
-  NodeId getMapped(const SyntaxTree &SourceTree, NodeId Id) const;
-
-  class Impl;
-
-private:
-  std::unique_ptr<Impl> DiffImpl;
-};
-
 /// SyntaxTree objects represent subtrees of the AST.
 /// They can be constructed from any Decl or Stmt.
 class SyntaxTree {
@@ -120,6 +106,20 @@ struct ComparisonOptions {
   }
 };
 
+class ASTDiff {
+public:
+  ASTDiff(SyntaxTree &Src, SyntaxTree &Dst, const ComparisonOptions &Options);
+  ~ASTDiff();
+
+  // Returns the ID of the node that is mapped to the given node in SourceTree.
+  NodeId getMapped(const SyntaxTree &SourceTree, NodeId Id) const;
+
+  class Impl;
+
+private:
+  std::unique_ptr<Impl> DiffImpl;
+};
+
 } // end namespace diff
 } // end namespace clang
 

From ee591a64a795995fad96d8c16484baa7cacce99f Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 13:28:04 -0800
Subject: [PATCH 306/946] [clang] Forward-declare DynTypedNode (NFC)

This patch adds a forward declaraiton of DynTypedNode.

DumpAST.h is relying on the forward declaration of DynTypedNode in
ASTContext.h, which is undesirable.
---
 clang-tools-extra/clangd/DumpAST.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang-tools-extra/clangd/DumpAST.h b/clang-tools-extra/clangd/DumpAST.h
index 424025aeca796..c72fe59179fd8 100644
--- a/clang-tools-extra/clangd/DumpAST.h
+++ b/clang-tools-extra/clangd/DumpAST.h
@@ -34,6 +34,7 @@
 #include "clang/AST/ASTContext.h"
 
 namespace clang {
+class DynTypedNode;
 namespace syntax {
 class TokenBuffer;
 } // namespace syntax

From e59964b67e026cde7a1438a8e91ca077a90810e0 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 13:28:06 -0800
Subject: [PATCH 307/946] [clang] Remove unused forward declarations (NFC)

---
 clang/include/clang/AST/ASTContext.h                  | 1 -
 clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index c5946b662cb29..ed35e73ce4cf9 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -99,7 +99,6 @@ class CXXMethodDecl;
 class CXXRecordDecl;
 class DiagnosticsEngine;
 class ParentMapContext;
-class DynTypedNode;
 class DynTypedNodeList;
 class Expr;
 enum class FloatModeKind;
diff --git a/clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h b/clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h
index fb7bd4e8afa23..b74af5e8f24f5 100644
--- a/clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h
+++ b/clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h
@@ -17,9 +17,6 @@ namespace diff {
 
 using DynTypedNode = DynTypedNode;
 
-class SyntaxTree;
-struct ComparisonOptions;
-
 /// Within a tree, this identifies a node by its preorder offset.
 struct NodeId {
 private:

From 4762c077e7102326306c7788494e3ea16e0f4cec Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 21:34:55 +0000
Subject: [PATCH 308/946] [X86] LowerFunnelShift - always lower vXi8 fshl by
 constant amounts as unpack(y,x) << zext(z)

This can always be lowered as PMULLW+PSRLWI+PACKUSWB
---
 llvm/lib/Target/X86/X86ISelLowering.cpp  |   6 +-
 llvm/test/CodeGen/X86/vector-fshl-128.ll | 165 ++++++-----------------
 llvm/test/CodeGen/X86/vector-fshl-256.ll | 117 ++++------------
 llvm/test/CodeGen/X86/vector-fshl-512.ll | 130 ++++++------------
 4 files changed, 112 insertions(+), 306 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b3672abfef957..be85c116bb037 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29831,6 +29831,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
 
     SDValue AmtMask = DAG.getConstant(EltSizeInBits - 1, DL, VT);
     SDValue AmtMod = DAG.getNode(ISD::AND, DL, VT, Amt, AmtMask);
+    bool IsCst = ISD::isBuildVectorOfConstantSDNodes(AmtMod.getNode());
 
     unsigned ShiftOpc = IsFSHR ? ISD::SRL : ISD::SHL;
     unsigned NumElts = VT.getVectorNumElements();
@@ -29867,7 +29868,7 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
     MVT WideVT = MVT::getVectorVT(WideSVT, NumElts);
 
     // If per-element shifts are legal, fallback to generic expansion.
-    if (supportedVectorVarShift(VT, Subtarget, ShiftOpc))
+    if (supportedVectorVarShift(VT, Subtarget, ShiftOpc) || Subtarget.hasXOP())
       return SDValue();
 
     // Attempt to fold as:
@@ -29889,7 +29890,8 @@ static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
     }
 
     // Attempt to fold per-element (ExtVT) shift as unpack(y,x) << zext(z)
-    if (supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
+    if ((IsCst && !IsFSHR && EltSizeInBits == 8) ||
+        supportedVectorVarShift(ExtVT, Subtarget, ShiftOpc)) {
       SDValue Z = DAG.getConstant(0, DL, VT);
       SDValue RLo = DAG.getBitcast(ExtVT, getUnpackl(DAG, DL, VT, Op1, Op0));
       SDValue RHi = DAG.getBitcast(ExtVT, getUnpackh(DAG, DL, VT, Op1, Op0));
diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index 7dda02a147341..b5d8c4f21a219 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -2182,119 +2182,50 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 }
 
 define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
-; SSE2-LABEL: constant_funnnel_v16i8:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE2-NEXT:    pand %xmm3, %xmm2
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT:    pand %xmm3, %xmm0
-; SSE2-NEXT:    packuswb %xmm2, %xmm0
-; SSE2-NEXT:    psrlw $1, %xmm1
-; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT:    pxor %xmm2, %xmm2
-; SSE2-NEXT:    movdqa %xmm1, %xmm3
-; SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
-; SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
-; SSE2-NEXT:    psrlw $8, %xmm3
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
-; SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT:    psrlw $8, %xmm1
-; SSE2-NEXT:    packuswb %xmm3, %xmm1
-; SSE2-NEXT:    por %xmm1, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: constant_funnnel_v16i8:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; SSE41-NEXT:    pand %xmm3, %xmm0
-; SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
-; SSE41-NEXT:    pand %xmm3, %xmm2
-; SSE41-NEXT:    packuswb %xmm0, %xmm2
-; SSE41-NEXT:    psrlw $1, %xmm1
-; SSE41-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT:    pxor %xmm3, %xmm3
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; SSE41-NEXT:    punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15]
-; SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE41-NEXT:    psrlw $8, %xmm1
-; SSE41-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT:    psrlw $8, %xmm0
-; SSE41-NEXT:    packuswb %xmm1, %xmm0
-; SSE41-NEXT:    por %xmm2, %xmm0
-; SSE41-NEXT:    retq
-;
-; AVX1-LABEL: constant_funnnel_v16i8:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vpand %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15]
-; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    retq
+; SSE-LABEL: constant_funnnel_v16i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
+; SSE-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
+; SSE-NEXT:    psrlw $8, %xmm2
+; SSE-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; SSE-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT:    psrlw $8, %xmm1
+; SSE-NEXT:    packuswb %xmm2, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm0
+; SSE-NEXT:    retq
 ;
-; AVX2-LABEL: constant_funnnel_v16i8:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX2-NEXT:    vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT:    vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; AVX-LABEL: constant_funnnel_v16i8:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; AVX-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: constant_funnnel_v16i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512F-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512F-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512F-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512F-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
-; AVX512F-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX512F-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512F-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; AVX512F-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512F-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX512F-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: constant_funnnel_v16i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512VL-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
-; AVX512VL-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
-; AVX512VL-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1
-; AVX512VL-NEXT:    vpord %zmm1, %zmm0, %zmm0
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
-; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX512VL-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
+; AVX512VL-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX512VL-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v16i8:
@@ -2360,27 +2291,15 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ;
 ; X86-SSE2-LABEL: constant_funnnel_v16i8:
 ; X86-SSE2:       # %bb.0:
-; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
-; X86-SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
+; X86-SSE2-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15]
 ; X86-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2
-; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255]
-; X86-SSE2-NEXT:    pand %xmm3, %xmm2
-; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; X86-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; X86-SSE2-NEXT:    pand %xmm3, %xmm0
-; X86-SSE2-NEXT:    packuswb %xmm2, %xmm0
-; X86-SSE2-NEXT:    psrlw $1, %xmm1
-; X86-SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
-; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
-; X86-SSE2-NEXT:    movdqa %xmm1, %xmm3
-; X86-SSE2-NEXT:    punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
-; X86-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
-; X86-SSE2-NEXT:    psrlw $8, %xmm3
-; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
+; X86-SSE2-NEXT:    psrlw $8, %xmm2
+; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; X86-SSE2-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
 ; X86-SSE2-NEXT:    psrlw $8, %xmm1
-; X86-SSE2-NEXT:    packuswb %xmm3, %xmm1
-; X86-SSE2-NEXT:    por %xmm1, %xmm0
+; X86-SSE2-NEXT:    packuswb %xmm2, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; X86-SSE2-NEXT:    retl
   %res = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1>)
   ret <16 x i8> %res
diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 8f9c88792353d..41f980886d000 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -1935,118 +1935,57 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
 ; AVX1-LABEL: constant_funnnel_v32i8:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm8 = [1,128,64,32,16,8,4,2]
-; AVX1-NEXT:    vpmullw %xmm3, %xmm8, %xmm3
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm9 = [1,2,4,8,16,32,64,128]
-; AVX1-NEXT:    vpmullw %xmm2, %xmm9, %xmm2
-; AVX1-NEXT:    vpand %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpsrlw $1, %xmm3, %xmm3
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm10 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX1-NEXT:    vpand %xmm3, %xmm10, %xmm3
-; AVX1-NEXT:    vpxor %xmm11, %xmm11, %xmm11
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm6 = xmm3[8],xmm11[8],xmm3[9],xmm11[9],xmm3[10],xmm11[10],xmm3[11],xmm11[11],xmm3[12],xmm11[12],xmm3[13],xmm11[13],xmm3[14],xmm11[14],xmm3[15],xmm11[15]
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [2,256,128,64,32,16,8,4]
-; AVX1-NEXT:    vpmullw %xmm7, %xmm6, %xmm6
-; AVX1-NEXT:    vpsrlw $8, %xmm6, %xmm6
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero
-; AVX1-NEXT:    vmovdqa {{.*#+}} xmm4 = [2,4,8,16,32,64,128,256]
-; AVX1-NEXT:    vpmullw %xmm4, %xmm3, %xmm3
-; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT:    vpackuswb %xmm6, %xmm3, %xmm3
-; AVX1-NEXT:    vpor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
-; AVX1-NEXT:    vpmullw %xmm3, %xmm8, %xmm3
-; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX1-NEXT:    vpmullw %xmm0, %xmm9, %xmm0
-; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpsrlw $1, %xmm1, %xmm1
-; AVX1-NEXT:    vpand %xmm1, %xmm10, %xmm1
-; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm11[8],xmm1[9],xmm11[9],xmm1[10],xmm11[10],xmm1[11],xmm11[11],xmm1[12],xmm11[12],xmm1[13],xmm11[13],xmm1[14],xmm11[14],xmm1[15],xmm11[15]
-; AVX1-NEXT:    vpmullw %xmm7, %xmm3, %xmm3
-; AVX1-NEXT:    vpsrlw $8, %xmm3, %xmm3
-; AVX1-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX1-NEXT:    vpmullw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vpsrlw $8, %xmm1, %xmm1
-; AVX1-NEXT:    vpackuswb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15]
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [1,128,64,32,16,8,4,2]
+; AVX1-NEXT:    vpmullw %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7]
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [1,2,4,8,16,32,64,128]
+; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
+; AVX1-NEXT:    vpackuswb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpunpckhbw {{.*#+}} xmm4 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
+; AVX1-NEXT:    vpmullw %xmm5, %xmm4, %xmm4
+; AVX1-NEXT:    vpsrlw $8, %xmm4, %xmm4
+; AVX1-NEXT:    vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
+; AVX1-NEXT:    vpmullw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm4, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: constant_funnnel_v32i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX2-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX2-NEXT:    vpand %ymm3, %ymm2, %ymm2
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX2-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
 ; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpsrlw $1, %ymm1, %ymm1
-; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; AVX2-NEXT:    vpsrlw $8, %ymm3, %ymm3
-; AVX2-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
-; AVX2-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX2-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; AVX2-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
 ;
 ; AVX512F-LABEL: constant_funnnel_v32i8:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX512F-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX512F-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
-; AVX512F-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512F-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
-; AVX512F-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; AVX512F-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: constant_funnnel_v32i8:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
+; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
 ; AVX512VL-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512VL-NEXT:    vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
+; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
+; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
 ; AVX512VL-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
-; AVX512VL-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpsrlw $1, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512VL-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31]
-; AVX512VL-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3
-; AVX512VL-NEXT:    vpsrlw $8, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23]
-; AVX512VL-NEXT:    vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v32i8:
diff --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index 8ba01c9ae3144..9954447f6836e 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -911,105 +911,51 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX512F-LABEL: constant_funnnel_v64i8:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm4 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
-; AVX512F-NEXT:    # ymm4 = mem[0,1,0,1]
-; AVX512F-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512F-NEXT:    vpand %ymm5, %ymm3, %ymm3
-; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm6 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; AVX512F-NEXT:    # ymm6 = mem[0,1,0,1]
-; AVX512F-NEXT:    vpmullw %ymm6, %ymm2, %ymm2
-; AVX512F-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512F-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512F-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
-; AVX512F-NEXT:    vpand %ymm5, %ymm3, %ymm3
-; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512F-NEXT:    vpmullw %ymm6, %ymm0, %ymm0
-; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512F-NEXT:    vpackuswb %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT:    vpsrlw $1, %ymm2, %ymm2
-; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512F-NEXT:    vpand %ymm3, %ymm2, %ymm2
-; AVX512F-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31]
-; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm6 = [2,256,128,64,32,16,8,4,2,256,128,64,32,16,8,4]
-; AVX512F-NEXT:    # ymm6 = mem[0,1,0,1]
-; AVX512F-NEXT:    vpmullw %ymm6, %ymm5, %ymm5
-; AVX512F-NEXT:    vpsrlw $8, %ymm5, %ymm5
-; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23]
-; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm7 = [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256]
-; AVX512F-NEXT:    # ymm7 = mem[0,1,0,1]
-; AVX512F-NEXT:    vpmullw %ymm7, %ymm2, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15],ymm3[24],ymm2[24],ymm3[25],ymm2[25],ymm3[26],ymm2[26],ymm3[27],ymm2[27],ymm3[28],ymm2[28],ymm3[29],ymm2[29],ymm3[30],ymm2[30],ymm3[31],ymm2[31]
+; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm5 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
+; AVX512F-NEXT:    # ymm5 = mem[0,1,0,1]
+; AVX512F-NEXT:    vpmullw %ymm5, %ymm4, %ymm4
+; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4
+; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[16],ymm2[16],ymm3[17],ymm2[17],ymm3[18],ymm2[18],ymm3[19],ymm2[19],ymm3[20],ymm2[20],ymm3[21],ymm2[21],ymm3[22],ymm2[22],ymm3[23],ymm2[23]
+; AVX512F-NEXT:    vbroadcasti128 {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; AVX512F-NEXT:    # ymm3 = mem[0,1,0,1]
+; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsrlw $8, %ymm2, %ymm2
-; AVX512F-NEXT:    vpackuswb %ymm5, %ymm2, %ymm2
-; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
-; AVX512F-NEXT:    vpand %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm4[8],ymm1[9],ymm4[9],ymm1[10],ymm4[10],ymm1[11],ymm4[11],ymm1[12],ymm4[12],ymm1[13],ymm4[13],ymm1[14],ymm4[14],ymm1[15],ymm4[15],ymm1[24],ymm4[24],ymm1[25],ymm4[25],ymm1[26],ymm4[26],ymm1[27],ymm4[27],ymm1[28],ymm4[28],ymm1[29],ymm4[29],ymm1[30],ymm4[30],ymm1[31],ymm4[31]
-; AVX512F-NEXT:    vpmullw %ymm6, %ymm3, %ymm3
-; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
-; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm4[0],ymm1[1],ymm4[1],ymm1[2],ymm4[2],ymm1[3],ymm4[3],ymm1[4],ymm4[4],ymm1[5],ymm4[5],ymm1[6],ymm4[6],ymm1[7],ymm4[7],ymm1[16],ymm4[16],ymm1[17],ymm4[17],ymm1[18],ymm4[18],ymm1[19],ymm4[19],ymm1[20],ymm4[20],ymm1[21],ymm4[21],ymm1[22],ymm4[22],ymm1[23],ymm4[23]
-; AVX512F-NEXT:    vpmullw %ymm7, %ymm1, %ymm1
-; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; AVX512F-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpackuswb %ymm4, %ymm2, %ymm2
+; AVX512F-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
+; AVX512F-NEXT:    vpmullw %ymm5, %ymm4, %ymm4
+; AVX512F-NEXT:    vpsrlw $8, %ymm4, %ymm4
+; AVX512F-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX512F-NEXT:    vpmullw %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512F-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: constant_funnnel_v64i8:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm4 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
-; AVX512VL-NEXT:    # ymm4 = mem[0,1,0,1]
-; AVX512VL-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512VL-NEXT:    vpand %ymm5, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm6 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
-; AVX512VL-NEXT:    # ymm6 = mem[0,1,0,1]
-; AVX512VL-NEXT:    vpmullw %ymm6, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpand %ymm5, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31]
-; AVX512VL-NEXT:    vpmullw %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpand %ymm5, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23]
-; AVX512VL-NEXT:    vpmullw %ymm6, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpand %ymm5, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpackuswb %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512VL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-NEXT:    vpsrlw $1, %ymm2, %ymm2
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512VL-NEXT:    vpand %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm5 = ymm2[8],ymm4[8],ymm2[9],ymm4[9],ymm2[10],ymm4[10],ymm2[11],ymm4[11],ymm2[12],ymm4[12],ymm2[13],ymm4[13],ymm2[14],ymm4[14],ymm2[15],ymm4[15],ymm2[24],ymm4[24],ymm2[25],ymm4[25],ymm2[26],ymm4[26],ymm2[27],ymm4[27],ymm2[28],ymm4[28],ymm2[29],ymm4[29],ymm2[30],ymm4[30],ymm2[31],ymm4[31]
-; AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm6 = [2,256,128,64,32,16,8,4,2,256,128,64,32,16,8,4]
-; AVX512VL-NEXT:    # ymm6 = mem[0,1,0,1]
-; AVX512VL-NEXT:    vpmullw %ymm6, %ymm5, %ymm5
-; AVX512VL-NEXT:    vpsrlw $8, %ymm5, %ymm5
-; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm4[0],ymm2[1],ymm4[1],ymm2[2],ymm4[2],ymm2[3],ymm4[3],ymm2[4],ymm4[4],ymm2[5],ymm4[5],ymm2[6],ymm4[6],ymm2[7],ymm4[7],ymm2[16],ymm4[16],ymm2[17],ymm4[17],ymm2[18],ymm4[18],ymm2[19],ymm4[19],ymm2[20],ymm4[20],ymm2[21],ymm4[21],ymm2[22],ymm4[22],ymm2[23],ymm4[23]
-; AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm7 = [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256]
-; AVX512VL-NEXT:    # ymm7 = mem[0,1,0,1]
-; AVX512VL-NEXT:    vpmullw %ymm7, %ymm2, %ymm2
+; AVX512VL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
+; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm3[8],ymm2[8],ymm3[9],ymm2[9],ymm3[10],ymm2[10],ymm3[11],ymm2[11],ymm3[12],ymm2[12],ymm3[13],ymm2[13],ymm3[14],ymm2[14],ymm3[15],ymm2[15],ymm3[24],ymm2[24],ymm3[25],ymm2[25],ymm3[26],ymm2[26],ymm3[27],ymm2[27],ymm3[28],ymm2[28],ymm3[29],ymm2[29],ymm3[30],ymm2[30],ymm3[31],ymm2[31]
+; AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm5 = [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2]
+; AVX512VL-NEXT:    # ymm5 = mem[0,1,0,1]
+; AVX512VL-NEXT:    vpmullw %ymm5, %ymm4, %ymm4
+; AVX512VL-NEXT:    vpsrlw $8, %ymm4, %ymm4
+; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm2 = ymm3[0],ymm2[0],ymm3[1],ymm2[1],ymm3[2],ymm2[2],ymm3[3],ymm2[3],ymm3[4],ymm2[4],ymm3[5],ymm2[5],ymm3[6],ymm2[6],ymm3[7],ymm2[7],ymm3[16],ymm2[16],ymm3[17],ymm2[17],ymm3[18],ymm2[18],ymm3[19],ymm2[19],ymm3[20],ymm2[20],ymm3[21],ymm2[21],ymm3[22],ymm2[22],ymm3[23],ymm2[23]
+; AVX512VL-NEXT:    vbroadcasti128 {{.*#+}} ymm3 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128]
+; AVX512VL-NEXT:    # ymm3 = mem[0,1,0,1]
+; AVX512VL-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
 ; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpackuswb %ymm5, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpsrlw $1, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm4[8],ymm1[9],ymm4[9],ymm1[10],ymm4[10],ymm1[11],ymm4[11],ymm1[12],ymm4[12],ymm1[13],ymm4[13],ymm1[14],ymm4[14],ymm1[15],ymm4[15],ymm1[24],ymm4[24],ymm1[25],ymm4[25],ymm1[26],ymm4[26],ymm1[27],ymm4[27],ymm1[28],ymm4[28],ymm1[29],ymm4[29],ymm1[30],ymm4[30],ymm1[31],ymm4[31]
-; AVX512VL-NEXT:    vpmullw %ymm6, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpsrlw $8, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm4[0],ymm1[1],ymm4[1],ymm1[2],ymm4[2],ymm1[3],ymm4[3],ymm1[4],ymm4[4],ymm1[5],ymm4[5],ymm1[6],ymm4[6],ymm1[7],ymm4[7],ymm1[16],ymm4[16],ymm1[17],ymm4[17],ymm1[18],ymm4[18],ymm1[19],ymm4[19],ymm1[20],ymm4[20],ymm1[21],ymm4[21],ymm1[22],ymm4[22],ymm1[23],ymm4[23]
-; AVX512VL-NEXT:    vpmullw %ymm7, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpackuswb %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512VL-NEXT:    vporq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vpackuswb %ymm4, %ymm2, %ymm2
+; AVX512VL-NEXT:    vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
+; AVX512VL-NEXT:    vpmullw %ymm5, %ymm4, %ymm4
+; AVX512VL-NEXT:    vpsrlw $8, %ymm4, %ymm4
+; AVX512VL-NEXT:    vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
+; AVX512VL-NEXT:    vpmullw %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpackuswb %ymm4, %ymm0, %ymm0
+; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v64i8:

From 88f33cff4bee87ea31e129f734df232274098a78 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sun, 23 Jan 2022 09:42:18 -0800
Subject: [PATCH 309/946] [RISCV] Add bitreverse tests to
 bswap-ctlz-cttz-ctpop.ll. Add Zbb command lines. NFC

Rename to include bitreverse. Add additional tests and Zbb command lines.

There's some overlapping tests with rv32zbb.ll and rv64zbb.ll. Maybe
I'll clean that up in a future patch.
---
 ...ll => bswap-bitreverse-ctlz-cttz-ctpop.ll} | 819 ++++++++++++++++--
 1 file changed, 769 insertions(+), 50 deletions(-)
 rename llvm/test/CodeGen/RISCV/{bswap-ctlz-cttz-ctpop.ll => bswap-bitreverse-ctlz-cttz-ctpop.ll} (58%)

diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
similarity index 58%
rename from llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
rename to llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
index 4be19eefa948f..8a0c0db7aaa38 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
@@ -3,10 +3,18 @@
 ; RUN:   | FileCheck %s -check-prefix=RV32I
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBB
+; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBB
 
 declare i16 @llvm.bswap.i16(i16)
 declare i32 @llvm.bswap.i32(i32)
 declare i64 @llvm.bswap.i64(i64)
+declare i8 @llvm.bitreverse.i8(i8)
+declare i16 @llvm.bitreverse.i16(i16)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i64 @llvm.bitreverse.i64(i64)
 declare i8 @llvm.cttz.i8(i8, i1)
 declare i16 @llvm.cttz.i16(i16, i1)
 declare i32 @llvm.cttz.i32(i32, i1)
@@ -31,6 +39,18 @@ define i16 @test_bswap_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bswap_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a0, a0, 16
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bswap_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 48
+; RV64ZBB-NEXT:    ret
   %tmp = call i16 @llvm.bswap.i16(i16 %a)
   ret i16 %tmp
 }
@@ -67,6 +87,17 @@ define i32 @test_bswap_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bswap_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bswap_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 32
+; RV64ZBB-NEXT:    ret
   %tmp = call i32 @llvm.bswap.i32(i32 %a)
   ret i32 %tmp
 }
@@ -129,15 +160,542 @@ define i64 @test_bswap_i64(i64 %a) nounwind {
 ; RV64I-NEXT:    or a0, a0, a2
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bswap_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a2, a1
+; RV32ZBB-NEXT:    rev8 a1, a0
+; RV32ZBB-NEXT:    mv a0, a2
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bswap_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i64 @llvm.bswap.i64(i64 %a)
   ret i64 %tmp
 }
 
+define i8 @test_bitreverse_i8(i8 %a) nounwind {
+; RV32I-LABEL: test_bitreverse_i8:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a1, a0, 15
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srli a0, a0, 28
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    andi a1, a0, 51
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    andi a0, a0, 51
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    andi a1, a0, 85
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    srli a0, a0, 1
+; RV32I-NEXT:    andi a0, a0, 85
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bitreverse_i8:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a1, a0, 15
+; RV64I-NEXT:    slli a1, a1, 4
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    srli a0, a0, 60
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    andi a1, a0, 51
+; RV64I-NEXT:    slli a1, a1, 2
+; RV64I-NEXT:    srli a0, a0, 2
+; RV64I-NEXT:    andi a0, a0, 51
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    andi a1, a0, 85
+; RV64I-NEXT:    slli a1, a1, 1
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    andi a0, a0, 85
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bitreverse_i8:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    andi a1, a0, 15
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    slli a0, a0, 24
+; RV32ZBB-NEXT:    srli a0, a0, 28
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    andi a1, a0, 51
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    srli a0, a0, 2
+; RV32ZBB-NEXT:    andi a0, a0, 51
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    andi a1, a0, 85
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    srli a0, a0, 1
+; RV32ZBB-NEXT:    andi a0, a0, 85
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bitreverse_i8:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    andi a1, a0, 15
+; RV64ZBB-NEXT:    slli a1, a1, 4
+; RV64ZBB-NEXT:    slli a0, a0, 56
+; RV64ZBB-NEXT:    srli a0, a0, 60
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    andi a1, a0, 51
+; RV64ZBB-NEXT:    slli a1, a1, 2
+; RV64ZBB-NEXT:    srli a0, a0, 2
+; RV64ZBB-NEXT:    andi a0, a0, 51
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    andi a1, a0, 85
+; RV64ZBB-NEXT:    slli a1, a1, 1
+; RV64ZBB-NEXT:    srli a0, a0, 1
+; RV64ZBB-NEXT:    andi a0, a0, 85
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %tmp = call i8 @llvm.bitreverse.i8(i8 %a)
+  ret i8 %tmp
+}
+
+define i16 @test_bitreverse_i16(i16 %a) nounwind {
+; RV32I-LABEL: test_bitreverse_i16:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    lui a2, 1
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    lui a2, 3
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 5
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bitreverse_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    lui a2, 1
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    lui a2, 3
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 5
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bitreverse_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 12
+; RV32ZBB-NEXT:    lui a2, 15
+; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    srli a0, a0, 20
+; RV32ZBB-NEXT:    andi a0, a0, -241
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 3
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 5
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bitreverse_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a1, a0, 44
+; RV64ZBB-NEXT:    lui a2, 15
+; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    srli a0, a0, 52
+; RV64ZBB-NEXT:    andi a0, a0, -241
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 3
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 5
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
+  ret i16 %tmp
+}
+
+define i32 @test_bitreverse_i32(i32 %a) nounwind {
+; RV32I-LABEL: test_bitreverse_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 24
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    lui a3, 4080
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bitreverse_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a2, a0, 24
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    lui a3, 4080
+; RV64I-NEXT:    and a2, a2, a3
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slliw a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slliw a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slliw a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bitreverse_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 61681
+; RV32ZBB-NEXT:    addi a2, a2, -241
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 209715
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 349525
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bitreverse_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    lui a2, 61681
+; RV64ZBB-NEXT:    addiw a2, a2, -241
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    srli a0, a0, 28
+; RV64ZBB-NEXT:    lui a2, 986895
+; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 209715
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 349525
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
+  ret i32 %tmp
+}
+
+define i64 @test_bitreverse_i64(i64 %a) nounwind {
+; RV32I-LABEL: test_bitreverse_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a2, a1, 8
+; RV32I-NEXT:    lui a3, 16
+; RV32I-NEXT:    addi a3, a3, -256
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    srli a4, a1, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    slli a4, a1, 8
+; RV32I-NEXT:    lui a5, 4080
+; RV32I-NEXT:    and a4, a4, a5
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    srli a2, a1, 4
+; RV32I-NEXT:    lui a4, 61681
+; RV32I-NEXT:    addi a4, a4, -241
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 2
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a2, a2, a6
+; RV32I-NEXT:    and a1, a1, a6
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 1
+; RV32I-NEXT:    lui a7, 349525
+; RV32I-NEXT:    addi a7, a7, 1365
+; RV32I-NEXT:    and a2, a2, a7
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a2, a2, a1
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    srli a3, a0, 24
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    slli a3, a0, 8
+; RV32I-NEXT:    and a3, a3, a5
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    and a1, a1, a6
+; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a1, a1, a0
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bitreverse_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 24
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a2, a0, 8
+; RV64I-NEXT:    li a3, 255
+; RV64I-NEXT:    slli a4, a3, 24
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    srli a2, a0, 40
+; RV64I-NEXT:    lui a4, 16
+; RV64I-NEXT:    addiw a4, a4, -256
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    srli a4, a0, 56
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    slli a4, a3, 40
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    srliw a4, a0, 24
+; RV64I-NEXT:    slli a4, a4, 32
+; RV64I-NEXT:    or a2, a2, a4
+; RV64I-NEXT:    slli a4, a0, 40
+; RV64I-NEXT:    slli a3, a3, 48
+; RV64I-NEXT:    and a3, a4, a3
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    lui a3, %hi(.LCPI6_0)
+; RV64I-NEXT:    ld a3, %lo(.LCPI6_0)(a3)
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, %hi(.LCPI6_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI6_1)(a2)
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a2, %hi(.LCPI6_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI6_2)(a2)
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bitreverse_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a1, a1
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    lui a3, 61681
+; RV32ZBB-NEXT:    addi a3, a3, -241
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a2, a2, a1
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    and a0, a0, a3
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    and a0, a0, a4
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    and a0, a0, a5
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a1, a1, a0
+; RV32ZBB-NEXT:    mv a0, a2
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bitreverse_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI6_0)(a1)
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a2, a0, 4
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI6_1)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI6_1)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 2
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI6_2)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI6_2)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 1
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
+  ret i64 %tmp
+}
+
 define i8 @test_cttz_i8(i8 %a) nounwind {
 ; RV32I-LABEL: test_cttz_i8:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    beqz a1, .LBB3_2
+; RV32I-NEXT:    beqz a1, .LBB7_2
 ; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    addi a1, a0, -1
 ; RV32I-NEXT:    not a0, a0
@@ -153,14 +711,14 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    andi a0, a0, 15
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB3_2:
+; RV32I-NEXT:  .LBB7_2:
 ; RV32I-NEXT:    li a0, 8
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_cttz_i8:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    beqz a1, .LBB3_2
+; RV64I-NEXT:    beqz a1, .LBB7_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
@@ -176,9 +734,21 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 15
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB3_2:
+; RV64I-NEXT:  .LBB7_2:
 ; RV64I-NEXT:    li a0, 8
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_cttz_i8:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    ori a0, a0, 256
+; RV32ZBB-NEXT:    ctz a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_cttz_i8:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ori a0, a0, 256
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 false)
   ret i8 %tmp
 }
@@ -188,7 +758,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a1, a0, 16
 ; RV32I-NEXT:    srli a1, a1, 16
-; RV32I-NEXT:    beqz a1, .LBB4_2
+; RV32I-NEXT:    beqz a1, .LBB8_2
 ; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    addi a1, a0, -1
 ; RV32I-NEXT:    not a0, a0
@@ -214,7 +784,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV32I-NEXT:    slli a0, a0, 19
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB4_2:
+; RV32I-NEXT:  .LBB8_2:
 ; RV32I-NEXT:    li a0, 16
 ; RV32I-NEXT:    ret
 ;
@@ -222,7 +792,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a1, a0, 48
 ; RV64I-NEXT:    srli a1, a1, 48
-; RV64I-NEXT:    beqz a1, .LBB4_2
+; RV64I-NEXT:    beqz a1, .LBB8_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
@@ -248,9 +818,23 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    slli a0, a0, 51
 ; RV64I-NEXT:    srli a0, a0, 59
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:  .LBB8_2:
 ; RV64I-NEXT:    li a0, 16
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_cttz_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    lui a1, 16
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    ctz a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_cttz_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, 16
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 false)
   ret i16 %tmp
 }
@@ -258,7 +842,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 define i32 @test_cttz_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_cttz_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beqz a0, .LBB5_2
+; RV32I-NEXT:    beqz a0, .LBB9_2
 ; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
@@ -288,14 +872,14 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB5_2:
+; RV32I-NEXT:  .LBB9_2:
 ; RV32I-NEXT:    li a0, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_cttz_i32:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    beqz a1, .LBB5_2
+; RV64I-NEXT:    beqz a1, .LBB9_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
@@ -325,9 +909,19 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:  .LBB9_2:
 ; RV64I-NEXT:    li a0, 32
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_cttz_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    ctz a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_cttz_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 false)
   ret i32 %tmp
 }
@@ -335,7 +929,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_ctlz_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beqz a0, .LBB6_2
+; RV32I-NEXT:    beqz a0, .LBB10_2
 ; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
@@ -373,14 +967,14 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB6_2:
+; RV32I-NEXT:  .LBB10_2:
 ; RV32I-NEXT:    li a0, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_ctlz_i32:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    beqz a1, .LBB6_2
+; RV64I-NEXT:    beqz a1, .LBB10_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
@@ -418,9 +1012,19 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB6_2:
+; RV64I-NEXT:  .LBB10_2:
 ; RV64I-NEXT:    li a0, 32
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_ctlz_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    clz a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_ctlz_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    clzw a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i32 @llvm.ctlz.i32(i32 %a, i1 false)
   ret i32 %tmp
 }
@@ -478,14 +1082,14 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, s6
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s2, .LBB7_2
+; RV32I-NEXT:    bnez s2, .LBB11_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:    j .LBB7_3
-; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:    j .LBB11_3
+; RV32I-NEXT:  .LBB11_2:
 ; RV32I-NEXT:    srli a0, s0, 24
-; RV32I-NEXT:  .LBB7_3:
+; RV32I-NEXT:  .LBB11_3:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -500,39 +1104,57 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ;
 ; RV64I-LABEL: test_cttz_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    beqz a0, .LBB7_2
+; RV64I-NEXT:    beqz a0, .LBB11_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    lui a1, %hi(.LCPI7_0)
-; RV64I-NEXT:    ld a1, %lo(.LCPI7_0)(a1)
-; RV64I-NEXT:    lui a2, %hi(.LCPI7_1)
-; RV64I-NEXT:    ld a2, %lo(.LCPI7_1)(a2)
+; RV64I-NEXT:    lui a1, %hi(.LCPI11_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI11_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI11_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI11_1)(a2)
 ; RV64I-NEXT:    srli a3, a0, 1
 ; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a2, %hi(.LCPI7_2)
-; RV64I-NEXT:    ld a2, %lo(.LCPI7_2)(a2)
+; RV64I-NEXT:    lui a2, %hi(.LCPI11_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI11_2)(a2)
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a1, %hi(.LCPI7_3)
-; RV64I-NEXT:    ld a1, %lo(.LCPI7_3)(a1)
+; RV64I-NEXT:    lui a1, %hi(.LCPI11_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI11_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB7_2:
+; RV64I-NEXT:  .LBB11_2:
 ; RV64I-NEXT:    li a0, 64
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_cttz_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    bnez a0, .LBB11_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    ctz a0, a1
+; RV32ZBB-NEXT:    addi a0, a0, 32
+; RV32ZBB-NEXT:    li a1, 0
+; RV32ZBB-NEXT:    ret
+; RV32ZBB-NEXT:  .LBB11_2:
+; RV32ZBB-NEXT:    ctz a0, a0
+; RV32ZBB-NEXT:    li a1, 0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_cttz_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 false)
   ret i64 %tmp
 }
@@ -571,6 +1193,16 @@ define i8 @test_cttz_i8_zero_undef(i8 %a) nounwind {
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 15
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_cttz_i8_zero_undef:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    ctz a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_cttz_i8_zero_undef:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i8 @llvm.cttz.i8(i8 %a, i1 true)
   ret i8 %tmp
 }
@@ -629,6 +1261,16 @@ define i16 @test_cttz_i16_zero_undef(i16 %a) nounwind {
 ; RV64I-NEXT:    slli a0, a0, 51
 ; RV64I-NEXT:    srli a0, a0, 59
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_cttz_i16_zero_undef:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    ctz a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_cttz_i16_zero_undef:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i16 @llvm.cttz.i16(i16 %a, i1 true)
   ret i16 %tmp
 }
@@ -695,6 +1337,16 @@ define i32 @test_cttz_i32_zero_undef(i32 %a) nounwind {
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_cttz_i32_zero_undef:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    ctz a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_cttz_i32_zero_undef:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctzw a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i32 @llvm.cttz.i32(i32 %a, i1 true)
   ret i32 %tmp
 }
@@ -752,14 +1404,14 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, s6
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s2, .LBB11_2
+; RV32I-NEXT:    bnez s2, .LBB15_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:    j .LBB11_3
-; RV32I-NEXT:  .LBB11_2:
+; RV32I-NEXT:    j .LBB15_3
+; RV32I-NEXT:  .LBB15_2:
 ; RV32I-NEXT:    srli a0, s0, 24
-; RV32I-NEXT:  .LBB11_3:
+; RV32I-NEXT:  .LBB15_3:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -779,29 +1431,47 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    lui a1, %hi(.LCPI11_0)
-; RV64I-NEXT:    ld a1, %lo(.LCPI11_0)(a1)
-; RV64I-NEXT:    lui a2, %hi(.LCPI11_1)
-; RV64I-NEXT:    ld a2, %lo(.LCPI11_1)(a2)
+; RV64I-NEXT:    lui a1, %hi(.LCPI15_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI15_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI15_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI15_1)(a2)
 ; RV64I-NEXT:    srli a3, a0, 1
 ; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a2, %hi(.LCPI11_2)
-; RV64I-NEXT:    ld a2, %lo(.LCPI11_2)(a2)
+; RV64I-NEXT:    lui a2, %hi(.LCPI15_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI15_2)(a2)
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a1, %hi(.LCPI11_3)
-; RV64I-NEXT:    ld a1, %lo(.LCPI11_3)(a1)
+; RV64I-NEXT:    lui a1, %hi(.LCPI15_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI15_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_cttz_i64_zero_undef:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    bnez a0, .LBB15_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    ctz a0, a1
+; RV32ZBB-NEXT:    addi a0, a0, 32
+; RV32ZBB-NEXT:    li a1, 0
+; RV32ZBB-NEXT:    ret
+; RV32ZBB-NEXT:  .LBB15_2:
+; RV32ZBB-NEXT:    ctz a0, a0
+; RV32ZBB-NEXT:    li a1, 0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_cttz_i64_zero_undef:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    ctz a0, a0
+; RV64ZBB-NEXT:    ret
   %tmp = call i64 @llvm.cttz.i64(i64 %a, i1 true)
   ret i64 %tmp
 }
@@ -862,6 +1532,16 @@ define i32 @test_ctpop_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_ctpop_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_ctpop_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    ret
   %1 = call i32 @llvm.ctpop.i32(i32 %a)
   ret i32 %1
 }
@@ -928,29 +1608,42 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a1, %hi(.LCPI13_0)
-; RV64I-NEXT:    ld a1, %lo(.LCPI13_0)(a1)
-; RV64I-NEXT:    lui a2, %hi(.LCPI13_1)
-; RV64I-NEXT:    ld a2, %lo(.LCPI13_1)(a2)
+; RV64I-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI17_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI17_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI17_1)(a2)
 ; RV64I-NEXT:    srli a3, a0, 1
 ; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a2, %hi(.LCPI13_2)
-; RV64I-NEXT:    ld a2, %lo(.LCPI13_2)(a2)
+; RV64I-NEXT:    lui a2, %hi(.LCPI17_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI17_2)(a2)
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a1, %hi(.LCPI13_3)
-; RV64I-NEXT:    ld a1, %lo(.LCPI13_3)(a1)
+; RV64I-NEXT:    lui a1, %hi(.LCPI17_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI17_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_ctpop_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    cpop a1, a1
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    add a0, a0, a1
+; RV32ZBB-NEXT:    li a1, 0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_ctpop_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    cpop a0, a0
+; RV64ZBB-NEXT:    ret
   %1 = call i64 @llvm.ctpop.i64(i64 %a)
   ret i64 %1
 }
@@ -987,6 +1680,18 @@ define i32 @test_parity_i32(i32 %a) {
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 1
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_parity_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    andi a0, a0, 1
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_parity_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    cpopw a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 1
+; RV64ZBB-NEXT:    ret
   %1 = call i32 @llvm.ctpop.i32(i32 %a)
   %2 = and i32 %1, 1
   ret i32 %2
@@ -1026,6 +1731,20 @@ define i64 @test_parity_i64(i64 %a) {
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 1
 ; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_parity_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    xor a0, a0, a1
+; RV32ZBB-NEXT:    cpop a0, a0
+; RV32ZBB-NEXT:    andi a0, a0, 1
+; RV32ZBB-NEXT:    li a1, 0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_parity_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    cpop a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 1
+; RV64ZBB-NEXT:    ret
   %1 = call i64 @llvm.ctpop.i64(i64 %a)
   %2 = and i64 %1, 1
   ret i64 %2

From 3575700b286f8c3150abb3de7968a9f36dd1cceb Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sun, 23 Jan 2022 13:37:33 -0800
Subject: [PATCH 310/946] [RISCV] Add tests that do a bitreverse before or
 after a bswap. NFC

We don't optimize this as well as we could. Bitreverse is always
expanded to bswap and a shift/and/or sequence to swap bits within a
byte. The newly created bswap will either becomes a shift/and/or
sequence or rev8 instruction. We don't always realize the bswap is
redundant with another bswap before or after the bitreverse.

Found while thinking about the brev8 instruction from the
Cryptography extension. It's equivalent to bswap(bitreverse(x)) or
bitreverse(bswap(x)).
---
 .../RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll | 1118 ++++++++++++++++-
 1 file changed, 1064 insertions(+), 54 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
index 8a0c0db7aaa38..435ea9c0d80df 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
@@ -691,11 +691,1021 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
   ret i64 %tmp
 }
 
+define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
+; RV32I-LABEL: test_bswap_bitreverse_i16:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    slli a2, a0, 16
+; RV32I-NEXT:    srli a2, a2, 24
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    slli a1, a1, 8
+; RV32I-NEXT:    andi a0, a0, 255
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    lui a2, 1
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    lui a2, 3
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 5
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bswap_bitreverse_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    slli a2, a0, 48
+; RV64I-NEXT:    srli a2, a2, 56
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    lui a2, 1
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    lui a2, 3
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 5
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bswap_bitreverse_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a0, a0, 16
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 12
+; RV32ZBB-NEXT:    lui a2, 15
+; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    srli a0, a0, 20
+; RV32ZBB-NEXT:    andi a0, a0, -241
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 3
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 5
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bswap_bitreverse_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 48
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a1, a0, 44
+; RV64ZBB-NEXT:    lui a2, 15
+; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    srli a0, a0, 52
+; RV64ZBB-NEXT:    andi a0, a0, -241
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 3
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 5
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i16 @llvm.bswap.i16(i16 %a)
+  %tmp2 = call i16 @llvm.bitreverse.i16(i16 %tmp)
+  ret i16 %tmp2
+}
+
+define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
+; RV32I-LABEL: test_bswap_bitreverse_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a3, a0, 24
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    slli a3, a0, 8
+; RV32I-NEXT:    lui a4, 4080
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 24
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bswap_bitreverse_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    slli a3, a0, 8
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a2, a0, 24
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slliw a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slliw a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slliw a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bswap_bitreverse_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 61681
+; RV32ZBB-NEXT:    addi a2, a2, -241
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 209715
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 349525
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bswap_bitreverse_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 32
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    lui a2, 61681
+; RV64ZBB-NEXT:    addiw a2, a2, -241
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    srli a0, a0, 28
+; RV64ZBB-NEXT:    lui a2, 986895
+; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 209715
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 349525
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i32 @llvm.bswap.i32(i32 %a)
+  %tmp2 = call i32 @llvm.bitreverse.i32(i32 %tmp)
+  ret i32 %tmp2
+}
+
+define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
+; RV32I-LABEL: test_bswap_bitreverse_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a2, a1, 8
+; RV32I-NEXT:    lui a3, 16
+; RV32I-NEXT:    addi a3, a3, -256
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    srli a4, a1, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    slli a4, a1, 8
+; RV32I-NEXT:    lui a5, 4080
+; RV32I-NEXT:    and a4, a4, a5
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 8
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    srli a4, a0, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    slli a4, a0, 8
+; RV32I-NEXT:    and a4, a4, a5
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a4
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    srli a2, a0, 8
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    srli a4, a0, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    slli a4, a0, 8
+; RV32I-NEXT:    and a4, a4, a5
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a4
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    lui a4, 61681
+; RV32I-NEXT:    addi a4, a4, -241
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a2, a2, a6
+; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    lui a7, 349525
+; RV32I-NEXT:    addi a7, a7, 1365
+; RV32I-NEXT:    and a2, a2, a7
+; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a1, 8
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    srli a3, a1, 24
+; RV32I-NEXT:    or a2, a2, a3
+; RV32I-NEXT:    slli a3, a1, 8
+; RV32I-NEXT:    and a3, a3, a5
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    srli a2, a1, 4
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 2
+; RV32I-NEXT:    and a2, a2, a6
+; RV32I-NEXT:    and a1, a1, a6
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 1
+; RV32I-NEXT:    and a2, a2, a7
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bswap_bitreverse_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 24
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a3, a0, 8
+; RV64I-NEXT:    li a4, 255
+; RV64I-NEXT:    slli a5, a4, 24
+; RV64I-NEXT:    and a3, a3, a5
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    srli a3, a0, 40
+; RV64I-NEXT:    lui a6, 16
+; RV64I-NEXT:    addiw a6, a6, -256
+; RV64I-NEXT:    and a3, a3, a6
+; RV64I-NEXT:    srli a7, a0, 56
+; RV64I-NEXT:    or a3, a3, a7
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    slli a3, a0, 24
+; RV64I-NEXT:    slli a7, a4, 40
+; RV64I-NEXT:    and a3, a3, a7
+; RV64I-NEXT:    srliw t0, a0, 24
+; RV64I-NEXT:    slli t0, t0, 32
+; RV64I-NEXT:    or a3, a3, t0
+; RV64I-NEXT:    slli t0, a0, 40
+; RV64I-NEXT:    slli a4, a4, 48
+; RV64I-NEXT:    and t0, t0, a4
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, t0
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 40
+; RV64I-NEXT:    and a1, a1, a6
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    and a2, a3, a2
+; RV64I-NEXT:    srli a3, a0, 8
+; RV64I-NEXT:    and a3, a3, a5
+; RV64I-NEXT:    or a2, a3, a2
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    and a2, a2, a7
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    or a2, a2, a3
+; RV64I-NEXT:    slli a3, a0, 40
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    lui a3, %hi(.LCPI9_0)
+; RV64I-NEXT:    ld a3, %lo(.LCPI9_0)(a3)
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, %hi(.LCPI9_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI9_1)(a2)
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    lui a2, %hi(.LCPI9_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI9_2)(a2)
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bswap_bitreverse_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    srli a2, a0, 4
+; RV32ZBB-NEXT:    lui a3, 61681
+; RV32ZBB-NEXT:    addi a3, a3, -241
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a0, a0, a3
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a0, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a0, a0, a4
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a0, a0, a5
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bswap_bitreverse_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI9_0)(a1)
+; RV64ZBB-NEXT:    srli a2, a0, 4
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI9_1)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI9_1)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 2
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI9_2)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI9_2)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 1
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i64 @llvm.bswap.i64(i64 %a)
+  %tmp2 = call i64 @llvm.bitreverse.i64(i64 %tmp)
+  ret i64 %tmp2
+}
+
+define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
+; RV32I-LABEL: test_bitreverse_bswap_i16:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    lui a2, 1
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    lui a2, 3
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 5
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    slli a0, a0, 8
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bitreverse_bswap_i16:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a1, a0, 8
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 56
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    lui a2, 1
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    lui a2, 3
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a2, 5
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 8
+; RV64I-NEXT:    slli a0, a0, 8
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bitreverse_bswap_i16:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 12
+; RV32ZBB-NEXT:    lui a2, 15
+; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    srli a0, a0, 20
+; RV32ZBB-NEXT:    andi a0, a0, -241
+; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 3
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 5
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a0, a0, 16
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bitreverse_bswap_i16:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a1, a0, 44
+; RV64ZBB-NEXT:    lui a2, 15
+; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    srli a0, a0, 52
+; RV64ZBB-NEXT:    andi a0, a0, -241
+; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 3
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 5
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 48
+; RV64ZBB-NEXT:    ret
+  %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
+  %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp)
+  ret i16 %tmp2
+}
+
+define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
+; RV32I-LABEL: test_bitreverse_bswap_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a3, a0, 24
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    slli a3, a0, 8
+; RV32I-NEXT:    lui a4, 4080
+; RV32I-NEXT:    and a3, a3, a4
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a3
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    lui a3, 209715
+; RV32I-NEXT:    addi a3, a3, 819
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a3, 349525
+; RV32I-NEXT:    addi a3, a3, 1365
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    srli a2, a0, 24
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    slli a2, a0, 8
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bitreverse_bswap_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    lui a2, 16
+; RV64I-NEXT:    addiw a2, a2, -256
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    slli a3, a0, 8
+; RV64I-NEXT:    lui a4, 4080
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    lui a3, 61681
+; RV64I-NEXT:    addiw a3, a3, -241
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    slliw a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    lui a3, 209715
+; RV64I-NEXT:    addiw a3, a3, 819
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    slliw a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    lui a3, 349525
+; RV64I-NEXT:    addiw a3, a3, 1365
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    slliw a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srliw a1, a0, 8
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srliw a2, a0, 24
+; RV64I-NEXT:    or a1, a1, a2
+; RV64I-NEXT:    slli a2, a0, 8
+; RV64I-NEXT:    and a2, a2, a4
+; RV64I-NEXT:    slliw a0, a0, 24
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bitreverse_bswap_i32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 61681
+; RV32ZBB-NEXT:    addi a2, a2, -241
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 2
+; RV32ZBB-NEXT:    lui a2, 209715
+; RV32ZBB-NEXT:    addi a2, a2, 819
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    srli a1, a0, 1
+; RV32ZBB-NEXT:    lui a2, 349525
+; RV32ZBB-NEXT:    addi a2, a2, 1365
+; RV32ZBB-NEXT:    and a1, a1, a2
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a1, a0
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bitreverse_bswap_i32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    lui a2, 61681
+; RV64ZBB-NEXT:    addiw a2, a2, -241
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    srli a0, a0, 28
+; RV64ZBB-NEXT:    lui a2, 986895
+; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 2
+; RV64ZBB-NEXT:    lui a2, 209715
+; RV64ZBB-NEXT:    addiw a2, a2, 819
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slliw a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    srli a1, a0, 1
+; RV64ZBB-NEXT:    lui a2, 349525
+; RV64ZBB-NEXT:    addiw a2, a2, 1365
+; RV64ZBB-NEXT:    and a1, a1, a2
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a1, a0
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 32
+; RV64ZBB-NEXT:    ret
+  %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
+  %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp)
+  ret i32 %tmp2
+}
+
+define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
+; RV32I-LABEL: test_bitreverse_bswap_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a3, a1, 8
+; RV32I-NEXT:    lui a2, 16
+; RV32I-NEXT:    addi a2, a2, -256
+; RV32I-NEXT:    and a3, a3, a2
+; RV32I-NEXT:    srli a4, a1, 24
+; RV32I-NEXT:    or a4, a3, a4
+; RV32I-NEXT:    slli a5, a1, 8
+; RV32I-NEXT:    lui a3, 4080
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    or a1, a1, a5
+; RV32I-NEXT:    or a1, a1, a4
+; RV32I-NEXT:    srli a4, a1, 4
+; RV32I-NEXT:    lui a5, 61681
+; RV32I-NEXT:    addi a5, a5, -241
+; RV32I-NEXT:    and a4, a4, a5
+; RV32I-NEXT:    and a1, a1, a5
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a4, a1
+; RV32I-NEXT:    srli a4, a1, 2
+; RV32I-NEXT:    lui a6, 209715
+; RV32I-NEXT:    addi a6, a6, 819
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    and a1, a1, a6
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a4, a1
+; RV32I-NEXT:    srli a4, a1, 1
+; RV32I-NEXT:    lui a7, 349525
+; RV32I-NEXT:    addi a7, a7, 1365
+; RV32I-NEXT:    and a4, a4, a7
+; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a4, a1
+; RV32I-NEXT:    srli a4, a0, 8
+; RV32I-NEXT:    and a4, a4, a2
+; RV32I-NEXT:    srli t0, a0, 24
+; RV32I-NEXT:    or a4, a4, t0
+; RV32I-NEXT:    slli t0, a0, 8
+; RV32I-NEXT:    and t0, t0, a3
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, t0
+; RV32I-NEXT:    or a0, a0, a4
+; RV32I-NEXT:    srli a4, a0, 4
+; RV32I-NEXT:    and a4, a4, a5
+; RV32I-NEXT:    and a0, a0, a5
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    or a0, a4, a0
+; RV32I-NEXT:    srli a4, a0, 2
+; RV32I-NEXT:    and a4, a4, a6
+; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    slli a0, a0, 2
+; RV32I-NEXT:    or a0, a4, a0
+; RV32I-NEXT:    srli a4, a0, 1
+; RV32I-NEXT:    and a4, a4, a7
+; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    slli a0, a0, 1
+; RV32I-NEXT:    or a0, a4, a0
+; RV32I-NEXT:    srli a4, a0, 8
+; RV32I-NEXT:    and a4, a4, a2
+; RV32I-NEXT:    srli a5, a0, 24
+; RV32I-NEXT:    or a4, a4, a5
+; RV32I-NEXT:    slli a5, a0, 8
+; RV32I-NEXT:    and a5, a5, a3
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    or a0, a0, a5
+; RV32I-NEXT:    or a0, a0, a4
+; RV32I-NEXT:    srli a4, a1, 8
+; RV32I-NEXT:    and a2, a4, a2
+; RV32I-NEXT:    srli a4, a1, 24
+; RV32I-NEXT:    or a2, a2, a4
+; RV32I-NEXT:    slli a4, a1, 8
+; RV32I-NEXT:    and a3, a4, a3
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    or a1, a1, a3
+; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: test_bitreverse_bswap_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    srli a1, a0, 24
+; RV64I-NEXT:    lui a2, 4080
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    srli a3, a0, 8
+; RV64I-NEXT:    li a4, 255
+; RV64I-NEXT:    slli a5, a4, 24
+; RV64I-NEXT:    and a3, a3, a5
+; RV64I-NEXT:    or a1, a3, a1
+; RV64I-NEXT:    srli a3, a0, 40
+; RV64I-NEXT:    lui a6, 16
+; RV64I-NEXT:    addiw a6, a6, -256
+; RV64I-NEXT:    and a3, a3, a6
+; RV64I-NEXT:    srli a7, a0, 56
+; RV64I-NEXT:    or a3, a3, a7
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    slli a3, a0, 24
+; RV64I-NEXT:    slli a7, a4, 40
+; RV64I-NEXT:    and a3, a3, a7
+; RV64I-NEXT:    srliw t0, a0, 24
+; RV64I-NEXT:    slli t0, t0, 32
+; RV64I-NEXT:    or a3, a3, t0
+; RV64I-NEXT:    slli t0, a0, 40
+; RV64I-NEXT:    slli a4, a4, 48
+; RV64I-NEXT:    and t0, t0, a4
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, t0
+; RV64I-NEXT:    lui t0, %hi(.LCPI12_0)
+; RV64I-NEXT:    ld t0, %lo(.LCPI12_0)(t0)
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    srli a1, a0, 4
+; RV64I-NEXT:    and a1, a1, t0
+; RV64I-NEXT:    and a0, a0, t0
+; RV64I-NEXT:    lui a3, %hi(.LCPI12_1)
+; RV64I-NEXT:    ld a3, %lo(.LCPI12_1)(a3)
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 2
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a3, %hi(.LCPI12_2)
+; RV64I-NEXT:    ld a3, %lo(.LCPI12_2)(a3)
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 1
+; RV64I-NEXT:    and a1, a1, a3
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    srli a1, a0, 40
+; RV64I-NEXT:    and a1, a1, a6
+; RV64I-NEXT:    srli a3, a0, 56
+; RV64I-NEXT:    or a1, a1, a3
+; RV64I-NEXT:    srli a3, a0, 24
+; RV64I-NEXT:    and a2, a3, a2
+; RV64I-NEXT:    srli a3, a0, 8
+; RV64I-NEXT:    and a3, a3, a5
+; RV64I-NEXT:    or a2, a3, a2
+; RV64I-NEXT:    or a1, a2, a1
+; RV64I-NEXT:    slli a2, a0, 24
+; RV64I-NEXT:    and a2, a2, a7
+; RV64I-NEXT:    srliw a3, a0, 24
+; RV64I-NEXT:    slli a3, a3, 32
+; RV64I-NEXT:    or a2, a2, a3
+; RV64I-NEXT:    slli a3, a0, 40
+; RV64I-NEXT:    and a3, a3, a4
+; RV64I-NEXT:    slli a0, a0, 56
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    or a0, a0, a2
+; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    ret
+;
+; RV32ZBB-LABEL: test_bitreverse_bswap_i64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    rev8 a1, a1
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    lui a3, 61681
+; RV32ZBB-NEXT:    addi a3, a3, -241
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    srli a2, a0, 4
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a0, a0, a3
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a0, 2
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a0, a0, a4
+; RV32ZBB-NEXT:    slli a0, a0, 2
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a0, a0, a5
+; RV32ZBB-NEXT:    slli a0, a0, 1
+; RV32ZBB-NEXT:    or a0, a2, a0
+; RV32ZBB-NEXT:    rev8 a0, a0
+; RV32ZBB-NEXT:    rev8 a1, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64ZBB-LABEL: test_bitreverse_bswap_i64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    srli a2, a0, 4
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_1)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_1)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 2
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_2)
+; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_2)(a1)
+; RV64ZBB-NEXT:    slli a0, a0, 2
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    srli a2, a0, 1
+; RV64ZBB-NEXT:    and a2, a2, a1
+; RV64ZBB-NEXT:    and a0, a0, a1
+; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    or a0, a2, a0
+; RV64ZBB-NEXT:    rev8 a0, a0
+; RV64ZBB-NEXT:    ret
+  %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
+  %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp)
+  ret i64 %tmp2
+}
+
 define i8 @test_cttz_i8(i8 %a) nounwind {
 ; RV32I-LABEL: test_cttz_i8:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    andi a1, a0, 255
-; RV32I-NEXT:    beqz a1, .LBB7_2
+; RV32I-NEXT:    beqz a1, .LBB13_2
 ; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    addi a1, a0, -1
 ; RV32I-NEXT:    not a0, a0
@@ -711,14 +1721,14 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
 ; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    andi a0, a0, 15
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:  .LBB13_2:
 ; RV32I-NEXT:    li a0, 8
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_cttz_i8:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    andi a1, a0, 255
-; RV64I-NEXT:    beqz a1, .LBB7_2
+; RV64I-NEXT:    beqz a1, .LBB13_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
@@ -734,7 +1744,7 @@ define i8 @test_cttz_i8(i8 %a) nounwind {
 ; RV64I-NEXT:    addw a0, a0, a1
 ; RV64I-NEXT:    andi a0, a0, 15
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB7_2:
+; RV64I-NEXT:  .LBB13_2:
 ; RV64I-NEXT:    li a0, 8
 ; RV64I-NEXT:    ret
 ;
@@ -758,7 +1768,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    slli a1, a0, 16
 ; RV32I-NEXT:    srli a1, a1, 16
-; RV32I-NEXT:    beqz a1, .LBB8_2
+; RV32I-NEXT:    beqz a1, .LBB14_2
 ; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    addi a1, a0, -1
 ; RV32I-NEXT:    not a0, a0
@@ -784,7 +1794,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV32I-NEXT:    slli a0, a0, 19
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB8_2:
+; RV32I-NEXT:  .LBB14_2:
 ; RV32I-NEXT:    li a0, 16
 ; RV32I-NEXT:    ret
 ;
@@ -792,7 +1802,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    slli a1, a0, 48
 ; RV64I-NEXT:    srli a1, a1, 48
-; RV64I-NEXT:    beqz a1, .LBB8_2
+; RV64I-NEXT:    beqz a1, .LBB14_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
@@ -818,7 +1828,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    slli a0, a0, 51
 ; RV64I-NEXT:    srli a0, a0, 59
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB8_2:
+; RV64I-NEXT:  .LBB14_2:
 ; RV64I-NEXT:    li a0, 16
 ; RV64I-NEXT:    ret
 ;
@@ -842,7 +1852,7 @@ define i16 @test_cttz_i16(i16 %a) nounwind {
 define i32 @test_cttz_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_cttz_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beqz a0, .LBB9_2
+; RV32I-NEXT:    beqz a0, .LBB15_2
 ; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
@@ -872,14 +1882,14 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB9_2:
+; RV32I-NEXT:  .LBB15_2:
 ; RV32I-NEXT:    li a0, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_cttz_i32:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    beqz a1, .LBB9_2
+; RV64I-NEXT:    beqz a1, .LBB15_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
@@ -909,7 +1919,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB9_2:
+; RV64I-NEXT:  .LBB15_2:
 ; RV64I-NEXT:    li a0, 32
 ; RV64I-NEXT:    ret
 ;
@@ -929,7 +1939,7 @@ define i32 @test_cttz_i32(i32 %a) nounwind {
 define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_ctlz_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    beqz a0, .LBB10_2
+; RV32I-NEXT:    beqz a0, .LBB16_2
 ; RV32I-NEXT:  # %bb.1: # %cond.false
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
@@ -967,14 +1977,14 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
-; RV32I-NEXT:  .LBB10_2:
+; RV32I-NEXT:  .LBB16_2:
 ; RV32I-NEXT:    li a0, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_ctlz_i32:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a1, a0
-; RV64I-NEXT:    beqz a1, .LBB10_2
+; RV64I-NEXT:    beqz a1, .LBB16_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
@@ -1012,7 +2022,7 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB10_2:
+; RV64I-NEXT:  .LBB16_2:
 ; RV64I-NEXT:    li a0, 32
 ; RV64I-NEXT:    ret
 ;
@@ -1082,14 +2092,14 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, s6
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s2, .LBB11_2
+; RV32I-NEXT:    bnez s2, .LBB17_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:    j .LBB11_3
-; RV32I-NEXT:  .LBB11_2:
+; RV32I-NEXT:    j .LBB17_3
+; RV32I-NEXT:  .LBB17_2:
 ; RV32I-NEXT:    srli a0, s0, 24
-; RV32I-NEXT:  .LBB11_3:
+; RV32I-NEXT:  .LBB17_3:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -1104,49 +2114,49 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ;
 ; RV64I-LABEL: test_cttz_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    beqz a0, .LBB11_2
+; RV64I-NEXT:    beqz a0, .LBB17_2
 ; RV64I-NEXT:  # %bb.1: # %cond.false
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    lui a1, %hi(.LCPI11_0)
-; RV64I-NEXT:    ld a1, %lo(.LCPI11_0)(a1)
-; RV64I-NEXT:    lui a2, %hi(.LCPI11_1)
-; RV64I-NEXT:    ld a2, %lo(.LCPI11_1)(a2)
+; RV64I-NEXT:    lui a1, %hi(.LCPI17_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI17_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI17_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI17_1)(a2)
 ; RV64I-NEXT:    srli a3, a0, 1
 ; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a2, %hi(.LCPI11_2)
-; RV64I-NEXT:    ld a2, %lo(.LCPI11_2)(a2)
+; RV64I-NEXT:    lui a2, %hi(.LCPI17_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI17_2)(a2)
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a1, %hi(.LCPI11_3)
-; RV64I-NEXT:    ld a1, %lo(.LCPI11_3)(a1)
+; RV64I-NEXT:    lui a1, %hi(.LCPI17_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI17_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
-; RV64I-NEXT:  .LBB11_2:
+; RV64I-NEXT:  .LBB17_2:
 ; RV64I-NEXT:    li a0, 64
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_cttz_i64:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    bnez a0, .LBB11_2
+; RV32ZBB-NEXT:    bnez a0, .LBB17_2
 ; RV32ZBB-NEXT:  # %bb.1:
 ; RV32ZBB-NEXT:    ctz a0, a1
 ; RV32ZBB-NEXT:    addi a0, a0, 32
 ; RV32ZBB-NEXT:    li a1, 0
 ; RV32ZBB-NEXT:    ret
-; RV32ZBB-NEXT:  .LBB11_2:
+; RV32ZBB-NEXT:  .LBB17_2:
 ; RV32ZBB-NEXT:    ctz a0, a0
 ; RV32ZBB-NEXT:    li a1, 0
 ; RV32ZBB-NEXT:    ret
@@ -1404,14 +2414,14 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, s6
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s2, .LBB15_2
+; RV32I-NEXT:    bnez s2, .LBB21_2
 ; RV32I-NEXT:  # %bb.1:
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:    j .LBB15_3
-; RV32I-NEXT:  .LBB15_2:
+; RV32I-NEXT:    j .LBB21_3
+; RV32I-NEXT:  .LBB21_2:
 ; RV32I-NEXT:    srli a0, s0, 24
-; RV32I-NEXT:  .LBB15_3:
+; RV32I-NEXT:  .LBB21_3:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -1431,24 +2441,24 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV64I-NEXT:    addi a1, a0, -1
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    and a0, a0, a1
-; RV64I-NEXT:    lui a1, %hi(.LCPI15_0)
-; RV64I-NEXT:    ld a1, %lo(.LCPI15_0)(a1)
-; RV64I-NEXT:    lui a2, %hi(.LCPI15_1)
-; RV64I-NEXT:    ld a2, %lo(.LCPI15_1)(a2)
+; RV64I-NEXT:    lui a1, %hi(.LCPI21_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI21_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI21_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI21_1)(a2)
 ; RV64I-NEXT:    srli a3, a0, 1
 ; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a2, %hi(.LCPI15_2)
-; RV64I-NEXT:    ld a2, %lo(.LCPI15_2)(a2)
+; RV64I-NEXT:    lui a2, %hi(.LCPI21_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI21_2)(a2)
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a1, %hi(.LCPI15_3)
-; RV64I-NEXT:    ld a1, %lo(.LCPI15_3)(a1)
+; RV64I-NEXT:    lui a1, %hi(.LCPI21_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI21_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
@@ -1457,13 +2467,13 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: test_cttz_i64_zero_undef:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    bnez a0, .LBB15_2
+; RV32ZBB-NEXT:    bnez a0, .LBB21_2
 ; RV32ZBB-NEXT:  # %bb.1:
 ; RV32ZBB-NEXT:    ctz a0, a1
 ; RV32ZBB-NEXT:    addi a0, a0, 32
 ; RV32ZBB-NEXT:    li a1, 0
 ; RV32ZBB-NEXT:    ret
-; RV32ZBB-NEXT:  .LBB15_2:
+; RV32ZBB-NEXT:  .LBB21_2:
 ; RV32ZBB-NEXT:    ctz a0, a0
 ; RV32ZBB-NEXT:    li a1, 0
 ; RV32ZBB-NEXT:    ret
@@ -1608,24 +2618,24 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a1, %hi(.LCPI17_0)
-; RV64I-NEXT:    ld a1, %lo(.LCPI17_0)(a1)
-; RV64I-NEXT:    lui a2, %hi(.LCPI17_1)
-; RV64I-NEXT:    ld a2, %lo(.LCPI17_1)(a2)
+; RV64I-NEXT:    lui a1, %hi(.LCPI23_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI23_0)(a1)
+; RV64I-NEXT:    lui a2, %hi(.LCPI23_1)
+; RV64I-NEXT:    ld a2, %lo(.LCPI23_1)(a2)
 ; RV64I-NEXT:    srli a3, a0, 1
 ; RV64I-NEXT:    and a1, a3, a1
 ; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    and a1, a0, a2
 ; RV64I-NEXT:    srli a0, a0, 2
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a2, %hi(.LCPI17_2)
-; RV64I-NEXT:    ld a2, %lo(.LCPI17_2)(a2)
+; RV64I-NEXT:    lui a2, %hi(.LCPI23_2)
+; RV64I-NEXT:    ld a2, %lo(.LCPI23_2)(a2)
 ; RV64I-NEXT:    add a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a1, %hi(.LCPI17_3)
-; RV64I-NEXT:    ld a1, %lo(.LCPI17_3)(a1)
+; RV64I-NEXT:    lui a1, %hi(.LCPI23_3)
+; RV64I-NEXT:    ld a1, %lo(.LCPI23_3)(a1)
 ; RV64I-NEXT:    call __muldi3@plt
 ; RV64I-NEXT:    srli a0, a0, 56
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload

From 47d7e922d8438c801198a2901a01fcd2cfbdb353 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 13:59:59 -0800
Subject: [PATCH 311/946] [mlir] Ensure a newline at the end of a file (NFC)

---
 mlir/lib/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/CMakeLists.txt b/mlir/lib/CMakeLists.txt
index 467b0ae33dd87..05bfbb47e28e7 100644
--- a/mlir/lib/CMakeLists.txt
+++ b/mlir/lib/CMakeLists.txt
@@ -20,4 +20,4 @@ add_subdirectory(Translation)
 # Only enable the ExecutionEngine if the native target is configured in.
 if(TARGET ${LLVM_NATIVE_ARCH})
   add_subdirectory(ExecutionEngine)
-endif()
\ No newline at end of file
+endif()

From fa90fc6e0566a245cafa0afa4da4967cf4831779 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 14:00:01 -0800
Subject: [PATCH 312/946] [Sema] Fix a bugprone argument comment (NFC)

Identified with bugprone-argument-comment.
---
 clang/lib/Sema/SemaTemplateDeduction.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 8e1f1d294d6ec..22dd395d99439 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -5355,7 +5355,7 @@ static bool isAtLeastAsSpecializedAs(Sema &S, QualType T1, QualType T2,
   bool AtLeastAsSpecialized;
   S.runWithSufficientStackSpace(Info.getLocation(), [&] {
     AtLeastAsSpecialized = !FinishTemplateArgumentDeduction(
-        S, P2, /*PartialOrdering=*/true,
+        S, P2, /*IsPartialOrdering=*/true,
         TemplateArgumentList(TemplateArgumentList::OnStack,
                              TST1->template_arguments()),
         Deduced, Info);

From 448d0dfab701ab00d081136d8373304fd91693a0 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 14:00:03 -0800
Subject: [PATCH 313/946] [Analysis] Remove a redundant const from a return
 type (NFC)

Identified with readability-const-return-type.
---
 llvm/lib/Analysis/ScalarEvolution.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e2d5df84be52f..3019ff526b66d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3486,7 +3486,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
   return S;
 }
 
-static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
+const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) {
   APInt A = C1->getAPInt().abs();
   APInt B = C2->getAPInt().abs();
   uint32_t ABW = A.getBitWidth();

From ab4756338c5b2216d52d9152b2f7e65f233c4dac Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Fri, 21 Jan 2022 20:20:39 -0800
Subject: [PATCH 314/946] DebugInfo: Don't put types in type units if they
 reference internal linkage types

Doing this causes a declaration of the internal linkage (anonymous
namespace) type to be emitted in the type unit, which would then be
ambiguous as to which internal linkage definition it refers to (since
the name is only valid internally).

It's possible these internal linkage types could be resolved relative to
the unit the TU is referred to from - but that doesn't seem ideal, and
there's no reason to put the type in a type unit since it can only be
defined in one CU anyway (since otherwise it'd be an ODR violation) & so
avoiding the type unit should be a smaller DWARF encoding anyway.

This also addresses an issue with Simplified Template Names where the
template parameter could not be rebuilt from the declaration emitted
into the TU (specifically for an enum non-type template parameter, where
looking up the enumerators is necessary to rebuild the full template
name)
---
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 12 +++--
 llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h   |  3 ++
 llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp  | 22 +++++++--
 llvm/test/DebugInfo/X86/tu-to-non-tu.ll    | 54 +++++++++++-----------
 4 files changed, 58 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 609b568f28beb..680b9586228f7 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3367,7 +3367,8 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
   // Fast path if we're building some type units and one has already used the
   // address pool we know we're going to throw away all this work anyway, so
   // don't bother building dependent types.
-  if (!TypeUnitsUnderConstruction.empty() && AddrPool.hasBeenUsed())
+  if (!TypeUnitsUnderConstruction.empty() &&
+      (AddrPool.hasBeenUsed() || SeenLocalType))
     return;
 
   auto Ins = TypeSignatures.insert(std::make_pair(CTy, 0));
@@ -3378,6 +3379,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
 
   bool TopLevelType = TypeUnitsUnderConstruction.empty();
   AddrPool.resetUsedFlag();
+  SeenLocalType = false;
 
   auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
                                                     getDwoLineTable(CU));
@@ -3421,7 +3423,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
 
     // Types referencing entries in the address table cannot be placed in type
     // units.
-    if (AddrPool.hasBeenUsed()) {
+    if (AddrPool.hasBeenUsed() || SeenLocalType) {
 
       // Remove all the types built while building this type.
       // This is pessimistic as some of these types might not be dependent on
@@ -3449,14 +3451,18 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
 
 DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
     : DD(DD),
-      TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)), AddrPoolUsed(DD->AddrPool.hasBeenUsed()) {
+      TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)),
+      AddrPoolUsed(DD->AddrPool.hasBeenUsed()),
+      SeenLocalType(DD->SeenLocalType) {
   DD->TypeUnitsUnderConstruction.clear();
   DD->AddrPool.resetUsedFlag();
+  DD->SeenLocalType = false;
 }
 
 DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
   DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
   DD->AddrPool.resetUsedFlag(AddrPoolUsed);
+  DD->SeenLocalType = SeenLocalType;
 }
 
 DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 4e1a1b1e068df..0043000652e89 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -433,6 +433,7 @@ class DwarfDebug : public DebugHandlerBase {
   DenseMap<const DIStringType *, unsigned> StringTypeLocMap;
 
   AddressPool AddrPool;
+  bool SeenLocalType = false;
 
   /// Accelerator tables.
   AccelTable<DWARF5AccelTableData> AccelDebugNames;
@@ -671,6 +672,7 @@ class DwarfDebug : public DebugHandlerBase {
     DwarfDebug *DD;
     decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
     bool AddrPoolUsed;
+    bool SeenLocalType;
     friend class DwarfDebug;
     NonTypeUnitContext(DwarfDebug *DD);
   public:
@@ -679,6 +681,7 @@ class DwarfDebug : public DebugHandlerBase {
   };
 
   NonTypeUnitContext enterNonTypeUnitContext();
+  void seenLocalType() { SeenLocalType = true; }
 
   /// Add a label so that arange data can be generated for it.
   void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 6b2eb8f2bf1d9..956ed33b9eaa1 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -597,10 +597,8 @@ DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
       // Skip updating the accelerator tables since this is not the full type.
       if (MDString *TypeId = CTy->getRawIdentifier())
         DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
-      else {
-        auto X = DD->enterNonTypeUnitContext();
+      else
         finishNonUnitTypeDIE(TyDIE, CTy);
-      }
       return &TyDIE;
     }
     constructTypeDIE(TyDIE, CTy);
@@ -1842,5 +1840,23 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
   StringRef Name = CTy->getName();
   if (!Name.empty())
     addString(D, dwarf::DW_AT_name, Name);
+  // If the type is in an anonymous namespace, we can't reference it from a TU
+  // (since the type would be CU local and the TU doesn't specify which TU has
+  // the appropriate type definition) - so flag this emission as such and skip
+  // the rest of the emission now since we're going to throw out all this work
+  // and put the outer/referencing type in the CU instead.
+  // FIXME: Probably good to generalize this to a DICompositeType flag populated
+  // by the frontend, then we could use that to have types that can have
+  // decl+def merged by LTO but where the definition still doesn't go in a type
+  // unit because the type has only one definition.
+  for (DIScope *S = CTy->getScope(); S; S = S->getScope()) {
+    if (auto *NS = dyn_cast<DINamespace>(S)) {
+      if (NS->getName().empty()) {
+        DD->seenLocalType();
+        break;
+      }
+    }
+  }
+  auto X = DD->enterNonTypeUnitContext();
   getCU().createTypeDIE(CTy);
 }
diff --git a/llvm/test/DebugInfo/X86/tu-to-non-tu.ll b/llvm/test/DebugInfo/X86/tu-to-non-tu.ll
index cdd4f52fc7252..6b9e0d9e60999 100644
--- a/llvm/test/DebugInfo/X86/tu-to-non-tu.ll
+++ b/llvm/test/DebugInfo/X86/tu-to-non-tu.ll
@@ -1,28 +1,24 @@
 ; RUN: llc -filetype=obj -O0 -generate-type-units -mtriple=x86_64-unknown-linux-gnu < %s \
 ; RUN:     | llvm-dwarfdump -debug-info -debug-types - | FileCheck %s
 
-; Test that a type unit referencing a non-type unit (in this case, it's
-; bordering on an ODR violation - a type with linkage references a type without
-; linkage, so there's no way for the first type to be defined in more than one
-; translation unit, so there's no need for it to be in a type unit - but this
-; is quirky/rare and an easy way to test a broader issue). The type unit should
-; not end up with a whole definition of the referenced type - instead it should
-; have a declaration of the type, while the definition remains in the primary
-; CU.
-; (again, arguably in this instance - since the type is only referenced once, it
-; could go in the TU only - but that requires tracking usage & then deciding
-; where to put types, which isn't worthwhile right now)
+; Test that a type unit referencing a non-type unit produces a declaration of
+; the referent in the referee.
+
+; Also check that an attempt to reference an internal linkage (defined in an anonymous
+; namespace) type from a type unit (could happen with a pimpl idiom, for instance -
+; it does mean the linkage-having type can only be defined in one translation
+; unit anyway) forces the referent to not be placed in a type unit (because the
+; declaration of the internal linkage type would be ambiguous/wouldn't allow a
+; consumer to find the definition with certainty)
 
 ; CHECK: Type Unit:
 
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name {{.*}}"bar"
+; CHECK-NEXT: DW_AT_name {{.*}}"t1"
 
-; CHECK: DW_TAG_namespace
-; CHECK-NOT: {{DW_AT_name|DW_TAG}}
 ; CHECK: DW_TAG_structure_type
 ; CHECK-NEXT: DW_AT_declaration
-; CHECK-NEXT: DW_AT_name {{.*}}"foo"
+; CHECK-NEXT: DW_AT_name {{.*}}"t2"
 
 ; CHECK: Compile Unit:
 
@@ -30,32 +26,36 @@
 ; CHECK-NEXT: DW_AT_declaration
 ; CHECK-NEXT: DW_AT_signature
 
-; CHECK: DW_TAG_namespace
-; CHECK-NOT: {{DW_AT_name|DW_TAG}}
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name {{.*}}"foo"
+; CHECK-NEXT: DW_AT_name {{.*}}"t2"
 ; CHECK-NEXT: DW_AT_byte_size
 
-%struct.bar = type { %"struct.(anonymous namespace)::foo" }
-%"struct.(anonymous namespace)::foo" = type { i8 }
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"t3"
 
-@b = global %struct.bar zeroinitializer, align 1, !dbg !0
+; CHECK: DW_TAG_namespace
+; CHECK-NOT: {{DW_TAG|DW_AT}}
+
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_name {{.*}}"t4"
 
 !llvm.dbg.cu = !{!2}
 !llvm.module.flags = !{!11, !13}
 !llvm.ident = !{!12}
 
-!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
-!1 = distinct !DIGlobalVariable(name: "b", scope: !2, file: !3, line: 8, type: !6, isLocal: false, isDefinition: true)
-!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "tu-to-non-tu.dwo", emissionKind: FullDebug, enums: !4, globals: !5)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "tu-to-non-tu.dwo", emissionKind: FullDebug, enums: !4, retainedTypes: !14)
 !3 = !DIFile(filename: "tu.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
 !4 = !{}
-!5 = !{!0}
-!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "bar", file: !3, line: 5, size: 8, elements: !7, identifier: "_ZTS3bar")
+!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", file: !3, line: 5, size: 8, elements: !7, identifier: "_ZTS2t1")
 !7 = !{!8}
 !8 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !6, file: !3, line: 6, baseType: !9, size: 8)
-!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "foo", scope: !10, file: !3, line: 2, size: 8, elements: !4)
+!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t2", file: !3, line: 2, size: 8, elements: !4)
 !10 = !DINamespace(scope: null)
 !11 = !{i32 2, !"Debug Info Version", i32 3}
 !12 = !{!"clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)"}
 !13 = !{i32 2, !"Dwarf Version", i32 5}
+!14 = !{!6, !15}
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t3", file: !3, line: 5, size: 8, elements: !16, identifier: "_ZTS2t3")
+!16 = !{!17}
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !15, file: !3, line: 6, baseType: !18, size: 8)
+!18 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t4", scope: !10, file: !3, line: 2, size: 8, elements: !4)

From 7c77df1528c8ef6ab430bca18661aaf578066b96 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Sun, 23 Jan 2022 22:48:26 +0000
Subject: [PATCH 315/946] [X86] Add some basic tests for PR46809

---
 llvm/test/CodeGen/X86/select-lea.ll | 177 ++++++++++++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/select-lea.ll

diff --git a/llvm/test/CodeGen/X86/select-lea.ll b/llvm/test/CodeGen/X86/select-lea.ll
new file mode 100644
index 0000000000000..4b50fdc2ca4e0
--- /dev/null
+++ b/llvm/test/CodeGen/X86/select-lea.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+cmov | FileCheck %s --check-prefixes=CMOV
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=NOCMOV
+
+; PR46809
+
+define i32 @sadd_add_imm(i32 %x, i32 %y) {
+; X64-LABEL: sadd_add_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    addl $100, %eax
+; X64-NEXT:    addl %esi, %edi
+; X64-NEXT:    cmovnol %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: sadd_add_imm:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    leal (%eax,%ecx), %edx
+; CMOV-NEXT:    addl $100, %edx
+; CMOV-NEXT:    addl %ecx, %eax
+; CMOV-NEXT:    cmovol %edx, %eax
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: sadd_add_imm:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
+; NOCMOV-NEXT:    addl %edx, %eax
+; NOCMOV-NEXT:    jno .LBB0_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    addl $100, %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB0_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %a = add i32 %v2, 100
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
+define i32 @uadd_add_imm(i32 %x, i32 %y) {
+; X64-LABEL: uadd_add_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    addl $100, %eax
+; X64-NEXT:    addl %esi, %edi
+; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: uadd_add_imm:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    leal (%eax,%ecx), %edx
+; CMOV-NEXT:    addl $100, %edx
+; CMOV-NEXT:    addl %ecx, %eax
+; CMOV-NEXT:    cmovbl %edx, %eax
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: uadd_add_imm:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
+; NOCMOV-NEXT:    addl %edx, %eax
+; NOCMOV-NEXT:    jae .LBB1_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    addl $100, %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB1_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %a = add i32 %v2, 100
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
+define i32 @ssub_add_imm(i32 %x, i32 %y) {
+; X64-LABEL: ssub_add_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    addl $100, %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmovnol %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: ssub_add_imm:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl %eax, %edx
+; CMOV-NEXT:    subl %ecx, %edx
+; CMOV-NEXT:    addl $100, %edx
+; CMOV-NEXT:    subl %ecx, %eax
+; CMOV-NEXT:    cmovol %edx, %eax
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: ssub_add_imm:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    movl %eax, %ecx
+; NOCMOV-NEXT:    subl %edx, %ecx
+; NOCMOV-NEXT:    subl %edx, %eax
+; NOCMOV-NEXT:    jno .LBB2_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    addl $100, %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB2_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %a = add i32 %v2, 100
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
+define i32 @usub_add_imm(i32 %x, i32 %y) {
+; X64-LABEL: usub_add_imm:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    addl $100, %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: usub_add_imm:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl %eax, %edx
+; CMOV-NEXT:    subl %ecx, %edx
+; CMOV-NEXT:    addl $100, %edx
+; CMOV-NEXT:    subl %ecx, %eax
+; CMOV-NEXT:    cmovbl %edx, %eax
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: usub_add_imm:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    movl %eax, %ecx
+; NOCMOV-NEXT:    subl %edx, %ecx
+; NOCMOV-NEXT:    subl %edx, %eax
+; NOCMOV-NEXT:    jae .LBB3_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    addl $100, %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB3_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %a = add i32 %v2, 100
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32)

From 2e58a18910867ba6795066e044293e6daf89edf5 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Sun, 23 Jan 2022 16:07:43 -0800
Subject: [PATCH 316/946] DebugInfo: Include template parameters for simplified
 template decls in type units

LLVM DebugInfo CodeGen synthesizes type declarations in type units when
referencing types that are not in type units. When those synthesized
types are templates and simplified template names (or mangled simplified
template names) are in use, the template arguments must be attached to
those declarations.

A deeper fix (with a CU or DICompositeType flag) that would also support
other uses of clang's -debug-forward-template-args (such as Sony's
platform) could/should be implemented to fix this more broadly.
---
 llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp |   2 +
 llvm/test/DebugInfo/X86/tu-to-non-tu.ll   | 308 +++++++++++++++++++---
 2 files changed, 280 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 956ed33b9eaa1..bfc98580002c5 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1840,6 +1840,8 @@ void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
   StringRef Name = CTy->getName();
   if (!Name.empty())
     addString(D, dwarf::DW_AT_name, Name);
+  if (Name.startswith("_STN") || !Name.contains('<'))
+    addTemplateParams(D, CTy->getTemplateParams());
   // If the type is in an anonymous namespace, we can't reference it from a TU
   // (since the type would be CU local and the TU doesn't specify which TU has
   // the appropriate type definition) - so flag this emission as such and skip
diff --git a/llvm/test/DebugInfo/X86/tu-to-non-tu.ll b/llvm/test/DebugInfo/X86/tu-to-non-tu.ll
index 6b9e0d9e60999..517872d2b92e9 100644
--- a/llvm/test/DebugInfo/X86/tu-to-non-tu.ll
+++ b/llvm/test/DebugInfo/X86/tu-to-non-tu.ll
@@ -11,51 +11,299 @@
 ; declaration of the internal linkage type would be ambiguous/wouldn't allow a
 ; consumer to find the definition with certainty)
 
-; CHECK: Type Unit:
+; Built from the following source, compiled with this command:
+; $ clang++-tot decl.cpp -g -fdebug-types-section -c
+; And modified (as noted in the comments) to produce some "simplified" and "mangled"
+; simplified template names, to ensure they get template parameters in declarations
+; created in type units.
 
+; struct non_tu {
+;   virtual void f1();
+; };
+; void non_tu::f1() {}
+; struct tu_ref_non_tu {
+;   non_tu v1;
+; };
+; tu_ref_non_tu v1;
+; 
+; // Reference internal 
+; namespace {
+; struct internal {};
+; }  // namespace
+; struct ref_internal {
+;   internal i;
+; };
+; ref_internal v5;
+; 
+; 
+; template <typename T>
+; struct templ_non_tu;
+;
+; // Reference to (normal, non-mangled/simplified) non-tu type with template
+; // parameters.
+; template <>
+; struct templ_non_tu<int> {
+;   virtual void f1();
+; };
+; void templ_non_tu<int>::f1() {}
+; struct ref_templ_non_tu {
+;   templ_non_tu<int> v1;
+; };
+; ref_templ_non_tu v2;
+; 
+; // Modify templ_non_tu<long>'s name to be simplified (strip template parameter
+; // list from the "name" attribute)
+; template <>
+; struct templ_non_tu<long> {
+;   virtual void f1();
+; };
+; void templ_non_tu<long>::f1() {}
+; struct ref_templ_non_tu_simple {
+;   templ_non_tu<long> v1;
+; };
+; ref_templ_non_tu_simple v3;
+; 
+; // Modify templ_non_tu<long>'s name to be mangled ('_STN' name '|' args)
+; template <>
+; struct templ_non_tu<bool> {
+;   virtual void f1();
+; };
+; void templ_non_tu<bool>::f1() {}
+; struct ref_templ_non_tu_mangled {
+;   templ_non_tu<bool> v1;
+; };
+; ref_templ_non_tu_mangled v4;
+
+
+
+; CHECK-LABEL: Type Unit:
+; CHECK: DW_TAG_structure_type
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}}"tu_ref_non_tu"
+
+; CHECK-LABEL: Type Unit:
+; CHECK: DW_TAG_structure_type
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}}"ref_templ_non_tu"
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name {{.*}}"t1"
+; CHECK-NEXT: DW_AT_declaration       (true)
+; CHECK-NEXT: DW_AT_name      ("templ_non_tu<int>")
 
+; CHECK-LABEL: Type Unit:
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_declaration
-; CHECK-NEXT: DW_AT_name {{.*}}"t2"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}}"ref_templ_non_tu_simple"
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_declaration       (true)
+; CHECK-NEXT: DW_AT_name      ("templ_non_tu")
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_template_type_parameter
+; CHECK-NEXT: DW_AT_type    {{.*}}"long"
+
+; CHECK-LABEL: Type Unit:
+; CHECK: DW_TAG_structure_type
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}}"ref_templ_non_tu_mangled"
+; CHECK: DW_TAG_structure_type
+; CHECK-NEXT: DW_AT_declaration       (true)
+; CHECK-NEXT: DW_AT_name      ("_STNtempl_non_tu|<bool>")
+; CHECK-NOT: DW_TAG
+; CHECK: DW_TAG_template_type_parameter
+; CHECK-NEXT: DW_AT_type    {{.*}}"bool"
+
 
-; CHECK: Compile Unit:
+; CHECK-LABEL: Compile Unit:
 
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_declaration
-; CHECK-NEXT: DW_AT_signature
+; CHECK-NEXT:   DW_AT_declaration       (true)
+; CHECK-NEXT:   DW_AT_signature (0xb1cde890d320f5c2)
 
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name {{.*}}"t2"
-; CHECK-NEXT: DW_AT_byte_size
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}}"non_tu"
+
 
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name {{.*}}"t3"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}}"ref_internal"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_byte_size
 
 ; CHECK: DW_TAG_namespace
 ; CHECK-NOT: {{DW_TAG|DW_AT}}
-
 ; CHECK: DW_TAG_structure_type
-; CHECK-NEXT: DW_AT_name {{.*}}"t4"
+; CHECK-NOT: DW_TAG
+; CHECK: DW_AT_name {{.*}}"internal"
+
+%struct.ref_internal = type { %"struct.(anonymous namespace)::internal" }
+%"struct.(anonymous namespace)::internal" = type { i8 }
+%struct.non_tu = type { i32 (...)** }
+%struct.templ_non_tu = type { i32 (...)** }
+%struct.templ_non_tu.0 = type { i32 (...)** }
+%struct.templ_non_tu.1 = type { i32 (...)** }
+
+@_ZTV6non_tu = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI6non_tu to i8*), i8* bitcast (void (%struct.non_tu*)* @_ZN6non_tu2f1Ev to i8*)] }, align 8
+@v1 = dso_local global { { i8** } } { { i8** } { i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV6non_tu, i32 0, inrange i32 0, i32 2) } }, align 8, !dbg !0
+@v5 = dso_local global %struct.ref_internal zeroinitializer, align 1, !dbg !5
+@_ZTV12templ_non_tuIiE = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI12templ_non_tuIiE to i8*), i8* bitcast (void (%struct.templ_non_tu*)* @_ZN12templ_non_tuIiE2f1Ev to i8*)] }, align 8
+@v2 = dso_local global { { i8** } } { { i8** } { i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV12templ_non_tuIiE, i32 0, inrange i32 0, i32 2) } }, align 8, !dbg !13
+@_ZTV12templ_non_tuIlE = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI12templ_non_tuIlE to i8*), i8* bitcast (void (%struct.templ_non_tu.0*)* @_ZN12templ_non_tuIlE2f1Ev to i8*)] }, align 8
+@v3 = dso_local global { { i8** } } { { i8** } { i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV12templ_non_tuIlE, i32 0, inrange i32 0, i32 2) } }, align 8, !dbg !32
+@_ZTV12templ_non_tuIbE = dso_local unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI12templ_non_tuIbE to i8*), i8* bitcast (void (%struct.templ_non_tu.1*)* @_ZN12templ_non_tuIbE2f1Ev to i8*)] }, align 8
+@v4 = dso_local global { { i8** } } { { i8** } { i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @_ZTV12templ_non_tuIbE, i32 0, inrange i32 0, i32 2) } }, align 8, !dbg !46
+@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8*
+@_ZTS6non_tu = dso_local constant [8 x i8] c"6non_tu\00", align 1
+@_ZTI6non_tu = dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([8 x i8], [8 x i8]* @_ZTS6non_tu, i32 0, i32 0) }, align 8
+@_ZTS12templ_non_tuIiE = dso_local constant [18 x i8] c"12templ_non_tuIiE\00", align 1
+@_ZTI12templ_non_tuIiE = dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([18 x i8], [18 x i8]* @_ZTS12templ_non_tuIiE, i32 0, i32 0) }, align 8
+@_ZTS12templ_non_tuIlE = dso_local constant [18 x i8] c"12templ_non_tuIlE\00", align 1
+@_ZTI12templ_non_tuIlE = dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([18 x i8], [18 x i8]* @_ZTS12templ_non_tuIlE, i32 0, i32 0) }, align 8
+@_ZTS12templ_non_tuIbE = dso_local constant [18 x i8] c"12templ_non_tuIbE\00", align 1
+@_ZTI12templ_non_tuIbE = dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([18 x i8], [18 x i8]* @_ZTS12templ_non_tuIbE, i32 0, i32 0) }, align 8
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @_ZN6non_tu2f1Ev(%struct.non_tu* noundef nonnull align 8 dereferenceable(8) %this) unnamed_addr #0 align 2 !dbg !76 {
+entry:
+  %this.addr = alloca %struct.non_tu*, align 8
+  store %struct.non_tu* %this, %struct.non_tu** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata %struct.non_tu** %this.addr, metadata !77, metadata !DIExpression()), !dbg !79
+  %this1 = load %struct.non_tu*, %struct.non_tu** %this.addr, align 8
+  ret void, !dbg !80
+}
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @_ZN12templ_non_tuIiE2f1Ev(%struct.templ_non_tu* noundef nonnull align 8 dereferenceable(8) %this) unnamed_addr #0 align 2 !dbg !81 {
+entry:
+  %this.addr = alloca %struct.templ_non_tu*, align 8
+  store %struct.templ_non_tu* %this, %struct.templ_non_tu** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata %struct.templ_non_tu** %this.addr, metadata !82, metadata !DIExpression()), !dbg !84
+  %this1 = load %struct.templ_non_tu*, %struct.templ_non_tu** %this.addr, align 8
+  ret void, !dbg !85
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @_ZN12templ_non_tuIlE2f1Ev(%struct.templ_non_tu.0* noundef nonnull align 8 dereferenceable(8) %this) unnamed_addr #0 align 2 !dbg !86 {
+entry:
+  %this.addr = alloca %struct.templ_non_tu.0*, align 8
+  store %struct.templ_non_tu.0* %this, %struct.templ_non_tu.0** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata %struct.templ_non_tu.0** %this.addr, metadata !87, metadata !DIExpression()), !dbg !89
+  %this1 = load %struct.templ_non_tu.0*, %struct.templ_non_tu.0** %this.addr, align 8
+  ret void, !dbg !90
+}
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @_ZN12templ_non_tuIbE2f1Ev(%struct.templ_non_tu.1* noundef nonnull align 8 dereferenceable(8) %this) unnamed_addr #0 align 2 !dbg !91 {
+entry:
+  %this.addr = alloca %struct.templ_non_tu.1*, align 8
+  store %struct.templ_non_tu.1* %this, %struct.templ_non_tu.1** %this.addr, align 8
+  call void @llvm.dbg.declare(metadata %struct.templ_non_tu.1** %this.addr, metadata !92, metadata !DIExpression()), !dbg !94
+  %this1 = load %struct.templ_non_tu.1*, %struct.templ_non_tu.1** %this.addr, align 8
+  ret void, !dbg !95
+}
+
+attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
 
 !llvm.dbg.cu = !{!2}
-!llvm.module.flags = !{!11, !13}
-!llvm.ident = !{!12}
-
-!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)", isOptimized: false, runtimeVersion: 0, splitDebugFilename: "tu-to-non-tu.dwo", emissionKind: FullDebug, enums: !4, retainedTypes: !14)
-!3 = !DIFile(filename: "tu.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
-!4 = !{}
-!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t1", file: !3, line: 5, size: 8, elements: !7, identifier: "_ZTS2t1")
-!7 = !{!8}
-!8 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !6, file: !3, line: 6, baseType: !9, size: 8)
-!9 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t2", file: !3, line: 2, size: 8, elements: !4)
-!10 = !DINamespace(scope: null)
-!11 = !{i32 2, !"Debug Info Version", i32 3}
-!12 = !{!"clang version 5.0.0 (trunk 294954) (llvm/trunk 294959)"}
-!13 = !{i32 2, !"Dwarf Version", i32 5}
-!14 = !{!6, !15}
-!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t3", file: !3, line: 5, size: 8, elements: !16, identifier: "_ZTS2t3")
+!llvm.module.flags = !{!70, !71, !72, !73, !74}
+!llvm.ident = !{!75}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "v1", scope: !2, file: !3, line: 8, type: !60, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 14.0.0 (git@github.com:llvm/llvm-project.git ab4756338c5b2216d52d9152b2f7e65f233c4dac)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "decl.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
+!4 = !{!0, !5, !13, !32, !46}
+!5 = !DIGlobalVariableExpression(var: !6, expr: !DIExpression())
+!6 = distinct !DIGlobalVariable(name: "v5", scope: !2, file: !3, line: 17, type: !7, isLocal: false, isDefinition: true)
+!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ref_internal", file: !3, line: 14, size: 8, flags: DIFlagTypePassByValue, elements: !8, identifier: "_ZTS12ref_internal")
+!8 = !{!9}
+!9 = !DIDerivedType(tag: DW_TAG_member, name: "i", scope: !7, file: !3, line: 15, baseType: !10, size: 8)
+!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "internal", scope: !11, file: !3, line: 12, size: 8, flags: DIFlagTypePassByValue, elements: !12)
+!11 = !DINamespace(scope: null)
+!12 = !{}
+!13 = !DIGlobalVariableExpression(var: !14, expr: !DIExpression())
+!14 = distinct !DIGlobalVariable(name: "v2", scope: !2, file: !3, line: 33, type: !15, isLocal: false, isDefinition: true)
+!15 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ref_templ_non_tu", file: !3, line: 30, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !16, identifier: "_ZTS16ref_templ_non_tu")
 !16 = !{!17}
-!17 = !DIDerivedType(tag: DW_TAG_member, name: "f", scope: !15, file: !3, line: 6, baseType: !18, size: 8)
-!18 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "t4", scope: !10, file: !3, line: 2, size: 8, elements: !4)
+!17 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !15, file: !3, line: 31, baseType: !18, size: 64)
+!18 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "templ_non_tu<int>", file: !3, line: 26, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !19, vtableHolder: !18, templateParams: !30)
+!19 = !{!20, !26}
+!20 = !DIDerivedType(tag: DW_TAG_member, name: "_vptr$templ_non_tu", scope: !3, file: !3, baseType: !21, size: 64, flags: DIFlagArtificial)
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !22, size: 64)
+!22 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "__vtbl_ptr_type", baseType: !23, size: 64)
+!23 = !DISubroutineType(types: !24)
+!24 = !{!25}
+!25 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!26 = !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIiE2f1Ev", scope: !18, file: !3, line: 27, type: !27, scopeLine: 27, containingType: !18, virtualIndex: 0, flags: DIFlagPrototyped, spFlags: DISPFlagVirtual)
+!27 = !DISubroutineType(types: !28)
+!28 = !{null, !29}
+!29 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!30 = !{!31}
+!31 = !DITemplateTypeParameter(name: "T", type: !25)
+!32 = !DIGlobalVariableExpression(var: !33, expr: !DIExpression())
+!33 = distinct !DIGlobalVariable(name: "v3", scope: !2, file: !3, line: 45, type: !34, isLocal: false, isDefinition: true)
+!34 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ref_templ_non_tu_simple", file: !3, line: 42, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !35, identifier: "_ZTS23ref_templ_non_tu_simple")
+!35 = !{!36}
+!36 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !34, file: !3, line: 43, baseType: !37, size: 64)
+!37 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "templ_non_tu", file: !3, line: 38, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !38, vtableHolder: !37, templateParams: !43)
+!38 = !{!20, !39}
+!39 = !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIlE2f1Ev", scope: !37, file: !3, line: 39, type: !40, scopeLine: 39, containingType: !37, virtualIndex: 0, flags: DIFlagPrototyped, spFlags: DISPFlagVirtual)
+!40 = !DISubroutineType(types: !41)
+!41 = !{null, !42}
+!42 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !37, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!43 = !{!44}
+!44 = !DITemplateTypeParameter(name: "T", type: !45)
+!45 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed)
+!46 = !DIGlobalVariableExpression(var: !47, expr: !DIExpression())
+!47 = distinct !DIGlobalVariable(name: "v4", scope: !2, file: !3, line: 56, type: !48, isLocal: false, isDefinition: true)
+!48 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ref_templ_non_tu_mangled", file: !3, line: 53, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !49, identifier: "_ZTS24ref_templ_non_tu_mangled")
+!49 = !{!50}
+!50 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !48, file: !3, line: 54, baseType: !51, size: 64)
+!51 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "_STNtempl_non_tu|<bool>", file: !3, line: 49, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !52, vtableHolder: !51, templateParams: !57)
+!52 = !{!20, !53}
+!53 = !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIbE2f1Ev", scope: !51, file: !3, line: 50, type: !54, scopeLine: 50, containingType: !51, virtualIndex: 0, flags: DIFlagPrototyped, spFlags: DISPFlagVirtual)
+!54 = !DISubroutineType(types: !55)
+!55 = !{null, !56}
+!56 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !51, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!57 = !{!58}
+!58 = !DITemplateTypeParameter(name: "T", type: !59)
+!59 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
+!60 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "tu_ref_non_tu", file: !3, line: 5, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !61, identifier: "_ZTS13tu_ref_non_tu")
+!61 = !{!62}
+!62 = !DIDerivedType(tag: DW_TAG_member, name: "v1", scope: !60, file: !3, line: 6, baseType: !63, size: 64)
+!63 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "non_tu", file: !3, line: 1, size: 64, flags: DIFlagTypePassByReference | DIFlagNonTrivial, elements: !64, vtableHolder: !63)
+!64 = !{!65, !66}
+!65 = !DIDerivedType(tag: DW_TAG_member, name: "_vptr$non_tu", scope: !3, file: !3, baseType: !21, size: 64, flags: DIFlagArtificial)
+!66 = !DISubprogram(name: "f1", linkageName: "_ZN6non_tu2f1Ev", scope: !63, file: !3, line: 2, type: !67, scopeLine: 2, containingType: !63, virtualIndex: 0, flags: DIFlagPrototyped, spFlags: DISPFlagVirtual)
+!67 = !DISubroutineType(types: !68)
+!68 = !{null, !69}
+!69 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !63, size: 64, flags: DIFlagArtificial | DIFlagObjectPointer)
+!70 = !{i32 7, !"Dwarf Version", i32 5}
+!71 = !{i32 2, !"Debug Info Version", i32 3}
+!72 = !{i32 1, !"wchar_size", i32 4}
+!73 = !{i32 7, !"uwtable", i32 1}
+!74 = !{i32 7, !"frame-pointer", i32 2}
+!75 = !{!"clang version 14.0.0 (git@github.com:llvm/llvm-project.git ab4756338c5b2216d52d9152b2f7e65f233c4dac)"}
+!76 = distinct !DISubprogram(name: "f1", linkageName: "_ZN6non_tu2f1Ev", scope: !63, file: !3, line: 4, type: !67, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !66, retainedNodes: !12)
+!77 = !DILocalVariable(name: "this", arg: 1, scope: !76, type: !78, flags: DIFlagArtificial | DIFlagObjectPointer)
+!78 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !63, size: 64)
+!79 = !DILocation(line: 0, scope: !76)
+!80 = !DILocation(line: 4, column: 20, scope: !76)
+!81 = distinct !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIiE2f1Ev", scope: !18, file: !3, line: 29, type: !27, scopeLine: 29, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !26, retainedNodes: !12)
+!82 = !DILocalVariable(name: "this", arg: 1, scope: !81, type: !83, flags: DIFlagArtificial | DIFlagObjectPointer)
+!83 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !18, size: 64)
+!84 = !DILocation(line: 0, scope: !81)
+!85 = !DILocation(line: 29, column: 31, scope: !81)
+!86 = distinct !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIlE2f1Ev", scope: !37, file: !3, line: 41, type: !40, scopeLine: 41, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !39, retainedNodes: !12)
+!87 = !DILocalVariable(name: "this", arg: 1, scope: !86, type: !88, flags: DIFlagArtificial | DIFlagObjectPointer)
+!88 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !37, size: 64)
+!89 = !DILocation(line: 0, scope: !86)
+!90 = !DILocation(line: 41, column: 32, scope: !86)
+!91 = distinct !DISubprogram(name: "f1", linkageName: "_ZN12templ_non_tuIbE2f1Ev", scope: !51, file: !3, line: 52, type: !54, scopeLine: 52, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, declaration: !53, retainedNodes: !12)
+!92 = !DILocalVariable(name: "this", arg: 1, scope: !91, type: !93, flags: DIFlagArtificial | DIFlagObjectPointer)
+!93 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !51, size: 64)
+!94 = !DILocation(line: 0, scope: !91)
+!95 = !DILocation(line: 52, column: 32, scope: !91)

From 3a3af2bbc97e7db045eccb8683e93b9aa7ef562b Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Mon, 24 Jan 2022 10:22:33 +0800
Subject: [PATCH 317/946] [C++20] [Module] fix bug 47716 and implement
 [module.interface]/p6

This fixes bug 47716.

According to [module.interface]p2, it is meaningless to export an entity
which is not in namespace scope.
The reason why the compiler crashes is that the compiler missed
ExportDecl when the compiler traverse the subclass of DeclContext. So
here is the crash.

Also, the patch implements [module.interface]p6 in
Sema::CheckRedeclaration* functions.

Reviewed By: aaron.ballman, urnathan

Differential Revision: https://reviews.llvm.org/D112903
---
 clang/include/clang/AST/DeclBase.h            | 14 +++
 .../clang/Basic/DiagnosticSemaKinds.td        |  5 +
 clang/include/clang/Sema/Sema.h               |  2 +
 clang/lib/AST/DeclBase.cpp                    |  9 ++
 clang/lib/Sema/SemaDecl.cpp                   | 49 +++++++++-
 clang/lib/Sema/SemaDeclCXX.cpp                |  2 +-
 clang/lib/Sema/SemaTemplate.cpp               |  2 +-
 .../test/CXX/module/module.interface/p2-2.cpp | 37 ++++++++
 clang/test/CXX/module/module.interface/p6.cpp | 93 +++++++++++++++++++
 9 files changed, 207 insertions(+), 6 deletions(-)
 create mode 100644 clang/test/CXX/module/module.interface/p2-2.cpp
 create mode 100644 clang/test/CXX/module/module.interface/p6.cpp

diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h
index 1328d377d00fa..06d2f17d14300 100644
--- a/clang/include/clang/AST/DeclBase.h
+++ b/clang/include/clang/AST/DeclBase.h
@@ -607,6 +607,20 @@ class alignas(8) Decl {
     return getModuleOwnershipKind() == ModuleOwnershipKind::ModulePrivate;
   }
 
+  /// Whether this declaration was exported in a lexical context.
+  /// e.g.:
+  ///
+  ///   export namespace A {
+  ///      void f1();        // isInExportDeclContext() == true
+  ///   }
+  ///   void A::f1();        // isInExportDeclContext() == false
+  ///
+  ///   namespace B {
+  ///      void f2();        // isInExportDeclContext() == false
+  ///   }
+  ///   export void B::f2(); // isInExportDeclContext() == true
+  bool isInExportDeclContext() const;
+
   /// Return true if this declaration has an attribute which acts as
   /// definition of the entity, such as 'alias' or 'ifunc'.
   bool hasDefiningAttr() const;
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 88e430d8eb09f..7fccdcaa9fc6a 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -7803,6 +7803,11 @@ def err_expected_class_or_namespace : Error<"%0 is not a class"
   "%select{ or namespace|, namespace, or enumeration}1">;
 def err_invalid_declarator_scope : Error<"cannot define or redeclare %0 here "
   "because namespace %1 does not enclose namespace %2">;
+def err_export_non_namespace_scope_name : Error<
+  "cannot export %0 as it is not at namespace scope">;
+def err_redeclaration_non_exported : Error <
+  "cannot export redeclaration %0 here since the previous declaration is not "
+  "exported">;
 def err_invalid_declarator_global_scope : Error<
   "definition or redeclaration of %0 cannot name the global scope">;
 def err_invalid_declarator_in_function : Error<
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index b1ef02865328f..4b609f4b1477c 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -4320,6 +4320,8 @@ class Sema final {
                             bool ConsiderLinkage, bool AllowInlineNamespace);
 
   bool CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old);
+  bool CheckRedeclarationExported(NamedDecl *New, NamedDecl *Old);
+  bool CheckRedeclarationInModule(NamedDecl *New, NamedDecl *Old);
 
   void DiagnoseAmbiguousLookup(LookupResult &Result);
   //@}
diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp
index 98a5c6b664713..9ee1cc0830867 100644
--- a/clang/lib/AST/DeclBase.cpp
+++ b/clang/lib/AST/DeclBase.cpp
@@ -995,6 +995,15 @@ bool Decl::AccessDeclContextCheck() const {
   return true;
 }
 
+bool Decl::isInExportDeclContext() const {
+  const DeclContext *DC = getLexicalDeclContext();
+
+  while (DC && !isa<ExportDecl>(DC))
+    DC = DC->getLexicalParent();
+
+  return DC && isa<ExportDecl>(DC);
+}
+
 static Decl::Kind getKind(const Decl *D) { return D->getKind(); }
 static Decl::Kind getKind(const DeclContext *DC) { return DC->getDeclKind(); }
 
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index d4ed721e0545b..a29409461f575 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -1628,6 +1628,39 @@ bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) {
   return false;
 }
 
+// [module.interface]p6:
+// A redeclaration of an entity X is implicitly exported if X was introduced by
+// an exported declaration; otherwise it shall not be exported.
+bool Sema::CheckRedeclarationExported(NamedDecl *New, NamedDecl *Old) {
+  bool IsNewExported = New->isInExportDeclContext();
+  bool IsOldExported = Old->isInExportDeclContext();
+
+  // It should be irrevelant if both of them are not exported.
+  if (!IsNewExported && !IsOldExported)
+    return false;
+
+  if (IsOldExported)
+    return false;
+
+  assert(IsNewExported);
+
+  Diag(New->getLocation(), diag::err_redeclaration_non_exported) << New;
+  Diag(Old->getLocation(), diag::note_previous_declaration);
+  return true;
+}
+
+// A wrapper function for checking the semantic restrictions of
+// a redeclaration within a module.
+bool Sema::CheckRedeclarationInModule(NamedDecl *New, NamedDecl *Old) {
+  if (CheckRedeclarationModuleOwnership(New, Old))
+    return true;
+
+  if (CheckRedeclarationExported(New, Old))
+    return true;
+
+  return false;
+}
+
 static bool isUsingDecl(NamedDecl *D) {
   return isa<UsingShadowDecl>(D) ||
          isa<UnresolvedUsingTypenameDecl>(D) ||
@@ -3390,7 +3423,7 @@ bool Sema::MergeFunctionDecl(FunctionDecl *New, NamedDecl *&OldD,
     }
   }
 
-  if (CheckRedeclarationModuleOwnership(New, Old))
+  if (CheckRedeclarationInModule(New, Old))
     return true;
 
   if (!getLangOpts().CPlusPlus) {
@@ -4269,7 +4302,7 @@ void Sema::MergeVarDecl(VarDecl *New, LookupResult &Previous) {
     return New->setInvalidDecl();
   }
 
-  if (CheckRedeclarationModuleOwnership(New, Old))
+  if (CheckRedeclarationInModule(New, Old))
     return;
 
   // Variables with external linkage are analyzed in FinalizeDeclaratorGroup.
@@ -5759,7 +5792,15 @@ bool Sema::diagnoseQualifiedDeclaration(CXXScopeSpec &SS, DeclContext *DC,
     else if (isa<BlockDecl>(Cur))
       Diag(Loc, diag::err_invalid_declarator_in_block)
         << Name << SS.getRange();
-    else
+    else if (isa<ExportDecl>(Cur)) {
+      if (!isa<NamespaceDecl>(DC))
+        Diag(Loc, diag::err_export_non_namespace_scope_name)
+            << Name << SS.getRange();
+      else
+        // The cases that DC is not NamespaceDecl should be handled in
+        // CheckRedeclarationExported.
+        return false;
+    } else
       Diag(Loc, diag::err_invalid_declarator_scope)
       << Name << cast<NamedDecl>(Cur) << cast<NamedDecl>(DC) << SS.getRange();
 
@@ -16535,7 +16576,7 @@ Decl *Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK,
     SetMemberAccessSpecifier(New, PrevDecl, AS);
 
   if (PrevDecl)
-    CheckRedeclarationModuleOwnership(New, PrevDecl);
+    CheckRedeclarationInModule(New, PrevDecl);
 
   if (TUK == TUK_Definition && (!SkipBody || !SkipBody->ShouldSkip))
     New->startDefinition();
diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp
index 01f0079198c74..16cdb7e577237 100644
--- a/clang/lib/Sema/SemaDeclCXX.cpp
+++ b/clang/lib/Sema/SemaDeclCXX.cpp
@@ -13012,7 +13012,7 @@ Decl *Sema::ActOnAliasDeclaration(Scope *S, AccessSpecifier AS,
       NewDecl->setInvalidDecl();
     else if (OldDecl) {
       NewDecl->setPreviousDecl(OldDecl);
-      CheckRedeclarationModuleOwnership(NewDecl, OldDecl);
+      CheckRedeclarationInModule(NewDecl, OldDecl);
     }
 
     NewND = NewDecl;
diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp
index 3278ca143dcce..64a0b45feb980 100644
--- a/clang/lib/Sema/SemaTemplate.cpp
+++ b/clang/lib/Sema/SemaTemplate.cpp
@@ -2063,7 +2063,7 @@ DeclResult Sema::CheckClassTemplate(
   }
 
   if (PrevClassTemplate)
-    CheckRedeclarationModuleOwnership(NewTemplate, PrevClassTemplate);
+    CheckRedeclarationInModule(NewTemplate, PrevClassTemplate);
 
   if (Invalid) {
     NewTemplate->setInvalidDecl();
diff --git a/clang/test/CXX/module/module.interface/p2-2.cpp b/clang/test/CXX/module/module.interface/p2-2.cpp
new file mode 100644
index 0000000000000..359e068d230af
--- /dev/null
+++ b/clang/test/CXX/module/module.interface/p2-2.cpp
@@ -0,0 +1,37 @@
+// The intention of this file to check we could only export declarations in namesapce scope.
+//
+// RUN: %clang_cc1 -std=c++20 %s -verify
+
+export module X;
+
+export template <typename T>
+struct X {
+  struct iterator {
+    T node;
+  };
+  void foo() {}
+  template <typename U>
+  U bar();
+};
+
+export template <typename T> X<T>::iterator;                      // expected-error {{cannot export 'iterator' as it is not at namespace scope}}
+export template <typename T> void X<T>::foo();                    // expected-error {{cannot export 'foo' as it is not at namespace scope}}
+export template <typename T> template <typename U> U X<T>::bar(); // expected-error {{cannot export 'bar' as it is not at namespace scope}}
+
+export struct Y {
+  struct iterator {
+    int node;
+  };
+  void foo() {}
+  template <typename U>
+  U bar();
+};
+
+export struct Y::iterator;               // expected-error {{cannot export 'iterator' as it is not at namespace scope}}
+export void Y::foo();                    // expected-error {{cannot export 'foo' as it is not at namespace scope}}
+export template <typename U> U Y::bar(); // expected-error {{cannot export 'bar' as it is not at namespace scope}}
+
+export {
+  template <typename T> X<T>::iterator; // expected-error {{cannot export 'iterator' as it is not at namespace scope}}
+  struct Y::iterator;                   // expected-error {{cannot export 'iterator' as it is not at namespace scope}}
+}
diff --git a/clang/test/CXX/module/module.interface/p6.cpp b/clang/test/CXX/module/module.interface/p6.cpp
new file mode 100644
index 0000000000000..a696851ccbff4
--- /dev/null
+++ b/clang/test/CXX/module/module.interface/p6.cpp
@@ -0,0 +1,93 @@
+// The test is check we couldn't export a redeclaration which isn't exported previously and
+// check it is OK to redeclare no matter exported nor not if is the previous declaration is exported.
+// RUN: %clang_cc1 -std=c++20 %s -verify
+
+export module X;
+
+struct S { // expected-note {{previous declaration is here}}
+  int n;
+};
+typedef S S;
+export typedef S S; // OK, does not redeclare an entity
+export struct S;    // expected-error {{cannot export redeclaration 'S' here since the previous declaration is not exported}}
+
+namespace A {
+struct X; // expected-note {{previous declaration is here}}
+export struct Y;
+} // namespace A
+
+namespace A {
+export struct X; // expected-error {{cannot export redeclaration 'X' here since the previous declaration is not exported}}
+export struct Y; // OK
+struct Z;        // expected-note {{previous declaration is here}}
+export struct Z; // expected-error {{cannot export redeclaration 'Z' here since the previous declaration is not exported}}
+} // namespace A
+
+namespace A {
+struct B;    // expected-note {{previous declaration is here}}
+struct C {}; // expected-note {{previous declaration is here}}
+} // namespace A
+
+namespace A {
+export struct B {}; // expected-error {{cannot export redeclaration 'B' here since the previous declaration is not exported}}
+export struct C;    // expected-error {{cannot export redeclaration 'C' here since the previous declaration is not exported}}
+} // namespace A
+
+template <typename T>
+struct TemplS; // expected-note {{previous declaration is here}}
+
+export template <typename T>
+struct TemplS {}; // expected-error {{cannot export redeclaration 'TemplS' here since the previous declaration is not exported}}
+
+template <typename T>
+struct TemplS2; // expected-note {{previous declaration is here}}
+
+export template <typename U>
+struct TemplS2 {}; // expected-error {{cannot export redeclaration 'TemplS2' here since the previous declaration is not exported}}
+
+void baz();        // expected-note {{previous declaration is here}}
+export void baz(); // expected-error {{cannot export redeclaration 'baz' here since the previous declaration is not exported}}
+
+namespace A {
+export void foo();
+void bar();        // expected-note {{previous declaration is here}}
+export void bar(); // expected-error {{cannot export redeclaration 'bar' here since the previous declaration is not exported}}
+void f1();         // expected-note {{previous declaration is here}}
+} // namespace A
+
+// OK
+//
+// [module.interface]/p6
+// A redeclaration of an entity X is implicitly exported if X was introduced by an exported declaration
+void A::foo();
+
+// The compiler couldn't export A::f1() here since A::f1() is declared above without exported.
+// See [module.interface]/p6 for details.
+export void A::f1(); // expected-error {{cannot export redeclaration 'f1' here since the previous declaration is not exported}}
+
+template <typename T>
+void TemplFunc(); // expected-note {{previous declaration is here}}
+
+export template <typename T>
+void TemplFunc() { // expected-error {{cannot export redeclaration 'TemplFunc' here since the previous declaration is not exported}}
+}
+
+namespace A {
+template <typename T>
+void TemplFunc2(); // expected-note {{previous declaration is here}}
+export template <typename T>
+void TemplFunc2() {} // expected-error {{cannot export redeclaration 'TemplFunc2' here since the previous declaration is not exported}}
+template <typename T>
+void TemplFunc3(); // expected-note {{previous declaration is here}}
+} // namespace A
+
+export template <typename T>
+void A::TemplFunc3() {} // expected-error {{cannot export redeclaration 'TemplFunc3' here since the previous declaration is not exported}}
+
+int var;        // expected-note {{previous declaration is here}}
+export int var; // expected-error {{cannot export redeclaration 'var' here since the previous declaration is not exported}}
+
+template <typename T>
+T TemplVar; // expected-note {{previous declaration is here}}
+export template <typename T>
+T TemplVar; // expected-error {{cannot export redeclaration 'TemplVar' here since the previous declaration is not exported}}

From 3f24cdec2572741f018457d5f24ef479e1291f1c Mon Sep 17 00:00:00 2001
From: Jim Lin <jim@andestech.com>
Date: Mon, 24 Jan 2022 10:20:16 +0800
Subject: [PATCH 318/946] [RISCV][NFC] Remove tailing whitespaces in
 RISCVInstrInfoVSDPatterns.td and RISCVInstrInfoVVLPatterns.td

---
 .../Target/RISCV/RISCVInstrInfoVSDPatterns.td    | 16 ++++++++--------
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td    |  4 ++--
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index 078025051e716..e452a84a9a6f2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -368,22 +368,22 @@ multiclass VPatWidenBinarySDNode_VV_VX_WV_WX<SDNode op, PatFrags extop, string i
     def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
                   (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
               (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
-                 vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1, 
+                 vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
     def : Pat<(op (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
                   (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
               (!cast<Instruction>(instruction_name#"_VX_"#vti.Vti.LMul.MX)
-                 vti.Vti.RegClass:$rs2, GPR:$rs1, 
+                 vti.Vti.RegClass:$rs2, GPR:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
     def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
                   (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
               (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
-                 vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, 
+                 vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
     def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
                   (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),
               (!cast<Instruction>(instruction_name#"_WX_"#vti.Vti.LMul.MX)
-                 vti.Wti.RegClass:$rs2, GPR:$rs1, 
+                 vti.Wti.RegClass:$rs2, GPR:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
   }
 }
@@ -418,12 +418,12 @@ multiclass VPatWidenBinaryFPSDNode_VV_VF<SDNode op, string instruction_name> {
     def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
                   (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
               (!cast<Instruction>(instruction_name#"_VV_"#vti.Vti.LMul.MX)
-                 vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1, 
+                 vti.Vti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
     def : Pat<(op (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs2))),
                   (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))),
               (!cast<Instruction>(instruction_name#"_V"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX)
-                 vti.Vti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1, 
+                 vti.Vti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
   }
 }
@@ -433,12 +433,12 @@ multiclass VPatWidenBinaryFPSDNode_WV_WF<SDNode op, string instruction_name> {
     def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
                   (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
               (!cast<Instruction>(instruction_name#"_WV_"#vti.Vti.LMul.MX)
-                 vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1, 
+                 vti.Wti.RegClass:$rs2, vti.Vti.RegClass:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
     def : Pat<(op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
                   (vti.Wti.Vector (fpext_oneuse (vti.Vti.Vector (SplatPat vti.Vti.ScalarRegClass:$rs1))))),
               (!cast<Instruction>(instruction_name#"_W"#vti.Vti.ScalarSuffix#"_"#vti.Vti.LMul.MX)
-                 vti.Wti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1, 
+                 vti.Wti.RegClass:$rs2, vti.Vti.ScalarRegClass:$rs1,
                  vti.Vti.AVL, vti.Vti.Log2SEW)>;
   }
 }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 28cb8fc413793..0ac959d79a024 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -616,7 +616,7 @@ multiclass VPatReductionVL<SDNode vop, string instruction_name, bit is_float> {
 multiclass VPatBinarySDNodeExt_V_WV<SDNode op, PatFrags extop, string instruction_name> {
   foreach vti = AllWidenableIntVectors in {
     def : Pat<
-      (vti.Vti.Vector 
+      (vti.Vti.Vector
         (riscv_trunc_vector_vl
           (op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
               (vti.Wti.Vector (extop (vti.Vti.Vector vti.Vti.RegClass:$rs1)))),
@@ -631,7 +631,7 @@ multiclass VPatBinarySDNodeExt_V_WV<SDNode op, PatFrags extop, string instructio
 multiclass VPatBinarySDNodeExt_V_WX<SDNode op, PatFrags extop, string instruction_name> {
   foreach vti = AllWidenableIntVectors in {
     def : Pat<
-      (vti.Vti.Vector 
+      (vti.Vti.Vector
         (riscv_trunc_vector_vl
           (op (vti.Wti.Vector vti.Wti.RegClass:$rs2),
               (vti.Wti.Vector (extop (vti.Vti.Vector (SplatPat GPR:$rs1))))),

From b574048239bc6fbd9dd356fbaa8bd475fa4b64e6 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Mon, 24 Jan 2022 11:03:12 +0800
Subject: [PATCH 319/946] [NFC] [Coroutines] Rename tests in coro-align

This is required by ychen. See https://reviews.llvm.org/D117542
---
 .../{coro-align-03.ll => coro-align16.ll}     |  0
 .../Transforms/Coroutines/coro-align32.ll     | 60 +++++++++++++++++++
 .../{coro-align-05.ll => coro-align64-02.ll}  |  0
 .../{coro-align-04.ll => coro-align64.ll}     |  0
 .../{coro-align-02.ll => coro-align8-02.ll}   |  0
 .../{coro-align-01.ll => coro-align8.ll}      |  0
 6 files changed, 60 insertions(+)
 rename llvm/test/Transforms/Coroutines/{coro-align-03.ll => coro-align16.ll} (100%)
 create mode 100644 llvm/test/Transforms/Coroutines/coro-align32.ll
 rename llvm/test/Transforms/Coroutines/{coro-align-05.ll => coro-align64-02.ll} (100%)
 rename llvm/test/Transforms/Coroutines/{coro-align-04.ll => coro-align64.ll} (100%)
 rename llvm/test/Transforms/Coroutines/{coro-align-02.ll => coro-align8-02.ll} (100%)
 rename llvm/test/Transforms/Coroutines/{coro-align-01.ll => coro-align8.ll} (100%)

diff --git a/llvm/test/Transforms/Coroutines/coro-align-03.ll b/llvm/test/Transforms/Coroutines/coro-align16.ll
similarity index 100%
rename from llvm/test/Transforms/Coroutines/coro-align-03.ll
rename to llvm/test/Transforms/Coroutines/coro-align16.ll
diff --git a/llvm/test/Transforms/Coroutines/coro-align32.ll b/llvm/test/Transforms/Coroutines/coro-align32.ll
new file mode 100644
index 0000000000000..8cd01f42ed15c
--- /dev/null
+++ b/llvm/test/Transforms/Coroutines/coro-align32.ll
@@ -0,0 +1,60 @@
+; Tests that the coro.align intrinsic could be lowered to correct alignment
+; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s
+
+define i8* @f() "coroutine.presplit"="1" {
+entry:
+  %x = alloca i64, align 16
+  %y = alloca i32, align 32
+  %z = alloca i32, align 16
+  %alpha = alloca i1, align 8
+  %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
+  %size = call i32 @llvm.coro.size.i32()
+  %align = call i32 @llvm.coro.align.i32()
+  %alloc = call i8* @aligned_alloc(i32 %align, i32 %size)
+  %hdl = call i8* @llvm.coro.begin(token %id, i8* %alloc)
+  %sp1 = call i8 @llvm.coro.suspend(token none, i1 false)
+  switch i8 %sp1, label %suspend [i8 0, label %resume
+                                  i8 1, label %cleanup]
+resume:
+  %x.alias = bitcast i64* %x to i32*
+  call void @capture_call(i32* %x.alias)
+  %y.alias = bitcast i32* %y to i32*
+  call void @capture_call(i32* %y.alias)
+  %z.alias = bitcast i32* %z to i32*
+  call void @capture_call(i32* %z.alias)
+  %alpha.alias = bitcast i1* %alpha to i32*
+  call void @capture_call(i32* %alpha.alias)
+  br label %cleanup
+
+cleanup:
+  %mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
+  call void @free(i8* %mem)
+  br label %suspend
+
+suspend:
+  call i1 @llvm.coro.end(i8* %hdl, i1 0)
+  ret i8* %hdl
+}
+
+; %x needs to go to the frame since it's escaped; %y will stay as local since it doesn't escape.
+; CHECK:        %f.Frame = type { void (%f.Frame*)*, void (%f.Frame*)*, i64, i1, i1, [6 x i8], i32, [12 x i8], i32 }
+; CHECK-LABEL:  define i8* @f()
+; CHECK:          %[[ALLOC:.+]] = call i8* @aligned_alloc(i32 32, i32 56)
+; CHECK-NEXT:     call noalias nonnull i8* @llvm.coro.begin(token %id, i8* %[[ALLOC]])
+
+declare i8* @llvm.coro.free(token, i8*)
+declare i32 @llvm.coro.size.i32()
+declare i32 @llvm.coro.align.i32()
+declare i8  @llvm.coro.suspend(token, i1)
+declare void @llvm.coro.resume(i8*)
+declare void @llvm.coro.destroy(i8*)
+
+declare token @llvm.coro.id(i32, i8*, i8*, i8*)
+declare i1 @llvm.coro.alloc(token)
+declare i8* @llvm.coro.begin(token, i8*)
+declare i1 @llvm.coro.end(i8*, i1)
+
+declare void @capture_call(i32*)
+declare void @nocapture_call(i32* nocapture)
+declare noalias i8* @aligned_alloc(i32, i32)
+declare void @free(i8*)
diff --git a/llvm/test/Transforms/Coroutines/coro-align-05.ll b/llvm/test/Transforms/Coroutines/coro-align64-02.ll
similarity index 100%
rename from llvm/test/Transforms/Coroutines/coro-align-05.ll
rename to llvm/test/Transforms/Coroutines/coro-align64-02.ll
diff --git a/llvm/test/Transforms/Coroutines/coro-align-04.ll b/llvm/test/Transforms/Coroutines/coro-align64.ll
similarity index 100%
rename from llvm/test/Transforms/Coroutines/coro-align-04.ll
rename to llvm/test/Transforms/Coroutines/coro-align64.ll
diff --git a/llvm/test/Transforms/Coroutines/coro-align-02.ll b/llvm/test/Transforms/Coroutines/coro-align8-02.ll
similarity index 100%
rename from llvm/test/Transforms/Coroutines/coro-align-02.ll
rename to llvm/test/Transforms/Coroutines/coro-align8-02.ll
diff --git a/llvm/test/Transforms/Coroutines/coro-align-01.ll b/llvm/test/Transforms/Coroutines/coro-align8.ll
similarity index 100%
rename from llvm/test/Transforms/Coroutines/coro-align-01.ll
rename to llvm/test/Transforms/Coroutines/coro-align8.ll

From 943aa1bfacaa143ef98caa360bc98a648703ce2e Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 20:32:52 -0800
Subject: [PATCH 320/946] Add modernize-use-default-member-init.UseAssignment
 to .clang-tidy

---
 .clang-tidy | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.clang-tidy b/.clang-tidy
index 1d4438dbfda0c..879c3661b302d 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -18,3 +18,5 @@ CheckOptions:
     value:           1
   - key:             readability-redundant-member-init.IgnoreBaseInCopyConstructors
     value:           1
+  - key:             modernize-use-default-member-init.UseAssignment
+    value:           1

From f63a9cd99db79e0d79f03169a9fa8a1baad54f1f Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 20:32:54 -0800
Subject: [PATCH 321/946] [Vectorize] Remove unused variables (NFC)

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 475ab7e1f495e..8bfd3aa525d55 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8727,7 +8727,6 @@ class HorizontalReduction {
 
   static RecurKind getRdxKind(Instruction *I) {
     assert(I && "Expected instruction for reduction matching");
-    TargetTransformInfo::ReductionFlags RdxFlags;
     if (match(I, m_Add(m_Value(), m_Value())))
       return RecurKind::Add;
     if (match(I, m_Mul(m_Value(), m_Value())))
@@ -8801,7 +8800,6 @@ class HorizontalReduction {
           return RecurKind::None;
       }
 
-      TargetTransformInfo::ReductionFlags RdxFlags;
       switch (Pred) {
       default:
         return RecurKind::None;

From b752eb887f7ef160e000c83e4c720d9ecb2bf620 Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 20:32:56 -0800
Subject: [PATCH 322/946] [Analysis] Use default member initialization (NFC)

Identified with modernize-use-default-member-init.
---
 .../llvm/Analysis/AliasAnalysisEvaluator.h    | 16 ++++-----
 .../llvm/Analysis/DependenceAnalysis.h        | 17 ++++------
 .../llvm/Analysis/DivergenceAnalysis.h        |  2 +-
 .../llvm/Analysis/IRSimilarityIdentifier.h    |  2 +-
 .../llvm/Analysis/InstructionSimplify.h       |  2 +-
 .../llvm/Analysis/LazyBlockFrequencyInfo.h    | 11 +++----
 .../llvm/Analysis/LazyBranchProbabilityInfo.h |  4 +--
 .../llvm/Analysis/LoopAccessAnalysis.h        | 33 +++++++++----------
 .../llvm/Analysis/LoopAnalysisManager.h       |  4 +--
 llvm/include/llvm/Analysis/MemorySSA.h        |  4 +--
 llvm/include/llvm/Analysis/PHITransAddr.h     |  4 +--
 .../llvm/Analysis/TargetTransformInfo.h       | 10 +++---
 llvm/lib/Analysis/CaptureTracking.cpp         | 12 +++----
 llvm/lib/Analysis/CostModel.cpp               |  6 ++--
 llvm/lib/Analysis/DivergenceAnalysis.cpp      |  2 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  |  2 +-
 llvm/lib/Analysis/InlineCost.cpp              |  7 ++--
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      | 10 ++----
 llvm/lib/Analysis/LoopInfo.cpp                |  5 ++-
 llvm/lib/Analysis/MemorySSA.cpp               |  4 +--
 llvm/lib/Analysis/ReplayInlineAdvisor.cpp     |  3 +-
 21 files changed, 73 insertions(+), 87 deletions(-)

diff --git a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 972eceaa3ba92..043b1b7ca2dce 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -31,17 +31,15 @@ namespace llvm {
 class AAResults;
 
 class AAEvaluator : public PassInfoMixin<AAEvaluator> {
-  int64_t FunctionCount;
-  int64_t NoAliasCount, MayAliasCount, PartialAliasCount, MustAliasCount;
-  int64_t NoModRefCount, ModCount, RefCount, ModRefCount;
-  int64_t MustCount, MustRefCount, MustModCount, MustModRefCount;
+  int64_t FunctionCount = 0;
+  int64_t NoAliasCount = 0, MayAliasCount = 0, PartialAliasCount = 0;
+  int64_t MustAliasCount = 0;
+  int64_t NoModRefCount = 0, ModCount = 0, RefCount = 0, ModRefCount = 0;
+  int64_t MustCount = 0, MustRefCount = 0, MustModCount = 0;
+  int64_t MustModRefCount = 0;
 
 public:
-  AAEvaluator()
-      : FunctionCount(), NoAliasCount(), MayAliasCount(), PartialAliasCount(),
-        MustAliasCount(), NoModRefCount(), ModCount(), RefCount(),
-        ModRefCount(), MustCount(), MustRefCount(), MustModCount(),
-        MustModRefCount() {}
+  AAEvaluator() = default;
   AAEvaluator(AAEvaluator &&Arg)
       : FunctionCount(Arg.FunctionCount), NoAliasCount(Arg.NoAliasCount),
         MayAliasCount(Arg.MayAliasCount),
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 305c9b1d88f22..8c852e85b04a9 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -74,12 +74,8 @@ namespace llvm {
     Dependence &operator=(Dependence &&) = default;
 
   public:
-    Dependence(Instruction *Source,
-               Instruction *Destination) :
-      Src(Source),
-      Dst(Destination),
-      NextPredecessor(nullptr),
-      NextSuccessor(nullptr) {}
+    Dependence(Instruction *Source, Instruction *Destination)
+        : Src(Source), Dst(Destination) {}
     virtual ~Dependence() {}
 
     /// Dependence::DVEntry - Each level in the distance/direction vector
@@ -99,9 +95,10 @@ namespace llvm {
       bool PeelFirst : 1; // Peeling the first iteration will break dependence.
       bool PeelLast  : 1; // Peeling the last iteration will break the dependence.
       bool Splitable : 1; // Splitting the loop will break dependence.
-      const SCEV *Distance; // NULL implies no distance available.
-      DVEntry() : Direction(ALL), Scalar(true), PeelFirst(false),
-                  PeelLast(false), Splitable(false), Distance(nullptr) { }
+      const SCEV *Distance = nullptr; // NULL implies no distance available.
+      DVEntry()
+          : Direction(ALL), Scalar(true), PeelFirst(false), PeelLast(false),
+            Splitable(false) {}
     };
 
     /// getSrc - Returns the source instruction for this dependence.
@@ -200,7 +197,7 @@ namespace llvm {
 
   private:
     Instruction *Src, *Dst;
-    const Dependence *NextPredecessor, *NextSuccessor;
+    const Dependence *NextPredecessor = nullptr, *NextSuccessor = nullptr;
     friend class DependenceInfo;
   };
 
diff --git a/llvm/include/llvm/Analysis/DivergenceAnalysis.h b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
index 7e526b2fad84c..c52b42ae8dc22 100644
--- a/llvm/include/llvm/Analysis/DivergenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DivergenceAnalysis.h
@@ -146,7 +146,7 @@ class DivergenceInfo {
   // analysis can run indefinitely. We set ContainsIrreducible and no
   // analysis is actually performed on the function. All values in
   // this function are conservatively reported as divergent instead.
-  bool ContainsIrreducible;
+  bool ContainsIrreducible = false;
   std::unique_ptr<SyncDependenceAnalysis> SDA;
   std::unique_ptr<DivergenceAnalysisImpl> DA;
 
diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 51c5c620230ba..2f8ae205657d8 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -121,7 +121,7 @@ struct IRInstructionData
   /// and is used when checking when two instructions are considered similar.
   /// If either instruction is not legal, the instructions are automatically not
   /// considered similar.
-  bool Legal;
+  bool Legal = false;
 
   /// This is only relevant if we are wrapping a CmpInst where we needed to
   /// change the predicate of a compare instruction from a greater than form
diff --git a/llvm/include/llvm/Analysis/InstructionSimplify.h b/llvm/include/llvm/Analysis/InstructionSimplify.h
index 29973fe95ef79..8b49c115f1019 100644
--- a/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -63,7 +63,7 @@ class Value;
 /// results if the users specified it is safe to use.
 struct InstrInfoQuery {
   InstrInfoQuery(bool UMD) : UseInstrInfo(UMD) {}
-  InstrInfoQuery() : UseInstrInfo(true) {}
+  InstrInfoQuery() = default;
   bool UseInstrInfo = true;
 
   MDNode *getMetadata(const Instruction *I, unsigned KindID) const {
diff --git a/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h
index ab6d6ce9ec5ae..a6d8b76b12aea 100644
--- a/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h
+++ b/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h
@@ -33,8 +33,7 @@ template <typename FunctionT, typename BranchProbabilityInfoPassT,
           typename LoopInfoT, typename BlockFrequencyInfoT>
 class LazyBlockFrequencyInfo {
 public:
-  LazyBlockFrequencyInfo()
-      : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {}
+  LazyBlockFrequencyInfo() = default;
 
   /// Set up the per-function input.
   void setAnalysis(const FunctionT *F, BranchProbabilityInfoPassT *BPIPass,
@@ -67,10 +66,10 @@ class LazyBlockFrequencyInfo {
 
 private:
   BlockFrequencyInfoT BFI;
-  bool Calculated;
-  const FunctionT *F;
-  BranchProbabilityInfoPassT *BPIPass;
-  const LoopInfoT *LI;
+  bool Calculated = false;
+  const FunctionT *F = nullptr;
+  BranchProbabilityInfoPassT *BPIPass = nullptr;
+  const LoopInfoT *LI = nullptr;
 };
 
 /// This is an alternative analysis pass to
diff --git a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h
index 3c632f02905a7..bad7423616b45 100644
--- a/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h
+++ b/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -57,7 +57,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass {
   public:
     LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI,
                               const TargetLibraryInfo *TLI)
-        : Calculated(false), F(F), LI(LI), TLI(TLI) {}
+        : F(F), LI(LI), TLI(TLI) {}
 
     /// Retrieve the BPI with the branch probabilities computed.
     BranchProbabilityInfo &getCalculated() {
@@ -75,7 +75,7 @@ class LazyBranchProbabilityInfoPass : public FunctionPass {
 
   private:
     BranchProbabilityInfo BPI;
-    bool Calculated;
+    bool Calculated = false;
     const Function *F;
     const LoopInfo *LI;
     const TargetLibraryInfo *TLI;
diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index e55a90b0ea41f..c83a04991b040 100644
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -169,10 +169,7 @@ class MemoryDepChecker {
   };
 
   MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L)
-      : PSE(PSE), InnermostLoop(L), AccessIdx(0), MaxSafeDepDistBytes(0),
-        MaxSafeVectorWidthInBits(-1U),
-        FoundNonConstantDistanceDependence(false),
-        Status(VectorizationSafetyStatus::Safe), RecordDependences(true) {}
+      : PSE(PSE), InnermostLoop(L) {}
 
   /// Register the location (instructions are given increasing numbers)
   /// of a write access.
@@ -264,30 +261,30 @@ class MemoryDepChecker {
   SmallVector<Instruction *, 16> InstMap;
 
   /// The program order index to be used for the next instruction.
-  unsigned AccessIdx;
+  unsigned AccessIdx = 0;
 
   // We can access this many bytes in parallel safely.
-  uint64_t MaxSafeDepDistBytes;
+  uint64_t MaxSafeDepDistBytes = 0;
 
   /// Number of elements (from consecutive iterations) that are safe to
   /// operate on simultaneously, multiplied by the size of the element in bits.
   /// The size of the element is taken from the memory access that is most
   /// restrictive.
-  uint64_t MaxSafeVectorWidthInBits;
+  uint64_t MaxSafeVectorWidthInBits = -1U;
 
   /// If we see a non-constant dependence distance we can still try to
   /// vectorize this loop with runtime checks.
-  bool FoundNonConstantDistanceDependence;
+  bool FoundNonConstantDistanceDependence = false;
 
   /// Result of the dependence checks, indicating whether the checked
   /// dependences are safe for vectorization, require RT checks or are known to
   /// be unsafe.
-  VectorizationSafetyStatus Status;
+  VectorizationSafetyStatus Status = VectorizationSafetyStatus::Safe;
 
   //// True if Dependences reflects the dependences in the
   //// loop.  If false we exceeded MaxDependences and
   //// Dependences is invalid.
-  bool RecordDependences;
+  bool RecordDependences = true;
 
   /// Memory dependences collected during the analysis.  Only valid if
   /// RecordDependences is true.
@@ -395,7 +392,7 @@ class RuntimePointerChecking {
           AliasSetId(AliasSetId), Expr(Expr) {}
   };
 
-  RuntimePointerChecking(ScalarEvolution *SE) : Need(false), SE(SE) {}
+  RuntimePointerChecking(ScalarEvolution *SE) : SE(SE) {}
 
   /// Reset the state of the pointer runtime information.
   void reset() {
@@ -444,7 +441,7 @@ class RuntimePointerChecking {
                    unsigned Depth = 0) const;
 
   /// This flag indicates if we need to add the runtime check.
-  bool Need;
+  bool Need = false;
 
   /// Information about the pointers that may require checking.
   SmallVector<PointerInfo, 2> Pointers;
@@ -620,17 +617,17 @@ class LoopAccessInfo {
 
   Loop *TheLoop;
 
-  unsigned NumLoads;
-  unsigned NumStores;
+  unsigned NumLoads = 0;
+  unsigned NumStores = 0;
 
-  uint64_t MaxSafeDepDistBytes;
+  uint64_t MaxSafeDepDistBytes = -1;
 
   /// Cache the result of analyzeLoop.
-  bool CanVecMem;
-  bool HasConvergentOp;
+  bool CanVecMem = false;
+  bool HasConvergentOp = false;
 
   /// Indicator that there are non vectorizable stores to a uniform address.
-  bool HasDependenceInvolvingLoopInvariantAddress;
+  bool HasDependenceInvolvingLoopInvariantAddress = false;
 
   /// The diagnostics report generated for the analysis.  E.g. why we
   /// couldn't analyze the loop.
diff --git a/llvm/include/llvm/Analysis/LoopAnalysisManager.h b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
index bc8a1e74e447d..d07e6977fed1e 100644
--- a/llvm/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/llvm/include/llvm/Analysis/LoopAnalysisManager.h
@@ -87,7 +87,7 @@ typedef InnerAnalysisManagerProxy<LoopAnalysisManager, Function>
 template <> class LoopAnalysisManagerFunctionProxy::Result {
 public:
   explicit Result(LoopAnalysisManager &InnerAM, LoopInfo &LI)
-      : InnerAM(&InnerAM), LI(&LI), MSSAUsed(false) {}
+      : InnerAM(&InnerAM), LI(&LI) {}
   Result(Result &&Arg)
       : InnerAM(std::move(Arg.InnerAM)), LI(Arg.LI), MSSAUsed(Arg.MSSAUsed) {
     // We have to null out the analysis manager in the moved-from state
@@ -136,7 +136,7 @@ template <> class LoopAnalysisManagerFunctionProxy::Result {
 private:
   LoopAnalysisManager *InnerAM;
   LoopInfo *LI;
-  bool MSSAUsed;
+  bool MSSAUsed = false;
 };
 
 /// Provide a specialized run method for the \c LoopAnalysisManagerFunctionProxy
diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h
index 9198bd8412e68..b41f5771bacdb 100644
--- a/llvm/include/llvm/Analysis/MemorySSA.h
+++ b/llvm/include/llvm/Analysis/MemorySSA.h
@@ -865,7 +865,7 @@ class MemorySSA {
   AccessList *getOrCreateAccessList(const BasicBlock *);
   DefsList *getOrCreateDefsList(const BasicBlock *);
   void renumberBlock(const BasicBlock *) const;
-  AliasAnalysis *AA;
+  AliasAnalysis *AA = nullptr;
   DominatorTree *DT;
   Function &F;
 
@@ -892,7 +892,7 @@ class MemorySSA {
   std::unique_ptr<ClobberWalkerBase<AliasAnalysis>> WalkerBase;
   std::unique_ptr<CachingWalker<AliasAnalysis>> Walker;
   std::unique_ptr<SkipSelfWalker<AliasAnalysis>> SkipWalker;
-  unsigned NextID;
+  unsigned NextID = 0;
 };
 
 /// Enables verification of MemorySSA.
diff --git a/llvm/include/llvm/Analysis/PHITransAddr.h b/llvm/include/llvm/Analysis/PHITransAddr.h
index 54a07f0534787..a23f8e61c3033 100644
--- a/llvm/include/llvm/Analysis/PHITransAddr.h
+++ b/llvm/include/llvm/Analysis/PHITransAddr.h
@@ -40,7 +40,7 @@ class PHITransAddr {
   const DataLayout &DL;
 
   /// TLI - The target library info if known, otherwise null.
-  const TargetLibraryInfo *TLI;
+  const TargetLibraryInfo *TLI = nullptr;
 
   /// A cache of \@llvm.assume calls used by SimplifyInstruction.
   AssumptionCache *AC;
@@ -50,7 +50,7 @@ class PHITransAddr {
 
 public:
   PHITransAddr(Value *addr, const DataLayout &DL, AssumptionCache *AC)
-      : Addr(addr), DL(DL), TLI(nullptr), AC(AC) {
+      : Addr(addr), DL(DL), AC(AC) {
     // If the address is an instruction, the whole thing is considered an input.
     if (Instruction *I = dyn_cast<Instruction>(Addr))
       InstInputs.push_back(I);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 179f0e8e548bf..34ef9cc61c4ff 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1364,10 +1364,12 @@ class TargetTransformInfo {
 
   /// Flags describing the kind of vector reduction.
   struct ReductionFlags {
-    ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
-    bool IsMaxOp;  ///< If the op a min/max kind, true if it's a max operation.
-    bool IsSigned; ///< Whether the operation is a signed int reduction.
-    bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present.
+    ReductionFlags() = default;
+    bool IsMaxOp =
+        false; ///< If the op a min/max kind, true if it's a max operation.
+    bool IsSigned = false; ///< Whether the operation is a signed int reduction.
+    bool NoNaN =
+        false; ///< If op is an fp min/max, whether NaNs may be present.
   };
 
   /// \returns True if the target prefers reductions in loop.
diff --git a/llvm/lib/Analysis/CaptureTracking.cpp b/llvm/lib/Analysis/CaptureTracking.cpp
index 9b45f455be087..ba8462e659d53 100644
--- a/llvm/lib/Analysis/CaptureTracking.cpp
+++ b/llvm/lib/Analysis/CaptureTracking.cpp
@@ -75,7 +75,7 @@ bool CaptureTracker::isDereferenceableOrNull(Value *O, const DataLayout &DL) {
 namespace {
   struct SimpleCaptureTracker : public CaptureTracker {
     explicit SimpleCaptureTracker(bool ReturnCaptures)
-      : ReturnCaptures(ReturnCaptures), Captured(false) {}
+        : ReturnCaptures(ReturnCaptures) {}
 
     void tooManyUses() override { Captured = true; }
 
@@ -89,7 +89,7 @@ namespace {
 
     bool ReturnCaptures;
 
-    bool Captured;
+    bool Captured = false;
   };
 
   /// Only find pointer captures which happen before the given instruction. Uses
@@ -101,7 +101,7 @@ namespace {
     CapturesBefore(bool ReturnCaptures, const Instruction *I,
                    const DominatorTree *DT, bool IncludeI, const LoopInfo *LI)
         : BeforeHere(I), DT(DT), ReturnCaptures(ReturnCaptures),
-          IncludeI(IncludeI), Captured(false), LI(LI) {}
+          IncludeI(IncludeI), LI(LI) {}
 
     void tooManyUses() override { Captured = true; }
 
@@ -139,7 +139,7 @@ namespace {
     bool ReturnCaptures;
     bool IncludeI;
 
-    bool Captured;
+    bool Captured = false;
 
     const LoopInfo *LI;
   };
@@ -155,7 +155,7 @@ namespace {
   struct EarliestCaptures : public CaptureTracker {
 
     EarliestCaptures(bool ReturnCaptures, Function &F, const DominatorTree &DT)
-        : DT(DT), ReturnCaptures(ReturnCaptures), Captured(false), F(F) {}
+        : DT(DT), ReturnCaptures(ReturnCaptures), F(F) {}
 
     void tooManyUses() override {
       Captured = true;
@@ -199,7 +199,7 @@ namespace {
 
     bool ReturnCaptures;
 
-    bool Captured;
+    bool Captured = false;
 
     Function &F;
   };
diff --git a/llvm/lib/Analysis/CostModel.cpp b/llvm/lib/Analysis/CostModel.cpp
index f407ec0d017a8..326bacad01fe1 100644
--- a/llvm/lib/Analysis/CostModel.cpp
+++ b/llvm/lib/Analysis/CostModel.cpp
@@ -50,7 +50,7 @@ namespace {
 
   public:
     static char ID; // Class identification, replacement for typeinfo
-    CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) {
+    CostModelAnalysis() : FunctionPass(ID) {
       initializeCostModelAnalysisPass(
         *PassRegistry::getPassRegistry());
     }
@@ -69,9 +69,9 @@ namespace {
     void print(raw_ostream &OS, const Module*) const override;
 
     /// The function that we analyze.
-    Function *F;
+    Function *F = nullptr;
     /// Target information.
-    const TargetTransformInfo *TTI;
+    const TargetTransformInfo *TTI = nullptr;
   };
 }  // End of anonymous namespace
 
diff --git a/llvm/lib/Analysis/DivergenceAnalysis.cpp b/llvm/lib/Analysis/DivergenceAnalysis.cpp
index 62ac6f693fe30..39e80c2ad51c1 100644
--- a/llvm/lib/Analysis/DivergenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DivergenceAnalysis.cpp
@@ -348,7 +348,7 @@ DivergenceInfo::DivergenceInfo(Function &F, const DominatorTree &DT,
                                const PostDominatorTree &PDT, const LoopInfo &LI,
                                const TargetTransformInfo &TTI,
                                bool KnownReducible)
-    : F(F), ContainsIrreducible(false) {
+    : F(F) {
   if (!KnownReducible) {
     using RPOTraversal = ReversePostOrderTraversal<const Function *>;
     RPOTraversal FuncRPOT(&F);
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index ca1a2907e51cf..ea08e50bf7283 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -62,7 +62,7 @@ void IRInstructionData::initializeInstruction() {
 }
 
 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
-    : Inst(nullptr), Legal(false), IDL(&IDList) {}
+    : IDL(&IDList) {}
 
 void IRInstructionData::setBranchSuccessors(
     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index 5dce896feb442..d5411d916c777 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -361,10 +361,10 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
   /// Model the elimination of repeated loads that is expected to happen
   /// whenever we simplify away the stores that would otherwise cause them to be
   /// loads.
-  bool EnableLoadElimination;
+  bool EnableLoadElimination = true;
 
   /// Whether we allow inlining for recursive call.
-  bool AllowRecursiveCall;
+  bool AllowRecursiveCall = false;
 
   SmallPtrSet<Value *, 16> LoadAddrSet;
 
@@ -455,8 +455,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
                OptimizationRemarkEmitter *ORE = nullptr)
       : TTI(TTI), GetAssumptionCache(GetAssumptionCache), GetBFI(GetBFI),
         PSI(PSI), F(Callee), DL(F.getParent()->getDataLayout()), ORE(ORE),
-        CandidateCall(Call), EnableLoadElimination(true),
-        AllowRecursiveCall(false) {}
+        CandidateCall(Call) {}
 
   InlineResult analyze();
 
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index b8b1b5ad53c9e..2ab78d2b7ee23 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -519,8 +519,7 @@ class AccessAnalysis {
   AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                  MemoryDepChecker::DepCandidates &DA,
                  PredicatedScalarEvolution &PSE)
-      : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
-        IsRTCheckAnalysisNeeded(false), PSE(PSE) {}
+      : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA), PSE(PSE) {}
 
   /// Register a load  and whether it is only read from.
   void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
@@ -620,7 +619,7 @@ class AccessAnalysis {
   /// memcheck analysis without dependency checking
   /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
   /// cleared while this remains set if we have potentially dependent accesses.
-  bool IsRTCheckAnalysisNeeded;
+  bool IsRTCheckAnalysisNeeded = false;
 
   /// The SCEV predicate containing all the SCEV-related assumptions.
   PredicatedScalarEvolution &PSE;
@@ -2244,10 +2243,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                                DominatorTree *DT, LoopInfo *LI)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
       PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
-      DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
-      NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
-      HasConvergentOp(false),
-      HasDependenceInvolvingLoopInvariantAddress(false) {
+      DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L) {
   if (canAnalyzeLoop())
     analyzeLoop(AA, LI, TLI, DT);
 }
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index b35fb2a190f62..dd69587161272 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -695,11 +695,10 @@ class UnloopUpdater {
 
   // Flag the presence of an irreducible backedge whose destination is a block
   // directly contained by the original unloop.
-  bool FoundIB;
+  bool FoundIB = false;
 
 public:
-  UnloopUpdater(Loop *UL, LoopInfo *LInfo)
-      : Unloop(*UL), LI(LInfo), DFS(UL), FoundIB(false) {}
+  UnloopUpdater(Loop *UL, LoopInfo *LInfo) : Unloop(*UL), LI(LInfo), DFS(UL) {}
 
   void updateBlockParents();
 
diff --git a/llvm/lib/Analysis/MemorySSA.cpp b/llvm/lib/Analysis/MemorySSA.cpp
index ac20e20f0c0d0..57f431ec21f56 100644
--- a/llvm/lib/Analysis/MemorySSA.cpp
+++ b/llvm/lib/Analysis/MemorySSA.cpp
@@ -1265,8 +1265,8 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
 }
 
 MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
-    : AA(nullptr), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
-      SkipWalker(nullptr), NextID(0) {
+    : DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
+      SkipWalker(nullptr) {
   // Build MemorySSA using a batch alias analysis. This reuses the internal
   // state that AA collects during an alias()/getModRefInfo() call. This is
   // safe because there are no CFG changes while building MemorySSA and can
diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
index f83d8b0fd2301..294bc38c17ad0 100644
--- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp
@@ -28,8 +28,7 @@ ReplayInlineAdvisor::ReplayInlineAdvisor(
     std::unique_ptr<InlineAdvisor> OriginalAdvisor,
     const ReplayInlinerSettings &ReplaySettings, bool EmitRemarks)
     : InlineAdvisor(M, FAM), OriginalAdvisor(std::move(OriginalAdvisor)),
-      HasReplayRemarks(false), ReplaySettings(ReplaySettings),
-      EmitRemarks(EmitRemarks) {
+      ReplaySettings(ReplaySettings), EmitRemarks(EmitRemarks) {
 
   auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(ReplaySettings.ReplayFile);
   std::error_code EC = BufferOrErr.getError();

From d3b26dea16108c427b19b5480c9edc76edf8f5b4 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Sun, 23 Jan 2022 20:45:25 -0800
Subject: [PATCH 323/946] Clang: Change the default DWARF version to 5

(except on platforms that already opt in to specific versions - SCE,
Android, and Darwin using DWARFv4 explicitly, for instance)
---
 clang/docs/ReleaseNotes.rst                 | 8 ++++++++
 clang/include/clang/Driver/ToolChain.h      | 2 +-
 clang/lib/Driver/ToolChains/Linux.h         | 1 +
 clang/test/CodeGen/debug-info-extern-call.c | 4 ++--
 clang/test/CodeGen/dwarf-version.c          | 8 ++++----
 clang/test/Driver/cl-options.c              | 2 +-
 clang/test/Driver/clang-g-opts.c            | 2 +-
 clang/test/Driver/ve-toolchain.c            | 2 +-
 clang/test/Driver/ve-toolchain.cpp          | 2 +-
 9 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2eec63901932e..4fe037741256f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -252,6 +252,14 @@ X86 Support in Clang
 
 - Support for ``AVX512-FP16`` instructions has been added.
 
+DWARF Support in Clang
+----------------------
+
+- The default DWARF version has increased from DWARFv4 to DWARFv5.  You can opt
+  back in to the old behavior with -gdwarf-4. Some platforms (Darwin, Android,
+  and SCE for instance) already opt out of this version bump as is suitable for
+  the platform
+
 Arm and AArch64 Support in Clang
 --------------------------------
 
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index eb95806a2f75d..37011de6bd6d7 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -510,7 +510,7 @@ class ToolChain {
 
   // Return the DWARF version to emit, in the absence of arguments
   // to the contrary.
-  virtual unsigned GetDefaultDwarfVersion() const { return 4; }
+  virtual unsigned GetDefaultDwarfVersion() const { return 5; }
 
   // Some toolchains may have different restrictions on the DWARF version and
   // may need to adjust it. E.g. NVPTX may need to enforce DWARF2 even when host
diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h
index a5ec33bd44f10..a5648d79d655f 100644
--- a/clang/lib/Driver/ToolChains/Linux.h
+++ b/clang/lib/Driver/ToolChains/Linux.h
@@ -40,6 +40,7 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF {
   void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                            llvm::opt::ArgStringList &CC1Args) const override;
   RuntimeLibType GetDefaultRuntimeLibType() const override;
+  unsigned GetDefaultDwarfVersion() const override;
   CXXStdlibType GetDefaultCXXStdlibType() const override;
   bool
   IsAArch64OutlineAtomicsDefault(const llvm::opt::ArgList &Args) const override;
diff --git a/clang/test/CodeGen/debug-info-extern-call.c b/clang/test/CodeGen/debug-info-extern-call.c
index 7cf90550ac00a..fad52d0df0b3f 100644
--- a/clang/test/CodeGen/debug-info-extern-call.c
+++ b/clang/test/CodeGen/debug-info-extern-call.c
@@ -12,13 +12,13 @@
 // decls so that the dwarf generator can describe information needed for tail
 // call frame reconstrution.
 //
-// RUN: %clang -g -O2 -target x86_64-none-linux-gnu -ggdb -S -emit-llvm %s -o - \
+// RUN: %clang -gdwarf-4 -O2 -target x86_64-none-linux-gnu -ggdb -S -emit-llvm %s -o - \
 // RUN:   | FileCheck %s -check-prefix=DECLS-FOR-EXTERN
 //
 // Do not emit a subprogram for extern decls when entry values are disabled and
 // the tuning is not set to gdb.
 //
-// RUN: %clang -g -O2 -target x86_64-none-linux-gnu -gsce -S -emit-llvm %s -o - \
+// RUN: %clang -gdwarf-4 -O2 -target x86_64-none-linux-gnu -gsce -S -emit-llvm %s -o - \
 // RUN:   | FileCheck %s -check-prefix=NO-DECLS-FOR-EXTERN
 
 // DECLS-FOR-EXTERN-NOT: !DICompileUnit({{.*}}retainedTypes: !{{[0-9]+}}
diff --git a/clang/test/CodeGen/dwarf-version.c b/clang/test/CodeGen/dwarf-version.c
index 6d131c470d5b3..b329556ae0d9d 100644
--- a/clang/test/CodeGen/dwarf-version.c
+++ b/clang/test/CodeGen/dwarf-version.c
@@ -2,8 +2,8 @@
 // RUN: %clang -target x86_64-linux-gnu -gdwarf-3 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER3
 // RUN: %clang -target x86_64-linux-gnu -gdwarf-4 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
 // RUN: %clang -target x86_64-linux-gnu -gdwarf-5 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5
-// RUN: %clang -target x86_64-linux-gnu -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
-// RUN: %clang -target x86_64-linux-gnu -gdwarf -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER4
+// RUN: %clang -target x86_64-linux-gnu -g -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5
+// RUN: %clang -target x86_64-linux-gnu -gdwarf -S -emit-llvm -o - %s | FileCheck %s --check-prefix=VER5
 
 // The -isysroot is used as a hack to avoid LIT messing with the SDKROOT
 // environment variable which indirecty overrides the version in the target
@@ -28,10 +28,10 @@
 // RUN:     | FileCheck %s --check-prefixes=NODWARF,CODEVIEW
 //     Explicitly request DWARF.
 // RUN: %clang -target i686-pc-windows-msvc -gdwarf -S -emit-llvm -o - %s \
-// RUN:     | FileCheck %s --check-prefixes=VER4,NOCODEVIEW
+// RUN:     | FileCheck %s --check-prefixes=VER5,NOCODEVIEW
 //     Explicitly request both.
 // RUN: %clang -target i686-pc-windows-msvc -gdwarf -gcodeview -S -emit-llvm -o - %s \
-// RUN:     | FileCheck %s --check-prefixes=VER4,CODEVIEW
+// RUN:     | FileCheck %s --check-prefixes=VER5,CODEVIEW
 // RUN: %clang -target powerpc-ibm-aix-xcoff -g -S -emit-llvm -o - %s | \
 // RUN:   FileCheck %s --check-prefix=VER3
 // RUN: %clang -target powerpc-ibm-aix-xcoff -gdwarf-2 -S -emit-llvm -o - %s | \
diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c
index f39db87660125..733e733de738e 100644
--- a/clang/test/Driver/cl-options.c
+++ b/clang/test/Driver/cl-options.c
@@ -596,7 +596,7 @@
 // RUN: %clang_cl /Z7 -gdwarf /c -### -- %s 2>&1 | FileCheck -check-prefix=Z7_gdwarf %s
 // Z7_gdwarf: "-gcodeview"
 // Z7_gdwarf: "-debug-info-kind=constructor"
-// Z7_gdwarf: "-dwarf-version=4"
+// Z7_gdwarf: "-dwarf-version=
 
 // RUN: %clang_cl -fmsc-version=1800 -TP -### -- %s 2>&1 | FileCheck -check-prefix=CXX11 %s
 // CXX11: -std=c++11
diff --git a/clang/test/Driver/clang-g-opts.c b/clang/test/Driver/clang-g-opts.c
index bb129e75769c9..d982b1070cae1 100644
--- a/clang/test/Driver/clang-g-opts.c
+++ b/clang/test/Driver/clang-g-opts.c
@@ -32,7 +32,7 @@
 
 // CHECK-WITHOUT-G-NOT: -debug-info-kind
 // CHECK-WITH-G: "-debug-info-kind=constructor"
-// CHECK-WITH-G: "-dwarf-version=4"
+// CHECK-WITH-G: "-dwarf-version=5"
 // CHECK-WITH-G-DWARF2: "-dwarf-version=2"
 
 // CHECK-WITH-G-STANDALONE: "-debug-info-kind=standalone"
diff --git a/clang/test/Driver/ve-toolchain.c b/clang/test/Driver/ve-toolchain.c
index 8878bd8f83cc0..35af3c81c4c6f 100644
--- a/clang/test/Driver/ve-toolchain.c
+++ b/clang/test/Driver/ve-toolchain.c
@@ -6,7 +6,7 @@
 /// Checking dwarf-version
 
 // RUN: %clang -### -g -target ve %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
-// DWARF_VER: "-dwarf-version=4"
+// DWARF_VER: "-dwarf-version=5"
 
 ///-----------------------------------------------------------------------------
 /// Checking include-path
diff --git a/clang/test/Driver/ve-toolchain.cpp b/clang/test/Driver/ve-toolchain.cpp
index 7666cfbfe8b27..7447f34b70e0c 100644
--- a/clang/test/Driver/ve-toolchain.cpp
+++ b/clang/test/Driver/ve-toolchain.cpp
@@ -7,7 +7,7 @@
 
 // RUN: %clangxx -### -g -target ve-unknown-linux-gnu \
 // RUN:     %s 2>&1 | FileCheck -check-prefix=DWARF_VER %s
-// DWARF_VER: "-dwarf-version=4"
+// DWARF_VER: "-dwarf-version=5"
 
 ///-----------------------------------------------------------------------------
 /// Checking include-path

From 90abe181da7c61d982e4873c97fd12bc06fefe09 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Sun, 23 Jan 2022 21:10:16 -0800
Subject: [PATCH 324/946] Add missing function implementation from DWARF
 default change

Fix for d3b26dea16108c427b19b5480c9edc76edf8f5b4
---
 clang/lib/Driver/ToolChains/Linux.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp
index e413640abad35..af74b108e04ee 100644
--- a/clang/lib/Driver/ToolChains/Linux.cpp
+++ b/clang/lib/Driver/ToolChains/Linux.cpp
@@ -324,6 +324,12 @@ ToolChain::RuntimeLibType Linux::GetDefaultRuntimeLibType() const {
   return Generic_ELF::GetDefaultRuntimeLibType();
 }
 
+unsigned Linux::GetDefaultDwarfVersion() const {
+  if (getTriple().isAndroid())
+    return 4;
+  return ToolChain::GetDefaultDwarfVersion();
+}
+
 ToolChain::CXXStdlibType Linux::GetDefaultCXXStdlibType() const {
   if (getTriple().isAndroid())
     return ToolChain::CST_Libcxx;

From 68b70d17d8dea3fe9fa8e8f8bffd37bfe8125a65 Mon Sep 17 00:00:00 2001
From: Abinav Puthan Purayil <abinav.puthanpurayil@amd.com>
Date: Mon, 3 Jan 2022 15:45:52 +0530
Subject: [PATCH 325/946] [GlobalISel] Fold or of shifts with constant amount
 to funnel shift.

This change folds (or (shl x, C0), (lshr y, C1)) to funnel shift iff C0
and C1 are constants where C0 + C1 is the bit-width of the shift
instructions.

Differential Revision: https://reviews.llvm.org/D116529
---
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  47 +++---
 llvm/test/CodeGen/AArch64/arm64-rev.ll        |   7 +-
 .../CodeGen/AMDGPU/GlobalISel/combine-fsh.mir |  57 ++++++-
 .../CodeGen/AMDGPU/GlobalISel/combine-rot.mir |  54 ++++++-
 .../GlobalISel/llvm.amdgcn.intersect_ray.ll   |  24 ++-
 .../test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll | 146 ++++++++----------
 .../test/CodeGen/AMDGPU/GlobalISel/usubsat.ll | 146 ++++++++----------
 7 files changed, 268 insertions(+), 213 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index ed1aa9d80840c..4b5a19155c672 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -3878,39 +3878,48 @@ bool CombinerHelper::matchOrShiftToFunnelShift(MachineInstr &MI,
   LLT Ty = MRI.getType(Dst);
   unsigned BitWidth = Ty.getScalarSizeInBits();
 
-  Register ShlSrc, ShlAmt, LShrSrc, LShrAmt;
+  Register ShlSrc, ShlAmt, LShrSrc, LShrAmt, Amt;
   unsigned FshOpc = 0;
 
-  // Match (or (shl x, amt), (lshr y, sub(bw, amt))).
-  if (mi_match(
-          Dst, MRI,
-          // m_GOr() handles the commuted version as well.
-          m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
-                m_GLShr(m_Reg(LShrSrc), m_GSub(m_SpecificICstOrSplat(BitWidth),
-                                               m_Reg(LShrAmt)))))) {
+  // Match (or (shl ...), (lshr ...)).
+  if (!mi_match(Dst, MRI,
+                // m_GOr() handles the commuted version as well.
+                m_GOr(m_GShl(m_Reg(ShlSrc), m_Reg(ShlAmt)),
+                      m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)))))
+    return false;
+
+  // Given constants C0 and C1 such that C0 + C1 is bit-width:
+  // (or (shl x, C0), (lshr y, C1)) -> (fshl x, y, C0) or (fshr x, y, C1)
+  // TODO: Match constant splat.
+  int64_t CstShlAmt, CstLShrAmt;
+  if (mi_match(ShlAmt, MRI, m_ICst(CstShlAmt)) &&
+      mi_match(LShrAmt, MRI, m_ICst(CstLShrAmt)) &&
+      CstShlAmt + CstLShrAmt == BitWidth) {
+    FshOpc = TargetOpcode::G_FSHR;
+    Amt = LShrAmt;
+
+  } else if (mi_match(LShrAmt, MRI,
+                      m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
+             ShlAmt == Amt) {
+    // (or (shl x, amt), (lshr y, (sub bw, amt))) -> (fshl x, y, amt)
     FshOpc = TargetOpcode::G_FSHL;
 
-    // Match (or (shl x, sub(bw, amt)), (lshr y, amt)).
-  } else if (mi_match(Dst, MRI,
-                      m_GOr(m_GLShr(m_Reg(LShrSrc), m_Reg(LShrAmt)),
-                            m_GShl(m_Reg(ShlSrc),
-                                   m_GSub(m_SpecificICstOrSplat(BitWidth),
-                                          m_Reg(ShlAmt)))))) {
+  } else if (mi_match(ShlAmt, MRI,
+                      m_GSub(m_SpecificICstOrSplat(BitWidth), m_Reg(Amt))) &&
+             LShrAmt == Amt) {
+    // (or (shl x, (sub bw, amt)), (lshr y, amt)) -> (fshr x, y, amt)
     FshOpc = TargetOpcode::G_FSHR;
 
   } else {
     return false;
   }
 
-  if (ShlAmt != LShrAmt)
-    return false;
-
-  LLT AmtTy = MRI.getType(ShlAmt);
+  LLT AmtTy = MRI.getType(Amt);
   if (!isLegalOrBeforeLegalizer({FshOpc, {Ty, AmtTy}}))
     return false;
 
   MatchInfo = [=](MachineIRBuilder &B) {
-    B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, ShlAmt});
+    B.buildInstr(FshOpc, {Dst}, {ShlSrc, LShrSrc, Amt});
   };
   return true;
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll
index df481b8e39f45..aa223eefbbfaf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-rev.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll
@@ -216,8 +216,7 @@ define i64 @test_rev16_x(i64 %a) nounwind {
 ; GISEL-LABEL: test_rev16_x:
 ; GISEL:       // %bb.0: // %entry
 ; GISEL-NEXT:    rev x8, x0
-; GISEL-NEXT:    lsl x9, x8, #48
-; GISEL-NEXT:    orr x0, x9, x8, lsr #16
+; GISEL-NEXT:    ror x0, x8, #16
 ; GISEL-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.bswap.i64(i64 %a)
@@ -235,9 +234,7 @@ define i64 @test_rev32_x(i64 %a) nounwind {
 ;
 ; GISEL-LABEL: test_rev32_x:
 ; GISEL:       // %bb.0: // %entry
-; GISEL-NEXT:    rev x8, x0
-; GISEL-NEXT:    lsl x9, x8, #32
-; GISEL-NEXT:    orr x0, x9, x8, lsr #32
+; GISEL-NEXT:    rev32 x0, x0
 ; GISEL-NEXT:    ret
 entry:
   %0 = tail call i64 @llvm.bswap.i64(i64 %a)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir
index 0e2816cbc9393..ad93f1bf4d39e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir
@@ -107,13 +107,66 @@ body: |
 ...
 
 ---
-name: fshl_i32_bad_const
+name: fsh_i32_const
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: fsh_i32_const
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: %or:_(s32) = G_FSHR %a, %b, %amt1(s32)
+    ; CHECK-NEXT: $vgpr2 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %b:_(s32) = COPY $vgpr1
+    %amt0:_(s32) = G_CONSTANT i32 20
+    %amt1:_(s32) = G_CONSTANT i32 12
+    %shl:_(s32) = G_SHL %a, %amt0
+    %lshr:_(s32) = G_LSHR %b, %amt1
+    %or:_(s32) = G_OR %shl, %lshr
+    $vgpr2 = COPY %or
+...
+
+---
+name: fsh_i32_bad_const
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: fsh_i32_bad_const
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %b:_(s32) = COPY $vgpr1
+    ; CHECK-NEXT: %amt0:_(s32) = G_CONSTANT i32 20
+    ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 11
+    ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt0(s32)
+    ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %b, %amt1(s32)
+    ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr
+    ; CHECK-NEXT: $vgpr2 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %b:_(s32) = COPY $vgpr1
+    %amt0:_(s32) = G_CONSTANT i32 20
+    %amt1:_(s32) = G_CONSTANT i32 11
+    %shl:_(s32) = G_SHL %a, %amt0
+    %lshr:_(s32) = G_LSHR %b, %amt1
+    %or:_(s32) = G_OR %shl, %lshr
+    $vgpr2 = COPY %or
+...
+
+---
+name: fshl_i32_bad_bw
 tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
 
-    ; CHECK-LABEL: name: fshl_i32_bad_const
+    ; CHECK-LABEL: name: fshl_i32_bad_bw
     ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir
index 60791842443d8..2649ee4bdf72a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir
@@ -99,13 +99,63 @@ body: |
 ...
 
 ---
-name: rotl_i32_bad_const
+name: rot_i32_const
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: rot_i32_const
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 12
+    ; CHECK-NEXT: %or:_(s32) = G_ROTR %a, %amt1(s32)
+    ; CHECK-NEXT: $vgpr1 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %amt0:_(s32) = G_CONSTANT i32 20
+    %amt1:_(s32) = G_CONSTANT i32 12
+    %shl:_(s32) = G_SHL %a, %amt0
+    %lshr:_(s32) = G_LSHR %a, %amt1
+    %or:_(s32) = G_OR %shl, %lshr
+    $vgpr1 = COPY %or
+...
+
+---
+name: rot_i32_bad_const
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; CHECK-LABEL: name: rot_i32_bad_const
+    ; CHECK: liveins: $vgpr0, $vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %amt0:_(s32) = G_CONSTANT i32 20
+    ; CHECK-NEXT: %amt1:_(s32) = G_CONSTANT i32 11
+    ; CHECK-NEXT: %shl:_(s32) = G_SHL %a, %amt0(s32)
+    ; CHECK-NEXT: %lshr:_(s32) = G_LSHR %a, %amt1(s32)
+    ; CHECK-NEXT: %or:_(s32) = G_OR %shl, %lshr
+    ; CHECK-NEXT: $vgpr1 = COPY %or(s32)
+    %a:_(s32) = COPY $vgpr0
+    %amt0:_(s32) = G_CONSTANT i32 20
+    %amt1:_(s32) = G_CONSTANT i32 11
+    %shl:_(s32) = G_SHL %a, %amt0
+    %lshr:_(s32) = G_LSHR %a, %amt1
+    %or:_(s32) = G_OR %shl, %lshr
+    $vgpr1 = COPY %or
+...
+
+
+---
+name: rotl_i32_bad_bw
 tracksRegLiveness: true
 body: |
   bb.0:
     liveins: $vgpr0, $vgpr1, $vgpr2
 
-    ; CHECK-LABEL: name: rotl_i32_bad_const
+    ; CHECK-LABEL: name: rotl_i32_bad_bw
     ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %a:_(s32) = COPY $vgpr0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
index 0236ebc947267..15755f4455cb5 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.intersect_ray.ll
@@ -52,11 +52,10 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 %node_ptr, float %
 ; GCN-NEXT:    s_mov_b32 s4, 0xffff
 ; GCN-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
 ; GCN-NEXT:    v_and_b32_e32 v10, s4, v7
-; GCN-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
 ; GCN-NEXT:    v_and_b32_e32 v8, s4, v8
 ; GCN-NEXT:    v_lshlrev_b32_e32 v9, 16, v9
 ; GCN-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
-; GCN-NEXT:    v_lshl_or_b32 v7, v8, 16, v7
+; GCN-NEXT:    v_alignbit_b32 v7, v8, v7, 16
 ; GCN-NEXT:    v_and_or_b32 v5, v5, s4, v9
 ; GCN-NEXT:    v_and_or_b32 v6, v6, s4, v10
 ; GCN-NEXT:    image_bvh_intersect_ray v[0:3], v[0:7], s[0:3] a16
@@ -105,11 +104,10 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 %node_ptr, float
 ; GCN-NEXT:    s_mov_b32 s4, 0xffff
 ; GCN-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
 ; GCN-NEXT:    v_and_b32_e32 v11, s4, v8
-; GCN-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
 ; GCN-NEXT:    v_and_b32_e32 v9, s4, v9
 ; GCN-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
 ; GCN-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
-; GCN-NEXT:    v_lshl_or_b32 v8, v9, 16, v8
+; GCN-NEXT:    v_alignbit_b32 v8, v9, v8, 16
 ; GCN-NEXT:    v_and_or_b32 v6, v6, s4, v10
 ; GCN-NEXT:    v_and_or_b32 v7, v7, s4, v11
 ; GCN-NEXT:    image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3] a16
@@ -210,16 +208,15 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
 ; GFX1030-NEXT:    v_lshrrev_b32_e32 v0, 16, v5
 ; GFX1030-NEXT:    v_and_b32_e32 v1, s0, v7
 ; GFX1030-NEXT:    v_mov_b32_e32 v15, v2
+; GFX1030-NEXT:    v_and_b32_e32 v2, s0, v8
 ; GFX1030-NEXT:    v_mov_b32_e32 v16, v3
-; GFX1030-NEXT:    v_lshrrev_b32_e32 v2, 16, v7
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT:    v_and_b32_e32 v3, s0, v8
 ; GFX1030-NEXT:    v_mov_b32_e32 v17, v4
+; GFX1030-NEXT:    v_alignbit_b32 v20, v2, v7, 16
 ; GFX1030-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1030-NEXT:    v_and_or_b32 v18, v5, s0, v0
 ; GFX1030-NEXT:    v_and_or_b32 v19, v6, s0, v1
-; GFX1030-NEXT:    v_lshl_or_b32 v20, v3, 16, v2
 ; GFX1030-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1030-NEXT:    v_readfirstlane_b32 s4, v9
 ; GFX1030-NEXT:    v_readfirstlane_b32 s5, v10
@@ -252,12 +249,11 @@ define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16_vgpr_descr(i32 %node_p
 ; GFX1013-NEXT:    s_mov_b32 s0, 0xffff
 ; GFX1013-NEXT:    v_lshrrev_b32_e32 v13, 16, v5
 ; GFX1013-NEXT:    v_and_b32_e32 v14, s0, v7
-; GFX1013-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
 ; GFX1013-NEXT:    v_and_b32_e32 v8, s0, v8
 ; GFX1013-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1013-NEXT:    v_lshlrev_b32_e32 v13, 16, v13
 ; GFX1013-NEXT:    v_lshlrev_b32_e32 v14, 16, v14
-; GFX1013-NEXT:    v_lshl_or_b32 v7, v8, 16, v7
+; GFX1013-NEXT:    v_alignbit_b32 v7, v8, v7, 16
 ; GFX1013-NEXT:    v_and_or_b32 v5, v5, s0, v13
 ; GFX1013-NEXT:    v_and_or_b32 v6, v6, s0, v14
 ; GFX1013-NEXT:  .LBB7_1: ; =>This Inner Loop Header: Depth=1
@@ -381,16 +377,15 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
 ; GFX1030-NEXT:    v_lshrrev_b32_e32 v0, 16, v6
 ; GFX1030-NEXT:    v_and_b32_e32 v1, s0, v8
 ; GFX1030-NEXT:    v_mov_b32_e32 v16, v2
+; GFX1030-NEXT:    v_and_b32_e32 v2, s0, v9
 ; GFX1030-NEXT:    v_mov_b32_e32 v17, v3
-; GFX1030-NEXT:    v_lshrrev_b32_e32 v2, 16, v8
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX1030-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX1030-NEXT:    v_and_b32_e32 v3, s0, v9
 ; GFX1030-NEXT:    v_mov_b32_e32 v18, v4
 ; GFX1030-NEXT:    v_mov_b32_e32 v19, v5
+; GFX1030-NEXT:    v_alignbit_b32 v22, v2, v8, 16
 ; GFX1030-NEXT:    v_and_or_b32 v20, v6, s0, v0
 ; GFX1030-NEXT:    v_and_or_b32 v21, v7, s0, v1
-; GFX1030-NEXT:    v_lshl_or_b32 v22, v3, 16, v2
 ; GFX1030-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1030-NEXT:  .LBB9_1: ; =>This Inner Loop Header: Depth=1
 ; GFX1030-NEXT:    v_readfirstlane_b32 s4, v10
@@ -427,13 +422,12 @@ define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16_vgpr_descr(i64 %node
 ; GFX1013-NEXT:    v_mov_b32_e32 v17, v11
 ; GFX1013-NEXT:    v_lshrrev_b32_e32 v10, 16, v6
 ; GFX1013-NEXT:    v_and_b32_e32 v11, s0, v8
-; GFX1013-NEXT:    v_lshrrev_b32_e32 v8, 16, v8
 ; GFX1013-NEXT:    v_and_b32_e32 v9, s0, v9
 ; GFX1013-NEXT:    v_mov_b32_e32 v18, v12
+; GFX1013-NEXT:    v_mov_b32_e32 v19, v13
 ; GFX1013-NEXT:    v_lshlrev_b32_e32 v10, 16, v10
 ; GFX1013-NEXT:    v_lshlrev_b32_e32 v11, 16, v11
-; GFX1013-NEXT:    v_mov_b32_e32 v19, v13
-; GFX1013-NEXT:    v_lshl_or_b32 v8, v9, 16, v8
+; GFX1013-NEXT:    v_alignbit_b32 v8, v9, v8, 16
 ; GFX1013-NEXT:    s_mov_b32 s1, exec_lo
 ; GFX1013-NEXT:    v_and_or_b32 v6, v6, s0, v10
 ; GFX1013-NEXT:    v_and_or_b32 v7, v7, s0, v11
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
index 503b45d49991c..c248c4236e067 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/uaddsat.ll
@@ -383,14 +383,12 @@ define i32 @v_uaddsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 24, v7
 ; GFX6-NEXT:    v_xor_b32_e32 v5, -1, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 ; GFX6-NEXT:    v_min_u32_e32 v4, v5, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 24, v3
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 24
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
@@ -534,18 +532,18 @@ define amdgpu_ps i32 @s_uaddsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_lshl_b32 s3, s4, 24
 ; GFX6-NEXT:    s_lshl_b32 s4, s7, 24
 ; GFX6-NEXT:    s_not_b32 s5, s3
-; GFX6-NEXT:    s_lshr_b32 s1, s1, 24
 ; GFX6-NEXT:    s_min_u32 s4, s5, s4
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 24
+; GFX6-NEXT:    s_lshr_b32 s1, s1, 24
 ; GFX6-NEXT:    s_lshr_b32 s2, s2, 24
 ; GFX6-NEXT:    s_add_i32 s3, s3, s4
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    s_lshr_b32 s3, s3, 24
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s2, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s3, 24
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 24
+; GFX6-NEXT:    s_lshl_b32 s0, s2, 16
+; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX6-NEXT:    s_lshl_b32 s0, s3, 24
+; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_uaddsat_v4i8:
@@ -1814,9 +1812,9 @@ define amdgpu_ps i32 @s_uaddsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX6-NEXT:    s_min_u32 s2, s3, s2
 ; GFX6-NEXT:    s_add_i32 s1, s1, s2
 ; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 16
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_uaddsat_v2i16:
@@ -1864,9 +1862,7 @@ define amdgpu_ps float @uaddsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-NEXT:    v_min_u32_e32 v1, s1, v1
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, s0, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: uaddsat_v2i16_sv:
@@ -1908,9 +1904,7 @@ define amdgpu_ps float @uaddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-NEXT:    v_min_u32_e32 v2, s0, v2
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v2
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: uaddsat_v2i16_vs:
@@ -1972,15 +1966,11 @@ define <2 x float> @v_uaddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
 ; GFX6-NEXT:    v_xor_b32_e32 v5, -1, v3
 ; GFX6-NEXT:    v_min_u32_e32 v4, v5, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
-; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, v3, v2, 16
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_uaddsat_v4i16:
@@ -2038,15 +2028,15 @@ define amdgpu_ps <2 x i32> @s_uaddsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX6-NEXT:    s_lshl_b32 s4, s7, 16
 ; GFX6-NEXT:    s_not_b32 s5, s3
 ; GFX6-NEXT:    s_min_u32 s4, s5, s4
-; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX6-NEXT:    s_add_i32 s3, s3, s4
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
-; GFX6-NEXT:    s_or_b32 s1, s2, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s2
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, s3, v1, 16
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_uaddsat_v4i16:
@@ -2137,20 +2127,14 @@ define <3 x float> @v_uaddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v4, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v11
 ; GFX6-NEXT:    v_xor_b32_e32 v7, -1, v5
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_min_u32_e32 v6, v7, v6
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v6
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
-; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, v3, v2, 16
+; GFX6-NEXT:    v_alignbit_b32 v2, v5, v4, 16
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_uaddsat_v6i16:
@@ -2224,20 +2208,20 @@ define amdgpu_ps <3 x i32> @s_uaddsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX6-NEXT:    s_add_i32 s4, s4, s6
 ; GFX6-NEXT:    s_lshl_b32 s6, s11, 16
 ; GFX6-NEXT:    s_not_b32 s7, s5
-; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX6-NEXT:    s_min_u32 s6, s7, s6
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 ; GFX6-NEXT:    s_add_i32 s5, s5, s6
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
+; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 ; GFX6-NEXT:    s_lshr_b32 s5, s5, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
-; GFX6-NEXT:    s_lshr_b32 s4, s4, 16
-; GFX6-NEXT:    s_or_b32 s1, s2, s1
-; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
-; GFX6-NEXT:    s_or_b32 s2, s4, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s2
+; GFX6-NEXT:    v_mov_b32_e32 v2, s4
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, s3, v1, 16
+; GFX6-NEXT:    v_alignbit_b32 v2, s5, v2, 16
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_uaddsat_v6i16:
@@ -2341,24 +2325,16 @@ define <4 x float> @v_uaddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX6-NEXT:    v_add_i32_e32 v6, vcc, v6, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v15
 ; GFX6-NEXT:    v_xor_b32_e32 v9, -1, v7
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_min_u32_e32 v8, v9, v8
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v8
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
-; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
-; GFX6-NEXT:    v_or_b32_e32 v3, v6, v3
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, v3, v2, 16
+; GFX6-NEXT:    v_alignbit_b32 v2, v5, v4, 16
+; GFX6-NEXT:    v_alignbit_b32 v3, v7, v6, 16
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_uaddsat_v8i16:
@@ -2448,24 +2424,24 @@ define amdgpu_ps <4 x i32> @s_uaddsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX6-NEXT:    s_add_i32 s6, s6, s8
 ; GFX6-NEXT:    s_lshl_b32 s8, s15, 16
 ; GFX6-NEXT:    s_not_b32 s9, s7
-; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX6-NEXT:    s_min_u32 s8, s9, s8
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 ; GFX6-NEXT:    s_add_i32 s7, s7, s8
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
+; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 ; GFX6-NEXT:    s_lshr_b32 s5, s5, 16
 ; GFX6-NEXT:    s_lshr_b32 s7, s7, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
-; GFX6-NEXT:    s_lshr_b32 s4, s4, 16
-; GFX6-NEXT:    s_lshr_b32 s6, s6, 16
-; GFX6-NEXT:    s_or_b32 s1, s2, s1
-; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
-; GFX6-NEXT:    s_lshl_b32 s3, s7, 16
-; GFX6-NEXT:    s_or_b32 s2, s4, s2
-; GFX6-NEXT:    s_or_b32 s3, s6, s3
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s2
+; GFX6-NEXT:    v_mov_b32_e32 v2, s4
+; GFX6-NEXT:    v_mov_b32_e32 v3, s6
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, s3, v1, 16
+; GFX6-NEXT:    v_alignbit_b32 v2, s5, v2, 16
+; GFX6-NEXT:    v_alignbit_b32 v3, s7, v3, 16
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
+; GFX6-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_uaddsat_v8i16:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
index a4da5822dac57..9676db3eba17e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/usubsat.ll
@@ -371,14 +371,12 @@ define i32 @v_usubsat_v4i8(i32 %lhs.arg, i32 %rhs.arg) {
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 24, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 24, v7
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 ; GFX6-NEXT:    v_min_u32_e32 v4, v3, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 8, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 24, v3
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 24
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 24, v3
@@ -518,18 +516,18 @@ define amdgpu_ps i32 @s_usubsat_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg) {
 ; GFX6-NEXT:    s_sub_i32 s2, s2, s3
 ; GFX6-NEXT:    s_lshl_b32 s3, s4, 24
 ; GFX6-NEXT:    s_lshl_b32 s4, s7, 24
-; GFX6-NEXT:    s_lshr_b32 s1, s1, 24
 ; GFX6-NEXT:    s_min_u32 s4, s3, s4
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 24
+; GFX6-NEXT:    s_lshr_b32 s1, s1, 24
 ; GFX6-NEXT:    s_lshr_b32 s2, s2, 24
 ; GFX6-NEXT:    s_sub_i32 s3, s3, s4
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 8
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    s_lshr_b32 s3, s3, 24
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s2, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s3, 24
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 24
+; GFX6-NEXT:    s_lshl_b32 s0, s2, 16
+; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX6-NEXT:    s_lshl_b32 s0, s3, 24
+; GFX6-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_usubsat_v4i8:
@@ -1724,9 +1722,9 @@ define amdgpu_ps i32 @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs
 ; GFX6-NEXT:    s_min_u32 s2, s1, s2
 ; GFX6-NEXT:    s_sub_i32 s1, s1, s2
 ; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 16
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_usubsat_v2i16:
@@ -1772,9 +1770,7 @@ define amdgpu_ps float @usubsat_v2i16_sv(<2 x i16> inreg %lhs, <2 x i16> %rhs) {
 ; GFX6-NEXT:    v_min_u32_e32 v1, s0, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s0, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: usubsat_v2i16_sv:
@@ -1814,9 +1810,7 @@ define amdgpu_ps float @usubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) {
 ; GFX6-NEXT:    v_min_u32_e32 v2, s0, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: usubsat_v2i16_vs:
@@ -1874,15 +1868,11 @@ define <2 x float> @v_usubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v7
 ; GFX6-NEXT:    v_min_u32_e32 v4, v3, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
-; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, v3, v2, 16
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_usubsat_v4i16:
@@ -1936,15 +1926,15 @@ define amdgpu_ps <2 x i32> @s_usubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inre
 ; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
 ; GFX6-NEXT:    s_lshl_b32 s4, s7, 16
 ; GFX6-NEXT:    s_min_u32 s4, s3, s4
-; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX6-NEXT:    s_sub_i32 s3, s3, s4
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
+; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
-; GFX6-NEXT:    s_or_b32 s1, s2, s1
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s2
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, s3, v1, 16
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_usubsat_v4i16:
@@ -2029,20 +2019,14 @@ define <3 x float> @v_usubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) {
 ; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, v4, v6
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v6, 16, v11
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_min_u32_e32 v6, v5, v6
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, v5, v6
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
-; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, v3, v2, 16
+; GFX6-NEXT:    v_alignbit_b32 v2, v5, v4, 16
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_usubsat_v6i16:
@@ -2110,20 +2094,20 @@ define amdgpu_ps <3 x i32> @s_usubsat_v6i16(<6 x i16> inreg %lhs, <6 x i16> inre
 ; GFX6-NEXT:    s_sub_i32 s4, s4, s6
 ; GFX6-NEXT:    s_lshl_b32 s5, s5, 16
 ; GFX6-NEXT:    s_lshl_b32 s6, s11, 16
-; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX6-NEXT:    s_min_u32 s6, s5, s6
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 ; GFX6-NEXT:    s_sub_i32 s5, s5, s6
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
+; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 ; GFX6-NEXT:    s_lshr_b32 s5, s5, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
-; GFX6-NEXT:    s_lshr_b32 s4, s4, 16
-; GFX6-NEXT:    s_or_b32 s1, s2, s1
-; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
-; GFX6-NEXT:    s_or_b32 s2, s4, s2
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s2
+; GFX6-NEXT:    v_mov_b32_e32 v2, s4
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, s3, v1, 16
+; GFX6-NEXT:    v_alignbit_b32 v2, s5, v2, 16
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_usubsat_v6i16:
@@ -2219,24 +2203,16 @@ define <4 x float> @v_usubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
 ; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, v6, v8
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v8, 16, v15
-; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_min_u32_e32 v8, v7, v8
-; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
-; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_sub_i32_e32 v7, vcc, v7, v8
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v5, 16, v5
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v7, 16, v7
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
-; GFX6-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
-; GFX6-NEXT:    v_lshrrev_b32_e32 v6, 16, v6
-; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
-; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v7
-; GFX6-NEXT:    v_or_b32_e32 v2, v4, v2
-; GFX6-NEXT:    v_or_b32_e32 v3, v6, v3
+; GFX6-NEXT:    v_alignbit_b32 v0, v1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, v3, v2, 16
+; GFX6-NEXT:    v_alignbit_b32 v2, v5, v4, 16
+; GFX6-NEXT:    v_alignbit_b32 v3, v7, v6, 16
 ; GFX6-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX8-LABEL: v_usubsat_v8i16:
@@ -2318,24 +2294,24 @@ define amdgpu_ps <4 x i32> @s_usubsat_v8i16(<8 x i16> inreg %lhs, <8 x i16> inre
 ; GFX6-NEXT:    s_sub_i32 s6, s6, s8
 ; GFX6-NEXT:    s_lshl_b32 s7, s7, 16
 ; GFX6-NEXT:    s_lshl_b32 s8, s15, 16
-; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
 ; GFX6-NEXT:    s_min_u32 s8, s7, s8
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 ; GFX6-NEXT:    s_sub_i32 s7, s7, s8
-; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
-; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX6-NEXT:    s_lshr_b32 s1, s1, 16
+; GFX6-NEXT:    s_lshr_b32 s3, s3, 16
 ; GFX6-NEXT:    s_lshr_b32 s5, s5, 16
 ; GFX6-NEXT:    s_lshr_b32 s7, s7, 16
-; GFX6-NEXT:    s_or_b32 s0, s0, s1
-; GFX6-NEXT:    s_lshl_b32 s1, s3, 16
-; GFX6-NEXT:    s_lshr_b32 s4, s4, 16
-; GFX6-NEXT:    s_lshr_b32 s6, s6, 16
-; GFX6-NEXT:    s_or_b32 s1, s2, s1
-; GFX6-NEXT:    s_lshl_b32 s2, s5, 16
-; GFX6-NEXT:    s_lshl_b32 s3, s7, 16
-; GFX6-NEXT:    s_or_b32 s2, s4, s2
-; GFX6-NEXT:    s_or_b32 s3, s6, s3
+; GFX6-NEXT:    v_mov_b32_e32 v0, s0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s2
+; GFX6-NEXT:    v_mov_b32_e32 v2, s4
+; GFX6-NEXT:    v_mov_b32_e32 v3, s6
+; GFX6-NEXT:    v_alignbit_b32 v0, s1, v0, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, s3, v1, 16
+; GFX6-NEXT:    v_alignbit_b32 v2, s5, v2, 16
+; GFX6-NEXT:    v_alignbit_b32 v3, s7, v3, 16
+; GFX6-NEXT:    v_readfirstlane_b32 s0, v0
+; GFX6-NEXT:    v_readfirstlane_b32 s1, v1
+; GFX6-NEXT:    v_readfirstlane_b32 s2, v2
+; GFX6-NEXT:    v_readfirstlane_b32 s3, v3
 ; GFX6-NEXT:    ; return to shader part epilog
 ;
 ; GFX8-LABEL: s_usubsat_v8i16:

From 8b280df504b97a13d06a929fbc85348903456fdd Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Sun, 23 Jan 2022 21:24:05 -0800
Subject: [PATCH 326/946] Rough guess at fixing lldb tests to handle Clang
 defaulting to DWARFv5

---
 .../basic_entry_values/TestBasicEntryValues.py                | 2 +-
 .../unambiguous_sequence/TestUnambiguousTailCalls.py          | 2 +-
 .../SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp  | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/TestBasicEntryValues.py b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/TestBasicEntryValues.py
index 4b9a814764158..f4ae1fc015569 100644
--- a/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/TestBasicEntryValues.py
+++ b/lldb/test/API/functionalities/param_entry_vals/basic_entry_values/TestBasicEntryValues.py
@@ -15,4 +15,4 @@
 lldbinline.MakeInlineTest(__file__, globals(),
         decorators=decorators+[skipIf(debug_info="dsym")],
         name="BasicEntryValues_GNU",
-        build_dict=dict(CXXFLAGS_EXTRAS="-O2 -ggdb"))
+        build_dict=dict(CXXFLAGS_EXTRAS="-O2 -ggdb -gdwarf-4"))
diff --git a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py
index cbdf40e2416f7..19aad2ab1ec32 100644
--- a/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py
+++ b/lldb/test/API/functionalities/tail_call_frames/unambiguous_sequence/TestUnambiguousTailCalls.py
@@ -7,5 +7,5 @@
 lldbinline.MakeInlineTest(__file__, globals(), name="UnambiguousTailCalls_V5",
         build_dict=dict(CFLAGS_EXTRAS="-O2 -glldb"), decorators=decor)
 lldbinline.MakeInlineTest(__file__, globals(), name="UnambiguousTailCalls_GNU",
-        build_dict=dict(CFLAGS_EXTRAS="-O2 -ggdb"),
+        build_dict=dict(CFLAGS_EXTRAS="-O2 -ggdb -gdwarf-4"),
         decorators=decor+[decorators.skipIf(debug_info="dsym")])
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp
index 29adff62cd1ee..0e29cb3e7f16e 100644
--- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp
@@ -7,8 +7,8 @@
 // RUN:   -fdebug-types-section -gsplit-dwarf -c -o %t1.o -DONE
 // RUN: %clang %s -target x86_64-pc-linux -fno-standalone-debug -g \
 // RUN:   -fdebug-types-section -gsplit-dwarf -c -o %t2.o -DTWO
-// RUN: llvm-dwarfdump %t1.dwo -debug-types | FileCheck --check-prefix=ONEUNIT %s
-// RUN: llvm-dwarfdump %t2.dwo -debug-types | FileCheck --check-prefix=ONEUNIT %s
+// RUN: llvm-dwarfdump %t1.dwo -debug-types -debug-info | FileCheck --check-prefix=ONEUNIT %s
+// RUN: llvm-dwarfdump %t2.dwo -debug-types -debug-info | FileCheck --check-prefix=ONEUNIT %s
 // RUN: ld.lld %t1.o %t2.o -o %t
 // RUN: %lldb %t -o "target var a b **b.a" -b | FileCheck %s
 

From 1f4a0531b3fdb9b4747c155805393a91926fe058 Mon Sep 17 00:00:00 2001
From: Julian Lettner <julian.lettner@apple.com>
Date: Sun, 23 Jan 2022 22:01:48 -0800
Subject: [PATCH 327/946] [TSan] Mark test unsupported on Darwin

---
 compiler-rt/test/tsan/vfork.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler-rt/test/tsan/vfork.cpp b/compiler-rt/test/tsan/vfork.cpp
index 2d669b305a9de..1e34a568fb5a8 100644
--- a/compiler-rt/test/tsan/vfork.cpp
+++ b/compiler-rt/test/tsan/vfork.cpp
@@ -1,4 +1,6 @@
 // RUN: %clangxx_tsan -O1 %s -o %t && %run %t 2>&1 | FileCheck %s
+// UNSUPPORTED: ios
+
 #include <pthread.h>
 #include <stdio.h>
 #include <stdlib.h>

From e29d8fb16978c463c7ea08cb255f5a97eca16d36 Mon Sep 17 00:00:00 2001
From: Wu Xinlong <821408745@qq.com>
Date: Mon, 24 Jan 2022 12:00:09 +0800
Subject: [PATCH 328/946] [RISCV] Initially support the K-extension
 instructions on the LLVM MC layer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This commit is currently implementing supports for scalar cryptography extension for LLVM according to version v1.0.0 of [K Ext specification](https://github.com/riscv/riscv-crypto/releases)(scala crypto has been ratified already). Currently, we are implementing the MC (Machine Code) layer of his extension and the majority of work is done under `llvm/lib/Target/RISCV` directory. There are also some test files in `llvm/test/MC/RISCV` directory.

Remove the subfeature of Zbk* which conflict with b extensions to reduce the size of the patch.
(Zbk* will be resubmit after this patch has been merged)

**Co-author：**@ksyx & @VincentWu & @lihongliang & @achieveartificialintelligence

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D98136
---
 llvm/lib/Support/RISCVISAInfo.cpp             |  16 +++
 .../Target/RISCV/AsmParser/RISCVAsmParser.cpp |  13 ++
 .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h |   1 +
 llvm/lib/Target/RISCV/RISCV.td                |  78 +++++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      |   3 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |   1 +
 llvm/lib/Target/RISCV/RISCVInstrInfoZk.td     | 123 ++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVSchedRocket.td     |   5 +-
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td    |   4 +-
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |  16 +++
 llvm/lib/Target/RISCV/RISCVSystemOperands.td  |   6 +
 llvm/test/CodeGen/RISCV/attributes.ll         |  41 +++++-
 llvm/test/MC/RISCV/attribute-arch.s           |  30 +++++
 llvm/test/MC/RISCV/rv32zknd-only-invalid.s    |  17 +++
 llvm/test/MC/RISCV/rv32zknd-only-valid.s      |  13 ++
 llvm/test/MC/RISCV/rv32zkne-only-invalid.s    |  17 +++
 llvm/test/MC/RISCV/rv32zkne-only-valid.s      |  13 ++
 llvm/test/MC/RISCV/rv32zknh-only-valid.s      |  29 +++++
 llvm/test/MC/RISCV/rv32zknh-valid.s           |  26 ++++
 llvm/test/MC/RISCV/rv32zksed-invalid.s        |  13 ++
 llvm/test/MC/RISCV/rv32zksed-valid.s          |  18 +++
 llvm/test/MC/RISCV/rv32zksh-valid.s           |  18 +++
 llvm/test/MC/RISCV/rv64zknd-only-valid.s      |  25 ++++
 llvm/test/MC/RISCV/rv64zkne-only-invalid.s    |  17 +++
 llvm/test/MC/RISCV/rv64zkne-only-valid.s      |  21 +++
 llvm/test/MC/RISCV/rv64zknh-only-valid.s      |  21 +++
 llvm/test/MC/RISCV/rv64zksed-invalid.s        |  13 ++
 llvm/test/MC/RISCV/rvk-user-csr-name.s        |  29 +++++
 28 files changed, 624 insertions(+), 3 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
 create mode 100644 llvm/test/MC/RISCV/rv32zknd-only-invalid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zknd-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zkne-only-invalid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zkne-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zknh-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zknh-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zksed-invalid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zksed-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zksh-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv64zknd-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv64zkne-only-invalid.s
 create mode 100644 llvm/test/MC/RISCV/rv64zkne-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv64zknh-only-valid.s
 create mode 100644 llvm/test/MC/RISCV/rv64zksed-invalid.s
 create mode 100644 llvm/test/MC/RISCV/rvk-user-csr-name.s

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index f8a17f7440f6a..c34817920e1bc 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -58,6 +58,16 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
 
     {"zbkb", RISCVExtensionVersion{1, 0}},
     {"zbkc", RISCVExtensionVersion{1, 0}},
+    {"zknd", RISCVExtensionVersion{1, 0}},
+    {"zkne", RISCVExtensionVersion{1, 0}},
+    {"zknh", RISCVExtensionVersion{1, 0}},
+    {"zksed", RISCVExtensionVersion{1, 0}},
+    {"zksh", RISCVExtensionVersion{1, 0}},
+    {"zkr", RISCVExtensionVersion{1, 0}},
+    {"zkn", RISCVExtensionVersion{1, 0}},
+    {"zks", RISCVExtensionVersion{1, 0}},
+    {"zkt", RISCVExtensionVersion{1, 0}},
+    {"zk", RISCVExtensionVersion{1, 0}},
 
     {"v", RISCVExtensionVersion{1, 0}},
     {"zvl32b", RISCVExtensionVersion{1, 0}},
@@ -751,6 +761,9 @@ static const char *ImpliedExtsZvl512b[] = {"zvl256b"};
 static const char *ImpliedExtsZvl256b[] = {"zvl128b"};
 static const char *ImpliedExtsZvl128b[] = {"zvl64b"};
 static const char *ImpliedExtsZvl64b[] = {"zvl32b"};
+static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"};
+static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zkne", "zknd", "zknh"};
+static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zksed", "zksh"};
 
 struct ImpliedExtsEntry {
   StringLiteral Name;
@@ -766,6 +779,9 @@ struct ImpliedExtsEntry {
 static constexpr ImpliedExtsEntry ImpliedExts[] = {
     {{"v"}, {ImpliedExtsV}},
     {{"zfh"}, {ImpliedExtsZfh}},
+    {{"zk"}, {ImpliedExtsZk}},
+    {{"zkn"}, {ImpliedExtsZkn}},
+    {{"zks"}, {ImpliedExtsZks}},
     {{"zve32f"}, {ImpliedExtsZve32f}},
     {{"zve32x"}, {ImpliedExtsZve32x}},
     {{"zve64d"}, {ImpliedExtsZve64d}},
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 1c8ed0d60d8e8..a2ea34fe11c73 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -566,6 +566,16 @@ struct RISCVOperand : public MCParsedAsmOperand {
     return IsConstantImm && isUInt<7>(Imm) && VK == RISCVMCExpr::VK_RISCV_None;
   }
 
+  bool isRnumArg() const {
+    int64_t Imm;
+    RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None;
+    if (!isImm())
+      return false;
+    bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK);
+    return IsConstantImm && Imm >= INT64_C(0) && Imm <= INT64_C(10) &&
+           VK == RISCVMCExpr::VK_RISCV_None;
+  }
+
   bool isSImm5() const {
     if (!isImm())
       return false;
@@ -1240,6 +1250,9 @@ bool RISCVAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                       (1 << 4),
                                       "immediate must be in the range");
   }
+  case Match_InvalidRnumArg: {
+    return generateImmOutOfRangeError(Operands, ErrorInfo, 0, 10);
+  }
   }
 
   llvm_unreachable("Unknown match type detected!");
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 9cfd36745f46f..72d91b1044d65 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -192,6 +192,7 @@ enum OperandType : unsigned {
   OPERAND_UIMM20,
   OPERAND_UIMMLOG2XLEN,
   OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN,
+  OPERAND_RVKRNUM,
   // Operand is either a register or uimm5, this is used by V extension pseudo
   // instructions to represent a value that be passed as AVL to either vsetvli
   // or vsetivli.
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index dea042348d4db..72caa88104e85 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -178,6 +178,84 @@ def HasStdExtZbcOrZbkc
                                    "'Zbc' (Carry-Less 'B' Instructions) or "
                                    "'Zbkc' (Carry-less multiply instructions for Cryptography)">;
 
+def FeatureStdExtZknd
+    : SubtargetFeature<"zknd", "HasStdExtZknd", "true",
+                       "'Zknd' (NIST Suite: AES Decryption)">;
+def HasStdExtZknd : Predicate<"Subtarget->hasStdExtZknd()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZknd),
+                             "'Zknd' (NIST Suite: AES Decryption)">;
+
+def FeatureStdExtZkne
+    : SubtargetFeature<"zkne", "HasStdExtZkne", "true",
+                       "'Zkne' (NIST Suite: AES Encryption)">;
+def HasStdExtZkne : Predicate<"Subtarget->hasStdExtZkne()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZkne),
+                             "'Zkne' (NIST Suite: AES Encryption)">;
+
+// Some instructions belong to both Zknd and Zkne subextensions.
+// They should be enabled if either has been specified.
+def HasStdExtZkndOrZkne
+    : Predicate<"Subtarget->hasStdExtZknd() || Subtarget->hasStdExtZkne()">,
+                AssemblerPredicate<(any_of FeatureStdExtZknd, FeatureStdExtZkne),
+                                   "'Zknd' (NIST Suite: AES Decryption) or "
+                                   "'Zkne' (NIST Suite: AES Encryption)">;
+
+def FeatureStdExtZknh
+    : SubtargetFeature<"zknh", "HasStdExtZknh", "true",
+                       "'Zknh' (NIST Suite: Hash Function Instructions)">;
+def HasStdExtZknh : Predicate<"Subtarget->hasStdExtZknh()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZknh),
+                             "'Zknh' (NIST Suite: Hash Function Instructions)">;
+
+def FeatureStdExtZksed
+    : SubtargetFeature<"zksed", "HasStdExtZksed", "true",
+                       "'Zksed' (ShangMi Suite: SM4 Block Cipher Instructions)">;
+def HasStdExtZksed : Predicate<"Subtarget->hasStdExtZksed()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZksed),
+                             "'Zksed' (ShangMi Suite: SM4 Block Cipher Instructions)">;
+
+def FeatureStdExtZksh
+    : SubtargetFeature<"zksh", "HasStdExtZksh", "true",
+                       "'Zksh' (ShangMi Suite: SM3 Hash Function Instructions)">;
+def HasStdExtZksh : Predicate<"Subtarget->hasStdExtZksh()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZksh),
+                             "'Zksh' (ShangMi Suite: SM3 Hash Function Instructions)">;
+
+def FeatureStdExtZkr
+    : SubtargetFeature<"zkr", "HasStdExtZkr", "true",
+                       "'Zkr' (Entropy Source Extension)">;
+def HasStdExtZkr : Predicate<"Subtarget->hasStdExtZkr()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZkr),
+                             "'Zkr' (Entropy Source Extension)">;
+
+def FeatureStdExtZkn
+    : SubtargetFeature<"zkn", "HasStdExtZkn", "true",
+                       "'Zkn' (NIST Algorithm Suite)",
+                       [FeatureStdExtZbkb,
+                        FeatureStdExtZbkc,
+                        FeatureStdExtZkne,
+                        FeatureStdExtZknd,
+                        FeatureStdExtZknh]>;
+
+def FeatureStdExtZks
+    : SubtargetFeature<"zks", "HasStdExtZks", "true",
+                       "'Zks' (ShangMi Algorithm Suite)",
+                       [FeatureStdExtZbkb,
+                        FeatureStdExtZbkc,
+                        FeatureStdExtZksed,
+                        FeatureStdExtZksh]>;
+
+def FeatureStdExtZkt
+    : SubtargetFeature<"zkt", "HasStdExtZkt", "true",
+                       "'Zkt' (Data Independent Execution Latency)">;
+
+def FeatureStdExtZk
+    : SubtargetFeature<"zk", "HasStdExtZk", "true",
+                       "'Zk' (Standard scalar cryptography extension)",
+                       [FeatureStdExtZkn,
+                        FeatureStdExtZkr,
+                        FeatureStdExtZkt]>;
+
 def FeatureNoRVCHints
     : SubtargetFeature<"no-rvc-hints", "EnableRVCHintInstrs", "false",
                        "Disable RVC Hint Instructions.">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index a4e752b7e8839..2ab9ab653328d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1146,6 +1146,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
           else
             Ok = isUInt<5>(Imm);
           break;
+        case RISCVOp::OPERAND_RVKRNUM:
+          Ok = Imm >= 0 && Imm <= 10;
+          break;
         }
         if (!Ok) {
           ErrInfo = "Invalid immediate";
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index dd1627231db4c..64cd89cda06a8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1516,5 +1516,6 @@ include "RISCVInstrInfoF.td"
 include "RISCVInstrInfoD.td"
 include "RISCVInstrInfoC.td"
 include "RISCVInstrInfoZb.td"
+include "RISCVInstrInfoZk.td"
 include "RISCVInstrInfoV.td"
 include "RISCVInstrInfoZfh.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
new file mode 100644
index 0000000000000..52a29526a541a
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
@@ -0,0 +1,123 @@
+//===- RISCVInstrInfoZk.td - RISC-V Scalar Crypto instructions - tablegen -*===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions from the standard 'Zk',
+// Scalar Cryptography Instructions extension, version 1.0.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def RnumArg : AsmOperandClass {
+  let Name = "RnumArg";
+  let RenderMethod = "addImmOperands";
+  let DiagnosticType = "InvalidRnumArg";
+}
+
+def rnum : Operand<XLenVT>, ImmLeaf<XLenVT, [{return (Imm >= 0 && Imm <= 10);}]> {
+  let ParserMatchClass = RnumArg;
+  let EncoderMethod = "getImmOpValue";
+  let DecoderMethod = "decodeUImmOperand<4>";
+  let OperandType = "OPERAND_RVKRNUM";
+  let OperandNamespace = "RISCVOp";
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction class templates
+//===----------------------------------------------------------------------===//
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVKUnary<bits<12> imm12_in, bits<3> funct3, string opcodestr>
+    : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1),
+              opcodestr, "$rd, $rs1">{
+  let imm12 = imm12_in;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVKByteSelect<bits<5> funct5, string opcodestr>
+    : RVInstR<{0b00, funct5}, 0b000, OPC_OP, (outs GPR:$rd),
+              (ins GPR:$rs1, GPR:$rs2, uimm2:$bs),
+              opcodestr, "$rd, $rs1, $rs2, $bs">{
+  bits<2> bs;
+  let Inst{31-30} = bs;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVKUnary_rnum<bits<7> funct7, bits<3> funct3, string opcodestr>
+    : RVInstI<funct3, OPC_OP_IMM, (outs GPR:$rd), (ins GPR:$rs1, rnum:$rnum),
+              opcodestr, "$rd, $rs1, $rnum">{
+    bits<4> rnum;
+    let Inst{31-25} = funct7;
+    let Inst{24} = 1;
+    let Inst{23-20} = rnum;
+}
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+let Predicates = [HasStdExtZknd, IsRV32] in {
+def AES32DSI  : RVKByteSelect<0b10101, "aes32dsi">;
+def AES32DSMI : RVKByteSelect<0b10111, "aes32dsmi">;
+} // Predicates = [HasStdExtZknd, IsRV32]
+
+let Predicates = [HasStdExtZknd, IsRV64] in {
+def AES64DS  : ALU_rr<0b0011101, 0b000, "aes64ds">;
+def AES64DSM : ALU_rr<0b0011111, 0b000, "aes64dsm">;
+
+def AES64IM  : RVKUnary<0b001100000000, 0b001, "aes64im">;
+} // Predicates = [HasStdExtZknd, IsRV64]
+
+let Predicates = [HasStdExtZkndOrZkne, IsRV64] in {
+def AES64KS2  : ALU_rr<0b0111111, 0b000, "aes64ks2">;
+
+def AES64KS1I : RVKUnary_rnum<0b0011000, 0b001, "aes64ks1i">;
+} // Predicates = [HasStdExtZkndOrZkne, IsRV64]
+
+let Predicates = [HasStdExtZkne, IsRV32] in {
+def AES32ESI  : RVKByteSelect<0b10001, "aes32esi">;
+def AES32ESMI : RVKByteSelect<0b10011, "aes32esmi">;
+} // Predicates = [HasStdExtZkne, IsRV32]
+
+let Predicates = [HasStdExtZkne, IsRV64] in {
+def AES64ES   : ALU_rr<0b0011001, 0b000, "aes64es">;
+def AES64ESM  : ALU_rr<0b0011011, 0b000, "aes64esm">;
+} // Predicates = [HasStdExtZkne, IsRV64]
+
+let Predicates = [HasStdExtZknh] in {
+def SHA256SIG0 : RVKUnary<0b000100000010, 0b001, "sha256sig0">;
+def SHA256SIG1 : RVKUnary<0b000100000011, 0b001, "sha256sig1">;
+def SHA256SUM0 : RVKUnary<0b000100000000, 0b001, "sha256sum0">;
+def SHA256SUM1 : RVKUnary<0b000100000001, 0b001, "sha256sum1">;
+} // Predicates = [HasStdExtZknh]
+
+let Predicates = [HasStdExtZknh, IsRV32] in {
+def SHA512SIG0H : ALU_rr<0b0101110, 0b000, "sha512sig0h">;
+def SHA512SIG0L : ALU_rr<0b0101010, 0b000, "sha512sig0l">;
+def SHA512SIG1H : ALU_rr<0b0101111, 0b000, "sha512sig1h">;
+def SHA512SIG1L : ALU_rr<0b0101011, 0b000, "sha512sig1l">;
+def SHA512SUM0R : ALU_rr<0b0101000, 0b000, "sha512sum0r">;
+def SHA512SUM1R : ALU_rr<0b0101001, 0b000, "sha512sum1r">;
+} // [HasStdExtZknh, IsRV32]
+
+let Predicates = [HasStdExtZknh, IsRV64] in {
+def SHA512SIG0 : RVKUnary<0b000100000110, 0b001, "sha512sig0">;
+def SHA512SIG1 : RVKUnary<0b000100000111, 0b001, "sha512sig1">;
+def SHA512SUM0 : RVKUnary<0b000100000100, 0b001, "sha512sum0">;
+def SHA512SUM1 : RVKUnary<0b000100000101, 0b001, "sha512sum1">;
+} // Predicates = [HasStdExtZknh, IsRV64]
+
+let Predicates = [HasStdExtZksed] in {
+def SM4ED : RVKByteSelect<0b11000, "sm4ed">;
+def SM4KS : RVKByteSelect<0b11010, "sm4ks">;
+} // Predicates = [HasStdExtZksed]
+
+let Predicates = [HasStdExtZksh] in {
+def SM3P0 : RVKUnary<0b000100001000, 0b001, "sm3p0">;
+def SM3P1 : RVKUnary<0b000100001001, 0b001, "sm3p1">;
+} // Predicates = [HasStdExtZksh]
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 783e65c1aa185..92dd3175a460a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,7 +17,10 @@ def RocketModel : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = false;
-  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasVInstructions, HasVInstructionsI64];
+  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZknd, 
+                             HasStdExtZkne, HasStdExtZknh, HasStdExtZksed,
+                             HasStdExtZksh, HasStdExtZkr, HasVInstructions,
+                             HasVInstructionsI64];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index d164514ce70f0..e5eaad2a6dd08 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,7 +15,9 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasVInstructions];
+  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZknd, 
+                             HasStdExtZkne, HasStdExtZknh, HasStdExtZksed,
+                             HasStdExtZksh, HasStdExtZkr, HasVInstructions];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index bacb8fae37941..ac1df37345859 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -85,6 +85,16 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasStdExtZfh = false;
   bool HasStdExtZbkb = false;
   bool HasStdExtZbkc = false;
+  bool HasStdExtZknd = false;
+  bool HasStdExtZkne = false;
+  bool HasStdExtZknh = false;
+  bool HasStdExtZksed = false;
+  bool HasStdExtZksh = false;
+  bool HasStdExtZkr = false;
+  bool HasStdExtZkn = false;
+  bool HasStdExtZks = false;
+  bool HasStdExtZkt = false;
+  bool HasStdExtZk = false;
   bool HasRV64 = false;
   bool IsRV32E = false;
   bool EnableLinkerRelax = false;
@@ -160,6 +170,12 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtZfh() const { return HasStdExtZfh; }
   bool hasStdExtZbkb() const { return HasStdExtZbkb; }
   bool hasStdExtZbkc() const { return HasStdExtZbkc; }
+  bool hasStdExtZknd() const { return HasStdExtZknd; }
+  bool hasStdExtZkne() const { return HasStdExtZkne; }
+  bool hasStdExtZknh() const { return HasStdExtZknh; }
+  bool hasStdExtZksed() const { return HasStdExtZksed; }
+  bool hasStdExtZksh() const { return HasStdExtZksh; }
+  bool hasStdExtZkr() const { return HasStdExtZkr; }
   bool is64Bit() const { return HasRV64; }
   bool isRV32E() const { return IsRV32E; }
   bool enableLinkerRelax() const { return EnableLinkerRelax; }
diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
index 3a3d5ba732b60..b9aa25b321b08 100644
--- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td
+++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td
@@ -372,3 +372,9 @@ foreach i = 0...3 in {
   let isRV32Only = 1 in
   def : SysReg<"hstateen"#i#"h", !add(0x61C, i)>;
 }
+
+//===-----------------------------------------------
+// Entropy Source CSR
+//===-----------------------------------------------
+
+def SEED : SysReg<"seed", 0x015>;
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index d37d86fc2f04d..3f7ca36844cc5 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -21,6 +21,16 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV32ZBKC %s
+; RUN: llc -mtriple=riscv32 -mattr=+zknd %s -o - | FileCheck --check-prefix=RV32ZKND %s
+; RUN: llc -mtriple=riscv32 -mattr=+zkne %s -o - | FileCheck --check-prefix=RV32ZKNE %s
+; RUN: llc -mtriple=riscv32 -mattr=+zknh %s -o - | FileCheck --check-prefix=RV32ZKNH %s
+; RUN: llc -mtriple=riscv32 -mattr=+zksed %s -o - | FileCheck --check-prefix=RV32ZKSED %s
+; RUN: llc -mtriple=riscv32 -mattr=+zksh %s -o - | FileCheck --check-prefix=RV32ZKSH %s
+; RUN: llc -mtriple=riscv32 -mattr=+zkr %s -o - | FileCheck --check-prefix=RV32ZKR %s
+; RUN: llc -mtriple=riscv32 -mattr=+zkn %s -o - | FileCheck --check-prefix=RV32ZKN %s
+; RUN: llc -mtriple=riscv32 -mattr=+zks %s -o - | FileCheck --check-prefix=RV32ZKS %s
+; RUN: llc -mtriple=riscv32 -mattr=+zkt %s -o - | FileCheck --check-prefix=RV32ZKT %s
+; RUN: llc -mtriple=riscv32 -mattr=+zk %s -o - | FileCheck --check-prefix=RV32ZK %s
 ; RUN: llc -mtriple=riscv64 -mattr=+m %s -o - | FileCheck --check-prefix=RV64M %s
 ; RUN: llc -mtriple=riscv64 -mattr=+a %s -o - | FileCheck --check-prefix=RV64A %s
 ; RUN: llc -mtriple=riscv64 -mattr=+f %s -o - | FileCheck --check-prefix=RV64F %s
@@ -42,7 +52,16 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV64ZBKC %s
-
+; RUN: llc -mtriple=riscv64 -mattr=+zknd %s -o - | FileCheck --check-prefix=RV64ZKND %s
+; RUN: llc -mtriple=riscv64 -mattr=+zkne %s -o - | FileCheck --check-prefix=RV64ZKNE %s
+; RUN: llc -mtriple=riscv64 -mattr=+zknh %s -o - | FileCheck --check-prefix=RV64ZKNH %s
+; RUN: llc -mtriple=riscv64 -mattr=+zksed %s -o - | FileCheck --check-prefix=RV64ZKSED %s
+; RUN: llc -mtriple=riscv64 -mattr=+zksh %s -o - | FileCheck --check-prefix=RV64ZKSH %s
+; RUN: llc -mtriple=riscv64 -mattr=+zkr %s -o - | FileCheck --check-prefix=RV64ZKR %s
+; RUN: llc -mtriple=riscv64 -mattr=+zkn %s -o - | FileCheck --check-prefix=RV64ZKN %s
+; RUN: llc -mtriple=riscv64 -mattr=+zks %s -o - | FileCheck --check-prefix=RV64ZKS %s
+; RUN: llc -mtriple=riscv64 -mattr=+zkt %s -o - | FileCheck --check-prefix=RV64ZKT %s
+; RUN: llc -mtriple=riscv64 -mattr=+zk %s -o - | FileCheck --check-prefix=RV64ZK %s
 
 ; RV32M: .attribute 5, "rv32i2p0_m2p0"
 ; RV32A: .attribute 5, "rv32i2p0_a2p0"
@@ -65,6 +84,16 @@
 ; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0"
 ; RV32ZBKC: .attribute 5, "rv32i2p0_zbkc1p0"
+; RV32ZKND: .attribute 5, "rv32i2p0_zknd1p0"
+; RV32ZKNE: .attribute 5, "rv32i2p0_zkne1p0"
+; RV32ZKNH: .attribute 5, "rv32i2p0_zknh1p0"
+; RV32ZKSED: .attribute 5, "rv32i2p0_zksed1p0"
+; RV32ZKSH: .attribute 5, "rv32i2p0_zksh1p0"
+; RV32ZKR: .attribute 5, "rv32i2p0_zkr1p0"
+; RV32ZKN: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
+; RV32ZKS: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0"
+; RV32ZKT: .attribute 5, "rv32i2p0_zkt1p0"
+; RV32ZK: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
 ; RV64A: .attribute 5, "rv64i2p0_a2p0"
@@ -87,6 +116,16 @@
 ; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0"
 ; RV64ZBKC: .attribute 5, "rv64i2p0_zbkc1p0"
+; RV64ZKND: .attribute 5, "rv64i2p0_zknd1p0"
+; RV64ZKNE: .attribute 5, "rv64i2p0_zkne1p0"
+; RV64ZKNH: .attribute 5, "rv64i2p0_zknh1p0"
+; RV64ZKSED: .attribute 5, "rv64i2p0_zksed1p0"
+; RV64ZKSH: .attribute 5, "rv64i2p0_zksh1p0"
+; RV64ZKR: .attribute 5, "rv64i2p0_zkr1p0"
+; RV64ZKN: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
+; RV64ZKS: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0"
+; RV64ZKT: .attribute 5, "rv64i2p0_zkt1p0"
+; RV64ZK: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
 
 define i32 @addi(i32 %a) {
   %1 = add i32 %a, 1
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 15ee933dcdd1d..27dc70a7b6f75 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -130,3 +130,33 @@
 
 .attribute arch, "rv32i_zbkc1p0"
 # CHECK: attribute      5, "rv32i2p0_zbkc1p0"
+
+.attribute arch, "rv32i_zknd1p0"
+# CHECK: attribute      5, "rv32i2p0_zknd1p0"
+
+.attribute arch, "rv32i_zkne1p0"
+# CHECK: attribute      5, "rv32i2p0_zkne1p0"
+
+.attribute arch, "rv32i_zknh1p0"
+# CHECK: attribute      5, "rv32i2p0_zknh1p0"
+
+.attribute arch, "rv32i_zksed1p0"
+# CHECK: attribute      5, "rv32i2p0_zksed1p0"
+
+.attribute arch, "rv32i_zksh1p0"
+# CHECK: attribute      5, "rv32i2p0_zksh1p0"
+
+.attribute arch, "rv32i_zkr1p0"
+# CHECK: attribute      5, "rv32i2p0_zkr1p0"
+
+.attribute arch, "rv32i_zkn1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
+
+.attribute arch, "rv32i_zks1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0"
+
+.attribute arch, "rv32i_zkt1p0"
+# CHECK: attribute      5, "rv32i2p0_zkt1p0"
+
+.attribute arch, "rv32i_zk1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
diff --git a/llvm/test/MC/RISCV/rv32zknd-only-invalid.s b/llvm/test/MC/RISCV/rv32zknd-only-invalid.s
new file mode 100644
index 0000000000000..05ae48491a037
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zknd-only-invalid.s
@@ -0,0 +1,17 @@
+# With Zk extension:
+# RUN: not llvm-mc -triple=riscv32 -mattr=+zk < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# With Zkn extension:
+# RUN: not llvm-mc -triple=riscv32 -mattr=+zkn < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# With Zknd extension:
+# RUN: not llvm-mc -triple=riscv32 -mattr=+zknd < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# CHECK-ERROR: immediate must be an integer in the range [0, 3]
+aes32dsmi a0, a1, a2, 8
+
+# CHECK-ERROR: immediate must be an integer in the range [0, 3]
+aes32dsi a0, a1, a2, 8
diff --git a/llvm/test/MC/RISCV/rv32zknd-only-valid.s b/llvm/test/MC/RISCV/rv32zknd-only-valid.s
new file mode 100644
index 0000000000000..db139595879e6
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zknd-only-valid.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zknd -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zknd < %s \
+# RUN:     | llvm-objdump --mattr=+zknd -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: aes32dsi a0, a1, a2, 3
+# CHECK-ASM: [0x33,0x85,0xc5,0xea]
+aes32dsi a0, a1, a2, 3
+
+# CHECK-ASM-AND-OBJ: aes32dsmi a0, a1, a2, 3
+# CHECK-ASM: [0x33,0x85,0xc5,0xee]
+aes32dsmi a0, a1, a2, 3
diff --git a/llvm/test/MC/RISCV/rv32zkne-only-invalid.s b/llvm/test/MC/RISCV/rv32zkne-only-invalid.s
new file mode 100644
index 0000000000000..9ace21cae5eff
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zkne-only-invalid.s
@@ -0,0 +1,17 @@
+# With Zk extension:
+# RUN: not llvm-mc -triple=riscv32 -mattr=+zk < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# With Zkn extension:
+# RUN: not llvm-mc -triple=riscv32 -mattr=+zkn < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# With Zkne extension:
+# RUN: not llvm-mc -triple=riscv32 -mattr=+zkne < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# CHECK-ERROR: immediate must be an integer in the range [0, 3]
+aes32esmi a0, a1, a2, 8
+
+# CHECK-ERROR: immediate must be an integer in the range [0, 3]
+aes32esi a0, a1, a2, 8
diff --git a/llvm/test/MC/RISCV/rv32zkne-only-valid.s b/llvm/test/MC/RISCV/rv32zkne-only-valid.s
new file mode 100644
index 0000000000000..de99aedd5ebdf
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zkne-only-valid.s
@@ -0,0 +1,13 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zkne -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zkne < %s \
+# RUN:     | llvm-objdump --mattr=+zkne -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: aes32esi a0, a1, a2, 3
+# CHECK-ASM: [0x33,0x85,0xc5,0xe2]
+aes32esi a0, a1, a2, 3
+
+# CHECK-ASM-AND-OBJ: aes32esmi a0, a1, a2, 3
+# CHECK-ASM: [0x33,0x85,0xc5,0xe6]
+aes32esmi a0, a1, a2, 3
diff --git a/llvm/test/MC/RISCV/rv32zknh-only-valid.s b/llvm/test/MC/RISCV/rv32zknh-only-valid.s
new file mode 100644
index 0000000000000..d48dd4949dd46
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zknh-only-valid.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zknh -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zknh < %s \
+# RUN:     | llvm-objdump --mattr=+zknh -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: sha512sig0h a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x5c]
+sha512sig0h a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: sha512sig1h a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x5e]
+sha512sig1h a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: sha512sig0l a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x54]
+sha512sig0l a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: sha512sig1l a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x56]
+sha512sig1l a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: sha512sum0r a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x50]
+sha512sum0r a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: sha512sum1r a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x52]
+sha512sum1r a0, a1, a2
diff --git a/llvm/test/MC/RISCV/rv32zknh-valid.s b/llvm/test/MC/RISCV/rv32zknh-valid.s
new file mode 100644
index 0000000000000..e1dbc0feea34a
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zknh-valid.s
@@ -0,0 +1,26 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zknh -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zknh -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zknh < %s \
+# RUN:     | llvm-objdump --mattr=+zknh -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zknh < %s \
+# RUN:     | llvm-objdump --mattr=+zknh -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: sha256sig0 a0, a1
+# CHECK-ASM: [0x13,0x95,0x25,0x10]
+sha256sig0 a0, a1
+
+# CHECK-ASM-AND-OBJ: sha256sig1 a0, a1
+# CHECK-ASM: [0x13,0x95,0x35,0x10]
+sha256sig1 a0, a1
+
+# CHECK-ASM-AND-OBJ: sha256sum0 a0, a1
+# CHECK-ASM: [0x13,0x95,0x05,0x10]
+sha256sum0 a0, a1
+
+# CHECK-ASM-AND-OBJ: sha256sum1 a0, a1
+# CHECK-ASM: [0x13,0x95,0x15,0x10]
+sha256sum1 a0, a1
diff --git a/llvm/test/MC/RISCV/rv32zksed-invalid.s b/llvm/test/MC/RISCV/rv32zksed-invalid.s
new file mode 100644
index 0000000000000..feb9bc09b60bf
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zksed-invalid.s
@@ -0,0 +1,13 @@
+# With Zks extension:
+# RUN: not llvm-mc -triple=riscv32 -mattr=+zks < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# With Zksed extension:
+# RUN: not llvm-mc -triple=riscv32 -mattr=+zksed < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# CHECK-ERROR: immediate must be an integer in the range [0, 3]
+sm4ed a0, a1, a2, 8
+
+# CHECK-ERROR: immediate must be an integer in the range [0, 3]
+sm4ks a0, a1, a2, 8
diff --git a/llvm/test/MC/RISCV/rv32zksed-valid.s b/llvm/test/MC/RISCV/rv32zksed-valid.s
new file mode 100644
index 0000000000000..25d618c082e6e
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zksed-valid.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zksed -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zksed -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zksed < %s \
+# RUN:     | llvm-objdump --mattr=+zksed -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zksed < %s \
+# RUN:     | llvm-objdump --mattr=+zksed -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: sm4ed a0, a1, a2, 3
+# CHECK-ASM: [0x33,0x85,0xc5,0xf0]
+sm4ed a0, a1, a2, 3
+
+# CHECK-ASM-AND-OBJ: sm4ks a0, a1, a2, 3
+# CHECK-ASM: [0x33,0x85,0xc5,0xf4]
+sm4ks a0, a1, a2, 3
diff --git a/llvm/test/MC/RISCV/rv32zksh-valid.s b/llvm/test/MC/RISCV/rv32zksh-valid.s
new file mode 100644
index 0000000000000..48ae6652c850e
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zksh-valid.s
@@ -0,0 +1,18 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zksh -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zksh -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+zksh < %s \
+# RUN:     | llvm-objdump --mattr=+zksh -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zksh < %s \
+# RUN:     | llvm-objdump --mattr=+zksh -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: sm3p0 a0, a1
+# CHECK-ASM: [0x13,0x95,0x85,0x10]
+sm3p0 a0, a1
+
+# CHECK-ASM-AND-OBJ: sm3p1 a0, a1
+# CHECK-ASM: [0x13,0x95,0x95,0x10]
+sm3p1 a0, a1
diff --git a/llvm/test/MC/RISCV/rv64zknd-only-valid.s b/llvm/test/MC/RISCV/rv64zknd-only-valid.s
new file mode 100644
index 0000000000000..03656136d20ae
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zknd-only-valid.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zknd -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zknd < %s \
+# RUN:     | llvm-objdump --mattr=+zknd -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: aes64ds a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x3a]
+aes64ds a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: aes64dsm a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x3e]
+aes64dsm a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: aes64im a0, a1
+# CHECK-ASM: [0x13,0x95,0x05,0x30]
+aes64im a0, a1
+
+# CHECK-ASM-AND-OBJ: aes64ks1i a0, a1, 5
+# CHECK-ASM: [0x13,0x95,0x55,0x31]
+aes64ks1i a0, a1, 5
+
+# CHECK-ASM-AND-OBJ: aes64ks2 a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x7e]
+aes64ks2 a0, a1, a2
diff --git a/llvm/test/MC/RISCV/rv64zkne-only-invalid.s b/llvm/test/MC/RISCV/rv64zkne-only-invalid.s
new file mode 100644
index 0000000000000..5a7331fa0a9bb
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zkne-only-invalid.s
@@ -0,0 +1,17 @@
+# With Zk extension:
+# RUN: not llvm-mc -triple=riscv64 -mattr=+zk < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# With Zkn extension:
+# RUN: not llvm-mc -triple=riscv64 -mattr=+zkn < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# With Zkne extension:
+# RUN: not llvm-mc -triple=riscv64 -mattr=+zkne < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# CHECK-ERROR: immediate must be an integer in the range [0, 10]
+aes64ks1i a0, a1, 11
+
+# CHECK-ERROR: immediate must be an integer in the range [0, 10]
+aes64ks1i a0, a1, -1
diff --git a/llvm/test/MC/RISCV/rv64zkne-only-valid.s b/llvm/test/MC/RISCV/rv64zkne-only-valid.s
new file mode 100644
index 0000000000000..78950b85f51b8
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zkne-only-valid.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zkne -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zkne < %s \
+# RUN:     | llvm-objdump --mattr=+zkne -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: aes64es a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x32]
+aes64es a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: aes64esm a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x36]
+aes64esm a0, a1, a2
+
+# CHECK-ASM-AND-OBJ: aes64ks1i a0, a1, 5
+# CHECK-ASM: [0x13,0x95,0x55,0x31]
+aes64ks1i a0, a1, 5
+
+# CHECK-ASM-AND-OBJ: aes64ks2 a0, a1, a2
+# CHECK-ASM: [0x33,0x85,0xc5,0x7e]
+aes64ks2 a0, a1, a2
diff --git a/llvm/test/MC/RISCV/rv64zknh-only-valid.s b/llvm/test/MC/RISCV/rv64zknh-only-valid.s
new file mode 100644
index 0000000000000..9478b6004a4a5
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zknh-only-valid.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zknh -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+zknh < %s \
+# RUN:     | llvm-objdump --mattr=+zknh -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: sha512sig0 a0, a1
+# CHECK-ASM: [0x13,0x95,0x65,0x10]
+sha512sig0 a0, a1
+
+# CHECK-ASM-AND-OBJ: sha512sig1 a0, a1
+# CHECK-ASM: [0x13,0x95,0x75,0x10]
+sha512sig1 a0, a1
+
+# CHECK-ASM-AND-OBJ: sha512sum0 a0, a1
+# CHECK-ASM: [0x13,0x95,0x45,0x10]
+sha512sum0 a0, a1
+
+# CHECK-ASM-AND-OBJ: sha512sum1 a0, a1
+# CHECK-ASM: [0x13,0x95,0x55,0x10]
+sha512sum1 a0, a1
diff --git a/llvm/test/MC/RISCV/rv64zksed-invalid.s b/llvm/test/MC/RISCV/rv64zksed-invalid.s
new file mode 100644
index 0000000000000..2c55ac461f51f
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv64zksed-invalid.s
@@ -0,0 +1,13 @@
+# With Zks extension:
+# RUN: not llvm-mc -triple=riscv64 -mattr=+zks < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+# With Zksed extension:
+# RUN: not llvm-mc -triple=riscv64 -mattr=+zksed < %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+
+sm4ed a0, a1, a2, 8
+# CHECK-ERROR: immediate must be an integer in the range [0, 3]
+
+sm4ks a0, a1, a2, 8
+# CHECK-ERROR: immediate must be an integer in the range [0, 3]
diff --git a/llvm/test/MC/RISCV/rvk-user-csr-name.s b/llvm/test/MC/RISCV/rvk-user-csr-name.s
new file mode 100644
index 0000000000000..cacadf794d95b
--- /dev/null
+++ b/llvm/test/MC/RISCV/rvk-user-csr-name.s
@@ -0,0 +1,29 @@
+# RUN: llvm-mc %s -triple=riscv32 -riscv-no-aliases -mattr=+f -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-INST,CHECK-ENC %s
+# RUN: llvm-mc -filetype=obj -triple riscv32 -mattr=+zkr < %s \
+# RUN:     | llvm-objdump -d --mattr=+zkr - \
+# RUN:     | FileCheck -check-prefix=CHECK-INST-ALIAS %s
+#
+# RUN: llvm-mc %s -triple=riscv64 -riscv-no-aliases -mattr=+f -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-INST,CHECK-ENC %s
+# RUN: llvm-mc -filetype=obj -triple riscv64 -mattr=+zkr < %s \
+# RUN:     | llvm-objdump -d --mattr=+zkr - \
+# RUN:     | FileCheck -check-prefix=CHECK-INST-ALIAS %s
+
+##################################
+# Entropy Source CSR
+##################################
+
+# seed
+# name
+# CHECK-INST: csrrs t1, seed, zero
+# CHECK-ENC:  encoding: [0x73,0x23,0x50,0x01]
+# CHECK-INST-ALIAS: csrr t1, seed
+# uimm12
+# CHECK-INST: csrrs t2, seed, zero
+# CHECK-ENC:  encoding: [0xf3,0x23,0x50,0x01]
+# CHECK-INST-ALIAS: csrr t2, seed
+# name
+csrrs t1, seed, zero
+# uimm12
+csrrs t2, 0x015, zero

From bf039a8620f1779d02280cb0a33c4d818073623b Mon Sep 17 00:00:00 2001
From: Kazu Hirata <kazu@google.com>
Date: Sun, 23 Jan 2022 22:53:15 -0800
Subject: [PATCH 329/946] [Target] Use range-based for loops (NFC)

---
 llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp       | 13 +++++--------
 llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp      |  9 ++++-----
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp            |  5 ++---
 llvm/lib/Target/PowerPC/PPCMIPeephole.cpp           |  8 ++++----
 llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp  | 11 +++++------
 llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp         | 12 +++++-------
 llvm/lib/Target/SystemZ/SystemZISelLowering.cpp     |  6 ++----
 llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp      |  8 ++++----
 llvm/lib/Target/VE/LVLGen.cpp                       |  4 ++--
 llvm/lib/Target/VE/VEMCInstLower.cpp                |  3 +--
 .../lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp |  4 ++--
 llvm/lib/Target/X86/X86ISelLowering.cpp             |  9 +++------
 llvm/lib/Target/X86/X86PadShortFunction.cpp         |  7 +++----
 13 files changed, 42 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
index fc0d5cc6fbfa8..eeedce2d99cb0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXImageOptimizer.cpp
@@ -57,12 +57,9 @@ bool NVPTXImageOptimizer::runOnFunction(Function &F) {
   InstrToDelete.clear();
 
   // Look for call instructions in the function
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;
-       ++BI) {
-    for (BasicBlock::iterator I = (*BI).begin(), E = (*BI).end();
-         I != E; ++I) {
-      Instruction &Instr = *I;
-      if (CallInst *CI = dyn_cast<CallInst>(I)) {
+  for (BasicBlock &BB : F) {
+    for (Instruction &Instr : BB) {
+      if (CallInst *CI = dyn_cast<CallInst>(&Instr)) {
         Function *CalledF = CI->getCalledFunction();
         if (CalledF && CalledF->isIntrinsic()) {
           // This is an intrinsic function call, check if its an istypep
@@ -84,8 +81,8 @@ bool NVPTXImageOptimizer::runOnFunction(Function &F) {
   }
 
   // Delete any istypep instances we replaced in the IR
-  for (unsigned i = 0, e = InstrToDelete.size(); i != e; ++i)
-    InstrToDelete[i]->eraseFromParent();
+  for (Instruction *I : InstrToDelete)
+    I->eraseFromParent();
 
   return Changed;
 }
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index 6cf59d285e8d3..f655f25602bc3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -66,10 +66,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 
   // Collect all aggregate loads and mem* calls.
-  for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
-         ++II) {
-      if (LoadInst *LI = dyn_cast<LoadInst>(II)) {
+  for (BasicBlock &BB : F) {
+    for (Instruction &I : BB) {
+      if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
         if (!LI->hasOneUse())
           continue;
 
@@ -81,7 +80,7 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
             continue;
           AggrLoads.push_back(LI);
         }
-      } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(II)) {
+      } else if (MemIntrinsic *IntrCall = dyn_cast<MemIntrinsic>(&I)) {
         // Convert intrinsic calls with variable size or with constant size
         // larger than the MaxAggrCopySize threshold.
         if (ConstantInt *LenCI = dyn_cast<ConstantInt>(IntrCall->getLength())) {
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 7edec82c6e067..eada872c2a7db 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2339,9 +2339,8 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
           Found = true;
         }
       } else if (MO.isRegMask()) {
-        for (TargetRegisterClass::iterator I = RC->begin(),
-             IE = RC->end(); I != IE; ++I)
-          if (MO.clobbersPhysReg(*I)) {
+        for (MCPhysReg R : *RC)
+          if (MO.clobbersPhysReg(R)) {
             Pred.push_back(MO);
             Found = true;
           }
diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index 0c920582843ad..e5fa02bc8ccf0 100644
--- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -258,12 +258,12 @@ void PPCMIPeephole::UpdateTOCSaves(
   }
 
   bool Keep = true;
-  for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
-    MachineInstr *CurrInst = It->first;
+  for (auto &I : TOCSaves) {
+    MachineInstr *CurrInst = I.first;
     // If new instruction dominates an existing one, mark existing one as
     // redundant.
-    if (It->second && MDT->dominates(MI, CurrInst))
-      It->second = false;
+    if (I.second && MDT->dominates(MI, CurrInst))
+      I.second = false;
     // Check if the new instruction is redundant.
     if (MDT->dominates(CurrInst, MI)) {
       Keep = false;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 707c1396e5728..cc5738a5d7b63 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -374,11 +374,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
   // clobbers ctr.
   auto asmClobbersCTR = [](InlineAsm *IA) {
     InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
-    for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
-      InlineAsm::ConstraintInfo &C = CIV[i];
+    for (const InlineAsm::ConstraintInfo &C : CIV) {
       if (C.Type != InlineAsm::isInput)
-        for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
-          if (StringRef(C.Codes[j]).equals_insensitive("{ctr}"))
+        for (const auto &Code : C.Codes)
+          if (StringRef(Code).equals_insensitive("{ctr}"))
             return true;
     }
     return false;
@@ -1301,8 +1300,8 @@ bool PPCTTIImpl::canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
                             LoopInfo *LI, DominatorTree *DT,
                             AssumptionCache *AC, TargetLibraryInfo *LibInfo) {
   // Process nested loops first.
-  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
-    if (canSaveCmp(*I, BI, SE, LI, DT, AC, LibInfo))
+  for (Loop *I : *L)
+    if (canSaveCmp(I, BI, SE, LI, DT, AC, LibInfo))
       return false; // Stop search.
 
   HardwareLoopInfo HWLoopInfo(L);
diff --git a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 0be35adc35c72..8a7d324ddfe15 100644
--- a/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -297,18 +297,16 @@ namespace {
         // fma result.
 
         LiveInterval &NewFMAInt = LIS->getInterval(KilledProdReg);
-        for (LiveInterval::iterator AI = FMAInt.begin(), AE = FMAInt.end();
-             AI != AE; ++AI) {
+        for (auto &AI : FMAInt) {
           // Don't add the segment that corresponds to the original copy.
-          if (AI->valno == AddendValNo)
+          if (AI.valno == AddendValNo)
             continue;
 
           VNInfo *NewFMAValNo =
-            NewFMAInt.getNextValue(AI->start,
-                                   LIS->getVNInfoAllocator());
+              NewFMAInt.getNextValue(AI.start, LIS->getVNInfoAllocator());
 
-          NewFMAInt.addSegment(LiveInterval::Segment(AI->start, AI->end,
-                                                     NewFMAValNo));
+          NewFMAInt.addSegment(
+              LiveInterval::Segment(AI.start, AI.end, NewFMAValNo));
         }
         LLVM_DEBUG(dbgs() << "  extended: " << NewFMAInt << '\n');
 
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 881346bbe47ed..f10651d5c5d7e 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -8314,13 +8314,11 @@ MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
   // Add FPR/VR clobbers.
   if (!NoFloat && (Control & 4) != 0) {
     if (Subtarget.hasVector()) {
-      for (int I = 0; I < 32; I++) {
-        unsigned Reg = SystemZMC::VR128Regs[I];
+      for (unsigned Reg : SystemZMC::VR128Regs) {
         MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
       }
     } else {
-      for (int I = 0; I < 16; I++) {
-        unsigned Reg = SystemZMC::FP64Regs[I];
+      for (unsigned Reg : SystemZMC::FP64Regs) {
         MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
       }
     }
diff --git a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
index 4b95d0d67389d..5a2cfc53da494 100644
--- a/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZPostRewrite.cpp
@@ -170,15 +170,15 @@ bool SystemZPostRewrite::expandCondMove(MachineBasicBlock &MBB,
   MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
   RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
   RestMBB->transferSuccessors(&MBB);
-  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
-    RestMBB->addLiveIn(*I);
+  for (MCPhysReg R : LiveRegs)
+    RestMBB->addLiveIn(R);
 
   // Create a new block MoveMBB to hold the move instruction.
   MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
   MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
   MoveMBB->addLiveIn(SrcReg);
-  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
-    MoveMBB->addLiveIn(*I);
+  for (MCPhysReg R : LiveRegs)
+    MoveMBB->addLiveIn(R);
 
   // At the end of MBB, create a conditional branch to RestMBB if the
   // condition is false, otherwise fall through to MoveMBB.
diff --git a/llvm/lib/Target/VE/LVLGen.cpp b/llvm/lib/Target/VE/LVLGen.cpp
index c4588926af9ed..4db6a59284c27 100644
--- a/llvm/lib/Target/VE/LVLGen.cpp
+++ b/llvm/lib/Target/VE/LVLGen.cpp
@@ -125,8 +125,8 @@ bool LVLGen::runOnMachineFunction(MachineFunction &F) {
   TII = Subtarget.getInstrInfo();
   TRI = Subtarget.getRegisterInfo();
 
-  for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI)
-    Changed |= runOnMachineBasicBlock(*FI);
+  for (MachineBasicBlock &MBB : F)
+    Changed |= runOnMachineBasicBlock(MBB);
 
   if (Changed) {
     LLVM_DEBUG(dbgs() << "\n");
diff --git a/llvm/lib/Target/VE/VEMCInstLower.cpp b/llvm/lib/Target/VE/VEMCInstLower.cpp
index bc5577ce4f978..57195f238cf6e 100644
--- a/llvm/lib/Target/VE/VEMCInstLower.cpp
+++ b/llvm/lib/Target/VE/VEMCInstLower.cpp
@@ -78,8 +78,7 @@ void llvm::LowerVEMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                        AsmPrinter &AP) {
   OutMI.setOpcode(MI->getOpcode());
 
-  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-    const MachineOperand &MO = MI->getOperand(i);
+  for (const MachineOperand &MO : MI->operands()) {
     MCOperand MCOp = LowerOperand(MI, MO, AP);
 
     if (MCOp.isValid())
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 9da0a8129f230..4440bdc3d58f4 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -281,8 +281,8 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
       {codeview::RegisterId::AMD64_XMM31, X86::XMM31},
 
   };
-  for (unsigned I = 0; I < array_lengthof(RegMap); ++I)
-    MRI->mapLLVMRegToCVReg(RegMap[I].Reg, static_cast<int>(RegMap[I].CVReg));
+  for (const auto &I : RegMap)
+    MRI->mapLLVMRegToCVReg(I.Reg, static_cast<int>(I.CVReg));
 }
 
 MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(const Triple &TT,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index be85c116bb037..618b97a2e8dbd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5490,10 +5490,9 @@ bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
 /// materialize the FP immediate as a load from a constant pool.
 bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                      bool ForCodeSize) const {
-  for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
-    if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
+  for (const APFloat &FPImm : LegalFPImmediates)
+    if (Imm.bitwiseIsEqual(FPImm))
       return true;
-  }
   return false;
 }
 
@@ -33438,9 +33437,7 @@ bool X86TargetLowering::areJTsAllowed(const Function *Fn) const {
 static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr,
                               MachineBasicBlock *BB) {
   // Scan forward through BB for a use/def of EFLAGS.
-  for (MachineBasicBlock::iterator miI = std::next(Itr), miE = BB->end();
-         miI != miE; ++miI) {
-    const MachineInstr& mi = *miI;
+  for (const MachineInstr &mi : llvm::make_range(std::next(Itr), BB->end())) {
     if (mi.readsRegister(X86::EFLAGS))
       return true;
     // If we found a def, we can stop searching.
diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp
index 47ae517ae76d7..e92b1b002bb01 100644
--- a/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -129,10 +129,9 @@ bool PadShortFunc::runOnMachineFunction(MachineFunction &MF) {
   bool MadeChange = false;
 
   // Pad the identified basic blocks with NOOPs
-  for (DenseMap<MachineBasicBlock*, unsigned int>::iterator I = ReturnBBs.begin();
-       I != ReturnBBs.end(); ++I) {
-    MachineBasicBlock *MBB = I->first;
-    unsigned Cycles = I->second;
+  for (const auto &ReturnBB : ReturnBBs) {
+    MachineBasicBlock *MBB = ReturnBB.first;
+    unsigned Cycles = ReturnBB.second;
 
     // Function::hasOptSize is already checked above.
     bool OptForSize = llvm::shouldOptimizeForSize(MBB, PSI, MBFI);

From ba16e3c31f66f02df08ec41394b765aa568a3107 Mon Sep 17 00:00:00 2001
From: jacquesguan <Jianjian.Guan@streamcomputing.com>
Date: Fri, 21 Jan 2022 14:35:20 +0800
Subject: [PATCH 330/946] [RISCV] Decouple Zve* extensions and the V extension.

According to the spec, there are some difference between V and Zve64d. For example, the vmulh integer multiply variants that return the high word of the product (vmulh.vv, vmulh.vx, vmulhu.vv, vmulhu.vx, vmulhsu.vv, vmulhsu.vx) are not included for EEW=64 in Zve64*, but V extension does support these instructions. So we should decouple Zve* extensions and the V extension.

Differential Revision: https://reviews.llvm.org/D117854
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +-
 clang/lib/Basic/Targets/RISCV.cpp             |  2 +-
 clang/lib/Sema/SemaChecking.cpp               | 48 ++++++++++++-------
 .../RISCV/rvb-intrinsics/riscv32-zbb-error.c  |  2 +-
 .../CodeGen/RISCV/rvv-intrinsics/rvv-error.c  | 18 +++++++
 clang/utils/TableGen/RISCVVEmitter.cpp        |  2 +-
 llvm/lib/Support/RISCVISAInfo.cpp             | 17 ++++++-
 llvm/lib/Target/RISCV/RISCV.td                |  8 ++--
 llvm/lib/Target/RISCV/RISCVSubtarget.h        | 16 +++++--
 llvm/test/CodeGen/RISCV/attributes.ll         |  8 ++--
 llvm/test/MC/RISCV/attribute-arch.s           | 26 +++++-----
 11 files changed, 101 insertions(+), 48 deletions(-)
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics/rvv-error.c

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 7fccdcaa9fc6a..db1047586a473 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11471,7 +11471,7 @@ def warn_tcb_enforcement_violation : Warning<
 
 // RISC-V builtin required extension warning
 def err_riscv_builtin_requires_extension : Error<
-  "builtin requires '%0' extension support to be enabled">;
+  "builtin requires at least one of the following extensions support to be enabled : %0">;
 def err_riscv_builtin_invalid_lmul : Error<
   "LMUL argument must be in the range [0,3] or [5,7]">;
 } // end of sema component.
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index dc4a451726bbe..0680cad5b07c5 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -188,7 +188,7 @@ void RISCVTargetInfo::getTargetDefines(const LangOptions &Opts,
   if (ISAInfo->hasExtension("c"))
     Builder.defineMacro("__riscv_compressed");
 
-  if (ISAInfo->hasExtension("zve32x"))
+  if (ISAInfo->hasExtension("zve32x") || ISAInfo->hasExtension("v"))
     Builder.defineMacro("__riscv_vector");
 }
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e2b78fa212b81..c8fb36b8311a4 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3976,23 +3976,39 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI,
 
   // Check if each required feature is included
   for (StringRef F : ReqFeatures) {
-    if (TI.hasFeature(F))
-      continue;
-
-    // If the feature is 64bit, alter the string so it will print better in
-    // the diagnostic.
-    if (F == "64bit")
-      F = "RV64";
-
-    // Convert features like "zbr" and "experimental-zbr" to "Zbr".
-    F.consume_front("experimental-");
-    std::string FeatureStr = F.str();
-    FeatureStr[0] = std::toupper(FeatureStr[0]);
+    SmallVector<StringRef> ReqOpFeatures;
+    F.split(ReqOpFeatures, '|');
+    bool HasFeature = false;
+    for (StringRef OF : ReqOpFeatures) {
+      if (TI.hasFeature(OF)) {
+        HasFeature = true;
+        continue;
+      }
+    }
 
-    // Error message
-    FeatureMissing = true;
-    Diag(TheCall->getBeginLoc(), diag::err_riscv_builtin_requires_extension)
-        << TheCall->getSourceRange() << StringRef(FeatureStr);
+    if (!HasFeature) {
+      std::string FeatureStrs = "";
+      for (StringRef OF : ReqOpFeatures) {
+        // If the feature is 64bit, alter the string so it will print better in
+        // the diagnostic.
+        if (OF == "64bit")
+          OF = "RV64";
+
+        // Convert features like "zbr" and "experimental-zbr" to "Zbr".
+        OF.consume_front("experimental-");
+        std::string FeatureStr = OF.str();
+        FeatureStr[0] = std::toupper(FeatureStr[0]);
+        // Combine strings.
+        FeatureStrs += FeatureStrs == "" ? "" : ", ";
+        FeatureStrs += "'";
+        FeatureStrs += FeatureStr;
+        FeatureStrs += "'";
+      }
+      // Error message
+      FeatureMissing = true;
+      Diag(TheCall->getBeginLoc(), diag::err_riscv_builtin_requires_extension)
+          << TheCall->getSourceRange() << StringRef(FeatureStrs);
+    }
   }
 
   if (FeatureMissing)
diff --git a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c
index b831bfb9402b4..a544434105c8b 100644
--- a/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c
+++ b/clang/test/CodeGen/RISCV/rvb-intrinsics/riscv32-zbb-error.c
@@ -2,5 +2,5 @@
 // RUN: %clang_cc1 -triple riscv32 -target-feature +zbb -verify %s -o -
 
 int orc_b_64(int a) {
-  return __builtin_riscv_orc_b_64(a); // expected-error {{builtin requires 'RV64' extension support to be enabled}}
+  return __builtin_riscv_orc_b_64(a); // expected-error {{builtin requires at least one of the following extensions support to be enabled : 'RV64'}}
 }
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/rvv-error.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/rvv-error.c
new file mode 100644
index 0000000000000..7de132e1ce6d0
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/rvv-error.c
@@ -0,0 +1,18 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64V %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64V %s
+// RUN: not %clang_cc1 -triple riscv64 -emit-llvm-only %s 2>&1 | FileCheck %s --check-prefix=CHECK-RV64-ERR 
+
+// CHECK-RV64V-LABEL: @test(
+// CHECK-RV64V-NEXT:  entry:
+// CHECK-RV64V-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.vsetvli.i64(i64 1, i64 0, i64 0)
+// CHECK-RV64V-NEXT:    [[CONV:%.*]] = trunc i64 [[TMP0]] to i32
+// CHECK-RV64V-NEXT:    ret i32 [[CONV]]
+//
+
+// CHECK-RV64-ERR: error: builtin requires at least one of the following extensions support to be enabled : 'Zve32x', 'V'
+
+int test() {
+  return __builtin_rvv_vsetvli(1, 0, 0);
+}
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 837226f4e2a54..c063b766e4a65 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -1034,7 +1034,7 @@ void RVVEmitter::createBuiltins(raw_ostream &OS) {
 
   OS << "#if defined(TARGET_BUILTIN) && !defined(RISCVV_BUILTIN)\n";
   OS << "#define RISCVV_BUILTIN(ID, TYPE, ATTRS) TARGET_BUILTIN(ID, TYPE, "
-        "ATTRS, \"zve32x\")\n";
+        "ATTRS, \"zve32x|v\")\n";
   OS << "#endif\n";
   for (auto &Def : Defs) {
     auto P =
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index c34817920e1bc..e6df48e5bb416 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -701,9 +701,11 @@ Error RISCVISAInfo::checkDependency() {
   bool HasE = Exts.count("e") == 1;
   bool HasD = Exts.count("d") == 1;
   bool HasF = Exts.count("f") == 1;
-  bool HasVector = Exts.count("zve32x") == 1;
+  bool HasZve32x = Exts.count("zve32x") == 1;
   bool HasZve32f = Exts.count("zve32f") == 1;
   bool HasZve64d = Exts.count("zve64d") == 1;
+  bool HasV = Exts.count("v") == 1;
+  bool HasVector = HasZve32x || HasV;
   bool HasZvl = MinVLen != 0;
 
   if (HasE && !IsRv32)
@@ -736,6 +738,12 @@ Error RISCVISAInfo::checkDependency() {
         errc::invalid_argument,
         "zvl*b requires v or zve* extension to also be specified");
 
+  // Could not implement Zve* extension and the V extension at the same time.
+  if (HasZve32x && HasV)
+    return createStringError(
+        errc::invalid_argument,
+        "It is illegal to specify the v extension with zve* extensions");
+
   // Additional dependency checks.
   // TODO: The 'q' extension requires rv64.
   // TODO: It is illegal to specify 'e' extensions with 'f' and 'd'.
@@ -743,7 +751,7 @@ Error RISCVISAInfo::checkDependency() {
   return Error::success();
 }
 
-static const char *ImpliedExtsV[] = {"zvl128b", "zve64d", "f", "d"};
+static const char *ImpliedExtsV[] = {"zvl128b", "f", "d"};
 static const char *ImpliedExtsZfh[] = {"zfhmin"};
 static const char *ImpliedExtsZve64d[] = {"zve64f"};
 static const char *ImpliedExtsZve64f[] = {"zve64x", "zve32f"};
@@ -872,6 +880,11 @@ void RISCVISAInfo::updateMaxELen() {
       ExtName.getAsInteger(10, ZveELen);
       MaxELen = std::max(MaxELen, ZveELen);
     }
+    if (ExtName == "v") {
+      MaxELenFp = 64;
+      MaxELen = 64;
+      return;
+    }
   }
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 72caa88104e85..cd8885f6a6e8d 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -306,21 +306,21 @@ def FeatureStdExtZve64d
 def FeatureStdExtV
     : SubtargetFeature<"v", "HasStdExtV", "true",
                        "'V' (Vector Extension for Application Processors)",
-                       [FeatureStdExtZvl128b, FeatureStdExtZve64d, FeatureStdExtF, FeatureStdExtD]>;
+                       [FeatureStdExtZvl128b, FeatureStdExtF, FeatureStdExtD]>;
 
 def HasVInstructions    : Predicate<"Subtarget->hasVInstructions()">,
       AssemblerPredicate<
-          (any_of FeatureStdExtZve32x),
+          (any_of FeatureStdExtZve32x, FeatureStdExtV),
           "'V' (Vector Extension for Application Processors), 'Zve32x' or "
           "'Zve64x' (Vector Extensions for Embedded Processors)">;
 def HasVInstructionsI64 : Predicate<"Subtarget->hasVInstructionsI64()">,
       AssemblerPredicate<
-          (any_of FeatureStdExtZve64x),
+          (any_of FeatureStdExtZve64x, FeatureStdExtV),
           "'V' (Vector Extension for Application Processors) or 'Zve64x' "
           "(Vector Extensions for Embedded Processors)">;
 def HasVInstructionsAnyF : Predicate<"Subtarget->hasVInstructionsAnyF()">,
       AssemblerPredicate<
-          (any_of FeatureStdExtZve32f),
+          (any_of FeatureStdExtZve32f, FeatureStdExtV),
           "'V' (Vector Extension for Application Processors), 'Zve32f', "
           "'Zve64f' or 'Zve64d' (Vector Extensions for Embedded Processors)">;
 
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index ac1df37345859..62b3d54350779 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -199,13 +199,19 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   }
 
   // Vector codegen related methods.
-  bool hasVInstructions() const { return HasStdExtZve32x; }
-  bool hasVInstructionsI64() const { return HasStdExtZve64x; }
-  bool hasVInstructionsF16() const { return HasStdExtZve32f && HasStdExtZfh; }
+  bool hasVInstructions() const { return HasStdExtV || HasStdExtZve32x; }
+  bool hasVInstructionsI64() const { return HasStdExtV || HasStdExtZve64x; }
+  bool hasVInstructionsF16() const {
+    return (HasStdExtV || HasStdExtZve32f) && HasStdExtZfh;
+  }
   // FIXME: Consider Zfinx in the future
-  bool hasVInstructionsF32() const { return HasStdExtZve32f && HasStdExtF; }
+  bool hasVInstructionsF32() const {
+    return HasStdExtV || (HasStdExtZve32f && HasStdExtF);
+  }
   // FIXME: Consider Zdinx in the future
-  bool hasVInstructionsF64() const { return HasStdExtZve64d && HasStdExtD; }
+  bool hasVInstructionsF64() const {
+    return HasStdExtV || (HasStdExtZve64d && HasStdExtD);
+  }
   // F16 and F64 both require F32.
   bool hasVInstructionsAnyF() const { return hasVInstructionsF32(); }
   unsigned getMaxInterleaveFactor() const {
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index 3f7ca36844cc5..86b384f6df6fc 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -80,8 +80,8 @@
 ; RV32ZBR: .attribute 5, "rv32i2p0_zbr0p93"
 ; RV32ZBS: .attribute 5, "rv32i2p0_zbs1p0"
 ; RV32ZBT: .attribute 5, "rv32i2p0_zbt0p93"
-; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
-; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+; RV32V: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0"
 ; RV32ZBKC: .attribute 5, "rv32i2p0_zbkc1p0"
 ; RV32ZKND: .attribute 5, "rv32i2p0_zknd1p0"
@@ -112,8 +112,8 @@
 ; RV64ZBR: .attribute 5, "rv64i2p0_zbr0p93"
 ; RV64ZBS: .attribute 5, "rv64i2p0_zbs1p0"
 ; RV64ZBT: .attribute 5, "rv64i2p0_zbt0p93"
-; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
-; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+; RV64V: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0"
 ; RV64ZBKC: .attribute 5, "rv64i2p0_zbkc1p0"
 ; RV64ZKND: .attribute 5, "rv64i2p0_zknd1p0"
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index 27dc70a7b6f75..d39566eb81cd9 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -34,43 +34,43 @@
 # CHECK: attribute      5, "rv32i2p0_m2p0_a2p0_f2p0_d2p0_c2p0"
 
 .attribute arch, "rv32iv"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl32b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl64b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl128b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl256b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl512b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl1024b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl2048b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl512b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl4096b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0"
 
 .attribute arch, "rv32ivzvl8192b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
 
 .attribute arch, "rv32ivzvl16384b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
 
 .attribute arch, "rv32ivzvl32768b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl8192b1p0"
 
 .attribute arch, "rv32ivzvl65536b"
-# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0"
+# CHECK: attribute      5, "rv32i2p0_f2p0_d2p0_v1p0_zvl1024b1p0_zvl128b1p0_zvl16384b1p0_zvl2048b1p0_zvl256b1p0_zvl32768b1p0_zvl32b1p0_zvl4096b1p0_zvl512b1p0_zvl64b1p0_zvl65536b1p0_zvl8192b1p0"
 
 .attribute arch, "rv32izve32x"
 # CHECK: attribute      5, "rv32i2p0_zve32x1p0_zvl32b1p0"

From c5590396d041e77a84101cdcc4249788403e4e40 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Mon, 24 Jan 2022 15:23:28 +0800
Subject: [PATCH 331/946] [PowerPC] Emit warning for ieeelongdouble on older
 GNU toolchain

GCC 12 should have proper support for IEEE-754 compliant 128-bit
floating point in libstdc++. So warning is needed when linking against
older libstdc++ versions or LLVM libc++.

Glibc starts supporting float128 in both header and libraries since
2.32.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D112906
---
 .../clang/Basic/DiagnosticDriverKinds.td      |  3 +
 clang/lib/Driver/ToolChains/PPCLinux.cpp      | 56 +++++++++++++++++++
 clang/lib/Driver/ToolChains/PPCLinux.h        |  7 ++-
 .../gcc/powerpc64le-linux-gnu/11.2.0/.keep    |  0
 clang/test/Driver/ppc-float-abi-warning.cpp   | 13 +++++
 5 files changed, 77 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/Driver/Inputs/powerpc64le-linux-gnu-tree/gcc-11.2.0/lib/gcc/powerpc64le-linux-gnu/11.2.0/.keep
 create mode 100644 clang/test/Driver/ppc-float-abi-warning.cpp

diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 3ea32a8876c91..e635be6b6d1bc 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -380,6 +380,9 @@ def warn_drv_deprecated_arg : Warning<
   "argument '%0' is deprecated, use '%1' instead">, InGroup<Deprecated>;
 def warn_drv_assuming_mfloat_abi_is : Warning<
   "unknown platform, assuming -mfloat-abi=%0">;
+def warn_drv_unsupported_float_abi_by_lib : Warning<
+  "float ABI '%0' is not supported by current library">,
+  InGroup<DiagGroup<"unsupported-abi">>;
 def warn_ignoring_ftabstop_value : Warning<
   "ignoring invalid -ftabstop value '%0', using default value %1">;
 def warn_drv_overriding_flag_option : Warning<
diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp
index af2e3a21a0af7..e5e1aa06f4b1d 100644
--- a/clang/lib/Driver/ToolChains/PPCLinux.cpp
+++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp
@@ -8,11 +8,50 @@
 
 #include "PPCLinux.h"
 #include "clang/Driver/Driver.h"
+#include "clang/Driver/DriverDiagnostic.h"
 #include "clang/Driver/Options.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 
 using namespace clang::driver::toolchains;
 using namespace llvm::opt;
+using namespace llvm::sys;
+
+// Glibc older than 2.32 doesn't fully support IEEE float128. Here we check
+// glibc version by looking at dynamic linker name.
+static bool GlibcSupportsFloat128(const std::string &Linker) {
+  llvm::SmallVector<char, 16> Path;
+
+  // Resolve potential symlinks to linker.
+  if (fs::real_path(Linker, Path))
+    return false;
+  llvm::StringRef LinkerName =
+      path::filename(llvm::StringRef(Path.data(), Path.size()));
+
+  // Since glibc 2.34, the installed .so file is not symlink anymore. But we can
+  // still safely assume it's newer than 2.32.
+  if (LinkerName.startswith("ld64.so"))
+    return true;
+
+  if (!LinkerName.startswith("ld-2."))
+    return false;
+  unsigned Minor = (LinkerName[5] - '0') * 10 + (LinkerName[6] - '0');
+  if (Minor < 32)
+    return false;
+
+  return true;
+}
+
+PPCLinuxToolChain::PPCLinuxToolChain(const Driver &D,
+                                     const llvm::Triple &Triple,
+                                     const llvm::opt::ArgList &Args)
+    : Linux(D, Triple, Args) {
+  if (Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) {
+    StringRef ABIName = A->getValue();
+    if (ABIName == "ieeelongdouble" && !SupportIEEEFloat128(D, Triple, Args))
+      D.Diag(diag::warn_drv_unsupported_float_abi_by_lib) << ABIName;
+  }
+}
 
 void PPCLinuxToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
                                                   ArgStringList &CC1Args) const {
@@ -26,3 +65,20 @@ void PPCLinuxToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
 
   Linux::AddClangSystemIncludeArgs(DriverArgs, CC1Args);
 }
+
+bool PPCLinuxToolChain::SupportIEEEFloat128(
+    const Driver &D, const llvm::Triple &Triple,
+    const llvm::opt::ArgList &Args) const {
+  if (!Triple.isLittleEndian() || !Triple.isPPC64())
+    return false;
+
+  if (Args.hasArg(options::OPT_nostdlib, options::OPT_nostdlibxx))
+    return true;
+
+  bool HasUnsupportedCXXLib =
+      ToolChain::GetCXXStdlibType(Args) == CST_Libcxx &&
+      GCCInstallation.getVersion().isOlderThan(12, 1, 0);
+
+  return GlibcSupportsFloat128(Linux::getDynamicLinker(Args)) &&
+         !(D.CCCIsCXX() && HasUnsupportedCXXLib);
+}
diff --git a/clang/lib/Driver/ToolChains/PPCLinux.h b/clang/lib/Driver/ToolChains/PPCLinux.h
index b3ef7b61dc3aa..e0318ae8a3a2a 100644
--- a/clang/lib/Driver/ToolChains/PPCLinux.h
+++ b/clang/lib/Driver/ToolChains/PPCLinux.h
@@ -18,12 +18,15 @@ namespace toolchains {
 class LLVM_LIBRARY_VISIBILITY PPCLinuxToolChain : public Linux {
 public:
   PPCLinuxToolChain(const Driver &D, const llvm::Triple &Triple,
-                    const llvm::opt::ArgList &Args)
-      : Linux(D, Triple, Args) {}
+                    const llvm::opt::ArgList &Args);
 
   void
   AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs,
                             llvm::opt::ArgStringList &CC1Args) const override;
+
+private:
+  bool SupportIEEEFloat128(const Driver &D, const llvm::Triple &Triple,
+                           const llvm::opt::ArgList &Args) const;
 };
 
 } // end namespace toolchains
diff --git a/clang/test/Driver/Inputs/powerpc64le-linux-gnu-tree/gcc-11.2.0/lib/gcc/powerpc64le-linux-gnu/11.2.0/.keep b/clang/test/Driver/Inputs/powerpc64le-linux-gnu-tree/gcc-11.2.0/lib/gcc/powerpc64le-linux-gnu/11.2.0/.keep
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/clang/test/Driver/ppc-float-abi-warning.cpp b/clang/test/Driver/ppc-float-abi-warning.cpp
new file mode 100644
index 0000000000000..3ccb9415a021d
--- /dev/null
+++ b/clang/test/Driver/ppc-float-abi-warning.cpp
@@ -0,0 +1,13 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang -### --driver-mode=g++ -target powerpc64le-linux-gnu %s \
+// RUN:  --gcc-toolchain=%S/Inputs/powerpc64le-linux-gnu-tree/gcc-11.2.0 \
+// RUN:  -mabi=ieeelongdouble -stdlib=libstdc++ 2>&1 | FileCheck %s
+// RUN: %clang -### --driver-mode=g++ -target powerpc64le-linux-gnu %s \
+// RUN:  -mabi=ieeelongdouble -stdlib=libc++ 2>&1 | FileCheck %s
+// RUN: %clang -### --driver-mode=g++ -target powerpc64le-linux-gnu %s\
+// RUN:  -mabi=ieeelongdouble -stdlib=libc++ -Wno-unsupported-abi 2>&1 | \
+// RUN:  FileCheck %s --check-prefix=NOWARN
+
+// CHECK: warning: float ABI 'ieeelongdouble' is not supported by current library
+// NOWARN-NOT: warning: float ABI 'ieeelongdouble' is not supported by current library
+long double foo(long double x) { return x; }

From ea2112ea15a0f43cb469b29e00cda3d7a48ae875 Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Mon, 24 Jan 2022 08:34:24 +0100
Subject: [PATCH 332/946] [clang-format] Remove unused assignment. NFC.

Fixes scan-build reported warning: https://llvm.org/reports/scan-build/report-QualifierAlignmentFixer.cpp-analyzeRight-55-191910.html#EndPath.
---
 clang/lib/Format/QualifierAlignmentFixer.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp
index a53db5d11848d..b3a4684bead1a 100644
--- a/clang/lib/Format/QualifierAlignmentFixer.cpp
+++ b/clang/lib/Format/QualifierAlignmentFixer.cpp
@@ -261,10 +261,8 @@ const FormatToken *LeftRightQualifierAlignmentFixer::analyzeRight(
       // Move to the end of any template class members e.g.
       // `Foo<int>::iterator`.
       if (Next && Next->startsSequence(TT_TemplateCloser, tok::coloncolon,
-                                       tok::identifier)) {
-        Next = Next->Next->Next;
+                                       tok::identifier))
         return Tok;
-      }
       assert(Next && "Missing template opener");
       Next = Next->Next;
     }

From 3519dcfec22963fbb84e154cecc2df22e6c7724f Mon Sep 17 00:00:00 2001
From: Nimish Mishra <neelam.nimish@gmail.com>
Date: Mon, 24 Jan 2022 10:02:58 +0530
Subject: [PATCH 333/946] Added OpenMP 5.0 specification based semantic checks
 for atomic update construct

---
 flang/lib/Semantics/check-omp-structure.cpp | 154 +++++++++++++++++-
 flang/lib/Semantics/check-omp-structure.h   |   6 +
 flang/test/Semantics/omp-atomic01.f90       |  48 +++++-
 flang/test/Semantics/omp-atomic02.f90       | 109 +++++++++++++
 flang/test/Semantics/omp-atomic03.f90       |  93 +++++++++++
 flang/test/Semantics/omp-atomic04.f90       | 168 ++++++++++++++++++++
 flang/test/Semantics/omp-atomic05.f90       |  26 +++
 7 files changed, 600 insertions(+), 4 deletions(-)
 create mode 100644 flang/test/Semantics/omp-atomic02.f90
 create mode 100644 flang/test/Semantics/omp-atomic03.f90
 create mode 100644 flang/test/Semantics/omp-atomic04.f90
 create mode 100644 flang/test/Semantics/omp-atomic05.f90

diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index df66a42dede52..802fdf650a07c 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -1324,13 +1324,163 @@ void OmpStructureChecker::Leave(const parser::OmpEndBlockDirective &x) {
   }
 }
 
+template <typename T, typename D>
+bool OmpStructureChecker::IsOperatorValid(const T &node, const D &variable) {
+  using AllowedBinaryOperators =
+      std::variant<parser::Expr::Add, parser::Expr::Multiply,
+          parser::Expr::Subtract, parser::Expr::Divide, parser::Expr::AND,
+          parser::Expr::OR, parser::Expr::EQV, parser::Expr::NEQV>;
+  using BinaryOperators = std::variant<parser::Expr::Add,
+      parser::Expr::Multiply, parser::Expr::Subtract, parser::Expr::Divide,
+      parser::Expr::AND, parser::Expr::OR, parser::Expr::EQV,
+      parser::Expr::NEQV, parser::Expr::Power, parser::Expr::Concat,
+      parser::Expr::LT, parser::Expr::LE, parser::Expr::EQ, parser::Expr::NE,
+      parser::Expr::GE, parser::Expr::GT>;
+
+  if constexpr (common::HasMember<T, BinaryOperators>) {
+    const auto &variableName{variable.GetSource().ToString()};
+    const auto &exprLeft{std::get<0>(node.t)};
+    const auto &exprRight{std::get<1>(node.t)};
+    if ((exprLeft.value().source.ToString() != variableName) &&
+        (exprRight.value().source.ToString() != variableName)) {
+      context_.Say(variable.GetSource(),
+          "Atomic update variable '%s' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct"_err_en_US,
+          variableName);
+    }
+    return common::HasMember<T, AllowedBinaryOperators>;
+  }
+  return true;
+}
+
+void OmpStructureChecker::CheckAtomicUpdateAssignmentStmt(
+    const parser::AssignmentStmt &assignment) {
+  const auto &expr{std::get<parser::Expr>(assignment.t)};
+  const auto &var{std::get<parser::Variable>(assignment.t)};
+  std::visit(
+      common::visitors{
+          [&](const common::Indirection<parser::FunctionReference> &x) {
+            const auto &procedureDesignator{
+                std::get<parser::ProcedureDesignator>(x.value().v.t)};
+            const parser::Name *name{
+                std::get_if<parser::Name>(&procedureDesignator.u)};
+            if (name &&
+                !(name->source == "max" || name->source == "min" ||
+                    name->source == "iand" || name->source == "ior" ||
+                    name->source == "ieor")) {
+              context_.Say(expr.source,
+                  "Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement"_err_en_US);
+            } else if (name) {
+              bool foundMatch{false};
+              if (auto varDesignatorIndirection =
+                      std::get_if<Fortran::common::Indirection<
+                          Fortran::parser::Designator>>(&var.u)) {
+                const auto &varDesignator = varDesignatorIndirection->value();
+                if (const auto *dataRef = std::get_if<Fortran::parser::DataRef>(
+                        &varDesignator.u)) {
+                  if (const auto *name =
+                          std::get_if<Fortran::parser::Name>(&dataRef->u)) {
+                    const auto &varSymbol = *name->symbol;
+                    if (const auto *e{GetExpr(expr)}) {
+                      for (const Symbol &symbol :
+                          evaluate::CollectSymbols(*e)) {
+                        if (symbol == varSymbol) {
+                          foundMatch = true;
+                          break;
+                        }
+                      }
+                    }
+                  }
+                }
+              }
+              if (!foundMatch) {
+                context_.Say(expr.source,
+                    "Atomic update variable '%s' not found in the argument list of intrinsic procedure"_err_en_US,
+                    var.GetSource().ToString());
+              }
+            }
+          },
+          [&](const auto &x) {
+            if (!IsOperatorValid(x, var)) {
+              context_.Say(expr.source,
+                  "Invalid operator in OpenMP ATOMIC (UPDATE) statement"_err_en_US);
+            }
+          },
+      },
+      expr.u);
+}
+
+void OmpStructureChecker::CheckAtomicMemoryOrderClause(
+    const parser::OmpAtomicClauseList &clauseList) {
+  int numMemoryOrderClause = 0;
+  for (const auto &clause : clauseList.v) {
+    if (std::get_if<Fortran::parser::OmpMemoryOrderClause>(&clause.u)) {
+      numMemoryOrderClause++;
+      if (numMemoryOrderClause > 1) {
+        context_.Say(clause.source,
+            "More than one memory order clause not allowed on OpenMP Atomic construct"_err_en_US);
+        return;
+      }
+    }
+  }
+}
+
+void OmpStructureChecker::CheckAtomicMemoryOrderClause(
+    const parser::OmpAtomicClauseList &leftHandClauseList,
+    const parser::OmpAtomicClauseList &rightHandClauseList) {
+  int numMemoryOrderClause = 0;
+  for (const auto &clause : leftHandClauseList.v) {
+    if (std::get_if<Fortran::parser::OmpMemoryOrderClause>(&clause.u)) {
+      numMemoryOrderClause++;
+      if (numMemoryOrderClause > 1) {
+        context_.Say(clause.source,
+            "More than one memory order clause not allowed on OpenMP Atomic construct"_err_en_US);
+        return;
+      }
+    }
+  }
+  for (const auto &clause : rightHandClauseList.v) {
+    if (std::get_if<Fortran::parser::OmpMemoryOrderClause>(&clause.u)) {
+      numMemoryOrderClause++;
+      if (numMemoryOrderClause > 1) {
+        context_.Say(clause.source,
+            "More than one memory order clause not allowed on OpenMP Atomic construct"_err_en_US);
+        return;
+      }
+    }
+  }
+}
+
 void OmpStructureChecker::Enter(const parser::OpenMPAtomicConstruct &x) {
   std::visit(
       common::visitors{
-          [&](const auto &someAtomicConstruct) {
-            const auto &dir{std::get<parser::Verbatim>(someAtomicConstruct.t)};
+          [&](const parser::OmpAtomic &atomicConstruct) {
+            const auto &dir{std::get<parser::Verbatim>(atomicConstruct.t)};
+            PushContextAndClauseSets(
+                dir.source, llvm::omp::Directive::OMPD_atomic);
+            CheckAtomicUpdateAssignmentStmt(
+                std::get<parser::Statement<parser::AssignmentStmt>>(
+                    atomicConstruct.t)
+                    .statement);
+            CheckAtomicMemoryOrderClause(
+                std::get<parser::OmpAtomicClauseList>(atomicConstruct.t));
+          },
+          [&](const parser::OmpAtomicUpdate &atomicConstruct) {
+            const auto &dir{std::get<parser::Verbatim>(atomicConstruct.t)};
+            PushContextAndClauseSets(
+                dir.source, llvm::omp::Directive::OMPD_atomic);
+            CheckAtomicUpdateAssignmentStmt(
+                std::get<parser::Statement<parser::AssignmentStmt>>(
+                    atomicConstruct.t)
+                    .statement);
+            CheckAtomicMemoryOrderClause(
+                std::get<0>(atomicConstruct.t), std::get<2>(atomicConstruct.t));
+          },
+          [&](const auto &atomicConstruct) {
+            const auto &dir{std::get<parser::Verbatim>(atomicConstruct.t)};
             PushContextAndClauseSets(
                 dir.source, llvm::omp::Directive::OMPD_atomic);
+            CheckAtomicMemoryOrderClause(
+                std::get<0>(atomicConstruct.t), std::get<2>(atomicConstruct.t));
           },
       },
       x.u);
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index bf98f360ed58b..88005ad4b64d5 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -229,6 +229,12 @@ class OmpStructureChecker
   void CheckLoopItrVariableIsInt(const parser::OpenMPLoopConstruct &x);
   void CheckDoWhile(const parser::OpenMPLoopConstruct &x);
   void CheckCycleConstraints(const parser::OpenMPLoopConstruct &x);
+  template <typename T, typename D> bool IsOperatorValid(const T &, const D &);
+  void CheckAtomicMemoryOrderClause(
+      const parser::OmpAtomicClauseList &, const parser::OmpAtomicClauseList &);
+  void CheckAtomicMemoryOrderClause(const parser::OmpAtomicClauseList &);
+  void CheckAtomicUpdateAssignmentStmt(const parser::AssignmentStmt &);
+  void CheckAtomicConstructStructure(const parser::OpenMPAtomicConstruct &);
   void CheckDistLinear(const parser::OpenMPLoopConstruct &x);
   void CheckSIMDNest(const parser::OpenMPConstruct &x);
   void CheckTargetNest(const parser::OpenMPConstruct &x);
diff --git a/flang/test/Semantics/omp-atomic01.f90 b/flang/test/Semantics/omp-atomic01.f90
index 9f9e26f9081f9..b668de202c17e 100644
--- a/flang/test/Semantics/omp-atomic01.f90
+++ b/flang/test/Semantics/omp-atomic01.f90
@@ -12,152 +12,184 @@
 ! At most one memory-order-clause may appear on the construct.
 
 !READ
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the READ directive
   !$omp atomic seq_cst seq_cst read
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the READ directive
   !$omp atomic read seq_cst seq_cst
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the READ directive
   !$omp atomic seq_cst read seq_cst
     i = j
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQUIRE clause can appear on the READ directive
   !$omp atomic acquire acquire read
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQUIRE clause can appear on the READ directive
   !$omp atomic read acquire acquire
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQUIRE clause can appear on the READ directive
   !$omp atomic acquire read acquire
     i = j
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the READ directive
   !$omp atomic relaxed relaxed read
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the READ directive
   !$omp atomic read relaxed relaxed
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the READ directive
   !$omp atomic relaxed read relaxed
     i = j
 
 !UPDATE
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive
   !$omp atomic seq_cst seq_cst update
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive
   !$omp atomic update seq_cst seq_cst
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the UPDATE directive
   !$omp atomic seq_cst update seq_cst
     i = j
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the UPDATE directive
   !$omp atomic release release update
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the UPDATE directive
   !$omp atomic update release release
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the UPDATE directive
   !$omp atomic release update release
     i = j
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the UPDATE directive
   !$omp atomic relaxed relaxed update
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the UPDATE directive
   !$omp atomic update relaxed relaxed
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the UPDATE directive
   !$omp atomic relaxed update relaxed
     i = j
 
 !CAPTURE
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive
   !$omp atomic seq_cst seq_cst capture
     i = j
     j = k
   !$omp end atomic
-
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive
   !$omp atomic capture seq_cst seq_cst
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the CAPTURE directive
   !$omp atomic seq_cst capture seq_cst
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the CAPTURE directive
   !$omp atomic release release capture
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the CAPTURE directive
   !$omp atomic capture release release
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the CAPTURE directive
   !$omp atomic release capture release
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the CAPTURE directive
   !$omp atomic relaxed relaxed capture
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the CAPTURE directive
   !$omp atomic capture relaxed relaxed
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the CAPTURE directive
   !$omp atomic relaxed capture relaxed
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive
   !$omp atomic acq_rel acq_rel capture
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive
   !$omp atomic capture acq_rel acq_rel
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQ_REL clause can appear on the CAPTURE directive
   !$omp atomic acq_rel capture acq_rel
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive
   !$omp atomic acquire acquire capture
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive
   !$omp atomic capture acquire acquire
     i = j
     j = k
   !$omp end atomic
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one ACQUIRE clause can appear on the CAPTURE directive
   !$omp atomic acquire capture acquire
     i = j
@@ -165,43 +197,55 @@
   !$omp end atomic
 
 !WRITE
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the WRITE directive
   !$omp atomic seq_cst seq_cst write
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the WRITE directive
   !$omp atomic write seq_cst seq_cst
     i = j
+
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the WRITE directive
   !$omp atomic seq_cst write seq_cst
     i = j
 
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the WRITE directive
   !$omp atomic release release write
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the WRITE directive
   !$omp atomic write release release
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the WRITE directive
   !$omp atomic release write release
     i = j
-
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the WRITE directive
   !$omp atomic relaxed relaxed write
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the WRITE directive
   !$omp atomic write relaxed relaxed
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the WRITE directive
   !$omp atomic relaxed write relaxed
     i = j
 
 !No atomic-clause
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELAXED clause can appear on the ATOMIC directive
   !$omp atomic relaxed relaxed
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one SEQ_CST clause can appear on the ATOMIC directive
   !$omp atomic seq_cst seq_cst
     i = j
+  !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
   !ERROR: At most one RELEASE clause can appear on the ATOMIC directive
   !$omp atomic release release
     i = j
diff --git a/flang/test/Semantics/omp-atomic02.f90 b/flang/test/Semantics/omp-atomic02.f90
new file mode 100644
index 0000000000000..ec04fda86afc5
--- /dev/null
+++ b/flang/test/Semantics/omp-atomic02.f90
@@ -0,0 +1,109 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp
+
+! OpenMP Atomic construct
+! section 2.17.7
+! operator is one of +, *, -, /, .AND., .OR., .EQV., or .NEQV
+
+program OmpAtomic
+   use omp_lib
+   CHARACTER c*3, d*3
+   LOGICAL l, m, n
+
+   a = 1
+   b = 2
+   c = 'foo'
+   d = 'bar'
+   m = .TRUE.
+   n = .FALSE.
+   !$omp parallel num_threads(4)
+
+   !$omp atomic
+   a = a + (4*2)
+   !$omp atomic
+   a = a*(b + 1)
+   !$omp atomic
+   a = a - 3
+   !$omp atomic
+   a = a/(b + 1)
+   !$omp atomic
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   a = a**4
+   !$omp atomic
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   c = c//d
+   !$omp atomic
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .LT. b
+   !$omp atomic
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .LE. b
+   !$omp atomic
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .EQ. b
+   !$omp atomic
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .NE. b
+   !$omp atomic
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .GE. b
+   !$omp atomic
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .GT. b
+   !$omp atomic
+   m = m .AND. n
+   !$omp atomic
+   m = m .OR. n
+   !$omp atomic
+   m = m .EQV. n
+   !$omp atomic
+   m = m .NEQV. n
+   !$omp atomic update
+   a = a + (4*2)
+   !$omp atomic update
+   a = a*(b + 1)
+   !$omp atomic update
+   a = a - 3
+   !$omp atomic update
+   a = a/(b + 1)
+   !$omp atomic update
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   a = a**4
+   !$omp atomic update
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   c = c//d
+   !$omp atomic update
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .LT. b
+   !$omp atomic update
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .LE. b
+   !$omp atomic update
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .EQ. b
+   !$omp atomic update
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .GE. b
+   !$omp atomic update
+   !ERROR: Atomic update variable 'l' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   !ERROR: Invalid operator in OpenMP ATOMIC (UPDATE) statement
+   l = a .GT. b
+   !$omp atomic update
+   m = m .AND. n
+   !$omp atomic update
+   m = m .OR. n
+   !$omp atomic update
+   m = m .EQV. n
+   !$omp atomic update
+   m = m .NEQV. n
+   !$omp end parallel
+end program OmpAtomic
diff --git a/flang/test/Semantics/omp-atomic03.f90 b/flang/test/Semantics/omp-atomic03.f90
new file mode 100644
index 0000000000000..4262416fa9930
--- /dev/null
+++ b/flang/test/Semantics/omp-atomic03.f90
@@ -0,0 +1,93 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp
+
+! OpenMP Atomic construct
+! section 2.17.7
+! Intrinsic procedure name is one of MAX, MIN, IAND, IOR, or IEOR.
+
+program OmpAtomic
+   use omp_lib
+   real x
+   integer :: y, z, a, b, c, d
+   x = 5.73
+   y = 3
+   z = 1
+!$omp atomic
+   y = IAND(y, 4)
+!$omp atomic
+   y = IOR(y, 5)
+!$omp atomic
+   y = IEOR(y, 6)
+!$omp atomic
+   y = MAX(y, 7)
+!$omp atomic
+   y = MIN(y, 8)
+
+!$omp atomic
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = IAND(y, 4)
+!$omp atomic
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = IOR(y, 5)
+!$omp atomic
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = IEOR(y, 6)
+!$omp atomic
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = MAX(y, 7, b, c)
+!$omp atomic
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = MIN(y, 8, a, d)
+
+!$omp atomic
+   !ERROR: Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement
+   y = FRACTION(x)
+!$omp atomic
+   !ERROR: Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement
+   y = REAL(x)
+!$omp atomic update
+   y = IAND(y, 4)
+!$omp atomic update
+   y = IOR(y, 5)
+!$omp atomic update
+   y = IEOR(y, 6)
+!$omp atomic update
+   y = MAX(y, 7)
+!$omp atomic update
+   y = MIN(y, 8)
+
+!$omp atomic update
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = IAND(y, 4)
+!$omp atomic update
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = IOR(y, 5)
+!$omp atomic update
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = IEOR(y, 6)
+!$omp atomic update
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = MAX(y, 7)
+!$omp atomic update
+   !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+   z = MIN(y, 8)
+
+!$omp atomic update
+   !ERROR: Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement
+   y = MOD(y, 9)
+!$omp atomic update
+   !ERROR: Invalid intrinsic procedure name in OpenMP ATOMIC (UPDATE) statement
+   x = ABS(x)
+end program OmpAtomic
+
+subroutine conflicting_types()
+    type simple
+    integer :: z
+    end type
+    real x
+    integer :: y, z
+    type(simple) ::s
+    z = 1
+    !$omp atomic
+    !ERROR: Atomic update variable 'z' not found in the argument list of intrinsic procedure
+    z = IAND(s%z, 4)
+end subroutine
diff --git a/flang/test/Semantics/omp-atomic04.f90 b/flang/test/Semantics/omp-atomic04.f90
new file mode 100644
index 0000000000000..15b832cd3bcdc
--- /dev/null
+++ b/flang/test/Semantics/omp-atomic04.f90
@@ -0,0 +1,168 @@
+! RUN: %python %S/test_errors.py %s %flang_fc1 -fopenmp
+
+! OpenMP Atomic construct
+! section 2.17.7
+! Update assignment must be 'var = var op expr' or 'var = expr op var'
+
+program OmpAtomic
+   use omp_lib
+   real x
+   integer y
+   logical m, n, l
+   x = 5.73
+   y = 3
+   m = .TRUE.
+   n = .FALSE.
+!$omp atomic
+   x = x + 1
+!$omp atomic
+   x = 1 + x
+!$omp atomic
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = y + 1
+!$omp atomic
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = 1 + y
+
+!$omp atomic
+   x = x - 1
+!$omp atomic
+   x = 1 - x
+!$omp atomic
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = y - 1
+!$omp atomic
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = 1 - y
+
+!$omp atomic
+   x = x*1
+!$omp atomic
+   x = 1*x
+!$omp atomic
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = y*1
+!$omp atomic
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = 1*y
+
+!$omp atomic
+   x = x/1
+!$omp atomic
+   x = 1/x
+!$omp atomic
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = y/1
+!$omp atomic
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = 1/y
+
+!$omp atomic
+   m = m .AND. n
+!$omp atomic
+   m = n .AND. m
+!$omp atomic
+   !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   m = n .AND. l
+
+!$omp atomic
+   m = m .OR. n
+!$omp atomic
+   m = n .OR. m
+!$omp atomic
+   !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   m = n .OR. l
+
+!$omp atomic
+   m = m .EQV. n
+!$omp atomic
+   m = n .EQV. m
+!$omp atomic
+   !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   m = n .EQV. l
+
+!$omp atomic
+   m = m .NEQV. n
+!$omp atomic
+   m = n .NEQV. m
+!$omp atomic
+   !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   m = n .NEQV. l
+
+!$omp atomic update
+   x = x + 1
+!$omp atomic update
+   x = 1 + x
+!$omp atomic update
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = y + 1
+!$omp atomic update
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = 1 + y
+
+!$omp atomic update
+   x = x - 1
+!$omp atomic update
+   x = 1 - x
+!$omp atomic update
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = y - 1
+!$omp atomic update
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = 1 - y
+
+!$omp atomic update
+   x = x*1
+!$omp atomic update
+   x = 1*x
+!$omp atomic update
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = y*1
+!$omp atomic update
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = 1*y
+
+!$omp atomic update
+   x = x/1
+!$omp atomic update
+   x = 1/x
+!$omp atomic update
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = y/1
+!$omp atomic update
+   !ERROR: Atomic update variable 'x' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   x = 1/y
+
+!$omp atomic update
+   m = m .AND. n
+!$omp atomic update
+   m = n .AND. m
+!$omp atomic update
+   !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   m = n .AND. l
+
+!$omp atomic update
+   m = m .OR. n
+!$omp atomic update
+   m = n .OR. m
+!$omp atomic update
+   !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   m = n .OR. l
+
+!$omp atomic update
+   m = m .EQV. n
+!$omp atomic update
+   m = n .EQV. m
+!$omp atomic update
+   !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   m = n .EQV. l
+
+!$omp atomic update
+   m = m .NEQV. n
+!$omp atomic update
+   m = n .NEQV. m
+!$omp atomic update
+   !ERROR: Atomic update variable 'm' not found in the RHS of the assignment statement in an ATOMIC (UPDATE) construct
+   m = n .NEQV. l
+
+end program OmpAtomic
diff --git a/flang/test/Semantics/omp-atomic05.f90 b/flang/test/Semantics/omp-atomic05.f90
new file mode 100644
index 0000000000000..1ff13d6cd29ce
--- /dev/null
+++ b/flang/test/Semantics/omp-atomic05.f90
@@ -0,0 +1,26 @@
+! RUN: %python %S/test_errors.py %s %flang -fopenmp
+
+! This tests the various semantics related to the clauses of various OpenMP atomic constructs
+
+program OmpAtomic
+    use omp_lib
+    integer :: g, x
+
+    !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
+    !$omp atomic relaxed, seq_cst
+        x = x + 1
+    !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
+    !$omp atomic read seq_cst, relaxed
+        x = g
+    !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
+    !$omp atomic write relaxed, release
+        x = 2 * 4
+    !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
+    !$omp atomic update release, seq_cst
+        x = 10
+    !ERROR: More than one memory order clause not allowed on OpenMP Atomic construct
+    !$omp atomic capture release, seq_cst
+        x = g
+        g = x * 10
+    !$omp end atomic
+end program OmpAtomic

From 670a721de2a19d0307ceea47349fd9e986a8484f Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Mon, 24 Jan 2022 08:48:14 +0100
Subject: [PATCH 334/946] [clang-format] Assert Line->First. NFC.

Cf. scan-build reports:
* https://llvm.org/reports/scan-build/report-AffectedRangeManager.cpp-nonPPLineAffected-34-16c04b.html#EndPath
* https://llvm.org/reports/scan-build/report-SortJavaScriptImports.cpp-parseModuleReferences-34-96a7f8.html#EndPath
* https://llvm.org/reports/scan-build/report-TokenAnnotator.cpp-setCommentLineLevels-26-77bdba.html#EndPath
* https://llvm.org/reports/scan-build/report-AffectedRangeManager.cpp-nonPPLineAffected-31-714434.html#EndPath
* https://llvm.org/reports/scan-build/report-TokenAnnotator.cpp-setCommentLineLevels-16-bd39d0.html#EndPath
* https://llvm.org/reports/scan-build/report-UnwrappedLineFormatter.cpp-format-90-668b2d.html#EndPath
---
 clang/lib/Format/AffectedRangeManager.cpp   |  2 ++
 clang/lib/Format/SortJavaScriptImports.cpp  |  1 +
 clang/lib/Format/TokenAnnotator.cpp         | 22 +++++++++++----------
 clang/lib/Format/UnwrappedLineFormatter.cpp |  3 ++-
 4 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/clang/lib/Format/AffectedRangeManager.cpp b/clang/lib/Format/AffectedRangeManager.cpp
index 3b735c4e68596..f69f65c5ddf1f 100644
--- a/clang/lib/Format/AffectedRangeManager.cpp
+++ b/clang/lib/Format/AffectedRangeManager.cpp
@@ -27,6 +27,7 @@ bool AffectedRangeManager::computeAffectedLines(
   const AnnotatedLine *PreviousLine = nullptr;
   while (I != E) {
     AnnotatedLine *Line = *I;
+    assert(Line->First);
     Line->LeadingEmptyLinesAffected = affectsLeadingEmptyLines(*Line->First);
 
     // If a line is part of a preprocessor directive, it needs to be formatted
@@ -113,6 +114,7 @@ bool AffectedRangeManager::nonPPLineAffected(
   // affected.
   bool SomeFirstChildAffected = false;
 
+  assert(Line->First);
   for (FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
     // Determine whether 'Tok' was affected.
     if (affectsTokenRange(*Tok, *Tok, IncludeLeadingNewlines))
diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp
index 21f0bdd7323d4..37e79bb15b58b 100644
--- a/clang/lib/Format/SortJavaScriptImports.cpp
+++ b/clang/lib/Format/SortJavaScriptImports.cpp
@@ -361,6 +361,7 @@ class JavaScriptImportSorter : public TokenAnalyzer {
     bool AnyImportAffected = false;
     bool FormattingOff = false;
     for (auto *Line : AnnotatedLines) {
+      assert(Line->First);
       Current = Line->First;
       LineEnd = Line->Last;
       // clang-format comments toggle formatting on/off.
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index 3ba81dfed38c2..cc8b48387fc9e 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -2353,9 +2353,10 @@ class ExpressionParser {
 void TokenAnnotator::setCommentLineLevels(
     SmallVectorImpl<AnnotatedLine *> &Lines) {
   const AnnotatedLine *NextNonCommentLine = nullptr;
-  for (AnnotatedLine *AL : llvm::reverse(Lines)) {
+  for (AnnotatedLine *Line : llvm::reverse(Lines)) {
+    assert(Line->First);
     bool CommentLine = true;
-    for (const FormatToken *Tok = AL->First; Tok; Tok = Tok->Next) {
+    for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
       if (!Tok->is(tok::comment)) {
         CommentLine = false;
         break;
@@ -2367,20 +2368,21 @@ void TokenAnnotator::setCommentLineLevels(
     if (NextNonCommentLine && CommentLine &&
         NextNonCommentLine->First->NewlinesBefore <= 1 &&
         NextNonCommentLine->First->OriginalColumn ==
-            AL->First->OriginalColumn) {
+            Line->First->OriginalColumn) {
       // Align comments for preprocessor lines with the # in column 0 if
       // preprocessor lines are not indented. Otherwise, align with the next
       // line.
-      AL->Level = (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
-                   (NextNonCommentLine->Type == LT_PreprocessorDirective ||
-                    NextNonCommentLine->Type == LT_ImportStatement))
-                      ? 0
-                      : NextNonCommentLine->Level;
+      Line->Level =
+          (Style.IndentPPDirectives != FormatStyle::PPDIS_BeforeHash &&
+           (NextNonCommentLine->Type == LT_PreprocessorDirective ||
+            NextNonCommentLine->Type == LT_ImportStatement))
+              ? 0
+              : NextNonCommentLine->Level;
     } else {
-      NextNonCommentLine = AL->First->isNot(tok::r_brace) ? AL : nullptr;
+      NextNonCommentLine = Line->First->isNot(tok::r_brace) ? Line : nullptr;
     }
 
-    setCommentLineLevels(AL->Children);
+    setCommentLineLevels(Line->Children);
   }
 }
 
diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp
index 08d1eeb18a9d0..293a693fd4818 100644
--- a/clang/lib/Format/UnwrappedLineFormatter.cpp
+++ b/clang/lib/Format/UnwrappedLineFormatter.cpp
@@ -1213,6 +1213,7 @@ unsigned UnwrappedLineFormatter::format(
            Joiner.getNextMergedLine(DryRun, IndentTracker);
        Line; PrevPrevLine = PreviousLine, PreviousLine = Line, Line = NextLine,
                            FirstLine = false) {
+    assert(Line->First);
     const AnnotatedLine &TheLine = *Line;
     unsigned Indent = IndentTracker.getIndent();
 
@@ -1240,7 +1241,7 @@ unsigned UnwrappedLineFormatter::format(
 
     if (ShouldFormat && TheLine.Type != LT_Invalid) {
       if (!DryRun) {
-        bool LastLine = Line->First->is(tok::eof);
+        bool LastLine = TheLine.First->is(tok::eof);
         formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, Indent,
                          LastLine ? LastStartColumn : NextStartColumn + Indent);
       }

From f533011252578f67a9615fb4ef56dc1ef555551b Mon Sep 17 00:00:00 2001
From: Jim Lin <jim@andestech.com>
Date: Mon, 24 Jan 2022 13:31:23 +0800
Subject: [PATCH 335/946] [Hexagon] Use llvm::Register instead of unsigned in
 HexagonConstExtenders.cpp. NFC.

Reviewed By: kparzysz

Differential Revision: https://reviews.llvm.org/D117851
---
 llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
index d3fcdb6ae9a85..d8af35cbf3a89 100644
--- a/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonConstExtenders.cpp
@@ -229,7 +229,7 @@ namespace {
   private:
     struct Register {
       Register() = default;
-      Register(unsigned R, unsigned S) : Reg(R), Sub(S) {}
+      Register(llvm::Register R, unsigned S) : Reg(R), Sub(S) {}
       Register(const MachineOperand &Op)
         : Reg(Op.getReg()), Sub(Op.getSubReg()) {}
       Register &operator=(const MachineOperand &Op) {
@@ -1573,7 +1573,7 @@ HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
         // No compounds are available. It is not clear whether we should
         // even process such extenders where the initializer cannot be
         // a single instruction, but do it for now.
-        unsigned TmpR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
+        llvm::Register TmpR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
         BuildMI(MBB, At, dl, HII->get(Hexagon::S2_asl_i_r), TmpR)
           .add(MachineOperand(Ex.Rs))
           .addImm(Ex.S);

From d6f8f56da04b2e975110c4a5ae15f00a22164e62 Mon Sep 17 00:00:00 2001
From: Lorenzo Chelini <l.chelini@icloud.com>
Date: Mon, 24 Jan 2022 09:07:20 +0100
Subject: [PATCH 336/946] [MLIR][Presburger] Silence -Wdangling-else warning
 (NFC)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc suggests explicit braces to avoid ambiguous ‘else’.
---
 mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
index 5d1cfb4c6e781..cf68c92650548 100644
--- a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
+++ b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
@@ -636,8 +636,9 @@ static void checkDivisionRepresentation(
   // denominator for a division is zero, we ignore its dividend.
   EXPECT_TRUE(dividends.size() == expectedDividends.size());
   for (unsigned i = 0, e = dividends.size(); i < e; ++i)
-    if (denominators[i] != 0)
+    if (denominators[i] != 0) {
       EXPECT_TRUE(expectedDividends[i] == dividends[i]);
+    }
 }
 
 TEST(IntegerPolyhedronTest, computeLocalReprSimple) {

From 81793bd276afefea0e525307676181478fc614c9 Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Mon, 24 Jan 2022 09:28:11 +0100
Subject: [PATCH 337/946] [clang-format] Assert Line->First and
 State.NextToken->Previous. NFC.

Cf. scan-build reports:
* https://llvm.org/reports/scan-build/report-FormatToken.cpp-precomputeFormattingInfos-35-93e1e1.html#EndPath
* https://llvm.org/reports/scan-build/report-ContinuationIndenter.cpp-addTokenOnCurrentLine-15-dfdc6d.html#EndPath
---
 clang/lib/Format/ContinuationIndenter.cpp | 9 +++++++--
 clang/lib/Format/FormatToken.cpp          | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp
index 28f13c06e3088..b66584652bc82 100644
--- a/clang/lib/Format/ContinuationIndenter.cpp
+++ b/clang/lib/Format/ContinuationIndenter.cpp
@@ -543,13 +543,15 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
                                                bool DryRun,
                                                unsigned ExtraSpaces) {
   const FormatToken &Current = *State.NextToken;
+  assert(State.NextToken->Previous);
+  const FormatToken &Previous = *State.NextToken->Previous;
 
   assert(!State.Stack.empty());
   State.NoContinuation = false;
 
   if ((Current.is(TT_ImplicitStringLiteral) &&
-       (Current.Previous->Tok.getIdentifierInfo() == nullptr ||
-        Current.Previous->Tok.getIdentifierInfo()->getPPKeywordID() ==
+       (Previous.Tok.getIdentifierInfo() == nullptr ||
+        Previous.Tok.getIdentifierInfo()->getPPKeywordID() ==
             tok::pp_not_keyword))) {
     unsigned EndColumn =
         SourceMgr.getSpellingColumnNumber(Current.WhitespaceRange.getEnd());
@@ -579,7 +581,9 @@ unsigned ContinuationIndenter::addTokenToState(LineState &State, bool Newline,
 void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
                                                  unsigned ExtraSpaces) {
   FormatToken &Current = *State.NextToken;
+  assert(State.NextToken->Previous);
   const FormatToken &Previous = *State.NextToken->Previous;
+
   if (Current.is(tok::equal) &&
       (State.Line->First->is(tok::kw_for) || Current.NestingLevel == 0) &&
       State.Stack.back().VariablePos == 0) {
@@ -775,6 +779,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun,
 unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State,
                                                  bool DryRun) {
   FormatToken &Current = *State.NextToken;
+  assert(State.NextToken->Previous);
   const FormatToken &Previous = *State.NextToken->Previous;
 
   // Extra penalty that needs to be added because of the way certain line
diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp
index def5663d04498..59d6f29bb54d2 100644
--- a/clang/lib/Format/FormatToken.cpp
+++ b/clang/lib/Format/FormatToken.cpp
@@ -189,6 +189,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) {
 
   bool HasSeparatingComment = false;
   for (unsigned i = 0, e = Commas.size() + 1; i != e; ++i) {
+    assert(ItemBegin);
     // Skip comments on their own line.
     while (ItemBegin->HasUnescapedNewline && ItemBegin->isTrailingComment()) {
       ItemBegin = ItemBegin->Next;

From 9aaa74aeeff37b85e29369287ee94773e699b8b7 Mon Sep 17 00:00:00 2001
From: "Chenbing.Zheng" <Chenbing.Zheng@streamcomputing.com>
Date: Mon, 24 Jan 2022 08:48:39 +0000
Subject: [PATCH 338/946] [RISCV] Add patterns of SET[U]LT_VI for STECC forms

This patch optmizes "li a0, 5
                     vmsgt[u].vx v10, v8, a0"
                 -> "vmsgt[u].vi v10, v8, 5"

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D118014
---
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |  2 ++
 .../RISCV/rvv/fixed-vectors-int-setcc.ll      | 22 ++++++++++++++++---
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 0ac959d79a024..7b556174bbf0c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -820,6 +820,8 @@ foreach vti = AllIntegerVectors in {
   defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSNE",  SETNE,  SETNE>;
   defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLE",  SETLE,  SETGE>;
   defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSLEU", SETULE, SETUGE>;
+  defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGT",  SETGT,  SETLT>;
+  defm : VPatIntegerSetCCVL_VI_Swappable<vti, "PseudoVMSGTU", SETUGT, SETULT>;
 
   defm : VPatIntegerSetCCVL_VIPlus1<vti, "PseudoVMSLE",  SETLT,
                                     SplatPat_simm5_plus1>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
index 9d0fb925a46e2..403fc6d7f43ed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-setcc.ll
@@ -553,7 +553,7 @@ define void @setgt_vi_v64i8(<64 x i8>* %x, <64 x i1>* %z) {
 ; CHECK-NEXT:    li a2, 64
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmsgt.vx v12, v8, zero
+; CHECK-NEXT:    vmsgt.vi v12, v8, 0
 ; CHECK-NEXT:    vsm.v v12, (a1)
 ; CHECK-NEXT:    ret
   %a = load <64 x i8>, <64 x i8>* %x
@@ -564,6 +564,23 @@ define void @setgt_vi_v64i8(<64 x i8>* %x, <64 x i1>* %z) {
   ret void
 }
 
+define void @setgt_vi_v64i8_nonzero(<64 x i8>* %x, <64 x i1>* %z) {
+; CHECK-LABEL: setgt_vi_v64i8_nonzero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a2, 64
+; CHECK-NEXT:    vsetvli zero, a2, e8, m4, ta, mu
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vmsgt.vi v12, v8, 5
+; CHECK-NEXT:    vsm.v v12, (a1)
+; CHECK-NEXT:    ret
+  %a = load <64 x i8>, <64 x i8>* %x
+  %b = insertelement <64 x i8> undef, i8 5, i32 0
+  %c = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
+  %d = icmp sgt <64 x i8> %a, %c
+  store <64 x i1> %d, <64 x i1>* %z
+  ret void
+}
+
 define void @setlt_vi_v128i8(<128 x i8>* %x, <128 x i1>* %z) {
 ; CHECK-LABEL: setlt_vi_v128i8:
 ; CHECK:       # %bb.0:
@@ -619,8 +636,7 @@ define void @setugt_vi_v32i8(<32 x i8>* %x, <32 x i1>* %z) {
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    li a0, 5
-; CHECK-NEXT:    vmsgtu.vx v10, v8, a0
+; CHECK-NEXT:    vmsgtu.vi v10, v8, 5
 ; CHECK-NEXT:    vsm.v v10, (a1)
 ; CHECK-NEXT:    ret
   %a = load <32 x i8>, <32 x i8>* %x

From ba845787b3fdd03380b8651d6ce11afeac9d6bba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Tue, 4 Jan 2022 13:38:10 +0100
Subject: [PATCH 339/946] [clang][sema] Add missing diagnostic parameter

The test case otherwise fails an assertion in Diagnostic::getArgKind().

Differential Revision: https://reviews.llvm.org/D116595
---
 clang/lib/Sema/SemaModule.cpp              | 2 +-
 clang/test/Modules/cxx20-export-import.cpp | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/Modules/cxx20-export-import.cpp

diff --git a/clang/lib/Sema/SemaModule.cpp b/clang/lib/Sema/SemaModule.cpp
index 996063f83e946..747734f2d0ff0 100644
--- a/clang/lib/Sema/SemaModule.cpp
+++ b/clang/lib/Sema/SemaModule.cpp
@@ -395,7 +395,7 @@ DeclResult Sema::ActOnModuleImport(SourceLocation StartLoc,
     // [module.interface]p1:
     // An export-declaration shall inhabit a namespace scope and appear in the
     // purview of a module interface unit.
-    Diag(ExportLoc, diag::err_export_not_in_module_interface);
+    Diag(ExportLoc, diag::err_export_not_in_module_interface) << 0;
   }
 
   return Import;
diff --git a/clang/test/Modules/cxx20-export-import.cpp b/clang/test/Modules/cxx20-export-import.cpp
new file mode 100644
index 0000000000000..a2620bd600649
--- /dev/null
+++ b/clang/test/Modules/cxx20-export-import.cpp
@@ -0,0 +1,3 @@
+
+// RUN: %clang_cc1 -std=c++20 -fmodules -fmodules-cache-path=%t -fimplicit-module-maps -I%S/Inputs -verify %s
+export import dummy; // expected-error {{export declaration can only be used within a module interface unit after the module declaration}}

From 3ad6de31c0cf5064867e6f9bf99e27e0b5c4128d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Fri, 21 Jan 2022 09:48:43 +0100
Subject: [PATCH 340/946] [clang][tests] Fix a c++/libc++ -stdlib value typo

"c++" is not usually a valid value for -stdlib.

Differential Revision: https://reviews.llvm.org/D117862
---
 clang/test/Driver/wasm-toolchain.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/test/Driver/wasm-toolchain.cpp b/clang/test/Driver/wasm-toolchain.cpp
index 18ebddc2093bf..df11324f2024b 100644
--- a/clang/test/Driver/wasm-toolchain.cpp
+++ b/clang/test/Driver/wasm-toolchain.cpp
@@ -14,35 +14,35 @@
 
 // A basic C++ link command-line with unknown OS.
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo --stdlib=c++ %s 2>&1 \
+// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo --stdlib=libc++ %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK %s
 // LINK: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C++ link command-line with optimization with unknown OS.
 
-// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s --stdlib=c++ 2>&1 \
+// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-unknown-unknown --sysroot=/foo %s --stdlib=libc++ 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_OPT %s
 // LINK_OPT: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_OPT: wasm-ld{{.*}}" "-L/foo/lib" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C++ link command-line with known OS.
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=c++ %s 2>&1 \
+// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=libc++ %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_KNOWN %s
 // LINK_KNOWN: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C++ link command-line with optimization with known OS.
 
-// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s --stdlib=c++ 2>&1 \
+// RUN: %clangxx -### -O2 -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo %s --stdlib=libc++ 2>&1 \
 // RUN:   | FileCheck -check-prefix=LINK_OPT_KNOWN %s
 // LINK_OPT_KNOWN: clang{{.*}}" "-cc1" {{.*}} "-o" "[[temp:[^"]*]]"
 // LINK_OPT_KNOWN: wasm-ld{{.*}}" "-L/foo/lib/wasm32-wasi" "crt1.o" "[[temp]]" "-lc++" "-lc++abi" "-lc" "{{.*[/\\]}}libclang_rt.builtins-wasm32.a" "-o" "a.out"
 
 // A basic C++ compile command-line with known OS.
 
-// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=c++ %s 2>&1 \
+// RUN: %clangxx -### -no-canonical-prefixes -target wasm32-wasi --sysroot=/foo --stdlib=libc++ %s 2>&1 \
 // RUN:   | FileCheck -check-prefix=COMPILE %s
 // COMPILE: clang{{.*}}" "-cc1"
 // COMPILE: "-resource-dir" "[[RESOURCE_DIR:[^"]*]]"

From d29e319263de17516f50cd46edbf1e62c1289dd4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 12:48:31 +0100
Subject: [PATCH 341/946] [OpaquePtrs] Add getNonOpaquePointerElementType()
 method (NFC)

This method is intended for use in places that cannot be reached
with opaque pointers, or part of deprecated methods. This makes
it easier to see that some uses of getPointerElementType() don't
need further action.

Differential Revision: https://reviews.llvm.org/D117870
---
 llvm/include/llvm/IR/Type.h                   |  9 +++++++
 llvm/lib/AsmParser/LLParser.cpp               | 12 ++++-----
 llvm/lib/IR/Core.cpp                          | 25 ++++++++-----------
 llvm/lib/Transforms/Coroutines/Coroutines.cpp |  4 +--
 llvm/lib/Transforms/Scalar/SROA.cpp           |  6 +++--
 5 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index c899c46d40554..98c97375ad7b2 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -366,7 +366,16 @@ class Type {
     return ContainedTys[0];
   }
 
+  /// This method is deprecated without replacement. Pointer element types are
+  /// not available with opaque pointers.
   Type *getPointerElementType() const {
+    return getNonOpaquePointerElementType();
+  }
+
+  /// Only use this method in code that is not reachable with opaque pointers,
+  /// or part of deprecated methods that will be removed as part of the opaque
+  /// pointers transition.
+  Type *getNonOpaquePointerElementType() const {
     assert(getTypeID() == PointerTyID);
     assert(NumContainedTys &&
            "Attempting to get element type of opaque pointer");
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 18c1c31e101dc..cec4ffd82f818 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1410,14 +1410,14 @@ static inline GlobalValue *createGlobalFwdRef(Module *M, PointerType *PTy) {
                               nullptr, GlobalVariable::NotThreadLocal,
                               PTy->getAddressSpace());
 
-  if (auto *FT = dyn_cast<FunctionType>(PTy->getPointerElementType()))
+  Type *ElemTy = PTy->getNonOpaquePointerElementType();
+  if (auto *FT = dyn_cast<FunctionType>(ElemTy))
     return Function::Create(FT, GlobalValue::ExternalWeakLinkage,
                             PTy->getAddressSpace(), "", M);
   else
-    return new GlobalVariable(*M, PTy->getPointerElementType(), false,
-                              GlobalValue::ExternalWeakLinkage, nullptr, "",
-                              nullptr, GlobalVariable::NotThreadLocal,
-                              PTy->getAddressSpace());
+    return new GlobalVariable(
+        *M, ElemTy, false, GlobalValue::ExternalWeakLinkage, nullptr, "",
+        nullptr, GlobalVariable::NotThreadLocal, PTy->getAddressSpace());
 }
 
 Value *LLParser::checkValidVariableType(LocTy Loc, const Twine &Name, Type *Ty,
@@ -5602,7 +5602,7 @@ bool LLParser::parseFunctionHeader(Function *&Fn, bool IsDefine) {
     if (FRVI != ForwardRefVals.end()) {
       FwdFn = FRVI->second.first;
       if (!FwdFn->getType()->isOpaque()) {
-        if (!FwdFn->getType()->getPointerElementType()->isFunctionTy())
+        if (!FwdFn->getType()->getNonOpaquePointerElementType()->isFunctionTy())
           return error(FRVI->second.second, "invalid forward reference to "
                                             "function as global value!");
         if (FwdFn->getType() != PFT)
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 9415e35a8f512..3f899471843fc 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -1691,8 +1691,7 @@ LLVMValueRef LLVMConstGEP(LLVMValueRef ConstantVal,
   ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
                                NumIndices);
   Constant *Val = unwrap<Constant>(ConstantVal);
-  Type *Ty =
-      cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+  Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType();
   return wrap(ConstantExpr::getGetElementPtr(Ty, Val, IdxList));
 }
 
@@ -1710,8 +1709,7 @@ LLVMValueRef LLVMConstInBoundsGEP(LLVMValueRef ConstantVal,
   ArrayRef<Constant *> IdxList(unwrap<Constant>(ConstantIndices, NumIndices),
                                NumIndices);
   Constant *Val = unwrap<Constant>(ConstantVal);
-  Type *Ty =
-      cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+  Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType();
   return wrap(ConstantExpr::getInBoundsGetElementPtr(Ty, Val, IdxList));
 }
 
@@ -2278,7 +2276,8 @@ void LLVMSetExternallyInitialized(LLVMValueRef GlobalVar, LLVMBool IsExtInit) {
 LLVMValueRef LLVMAddAlias(LLVMModuleRef M, LLVMTypeRef Ty, LLVMValueRef Aliasee,
                           const char *Name) {
   auto *PTy = cast<PointerType>(unwrap(Ty));
-  return wrap(GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(),
+  return wrap(GlobalAlias::create(PTy->getNonOpaquePointerElementType(),
+                                  PTy->getAddressSpace(),
                                   GlobalValue::ExternalLinkage, Name,
                                   unwrap<Constant>(Aliasee), unwrap(M)));
 }
@@ -3218,7 +3217,7 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
                              const char *Name) {
   Value *V = unwrap(Fn);
   FunctionType *FnT =
-      cast<FunctionType>(cast<PointerType>(V->getType())->getElementType());
+      cast<FunctionType>(V->getType()->getNonOpaquePointerElementType());
 
   return wrap(
       unwrap(B)->CreateInvoke(FnT, unwrap(Fn), unwrap(Then), unwrap(Catch),
@@ -3590,7 +3589,8 @@ LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal,
   Value *V = unwrap(PointerVal);
   PointerType *Ty = cast<PointerType>(V->getType());
 
-  return wrap(unwrap(B)->CreateLoad(Ty->getElementType(), V, Name));
+  return wrap(
+      unwrap(B)->CreateLoad(Ty->getNonOpaquePointerElementType(), V, Name));
 }
 
 LLVMValueRef LLVMBuildLoad2(LLVMBuilderRef B, LLVMTypeRef Ty,
@@ -3692,8 +3692,7 @@ LLVMValueRef LLVMBuildGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
                           const char *Name) {
   ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
   Value *Val = unwrap(Pointer);
-  Type *Ty =
-      cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+  Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType();
   return wrap(unwrap(B)->CreateGEP(Ty, Val, IdxList, Name));
 }
 
@@ -3709,8 +3708,7 @@ LLVMValueRef LLVMBuildInBoundsGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
                                   const char *Name) {
   ArrayRef<Value *> IdxList(unwrap(Indices), NumIndices);
   Value *Val = unwrap(Pointer);
-  Type *Ty =
-      cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+  Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType();
   return wrap(unwrap(B)->CreateInBoundsGEP(Ty, Val, IdxList, Name));
 }
 
@@ -3725,8 +3723,7 @@ LLVMValueRef LLVMBuildInBoundsGEP2(LLVMBuilderRef B, LLVMTypeRef Ty,
 LLVMValueRef LLVMBuildStructGEP(LLVMBuilderRef B, LLVMValueRef Pointer,
                                 unsigned Idx, const char *Name) {
   Value *Val = unwrap(Pointer);
-  Type *Ty =
-      cast<PointerType>(Val->getType()->getScalarType())->getElementType();
+  Type *Ty = Val->getType()->getScalarType()->getNonOpaquePointerElementType();
   return wrap(unwrap(B)->CreateStructGEP(Ty, Val, Idx, Name));
 }
 
@@ -3947,7 +3944,7 @@ LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
                            const char *Name) {
   Value *V = unwrap(Fn);
   FunctionType *FnT =
-      cast<FunctionType>(cast<PointerType>(V->getType())->getElementType());
+      cast<FunctionType>(V->getType()->getNonOpaquePointerElementType());
 
   return wrap(unwrap(B)->CreateCall(FnT, unwrap(Fn),
                                     makeArrayRef(unwrap(Args), NumArgs), Name));
diff --git a/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
index a8123aee319ef..965a146c143fa 100644
--- a/llvm/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -679,8 +679,8 @@ static void checkAsyncFuncPointer(const Instruction *I, Value *V) {
   if (AsyncFuncPtrAddr->getType()->isOpaquePointerTy())
     return;
 
-  auto *StructTy =
-      cast<StructType>(AsyncFuncPtrAddr->getType()->getPointerElementType());
+  auto *StructTy = cast<StructType>(
+      AsyncFuncPtrAddr->getType()->getNonOpaquePointerElementType());
   if (StructTy->isOpaque() || !StructTy->isPacked() ||
       StructTy->getNumElements() != 2 ||
       !StructTy->getElementType(0)->isIntegerTy(32) ||
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 1b0290cf5709d..2ed87ce6295b3 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1437,8 +1437,10 @@ static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
   if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
     return BasePtr;
 
-  return IRB.CreateInBoundsGEP(BasePtr->getType()->getPointerElementType(),
-                               BasePtr, Indices, NamePrefix + "sroa_idx");
+  // buildGEP() is only called for non-opaque pointers.
+  return IRB.CreateInBoundsGEP(
+      BasePtr->getType()->getNonOpaquePointerElementType(), BasePtr, Indices,
+      NamePrefix + "sroa_idx");
 }
 
 /// Get a natural GEP off of the BasePtr walking through Ty toward

From 67346b43e0ed28047b3a4e40ea18d2218febcbf8 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 12:01:18 +0100
Subject: [PATCH 342/946] [Attributor] Use MemoryLocation to get pointer
 operand and accessed type (NFCI)

This relies on existing APIs and avoids accessing the pointer
element type. The alternative would be to extend getPointerOperand()
to also return the accessed type, but I figured going through
MemoryLocation would be cleaner.

Differential Revision: https://reviews.llvm.org/D117868
---
 .../Transforms/IPO/AttributorAttributes.cpp   | 75 +++++++------------
 1 file changed, 27 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index d0e13dc269385..76420783b2d15 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -417,12 +417,10 @@ const Value *stripAndAccumulateMinimalOffsets(
                                                 AttributorAnalysis);
 }
 
-static const Value *getMinimalBaseOfAccessPointerOperand(
-    Attributor &A, const AbstractAttribute &QueryingAA, const Instruction *I,
-    int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) {
-  const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false);
-  if (!Ptr)
-    return nullptr;
+static const Value *
+getMinimalBaseOfPointer(Attributor &A, const AbstractAttribute &QueryingAA,
+                        const Value *Ptr, int64_t &BytesOffset,
+                        const DataLayout &DL, bool AllowNonInbounds = false) {
   APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
   const Value *Base = stripAndAccumulateMinimalOffsets(
       A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds);
@@ -431,18 +429,6 @@ static const Value *getMinimalBaseOfAccessPointerOperand(
   return Base;
 }
 
-static const Value *
-getBasePointerOfAccessPointerOperand(const Instruction *I, int64_t &BytesOffset,
-                                     const DataLayout &DL,
-                                     bool AllowNonInbounds = false) {
-  const Value *Ptr = getPointerOperand(I, /* AllowVolatile */ false);
-  if (!Ptr)
-    return nullptr;
-
-  return GetPointerBaseWithConstantOffset(Ptr, BytesOffset, DL,
-                                          AllowNonInbounds);
-}
-
 /// Clamp the information known for all returned values of a function
 /// (identified by \p QueryingAA) into \p S.
 template <typename AAType, typename StateType = typename AAType::StateType>
@@ -2151,31 +2137,26 @@ static int64_t getKnownNonNullAndDerefBytesForUse(
     return DerefAA.getKnownDereferenceableBytes();
   }
 
+  Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
+  if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile())
+    return 0;
+
   int64_t Offset;
   const Value *Base =
-      getMinimalBaseOfAccessPointerOperand(A, QueryingAA, I, Offset, DL);
-  if (Base) {
-    if (Base == &AssociatedValue &&
-        getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
-      int64_t DerefBytes =
-          (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType()) + Offset;
-
-      IsNonNull |= !NullPointerIsDefined;
-      return std::max(int64_t(0), DerefBytes);
-    }
+      getMinimalBaseOfPointer(A, QueryingAA, Loc->Ptr, Offset, DL);
+  if (Base && Base == &AssociatedValue) {
+    int64_t DerefBytes = Loc->Size.getValue() + Offset;
+    IsNonNull |= !NullPointerIsDefined;
+    return std::max(int64_t(0), DerefBytes);
   }
 
   /// Corner case when an offset is 0.
-  Base = getBasePointerOfAccessPointerOperand(I, Offset, DL,
-                                              /*AllowNonInbounds*/ true);
-  if (Base) {
-    if (Offset == 0 && Base == &AssociatedValue &&
-        getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
-      int64_t DerefBytes =
-          (int64_t)DL.getTypeStoreSize(PtrTy->getPointerElementType());
-      IsNonNull |= !NullPointerIsDefined;
-      return std::max(int64_t(0), DerefBytes);
-    }
+  Base = GetPointerBaseWithConstantOffset(Loc->Ptr, Offset, DL,
+                                          /*AllowNonInbounds*/ true);
+  if (Base && Base == &AssociatedValue && Offset == 0) {
+    int64_t DerefBytes = Loc->Size.getValue();
+    IsNonNull |= !NullPointerIsDefined;
+    return std::max(int64_t(0), DerefBytes);
   }
 
   return 0;
@@ -4083,17 +4064,15 @@ struct AADereferenceableImpl : AADereferenceable {
     if (!UseV->getType()->isPointerTy())
       return;
 
-    Type *PtrTy = UseV->getType();
-    const DataLayout &DL = A.getDataLayout();
+    Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I);
+    if (!Loc || Loc->Ptr != UseV || !Loc->Size.isPrecise() || I->isVolatile())
+      return;
+
     int64_t Offset;
-    if (const Value *Base = getBasePointerOfAccessPointerOperand(
-            I, Offset, DL, /*AllowNonInbounds*/ true)) {
-      if (Base == &getAssociatedValue() &&
-          getPointerOperand(I, /* AllowVolatile */ false) == UseV) {
-        uint64_t Size = DL.getTypeStoreSize(PtrTy->getPointerElementType());
-        State.addAccessedBytes(Offset, Size);
-      }
-    }
+    const Value *Base = GetPointerBaseWithConstantOffset(
+        Loc->Ptr, Offset, A.getDataLayout(), /*AllowNonInbounds*/ true);
+    if (Base && Base == &getAssociatedValue())
+      State.addAccessedBytes(Offset, Loc->Size.getValue());
   }
 
   /// See followUsesInMBEC

From 7ccacaf4428d1712029594184baa6f617a51c340 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski@arm.com>
Date: Tue, 11 Jan 2022 13:16:03 +0000
Subject: [PATCH 343/946] [flang][examples] Add missing CMake dependencies

Currently, everything that includes "flang/Parser/parse-tree.h" in Flang
depends on the `gen_acc` and `gen_omp` CMake targets (these targets
generate include files that are used in "parse-tree.h"). The examples in
Flang do use this header file and hence also depend on
`gen_acc`/`gen_omp`. This patch updates relevant CMake scripts
accordingly.

I've also taken the liberty to rename some of the example files so that
their names follow LLVM's coding guidelines.

Differential Revision: https://reviews.llvm.org/D117016
---
 flang/examples/CMakeLists.txt                            | 2 +-
 flang/examples/FlangOmpReport/CMakeLists.txt             | 9 +++++++++
 .../FlangOmpReport.cpp}                                  | 2 +-
 .../FlangOmpReportVisitor.cpp}                           | 2 +-
 .../FlangOmpReportVisitor.h}                             | 0
 .../requirements.txt                                     | 0
 .../yaml_summarizer.py                                   | 0
 flang/examples/PrintFlangFunctionNames/CMakeLists.txt    | 7 +++++--
 flang/examples/flang-omp-report-plugin/CMakeLists.txt    | 6 ------
 9 files changed, 17 insertions(+), 11 deletions(-)
 create mode 100644 flang/examples/FlangOmpReport/CMakeLists.txt
 rename flang/examples/{flang-omp-report-plugin/flang-omp-report.cpp => FlangOmpReport/FlangOmpReport.cpp} (98%)
 rename flang/examples/{flang-omp-report-plugin/flang-omp-report-visitor.cpp => FlangOmpReport/FlangOmpReportVisitor.cpp} (99%)
 rename flang/examples/{flang-omp-report-plugin/flang-omp-report-visitor.h => FlangOmpReport/FlangOmpReportVisitor.h} (100%)
 rename flang/examples/{flang-omp-report-plugin => FlangOmpReport}/requirements.txt (100%)
 rename flang/examples/{flang-omp-report-plugin => FlangOmpReport}/yaml_summarizer.py (100%)
 delete mode 100644 flang/examples/flang-omp-report-plugin/CMakeLists.txt

diff --git a/flang/examples/CMakeLists.txt b/flang/examples/CMakeLists.txt
index f9b656ce86fe1..b0a78eeaa4eaa 100644
--- a/flang/examples/CMakeLists.txt
+++ b/flang/examples/CMakeLists.txt
@@ -12,4 +12,4 @@ target_link_libraries(external-hello-world
 )
 
 add_subdirectory(PrintFlangFunctionNames)
-add_subdirectory(flang-omp-report-plugin)
+add_subdirectory(FlangOmpReport)
diff --git a/flang/examples/FlangOmpReport/CMakeLists.txt b/flang/examples/FlangOmpReport/CMakeLists.txt
new file mode 100644
index 0000000000000..aebebe92d6b30
--- /dev/null
+++ b/flang/examples/FlangOmpReport/CMakeLists.txt
@@ -0,0 +1,9 @@
+add_llvm_library(flangOmpReport
+  MODULE
+  FlangOmpReport.cpp
+  FlangOmpReportVisitor.cpp
+
+  DEPENDS
+  acc_gen
+  omp_gen
+)
diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report.cpp b/flang/examples/FlangOmpReport/FlangOmpReport.cpp
similarity index 98%
rename from flang/examples/flang-omp-report-plugin/flang-omp-report.cpp
rename to flang/examples/FlangOmpReport/FlangOmpReport.cpp
index 9ee8eb1a80cbd..0fa7582561d27 100644
--- a/flang/examples/flang-omp-report-plugin/flang-omp-report.cpp
+++ b/flang/examples/FlangOmpReport/FlangOmpReport.cpp
@@ -15,7 +15,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "flang-omp-report-visitor.h"
+#include "FlangOmpReportVisitor.h"
 
 #include "flang/Frontend/CompilerInstance.h"
 #include "flang/Frontend/FrontendActions.h"
diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
similarity index 99%
rename from flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp
rename to flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
index 32dcef25fedab..a0c3b194bfd31 100644
--- a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp
+++ b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "flang-omp-report-visitor.h"
+#include "FlangOmpReportVisitor.h"
 #include "llvm/ADT/StringExtras.h"
 
 namespace Fortran {
diff --git a/flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h b/flang/examples/FlangOmpReport/FlangOmpReportVisitor.h
similarity index 100%
rename from flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h
rename to flang/examples/FlangOmpReport/FlangOmpReportVisitor.h
diff --git a/flang/examples/flang-omp-report-plugin/requirements.txt b/flang/examples/FlangOmpReport/requirements.txt
similarity index 100%
rename from flang/examples/flang-omp-report-plugin/requirements.txt
rename to flang/examples/FlangOmpReport/requirements.txt
diff --git a/flang/examples/flang-omp-report-plugin/yaml_summarizer.py b/flang/examples/FlangOmpReport/yaml_summarizer.py
similarity index 100%
rename from flang/examples/flang-omp-report-plugin/yaml_summarizer.py
rename to flang/examples/FlangOmpReport/yaml_summarizer.py
diff --git a/flang/examples/PrintFlangFunctionNames/CMakeLists.txt b/flang/examples/PrintFlangFunctionNames/CMakeLists.txt
index 6b107b4e1ea53..490f2ea895163 100644
--- a/flang/examples/PrintFlangFunctionNames/CMakeLists.txt
+++ b/flang/examples/PrintFlangFunctionNames/CMakeLists.txt
@@ -1,7 +1,10 @@
 # TODO: Note that this is currently only available on Linux.
 # On Windows, we would also have to specify e.g. `PLUGIN_TOOL`.
-add_llvm_library(
-    flangPrintFunctionNames
+add_llvm_library(flangPrintFunctionNames
     MODULE
     PrintFlangFunctionNames.cpp
+
+    DEPENDS
+    acc_gen
+    omp_gen
 )
diff --git a/flang/examples/flang-omp-report-plugin/CMakeLists.txt b/flang/examples/flang-omp-report-plugin/CMakeLists.txt
deleted file mode 100644
index 993ba320d7570..0000000000000
--- a/flang/examples/flang-omp-report-plugin/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-add_llvm_library(
-  flangOmpReport
-  MODULE
-  flang-omp-report.cpp
-  flang-omp-report-visitor.cpp
-)

From 4f8fdf78279f0cb298dc0dc215ee56b0342235ee Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Wed, 19 Jan 2022 14:40:22 +0000
Subject: [PATCH 344/946] [ISEL] Canonicalise constant splats to RHS.

SelectionDAG::getNode() canonicalises constants to the RHS if the
operation is commutative, but it doesn't do so for constant splat
vectors. Doing this early helps making certain folds on vector types,
simplifying the code required for target DAGCombines that are enabled
before Type legalization.

Somewhat to my surprise, DAGCombine doesn't seem to traverse the
DAG in a post-order DFS, so at the time of doing some custom fold where
the input is a MUL, DAGCombiner::visitMUL hasn't yet reordered the
constant splat to the RHS.

This patch leads to a few improvements, but also a few  minor regressions,
which I traced down to D46492. When I tried reverting this change to see
if the changes were still necessary, I ran into some segfaults. Not sure
if there is some latent bug there.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117794
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 ++++----
 ...-masked-merge-vector-variablemask-const.ll |  8 ++--
 llvm/test/CodeGen/PowerPC/combine-fneg.ll     |  8 ++--
 .../CodeGen/PowerPC/repeated-fp-divisors.ll   |  4 +-
 llvm/test/CodeGen/X86/dpbusd_const.ll         | 17 +++----
 llvm/test/CodeGen/X86/extractelement-fp.ll    |  8 ++--
 llvm/test/CodeGen/X86/fp-round.ll             | 14 +++---
 llvm/test/CodeGen/X86/fp128-cast.ll           |  2 +-
 ...ist-and-by-const-from-shl-in-eqcmp-zero.ll | 16 +++----
 llvm/test/CodeGen/X86/pr43509.ll              |  8 ++--
 ...-masked-merge-vector-variablemask-const.ll | 47 ++++++++++---------
 11 files changed, 75 insertions(+), 76 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 52c69feebaf7e..7ca6f9aa4cf0a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5610,22 +5610,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
   assert(N1.getOpcode() != ISD::DELETED_NODE &&
          N2.getOpcode() != ISD::DELETED_NODE &&
          "Operand is DELETED_NODE!");
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
-  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
-  ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
-
   // Canonicalize constant to RHS if commutative.
   if (TLI->isCommutativeBinOp(Opcode)) {
-    if (N1C && !N2C) {
-      std::swap(N1C, N2C);
-      std::swap(N1, N2);
-    } else if (N1CFP && !N2CFP) {
-      std::swap(N1CFP, N2CFP);
+    bool IsN1C = isConstantIntBuildVectorOrConstantInt(N1);
+    bool IsN2C = isConstantIntBuildVectorOrConstantInt(N2);
+    bool IsN1CFP = isConstantFPBuildVectorOrConstantFP(N1);
+    bool IsN2CFP = isConstantFPBuildVectorOrConstantFP(N2);
+    if ((IsN1C && !IsN2C) || (IsN1CFP && !IsN2CFP))
       std::swap(N1, N2);
-    }
   }
 
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+
   switch (Opcode) {
   default: break;
   case ISD::TokenFactor:
diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
index 2e385fdd6f25f..aa0b7e14afc56 100644
--- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask-const.ll
@@ -126,7 +126,8 @@ define <4 x i32> @out_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %
 define <4 x i32> @in_constant_mone_vary(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
 ; CHECK-LABEL: in_constant_mone_vary:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    orr v0.16b, v1.16b, v2.16b
+; CHECK-NEXT:    bic v0.16b, v2.16b, v1.16b
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
   %n1 = and <4 x i32> %n0, %mask
@@ -152,8 +153,9 @@ define <4 x i32> @out_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4
 define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> %x, <4 x i32> %y, <4 x i32> %mask) {
 ; CHECK-LABEL: in_constant_mone_vary_invmask:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    and v0.16b, v1.16b, v2.16b
-; CHECK-NEXT:    orn v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    mvn v0.16b, v1.16b
+; CHECK-NEXT:    bic v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
   %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
diff --git a/llvm/test/CodeGen/PowerPC/combine-fneg.ll b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
index 1124fbd22a0e5..771c05f184a04 100644
--- a/llvm/test/CodeGen/PowerPC/combine-fneg.ll
+++ b/llvm/test/CodeGen/PowerPC/combine-fneg.ll
@@ -13,10 +13,10 @@ define <4 x double> @fneg_fdiv_splat(double %a0, <4 x double> %a1) {
 ; CHECK-NEXT:    xvredp 2, 0
 ; CHECK-NEXT:    xxswapd 1, 1
 ; CHECK-NEXT:    xxlor 3, 1, 1
-; CHECK-NEXT:    xvmaddadp 3, 0, 2
-; CHECK-NEXT:    xvnmsubadp 2, 2, 3
-; CHECK-NEXT:    xvmaddadp 1, 0, 2
-; CHECK-NEXT:    xvmsubadp 2, 2, 1
+; CHECK-NEXT:    xvnmsubadp 3, 0, 2
+; CHECK-NEXT:    xvmaddadp 2, 2, 3
+; CHECK-NEXT:    xvnmsubadp 1, 0, 2
+; CHECK-NEXT:    xvnmaddadp 2, 2, 1
 ; CHECK-NEXT:    xvmuldp 34, 34, 2
 ; CHECK-NEXT:    xvmuldp 35, 35, 2
 ; CHECK-NEXT:    blr
diff --git a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
index 2b93974263286..1185737fb0c96 100644
--- a/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
+++ b/llvm/test/CodeGen/PowerPC/repeated-fp-divisors.ll
@@ -36,9 +36,9 @@ define <4 x float> @repeated_fp_divisor(float %a, <4 x float> %b) {
 ; CHECK-NEXT:    lvx 4, 0, 3
 ; CHECK-NEXT:    xxspltw 0, 0, 0
 ; CHECK-NEXT:    xvresp 1, 0
-; CHECK-NEXT:    xvnmsubasp 35, 0, 1
+; CHECK-NEXT:    xvmaddasp 35, 0, 1
 ; CHECK-NEXT:    xvmulsp 0, 34, 36
-; CHECK-NEXT:    xvmaddasp 1, 1, 35
+; CHECK-NEXT:    xvnmsubasp 1, 1, 35
 ; CHECK-NEXT:    xvmulsp 34, 0, 1
 ; CHECK-NEXT:    blr
   %ins = insertelement <4 x float> undef, float %a, i32 0
diff --git a/llvm/test/CodeGen/X86/dpbusd_const.ll b/llvm/test/CodeGen/X86/dpbusd_const.ll
index aa780fe3b94ad..b0ffb23c9ced3 100644
--- a/llvm/test/CodeGen/X86/dpbusd_const.ll
+++ b/llvm/test/CodeGen/X86/dpbusd_const.ll
@@ -68,8 +68,7 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
 ; AVXVNNI-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVXVNNI-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVXVNNI-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVXVNNI-NEXT:    vmovdqa {{.*#+}} xmm2 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVXVNNI-NEXT:    {vex} vpdpbusd %xmm0, %xmm2, %xmm1
+; AVXVNNI-NEXT:    {vex} vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVXVNNI-NEXT:    vmovd %xmm1, %eax
 ; AVXVNNI-NEXT:    addl %edi, %eax
 ; AVXVNNI-NEXT:    retq
@@ -80,10 +79,9 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
 ; AVX512VNNI-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX512VNNI-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512VNNI-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
-; AVX512VNNI-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVX512VNNI-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512VNNI-NEXT:    vpdpbusd %zmm0, %zmm1, %zmm2
-; AVX512VNNI-NEXT:    vmovd %xmm2, %eax
+; AVX512VNNI-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VNNI-NEXT:    vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1
+; AVX512VNNI-NEXT:    vmovd %xmm1, %eax
 ; AVX512VNNI-NEXT:    addl %edi, %eax
 ; AVX512VNNI-NEXT:    vzeroupper
 ; AVX512VNNI-NEXT:    retq
@@ -92,10 +90,9 @@ define i32 @mul_4xi4_cz(<4 x i4> %a, i32 %c) {
 ; AVX512VLVNNI:       # %bb.0: # %entry
 ; AVX512VLVNNI-NEXT:    vpmovdb %xmm0, %xmm0
 ; AVX512VLVNNI-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512VLVNNI-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,1,2,127,0,0,0,0,0,0,0,0,0,0,0,0]
-; AVX512VLVNNI-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX512VLVNNI-NEXT:    vpdpbusd %xmm0, %xmm1, %xmm2
-; AVX512VLVNNI-NEXT:    vmovd %xmm2, %eax
+; AVX512VLVNNI-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VLVNNI-NEXT:    vpdpbusd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; AVX512VLVNNI-NEXT:    vmovd %xmm1, %eax
 ; AVX512VLVNNI-NEXT:    addl %edi, %eax
 ; AVX512VLVNNI-NEXT:    retq
 entry:
diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll
index 8ff73d5a7ffa8..c398fb8c74cb8 100644
--- a/llvm/test/CodeGen/X86/extractelement-fp.ll
+++ b/llvm/test/CodeGen/X86/extractelement-fp.ll
@@ -1070,7 +1070,7 @@ define float @round_v4f32(<4 x float> %x) nounwind {
 ; X64-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; X64-NEXT:    vandps %xmm1, %xmm0, %xmm1
 ; X64-NEXT:    vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; X64-NEXT:    vorps %xmm1, %xmm2, %xmm1
+; X64-NEXT:    vorps %xmm2, %xmm1, %xmm1
 ; X64-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
 ; X64-NEXT:    retq
@@ -1081,7 +1081,7 @@ define float @round_v4f32(<4 x float> %x) nounwind {
 ; X86-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; X86-NEXT:    vandps %xmm1, %xmm0, %xmm1
 ; X86-NEXT:    vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; X86-NEXT:    vorps %xmm1, %xmm2, %xmm1
+; X86-NEXT:    vorps %xmm2, %xmm1, %xmm1
 ; X86-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
 ; X86-NEXT:    vmovss %xmm0, (%esp)
@@ -1099,7 +1099,7 @@ define double @round_v4f64(<4 x double> %x) nounwind {
 ; X64-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; X64-NEXT:    vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; X64-NEXT:    # xmm2 = mem[0,0]
-; X64-NEXT:    vorpd %xmm1, %xmm2, %xmm1
+; X64-NEXT:    vorpd %xmm2, %xmm1, %xmm1
 ; X64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
 ; X64-NEXT:    vzeroupper
@@ -1114,7 +1114,7 @@ define double @round_v4f64(<4 x double> %x) nounwind {
 ; X86-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
 ; X86-NEXT:    vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; X86-NEXT:    # xmm2 = mem[0,0]
-; X86-NEXT:    vorpd %xmm1, %xmm2, %xmm1
+; X86-NEXT:    vorpd %xmm2, %xmm1, %xmm1
 ; X86-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
 ; X86-NEXT:    vmovsd %xmm0, (%esp)
diff --git a/llvm/test/CodeGen/X86/fp-round.ll b/llvm/test/CodeGen/X86/fp-round.ll
index 7798ab682d41e..955501544ff55 100644
--- a/llvm/test/CodeGen/X86/fp-round.ll
+++ b/llvm/test/CodeGen/X86/fp-round.ll
@@ -41,7 +41,7 @@ define half @round_f16(half %h) {
 ; AVX1-NEXT:    callq ___extendhfsf2
 ; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT:    vorps %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vorps %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    callq ___truncsfhf2
@@ -94,7 +94,7 @@ define float @round_f32(float %x) {
 ; AVX1:       ## %bb.0:
 ; AVX1-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT:    vorps %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vorps %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vroundss $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
@@ -130,7 +130,7 @@ define double @round_f64(double %x) {
 ; AVX1-NEXT:    vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm2 = [4.9999999999999994E-1,4.9999999999999994E-1]
 ; AVX1-NEXT:    ## xmm2 = mem[0,0]
-; AVX1-NEXT:    vorpd %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vorpd %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vroundsd $11, %xmm0, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
@@ -521,11 +521,11 @@ define <16 x float> @round_v16f32(<16 x float> %x) {
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX1-NEXT:    vandps %ymm2, %ymm0, %ymm3
 ; AVX1-NEXT:    vmovaps {{.*#+}} ymm4 = [4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1,4.9999997E-1]
-; AVX1-NEXT:    vorps %ymm3, %ymm4, %ymm3
+; AVX1-NEXT:    vorps %ymm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vaddps %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT:    vroundps $11, %ymm0, %ymm0
 ; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm2
-; AVX1-NEXT:    vorps %ymm2, %ymm4, %ymm2
+; AVX1-NEXT:    vorps %ymm4, %ymm2, %ymm2
 ; AVX1-NEXT:    vaddps %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT:    vroundps $11, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
@@ -620,11 +620,11 @@ define <8 x double> @round_v8f64(<8 x double> %x) {
 ; AVX1-NEXT:    vmovapd {{.*#+}} ymm2 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
 ; AVX1-NEXT:    vandpd %ymm2, %ymm0, %ymm3
 ; AVX1-NEXT:    vmovapd {{.*#+}} ymm4 = [4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1,4.9999999999999994E-1]
-; AVX1-NEXT:    vorpd %ymm3, %ymm4, %ymm3
+; AVX1-NEXT:    vorpd %ymm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vaddpd %ymm3, %ymm0, %ymm0
 ; AVX1-NEXT:    vroundpd $11, %ymm0, %ymm0
 ; AVX1-NEXT:    vandpd %ymm2, %ymm1, %ymm2
-; AVX1-NEXT:    vorpd %ymm2, %ymm4, %ymm2
+; AVX1-NEXT:    vorpd %ymm4, %ymm2, %ymm2
 ; AVX1-NEXT:    vaddpd %ymm2, %ymm1, %ymm1
 ; AVX1-NEXT:    vroundpd $11, %ymm1, %ymm1
 ; AVX1-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll
index 500cb0c677ff5..530ae967e1c07 100644
--- a/llvm/test/CodeGen/X86/fp128-cast.ll
+++ b/llvm/test/CodeGen/X86/fp128-cast.ll
@@ -1326,7 +1326,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
 ; X64-AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [+Inf,+Inf]
 ; X64-AVX-NEXT:    # xmm1 = mem[0,0]
-; X64-AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
+; X64-AVX-NEXT:    vorps %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT:    callq __extenddftf2@PLT
 ; X64-AVX-NEXT:    addq $8, %rsp
 ; X64-AVX-NEXT:  .LBB26_2: # %cleanup
diff --git a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
index 8a452ddc06b62..54d74c3c86d8e 100644
--- a/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -465,9 +465,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm3
-; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm2
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X86-SSE2-NEXT:    pand %xmm1, %xmm0
@@ -491,9 +491,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; X64-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT:    pmuludq %xmm2, %xmm1
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X64-SSE2-NEXT:    pmuludq %xmm2, %xmm3
-; X64-SSE2-NEXT:    pmuludq %xmm1, %xmm2
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X64-SSE2-NEXT:    pand %xmm1, %xmm0
@@ -611,9 +611,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 ; X86-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
 ; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X86-SSE2-NEXT:    pmuludq %xmm2, %xmm3
-; X86-SSE2-NEXT:    pmuludq %xmm1, %xmm2
-; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X86-SSE2-NEXT:    pand %xmm1, %xmm0
@@ -637,9 +637,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwi
 ; X64-SSE2-NEXT:    cvttps2dq %xmm1, %xmm1
 ; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,1,1]
 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT:    pmuludq %xmm2, %xmm1
+; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
 ; X64-SSE2-NEXT:    pmuludq %xmm2, %xmm3
-; X64-SSE2-NEXT:    pmuludq %xmm1, %xmm2
-; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
 ; X64-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
 ; X64-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; X64-SSE2-NEXT:    pand %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/pr43509.ll b/llvm/test/CodeGen/X86/pr43509.ll
index e2c3affd952bb..87ddad03e9c45 100644
--- a/llvm/test/CodeGen/X86/pr43509.ll
+++ b/llvm/test/CodeGen/X86/pr43509.ll
@@ -4,12 +4,10 @@
 define <8 x i8> @foo(<8 x float> %arg) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %bb
-; CHECK-NEXT:    vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k0
-; CHECK-NEXT:    vpmovm2b %k0, %xmm1
-; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; CHECK-NEXT:    vcmpltps %ymm2, %ymm0, %k1
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vcmpltps %ymm1, %ymm0, %k1
+; CHECK-NEXT:    vcmpgtps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %k1 {%k1}
 ; CHECK-NEXT:    vmovdqu8 {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 {%k1} {z}
-; CHECK-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; CHECK-NEXT:    vzeroupper
 ; CHECK-NEXT:    retq
 bb:
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
index 2d6c4dc829ed9..705d655939c30 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -336,23 +336,26 @@ define <4 x i32> @in_constant_mone_vary(<4 x i32> *%px, <4 x i32> *%py, <4 x i32
 ; CHECK-SSE1-LABEL: in_constant_mone_vary:
 ; CHECK-SSE1:       # %bb.0:
 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
-; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
+; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm0
 ; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
-; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
-; CHECK-SSE1-NEXT:    orps %xmm0, %xmm1
+; CHECK-SSE1-NEXT:    andnps (%rcx), %xmm1
+; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
 ; CHECK-SSE1-NEXT:    movaps %xmm1, (%rdi)
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm0
-; CHECK-SSE2-NEXT:    orps (%rdx), %xmm0
+; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm1
+; CHECK-SSE2-NEXT:    movaps %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    andnps (%rdx), %xmm0
+; CHECK-SSE2-NEXT:    xorps %xmm1, %xmm0
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-XOP-LABEL: in_constant_mone_vary:
 ; CHECK-XOP:       # %bb.0:
 ; CHECK-XOP-NEXT:    vmovaps (%rsi), %xmm0
-; CHECK-XOP-NEXT:    vorps (%rdx), %xmm0, %xmm0
+; CHECK-XOP-NEXT:    vandnps (%rdx), %xmm0, %xmm1
+; CHECK-XOP-NEXT:    vxorps %xmm0, %xmm1, %xmm0
 ; CHECK-XOP-NEXT:    retq
   %x = load <4 x i32>, <4 x i32> *%px, align 16
   %y = load <4 x i32>, <4 x i32> *%py, align 16
@@ -408,30 +411,32 @@ define <4 x i32> @in_constant_mone_vary_invmask(<4 x i32> *%px, <4 x i32> *%py,
 ; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-SSE1:       # %bb.0:
 ; CHECK-SSE1-NEXT:    movq %rdi, %rax
-; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
-; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
-; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
-; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
-; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
-; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
+; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm0
+; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm1
+; CHECK-SSE1-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm2
+; CHECK-SSE1-NEXT:    andnps %xmm1, %xmm2
+; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm2
+; CHECK-SSE1-NEXT:    movaps %xmm2, (%rdi)
 ; CHECK-SSE1-NEXT:    retq
 ;
 ; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-SSE2:       # %bb.0:
-; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
-; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
-; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
-; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    movdqa (%rsi), %xmm1
+; CHECK-SSE2-NEXT:    pcmpeqd %xmm2, %xmm2
+; CHECK-SSE2-NEXT:    pxor (%rdx), %xmm2
+; CHECK-SSE2-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pandn %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-SSE2-NEXT:    retq
 ;
 ; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
 ; CHECK-XOP:       # %bb.0:
-; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
+; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
 ; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
-; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
-; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
-; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
+; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm1, %xmm1
+; CHECK-XOP-NEXT:    vpandn %xmm1, %xmm0, %xmm1
+; CHECK-XOP-NEXT:    vpxor %xmm0, %xmm1, %xmm0
 ; CHECK-XOP-NEXT:    retq
   %x = load <4 x i32>, <4 x i32> *%px, align 16
   %y = load <4 x i32>, <4 x i32> *%py, align 16

From e7c9a6cae09d99388d8384ca7c0fb5b24b353975 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 17 Jan 2022 15:48:01 +0100
Subject: [PATCH 345/946] [SDAG] Don't move DBG_VALUE instructions after
 insertion point during scheduling (PR53243)

EmitSchedule() shouldn't be touching instructions after the provided
insertion point. The change introduced in D83561 performs a scan to
the end of the block, and thus may move unrelated instructions. In
particular, this ends up moving instructions that have been produced
by FastISel and will later be deleted. Moving them means that more
instructions than intended are removed.

Fix this by stopping the iteration when the insertion point is
reached.

Fixes https://github.com/llvm/llvm-project/issues/53243.

Differential Revision: https://reviews.llvm.org/D117489
---
 .../SelectionDAG/ScheduleDAGSDNodes.cpp       |  7 ++--
 .../CodeGen/X86/pr53243-tail-call-fastisel.ll | 39 +++++++++++++++++++
 2 files changed, 43 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/pr53243-tail-call-fastisel.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index bec240d6c4d49..403f345738998 100644
--- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -1057,12 +1057,13 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
            "first terminator cannot be a debug value");
     for (MachineInstr &MI : make_early_inc_range(
              make_range(std::next(FirstTerm), InsertBB->end()))) {
+      // Only scan up to insertion point.
+      if (&MI == InsertPos)
+        break;
+
       if (!MI.isDebugValue())
         continue;
 
-      if (&MI == InsertPos)
-        InsertPos = std::prev(InsertPos->getIterator());
-
       // The DBG_VALUE was referencing a value produced by a terminator. By
       // moving the DBG_VALUE, the referenced value also needs invalidating.
       MI.getOperand(0).ChangeToRegister(0, false);
diff --git a/llvm/test/CodeGen/X86/pr53243-tail-call-fastisel.ll b/llvm/test/CodeGen/X86/pr53243-tail-call-fastisel.ll
new file mode 100644
index 0000000000000..333eff8fb0081
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr53243-tail-call-fastisel.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -fast-isel -mtriple=x86_64-- < %s | FileCheck %s
+
+define void @test() {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    jmp set_state@PLT # TAILCALL
+  tail call void @set_state()
+  call void @llvm.dbg.value(metadata i64 0, metadata !10, metadata !DIExpression()), !dbg !16
+  ret void
+}
+
+declare void @set_state()
+
+; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.value(metadata, metadata, metadata) #0
+
+attributes #0 = { nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.module.flags = !{!0}
+!llvm.dbg.cu = !{!1}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !2, producer: "clang LLVM (rustc version 1.60.0-nightly (ec4bcaac4 2022-01-15))", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !3)
+!2 = !DIFile(filename: "src/lib.rs/@/bug.63e521cd-cgu.0", directory: "/tmp/rust-bug")
+!3 = !{!4}
+!4 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Option", file: !5, baseType: !6, size: 8, align: 8, flags: DIFlagEnumClass, elements: !7)
+!5 = !DIFile(filename: "<unknown>", directory: "")
+!6 = !DIBasicType(name: "u8", size: 8, encoding: DW_ATE_unsigned)
+!7 = !{!8, !9}
+!8 = !DIEnumerator(name: "None", value: 0)
+!9 = !DIEnumerator(name: "Some", value: 1)
+!10 = !DILocalVariable(name: "msg", arg: 2, scope: !11, file: !12, line: 689, type: !6)
+!11 = distinct !DISubprogram(name: "expect<()>", linkageName: "_ZN4core6option15Option$LT$T$GT$6expect17h9a574c18f194c213E", scope: !4, file: !12, line: 689, type: !13, scopeLine: 689, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !1, templateParams: !15, retainedNodes: !15)
+!12 = !DIFile(filename: "/rustc/ec4bcaac450279b029f3480b8b8f1b82ab36a5eb/library/core/src/option.rs", directory: "", checksumkind: CSK_MD5, checksum: "4120c8557937a0772190a676ec193800")
+!13 = !DISubroutineType(types: !14)
+!14 = !{null, !4}
+!15 = !{}
+!16 = !DILocation(line: 0, scope: !11)

From 0d1308a7b77c9ed87386c22a728a6c97e2fb4887 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 10:32:21 +0100
Subject: [PATCH 346/946] [AArch64][GlobalISel] Support returned argument with
 multiple registers

The call lowering code assumed that a returned argument could only
consist of one register. Pass an ArrayRef<Register> instead of
Register to make sure that all parts get assigned.

Fixes https://github.com/llvm/llvm-project/issues/53315.

Differential Revision: https://reviews.llvm.org/D117866
---
 .../llvm/CodeGen/GlobalISel/CallLowering.h    | 14 +++++------
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  | 11 ++++----
 .../AArch64/GISel/AArch64CallLowering.cpp     |  2 +-
 .../CodeGen/AArch64/pr53315-returned-i128.ll  | 25 +++++++++++++++++++
 4 files changed, 39 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index 82c125993ec3d..3a4b3ee18e1bc 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -388,12 +388,12 @@ class CallLowering {
   /// \p Handler to move them to the assigned locations.
   ///
   /// \return True if everything has succeeded, false otherwise.
-  bool determineAndHandleAssignments(ValueHandler &Handler,
-                                     ValueAssigner &Assigner,
-                                     SmallVectorImpl<ArgInfo> &Args,
-                                     MachineIRBuilder &MIRBuilder,
-                                     CallingConv::ID CallConv, bool IsVarArg,
-                                     Register ThisReturnReg = Register()) const;
+  bool
+  determineAndHandleAssignments(ValueHandler &Handler, ValueAssigner &Assigner,
+                                SmallVectorImpl<ArgInfo> &Args,
+                                MachineIRBuilder &MIRBuilder,
+                                CallingConv::ID CallConv, bool IsVarArg,
+                                ArrayRef<Register> ThisReturnRegs = None) const;
 
   /// Use \p Handler to insert code to handle the argument/return values
   /// represented by \p Args. It's expected determineAssignments previously
@@ -402,7 +402,7 @@ class CallLowering {
                          CCState &CCState,
                          SmallVectorImpl<CCValAssign> &ArgLocs,
                          MachineIRBuilder &MIRBuilder,
-                         Register ThisReturnReg = Register()) const;
+                         ArrayRef<Register> ThisReturnRegs = None) const;
 
   /// Check whether parameters to a call that are passed in callee saved
   /// registers are the same as from the calling function.  This needs to be
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index d061664e8c5d1..486eff4dc7100 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -509,7 +509,8 @@ static void buildCopyToRegs(MachineIRBuilder &B, ArrayRef<Register> DstRegs,
 bool CallLowering::determineAndHandleAssignments(
     ValueHandler &Handler, ValueAssigner &Assigner,
     SmallVectorImpl<ArgInfo> &Args, MachineIRBuilder &MIRBuilder,
-    CallingConv::ID CallConv, bool IsVarArg, Register ThisReturnReg) const {
+    CallingConv::ID CallConv, bool IsVarArg,
+    ArrayRef<Register> ThisReturnRegs) const {
   MachineFunction &MF = MIRBuilder.getMF();
   const Function &F = MF.getFunction();
   SmallVector<CCValAssign, 16> ArgLocs;
@@ -519,7 +520,7 @@ bool CallLowering::determineAndHandleAssignments(
     return false;
 
   return handleAssignments(Handler, Args, CCInfo, ArgLocs, MIRBuilder,
-                           ThisReturnReg);
+                           ThisReturnRegs);
 }
 
 static unsigned extendOpFromFlags(llvm::ISD::ArgFlagsTy Flags) {
@@ -596,7 +597,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
                                      CCState &CCInfo,
                                      SmallVectorImpl<CCValAssign> &ArgLocs,
                                      MachineIRBuilder &MIRBuilder,
-                                     Register ThisReturnReg) const {
+                                     ArrayRef<Register> ThisReturnRegs) const {
   MachineFunction &MF = MIRBuilder.getMF();
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
@@ -740,10 +741,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
 
       assert(!VA.needsCustom() && "custom loc should have been handled already");
 
-      if (i == 0 && ThisReturnReg.isValid() &&
+      if (i == 0 && !ThisReturnRegs.empty() &&
           Handler.isIncomingArgumentHandler() &&
           isTypeIsValidForThisReturn(ValVT)) {
-        Handler.assignValueToReg(Args[i].Regs[i], ThisReturnReg, VA);
+        Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
         continue;
       }
 
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index ac08ee8ae8dd0..677e7a6684d56 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1179,7 +1179,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     if (!determineAndHandleAssignments(
             UsingReturnedArg ? ReturnedArgHandler : Handler, Assigner, InArgs,
             MIRBuilder, Info.CallConv, Info.IsVarArg,
-            UsingReturnedArg ? OutArgs[0].Regs[0] : Register()))
+            UsingReturnedArg ? makeArrayRef(OutArgs[0].Regs) : None))
       return false;
   }
 
diff --git a/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll b/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll
new file mode 100644
index 0000000000000..0418720231288
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/pr53315-returned-i128.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=aarch64-unknown-linux-gnu -verify-machineinstrs < %s | FileCheck %s
+
+define void @test() nounwind {
+; CHECK-LABEL: test:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    mov x1, xzr
+; CHECK-NEXT:    str x1, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    bl returns_arg
+; CHECK-NEXT:    ldr x1, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    bl accepts_arg
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+  %x = call i128 @returns_arg(i128 0)
+  call void @accepts_arg(i128 %x)
+  ret void
+}
+
+declare i128 @returns_arg(i128 returned)
+declare void @accepts_arg(i128)

From a08447d0de5d85d915318ffd9ef3404363d02c64 Mon Sep 17 00:00:00 2001
From: Peter Smith <peter.smith@arm.com>
Date: Fri, 21 Jan 2022 16:19:22 +0000
Subject: [PATCH 347/946] [LLD][ELF][AArch64] Update test with incorrect
 REQUIRES line [NFC]

D54759 introduced aarch64-combined-dynrel.s and
aarch64-combined-dynrel-ifunc.s . Unfortunately the requires line
at the top was AArch64 instead of aarch64 which means they were never
run. Update the tests to use aarch64 and fix to match current lld output.

Differential Revision: https://reviews.llvm.org/D117896
---
 lld/test/ELF/aarch64-combined-dynrel-ifunc.s | 5 +++--
 lld/test/ELF/aarch64-combined-dynrel.s       | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/lld/test/ELF/aarch64-combined-dynrel-ifunc.s b/lld/test/ELF/aarch64-combined-dynrel-ifunc.s
index 5e84b03d7b2e7..fa0f10eb8941b 100644
--- a/lld/test/ELF/aarch64-combined-dynrel-ifunc.s
+++ b/lld/test/ELF/aarch64-combined-dynrel-ifunc.s
@@ -1,4 +1,4 @@
-// REQUIRES: AArch64
+// REQUIRES: aarch64
 // RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %p/Inputs/shared.s -o %t-lib.o
 // RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu %s -o %t.o
 // RUN: ld.lld %t-lib.o --shared -o %t.so
@@ -42,10 +42,11 @@ main:
 // CHECK-NEXT:     Type: SHT_RELA
 // CHECK-NEXT:     Flags [
 // CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_INFO_LINK
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     Address:
 // CHECK-NEXT:     Offset:
 // CHECK-NEXT:     Size: 72
 
 // CHECK:      0x0000000000000008 RELASZ               72
-// CHECK:      0x0000000000000002 PLTRELSZ             48
+// CHECK:      0x0000000000000002 PLTRELSZ             24
diff --git a/lld/test/ELF/aarch64-combined-dynrel.s b/lld/test/ELF/aarch64-combined-dynrel.s
index 438c2509906e6..d9c88054d3abd 100644
--- a/lld/test/ELF/aarch64-combined-dynrel.s
+++ b/lld/test/ELF/aarch64-combined-dynrel.s
@@ -1,4 +1,4 @@
-// REQUIRES: AArch64
+// REQUIRES: aarch64
 // RUN: llvm-mc --triple=aarch64-linux-gnu -filetype=obj -o %t.o %s
 // RUN: echo "SECTIONS { \
 // RUN:         .text : { *(.text) } \
@@ -32,6 +32,7 @@ _start:
 // CHECK-NEXT:         Type: SHT_RELA
 // CHECK-NEXT:     Flags [
 // CHECK-NEXT:       SHF_ALLOC
+// CHECK-NEXT:       SHF_INFO_LINK
 // CHECK-NEXT:     ]
 // CHECK-NEXT:     Address:
 // CHECK-NEXT:     Offset:

From 906ebd5830e6053b50c52bf098e3586b567e8499 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Fri, 21 Jan 2022 17:03:03 +0000
Subject: [PATCH 348/946] [AMDGPU][GlobalISel] Regenerate checks in
 inst-select-*ext.mir

---
 .../AMDGPU/GlobalISel/inst-select-anyext.mir  | 56 ++++++++++---------
 .../AMDGPU/GlobalISel/inst-select-sext.mir    | 36 ++++++------
 .../AMDGPU/GlobalISel/inst-select-zext.mir    | 36 ++++++------
 3 files changed, 66 insertions(+), 62 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir
index dcad0a85e8e0e..3ae9735d11b90 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir
@@ -13,7 +13,7 @@ body: |
 
     ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: $sgpr0 = COPY [[COPY]]
+    ; GCN-NEXT: $sgpr0 = COPY [[COPY]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s16) = G_TRUNC %0
     %2:sgpr(s32) = G_ANYEXT %1
@@ -22,7 +22,7 @@ body: |
 ...
 
 ---
-name:  anyext_sgpr_s32_to_sgpr_s64
+name: anyext_sgpr_s32_to_sgpr_s64
 legalized:       true
 regBankSelected: true
 tracksRegLiveness: true
@@ -32,10 +32,11 @@ body:             |
 
     ; GCN-LABEL: name: anyext_sgpr_s32_to_sgpr_s64
     ; GCN: liveins: $sgpr0
-    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
-    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s64) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -43,7 +44,7 @@ body:             |
 ...
 
 ---
-name:  anyext_sgpr_s16_to_sgpr_s64
+name: anyext_sgpr_s16_to_sgpr_s64
 legalized:       true
 regBankSelected: true
 tracksRegLiveness: true
@@ -53,10 +54,11 @@ body:             |
 
     ; GCN-LABEL: name: anyext_sgpr_s16_to_sgpr_s64
     ; GCN: liveins: $sgpr0
-    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
-    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s16) = G_TRUNC %0
     %2:sgpr(s64) = G_ANYEXT %1
@@ -65,7 +67,7 @@ body:             |
 ...
 
 ---
-name:  anyext_vgpr_s32_to_vgpr_s64
+name: anyext_vgpr_s32_to_vgpr_s64
 legalized:       true
 regBankSelected: true
 tracksRegLiveness: true
@@ -75,10 +77,11 @@ body:             |
 
     ; GCN-LABEL: name: anyext_vgpr_s32_to_vgpr_s64
     ; GCN: liveins: $vgpr0
-    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
-    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s64) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -86,7 +89,7 @@ body:             |
 ...
 
 ---
-name:  anyext_vgpr_s16_to_vgpr_s64
+name: anyext_vgpr_s16_to_vgpr_s64
 legalized:       true
 regBankSelected: true
 tracksRegLiveness: true
@@ -96,10 +99,11 @@ body:             |
 
     ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s64
     ; GCN: liveins: $vgpr0
-    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
-    ; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_TRUNC %0
     %2:vgpr(s64) = G_ANYEXT %1
@@ -134,7 +138,7 @@ body: |
 
     ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: $sgpr0 = COPY [[COPY]]
+    ; GCN-NEXT: $sgpr0 = COPY [[COPY]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s1) = G_TRUNC %0
     %2:sgpr(s32) = G_ANYEXT %1
@@ -152,7 +156,7 @@ body: |
 
     ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: $vgpr0 = COPY [[COPY]]
+    ; GCN-NEXT: $vgpr0 = COPY [[COPY]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s1) = G_TRUNC %0
     %2:vgpr(s32) = G_ANYEXT %1
@@ -170,7 +174,7 @@ body: |
 
     ; GCN-LABEL: name: anyext_sgpr_s1_to_vgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: $sgpr0 = COPY [[COPY]]
+    ; GCN-NEXT: $sgpr0 = COPY [[COPY]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s1) = G_TRUNC %0
     %2:sgpr(s32) = G_ANYEXT %1
@@ -188,7 +192,7 @@ body: |
 
     ; GCN-LABEL: name: anyext_vgpr_s16_to_vgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: $vgpr0 = COPY [[COPY]]
+    ; GCN-NEXT: $vgpr0 = COPY [[COPY]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_TRUNC %0
     %2:vgpr(s32) = G_ANYEXT %1
@@ -209,7 +213,7 @@ body: |
 
     ; GCN-LABEL: name: anyext_regclass_sgpr_s1_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: $sgpr0 = COPY [[COPY]]
+    ; GCN-NEXT: $sgpr0 = COPY [[COPY]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sreg_32(s1) = G_TRUNC %0
     %2:sgpr(s32) = G_ANYEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
index d642dc0ce6124..ef7be3f7cd4a8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
@@ -12,8 +12,8 @@ body: |
 
     ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc
-    ; GCN: $sgpr0 = COPY [[S_BFE_I32_]]
+    ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc
+    ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s1) = G_TRUNC %0
     %2:sgpr(s32) = G_SEXT %1
@@ -31,10 +31,10 @@ body: |
 
     ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s64
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
-    ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc
-    ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 65536, implicit-def $scc
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s1) = G_TRUNC %0
     %2:sgpr(s64) = G_SEXT %1
@@ -52,8 +52,8 @@ body: |
 
     ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]]
-    ; GCN: $sgpr0 = COPY [[S_SEXT_I32_I16_]]
+    ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[COPY]]
+    ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s16) = G_TRUNC %0
     %2:sgpr(s32) = G_SEXT %1
@@ -72,10 +72,10 @@ body: |
 
     ; GCN-LABEL: name: sext_sgpr_s16_to_sgpr_s64
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
-    ; GCN: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
-    ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s16) = G_TRUNC %0
     %2:sgpr(s64) = G_SEXT %1
@@ -109,8 +109,8 @@ body: |
 
     ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec
-    ; GCN: $vgpr0 = COPY [[V_BFE_I32_e64_]]
+    ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s1) = G_TRUNC %0
     %2:vgpr(s32) = G_SEXT %1
@@ -128,8 +128,8 @@ body: |
 
     ; GCN-LABEL: name: sext_vgpr_s16_to_vgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec
-    ; GCN: $vgpr0 = COPY [[V_BFE_I32_e64_]]
+    ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 16, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_TRUNC %0
     %2:vgpr(s32) = G_SEXT %1
@@ -148,8 +148,8 @@ body: |
 
     ; GCN-LABEL: name: sext_sgpr_reg_class_s1_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc
-    ; GCN: $sgpr0 = COPY [[S_BFE_I32_]]
+    ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc
+    ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_I32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sreg_32(s1) = G_TRUNC %0
     %2:sgpr(s32) = G_SEXT %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
index 50b3306e92ed5..c8d8468689762 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
@@ -12,8 +12,8 @@ body: |
 
     ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc
-    ; GCN: $sgpr0 = COPY [[S_AND_B32_]]
+    ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc
+    ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s1) = G_TRUNC %0
     %2:sgpr(s32) = G_ZEXT %1
@@ -31,10 +31,10 @@ body: |
 
     ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s64
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
-    ; GCN: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 65536, implicit-def $scc
-    ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 65536, implicit-def $scc
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s1) = G_TRUNC %0
     %2:sgpr(s64) = G_ZEXT %1
@@ -52,8 +52,8 @@ body: |
 
     ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc
-    ; GCN: $sgpr0 = COPY [[S_BFE_U32_]]
+    ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc
+    ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s16) = G_TRUNC %0
     %2:sgpr(s32) = G_ZEXT %1
@@ -72,10 +72,10 @@ body: |
 
     ; GCN-LABEL: name: zext_sgpr_s16_to_sgpr_s64
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
-    ; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
-    ; GCN: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
-    ; GCN: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 1048576, implicit-def $scc
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sgpr(s16) = G_TRUNC %0
     %2:sgpr(s64) = G_ZEXT %1
@@ -109,8 +109,8 @@ body: |
 
     ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
-    ; GCN: $vgpr0 = COPY [[V_AND_B32_e32_]]
+    ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_AND_B32_e32_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s1) = G_TRUNC %0
     %2:vgpr(s32) = G_ZEXT %1
@@ -128,8 +128,8 @@ body: |
 
     ; GCN-LABEL: name: zext_vgpr_s16_to_vgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; GCN: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec
-    ; GCN: $vgpr0 = COPY [[V_BFE_U32_e64_]]
+    ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]]
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s16) = G_TRUNC %0
     %2:vgpr(s32) = G_ZEXT %1
@@ -148,8 +148,8 @@ body: |
 
     ; GCN-LABEL: name: zext_sgpr_reg_class_s1_to_sgpr_s32
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
-    ; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc
-    ; GCN: $sgpr0 = COPY [[S_AND_B32_]]
+    ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc
+    ; GCN-NEXT: $sgpr0 = COPY [[S_AND_B32_]]
     %0:sgpr(s32) = COPY $sgpr0
     %1:sreg_32(s1) = G_TRUNC %0
     %2:sgpr(s32) = G_ZEXT %1

From aa50b93e7cf926dec5dd69920e6f48906ea8ad25 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Fri, 21 Jan 2022 17:18:03 +0000
Subject: [PATCH 349/946] [AMDGPU][GlobalISel] Add more sign/zero/any-extension
 tests

Add s1 to s16 cases, and for sgprs s1 to s64 and s32 to s64.
---
 .../AMDGPU/GlobalISel/inst-select-anyext.mir  | 60 ++++++++++++++++++
 .../AMDGPU/GlobalISel/inst-select-sext.mir    | 63 +++++++++++++++++++
 .../AMDGPU/GlobalISel/inst-select-zext.mir    | 63 +++++++++++++++++++
 3 files changed, 186 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir
index 3ae9735d11b90..cde4df501dfe1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-anyext.mir
@@ -129,6 +129,26 @@ body:             |
 
 ---
 
+name: anyext_sgpr_s1_to_sgpr_s16
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s16
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[COPY]], 1048576, implicit-def $scc
+    ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s16) = G_ANYEXT %1
+    %3:sgpr(s32) = G_ZEXT %2
+    $sgpr0 = COPY %3
+...
+
+---
+
 name: anyext_sgpr_s1_to_sgpr_s32
 legalized:       true
 regBankSelected: true
@@ -147,6 +167,46 @@ body: |
 
 ---
 
+name: anyext_sgpr_s1_to_sgpr_s64
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: anyext_sgpr_s1_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[REG_SEQUENCE]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s64) = G_ANYEXT %1
+    $sgpr0_sgpr1 = COPY %2
+...
+
+---
+
+name: anyext_vgpr_s1_to_vgpr_s16
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: anyext_vgpr_s1_to_vgpr_s16
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[COPY]], 0, 16, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s1) = G_TRUNC %0
+    %2:vgpr(s16) = G_ANYEXT %1
+    %3:vgpr(s32) = G_ZEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+
 name: anyext_vgpr_s1_to_vgpr_s32
 legalized:       true
 regBankSelected: true
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
index ef7be3f7cd4a8..0e7e12f27f71d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-sext.mir
@@ -3,6 +3,27 @@
 
 ---
 
+name: sext_sgpr_s1_to_sgpr_s16
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: sext_sgpr_s1_to_sgpr_s16
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[S_BFE_I32_:%[0-9]+]]:sreg_32 = S_BFE_I32 [[COPY]], 65536, implicit-def $scc
+    ; GCN-NEXT: [[S_BFE_U32_:%[0-9]+]]:sreg_32 = S_BFE_U32 [[S_BFE_I32_]], 1048576, implicit-def $scc
+    ; GCN-NEXT: $sgpr0 = COPY [[S_BFE_U32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s16) = G_SEXT %1
+    %3:sgpr(s32) = G_ZEXT %2
+    $sgpr0 = COPY %3
+...
+
+---
+
 name: sext_sgpr_s1_to_sgpr_s32
 legalized:       true
 regBankSelected: true
@@ -83,6 +104,27 @@ body: |
 
 ...
 
+---
+
+name: sext_sgpr_s32_to_sgpr_s64
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: sext_sgpr_s32_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: [[S_BFE_I64_:%[0-9]+]]:sreg_64 = S_BFE_I64 [[REG_SEQUENCE]], 2097152, implicit-def $scc
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_I64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s64) = G_SEXT %0
+    $sgpr0_sgpr1 = COPY %1
+
+...
+
 # ---
 
 # name: sext_vcc_s1_to_vgpr_s32
@@ -100,6 +142,27 @@ body: |
 
 ---
 
+name: sext_vgpr_s1_to_vgpr_s16
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: sext_vgpr_s1_to_vgpr_s16
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[COPY]], 0, 1, implicit $exec
+    ; GCN-NEXT: [[V_BFE_U32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_U32_e64 [[V_BFE_I32_e64_]], 0, 16, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_U32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s1) = G_TRUNC %0
+    %2:vgpr(s16) = G_SEXT %1
+    %3:vgpr(s32) = G_ZEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+
 name: sext_vgpr_s1_to_vgpr_s32
 legalized:       true
 regBankSelected: true
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
index c8d8468689762..821d05f1f03af 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-zext.mir
@@ -3,6 +3,27 @@
 
 ---
 
+name: zext_sgpr_s1_to_sgpr_s16
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: zext_sgpr_s1_to_sgpr_s16
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], 1, implicit-def $scc
+    ; GCN-NEXT: [[S_SEXT_I32_I16_:%[0-9]+]]:sreg_32 = S_SEXT_I32_I16 [[S_AND_B32_]]
+    ; GCN-NEXT: $sgpr0 = COPY [[S_SEXT_I32_I16_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s1) = G_TRUNC %0
+    %2:sgpr(s16) = G_ZEXT %1
+    %3:sgpr(s32) = G_SEXT %2
+    $sgpr0 = COPY %3
+...
+
+---
+
 name: zext_sgpr_s1_to_sgpr_s32
 legalized:       true
 regBankSelected: true
@@ -83,6 +104,27 @@ body: |
 
 ...
 
+---
+
+name: zext_sgpr_s32_to_sgpr_s64
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; GCN-LABEL: name: zext_sgpr_s32_to_sgpr_s64
+    ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[DEF]], %subreg.sub1
+    ; GCN-NEXT: [[S_BFE_U64_:%[0-9]+]]:sreg_64 = S_BFE_U64 [[REG_SEQUENCE]], 2097152, implicit-def $scc
+    ; GCN-NEXT: $sgpr0_sgpr1 = COPY [[S_BFE_U64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s64) = G_ZEXT %0
+    $sgpr0_sgpr1 = COPY %1
+
+...
+
 # ---
 
 # name: zext_vcc_s1_to_vgpr_s32
@@ -100,6 +142,27 @@ body: |
 
 ---
 
+name: zext_vgpr_s1_to_vgpr_s16
+legalized:       true
+regBankSelected: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: zext_vgpr_s1_to_vgpr_s16
+    ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 1, [[COPY]], implicit $exec
+    ; GCN-NEXT: [[V_BFE_I32_e64_:%[0-9]+]]:vgpr_32 = V_BFE_I32_e64 [[V_AND_B32_e32_]], 0, 16, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[V_BFE_I32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s1) = G_TRUNC %0
+    %2:vgpr(s16) = G_ZEXT %1
+    %3:vgpr(s32) = G_SEXT %2
+    $vgpr0 = COPY %3
+...
+
+---
+
 name: zext_vgpr_s1_to_vgpr_s32
 legalized:       true
 regBankSelected: true

From 7d19566c3bfb3efacb629d18839e2d85761156ab Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Thu, 13 Jan 2022 16:54:04 +0000
Subject: [PATCH 350/946] [lldb] Ignore non-address bits in "memory find"
 arguments

This removes the non-address bits before we try to use
the addresses.

Meaning that when results are shown, those results won't
show non-address bits either. This follows what "memory read"
has done. On the grounds that non-address bits are a property
of a pointer, not the memory pointed to.

I've added testing and merged the find and read tests into one
file.

Note that there are no API side changes because "memory find"
does not have an equivalent API call.

Reviewed By: omjavaid

Differential Revision: https://reviews.llvm.org/D117299
---
 lldb/source/Commands/CommandObjectMemory.cpp  |  6 +++
 .../Makefile                                  |  0
 .../TestAArch64LinuxTaggedMemoryAccess.py}    | 40 ++++++++++++++++---
 .../main.c                                    |  8 +++-
 4 files changed, 46 insertions(+), 8 deletions(-)
 rename lldb/test/API/linux/aarch64/{tagged_memory_read => tagged_memory_access}/Makefile (100%)
 rename lldb/test/API/linux/aarch64/{tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py => tagged_memory_access/TestAArch64LinuxTaggedMemoryAccess.py} (62%)
 rename lldb/test/API/linux/aarch64/{tagged_memory_read => tagged_memory_access}/main.c (80%)

diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
index 0b5f39bc7a8f4..f5e2cb4ed44ea 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -1032,6 +1032,12 @@ class CommandObjectMemoryFind : public CommandObjectParsed {
       return false;
     }
 
+    ABISP abi = m_exe_ctx.GetProcessPtr()->GetABI();
+    if (abi) {
+      low_addr = abi->FixDataAddress(low_addr);
+      high_addr = abi->FixDataAddress(high_addr);
+    }
+
     if (high_addr <= low_addr) {
       result.AppendError(
           "starting address must be smaller than ending address");
diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/Makefile b/lldb/test/API/linux/aarch64/tagged_memory_access/Makefile
similarity index 100%
rename from lldb/test/API/linux/aarch64/tagged_memory_read/Makefile
rename to lldb/test/API/linux/aarch64/tagged_memory_access/Makefile
diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py b/lldb/test/API/linux/aarch64/tagged_memory_access/TestAArch64LinuxTaggedMemoryAccess.py
similarity index 62%
rename from lldb/test/API/linux/aarch64/tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py
rename to lldb/test/API/linux/aarch64/tagged_memory_access/TestAArch64LinuxTaggedMemoryAccess.py
index 2f55b951a7548..3cc0d70a3ce34 100644
--- a/lldb/test/API/linux/aarch64/tagged_memory_read/TestAArch64LinuxTaggedMemoryRead.py
+++ b/lldb/test/API/linux/aarch64/tagged_memory_access/TestAArch64LinuxTaggedMemoryAccess.py
@@ -1,6 +1,9 @@
 """
-Test that "memory read" removes non address bits from
-memory read arguments.
+Test that "memory read" and "memory find" remove non address bits from
+address arguments.
+
+These tests use the top byte ignore feature of AArch64. Which Linux
+always enables.
 """
 
 
@@ -17,10 +20,7 @@ class AArch64LinuxTaggedMemoryReadTestCase(TestBase):
 
     NO_DEBUG_INFO_TESTCASE = True
 
-    # AArch64 Linux always enables top byte ignore
-    @skipUnlessArch("aarch64")
-    @skipUnlessPlatform(["linux"])
-    def test_tagged_memory_read(self):
+    def setup_test(self):
         self.build()
         self.runCmd("file " + self.getBuildArtifact("a.out"), CURRENT_EXECUTABLE_SET)
 
@@ -37,6 +37,11 @@ def test_tagged_memory_read(self):
             substrs=['stopped',
                      'stop reason = breakpoint'])
 
+    @skipUnlessArch("aarch64")
+    @skipUnlessPlatform(["linux"])
+    def test_tagged_memory_read(self):
+        self.setup_test()
+
         # If we do not remove non address bits, this can fail in two ways.
         # 1. We attempt to read much more than 16 bytes, probably more than
         #    the default 1024 byte read size. Which will error.
@@ -53,3 +58,26 @@ def test_tagged_memory_read(self):
         # Would fail if we don't remove non address bits because 0x56... > 0x34...
         self.expect("memory read ptr2 ptr1+16", patterns=[tagged_addr_pattern], matching=False)
         self.expect("memory read", patterns=[tagged_addr_pattern], matching=False)
+
+    @skipUnlessArch("aarch64")
+    @skipUnlessPlatform(["linux"])
+    def test_tagged_memory_find(self):
+        self.setup_test()
+
+        # If memory find doesn't remove non-address bits one of two
+        # things happen.
+        # 1. It tries to search a gigantic amount of memory.
+        #    We're not going to test for this because a failure
+        #    would take a very long time and perhaps even find the
+        #    target value randomly.
+        # 2. It thinks high address <= low address, which we check below.
+
+        self.runCmd("memory find -s '?' ptr2 ptr1+32")
+
+        self.assertTrue(self.res.Succeeded())
+        out = self.res.GetOutput()
+        # memory find does not fail when it doesn't find the data.
+        # First check we actually got something.
+        self.assertRegexpMatches(out, "data found at location: 0x[0-9A-Fa-f]+")
+        # Then that the location found does not display the tag bits.
+        self.assertNotRegexpMatches(out, "data found at location: 0x(34|56)[0-9A-Fa-f]+")
diff --git a/lldb/test/API/linux/aarch64/tagged_memory_read/main.c b/lldb/test/API/linux/aarch64/tagged_memory_access/main.c
similarity index 80%
rename from lldb/test/API/linux/aarch64/tagged_memory_read/main.c
rename to lldb/test/API/linux/aarch64/tagged_memory_access/main.c
index 72ee30cef7869..3dd064b00bd31 100644
--- a/lldb/test/API/linux/aarch64/tagged_memory_read/main.c
+++ b/lldb/test/API/linux/aarch64/tagged_memory_access/main.c
@@ -5,11 +5,15 @@ static char *set_non_address_bits(char *ptr, size_t tag) {
   return (char *)((size_t)ptr | (tag << 56));
 }
 
-int main(int argc, char const *argv[]) {
-  char buf[32];
+// Global to zero init
+char buf[32];
 
+int main(int argc, char const *argv[]) {
   char *ptr1 = set_non_address_bits(buf, 0x34);
   char *ptr2 = set_non_address_bits(buf, 0x56);
 
+  // Target value for "memory find"
+  buf[15] = '?';
+
   return 0; // Set break point at this line.
 }

From 022600334dcb914d24230c6659487b2744af702b Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski@arm.com>
Date: Tue, 11 Jan 2022 13:16:03 +0000
Subject: [PATCH 351/946] [flang] Update the description of
 `!fir.coordinate_of`

This change was suggested in one of the comments for
https://reviews.llvm.org/D115333. Basically, the following usage is
valid, but the current wording suggests otherwise:
```
%1 = fir.coordinate_of %a, %k : (!fir.ref<!fir.array<10 x 10 x i32>>, index) -> !fir.ref<!fir.array<10 x i32>>
```
A test is also added to better document this particular case.

Differential revision: https://reviews.llvm.org/D115929
---
 .../include/flang/Optimizer/Dialect/FIROps.td  |  2 +-
 flang/test/Fir/convert-to-llvm.fir             | 18 ++++++++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td
index cd4d77eab521b..a484d60fa3b95 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROps.td
+++ b/flang/include/flang/Optimizer/Dialect/FIROps.td
@@ -1633,7 +1633,7 @@ def fir_CoordinateOp : fir_Op<"coordinate_of", [NoSideEffect]> {
     Compute the internal coordinate address starting from a boxed value or
     unboxed memory reference. Returns a memory reference. When computing the
     coordinate of an array element, the rank of the array must be known and
-    the number of indexing expressions must equal the rank of the array.
+    the number of indexing expressions must not exceed the rank of the array.
 
     This operation will apply the access map from a boxed value implicitly.
 
diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir
index 9b4961a974a47..e5256629062fb 100644
--- a/flang/test/Fir/convert-to-llvm.fir
+++ b/flang/test/Fir/convert-to-llvm.fir
@@ -2471,11 +2471,11 @@ func @coordinate_array_known_size_1d(%arg0: !fir.ref<!fir.array<10 x i32>>, %arg
 
 // -----
 
-func @coordinate_array_known_size_2d(%arg0: !fir.ref<!fir.array<10 x 10 x i32>>, %arg1 : index, %arg2 : index) {
+func @coordinate_array_known_size_2d_get_i32(%arg0: !fir.ref<!fir.array<10 x 10 x i32>>, %arg1 : index, %arg2 : index) {
    %q = fir.coordinate_of %arg0, %arg1, %arg2 : (!fir.ref<!fir.array<10 x 10 x i32>>, index, index) -> !fir.ref<i32>
    return
 }
-// CHECK-LABEL:   llvm.func @coordinate_array_known_size_2d(
+// CHECK-LABEL:   llvm.func @coordinate_array_known_size_2d_get_i32(
 // CHECK-SAME:    %[[VAL_0:.*]]: !llvm.ptr<array<10 x array<10 x i32>>>,
 // CHECK-SAME:    %[[VAL_1:.*]]: i64,
 // CHECK-SAME:    %[[VAL_2:.*]]: i64) {
@@ -2486,6 +2486,20 @@ func @coordinate_array_known_size_2d(%arg0: !fir.ref<!fir.array<10 x 10 x i32>>,
 
 // -----
 
+func @coordinate_array_known_size_2d_get_array(%arg0: !fir.ref<!fir.array<10 x 10 x i32>>, %arg1 : index) {
+   %q = fir.coordinate_of %arg0, %arg1 : (!fir.ref<!fir.array<10 x 10 x i32>>, index) -> !fir.ref<!fir.array<10 x i32>>
+   return
+}
+// CHECK-LABEL:   llvm.func @coordinate_array_known_size_2d_get_array(
+// CHECK-SAME:    %[[VAL_0:.*]]: !llvm.ptr<array<10 x array<10 x i32>>>,
+// CHECK-SAME:    %[[VAL_1:.*]]: i64) {
+// CHECK:           %[[VAL_2:.*]] = llvm.mlir.constant(0 : i64) : i64
+// CHECK:           %[[VAL_3:.*]] = llvm.getelementptr %[[VAL_0]][%[[VAL_2]], %[[VAL_1]]] : (!llvm.ptr<array<10 x array<10 x i32>>>, i64, i64) -> !llvm.ptr<array<10 x i32>>
+// CHECK:           llvm.return
+// CHECK:         }
+
+// -----
+
 // 5.2. `fir.derived`
 func @coordinate_ref_derived(%arg0: !fir.ref<!fir.type<dervied_4{field_1:i32, field_2:i32}>>) {
   %idx = fir.field_index field_2, !fir.type<dervied_4{field_1:i32, field_2:i32}>

From 912af6b570d6f70e107e4ddf54bc85cb8b63cc70 Mon Sep 17 00:00:00 2001
From: Abinav Puthan Purayil <abinav.puthanpurayil@amd.com>
Date: Mon, 24 Jan 2022 16:24:54 +0530
Subject: [PATCH 352/946] [AMDGPU][GlobalISel] Remove the post ':' part of vreg
 operands in fsh combine tests.

---
 .../CodeGen/AMDGPU/GlobalISel/combine-fsh.mir | 50 +++++++++----------
 .../CodeGen/AMDGPU/GlobalISel/combine-rot.mir | 50 +++++++++----------
 2 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir
index ad93f1bf4d39e..a1eabb487448e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fsh.mir
@@ -20,10 +20,10 @@ body: |
     %b:_(s32) = COPY $vgpr1
     %amt:_(s32) = COPY $vgpr2
     %bw:_(s32) = G_CONSTANT i32 32
-    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt:_
-    %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32)
-    %or:_(s32) = G_OR %shl:_, %lshr:_
+    %shl:_(s32) = G_SHL %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt
+    %lshr:_(s32) = G_LSHR %b, %sub
+    %or:_(s32) = G_OR %shl, %lshr
     $vgpr3 = COPY %or
 ...
 
@@ -46,11 +46,11 @@ body: |
     %b:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %amt:_(<2 x s32>) = COPY $vgpr4_vgpr5
     %scalar_bw:_(s32) = G_CONSTANT i32 32
-    %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw(s32), %scalar_bw(s32)
-    %shl:_(<2 x s32>) = G_SHL %a:_, %amt:_(<2 x s32>)
-    %sub:_(<2 x s32>) = G_SUB %bw:_, %amt:_
-    %lshr:_(<2 x s32>) = G_LSHR %b:_, %sub:_(<2 x s32>)
-    %or:_(<2 x s32>) = G_OR %shl:_, %lshr:_
+    %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw, %scalar_bw
+    %shl:_(<2 x s32>) = G_SHL %a, %amt
+    %sub:_(<2 x s32>) = G_SUB %bw, %amt
+    %lshr:_(<2 x s32>) = G_LSHR %b, %sub
+    %or:_(<2 x s32>) = G_OR %shl, %lshr
     $vgpr6_vgpr7 = COPY %or
 ...
 
@@ -73,10 +73,10 @@ body: |
     %b:_(s32) = COPY $vgpr1
     %amt:_(s32) = COPY $vgpr2
     %bw:_(s32) = G_CONSTANT i32 32
-    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt:_
-    %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32)
-    %or:_(s32) = G_OR %lshr:_, %shl:_
+    %shl:_(s32) = G_SHL %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt
+    %lshr:_(s32) = G_LSHR %b, %sub
+    %or:_(s32) = G_OR %lshr, %shl
     $vgpr3 = COPY %or
 ...
 
@@ -99,10 +99,10 @@ body: |
     %b:_(s32) = COPY $vgpr1
     %amt:_(s32) = COPY $vgpr2
     %bw:_(s32) = G_CONSTANT i32 32
-    %lshr:_(s32) = G_LSHR %b:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt:_
-    %shl:_(s32) = G_SHL %a:_, %sub:_(s32)
-    %or:_(s32) = G_OR %shl:_, %lshr:_
+    %lshr:_(s32) = G_LSHR %b, %amt
+    %sub:_(s32) = G_SUB %bw, %amt
+    %shl:_(s32) = G_SHL %a, %sub
+    %or:_(s32) = G_OR %shl, %lshr
     $vgpr3 = COPY %or
 ...
 
@@ -182,10 +182,10 @@ body: |
     %b:_(s32) = COPY $vgpr1
     %amt:_(s32) = COPY $vgpr2
     %bw:_(s32) = G_CONSTANT i32 31
-    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt:_
-    %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32)
-    %or:_(s32) = G_OR %shl:_, %lshr:_
+    %shl:_(s32) = G_SHL %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt
+    %lshr:_(s32) = G_LSHR %b, %sub
+    %or:_(s32) = G_OR %shl, %lshr
     $vgpr3 = COPY %or
 ...
 
@@ -214,9 +214,9 @@ body: |
     %amt:_(s32) = COPY $vgpr2
     %amt1:_(s32) = COPY $vgpr3
     %bw:_(s32) = G_CONSTANT i32 32
-    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt1:_
-    %lshr:_(s32) = G_LSHR %b:_, %sub:_(s32)
-    %or:_(s32) = G_OR %shl:_, %lshr:_
+    %shl:_(s32) = G_SHL %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt1
+    %lshr:_(s32) = G_LSHR %b, %sub
+    %or:_(s32) = G_OR %shl, %lshr
     $vgpr4 = COPY %or
 ...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir
index 2649ee4bdf72a..e83bcbc293d51 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-rot.mir
@@ -18,10 +18,10 @@ body: |
     %a:_(s32) = COPY $vgpr0
     %amt:_(s32) = COPY $vgpr1
     %bw:_(s32) = G_CONSTANT i32 32
-    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt:_
-    %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32)
-    %or:_(s32) = G_OR %shl:_, %lshr:_
+    %shl:_(s32) = G_SHL %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt
+    %lshr:_(s32) = G_LSHR %a, %sub
+    %or:_(s32) = G_OR %shl, %lshr
     $vgpr2 = COPY %or
 ...
 
@@ -42,11 +42,11 @@ body: |
     %a:_(<2 x s32>) = COPY $vgpr0_vgpr1
     %amt:_(<2 x s32>) = COPY $vgpr2_vgpr3
     %scalar_bw:_(s32) = G_CONSTANT i32 32
-    %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw(s32), %scalar_bw(s32)
-    %shl:_(<2 x s32>) = G_SHL %a:_, %amt:_(<2 x s32>)
-    %sub:_(<2 x s32>) = G_SUB %bw:_, %amt:_
-    %lshr:_(<2 x s32>) = G_LSHR %a:_, %sub:_(<2 x s32>)
-    %or:_(<2 x s32>) = G_OR %shl:_, %lshr:_
+    %bw:_(<2 x s32>) = G_BUILD_VECTOR %scalar_bw, %scalar_bw
+    %shl:_(<2 x s32>) = G_SHL %a, %amt
+    %sub:_(<2 x s32>) = G_SUB %bw, %amt
+    %lshr:_(<2 x s32>) = G_LSHR %a, %sub
+    %or:_(<2 x s32>) = G_OR %shl, %lshr
     $vgpr4_vgpr5 = COPY %or
 ...
 
@@ -67,10 +67,10 @@ body: |
     %a:_(s32) = COPY $vgpr0
     %amt:_(s32) = COPY $vgpr1
     %bw:_(s32) = G_CONSTANT i32 32
-    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt:_
-    %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32)
-    %or:_(s32) = G_OR %lshr:_, %shl:_
+    %shl:_(s32) = G_SHL %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt
+    %lshr:_(s32) = G_LSHR %a, %sub
+    %or:_(s32) = G_OR %lshr, %shl
     $vgpr2 = COPY %or
 ...
 
@@ -91,10 +91,10 @@ body: |
     %a:_(s32) = COPY $vgpr0
     %amt:_(s32) = COPY $vgpr1
     %bw:_(s32) = G_CONSTANT i32 32
-    %lshr:_(s32) = G_LSHR %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt:_
-    %shl:_(s32) = G_SHL %a:_, %sub:_(s32)
-    %or:_(s32) = G_OR %shl:_, %lshr:_
+    %lshr:_(s32) = G_LSHR %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt
+    %shl:_(s32) = G_SHL %a, %sub
+    %or:_(s32) = G_OR %shl, %lshr
     $vgpr2 = COPY %or
 ...
 
@@ -169,10 +169,10 @@ body: |
     %a:_(s32) = COPY $vgpr0
     %amt:_(s32) = COPY $vgpr1
     %bw:_(s32) = G_CONSTANT i32 31
-    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt:_
-    %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32)
-    %or:_(s32) = G_OR %shl:_, %lshr:_
+    %shl:_(s32) = G_SHL %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt
+    %lshr:_(s32) = G_LSHR %a, %sub
+    %or:_(s32) = G_OR %shl, %lshr
     $vgpr2 = COPY %or
 ...
 
@@ -199,9 +199,9 @@ body: |
     %amt:_(s32) = COPY $vgpr1
     %amt1:_(s32) = COPY $vgpr2
     %bw:_(s32) = G_CONSTANT i32 32
-    %shl:_(s32) = G_SHL %a:_, %amt:_(s32)
-    %sub:_(s32) = G_SUB %bw:_, %amt1:_
-    %lshr:_(s32) = G_LSHR %a:_, %sub:_(s32)
-    %or:_(s32) = G_OR %shl:_, %lshr:_
+    %shl:_(s32) = G_SHL %a, %amt
+    %sub:_(s32) = G_SUB %bw, %amt1
+    %lshr:_(s32) = G_LSHR %a, %sub
+    %or:_(s32) = G_OR %shl, %lshr
     $vgpr3 = COPY %or
 ...

From 577a6dc9a1864fcc0e938052beae2a9b5362367a Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 11:08:13 +0000
Subject: [PATCH 353/946] [X86] getVectorMaskingNode - fix indentation. NFC.

clang-format
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 618b97a2e8dbd..10fdcc3d6ca81 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -25785,9 +25785,9 @@ static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
 /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the
 /// necessary casting or extending for \p Mask when lowering masking intrinsics
 static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
-                  SDValue PreservedSrc,
-                  const X86Subtarget &Subtarget,
-                  SelectionDAG &DAG) {
+                                    SDValue PreservedSrc,
+                                    const X86Subtarget &Subtarget,
+                                    SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
   MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
   unsigned OpcodeSelect = ISD::VSELECT;

From e7926e8d972e8129d4b64e10a38719b066a526be Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Thu, 20 Jan 2022 14:46:28 +0000
Subject: [PATCH 354/946] [RISCV] Match VF variants for masked VFRDIV/VFRSUB

This patch follows up on D117697 to help the simple binary operations
behave similarly in the presence of masks.

It also enables CGP sinking support for vp.fdiv and vp.fsub intrinsics,
now that VFRDIV and VFRSUB are consistently matched with a LHS splat for
masked and unmasked variants.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117783
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  8 ++--
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 11 ++++-
 .../RISCV/rvv/fixed-vectors-vfrdiv-vp.ll      | 48 +++++--------------
 .../RISCV/rvv/fixed-vectors-vfrsub-vp.ll      | 48 +++++--------------
 .../CodeGen/RISCV/rvv/sink-splat-operands.ll  | 22 ++++-----
 5 files changed, 46 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 411191343cf04..5073fe66c15a9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1272,9 +1272,7 @@ bool RISCVTargetLowering::shouldSinkOperands(
         case Intrinsic::vp_or:
         case Intrinsic::vp_xor:
         case Intrinsic::vp_fadd:
-        case Intrinsic::vp_fsub:
         case Intrinsic::vp_fmul:
-        case Intrinsic::vp_fdiv:
         case Intrinsic::vp_shl:
         case Intrinsic::vp_lshr:
         case Intrinsic::vp_ashr:
@@ -1283,9 +1281,11 @@ bool RISCVTargetLowering::shouldSinkOperands(
         case Intrinsic::vp_urem:
         case Intrinsic::vp_srem:
           return Operand == 1;
-        // ... the one exception is vp.sub which has explicit patterns for both
-        // LHS and RHS (as vrsub).
+        // ... with the exception of vp.sub/vp.fsub/vp.fdiv, which have
+        // explicit patterns for both LHS and RHS (as 'vr' versions).
         case Intrinsic::vp_sub:
+        case Intrinsic::vp_fsub:
+        case Intrinsic::vp_fdiv:
           return Operand == 0 || Operand == 1;
         default:
           return false;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 7b556174bbf0c..9e47cb80349bf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -404,7 +404,7 @@ multiclass VPatBinaryFPVL_VV_VF<SDNode vop, string instruction_name> {
 }
 
 multiclass VPatBinaryFPVL_R_VF<SDNode vop, string instruction_name> {
-  foreach fvti = AllFloatVectors in
+  foreach fvti = AllFloatVectors in {
     def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
                                 fvti.RegClass:$rs1,
                                 (fvti.Mask true_mask),
@@ -412,6 +412,15 @@ multiclass VPatBinaryFPVL_R_VF<SDNode vop, string instruction_name> {
               (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)
                            fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
                            GPR:$vl, fvti.Log2SEW)>;
+    def : Pat<(fvti.Vector (vop (SplatFPOp fvti.ScalarRegClass:$rs2),
+                                fvti.RegClass:$rs1,
+                                (fvti.Mask V0),
+                                VLOpFrag)),
+              (!cast<Instruction>(instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_MASK")
+                           (fvti.Vector (IMPLICIT_DEF)),
+                           fvti.RegClass:$rs1, fvti.ScalarRegClass:$rs2,
+                           (fvti.Mask V0), GPR:$vl, fvti.Log2SEW, TAIL_AGNOSTIC)>;
+  }
 }
 
 multiclass VPatIntegerSetCCVL_VV<VTypeInfo vti, string instruction_name,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll
index c7a4364b8d497..b902ff1a03d56 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrdiv-vp.ll
@@ -9,10 +9,8 @@ declare <2 x half> @llvm.vp.fdiv.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
 define <2 x half> @vfrdiv_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x half> undef, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> undef, <2 x i32> zeroinitializer
@@ -39,10 +37,8 @@ declare <4 x half> @llvm.vp.fdiv.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32)
 define <4 x half> @vfrdiv_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x half> undef, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> undef, <4 x i32> zeroinitializer
@@ -69,10 +65,8 @@ declare <8 x half> @llvm.vp.fdiv.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32)
 define <8 x half> @vfrdiv_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x half> undef, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> undef, <8 x i32> zeroinitializer
@@ -99,10 +93,8 @@ declare <16 x half> @llvm.vp.fdiv.v16f16(<16 x half>, <16 x half>, <16 x i1>, i3
 define <16 x half> @vfrdiv_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x half> undef, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> undef, <16 x i32> zeroinitializer
@@ -129,10 +121,8 @@ declare <2 x float> @llvm.vp.fdiv.v2f32(<2 x float>, <2 x float>, <2 x i1>, i32)
 define <2 x float> @vfrdiv_vf_v2f32(<2 x float> %va, float %b, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x float> undef, float %b, i32 0
   %vb = shufflevector <2 x float> %elt.head, <2 x float> undef, <2 x i32> zeroinitializer
@@ -159,10 +149,8 @@ declare <4 x float> @llvm.vp.fdiv.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
 define <4 x float> @vfrdiv_vf_v4f32(<4 x float> %va, float %b, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x float> undef, float %b, i32 0
   %vb = shufflevector <4 x float> %elt.head, <4 x float> undef, <4 x i32> zeroinitializer
@@ -189,10 +177,8 @@ declare <8 x float> @llvm.vp.fdiv.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
 define <8 x float> @vfrdiv_vf_v8f32(<8 x float> %va, float %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x float> undef, float %b, i32 0
   %vb = shufflevector <8 x float> %elt.head, <8 x float> undef, <8 x i32> zeroinitializer
@@ -219,10 +205,8 @@ declare <16 x float> @llvm.vp.fdiv.v16f32(<16 x float>, <16 x float>, <16 x i1>,
 define <16 x float> @vfrdiv_vf_v16f32(<16 x float> %va, float %b, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x float> undef, float %b, i32 0
   %vb = shufflevector <16 x float> %elt.head, <16 x float> undef, <16 x i32> zeroinitializer
@@ -249,10 +233,8 @@ declare <2 x double> @llvm.vp.fdiv.v2f64(<2 x double>, <2 x double>, <2 x i1>, i
 define <2 x double> @vfrdiv_vf_v2f64(<2 x double> %va, double %b, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x double> undef, double %b, i32 0
   %vb = shufflevector <2 x double> %elt.head, <2 x double> undef, <2 x i32> zeroinitializer
@@ -279,10 +261,8 @@ declare <4 x double> @llvm.vp.fdiv.v4f64(<4 x double>, <4 x double>, <4 x i1>, i
 define <4 x double> @vfrdiv_vf_v4f64(<4 x double> %va, double %b, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x double> undef, double %b, i32 0
   %vb = shufflevector <4 x double> %elt.head, <4 x double> undef, <4 x i32> zeroinitializer
@@ -309,10 +289,8 @@ declare <8 x double> @llvm.vp.fdiv.v8f64(<8 x double>, <8 x double>, <8 x i1>, i
 define <8 x double> @vfrdiv_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> undef, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> undef, <8 x i32> zeroinitializer
@@ -339,10 +317,8 @@ declare <16 x double> @llvm.vp.fdiv.v16f64(<16 x double>, <16 x double>, <16 x i
 define <16 x double> @vfrdiv_vf_v16f64(<16 x double> %va, double %b, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrdiv_vf_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfdiv.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x double> undef, double %b, i32 0
   %vb = shufflevector <16 x double> %elt.head, <16 x double> undef, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll
index bdf7c6601d8d8..dfb436437e765 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfrsub-vp.ll
@@ -9,10 +9,8 @@ declare <2 x half> @llvm.vp.fsub.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
 define <2 x half> @vfrsub_vf_v2f16(<2 x half> %va, half %b, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x half> undef, half %b, i32 0
   %vb = shufflevector <2 x half> %elt.head, <2 x half> undef, <2 x i32> zeroinitializer
@@ -39,10 +37,8 @@ declare <4 x half> @llvm.vp.fsub.v4f16(<4 x half>, <4 x half>, <4 x i1>, i32)
 define <4 x half> @vfrsub_vf_v4f16(<4 x half> %va, half %b, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v4f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x half> undef, half %b, i32 0
   %vb = shufflevector <4 x half> %elt.head, <4 x half> undef, <4 x i32> zeroinitializer
@@ -69,10 +65,8 @@ declare <8 x half> @llvm.vp.fsub.v8f16(<8 x half>, <8 x half>, <8 x i1>, i32)
 define <8 x half> @vfrsub_vf_v8f16(<8 x half> %va, half %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v8f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x half> undef, half %b, i32 0
   %vb = shufflevector <8 x half> %elt.head, <8 x half> undef, <8 x i32> zeroinitializer
@@ -99,10 +93,8 @@ declare <16 x half> @llvm.vp.fsub.v16f16(<16 x half>, <16 x half>, <16 x i1>, i3
 define <16 x half> @vfrsub_vf_v16f16(<16 x half> %va, half %b, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v16f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e16, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x half> undef, half %b, i32 0
   %vb = shufflevector <16 x half> %elt.head, <16 x half> undef, <16 x i32> zeroinitializer
@@ -129,10 +121,8 @@ declare <2 x float> @llvm.vp.fsub.v2f32(<2 x float>, <2 x float>, <2 x i1>, i32)
 define <2 x float> @vfrsub_vf_v2f32(<2 x float> %va, float %b, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x float> undef, float %b, i32 0
   %vb = shufflevector <2 x float> %elt.head, <2 x float> undef, <2 x i32> zeroinitializer
@@ -159,10 +149,8 @@ declare <4 x float> @llvm.vp.fsub.v4f32(<4 x float>, <4 x float>, <4 x i1>, i32)
 define <4 x float> @vfrsub_vf_v4f32(<4 x float> %va, float %b, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x float> undef, float %b, i32 0
   %vb = shufflevector <4 x float> %elt.head, <4 x float> undef, <4 x i32> zeroinitializer
@@ -189,10 +177,8 @@ declare <8 x float> @llvm.vp.fsub.v8f32(<8 x float>, <8 x float>, <8 x i1>, i32)
 define <8 x float> @vfrsub_vf_v8f32(<8 x float> %va, float %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v8f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x float> undef, float %b, i32 0
   %vb = shufflevector <8 x float> %elt.head, <8 x float> undef, <8 x i32> zeroinitializer
@@ -219,10 +205,8 @@ declare <16 x float> @llvm.vp.fsub.v16f32(<16 x float>, <16 x float>, <16 x i1>,
 define <16 x float> @vfrsub_vf_v16f32(<16 x float> %va, float %b, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v16f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x float> undef, float %b, i32 0
   %vb = shufflevector <16 x float> %elt.head, <16 x float> undef, <16 x i32> zeroinitializer
@@ -249,10 +233,8 @@ declare <2 x double> @llvm.vp.fsub.v2f64(<2 x double>, <2 x double>, <2 x i1>, i
 define <2 x double> @vfrsub_vf_v2f64(<2 x double> %va, double %b, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v9, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v9, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <2 x double> undef, double %b, i32 0
   %vb = shufflevector <2 x double> %elt.head, <2 x double> undef, <2 x i32> zeroinitializer
@@ -279,10 +261,8 @@ declare <4 x double> @llvm.vp.fsub.v4f64(<4 x double>, <4 x double>, <4 x i1>, i
 define <4 x double> @vfrsub_vf_v4f64(<4 x double> %va, double %b, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v4f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
-; CHECK-NEXT:    vfmv.v.f v10, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v10, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <4 x double> undef, double %b, i32 0
   %vb = shufflevector <4 x double> %elt.head, <4 x double> undef, <4 x i32> zeroinitializer
@@ -309,10 +289,8 @@ declare <8 x double> @llvm.vp.fsub.v8f64(<8 x double>, <8 x double>, <8 x i1>, i
 define <8 x double> @vfrsub_vf_v8f64(<8 x double> %va, double %b, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v8f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
-; CHECK-NEXT:    vfmv.v.f v12, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v12, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <8 x double> undef, double %b, i32 0
   %vb = shufflevector <8 x double> %elt.head, <8 x double> undef, <8 x i32> zeroinitializer
@@ -339,10 +317,8 @@ declare <16 x double> @llvm.vp.fsub.v16f64(<16 x double>, <16 x double>, <16 x i
 define <16 x double> @vfrsub_vf_v16f64(<16 x double> %va, double %b, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfrsub_vf_v16f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
-; CHECK-NEXT:    vfmv.v.f v16, fa0
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
-; CHECK-NEXT:    vfsub.vv v8, v16, v8, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
 ; CHECK-NEXT:    ret
   %elt.head = insertelement <16 x double> undef, double %b, i32 0
   %vb = shufflevector <16 x double> %elt.head, <16 x double> undef, <16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
index e902e1cd577a8..5bfa79e02e437 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll
@@ -3364,22 +3364,19 @@ for.cond.cleanup:                                 ; preds = %vector.body
   ret void
 }
 
-; FIXME: vfrdiv.vf doesn't match against masked instructions
-
 define void @sink_splat_vp_frdiv(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_frdiv:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, ft0
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB56_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vfdiv.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vfrdiv.vf v8, v8, ft0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    bnez a1, .LBB56_1
@@ -3490,22 +3487,19 @@ for.cond.cleanup:                                 ; preds = %vector.body
 
 declare <4 x float> @llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
 
-; FIXME: vfrsub.vf doesn't match against masked instructions
-
 define void @sink_splat_vp_frsub(float* nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: sink_splat_vp_frsub:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    fmv.w.x ft0, a1
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vfmv.v.f v8, ft0
 ; CHECK-NEXT:    li a1, 1024
 ; CHECK-NEXT:  .LBB59_1: # %vector.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
-; CHECK-NEXT:    vfsub.vv v9, v8, v9, v0.t
+; CHECK-NEXT:    vfrsub.vf v8, v8, ft0, v0.t
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; CHECK-NEXT:    vse32.v v9, (a0)
+; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    addi a1, a1, -4
 ; CHECK-NEXT:    addi a0, a0, 16
 ; CHECK-NEXT:    bnez a1, .LBB59_1

From af773a18181dc1a1e3846f518b2d44f2abbbdf87 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Tue, 18 Jan 2022 14:13:13 +0000
Subject: [PATCH 355/946] [RISCV][VP] Lower VP_MERGE to RVV instructions

This patch adds lowering of the llvm.vp.merge.* intrinsic
(ISD::VP_MERGE) to RVV vmerge/vfmerge instructions. It introduces a
special pseudo form of vmerge which allows a tied merge operand,
allowing us to specify the tail elements as being equal to the "on
false" operand, using a tied-def constraint and a "tail undisturbed"
policy.

While this strategy allows us to often lower the intrinsic to just one
instruction, it may be less efficient in fixed-vector types as the
number of tail elements may extend far beyond the length of the fixed
vector. Another strategy could be to use a vmerge/vfmerge instruction
with an AVL equal to the length of the vector type, and manipulate the
condition operand such that mask elements greater than the operation's
EVL are false.

I've also observed inefficient codegen in which our 'VF' patterns don't
match raw floating-point SPLAT_VECTORs, which occur in scalable-vector
code.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117561
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |    8 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |    4 +
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    |   75 +-
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |   64 +-
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        |  953 ++++++++++++
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 1280 +++++++++++++++++
 6 files changed, 2372 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 5073fe66c15a9..7d224e3968545 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -521,12 +521,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         ISD::VP_SHL,         ISD::VP_REDUCE_ADD,  ISD::VP_REDUCE_AND,
         ISD::VP_REDUCE_OR,   ISD::VP_REDUCE_XOR,  ISD::VP_REDUCE_SMAX,
         ISD::VP_REDUCE_SMIN, ISD::VP_REDUCE_UMAX, ISD::VP_REDUCE_UMIN,
-        ISD::VP_SELECT};
+        ISD::VP_MERGE,       ISD::VP_SELECT};
 
     static const unsigned FloatingPointVPOps[] = {
         ISD::VP_FADD,        ISD::VP_FSUB,        ISD::VP_FMUL,
         ISD::VP_FDIV,        ISD::VP_REDUCE_FADD, ISD::VP_REDUCE_SEQ_FADD,
-        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_SELECT};
+        ISD::VP_REDUCE_FMIN, ISD::VP_REDUCE_FMAX, ISD::VP_MERGE,
+        ISD::VP_SELECT};
 
     if (!Subtarget.is64Bit()) {
       // We must custom-lower certain vXi64 operations on RV32 due to the vector
@@ -3441,6 +3442,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
     return lowerSET_ROUNDING(Op, DAG);
   case ISD::VP_SELECT:
     return lowerVPOp(Op, DAG, RISCVISD::VSELECT_VL);
+  case ISD::VP_MERGE:
+    return lowerVPOp(Op, DAG, RISCVISD::VP_MERGE_VL);
   case ISD::VP_ADD:
     return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
   case ISD::VP_SUB:
@@ -10087,6 +10090,7 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(VWADDU_VL)
   NODE_NAME_CASE(SETCC_VL)
   NODE_NAME_CASE(VSELECT_VL)
+  NODE_NAME_CASE(VP_MERGE_VL)
   NODE_NAME_CASE(VMAND_VL)
   NODE_NAME_CASE(VMOR_VL)
   NODE_NAME_CASE(VMXOR_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 23857f93e0159..58b7ec89f8758 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -253,6 +253,10 @@ enum NodeType : unsigned {
 
   // Vector select with an additional VL operand. This operation is unmasked.
   VSELECT_VL,
+  // Vector select with operand #2 (the value when the condition is false) tied
+  // to the destination and an additional VL operand. This operation is
+  // unmasked.
+  VP_MERGE_VL,
 
   // Mask binary operators.
   VMAND_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index a4e92c80ff140..798f848a50b7a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -579,10 +579,11 @@ class PseudoToVInst<string PseudoInst> {
                  !subst("_B64", "",
                  !subst("_MASK", "",
                  !subst("_TIED", "",
+                 !subst("_TU", "",
                  !subst("F16", "F",
                  !subst("F32", "F",
                  !subst("F64", "F",
-                 !subst("Pseudo", "", PseudoInst))))))))))))))))))));
+                 !subst("Pseudo", "", PseudoInst)))))))))))))))))))));
 }
 
 // The destination vector register group for a masked vector instruction cannot
@@ -928,6 +929,9 @@ class VPseudoBinaryNoMask<VReg RetClass,
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
+// Special version of VPseudoBinaryNoMask where we pretend the first source is
+// tied to the destination.
+// This allows maskedoff and rs2 to be the same register.
 class VPseudoTiedBinaryNoMask<VReg RetClass,
                               DAGOperand Op2Class,
                               string Constraint> :
@@ -1079,6 +1083,30 @@ class VPseudoBinaryCarryIn<VReg RetClass,
   let VLMul = MInfo.value;
 }
 
+class VPseudoTiedBinaryCarryIn<VReg RetClass,
+                               VReg Op1Class,
+                               DAGOperand Op2Class,
+                               LMULInfo MInfo,
+                               bit CarryIn,
+                               string Constraint> :
+        Pseudo<(outs RetClass:$rd),
+               !if(CarryIn,
+                  (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, VMV0:$carry, AVL:$vl,
+                       ixlenimm:$sew),
+                  (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew)), []>,
+        RISCVVPseudo {
+  let mayLoad = 0;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let Constraints = Join<[Constraint, "$rd = $merge"], ",">.ret;
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasMergeOp = 1;
+  let HasVecPolicyOp = 0;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+  let VLMul = MInfo.value;
+}
+
 class VPseudoTernaryNoMask<VReg RetClass,
                            RegisterClass Op1Class,
                            DAGOperand Op2Class,
@@ -1741,6 +1769,16 @@ multiclass VPseudoBinaryV_VM<bit CarryOut = 0, bit CarryIn = 1,
                            m.vrclass, m.vrclass, m, CarryIn, Constraint>;
 }
 
+multiclass VPseudoTiedBinaryV_VM<bit CarryOut = 0, bit CarryIn = 1,
+                                 string Constraint = ""> {
+  foreach m = MxList in
+    def "_VV" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU" :
+      VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
+                               !if(!and(CarryIn, !not(CarryOut)),
+                                   GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+                               m.vrclass, m.vrclass, m, CarryIn, Constraint>;
+}
+
 multiclass VPseudoBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1,
                              string Constraint = ""> {
   foreach m = MxList in
@@ -1751,13 +1789,29 @@ multiclass VPseudoBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1,
                            m.vrclass, GPR, m, CarryIn, Constraint>;
 }
 
+multiclass VPseudoTiedBinaryV_XM<bit CarryOut = 0, bit CarryIn = 1,
+                                 string Constraint = ""> {
+  foreach m = MxList in
+    def "_VX" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU":
+      VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
+                               !if(!and(CarryIn, !not(CarryOut)),
+                                   GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+                               m.vrclass, GPR, m, CarryIn, Constraint>;
+}
+
 multiclass VPseudoVMRG_FM {
   foreach f = FPList in
-    foreach m = f.MxList in
+    foreach m = f.MxList in {
       def "_V" # f.FX # "M_" # m.MX :
         VPseudoBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
                              m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">,
         Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>;
+      // Tied version to allow codegen control over the tail elements
+      def "_V" # f.FX # "M_" # m.MX # "_TU":
+        VPseudoTiedBinaryCarryIn<GetVRegNoV0<m.vrclass>.R,
+                                 m.vrclass, f.fprclass, m, /*CarryIn=*/1, "">,
+        Sched<[WriteVFMergeV, ReadVFMergeV, ReadVFMergeF, ReadVMask]>;
+    }
 }
 
 multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
@@ -1770,6 +1824,16 @@ multiclass VPseudoBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
                            m.vrclass, simm5, m, CarryIn, Constraint>;
 }
 
+multiclass VPseudoTiedBinaryV_IM<bit CarryOut = 0, bit CarryIn = 1,
+                                 string Constraint = ""> {
+  foreach m = MxList in
+    def "_VI" # !if(CarryIn, "M", "") # "_" # m.MX # "_TU":
+      VPseudoTiedBinaryCarryIn<!if(CarryOut, VR,
+                               !if(!and(CarryIn, !not(CarryOut)),
+                                   GetVRegNoV0<m.vrclass>.R, m.vrclass)),
+                               m.vrclass, simm5, m, CarryIn, Constraint>;
+}
+
 multiclass VPseudoUnaryVMV_V_X_I {
   foreach m = MxList in {
     let VLMul = m.value in {
@@ -2104,6 +2168,13 @@ multiclass VPseudoVMRG_VM_XM_IM {
             Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
   defm "" : VPseudoBinaryV_IM,
             Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
+  // Tied versions to allow codegen control over the tail elements
+  defm "" : VPseudoTiedBinaryV_VM,
+            Sched<[WriteVIMergeV, ReadVIMergeV, ReadVIMergeV, ReadVMask]>;
+  defm "" : VPseudoTiedBinaryV_XM,
+            Sched<[WriteVIMergeX, ReadVIMergeV, ReadVIMergeX, ReadVMask]>;
+  defm "" : VPseudoTiedBinaryV_IM,
+            Sched<[WriteVIMergeI, ReadVIMergeV, ReadVMask]>;
 }
 
 multiclass VPseudoVCALU_VM_XM_IM {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 9e47cb80349bf..5cff16c32fe78 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -177,14 +177,13 @@ def riscv_vrgatherei16_vv_vl : SDNode<"RISCVISD::VRGATHEREI16_VV_VL",
                                                            SDTCisSameNumEltsAs<0, 3>,
                                                            SDTCisVT<4, XLenVT>]>>;
 
-def riscv_vselect_vl : SDNode<"RISCVISD::VSELECT_VL",
-                              SDTypeProfile<1, 4, [SDTCisVec<0>,
-                                                   SDTCisVec<1>,
-                                                   SDTCisSameNumEltsAs<0, 1>,
-                                                   SDTCVecEltisVT<1, i1>,
-                                                   SDTCisSameAs<0, 2>,
-                                                   SDTCisSameAs<2, 3>,
-                                                   SDTCisVT<4, XLenVT>]>>;
+def SDT_RISCVSelect_VL  : SDTypeProfile<1, 4, [
+  SDTCisVec<0>, SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, SDTCVecEltisVT<1, i1>,
+  SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisVT<4, XLenVT>
+]>;
+
+def riscv_vselect_vl  : SDNode<"RISCVISD::VSELECT_VL", SDT_RISCVSelect_VL>;
+def riscv_vp_merge_vl : SDNode<"RISCVISD::VP_MERGE_VL", SDT_RISCVSelect_VL>;
 
 def SDT_RISCVMaskBinOp_VL : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
                                                  SDTCisSameAs<0, 2>,
@@ -976,6 +975,30 @@ foreach vti = AllIntegerVectors in {
                                           VLOpFrag)),
             (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX)
                  vti.RegClass:$rs2, simm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+  def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+                                           vti.RegClass:$rs1,
+                                           vti.RegClass:$rs2,
+                                           VLOpFrag)),
+            (!cast<Instruction>("PseudoVMERGE_VVM_"#vti.LMul.MX#"_TU")
+                 vti.RegClass:$rs2, vti.RegClass:$rs2, vti.RegClass:$rs1,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+  def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+                                           (SplatPat XLenVT:$rs1),
+                                           vti.RegClass:$rs2,
+                                           VLOpFrag)),
+            (!cast<Instruction>("PseudoVMERGE_VXM_"#vti.LMul.MX#"_TU")
+                 vti.RegClass:$rs2, vti.RegClass:$rs2, GPR:$rs1,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
+
+  def : Pat<(vti.Vector (riscv_vp_merge_vl (vti.Mask V0),
+                                           (SplatPat_simm5 simm5:$rs1),
+                                           vti.RegClass:$rs2,
+                                           VLOpFrag)),
+            (!cast<Instruction>("PseudoVMERGE_VIM_"#vti.LMul.MX#"_TU")
+                 vti.RegClass:$rs2, vti.RegClass:$rs2, simm5:$rs1,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW)>;
 }
 
 // 12.16. Vector Integer Move Instructions
@@ -1223,6 +1246,31 @@ foreach fvti = AllFloatVectors in {
             (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX)
                  fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
 
+  def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+                                            fvti.RegClass:$rs1,
+                                            fvti.RegClass:$rs2,
+                                            VLOpFrag)),
+            (!cast<Instruction>("PseudoVMERGE_VVM_"#fvti.LMul.MX#"_TU")
+                 fvti.RegClass:$rs2, fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0),
+                 GPR:$vl, fvti.Log2SEW)>;
+
+  def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+                                            (SplatFPOp fvti.ScalarRegClass:$rs1),
+                                            fvti.RegClass:$rs2,
+                                            VLOpFrag)),
+            (!cast<Instruction>("PseudoVFMERGE_V"#fvti.ScalarSuffix#"M_"#fvti.LMul.MX#"_TU")
+                 fvti.RegClass:$rs2, fvti.RegClass:$rs2,
+                 (fvti.Scalar fvti.ScalarRegClass:$rs1),
+                 (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>;
+
+  def : Pat<(fvti.Vector (riscv_vp_merge_vl (fvti.Mask V0),
+                                            (SplatFPOp (fvti.Scalar fpimm0)),
+                                            fvti.RegClass:$rs2,
+                                            VLOpFrag)),
+            (!cast<Instruction>("PseudoVMERGE_VIM_"#fvti.LMul.MX#"_TU")
+                 fvti.RegClass:$rs2, fvti.RegClass:$rs2, 0, (fvti.Mask V0),
+                 GPR:$vl, fvti.Log2SEW)>;
+
   // 14.16. Vector Floating-Point Move Instruction
   // If we're splatting fpimm0, use vmv.v.x vd, x0.
   def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
new file mode 100644
index 0000000000000..e0cc13db7f118
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -0,0 +1,953 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
+
+define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vpmerge_vx_v2i8(i8 %a, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl)
+  ret <2 x i8> %v
+}
+
+define <2 x i8> @vpmerge_vi_v2i8(<2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i8> poison, i8 2, i32 0
+  %va = shufflevector <2 x i8> %elt.head, <2 x i8> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i8> @llvm.vp.merge.v2i8(<2 x i1> %m, <2 x i8> %va, <2 x i8> %vb, i32 %evl)
+  ret <2 x i8> %v
+}
+
+declare <4 x i8> @llvm.vp.merge.v4i8(<4 x i1>, <4 x i8>, <4 x i8>, i32)
+
+define <4 x i8> @vpmerge_vv_v4i8(<4 x i8> %va, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vpmerge_vx_v4i8(i8 %a, <4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl)
+  ret <4 x i8> %v
+}
+
+define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i8> poison, i8 2, i32 0
+  %va = shufflevector <4 x i8> %elt.head, <4 x i8> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i8> @llvm.vp.merge.v4i8(<4 x i1> %m, <4 x i8> %va, <4 x i8> %vb, i32 %evl)
+  ret <4 x i8> %v
+}
+
+declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
+
+define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vpmerge_vx_v8i8(i8 %a, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl)
+  ret <8 x i8> %v
+}
+
+define <8 x i8> @vpmerge_vi_v8i8(<8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i8> poison, i8 2, i32 0
+  %va = shufflevector <8 x i8> %elt.head, <8 x i8> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i8> @llvm.vp.merge.v8i8(<8 x i1> %m, <8 x i8> %va, <8 x i8> %vb, i32 %evl)
+  ret <8 x i8> %v
+}
+
+declare <16 x i8> @llvm.vp.merge.v16i8(<16 x i1>, <16 x i8>, <16 x i8>, i32)
+
+define <16 x i8> @vpmerge_vv_v16i8(<16 x i8> %va, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vpmerge_vx_v16i8(i8 %a, <16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl)
+  ret <16 x i8> %v
+}
+
+define <16 x i8> @vpmerge_vi_v16i8(<16 x i8> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i8> poison, i8 2, i32 0
+  %va = shufflevector <16 x i8> %elt.head, <16 x i8> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i8> @llvm.vp.merge.v16i8(<16 x i1> %m, <16 x i8> %va, <16 x i8> %vb, i32 %evl)
+  ret <16 x i8> %v
+}
+
+declare <2 x i16> @llvm.vp.merge.v2i16(<2 x i1>, <2 x i16>, <2 x i16>, i32)
+
+define <2 x i16> @vpmerge_vv_v2i16(<2 x i16> %va, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vpmerge_vx_v2i16(i16 %a, <2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl)
+  ret <2 x i16> %v
+}
+
+define <2 x i16> @vpmerge_vi_v2i16(<2 x i16> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i16> poison, i16 2, i32 0
+  %va = shufflevector <2 x i16> %elt.head, <2 x i16> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i16> @llvm.vp.merge.v2i16(<2 x i1> %m, <2 x i16> %va, <2 x i16> %vb, i32 %evl)
+  ret <2 x i16> %v
+}
+
+declare <4 x i16> @llvm.vp.merge.v4i16(<4 x i1>, <4 x i16>, <4 x i16>, i32)
+
+define <4 x i16> @vpmerge_vv_v4i16(<4 x i16> %va, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vpmerge_vx_v4i16(i16 %a, <4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl)
+  ret <4 x i16> %v
+}
+
+define <4 x i16> @vpmerge_vi_v4i16(<4 x i16> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i16> poison, i16 2, i32 0
+  %va = shufflevector <4 x i16> %elt.head, <4 x i16> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i16> @llvm.vp.merge.v4i16(<4 x i1> %m, <4 x i16> %va, <4 x i16> %vb, i32 %evl)
+  ret <4 x i16> %v
+}
+
+declare <8 x i16> @llvm.vp.merge.v8i16(<8 x i1>, <8 x i16>, <8 x i16>, i32)
+
+define <8 x i16> @vpmerge_vv_v8i16(<8 x i16> %va, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vpmerge_vx_v8i16(i16 %a, <8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl)
+  ret <8 x i16> %v
+}
+
+define <8 x i16> @vpmerge_vi_v8i16(<8 x i16> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i16> poison, i16 2, i32 0
+  %va = shufflevector <8 x i16> %elt.head, <8 x i16> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i16> @llvm.vp.merge.v8i16(<8 x i1> %m, <8 x i16> %va, <8 x i16> %vb, i32 %evl)
+  ret <8 x i16> %v
+}
+
+declare <16 x i16> @llvm.vp.merge.v16i16(<16 x i1>, <16 x i16>, <16 x i16>, i32)
+
+define <16 x i16> @vpmerge_vv_v16i16(<16 x i16> %va, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vpmerge_vx_v16i16(i16 %a, <16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl)
+  ret <16 x i16> %v
+}
+
+define <16 x i16> @vpmerge_vi_v16i16(<16 x i16> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i16> poison, i16 2, i32 0
+  %va = shufflevector <16 x i16> %elt.head, <16 x i16> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i16> @llvm.vp.merge.v16i16(<16 x i1> %m, <16 x i16> %va, <16 x i16> %vb, i32 %evl)
+  ret <16 x i16> %v
+}
+
+declare <2 x i32> @llvm.vp.merge.v2i32(<2 x i1>, <2 x i32>, <2 x i32>, i32)
+
+define <2 x i32> @vpmerge_vv_v2i32(<2 x i32> %va, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vpmerge_vx_v2i32(i32 %a, <2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl)
+  ret <2 x i32> %v
+}
+
+define <2 x i32> @vpmerge_vi_v2i32(<2 x i32> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i32> poison, i32 2, i32 0
+  %va = shufflevector <2 x i32> %elt.head, <2 x i32> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i32> @llvm.vp.merge.v2i32(<2 x i1> %m, <2 x i32> %va, <2 x i32> %vb, i32 %evl)
+  ret <2 x i32> %v
+}
+
+declare <4 x i32> @llvm.vp.merge.v4i32(<4 x i1>, <4 x i32>, <4 x i32>, i32)
+
+define <4 x i32> @vpmerge_vv_v4i32(<4 x i32> %va, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vpmerge_vx_v4i32(i32 %a, <4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl)
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @vpmerge_vi_v4i32(<4 x i32> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i32> poison, i32 2, i32 0
+  %va = shufflevector <4 x i32> %elt.head, <4 x i32> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i32> @llvm.vp.merge.v4i32(<4 x i1> %m, <4 x i32> %va, <4 x i32> %vb, i32 %evl)
+  ret <4 x i32> %v
+}
+
+declare <8 x i32> @llvm.vp.merge.v8i32(<8 x i1>, <8 x i32>, <8 x i32>, i32)
+
+define <8 x i32> @vpmerge_vv_v8i32(<8 x i32> %va, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vpmerge_vx_v8i32(i32 %a, <8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl)
+  ret <8 x i32> %v
+}
+
+define <8 x i32> @vpmerge_vi_v8i32(<8 x i32> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i32> poison, i32 2, i32 0
+  %va = shufflevector <8 x i32> %elt.head, <8 x i32> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i32> @llvm.vp.merge.v8i32(<8 x i1> %m, <8 x i32> %va, <8 x i32> %vb, i32 %evl)
+  ret <8 x i32> %v
+}
+
+declare <16 x i32> @llvm.vp.merge.v16i32(<16 x i1>, <16 x i32>, <16 x i32>, i32)
+
+define <16 x i32> @vpmerge_vv_v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vpmerge_vx_v16i32(i32 %a, <16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl)
+  ret <16 x i32> %v
+}
+
+define <16 x i32> @vpmerge_vi_v16i32(<16 x i32> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i32> poison, i32 2, i32 0
+  %va = shufflevector <16 x i32> %elt.head, <16 x i32> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i32> @llvm.vp.merge.v16i32(<16 x i1> %m, <16 x i32> %va, <16 x i32> %vb, i32 %evl)
+  ret <16 x i32> %v
+}
+
+declare <2 x i64> @llvm.vp.merge.v2i64(<2 x i1>, <2 x i64>, <2 x i64>, i32)
+
+define <2 x i64> @vpmerge_vv_v2i64(<2 x i64> %va, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vpmerge_vx_v2i64(i64 %a, <2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vx_v2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
+; RV32-NEXT:    vmerge.vvm v8, v8, v9, v0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vx_v2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 %a, i32 0
+  %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl)
+  ret <2 x i64> %v
+}
+
+define <2 x i64> @vpmerge_vi_v2i64(<2 x i64> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x i64> poison, i64 2, i32 0
+  %va = shufflevector <2 x i64> %elt.head, <2 x i64> poison, <2 x i32> zeroinitializer
+  %v = call <2 x i64> @llvm.vp.merge.v2i64(<2 x i1> %m, <2 x i64> %va, <2 x i64> %vb, i32 %evl)
+  ret <2 x i64> %v
+}
+
+declare <4 x i64> @llvm.vp.merge.v4i64(<4 x i1>, <4 x i64>, <4 x i64>, i32)
+
+define <4 x i64> @vpmerge_vv_v4i64(<4 x i64> %va, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vpmerge_vx_v4i64(i64 %a, <4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vx_v4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
+; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vx_v4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 %a, i32 0
+  %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl)
+  ret <4 x i64> %v
+}
+
+define <4 x i64> @vpmerge_vi_v4i64(<4 x i64> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x i64> poison, i64 2, i32 0
+  %va = shufflevector <4 x i64> %elt.head, <4 x i64> poison, <4 x i32> zeroinitializer
+  %v = call <4 x i64> @llvm.vp.merge.v4i64(<4 x i1> %m, <4 x i64> %va, <4 x i64> %vb, i32 %evl)
+  ret <4 x i64> %v
+}
+
+declare <8 x i64> @llvm.vp.merge.v8i64(<8 x i1>, <8 x i64>, <8 x i64>, i32)
+
+define <8 x i64> @vpmerge_vv_v8i64(<8 x i64> %va, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vpmerge_vx_v8i64(i64 %a, <8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vx_v8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
+; RV32-NEXT:    vmerge.vvm v8, v8, v12, v0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vx_v8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 %a, i32 0
+  %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl)
+  ret <8 x i64> %v
+}
+
+define <8 x i64> @vpmerge_vi_v8i64(<8 x i64> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x i64> poison, i64 2, i32 0
+  %va = shufflevector <8 x i64> %elt.head, <8 x i64> poison, <8 x i32> zeroinitializer
+  %v = call <8 x i64> @llvm.vp.merge.v8i64(<8 x i1> %m, <8 x i64> %va, <8 x i64> %vb, i32 %evl)
+  ret <8 x i64> %v
+}
+
+declare <16 x i64> @llvm.vp.merge.v16i64(<16 x i1>, <16 x i64>, <16 x i64>, i32)
+
+define <16 x i64> @vpmerge_vv_v16i64(<16 x i64> %va, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vpmerge_vx_v16i64(i64 %a, <16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vx_v16i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
+; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vx_v16i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 %a, i32 0
+  %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl)
+  ret <16 x i64> %v
+}
+
+define <16 x i64> @vpmerge_vi_v16i64(<16 x i64> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v16i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x i64> poison, i64 2, i32 0
+  %va = shufflevector <16 x i64> %elt.head, <16 x i64> poison, <16 x i32> zeroinitializer
+  %v = call <16 x i64> @llvm.vp.merge.v16i64(<16 x i1> %m, <16 x i64> %va, <16 x i64> %vb, i32 %evl)
+  ret <16 x i64> %v
+}
+
+declare <2 x half> @llvm.vp.merge.v2f16(<2 x i1>, <2 x half>, <2 x half>, i32)
+
+define <2 x half> @vpmerge_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl)
+  ret <2 x half> %v
+}
+
+define <2 x half> @vpmerge_vf_v2f16(half %a, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x half> poison, half %a, i32 0
+  %va = shufflevector <2 x half> %elt.head, <2 x half> poison, <2 x i32> zeroinitializer
+  %v = call <2 x half> @llvm.vp.merge.v2f16(<2 x i1> %m, <2 x half> %va, <2 x half> %vb, i32 %evl)
+  ret <2 x half> %v
+}
+
+declare <4 x half> @llvm.vp.merge.v4f16(<4 x i1>, <4 x half>, <4 x half>, i32)
+
+define <4 x half> @vpmerge_vv_v4f16(<4 x half> %va, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl)
+  ret <4 x half> %v
+}
+
+define <4 x half> @vpmerge_vf_v4f16(half %a, <4 x half> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x half> poison, half %a, i32 0
+  %va = shufflevector <4 x half> %elt.head, <4 x half> poison, <4 x i32> zeroinitializer
+  %v = call <4 x half> @llvm.vp.merge.v4f16(<4 x i1> %m, <4 x half> %va, <4 x half> %vb, i32 %evl)
+  ret <4 x half> %v
+}
+
+declare <8 x half> @llvm.vp.merge.v8f16(<8 x i1>, <8 x half>, <8 x half>, i32)
+
+define <8 x half> @vpmerge_vv_v8f16(<8 x half> %va, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl)
+  ret <8 x half> %v
+}
+
+define <8 x half> @vpmerge_vf_v8f16(half %a, <8 x half> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x half> poison, half %a, i32 0
+  %va = shufflevector <8 x half> %elt.head, <8 x half> poison, <8 x i32> zeroinitializer
+  %v = call <8 x half> @llvm.vp.merge.v8f16(<8 x i1> %m, <8 x half> %va, <8 x half> %vb, i32 %evl)
+  ret <8 x half> %v
+}
+
+declare <16 x half> @llvm.vp.merge.v16f16(<16 x i1>, <16 x half>, <16 x half>, i32)
+
+define <16 x half> @vpmerge_vv_v16f16(<16 x half> %va, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl)
+  ret <16 x half> %v
+}
+
+define <16 x half> @vpmerge_vf_v16f16(half %a, <16 x half> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x half> poison, half %a, i32 0
+  %va = shufflevector <16 x half> %elt.head, <16 x half> poison, <16 x i32> zeroinitializer
+  %v = call <16 x half> @llvm.vp.merge.v16f16(<16 x i1> %m, <16 x half> %va, <16 x half> %vb, i32 %evl)
+  ret <16 x half> %v
+}
+
+declare <2 x float> @llvm.vp.merge.v2f32(<2 x i1>, <2 x float>, <2 x float>, i32)
+
+define <2 x float> @vpmerge_vv_v2f32(<2 x float> %va, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl)
+  ret <2 x float> %v
+}
+
+define <2 x float> @vpmerge_vf_v2f32(float %a, <2 x float> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x float> poison, float %a, i32 0
+  %va = shufflevector <2 x float> %elt.head, <2 x float> poison, <2 x i32> zeroinitializer
+  %v = call <2 x float> @llvm.vp.merge.v2f32(<2 x i1> %m, <2 x float> %va, <2 x float> %vb, i32 %evl)
+  ret <2 x float> %v
+}
+
+declare <4 x float> @llvm.vp.merge.v4f32(<4 x i1>, <4 x float>, <4 x float>, i32)
+
+define <4 x float> @vpmerge_vv_v4f32(<4 x float> %va, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl)
+  ret <4 x float> %v
+}
+
+define <4 x float> @vpmerge_vf_v4f32(float %a, <4 x float> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x float> poison, float %a, i32 0
+  %va = shufflevector <4 x float> %elt.head, <4 x float> poison, <4 x i32> zeroinitializer
+  %v = call <4 x float> @llvm.vp.merge.v4f32(<4 x i1> %m, <4 x float> %va, <4 x float> %vb, i32 %evl)
+  ret <4 x float> %v
+}
+
+declare <8 x float> @llvm.vp.merge.v8f32(<8 x i1>, <8 x float>, <8 x float>, i32)
+
+define <8 x float> @vpmerge_vv_v8f32(<8 x float> %va, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl)
+  ret <8 x float> %v
+}
+
+define <8 x float> @vpmerge_vf_v8f32(float %a, <8 x float> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x float> poison, float %a, i32 0
+  %va = shufflevector <8 x float> %elt.head, <8 x float> poison, <8 x i32> zeroinitializer
+  %v = call <8 x float> @llvm.vp.merge.v8f32(<8 x i1> %m, <8 x float> %va, <8 x float> %vb, i32 %evl)
+  ret <8 x float> %v
+}
+
+declare <16 x float> @llvm.vp.merge.v16f32(<16 x i1>, <16 x float>, <16 x float>, i32)
+
+define <16 x float> @vpmerge_vv_v16f32(<16 x float> %va, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl)
+  ret <16 x float> %v
+}
+
+define <16 x float> @vpmerge_vf_v16f32(float %a, <16 x float> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x float> poison, float %a, i32 0
+  %va = shufflevector <16 x float> %elt.head, <16 x float> poison, <16 x i32> zeroinitializer
+  %v = call <16 x float> @llvm.vp.merge.v16f32(<16 x i1> %m, <16 x float> %va, <16 x float> %vb, i32 %evl)
+  ret <16 x float> %v
+}
+
+declare <2 x double> @llvm.vp.merge.v2f64(<2 x i1>, <2 x double>, <2 x double>, i32)
+
+define <2 x double> @vpmerge_vv_v2f64(<2 x double> %va, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl)
+  ret <2 x double> %v
+}
+
+define <2 x double> @vpmerge_vf_v2f64(double %a, <2 x double> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <2 x double> poison, double %a, i32 0
+  %va = shufflevector <2 x double> %elt.head, <2 x double> poison, <2 x i32> zeroinitializer
+  %v = call <2 x double> @llvm.vp.merge.v2f64(<2 x i1> %m, <2 x double> %va, <2 x double> %vb, i32 %evl)
+  ret <2 x double> %v
+}
+
+declare <4 x double> @llvm.vp.merge.v4f64(<4 x i1>, <4 x double>, <4 x double>, i32)
+
+define <4 x double> @vpmerge_vv_v4f64(<4 x double> %va, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl)
+  ret <4 x double> %v
+}
+
+define <4 x double> @vpmerge_vf_v4f64(double %a, <4 x double> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <4 x double> poison, double %a, i32 0
+  %va = shufflevector <4 x double> %elt.head, <4 x double> poison, <4 x i32> zeroinitializer
+  %v = call <4 x double> @llvm.vp.merge.v4f64(<4 x i1> %m, <4 x double> %va, <4 x double> %vb, i32 %evl)
+  ret <4 x double> %v
+}
+
+declare <8 x double> @llvm.vp.merge.v8f64(<8 x i1>, <8 x double>, <8 x double>, i32)
+
+define <8 x double> @vpmerge_vv_v8f64(<8 x double> %va, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl)
+  ret <8 x double> %v
+}
+
+define <8 x double> @vpmerge_vf_v8f64(double %a, <8 x double> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <8 x double> poison, double %a, i32 0
+  %va = shufflevector <8 x double> %elt.head, <8 x double> poison, <8 x i32> zeroinitializer
+  %v = call <8 x double> @llvm.vp.merge.v8f64(<8 x i1> %m, <8 x double> %va, <8 x double> %vb, i32 %evl)
+  ret <8 x double> %v
+}
+
+declare <16 x double> @llvm.vp.merge.v16f64(<16 x i1>, <16 x double>, <16 x double>, i32)
+
+define <16 x double> @vpmerge_vv_v16f64(<16 x double> %va, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
+  ret <16 x double> %v
+}
+
+define <16 x double> @vpmerge_vf_v16f64(double %a, <16 x double> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v16f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <16 x double> poison, double %a, i32 0
+  %va = shufflevector <16 x double> %elt.head, <16 x double> poison, <16 x i32> zeroinitializer
+  %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
+  ret <16 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
new file mode 100644
index 0000000000000..46ebd0a6acf50
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -0,0 +1,1280 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
+
+define <vscale x 1 x i8> @vpmerge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vpmerge_vx_nxv1i8(i8 %a, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+define <vscale x 1 x i8> @vpmerge_vi_nxv1i8(<vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 1 x i8> %elt.head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1> %m, <vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, i32 %evl)
+  ret <vscale x 1 x i8> %v
+}
+
+declare <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1>, <vscale x 2 x i8>, <vscale x 2 x i8>, i32)
+
+define <vscale x 2 x i8> @vpmerge_vv_nxv2i8(<vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vpmerge_vx_nxv2i8(i8 %a, <vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+define <vscale x 2 x i8> @vpmerge_vi_nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 2 x i8> %elt.head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i8> @llvm.vp.merge.nxv2i8(<vscale x 2 x i1> %m, <vscale x 2 x i8> %va, <vscale x 2 x i8> %vb, i32 %evl)
+  ret <vscale x 2 x i8> %v
+}
+
+declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
+
+define <vscale x 4 x i8> @vpmerge_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vpmerge_vx_nxv4i8(i8 %a, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+define <vscale x 4 x i8> @vpmerge_vi_nxv4i8(<vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 4 x i8> %elt.head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1> %m, <vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, i32 %evl)
+  ret <vscale x 4 x i8> %v
+}
+
+declare <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1>, <vscale x 8 x i8>, <vscale x 8 x i8>, i32)
+
+define <vscale x 8 x i8> @vpmerge_vv_nxv8i8(<vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vpmerge_vx_nxv8i8(i8 %a, <vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+define <vscale x 8 x i8> @vpmerge_vi_nxv8i8(<vscale x 8 x i8> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m1, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 8 x i8> %elt.head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i8> @llvm.vp.merge.nxv8i8(<vscale x 8 x i1> %m, <vscale x 8 x i8> %va, <vscale x 8 x i8> %vb, i32 %evl)
+  ret <vscale x 8 x i8> %v
+}
+
+declare <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+
+define <vscale x 16 x i8> @vpmerge_vv_nxv16i8(<vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vpmerge_vx_nxv16i8(i8 %a, <vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+define <vscale x 16 x i8> @vpmerge_vi_nxv16i8(<vscale x 16 x i8> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 16 x i8> %elt.head, <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i8> @llvm.vp.merge.nxv16i8(<vscale x 16 x i1> %m, <vscale x 16 x i8> %va, <vscale x 16 x i8> %vb, i32 %evl)
+  ret <vscale x 16 x i8> %v
+}
+
+declare <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1>, <vscale x 32 x i8>, <vscale x 32 x i8>, i32)
+
+define <vscale x 32 x i8> @vpmerge_vv_nxv32i8(<vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vpmerge_vx_nxv32i8(i8 %a, <vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+define <vscale x 32 x i8> @vpmerge_vi_nxv32i8(<vscale x 32 x i8> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 32 x i8> %elt.head, <vscale x 32 x i8> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i8> @llvm.vp.merge.nxv32i8(<vscale x 32 x i1> %m, <vscale x 32 x i8> %va, <vscale x 32 x i8> %vb, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
+declare <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1>, <vscale x 64 x i8>, <vscale x 64 x i8>, i32)
+
+define <vscale x 64 x i8> @vpmerge_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vpmerge_vx_nxv64i8(i8 %a, <vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+define <vscale x 64 x i8> @vpmerge_vi_nxv64i8(<vscale x 64 x i8> %vb, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 64 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 64 x i8> %elt.head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
+  %v = call <vscale x 64 x i8> @llvm.vp.merge.nxv64i8(<vscale x 64 x i1> %m, <vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, i32 %evl)
+  ret <vscale x 64 x i8> %v
+}
+
+declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
+
+define <vscale x 1 x i16> @vpmerge_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vpmerge_vx_nxv1i16(i16 %a, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+define <vscale x 1 x i16> @vpmerge_vi_nxv1i16(<vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i16> poison, i16 2, i32 0
+  %va = shufflevector <vscale x 1 x i16> %elt.head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1> %m, <vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, i32 %evl)
+  ret <vscale x 1 x i16> %v
+}
+
+declare <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1>, <vscale x 2 x i16>, <vscale x 2 x i16>, i32)
+
+define <vscale x 2 x i16> @vpmerge_vv_nxv2i16(<vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vpmerge_vx_nxv2i16(i16 %a, <vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+define <vscale x 2 x i16> @vpmerge_vi_nxv2i16(<vscale x 2 x i16> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i16> poison, i16 2, i32 0
+  %va = shufflevector <vscale x 2 x i16> %elt.head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i16> @llvm.vp.merge.nxv2i16(<vscale x 2 x i1> %m, <vscale x 2 x i16> %va, <vscale x 2 x i16> %vb, i32 %evl)
+  ret <vscale x 2 x i16> %v
+}
+
+declare <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1>, <vscale x 4 x i16>, <vscale x 4 x i16>, i32)
+
+define <vscale x 4 x i16> @vpmerge_vv_nxv4i16(<vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vpmerge_vx_nxv4i16(i16 %a, <vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+define <vscale x 4 x i16> @vpmerge_vi_nxv4i16(<vscale x 4 x i16> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i16> poison, i16 2, i32 0
+  %va = shufflevector <vscale x 4 x i16> %elt.head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i16> @llvm.vp.merge.nxv4i16(<vscale x 4 x i1> %m, <vscale x 4 x i16> %va, <vscale x 4 x i16> %vb, i32 %evl)
+  ret <vscale x 4 x i16> %v
+}
+
+declare <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+
+define <vscale x 8 x i16> @vpmerge_vv_nxv8i16(<vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vpmerge_vx_nxv8i16(i16 %a, <vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+define <vscale x 8 x i16> @vpmerge_vi_nxv8i16(<vscale x 8 x i16> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i16> poison, i16 2, i32 0
+  %va = shufflevector <vscale x 8 x i16> %elt.head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i16> @llvm.vp.merge.nxv8i16(<vscale x 8 x i1> %m, <vscale x 8 x i16> %va, <vscale x 8 x i16> %vb, i32 %evl)
+  ret <vscale x 8 x i16> %v
+}
+
+declare <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1>, <vscale x 16 x i16>, <vscale x 16 x i16>, i32)
+
+define <vscale x 16 x i16> @vpmerge_vv_nxv16i16(<vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vpmerge_vx_nxv16i16(i16 %a, <vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+define <vscale x 16 x i16> @vpmerge_vi_nxv16i16(<vscale x 16 x i16> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i16> poison, i16 2, i32 0
+  %va = shufflevector <vscale x 16 x i16> %elt.head, <vscale x 16 x i16> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i16> @llvm.vp.merge.nxv16i16(<vscale x 16 x i1> %m, <vscale x 16 x i16> %va, <vscale x 16 x i16> %vb, i32 %evl)
+  ret <vscale x 16 x i16> %v
+}
+
+declare <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1>, <vscale x 32 x i16>, <vscale x 32 x i16>, i32)
+
+define <vscale x 32 x i16> @vpmerge_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vpmerge_vx_nxv32i16(i16 %a, <vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 %a, i32 0
+  %va = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+define <vscale x 32 x i16> @vpmerge_vi_nxv32i16(<vscale x 32 x i16> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x i16> poison, i16 2, i32 0
+  %va = shufflevector <vscale x 32 x i16> %elt.head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x i16> @llvm.vp.merge.nxv32i16(<vscale x 32 x i1> %m, <vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, i32 %evl)
+  ret <vscale x 32 x i16> %v
+}
+
+declare <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1>, <vscale x 1 x i32>, <vscale x 1 x i32>, i32)
+
+define <vscale x 1 x i32> @vpmerge_vv_nxv1i32(<vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vpmerge_vx_nxv1i32(i32 %a, <vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+define <vscale x 1 x i32> @vpmerge_vi_nxv1i32(<vscale x 1 x i32> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i32> poison, i32 2, i32 0
+  %va = shufflevector <vscale x 1 x i32> %elt.head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i32> @llvm.vp.merge.nxv1i32(<vscale x 1 x i1> %m, <vscale x 1 x i32> %va, <vscale x 1 x i32> %vb, i32 %evl)
+  ret <vscale x 1 x i32> %v
+}
+
+declare <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1>, <vscale x 2 x i32>, <vscale x 2 x i32>, i32)
+
+define <vscale x 2 x i32> @vpmerge_vv_nxv2i32(<vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vpmerge_vx_nxv2i32(i32 %a, <vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+define <vscale x 2 x i32> @vpmerge_vi_nxv2i32(<vscale x 2 x i32> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i32> poison, i32 2, i32 0
+  %va = shufflevector <vscale x 2 x i32> %elt.head, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %va, <vscale x 2 x i32> %vb, i32 %evl)
+  ret <vscale x 2 x i32> %v
+}
+
+declare <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+
+define <vscale x 4 x i32> @vpmerge_vv_nxv4i32(<vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vpmerge_vx_nxv4i32(i32 %a, <vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+define <vscale x 4 x i32> @vpmerge_vi_nxv4i32(<vscale x 4 x i32> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i32> poison, i32 2, i32 0
+  %va = shufflevector <vscale x 4 x i32> %elt.head, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> %m, <vscale x 4 x i32> %va, <vscale x 4 x i32> %vb, i32 %evl)
+  ret <vscale x 4 x i32> %v
+}
+
+declare <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1>, <vscale x 8 x i32>, <vscale x 8 x i32>, i32)
+
+define <vscale x 8 x i32> @vpmerge_vv_nxv8i32(<vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vpmerge_vx_nxv8i32(i32 %a, <vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+define <vscale x 8 x i32> @vpmerge_vi_nxv8i32(<vscale x 8 x i32> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i32> poison, i32 2, i32 0
+  %va = shufflevector <vscale x 8 x i32> %elt.head, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i32> @llvm.vp.merge.nxv8i32(<vscale x 8 x i1> %m, <vscale x 8 x i32> %va, <vscale x 8 x i32> %vb, i32 %evl)
+  ret <vscale x 8 x i32> %v
+}
+
+declare <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1>, <vscale x 16 x i32>, <vscale x 16 x i32>, i32)
+
+define <vscale x 16 x i32> @vpmerge_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vpmerge_vx_nxv16i32(i32 %a, <vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 %a, i32 0
+  %va = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+define <vscale x 16 x i32> @vpmerge_vi_nxv16i32(<vscale x 16 x i32> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x i32> poison, i32 2, i32 0
+  %va = shufflevector <vscale x 16 x i32> %elt.head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x i32> @llvm.vp.merge.nxv16i32(<vscale x 16 x i1> %m, <vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, i32 %evl)
+  ret <vscale x 16 x i32> %v
+}
+
+declare <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1>, <vscale x 1 x i64>, <vscale x 1 x i64>, i32)
+
+define <vscale x 1 x i64> @vpmerge_vv_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpmerge_vx_nxv1i64(i64 %a, <vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vx_nxv1i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v9, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m1, tu, mu
+; RV32-NEXT:    vmerge.vvm v8, v8, v9, v0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vx_nxv1i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m1, tu, mu
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 %a, i32 0
+  %va = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+define <vscale x 1 x i64> @vpmerge_vi_nxv1i64(<vscale x 1 x i64> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x i64> poison, i64 2, i32 0
+  %va = shufflevector <vscale x 1 x i64> %elt.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x i64> @llvm.vp.merge.nxv1i64(<vscale x 1 x i1> %m, <vscale x 1 x i64> %va, <vscale x 1 x i64> %vb, i32 %evl)
+  ret <vscale x 1 x i64> %v
+}
+
+declare <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+define <vscale x 2 x i64> @vpmerge_vv_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vpmerge_vx_nxv2i64(i64 %a, <vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vx_nxv2i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v10, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m2, tu, mu
+; RV32-NEXT:    vmerge.vvm v8, v8, v10, v0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vx_nxv2i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m2, tu, mu
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 %a, i32 0
+  %va = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+define <vscale x 2 x i64> @vpmerge_vi_nxv2i64(<vscale x 2 x i64> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x i64> poison, i64 2, i32 0
+  %va = shufflevector <vscale x 2 x i64> %elt.head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x i64> @llvm.vp.merge.nxv2i64(<vscale x 2 x i1> %m, <vscale x 2 x i64> %va, <vscale x 2 x i64> %vb, i32 %evl)
+  ret <vscale x 2 x i64> %v
+}
+
+declare <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1>, <vscale x 4 x i64>, <vscale x 4 x i64>, i32)
+
+define <vscale x 4 x i64> @vpmerge_vv_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vpmerge_vx_nxv4i64(i64 %a, <vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vx_nxv4i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v12, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m4, tu, mu
+; RV32-NEXT:    vmerge.vvm v8, v8, v12, v0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vx_nxv4i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m4, tu, mu
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 %a, i32 0
+  %va = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+define <vscale x 4 x i64> @vpmerge_vi_nxv4i64(<vscale x 4 x i64> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x i64> poison, i64 2, i32 0
+  %va = shufflevector <vscale x 4 x i64> %elt.head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x i64> @llvm.vp.merge.nxv4i64(<vscale x 4 x i1> %m, <vscale x 4 x i64> %va, <vscale x 4 x i64> %vb, i32 %evl)
+  ret <vscale x 4 x i64> %v
+}
+
+declare <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1>, <vscale x 8 x i64>, <vscale x 8 x i64>, i32)
+
+define <vscale x 8 x i64> @vpmerge_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vpmerge_vx_nxv8i64(i64 %a, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vx_nxv8i64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    sw a0, 8(sp)
+; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-NEXT:    addi a0, sp, 8
+; RV32-NEXT:    vlse64.v v16, (a0), zero
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
+; RV32-NEXT:    vmerge.vvm v8, v8, v16, v0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vx_nxv8i64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
+; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 %a, i32 0
+  %va = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+define <vscale x 8 x i64> @vpmerge_vi_nxv8i64(<vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x i64> poison, i64 2, i32 0
+  %va = shufflevector <vscale x 8 x i64> %elt.head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x i64> @llvm.vp.merge.nxv8i64(<vscale x 8 x i1> %m, <vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, i32 %evl)
+  ret <vscale x 8 x i64> %v
+}
+
+declare <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1>, <vscale x 1 x half>, <vscale x 1 x half>, i32)
+
+define <vscale x 1 x half> @vpmerge_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %m, <vscale x 1 x half> %va, <vscale x 1 x half> %vb, i32 %evl)
+  ret <vscale x 1 x half> %v
+}
+
+define <vscale x 1 x half> @vpmerge_vf_nxv1f16(half %a, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x half> poison, half %a, i32 0
+  %va = shufflevector <vscale x 1 x half> %elt.head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x half> @llvm.vp.merge.nxv1f16(<vscale x 1 x i1> %m, <vscale x 1 x half> %va, <vscale x 1 x half> %vb, i32 %evl)
+  ret <vscale x 1 x half> %v
+}
+
+declare <vscale x 2 x half> @llvm.vp.merge.nxv2f16(<vscale x 2 x i1>, <vscale x 2 x half>, <vscale x 2 x half>, i32)
+
+define <vscale x 2 x half> @vpmerge_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x half> @llvm.vp.merge.nxv2f16(<vscale x 2 x i1> %m, <vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 %evl)
+  ret <vscale x 2 x half> %v
+}
+
+define <vscale x 2 x half> @vpmerge_vf_nxv2f16(half %a, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x half> poison, half %a, i32 0
+  %va = shufflevector <vscale x 2 x half> %elt.head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x half> @llvm.vp.merge.nxv2f16(<vscale x 2 x i1> %m, <vscale x 2 x half> %va, <vscale x 2 x half> %vb, i32 %evl)
+  ret <vscale x 2 x half> %v
+}
+
+declare <vscale x 4 x half> @llvm.vp.merge.nxv4f16(<vscale x 4 x i1>, <vscale x 4 x half>, <vscale x 4 x half>, i32)
+
+define <vscale x 4 x half> @vpmerge_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x half> @llvm.vp.merge.nxv4f16(<vscale x 4 x i1> %m, <vscale x 4 x half> %va, <vscale x 4 x half> %vb, i32 %evl)
+  ret <vscale x 4 x half> %v
+}
+
+define <vscale x 4 x half> @vpmerge_vf_nxv4f16(half %a, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x half> poison, half %a, i32 0
+  %va = shufflevector <vscale x 4 x half> %elt.head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x half> @llvm.vp.merge.nxv4f16(<vscale x 4 x i1> %m, <vscale x 4 x half> %va, <vscale x 4 x half> %vb, i32 %evl)
+  ret <vscale x 4 x half> %v
+}
+
+declare <vscale x 8 x half> @llvm.vp.merge.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, i32)
+
+define <vscale x 8 x half> @vpmerge_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x half> @llvm.vp.merge.nxv8f16(<vscale x 8 x i1> %m, <vscale x 8 x half> %va, <vscale x 8 x half> %vb, i32 %evl)
+  ret <vscale x 8 x half> %v
+}
+
+define <vscale x 8 x half> @vpmerge_vf_nxv8f16(half %a, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x half> poison, half %a, i32 0
+  %va = shufflevector <vscale x 8 x half> %elt.head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x half> @llvm.vp.merge.nxv8f16(<vscale x 8 x i1> %m, <vscale x 8 x half> %va, <vscale x 8 x half> %vb, i32 %evl)
+  ret <vscale x 8 x half> %v
+}
+
+declare <vscale x 16 x half> @llvm.vp.merge.nxv16f16(<vscale x 16 x i1>, <vscale x 16 x half>, <vscale x 16 x half>, i32)
+
+define <vscale x 16 x half> @vpmerge_vv_nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x half> @llvm.vp.merge.nxv16f16(<vscale x 16 x i1> %m, <vscale x 16 x half> %va, <vscale x 16 x half> %vb, i32 %evl)
+  ret <vscale x 16 x half> %v
+}
+
+define <vscale x 16 x half> @vpmerge_vf_nxv16f16(half %a, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
+; CHECK-NEXT:    vfmv.v.f v12, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x half> poison, half %a, i32 0
+  %va = shufflevector <vscale x 16 x half> %elt.head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x half> @llvm.vp.merge.nxv16f16(<vscale x 16 x i1> %m, <vscale x 16 x half> %va, <vscale x 16 x half> %vb, i32 %evl)
+  ret <vscale x 16 x half> %v
+}
+
+declare <vscale x 32 x half> @llvm.vp.merge.nxv32f16(<vscale x 32 x i1>, <vscale x 32 x half>, <vscale x 32 x half>, i32)
+
+define <vscale x 32 x half> @vpmerge_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 32 x half> @llvm.vp.merge.nxv32f16(<vscale x 32 x i1> %m, <vscale x 32 x half> %va, <vscale x 32 x half> %vb, i32 %evl)
+  ret <vscale x 32 x half> %v
+}
+
+define <vscale x 32 x half> @vpmerge_vf_nxv32f16(half %a, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m8, ta, mu
+; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e16, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 32 x half> poison, half %a, i32 0
+  %va = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
+  %v = call <vscale x 32 x half> @llvm.vp.merge.nxv32f16(<vscale x 32 x i1> %m, <vscale x 32 x half> %va, <vscale x 32 x half> %vb, i32 %evl)
+  ret <vscale x 32 x half> %v
+}
+
+declare <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1>, <vscale x 1 x float>, <vscale x 1 x float>, i32)
+
+define <vscale x 1 x float> @vpmerge_vv_nxv1f32(<vscale x 1 x float> %va, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %va, <vscale x 1 x float> %vb, i32 %evl)
+  ret <vscale x 1 x float> %v
+}
+
+define <vscale x 1 x float> @vpmerge_vf_nxv1f32(float %a, <vscale x 1 x float> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e32, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x float> poison, float %a, i32 0
+  %va = shufflevector <vscale x 1 x float> %elt.head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x float> @llvm.vp.merge.nxv1f32(<vscale x 1 x i1> %m, <vscale x 1 x float> %va, <vscale x 1 x float> %vb, i32 %evl)
+  ret <vscale x 1 x float> %v
+}
+
+declare <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>, <vscale x 2 x float>, i32)
+
+define <vscale x 2 x float> @vpmerge_vv_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %va, <vscale x 2 x float> %vb, i32 %evl)
+  ret <vscale x 2 x float> %v
+}
+
+define <vscale x 2 x float> @vpmerge_vf_nxv2f32(float %a, <vscale x 2 x float> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x float> poison, float %a, i32 0
+  %va = shufflevector <vscale x 2 x float> %elt.head, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x float> @llvm.vp.merge.nxv2f32(<vscale x 2 x i1> %m, <vscale x 2 x float> %va, <vscale x 2 x float> %vb, i32 %evl)
+  ret <vscale x 2 x float> %v
+}
+
+declare <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, i32)
+
+define <vscale x 4 x float> @vpmerge_vv_nxv4f32(<vscale x 4 x float> %va, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> %m, <vscale x 4 x float> %va, <vscale x 4 x float> %vb, i32 %evl)
+  ret <vscale x 4 x float> %v
+}
+
+define <vscale x 4 x float> @vpmerge_vf_nxv4f32(float %a, <vscale x 4 x float> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x float> poison, float %a, i32 0
+  %va = shufflevector <vscale x 4 x float> %elt.head, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> %m, <vscale x 4 x float> %va, <vscale x 4 x float> %vb, i32 %evl)
+  ret <vscale x 4 x float> %v
+}
+
+declare <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1>, <vscale x 8 x float>, <vscale x 8 x float>, i32)
+
+define <vscale x 8 x float> @vpmerge_vv_nxv8f32(<vscale x 8 x float> %va, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1> %m, <vscale x 8 x float> %va, <vscale x 8 x float> %vb, i32 %evl)
+  ret <vscale x 8 x float> %v
+}
+
+define <vscale x 8 x float> @vpmerge_vf_nxv8f32(float %a, <vscale x 8 x float> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; CHECK-NEXT:    vfmv.v.f v12, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x float> poison, float %a, i32 0
+  %va = shufflevector <vscale x 8 x float> %elt.head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x float> @llvm.vp.merge.nxv8f32(<vscale x 8 x i1> %m, <vscale x 8 x float> %va, <vscale x 8 x float> %vb, i32 %evl)
+  ret <vscale x 8 x float> %v
+}
+
+declare <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1>, <vscale x 16 x float>, <vscale x 16 x float>, i32)
+
+define <vscale x 16 x float> @vpmerge_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1> %m, <vscale x 16 x float> %va, <vscale x 16 x float> %vb, i32 %evl)
+  ret <vscale x 16 x float> %v
+}
+
+define <vscale x 16 x float> @vpmerge_vf_nxv16f32(float %a, <vscale x 16 x float> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv16f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e32, m8, ta, mu
+; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 16 x float> poison, float %a, i32 0
+  %va = shufflevector <vscale x 16 x float> %elt.head, <vscale x 16 x float> poison, <vscale x 16 x i32> zeroinitializer
+  %v = call <vscale x 16 x float> @llvm.vp.merge.nxv16f32(<vscale x 16 x i1> %m, <vscale x 16 x float> %va, <vscale x 16 x float> %vb, i32 %evl)
+  ret <vscale x 16 x float> %v
+}
+
+declare <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1>, <vscale x 1 x double>, <vscale x 1 x double>, i32)
+
+define <vscale x 1 x double> @vpmerge_vv_nxv1f64(<vscale x 1 x double> %va, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1> %m, <vscale x 1 x double> %va, <vscale x 1 x double> %vb, i32 %evl)
+  ret <vscale x 1 x double> %v
+}
+
+define <vscale x 1 x double> @vpmerge_vf_nxv1f64(double %a, <vscale x 1 x double> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv1f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; CHECK-NEXT:    vfmv.v.f v9, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m1, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 1 x double> poison, double %a, i32 0
+  %va = shufflevector <vscale x 1 x double> %elt.head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
+  %v = call <vscale x 1 x double> @llvm.vp.merge.nxv1f64(<vscale x 1 x i1> %m, <vscale x 1 x double> %va, <vscale x 1 x double> %vb, i32 %evl)
+  ret <vscale x 1 x double> %v
+}
+
+declare <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, i32)
+
+define <vscale x 2 x double> @vpmerge_vv_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v10, v10, v8, v0
+; CHECK-NEXT:    vmv2r.v v8, v10
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %va, <vscale x 2 x double> %vb, i32 %evl)
+  ret <vscale x 2 x double> %v
+}
+
+define <vscale x 2 x double> @vpmerge_vf_nxv2f64(double %a, <vscale x 2 x double> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; CHECK-NEXT:    vfmv.v.f v10, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v10, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 2 x double> poison, double %a, i32 0
+  %va = shufflevector <vscale x 2 x double> %elt.head, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %v = call <vscale x 2 x double> @llvm.vp.merge.nxv2f64(<vscale x 2 x i1> %m, <vscale x 2 x double> %va, <vscale x 2 x double> %vb, i32 %evl)
+  ret <vscale x 2 x double> %v
+}
+
+declare <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1>, <vscale x 4 x double>, <vscale x 4 x double>, i32)
+
+define <vscale x 4 x double> @vpmerge_vv_nxv4f64(<vscale x 4 x double> %va, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v12, v12, v8, v0
+; CHECK-NEXT:    vmv4r.v v8, v12
+; CHECK-NEXT:    ret
+  %v = call <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1> %m, <vscale x 4 x double> %va, <vscale x 4 x double> %vb, i32 %evl)
+  ret <vscale x 4 x double> %v
+}
+
+define <vscale x 4 x double> @vpmerge_vf_nxv4f64(double %a, <vscale x 4 x double> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; CHECK-NEXT:    vfmv.v.f v12, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m4, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v12, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 4 x double> poison, double %a, i32 0
+  %va = shufflevector <vscale x 4 x double> %elt.head, <vscale x 4 x double> poison, <vscale x 4 x i32> zeroinitializer
+  %v = call <vscale x 4 x double> @llvm.vp.merge.nxv4f64(<vscale x 4 x i1> %m, <vscale x 4 x double> %va, <vscale x 4 x double> %vb, i32 %evl)
+  ret <vscale x 4 x double> %v
+}
+
+declare <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1>, <vscale x 8 x double>, <vscale x 8 x double>, i32)
+
+define <vscale x 8 x double> @vpmerge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v16, v16, v8, v0
+; CHECK-NEXT:    vmv8r.v v8, v16
+; CHECK-NEXT:    ret
+  %v = call <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1> %m, <vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 %evl)
+  ret <vscale x 8 x double> %v
+}
+
+define <vscale x 8 x double> @vpmerge_vf_nxv8f64(double %a, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_nxv8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; CHECK-NEXT:    vfmv.v.f v16, fa0
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmerge.vvm v8, v8, v16, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 8 x double> poison, double %a, i32 0
+  %va = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
+  %v = call <vscale x 8 x double> @llvm.vp.merge.nxv8f64(<vscale x 8 x i1> %m, <vscale x 8 x double> %va, <vscale x 8 x double> %vb, i32 %evl)
+  ret <vscale x 8 x double> %v
+}

From 3e6be0241b31265953d82bad0164d3e0c24cf9d7 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Mon, 24 Jan 2022 11:16:34 +0000
Subject: [PATCH 356/946] [lldb] Update release notes with non-address bit
 handling changes

This adds the "memory find" (https://reviews.llvm.org/D117299)
and "memory tag" (https://reviews.llvm.org/D117672) commands
and puts them all in one list.
---
 llvm/docs/ReleaseNotes.rst | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 5825a6e81eb34..15f7428a7fde2 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -163,9 +163,14 @@ Changes to LLDB
 * A change in Clang's type printing has changed the way LLDB names array types
   (from ``int [N]`` to ``int[N]``) - LLDB pretty printer type name matching
   code may need to be updated to handle this.
-* The ``memory read`` command now ignores non-address bits in start and end
-  addresses. In addition, non-address bits will not be shown in the addresses
-  in the output.
+* The following commands now ignore non-address bits (e.g. AArch64 pointer
+  signatures) in address arguments. In addition, non-address bits will not
+  be shown in the output of the commands.
+
+  * ``memory find``
+  * ``memory read``
+  * ``memory tag read``
+  * ``memory tag write``
 
 Changes to Sanitizers
 ---------------------

From 12a499eb00e36bb0944c6b1f7f8721fd90a5bd8f Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Date: Tue, 18 Jan 2022 19:48:35 +0100
Subject: [PATCH 357/946] Pre-commit test case for trunc+lshr+load folds

This is a pre-commit of test cases relevant for D117406.

@srl_load_narrowing1 is showing a pattern that could be folded into
a more narrow load.

@srl_load_narrowing2 is showing a similar pattern that happens to
be optimized already, but that happens in two steps (first triggering
a combine based on SRL and later another combine based on TRUNCATE).

Differential Revision: https://reviews.llvm.org/D117588
---
 llvm/test/CodeGen/X86/shift-folding.ll | 27 ++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/llvm/test/CodeGen/X86/shift-folding.ll b/llvm/test/CodeGen/X86/shift-folding.ll
index 539649b7cd476..cc03b4b9480a2 100644
--- a/llvm/test/CodeGen/X86/shift-folding.ll
+++ b/llvm/test/CodeGen/X86/shift-folding.ll
@@ -83,3 +83,30 @@ define i32 @overshift(i32 %a) {
   ret i32 %xor
 }
 
+; Should be possible to adjust the pointer and narrow the load to 16 bits.
+define i16 @srl_load_narrowing1(i32* %arg) {
+; CHECK-LABEL: srl_load_narrowing1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movl (%eax), %eax
+; CHECK-NEXT:    shrl $8, %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retl
+  %tmp1 = load i32, i32* %arg, align 1
+  %tmp2 = lshr i32 %tmp1, 8
+  %tmp3 = trunc i32 %tmp2 to i16
+  ret i16 %tmp3
+}
+
+define i16 @srl_load_narrowing2(i32* %arg) {
+; CHECK-LABEL: srl_load_narrowing2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT:    movzbl 3(%eax), %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    retl
+  %tmp1 = load i32, i32* %arg, align 1
+  %tmp2 = lshr i32 %tmp1, 24
+  %tmp3 = trunc i32 %tmp2 to i16
+  ret i16 %tmp3
+}

From 46cacdbb21c221a4304c489cb4a1abbc51967bb1 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Date: Tue, 18 Jan 2022 19:50:50 +0100
Subject: [PATCH 358/946] [DAGCombiner] Adjust some checks in
 DAGCombiner::reduceLoadWidth

In code review for D117104 two slightly weird checks were found
in DAGCombiner::reduceLoadWidth. They were typically checking
if BitsA was a mulitple of BitsB by looking at (BitsA & (BitsB - 1)),
but such a comparison actually only make sense if BitsB is a power
of two.

The checks were related to the code that attempted to shrink a load
based on the fact that the loaded value would be right shifted.

Afaict the legality of the value types is checked later (typically in
isLegalNarrowLdSt), so the existing checks were both overly
conservative as well as being wrong whenever ExtVTBits wasn't a
power of two. The latter was a situation triggered by a number of
lit tests so we could not just assert on ExtVTBIts being a power of
two).

When attempting to simply remove the checks I found some problems,
that seems to have been guarded by the checks (maybe just out of
luck). A typical example would be a pattern like this:

  t1 = load i96* ptr
  t2 = srl t1, 64
  t3 = truncate t2 to i64

When DAGCombine is visiting the truncate reduceLoadWidth is called
attempting to narrow the load to 64 bits (ExtVT := MVT::i64). Then
the SRL is detected and we set ShAmt to 64.

In the past we've bailed out due to i96 not being a multiple of 64.
If we simply remove that check then we would end up replacing the
load with a new load that would read 64 bits but with a base pointer
adjusted by 64 bits. So we would read 32 bits the wasn't accessed by
the original load.
This patch will instead utilize the fact that the logical left shift
can be folded away by using a zextload. Thus, the pattern above will
now be combined into

  t3 = load i32* ptr+offset, zext to i64


Another case is shown in the X86/shift-folding.ll test case:

  t1 = load i32* ptr
  t2 = srl i32 t1, 8
  t3 = truncate t2 to i16

In the past we bailed out due to the shift count (8) not being a
multiple of 16. Now the narrowing kicks in and we get

  t3 = load i16* ptr+offset

Differential Revision: https://reviews.llvm.org/D117406
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 ++++++++++++-------
 llvm/test/CodeGen/ARM/shift-combine.ll        | 20 ++++-----------
 llvm/test/CodeGen/X86/shift-folding.ll        |  4 +--
 3 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 861beee6386bf..bf4409e77a916 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -12214,7 +12214,8 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
     // accessing any of the loaded bytes.  If the load was a zextload/extload
     // then the result of the shift+trunc is zero/undef (handled elsewhere).
     ShAmt = SRL1C->getZExtValue();
-    if (ShAmt >= LN->getMemoryVT().getSizeInBits())
+    uint64_t MemoryWidth = LN->getMemoryVT().getSizeInBits();
+    if (ShAmt >= MemoryWidth)
       return SDValue();
 
     // Because a SRL must be assumed to *need* to zero-extend the high bits
@@ -12223,13 +12224,19 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
     if (LN->getExtensionType() == ISD::SEXTLOAD)
       return SDValue();
 
-    unsigned ExtVTBits = ExtVT.getScalarSizeInBits();
-    // Is the shift amount a multiple of size of ExtVT?
-    if ((ShAmt & (ExtVTBits - 1)) != 0)
-      return SDValue();
-    // Is the load width a multiple of size of ExtVT?
-    if ((SRL.getScalarValueSizeInBits() & (ExtVTBits - 1)) != 0)
-      return SDValue();
+    // Avoid reading outside the memory accessed by the original load (could
+    // happened if we only adjust the load base pointer by ShAmt). Instead we
+    // try to narrow the load even further. The typical scenario here is:
+    //   (i64 (truncate (i96 (srl (load x), 64)))) ->
+    //     (i64 (truncate (i96 (zextload (load i32 + offset) from i32))))
+    if (ExtVT.getScalarSizeInBits() > MemoryWidth - ShAmt) {
+      // Don't replace sextload by zextload.
+      if (ExtType == ISD::SEXTLOAD)
+        return SDValue();
+      // Narrow the load.
+      ExtType = ISD::ZEXTLOAD;
+      ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
+    }
 
     // If the SRL is only used by a masking AND, we may be able to adjust
     // the ExtVT to make the AND redundant.
@@ -12241,7 +12248,7 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
         EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
                                          ShiftMask.countTrailingOnes());
         // If the mask is smaller, recompute the type.
-        if ((ExtVTBits > MaskedVT.getScalarSizeInBits()) &&
+        if ((ExtVT.getScalarSizeInBits() > MaskedVT.getScalarSizeInBits()) &&
             TLI.isLoadExtLegal(ExtType, SRL.getValueType(), MaskedVT))
           ExtVT = MaskedVT;
       }
diff --git a/llvm/test/CodeGen/ARM/shift-combine.ll b/llvm/test/CodeGen/ARM/shift-combine.ll
index de1beb740bfbd..549d709f62237 100644
--- a/llvm/test/CodeGen/ARM/shift-combine.ll
+++ b/llvm/test/CodeGen/ARM/shift-combine.ll
@@ -302,9 +302,7 @@ define arm_aapcscc i32 @test_lshr_load64_4_unaligned(i64* %a) {
 ;
 ; CHECK-BE-LABEL: test_lshr_load64_4_unaligned:
 ; CHECK-BE:       @ %bb.0: @ %entry
-; CHECK-BE-NEXT:    ldr r1, [r0]
-; CHECK-BE-NEXT:    ldrh r0, [r0, #4]
-; CHECK-BE-NEXT:    orr r0, r0, r1, lsl #16
+; CHECK-BE-NEXT:    ldr r0, [r0, #2]
 ; CHECK-BE-NEXT:    bx lr
 ;
 ; CHECK-THUMB-LABEL: test_lshr_load64_4_unaligned:
@@ -341,9 +339,7 @@ define arm_aapcscc i32 @test_lshr_load64_1_lsb(i64* %a) {
 ;
 ; CHECK-BE-LABEL: test_lshr_load64_1_lsb:
 ; CHECK-BE:       @ %bb.0: @ %entry
-; CHECK-BE-NEXT:    ldr r1, [r0]
-; CHECK-BE-NEXT:    ldrb r0, [r0, #4]
-; CHECK-BE-NEXT:    orr r0, r0, r1, lsl #8
+; CHECK-BE-NEXT:    ldr r0, [r0, #1]
 ; CHECK-BE-NEXT:    bx lr
 ;
 ; CHECK-THUMB-LABEL: test_lshr_load64_1_lsb:
@@ -441,23 +437,17 @@ entry:
 define arm_aapcscc i32 @test_lshr_load4_fail(i64* %a) {
 ; CHECK-ARM-LABEL: test_lshr_load4_fail:
 ; CHECK-ARM:       @ %bb.0: @ %entry
-; CHECK-ARM-NEXT:    ldrd r0, r1, [r0]
-; CHECK-ARM-NEXT:    lsr r0, r0, #8
-; CHECK-ARM-NEXT:    orr r0, r0, r1, lsl #24
+; CHECK-ARM-NEXT:    ldr r0, [r0, #1]
 ; CHECK-ARM-NEXT:    bx lr
 ;
 ; CHECK-BE-LABEL: test_lshr_load4_fail:
 ; CHECK-BE:       @ %bb.0: @ %entry
-; CHECK-BE-NEXT:    ldrd r0, r1, [r0]
-; CHECK-BE-NEXT:    lsr r1, r1, #8
-; CHECK-BE-NEXT:    orr r0, r1, r0, lsl #24
+; CHECK-BE-NEXT:    ldr r0, [r0, #3]
 ; CHECK-BE-NEXT:    bx lr
 ;
 ; CHECK-THUMB-LABEL: test_lshr_load4_fail:
 ; CHECK-THUMB:       @ %bb.0: @ %entry
-; CHECK-THUMB-NEXT:    ldrd r0, r1, [r0]
-; CHECK-THUMB-NEXT:    lsrs r0, r0, #8
-; CHECK-THUMB-NEXT:    orr.w r0, r0, r1, lsl #24
+; CHECK-THUMB-NEXT:    ldr.w r0, [r0, #1]
 ; CHECK-THUMB-NEXT:    bx lr
 ;
 ; CHECK-ALIGN-LABEL: test_lshr_load4_fail:
diff --git a/llvm/test/CodeGen/X86/shift-folding.ll b/llvm/test/CodeGen/X86/shift-folding.ll
index cc03b4b9480a2..bb59cdf504103 100644
--- a/llvm/test/CodeGen/X86/shift-folding.ll
+++ b/llvm/test/CodeGen/X86/shift-folding.ll
@@ -88,9 +88,7 @@ define i16 @srl_load_narrowing1(i32* %arg) {
 ; CHECK-LABEL: srl_load_narrowing1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl (%eax), %eax
-; CHECK-NEXT:    shrl $8, %eax
-; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
+; CHECK-NEXT:    movzwl 1(%eax), %eax
 ; CHECK-NEXT:    retl
   %tmp1 = load i32, i32* %arg, align 1
   %tmp2 = lshr i32 %tmp1, 8

From e5147f82e1cba6791252d8f44c1a014cd9ea7927 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 11:15:45 +0000
Subject: [PATCH 359/946] [X86] Remove __builtin_ia32_pabs intrinsics and use
 generic __builtin_elementwise_abs

D111986 added the generic `__builtin_elementwise_abs()` intrinsic with the same integer absolute behaviour as the SSE/AVX instructions (abs(INT_MIN) == INT_MIN)

This patch removes the `__builtin_ia32_pabs*` intrinsics and just uses `__builtin_elementwise_abs` - the existing tests see no changes:
```
__m256i test_mm256_abs_epi8(__m256i a) {
  // CHECK-LABEL: test_mm256_abs_epi8
  // CHECK: [[ABS:%.*]] = call <32 x i8> @llvm.abs.v32i8(<32 x i8> %{{.*}}, i1 false)
  return _mm256_abs_epi8(a);
}
```
This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`).

Differential Revision: https://reviews.llvm.org/D117791
---
 clang/include/clang/Basic/BuiltinsX86.def | 12 ------------
 clang/lib/CodeGen/CGBuiltin.cpp           | 15 ---------------
 clang/lib/Headers/avx2intrin.h            |  6 +++---
 clang/lib/Headers/avx512bwintrin.h        |  4 ++--
 clang/lib/Headers/avx512fintrin.h         |  8 ++++++--
 clang/lib/Headers/avx512vlintrin.h        |  4 ++--
 clang/lib/Headers/tmmintrin.h             |  6 +++---
 clang/test/CodeGen/builtins-x86.c         |  3 ---
 8 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index bc6208be45606..9b7c763b0c6c7 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -296,9 +296,6 @@ TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3")
 TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3")
 
 TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse")
 TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse")
@@ -558,9 +555,6 @@ TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx")
 
 // AVX2
 TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
@@ -927,8 +921,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:",
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
@@ -1045,8 +1037,6 @@ TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx5
 TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8OiV8OiIiUc", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
 
-TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
@@ -1198,8 +1188,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index a49c035002786..49f054ec1a982 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14285,21 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       return Builder.CreateCall(F, Ops[0]);
     }
   }
-  case X86::BI__builtin_ia32_pabsb128:
-  case X86::BI__builtin_ia32_pabsw128:
-  case X86::BI__builtin_ia32_pabsd128:
-  case X86::BI__builtin_ia32_pabsb256:
-  case X86::BI__builtin_ia32_pabsw256:
-  case X86::BI__builtin_ia32_pabsd256:
-  case X86::BI__builtin_ia32_pabsq128:
-  case X86::BI__builtin_ia32_pabsq256:
-  case X86::BI__builtin_ia32_pabsb512:
-  case X86::BI__builtin_ia32_pabsw512:
-  case X86::BI__builtin_ia32_pabsd512:
-  case X86::BI__builtin_ia32_pabsq512: {
-    Function *F = CGM.getIntrinsic(Intrinsic::abs, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
-  }
   case X86::BI__builtin_ia32_pmaxsb128:
   case X86::BI__builtin_ia32_pmaxsw128:
   case X86::BI__builtin_ia32_pmaxsd128:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 5064c87c2bb19..c9ad74ce3fa42 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -26,19 +26,19 @@
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi8(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
+    return (__m256i)__builtin_elementwise_abs((__v32qs)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi16(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
+    return (__m256i)__builtin_elementwise_abs((__v16hi)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi32(__m256i __a)
 {
-    return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
+    return (__m256i)__builtin_elementwise_abs((__v8si)__a);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 6aee8aed84871..53319eb23011d 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -485,7 +485,7 @@ _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi8 (__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A);
+  return (__m512i)__builtin_elementwise_abs((__v64qs)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -507,7 +507,7 @@ _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi16 (__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A);
+  return (__m512i)__builtin_elementwise_abs((__v32hi)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index df298640523b7..9b02a7cffc64d 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -26,6 +26,10 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
 typedef unsigned long long __v8du __attribute__((__vector_size__(64)));
 typedef unsigned int __v16su __attribute__((__vector_size__(64)));
 
+/* We need an explicitly signed variant for char. Note that this shouldn't
+ * appear in the interface though. */
+typedef signed char __v64qs __attribute__((__vector_size__(64)));
+
 typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
 typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64)));
 typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64)));
@@ -1846,7 +1850,7 @@ _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi64(__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
+  return (__m512i)__builtin_elementwise_abs((__v8di)__A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1868,7 +1872,7 @@ _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi32(__m512i __A)
 {
-  return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
+  return (__m512i)__builtin_elementwise_abs((__v16si) __A);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 0519dba59081a..eddb99902e3d5 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -2988,7 +2988,7 @@ _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_abs_epi64 (__m128i __A) {
-  return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
+  return (__m128i)__builtin_elementwise_abs((__v2di)__A);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3007,7 +3007,7 @@ _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi64 (__m256i __A) {
-  return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
+  return (__m256i)__builtin_elementwise_abs((__v4di)__A);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h
index bcffa8187801c..cb9be2349de5a 100644
--- a/clang/lib/Headers/tmmintrin.h
+++ b/clang/lib/Headers/tmmintrin.h
@@ -53,7 +53,7 @@ _mm_abs_pi8(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi8(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
+    return (__m128i)__builtin_elementwise_abs((__v16qs)__a);
 }
 
 /// Computes the absolute value of each of the packed 16-bit signed
@@ -89,7 +89,7 @@ _mm_abs_pi16(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi16(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
+    return (__m128i)__builtin_elementwise_abs((__v8hi)__a);
 }
 
 /// Computes the absolute value of each of the packed 32-bit signed
@@ -125,7 +125,7 @@ _mm_abs_pi32(__m64 __a)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_abs_epi32(__m128i __a)
 {
-    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
+    return (__m128i)__builtin_elementwise_abs((__v4si)__a);
 }
 
 /// Concatenates the two 128-bit integer vector operands, and
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index 61b9d53c74f9d..bfcd30072fc1f 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -259,11 +259,8 @@ void f0() {
   tmp_V4s = __builtin_ia32_psignw(tmp_V4s, tmp_V4s);
   tmp_V4i = __builtin_ia32_psignd128(tmp_V4i, tmp_V4i);
   tmp_V2i = __builtin_ia32_psignd(tmp_V2i, tmp_V2i);
-  tmp_V16c = __builtin_ia32_pabsb128(tmp_V16c);
   tmp_V8c = __builtin_ia32_pabsb(tmp_V8c);
-  tmp_V8s = __builtin_ia32_pabsw128(tmp_V8s);
   tmp_V4s = __builtin_ia32_pabsw(tmp_V4s);
-  tmp_V4i = __builtin_ia32_pabsd128(tmp_V4i);
   tmp_V2i = __builtin_ia32_pabsd(tmp_V2i);
   tmp_V4s = __builtin_ia32_psllw(tmp_V4s, tmp_V1LLi);
   tmp_V2i = __builtin_ia32_pslld(tmp_V2i, tmp_V1LLi);

From b2499bf3e851c67ef623766b922de520de9235d5 Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Mon, 24 Jan 2022 20:18:40 +0900
Subject: [PATCH 360/946] [mlir][bufferize][NFC] Refactor createAlloc function
 signature

Pass a ValueRange instead of an ArrayRef<Value> for better compatibility. Also provide an additional function overload that automatically deallocates the buffer if specified.

Differential Revision: https://reviews.llvm.org/D118025
---
 .../IR/BufferizableOpInterface.h              | 16 +++++++++---
 .../IR/BufferizableOpInterface.cpp            | 26 +++++++++++++++++--
 .../Transforms/ComprehensiveBufferizePass.cpp |  2 +-
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index f679a22fa7a6c..bbac6e59aeeb2 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -36,8 +36,8 @@ class BufferizationState;
 
 /// Options for ComprehensiveBufferize.
 struct BufferizationOptions {
-  using AllocationFn = std::function<FailureOr<Value>(
-      OpBuilder &, Location, MemRefType, ArrayRef<Value>)>;
+  using AllocationFn = std::function<FailureOr<Value>(OpBuilder &, Location,
+                                                      MemRefType, ValueRange)>;
   using DeallocationFn =
       std::function<LogicalResult(OpBuilder &, Location, Value)>;
   using MemCpyFn =
@@ -298,15 +298,23 @@ UnrankedMemRefType getUnrankedMemRefType(Type elementType,
 MemRefType getDynamicMemRefType(RankedTensorType tensorType,
                                 unsigned addressSpace = 0);
 
-/// Creates a memref allocation.
+/// Creates a memref allocation with the given type and dynamic extents.
 FailureOr<Value> createAlloc(OpBuilder &b, Location loc, MemRefType type,
-                             ArrayRef<Value> dynShape,
+                             ValueRange dynShape,
+                             const BufferizationOptions &options);
+
+/// Creates a memref allocation with the given type and dynamic extents. If
+/// `createDealloc`, a deallocation op is inserted at the point where the
+/// allocation goes out of scope.
+FailureOr<Value> createAlloc(OpBuilder &b, Location loc, MemRefType type,
+                             ValueRange dynShape, bool deallocMemref,
                              const BufferizationOptions &options);
 
 /// Creates a memref allocation for the given shaped value. This function may
 /// perform additional optimizations such as buffer allocation hoisting. If
 /// `createDealloc`, a deallocation op is inserted at the point where the
 /// allocation goes out of scope.
+// TODO: Allocation hoisting should be a cleanup pass.
 FailureOr<Value> createAlloc(OpBuilder &b, Location loc, Value shapedValue,
                              bool deallocMemref,
                              const BufferizationOptions &options);
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index fb081d3d6c3cd..e565f41a39d5a 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -433,10 +433,10 @@ bufferization::createAlloc(OpBuilder &b, Location loc, Value shapedValue,
   return casted;
 }
 
-/// Create a memref allocation.
+/// Create a memref allocation with the given type and dynamic extents.
 FailureOr<Value>
 bufferization::createAlloc(OpBuilder &b, Location loc, MemRefType type,
-                           ArrayRef<Value> dynShape,
+                           ValueRange dynShape,
                            const BufferizationOptions &options) {
   if (options.allocationFn)
     return (*options.allocationFn)(b, loc, type, dynShape);
@@ -447,6 +447,28 @@ bufferization::createAlloc(OpBuilder &b, Location loc, MemRefType type,
   return allocated;
 }
 
+/// Create a memref allocation with the given type and dynamic extents. May also
+/// deallocate the memref again.
+FailureOr<Value>
+bufferization::createAlloc(OpBuilder &b, Location loc, MemRefType type,
+                           ValueRange dynShape, bool deallocMemref,
+                           const BufferizationOptions &options) {
+  OpBuilder::InsertionGuard g(b);
+
+  FailureOr<Value> alloc = createAlloc(b, loc, type, dynShape, options);
+  if (failed(alloc))
+    return failure();
+
+  if (deallocMemref) {
+    // Dealloc at the end of the block.
+    b.setInsertionPoint(alloc.getValue().getParentBlock()->getTerminator());
+    if (failed(createDealloc(b, loc, *alloc, options)))
+      return failure();
+  }
+
+  return alloc;
+}
+
 /// Create a memref deallocation.
 LogicalResult
 bufferization::createDealloc(OpBuilder &b, Location loc, Value allocatedBuffer,
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 3c8b9c9606952..9409492e12dba 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -73,7 +73,7 @@ static void applyEnablingTransformations(ModuleOp moduleOp) {
 
 static FailureOr<Value> allocationFnUsingAlloca(OpBuilder &b, Location loc,
                                                 MemRefType type,
-                                                ArrayRef<Value> dynShape) {
+                                                ValueRange dynShape) {
   Value allocated = b.create<memref::AllocaOp>(
       loc, type, dynShape, b.getI64IntegerAttr(kBufferAlignments));
   return allocated;

From 3e50593b18840ab4508a25d0f761afb65535a38d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 11:40:16 +0000
Subject: [PATCH 361/946] [X86] Remove `__builtin_ia32_pmax/min` intrinsics and
 use generic `__builtin_elementwise_max/min`

D111985 added the generic `__builtin_elementwise_max` and `__builtin_elementwise_min` intrinsics with the same integer behaviour as the SSE/AVX instructions

This patch removes the `__builtin_ia32_pmax/min` intrinsics and just uses `__builtin_elementwise_max/min` - the existing tests see no changes:
```
__m256i test_mm256_max_epu32(__m256i a, __m256i b) {
  // CHECK-LABEL: test_mm256_max_epu32
  // CHECK: call <8 x i32> @llvm.umax.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
  return _mm256_max_epu32(a, b);
}
```
This requires us to add a `__v64qs` explicitly signed char vector type (we already have `__v16qs` and `__v32qs`).

Sibling patch to D117791

Differential Revision: https://reviews.llvm.org/D117798
---
 clang/include/clang/Basic/BuiltinsX86.def | 48 ---------------------
 clang/lib/CodeGen/CGBuiltin.cpp           | 52 -----------------------
 clang/lib/Headers/avx2intrin.h            | 24 +++++------
 clang/lib/Headers/avx512bwintrin.h        | 16 +++----
 clang/lib/Headers/avx512fintrin.h         | 16 +++----
 clang/lib/Headers/avx512vlintrin.h        | 16 +++----
 clang/lib/Headers/emmintrin.h             |  8 ++--
 clang/lib/Headers/smmintrin.h             | 16 +++----
 clang/test/CodeGen/builtins-x86.c         | 12 ------
 9 files changed, 48 insertions(+), 160 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 9b7c763b0c6c7..a8f5567248624 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -265,10 +265,6 @@ TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2")
@@ -377,14 +373,6 @@ TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1")
 
-TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2OiV4iV4i", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
@@ -580,18 +568,6 @@ TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4OiV8iV8i", "ncV:256:", "avx2")
 TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2")
@@ -921,14 +897,6 @@ TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:",
 TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8OiV8OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8OiV16iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f")
@@ -1047,14 +1015,6 @@ TARGET_BUILTIN(__builtin_ia32_paddusb512, "V64cV64cV64c", "ncV:512:", "avx512bw"
 TARGET_BUILTIN(__builtin_ia32_paddusw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pavgb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pavgw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_psubsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
@@ -1188,14 +1148,6 @@ TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx5
 TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2OiV2OiV2Oi", "ncV:128:", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4OiV4OiV4Oi", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
 TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 49f054ec1a982..4c68b20067b99 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14285,58 +14285,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
       return Builder.CreateCall(F, Ops[0]);
     }
   }
-  case X86::BI__builtin_ia32_pmaxsb128:
-  case X86::BI__builtin_ia32_pmaxsw128:
-  case X86::BI__builtin_ia32_pmaxsd128:
-  case X86::BI__builtin_ia32_pmaxsq128:
-  case X86::BI__builtin_ia32_pmaxsb256:
-  case X86::BI__builtin_ia32_pmaxsw256:
-  case X86::BI__builtin_ia32_pmaxsd256:
-  case X86::BI__builtin_ia32_pmaxsq256:
-  case X86::BI__builtin_ia32_pmaxsb512:
-  case X86::BI__builtin_ia32_pmaxsw512:
-  case X86::BI__builtin_ia32_pmaxsd512:
-  case X86::BI__builtin_ia32_pmaxsq512:
-    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smax);
-  case X86::BI__builtin_ia32_pmaxub128:
-  case X86::BI__builtin_ia32_pmaxuw128:
-  case X86::BI__builtin_ia32_pmaxud128:
-  case X86::BI__builtin_ia32_pmaxuq128:
-  case X86::BI__builtin_ia32_pmaxub256:
-  case X86::BI__builtin_ia32_pmaxuw256:
-  case X86::BI__builtin_ia32_pmaxud256:
-  case X86::BI__builtin_ia32_pmaxuq256:
-  case X86::BI__builtin_ia32_pmaxub512:
-  case X86::BI__builtin_ia32_pmaxuw512:
-  case X86::BI__builtin_ia32_pmaxud512:
-  case X86::BI__builtin_ia32_pmaxuq512:
-    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umax);
-  case X86::BI__builtin_ia32_pminsb128:
-  case X86::BI__builtin_ia32_pminsw128:
-  case X86::BI__builtin_ia32_pminsd128:
-  case X86::BI__builtin_ia32_pminsq128:
-  case X86::BI__builtin_ia32_pminsb256:
-  case X86::BI__builtin_ia32_pminsw256:
-  case X86::BI__builtin_ia32_pminsd256:
-  case X86::BI__builtin_ia32_pminsq256:
-  case X86::BI__builtin_ia32_pminsb512:
-  case X86::BI__builtin_ia32_pminsw512:
-  case X86::BI__builtin_ia32_pminsd512:
-  case X86::BI__builtin_ia32_pminsq512:
-    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::smin);
-  case X86::BI__builtin_ia32_pminub128:
-  case X86::BI__builtin_ia32_pminuw128:
-  case X86::BI__builtin_ia32_pminud128:
-  case X86::BI__builtin_ia32_pminuq128:
-  case X86::BI__builtin_ia32_pminub256:
-  case X86::BI__builtin_ia32_pminuw256:
-  case X86::BI__builtin_ia32_pminud256:
-  case X86::BI__builtin_ia32_pminuq256:
-  case X86::BI__builtin_ia32_pminub512:
-  case X86::BI__builtin_ia32_pminuw512:
-  case X86::BI__builtin_ia32_pminud512:
-  case X86::BI__builtin_ia32_pminuq512:
-    return EmitX86BinaryIntrinsic(*this, Ops, Intrinsic::umin);
 
   case X86::BI__builtin_ia32_pmuludq128:
   case X86::BI__builtin_ia32_pmuludq256:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index c9ad74ce3fa42..e33514a60ff3e 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -253,73 +253,73 @@ _mm256_madd_epi16(__m256i __a, __m256i __b)
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
+  return (__m256i)__builtin_elementwise_max((__v32qs)__a, (__v32qs)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_elementwise_max((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_elementwise_max((__v8si)__a, (__v8si)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
+  return (__m256i)__builtin_elementwise_max((__v32qu)__a, (__v32qu)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_elementwise_max((__v16hu)__a, (__v16hu)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_elementwise_max((__v8su)__a, (__v8su)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
+  return (__m256i)__builtin_elementwise_min((__v32qs)__a, (__v32qs)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_elementwise_min((__v16hi)__a, (__v16hi)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_elementwise_min((__v8si)__a, (__v8si)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu8(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
+  return (__m256i)__builtin_elementwise_min((__v32qu)__a, (__v32qu)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu16(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
+  return (__m256i)__builtin_elementwise_min((__v16hu)__a, (__v16hu)__b);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu32(__m256i __a, __m256i __b)
 {
-  return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
+  return (__m256i)__builtin_elementwise_min((__v8su)__a, (__v8su)__b);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 53319eb23011d..522ef100bab1a 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -751,7 +751,7 @@ _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B);
+  return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -773,7 +773,7 @@ _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B);
+  return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -796,7 +796,7 @@ _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B);
+  return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -818,7 +818,7 @@ _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B);
+  return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -840,7 +840,7 @@ _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B);
+  return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -862,7 +862,7 @@ _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B);
+  return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -884,7 +884,7 @@ _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu8 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B);
+  return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -906,7 +906,7 @@ _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu16 (__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B);
+  return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 9b02a7cffc64d..8695aeb94de24 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1090,7 +1090,7 @@ static __inline __m512i
 __DEFAULT_FN_ATTRS512
 _mm512_max_epi32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_max((__v16si)__A, (__v16si)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1112,7 +1112,7 @@ _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_max((__v16su)__A, (__v16su)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1134,7 +1134,7 @@ _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_max((__v8di)__A, (__v8di)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1156,7 +1156,7 @@ _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_max((__v8du)__A, (__v8du)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1325,7 +1325,7 @@ static __inline __m512i
 __DEFAULT_FN_ATTRS512
 _mm512_min_epi32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_min((__v16si)__A, (__v16si)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1347,7 +1347,7 @@ _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu32(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
+  return (__m512i)__builtin_elementwise_min((__v16su)__A, (__v16su)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1369,7 +1369,7 @@ _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_min((__v8di)__A, (__v8di)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
@@ -1391,7 +1391,7 @@ _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu64(__m512i __A, __m512i __B)
 {
-  return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
+  return (__m512i)__builtin_elementwise_min((__v8du)__A, (__v8du)__B);
 }
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS512
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index eddb99902e3d5..178c9dbc0e6ea 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -3054,7 +3054,7 @@ _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_max_epi64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3073,7 +3073,7 @@ _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3120,7 +3120,7 @@ _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_max_epu64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3139,7 +3139,7 @@ _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3186,7 +3186,7 @@ _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_min_epi64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3205,7 +3205,7 @@ _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
@@ -3252,7 +3252,7 @@ _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_min_epu64 (__m128i __A, __m128i __B) {
-  return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
+  return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -3271,7 +3271,7 @@ _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu64 (__m256i __A, __m256i __B) {
-  return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
+  return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS256
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 6e9c3032c21f7..4618b808efc48 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -2375,7 +2375,7 @@ _mm_madd_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b);
+  return (__m128i)__builtin_elementwise_max((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
@@ -2395,7 +2395,7 @@ _mm_max_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b);
+  return (__m128i)__builtin_elementwise_max((__v16qu)__a, (__v16qu)__b);
 }
 
 /// Compares corresponding elements of two 128-bit signed [8 x i16]
@@ -2415,7 +2415,7 @@ _mm_max_epu8(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi16(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b);
+  return (__m128i)__builtin_elementwise_min((__v8hi)__a, (__v8hi)__b);
 }
 
 /// Compares corresponding elements of two 128-bit unsigned [16 x i8]
@@ -2435,7 +2435,7 @@ _mm_min_epi16(__m128i __a, __m128i __b)
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu8(__m128i __a, __m128i __b)
 {
-  return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b);
+  return (__m128i)__builtin_elementwise_min((__v16qu)__a, (__v16qu)__b);
 }
 
 /// Multiplies the corresponding elements of two signed [8 x i16]
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 710e55aaa1203..0df59c5fcc592 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -668,7 +668,7 @@ _mm_stream_load_si128 (__m128i const *__V)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi8 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
+  return (__m128i) __builtin_elementwise_min((__v16qs) __V1, (__v16qs) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -687,7 +687,7 @@ _mm_min_epi8 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi8 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
+  return (__m128i) __builtin_elementwise_max((__v16qs) __V1, (__v16qs) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -706,7 +706,7 @@ _mm_max_epi8 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu16 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
+  return (__m128i) __builtin_elementwise_min((__v8hu) __V1, (__v8hu) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -725,7 +725,7 @@ _mm_min_epu16 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu16 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
+  return (__m128i) __builtin_elementwise_max((__v8hu) __V1, (__v8hu) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -744,7 +744,7 @@ _mm_max_epu16 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epi32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_elementwise_min((__v4si) __V1, (__v4si) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -763,7 +763,7 @@ _mm_min_epi32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epi32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_elementwise_max((__v4si) __V1, (__v4si) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -782,7 +782,7 @@ _mm_max_epi32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_min_epu32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_elementwise_min((__v4su) __V1, (__v4su) __V2);
 }
 
 /// Compares the corresponding elements of two 128-bit vectors of
@@ -801,7 +801,7 @@ _mm_min_epu32 (__m128i __V1, __m128i __V2)
 static __inline__  __m128i __DEFAULT_FN_ATTRS
 _mm_max_epu32 (__m128i __V1, __m128i __V2)
 {
-  return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);
+  return (__m128i) __builtin_elementwise_max((__v4su) __V1, (__v4su) __V2);
 }
 
 /* SSE4 Insertion and Extraction from XMM Register Instructions.  */
diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c
index bfcd30072fc1f..9eb5f2f5d149e 100644
--- a/clang/test/CodeGen/builtins-x86.c
+++ b/clang/test/CodeGen/builtins-x86.c
@@ -221,10 +221,6 @@ void f0() {
   tmp_V16c = __builtin_ia32_psubusb128(tmp_V16c, tmp_V16c);
   tmp_V8s = __builtin_ia32_psubusw128(tmp_V8s, tmp_V8s);
   tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s);
-  tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c);
-  tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
-  tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);
-  tmp_V8s = __builtin_ia32_pminsw128(tmp_V8s, tmp_V8s);
   tmp_V16c = __builtin_ia32_packsswb128(tmp_V8s, tmp_V8s);
   tmp_V8s = __builtin_ia32_packssdw128(tmp_V4i, tmp_V4i);
   tmp_V16c = __builtin_ia32_packuswb128(tmp_V8s, tmp_V8s);
@@ -455,14 +451,6 @@ void f0() {
   tmp_V2d = __builtin_ia32_blendvpd(tmp_V2d, tmp_V2d, tmp_V2d);
   tmp_V4f = __builtin_ia32_blendvps(tmp_V4f, tmp_V4f, tmp_V4f);
   tmp_V8s = __builtin_ia32_packusdw128(tmp_V4i, tmp_V4i);
-  tmp_V16c = __builtin_ia32_pmaxsb128(tmp_V16c, tmp_V16c);
-  tmp_V4i = __builtin_ia32_pmaxsd128(tmp_V4i, tmp_V4i);
-  tmp_V4i = __builtin_ia32_pmaxud128(tmp_V4i, tmp_V4i);
-  tmp_V8s = __builtin_ia32_pmaxuw128(tmp_V8s, tmp_V8s);
-  tmp_V16c = __builtin_ia32_pminsb128(tmp_V16c, tmp_V16c);
-  tmp_V4i = __builtin_ia32_pminsd128(tmp_V4i, tmp_V4i);
-  tmp_V4i = __builtin_ia32_pminud128(tmp_V4i, tmp_V4i);
-  tmp_V8s = __builtin_ia32_pminuw128(tmp_V8s, tmp_V8s);
   tmp_V2LLi = __builtin_ia32_pmuldq128(tmp_V4i, tmp_V4i);
   tmp_V4f = __builtin_ia32_roundps(tmp_V4f, imm_i_0_16);
   tmp_V4f = __builtin_ia32_roundss(tmp_V4f, tmp_V4f, imm_i_0_16);

From 3696c70e67d9b9e54307ef25077bae7a6f76636e Mon Sep 17 00:00:00 2001
From: Adrian Vogelsgesang <avogelsgesang@tableau.com>
Date: Wed, 27 Oct 2021 11:49:00 -0700
Subject: [PATCH 362/946] [clang-tidy] Add `readability-container-contains`
 check

This commit introduces a new check `readability-container-contains` which finds
usages of `container.count()` and `container.find() != container.end()` and
instead recommends the `container.contains()` method introduced in C++20.

For containers which permit multiple entries per key (`multimap`, `multiset`,
...), `contains` is more efficient than `count` because `count` has to do
unnecessary additional work.

While this this performance difference does not exist for containers with only
a single entry per key (`map`, `unordered_map`, ...), `contains` still conveys
the intent better.

Reviewed By: xazax.hun, whisperity

Differential Revision: http://reviews.llvm.org/D112646
---
 .../clang-tidy/readability/CMakeLists.txt     |   1 +
 .../readability/ContainerContainsCheck.cpp    | 144 +++++++++++
 .../readability/ContainerContainsCheck.h      |  40 +++
 .../readability/ReadabilityTidyModule.cpp     |   3 +
 clang-tools-extra/docs/ReleaseNotes.rst       |   6 +
 .../docs/clang-tidy/checks/list.rst           |   1 +
 .../checks/readability-container-contains.rst |  25 ++
 .../readability-container-contains.cpp        | 230 ++++++++++++++++++
 8 files changed, 450 insertions(+)
 create mode 100644 clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp
 create mode 100644 clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h
 create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability-container-contains.rst
 create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability-container-contains.cpp

diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
index 22ce8f62751ec..ea09b2193eb7c 100644
--- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt
@@ -7,6 +7,7 @@ add_clang_library(clangTidyReadabilityModule
   AvoidConstParamsInDecls.cpp
   BracesAroundStatementsCheck.cpp
   ConstReturnTypeCheck.cpp
+  ContainerContainsCheck.cpp
   ContainerDataPointerCheck.cpp
   ContainerSizeEmptyCheck.cpp
   ConvertMemberFunctionsToStatic.cpp
diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp
new file mode 100644
index 0000000000000..7a20480fb501c
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp
@@ -0,0 +1,144 @@
+//===--- ContainerContainsCheck.cpp - clang-tidy --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ContainerContainsCheck.h"
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+
+using namespace clang::ast_matchers;
+
+namespace clang {
+namespace tidy {
+namespace readability {
+
+void ContainerContainsCheck::registerMatchers(MatchFinder *Finder) {
+  const auto SupportedContainers = hasType(
+      hasUnqualifiedDesugaredType(recordType(hasDeclaration(cxxRecordDecl(
+          hasAnyName("::std::set", "::std::unordered_set", "::std::map",
+                     "::std::unordered_map", "::std::multiset",
+                     "::std::unordered_multiset", "::std::multimap",
+                     "::std::unordered_multimap"))))));
+
+  const auto CountCall =
+      cxxMemberCallExpr(on(SupportedContainers),
+                        callee(cxxMethodDecl(hasName("count"))),
+                        argumentCountIs(1))
+          .bind("call");
+
+  const auto FindCall =
+      cxxMemberCallExpr(on(SupportedContainers),
+                        callee(cxxMethodDecl(hasName("find"))),
+                        argumentCountIs(1))
+          .bind("call");
+
+  const auto EndCall = cxxMemberCallExpr(on(SupportedContainers),
+                                         callee(cxxMethodDecl(hasName("end"))),
+                                         argumentCountIs(0));
+
+  const auto Literal0 = integerLiteral(equals(0));
+  const auto Literal1 = integerLiteral(equals(1));
+
+  auto AddSimpleMatcher = [&](auto Matcher) {
+    Finder->addMatcher(
+        traverse(TK_IgnoreUnlessSpelledInSource, std::move(Matcher)), this);
+  };
+
+  // Find membership tests which use `count()`.
+  Finder->addMatcher(implicitCastExpr(hasImplicitDestinationType(booleanType()),
+                                      hasSourceExpression(CountCall))
+                         .bind("positiveComparison"),
+                     this);
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(CountCall), hasOperatorName("!="), hasRHS(Literal0))
+          .bind("positiveComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(Literal0), hasOperatorName("!="), hasRHS(CountCall))
+          .bind("positiveComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(CountCall), hasOperatorName(">"), hasRHS(Literal0))
+          .bind("positiveComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(Literal0), hasOperatorName("<"), hasRHS(CountCall))
+          .bind("positiveComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(CountCall), hasOperatorName(">="), hasRHS(Literal1))
+          .bind("positiveComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(Literal1), hasOperatorName("<="), hasRHS(CountCall))
+          .bind("positiveComparison"));
+
+  // Find inverted membership tests which use `count()`.
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(CountCall), hasOperatorName("=="), hasRHS(Literal0))
+          .bind("negativeComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(Literal0), hasOperatorName("=="), hasRHS(CountCall))
+          .bind("negativeComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(CountCall), hasOperatorName("<="), hasRHS(Literal0))
+          .bind("negativeComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(Literal0), hasOperatorName(">="), hasRHS(CountCall))
+          .bind("negativeComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(CountCall), hasOperatorName("<"), hasRHS(Literal1))
+          .bind("negativeComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(Literal1), hasOperatorName(">"), hasRHS(CountCall))
+          .bind("negativeComparison"));
+
+  // Find membership tests based on `find() == end()`.
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(FindCall), hasOperatorName("!="), hasRHS(EndCall))
+          .bind("positiveComparison"));
+  AddSimpleMatcher(
+      binaryOperator(hasLHS(FindCall), hasOperatorName("=="), hasRHS(EndCall))
+          .bind("negativeComparison"));
+}
+
+void ContainerContainsCheck::check(const MatchFinder::MatchResult &Result) {
+  // Extract the information about the match
+  const auto *Call = Result.Nodes.getNodeAs<CXXMemberCallExpr>("call");
+  const auto *PositiveComparison =
+      Result.Nodes.getNodeAs<Expr>("positiveComparison");
+  const auto *NegativeComparison =
+      Result.Nodes.getNodeAs<Expr>("negativeComparison");
+  assert(
+      !PositiveComparison ||
+      !NegativeComparison &&
+          "only one of PositiveComparison or NegativeComparison should be set");
+  bool Negated = NegativeComparison != nullptr;
+  const auto *Comparison = Negated ? NegativeComparison : PositiveComparison;
+
+  // Diagnose the issue.
+  auto Diag =
+      diag(Call->getExprLoc(), "use 'contains' to check for membership");
+
+  // Don't fix it if it's in a macro invocation. Leave fixing it to the user.
+  SourceLocation FuncCallLoc = Comparison->getEndLoc();
+  if (!FuncCallLoc.isValid() || FuncCallLoc.isMacroID())
+    return;
+
+  // Create the fix it.
+  const auto *Member = cast<MemberExpr>(Call->getCallee());
+  Diag << FixItHint::CreateReplacement(
+      Member->getMemberNameInfo().getSourceRange(), "contains");
+  SourceLocation ComparisonBegin = Comparison->getSourceRange().getBegin();
+  SourceLocation ComparisonEnd = Comparison->getSourceRange().getEnd();
+  SourceLocation CallBegin = Call->getSourceRange().getBegin();
+  SourceLocation CallEnd = Call->getSourceRange().getEnd();
+  Diag << FixItHint::CreateReplacement(
+      CharSourceRange::getCharRange(ComparisonBegin, CallBegin),
+      Negated ? "!" : "");
+  Diag << FixItHint::CreateRemoval(CharSourceRange::getCharRange(
+      CallEnd.getLocWithOffset(1), ComparisonEnd.getLocWithOffset(1)));
+}
+
+} // namespace readability
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h
new file mode 100644
index 0000000000000..0c2705d437797
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h
@@ -0,0 +1,40 @@
+//===--- ContainerContainsCheck.h - clang-tidy ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERCONTAINSCHECK_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERCONTAINSCHECK_H
+
+#include "../ClangTidyCheck.h"
+
+namespace clang {
+namespace tidy {
+namespace readability {
+
+/// Finds usages of `container.count()` and `find() == end()` which should be
+/// replaced by a call to the `container.contains()` method introduced in C++20.
+///
+/// For the user-facing documentation see:
+/// http://clang.llvm.org/extra/clang-tidy/checks/readability-container-contains.html
+class ContainerContainsCheck : public ClangTidyCheck {
+public:
+  ContainerContainsCheck(StringRef Name, ClangTidyContext *Context)
+      : ClangTidyCheck(Name, Context) {}
+  void registerMatchers(ast_matchers::MatchFinder *Finder) final;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) final;
+
+protected:
+  bool isLanguageVersionSupported(const LangOptions &LO) const final {
+    return LO.CPlusPlus20;
+  }
+};
+
+} // namespace readability
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_CONTAINERCONTAINSCHECK_H
diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
index b0493d43ff318..6bbef6b7fa07c 100644
--- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
+++ b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp
@@ -12,6 +12,7 @@
 #include "AvoidConstParamsInDecls.h"
 #include "BracesAroundStatementsCheck.h"
 #include "ConstReturnTypeCheck.h"
+#include "ContainerContainsCheck.h"
 #include "ContainerDataPointerCheck.h"
 #include "ContainerSizeEmptyCheck.h"
 #include "ConvertMemberFunctionsToStatic.h"
@@ -64,6 +65,8 @@ class ReadabilityModule : public ClangTidyModule {
         "readability-braces-around-statements");
     CheckFactories.registerCheck<ConstReturnTypeCheck>(
         "readability-const-return-type");
+    CheckFactories.registerCheck<ContainerContainsCheck>(
+        "readability-container-contains");
     CheckFactories.registerCheck<ContainerDataPointerCheck>(
         "readability-container-data-pointer");
     CheckFactories.registerCheck<ContainerSizeEmptyCheck>(
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index 060af42521552..ba0e530b7fec4 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -118,6 +118,12 @@ New checks
 
   Reports identifier with unicode right-to-left characters.
 
+- New :doc:`readability-container-contains
+  <clang-tidy/checks/readability-container-contains>` check.
+
+  Finds usages of ``container.count()`` and ``container.find() == container.end()`` which should
+  be replaced by a call to the ``container.contains()`` method introduced in C++20.
+
 - New :doc:`readability-container-data-pointer
   <clang-tidy/checks/readability-container-data-pointer>` check.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst
index 5878345bdfcfd..fcf661a406959 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/list.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst
@@ -290,6 +290,7 @@ Clang-Tidy Checks
    `readability-avoid-const-params-in-decls <readability-avoid-const-params-in-decls.html>`_, "Yes"
    `readability-braces-around-statements <readability-braces-around-statements.html>`_, "Yes"
    `readability-const-return-type <readability-const-return-type.html>`_, "Yes"
+   `readability-container-contains <readability-container-contains.html>`_, "Yes"
    `readability-container-data-pointer <readability-container-data-pointer.html>`_, "Yes"
    `readability-container-size-empty <readability-container-size-empty.html>`_, "Yes"
    `readability-convert-member-functions-to-static <readability-convert-member-functions-to-static.html>`_, "Yes"
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-container-contains.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-container-contains.rst
new file mode 100644
index 0000000000000..07d1e352d3b1b
--- /dev/null
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability-container-contains.rst
@@ -0,0 +1,25 @@
+.. title:: clang-tidy - readability-container-contains
+
+readability-container-contains
+==============================
+
+Finds usages of ``container.count()`` and ``container.find() == container.end()`` which should be replaced by a call to the ``container.contains()`` method introduced in C++ 20.
+
+Whether an element is contained inside a container should be checked with ``contains`` instead of ``count``/``find`` because ``contains`` conveys the intent more clearly. Furthermore, for containers which permit multiple entries per key (``multimap``, ``multiset``, ...), ``contains`` is more efficient than ``count`` because ``count`` has to do unnecessary additional work.
+
+Examples:
+
+===========================================  ==============================
+Initial expression                           Result
+-------------------------------------------  ------------------------------
+``myMap.find(x) == myMap.end()``             ``!myMap.contains(x)``
+``myMap.find(x) != myMap.end()``             ``myMap.contains(x)``
+``if (myMap.count(x))``                      ``if (myMap.contains(x))``
+``bool exists = myMap.count(x)``             ``bool exists = myMap.contains(x)``
+``bool exists = myMap.count(x) > 0``         ``bool exists = myMap.contains(x)``
+``bool exists = myMap.count(x) >= 1``        ``bool exists = myMap.contains(x)``
+``bool missing = myMap.count(x) == 0``       ``bool missing = !myMap.contains(x)``
+===========================================  ==============================
+
+This check applies to ``std::set``, ``std::unordered_set``, ``std::map``, ``std::unordered_map`` and the corresponding multi-key variants.
+It is only active for C++20 and later, as the ``contains`` method was only added in C++20.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-container-contains.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-container-contains.cpp
new file mode 100644
index 0000000000000..c4ea1e27e63e6
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability-container-contains.cpp
@@ -0,0 +1,230 @@
+// RUN: %check_clang_tidy -std=c++20 %s readability-container-contains %t
+
+// Some *very* simplified versions of `map` etc.
+namespace std {
+
+template <class Key, class T>
+struct map {
+  unsigned count(const Key &K) const;
+  bool contains(const Key &K) const;
+  void *find(const Key &K);
+  void *end();
+};
+
+template <class Key>
+struct set {
+  unsigned count(const Key &K) const;
+  bool contains(const Key &K) const;
+};
+
+template <class Key>
+struct unordered_set {
+  unsigned count(const Key &K) const;
+  bool contains(const Key &K) const;
+};
+
+template <class Key, class T>
+struct multimap {
+  unsigned count(const Key &K) const;
+  bool contains(const Key &K) const;
+};
+
+} // namespace std
+
+// Check that we detect various common ways to check for membership
+int testDifferentCheckTypes(std::map<int, int> &MyMap) {
+  if (MyMap.count(0))
+    // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: use 'contains' to check for membership [readability-container-contains]
+    // CHECK-FIXES: if (MyMap.contains(0))
+    return 1;
+  bool C1 = MyMap.count(1);
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C1 = MyMap.contains(1);
+  auto C2 = static_cast<bool>(MyMap.count(1));
+  // CHECK-MESSAGES: :[[@LINE-1]]:37: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C2 = static_cast<bool>(MyMap.contains(1));
+  auto C3 = MyMap.count(2) != 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C3 = MyMap.contains(2);
+  auto C4 = MyMap.count(3) > 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C4 = MyMap.contains(3);
+  auto C5 = MyMap.count(4) >= 1;
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C5 = MyMap.contains(4);
+  auto C6 = MyMap.find(5) != MyMap.end();
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C6 = MyMap.contains(5);
+  return C1 + C2 + C3 + C4 + C5 + C6;
+}
+
+// Check that we detect various common ways to check for non-membership
+int testNegativeChecks(std::map<int, int> &MyMap) {
+  bool C1 = !MyMap.count(-1);
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C1 = !MyMap.contains(-1);
+  auto C2 = MyMap.count(-2) == 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C2 = !MyMap.contains(-2);
+  auto C3 = MyMap.count(-3) <= 0;
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C3 = !MyMap.contains(-3);
+  auto C4 = MyMap.count(-4) < 1;
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C4 = !MyMap.contains(-4);
+  auto C5 = MyMap.find(-5) == MyMap.end();
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: auto C5 = !MyMap.contains(-5);
+  return C1 + C2 + C3 + C4 + C5;
+}
+
+// Check for various types
+int testDifferentTypes(std::map<int, int> &M, std::unordered_set<int> &US, std::set<int> &S, std::multimap<int, int> &MM) {
+  bool C1 = M.count(1001);
+  // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C1 = M.contains(1001);
+  bool C2 = US.count(1002);
+  // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C2 = US.contains(1002);
+  bool C3 = S.count(1003);
+  // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C3 = S.contains(1003);
+  bool C4 = MM.count(1004);
+  // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C4 = MM.contains(1004);
+  return C1 + C2 + C3 + C4;
+}
+
+// The check detects all kind of `const`, reference, rvalue-reference and value types.
+int testQualifiedTypes(std::map<int, int> ValueM, std::map<int, int> &RefM, const std::map<int, int> &ConstRefM, std::map<int, int> &&RValueM) {
+  bool C1 = ValueM.count(2001);
+  // CHECK-MESSAGES: :[[@LINE-1]]:20: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C1 = ValueM.contains(2001);
+  bool C2 = RefM.count(2002);
+  // CHECK-MESSAGES: :[[@LINE-1]]:18: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C2 = RefM.contains(2002);
+  bool C3 = ConstRefM.count(2003);
+  // CHECK-MESSAGES: :[[@LINE-1]]:23: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C3 = ConstRefM.contains(2003);
+  bool C4 = RValueM.count(2004);
+  // CHECK-MESSAGES: :[[@LINE-1]]:21: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C4 = RValueM.contains(2004);
+  return C1 + C2 + C3 + C4;
+}
+
+// This is effectively a membership check, as the result is implicitly casted
+// to `bool`.
+bool returnContains(std::map<int, int> &M) {
+  return M.count(42);
+  // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: return M.contains(42);
+}
+
+// This returns the actual count and should not be rewritten
+int actualCount(std::multimap<int, int> &M) {
+  return M.count(21);
+  // NO-WARNING.
+  // CHECK-FIXES: return M.count(21);
+}
+
+// Check that we are not confused by aliases
+namespace s2 = std;
+using MyMapT = s2::map<int, int>;
+int typeAliases(MyMapT &MyMap) {
+  bool C1 = MyMap.count(99);
+  // CHECK-MESSAGES: :[[@LINE-1]]:19: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: bool C1 = MyMap.contains(99);
+  return C1;
+}
+
+// Check that the tests also trigger for a local variable and not only for
+// function arguments.
+bool localVar() {
+  using namespace std;
+  map<int, int> LocalM;
+  return LocalM.count(42);
+  // CHECK-MESSAGES: :[[@LINE-1]]:17: warning: use 'contains' to check for membership [readability-container-contains]
+  // CHECK-FIXES: return LocalM.contains(42);
+}
+
+// Check various usages of an actual `count` which isn't rewritten
+int nonRewrittenCount(std::multimap<int, int> &MyMap) {
+  // This is an actual test if we have at least 2 usages. Shouldn't be rewritten.
+  bool C1 = MyMap.count(1) >= 2;
+  // NO-WARNING.
+  // CHECK-FIXES: bool C1 = MyMap.count(1) >= 2;
+
+  // "< 0" makes little sense and is always `false`. Still, let's ensure we
+  // don't accidentally rewrite it to 'contains'.
+  bool C2 = MyMap.count(2) < 0;
+  // NO-WARNING.
+  // CHECK-FIXES: bool C2 = MyMap.count(2) < 0;
+
+  // The `count` is used in some more complicated formula.
+  bool C3 = MyMap.count(1) + MyMap.count(2) * 2 + MyMap.count(3) / 3 >= 20;
+  // NO-WARNING.
+  // CHECK-FIXES: bool C3 = MyMap.count(1) + MyMap.count(2) * 2 + MyMap.count(3) / 3 >= 20;
+
+  // This could theoretically be rewritten into a 'contains' after removig the
+  // `4` on both sides of the comparison. For the time being, we don't detect
+  // this case.
+  bool C4 = MyMap.count(1) + 4 > 4;
+  // NO-WARNING.
+  // CHECK-FIXES: bool C4 = MyMap.count(1) + 4 > 4;
+
+  return C1 + C2 + C3 + C4;
+}
+
+// We don't want to rewrite if the `contains` call is from a macro expansion
+int testMacroExpansion(std::unordered_set<int> &MySet) {
+#define COUNT_ONES(SET) SET.count(1)
+  // Rewriting the macro would break the code
+  // CHECK-FIXES: #define COUNT_ONES(SET) SET.count(1)
+  // We still want to warn the user even if we don't offer a fixit
+  if (COUNT_ONES(MySet)) {
+    // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: use 'contains' to check for membership [readability-container-contains]
+    // CHECK-MESSAGES: note: expanded from macro 'COUNT_ONES'
+    return COUNT_ONES(MySet);
+  }
+#undef COUNT_ONES
+#define COUNT_ONES count(1)
+  // Rewriting the macro would break the code
+  // CHECK-FIXES: #define COUNT_ONES count(1)
+  // We still want to warn the user even if we don't offer a fixit
+  if (MySet.COUNT_ONES) {
+    // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: use 'contains' to check for membership [readability-container-contains]
+    // CHECK-MESSAGES: note: expanded from macro 'COUNT_ONES'
+    return MySet.COUNT_ONES;
+  }
+#undef COUNT_ONES
+#define MY_SET MySet
+  // CHECK-FIXES: #define MY_SET MySet
+  // We still want to rewrite one of the two calls to `count`
+  if (MY_SET.count(1)) {
+    // CHECK-MESSAGES: :[[@LINE-1]]:14: warning: use 'contains' to check for membership [readability-container-contains]
+    // CHECK-FIXES: if (MY_SET.contains(1)) {
+    return MY_SET.count(1);
+  }
+#undef MY_SET
+  return 0;
+}
+
+// The following map has the same interface like `std::map`.
+template <class Key, class T>
+struct CustomMap {
+  unsigned count(const Key &K) const;
+  bool contains(const Key &K) const;
+  void *find(const Key &K);
+  void *end();
+};
+
+// The clang-tidy check is currently hard-coded against the `std::` containers
+// and hence won't annotate the following instance. We might change this in the
+// future and also detect the following case.
+void *testDifferentCheckTypes(CustomMap<int, int> &MyMap) {
+  if (MyMap.count(0))
+    // NO-WARNING.
+    // CHECK-FIXES: if (MyMap.count(0))
+    return nullptr;
+  return MyMap.find(2);
+}

From e4074432d5bf5c295f96eeed27c5b693f5b3bf16 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 11:57:23 +0000
Subject: [PATCH 363/946] [X86] Remove avx512f integer and/or/xor/min/max
 reduction intrinsics and use generic equivalents

None of these have any reordering issues, and they still emit the same reduction intrinsics without any change in the existing test coverage:

llvm-project\clang\test\CodeGen\X86\avx512-reduceIntrin.c
llvm-project\clang\test\CodeGen\X86\avx512-reduceMinMaxIntrin.c

Differential Revision: https://reviews.llvm.org/D117881
---
 clang/include/clang/Basic/BuiltinsX86.def | 12 ------
 clang/lib/CodeGen/CGBuiltin.cpp           | 36 -----------------
 clang/lib/Headers/avx512fintrin.h         | 48 +++++++++++------------
 3 files changed, 24 insertions(+), 72 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index a8f5567248624..0669a96b942b3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2015,8 +2015,6 @@ TARGET_BUILTIN(__builtin_ia32_selectsd_128, "V2dUcV2dV2d", "ncV:128:", "avx512f"
 // generic reduction intrinsics
 TARGET_BUILTIN(__builtin_ia32_reduce_add_d512, "iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_add_q512, "OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_and_d512, "iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_and_q512, "OiV8Oi", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_pd512, "ddV8d", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ps512, "ffV16f", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_fadd_ph512, "xxV32x", "ncV:512:", "avx512fp16")
@@ -2039,16 +2037,6 @@ TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph256, "xxV16x", "ncV:256:", "avx512fp
 TARGET_BUILTIN(__builtin_ia32_reduce_fmul_ph128, "xxV8x", "ncV:128:", "avx512fp16,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_reduce_mul_d512, "iV16i", "ncV:512:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_reduce_mul_q512, "OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_or_d512, "iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_or_q512, "OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_smax_d512, "iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_smax_q512, "OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_smin_d512, "iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_smin_q512, "OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_umax_d512, "iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_umax_q512, "OiV8Oi", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_umin_d512, "iV16i", "ncV:512:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_reduce_umin_q512, "OiV8Oi", "ncV:512:", "avx512f")
 
 // MONITORX/MWAITX
 TARGET_BUILTIN(__builtin_ia32_monitorx, "vvC*UiUi", "n", "mwaitx")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4c68b20067b99..cd35e7cbe76f7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -14365,12 +14365,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::vector_reduce_add, Ops[0]->getType());
     return Builder.CreateCall(F, {Ops[0]});
   }
-  case X86::BI__builtin_ia32_reduce_and_d512:
-  case X86::BI__builtin_ia32_reduce_and_q512: {
-    Function *F =
-        CGM.getIntrinsic(Intrinsic::vector_reduce_and, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0]});
-  }
   case X86::BI__builtin_ia32_reduce_fadd_pd512:
   case X86::BI__builtin_ia32_reduce_fadd_ps512:
   case X86::BI__builtin_ia32_reduce_fadd_ph512:
@@ -14417,36 +14411,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
         CGM.getIntrinsic(Intrinsic::vector_reduce_mul, Ops[0]->getType());
     return Builder.CreateCall(F, {Ops[0]});
   }
-  case X86::BI__builtin_ia32_reduce_or_d512:
-  case X86::BI__builtin_ia32_reduce_or_q512: {
-    Function *F =
-        CGM.getIntrinsic(Intrinsic::vector_reduce_or, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0]});
-  }
-  case X86::BI__builtin_ia32_reduce_smax_d512:
-  case X86::BI__builtin_ia32_reduce_smax_q512: {
-    Function *F =
-        CGM.getIntrinsic(Intrinsic::vector_reduce_smax, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0]});
-  }
-  case X86::BI__builtin_ia32_reduce_smin_d512:
-  case X86::BI__builtin_ia32_reduce_smin_q512: {
-    Function *F =
-        CGM.getIntrinsic(Intrinsic::vector_reduce_smin, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0]});
-  }
-  case X86::BI__builtin_ia32_reduce_umax_d512:
-  case X86::BI__builtin_ia32_reduce_umax_q512: {
-    Function *F =
-        CGM.getIntrinsic(Intrinsic::vector_reduce_umax, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0]});
-  }
-  case X86::BI__builtin_ia32_reduce_umin_d512:
-  case X86::BI__builtin_ia32_reduce_umin_q512: {
-    Function *F =
-        CGM.getIntrinsic(Intrinsic::vector_reduce_umin, Ops[0]->getType());
-    return Builder.CreateCall(F, {Ops[0]});
-  }
 
   // 3DNow!
   case X86::BI__builtin_ia32_pswapdsf:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 8695aeb94de24..50e0e287d9fc7 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -9324,11 +9324,11 @@ static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
-  return __builtin_ia32_reduce_and_q512(__W);
+  return __builtin_reduce_and((__v8di)__W);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
-  return __builtin_ia32_reduce_or_q512(__W);
+  return __builtin_reduce_or((__v8di)__W);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512
@@ -9346,13 +9346,13 @@ _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
 static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
   __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W);
-  return __builtin_ia32_reduce_and_q512(__W);
+  return __builtin_reduce_and((__v8di)__W);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
   __W = _mm512_maskz_mov_epi64(__M, __W);
-  return __builtin_ia32_reduce_or_q512(__W);
+  return __builtin_reduce_or((__v8di)__W);
 }
 
 // -0.0 is used to ignore the start value since it is the neutral value of
@@ -9390,12 +9390,12 @@ _mm512_reduce_mul_epi32(__m512i __W) {
 
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_and_epi32(__m512i __W) {
-  return __builtin_ia32_reduce_and_d512((__v16si)__W);
+  return __builtin_reduce_and((__v16si)__W);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_or_epi32(__m512i __W) {
-  return __builtin_ia32_reduce_or_d512((__v16si)__W);
+  return __builtin_reduce_or((__v16si)__W);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS512
@@ -9413,13 +9413,13 @@ _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
   __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W);
-  return __builtin_ia32_reduce_and_d512((__v16si)__W);
+  return __builtin_reduce_and((__v16si)__W);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
   __W = _mm512_maskz_mov_epi32(__M, __W);
-  return __builtin_ia32_reduce_or_d512((__v16si)__W);
+  return __builtin_reduce_or((__v16si)__W);
 }
 
 static __inline__ float __DEFAULT_FN_ATTRS512
@@ -9446,89 +9446,89 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
 
 static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_epi64(__m512i __V) {
-  return __builtin_ia32_reduce_smax_q512(__V);
+  return __builtin_reduce_max((__v8di)__V);
 }
 
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_epu64(__m512i __V) {
-  return __builtin_ia32_reduce_umax_q512(__V);
+  return __builtin_reduce_max((__v8du)__V);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_epi64(__m512i __V) {
-  return __builtin_ia32_reduce_smin_q512(__V);
+  return __builtin_reduce_min((__v8di)__V);
 }
 
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_epu64(__m512i __V) {
-  return __builtin_ia32_reduce_umin_q512(__V);
+  return __builtin_reduce_min((__v8du)__V);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
-  return __builtin_ia32_reduce_smax_q512(__V);
+  return __builtin_reduce_max((__v8di)__V);
 }
 
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
   __V = _mm512_maskz_mov_epi64(__M, __V);
-  return __builtin_ia32_reduce_umax_q512(__V);
+  return __builtin_reduce_max((__v8du)__V);
 }
 
 static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
-  return __builtin_ia32_reduce_smin_q512(__V);
+  return __builtin_reduce_min((__v8di)__V);
 }
 
 static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
-  return __builtin_ia32_reduce_umin_q512(__V);
+  return __builtin_reduce_min((__v8du)__V);
 }
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_epi32(__m512i __V) {
-  return __builtin_ia32_reduce_smax_d512((__v16si)__V);
+  return __builtin_reduce_max((__v16si)__V);
 }
 
 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_epu32(__m512i __V) {
-  return __builtin_ia32_reduce_umax_d512((__v16si)__V);
+  return __builtin_reduce_max((__v16su)__V);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_epi32(__m512i __V) {
-  return __builtin_ia32_reduce_smin_d512((__v16si)__V);
+  return __builtin_reduce_min((__v16si)__V);
 }
 
 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_epu32(__m512i __V) {
-  return __builtin_ia32_reduce_umin_d512((__v16si)__V);
+  return __builtin_reduce_min((__v16su)__V);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
-  return __builtin_ia32_reduce_smax_d512((__v16si)__V);
+  return __builtin_reduce_max((__v16si)__V);
 }
 
 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
   __V = _mm512_maskz_mov_epi32(__M, __V);
-  return __builtin_ia32_reduce_umax_d512((__v16si)__V);
+  return __builtin_reduce_max((__v16su)__V);
 }
 
 static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
-  return __builtin_ia32_reduce_smin_d512((__v16si)__V);
+  return __builtin_reduce_min((__v16si)__V);
 }
 
 static __inline__ unsigned int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
-  return __builtin_ia32_reduce_umin_d512((__v16si)__V);
+  return __builtin_reduce_min((__v16su)__V);
 }
 
 static __inline__ double __DEFAULT_FN_ATTRS512

From 8082ab2fc391cb88ab142d3ce8f63e6eb8641a23 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin@arm.com>
Date: Mon, 24 Jan 2022 10:41:51 +0000
Subject: [PATCH 364/946] [LoopVectorize] Support epilogue vectorisation of
 loops with reductions

isCandidateForEpilogueVectorization will currently return false for loops
which contain reductions. This patch removes this restriction and makes
the following changes to support epilogue vectorisation with reductions:

- `fixReduction`: If fixReduction is being called during vectorisation of the
    epilogue, the phi node it creates will need to additionally carry incoming
     values from the middle block of the main loop.

- `createEpilogueVectorizedLoopSkeleton`: The incoming values of the phi
    created by fixReduction are updated after the vec.epilog.iter.check block
    is added. The phi is also moved to the preheader of the epilogue.

- `processLoop`: The start value of any VPReductionPHIRecipes are updated before
    vectorising the epilogue loop. The getResumeInstr function added to the ILV
    will return the resume instruction associated with the recurrence descriptor.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D116928
---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  79 ++-
 .../sve-epilog-vect-inloop-reductions.ll      | 121 ++++
 .../AArch64/sve-epilog-vect-reductions.ll     | 121 ++++
 .../sve-epilog-vect-strict-reductions.ll      | 116 ++++
 .../X86/invariant-store-vectorization.ll      | 106 ++--
 .../Transforms/LoopVectorize/X86/pr35432.ll   |   2 +-
 .../Transforms/LoopVectorize/X86/pr42674.ll   |   2 +-
 .../epilog-vectorization-reductions.ll        | 529 ++++++++++++++++++
 ...ptimal-epilog-vectorization-limitations.ll |  33 --
 9 files changed, 1030 insertions(+), 79 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
 create mode 100644 llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 7b90dcff7bc1e..d11f4146b5905 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -557,6 +557,10 @@ class InnerLoopVectorizer {
   /// vector of instructions.
   void addMetadata(ArrayRef<Value *> To, Instruction *From);
 
+  // Returns the resume value (bc.merge.rdx) for a reduction as
+  // generated by fixReduction.
+  PHINode *getReductionResumeValue(const RecurrenceDescriptor &RdxDesc);
+
 protected:
   friend class LoopVectorizationPlanner;
 
@@ -823,6 +827,11 @@ class InnerLoopVectorizer {
   /// Structure to hold information about generated runtime checks, responsible
   /// for cleaning the checks, if vectorization turns out unprofitable.
   GeneratedRTChecks &RTChecks;
+
+  // Holds the resume values for reductions in the loops, used to set the
+  // correct start value of reduction PHIs when vectorizing the epilogue.
+  SmallMapVector<const RecurrenceDescriptor *, PHINode *, 4>
+      ReductionResumeValues;
 };
 
 class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -1218,6 +1227,14 @@ void InnerLoopVectorizer::addMetadata(ArrayRef<Value *> To,
   }
 }
 
+PHINode *InnerLoopVectorizer::getReductionResumeValue(
+    const RecurrenceDescriptor &RdxDesc) {
+  auto It = ReductionResumeValues.find(&RdxDesc);
+  assert(It != ReductionResumeValues.end() &&
+         "Expected to find a resume value for the reduction.");
+  return It->second;
+}
+
 namespace llvm {
 
 // Loop vectorization cost-model hints how the scalar epilogue loop should be
@@ -4287,13 +4304,29 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
                            : Builder.CreateZExt(ReducedPartRdx, PhiTy);
   }
 
+  PHINode *ResumePhi =
+      dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
+
   // Create a phi node that merges control-flow from the backedge-taken check
   // block and the middle block.
   PHINode *BCBlockPhi = PHINode::Create(PhiTy, 2, "bc.merge.rdx",
                                         LoopScalarPreHeader->getTerminator());
-  for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
-    BCBlockPhi->addIncoming(ReductionStartValue, LoopBypassBlocks[I]);
-  BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
+
+  // If we are fixing reductions in the epilogue loop then we should already
+  // have created a bc.merge.rdx Phi after the main vector body. Ensure that
+  // we carry over the incoming values correctly.
+  for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
+    if (Incoming == LoopMiddleBlock)
+      BCBlockPhi->addIncoming(ReducedPartRdx, Incoming);
+    else if (ResumePhi && llvm::is_contained(ResumePhi->blocks(), Incoming))
+      BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
+                              Incoming);
+    else
+      BCBlockPhi->addIncoming(ReductionStartValue, Incoming);
+  }
+
+  // Set the resume value for this reduction
+  ReductionResumeValues.insert({&RdxDesc, BCBlockPhi});
 
   // Now, we need to fix the users of the reduction variable
   // inside and outside of the scalar remainder loop.
@@ -5811,10 +5844,8 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
     const Loop &L, ElementCount VF) const {
   // Cross iteration phis such as reductions need special handling and are
   // currently unsupported.
-  if (any_of(L.getHeader()->phis(), [&](PHINode &Phi) {
-        return Legal->isFirstOrderRecurrence(&Phi) ||
-               Legal->isReductionVariable(&Phi);
-      }))
+  if (any_of(L.getHeader()->phis(),
+             [&](PHINode &Phi) { return Legal->isFirstOrderRecurrence(&Phi); }))
     return false;
 
   // Phis with uses outside of the loop require special handling and are
@@ -8248,6 +8279,25 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
     LoopBypassBlocks.push_back(EPI.MemSafetyCheck);
   LoopBypassBlocks.push_back(EPI.EpilogueIterationCountCheck);
 
+  // The vec.epilog.iter.check block may contain Phi nodes from reductions which
+  // merge control-flow from the latch block and the middle block. Update the
+  // incoming values here and move the Phi into the preheader.
+  SmallVector<PHINode *, 4> PhisInBlock;
+  for (PHINode &Phi : VecEpilogueIterationCountCheck->phis())
+    PhisInBlock.push_back(&Phi);
+
+  for (PHINode *Phi : PhisInBlock) {
+    Phi->replaceIncomingBlockWith(
+        VecEpilogueIterationCountCheck->getSinglePredecessor(),
+        VecEpilogueIterationCountCheck);
+    Phi->removeIncomingValue(EPI.EpilogueIterationCountCheck);
+    if (EPI.SCEVSafetyCheck)
+      Phi->removeIncomingValue(EPI.SCEVSafetyCheck);
+    if (EPI.MemSafetyCheck)
+      Phi->removeIncomingValue(EPI.MemSafetyCheck);
+    Phi->moveBefore(LoopVectorPreHeader->getFirstNonPHI());
+  }
+
   // Generate a resume induction for the vector epilogue and put it in the
   // vector epilogue preheader
   Type *IdxTy = Legal->getWidestInductionType();
@@ -10567,6 +10617,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
                                                  Checks);
 
         VPlan &BestEpiPlan = LVP.getBestPlanFor(EPI.EpilogueVF);
+
+        // Ensure that the start values for any VPReductionPHIRecipes are
+        // updated before vectorising the epilogue loop.
+        VPBasicBlock *Header = BestEpiPlan.getEntry()->getEntryBasicBlock();
+        for (VPRecipeBase &R : Header->phis()) {
+          if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
+            if (auto *Resume = MainILV.getReductionResumeValue(
+                    ReductionPhi->getRecurrenceDescriptor())) {
+              VPValue *StartVal = new VPValue(Resume);
+              BestEpiPlan.addExternalDef(StartVal);
+              ReductionPhi->setOperand(0, StartVal);
+            }
+          }
+        }
+
         LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
                         DT);
         ++LoopsEpilogueVectorized;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
new file mode 100644
index 0000000000000..b8f941a7a4481
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-inloop-reductions.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -epilogue-vectorization-force-VF=2 -prefer-inloop-reductions -S | FileCheck %s
+
+;
+; In-loop integer and reduction
+;
+define i64 @int_reduction_and(i64* noalias nocapture %a, i64 %N) {
+; CHECK-LABEL: @int_reduction_and(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi i64 [ -1, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP19]] = and i64 [[TMP18]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vector.reduce.and.nxv2i64(<vscale x 2 x i64> [[WIDE_LOAD3]])
+; CHECK-NEXT:    [[TMP21]] = and i64 [[TMP20]], [[VEC_PHI2]]
+; CHECK-NEXT:    [[TMP22:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP23:%.*]] = mul i64 [[TMP22]], 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP23]]
+; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = and i64 [[TMP21]], [[TMP19]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 1, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF5:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT:    [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]]
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI9:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP30:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX8]], 0
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, i64* [[TMP26]], i32 0
+; CHECK-NEXT:    [[TMP28:%.*]] = bitcast i64* [[TMP27]] to <2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4
+; CHECK-NEXT:    [[TMP29:%.*]] = call i64 @llvm.vector.reduce.and.v2i64(<2 x i64> [[WIDE_LOAD10]])
+; CHECK-NEXT:    [[TMP30]] = and i64 [[TMP29]], [[VEC_PHI9]]
+; CHECK-NEXT:    [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2
+; CHECK-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC6]]
+; CHECK-NEXT:    br i1 [[TMP31]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC6]]
+; CHECK-NEXT:    br i1 [[CMP_N7]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX12:%.*]] = phi i64 [ 1, [[ITER_CHECK]] ], [ [[BIN_RDX]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[RDX:%.*]] = phi i64 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[L2:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[L3:%.*]] = load i64, i64* [[L2]], align 4
+; CHECK-NEXT:    [[AND]] = and i64 [[RDX]], [[L3]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[AND_LCSSA4:%.*]] = phi i64 [ [[AND]], [[FOR_BODY]] ], [ [[TMP30]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[AND_LCSSA:%.*]] = phi i64 [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ], [ [[AND_LCSSA4]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i64 [[AND_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+  %rdx = phi i64 [ %and, %for.body ], [ 1, %entry ]
+  %l2 = getelementptr inbounds i64, i64* %a, i64 %iv
+  %l3 = load i64, i64* %l2
+  %and = and i64 %rdx, %l3
+  %iv.next = add i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret i64 %and
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.interleave.count", i32 2}
+!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
new file mode 100644
index 0000000000000..e3e219cc1601a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-reductions.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -epilogue-vectorization-force-VF=2 -S | FileCheck %s
+
+;
+; Integer reduction with interleaving & a start value of 5
+;
+define i64 @int_reduction_add(i64* %a, i64 %N) {
+; CHECK-LABEL: @int_reduction_add(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ insertelement (<vscale x 2 x i64> zeroinitializer, i64 5, i32 0), [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i64* [[TMP12]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], 2
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i64, i64* [[TMP10]], i32 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast i64* [[TMP16]] to <vscale x 2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 2 x i64>, <vscale x 2 x i64>* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP18]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP19]] = add <vscale x 2 x i64> [[WIDE_LOAD3]], [[VEC_PHI2]]
+; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP21:%.*]] = mul i64 [[TMP20]], 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <vscale x 2 x i64> [[TMP19]], [[TMP18]]
+; CHECK-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[BIN_RDX]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF5:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT:    [[N_VEC6:%.*]] = sub i64 [[N]], [[N_MOD_VF5]]
+; CHECK-NEXT:    [[TMP24:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX8:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI9:%.*]] = phi <2 x i64> [ [[TMP24]], [[VEC_EPILOG_PH]] ], [ [[TMP29:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[INDEX8]], 0
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, i64* [[TMP26]], i32 0
+; CHECK-NEXT:    [[TMP28:%.*]] = bitcast i64* [[TMP27]] to <2 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <2 x i64>, <2 x i64>* [[TMP28]], align 4
+; CHECK-NEXT:    [[TMP29]] = add <2 x i64> [[WIDE_LOAD10]], [[VEC_PHI9]]
+; CHECK-NEXT:    [[INDEX_NEXT11]] = add nuw i64 [[INDEX8]], 2
+; CHECK-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT11]], [[N_VEC6]]
+; CHECK-NEXT:    br i1 [[TMP30]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[TMP31:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP29]])
+; CHECK-NEXT:    [[CMP_N7:%.*]] = icmp eq i64 [[N]], [[N_VEC6]]
+; CHECK-NEXT:    br i1 [[CMP_N7]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX12:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP23]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX12]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP32:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = add i64 [[TMP32]], [[SUM]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[ADD_LCSSA4:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP31]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i64 [ [[TMP23]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA4]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i64 [[ADD_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi i64 [ 5, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
+  %0 = load i64, i64* %arrayidx
+  %add = add i64 %0, %sum
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret i64 %add
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.interleave.count", i32 2}
+!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
new file mode 100644
index 0000000000000..a32a6723e83bf
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect-strict-reductions.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -epilogue-vectorization-force-VF=2 -S | FileCheck %s
+
+;
+; Strict fadd reduction with interleaving
+;
+define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
+; CHECK-LABEL: @fadd_strict(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 8
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP6]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP4]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP12]] to <vscale x 4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[TMP13]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT:    [[TMP15:%.*]] = mul i32 [[TMP14]], 4
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP10]], i32 [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast float* [[TMP16]] to <vscale x 4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD2:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[VEC_PHI]], <vscale x 4 x float> [[WIDE_LOAD]])
+; CHECK-NEXT:    [[TMP19]] = call float @llvm.vector.reduce.fadd.nxv4f32(float [[TMP18]], <vscale x 4 x float> [[WIDE_LOAD2]])
+; CHECK-NEXT:    [[TMP20:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP21:%.*]] = mul i64 [[TMP20]], 8
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP21]]
+; CHECK-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF4:%.*]] = urem i64 [[N]], 2
+; CHECK-NEXT:    [[N_VEC5:%.*]] = sub i64 [[N]], [[N_MOD_VF4]]
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX7:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI8:%.*]] = phi float [ [[BC_MERGE_RDX]], [[VEC_EPILOG_PH]] ], [ [[TMP27:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP23:%.*]] = add i64 [[INDEX7]], 0
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP23]]
+; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds float, float* [[TMP24]], i32 0
+; CHECK-NEXT:    [[TMP26:%.*]] = bitcast float* [[TMP25]] to <2 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP26]], align 4
+; CHECK-NEXT:    [[TMP27]] = call float @llvm.vector.reduce.fadd.v2f32(float [[VEC_PHI8]], <2 x float> [[WIDE_LOAD9]])
+; CHECK-NEXT:    [[INDEX_NEXT10]] = add nuw i64 [[INDEX7]], 2
+; CHECK-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT10]], [[N_VEC5]]
+; CHECK-NEXT:    br i1 [[TMP28]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[CMP_N6:%.*]] = icmp eq i64 [[N]], [[N_VEC5]]
+; CHECK-NEXT:    br i1 [[CMP_N6]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX11:%.*]] = phi float [ 0xFFFFFFFFE0000000, [[ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM_07:%.*]] = phi float [ [[BC_MERGE_RDX11]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP29:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = fadd float [[TMP29]], [[SUM_07]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[ADD_LCSSA3:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi float [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA3]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret float [[ADD_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum.07 = phi float [ 0xFFFFFFFFE0000000, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd float %0, %sum.07
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  ret float %add
+}
+
+!0 = distinct !{!0, !1, !2}
+!1 = !{!"llvm.loop.interleave.count", i32 2}
+!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
index ab5003f1b74fd..91ab21df4b273 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-store-vectorization.ll
@@ -9,11 +9,11 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b) {
 ; CHECK-LABEL: @inv_val_store_to_inv_address_with_reduction(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  iter.check:
 ; CHECK-NEXT:    [[NTRUNC:%.*]] = trunc i64 [[N:%.*]] to i32
 ; CHECK-NEXT:    [[SMAX6:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 64
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX6]], 8
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 1
 ; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
@@ -21,59 +21,91 @@ define i32 @inv_val_store_to_inv_address_with_reduction(i32* %a, i64 %n, i32* %b
 ; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[A]]
 ; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
-; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[SMAX6]], 64
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK7]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX6]], 9223372036854775744
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI7:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI8:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_PHI9:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI8:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI9:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI10:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !0
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 16
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <16 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD10:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <16 x i32>, <16 x i32>* [[TMP3]], align 8, !alias.scope !0
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 32
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD11:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <16 x i32>, <16 x i32>* [[TMP5]], align 8, !alias.scope !0
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 48
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <16 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !0
+; CHECK-NEXT:    [[WIDE_LOAD13:%.*]] = load <16 x i32>, <16 x i32>* [[TMP7]], align 8, !alias.scope !0
 ; CHECK-NEXT:    [[TMP8]] = add <16 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP9]] = add <16 x i32> [[VEC_PHI7]], [[WIDE_LOAD10]]
-; CHECK-NEXT:    [[TMP10]] = add <16 x i32> [[VEC_PHI8]], [[WIDE_LOAD11]]
-; CHECK-NEXT:    [[TMP11]] = add <16 x i32> [[VEC_PHI9]], [[WIDE_LOAD12]]
+; CHECK-NEXT:    [[TMP9]] = add <16 x i32> [[VEC_PHI8]], [[WIDE_LOAD11]]
+; CHECK-NEXT:    [[TMP10]] = add <16 x i32> [[VEC_PHI9]], [[WIDE_LOAD12]]
+; CHECK-NEXT:    [[TMP11]] = add <16 x i32> [[VEC_PHI10]], [[WIDE_LOAD13]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], i32* [[A]], align 4, !alias.scope !3, !noalias !0
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64
 ; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <16 x i32> [[TMP9]], [[TMP8]]
-; CHECK-NEXT:    [[BIN_RDX13:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]]
-; CHECK-NEXT:    [[BIN_RDX14:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX13]]
-; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX14]])
+; CHECK-NEXT:    [[BIN_RDX14:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]]
+; CHECK-NEXT:    [[BIN_RDX15:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX14]]
+; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX15]])
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = and i64 [[SMAX6]], 56
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp eq i64 [[N_VEC_REMAINING]], 0
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP13]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[SMAX16:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 1)
+; CHECK-NEXT:    [[N_VEC18:%.*]] = and i64 [[SMAX16]], 9223372036854775800
+; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <8 x i32> <i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 [[BC_MERGE_RDX]], i64 0
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX20:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI21:%.*]] = phi <8 x i32> [ [[TMP14]], [[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX20]]
+; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <8 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD22:%.*]] = load <8 x i32>, <8 x i32>* [[TMP16]], align 8
+; CHECK-NEXT:    [[TMP17]] = add <8 x i32> [[VEC_PHI21]], [[WIDE_LOAD22]]
+; CHECK-NEXT:    store i32 [[NTRUNC]], i32* [[A]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT23]] = add nuw i64 [[INDEX20]], 8
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT23]], [[N_VEC18]]
+; CHECK-NEXT:    br i1 [[TMP18]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP17]])
+; CHECK-NEXT:    [[CMP_N19:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC18]]
+; CHECK-NEXT:    br i1 [[CMP_N19]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX24:%.*]] = phi i32 [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP13]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT:    [[T0:%.*]] = phi i32 [ [[T3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[T0:%.*]] = phi i32 [ [[T3:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX24]], [[VEC_EPILOG_SCALAR_PH]] ]
 ; CHECK-NEXT:    [[T1:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[I]]
 ; CHECK-NEXT:    [[T2:%.*]] = load i32, i32* [[T1]], align 8
 ; CHECK-NEXT:    [[T3]] = add i32 [[T0]], [[T2]]
 ; CHECK-NEXT:    store i32 [[NTRUNC]], i32* [[A]], align 4
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[T3_LCSSA:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    [[T4:%.*]] = phi i32 [ [[T3]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[T4:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ [[T3_LCSSA]], [[FOR_END_LOOPEXIT]] ]
 ; CHECK-NEXT:    ret i32 [[T4]]
 ;
 entry:
@@ -129,14 +161,14 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b,
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !8, !noalias !11
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !10, !noalias !13
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
-; CHECK-NEXT:    store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP3]], align 4, !alias.scope !8, !noalias !11
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT9]], <16 x i32*> [[BROADCAST_SPLAT11]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !11
+; CHECK-NEXT:    store <16 x i32> [[BROADCAST_SPLAT9]], <16 x i32>* [[TMP3]], align 4, !alias.scope !10, !noalias !13
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[BROADCAST_SPLAT9]], <16 x i32*> [[BROADCAST_SPLAT11]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !13
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX6]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -166,7 +198,7 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b,
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[BROADCAST_SPLAT21]], <8 x i32*> [[BROADCAST_SPLAT23]], i32 4, <8 x i1> [[TMP7]])
 ; CHECK-NEXT:    [[INDEX_NEXT24]] = add nuw i64 [[INDEX16]], 8
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC14]]
-; CHECK-NEXT:    br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP9]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    [[CMP_N15:%.*]] = icmp eq i64 [[SMAX12]], [[N_VEC14]]
 ; CHECK-NEXT:    br i1 [[CMP_N15]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -186,7 +218,7 @@ define void @inv_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32* %b,
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP17:![0-9]+]]
 ; CHECK:       for.end.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END]]
 ; CHECK:       for.end:
@@ -257,17 +289,17 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !17, !noalias !20
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <16 x i32>, <16 x i32>* [[TMP1]], align 8, !alias.scope !18, !noalias !21
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <16 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP0]] to <16 x i32>*
-; CHECK-NEXT:    store <16 x i32> [[BROADCAST_SPLAT19]], <16 x i32>* [[TMP3]], align 4, !alias.scope !17, !noalias !20
+; CHECK-NEXT:    store <16 x i32> [[BROADCAST_SPLAT19]], <16 x i32>* [[TMP3]], align 4, !alias.scope !18, !noalias !21
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, i32* [[C]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <16 x i32>*
-; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP5]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !23
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT21]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !24, !noalias !23
+; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>* [[TMP5]], i32 8, <16 x i1> [[TMP2]], <16 x i32> poison), !alias.scope !24
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> [[WIDE_MASKED_LOAD]], <16 x i32*> [[BROADCAST_SPLAT21]], i32 4, <16 x i1> [[TMP2]]), !alias.scope !25, !noalias !24
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX16]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
@@ -300,7 +332,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
 ; CHECK-NEXT:    call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> [[WIDE_MASKED_LOAD32]], <8 x i32*> [[BROADCAST_SPLAT34]], i32 4, <8 x i1> [[TMP9]])
 ; CHECK-NEXT:    [[INDEX_NEXT35]] = add nuw i64 [[INDEX26]], 8
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT35]], [[N_VEC24]]
-; CHECK-NEXT:    br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
 ; CHECK:       vec.epilog.middle.block:
 ; CHECK-NEXT:    [[CMP_N25:%.*]] = icmp eq i64 [[SMAX22]], [[N_VEC24]]
 ; CHECK-NEXT:    br i1 [[CMP_N25]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
@@ -322,7 +354,7 @@ define void @variant_val_store_to_inv_address_conditional(i32* %a, i64 %n, i32*
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT]], !llvm.loop [[LOOP28:![0-9]+]]
 ; CHECK:       for.end.loopexit:
 ; CHECK-NEXT:    br label [[FOR_END]]
 ; CHECK:       for.end:
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
index 4bd408ccd4036..ebf9b5467b582 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr35432.ll
@@ -89,7 +89,7 @@ define i32 @main() local_unnamed_addr #0 {
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY8:%.*]]
 ; CHECK:       for.body8:
 ; CHECK-NEXT:    [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll b/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
index bb50d0997e58e..7516c055ab732 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr42674.ll
@@ -8,7 +8,7 @@
 ; the vector loop was dead code leaving only a scalar remainder.
 define zeroext i8 @sum() {
 ; CHECK-LABEL: @sum(
-; CHECK-NEXT:  entry:
+; CHECK-NEXT:  iter.check:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
new file mode 100644
index 0000000000000..57ddff65ff90a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -0,0 +1,529 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S | FileCheck %s
+
+;
+; Integer reduction with a start value of 5
+;
+define i64 @int_reduction_add(i64* %a, i64 %N) {
+; CHECK-LABEL: @int_reduction_add(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 5, i64 0, i64 0, i64 0>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <4 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4]] = add <4 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP4]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 5, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]]
+; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI7:%.*]] = phi <4 x i64> [ [[TMP7]], [[VEC_EPILOG_PH]] ], [ [[TMP12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX6]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i64, i64* [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i64* [[TMP10]] to <4 x i64>*
+; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x i64>, <4 x i64>* [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP12]] = add <4 x i64> [[WIDE_LOAD8]], [[VEC_PHI7]]
+; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4
+; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]]
+; CHECK-NEXT:    br i1 [[TMP13]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP12]])
+; CHECK-NEXT:    [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]]
+; CHECK-NEXT:    br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX10:%.*]] = phi i64 [ 5, [[ITER_CHECK]] ], [ [[TMP6]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i64, i64* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = add i64 [[TMP15]], [[SUM]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[ADD_LCSSA2:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i64 [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA2]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret i64 [[ADD_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi i64 [ 5, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
+  %0 = load i64, i64* %arrayidx
+  %add = add i64 %0, %sum
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  ret i64 %add
+}
+
+;
+; Floating point max reduction
+;
+define float @fp_reduction_max(float* noalias %a, i64 %N) {
+; CHECK-LABEL: @fp_reduction_max(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP5]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP5]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]]
+; CHECK-NEXT:    [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT:    [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI7:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX6]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP9]], i32 0
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4
+; CHECK-NEXT:    [[TMP12:%.*]] = fcmp fast ogt <4 x float> [[VEC_PHI7]], [[WIDE_LOAD8]]
+; CHECK-NEXT:    [[TMP13]] = select <4 x i1> [[TMP12]], <4 x float> [[VEC_PHI7]], <4 x float> [[WIDE_LOAD8]]
+; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[TMP15:%.*]] = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP13]])
+; CHECK-NEXT:    [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]]
+; CHECK-NEXT:    br i1 [[CMP_N5]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX10:%.*]] = phi float [ 0.000000e+00, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[RESULT_08:%.*]] = phi float [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[L0:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[C0:%.*]] = fcmp fast ogt float [[RESULT_08]], [[L0]]
+; CHECK-NEXT:    [[V0]] = select fast i1 [[C0]], float [[RESULT_08]], float [[L0]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[V0_LCSSA2:%.*]] = phi float [ [[V0]], [[FOR_BODY]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[V0_LCSSA:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[V0_LCSSA2]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    ret float [[V0_LCSSA]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+  %result.08 = phi float [ %v0, %for.body ], [ 0.000000e+00, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+  %l0 = load float, float* %arrayidx
+  %c0 = fcmp fast ogt float %result.08, %l0
+  %v0 = select fast i1 %c0, float %result.08, float %l0
+  %iv.next = add i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %iv.next, %N
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret float %v0
+}
+
+;
+; Extension is required before the reduction operation & result is truncated
+;
+define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) {
+; CHECK-LABEL: @reduction_or_trunc(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
+; CHECK-NEXT:    [[TMP6:%.*]] = or <4 x i32> [[TMP1]], [[TMP5]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    [[TMP8:%.*]] = trunc <4 x i32> [[TMP6]] to <4 x i16>
+; CHECK-NEXT:    [[TMP9]] = zext <4 x i16> [[TMP8]] to <4 x i32>
+; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP10:%.*]] = trunc <4 x i32> [[TMP9]] to <4 x i16>
+; CHECK-NEXT:    [[TMP11:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP10]])
+; CHECK-NEXT:    [[TMP12:%.*]] = zext i16 [[TMP11]] to i32
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 256, 256
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP12]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 256, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX3:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT6:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI4:%.*]] = phi <4 x i32> [ [[TMP13]], [[VEC_EPILOG_PH]] ], [ [[TMP23:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[INDEX3]], 0
+; CHECK-NEXT:    [[TMP15:%.*]] = and <4 x i32> [[VEC_PHI4]], <i32 65535, i32 65535, i32 65535, i32 65535>
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[TMP14]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i16, i16* [[TMP16]], i32 0
+; CHECK-NEXT:    [[TMP18:%.*]] = bitcast i16* [[TMP17]] to <4 x i16>*
+; CHECK-NEXT:    [[WIDE_LOAD5:%.*]] = load <4 x i16>, <4 x i16>* [[TMP18]], align 2
+; CHECK-NEXT:    [[TMP19:%.*]] = zext <4 x i16> [[WIDE_LOAD5]] to <4 x i32>
+; CHECK-NEXT:    [[TMP20:%.*]] = or <4 x i32> [[TMP15]], [[TMP19]]
+; CHECK-NEXT:    [[INDEX_NEXT6]] = add nuw i32 [[INDEX3]], 4
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT6]], 256
+; CHECK-NEXT:    [[TMP22:%.*]] = trunc <4 x i32> [[TMP20]] to <4 x i16>
+; CHECK-NEXT:    [[TMP23]] = zext <4 x i16> [[TMP22]] to <4 x i32>
+; CHECK-NEXT:    br i1 [[TMP21]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[TMP24:%.*]] = trunc <4 x i32> [[TMP23]] to <4 x i16>
+; CHECK-NEXT:    [[TMP25:%.*]] = call i16 @llvm.vector.reduce.or.v4i16(<4 x i16> [[TMP24]])
+; CHECK-NEXT:    [[TMP26:%.*]] = zext i16 [[TMP25]] to i32
+; CHECK-NEXT:    [[CMP_N2:%.*]] = icmp eq i32 256, 256
+; CHECK-NEXT:    br i1 [[CMP_N2]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 256, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 256, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX7:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[TMP12]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[SUM_02P:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX7]], [[VEC_EPILOG_SCALAR_PH]] ]
+; CHECK-NEXT:    [[SUM_02:%.*]] = and i32 [[SUM_02P]], 65535
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i32 [[IV]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i16, i16* [[GEP]], align 2
+; CHECK-NEXT:    [[EXT:%.*]] = zext i16 [[LOAD]] to i32
+; CHECK-NEXT:    [[XOR]] = or i32 [[SUM_02]], [[EXT]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[IV_NEXT]], 256
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[XOR_LCSSA1:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP26]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[XOR_LCSSA:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ [[XOR_LCSSA1]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[RET:%.*]] = trunc i32 [[XOR_LCSSA]] to i16
+; CHECK-NEXT:    ret i16 [[RET]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
+  %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
+  %sum.02 = and i32 %sum.02p, 65535
+  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
+  %load = load i16, i16* %gep
+  %ext = zext i16 %load to i32
+  %xor = or i32 %sum.02, %ext
+  %iv.next = add i32 %iv, 1
+  %exitcond = icmp eq i32 %iv.next, 256
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  %ret = trunc i32 %xor to i16
+  ret i16 %ret
+}
+
+;
+; More than one reduction in the loop
+;
+define float @multiple_fp_rdx(float* %A, i64 %N) {
+; CHECK-LABEL: @multiple_fp_rdx(
+; CHECK-NEXT:  iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ <float 1.500000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x float> [ <float 1.000000e+01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[TMP1]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4]] = fadd fast <4 x float> [[VEC_PHI2]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP5]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP4]])
+; CHECK-NEXT:    [[TMP8:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP5]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ 1.500000e+01, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX3:%.*]] = phi float [ 1.000000e+01, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF6:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC7:%.*]] = sub i64 [[N]], [[N_MOD_VF6]]
+; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x float> zeroinitializer, float [[BC_MERGE_RDX3]], i32 0
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI10:%.*]] = phi <4 x float> [ [[TMP9]], [[VEC_EPILOG_PH]] ], [ [[TMP16:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI11:%.*]] = phi <4 x float> [ [[TMP10]], [[VEC_EPILOG_PH]] ], [ [[TMP15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX9]], 0
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 0
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast float* [[TMP13]] to <4 x float>*
+; CHECK-NEXT:    [[WIDE_LOAD12:%.*]] = load <4 x float>, <4 x float>* [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP15]] = fadd fast <4 x float> [[VEC_PHI11]], [[WIDE_LOAD12]]
+; CHECK-NEXT:    [[TMP16]] = fmul fast <4 x float> [[VEC_PHI10]], [[WIDE_LOAD12]]
+; CHECK-NEXT:    [[INDEX_NEXT13]] = add nuw i64 [[INDEX9]], 4
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC7]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[TMP18:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[TMP15]])
+; CHECK-NEXT:    [[TMP19:%.*]] = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> [[TMP16]])
+; CHECK-NEXT:    [[CMP_N8:%.*]] = icmp eq i64 [[N]], [[N_VEC7]]
+; CHECK-NEXT:    br i1 [[CMP_N8]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC7]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX14:%.*]] = phi float [ 1.500000e+01, [[ITER_CHECK]] ], [ [[TMP8]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX15:%.*]] = phi float [ 1.000000e+01, [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[PROD:%.*]] = phi float [ [[BC_MERGE_RDX14]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[MUL:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi float [ [[BC_MERGE_RDX15]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[ADD]] = fadd fast float [[SUM]], [[TMP20]]
+; CHECK-NEXT:    [[MUL]] = fmul fast float [[PROD]], [[TMP20]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK:       for.end.loopexit:
+; CHECK-NEXT:    [[ADD_LCSSA5:%.*]] = phi float [ [[ADD]], [[FOR_BODY]] ], [ [[TMP18]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[MUL_LCSSA4:%.*]] = phi float [ [[MUL]], [[FOR_BODY]] ], [ [[TMP19]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_END]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi float [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[ADD_LCSSA5]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[MUL_LCSSA:%.*]] = phi float [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[MUL_LCSSA4]], [[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[MUL_LCSSA]], [[ADD_LCSSA]]
+; CHECK-NEXT:    ret float [[DIV]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %prod = phi float [ 15.000000e+00, %entry ], [ %mul, %for.body ]
+  %sum = phi float [ 10.000000e+00, %entry ], [ %add, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %A, i64 %iv
+  %0 = load float, float* %arrayidx
+  %add = fadd fast float %sum, %0
+  %mul = fmul fast float %prod, %0
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+  %div = fdiv float %mul, %add
+  ret float %div
+}
+
+;
+; Start value of the reduction is a Phi node from the outer loop
+;
+define i32 @reduction_phi_start_val(i32* %A, i64 %N) {
+; CHECK-LABEL: @reduction_phi_start_val(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[ITER_CHECK:%.*]]
+; CHECK:       iter.check:
+; CHECK-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], [[FOR_COND:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[START_SUM:%.*]] = phi i32 [ [[SUB_LCSSA:%.*]], [[FOR_COND]] ], [ 5, [[ENTRY]] ]
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK:       vector.main.loop.iter.check:
+; CHECK-NEXT:    [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 4
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[START_SUM]], i32 0
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT:    [[TMP5]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK:       vec.epilog.iter.check:
+; CHECK-NEXT:    [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]]
+; CHECK-NEXT:    [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
+; CHECK-NEXT:    br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
+; CHECK:       vec.epilog.ph:
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[START_SUM]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ]
+; CHECK-NEXT:    [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
+; CHECK-NEXT:    [[N_MOD_VF3:%.*]] = urem i64 [[N]], 4
+; CHECK-NEXT:    [[N_VEC4:%.*]] = sub i64 [[N]], [[N_MOD_VF3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
+; CHECK-NEXT:    br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
+; CHECK:       vec.epilog.vector.body:
+; CHECK-NEXT:    [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_PHI7:%.*]] = phi <4 x i32> [ [[TMP8]], [[VEC_EPILOG_PH]] ], [ [[TMP13:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX6]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP13]] = sub <4 x i32> [[VEC_PHI7]], [[WIDE_LOAD8]]
+; CHECK-NEXT:    [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 4
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC4]]
+; CHECK-NEXT:    br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK:       vec.epilog.middle.block:
+; CHECK-NEXT:    [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP13]])
+; CHECK-NEXT:    [[CMP_N5:%.*]] = icmp eq i64 [[N]], [[N_VEC4]]
+; CHECK-NEXT:    br i1 [[CMP_N5]], label [[FOR_COND_LOOPEXIT:%.*]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK:       vec.epilog.scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC4]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX10:%.*]] = phi i32 [ [[START_SUM]], [[ITER_CHECK]] ], [ [[TMP7]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SUB:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[SUB]] = sub nsw i32 [[SUM]], [[LOAD]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK:       for.cond.loopexit:
+; CHECK-NEXT:    [[SUB_LCSSA2:%.*]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP15]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[SUB_LCSSA]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[SUB_LCSSA2]], [[FOR_COND_LOOPEXIT]] ]
+; CHECK-NEXT:    [[OUTER_IV_NEXT]] = add nuw nsw i64 [[OUTER_IV]], 1
+; CHECK-NEXT:    [[OUTER_EXITCOND_NOT:%.*]] = icmp eq i64 [[OUTER_IV_NEXT]], [[N]]
+; CHECK-NEXT:    br i1 [[OUTER_EXITCOND_NOT]], label [[FOR_END:%.*]], label [[ITER_CHECK]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[SUB_LCSSA_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[FOR_COND]] ]
+; CHECK-NEXT:    ret i32 [[SUB_LCSSA_LCSSA]]
+;
+entry:
+  br label %for.cond.preheader
+
+for.cond.preheader:
+  %outer.iv = phi i64 [ %outer.iv.next, %for.cond ], [ 0, %entry ]
+  %start.sum = phi i32 [ %sub, %for.cond ], [ 5, %entry ]
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %for.cond.preheader ], [ %iv.next, %for.body ]
+  %sum = phi i32 [ %start.sum, %for.cond.preheader ], [ %sub, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %iv
+  %load = load i32, i32* %arrayidx, align 4
+  %sub = sub nsw i32 %sum, %load
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.cond, label %for.body
+
+for.cond:
+  %outer.iv.next = add nuw nsw i64 %outer.iv, 1
+  %outer.exitcond.not = icmp eq i64 %outer.iv.next, %N
+  br i1 %outer.exitcond.not, label %for.end, label %for.cond.preheader
+
+for.end:
+  ret i32 %sub
+}
diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll
index c4a8f0ded86b7..b34c317f18d77 100644
--- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll
+++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll
@@ -3,39 +3,6 @@
 
 target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512"
 
-; Currently we cannot handle reduction loops.
-; CHECK: LV: Checking a loop in "f1"
-; CHECK: LEV: Unable to vectorize epilogue because the loop is not a supported candidate.
-
-define signext i32 @f1(i8* noalias %A, i32 signext %n) {
-entry:
-  %cmp1 = icmp sgt i32 %n, 0
-  br i1 %cmp1, label %for.body.preheader, label %for.end
-
-for.body.preheader:                               ; preds = %entry
-  %wide.trip.count = zext i32 %n to i64
-  br label %for.body
-
-for.body:                                         ; preds = %for.body.preheader, %for.body
-  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
-  %sum.02 = phi i32 [ %add, %for.body ], [ 0, %for.body.preheader ]
-  %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv
-  %0 = load i8, i8* %arrayidx, align 1
-  %conv = zext i8 %0 to i32
-  %add = add nuw nsw i32 %sum.02, %conv
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
-  br i1 %exitcond, label %for.body, label %for.end.loopexit
-
-for.end.loopexit:                                 ; preds = %for.body
-  %add.lcssa = phi i32 [ %add, %for.body ]
-  br label %for.end
-
-for.end:                                          ; preds = %for.end.loopexit, %entry
-  %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ]
-  ret i32 %sum.0.lcssa
-}
-
 ; Currently we cannot handle live-out variables that are recurrences.
 ; CHECK: LV: Checking a loop in "f2"
 ; CHECK: LEV: Unable to vectorize epilogue because the loop is not a supported candidate.

From 54f1d950667cac9d1cbf549c72972bcb487b6fa7 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Mon, 24 Jan 2022 12:06:49 +0000
Subject: [PATCH 365/946] [gn build] Port 3696c70e67d9

---
 .../secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn  | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
index 1e24a52be4725..d5743e8b209e0 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn
@@ -15,6 +15,7 @@ static_library("readability") {
     "AvoidConstParamsInDecls.cpp",
     "BracesAroundStatementsCheck.cpp",
     "ConstReturnTypeCheck.cpp",
+    "ContainerContainsCheck.cpp",
     "ContainerDataPointerCheck.cpp",
     "ContainerSizeEmptyCheck.cpp",
     "ConvertMemberFunctionsToStatic.cpp",

From b7f69b8d46502adbe7007a89bdf6ea6b6c8f95f9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Mon, 24 Jan 2022 11:27:27 +0000
Subject: [PATCH 366/946] [LV] Name values and blocks in same induction tests
 (NFC).

This reduces the churn in the test in future updates due to numbering
changes.
---
 .../Transforms/LoopVectorize/induction.ll     | 138 ++++++++++--------
 1 file changed, 78 insertions(+), 60 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index de2d405a6da95..4eff30b1fc331 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -2860,6 +2860,7 @@ for.end:
 
 define i32 @i8_loop() nounwind readnone ssp uwtable {
 ; CHECK-LABEL: @i8_loop(
+; CHECK-NEXT: entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -2881,18 +2882,19 @@ define i32 @i8_loop() nounwind readnone ssp uwtable {
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
 ; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[TMP6:%.*]]
-; CHECK:       6:
+; CHECK:       loop:
 ; CHECK-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP6]] ]
 ; CHECK-NEXT:    [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP6]] ]
 ; CHECK-NEXT:    [[TMP7]] = and i32 [[A_0]], 4
 ; CHECK-NEXT:    [[TMP8]] = add i8 [[B_0]], -1
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[TMP10]], label [[TMP6]], !llvm.loop [[LOOP29:![0-9]+]]
-; CHECK:       10:
+; CHECK:       exit:
 ; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
 ;
 ; IND-LABEL: @i8_loop(
+; IND-NEXT:  entry:
 ; IND-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
 ; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -2905,12 +2907,13 @@ define i32 @i8_loop() nounwind readnone ssp uwtable {
 ; IND-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
 ; IND:       scalar.ph:
 ; IND-NEXT:    br label [[TMP2:%.*]]
-; IND:       2:
+; IND:       loop:
 ; IND-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]]
-; IND:       3:
+; IND:       exit:
 ; IND-NEXT:    ret i32 0
 ;
 ; UNROLL-LABEL: @i8_loop(
+; UNROLL-NEXT:  entry:
 ; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
 ; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -2923,12 +2926,13 @@ define i32 @i8_loop() nounwind readnone ssp uwtable {
 ; UNROLL-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
 ; UNROLL:       scalar.ph:
 ; UNROLL-NEXT:    br label [[TMP2:%.*]]
-; UNROLL:       2:
+; UNROLL:       loop:
 ; UNROLL-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]]
-; UNROLL:       3:
+; UNROLL:       exit:
 ; UNROLL-NEXT:    ret i32 0
 ;
 ; UNROLL-NO-IC-LABEL: @i8_loop(
+; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -2954,18 +2958,19 @@ define i32 @i8_loop() nounwind readnone ssp uwtable {
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
 ; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[TMP8:%.*]]
-; UNROLL-NO-IC:       8:
+; UNROLL-NO-IC:       loop:
 ; UNROLL-NO-IC-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP8]] ]
 ; UNROLL-NO-IC-NEXT:    [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[TMP8]] ]
 ; UNROLL-NO-IC-NEXT:    [[TMP9]] = and i32 [[A_0]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP10]] = add i8 [[B_0]], -1
 ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP11]], label [[TMP12]], label [[TMP8]], !llvm.loop [[LOOP29:![0-9]+]]
-; UNROLL-NO-IC:       12:
+; UNROLL-NO-IC:       exit:
 ; UNROLL-NO-IC-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    ret i32 [[DOTLCSSA]]
 ;
 ; INTERLEAVE-LABEL: @i8_loop(
+; INTERLEAVE-NEXT:  entry:
 ; INTERLEAVE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
 ; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -2978,28 +2983,30 @@ define i32 @i8_loop() nounwind readnone ssp uwtable {
 ; INTERLEAVE-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
 ; INTERLEAVE:       scalar.ph:
 ; INTERLEAVE-NEXT:    br label [[TMP2:%.*]]
-; INTERLEAVE:       2:
+; INTERLEAVE:       loop:
 ; INTERLEAVE-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]]
-; INTERLEAVE:       3:
+; INTERLEAVE:       exit:
 ; INTERLEAVE-NEXT:    ret i32 0
 ;
-  br label %1
+entry:
+  br label %loop
 
-; <label>:1                                       ; preds = %1, %0
-  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
-  %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
-  %2 = and i32 %a.0, 4
-  %3 = add i8 %b.0, -1
-  %4 = icmp eq i8 %3, 0
-  br i1 %4, label %5, label %1
+loop:
+  %a.0 = phi i32 [ 1, %entry ], [ %a.0.and, %loop ]
+  %b.0 = phi i8 [ 0, %entry ], [ %b.next, %loop ]
+  %a.0.and = and i32 %a.0, 4
+  %b.next = add i8 %b.0, -1
+  %ec = icmp eq i8 %b.next, 0
+  br i1 %ec, label %exit, label %loop
 
-; <label>:5                                       ; preds = %1
-  ret i32 %2
+exit:
+  ret i32 %a.0.and
 }
 
 
 define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; CHECK-LABEL: @i16_loop(
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3021,18 +3028,19 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
 ; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[TMP6:%.*]]
-; CHECK:       6:
+; CHECK:       loop:
 ; CHECK-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP6]] ]
 ; CHECK-NEXT:    [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP6]] ]
 ; CHECK-NEXT:    [[TMP7]] = and i32 [[A_0]], 4
 ; CHECK-NEXT:    [[TMP8]] = add i16 [[B_0]], -1
 ; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i16 [[TMP8]], 0
 ; CHECK-NEXT:    br i1 [[TMP9]], label [[TMP10]], label [[TMP6]], !llvm.loop [[LOOP31:![0-9]+]]
-; CHECK:       10:
+; CHECK:       exit:
 ; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
 ;
 ; IND-LABEL: @i16_loop(
+; IND-NEXT:  entry:
 ; IND-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
 ; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3045,12 +3053,13 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; IND-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
 ; IND:       scalar.ph:
 ; IND-NEXT:    br label [[TMP2:%.*]]
-; IND:       2:
+; IND:       loop:
 ; IND-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]]
-; IND:       3:
+; IND:       exit:
 ; IND-NEXT:    ret i32 0
 ;
 ; UNROLL-LABEL: @i16_loop(
+; UNROLL-NEXT:  entry:
 ; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
 ; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3063,12 +3072,13 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; UNROLL-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
 ; UNROLL:       scalar.ph:
 ; UNROLL-NEXT:    br label [[TMP2:%.*]]
-; UNROLL:       2:
+; UNROLL:       loop:
 ; UNROLL-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]]
-; UNROLL:       3:
+; UNROLL:       exit:
 ; UNROLL-NEXT:    ret i32 0
 ;
 ; UNROLL-NO-IC-LABEL: @i16_loop(
+; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3094,18 +3104,19 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
 ; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[TMP8:%.*]]
-; UNROLL-NO-IC:       8:
+; UNROLL-NO-IC:       loop:
 ; UNROLL-NO-IC-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP8]] ]
 ; UNROLL-NO-IC-NEXT:    [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[TMP8]] ]
 ; UNROLL-NO-IC-NEXT:    [[TMP9]] = and i32 [[A_0]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP10]] = add i16 [[B_0]], -1
 ; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp eq i16 [[TMP10]], 0
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP11]], label [[TMP12]], label [[TMP8]], !llvm.loop [[LOOP31:![0-9]+]]
-; UNROLL-NO-IC:       12:
+; UNROLL-NO-IC:       exit:
 ; UNROLL-NO-IC-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    ret i32 [[DOTLCSSA]]
 ;
 ; INTERLEAVE-LABEL: @i16_loop(
+; INTERLEAVE-NEXT:  entry:
 ; INTERLEAVE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
 ; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3118,23 +3129,24 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 ; INTERLEAVE-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
 ; INTERLEAVE:       scalar.ph:
 ; INTERLEAVE-NEXT:    br label [[TMP2:%.*]]
-; INTERLEAVE:       2:
+; INTERLEAVE:       loop:
 ; INTERLEAVE-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]]
-; INTERLEAVE:       3:
+; INTERLEAVE:       exit:
 ; INTERLEAVE-NEXT:    ret i32 0
 ;
-  br label %1
+entry:
+  br label %loop
 
-; <label>:1                                       ; preds = %1, %0
-  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
-  %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
-  %2 = and i32 %a.0, 4
-  %3 = add i16 %b.0, -1
-  %4 = icmp eq i16 %3, 0
-  br i1 %4, label %5, label %1
+loop:
+  %a.0 = phi i32 [ 1, %entry ], [ %a.0.and, %loop ]
+  %b.0 = phi i16 [ 0, %entry ], [ %b.0.next, %loop ]
+  %a.0.and = and i32 %a.0, 4
+  %b.0.next = add i16 %b.0, -1
+  %ec = icmp eq i16 %b.0.next, 0
+  br i1 %ec, label %exit, label %loop
 
-; <label>:5                                       ; preds = %1
-  ret i32 %2
+exit:
+  ret i32 %a.0.and
 }
 
 ; This loop has a backedge taken count of i32_max. We need to check for this
@@ -3144,6 +3156,7 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
 
 define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; CHECK-LABEL: @max_i32_backedgetaken(
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3164,18 +3177,19 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
 ; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[TMP5:%.*]]
-; CHECK:       5:
+; CHECK:       loop:
 ; CHECK-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP6:%.*]], [[TMP5]] ]
 ; CHECK-NEXT:    [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP5]] ]
 ; CHECK-NEXT:    [[TMP6]] = and i32 [[A_0]], 4
 ; CHECK-NEXT:    [[TMP7]] = add i32 [[B_0]], -1
 ; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
 ; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9]], label [[TMP5]], !llvm.loop [[LOOP33:![0-9]+]]
-; CHECK:       9:
+; CHECK:       exit:
 ; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
 ;
 ; IND-LABEL: @max_i32_backedgetaken(
+; IND-NEXT:  entry:
 ; IND-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
 ; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3185,15 +3199,16 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; IND-NEXT:    br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]]
 ; IND:       scalar.ph:
 ; IND-NEXT:    br label [[TMP1:%.*]]
-; IND:       1:
+; IND:       loop:
 ; IND-NEXT:    [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ]
 ; IND-NEXT:    [[TMP2]] = add i32 [[B_0]], -1
 ; IND-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
 ; IND-NEXT:    br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]]
-; IND:       4:
+; IND:       exit:
 ; IND-NEXT:    ret i32 0
 ;
 ; UNROLL-LABEL: @max_i32_backedgetaken(
+; UNROLL-NEXT:  entry:
 ; UNROLL-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
 ; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3203,15 +3218,16 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; UNROLL-NEXT:    br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]]
 ; UNROLL:       scalar.ph:
 ; UNROLL-NEXT:    br label [[TMP1:%.*]]
-; UNROLL:       1:
+; UNROLL:       loop:
 ; UNROLL-NEXT:    [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ]
 ; UNROLL-NEXT:    [[TMP2]] = add i32 [[B_0]], -1
 ; UNROLL-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
 ; UNROLL-NEXT:    br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]]
-; UNROLL:       4:
+; UNROLL:       exit:
 ; UNROLL-NEXT:    ret i32 0
 ;
 ; UNROLL-NO-IC-LABEL: @max_i32_backedgetaken(
+; UNROLL-NO-IC-NEXT:  entry:
 ; UNROLL-NO-IC-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; UNROLL-NO-IC:       vector.ph:
 ; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3236,18 +3252,19 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
 ; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    br label [[TMP7:%.*]]
-; UNROLL-NO-IC:       7:
+; UNROLL-NO-IC:       loop:
 ; UNROLL-NO-IC-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP7]] ]
 ; UNROLL-NO-IC-NEXT:    [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP7]] ]
 ; UNROLL-NO-IC-NEXT:    [[TMP8]] = and i32 [[A_0]], 4
 ; UNROLL-NO-IC-NEXT:    [[TMP9]] = add i32 [[B_0]], -1
 ; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
 ; UNROLL-NO-IC-NEXT:    br i1 [[TMP10]], label [[TMP11]], label [[TMP7]], !llvm.loop [[LOOP33:![0-9]+]]
-; UNROLL-NO-IC:       11:
+; UNROLL-NO-IC:       exit:
 ; UNROLL-NO-IC-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
 ; UNROLL-NO-IC-NEXT:    ret i32 [[DOTLCSSA]]
 ;
 ; INTERLEAVE-LABEL: @max_i32_backedgetaken(
+; INTERLEAVE-NEXT:  entry:
 ; INTERLEAVE-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
 ; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -3257,26 +3274,27 @@ define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
 ; INTERLEAVE-NEXT:    br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]]
 ; INTERLEAVE:       scalar.ph:
 ; INTERLEAVE-NEXT:    br label [[TMP1:%.*]]
-; INTERLEAVE:       1:
+; INTERLEAVE:       loop:
 ; INTERLEAVE-NEXT:    [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ]
 ; INTERLEAVE-NEXT:    [[TMP2]] = add i32 [[B_0]], -1
 ; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
 ; INTERLEAVE-NEXT:    br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]]
-; INTERLEAVE:       4:
+; INTERLEAVE:       exit:
 ; INTERLEAVE-NEXT:    ret i32 0
 ;
-  br label %1
+entry:
+  br label %loop
 
-; <label>:1                                       ; preds = %1, %0
-  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
-  %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
-  %2 = and i32 %a.0, 4
-  %3 = add i32 %b.0, -1
-  %4 = icmp eq i32 %3, 0
-  br i1 %4, label %5, label %1
+loop:
+  %a.0 = phi i32 [ 1, %entry ], [ %a.0.and, %loop ]
+  %b.0 = phi i32 [ 0, %entry ], [ %b.next, %loop ]
+  %a.0.and = and i32 %a.0, 4
+  %b.next = add i32 %b.0, -1
+  %ec = icmp eq i32 %b.next, 0
+  br i1 %ec, label %exit, label %loop
 
-; <label>:5                                       ; preds = %1
-  ret i32 %2
+exit:
+  ret i32 %a.0.and
 }
 
 ; When generating the overflow check we must sure that the induction start value

From 70f83f308449710a55c4898f18f06949df2d1559 Mon Sep 17 00:00:00 2001
From: SForeKeeper <zkliu6@gmail.com>
Date: Mon, 24 Jan 2022 20:33:46 +0800
Subject: [PATCH 367/946] [RISCV] add support for zbkx subextension in MC
 layer.

This patch adds support for zbkx extension from K extension(v1.0.0) in MC layer.
Instructions with same functionality and same encoding is defined in the bitmanip extension.
It defines {Xperm8, Xperm4} as instruction aliases for xperm.* in Zbp extension. When Zbkx is enabled while Zbp is not, xperm.h will not be available. When Zbkx and Zbp are both enabled, the instructions will be decoded in Zbp format.

[[ https://reviews.llvm.org/D94999 | D94999 ]] this is the patch that introduces xperm.* instructions.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117889
---
 llvm/lib/Support/RISCVISAInfo.cpp          |  5 +++--
 llvm/lib/Target/RISCV/RISCV.td             | 15 +++++++++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td  | 15 +++++++++++++--
 llvm/lib/Target/RISCV/RISCVSchedRocket.td  |  8 ++++----
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td |  7 ++++---
 llvm/lib/Target/RISCV/RISCVSubtarget.h     |  2 ++
 llvm/test/CodeGen/RISCV/attributes.ll      | 16 ++++++++++------
 llvm/test/MC/RISCV/attribute-arch.s        |  9 ++++++---
 llvm/test/MC/RISCV/rv32zbkx-invalid.s      |  9 +++++++++
 llvm/test/MC/RISCV/rv32zbkx-valid.s        | 17 +++++++++++++++++
 10 files changed, 83 insertions(+), 20 deletions(-)
 create mode 100644 llvm/test/MC/RISCV/rv32zbkx-invalid.s
 create mode 100644 llvm/test/MC/RISCV/rv32zbkx-valid.s

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index e6df48e5bb416..55a644e98aacd 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -58,6 +58,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
 
     {"zbkb", RISCVExtensionVersion{1, 0}},
     {"zbkc", RISCVExtensionVersion{1, 0}},
+    {"zbkx", RISCVExtensionVersion{1, 0}},
     {"zknd", RISCVExtensionVersion{1, 0}},
     {"zkne", RISCVExtensionVersion{1, 0}},
     {"zknh", RISCVExtensionVersion{1, 0}},
@@ -770,8 +771,8 @@ static const char *ImpliedExtsZvl256b[] = {"zvl128b"};
 static const char *ImpliedExtsZvl128b[] = {"zvl64b"};
 static const char *ImpliedExtsZvl64b[] = {"zvl32b"};
 static const char *ImpliedExtsZk[] = {"zkn", "zkt", "zkr"};
-static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zkne", "zknd", "zknh"};
-static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zksed", "zksh"};
+static const char *ImpliedExtsZkn[] = {"zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"};
+static const char *ImpliedExtsZks[] = {"zbkb", "zbkc", "zbkx", "zksed", "zksh"};
 
 struct ImpliedExtsEntry {
   StringLiteral Name;
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index cd8885f6a6e8d..aea082d0a0f04 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -150,6 +150,19 @@ def HasStdExtZbkb : Predicate<"Subtarget->hasStdExtZbkb()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbkb),
                              "'Zbkb' (Bitmanip instructions for Cryptography)">;
 
+def FeatureStdExtZbkx
+    : SubtargetFeature<"zbkx", "HasStdExtZbkx", "true",
+                       "'Zbkx' (Crossbar permutation instructions)">;
+def HasStdExtZbkx : Predicate<"Subtarget->hasStdExtZbkx()">,
+                             AssemblerPredicate<(all_of FeatureStdExtZbkx),
+                             "'Zbkx' (Crossbar permutation instructions)">;
+
+def HasStdExtZbpOrZbkx
+    : Predicate<"Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkx()">,
+                AssemblerPredicate<(any_of FeatureStdExtZbp, FeatureStdExtZbkx),
+                                   "'Zbp' (Permutation 'Zb' Instructions) or "
+                                   "'Zbkx' (Crossbar permutation instructions)">;
+
 def HasStdExtZbpOrZbkb
     : Predicate<"Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">,
                 AssemblerPredicate<(any_of FeatureStdExtZbp, FeatureStdExtZbkb),
@@ -233,6 +246,7 @@ def FeatureStdExtZkn
                        "'Zkn' (NIST Algorithm Suite)",
                        [FeatureStdExtZbkb,
                         FeatureStdExtZbkc,
+                        FeatureStdExtZbkx,
                         FeatureStdExtZkne,
                         FeatureStdExtZknd,
                         FeatureStdExtZknh]>;
@@ -242,6 +256,7 @@ def FeatureStdExtZks
                        "'Zks' (ShangMi Algorithm Suite)",
                        [FeatureStdExtZbkb,
                         FeatureStdExtZbkc,
+                        FeatureStdExtZbkx,
                         FeatureStdExtZksed,
                         FeatureStdExtZksh]>;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 25c5c35c03509..75f9ec98cc1aa 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -27,6 +27,7 @@
 // versions:
 //   Zbkb - 1.0
 //   Zbkc - 1.0
+//   Zbkx - 1.0
 //
 //===----------------------------------------------------------------------===//
 
@@ -359,9 +360,12 @@ def GORC : ALU_rr<0b0010100, 0b101, "gorc">, Sched<[]>;
 def GREV : ALU_rr<0b0110100, 0b101, "grev">, Sched<[]>;
 } // Predicates = [HasStdExtZbp]
 
+let Predicates = [HasStdExtZbpOrZbkx] in {
+def XPERMN : ALU_rr<0b0010100, 0b010, "xperm4">, Sched<[]>;
+def XPERMB : ALU_rr<0b0010100, 0b100, "xperm8">, Sched<[]>;
+} // Predicates = [HasStdExtZbpOrZbkx]
+
 let Predicates = [HasStdExtZbp] in {
-def XPERMN : ALU_rr<0b0010100, 0b010, "xperm.n">, Sched<[]>;
-def XPERMB : ALU_rr<0b0010100, 0b100, "xperm.b">, Sched<[]>;
 def XPERMH : ALU_rr<0b0010100, 0b110, "xperm.h">, Sched<[]>;
 } // Predicates = [HasStdExtZbp]
 
@@ -768,6 +772,13 @@ def : InstAlias<"gorcw $rd, $rs1, $shamt",
                 (GORCIW  GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>;
 } // Predicates = [HasStdExtZbp, IsRV64]
 
+// Zbp is unratified and that it would likely adopt the already ratified Zbkx names.
+// Thus current Zbp instructions are defined as aliases for Zbkx instructions.
+let Predicates = [HasStdExtZbp] in {
+  def : InstAlias<"xperm.b $rd, $rs1, $rs2", (XPERMB GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+  def : InstAlias<"xperm.n $rd, $rs1, $rs2", (XPERMN GPR:$rd, GPR:$rs1, GPR:$rs2)>;
+} // Predicates = [HasStdExtZbp]
+
 let Predicates = [HasStdExtZbs] in {
 def : InstAlias<"bset $rd, $rs1, $shamt",
                 (BSETI  GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>;
diff --git a/llvm/lib/Target/RISCV/RISCVSchedRocket.td b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
index 92dd3175a460a..78cf34c8c582a 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedRocket.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedRocket.td
@@ -17,10 +17,10 @@ def RocketModel : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = false;
-  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZknd, 
-                             HasStdExtZkne, HasStdExtZknh, HasStdExtZksed,
-                             HasStdExtZksh, HasStdExtZkr, HasVInstructions,
-                             HasVInstructionsI64];
+  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
+                             HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
+                             HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
+                             HasVInstructions, HasVInstructionsI64];
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index e5eaad2a6dd08..9f5e5ff1223cd 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -15,9 +15,10 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZknd, 
-                             HasStdExtZkne, HasStdExtZknh, HasStdExtZksed,
-                             HasStdExtZksh, HasStdExtZkr, HasVInstructions];
+  let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
+                             HasStdExtZknd, HasStdExtZkne, HasStdExtZknh,
+                             HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
+                             HasVInstructions];
 }
 
 // The SiFive7 microarchitecture has two pipelines: A and B.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 62b3d54350779..8f32e88d57c07 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -85,6 +85,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool HasStdExtZfh = false;
   bool HasStdExtZbkb = false;
   bool HasStdExtZbkc = false;
+  bool HasStdExtZbkx = false;
   bool HasStdExtZknd = false;
   bool HasStdExtZkne = false;
   bool HasStdExtZknh = false;
@@ -170,6 +171,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtZfh() const { return HasStdExtZfh; }
   bool hasStdExtZbkb() const { return HasStdExtZbkb; }
   bool hasStdExtZbkc() const { return HasStdExtZbkc; }
+  bool hasStdExtZbkx() const { return HasStdExtZbkx; }
   bool hasStdExtZknd() const { return HasStdExtZknd; }
   bool hasStdExtZkne() const { return HasStdExtZkne; }
   bool hasStdExtZknh() const { return HasStdExtZknh; }
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index 86b384f6df6fc..32a852050a5ea 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -21,6 +21,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV32COMBINED %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV32ZBKB %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV32ZBKC %s
+; RUN: llc -mtriple=riscv32 -mattr=+zbkx %s -o - | FileCheck --check-prefix=RV32ZBKX %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zknd %s -o - | FileCheck --check-prefix=RV32ZKND %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zkne %s -o - | FileCheck --check-prefix=RV32ZKNE %s
 ; RUN: llc -mtriple=riscv32 -mattr=+zknh %s -o - | FileCheck --check-prefix=RV32ZKNH %s
@@ -52,6 +53,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb,+zfh,+v,+f %s -o - | FileCheck --check-prefix=RV64COMBINED %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkb %s -o - | FileCheck --check-prefix=RV64ZBKB %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zbkc %s -o - | FileCheck --check-prefix=RV64ZBKC %s
+; RUN: llc -mtriple=riscv64 -mattr=+zbkx %s -o - | FileCheck --check-prefix=RV64ZBKX %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zknd %s -o - | FileCheck --check-prefix=RV64ZKND %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zkne %s -o - | FileCheck --check-prefix=RV64ZKNE %s
 ; RUN: llc -mtriple=riscv64 -mattr=+zknh %s -o - | FileCheck --check-prefix=RV64ZKNH %s
@@ -84,16 +86,17 @@
 ; RV32COMBINED: .attribute 5, "rv32i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV32ZBKB: .attribute 5, "rv32i2p0_zbkb1p0"
 ; RV32ZBKC: .attribute 5, "rv32i2p0_zbkc1p0"
+; RV32ZBKX: .attribute 5, "rv32i2p0_zbkx1p0"
 ; RV32ZKND: .attribute 5, "rv32i2p0_zknd1p0"
 ; RV32ZKNE: .attribute 5, "rv32i2p0_zkne1p0"
 ; RV32ZKNH: .attribute 5, "rv32i2p0_zknh1p0"
 ; RV32ZKSED: .attribute 5, "rv32i2p0_zksed1p0"
 ; RV32ZKSH: .attribute 5, "rv32i2p0_zksh1p0"
 ; RV32ZKR: .attribute 5, "rv32i2p0_zkr1p0"
-; RV32ZKN: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
-; RV32ZKS: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0"
+; RV32ZKN: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
+; RV32ZKS: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zks1p0_zksed1p0_zksh1p0"
 ; RV32ZKT: .attribute 5, "rv32i2p0_zkt1p0"
-; RV32ZK: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
+; RV32ZK: .attribute 5, "rv32i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
 
 ; RV64M: .attribute 5, "rv64i2p0_m2p0"
 ; RV64A: .attribute 5, "rv64i2p0_a2p0"
@@ -116,16 +119,17 @@
 ; RV64COMBINED: .attribute 5, "rv64i2p0_f2p0_d2p0_v1p0_zfh1p0_zfhmin1p0_zbb1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
 ; RV64ZBKB: .attribute 5, "rv64i2p0_zbkb1p0"
 ; RV64ZBKC: .attribute 5, "rv64i2p0_zbkc1p0"
+; RV64ZBKX: .attribute 5, "rv64i2p0_zbkx1p0"
 ; RV64ZKND: .attribute 5, "rv64i2p0_zknd1p0"
 ; RV64ZKNE: .attribute 5, "rv64i2p0_zkne1p0"
 ; RV64ZKNH: .attribute 5, "rv64i2p0_zknh1p0"
 ; RV64ZKSED: .attribute 5, "rv64i2p0_zksed1p0"
 ; RV64ZKSH: .attribute 5, "rv64i2p0_zksh1p0"
 ; RV64ZKR: .attribute 5, "rv64i2p0_zkr1p0"
-; RV64ZKN: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
-; RV64ZKS: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0"
+; RV64ZKN: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
+; RV64ZKS: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zks1p0_zksed1p0_zksh1p0"
 ; RV64ZKT: .attribute 5, "rv64i2p0_zkt1p0"
-; RV64ZK: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
+; RV64ZK: .attribute 5, "rv64i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
 
 define i32 @addi(i32 %a) {
   %1 = add i32 %a, 1
diff --git a/llvm/test/MC/RISCV/attribute-arch.s b/llvm/test/MC/RISCV/attribute-arch.s
index d39566eb81cd9..ee2c6fd6af9b9 100644
--- a/llvm/test/MC/RISCV/attribute-arch.s
+++ b/llvm/test/MC/RISCV/attribute-arch.s
@@ -131,6 +131,9 @@
 .attribute arch, "rv32i_zbkc1p0"
 # CHECK: attribute      5, "rv32i2p0_zbkc1p0"
 
+.attribute arch, "rv32i_zbkx1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkx1p0"
+
 .attribute arch, "rv32i_zknd1p0"
 # CHECK: attribute      5, "rv32i2p0_zknd1p0"
 
@@ -150,13 +153,13 @@
 # CHECK: attribute      5, "rv32i2p0_zkr1p0"
 
 .attribute arch, "rv32i_zkn1p0"
-# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0"
 
 .attribute arch, "rv32i_zks1p0"
-# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zks1p0_zksed1p0_zksh1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zks1p0_zksed1p0_zksh1p0"
 
 .attribute arch, "rv32i_zkt1p0"
 # CHECK: attribute      5, "rv32i2p0_zkt1p0"
 
 .attribute arch, "rv32i_zk1p0"
-# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
+# CHECK: attribute      5, "rv32i2p0_zbkb1p0_zbkc1p0_zbkx1p0_zk1p0_zkn1p0_zknd1p0_zkne1p0_zknh1p0_zkr1p0_zkt1p0"
diff --git a/llvm/test/MC/RISCV/rv32zbkx-invalid.s b/llvm/test/MC/RISCV/rv32zbkx-invalid.s
new file mode 100644
index 0000000000000..d79a2afb391a2
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zbkx-invalid.s
@@ -0,0 +1,9 @@
+# RUN: not llvm-mc -triple riscv32 -mattr=+zbkx < %s 2>&1 | FileCheck %s
+
+# Too few operands
+xperm8 t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
+# Too few operands
+xperm4 t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
+
+# Undefined Zbp instruction in Zbkx
+xperm.h t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbp' (Permutation 'B' Instructions)
\ No newline at end of file
diff --git a/llvm/test/MC/RISCV/rv32zbkx-valid.s b/llvm/test/MC/RISCV/rv32zbkx-valid.s
new file mode 100644
index 0000000000000..6567dfed8ba7a
--- /dev/null
+++ b/llvm/test/MC/RISCV/rv32zbkx-valid.s
@@ -0,0 +1,17 @@
+# RUN: llvm-mc %s -triple=riscv32 -mattr=+zbkx -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc %s -triple=riscv64 -mattr=+zbkx -show-encoding \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=zbkx < %s \
+# RUN:     | llvm-objdump --mattr=+zbkx -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=zbkx < %s \
+# RUN:     | llvm-objdump --mattr=+zbkx -d -r - \
+# RUN:     | FileCheck --check-prefix=CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: xperm8 t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x42,0x73,0x28]
+xperm8 t0, t1, t2
+# CHECK-ASM-AND-OBJ: xperm4 t0, t1, t2
+# CHECK-ASM: encoding: [0xb3,0x22,0x73,0x28]
+xperm4 t0, t1, t2

From b754d09fde0b5f82acfd1d5e0423271823db4905 Mon Sep 17 00:00:00 2001
From: Groverkss <groverkss@gmail.com>
Date: Mon, 24 Jan 2022 18:11:55 +0530
Subject: [PATCH 368/946] [MLIR][Presburger] Refactor duplicate division
 merging to Utils

This patch moves merging of duplicate divisions to presburger utility
functions. This is required to support division merging in structures other
than IntegerPolyhedron.

Reviewed By: arjunp

Differential Revision: https://reviews.llvm.org/D118001
---
 mlir/include/mlir/Analysis/Presburger/Utils.h | 17 +++++++
 .../Analysis/Presburger/IntegerPolyhedron.cpp | 50 ++++--------------
 mlir/lib/Analysis/Presburger/Utils.cpp        | 51 +++++++++++++++++++
 3 files changed, 78 insertions(+), 40 deletions(-)

diff --git a/mlir/include/mlir/Analysis/Presburger/Utils.h b/mlir/include/mlir/Analysis/Presburger/Utils.h
index 637ab59d85fe4..d9d7be4d364c8 100644
--- a/mlir/include/mlir/Analysis/Presburger/Utils.h
+++ b/mlir/include/mlir/Analysis/Presburger/Utils.h
@@ -14,6 +14,7 @@
 #define MLIR_ANALYSIS_PRESBURGER_UTILS_H
 
 #include "mlir/Support/LLVM.h"
+#include "llvm/ADT/STLExtras.h"
 
 namespace mlir {
 
@@ -51,6 +52,22 @@ MaybeLocalRepr computeSingleVarRepr(const IntegerPolyhedron &cst,
                                     SmallVector<int64_t, 8> &dividend,
                                     unsigned &divisor);
 
+/// Given dividends of divisions `divs` and denominators `denoms`, detects and
+/// removes duplicate divisions. `localOffset` is the offset in dividend of a
+/// division from where local identifiers start.
+///
+/// On every possible duplicate division found, `merge(i, j)`, where `i`, `j`
+/// are current index of the duplicate divisions, is called and division at
+/// index `j` is merged into division at index `i`. If `merge(i, j)` returns
+/// `true`, the divisions are merged i.e. `j^th` division gets eliminated and
+/// it's each instance is replaced by `i^th` division. If it returns `false`,
+/// the divisions are not merged. `merge` can also do side effects, For example
+/// it can merge the local identifiers in IntegerPolyhedron.
+void removeDuplicateDivs(
+    std::vector<SmallVector<int64_t, 8>> &divs,
+    SmallVectorImpl<unsigned> &denoms, unsigned localOffset,
+    llvm::function_ref<bool(unsigned i, unsigned j)> merge);
+
 } // namespace presburger_utils
 } // namespace mlir
 
diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
index b9360167d3ae9..0080756dfb760 100644
--- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
+++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
@@ -1090,47 +1090,17 @@ void IntegerPolyhedron::mergeLocalIds(IntegerPolyhedron &other) {
   std::copy(denomsB.begin() + initLocals, denomsB.end(),
             denomsA.begin() + initLocals);
 
-  // Find and merge duplicate divisions.
-  // TODO: Add division normalization to support divisions that differ by
-  // a constant.
-  // TODO: Add division ordering such that a division representation for local
-  // identifier at position `i` only depends on local identifiers at position <
-  // `i`. This would make sure that all divisions depending on other local
-  // variables that can be merged, are merged.
-  unsigned localOffset = getIdKindOffset(IdKind::Local);
-  for (unsigned i = 0; i < divsA.size(); ++i) {
-    // Check if a division representation exists for the `i^th` local id.
-    if (denomsA[i] == 0)
-      continue;
-    // Check if a division exists which is a duplicate of the division at `i`.
-    for (unsigned j = i + 1; j < divsA.size(); ++j) {
-      // Check if a division representation exists for the `j^th` local id.
-      if (denomsA[j] == 0)
-        continue;
-      // Check if the denominators match.
-      if (denomsA[i] != denomsA[j])
-        continue;
-      // Check if the representations are equal.
-      if (divsA[i] != divsA[j])
-        continue;
-
-      // Merge divisions at position `j` into division at position `i`.
-      eliminateRedundantLocalId(polyA, i, j);
-      eliminateRedundantLocalId(polyB, i, j);
-      for (unsigned k = 0, g = divsA.size(); k < g; ++k) {
-        SmallVector<int64_t, 8> &div = divsA[k];
-        if (denomsA[k] != 0) {
-          div[localOffset + i] += div[localOffset + j];
-          div.erase(div.begin() + localOffset + j);
-        }
-      }
+  // Merge function that merges the local variables in both sets by treating
+  // them as the same identifier.
+  auto merge = [&polyA, &polyB](unsigned i, unsigned j) -> bool {
+    eliminateRedundantLocalId(polyA, i, j);
+    eliminateRedundantLocalId(polyB, i, j);
+    return true;
+  };
 
-      divsA.erase(divsA.begin() + j);
-      denomsA.erase(denomsA.begin() + j);
-      // Since `j` can never be zero, we do not need to worry about overflows.
-      --j;
-    }
-  }
+  // Merge all divisions by removing duplicate divisions.
+  unsigned localOffset = getIdKindOffset(IdKind::Local);
+  removeDuplicateDivs(divsA, denomsA, localOffset, merge);
 }
 
 /// Removes local variables using equalities. Each equality is checked if it
diff --git a/mlir/lib/Analysis/Presburger/Utils.cpp b/mlir/lib/Analysis/Presburger/Utils.cpp
index 7b8fe49b23c83..373dce2746a10 100644
--- a/mlir/lib/Analysis/Presburger/Utils.cpp
+++ b/mlir/lib/Analysis/Presburger/Utils.cpp
@@ -190,3 +190,54 @@ MaybeLocalRepr presburger_utils::computeSingleVarRepr(
   }
   return repr;
 }
+
+void presburger_utils::removeDuplicateDivs(
+    std::vector<SmallVector<int64_t, 8>> &divs,
+    SmallVectorImpl<unsigned> &denoms, unsigned localOffset,
+    llvm::function_ref<bool(unsigned i, unsigned j)> merge) {
+
+  // Find and merge duplicate divisions.
+  // TODO: Add division normalization to support divisions that differ by
+  // a constant.
+  // TODO: Add division ordering such that a division representation for local
+  // identifier at position `i` only depends on local identifiers at position <
+  // `i`. This would make sure that all divisions depending on other local
+  // variables that can be merged, are merged.
+  for (unsigned i = 0; i < divs.size(); ++i) {
+    // Check if a division representation exists for the `i^th` local id.
+    if (denoms[i] == 0)
+      continue;
+    // Check if a division exists which is a duplicate of the division at `i`.
+    for (unsigned j = i + 1; j < divs.size(); ++j) {
+      // Check if a division representation exists for the `j^th` local id.
+      if (denoms[j] == 0)
+        continue;
+      // Check if the denominators match.
+      if (denoms[i] != denoms[j])
+        continue;
+      // Check if the representations are equal.
+      if (divs[i] != divs[j])
+        continue;
+
+      // Merge divisions at position `j` into division at position `i`. If
+      // merge fails, do not merge these divs.
+      bool mergeResult = merge(i, j);
+      if (!mergeResult)
+        continue;
+
+      // Update division information to reflect merging.
+      for (unsigned k = 0, g = divs.size(); k < g; ++k) {
+        SmallVector<int64_t, 8> &div = divs[k];
+        if (denoms[k] != 0) {
+          div[localOffset + i] += div[localOffset + j];
+          div.erase(div.begin() + localOffset + j);
+        }
+      }
+
+      divs.erase(divs.begin() + j);
+      denoms.erase(denoms.begin() + j);
+      // Since `j` can never be zero, we do not need to worry about overflows.
+      --j;
+    }
+  }
+}

From b4b6d6374e2e04640a2afa0415ba326f6d7be9b7 Mon Sep 17 00:00:00 2001
From: Evgeniy Brevnov <ybrevnov@azul.com>
Date: Mon, 24 Jan 2022 18:01:03 +0700
Subject: [PATCH 369/946] [NFC] New test case for BasicAA and memcy/memmove
 with deopt

New test checks results of BasicAA for llvm.memcpy.*/llvm.memmove.* intrinsics in presence of deopt bundle. By specification expected result for unrelated global memory should be Ref. Currently this is not the case and will be fixed in upcoming patches.

Reviewed By: fhahn

Differential Revision: https://reviews.llvm.org/D118031
---
 llvm/test/Analysis/BasicAA/deoptimize.ll | 41 +++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Analysis/BasicAA/deoptimize.ll b/llvm/test/Analysis/BasicAA/deoptimize.ll
index 927c0e03853f9..89297e2583186 100644
--- a/llvm/test/Analysis/BasicAA/deoptimize.ll
+++ b/llvm/test/Analysis/BasicAA/deoptimize.ll
@@ -1,7 +1,10 @@
 ; RUN: opt < %s -basic-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
 target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
 
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) #0
+@G1 = external global i32
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
 declare void @llvm.experimental.deoptimize.void(...)
 declare void @unknown_but_readonly() readonly
 
@@ -12,3 +15,39 @@ define void @test1(i8* %p) {
 ; CHECK-LABEL: Function: test1:
 ; CHECK:  Just Ref: Ptr: i8* %p <-> call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"() ]
 }
+
+; By specification calls with deopt bundles reads through all operands and entire heap.
+; Check that global G1 is reported as Ref by memcpy/memmove calls.
+define i32 @test_memcpy_with_deopt() {
+; CHECK-LABEL: Function: test_memcpy_with_deopt:
+; CHECK: Just Mod:  Ptr: i8* %A	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+; CHECK: Just Ref:  Ptr: i8* %B	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+; CHECK: NoModRef:  Ptr: i32* @G1	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+
+  %A = alloca i8
+  %B = alloca i8
+
+  store i32 2, i32* @G1  ;; Not referenced by semantics of memcpy but still may be read due to "deopt"
+
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+
+  %C = load i32, i32* @G1
+  ret i32 %C
+}
+
+define i32 @test_memmove_with_deopt() {
+; CHECK-LABEL: Function: test_memmove_with_deopt:
+; CHECK: Just Mod:  Ptr: i8* %A	<->  call void @llvm.memmove.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+; CHECK: Just Ref:  Ptr: i8* %B	<->  call void @llvm.memmove.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+; CHECK: Both ModRef:  Ptr: i32* @G1	<->  call void @llvm.memmove.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+
+  %A = alloca i8
+  %B = alloca i8
+
+  store i32 2, i32* @G1  ;; Not referenced by semantics of memcpy but still may be read due to "deopt"
+
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+
+  %C = load i32, i32* @G1
+  ret i32 %C
+}

From f1e36474b9e57b828f0e25c2932536cce7a558e5 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Mon, 24 Jan 2022 13:44:54 +0100
Subject: [PATCH 370/946] [AMDGPU][NFC] Fix debug prints

Print the instructions instead of pointers.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 45c427c187849..f8a10bc8ef6f6 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -863,7 +863,7 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
       Wait.ExpCnt = ~0u;
 
       LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
-                        << "Old Instr: " << MI << "New Instr: " << *WaitcntInstr
+                        << "Old Instr: " << *MI << "New Instr: " << *WaitcntInstr
                         << '\n');
     } else {
       WaitcntInstr->eraseFromParent();
@@ -886,7 +886,7 @@ bool SIInsertWaitcnts::applyPreexistingWaitcnt(WaitcntBrackets &ScoreBrackets,
       Wait.VsCnt = ~0u;
 
       LLVM_DEBUG(dbgs() << "generateWaitcntInstBefore\n"
-                        << "Old Instr: " << MI
+                        << "Old Instr: " << *MI
                         << "New Instr: " << *WaitcntVsCntInstr << '\n');
     } else {
       WaitcntVsCntInstr->eraseFromParent();

From 0e70dd858eb76151b90fedec593794d54aaca8c1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 12:58:03 +0000
Subject: [PATCH 371/946] [X86] Add PR46249 test case showing poorly widened
 select predicate mask

---
 llvm/test/CodeGen/X86/vselect-avx512.ll | 111 ++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/vselect-avx512.ll

diff --git a/llvm/test/CodeGen/X86/vselect-avx512.ll b/llvm/test/CodeGen/X86/vselect-avx512.ll
new file mode 100644
index 0000000000000..5b4faa58ece8c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/vselect-avx512.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK
+
+; Try to ensure that vselect folds into any previous instruction.
+; Shuffle lowering can widen the select mask preventing it being used as a predicate mask move.
+
+define void @PR46249(i32* noalias nocapture noundef %0) {
+; CHECK-LABEL: PR46249:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm1 = zmm0[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT:    vpminsd %zmm0, %zmm1, %zmm2
+; CHECK-NEXT:    movw $-21846, %ax # imm = 0xAAAA
+; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    vpmaxsd %zmm0, %zmm1, %zmm2 {%k1}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; CHECK-NEXT:    movb $-86, %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k2}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT:    vpermq {{.*#+}} zmm0 = zmm0[2,3,0,1,6,7,4,5]
+; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; CHECK-NEXT:    movb $-52, %al
+; CHECK-NEXT:    kmovw %eax, %k3
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k3}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
+; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k2}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
+; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7,0,1,2,3]
+; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm1 = zmm2[2,3,0,1],zmm0[6,7,4,5]
+; CHECK-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7]
+; CHECK-NEXT:    vpminsd %zmm0, %zmm1, %zmm2
+; CHECK-NEXT:    vpmaxsd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm2 {%k3}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm2[2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13]
+; CHECK-NEXT:    vpminsd %zmm2, %zmm0, %zmm1
+; CHECK-NEXT:    vpmaxsd %zmm2, %zmm0, %zmm0
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k2}
+; CHECK-NEXT:    vpshufd {{.*#+}} zmm0 = zmm1[1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14]
+; CHECK-NEXT:    vpminsd %zmm1, %zmm0, %zmm2
+; CHECK-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm2, (%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %2 = bitcast i32* %0 to <16 x i32>*
+  %3 = load <16 x i32>, <16 x i32>* %2, align 1
+  %4 = shufflevector <16 x i32> %3, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+  %5 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %4, <16 x i32> %3) #2
+  %6 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %4, <16 x i32> %3) #2
+  %7 = shufflevector <16 x i32> %5, <16 x i32> %6, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  %8 = shufflevector <16 x i32> %7, <16 x i32> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+  %9 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %8, <16 x i32> %7) #2
+  %10 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %8, <16 x i32> %7) #2
+  %11 = shufflevector <16 x i32> %9, <16 x i32> %10, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+  %12 = shufflevector <16 x i32> %11, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+  %13 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %12, <16 x i32> %11) #2
+  %14 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %12, <16 x i32> %11) #2
+  %15 = shufflevector <16 x i32> %13, <16 x i32> %14, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  %16 = shufflevector <16 x i32> %15, <16 x i32> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+  %17 = shufflevector <16 x i32> %16, <16 x i32> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11>
+  %18 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %17, <16 x i32> %15) #2
+  %19 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %17, <16 x i32> %15) #2
+  %20 = shufflevector <16 x i32> %18, <16 x i32> %19, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
+  %21 = shufflevector <16 x i32> %20, <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
+  %22 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %21, <16 x i32> %20) #2
+  %23 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %21, <16 x i32> %20) #2
+  %24 = shufflevector <16 x i32> %22, <16 x i32> %23, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+  %25 = shufflevector <16 x i32> %24, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+  %26 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %25, <16 x i32> %24) #2
+  %27 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %25, <16 x i32> %24) #2
+  %28 = shufflevector <16 x i32> %26, <16 x i32> %27, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  %29 = shufflevector <16 x i32> %28, <16 x i32> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
+  %30 = shufflevector <16 x i32> %29, <16 x i32> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  %31 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %30, <16 x i32> %28) #2
+  %32 = bitcast <16 x i32> %31 to <8 x i64>
+  %33 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %30, <16 x i32> %28) #2
+  %34 = bitcast <16 x i32> %33 to <8 x i64>
+  %35 = shufflevector <8 x i64> %32, <8 x i64> %34, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+  %36 = shufflevector <8 x i64> %32, <8 x i64> %34, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 12, i32 13>
+  %37 = bitcast <8 x i64> %36 to <16 x i32>
+  %38 = bitcast <8 x i64> %35 to <16 x i32>
+  %39 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %37, <16 x i32> %38) #2
+  %40 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %37, <16 x i32> %38) #2
+  %41 = shufflevector <16 x i32> %39, <16 x i32> %40, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31>
+  %42 = shufflevector <16 x i32> %41, <16 x i32> poison, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
+  %43 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %42, <16 x i32> %41) #2
+  %44 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %42, <16 x i32> %41) #2
+  %45 = shufflevector <16 x i32> %43, <16 x i32> %44, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
+  %46 = shufflevector <16 x i32> %45, <16 x i32> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
+  %47 = tail call <16 x i32> @llvm.smin.v16i32(<16 x i32> %46, <16 x i32> %45) #2
+  %48 = tail call <16 x i32> @llvm.smax.v16i32(<16 x i32> %46, <16 x i32> %45) #2
+  %49 = shufflevector <16 x i32> %47, <16 x i32> %48, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  store <16 x i32> %49, <16 x i32>* %2, align 1
+  ret void
+}
+declare <16 x i32> @llvm.smin.v16i32(<16 x i32>, <16 x i32>)
+declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)

From b2a8eff45c55fe7fe05cb94c5a8525b54511a33e Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Mon, 24 Jan 2022 13:04:09 +0000
Subject: [PATCH 372/946] [LV] Make some tests more robust by adding missing
 users.

---
 .../LoopVectorize/AArch64/induction-trunc.ll  |  8 +-
 .../LoopVectorize/AArch64/pr31900.ll          | 12 ++-
 .../LoopVectorize/single-value-blend-phis.ll  | 96 ++-----------------
 .../tail-folding-vectorization-factor-1.ll    | 96 +++++++++++++++----
 .../vector-intrinsic-call-cost.ll             | 26 +++--
 5 files changed, 119 insertions(+), 119 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
index 2bee64107d7c9..5ed2e8752edb8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-trunc.ll
@@ -11,16 +11,22 @@ target triple = "aarch64--linux-gnu"
 ; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 5
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INDUCTION]] to i32
 ; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[INDUCTION1]] to i32
+; CHECK-NEXT:    [[GEP0:%.+]] = getelementptr inbounds i32, i32* %dst, i32 [[TMP4]]
+; CHECK-NEXT:    [[GEP1:%.+]] = getelementptr inbounds i32, i32* %dst, i32 [[TMP5]]
+; CHECK-NEXT:    store i32 0, i32* [[GEP0]], align 4
+; CHECK-NEXT:    store i32 0, i32* [[GEP1]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body
 ;
-define void @non_primary_iv_trunc_free(i64 %n) {
+define void @non_primary_iv_trunc_free(i64 %n, i32* %dst) {
 entry:
   br label %for.body
 
 for.body:
   %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
   %tmp0 = trunc i64 %i to i32
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i32 %tmp0
+  store i32 0, i32* %gep.dst
   %i.next = add nuw nsw i64 %i, 5
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll
index 5ea38a4a246dc..5a2730aa6ea3f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr31900.ll
@@ -11,21 +11,25 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128-p:16:16-p4:32:16"
 ; CHECK: load i16, i16*
 ; CHECK: load i16, i16 addrspace(4)*
 ; CHECK: load i16, i16 addrspace(4)*
+; CHECK: store <2 x i16>
 
 %rec1445 = type { i16, i16, i16, i16, i16 }
 
-define void @foo() {
+define void @foo(%rec1445* %a, %rec1445 addrspace(4)* %b, i16* noalias %dst) {
 bb1:
   br label %bb4
 
 bb4:
-  %tmp1 = phi i16 [ undef, %bb1 ], [ %_tmp1013, %bb4 ]
-  %tmp2 = phi %rec1445* [ undef, %bb1 ], [ %_tmp1015, %bb4 ]
-  %tmp3 = phi %rec1445 addrspace(4)* [ undef, %bb1 ], [ %_tmp1017, %bb4 ]
+  %tmp1 = phi i16 [ 0, %bb1 ], [ %_tmp1013, %bb4 ]
+  %tmp2 = phi %rec1445* [ %a, %bb1 ], [ %_tmp1015, %bb4 ]
+  %tmp3 = phi %rec1445 addrspace(4)* [ %b, %bb1 ], [ %_tmp1017, %bb4 ]
   %0 = getelementptr %rec1445, %rec1445* %tmp2, i16 0, i32 1
   %_tmp987 = load i16, i16* %0, align 1
   %1 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 0, i32 1
   %_tmp993 = load i16, i16 addrspace(4)* %1, align 1
+  %add = add i16 %_tmp987, %_tmp993
+  %dst.gep = getelementptr inbounds i16, i16* %dst, i16 %tmp1
+  store i16 %add, i16* %dst.gep
   %_tmp1013 = add i16 %tmp1, 1
   %_tmp1015 = getelementptr %rec1445, %rec1445* %tmp2, i16 1
   %_tmp1017 = getelementptr %rec1445, %rec1445 addrspace(4)* %tmp3, i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
index 094507d282763..4d30093195890 100644
--- a/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
+++ b/llvm/test/Transforms/LoopVectorize/single-value-blend-phis.ll
@@ -134,31 +134,6 @@ define void @single_incoming_phi_with_blend_mask(i64 %a, i64 %b) {
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 32, 32
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
-; CHECK:       loop.header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
-; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]]
-; CHECK:       loop.cond:
-; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[BLEND]]
-; CHECK-NEXT:    [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
-; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
-; CHECK:       loop.next:
-; CHECK-NEXT:    br label [[LOOP_LATCH]]
-; CHECK:       loop.latch:
-; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
-; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[IV]]
-; CHECK-NEXT:    store i16 [[RES]], i16* [[DST_PTR]], align 2
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
-; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP5:!llvm.loop !.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
@@ -191,7 +166,7 @@ exit:
   ret void
 }
 
-define void @multiple_incoming_phi_with_blend_mask(i64 %a) {
+define void @multiple_incoming_phi_with_blend_mask(i64 %a, i16* noalias %dst) {
 ; CHECK-LABEL: @multiple_incoming_phi_with_blend_mask(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
@@ -215,6 +190,12 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a) {
 ; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[TMP6]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = load i16, i16* [[TMP5]], align 1
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[TMP7]], align 1
+; CHECK-NEXT:    [[INS1:%.+]] = insertelement <2 x i16> poison, i16 [[TMP8]], i32 0
+; CHECK-NEXT:    [[INS2:%.+]] = insertelement <2 x i16> [[INS1]], i16 [[TMP9]], i32 1
+; CHECK-NEXT:    [[DST0:%.+]] = getelementptr inbounds i16, i16* %dst, i64 [[TMP0]]
+; CHECK-NEXT:    [[DST1:%.+]] = getelementptr inbounds i16, i16* [[DST0]], i32 0
+; CHECK-NEXT:    [[DST1_BC:%.+]] = bitcast i16* [[DST1]] to <2 x i16>*
+; CHECK-NEXT:    store <2 x i16> [[INS2]], <2 x i16>* [[DST1_BC]], align 2
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
 ; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
@@ -224,26 +205,6 @@ define void @multiple_incoming_phi_with_blend_mask(i64 %a) {
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 32, 32
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 32, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
-; CHECK:       loop.header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
-; CHECK-NEXT:    [[IV_TRUNC_2:%.*]] = trunc i64 [[IV]] to i16
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
-; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
-; CHECK:       loop.next:
-; CHECK-NEXT:    br label [[LOOP_LATCH]]
-; CHECK:       loop.latch:
-; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ], [ [[IV_TRUNC_2]], [[LOOP_NEXT]] ]
-; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[BLEND]]
-; CHECK-NEXT:    [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 31
-; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP7:!llvm.loop !.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
@@ -262,6 +223,8 @@ loop.latch:
   %blend = phi i16 [ %iv.trunc, %loop.header ], [ %iv.trunc.2, %loop.next ]
   %src.ptr = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 %blend
   %lv = load i16, i16* %src.ptr, align 1
+  %dst.ptr = getelementptr inbounds i16, i16* %dst, i64 %iv
+  store i16 %lv, i16* %dst.ptr
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp439 = icmp ult i64 %iv, 31
   br i1 %cmp439, label %loop.header, label %exit
@@ -326,31 +289,6 @@ define void @single_incoming_needs_predication(i64 %a, i64 %b) {
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 64, 64
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 64, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
-; CHECK:       loop.header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT:    [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i16
-; CHECK-NEXT:    [[CMP_A:%.*]] = icmp ugt i64 [[IV]], [[A]]
-; CHECK-NEXT:    br i1 [[CMP_A]], label [[LOOP_COND:%.*]], label [[LOOP_LATCH]]
-; CHECK:       loop.cond:
-; CHECK-NEXT:    [[BLEND:%.*]] = phi i16 [ [[IV_TRUNC]], [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[SRC_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @src, i16 0, i16 [[BLEND]]
-; CHECK-NEXT:    [[LV:%.*]] = load i16, i16* [[SRC_PTR]], align 1
-; CHECK-NEXT:    [[CMP_B:%.*]] = icmp sgt i64 [[IV]], [[A]]
-; CHECK-NEXT:    br i1 [[CMP_B]], label [[LOOP_NEXT:%.*]], label [[LOOP_LATCH]]
-; CHECK:       loop.next:
-; CHECK-NEXT:    br label [[LOOP_LATCH]]
-; CHECK:       loop.latch:
-; CHECK-NEXT:    [[RES:%.*]] = phi i16 [ 0, [[LOOP_HEADER]] ], [ [[LV]], [[LOOP_COND]] ], [ 1, [[LOOP_NEXT]] ]
-; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds [32 x i16], [32 x i16]* @dst, i16 0, i64 [[IV]]
-; CHECK-NEXT:    store i16 [[RES]], i16* [[DST_PTR]], align 2
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[CMP439:%.*]] = icmp ult i64 [[IV]], 63
-; CHECK-NEXT:    br i1 [[CMP439]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP9:!llvm.loop !.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
@@ -408,22 +346,6 @@ define void @duplicated_incoming_blocks_blend(i32 %x, i32* %ptr) {
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 1000, 1000
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 1000, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
-; CHECK:       loop.header:
-; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_I:%.*]], [[LOOP_LATCH:%.*]] ]
-; CHECK-NEXT:    [[C_0:%.*]] = icmp ugt i32 [[IV]], [[X]]
-; CHECK-NEXT:    br i1 [[C_0]], label [[LOOP_LATCH]], label [[LOOP_LATCH]]
-; CHECK:       loop.latch:
-; CHECK-NEXT:    [[P:%.*]] = phi i32 [ [[IV]], [[LOOP_HEADER]] ], [ [[IV]], [[LOOP_HEADER]] ]
-; CHECK-NEXT:    [[GEP_PTR:%.*]] = getelementptr i32, i32* [[PTR]], i32 [[P]]
-; CHECK-NEXT:    store i32 [[P]], i32* [[GEP_PTR]], align 4
-; CHECK-NEXT:    [[ADD_I]] = add nsw i32 [[P]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[ADD_I]], 1000
-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP_HEADER]], label [[EXIT]], [[LOOP11:!llvm.loop !.*]]
-; CHECK:       exit:
-; CHECK-NEXT:    ret void
 ;
 entry:
   br label %loop.header
diff --git a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
index bb901ed70a827..8406a0d666716 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll
@@ -9,21 +9,49 @@
 ; CHECK-REMARKS-NEXT: remark: {{.*}} interleaved loop (interleaved count: 4)
 ; CHECK-REMARKS-NOT:  remark: {{.*}} vectorized loop
 
-define void @VF1-VPlanExe() {
+define void @VF1-VPlanExe(i32* %dst) {
 ; CHECK-LABEL: @VF1-VPlanExe(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE9:%.*]] ]
 ; CHECK-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
 ; CHECK-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 2
 ; CHECK-NEXT:    [[INDUCTION3:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 [[INDUCTION]], 14
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i64 [[INDUCTION1]], 14
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule i64 [[INDUCTION2]], 14
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i64 [[INDUCTION3]], 14
+; CHECK-NEXT:    br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[DST:%.*]], i64 [[INDUCTION]]
+; CHECK-NEXT:    store i32 0, i32* [[TMP4]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
+; CHECK:       pred.store.if4:
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION1]]
+; CHECK-NEXT:    store i32 0, i32* [[TMP5]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE5]]
+; CHECK:       pred.store.continue5:
+; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
+; CHECK:       pred.store.if6:
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION2]]
+; CHECK-NEXT:    store i32 0, i32* [[TMP6]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE7]]
+; CHECK:       pred.store.continue7:
+; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]]
+; CHECK:       pred.store.if8:
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDUCTION3]]
+; CHECK-NEXT:    store i32 0, i32* [[TMP7]], align 4
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE9]]
+; CHECK:       pred.store.continue9:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -33,9 +61,11 @@ define void @VF1-VPlanExe() {
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[DST_PTR:%.*]] = getelementptr inbounds i32, i32* [[DST]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 0, i32* [[DST_PTR]], align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 15
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !2
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
 ;
 entry:
   br label %for.body
@@ -45,6 +75,8 @@ for.cond.cleanup:
 
 for.body:
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %dst.ptr = getelementptr inbounds i32, i32* %dst, i64 %indvars.iv
+  store i32 0, i32* %dst.ptr
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 15
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -59,18 +91,46 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) {
 ; CHECK-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[PTR1]], i64 16
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP0]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
-; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP2]]
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
-; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP3]]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE12:%.*]] ]
+; CHECK-NEXT:    [[VEC_IV:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[VEC_IV4:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[VEC_IV5:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[VEC_IV6:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP0:%.*]] = icmp ule i64 [[VEC_IV]], 14
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule i64 [[VEC_IV4]], 14
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule i64 [[VEC_IV5]], 14
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ule i64 [[VEC_IV6]], 14
+; CHECK-NEXT:    br i1 [[TMP0]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
+; CHECK:       pred.store.if:
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP4]]
+; CHECK-NEXT:    store double 0.000000e+00, double* [[NEXT_GEP]], align 8
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
+; CHECK:       pred.store.continue:
+; CHECK-NEXT:    br i1 [[TMP1]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
+; CHECK:       pred.store.if7:
+; CHECK-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[NEXT_GEP1:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP5]]
+; CHECK-NEXT:    store double 0.000000e+00, double* [[NEXT_GEP1]], align 8
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE8]]
+; CHECK:       pred.store.continue8:
+; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
+; CHECK:       pred.store.if9:
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP6]]
+; CHECK-NEXT:    store double 0.000000e+00, double* [[NEXT_GEP2]], align 8
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE10]]
+; CHECK:       pred.store.continue10:
+; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12]]
+; CHECK:       pred.store.if11:
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 3
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[PTR1]], i64 [[TMP7]]
+; CHECK-NEXT:    store double 0.000000e+00, double* [[NEXT_GEP3]], align 8
+; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE12]]
+; CHECK:       pred.store.continue12:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !3
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       middle.block:
 ; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
@@ -80,9 +140,10 @@ define void @VF1-VPWidenCanonicalIVRecipeExe(double* %ptr1) {
 ; CHECK-NEXT:    ret void
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[ADDR:%.*]] = phi double* [ [[PTR:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT:    store double 0.000000e+00, double* [[ADDR]], align 8
 ; CHECK-NEXT:    [[PTR]] = getelementptr inbounds double, double* [[ADDR]], i64 1
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq double* [[PTR]], [[PTR2]]
-; CHECK-NEXT:    br i1 [[COND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop !4
+; CHECK-NEXT:    br i1 [[COND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
 ;
 entry:
   %ptr2 = getelementptr inbounds double, double* %ptr1, i64 15
@@ -93,6 +154,7 @@ for.cond.cleanup:
 
 for.body:
   %addr = phi double* [ %ptr, %for.body ], [ %ptr1, %entry ]
+  store double 0.0, double* %addr
   %ptr = getelementptr inbounds double, double* %addr, i64 1
   %cond = icmp eq double* %ptr, %ptr2
   br i1 %cond, label %for.cond.cleanup, label %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll b/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll
index 5ab99e4dd305f..013fd5827da1f 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-intrinsic-call-cost.ll
@@ -2,14 +2,18 @@
 
 ; CHECK-LABEL: @test_fshl
 ; CHECK-LABEL: vector.body:
-; CHECK-NEXT:    %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK-NEXT:    %0 = add i32 %index, 0
-; CHECK-NEXT:    %1 = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> <i16 15, i16 15, i16 15, i16 15>)
-; CHECK-NEXT:    %index.next = add nuw i32 %index, 4
-; CHECK-NEXT:    %2 = icmp eq i32 %index.next, %n.vec
-; CHECK-NEXT:     br i1 %2, label %middle.block, label %vector.body, !llvm.loop !0
+; CHECK-NEXT:    [[IDX:%.+]] = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK-NEXT:    [[IDX0:%.+]] = add i32 %index, 0
+; CHECK-NEXT:    [[FSHL:%.+]] = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> undef, <4 x i16> undef, <4 x i16> <i16 15, i16 15, i16 15, i16 15>)
+; CHECK-NEXT:    [[GEP0:%.+]] = getelementptr inbounds i16, i16* %dst, i32 [[IDX0]]
+; CHECK-NEXT:    [[GEP1:%.+]] = getelementptr inbounds i16, i16* [[GEP0]], i32 0
+; CHECK-NEXT:    [[GEP_BC:%.+]] = bitcast i16* [[GEP1]] to <4 x i16>*
+; CHECK-NEXT:    store <4 x i16> [[FSHL]], <4 x i16>* [[GEP_BC]], align 2
+; CHECK-NEXT:    [[IDX_NEXT:%.+]] = add nuw i32 [[IDX]], 4
+; CHECK-NEXT:    [[EC:%.+]] = icmp eq i32 [[IDX_NEXT]], %n.vec
+; CHECK-NEXT:    br i1 [[EC]], label %middle.block, label %vector.body
 ;
-define void @test_fshl(i32 %width) {
+define void @test_fshl(i32 %width, i16* %dst) {
 entry:
   br label %for.body9.us.us
 
@@ -17,10 +21,12 @@ for.cond6.for.cond.cleanup8_crit_edge.us.us:      ; preds = %for.body9.us.us
   ret void
 
 for.body9.us.us:                                  ; preds = %for.body9.us.us, %entry
-  %x.020.us.us = phi i32 [ 0, %entry ], [ %inc.us.us, %for.body9.us.us ]
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body9.us.us ]
   %conv4.i.us.us = tail call i16 @llvm.fshl.i16(i16 undef, i16 undef, i16 15)
-  %inc.us.us = add nuw i32 %x.020.us.us, 1
-  %exitcond50 = icmp eq i32 %inc.us.us, %width
+  %dst.gep = getelementptr inbounds i16, i16* %dst, i32 %iv
+  store i16 %conv4.i.us.us, i16* %dst.gep
+  %iv.next = add nuw i32 %iv, 1
+  %exitcond50 = icmp eq i32 %iv.next, %width
   br i1 %exitcond50, label %for.cond6.for.cond.cleanup8_crit_edge.us.us, label %for.body9.us.us
 }
 

From 5f290c090a2404238a5b0ae4233f3ae9daec319e Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Mon, 24 Jan 2022 05:57:09 -0500
Subject: [PATCH 373/946] Move STLFunctionalExtras out of STLExtras

Only using that change in StringRef already decreases the number of
preoprocessed lines from 7837621 to 7776151 for LLVMSupport

Perhaps more interestingly, it shows that many files were relying on the
inclusion of StringRef.h to have the declaration from STLExtras.h. This
patch tries hard to patch relevant part of llvm-project impacted by this
hidden dependency removal.

Potential impact:
- "llvm/ADT/StringRef.h" no longer includes <memory>,
  "llvm/ADT/Optional.h" nor "llvm/ADT/STLExtras.h"

Related Discourse thread:
https://llvm.discourse.group/t/include-what-you-use-include-cleanup/5831
---
 clang/include/clang/Basic/DirectoryEntry.h    |  1 +
 clang/tools/diagtool/DiagTool.cpp             |  1 +
 lld/ELF/Relocations.h                         |  1 +
 lldb/include/lldb/Utility/UserIDResolver.h    |  1 +
 llvm/include/llvm/ADT/CombinationGenerator.h  |  2 +-
 llvm/include/llvm/ADT/STLExtras.h             | 61 +--------------
 llvm/include/llvm/ADT/STLFunctionalExtras.h   | 76 +++++++++++++++++++
 llvm/include/llvm/ADT/SmallSet.h              |  1 +
 llvm/include/llvm/ADT/SparseMultiSet.h        |  2 +-
 llvm/include/llvm/ADT/SparseSet.h             |  2 +-
 llvm/include/llvm/ADT/StringMap.h             |  1 +
 llvm/include/llvm/ADT/StringMapEntry.h        |  2 +
 llvm/include/llvm/ADT/StringRef.h             | 11 +--
 llvm/include/llvm/ADT/StringSwitch.h          |  1 +
 llvm/include/llvm/ADT/identity.h              | 34 +++++++++
 .../ExecutionEngine/JITLink/MemoryFlags.h     |  1 +
 llvm/include/llvm/IR/LLVMContext.h            |  1 +
 llvm/include/llvm/MC/SubtargetFeature.h       |  1 +
 .../llvm/Support/CrashRecoveryContext.h       |  2 +-
 llvm/include/llvm/Support/JSON.h              |  1 +
 llvm/include/llvm/Support/TimeProfiler.h      |  1 +
 llvm/include/llvm/Support/Timer.h             |  1 +
 llvm/include/llvm/Support/VirtualFileSystem.h |  1 +
 llvm/include/llvm/Support/raw_ostream.h       |  1 +
 .../llvm/Testing/Support/Annotations.h        |  2 +
 llvm/lib/CodeGen/NonRelocatableStringpool.cpp |  1 +
 llvm/lib/Support/CommandLine.cpp              |  2 +-
 llvm/lib/Support/StringRef.cpp                |  8 ++
 llvm/lib/Support/TimeProfiler.cpp             |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPULibFunc.h        |  1 +
 llvm/unittests/ADT/SequenceTest.cpp           |  1 +
 llvm/unittests/ADT/SimpleIListTest.cpp        |  1 +
 llvm/unittests/ADT/SmallPtrSetTest.cpp        |  1 +
 llvm/unittests/ADT/StringMapTest.cpp          |  1 +
 llvm/unittests/ADT/StringSetTest.cpp          |  1 +
 .../Support/ReverseIterationTest.cpp          |  1 +
 .../mlir/Conversion/GPUCommon/GPUCommonPass.h |  1 +
 mlir/lib/Parser/Token.h                       |  1 +
 38 files changed, 158 insertions(+), 73 deletions(-)
 create mode 100644 llvm/include/llvm/ADT/STLFunctionalExtras.h
 create mode 100644 llvm/include/llvm/ADT/identity.h

diff --git a/clang/include/clang/Basic/DirectoryEntry.h b/clang/include/clang/Basic/DirectoryEntry.h
index edb8031a20b80..ac8e790230fcc 100644
--- a/clang/include/clang/Basic/DirectoryEntry.h
+++ b/clang/include/clang/Basic/DirectoryEntry.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ErrorOr.h"
 
 namespace clang {
diff --git a/clang/tools/diagtool/DiagTool.cpp b/clang/tools/diagtool/DiagTool.cpp
index 81d4e7e44cccc..99abe5755f713 100644
--- a/clang/tools/diagtool/DiagTool.cpp
+++ b/clang/tools/diagtool/DiagTool.cpp
@@ -12,6 +12,7 @@
 
 #include "DiagTool.h"
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include <vector>
 
 using namespace diagtool;
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
index f9909f236d12e..73f6970b80f13 100644
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -11,6 +11,7 @@
 
 #include "lld/Common/LLVM.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include <map>
 #include <vector>
 
diff --git a/lldb/include/lldb/Utility/UserIDResolver.h b/lldb/include/lldb/Utility/UserIDResolver.h
index e0f7b69cc075f..15afafd65a1aa 100644
--- a/lldb/include/lldb/Utility/UserIDResolver.h
+++ b/lldb/include/lldb/Utility/UserIDResolver.h
@@ -10,6 +10,7 @@
 #define LLDB_UTILITY_USERIDRESOLVER_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include <mutex>
 
diff --git a/llvm/include/llvm/ADT/CombinationGenerator.h b/llvm/include/llvm/ADT/CombinationGenerator.h
index ab6afd555726d..a0bec68eaad60 100644
--- a/llvm/include/llvm/ADT/CombinationGenerator.h
+++ b/llvm/include/llvm/ADT/CombinationGenerator.h
@@ -28,7 +28,7 @@
 #define LLVM_ADT_COMBINATIONGENERATOR_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include <cassert>
 #include <cstring>
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index ed5321821a3a2..c3200c9265182 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -16,8 +16,10 @@
 #ifndef LLVM_ADT_STLEXTRAS_H
 #define LLVM_ADT_STLEXTRAS_H
 
+#include "llvm/ADT/identity.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Config/abi-breaking.h"
@@ -199,65 +201,6 @@ struct FirstIndexOfType<T, T, Us...> : std::integral_constant<size_t, 0> {};
 template <size_t I, typename... Ts>
 using TypeAtIndex = std::tuple_element_t<I, std::tuple<Ts...>>;
 
-//===----------------------------------------------------------------------===//
-//     Extra additions to <functional>
-//===----------------------------------------------------------------------===//
-
-template <class Ty> struct identity {
-  using argument_type = Ty;
-
-  Ty &operator()(Ty &self) const {
-    return self;
-  }
-  const Ty &operator()(const Ty &self) const {
-    return self;
-  }
-};
-
-/// An efficient, type-erasing, non-owning reference to a callable. This is
-/// intended for use as the type of a function parameter that is not used
-/// after the function in question returns.
-///
-/// This class does not own the callable, so it is not in general safe to store
-/// a function_ref.
-template<typename Fn> class function_ref;
-
-template<typename Ret, typename ...Params>
-class function_ref<Ret(Params...)> {
-  Ret (*callback)(intptr_t callable, Params ...params) = nullptr;
-  intptr_t callable;
-
-  template<typename Callable>
-  static Ret callback_fn(intptr_t callable, Params ...params) {
-    return (*reinterpret_cast<Callable*>(callable))(
-        std::forward<Params>(params)...);
-  }
-
-public:
-  function_ref() = default;
-  function_ref(std::nullptr_t) {}
-
-  template <typename Callable>
-  function_ref(
-      Callable &&callable,
-      // This is not the copy-constructor.
-      std::enable_if_t<!std::is_same<remove_cvref_t<Callable>,
-                                     function_ref>::value> * = nullptr,
-      // Functor must be callable and return a suitable type.
-      std::enable_if_t<std::is_void<Ret>::value ||
-                       std::is_convertible<decltype(std::declval<Callable>()(
-                                               std::declval<Params>()...)),
-                                           Ret>::value> * = nullptr)
-      : callback(callback_fn<typename std::remove_reference<Callable>::type>),
-        callable(reinterpret_cast<intptr_t>(&callable)) {}
-
-  Ret operator()(Params ...params) const {
-    return callback(callable, std::forward<Params>(params)...);
-  }
-
-  explicit operator bool() const { return callback; }
-};
-
 //===----------------------------------------------------------------------===//
 //     Extra additions to <iterator>
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/ADT/STLFunctionalExtras.h b/llvm/include/llvm/ADT/STLFunctionalExtras.h
new file mode 100644
index 0000000000000..ebe1b1521a5d7
--- /dev/null
+++ b/llvm/include/llvm/ADT/STLFunctionalExtras.h
@@ -0,0 +1,76 @@
+//===- llvm/ADT/STLFunctionalExtras.h - Extras for <functional> -*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains some extension to <functional>.
+//
+// No library is required when using these functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_STLFUNCTIONALEXTRAS_H
+#define LLVM_ADT_STLFUNCTIONALEXTRAS_H
+
+#include "llvm/ADT/STLForwardCompat.h"
+
+#include <type_traits>
+#include <utility>
+#include <cstdint>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+//     Extra additions to <functional>
+//===----------------------------------------------------------------------===//
+
+/// An efficient, type-erasing, non-owning reference to a callable. This is
+/// intended for use as the type of a function parameter that is not used
+/// after the function in question returns.
+///
+/// This class does not own the callable, so it is not in general safe to store
+/// a function_ref.
+template<typename Fn> class function_ref;
+
+template<typename Ret, typename ...Params>
+class function_ref<Ret(Params...)> {
+  Ret (*callback)(intptr_t callable, Params ...params) = nullptr;
+  intptr_t callable;
+
+  template<typename Callable>
+  static Ret callback_fn(intptr_t callable, Params ...params) {
+    return (*reinterpret_cast<Callable*>(callable))(
+        std::forward<Params>(params)...);
+  }
+
+public:
+  function_ref() = default;
+  function_ref(std::nullptr_t) {}
+
+  template <typename Callable>
+  function_ref(
+      Callable &&callable,
+      // This is not the copy-constructor.
+      std::enable_if_t<!std::is_same<remove_cvref_t<Callable>,
+                                     function_ref>::value> * = nullptr,
+      // Functor must be callable and return a suitable type.
+      std::enable_if_t<std::is_void<Ret>::value ||
+                       std::is_convertible<decltype(std::declval<Callable>()(
+                                               std::declval<Params>()...)),
+                                           Ret>::value> * = nullptr)
+      : callback(callback_fn<typename std::remove_reference<Callable>::type>),
+        callable(reinterpret_cast<intptr_t>(&callable)) {}
+
+  Ret operator()(Params ...params) const {
+    return callback(callable, std::forward<Params>(params)...);
+  }
+
+  explicit operator bool() const { return callback; }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_STLFUNCTIONALEXTRAS_H
diff --git a/llvm/include/llvm/ADT/SmallSet.h b/llvm/include/llvm/ADT/SmallSet.h
index 0600e528ee692..fe4f74eac85d8 100644
--- a/llvm/include/llvm/ADT/SmallSet.h
+++ b/llvm/include/llvm/ADT/SmallSet.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/type_traits.h"
diff --git a/llvm/include/llvm/ADT/SparseMultiSet.h b/llvm/include/llvm/ADT/SparseMultiSet.h
index fec0a70a0bef8..f63cef9364336 100644
--- a/llvm/include/llvm/ADT/SparseMultiSet.h
+++ b/llvm/include/llvm/ADT/SparseMultiSet.h
@@ -20,7 +20,7 @@
 #ifndef LLVM_ADT_SPARSEMULTISET_H
 #define LLVM_ADT_SPARSEMULTISET_H
 
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/identity.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/SparseSet.h"
 #include <cassert>
diff --git a/llvm/include/llvm/ADT/SparseSet.h b/llvm/include/llvm/ADT/SparseSet.h
index d8acf1ee2f3a2..e66d76ad88e1f 100644
--- a/llvm/include/llvm/ADT/SparseSet.h
+++ b/llvm/include/llvm/ADT/SparseSet.h
@@ -19,7 +19,7 @@
 #ifndef LLVM_ADT_SPARSESET_H
 #define LLVM_ADT_SPARSESET_H
 
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/identity.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/AllocatorBase.h"
 #include <cassert>
diff --git a/llvm/include/llvm/ADT/StringMap.h b/llvm/include/llvm/ADT/StringMap.h
index 669956d41e0c5..562a2ff1a192c 100644
--- a/llvm/include/llvm/ADT/StringMap.h
+++ b/llvm/include/llvm/ADT/StringMap.h
@@ -14,6 +14,7 @@
 #define LLVM_ADT_STRINGMAP_H
 
 #include "llvm/ADT/StringMapEntry.h"
+#include "llvm/ADT/iterator.h"
 #include "llvm/Support/AllocatorBase.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include <initializer_list>
diff --git a/llvm/include/llvm/ADT/StringMapEntry.h b/llvm/include/llvm/ADT/StringMapEntry.h
index 8bfad5540230d..120d4f3ca4bc3 100644
--- a/llvm/include/llvm/ADT/StringMapEntry.h
+++ b/llvm/include/llvm/ADT/StringMapEntry.h
@@ -15,7 +15,9 @@
 #ifndef LLVM_ADT_STRINGMAPENTRY_H
 #define LLVM_ADT_STRINGMAPENTRY_H
 
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 
 namespace llvm {
 
diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h
index 9f64250c58a36..118def2f43e12 100644
--- a/llvm/include/llvm/ADT/StringRef.h
+++ b/llvm/include/llvm/ADT/StringRef.h
@@ -9,7 +9,8 @@
 #ifndef LLVM_ADT_STRINGREF_H
 #define LLVM_ADT_STRINGREF_H
 
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Compiler.h"
 #include <algorithm>
@@ -978,13 +979,7 @@ namespace llvm {
           reinterpret_cast<const char *>(~static_cast<uintptr_t>(1)), 0);
     }
 
-    static unsigned getHashValue(StringRef Val) {
-      assert(Val.data() != getEmptyKey().data() &&
-             "Cannot hash the empty key!");
-      assert(Val.data() != getTombstoneKey().data() &&
-             "Cannot hash the tombstone key!");
-      return (unsigned)(hash_value(Val));
-    }
+    static unsigned getHashValue(StringRef Val);
 
     static bool isEqual(StringRef LHS, StringRef RHS) {
       if (RHS.data() == getEmptyKey().data())
diff --git a/llvm/include/llvm/ADT/StringSwitch.h b/llvm/include/llvm/ADT/StringSwitch.h
index 726d67c8f24ab..4b7882d7ca100 100644
--- a/llvm/include/llvm/ADT/StringSwitch.h
+++ b/llvm/include/llvm/ADT/StringSwitch.h
@@ -12,6 +12,7 @@
 #ifndef LLVM_ADT_STRINGSWITCH_H
 #define LLVM_ADT_STRINGSWITCH_H
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Compiler.h"
 #include <cassert>
diff --git a/llvm/include/llvm/ADT/identity.h b/llvm/include/llvm/ADT/identity.h
new file mode 100644
index 0000000000000..f07bb6fb39f62
--- /dev/null
+++ b/llvm/include/llvm/ADT/identity.h
@@ -0,0 +1,34 @@
+//===- llvm/ADT/Identity.h - Provide std::identity from C++20 ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides an implementation of std::identity from C++20.
+//
+// No library is required when using these functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_IDENTITY_H
+#define LLVM_ADT_IDENTITY_H
+
+
+namespace llvm {
+
+template <class Ty> struct identity {
+  using argument_type = Ty;
+
+  Ty &operator()(Ty &self) const {
+    return self;
+  }
+  const Ty &operator()(const Ty &self) const {
+    return self;
+  }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_IDENTITY_H
diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
index 8fdce93ebc564..e9771319ef06a 100644
--- a/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
+++ b/llvm/include/llvm/ExecutionEngine/JITLink/MemoryFlags.h
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/BitmaskEnum.h"
 #include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Memory.h"
 #include "llvm/Support/raw_ostream.h"
 
diff --git a/llvm/include/llvm/IR/LLVMContext.h b/llvm/include/llvm/IR/LLVMContext.h
index 12d3a45171ce8..d165a405ce224 100644
--- a/llvm/include/llvm/IR/LLVMContext.h
+++ b/llvm/include/llvm/IR/LLVMContext.h
@@ -15,6 +15,7 @@
 #define LLVM_IR_LLVMCONTEXT_H
 
 #include "llvm-c/Types.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/IR/DiagnosticHandler.h"
 #include "llvm/Support/CBindingWrapping.h"
 #include <cstdint>
diff --git a/llvm/include/llvm/MC/SubtargetFeature.h b/llvm/include/llvm/MC/SubtargetFeature.h
index cc36b25a4965d..032e2a7df1f2d 100644
--- a/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/llvm/include/llvm/MC/SubtargetFeature.h
@@ -18,6 +18,7 @@
 #define LLVM_MC_SUBTARGETFEATURE_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/MathExtras.h"
 #include <array>
 #include <bitset>
diff --git a/llvm/include/llvm/Support/CrashRecoveryContext.h b/llvm/include/llvm/Support/CrashRecoveryContext.h
index 2604ccb384316..f60e7335e197d 100644
--- a/llvm/include/llvm/Support/CrashRecoveryContext.h
+++ b/llvm/include/llvm/Support/CrashRecoveryContext.h
@@ -9,7 +9,7 @@
 #ifndef LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H
 #define LLVM_SUPPORT_CRASHRECOVERYCONTEXT_H
 
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 
 namespace llvm {
 class CrashRecoveryContextCleanup;
diff --git a/llvm/include/llvm/Support/JSON.h b/llvm/include/llvm/Support/JSON.h
index 469f50be40e03..719e8b60d0faf 100644
--- a/llvm/include/llvm/Support/JSON.h
+++ b/llvm/include/llvm/Support/JSON.h
@@ -49,6 +49,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
diff --git a/llvm/include/llvm/Support/TimeProfiler.h b/llvm/include/llvm/Support/TimeProfiler.h
index 6141acc99db28..378253dc2ab9a 100644
--- a/llvm/include/llvm/Support/TimeProfiler.h
+++ b/llvm/include/llvm/Support/TimeProfiler.h
@@ -10,6 +10,7 @@
 #define LLVM_SUPPORT_TIMEPROFILER_H
 
 #include "llvm/Support/Error.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 
 namespace llvm {
 
diff --git a/llvm/include/llvm/Support/Timer.h b/llvm/include/llvm/Support/Timer.h
index 5a55491b3276a..eb49e805b40db 100644
--- a/llvm/include/llvm/Support/Timer.h
+++ b/llvm/include/llvm/Support/Timer.h
@@ -13,6 +13,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
+#include <memory>
 #include <string>
 #include <vector>
 
diff --git a/llvm/include/llvm/Support/VirtualFileSystem.h b/llvm/include/llvm/Support/VirtualFileSystem.h
index 305096dff67eb..f5dde334b0a72 100644
--- a/llvm/include/llvm/Support/VirtualFileSystem.h
+++ b/llvm/include/llvm/Support/VirtualFileSystem.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/Support/Chrono.h"
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/FileSystem.h"
diff --git a/llvm/include/llvm/Support/raw_ostream.h b/llvm/include/llvm/Support/raw_ostream.h
index e288ac27e804d..58adb41cb0ef6 100644
--- a/llvm/include/llvm/Support/raw_ostream.h
+++ b/llvm/include/llvm/Support/raw_ostream.h
@@ -15,6 +15,7 @@
 
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/Support/DataTypes.h"
 #include <cassert>
 #include <cstddef>
diff --git a/llvm/include/llvm/Testing/Support/Annotations.h b/llvm/include/llvm/Testing/Support/Annotations.h
index cc99d10615202..4e442269600d0 100644
--- a/llvm/include/llvm/Testing/Support/Annotations.h
+++ b/llvm/include/llvm/Testing/Support/Annotations.h
@@ -16,6 +16,8 @@
 
 namespace llvm {
 
+class raw_ostream;
+
 /// Annotations lets you mark points and ranges inside source code, for tests:
 ///
 ///    Annotations Example(R"cpp(
diff --git a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
index 9ed3471c0fc96..db5217469fbae 100644
--- a/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
+++ b/llvm/lib/CodeGen/NonRelocatableStringpool.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CodeGen/NonRelocatableStringpool.h"
+#include "llvm/ADT/STLExtras.h"
 
 namespace llvm {
 
diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index 481ba56c4077b..ed4f01f176c2f 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -22,7 +22,7 @@
 #include "llvm-c/Support.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
diff --git a/llvm/lib/Support/StringRef.cpp b/llvm/lib/Support/StringRef.cpp
index 652303fdb6a05..3ed08ed386617 100644
--- a/llvm/lib/Support/StringRef.cpp
+++ b/llvm/lib/Support/StringRef.cpp
@@ -597,3 +597,11 @@ bool StringRef::getAsDouble(double &Result, bool AllowInexact) const {
 hash_code llvm::hash_value(StringRef S) {
   return hash_combine_range(S.begin(), S.end());
 }
+
+unsigned DenseMapInfo<StringRef, void>::getHashValue(StringRef Val) {
+  assert(Val.data() != getEmptyKey().data() &&
+         "Cannot hash the empty key!");
+  assert(Val.data() != getTombstoneKey().data() &&
+         "Cannot hash the tombstone key!");
+  return (unsigned)(hash_value(Val));
+}
diff --git a/llvm/lib/Support/TimeProfiler.cpp b/llvm/lib/Support/TimeProfiler.cpp
index a727bfa51731d..9380fa01c84a4 100644
--- a/llvm/lib/Support/TimeProfiler.cpp
+++ b/llvm/lib/Support/TimeProfiler.cpp
@@ -11,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/TimeProfiler.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/ManagedStatic.h"
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
index c97223b047e88..dc0ac72016f37 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.h
@@ -10,6 +10,7 @@
 #define _AMDGPU_LIBFUNC_H_
 
 #include "llvm/ADT/StringRef.h"
+#include <memory>
 
 namespace llvm {
 
diff --git a/llvm/unittests/ADT/SequenceTest.cpp b/llvm/unittests/ADT/SequenceTest.cpp
index 71fae0b56ffda..aec1ea457aeb3 100644
--- a/llvm/unittests/ADT/SequenceTest.cpp
+++ b/llvm/unittests/ADT/SequenceTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/Sequence.h"
+#include "llvm/ADT/STLExtras.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
diff --git a/llvm/unittests/ADT/SimpleIListTest.cpp b/llvm/unittests/ADT/SimpleIListTest.cpp
index 238896eaefbb4..17dd140c0a4da 100644
--- a/llvm/unittests/ADT/SimpleIListTest.cpp
+++ b/llvm/unittests/ADT/SimpleIListTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/simple_ilist.h"
+#include "llvm/ADT/STLExtras.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/llvm/unittests/ADT/SmallPtrSetTest.cpp b/llvm/unittests/ADT/SmallPtrSetTest.cpp
index 6f3c94eed2737..414298c4e67df 100644
--- a/llvm/unittests/ADT/SmallPtrSetTest.cpp
+++ b/llvm/unittests/ADT/SmallPtrSetTest.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/PointerLikeTypeTraits.h"
 #include "gtest/gtest.h"
 
diff --git a/llvm/unittests/ADT/StringMapTest.cpp b/llvm/unittests/ADT/StringMapTest.cpp
index f38a60452e3c7..817fec6c37a2a 100644
--- a/llvm/unittests/ADT/StringMapTest.cpp
+++ b/llvm/unittests/ADT/StringMapTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/DataTypes.h"
 #include "gtest/gtest.h"
diff --git a/llvm/unittests/ADT/StringSetTest.cpp b/llvm/unittests/ADT/StringSetTest.cpp
index 0071f5a3775be..413314976ac2a 100644
--- a/llvm/unittests/ADT/StringSetTest.cpp
+++ b/llvm/unittests/ADT/StringSetTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/STLExtras.h"
 #include "gtest/gtest.h"
 using namespace llvm;
 
diff --git a/llvm/unittests/Support/ReverseIterationTest.cpp b/llvm/unittests/Support/ReverseIterationTest.cpp
index 657368283efcc..f199e21a2362f 100644
--- a/llvm/unittests/Support/ReverseIterationTest.cpp
+++ b/llvm/unittests/Support/ReverseIterationTest.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/ReverseIteration.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/STLExtras.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
index a87158658d837..c43eb24973bc3 100644
--- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
+++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
@@ -10,6 +10,7 @@
 
 #include "mlir/Support/LLVM.h"
 #include "llvm/ADT/StringRef.h"
+#include <functional>
 #include <vector>
 
 namespace llvm {
diff --git a/mlir/lib/Parser/Token.h b/mlir/lib/Parser/Token.h
index ad9c23cd60be1..c086a0a194b92 100644
--- a/mlir/lib/Parser/Token.h
+++ b/mlir/lib/Parser/Token.h
@@ -10,6 +10,7 @@
 #define MLIR_LIB_PARSER_TOKEN_H
 
 #include "mlir/Support/LLVM.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SMLoc.h"
 

From 853e79d8d8af7376a81bf692f26a0c3468811d29 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Mon, 24 Jan 2022 14:14:49 +0100
Subject: [PATCH 374/946] [flang] Update tco tool pipline and add translation
 to LLVM IR

tco is a tool to test the FIR to LLVM IR pipeline of the Flang compiler.

This patch update tco pipelines and adds the translation to LLVM IR.

A simple test is added to make sure the tool is working with a simple
FIR program.
More tests will be upstream in follow up patch from the fir-dev branch.

This patch is part of the upstreaming effort from fir-dev branch.

Reviewed By: kiranchandramohan, awarzynski, schweitz, mehdi_amini

Differential Revision: https://reviews.llvm.org/D117781

Co-authored-by: Eric Schweitz <eschweitz@nvidia.com>
Co-authored-by: Jean Perier <jperier@nvidia.com>
Co-authored-by: Andrzej Warzynski <andrzej.warzynski@arm.com>
---
 .../include/flang/Optimizer/CodeGen/CodeGen.h |  10 +-
 .../include/flang/Optimizer/Support/InitFIR.h |  12 +-
 flang/include/flang/Tools/CLOptions.inc       | 160 ++++++++++++++++++
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       |  37 ++++
 flang/lib/Optimizer/Support/CMakeLists.txt    |   1 +
 flang/lib/Optimizer/Support/InitFIR.cpp       |  20 +++
 flang/test/Fir/basic-program.fir              |  11 ++
 flang/tools/tco/CMakeLists.txt                |  15 +-
 flang/tools/tco/tco.cpp                       |  44 ++++-
 9 files changed, 295 insertions(+), 15 deletions(-)
 create mode 100644 flang/include/flang/Tools/CLOptions.inc
 create mode 100644 flang/lib/Optimizer/Support/InitFIR.cpp
 create mode 100644 flang/test/Fir/basic-program.fir

diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index 1bd31b207859a..939d6aebb524d 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -12,6 +12,8 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassRegistry.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/raw_ostream.h"
 #include <memory>
 
 namespace fir {
@@ -36,9 +38,13 @@ std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>> createFirTargetRewritePass(
 /// Convert FIR to the LLVM IR dialect
 std::unique_ptr<mlir::Pass> createFIRToLLVMPass();
 
+using LLVMIRLoweringPrinter =
+    std::function<void(llvm::Module &, llvm::raw_ostream &)>;
 /// Convert the LLVM IR dialect to LLVM-IR proper
-std::unique_ptr<mlir::Pass>
-createLLVMDialectToLLVMPass(llvm::raw_ostream &output);
+std::unique_ptr<mlir::Pass> createLLVMDialectToLLVMPass(
+    llvm::raw_ostream &output,
+    LLVMIRLoweringPrinter printer =
+        [](llvm::Module &m, llvm::raw_ostream &out) { m.print(out, nullptr); });
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index e78967de2a383..2e8c1685a06f7 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -13,7 +13,6 @@
 #ifndef FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
 #define FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
 
-#include "flang/Optimizer/CodeGen/CodeGen.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "mlir/Conversion/Passes.h"
 #include "mlir/Dialect/Affine/Passes.h"
@@ -35,11 +34,19 @@ namespace fir::support {
 #define FLANG_DIALECT_LIST                                                     \
   FLANG_NONCODEGEN_DIALECT_LIST, FIRCodeGenDialect, mlir::LLVM::LLVMDialect
 
+inline void registerNonCodegenDialects(mlir::DialectRegistry &registry) {
+  registry.insert<FLANG_NONCODEGEN_DIALECT_LIST>();
+}
+
 /// Register all the dialects used by flang.
 inline void registerDialects(mlir::DialectRegistry &registry) {
   registry.insert<FLANG_DIALECT_LIST>();
 }
 
+inline void loadNonCodegenDialects(mlir::MLIRContext &context) {
+  context.loadDialect<FLANG_NONCODEGEN_DIALECT_LIST>();
+}
+
 /// Forced load of all the dialects used by flang.  Lowering is not an MLIR
 /// pass, but a producer of FIR and MLIR. It is therefore a requirement that the
 /// dialects be preloaded to be able to build the IR.
@@ -75,6 +82,9 @@ inline void registerMLIRPassesForFortranTools() {
   mlir::registerConvertAffineToStandardPass();
 }
 
+/// Register the interfaces needed to lower to LLVM IR.
+void registerLLVMTranslation(mlir::MLIRContext &context);
+
 } // namespace fir::support
 
 #endif // FORTRAN_OPTIMIZER_SUPPORT_INITFIR_H
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
new file mode 100644
index 0000000000000..1c85075d5cc17
--- /dev/null
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -0,0 +1,160 @@
+//===-- CLOptions.inc -- command line options -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// This file defines some shared command-line options that can be used when
+/// debugging the test tools. This file must be included into the tool.
+
+#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "mlir/Transforms/Passes.h"
+#include "flang/Optimizer/CodeGen/CodeGen.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "llvm/Support/CommandLine.h"
+
+#define DisableOption(DOName, DOOption, DODescription) \
+  static llvm::cl::opt<bool> disable##DOName("disable-" DOOption, \
+      llvm::cl::desc("disable " DODescription " pass"), llvm::cl::init(false), \
+      llvm::cl::Hidden)
+
+/// Shared option in tools to control whether dynamically sized array
+/// allocations should always be on the heap.
+static llvm::cl::opt<bool> dynamicArrayStackToHeapAllocation(
+    "fdynamic-heap-array",
+    llvm::cl::desc("place all array allocations of dynamic size on the heap"),
+    llvm::cl::init(false), llvm::cl::Hidden);
+
+/// Shared option in tools to set a maximum value for the number of elements in
+/// a compile-time sized array that can be allocated on the stack.
+static llvm::cl::opt<std::size_t> arrayStackAllocationThreshold(
+    "fstack-array-size",
+    llvm::cl::desc(
+        "place all array allocations more than <size> elements on the heap"),
+    llvm::cl::init(~static_cast<std::size_t>(0)), llvm::cl::Hidden);
+
+namespace {
+/// Optimizer Passes
+DisableOption(CfgConversion, "cfg-conversion", "disable FIR to CFG pass");
+DisableOption(FirAvc, "avc", "array value copy analysis and transformation");
+DisableOption(
+    FirMao, "memory-allocation-opt", "memory allocation optimization");
+
+/// CodeGen Passes
+#if !defined(FLANG_EXCLUDE_CODEGEN)
+DisableOption(CodeGenRewrite, "codegen-rewrite", "rewrite FIR for codegen");
+DisableOption(TargetRewrite, "target-rewrite", "rewrite FIR for target");
+DisableOption(FirToLlvmIr, "fir-to-llvmir", "FIR to LLVM-IR dialect");
+DisableOption(LlvmIrToLlvm, "llvm", "conversion to LLVM");
+#endif
+
+/// Generic for adding a pass to the pass manager if it is not disabled.
+template <typename F>
+void addPassConditionally(
+    mlir::PassManager &pm, llvm::cl::opt<bool> &disabled, F ctor) {
+  if (!disabled)
+    pm.addPass(ctor());
+}
+
+template <typename OP, typename F>
+void addNestedPassConditionally(
+    mlir::PassManager &pm, llvm::cl::opt<bool> &disabled, F ctor) {
+  if (!disabled)
+    pm.addNestedPass<OP>(ctor());
+}
+
+} // namespace
+
+namespace fir {
+
+static void defaultFlangInlinerOptPipeline(mlir::OpPassManager &pm) {
+  mlir::GreedyRewriteConfig config;
+  config.enableRegionSimplification = false;
+  pm.addPass(mlir::createCanonicalizerPass(config));
+}
+
+inline void addCfgConversionPass(mlir::PassManager &pm) {
+  addNestedPassConditionally<mlir::FuncOp>(
+      pm, disableCfgConversion, fir::createFirToCfgPass);
+}
+
+inline void addAVC(mlir::PassManager &pm) {
+  addNestedPassConditionally<mlir::FuncOp>(
+      pm, disableFirAvc, fir::createArrayValueCopyPass);
+}
+
+#if !defined(FLANG_EXCLUDE_CODEGEN)
+inline void addCodeGenRewritePass(mlir::PassManager &pm) {
+  addPassConditionally(
+      pm, disableCodeGenRewrite, fir::createFirCodeGenRewritePass);
+}
+
+inline void addTargetRewritePass(mlir::PassManager &pm) {
+  addPassConditionally(pm, disableTargetRewrite, []() {
+    return fir::createFirTargetRewritePass(fir::TargetRewriteOptions{});
+  });
+}
+
+inline void addFIRToLLVMPass(mlir::PassManager &pm) {
+  addPassConditionally(pm, disableFirToLlvmIr, fir::createFIRToLLVMPass);
+}
+
+inline void addLLVMDialectToLLVMPass(
+    mlir::PassManager &pm, llvm::raw_ostream &output) {
+  addPassConditionally(pm, disableLlvmIrToLlvm,
+      [&]() { return fir::createLLVMDialectToLLVMPass(output); });
+}
+#endif
+
+/// Create a pass pipeline for running default optimization passes for
+/// incremental conversion of FIR.
+///
+/// \param pm - MLIR pass manager that will hold the pipeline definition
+inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm) {
+  // simplify the IR
+  mlir::GreedyRewriteConfig config;
+  config.enableRegionSimplification = false;
+  fir::addAVC(pm);
+  pm.addNestedPass<mlir::FuncOp>(fir::createCharacterConversionPass());
+  pm.addPass(mlir::createCanonicalizerPass(config));
+
+  // The default inliner pass adds the canonicalizer pass with the default
+  // configuration. Create the inliner pass with tco config.
+  llvm::StringMap<mlir::OpPassManager> pipelines;
+  pm.addPass(
+      mlir::createInlinerPass(pipelines, defaultFlangInlinerOptPipeline));
+  pm.addPass(mlir::createCSEPass());
+
+  // convert control flow to CFG form
+  fir::addCfgConversionPass(pm);
+  pm.addPass(mlir::createLowerToCFGPass());
+
+  pm.addPass(mlir::createCanonicalizerPass(config));
+}
+
+#if !defined(FLANG_EXCLUDE_CODEGEN)
+inline void createDefaultFIRCodeGenPassPipeline(mlir::PassManager &pm) {
+  pm.addNestedPass<mlir::FuncOp>(fir::createAbstractResultOptPass());
+  fir::addCodeGenRewritePass(pm);
+  fir::addTargetRewritePass(pm);
+  fir::addFIRToLLVMPass(pm);
+}
+
+/// Create a pass pipeline for lowering from MLIR to LLVM IR
+///
+/// \param pm - MLIR pass manager that will hold the pipeline definition
+inline void createMLIRToLLVMPassPipeline(mlir::PassManager &pm) {
+  // Add default optimizer pass pipeline.
+  fir::createDefaultFIROptimizerPassPipeline(pm);
+
+  // Add codegen pass pipeline.
+  fir::createDefaultFIRCodeGenPassPipeline(pm);
+}
+#undef FLANG_EXCLUDE_CODEGEN
+#endif
+
+} // namespace fir
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index be2e7cde916df..40d6d2017b2fa 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -23,6 +23,7 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Pass/Pass.h"
+#include "mlir/Target/LLVMIR/ModuleTranslation.h"
 #include "llvm/ADT/ArrayRef.h"
 
 #define DEBUG_TYPE "flang-codegen"
@@ -3305,8 +3306,44 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase<FIRToLLVMLowering> {
     }
   }
 };
+
+/// Lower from LLVM IR dialect to proper LLVM-IR and dump the module
+struct LLVMIRLoweringPass
+    : public mlir::PassWrapper<LLVMIRLoweringPass,
+                               mlir::OperationPass<mlir::ModuleOp>> {
+  using Printer = fir::LLVMIRLoweringPrinter;
+  LLVMIRLoweringPass(raw_ostream &output, Printer p)
+      : output{output}, printer{p} {}
+
+  mlir::ModuleOp getModule() { return getOperation(); }
+
+  void runOnOperation() override final {
+    auto *ctx = getModule().getContext();
+    auto optName = getModule().getName();
+    llvm::LLVMContext llvmCtx;
+    if (auto llvmModule = mlir::translateModuleToLLVMIR(
+            getModule(), llvmCtx, optName ? *optName : "FIRModule")) {
+      printer(*llvmModule, output);
+      return;
+    }
+
+    mlir::emitError(mlir::UnknownLoc::get(ctx), "could not emit LLVM-IR\n");
+    signalPassFailure();
+  }
+
+private:
+  raw_ostream &output;
+  Printer printer;
+};
+
 } // namespace
 
 std::unique_ptr<mlir::Pass> fir::createFIRToLLVMPass() {
   return std::make_unique<FIRToLLVMLowering>();
 }
+
+std::unique_ptr<mlir::Pass>
+fir::createLLVMDialectToLLVMPass(raw_ostream &output,
+                                 fir::LLVMIRLoweringPrinter printer) {
+  return std::make_unique<LLVMIRLoweringPass>(output, printer);
+}
diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt
index 30a163de9ccaf..779e20711513e 100644
--- a/flang/lib/Optimizer/Support/CMakeLists.txt
+++ b/flang/lib/Optimizer/Support/CMakeLists.txt
@@ -2,6 +2,7 @@ get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
 
 add_flang_library(FIRSupport
   FIRContext.cpp
+  InitFIR.cpp
   InternalNames.cpp
   KindMapping.cpp
 
diff --git a/flang/lib/Optimizer/Support/InitFIR.cpp b/flang/lib/Optimizer/Support/InitFIR.cpp
new file mode 100644
index 0000000000000..baa1336d9ca02
--- /dev/null
+++ b/flang/lib/Optimizer/Support/InitFIR.cpp
@@ -0,0 +1,20 @@
+//===-- Optimizer/Support/InitFIR.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Support/InitFIR.h"
+#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
+#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
+
+void fir::support::registerLLVMTranslation(mlir::MLIRContext &context) {
+  mlir::DialectRegistry registry;
+  // Register OpenMP dialect interface here as well.
+  mlir::registerOpenMPDialectTranslation(registry);
+  // Register LLVM-IR dialect interface.
+  registerLLVMDialectTranslation(registry);
+  context.appendDialectRegistry(registry);
+}
diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir
new file mode 100644
index 0000000000000..02463bef99496
--- /dev/null
+++ b/flang/test/Fir/basic-program.fir
@@ -0,0 +1,11 @@
+// RUN: tco --target=x86_64-unknown-linux-gnu %s | FileCheck %s
+
+// Check that tco is working with a basic test.
+
+func @_QQmain() {
+  return
+}
+
+// CHECK: ; ModuleID = 'FIRModule'
+// CHECK-LABEL: define void @_QQmain()
+// CHECK:       ret void
diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt
index 1a9c5ac72f153..49ba32f01dd91 100644
--- a/flang/tools/tco/CMakeLists.txt
+++ b/flang/tools/tco/CMakeLists.txt
@@ -1,13 +1,21 @@
-get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+set(LLVM_LINK_COMPONENTS
+  ${LLVM_TARGETS_TO_BUILD}
+)
 
-set(LIBS
+add_flang_tool(tco tco.cpp)
+llvm_update_compile_flags(tco)
+get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
+target_link_libraries(tco PRIVATE
   FIRCodeGen
   FIRDialect
   FIRSupport
   FIRTransforms
+  FIRBuilder
   ${dialect_libs}
   MLIRIR
   MLIRLLVMIR
+  MLIRLLVMToLLVMIRTranslation
+  MLIRTargetLLVMIRExport
   MLIRPass
   MLIRStandardToLLVM
   MLIRTransforms
@@ -19,6 +27,3 @@ set(LIBS
   MLIRSupport
   MLIRVectorToLLVM
 )
-
-add_flang_tool(tco tco.cpp)
-target_link_libraries(tco PRIVATE ${LIBS})
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index 8f2c283bc82f9..db01ea545580c 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -11,8 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "flang/Optimizer/CodeGen/CodeGen.h"
+#include "flang/Optimizer/Support/FIRContext.h"
 #include "flang/Optimizer/Support/InitFIR.h"
+#include "flang/Optimizer/Support/InternalNames.h"
 #include "flang/Optimizer/Support/KindMapping.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Conversion/SCFToStandard/SCFToStandard.h"
+#include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/Parser.h"
@@ -25,6 +31,7 @@
 #include "llvm/Support/InitLLVM.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetSelect.h"
 #include "llvm/Support/ToolOutputFile.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -42,8 +49,14 @@ static cl::opt<bool> emitFir("emit-fir",
                              cl::desc("Parse and pretty-print the input"),
                              cl::init(false));
 
+static cl::opt<std::string> targetTriple("target",
+                                         cl::desc("specify a target triple"),
+                                         cl::init("native"));
+
+#include "flang/Tools/CLOptions.inc"
+
 static void printModuleBody(mlir::ModuleOp mod, raw_ostream &output) {
-  for (auto &op : mod.getBody()->without_terminator())
+  for (auto &op : *mod.getBody())
     output << op << '\n';
 }
 
@@ -65,6 +78,8 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
   mlir::DialectRegistry registry;
   fir::support::registerDialects(registry);
   mlir::MLIRContext context(registry);
+  fir::support::loadDialects(context);
+  fir::support::registerLLVMTranslation(context);
   auto owningRef = mlir::parseSourceFile(sourceMgr, &context);
 
   if (!owningRef) {
@@ -80,21 +95,31 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
   ToolOutputFile out(outputFilename, ec, sys::fs::OF_None);
 
   // run passes
-  mlir::PassManager pm{&context};
+  fir::KindMapping kindMap{&context};
+  fir::setTargetTriple(*owningRef, targetTriple);
+  fir::setKindMapping(*owningRef, kindMap);
+  mlir::PassManager pm(&context, mlir::OpPassManager::Nesting::Implicit);
+  pm.enableVerifier(/*verifyPasses=*/true);
   mlir::applyPassManagerCLOptions(pm);
   if (emitFir) {
     // parse the input and pretty-print it back out
     // -emit-fir intentionally disables all the passes
+  } else if (passPipeline.hasAnyOccurrences()) {
+    auto errorHandler = [&](const Twine &msg) {
+      mlir::emitError(mlir::UnknownLoc::get(pm.getContext())) << msg;
+      return mlir::failure();
+    };
+    if (mlir::failed(passPipeline.addToPipeline(pm, errorHandler)))
+      return mlir::failure();
   } else {
-    // TODO: Actually add passes when added to FIR code base
-    // add all the passes
-    // the user can disable them individually
+    fir::createMLIRToLLVMPassPipeline(pm);
+    fir::addLLVMDialectToLLVMPass(pm, out.os());
   }
 
   // run the pass manager
   if (mlir::succeeded(pm.run(*owningRef))) {
     // passes ran successfully, so keep the output
-    if (emitFir)
+    if (emitFir || passPipeline.hasAnyOccurrences())
       printModuleBody(*owningRef, out.os());
     out.keep();
     return mlir::success();
@@ -107,8 +132,13 @@ compileFIR(const mlir::PassPipelineCLParser &passPipeline) {
 }
 
 int main(int argc, char **argv) {
-  fir::support::registerMLIRPassesForFortranTools();
   [[maybe_unused]] InitLLVM y(argc, argv);
+  fir::support::registerMLIRPassesForFortranTools();
+  fir::registerOptCodeGenPasses();
+  fir::registerOptTransformPasses();
+  InitializeAllTargets();
+  mlir::registerAsmPrinterCLOptions();
+  mlir::registerMLIRContextCLOptions();
   mlir::registerPassManagerCLOptions();
   mlir::PassPipelineCLParser passPipe("", "Compiler passes to run");
   cl::ParseCommandLineOptions(argc, argv, "Tilikum Crossing Optimizer\n");

From 38ffea9b4c1f59ebf434a9774cd7ae741cc87d70 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Mon, 24 Jan 2022 04:11:59 -0800
Subject: [PATCH 375/946] [demangler] Resync demangler sources

Recent commits changed llvm/include/llvm/Demangle without also
changing libcxxabi/src/Demangle, which is the canonical source
location.  This resyncs those commits to the libcxxabi directory.

Commits:
* 1f9e18b6565fd1bb69c4b649b9efd3467b3c7c7d
* f53d359816e66a107195e1e4b581e2a33bbafaa4
* 065044c443f4041f32e0a8d6e633f9d92580fbca

Reviewed By: ChuanqiXu

Differential Revision: https://reviews.llvm.org/D117990.diff
---
 libcxxabi/src/demangle/ItaniumDemangle.h |  3 ++-
 libcxxabi/src/demangle/StringView.h      | 14 +++++++-------
 libcxxabi/src/demangle/Utility.h         |  9 +++++----
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index b25139d8a72ba..01f414a7257bf 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -21,12 +21,13 @@
 #include "DemangleConfig.h"
 #include "StringView.h"
 #include "Utility.h"
+#include <algorithm>
 #include <cassert>
 #include <cctype>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
-#include <numeric>
+#include <limits>
 #include <utility>
 
 #define FOR_EACH_NODE_KIND(X) \
diff --git a/libcxxabi/src/demangle/StringView.h b/libcxxabi/src/demangle/StringView.h
index 1e4d3803f06cd..7c8cb482ae1c1 100644
--- a/libcxxabi/src/demangle/StringView.h
+++ b/libcxxabi/src/demangle/StringView.h
@@ -14,7 +14,6 @@
 #define DEMANGLE_STRINGVIEW_H
 
 #include "DemangleConfig.h"
-#include <algorithm>
 #include <cassert>
 #include <cstring>
 
@@ -38,15 +37,16 @@ class StringView {
 
   StringView substr(size_t Pos, size_t Len = npos) const {
     assert(Pos <= size());
-    return StringView(begin() + Pos, std::min(Len, size() - Pos));
+    if (Len > size() - Pos)
+      Len = size() - Pos;
+    return StringView(begin() + Pos, Len);
   }
 
   size_t find(char C, size_t From = 0) const {
-    size_t FindBegin = std::min(From, size());
     // Avoid calling memchr with nullptr.
-    if (FindBegin < size()) {
+    if (From < size()) {
       // Just forward to memchr, which is faster than a hand-rolled loop.
-      if (const void *P = ::memchr(First + FindBegin, C, size() - FindBegin))
+      if (const void *P = ::memchr(First + From, C, size() - From))
         return size_t(static_cast<const char *>(P) - First);
     }
     return npos;
@@ -98,7 +98,7 @@ class StringView {
   bool startsWith(StringView Str) const {
     if (Str.size() > size())
       return false;
-    return std::equal(Str.begin(), Str.end(), begin());
+    return std::strncmp(Str.begin(), begin(), Str.size()) == 0;
   }
 
   const char &operator[](size_t Idx) const { return *(begin() + Idx); }
@@ -111,7 +111,7 @@ class StringView {
 
 inline bool operator==(const StringView &LHS, const StringView &RHS) {
   return LHS.size() == RHS.size() &&
-         std::equal(LHS.begin(), LHS.end(), RHS.begin());
+         std::strncmp(LHS.begin(), RHS.begin(), LHS.size()) == 0;
 }
 
 DEMANGLE_NAMESPACE_END
diff --git a/libcxxabi/src/demangle/Utility.h b/libcxxabi/src/demangle/Utility.h
index 733d83ad1b6ba..587c0e4bec36d 100644
--- a/libcxxabi/src/demangle/Utility.h
+++ b/libcxxabi/src/demangle/Utility.h
@@ -14,10 +14,11 @@
 #define DEMANGLE_UTILITY_H
 
 #include "StringView.h"
+#include <array>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
-#include <iterator>
+#include <exception>
 #include <limits>
 
 DEMANGLE_NAMESPACE_BEGIN
@@ -48,8 +49,8 @@ class OutputBuffer {
       return;
     }
 
-    char Temp[21];
-    char *TempPtr = std::end(Temp);
+    std::array<char, 21> Temp;
+    char *TempPtr = Temp.data() + Temp.size();
 
     while (N) {
       *--TempPtr = char('0' + N % 10);
@@ -59,7 +60,7 @@ class OutputBuffer {
     // Add negative sign...
     if (isNeg)
       *--TempPtr = '-';
-    this->operator<<(StringView(TempPtr, std::end(Temp)));
+    this->operator<<(StringView(TempPtr, Temp.data() + Temp.size()));
   }
 
 public:

From 897d1bb659c2a23baed8ff4881960945125c40f7 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Mon, 24 Jan 2022 04:14:47 -0800
Subject: [PATCH 376/946] [demangler] write-protect non-canonical source

To try and avoid undesired changes to the non-canonical demangler
sources, change the cp-to-llvm script to (a) write-protect the target
files and (b) prepend 'do not edit' comments that are significant to
emacs[*], and hopefully humans.

Reviewed By: ChuanqiXu

Differential Revision: https://reviews.llvm.org/D118008
---
 libcxxabi/src/demangle/cp-to-llvm.sh         | 14 +++++++++++---
 llvm/include/llvm/Demangle/ItaniumDemangle.h |  8 +++++---
 llvm/include/llvm/Demangle/StringView.h      |  6 ++++--
 llvm/include/llvm/Demangle/Utility.h         |  6 ++++--
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/libcxxabi/src/demangle/cp-to-llvm.sh b/libcxxabi/src/demangle/cp-to-llvm.sh
index 808abbcd99bcc..e4342a0ee2364 100755
--- a/libcxxabi/src/demangle/cp-to-llvm.sh
+++ b/libcxxabi/src/demangle/cp-to-llvm.sh
@@ -5,7 +5,8 @@
 
 set -e
 
-FILES="ItaniumDemangle.h StringView.h Utility.h README.txt"
+cd $(dirname $0)
+HDRS="ItaniumDemangle.h StringView.h Utility.h"
 LLVM_DEMANGLE_DIR=$1
 
 if [[ -z "$LLVM_DEMANGLE_DIR" ]]; then
@@ -21,7 +22,14 @@ read -p "This will overwrite the copies of $FILES in $LLVM_DEMANGLE_DIR; are you
 echo
 
 if [[ $ANSWER =~ ^[Yy]$ ]]; then
-    for I in $FILES ; do
-        cp $I $LLVM_DEMANGLE_DIR/$I
+    cp -f README.txt $LLVM_DEMANGLE_DIR
+    chmod -w $LLVM_DEMANGLE_DIR/README.txt
+    for I in $HDRS ; do
+	rm -f $LLVM_DEMANGLE_DIR/$I
+	cat - $I >$LLVM_DEMANGLE_DIR/$I <<EOF
+// Do not edit! -*- read-only -*-
+// See README.txt for instructions
+EOF
+	chmod -w $LLVM_DEMANGLE_DIR/$I
     done
 fi
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 4df092c70ee1f..98ff73e21cca6 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -1,3 +1,5 @@
+// Do not edit! -*- read-only -*-
+// See README.txt for instructions
 //===------------------------- ItaniumDemangle.h ----------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -11,8 +13,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_ITANIUMDEMANGLE_H
-#define LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#ifndef DEMANGLE_ITANIUMDEMANGLE_H
+#define DEMANGLE_ITANIUMDEMANGLE_H
 
 // FIXME: (possibly) incomplete list of features that clang mangles that this
 // file does not yet support:
@@ -5749,4 +5751,4 @@ struct ManglingParser : AbstractManglingParser<ManglingParser<Alloc>, Alloc> {
 
 DEMANGLE_NAMESPACE_END
 
-#endif // LLVM_DEMANGLE_ITANIUMDEMANGLE_H
+#endif // DEMANGLE_ITANIUMDEMANGLE_H
diff --git a/llvm/include/llvm/Demangle/StringView.h b/llvm/include/llvm/Demangle/StringView.h
index 6b5d01c09fe5f..323282f69c269 100644
--- a/llvm/include/llvm/Demangle/StringView.h
+++ b/llvm/include/llvm/Demangle/StringView.h
@@ -1,3 +1,5 @@
+// Do not edit! -*- read-only -*-
+// See README.txt for instructions
 //===--- StringView.h -------------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -10,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_STRINGVIEW_H
-#define LLVM_DEMANGLE_STRINGVIEW_H
+#ifndef DEMANGLE_STRINGVIEW_H
+#define DEMANGLE_STRINGVIEW_H
 
 #include "DemangleConfig.h"
 #include <cassert>
diff --git a/llvm/include/llvm/Demangle/Utility.h b/llvm/include/llvm/Demangle/Utility.h
index dcd12b0daa88b..bec019da86808 100644
--- a/llvm/include/llvm/Demangle/Utility.h
+++ b/llvm/include/llvm/Demangle/Utility.h
@@ -1,3 +1,5 @@
+// Do not edit! -*- read-only -*-
+// See README.txt for instructions
 //===--- Utility.h ----------------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
@@ -10,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DEMANGLE_UTILITY_H
-#define LLVM_DEMANGLE_UTILITY_H
+#ifndef DEMANGLE_UTILITY_H
+#define DEMANGLE_UTILITY_H
 
 #include "StringView.h"
 #include <array>

From 6184e565ad4065026bc121fd13d6a8743a1fe593 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Mon, 24 Jan 2022 04:28:09 -0800
Subject: [PATCH 377/946] [demangler][NFC] Refactor some parsing

There's some unnecessary code duplication in the parser.  This
refactors that and deploys boolean variables to avoid the duplication.
These also happen to help adding module demangling (with an updated
mangling scheme).

1a) The grammar requires some lookahead concerning <template-args>. We
may discover an <unscoped-name> is actually <unscoped-template-name>
<template-args>.  (When <unscoped-name> was a substitution, there must
be a following <template-args>.)  Refactor parseName to only have one
code path looking for the 'I' indicating <template-args>.

1b) While there I altered the control flow to hold the result in a
variable, rather than tail call.  Made it easier to debug (and of
course an optimizer will DTRT here anyway).

2a) An <unscoped-name> can have an St or StL prefix.  No need for
completely separate code paths handling the following unqualified-name
though.

2b) Also no need to look for both 'St' and 'StL' separately.  Look for
'St' and then conditionally swallow an 'L'.

3) We get a similar issue as #1a when parsing a typeName.  Here I just
change the control flow slightly to bring the 'break' out to the end
of the 'S' block and embed the early return inside an if.  That's more
in keeping with the code style.

4) Although NFC, there's a new testcase as that's not covered by the
existing demangler tests and is significant in the #1a case above.

Reviewed By: ChuanqiXu

Differential Revision: https://reviews.llvm.org/D117879
---
 libcxxabi/src/demangle/ItaniumDemangle.h     | 82 +++++++++++---------
 libcxxabi/test/test_demangle.pass.cpp        |  2 +
 llvm/include/llvm/Demangle/ItaniumDemangle.h | 82 +++++++++++---------
 3 files changed, 92 insertions(+), 74 deletions(-)

diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 01f414a7257bf..45bb741f5629f 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -2593,34 +2593,38 @@ Node *AbstractManglingParser<Derived, Alloc>::parseName(NameState *State) {
   if (look() == 'Z')
     return getDerived().parseLocalName(State);
 
-  //        ::= <unscoped-template-name> <template-args>
-  if (look() == 'S' && look(1) != 't') {
-    Node *S = getDerived().parseSubstitution();
-    if (S == nullptr)
-      return nullptr;
-    if (look() != 'I')
-      return nullptr;
-    Node *TA = getDerived().parseTemplateArgs(State != nullptr);
-    if (TA == nullptr)
-      return nullptr;
-    if (State) State->EndsWithTemplateArgs = true;
-    return make<NameWithTemplateArgs>(S, TA);
+  Node *Result = nullptr;
+  bool IsSubst = look() == 'S' && look(1) != 't';
+  if (IsSubst) {
+    // A substitution must lead to:
+    //        ::= <unscoped-template-name> <template-args>
+    Result = getDerived().parseSubstitution();
+  } else {
+    // An unscoped name can be one of:
+    //        ::= <unscoped-name>
+    //        ::= <unscoped-template-name> <template-args>
+    Result = getDerived().parseUnscopedName(State);
   }
-
-  Node *N = getDerived().parseUnscopedName(State);
-  if (N == nullptr)
+  if (Result == nullptr)
     return nullptr;
-  //        ::= <unscoped-template-name> <template-args>
+
   if (look() == 'I') {
-    Subs.push_back(N);
+    //        ::= <unscoped-template-name> <template-args>
+    if (!IsSubst)
+      // An unscoped-template-name is substitutable.
+      Subs.push_back(Result);
     Node *TA = getDerived().parseTemplateArgs(State != nullptr);
     if (TA == nullptr)
       return nullptr;
-    if (State) State->EndsWithTemplateArgs = true;
-    return make<NameWithTemplateArgs>(N, TA);
+    if (State)
+      State->EndsWithTemplateArgs = true;
+    Result = make<NameWithTemplateArgs>(Result, TA);
+  } else if (IsSubst) {
+    // The substitution case must be followed by <template-args>.
+    return nullptr;
   }
-  //        ::= <unscoped-name>
-  return N;
+
+  return Result;
 }
 
 // <local-name> := Z <function encoding> E <entity name> [<discriminator>]
@@ -2665,13 +2669,17 @@ Node *AbstractManglingParser<Derived, Alloc>::parseLocalName(NameState *State) {
 template <typename Derived, typename Alloc>
 Node *
 AbstractManglingParser<Derived, Alloc>::parseUnscopedName(NameState *State) {
-  if (consumeIf("StL") || consumeIf("St")) {
-    Node *R = getDerived().parseUnqualifiedName(State);
-    if (R == nullptr)
-      return nullptr;
-    return make<StdQualifiedName>(R);
-  }
-  return getDerived().parseUnqualifiedName(State);
+  bool IsStd = consumeIf("St");
+  if (IsStd)
+    consumeIf('L');
+
+  Node *Result = getDerived().parseUnqualifiedName(State);
+  if (Result == nullptr)
+    return nullptr;
+  if (IsStd)
+    Result = make<StdQualifiedName>(Result);
+
+  return Result;
 }
 
 // <unqualified-name> ::= <operator-name> [abi-tags]
@@ -4066,9 +4074,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
   }
   //             ::= <substitution>  # See Compression below
   case 'S': {
-    if (look(1) && look(1) != 't') {
-      Node *Sub = getDerived().parseSubstitution();
-      if (Sub == nullptr)
+    if (look(1) != 't') {
+      Result = getDerived().parseSubstitution();
+      if (Result == nullptr)
         return nullptr;
 
       // Sub could be either of:
@@ -4085,13 +4093,13 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
         Node *TA = getDerived().parseTemplateArgs();
         if (TA == nullptr)
           return nullptr;
-        Result = make<NameWithTemplateArgs>(Sub, TA);
-        break;
+        Result = make<NameWithTemplateArgs>(Result, TA);
+      } else {
+        // If all we parsed was a substitution, don't re-insert into the
+        // substitution table.
+        return Result;
       }
-
-      // If all we parsed was a substitution, don't re-insert into the
-      // substitution table.
-      return Sub;
+      break;
     }
     DEMANGLE_FALLTHROUGH;
   }
diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp
index b1bc05661144a..7bbb17c581cc1 100644
--- a/libcxxabi/test/test_demangle.pass.cpp
+++ b/libcxxabi/test/test_demangle.pass.cpp
@@ -29855,6 +29855,8 @@ const char* cases[][2] =
     // This should be invalid, but it is currently not recognized as such
     // See https://llvm.org/PR51407
     {"_Zcv1BIRT_EIS1_E", "operator B<><>"},
+
+    {"_Z3TPLIiET_S0_", "int TPL<int>(int)"},
 };
 
 const unsigned N = sizeof(cases) / sizeof(cases[0]);
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index 98ff73e21cca6..a3a0381edf97d 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -2595,34 +2595,38 @@ Node *AbstractManglingParser<Derived, Alloc>::parseName(NameState *State) {
   if (look() == 'Z')
     return getDerived().parseLocalName(State);
 
-  //        ::= <unscoped-template-name> <template-args>
-  if (look() == 'S' && look(1) != 't') {
-    Node *S = getDerived().parseSubstitution();
-    if (S == nullptr)
-      return nullptr;
-    if (look() != 'I')
-      return nullptr;
-    Node *TA = getDerived().parseTemplateArgs(State != nullptr);
-    if (TA == nullptr)
-      return nullptr;
-    if (State) State->EndsWithTemplateArgs = true;
-    return make<NameWithTemplateArgs>(S, TA);
+  Node *Result = nullptr;
+  bool IsSubst = look() == 'S' && look(1) != 't';
+  if (IsSubst) {
+    // A substitution must lead to:
+    //        ::= <unscoped-template-name> <template-args>
+    Result = getDerived().parseSubstitution();
+  } else {
+    // An unscoped name can be one of:
+    //        ::= <unscoped-name>
+    //        ::= <unscoped-template-name> <template-args>
+    Result = getDerived().parseUnscopedName(State);
   }
-
-  Node *N = getDerived().parseUnscopedName(State);
-  if (N == nullptr)
+  if (Result == nullptr)
     return nullptr;
-  //        ::= <unscoped-template-name> <template-args>
+
   if (look() == 'I') {
-    Subs.push_back(N);
+    //        ::= <unscoped-template-name> <template-args>
+    if (!IsSubst)
+      // An unscoped-template-name is substitutable.
+      Subs.push_back(Result);
     Node *TA = getDerived().parseTemplateArgs(State != nullptr);
     if (TA == nullptr)
       return nullptr;
-    if (State) State->EndsWithTemplateArgs = true;
-    return make<NameWithTemplateArgs>(N, TA);
+    if (State)
+      State->EndsWithTemplateArgs = true;
+    Result = make<NameWithTemplateArgs>(Result, TA);
+  } else if (IsSubst) {
+    // The substitution case must be followed by <template-args>.
+    return nullptr;
   }
-  //        ::= <unscoped-name>
-  return N;
+
+  return Result;
 }
 
 // <local-name> := Z <function encoding> E <entity name> [<discriminator>]
@@ -2667,13 +2671,17 @@ Node *AbstractManglingParser<Derived, Alloc>::parseLocalName(NameState *State) {
 template <typename Derived, typename Alloc>
 Node *
 AbstractManglingParser<Derived, Alloc>::parseUnscopedName(NameState *State) {
-  if (consumeIf("StL") || consumeIf("St")) {
-    Node *R = getDerived().parseUnqualifiedName(State);
-    if (R == nullptr)
-      return nullptr;
-    return make<StdQualifiedName>(R);
-  }
-  return getDerived().parseUnqualifiedName(State);
+  bool IsStd = consumeIf("St");
+  if (IsStd)
+    consumeIf('L');
+
+  Node *Result = getDerived().parseUnqualifiedName(State);
+  if (Result == nullptr)
+    return nullptr;
+  if (IsStd)
+    Result = make<StdQualifiedName>(Result);
+
+  return Result;
 }
 
 // <unqualified-name> ::= <operator-name> [abi-tags]
@@ -4068,9 +4076,9 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
   }
   //             ::= <substitution>  # See Compression below
   case 'S': {
-    if (look(1) && look(1) != 't') {
-      Node *Sub = getDerived().parseSubstitution();
-      if (Sub == nullptr)
+    if (look(1) != 't') {
+      Result = getDerived().parseSubstitution();
+      if (Result == nullptr)
         return nullptr;
 
       // Sub could be either of:
@@ -4087,13 +4095,13 @@ Node *AbstractManglingParser<Derived, Alloc>::parseType() {
         Node *TA = getDerived().parseTemplateArgs();
         if (TA == nullptr)
           return nullptr;
-        Result = make<NameWithTemplateArgs>(Sub, TA);
-        break;
+        Result = make<NameWithTemplateArgs>(Result, TA);
+      } else {
+        // If all we parsed was a substitution, don't re-insert into the
+        // substitution table.
+        return Result;
       }
-
-      // If all we parsed was a substitution, don't re-insert into the
-      // substitution table.
-      return Sub;
+      break;
     }
     DEMANGLE_FALLTHROUGH;
   }

From 589a93907222cf2380198ca2172ff6697dd43d87 Mon Sep 17 00:00:00 2001
From: Evgeny Shulgin <izaronplatz@gmail.com>
Date: Mon, 24 Jan 2022 08:35:42 -0500
Subject: [PATCH 378/946] Add `isConstinit` matcher

Support C++20 constinit variables for AST Matchers.
---
 clang/docs/LibASTMatchersReference.html       | 13 +++++++++++++
 clang/docs/ReleaseNotes.rst                   |  2 ++
 clang/include/clang/ASTMatchers/ASTMatchers.h | 17 +++++++++++++++++
 clang/lib/ASTMatchers/Dynamic/Registry.cpp    |  1 +
 .../ASTMatchers/ASTMatchersNarrowingTest.cpp  | 19 +++++++++++++++++++
 5 files changed, 52 insertions(+)

diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html
index fb856de0936dc..4c3916c0325c9 100644
--- a/clang/docs/LibASTMatchersReference.html
+++ b/clang/docs/LibASTMatchersReference.html
@@ -5628,6 +5628,19 @@ <h2 id="narrowing-matchers">Narrowing Matchers</h2>
 </pre></td></tr>
 
 
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;</td><td class="name" onclick="toggle('isConstinit0')"><a name="isConstinit0Anchor">isConstinit</a></td><td></td></tr>
+<tr><td colspan="4" class="doc" id="isConstinit0"><pre>Matches constinit variable declarations.
+
+Given:
+  constinit int foo = 42;
+  constinit const char* bar = "baz";
+  int baz = 42;
+  [[clang::require_constant_initialization]] int xyz = 42;
+varDecl(isConstinit())
+  matches the declaration of `foo` and `bar`, but not `baz` and `xyz`.
+</pre></td></tr>
+
+
 <tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1VarDecl.html">VarDecl</a>&gt;</td><td class="name" onclick="toggle('isDefinition1')"><a name="isDefinition1Anchor">isDefinition</a></td><td></td></tr>
 <tr><td colspan="4" class="doc" id="isDefinition1"><pre>Matches if a declaration has a body attached.
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 4fe037741256f..5cd2896de54d5 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -331,6 +331,8 @@ AST Matchers
   underlying type.
 - Added the ``isConsteval`` matcher to match ``consteval`` function
   declarations as well as `if consteval` and `if ! consteval` statements.
+- Added the ``isConstinit`` matcher to match ``constinit`` variable
+  declarations.
 
 clang-format
 ------------
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index c934b708cb96c..86bd44091b593 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -5211,6 +5211,23 @@ AST_POLYMORPHIC_MATCHER(isConstexpr,
   return Node.isConstexpr();
 }
 
+/// Matches constinit variable declarations.
+///
+/// Given:
+/// \code
+///   constinit int foo = 42;
+///   constinit const char* bar = "bar";
+///   int baz = 42;
+///   [[clang::require_constant_initialization]] int xyz = 42;
+/// \endcode
+/// varDecl(isConstinit())
+///   matches the declaration of `foo` and `bar`, but not `baz` and `xyz`.
+AST_MATCHER(VarDecl, isConstinit) {
+  if (const auto *CIA = Node.getAttr<ConstInitAttr>())
+    return CIA->isConstinit();
+  return false;
+}
+
 /// Matches selection statements with initializer.
 ///
 /// Given:
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index 2210c5413cc5a..47db6b51966ac 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -406,6 +406,7 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(isConstQualified);
   REGISTER_MATCHER(isConsteval);
   REGISTER_MATCHER(isConstexpr);
+  REGISTER_MATCHER(isConstinit);
   REGISTER_MATCHER(isCopyAssignmentOperator);
   REGISTER_MATCHER(isCopyConstructor);
   REGISTER_MATCHER(isDefaultConstructor);
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
index 51946e1430cf6..d1c9790401f02 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
@@ -1841,6 +1841,25 @@ TEST_P(ASTMatchersTest, IsConstexpr_MatchesIfConstexpr) {
       notMatches("void baz() { if (1 > 0) {} }", ifStmt(isConstexpr())));
 }
 
+TEST_P(ASTMatchersTest, IsConstinit) {
+  if (!GetParam().isCXX20OrLater())
+    return;
+
+  EXPECT_TRUE(matches("constinit int foo = 1;",
+                      varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(matches("extern constinit int foo;",
+                      varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(matches("constinit const char* foo = \"bar\";",
+                      varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(
+      notMatches("[[clang::require_constant_initialization]] int foo = 1;",
+                 varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(notMatches("constexpr int foo = 1;",
+                         varDecl(hasName("foo"), isConstinit())));
+  EXPECT_TRUE(notMatches("static inline int foo = 1;",
+                         varDecl(hasName("foo"), isConstinit())));
+}
+
 TEST_P(ASTMatchersTest, HasInitStatement_MatchesSelectionInitializers) {
   EXPECT_TRUE(notMatches("void baz() { if (1 > 0) {} }",
                          ifStmt(hasInitStatement(anything()))));

From a0d5e938fe9c1fd6ca492a91cdc3d841aa03fc0d Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Mon, 24 Jan 2022 14:40:51 +0100
Subject: [PATCH 379/946] Add missing include llvm/ADT/STLExtras

---
 clang-tools-extra/clang-tidy/GlobList.cpp   | 1 +
 llvm/include/llvm/ADT/CoalescingBitVector.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/clang-tools-extra/clang-tidy/GlobList.cpp b/clang-tools-extra/clang-tidy/GlobList.cpp
index b594d943cc075..fe41feef38abf 100644
--- a/clang-tools-extra/clang-tidy/GlobList.cpp
+++ b/clang-tools-extra/clang-tidy/GlobList.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "GlobList.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 
 namespace clang {
diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h
index 18803ecf209f4..82e2e1a9f9e25 100644
--- a/llvm/include/llvm/ADT/CoalescingBitVector.h
+++ b/llvm/include/llvm/ADT/CoalescingBitVector.h
@@ -15,6 +15,7 @@
 #define LLVM_ADT_COALESCINGBITVECTOR_H
 
 #include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Debug.h"

From f6ac8088b0e890765974fee5f5820a340736f9bf Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Mon, 24 Jan 2022 11:41:06 +0000
Subject: [PATCH 380/946] [LoopFlatten] Added comments about usage of various
 Loop APIs. NFC.

---
 llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 81 ++++++++++++++++------
 1 file changed, 61 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 9b2463b3cd357..4d9578934d9e6 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -10,10 +10,13 @@
 //
 // The intention is to optimise loop nests like this, which together access an
 // array linearly:
+//
 //   for (int i = 0; i < N; ++i)
 //     for (int j = 0; j < M; ++j)
 //       f(A[i*M+j]);
+//
 // into one loop:
+//
 //   for (int i = 0; i < (N*M); ++i)
 //     f(A[i]);
 //
@@ -22,7 +25,27 @@
 // expression like i*M+j. If they had any other uses, we would have to insert a
 // div/mod to reconstruct the original values, so this wouldn't be profitable.
 //
-// We also need to prove that N*M will not overflow.
+// We also need to prove that N*M will not overflow. The preferred solution is
+// to widen the IV, which avoids overflow checks, so that is tried first. If
+// the IV cannot be widened, then we try to determine that this new tripcount
+// expression won't overflow.
+//
+// Q: Does LoopFlatten use SCEV?
+// Short answer: Yes and no.
+//
+// Long answer:
+// For this transformation to be valid, we require all uses of the induction
+// variables to be linear expressions of the form i*M+j. The different Loop
+// APIs are used to get some loop components like the induction variable,
+// compare statement, etc. In addition, we do some pattern matching to find the
+// linear expressions and other loop components like the loop increment. The
+// latter are examples of expressions that do use the induction variable, but
+// are safe to ignore when we check all uses to be of the form i*M+j. We keep
+// track of all of this in bookkeeping struct FlattenInfo.
+// We assume the loops to be canonical, i.e. starting at 0 and increment with
+// 1. This makes RHS of the compare the loop tripcount (with the right
+// predicate). We use SCEV to then sanity check that this tripcount matches
+// with the tripcount as computed by SCEV.
 //
 //===----------------------------------------------------------------------===//
 
@@ -75,30 +98,48 @@ static cl::opt<bool>
             cl::desc("Widen the loop induction variables, if possible, so "
                      "overflow checks won't reject flattening"));
 
+// We require all uses of both induction variables to match this pattern:
+//
+//   (OuterPHI * InnerTripCount) + InnerPHI
+//
+// I.e., it needs to be a linear expression of the induction variables and the
+// inner loop trip count. We keep track of all different expressions on which
+// checks will be performed in this bookkeeping struct.
+//
 struct FlattenInfo {
-  Loop *OuterLoop = nullptr;
+  Loop *OuterLoop = nullptr;  // The loop pair to be flattened.
   Loop *InnerLoop = nullptr;
-  // These PHINodes correspond to loop induction variables, which are expected
-  // to start at zero and increment by one on each loop.
-  PHINode *InnerInductionPHI = nullptr;
-  PHINode *OuterInductionPHI = nullptr;
-  Value *InnerTripCount = nullptr;
-  Value *OuterTripCount = nullptr;
-  BinaryOperator *InnerIncrement = nullptr;
-  BinaryOperator *OuterIncrement = nullptr;
-  BranchInst *InnerBranch = nullptr;
-  BranchInst *OuterBranch = nullptr;
-  SmallPtrSet<Value *, 4> LinearIVUses;
+
+  PHINode *InnerInductionPHI = nullptr; // These PHINodes correspond to loop
+  PHINode *OuterInductionPHI = nullptr; // induction variables, which are
+                                        // expected to start at zero and
+                                        // increment by one on each loop.
+
+  Value *InnerTripCount = nullptr; // The product of these two tripcounts
+  Value *OuterTripCount = nullptr; // will be the new flattened loop
+                                   // tripcount. Also used to recognise a
+                                   // linear expression that will be replaced.
+
+  SmallPtrSet<Value *, 4> LinearIVUses;  // Contains the linear expressions
+                                         // of the form i*M+j that will be
+                                         // replaced.
+
+  BinaryOperator *InnerIncrement = nullptr;  // Uses of induction variables in
+  BinaryOperator *OuterIncrement = nullptr;  // loop control statements that
+  BranchInst *InnerBranch = nullptr;         // are safe to ignore.
+
+  BranchInst *OuterBranch = nullptr; // The instruction that needs to be
+                                     // updated with new tripcount.
+
   SmallPtrSet<PHINode *, 4> InnerPHIsToTransform;
 
-  // Whether this holds the flatten info before or after widening.
-  bool Widened = false;
+  bool Widened = false; // Whether this holds the flatten info before or after
+                        // widening.
 
-  // Holds the old/narrow induction phis, i.e. the Phis before IV widening has
-  // been applied. This bookkeeping is used so we can skip some checks on these
-  // phi nodes.
-  PHINode *NarrowInnerInductionPHI = nullptr;
-  PHINode *NarrowOuterInductionPHI = nullptr;
+  PHINode *NarrowInnerInductionPHI = nullptr; // Holds the old/narrow induction
+  PHINode *NarrowOuterInductionPHI = nullptr; // phis, i.e. the Phis before IV
+                                              // has been apllied. Used to skip
+                                              // checks on phi nodes.
 
   FlattenInfo(Loop *OL, Loop *IL) : OuterLoop(OL), InnerLoop(IL){};
 

From ada6d78a7802f8057f1ab7cee0bed25f91fcc4b4 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Mon, 24 Jan 2022 12:54:16 +0000
Subject: [PATCH 381/946] [LoopFlatten] Address FIXME about
 getTripCountFromExitCount. NFC.

Together with the previous commit which mainly documents better LoopFlatten's
overall strategy, this addresses a concern added as a FIXME comment in D110587;
the code refactoring (NFC) introduces functions (also for the SCEV usage) to
make this clearer.
---
 llvm/lib/Transforms/Scalar/LoopFlatten.cpp | 351 ++++++++++++---------
 1 file changed, 199 insertions(+), 152 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 4d9578934d9e6..c46db4e63bfee 100644
--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -149,6 +149,118 @@ struct FlattenInfo {
       return false;
     return NarrowInnerInductionPHI == Phi || NarrowOuterInductionPHI == Phi;
   }
+  bool isInnerLoopIncrement(User *U) {
+    return InnerIncrement == U;
+  }
+  bool isOuterLoopIncrement(User *U) {
+    return OuterIncrement == U;
+  }
+  bool isInnerLoopTest(User *U) {
+    return InnerBranch->getCondition() == U;
+  }
+
+  bool checkOuterInductionPhiUsers(SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
+    for (User *U : OuterInductionPHI->users()) {
+      if (isOuterLoopIncrement(U))
+        continue;
+
+      auto IsValidOuterPHIUses = [&] (User *U) -> bool {
+        LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
+        if (!ValidOuterPHIUses.count(U)) {
+          LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
+          return false;
+        }
+        LLVM_DEBUG(dbgs() << "Use is optimisable\n");
+        return true;
+      };
+
+      if (auto *V = dyn_cast<TruncInst>(U)) {
+        for (auto *K : V->users()) {
+          if (!IsValidOuterPHIUses(K))
+            return false;
+        }
+        continue;
+      }
+
+      if (!IsValidOuterPHIUses(U))
+        return false;
+    }
+    return true;
+  }
+
+  bool matchLinearIVUser(User *U, Value *InnerTripCount,
+                         SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
+    LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
+    Value *MatchedMul = nullptr;
+    Value *MatchedItCount = nullptr;
+
+    bool IsAdd = match(U, m_c_Add(m_Specific(InnerInductionPHI),
+                                  m_Value(MatchedMul))) &&
+                 match(MatchedMul, m_c_Mul(m_Specific(OuterInductionPHI),
+                                           m_Value(MatchedItCount)));
+
+    // Matches the same pattern as above, except it also looks for truncs
+    // on the phi, which can be the result of widening the induction variables.
+    bool IsAddTrunc =
+        match(U, m_c_Add(m_Trunc(m_Specific(InnerInductionPHI)),
+                         m_Value(MatchedMul))) &&
+        match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(OuterInductionPHI)),
+                                  m_Value(MatchedItCount)));
+
+    if (!MatchedItCount)
+      return false;
+
+    // Look through extends if the IV has been widened.
+    if (Widened &&
+        (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
+      assert(MatchedItCount->getType() == InnerInductionPHI->getType() &&
+             "Unexpected type mismatch in types after widening");
+      MatchedItCount = isa<SExtInst>(MatchedItCount)
+                           ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
+                           : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
+    }
+
+    if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
+      LLVM_DEBUG(dbgs() << "Use is optimisable\n");
+      ValidOuterPHIUses.insert(MatchedMul);
+      LinearIVUses.insert(U);
+      return true;
+    }
+
+    LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
+    return false;
+  }
+
+  bool checkInnerInductionPhiUsers(SmallPtrSet<Value *, 4> &ValidOuterPHIUses) {
+    Value *SExtInnerTripCount = InnerTripCount;
+    if (Widened &&
+        (isa<SExtInst>(InnerTripCount) || isa<ZExtInst>(InnerTripCount)))
+      SExtInnerTripCount = cast<Instruction>(InnerTripCount)->getOperand(0);
+
+    for (User *U : InnerInductionPHI->users()) {
+      if (isInnerLoopIncrement(U))
+        continue;
+
+      // After widening the IVs, a trunc instruction might have been introduced,
+      // so look through truncs.
+      if (isa<TruncInst>(U)) {
+        if (!U->hasOneUse())
+          return false;
+        U = *U->user_begin();
+      }
+
+      // If the use is in the compare (which is also the condition of the inner
+      // branch) then the compare has been altered by another transformation e.g
+      // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
+      // a constant. Ignore this use as the compare gets removed later anyway.
+      if (isInnerLoopTest(U))
+        continue;
+
+      if (!matchLinearIVUser(U, SExtInnerTripCount, ValidOuterPHIUses))
+        return false;
+    }
+    return true;
+  }
 };
 
 static bool
@@ -162,6 +274,77 @@ setLoopComponents(Value *&TC, Value *&TripCount, BinaryOperator *&Increment,
   return true;
 }
 
+// Given the RHS of the loop latch compare instruction, verify with SCEV
+// that this is indeed the loop tripcount.
+// TODO: This used to be a straightforward check but has grown to be quite
+// complicated now. It is therefore worth revisiting what the additional
+// benefits are of this (compared to relying on canonical loops and pattern
+// matching).
+static bool verifyTripCount(Value *RHS, Loop *L,
+     SmallPtrSetImpl<Instruction *> &IterationInstructions,
+    PHINode *&InductionPHI, Value *&TripCount, BinaryOperator *&Increment,
+    BranchInst *&BackBranch, ScalarEvolution *SE, bool IsWidened) {
+  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
+    LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
+    return false;
+  }
+
+  // The Extend=false flag is used for getTripCountFromExitCount as we want
+  // to verify and match it with the pattern matched tripcount. Please note
+  // that overflow checks are performed in checkOverflow, but are first tried
+  // to avoid by widening the IV.
+  const SCEV *SCEVTripCount =
+      SE->getTripCountFromExitCount(BackedgeTakenCount, /*Extend=*/false);
+
+  const SCEV *SCEVRHS = SE->getSCEV(RHS);
+  if (SCEVRHS == SCEVTripCount)
+    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+  ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
+  if (ConstantRHS) {
+    const SCEV *BackedgeTCExt = nullptr;
+    if (IsWidened) {
+      const SCEV *SCEVTripCountExt;
+      // Find the extended backedge taken count and extended trip count using
+      // SCEV. One of these should now match the RHS of the compare.
+      BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
+      SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
+      if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
+        LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+        return false;
+      }
+    }
+    // If the RHS of the compare is equal to the backedge taken count we need
+    // to add one to get the trip count.
+    if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
+      ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
+      Value *NewRHS = ConstantInt::get(
+          ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
+      return setLoopComponents(NewRHS, TripCount, Increment,
+                               IterationInstructions);
+    }
+    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+  }
+  // If the RHS isn't a constant then check that the reason it doesn't match
+  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
+  // (and take the trip count to be the RHS).
+  if (!IsWidened) {
+    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+    return false;
+  }
+  auto *TripCountInst = dyn_cast<Instruction>(RHS);
+  if (!TripCountInst) {
+    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
+    return false;
+  }
+  if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
+      SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
+    LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
+    return false;
+  }
+  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+}
+
 // Finds the induction variable, increment and trip count for a simple loop that
 // we can flatten.
 static bool findLoopComponents(
@@ -238,63 +421,9 @@ static bool findLoopComponents(
   // another transformation has changed the compare (e.g. icmp ult %inc,
   // tripcount -> icmp ult %j, tripcount-1), or both.
   Value *RHS = Compare->getOperand(1);
-  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
-  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
-    LLVM_DEBUG(dbgs() << "Backedge-taken count is not predictable\n");
-    return false;
-  }
-  // The use of the Extend=false flag on getTripCountFromExitCount was added
-  // during a refactoring to preserve existing behavior.  However, there's
-  // nothing obvious in the surrounding code when handles the overflow case.
-  // FIXME: audit code to establish whether there's a latent bug here.
-  const SCEV *SCEVTripCount =
-      SE->getTripCountFromExitCount(BackedgeTakenCount, false);
-  const SCEV *SCEVRHS = SE->getSCEV(RHS);
-  if (SCEVRHS == SCEVTripCount)
-    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
-  ConstantInt *ConstantRHS = dyn_cast<ConstantInt>(RHS);
-  if (ConstantRHS) {
-    const SCEV *BackedgeTCExt = nullptr;
-    if (IsWidened) {
-      const SCEV *SCEVTripCountExt;
-      // Find the extended backedge taken count and extended trip count using
-      // SCEV. One of these should now match the RHS of the compare.
-      BackedgeTCExt = SE->getZeroExtendExpr(BackedgeTakenCount, RHS->getType());
-      SCEVTripCountExt = SE->getTripCountFromExitCount(BackedgeTCExt, false);
-      if (SCEVRHS != BackedgeTCExt && SCEVRHS != SCEVTripCountExt) {
-        LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
-        return false;
-      }
-    }
-    // If the RHS of the compare is equal to the backedge taken count we need
-    // to add one to get the trip count.
-    if (SCEVRHS == BackedgeTCExt || SCEVRHS == BackedgeTakenCount) {
-      ConstantInt *One = ConstantInt::get(ConstantRHS->getType(), 1);
-      Value *NewRHS = ConstantInt::get(
-          ConstantRHS->getContext(), ConstantRHS->getValue() + One->getValue());
-      return setLoopComponents(NewRHS, TripCount, Increment,
-                               IterationInstructions);
-    }
-    return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
-  }
-  // If the RHS isn't a constant then check that the reason it doesn't match
-  // the SCEV trip count is because the RHS is a ZExt or SExt instruction
-  // (and take the trip count to be the RHS).
-  if (!IsWidened) {
-    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
-    return false;
-  }
-  auto *TripCountInst = dyn_cast<Instruction>(RHS);
-  if (!TripCountInst) {
-    LLVM_DEBUG(dbgs() << "Could not find valid trip count\n");
-    return false;
-  }
-  if ((!isa<ZExtInst>(TripCountInst) && !isa<SExtInst>(TripCountInst)) ||
-      SE->getSCEV(TripCountInst->getOperand(0)) != SCEVTripCount) {
-    LLVM_DEBUG(dbgs() << "Could not find valid extended trip count\n");
-    return false;
-  }
-  return setLoopComponents(RHS, TripCount, Increment, IterationInstructions);
+
+  return verifyTripCount(RHS, L, IterationInstructions, InductionPHI, TripCount,
+                         Increment, BackBranch, SE, IsWidened);
 }
 
 static bool checkPHIs(FlattenInfo &FI, const TargetTransformInfo *TTI) {
@@ -440,108 +569,26 @@ checkOuterLoopInsts(FlattenInfo &FI,
   return true;
 }
 
-static bool checkIVUsers(FlattenInfo &FI) {
-  // We require all uses of both induction variables to match this pattern:
-  //
-  //   (OuterPHI * InnerTripCount) + InnerPHI
-  //
-  // Any uses of the induction variables not matching that pattern would
-  // require a div/mod to reconstruct in the flattened loop, so the
-  // transformation wouldn't be profitable.
-
-  Value *InnerTripCount = FI.InnerTripCount;
-  if (FI.Widened &&
-      (isa<SExtInst>(InnerTripCount) || isa<ZExtInst>(InnerTripCount)))
-    InnerTripCount = cast<Instruction>(InnerTripCount)->getOperand(0);
 
+
+// We require all uses of both induction variables to match this pattern:
+//
+//   (OuterPHI * InnerTripCount) + InnerPHI
+//
+// Any uses of the induction variables not matching that pattern would
+// require a div/mod to reconstruct in the flattened loop, so the
+// transformation wouldn't be profitable.
+static bool checkIVUsers(FlattenInfo &FI) {
   // Check that all uses of the inner loop's induction variable match the
   // expected pattern, recording the uses of the outer IV.
   SmallPtrSet<Value *, 4> ValidOuterPHIUses;
-  for (User *U : FI.InnerInductionPHI->users()) {
-    if (U == FI.InnerIncrement)
-      continue;
-
-    // After widening the IVs, a trunc instruction might have been introduced,
-    // so look through truncs.
-    if (isa<TruncInst>(U)) {
-      if (!U->hasOneUse())
-        return false;
-      U = *U->user_begin();
-    }
-
-    // If the use is in the compare (which is also the condition of the inner
-    // branch) then the compare has been altered by another transformation e.g
-    // icmp ult %inc, tripcount -> icmp ult %j, tripcount-1, where tripcount is
-    // a constant. Ignore this use as the compare gets removed later anyway.
-    if (U == FI.InnerBranch->getCondition())
-      continue;
-
-    LLVM_DEBUG(dbgs() << "Found use of inner induction variable: "; U->dump());
-
-    Value *MatchedMul = nullptr;
-    Value *MatchedItCount = nullptr;
-    bool IsAdd = match(U, m_c_Add(m_Specific(FI.InnerInductionPHI),
-                                  m_Value(MatchedMul))) &&
-                 match(MatchedMul, m_c_Mul(m_Specific(FI.OuterInductionPHI),
-                                           m_Value(MatchedItCount)));
-
-    // Matches the same pattern as above, except it also looks for truncs
-    // on the phi, which can be the result of widening the induction variables.
-    bool IsAddTrunc =
-        match(U, m_c_Add(m_Trunc(m_Specific(FI.InnerInductionPHI)),
-                         m_Value(MatchedMul))) &&
-        match(MatchedMul, m_c_Mul(m_Trunc(m_Specific(FI.OuterInductionPHI)),
-                                  m_Value(MatchedItCount)));
-
-    if (!MatchedItCount)
-      return false;
-    // Look through extends if the IV has been widened.
-    if (FI.Widened &&
-        (isa<SExtInst>(MatchedItCount) || isa<ZExtInst>(MatchedItCount))) {
-      assert(MatchedItCount->getType() == FI.InnerInductionPHI->getType() &&
-             "Unexpected type mismatch in types after widening");
-      MatchedItCount = isa<SExtInst>(MatchedItCount)
-                           ? dyn_cast<SExtInst>(MatchedItCount)->getOperand(0)
-                           : dyn_cast<ZExtInst>(MatchedItCount)->getOperand(0);
-    }
-
-    if ((IsAdd || IsAddTrunc) && MatchedItCount == InnerTripCount) {
-      LLVM_DEBUG(dbgs() << "Use is optimisable\n");
-      ValidOuterPHIUses.insert(MatchedMul);
-      FI.LinearIVUses.insert(U);
-    } else {
-      LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
-      return false;
-    }
-  }
+  if (!FI.checkInnerInductionPhiUsers(ValidOuterPHIUses))
+    return false;
 
   // Check that there are no uses of the outer IV other than the ones found
   // as part of the pattern above.
-  for (User *U : FI.OuterInductionPHI->users()) {
-    if (U == FI.OuterIncrement)
-      continue;
-
-    auto IsValidOuterPHIUses = [&] (User *U) -> bool {
-      LLVM_DEBUG(dbgs() << "Found use of outer induction variable: "; U->dump());
-      if (!ValidOuterPHIUses.count(U)) {
-        LLVM_DEBUG(dbgs() << "Did not match expected pattern, bailing\n");
-        return false;
-      }
-      LLVM_DEBUG(dbgs() << "Use is optimisable\n");
-      return true;
-    };
-
-    if (auto *V = dyn_cast<TruncInst>(U)) {
-      for (auto *K : V->users()) {
-        if (!IsValidOuterPHIUses(K))
-          return false;
-      }
-      continue;
-    }
-
-    if (!IsValidOuterPHIUses(U))
-      return false;
-  }
+  if (!FI.checkOuterInductionPhiUsers(ValidOuterPHIUses))
+    return false;
 
   LLVM_DEBUG(dbgs() << "checkIVUsers: OK\n";
              dbgs() << "Found " << FI.LinearIVUses.size()

From d42678b453bc2587a42eef1ba4e5782b2c8c5ff1 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Fri, 21 Jan 2022 17:58:39 +0000
Subject: [PATCH 382/946] [RISCV] Add side-effect-free vsetvli intrinsics

This patch introduces new intrinsics that enable the use of vsetvli in
contexts where only the returned vector length is of interest. The
pre-existing intrinsics are marked with side-effects, which prevents
even trivial optimizations on/across them.

These intrinsics are intended to be used in situations where the vector
length is fed in turn to RVV intrinsics or to vector-predication
intrinsics during loop vectorization, for example. Those codegen paths
ensure that instructions are generated with their own implicit vsetvli,
so the vector length and vtype can be relied upon to be correct.

No corresponding C builtins are planned at this stage, though that is a
possibility for the future if the need arises.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117910
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  16 +++
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   | 121 +++++++++++-------
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h     |   2 +
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   8 +-
 .../RISCV/rvv/rv32-vsetvli-intrinsics.ll      |  64 +++++++++
 .../RISCV/rvv/rv64-vsetvli-intrinsics.ll      |  64 +++++++++
 6 files changed, 226 insertions(+), 49 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index c332eb82a5906..99dd152fc0fc5 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -159,6 +159,22 @@ let TargetPrefix = "riscv" in {
                                        ImmArg<ArgIndex<0>>,
                                        ImmArg<ArgIndex<1>>]>;
 
+  // Versions without side effects: better optimizable and usable if only the
+  // returned vector length is important.
+  def int_riscv_vsetvli_opt   : Intrinsic<[llvm_anyint_ty],
+                               /* AVL */  [LLVMMatchType<0>,
+                               /* VSEW */  LLVMMatchType<0>,
+                               /* VLMUL */ LLVMMatchType<0>],
+                                          [IntrNoMem,
+                                           ImmArg<ArgIndex<1>>,
+                                           ImmArg<ArgIndex<2>>]>;
+  def int_riscv_vsetvlimax_opt : Intrinsic<[llvm_anyint_ty],
+                                /* VSEW */ [LLVMMatchType<0>,
+                                /* VLMUL */ LLVMMatchType<0>],
+                                          [IntrNoMem,
+                                           ImmArg<ArgIndex<0>>,
+                                           ImmArg<ArgIndex<1>>]>;
+
   // For unit stride load
   // Input: (pointer, vl)
   class RISCVUSLoad
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index e46aa6114fca2..df4e955ef583b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -495,6 +495,75 @@ void RISCVDAGToDAGISel::selectVSXSEG(SDNode *Node, bool IsMasked,
   ReplaceNode(Node, Store);
 }
 
+void RISCVDAGToDAGISel::selectVSETVLI(SDNode *Node) {
+  if (!Subtarget->hasVInstructions())
+    return;
+
+  assert((Node->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+          Node->getOpcode() == ISD::INTRINSIC_WO_CHAIN) &&
+         "Unexpected opcode");
+
+  SDLoc DL(Node);
+  MVT XLenVT = Subtarget->getXLenVT();
+
+  bool HasChain = Node->getOpcode() == ISD::INTRINSIC_W_CHAIN;
+  unsigned IntNoOffset = HasChain ? 1 : 0;
+  unsigned IntNo = Node->getConstantOperandVal(IntNoOffset);
+
+  assert((IntNo == Intrinsic::riscv_vsetvli ||
+          IntNo == Intrinsic::riscv_vsetvlimax ||
+          IntNo == Intrinsic::riscv_vsetvli_opt ||
+          IntNo == Intrinsic::riscv_vsetvlimax_opt) &&
+         "Unexpected vsetvli intrinsic");
+
+  bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax ||
+               IntNo == Intrinsic::riscv_vsetvlimax_opt;
+  unsigned Offset = IntNoOffset + (VLMax ? 1 : 2);
+
+  assert(Node->getNumOperands() == Offset + 2 &&
+         "Unexpected number of operands");
+
+  unsigned SEW =
+      RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
+  RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
+      Node->getConstantOperandVal(Offset + 1) & 0x7);
+
+  unsigned VTypeI = RISCVVType::encodeVTYPE(VLMul, SEW, /*TailAgnostic*/ true,
+                                            /*MaskAgnostic*/ false);
+  SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
+
+  SmallVector<EVT, 2> VTs = {XLenVT};
+  if (HasChain)
+    VTs.push_back(MVT::Other);
+
+  SDValue VLOperand;
+  unsigned Opcode = RISCV::PseudoVSETVLI;
+  if (VLMax) {
+    VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
+    Opcode = RISCV::PseudoVSETVLIX0;
+  } else {
+    VLOperand = Node->getOperand(IntNoOffset + 1);
+
+    if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
+      uint64_t AVL = C->getZExtValue();
+      if (isUInt<5>(AVL)) {
+        SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
+        SmallVector<SDValue, 3> Ops = {VLImm, VTypeIOp};
+        if (HasChain)
+          Ops.push_back(Node->getOperand(0));
+        ReplaceNode(
+            Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, VTs, Ops));
+        return;
+      }
+    }
+  }
+
+  SmallVector<SDValue, 3> Ops = {VLOperand, VTypeIOp};
+  if (HasChain)
+    Ops.push_back(Node->getOperand(0));
+
+  ReplaceNode(Node, CurDAG->getMachineNode(Opcode, DL, VTs, Ops));
+}
 
 void RISCVDAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we have already selected.
@@ -1017,6 +1086,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
                                                {Cmp, Mask, VL, MaskSEW}));
       return;
     }
+    case Intrinsic::riscv_vsetvli_opt:
+    case Intrinsic::riscv_vsetvlimax_opt:
+      return selectVSETVLI(Node);
     }
     break;
   }
@@ -1026,54 +1098,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       // By default we do not custom select any intrinsic.
     default:
       break;
-
     case Intrinsic::riscv_vsetvli:
-    case Intrinsic::riscv_vsetvlimax: {
-      if (!Subtarget->hasVInstructions())
-        break;
-
-      bool VLMax = IntNo == Intrinsic::riscv_vsetvlimax;
-      unsigned Offset = VLMax ? 2 : 3;
-
-      assert(Node->getNumOperands() == Offset + 2 &&
-             "Unexpected number of operands");
-
-      unsigned SEW =
-          RISCVVType::decodeVSEW(Node->getConstantOperandVal(Offset) & 0x7);
-      RISCVII::VLMUL VLMul = static_cast<RISCVII::VLMUL>(
-          Node->getConstantOperandVal(Offset + 1) & 0x7);
-
-      unsigned VTypeI = RISCVVType::encodeVTYPE(
-          VLMul, SEW, /*TailAgnostic*/ true, /*MaskAgnostic*/ false);
-      SDValue VTypeIOp = CurDAG->getTargetConstant(VTypeI, DL, XLenVT);
-
-      SDValue VLOperand;
-      unsigned Opcode = RISCV::PseudoVSETVLI;
-      if (VLMax) {
-        VLOperand = CurDAG->getRegister(RISCV::X0, XLenVT);
-        Opcode = RISCV::PseudoVSETVLIX0;
-      } else {
-        VLOperand = Node->getOperand(2);
-
-        if (auto *C = dyn_cast<ConstantSDNode>(VLOperand)) {
-          uint64_t AVL = C->getZExtValue();
-          if (isUInt<5>(AVL)) {
-            SDValue VLImm = CurDAG->getTargetConstant(AVL, DL, XLenVT);
-            ReplaceNode(
-                Node, CurDAG->getMachineNode(RISCV::PseudoVSETIVLI, DL, XLenVT,
-                                             MVT::Other, VLImm, VTypeIOp,
-                                             /* Chain */ Node->getOperand(0)));
-            return;
-          }
-        }
-      }
-
-      ReplaceNode(Node,
-                  CurDAG->getMachineNode(Opcode, DL, XLenVT,
-                                         MVT::Other, VLOperand, VTypeIOp,
-                                         /* Chain */ Node->getOperand(0)));
-      return;
-    }
+    case Intrinsic::riscv_vsetvlimax:
+      return selectVSETVLI(Node);
     case Intrinsic::riscv_vlseg2:
     case Intrinsic::riscv_vlseg3:
     case Intrinsic::riscv_vlseg4:
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index a2770089995d8..f4d6fdddca390 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -87,6 +87,8 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
   void selectVSSEG(SDNode *Node, bool IsMasked, bool IsStrided);
   void selectVSXSEG(SDNode *Node, bool IsMasked, bool IsOrdered);
 
+  void selectVSETVLI(SDNode *Node);
+
   // Return the RISC-V condition code that matches the given DAG integer
   // condition code. The CondCode must be one of those supported by the RISC-V
   // ISA (see translateSetCCForBranch).
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7d224e3968545..f7e4e36a20d15 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8162,14 +8162,18 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     // We assume VLENB is no more than 65536 / 8 bytes.
     Known.Zero.setBitsFrom(14);
     break;
-  case ISD::INTRINSIC_W_CHAIN: {
-    unsigned IntNo = Op.getConstantOperandVal(1);
+  case ISD::INTRINSIC_W_CHAIN:
+  case ISD::INTRINSIC_WO_CHAIN: {
+    unsigned IntNo =
+        Op.getConstantOperandVal(Opc == ISD::INTRINSIC_WO_CHAIN ? 0 : 1);
     switch (IntNo) {
     default:
       // We can't do anything for most intrinsics.
       break;
     case Intrinsic::riscv_vsetvli:
     case Intrinsic::riscv_vsetvlimax:
+    case Intrinsic::riscv_vsetvli_opt:
+    case Intrinsic::riscv_vsetvlimax_opt:
       // Assume that VL output is positive and would fit in an int32_t.
       // TODO: VLEN might be capped at 16 bits in a future V spec update.
       if (BitWidth >= 32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
index 081743b31b701..1c3c219c13041 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -3,6 +3,8 @@
 
 declare i32 @llvm.riscv.vsetvli.i32(i32, i32, i32)
 declare i32 @llvm.riscv.vsetvlimax.i32(i32, i32)
+declare i32 @llvm.riscv.vsetvli.opt.i32(i32, i32, i32)
+declare i32 @llvm.riscv.vsetvlimax.opt.i32(i32, i32)
 
 define void @test_vsetvli_e64mf8(i32 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e64mf8:
@@ -31,6 +33,68 @@ define void @test_vsetvlimax_e64m8() nounwind {
   ret void
 }
 
+define i32 @test_vsetvli_opt_e8m1(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
+  ret i32 %vl
+}
+
+; Check that we remove the intrinsic if it's unused.
+define void @test_vsetvli_opt_e8m1_nouse(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 0, i32 0)
+  ret void
+}
+
+define i32 @test_vsetvli_opt_e16mf4(i32 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e16mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 %avl, i32 1, i32 6)
+  ret i32 %vl
+}
+
+define i32 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvli.opt.i32(i32 0, i32 1, i32 6)
+  ret i32 %vl
+}
+
+define i32 @test_vsetvlimax_opt_e32m2() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
+  ret i32 %vl
+}
+
+define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 2, i32 1)
+  ret void
+}
+
+define i32 @test_vsetvlimax_opt_e64m4() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i32 @llvm.riscv.vsetvlimax.opt.i32(i32 3, i32 2)
+  ret i32 %vl
+}
+
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
index 2b745cb5eddaa..26c3aeeba38fb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -3,6 +3,8 @@
 
 declare i64 @llvm.riscv.vsetvli.i64(i64, i64, i64)
 declare i64 @llvm.riscv.vsetvlimax.i64(i64, i64)
+declare i64 @llvm.riscv.vsetvli.opt.i64(i64, i64, i64)
+declare i64 @llvm.riscv.vsetvlimax.opt.i64(i64, i64)
 
 define void @test_vsetvli_e8m1(i64 %avl) nounwind {
 ; CHECK-LABEL: test_vsetvli_e8m1:
@@ -49,6 +51,68 @@ define void @test_vsetvlimax_e64m4() nounwind {
   ret void
 }
 
+define i64 @test_vsetvli_opt_e8m1(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e8, m1, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
+  ret i64 %vl
+}
+
+; Check that we remove the intrinsic if it's unused.
+define void @test_vsetvli_opt_e8m1_nouse(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e8m1_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 0, i64 0)
+  ret void
+}
+
+define i64 @test_vsetvli_opt_e16mf4(i64 %avl) nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e16mf4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, a0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 %avl, i64 1, i64 6)
+  ret i64 %vl
+}
+
+define i64 @test_vsetvli_opt_e32mf8_zero_avl() nounwind {
+; CHECK-LABEL: test_vsetvli_opt_e32mf8_zero_avl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli a0, 0, e16, mf4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvli.opt.i64(i64 0, i64 1, i64 6)
+  ret i64 %vl
+}
+
+define i64 @test_vsetvlimax_opt_e32m2() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
+  ret i64 %vl
+}
+
+define void @test_vsetvlimax_opt_e32m2_nouse() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e32m2_nouse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 2, i64 1)
+  ret void
+}
+
+define i64 @test_vsetvlimax_opt_e64m4() nounwind {
+; CHECK-LABEL: test_vsetvlimax_opt_e64m4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; CHECK-NEXT:    ret
+  %vl = call i64 @llvm.riscv.vsetvlimax.opt.i64(i64 3, i64 2)
+  ret i64 %vl
+}
+
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)
 
 ; Check that we remove the redundant vsetvli when followed by another operation

From 25e8f5f827bfbc8aed24c9db116c4869f437fb9d Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Mon, 24 Jan 2022 15:02:44 +0100
Subject: [PATCH 383/946] Add missing STLExtras.h include from
 lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp

---
 lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp b/lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp
index 2e103b9e61f1f..b34e01bd01a05 100644
--- a/lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp
+++ b/lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "MockTildeExpressionResolver.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Path.h"
 
 using namespace lldb_private;

From f7079bf9ee68aa46c6eef5836279902a2fd1fe50 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 13:36:22 +0000
Subject: [PATCH 384/946] [X86] Fix v8i8 -> v8i16 typo in bool reductions

We were supposed to be testing <8 x i16> reductions
---
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 66 ++++++++-----------
 .../test/CodeGen/X86/vector-reduce-or-bool.ll | 65 ++++++++----------
 .../CodeGen/X86/vector-reduce-xor-bool.ll     | 66 ++++++++-----------
 3 files changed, 83 insertions(+), 114 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 068148d69498d..7db9ad872326d 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -111,30 +111,18 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @trunc_v8i16_v8i1(<8 x i8>) {
-; SSE2-LABEL: trunc_v8i16_v8i1:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psllw $15, %xmm0
-; SSE2-NEXT:    packsswb %xmm0, %xmm0
-; SSE2-NEXT:    pmovmskb %xmm0, %eax
-; SSE2-NEXT:    cmpb $-1, %al
-; SSE2-NEXT:    sete %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: trunc_v8i16_v8i1:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    psllw $15, %xmm0
-; SSE41-NEXT:    packsswb %xmm0, %xmm0
-; SSE41-NEXT:    pmovmskb %xmm0, %eax
-; SSE41-NEXT:    cmpb $-1, %al
-; SSE41-NEXT:    sete %al
-; SSE41-NEXT:    retq
+define i1 @trunc_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: trunc_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psllw $15, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpb $-1, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: trunc_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
 ; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
@@ -144,9 +132,9 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    cmpb $-1, %al
 ; AVX512F-NEXT:    sete %al
@@ -155,8 +143,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    cmpb $-1, %al
 ; AVX512BW-NEXT:    sete %al
@@ -165,13 +153,13 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovw2m %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    cmpb $-1, %al
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
-  %a = trunc <8 x i8> %0 to <8 x i1>
+  %a = trunc <8 x i16> %0 to <8 x i1>
   %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
@@ -949,11 +937,12 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i8>) {
+define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-LABEL: icmp_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
+; SSE-NEXT:    packsswb %xmm1, %xmm1
 ; SSE-NEXT:    pmovmskb %xmm1, %eax
 ; SSE-NEXT:    cmpb $-1, %al
 ; SSE-NEXT:    sete %al
@@ -962,7 +951,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX-LABEL: icmp_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX-NEXT:    cmpb $-1, %al
 ; AVX-NEXT:    sete %al
@@ -971,9 +961,9 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512F-LABEL: icmp_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    cmpb $-1, %al
 ; AVX512F-NEXT:    sete %al
@@ -983,7 +973,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    cmpb $-1, %al
 ; AVX512BW-NEXT:    sete %al
@@ -992,12 +982,12 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    cmpb $-1, %al
 ; AVX512VL-NEXT:    sete %al
 ; AVX512VL-NEXT:    retq
-  %a = icmp eq <8 x i8> %0, zeroinitializer
+  %a = icmp eq <8 x i16> %0, zeroinitializer
   %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
   ret i1 %b
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index 6269cd4df1119..b06822b93fd95 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -111,28 +111,17 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @trunc_v8i16_v8i1(<8 x i8>) {
-; SSE2-LABEL: trunc_v8i16_v8i1:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psllw $15, %xmm0
-; SSE2-NEXT:    pmovmskb %xmm0, %eax
-; SSE2-NEXT:    testl $43690, %eax # imm = 0xAAAA
-; SSE2-NEXT:    setne %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: trunc_v8i16_v8i1:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    psllw $15, %xmm0
-; SSE41-NEXT:    pmovmskb %xmm0, %eax
-; SSE41-NEXT:    testl $43690, %eax # imm = 0xAAAA
-; SSE41-NEXT:    setne %al
-; SSE41-NEXT:    retq
+define i1 @trunc_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: trunc_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psllw $15, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl $43690, %eax # imm = 0xAAAA
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: trunc_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX-NEXT:    testl $43690, %eax # imm = 0xAAAA
@@ -141,9 +130,9 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    setne %al
@@ -152,8 +141,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    setne %al
@@ -162,13 +151,13 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovw2m %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    testb %al, %al
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
-  %a = trunc <8 x i8> %0 to <8 x i1>
+  %a = trunc <8 x i16> %0 to <8 x i1>
   %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
@@ -947,31 +936,31 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i8>) {
+define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-LABEL: icmp_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
 ; SSE-NEXT:    pmovmskb %xmm1, %eax
-; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    testl %eax, %eax
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    testl %eax, %eax
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: icmp_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    setne %al
@@ -981,7 +970,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    setne %al
@@ -990,12 +979,12 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    testb %al, %al
 ; AVX512VL-NEXT:    setne %al
 ; AVX512VL-NEXT:    retq
-  %a = icmp eq <8 x i8> %0, zeroinitializer
+  %a = icmp eq <8 x i16> %0, zeroinitializer
   %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
   ret i1 %b
 }
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 7ef49de025531..0d0accd3f1f18 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -111,30 +111,18 @@ define i1 @trunc_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @trunc_v8i16_v8i1(<8 x i8>) {
-; SSE2-LABEL: trunc_v8i16_v8i1:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    psllw $15, %xmm0
-; SSE2-NEXT:    packsswb %xmm0, %xmm0
-; SSE2-NEXT:    pmovmskb %xmm0, %eax
-; SSE2-NEXT:    testb %al, %al
-; SSE2-NEXT:    setnp %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: trunc_v8i16_v8i1:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; SSE41-NEXT:    psllw $15, %xmm0
-; SSE41-NEXT:    packsswb %xmm0, %xmm0
-; SSE41-NEXT:    pmovmskb %xmm0, %eax
-; SSE41-NEXT:    testb %al, %al
-; SSE41-NEXT:    setnp %al
-; SSE41-NEXT:    retq
+define i1 @trunc_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: trunc_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psllw $15, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: trunc_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
 ; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
@@ -144,9 +132,9 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512F-LABEL: trunc_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    setnp %al
@@ -155,8 +143,8 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512BW-LABEL: trunc_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
-; AVX512BW-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512BW-NEXT:    vpmovb2m %zmm0, %k0
+; AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    setnp %al
@@ -165,13 +153,13 @@ define i1 @trunc_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: trunc_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX512VL-NEXT:    vpmovb2m %xmm0, %k0
+; AVX512VL-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX512VL-NEXT:    vpmovw2m %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    testb %al, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
-  %a = trunc <8 x i8> %0 to <8 x i1>
+  %a = trunc <8 x i16> %0 to <8 x i1>
   %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }
@@ -1027,11 +1015,12 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i8>) {
+define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-LABEL: icmp_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
+; SSE-NEXT:    packsswb %xmm1, %xmm1
 ; SSE-NEXT:    pmovmskb %xmm1, %eax
 ; SSE-NEXT:    testb %al, %al
 ; SSE-NEXT:    setnp %al
@@ -1040,7 +1029,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX-LABEL: icmp_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
 ; AVX-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX-NEXT:    testb %al, %al
 ; AVX-NEXT:    setnp %al
@@ -1049,9 +1039,9 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512F-LABEL: icmp_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    setnp %al
@@ -1061,7 +1051,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ; AVX512BW-LABEL: icmp_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
+; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    testb %al, %al
 ; AVX512BW-NEXT:    setnp %al
@@ -1070,12 +1060,12 @@ define i1 @icmp_v8i16_v8i1(<8 x i8>) {
 ;
 ; AVX512VL-LABEL: icmp_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
+; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
 ; AVX512VL-NEXT:    testb %al, %al
 ; AVX512VL-NEXT:    setnp %al
 ; AVX512VL-NEXT:    retq
-  %a = icmp eq <8 x i8> %0, zeroinitializer
+  %a = icmp eq <8 x i16> %0, zeroinitializer
   %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
   ret i1 %b
 }

From 4436d4cd7c86ed544b1184db2eec691b38c9c77b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 13:44:08 +0000
Subject: [PATCH 385/946] [X86] Rename cmp-with-zero bool reductions

Explicitly name them icmp0_* - I'm intending to add PR53379 test coverage shortly
---
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 172 +++++++++---------
 .../test/CodeGen/X86/vector-reduce-or-bool.ll | 164 ++++++++---------
 .../CodeGen/X86/vector-reduce-xor-bool.ll     | 168 ++++++++---------
 3 files changed, 252 insertions(+), 252 deletions(-)

diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 7db9ad872326d..c1ddc9454b939 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -826,11 +826,11 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 }
 
 ;
-; Comparison
+; Comparison With Zero
 ;
 
-define i1 @icmp_v2i64_v2i1(<2 x i64>) {
-; SSE2-LABEL: icmp_v2i64_v2i1:
+define i1 @icmp0_v2i64_v2i1(<2 x i64>) {
+; SSE2-LABEL: icmp0_v2i64_v2i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -839,19 +839,19 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41-LABEL: icmp0_v2i64_v2i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX-LABEL: icmp0_v2i64_v2i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F-LABEL: icmp0_v2i64_v2i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -861,7 +861,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW-LABEL: icmp0_v2i64_v2i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -871,7 +871,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL-LABEL: icmp0_v2i64_v2i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -883,8 +883,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i32_v4i1(<4 x i32>) {
-; SSE2-LABEL: icmp_v4i32_v4i1:
+define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
+; SSE2-LABEL: icmp0_v4i32_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -893,19 +893,19 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v4i32_v4i1:
+; SSE41-LABEL: icmp0_v4i32_v4i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX-LABEL: icmp0_v4i32_v4i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F-LABEL: icmp0_v4i32_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -915,7 +915,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW-LABEL: icmp0_v4i32_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestmd %zmm0, %zmm0, %k0
@@ -925,7 +925,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL-LABEL: icmp0_v4i32_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -937,8 +937,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i16>) {
-; SSE-LABEL: icmp_v8i16_v8i1:
+define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: icmp0_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
@@ -948,7 +948,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX-LABEL: icmp0_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -958,7 +958,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F-LABEL: icmp0_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -970,7 +970,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW-LABEL: icmp0_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -980,7 +980,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL-LABEL: icmp0_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -992,8 +992,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i8_v16i1(<16 x i8>) {
-; SSE2-LABEL: icmp_v16i8_v16i1:
+define i1 @icmp0_v16i8_v16i1(<16 x i8>) {
+; SSE2-LABEL: icmp0_v16i8_v16i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
@@ -1002,25 +1002,25 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v16i8_v16i1:
+; SSE41-LABEL: icmp0_v16i8_v16i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX-LABEL: icmp0_v16i8_v16i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F-LABEL: icmp0_v16i8_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptest %xmm0, %xmm0
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW-LABEL: icmp0_v16i8_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1029,7 +1029,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL-LABEL: icmp0_v16i8_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kortestw %k0, %k0
@@ -1040,8 +1040,8 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i64_v4i1(<4 x i64>) {
-; SSE2-LABEL: icmp_v4i64_v4i1:
+define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
+; SSE2-LABEL: icmp0_v4i64_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1053,7 +1053,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41-LABEL: icmp0_v4i64_v4i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm1
@@ -1064,7 +1064,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1-LABEL: icmp0_v4i64_v4i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1077,14 +1077,14 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2-LABEL: icmp0_v4i64_v4i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F-LABEL: icmp0_v4i64_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -1094,7 +1094,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW-LABEL: icmp0_v4i64_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestmq %zmm0, %zmm0, %k0
@@ -1104,7 +1104,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL-LABEL: icmp0_v4i64_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1117,8 +1117,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i32_v8i1(<8 x i32>) {
-; SSE-LABEL: icmp_v8i32_v8i1:
+define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
+; SSE-LABEL: icmp0_v8i32_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1130,7 +1130,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1-LABEL: icmp0_v8i32_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1143,14 +1143,14 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2-LABEL: icmp0_v8i32_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F-LABEL: icmp0_v8i32_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1160,7 +1160,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW-LABEL: icmp0_v8i32_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1170,7 +1170,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL-LABEL: icmp0_v8i32_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1183,8 +1183,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i16_v16i1(<16 x i16>) {
-; SSE-LABEL: icmp_v16i16_v16i1:
+define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
+; SSE-LABEL: icmp0_v16i16_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
@@ -1195,7 +1195,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1-LABEL: icmp0_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1208,7 +1208,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2-LABEL: icmp0_v16i16_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1218,7 +1218,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F-LABEL: icmp0_v16i16_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1229,7 +1229,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW-LABEL: icmp0_v16i16_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -1238,7 +1238,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL-LABEL: icmp0_v16i16_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kortestw %k0, %k0
@@ -1250,8 +1250,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i8_v32i1(<32 x i8>) {
-; SSE2-LABEL: icmp_v32i8_v32i1:
+define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
+; SSE2-LABEL: icmp0_v32i8_v32i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
@@ -1261,14 +1261,14 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v32i8_v32i1:
+; SSE41-LABEL: icmp0_v32i8_v32i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    por %xmm1, %xmm0
 ; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1-LABEL: icmp0_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1281,14 +1281,14 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2-LABEL: icmp0_v32i8_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F-LABEL: icmp0_v32i8_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1309,7 +1309,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW-LABEL: icmp0_v32i8_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1318,7 +1318,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL-LABEL: icmp0_v32i8_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kortestd %k0, %k0
@@ -1330,8 +1330,8 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i64_v8i1(<8 x i64>) {
-; SSE2-LABEL: icmp_v8i64_v8i1:
+define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
+; SSE2-LABEL: icmp0_v8i64_v8i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1346,7 +1346,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41-LABEL: icmp0_v8i64_v8i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm4, %xmm4
 ; SSE41-NEXT:    pcmpeqq %xmm4, %xmm3
@@ -1362,7 +1362,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1-LABEL: icmp0_v8i64_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1380,7 +1380,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2-LABEL: icmp0_v8i64_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
@@ -1392,7 +1392,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F-LABEL: icmp0_v8i64_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
@@ -1401,7 +1401,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW-LABEL: icmp0_v8i64_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1410,7 +1410,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL-LABEL: icmp0_v8i64_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1423,8 +1423,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i32_v16i1(<16 x i32>) {
-; SSE-LABEL: icmp_v16i32_v16i1:
+define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
+; SSE-LABEL: icmp0_v16i32_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1439,7 +1439,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1-LABEL: icmp0_v16i32_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1457,7 +1457,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2-LABEL: icmp0_v16i32_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
@@ -1471,7 +1471,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: icmp_v16i32_v16i1:
+; AVX512-LABEL: icmp0_v16i32_v16i1:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512-NEXT:    kortestw %k0, %k0
@@ -1483,8 +1483,8 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i16_v32i1(<32 x i16>) {
-; SSE-LABEL: icmp_v32i16_v32i1:
+define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
+; SSE-LABEL: icmp0_v32i16_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqw %xmm4, %xmm1
@@ -1499,7 +1499,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1517,7 +1517,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2-LABEL: icmp0_v32i16_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
@@ -1529,7 +1529,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F-LABEL: icmp0_v32i16_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1551,7 +1551,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW-LABEL: icmp0_v32i16_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kortestd %k0, %k0
@@ -1559,7 +1559,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL-LABEL: icmp0_v32i16_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kortestd %k0, %k0
@@ -1571,8 +1571,8 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v64i8_v64i1(<64 x i8>) {
-; SSE2-LABEL: icmp_v64i8_v64i1:
+define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
+; SSE2-LABEL: icmp0_v64i8_v64i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    por %xmm3, %xmm1
 ; SSE2-NEXT:    pxor %xmm3, %xmm3
@@ -1584,7 +1584,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v64i8_v64i1:
+; SSE41-LABEL: icmp0_v64i8_v64i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    por %xmm3, %xmm1
 ; SSE41-NEXT:    por %xmm2, %xmm1
@@ -1593,7 +1593,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1-LABEL: icmp0_v64i8_v64i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
@@ -1607,7 +1607,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2-LABEL: icmp0_v64i8_v64i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
@@ -1615,7 +1615,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F-LABEL: icmp0_v64i8_v64i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1642,7 +1642,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW-LABEL: icmp0_v64i8_v64i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kortestq %k0, %k0
@@ -1650,7 +1650,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL-LABEL: icmp0_v64i8_v64i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kortestq %k0, %k0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index b06822b93fd95..b73ff78330c7b 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -820,11 +820,11 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 }
 
 ;
-; Comparison
+; Comparison With Zero
 ;
 
-define i1 @icmp_v2i64_v2i1(<2 x i64>) {
-; SSE2-LABEL: icmp_v2i64_v2i1:
+define i1 @icmp0_v2i64_v2i1(<2 x i64>) {
+; SSE2-LABEL: icmp0_v2i64_v2i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -835,7 +835,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41-LABEL: icmp0_v2i64_v2i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm1, %xmm1
 ; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
@@ -844,7 +844,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE41-NEXT:    setne %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX-LABEL: icmp0_v2i64_v2i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -853,7 +853,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F-LABEL: icmp0_v2i64_v2i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -863,7 +863,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW-LABEL: icmp0_v2i64_v2i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -873,7 +873,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL-LABEL: icmp0_v2i64_v2i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -885,8 +885,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i32_v4i1(<4 x i32>) {
-; SSE-LABEL: icmp_v4i32_v4i1:
+define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
+; SSE-LABEL: icmp0_v4i32_v4i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -895,7 +895,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX-LABEL: icmp0_v4i32_v4i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -904,7 +904,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F-LABEL: icmp0_v4i32_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -914,7 +914,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW-LABEL: icmp0_v4i32_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -924,7 +924,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL-LABEL: icmp0_v4i32_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -936,8 +936,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i16>) {
-; SSE-LABEL: icmp_v8i16_v8i1:
+define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: icmp0_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
@@ -946,7 +946,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX-LABEL: icmp0_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -955,7 +955,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F-LABEL: icmp0_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -967,7 +967,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW-LABEL: icmp0_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -977,7 +977,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL-LABEL: icmp0_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -989,8 +989,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i8_v16i1(<16 x i8>) {
-; SSE-LABEL: icmp_v16i8_v16i1:
+define i1 @icmp0_v16i8_v16i1(<16 x i8>) {
+; SSE-LABEL: icmp0_v16i8_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
@@ -999,7 +999,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX-LABEL: icmp0_v16i8_v16i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -1008,7 +1008,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F-LABEL: icmp0_v16i8_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -1017,7 +1017,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512F-NEXT:    setne %al
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW-LABEL: icmp0_v16i8_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1026,7 +1026,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL-LABEL: icmp0_v16i8_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kortestw %k0, %k0
@@ -1037,8 +1037,8 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i64_v4i1(<4 x i64>) {
-; SSE2-LABEL: icmp_v4i64_v4i1:
+define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
+; SSE2-LABEL: icmp0_v4i64_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1053,7 +1053,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41-LABEL: icmp0_v4i64_v4i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm1
@@ -1064,7 +1064,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE41-NEXT:    setne %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1-LABEL: icmp0_v4i64_v4i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1077,7 +1077,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2-LABEL: icmp0_v4i64_v4i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
@@ -1087,7 +1087,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F-LABEL: icmp0_v4i64_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -1097,7 +1097,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW-LABEL: icmp0_v4i64_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -1107,7 +1107,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL-LABEL: icmp0_v4i64_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1120,8 +1120,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i32_v8i1(<8 x i32>) {
-; SSE-LABEL: icmp_v8i32_v8i1:
+define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
+; SSE-LABEL: icmp0_v8i32_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1132,7 +1132,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1-LABEL: icmp0_v8i32_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1145,7 +1145,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2-LABEL: icmp0_v8i32_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
@@ -1155,7 +1155,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F-LABEL: icmp0_v8i32_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1165,7 +1165,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW-LABEL: icmp0_v8i32_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1175,7 +1175,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL-LABEL: icmp0_v8i32_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1188,8 +1188,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i16_v16i1(<16 x i16>) {
-; SSE-LABEL: icmp_v16i16_v16i1:
+define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
+; SSE-LABEL: icmp0_v16i16_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
@@ -1200,7 +1200,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1-LABEL: icmp0_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1213,7 +1213,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2-LABEL: icmp0_v16i16_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1223,7 +1223,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F-LABEL: icmp0_v16i16_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1234,7 +1234,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW-LABEL: icmp0_v16i16_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -1243,7 +1243,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL-LABEL: icmp0_v16i16_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kortestw %k0, %k0
@@ -1255,8 +1255,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i8_v32i1(<32 x i8>) {
-; SSE-LABEL: icmp_v32i8_v32i1:
+define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
+; SSE-LABEL: icmp0_v32i8_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -1267,7 +1267,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1-LABEL: icmp0_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1280,7 +1280,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2-LABEL: icmp0_v32i8_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1290,7 +1290,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F-LABEL: icmp0_v32i8_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1311,7 +1311,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW-LABEL: icmp0_v32i8_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1320,7 +1320,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL-LABEL: icmp0_v32i8_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kortestd %k0, %k0
@@ -1332,8 +1332,8 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i64_v8i1(<8 x i64>) {
-; SSE2-LABEL: icmp_v8i64_v8i1:
+define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
+; SSE2-LABEL: icmp0_v8i64_v8i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1356,7 +1356,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE2-NEXT:    setne %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41-LABEL: icmp0_v8i64_v8i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm4, %xmm4
 ; SSE41-NEXT:    pcmpeqq %xmm4, %xmm3
@@ -1371,7 +1371,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE41-NEXT:    setne %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1-LABEL: icmp0_v8i64_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1389,7 +1389,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2-LABEL: icmp0_v8i64_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
@@ -1401,7 +1401,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F-LABEL: icmp0_v8i64_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
@@ -1410,7 +1410,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW-LABEL: icmp0_v8i64_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1419,7 +1419,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL-LABEL: icmp0_v8i64_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1432,8 +1432,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i32_v16i1(<16 x i32>) {
-; SSE-LABEL: icmp_v16i32_v16i1:
+define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
+; SSE-LABEL: icmp0_v16i32_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1448,7 +1448,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1-LABEL: icmp0_v16i32_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1466,7 +1466,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2-LABEL: icmp0_v16i32_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
@@ -1478,7 +1478,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512-LABEL: icmp_v16i32_v16i1:
+; AVX512-LABEL: icmp0_v16i32_v16i1:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512-NEXT:    kortestw %k0, %k0
@@ -1490,8 +1490,8 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i16_v32i1(<32 x i16>) {
-; SSE-LABEL: icmp_v32i16_v32i1:
+define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
+; SSE-LABEL: icmp0_v32i16_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqw %xmm4, %xmm1
@@ -1506,7 +1506,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1524,7 +1524,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2-LABEL: icmp0_v32i16_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
@@ -1536,7 +1536,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F-LABEL: icmp0_v32i16_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1558,7 +1558,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW-LABEL: icmp0_v32i16_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kortestd %k0, %k0
@@ -1566,7 +1566,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL-LABEL: icmp0_v32i16_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kortestd %k0, %k0
@@ -1578,8 +1578,8 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v64i8_v64i1(<64 x i8>) {
-; SSE-LABEL: icmp_v64i8_v64i1:
+define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
+; SSE-LABEL: icmp0_v64i8_v64i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqb %xmm4, %xmm2
@@ -1594,7 +1594,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1-LABEL: icmp0_v64i8_v64i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm3
@@ -1612,7 +1612,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2-LABEL: icmp0_v64i8_v64i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
@@ -1624,7 +1624,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F-LABEL: icmp0_v64i8_v64i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1651,7 +1651,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW-LABEL: icmp0_v64i8_v64i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kortestq %k0, %k0
@@ -1659,7 +1659,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL-LABEL: icmp0_v64i8_v64i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kortestq %k0, %k0
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 0d0accd3f1f18..9fb94c386bab8 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -899,11 +899,11 @@ define i1 @trunc_v64i8_v64i1(<64 x i8>) {
 }
 
 ;
-; Comparison
+; Comparison With Zero
 ;
 
-define i1 @icmp_v2i64_v2i1(<2 x i64>) {
-; SSE2-LABEL: icmp_v2i64_v2i1:
+define i1 @icmp0_v2i64_v2i1(<2 x i64>) {
+; SSE2-LABEL: icmp0_v2i64_v2i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -914,7 +914,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE2-NEXT:    setnp %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41-LABEL: icmp0_v2i64_v2i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm1, %xmm1
 ; SSE41-NEXT:    pcmpeqq %xmm0, %xmm1
@@ -923,7 +923,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; SSE41-NEXT:    setnp %al
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX-LABEL: icmp0_v2i64_v2i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
@@ -932,7 +932,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F-LABEL: icmp0_v2i64_v2i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -942,7 +942,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW-LABEL: icmp0_v2i64_v2i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -952,7 +952,7 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL-LABEL: icmp0_v2i64_v2i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -964,8 +964,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i32_v4i1(<4 x i32>) {
-; SSE-LABEL: icmp_v4i32_v4i1:
+define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
+; SSE-LABEL: icmp0_v4i32_v4i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqd %xmm0, %xmm1
@@ -974,7 +974,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX-LABEL: icmp0_v4i32_v4i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
@@ -983,7 +983,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F-LABEL: icmp0_v4i32_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -993,7 +993,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW-LABEL: icmp0_v4i32_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1003,7 +1003,7 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL-LABEL: icmp0_v4i32_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1015,8 +1015,8 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i16_v8i1(<8 x i16>) {
-; SSE-LABEL: icmp_v8i16_v8i1:
+define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
+; SSE-LABEL: icmp0_v8i16_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
@@ -1026,7 +1026,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX-LABEL: icmp0_v8i16_v8i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -1036,7 +1036,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F-LABEL: icmp0_v8i16_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
@@ -1048,7 +1048,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW-LABEL: icmp0_v8i16_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -1058,7 +1058,7 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL-LABEL: icmp0_v8i16_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1070,8 +1070,8 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i8_v16i1(<16 x i8>) {
-; SSE-LABEL: icmp_v16i8_v16i1:
+define i1 @icmp0_v16i8_v16i1(<16 x i8>) {
+; SSE-LABEL: icmp0_v16i8_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm1, %xmm1
 ; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
@@ -1080,7 +1080,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX-LABEL: icmp0_v16i8_v16i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -1089,7 +1089,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX-NEXT:    setnp %al
 ; AVX-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F-LABEL: icmp0_v16i8_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
@@ -1098,7 +1098,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512F-NEXT:    setnp %al
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW-LABEL: icmp0_v16i8_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1110,7 +1110,7 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL-LABEL: icmp0_v16i8_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %xmm0, %xmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1124,8 +1124,8 @@ define i1 @icmp_v16i8_v16i1(<16 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v4i64_v4i1(<4 x i64>) {
-; SSE2-LABEL: icmp_v4i64_v4i1:
+define i1 @icmp0_v4i64_v4i1(<4 x i64>) {
+; SSE2-LABEL: icmp0_v4i64_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm2, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1140,7 +1140,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE2-NEXT:    setnp %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41-LABEL: icmp0_v4i64_v4i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm2, %xmm2
 ; SSE41-NEXT:    pcmpeqq %xmm2, %xmm1
@@ -1151,7 +1151,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; SSE41-NEXT:    setnp %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1-LABEL: icmp0_v4i64_v4i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1164,7 +1164,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2-LABEL: icmp0_v4i64_v4i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
@@ -1174,7 +1174,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F-LABEL: icmp0_v4i64_v4i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -1184,7 +1184,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW-LABEL: icmp0_v4i64_v4i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
@@ -1194,7 +1194,7 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL-LABEL: icmp0_v4i64_v4i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1207,8 +1207,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i32_v8i1(<8 x i32>) {
-; SSE-LABEL: icmp_v8i32_v8i1:
+define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
+; SSE-LABEL: icmp0_v8i32_v8i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqd %xmm2, %xmm1
@@ -1220,7 +1220,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1-LABEL: icmp0_v8i32_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1233,7 +1233,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2-LABEL: icmp0_v8i32_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
@@ -1243,7 +1243,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F-LABEL: icmp0_v8i32_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1253,7 +1253,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW-LABEL: icmp0_v8i32_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
@@ -1263,7 +1263,7 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL-LABEL: icmp0_v8i32_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1276,8 +1276,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i16_v16i1(<16 x i16>) {
-; SSE-LABEL: icmp_v16i16_v16i1:
+define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
+; SSE-LABEL: icmp0_v16i16_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
@@ -1288,7 +1288,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1-LABEL: icmp0_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1301,7 +1301,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2-LABEL: icmp0_v16i16_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1313,7 +1313,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F-LABEL: icmp0_v16i16_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
@@ -1327,7 +1327,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW-LABEL: icmp0_v16i16_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
@@ -1339,7 +1339,7 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL-LABEL: icmp0_v16i16_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1354,8 +1354,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i8_v32i1(<32 x i8>) {
-; SSE-LABEL: icmp_v32i8_v32i1:
+define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
+; SSE-LABEL: icmp0_v32i8_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm2, %xmm2
 ; SSE-NEXT:    pcmpeqb %xmm2, %xmm1
@@ -1366,7 +1366,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1-LABEL: icmp0_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1379,7 +1379,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2-LABEL: icmp0_v32i8_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1392,7 +1392,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F-LABEL: icmp0_v32i8_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vpxor %xmm1, %xmm1, %xmm1
 ; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
@@ -1413,7 +1413,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW-LABEL: icmp0_v32i8_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
@@ -1426,7 +1426,7 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL-LABEL: icmp0_v32i8_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %ymm0, %ymm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1442,8 +1442,8 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>) {
   ret i1 %b
 }
 
-define i1 @icmp_v8i64_v8i1(<8 x i64>) {
-; SSE2-LABEL: icmp_v8i64_v8i1:
+define i1 @icmp0_v8i64_v8i1(<8 x i64>) {
+; SSE2-LABEL: icmp0_v8i64_v8i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pxor %xmm4, %xmm4
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1467,7 +1467,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE2-NEXT:    setnp %al
 ; SSE2-NEXT:    retq
 ;
-; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41-LABEL: icmp0_v8i64_v8i1:
 ; SSE41:       # %bb.0:
 ; SSE41-NEXT:    pxor %xmm4, %xmm4
 ; SSE41-NEXT:    pcmpeqq %xmm4, %xmm3
@@ -1483,7 +1483,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; SSE41-NEXT:    setnp %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1-LABEL: icmp0_v8i64_v8i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1501,7 +1501,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2-LABEL: icmp0_v8i64_v8i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm1, %ymm1
@@ -1514,7 +1514,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F-LABEL: icmp0_v8i64_v8i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
@@ -1523,7 +1523,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW-LABEL: icmp0_v8i64_v8i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1532,7 +1532,7 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL-LABEL: icmp0_v8i64_v8i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmq %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1545,8 +1545,8 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>) {
   ret i1 %b
 }
 
-define i1 @icmp_v16i32_v16i1(<16 x i32>) {
-; SSE-LABEL: icmp_v16i32_v16i1:
+define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
+; SSE-LABEL: icmp0_v16i32_v16i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqd %xmm4, %xmm3
@@ -1561,7 +1561,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1-LABEL: icmp0_v16i32_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1579,7 +1579,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2-LABEL: icmp0_v16i32_v16i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm1, %ymm1
@@ -1594,7 +1594,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v16i32_v16i1:
+; AVX512F-LABEL: icmp0_v16i32_v16i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kmovw %k0, %eax
@@ -1605,7 +1605,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v16i32_v16i1:
+; AVX512BW-LABEL: icmp0_v16i32_v16i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1616,7 +1616,7 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v16i32_v16i1:
+; AVX512VL-LABEL: icmp0_v16i32_v16i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmd %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1631,8 +1631,8 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>) {
   ret i1 %b
 }
 
-define i1 @icmp_v32i16_v32i1(<32 x i16>) {
-; SSE-LABEL: icmp_v32i16_v32i1:
+define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
+; SSE-LABEL: icmp0_v32i16_v32i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqw %xmm4, %xmm1
@@ -1647,7 +1647,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
@@ -1665,7 +1665,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2-LABEL: icmp0_v32i16_v32i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
@@ -1681,7 +1681,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F-LABEL: icmp0_v32i16_v32i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1703,7 +1703,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW-LABEL: icmp0_v32i16_v32i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovd %k0, %eax
@@ -1715,7 +1715,7 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL-LABEL: icmp0_v32i16_v32i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmw %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovd %k0, %eax
@@ -1731,8 +1731,8 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>) {
   ret i1 %b
 }
 
-define i1 @icmp_v64i8_v64i1(<64 x i8>) {
-; SSE-LABEL: icmp_v64i8_v64i1:
+define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
+; SSE-LABEL: icmp0_v64i8_v64i1:
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    pxor %xmm4, %xmm4
 ; SSE-NEXT:    pcmpeqb %xmm4, %xmm2
@@ -1747,7 +1747,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; SSE-NEXT:    setnp %al
 ; SSE-NEXT:    retq
 ;
-; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1-LABEL: icmp0_v64i8_v64i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm3
@@ -1765,7 +1765,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2-LABEL: icmp0_v64i8_v64i1:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm1
@@ -1780,7 +1780,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
 ;
-; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F-LABEL: icmp0_v64i8_v64i1:
 ; AVX512F:       # %bb.0:
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512F-NEXT:    vpxor %xmm2, %xmm2, %xmm2
@@ -1807,7 +1807,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512F-NEXT:    vzeroupper
 ; AVX512F-NEXT:    retq
 ;
-; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW-LABEL: icmp0_v64i8_v64i1:
 ; AVX512BW:       # %bb.0:
 ; AVX512BW-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT:    kmovq %k0, %rax
@@ -1822,7 +1822,7 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>) {
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
 ;
-; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL-LABEL: icmp0_v64i8_v64i1:
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vptestnmb %zmm0, %zmm0, %k0
 ; AVX512VL-NEXT:    kmovq %k0, %rax

From 0553f5e61ac7e919c3eb573f57ed0e1096ccbbeb Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 14:04:42 +0000
Subject: [PATCH 386/946] [X86] Add cmp-equality bool reductions

PR53379 test coverage
---
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 844 ++++++++++++++++
 .../test/CodeGen/X86/vector-reduce-or-bool.ll | 836 ++++++++++++++++
 .../CodeGen/X86/vector-reduce-xor-bool.ll     | 927 ++++++++++++++++++
 3 files changed, 2607 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index c1ddc9454b939..c9aef7b8e7404 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1662,6 +1662,850 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
   ret i1 %b
 }
 
+;
+; Comparison
+;
+
+define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
+; SSE2-LABEL: icmp_v2i64_v2i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    cmpb $3, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT:    movmskpd %xmm0, %eax
+; SSE41-NEXT:    cmpb $3, %al
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    cmpb $3, %al
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $3, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $3, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $3, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <2 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
+; SSE-LABEL: icmp_v4i32_v4i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE-NEXT:    movmskps %xmm0, %eax
+; SSE-NEXT:    cmpb $15, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskps %xmm0, %eax
+; AVX-NEXT:    cmpb $15, %al
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $15, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
+; SSE-LABEL: icmp_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpb $-1, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    cmpb $-1, %al
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    cmpb $-1, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    cmpb $-1, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $-1, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
+; SSE-LABEL: icmp_v16i8_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    cmpw $-1, %ax
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
+; AVX512F-NEXT:    cmpw $-1, %ax
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kortestw %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
+; SSE2-LABEL: icmp_v4i64_v4i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    movmskps %xmm1, %eax
+; SSE2-NEXT:    cmpb $15, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm2, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    movmskps %xmm0, %eax
+; SSE41-NEXT:    cmpb $15, %al
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskpd %ymm0, %eax
+; AVX1-NEXT:    cmpb $15, %al
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskpd %ymm0, %eax
+; AVX2-NEXT:    cmpb $15, %al
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpneqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $15, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
+; SSE-LABEL: icmp_v8i32_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpb $-1, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    cmpb $-1, %al
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    cmpb $-1, %al
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    cmpb $-1, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    cmpb $-1, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $-1, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
+; SSE-LABEL: icmp_v16i16_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kortestw %k0, %k0
+; AVX512F-NEXT:    setb %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kortestw %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
+; SSE-LABEL: icmp_v32i8_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kortestd %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
+; SSE2-LABEL: icmp_v8i64_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
+; SSE2-NEXT:    pand %xmm3, %xmm7
+; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
+; SSE2-NEXT:    pand %xmm2, %xmm3
+; SSE2-NEXT:    packssdw %xmm7, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm2, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    packsswb %xmm1, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    cmpb $-1, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm7, %xmm3
+; SSE41-NEXT:    pcmpeqq %xmm6, %xmm2
+; SSE41-NEXT:    packssdw %xmm3, %xmm2
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    packssdw %xmm2, %xmm0
+; SSE41-NEXT:    packsswb %xmm0, %xmm0
+; SSE41-NEXT:    pmovmskb %xmm0, %eax
+; SSE41-NEXT:    cmpb $-1, %al
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    cmpb $-1, %al
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    cmpb $-1, %al
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    cmpb $-1, %al
+; AVX512F-NEXT:    sete %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    cmpb $-1, %al
+; AVX512BW-NEXT:    sete %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    cmpb $-1, %al
+; AVX512VL-NEXT:    sete %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
+; SSE-LABEL: icmp_v16i32_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    cmpw $-1, %ax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: icmp_v16i32_v16i1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    setb %al
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %a = icmp eq <16 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
+; SSE-LABEL: icmp_v32i16_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    pand %xmm0, %xmm2
+; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kortestd %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
+; SSE-LABEL: icmp_v64i8_v64i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm6, %xmm2
+; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
+; SSE-NEXT:    pand %xmm3, %xmm1
+; SSE-NEXT:    pand %xmm2, %xmm1
+; SSE-NEXT:    pand %xmm0, %xmm1
+; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpand %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    sete %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    sete %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpand %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT:    vpand %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kandw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    setb %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kortestq %k0, %k0
+; AVX512VL-NEXT:    setb %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <64 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %a)
+  ret i1 %b
+}
+
 declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
 declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index b73ff78330c7b..6409a8ff4761f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -1671,6 +1671,842 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
   ret i1 %b
 }
 
+; Comparison
+;
+
+define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
+; SSE2-LABEL: icmp_v2i64_v2i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT:    movmskpd %xmm0, %eax
+; SSE41-NEXT:    testl %eax, %eax
+; SSE41-NEXT:    setne %al
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $3, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $3, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <2 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
+; SSE-LABEL: icmp_v4i32_v4i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE-NEXT:    movmskps %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskps %xmm0, %eax
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
+; SSE-LABEL: icmp_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
+; SSE-LABEL: icmp_v16i8_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    testl %eax, %eax
+; AVX-NEXT:    setne %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
+; AVX512F-NEXT:    testl %eax, %eax
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kortestw %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
+; SSE2-LABEL: icmp_v4i64_v4i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    movmskps %xmm1, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm2, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    movmskps %xmm0, %eax
+; SSE41-NEXT:    testl %eax, %eax
+; SSE41-NEXT:    setne %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskpd %ymm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskpd %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
+; SSE-LABEL: icmp_v8i32_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
+; SSE-LABEL: icmp_v16i16_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kortestw %k0, %k0
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestw %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kortestw %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
+; SSE-LABEL: icmp_v32i8_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kortestd %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
+; SSE2-LABEL: icmp_v8i64_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
+; SSE2-NEXT:    pand %xmm3, %xmm7
+; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
+; SSE2-NEXT:    pand %xmm2, %xmm3
+; SSE2-NEXT:    packssdw %xmm7, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm2, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    testl $43690, %eax # imm = 0xAAAA
+; SSE2-NEXT:    setne %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm7, %xmm3
+; SSE41-NEXT:    pcmpeqq %xmm6, %xmm2
+; SSE41-NEXT:    packssdw %xmm3, %xmm2
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    packssdw %xmm2, %xmm0
+; SSE41-NEXT:    pmovmskb %xmm0, %eax
+; SSE41-NEXT:    testl $43690, %eax # imm = 0xAAAA
+; SSE41-NEXT:    setne %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setne %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
+; SSE-LABEL: icmp_v16i32_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512-LABEL: icmp_v16i32_v16i1:
+; AVX512:       # %bb.0:
+; AVX512-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512-NEXT:    kortestw %k0, %k0
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    vzeroupper
+; AVX512-NEXT:    retq
+  %a = icmp eq <16 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
+; SSE-LABEL: icmp_v32i16_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    por %xmm0, %xmm2
+; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestd %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kortestd %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
+; SSE-LABEL: icmp_v64i8_v64i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm6, %xmm2
+; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
+; SSE-NEXT:    por %xmm3, %xmm1
+; SSE-NEXT:    por %xmm2, %xmm1
+; SSE-NEXT:    por %xmm0, %xmm1
+; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    testl %eax, %eax
+; SSE-NEXT:    setne %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpor %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    testl %eax, %eax
+; AVX1-NEXT:    setne %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    testl %eax, %eax
+; AVX2-NEXT:    setne %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpor %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT:    vpor %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    korw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kortestq %k0, %k0
+; AVX512BW-NEXT:    setne %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kortestq %k0, %k0
+; AVX512VL-NEXT:    setne %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <64 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %a)
+  ret i1 %b
+}
+
 declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
 declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 9fb94c386bab8..493a1168a84ae 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -1841,6 +1841,933 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
   ret i1 %b
 }
 
+; Comparison
+;
+
+define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
+; SSE2-LABEL: icmp_v2i64_v2i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    movmskpd %xmm1, %eax
+; SSE2-NEXT:    testb %al, %al
+; SSE2-NEXT:    setnp %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v2i64_v2i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
+; SSE41-NEXT:    movmskpd %xmm0, %eax
+; SSE41-NEXT:    testb %al, %al
+; SSE41-NEXT:    setnp %al
+; SSE41-NEXT:    retq
+;
+; AVX-LABEL: icmp_v2i64_v2i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskpd %xmm0, %eax
+; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    setnp %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v2i64_v2i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $3, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v2i64_v2i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $3, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v2i64_v2i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb $3, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <2 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
+; SSE-LABEL: icmp_v4i32_v4i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE-NEXT:    movmskps %xmm0, %eax
+; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v4i32_v4i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovmskps %xmm0, %eax
+; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    setnp %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i32_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i32_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i32_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb $15, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
+; SSE-LABEL: icmp_v8i16_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v8i16_v8i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    testb %al, %al
+; AVX-NEXT:    setnp %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i16_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i16_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i16_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
+; SSE-LABEL: icmp_v16i8_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: icmp_v16i8_v16i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    xorb %ah, %al
+; AVX-NEXT:    setnp %al
+; AVX-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i8_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
+; AVX512F-NEXT:    xorb %ah, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i8_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $8, %ecx
+; AVX512BW-NEXT:    xorb %al, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i8_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $8, %ecx
+; AVX512VL-NEXT:    xorb %al, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
+; SSE2-LABEL: icmp_v4i64_v4i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    movmskps %xmm1, %eax
+; SSE2-NEXT:    testb %al, %al
+; SSE2-NEXT:    setnp %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v4i64_v4i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm3, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm2, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    movmskps %xmm0, %eax
+; SSE41-NEXT:    testb %al, %al
+; SSE41-NEXT:    setnp %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v4i64_v4i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskpd %ymm0, %eax
+; AVX1-NEXT:    testb %al, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v4i64_v4i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskpd %ymm0, %eax
+; AVX2-NEXT:    testb %al, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v4i64_v4i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb $15, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v4i64_v4i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb $15, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v4i64_v4i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb $15, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <4 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
+; SSE-LABEL: icmp_v8i32_v8i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm2, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm0, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    testb %al, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i32_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    testb %al, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i32_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    testb %al, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i32_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i32_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i32_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
+; SSE-LABEL: icmp_v16i16_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i16_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i16_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    xorb %ah, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i16_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    movl %eax, %ecx
+; AVX512F-NEXT:    shrl $8, %ecx
+; AVX512F-NEXT:    xorb %al, %cl
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i16_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $8, %ecx
+; AVX512BW-NEXT:    xorb %al, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i16_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $8, %ecx
+; AVX512VL-NEXT:    xorb %al, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
+; SSE-LABEL: icmp_v32i8_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i8_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i8_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $16, %ecx
+; AVX2-NEXT:    xorl %eax, %ecx
+; AVX2-NEXT:    xorb %ch, %cl
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i8_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i8_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $16, %ecx
+; AVX512BW-NEXT:    xorl %eax, %ecx
+; AVX512BW-NEXT:    xorb %ch, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i8_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $16, %ecx
+; AVX512VL-NEXT:    xorl %eax, %ecx
+; AVX512VL-NEXT:    xorb %ch, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
+; SSE2-LABEL: icmp_v8i64_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
+; SSE2-NEXT:    pand %xmm3, %xmm7
+; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
+; SSE2-NEXT:    pand %xmm2, %xmm3
+; SSE2-NEXT:    packssdw %xmm7, %xmm3
+; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
+; SSE2-NEXT:    pand %xmm1, %xmm2
+; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
+; SSE2-NEXT:    pand %xmm0, %xmm1
+; SSE2-NEXT:    packssdw %xmm2, %xmm1
+; SSE2-NEXT:    packssdw %xmm3, %xmm1
+; SSE2-NEXT:    packsswb %xmm1, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    testb %al, %al
+; SSE2-NEXT:    setnp %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v8i64_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    pcmpeqq %xmm7, %xmm3
+; SSE41-NEXT:    pcmpeqq %xmm6, %xmm2
+; SSE41-NEXT:    packssdw %xmm3, %xmm2
+; SSE41-NEXT:    pcmpeqq %xmm5, %xmm1
+; SSE41-NEXT:    pcmpeqq %xmm4, %xmm0
+; SSE41-NEXT:    packssdw %xmm1, %xmm0
+; SSE41-NEXT:    packssdw %xmm2, %xmm0
+; SSE41-NEXT:    packsswb %xmm0, %xmm0
+; SSE41-NEXT:    pmovmskb %xmm0, %eax
+; SSE41-NEXT:    testb %al, %al
+; SSE41-NEXT:    setnp %al
+; SSE41-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v8i64_v8i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqq %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqq %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqq %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT:    vmovmskps %ymm0, %eax
+; AVX1-NEXT:    testb %al, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v8i64_v8i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqq %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqq %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vmovmskps %ymm0, %eax
+; AVX2-NEXT:    testb %al, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v8i64_v8i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v8i64_v8i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    testb %al, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v8i64_v8i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    testb %al, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <8 x i64> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
+; SSE-LABEL: icmp_v16i32_v16i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqd %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqd %xmm6, %xmm2
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    pcmpeqd %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqd %xmm4, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v16i32_v16i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqd %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpackssdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v16i32_v16i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqd %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqd %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT:    vpmovmskb %xmm0, %eax
+; AVX2-NEXT:    xorb %ah, %al
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v16i32_v16i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    movl %eax, %ecx
+; AVX512F-NEXT:    shrl $8, %ecx
+; AVX512F-NEXT:    xorb %al, %cl
+; AVX512F-NEXT:    setnp %al
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v16i32_v16i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $8, %ecx
+; AVX512BW-NEXT:    xorb %al, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v16i32_v16i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $8, %ecx
+; AVX512VL-NEXT:    xorb %al, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <16 x i32> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
+; SSE-LABEL: icmp_v32i16_v32i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
+; SSE-NEXT:    packsswb %xmm1, %xmm0
+; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
+; SSE-NEXT:    packsswb %xmm3, %xmm2
+; SSE-NEXT:    pxor %xmm0, %xmm2
+; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v32i16_v32i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
+; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
+; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v32i16_v32i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $16, %ecx
+; AVX2-NEXT:    xorl %eax, %ecx
+; AVX2-NEXT:    xorb %ch, %cl
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v32i16_v32i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vpxor %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v32i16_v32i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovd %k0, %eax
+; AVX512BW-NEXT:    movl %eax, %ecx
+; AVX512BW-NEXT:    shrl $16, %ecx
+; AVX512BW-NEXT:    xorl %eax, %ecx
+; AVX512BW-NEXT:    xorb %ch, %cl
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v32i16_v32i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovd %k0, %eax
+; AVX512VL-NEXT:    movl %eax, %ecx
+; AVX512VL-NEXT:    shrl $16, %ecx
+; AVX512VL-NEXT:    xorl %eax, %ecx
+; AVX512VL-NEXT:    xorb %ch, %cl
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <32 x i16> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %a)
+  ret i1 %b
+}
+
+define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
+; SSE-LABEL: icmp_v64i8_v64i1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pcmpeqb %xmm6, %xmm2
+; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
+; SSE-NEXT:    pcmpeqb %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
+; SSE-NEXT:    pxor %xmm3, %xmm1
+; SSE-NEXT:    pxor %xmm2, %xmm1
+; SSE-NEXT:    pxor %xmm0, %xmm1
+; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    xorb %ah, %al
+; SSE-NEXT:    setnp %al
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: icmp_v64i8_v64i1:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm3
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm0, %xmm4, %xmm0
+; AVX1-NEXT:    vpxor %xmm0, %xmm5, %xmm0
+; AVX1-NEXT:    vpmovmskb %xmm0, %eax
+; AVX1-NEXT:    xorb %ah, %al
+; AVX1-NEXT:    setnp %al
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: icmp_v64i8_v64i1:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpmovmskb %ymm0, %eax
+; AVX2-NEXT:    movl %eax, %ecx
+; AVX2-NEXT:    shrl $16, %ecx
+; AVX2-NEXT:    xorl %eax, %ecx
+; AVX2-NEXT:    xorb %ch, %cl
+; AVX2-NEXT:    setnp %al
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+;
+; AVX512F-LABEL: icmp_v64i8_v64i1:
+; AVX512F:       # %bb.0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
+; AVX512F-NEXT:    vpcmpeqb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX512F-NEXT:    vextracti128 $1, %ymm2, %xmm1
+; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm3
+; AVX512F-NEXT:    vpxor %xmm1, %xmm3, %xmm1
+; AVX512F-NEXT:    vpxor %xmm1, %xmm2, %xmm1
+; AVX512F-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT:    kshiftrw $8, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $4, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $2, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kshiftrw $1, %k0, %k1
+; AVX512F-NEXT:    kxorw %k1, %k0, %k0
+; AVX512F-NEXT:    kmovw %k0, %eax
+; AVX512F-NEXT:    # kill: def $al killed $al killed $eax
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: icmp_v64i8_v64i1:
+; AVX512BW:       # %bb.0:
+; AVX512BW-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT:    kmovq %k0, %rax
+; AVX512BW-NEXT:    movq %rax, %rcx
+; AVX512BW-NEXT:    shrq $32, %rcx
+; AVX512BW-NEXT:    xorl %eax, %ecx
+; AVX512BW-NEXT:    movl %ecx, %eax
+; AVX512BW-NEXT:    shrl $16, %eax
+; AVX512BW-NEXT:    xorl %ecx, %eax
+; AVX512BW-NEXT:    xorb %ah, %al
+; AVX512BW-NEXT:    setnp %al
+; AVX512BW-NEXT:    vzeroupper
+; AVX512BW-NEXT:    retq
+;
+; AVX512VL-LABEL: icmp_v64i8_v64i1:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
+; AVX512VL-NEXT:    kmovq %k0, %rax
+; AVX512VL-NEXT:    movq %rax, %rcx
+; AVX512VL-NEXT:    shrq $32, %rcx
+; AVX512VL-NEXT:    xorl %eax, %ecx
+; AVX512VL-NEXT:    movl %ecx, %eax
+; AVX512VL-NEXT:    shrl $16, %eax
+; AVX512VL-NEXT:    xorl %ecx, %eax
+; AVX512VL-NEXT:    xorb %ah, %al
+; AVX512VL-NEXT:    setnp %al
+; AVX512VL-NEXT:    vzeroupper
+; AVX512VL-NEXT:    retq
+  %a = icmp eq <64 x i8> %0, %1
+  %b = call i1 @llvm.vector.reduce.xor.v64i1(<64 x i1> %a)
+  ret i1 %b
+}
+
 declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
 declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
 declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)

From 34aedbe90d7667a3fd1e0427808648364b594034 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Mon, 24 Jan 2022 12:03:16 +0000
Subject: [PATCH 387/946] [AArch64] Regenerate CHECK lines for
 llvm/test/CodeGen/AArch64/sve2-int-mul.ll

---
 llvm/test/CodeGen/AArch64/sve2-int-mul.ll | 208 +++++++++++++---------
 1 file changed, 121 insertions(+), 87 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
index 6e495b0989c22..57d5775f7c39e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
@@ -1,13 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
 
 ;
 ; MUL with SPLAT
 ;
 define <vscale x 8 x i16> @mul_i16_imm(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: mul_i16_imm
-; CHECK: mov w[[W:[0-9]+]], #255
-; CHECK-NEXT: mov z1.h, w[[W]]
-; CHECK-NEXT: mul z0.h, z0.h, z1.h
+; CHECK-LABEL: mul_i16_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #255
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 255, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %res = mul <vscale x 8 x i16> %a, %splat
@@ -15,10 +18,12 @@ define <vscale x 8 x i16> @mul_i16_imm(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 8 x i16> @mul_i16_imm_neg(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: mul_i16_imm_neg
-; CHECK: mov w[[W:[0-9]+]], #-200
-; CHECK-NEXT: mov z1.h, w[[W]]
-; CHECK-NEXT: mul z0.h, z0.h, z1.h
+; CHECK-LABEL: mul_i16_imm_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-200
+; CHECK-NEXT:    mov z1.h, w8
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 8 x i16> undef, i16 -200, i32 0
   %splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
   %res = mul <vscale x 8 x i16> %a, %splat
@@ -26,10 +31,12 @@ define <vscale x 8 x i16> @mul_i16_imm_neg(<vscale x 8 x i16> %a) {
 }
 
 define <vscale x 4 x i32> @mul_i32_imm(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: mul_i32_imm
-; CHECK: mov w[[W:[0-9]+]], #255
-; CHECK-NEXT: mov z1.s, w[[W]]
-; CHECK-NEXT: mul z0.s, z0.s, z1.s
+; CHECK-LABEL: mul_i32_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #255
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 255, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %res = mul <vscale x 4 x i32> %a, %splat
@@ -37,10 +44,12 @@ define <vscale x 4 x i32> @mul_i32_imm(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: mul_i32_imm_neg
-; CHECK: mov w[[W:[0-9]+]], #-200
-; CHECK-NEXT: mov z1.s, w[[W]]
-; CHECK-NEXT: mul z0.s, z0.s, z1.s
+; CHECK-LABEL: mul_i32_imm_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #-200
+; CHECK-NEXT:    mov z1.s, w8
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 4 x i32> undef, i32 -200, i32 0
   %splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
   %res = mul <vscale x 4 x i32> %a, %splat
@@ -48,10 +57,12 @@ define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
 }
 
 define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: mul_i64_imm
-; CHECK: mov w[[X:[0-9]+]], #255
-; CHECK-NEXT: z1.d, x[[X]]
-; CHECK-NEXT: mul z0.d, z0.d, z1.d
+; CHECK-LABEL: mul_i64_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #255
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %res = mul <vscale x 2 x i64> %a, %splat
@@ -59,10 +70,12 @@ define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
 }
 
 define <vscale x 2 x i64> @mul_i64_imm_neg(<vscale x 2 x i64> %a) {
-; CHECK-LABEL: mul_i64_imm_neg
-; CHECK: mov x[[X:[0-9]+]], #-200
-; CHECK-NEXT: z1.d, x[[X]]
-; CHECK-NEXT: mul z0.d, z0.d, z1.d
+; CHECK-LABEL: mul_i64_imm_neg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-200
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 -200, i32 0
   %splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %res = mul <vscale x 2 x i64> %a, %splat
@@ -73,37 +86,41 @@ define <vscale x 2 x i64> @mul_i64_imm_neg(<vscale x 2 x i64> %a) {
 ; MUL (vector, unpredicated)
 ;
 define <vscale x 16 x i8> @mul_i8(<vscale x 16 x i8> %a,
+; CHECK-LABEL: mul_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                   <vscale x 16 x i8> %b) {
-; CHECK-LABEL: mul_i8
-; CHECK: mul z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %res = mul <vscale x 16 x i8> %a, %b
   ret <vscale x 16 x i8> %res
 }
 
 define <vscale x 8 x i16> @mul_i16(<vscale x 8 x i16> %a,
+; CHECK-LABEL: mul_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                   <vscale x 8 x i16> %b) {
-; CHECK-LABEL: mul_i16
-; CHECK: mul z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %res = mul <vscale x 8 x i16> %a, %b
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 4 x i32> @mul_i32(<vscale x 4 x i32> %a,
+; CHECK-LABEL: mul_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                   <vscale x 4 x i32> %b) {
-; CHECK-LABEL: mul_i32
-; CHECK: mul z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %res = mul <vscale x 4 x i32> %a, %b
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a,
+; CHECK-LABEL: mul_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mul z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                   <vscale x 2 x i64> %b) {
-; CHECK-LABEL: mul_i64
-; CHECK: mul z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %res = mul <vscale x 2 x i64> %a, %b
   ret <vscale x 2 x i64> %res
 }
@@ -112,10 +129,11 @@ define <vscale x 2 x i64> @mul_i64(<vscale x 2 x i64> %a,
 ; SMULH (vector, unpredicated)
 ;
 define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a,
+; CHECK-LABEL: smulh_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                     <vscale x 16 x i8> %b) {
-; CHECK-LABEL: smulh_i8
-; CHECK: smulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %sel = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -123,10 +141,11 @@ define <vscale x 16 x i8> @smulh_i8(<vscale x 16 x i8> %a,
 }
 
 define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a,
+; CHECK-LABEL: smulh_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                      <vscale x 8 x i16> %b) {
-; CHECK-LABEL: smulh_i16
-; CHECK: smulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %sel = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -134,10 +153,11 @@ define <vscale x 8 x i16> @smulh_i16(<vscale x 8 x i16> %a,
 }
 
 define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a,
+; CHECK-LABEL: smulh_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                      <vscale x 4 x i32> %b) {
-; CHECK-LABEL: smulh_i32
-; CHECK: smulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %sel = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -145,10 +165,11 @@ define <vscale x 4 x i32> @smulh_i32(<vscale x 4 x i32> %a,
 }
 
 define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a,
+; CHECK-LABEL: smulh_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                      <vscale x 2 x i64> %b) {
-; CHECK-LABEL: smulh_i64
-; CHECK: smulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %sel = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -159,10 +180,11 @@ define <vscale x 2 x i64> @smulh_i64(<vscale x 2 x i64> %a,
 ; UMULH (vector, unpredicated)
 ;
 define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a,
+; CHECK-LABEL: umulh_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                     <vscale x 16 x i8> %b) {
-; CHECK-LABEL: umulh_i8
-; CHECK: umulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %sel = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %sel, <vscale x 16 x i8> %a,
                                                                  <vscale x 16 x i8> %b)
@@ -170,10 +192,11 @@ define <vscale x 16 x i8> @umulh_i8(<vscale x 16 x i8> %a,
 }
 
 define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a,
+; CHECK-LABEL: umulh_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                      <vscale x 8 x i16> %b) {
-; CHECK-LABEL: umulh_i16
-; CHECK: umulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %sel = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %sel, <vscale x 8 x i16> %a,
                                                                  <vscale x 8 x i16> %b)
@@ -181,10 +204,11 @@ define <vscale x 8 x i16> @umulh_i16(<vscale x 8 x i16> %a,
 }
 
 define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a,
+; CHECK-LABEL: umulh_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                      <vscale x 4 x i32> %b) {
-; CHECK-LABEL: umulh_i32
-; CHECK: umulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %sel = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %sel, <vscale x 4 x i32> %a,
                                                                  <vscale x 4 x i32> %b)
@@ -192,10 +216,11 @@ define <vscale x 4 x i32> @umulh_i32(<vscale x 4 x i32> %a,
 }
 
 define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a,
+; CHECK-LABEL: umulh_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    umulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                      <vscale x 2 x i64> %b) {
-; CHECK-LABEL: umulh_i64
-; CHECK: umulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %sel = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %sel, <vscale x 2 x i64> %a,
                                                                  <vscale x 2 x i64> %b)
@@ -206,10 +231,11 @@ define <vscale x 2 x i64> @umulh_i64(<vscale x 2 x i64> %a,
 ; PMUL (vector, unpredicated)
 ;
 define <vscale x 16 x i8> @pmul_i8(<vscale x 16 x i8> %a,
+; CHECK-LABEL: pmul_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pmul z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                    <vscale x 16 x i8> %b) {
-; CHECK-LABEL: pmul_i8
-; CHECK: pmul z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.pmul.nxv16i8(<vscale x 16 x i8> %a,
                                                                 <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %res
@@ -219,40 +245,44 @@ define <vscale x 16 x i8> @pmul_i8(<vscale x 16 x i8> %a,
 ; SQDMULH (vector, unpredicated)
 ;
 define <vscale x 16 x i8> @sqdmulh_i8(<vscale x 16 x i8> %a,
+; CHECK-LABEL: sqdmulh_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                       <vscale x 16 x i8> %b) {
-; CHECK-LABEL: sqdmulh_i8
-; CHECK: sqdmulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8> %a,
                                                                    <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %res
 }
 
 define <vscale x 8 x i16> @sqdmulh_i16(<vscale x 8 x i16> %a,
+; CHECK-LABEL: sqdmulh_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                        <vscale x 8 x i16> %b) {
-; CHECK-LABEL: sqdmulh_i16
-; CHECK: sqdmulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16> %a,
                                                                    <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 4 x i32> @sqdmulh_i32(<vscale x 4 x i32> %a,
+; CHECK-LABEL: sqdmulh_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                        <vscale x 4 x i32> %b) {
-; CHECK-LABEL: sqdmulh_i32
-; CHECK: sqdmulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32> %a,
                                                                    <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a,
+; CHECK-LABEL: sqdmulh_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                        <vscale x 2 x i64> %b) {
-; CHECK-LABEL: sqdmulh_i64
-; CHECK: sqdmulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64> %a,
                                                                    <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %res
@@ -262,40 +292,44 @@ define <vscale x 2 x i64> @sqdmulh_i64(<vscale x 2 x i64> %a,
 ; SQRDMULH (vector, unpredicated)
 ;
 define <vscale x 16 x i8> @sqrdmulh_i8(<vscale x 16 x i8> %a,
+; CHECK-LABEL: sqrdmulh_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.b, z0.b, z1.b
+; CHECK-NEXT:    ret
                                        <vscale x 16 x i8> %b) {
-; CHECK-LABEL: sqrdmulh_i8
-; CHECK: sqrdmulh z0.b, z0.b, z1.b
-; CHECK-NEXT: ret
   %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrdmulh.nxv16i8(<vscale x 16 x i8> %a,
                                                                     <vscale x 16 x i8> %b)
   ret <vscale x 16 x i8> %res
 }
 
 define <vscale x 8 x i16> @sqrdmulh_i16(<vscale x 8 x i16> %a,
+; CHECK-LABEL: sqrdmulh_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.h, z0.h, z1.h
+; CHECK-NEXT:    ret
                                         <vscale x 8 x i16> %b) {
-; CHECK-LABEL: sqrdmulh_i16
-; CHECK: sqrdmulh z0.h, z0.h, z1.h
-; CHECK-NEXT: ret
   %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.nxv8i16(<vscale x 8 x i16> %a,
                                                                     <vscale x 8 x i16> %b)
   ret <vscale x 8 x i16> %res
 }
 
 define <vscale x 4 x i32> @sqrdmulh_i32(<vscale x 4 x i32> %a,
+; CHECK-LABEL: sqrdmulh_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
                                         <vscale x 4 x i32> %b) {
-; CHECK-LABEL: sqrdmulh_i32
-; CHECK: sqrdmulh z0.s, z0.s, z1.s
-; CHECK-NEXT: ret
   %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.nxv4i32(<vscale x 4 x i32> %a,
                                                                     <vscale x 4 x i32> %b)
   ret <vscale x 4 x i32> %res
 }
 
 define <vscale x 2 x i64> @sqrdmulh_i64(<vscale x 2 x i64> %a,
+; CHECK-LABEL: sqrdmulh_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqrdmulh z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
                                         <vscale x 2 x i64> %b) {
-; CHECK-LABEL: sqrdmulh_i64
-; CHECK: sqrdmulh z0.d, z0.d, z1.d
-; CHECK-NEXT: ret
   %res = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.nxv2i64(<vscale x 2 x i64> %a,
                                                                     <vscale x 2 x i64> %b)
   ret <vscale x 2 x i64> %res

From 5e5efd8a91f2e340e79a73bedbc6ab66ad4a4281 Mon Sep 17 00:00:00 2001
From: ksyx <18738953+ksyx@users.noreply.github.com>
Date: Mon, 17 Jan 2022 18:43:49 -0500
Subject: [PATCH 388/946] [clang-format] Fix SeparateDefinitionBlocks issues

- Fixes https://github.com/llvm/llvm-project/issues/53227 that wrongly
  indents multiline comments
- Fixes wrong detection of single-line opening braces when used along
  with those only opening scopes, causing crashes due to duplicated
  replacements on the same token:
    void foo()
    {
      {
        int x;
      }
    }
- Fixes wrong recognition of first line of definition when the line
  starts with block comment, causing crashes due to duplicated
  replacements on the same token for this leads toward skipping the line
  starting with inline block comment:
    /*
      Some descriptions about function
    */
    /*inline*/ void bar() {
    }
- Fixes wrong recognition of enum when used as a type name rather than
  starting definition block, causing crashes due to duplicated
  replacements on the same token since both actions for enum and for
  definition blocks were taken place:
    void foobar(const enum EnumType e) {
    }
- Change to use function keyword for JavaScript instead of comparing
  strings
- Resolves formatting conflict with options EmptyLineAfterAccessModifier
  and EmptyLineBeforeAccessModifier (prompts with --dry-run (-n) or
  --output-replacement-xml but no observable change)
- Recognize long (len>=5) uppercased name taking a single line as return
  type and fix the problem of adding newline below it, with adding new
  token type FunctionLikeOrFreestandingMacro and marking tokens in
  UnwrappedLineParser:
    void
    afunc(int x) {
      return;
    }
    TYPENAME
    func(int x, int y) {
      // ...
    }
- Remove redundant and repeated initialization
- Do no change to newlines before EOF

Reviewed By: MyDeveloperDay, curdeius, HazardyKnusperkeks
Differential Revision: https://reviews.llvm.org/D117520
---
 clang/lib/Format/DefinitionBlockSeparator.cpp |  86 ++++++++++---
 clang/lib/Format/DefinitionBlockSeparator.h   |   2 +-
 clang/lib/Format/FormatToken.h                |   1 +
 clang/lib/Format/TokenAnnotator.cpp           |   2 +-
 clang/lib/Format/UnwrappedLineParser.cpp      |   3 +
 .../Format/DefinitionBlockSeparatorTest.cpp   | 117 +++++++++++++++---
 6 files changed, 169 insertions(+), 42 deletions(-)

diff --git a/clang/lib/Format/DefinitionBlockSeparator.cpp b/clang/lib/Format/DefinitionBlockSeparator.cpp
index d09cd0bd33fbe..827564357f788 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.cpp
+++ b/clang/lib/Format/DefinitionBlockSeparator.cpp
@@ -25,22 +25,27 @@ std::pair<tooling::Replacements, unsigned> DefinitionBlockSeparator::analyze(
   assert(Style.SeparateDefinitionBlocks != FormatStyle::SDS_Leave);
   AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
   tooling::Replacements Result;
-  separateBlocks(AnnotatedLines, Result);
+  separateBlocks(AnnotatedLines, Result, Tokens);
   return {Result, 0};
 }
 
 void DefinitionBlockSeparator::separateBlocks(
-    SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result) {
+    SmallVectorImpl<AnnotatedLine *> &Lines, tooling::Replacements &Result,
+    FormatTokenLexer &Tokens) {
   const bool IsNeverStyle =
       Style.SeparateDefinitionBlocks == FormatStyle::SDS_Never;
-  auto LikelyDefinition = [this](const AnnotatedLine *Line) {
+  const AdditionalKeywords &ExtraKeywords = Tokens.getKeywords();
+  auto LikelyDefinition = [this, ExtraKeywords](const AnnotatedLine *Line,
+                                                bool ExcludeEnum = false) {
     if ((Line->MightBeFunctionDecl && Line->mightBeFunctionDefinition()) ||
         Line->startsWithNamespace())
       return true;
     FormatToken *CurrentToken = Line->First;
     while (CurrentToken) {
-      if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct, tok::kw_enum) ||
-          (Style.isJavaScript() && CurrentToken->TokenText == "function"))
+      if (CurrentToken->isOneOf(tok::kw_class, tok::kw_struct) ||
+          (Style.isJavaScript() && CurrentToken->is(ExtraKeywords.kw_function)))
+        return true;
+      if (!ExcludeEnum && CurrentToken->is(tok::kw_enum))
         return true;
       CurrentToken = CurrentToken->Next;
     }
@@ -63,18 +68,25 @@ void DefinitionBlockSeparator::separateBlocks(
     AnnotatedLine *TargetLine;
     auto OpeningLineIndex = CurrentLine->MatchingOpeningBlockLineIndex;
     AnnotatedLine *OpeningLine = nullptr;
+    const auto IsAccessSpecifierToken = [](const FormatToken *Token) {
+      return Token->isAccessSpecifier() || Token->isObjCAccessSpecifier();
+    };
     const auto InsertReplacement = [&](const int NewlineToInsert) {
       assert(TargetLine);
       assert(TargetToken);
 
       // Do not handle EOF newlines.
-      if (TargetToken->is(tok::eof) && NewlineToInsert > 0)
+      if (TargetToken->is(tok::eof))
+        return;
+      if (IsAccessSpecifierToken(TargetToken) ||
+          (OpeningLineIndex > 0 &&
+           IsAccessSpecifierToken(Lines[OpeningLineIndex - 1]->First)))
         return;
       if (!TargetLine->Affected)
         return;
       Whitespaces.replaceWhitespace(*TargetToken, NewlineToInsert,
-                                    TargetToken->SpacesRequiredBefore - 1,
-                                    TargetToken->StartsColumn);
+                                    TargetToken->OriginalColumn,
+                                    TargetToken->OriginalColumn);
     };
     const auto IsPPConditional = [&](const size_t LineIndex) {
       const auto &Line = Lines[LineIndex];
@@ -89,26 +101,57 @@ void DefinitionBlockSeparator::separateBlocks(
              Lines[OpeningLineIndex - 1]->Last->opensScope() ||
              IsPPConditional(OpeningLineIndex - 1);
     };
-    const auto HasEnumOnLine = [CurrentLine]() {
+    const auto HasEnumOnLine = [&]() {
       FormatToken *CurrentToken = CurrentLine->First;
+      bool FoundEnumKeyword = false;
       while (CurrentToken) {
         if (CurrentToken->is(tok::kw_enum))
+          FoundEnumKeyword = true;
+        else if (FoundEnumKeyword && CurrentToken->is(tok::l_brace))
           return true;
         CurrentToken = CurrentToken->Next;
       }
-      return false;
+      return FoundEnumKeyword && I + 1 < Lines.size() &&
+             Lines[I + 1]->First->is(tok::l_brace);
     };
 
     bool IsDefBlock = false;
     const auto MayPrecedeDefinition = [&](const int Direction = -1) {
+      assert(Direction >= -1);
+      assert(Direction <= 1);
       const size_t OperateIndex = OpeningLineIndex + Direction;
       assert(OperateIndex < Lines.size());
       const auto &OperateLine = Lines[OperateIndex];
-      return (Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare)) ||
-             OperateLine->First->is(tok::comment);
+      if (LikelyDefinition(OperateLine))
+        return false;
+
+      if (OperateLine->First->is(tok::comment))
+        return true;
+
+      // A single line identifier that is not in the last line.
+      if (OperateLine->First->is(tok::identifier) &&
+          OperateLine->First == OperateLine->Last &&
+          OperateIndex + 1 < Lines.size()) {
+        // UnwrappedLineParser's recognition of free-standing macro like
+        // Q_OBJECT may also recognize some uppercased type names that may be
+        // used as return type as that kind of macros, which is a bit hard to
+        // distinguish one from another purely from token patterns. Here, we
+        // try not to add new lines below those identifiers.
+        AnnotatedLine *NextLine = Lines[OperateIndex + 1];
+        if (NextLine->MightBeFunctionDecl &&
+            NextLine->mightBeFunctionDefinition() &&
+            NextLine->First->NewlinesBefore == 1 &&
+            OperateLine->First->is(TT_FunctionLikeOrFreestandingMacro))
+          return true;
+      }
+
+      if ((Style.isCSharp() && OperateLine->First->is(TT_AttributeSquare)))
+        return true;
+      return false;
     };
 
-    if (HasEnumOnLine()) {
+    if (HasEnumOnLine() &&
+        !LikelyDefinition(CurrentLine, /*ExcludeEnum=*/true)) {
       // We have no scope opening/closing information for enum.
       IsDefBlock = true;
       OpeningLineIndex = I;
@@ -132,8 +175,13 @@ void DefinitionBlockSeparator::separateBlocks(
     } else if (CurrentLine->First->closesScope()) {
       if (OpeningLineIndex > Lines.size())
         continue;
-      // Handling the case that opening bracket has its own line.
-      OpeningLineIndex -= Lines[OpeningLineIndex]->First->is(tok::l_brace);
+      // Handling the case that opening brace has its own line, with checking
+      // whether the last line already had an opening brace to guard against
+      // misrecognition.
+      if (OpeningLineIndex > 0 &&
+          Lines[OpeningLineIndex]->First->is(tok::l_brace) &&
+          Lines[OpeningLineIndex - 1]->Last->isNot(tok::l_brace))
+        --OpeningLineIndex;
       OpeningLine = Lines[OpeningLineIndex];
       // Closing a function definition.
       if (LikelyDefinition(OpeningLine)) {
@@ -168,15 +216,13 @@ void DefinitionBlockSeparator::separateBlocks(
           ++OpeningLineIndex;
         TargetLine = Lines[OpeningLineIndex];
         if (!LikelyDefinition(TargetLine)) {
+          OpeningLineIndex = I + 1;
           TargetLine = Lines[I + 1];
           TargetToken = TargetLine->First;
           InsertReplacement(NewlineCount);
         }
-      } else if (IsNeverStyle) {
-        TargetLine = Lines[I + 1];
-        TargetToken = TargetLine->First;
-        InsertReplacement(OpeningLineIndex != 0);
-      }
+      } else if (IsNeverStyle)
+        InsertReplacement(/*NewlineToInsert=*/1);
     }
   }
   for (const auto &R : Whitespaces.generateReplacements())
diff --git a/clang/lib/Format/DefinitionBlockSeparator.h b/clang/lib/Format/DefinitionBlockSeparator.h
index 13b90c5ab083d..31c0f34d6e198 100644
--- a/clang/lib/Format/DefinitionBlockSeparator.h
+++ b/clang/lib/Format/DefinitionBlockSeparator.h
@@ -33,7 +33,7 @@ class DefinitionBlockSeparator : public TokenAnalyzer {
 
 private:
   void separateBlocks(SmallVectorImpl<AnnotatedLine *> &Lines,
-                      tooling::Replacements &Result);
+                      tooling::Replacements &Result, FormatTokenLexer &Tokens);
 };
 } // namespace format
 } // namespace clang
diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h
index b087f9f120411..7cc090cb77def 100644
--- a/clang/lib/Format/FormatToken.h
+++ b/clang/lib/Format/FormatToken.h
@@ -51,6 +51,7 @@ namespace format {
   TYPE(FunctionAnnotationRParen)                                               \
   TYPE(FunctionDeclarationName)                                                \
   TYPE(FunctionLBrace)                                                         \
+  TYPE(FunctionLikeOrFreestandingMacro)                                        \
   TYPE(FunctionTypeLParen)                                                     \
   TYPE(IfMacro)                                                                \
   TYPE(ImplicitStringLiteral)                                                  \
diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index cc8b48387fc9e..b9535f7965598 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -1423,7 +1423,7 @@ class AnnotatingParser {
             TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator,
             TT_RegexLiteral, TT_TemplateString, TT_ObjCStringLiteral,
             TT_UntouchableMacroFunc, TT_ConstraintJunctions,
-            TT_StatementAttributeLikeMacro))
+            TT_StatementAttributeLikeMacro, TT_FunctionLikeOrFreestandingMacro))
       CurrentToken->setType(TT_Unknown);
     CurrentToken->Role.reset();
     CurrentToken->MatchingParen = nullptr;
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 67b7b3937b07e..96d227b7fe763 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1682,6 +1682,8 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
 
       // See if the following token should start a new unwrapped line.
       StringRef Text = FormatTok->TokenText;
+
+      FormatToken *PreviousToken = FormatTok;
       nextToken();
 
       // JS doesn't have macros, and within classes colons indicate fields, not
@@ -1710,6 +1712,7 @@ void UnwrappedLineParser::parseStructuralElement(IfStmtKind *IfKind,
 
         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
             tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
+          PreviousToken->setType(TT_FunctionLikeOrFreestandingMacro);
           addUnwrappedLine();
           return;
         }
diff --git a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
index 69c87cb4b51fd..4cbae0f55b036 100644
--- a/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
+++ b/clang/unittests/Format/DefinitionBlockSeparatorTest.cpp
@@ -131,6 +131,73 @@ TEST_F(DefinitionBlockSeparatorTest, Basic) {
                "\n"
                "enum Bar { FOOBAR, BARFOO };\n",
                Style);
+
+  FormatStyle BreakAfterReturnTypeStyle = Style;
+  BreakAfterReturnTypeStyle.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All;
+  // Test uppercased long typename
+  verifyFormat("class Foo {\n"
+               "  void\n"
+               "  Bar(int t, int p) {\n"
+               "    int r = t + p;\n"
+               "    return r;\n"
+               "  }\n"
+               "\n"
+               "  HRESULT\n"
+               "  Foobar(int t, int p) {\n"
+               "    int r = t * p;\n"
+               "    return r;\n"
+               "  }\n"
+               "}\n",
+               BreakAfterReturnTypeStyle);
+}
+
+TEST_F(DefinitionBlockSeparatorTest, FormatConflict) {
+  FormatStyle Style = getLLVMStyle();
+  Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always;
+  llvm::StringRef Code = "class Test {\n"
+                         "public:\n"
+                         "  static void foo() {\n"
+                         "    int t;\n"
+                         "    return 1;\n"
+                         "  }\n"
+                         "};";
+  std::vector<tooling::Range> Ranges = {1, tooling::Range(0, Code.size())};
+  EXPECT_EQ(reformat(Style, Code, Ranges, "<stdin>").size(), 0u);
+}
+
+TEST_F(DefinitionBlockSeparatorTest, CommentBlock) {
+  FormatStyle Style = getLLVMStyle();
+  Style.SeparateDefinitionBlocks = FormatStyle::SDS_Always;
+  std::string Prefix = "enum Foo { FOO, BAR };\n"
+                       "\n"
+                       "/*\n"
+                       "test1\n"
+                       "test2\n"
+                       "*/\n"
+                       "int foo(int i, int j) {\n"
+                       "  int r = i + j;\n"
+                       "  return r;\n"
+                       "}\n";
+  std::string Suffix = "enum Bar { FOOBAR, BARFOO };\n"
+                       "\n"
+                       "/* Comment block in one line*/\n"
+                       "int bar3(int j, int k) {\n"
+                       "  // A comment\n"
+                       "  int r = j % k;\n"
+                       "  return r;\n"
+                       "}\n";
+  std::string CommentedCode = "/*\n"
+                              "int bar2(int j, int k) {\n"
+                              "  int r = j / k;\n"
+                              "  return r;\n"
+                              "}\n"
+                              "*/\n";
+  verifyFormat(removeEmptyLines(Prefix) + "\n" + CommentedCode + "\n" +
+                   removeEmptyLines(Suffix),
+               Style, Prefix + "\n" + CommentedCode + "\n" + Suffix);
+  verifyFormat(removeEmptyLines(Prefix) + "\n" + CommentedCode +
+                   removeEmptyLines(Suffix),
+               Style, Prefix + "\n" + CommentedCode + Suffix);
 }
 
 TEST_F(DefinitionBlockSeparatorTest, UntouchBlockStartStyle) {
@@ -175,13 +242,15 @@ TEST_F(DefinitionBlockSeparatorTest, UntouchBlockStartStyle) {
   FormatStyle NeverStyle = getLLVMStyle();
   NeverStyle.SeparateDefinitionBlocks = FormatStyle::SDS_Never;
 
-  auto TestKit = MakeUntouchTest("#ifdef FOO\n\n", "\n#elifndef BAR\n\n",
-                                 "\n#endif\n\n", false);
+  auto TestKit = MakeUntouchTest("/* FOOBAR */\n"
+                                 "#ifdef FOO\n\n",
+                                 "\n#elifndef BAR\n\n", "\n#endif\n\n", false);
   verifyFormat(TestKit.first, AlwaysStyle, TestKit.second);
   verifyFormat(TestKit.second, NeverStyle, removeEmptyLines(TestKit.second));
 
-  TestKit =
-      MakeUntouchTest("#ifdef FOO\n", "#elifndef BAR\n", "#endif\n", false);
+  TestKit = MakeUntouchTest("/* FOOBAR */\n"
+                            "#ifdef FOO\n",
+                            "#elifndef BAR\n", "#endif\n", false);
   verifyFormat(TestKit.first, AlwaysStyle, TestKit.second);
   verifyFormat(TestKit.second, NeverStyle, removeEmptyLines(TestKit.second));
 
@@ -213,7 +282,7 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "test1\n"
                       "test2\n"
                       "*/\n"
-                      "int foo(int i, int j) {\n"
+                      "/*const*/ int foo(int i, int j) {\n"
                       "  int r = i + j;\n"
                       "  return r;\n"
                       "}\n"
@@ -225,8 +294,10 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "// Comment line 2\n"
                       "// Comment line 3\n"
                       "int bar(int j, int k) {\n"
-                      "  int r = j * k;\n"
-                      "  return r;\n"
+                      "  {\n"
+                      "    int r = j * k;\n"
+                      "    return r;\n"
+                      "  }\n"
                       "}\n"
                       "\n"
                       "int bar2(int j, int k) {\n"
@@ -237,7 +308,7 @@ TEST_F(DefinitionBlockSeparatorTest, Always) {
                       "/* Comment block in one line*/\n"
                       "enum Bar { FOOBAR, BARFOO };\n"
                       "\n"
-                      "int bar3(int j, int k) {\n"
+                      "int bar3(int j, int k, const enum Bar b) {\n"
                       "  // A comment\n"
                       "  int r = j % k;\n"
                       "  return r;\n"
@@ -264,7 +335,7 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "test1\n"
                         "test2\n"
                         "*/\n"
-                        "int foo(int i, int j) {\n"
+                        "/*const*/ int foo(int i, int j) {\n"
                         "  int r = i + j;\n"
                         "  return r;\n"
                         "}\n"
@@ -276,8 +347,10 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "// Comment line 2\n"
                         "// Comment line 3\n"
                         "int bar(int j, int k) {\n"
-                        "  int r = j * k;\n"
-                        "  return r;\n"
+                        "  {\n"
+                        "    int r = j * k;\n"
+                        "    return r;\n"
+                        "  }\n"
                         "}\n"
                         "\n"
                         "int bar2(int j, int k) {\n"
@@ -288,7 +361,7 @@ TEST_F(DefinitionBlockSeparatorTest, Never) {
                         "/* Comment block in one line*/\n"
                         "enum Bar { FOOBAR, BARFOO };\n"
                         "\n"
-                        "int bar3(int j, int k) {\n"
+                        "int bar3(int j, int k, const enum Bar b) {\n"
                         "  // A comment\n"
                         "  int r = j % k;\n"
                         "  return r;\n"
@@ -316,7 +389,7 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "test1\n"
                "test2\n"
                "*/\n"
-               "int foo(int i, int j)\n"
+               "/*const*/ int foo(int i, int j)\n"
                "{\n"
                "  int r = i + j;\n"
                "  return r;\n"
@@ -330,8 +403,10 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "// Comment line 3\n"
                "int bar(int j, int k)\n"
                "{\n"
-               "  int r = j * k;\n"
-               "  return r;\n"
+               "  {\n"
+               "    int r = j * k;\n"
+               "    return r;\n"
+               "  }\n"
                "}\n"
                "\n"
                "int bar2(int j, int k)\n"
@@ -346,7 +421,7 @@ TEST_F(DefinitionBlockSeparatorTest, OpeningBracketOwnsLine) {
                "  BARFOO\n"
                "};\n"
                "\n"
-               "int bar3(int j, int k)\n"
+               "int bar3(int j, int k, const enum Bar b)\n"
                "{\n"
                "  // A comment\n"
                "  int r = j % k;\n"
@@ -370,7 +445,7 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "test1\n"
                         "test2\n"
                         "*/\n"
-                        "int foo(int i, int j) {\n"
+                        "/*const*/ int foo(int i, int j) {\n"
                         "  int r = i + j;\n"
                         "  return r;\n"
                         "}\n"
@@ -382,8 +457,10 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "// Comment line 2\n"
                         "// Comment line 3\n"
                         "int bar(int j, int k) {\n"
-                        "  int r = j * k;\n"
-                        "  return r;\n"
+                        "  {\n"
+                        "    int r = j * k;\n"
+                        "    return r;\n"
+                        "  }\n"
                         "}\n"
                         "\n"
                         "int bar2(int j, int k) {\n"
@@ -393,7 +470,7 @@ TEST_F(DefinitionBlockSeparatorTest, Leave) {
                         "\n"
                         "// Comment for inline enum\n"
                         "enum Bar { FOOBAR, BARFOO };\n"
-                        "int bar3(int j, int k) {\n"
+                        "int bar3(int j, int k, const enum Bar b) {\n"
                         "  // A comment\n"
                         "  int r = j % k;\n"
                         "  return r;\n"

From 7a5b0a2934f3b82ae93f03d1e7603371fe5c42d1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 14 Jan 2022 21:02:15 -0500
Subject: [PATCH 389/946] Reapply "IR: Make getRetAlign check callee function
 attributes"

Reapply 3d2d208f6a0a421b23937c39b9d371183a5913a3, reverted in
a97e20a3a8a58be751f023e610758310d5664562
---
 llvm/include/llvm/IR/InstrTypes.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index 179aa579fa96b..b3d2a2c8ed9dd 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1723,7 +1723,13 @@ class CallBase : public Instruction {
   }
 
   /// Extract the alignment of the return value.
-  MaybeAlign getRetAlign() const { return Attrs.getRetAlignment(); }
+  MaybeAlign getRetAlign() const {
+    if (auto Align = Attrs.getRetAlignment())
+      return Align;
+    if (const Function *F = getCalledFunction())
+      return F->getAttributes().getRetAlignment();
+    return None;
+  }
 
   /// Extract the alignment for a call or parameter (0=unknown).
   MaybeAlign getParamAlign(unsigned ArgNo) const {

From 99e8e17313e76c50a0d6606394fed98832fd8fec Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 14 Jan 2022 18:18:26 -0500
Subject: [PATCH 390/946] Reapply "Revert "GlobalISel: Add G_ASSERT_ALIGN hint
 instruction"

This reverts commit a97e20a3a8a58be751f023e610758310d5664562.
---
 .../CodeGen/GlobalISel/MachineIRBuilder.h     |  25 ++-
 llvm/include/llvm/Support/TargetOpcodes.def   |   3 +-
 llvm/include/llvm/Target/GenericOpcodes.td    |   7 +
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  |  27 ++-
 .../lib/CodeGen/GlobalISel/GISelKnownBits.cpp |  17 ++
 .../CodeGen/GlobalISel/MachineIRBuilder.cpp   |  12 -
 llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp |   3 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   2 +
 .../AArch64/GISel/AArch64CallLowering.cpp     |   1 +
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp |   1 +
 .../AArch64/GlobalISel/assert-align.ll        |  28 +++
 .../GlobalISel/regbank-assert-align.mir       |  30 +++
 .../CodeGen/AMDGPU/GlobalISel/assert-align.ll |  55 +++++
 .../GlobalISel/irtranslator-assert-align.ll   | 208 ++++++++++++++++++
 .../GlobalISel/regbankselect-assert-align.mir |  62 ++++++
 .../CodeGen/GlobalISel/KnownBitsTest.cpp      |  55 +++++
 16 files changed, 518 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-align.mir
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index fde0cb3cf1af5..c4c2fc076dd8e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -836,17 +836,38 @@ class MachineIRBuilder {
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op);
 
+
+  /// Build and insert G_ASSERT_SEXT, G_ASSERT_ZEXT, or G_ASSERT_ALIGN
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildAssertOp(unsigned Opc, const DstOp &Res, const SrcOp &Op,
+				    unsigned Val) {
+    return buildInstr(Opc, Res, Op).addImm(Val);
+  }
+
   /// Build and insert \p Res = G_ASSERT_ZEXT Op, Size
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op,
-                                      unsigned Size);
+                                      unsigned Size) {
+    return buildAssertOp(TargetOpcode::G_ASSERT_ZEXT, Res, Op, Size);
+  }
 
   /// Build and insert \p Res = G_ASSERT_SEXT Op, Size
   ///
   /// \return a MachineInstrBuilder for the newly created instruction.
   MachineInstrBuilder buildAssertSExt(const DstOp &Res, const SrcOp &Op,
-                                      unsigned Size);
+                                      unsigned Size) {
+    return buildAssertOp(TargetOpcode::G_ASSERT_SEXT, Res, Op, Size);
+  }
+
+  /// Build and insert \p Res = G_ASSERT_ALIGN Op, AlignVal
+  ///
+  /// \return a MachineInstrBuilder for the newly created instruction.
+  MachineInstrBuilder buildAssertAlign(const DstOp &Res, const SrcOp &Op,
+				       Align AlignVal) {
+    return buildAssertOp(TargetOpcode::G_ASSERT_ALIGN, Res, Op, AlignVal.value());
+  }
 
   /// Build and insert `Res = G_LOAD Addr, MMO`.
   ///
diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index b34b885ddc357..428cbb44705d8 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -228,10 +228,11 @@ HANDLE_TARGET_OPCODE(ICALL_BRANCH_FUNNEL)
 /// generate code. These instructions only act as optimization hints.
 HANDLE_TARGET_OPCODE(G_ASSERT_SEXT)
 HANDLE_TARGET_OPCODE(G_ASSERT_ZEXT)
+HANDLE_TARGET_OPCODE(G_ASSERT_ALIGN)
 HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPTIMIZATION_HINT_START,
                             G_ASSERT_SEXT)
 HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPTIMIZATION_HINT_END,
-                            G_ASSERT_ZEXT)
+                            G_ASSERT_ALIGN)
 
 /// Generic ADD instruction. This is an integer add.
 HANDLE_TARGET_OPCODE(G_ADD)
diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td
index 72c974834a2fa..2af20ab6a53f5 100644
--- a/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/llvm/include/llvm/Target/GenericOpcodes.td
@@ -1434,3 +1434,10 @@ def G_ASSERT_SEXT : GenericInstruction {
   let InOperandList = (ins type0:$src, untyped_imm_0:$sz);
   let hasSideEffects = false;
 }
+
+// Asserts that a value has at least the given alignment.
+def G_ASSERT_ALIGN : GenericInstruction {
+  let OutOperandList = (outs type0:$dst);
+  let InOperandList = (ins type0:$src, untyped_imm_0:$align);
+  let hasSideEffects = false;
+}
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 486eff4dc7100..1ec7868f22345 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -86,6 +86,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   CallLoweringInfo Info;
   const DataLayout &DL = MIRBuilder.getDataLayout();
   MachineFunction &MF = MIRBuilder.getMF();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
   bool CanBeTailCalled = CB.isTailCall() &&
                          isInTailCallPosition(CB, MF.getTarget()) &&
                          (MF.getFunction()
@@ -109,6 +110,7 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
     CanBeTailCalled = false;
   }
 
+
   // First step is to marshall all the function's parameters into the correct
   // physregs and memory locations. Gather the sequence of argument types that
   // we'll pass to the assigner function.
@@ -136,10 +138,23 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   else
     Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
 
+  Register ReturnHintAlignReg;
+  Align ReturnHintAlign;
+
   Info.OrigRet = ArgInfo{ResRegs, RetTy, 0, ISD::ArgFlagsTy{}};
-  if (!Info.OrigRet.Ty->isVoidTy())
+
+  if (!Info.OrigRet.Ty->isVoidTy()) {
     setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CB);
 
+    if (MaybeAlign Alignment = CB.getRetAlign()) {
+      if (*Alignment > Align(1)) {
+        ReturnHintAlignReg = MRI.cloneVirtualRegister(ResRegs[0]);
+        Info.OrigRet.Regs[0] = ReturnHintAlignReg;
+        ReturnHintAlign = *Alignment;
+      }
+    }
+  }
+
   Info.CB = &CB;
   Info.KnownCallees = CB.getMetadata(LLVMContext::MD_callees);
   Info.CallConv = CallConv;
@@ -147,7 +162,15 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
   Info.IsMustTailCall = CB.isMustTailCall();
   Info.IsTailCall = CanBeTailCalled;
   Info.IsVarArg = IsVarArg;
-  return lowerCall(MIRBuilder, Info);
+  if (!lowerCall(MIRBuilder, Info))
+    return false;
+
+  if (ReturnHintAlignReg && !Info.IsTailCall) {
+    MIRBuilder.buildAssertAlign(ResRegs[0], ReturnHintAlignReg,
+                                ReturnHintAlign);
+  }
+
+  return true;
 }
 
 template <typename FuncInfoTy>
diff --git a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
index 306af808659a9..64c2f0d5f8e49 100644
--- a/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -37,6 +37,11 @@ Align GISelKnownBits::computeKnownAlignment(Register R, unsigned Depth) {
   switch (MI->getOpcode()) {
   case TargetOpcode::COPY:
     return computeKnownAlignment(MI->getOperand(1).getReg(), Depth);
+  case TargetOpcode::G_ASSERT_ALIGN: {
+    // TODO: Min with source
+    int64_t LogAlign = MI->getOperand(2).getImm();
+    return Align(1ull << LogAlign);
+  }
   case TargetOpcode::G_FRAME_INDEX: {
     int FrameIdx = MI->getOperand(1).getIndex();
     return MF.getFrameInfo().getObjectAlign(FrameIdx);
@@ -466,6 +471,18 @@ void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
       Known.Zero.setBitsFrom(SrcBitWidth);
     break;
   }
+  case TargetOpcode::G_ASSERT_ALIGN: {
+    int64_t LogOfAlign = MI.getOperand(2).getImm();
+    if (LogOfAlign == 0)
+      break;
+
+    // TODO: Should use maximum with source
+    // If a node is guaranteed to be aligned, set low zero bits accordingly as
+    // well as clearing one bits.
+    Known.Zero.setLowBits(LogOfAlign);
+    Known.One.clearLowBits(LogOfAlign);
+    break;
+  }
   case TargetOpcode::G_MERGE_VALUES: {
     unsigned NumOps = MI.getNumOperands();
     unsigned OpSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 391251886fbb2..c6720568b362b 100644
--- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -282,18 +282,6 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
   return buildInstr(TargetOpcode::COPY, Res, Op);
 }
 
-MachineInstrBuilder MachineIRBuilder::buildAssertSExt(const DstOp &Res,
-                                                      const SrcOp &Op,
-                                                      unsigned Size) {
-  return buildInstr(TargetOpcode::G_ASSERT_SEXT, Res, Op).addImm(Size);
-}
-
-MachineInstrBuilder MachineIRBuilder::buildAssertZExt(const DstOp &Res,
-                                                      const SrcOp &Op,
-                                                      unsigned Size) {
-  return buildInstr(TargetOpcode::G_ASSERT_ZEXT, Res, Op).addImm(Size);
-}
-
 MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
                                                     const ConstantInt &Val) {
   LLT Ty = Res.getLLTTy(*getMRI());
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 8d2677ea67e0e..01af6bb51bb79 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -626,7 +626,8 @@ bool RegBankSelect::assignInstr(MachineInstr &MI) {
   unsigned Opc = MI.getOpcode();
   if (isPreISelGenericOptimizationHint(Opc)) {
     assert((Opc == TargetOpcode::G_ASSERT_ZEXT ||
-            Opc == TargetOpcode::G_ASSERT_SEXT) &&
+            Opc == TargetOpcode::G_ASSERT_SEXT ||
+            Opc == TargetOpcode::G_ASSERT_ALIGN) &&
            "Unexpected hint opcode!");
     // The only correct mapping for these is to always use the source register
     // bank.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 7ca6f9aa4cf0a..447bb326e6511 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3381,6 +3381,8 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
   case ISD::AssertAlign: {
     unsigned LogOfAlign = Log2(cast<AssertAlignSDNode>(Op)->getAlign());
     assert(LogOfAlign != 0);
+
+    // TODO: Should use maximum with source
     // If a node is guaranteed to be aligned, set low zero bits accordingly as
     // well as clearing one bits.
     Known.Zero.setLowBits(LogOfAlign);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 677e7a6684d56..097b93e4fccae 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1112,6 +1112,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   }
 
+  Info.IsTailCall = CanTailCallOpt;
   if (CanTailCallOpt)
     return lowerTailCall(MIRBuilder, Info, OutArgs);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index c6aa697f94d45..76eebb327010e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1338,6 +1338,7 @@ bool AMDGPUCallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   }
 
+  Info.IsTailCall = CanTailCallOpt;
   if (CanTailCallOpt)
     return lowerTailCall(MIRBuilder, Info, OutArgs);
 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll
new file mode 100644
index 0000000000000..f9ce504c3d68b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/assert-align.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s
+
+declare i8* @foo()
+
+define void @call_assert_align() {
+; CHECK-LABEL: call_assert_align:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    bl foo
+; CHECK-NEXT:    strb wzr, [x0]
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  %ptr = call align 8 i8* @foo()
+  store i8 0, i8* %ptr
+  ret void
+}
+
+define i8* @tailcall_assert_align() {
+; CHECK-LABEL: tailcall_assert_align:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    b foo
+entry:
+  %call = tail call align 4 i8* @foo()
+  ret i8* %call
+}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-align.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-align.mir
new file mode 100644
index 0000000000000..618a4532b2362
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-assert-align.mir
@@ -0,0 +1,30 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify register banks for G_ASSERT_ALIGN.
+#
+
+---
+name:            gpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; G_ASSERT_ALIGN should end up on a GPR.
+
+    ; CHECK-LABEL: name: gpr
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:gpr(p0) = COPY $x0
+    ; CHECK-NEXT: %copy_assert_align:gpr(p0) = G_ASSERT_ALIGN %copy, 4
+    ; CHECK-NEXT: $x1 = COPY %copy_assert_align(p0)
+    ; CHECK-NEXT: RET_ReallyLR implicit $x1
+    %copy:_(p0) = COPY $x0
+    %copy_assert_align:_(p0) = G_ASSERT_ALIGN %copy(p0), 4
+    $x1 = COPY %copy_assert_align
+    RET_ReallyLR implicit $x1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll
new file mode 100644
index 0000000000000..225dec6d39d72
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/assert-align.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - %s | FileCheck %s
+
+declare hidden i32 addrspace(1)* @ext(i8 addrspace(1)*)
+
+define i32 addrspace(1)* @call_assert_align() {
+; CHECK-LABEL: call_assert_align:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_or_saveexec_b64 s[16:17], -1
+; CHECK-NEXT:    buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
+; CHECK-NEXT:    s_mov_b64 exec, s[16:17]
+; CHECK-NEXT:    v_writelane_b32 v40, s33, 2
+; CHECK-NEXT:    s_mov_b32 s33, s32
+; CHECK-NEXT:    s_addk_i32 s32, 0x400
+; CHECK-NEXT:    v_writelane_b32 v40, s30, 0
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_writelane_b32 v40, s31, 1
+; CHECK-NEXT:    s_getpc_b64 s[16:17]
+; CHECK-NEXT:    s_add_u32 s16, s16, ext@rel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s17, s17, ext@rel32@hi+12
+; CHECK-NEXT:    s_swappc_b64 s[30:31], s[16:17]
+; CHECK-NEXT:    v_mov_b32_e32 v2, 0
+; CHECK-NEXT:    v_readlane_b32 s4, v40, 0
+; CHECK-NEXT:    global_store_dword v[0:1], v2, off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_readlane_b32 s5, v40, 1
+; CHECK-NEXT:    s_addk_i32 s32, 0xfc00
+; CHECK-NEXT:    v_readlane_b32 s33, v40, 2
+; CHECK-NEXT:    s_or_saveexec_b64 s[6:7], -1
+; CHECK-NEXT:    buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
+; CHECK-NEXT:    s_mov_b64 exec, s[6:7]
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    s_setpc_b64 s[4:5]
+entry:
+  %call = call align 4 i32 addrspace(1)* @ext(i8 addrspace(1)* null)
+  store volatile i32 0, i32 addrspace(1)* %call
+  ret i32 addrspace(1)* %call
+}
+
+define i32 addrspace(1)* @tail_call_assert_align() {
+; CHECK-LABEL: tail_call_assert_align:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_getpc_b64 s[16:17]
+; CHECK-NEXT:    s_add_u32 s16, s16, ext@rel32@lo+4
+; CHECK-NEXT:    s_addc_u32 s17, s17, ext@rel32@hi+12
+; CHECK-NEXT:    s_setpc_b64 s[16:17]
+entry:
+  %call = tail call align 4 i32 addrspace(1)* @ext(i8 addrspace(1)* null)
+  ret i32 addrspace(1)* %call
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
new file mode 100644
index 0000000000000..7124c8c700481
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-assert-align.ll
@@ -0,0 +1,208 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=fiji -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s
+
+; TODO: Could potentially insert it here
+define void @arg_align_8(i8 addrspace(1)* align 8 %arg0) {
+  ; CHECK-LABEL: name: arg_align_8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.arg0, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY3]]
+  store i8 0, i8 addrspace(1)* %arg0, align 8
+  ret void
+}
+
+declare i8 addrspace(1)* @returns_ptr()
+declare align 8 i8 addrspace(1)* @returns_ptr_align8()
+
+define void @call_result_align_1() {
+  ; CHECK-LABEL: name: call_result_align_1
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[MV]](p1) :: (store (s8) into %ir.ptr, addrspace 1)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY20]]
+  %ptr = call align 1 i8 addrspace(1)* @returns_ptr()
+  store i8 0, i8 addrspace(1)* %ptr, align 1
+  ret void
+}
+
+define void @call_result_align_8() {
+  ; CHECK-LABEL: name: call_result_align_8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY20]]
+  %ptr = call align 8 i8 addrspace(1)* @returns_ptr()
+  store i8 0, i8 addrspace(1)* %ptr, align 8
+  ret void
+}
+
+define void @declaration_result_align_8() {
+  ; CHECK-LABEL: name: declaration_result_align_8
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 0
+  ; CHECK-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @returns_ptr_align8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   $sgpr30_sgpr31 = G_SI_CALL [[GV]](p0), @returns_ptr_align8, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31, implicit-def $vgpr0, implicit-def $vgpr1
+  ; CHECK-NEXT:   [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $scc
+  ; CHECK-NEXT:   [[ASSERT_ALIGN:%[0-9]+]]:_(p1) = G_ASSERT_ALIGN [[MV]], 8
+  ; CHECK-NEXT:   G_STORE [[C]](s8), [[ASSERT_ALIGN]](p1) :: (store (s8) into %ir.ptr, align 8, addrspace 1)
+  ; CHECK-NEXT:   [[COPY20:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY20]]
+  %ptr = call i8 addrspace(1)* @returns_ptr_align8()
+  store i8 0, i8 addrspace(1)* %ptr, align 8
+  ret void
+}
+
+define i8 addrspace(1)* @tail_call_assert_align() {
+  ; CHECK-LABEL: name: tail_call_assert_align
+  ; CHECK: bb.1.entry:
+  ; CHECK-NEXT:   liveins: $sgpr12, $sgpr13, $sgpr14, $vgpr31, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY $vgpr31
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr14
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr13
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr12
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr10_sgpr11
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:sgpr_64 = COPY $sgpr8_sgpr9
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:sgpr_64 = COPY $sgpr6_sgpr7
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:sgpr_64 = COPY $sgpr4_sgpr5
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[GV:%[0-9]+]]:sreg_64(p0) = G_GLOBAL_VALUE @returns_ptr_align8
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]]
+  ; CHECK-NEXT:   [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY6]]
+  ; CHECK-NEXT:   [[COPY11:%[0-9]+]]:_(p4) = COPY [[COPY5]]
+  ; CHECK-NEXT:   [[COPY12:%[0-9]+]]:_(s64) = COPY [[COPY4]]
+  ; CHECK-NEXT:   [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY3]]
+  ; CHECK-NEXT:   [[COPY14:%[0-9]+]]:_(s32) = COPY [[COPY2]]
+  ; CHECK-NEXT:   [[COPY15:%[0-9]+]]:_(s32) = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY16:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
+  ; CHECK-NEXT:   [[COPY17:%[0-9]+]]:_(<4 x s32>) = COPY $sgpr0_sgpr1_sgpr2_sgpr3
+  ; CHECK-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY17]](<4 x s32>)
+  ; CHECK-NEXT:   $sgpr4_sgpr5 = COPY [[COPY9]](p4)
+  ; CHECK-NEXT:   $sgpr6_sgpr7 = COPY [[COPY10]](p4)
+  ; CHECK-NEXT:   $sgpr8_sgpr9 = COPY [[COPY11]](p4)
+  ; CHECK-NEXT:   $sgpr10_sgpr11 = COPY [[COPY12]](s64)
+  ; CHECK-NEXT:   $sgpr12 = COPY [[COPY13]](s32)
+  ; CHECK-NEXT:   $sgpr13 = COPY [[COPY14]](s32)
+  ; CHECK-NEXT:   $sgpr14 = COPY [[COPY15]](s32)
+  ; CHECK-NEXT:   $vgpr31 = COPY [[COPY16]](s32)
+  ; CHECK-NEXT:   SI_TCRETURN [[GV]](p0), @returns_ptr_align8, 0, csr_amdgpu_highregs, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $vgpr31
+entry:
+  %call = tail call i8 addrspace(1)* @returns_ptr_align8()
+  ret i8 addrspace(1)* %call
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir
new file mode 100644
index 0000000000000..783a1e9a6797e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-assert-align.mir
@@ -0,0 +1,62 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=regbankselect %s -verify-machineinstrs -o - | FileCheck %s
+
+---
+name:            assert_align_vgpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: assert_align_vgpr
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:vgpr(p1) = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: %assert_align:vgpr(p1) = G_ASSERT_ALIGN %copy, 4
+    ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1)
+    %copy:_(p1) = COPY $vgpr0_vgpr1
+    %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4
+    S_ENDPGM 0, implicit %assert_align
+...
+
+---
+name:            assert_align_sgpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+
+    ; CHECK-LABEL: name: assert_align_sgpr
+    ; CHECK: liveins: $sgpr8_sgpr9
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:sgpr(p1) = COPY $sgpr8_sgpr9
+    ; CHECK-NEXT: %assert_align:sgpr(p1) = G_ASSERT_ALIGN %copy, 4
+    ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1)
+    %copy:_(p1) = COPY $sgpr8_sgpr9
+    %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4
+    S_ENDPGM 0, implicit %assert_align
+...
+
+---
+name:            assert_align_agpr
+alignment:       4
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $agpr0_agpr1
+
+    ; CHECK-LABEL: name: assert_align_agpr
+    ; CHECK: liveins: $agpr0_agpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:agpr(p1) = COPY $agpr0_agpr1
+    ; CHECK-NEXT: %assert_align:agpr(p1) = G_ASSERT_ALIGN %copy, 4
+    ; CHECK-NEXT: S_ENDPGM 0, implicit %assert_align(p1)
+    %copy:_(p1) = COPY $agpr0_agpr1
+    %assert_align:_(p1) = G_ASSERT_ALIGN %copy, 4
+    S_ENDPGM 0, implicit %assert_align
+...
diff --git a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
index f5594d016305e..7a2fc0ff1d51d 100644
--- a/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/KnownBitsTest.cpp
@@ -1917,3 +1917,58 @@ TEST_F(AMDGPUGISelMITest, TestNumSignBitsSBFX) {
   EXPECT_EQ(1u, Info.computeNumSignBits(CopyUnkValBfxReg));
   EXPECT_EQ(1u, Info.computeNumSignBits(CopyUnkOffBfxReg));
 }
+
+TEST_F(AMDGPUGISelMITest, TestKnownBitsAssertAlign) {
+  StringRef MIRString = R"MIR(
+   %val:_(s64) = COPY $vgpr0_vgpr1
+   %ptrval:_(p1) = COPY $vgpr0_vgpr1
+
+   %assert_align0:_(s64) = G_ASSERT_ALIGN %val, 0
+   %copy_assert_align0:_(s64) = COPY %assert_align0
+
+   %assert_align1:_(s64) = G_ASSERT_ALIGN %val, 1
+   %copy_assert_align1:_(s64) = COPY %assert_align1
+
+   %assert_align2:_(s64) = G_ASSERT_ALIGN %val, 2
+   %copy_assert_align2:_(s64) = COPY %assert_align2
+
+   %assert_align3:_(s64) = G_ASSERT_ALIGN %val, 3
+   %copy_assert_align3:_(s64) = COPY %assert_align3
+
+   %assert_align8:_(s64) = G_ASSERT_ALIGN %val, 8
+   %copy_assert_align8:_(s64) = COPY %assert_align8
+
+   %assert_maxalign:_(s64) = G_ASSERT_ALIGN %val, 30
+   %copy_assert_maxalign:_(s64) = COPY %assert_maxalign
+
+   %assert_ptr_align5:_(p1) = G_ASSERT_ALIGN %ptrval, 5
+   %copy_assert_ptr_align5:_(p1) = COPY %assert_ptr_align5
+)MIR";
+  setUp(MIRString);
+  if (!TM)
+    return;
+  GISelKnownBits Info(*MF);
+
+  KnownBits Res;
+  auto GetKB = [&](unsigned Idx) {
+    Register CopyReg = Copies[Idx];
+    auto *Copy = MRI->getVRegDef(CopyReg);
+    return Info.getKnownBits(Copy->getOperand(1).getReg());
+  };
+
+  auto CheckBits = [&](unsigned NumBits, unsigned Idx) {
+    Res = GetKB(Idx);
+    EXPECT_EQ(64u, Res.getBitWidth());
+    EXPECT_EQ(NumBits, Res.Zero.countTrailingOnes());
+    EXPECT_EQ(64u, Res.One.countTrailingZeros());
+    EXPECT_EQ(Align(1ull << NumBits), Info.computeKnownAlignment(Copies[Idx]));
+  };
+
+  CheckBits(0, Copies.size() - 7);
+  CheckBits(1, Copies.size() - 6);
+  CheckBits(2, Copies.size() - 5);
+  CheckBits(3, Copies.size() - 4);
+  CheckBits(8, Copies.size() - 3);
+  CheckBits(30, Copies.size() - 2);
+  CheckBits(5, Copies.size() - 1);
+}

From 354b2c36ee46a60f316e816d31737559a0694515 Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Date: Mon, 10 Jan 2022 11:32:08 +0100
Subject: [PATCH 391/946] Pre-commit test cases for (sra (load)) -> (sextload)
 folds. NFC

Add test case to show missing folds for (sra (load)) -> (sextload).

Differential Revision: https://reviews.llvm.org/D116929
---
 llvm/test/CodeGen/X86/combine-sra-load.ll | 107 ++++++++++++++++++++++
 1 file changed, 107 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/combine-sra-load.ll

diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll
new file mode 100644
index 0000000000000..119acaa6a02b5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/combine-sra-load.ll
@@ -0,0 +1,107 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
+
+; FIXME: fold (sra (load i32), 16)) -> (sextload i16)
+define i32 @sra_half(i32* %p) {
+; CHECK-LABEL: sra_half:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 16
+  ret i32 %shift
+}
+
+; Vector version not folded.
+define <4 x i32> @sra_half_vec(<4 x i32>* %p) {
+; CHECK-LABEL: sra_half_vec:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movdqa (%rdi), %xmm0
+; CHECK-NEXT:    psrad $16, %xmm0
+; CHECK-NEXT:    retq
+  %load = load <4 x i32>, <4 x i32>* %p
+  %shift = ashr <4 x i32> %load, <i32 16, i32 16, i32 16, i32 16>
+  ret <4 x i32> %shift
+}
+
+; FIXME: fold (sra (load i64), 48)) -> (sextload i16)
+define i64 @sra_large_shift(i64* %r) {
+; CHECK-LABEL: sra_large_shift:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    sarq $48, %rax
+; CHECK-NEXT:    retq
+  %t0 = load i64, i64* %r
+  %conv = ashr i64 %t0, 48
+  ret i64 %conv
+}
+
+; Negative test, no fold expected.
+define i32 @sra_small_shift(i32* %p) {
+; CHECK-LABEL: sra_small_shift:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 8
+  ret i32 %shift
+}
+
+; This should be folded to a zextload.
+define i32 @sra_of_zextload(i16* %p) {
+; CHECK-LABEL: sra_of_zextload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movzbl 1(%rdi), %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %zext = zext i16 %load to i32
+  %shift = ashr i32 %zext, 8
+  ret i32 %shift
+}
+
+; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8)
+define i32 @sra_of_sextload(i16* %p) {
+; CHECK-LABEL: sra_of_sextload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movswl (%rdi), %eax
+; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %sext = sext i16 %load to i32
+  %shift = ashr i32 %sext, 8
+  ret i32 %shift
+}
+
+; Negative test. If the shift amount is larger than the memory type then
+; we're not accessing any of the loaded bytes (only the extended bits). So the
+; shift is expected to remain.
+define i32 @sra_of_sextload_no_fold(i16* %p) {
+; CHECK-LABEL: sra_of_sextload_no_fold:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movswl (%rdi), %eax
+; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    retq
+  %load = load i16, i16* %p
+  %sext = sext i16 %load to i32
+  %shift = ashr i32 %sext, 16
+  ret i32 %shift
+}
+
+; FIXME: Fold even if SRA has multiple uses.
+define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {
+; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl (%rdi), %ecx
+; CHECK-NEXT:    sarl $16, %ecx
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    xorl $6, %eax
+; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    retq
+  %load = load i32, i32* %p
+  %shift = ashr i32 %load, 16
+  %use1 = xor i32 %shift, 6
+  %use2 = or i32 %shift, %use1
+  ret i32 %use2
+}

From 18aabae8e2b6ccea4575ac9e4fb4d38ec7e4e971 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Thu, 20 Jan 2022 14:57:23 -0500
Subject: [PATCH 392/946] AMDGPU: Fix assertion on fixed stack objects with
 VGPR->AGPR spills

These have negative / out of bounds frame index values and would
assert when trying to set the BitVector. Fixed stack objects can't be
colored away so ignore them.
---
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp    |  3 +-
 .../test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll | 39 +++++++++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index a347e91f3fad2..6078f4a0577ab 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1195,7 +1195,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
           }
         } else if (TII->isStoreToStackSlot(MI, FrameIndex) ||
                    TII->isLoadFromStackSlot(MI, FrameIndex))
-          NonVGPRSpillFIs.set(FrameIndex);
+          if (!MFI.isFixedObjectIndex(FrameIndex))
+            NonVGPRSpillFIs.set(FrameIndex);
       }
     }
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
index 4c5e7ec3e727d..ab3fc800f4a7e 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr.ll
@@ -281,6 +281,45 @@ st:
   ret void
 }
 
+; Make sure there's no crash when we have loads from fixed stack
+; objects and are processing VGPR spills
+
+; GCN-LABEL: {{^}}stack_args_vgpr_spill:
+; GFX908: v_accvgpr_write_b32
+; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32
+; GFX908: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s32 offset:4
+define void @stack_args_vgpr_spill(<32 x float> %arg0, <32 x float> %arg1, <32 x float> addrspace(1)* %p) #1 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %p1 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p, i32 %tid
+  %p2 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p1, i32 %tid
+  %p3 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p2, i32 %tid
+  %p4 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p3, i32 %tid
+  %p5 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p4, i32 %tid
+  %p6 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p5, i32 %tid
+  %p7 = getelementptr inbounds <32 x float>, <32 x float> addrspace(1)* %p6, i32 %tid
+  %v1 = load volatile <32 x float>, <32 x float> addrspace(1)* %p1
+  %v2 = load volatile <32 x float>, <32 x float> addrspace(1)* %p2
+  %v3 = load volatile <32 x float>, <32 x float> addrspace(1)* %p3
+  %v4 = load volatile <32 x float>, <32 x float> addrspace(1)* %p4
+  %v5 = load volatile <32 x float>, <32 x float> addrspace(1)* %p5
+  %v6 = load volatile <32 x float>, <32 x float> addrspace(1)* %p6
+  %v7 = load volatile <32 x float>, <32 x float> addrspace(1)* %p7
+  br label %st
+
+st:
+  store volatile <32 x float> %arg0, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %arg1, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v1, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v2, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v3, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v4, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v5, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v6, <32 x float> addrspace(1)* undef
+  store volatile <32 x float> %v7, <32 x float> addrspace(1)* undef
+  ret void
+}
+
+
 declare i32 @llvm.amdgcn.workitem.id.x()
 
 attributes #0 = { nounwind "amdgpu-num-vgpr"="10" }

From 49e37000691a17a073003219a584414e5f481e11 Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Mon, 24 Jan 2022 23:16:29 +0900
Subject: [PATCH 393/946] [mlir][tensor] Move BufferizableOpInterface impl to
 tensor dialect

This is in preparation of unifying the existing bufferization with One-Shot bufferization.

A subsequent commit will replace `tensor-bufferize`'s implementation with the BufferizableOpInterface-based implementation and move over missing test cases.

Differential Revision: https://reviews.llvm.org/D117984
---
 .../Bufferization/Transforms/Bufferize.h      |  7 ++--
 .../TensorInterfaceImpl.h                     | 27 --------------
 .../Transforms/BufferizableOpInterfaceImpl.h  | 20 +++++++++++
 .../Bufferization/Transforms/Bufferize.cpp    |  4 +++
 .../ComprehensiveBufferize/CMakeLists.txt     | 11 ------
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |  2 +-
 .../Transforms/ComprehensiveBufferizePass.cpp |  4 +--
 .../BufferizableOpInterfaceImpl.cpp}          | 36 ++++++++-----------
 .../Dialect/Tensor/Transforms/CMakeLists.txt  |  2 ++
 mlir/test/lib/Dialect/Linalg/CMakeLists.txt   |  2 +-
 .../Linalg/TestComprehensiveBufferize.cpp     |  4 +--
 .../llvm-project-overlay/mlir/BUILD.bazel     | 27 ++++----------
 .../mlir/test/BUILD.bazel                     |  2 +-
 13 files changed, 58 insertions(+), 90 deletions(-)
 delete mode 100644 mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h
 create mode 100644 mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h
 rename mlir/lib/Dialect/{Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp => Tensor/Transforms/BufferizableOpInterfaceImpl.cpp} (94%)

diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
index c2254dfa3a168..b587955d65b13 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
@@ -28,6 +28,9 @@
 namespace mlir {
 namespace bufferization {
 
+class BufferizationState;
+struct BufferizationOptions;
+
 /// A helper type converter class that automatically populates the relevant
 /// materializations and type conversions for bufferization.
 class BufferizeTypeConverter : public TypeConverter {
@@ -52,8 +55,6 @@ void populateBufferizeMaterializationLegality(ConversionTarget &target);
 void populateEliminateBufferizeMaterializationsPatterns(
     BufferizeTypeConverter &typeConverter, RewritePatternSet &patterns);
 
-class BufferizationState;
-
 /// Bufferize `op` and its nested ops that implement `BufferizableOpInterface`.
 /// Whether buffer copies are needed or not is queried from `state`.
 ///
@@ -61,7 +62,7 @@ class BufferizationState;
 /// unknown op (that does not implement `BufferizableOpInterface`) is found. No
 /// to_tensor/to_memref ops are inserted in that case.
 ///
-/// Note: Tje layout map chosen to bufferize is the most dynamic canonical
+/// Note: The layout map chosen to bufferize is the most dynamic canonical
 /// strided layout of the proper rank. This ensures compatibility with expected
 /// layouts after transformations. Combinations of memref.cast +
 /// canonicalization are responsible for clean ups.
diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h
deleted file mode 100644
index cc3ba5aac84b5..0000000000000
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===- LinalgInterfaceImpl.h - Linalg Impl. of BufferizableOpInterface ----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSORINTERFACEIMPL_H
-#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSORINTERFACEIMPL_H
-
-namespace mlir {
-
-class DialectRegistry;
-
-namespace linalg {
-namespace comprehensive_bufferize {
-namespace tensor_ext {
-
-void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
-
-} // namespace tensor_ext
-} // namespace comprehensive_bufferize
-} // namespace linalg
-} // namespace mlir
-
-#endif // MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_TENSORINTERFACEIMPL_H
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h
new file mode 100644
index 0000000000000..298c2259c1fab
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h
@@ -0,0 +1,20 @@
+//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_TENSOR_BUFFERIZABLEOPINTERFACEIMPL_H
+#define MLIR_DIALECT_TENSOR_BUFFERIZABLEOPINTERFACEIMPL_H
+
+namespace mlir {
+class DialectRegistry;
+
+namespace tensor {
+void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
+} // namespace tensor
+} // namespace mlir
+
+#endif // MLIR_DIALECT_TENSOR_BUFFERIZABLEOPINTERFACEIMPL_H
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index 0bacc21d46889..07bd8fdbb0bb4 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -132,6 +132,10 @@ mlir::bufferization::createFinalizingBufferizePass() {
   return std::make_unique<FinalizingBufferizePass>();
 }
 
+//===----------------------------------------------------------------------===//
+// BufferizableOpInterface-based Bufferization
+//===----------------------------------------------------------------------===//
+
 static bool isaTensor(Type t) { return t.isa<TensorType>(); }
 
 /// Return true if the given op has a tensor result or a tensor operand.
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
index f3601f3c3935e..21e22de2eee24 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
@@ -5,7 +5,6 @@ set(LLVM_OPTIONAL_SOURCES
   ModuleBufferization.cpp
   SCFInterfaceImpl.cpp
   StdInterfaceImpl.cpp
-  TensorInterfaceImpl.cpp
   VectorInterfaceImpl.cpp
 )
 
@@ -57,16 +56,6 @@ add_mlir_dialect_library(MLIRStdBufferizableOpInterfaceImpl
   MLIRStandard
 )
 
-add_mlir_dialect_library(MLIRTensorBufferizableOpInterfaceImpl
-  TensorInterfaceImpl.cpp
-
-  LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
-  MLIRIR
-  MLIRMemRef
-  MLIRTensor
-)
-
 add_mlir_dialect_library(MLIRVectorBufferizableOpInterfaceImpl
   VectorInterfaceImpl.cpp
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
index bea51784b14c8..6059d51260975 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -55,7 +55,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
   MLIRStandardOpsTransforms
   MLIRStandardToLLVM
   MLIRTensor
-  MLIRTensorBufferizableOpInterfaceImpl
+  MLIRTensorTransforms
   MLIRTransforms
   MLIRTransformUtils
   MLIRVector
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 9409492e12dba..90335f952d559 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -18,9 +18,9 @@
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/StdInterfaceImpl.h"
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/VectorInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/Passes.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
@@ -59,7 +59,7 @@ struct LinalgComprehensiveModuleBufferize
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerModuleBufferizationExternalModels(registry);
     std_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    tensor_ext::registerBufferizableOpInterfaceExternalModels(registry);
+    tensor::registerBufferizableOpInterfaceExternalModels(registry);
     vector_ext::registerBufferizableOpInterfaceExternalModels(registry);
   }
 };
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
similarity index 94%
rename from mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp
rename to mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index f6748985dde14..03fa45f04c68f 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -1,4 +1,4 @@
-//===- TensorInterfaceImpl.cpp - Tensor Impl. of BufferizableOpInterface --===//
+//===- BufferizableOpInterfaceImpl.cpp - Impl. of BufferizableOpInterface -===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
@@ -15,14 +15,11 @@
 
 using namespace mlir;
 using namespace mlir::bufferization;
+using namespace mlir::tensor;
 
 namespace mlir {
-namespace linalg {
-namespace comprehensive_bufferize {
-namespace tensor_ext {
-
-using tensor::ExtractSliceOp;
-using tensor::InsertSliceOp;
+namespace tensor {
+namespace {
 
 struct CastOpInterface
     : public BufferizableOpInterface::ExternalModel<CastOpInterface,
@@ -466,19 +463,16 @@ struct InsertSliceOpInterface
   }
 };
 
-} // namespace tensor_ext
-} // namespace comprehensive_bufferize
-} // namespace linalg
+} // namespace
+} // namespace tensor
 } // namespace mlir
 
-void mlir::linalg::comprehensive_bufferize::tensor_ext::
-    registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
-  registry.addOpInterface<tensor::CastOp, tensor_ext::CastOpInterface>();
-  registry.addOpInterface<tensor::DimOp, tensor_ext::DimOpInterface>();
-  registry.addOpInterface<tensor::ExtractSliceOp,
-                          tensor_ext::ExtractSliceOpInterface>();
-  registry.addOpInterface<tensor::ExtractOp, tensor_ext::ExtractOpInterface>();
-  registry.addOpInterface<tensor::InsertOp, tensor_ext::InsertOpInterface>();
-  registry.addOpInterface<tensor::InsertSliceOp,
-                          tensor_ext::InsertSliceOpInterface>();
+void mlir::tensor::registerBufferizableOpInterfaceExternalModels(
+    DialectRegistry &registry) {
+  registry.addOpInterface<CastOp, CastOpInterface>();
+  registry.addOpInterface<DimOp, DimOpInterface>();
+  registry.addOpInterface<ExtractSliceOp, ExtractSliceOpInterface>();
+  registry.addOpInterface<ExtractOp, ExtractOpInterface>();
+  registry.addOpInterface<InsertOp, InsertOpInterface>();
+  registry.addOpInterface<InsertSliceOp, InsertSliceOpInterface>();
 }
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
index 7b2ccd46162c0..98787344c582b 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_dialect_library(MLIRTensorTransforms
+  BufferizableOpInterfaceImpl.cpp
   Bufferize.cpp
 
   ADDITIONAL_HEADER_DIRS
@@ -9,6 +10,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
+  MLIRBufferizableOpInterface
   MLIRBufferizationTransforms
   MLIRIR
   MLIRMemRef
diff --git a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
index 10657c8c514f8..c784461c2dc3d 100644
--- a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
@@ -30,7 +30,7 @@ add_mlir_library(MLIRLinalgTestPasses
   MLIRStdBufferizableOpInterfaceImpl
   MLIRStandard
   MLIRTensor
-  MLIRTensorBufferizableOpInterfaceImpl
+  MLIRTensorTransforms
   MLIRTransformUtils
   MLIRVector
   MLIRVectorBufferizableOpInterfaceImpl
diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
index 1ba0b891692b1..3e65330addb4d 100644
--- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
@@ -21,11 +21,11 @@
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/StdInterfaceImpl.h"
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/VectorInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/Passes.h"
@@ -65,7 +65,7 @@ struct TestComprehensiveFunctionBufferize
     linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    tensor_ext::registerBufferizableOpInterfaceExternalModels(registry);
+    tensor::registerBufferizableOpInterfaceExternalModels(registry);
     vector_ext::registerBufferizableOpInterfaceExternalModels(registry);
   }
 
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index e6b4654097d2c..74d10d9190b5d 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -4415,11 +4415,15 @@ cc_library(
             "lib/Dialect/Tensor/Transforms/*.h",
         ],
     ),
-    hdrs = ["include/mlir/Dialect/Tensor/Transforms/Passes.h"],
+    hdrs = [
+        "include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h",
+        "include/mlir/Dialect/Tensor/Transforms/Passes.h",
+    ],
     includes = ["include"],
     deps = [
         ":ArithmeticDialect",
         ":Async",
+        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":IR",
@@ -6684,25 +6688,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "TensorBufferizableOpInterfaceImpl",
-    srcs = [
-        "lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp",
-    ],
-    hdrs = [
-        "include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h",
-    ],
-    includes = ["include"],
-    deps = [
-        ":BufferizableOpInterface",
-        ":IR",
-        ":MemRefDialect",
-        ":Support",
-        ":TensorDialect",
-        "//llvm:Support",
-    ],
-)
-
 cc_library(
     name = "VectorBufferizableOpInterfaceImpl",
     srcs = [
@@ -6946,8 +6931,8 @@ cc_library(
         ":StandardOpsTransforms",
         ":StdBufferizableOpInterfaceImpl",
         ":Support",
-        ":TensorBufferizableOpInterfaceImpl",
         ":TensorDialect",
+        ":TensorTransforms",
         ":TensorUtils",
         ":TransformUtils",
         ":VectorBufferizableOpInterfaceImpl",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index c494b994a4f33..8c24a43000c78 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -403,8 +403,8 @@ cc_library(
         "//mlir:SCFTransforms",
         "//mlir:StandardOps",
         "//mlir:StdBufferizableOpInterfaceImpl",
-        "//mlir:TensorBufferizableOpInterfaceImpl",
         "//mlir:TensorDialect",
+        "//mlir:TensorTransforms",
         "//mlir:TransformUtils",
         "//mlir:VectorBufferizableOpInterfaceImpl",
         "//mlir:VectorOps",

From 473aa8e10c49aeed7083a53f275176c5831711b3 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Mon, 24 Jan 2022 14:55:06 +0000
Subject: [PATCH 394/946] [llvm][docs] Fix code-block in the testing guide

Without a langauge name it's an error (with some verisons of Sphinx
it seems) or the block is simply missing in the output.
---
 llvm/docs/TestingGuide.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.rst
index 6449661e5d38a..d0f890dd0a097 100644
--- a/llvm/docs/TestingGuide.rst
+++ b/llvm/docs/TestingGuide.rst
@@ -294,7 +294,7 @@ which first check the ``NOTE:`` line exists and matches the script name.
 These are the most common scripts and their purposes/applications in generating
 assertions:
 
-.. code-block::
+.. code-block:: none
 
   update_analyze_test_checks.py
   opt --analyze --costmodel

From 2d9ed1aba236b4e50ce4bbaf955347590d536a55 Mon Sep 17 00:00:00 2001
From: Denys Shabalin <shabalin@google.com>
Date: Mon, 24 Jan 2022 12:44:47 +0100
Subject: [PATCH 395/946] [mlir] Fix broken __repr__ implementation in Linalg
 OpDSL

Reviewed By: gysit

Differential Revision: https://reviews.llvm.org/D118027
---
 mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
index ddbebb29fd6f0..f6f3e01443b84 100644
--- a/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
+++ b/mlir/python/mlir/dialects/linalg/opdsl/lang/comprehension.py
@@ -468,7 +468,7 @@ def visit_tensor_exprs(self, callback):
     self.arg.visit_tensor_exprs(callback)
 
   def __repr__(self):
-    return f"{repr(self.type_fn)}({type_var}, {self.arg})"
+    return f"{repr(self.type_fn)}({self.type_var}, {self.arg})"
 
 
 class const(TensorExpression):

From d193f7be7898f165e98ee5fc38b07588f43fa856 Mon Sep 17 00:00:00 2001
From: Sean Fertile <sd.fertile@gmail.com>
Date: Fri, 21 Jan 2022 11:05:31 -0500
Subject: [PATCH 396/946] [libc++][AIX] Do not assert chmod return value is
 non-zero.

A number of the filesystem tests create a directory that contains a bad
symlink. On AIX recursively setting permissions on said directory will
return a non-zero value because of the bad symlink, however the
following rm -r still completes successfully. Avoid the assertion on
AIX, and rely on the return value of the remove command to detect
problems.

Differential Revision: https://reviews.llvm.org/D112086
---
 .../directory_entry.cons/path.pass.cpp                      | 2 --
 .../directory_entry.mods/refresh.pass.cpp                   | 2 --
 .../directory_entry.mods/replace_filename.pass.cpp          | 2 --
 .../directory_entry.obs/file_size.pass.cpp                  | 2 --
 .../directory_entry.obs/file_type_obs.pass.cpp              | 2 --
 .../directory_entry.obs/hard_link_count.pass.cpp            | 2 --
 .../directory_entry.obs/last_write_time.pass.cpp            | 2 --
 .../directory_entry.obs/status.pass.cpp                     | 2 --
 .../directory_entry.obs/symlink_status.pass.cpp             | 2 --
 .../directory_iterator.members/copy.pass.cpp                | 2 --
 .../directory_iterator.members/copy_assign.pass.cpp         | 2 --
 .../directory_iterator.members/ctor.pass.cpp                | 2 --
 .../directory_iterator.members/increment.pass.cpp           | 2 --
 .../directory_iterator.members/move.pass.cpp                | 2 --
 .../directory_iterator.members/move_assign.pass.cpp         | 2 --
 .../directory_iterator.nonmembers/begin_end.pass.cpp        | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp     | 2 --
 .../rec.dir.itr.members/copy_assign.pass.cpp                | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp     | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp    | 2 --
 .../rec.dir.itr.members/disable_recursion_pending.pass.cpp  | 2 --
 .../rec.dir.itr.members/increment.pass.cpp                  | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp     | 2 --
 .../rec.dir.itr.members/move_assign.pass.cpp                | 2 --
 .../class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp      | 2 --
 .../rec.dir.itr.members/recursion_pending.pass.cpp          | 2 --
 .../rec.dir.itr.nonmembers/begin_end.pass.cpp               | 2 --
 .../fs.op.funcs/fs.op.canonical/canonical.pass.cpp          | 2 --
 .../filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp        | 2 --
 .../fs.op.create_directories/create_directories.pass.cpp    | 2 --
 .../create_directory_with_attributes.pass.cpp               | 2 --
 .../fs.op.funcs/fs.op.current_path/current_path.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp        | 2 --
 .../filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.file_size/file_size.pass.cpp          | 2 --
 .../fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp   | 2 --
 .../fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp  | 2 --
 .../fs.op.is_char_file/is_character_file.pass.cpp           | 2 --
 .../fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp            | 2 --
 .../filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp  | 2 --
 .../fs.op.funcs/fs.op.is_other/is_other.pass.cpp            | 2 --
 .../fs.op.is_regular_file/is_regular_file.pass.cpp          | 2 --
 .../fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp          | 2 --
 .../fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp        | 2 --
 .../fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.relative/relative.pass.cpp            | 2 --
 .../filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp    | 2 --
 .../fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp      | 2 --
 .../filesystems/fs.op.funcs/fs.op.space/space.pass.cpp      | 2 --
 .../filesystems/fs.op.funcs/fs.op.status/status.pass.cpp    | 2 --
 .../fs.op.symlink_status/symlink_status.pass.cpp            | 2 --
 .../fs.op.weakly_canonical/weakly_canonical.pass.cpp        | 2 --
 libcxx/test/support/filesystem_test_helper.h                | 6 ++++++
 54 files changed, 6 insertions(+), 106 deletions(-)

diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp
index 29e9e92fb7de2..19148e7c779de 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.cons/path.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp
index ce87f4325fa75..da436fc6b959c 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/refresh.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp
index 0cf99f2658b5b..564e0e21ad6cd 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.mods/replace_filename.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
index e584fdd79ff17..271a6e826f2b7 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_size.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
index f1603abca4e10..541a6d9c9ffb0 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/file_type_obs.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
index 3daf5fcb6a1f8..44eac78fe8f46 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/hard_link_count.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
index 3e5f43ff4a347..928248b3c2b87 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/last_write_time.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
index 546170fc5141a..ce4f286e2b2c3 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/status.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
index e27f7e47fed40..364b832bafa75 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_entry/directory_entry.obs/symlink_status.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_entry
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp
index bfe107ee3feb2..47041cb455a9f 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp
index fe1cc43378e38..e6f6d1657c32f 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/copy_assign.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp
index d5eeb21e96b32..4ab9a2dea3360 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/ctor.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp
index 3727c1527140f..09b513974aaa9 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/increment.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp
index 29ed2090aaf62..6ae6eed1ac022 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp
index 6a9aaae28f0b7..d29f7b330862a 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.members/move_assign.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp
index 407af95705c46..f6f0bbe7687a8 100644
--- a/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.directory_iterator/directory_iterator.nonmembers/begin_end.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp
index 1b731a4b39fed..aacf160a22293 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp
index ff3320e73f576..dc689ad3d9f50 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/copy_assign.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp
index f4fde828b3279..5a18c46546897 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/ctor.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp
index bdf23b17beaf9..4983665978d10 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/depth.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp
index 0a6b73af42fa0..e9dc9648ec687 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/disable_recursion_pending.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
index 4d363b91aa3bf..4b8390e2a125d 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/increment.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp
index 44b939f227572..7dcc47d610020 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp
index f4d5410e1346b..03c85f27f608c 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/move_assign.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp
index 6df6746e323a4..40b09f1e79a99 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/pop.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp
index 5fcf0f1078c7e..82c82c71b181c 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.members/recursion_pending.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp
index fc3c846db9006..104e419fa9e91 100644
--- a/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/class.rec.dir.itr/rec.dir.itr.nonmembers/begin_end.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // class recursive_directory_iterator
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp
index 98a8a91a44f53..707b646be1798 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.canonical/canonical.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // path canonical(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
index 603d2e78fb9cc..3f9574de0bb01 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.copy/copy.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // void copy(const path& from, const path& to);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp
index 8c57cbc771108..d6b18e2e043be 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directories/create_directories.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // This test requires the dylib support introduced in D92769.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
 
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp
index 424e7e55df1f9..4d5cdf31e5b59 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.create_directory/create_directory_with_attributes.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // This test requires the dylib support introduced in D92769.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
 
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp
index 64f8effeb9fa1..5e90c4452a9db 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.current_path/current_path.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // path current_path();
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
index e2fdeff1159aa..5fe888609a92e 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.equivalent/equivalent.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool equivalent(path const& lhs, path const& rhs);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp
index 8fc355cc3d547..13e8b95d1e540 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.exists/exists.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool exists(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp
index cce1499176012..413ba881b59f1 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.file_size/file_size.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // The string reported on errors changed, which makes those tests fail when run
 // against already-released libc++'s.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.15
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
index 6e1ea695b40d9..38b26710f1a95 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.hard_lk_ct/hard_link_count.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // uintmax_t hard_link_count(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp
index 85e297d2f58a3..d28898472a94b 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_block_file/is_block_file.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_block_file(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp
index 6efb66e312387..738e06cc1ad55 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_char_file/is_character_file.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_character_file(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp
index 51b063b40a97a..8b1e41ef7f1b8 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_directory/is_directory.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_directory(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
index 930541bd5e357..8478037a03c68 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_empty/is_empty.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_empty(path const& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp
index 39b059b5552ac..0169fed28f54f 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_fifo/is_fifo.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_fifo(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp
index 9fc1dfd083509..f84eb7dd32d81 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_other/is_other.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_other(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp
index a1fb918a36aad..441f15a9c5d11 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_regular_file/is_regular_file.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_regular_file(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp
index cf6da35300de3..21aa537094344 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_socket/is_socket.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_socket(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp
index 43552f935c138..d8ec533058c7a 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.is_symlink/is_symlink.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // bool is_symlink(file_status s) noexcept
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp
index c3f8effb4bb32..ae94253903089 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.read_symlink/read_symlink.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // path read_symlink(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp
index 53700e94e00ce..0c056057927d3 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.relative/relative.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // path proximate(const path& p, error_code &ec)
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp
index e5d46f3c992b7..c651bf1785823 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.rename/rename.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // void rename(const path& old_p, const path& new_p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp
index 8405b74801518..504561749759c 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.resize_file/resize_file.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // void resize_file(const path& p, uintmax_t new_size);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp
index 44f60240a497e..c0317966d4fe8 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.space/space.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // space_info space(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
index f819a0f4aee52..3fa9f58b77b95 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.status/status.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // file_status status(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
index 3fa0f99538cb5..a1d8ba6e09fc7 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.symlink_status/symlink_status.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // file_status symlink_status(const path& p);
diff --git a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp
index 4ad6ba7e6e277..b0909da011710 100644
--- a/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp
+++ b/libcxx/test/std/input.output/filesystems/fs.op.funcs/fs.op.weakly_canonical/weakly_canonical.pass.cpp
@@ -8,8 +8,6 @@
 
 // UNSUPPORTED: c++03
 
-// XFAIL: LIBCXX-AIX-FIXME
-
 // <filesystem>
 
 // path weakly_canonical(const path& p);
diff --git a/libcxx/test/support/filesystem_test_helper.h b/libcxx/test/support/filesystem_test_helper.h
index dc85a86fa3ed3..746cbe603f521 100644
--- a/libcxx/test/support/filesystem_test_helper.h
+++ b/libcxx/test/support/filesystem_test_helper.h
@@ -148,7 +148,13 @@ struct scoped_test_env
         std::string cmd = "chmod -R 777 " + test_root.string();
 #endif // defined(__MVS__)
         int ret = std::system(cmd.c_str());
+#if !defined(_AIX)
+        // On AIX the chmod command will return non-zero when trying to set
+        // the permissions on a directory that contains a bad symlink. This triggers
+        // the assert, despite being able to delete everything with the following
+        // `rm -r` command.
         assert(ret == 0);
+#endif
 
         cmd = "rm -rf " + test_root.string();
         ret = std::system(cmd.c_str());

From fc08d1c2940609d26a534d7a12e6c6a528891830 Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Tue, 25 Jan 2022 00:09:36 +0900
Subject: [PATCH 397/946] [mlir][tensor][bufferize] Support tensor.rank in
 BufferizableOpInterfaceImpl

This is the only op that is not supported via BufferizableOpInterfaceImpl bufferization. Once this op is supported we can switch `tensor-bufferize` over to the new unified bufferization.

Differential Revision: https://reviews.llvm.org/D117985
---
 .../ModuleBufferization.cpp                   | 20 ++++++++-----
 .../BufferizableOpInterfaceImpl.cpp           | 30 +++++++++++++++++++
 .../comprehensive-module-bufferize.mlir       | 11 +++++++
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
index f4a2a5d692152..0fe79862a69d0 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
@@ -457,16 +457,22 @@ static LogicalResult bufferizeFuncOpBoundary(FuncOp funcOp,
     Value memref = frontBlock.addArgument(memrefType, bbArg.getLoc());
     OpBuilder b(funcOp->getContext());
     b.setInsertionPointToStart(&frontBlock);
-    // Replace all uses of bbArg through a ToMemRefOp by a memref::CastOp.
+    // Replace all uses of bbArg through a ToMemRefOp.
     for (auto &use : llvm::make_early_inc_range(bbArg.getUses())) {
       if (auto toMemrefOp =
               dyn_cast<bufferization::ToMemrefOp>(use.getOwner())) {
-        assert(memref::CastOp::areCastCompatible(
-                   memref.getType(), toMemrefOp.memref().getType()) &&
-               "bufferizeFuncOpBoundary: cast incompatible");
-        auto castOp = b.create<memref::CastOp>(
-            funcOp.getLoc(), toMemrefOp.memref().getType(), memref);
-        toMemrefOp.memref().replaceAllUsesWith(castOp);
+        if (memref.getType() != toMemrefOp.memref().getType()) {
+          // Type has changed, insert a cast.
+          assert(memref::CastOp::areCastCompatible(
+                     memref.getType(), toMemrefOp.memref().getType()) &&
+                 "bufferizeFuncOpBoundary: cast incompatible");
+          auto castOp = b.create<memref::CastOp>(
+              funcOp.getLoc(), toMemrefOp.memref().getType(), memref);
+          toMemrefOp.memref().replaceAllUsesWith(castOp);
+        } else {
+          // Type did not change, replace directly.
+          toMemrefOp.memref().replaceAllUsesWith(memref);
+        }
       }
     }
     // Replace all remaining uses by a to_tensor.
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index 03fa45f04c68f..ea9d885736f90 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -463,6 +463,35 @@ struct InsertSliceOpInterface
   }
 };
 
+/// Bufferization of tensor.rank. Replace with memref.rank.
+struct RankOpInterface
+    : public BufferizableOpInterface::ExternalModel<RankOpInterface,
+                                                    tensor::RankOp> {
+  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                              const BufferizationState &state) const {
+    return true;
+  }
+
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const BufferizationState &state) const {
+    return false;
+  }
+
+  OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
+                               const BufferizationState &state) const {
+    return OpResult();
+  }
+
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationState &state) const {
+    auto rankOp = cast<tensor::RankOp>(op);
+    Value v = *state.getBuffer(rewriter, rankOp->getOpOperand(0) /*source*/);
+    replaceOpWithNewBufferizedOp<memref::RankOp>(rewriter, op, rankOp.getType(),
+                                                 v);
+    return success();
+  }
+};
+
 } // namespace
 } // namespace tensor
 } // namespace mlir
@@ -475,4 +504,5 @@ void mlir::tensor::registerBufferizableOpInterfaceExternalModels(
   registry.addOpInterface<ExtractOp, ExtractOpInterface>();
   registry.addOpInterface<InsertOp, InsertOpInterface>();
   registry.addOpInterface<InsertSliceOp, InsertSliceOpInterface>();
+  registry.addOpInterface<RankOp, RankOpInterface>();
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index a739fc4645ed0..1f301a14c11e1 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -1348,3 +1348,14 @@ func @write_after_select_read_one(
   // CHECK: return %[[f]], %[[select]]
   return %f, %w : f32, tensor<?xf32>
 }
+
+// -----
+
+// CHECK-LABEL: func @tensor_rank(
+//  CHECK-SAME:     %[[arg0:.*]]: memref<*xf32>
+func @tensor_rank(%arg0: tensor<*xf32>) -> index {
+  // CHECK: %[[r:.*]] = memref.rank %[[arg0]]
+  %0 = tensor.rank %arg0 : tensor<*xf32>
+  // CHECK: return %[[r]] : index
+  return %0 : index
+}

From 4d53f88d1a18e288362e1077ae09c98c843593ba Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Mon, 24 Jan 2022 16:32:04 +0100
Subject: [PATCH 398/946] [flang] Add MemoryAllocation pass to the pipeline

Add the MemoryAllocation pass into the pipeline. Add
the possibilty to pass the options directly within the tool (tco).

This patch is part of the upstreaming effort from fir-dev branch.

Reviewed By: jeanPerier

Differential Revision: https://reviews.llvm.org/D117886
---
 .../flang/Optimizer/Transforms/Passes.h       |  2 +
 flang/include/flang/Tools/CLOptions.inc       |  8 ++++
 .../Optimizer/Transforms/MemoryAllocation.cpp | 39 +++++++++++++++++--
 3 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 2e273163ebfc4..4c13572386447 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -35,6 +35,8 @@ std::unique_ptr<mlir::Pass> createExternalNameConversionPass();
 std::unique_ptr<mlir::Pass> createMemDataFlowOptPass();
 std::unique_ptr<mlir::Pass> createPromoteToAffinePass();
 std::unique_ptr<mlir::Pass> createMemoryAllocationPass();
+std::unique_ptr<mlir::Pass>
+createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize);
 
 // declarative passes
 #define GEN_PASS_REGISTRATION
diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc
index 1c85075d5cc17..1be3d59dde490 100644
--- a/flang/include/flang/Tools/CLOptions.inc
+++ b/flang/include/flang/Tools/CLOptions.inc
@@ -87,6 +87,13 @@ inline void addAVC(mlir::PassManager &pm) {
       pm, disableFirAvc, fir::createArrayValueCopyPass);
 }
 
+inline void addMemoryAllocationOpt(mlir::PassManager &pm) {
+  addNestedPassConditionally<mlir::FuncOp>(pm, disableFirMao, [&]() {
+    return fir::createMemoryAllocationPass(
+        dynamicArrayStackToHeapAllocation, arrayStackAllocationThreshold);
+  });
+}
+
 #if !defined(FLANG_EXCLUDE_CODEGEN)
 inline void addCodeGenRewritePass(mlir::PassManager &pm) {
   addPassConditionally(
@@ -121,6 +128,7 @@ inline void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm) {
   fir::addAVC(pm);
   pm.addNestedPass<mlir::FuncOp>(fir::createCharacterConversionPass());
   pm.addPass(mlir::createCanonicalizerPass(config));
+  fir::addMemoryAllocationOpt(pm);
 
   // The default inliner pass adds the canonicalizer pass with the default
   // configuration. Create the inliner pass with tco config.
diff --git a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
index 4c0144f757186..83c77d6895841 100644
--- a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
+++ b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
@@ -21,7 +21,7 @@
 #define DEBUG_TYPE "flang-memory-allocation-opt"
 
 // Number of elements in an array does not determine where it is allocated.
-static constexpr std::size_t UnlimitedArraySize = ~static_cast<std::size_t>(0);
+static constexpr std::size_t unlimitedArraySize = ~static_cast<std::size_t>(0);
 
 namespace {
 struct MemoryAllocationOptions {
@@ -32,7 +32,7 @@ struct MemoryAllocationOptions {
   // Number of elements in array threshold for moving to heap. In environments
   // with limited stack size, moving large arrays to the heap can avoid running
   // out of stack space.
-  std::size_t maxStackArraySize = UnlimitedArraySize;
+  std::size_t maxStackArraySize = unlimitedArraySize;
 };
 
 class ReturnAnalysis {
@@ -150,13 +150,36 @@ class AllocaOpConversion : public mlir::OpRewritePattern<fir::AllocaOp> {
 class MemoryAllocationOpt
     : public fir::MemoryAllocationOptBase<MemoryAllocationOpt> {
 public:
+  MemoryAllocationOpt() {
+    // Set options with default values. (See Passes.td.) Note that the
+    // command-line options, e.g. dynamicArrayOnHeap,  are not set yet.
+    options = {dynamicArrayOnHeap, maxStackArraySize};
+  }
+
+  MemoryAllocationOpt(bool dynOnHeap, std::size_t maxStackSize) {
+    // Set options with default values. (See Passes.td.)
+    options = {dynOnHeap, maxStackSize};
+  }
+
+  /// Override `options` if command-line options have been set.
+  inline void useCommandLineOptions() {
+    if (dynamicArrayOnHeap)
+      options.dynamicArrayOnHeap = dynamicArrayOnHeap;
+    if (maxStackArraySize != unlimitedArraySize)
+      options.maxStackArraySize = maxStackArraySize;
+  }
+
   void runOnOperation() override {
     auto *context = &getContext();
     auto func = getOperation();
     mlir::OwningRewritePatternList patterns(context);
     mlir::ConversionTarget target(*context);
-    MemoryAllocationOptions options = {dynamicArrayOnHeap.getValue(),
-                                       maxStackArraySize.getValue()};
+
+    useCommandLineOptions();
+    LLVM_DEBUG(llvm::dbgs()
+               << "dynamic arrays on heap: " << options.dynamicArrayOnHeap
+               << "\nmaximum number of elements of array on stack: "
+               << options.maxStackArraySize << '\n');
 
     // If func is a declaration, skip it.
     if (func.empty())
@@ -178,9 +201,17 @@ class MemoryAllocationOpt
       signalPassFailure();
     }
   }
+
+private:
+  MemoryAllocationOptions options;
 };
 } // namespace
 
 std::unique_ptr<mlir::Pass> fir::createMemoryAllocationPass() {
   return std::make_unique<MemoryAllocationOpt>();
 }
+
+std::unique_ptr<mlir::Pass>
+fir::createMemoryAllocationPass(bool dynOnHeap, std::size_t maxStackSize) {
+  return std::make_unique<MemoryAllocationOpt>(dynOnHeap, maxStackSize);
+}

From 3ad35ba4dea5240dd58476f0c85f0fe096d6c7ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krist=C3=B3f=20Umann?= <dkszelethus@gmail.com>
Date: Wed, 10 Nov 2021 15:03:06 +0100
Subject: [PATCH 399/946] [Templight] Don't display empty strings for names of
 unnamed template parameters

Patch originally by oktal3000: https://github.com/mikael-s-persson/templight/pull/40

When a template parameter is unnamed, the name of -templight-dump might return
an empty string. This is fine, they are unnamed after all, but it might be more
user friendly to at least describe what entity is unnamed.

Differential Revision: https://reviews.llvm.org/D115521
---
 clang/lib/Frontend/FrontendActions.cpp        |  95 ++++-
 .../Templight/templight-empty-entries-fix.cpp | 333 ++++++++++++++++++
 2 files changed, 415 insertions(+), 13 deletions(-)
 create mode 100644 clang/test/Templight/templight-empty-entries-fix.cpp

diff --git a/clang/lib/Frontend/FrontendActions.cpp b/clang/lib/Frontend/FrontendActions.cpp
index 5b77c3e01aace..ad2e6039477f8 100644
--- a/clang/lib/Frontend/FrontendActions.cpp
+++ b/clang/lib/Frontend/FrontendActions.cpp
@@ -8,9 +8,10 @@
 
 #include "clang/Frontend/FrontendActions.h"
 #include "clang/AST/ASTConsumer.h"
+#include "clang/AST/Decl.h"
 #include "clang/Basic/FileManager.h"
-#include "clang/Basic/TargetInfo.h"
 #include "clang/Basic/LangStandard.h"
+#include "clang/Basic/TargetInfo.h"
 #include "clang/Frontend/ASTConsumers.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/Frontend/FrontendDiagnostic.h"
@@ -23,6 +24,7 @@
 #include "clang/Sema/TemplateInstCallback.h"
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/ASTWriter.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
@@ -480,25 +482,92 @@ class DefaultTemplateInstCallback : public TemplateInstantiationCallback {
     Out << "---" << YAML << "\n";
   }
 
+  static void printEntryName(const Sema &TheSema, const Decl *Entity,
+                             llvm::raw_string_ostream &OS) {
+    auto *NamedTemplate = cast<NamedDecl>(Entity);
+
+    PrintingPolicy Policy = TheSema.Context.getPrintingPolicy();
+    // FIXME: Also ask for FullyQualifiedNames?
+    Policy.SuppressDefaultTemplateArgs = false;
+    NamedTemplate->getNameForDiagnostic(OS, Policy, true);
+
+    if (!OS.str().empty())
+      return;
+
+    Decl *Ctx = Decl::castFromDeclContext(NamedTemplate->getDeclContext());
+    NamedDecl *NamedCtx = dyn_cast_or_null<NamedDecl>(Ctx);
+
+    if (const auto *Decl = dyn_cast<TagDecl>(NamedTemplate)) {
+      if (const auto *R = dyn_cast<RecordDecl>(Decl)) {
+        if (R->isLambda()) {
+          OS << "lambda at ";
+          Decl->getLocation().print(OS, TheSema.getSourceManager());
+          return;
+        }
+      }
+      OS << "unnamed " << Decl->getKindName();
+      return;
+    }
+
+    if (const auto *Decl = dyn_cast<ParmVarDecl>(NamedTemplate)) {
+      OS << "unnamed function parameter " << Decl->getFunctionScopeIndex()
+         << " ";
+      if (Decl->getFunctionScopeDepth() > 0)
+        OS << "(at depth " << Decl->getFunctionScopeDepth() << ") ";
+      OS << "of ";
+      NamedCtx->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+      return;
+    }
+
+    if (const auto *Decl = dyn_cast<TemplateTypeParmDecl>(NamedTemplate)) {
+      if (const Type *Ty = Decl->getTypeForDecl()) {
+        if (const auto *TTPT = dyn_cast_or_null<TemplateTypeParmType>(Ty)) {
+          OS << "unnamed template type parameter " << TTPT->getIndex() << " ";
+          if (TTPT->getDepth() > 0)
+            OS << "(at depth " << TTPT->getDepth() << ") ";
+          OS << "of ";
+          NamedCtx->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+          return;
+        }
+      }
+    }
+
+    if (const auto *Decl = dyn_cast<NonTypeTemplateParmDecl>(NamedTemplate)) {
+      OS << "unnamed template non-type parameter " << Decl->getIndex() << " ";
+      if (Decl->getDepth() > 0)
+        OS << "(at depth " << Decl->getDepth() << ") ";
+      OS << "of ";
+      NamedCtx->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+      return;
+    }
+
+    if (const auto *Decl = dyn_cast<TemplateTemplateParmDecl>(NamedTemplate)) {
+      OS << "unnamed template template parameter " << Decl->getIndex() << " ";
+      if (Decl->getDepth() > 0)
+        OS << "(at depth " << Decl->getDepth() << ") ";
+      OS << "of ";
+      NamedCtx->getNameForDiagnostic(OS, TheSema.getLangOpts(), true);
+      return;
+    }
+
+    llvm_unreachable("Failed to retrieve a name for this entry!");
+    OS << "unnamed identifier";
+  }
+
   template <bool BeginInstantiation>
   static TemplightEntry getTemplightEntry(const Sema &TheSema,
                                           const CodeSynthesisContext &Inst) {
     TemplightEntry Entry;
     Entry.Kind = toString(Inst.Kind);
     Entry.Event = BeginInstantiation ? "Begin" : "End";
-    if (auto *NamedTemplate = dyn_cast_or_null<NamedDecl>(Inst.Entity)) {
-      llvm::raw_string_ostream OS(Entry.Name);
-      PrintingPolicy Policy = TheSema.Context.getPrintingPolicy();
-      // FIXME: Also ask for FullyQualifiedNames?
-      Policy.SuppressDefaultTemplateArgs = false;
-      NamedTemplate->getNameForDiagnostic(OS, Policy, true);
-      const PresumedLoc DefLoc =
+    llvm::raw_string_ostream OS(Entry.Name);
+    printEntryName(TheSema, Inst.Entity, OS);
+    const PresumedLoc DefLoc =
         TheSema.getSourceManager().getPresumedLoc(Inst.Entity->getLocation());
-      if(!DefLoc.isInvalid())
-        Entry.DefinitionLocation = std::string(DefLoc.getFilename()) + ":" +
-                                   std::to_string(DefLoc.getLine()) + ":" +
-                                   std::to_string(DefLoc.getColumn());
-    }
+    if (!DefLoc.isInvalid())
+      Entry.DefinitionLocation = std::string(DefLoc.getFilename()) + ":" +
+                                 std::to_string(DefLoc.getLine()) + ":" +
+                                 std::to_string(DefLoc.getColumn());
     const PresumedLoc PoiLoc =
         TheSema.getSourceManager().getPresumedLoc(Inst.PointOfInstantiation);
     if (!PoiLoc.isInvalid()) {
diff --git a/clang/test/Templight/templight-empty-entries-fix.cpp b/clang/test/Templight/templight-empty-entries-fix.cpp
new file mode 100644
index 0000000000000..9154e56fa24ab
--- /dev/null
+++ b/clang/test/Templight/templight-empty-entries-fix.cpp
@@ -0,0 +1,333 @@
+// RUN: %clang_cc1 -templight-dump -Wno-unused-value %s 2>&1 | FileCheck %s
+
+void a() {
+  [] {};
+}
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'lambda at .*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'lambda at .*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:4:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:4:3'$}}
+
+template <int = 0> void a() { a(); }
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:31'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:15'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:15'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:31'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'a<0>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:31'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'a<0>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:31'$}}
+
+template <int> struct b { typedef int c; };
+template <bool d = true, class = typename b<d>::c> void a() { a(); }
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:16'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:16'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template type parameter 1 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:32'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'b<1>'$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:59:23'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:43'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template type parameter 1 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:32'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'a<true, int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'a<true, int>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:60:57'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:15'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of a$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:15'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+a$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:20:25'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:60:63'$}}
+
+template <bool = true> void d(int = 0) { d(); }
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of d$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:16'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template non-type parameter 0 of d$}}
+// CHECK: {{^kind:[ ]+DefaultTemplateArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:16'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'d<true>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'d<true>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'unnamed function parameter 0 of d<true>'$}}
+// CHECK: {{^kind:[ ]+DefaultFunctionArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:35'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'unnamed function parameter 0 of d<true>'$}}
+// CHECK: {{^kind:[ ]+DefaultFunctionArgumentInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:35'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:171:42'$}}
+
+void e() {
+  struct {
+  } f;
+}
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:224:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:224:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:224:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:224:5'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed struct$}}
+// CHECK: {{^kind:[ ]+Memoization$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:223:3'$}}
+
+
+template <template<typename> class>
+void d();
+
+template <typename T> struct C;
+
+void foo() {
+  d<C>();
+}
+
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template template parameter 0 of d$}}
+// CHECK: {{^kind:[ ]+PriorTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:265:35'$}}
+// CHECK: {{^poi:[ ]+''$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+unnamed template template parameter 0 of d$}}
+// CHECK: {{^kind:[ ]+PriorTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:265:35'$}}
+// CHECK: {{^poi:[ ]+''$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+DeducedTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'d<C>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+'d<C>'$}}
+// CHECK: {{^kind:[ ]+TemplateInstantiation$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:266:6'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+Begin$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}
+// CHECK-LABEL: {{^---$}}
+// CHECK: {{^name:[ ]+d$}}
+// CHECK: {{^kind:[ ]+ExplicitTemplateArgumentSubstitution$}}
+// CHECK: {{^event:[ ]+End$}}
+// CHECK: {{^orig:[ ]+'.*templight-empty-entries-fix.cpp:171:29'$}}
+// CHECK: {{^poi:[ ]+'.*templight-empty-entries-fix.cpp:271:3'$}}

From c30d2893a43d20b587d0abd288472adfa2c8672b Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Tue, 25 Jan 2022 00:34:11 +0900
Subject: [PATCH 400/946] [mlir][bufferize] Change insertion point for
 ToTensorOps

Both insertion points are valid. This is to make BufferizableOpInteface-based bufferization compatible with existing partial bufferization test cases. (So less changes are necessary to unit tests.)

Differential Revision: https://reviews.llvm.org/D117986
---
 .../Dialect/Bufferization/IR/BufferizableOpInterface.cpp    | 2 +-
 .../Dialect/Linalg/comprehensive-function-bufferize.mlir    | 2 +-
 .../Linalg/comprehensive-module-bufferize-alloca.mlir       | 6 +++---
 .../test/Dialect/Linalg/comprehensive-module-bufferize.mlir | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index e565f41a39d5a..9cb99db16d6a7 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -302,7 +302,7 @@ void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter,
       // The existing uses of the OpResult still expect a tensor. Insert a
       // ToTensorOp. Throughout bufferization, this ToTensorOp will gradually
       // loose all of its users and eventually DCE away.
-      setInsertionPointAfter(rewriter, replacement);
+      rewriter.setInsertionPointAfter(op);
       replacement = rewriter.create<bufferization::ToTensorOp>(
           replacement.getLoc(), replacement);
     }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
index 609a0df7a7cb6..1a3b266ee4b80 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
@@ -30,9 +30,9 @@ func @return_tensor(%A : tensor<?xf32>, %v : vector<4xf32>) -> (tensor<?xf32>) {
   // CHECK: %[[dim:.*]] = tensor.dim %[[A]]
   // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]])
   // CHECK: %[[casted:.*]] = memref.cast %[[alloc]]
-  // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[casted]]
   // CHECK: memref.copy %[[A_memref]], %[[alloc]]
   // CHECK: vector.transfer_write %{{.*}}, %[[alloc]]
+  // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[casted]]
   %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor<?xf32>
 
   // CHECK: return %[[res_tensor]]
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-alloca.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-alloca.mlir
index 991429cb18cb0..8b4db05f8251e 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-alloca.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-alloca.mlir
@@ -34,16 +34,16 @@ func @main() {
   // CHECK-NEXT:   %[[A:.*]] = memref.alloca() {alignment = 128 : i64} : memref<64xf32>
   // CHECK-NEXT:   %[[B:.*]] = memref.alloca() {alignment = 128 : i64} : memref<64xf32>
   // CHECK-NEXT:   %[[C:.*]] = memref.alloca() {alignment = 128 : i64} : memref<f32>
+  // CHECK-NEXT:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT:   %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
   %A = linalg.init_tensor [64] : tensor<64xf32>
   %B = linalg.init_tensor [64] : tensor<64xf32>
   %C = linalg.init_tensor [] : tensor<f32>
 
   // CHECK-NEXT:   linalg.fill(%[[C1]], %[[A]]) : f32, memref<64xf32>
-  // CHECK-NEXT:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
   // CHECK-NEXT:   linalg.fill(%[[C2]], %[[B]]) : f32, memref<64xf32>
-  // CHECK-NEXT:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
   // CHECK-NEXT:   linalg.fill(%[[C0]], %[[C]]) : f32, memref<f32>
-  // CHECK-NEXT:   %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
   %AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32>
   %BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32>
   %CC = linalg.fill(%v0, %C) : f32, tensor<f32> -> tensor<f32>
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 1f301a14c11e1..28ee8bea2e9ec 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -597,16 +597,16 @@ func @main() {
   // CHECK-NEXT:   %[[A:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
   // CHECK-NEXT:   %[[B:.*]] = memref.alloc() {alignment = 128 : i64} : memref<64xf32>
   // CHECK-NEXT:   %[[C:.*]] = memref.alloc() {alignment = 128 : i64} : memref<f32>
+  // CHECK-NEXT:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
+  // CHECK-NEXT:   %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
   %A = linalg.init_tensor [64] : tensor<64xf32>
   %B = linalg.init_tensor [64] : tensor<64xf32>
   %C = linalg.init_tensor [] : tensor<f32>
 
   // CHECK-NEXT:   linalg.fill(%[[C1]], %[[A]]) : f32, memref<64xf32>
-  // CHECK-NEXT:   %[[cA:.*]] = memref.cast %[[A]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
   // CHECK-NEXT:   linalg.fill(%[[C2]], %[[B]]) : f32, memref<64xf32>
-  // CHECK-NEXT:   %[[cB:.*]] = memref.cast %[[B]] : memref<64xf32> to memref<64xf32, #[[$DYN_1D_MAP]]>
   // CHECK-NEXT:   linalg.fill(%[[C0]], %[[C]]) : f32, memref<f32>
-  // CHECK-NEXT:   %[[cC:.*]] = memref.cast %[[C]] : memref<f32> to memref<f32, #[[$DYN_0D_MAP]]>
   %AA = linalg.fill(%v1, %A) : f32, tensor<64xf32> -> tensor<64xf32>
   %BB = linalg.fill(%v2, %B) : f32, tensor<64xf32> -> tensor<64xf32>
   %CC = linalg.fill(%v0, %C) : f32, tensor<f32> -> tensor<f32>

From 217570b03bbe810e6d4183aee72637ae5c326fbc Mon Sep 17 00:00:00 2001
From: Lorenzo Chelini <l.chelini@icloud.com>
Date: Sun, 23 Jan 2022 16:13:46 +0100
Subject: [PATCH 401/946] [MLIR][OpenMP] Suppress -Wreturn-type warnings (NFC)

---
 .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp     | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index d1f261e52bf2e..07fdc2de08a91 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -198,6 +198,7 @@ static llvm::omp::ProcBindKind getProcBindKind(omp::ClauseProcBindKind kind) {
   case omp::ClauseProcBindKind::spread:
     return llvm::omp::ProcBindKind::OMP_PROC_BIND_spread;
   }
+  llvm_unreachable("Unknown ClauseProcBindKind kind");
 }
 
 /// Converts the OpenMP parallel operation to LLVM IR.
@@ -891,7 +892,7 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder,
   return success();
 }
 
-// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
+/// Convert an Atomic Ordering attribute to llvm::AtomicOrdering.
 llvm::AtomicOrdering
 convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
   if (!ao)
@@ -909,9 +910,10 @@ convertAtomicOrdering(Optional<omp::ClauseMemoryOrderKind> ao) {
   case omp::ClauseMemoryOrderKind::relaxed:
     return llvm::AtomicOrdering::Monotonic;
   }
+  llvm_unreachable("Unknown ClauseMemoryOrderKind kind");
 }
 
-// Convert omp.atomic.read operation to LLVM IR.
+/// Convert omp.atomic.read operation to LLVM IR.
 static LogicalResult
 convertOmpAtomicRead(Operation &opInst, llvm::IRBuilderBase &builder,
                      LLVM::ModuleTranslation &moduleTranslation) {

From c1335166b2659b02784b9dfb562c6b8b1c746407 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@chromium.org>
Date: Mon, 24 Jan 2022 16:50:49 +0100
Subject: [PATCH 402/946] Don't run test/ClangScanDeps/modules-symlink.c on
 Windows

'ln -s' isn't Windows friendly.
---
 clang/test/ClangScanDeps/modules-symlink.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/test/ClangScanDeps/modules-symlink.c b/clang/test/ClangScanDeps/modules-symlink.c
index 1a2fe2d9f5123..46831b0a3fc00 100644
--- a/clang/test/ClangScanDeps/modules-symlink.c
+++ b/clang/test/ClangScanDeps/modules-symlink.c
@@ -1,5 +1,6 @@
 // RUN: rm -rf %t
 // RUN: split-file %s %t
+// UNSUPPORTED: system-windows
 
 //--- cdb_pch.json
 [

From cd2a9ff39788578f419d41f32d046150462696e2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 24 Jan 2022 07:54:59 -0800
Subject: [PATCH 403/946] [RISCV] Select int_riscv_vsll with shift of 1 to
 vadd.vv.

Add might be faster than shift. We can't do this earlier without
using a Freeze instruction.

This is the intrinsic version of D106689.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D118013
---
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    | 24 ++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll      | 32 +++++++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll      | 32 +++++++++++++++++++
 3 files changed, 88 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 798f848a50b7a..3d4864003b518 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -4543,6 +4543,30 @@ defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsrl", "PseudoVSRL", AllIntegerVectors,
 defm : VPatBinaryV_VV_VX_VI<"int_riscv_vsra", "PseudoVSRA", AllIntegerVectors,
                             uimm5>;
 
+foreach vti = AllIntegerVectors in {
+  // Emit shift by 1 as an add since it might be faster.
+  def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector vti.RegClass:$rs1),
+                                        (XLenVT 1), VLOpFrag)),
+            (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1,
+                                                              vti.RegClass:$rs1,
+                                                              GPR:$vl,
+                                                              vti.Log2SEW)>;
+  def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge),
+                                             (vti.Vector vti.RegClass:$rs1),
+                                             (XLenVT 1),
+                                             (vti.Mask V0),
+                                             VLOpFrag,
+                                             (XLenVT timm:$policy))),
+            (!cast<Instruction>("PseudoVADD_VV_"#vti.LMul.MX#"_MASK")
+                                                        vti.RegClass:$merge,
+                                                        vti.RegClass:$rs1,
+                                                        vti.RegClass:$rs1,
+                                                        (vti.Mask V0),
+                                                        GPR:$vl,
+                                                        vti.Log2SEW,
+                                                        (XLenVT timm:$policy))>;
+}
+
 //===----------------------------------------------------------------------===//
 // 12.7. Vector Narrowing Integer Right Shift Instructions
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
index 3f555dba39c5f..cd141520ce540 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsll-rv32.ll
@@ -2000,6 +2000,21 @@ entry:
   ret <vscale x 1 x i8> %a
 }
 
+define <vscale x 1 x i8> @intrinsic_vsll_1_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, i32 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsll_1_nxv1i8_nxv1i8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8(
+    <vscale x 1 x i8> %0,
+    i32 1,
+    i32 %1)
+
+  ret <vscale x 1 x i8> %a
+}
+
 define <vscale x 1 x i8> @intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8:
 ; CHECK:       # %bb.0: # %entry
@@ -2017,6 +2032,23 @@ entry:
   ret <vscale x 1 x i8> %a
 }
 
+define <vscale x 1 x i8> @intrinsic_vsll_mask_1_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i32 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsll_mask_1_nxv1i8_nxv1i8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.mask.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    i32 1,
+    <vscale x 1 x i1> %2,
+    i32 %3, i32 1)
+
+  ret <vscale x 1 x i8> %a
+}
+
 define <vscale x 2 x i8> @intrinsic_vsll_vi_nxv2i8_nxv2i8_i8(<vscale x 2 x i8> %0, i32 %1) nounwind {
 ; CHECK-LABEL: intrinsic_vsll_vi_nxv2i8_nxv2i8_i8:
 ; CHECK:       # %bb.0: # %entry
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
index 8aa798d3b9e67..305f8712b0f43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsll-rv64.ll
@@ -2000,6 +2000,21 @@ entry:
   ret <vscale x 1 x i8> %a
 }
 
+define <vscale x 1 x i8> @intrinsic_vsll_1_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, i64 %1) nounwind {
+; CHECK-LABEL: intrinsic_vsll_1_nxv1i8_nxv1i8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v8, v8
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.nxv1i8(
+    <vscale x 1 x i8> %0,
+    i64 1,
+    i64 %1)
+
+  ret <vscale x 1 x i8> %a
+}
+
 define <vscale x 1 x i8> @intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
 ; CHECK-LABEL: intrinsic_vsll_mask_vi_nxv1i8_nxv1i8_i8:
 ; CHECK:       # %bb.0: # %entry
@@ -2017,6 +2032,23 @@ entry:
   ret <vscale x 1 x i8> %a
 }
 
+define <vscale x 1 x i8> @intrinsic_vsll_mask_1_nxv1i8_nxv1i8_i8(<vscale x 1 x i8> %0, <vscale x 1 x i8> %1, <vscale x 1 x i1> %2, i64 %3) nounwind {
+; CHECK-LABEL: intrinsic_vsll_mask_1_nxv1i8_nxv1i8_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
+; CHECK-NEXT:    vadd.vv v8, v9, v9, v0.t
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vsll.mask.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8> %1,
+    i64 1,
+    <vscale x 1 x i1> %2,
+    i64 %3, i64 1)
+
+  ret <vscale x 1 x i8> %a
+}
+
 define <vscale x 2 x i8> @intrinsic_vsll_vi_nxv2i8_nxv2i8_i8(<vscale x 2 x i8> %0, i64 %1) nounwind {
 ; CHECK-LABEL: intrinsic_vsll_vi_nxv2i8_nxv2i8_i8:
 ; CHECK:       # %bb.0: # %entry

From b8c7cdcc81a04613d01b1f468d510959f1e66416 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 24 Jan 2022 08:17:45 -0800
Subject: [PATCH 404/946] [SelectionDAG][RISCV] Teach getNode to fold
 bswap(bswap(x))->x.

This can show up during when bitreverse is expanded to bswap and
swap of bits within a byte. If the input is already a bswap, we
should cancel them out before we further transform them in a way
that makes it harder to see the redundancy.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D118007
---
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   3 +
 .../RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll | 256 ++++--------------
 2 files changed, 53 insertions(+), 206 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 447bb326e6511..199dee9b0105f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5119,6 +5119,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            "BSWAP types must be a multiple of 16 bits!");
     if (OpOpcode == ISD::UNDEF)
       return getUNDEF(VT);
+    // bswap(bswap(X)) -> X.
+    if (OpOpcode == ISD::BSWAP)
+      return Operand.getOperand(0);
     break;
   case ISD::BITREVERSE:
     assert(VT.isInteger() && VT == Operand.getValueType() &&
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
index 435ea9c0d80df..3913427d5ccd0 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
@@ -694,13 +694,6 @@ define i64 @test_bitreverse_i64(i64 %a) nounwind {
 define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bswap_bitreverse_i16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a1, a0, 8
-; RV32I-NEXT:    slli a2, a0, 16
-; RV32I-NEXT:    srli a2, a2, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a1, a1, 8
-; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    lui a2, 1
 ; RV32I-NEXT:    addi a2, a2, -241
@@ -726,13 +719,6 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ;
 ; RV64I-LABEL: test_bswap_bitreverse_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    slli a2, a0, 48
-; RV64I-NEXT:    srli a2, a2, 56
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a1, a1, 8
-; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    lui a2, 1
 ; RV64I-NEXT:    addiw a2, a2, -241
@@ -758,16 +744,13 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ;
 ; RV32ZBB-LABEL: test_bswap_bitreverse_i16:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a0, a0, 16
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a1, a0, 12
-; RV32ZBB-NEXT:    lui a2, 15
-; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 1
+; RV32ZBB-NEXT:    addi a2, a2, -241
 ; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    srli a0, a0, 20
-; RV32ZBB-NEXT:    andi a0, a0, -241
-; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
 ; RV32ZBB-NEXT:    srli a1, a0, 2
 ; RV32ZBB-NEXT:    lui a2, 3
 ; RV32ZBB-NEXT:    addi a2, a2, 819
@@ -786,16 +769,13 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 ;
 ; RV64ZBB-LABEL: test_bswap_bitreverse_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 48
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 44
-; RV64ZBB-NEXT:    lui a2, 15
-; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 1
+; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 52
-; RV64ZBB-NEXT:    andi a0, a0, -241
-; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 3
 ; RV64ZBB-NEXT:    addiw a2, a2, 819
@@ -819,27 +799,6 @@ define i16 @test_bswap_bitreverse_i16(i16 %a) nounwind {
 define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_bswap_bitreverse_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    lui a4, 4080
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    lui a2, 61681
 ; RV32I-NEXT:    addi a2, a2, -241
@@ -865,27 +824,6 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 ;
 ; RV64I-LABEL: test_bswap_bitreverse_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 8
-; RV64I-NEXT:    lui a4, 4080
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a2, a0, 24
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    lui a2, 61681
 ; RV64I-NEXT:    addiw a2, a2, -241
@@ -936,18 +874,12 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 ;
 ; RV64ZBB-LABEL: test_bswap_bitreverse_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 32
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    srli a1, a0, 4
 ; RV64ZBB-NEXT:    lui a2, 61681
 ; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 28
-; RV64ZBB-NEXT:    lui a2, 986895
-; RV64ZBB-NEXT:    addiw a2, a2, 240
 ; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    slliw a0, a0, 4
 ; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 209715
@@ -972,155 +904,67 @@ define i32 @test_bswap_bitreverse_i32(i32 %a) nounwind {
 define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
 ; RV32I-LABEL: test_bswap_bitreverse_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a2, a1, 8
-; RV32I-NEXT:    lui a3, 16
-; RV32I-NEXT:    addi a3, a3, -256
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    lui a5, 4080
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    srli a4, a0, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    srli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    srli a4, a0, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    or a0, a0, a2
 ; RV32I-NEXT:    srli a2, a0, 4
-; RV32I-NEXT:    lui a4, 61681
-; RV32I-NEXT:    addi a4, a4, -241
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    and a0, a0, a4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a3
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a2, a0
 ; RV32I-NEXT:    srli a2, a0, 2
-; RV32I-NEXT:    lui a6, 209715
-; RV32I-NEXT:    addi a6, a6, 819
-; RV32I-NEXT:    and a2, a2, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a2, a0
 ; RV32I-NEXT:    srli a2, a0, 1
-; RV32I-NEXT:    lui a7, 349525
-; RV32I-NEXT:    addi a7, a7, 1365
-; RV32I-NEXT:    and a2, a2, a7
-; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    lui a5, 349525
+; RV32I-NEXT:    addi a5, a5, 1365
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a2, a0
-; RV32I-NEXT:    srli a2, a1, 8
-; RV32I-NEXT:    and a2, a2, a3
-; RV32I-NEXT:    srli a3, a1, 24
-; RV32I-NEXT:    or a2, a2, a3
-; RV32I-NEXT:    slli a3, a1, 8
-; RV32I-NEXT:    and a3, a3, a5
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a1, a1, a2
 ; RV32I-NEXT:    srli a2, a1, 4
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a1, a1, a3
 ; RV32I-NEXT:    slli a1, a1, 4
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    srli a2, a1, 2
-; RV32I-NEXT:    and a2, a2, a6
-; RV32I-NEXT:    and a1, a1, a6
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
 ; RV32I-NEXT:    slli a1, a1, 2
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    srli a2, a1, 1
-; RV32I-NEXT:    and a2, a2, a7
-; RV32I-NEXT:    and a1, a1, a7
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a1, a1, a5
 ; RV32I-NEXT:    slli a1, a1, 1
 ; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bswap_bitreverse_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a5, a4, 24
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a6, a6, -256
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a7, a0, 56
-; RV64I-NEXT:    or a3, a3, a7
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a7, a4, 40
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    srliw t0, a0, 24
-; RV64I-NEXT:    slli t0, t0, 32
-; RV64I-NEXT:    or a3, a3, t0
-; RV64I-NEXT:    slli t0, a0, 40
-; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and t0, t0, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, t0
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 56
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 24
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a7
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    slli a3, a3, 32
-; RV64I-NEXT:    or a2, a2, a3
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    lui a3, %hi(.LCPI9_0)
-; RV64I-NEXT:    ld a3, %lo(.LCPI9_0)(a3)
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    lui a2, %hi(.LCPI9_1)
-; RV64I-NEXT:    ld a2, %lo(.LCPI9_1)(a2)
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI9_0)(a1)
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_1)
+; RV64I-NEXT:    ld a1, %lo(.LCPI9_1)(a1)
 ; RV64I-NEXT:    slli a0, a0, 4
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    and a0, a0, a2
-; RV64I-NEXT:    lui a2, %hi(.LCPI9_2)
-; RV64I-NEXT:    ld a2, %lo(.LCPI9_2)(a2)
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI9_2)
+; RV64I-NEXT:    ld a1, %lo(.LCPI9_2)(a1)
 ; RV64I-NEXT:    slli a0, a0, 2
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    and a0, a0, a2
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bswap_bitreverse_i64:

From b00ee46b5e4bf5f0b5700373ca6302c3c50b10b9 Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Tue, 25 Jan 2022 00:51:41 +0900
Subject: [PATCH 405/946] [mlir][bufferize][NFC] Implement
 BufferizableOpInterface on bufferization ops directly

No longer go through an external model. Also put BufferizableOpInterface into the same build target as the BufferizationDialect. This allows for some code reuse between BufferizationOps canonicalizers and BufferizableOpInterface implementations.

Differential Revision: https://reviews.llvm.org/D117987
---
 .../Dialect/Bufferization/IR/Bufferization.h  |   1 +
 .../IR/BufferizationInterfaceImpl.h           |  25 ----
 .../Bufferization/IR/BufferizationOps.td      |  95 ++++++++++--
 .../IR/BufferizationInterfaceImpl.cpp         | 127 ----------------
 .../Bufferization/IR/BufferizationOps.cpp     | 139 +++++++++++-------
 .../Dialect/Bufferization/IR/CMakeLists.txt   |  16 +-
 .../Bufferization/Transforms/CMakeLists.txt   |   1 -
 .../ComprehensiveBufferize/CMakeLists.txt     |  14 +-
 mlir/lib/Dialect/Linalg/IR/CMakeLists.txt     |   2 +-
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |   2 +-
 .../Transforms/ComprehensiveBufferizePass.cpp |   2 -
 .../SparseTensor/Transforms/CMakeLists.txt    |   2 +-
 .../Dialect/Tensor/Transforms/CMakeLists.txt  |   2 +-
 mlir/test/lib/Dialect/Linalg/CMakeLists.txt   |   2 +-
 .../Linalg/TestComprehensiveBufferize.cpp     |   2 -
 .../llvm-project-overlay/mlir/BUILD.bazel     |  51 ++-----
 .../mlir/test/BUILD.bazel                     |   1 -
 17 files changed, 198 insertions(+), 286 deletions(-)
 delete mode 100644 mlir/include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h
 delete mode 100644 mlir/lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
index 21aeb91ff2290..2cbfc901f239b 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
@@ -10,6 +10,7 @@
 #define MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATION_H_
 
 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h
deleted file mode 100644
index 7b903b59f1769..0000000000000
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h
+++ /dev/null
@@ -1,25 +0,0 @@
-//===- BufferizationInterfaceImpl.h - Bufferization Impl. of Op Interface -===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATIONINTERFACEIMPL_H_
-#define MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATIONINTERFACEIMPL_H_
-
-namespace mlir {
-
-class DialectRegistry;
-
-namespace bufferization {
-namespace bufferization_ext {
-
-void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
-
-} // namespace bufferization_ext
-} // namespace bufferization
-} // namespace mlir
-
-#endif // MLIR_DIALECT_BUFFERIZATION_IR_BUFFERIZATIONINTERFACEIMPL_H_
diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
index 9b977a7d250f5..1a76f8b3eea00 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizationOps.td
@@ -10,6 +10,7 @@
 #define BUFFERIZATION_OPS
 
 include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.td"
+include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.td"
 include "mlir/Dialect/Bufferization/IR/BufferizationBase.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Interfaces/CopyOpInterface.td"
@@ -64,11 +65,14 @@ def Bufferization_CloneOp : Bufferization_Op<"clone", [
 // ToTensorOp
 //===----------------------------------------------------------------------===//
 
-def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor",
-    [SameOperandsAndResultShape, SameOperandsAndResultElementType,
-     TypesMatchWith<"result type matches tensor equivalent of 'memref'",
-                    "memref", "result",
-                    "memref::getTensorTypeFromMemRefType($_self)">]> {
+def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor", [
+    BufferizableOpInterface,
+    SameOperandsAndResultShape,
+    SameOperandsAndResultElementType,
+    TypesMatchWith<"result type matches tensor equivalent of 'memref'",
+                   "memref", "result",
+                   "memref::getTensorTypeFromMemRefType($_self)">
+  ]> {
   let summary = "memref to tensor operation";
   let description = [{
     Create a tensor from a memref, making an independent copy of the element
@@ -110,6 +114,35 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor",
         return resultType.cast<TensorType>();
       return {};
     }
+
+    //===------------------------------------------------------------------===//
+    // BufferizableOpInterface implementation
+    //===------------------------------------------------------------------===//
+
+    // ToTensorOp conceptually loads a tensor from a memory location. The
+    // One-Shot analysis has no information about the memref that is loaded from
+    // by ToTensorOp. We have to assume that the loaded tensor may after
+    // bufferization potentially alias with any other bufferized tensor. Since
+    // ToTensorOp and ToMemrefOp have no aliasing OpOperand/OpResult pairs, this
+    // cannot be encoded directly in the analysis. However, declaring ToTensorOp
+    // results as not writable enforces a buffer copy and has the same effect.
+
+    LogicalResult bufferize(RewriterBase &rewriter,
+                            const BufferizationState &state) const {
+      // to_tensor cannot be bufferized. However, other ops that are using
+      // to_tensor's result will eventually be bufferized. At that point, they
+      // will start using to_tensor's memref operand. Once all users of
+      // to_tensor are bufferized, the op will not have any users anymore and
+      // DCE away. In case of partial bufferization, to_memref(to_tensor(x))
+      // constructs may be left over. These are folded by the canonicalizer or
+      // FinalizingBufferize.
+      return failure();
+    }
+
+    bool isWritable(Value value, const BufferizationState &state) const {
+      // It is unknown whether the memref operand is writable or not.
+      return false;
+    }
   }];
 
   let assemblyFormat = "$memref attr-dict `:` type($memref)";
@@ -123,11 +156,15 @@ def Bufferization_ToTensorOp : Bufferization_Op<"to_tensor",
 // ToMemrefOp
 //===----------------------------------------------------------------------===//
 
-def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref",
-    [SameOperandsAndResultShape, SameOperandsAndResultElementType, NoSideEffect,
-     TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'",
-                    "memref", "tensor",
-                    "memref::getTensorTypeFromMemRefType($_self)">]> {
+def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref", [
+    BufferizableOpInterface,
+    SameOperandsAndResultShape,
+    SameOperandsAndResultElementType,
+    NoSideEffect,
+    TypesMatchWith<"type of 'tensor' is the tensor equivalent of 'memref'",
+                   "memref", "tensor",
+                   "memref::getTensorTypeFromMemRefType($_self)">
+  ]> {
   let summary = "tensor to memref cast operation";
   let description = [{
     Casts a tensor to a memref.
@@ -150,6 +187,44 @@ def Bufferization_ToMemrefOp : Bufferization_Op<"to_memref",
   // This op is fully verified by traits.
   let verifier = ?;
 
+  let extraClassDeclaration = [{
+    //===------------------------------------------------------------------===//
+    // BufferizableOpInterface implementation
+    //===------------------------------------------------------------------===//
+
+    // Note: ToMemrefOp / ToTensorOp are temporary ops that are inserted at the
+    // bufferization boundary. When One-Shot bufferization is complete, there
+    // should be no such ops left over. If `allowUnknownOps` (or after running a
+    // partial bufferization pass), such ops may be part of the resulting IR,
+    // but such IR may no longer be analyzable by One-Shot analysis.
+
+    bool bufferizesToMemoryRead(OpOperand &opOperand,
+                                const BufferizationState &state) const {
+      // It is unknown whether the resulting memref will be read or not.
+      return true;
+    }
+
+    bool bufferizesToMemoryWrite(OpOperand &opOperand,
+                                 const BufferizationState &state) const {
+      // It is unknown whether the resulting MemRef will be written or not.
+      return true;
+    }
+
+    bool mustBufferizeInPlace(OpOperand &opOperand,
+                              const BufferizationState &state) const {
+      // ToMemrefOps always bufferize inplace.
+      return true;
+    }
+
+    OpResult getAliasingOpResult(OpOperand &opOperand,
+                                 const BufferizationState &state) const {
+      return OpResult();
+    }
+
+    LogicalResult bufferize(RewriterBase &rewriter,
+                            const BufferizationState &state);
+  }];
+
   let assemblyFormat = "$tensor attr-dict `:` type($memref)";
 
   let hasFolder = 1;
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp
deleted file mode 100644
index 835a153eb8548..0000000000000
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-//===- BufferizationInterfaceImpl.cpp - Bufferization Impl. of Interface --===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h"
-#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
-#include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/IR/Dialect.h"
-#include "mlir/IR/Operation.h"
-
-using namespace mlir;
-using namespace mlir::bufferization;
-
-namespace mlir {
-namespace bufferization {
-namespace bufferization_ext {
-
-// TODO: These ops should implement BufferizableOpInterface.
-
-/// Bufferization of bufferization.to_memref. to_memref(to_tensor(x)) is folded
-/// to x. Other to_memref ops are ignored during bufferization.
-///
-/// ToMemrefOp casts a tensor into a memref. The resulting memref is the memory
-/// location of the incoming tensor once it will be bufferized. In the anlysis,
-/// the incoming tensor is assumed to bufferize to a memory read and to an
-/// inplace memory write, since it is unknown what will happen to the resulting
-/// memref.
-///
-/// Note: ToMemrefOp / ToTensorOp are temporary ops that are inserted at the
-/// bufferization boundary. When bufferization is complete, there should be no
-/// such ops left over. If `allowUnknownOps`, such ops may be part of the
-/// resulting IR, but such IR may no longer be bufferizable by Comprehensive
-/// Bufferize.
-struct ToMemrefOpInterface
-    : public BufferizableOpInterface::ExternalModel<ToMemrefOpInterface,
-                                                    bufferization::ToMemrefOp> {
-  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
-                              const BufferizationState &state) const {
-    // It is unknown whether the resulting memref will be read or not.
-    return true;
-  }
-
-  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
-                               const BufferizationState &state) const {
-    // It is unknown whether the resulting MemRef will be written or not.
-    return true;
-  }
-
-  bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand,
-                            const BufferizationState &state) const {
-    // ToMemrefOps always bufferize inplace.
-    return true;
-  }
-
-  OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
-                               const BufferizationState &state) const {
-    return OpResult();
-  }
-
-  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
-                          const BufferizationState &state) const {
-    auto toMemrefOp = cast<bufferization::ToMemrefOp>(op);
-
-    // Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
-    if (auto toTensorOp =
-            toMemrefOp.tensor().getDefiningOp<bufferization::ToTensorOp>()) {
-      Value buffer = toTensorOp.memref();
-
-      // Insert cast in case to_memref(to_tensor(x))'s type is different from
-      // x's type.
-      if (toTensorOp.memref().getType() != toMemrefOp.getType()) {
-        assert(memref::CastOp::areCastCompatible(buffer.getType(),
-                                                 toMemrefOp.getType()) &&
-               "ToMemrefOp::bufferize : cast incompatible");
-        buffer = rewriter.create<memref::CastOp>(toMemrefOp.getLoc(), buffer,
-                                                 toMemrefOp.getType());
-      }
-      replaceOpWithBufferizedValues(rewriter, toMemrefOp, buffer);
-      return success();
-    }
-
-    return failure();
-  }
-};
-
-/// Bufferization of bufferization.to_tensor. Such ops cannot be bufferized.
-/// However, other ops that are using to_tensor's result will eventually be
-/// bufferized. At that point, they will start using to_tensor's memref operand.
-/// Once all users of to_tensor are bufferized, the op will not have any users
-/// anymore and DCE away.
-///
-/// ToTensorOp conceptually loads a tensor from a memory location. The analysis
-/// has no information about the memref that is loaded from by ToTensorOp. We
-/// have to assume that the loaded tensor may after bufferization potentially
-/// alias with any other bufferized tensor. Since ToTensorOp and ToMemrefOp have
-/// no aliasing OpOperand/OpResult pairs, this cannot be encoded directly in the
-/// analysis. However, declaring ToTensorOp results as not writable enforces a
-/// buffer copy and has the same effect.
-struct ToTensorOpInterface
-    : public BufferizableOpInterface::ExternalModel<ToTensorOpInterface,
-                                                    bufferization::ToTensorOp> {
-  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
-                          const BufferizationState &state) const {
-    return failure();
-  }
-
-  bool isWritable(Operation *op, Value value,
-                  const BufferizationState &state) const {
-    // It is unknown whether the memref operand is writable or not.
-    return false;
-  }
-};
-
-} // namespace bufferization_ext
-} // namespace bufferization
-} // namespace mlir
-
-void bufferization_ext::registerBufferizableOpInterfaceExternalModels(
-    DialectRegistry &registry) {
-  registry.addOpInterface<ToMemrefOp, bufferization_ext::ToMemrefOpInterface>();
-  registry.addOpInterface<ToTensorOp, bufferization_ext::ToTensorOpInterface>();
-}
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index f81d726108d15..93770c9da5aa6 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -182,6 +182,79 @@ struct ToMemrefOfCast : public OpRewritePattern<ToMemrefOp> {
   }
 };
 
+/// Try to fold to_memref(to_tensor(x)). If x's type and the result type of the
+/// to_memref op are different, a memref.cast is needed.
+static LogicalResult foldToMemrefToTensorPair(RewriterBase &rewriter,
+                                              ToMemrefOp toMemref,
+                                              bool allowSameType = true) {
+  auto memrefToTensor = toMemref.tensor().getDefiningOp<ToTensorOp>();
+  if (!memrefToTensor)
+    return failure();
+
+  // A memref_to_tensor + tensor_to_memref with same types can be folded without
+  // inserting a cast.
+  if (memrefToTensor.memref().getType() == toMemref.getType()) {
+    if (!allowSameType)
+      // Function can be configured to only handle cases where a cast is needed.
+      return failure();
+    rewriter.replaceOp(toMemref, memrefToTensor.memref());
+    return success();
+  }
+
+  // If types are definitely not cast-compatible, bail.
+  if (!memref::CastOp::areCastCompatible(memrefToTensor.memref().getType(),
+                                         toMemref.getType()))
+    return failure();
+
+  // We already know that the types are potentially cast-compatible. However
+  // in case the affine maps are different, we may need to use a copy if we go
+  // from dynamic to static offset or stride (the canonicalization cannot know
+  // at this point that it is really cast compatible).
+  auto isGuaranteedCastCompatible = [](MemRefType source, MemRefType target) {
+    int64_t sourceOffset, targetOffset;
+    SmallVector<int64_t, 4> sourceStrides, targetStrides;
+    if (failed(getStridesAndOffset(source, sourceStrides, sourceOffset)) ||
+        failed(getStridesAndOffset(target, targetStrides, targetOffset)))
+      return false;
+    auto dynamicToStatic = [](int64_t a, int64_t b) {
+      return a == MemRefType::getDynamicStrideOrOffset() &&
+             b != MemRefType::getDynamicStrideOrOffset();
+    };
+    if (dynamicToStatic(sourceOffset, targetOffset))
+      return false;
+    for (auto it : zip(sourceStrides, targetStrides))
+      if (dynamicToStatic(std::get<0>(it), std::get<1>(it)))
+        return false;
+    return true;
+  };
+
+  auto memrefToTensorType =
+      memrefToTensor.memref().getType().dyn_cast<MemRefType>();
+  auto toMemrefType = toMemref.getType().dyn_cast<MemRefType>();
+  if (memrefToTensorType && toMemrefType &&
+      !isGuaranteedCastCompatible(memrefToTensorType, toMemrefType)) {
+    MemRefType resultType = toMemrefType;
+    auto loc = toMemref.getLoc();
+    SmallVector<Value, 4> dynamicOperands;
+    for (int i = 0; i < resultType.getRank(); ++i) {
+      if (resultType.getShape()[i] != ShapedType::kDynamicSize)
+        continue;
+      auto index = rewriter.createOrFold<arith::ConstantIndexOp>(loc, i);
+      Value size = rewriter.create<tensor::DimOp>(loc, memrefToTensor, index);
+      dynamicOperands.push_back(size);
+    }
+    // TODO: Use alloc/memcpy callback from BufferizationOptions if called via
+    // BufferizableOpInterface impl of ToMemrefOp.
+    auto copy =
+        rewriter.create<memref::AllocOp>(loc, resultType, dynamicOperands);
+    rewriter.create<memref::CopyOp>(loc, memrefToTensor.memref(), copy);
+    rewriter.replaceOp(toMemref, {copy});
+  } else
+    rewriter.replaceOpWithNewOp<memref::CastOp>(toMemref, toMemref.getType(),
+                                                memrefToTensor.memref());
+  return success();
+}
+
 /// Canonicalize bufferization.to_tensor + bufferization.to_memref to
 /// memref.cast when type mismatches prevent `ToMemrefOp::fold` to kick in.
 struct TensorLoadToMemref : public OpRewritePattern<ToMemrefOp> {
@@ -189,62 +262,10 @@ struct TensorLoadToMemref : public OpRewritePattern<ToMemrefOp> {
 
   LogicalResult matchAndRewrite(ToMemrefOp toMemref,
                                 PatternRewriter &rewriter) const final {
-    auto memrefToTensor = toMemref.tensor().getDefiningOp<ToTensorOp>();
-    // Bail unless we have a memref_to_tensor + tensor_to_memref with different
-    // types. `ToMemrefOp::fold` handles the same type case.
-    if (!memrefToTensor ||
-        memrefToTensor.memref().getType() == toMemref.getType())
-      return failure();
-    // If types are definitely not cast-compatible, bail.
-    if (!memref::CastOp::areCastCompatible(memrefToTensor.memref().getType(),
-                                           toMemref.getType()))
-      return failure();
-
-    // We already know that the types are potentially cast-compatible. However
-    // in case the affine maps are different, we may need to use a copy if we go
-    // from dynamic to static offset or stride (the canonicalization cannot know
-    // at this point that it is really cast compatible).
-    auto isGuaranteedCastCompatible = [](MemRefType source, MemRefType target) {
-      int64_t sourceOffset, targetOffset;
-      SmallVector<int64_t, 4> sourceStrides, targetStrides;
-      if (failed(getStridesAndOffset(source, sourceStrides, sourceOffset)) ||
-          failed(getStridesAndOffset(target, targetStrides, targetOffset)))
-        return false;
-      auto dynamicToStatic = [](int64_t a, int64_t b) {
-        return a == MemRefType::getDynamicStrideOrOffset() &&
-               b != MemRefType::getDynamicStrideOrOffset();
-      };
-      if (dynamicToStatic(sourceOffset, targetOffset))
-        return false;
-      for (auto it : zip(sourceStrides, targetStrides))
-        if (dynamicToStatic(std::get<0>(it), std::get<1>(it)))
-          return false;
-      return true;
-    };
-
-    auto memrefToTensorType =
-        memrefToTensor.memref().getType().dyn_cast<MemRefType>();
-    auto toMemrefType = toMemref.getType().dyn_cast<MemRefType>();
-    if (memrefToTensorType && toMemrefType &&
-        !isGuaranteedCastCompatible(memrefToTensorType, toMemrefType)) {
-      MemRefType resultType = toMemrefType;
-      auto loc = toMemref.getLoc();
-      SmallVector<Value, 4> dynamicOperands;
-      for (int i = 0; i < resultType.getRank(); ++i) {
-        if (resultType.getShape()[i] != ShapedType::kDynamicSize)
-          continue;
-        auto index = rewriter.createOrFold<arith::ConstantIndexOp>(loc, i);
-        Value size = rewriter.create<tensor::DimOp>(loc, memrefToTensor, index);
-        dynamicOperands.push_back(size);
-      }
-      auto copy =
-          rewriter.create<memref::AllocOp>(loc, resultType, dynamicOperands);
-      rewriter.create<memref::CopyOp>(loc, memrefToTensor.memref(), copy);
-      rewriter.replaceOp(toMemref, {copy});
-    } else
-      rewriter.replaceOpWithNewOp<memref::CastOp>(toMemref, toMemref.getType(),
-                                                  memrefToTensor.memref());
-    return success();
+    // Only handle cases where a cast is needed. The other case is handled by
+    // the folder.
+    return foldToMemrefToTensorPair(rewriter, toMemref,
+                                    /*allowSameType=*/false);
   }
 };
 
@@ -288,6 +309,12 @@ void ToMemrefOp::getCanonicalizationPatterns(RewritePatternSet &results,
       context);
 }
 
+LogicalResult ToMemrefOp::bufferize(RewriterBase &rewriter,
+                                    const BufferizationState &state) {
+  // Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
+  return foldToMemrefToTensorPair(rewriter, *this);
+}
+
 Optional<Operation *> CloneOp::buildDealloc(OpBuilder &builder, Value alloc) {
   return builder.create<memref::DeallocOp>(alloc.getLoc(), alloc)
       .getOperation();
diff --git a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
index cdb6656f0f0ae..8ec23af66eac3 100644
--- a/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/IR/CMakeLists.txt
@@ -1,6 +1,6 @@
 add_mlir_dialect_library(MLIRBufferization
-  PARTIAL_SOURCES_INTENDED
   AllocationOpInterface.cpp
+  BufferizableOpInterface.cpp
   BufferizationOps.cpp
   BufferizationDialect.cpp
 
@@ -17,17 +17,3 @@ add_mlir_dialect_library(MLIRBufferization
   MLIRTensor
   MLIRMemRef
   )
-
-add_mlir_dialect_library(MLIRBufferizableOpInterface
-  PARTIAL_SOURCES_INTENDED
-  BufferizableOpInterface.cpp
-  BufferizationInterfaceImpl.cpp
-
-  DEPENDS
-  MLIRBufferizableOpInterfaceIncGen
-
-  LINK_LIBS PUBLIC
-  MLIRIR
-  MLIRBufferization
-  MLIRMemRef
-)
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
index b3f4fb38d003a..b212ef952a05e 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
@@ -10,7 +10,6 @@ add_mlir_dialect_library(MLIRBufferizationTransforms
   MLIRBufferizationPassIncGen
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
   MLIRBufferization
   MLIRControlFlowInterfaces
   MLIRInferTypeOpInterface
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
index 21e22de2eee24..8b64809e4b97c 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
@@ -13,7 +13,7 @@ add_mlir_dialect_library(MLIRAffineBufferizableOpInterfaceImpl
 
   LINK_LIBS PUBLIC
   MLIRAffine
-  MLIRBufferizableOpInterface
+  MLIRBufferization
 )
 
 add_mlir_dialect_library(MLIRArithBufferizableOpInterfaceImpl
@@ -21,7 +21,7 @@ add_mlir_dialect_library(MLIRArithBufferizableOpInterfaceImpl
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRIR
   MLIRMemRef
   MLIRStandardOpsTransforms
@@ -31,7 +31,7 @@ add_mlir_dialect_library(MLIRLinalgBufferizableOpInterfaceImpl
   LinalgInterfaceImpl.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRLinalg
@@ -42,7 +42,7 @@ add_mlir_dialect_library(MLIRSCFBufferizableOpInterfaceImpl
   SCFInterfaceImpl.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRSCF
@@ -52,7 +52,7 @@ add_mlir_dialect_library(MLIRStdBufferizableOpInterfaceImpl
   StdInterfaceImpl.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRStandard
 )
 
@@ -60,7 +60,7 @@ add_mlir_dialect_library(MLIRVectorBufferizableOpInterfaceImpl
   VectorInterfaceImpl.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRIR
   MLIRVector
 )
@@ -69,7 +69,7 @@ add_mlir_dialect_library(MLIRModuleBufferization
   ModuleBufferization.cpp
 
   LINK_LIBS PUBLIC
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRMemRef
diff --git a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
index b88e57dcd13b9..0b9142f8d9e0f 100644
--- a/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/IR/CMakeLists.txt
@@ -14,7 +14,7 @@ add_mlir_dialect_library(MLIRLinalg
   LINK_LIBS PUBLIC
   MLIRAffine
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRDialectUtils
   MLIRInferTypeOpInterface
   MLIRIR
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
index 6059d51260975..366600f6d33de 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -36,7 +36,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms
   MLIRAnalysis
   MLIRArithBufferizableOpInterfaceImpl
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRComplex
   MLIRInferTypeOpInterface
   MLIRIR
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 90335f952d559..12d43300aacf7 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -10,7 +10,6 @@
 
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h"
@@ -54,7 +53,6 @@ struct LinalgComprehensiveModuleBufferize
                 arith::ArithmeticDialect, StandardOpsDialect, AffineDialect>();
     affine_ext::registerBufferizableOpInterfaceExternalModels(registry);
     arith_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    bufferization_ext::registerBufferizableOpInterfaceExternalModels(registry);
     linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerModuleBufferizationExternalModels(registry);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
index 5c4ce30042d69..0c04e729e4cbe 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/CMakeLists.txt
@@ -12,7 +12,7 @@ add_mlir_dialect_library(MLIRSparseTensorTransforms
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRIR
   MLIRLLVMIR
   MLIRLinalg
diff --git a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
index 98787344c582b..d36e556fd7723 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tensor/Transforms/CMakeLists.txt
@@ -10,7 +10,7 @@ add_mlir_dialect_library(MLIRTensorTransforms
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRMemRef
diff --git a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
index c784461c2dc3d..b45786123f723 100644
--- a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
@@ -16,7 +16,7 @@ add_mlir_library(MLIRLinalgTestPasses
   MLIRAffineBufferizableOpInterfaceImpl
   MLIRArithBufferizableOpInterfaceImpl
   MLIRArithmetic
-  MLIRBufferizableOpInterface
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRGPUTransforms
   MLIRLinalg
diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
index 3e65330addb4d..a9b5ab206d42f 100644
--- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
@@ -14,7 +14,6 @@
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h"
@@ -61,7 +60,6 @@ struct TestComprehensiveFunctionBufferize
                     arith::ArithmeticDialect, AffineDialect>();
     affine_ext::registerBufferizableOpInterfaceExternalModels(registry);
     arith_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    bufferization_ext::registerBufferizableOpInterfaceExternalModels(registry);
     linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerBufferizableOpInterfaceExternalModels(registry);
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 74d10d9190b5d..a3876c0b71f4f 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1958,7 +1958,6 @@ cc_library(
     deps = [
         ":Affine",
         ":ArithmeticDialect",
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":IR",
         ":LLVMDialect",
@@ -4423,7 +4422,6 @@ cc_library(
     deps = [
         ":ArithmeticDialect",
         ":Async",
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":IR",
@@ -6573,27 +6571,6 @@ gentbl_cc_library(
     ],
 )
 
-cc_library(
-    name = "BufferizableOpInterface",
-    srcs = [
-        "lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp",
-        "lib/Dialect/Bufferization/IR/BufferizationInterfaceImpl.cpp",
-    ],
-    hdrs = [
-        "include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h",
-        "include/mlir/Dialect/Bufferization/IR/BufferizationInterfaceImpl.h",
-    ],
-    includes = ["include"],
-    deps = [
-        ":BufferizableOpInterfaceIncGen",
-        ":BufferizationDialect",
-        ":IR",
-        ":MemRefDialect",
-        ":Support",
-        "//llvm:Support",
-    ],
-)
-
 cc_library(
     name = "AffineBufferizableOpInterfaceImpl",
     srcs = [
@@ -6605,7 +6582,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":Affine",
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         "//llvm:Support",
     ],
 )
@@ -6621,7 +6598,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":ArithmeticDialect",
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         ":IR",
         ":MemRefDialect",
         ":Support",
@@ -6640,7 +6617,6 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":IR",
@@ -6660,7 +6636,6 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":IR",
@@ -6680,7 +6655,7 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         ":IR",
         ":StandardOps",
         ":Support",
@@ -6698,7 +6673,7 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         ":IR",
         ":Support",
         ":VectorOps",
@@ -6827,7 +6802,7 @@ cc_library(
     deps = [
         ":Affine",
         ":ArithmeticDialect",
-        ":BufferizableOpInterface",
+        ":BufferizationDialect",
         ":CopyOpInterface",
         ":DialectUtils",
         ":IR",
@@ -6909,7 +6884,6 @@ cc_library(
         ":Analysis",
         ":ArithBufferizableOpInterfaceImpl",
         ":ArithmeticDialect",
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":ComplexDialect",
@@ -6953,7 +6927,6 @@ cc_library(
     ],
     includes = ["include"],
     deps = [
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":DialectUtils",
@@ -7968,19 +7941,27 @@ gentbl_cc_library(
     ],
     tblgen = ":mlir-tblgen",
     td_file = "include/mlir/Dialect/Bufferization/IR/BufferizationOps.td",
-    deps = [":BufferizationOpsTdFiles"],
+    deps = [
+        ":BufferizableOpInterfaceTdFiles",
+        ":BufferizationOpsTdFiles",
+    ],
 )
 
 cc_library(
     name = "BufferizationDialect",
     srcs = [
+        "lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp",
         "lib/Dialect/Bufferization/IR/BufferizationDialect.cpp",
         "lib/Dialect/Bufferization/IR/BufferizationOps.cpp",
     ],
-    hdrs = ["include/mlir/Dialect/Bufferization/IR/Bufferization.h"],
+    hdrs = [
+        "include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h",
+        "include/mlir/Dialect/Bufferization/IR/Bufferization.h",
+    ],
     includes = ["include"],
     deps = [
         ":AllocationOpInterface",
+        ":BufferizableOpInterfaceIncGen",
         ":BufferizationBaseIncGen",
         ":BufferizationOpsIncGen",
         ":ControlFlowInterfaces",
@@ -7989,6 +7970,7 @@ cc_library(
         ":InferTypeOpInterface",
         ":MemRefDialect",
         ":StandardOps",
+        ":Support",
         ":TensorDialect",
         ":ViewLikeInterface",
         "//llvm:Support",
@@ -8025,7 +8007,6 @@ cc_library(
     deps = [
         ":AllocationOpInterface",
         ":Analysis",
-        ":BufferizableOpInterface",
         ":BufferizationDialect",
         ":BufferizationPassIncGen",
         ":ControlFlowInterfaces",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 8c24a43000c78..7bc92df875c46 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -388,7 +388,6 @@ cc_library(
         "//mlir:AffineBufferizableOpInterfaceImpl",
         "//mlir:ArithBufferizableOpInterfaceImpl",
         "//mlir:ArithmeticDialect",
-        "//mlir:BufferizableOpInterface",
         "//mlir:BufferizationDialect",
         "//mlir:BufferizationTransforms",
         "//mlir:GPUDialect",

From a43ed49f5b163b2926641729a30a5c17c2116a08 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 24 Jan 2022 08:23:58 -0800
Subject: [PATCH 406/946] [DAGCombiner][RISCV] Canonicalize
 (bswap(bitreverse(x))->bitreverse(bswap(x)).

If the bitreverse gets expanded, it will introduce a new bswap. By
putting a bswap before the bitreverse, we can ensure it gets cancelled
out when this happens.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D118012
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  11 +
 .../RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll | 384 +++++-------------
 llvm/test/CodeGen/RISCV/rv32zbp.ll            | 150 ++-----
 llvm/test/CodeGen/RISCV/rv64zbp.ll            | 130 ++----
 4 files changed, 196 insertions(+), 479 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bf4409e77a916..77a6e7bba3660 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9375,6 +9375,17 @@ SDValue DAGCombiner::visitBSWAP(SDNode *N) {
   // fold (bswap (bswap x)) -> x
   if (N0.getOpcode() == ISD::BSWAP)
     return N0->getOperand(0);
+
+  // Canonicalize bswap(bitreverse(x)) -> bitreverse(bswap(x)). If bitreverse
+  // isn't supported, it will be expanded to bswap followed by a manual reversal
+  // of bits in each byte. By placing bswaps before bitreverse, we can remove
+  // the two bswaps if the bitreverse gets expanded.
+  if (N0.getOpcode() == ISD::BITREVERSE && N0.hasOneUse()) {
+    SDLoc DL(N);
+    SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, N0.getOperand(0));
+    return DAG.getNode(ISD::BITREVERSE, DL, VT, BSwap);
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
index 3913427d5ccd0..f2f0494a6fb88 100644
--- a/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-bitreverse-ctlz-cttz-ctpop.ll
@@ -1039,10 +1039,6 @@ define i64 @test_bswap_bitreverse_i64(i64 %a) nounwind {
 define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i16:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a1, a0, 8
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srli a0, a0, 24
-; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 4
 ; RV32I-NEXT:    lui a2, 1
 ; RV32I-NEXT:    addi a2, a2, -241
@@ -1064,17 +1060,10 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    slli a0, a0, 8
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i16:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a1, a0, 8
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    srli a0, a0, 56
-; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 4
 ; RV64I-NEXT:    lui a2, 1
 ; RV64I-NEXT:    addiw a2, a2, -241
@@ -1096,21 +1085,17 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slli a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 8
-; RV64I-NEXT:    slli a0, a0, 8
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i16:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a1, a0, 12
-; RV32ZBB-NEXT:    lui a2, 15
-; RV32ZBB-NEXT:    addi a2, a2, 240
+; RV32ZBB-NEXT:    srli a1, a0, 4
+; RV32ZBB-NEXT:    lui a2, 1
+; RV32ZBB-NEXT:    addi a2, a2, -241
 ; RV32ZBB-NEXT:    and a1, a1, a2
-; RV32ZBB-NEXT:    srli a0, a0, 20
-; RV32ZBB-NEXT:    andi a0, a0, -241
-; RV32ZBB-NEXT:    or a0, a0, a1
+; RV32ZBB-NEXT:    and a0, a0, a2
+; RV32ZBB-NEXT:    slli a0, a0, 4
+; RV32ZBB-NEXT:    or a0, a1, a0
 ; RV32ZBB-NEXT:    srli a1, a0, 2
 ; RV32ZBB-NEXT:    lui a2, 3
 ; RV32ZBB-NEXT:    addi a2, a2, 819
@@ -1125,20 +1110,17 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV32ZBB-NEXT:    and a0, a0, a2
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a0, a0, 16
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i16:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 44
-; RV64ZBB-NEXT:    lui a2, 15
-; RV64ZBB-NEXT:    addiw a2, a2, 240
+; RV64ZBB-NEXT:    srli a1, a0, 4
+; RV64ZBB-NEXT:    lui a2, 1
+; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 52
-; RV64ZBB-NEXT:    andi a0, a0, -241
-; RV64ZBB-NEXT:    or a0, a0, a1
+; RV64ZBB-NEXT:    and a0, a0, a2
+; RV64ZBB-NEXT:    slli a0, a0, 4
+; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 3
 ; RV64ZBB-NEXT:    addiw a2, a2, 819
@@ -1153,8 +1135,6 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 ; RV64ZBB-NEXT:    and a0, a0, a2
 ; RV64ZBB-NEXT:    slli a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 48
 ; RV64ZBB-NEXT:    ret
   %tmp = call i16 @llvm.bitreverse.i16(i16 %a)
   %tmp2 = call i16 @llvm.bswap.i16(i16 %tmp)
@@ -1164,99 +1144,56 @@ define i16 @test_bitreverse_bswap_i16(i16 %a) nounwind {
 define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    lui a4, 4080
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 8
-; RV64I-NEXT:    lui a4, 4080
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    lui a3, 61681
-; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 4
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    lui a3, 209715
-; RV64I-NEXT:    addiw a3, a3, 819
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 2
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a3, 349525
-; RV64I-NEXT:    addiw a3, a3, 1365
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a2, a0, 24
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i32:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a0, a0
 ; RV32ZBB-NEXT:    srli a1, a0, 4
 ; RV32ZBB-NEXT:    lui a2, 61681
 ; RV32ZBB-NEXT:    addi a2, a2, -241
@@ -1278,21 +1215,16 @@ define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV32ZBB-NEXT:    and a0, a0, a2
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a1, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i32:
 ; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a1, a0, 36
+; RV64ZBB-NEXT:    srli a1, a0, 4
 ; RV64ZBB-NEXT:    lui a2, 61681
 ; RV64ZBB-NEXT:    addiw a2, a2, -241
 ; RV64ZBB-NEXT:    and a1, a1, a2
-; RV64ZBB-NEXT:    srli a0, a0, 28
-; RV64ZBB-NEXT:    lui a2, 986895
-; RV64ZBB-NEXT:    addiw a2, a2, 240
 ; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    slliw a0, a0, 4
 ; RV64ZBB-NEXT:    or a0, a1, a0
 ; RV64ZBB-NEXT:    srli a1, a0, 2
 ; RV64ZBB-NEXT:    lui a2, 209715
@@ -1306,10 +1238,8 @@ define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 ; RV64ZBB-NEXT:    addiw a2, a2, 1365
 ; RV64ZBB-NEXT:    and a1, a1, a2
 ; RV64ZBB-NEXT:    and a0, a0, a2
-; RV64ZBB-NEXT:    slli a0, a0, 1
+; RV64ZBB-NEXT:    slliw a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a1, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
-; RV64ZBB-NEXT:    srli a0, a0, 32
 ; RV64ZBB-NEXT:    ret
   %tmp = call i32 @llvm.bitreverse.i32(i32 %a)
   %tmp2 = call i32 @llvm.bswap.i32(i32 %tmp)
@@ -1319,206 +1249,113 @@ define i32 @test_bitreverse_bswap_i32(i32 %a) nounwind {
 define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
 ; RV32I-LABEL: test_bitreverse_bswap_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a3, a1, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a4, a3, a4
-; RV32I-NEXT:    slli a5, a1, 8
-; RV32I-NEXT:    lui a3, 4080
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a5
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    srli a4, a1, 4
-; RV32I-NEXT:    lui a5, 61681
-; RV32I-NEXT:    addi a5, a5, -241
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a1, a1, a5
-; RV32I-NEXT:    slli a1, a1, 4
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 2
-; RV32I-NEXT:    lui a6, 209715
-; RV32I-NEXT:    addi a6, a6, 819
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    slli a1, a1, 2
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 1
-; RV32I-NEXT:    lui a7, 349525
-; RV32I-NEXT:    addi a7, a7, 1365
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a1, a1, a7
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli t0, a0, 24
-; RV32I-NEXT:    or a4, a4, t0
-; RV32I-NEXT:    slli t0, a0, 8
-; RV32I-NEXT:    and t0, t0, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, t0
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a0, 4
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a0, a0, a5
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a3
 ; RV32I-NEXT:    slli a0, a0, 4
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 2
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
 ; RV32I-NEXT:    slli a0, a0, 2
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    lui a5, 349525
+; RV32I-NEXT:    addi a5, a5, 1365
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli a5, a0, 24
-; RV32I-NEXT:    or a4, a4, a5
-; RV32I-NEXT:    slli a5, a0, 8
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a5
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a1, 8
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a1, 4
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 2
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 1
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a1, a1, a5
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a5, a4, 24
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a6, a6, -256
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a7, a0, 56
-; RV64I-NEXT:    or a3, a3, a7
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a7, a4, 40
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    srliw t0, a0, 24
-; RV64I-NEXT:    slli t0, t0, 32
-; RV64I-NEXT:    or a3, a3, t0
-; RV64I-NEXT:    slli t0, a0, 40
-; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and t0, t0, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, t0
-; RV64I-NEXT:    lui t0, %hi(.LCPI12_0)
-; RV64I-NEXT:    ld t0, %lo(.LCPI12_0)(t0)
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, t0
-; RV64I-NEXT:    and a0, a0, t0
-; RV64I-NEXT:    lui a3, %hi(.LCPI12_1)
-; RV64I-NEXT:    ld a3, %lo(.LCPI12_1)(a3)
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_1)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_1)(a1)
 ; RV64I-NEXT:    slli a0, a0, 4
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    lui a3, %hi(.LCPI12_2)
-; RV64I-NEXT:    ld a3, %lo(.LCPI12_2)(a3)
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI12_2)
+; RV64I-NEXT:    ld a1, %lo(.LCPI12_2)(a1)
 ; RV64I-NEXT:    slli a0, a0, 2
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 56
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 24
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a7
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    slli a3, a3, 32
-; RV64I-NEXT:    or a2, a2, a3
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: test_bitreverse_bswap_i64:
 ; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rev8 a1, a1
-; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    srli a2, a0, 4
 ; RV32ZBB-NEXT:    lui a3, 61681
 ; RV32ZBB-NEXT:    addi a3, a3, -241
 ; RV32ZBB-NEXT:    and a2, a2, a3
-; RV32ZBB-NEXT:    and a1, a1, a3
-; RV32ZBB-NEXT:    slli a1, a1, 4
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    srli a2, a1, 2
-; RV32ZBB-NEXT:    lui a4, 209715
-; RV32ZBB-NEXT:    addi a4, a4, 819
-; RV32ZBB-NEXT:    and a2, a2, a4
-; RV32ZBB-NEXT:    and a1, a1, a4
-; RV32ZBB-NEXT:    slli a1, a1, 2
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    srli a2, a1, 1
-; RV32ZBB-NEXT:    lui a5, 349525
-; RV32ZBB-NEXT:    addi a5, a5, 1365
-; RV32ZBB-NEXT:    and a2, a2, a5
-; RV32ZBB-NEXT:    and a1, a1, a5
-; RV32ZBB-NEXT:    slli a1, a1, 1
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    srli a2, a0, 4
-; RV32ZBB-NEXT:    and a2, a2, a3
 ; RV32ZBB-NEXT:    and a0, a0, a3
 ; RV32ZBB-NEXT:    slli a0, a0, 4
 ; RV32ZBB-NEXT:    or a0, a2, a0
 ; RV32ZBB-NEXT:    srli a2, a0, 2
+; RV32ZBB-NEXT:    lui a4, 209715
+; RV32ZBB-NEXT:    addi a4, a4, 819
 ; RV32ZBB-NEXT:    and a2, a2, a4
 ; RV32ZBB-NEXT:    and a0, a0, a4
 ; RV32ZBB-NEXT:    slli a0, a0, 2
 ; RV32ZBB-NEXT:    or a0, a2, a0
 ; RV32ZBB-NEXT:    srli a2, a0, 1
+; RV32ZBB-NEXT:    lui a5, 349525
+; RV32ZBB-NEXT:    addi a5, a5, 1365
 ; RV32ZBB-NEXT:    and a2, a2, a5
 ; RV32ZBB-NEXT:    and a0, a0, a5
 ; RV32ZBB-NEXT:    slli a0, a0, 1
 ; RV32ZBB-NEXT:    or a0, a2, a0
-; RV32ZBB-NEXT:    rev8 a0, a0
-; RV32ZBB-NEXT:    rev8 a1, a1
+; RV32ZBB-NEXT:    srli a2, a1, 4
+; RV32ZBB-NEXT:    and a2, a2, a3
+; RV32ZBB-NEXT:    and a1, a1, a3
+; RV32ZBB-NEXT:    slli a1, a1, 4
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 2
+; RV32ZBB-NEXT:    and a2, a2, a4
+; RV32ZBB-NEXT:    and a1, a1, a4
+; RV32ZBB-NEXT:    slli a1, a1, 2
+; RV32ZBB-NEXT:    or a1, a2, a1
+; RV32ZBB-NEXT:    srli a2, a1, 1
+; RV32ZBB-NEXT:    and a2, a2, a5
+; RV32ZBB-NEXT:    and a1, a1, a5
+; RV32ZBB-NEXT:    slli a1, a1, 1
+; RV32ZBB-NEXT:    or a1, a2, a1
 ; RV32ZBB-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: test_bitreverse_bswap_i64:
 ; RV64ZBB:       # %bb.0:
 ; RV64ZBB-NEXT:    lui a1, %hi(.LCPI12_0)
 ; RV64ZBB-NEXT:    ld a1, %lo(.LCPI12_0)(a1)
-; RV64ZBB-NEXT:    rev8 a0, a0
 ; RV64ZBB-NEXT:    srli a2, a0, 4
 ; RV64ZBB-NEXT:    and a2, a2, a1
 ; RV64ZBB-NEXT:    and a0, a0, a1
@@ -1538,7 +1375,6 @@ define i64 @test_bitreverse_bswap_i64(i64 %a) nounwind {
 ; RV64ZBB-NEXT:    and a0, a0, a1
 ; RV64ZBB-NEXT:    slli a0, a0, 1
 ; RV64ZBB-NEXT:    or a0, a2, a0
-; RV64ZBB-NEXT:    rev8 a0, a0
 ; RV64ZBB-NEXT:    ret
   %tmp = call i64 @llvm.bitreverse.i64(i64 %a)
   %tmp2 = call i64 @llvm.bswap.i64(i64 %tmp)
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index 1df371c407d77..c51151f47962f 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -2404,48 +2404,27 @@ define i32 @bswap_rotl_i32(i32 %a) {
 define i32 @bitreverse_bswap_i32(i32 %a) {
 ; RV32I-LABEL: bitreverse_bswap_i32:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a3, a0, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    slli a3, a0, 8
-; RV32I-NEXT:    lui a4, 4080
-; RV32I-NEXT:    and a3, a3, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a3
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    srli a1, a0, 4
-; RV32I-NEXT:    lui a3, 61681
-; RV32I-NEXT:    addi a3, a3, -241
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 61681
+; RV32I-NEXT:    addi a2, a2, -241
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 4
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 2
-; RV32I-NEXT:    lui a3, 209715
-; RV32I-NEXT:    addi a3, a3, 819
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 209715
+; RV32I-NEXT:    addi a2, a2, 819
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 2
 ; RV32I-NEXT:    or a0, a1, a0
 ; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    lui a3, 349525
-; RV32I-NEXT:    addi a3, a3, 1365
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a3
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    and a0, a0, a2
 ; RV32I-NEXT:    slli a0, a0, 1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    srli a1, a0, 8
-; RV32I-NEXT:    and a1, a1, a2
-; RV32I-NEXT:    srli a2, a0, 24
-; RV32I-NEXT:    or a1, a1, a2
-; RV32I-NEXT:    slli a2, a0, 8
-; RV32I-NEXT:    and a2, a2, a4
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i32:
@@ -2460,81 +2439,42 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV32I-LABEL: bitreverse_bswap_i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a3, a1, 8
-; RV32I-NEXT:    lui a2, 16
-; RV32I-NEXT:    addi a2, a2, -256
-; RV32I-NEXT:    and a3, a3, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a4, a3, a4
-; RV32I-NEXT:    slli a5, a1, 8
-; RV32I-NEXT:    lui a3, 4080
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a5
-; RV32I-NEXT:    or a1, a1, a4
-; RV32I-NEXT:    srli a4, a1, 4
-; RV32I-NEXT:    lui a5, 61681
-; RV32I-NEXT:    addi a5, a5, -241
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a1, a1, a5
-; RV32I-NEXT:    slli a1, a1, 4
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 2
-; RV32I-NEXT:    lui a6, 209715
-; RV32I-NEXT:    addi a6, a6, 819
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a1, a1, a6
-; RV32I-NEXT:    slli a1, a1, 2
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a1, 1
-; RV32I-NEXT:    lui a7, 349525
-; RV32I-NEXT:    addi a7, a7, 1365
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a1, a1, a7
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a1, a4, a1
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli t0, a0, 24
-; RV32I-NEXT:    or a4, a4, t0
-; RV32I-NEXT:    slli t0, a0, 8
-; RV32I-NEXT:    and t0, t0, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, t0
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a0, 4
-; RV32I-NEXT:    and a4, a4, a5
-; RV32I-NEXT:    and a0, a0, a5
+; RV32I-NEXT:    srli a2, a0, 4
+; RV32I-NEXT:    lui a3, 61681
+; RV32I-NEXT:    addi a3, a3, -241
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a0, a0, a3
 ; RV32I-NEXT:    slli a0, a0, 4
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 2
-; RV32I-NEXT:    and a4, a4, a6
-; RV32I-NEXT:    and a0, a0, a6
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 2
+; RV32I-NEXT:    lui a4, 209715
+; RV32I-NEXT:    addi a4, a4, 819
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a0, a0, a4
 ; RV32I-NEXT:    slli a0, a0, 2
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 1
-; RV32I-NEXT:    and a4, a4, a7
-; RV32I-NEXT:    and a0, a0, a7
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    lui a5, 349525
+; RV32I-NEXT:    addi a5, a5, 1365
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a0, a0, a5
 ; RV32I-NEXT:    slli a0, a0, 1
-; RV32I-NEXT:    or a0, a4, a0
-; RV32I-NEXT:    srli a4, a0, 8
-; RV32I-NEXT:    and a4, a4, a2
-; RV32I-NEXT:    srli a5, a0, 24
-; RV32I-NEXT:    or a4, a4, a5
-; RV32I-NEXT:    slli a5, a0, 8
-; RV32I-NEXT:    and a5, a5, a3
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    or a0, a0, a5
-; RV32I-NEXT:    or a0, a0, a4
-; RV32I-NEXT:    srli a4, a1, 8
-; RV32I-NEXT:    and a2, a4, a2
-; RV32I-NEXT:    srli a4, a1, 24
-; RV32I-NEXT:    or a2, a2, a4
-; RV32I-NEXT:    slli a4, a1, 8
-; RV32I-NEXT:    and a3, a4, a3
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    or a1, a1, a3
-; RV32I-NEXT:    or a1, a1, a2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    srli a2, a1, 4
+; RV32I-NEXT:    and a2, a2, a3
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    slli a1, a1, 4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 2
+; RV32I-NEXT:    and a2, a2, a4
+; RV32I-NEXT:    and a1, a1, a4
+; RV32I-NEXT:    slli a1, a1, 2
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    srli a2, a1, 1
+; RV32I-NEXT:    and a2, a2, a5
+; RV32I-NEXT:    and a1, a1, a5
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a2, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBP-LABEL: bitreverse_bswap_i64:
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index 260892285643a..b68a98b13052b 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2351,48 +2351,27 @@ define i32 @bswap_rotl_i32(i32 %a) {
 define i32 @bitreverse_bswap_i32(i32 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i32:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    lui a2, 16
-; RV64I-NEXT:    addiw a2, a2, -256
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 8
-; RV64I-NEXT:    lui a4, 4080
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    lui a3, 61681
-; RV64I-NEXT:    addiw a3, a3, -241
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 61681
+; RV64I-NEXT:    addiw a2, a2, -241
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 4
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    lui a3, 209715
-; RV64I-NEXT:    addiw a3, a3, 819
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 209715
+; RV64I-NEXT:    addiw a2, a2, 819
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 2
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    lui a3, 349525
-; RV64I-NEXT:    addiw a3, a3, 1365
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    lui a2, 349525
+; RV64I-NEXT:    addiw a2, a2, 1365
+; RV64I-NEXT:    and a1, a1, a2
+; RV64I-NEXT:    and a0, a0, a2
 ; RV64I-NEXT:    slliw a0, a0, 1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srliw a1, a0, 8
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srliw a2, a0, 24
-; RV64I-NEXT:    or a1, a1, a2
-; RV64I-NEXT:    slli a2, a0, 8
-; RV64I-NEXT:    and a2, a2, a4
-; RV64I-NEXT:    slliw a0, a0, 24
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i32:
@@ -2407,76 +2386,27 @@ define i32 @bitreverse_bswap_i32(i32 %a) {
 define i64 @bitreverse_bswap_i64(i64 %a) {
 ; RV64I-LABEL: bitreverse_bswap_i64:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    srli a1, a0, 24
-; RV64I-NEXT:    lui a2, 4080
-; RV64I-NEXT:    and a1, a1, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    li a4, 255
-; RV64I-NEXT:    slli a5, a4, 24
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a1, a3, a1
-; RV64I-NEXT:    srli a3, a0, 40
-; RV64I-NEXT:    lui a6, 16
-; RV64I-NEXT:    addiw a6, a6, -256
-; RV64I-NEXT:    and a3, a3, a6
-; RV64I-NEXT:    srli a7, a0, 56
-; RV64I-NEXT:    or a3, a3, a7
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    slli a3, a0, 24
-; RV64I-NEXT:    slli a7, a4, 40
-; RV64I-NEXT:    and a3, a3, a7
-; RV64I-NEXT:    srliw t0, a0, 24
-; RV64I-NEXT:    slli t0, t0, 32
-; RV64I-NEXT:    or a3, a3, t0
-; RV64I-NEXT:    slli t0, a0, 40
-; RV64I-NEXT:    slli a4, a4, 48
-; RV64I-NEXT:    and t0, t0, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, t0
-; RV64I-NEXT:    lui t0, %hi(.LCPI68_0)
-; RV64I-NEXT:    ld t0, %lo(.LCPI68_0)(t0)
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a1
-; RV64I-NEXT:    srli a1, a0, 4
-; RV64I-NEXT:    and a1, a1, t0
-; RV64I-NEXT:    and a0, a0, t0
-; RV64I-NEXT:    lui a3, %hi(.LCPI68_1)
-; RV64I-NEXT:    ld a3, %lo(.LCPI68_1)(a3)
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_0)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_0)(a1)
+; RV64I-NEXT:    srli a2, a0, 4
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_1)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_1)(a1)
 ; RV64I-NEXT:    slli a0, a0, 4
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 2
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
-; RV64I-NEXT:    lui a3, %hi(.LCPI68_2)
-; RV64I-NEXT:    ld a3, %lo(.LCPI68_2)(a3)
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 2
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
+; RV64I-NEXT:    lui a1, %hi(.LCPI68_2)
+; RV64I-NEXT:    ld a1, %lo(.LCPI68_2)(a1)
 ; RV64I-NEXT:    slli a0, a0, 2
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 1
-; RV64I-NEXT:    and a1, a1, a3
-; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    or a0, a2, a0
+; RV64I-NEXT:    srli a2, a0, 1
+; RV64I-NEXT:    and a2, a2, a1
+; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    slli a0, a0, 1
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    srli a1, a0, 40
-; RV64I-NEXT:    and a1, a1, a6
-; RV64I-NEXT:    srli a3, a0, 56
-; RV64I-NEXT:    or a1, a1, a3
-; RV64I-NEXT:    srli a3, a0, 24
-; RV64I-NEXT:    and a2, a3, a2
-; RV64I-NEXT:    srli a3, a0, 8
-; RV64I-NEXT:    and a3, a3, a5
-; RV64I-NEXT:    or a2, a3, a2
-; RV64I-NEXT:    or a1, a2, a1
-; RV64I-NEXT:    slli a2, a0, 24
-; RV64I-NEXT:    and a2, a2, a7
-; RV64I-NEXT:    srliw a3, a0, 24
-; RV64I-NEXT:    slli a3, a3, 32
-; RV64I-NEXT:    or a2, a2, a3
-; RV64I-NEXT:    slli a3, a0, 40
-; RV64I-NEXT:    and a3, a3, a4
-; RV64I-NEXT:    slli a0, a0, 56
-; RV64I-NEXT:    or a0, a0, a3
-; RV64I-NEXT:    or a0, a0, a2
-; RV64I-NEXT:    or a0, a0, a1
+; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBP-LABEL: bitreverse_bswap_i64:

From e494278ceeb70d2973392a349b3ab105da488b13 Mon Sep 17 00:00:00 2001
From: gysit <gysit@google.com>
Date: Mon, 24 Jan 2022 16:25:42 +0000
Subject: [PATCH 407/946] [mlir][linalg] Add transpose support to hoist
 padding.

Add a transpose option to hoist padding to transpose the padded tensor before storing it into the packed tensor. The early transpose improves the memory access patterns of the actual compute kernel. The patch introduces a transpose right after the hoisted pad tensor and a second transpose inside the compute loop. The second transpose can either be fused into the compute operation or will canonicalize away when lowering to vector instructions.

Reviewed By: nicolasvasilache

Differential Revision: https://reviews.llvm.org/D117893
---
 .../Dialect/Linalg/Transforms/HoistPadding.h  |  18 +-
 .../Dialect/Linalg/Transforms/Transforms.h    |  36 ++--
 .../include/mlir/Dialect/Linalg/Utils/Utils.h |  10 +-
 .../Linalg/Transforms/HoistPadding.cpp        | 165 +++++++++++-------
 .../Dialect/Linalg/Transforms/Transforms.cpp  |  19 +-
 mlir/lib/Dialect/Linalg/Utils/Utils.cpp       |  78 ++++++---
 mlir/test/Dialect/Linalg/hoist-padding.mlir   |  85 +++++++--
 .../Linalg/TestLinalgCodegenStrategy.cpp      |  23 +++
 8 files changed, 316 insertions(+), 118 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
index 8d3315d5ea971..795dd00cbdfd6 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h
@@ -19,12 +19,17 @@ class PadOp;
 } // namespace tensor
 
 namespace linalg {
+class GenericOp;
 
 /// Mechanically hoist padding operations on tensors by `numLoops` into a new,
 /// generally larger tensor. This achieves packing of multiple padding ops into
-/// a larger tensor. On success, `padTensorOp` is replaced by the cloned version
+/// a larger tensor. On success, `opToHoist` is replaced by the cloned version
 /// in the packing loop so the caller can continue reasoning about the padding
-/// operation.
+/// operation. If `transposeVector` is non-empty, hoist padding introduces a
+/// GenericOp to transpose the padded tensor before inserting it into the packed
+/// tensor. A `transposeVector` can change the storage order of the padded
+/// tensor but does not change the order of the pack or compute loops.
+///
 ///
 /// Example in pseudo-mlir:
 /// =======================
@@ -33,7 +38,7 @@ namespace linalg {
 /// ```
 ///    scf.for (%i, %j, %k)
 ///      %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
-///      %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
+///      %0 = tensor.pad %st0 low[0, 0] high[...] {
 ///      ^bb0( ... ):
 ///        linalg.yield %pad
 ///      } : tensor<?x?xf32> to tensor<4x8xf32>
@@ -47,7 +52,7 @@ namespace linalg {
 ///      %packed_init = linalg.init_tensor range(%j) : tensor<?x4x8xf32>
 ///      %packed = scf.for (%k) iter_args(%p : %packed_init) {
 ///        %st0 = tensor.extract_slice f(%i, %k) : ... to tensor<?x?xf32>
-///        %0 = linalg.pad_tensor %st0 low[0, 0] high[...] {
+///        %0 = tensor.pad %st0 low[0, 0] high[...] {
 ///        ^bb0( ... ):
 ///          linalg.yield %pad
 ///        } : tensor<?x?xf32> to tensor<4x8xf32>
@@ -62,8 +67,9 @@ namespace linalg {
 ///      }
 ///    }
 /// ```
-FailureOr<Value> hoistPaddingOnTensors(tensor::PadOp opToHoist, int numLoops,
-                                       tensor::PadOp &hoistedOp);
+FailureOr<Value> hoistPaddingOnTensors(
+    tensor::PadOp opToHoist, int numLoops, ArrayRef<int64_t> transposeVector,
+    tensor::PadOp &hoistedOp, SmallVectorImpl<GenericOp> &transposeOps);
 
 } // namespace linalg
 } // namespace mlir
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index f5e99d5afe83e..61156c733594d 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -484,14 +484,19 @@ using TileSizeComputationFunction =
 using PaddingValueComputationFunction =
     std::function<FailureOr<Value>(OpBuilder &, OpOperand &)>;
 
-/// Callback returning true if the pad tensor operation defining the given
-/// OpOperand shall be marked as nofold to enable packing.
+/// Callback returning true if the PadOp defining the given OpOperand shall be
+/// marked as nofold to enable packing.
 using PaddingNoFoldComputationFunction = std::function<bool(OpOperand &)>;
 
-/// Callback returning the number of loops to hoist the pad tensor operation
-/// defining the given OpOperand.
+/// Callback returning the number of loops to hoist the PadOp defining the given
+/// OpOperand.
 using PaddingHoistComputationFunction = std::function<int64_t(OpOperand &)>;
 
+/// Callback returning the transpose vector used to permute the result tensor
+/// dimensions of the PadOp defining the given OpOperand.
+using PaddingTransposeComputationFunction =
+    std::function<SmallVector<int64_t>(OpOperand &)>;
+
 struct LinalgPaddingOptions {
   /// Callback returning the padding value to use for a given OpOperand or
   /// failure for no padding. Padding operations are introduced if
@@ -506,10 +511,10 @@ struct LinalgPaddingOptions {
     return *this;
   }
 
-  /// Callback returning true if the pad tensor operation defining the given
-  /// OpOperand shall be marked as nofold to enable packing. A padding operation
-  /// is only marked nofold if `paddingNoFoldComputationFunction` is set and
-  /// returns true. Otherwise, the nofold attribute is set to false.
+  /// Callback returning true if the PadOp defining the given OpOperand shall be
+  /// marked as nofold to enable packing. A padding operation is only marked
+  /// nofold if `paddingNoFoldComputationFunction` is set and returns true.
+  /// Otherwise, the nofold attribute is set to false.
   PaddingNoFoldComputationFunction paddingNoFoldComputationFunction = nullptr;
 
   LinalgPaddingOptions &
@@ -518,8 +523,8 @@ struct LinalgPaddingOptions {
     return *this;
   }
 
-  /// Callback returning the number of loops to hoist the pad tensor operation
-  /// defining the given OpOperand.
+  /// Callback returning the number of loops to hoist the PadOp defining the
+  /// given OpOperand.
   PaddingHoistComputationFunction paddingHoistComputationFunction = nullptr;
 
   LinalgPaddingOptions &
@@ -527,6 +532,17 @@ struct LinalgPaddingOptions {
     paddingHoistComputationFunction = std::move(fun);
     return *this;
   }
+
+  /// Callback returning the transpose vector used to permute the result tensor
+  /// dimensions of the PadOp defining the given OpOperand.
+  PaddingTransposeComputationFunction paddingTransposeComputationFunction =
+      nullptr;
+
+  LinalgPaddingOptions &setPaddingTransposeComputationFunction(
+      PaddingTransposeComputationFunction fun) {
+    paddingTransposeComputationFunction = std::move(fun);
+    return *this;
+  }
 };
 
 struct LinalgTilingAndFusionOptions {
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index b466d7726f502..a2f08e79ac47c 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -117,18 +117,24 @@ tensor::ExtractSliceOp makeComposedExtractSliceOp(
 /// Example:
 /// ```
 /// %0 = tensor.extract_slice %arg0 [%iv0, %iv1] [%sz0, %sz1]
-/// %1 = linalg.pad_tensor %0 low[0, 0] high[...] { linalg.yield %cst }
+/// %1 = tensor.pad %0 low[0, 0] high[...] { tensor.yield %cst }
 /// %2 = linalg.matmul ins(...) outs(%1)
 /// %3 = tensor.extract_slice %2 [0, 0] [%sz0, %sz1]
 /// ```
 /// makeComposedPadHighOp(source=%3, pad=%cst) returns %2
 /// makeComposedPadHighOp(source=%3, pad=%other_cst) returns %4
 /// ```
-/// %4 = linalg.pad_tensor %3 low[0, 0] high[...] { linalg.yield %other_cst }
+/// %4 = tensor.pad %3 low[0, 0] high[...] { tensor.yield %other_cst }
 /// ```
 Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
                             Value source, Value pad, bool nofold);
 
+/// Returns a GenericOp that tansposes `inputTensor` into `outputTensor` using
+/// `transposeVector` to permute the `inputTensor` dimensions.
+GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
+                          Value outputTensor,
+                          ArrayRef<int64_t> transposeVector);
+
 //===----------------------------------------------------------------------===//
 // Fusion / Tiling utilities
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index 21c92ee304dfa..83c9aa4a54a01 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -53,32 +53,32 @@ using namespace mlir::linalg;
 ///   8. There is no enclosing scf::ForOp that indexes the padded data.
 /// Other cases succeed and will trigger hoisting of the pad op.
 struct HoistingAnalysis {
-  HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops);
+  HoistingAnalysis(tensor::PadOp padOp, int numLoops);
 
   bool isValid() { return valid; }
 
   /// Footprint of the packedTensor, computed from the packingLoops.
   SmallVector<Value> getPackedTensorSizes(ImplicitLocOpBuilder &b);
 
-  /// The outermost loop, determined by `nLevels` above which `padTensorOp` will
+  /// The outermost loop, determined by `nLevels` above which `padOp` will
   /// be hoisted.
   scf::ForOp outermostEnclosingForOp;
 
-  /// Backward slice rooted at `padTensorOp` and nested under
+  /// Backward slice rooted at `padOp` and nested under
   /// `outermostEnclosingForOp`.
   SetVector<Operation *> backwardSlice;
 
-  /// The scf::ForOp immediately enclosing `padTensorOp` such that:
+  /// The scf::ForOp immediately enclosing `padOp` such that:
   ///  1. they are nested under `outermostEnclosingForOp` (inclusive)
   ///  2. whose induction variable is used, directly or indirectly, in the
-  ///     computation of `padTensorOp`.
+  ///     computation of `padOp`.
   /// The span of these loops determines the footprint of the packed tensor.
   SmallVector<scf::ForOp> packingLoops;
 
 private:
-  /// Drop any non-index dependencies of `padTensorOp` and `sliceOp` from
+  /// Drop any non-index dependencies of `padOp` and `sliceOp` from
   /// `backwardSlice`. The method follows the use-def chains of the index
-  /// operands consumed by `padTensorOp` and `sliceOp` and drops the operations
+  /// operands consumed by `padOp` and `sliceOp` and drops the operations
   /// not part of this index computation. Afterwards, the filtered
   /// `backwardSlice` contains only the loops whose induction variable is used,
   /// directly or indirectly, to index the padded tensor. The method returns
@@ -94,24 +94,24 @@ struct HoistingAnalysis {
   ///       %ubi = affine.min #map(%i)
   ///       %ubj = affine.min #map(%j)
   ///       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
-  ///       %padded_slice = linalg.pad_tensor %slice
+  ///       %padded_slice = tensor.pad %slice
   /// ```
   /// dropNonIndexDependencies(%padded_slice, %slice)
   /// removes [scf.for %k, linalg.fill(%cst, %arg1)] from backwardSlice.
-  LogicalResult dropNonIndexDependencies(tensor::PadOp padTensorOp,
+  LogicalResult dropNonIndexDependencies(tensor::PadOp padOp,
                                          tensor::ExtractSliceOp sliceOp);
 
   /// Encodes whether the analysis is valid and hoisting can proceed.
   bool valid;
 };
 
-/// Return true if all uses of `padTensorOp` are an input tensor of some
+/// Return true if all uses of `padOp` are an input tensor of some
 /// LinalgOp.
-static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padTensorOp) {
-  for (OpOperand &use : padTensorOp.result().getUses()) {
+static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padOp) {
+  for (OpOperand &use : padOp.result().getUses()) {
     auto linalgUser = dyn_cast<linalg::LinalgOp>(use.getOwner());
     if (!linalgUser || !linalgUser.isInputTensor(&use)) {
-      LLVM_DEBUG(DBGS() << "Found a use of " << *(padTensorOp)
+      LLVM_DEBUG(DBGS() << "Found a use of " << *(padOp)
                         << "\nthat is not an input tensor of a LinalgOp, "
                         << "cannot hoist\n"
                         << *(use.getOwner()) << "\n");
@@ -126,12 +126,12 @@ static bool isOnlyUsedAsInputOfLinalgOp(tensor::PadOp padTensorOp) {
 /// Multi-loops such as scf.parallel or linalg.tiled_loop are not modeled atm.
 /// Control-flow and other containing ops with regions are not modeled atm.
 static void
-getAtMostNEnclosingLoops(tensor::PadOp padTensorOp, int nLevels,
+getAtMostNEnclosingLoops(tensor::PadOp padOp, int nLevels,
                          SmallVector<scf::ForOp> &reverseEnclosingLoops) {
-  AsmState state(padTensorOp->getParentOfType<mlir::FuncOp>());
+  AsmState state(padOp->getParentOfType<mlir::FuncOp>());
   (void)state;
   scf::ForOp outermostEnclosingForOp = nullptr;
-  Operation *nextEnclosingOp = padTensorOp->getParentOp();
+  Operation *nextEnclosingOp = padOp->getParentOp();
   while (nLevels-- > 0 &&
          (outermostEnclosingForOp = dyn_cast<scf::ForOp>(nextEnclosingOp))) {
     LLVM_DEBUG(
@@ -143,17 +143,38 @@ getAtMostNEnclosingLoops(tensor::PadOp padTensorOp, int nLevels,
   }
 }
 
-HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
+/// Returns the transposed `rankedTensorType` if `transposeVector` is non-empty.
+/// Fail if `transposeVector` is no permutation matching the tensor rank.
+static FailureOr<RankedTensorType>
+computeTransposedType(RankedTensorType rankedTensorType,
+                      ArrayRef<int64_t> transposeVector) {
+  if (transposeVector.empty())
+    return rankedTensorType;
+  if (!isPermutation(transposeVector) ||
+      transposeVector.size() != static_cast<size_t>(rankedTensorType.getRank()))
+    return failure();
+
+  SmallVector<int64_t> transposedShape(rankedTensorType.getShape().begin(),
+                                       rankedTensorType.getShape().end());
+  applyPermutationToVector(transposedShape, transposeVector);
+
+  using RTTBuilder = RankedTensorType::Builder;
+  RankedTensorType transposedTensorType =
+      RTTBuilder(rankedTensorType).setShape(transposedShape);
+  return transposedTensorType;
+}
+
+HoistingAnalysis::HoistingAnalysis(tensor::PadOp padOp, int numLoops) {
   valid = false;
 
-  // Bail on any use that isn't an input of a Linalg op.
+  // Bail on any use that isn't an input of a LinalgOp.
   // Hoisting of inplace updates happens after vectorization.
-  if (!isOnlyUsedAsInputOfLinalgOp(padTensorOp))
+  if (!isOnlyUsedAsInputOfLinalgOp(padOp))
     return;
 
   // Get at most `numLoops` of immediately enclosing loops.
   SmallVector<scf::ForOp> reverseEnclosingLoops;
-  getAtMostNEnclosingLoops(padTensorOp, numLoops, reverseEnclosingLoops);
+  getAtMostNEnclosingLoops(padOp, numLoops, reverseEnclosingLoops);
   if (reverseEnclosingLoops.empty()) {
     LLVM_DEBUG(DBGS() << "No immediately enclosing loop -> skip\n");
     return;
@@ -161,7 +182,7 @@ HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
 
   outermostEnclosingForOp = reverseEnclosingLoops.back();
 
-  // Get the `sliceOp` that defines the source tensor of `padTensorOp` and
+  // Get the `sliceOp` that defines the source tensor of `padOp` and
   // check its source is defined outside of the outermost loop. This check
   // ensures the padded data is available for packing before entering the
   // outermost enclosing loop.
@@ -174,9 +195,9 @@ HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
   //   scf.for %j
   //     scf.for %k
   //       %slice = tensor.extract_slice %source [%i, %j]
-  //       %padded_slice = linalg.pad_tensor %slice
+  //       %padded_slice = tensor.pad %slice
   // ```
-  auto sliceOp = padTensorOp.source().getDefiningOp<tensor::ExtractSliceOp>();
+  auto sliceOp = padOp.source().getDefiningOp<tensor::ExtractSliceOp>();
   if (!sliceOp) {
     LLVM_DEBUG(DBGS() << "Cannot find the extract slice op -> skip\n");
     return;
@@ -186,32 +207,31 @@ HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
     return;
   }
 
-  // Check the region of `padTensorOp` depends on a constant only. Adding
+  // Check the region of `padOp` depends on a constant only. Adding
   // hoisting support for arbitrary padding regions would require cloning all
   // dependencies captured by the padding region.
-  Value paddingValue = padTensorOp.getConstantPaddingValue();
+  Value paddingValue = padOp.getConstantPaddingValue();
   if (!paddingValue ||
       !isa_and_nonnull<arith::ConstantOp>(paddingValue.getDefiningOp())) {
     LLVM_DEBUG(DBGS() << "Cannot find constant padding value -> skip\n");
     return;
   }
 
-  // Get all the ops in the backwards slice starting from `padTensorOp` and that
+  // Get all the ops in the backwards slice starting from `padOp` and that
   // are dominated by the outermost enclosing loop.
   DominanceInfo domInfo(outermostEnclosingForOp);
-  getBackwardSlice(padTensorOp.getOperation(), &backwardSlice,
-                   [&](Operation *op) {
-                     return domInfo.dominates(outermostEnclosingForOp, op);
-                   });
+  getBackwardSlice(padOp.getOperation(), &backwardSlice, [&](Operation *op) {
+    return domInfo.dominates(outermostEnclosingForOp, op);
+  });
   if (backwardSlice.empty())
     return;
-  // Add `padTensorOp` itself to the backward slice.
-  backwardSlice.insert(padTensorOp.getOperation());
+  // Add `padOp` itself to the backward slice.
+  backwardSlice.insert(padOp.getOperation());
 
   // Remove all ops in the backward slice that are not used to index the padded
-  // tensor. In particular, keep `padTensorOp`, `sliceOp`, and the loop and
+  // tensor. In particular, keep `padOp`, `sliceOp`, and the loop and
   // affine operations used for the index computation.
-  if (failed(dropNonIndexDependencies(padTensorOp, sliceOp)))
+  if (failed(dropNonIndexDependencies(padOp, sliceOp)))
     return;
 
   // Add only the loops part of the filtered `backwardSlice` to the packing
@@ -232,7 +252,7 @@ HoistingAnalysis::HoistingAnalysis(tensor::PadOp padTensorOp, int numLoops) {
 }
 
 LogicalResult
-HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
+HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padOp,
                                            tensor::ExtractSliceOp sliceOp) {
   // Set of all values used for index computation.
   SetVector<Value> indexEdges;
@@ -252,7 +272,7 @@ HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
     });
   };
 
-  // Starting from `padTensorOp` and `sliceOp` walk the use-def edges of index
+  // Starting from `padOp` and `sliceOp` walk the use-def edges of index
   // type in `backwardSlice`. Add the index operands of an operation to
   // `indexEdges` and remove all operations from `backwardSlice` that are not
   // part of the index computation.
@@ -267,16 +287,16 @@ HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
   //       %ubi = affine.min #map(%i)
   //       %ubj = affine.min #map(%j)
   //       %slice = tensor.extract_slice %source [%i, %j] [%ubi, %ubj]
-  //       %padded_slice = linalg.pad_tensor %slice
+  //       %padded_slice = tensor.pad %slice
   // ```
   // After iterating `backwardSlice` we obtain:
   // indexEdges = [%i, %j, %ubi, %ubj]
   // backwardSlice = backwardSlice / [linalg.fill(%cst, %arg1), scf.for %k]
   SetVector<Operation *> operationsToRemove;
   for (Operation *op : llvm::reverse(backwardSlice)) {
-    // Add the index operands of `padTensorOp` and `sliceOp` to start the
+    // Add the index operands of `padOp` and `sliceOp` to start the
     // exploration of the index computation.
-    if (op == padTensorOp || op == sliceOp) {
+    if (op == padOp || op == sliceOp) {
       addIndexOperandsToIndexEdges(op);
       continue;
     }
@@ -310,7 +330,7 @@ HoistingAnalysis::dropNonIndexDependencies(tensor::PadOp padTensorOp,
       continue;
     }
     // Remove all other operations not used by the index computation. An
-    // exception are constant operations that may be used by `padTensorOp`.
+    // exception are constant operations that may be used by `padOp`.
     if (!isa<arith::ConstantOp>(op))
       operationsToRemove.insert(op);
   }
@@ -373,9 +393,9 @@ static Value buildLoopIterationCount(OpBuilder &b, scf::ForOp outer,
                                        ValueRange{ivVal, lbVal, stepVal});
 }
 
-FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
-                                                     int numLoops,
-                                                     tensor::PadOp &hoistedOp) {
+FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(
+    tensor::PadOp opToHoist, int numLoops, ArrayRef<int64_t> transposeVector,
+    tensor::PadOp &hoistedOp, SmallVectorImpl<GenericOp> &transposeOps) {
   LLVM_DEBUG(DBGS() << "Try to hoist " << *(opToHoist) << " by " << numLoops
                     << " loops\n");
   HoistingAnalysis analysis(opToHoist, numLoops);
@@ -396,14 +416,20 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
   RankedTensorType paddedTensorType = opToHoist.getResultType();
   int paddedRank = paddedTensorType.getRank();
 
-  // Create the packed tensor<?x?x..?xpadded_shape> into which we amortize
+  // Compute the type of the transposed padded tensor.
+  FailureOr<RankedTensorType> transposedTensorType =
+      computeTransposedType(paddedTensorType, transposeVector);
+  if (failed(transposedTensorType))
+    return failure();
+
+  // Create the packed tensor<?x?x..?xtransposedShape> into which we amortize
   // padding.
   SmallVector<int64_t> packedShape(nPackedLoops, ShapedType::kDynamicSize);
   // TODO: go grab dims when necessary, for now tensor::PadOp returns a static
   // tensor.
-  llvm::append_range(packedShape, paddedTensorType.getShape());
-  auto packedTensorType =
-      RankedTensorType::get(packedShape, paddedTensorType.getElementType());
+  llvm::append_range(packedShape, transposedTensorType->getShape());
+  auto packedTensorType = RankedTensorType::get(
+      packedShape, transposedTensorType->getElementType());
   Value packedTensor = b.create<linalg::InitTensorOp>(
       loc, dynamicTensorSizes, packedTensorType.getShape(),
       packedTensorType.getElementType());
@@ -413,9 +439,10 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
   // The implementation proceeds in a stack-like fashion:
   //   1. Iteratively clone and step into the loops, pushing the `packedTensor`
   //      deeper in the stack.
-  //   2. Create a InsertSliceOp at the top of the stack.
-  //   3. Iteratively pop and yield the result of the InsertSliceOp across
-  //     the cloned loops.
+  //   2. Create a GenericOp if `transposeVector` is non-empty.
+  //   3. Create a InsertSliceOp at the top of the stack.
+  //   4. Iteratively pop and yield the result of the InsertSliceOp across
+  //      the cloned loops.
   SmallVector<Value> clonedLoopIvs, leadingPackedTensorIndexings;
   clonedLoopIvs.reserve(nPackedLoops);
   leadingPackedTensorIndexings.reserve(nPackedLoops);
@@ -455,16 +482,14 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
     packedTensor = clonedForOp.getRegionIterArgs().front();
   }
 
-  // Stack step 2. create InsertSliceOp at the top of the stack.
   // offsets = [clonedLoopIvs, 0 .. 0].
   SmallVector<OpFoldResult> offsets(leadingPackedTensorIndexings.begin(),
                                     leadingPackedTensorIndexings.end());
   offsets.append(paddedRank, b.getIndexAttr(0));
-  // sizes = [1 .. 1, paddedShape].
+  // sizes = [1 .. 1, transposedShape].
   SmallVector<OpFoldResult> sizes(nPackedLoops, b.getIndexAttr(1));
-  for (int64_t sz : paddedTensorType.getShape()) {
+  for (int64_t sz : transposedTensorType->getShape()) {
     // TODO: go grab dims when necessary, for now tensor::PadOp returns a static
-    // tensor.
     assert(!ShapedType::isDynamic(sz) && "padded tensor needs static sizes");
     sizes.push_back(b.getIndexAttr(sz));
   }
@@ -472,11 +497,21 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
   SmallVector<OpFoldResult> strides(nPackedLoops + paddedRank,
                                     b.getIndexAttr(1));
 
-  Value inserted =
-      b.create<tensor::InsertSliceOp>(loc, bvm.lookup(opToHoist.result()),
-                                      packedTensor, offsets, sizes, strides);
+  // Stack step 2. create GenericOp if `transposeVector` is non-empty.
+  Value paddedTensor = bvm.lookup(opToHoist.result());
+  if (!transposeVector.empty()) {
+    Value outputTensor = b.create<tensor::ExtractSliceOp>(
+        loc, *transposedTensorType, packedTensor, offsets, sizes, strides);
+    transposeOps.push_back(
+        makeTransposeOp(b, loc, paddedTensor, outputTensor, transposeVector));
+    paddedTensor = transposeOps.back()->getResult(0);
+  }
 
-  // Stack step 3. iteratively pop the stack and propagate the yield.
+  // Stack step 3. create InsertSliceOp at the top of the stack.
+  Value inserted = b.create<tensor::InsertSliceOp>(
+      loc, paddedTensor, packedTensor, offsets, sizes, strides);
+
+  // Stack step 4. iteratively pop the stack and propagate the yield.
   Value valueToYield = inserted;
   for (Value iv : llvm::reverse(clonedLoopIvs)) {
     auto forOp = scf::getForInductionVarOwner(iv);
@@ -498,12 +533,22 @@ FailureOr<Value> mlir::linalg::hoistPaddingOnTensors(tensor::PadOp opToHoist,
   // offsets = [originalLoopIvs, 0 .. 0].
   offsets.assign(loopIterationCounts.begin(), loopIterationCounts.end());
   offsets.append(paddedRank, b.getIndexAttr(0));
-  // sizes = [1 .. 1, paddedShape] (definedabove).
+  // sizes = [1 .. 1, transposedShape] (definedabove).
   // strides = [1 .. 1] (defined above)
   packedTensor =
       scf::getForInductionVarOwner(clonedLoopIvs.front())->getResult(0);
   Value newResult = b.create<tensor::ExtractSliceOp>(
-      loc, opToHoist.getResultType(), packedTensor, offsets, sizes, strides);
+      loc, *transposedTensorType, packedTensor, offsets, sizes, strides);
+
+  // Transpose the packed tensor back to the original storage order.
+  if (!transposeVector.empty()) {
+    Value initTensor =
+        b.create<InitTensorOp>(loc, ValueRange{}, paddedTensorType.getShape(),
+                               paddedTensorType.getElementType());
+    transposeOps.push_back(
+        makeTransposeOp(b, loc, newResult, initTensor, transposeVector));
+    newResult = transposeOps.back()->getResult(0);
+  }
 
   // Make the newly cloned `opToHoist` available to the caller.
   hoistedOp =
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 9eb7b7cfe751c..486a069a60ed0 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -528,20 +528,29 @@ mlir::linalg::LinalgPaddingPattern::returningMatchAndRewrite(
   // Hoist the padding.
   for (const auto &en : enumerate(depths)) {
     OpOperand &opOperand = paddedOp->getOpOperand(en.index());
-    auto padTensorOp = opOperand.get().getDefiningOp<tensor::PadOp>();
-    if (!padTensorOp || en.value() == 0)
+    auto padOp = opOperand.get().getDefiningOp<tensor::PadOp>();
+    if (!padOp || en.value() == 0)
       continue;
     tensor::PadOp hoistedOp;
-    FailureOr<Value> newResult =
-        hoistPaddingOnTensors(padTensorOp, en.value(), hoistedOp);
+    SmallVector<GenericOp> transposeOps;
+    SmallVector<int64_t> transposeVector =
+        options.paddingTransposeComputationFunction(opOperand);
+
+    FailureOr<Value> newResult = hoistPaddingOnTensors(
+        padOp, en.value(), transposeVector, hoistedOp, transposeOps);
     if (failed(newResult))
       continue;
-    rewriter.replaceOp(padTensorOp, newResult.getValue());
+    rewriter.replaceOp(padOp, newResult.getValue());
+
+    // Do not apply hoist padding to the newly introduced transpose operations.
+    for (GenericOp transposeOp : transposeOps)
+      filter.replaceLinalgTransformationFilter(rewriter, transposeOp);
   }
 
   // Replace the original operation to pad.
   rewriter.replaceOp(linalgOp, newResults.getValue());
   filter.replaceLinalgTransformationFilter(rewriter, paddedOp);
+
   return paddedOp;
 }
 
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index bf37719325ccb..1b0a6d7f2ba87 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -340,11 +340,11 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
     OpResult opResult = current.cast<OpResult>();
     current = linalgOp.getOutputOperand(opResult.getResultNumber())->get();
   }
-  auto padTensorOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
+  auto padOp = current ? current.getDefiningOp<tensor::PadOp>() : nullptr;
 
   // Exit if the search fails to match a tensor::PadOp at the end of the matched
   // LinalgOp sequence.
-  if (!padTensorOp)
+  if (!padOp)
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the padded result type does not match.
@@ -352,41 +352,77 @@ Value makeComposedPadHighOp(OpBuilder &b, Location loc, RankedTensorType type,
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the LinalgOps are not high padded.
-  if (llvm::any_of(padTensorOp.getMixedLowPad(), [](OpFoldResult ofr) {
+  if (llvm::any_of(padOp.getMixedLowPad(), [](OpFoldResult ofr) {
         return getConstantIntValue(ofr) != static_cast<int64_t>(0);
       }))
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
-  // Exit if `padTensorOpSliceOp`, which defines the slice used by
-  // `padTensorOp`, is rank-reducing.
-  auto padTensorOpSliceOp =
-      padTensorOp.source().getDefiningOp<tensor::ExtractSliceOp>();
-  if (!padTensorOpSliceOp || sliceOp.getMixedSizes().size() !=
-                                 padTensorOpSliceOp.getMixedSizes().size())
+  // Exit if `padOpSliceOp`, which defines the slice used by
+  // `padOp`, is rank-reducing.
+  auto padOpSliceOp = padOp.source().getDefiningOp<tensor::ExtractSliceOp>();
+  if (!padOpSliceOp ||
+      sliceOp.getMixedSizes().size() != padOpSliceOp.getMixedSizes().size())
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the sizes of the dynamic sizes of `sliceOp` do not match the size
-  // of the slice padded by `padTensorOp`.
-  if (llvm::any_of(llvm::zip(sliceOp.getMixedSizes(),
-                             padTensorOpSliceOp.getMixedSizes()),
-                   [](std::tuple<OpFoldResult, OpFoldResult> it) {
-                     return !isEqualConstantIntOrValue(std::get<0>(it),
-                                                       std::get<1>(it));
-                   }))
+  // of the slice padded by `padOp`.
+  if (llvm::any_of(
+          llvm::zip(sliceOp.getMixedSizes(), padOpSliceOp.getMixedSizes()),
+          [](std::tuple<OpFoldResult, OpFoldResult> it) {
+            return !isEqualConstantIntOrValue(std::get<0>(it), std::get<1>(it));
+          }))
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Exit if the padding values do not match.
-  Attribute padTensorOpPadAttr, padAttr;
-  Value padTensorOpPad = padTensorOp.getConstantPaddingValue();
-  if (!padTensorOpPad ||
-      !matchPattern(padTensorOpPad, m_Constant(&padTensorOpPadAttr)) ||
-      !matchPattern(pad, m_Constant(&padAttr)) || padTensorOpPadAttr != padAttr)
+  Attribute padOpPadAttr, padAttr;
+  Value padOpPad = padOp.getConstantPaddingValue();
+  if (!padOpPad || !matchPattern(padOpPad, m_Constant(&padOpPadAttr)) ||
+      !matchPattern(pad, m_Constant(&padAttr)) || padOpPadAttr != padAttr)
     return tensor::createPadHighOp(type, source, pad, nofold, loc, b);
 
   // Return the padded result if the padding values and sizes match.
   return sliceOp.source();
 }
 
+GenericOp makeTransposeOp(OpBuilder &b, Location loc, Value inputTensor,
+                          Value outputTensor,
+                          ArrayRef<int64_t> transposeVector) {
+  auto resultTensorType = outputTensor.getType().cast<RankedTensorType>();
+  Type elementType = resultTensorType.getElementType();
+
+  assert(isPermutation(transposeVector) &&
+         "expect transpose vector to be a permutation");
+  assert(transposeVector.size() ==
+             static_cast<size_t>(resultTensorType.getRank()) &&
+         "expect transpose vector size to match result tensor rank");
+
+  // Compute the transpose and the indentity indexing maps.
+  SmallVector<AffineMap> indexingMaps = {
+      inversePermutation(AffineMap::getPermutationMap(
+          SmallVector<unsigned>(transposeVector.begin(), transposeVector.end()),
+          b.getContext())),
+      AffineMap::getMultiDimIdentityMap(transposeVector.size(),
+                                        b.getContext())};
+  SmallVector<llvm::StringRef> iteratorTypes(transposeVector.size(),
+                                             getParallelIteratorTypeName());
+
+  // Create a GenericOp to transpose `inputTensor` into `outputTensor`.
+  auto transposeOp = b.create<GenericOp>(
+      loc, resultTensorType, inputTensor, outputTensor,
+      b.getAffineMapArrayAttr(indexingMaps), b.getStrArrayAttr(iteratorTypes),
+      /*doc=*/nullptr,
+      /*library_call=*/nullptr);
+  Region &body = transposeOp.getRegion();
+  body.push_back(new Block());
+  body.front().addArguments({elementType, elementType}, {loc, loc});
+
+  // Create the body of the transpose operation.
+  OpBuilder::InsertionGuard g(b);
+  b.setInsertionPointToEnd(&body.front());
+  b.create<YieldOp>(loc, transposeOp.getRegion().front().getArgument(0));
+  return transposeOp;
+}
+
 /// Specialization to build an scf "for" nest.
 template <>
 void GenerateLoopNest<scf::ForOp>::doit(
diff --git a/mlir/test/Dialect/Linalg/hoist-padding.mlir b/mlir/test/Dialect/Linalg/hoist-padding.mlir
index 416dfe37e93d0..534e37a1f9ee0 100644
--- a/mlir/test/Dialect/Linalg/hoist-padding.mlir
+++ b/mlir/test/Dialect/Linalg/hoist-padding.mlir
@@ -1,4 +1,5 @@
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matvec pad hoist-paddings=1,1,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATVEC
+// RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matvec pad hoist-paddings=1,1,0 transpose-paddings=1:0,0,0 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=TRANSP
 // RUN: mlir-opt %s -test-linalg-codegen-strategy="anchor-op=linalg.matmul pad hoist-paddings=1,2,1 run-enable-pass=false" -cse -canonicalize -split-input-file | FileCheck %s --check-prefix=MATMUL
 
 //  MATVEC-DAG: #[[DIV4:[0-9a-z]+]] = affine_map<(d0) -> (d0 ceildiv 4)>
@@ -30,7 +31,7 @@ func @static_size_divisible(%arg0: tensor<24x12xf32>,
     //  MATVEC-DAG:   %[[T4:.*]] = tensor.extract_slice %[[T0]][%[[IDX0]]
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = tensor.pad %2 nofold low[%c0] high[%c0]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -81,11 +82,11 @@ func @static_size_not_divisible(%arg0: tensor<24x12xf32>,
     %3 = tensor.extract_slice %arg1[%arg3] [%1] [1] : tensor<12xf32> to tensor<?xf32>
     %4 = affine.apply #map1(%1)
     %5 = tensor.pad %2 low[%c0, %c0] high[%c0, %4]  {
-    ^bb0(%arg5: index, %arg6: index):  
+    ^bb0(%arg5: index, %arg6: index):
       tensor.yield %cst : f32
     } : tensor<24x?xf32> to tensor<24x5xf32>
     %6 = tensor.pad %3 low[%c0] high[%4]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       tensor.yield %cst : f32
     } : tensor<?xf32> to tensor<5xf32>
 
@@ -141,11 +142,11 @@ func @dynamic_size(%arg0: tensor<24x?xf32>,
     %4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor<?xf32> to tensor<?xf32>
     %5 = affine.apply #map1(%2)
     %6 = tensor.pad %3 low[%c0, %c0] high[%c0, %5]  {
-    ^bb0(%arg5: index, %arg6: index):  
+    ^bb0(%arg5: index, %arg6: index):
       tensor.yield %cst : f32
     } : tensor<24x?xf32> to tensor<24x4xf32>
     %7 = tensor.pad %4 nofold low[%c0] high[%5]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       tensor.yield %cst : f32
     } : tensor<?xf32> to tensor<4xf32>
 
@@ -177,7 +178,7 @@ func @non_constant_padding(%arg0: tensor<24x12xf32>,
     //      MATVEC:  %[[T1:.*]] = tensor.pad %[[T0]]
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = tensor.pad %2 nofold low[%c0] high[%c0]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       %5 = arith.index_cast %arg3 : index to i32
       %6 = arith.sitofp %5 : i32 to f32
       tensor.yield %6 : f32
@@ -214,7 +215,7 @@ func @non_constant_op_padding(%arg0: tensor<24x12xf32>,
     %2 = tensor.extract_slice %arg1[%arg3] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = tensor.extract %arg1[%arg3] : tensor<12xf32>
     %4 = tensor.pad %2 nofold low[%c0] high[%c0]  {
-    ^bb0(%arg5: index):  
+    ^bb0(%arg5: index):
       tensor.yield %3 : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -251,7 +252,7 @@ func @non_index_operand(%arg0: tensor<24x12xf32>,
     %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = arith.index_cast %arg3 : i32 to index
     %4 = tensor.pad %2 nofold low[%3] high[%3]  {
-    ^bb0(%arg6: index):  
+    ^bb0(%arg6: index):
       tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -288,7 +289,7 @@ func @memory_effect(%arg0: tensor<24x12xf32>,
     %2 = tensor.extract_slice %arg1[%arg4] [4] [1] : tensor<12xf32> to tensor<4xf32>
     %3 = memref.load %arg3[%c0] : memref<?xindex>
     %4 = tensor.pad %2 nofold low[%3] high[%3]  {
-    ^bb0(%arg6: index):  
+    ^bb0(%arg6: index):
       tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -328,7 +329,7 @@ func @index_result_loop(%arg0: tensor<24x12xf32>,
       scf.yield %6 : index
     }
     %4 = tensor.pad %2 nofold low[%3] high[%3]  {
-    ^bb0(%arg6: index):  
+    ^bb0(%arg6: index):
       tensor.yield %cst : f32
     } : tensor<4xf32> to tensor<4xf32>
 
@@ -373,7 +374,7 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
 
     // Check the fused and padded fill op does not prevent hoisting.
     %4 = tensor.pad %2 nofold low[%c0, %c0] high[%3, %c0]  {
-    ^bb0(%arg5: index, %arg6: index):  
+    ^bb0(%arg5: index, %arg6: index):
       tensor.yield %cst : f32
     } : tensor<?x24xf32> to tensor<5x24xf32>
     %5 = linalg.fill(%cst, %4) : f32, tensor<5x24xf32> -> tensor<5x24xf32>
@@ -394,18 +395,18 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
       %10 = tensor.extract_slice %arg1[%arg5, 0] [3, 24] [1, 1] : tensor<6x24xf32> to tensor<3x24xf32>
       %11 = tensor.extract_slice %arg6[0, 0] [%1, 24] [1, 1] : tensor<?x24xf32> to tensor<?x24xf32>
       %12 = tensor.pad %9 nofold low[%c0, %c0] high[%3, %c0]  {
-      ^bb0(%arg7: index, %arg8: index):  
+      ^bb0(%arg7: index, %arg8: index):
         tensor.yield %cst : f32
       } : tensor<?x3xf32> to tensor<5x3xf32>
       %13 = tensor.pad %10 nofold low[%c0, %c0] high[%c0, %c0]  {
-      ^bb0(%arg7: index, %arg8: index):  
+      ^bb0(%arg7: index, %arg8: index):
         tensor.yield %cst : f32
       } : tensor<3x24xf32> to tensor<3x24xf32>
 
       // Check the output padding is not hoisted.
       //      MATMUL:   %[[T8:.*]] = tensor.pad
       %14 = tensor.pad %11 nofold low[%c0, %c0] high[%3, %c0]  {
-      ^bb0(%arg7: index, %arg8: index):  
+      ^bb0(%arg7: index, %arg8: index):
         tensor.yield %cst : f32
       } : tensor<?x24xf32> to tensor<5x24xf32>
 
@@ -421,3 +422,59 @@ func @tile_and_fuse(%arg0: tensor<12x6xf32>,
   }
   return %0 : tensor<12x24xf32>
 }
+
+// -----
+
+#map0 = affine_map<(d0)[s0] -> (4, -d0 + s0)>
+#map1 = affine_map<(d0) -> (-d0 + 4)>
+
+//      TRANSP:  transpose
+// TRANSP-SAME:    %[[ARG0:[0-9a-zA-Z]*]]: tensor<24x?xf32>
+func @transpose(%arg0: tensor<24x?xf32>,
+                %arg1: tensor<?xf32>,
+                %arg2: tensor<24xf32>) -> tensor<24xf32> {
+  %cst = arith.constant 0.000000e+00 : f32
+  %c0 = arith.constant 0 : index
+  %c1 = arith.constant 1 : index
+  %c4 = arith.constant 4 : index
+  %0 = tensor.dim %arg0, %c1 : tensor<24x?xf32>
+
+  // Transpose the padded matrix.
+  //      TRANSP:  %[[T0:.*]] = scf.for %[[PIV0:[0-9a-z]+]] = {{.*}}iter_args(%[[T1:.*]] =
+  //        TRANSP:   %[[T2:.*]] = tensor.pad
+  //        TRANSP:   %[[T3:.*]] = tensor.extract_slice %[[T1]]
+  //        TRANSP:   %[[T4:.*]] = linalg.generic
+  //   TRANSP-SAME:     ins(%[[T2]] : tensor<24x4xf32>
+  //   TRANSP-SAME:     outs(%[[T3]] : tensor<4x24xf32>
+  //        TRANSP:   %[[T5:.*]] = tensor.insert_slice %[[T4]] into %[[T1]]
+  //        TRANSP:   scf.yield %[[T5:.*]]
+
+  //      TRANSP:  scf.for %[[IV0:[0-9a-zA-Z]*]] =
+  %1 = scf.for %arg3 = %c0 to %0 step %c4 iter_args(%arg4 = %arg2) -> (tensor<24xf32>) {
+    %2 = affine.min #map0(%arg3)[%0]
+    %3 = tensor.extract_slice %arg0[0, %arg3] [24, %2] [1, 1] : tensor<24x?xf32> to tensor<24x?xf32>
+
+    // Index the packed vector and transpose back.
+    //      TRANSP:   %[[T6:.*]] = tensor.extract_slice %[[T0]]
+    //      TRANSP:   %[[T7:.*]] = linalg.init_tensor
+    //      TRANSP:   %[[T8:.*]] = linalg.generic
+    // TRANSP-SAME:     ins(%[[T6]] : tensor<4x24xf32>
+    // TRANSP-SAME:     outs(%[[T7]] : tensor<24x4xf32>
+    %4 = tensor.extract_slice %arg1[%arg3] [%2] [1] : tensor<?xf32> to tensor<?xf32>
+    %5 = affine.apply #map1(%2)
+    %6 = tensor.pad %3 low[%c0, %c0] high[%c0, %5]  {
+    ^bb0(%arg5: index, %arg6: index):  // no predecessors
+      tensor.yield %cst : f32
+    } : tensor<24x?xf32> to tensor<24x4xf32>
+    %7 = tensor.pad %4 nofold low[%c0] high[%5]  {
+    ^bb0(%arg5: index):  // no predecessors
+      tensor.yield %cst : f32
+    } : tensor<?xf32> to tensor<4xf32>
+
+    // Check matvec uses the packed input vector.
+    //      TRANSP:    = linalg.matvec ins(%[[T8]]
+    %8 = linalg.matvec ins(%6, %7 : tensor<24x4xf32>, tensor<4xf32>) outs(%arg4 : tensor<24xf32>) -> tensor<24xf32>
+    scf.yield %8 : tensor<24xf32>
+  }
+  return %1 : tensor<24xf32>
+}
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
index b79059a3ebb64..5bfd6412854b6 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgCodegenStrategy.cpp
@@ -109,6 +109,17 @@ struct TestLinalgCodegenStrategy
       *this, "hoist-paddings",
       llvm::cl::desc("Operand hoisting depths when test-pad-pattern."),
       llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
+  ListOption<std::string> transposePaddings{
+      *this, "transpose-paddings",
+      llvm::cl::desc(
+          "Transpose paddings when test-pad-pattern. Specify a "
+          "operand dimension interchange using the following format:\n"
+          "-transpose-paddings=1:0:2,0:1,0:1\n"
+          "It defines the interchange [1, 0, 2] for operand one and "
+          "the interchange [0, 1] (no transpose) for the remaining operands."
+          "All interchange vectors have to be permuations matching the "
+          "operand rank."),
+      llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
   Option<bool> generalize{*this, "generalize",
                           llvm::cl::desc("Generalize named operations."),
                           llvm::cl::init(false)};
@@ -257,9 +268,21 @@ void TestLinalgCodegenStrategy::runOnOperation() {
                ? hoistPaddings[opOperand.getOperandNumber()]
                : 0;
   };
+  auto transposeFunc = [&](OpOperand &opOperand) {
+    SmallVector<int64_t> transposeVector = {};
+    if (opOperand.getOperandNumber() >= transposePaddings.size())
+      return transposeVector;
+    SmallVector<StringRef> elems;
+    StringRef(transposePaddings[opOperand.getOperandNumber()])
+        .split(elems, ':');
+    for (StringRef elem : elems)
+      transposeVector.push_back(std::stoi(elem.str()));
+    return transposeVector;
+  };
   paddingOptions.setPaddingValueComputationFunction(getNeutralOfLinalgOp);
   paddingOptions.setPaddingNoFoldComputationFunction(packFunc);
   paddingOptions.setPaddingHoistComputationFunction(hoistingFunc);
+  paddingOptions.setPaddingTransposeComputationFunction(transposeFunc);
 
   // Compute input padding values only an return failure for output operands.
   if (padInputsOnly) {

From cfe17986c952e552a731237da99f4879def3a02b Mon Sep 17 00:00:00 2001
From: Casey Carter <Casey@Carter.net>
Date: Wed, 29 Dec 2021 14:30:52 -0800
Subject: [PATCH 408/946] [libcxx][test] {move,reverse}_iterator cannot be
 instantiated for a type with no `operator*`

Since their nested reference types are defined in terms of `iter_reference_t<T>`, which examines `decltype(*declval<T>())`.

Differential Revision: https://reviews.llvm.org/D117371
---
 .../move.iter.ops/move.iter.op=/move_iterator.pass.cpp        | 4 +++-
 .../reverse.iterators/reverse.iter.cmp/three-way.pass.cpp     | 4 ++++
 .../reverse.iterators/reverse.iter.cons/assign.pass.cpp       | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp
index 42a28cf14cfd0..d4857d7b0082b 100644
--- a/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/move.iterators/move.iter.ops/move.iter.op=/move_iterator.pass.cpp
@@ -42,7 +42,7 @@ struct ToIter {
     typedef char *pointer;
     typedef char &reference;
     typedef char value_type;
-    typedef value_type difference_type;
+    typedef signed char difference_type;
 
     explicit TEST_CONSTEXPR_CXX17 ToIter() : m_value(0) {}
     TEST_CONSTEXPR_CXX17 ToIter(const ToIter &src) : m_value(src.m_value) {}
@@ -57,6 +57,8 @@ struct ToIter {
         return *this;
     }
     char *m_value;
+
+    reference operator*() const;
 };
 
 TEST_CONSTEXPR_CXX17 bool test_conv_assign()
diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/three-way.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/three-way.pass.cpp
index e8379e24cffae..a48a7ea924c52 100644
--- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/three-way.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cmp/three-way.pass.cpp
@@ -42,6 +42,8 @@ struct Iter {
 
     constexpr Iter(double value): m_value(value) {}
     double m_value;
+
+    reference operator*() const;
 private:
     friend constexpr bool operator==(const Iter& l, const Iter& r) = default;
     friend constexpr std::partial_ordering operator<=>(const Iter& l, const Iter& r) = default;
@@ -57,6 +59,8 @@ struct ConstIter {
     constexpr ConstIter(double value): m_value(value) {}
     constexpr ConstIter(Iter it): m_value(it.m_value) {}
     double m_value;
+
+    reference operator*() const;
 private:
     friend constexpr bool operator==(const ConstIter& l, const ConstIter& r) = default;
     friend constexpr std::partial_ordering operator<=>(const ConstIter& l, const ConstIter& r) = default;
diff --git a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cons/assign.pass.cpp b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cons/assign.pass.cpp
index c3dbfa9a41b66..0e5123a49e2b5 100644
--- a/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cons/assign.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/reverse.iterators/reverse.iter.cons/assign.pass.cpp
@@ -51,6 +51,8 @@ struct ToIter {
         return *this;
     }
     char *m_value;
+
+    reference operator*() const;
 };
 
 TEST_CONSTEXPR_CXX17 bool tests() {

From 699e22a083f2bae120e68ce7254fcddb4aaf97b3 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Mon, 24 Jan 2022 09:39:31 +0000
Subject: [PATCH 409/946] [ISEL] Move trivial step_vector folds to
 FoldConstantArithmetic.

Given that step_vector is practically a constant, doing this early
helps with DAGCombine folds that happen before type legalization.

There is currently no way to test this happens earlier, although existing
tests for step_vector folds continue protect the folds happening at all.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D117863
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 199dee9b0105f..e305041f7490d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5418,6 +5418,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
     }
   }
 
+  // Fold (mul step_vector(C0), C1) to (step_vector(C0 * C1)).
+  //      (shl step_vector(C0), C1) -> (step_vector(C0 << C1))
+  if ((Opcode == ISD::MUL || Opcode == ISD::SHL) &&
+      Ops[0].getOpcode() == ISD::STEP_VECTOR) {
+    APInt RHSVal;
+    if (ISD::isConstantSplatVector(Ops[1].getNode(), RHSVal)) {
+      APInt NewStep = Opcode == ISD::MUL
+                          ? Ops[0].getConstantOperandAPInt(0) * RHSVal
+                          : Ops[0].getConstantOperandAPInt(0) << RHSVal;
+      return getStepVector(DL, VT, NewStep);
+    }
+  }
+
   auto IsScalarOrSameVectorSize = [NumElts](const SDValue &Op) {
     return !Op.getValueType().isVector() ||
            Op.getValueType().getVectorElementCount() == NumElts;

From 6997f4d07fa4b462dd3a02838a2cfed45db9c8a0 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 16:44:27 +0000
Subject: [PATCH 410/946] [X86] combineSetCCMOVMSK - fold allof(cmpeq(x,y)) ->
 ptest(sub(x,y)) (PR53379)

As suggested on PR53379, for all-of icmp-eq patterns, we can use ptest(sub(x,y)) on SSE41+ targets

This is a generalization of the existing allof(cmpeq(x,0)) -> ptest(x) pattern

We can probably extend this further, in particularly to handle 256-bit cases on pre-AVX2 targets, but this part of the generalization is pretty trivial

Fixes Issue #53379
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 18 +++-
 llvm/test/CodeGen/X86/movmsk-cmp.ll           | 10 +-
 .../test/CodeGen/X86/vector-compare-all_of.ll | 15 ++-
 .../test/CodeGen/X86/vector-compare-any_of.ll | 10 +-
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 92 ++++++++++---------
 5 files changed, 75 insertions(+), 70 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 10fdcc3d6ca81..2f8e97e63fd49 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44457,12 +44457,15 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
 
   // MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X).
   // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X).
+  // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
+  // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
   if (IsAllOf && Subtarget.hasSSE41()) {
     SDValue BC = peekThroughBitcasts(Vec);
-    if (BC.getOpcode() == X86ISD::PCMPEQ &&
-        ISD::isBuildVectorAllZeros(BC.getOperand(1).getNode())) {
+    if (BC.getOpcode() == X86ISD::PCMPEQ) {
       MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
-      SDValue V = DAG.getBitcast(TestVT, BC.getOperand(0));
+      SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
+                              BC.getOperand(0), BC.getOperand(1));
+      V = DAG.getBitcast(TestVT, V);
       return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
     }
   }
@@ -44500,7 +44503,14 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
         VecOp1.getConstantOperandAPInt(1) == 8 &&
         (IsAnyOf || (SignExt0 && SignExt1))) {
       SDLoc DL(EFLAGS);
-      SDValue Result = DAG.getBitcast(MVT::v32i8, VecOp0.getOperand(0));
+      SDValue Result = peekThroughBitcasts(VecOp0.getOperand(0));
+      if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ) {
+        SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
+                                Result.getOperand(0), Result.getOperand(1));
+        V = DAG.getBitcast(MVT::v4i64, V);
+        return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+      }
+      Result = DAG.getBitcast(MVT::v32i8, Result);
       Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
       unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
       if (!SignExt0 || !SignExt1) {
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 4488aeb273f70..68adeda025d7c 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -4162,17 +4162,15 @@ define i1 @movmsk_or_v2i64(<2 x i64> %x, <2 x i64> %y) {
 ;
 ; SSE41-LABEL: movmsk_or_v2i64:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
-; SSE41-NEXT:    movmskpd %xmm0, %eax
-; SSE41-NEXT:    cmpl $3, %eax
+; SSE41-NEXT:    psubq %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    setne %al
 ; SSE41-NEXT:    retq
 ;
 ; AVX1OR2-LABEL: movmsk_or_v2i64:
 ; AVX1OR2:       # %bb.0:
-; AVX1OR2-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX1OR2-NEXT:    vmovmskpd %xmm0, %eax
-; AVX1OR2-NEXT:    cmpl $3, %eax
+; AVX1OR2-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; AVX1OR2-NEXT:    vptest %xmm0, %xmm0
 ; AVX1OR2-NEXT:    setne %al
 ; AVX1OR2-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index a4a8367f93294..b399712553606 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1249,9 +1249,8 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 ; AVX2-LABEL: bool_reduction_v8i32:
 ; AVX2:       # %bb.0:
 ; AVX2-NEXT:    vpminud %ymm1, %ymm0, %ymm1
-; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    cmpb $-1, %al
+; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1301,9 +1300,8 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ;
 ; AVX2-LABEL: bool_reduction_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1354,9 +1352,8 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ;
 ; AVX2-LABEL: bool_reduction_v32i8:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-compare-any_of.ll b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
index ee7e5f983c582..9af3d5e4969e8 100644
--- a/llvm/test/CodeGen/X86/vector-compare-any_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-any_of.ll
@@ -959,17 +959,15 @@ define i1 @bool_reduction_v2i64(<2 x i64> %x, <2 x i64> %y) {
 define i1 @bool_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE-LABEL: bool_reduction_v4i32:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %eax
-; SSE-NEXT:    cmpl $15, %eax
+; SSE-NEXT:    psubd %xmm1, %xmm0
+; SSE-NEXT:    ptest %xmm0, %xmm0
 ; SSE-NEXT:    setne %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: bool_reduction_v4i32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    cmpl $15, %eax
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    setne %al
 ; AVX-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index c9aef7b8e7404..be9ebf466eb0d 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1210,10 +1210,7 @@ define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
 ;
 ; AVX2-LABEL: icmp0_v16i16_v16i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1679,17 +1676,15 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
 ;
 ; SSE41-LABEL: icmp_v2i64_v2i1:
 ; SSE41:       # %bb.0:
-; SSE41-NEXT:    pcmpeqq %xmm1, %xmm0
-; SSE41-NEXT:    movmskpd %xmm0, %eax
-; SSE41-NEXT:    cmpb $3, %al
+; SSE41-NEXT:    psubq %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v2i64_v2i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskpd %xmm0, %eax
-; AVX-NEXT:    cmpb $3, %al
+; AVX-NEXT:    vpsubq %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
@@ -1728,19 +1723,25 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
 }
 
 define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
-; SSE-LABEL: icmp_v4i32_v4i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE-NEXT:    movmskps %xmm0, %eax
-; SSE-NEXT:    cmpb $15, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v4i32_v4i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    cmpb $15, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v4i32_v4i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubd %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v4i32_v4i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    cmpb $15, %al
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
@@ -1832,27 +1833,32 @@ define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
 }
 
 define i1 @icmp_v16i8_v16i1(<16 x i8>, <16 x i8>) {
-; SSE-LABEL: icmp_v16i8_v16i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v16i8_v16i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v16i8_v16i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubb %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v16i8_v16i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    cmpw $-1, %ax
+; AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: icmp_v16i8_v16i1:
 ; AVX512F:       # %bb.0:
-; AVX512F-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT:    vpmovmskb %xmm0, %eax
-; AVX512F-NEXT:    cmpw $-1, %ax
+; AVX512F-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT:    vptest %xmm0, %xmm0
 ; AVX512F-NEXT:    sete %al
 ; AVX512F-NEXT:    retq
 ;
@@ -1917,9 +1923,8 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
 ;
 ; AVX2-LABEL: icmp_v4i64_v4i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskpd %ymm0, %eax
-; AVX2-NEXT:    cmpb $15, %al
+; AVX2-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -1986,9 +1991,8 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
 ;
 ; AVX2-LABEL: icmp_v8i32_v8i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vmovmskps %ymm0, %eax
-; AVX2-NEXT:    cmpb $-1, %al
+; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -2054,9 +2058,8 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
 ;
 ; AVX2-LABEL: icmp_v16i16_v16i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq
@@ -2119,9 +2122,8 @@ define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
 ;
 ; AVX2-LABEL: icmp_v32i8_v32i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

From c03fdd340356c9a29242975f39786529eb99f194 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Mon, 24 Jan 2022 09:03:21 -0800
Subject: [PATCH 411/946] [ELF] Fix the branch range computation when reusing a
 thunk

Notation: dst is `t->getThunkTargetSym()->getVA()`

On AArch64, when `src-0x8000000-r_addend <= dst < src-0x8000000`, the condition
`target->inBranchRange(rel.type, src, rel.sym->getVA(rel.addend))` may
incorrectly consider a thunk reusable.
`rel.addend = -getPCBias(rel.type)` resets the addend to 0 for AArch64/PPC
and the zero addend is used by `rel.sym->getVA(rel.addend)` to check
out-of-range relocations.

See the test for a case this computation is wrong:
`error: a.o:(.text_high+0x4): relocation R_AARCH64_JUMP26 out of range: -134217732 is not in [-134217728, 134217727]`
I have seen a real world case with r_addend=19960.

Reviewed By: peter.smith

Differential Revision: https://reviews.llvm.org/D117734
---
 lld/ELF/Relocations.cpp            |  5 +--
 lld/test/ELF/aarch64-thunk-reuse.s | 49 ++++++++++++++++++++++++++++
 lld/test/ELF/arm-thunk-reuse.s     | 52 ++++++++++++++++++++++++++++++
 3 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 lld/test/ELF/aarch64-thunk-reuse.s
 create mode 100644 lld/test/ELF/arm-thunk-reuse.s

diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
index 3a4cbde6ab698..074bc71509916 100644
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -2042,7 +2042,8 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
   // out in the relocation addend. We compensate for the PC bias so that
   // an Arm and Thumb relocation to the same destination get the same keyAddend,
   // which is usually 0.
-  int64_t keyAddend = rel.addend + getPCBias(rel.type);
+  const int64_t pcBias = getPCBias(rel.type);
+  const int64_t keyAddend = rel.addend + pcBias;
 
   // We use a ((section, offset), addend) pair to find the thunk position if
   // possible so that we create only one thunk for aliased symbols or ICFed
@@ -2061,7 +2062,7 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
     if (isThunkSectionCompatible(isec, t->getThunkTargetSym()->section) &&
         t->isCompatibleWith(*isec, rel) &&
         target->inBranchRange(rel.type, src,
-                              t->getThunkTargetSym()->getVA(rel.addend)))
+                              t->getThunkTargetSym()->getVA(-pcBias)))
       return std::make_pair(t, false);
 
   // No existing compatible Thunk in range, create a new one
diff --git a/lld/test/ELF/aarch64-thunk-reuse.s b/lld/test/ELF/aarch64-thunk-reuse.s
new file mode 100644
index 0000000000000..bdbd08ab5f282
--- /dev/null
+++ b/lld/test/ELF/aarch64-thunk-reuse.s
@@ -0,0 +1,49 @@
+# REQUIRES: aarch64
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=aarch64 %t/a.s -o %t/a.o
+# RUN: ld.lld -pie -T %t/lds %t/a.o -o %t/a
+# RUN: llvm-objdump -d --no-show-raw-insn %t/a | FileCheck %s
+
+## We create a thunk for dest.
+# CHECK-LABEL: <mid>:
+# CHECK-NEXT:   8010008:       b       0x801000c <__AArch64ADRPThunk_>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__AArch64ADRPThunk_>:
+# CHECK-NEXT:   801000c:       adrp    x16, 0x10000
+# CHECK-NEXT:                  add     x16, x16, #4
+# CHECK-NEXT:                  br      x16
+
+## The first instruction can reuse the thunk but the second can't.
+## If we reuse the thunk for b, we will get an "out of range" error.
+# CHECK-LABEL: <high>:
+# CHECK-NEXT:  1001000c:       bl      0x801000c <__AArch64ADRPThunk_>
+# CHECK-NEXT:                  b       0x10010014 <__AArch64ADRPThunk_>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__AArch64ADRPThunk_>:
+# CHECK-NEXT:  10010014:       adrp    x16, 0x10000
+# CHECK-NEXT:                  add     x16, x16, #4
+# CHECK-NEXT:                  br      x16
+
+#--- a.s
+.section .text_low, "ax", %progbits
+.globl _start
+_start:
+  nop
+dest:
+  ret
+
+.section .text_mid, "ax", %progbits
+mid:
+  b dest
+
+.section .text_high, "ax", %progbits
+high:
+  bl dest
+  b dest
+
+#--- lds
+SECTIONS {
+  .text_low 0x10000: { *(.text_low) }
+  .text_mid 0x8010008 : { *(.text_mid) }
+  .text_high 0x1001000c : { *(.text_high) }
+}
diff --git a/lld/test/ELF/arm-thunk-reuse.s b/lld/test/ELF/arm-thunk-reuse.s
new file mode 100644
index 0000000000000..3959c3cd695f4
--- /dev/null
+++ b/lld/test/ELF/arm-thunk-reuse.s
@@ -0,0 +1,52 @@
+# REQUIRES: arm
+# RUN: rm -rf %t && split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=armv7-a-none-eabi --arm-add-build-attributes %t/a.s -o %t/a.o
+# RUN: ld.lld -pie -T %t/lds %t/a.o -o %t/a
+# RUN: llvm-objdump -d --no-show-raw-insn %t/a | FileCheck %s
+
+## We create a thunk for dest.
+# CHECK-LABEL: <mid>:
+# CHECK-NEXT:   2010004:     b       0x2010008 <__ARMV7PILongThunk_dest>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__ARMV7PILongThunk_dest>:
+# CHECK-NEXT:   2010008:     movw    r12, #65516
+# CHECK-NEXT:                movt    r12, #65023
+# CHECK-NEXT:                add     r12, r12, pc
+# CHECK-NEXT:                bx      r12
+
+## The first instruction can reuse the thunk but the second can't.
+## If we reuse the thunk for b, we will get an "out of range" error.
+# CHECK-LABEL: <high>:
+# CHECK-NEXT:   4010000:      bl      0x2010008 <__ARMV7PILongThunk_dest>
+# CHECK-NEXT:                 b       0x4010008 <__ARMV7PILongThunk_dest>
+# CHECK-EMPTY:
+# CHECK-NEXT:  <__ARMV7PILongThunk_dest>:
+# CHECK-NEXT:   4010008:      movw    r12, #65516
+# CHECK-NEXT:                 movt    r12, #64511
+# CHECK-NEXT:                 add     r12, r12, pc
+# CHECK-NEXT:                 bx      r12
+
+#--- a.s
+.section .text_low, "ax", %progbits
+
+.globl _start
+_start:
+  nop
+dest:
+  bx lr
+
+.section .text_mid, "ax", %progbits
+mid:
+  b dest
+
+.section .text_high, "ax", %progbits
+high:
+  bl dest
+  b dest
+
+#--- lds
+SECTIONS {
+  .text_low 0x10000: { *(.text_low) }
+  .text_mid 0x2010004 : { *(.text_mid) }
+  .text_high 0x4010000 : { *(.text_high) }
+}

From a2afc8249a9950da861bce5fd724c1ccf6c4eea4 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Mon, 24 Jan 2022 12:05:09 -0500
Subject: [PATCH 412/946] [libc++] Fix benchmark failure

---
 libcxx/benchmarks/filesystem.bench.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/benchmarks/filesystem.bench.cpp b/libcxx/benchmarks/filesystem.bench.cpp
index 95aecd50473fd..4fe4fc503ca13 100644
--- a/libcxx/benchmarks/filesystem.bench.cpp
+++ b/libcxx/benchmarks/filesystem.bench.cpp
@@ -83,7 +83,7 @@ void BM_PathIterateMultipleTimes(benchmark::State &st, GenInputs gen) {
     PP /= Part;
   benchmark::DoNotOptimize(PP.native().data());
   while (st.KeepRunning()) {
-    for (auto &E : PP) {
+    for (auto const& E : PP) {
       benchmark::DoNotOptimize(E.native().data());
     }
     benchmark::ClobberMemory();
@@ -104,7 +104,7 @@ void BM_PathIterateOnce(benchmark::State &st, GenInputs gen) {
   benchmark::DoNotOptimize(PP.native().data());
   while (st.KeepRunning()) {
     const path P = PP.native();
-    for (auto &E : P) {
+    for (auto const& E : P) {
       benchmark::DoNotOptimize(E.native().data());
     }
     benchmark::ClobberMemory();

From 6be77561f82d781cd957c316a7f53660510683a3 Mon Sep 17 00:00:00 2001
From: eopXD <eop.chen@sifive.com>
Date: Sat, 22 Jan 2022 23:19:01 -0800
Subject: [PATCH 413/946] [SLP][NFC] Add debug logs for entry.

Tell the users they are specifying something without vector register.

Reviewed By: ABataev

Differential Revision: https://reviews.llvm.org/D117980
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 8bfd3aa525d55..99c265fc5101a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -8079,8 +8079,11 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 
   // If the target claims to have no vector registers don't attempt
   // vectorization.
-  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
+  if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true))) {
+    LLVM_DEBUG(
+        dbgs() << "SLP: Didn't find any vector registers for target, abort.\n");
     return false;
+  }
 
   // Don't vectorize when the attribute NoImplicitFloat is used.
   if (F.hasFnAttribute(Attribute::NoImplicitFloat))

From 7cd441ff537e00c743236658bfbcfc16c30ce031 Mon Sep 17 00:00:00 2001
From: Owen Pan <owenpiano@gmail.com>
Date: Mon, 24 Jan 2022 08:44:54 -0800
Subject: [PATCH 414/946] [clang][NFC] Wrap TYPE_SWITCH in "do while (0)" in
 the interpreter

Wraps the expansions of TYPE_SWITCH and COMPOSITE_TYPE_SWITCH in
the constexpr interpreter with "do { ... } while (0)" so that these
macros can be used like this:

if (llvm::Optional<PrimType> T = Ctx.classify(FieldTy))
  TYPE_SWITCH(*T, Ok &= ReturnValue<T>(FP.deref<T>(), Value));
else
  Ok &= Composite(FieldTy, FP, Value);

This bug was found while testing D116316. See also review comment:
https://reviews.llvm.org/D64146?id=208520#inline-584131

Also cleaned up the macro definitions by removing the superfluous
do-while statements and removed the unused INT_TPYE_SWITCH macro.

Differential Revision: https://reviews.llvm.org/D117301
---
 clang/lib/AST/Interp/PrimType.h | 50 ++++++++++++++-------------------
 1 file changed, 21 insertions(+), 29 deletions(-)

diff --git a/clang/lib/AST/Interp/PrimType.h b/clang/lib/AST/Interp/PrimType.h
index f5f4f8e5c32d6..de4bf9bf802e0 100644
--- a/clang/lib/AST/Interp/PrimType.h
+++ b/clang/lib/AST/Interp/PrimType.h
@@ -81,35 +81,27 @@ inline bool isPrimitiveIntegral(PrimType Type) {
 /// Helper macro to simplify type switches.
 /// The macro implicitly exposes a type T in the scope of the inner block.
 #define TYPE_SWITCH_CASE(Name, B) \
-  case Name: { using T = PrimConv<Name>::T; do {B;} while(0); break; }
+  case Name: { using T = PrimConv<Name>::T; B; break; }
 #define TYPE_SWITCH(Expr, B)                                                   \
-  switch (Expr) {                                                              \
-    TYPE_SWITCH_CASE(PT_Sint8, B)                                              \
-    TYPE_SWITCH_CASE(PT_Uint8, B)                                              \
-    TYPE_SWITCH_CASE(PT_Sint16, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint16, B)                                             \
-    TYPE_SWITCH_CASE(PT_Sint32, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint32, B)                                             \
-    TYPE_SWITCH_CASE(PT_Sint64, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint64, B)                                             \
-    TYPE_SWITCH_CASE(PT_Bool, B)                                               \
-    TYPE_SWITCH_CASE(PT_Ptr, B)                                                \
-  }
+  do {                                                                         \
+    switch (Expr) {                                                            \
+      TYPE_SWITCH_CASE(PT_Sint8, B)                                            \
+      TYPE_SWITCH_CASE(PT_Uint8, B)                                            \
+      TYPE_SWITCH_CASE(PT_Sint16, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint16, B)                                           \
+      TYPE_SWITCH_CASE(PT_Sint32, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint32, B)                                           \
+      TYPE_SWITCH_CASE(PT_Sint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Uint64, B)                                           \
+      TYPE_SWITCH_CASE(PT_Bool, B)                                             \
+      TYPE_SWITCH_CASE(PT_Ptr, B)                                              \
+    }                                                                          \
+  } while (0)
 #define COMPOSITE_TYPE_SWITCH(Expr, B, D)                                      \
-  switch (Expr) {                                                              \
-    TYPE_SWITCH_CASE(PT_Ptr, B)                                                \
-    default: do { D; } while(0); break;                                        \
-  }
-#define INT_TYPE_SWITCH(Expr, B)                                               \
-  switch (Expr) {                                                              \
-    TYPE_SWITCH_CASE(PT_Sint8, B)                                              \
-    TYPE_SWITCH_CASE(PT_Uint8, B)                                              \
-    TYPE_SWITCH_CASE(PT_Sint16, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint16, B)                                             \
-    TYPE_SWITCH_CASE(PT_Sint32, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint32, B)                                             \
-    TYPE_SWITCH_CASE(PT_Sint64, B)                                             \
-    TYPE_SWITCH_CASE(PT_Uint64, B)                                             \
-    default: llvm_unreachable("not an integer");                               \
-  }
+  do {                                                                         \
+    switch (Expr) {                                                            \
+      TYPE_SWITCH_CASE(PT_Ptr, B)                                              \
+      default: { D; break; }                                                   \
+    }                                                                          \
+  } while (0)
 #endif

From 80532ebb508d0ca62f96df5f253db8caed969397 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Mon, 24 Jan 2022 18:06:33 +0100
Subject: [PATCH 415/946] [AMDGPU][InstCombine] Remove zero image offset

Remove the offset parameter if it is zero.

Differential Revision: https://reviews.llvm.org/D117876
---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     |  17 +
 llvm/lib/Target/AMDGPU/MIMGInstructions.td    |  58 ++
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |   1 +
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h |   8 +
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 565 ++++++++++++++++++
 5 files changed, 649 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index 84363d3c6aa1a..c3a326945557e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -182,6 +182,23 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
     }
   }
 
+  // Optimize _offset away when 'offset' is zero
+  if (const auto *OffsetMappingInfo =
+          AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
+    if (auto *ConstantOffset =
+            dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
+      if (ConstantOffset->isZero()) {
+        const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
+            AMDGPU::getImageDimIntrinsicByBaseOpcode(
+                OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
+        return modifyIntrinsicCall(
+            II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
+              Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
+            });
+      }
+    }
+  }
+
   // Try to use A16 or G16
   if (!ST->hasA16() && !ST->hasG16())
     return None;
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 49eaa1499bb76..cf03fd6821438 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -147,6 +147,22 @@ def MIMGBiasMappingTable : GenericTable {
   let PrimaryKeyName = "getMIMGBiasMappingInfo";
 }
 
+class MIMGOffsetMapping<MIMGBaseOpcode offset, MIMGBaseOpcode nooffset> {
+  MIMGBaseOpcode Offset = offset;
+  MIMGBaseOpcode NoOffset = nooffset;
+}
+
+def MIMGOffsetMappingTable : GenericTable {
+  let FilterClass = "MIMGOffsetMapping";
+  let CppTypeName = "MIMGOffsetMappingInfo";
+  let Fields = ["Offset", "NoOffset"];
+  string TypeOf_Offset = "MIMGBaseOpcode";
+  string TypeOf_NoOffset = "MIMGBaseOpcode";
+
+  let PrimaryKey = ["Offset"];
+  let PrimaryKeyName = "getMIMGOffsetMappingInfo";
+}
+
 class MIMGG16Mapping<MIMGBaseOpcode g, MIMGBaseOpcode g16> {
   MIMGBaseOpcode G = g;
   MIMGBaseOpcode G16 = g16;
@@ -1174,6 +1190,48 @@ def : MIMGBiasMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_CL_O>;
 def : MIMGBiasMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_O>;
 def : MIMGBiasMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_CL_O>;
 
+// Offset to NoOffset Optimization Mapping
+def : MIMGOffsetMapping<IMAGE_SAMPLE_O, IMAGE_SAMPLE>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CL_O, IMAGE_SAMPLE_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_O, IMAGE_SAMPLE_D>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_CL_O, IMAGE_SAMPLE_D_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_O_G16, IMAGE_SAMPLE_D_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_D_CL_O_G16, IMAGE_SAMPLE_D_CL_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_L_O, IMAGE_SAMPLE_L>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_B_O, IMAGE_SAMPLE_B>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_B_CL_O, IMAGE_SAMPLE_B_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_LZ_O, IMAGE_SAMPLE_LZ>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_O, IMAGE_SAMPLE_C>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CL_O, IMAGE_SAMPLE_C_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_O, IMAGE_SAMPLE_C_D>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_CL_O, IMAGE_SAMPLE_C_D_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_O_G16, IMAGE_SAMPLE_C_D_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_D_CL_O_G16, IMAGE_SAMPLE_C_D_CL_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_L_O, IMAGE_SAMPLE_C_L>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_B_CL_O, IMAGE_SAMPLE_C_B_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_B_O, IMAGE_SAMPLE_C_B>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_LZ_O, IMAGE_SAMPLE_C_LZ>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_O, IMAGE_GATHER4>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_CL_O, IMAGE_GATHER4_CL>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_L_O, IMAGE_GATHER4_L>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_B_O, IMAGE_GATHER4_B>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_B_CL_O, IMAGE_GATHER4_B_CL>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_LZ_O, IMAGE_GATHER4_LZ>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_O, IMAGE_GATHER4_C>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_CL_O, IMAGE_GATHER4_C_CL>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_L_O, IMAGE_GATHER4_C_L>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_B_O, IMAGE_GATHER4_C_B>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_B_CL_O, IMAGE_GATHER4_C_B_CL>;
+def : MIMGOffsetMapping<IMAGE_GATHER4_C_LZ_O, IMAGE_GATHER4_C_LZ>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O, IMAGE_SAMPLE_CD>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O, IMAGE_SAMPLE_CD_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O, IMAGE_SAMPLE_C_CD>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O, IMAGE_SAMPLE_C_CD_CL>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_O_G16, IMAGE_SAMPLE_CD_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_CD_CL_O_G16, IMAGE_SAMPLE_CD_CL_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_O_G16, IMAGE_SAMPLE_C_CD_G16>;
+def : MIMGOffsetMapping<IMAGE_SAMPLE_C_CD_CL_O_G16, IMAGE_SAMPLE_C_CD_CL_G16>;
+
 // G to G16 Optimization Mapping
 def : MIMGG16Mapping<IMAGE_SAMPLE_D, IMAGE_SAMPLE_D_G16>;
 def : MIMGG16Mapping<IMAGE_SAMPLE_D_CL, IMAGE_SAMPLE_D_CL_G16>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index fa1fa5b850d57..1e96266eb06c3 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -133,6 +133,7 @@ bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
 #define GET_MIMGLZMappingTable_IMPL
 #define GET_MIMGMIPMappingTable_IMPL
 #define GET_MIMGBiasMappingTable_IMPL
+#define GET_MIMGOffsetMappingTable_IMPL
 #define GET_MIMGG16MappingTable_IMPL
 #include "AMDGPUGenSearchableTables.inc"
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index cabae3d1ab7e1..89f928eb8b925 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -336,6 +336,11 @@ struct MIMGBiasMappingInfo {
   MIMGBaseOpcode NoBias;
 };
 
+struct MIMGOffsetMappingInfo {
+  MIMGBaseOpcode Offset;
+  MIMGBaseOpcode NoOffset;
+};
+
 struct MIMGG16MappingInfo {
   MIMGBaseOpcode G;
   MIMGBaseOpcode G16;
@@ -350,6 +355,9 @@ const MIMGMIPMappingInfo *getMIMGMIPMappingInfo(unsigned MIP);
 LLVM_READONLY
 const MIMGBiasMappingInfo *getMIMGBiasMappingInfo(unsigned Bias);
 
+LLVM_READONLY
+const MIMGOffsetMappingInfo *getMIMGOffsetMappingInfo(unsigned Offset);
+
 LLVM_READONLY
 const MIMGG16MappingInfo *getMIMGG16MappingInfo(unsigned G);
 
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index bac4c7826a4fe..709d531b8cee6 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -4630,6 +4630,571 @@ main_body:
   ret void
 }
 
+; --------------------------------------------------------------------
+; llvm.amdgcn.image.sample offset zero
+; --------------------------------------------------------------------
+
+define amdgpu_kernel void @offset_sample_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; CHECK-LABEL: @offset_sample_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; CHECK-LABEL: @offset_sample_c_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
+; CHECK-LABEL: @offset_sample_b_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_b_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_b_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
+; CHECK-LABEL: @offset_sample_c_b_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_b_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_b_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_b_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_b_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_b_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_b_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_b_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_b_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_b_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_b_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float [[BIAS:%.*]], float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_d_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+; CHECK-LABEL: @offset_sample_d_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_d_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_d_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_d_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+; CHECK-LABEL: @offset_sample_c_d_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_d_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_d_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_d_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_d_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_d_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_d_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_d_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_d_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_d_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_d_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cd_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
+; CHECK-LABEL: @offset_sample_cd_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cd_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_cd_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cd_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
+; CHECK-LABEL: @offset_sample_c_cd_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cd_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_cd_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cd_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_cd_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_cd_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_cd_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f32(i32 15, float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cd_cl_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_cd_cl_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DSDV:%.*]], float [[S:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_cd_cl_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
+; CHECK-LABEL: @offset_sample_c_cd_cl_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[DSDH:%.*]], float [[DTDH:%.*]], float [[DSDV:%.*]], float [[DTDV:%.*]], float [[S:%.*]], float [[T:%.*]], float [[CLAMP:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32 15, i32 0, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
+; CHECK-LABEL: @offset_sample_l_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.1d.v4f32.f32(i32 15, i32 0, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
+; CHECK-LABEL: @offset_sample_l_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.l.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_l_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
+; CHECK-LABEL: @offset_sample_c_l_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_l_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
+; CHECK-LABEL: @offset_sample_c_l_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], float [[LOD:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_lz_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
+; CHECK-LABEL: @offset_sample_lz_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32 15, i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_lz_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_lz_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32 15, i32 0, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_lz_o_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
+; CHECK-LABEL: @offset_sample_c_lz_o_1d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @offset_sample_c_lz_o_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
+; CHECK-LABEL: @offset_sample_c_lz_o_2d(
+; CHECK-NEXT:  main_body:
+; CHECK-NEXT:    [[V:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float [[ZCOMPARE:%.*]], float [[S:%.*]], float [[T:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[V]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+main_body:
+  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32 15, i32 0, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <4 x float> %v, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+declare <4 x float> @llvm.amdgcn.image.sample.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cl.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.1d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cl.o.2d.v4f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.1d.v4f32.f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.1d.v4f32.f32.f32(i32, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.o.2d.v4f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.1d.v4f32.f32(i32, i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.lz.o.2d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.1d.v4f32.f32(i32, i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.lz.o.2d.v4f32.f32(i32, i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.is.shared
 ; --------------------------------------------------------------------

From 74db5c8c95e2aed40d288bb2df92eb859b87c827 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse@sony.com>
Date: Mon, 24 Jan 2022 17:04:13 +0000
Subject: [PATCH 416/946] Revert rG6a605b97a200 due to excessive memory use

Over in the comments for D116821, some use-cases have cropped up where
there's a substantial increase in memory usage. A quick inspection
shows that a) it's a lot of memory and b) there are several things to
be done to reduce it. Reverting (via disabling this feature by default)
to avoid bothering people in the meantime.
---
 llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 5 ++++-
 llvm/test/DebugInfo/X86/instr-ref-flag.ll            | 4 ++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 2913506853aa4..3149729b92313 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -124,10 +124,13 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
 
 bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
   // Enable by default on x86_64, disable if explicitly turned off on cmdline.
+  // Disabled while https://reviews.llvm.org/D116821 is investigated.
+#if 0
   if (T.getArch() == llvm::Triple::x86_64 &&
       ValueTrackingVariableLocations != cl::boolOrDefault::BOU_FALSE)
     return true;
+#endif
 
-  // Otherwise: enable if explicitly requestedo n command line.
+  // Otherwise: enable if explicitly requested on command line.
   return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE;
 }
diff --git a/llvm/test/DebugInfo/X86/instr-ref-flag.ll b/llvm/test/DebugInfo/X86/instr-ref-flag.ll
index f9d5f99edf77f..56d34aedabd02 100644
--- a/llvm/test/DebugInfo/X86/instr-ref-flag.ll
+++ b/llvm/test/DebugInfo/X86/instr-ref-flag.ll
@@ -13,6 +13,10 @@
 ;; by llc by default, and that it can be turned explicitly on or off as
 ;; desired.
 
+;; Xfail due to faults found in the discussion on
+;; https://reviews.llvm.org/D116821
+; XFAIL: *
+
 ; INSTRREFON: DBG_INSTR_REF
 ; INSTRREFOFF: DBG_VALUE
 

From 50999e82e8844615b1ae53edb9d56cdcace91b04 Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Mon, 24 Jan 2022 17:57:21 +0100
Subject: [PATCH 417/946] [clang-format] Space between attribute closing
 parenthesis and qualified type colon.

Fixes https://github.com/llvm/llvm-project/issues/35711.

Reviewed By: MyDeveloperDay, HazardyKnusperkeks, owenpan

Differential Revision: https://reviews.llvm.org/D117894
---
 clang/lib/Format/TokenAnnotator.cpp   | 3 +++
 clang/unittests/Format/FormatTest.cpp | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp
index b9535f7965598..a8cd1e30f74e5 100644
--- a/clang/lib/Format/TokenAnnotator.cpp
+++ b/clang/lib/Format/TokenAnnotator.cpp
@@ -3339,6 +3339,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line,
     if (Left.is(tok::ellipsis) && Right.is(tok::identifier) &&
         Line.First->is(Keywords.kw_import))
       return false;
+    // Space in __attribute__((attr)) ::type.
+    if (Left.is(TT_AttributeParen) && Right.is(tok::coloncolon))
+      return true;
 
     if (Left.is(tok::kw_operator))
       return Right.is(tok::coloncolon);
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index 0ddbac48d716d..c4e0e14ce5bcd 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -10028,6 +10028,7 @@ TEST_F(FormatTest, UnderstandsAttributes) {
   verifyFormat("SomeType s __attribute__((unused)) (InitValue);");
   verifyFormat("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa __attribute__((unused))\n"
                "aaaaaaaaaaaaaaaaaaaaaaa(int i);");
+  verifyFormat("__attribute__((nodebug)) ::qualified_type f();");
   FormatStyle AfterType = getLLVMStyle();
   AfterType.AlwaysBreakAfterReturnType = FormatStyle::RTBS_All;
   verifyFormat("__attribute__((nodebug)) void\n"
@@ -10131,6 +10132,7 @@ TEST_F(FormatTest, UnderstandsSquareAttributes) {
   verifyFormat("class [[nodiscard]] f {\npublic:\n  f() {}\n}");
   verifyFormat("class [[deprecated(\"so sorry\")]] f {\npublic:\n  f() {}\n}");
   verifyFormat("class [[gnu::unused]] f {\npublic:\n  f() {}\n}");
+  verifyFormat("[[nodiscard]] ::qualified_type f();");
 
   // Make sure we do not mistake attributes for array subscripts.
   verifyFormat("int a() {}\n"

From db2944e34b16387bf4326ddfd2a8c420594572f4 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Mon, 14 Dec 2020 17:39:15 +0100
Subject: [PATCH 418/946] [libc++][format] Adds formatter floating-point.

This properly implements the formatter for floating-point types.

Completes:
- P1652R1 Printf corner cases in std::format
- LWG 3250 std::format: # (alternate form) for NaN and inf
- LWG 3243 std::format and negative zeroes

Implements parts of:
- P0645 Text Formatting

Reviewed By: #libc, ldionne, vitaut

Differential Revision: https://reviews.llvm.org/D114001
---
 libcxx/benchmarks/formatter_float.bench.cpp   |  241 +++
 libcxx/docs/Status/Cxx20Issues.csv            |    4 +-
 libcxx/docs/Status/Cxx20Papers.csv            |    2 +-
 libcxx/include/CMakeLists.txt                 |    1 +
 libcxx/include/__format/formatter.h           |   28 +
 .../__format/formatter_floating_point.h       |  717 +++++++
 libcxx/include/__format/formatter_integral.h  |    2 +-
 .../include/__format/parser_std_format_spec.h |  158 +-
 libcxx/include/format                         |    1 +
 libcxx/include/module.modulemap               |   29 +-
 ...formatter_floating_point.module.verify.cpp |   15 +
 .../std_format_spec_floating_point.pass.cpp   |  353 ++++
 .../formatter.floating_point.pass.cpp         |  418 +++-
 .../format/format.functions/format_tests.h    | 1502 +++++++++++++-
 .../locale-specific_form.pass.cpp             | 1717 +++++++++++++++++
 15 files changed, 5118 insertions(+), 70 deletions(-)
 create mode 100644 libcxx/benchmarks/formatter_float.bench.cpp
 create mode 100644 libcxx/include/__format/formatter_floating_point.h
 create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_floating_point.module.verify.cpp
 create mode 100644 libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_floating_point.pass.cpp

diff --git a/libcxx/benchmarks/formatter_float.bench.cpp b/libcxx/benchmarks/formatter_float.bench.cpp
new file mode 100644
index 0000000000000..3190b3779c5d9
--- /dev/null
+++ b/libcxx/benchmarks/formatter_float.bench.cpp
@@ -0,0 +1,241 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <format>
+
+#include <array>
+#include <limits>
+#include <random>
+#include <string>
+
+#include "CartesianBenchmarks.h"
+#include "benchmark/benchmark.h"
+
+// *** Localization ***
+enum class LocalizationE { False, True };
+struct AllLocalizations : EnumValuesAsTuple<AllLocalizations, LocalizationE, 2> {
+  static constexpr const char* Names[] = {"LocFalse", "LocTrue"};
+};
+
+template <LocalizationE E>
+struct Localization {};
+
+template <>
+struct Localization<LocalizationE::False> {
+  static constexpr const char* fmt = "";
+};
+
+template <>
+struct Localization<LocalizationE::True> {
+  static constexpr const char* fmt = "L";
+};
+
+// *** Types ***
+enum class TypeE { Float, Double, LongDouble };
+// TODO FMT Set to 3 after to_chars has long double suport.
+struct AllTypes : EnumValuesAsTuple<AllTypes, TypeE, 2> {
+  static constexpr const char* Names[] = {"Float", "Double", "LongDouble"};
+};
+
+template <TypeE E>
+struct Type {};
+
+template <>
+struct Type<TypeE::Float> {
+  using type = float;
+};
+
+template <>
+struct Type<TypeE::Double> {
+  using type = double;
+};
+
+template <>
+struct Type<TypeE::LongDouble> {
+  using type = long double;
+};
+
+// *** Values ***
+enum class ValueE { Inf, Random };
+struct AllValues : EnumValuesAsTuple<AllValues, ValueE, 2> {
+  static constexpr const char* Names[] = {"Inf", "Random"};
+};
+
+template <ValueE E>
+struct Value {};
+
+template <>
+struct Value<ValueE::Inf> {
+  template <class F>
+  static std::array<F, 1000> make_data() {
+    std::array<F, 1000> result;
+    std::fill(result.begin(), result.end(), -std::numeric_limits<F>::infinity());
+    return result;
+  }
+};
+
+template <>
+struct Value<ValueE::Random> {
+  template <class F>
+  static std::array<F, 1000> make_data() {
+    std::random_device seed;
+    std::mt19937 generator(seed());
+    std::uniform_int_distribution<std::conditional_t<sizeof(F) == sizeof(uint32_t), uint32_t, uint64_t>> distribution;
+
+    std::array<F, 1000> result;
+    std::generate(result.begin(), result.end(), [&] {
+      while (true) {
+        auto result = std::bit_cast<F>(distribution(generator));
+        if (std::isfinite(result))
+          return result;
+      }
+    });
+    return result;
+  }
+};
+
+// *** Display Type ***
+enum class DisplayTypeE {
+  Default,
+  Hex,
+  Scientific,
+  Fixed,
+  General,
+};
+struct AllDisplayTypes : EnumValuesAsTuple<AllDisplayTypes, DisplayTypeE, 5> {
+  static constexpr const char* Names[] = {"DisplayDefault", "DisplayHex", "DisplayScientific", "DisplayFixed",
+                                          "DisplayGeneral"};
+};
+
+template <DisplayTypeE E>
+struct DisplayType {};
+
+template <>
+struct DisplayType<DisplayTypeE::Default> {
+  static constexpr const char* fmt = "";
+};
+
+template <>
+struct DisplayType<DisplayTypeE::Hex> {
+  static constexpr const char* fmt = "a";
+};
+
+template <>
+struct DisplayType<DisplayTypeE::Scientific> {
+  static constexpr const char* fmt = "e";
+};
+
+template <>
+struct DisplayType<DisplayTypeE::Fixed> {
+  static constexpr const char* fmt = "f";
+};
+
+template <>
+struct DisplayType<DisplayTypeE::General> {
+  static constexpr const char* fmt = "g";
+};
+
+// *** Alignment ***
+enum class AlignmentE { None, Left, Center, Right, ZeroPadding };
+struct AllAlignments : EnumValuesAsTuple<AllAlignments, AlignmentE, 5> {
+  static constexpr const char* Names[] = {"AlignNone", "AlignmentLeft", "AlignmentCenter", "AlignmentRight",
+                                          "ZeroPadding"};
+};
+
+template <AlignmentE E>
+struct Alignment {};
+
+template <>
+struct Alignment<AlignmentE::None> {
+  static constexpr const char* fmt = "";
+};
+
+template <>
+struct Alignment<AlignmentE::Left> {
+  // Width > PrecisionE::Huge
+  static constexpr const char* fmt = "0<17500";
+};
+
+template <>
+struct Alignment<AlignmentE::Center> {
+  // Width > PrecisionE::Huge
+  static constexpr const char* fmt = "0^17500";
+};
+
+template <>
+struct Alignment<AlignmentE::Right> {
+  // Width > PrecisionE::Huge
+  static constexpr const char* fmt = "0>17500";
+};
+
+template <>
+struct Alignment<AlignmentE::ZeroPadding> {
+  // Width > PrecisionE::Huge
+  static constexpr const char* fmt = "017500";
+};
+
+enum class PrecisionE { None, Zero, Small, Huge };
+struct AllPrecisions : EnumValuesAsTuple<AllPrecisions, PrecisionE, 4> {
+  static constexpr const char* Names[] = {"PrecNone", "PrecZero", "PrecSmall", "PrecHuge"};
+};
+
+template <PrecisionE E>
+struct Precision {};
+
+template <>
+struct Precision<PrecisionE::None> {
+  static constexpr const char* fmt = "";
+};
+
+template <>
+struct Precision<PrecisionE::Zero> {
+  static constexpr const char* fmt = ".0";
+};
+
+template <>
+struct Precision<PrecisionE::Small> {
+  static constexpr const char* fmt = ".10";
+};
+
+template <>
+struct Precision<PrecisionE::Huge> {
+  // The maximum precision for a minimal sub normal long double is ±0x1p-16494.
+  // This value is always larger than that value forcing the trailing zero path
+  // to be executed.
+  static constexpr const char* fmt = ".17000";
+};
+
+template <class L, class DT, class T, class V, class A, class P>
+struct FloatingPoint {
+  using F = typename Type<T::value>::type;
+
+  void run(benchmark::State& state) const {
+    std::array<F, 1000> data{Value<V::value>::template make_data<F>()};
+    std::array<char, 20'000> output;
+    std::string fmt{std::string("{:") + Alignment<A::value>::fmt + Precision<P::value>::fmt +
+                    Localization<L::value>::fmt + DisplayType<DT::value>::fmt + "}"};
+
+    while (state.KeepRunningBatch(1000))
+      for (F value : data)
+        benchmark::DoNotOptimize(std::format_to(output.begin(), fmt, value));
+  }
+
+  std::string name() const {
+    return "FloatingPoint" + L::name() + DT::name() + T::name() + V::name() + A::name() + P::name();
+  }
+};
+
+int main(int argc, char** argv) {
+  benchmark::Initialize(&argc, argv);
+  if (benchmark::ReportUnrecognizedArguments(argc, argv))
+    return 1;
+
+  makeCartesianProductBenchmark<FloatingPoint, AllLocalizations, AllDisplayTypes, AllTypes, AllValues, AllAlignments,
+                                AllPrecisions>();
+
+  benchmark::RunSpecifiedBenchmarks();
+}
diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv
index f93e66fda1849..034fac3ca22f8 100644
--- a/libcxx/docs/Status/Cxx20Issues.csv
+++ b/libcxx/docs/Status/Cxx20Issues.csv
@@ -203,10 +203,10 @@
 "`3237 <https://wg21.link/LWG3237>`__","LWG 3038 and 3190 have inconsistent PRs","Prague","|Complete|","14.0"
 "`3238 <https://wg21.link/LWG3238>`__","Insufficiently-defined behavior of ``std::function``\  deduction guides","Prague","",""
 "`3242 <https://wg21.link/LWG3242>`__","``std::format``\ : missing rules for ``arg-id``\  in ``width``\  and ``precision``\ ","Prague","|Complete|","Clang 14","|format|"
-"`3243 <https://wg21.link/LWG3243>`__","``std::format``\  and negative zeroes","Prague","","","|format|"
+"`3243 <https://wg21.link/LWG3243>`__","``std::format``\  and negative zeroes","Prague","|Complete|","14.0","|format|"
 "`3247 <https://wg21.link/LWG3247>`__","``ranges::iter_move``\  should perform ADL-only lookup of ``iter_move``\ ","Prague","","","|ranges|"
 "`3248 <https://wg21.link/LWG3248>`__","``std::format``\  ``#b``\ , ``#B``\ , ``#o``\ , ``#x``\ , and ``#X``\   presentation types misformat negative numbers","Prague","|Complete|","14.0","|format|"
-"`3250 <https://wg21.link/LWG3250>`__","``std::format``\ : ``#``\  (alternate form) for NaN and inf","Prague","","","|format|"
+"`3250 <https://wg21.link/LWG3250>`__","``std::format``\ : ``#``\  (alternate form) for NaN and inf","Prague","|Complete|","14.0","|format|"
 "`3251 <https://wg21.link/LWG3251>`__","Are ``std::format``\  alignment specifiers applied to string arguments?","Prague","","","|format|"
 "`3252 <https://wg21.link/LWG3252>`__","Parse locale's aware modifiers for commands are not consistent with POSIX spec","Prague","","","|chrono|"
 "`3254 <https://wg21.link/LWG3254>`__","Strike ``stop_token``\ 's ``operator!=``\ ","Prague","",""
diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv
index d0927e09426ce..5a974a798f7bd 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -129,7 +129,7 @@
 "`P1644R0 <https://wg21.link/P1644R0>`__","LWG","Add wait/notify to atomic<shared_ptr>","Cologne","",""
 "`P1650R0 <https://wg21.link/P1650R0>`__","LWG","Output std::chrono::days with 'd' suffix","Cologne","",""
 "`P1651R0 <https://wg21.link/P1651R0>`__","LWG","bind_front should not unwrap reference_wrapper","Cologne","|Complete|","13.0"
-"`P1652R1 <https://wg21.link/P1652R1>`__","LWG","Printf corner cases in std::format","Cologne","|In Progress|",""
+"`P1652R1 <https://wg21.link/P1652R1>`__","LWG","Printf corner cases in std::format","Cologne","|Complete|","14.0"
 "`P1661R1 <https://wg21.link/P1661R1>`__","LWG","Remove dedicated precalculated hash lookup interface","Cologne","|Nothing To Do|",""
 "`P1754R1 <https://wg21.link/P1754R1>`__","LWG","Rename concepts to standard_case for C++20, while we still can","Cologne","|In Progress|",""
 "","","","","",""
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index b7222540846fa..3886ddba8e4e5 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -178,6 +178,7 @@ set(files
   __format/formatter.h
   __format/formatter_bool.h
   __format/formatter_char.h
+  __format/formatter_floating_point.h
   __format/formatter_integer.h
   __format/formatter_integral.h
   __format/formatter_string.h
diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h
index 1adce75a86110..0c0c02ba19173 100644
--- a/libcxx/include/__format/formatter.h
+++ b/libcxx/include/__format/formatter.h
@@ -190,6 +190,34 @@ __write(output_iterator<const _CharT&> auto __out_it, const _CharT* __first,
   return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill);
 }
 
+/**
+ * @overload
+ *
+ * Writes additional zero's for the precision before the exponent.
+ * This is used when the precision requested in the format string is larger
+ * than the maximum precision of the floating-point type. These precision
+ * digits are always 0.
+ *
+ * @param __exponent           The location of the exponent character.
+ * @param __num_trailing_zeros The number of 0's to write before the exponent
+ *                             character.
+ */
+template <class _CharT, class _Fill>
+_LIBCPP_HIDE_FROM_ABI auto __write(output_iterator<const _CharT&> auto __out_it, const _CharT* __first,
+                                   const _CharT* __last, size_t __size, size_t __width, _Fill __fill,
+                                   __format_spec::_Flags::_Alignment __alignment, const _CharT* __exponent,
+                                   size_t __num_trailing_zeros) -> decltype(__out_it) {
+  _LIBCPP_ASSERT(__first <= __last, "Not a valid range");
+  _LIBCPP_ASSERT(__num_trailing_zeros > 0, "The overload not writing trailing zeros should have been used");
+
+  __padding_size_result __padding = __padding_size(__size + __num_trailing_zeros, __width, __alignment);
+  __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill);
+  __out_it = _VSTD::copy(__first, __exponent, _VSTD::move(__out_it));
+  __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __num_trailing_zeros, _CharT('0'));
+  __out_it = _VSTD::copy(__exponent, __last, _VSTD::move(__out_it));
+  return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill);
+}
+
 /**
  * @overload
  *
diff --git a/libcxx/include/__format/formatter_floating_point.h b/libcxx/include/__format/formatter_floating_point.h
new file mode 100644
index 0000000000000..2e710b409deb6
--- /dev/null
+++ b/libcxx/include/__format/formatter_floating_point.h
@@ -0,0 +1,717 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_FLOATING_POINT_H
+#define _LIBCPP___FORMAT_FORMATTER_FLOATING_POINT_H
+
+#include <__algorithm/copy.h>
+#include <__algorithm/copy_n.h>
+#include <__algorithm/fill_n.h>
+#include <__algorithm/find.h>
+#include <__algorithm/min.h>
+#include <__algorithm/rotate.h>
+#include <__algorithm/transform.h>
+#include <__concepts/arithmetic.h>
+#include <__config>
+#include <__debug>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/format_string.h>
+#include <__format/formatter.h>
+#include <__format/formatter_integral.h>
+#include <__format/parser_std_format_spec.h>
+#include <__utility/move.h>
+#include <charconv>
+#include <cmath>
+
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  include <locale>
+#endif
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that too much effort
+// to support compilers with partial C++20 support.
+#  if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+template <floating_point _Tp>
+_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value) {
+  to_chars_result __r = _VSTD::to_chars(__first, __last, __value);
+  _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
+  return __r.ptr;
+}
+
+template <floating_point _Tp>
+_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value, chars_format __fmt) {
+  to_chars_result __r = _VSTD::to_chars(__first, __last, __value, __fmt);
+  _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
+  return __r.ptr;
+}
+
+template <floating_point _Tp>
+_LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last, _Tp __value, chars_format __fmt, int __precision) {
+  to_chars_result __r = _VSTD::to_chars(__first, __last, __value, __fmt, __precision);
+  _LIBCPP_ASSERT(__r.ec == errc(0), "Internal buffer too small");
+  return __r.ptr;
+}
+
+// https://en.cppreference.com/w/cpp/language/types#cite_note-1
+// float             min subnormal: +/-0x1p-149   max: +/- 3.402,823,4 10^38
+// double            min subnormal: +/-0x1p-1074  max  +/- 1.797,693,134,862,315,7 10^308
+// long double (x86) min subnormal: +/-0x1p-16446 max: +/- 1.189,731,495,357,231,765,021 10^4932
+//
+// The maximum number of digits required for the integral part is based on the
+// maximum's value power of 10. Every power of 10 requires one additional
+// decimal digit.
+// The maximum number of digits required for the fractional part is based on
+// the minimal subnormal hexadecimal output's power of 10. Every division of a
+// fraction's binary 1 by 2, requires one additional decimal digit.
+//
+// The maximum size of a formatted value depends on the selected output format.
+// Ignoring the fact the format string can request a precision larger than the
+// values maximum required, these values are:
+//
+// sign                    1 code unit
+// __max_integral
+// radix point             1 code unit
+// __max_fractional
+// exponent character      1 code unit
+// sign                    1 code unit
+// __max_fractional_value
+// -----------------------------------
+// total                   4 code units extra required.
+//
+// TODO FMT Optimize the storage to avoid storing digits that are known to be zero.
+// https://www.exploringbinary.com/maximum-number-of-decimal-digits-in-binary-floating-point-numbers/
+
+// TODO FMT Add long double specialization when to_chars has proper long double support.
+template <class _Tp>
+struct __traits;
+
+template <floating_point _Fp>
+static constexpr size_t __float_buffer_size(int __precision) {
+  using _Traits = __traits<_Fp>;
+  return 4 + _Traits::__max_integral + __precision + _Traits::__max_fractional_value;
+}
+
+template <>
+struct __traits<float> {
+  static constexpr int __max_integral = 38;
+  static constexpr int __max_fractional = 149;
+  static constexpr int __max_fractional_value = 3;
+  static constexpr size_t __stack_buffer_size = 256;
+
+  static constexpr int __hex_precision_digits = 3;
+};
+
+template <>
+struct __traits<double> {
+  static constexpr int __max_integral = 308;
+  static constexpr int __max_fractional = 1074;
+  static constexpr int __max_fractional_value = 4;
+  static constexpr size_t __stack_buffer_size = 1024;
+
+  static constexpr int __hex_precision_digits = 4;
+};
+
+/// Helper class to store the conversion buffer.
+///
+/// Depending on the maxium size required for a value, the buffer is allocated
+/// on the stack or the heap.
+template <floating_point _Fp>
+class _LIBCPP_TEMPLATE_VIS __float_buffer {
+  using _Traits = __traits<_Fp>;
+
+public:
+  // TODO FMT Improve this constructor to do a better estimate.
+  // When using a scientific formatting with a precision of 6 a stack buffer
+  // will always suffice. At the moment that isn't important since floats and
+  // doubles use a stack buffer, unless the precision used in the format string
+  // is large.
+  // When supporting long doubles the __max_integral part becomes 4932 which
+  // may be too much for some platforms. For these cases a better estimate is
+  // required.
+  explicit _LIBCPP_HIDE_FROM_ABI __float_buffer(int __precision)
+      : __precision_(__precision != -1 ? __precision : _Traits::__max_fractional) {
+
+    // When the precision is larger than _Traits::__max_fractional the digits in
+    // the range (_Traits::__max_fractional, precision] will contain the value
+    // zero. There's no need to request to_chars to write these zeros:
+    // - When the value is large a temporary heap buffer needs to be allocated.
+    // - When to_chars writes the values they need to be "copied" to the output:
+    //   - char: std::fill on the output iterator is faster than std::copy.
+    //   - wchar_t: same argument as char, but additional std::copy won't work.
+    //     The input is always a char buffer, so every char in the buffer needs
+    //     to be converted from a char to a wchar_t.
+    if (__precision_ > _Traits::__max_fractional) {
+      __num_trailing_zeros_ = __precision_ - _Traits::__max_fractional;
+      __precision_ = _Traits::__max_fractional;
+    }
+
+    __size_ = __format_spec::__float_buffer_size<_Fp>(__precision_);
+    if (__size_ > _Traits::__stack_buffer_size)
+      // The allocated buffer's contents don't need initialization.
+      __begin_ = allocator<char>{}.allocate(__size_);
+    else
+      __begin_ = __buffer_;
+  }
+
+  _LIBCPP_HIDE_FROM_ABI ~__float_buffer() {
+    if (__size_ > _Traits::__stack_buffer_size)
+      allocator<char>{}.deallocate(__begin_, __size_);
+  }
+  _LIBCPP_HIDE_FROM_ABI __float_buffer(const __float_buffer&) = delete;
+  _LIBCPP_HIDE_FROM_ABI __float_buffer& operator=(const __float_buffer&) = delete;
+
+  _LIBCPP_HIDE_FROM_ABI char* begin() const { return __begin_; }
+  _LIBCPP_HIDE_FROM_ABI char* end() const { return __begin_ + __size_; }
+
+  _LIBCPP_HIDE_FROM_ABI int __precision() const { return __precision_; }
+  _LIBCPP_HIDE_FROM_ABI int __num_trailing_zeros() const { return __num_trailing_zeros_; }
+  _LIBCPP_HIDE_FROM_ABI void __remove_trailing_zeros() { __num_trailing_zeros_ = 0; }
+
+private:
+  int __precision_;
+  int __num_trailing_zeros_{0};
+  size_t __size_;
+  char* __begin_;
+  char __buffer_[_Traits::__stack_buffer_size];
+};
+
+struct __float_result {
+  /// Points at the beginning of the integral part in the buffer.
+  ///
+  /// When there's no sign character this points at the start of the buffer.
+  char* __integral;
+
+  /// Points at the radix point, when not present it's the same as \ref __last.
+  char* __radix_point;
+
+  /// Points at the exponent character, when not present it's the same as \ref __last.
+  char* __exponent;
+
+  /// Points beyond the last written element in the buffer.
+  char* __last;
+};
+
+/// Finds the position of the exponent character 'e' at the end of the buffer.
+///
+/// Assuming there is an exponent the input will terminate with
+/// eSdd and eSdddd (S = sign, d = digit)
+///
+/// \returns a pointer to the exponent or __last when not found.
+constexpr inline _LIBCPP_HIDE_FROM_ABI char* __find_exponent(char* __first, char* __last) {
+  ptrdiff_t __size = __last - __first;
+  if (__size > 4) {
+    __first = __last - _VSTD::min(__size, ptrdiff_t(6));
+    for (; __first != __last - 3; ++__first) {
+      if (*__first == 'e')
+        return __first;
+    }
+  }
+  return __last;
+}
+
+template <class _Fp, class _Tp>
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_default(const __float_buffer<_Fp>& __buffer, _Tp __value,
+                                                             char* __integral) {
+  __float_result __result;
+  __result.__integral = __integral;
+  __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value);
+
+  __result.__exponent = __format_spec::__find_exponent(__result.__integral, __result.__last);
+
+  // Constrains:
+  // - There's at least one decimal digit before the radix point.
+  // - The radix point, when present, is placed before the exponent.
+  __result.__radix_point = _VSTD::find(__result.__integral + 1, __result.__exponent, '.');
+
+  // When the radix point isn't found its position is the exponent instead of
+  // __result.__last.
+  if (__result.__radix_point == __result.__exponent)
+    __result.__radix_point = __result.__last;
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent == __result.__last || *__result.__exponent == 'e'),
+                 "Post-condition failure.");
+  // clang-format on
+
+  return __result;
+}
+
+template <class _Fp, class _Tp>
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_hexadecimal_lower_case(const __float_buffer<_Fp>& __buffer,
+                                                                            _Tp __value, int __precision,
+                                                                            char* __integral) {
+  __float_result __result;
+  __result.__integral = __integral;
+  if (__precision == -1)
+    __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::hex);
+  else
+    __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::hex, __precision);
+
+  // H = one or more hex-digits
+  // S = sign
+  // D = one or more decimal-digits
+  // When the fractional part is zero and no precision the output is 0p+0
+  // else the output is                                              0.HpSD
+  // So testing the second position can differentiate between these two cases.
+  char* __first = __integral + 1;
+  if (*__first == '.') {
+    __result.__radix_point = __first;
+    // One digit is the minimum
+    // 0.hpSd
+    //       ^-- last
+    //     ^---- integral = end of search
+    // ^-------- start of search
+    // 0123456
+    //
+    // Four digits is the maximum
+    // 0.hpSdddd
+    //          ^-- last
+    //        ^---- integral = end of search
+    //    ^-------- start of search
+    // 0123456789
+    static_assert(__traits<_Fp>::__hex_precision_digits <= 4, "Guard against possible underflow.");
+
+    char* __last = __result.__last - 2;
+    __first = __last - __traits<_Fp>::__hex_precision_digits;
+    __result.__exponent = _VSTD::find(__first, __last, 'p');
+  } else {
+    __result.__radix_point = __result.__last;
+    __result.__exponent = __first;
+  }
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent != __result.__last && *__result.__exponent == 'p'),
+                 "Post-condition failure.");
+  // clang-format on
+
+  return __result;
+}
+
+template <class _Fp, class _Tp>
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_hexadecimal_upper_case(const __float_buffer<_Fp>& __buffer,
+                                                                            _Tp __value, int __precision,
+                                                                            char* __integral) {
+  __float_result __result =
+      __format_spec::__format_buffer_hexadecimal_lower_case(__buffer, __value, __precision, __integral);
+  _VSTD::transform(__result.__integral, __result.__exponent, __result.__integral, __hex_to_upper);
+  *__result.__exponent = 'P';
+  return __result;
+}
+
+template <class _Fp, class _Tp>
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_scientific_lower_case(const __float_buffer<_Fp>& __buffer,
+                                                                           _Tp __value, int __precision,
+                                                                           char* __integral) {
+  __float_result __result;
+  __result.__integral = __integral;
+  __result.__last =
+      __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::scientific, __precision);
+
+  char* __first = __integral + 1;
+  _LIBCPP_ASSERT(__first != __result.__last, "No exponent present");
+  if (*__first == '.') {
+    __result.__radix_point = __first;
+    __result.__exponent = __format_spec::__find_exponent(__first + 1, __result.__last);
+  } else {
+    __result.__radix_point = __result.__last;
+    __result.__exponent = __first;
+  }
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent != __result.__last && *__result.__exponent == 'e'),
+                 "Post-condition failure.");
+  // clang-format on
+  return __result;
+}
+
+template <class _Fp, class _Tp>
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_scientific_upper_case(const __float_buffer<_Fp>& __buffer,
+                                                                           _Tp __value, int __precision,
+                                                                           char* __integral) {
+  __float_result __result =
+      __format_spec::__format_buffer_scientific_lower_case(__buffer, __value, __precision, __integral);
+  *__result.__exponent = 'E';
+  return __result;
+}
+
+template <class _Fp, class _Tp>
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_fixed(const __float_buffer<_Fp>& __buffer, _Tp __value,
+                                                           int __precision, char* __integral) {
+  __float_result __result;
+  __result.__integral = __integral;
+  __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::fixed, __precision);
+
+  // When there's no precision there's no radix point.
+  // Else the radix point is placed at __precision + 1 from the end.
+  // By converting __precision to a bool the subtraction can be done
+  // unconditionally.
+  __result.__radix_point = __result.__last - (__precision + bool(__precision));
+  __result.__exponent = __result.__last;
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent == __result.__last),
+                 "Post-condition failure.");
+  // clang-format on
+  return __result;
+}
+
+template <class _Fp, class _Tp>
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_general_lower_case(__float_buffer<_Fp>& __buffer, _Tp __value,
+                                                                        int __precision, char* __integral) {
+
+  __buffer.__remove_trailing_zeros();
+
+  __float_result __result;
+  __result.__integral = __integral;
+  __result.__last = __format_spec::__to_buffer(__integral, __buffer.end(), __value, chars_format::general, __precision);
+
+  char* __first = __integral + 1;
+  if (__first == __result.__last) {
+    __result.__radix_point = __result.__last;
+    __result.__exponent = __result.__last;
+  } else {
+    __result.__exponent = __format_spec::__find_exponent(__first, __result.__last);
+    if (__result.__exponent != __result.__last)
+      // In scientific mode if there's a radix point it will always be after
+      // the first digit. (This is the position __first points at).
+      __result.__radix_point = *__first == '.' ? __first : __result.__last;
+    else {
+      // In fixed mode the algorithm truncates trailing spaces and possibly the
+      // radix point. There's no good guess for the position of the radix point
+      // therefore scan the output after the first digit.
+      __result.__radix_point = _VSTD::find(__first, __result.__last, '.');
+    }
+  }
+
+  // clang-format off
+  _LIBCPP_ASSERT((__result.__integral != __result.__last) &&
+                 (__result.__radix_point == __result.__last || *__result.__radix_point == '.') &&
+                 (__result.__exponent == __result.__last || *__result.__exponent == 'e'),
+                 "Post-condition failure.");
+  // clang-format on
+
+  return __result;
+}
+
+template <class _Fp, class _Tp>
+_LIBCPP_HIDE_FROM_ABI __float_result __format_buffer_general_upper_case(__float_buffer<_Fp>& __buffer, _Tp __value,
+                                                                        int __precision, char* __integral) {
+  __float_result __result =
+      __format_spec::__format_buffer_general_lower_case(__buffer, __value, __precision, __integral);
+  if (__result.__exponent != __result.__last)
+    *__result.__exponent = 'E';
+  return __result;
+}
+
+#    ifndef _LIBCPP_HAS_NO_LOCALIZATION
+template <class _OutIt, class _Fp, class _CharT>
+_LIBCPP_HIDE_FROM_ABI _OutIt __format_locale_specific_form(_OutIt __out_it, const __float_buffer<_Fp>& __buffer,
+                                                           const __float_result& __result, _VSTD::locale __loc,
+                                                           size_t __width, _Flags::_Alignment __alignment,
+                                                           _CharT __fill) {
+  const auto& __np = use_facet<numpunct<_CharT>>(__loc);
+  string __grouping = __np.grouping();
+  char* __first = __result.__integral;
+  // When no radix point or exponent are present __last will be __result.__last.
+  char* __last = _VSTD::min(__result.__radix_point, __result.__exponent);
+
+  ptrdiff_t __digits = __last - __first;
+  if (!__grouping.empty()) {
+    if (__digits <= __grouping[0])
+      __grouping.clear();
+    else
+      __grouping = __determine_grouping(__digits, __grouping);
+  }
+
+  size_t __size = __result.__last - __buffer.begin() + // Formatted string
+                  __buffer.__num_trailing_zeros() +    // Not yet rendered zeros
+                  __grouping.size() -                  // Grouping contains one
+                  !__grouping.empty();                 // additional character
+
+  __formatter::__padding_size_result __padding = {0, 0};
+  bool __zero_padding = __alignment == _Flags::_Alignment::__default;
+  if (__size < __width) {
+    if (__zero_padding) {
+      __alignment = _Flags::_Alignment::__right;
+      __fill = _CharT('0');
+    }
+
+    __padding = __formatter::__padding_size(__size, __width, __alignment);
+  }
+
+  // sign and (zero padding or alignment)
+  if (__zero_padding && __first != __buffer.begin())
+    *__out_it++ = *__buffer.begin();
+  __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __padding.__before, __fill);
+  if (!__zero_padding && __first != __buffer.begin())
+    *__out_it++ = *__buffer.begin();
+
+  // integral part
+  if (__grouping.empty()) {
+    __out_it = _VSTD::copy_n(__first, __digits, _VSTD::move(__out_it));
+  } else {
+    auto __r = __grouping.rbegin();
+    auto __e = __grouping.rend() - 1;
+    _CharT __sep = __np.thousands_sep();
+    // The output is divided in small groups of numbers to write:
+    // - A group before the first separator.
+    // - A separator and a group, repeated for the number of separators.
+    // - A group after the last separator.
+    // This loop achieves that process by testing the termination condition
+    // midway in the loop.
+    while (true) {
+      __out_it = _VSTD::copy_n(__first, *__r, _VSTD::move(__out_it));
+      __first += *__r;
+
+      if (__r == __e)
+        break;
+
+      ++__r;
+      *__out_it++ = __sep;
+    }
+  }
+
+  // fractional part
+  if (__result.__radix_point != __result.__last) {
+    *__out_it++ = __np.decimal_point();
+    __out_it = _VSTD::copy(__result.__radix_point + 1, __result.__exponent, _VSTD::move(__out_it));
+    __out_it = _VSTD::fill_n(_VSTD::move(__out_it), __buffer.__num_trailing_zeros(), _CharT('0'));
+  }
+
+  // exponent
+  if (__result.__exponent != __result.__last)
+    __out_it = _VSTD::copy(__result.__exponent, __result.__last, _VSTD::move(__out_it));
+
+  // alignment
+  return _VSTD::fill_n(_VSTD::move(__out_it), __padding.__after, __fill);
+}
+
+#    endif // _LIBCPP_HAS_NO_LOCALIZATION
+
+template <__formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __formatter_floating_point : public __parser_floating_point<_CharT> {
+public:
+  template <floating_point _Tp>
+  _LIBCPP_HIDE_FROM_ABI auto format(_Tp __value, auto& __ctx) -> decltype(__ctx.out()) {
+    if (this->__width_needs_substitution())
+      this->__substitute_width_arg_id(__ctx.arg(this->__width));
+
+    bool __negative = _VSTD::signbit(__value);
+
+    if (!_VSTD::isfinite(__value)) [[unlikely]]
+      return __format_non_finite(__ctx.out(), __negative, _VSTD::isnan(__value));
+
+    bool __has_precision = this->__has_precision_field();
+    if (this->__precision_needs_substitution())
+      this->__substitute_precision_arg_id(__ctx.arg(this->__precision));
+
+    // Depending on the std-format-spec string the sign and the value
+    // might not be outputted together:
+    // - zero-padding may insert additional '0' characters.
+    // Therefore the value is processed as a non negative value.
+    // The function @ref __insert_sign will insert a '-' when the value was
+    // negative.
+
+    if (__negative)
+      __value = _VSTD::copysign(__value, +1.0);
+
+    // TODO FMT _Fp should just be _Tp when to_chars has proper long double support.
+    using _Fp = conditional_t<same_as<_Tp, long double>, double, _Tp>;
+    // Force the type of the precision to avoid -1 to become an unsigned value.
+    __float_buffer<_Fp> __buffer(__has_precision ? int(this->__precision) : -1);
+    __float_result __result = __format_buffer(__buffer, __value, __negative, __has_precision);
+
+    if (this->__alternate_form && __result.__radix_point == __result.__last) {
+      *__result.__last++ = '.';
+
+      // When there is an exponent the point needs to be moved before the
+      // exponent. When there's no exponent the rotate does nothing. Since
+      // rotate tests whether the operation is a nop, call it unconditionally.
+      _VSTD::rotate(__result.__exponent, __result.__last - 1, __result.__last);
+      __result.__radix_point = __result.__exponent;
+
+      // The radix point is always placed before the exponent.
+      // - No exponent needs to point to the new last.
+      // - An exponent needs to move one position to the right.
+      // So it's safe to increment the value unconditionally.
+      ++__result.__exponent;
+    }
+
+#    ifndef _LIBCPP_HAS_NO_LOCALIZATION
+    if (this->__locale_specific_form)
+      return __format_spec::__format_locale_specific_form(__ctx.out(), __buffer, __result, __ctx.locale(),
+                                                          this->__width, this->__alignment, this->__fill);
+#    endif
+
+    ptrdiff_t __size = __result.__last - __buffer.begin();
+    int __num_trailing_zeros = __buffer.__num_trailing_zeros();
+    if (__size + __num_trailing_zeros >= this->__width) {
+      if (__num_trailing_zeros && __result.__exponent != __result.__last)
+        // Insert trailing zeros before exponent character.
+        return _VSTD::copy(__result.__exponent, __result.__last,
+                           _VSTD::fill_n(_VSTD::copy(__buffer.begin(), __result.__exponent, __ctx.out()),
+                                         __num_trailing_zeros, _CharT('0')));
+
+      return _VSTD::fill_n(_VSTD::copy(__buffer.begin(), __result.__last, __ctx.out()), __num_trailing_zeros,
+                           _CharT('0'));
+    }
+
+    auto __out_it = __ctx.out();
+    char* __first = __buffer.begin();
+    if (this->__alignment == _Flags::_Alignment::__default) {
+      // When there is a sign output it before the padding. Note the __size
+      // doesn't need any adjustment, regardless whether the sign is written
+      // here or in __formatter::__write.
+      if (__first != __result.__integral)
+        *__out_it++ = *__first++;
+      // After the sign is written, zero padding is the same a right alignment
+      // with '0'.
+      this->__alignment = _Flags::_Alignment::__right;
+      this->__fill = _CharT('0');
+    }
+
+    if (__num_trailing_zeros)
+      return __formatter::__write(_VSTD::move(__out_it), __first, __result.__last, __size, this->__width, this->__fill,
+                                  this->__alignment, __result.__exponent, __num_trailing_zeros);
+
+    return __formatter::__write(_VSTD::move(__out_it), __first, __result.__last, __size, this->__width, this->__fill,
+                                this->__alignment);
+  }
+
+private:
+  template <class _OutIt>
+  _LIBCPP_HIDE_FROM_ABI _OutIt __format_non_finite(_OutIt __out_it, bool __negative, bool __isnan) {
+    char __buffer[4];
+    char* __last = __insert_sign(__buffer, __negative, this->__sign);
+
+    // to_char can return inf, infinity, nan, and nan(n-char-sequence).
+    // The format library requires inf and nan.
+    // All in one expression to avoid dangling references.
+    __last = _VSTD::copy_n(&("infnanINFNAN"[6 * (this->__type == _Flags::_Type::__float_hexadecimal_upper_case ||
+                                                 this->__type == _Flags::_Type::__scientific_upper_case ||
+                                                 this->__type == _Flags::_Type::__fixed_upper_case ||
+                                                 this->__type == _Flags::_Type::__general_upper_case) +
+                                            3 * __isnan]),
+                           3, __last);
+
+    // [format.string.std]/13
+    // A zero (0) character preceding the width field pads the field with
+    // leading zeros (following any indication of sign or base) to the field
+    // width, except when applied to an infinity or NaN.
+    if (this->__alignment == _Flags::_Alignment::__default)
+      this->__alignment = _Flags::_Alignment::__right;
+
+    ptrdiff_t __size = __last - __buffer;
+    if (__size >= this->__width)
+      return _VSTD::copy_n(__buffer, __size, _VSTD::move(__out_it));
+
+    return __formatter::__write(_VSTD::move(__out_it), __buffer, __last, __size, this->__width, this->__fill,
+                                this->__alignment);
+  }
+
+  /// Fills the buffer with the data based on the requested formatting.
+  ///
+  /// This function, when needed, turns the characters to upper case and
+  /// determines the "interesting" locations which are returned to the caller.
+  ///
+  /// This means the caller never has to convert the contents of the buffer to
+  /// upper case or search for radix points and the location of the exponent.
+  /// This gives a bit of overhead. The original code didn't do that, but due
+  /// to the number of possible additional work needed to turn this number to
+  /// the proper output the code was littered with tests for upper cases and
+  /// searches for radix points and exponents.
+  /// - When a precision larger than the type's precision is selected
+  ///   additional zero characters need to be written before the exponent.
+  /// - alternate form needs to add a radix point when not present.
+  /// - localization needs to do grouping in the integral part.
+  template <class _Fp, class _Tp>
+  // TODO FMT _Fp should just be _Tp when to_chars has proper long double support.
+  _LIBCPP_HIDE_FROM_ABI __float_result __format_buffer(__float_buffer<_Fp>& __buffer, _Tp __value, bool __negative,
+                                                       bool __has_precision) {
+    char* __first = __insert_sign(__buffer.begin(), __negative, this->__sign);
+    switch (this->__type) {
+    case _Flags::_Type::__default:
+      return __format_spec::__format_buffer_default(__buffer, __value, __first);
+
+    case _Flags::_Type::__float_hexadecimal_lower_case:
+      return __format_spec::__format_buffer_hexadecimal_lower_case(
+          __buffer, __value, __has_precision ? __buffer.__precision() : -1, __first);
+
+    case _Flags::_Type::__float_hexadecimal_upper_case:
+      return __format_spec::__format_buffer_hexadecimal_upper_case(
+          __buffer, __value, __has_precision ? __buffer.__precision() : -1, __first);
+
+    case _Flags::_Type::__scientific_lower_case:
+      return __format_spec::__format_buffer_scientific_lower_case(__buffer, __value, __buffer.__precision(), __first);
+
+    case _Flags::_Type::__scientific_upper_case:
+      return __format_spec::__format_buffer_scientific_upper_case(__buffer, __value, __buffer.__precision(), __first);
+
+    case _Flags::_Type::__fixed_lower_case:
+    case _Flags::_Type::__fixed_upper_case:
+      return __format_spec::__format_buffer_fixed(__buffer, __value, __buffer.__precision(), __first);
+
+    case _Flags::_Type::__general_lower_case:
+      return __format_spec::__format_buffer_general_lower_case(__buffer, __value, __buffer.__precision(), __first);
+
+    case _Flags::_Type::__general_upper_case:
+      return __format_spec::__format_buffer_general_upper_case(__buffer, __value, __buffer.__precision(), __first);
+
+    default:
+      _LIBCPP_ASSERT(false, "The parser should have validated the type");
+      _LIBCPP_UNREACHABLE();
+    }
+  }
+};
+
+} //namespace __format_spec
+
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<float, _CharT>
+    : public __format_spec::__formatter_floating_point<_CharT> {};
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<double, _CharT>
+    : public __format_spec::__formatter_floating_point<_CharT> {};
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<long double, _CharT>
+    : public __format_spec::__formatter_floating_point<_CharT> {};
+
+#  endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___FORMAT_FORMATTER_FLOATING_POINT_H
diff --git a/libcxx/include/__format/formatter_integral.h b/libcxx/include/__format/formatter_integral.h
index 73e2fed8c0b7b..f164ee6109748 100644
--- a/libcxx/include/__format/formatter_integral.h
+++ b/libcxx/include/__format/formatter_integral.h
@@ -82,7 +82,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 namespace __format_spec {
 
 /** Wrapper around @ref to_chars, returning the output pointer. */
-template <class _Tp>
+template <integral _Tp>
 _LIBCPP_HIDE_FROM_ABI char* __to_buffer(char* __first, char* __last,
                                         _Tp __value, int __base) {
   // TODO FMT Evaluate code overhead due to not calling the internal function
diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
index 6eae27d9ad188..75fc626c84ac1 100644
--- a/libcxx/include/__format/parser_std_format_spec.h
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -475,6 +475,21 @@ __parse_type(const _CharT* __begin, _Flags& __flags) {
   return ++__begin;
 }
 
+/**
+ * Process the parsed alignment and zero-padding state of arithmetic types.
+ *
+ * [format.string.std]/13
+ *   If the 0 character and an align option both appear, the 0 character is
+ *   ignored.
+ *
+ * For the formatter a @ref __default alignment means zero-padding.
+ */
+_LIBCPP_HIDE_FROM_ABI constexpr void __process_arithmetic_alignment(_Flags& __flags) {
+  __flags.__zero_padding &= __flags.__alignment == _Flags::_Alignment::__default;
+  if (!__flags.__zero_padding && __flags.__alignment == _Flags::_Alignment::__default)
+    __flags.__alignment = _Flags::_Alignment::__right;
+}
+
 /**
  * The parser for the std-format-spec.
  *
@@ -648,23 +663,9 @@ class _LIBCPP_TEMPLATE_VIS __parser_integral
     return __begin;
   }
 
-  /**
-   * Handles the post-parsing updates for the integer types.
-   *
-   * Updates the zero-padding and alignment for integer types.
-   *
-   * [format.string.std]/13
-   *   If the 0 character and an align option both appear, the 0 character is
-   *   ignored.
-   *
-   * For the formatter a @ref __default alignment means zero-padding. Update
-   * the alignment based on parsed format string.
-   */
+  /** Handles the post-parsing updates for the integer types. */
   _LIBCPP_HIDE_FROM_ABI constexpr void __handle_integer() noexcept {
-    this->__zero_padding &= this->__alignment == _Flags::_Alignment::__default;
-    if (!this->__zero_padding &&
-        this->__alignment == _Flags::_Alignment::__default)
-      this->__alignment = _Flags::_Alignment::__right;
+    __process_arithmetic_alignment(static_cast<_Flags&>(*this));
   }
 
   /**
@@ -701,7 +702,130 @@ class _LIBCPP_TEMPLATE_VIS __parser_integral
   }
 };
 
-// TODO FMT Add a parser for floating-point values.
+/**
+ * The parser for the std-format-spec.
+ *
+ * This implements the parser for the floating-point types.
+ *
+ * See @ref __parser_string.
+ */
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_floating_point
+    : public __parser_width,              // provides __width(|as_arg)
+      public __parser_precision,          // provides __precision(|as_arg)
+      public __parser_fill_align<_CharT>, // provides __fill and uses __flags
+      public _Flags                       // provides __flags
+{
+public:
+  using char_type = _CharT;
+
+  /**
+   * The low-level std-format-spec parse function.
+   *
+   * @pre __begin points at the beginning of the std-format-spec. This means
+   * directly after the ':'.
+   * @pre The std-format-spec parses the entire input, or the first unmatched
+   * character is a '}'.
+   *
+   * @returns The iterator pointing at the last parsed character.
+   */
+  _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx)
+      -> decltype(__parse_ctx.begin()) {
+    auto __it = __parse(__parse_ctx);
+    __process_arithmetic_alignment(static_cast<_Flags&>(*this));
+    __process_display_type();
+    return __it;
+  }
+protected:
+  /**
+   * The low-level std-format-spec parse function.
+   *
+   * @pre __begin points at the beginning of the std-format-spec. This means
+   * directly after the ':'.
+   * @pre The std-format-spec parses the entire input, or the first unmatched
+   * character is a '}'.
+   *
+   * @returns The iterator pointing at the last parsed character.
+   */
+  _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx)
+      -> decltype(__parse_ctx.begin()) {
+    auto __begin = __parse_ctx.begin();
+    auto __end = __parse_ctx.end();
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parser_fill_align<_CharT>::__parse(__begin, __end,
+                                                   static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_sign(__begin, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_alternate_form(__begin, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_zero_padding(__begin, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parser_precision::__parse(__begin, __end, __parse_ctx);
+    if (__begin == __end)
+      return __begin;
+
+    __begin =
+        __parse_locale_specific_form(__begin, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
+
+    if (__begin != __end && *__begin != _CharT('}'))
+      __throw_format_error(
+          "The format-spec should consume the input or end with a '}'");
+
+    return __begin;
+  }
+
+  /** Processes the parsed std-format-spec based on the parsed display type. */
+  _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() {
+    switch (this->__type) {
+    case _Flags::_Type::__default:
+      // When no precision specified then it keeps default since that
+      // formatting differs from the other types.
+      if (this->__has_precision_field())
+        this->__type = _Flags::_Type::__general_lower_case;
+      break;
+    case _Flags::_Type::__float_hexadecimal_lower_case:
+    case _Flags::_Type::__float_hexadecimal_upper_case:
+      // Precision specific behavior will be handled later.
+      break;
+    case _Flags::_Type::__scientific_lower_case:
+    case _Flags::_Type::__scientific_upper_case:
+    case _Flags::_Type::__fixed_lower_case:
+    case _Flags::_Type::__fixed_upper_case:
+    case _Flags::_Type::__general_lower_case:
+    case _Flags::_Type::__general_upper_case:
+      if (!this->__has_precision_field()) {
+        // Set the default precision for the call to to_chars.
+        this->__precision = 6;
+        this->__precision_as_arg = false;
+      }
+      break;
+
+    default:
+      __throw_format_error("The format-spec type has a type not supported for "
+                           "a floating-point argument");
+    }
+  }
+};
+
 // TODO FMT Add a parser for pointer values.
 
 /** Helper struct returned from @ref __get_string_alignment. */
diff --git a/libcxx/include/format b/libcxx/include/format
index 788b9c299abc0..3a186469dd5c0 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -277,6 +277,7 @@ namespace std {
 #include <__format/formatter.h>
 #include <__format/formatter_bool.h>
 #include <__format/formatter_char.h>
+#include <__format/formatter_floating_point.h>
 #include <__format/formatter_integer.h>
 #include <__format/formatter_string.h>
 #include <__format/parser_std_format_spec.h>
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index a927f9d0e6700..c940a6d11d816 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -488,25 +488,26 @@ module std [system] {
     export *
 
     module __format {
-      module format_arg             { private header "__format/format_arg.h" }
-      module format_args            { private header "__format/format_args.h" }
+      module format_arg               { private header "__format/format_arg.h" }
+      module format_args              { private header "__format/format_args.h" }
       module format_context {
         private header "__format/format_context.h"
         export optional
         export locale
       }
-      module format_error           { private header "__format/format_error.h" }
-      module format_fwd             { private header "__format/format_fwd.h" }
-      module format_parse_context   { private header "__format/format_parse_context.h" }
-      module format_string          { private header "__format/format_string.h" }
-      module format_to_n_result     { private header "__format/format_to_n_result.h" }
-      module formatter              { private header "__format/formatter.h" }
-      module formatter_bool         { private header "__format/formatter_bool.h" }
-      module formatter_char         { private header "__format/formatter_char.h" }
-      module formatter_integer      { private header "__format/formatter_integer.h" }
-      module formatter_integral     { private header "__format/formatter_integral.h" }
-      module formatter_string       { private header "__format/formatter_string.h" }
-      module parser_std_format_spec { private header "__format/parser_std_format_spec.h" }
+      module format_error             { private header "__format/format_error.h" }
+      module format_fwd               { private header "__format/format_fwd.h" }
+      module format_parse_context     { private header "__format/format_parse_context.h" }
+      module format_string            { private header "__format/format_string.h" }
+      module format_to_n_result       { private header "__format/format_to_n_result.h" }
+      module formatter                { private header "__format/formatter.h" }
+      module formatter_bool           { private header "__format/formatter_bool.h" }
+      module formatter_char           { private header "__format/formatter_char.h" }
+      module formatter_floating_point { private header "__format/formatter_floating_point.h" }
+      module formatter_integer        { private header "__format/formatter_integer.h" }
+      module formatter_integral       { private header "__format/formatter_integral.h" }
+      module formatter_string         { private header "__format/formatter_string.h" }
+      module parser_std_format_spec   { private header "__format/parser_std_format_spec.h" }
     }
   }
   module forward_list {
diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_floating_point.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_floating_point.module.verify.cpp
new file mode 100644
index 0000000000000..35f19dcf76a73
--- /dev/null
+++ b/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_floating_point.module.verify.cpp
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: modules-build
+
+// WARNING: This test was generated by 'generate_private_header_tests.py'
+// and should not be edited manually.
+
+// expected-error@*:* {{use of private header from outside its module: '__format/formatter_floating_point.h'}}
+#include <__format/formatter_floating_point.h>
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_floating_point.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_floating_point.pass.cpp
new file mode 100644
index 0000000000000..caf38a7e520a0
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_floating_point.pass.cpp
@@ -0,0 +1,353 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// Tests the parsing of the format string as specified in [format.string.std].
+// It validates whether the std-format-spec is valid for a floating-point type.
+
+#include <format>
+#include <cassert>
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  include <iostream>
+#endif
+
+#include "concepts_precision.h"
+#include "test_macros.h"
+#include "make_string.h"
+#include "test_exception.h"
+
+#define CSTR(S) MAKE_CSTRING(CharT, S)
+
+using namespace std::__format_spec;
+
+template <class CharT>
+using Parser = __parser_floating_point<CharT>;
+
+template <class CharT>
+struct Expected {
+  CharT fill = CharT(' ');
+  _Flags::_Alignment alignment = _Flags::_Alignment::__right;
+  _Flags::_Sign sign = _Flags::_Sign::__default;
+  bool alternate_form = false;
+  bool zero_padding = false;
+  uint32_t width = 0;
+  bool width_as_arg = false;
+  uint32_t precision = std::__format::__number_max;
+  bool precision_as_arg = true;
+  bool locale_specific_form = false;
+  _Flags::_Type type = _Flags::_Type::__default;
+};
+
+template <class CharT>
+constexpr void test(Expected<CharT> expected, size_t size, std::basic_string_view<CharT> fmt) {
+  // Initialize parser with sufficient arguments to avoid the parsing to fail
+  // due to insufficient arguments.
+  std::basic_format_parse_context<CharT> parse_ctx(fmt, std::__format::__number_max);
+  auto begin = parse_ctx.begin();
+  auto end = parse_ctx.end();
+  Parser<CharT> parser;
+  auto it = parser.parse(parse_ctx);
+
+  assert(begin == parse_ctx.begin());
+  assert(end == parse_ctx.end());
+
+  assert(begin + size == it);
+  assert(parser.__fill == expected.fill);
+  assert(parser.__alignment == expected.alignment);
+  assert(parser.__sign == expected.sign);
+  assert(parser.__alternate_form == expected.alternate_form);
+  assert(parser.__zero_padding == expected.zero_padding);
+  assert(parser.__width == expected.width);
+  assert(parser.__width_as_arg == expected.width_as_arg);
+  assert(parser.__precision == expected.precision);
+  assert(parser.__precision_as_arg == expected.precision_as_arg);
+  assert(parser.__locale_specific_form == expected.locale_specific_form);
+  assert(parser.__type == expected.type);
+}
+
+template <class CharT>
+constexpr void test(Expected<CharT> expected, size_t size, const CharT* f) {
+  // The format-spec is valid if completely consumed or terminates at a '}'.
+  // The valid inputs all end with a '}'. The test is executed twice:
+  // - first with the terminating '}',
+  // - second consuming the entire input.
+  std::basic_string_view<CharT> fmt{f};
+  assert(fmt.back() == CharT('}') && "Pre-condition failure");
+
+  test(expected, size, fmt);
+  fmt.remove_suffix(1);
+  test(expected, size, fmt);
+}
+
+template <class CharT>
+constexpr void test() {
+  Parser<CharT> parser;
+
+  assert(parser.__fill == CharT(' '));
+  assert(parser.__alignment == _Flags::_Alignment::__default);
+  assert(parser.__sign == _Flags::_Sign::__default);
+  assert(parser.__alternate_form == false);
+  assert(parser.__zero_padding == false);
+  assert(parser.__width == 0);
+  assert(parser.__width_as_arg == false);
+  assert(parser.__precision == std::__format::__number_max);
+  assert(parser.__precision_as_arg == true);
+  assert(parser.__locale_specific_form == false);
+  assert(parser.__type == _Flags::_Type::__default);
+
+  // Depending on whether or not a precision is specified the results differ.
+  // Table 65: Meaning of type options for floating-point types [tab:format.type.float]
+
+  test({}, 0, CSTR("}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 2, CSTR(".0}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 4, CSTR(".{1}}"));
+
+  test({.type = _Flags::_Type::__float_hexadecimal_lower_case}, 1, CSTR("a}"));
+  test({.type = _Flags::_Type::__float_hexadecimal_upper_case}, 1, CSTR("A}"));
+
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__scientific_lower_case}, 1, CSTR("e}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__scientific_lower_case}, 3, CSTR(".0e}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__scientific_lower_case}, 5, CSTR(".{1}e}"));
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__scientific_upper_case}, 1, CSTR("E}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__scientific_upper_case}, 3, CSTR(".0E}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__scientific_upper_case}, 5, CSTR(".{1}E}"));
+
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__fixed_lower_case}, 1, CSTR("f}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__fixed_lower_case}, 3, CSTR(".0f}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__fixed_lower_case}, 5, CSTR(".{1}f}"));
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__fixed_upper_case}, 1, CSTR("F}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__fixed_upper_case}, 3, CSTR(".0F}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__fixed_upper_case}, 5, CSTR(".{1}F}"));
+
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 1, CSTR("g}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 3, CSTR(".0g}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 5, CSTR(".{1}g}"));
+  test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__general_upper_case}, 1, CSTR("G}"));
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__general_upper_case}, 3, CSTR(".0G}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__general_upper_case}, 5, CSTR(".{1}G}"));
+
+  // *** Align-fill ***
+  test({.alignment = _Flags::_Alignment::__left}, 1, CSTR("<}"));
+  test({.alignment = _Flags::_Alignment::__center}, 1, "^}");
+  test({.alignment = _Flags::_Alignment::__right}, 1, ">}");
+
+  test({.fill = CharT('L'), .alignment = _Flags::_Alignment::__left}, 2, CSTR("L<}"));
+  test({.fill = CharT('#'), .alignment = _Flags::_Alignment::__center}, 2, CSTR("#^}"));
+  test({.fill = CharT('0'), .alignment = _Flags::_Alignment::__right}, 2, CSTR("0>}"));
+
+  test_exception<Parser<CharT>>("The format-spec fill field contains an invalid character", CSTR("{<"));
+  test_exception<Parser<CharT>>("The format-spec fill field contains an invalid character", CSTR("}<"));
+
+  // *** Sign ***
+  test({.sign = _Flags::_Sign::__minus}, 1, CSTR("-}"));
+  test({.sign = _Flags::_Sign::__plus}, 1, CSTR("+}"));
+  test({.sign = _Flags::_Sign::__space}, 1, CSTR(" }"));
+
+  // *** Alternate form ***
+  test({.alternate_form = true}, 1, CSTR("#}"));
+
+  // *** Zero padding ***
+  // TODO FMT What to do with zero-padding without a width?
+  // [format.string.std]/13
+  //   A zero (0) character preceding the width field pads the field with
+  //   leading zeros (following any indication of sign or base) to the field
+  //   width, except when applied to an infinity or NaN.
+  // Obviously it makes no sense, but should it be allowed or is it a format
+  // error?
+  test({.alignment = _Flags::_Alignment::__default, .zero_padding = true}, 1, CSTR("0}"));
+  test({.alignment = _Flags::_Alignment::__left, .zero_padding = false}, 2, CSTR("<0}"));
+  test({.alignment = _Flags::_Alignment::__center, .zero_padding = false}, 2, CSTR("^0}"));
+  test({.alignment = _Flags::_Alignment::__right, .zero_padding = false}, 2, CSTR(">0}"));
+
+  // *** Width ***
+  test({.width = 0, .width_as_arg = false}, 0, CSTR("}"));
+  test({.width = 1, .width_as_arg = false}, 1, CSTR("1}"));
+  test({.width = 10, .width_as_arg = false}, 2, CSTR("10}"));
+  test({.width = 1000, .width_as_arg = false}, 4, CSTR("1000}"));
+  test({.width = 1000000, .width_as_arg = false}, 7, CSTR("1000000}"));
+
+  test({.width = 0, .width_as_arg = true}, 2, CSTR("{}}"));
+  test({.width = 0, .width_as_arg = true}, 3, CSTR("{0}}"));
+  test({.width = 1, .width_as_arg = true}, 3, CSTR("{1}}"));
+
+  test_exception<Parser<CharT>>("A format-spec width field shouldn't have a leading zero", CSTR("00"));
+
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  test({.width = 2'147'483'647, .width_as_arg = false}, 10, CSTR("2147483647}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("2147483648"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("5000000000"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("10000000000"));
+
+  test_exception<Parser<CharT>>("End of input while parsing format-spec arg-id", CSTR("{"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{0"));
+  test_exception<Parser<CharT>>("The arg-id of the format-spec starts with an invalid character", CSTR("{a"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{1"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{9"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{9:"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{9a"));
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  // Note the static_assert tests whether the arg-id is valid.
+  // Therefore the following should be true arg-id < __format::__number_max.
+  test({.width = 2'147'483'646, .width_as_arg = true}, 12, CSTR("{2147483646}}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("{2147483648}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("{5000000000}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("{10000000000}"));
+
+  // *** Precision ***
+  test({.precision = 0, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 2, CSTR(".0}"));
+  test({.precision = 1, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 2, CSTR(".1}"));
+  test({.precision = 10, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 3, CSTR(".10}"));
+  test({.precision = 1000, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 5, CSTR(".1000}"));
+  test({.precision = 1000000, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 8,
+       CSTR(".1000000}"));
+
+  test({.precision = 0, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 3, CSTR(".{}}"));
+  test({.precision = 0, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 4, CSTR(".{0}}"));
+  test({.precision = 1, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 4, CSTR(".{1}}"));
+
+  test_exception<Parser<CharT>>("The format-spec precision field doesn't contain a value or arg-id", CSTR(".a"));
+  test_exception<Parser<CharT>>("The format-spec precision field doesn't contain a value or arg-id", CSTR(".:"));
+
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  test({.precision = 2'147'483'647, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 11,
+       CSTR(".2147483647}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR(".2147483648"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR(".5000000000"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR(".10000000000"));
+
+  test_exception<Parser<CharT>>("End of input while parsing format-spec arg-id", CSTR(".{"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR(".{0"));
+  test_exception<Parser<CharT>>("The arg-id of the format-spec starts with an invalid character", CSTR(".{a"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR(".{1"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR(".{9"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR(".{9:"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR(".{9a"));
+
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  // Note the static_assert tests whether the arg-id is valid.
+  // Therefore the following should be true arg-id < __format::__number_max.
+  test({.precision = 2'147'483'646, .precision_as_arg = true, .type = _Flags::_Type::__general_lower_case}, 13,
+       CSTR(".{2147483646}}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR(".{2147483648}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR(".{5000000000}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR(".{10000000000}"));
+
+  // *** Width & Precision ***
+  test({.width = 1,
+        .width_as_arg = false,
+        .precision = 0,
+        .precision_as_arg = false,
+        .type = _Flags::_Type::__general_lower_case},
+       3, CSTR("1.0}"));
+  test({.width = 0,
+        .width_as_arg = true,
+        .precision = 1,
+        .precision_as_arg = true,
+        .type = _Flags::_Type::__general_lower_case},
+       5, CSTR("{}.{}}"));
+  test({.width = 10,
+        .width_as_arg = true,
+        .precision = 9,
+        .precision_as_arg = true,
+        .type = _Flags::_Type::__general_lower_case},
+       8, CSTR("{10}.{9}}"));
+
+  // *** Locale-specific form ***
+  test({.locale_specific_form = true}, 1, CSTR("L}"));
+
+  // *** Type ***
+  {
+    const char* unsuported_type = "The format-spec type has a type not supported for a floating-point argument";
+    const char* not_a_type = "The format-spec should consume the input or end with a '}'";
+
+    test({.type = _Flags::_Type::__float_hexadecimal_upper_case}, 1, CSTR("A}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("B}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("C}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("D}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__scientific_upper_case}, 1, CSTR("E}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__fixed_upper_case}, 1, CSTR("F}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__general_upper_case}, 1, CSTR("G}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("H}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("I}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("J}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("K}"));
+    test({.locale_specific_form = true}, 1, CSTR("L}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("M}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("N}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("O}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("P}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("Q}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("R}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("S}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("T}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("U}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("V}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("W}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("X}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("Y}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("Z}"));
+
+    test({.type = _Flags::_Type::__float_hexadecimal_lower_case}, 1, CSTR("a}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("b}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("c}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("d}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__scientific_lower_case}, 1, CSTR("e}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__fixed_lower_case}, 1, CSTR("f}"));
+    test({.precision = 6, .precision_as_arg = false, .type = _Flags::_Type::__general_lower_case}, 1, CSTR("g}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("h}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("i}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("j}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("k}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("l}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("m}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("n}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("o}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("p}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("q}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("r}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("s}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("t}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("u}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("v}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("w}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("x}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("y}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("z}"));
+  }
+  // **** General ***
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR("ss"));
+}
+
+constexpr bool test() {
+  test<char>();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test<wchar_t>();
+#endif
+
+  return true;
+}
+
+int main(int, char**) {
+#if !defined(_WIN32) && !defined(_AIX)
+  // Make sure the parsers match the expectations. The layout of the
+  // subobjects is chosen to minimize the size required.
+  static_assert(sizeof(Parser<char>) == 3 * sizeof(uint32_t));
+#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  static_assert(sizeof(Parser<wchar_t>) == (sizeof(wchar_t) <= 2 ? 3 * sizeof(uint32_t) : 4 * sizeof(uint32_t)));
+#  endif
+#endif
+
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.floating_point.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.floating_point.pass.cpp
index 1ba1189b512c5..0e8e95d2762f6 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.floating_point.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.floating_point.pass.cpp
@@ -8,7 +8,6 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
-// UNSUPPORTED: LIBCXX-DEBUG-FIXME
 
 // <format>
 
@@ -25,11 +24,14 @@
 // - double
 // - long double
 
-// TODO FMT Enable after floating-point support has been enabled
-#if 0
 #include <format>
+
+#include <array>
 #include <cassert>
+#include <cmath>
+#include <charconv>
 #include <concepts>
+#include <string>
 #include <type_traits>
 
 #include "test_macros.h"
@@ -37,9 +39,8 @@
 
 #define STR(S) MAKE_STRING(CharT, S)
 
-template <class StringViewT, class ArithmeticT>
-void test(StringViewT fmt, ArithmeticT arg) {
-  using CharT = typename StringViewT::value_type;
+template <class CharT, class ArithmeticT>
+void test(std::basic_string_view<CharT> fmt, ArithmeticT arg, std::basic_string<CharT> expected) {
   auto parse_ctx = std::basic_format_parse_context<CharT>(fmt);
   std::formatter<ArithmeticT, CharT> formatter;
   static_assert(std::semiregular<decltype(formatter)>);
@@ -51,15 +52,19 @@ void test(StringViewT fmt, ArithmeticT arg) {
   auto out = std::back_inserter(result);
   using FormatCtxT = std::basic_format_context<decltype(out), CharT>;
 
-  auto format_ctx = std::__format_context_create<decltype(out), CharT>(
-      out, std::make_format_args<FormatCtxT>(arg));
+  auto format_ctx = std::__format_context_create<decltype(out), CharT>(out, std::make_format_args<FormatCtxT>(arg));
   formatter.format(arg, format_ctx);
-  std::string expected = std::to_string(arg);
-  assert(result == std::basic_string<CharT>(expected.begin(), expected.end()));
+
+  if (expected.empty()) {
+    std::array<char, 128> buffer;
+    expected.append(buffer.begin(), std::to_chars(buffer.begin(), buffer.end(), arg).ptr);
+  }
+
+  assert(result == expected);
 }
 
 template <class StringT, class ArithmeticT>
-void test_termination_condition(StringT f, ArithmeticT arg) {
+void test_termination_condition(StringT f, ArithmeticT arg, StringT expected = {}) {
   // The format-spec is valid if completely consumed or terminates at a '}'.
   // The valid inputs all end with a '}'. The test is executed twice:
   // - first with the terminating '}',
@@ -68,40 +73,398 @@ void test_termination_condition(StringT f, ArithmeticT arg) {
   std::basic_string_view<CharT> fmt{f};
   assert(fmt.back() == CharT('}') && "Pre-condition failure");
 
-  test(fmt, arg);
+  test(fmt, arg, expected);
   fmt.remove_suffix(1);
-  test(fmt, arg);
+  test(fmt, arg, expected);
+}
+
+template <class CharT, class ArithmeticT>
+void test_hex_lower_case_precision(ArithmeticT value) {
+  std::array<char, 25'000> buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
+  test_termination_condition(STR(".20000a}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000a}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000a}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000a}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000a}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
+  test_termination_condition(STR(".20000La}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000La}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000La}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000La}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000La}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#endif
+}
+
+template <class CharT, class ArithmeticT>
+void test_hex_upper_case_precision(ArithmeticT value) {
+  std::array<char, 25'000> buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000A}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000A}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000A}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000A}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000A}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::hex, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000LA}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000LA}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000LA}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000LA}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000LA}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#endif
+}
+
+template <class CharT, class ArithmeticT>
+void test_scientific_lower_case_precision(ArithmeticT value) {
+  std::array<char, 25'000> buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
+  test_termination_condition(STR(".20000e}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000e}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000e}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000e}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000e}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
+  test_termination_condition(STR(".20000Le}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000Le}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000Le}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000Le}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000Le}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#endif
+}
+
+template <class CharT, class ArithmeticT>
+void test_scientific_upper_case_precision(ArithmeticT value) {
+  std::array<char, 25'000> buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000E}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000E}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000E}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000E}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000E}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::scientific, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000LE}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000LE}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000LE}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000LE}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000LE}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#endif
+}
+
+template <class CharT, class ArithmeticT>
+void test_fixed_lower_case_precision(ArithmeticT value) {
+  std::array<char, 25'000> buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
+  test_termination_condition(STR(".20000f}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000f}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000f}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000f}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000f}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
+  test_termination_condition(STR(".20000Lf}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000Lf}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000Lf}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000Lf}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000Lf}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#endif
+}
+
+template <class CharT, class ArithmeticT>
+void test_fixed_upper_case_precision(ArithmeticT value) {
+  std::array<char, 25'000> buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000F}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000F}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000F}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000F}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000F}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::fixed, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000LF}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000LF}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000LF}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000LF}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000LF}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#endif
+}
+
+template <class CharT, class ArithmeticT>
+void test_general_lower_case_precision(ArithmeticT value) {
+  std::array<char, 25'000> buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
+  test_termination_condition(STR(".20000g}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000g}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000g}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000g}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000g}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
+  test_termination_condition(STR(".20000Lg}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000Lg}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000Lg}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000Lg}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000Lg}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#endif
+}
+
+template <class CharT, class ArithmeticT>
+void test_general_upper_case_precision(ArithmeticT value) {
+  std::array<char, 25'000> buffer;
+  char* end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000G}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size_t size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000G}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000G}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000G}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000G}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  end = std::to_chars(buffer.begin(), buffer.end(), value, std::chars_format::general, 20'000).ptr;
+  std::transform(buffer.begin(), end, buffer.begin(), [](char c) { return std::toupper(c); });
+  test_termination_condition(STR(".20000LG}"), value, std::basic_string<CharT>{buffer.begin(), end});
+
+  size = buffer.end() - end;
+  std::fill_n(end, size, '#');
+  test_termination_condition(STR("#<25000.20000LG}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - (size / 2), buffer.end());
+  test_termination_condition(STR("#^25000.20000LG}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::rotate(buffer.begin(), buffer.end() - ((size + 1) / 2), buffer.end());
+  test_termination_condition(STR("#>25000.20000LG}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+  std::fill_n(buffer.begin(), size, '0');
+  if (std::signbit(value)) {
+    buffer[0] = '-';
+    buffer[size] = '0';
+  }
+  test_termination_condition(STR("025000.20000LG}"), value, std::basic_string<CharT>{buffer.begin(), buffer.end()});
+#endif
+}
+
+template <class CharT, class ArithmeticT>
+void test_value(ArithmeticT value) {
+  test_hex_lower_case_precision<CharT>(value);
+  test_hex_upper_case_precision<CharT>(value);
+
+  test_scientific_lower_case_precision<CharT>(value);
+  test_scientific_upper_case_precision<CharT>(value);
+
+  test_fixed_lower_case_precision<CharT>(value);
+  test_fixed_upper_case_precision<CharT>(value);
+
+  test_general_lower_case_precision<CharT>(value);
+  test_general_upper_case_precision<CharT>(value);
+}
+
+template <class ArithmeticT, class CharT>
+void test_special_values() {
+  using A = ArithmeticT;
+
+  test_value<CharT>(-std::numeric_limits<A>::max());
+  test_value<CharT>(A(-1.0));
+  test_value<CharT>(-std::numeric_limits<A>::min());
+  test_value<CharT>(-std::numeric_limits<A>::denorm_min());
+  test_value<CharT>(A(-0.0));
+
+  test_value<CharT>(A(0.0));
+  test_value<CharT>(std::numeric_limits<A>::denorm_min());
+  test_value<CharT>(A(1.0));
+  test_value<CharT>(std::numeric_limits<A>::min());
+  test_value<CharT>(std::numeric_limits<A>::max());
 }
 
 template <class ArithmeticT, class CharT>
 void test_float_type() {
   using A = ArithmeticT;
+
   test_termination_condition(STR("}"), A(-std::numeric_limits<float>::max()));
   test_termination_condition(STR("}"), A(-std::numeric_limits<float>::min()));
   test_termination_condition(STR("}"), A(-0.0));
+
   test_termination_condition(STR("}"), A(0.0));
   test_termination_condition(STR("}"), A(std::numeric_limits<float>::min()));
   test_termination_condition(STR("}"), A(std::numeric_limits<float>::max()));
   if (sizeof(A) > sizeof(float)) {
-    test_termination_condition(STR("}"),
-                               A(-std::numeric_limits<double>::max()));
-    test_termination_condition(STR("}"),
-                               A(-std::numeric_limits<double>::min()));
+    test_termination_condition(STR("}"), A(-std::numeric_limits<double>::max()));
+    test_termination_condition(STR("}"), A(-std::numeric_limits<double>::min()));
     test_termination_condition(STR("}"), A(std::numeric_limits<double>::min()));
     test_termination_condition(STR("}"), A(std::numeric_limits<double>::max()));
   }
   if (sizeof(A) > sizeof(double)) {
-    test_termination_condition(STR("}"),
-                               A(-std::numeric_limits<long double>::max()));
-    test_termination_condition(STR("}"),
-                               A(-std::numeric_limits<long double>::min()));
-    test_termination_condition(STR("}"),
-                               A(std::numeric_limits<long double>::min()));
-    test_termination_condition(STR("}"),
-                               A(std::numeric_limits<long double>::max()));
+    test_termination_condition(STR("}"), A(-std::numeric_limits<long double>::max()));
+    test_termination_condition(STR("}"), A(-std::numeric_limits<long double>::min()));
+    test_termination_condition(STR("}"), A(std::numeric_limits<long double>::min()));
+    test_termination_condition(STR("}"), A(std::numeric_limits<long double>::max()));
   }
 
-  // TODO FMT Also test with special floating point values: +/-Inf NaN.
+  // The results of inf and nan may differ from the result of to_chars.
+  test_termination_condition(STR("}"), A(-std::numeric_limits<A>::infinity()), STR("-inf"));
+  test_termination_condition(STR("}"), A(std::numeric_limits<A>::infinity()), STR("inf"));
+
+  A nan = std::numeric_limits<A>::quiet_NaN();
+  test_termination_condition(STR("}"), std::copysign(nan, -1.0), STR("-nan"));
+  test_termination_condition(STR("}"), nan, STR("nan"));
+
+  // TODO FMT Enable long double testing
+  if constexpr (!std::same_as<A, long double>)
+    test_special_values<A, CharT>();
 }
 
 template <class CharT>
@@ -119,6 +482,3 @@ int main(int, char**) {
 
   return 0;
 }
-#else
-int main(int, char**) { return 0; }
-#endif
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 470da03b8b083..c2eeb236f99d0 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -13,6 +13,8 @@
 #include "make_string.h"
 #include "test_macros.h"
 
+#include <cmath>
+
 // In this file the following template types are used:
 // TestFunction must be callable as check(expected-result, string-to-format, args-to-format...)
 // ExceptionTest must be callable as check_exception(expected-exception, string-to-format, args-to-format...)
@@ -992,6 +994,1496 @@ void format_test_char_as_integer(TestFunction check,
         fmt, '*');
 }
 
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_hex_lower_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // Test whether the hexadecimal letters are the proper case.
+  // The precision is too large for float, so two tests are used.
+  check(STR("answer is '1.abcp+0'"), STR("answer is '{:a}'"), F(0x1.abcp+0));
+  check(STR("answer is '1.defp+0'"), STR("answer is '{:a}'"), F(0x1.defp+0));
+
+  // *** align-fill & width ***
+  check(STR("answer is '   1p-2'"), STR("answer is '{:7a}'"), F(0.25));
+  check(STR("answer is '   1p-2'"), STR("answer is '{:>7a}'"), F(0.25));
+  check(STR("answer is '1p-2   '"), STR("answer is '{:<7a}'"), F(0.25));
+  check(STR("answer is ' 1p-2  '"), STR("answer is '{:^7a}'"), F(0.25));
+
+  check(STR("answer is '---1p-3'"), STR("answer is '{:->7a}'"), F(125e-3));
+  check(STR("answer is '1p-3---'"), STR("answer is '{:-<7a}'"), F(125e-3));
+  check(STR("answer is '-1p-3--'"), STR("answer is '{:-^7a}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6a}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6a}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6a}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7a}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7a}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7a}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   1p-2'"), STR("answer is '{:>07a}'"), F(0.25));
+  check(STR("answer is '1p-2   '"), STR("answer is '{:<07a}'"), F(0.25));
+  check(STR("answer is ' 1p-2  '"), STR("answer is '{:^07a}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0p+0'"), STR("answer is '{:a}'"), F(0));
+  check(STR("answer is '0p+0'"), STR("answer is '{:-a}'"), F(0));
+  check(STR("answer is '+0p+0'"), STR("answer is '{:+a}'"), F(0));
+  check(STR("answer is ' 0p+0'"), STR("answer is '{: a}'"), F(0));
+
+  check(STR("answer is '-0p+0'"), STR("answer is '{:a}'"), F(-0.));
+  check(STR("answer is '-0p+0'"), STR("answer is '{:-a}'"), F(-0.));
+  check(STR("answer is '-0p+0'"), STR("answer is '{:+a}'"), F(-0.));
+  check(STR("answer is '-0p+0'"), STR("answer is '{: a}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:a}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-a}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+a}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: a}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: a}'"), nan_neg);
+
+  // *** alternate form ***
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0p+0'"), STR("answer is '{:a}'"), F(0));
+  check(STR("answer is '0.p+0'"), STR("answer is '{:#a}'"), F(0));
+
+  check(STR("answer is '1p+1'"), STR("answer is '{:.0a}'"), F(2.5));
+  check(STR("answer is '1.p+1'"), STR("answer is '{:#.0a}'"), F(2.5));
+  check(STR("answer is '1.4p+1'"), STR("answer is '{:#a}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#a}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#a}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '1p-5'"), STR("answer is '{:04a}'"), 0.03125);
+  check(STR("answer is '+1p-5'"), STR("answer is '{:+05a}'"), 0.03125);
+  check(STR("answer is '+01p-5'"), STR("answer is '{:+06a}'"), 0.03125);
+
+  check(STR("answer is '0001p-5'"), STR("answer is '{:07a}'"), 0.03125);
+  check(STR("answer is '0001p-5'"), STR("answer is '{:-07a}'"), 0.03125);
+  check(STR("answer is '+001p-5'"), STR("answer is '{:+07a}'"), 0.03125);
+  check(STR("answer is ' 001p-5'"), STR("answer is '{: 07a}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010a}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010a}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010a}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010a}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010a}'"), nan_neg);
+
+  // *** precision ***
+  // See format_test_floating_point_hex_lower_case_precision
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_hex_upper_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // Test whether the hexadecimal letters are the proper case.
+  // The precision is too large for float, so two tests are used.
+  check(STR("answer is '1.ABCP+0'"), STR("answer is '{:A}'"), F(0x1.abcp+0));
+  check(STR("answer is '1.DEFP+0'"), STR("answer is '{:A}'"), F(0x1.defp+0));
+
+  // *** align-fill & width ***
+  check(STR("answer is '   1P-2'"), STR("answer is '{:7A}'"), F(0.25));
+  check(STR("answer is '   1P-2'"), STR("answer is '{:>7A}'"), F(0.25));
+  check(STR("answer is '1P-2   '"), STR("answer is '{:<7A}'"), F(0.25));
+  check(STR("answer is ' 1P-2  '"), STR("answer is '{:^7A}'"), F(0.25));
+
+  check(STR("answer is '---1P-3'"), STR("answer is '{:->7A}'"), F(125e-3));
+  check(STR("answer is '1P-3---'"), STR("answer is '{:-<7A}'"), F(125e-3));
+  check(STR("answer is '-1P-3--'"), STR("answer is '{:-^7A}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6A}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6A}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6A}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7A}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7A}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7A}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   1P-2'"), STR("answer is '{:>07A}'"), F(0.25));
+  check(STR("answer is '1P-2   '"), STR("answer is '{:<07A}'"), F(0.25));
+  check(STR("answer is ' 1P-2  '"), STR("answer is '{:^07A}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0P+0'"), STR("answer is '{:A}'"), F(0));
+  check(STR("answer is '0P+0'"), STR("answer is '{:-A}'"), F(0));
+  check(STR("answer is '+0P+0'"), STR("answer is '{:+A}'"), F(0));
+  check(STR("answer is ' 0P+0'"), STR("answer is '{: A}'"), F(0));
+
+  check(STR("answer is '-0P+0'"), STR("answer is '{:A}'"), F(-0.));
+  check(STR("answer is '-0P+0'"), STR("answer is '{:-A}'"), F(-0.));
+  check(STR("answer is '-0P+0'"), STR("answer is '{:+A}'"), F(-0.));
+  check(STR("answer is '-0P+0'"), STR("answer is '{: A}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:A}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-A}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+A}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: A}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: A}'"), nan_neg);
+
+  // *** alternate form ***
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0P+0'"), STR("answer is '{:A}'"), F(0));
+  check(STR("answer is '0.P+0'"), STR("answer is '{:#A}'"), F(0));
+
+  check(STR("answer is '1P+1'"), STR("answer is '{:.0A}'"), F(2.5));
+  check(STR("answer is '1.P+1'"), STR("answer is '{:#.0A}'"), F(2.5));
+  check(STR("answer is '1.4P+1'"), STR("answer is '{:#A}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#A}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#A}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '1P-5'"), STR("answer is '{:04A}'"), 0.03125);
+  check(STR("answer is '+1P-5'"), STR("answer is '{:+05A}'"), 0.03125);
+  check(STR("answer is '+01P-5'"), STR("answer is '{:+06A}'"), 0.03125);
+
+  check(STR("answer is '0001P-5'"), STR("answer is '{:07A}'"), 0.03125);
+  check(STR("answer is '0001P-5'"), STR("answer is '{:-07A}'"), 0.03125);
+  check(STR("answer is '+001P-5'"), STR("answer is '{:+07A}'"), 0.03125);
+  check(STR("answer is ' 001P-5'"), STR("answer is '{: 07A}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010A}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010A}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010A}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010A}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010A}'"), nan_neg);
+
+  // *** precision ***
+  // See format_test_floating_point_hex_upper_case_precision
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_hex_lower_case_precision(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   1.000000p-2'"), STR("answer is '{:14.6a}'"), F(0.25));
+  check(STR("answer is '   1.000000p-2'"), STR("answer is '{:>14.6a}'"), F(0.25));
+  check(STR("answer is '1.000000p-2   '"), STR("answer is '{:<14.6a}'"), F(0.25));
+  check(STR("answer is ' 1.000000p-2  '"), STR("answer is '{:^14.6a}'"), F(0.25));
+
+  check(STR("answer is '---1.000000p-3'"), STR("answer is '{:->14.6a}'"), F(125e-3));
+  check(STR("answer is '1.000000p-3---'"), STR("answer is '{:-<14.6a}'"), F(125e-3));
+  check(STR("answer is '-1.000000p-3--'"), STR("answer is '{:-^14.6a}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6.6a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7.6a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6.6a}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6.6a}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6.6a}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7.6a}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7.6a}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7.6a}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   1.000000p-2'"), STR("answer is '{:>014.6a}'"), F(0.25));
+  check(STR("answer is '1.000000p-2   '"), STR("answer is '{:<014.6a}'"), F(0.25));
+  check(STR("answer is ' 1.000000p-2  '"), STR("answer is '{:^014.6a}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000p+0'"), STR("answer is '{:.6a}'"), F(0));
+  check(STR("answer is '0.000000p+0'"), STR("answer is '{:-.6a}'"), F(0));
+  check(STR("answer is '+0.000000p+0'"), STR("answer is '{:+.6a}'"), F(0));
+  check(STR("answer is ' 0.000000p+0'"), STR("answer is '{: .6a}'"), F(0));
+
+  check(STR("answer is '-0.000000p+0'"), STR("answer is '{:.6a}'"), F(-0.));
+  check(STR("answer is '-0.000000p+0'"), STR("answer is '{:-.6a}'"), F(-0.));
+  check(STR("answer is '-0.000000p+0'"), STR("answer is '{:+.6a}'"), F(-0.));
+  check(STR("answer is '-0.000000p+0'"), STR("answer is '{: .6a}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: .6a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: .6a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:.6a}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-.6a}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+.6a}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: .6a}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:.6a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-.6a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+.6a}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: .6a}'"), nan_neg);
+
+  // *** alternate form ***
+  check(STR("answer is '1.400000p+1'"), STR("answer is '{:#.6a}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#.6a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#.6a}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#.6a}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '1.000000p-5'"), STR("answer is '{:011.6a}'"), 0.03125);
+  check(STR("answer is '+1.000000p-5'"), STR("answer is '{:+012.6a}'"), 0.03125);
+  check(STR("answer is '+01.000000p-5'"), STR("answer is '{:+013.6a}'"), 0.03125);
+
+  check(STR("answer is '0001.000000p-5'"), STR("answer is '{:014.6a}'"), 0.03125);
+  check(STR("answer is '0001.000000p-5'"), STR("answer is '{:-014.6a}'"), 0.03125);
+  check(STR("answer is '+001.000000p-5'"), STR("answer is '{:+014.6a}'"), 0.03125);
+  check(STR("answer is ' 001.000000p-5'"), STR("answer is '{: 014.6a}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010.6a}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010.6a}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010.6a}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010.6a}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010.6a}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010.6a}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010.6a}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010.6a}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010.6a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010.6a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010.6a}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010.6a}'"), nan_neg);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_hex_upper_case_precision(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   1.000000P-2'"), STR("answer is '{:14.6A}'"), F(0.25));
+  check(STR("answer is '   1.000000P-2'"), STR("answer is '{:>14.6A}'"), F(0.25));
+  check(STR("answer is '1.000000P-2   '"), STR("answer is '{:<14.6A}'"), F(0.25));
+  check(STR("answer is ' 1.000000P-2  '"), STR("answer is '{:^14.6A}'"), F(0.25));
+
+  check(STR("answer is '---1.000000P-3'"), STR("answer is '{:->14.6A}'"), F(125e-3));
+  check(STR("answer is '1.000000P-3---'"), STR("answer is '{:-<14.6A}'"), F(125e-3));
+  check(STR("answer is '-1.000000P-3--'"), STR("answer is '{:-^14.6A}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6.6A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7.6A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6.6A}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6.6A}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6.6A}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7.6A}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7.6A}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7.6A}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   1.000000P-2'"), STR("answer is '{:>014.6A}'"), F(0.25));
+  check(STR("answer is '1.000000P-2   '"), STR("answer is '{:<014.6A}'"), F(0.25));
+  check(STR("answer is ' 1.000000P-2  '"), STR("answer is '{:^014.6A}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000P+0'"), STR("answer is '{:.6A}'"), F(0));
+  check(STR("answer is '0.000000P+0'"), STR("answer is '{:-.6A}'"), F(0));
+  check(STR("answer is '+0.000000P+0'"), STR("answer is '{:+.6A}'"), F(0));
+  check(STR("answer is ' 0.000000P+0'"), STR("answer is '{: .6A}'"), F(0));
+
+  check(STR("answer is '-0.000000P+0'"), STR("answer is '{:.6A}'"), F(-0.));
+  check(STR("answer is '-0.000000P+0'"), STR("answer is '{:-.6A}'"), F(-0.));
+  check(STR("answer is '-0.000000P+0'"), STR("answer is '{:+.6A}'"), F(-0.));
+  check(STR("answer is '-0.000000P+0'"), STR("answer is '{: .6A}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: .6A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: .6A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:.6A}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-.6A}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+.6A}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: .6A}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:.6A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-.6A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+.6A}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: .6A}'"), nan_neg);
+
+  // *** alternate form ***
+  check(STR("answer is '1.400000P+1'"), STR("answer is '{:#.6A}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#.6A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#.6A}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#.6A}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '1.000000P-5'"), STR("answer is '{:011.6A}'"), 0.03125);
+  check(STR("answer is '+1.000000P-5'"), STR("answer is '{:+012.6A}'"), 0.03125);
+  check(STR("answer is '+01.000000P-5'"), STR("answer is '{:+013.6A}'"), 0.03125);
+
+  check(STR("answer is '0001.000000P-5'"), STR("answer is '{:014.6A}'"), 0.03125);
+  check(STR("answer is '0001.000000P-5'"), STR("answer is '{:-014.6A}'"), 0.03125);
+  check(STR("answer is '+001.000000P-5'"), STR("answer is '{:+014.6A}'"), 0.03125);
+  check(STR("answer is ' 001.000000P-5'"), STR("answer is '{: 014.6A}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010.6A}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010.6A}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010.6A}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010.6A}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010.6A}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010.6A}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010.6A}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010.6A}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010.6A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010.6A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010.6A}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010.6A}'"), nan_neg);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_scientific_lower_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   2.500000e-01'"), STR("answer is '{:15e}'"), F(0.25));
+  check(STR("answer is '   2.500000e-01'"), STR("answer is '{:>15e}'"), F(0.25));
+  check(STR("answer is '2.500000e-01   '"), STR("answer is '{:<15e}'"), F(0.25));
+  check(STR("answer is ' 2.500000e-01  '"), STR("answer is '{:^15e}'"), F(0.25));
+
+  check(STR("answer is '---1.250000e-01'"), STR("answer is '{:->15e}'"), F(125e-3));
+  check(STR("answer is '1.250000e-01---'"), STR("answer is '{:-<15e}'"), F(125e-3));
+  check(STR("answer is '-1.250000e-01--'"), STR("answer is '{:-^15e}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6e}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7e}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7e}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7e}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6e}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6e}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6e}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7e}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7e}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7e}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   2.500000e-01'"), STR("answer is '{:>015e}'"), F(0.25));
+  check(STR("answer is '2.500000e-01   '"), STR("answer is '{:<015e}'"), F(0.25));
+  check(STR("answer is ' 2.500000e-01  '"), STR("answer is '{:^015e}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000e+00'"), STR("answer is '{:e}'"), F(0));
+  check(STR("answer is '0.000000e+00'"), STR("answer is '{:-e}'"), F(0));
+  check(STR("answer is '+0.000000e+00'"), STR("answer is '{:+e}'"), F(0));
+  check(STR("answer is ' 0.000000e+00'"), STR("answer is '{: e}'"), F(0));
+
+  check(STR("answer is '-0.000000e+00'"), STR("answer is '{:e}'"), F(-0.));
+  check(STR("answer is '-0.000000e+00'"), STR("answer is '{:-e}'"), F(-0.));
+  check(STR("answer is '-0.000000e+00'"), STR("answer is '{:+e}'"), F(-0.));
+  check(STR("answer is '-0.000000e+00'"), STR("answer is '{: e}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: e}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:e}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-e}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+e}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: e}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:e}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-e}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+e}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: e}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:e}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-e}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+e}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: e}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0e+00'"), STR("answer is '{:.0e}'"), F(0));
+  check(STR("answer is '0.e+00'"), STR("answer is '{:#.0e}'"), F(0));
+
+  check(STR("answer is '0.000000e+00'"), STR("answer is '{:#e}'"), F(0));
+  check(STR("answer is '2.500000e+00'"), STR("answer is '{:#e}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#e}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#e}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#e}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '3.125000e-02'"), STR("answer is '{:07e}'"), 0.03125);
+  check(STR("answer is '+3.125000e-02'"), STR("answer is '{:+07e}'"), 0.03125);
+  check(STR("answer is '+3.125000e-02'"), STR("answer is '{:+08e}'"), 0.03125);
+  check(STR("answer is '+3.125000e-02'"), STR("answer is '{:+09e}'"), 0.03125);
+
+  check(STR("answer is '003.125000e-02'"), STR("answer is '{:014e}'"), 0.03125);
+  check(STR("answer is '003.125000e-02'"), STR("answer is '{:-014e}'"), 0.03125);
+  check(STR("answer is '+03.125000e-02'"), STR("answer is '{:+014e}'"), 0.03125);
+  check(STR("answer is ' 03.125000e-02'"), STR("answer is '{: 014e}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010e}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010e}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010e}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010e}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010e}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010e}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010e}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010e}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010e}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010e}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010e}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010e}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010e}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010e}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '3e-02'"), STR("answer is '{:.0e}'"), 0.03125);
+  check(STR("answer is '3.1e-02'"), STR("answer is '{:.1e}'"), 0.03125);
+  check(STR("answer is '3.125e-02'"), STR("answer is '{:.3e}'"), 0.03125);
+  check(STR("answer is '3.1250000000e-02'"), STR("answer is '{:.10e}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_scientific_upper_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   2.500000E-01'"), STR("answer is '{:15E}'"), F(0.25));
+  check(STR("answer is '   2.500000E-01'"), STR("answer is '{:>15E}'"), F(0.25));
+  check(STR("answer is '2.500000E-01   '"), STR("answer is '{:<15E}'"), F(0.25));
+  check(STR("answer is ' 2.500000E-01  '"), STR("answer is '{:^15E}'"), F(0.25));
+
+  check(STR("answer is '---1.250000E-01'"), STR("answer is '{:->15E}'"), F(125e-3));
+  check(STR("answer is '1.250000E-01---'"), STR("answer is '{:-<15E}'"), F(125e-3));
+  check(STR("answer is '-1.250000E-01--'"), STR("answer is '{:-^15E}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6E}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7E}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6E}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6E}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6E}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7E}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7E}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7E}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   2.500000E-01'"), STR("answer is '{:>015E}'"), F(0.25));
+  check(STR("answer is '2.500000E-01   '"), STR("answer is '{:<015E}'"), F(0.25));
+  check(STR("answer is ' 2.500000E-01  '"), STR("answer is '{:^015E}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000E+00'"), STR("answer is '{:E}'"), F(0));
+  check(STR("answer is '0.000000E+00'"), STR("answer is '{:-E}'"), F(0));
+  check(STR("answer is '+0.000000E+00'"), STR("answer is '{:+E}'"), F(0));
+  check(STR("answer is ' 0.000000E+00'"), STR("answer is '{: E}'"), F(0));
+
+  check(STR("answer is '-0.000000E+00'"), STR("answer is '{:E}'"), F(-0.));
+  check(STR("answer is '-0.000000E+00'"), STR("answer is '{:-E}'"), F(-0.));
+  check(STR("answer is '-0.000000E+00'"), STR("answer is '{:+E}'"), F(-0.));
+  check(STR("answer is '-0.000000E+00'"), STR("answer is '{: E}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: E}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: E}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:E}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-E}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+E}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: E}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:E}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-E}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+E}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: E}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0E+00'"), STR("answer is '{:.0E}'"), F(0));
+  check(STR("answer is '0.E+00'"), STR("answer is '{:#.0E}'"), F(0));
+
+  check(STR("answer is '0.000000E+00'"), STR("answer is '{:#E}'"), F(0));
+  check(STR("answer is '2.500000E+00'"), STR("answer is '{:#E}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#E}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#E}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#E}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '3.125000E-02'"), STR("answer is '{:07E}'"), 0.03125);
+  check(STR("answer is '+3.125000E-02'"), STR("answer is '{:+07E}'"), 0.03125);
+  check(STR("answer is '+3.125000E-02'"), STR("answer is '{:+08E}'"), 0.03125);
+  check(STR("answer is '+3.125000E-02'"), STR("answer is '{:+09E}'"), 0.03125);
+
+  check(STR("answer is '003.125000E-02'"), STR("answer is '{:014E}'"), 0.03125);
+  check(STR("answer is '003.125000E-02'"), STR("answer is '{:-014E}'"), 0.03125);
+  check(STR("answer is '+03.125000E-02'"), STR("answer is '{:+014E}'"), 0.03125);
+  check(STR("answer is ' 03.125000E-02'"), STR("answer is '{: 014E}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010E}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010E}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010E}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010E}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010E}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010E}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010E}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010E}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010E}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010E}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010E}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010E}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '3E-02'"), STR("answer is '{:.0E}'"), 0.03125);
+  check(STR("answer is '3.1E-02'"), STR("answer is '{:.1E}'"), 0.03125);
+  check(STR("answer is '3.125E-02'"), STR("answer is '{:.3E}'"), 0.03125);
+  check(STR("answer is '3.1250000000E-02'"), STR("answer is '{:.10E}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_fixed_lower_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.250000'"), STR("answer is '{:11f}'"), F(0.25));
+  check(STR("answer is '   0.250000'"), STR("answer is '{:>11f}'"), F(0.25));
+  check(STR("answer is '0.250000   '"), STR("answer is '{:<11f}'"), F(0.25));
+  check(STR("answer is ' 0.250000  '"), STR("answer is '{:^11f}'"), F(0.25));
+
+  check(STR("answer is '---0.125000'"), STR("answer is '{:->11f}'"), F(125e-3));
+  check(STR("answer is '0.125000---'"), STR("answer is '{:-<11f}'"), F(125e-3));
+  check(STR("answer is '-0.125000--'"), STR("answer is '{:-^11f}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6f}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7f}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6f}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6f}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6f}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7f}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7f}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7f}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.250000'"), STR("answer is '{:>011f}'"), F(0.25));
+  check(STR("answer is '0.250000   '"), STR("answer is '{:<011f}'"), F(0.25));
+  check(STR("answer is ' 0.250000  '"), STR("answer is '{:^011f}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000'"), STR("answer is '{:f}'"), F(0));
+  check(STR("answer is '0.000000'"), STR("answer is '{:-f}'"), F(0));
+  check(STR("answer is '+0.000000'"), STR("answer is '{:+f}'"), F(0));
+  check(STR("answer is ' 0.000000'"), STR("answer is '{: f}'"), F(0));
+
+  check(STR("answer is '-0.000000'"), STR("answer is '{:f}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{:-f}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{:+f}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{: f}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: f}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: f}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:f}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-f}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+f}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: f}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:f}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-f}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+f}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: f}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0f}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0f}'"), F(0));
+
+  check(STR("answer is '0.000000'"), STR("answer is '{:#f}'"), F(0));
+  check(STR("answer is '2.500000'"), STR("answer is '{:#f}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#f}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#f}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#f}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.031250'"), STR("answer is '{:07f}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+07f}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+08f}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+09f}'"), 0.03125);
+
+  check(STR("answer is '000.031250'"), STR("answer is '{:010f}'"), 0.03125);
+  check(STR("answer is '000.031250'"), STR("answer is '{:-010f}'"), 0.03125);
+  check(STR("answer is '+00.031250'"), STR("answer is '{:+010f}'"), 0.03125);
+  check(STR("answer is ' 00.031250'"), STR("answer is '{: 010f}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010f}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010f}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010f}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010f}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010f}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010f}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010f}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010f}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010f}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010f}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010f}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010f}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0'"), STR("answer is '{:.0f}'"), 0.03125);
+  check(STR("answer is '0.0'"), STR("answer is '{:.1f}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5f}'"), 0.03125);
+  check(STR("answer is '0.0312500000'"), STR("answer is '{:.10f}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_fixed_upper_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.250000'"), STR("answer is '{:11F}'"), F(0.25));
+  check(STR("answer is '   0.250000'"), STR("answer is '{:>11F}'"), F(0.25));
+  check(STR("answer is '0.250000   '"), STR("answer is '{:<11F}'"), F(0.25));
+  check(STR("answer is ' 0.250000  '"), STR("answer is '{:^11F}'"), F(0.25));
+
+  check(STR("answer is '---0.125000'"), STR("answer is '{:->11F}'"), F(125e-3));
+  check(STR("answer is '0.125000---'"), STR("answer is '{:-<11F}'"), F(125e-3));
+  check(STR("answer is '-0.125000--'"), STR("answer is '{:-^11F}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6F}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7F}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6F}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6F}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6F}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7F}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7F}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7F}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.250000'"), STR("answer is '{:>011F}'"), F(0.25));
+  check(STR("answer is '0.250000   '"), STR("answer is '{:<011F}'"), F(0.25));
+  check(STR("answer is ' 0.250000  '"), STR("answer is '{:^011F}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0.000000'"), STR("answer is '{:F}'"), F(0));
+  check(STR("answer is '0.000000'"), STR("answer is '{:-F}'"), F(0));
+  check(STR("answer is '+0.000000'"), STR("answer is '{:+F}'"), F(0));
+  check(STR("answer is ' 0.000000'"), STR("answer is '{: F}'"), F(0));
+
+  check(STR("answer is '-0.000000'"), STR("answer is '{:F}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{:-F}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{:+F}'"), F(-0.));
+  check(STR("answer is '-0.000000'"), STR("answer is '{: F}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: F}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: F}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:F}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-F}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+F}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: F}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:F}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-F}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+F}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: F}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0F}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0F}'"), F(0));
+
+  check(STR("answer is '0.000000'"), STR("answer is '{:#F}'"), F(0));
+  check(STR("answer is '2.500000'"), STR("answer is '{:#F}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#F}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#F}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#F}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.031250'"), STR("answer is '{:07F}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+07F}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+08F}'"), 0.03125);
+  check(STR("answer is '+0.031250'"), STR("answer is '{:+09F}'"), 0.03125);
+
+  check(STR("answer is '000.031250'"), STR("answer is '{:010F}'"), 0.03125);
+  check(STR("answer is '000.031250'"), STR("answer is '{:-010F}'"), 0.03125);
+  check(STR("answer is '+00.031250'"), STR("answer is '{:+010F}'"), 0.03125);
+  check(STR("answer is ' 00.031250'"), STR("answer is '{: 010F}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010F}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010F}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010F}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010F}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010F}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010F}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010F}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010F}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010F}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010F}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010F}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010F}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0'"), STR("answer is '{:.0F}'"), 0.03125);
+  check(STR("answer is '0.0'"), STR("answer is '{:.1F}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5F}'"), 0.03125);
+  check(STR("answer is '0.0312500000'"), STR("answer is '{:.10F}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_general_lower_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.25'"), STR("answer is '{:7g}'"), F(0.25));
+  check(STR("answer is '   0.25'"), STR("answer is '{:>7g}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<7g}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^7g}'"), F(0.25));
+
+  check(STR("answer is '---0.125'"), STR("answer is '{:->8g}'"), F(125e-3));
+  check(STR("answer is '0.125---'"), STR("answer is '{:-<8g}'"), F(125e-3));
+  check(STR("answer is '-0.125--'"), STR("answer is '{:-^8g}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6g}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7g}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7g}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7g}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6g}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6g}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6g}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7g}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7g}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7g}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.25'"), STR("answer is '{:>07g}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<07g}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^07g}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0'"), STR("answer is '{:g}'"), F(0));
+  check(STR("answer is '0'"), STR("answer is '{:-g}'"), F(0));
+  check(STR("answer is '+0'"), STR("answer is '{:+g}'"), F(0));
+  check(STR("answer is ' 0'"), STR("answer is '{: g}'"), F(0));
+
+  check(STR("answer is '-0'"), STR("answer is '{:g}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:-g}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:+g}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{: g}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: g}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:g}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-g}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+g}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: g}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:g}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-g}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+g}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: g}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:g}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-g}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+g}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: g}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0g}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0g}'"), F(0));
+
+  check(STR("answer is '0.'"), STR("answer is '{:#g}'"), F(0));
+  check(STR("answer is '2.5'"), STR("answer is '{:#g}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#g}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#g}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#g}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.03125'"), STR("answer is '{:06g}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+06g}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+07g}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+08g}'"), 0.03125);
+
+  check(STR("answer is '000.03125'"), STR("answer is '{:09g}'"), 0.03125);
+  check(STR("answer is '000.03125'"), STR("answer is '{:-09g}'"), 0.03125);
+  check(STR("answer is '+00.03125'"), STR("answer is '{:+09g}'"), 0.03125);
+  check(STR("answer is ' 00.03125'"), STR("answer is '{: 09g}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010g}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010g}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010g}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010g}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010g}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010g}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010g}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010g}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010g}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010g}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010g}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010g}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010g}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010g}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0.03'"), STR("answer is '{:.0g}'"), 0.03125);
+  check(STR("answer is '0.03'"), STR("answer is '{:.1g}'"), 0.03125);
+  check(STR("answer is '0.031'"), STR("answer is '{:.2g}'"), 0.03125);
+  check(STR("answer is '0.0312'"), STR("answer is '{:.3g}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.4g}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5g}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.10g}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_general_upper_case(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.25'"), STR("answer is '{:7G}'"), F(0.25));
+  check(STR("answer is '   0.25'"), STR("answer is '{:>7G}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<7G}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^7G}'"), F(0.25));
+
+  check(STR("answer is '---0.125'"), STR("answer is '{:->8G}'"), F(125e-3));
+  check(STR("answer is '0.125---'"), STR("answer is '{:-<8G}'"), F(125e-3));
+  check(STR("answer is '-0.125--'"), STR("answer is '{:-^8G}'"), F(125e-3));
+
+  check(STR("answer is '***INF'"), STR("answer is '{:*>6G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF***'"), STR("answer is '{:*<6G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*INF**'"), STR("answer is '{:*^6G}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-INF'"), STR("answer is '{:#>7G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF###'"), STR("answer is '{:#<7G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-INF##'"), STR("answer is '{:#^7G}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^NAN'"), STR("answer is '{:^>6G}'"), nan_pos);
+  check(STR("answer is 'NAN^^^'"), STR("answer is '{:^<6G}'"), nan_pos);
+  check(STR("answer is '^NAN^^'"), STR("answer is '{:^^6G}'"), nan_pos);
+
+  check(STR("answer is '000-NAN'"), STR("answer is '{:0>7G}'"), nan_neg);
+  check(STR("answer is '-NAN000'"), STR("answer is '{:0<7G}'"), nan_neg);
+  check(STR("answer is '0-NAN00'"), STR("answer is '{:0^7G}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.25'"), STR("answer is '{:>07G}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<07G}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^07G}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0'"), STR("answer is '{:G}'"), F(0));
+  check(STR("answer is '0'"), STR("answer is '{:-G}'"), F(0));
+  check(STR("answer is '+0'"), STR("answer is '{:+G}'"), F(0));
+  check(STR("answer is ' 0'"), STR("answer is '{: G}'"), F(0));
+
+  check(STR("answer is '-0'"), STR("answer is '{:G}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:-G}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:+G}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{: G}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'INF'"), STR("answer is '{:G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'INF'"), STR("answer is '{:-G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+INF'"), STR("answer is '{:+G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' INF'"), STR("answer is '{: G}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-INF'"), STR("answer is '{:G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:-G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:+G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{: G}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:G}'"), nan_pos);
+  check(STR("answer is 'NAN'"), STR("answer is '{:-G}'"), nan_pos);
+  check(STR("answer is '+NAN'"), STR("answer is '{:+G}'"), nan_pos);
+  check(STR("answer is ' NAN'"), STR("answer is '{: G}'"), nan_pos);
+
+  check(STR("answer is '-NAN'"), STR("answer is '{:G}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:-G}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{:+G}'"), nan_neg);
+  check(STR("answer is '-NAN'"), STR("answer is '{: G}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0G}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0G}'"), F(0));
+
+  check(STR("answer is '0.'"), STR("answer is '{:#G}'"), F(0));
+  check(STR("answer is '2.5'"), STR("answer is '{:#G}'"), F(2.5));
+
+  check(STR("answer is 'INF'"), STR("answer is '{:#G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-INF'"), STR("answer is '{:#G}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'NAN'"), STR("answer is '{:#G}'"), nan_pos);
+  check(STR("answer is '-NAN'"), STR("answer is '{:#G}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.03125'"), STR("answer is '{:06G}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+06G}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+07G}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+08G}'"), 0.03125);
+
+  check(STR("answer is '000.03125'"), STR("answer is '{:09G}'"), 0.03125);
+  check(STR("answer is '000.03125'"), STR("answer is '{:-09G}'"), 0.03125);
+  check(STR("answer is '+00.03125'"), STR("answer is '{:+09G}'"), 0.03125);
+  check(STR("answer is ' 00.03125'"), STR("answer is '{: 09G}'"), 0.03125);
+
+  check(STR("answer is '       INF'"), STR("answer is '{:010G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{:-010G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +INF'"), STR("answer is '{:+010G}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       INF'"), STR("answer is '{: 010G}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -INF'"), STR("answer is '{:010G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:-010G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{:+010G}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -INF'"), STR("answer is '{: 010G}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       NAN'"), STR("answer is '{:010G}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{:-010G}'"), nan_pos);
+  check(STR("answer is '      +NAN'"), STR("answer is '{:+010G}'"), nan_pos);
+  check(STR("answer is '       NAN'"), STR("answer is '{: 010G}'"), nan_pos);
+
+  check(STR("answer is '      -NAN'"), STR("answer is '{:010G}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:-010G}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{:+010G}'"), nan_neg);
+  check(STR("answer is '      -NAN'"), STR("answer is '{: 010G}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0.03'"), STR("answer is '{:.0G}'"), 0.03125);
+  check(STR("answer is '0.03'"), STR("answer is '{:.1G}'"), 0.03125);
+  check(STR("answer is '0.031'"), STR("answer is '{:.2G}'"), 0.03125);
+  check(STR("answer is '0.0312'"), STR("answer is '{:.3G}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.4G}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5G}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.10G}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_default(TestFunction check) {
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.25'"), STR("answer is '{:7}'"), F(0.25));
+  check(STR("answer is '   0.25'"), STR("answer is '{:>7}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<7}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^7}'"), F(0.25));
+
+  check(STR("answer is '---0.125'"), STR("answer is '{:->8}'"), F(125e-3));
+  check(STR("answer is '0.125---'"), STR("answer is '{:-<8}'"), F(125e-3));
+  check(STR("answer is '-0.125--'"), STR("answer is '{:-^8}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.25'"), STR("answer is '{:>07}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<07}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^07}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0'"), STR("answer is '{:}'"), F(0));
+  check(STR("answer is '0'"), STR("answer is '{:-}'"), F(0));
+  check(STR("answer is '+0'"), STR("answer is '{:+}'"), F(0));
+  check(STR("answer is ' 0'"), STR("answer is '{: }'"), F(0));
+
+  check(STR("answer is '-0'"), STR("answer is '{:}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:-}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:+}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{: }'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: }'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: }'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: }'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: }'"), nan_neg);
+
+  // *** alternate form ***
+  check(STR("answer is '0.'"), STR("answer is '{:#}'"), F(0));
+  check(STR("answer is '2.5'"), STR("answer is '{:#}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.03125'"), STR("answer is '{:07}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+07}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+08}'"), 0.03125);
+  check(STR("answer is '+00.03125'"), STR("answer is '{:+09}'"), 0.03125);
+
+  check(STR("answer is '0000.03125'"), STR("answer is '{:010}'"), 0.03125);
+  check(STR("answer is '0000.03125'"), STR("answer is '{:-010}'"), 0.03125);
+  check(STR("answer is '+000.03125'"), STR("answer is '{:+010}'"), 0.03125);
+  check(STR("answer is ' 000.03125'"), STR("answer is '{: 010}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010}'"), nan_neg);
+
+  // *** precision ***
+  // See format_test_floating_point_default_precision
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction>
+void format_test_floating_point_default_precision(TestFunction check) {
+
+  auto nan_pos = std::numeric_limits<F>::quiet_NaN(); // "nan"
+  auto nan_neg = std::copysign(nan_pos, -1.0);        // "-nan"
+
+  // *** align-fill & width ***
+  check(STR("answer is '   0.25'"), STR("answer is '{:7.6}'"), F(0.25));
+  check(STR("answer is '   0.25'"), STR("answer is '{:>7.6}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<7.6}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^7.6}'"), F(0.25));
+
+  check(STR("answer is '---0.125'"), STR("answer is '{:->8.6}'"), F(125e-3));
+  check(STR("answer is '0.125---'"), STR("answer is '{:-<8.6}'"), F(125e-3));
+  check(STR("answer is '-0.125--'"), STR("answer is '{:-^8.6}'"), F(125e-3));
+
+  check(STR("answer is '***inf'"), STR("answer is '{:*>6.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf***'"), STR("answer is '{:*<6.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '*inf**'"), STR("answer is '{:*^6.6}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '###-inf'"), STR("answer is '{:#>7.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf###'"), STR("answer is '{:#<7.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '#-inf##'"), STR("answer is '{:#^7.6}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '^^^nan'"), STR("answer is '{:^>6.6}'"), nan_pos);
+  check(STR("answer is 'nan^^^'"), STR("answer is '{:^<6.6}'"), nan_pos);
+  check(STR("answer is '^nan^^'"), STR("answer is '{:^^6.6}'"), nan_pos);
+
+  check(STR("answer is '000-nan'"), STR("answer is '{:0>7.6}'"), nan_neg);
+  check(STR("answer is '-nan000'"), STR("answer is '{:0<7.6}'"), nan_neg);
+  check(STR("answer is '0-nan00'"), STR("answer is '{:0^7.6}'"), nan_neg);
+
+  // Test whether zero padding is ignored
+  check(STR("answer is '   0.25'"), STR("answer is '{:>07.6}'"), F(0.25));
+  check(STR("answer is '0.25   '"), STR("answer is '{:<07.6}'"), F(0.25));
+  check(STR("answer is ' 0.25  '"), STR("answer is '{:^07.6}'"), F(0.25));
+
+  // *** Sign ***
+  check(STR("answer is '0'"), STR("answer is '{:.6}'"), F(0));
+  check(STR("answer is '0'"), STR("answer is '{:-.6}'"), F(0));
+  check(STR("answer is '+0'"), STR("answer is '{:+.6}'"), F(0));
+  check(STR("answer is ' 0'"), STR("answer is '{: .6}'"), F(0));
+
+  check(STR("answer is '-0'"), STR("answer is '{:.6}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:-.6}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{:+.6}'"), F(-0.));
+  check(STR("answer is '-0'"), STR("answer is '{: .6}'"), F(-0.));
+
+  // [format.string.std]/5 The sign option applies to floating-point infinity and NaN.
+  check(STR("answer is 'inf'"), STR("answer is '{:.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is 'inf'"), STR("answer is '{:-.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '+inf'"), STR("answer is '{:+.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is ' inf'"), STR("answer is '{: .6}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '-inf'"), STR("answer is '{:.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:-.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:+.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{: .6}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:.6}'"), nan_pos);
+  check(STR("answer is 'nan'"), STR("answer is '{:-.6}'"), nan_pos);
+  check(STR("answer is '+nan'"), STR("answer is '{:+.6}'"), nan_pos);
+  check(STR("answer is ' nan'"), STR("answer is '{: .6}'"), nan_pos);
+
+  check(STR("answer is '-nan'"), STR("answer is '{:.6}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:-.6}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{:+.6}'"), nan_neg);
+  check(STR("answer is '-nan'"), STR("answer is '{: .6}'"), nan_neg);
+
+  // *** alternate form **
+  // When precision is zero there's no decimal point except when the alternate form is specified.
+  check(STR("answer is '0'"), STR("answer is '{:.0}'"), F(0));
+  check(STR("answer is '0.'"), STR("answer is '{:#.0}'"), F(0));
+
+  check(STR("answer is '0.'"), STR("answer is '{:#.6}'"), F(0));
+  check(STR("answer is '2.5'"), STR("answer is '{:#.6}'"), F(2.5));
+
+  check(STR("answer is 'inf'"), STR("answer is '{:#.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '-inf'"), STR("answer is '{:#.6}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is 'nan'"), STR("answer is '{:#.6}'"), nan_pos);
+  check(STR("answer is '-nan'"), STR("answer is '{:#.6}'"), nan_neg);
+
+  // *** zero-padding & width ***
+  check(STR("answer is '0.03125'"), STR("answer is '{:06.6}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+06.6}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+07.6}'"), 0.03125);
+  check(STR("answer is '+0.03125'"), STR("answer is '{:+08.6}'"), 0.03125);
+
+  check(STR("answer is '000.03125'"), STR("answer is '{:09.6}'"), 0.03125);
+  check(STR("answer is '000.03125'"), STR("answer is '{:-09.6}'"), 0.03125);
+  check(STR("answer is '+00.03125'"), STR("answer is '{:+09.6}'"), 0.03125);
+  check(STR("answer is ' 00.03125'"), STR("answer is '{: 09.6}'"), 0.03125);
+
+  check(STR("answer is '       inf'"), STR("answer is '{:010.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{:-010.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '      +inf'"), STR("answer is '{:+010.6}'"), std::numeric_limits<F>::infinity());
+  check(STR("answer is '       inf'"), STR("answer is '{: 010.6}'"), std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '      -inf'"), STR("answer is '{:010.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:-010.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{:+010.6}'"), -std::numeric_limits<F>::infinity());
+  check(STR("answer is '      -inf'"), STR("answer is '{: 010.6}'"), -std::numeric_limits<F>::infinity());
+
+  check(STR("answer is '       nan'"), STR("answer is '{:010.6}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{:-010.6}'"), nan_pos);
+  check(STR("answer is '      +nan'"), STR("answer is '{:+010.6}'"), nan_pos);
+  check(STR("answer is '       nan'"), STR("answer is '{: 010.6}'"), nan_pos);
+
+  check(STR("answer is '      -nan'"), STR("answer is '{:010.6}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:-010.6}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{:+010.6}'"), nan_neg);
+  check(STR("answer is '      -nan'"), STR("answer is '{: 010.6}'"), nan_neg);
+
+  // *** precision ***
+  check(STR("answer is '0.03'"), STR("answer is '{:.0}'"), 0.03125);
+  check(STR("answer is '0.03'"), STR("answer is '{:.1}'"), 0.03125);
+  check(STR("answer is '0.031'"), STR("answer is '{:.2}'"), 0.03125);
+  check(STR("answer is '0.0312'"), STR("answer is '{:.3}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.4}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.5}'"), 0.03125);
+  check(STR("answer is '0.03125'"), STR("answer is '{:.10}'"), 0.03125);
+
+  // *** locale-specific form ***
+  // See locale-specific_form.pass.cpp
+}
+
+template <class F, class CharT, class TestFunction, class ExceptionTest>
+void format_test_floating_point(TestFunction check, ExceptionTest check_exception) {
+  format_test_floating_point_hex_lower_case<F, CharT>(check);
+  format_test_floating_point_hex_upper_case<F, CharT>(check);
+  format_test_floating_point_hex_lower_case_precision<F, CharT>(check);
+  format_test_floating_point_hex_upper_case_precision<F, CharT>(check);
+
+  format_test_floating_point_scientific_lower_case<F, CharT>(check);
+  format_test_floating_point_scientific_upper_case<F, CharT>(check);
+
+  format_test_floating_point_fixed_lower_case<F, CharT>(check);
+  format_test_floating_point_fixed_upper_case<F, CharT>(check);
+
+  format_test_floating_point_general_lower_case<F, CharT>(check);
+  format_test_floating_point_general_upper_case<F, CharT>(check);
+
+  format_test_floating_point_default<F, CharT>(check);
+  format_test_floating_point_default_precision<F, CharT>(check);
+
+  // *** type ***
+  for (const auto& fmt : invalid_types<CharT>("aAeEfFgG"))
+    check_exception("The format-spec type has a type not supported for a floating-point argument", fmt, F(1));
+}
+
+template <class CharT, class TestFunction, class ExceptionTest>
+void format_test_floating_point(TestFunction check, ExceptionTest check_exception) {
+  format_test_floating_point<float, CharT>(check, check_exception);
+  format_test_floating_point<double, CharT>(check, check_exception);
+  format_test_floating_point<long double, CharT>(check, check_exception);
+}
+
 template <class CharT, class TestFunction, class ExceptionTest>
 void format_tests(TestFunction check, ExceptionTest check_exception) {
   // *** Test escaping  ***
@@ -1115,12 +2607,10 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   format_test_unsigned_integer<CharT>(check, check_exception);
 
   // *** Test floating point format argument ***
-// TODO FMT Enable after floating-point support has been enabled
-#if 0
-  check(STR("hello 42.000000"), STR("hello {}"), static_cast<float>(42));
-  check(STR("hello 42.000000"), STR("hello {}"), static_cast<double>(42));
-  check(STR("hello 42.000000"), STR("hello {}"), static_cast<long double>(42));
-#endif
+  check(STR("hello 42"), STR("hello {}"), static_cast<float>(42));
+  check(STR("hello 42"), STR("hello {}"), static_cast<double>(42));
+  check(STR("hello 42"), STR("hello {}"), static_cast<long double>(42));
+  format_test_floating_point<CharT>(check, check_exception);
 }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
index 7fe6907f2f4bd..5d86e46be7e6b 100644
--- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
@@ -110,6 +110,7 @@ struct numpunct<char> : std::numpunct<char> {
 
   std::string do_grouping() const override { return "\1\2\3\2\1"; };
   char do_thousands_sep() const override { return '_'; }
+  char do_decimal_point() const override { return '#'; }
 };
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS
@@ -120,6 +121,7 @@ struct numpunct<wchar_t> : std::numpunct<wchar_t> {
 
   std::string do_grouping() const override { return "\1\2\3\2\1"; };
   wchar_t do_thousands_sep() const override { return L'_'; }
+  wchar_t do_decimal_point() const override { return L'#'; }
 };
 #endif
 
@@ -607,10 +609,1725 @@ void test_integer() {
   test(STR("-0X004_A"), loc, STR("{:#08LX}"), -0x4a);
 }
 
+template <class F, class CharT>
+void test_floating_point_hex_lower_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.23456p-3"), STR("{:La}"), F(0x1.23456p-3));
+  test(STR("1.23456p-2"), STR("{:La}"), F(0x1.23456p-2));
+  test(STR("1.23456p-1"), STR("{:La}"), F(0x1.23456p-1));
+  test(STR("1.23456p+0"), STR("{:La}"), F(0x1.23456p0));
+  test(STR("1.23456p+1"), STR("{:La}"), F(0x1.23456p+1));
+  test(STR("1.23456p+2"), STR("{:La}"), F(0x1.23456p+2));
+  test(STR("1.23456p+3"), STR("{:La}"), F(0x1.23456p+3));
+  test(STR("1.23456p+20"), STR("{:La}"), F(0x1.23456p+20));
+
+  std::locale::global(loc);
+  test(STR("1#23456p-3"), STR("{:La}"), F(0x1.23456p-3));
+  test(STR("1#23456p-2"), STR("{:La}"), F(0x1.23456p-2));
+  test(STR("1#23456p-1"), STR("{:La}"), F(0x1.23456p-1));
+  test(STR("1#23456p+0"), STR("{:La}"), F(0x1.23456p0));
+  test(STR("1#23456p+1"), STR("{:La}"), F(0x1.23456p+1));
+  test(STR("1#23456p+2"), STR("{:La}"), F(0x1.23456p+2));
+  test(STR("1#23456p+3"), STR("{:La}"), F(0x1.23456p+3));
+  test(STR("1#23456p+20"), STR("{:La}"), F(0x1.23456p+20));
+
+  test(STR("1.23456p-3"), en_US, STR("{:La}"), F(0x1.23456p-3));
+  test(STR("1.23456p-2"), en_US, STR("{:La}"), F(0x1.23456p-2));
+  test(STR("1.23456p-1"), en_US, STR("{:La}"), F(0x1.23456p-1));
+  test(STR("1.23456p+0"), en_US, STR("{:La}"), F(0x1.23456p0));
+  test(STR("1.23456p+1"), en_US, STR("{:La}"), F(0x1.23456p+1));
+  test(STR("1.23456p+2"), en_US, STR("{:La}"), F(0x1.23456p+2));
+  test(STR("1.23456p+3"), en_US, STR("{:La}"), F(0x1.23456p+3));
+  test(STR("1.23456p+20"), en_US, STR("{:La}"), F(0x1.23456p+20));
+
+  std::locale::global(en_US);
+  test(STR("1#23456p-3"), loc, STR("{:La}"), F(0x1.23456p-3));
+  test(STR("1#23456p-2"), loc, STR("{:La}"), F(0x1.23456p-2));
+  test(STR("1#23456p-1"), loc, STR("{:La}"), F(0x1.23456p-1));
+  test(STR("1#23456p+0"), loc, STR("{:La}"), F(0x1.23456p0));
+  test(STR("1#23456p+1"), loc, STR("{:La}"), F(0x1.23456p+1));
+  test(STR("1#23456p+2"), loc, STR("{:La}"), F(0x1.23456p+2));
+  test(STR("1#23456p+3"), loc, STR("{:La}"), F(0x1.23456p+3));
+  test(STR("1#23456p+20"), loc, STR("{:La}"), F(0x1.23456p+20));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1.23456p+3$$$"), STR("{:$<13La}"), F(0x1.23456p3));
+  test(STR("$$$1.23456p+3"), STR("{:$>13La}"), F(0x1.23456p3));
+  test(STR("$1.23456p+3$$"), STR("{:$^13La}"), F(0x1.23456p3));
+  test(STR("0001.23456p+3"), STR("{:013La}"), F(0x1.23456p3));
+  test(STR("-1.23456p+3$$$"), STR("{:$<14La}"), F(-0x1.23456p3));
+  test(STR("$$$-1.23456p+3"), STR("{:$>14La}"), F(-0x1.23456p3));
+  test(STR("$-1.23456p+3$$"), STR("{:$^14La}"), F(-0x1.23456p3));
+  test(STR("-0001.23456p+3"), STR("{:014La}"), F(-0x1.23456p3));
+
+  std::locale::global(loc);
+  test(STR("1#23456p+3$$$"), STR("{:$<13La}"), F(0x1.23456p3));
+  test(STR("$$$1#23456p+3"), STR("{:$>13La}"), F(0x1.23456p3));
+  test(STR("$1#23456p+3$$"), STR("{:$^13La}"), F(0x1.23456p3));
+  test(STR("0001#23456p+3"), STR("{:013La}"), F(0x1.23456p3));
+  test(STR("-1#23456p+3$$$"), STR("{:$<14La}"), F(-0x1.23456p3));
+  test(STR("$$$-1#23456p+3"), STR("{:$>14La}"), F(-0x1.23456p3));
+  test(STR("$-1#23456p+3$$"), STR("{:$^14La}"), F(-0x1.23456p3));
+  test(STR("-0001#23456p+3"), STR("{:014La}"), F(-0x1.23456p3));
+
+  test(STR("1.23456p+3$$$"), en_US, STR("{:$<13La}"), F(0x1.23456p3));
+  test(STR("$$$1.23456p+3"), en_US, STR("{:$>13La}"), F(0x1.23456p3));
+  test(STR("$1.23456p+3$$"), en_US, STR("{:$^13La}"), F(0x1.23456p3));
+  test(STR("0001.23456p+3"), en_US, STR("{:013La}"), F(0x1.23456p3));
+  test(STR("-1.23456p+3$$$"), en_US, STR("{:$<14La}"), F(-0x1.23456p3));
+  test(STR("$$$-1.23456p+3"), en_US, STR("{:$>14La}"), F(-0x1.23456p3));
+  test(STR("$-1.23456p+3$$"), en_US, STR("{:$^14La}"), F(-0x1.23456p3));
+  test(STR("-0001.23456p+3"), en_US, STR("{:014La}"), F(-0x1.23456p3));
+
+  std::locale::global(en_US);
+  test(STR("1#23456p+3$$$"), loc, STR("{:$<13La}"), F(0x1.23456p3));
+  test(STR("$$$1#23456p+3"), loc, STR("{:$>13La}"), F(0x1.23456p3));
+  test(STR("$1#23456p+3$$"), loc, STR("{:$^13La}"), F(0x1.23456p3));
+  test(STR("0001#23456p+3"), loc, STR("{:013La}"), F(0x1.23456p3));
+  test(STR("-1#23456p+3$$$"), loc, STR("{:$<14La}"), F(-0x1.23456p3));
+  test(STR("$$$-1#23456p+3"), loc, STR("{:$>14La}"), F(-0x1.23456p3));
+  test(STR("$-1#23456p+3$$"), loc, STR("{:$^14La}"), F(-0x1.23456p3));
+  test(STR("-0001#23456p+3"), loc, STR("{:014La}"), F(-0x1.23456p3));
+}
+
+template <class F, class CharT>
+void test_floating_point_hex_upper_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.23456P-3"), STR("{:LA}"), F(0x1.23456p-3));
+  test(STR("1.23456P-2"), STR("{:LA}"), F(0x1.23456p-2));
+  test(STR("1.23456P-1"), STR("{:LA}"), F(0x1.23456p-1));
+  test(STR("1.23456P+0"), STR("{:LA}"), F(0x1.23456p0));
+  test(STR("1.23456P+1"), STR("{:LA}"), F(0x1.23456p+1));
+  test(STR("1.23456P+2"), STR("{:LA}"), F(0x1.23456p+2));
+  test(STR("1.23456P+3"), STR("{:LA}"), F(0x1.23456p+3));
+  test(STR("1.23456P+20"), STR("{:LA}"), F(0x1.23456p+20));
+
+  std::locale::global(loc);
+  test(STR("1#23456P-3"), STR("{:LA}"), F(0x1.23456p-3));
+  test(STR("1#23456P-2"), STR("{:LA}"), F(0x1.23456p-2));
+  test(STR("1#23456P-1"), STR("{:LA}"), F(0x1.23456p-1));
+  test(STR("1#23456P+0"), STR("{:LA}"), F(0x1.23456p0));
+  test(STR("1#23456P+1"), STR("{:LA}"), F(0x1.23456p+1));
+  test(STR("1#23456P+2"), STR("{:LA}"), F(0x1.23456p+2));
+  test(STR("1#23456P+3"), STR("{:LA}"), F(0x1.23456p+3));
+  test(STR("1#23456P+20"), STR("{:LA}"), F(0x1.23456p+20));
+
+  test(STR("1.23456P-3"), en_US, STR("{:LA}"), F(0x1.23456p-3));
+  test(STR("1.23456P-2"), en_US, STR("{:LA}"), F(0x1.23456p-2));
+  test(STR("1.23456P-1"), en_US, STR("{:LA}"), F(0x1.23456p-1));
+  test(STR("1.23456P+0"), en_US, STR("{:LA}"), F(0x1.23456p0));
+  test(STR("1.23456P+1"), en_US, STR("{:LA}"), F(0x1.23456p+1));
+  test(STR("1.23456P+2"), en_US, STR("{:LA}"), F(0x1.23456p+2));
+  test(STR("1.23456P+3"), en_US, STR("{:LA}"), F(0x1.23456p+3));
+  test(STR("1.23456P+20"), en_US, STR("{:LA}"), F(0x1.23456p+20));
+
+  std::locale::global(en_US);
+  test(STR("1#23456P-3"), loc, STR("{:LA}"), F(0x1.23456p-3));
+  test(STR("1#23456P-2"), loc, STR("{:LA}"), F(0x1.23456p-2));
+  test(STR("1#23456P-1"), loc, STR("{:LA}"), F(0x1.23456p-1));
+  test(STR("1#23456P+0"), loc, STR("{:LA}"), F(0x1.23456p0));
+  test(STR("1#23456P+1"), loc, STR("{:LA}"), F(0x1.23456p+1));
+  test(STR("1#23456P+2"), loc, STR("{:LA}"), F(0x1.23456p+2));
+  test(STR("1#23456P+3"), loc, STR("{:LA}"), F(0x1.23456p+3));
+  test(STR("1#23456P+20"), loc, STR("{:LA}"), F(0x1.23456p+20));
+
+  // *** Fill, align, zero Padding ***
+  std::locale::global(en_US);
+  test(STR("1.23456P+3$$$"), STR("{:$<13LA}"), F(0x1.23456p3));
+  test(STR("$$$1.23456P+3"), STR("{:$>13LA}"), F(0x1.23456p3));
+  test(STR("$1.23456P+3$$"), STR("{:$^13LA}"), F(0x1.23456p3));
+  test(STR("0001.23456P+3"), STR("{:013LA}"), F(0x1.23456p3));
+  test(STR("-1.23456P+3$$$"), STR("{:$<14LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1.23456P+3"), STR("{:$>14LA}"), F(-0x1.23456p3));
+  test(STR("$-1.23456P+3$$"), STR("{:$^14LA}"), F(-0x1.23456p3));
+  test(STR("-0001.23456P+3"), STR("{:014LA}"), F(-0x1.23456p3));
+
+  std::locale::global(loc);
+  test(STR("1#23456P+3$$$"), STR("{:$<13LA}"), F(0x1.23456p3));
+  test(STR("$$$1#23456P+3"), STR("{:$>13LA}"), F(0x1.23456p3));
+  test(STR("$1#23456P+3$$"), STR("{:$^13LA}"), F(0x1.23456p3));
+  test(STR("0001#23456P+3"), STR("{:013LA}"), F(0x1.23456p3));
+  test(STR("-1#23456P+3$$$"), STR("{:$<14LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1#23456P+3"), STR("{:$>14LA}"), F(-0x1.23456p3));
+  test(STR("$-1#23456P+3$$"), STR("{:$^14LA}"), F(-0x1.23456p3));
+  test(STR("-0001#23456P+3"), STR("{:014LA}"), F(-0x1.23456p3));
+
+  test(STR("1.23456P+3$$$"), en_US, STR("{:$<13LA}"), F(0x1.23456p3));
+  test(STR("$$$1.23456P+3"), en_US, STR("{:$>13LA}"), F(0x1.23456p3));
+  test(STR("$1.23456P+3$$"), en_US, STR("{:$^13LA}"), F(0x1.23456p3));
+  test(STR("0001.23456P+3"), en_US, STR("{:013LA}"), F(0x1.23456p3));
+  test(STR("-1.23456P+3$$$"), en_US, STR("{:$<14LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1.23456P+3"), en_US, STR("{:$>14LA}"), F(-0x1.23456p3));
+  test(STR("$-1.23456P+3$$"), en_US, STR("{:$^14LA}"), F(-0x1.23456p3));
+  test(STR("-0001.23456P+3"), en_US, STR("{:014LA}"), F(-0x1.23456p3));
+
+  std::locale::global(en_US);
+  test(STR("1#23456P+3$$$"), loc, STR("{:$<13LA}"), F(0x1.23456p3));
+  test(STR("$$$1#23456P+3"), loc, STR("{:$>13LA}"), F(0x1.23456p3));
+  test(STR("$1#23456P+3$$"), loc, STR("{:$^13LA}"), F(0x1.23456p3));
+  test(STR("0001#23456P+3"), loc, STR("{:013LA}"), F(0x1.23456p3));
+  test(STR("-1#23456P+3$$$"), loc, STR("{:$<14LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1#23456P+3"), loc, STR("{:$>14LA}"), F(-0x1.23456p3));
+  test(STR("$-1#23456P+3$$"), loc, STR("{:$^14LA}"), F(-0x1.23456p3));
+  test(STR("-0001#23456P+3"), loc, STR("{:014LA}"), F(-0x1.23456p3));
+}
+
+template <class F, class CharT>
+void test_floating_point_hex_lower_case_precision() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.234560p-3"), STR("{:.6La}"), F(0x1.23456p-3));
+  test(STR("1.234560p-2"), STR("{:.6La}"), F(0x1.23456p-2));
+  test(STR("1.234560p-1"), STR("{:.6La}"), F(0x1.23456p-1));
+  test(STR("1.234560p+0"), STR("{:.6La}"), F(0x1.23456p0));
+  test(STR("1.234560p+1"), STR("{:.6La}"), F(0x1.23456p+1));
+  test(STR("1.234560p+2"), STR("{:.6La}"), F(0x1.23456p+2));
+  test(STR("1.234560p+3"), STR("{:.6La}"), F(0x1.23456p+3));
+  test(STR("1.234560p+20"), STR("{:.6La}"), F(0x1.23456p+20));
+
+  std::locale::global(loc);
+  test(STR("1#234560p-3"), STR("{:.6La}"), F(0x1.23456p-3));
+  test(STR("1#234560p-2"), STR("{:.6La}"), F(0x1.23456p-2));
+  test(STR("1#234560p-1"), STR("{:.6La}"), F(0x1.23456p-1));
+  test(STR("1#234560p+0"), STR("{:.6La}"), F(0x1.23456p0));
+  test(STR("1#234560p+1"), STR("{:.6La}"), F(0x1.23456p+1));
+  test(STR("1#234560p+2"), STR("{:.6La}"), F(0x1.23456p+2));
+  test(STR("1#234560p+3"), STR("{:.6La}"), F(0x1.23456p+3));
+  test(STR("1#234560p+20"), STR("{:.6La}"), F(0x1.23456p+20));
+
+  test(STR("1.234560p-3"), en_US, STR("{:.6La}"), F(0x1.23456p-3));
+  test(STR("1.234560p-2"), en_US, STR("{:.6La}"), F(0x1.23456p-2));
+  test(STR("1.234560p-1"), en_US, STR("{:.6La}"), F(0x1.23456p-1));
+  test(STR("1.234560p+0"), en_US, STR("{:.6La}"), F(0x1.23456p0));
+  test(STR("1.234560p+1"), en_US, STR("{:.6La}"), F(0x1.23456p+1));
+  test(STR("1.234560p+2"), en_US, STR("{:.6La}"), F(0x1.23456p+2));
+  test(STR("1.234560p+3"), en_US, STR("{:.6La}"), F(0x1.23456p+3));
+  test(STR("1.234560p+20"), en_US, STR("{:.6La}"), F(0x1.23456p+20));
+
+  std::locale::global(en_US);
+  test(STR("1#234560p-3"), loc, STR("{:.6La}"), F(0x1.23456p-3));
+  test(STR("1#234560p-2"), loc, STR("{:.6La}"), F(0x1.23456p-2));
+  test(STR("1#234560p-1"), loc, STR("{:.6La}"), F(0x1.23456p-1));
+  test(STR("1#234560p+0"), loc, STR("{:.6La}"), F(0x1.23456p0));
+  test(STR("1#234560p+1"), loc, STR("{:.6La}"), F(0x1.23456p+1));
+  test(STR("1#234560p+2"), loc, STR("{:.6La}"), F(0x1.23456p+2));
+  test(STR("1#234560p+3"), loc, STR("{:.6La}"), F(0x1.23456p+3));
+  test(STR("1#234560p+20"), loc, STR("{:.6La}"), F(0x1.23456p+20));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1.234560p+3$$$"), STR("{:$<14.6La}"), F(0x1.23456p3));
+  test(STR("$$$1.234560p+3"), STR("{:$>14.6La}"), F(0x1.23456p3));
+  test(STR("$1.234560p+3$$"), STR("{:$^14.6La}"), F(0x1.23456p3));
+  test(STR("0001.234560p+3"), STR("{:014.6La}"), F(0x1.23456p3));
+  test(STR("-1.234560p+3$$$"), STR("{:$<15.6La}"), F(-0x1.23456p3));
+  test(STR("$$$-1.234560p+3"), STR("{:$>15.6La}"), F(-0x1.23456p3));
+  test(STR("$-1.234560p+3$$"), STR("{:$^15.6La}"), F(-0x1.23456p3));
+  test(STR("-0001.234560p+3"), STR("{:015.6La}"), F(-0x1.23456p3));
+
+  std::locale::global(loc);
+  test(STR("1#234560p+3$$$"), STR("{:$<14.6La}"), F(0x1.23456p3));
+  test(STR("$$$1#234560p+3"), STR("{:$>14.6La}"), F(0x1.23456p3));
+  test(STR("$1#234560p+3$$"), STR("{:$^14.6La}"), F(0x1.23456p3));
+  test(STR("0001#234560p+3"), STR("{:014.6La}"), F(0x1.23456p3));
+  test(STR("-1#234560p+3$$$"), STR("{:$<15.6La}"), F(-0x1.23456p3));
+  test(STR("$$$-1#234560p+3"), STR("{:$>15.6La}"), F(-0x1.23456p3));
+  test(STR("$-1#234560p+3$$"), STR("{:$^15.6La}"), F(-0x1.23456p3));
+  test(STR("-0001#234560p+3"), STR("{:015.6La}"), F(-0x1.23456p3));
+
+  test(STR("1.234560p+3$$$"), en_US, STR("{:$<14.6La}"), F(0x1.23456p3));
+  test(STR("$$$1.234560p+3"), en_US, STR("{:$>14.6La}"), F(0x1.23456p3));
+  test(STR("$1.234560p+3$$"), en_US, STR("{:$^14.6La}"), F(0x1.23456p3));
+  test(STR("0001.234560p+3"), en_US, STR("{:014.6La}"), F(0x1.23456p3));
+  test(STR("-1.234560p+3$$$"), en_US, STR("{:$<15.6La}"), F(-0x1.23456p3));
+  test(STR("$$$-1.234560p+3"), en_US, STR("{:$>15.6La}"), F(-0x1.23456p3));
+  test(STR("$-1.234560p+3$$"), en_US, STR("{:$^15.6La}"), F(-0x1.23456p3));
+  test(STR("-0001.234560p+3"), en_US, STR("{:015.6La}"), F(-0x1.23456p3));
+
+  std::locale::global(en_US);
+  test(STR("1#234560p+3$$$"), loc, STR("{:$<14.6La}"), F(0x1.23456p3));
+  test(STR("$$$1#234560p+3"), loc, STR("{:$>14.6La}"), F(0x1.23456p3));
+  test(STR("$1#234560p+3$$"), loc, STR("{:$^14.6La}"), F(0x1.23456p3));
+  test(STR("0001#234560p+3"), loc, STR("{:014.6La}"), F(0x1.23456p3));
+  test(STR("-1#234560p+3$$$"), loc, STR("{:$<15.6La}"), F(-0x1.23456p3));
+  test(STR("$$$-1#234560p+3"), loc, STR("{:$>15.6La}"), F(-0x1.23456p3));
+  test(STR("$-1#234560p+3$$"), loc, STR("{:$^15.6La}"), F(-0x1.23456p3));
+  test(STR("-0001#234560p+3"), loc, STR("{:015.6La}"), F(-0x1.23456p3));
+}
+
+template <class F, class CharT>
+void test_floating_point_hex_upper_case_precision() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.234560P-3"), STR("{:.6LA}"), F(0x1.23456p-3));
+  test(STR("1.234560P-2"), STR("{:.6LA}"), F(0x1.23456p-2));
+  test(STR("1.234560P-1"), STR("{:.6LA}"), F(0x1.23456p-1));
+  test(STR("1.234560P+0"), STR("{:.6LA}"), F(0x1.23456p0));
+  test(STR("1.234560P+1"), STR("{:.6LA}"), F(0x1.23456p+1));
+  test(STR("1.234560P+2"), STR("{:.6LA}"), F(0x1.23456p+2));
+  test(STR("1.234560P+3"), STR("{:.6LA}"), F(0x1.23456p+3));
+  test(STR("1.234560P+20"), STR("{:.6LA}"), F(0x1.23456p+20));
+
+  std::locale::global(loc);
+  test(STR("1#234560P-3"), STR("{:.6LA}"), F(0x1.23456p-3));
+  test(STR("1#234560P-2"), STR("{:.6LA}"), F(0x1.23456p-2));
+  test(STR("1#234560P-1"), STR("{:.6LA}"), F(0x1.23456p-1));
+  test(STR("1#234560P+0"), STR("{:.6LA}"), F(0x1.23456p0));
+  test(STR("1#234560P+1"), STR("{:.6LA}"), F(0x1.23456p+1));
+  test(STR("1#234560P+2"), STR("{:.6LA}"), F(0x1.23456p+2));
+  test(STR("1#234560P+3"), STR("{:.6LA}"), F(0x1.23456p+3));
+  test(STR("1#234560P+20"), STR("{:.6LA}"), F(0x1.23456p+20));
+
+  test(STR("1.234560P-3"), en_US, STR("{:.6LA}"), F(0x1.23456p-3));
+  test(STR("1.234560P-2"), en_US, STR("{:.6LA}"), F(0x1.23456p-2));
+  test(STR("1.234560P-1"), en_US, STR("{:.6LA}"), F(0x1.23456p-1));
+  test(STR("1.234560P+0"), en_US, STR("{:.6LA}"), F(0x1.23456p0));
+  test(STR("1.234560P+1"), en_US, STR("{:.6LA}"), F(0x1.23456p+1));
+  test(STR("1.234560P+2"), en_US, STR("{:.6LA}"), F(0x1.23456p+2));
+  test(STR("1.234560P+3"), en_US, STR("{:.6LA}"), F(0x1.23456p+3));
+  test(STR("1.234560P+20"), en_US, STR("{:.6LA}"), F(0x1.23456p+20));
+
+  std::locale::global(en_US);
+  test(STR("1#234560P-3"), loc, STR("{:.6LA}"), F(0x1.23456p-3));
+  test(STR("1#234560P-2"), loc, STR("{:.6LA}"), F(0x1.23456p-2));
+  test(STR("1#234560P-1"), loc, STR("{:.6LA}"), F(0x1.23456p-1));
+  test(STR("1#234560P+0"), loc, STR("{:.6LA}"), F(0x1.23456p0));
+  test(STR("1#234560P+1"), loc, STR("{:.6LA}"), F(0x1.23456p+1));
+  test(STR("1#234560P+2"), loc, STR("{:.6LA}"), F(0x1.23456p+2));
+  test(STR("1#234560P+3"), loc, STR("{:.6LA}"), F(0x1.23456p+3));
+  test(STR("1#234560P+20"), loc, STR("{:.6LA}"), F(0x1.23456p+20));
+
+  // *** Fill, align, zero Padding ***
+  std::locale::global(en_US);
+  test(STR("1.234560P+3$$$"), STR("{:$<14.6LA}"), F(0x1.23456p3));
+  test(STR("$$$1.234560P+3"), STR("{:$>14.6LA}"), F(0x1.23456p3));
+  test(STR("$1.234560P+3$$"), STR("{:$^14.6LA}"), F(0x1.23456p3));
+  test(STR("0001.234560P+3"), STR("{:014.6LA}"), F(0x1.23456p3));
+  test(STR("-1.234560P+3$$$"), STR("{:$<15.6LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1.234560P+3"), STR("{:$>15.6LA}"), F(-0x1.23456p3));
+  test(STR("$-1.234560P+3$$"), STR("{:$^15.6LA}"), F(-0x1.23456p3));
+  test(STR("-0001.234560P+3"), STR("{:015.6LA}"), F(-0x1.23456p3));
+
+  std::locale::global(loc);
+  test(STR("1#234560P+3$$$"), STR("{:$<14.6LA}"), F(0x1.23456p3));
+  test(STR("$$$1#234560P+3"), STR("{:$>14.6LA}"), F(0x1.23456p3));
+  test(STR("$1#234560P+3$$"), STR("{:$^14.6LA}"), F(0x1.23456p3));
+  test(STR("0001#234560P+3"), STR("{:014.6LA}"), F(0x1.23456p3));
+  test(STR("-1#234560P+3$$$"), STR("{:$<15.6LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1#234560P+3"), STR("{:$>15.6LA}"), F(-0x1.23456p3));
+  test(STR("$-1#234560P+3$$"), STR("{:$^15.6LA}"), F(-0x1.23456p3));
+  test(STR("-0001#234560P+3"), STR("{:015.6LA}"), F(-0x1.23456p3));
+
+  test(STR("1.234560P+3$$$"), en_US, STR("{:$<14.6LA}"), F(0x1.23456p3));
+  test(STR("$$$1.234560P+3"), en_US, STR("{:$>14.6LA}"), F(0x1.23456p3));
+  test(STR("$1.234560P+3$$"), en_US, STR("{:$^14.6LA}"), F(0x1.23456p3));
+  test(STR("0001.234560P+3"), en_US, STR("{:014.6LA}"), F(0x1.23456p3));
+  test(STR("-1.234560P+3$$$"), en_US, STR("{:$<15.6LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1.234560P+3"), en_US, STR("{:$>15.6LA}"), F(-0x1.23456p3));
+  test(STR("$-1.234560P+3$$"), en_US, STR("{:$^15.6LA}"), F(-0x1.23456p3));
+  test(STR("-0001.234560P+3"), en_US, STR("{:015.6LA}"), F(-0x1.23456p3));
+
+  std::locale::global(en_US);
+  test(STR("1#234560P+3$$$"), loc, STR("{:$<14.6LA}"), F(0x1.23456p3));
+  test(STR("$$$1#234560P+3"), loc, STR("{:$>14.6LA}"), F(0x1.23456p3));
+  test(STR("$1#234560P+3$$"), loc, STR("{:$^14.6LA}"), F(0x1.23456p3));
+  test(STR("0001#234560P+3"), loc, STR("{:014.6LA}"), F(0x1.23456p3));
+  test(STR("-1#234560P+3$$$"), loc, STR("{:$<15.6LA}"), F(-0x1.23456p3));
+  test(STR("$$$-1#234560P+3"), loc, STR("{:$>15.6LA}"), F(-0x1.23456p3));
+  test(STR("$-1#234560P+3$$"), loc, STR("{:$^15.6LA}"), F(-0x1.23456p3));
+  test(STR("-0001#234560P+3"), loc, STR("{:015.6LA}"), F(-0x1.23456p3));
+}
+
+template <class F, class CharT>
+void test_floating_point_scientific_lower_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.234567e-03"), STR("{:.6Le}"), F(1.234567e-3));
+  test(STR("1.234567e-02"), STR("{:.6Le}"), F(1.234567e-2));
+  test(STR("1.234567e-01"), STR("{:.6Le}"), F(1.234567e-1));
+  test(STR("1.234567e+00"), STR("{:.6Le}"), F(1.234567e0));
+  test(STR("1.234567e+01"), STR("{:.6Le}"), F(1.234567e1));
+  test(STR("1.234567e+02"), STR("{:.6Le}"), F(1.234567e2));
+  test(STR("1.234567e+03"), STR("{:.6Le}"), F(1.234567e3));
+  test(STR("1.234567e+20"), STR("{:.6Le}"), F(1.234567e20));
+  test(STR("-1.234567e-03"), STR("{:.6Le}"), F(-1.234567e-3));
+  test(STR("-1.234567e-02"), STR("{:.6Le}"), F(-1.234567e-2));
+  test(STR("-1.234567e-01"), STR("{:.6Le}"), F(-1.234567e-1));
+  test(STR("-1.234567e+00"), STR("{:.6Le}"), F(-1.234567e0));
+  test(STR("-1.234567e+01"), STR("{:.6Le}"), F(-1.234567e1));
+  test(STR("-1.234567e+02"), STR("{:.6Le}"), F(-1.234567e2));
+  test(STR("-1.234567e+03"), STR("{:.6Le}"), F(-1.234567e3));
+  test(STR("-1.234567e+20"), STR("{:.6Le}"), F(-1.234567e20));
+
+  std::locale::global(loc);
+  test(STR("1#234567e-03"), STR("{:.6Le}"), F(1.234567e-3));
+  test(STR("1#234567e-02"), STR("{:.6Le}"), F(1.234567e-2));
+  test(STR("1#234567e-01"), STR("{:.6Le}"), F(1.234567e-1));
+  test(STR("1#234567e+00"), STR("{:.6Le}"), F(1.234567e0));
+  test(STR("1#234567e+01"), STR("{:.6Le}"), F(1.234567e1));
+  test(STR("1#234567e+02"), STR("{:.6Le}"), F(1.234567e2));
+  test(STR("1#234567e+03"), STR("{:.6Le}"), F(1.234567e3));
+  test(STR("1#234567e+20"), STR("{:.6Le}"), F(1.234567e20));
+  test(STR("-1#234567e-03"), STR("{:.6Le}"), F(-1.234567e-3));
+  test(STR("-1#234567e-02"), STR("{:.6Le}"), F(-1.234567e-2));
+  test(STR("-1#234567e-01"), STR("{:.6Le}"), F(-1.234567e-1));
+  test(STR("-1#234567e+00"), STR("{:.6Le}"), F(-1.234567e0));
+  test(STR("-1#234567e+01"), STR("{:.6Le}"), F(-1.234567e1));
+  test(STR("-1#234567e+02"), STR("{:.6Le}"), F(-1.234567e2));
+  test(STR("-1#234567e+03"), STR("{:.6Le}"), F(-1.234567e3));
+  test(STR("-1#234567e+20"), STR("{:.6Le}"), F(-1.234567e20));
+
+  test(STR("1.234567e-03"), en_US, STR("{:.6Le}"), F(1.234567e-3));
+  test(STR("1.234567e-02"), en_US, STR("{:.6Le}"), F(1.234567e-2));
+  test(STR("1.234567e-01"), en_US, STR("{:.6Le}"), F(1.234567e-1));
+  test(STR("1.234567e+00"), en_US, STR("{:.6Le}"), F(1.234567e0));
+  test(STR("1.234567e+01"), en_US, STR("{:.6Le}"), F(1.234567e1));
+  test(STR("1.234567e+02"), en_US, STR("{:.6Le}"), F(1.234567e2));
+  test(STR("1.234567e+03"), en_US, STR("{:.6Le}"), F(1.234567e3));
+  test(STR("1.234567e+20"), en_US, STR("{:.6Le}"), F(1.234567e20));
+  test(STR("-1.234567e-03"), en_US, STR("{:.6Le}"), F(-1.234567e-3));
+  test(STR("-1.234567e-02"), en_US, STR("{:.6Le}"), F(-1.234567e-2));
+  test(STR("-1.234567e-01"), en_US, STR("{:.6Le}"), F(-1.234567e-1));
+  test(STR("-1.234567e+00"), en_US, STR("{:.6Le}"), F(-1.234567e0));
+  test(STR("-1.234567e+01"), en_US, STR("{:.6Le}"), F(-1.234567e1));
+  test(STR("-1.234567e+02"), en_US, STR("{:.6Le}"), F(-1.234567e2));
+  test(STR("-1.234567e+03"), en_US, STR("{:.6Le}"), F(-1.234567e3));
+  test(STR("-1.234567e+20"), en_US, STR("{:.6Le}"), F(-1.234567e20));
+
+  std::locale::global(en_US);
+  test(STR("1#234567e-03"), loc, STR("{:.6Le}"), F(1.234567e-3));
+  test(STR("1#234567e-02"), loc, STR("{:.6Le}"), F(1.234567e-2));
+  test(STR("1#234567e-01"), loc, STR("{:.6Le}"), F(1.234567e-1));
+  test(STR("1#234567e+00"), loc, STR("{:.6Le}"), F(1.234567e0));
+  test(STR("1#234567e+01"), loc, STR("{:.6Le}"), F(1.234567e1));
+  test(STR("1#234567e+02"), loc, STR("{:.6Le}"), F(1.234567e2));
+  test(STR("1#234567e+03"), loc, STR("{:.6Le}"), F(1.234567e3));
+  test(STR("1#234567e+20"), loc, STR("{:.6Le}"), F(1.234567e20));
+  test(STR("-1#234567e-03"), loc, STR("{:.6Le}"), F(-1.234567e-3));
+  test(STR("-1#234567e-02"), loc, STR("{:.6Le}"), F(-1.234567e-2));
+  test(STR("-1#234567e-01"), loc, STR("{:.6Le}"), F(-1.234567e-1));
+  test(STR("-1#234567e+00"), loc, STR("{:.6Le}"), F(-1.234567e0));
+  test(STR("-1#234567e+01"), loc, STR("{:.6Le}"), F(-1.234567e1));
+  test(STR("-1#234567e+02"), loc, STR("{:.6Le}"), F(-1.234567e2));
+  test(STR("-1#234567e+03"), loc, STR("{:.6Le}"), F(-1.234567e3));
+  test(STR("-1#234567e+20"), loc, STR("{:.6Le}"), F(-1.234567e20));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1.234567e+03$$$"), STR("{:$<15.6Le}"), F(1.234567e3));
+  test(STR("$$$1.234567e+03"), STR("{:$>15.6Le}"), F(1.234567e3));
+  test(STR("$1.234567e+03$$"), STR("{:$^15.6Le}"), F(1.234567e3));
+  test(STR("0001.234567e+03"), STR("{:015.6Le}"), F(1.234567e3));
+  test(STR("-1.234567e+03$$$"), STR("{:$<16.6Le}"), F(-1.234567e3));
+  test(STR("$$$-1.234567e+03"), STR("{:$>16.6Le}"), F(-1.234567e3));
+  test(STR("$-1.234567e+03$$"), STR("{:$^16.6Le}"), F(-1.234567e3));
+  test(STR("-0001.234567e+03"), STR("{:016.6Le}"), F(-1.234567e3));
+
+  std::locale::global(loc);
+  test(STR("1#234567e+03$$$"), STR("{:$<15.6Le}"), F(1.234567e3));
+  test(STR("$$$1#234567e+03"), STR("{:$>15.6Le}"), F(1.234567e3));
+  test(STR("$1#234567e+03$$"), STR("{:$^15.6Le}"), F(1.234567e3));
+  test(STR("0001#234567e+03"), STR("{:015.6Le}"), F(1.234567e3));
+  test(STR("-1#234567e+03$$$"), STR("{:$<16.6Le}"), F(-1.234567e3));
+  test(STR("$$$-1#234567e+03"), STR("{:$>16.6Le}"), F(-1.234567e3));
+  test(STR("$-1#234567e+03$$"), STR("{:$^16.6Le}"), F(-1.234567e3));
+  test(STR("-0001#234567e+03"), STR("{:016.6Le}"), F(-1.234567e3));
+
+  test(STR("1.234567e+03$$$"), en_US, STR("{:$<15.6Le}"), F(1.234567e3));
+  test(STR("$$$1.234567e+03"), en_US, STR("{:$>15.6Le}"), F(1.234567e3));
+  test(STR("$1.234567e+03$$"), en_US, STR("{:$^15.6Le}"), F(1.234567e3));
+  test(STR("0001.234567e+03"), en_US, STR("{:015.6Le}"), F(1.234567e3));
+  test(STR("-1.234567e+03$$$"), en_US, STR("{:$<16.6Le}"), F(-1.234567e3));
+  test(STR("$$$-1.234567e+03"), en_US, STR("{:$>16.6Le}"), F(-1.234567e3));
+  test(STR("$-1.234567e+03$$"), en_US, STR("{:$^16.6Le}"), F(-1.234567e3));
+  test(STR("-0001.234567e+03"), en_US, STR("{:016.6Le}"), F(-1.234567e3));
+
+  std::locale::global(en_US);
+  test(STR("1#234567e+03$$$"), loc, STR("{:$<15.6Le}"), F(1.234567e3));
+  test(STR("$$$1#234567e+03"), loc, STR("{:$>15.6Le}"), F(1.234567e3));
+  test(STR("$1#234567e+03$$"), loc, STR("{:$^15.6Le}"), F(1.234567e3));
+  test(STR("0001#234567e+03"), loc, STR("{:015.6Le}"), F(1.234567e3));
+  test(STR("-1#234567e+03$$$"), loc, STR("{:$<16.6Le}"), F(-1.234567e3));
+  test(STR("$$$-1#234567e+03"), loc, STR("{:$>16.6Le}"), F(-1.234567e3));
+  test(STR("$-1#234567e+03$$"), loc, STR("{:$^16.6Le}"), F(-1.234567e3));
+  test(STR("-0001#234567e+03"), loc, STR("{:016.6Le}"), F(-1.234567e3));
+}
+
+template <class F, class CharT>
+void test_floating_point_scientific_upper_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.234567E-03"), STR("{:.6LE}"), F(1.234567e-3));
+  test(STR("1.234567E-02"), STR("{:.6LE}"), F(1.234567e-2));
+  test(STR("1.234567E-01"), STR("{:.6LE}"), F(1.234567e-1));
+  test(STR("1.234567E+00"), STR("{:.6LE}"), F(1.234567e0));
+  test(STR("1.234567E+01"), STR("{:.6LE}"), F(1.234567e1));
+  test(STR("1.234567E+02"), STR("{:.6LE}"), F(1.234567e2));
+  test(STR("1.234567E+03"), STR("{:.6LE}"), F(1.234567e3));
+  test(STR("1.234567E+20"), STR("{:.6LE}"), F(1.234567e20));
+  test(STR("-1.234567E-03"), STR("{:.6LE}"), F(-1.234567e-3));
+  test(STR("-1.234567E-02"), STR("{:.6LE}"), F(-1.234567e-2));
+  test(STR("-1.234567E-01"), STR("{:.6LE}"), F(-1.234567e-1));
+  test(STR("-1.234567E+00"), STR("{:.6LE}"), F(-1.234567e0));
+  test(STR("-1.234567E+01"), STR("{:.6LE}"), F(-1.234567e1));
+  test(STR("-1.234567E+02"), STR("{:.6LE}"), F(-1.234567e2));
+  test(STR("-1.234567E+03"), STR("{:.6LE}"), F(-1.234567e3));
+  test(STR("-1.234567E+20"), STR("{:.6LE}"), F(-1.234567e20));
+
+  std::locale::global(loc);
+  test(STR("1#234567E-03"), STR("{:.6LE}"), F(1.234567e-3));
+  test(STR("1#234567E-02"), STR("{:.6LE}"), F(1.234567e-2));
+  test(STR("1#234567E-01"), STR("{:.6LE}"), F(1.234567e-1));
+  test(STR("1#234567E+00"), STR("{:.6LE}"), F(1.234567e0));
+  test(STR("1#234567E+01"), STR("{:.6LE}"), F(1.234567e1));
+  test(STR("1#234567E+02"), STR("{:.6LE}"), F(1.234567e2));
+  test(STR("1#234567E+03"), STR("{:.6LE}"), F(1.234567e3));
+  test(STR("1#234567E+20"), STR("{:.6LE}"), F(1.234567e20));
+  test(STR("-1#234567E-03"), STR("{:.6LE}"), F(-1.234567e-3));
+  test(STR("-1#234567E-02"), STR("{:.6LE}"), F(-1.234567e-2));
+  test(STR("-1#234567E-01"), STR("{:.6LE}"), F(-1.234567e-1));
+  test(STR("-1#234567E+00"), STR("{:.6LE}"), F(-1.234567e0));
+  test(STR("-1#234567E+01"), STR("{:.6LE}"), F(-1.234567e1));
+  test(STR("-1#234567E+02"), STR("{:.6LE}"), F(-1.234567e2));
+  test(STR("-1#234567E+03"), STR("{:.6LE}"), F(-1.234567e3));
+  test(STR("-1#234567E+20"), STR("{:.6LE}"), F(-1.234567e20));
+
+  test(STR("1.234567E-03"), en_US, STR("{:.6LE}"), F(1.234567e-3));
+  test(STR("1.234567E-02"), en_US, STR("{:.6LE}"), F(1.234567e-2));
+  test(STR("1.234567E-01"), en_US, STR("{:.6LE}"), F(1.234567e-1));
+  test(STR("1.234567E+00"), en_US, STR("{:.6LE}"), F(1.234567e0));
+  test(STR("1.234567E+01"), en_US, STR("{:.6LE}"), F(1.234567e1));
+  test(STR("1.234567E+02"), en_US, STR("{:.6LE}"), F(1.234567e2));
+  test(STR("1.234567E+03"), en_US, STR("{:.6LE}"), F(1.234567e3));
+  test(STR("1.234567E+20"), en_US, STR("{:.6LE}"), F(1.234567e20));
+  test(STR("-1.234567E-03"), en_US, STR("{:.6LE}"), F(-1.234567e-3));
+  test(STR("-1.234567E-02"), en_US, STR("{:.6LE}"), F(-1.234567e-2));
+  test(STR("-1.234567E-01"), en_US, STR("{:.6LE}"), F(-1.234567e-1));
+  test(STR("-1.234567E+00"), en_US, STR("{:.6LE}"), F(-1.234567e0));
+  test(STR("-1.234567E+01"), en_US, STR("{:.6LE}"), F(-1.234567e1));
+  test(STR("-1.234567E+02"), en_US, STR("{:.6LE}"), F(-1.234567e2));
+  test(STR("-1.234567E+03"), en_US, STR("{:.6LE}"), F(-1.234567e3));
+  test(STR("-1.234567E+20"), en_US, STR("{:.6LE}"), F(-1.234567e20));
+
+  std::locale::global(en_US);
+  test(STR("1#234567E-03"), loc, STR("{:.6LE}"), F(1.234567e-3));
+  test(STR("1#234567E-02"), loc, STR("{:.6LE}"), F(1.234567e-2));
+  test(STR("1#234567E-01"), loc, STR("{:.6LE}"), F(1.234567e-1));
+  test(STR("1#234567E+00"), loc, STR("{:.6LE}"), F(1.234567e0));
+  test(STR("1#234567E+01"), loc, STR("{:.6LE}"), F(1.234567e1));
+  test(STR("1#234567E+02"), loc, STR("{:.6LE}"), F(1.234567e2));
+  test(STR("1#234567E+03"), loc, STR("{:.6LE}"), F(1.234567e3));
+  test(STR("1#234567E+20"), loc, STR("{:.6LE}"), F(1.234567e20));
+  test(STR("-1#234567E-03"), loc, STR("{:.6LE}"), F(-1.234567e-3));
+  test(STR("-1#234567E-02"), loc, STR("{:.6LE}"), F(-1.234567e-2));
+  test(STR("-1#234567E-01"), loc, STR("{:.6LE}"), F(-1.234567e-1));
+  test(STR("-1#234567E+00"), loc, STR("{:.6LE}"), F(-1.234567e0));
+  test(STR("-1#234567E+01"), loc, STR("{:.6LE}"), F(-1.234567e1));
+  test(STR("-1#234567E+02"), loc, STR("{:.6LE}"), F(-1.234567e2));
+  test(STR("-1#234567E+03"), loc, STR("{:.6LE}"), F(-1.234567e3));
+  test(STR("-1#234567E+20"), loc, STR("{:.6LE}"), F(-1.234567e20));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1.234567E+03$$$"), STR("{:$<15.6LE}"), F(1.234567e3));
+  test(STR("$$$1.234567E+03"), STR("{:$>15.6LE}"), F(1.234567e3));
+  test(STR("$1.234567E+03$$"), STR("{:$^15.6LE}"), F(1.234567e3));
+  test(STR("0001.234567E+03"), STR("{:015.6LE}"), F(1.234567e3));
+  test(STR("-1.234567E+03$$$"), STR("{:$<16.6LE}"), F(-1.234567e3));
+  test(STR("$$$-1.234567E+03"), STR("{:$>16.6LE}"), F(-1.234567e3));
+  test(STR("$-1.234567E+03$$"), STR("{:$^16.6LE}"), F(-1.234567e3));
+  test(STR("-0001.234567E+03"), STR("{:016.6LE}"), F(-1.234567e3));
+
+  std::locale::global(loc);
+  test(STR("1#234567E+03$$$"), STR("{:$<15.6LE}"), F(1.234567e3));
+  test(STR("$$$1#234567E+03"), STR("{:$>15.6LE}"), F(1.234567e3));
+  test(STR("$1#234567E+03$$"), STR("{:$^15.6LE}"), F(1.234567e3));
+  test(STR("0001#234567E+03"), STR("{:015.6LE}"), F(1.234567e3));
+  test(STR("-1#234567E+03$$$"), STR("{:$<16.6LE}"), F(-1.234567e3));
+  test(STR("$$$-1#234567E+03"), STR("{:$>16.6LE}"), F(-1.234567e3));
+  test(STR("$-1#234567E+03$$"), STR("{:$^16.6LE}"), F(-1.234567e3));
+  test(STR("-0001#234567E+03"), STR("{:016.6LE}"), F(-1.234567e3));
+
+  test(STR("1.234567E+03$$$"), en_US, STR("{:$<15.6LE}"), F(1.234567e3));
+  test(STR("$$$1.234567E+03"), en_US, STR("{:$>15.6LE}"), F(1.234567e3));
+  test(STR("$1.234567E+03$$"), en_US, STR("{:$^15.6LE}"), F(1.234567e3));
+  test(STR("0001.234567E+03"), en_US, STR("{:015.6LE}"), F(1.234567e3));
+  test(STR("-1.234567E+03$$$"), en_US, STR("{:$<16.6LE}"), F(-1.234567e3));
+  test(STR("$$$-1.234567E+03"), en_US, STR("{:$>16.6LE}"), F(-1.234567e3));
+  test(STR("$-1.234567E+03$$"), en_US, STR("{:$^16.6LE}"), F(-1.234567e3));
+  test(STR("-0001.234567E+03"), en_US, STR("{:016.6LE}"), F(-1.234567e3));
+
+  std::locale::global(en_US);
+  test(STR("1#234567E+03$$$"), loc, STR("{:$<15.6LE}"), F(1.234567e3));
+  test(STR("$$$1#234567E+03"), loc, STR("{:$>15.6LE}"), F(1.234567e3));
+  test(STR("$1#234567E+03$$"), loc, STR("{:$^15.6LE}"), F(1.234567e3));
+  test(STR("0001#234567E+03"), loc, STR("{:015.6LE}"), F(1.234567e3));
+  test(STR("-1#234567E+03$$$"), loc, STR("{:$<16.6LE}"), F(-1.234567e3));
+  test(STR("$$$-1#234567E+03"), loc, STR("{:$>16.6LE}"), F(-1.234567e3));
+  test(STR("$-1#234567E+03$$"), loc, STR("{:$^16.6LE}"), F(-1.234567e3));
+  test(STR("-0001#234567E+03"), loc, STR("{:016.6LE}"), F(-1.234567e3));
+}
+
+template <class F, class CharT>
+void test_floating_point_fixed_lower_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("0.000001"), STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0.000012"), STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0.000123"), STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0.001235"), STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0.012346"), STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1.234567"), STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("12.345670"), STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123.456700"), STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1,234.567000"), STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12,345.670000"), STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123,456.700000"), STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1,234,567.000000"), STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12,345,670.000000"), STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("123,456,700,000,000,000,000.000000"), STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0.000001"), STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0.000012"), STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0.000123"), STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0.001235"), STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0.012346"), STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1.234567"), STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-12.345670"), STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123.456700"), STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1,234.567000"), STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12,345.670000"), STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123,456.700000"), STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1,234,567.000000"), STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12,345,670.000000"), STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-123,456,700,000,000,000,000.000000"), STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  std::locale::global(loc);
+  test(STR("0#000001"), STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0#000012"), STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0#000123"), STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0#001235"), STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0#012346"), STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1#234567"), STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("1_2#345670"), STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("12_3#456700"), STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1_23_4#567000"), STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12_34_5#670000"), STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123_45_6#700000"), STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1_234_56_7#000000"), STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12_345_67_0#000000"), STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0#000001"), STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0#000012"), STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0#000123"), STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0#001235"), STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0#012346"), STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1#234567"), STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-1_2#345670"), STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-12_3#456700"), STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1_23_4#567000"), STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12_34_5#670000"), STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123_45_6#700000"), STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1_234_56_7#000000"), STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12_345_67_0#000000"), STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  test(STR("0.000001"), en_US, STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0.000012"), en_US, STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0.000123"), en_US, STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0.001235"), en_US, STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0.012346"), en_US, STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1.234567"), en_US, STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("12.345670"), en_US, STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123.456700"), en_US, STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1,234.567000"), en_US, STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12,345.670000"), en_US, STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123,456.700000"), en_US, STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1,234,567.000000"), en_US, STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12,345,670.000000"), en_US, STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("123,456,700,000,000,000,000.000000"), en_US, STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0.000001"), en_US, STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0.000012"), en_US, STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0.000123"), en_US, STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0.001235"), en_US, STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0.012346"), en_US, STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1.234567"), en_US, STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-12.345670"), en_US, STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123.456700"), en_US, STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1,234.567000"), en_US, STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12,345.670000"), en_US, STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123,456.700000"), en_US, STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1,234,567.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12,345,670.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-123,456,700,000,000,000,000.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  std::locale::global(en_US);
+  test(STR("0#000001"), loc, STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0#000012"), loc, STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0#000123"), loc, STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0#001235"), loc, STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0#012346"), loc, STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1#234567"), loc, STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("1_2#345670"), loc, STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("12_3#456700"), loc, STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1_23_4#567000"), loc, STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12_34_5#670000"), loc, STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123_45_6#700000"), loc, STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1_234_56_7#000000"), loc, STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12_345_67_0#000000"), loc, STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), loc, STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0#000001"), loc, STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0#000012"), loc, STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0#000123"), loc, STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0#001235"), loc, STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0#012346"), loc, STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1#234567"), loc, STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-1_2#345670"), loc, STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-12_3#456700"), loc, STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1_23_4#567000"), loc, STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12_34_5#670000"), loc, STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123_45_6#700000"), loc, STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1_234_56_7#000000"), loc, STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12_345_67_0#000000"), loc, STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), loc, STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  // *** Fill, align, zero padding ***
+  if constexpr (sizeof(F) > sizeof(float)) {
+    std::locale::global(en_US);
+    test(STR("1,234.567000$$$"), STR("{:$<15.6Lf}"), F(1.234567e3));
+    test(STR("$$$1,234.567000"), STR("{:$>15.6Lf}"), F(1.234567e3));
+    test(STR("$1,234.567000$$"), STR("{:$^15.6Lf}"), F(1.234567e3));
+    test(STR("0001,234.567000"), STR("{:015.6Lf}"), F(1.234567e3));
+    test(STR("-1,234.567000$$$"), STR("{:$<16.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1,234.567000"), STR("{:$>16.6Lf}"), F(-1.234567e3));
+    test(STR("$-1,234.567000$$"), STR("{:$^16.6Lf}"), F(-1.234567e3));
+    test(STR("-0001,234.567000"), STR("{:016.6Lf}"), F(-1.234567e3));
+
+    std::locale::global(loc);
+    test(STR("1_23_4#567000$$$"), STR("{:$<16.6Lf}"), F(1.234567e3));
+    test(STR("$$$1_23_4#567000"), STR("{:$>16.6Lf}"), F(1.234567e3));
+    test(STR("$1_23_4#567000$$"), STR("{:$^16.6Lf}"), F(1.234567e3));
+    test(STR("0001_23_4#567000"), STR("{:016.6Lf}"), F(1.234567e3));
+    test(STR("-1_23_4#567000$$$"), STR("{:$<17.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1_23_4#567000"), STR("{:$>17.6Lf}"), F(-1.234567e3));
+    test(STR("$-1_23_4#567000$$"), STR("{:$^17.6Lf}"), F(-1.234567e3));
+    test(STR("-0001_23_4#567000"), STR("{:017.6Lf}"), F(-1.234567e3));
+
+    test(STR("1,234.567000$$$"), en_US, STR("{:$<15.6Lf}"), F(1.234567e3));
+    test(STR("$$$1,234.567000"), en_US, STR("{:$>15.6Lf}"), F(1.234567e3));
+    test(STR("$1,234.567000$$"), en_US, STR("{:$^15.6Lf}"), F(1.234567e3));
+    test(STR("0001,234.567000"), en_US, STR("{:015.6Lf}"), F(1.234567e3));
+    test(STR("-1,234.567000$$$"), en_US, STR("{:$<16.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1,234.567000"), en_US, STR("{:$>16.6Lf}"), F(-1.234567e3));
+    test(STR("$-1,234.567000$$"), en_US, STR("{:$^16.6Lf}"), F(-1.234567e3));
+    test(STR("-0001,234.567000"), en_US, STR("{:016.6Lf}"), F(-1.234567e3));
+
+    std::locale::global(en_US);
+    test(STR("1_23_4#567000$$$"), loc, STR("{:$<16.6Lf}"), F(1.234567e3));
+    test(STR("$$$1_23_4#567000"), loc, STR("{:$>16.6Lf}"), F(1.234567e3));
+    test(STR("$1_23_4#567000$$"), loc, STR("{:$^16.6Lf}"), F(1.234567e3));
+    test(STR("0001_23_4#567000"), loc, STR("{:016.6Lf}"), F(1.234567e3));
+    test(STR("-1_23_4#567000$$$"), loc, STR("{:$<17.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1_23_4#567000"), loc, STR("{:$>17.6Lf}"), F(-1.234567e3));
+    test(STR("$-1_23_4#567000$$"), loc, STR("{:$^17.6Lf}"), F(-1.234567e3));
+    test(STR("-0001_23_4#567000"), loc, STR("{:017.6Lf}"), F(-1.234567e3));
+  }
+}
+
+template <class F, class CharT>
+void test_floating_point_fixed_upper_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("0.000001"), STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0.000012"), STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0.000123"), STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0.001235"), STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0.012346"), STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1.234567"), STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("12.345670"), STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123.456700"), STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1,234.567000"), STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12,345.670000"), STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123,456.700000"), STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1,234,567.000000"), STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12,345,670.000000"), STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("123,456,700,000,000,000,000.000000"), STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0.000001"), STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0.000012"), STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0.000123"), STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0.001235"), STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0.012346"), STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1.234567"), STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-12.345670"), STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123.456700"), STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1,234.567000"), STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12,345.670000"), STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123,456.700000"), STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1,234,567.000000"), STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12,345,670.000000"), STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-123,456,700,000,000,000,000.000000"), STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  std::locale::global(loc);
+  test(STR("0#000001"), STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0#000012"), STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0#000123"), STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0#001235"), STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0#012346"), STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1#234567"), STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("1_2#345670"), STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("12_3#456700"), STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1_23_4#567000"), STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12_34_5#670000"), STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123_45_6#700000"), STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1_234_56_7#000000"), STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12_345_67_0#000000"), STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0#000001"), STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0#000012"), STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0#000123"), STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0#001235"), STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0#012346"), STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1#234567"), STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-1_2#345670"), STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-12_3#456700"), STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1_23_4#567000"), STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12_34_5#670000"), STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123_45_6#700000"), STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1_234_56_7#000000"), STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12_345_67_0#000000"), STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  test(STR("0.000001"), en_US, STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0.000012"), en_US, STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0.000123"), en_US, STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0.001235"), en_US, STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0.012346"), en_US, STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1.234567"), en_US, STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("12.345670"), en_US, STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123.456700"), en_US, STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1,234.567000"), en_US, STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12,345.670000"), en_US, STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123,456.700000"), en_US, STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1,234,567.000000"), en_US, STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12,345,670.000000"), en_US, STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("123,456,700,000,000,000,000.000000"), en_US, STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0.000001"), en_US, STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0.000012"), en_US, STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0.000123"), en_US, STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0.001235"), en_US, STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0.012346"), en_US, STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1.234567"), en_US, STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-12.345670"), en_US, STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123.456700"), en_US, STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1,234.567000"), en_US, STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12,345.670000"), en_US, STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123,456.700000"), en_US, STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1,234,567.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12,345,670.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-123,456,700,000,000,000,000.000000"), en_US, STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  std::locale::global(en_US);
+  test(STR("0#000001"), loc, STR("{:.6Lf}"), F(1.234567e-6));
+  test(STR("0#000012"), loc, STR("{:.6Lf}"), F(1.234567e-5));
+  test(STR("0#000123"), loc, STR("{:.6Lf}"), F(1.234567e-4));
+  test(STR("0#001235"), loc, STR("{:.6Lf}"), F(1.234567e-3));
+  test(STR("0#012346"), loc, STR("{:.6Lf}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6Lf}"), F(1.234567e-1));
+  test(STR("1#234567"), loc, STR("{:.6Lf}"), F(1.234567e0));
+  test(STR("1_2#345670"), loc, STR("{:.6Lf}"), F(1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("12_3#456700"), loc, STR("{:.6Lf}"), F(1.234567e2));
+    test(STR("1_23_4#567000"), loc, STR("{:.6Lf}"), F(1.234567e3));
+    test(STR("12_34_5#670000"), loc, STR("{:.6Lf}"), F(1.234567e4));
+    test(STR("123_45_6#700000"), loc, STR("{:.6Lf}"), F(1.234567e5));
+    test(STR("1_234_56_7#000000"), loc, STR("{:.6Lf}"), F(1.234567e6));
+    test(STR("12_345_67_0#000000"), loc, STR("{:.6Lf}"), F(1.234567e7));
+    test(STR("1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), loc, STR("{:.6Lf}"), F(1.234567e20));
+  }
+  test(STR("-0#000001"), loc, STR("{:.6Lf}"), F(-1.234567e-6));
+  test(STR("-0#000012"), loc, STR("{:.6Lf}"), F(-1.234567e-5));
+  test(STR("-0#000123"), loc, STR("{:.6Lf}"), F(-1.234567e-4));
+  test(STR("-0#001235"), loc, STR("{:.6Lf}"), F(-1.234567e-3));
+  test(STR("-0#012346"), loc, STR("{:.6Lf}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6Lf}"), F(-1.234567e-1));
+  test(STR("-1#234567"), loc, STR("{:.6Lf}"), F(-1.234567e0));
+  test(STR("-1_2#345670"), loc, STR("{:.6Lf}"), F(-1.234567e1));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-12_3#456700"), loc, STR("{:.6Lf}"), F(-1.234567e2));
+    test(STR("-1_23_4#567000"), loc, STR("{:.6Lf}"), F(-1.234567e3));
+    test(STR("-12_34_5#670000"), loc, STR("{:.6Lf}"), F(-1.234567e4));
+    test(STR("-123_45_6#700000"), loc, STR("{:.6Lf}"), F(-1.234567e5));
+    test(STR("-1_234_56_7#000000"), loc, STR("{:.6Lf}"), F(-1.234567e6));
+    test(STR("-12_345_67_0#000000"), loc, STR("{:.6Lf}"), F(-1.234567e7));
+    test(STR("-1_2_3_4_5_6_7_0_0_0_0_0_0_00_000_00_0#000000"), loc, STR("{:.6Lf}"), F(-1.234567e20));
+  }
+
+  // *** Fill, align, zero padding ***
+  if constexpr (sizeof(F) > sizeof(float)) {
+    std::locale::global(en_US);
+    test(STR("1,234.567000$$$"), STR("{:$<15.6Lf}"), F(1.234567e3));
+    test(STR("$$$1,234.567000"), STR("{:$>15.6Lf}"), F(1.234567e3));
+    test(STR("$1,234.567000$$"), STR("{:$^15.6Lf}"), F(1.234567e3));
+    test(STR("0001,234.567000"), STR("{:015.6Lf}"), F(1.234567e3));
+    test(STR("-1,234.567000$$$"), STR("{:$<16.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1,234.567000"), STR("{:$>16.6Lf}"), F(-1.234567e3));
+    test(STR("$-1,234.567000$$"), STR("{:$^16.6Lf}"), F(-1.234567e3));
+    test(STR("-0001,234.567000"), STR("{:016.6Lf}"), F(-1.234567e3));
+
+    std::locale::global(loc);
+    test(STR("1_23_4#567000$$$"), STR("{:$<16.6Lf}"), F(1.234567e3));
+    test(STR("$$$1_23_4#567000"), STR("{:$>16.6Lf}"), F(1.234567e3));
+    test(STR("$1_23_4#567000$$"), STR("{:$^16.6Lf}"), F(1.234567e3));
+    test(STR("0001_23_4#567000"), STR("{:016.6Lf}"), F(1.234567e3));
+    test(STR("-1_23_4#567000$$$"), STR("{:$<17.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1_23_4#567000"), STR("{:$>17.6Lf}"), F(-1.234567e3));
+    test(STR("$-1_23_4#567000$$"), STR("{:$^17.6Lf}"), F(-1.234567e3));
+    test(STR("-0001_23_4#567000"), STR("{:017.6Lf}"), F(-1.234567e3));
+
+    test(STR("1,234.567000$$$"), en_US, STR("{:$<15.6Lf}"), F(1.234567e3));
+    test(STR("$$$1,234.567000"), en_US, STR("{:$>15.6Lf}"), F(1.234567e3));
+    test(STR("$1,234.567000$$"), en_US, STR("{:$^15.6Lf}"), F(1.234567e3));
+    test(STR("0001,234.567000"), en_US, STR("{:015.6Lf}"), F(1.234567e3));
+    test(STR("-1,234.567000$$$"), en_US, STR("{:$<16.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1,234.567000"), en_US, STR("{:$>16.6Lf}"), F(-1.234567e3));
+    test(STR("$-1,234.567000$$"), en_US, STR("{:$^16.6Lf}"), F(-1.234567e3));
+    test(STR("-0001,234.567000"), en_US, STR("{:016.6Lf}"), F(-1.234567e3));
+
+    std::locale::global(en_US);
+    test(STR("1_23_4#567000$$$"), loc, STR("{:$<16.6Lf}"), F(1.234567e3));
+    test(STR("$$$1_23_4#567000"), loc, STR("{:$>16.6Lf}"), F(1.234567e3));
+    test(STR("$1_23_4#567000$$"), loc, STR("{:$^16.6Lf}"), F(1.234567e3));
+    test(STR("0001_23_4#567000"), loc, STR("{:016.6Lf}"), F(1.234567e3));
+    test(STR("-1_23_4#567000$$$"), loc, STR("{:$<17.6Lf}"), F(-1.234567e3));
+    test(STR("$$$-1_23_4#567000"), loc, STR("{:$>17.6Lf}"), F(-1.234567e3));
+    test(STR("$-1_23_4#567000$$"), loc, STR("{:$^17.6Lf}"), F(-1.234567e3));
+    test(STR("-0001_23_4#567000"), loc, STR("{:017.6Lf}"), F(-1.234567e3));
+  }
+}
+
+template <class F, class CharT>
+void test_floating_point_general_lower_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.23457e-06"), STR("{:.6Lg}"), F(1.234567e-6));
+  test(STR("1.23457e-05"), STR("{:.6Lg}"), F(1.234567e-5));
+  test(STR("0.000123457"), STR("{:.6Lg}"), F(1.234567e-4));
+  test(STR("0.00123457"), STR("{:.6Lg}"), F(1.234567e-3));
+  test(STR("0.0123457"), STR("{:.6Lg}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6Lg}"), F(1.234567e-1));
+  test(STR("1.23457"), STR("{:.6Lg}"), F(1.234567e0));
+  test(STR("12.3457"), STR("{:.6Lg}"), F(1.234567e1));
+  test(STR("123.457"), STR("{:.6Lg}"), F(1.234567e2));
+  test(STR("1,234.57"), STR("{:.6Lg}"), F(1.234567e3));
+  test(STR("12,345.7"), STR("{:.6Lg}"), F(1.234567e4));
+  test(STR("123,457"), STR("{:.6Lg}"), F(1.234567e5));
+  test(STR("1.23457e+06"), STR("{:.6Lg}"), F(1.234567e6));
+  test(STR("1.23457e+07"), STR("{:.6Lg}"), F(1.234567e7));
+  test(STR("-1.23457e-06"), STR("{:.6Lg}"), F(-1.234567e-6));
+  test(STR("-1.23457e-05"), STR("{:.6Lg}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), STR("{:.6Lg}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), STR("{:.6Lg}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), STR("{:.6Lg}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6Lg}"), F(-1.234567e-1));
+  test(STR("-1.23457"), STR("{:.6Lg}"), F(-1.234567e0));
+  test(STR("-12.3457"), STR("{:.6Lg}"), F(-1.234567e1));
+  test(STR("-123.457"), STR("{:.6Lg}"), F(-1.234567e2));
+  test(STR("-1,234.57"), STR("{:.6Lg}"), F(-1.234567e3));
+  test(STR("-12,345.7"), STR("{:.6Lg}"), F(-1.234567e4));
+  test(STR("-123,457"), STR("{:.6Lg}"), F(-1.234567e5));
+  test(STR("-1.23457e+06"), STR("{:.6Lg}"), F(-1.234567e6));
+  test(STR("-1.23457e+07"), STR("{:.6Lg}"), F(-1.234567e7));
+
+  std::locale::global(loc);
+  test(STR("1#23457e-06"), STR("{:.6Lg}"), F(1.234567e-6));
+  test(STR("1#23457e-05"), STR("{:.6Lg}"), F(1.234567e-5));
+  test(STR("0#000123457"), STR("{:.6Lg}"), F(1.234567e-4));
+  test(STR("0#00123457"), STR("{:.6Lg}"), F(1.234567e-3));
+  test(STR("0#0123457"), STR("{:.6Lg}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6Lg}"), F(1.234567e-1));
+  test(STR("1#23457"), STR("{:.6Lg}"), F(1.234567e0));
+  test(STR("1_2#3457"), STR("{:.6Lg}"), F(1.234567e1));
+  test(STR("12_3#457"), STR("{:.6Lg}"), F(1.234567e2));
+  test(STR("1_23_4#57"), STR("{:.6Lg}"), F(1.234567e3));
+  test(STR("12_34_5#7"), STR("{:.6Lg}"), F(1.234567e4));
+  test(STR("123_45_7"), STR("{:.6Lg}"), F(1.234567e5));
+  test(STR("1#23457e+06"), STR("{:.6Lg}"), F(1.234567e6));
+  test(STR("1#23457e+07"), STR("{:.6Lg}"), F(1.234567e7));
+  test(STR("-1#23457e-06"), STR("{:.6Lg}"), F(-1.234567e-6));
+  test(STR("-1#23457e-05"), STR("{:.6Lg}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), STR("{:.6Lg}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), STR("{:.6Lg}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), STR("{:.6Lg}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6Lg}"), F(-1.234567e-1));
+  test(STR("-1#23457"), STR("{:.6Lg}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), STR("{:.6Lg}"), F(-1.234567e1));
+  test(STR("-12_3#457"), STR("{:.6Lg}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), STR("{:.6Lg}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), STR("{:.6Lg}"), F(-1.234567e4));
+  test(STR("-123_45_7"), STR("{:.6Lg}"), F(-1.234567e5));
+  test(STR("-1#23457e+06"), STR("{:.6Lg}"), F(-1.234567e6));
+  test(STR("-1#23457e+07"), STR("{:.6Lg}"), F(-1.234567e7));
+
+  test(STR("1.23457e-06"), en_US, STR("{:.6Lg}"), F(1.234567e-6));
+  test(STR("1.23457e-05"), en_US, STR("{:.6Lg}"), F(1.234567e-5));
+  test(STR("0.000123457"), en_US, STR("{:.6Lg}"), F(1.234567e-4));
+  test(STR("0.00123457"), en_US, STR("{:.6Lg}"), F(1.234567e-3));
+  test(STR("0.0123457"), en_US, STR("{:.6Lg}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6Lg}"), F(1.234567e-1));
+  test(STR("1.23457"), en_US, STR("{:.6Lg}"), F(1.234567e0));
+  test(STR("12.3457"), en_US, STR("{:.6Lg}"), F(1.234567e1));
+  test(STR("123.457"), en_US, STR("{:.6Lg}"), F(1.234567e2));
+  test(STR("1,234.57"), en_US, STR("{:.6Lg}"), F(1.234567e3));
+  test(STR("12,345.7"), en_US, STR("{:.6Lg}"), F(1.234567e4));
+  test(STR("123,457"), en_US, STR("{:.6Lg}"), F(1.234567e5));
+  test(STR("1.23457e+06"), en_US, STR("{:.6Lg}"), F(1.234567e6));
+  test(STR("1.23457e+07"), en_US, STR("{:.6Lg}"), F(1.234567e7));
+  test(STR("-1.23457e-06"), en_US, STR("{:.6Lg}"), F(-1.234567e-6));
+  test(STR("-1.23457e-05"), en_US, STR("{:.6Lg}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), en_US, STR("{:.6Lg}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), en_US, STR("{:.6Lg}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), en_US, STR("{:.6Lg}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6Lg}"), F(-1.234567e-1));
+  test(STR("-1.23457"), en_US, STR("{:.6Lg}"), F(-1.234567e0));
+  test(STR("-12.3457"), en_US, STR("{:.6Lg}"), F(-1.234567e1));
+  test(STR("-123.457"), en_US, STR("{:.6Lg}"), F(-1.234567e2));
+  test(STR("-1,234.57"), en_US, STR("{:.6Lg}"), F(-1.234567e3));
+  test(STR("-12,345.7"), en_US, STR("{:.6Lg}"), F(-1.234567e4));
+  test(STR("-123,457"), en_US, STR("{:.6Lg}"), F(-1.234567e5));
+  test(STR("-1.23457e+06"), en_US, STR("{:.6Lg}"), F(-1.234567e6));
+  test(STR("-1.23457e+07"), en_US, STR("{:.6Lg}"), F(-1.234567e7));
+
+  std::locale::global(en_US);
+  test(STR("1#23457e-06"), loc, STR("{:.6Lg}"), F(1.234567e-6));
+  test(STR("1#23457e-05"), loc, STR("{:.6Lg}"), F(1.234567e-5));
+  test(STR("0#000123457"), loc, STR("{:.6Lg}"), F(1.234567e-4));
+  test(STR("0#00123457"), loc, STR("{:.6Lg}"), F(1.234567e-3));
+  test(STR("0#0123457"), loc, STR("{:.6Lg}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6Lg}"), F(1.234567e-1));
+  test(STR("1#23457"), loc, STR("{:.6Lg}"), F(1.234567e0));
+  test(STR("1_2#3457"), loc, STR("{:.6Lg}"), F(1.234567e1));
+  test(STR("12_3#457"), loc, STR("{:.6Lg}"), F(1.234567e2));
+  test(STR("1_23_4#57"), loc, STR("{:.6Lg}"), F(1.234567e3));
+  test(STR("12_34_5#7"), loc, STR("{:.6Lg}"), F(1.234567e4));
+  test(STR("123_45_7"), loc, STR("{:.6Lg}"), F(1.234567e5));
+  test(STR("1#23457e+06"), loc, STR("{:.6Lg}"), F(1.234567e6));
+  test(STR("1#23457e+07"), loc, STR("{:.6Lg}"), F(1.234567e7));
+  test(STR("-1#23457e-06"), loc, STR("{:.6Lg}"), F(-1.234567e-6));
+  test(STR("-1#23457e-05"), loc, STR("{:.6Lg}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), loc, STR("{:.6Lg}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), loc, STR("{:.6Lg}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), loc, STR("{:.6Lg}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6Lg}"), F(-1.234567e-1));
+  test(STR("-1#23457"), loc, STR("{:.6Lg}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), loc, STR("{:.6Lg}"), F(-1.234567e1));
+  test(STR("-12_3#457"), loc, STR("{:.6Lg}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), loc, STR("{:.6Lg}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), loc, STR("{:.6Lg}"), F(-1.234567e4));
+  test(STR("-123_45_7"), loc, STR("{:.6Lg}"), F(-1.234567e5));
+  test(STR("-1#23457e+06"), loc, STR("{:.6Lg}"), F(-1.234567e6));
+  test(STR("-1#23457e+07"), loc, STR("{:.6Lg}"), F(-1.234567e7));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1,234.57$$$"), STR("{:$<11.6Lg}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), STR("{:$>11.6Lg}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), STR("{:$^11.6Lg}"), F(1.234567e3));
+  test(STR("0001,234.57"), STR("{:011.6Lg}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), STR("{:$<12.6Lg}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), STR("{:$>12.6Lg}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), STR("{:$^12.6Lg}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), STR("{:012.6Lg}"), F(-1.234567e3));
+
+  std::locale::global(loc);
+  test(STR("1_23_4#57$$$"), STR("{:$<12.6Lg}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), STR("{:$>12.6Lg}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), STR("{:$^12.6Lg}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), STR("{:012.6Lg}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), STR("{:$<13.6Lg}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), STR("{:$>13.6Lg}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), STR("{:$^13.6Lg}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), STR("{:013.6Lg}"), F(-1.234567e3));
+
+  test(STR("1,234.57$$$"), en_US, STR("{:$<11.6Lg}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), en_US, STR("{:$>11.6Lg}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), en_US, STR("{:$^11.6Lg}"), F(1.234567e3));
+  test(STR("0001,234.57"), en_US, STR("{:011.6Lg}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), en_US, STR("{:$<12.6Lg}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), en_US, STR("{:$>12.6Lg}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), en_US, STR("{:$^12.6Lg}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), en_US, STR("{:012.6Lg}"), F(-1.234567e3));
+
+  std::locale::global(en_US);
+  test(STR("1_23_4#57$$$"), loc, STR("{:$<12.6Lg}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), loc, STR("{:$>12.6Lg}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), loc, STR("{:$^12.6Lg}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), loc, STR("{:012.6Lg}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), loc, STR("{:$<13.6Lg}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), loc, STR("{:$>13.6Lg}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), loc, STR("{:$^13.6Lg}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), loc, STR("{:013.6Lg}"), F(-1.234567e3));
+}
+
+template <class F, class CharT>
+void test_floating_point_general_upper_case() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.23457E-06"), STR("{:.6LG}"), F(1.234567e-6));
+  test(STR("1.23457E-05"), STR("{:.6LG}"), F(1.234567e-5));
+  test(STR("0.000123457"), STR("{:.6LG}"), F(1.234567e-4));
+  test(STR("0.00123457"), STR("{:.6LG}"), F(1.234567e-3));
+  test(STR("0.0123457"), STR("{:.6LG}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6LG}"), F(1.234567e-1));
+  test(STR("1.23457"), STR("{:.6LG}"), F(1.234567e0));
+  test(STR("12.3457"), STR("{:.6LG}"), F(1.234567e1));
+  test(STR("123.457"), STR("{:.6LG}"), F(1.234567e2));
+  test(STR("1,234.57"), STR("{:.6LG}"), F(1.234567e3));
+  test(STR("12,345.7"), STR("{:.6LG}"), F(1.234567e4));
+  test(STR("123,457"), STR("{:.6LG}"), F(1.234567e5));
+  test(STR("1.23457E+06"), STR("{:.6LG}"), F(1.234567e6));
+  test(STR("1.23457E+07"), STR("{:.6LG}"), F(1.234567e7));
+  test(STR("-1.23457E-06"), STR("{:.6LG}"), F(-1.234567e-6));
+  test(STR("-1.23457E-05"), STR("{:.6LG}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), STR("{:.6LG}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), STR("{:.6LG}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), STR("{:.6LG}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6LG}"), F(-1.234567e-1));
+  test(STR("-1.23457"), STR("{:.6LG}"), F(-1.234567e0));
+  test(STR("-12.3457"), STR("{:.6LG}"), F(-1.234567e1));
+  test(STR("-123.457"), STR("{:.6LG}"), F(-1.234567e2));
+  test(STR("-1,234.57"), STR("{:.6LG}"), F(-1.234567e3));
+  test(STR("-12,345.7"), STR("{:.6LG}"), F(-1.234567e4));
+  test(STR("-123,457"), STR("{:.6LG}"), F(-1.234567e5));
+  test(STR("-1.23457E+06"), STR("{:.6LG}"), F(-1.234567e6));
+  test(STR("-1.23457E+07"), STR("{:.6LG}"), F(-1.234567e7));
+
+  std::locale::global(loc);
+  test(STR("1#23457E-06"), STR("{:.6LG}"), F(1.234567e-6));
+  test(STR("1#23457E-05"), STR("{:.6LG}"), F(1.234567e-5));
+  test(STR("0#000123457"), STR("{:.6LG}"), F(1.234567e-4));
+  test(STR("0#00123457"), STR("{:.6LG}"), F(1.234567e-3));
+  test(STR("0#0123457"), STR("{:.6LG}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6LG}"), F(1.234567e-1));
+  test(STR("1#23457"), STR("{:.6LG}"), F(1.234567e0));
+  test(STR("1_2#3457"), STR("{:.6LG}"), F(1.234567e1));
+  test(STR("12_3#457"), STR("{:.6LG}"), F(1.234567e2));
+  test(STR("1_23_4#57"), STR("{:.6LG}"), F(1.234567e3));
+  test(STR("12_34_5#7"), STR("{:.6LG}"), F(1.234567e4));
+  test(STR("123_45_7"), STR("{:.6LG}"), F(1.234567e5));
+  test(STR("1#23457E+06"), STR("{:.6LG}"), F(1.234567e6));
+  test(STR("1#23457E+07"), STR("{:.6LG}"), F(1.234567e7));
+  test(STR("-1#23457E-06"), STR("{:.6LG}"), F(-1.234567e-6));
+  test(STR("-1#23457E-05"), STR("{:.6LG}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), STR("{:.6LG}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), STR("{:.6LG}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), STR("{:.6LG}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6LG}"), F(-1.234567e-1));
+  test(STR("-1#23457"), STR("{:.6LG}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), STR("{:.6LG}"), F(-1.234567e1));
+  test(STR("-12_3#457"), STR("{:.6LG}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), STR("{:.6LG}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), STR("{:.6LG}"), F(-1.234567e4));
+  test(STR("-123_45_7"), STR("{:.6LG}"), F(-1.234567e5));
+  test(STR("-1#23457E+06"), STR("{:.6LG}"), F(-1.234567e6));
+  test(STR("-1#23457E+07"), STR("{:.6LG}"), F(-1.234567e7));
+
+  test(STR("1.23457E-06"), en_US, STR("{:.6LG}"), F(1.234567e-6));
+  test(STR("1.23457E-05"), en_US, STR("{:.6LG}"), F(1.234567e-5));
+  test(STR("0.000123457"), en_US, STR("{:.6LG}"), F(1.234567e-4));
+  test(STR("0.00123457"), en_US, STR("{:.6LG}"), F(1.234567e-3));
+  test(STR("0.0123457"), en_US, STR("{:.6LG}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6LG}"), F(1.234567e-1));
+  test(STR("1.23457"), en_US, STR("{:.6LG}"), F(1.234567e0));
+  test(STR("12.3457"), en_US, STR("{:.6LG}"), F(1.234567e1));
+  test(STR("123.457"), en_US, STR("{:.6LG}"), F(1.234567e2));
+  test(STR("1,234.57"), en_US, STR("{:.6LG}"), F(1.234567e3));
+  test(STR("12,345.7"), en_US, STR("{:.6LG}"), F(1.234567e4));
+  test(STR("123,457"), en_US, STR("{:.6LG}"), F(1.234567e5));
+  test(STR("1.23457E+06"), en_US, STR("{:.6LG}"), F(1.234567e6));
+  test(STR("1.23457E+07"), en_US, STR("{:.6LG}"), F(1.234567e7));
+  test(STR("-1.23457E-06"), en_US, STR("{:.6LG}"), F(-1.234567e-6));
+  test(STR("-1.23457E-05"), en_US, STR("{:.6LG}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), en_US, STR("{:.6LG}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), en_US, STR("{:.6LG}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), en_US, STR("{:.6LG}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6LG}"), F(-1.234567e-1));
+  test(STR("-1.23457"), en_US, STR("{:.6LG}"), F(-1.234567e0));
+  test(STR("-12.3457"), en_US, STR("{:.6LG}"), F(-1.234567e1));
+  test(STR("-123.457"), en_US, STR("{:.6LG}"), F(-1.234567e2));
+  test(STR("-1,234.57"), en_US, STR("{:.6LG}"), F(-1.234567e3));
+  test(STR("-12,345.7"), en_US, STR("{:.6LG}"), F(-1.234567e4));
+  test(STR("-123,457"), en_US, STR("{:.6LG}"), F(-1.234567e5));
+  test(STR("-1.23457E+06"), en_US, STR("{:.6LG}"), F(-1.234567e6));
+  test(STR("-1.23457E+07"), en_US, STR("{:.6LG}"), F(-1.234567e7));
+
+  std::locale::global(en_US);
+  test(STR("1#23457E-06"), loc, STR("{:.6LG}"), F(1.234567e-6));
+  test(STR("1#23457E-05"), loc, STR("{:.6LG}"), F(1.234567e-5));
+  test(STR("0#000123457"), loc, STR("{:.6LG}"), F(1.234567e-4));
+  test(STR("0#00123457"), loc, STR("{:.6LG}"), F(1.234567e-3));
+  test(STR("0#0123457"), loc, STR("{:.6LG}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6LG}"), F(1.234567e-1));
+  test(STR("1#23457"), loc, STR("{:.6LG}"), F(1.234567e0));
+  test(STR("1_2#3457"), loc, STR("{:.6LG}"), F(1.234567e1));
+  test(STR("12_3#457"), loc, STR("{:.6LG}"), F(1.234567e2));
+  test(STR("1_23_4#57"), loc, STR("{:.6LG}"), F(1.234567e3));
+  test(STR("12_34_5#7"), loc, STR("{:.6LG}"), F(1.234567e4));
+  test(STR("123_45_7"), loc, STR("{:.6LG}"), F(1.234567e5));
+  test(STR("1#23457E+06"), loc, STR("{:.6LG}"), F(1.234567e6));
+  test(STR("1#23457E+07"), loc, STR("{:.6LG}"), F(1.234567e7));
+  test(STR("-1#23457E-06"), loc, STR("{:.6LG}"), F(-1.234567e-6));
+  test(STR("-1#23457E-05"), loc, STR("{:.6LG}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), loc, STR("{:.6LG}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), loc, STR("{:.6LG}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), loc, STR("{:.6LG}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6LG}"), F(-1.234567e-1));
+  test(STR("-1#23457"), loc, STR("{:.6LG}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), loc, STR("{:.6LG}"), F(-1.234567e1));
+  test(STR("-12_3#457"), loc, STR("{:.6LG}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), loc, STR("{:.6LG}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), loc, STR("{:.6LG}"), F(-1.234567e4));
+  test(STR("-123_45_7"), loc, STR("{:.6LG}"), F(-1.234567e5));
+  test(STR("-1#23457E+06"), loc, STR("{:.6LG}"), F(-1.234567e6));
+  test(STR("-1#23457E+07"), loc, STR("{:.6LG}"), F(-1.234567e7));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1,234.57$$$"), STR("{:$<11.6LG}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), STR("{:$>11.6LG}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), STR("{:$^11.6LG}"), F(1.234567e3));
+  test(STR("0001,234.57"), STR("{:011.6LG}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), STR("{:$<12.6LG}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), STR("{:$>12.6LG}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), STR("{:$^12.6LG}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), STR("{:012.6LG}"), F(-1.234567e3));
+
+  std::locale::global(loc);
+  test(STR("1_23_4#57$$$"), STR("{:$<12.6LG}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), STR("{:$>12.6LG}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), STR("{:$^12.6LG}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), STR("{:012.6LG}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), STR("{:$<13.6LG}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), STR("{:$>13.6LG}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), STR("{:$^13.6LG}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), STR("{:013.6LG}"), F(-1.234567e3));
+
+  test(STR("1,234.57$$$"), en_US, STR("{:$<11.6LG}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), en_US, STR("{:$>11.6LG}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), en_US, STR("{:$^11.6LG}"), F(1.234567e3));
+  test(STR("0001,234.57"), en_US, STR("{:011.6LG}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), en_US, STR("{:$<12.6LG}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), en_US, STR("{:$>12.6LG}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), en_US, STR("{:$^12.6LG}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), en_US, STR("{:012.6LG}"), F(-1.234567e3));
+
+  std::locale::global(en_US);
+  test(STR("1_23_4#57$$$"), loc, STR("{:$<12.6LG}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), loc, STR("{:$>12.6LG}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), loc, STR("{:$^12.6LG}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), loc, STR("{:012.6LG}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), loc, STR("{:$<13.6LG}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), loc, STR("{:$>13.6LG}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), loc, STR("{:$^13.6LG}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), loc, STR("{:013.6LG}"), F(-1.234567e3));
+}
+
+template <class F, class CharT>
+void test_floating_point_default() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.234567e-06"), STR("{:L}"), F(1.234567e-6));
+  test(STR("1.234567e-05"), STR("{:L}"), F(1.234567e-5));
+  test(STR("0.0001234567"), STR("{:L}"), F(1.234567e-4));
+  test(STR("0.001234567"), STR("{:L}"), F(1.234567e-3));
+  test(STR("0.01234567"), STR("{:L}"), F(1.234567e-2));
+  test(STR("0.1234567"), STR("{:L}"), F(1.234567e-1));
+  test(STR("1.234567"), STR("{:L}"), F(1.234567e0));
+  test(STR("12.34567"), STR("{:L}"), F(1.234567e1));
+  test(STR("123.4567"), STR("{:L}"), F(1.234567e2));
+  test(STR("1,234.567"), STR("{:L}"), F(1.234567e3));
+  test(STR("12,345.67"), STR("{:L}"), F(1.234567e4));
+  test(STR("123,456.7"), STR("{:L}"), F(1.234567e5));
+  test(STR("1,234,567"), STR("{:L}"), F(1.234567e6));
+  test(STR("12,345,670"), STR("{:L}"), F(1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123,456,700"), STR("{:L}"), F(1.234567e8));
+    test(STR("1,234,567,000"), STR("{:L}"), F(1.234567e9));
+    test(STR("12,345,670,000"), STR("{:L}"), F(1.234567e10));
+    test(STR("123,456,700,000"), STR("{:L}"), F(1.234567e11));
+    test(STR("1.234567e+12"), STR("{:L}"), F(1.234567e12));
+    test(STR("1.234567e+13"), STR("{:L}"), F(1.234567e13));
+  }
+  test(STR("-1.234567e-06"), STR("{:L}"), F(-1.234567e-6));
+  test(STR("-1.234567e-05"), STR("{:L}"), F(-1.234567e-5));
+  test(STR("-0.0001234567"), STR("{:L}"), F(-1.234567e-4));
+  test(STR("-0.001234567"), STR("{:L}"), F(-1.234567e-3));
+  test(STR("-0.01234567"), STR("{:L}"), F(-1.234567e-2));
+  test(STR("-0.1234567"), STR("{:L}"), F(-1.234567e-1));
+  test(STR("-1.234567"), STR("{:L}"), F(-1.234567e0));
+  test(STR("-12.34567"), STR("{:L}"), F(-1.234567e1));
+  test(STR("-123.4567"), STR("{:L}"), F(-1.234567e2));
+  test(STR("-1,234.567"), STR("{:L}"), F(-1.234567e3));
+  test(STR("-12,345.67"), STR("{:L}"), F(-1.234567e4));
+  test(STR("-123,456.7"), STR("{:L}"), F(-1.234567e5));
+  test(STR("-1,234,567"), STR("{:L}"), F(-1.234567e6));
+  test(STR("-12,345,670"), STR("{:L}"), F(-1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123,456,700"), STR("{:L}"), F(-1.234567e8));
+    test(STR("-1,234,567,000"), STR("{:L}"), F(-1.234567e9));
+    test(STR("-12,345,670,000"), STR("{:L}"), F(-1.234567e10));
+    test(STR("-123,456,700,000"), STR("{:L}"), F(-1.234567e11));
+    test(STR("-1.234567e+12"), STR("{:L}"), F(-1.234567e12));
+    test(STR("-1.234567e+13"), STR("{:L}"), F(-1.234567e13));
+  }
+
+  std::locale::global(loc);
+  test(STR("1#234567e-06"), STR("{:L}"), F(1.234567e-6));
+  test(STR("1#234567e-05"), STR("{:L}"), F(1.234567e-5));
+  test(STR("0#0001234567"), STR("{:L}"), F(1.234567e-4));
+  test(STR("0#001234567"), STR("{:L}"), F(1.234567e-3));
+  test(STR("0#01234567"), STR("{:L}"), F(1.234567e-2));
+  test(STR("0#1234567"), STR("{:L}"), F(1.234567e-1));
+  test(STR("1#234567"), STR("{:L}"), F(1.234567e0));
+  test(STR("1_2#34567"), STR("{:L}"), F(1.234567e1));
+  test(STR("12_3#4567"), STR("{:L}"), F(1.234567e2));
+  test(STR("1_23_4#567"), STR("{:L}"), F(1.234567e3));
+  test(STR("12_34_5#67"), STR("{:L}"), F(1.234567e4));
+  test(STR("123_45_6#7"), STR("{:L}"), F(1.234567e5));
+  test(STR("1_234_56_7"), STR("{:L}"), F(1.234567e6));
+  test(STR("12_345_67_0"), STR("{:L}"), F(1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("1_23_456_70_0"), STR("{:L}"), F(1.234567e8));
+    test(STR("1_2_34_567_00_0"), STR("{:L}"), F(1.234567e9));
+    test(STR("1_2_3_45_670_00_0"), STR("{:L}"), F(1.234567e10));
+    test(STR("1_2_3_4_56_700_00_0"), STR("{:L}"), F(1.234567e11));
+    test(STR("1#234567e+12"), STR("{:L}"), F(1.234567e12));
+    test(STR("1#234567e+13"), STR("{:L}"), F(1.234567e13));
+  }
+  test(STR("-1#234567e-06"), STR("{:L}"), F(-1.234567e-6));
+  test(STR("-1#234567e-05"), STR("{:L}"), F(-1.234567e-5));
+  test(STR("-0#0001234567"), STR("{:L}"), F(-1.234567e-4));
+  test(STR("-0#001234567"), STR("{:L}"), F(-1.234567e-3));
+  test(STR("-0#01234567"), STR("{:L}"), F(-1.234567e-2));
+  test(STR("-0#1234567"), STR("{:L}"), F(-1.234567e-1));
+  test(STR("-1#234567"), STR("{:L}"), F(-1.234567e0));
+  test(STR("-1_2#34567"), STR("{:L}"), F(-1.234567e1));
+  test(STR("-12_3#4567"), STR("{:L}"), F(-1.234567e2));
+  test(STR("-1_23_4#567"), STR("{:L}"), F(-1.234567e3));
+  test(STR("-12_34_5#67"), STR("{:L}"), F(-1.234567e4));
+  test(STR("-123_45_6#7"), STR("{:L}"), F(-1.234567e5));
+  test(STR("-1_234_56_7"), STR("{:L}"), F(-1.234567e6));
+  test(STR("-12_345_67_0"), STR("{:L}"), F(-1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-1_23_456_70_0"), STR("{:L}"), F(-1.234567e8));
+    test(STR("-1_2_34_567_00_0"), STR("{:L}"), F(-1.234567e9));
+    test(STR("-1_2_3_45_670_00_0"), STR("{:L}"), F(-1.234567e10));
+    test(STR("-1_2_3_4_56_700_00_0"), STR("{:L}"), F(-1.234567e11));
+    test(STR("-1#234567e+12"), STR("{:L}"), F(-1.234567e12));
+    test(STR("-1#234567e+13"), STR("{:L}"), F(-1.234567e13));
+  }
+
+  test(STR("1.234567e-06"), en_US, STR("{:L}"), F(1.234567e-6));
+  test(STR("1.234567e-05"), en_US, STR("{:L}"), F(1.234567e-5));
+  test(STR("0.0001234567"), en_US, STR("{:L}"), F(1.234567e-4));
+  test(STR("0.001234567"), en_US, STR("{:L}"), F(1.234567e-3));
+  test(STR("0.01234567"), en_US, STR("{:L}"), F(1.234567e-2));
+  test(STR("0.1234567"), en_US, STR("{:L}"), F(1.234567e-1));
+  test(STR("1.234567"), en_US, STR("{:L}"), F(1.234567e0));
+  test(STR("12.34567"), en_US, STR("{:L}"), F(1.234567e1));
+  test(STR("123.4567"), en_US, STR("{:L}"), F(1.234567e2));
+  test(STR("1,234.567"), en_US, STR("{:L}"), F(1.234567e3));
+  test(STR("12,345.67"), en_US, STR("{:L}"), F(1.234567e4));
+  test(STR("123,456.7"), en_US, STR("{:L}"), F(1.234567e5));
+  test(STR("1,234,567"), en_US, STR("{:L}"), F(1.234567e6));
+  test(STR("12,345,670"), en_US, STR("{:L}"), F(1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("123,456,700"), en_US, STR("{:L}"), F(1.234567e8));
+    test(STR("1,234,567,000"), en_US, STR("{:L}"), F(1.234567e9));
+    test(STR("12,345,670,000"), en_US, STR("{:L}"), F(1.234567e10));
+    test(STR("123,456,700,000"), en_US, STR("{:L}"), F(1.234567e11));
+    test(STR("1.234567e+12"), en_US, STR("{:L}"), F(1.234567e12));
+    test(STR("1.234567e+13"), en_US, STR("{:L}"), F(1.234567e13));
+  }
+  test(STR("-1.234567e-06"), en_US, STR("{:L}"), F(-1.234567e-6));
+  test(STR("-1.234567e-05"), en_US, STR("{:L}"), F(-1.234567e-5));
+  test(STR("-0.0001234567"), en_US, STR("{:L}"), F(-1.234567e-4));
+  test(STR("-0.001234567"), en_US, STR("{:L}"), F(-1.234567e-3));
+  test(STR("-0.01234567"), en_US, STR("{:L}"), F(-1.234567e-2));
+  test(STR("-0.1234567"), en_US, STR("{:L}"), F(-1.234567e-1));
+  test(STR("-1.234567"), en_US, STR("{:L}"), F(-1.234567e0));
+  test(STR("-12.34567"), en_US, STR("{:L}"), F(-1.234567e1));
+  test(STR("-123.4567"), en_US, STR("{:L}"), F(-1.234567e2));
+  test(STR("-1,234.567"), en_US, STR("{:L}"), F(-1.234567e3));
+  test(STR("-12,345.67"), en_US, STR("{:L}"), F(-1.234567e4));
+  test(STR("-123,456.7"), en_US, STR("{:L}"), F(-1.234567e5));
+  test(STR("-1,234,567"), en_US, STR("{:L}"), F(-1.234567e6));
+  test(STR("-12,345,670"), en_US, STR("{:L}"), F(-1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-123,456,700"), en_US, STR("{:L}"), F(-1.234567e8));
+    test(STR("-1,234,567,000"), en_US, STR("{:L}"), F(-1.234567e9));
+    test(STR("-12,345,670,000"), en_US, STR("{:L}"), F(-1.234567e10));
+    test(STR("-123,456,700,000"), en_US, STR("{:L}"), F(-1.234567e11));
+    test(STR("-1.234567e+12"), en_US, STR("{:L}"), F(-1.234567e12));
+    test(STR("-1.234567e+13"), en_US, STR("{:L}"), F(-1.234567e13));
+  }
+
+  std::locale::global(en_US);
+  test(STR("1#234567e-06"), loc, STR("{:L}"), F(1.234567e-6));
+  test(STR("1#234567e-05"), loc, STR("{:L}"), F(1.234567e-5));
+  test(STR("0#0001234567"), loc, STR("{:L}"), F(1.234567e-4));
+  test(STR("0#001234567"), loc, STR("{:L}"), F(1.234567e-3));
+  test(STR("0#01234567"), loc, STR("{:L}"), F(1.234567e-2));
+  test(STR("0#1234567"), loc, STR("{:L}"), F(1.234567e-1));
+  test(STR("1#234567"), loc, STR("{:L}"), F(1.234567e0));
+  test(STR("1_2#34567"), loc, STR("{:L}"), F(1.234567e1));
+  test(STR("12_3#4567"), loc, STR("{:L}"), F(1.234567e2));
+  test(STR("1_23_4#567"), loc, STR("{:L}"), F(1.234567e3));
+  test(STR("12_34_5#67"), loc, STR("{:L}"), F(1.234567e4));
+  test(STR("123_45_6#7"), loc, STR("{:L}"), F(1.234567e5));
+  test(STR("1_234_56_7"), loc, STR("{:L}"), F(1.234567e6));
+  test(STR("12_345_67_0"), loc, STR("{:L}"), F(1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("1_23_456_70_0"), loc, STR("{:L}"), F(1.234567e8));
+    test(STR("1_2_34_567_00_0"), loc, STR("{:L}"), F(1.234567e9));
+    test(STR("1_2_3_45_670_00_0"), loc, STR("{:L}"), F(1.234567e10));
+    test(STR("1_2_3_4_56_700_00_0"), loc, STR("{:L}"), F(1.234567e11));
+    test(STR("1#234567e+12"), loc, STR("{:L}"), F(1.234567e12));
+    test(STR("1#234567e+13"), loc, STR("{:L}"), F(1.234567e13));
+  }
+  test(STR("-1#234567e-06"), loc, STR("{:L}"), F(-1.234567e-6));
+  test(STR("-1#234567e-05"), loc, STR("{:L}"), F(-1.234567e-5));
+  test(STR("-0#0001234567"), loc, STR("{:L}"), F(-1.234567e-4));
+  test(STR("-0#001234567"), loc, STR("{:L}"), F(-1.234567e-3));
+  test(STR("-0#01234567"), loc, STR("{:L}"), F(-1.234567e-2));
+  test(STR("-0#1234567"), loc, STR("{:L}"), F(-1.234567e-1));
+  test(STR("-1#234567"), loc, STR("{:L}"), F(-1.234567e0));
+  test(STR("-1_2#34567"), loc, STR("{:L}"), F(-1.234567e1));
+  test(STR("-12_3#4567"), loc, STR("{:L}"), F(-1.234567e2));
+  test(STR("-1_23_4#567"), loc, STR("{:L}"), F(-1.234567e3));
+  test(STR("-12_34_5#67"), loc, STR("{:L}"), F(-1.234567e4));
+  test(STR("-123_45_6#7"), loc, STR("{:L}"), F(-1.234567e5));
+  test(STR("-1_234_56_7"), loc, STR("{:L}"), F(-1.234567e6));
+  test(STR("-12_345_67_0"), loc, STR("{:L}"), F(-1.234567e7));
+  if constexpr (sizeof(F) > sizeof(float)) {
+    test(STR("-1_23_456_70_0"), loc, STR("{:L}"), F(-1.234567e8));
+    test(STR("-1_2_34_567_00_0"), loc, STR("{:L}"), F(-1.234567e9));
+    test(STR("-1_2_3_45_670_00_0"), loc, STR("{:L}"), F(-1.234567e10));
+    test(STR("-1_2_3_4_56_700_00_0"), loc, STR("{:L}"), F(-1.234567e11));
+    test(STR("-1#234567e+12"), loc, STR("{:L}"), F(-1.234567e12));
+    test(STR("-1#234567e+13"), loc, STR("{:L}"), F(-1.234567e13));
+  }
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1,234.567$$$"), STR("{:$<12L}"), F(1.234567e3));
+  test(STR("$$$1,234.567"), STR("{:$>12L}"), F(1.234567e3));
+  test(STR("$1,234.567$$"), STR("{:$^12L}"), F(1.234567e3));
+  test(STR("0001,234.567"), STR("{:012L}"), F(1.234567e3));
+  test(STR("-1,234.567$$$"), STR("{:$<13L}"), F(-1.234567e3));
+  test(STR("$$$-1,234.567"), STR("{:$>13L}"), F(-1.234567e3));
+  test(STR("$-1,234.567$$"), STR("{:$^13L}"), F(-1.234567e3));
+  test(STR("-0001,234.567"), STR("{:013L}"), F(-1.234567e3));
+
+  std::locale::global(loc);
+  test(STR("1_23_4#567$$$"), STR("{:$<13L}"), F(1.234567e3));
+  test(STR("$$$1_23_4#567"), STR("{:$>13L}"), F(1.234567e3));
+  test(STR("$1_23_4#567$$"), STR("{:$^13L}"), F(1.234567e3));
+  test(STR("0001_23_4#567"), STR("{:013L}"), F(1.234567e3));
+  test(STR("-1_23_4#567$$$"), STR("{:$<14L}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#567"), STR("{:$>14L}"), F(-1.234567e3));
+  test(STR("$-1_23_4#567$$"), STR("{:$^14L}"), F(-1.234567e3));
+  test(STR("-0001_23_4#567"), STR("{:014L}"), F(-1.234567e3));
+
+  test(STR("1,234.567$$$"), en_US, STR("{:$<12L}"), F(1.234567e3));
+  test(STR("$$$1,234.567"), en_US, STR("{:$>12L}"), F(1.234567e3));
+  test(STR("$1,234.567$$"), en_US, STR("{:$^12L}"), F(1.234567e3));
+  test(STR("0001,234.567"), en_US, STR("{:012L}"), F(1.234567e3));
+  test(STR("-1,234.567$$$"), en_US, STR("{:$<13L}"), F(-1.234567e3));
+  test(STR("$$$-1,234.567"), en_US, STR("{:$>13L}"), F(-1.234567e3));
+  test(STR("$-1,234.567$$"), en_US, STR("{:$^13L}"), F(-1.234567e3));
+  test(STR("-0001,234.567"), en_US, STR("{:013L}"), F(-1.234567e3));
+
+  std::locale::global(en_US);
+  test(STR("1_23_4#567$$$"), loc, STR("{:$<13L}"), F(1.234567e3));
+  test(STR("$$$1_23_4#567"), loc, STR("{:$>13L}"), F(1.234567e3));
+  test(STR("$1_23_4#567$$"), loc, STR("{:$^13L}"), F(1.234567e3));
+  test(STR("0001_23_4#567"), loc, STR("{:013L}"), F(1.234567e3));
+  test(STR("-1_23_4#567$$$"), loc, STR("{:$<14L}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#567"), loc, STR("{:$>14L}"), F(-1.234567e3));
+  test(STR("$-1_23_4#567$$"), loc, STR("{:$^14L}"), F(-1.234567e3));
+  test(STR("-0001_23_4#567"), loc, STR("{:014L}"), F(-1.234567e3));
+}
+
+template <class F, class CharT>
+void test_floating_point_default_precision() {
+  std::locale loc = std::locale(std::locale(), new numpunct<CharT>());
+  std::locale en_US = std::locale(LOCALE_en_US_UTF_8);
+
+  // *** Basic ***
+  std::locale::global(en_US);
+  test(STR("1.23457e-06"), STR("{:.6L}"), F(1.234567e-6));
+  test(STR("1.23457e-05"), STR("{:.6L}"), F(1.234567e-5));
+  test(STR("0.000123457"), STR("{:.6L}"), F(1.234567e-4));
+  test(STR("0.00123457"), STR("{:.6L}"), F(1.234567e-3));
+  test(STR("0.0123457"), STR("{:.6L}"), F(1.234567e-2));
+  test(STR("0.123457"), STR("{:.6L}"), F(1.234567e-1));
+  test(STR("1.23457"), STR("{:.6L}"), F(1.234567e0));
+  test(STR("12.3457"), STR("{:.6L}"), F(1.234567e1));
+  test(STR("123.457"), STR("{:.6L}"), F(1.234567e2));
+  test(STR("1,234.57"), STR("{:.6L}"), F(1.234567e3));
+  test(STR("12,345.7"), STR("{:.6L}"), F(1.234567e4));
+  test(STR("123,457"), STR("{:.6L}"), F(1.234567e5));
+  test(STR("1.23457e+06"), STR("{:.6L}"), F(1.234567e6));
+  test(STR("1.23457e+07"), STR("{:.6L}"), F(1.234567e7));
+  test(STR("-1.23457e-06"), STR("{:.6L}"), F(-1.234567e-6));
+  test(STR("-1.23457e-05"), STR("{:.6L}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), STR("{:.6L}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), STR("{:.6L}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), STR("{:.6L}"), F(-1.234567e-2));
+  test(STR("-0.123457"), STR("{:.6L}"), F(-1.234567e-1));
+  test(STR("-1.23457"), STR("{:.6L}"), F(-1.234567e0));
+  test(STR("-12.3457"), STR("{:.6L}"), F(-1.234567e1));
+  test(STR("-123.457"), STR("{:.6L}"), F(-1.234567e2));
+  test(STR("-1,234.57"), STR("{:.6L}"), F(-1.234567e3));
+  test(STR("-12,345.7"), STR("{:.6L}"), F(-1.234567e4));
+  test(STR("-123,457"), STR("{:.6L}"), F(-1.234567e5));
+  test(STR("-1.23457e+06"), STR("{:.6L}"), F(-1.234567e6));
+  test(STR("-1.23457e+07"), STR("{:.6L}"), F(-1.234567e7));
+
+  std::locale::global(loc);
+  test(STR("1#23457e-06"), STR("{:.6L}"), F(1.234567e-6));
+  test(STR("1#23457e-05"), STR("{:.6L}"), F(1.234567e-5));
+  test(STR("0#000123457"), STR("{:.6L}"), F(1.234567e-4));
+  test(STR("0#00123457"), STR("{:.6L}"), F(1.234567e-3));
+  test(STR("0#0123457"), STR("{:.6L}"), F(1.234567e-2));
+  test(STR("0#123457"), STR("{:.6L}"), F(1.234567e-1));
+  test(STR("1#23457"), STR("{:.6L}"), F(1.234567e0));
+  test(STR("1_2#3457"), STR("{:.6L}"), F(1.234567e1));
+  test(STR("12_3#457"), STR("{:.6L}"), F(1.234567e2));
+  test(STR("1_23_4#57"), STR("{:.6L}"), F(1.234567e3));
+  test(STR("12_34_5#7"), STR("{:.6L}"), F(1.234567e4));
+  test(STR("123_45_7"), STR("{:.6L}"), F(1.234567e5));
+  test(STR("1#23457e+06"), STR("{:.6L}"), F(1.234567e6));
+  test(STR("1#23457e+07"), STR("{:.6L}"), F(1.234567e7));
+  test(STR("-1#23457e-06"), STR("{:.6L}"), F(-1.234567e-6));
+  test(STR("-1#23457e-05"), STR("{:.6L}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), STR("{:.6L}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), STR("{:.6L}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), STR("{:.6L}"), F(-1.234567e-2));
+  test(STR("-0#123457"), STR("{:.6L}"), F(-1.234567e-1));
+  test(STR("-1#23457"), STR("{:.6L}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), STR("{:.6L}"), F(-1.234567e1));
+  test(STR("-12_3#457"), STR("{:.6L}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), STR("{:.6L}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), STR("{:.6L}"), F(-1.234567e4));
+  test(STR("-123_45_7"), STR("{:.6L}"), F(-1.234567e5));
+  test(STR("-1#23457e+06"), STR("{:.6L}"), F(-1.234567e6));
+  test(STR("-1#23457e+07"), STR("{:.6L}"), F(-1.234567e7));
+
+  test(STR("1.23457e-06"), en_US, STR("{:.6L}"), F(1.234567e-6));
+  test(STR("1.23457e-05"), en_US, STR("{:.6L}"), F(1.234567e-5));
+  test(STR("0.000123457"), en_US, STR("{:.6L}"), F(1.234567e-4));
+  test(STR("0.00123457"), en_US, STR("{:.6L}"), F(1.234567e-3));
+  test(STR("0.0123457"), en_US, STR("{:.6L}"), F(1.234567e-2));
+  test(STR("0.123457"), en_US, STR("{:.6L}"), F(1.234567e-1));
+  test(STR("1.23457"), en_US, STR("{:.6L}"), F(1.234567e0));
+  test(STR("12.3457"), en_US, STR("{:.6L}"), F(1.234567e1));
+  test(STR("123.457"), en_US, STR("{:.6L}"), F(1.234567e2));
+  test(STR("1,234.57"), en_US, STR("{:.6L}"), F(1.234567e3));
+  test(STR("12,345.7"), en_US, STR("{:.6L}"), F(1.234567e4));
+  test(STR("123,457"), en_US, STR("{:.6L}"), F(1.234567e5));
+  test(STR("1.23457e+06"), en_US, STR("{:.6L}"), F(1.234567e6));
+  test(STR("1.23457e+07"), en_US, STR("{:.6L}"), F(1.234567e7));
+  test(STR("-1.23457e-06"), en_US, STR("{:.6L}"), F(-1.234567e-6));
+  test(STR("-1.23457e-05"), en_US, STR("{:.6L}"), F(-1.234567e-5));
+  test(STR("-0.000123457"), en_US, STR("{:.6L}"), F(-1.234567e-4));
+  test(STR("-0.00123457"), en_US, STR("{:.6L}"), F(-1.234567e-3));
+  test(STR("-0.0123457"), en_US, STR("{:.6L}"), F(-1.234567e-2));
+  test(STR("-0.123457"), en_US, STR("{:.6L}"), F(-1.234567e-1));
+  test(STR("-1.23457"), en_US, STR("{:.6L}"), F(-1.234567e0));
+  test(STR("-12.3457"), en_US, STR("{:.6L}"), F(-1.234567e1));
+  test(STR("-123.457"), en_US, STR("{:.6L}"), F(-1.234567e2));
+  test(STR("-1,234.57"), en_US, STR("{:.6L}"), F(-1.234567e3));
+  test(STR("-12,345.7"), en_US, STR("{:.6L}"), F(-1.234567e4));
+  test(STR("-123,457"), en_US, STR("{:.6L}"), F(-1.234567e5));
+  test(STR("-1.23457e+06"), en_US, STR("{:.6L}"), F(-1.234567e6));
+  test(STR("-1.23457e+07"), en_US, STR("{:.6L}"), F(-1.234567e7));
+
+  std::locale::global(en_US);
+  test(STR("1#23457e-06"), loc, STR("{:.6L}"), F(1.234567e-6));
+  test(STR("1#23457e-05"), loc, STR("{:.6L}"), F(1.234567e-5));
+  test(STR("0#000123457"), loc, STR("{:.6L}"), F(1.234567e-4));
+  test(STR("0#00123457"), loc, STR("{:.6L}"), F(1.234567e-3));
+  test(STR("0#0123457"), loc, STR("{:.6L}"), F(1.234567e-2));
+  test(STR("0#123457"), loc, STR("{:.6L}"), F(1.234567e-1));
+  test(STR("1#23457"), loc, STR("{:.6L}"), F(1.234567e0));
+  test(STR("1_2#3457"), loc, STR("{:.6L}"), F(1.234567e1));
+  test(STR("12_3#457"), loc, STR("{:.6L}"), F(1.234567e2));
+  test(STR("1_23_4#57"), loc, STR("{:.6L}"), F(1.234567e3));
+  test(STR("12_34_5#7"), loc, STR("{:.6L}"), F(1.234567e4));
+  test(STR("123_45_7"), loc, STR("{:.6L}"), F(1.234567e5));
+  test(STR("1#23457e+06"), loc, STR("{:.6L}"), F(1.234567e6));
+  test(STR("1#23457e+07"), loc, STR("{:.6L}"), F(1.234567e7));
+  test(STR("-1#23457e-06"), loc, STR("{:.6L}"), F(-1.234567e-6));
+  test(STR("-1#23457e-05"), loc, STR("{:.6L}"), F(-1.234567e-5));
+  test(STR("-0#000123457"), loc, STR("{:.6L}"), F(-1.234567e-4));
+  test(STR("-0#00123457"), loc, STR("{:.6L}"), F(-1.234567e-3));
+  test(STR("-0#0123457"), loc, STR("{:.6L}"), F(-1.234567e-2));
+  test(STR("-0#123457"), loc, STR("{:.6L}"), F(-1.234567e-1));
+  test(STR("-1#23457"), loc, STR("{:.6L}"), F(-1.234567e0));
+  test(STR("-1_2#3457"), loc, STR("{:.6L}"), F(-1.234567e1));
+  test(STR("-12_3#457"), loc, STR("{:.6L}"), F(-1.234567e2));
+  test(STR("-1_23_4#57"), loc, STR("{:.6L}"), F(-1.234567e3));
+  test(STR("-12_34_5#7"), loc, STR("{:.6L}"), F(-1.234567e4));
+  test(STR("-123_45_7"), loc, STR("{:.6L}"), F(-1.234567e5));
+  test(STR("-1#23457e+06"), loc, STR("{:.6L}"), F(-1.234567e6));
+  test(STR("-1#23457e+07"), loc, STR("{:.6L}"), F(-1.234567e7));
+
+  // *** Fill, align, zero padding ***
+  std::locale::global(en_US);
+  test(STR("1,234.57$$$"), STR("{:$<11.6L}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), STR("{:$>11.6L}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), STR("{:$^11.6L}"), F(1.234567e3));
+  test(STR("0001,234.57"), STR("{:011.6L}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), STR("{:$<12.6L}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), STR("{:$>12.6L}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), STR("{:$^12.6L}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), STR("{:012.6L}"), F(-1.234567e3));
+
+  std::locale::global(loc);
+  test(STR("1_23_4#57$$$"), STR("{:$<12.6L}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), STR("{:$>12.6L}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), STR("{:$^12.6L}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), STR("{:012.6L}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), STR("{:$<13.6L}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), STR("{:$>13.6L}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), STR("{:$^13.6L}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), STR("{:013.6L}"), F(-1.234567e3));
+
+  test(STR("1,234.57$$$"), en_US, STR("{:$<11.6L}"), F(1.234567e3));
+  test(STR("$$$1,234.57"), en_US, STR("{:$>11.6L}"), F(1.234567e3));
+  test(STR("$1,234.57$$"), en_US, STR("{:$^11.6L}"), F(1.234567e3));
+  test(STR("0001,234.57"), en_US, STR("{:011.6L}"), F(1.234567e3));
+  test(STR("-1,234.57$$$"), en_US, STR("{:$<12.6L}"), F(-1.234567e3));
+  test(STR("$$$-1,234.57"), en_US, STR("{:$>12.6L}"), F(-1.234567e3));
+  test(STR("$-1,234.57$$"), en_US, STR("{:$^12.6L}"), F(-1.234567e3));
+  test(STR("-0001,234.57"), en_US, STR("{:012.6L}"), F(-1.234567e3));
+
+  std::locale::global(en_US);
+  test(STR("1_23_4#57$$$"), loc, STR("{:$<12.6L}"), F(1.234567e3));
+  test(STR("$$$1_23_4#57"), loc, STR("{:$>12.6L}"), F(1.234567e3));
+  test(STR("$1_23_4#57$$"), loc, STR("{:$^12.6L}"), F(1.234567e3));
+  test(STR("0001_23_4#57"), loc, STR("{:012.6L}"), F(1.234567e3));
+  test(STR("-1_23_4#57$$$"), loc, STR("{:$<13.6L}"), F(-1.234567e3));
+  test(STR("$$$-1_23_4#57"), loc, STR("{:$>13.6L}"), F(-1.234567e3));
+  test(STR("$-1_23_4#57$$"), loc, STR("{:$^13.6L}"), F(-1.234567e3));
+  test(STR("-0001_23_4#57"), loc, STR("{:013.6L}"), F(-1.234567e3));
+}
+
+template <class F, class CharT >
+void test_floating_point() {
+  test_floating_point_hex_lower_case<F, CharT>();
+  test_floating_point_hex_upper_case<F, CharT>();
+  test_floating_point_hex_lower_case_precision<F, CharT>();
+  test_floating_point_hex_upper_case_precision<F, CharT>();
+
+  test_floating_point_scientific_lower_case<F, CharT>();
+  test_floating_point_scientific_upper_case<F, CharT>();
+
+  test_floating_point_fixed_lower_case<F, CharT>();
+  test_floating_point_fixed_upper_case<F, CharT>();
+
+  test_floating_point_general_lower_case<F, CharT>();
+  test_floating_point_general_upper_case<F, CharT>();
+
+  test_floating_point_default<F, CharT>();
+  test_floating_point_default_precision<F, CharT>();
+}
+
 template <class CharT>
 void test() {
   test_bool<CharT>();
   test_integer<CharT>();
+  test_floating_point<float, CharT>();
+  test_floating_point<double, CharT>();
+  test_floating_point<long double, CharT>();
 }
 
 int main(int, char**) {

From 787ccd345cbb3a569ba751580bb806552b4b6e57 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Sun, 28 Nov 2021 14:43:43 +0100
Subject: [PATCH 419/946] [libc++][format] Adds formatter pointer.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This implements the last required formatter specialization.

Completes:
- LWG 3251 Are std::format alignment specifiers applied to string arguments?
- LWG 3340 Formatting functions should throw on argument/format string mismatch in §[format.functions]
- LWG 3540 §[format.arg] There should be no const in basic_format_arg(const T* p)

Implements parts of:
- P0645 Text Formatting

Depends on D114001

Reviewed By: ldionne, vitaut, #libc

Differential Revision: https://reviews.llvm.org/D115988
---
 libcxx/docs/Status/Cxx20Issues.csv            |   4 +-
 libcxx/docs/Status/Cxx2bIssues.csv            |   2 +-
 libcxx/include/CMakeLists.txt                 |   1 +
 libcxx/include/__format/format_arg.h          |   4 +-
 libcxx/include/__format/formatter_pointer.h   |  91 +++++++
 .../include/__format/parser_std_format_spec.h | 103 ++++++-
 libcxx/include/format                         |   1 +
 libcxx/include/module.modulemap               |   1 +
 .../formatter_pointer.module.verify.cpp       |  15 ++
 .../format.arg/visit_format_arg.pass.cpp      |   4 +
 .../std_format_spec_pointer.pass.cpp          | 254 ++++++++++++++++++
 .../formatter.pointer.pass.cpp                | 107 ++++++++
 .../format/format.functions/format_tests.h    |  47 ++++
 13 files changed, 629 insertions(+), 5 deletions(-)
 create mode 100644 libcxx/include/__format/formatter_pointer.h
 create mode 100644 libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_pointer.module.verify.cpp
 create mode 100644 libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_pointer.pass.cpp
 create mode 100644 libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.pointer.pass.cpp

diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv
index 034fac3ca22f8..daa6466f87515 100644
--- a/libcxx/docs/Status/Cxx20Issues.csv
+++ b/libcxx/docs/Status/Cxx20Issues.csv
@@ -207,7 +207,7 @@
 "`3247 <https://wg21.link/LWG3247>`__","``ranges::iter_move``\  should perform ADL-only lookup of ``iter_move``\ ","Prague","","","|ranges|"
 "`3248 <https://wg21.link/LWG3248>`__","``std::format``\  ``#b``\ , ``#B``\ , ``#o``\ , ``#x``\ , and ``#X``\   presentation types misformat negative numbers","Prague","|Complete|","14.0","|format|"
 "`3250 <https://wg21.link/LWG3250>`__","``std::format``\ : ``#``\  (alternate form) for NaN and inf","Prague","|Complete|","14.0","|format|"
-"`3251 <https://wg21.link/LWG3251>`__","Are ``std::format``\  alignment specifiers applied to string arguments?","Prague","","","|format|"
+"`3251 <https://wg21.link/LWG3251>`__","Are ``std::format``\  alignment specifiers applied to string arguments?","Prague","|Complete|","14.0","|format|"
 "`3252 <https://wg21.link/LWG3252>`__","Parse locale's aware modifiers for commands are not consistent with POSIX spec","Prague","","","|chrono|"
 "`3254 <https://wg21.link/LWG3254>`__","Strike ``stop_token``\ 's ``operator!=``\ ","Prague","",""
 "`3255 <https://wg21.link/LWG3255>`__","``span``\ 's ``array``\  constructor is too strict","Prague","|Complete|",""
@@ -256,7 +256,7 @@
 "`3334 <https://wg21.link/LWG3334>`__","``basic_osyncstream``\  move assignment and destruction calls ``basic_syncbuf::emit()``\  twice","Prague","",""
 "`3335 <https://wg21.link/LWG3335>`__","Resolve C++20 NB comments US 273 and GB 274","Prague","","","|ranges|"
 "`3338 <https://wg21.link/LWG3338>`__","Rename ``default_constructible``\  to ``default_initializable``\ ","Prague","|Complete|","13.0"
-"`3340 <https://wg21.link/LWG3340>`__","Formatting functions should throw on argument/format string mismatch in |sect|\ [format.functions]","Prague","","","|format|"
+"`3340 <https://wg21.link/LWG3340>`__","Formatting functions should throw on argument/format string mismatch in |sect|\ [format.functions]","Prague","|Complete|","14.0","|format|"
 "`3346 <https://wg21.link/LWG3346>`__","``pair``\  and ``tuple``\  copy and move constructor have backwards specification","Prague","",""
 "`3347 <https://wg21.link/LWG3347>`__","``std::pair<T, U>``\  now requires ``T``\  and ``U``\  to be less-than-comparable","Prague","",""
 "`3348 <https://wg21.link/LWG3348>`__","``__cpp_lib_unwrap_ref``\  in wrong header","Prague","|Complete|","12.0"
diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 27bc1e5832967..cbc12d27a5614 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -84,7 +84,7 @@
 `3533 <https://wg21.link/LWG3533>`__,"Make ``base() const &`` consistent across iterator wrappers that supports ``input_iterators``","June 2021","","","|ranges|"
 `3536 <https://wg21.link/LWG3536>`__,"Should ``chrono::from_stream()`` assign zero to duration for failure?","June 2021","","","|chrono|"
 `3539 <https://wg21.link/LWG3539>`__,"``format_to`` must not copy models of ``output_iterator<const charT&>``","June 2021","","","|format|"
-`3540 <https://wg21.link/LWG3540>`__,"§[format.arg] There should be no const in ``basic_format_arg(const T* p)``","June 2021","","","|format|"
+`3540 <https://wg21.link/LWG3540>`__,"§[format.arg] There should be no const in ``basic_format_arg(const T* p)``","June 2021","|Complete|","14.0","|format|"
 `3541 <https://wg21.link/LWG3541>`__,"``indirectly_readable_traits`` should be SFINAE-friendly for all types","June 2021","|Complete|","14.0","|ranges|"
 `3542 <https://wg21.link/LWG3542>`__,"``basic_format_arg`` mishandles ``basic_string_view`` with custom traits","June 2021","|Complete|","14.0","|format|"
 `3543 <https://wg21.link/LWG3543>`__,"Definition of when ``counted_iterators`` refer to the same sequence isn't quite right","June 2021","","","|ranges|"
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
index 3886ddba8e4e5..278e848e6ecbf 100644
--- a/libcxx/include/CMakeLists.txt
+++ b/libcxx/include/CMakeLists.txt
@@ -181,6 +181,7 @@ set(files
   __format/formatter_floating_point.h
   __format/formatter_integer.h
   __format/formatter_integral.h
+  __format/formatter_pointer.h
   __format/formatter_string.h
   __format/parser_std_format_spec.h
   __functional/binary_function.h
diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h
index 59429c13d4154..2153287a8a61c 100644
--- a/libcxx/include/__format/format_arg.h
+++ b/libcxx/include/__format/format_arg.h
@@ -245,7 +245,9 @@ class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
   explicit basic_format_arg(nullptr_t) noexcept
       : __ptr(nullptr), __type_(__format::__arg_t::__ptr) {}
 
-  // TODO FMT Implement the _Tp* constructor.
+  template <class _Tp>
+  requires is_void_v<_Tp> _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(_Tp* __p) noexcept
+      : __ptr(__p), __type_(__format::__arg_t::__ptr) {}
 };
 
 #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
diff --git a/libcxx/include/__format/formatter_pointer.h b/libcxx/include/__format/formatter_pointer.h
new file mode 100644
index 0000000000000..aa2eb641c6c6d
--- /dev/null
+++ b/libcxx/include/__format/formatter_pointer.h
@@ -0,0 +1,91 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___FORMAT_FORMATTER_POINTER_H
+#define _LIBCPP___FORMAT_FORMATTER_POINTER_H
+
+#include <__algorithm/copy.h>
+#include <__availability>
+#include <__config>
+#include <__debug>
+#include <__format/format_error.h>
+#include <__format/format_fwd.h>
+#include <__format/formatter.h>
+#include <__format/formatter_integral.h>
+#include <__format/parser_std_format_spec.h>
+#include <__iterator/access.h>
+#include <__nullptr>
+#include <cstdint>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER > 17
+
+// TODO FMT Remove this once we require compilers with proper C++20 support.
+// If the compiler has no concepts support, the format header will be disabled.
+// Without concepts support enable_if needs to be used and that too much effort
+// to support compilers with partial C++20 support.
+#  if !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+namespace __format_spec {
+
+template <__formatter::__char_type _CharT>
+class _LIBCPP_TEMPLATE_VIS __formatter_pointer : public __parser_pointer<_CharT> {
+public:
+  _LIBCPP_HIDE_FROM_ABI auto format(const void* __ptr, auto& __ctx) -> decltype(__ctx.out()) {
+    _LIBCPP_ASSERT(this->__alignment != _Flags::_Alignment::__default,
+                   "The call to parse should have updated the alignment");
+    if (this->__width_needs_substitution())
+      this->__substitute_width_arg_id(__ctx.arg(this->__width));
+
+    // This code looks a lot like the code to format a hexadecimal integral,
+    // but that code isn't public. Making that code public requires some
+    // refactoring.
+    // TODO FMT Remove code duplication.
+    char __buffer[2 + 2 * sizeof(uintptr_t)];
+    __buffer[0] = '0';
+    __buffer[1] = 'x';
+    char* __last = __to_buffer(__buffer + 2, _VSTD::end(__buffer), reinterpret_cast<uintptr_t>(__ptr), 16);
+
+    unsigned __size = __last - __buffer;
+    if (__size >= this->__width)
+      return _VSTD::copy(__buffer, __last, __ctx.out());
+
+    return __formatter::__write(__ctx.out(), __buffer, __last, __size, this->__width, this->__fill, this->__alignment);
+  }
+};
+
+} // namespace __format_spec
+
+// [format.formatter.spec]/2.4
+// For each charT, the pointer type specializations template<>
+// - struct formatter<nullptr_t, charT>;
+// - template<> struct formatter<void*, charT>;
+// - template<> struct formatter<const void*, charT>;
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<nullptr_t, _CharT>
+    : public __format_spec::__formatter_pointer<_CharT> {};
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<void*, _CharT>
+    : public __format_spec::__formatter_pointer<_CharT> {};
+template <__formatter::__char_type _CharT>
+struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<const void*, _CharT>
+    : public __format_spec::__formatter_pointer<_CharT> {};
+
+#  endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
+
+#endif //_LIBCPP_STD_VER > 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // _LIBCPP___FORMAT_FORMATTER_POINTER_H
diff --git a/libcxx/include/__format/parser_std_format_spec.h b/libcxx/include/__format/parser_std_format_spec.h
index 75fc626c84ac1..9d893e9ced27f 100644
--- a/libcxx/include/__format/parser_std_format_spec.h
+++ b/libcxx/include/__format/parser_std_format_spec.h
@@ -826,7 +826,108 @@ class _LIBCPP_TEMPLATE_VIS __parser_floating_point
   }
 };
 
-// TODO FMT Add a parser for pointer values.
+/**
+ * The parser for the std-format-spec.
+ *
+ * This implements the parser for the pointer types.
+ *
+ * See @ref __parser_string.
+ */
+template <class _CharT>
+class _LIBCPP_TEMPLATE_VIS __parser_pointer : public __parser_width,              // provides __width(|as_arg)
+                                              public __parser_fill_align<_CharT>, // provides __fill and uses __flags
+                                              public _Flags                       // provides __flags
+{
+public:
+  using char_type = _CharT;
+
+  _LIBCPP_HIDE_FROM_ABI constexpr __parser_pointer() {
+    // Implements LWG3612 Inconsistent pointer alignment in std::format.
+    // The issue's current status is "Tentatively Ready" and libc++ status is
+    // still experimental.
+    //
+    // TODO FMT Validate this with the final resolution of LWG3612.
+    this->__alignment = _Flags::_Alignment::__right;
+  }
+
+  /**
+   * The low-level std-format-spec parse function.
+   *
+   * @pre __begin points at the beginning of the std-format-spec. This means
+   * directly after the ':'.
+   * @pre The std-format-spec parses the entire input, or the first unmatched
+   * character is a '}'.
+   *
+   * @returns The iterator pointing at the last parsed character.
+   */
+  _LIBCPP_HIDE_FROM_ABI constexpr auto parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) {
+    auto __it = __parse(__parse_ctx);
+    __process_display_type();
+    return __it;
+  }
+
+protected:
+  /**
+   * The low-level std-format-spec parse function.
+   *
+   * @pre __begin points at the beginning of the std-format-spec. This means
+   * directly after the ':'.
+   * @pre The std-format-spec parses the entire input, or the first unmatched
+   * character is a '}'.
+   *
+   * @returns The iterator pointing at the last parsed character.
+   */
+  _LIBCPP_HIDE_FROM_ABI constexpr auto __parse(auto& __parse_ctx) -> decltype(__parse_ctx.begin()) {
+    auto __begin = __parse_ctx.begin();
+    auto __end = __parse_ctx.end();
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parser_fill_align<_CharT>::__parse(__begin, __end, static_cast<_Flags&>(*this));
+    if (__begin == __end)
+      return __begin;
+
+    // An integer presentation type isn't defined in the Standard.
+    // Since a pointer is formatted as an integer it can be argued it's an
+    // integer presentation type. However there are two LWG-issues asserting it
+    // isn't an integer presentation type:
+    // - LWG3612 Inconsistent pointer alignment in std::format
+    // - LWG3644 std::format does not define "integer presentation type"
+    //
+    // There's a paper to make additional clarifications on the status of
+    // formatting pointers and proposes additional fields to be valid. That
+    // paper hasn't been reviewed by the Committee yet.
+    // - P2510 Formatting pointers
+    //
+    // The current implementation assumes formatting pointers isn't covered by
+    // "integer presentation type".
+    // TODO FMT Apply the LWG-issues/papers after approval/rejection by the Committee.
+
+    __begin = __parser_width::__parse(__begin, __end, __parse_ctx);
+    if (__begin == __end)
+      return __begin;
+
+    __begin = __parse_type(__begin, static_cast<_Flags&>(*this));
+
+    if (__begin != __end && *__begin != _CharT('}'))
+      __throw_format_error("The format-spec should consume the input or end with a '}'");
+
+    return __begin;
+  }
+
+  /** Processes the parsed std-format-spec based on the parsed display type. */
+  _LIBCPP_HIDE_FROM_ABI constexpr void __process_display_type() {
+    switch (this->__type) {
+    case _Flags::_Type::__default:
+      this->__type = _Flags::_Type::__pointer;
+      break;
+    case _Flags::_Type::__pointer:
+      break;
+    default:
+      __throw_format_error("The format-spec type has a type not supported for a pointer argument");
+    }
+  }
+};
 
 /** Helper struct returned from @ref __get_string_alignment. */
 template <class _CharT>
diff --git a/libcxx/include/format b/libcxx/include/format
index 3a186469dd5c0..53de4a0e6ca06 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -279,6 +279,7 @@ namespace std {
 #include <__format/formatter_char.h>
 #include <__format/formatter_floating_point.h>
 #include <__format/formatter_integer.h>
+#include <__format/formatter_pointer.h>
 #include <__format/formatter_string.h>
 #include <__format/parser_std_format_spec.h>
 #include <__variant/monostate.h>
diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap
index c940a6d11d816..90fae9bb8362d 100644
--- a/libcxx/include/module.modulemap
+++ b/libcxx/include/module.modulemap
@@ -506,6 +506,7 @@ module std [system] {
       module formatter_floating_point { private header "__format/formatter_floating_point.h" }
       module formatter_integer        { private header "__format/formatter_integer.h" }
       module formatter_integral       { private header "__format/formatter_integral.h" }
+      module formatter_pointer        { private header "__format/formatter_pointer.h" }
       module formatter_string         { private header "__format/formatter_string.h" }
       module parser_std_format_spec   { private header "__format/parser_std_format_spec.h" }
     }
diff --git a/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_pointer.module.verify.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_pointer.module.verify.cpp
new file mode 100644
index 0000000000000..abb82de85f37a
--- /dev/null
+++ b/libcxx/test/libcxx/diagnostics/detail.headers/format/formatter_pointer.module.verify.cpp
@@ -0,0 +1,15 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// REQUIRES: modules-build
+
+// WARNING: This test was generated by 'generate_private_header_tests.py'
+// and should not be edited manually.
+
+// expected-error@*:* {{use of private header from outside its module: '__format/formatter_pointer.h'}}
+#include <__format/formatter_pointer.h>
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
index b6e34d78c8ac8..76548c97d77ba 100644
--- a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
@@ -346,6 +346,10 @@ void test() {
   // Test pointer types.
 
   test<Context, const void*>(nullptr);
+  int i = 0;
+  test<Context, const void*>(static_cast<void*>(&i));
+  const int ci = 0;
+  test<Context, const void*>(static_cast<const void*>(&ci));
 }
 
 void test() {
diff --git a/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_pointer.pass.cpp b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_pointer.pass.cpp
new file mode 100644
index 0000000000000..7a34bbeb8e25a
--- /dev/null
+++ b/libcxx/test/libcxx/utilities/format/format.string/format.string.std/std_format_spec_pointer.pass.cpp
@@ -0,0 +1,254 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// Tests the parsing of the format string as specified in [format.string.std].
+// It validates whether the std-format-spec is valid for a pointer type.
+
+#include <format>
+#include <cassert>
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  include <iostream>
+#endif
+
+#include "concepts_precision.h"
+#include "test_macros.h"
+#include "make_string.h"
+#include "test_exception.h"
+
+#define CSTR(S) MAKE_CSTRING(CharT, S)
+
+using namespace std::__format_spec;
+
+template <class CharT>
+using Parser = __parser_pointer<CharT>;
+
+template <class CharT>
+struct Expected {
+  CharT fill = CharT(' ');
+  _Flags::_Alignment alignment = _Flags::_Alignment::__right;
+  _Flags::_Sign sign = _Flags::_Sign::__default;
+  bool alternate_form = false;
+  bool zero_padding = false;
+  uint32_t width = 0;
+  bool width_as_arg = false;
+  bool locale_specific_form = false;
+  _Flags::_Type type = _Flags::_Type::__pointer;
+};
+
+template <class CharT>
+constexpr void test(Expected<CharT> expected, size_t size, std::basic_string_view<CharT> fmt) {
+  // Initialize parser with sufficient arguments to avoid the parsing to fail
+  // due to insufficient arguments.
+  std::basic_format_parse_context<CharT> parse_ctx(fmt, std::__format::__number_max);
+  auto begin = parse_ctx.begin();
+  auto end = parse_ctx.end();
+  Parser<CharT> parser;
+  auto it = parser.parse(parse_ctx);
+
+  assert(begin == parse_ctx.begin());
+  assert(end == parse_ctx.end());
+
+  assert(begin + size == it);
+  assert(parser.__fill == expected.fill);
+  assert(parser.__alignment == expected.alignment);
+  assert(parser.__sign == expected.sign);
+  assert(parser.__alternate_form == expected.alternate_form);
+  assert(parser.__zero_padding == expected.zero_padding);
+  assert(parser.__width == expected.width);
+  assert(parser.__width_as_arg == expected.width_as_arg);
+  assert(parser.__locale_specific_form == expected.locale_specific_form);
+  assert(parser.__type == expected.type);
+}
+
+template <class CharT>
+constexpr void test(Expected<CharT> expected, size_t size, const CharT* f) {
+  // The format-spec is valid if completely consumed or terminates at a '}'.
+  // The valid inputs all end with a '}'. The test is executed twice:
+  // - first with the terminating '}',
+  // - second consuming the entire input.
+  std::basic_string_view<CharT> fmt{f};
+  assert(fmt.back() == CharT('}') && "Pre-condition failure");
+
+  test(expected, size, fmt);
+  fmt.remove_suffix(1);
+  test(expected, size, fmt);
+}
+
+template <class CharT>
+constexpr void test() {
+  Parser<CharT> parser;
+
+  assert(parser.__fill == CharT(' '));
+  assert(parser.__alignment == _Flags::_Alignment::__right);
+  assert(parser.__sign == _Flags::_Sign::__default);
+  assert(parser.__alternate_form == false);
+  assert(parser.__zero_padding == false);
+  assert(parser.__width == 0);
+  assert(parser.__width_as_arg == false);
+  assert(parser.__locale_specific_form == false);
+  assert(parser.__type == _Flags::_Type::__default);
+
+  test({}, 0, CSTR("}"));
+
+  // *** Align-fill ***
+  test({.alignment = _Flags::_Alignment::__left}, 1, CSTR("<}"));
+  test({.alignment = _Flags::_Alignment::__center}, 1, "^}");
+  test({.alignment = _Flags::_Alignment::__right}, 1, ">}");
+
+  test({.fill = CharT('L'), .alignment = _Flags::_Alignment::__left}, 2, CSTR("L<}"));
+  test({.fill = CharT('#'), .alignment = _Flags::_Alignment::__center}, 2, CSTR("#^}"));
+  test({.fill = CharT('0'), .alignment = _Flags::_Alignment::__right}, 2, CSTR("0>}"));
+
+  test_exception<Parser<CharT>>("The format-spec fill field contains an invalid character", CSTR("{<"));
+  test_exception<Parser<CharT>>("The format-spec fill field contains an invalid character", CSTR("}<"));
+
+  // *** Sign ***
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR("+"));
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR("-"));
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR(" "));
+
+  // *** Alternate form ***
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR("#"));
+
+  // *** Zero padding ***
+  test_exception<Parser<CharT>>("A format-spec width field shouldn't have a leading zero", CSTR("0"));
+
+  // *** Width ***
+  test({.width = 0, .width_as_arg = false}, 0, CSTR("}"));
+  test({.width = 1, .width_as_arg = false}, 1, CSTR("1}"));
+  test({.width = 10, .width_as_arg = false}, 2, CSTR("10}"));
+  test({.width = 1000, .width_as_arg = false}, 4, CSTR("1000}"));
+  test({.width = 1000000, .width_as_arg = false}, 7, CSTR("1000000}"));
+
+  test({.width = 0, .width_as_arg = true}, 2, CSTR("{}}"));
+  test({.width = 0, .width_as_arg = true}, 3, CSTR("{0}}"));
+  test({.width = 1, .width_as_arg = true}, 3, CSTR("{1}}"));
+
+  test_exception<Parser<CharT>>("A format-spec width field shouldn't have a leading zero", CSTR("00"));
+
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  test({.width = 2'147'483'647, .width_as_arg = false}, 10, CSTR("2147483647}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("2147483648"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("5000000000"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("10000000000"));
+
+  test_exception<Parser<CharT>>("End of input while parsing format-spec arg-id", CSTR("{"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{0"));
+  test_exception<Parser<CharT>>("The arg-id of the format-spec starts with an invalid character", CSTR("{a"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{1"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{9"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{9:"));
+  test_exception<Parser<CharT>>("Invalid arg-id", CSTR("{9a"));
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  // Note the static_assert tests whether the arg-id is valid.
+  // Therefore the following should be true arg-id < __format::__number_max.
+  test({.width = 2'147'483'646, .width_as_arg = true}, 12, CSTR("{2147483646}}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("{2147483648}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("{5000000000}"));
+  test_exception<Parser<CharT>>("The numeric value of the format-spec is too large", CSTR("{10000000000}"));
+
+  // *** Precision ***
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR("."));
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR(".1"));
+
+  // *** Locale-specific form ***
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR("L"));
+
+  // *** Type ***
+  {
+    const char* unsuported_type = "The format-spec type has a type not supported for a pointer argument";
+    const char* not_a_type = "The format-spec should consume the input or end with a '}'";
+
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("A}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("B}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("C}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("D}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("E}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("F}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("G}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("H}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("I}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("J}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("K}"));
+    test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR("L"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("M}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("N}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("O}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("P}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("Q}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("R}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("S}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("T}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("U}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("V}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("W}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("X}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("Y}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("Z}"));
+
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("a}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("b}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("c}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("d}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("e}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("f}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("g}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("h}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("i}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("j}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("k}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("l}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("m}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("n}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("o}"));
+    test({.type = _Flags::_Type::__pointer}, 1, CSTR("p}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("q}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("r}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("s}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("t}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("u}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("v}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("w}"));
+    test_exception<Parser<CharT>>(unsuported_type, CSTR("x}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("y}"));
+    test_exception<Parser<CharT>>(not_a_type, CSTR("z}"));
+  }
+
+  // **** General ***
+  test_exception<Parser<CharT>>("The format-spec should consume the input or end with a '}'", CSTR("ss"));
+}
+
+constexpr bool test() {
+  test<char>();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test<wchar_t>();
+#endif
+
+  return true;
+}
+
+int main(int, char**) {
+#if !defined(_WIN32) && !defined(_AIX)
+  // Make sure the parsers match the expectations. The layout of the
+  // subobjects is chosen to minimize the size required.
+  static_assert(sizeof(Parser<char>) == 2 * sizeof(uint32_t));
+#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  static_assert(sizeof(Parser<wchar_t>) == (sizeof(wchar_t) <= 2 ? 2 * sizeof(uint32_t) : 3 * sizeof(uint32_t)));
+#  endif
+#endif
+
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.pointer.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.pointer.pass.cpp
new file mode 100644
index 0000000000000..b60943becdefe
--- /dev/null
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.pointer.pass.cpp
@@ -0,0 +1,107 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// [format.formatter.spec]:
+// Each header that declares the template `formatter` provides the following
+// enabled specializations:
+// ...
+// For each charT, the pointer type specializations
+// - template<> struct formatter<nullptr_t, charT>;
+// - template<> struct formatter<void*, charT>;
+// - template<> struct formatter<const void*, charT>;
+
+#include <format>
+
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <charconv>
+#include <concepts>
+#include <string>
+#include <type_traits>
+
+#include "test_macros.h"
+#include "make_string.h"
+
+#define STR(S) MAKE_STRING(CharT, S)
+
+template <class StringT, class StringViewT, class PointerT>
+void test(StringT expected, StringViewT fmt, PointerT arg) {
+  using CharT = typename StringT::value_type;
+  auto parse_ctx = std::basic_format_parse_context<CharT>(fmt);
+  std::formatter<PointerT, CharT> formatter;
+  static_assert(std::semiregular<decltype(formatter)>);
+
+  auto it = formatter.parse(parse_ctx);
+  assert(it == fmt.end() - (!fmt.empty() && fmt.back() == '}'));
+
+  StringT result;
+  auto out = std::back_inserter(result);
+  using FormatCtxT = std::basic_format_context<decltype(out), CharT>;
+
+  auto format_ctx = std::__format_context_create<decltype(out), CharT>(out, std::make_format_args<FormatCtxT>(arg));
+  formatter.format(arg, format_ctx);
+
+  if (expected.empty()) {
+    std::array<char, 128> buffer;
+    buffer[0] = CharT('0');
+    buffer[1] = CharT('x');
+    expected.append(buffer.begin(),
+                    std::to_chars(buffer.begin() + 2, buffer.end(), reinterpret_cast<uintptr_t>(arg), 16).ptr);
+  }
+  assert(result == expected);
+}
+
+template <class StringT, class PointerT>
+void test_termination_condition(StringT expected, StringT f, PointerT arg) {
+  // The format-spec is valid if completely consumed or terminates at a '}'.
+  // The valid inputs all end with a '}'. The test is executed twice:
+  // - first with the terminating '}',
+  // - second consuming the entire input.
+  using CharT = typename StringT::value_type;
+  std::basic_string_view<CharT> fmt{f};
+  assert(fmt.back() == CharT('}') && "Pre-condition failure");
+
+  test(expected, fmt, arg);
+  fmt.remove_suffix(1);
+  test(expected, fmt, arg);
+}
+
+template <class CharT>
+void test_nullptr_t() {
+  test_termination_condition(STR("0x0"), STR("}"), nullptr);
+}
+
+template <class PointerT, class CharT>
+void test_pointer_type() {
+  test_termination_condition(STR("0x0"), STR("}"), PointerT(0));
+  test_termination_condition(STR("0x42"), STR("}"), PointerT(0x42));
+  test_termination_condition(STR("0xffff"), STR("}"), PointerT(0xffff));
+  test_termination_condition(STR(""), STR("}"), PointerT(-1));
+}
+
+template <class CharT>
+void test_all_pointer_types() {
+  test_nullptr_t<CharT>();
+  test_pointer_type<void*, CharT>();
+  test_pointer_type<const void*, CharT>();
+}
+
+int main(int, char**) {
+  test_all_pointer_types<char>();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test_all_pointer_types<wchar_t>();
+#endif
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index c2eeb236f99d0..4b269bbd4e64c 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -2484,6 +2484,47 @@ void format_test_floating_point(TestFunction check, ExceptionTest check_exceptio
   format_test_floating_point<long double, CharT>(check, check_exception);
 }
 
+template <class P, class CharT, class TestFunction, class ExceptionTest>
+void format_test_pointer(TestFunction check, ExceptionTest check_exception) {
+  // *** align-fill & width ***
+  check(STR("answer is '   0x0'"), STR("answer is '{:6}'"), P(nullptr));
+  check(STR("answer is '   0x0'"), STR("answer is '{:>6}'"), P(nullptr));
+  check(STR("answer is '0x0   '"), STR("answer is '{:<6}'"), P(nullptr));
+  check(STR("answer is ' 0x0  '"), STR("answer is '{:^6}'"), P(nullptr));
+
+  check(STR("answer is '---0x0'"), STR("answer is '{:->6}'"), P(nullptr));
+  check(STR("answer is '0x0---'"), STR("answer is '{:-<6}'"), P(nullptr));
+  check(STR("answer is '-0x0--'"), STR("answer is '{:-^6}'"), P(nullptr));
+
+  // *** Sign ***
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:-}"), P(nullptr));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:+}"), P(nullptr));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{: }"), P(nullptr));
+
+  // *** alternate form ***
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:#}"), P(nullptr));
+
+  // *** zero-padding ***
+  check_exception("A format-spec width field shouldn't have a leading zero", STR("{:0}"), P(nullptr));
+
+  // *** precision ***
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), P(nullptr));
+
+  // *** locale-specific form ***
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:L}"), P(nullptr));
+
+  // *** type ***
+  for (const auto& fmt : invalid_types<CharT>("p"))
+    check_exception("The format-spec type has a type not supported for a pointer argument", fmt, P(nullptr));
+}
+
+template <class CharT, class TestFunction, class ExceptionTest>
+void format_test_pointer(TestFunction check, ExceptionTest check_exception) {
+  format_test_pointer<std::nullptr_t, CharT>(check, check_exception);
+  format_test_pointer<void*, CharT>(check, check_exception);
+  format_test_pointer<const void*, CharT>(check, check_exception);
+}
+
 template <class CharT, class TestFunction, class ExceptionTest>
 void format_tests(TestFunction check, ExceptionTest check_exception) {
   // *** Test escaping  ***
@@ -2611,6 +2652,12 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   check(STR("hello 42"), STR("hello {}"), static_cast<double>(42));
   check(STR("hello 42"), STR("hello {}"), static_cast<long double>(42));
   format_test_floating_point<CharT>(check, check_exception);
+
+  // *** Test pointer formater argument ***
+  check(STR("hello 0x0"), STR("hello {}"), nullptr);
+  check(STR("hello 0x42"), STR("hello {}"), reinterpret_cast<void*>(0x42));
+  check(STR("hello 0x42"), STR("hello {}"), reinterpret_cast<const void*>(0x42));
+  format_test_pointer<CharT>(check, check_exception);
 }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS

From 2b8b48c5a0c40d33569c74924f72cc31055a7b56 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 14 Dec 2021 19:46:10 +0100
Subject: [PATCH 420/946] [libc++][format] Disable default formatter.

[format.formatter.spec]/5 lists the requirements for the default
formatter. The original implementation didn't implement this. This
implements the default formatter according to the Standard.

This adds additional test to validate the default formatter is disabled
and the required standard formatters are enabled.

While adding the tests it seems the formatters needed a constraint for the
character types they were valid for.

Implements parts of:
- P0645 Text Formatting

Depends on D115988

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D115989
---
 libcxx/include/__format/formatter.h           |  30 +-
 libcxx/include/__format/formatter_bool.h      |   2 +-
 libcxx/include/__format/formatter_integer.h   |  24 +-
 libcxx/include/__format/formatter_string.h    |  10 +-
 .../types.compile.pass.cpp                    | 370 ++++++++++++++++++
 5 files changed, 400 insertions(+), 36 deletions(-)
 create mode 100644 libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/types.compile.pass.cpp

diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h
index 0c0c02ba19173..38b73bba32f3e 100644
--- a/libcxx/include/__format/formatter.h
+++ b/libcxx/include/__format/formatter.h
@@ -38,26 +38,20 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 // to support compilers with partial C++20 support.
 #if !defined(_LIBCPP_HAS_NO_CONCEPTS)
 
-// Currently not implemented specializations throw an exception when used. This
-// does not conform to the Standard. However not all Standard defined formatters
-// have been implemented yet. Until that time the current behavior is intended.
-// TODO FMT Disable the default template.
+/// The default formatter template.
+///
+/// [format.formatter.spec]/5
+/// If F is a disabled specialization of formatter, these values are false:
+/// - is_default_constructible_v<F>,
+/// - is_copy_constructible_v<F>,
+/// - is_move_constructible_v<F>,
+/// - is_copy_assignable<F>, and
+/// - is_move_assignable<F>.
 template <class _Tp, class _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter {
-  _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI auto parse(auto& __parse_ctx)
-      -> decltype(__parse_ctx.begin()) {
-    __throw();
-  }
-
-  _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI auto format(_Tp, auto& __ctx)
-      -> decltype(__ctx.out()) {
-    __throw();
-  }
-
-private:
-  _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI void __throw() {
-    __throw_format_error("Argument type not implemented yet");
-  }
+  formatter() = delete;
+  formatter(const formatter&) = delete;
+  formatter& operator=(const formatter&) = delete;
 };
 
 namespace __format_spec {
diff --git a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h
index fdd1d75355d24..1e40bc0a435ae 100644
--- a/libcxx/include/__format/formatter_bool.h
+++ b/libcxx/include/__format/formatter_bool.h
@@ -102,7 +102,7 @@ using __formatter_bool = __formatter_integral<__parser_bool<_CharT>>;
 // For each charT, for each cv-unqualified arithmetic type ArithmeticT other
 // than char, wchar_t, char8_t, char16_t, or char32_t, a specialization
 
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<bool, _CharT>
     : public __format_spec::__formatter_bool<_CharT> {
   using _Base = __format_spec::__formatter_bool<_CharT>;
diff --git a/libcxx/include/__format/formatter_integer.h b/libcxx/include/__format/formatter_integer.h
index 767df36e61eb7..e1f3d4e34897b 100644
--- a/libcxx/include/__format/formatter_integer.h
+++ b/libcxx/include/__format/formatter_integer.h
@@ -81,25 +81,25 @@ using __formatter_integer = __formatter_integral<__parser_integer<_CharT>>;
 // than char, wchar_t, char8_t, char16_t, or char32_t, a specialization
 
 // Signed integral types.
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<signed char, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<short, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<int, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<long, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<long long, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
 #ifndef _LIBCPP_HAS_NO_INT128
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<__int128_t, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {
@@ -119,28 +119,28 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 #endif
 
 // Unsigned integral types.
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<unsigned char, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<unsigned short, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<unsigned, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<unsigned long, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<unsigned long long, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {};
 #ifndef _LIBCPP_HAS_NO_INT128
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<__uint128_t, _CharT>
     : public __format_spec::__formatter_integer<_CharT> {
diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h
index 75a81f5184a0d..04950faa4a21e 100644
--- a/libcxx/include/__format/formatter_string.h
+++ b/libcxx/include/__format/formatter_string.h
@@ -64,7 +64,7 @@ class _LIBCPP_TEMPLATE_VIS __formatter_string : public __parser_string<_CharT> {
 // [format.formatter.spec]/2.2 For each charT, the string type specializations
 
 // Formatter const char*.
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<const _CharT*, _CharT>
     : public __format_spec::__formatter_string<_CharT> {
@@ -98,7 +98,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 };
 
 // Formatter char*.
-template <class _CharT>
+template <__formatter::__char_type _CharT>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<_CharT*, _CharT> : public formatter<const _CharT*, _CharT> {
   using _Base = formatter<const _CharT*, _CharT>;
@@ -110,7 +110,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 };
 
 // Formatter const char[].
-template <class _CharT, size_t _Size>
+template <__formatter::__char_type _CharT, size_t _Size>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<const _CharT[_Size], _CharT>
     : public __format_spec::__formatter_string<_CharT> {
@@ -123,7 +123,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 };
 
 // Formatter std::string.
-template <class _CharT, class _Traits, class _Allocator>
+template <__formatter::__char_type _CharT, class _Traits, class _Allocator>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
     formatter<basic_string<_CharT, _Traits, _Allocator>, _CharT>
     : public __format_spec::__formatter_string<_CharT> {
@@ -138,7 +138,7 @@ struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT
 };
 
 // Formatter std::string_view.
-template <class _CharT, class _Traits>
+template <__formatter::__char_type _CharT, class _Traits>
 struct _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT formatter<basic_string_view<_CharT, _Traits>, _CharT>
     : public __format_spec::__formatter_string<_CharT> {
   using _Base = __format_spec::__formatter_string<_CharT>;
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/types.compile.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/types.compile.pass.cpp
new file mode 100644
index 0000000000000..522a583f349cb
--- /dev/null
+++ b/libcxx/test/std/utilities/format/format.formatter/format.formatter.spec/types.compile.pass.cpp
@@ -0,0 +1,370 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// template <class _Tp, class _CharT = char>
+// struct formatter;
+
+// Tests the enabled and disabled requirements for std::formatter.
+
+#include <array>
+#include <bitset>
+#include <bitset>
+#include <chrono>
+#include <complex>
+#include <concepts>
+#include <deque>
+#ifndef _LIBCPP_HAS_NO_FILESYSTEM_LIBRARY
+#  include <filesystem>
+#endif
+#include <format>
+#include <forward_list>
+#include <list>
+#include <memory>
+#include <map>
+#include <optional>
+#include <queue>
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  include <regex>
+#endif
+#include <set>
+#include <stack>
+#include <span>
+#ifndef _LIBCPP_HAS_NO_THREADS
+#  include <thread>
+#endif
+#include <tuple>
+#include <type_traits>
+#include <unordered_map>
+#include <unordered_set>
+#include <valarray>
+#include <variant>
+
+#include "test_macros.h"
+
+// Validate default template argument.
+static_assert(std::same_as<std::formatter<int>, std::formatter<int, char>>);
+
+// Concept for an enabled formatter.
+//
+// Since it's not possible to extract the T and CharT types from the formatter
+// they are specified and the proper formatter is always intended to be
+// defaulted.
+//
+// [formatter.requirements]/2
+// A type F meets the Formatter requirements if it meets the BasicFormatter
+// requirements and the expressions shown in Table 71 are valid and have the
+// indicated semantics.
+template <class T, class CharT, class F = std::formatter<T, CharT>>
+concept enabled =
+    // The basic BasicFormatter requirements:
+    std::default_initializable<F> && std::copyable<F> && std::destructible<F> && std::swappable<F> &&
+    // The expressions shown in Table 71
+    requires(F f, std::basic_format_parse_context<CharT> pc, T u, std::basic_format_context<CharT*, CharT> fc) {
+  { f.parse(pc) } -> std::same_as<typename decltype(pc)::iterator>;
+  { f.format(u, fc) } -> std::same_as<typename decltype(fc)::iterator>;
+};
+
+// Concept for a disabled formatter.
+//
+// This uses the same template arguments as enable. This isn't required since
+// the concept doesn't need to inspect T and CharT. This makes it easier for
+// future changes. For example P2286 formatting ranges intents to change
+// std::formatter<std::vector<int>> from disabled to enabled. The current way
+// makes it easy to define a macro like
+// #if TEST_STD_VER > 23
+//   TEST_ENABLED_AFTER_CXX23(T, CharT) enabled<T, CharT>
+// #else
+//   TEST_ENABLED_AFTER_CXX23(T, CharT) disabled<T, CharT>
+// #endif
+template <class T, class CharT, class F = std::formatter<T, CharT>>
+// [formatter.requirements]/5
+// If F is a disabled specialization of formatter, these values are false:
+concept disabled = !std::is_default_constructible_v<F> && !std::is_copy_constructible_v<F> &&
+                   !std::is_move_constructible_v<F> && !std::is_copy_assignable_v<F> && !std::is_move_assignable_v<F>;
+
+template <class T, class CharT>
+void assert_formatter_is_disabled() {
+  static_assert(disabled<T, CharT>);
+}
+
+template <class T, class CharT>
+void assert_formatter_is_enabled() {
+  // Only formatters for CharT == char || CharT == wchar_t are enabled for the
+  // standard formatters. When CharT is a different type the formatter should
+  // be disabled.
+  if constexpr (std::same_as<CharT, char>
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+                || std::same_as<CharT, wchar_t>
+#endif
+  )
+    static_assert(enabled<T, CharT>);
+  else
+    assert_formatter_is_disabled<T, CharT>();
+}
+
+// Tests for P0645 Text Formatting
+template <class CharT>
+void test_P0645() {
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  // Tests the special formatter that converts a char to a wchar_t.
+  assert_formatter_is_enabled<char, wchar_t>();
+#endif
+  assert_formatter_is_enabled<CharT, CharT>();
+
+  assert_formatter_is_enabled<CharT*, CharT>();
+  assert_formatter_is_enabled<const CharT*, CharT>();
+  assert_formatter_is_enabled<const CharT[42], CharT>();
+  assert_formatter_is_enabled<std::basic_string<CharT>, CharT>();
+  assert_formatter_is_enabled<std::basic_string_view<CharT>, CharT>();
+
+  assert_formatter_is_enabled<bool, CharT>();
+
+  assert_formatter_is_enabled<signed char, CharT>();
+  assert_formatter_is_enabled<signed short, CharT>();
+  assert_formatter_is_enabled<signed int, CharT>();
+  assert_formatter_is_enabled<signed long, CharT>();
+  assert_formatter_is_enabled<signed long long, CharT>();
+#ifndef _LIBCPP_HAS_NO_INT128
+  assert_formatter_is_enabled<__int128_t, CharT>();
+#endif
+
+  assert_formatter_is_enabled<unsigned char, CharT>();
+  assert_formatter_is_enabled<unsigned short, CharT>();
+  assert_formatter_is_enabled<unsigned int, CharT>();
+  assert_formatter_is_enabled<unsigned long, CharT>();
+  assert_formatter_is_enabled<unsigned long long, CharT>();
+#ifndef _LIBCPP_HAS_NO_INT128
+  assert_formatter_is_enabled<__uint128_t, CharT>();
+#endif
+
+  assert_formatter_is_enabled<float, CharT>();
+  assert_formatter_is_enabled<double, CharT>();
+  assert_formatter_is_enabled<long double, CharT>();
+
+  assert_formatter_is_enabled<std::nullptr_t, CharT>();
+  assert_formatter_is_enabled<void*, CharT>();
+  assert_formatter_is_enabled<const void*, CharT>();
+}
+
+// Tests for P1361 Integration of chrono with text formatting
+//
+// Some tests are commented out since these types haven't been implemented in
+// chrono yet. After P1361 has been implemented these formatters should be all
+// enabled.
+template <class CharT>
+void test_P1361() {
+  assert_formatter_is_disabled<std::chrono::microseconds, CharT>();
+
+  assert_formatter_is_disabled<std::chrono::sys_time<std::chrono::microseconds>, CharT>();
+  //assert_formatter_is_enabled<std::chrono::utc_time<std::chrono::microseconds>, CharT>();
+  //assert_formatter_is_enabled<std::chrono::tai_time<std::chrono::microseconds>, CharT>();
+  //assert_formatter_is_enabled<std::chrono::gps_time<std::chrono::microseconds>, CharT>();
+  assert_formatter_is_disabled<std::chrono::file_time<std::chrono::microseconds>, CharT>();
+  assert_formatter_is_disabled<std::chrono::local_time<std::chrono::microseconds>, CharT>();
+
+  assert_formatter_is_disabled<std::chrono::day, CharT>();
+  assert_formatter_is_disabled<std::chrono::month, CharT>();
+  assert_formatter_is_disabled<std::chrono::year, CharT>();
+
+  assert_formatter_is_disabled<std::chrono::weekday, CharT>();
+  assert_formatter_is_disabled<std::chrono::weekday_indexed, CharT>();
+  assert_formatter_is_disabled<std::chrono::weekday_last, CharT>();
+
+  assert_formatter_is_disabled<std::chrono::month_day, CharT>();
+  assert_formatter_is_disabled<std::chrono::month_day_last, CharT>();
+  assert_formatter_is_disabled<std::chrono::month_weekday, CharT>();
+  assert_formatter_is_disabled<std::chrono::month_weekday_last, CharT>();
+
+  assert_formatter_is_disabled<std::chrono::year_month, CharT>();
+  assert_formatter_is_disabled<std::chrono::year_month_day, CharT>();
+  assert_formatter_is_disabled<std::chrono::year_month_day_last, CharT>();
+  assert_formatter_is_disabled<std::chrono::year_month_weekday, CharT>();
+  assert_formatter_is_disabled<std::chrono::year_month_weekday_last, CharT>();
+
+  assert_formatter_is_disabled<std::chrono::hh_mm_ss<std::chrono::microseconds>, CharT>();
+
+  //assert_formatter_is_enabled<std::chrono::sys_info, CharT>();
+  //assert_formatter_is_enabled<std::chrono::local_info, CharT>();
+
+  //assert_formatter_is_enabled<std::chrono::zoned_time, CharT>();
+}
+
+// Tests for P1636 Formatters for library types
+//
+// The paper hasn't been voted in so currently all formatters are disabled.
+// TODO validate whether the test is correct after the paper has been accepted.
+template <class CharT>
+void test_P1636() {
+  assert_formatter_is_disabled<std::basic_streambuf<CharT>, CharT>();
+  assert_formatter_is_disabled<std::bitset<42>, CharT>();
+  assert_formatter_is_disabled<std::complex<double>, CharT>();
+  assert_formatter_is_disabled<std::error_code, CharT>();
+#ifndef _LIBCPP_HAS_NO_FILESYSTEM_LIBRARY
+  assert_formatter_is_disabled<std::filesystem::path, CharT>();
+#endif
+  assert_formatter_is_disabled<std::shared_ptr<int>, CharT>();
+#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+  assert_formatter_is_disabled<std::sub_match<CharT*>, CharT>();
+#endif
+#ifndef _LIBCPP_HAS_NO_THREADS
+  assert_formatter_is_disabled<std::thread::id, CharT>();
+#endif
+  assert_formatter_is_disabled<std::unique_ptr<int>, CharT>();
+}
+
+// Tests for P2286 Formatting ranges
+//
+// The paper hasn't been voted in so currently all formatters are disabled.
+// TODO validate whether the test is correct after the paper has been accepted.
+template <class CharT>
+void test_P2286() {
+  assert_formatter_is_disabled<std::array<int, 42>, CharT>();
+  assert_formatter_is_disabled<std::vector<int>, CharT>();
+  assert_formatter_is_disabled<std::deque<int>, CharT>();
+  assert_formatter_is_disabled<std::forward_list<int>, CharT>();
+  assert_formatter_is_disabled<std::list<int>, CharT>();
+
+  assert_formatter_is_disabled<std::set<int>, CharT>();
+  assert_formatter_is_disabled<std::map<int, int>, CharT>();
+  assert_formatter_is_disabled<std::multiset<int>, CharT>();
+  assert_formatter_is_disabled<std::multimap<int, int>, CharT>();
+
+  assert_formatter_is_disabled<std::unordered_set<int>, CharT>();
+  assert_formatter_is_disabled<std::unordered_map<int, int>, CharT>();
+  assert_formatter_is_disabled<std::unordered_multiset<int>, CharT>();
+  assert_formatter_is_disabled<std::unordered_multimap<int, int>, CharT>();
+
+  assert_formatter_is_disabled<std::stack<int>, CharT>();
+  assert_formatter_is_disabled<std::queue<int>, CharT>();
+  assert_formatter_is_disabled<std::priority_queue<int>, CharT>();
+
+  assert_formatter_is_disabled<std::span<int>, CharT>();
+
+  assert_formatter_is_disabled<std::valarray<int>, CharT>();
+
+  assert_formatter_is_disabled<std::pair<int, int>, CharT>();
+  assert_formatter_is_disabled<std::tuple<int>, CharT>();
+}
+
+class c {
+  void f();
+  void fc() const;
+  static void sf();
+};
+enum e { a };
+enum class ec { a };
+template <class CharT>
+void test_disabled() {
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  assert_formatter_is_disabled<const char*, wchar_t>();
+#endif
+  assert_formatter_is_disabled<const char*, char8_t>();
+  assert_formatter_is_disabled<const char*, char16_t>();
+  assert_formatter_is_disabled<const char*, char32_t>();
+
+  assert_formatter_is_disabled<c, CharT>();
+  assert_formatter_is_disabled<const c, CharT>();
+  assert_formatter_is_disabled<volatile c, CharT>();
+  assert_formatter_is_disabled<const volatile c, CharT>();
+
+  assert_formatter_is_disabled<e, CharT>();
+  assert_formatter_is_disabled<const e, CharT>();
+  assert_formatter_is_disabled<volatile e, CharT>();
+  assert_formatter_is_disabled<const volatile e, CharT>();
+
+  assert_formatter_is_disabled<ec, CharT>();
+  assert_formatter_is_disabled<const ec, CharT>();
+  assert_formatter_is_disabled<volatile ec, CharT>();
+  assert_formatter_is_disabled<const volatile ec, CharT>();
+
+  assert_formatter_is_disabled<int*, CharT>();
+  assert_formatter_is_disabled<const int*, CharT>();
+  assert_formatter_is_disabled<volatile int*, CharT>();
+  assert_formatter_is_disabled<const volatile int*, CharT>();
+
+  assert_formatter_is_disabled<c*, CharT>();
+  assert_formatter_is_disabled<const c*, CharT>();
+  assert_formatter_is_disabled<volatile c*, CharT>();
+  assert_formatter_is_disabled<const volatile c*, CharT>();
+
+  assert_formatter_is_disabled<e*, CharT>();
+  assert_formatter_is_disabled<const e*, CharT>();
+  assert_formatter_is_disabled<volatile e*, CharT>();
+  assert_formatter_is_disabled<const volatile e*, CharT>();
+
+  assert_formatter_is_disabled<ec*, CharT>();
+  assert_formatter_is_disabled<const ec*, CharT>();
+  assert_formatter_is_disabled<volatile ec*, CharT>();
+  assert_formatter_is_disabled<const volatile ec*, CharT>();
+
+  assert_formatter_is_disabled<void (*)(), CharT>();
+  assert_formatter_is_disabled<void (c::*)(), CharT>();
+  assert_formatter_is_disabled<void (c::*)() const, CharT>();
+
+  assert_formatter_is_disabled<std::optional<int>, CharT>();
+  assert_formatter_is_disabled<std::variant<int>, CharT>();
+
+  assert_formatter_is_disabled<std::shared_ptr<c>, CharT>();
+  assert_formatter_is_disabled<std::unique_ptr<c>, CharT>();
+
+  assert_formatter_is_disabled<std::array<c, 42>, CharT>();
+  assert_formatter_is_disabled<std::vector<c>, CharT>();
+  assert_formatter_is_disabled<std::deque<c>, CharT>();
+  assert_formatter_is_disabled<std::forward_list<c>, CharT>();
+  assert_formatter_is_disabled<std::list<c>, CharT>();
+
+  assert_formatter_is_disabled<std::set<c>, CharT>();
+  assert_formatter_is_disabled<std::map<c, int>, CharT>();
+  assert_formatter_is_disabled<std::multiset<c>, CharT>();
+  assert_formatter_is_disabled<std::multimap<c, int>, CharT>();
+
+  assert_formatter_is_disabled<std::unordered_set<c>, CharT>();
+  assert_formatter_is_disabled<std::unordered_map<c, int>, CharT>();
+  assert_formatter_is_disabled<std::unordered_multiset<c>, CharT>();
+  assert_formatter_is_disabled<std::unordered_multimap<c, int>, CharT>();
+
+  assert_formatter_is_disabled<std::stack<c>, CharT>();
+  assert_formatter_is_disabled<std::queue<c>, CharT>();
+  assert_formatter_is_disabled<std::priority_queue<c>, CharT>();
+
+  assert_formatter_is_disabled<std::span<c>, CharT>();
+
+  assert_formatter_is_disabled<std::valarray<c>, CharT>();
+
+  assert_formatter_is_disabled<std::pair<c, int>, CharT>();
+  assert_formatter_is_disabled<std::tuple<c>, CharT>();
+
+  assert_formatter_is_disabled<std::optional<c>, CharT>();
+  assert_formatter_is_disabled<std::variant<c>, CharT>();
+}
+
+template <class CharT>
+void test() {
+  test_P0645<CharT>();
+  test_P1361<CharT>();
+  test_P1636<CharT>();
+  test_P2286<CharT>();
+  test_disabled<CharT>();
+}
+
+void test() {
+  test<char>();
+#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+  test<wchar_t>();
+#endif
+  test<char8_t>();
+  test<char16_t>();
+  test<char32_t>();
+
+  test<int>();
+}

From ade6d0d8fa1d9e327e9a1975351aa6b4b5dbf800 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Thu, 16 Dec 2021 18:17:47 +0100
Subject: [PATCH 421/946] [libc++][format] Adds formatter handle.

This implements the handler according to P0645. P2418 changes the wording
in the Standard. That isn't implemented and requires changes in more
places. LWG3631 applies modifications to P2418, but is currently
unresolved.

Implements parts of:
* P0645 Text Formatting

Depends on D115989

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D115991
---
 libcxx/include/__format/format_arg.h          |  40 +++++-
 libcxx/include/format                         |   2 +
 .../format.arg/visit_format_arg.pass.cpp      |   2 +
 .../format.arguments/format.args/get.pass.cpp |   2 +
 .../format.arg.store/class.pass.cpp           |   2 +
 .../format.arg.store/make_format_args.sh.cpp  |   2 +
 .../format.arg/operator_bool.pass.cpp         |   2 +
 .../format.args/ctor.pass.cpp                 |   2 +
 .../format.context/arg.pass.cpp               |   2 +
 .../format.context/ctor.pass.cpp              |   2 +
 .../format.context/locale.pass.cpp            |   2 +
 .../formatter.char.pass.cpp                   |   2 +
 .../formatter.handle.pass.cpp                 |  76 +++++++++++
 .../formatter.signed_integral.pass.cpp        |   2 +
 .../formatter.unsigned_integral.pass.cpp      |   2 +
 .../format/format.functions/format_tests.h    | 120 +++++++++++++++++-
 16 files changed, 256 insertions(+), 6 deletions(-)
 create mode 100644 libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.handle.pass.cpp

diff --git a/libcxx/include/__format/format_arg.h b/libcxx/include/__format/format_arg.h
index 2153287a8a61c..da829d52fbfe4 100644
--- a/libcxx/include/__format/format_arg.h
+++ b/libcxx/include/__format/format_arg.h
@@ -14,7 +14,9 @@
 #include <__config>
 #include <__format/format_error.h>
 #include <__format/format_fwd.h>
+#include <__format/format_parse_context.h>
 #include <__functional_base>
+#include <__memory/addressof.h>
 #include <__variant/monostate.h>
 #include <string>
 #include <string_view>
@@ -56,7 +58,8 @@ enum class _LIBCPP_ENUM_VIS __arg_t : uint8_t {
   __long_double,
   __const_char_type_ptr,
   __string_view,
-  __ptr
+  __ptr,
+  __handle
 };
 } // namespace __format
 
@@ -104,6 +107,8 @@ visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) {
     return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__string_view);
   case __format::__arg_t::__ptr:
     return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__ptr);
+  case __format::__arg_t::__handle:
+    return _VSTD::invoke(_VSTD::forward<_Visitor>(__vis), __arg.__handle);
   }
   _LIBCPP_UNREACHABLE();
 }
@@ -111,8 +116,7 @@ visit_format_arg(_Visitor&& __vis, basic_format_arg<_Context> __arg) {
 template <class _Context>
 class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
 public:
-  // TODO FMT Define the handle class.
-  class handle;
+  class _LIBCPP_TEMPLATE_VIS handle;
 
   _LIBCPP_HIDE_FROM_ABI basic_format_arg() noexcept
       : __type_{__format::__arg_t::__none} {}
@@ -161,7 +165,7 @@ class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
     const char_type* __const_char_type_ptr;
     basic_string_view<char_type> __string_view;
     const void* __ptr;
-    // TODO FMT Add the handle.
+    handle __handle;
   };
   __format::__arg_t __type_;
 
@@ -248,6 +252,34 @@ class _LIBCPP_TEMPLATE_VIS _LIBCPP_AVAILABILITY_FORMAT basic_format_arg {
   template <class _Tp>
   requires is_void_v<_Tp> _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(_Tp* __p) noexcept
       : __ptr(__p), __type_(__format::__arg_t::__ptr) {}
+
+  template <class _Tp>
+  _LIBCPP_HIDE_FROM_ABI explicit basic_format_arg(const _Tp& __v) noexcept
+      : __handle(__v), __type_(__format::__arg_t::__handle) {}
+};
+
+template <class _Context>
+class _LIBCPP_TEMPLATE_VIS basic_format_arg<_Context>::handle {
+  friend class basic_format_arg<_Context>;
+
+public:
+  _LIBCPP_HIDE_FROM_ABI
+  void format(basic_format_parse_context<char_type>& __parse_ctx, _Context& __ctx) const {
+    __format_(__parse_ctx, __ctx, __ptr_);
+  }
+
+private:
+  const void* __ptr_;
+  void (*__format_)(basic_format_parse_context<char_type>&, _Context&, const void*);
+
+  template <class _Tp>
+  _LIBCPP_HIDE_FROM_ABI explicit handle(const _Tp& __v) noexcept
+      : __ptr_(_VSTD::addressof(__v)),
+        __format_([](basic_format_parse_context<char_type>& __parse_ctx, _Context& __ctx, const void* __ptr) {
+          typename _Context::template formatter_type<_Tp> __f;
+          __parse_ctx.advance_to(__f.parse(__parse_ctx));
+          __ctx.advance_to(__f.format(*static_cast<const _Tp*>(__ptr), __ctx));
+        }) {}
 };
 
 #endif // !defined(_LIBCPP_HAS_NO_CONCEPTS)
diff --git a/libcxx/include/format b/libcxx/include/format
index 53de4a0e6ca06..c1f1be7d31b98 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -369,6 +369,8 @@ __handle_replacement_field(const _CharT* __begin, const _CharT* __end,
       [&](auto __arg) {
         if constexpr (same_as<decltype(__arg), monostate>)
           __throw_format_error("Argument index out of bounds");
+        else if constexpr (same_as<decltype(__arg), typename basic_format_arg<_Ctx>::handle>)
+          __arg.format(__parse_ctx, __ctx);
         else {
           formatter<decltype(__arg), _CharT> __formatter;
           __parse_ctx.advance_to(__formatter.parse(__parse_ctx));
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
index 76548c97d77ba..7a534d7282ef7 100644
--- a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // This test requires the dylib support introduced in D92214.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
index 6f8035bff7707..da02e6961012c 100644
--- a/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // This test requires the dylib support introduced in D92214.
 // XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/class.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/class.pass.cpp
index 5472944af6684..b2f0dae3edf83 100644
--- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/class.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/class.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // <format>
 
diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp
index f3539b3c4ad71..72f1d6b4787b8 100644
--- a/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp
+++ b/libcxx/test/std/utilities/format/format.arguments/format.arg.store/make_format_args.sh.cpp
@@ -9,6 +9,8 @@
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
 // UNSUPPORTED: libcpp-has-no-wide-characters
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // Validate it works regardless of the signedness of `char`.
 // RUN: %{cxx} %{flags} %{compile_flags} -fsigned-char -fsyntax-only %s
diff --git a/libcxx/test/std/utilities/format/format.arguments/format.arg/operator_bool.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.arg/operator_bool.pass.cpp
index 50f6885ba8c67..31c66917a5ff5 100644
--- a/libcxx/test/std/utilities/format/format.arguments/format.arg/operator_bool.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.arguments/format.arg/operator_bool.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // <format>
 
diff --git a/libcxx/test/std/utilities/format/format.arguments/format.args/ctor.pass.cpp b/libcxx/test/std/utilities/format/format.arguments/format.args/ctor.pass.cpp
index 2a73d951cc952..0232df433ab06 100644
--- a/libcxx/test/std/utilities/format/format.arguments/format.args/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.arguments/format.args/ctor.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // <format>
 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
index e345878b478a1..aae23cb108794 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/arg.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // <format>
 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp
index 2a4b3faee9685..41eb8d3797f80 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/ctor.pass.cpp
@@ -9,6 +9,8 @@
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-localization
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // REQUIRES: locale.en_US.UTF-8
 // REQUIRES: locale.fr_FR.UTF-8
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp
index 89f3a36d011f7..fe2a1938000a5 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.context/locale.pass.cpp
@@ -9,6 +9,8 @@
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-localization
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // REQUIRES: locale.en_US.UTF-8
 // REQUIRES: locale.fr_FR.UTF-8
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.char.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.char.pass.cpp
index 65bca30220d26..3ca3297a151bd 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.char.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.char.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // <format>
 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.handle.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.handle.pass.cpp
new file mode 100644
index 0000000000000..6d28b4f423fae
--- /dev/null
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.handle.pass.cpp
@@ -0,0 +1,76 @@
+//===----------------------------------------------------------------------===//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-format
+
+// <format>
+
+// A user defined formatter using
+// template<class Context>
+// class basic_format_arg<Context>::handle
+
+#include <format>
+
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <charconv>
+#include <concepts>
+#include <string>
+#include <type_traits>
+
+#include "test_macros.h"
+
+enum class color { black, red, gold };
+const char* color_names[] = {"black", "red", "gold"};
+
+template <>
+struct std::formatter<color> : std::formatter<const char*> {
+  auto format(color c, format_context& ctx) {
+    return formatter<const char*>::format(color_names[static_cast<int>(c)], ctx);
+  }
+};
+
+void test(std::string expected, std::string_view fmt, color arg) {
+  auto parse_ctx = std::format_parse_context(fmt);
+  std::formatter<color, char> formatter;
+  static_assert(std::semiregular<decltype(formatter)>);
+
+  auto it = formatter.parse(parse_ctx);
+  assert(it == fmt.end() - (!fmt.empty() && fmt.back() == '}'));
+
+  std::string result;
+  auto out = std::back_inserter(result);
+  using FormatCtxT = std::basic_format_context<decltype(out), char>;
+
+  auto format_ctx = std::__format_context_create<decltype(out), char>(out, std::make_format_args<FormatCtxT>(arg));
+  formatter.format(arg, format_ctx);
+  assert(result == expected);
+}
+
+void test_termination_condition(std::string expected, std::string f, color arg) {
+  // The format-spec is valid if completely consumed or terminates at a '}'.
+  // The valid inputs all end with a '}'. The test is executed twice:
+  // - first with the terminating '}',
+  // - second consuming the entire input.
+  std::string_view fmt{f};
+  assert(fmt.back() == '}' && "Pre-condition failure");
+
+  test(expected, fmt, arg);
+  fmt.remove_suffix(1);
+  test(expected, fmt, arg);
+}
+
+int main(int, char**) {
+  test_termination_condition("black", "}", color::black);
+  test_termination_condition("red", "}", color::red);
+  test_termination_condition("gold", "}", color::gold);
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
index 8f2d5af2dab91..d25441799483e 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // <format>
 
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
index 22e330c9daa81..4ee1cfc9dc808 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
@@ -8,6 +8,8 @@
 // UNSUPPORTED: c++03, c++11, c++14, c++17
 // UNSUPPORTED: libcpp-no-concepts
 // UNSUPPORTED: libcpp-has-no-incomplete-format
+// TODO FMT Evaluate gcc-11 status
+// UNSUPPORTED: gcc-11
 
 // <format>
 
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 4b269bbd4e64c..8deed4da43635 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -10,11 +10,14 @@
 
 #include <format>
 
+#include <algorithm>
+#include <charconv>
+#include <cmath>
+#include <cstdint>
+
 #include "make_string.h"
 #include "test_macros.h"
 
-#include <cmath>
-
 // In this file the following template types are used:
 // TestFunction must be callable as check(expected-result, string-to-format, args-to-format...)
 // ExceptionTest must be callable as check_exception(expected-exception, string-to-format, args-to-format...)
@@ -40,6 +43,93 @@ struct context<wchar_t> {
 template <class T>
 using context_t = typename context<T>::type;
 
+// A user-defined type used to test the handle formatter.
+enum class status : uint16_t { foo = 0xAAAA, bar = 0x5555, foobar = 0xAA55 };
+
+// The formatter for a user-defined type used to test the handle formatter.
+template <class CharT>
+struct std::formatter<status, CharT> {
+  int type = 0;
+
+  constexpr auto parse(auto& parse_ctx) -> decltype(parse_ctx.begin()) {
+    auto begin = parse_ctx.begin();
+    auto end = parse_ctx.end();
+    if (begin == end)
+      return begin;
+
+    switch (*begin) {
+    case CharT('x'):
+      break;
+    case CharT('X'):
+      type = 1;
+      break;
+    case CharT('s'):
+      type = 2;
+      break;
+    case CharT('}'):
+      return begin;
+    default:
+      throw_format_error("The format-spec type has a type not supported for a status argument");
+    }
+
+    ++begin;
+    if (begin != end && *begin != CharT('}'))
+      throw_format_error("The format-spec should consume the input or end with a '}'");
+
+    return begin;
+  }
+
+  auto format(status s, auto& ctx) -> decltype(ctx.out()) {
+    const char* names[] = {"foo", "bar", "foobar"};
+    char buffer[6];
+    const char* begin;
+    const char* end;
+    switch (type) {
+    case 0:
+      begin = buffer;
+      buffer[0] = '0';
+      buffer[1] = 'x';
+      end = std::to_chars(&buffer[2], std::end(buffer), static_cast<uint16_t>(s), 16).ptr;
+      break;
+
+    case 1:
+      begin = buffer;
+      buffer[0] = '0';
+      buffer[1] = 'X';
+      end = std::to_chars(&buffer[2], std::end(buffer), static_cast<uint16_t>(s), 16).ptr;
+      std::transform(static_cast<const char*>(&buffer[2]), end, &buffer[2], [](char c) { return std::toupper(c); });
+      break;
+
+    case 2:
+      switch (s) {
+      case status::foo:
+        begin = names[0];
+        break;
+      case status::bar:
+        begin = names[1];
+        break;
+      case status::foobar:
+        begin = names[2];
+        break;
+      }
+      end = begin + strlen(begin);
+      break;
+    }
+
+    return std::copy(begin, end, ctx.out());
+  }
+
+private:
+  void throw_format_error(const char* s) {
+#ifndef _LIBCPP_NO_EXCEPTIONS
+    throw std::format_error(s);
+#else
+    (void)s;
+    std::abort();
+#endif
+  }
+};
+
 template <class CharT>
 std::vector<std::basic_string<CharT>> invalid_types(std::string valid) {
   std::vector<std::basic_string<CharT>> result;
@@ -2518,6 +2608,29 @@ void format_test_pointer(TestFunction check, ExceptionTest check_exception) {
     check_exception("The format-spec type has a type not supported for a pointer argument", fmt, P(nullptr));
 }
 
+template <class CharT, class TestFunction, class ExceptionTest>
+void format_test_handle(TestFunction check, ExceptionTest check_exception) {
+  // *** Valid permuatations ***
+  check(STR("answer is '0xaaaa'"), STR("answer is '{}'"), status::foo);
+  check(STR("answer is '0xaaaa'"), STR("answer is '{:x}'"), status::foo);
+  check(STR("answer is '0XAAAA'"), STR("answer is '{:X}'"), status::foo);
+  check(STR("answer is 'foo'"), STR("answer is '{:s}'"), status::foo);
+
+  check(STR("answer is '0x5555'"), STR("answer is '{}'"), status::bar);
+  check(STR("answer is '0x5555'"), STR("answer is '{:x}'"), status::bar);
+  check(STR("answer is '0X5555'"), STR("answer is '{:X}'"), status::bar);
+  check(STR("answer is 'bar'"), STR("answer is '{:s}'"), status::bar);
+
+  check(STR("answer is '0xaa55'"), STR("answer is '{}'"), status::foobar);
+  check(STR("answer is '0xaa55'"), STR("answer is '{:x}'"), status::foobar);
+  check(STR("answer is '0XAA55'"), STR("answer is '{:X}'"), status::foobar);
+  check(STR("answer is 'foobar'"), STR("answer is '{:s}'"), status::foobar);
+
+  // *** type ***
+  for (const auto& fmt : invalid_types<CharT>("xXs"))
+    check_exception("The format-spec type has a type not supported for a status argument", fmt, status::foo);
+}
+
 template <class CharT, class TestFunction, class ExceptionTest>
 void format_test_pointer(TestFunction check, ExceptionTest check_exception) {
   format_test_pointer<std::nullptr_t, CharT>(check, check_exception);
@@ -2658,6 +2771,9 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   check(STR("hello 0x42"), STR("hello {}"), reinterpret_cast<void*>(0x42));
   check(STR("hello 0x42"), STR("hello {}"), reinterpret_cast<const void*>(0x42));
   format_test_pointer<CharT>(check, check_exception);
+
+  // *** Test handle formatter argument ***
+  format_test_handle<CharT>(check, check_exception);
 }
 
 #ifndef TEST_HAS_NO_WIDE_CHARACTERS

From a922324590a13ae544491d21eb035a284a9e75e5 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Mon, 24 Jan 2022 17:15:33 +0000
Subject: [PATCH 422/946] [gn build] Port 787ccd345cbb

---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 3e28727f082ed..0797908a50c31 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -240,6 +240,7 @@ if (current_toolchain == default_toolchain) {
       "__format/formatter_char.h",
       "__format/formatter_integer.h",
       "__format/formatter_integral.h",
+      "__format/formatter_pointer.h",
       "__format/formatter_string.h",
       "__format/parser_std_format_spec.h",
       "__functional/binary_function.h",

From 5fa40fb293241affeac45c9ec4e129e2280f7510 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Mon, 24 Jan 2022 17:15:34 +0000
Subject: [PATCH 423/946] [gn build] Port db2944e34b16

---
 llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
index 0797908a50c31..e4fa480a6aa34 100644
--- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
+++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn
@@ -238,6 +238,7 @@ if (current_toolchain == default_toolchain) {
       "__format/formatter.h",
       "__format/formatter_bool.h",
       "__format/formatter_char.h",
+      "__format/formatter_floating_point.h",
       "__format/formatter_integer.h",
       "__format/formatter_integral.h",
       "__format/formatter_pointer.h",

From ea17d29a6c834a34a698c87193a86eeab04922d2 Mon Sep 17 00:00:00 2001
From: Stephen Tozer <stephen.tozer@sony.com>
Date: Tue, 18 Jan 2022 11:11:57 +0000
Subject: [PATCH 424/946] [llvm] Do not replace dead constant references in
 metadata with undef

This patch removes an incorrect behaviour in Constants.cpp, which would
replace dead constant references in metadata with an undef value. This
blanket replacement resulted in undef values being inserted into
metadata that would not accept them. The replacement was intended for
debug info metadata, but this is now instead handled in the RAUW
handler.

Differential Revision: https://reviews.llvm.org/D117300
---
 llvm/lib/IR/Constants.cpp                     |  9 +----
 .../Resolution/X86/Inputs/no-undef-type-md.ll | 13 +++++++
 .../LTO/Resolution/X86/no-undef-type-md.ll    | 37 +++++++++++++++++++
 3 files changed, 51 insertions(+), 8 deletions(-)
 create mode 100644 llvm/test/LTO/Resolution/X86/Inputs/no-undef-type-md.ll
 create mode 100644 llvm/test/LTO/Resolution/X86/no-undef-type-md.ll

diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index e031f889caf69..c13990af360ec 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -739,15 +739,8 @@ static bool constantIsDead(const Constant *C, bool RemoveDeadUsers) {
       ++I;
   }
 
-  if (RemoveDeadUsers) {
-    // If C is only used by metadata, it should not be preserved but should
-    // have its uses replaced.
-    if (C->isUsedByMetadata()) {
-      const_cast<Constant *>(C)->replaceAllUsesWith(
-          UndefValue::get(C->getType()));
-    }
+  if (RemoveDeadUsers)
     const_cast<Constant *>(C)->destroyConstant();
-  }
 
   return true;
 }
diff --git a/llvm/test/LTO/Resolution/X86/Inputs/no-undef-type-md.ll b/llvm/test/LTO/Resolution/X86/Inputs/no-undef-type-md.ll
new file mode 100644
index 0000000000000..94166f39e2ae3
--- /dev/null
+++ b/llvm/test/LTO/Resolution/X86/Inputs/no-undef-type-md.ll
@@ -0,0 +1,13 @@
+; ModuleID = 'test.cpp.o'
+source_filename = "test.cpp"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @a()
+
+!llvm.module.flags = !{!9, !39}
+
+!9 = !{i32 1, !"EnableSplitLTOUnit", i32 1}
+!39 = !{i32 5, !"CG Profile", !40}
+!40 = !{!41}
+!41 = distinct !{null, i32 ()* bitcast (void ()* @a to i32 ()*), i64 2594092}
diff --git a/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll b/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
new file mode 100644
index 0000000000000..afee5e656df17
--- /dev/null
+++ b/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
@@ -0,0 +1,37 @@
+; RUN: opt <%s -o %t0.o -thinlto-bc -thinlto-split-lto-unit
+; RUN: llvm-as -o %t1.o %S/Inputs/no-undef-type-md.ll
+; RUN: llvm-lto2 run -o a.out \
+; RUN: %t0.o \
+; RUN: -r=%t0.o,a, \
+; RUN: -r=%t0.o,b,pl \
+; RUN: %t1.o \
+; RUN: -r=%t1.o,a,pl \
+; RUN: | FileCheck --allow-empty --check-prefix=ERROR %s
+; RUN llvm-nm a.out.0 a.out.1 -S | FileCheck %s
+
+; ERROR-NOT: expected a Function or null
+; ERROR-NOT: i32 (%0*, i32*)* undef
+
+; CHECK: a.out.0:
+; CHECK: a.out.1:
+
+; ModuleID = 'test.cpp.o'
+source_filename = "test.cpp"
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @a() {
+entry:
+  ret i32 0
+}
+
+define i32 @b() {
+entry:
+  ret i32 0
+}
+
+!llvm.module.flags = !{!39}
+
+!39 = !{i32 5, !"CG Profile", !40}
+!40 = !{!41}
+!41 = !{i32 ()* @b, i32 ()* @a, i64 2594092}

From 11cea7e5ce4d3f6a0d2fac016d503f99c52cdc96 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Fri, 21 Jan 2022 11:28:16 +0000
Subject: [PATCH 425/946] [AArch64] NFC: Clarify and auto-generate some CodeGen
 tests.

* For ext-narrow-index.ll, move vscale_range attribute closer to the
  function definition, rather than through indirect #<num> attribute. This
  makes the test a bit easier to read.
* auto-generated CHECK lines for sve-cmp-select.ll and
  named-vector-shuffles-sve.ll.
* re-generated CHECK lines for tests that had a mention they were
  auto-generated, but where the CHECK lines were out of date.
---
 llvm/test/CodeGen/AArch64/concat-vector.ll    |   2 +-
 llvm/test/CodeGen/AArch64/ext-narrow-index.ll | 229 +++++++++---------
 .../AArch64/named-vector-shuffles-sve.ll      |  48 ++--
 llvm/test/CodeGen/AArch64/neon-stepvector.ll  |   1 -
 llvm/test/CodeGen/AArch64/shift_minsize.ll    |  66 +++++
 llvm/test/CodeGen/AArch64/sve-cmp-select.ll   |  37 +--
 .../AArch64/sve-fixed-length-sdiv-pow2.ll     |  17 +-
 .../CodeGen/AArch64/sve-punpklo-combine.ll    |   8 +-
 8 files changed, 240 insertions(+), 168 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/concat-vector.ll b/llvm/test/CodeGen/AArch64/concat-vector.ll
index 690fb716771af..1e5d2660a79eb 100644
--- a/llvm/test/CodeGen/AArch64/concat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/concat-vector.ll
@@ -88,7 +88,7 @@ define <8 x i32> @concat8(<4 x i32>* %A, <4 x i32>* %B) {
 define <4 x half> @concat9(<2 x half> %A, <2 x half> %B) {
 ; CHECK-LABEL: concat9:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    zip1    v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    zip1 v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT:    ret
    %v4half= shufflevector <2 x half> %A, <2 x half> %B, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x half> %v4half
diff --git a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
index f7f143ff49e31..b296a79ce4f40 100644
--- a/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
+++ b/llvm/test/CodeGen/AArch64/ext-narrow-index.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64 | FileCheck %s
 
 ; Tests of shufflevector where the index operand is half the width of the vector
@@ -6,9 +7,9 @@
 ; i8 tests
 define <8 x i8> @i8_off0(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %shuffle
@@ -16,10 +17,10 @@ entry:
 
 define <8 x i8> @i8_off1(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   ret <8 x i8> %shuffle
@@ -27,10 +28,10 @@ entry:
 
 define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off8:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <8 x i8> %shuffle
@@ -38,9 +39,10 @@ entry:
 
 define <8 x i8> @i8_off15(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off15:
-; CHECK: ext v0.16b, v0.16b, v1.16b, #15
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
   ret <8 x i8> %shuffle
@@ -48,9 +50,10 @@ entry:
 
 define <8 x i8> @i8_off22(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: i8_off22:
-; CHECK: ext v0.16b, v1.16b, v1.16b, #6
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v1.16b, v1.16b, #6
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
   ret <8 x i8> %shuffle
@@ -59,9 +62,9 @@ entry:
 ; i16 tests
 define <4 x i16> @i16_off0(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i16> %shuffle
@@ -69,10 +72,10 @@ entry:
 
 define <4 x i16> @i16_off1(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x i16> %shuffle
@@ -80,9 +83,10 @@ entry:
 
 define <4 x i16> @i16_off7(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off7:
-; CHECK: ext v0.16b, v0.16b, v1.16b, #14
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
   ret <4 x i16> %shuffle
@@ -90,9 +94,10 @@ entry:
 
 define <4 x i16> @i16_off8(<8 x i16> %arg1, <8 x i16> %arg2) {
 ; CHECK-LABEL: i16_off8:
-; CHECK: mov v0.16b, v1.16b
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> %arg2, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
   ret <4 x i16> %shuffle
@@ -101,9 +106,9 @@ entry:
 ; i32 tests
 define <2 x i32> @i32_off0(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 0, i32 1>
   ret <2 x i32> %shuffle
@@ -111,10 +116,10 @@ entry:
 
 define <2 x i32> @i32_off1(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 1, i32 2>
   ret <2 x i32> %shuffle
@@ -122,9 +127,10 @@ entry:
 
 define <2 x i32> @i32_off3(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off3:
-; CHECK: ext v0.16b, v0.16b, v1.16b, #12
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 3, i32 4>
   ret <2 x i32> %shuffle
@@ -132,9 +138,10 @@ entry:
 
 define <2 x i32> @i32_off4(<4 x i32> %arg1, <4 x i32> %arg2) {
 ; CHECK-LABEL: i32_off4:
-; CHECK: mov v0.16b, v1.16b
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> %arg2, <2 x i32> <i32 4, i32 5>
   ret <2 x i32> %shuffle
@@ -143,9 +150,9 @@ entry:
 ; i64 tests
 define <1 x i64> @i64_off0(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 0>
   ret <1 x i64> %shuffle
@@ -153,10 +160,10 @@ entry:
 
 define <1 x i64> @i64_off1(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 1>
   ret <1 x i64> %shuffle
@@ -164,9 +171,10 @@ entry:
 
 define <1 x i64> @i64_off2(<2 x i64> %arg1, <2 x i64> %arg2) {
 ; CHECK-LABEL: i64_off2:
-; CHECK: mov v0.16b, v1.16b
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov v0.16b, v1.16b
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> %arg2, <1 x i32> <i32 2>
   ret <1 x i64> %shuffle
@@ -175,9 +183,9 @@ entry:
 ; i8 tests with second operand zero
 define <8 x i8> @i8_zero_off0(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   ret <8 x i8> %shuffle
@@ -185,10 +193,10 @@ entry:
 
 define <8 x i8> @i8_zero_off1(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #1
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   ret <8 x i8> %shuffle
@@ -196,10 +204,10 @@ entry:
 
 define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off8:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   ret <8 x i8> %shuffle
@@ -207,10 +215,11 @@ entry:
 
 define <8 x i8> @i8_zero_off15(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off15:
-; CHECK: movi [[REG:v[0-9]+]].2d, #0
-; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #15
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #15
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22>
   ret <8 x i8> %shuffle
@@ -218,9 +227,9 @@ entry:
 
 define <8 x i8> @i8_zero_off22(<16 x i8> %arg1) {
 ; CHECK-LABEL: i8_zero_off22:
-; CHECK: movi v0.2d, #0
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
   ret <8 x i8> %shuffle
@@ -229,9 +238,9 @@ entry:
 ; i16 tests with second operand zero
 define <4 x i16> @i16_zero_off0(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   ret <4 x i16> %shuffle
@@ -239,10 +248,10 @@ entry:
 
 define <4 x i16> @i16_zero_off1(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #2
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #2
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
   ret <4 x i16> %shuffle
@@ -250,10 +259,11 @@ entry:
 
 define <4 x i16> @i16_zero_off7(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off7:
-; CHECK: movi [[REG:v[0-9]+]].2d, #0
-; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #14
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #14
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 7, i32 8, i32 9, i32 10>
   ret <4 x i16> %shuffle
@@ -261,9 +271,9 @@ entry:
 
 define <4 x i16> @i16_zero_off8(<8 x i16> %arg1) {
 ; CHECK-LABEL: i16_zero_off8:
-; CHECK: movi v0.2d, #0
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <8 x i16> %arg1, <8 x i16> zeroinitializer, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
   ret <4 x i16> %shuffle
@@ -272,9 +282,9 @@ entry:
 ; i32 tests with second operand zero
 define <2 x i32> @i32_zero_off0(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
   ret <2 x i32> %shuffle
@@ -282,10 +292,10 @@ entry:
 
 define <2 x i32> @i32_zero_off1(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #4
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #4
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 1, i32 2>
   ret <2 x i32> %shuffle
@@ -293,10 +303,11 @@ entry:
 
 define <2 x i32> @i32_zero_off3(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off3:
-; CHECK: movi [[REG:v[0-9]+]].2d, #0
-; CHECK: ext v0.16b, v0.16b, [[REG]].16b, #12
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2d, #0000000000000000
+; CHECK-NEXT:    ext v0.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 3, i32 4>
   ret <2 x i32> %shuffle
@@ -304,9 +315,9 @@ entry:
 
 define <2 x i32> @i32_zero_off4(<4 x i32> %arg1) {
 ; CHECK-LABEL: i32_zero_off4:
-; CHECK: movi v0.2d, #0
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <4 x i32> %arg1, <4 x i32> zeroinitializer, <2 x i32> <i32 4, i32 5>
   ret <2 x i32> %shuffle
@@ -315,9 +326,9 @@ entry:
 ; i64 tests with second operand zero
 define <1 x i64> @i64_zero_off0(<2 x i64> %arg1) {
 ; CHECK-LABEL: i64_zero_off0:
-; CHECK-NOT: mov
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 0>
   ret <1 x i64> %shuffle
@@ -325,10 +336,10 @@ entry:
 
 define <1 x i64> @i64_zero_off1(<2 x i64> %arg1) {
 ; CHECK-LABEL: i64_zero_off1:
-; CHECK-NOT: mov
-; CHECK: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 1>
   ret <1 x i64> %shuffle
@@ -336,9 +347,9 @@ entry:
 
 define <1 x i64> @i64_zero_off2(<2 x i64> %arg1) {
 ; CHECK-LABEL: i64_zero_off2:
-; CHECK: fmov d0, xzr
-; CHECK-NOT: ext
-; CHECK: ret
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov d0, xzr
+; CHECK-NEXT:    ret
 entry:
   %shuffle = shufflevector <2 x i64> %arg1, <2 x i64> zeroinitializer, <1 x i32> <i32 2>
   ret <1 x i64> %shuffle
diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
index bce9e035da8c1..404811433ac20 100644
--- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
+++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll
@@ -24,7 +24,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_first_idx(<vscale x 16 x i8> %a, <vsca
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #1 {
+define <vscale x 16 x i8> @splice_nxv16i8_last_idx(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #255
@@ -51,7 +51,7 @@ define <vscale x 4 x i32> @splice_nxv4i32_first_idx(<vscale x 4 x i32> %a, <vsca
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #1 {
+define <vscale x 4 x i32> @splice_nxv4i32_last_idx(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv4i32_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -69,7 +69,7 @@ define <vscale x 2 x i64> @splice_nxv2i64_first_idx(<vscale x 2 x i64> %a, <vsca
   ret <vscale x 2 x i64> %res
 }
 
-define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #1 {
+define <vscale x 2 x i64> @splice_nxv2i64_last_idx(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv2i64_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -109,7 +109,7 @@ define <vscale x 2 x half> @splice_nxv2f16_first_idx(<vscale x 2 x half> %a, <vs
   ret <vscale x 2 x half> %res
 }
 
-define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) #1 {
+define <vscale x 2 x half> @splice_nxv2f16_last_idx(<vscale x 2 x half> %a, <vscale x 2 x half> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv2f16_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -149,7 +149,7 @@ define <vscale x 4 x half> @splice_nxv4f16_first_idx(<vscale x 4 x half> %a, <vs
   ret <vscale x 4 x half> %res
 }
 
-define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) #1 {
+define <vscale x 4 x half> @splice_nxv4f16_last_idx(<vscale x 4 x half> %a, <vscale x 4 x half> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv4f16_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -167,7 +167,7 @@ define <vscale x 8 x half> @splice_nxv8f16_first_idx(<vscale x 8 x half> %a, <vs
   ret <vscale x 8 x half> %res
 }
 
-define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #1 {
+define <vscale x 8 x half> @splice_nxv8f16_last_idx(<vscale x 8 x half> %a, <vscale x 8 x half> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv8f16_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #254
@@ -207,7 +207,7 @@ define <vscale x 2 x float> @splice_nxv2f32_first_idx(<vscale x 2 x float> %a, <
   ret <vscale x 2 x float> %res
 }
 
-define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) #1 {
+define <vscale x 2 x float> @splice_nxv2f32_last_idx(<vscale x 2 x float> %a, <vscale x 2 x float> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv2f32_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -225,7 +225,7 @@ define <vscale x 4 x float> @splice_nxv4f32_first_idx(<vscale x 4 x float> %a, <
   ret <vscale x 4 x float> %res
 }
 
-define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #1 {
+define <vscale x 4 x float> @splice_nxv4f32_last_idx(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv4f32_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #252
@@ -243,7 +243,7 @@ define <vscale x 2 x double> @splice_nxv2f64_first_idx(<vscale x 2 x double> %a,
   ret <vscale x 2 x double> %res
 }
 
-define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
+define <vscale x 2 x double> @splice_nxv2f64_last_idx(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv2f64_last_idx:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ext z0.b, z0.b, z1.b, #248
@@ -345,7 +345,7 @@ define <vscale x 8 x i32> @splice_nxv8i32_idx(<vscale x 8 x i32> %a, <vscale x 8
 }
 
 ; Verify splitvec type legalisation works as expected.
-define <vscale x 16 x float> @splice_nxv16f32_16(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #2 {
+define <vscale x 16 x float> @splice_nxv16f32_16(<vscale x 16 x float> %a, <vscale x 16 x float> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv16f32_16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -392,7 +392,7 @@ define <vscale x 16 x i8> @splice_nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 16 x i8> @splice_nxv16i8_neg32(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #2 {
+define <vscale x 16 x i8> @splice_nxv16i8_neg32(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl32
@@ -403,7 +403,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_neg32(<vscale x 16 x i8> %a, <vscale x
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 16 x i8> @splice_nxv16i8_neg64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #3 {
+define <vscale x 16 x i8> @splice_nxv16i8_neg64(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(4,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl64
@@ -414,7 +414,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_neg64(<vscale x 16 x i8> %a, <vscale x
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #4 {
+define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(8,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg128:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl128
@@ -425,7 +425,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_neg128(<vscale x 16 x i8> %a, <vscale
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #1 {
+define <vscale x 16 x i8> @splice_nxv16i8_neg256(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(16,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg256:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.b, vl256
@@ -447,7 +447,7 @@ define <vscale x 16 x i8> @splice_nxv16i8_1(<vscale x 16 x i8> %a, <vscale x 16
   ret <vscale x 16 x i8> %res
 }
 
-define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #2 {
+define <vscale x 16 x i8> @splice_nxv16i8_neg17(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv16i8_neg17:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -492,7 +492,7 @@ define <vscale x 8 x i16> @splice_nxv8i16_1(<vscale x 8 x i16> %a, <vscale x 8 x
   ret <vscale x 8 x i16> %res
 }
 
-define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #2 {
+define <vscale x 8 x i16> @splice_nxv8i16_neg9(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv8i16_neg9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -537,7 +537,7 @@ define <vscale x 4 x i32> @splice_nxv4i32_1(<vscale x 4 x i32> %a, <vscale x 4 x
   ret <vscale x 4 x i32> %res
 }
 
-define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #2 {
+define <vscale x 4 x i32> @splice_nxv4i32_neg5(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv4i32_neg5:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl5
@@ -570,7 +570,7 @@ define <vscale x 2 x i64> @splice_nxv2i64_1(<vscale x 2 x i64> %a, <vscale x 2 x
   ret <vscale x 2 x i64> %res
 }
 
-define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #2 {
+define <vscale x 2 x i64> @splice_nxv2i64_neg3(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv2i64_neg3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl3
@@ -603,7 +603,7 @@ define <vscale x 8 x half> @splice_nxv8f16_1(<vscale x 8 x half> %a, <vscale x 8
   ret <vscale x 8 x half> %res
 }
 
-define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #2 {
+define <vscale x 8 x half> @splice_nxv8f16_neg9(<vscale x 8 x half> %a, <vscale x 8 x half> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv8f16_neg9:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -648,7 +648,7 @@ define <vscale x 4 x float> @splice_nxv4f32_1(<vscale x 4 x float> %a, <vscale x
   ret <vscale x 4 x float> %res
 }
 
-define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #2 {
+define <vscale x 4 x float> @splice_nxv4f32_neg5(<vscale x 4 x float> %a, <vscale x 4 x float> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv4f32_neg5:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s, vl5
@@ -681,7 +681,7 @@ define <vscale x 2 x double> @splice_nxv2f64_1(<vscale x 2 x double> %a, <vscale
   ret <vscale x 2 x double> %res
 }
 
-define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #2 {
+define <vscale x 2 x double> @splice_nxv2f64_neg3(<vscale x 2 x double> %a, <vscale x 2 x double> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv2f64_neg3:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.d, vl3
@@ -797,7 +797,7 @@ define <vscale x 8 x i32> @splice_nxv8i32(<vscale x 8 x i32> %a, <vscale x 8 x i
 }
 
 ; Verify splitvec type legalisation works as expected.
-define <vscale x 16 x float> @splice_nxv16f32_neg17(<vscale x 16 x float> %a, <vscale x 16 x float> %b) #2 {
+define <vscale x 16 x float> @splice_nxv16f32_neg17(<vscale x 16 x float> %a, <vscale x 16 x float> %b) vscale_range(2,16) #0 {
 ; CHECK-LABEL: splice_nxv16f32_neg17:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
@@ -848,7 +848,3 @@ declare <vscale x 16 x float> @llvm.experimental.vector.splice.nxv16f32(<vscale
 declare <vscale x 2 x double> @llvm.experimental.vector.splice.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, i32)
 
 attributes #0 = { nounwind "target-features"="+sve" }
-attributes #1 = { nounwind "target-features"="+sve" vscale_range(16,16) }
-attributes #2 = { nounwind "target-features"="+sve" vscale_range(2,16) }
-attributes #3 = { nounwind "target-features"="+sve" vscale_range(4,16) }
-attributes #4 = { nounwind "target-features"="+sve" vscale_range(8,16) }
diff --git a/llvm/test/CodeGen/AArch64/neon-stepvector.ll b/llvm/test/CodeGen/AArch64/neon-stepvector.ll
index 05308bf5f6d76..7255574f42097 100644
--- a/llvm/test/CodeGen/AArch64/neon-stepvector.ll
+++ b/llvm/test/CodeGen/AArch64/neon-stepvector.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK
 
 ; LEGAL INTEGER TYPES
diff --git a/llvm/test/CodeGen/AArch64/shift_minsize.ll b/llvm/test/CodeGen/AArch64/shift_minsize.ll
index 78d87ff77762c..cc29e3a5f04f5 100644
--- a/llvm/test/CodeGen/AArch64/shift_minsize.ll
+++ b/llvm/test/CodeGen/AArch64/shift_minsize.ll
@@ -16,6 +16,11 @@ define i64 @f0(i64 %val, i64 %amt) minsize optsize {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    lsl x0, x0, x1
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: f0:
+; CHECK-DARWIN:       ; %bb.0:
+; CHECK-DARWIN-NEXT:    lsl x0, x0, x1
+; CHECK-DARWIN-NEXT:    ret
   %res = shl i64 %val, %amt
   ret i64 %res
 }
@@ -26,6 +31,12 @@ define i32 @f1(i64 %x, i64 %y) minsize optsize {
 ; CHECK-NEXT:    lsl x0, x0, x1
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: f1:
+; CHECK-DARWIN:       ; %bb.0:
+; CHECK-DARWIN-NEXT:    lsl x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
+; CHECK-DARWIN-NEXT:    ret
 	%a = shl i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
@@ -37,6 +48,12 @@ define i32 @f2(i64 %x, i64 %y) minsize optsize {
 ; CHECK-NEXT:    asr x0, x0, x1
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: f2:
+; CHECK-DARWIN:       ; %bb.0:
+; CHECK-DARWIN-NEXT:    asr x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
+; CHECK-DARWIN-NEXT:    ret
 	%a = ashr i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
@@ -48,6 +65,12 @@ define i32 @f3(i64 %x, i64 %y) minsize optsize {
 ; CHECK-NEXT:    lsr x0, x0, x1
 ; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: f3:
+; CHECK-DARWIN:       ; %bb.0:
+; CHECK-DARWIN-NEXT:    lsr x0, x0, x1
+; CHECK-DARWIN-NEXT:    ; kill: def $w0 killed $w0 killed $x0
+; CHECK-DARWIN-NEXT:    ret
 	%a = lshr i64 %x, %y
 	%b = trunc i64 %a to i32
 	ret i32 %b
@@ -62,6 +85,20 @@ define dso_local { i64, i64 } @shl128(i64 %x.coerce0, i64 %x.coerce1, i8 signext
 ; CHECK-NEXT:    bl __ashlti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: shl128:
+; CHECK-DARWIN:       ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT:    mvn w8, w2
+; CHECK-DARWIN-NEXT:    mov w9, w2
+; CHECK-DARWIN-NEXT:    lsr x10, x0, #1
+; CHECK-DARWIN-NEXT:    tst x9, #0x40
+; CHECK-DARWIN-NEXT:    lsr x8, x10, x8
+; CHECK-DARWIN-NEXT:    lsl x10, x1, x9
+; CHECK-DARWIN-NEXT:    orr x8, x10, x8
+; CHECK-DARWIN-NEXT:    lsl x10, x0, x9
+; CHECK-DARWIN-NEXT:    csel x1, x10, x8, ne
+; CHECK-DARWIN-NEXT:    csel x0, xzr, x10, ne
+; CHECK-DARWIN-NEXT:    ret
 
 entry:
   %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
@@ -88,6 +125,21 @@ define dso_local { i64, i64 } @ashr128(i64 %x.coerce0, i64 %x.coerce1, i8 signex
 ; CHECK-NEXT:    bl __ashrti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: ashr128:
+; CHECK-DARWIN:       ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT:    mov w8, w2
+; CHECK-DARWIN-NEXT:    mvn w9, w2
+; CHECK-DARWIN-NEXT:    lsl x10, x1, #1
+; CHECK-DARWIN-NEXT:    tst x8, #0x40
+; CHECK-DARWIN-NEXT:    lsr x11, x0, x8
+; CHECK-DARWIN-NEXT:    lsl x9, x10, x9
+; CHECK-DARWIN-NEXT:    asr x10, x1, x8
+; CHECK-DARWIN-NEXT:    orr x9, x9, x11
+; CHECK-DARWIN-NEXT:    asr x8, x1, #63
+; CHECK-DARWIN-NEXT:    csel x0, x10, x9, ne
+; CHECK-DARWIN-NEXT:    csel x1, x8, x10, ne
+; CHECK-DARWIN-NEXT:    ret
 entry:
   %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
   %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
@@ -113,6 +165,20 @@ define dso_local { i64, i64 } @lshr128(i64 %x.coerce0, i64 %x.coerce1, i8 signex
 ; CHECK-NEXT:    bl __lshrti3
 ; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
+;
+; CHECK-DARWIN-LABEL: lshr128:
+; CHECK-DARWIN:       ; %bb.0: ; %entry
+; CHECK-DARWIN-NEXT:    mov w8, w2
+; CHECK-DARWIN-NEXT:    mvn w9, w2
+; CHECK-DARWIN-NEXT:    lsl x10, x1, #1
+; CHECK-DARWIN-NEXT:    tst x8, #0x40
+; CHECK-DARWIN-NEXT:    lsr x11, x0, x8
+; CHECK-DARWIN-NEXT:    lsl x9, x10, x9
+; CHECK-DARWIN-NEXT:    orr x9, x9, x11
+; CHECK-DARWIN-NEXT:    lsr x10, x1, x8
+; CHECK-DARWIN-NEXT:    csel x0, x10, x9, ne
+; CHECK-DARWIN-NEXT:    csel x1, xzr, x10, ne
+; CHECK-DARWIN-NEXT:    ret
 entry:
   %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
   %x.sroa.2.0.insert.shift = shl nuw i128 %x.sroa.2.0.insert.ext, 64
diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-select.ll b/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
index b04e8d922c803..1a30005fa4674 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-select.ll
@@ -1,36 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve -o - < %s | FileCheck %s
 
 define <vscale x 16 x i8> @vselect_cmp_ne(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
-  ; CHECK-LABEL: vselect_cmp_ne
-  ; CHECK:       // %bb.0:
-	; CHECK-NEXT:    ptrue	p0.b
-	; CHECK-NEXT:    cmpne	p0.b, p0/z, z0.b, z1.b
-	; CHECK-NEXT:    sel	z0.b, p0, z1.b, z2.b
-	; CHECK-NEXT:    ret
+; CHECK-LABEL: vselect_cmp_ne:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpne p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT:    ret
   %cmp = icmp ne <vscale x 16 x i8> %a, %b
   %d = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c
   ret <vscale x 16 x i8> %d
 }
 
 define <vscale x 16 x i8> @vselect_cmp_sgt(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
-  ; CHECK-LABEL: vselect_cmp_sgt
-  ; CHECK:       // %bb.0:
-  ; CHECK-NEXT: 	ptrue	p0.b
-  ; CHECK-NEXT: 	cmpgt	p0.b, p0/z, z0.b, z1.b
-  ; CHECK-NEXT: 	sel	z0.b, p0, z1.b, z2.b
-  ; CHECK-NEXT: 	ret
+; CHECK-LABEL: vselect_cmp_sgt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmpgt p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT:    ret
   %cmp = icmp sgt <vscale x 16 x i8> %a, %b
   %d = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c
   ret <vscale x 16 x i8> %d
 }
 
 define <vscale x 16 x i8> @vselect_cmp_ugt(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
-  ; CHECK-LABEL: vselect_cmp_ugt
-  ; CHECK:       // %bb.0:
-  ; CHECK-NEXT: 	ptrue	p0.b
-  ; CHECK-NEXT: 	cmphi	p0.b, p0/z, z0.b, z1.b
-  ; CHECK-NEXT: 	sel	z0.b, p0, z1.b, z2.b
-  ; CHECK-NEXT: 	ret
+; CHECK-LABEL: vselect_cmp_ugt:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    cmphi p0.b, p0/z, z0.b, z1.b
+; CHECK-NEXT:    sel z0.b, p0, z1.b, z2.b
+; CHECK-NEXT:    ret
   %cmp = icmp ugt <vscale x 16 x i8> %a, %b
   %d = select <vscale x 16 x i1> %cmp, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c
   ret <vscale x 16 x i8> %d
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
index 76163f37a2aca..e09a79fb838f0 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -aarch64-sve-vector-bits-min=256  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256
 ; RUN: llc -aarch64-sve-vector-bits-min=384  < %s | FileCheck %s -check-prefixes=CHECK
 ; RUN: llc -aarch64-sve-vector-bits-min=512  < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
@@ -21,7 +20,7 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) #0 {
 ; CHECK-LABEL: sdiv_v8i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p0.b, vl8
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -33,7 +32,7 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) #0 {
 ; CHECK-LABEL: sdiv_v16i8:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p0.b, vl16
 ; CHECK-NEXT:    asrd z0.b, p0/m, z0.b, #5
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -113,7 +112,7 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) #0 {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p0.h, vl4
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -125,7 +124,7 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) #0 {
 ; CHECK-LABEL: sdiv_v8i16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p0.h, vl8
 ; CHECK-NEXT:    asrd z0.h, p0/m, z0.h, #5
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -205,7 +204,7 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) #0 {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p0.s, vl2
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -217,7 +216,7 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) #0 {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p0.s, vl4
 ; CHECK-NEXT:    asrd z0.s, p0/m, z0.s, #5
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
@@ -297,7 +296,7 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) #0 {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ptrue p0.d, vl1
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
 ; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT:    ret
@@ -310,7 +309,7 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) #0 {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ptrue p0.d, vl2
 ; CHECK-NEXT:    asrd z0.d, p0/m, z0.d, #5
 ; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll b/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
index 4e737424506d0..ddc2f5bd81284 100644
--- a/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
+++ b/llvm/test/CodeGen/AArch64/sve-punpklo-combine.ll
@@ -177,11 +177,11 @@ define <vscale x 2 x i1> @masked_load_sext_i8i64_parg(i8* %ap, <vscale x 16 x i8
 define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_all(i8* %ap, <vscale x 16 x i8> %b) #0 {
 ; CHECK-LABEL: masked_load_sext_i8i16_ptrue_all:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p0.b, vl64
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    mov z0.h, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    ptrue p0.h, vl32
 ; CHECK-NEXT:    cmpne p0.h, p0/z, z0.h, #0
 ; CHECK-NEXT:    ret
   %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)
@@ -197,12 +197,12 @@ define <vscale x 8 x i1> @masked_load_sext_i8i16_ptrue_all(i8* %ap, <vscale x 16
 define <vscale x 4 x i1> @masked_load_sext_i8i32_ptrue_all(i8* %ap, <vscale x 16 x i8> %b) #0 {
 ; CHECK-LABEL: masked_load_sext_i8i32_ptrue_all:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    ptrue p0.b, vl64
 ; CHECK-NEXT:    cmpeq p0.b, p0/z, z0.b, #0
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    punpklo p0.h, p0.b
 ; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ptrue p0.s, vl32
 ; CHECK-NEXT:    cmpne p0.s, p0/z, z0.s, #0
 ; CHECK-NEXT:    ret
   %p0 = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 11)

From 57eb5033cdffd4a4b35e2ba308876f50bc9cce62 Mon Sep 17 00:00:00 2001
From: Clint Caywood <caywood@google.com>
Date: Mon, 24 Jan 2022 09:40:38 -0800
Subject: [PATCH 426/946] [libc] Add bazel definition for hypot/hypotf.

Patch by Clint Caywood.

Differential Revision: https://reviews.llvm.org/D118053
---
 utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index e17f138e74ab9..b709565b14830 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -75,6 +75,7 @@ fputil_common_hdrs = [
     "src/__support/FPUtil/FEnvUtils.h",
     "src/__support/FPUtil/FPBits.h",
     "src/__support/FPUtil/FloatProperties.h",
+    "src/__support/FPUtil/Hypot.h",
     "src/__support/FPUtil/ManipulationFunctions.h",
     "src/__support/FPUtil/NearestIntegerOperations.h",
     "src/__support/FPUtil/NormalFloat.h",
@@ -371,6 +372,10 @@ libc_math_function(name = "frexpf")
 
 libc_math_function(name = "frexpl")
 
+libc_math_function(name = "hypot")
+
+libc_math_function(name = "hypotf")
+
 libc_math_function(name = "logb")
 
 libc_math_function(name = "logbf")

From 38e16e1cebb891ad47b85727bf46f7dac6d7da94 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@chromium.org>
Date: Mon, 24 Jan 2022 19:01:38 +0100
Subject: [PATCH 427/946] Use -gdwarf-4 in
 compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c

otherwise the test fails after the recent DWARF 4 -> 5 default change,
see https://github.com/llvm/llvm-project/issues/53387
---
 compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
index c78614f28b11e..9aa24d72376ca 100644
--- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
+++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
@@ -1,7 +1,7 @@
 // REQUIRES: zlib
 
 // Value profiling is currently not supported in lightweight mode.
-// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: %clang_pgogen -o %t -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
 // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
 // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite
 

From d27f02261442a15b0edb627023a8568735b2d110 Mon Sep 17 00:00:00 2001
From: Jeremy Morse <jeremy.morse@sony.com>
Date: Mon, 24 Jan 2022 17:52:52 +0000
Subject: [PATCH 428/946] [NFC][DebugInfo] Strip out an undesired #if 0 block

As mentioned in discussion of D116821, it's better to just delete this
block than keep it hanging around.
---
 llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
index 3149729b92313..8f697611a82c0 100644
--- a/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
+++ b/llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp
@@ -123,14 +123,6 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
 }
 
 bool llvm::debuginfoShouldUseDebugInstrRef(const Triple &T) {
-  // Enable by default on x86_64, disable if explicitly turned off on cmdline.
-  // Disabled while https://reviews.llvm.org/D116821 is investigated.
-#if 0
-  if (T.getArch() == llvm::Triple::x86_64 &&
-      ValueTrackingVariableLocations != cl::boolOrDefault::BOU_FALSE)
-    return true;
-#endif
-
-  // Otherwise: enable if explicitly requested on command line.
+  // Enable if explicitly requested on command line.
   return ValueTrackingVariableLocations == cl::boolOrDefault::BOU_TRUE;
 }

From 830df62a07031d84257a8a208798a6a2b4c0461a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Mon, 24 Jan 2022 18:21:52 +0000
Subject: [PATCH 429/946] [ConstraintElimination] Add test from PR53123.

---
 .../ConstraintElimination/sub-nuw.ll          | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
index bcefc9930f3ca..4d3b259b0143b 100644
--- a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
+++ b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
@@ -237,4 +237,38 @@ if.end:                                           ; preds = %entry
   ret void
 }
 
+define i16 @test_pr53123_sub_constraint_sign(i16 %v) {
+; CHECK-LABEL: @test_pr53123_sub_constraint_sign(
+; CHECK-NEXT:  bb.0:
+; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i16 32767, [[V:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[V]], [[SUB]]
+; CHECK-NEXT:    br i1 [[CMP1]], label [[BB_2:%.*]], label [[BB_1:%.*]]
+; CHECK:       bb.1:
+; CHECK-NEXT:    [[ADD:%.*]] = shl nuw nsw i16 [[V]], 1
+; CHECK-NEXT:    [[SUB9:%.*]] = sub nuw nsw i16 32767, [[ADD]]
+; CHECK-NEXT:    [[CMP11:%.*]] = icmp ugt i16 [[ADD]], [[SUB9]]
+; CHECK-NEXT:    br i1 false, label [[BB_3:%.*]], label [[BB_2]]
+; CHECK:       bb.2:
+; CHECK-NEXT:    ret i16 1
+; CHECK:       bb.3:
+; CHECK-NEXT:    ret i16 0
+;
+bb.0:
+  %sub = sub nuw nsw i16 32767, %v
+  %cmp1 = icmp ugt i16 %v, %sub
+  br i1 %cmp1, label %bb.2, label %bb.1
+
+bb.1:
+  %add = shl nuw nsw i16 %v, 1
+  %sub9 = sub nuw nsw i16 32767, %add
+  %cmp11 = icmp ugt i16 %add, %sub9
+  br i1 %cmp11, label %bb.3, label %bb.2
+
+bb.2:
+  ret i16 1
+
+bb.3:
+  ret i16 0
+}
+
 declare void @use(i1)

From 8a15caaae56182815839741de414a0ba60037a9a Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Mon, 24 Jan 2022 18:32:32 +0000
Subject: [PATCH 430/946] [ConstraintElimination] Fix sign of sub
 decomposition.

Update the decomposition code to make sure the right coefficient (-1) is
used for the second operand of the subtract.

Fixes PR53123.
---
 llvm/lib/Transforms/Scalar/ConstraintElimination.cpp  | 2 +-
 llvm/test/Transforms/ConstraintElimination/sub-nuw.ll | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 7f2d5d7d99871..fcb88b54a094c 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -108,7 +108,7 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
   if (match(V, m_NUWSub(m_Value(Op0), m_ConstantInt(CI))))
     return {{-1 * CI->getSExtValue(), nullptr}, {1, Op0}};
   if (match(V, m_NUWSub(m_Value(Op0), m_Value(Op1))))
-    return {{0, nullptr}, {1, Op0}, {1, Op1}};
+    return {{0, nullptr}, {1, Op0}, {-1, Op1}};
 
   return {{0, nullptr}, {1, V}};
 }
diff --git a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
index 4d3b259b0143b..25594db9905d0 100644
--- a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
+++ b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll
@@ -247,7 +247,7 @@ define i16 @test_pr53123_sub_constraint_sign(i16 %v) {
 ; CHECK-NEXT:    [[ADD:%.*]] = shl nuw nsw i16 [[V]], 1
 ; CHECK-NEXT:    [[SUB9:%.*]] = sub nuw nsw i16 32767, [[ADD]]
 ; CHECK-NEXT:    [[CMP11:%.*]] = icmp ugt i16 [[ADD]], [[SUB9]]
-; CHECK-NEXT:    br i1 false, label [[BB_3:%.*]], label [[BB_2]]
+; CHECK-NEXT:    br i1 [[CMP11]], label [[BB_3:%.*]], label [[BB_2]]
 ; CHECK:       bb.2:
 ; CHECK-NEXT:    ret i16 1
 ; CHECK:       bb.3:

From 0a3d946e7bb4cd3519370c879dac7cbb58d13a55 Mon Sep 17 00:00:00 2001
From: John Ericson <John.Ericson@Obsidian.Systems>
Date: Sun, 23 Jan 2022 05:30:32 +0000
Subject: [PATCH 431/946] [libc][cmake] Make `add_tablegen` calls match others

in all the other `add_tablegen` calls, the project name is so transformed so it
can be a prefix of a CMake variable. I think it is better to do do that here
too for consistency.

Reviewed By: sivachandra

Differential Revision: https://reviews.llvm.org/D117979
---
 libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt | 2 +-
 libc/utils/tools/WrapperGen/CMakeLists.txt        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt b/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt
index c90fde76dd58c..9e25c21c6b359 100644
--- a/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt
+++ b/libc/utils/HdrGen/PrototypeTestGen/CMakeLists.txt
@@ -1,4 +1,4 @@
-add_tablegen(libc-prototype-testgen llvm-libc
+add_tablegen(libc-prototype-testgen LLVM_LIBC
   PrototypeTestGen.cpp
 )
 target_link_libraries(libc-prototype-testgen PRIVATE LibcTableGenUtil)
diff --git a/libc/utils/tools/WrapperGen/CMakeLists.txt b/libc/utils/tools/WrapperGen/CMakeLists.txt
index fe8ffcce94a58..5fd78591e9b3c 100644
--- a/libc/utils/tools/WrapperGen/CMakeLists.txt
+++ b/libc/utils/tools/WrapperGen/CMakeLists.txt
@@ -1,6 +1,6 @@
 set(LLVM_LINK_COMPONENTS Support)
 
-add_tablegen(libc-wrappergen llvm-libc
+add_tablegen(libc-wrappergen LLVM_LIBC
   Main.cpp
 )
 

From eadf7268d578396d4f2fc2a0f7eda8096c041007 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 15 Jan 2022 13:29:26 -0500
Subject: [PATCH 432/946] [libc++] Fix bugs in common_iterator; add test
 coverage.

Differential Revision: https://reviews.llvm.org/D117400
---
 libcxx/docs/Status/Cxx2bIssues.csv            |   4 +-
 libcxx/include/__iterator/common_iterator.h   |  67 ++++------
 .../constraints.compile.pass.cpp              |  28 ++++
 .../iterators.common/ctor.converting.pass.cpp |  48 +++++++
 .../iterators.common/ctor.default.pass.cpp    |  41 ++++++
 .../iterators.common/ctor.iter.pass.cpp       |  50 ++++++++
 .../iterators.common/ctor.pass.cpp            |  90 -------------
 .../iterators.common/ctor.sentinel.pass.cpp   |  63 +++++++++
 .../iterators.common/iter_move.pass.cpp       |  74 ++++++++---
 .../iterators.common/iter_swap.pass.cpp       | 121 +++++++++++++-----
 .../predef.iterators/iterators.common/types.h |  26 ----
 libcxx/test/support/test_iterators.h          |  32 -----
 12 files changed, 408 insertions(+), 236 deletions(-)
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/constraints.compile.pass.cpp
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.converting.pass.cpp
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.default.pass.cpp
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.iter.pass.cpp
 delete mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.pass.cpp
 create mode 100644 libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.sentinel.pass.cpp

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index cbc12d27a5614..9baefabe05da8 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -126,7 +126,7 @@
 `3571 <https://wg21.link/LWG3571>`__,"``flush_emit`` should set ``badbit`` if the ``emit`` call fails","October 2021","",""
 `3572 <https://wg21.link/LWG3572>`__,"``copyable-box`` should be fully ``constexpr``","October 2021","","","|ranges|"
 `3573 <https://wg21.link/LWG3573>`__,"Missing Throws element for ``basic_string_view(It begin, End end)``","October 2021","|Complete|","14.0"
-`3574 <https://wg21.link/LWG3574>`__,"``common_iterator`` should be completely ``constexpr``-able","October 2021","","","|ranges|"
+`3574 <https://wg21.link/LWG3574>`__,"``common_iterator`` should be completely ``constexpr``-able","October 2021","|Complete|","14.0","|ranges|"
 `3580 <https://wg21.link/LWG3580>`__,"``iota_view``'s ``iterator``'s binary ``operator+`` should be improved","October 2021","","","|ranges|"
 `3581 <https://wg21.link/LWG3581>`__,"The range constructor makes ``basic_string_view`` not trivially move constructible","October 2021","","","|ranges|"
 `3585 <https://wg21.link/LWG3585>`__,"``variant`` converting assignment with immovable alternative","October 2021","",""
@@ -135,7 +135,7 @@
 `3591 <https://wg21.link/LWG3591>`__,"``lazy_split_view<input_view>::inner-iterator::base() &&`` invalidates outer iterators","October 2021","","","|ranges|"
 `3592 <https://wg21.link/LWG3592>`__,"``lazy_split_view`` needs to check the simpleness of Pattern","October 2021","","","|ranges|"
 `3593 <https://wg21.link/LWG3593>`__,"Several iterators' ``base() const &`` and ``lazy_split_view::outer-iterator::value_type::end()`` missing ``noexcept``","October 2021","","","|ranges|"
-`3595 <https://wg21.link/LWG3595>`__,"Exposition-only classes proxy and postfix-proxy for ``common_iterator`` should be fully ``constexpr``","October 2021","","","|ranges|"
+`3595 <https://wg21.link/LWG3595>`__,"Exposition-only classes proxy and postfix-proxy for ``common_iterator`` should be fully ``constexpr``","October 2021","|Complete|","14.0","|ranges|"
 "","","","",""
 `3645 <https://wg21.link/LWG3645>`__,"``resize_and_overwrite`` is overspecified to call its callback with lvalues", "Not voted in","|Complete|","14.0",""
 "","","","",""
diff --git a/libcxx/include/__iterator/common_iterator.h b/libcxx/include/__iterator/common_iterator.h
index 9a142769e55a7..df4c7bd18e8d2 100644
--- a/libcxx/include/__iterator/common_iterator.h
+++ b/libcxx/include/__iterator/common_iterator.h
@@ -41,7 +41,7 @@ class common_iterator {
       : __value(_VSTD::move(__x)) {}
 
   public:
-    const iter_value_t<_Iter>* operator->() const {
+    constexpr const iter_value_t<_Iter>* operator->() const noexcept {
       return _VSTD::addressof(__value);
     }
   };
@@ -58,7 +58,7 @@ class common_iterator {
       constructible_from<iter_value_t<_Iter>, iter_reference_t<_Iter>> &&
       move_constructible<iter_value_t<_Iter>>;
 
-    const iter_value_t<_Iter>& operator*() const {
+    constexpr const iter_value_t<_Iter>& operator*() const noexcept {
       return __value;
     }
   };
@@ -75,7 +75,7 @@ class common_iterator {
     requires convertible_to<const _I2&, _Iter> && convertible_to<const _S2&, _Sent>
   constexpr common_iterator(const common_iterator<_I2, _S2>& __other)
     : __hold_([&]() -> variant<_Iter, _Sent> {
-      _LIBCPP_ASSERT(!__other.__hold_.valueless_by_exception(), "Constructed from valueless iterator.");
+      _LIBCPP_ASSERT(!__other.__hold_.valueless_by_exception(), "Attempted to construct from a valueless common_iterator");
       if (__other.__hold_.index() == 0)
         return variant<_Iter, _Sent>{in_place_index<0>, _VSTD::__unchecked_get<0>(__other.__hold_)};
       return variant<_Iter, _Sent>{in_place_index<1>, _VSTD::__unchecked_get<1>(__other.__hold_)};
@@ -85,7 +85,7 @@ class common_iterator {
     requires convertible_to<const _I2&, _Iter> && convertible_to<const _S2&, _Sent> &&
              assignable_from<_Iter&, const _I2&> && assignable_from<_Sent&, const _S2&>
   common_iterator& operator=(const common_iterator<_I2, _S2>& __other) {
-    _LIBCPP_ASSERT(!__other.__hold_.valueless_by_exception(), "Assigned from valueless iterator.");
+    _LIBCPP_ASSERT(!__other.__hold_.valueless_by_exception(), "Attempted to assign from a valueless common_iterator");
 
     auto __idx = __hold_.index();
     auto __other_idx = __other.__hold_.index();
@@ -105,18 +105,16 @@ class common_iterator {
     return *this;
   }
 
-  decltype(auto) operator*()
+  constexpr decltype(auto) operator*()
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot dereference sentinel. Common iterator not holding an iterator.");
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to dereference a non-dereferenceable common_iterator");
     return *_VSTD::__unchecked_get<_Iter>(__hold_);
   }
 
-  decltype(auto) operator*() const
+  constexpr decltype(auto) operator*() const
     requires __dereferenceable<const _Iter>
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot dereference sentinel. Common iterator not holding an iterator.");
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to dereference a non-dereferenceable common_iterator");
     return *_VSTD::__unchecked_get<_Iter>(__hold_);
   }
 
@@ -127,9 +125,7 @@ class common_iterator {
      is_reference_v<iter_reference_t<_I2>> ||
      constructible_from<iter_value_t<_I2>, iter_reference_t<_I2>>)
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot dereference sentinel. Common iterator not holding an iterator.");
-
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to dereference a non-dereferenceable common_iterator");
     if constexpr (is_pointer_v<_Iter> || requires(const _Iter& __i) { __i.operator->(); })    {
       return _VSTD::__unchecked_get<_Iter>(__hold_);
     } else if constexpr (is_reference_v<iter_reference_t<_Iter>>) {
@@ -141,15 +137,12 @@ class common_iterator {
   }
 
   common_iterator& operator++() {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot increment sentinel. Common iterator not holding an iterator.");
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to increment a non-dereferenceable common_iterator");
     ++_VSTD::__unchecked_get<_Iter>(__hold_); return *this;
   }
 
   decltype(auto) operator++(int) {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_),
-                   "Cannot increment sentinel. Common iterator not holding an iterator.");
-
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__hold_), "Attempted to increment a non-dereferenceable common_iterator");
     if constexpr (forward_iterator<_Iter>) {
       auto __tmp = *this;
       ++*this;
@@ -166,10 +159,9 @@ class common_iterator {
 
   template<class _I2, sentinel_for<_Iter> _S2>
     requires sentinel_for<_Sent, _I2>
-  friend bool operator==(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
-    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception() &&
-                   !__y.__hold_.valueless_by_exception(),
-                   "One or both common_iterators are valueless. (Cannot compare valueless iterators.)");
+  friend constexpr bool operator==(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
+    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception(), "Attempted to compare a valueless common_iterator");
+    _LIBCPP_ASSERT(!__y.__hold_.valueless_by_exception(), "Attempted to compare a valueless common_iterator");
 
     auto __x_index = __x.__hold_.index();
     auto __y_index = __y.__hold_.index();
@@ -185,10 +177,9 @@ class common_iterator {
 
   template<class _I2, sentinel_for<_Iter> _S2>
     requires sentinel_for<_Sent, _I2> && equality_comparable_with<_Iter, _I2>
-  friend bool operator==(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
-    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception() &&
-                   !__y.__hold_.valueless_by_exception(),
-                   "One or both common_iterators are valueless. (Cannot compare valueless iterators.)");
+  friend constexpr bool operator==(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
+    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception(), "Attempted to compare a valueless common_iterator");
+    _LIBCPP_ASSERT(!__y.__hold_.valueless_by_exception(), "Attempted to compare a valueless common_iterator");
 
     auto __x_index = __x.__hold_.index();
     auto __y_index = __y.__hold_.index();
@@ -207,10 +198,9 @@ class common_iterator {
 
   template<sized_sentinel_for<_Iter> _I2, sized_sentinel_for<_Iter> _S2>
     requires sized_sentinel_for<_Sent, _I2>
-  friend iter_difference_t<_I2> operator-(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
-    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception() &&
-                   !__y.__hold_.valueless_by_exception(),
-                   "One or both common_iterators are valueless. (Cannot subtract valueless iterators.)");
+  friend constexpr iter_difference_t<_I2> operator-(const common_iterator& __x, const common_iterator<_I2, _S2>& __y) {
+    _LIBCPP_ASSERT(!__x.__hold_.valueless_by_exception(), "Attempted to subtract from a valueless common_iterator");
+    _LIBCPP_ASSERT(!__y.__hold_.valueless_by_exception(), "Attempted to subtract a valueless common_iterator");
 
     auto __x_index = __x.__hold_.index();
     auto __y_index = __y.__hold_.index();
@@ -227,24 +217,21 @@ class common_iterator {
     return _VSTD::__unchecked_get<_Sent>(__x.__hold_) - _VSTD::__unchecked_get<_I2>(__y.__hold_);
   }
 
-  friend iter_rvalue_reference_t<_Iter> iter_move(const common_iterator& __i)
+  friend constexpr iter_rvalue_reference_t<_Iter> iter_move(const common_iterator& __i)
     noexcept(noexcept(ranges::iter_move(declval<const _Iter&>())))
       requires input_iterator<_Iter>
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__i.__hold_),
-                   "Cannot iter_move a sentinel. Common iterator not holding an iterator.");
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__i.__hold_), "Attempted to iter_move a non-dereferenceable common_iterator");
     return ranges::iter_move( _VSTD::__unchecked_get<_Iter>(__i.__hold_));
   }
 
   template<indirectly_swappable<_Iter> _I2, class _S2>
-  friend void iter_swap(const common_iterator& __x, const common_iterator<_I2, _S2>& __y)
+  friend constexpr void iter_swap(const common_iterator& __x, const common_iterator<_I2, _S2>& __y)
       noexcept(noexcept(ranges::iter_swap(declval<const _Iter&>(), declval<const _I2&>())))
   {
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__x.__hold_),
-                   "Cannot swap __y with a sentinel. Common iterator (__x) not holding an iterator.");
-    _LIBCPP_ASSERT(holds_alternative<_Iter>(__y.__hold_),
-                   "Cannot swap __x with a sentinel. Common iterator (__y) not holding an iterator.");
-    return ranges::iter_swap( _VSTD::__unchecked_get<_Iter>(__x.__hold_),  _VSTD::__unchecked_get<_Iter>(__y.__hold_));
+    _LIBCPP_ASSERT(holds_alternative<_Iter>(__x.__hold_), "Attempted to iter_swap a non-dereferenceable common_iterator");
+    _LIBCPP_ASSERT(holds_alternative<_I2>(__y.__hold_), "Attempted to iter_swap a non-dereferenceable common_iterator");
+    return ranges::iter_swap(_VSTD::__unchecked_get<_Iter>(__x.__hold_), _VSTD::__unchecked_get<_I2>(__y.__hold_));
   }
 };
 
@@ -271,7 +258,7 @@ struct __arrow_type_or_void {
 template<class _Iter, class _Sent>
   requires __common_iter_has_ptr_op<_Iter, _Sent>
 struct __arrow_type_or_void<_Iter, _Sent> {
-    using type = decltype(declval<const common_iterator<_Iter, _Sent>>().operator->());
+    using type = decltype(declval<const common_iterator<_Iter, _Sent>&>().operator->());
 };
 
 template<class _Iter, class _Sent>
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/constraints.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/constraints.compile.pass.cpp
new file mode 100644
index 0000000000000..3eb99e1428673
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/constraints.compile.pass.cpp
@@ -0,0 +1,28 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// template<input_or_output_iterator I, sentinel_for<I> S>
+//   requires (!same_as<I, S> && copyable<I>)
+
+#include <iterator>
+
+#include "test_iterators.h"
+
+template<class I, class S>
+concept ValidCommonIterator = requires {
+  typename std::common_iterator<I, S>;
+};
+
+static_assert( ValidCommonIterator<int*, const int*>);
+static_assert(!ValidCommonIterator<int, int>); // !input_or_output_iterator<I>
+static_assert(!ValidCommonIterator<int*, float*>); // !sentinel_for<S, I>
+static_assert(!ValidCommonIterator<int*, int*>); // !same_as<I, S>
+static_assert(!ValidCommonIterator<cpp20_input_iterator<int*>, sentinel_wrapper<cpp20_input_iterator<int*>>>); // !copyable<I>
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.converting.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.converting.pass.cpp
new file mode 100644
index 0000000000000..b28120537b5a4
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.converting.pass.cpp
@@ -0,0 +1,48 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// template<class I2, class S2>
+//   requires convertible_to<const I2&, I> && convertible_to<const S2&, S>
+//     constexpr common_iterator(const common_iterator<I2, S2>& x);
+
+#include <iterator>
+#include <cassert>
+
+#include "test_macros.h"
+
+constexpr bool test()
+{
+  struct Base {};
+  struct Derived : Base {};
+
+  using BaseIt = std::common_iterator<Base*, const Base*>;
+  using DerivedIt = std::common_iterator<Derived*, const Derived*>;
+  static_assert(std::is_convertible_v<DerivedIt, BaseIt>); // Derived* to Base*
+  static_assert(!std::is_constructible_v<DerivedIt, BaseIt>); // Base* to Derived*
+
+  Derived a[10] = {};
+  DerivedIt it = DerivedIt(a); // the iterator type
+  BaseIt jt = BaseIt(it);
+  assert(jt == BaseIt(a));
+
+  it = DerivedIt((const Derived*)a); // the sentinel type
+  jt = BaseIt(it);
+  assert(jt == BaseIt(a));
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.default.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.default.pass.cpp
new file mode 100644
index 0000000000000..199ceb66893e6
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.default.pass.cpp
@@ -0,0 +1,41 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// constexpr common_iterator() requires default_initializable<I> = default;
+
+#include <iterator>
+#include <cassert>
+
+#include "test_iterators.h"
+
+constexpr bool test()
+{
+  {
+    using It = cpp17_input_iterator<int*>;
+    using CommonIt = std::common_iterator<It, sentinel_wrapper<It>>;
+    static_assert(!std::is_default_constructible_v<It>); // premise
+    static_assert(!std::is_default_constructible_v<CommonIt>); // conclusion
+  }
+  {
+    // The base iterator is value-initialized.
+    std::common_iterator<int*, sentinel_wrapper<int*>> c;
+    assert(c == nullptr);
+  }
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+  static_assert(test());
+
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.iter.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.iter.pass.cpp
new file mode 100644
index 0000000000000..fd47612eca389
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.iter.pass.cpp
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// constexpr common_iterator(I i);
+
+#include <iterator>
+#include <cassert>
+
+#include "test_iterators.h"
+
+template<class It>
+constexpr bool test() {
+  using CommonIt = std::common_iterator<It, sentinel_wrapper<It>>;
+  int a[] = {1,2,3};
+  It it = It(a);
+  CommonIt lv = CommonIt(it);
+  assert(&*lv == a);
+  CommonIt rv = CommonIt(std::move(it));
+  assert(&*rv == a);
+
+  return true;
+}
+
+int main(int, char**) {
+  test<cpp17_input_iterator<int*>>();
+  test<forward_iterator<int*>>();
+  test<bidirectional_iterator<int*>>();
+  test<random_access_iterator<int*>>();
+  test<contiguous_iterator<int*>>();
+  test<int*>();
+  test<const int*>();
+
+  static_assert(test<cpp17_input_iterator<int*>>());
+  static_assert(test<forward_iterator<int*>>());
+  static_assert(test<bidirectional_iterator<int*>>());
+  static_assert(test<random_access_iterator<int*>>());
+  static_assert(test<contiguous_iterator<int*>>());
+  static_assert(test<int*>());
+  static_assert(test<const int*>());
+
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.pass.cpp
deleted file mode 100644
index c329e8b8a81a7..0000000000000
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.pass.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// UNSUPPORTED: c++03, c++11, c++14, c++17
-// UNSUPPORTED: libcpp-no-concepts
-
-// constexpr common_iterator() requires default_initializable<I> = default;
-// constexpr common_iterator(I i);
-// constexpr common_iterator(S s);
-// template<class I2, class S2>
-//   requires convertible_to<const I2&, I> && convertible_to<const S2&, S>
-//     constexpr common_iterator(const common_iterator<I2, S2>& x);
-
-#include <iterator>
-#include <cassert>
-
-#include "test_macros.h"
-#include "types.h"
-
-template<class I, class S>
-concept ValidCommonIterator = requires {
-  typename std::common_iterator<I, S>;
-};
-
-template<class I, class I2>
-concept ConvCtorEnabled = requires(std::common_iterator<I2, sentinel_type<int*>> ci) {
-  std::common_iterator<I, sentinel_type<int*>>(ci);
-};
-
-void test() {
-  int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
-
-  static_assert( std::is_default_constructible_v<std::common_iterator<int*, sentinel_type<int*>>>);
-  static_assert(!std::is_default_constructible_v<std::common_iterator<non_default_constructible_iterator<int*>, sentinel_type<int*>>>);
-
-  // Not copyable:
-  static_assert(!ValidCommonIterator<cpp20_input_iterator<int*>, sentinel_type<int*>>);
-  // Same iter and sent:
-  static_assert(!ValidCommonIterator<cpp17_input_iterator<int*>, cpp17_input_iterator<int*>>);
-
-  {
-    auto iter1 = cpp17_input_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
-
-    assert(*iter1 == 1);
-    assert(*commonIter1 == 1);
-    assert(commonIter1 != commonSent1);
-  }
-  {
-    auto iter1 = forward_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
-
-    assert(*iter1 == 1);
-    assert(*commonIter1 == 1);
-    assert(commonIter1 != commonSent1);
-  }
-  {
-    auto iter1 = random_access_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
-
-    assert(*iter1 == 1);
-    assert(*commonIter1 == 1);
-    assert(commonIter1 != commonSent1);
-  }
-
-  // Conversion constructor:
-  {
-    convertible_iterator<int*> conv{buffer};
-    auto commonIter1 = std::common_iterator<convertible_iterator<int*>, sentinel_type<int*>>(conv);
-    auto commonIter2 = std::common_iterator<forward_iterator<int*>, sentinel_type<int*>>(commonIter1);
-    assert(*commonIter2 == 1);
-
-    static_assert( ConvCtorEnabled<forward_iterator<int*>, convertible_iterator<int*>>);
-    static_assert(!ConvCtorEnabled<forward_iterator<int*>, random_access_iterator<int*>>);
-  }
-}
-
-int main(int, char**) {
-  test();
-
-  return 0;
-}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.sentinel.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.sentinel.pass.cpp
new file mode 100644
index 0000000000000..c6c0f301ead99
--- /dev/null
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/ctor.sentinel.pass.cpp
@@ -0,0 +1,63 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+
+// constexpr common_iterator(S s);
+
+#include <iterator>
+#include <cassert>
+#include <type_traits>
+
+#include "test_iterators.h"
+
+template<class It>
+constexpr bool test() {
+  using Sent = sentinel_wrapper<It>;
+  using CommonIt = std::common_iterator<It, Sent>;
+  int a[] = {1,2,3};
+  It it = It(a);
+  Sent sent = Sent(It(a+1));
+
+  CommonIt lv = CommonIt(sent);
+  assert(lv == CommonIt(sent));
+  assert(lv != CommonIt(it));
+  if (!std::is_constant_evaluated()) {
+    assert(lv == std::next(CommonIt(it)));
+  }
+
+  CommonIt rv = CommonIt(std::move(sent));
+  assert(rv == CommonIt(sent));
+  assert(rv != CommonIt(it));
+  if (!std::is_constant_evaluated()) {
+    assert(rv == std::next(CommonIt(it)));
+  }
+
+  return true;
+}
+
+int main(int, char**) {
+  test<cpp17_input_iterator<int*>>();
+  test<forward_iterator<int*>>();
+  test<bidirectional_iterator<int*>>();
+  test<random_access_iterator<int*>>();
+  test<contiguous_iterator<int*>>();
+  test<int*>();
+  test<const int*>();
+
+  static_assert(test<cpp17_input_iterator<int*>>());
+  static_assert(test<forward_iterator<int*>>());
+  static_assert(test<bidirectional_iterator<int*>>());
+  static_assert(test<random_access_iterator<int*>>());
+  static_assert(test<contiguous_iterator<int*>>());
+  static_assert(test<int*>());
+  static_assert(test<const int*>());
+
+  return 0;
+}
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_move.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_move.pass.cpp
index 0bb4d455c34d3..4c8ae2dae729d 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_move.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_move.pass.cpp
@@ -15,35 +15,79 @@
 
 #include <iterator>
 #include <cassert>
+#include <type_traits>
 
+#include "test_iterators.h"
 #include "test_macros.h"
-#include "types.h"
 
-void test() {
-  int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+struct IterMovingIt {
+  using value_type = int;
+  using difference_type = int;
+  explicit IterMovingIt() = default;
+  IterMovingIt(const IterMovingIt&); // copyable, but this test shouldn't make copies
+  IterMovingIt(IterMovingIt&&) = default;
+  IterMovingIt& operator=(const IterMovingIt&);
+  int& operator*() const;
+  constexpr IterMovingIt& operator++() { return *this; }
+  IterMovingIt operator++(int);
+  friend constexpr int iter_move(const IterMovingIt&) {
+    return 42;
+  }
+  bool operator==(std::default_sentinel_t) const;
+};
+static_assert(std::input_iterator<IterMovingIt>);
 
+constexpr bool test() {
   {
-    auto iter1 = cpp17_input_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    assert(std::ranges::iter_move(commonIter1) == 1);
-    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(commonIter1)), int&&);
+    using It = int*;
+    using CommonIt = std::common_iterator<It, sentinel_wrapper<It>>;
+    int a[] = {1, 2, 3};
+    CommonIt it = CommonIt(It(a));
+    ASSERT_NOEXCEPT(iter_move(it));
+    ASSERT_NOEXCEPT(std::ranges::iter_move(it));
+    ASSERT_SAME_TYPE(decltype(iter_move(it)), int&&);
+    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(it)), int&&);
+    assert(iter_move(it) == 1);
+    if (!std::is_constant_evaluated()) {
+      ++it;
+      assert(iter_move(it) == 2);
+    }
   }
   {
-    auto iter1 = forward_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    assert(std::ranges::iter_move(commonIter1) == 1);
-    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(commonIter1)), int&&);
+    using It = const int*;
+    using CommonIt = std::common_iterator<It, sentinel_wrapper<It>>;
+    int a[] = {1, 2, 3};
+    CommonIt it = CommonIt(It(a));
+    ASSERT_NOEXCEPT(iter_move(it));
+    ASSERT_NOEXCEPT(std::ranges::iter_move(it));
+    ASSERT_SAME_TYPE(decltype(iter_move(it)), const int&&);
+    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(it)), const int&&);
+    assert(iter_move(it) == 1);
+    if (!std::is_constant_evaluated()) {
+      ++it;
+      assert(iter_move(it) == 2);
+    }
   }
   {
-    auto iter1 = random_access_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    assert(std::ranges::iter_move(commonIter1) == 1);
-    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(commonIter1)), int&&);
+    using It = IterMovingIt;
+    using CommonIt = std::common_iterator<It, std::default_sentinel_t>;
+    CommonIt it = CommonIt(It());
+    ASSERT_NOT_NOEXCEPT(iter_move(it));
+    ASSERT_NOT_NOEXCEPT(std::ranges::iter_move(it));
+    ASSERT_SAME_TYPE(decltype(iter_move(it)), int);
+    ASSERT_SAME_TYPE(decltype(std::ranges::iter_move(it)), int);
+    assert(iter_move(it) == 42);
+    if (!std::is_constant_evaluated()) {
+      ++it;
+      assert(iter_move(it) == 42);
+    }
   }
+  return true;
 }
 
 int main(int, char**) {
   test();
+  static_assert(test());
 
   return 0;
 }
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_swap.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_swap.pass.cpp
index f649d334b9cb6..3b69a6d7bd4b5 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_swap.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/iter_swap.pass.cpp
@@ -10,55 +10,114 @@
 // UNSUPPORTED: libcpp-no-concepts
 
 // template<indirectly_swappable<I> I2, class S2>
-//   friend void iter_swap(const common_iterator& x, const common_iterator<I2, S2>& y)
+//   friend constexpr void iter_swap(const common_iterator& x, const common_iterator<I2, S2>& y)
 //     noexcept(noexcept(ranges::iter_swap(declval<const I&>(), declval<const I2&>())));
 
 #include <iterator>
 #include <cassert>
+#include <type_traits>
 
+#include "test_iterators.h"
 #include "test_macros.h"
-#include "types.h"
 
-void test() {
-  int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
+template<int K>
+struct IterSwappingIt {
+  using value_type = int;
+  using difference_type = int;
+  constexpr explicit IterSwappingIt(int *swaps) : swaps_(swaps) {}
+  IterSwappingIt(const IterSwappingIt&); // copyable, but this test shouldn't make copies
+  IterSwappingIt(IterSwappingIt&&) = default;
+  IterSwappingIt& operator=(const IterSwappingIt&);
+  int& operator*() const;
+  constexpr IterSwappingIt& operator++() { return *this; }
+  IterSwappingIt operator++(int);
 
+  template<int L>
+  friend constexpr int iter_swap(const IterSwappingIt<K>& lhs, const IterSwappingIt<L>& rhs) {
+    *lhs.swaps_ += 10;
+    *rhs.swaps_ += 1;
+    return 42; // should be accepted but ignored
+  }
+
+  bool operator==(std::default_sentinel_t) const;
+
+  int *swaps_ = nullptr;
+};
+static_assert(std::input_iterator<IterSwappingIt<0>>);
+static_assert(std::indirectly_swappable<IterSwappingIt<0>, IterSwappingIt<0>>);
+static_assert(std::indirectly_swappable<IterSwappingIt<0>, IterSwappingIt<1>>);
+
+constexpr bool test() {
   {
-    auto iter1 = cpp17_input_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    auto commonIter2 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    for (auto i = 0; i < 4; ++i) ++commonIter2;
-    assert(*commonIter2 == 5);
-    std::ranges::iter_swap(commonIter1, commonIter2);
-    assert(*commonIter1 == 5);
-    assert(*commonIter2 == 1);
-    std::ranges::iter_swap(commonIter2, commonIter1);
+    using It = int*;
+    using CommonIt = std::common_iterator<It, sentinel_wrapper<It>>;
+    static_assert(std::indirectly_swappable<CommonIt, CommonIt>);
+
+    int a[] = {1, 2, 3};
+    CommonIt it = CommonIt(It(a));
+    CommonIt jt = CommonIt(It(a+1));
+    ASSERT_NOEXCEPT(iter_swap(it, jt));
+    ASSERT_SAME_TYPE(decltype(iter_swap(it, jt)), void);
+    iter_swap(it, jt);
+    assert(a[0] == 2);
+    assert(a[1] == 1);
   }
   {
-    auto iter1 = forward_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    auto commonIter2 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    for (auto i = 0; i < 4; ++i) ++commonIter2;
-    assert(*commonIter2 == 5);
-    std::ranges::iter_swap(commonIter1, commonIter2);
-    assert(*commonIter1 == 5);
-    assert(*commonIter2 == 1);
-    std::ranges::iter_swap(commonIter2, commonIter1);
+    using It = const int*;
+    using CommonIt = std::common_iterator<It, sentinel_wrapper<It>>;
+    static_assert(!std::indirectly_swappable<CommonIt, CommonIt>);
   }
   {
-    auto iter1 = random_access_iterator<int*>(buffer);
-    auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    auto commonIter2 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
-    for (auto i = 0; i < 4; ++i) ++commonIter2;
-    assert(*commonIter2 == 5);
-    std::ranges::iter_swap(commonIter1, commonIter2);
-    assert(*commonIter1 == 5);
-    assert(*commonIter2 == 1);
-    std::ranges::iter_swap(commonIter2, commonIter1);
+    using It = IterSwappingIt<0>;
+    using CommonIt = std::common_iterator<It, std::default_sentinel_t>;
+    static_assert(std::indirectly_swappable<CommonIt, CommonIt>);
+
+    int iswaps = 100;
+    int jswaps = 100;
+    CommonIt it = CommonIt(It(&iswaps));
+    CommonIt jt = CommonIt(It(&jswaps));
+    ASSERT_NOT_NOEXCEPT(iter_swap(it, jt));
+    ASSERT_SAME_TYPE(decltype(iter_swap(it, jt)), void);
+    iter_swap(it, jt); // lvalue iterators
+    assert(iswaps == 110);
+    assert(jswaps == 101);
+    iter_swap(CommonIt(It(&iswaps)), CommonIt(It(&jswaps))); // rvalue iterators
+    assert(iswaps == 120);
+    assert(jswaps == 102);
+    std::ranges::iter_swap(it, jt);
+    assert(iswaps == 130);
+    assert(jswaps == 103);
+  }
+  {
+    using It = IterSwappingIt<0>;
+    using Jt = IterSwappingIt<1>;
+    static_assert(std::indirectly_swappable<It, Jt>);
+    using CommonIt = std::common_iterator<It, std::default_sentinel_t>;
+    using CommonJt = std::common_iterator<Jt, std::default_sentinel_t>;
+    static_assert(std::indirectly_swappable<CommonIt, CommonJt>);
+
+    int iswaps = 100;
+    int jswaps = 100;
+    CommonIt it = CommonIt(It(&iswaps));
+    CommonJt jt = CommonJt(Jt(&jswaps));
+    ASSERT_NOT_NOEXCEPT(iter_swap(it, jt));
+    ASSERT_SAME_TYPE(decltype(iter_swap(it, jt)), void);
+    iter_swap(it, jt); // lvalue iterators
+    assert(iswaps == 110);
+    assert(jswaps == 101);
+    iter_swap(CommonIt(It(&iswaps)), CommonJt(Jt(&jswaps))); // rvalue iterators
+    assert(iswaps == 120);
+    assert(jswaps == 102);
+    std::ranges::iter_swap(it, jt);
+    assert(iswaps == 130);
+    assert(jswaps == 103);
   }
+  return true;
 }
 
 int main(int, char**) {
   test();
+  static_assert(test());
 
   return 0;
 }
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h b/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
index d5068b3cf013f..03b94f63f631e 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
@@ -157,32 +157,6 @@ class comparable_iterator
     }
 };
 
-template <class It>
-class convertible_iterator
-{
-    It it_;
-
-public:
-    typedef          std::input_iterator_tag                   iterator_category;
-    typedef typename std::iterator_traits<It>::value_type      value_type;
-    typedef typename std::iterator_traits<It>::difference_type difference_type;
-    typedef It                                                 pointer;
-    typedef typename std::iterator_traits<It>::reference       reference;
-
-    constexpr It base() const {return it_;}
-
-    convertible_iterator() = default;
-    explicit constexpr convertible_iterator(It it) : it_(it) {}
-
-    constexpr reference operator*() const {return *it_;}
-
-    constexpr convertible_iterator& operator++() {++it_; return *this;}
-    constexpr convertible_iterator operator++(int)
-        {convertible_iterator tmp(*this); ++(*this); return tmp;}
-
-    operator forward_iterator<It>() const { return forward_iterator<It>(it_); }
-};
-
 template <class It>
 class non_const_deref_iterator
 {
diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h
index 5e084d3222c68..60313a5889a52 100644
--- a/libcxx/test/support/test_iterators.h
+++ b/libcxx/test/support/test_iterators.h
@@ -117,38 +117,6 @@ class forward_iterator
     void operator,(T const &) = delete;
 };
 
-template <class It>
-class non_default_constructible_iterator
-{
-    It it_;
-
-    template <class U> friend class non_default_constructible_iterator;
-public:
-    typedef          std::input_iterator_tag                   iterator_category;
-    typedef typename std::iterator_traits<It>::value_type      value_type;
-    typedef typename std::iterator_traits<It>::difference_type difference_type;
-    typedef It                                                 pointer;
-    typedef typename std::iterator_traits<It>::reference       reference;
-
-    non_default_constructible_iterator() = delete;
-
-    TEST_CONSTEXPR explicit non_default_constructible_iterator(It it) : it_(it) {}
-    template <class U>
-        TEST_CONSTEXPR non_default_constructible_iterator(const non_default_constructible_iterator<U>& u) : it_(u.it_) {}
-
-    TEST_CONSTEXPR reference operator*() const {return *it_;}
-    TEST_CONSTEXPR pointer operator->() const {return it_;}
-
-    TEST_CONSTEXPR_CXX14 non_default_constructible_iterator& operator++() {++it_; return *this;}
-    TEST_CONSTEXPR_CXX14 non_default_constructible_iterator operator++(int) {return non_default_constructible_iterator(it_++);}
-
-    friend TEST_CONSTEXPR bool operator==(const non_default_constructible_iterator& x, const non_default_constructible_iterator& y) {return x.it_ == y.it_;}
-    friend TEST_CONSTEXPR bool operator!=(const non_default_constructible_iterator& x, const non_default_constructible_iterator& y) {return x.it_ != y.it_;}
-
-    template <class T>
-    void operator,(T const &) = delete;
-};
-
 template <class It>
 class bidirectional_iterator
 {

From 4684857abfd7cadde9693eed8cfd21446047c579 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Sat, 18 Dec 2021 15:03:26 +0100
Subject: [PATCH 433/946] [libc++][format] Finish P0645 Text Formatting.

This adjust the version macro and sets it as completed. All parts of the paper
have been implemented, except for the parts replaced by later papers and
LWG-issues.

Adjusted the synopsis to match the synopsis in the Standard. Not yet
implemented parts of P2216 and P2418 still use the P0645 wording.

Completes:
- P0645 Text Formatting

Depends on D115991

Reviewed By: ldionne, #libc

Differential Revision: https://reviews.llvm.org/D115999
---
 libcxx/docs/ReleaseNotes.rst        |  11 +-
 libcxx/docs/Status/Cxx20.rst        |   1 +
 libcxx/docs/Status/Cxx20Papers.csv  |   2 +-
 libcxx/docs/Status/FormatIssues.csv |   2 +-
 libcxx/include/format               | 158 ++--------------------------
 5 files changed, 18 insertions(+), 156 deletions(-)

diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index e1be3a6c114c1..b35c8a3329515 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -38,10 +38,13 @@ What's New in Libc++ 14.0.0?
 New Features
 ------------
 
-- There's initial support for the C++20 header ``<format>``. The implementation
-  is incomplete. Some functions are known to be inefficient; both in memory
-  usage and performance. The implementation is considered experimental and isn't
-  considered ABI stable.
+- There's support for the C++20 header ``<format>``. Some parts are still
+  missing, most notably the compile-time format string validation. Some
+  functions are known to be inefficient, both in memory usage and performance.
+  The implementation isn't API- or ABI-stable and therefore considered
+  experimental. (Some not-yet-implemented papers require an API-break.)
+  Vendors can still disable this header by turning the CMake option
+  `LIBCXX_ENABLE_INCOMPLETE_FEATURES` off.
 
 - There's a new CMake option ``LIBCXX_ENABLE_UNICODE`` to disable Unicode
   support in the ``<format>`` header. This only affects the estimation of the
diff --git a/libcxx/docs/Status/Cxx20.rst b/libcxx/docs/Status/Cxx20.rst
index d59fd8a8b35f7..43744077638ff 100644
--- a/libcxx/docs/Status/Cxx20.rst
+++ b/libcxx/docs/Status/Cxx20.rst
@@ -41,6 +41,7 @@ Paper Status
 .. note::
 
    .. [#note-P0600] P0600: The missing bits in P0600 are in |sect|\ [mem.res.class] and |sect|\ [mem.poly.allocator.class].
+   .. [#note-P0645] P0645: The paper is implemented but still marked as an incomplete feature. Not yet implemented LWG-issues will cause API and ABI breakage.
    .. [#note-P0966] P0966: It was previously erroneously marked as complete in version 8.0. See `bug 45368 <https://llvm.org/PR45368>`__.
    .. [#note-P0619] P0619: Only sections D.8, D.9, D.10 and D.13 are implemented. Sections D.4, D.7, D.11, D.12, and D.14 remain undone.
    .. [#note-P0883] P0883: shared_ptr and floating-point changes weren't applied as they themselves aren't implemented yet.
diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv
index 5a974a798f7bd..9e4e7284109be 100644
--- a/libcxx/docs/Status/Cxx20Papers.csv
+++ b/libcxx/docs/Status/Cxx20Papers.csv
@@ -103,7 +103,7 @@
 "`P0466R5 <https://wg21.link/P0466R5>`__","LWG","Layout-compatibility and Pointer-interconvertibility Traits","Cologne","",""
 "`P0553R4 <https://wg21.link/P0553R4>`__","LWG","Bit operations","Cologne","|Complete|","9.0"
 "`P0631R8 <https://wg21.link/P0631R8>`__","LWG","Math Constants","Cologne","|Complete|","11.0"
-"`P0645R10 <https://wg21.link/P0645R10>`__","LWG","Text Formatting","Cologne","|In Progress|",""
+"`P0645R10 <https://wg21.link/P0645R10>`__","LWG","Text Formatting","Cologne","|Complete| [#note-P0645]_","14.0"
 "`P0660R10 <https://wg21.link/P0660R10>`__","LWG","Stop Token and Joining Thread, Rev 10","Cologne","",""
 "`P0784R7 <https://wg21.link/P0784R7>`__","CWG","More constexpr containers","Cologne","|Complete|","12.0"
 "`P0980R1 <https://wg21.link/P0980R1>`__","LWG","Making std::string constexpr","Cologne","",""
diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv
index 0984712fdb22a..b979b0de3a18d 100644
--- a/libcxx/docs/Status/FormatIssues.csv
+++ b/libcxx/docs/Status/FormatIssues.csv
@@ -1,5 +1,5 @@
 Number,Name,Assignee,Patch,Status,First released version
-`P0645 <https://wg21.link/P0645>`_,"Text Formatting",Mark de Wever,,|Partial|,
+`P0645 <https://wg21.link/P0645>`_,"Text Formatting",Mark de Wever,,|Complete|,Clang 14
 `P1652 <https://wg21.link/P1652>`_,"Printf corner cases in std::format",Mark de Wever,"`D103433 <https://reviews.llvm.org/D103433>`__, `D114001 <https://reviews.llvm.org/D114001>`__",|Review|,
 `P1892 <https://wg21.link/P1892>`_,"Extended locale-specific presentation specifiers for std::format",Mark de Wever,`D103368 <https://reviews.llvm.org/D103368>`__,|Complete|,Clang 14
 `P1868 <https://wg21.link/P1868>`_,"width: clarifying units of width and precision in std::format (Implements the unicode support.)",Mark de Wever,"`D103413 <https://reviews.llvm.org/D103413>`__ `D103425 <https://reviews.llvm.org/D103425>`__ `D103670 <https://reviews.llvm.org/D103670>`__",|Complete|,Clang 14
diff --git a/libcxx/include/format b/libcxx/include/format
index c1f1be7d31b98..4cf146ead17a2 100644
--- a/libcxx/include/format
+++ b/libcxx/include/format
@@ -14,43 +14,15 @@
 
 namespace std {
   // [format.context], class template basic_format_context
-  template<class Out, class charT>
-  class basic_format_context {
-    basic_format_args<basic_format_context> args_;      // exposition only
-    Out out_;                                           // exposition only
-
-  public:
-    using iterator = Out;
-    using char_type = charT;
-    template<class T> using formatter_type = formatter<T, charT>;
-
-    basic_format_arg<basic_format_context> arg(size_t id) const;
-    std::locale locale();
-
-    iterator out();
-    void advance_to(iterator it);
-  };
+  template<class Out, class charT> class basic_format_context;
   using format_context = basic_format_context<unspecified, char>;
   using wformat_context = basic_format_context<unspecified, wchar_t>;
 
   // [format.args], class template basic_format_args
-  template<class Context>
-  class basic_format_args {
-    size_t size_;                               // exposition only
-    const basic_format_arg<Context>* data_;     // exposition only
-
-  public:
-    basic_format_args() noexcept;
-
-    template<class... Args>
-      basic_format_args(const format-arg-store<Context, Args...>& store) noexcept;
-
-    basic_format_arg<Context> get(size_t i) const noexcept;
-  };
+  template<class Context> class basic_format_args;
   using format_args = basic_format_args<format_context>;
   using wformat_args = basic_format_args<wformat_context>;
 
-
   // [format.functions], formatting functions
   template<class... Args>
     string format(string_view fmt, const Args&... args);
@@ -90,8 +62,7 @@ namespace std {
     Out out;
     iter_difference_t<Out> size;
   };
-
- template<class Out, class... Args>
+  template<class Out, class... Args>
     format_to_n_result<Out> format_to_n(Out out, iter_difference_t<Out> n,
                                         string_view fmt, const Args&... args);
   template<class Out, class... Args>
@@ -116,99 +87,22 @@ namespace std {
     size_t formatted_size(const locale& loc, wstring_view fmt, const Args&... args);
 
   // [format.formatter], formatter
-  template<> struct formatter<char, char>;
-  template<> struct formatter<char, wchar_t>;
-  template<> struct formatter<wchar_t, wchar_t>;
-
-  template<> struct formatter<charT*, charT>;
-  template<> struct formatter<const charT*, charT>;
-  template<size_t N> struct formatter<const charT[N], charT>;
-  template<class traits, class Allocator>
-    struct formatter<basic_string<charT, traits, Allocator>, charT>;
-  template<class traits>
-    struct formatter<basic_string_view<charT, traits>, charT>;
+  template<class T, class charT = char> struct formatter;
 
   // [format.parse.ctx], class template basic_format_parse_context
-  template<class charT>
-  class basic_format_parse_context {
-  public:
-    using char_type = charT;
-    using const_iterator = typename basic_string_view<charT>::const_iterator;
-    using iterator = const_iterator;
-
-  private:
-    iterator begin_;                                    // exposition only
-    iterator end_;                                      // exposition only
-    enum indexing { unknown, manual, automatic };       // exposition only
-    indexing indexing_;                                 // exposition only
-    size_t next_arg_id_;                                // exposition only
-    size_t num_args_;                                   // exposition only
-
-  public:
-    constexpr explicit basic_format_parse_context(basic_string_view<charT> fmt,
-                                                  size_t num_args = 0) noexcept;
-    basic_format_parse_context(const basic_format_parse_context&) = delete;
-    basic_format_parse_context& operator=(const basic_format_parse_context&) = delete;
-
-    constexpr const_iterator begin() const noexcept;
-    constexpr const_iterator end() const noexcept;
-    constexpr void advance_to(const_iterator it);
-
-    constexpr size_t next_arg_id();
-    constexpr void check_arg_id(size_t id);
-  };
+  template<class charT> class basic_format_parse_context;
   using format_parse_context = basic_format_parse_context<char>;
   using wformat_parse_context = basic_format_parse_context<wchar_t>;
 
   // [format.arguments], arguments
   // [format.arg], class template basic_format_arg
-  template<class Context>
-  class basic_format_arg {
-  public:
-    class handle;
-
-  private:
-    using char_type = typename Context::char_type;                              // exposition only
-
-    variant<monostate, bool, char_type,
-            int, unsigned int, long long int, unsigned long long int,
-            float, double, long double,
-            const char_type*, basic_string_view<char_type>,
-            const void*, handle> value;                                         // exposition only
-
-    template<class T> explicit basic_format_arg(const T& v) noexcept;           // exposition only
-    explicit basic_format_arg(float n) noexcept;                                // exposition only
-    explicit basic_format_arg(double n) noexcept;                               // exposition only
-    explicit basic_format_arg(long double n) noexcept;                          // exposition only
-    explicit basic_format_arg(const char_type* s);                              // exposition only
-
-    template<class traits>
-      explicit basic_format_arg(
-        basic_string_view<char_type, traits> s) noexcept;                       // exposition only
-
-    template<class traits, class Allocator>
-      explicit basic_format_arg(
-        const basic_string<char_type, traits, Allocator>& s) noexcept;          // exposition only
-
-    explicit basic_format_arg(nullptr_t) noexcept;                              // exposition only
-
-    template<class T>
-      explicit basic_format_arg(const T* p) noexcept;                           // exposition only
-
-  public:
-    basic_format_arg() noexcept;
-
-    explicit operator bool() const noexcept;
-  };
+  template<class Context> class basic_format_arg;
 
   template<class Visitor, class Context>
     see below visit_format_arg(Visitor&& vis, basic_format_arg<Context> arg);
 
   // [format.arg.store], class template format-arg-store
-  template<class Context, class... Args>
-  struct format-arg-store {      // exposition only
-    array<basic_format_arg<Context>, sizeof...(Args)> args;
-  };
+  template<class Context, class... Args> struct format-arg-store;      // exposition only
 
   template<class Context = format_context, class... Args>
     format-arg-store<Context, Args...>
@@ -218,43 +112,7 @@ namespace std {
       make_wformat_args(const Args&... args);
 
   // [format.error], class format_error
-  class format_error : public runtime_error {
-  public:
-    explicit format_error(const string& what_arg);
-    explicit format_error(const char* what_arg);
-  };
-
-  // [format.parse.ctx], class template basic_format_parse_context
-  template<class charT>
-  class basic_format_parse_context {
-  public:
-    using char_type = charT;
-    using const_iterator = typename basic_string_view<charT>::const_iterator;
-    using iterator = const_iterator;
-
-  private:
-    iterator begin_;                                    // exposition only
-    iterator end_;                                      // exposition only
-    enum indexing { unknown, manual, automatic };       // exposition only
-    indexing indexing_;                                 // exposition only
-    size_t next_arg_id_;                                // exposition only
-    size_t num_args_;                                   // exposition only
-
-  public:
-    constexpr explicit basic_format_parse_context(basic_string_view<charT> fmt,
-                                                  size_t num_args = 0) noexcept;
-    basic_format_parse_context(const basic_format_parse_context&) = delete;
-    basic_format_parse_context& operator=(const basic_format_parse_context&) = delete;
-
-    constexpr const_iterator begin() const noexcept;
-    constexpr const_iterator end() const noexcept;
-    constexpr void advance_to(const_iterator it);
-
-    constexpr size_t next_arg_id();
-    constexpr void check_arg_id(size_t id);
-  };
-  using format_parse_context = basic_format_parse_context<char>;
-  using wformat_parse_context = basic_format_parse_context<wchar_t>;
+  class format_error;
 }
 
 */

From b1af01fe6aa7cb733461b2d475add77b947b79fb Mon Sep 17 00:00:00 2001
From: Mircea Trofin <mtrofin@google.com>
Date: Mon, 24 Jan 2022 11:18:02 -0800
Subject: [PATCH 434/946] [NFC][MLGO] Simplify conditional compilation

Most of the code that's shared between 'release' and 'development'
modes doesn't depend on anything special.
---
 llvm/lib/Analysis/MLInlineAdvisor.cpp       |  9 +--------
 llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp | 22 +++++++++------------
 2 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Analysis/MLInlineAdvisor.cpp b/llvm/lib/Analysis/MLInlineAdvisor.cpp
index 203e0b025e6c6..0480c1cd28428 100644
--- a/llvm/lib/Analysis/MLInlineAdvisor.cpp
+++ b/llvm/lib/Analysis/MLInlineAdvisor.cpp
@@ -36,11 +36,7 @@
 
 using namespace llvm;
 
-#ifdef LLVM_HAVE_TF_AOT_INLINERSIZEMODEL
-#define LLVM_HAVE_TF_AOT
-#endif
-
-#if defined(LLVM_HAVE_TF_AOT)
+#if defined(LLVM_HAVE_TF_AOT_INLINERSIZEMODEL)
 // codegen-ed file
 #include "InlinerSizeModel.h" // NOLINT
 
@@ -55,8 +51,6 @@ llvm::getReleaseModeAdvisor(Module &M, ModuleAnalysisManager &MAM) {
 
 #define DEBUG_TYPE "inline-ml"
 
-#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
-
 static cl::opt<float> SizeIncreaseThreshold(
     "ml-advisor-size-increase-threshold", cl::Hidden,
     cl::desc("Maximum factor by which expected native size may increase before "
@@ -417,4 +411,3 @@ void MLInlineAdvice::recordUnattemptedInliningImpl() {
     return R;
   });
 }
-#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
diff --git a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
index 848f63da288de..a74c57690640c 100644
--- a/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
+++ b/llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp
@@ -42,11 +42,9 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "ml-regalloc"
-#ifdef LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL
-#define LLVM_HAVE_TF_AOT
-#endif
+
 // Generated header in release (AOT) mode
-#if defined LLVM_HAVE_TF_AOT
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
 #include "RegallocEvictModel.h"
 #endif
 
@@ -104,7 +102,6 @@ INITIALIZE_PASS(RegAllocScoring, "regallocscoringpass",
 // ===================================
 // Common ML Advisor declarations
 // ===================================
-#if defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
 namespace {
 // This is the maximum number of interfererring ranges. That's the number of
 // distinct AllocationOrder values, which comes from MCRegisterClass::RegsSize.
@@ -193,7 +190,9 @@ static const std::vector<int64_t> PerLiveRangeShape{1, NumberOfInterferences};
 // of the output tensor.
 // The contract with the model is that the output will be guaranteed to be to a
 // mask == 1 position.
-const char *const DecisionName = "index_to_evict";
+// Using a macro here to avoid 'not used' warnings (and keep cond compilation to
+// a minimum)
+#define DecisionName "index_to_evict"
 
 // Named features index.
 enum FeatureIDs {
@@ -296,13 +295,12 @@ class MLEvictAdvisor : public RegAllocEvictionAdvisor {
 // ===================================
 // Release (AOT) - specifics
 // ===================================
-#ifdef LLVM_HAVE_TF_AOT
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
 const std::array<std::string, FeatureIDs::FeatureCount> FeatureNames{
 #define _GETNAME(_, NAME, __, ___) #NAME,
     RA_EVICT_FEATURES_LIST(_GETNAME)
 #undef _GETNAME
 };
-
 class ReleaseModeEvictionAdvisorAnalysis final
     : public RegAllocEvictionAdvisorAnalysis {
 public:
@@ -331,7 +329,7 @@ class ReleaseModeEvictionAdvisorAnalysis final
   }
   std::unique_ptr<ReleaseModeModelRunner<RegallocEvictModel>> Runner;
 };
-#endif // LLVM_HAVE_TF_AOT
+#endif
 
 // ===================================
 // Development mode-specifics
@@ -852,13 +850,11 @@ bool RegAllocScoring::runOnMachineFunction(MachineFunction &MF) {
 }
 #endif // #ifdef LLVM_HAVE_TF_API
 
-// Release mode specific implementations
-#if defined LLVM_HAVE_TF_AOT
+#if defined(LLVM_HAVE_TF_AOT_REGALLOCEVICTMODEL)
 RegAllocEvictionAdvisorAnalysis *llvm::createReleaseModeAdvisor() {
   return new ReleaseModeEvictionAdvisorAnalysis();
 }
-#endif // defined(LLVM_HAVE_TF_AOT)
-#endif // defined(LLVM_HAVE_TF_AOT) || defined(LLVM_HAVE_TF_API)
+#endif
 
 // In all cases except development mode, we don't need scoring.
 #if !defined(LLVM_HAVE_TF_API)

From c27f8fb96882ee8c684d03068836cf610d4f0640 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Fri, 21 Jan 2022 09:55:34 -0800
Subject: [PATCH 435/946] [AMDGPU] Remove cndmask from readsExecAsData

Differential Revision: https://reviews.llvm.org/D117909
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp |   5 -
 llvm/test/CodeGen/AMDGPU/licm-valu.mir | 144 -------------------------
 2 files changed, 149 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 3ac04ee717dea..f89f109d0d3a2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -138,11 +138,6 @@ static bool readsExecAsData(const MachineInstr &MI) {
   default:
     break;
   case AMDGPU::V_READFIRSTLANE_B32:
-  case AMDGPU::V_CNDMASK_B64_PSEUDO:
-  case AMDGPU::V_CNDMASK_B32_dpp:
-  case AMDGPU::V_CNDMASK_B32_e32:
-  case AMDGPU::V_CNDMASK_B32_e64:
-  case AMDGPU::V_CNDMASK_B32_sdwa:
     return true;
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
index 45a050cffde9c..0bf2c7c2bc3ba 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-valu.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
@@ -99,147 +99,3 @@ body:             |
   bb.2:
     S_ENDPGM 0
 ...
----
-name: no_hoist_cndmask_e64
-tracksRegLiveness: true
-body:             |
-  ; GCN-LABEL: name: no_hoist_cndmask_e64
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, [[DEF]], 0, [[DEF]], [[DEF1]], implicit $exec
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
-  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   S_ENDPGM 0
-  bb.0:
-    %0:vgpr_32 = IMPLICIT_DEF
-    %1:sreg_64_xexec = IMPLICIT_DEF
-    S_BRANCH %bb.1
-
-  bb.1:
-    %2:vgpr_32 = V_CNDMASK_B32_e64 0, %0, 0, %0, %1, implicit $exec
-    $exec = S_OR_B64 $exec, 1, implicit-def $scc
-    S_CBRANCH_EXECNZ %bb.1, implicit $exec
-    S_BRANCH %bb.2
-
-  bb.2:
-    S_ENDPGM 0
-...
----
-name: no_hoist_cndmask_e32
-tracksRegLiveness: true
-body:             |
-  ; GCN-LABEL: name: no_hoist_cndmask_e32
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 [[DEF]], [[DEF]], implicit undef $vcc, implicit $exec
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
-  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   S_ENDPGM 0
-  bb.0:
-    %0:vgpr_32 = IMPLICIT_DEF
-    %1:sreg_64_xexec = IMPLICIT_DEF
-    S_BRANCH %bb.1
-
-  bb.1:
-    %2:vgpr_32 = V_CNDMASK_B32_e32 %0, %0, implicit undef $vcc, implicit $exec
-    $exec = S_OR_B64 $exec, 1, implicit-def $scc
-    S_CBRANCH_EXECNZ %bb.1, implicit $exec
-    S_BRANCH %bb.2
-
-  bb.2:
-    S_ENDPGM 0
-...
----
-name: no_hoist_cndmask_dpp
-tracksRegLiveness: true
-body:             |
-  ; GCN-LABEL: name: no_hoist_cndmask_dpp
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CNDMASK_B32_dpp:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_dpp [[DEF]], 0, [[DEF]], 0, [[DEF]], 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
-  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   S_ENDPGM 0
-  bb.0:
-    %0:vgpr_32 = IMPLICIT_DEF
-    %1:sreg_64_xexec = IMPLICIT_DEF
-    S_BRANCH %bb.1
-
-  bb.1:
-    %2:vgpr_32 = V_CNDMASK_B32_dpp %0:vgpr_32, 0, %0:vgpr_32, 0, %0:vgpr_32, 1, 15, 15, 10, implicit $exec, implicit undef $vcc
-    $exec = S_OR_B64 $exec, 1, implicit-def $scc
-    S_CBRANCH_EXECNZ %bb.1, implicit $exec
-    S_BRANCH %bb.2
-
-  bb.2:
-    S_ENDPGM 0
-...
----
-name: no_hoist_cndmask_sdwa
-tracksRegLiveness: true
-body:             |
-  ; GCN-LABEL: name: no_hoist_cndmask_sdwa
-  ; GCN: bb.0:
-  ; GCN-NEXT:   successors: %bb.1(0x80000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
-  ; GCN-NEXT:   [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
-  ; GCN-NEXT:   S_BRANCH %bb.1
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.1:
-  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT:   [[V_CNDMASK_B32_sdwa:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_sdwa 0, [[DEF]], 0, [[DEF]], 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-  ; GCN-NEXT:   $exec = S_OR_B64 $exec, 1, implicit-def $scc
-  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
-  ; GCN-NEXT:   S_BRANCH %bb.2
-  ; GCN-NEXT: {{  $}}
-  ; GCN-NEXT: bb.2:
-  ; GCN-NEXT:   S_ENDPGM 0
-  bb.0:
-    %0:vgpr_32 = IMPLICIT_DEF
-    %1:sreg_64_xexec = IMPLICIT_DEF
-    S_BRANCH %bb.1
-
-  bb.1:
-    %2:vgpr_32 = V_CNDMASK_B32_sdwa 0, %0:vgpr_32, 0, %0:vgpr_32, 0, 0, 0, 0, 0, implicit $exec, implicit undef $vcc
-    $exec = S_OR_B64 $exec, 1, implicit-def $scc
-    S_CBRANCH_EXECNZ %bb.1, implicit $exec
-    S_BRANCH %bb.2
-
-  bb.2:
-    S_ENDPGM 0
-...

From 4858fe04a1571e78ff97b778c0fb6a46855c3d6a Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Tue, 18 Jan 2022 12:44:48 +0100
Subject: [PATCH 436/946] [lldb/Plugins] Add ScriptedProcess::GetThreadsInfo
 interface

This patch adds a new method to the Scripted Process interface to
retrive a dictionary of Scripted Threads. It uses the thread ID as a key
and the Scripted Thread instance as the value.

This dictionary will be used to create Scripted Threads in lldb and
perform calls to the python scripted thread object.

rdar://87427126

Differential Revision: https://reviews.llvm.org/D117068

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 .../python/scripted_process/scripted_process.py       | 11 +++++++++++
 .../lldb/Interpreter/ScriptedProcessInterface.h       |  2 ++
 .../Python/ScriptedProcessPythonInterface.cpp         | 11 +++++++++++
 .../Python/ScriptedProcessPythonInterface.h           |  2 ++
 4 files changed, 26 insertions(+)

diff --git a/lldb/examples/python/scripted_process/scripted_process.py b/lldb/examples/python/scripted_process/scripted_process.py
index 16dcc72748532..ec751b495fdb6 100644
--- a/lldb/examples/python/scripted_process/scripted_process.py
+++ b/lldb/examples/python/scripted_process/scripted_process.py
@@ -19,6 +19,7 @@ class ScriptedProcess:
     memory_regions = None
     stack_memory_dump = None
     loaded_images = None
+    threads = {}
 
     @abstractmethod
     def __init__(self, target, args):
@@ -51,6 +52,16 @@ def get_memory_region_containing_address(self, addr):
         """
         pass
 
+    def get_threads_info(self):
+        """ Get the dictionary describing the process' Scripted Threads.
+
+        Returns:
+            Dict: The dictionary of threads, with the thread ID as the key and
+            a Scripted Thread instance as the value.
+            The dictionary can be empty.
+        """
+        return self.threads
+
     @abstractmethod
     def get_thread_with_id(self, tid):
         """ Get the scripted process thread with a specific ID.
diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
index 26fd956f96bbc..efdea6df2d417 100644
--- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
@@ -41,6 +41,8 @@ class ScriptedProcessInterface : virtual public ScriptedInterface {
     return {};
   }
 
+  virtual StructuredData::DictionarySP GetThreadsInfo() { return nullptr; }
+
   virtual StructuredData::DictionarySP GetThreadWithID(lldb::tid_t tid) {
     return nullptr;
   }
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
index da8ff42213552..447bceebb00b4 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
@@ -92,6 +92,17 @@ ScriptedProcessPythonInterface::GetMemoryRegionContainingAddress(
   return mem_region;
 }
 
+StructuredData::DictionarySP ScriptedProcessPythonInterface::GetThreadsInfo() {
+  Status error;
+  StructuredData::DictionarySP dict =
+      Dispatch<StructuredData::DictionarySP>("get_threads_info", error);
+
+  if (!CheckStructuredDataObject(LLVM_PRETTY_FUNCTION, dict, error))
+    return {};
+
+  return dict;
+}
+
 StructuredData::DictionarySP
 ScriptedProcessPythonInterface::GetThreadWithID(lldb::tid_t tid) {
   Status error;
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
index 421bdd59887ce..ac4e768b2d31b 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
@@ -39,6 +39,8 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface,
   GetMemoryRegionContainingAddress(lldb::addr_t address,
                                    Status &error) override;
 
+  StructuredData::DictionarySP GetThreadsInfo() override;
+
   StructuredData::DictionarySP GetThreadWithID(lldb::tid_t tid) override;
 
   StructuredData::DictionarySP GetRegistersForThread(lldb::tid_t tid) override;

From 1b86344fa80bd11853e0347ea33dc6cb5a460c4f Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Tue, 18 Jan 2022 12:45:27 +0100
Subject: [PATCH 437/946] [lldb/Plugins] Move ScriptedThreadInterface to
 ScriptedThread

Since we can have multiple Scripted Threads per Scripted Process, having
only a single ScriptedThreadInterface (with a single object instance)
will cause the method calls to be done on the wrong object.

Instead, this patch creates a separate ScriptedThreadInterface for each
new lldb_private::ScriptedThread to make sure we interact with the right
instance.

rdar://87427911

Differential Revision: https://reviews.llvm.org/D117070

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 lldb/include/lldb/Interpreter/ScriptedProcessInterface.h  | 4 +---
 lldb/source/Plugins/Process/scripted/ScriptedThread.cpp   | 6 ++++--
 lldb/source/Plugins/Process/scripted/ScriptedThread.h     | 1 +
 .../Python/ScriptedProcessPythonInterface.cpp             | 8 ++------
 .../Python/ScriptedProcessPythonInterface.h               | 2 +-
 5 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
index efdea6df2d417..d62767417f339 100644
--- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
@@ -68,11 +68,9 @@ class ScriptedProcessInterface : virtual public ScriptedInterface {
 
 protected:
   friend class ScriptedThread;
-  virtual lldb::ScriptedThreadInterfaceSP GetScriptedThreadInterface() {
+  virtual lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() {
     return nullptr;
   }
-
-  lldb::ScriptedThreadInterfaceSP m_scripted_thread_interface_sp = nullptr;
 };
 
 class ScriptedThreadInterface : virtual public ScriptedInterface {
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
index 959b8c5818852..4185e1b67587b 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -29,7 +29,9 @@ void ScriptedThread::CheckInterpreterAndScriptObject() const {
 }
 
 ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error)
-    : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process) {
+    : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process),
+      m_scripted_thread_interface_sp(
+          m_scripted_process.GetInterface().CreateScriptedThreadInterface()) {
   if (!process.IsValid()) {
     error.SetErrorString("Invalid scripted process");
     return;
@@ -190,7 +192,7 @@ void ScriptedThread::RefreshStateAfterStop() {
 }
 
 lldb::ScriptedThreadInterfaceSP ScriptedThread::GetInterface() const {
-  return m_scripted_process.GetInterface().GetScriptedThreadInterface();
+  return m_scripted_thread_interface_sp;
 }
 
 std::shared_ptr<DynamicRegisterInfo> ScriptedThread::GetDynamicRegisterInfo() {
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
index cdcd543702a48..54b095777ab73 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
@@ -59,6 +59,7 @@ class ScriptedThread : public lldb_private::Thread {
   std::shared_ptr<DynamicRegisterInfo> GetDynamicRegisterInfo();
 
   const ScriptedProcess &m_scripted_process;
+  lldb::ScriptedThreadInterfaceSP m_scripted_thread_interface_sp = nullptr;
   std::shared_ptr<DynamicRegisterInfo> m_register_info_sp = nullptr;
   lldb_private::StructuredData::ObjectSP m_script_object_sp = nullptr;
 };
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
index 447bceebb00b4..29516c4c4501e 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
@@ -165,12 +165,8 @@ ScriptedProcessPythonInterface::GetScriptedThreadPluginName() {
 }
 
 lldb::ScriptedThreadInterfaceSP
-ScriptedProcessPythonInterface::GetScriptedThreadInterface() {
-  if (!m_scripted_thread_interface_sp)
-    m_scripted_thread_interface_sp =
-        std::make_shared<ScriptedThreadPythonInterface>(m_interpreter);
-
-  return m_scripted_thread_interface_sp;
+ScriptedProcessPythonInterface::CreateScriptedThreadInterface() {
+  return std::make_shared<ScriptedThreadPythonInterface>(m_interpreter);
 }
 
 #endif
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
index ac4e768b2d31b..83507a93bb973 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
@@ -57,7 +57,7 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface,
   llvm::Optional<std::string> GetScriptedThreadPluginName() override;
 
 private:
-  lldb::ScriptedThreadInterfaceSP GetScriptedThreadInterface() override;
+  lldb::ScriptedThreadInterfaceSP CreateScriptedThreadInterface() override;
 };
 } // namespace lldb_private
 

From d3e0f7e1503b1bca8baa6483d3b5c452a91f60a6 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Tue, 18 Jan 2022 12:45:57 +0100
Subject: [PATCH 438/946] [lldb/Plugins] Add support of multiple
 ScriptedThreads in a ScriptedProcess

This patch adds support of multiple Scripted Threads in a ScriptedProcess.

This is done by fetching the Scripted Threads info dictionary at every
ScriptedProcess::DoUpdateThreadList and iterate over each element to
create a new ScriptedThread using the object instance, if it was not
already available.

This patch also adds the ability to pass a pointer of a script interpreter
object instance to initialize a ScriptedInterface instead of having to call
the script object initializer in the ScriptedInterface constructor.

This is used to instantiate the ScriptedThreadInterface from the
ScriptedThread constructor, to be able to perform call on that script
interpreter object instance.

Finally, the patch also updates the scripted process test to check for
multiple threads.

rdar://84507704

Differential Revision: https://reviews.llvm.org/D117071

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 .../scripted_process/scripted_process.py      |  9 +--
 .../lldb/Interpreter/ScriptedInterface.h      |  3 +-
 .../Interpreter/ScriptedProcessInterface.h    |  6 +-
 .../Process/scripted/ScriptedProcess.cpp      | 61 ++++++++++++++-----
 .../Process/scripted/ScriptedThread.cpp       | 24 +++++---
 .../Plugins/Process/scripted/ScriptedThread.h |  3 +-
 .../Python/ScriptedProcessPythonInterface.cpp |  5 +-
 .../Python/ScriptedProcessPythonInterface.h   |  3 +-
 .../Python/ScriptedThreadPythonInterface.cpp  | 15 +++--
 .../Python/ScriptedThreadPythonInterface.h    |  3 +-
 .../functionalities/scripted_process/Makefile |  4 +-
 .../scripted_process/TestScriptedProcess.py   | 15 ++---
 .../invalid_scripted_process.py               |  3 +-
 .../functionalities/scripted_process/main.c   |  8 ---
 .../functionalities/scripted_process/main.cpp | 34 +++++++++++
 .../stack_core_scripted_process.py            | 53 ++++++++++++----
 16 files changed, 176 insertions(+), 73 deletions(-)
 delete mode 100644 lldb/test/API/functionalities/scripted_process/main.c
 create mode 100644 lldb/test/API/functionalities/scripted_process/main.cpp

diff --git a/lldb/examples/python/scripted_process/scripted_process.py b/lldb/examples/python/scripted_process/scripted_process.py
index ec751b495fdb6..83ec3513cfcd7 100644
--- a/lldb/examples/python/scripted_process/scripted_process.py
+++ b/lldb/examples/python/scripted_process/scripted_process.py
@@ -70,7 +70,7 @@ def get_thread_with_id(self, tid):
             tid (int): Thread ID to look for in the scripted process.
 
         Returns:
-            Dict: The thread represented as a dictionary, withr the
+            Dict: The thread represented as a dictionary, with the
                 tid thread ID. None if tid doesn't match any of the scripted
                 process threads.
         """
@@ -212,11 +212,12 @@ def __init__(self, process, args):
         self.target = None
         self.process = None
         self.args = None
-        if isinstance(process, lldb.SBProcess) and process.IsValid():
-            self.process = process
-            self.target = process.GetTarget()
+        if isinstance(process, ScriptedProcess):
+            self.target = process.target
+            self.process = self.target.GetProcess()
 
         self.id = None
+        self.idx = None
         self.name = None
         self.queue = None
         self.state = None
diff --git a/lldb/include/lldb/Interpreter/ScriptedInterface.h b/lldb/include/lldb/Interpreter/ScriptedInterface.h
index 427fa3f4f793a..27cf9f036e5fd 100644
--- a/lldb/include/lldb/Interpreter/ScriptedInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedInterface.h
@@ -27,7 +27,8 @@ class ScriptedInterface {
 
   virtual StructuredData::GenericSP
   CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) = 0;
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) = 0;
 
   template <typename Ret>
   Ret ErrorWithMessage(llvm::StringRef caller_name, llvm::StringRef error_msg,
diff --git a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
index d62767417f339..0712b3bf4a3ee 100644
--- a/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedProcessInterface.h
@@ -23,7 +23,8 @@ class ScriptedProcessInterface : virtual public ScriptedInterface {
 public:
   StructuredData::GenericSP
   CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) override {
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) override {
     return nullptr;
   }
 
@@ -77,7 +78,8 @@ class ScriptedThreadInterface : virtual public ScriptedInterface {
 public:
   StructuredData::GenericSP
   CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) override {
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) override {
     return nullptr;
   }
 
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index cb21a3e7e65f3..f01e599ad5585 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -164,9 +164,6 @@ Status ScriptedProcess::DoLaunch(Module *exe_module,
 
   SetPrivateState(eStateStopped);
 
-  UpdateThreadListIfNeeded();
-  GetThreadList();
-
   return {};
 }
 
@@ -304,19 +301,55 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
             .str(),
         error);
 
-  lldb::ThreadSP thread_sp;
-  thread_sp = std::make_shared<ScriptedThread>(*this, error);
-
-  if (!thread_sp || error.Fail())
-    return GetInterface().ErrorWithMessage<bool>(LLVM_PRETTY_FUNCTION,
-                                                 error.AsCString(), error);
+  StructuredData::DictionarySP thread_info_sp = GetInterface().GetThreadsInfo();
 
-  RegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext();
-  if (!reg_ctx_sp)
+  if (!thread_info_sp)
     return GetInterface().ErrorWithMessage<bool>(
-        LLVM_PRETTY_FUNCTION, "Invalid Register Context", error);
-
-  new_thread_list.AddThread(thread_sp);
+        LLVM_PRETTY_FUNCTION,
+        "Couldn't fetch thread list from Scripted Process.", error);
+
+  auto create_scripted_thread =
+      [this, &old_thread_list, &error,
+       &new_thread_list](ConstString key, StructuredData::Object *val) -> bool {
+    if (!val)
+      return GetInterface().ErrorWithMessage<bool>(
+          LLVM_PRETTY_FUNCTION, "Invalid thread info object", error);
+
+    lldb::tid_t tid = LLDB_INVALID_THREAD_ID;
+    if (!llvm::to_integer(key.AsCString(), tid))
+      return GetInterface().ErrorWithMessage<bool>(LLVM_PRETTY_FUNCTION,
+                                                   "Invalid thread id", error);
+
+    if (ThreadSP thread_sp =
+            old_thread_list.FindThreadByID(tid, false /*=can_update*/)) {
+      // If the thread was already in the old_thread_list,
+      // just add it back to the new_thread_list.
+      new_thread_list.AddThread(thread_sp);
+      return true;
+    }
+
+    lldb::ThreadSP thread_sp =
+        std::make_shared<ScriptedThread>(*this, error, val->GetAsGeneric());
+
+    if (!thread_sp || error.Fail())
+      return GetInterface().ErrorWithMessage<bool>(LLVM_PRETTY_FUNCTION,
+                                                   error.AsCString(), error);
+
+    RegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext();
+    if (!reg_ctx_sp)
+      return GetInterface().ErrorWithMessage<bool>(
+          LLVM_PRETTY_FUNCTION,
+          llvm::Twine("Invalid Register Context for thread " +
+                      llvm::Twine(key.AsCString()))
+              .str(),
+          error);
+
+    new_thread_list.AddThread(thread_sp);
+
+    return true;
+  };
+
+  thread_info_sp->ForEach(create_scripted_thread);
 
   return new_thread_list.GetSize(false) > 0;
 }
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
index 4185e1b67587b..1b9841c2048ea 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -28,7 +28,8 @@ void ScriptedThread::CheckInterpreterAndScriptObject() const {
   lldbassert(GetInterface() && "Invalid Scripted Thread Interface.");
 }
 
-ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error)
+ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error,
+                               StructuredData::Generic *script_object)
     : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process),
       m_scripted_thread_interface_sp(
           m_scripted_process.GetInterface().CreateScriptedThreadInterface()) {
@@ -54,18 +55,23 @@ ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error)
 
   ExecutionContext exe_ctx(process);
 
-  StructuredData::GenericSP object_sp =
-      scripted_thread_interface->CreatePluginObject(
-          class_name->c_str(), exe_ctx,
-          process.m_scripted_process_info.GetArgsSP());
-  if (!object_sp || !object_sp->IsValid()) {
-    error.SetErrorString("Failed to create valid script object");
+  m_script_object_sp = scripted_thread_interface->CreatePluginObject(
+      class_name->c_str(), exe_ctx, process.m_scripted_process_info.GetArgsSP(),
+      script_object);
+
+  if (!m_script_object_sp) {
+    error.SetErrorString("Failed to create script object");
     return;
   }
 
-  m_script_object_sp = object_sp;
+  if (!m_script_object_sp->IsValid()) {
+    m_script_object_sp = nullptr;
+    error.SetErrorString("Created script object is invalid");
+    return;
+  }
 
-  SetID(scripted_thread_interface->GetThreadID());
+  lldb::tid_t tid = scripted_thread_interface->GetThreadID();
+  SetID(tid);
 }
 
 ScriptedThread::~ScriptedThread() { DestroyThread(); }
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
index 54b095777ab73..d3cd26c57826d 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
@@ -26,7 +26,8 @@ namespace lldb_private {
 
 class ScriptedThread : public lldb_private::Thread {
 public:
-  ScriptedThread(ScriptedProcess &process, Status &error);
+  ScriptedThread(ScriptedProcess &process, Status &error,
+                 StructuredData::Generic *script_object = nullptr);
 
   ~ScriptedThread() override;
 
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
index 29516c4c4501e..e39f8be73e49a 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.cpp
@@ -32,7 +32,7 @@ ScriptedProcessPythonInterface::ScriptedProcessPythonInterface(
 
 StructuredData::GenericSP ScriptedProcessPythonInterface::CreatePluginObject(
     llvm::StringRef class_name, ExecutionContext &exe_ctx,
-    StructuredData::DictionarySP args_sp) {
+    StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) {
   if (class_name.empty())
     return {};
 
@@ -47,9 +47,6 @@ StructuredData::GenericSP ScriptedProcessPythonInterface::CreatePluginObject(
       class_name.str().c_str(), m_interpreter.GetDictionaryName(), target_sp,
       args_impl, error_string);
 
-  if (!ret_val)
-    return {};
-
   m_object_instance_sp =
       StructuredData::GenericSP(new StructuredPythonObject(std::move(ret_val)));
 
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
index 83507a93bb973..e34a181849eb4 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedProcessPythonInterface.h
@@ -25,7 +25,8 @@ class ScriptedProcessPythonInterface : public ScriptedProcessInterface,
   StructuredData::GenericSP
   CreatePluginObject(const llvm::StringRef class_name,
                      ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) override;
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) override;
 
   Status Launch() override;
 
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
index fb55d44aca840..511a42fe2c26a 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
@@ -31,8 +31,7 @@ ScriptedThreadPythonInterface::ScriptedThreadPythonInterface(
 
 StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject(
     const llvm::StringRef class_name, ExecutionContext &exe_ctx,
-    StructuredData::DictionarySP args_sp) {
-
+    StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) {
   if (class_name.empty())
     return {};
 
@@ -43,9 +42,15 @@ StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject(
   Locker py_lock(&m_interpreter, Locker::AcquireLock | Locker::NoSTDIN,
                  Locker::FreeLock);
 
-  PythonObject ret_val = LLDBSwigPythonCreateScriptedThread(
-      class_name.str().c_str(), m_interpreter.GetDictionaryName(), process_sp,
-      args_impl, error_string);
+  PythonObject ret_val;
+
+  if (!script_obj)
+    ret_val = LLDBSwigPythonCreateScriptedThread(
+        class_name.str().c_str(), m_interpreter.GetDictionaryName(), process_sp,
+        args_impl, error_string);
+  else
+    ret_val = PythonObject(PyRefType::Borrowed,
+                           static_cast<PyObject *>(script_obj->GetValue()));
 
   if (!ret_val)
     return {};
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h
index 4222cdfa8fcfe..59bb182ae3f3d 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.h
@@ -24,7 +24,8 @@ class ScriptedThreadPythonInterface : public ScriptedThreadInterface,
 
   StructuredData::GenericSP
   CreatePluginObject(llvm::StringRef class_name, ExecutionContext &exe_ctx,
-                     StructuredData::DictionarySP args_sp) override;
+                     StructuredData::DictionarySP args_sp,
+                     StructuredData::Generic *script_obj = nullptr) override;
 
   lldb::tid_t GetThreadID() override;
 
diff --git a/lldb/test/API/functionalities/scripted_process/Makefile b/lldb/test/API/functionalities/scripted_process/Makefile
index 692ba17322859..785b17c7fd698 100644
--- a/lldb/test/API/functionalities/scripted_process/Makefile
+++ b/lldb/test/API/functionalities/scripted_process/Makefile
@@ -1,4 +1,4 @@
-C_SOURCES := main.c
-
+CXX_SOURCES := main.cpp
+ENABLE_THREADS := YES
 include Makefile.rules
 
diff --git a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
index 2a5eff3122145..be55771c14fb1 100644
--- a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
+++ b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
@@ -130,7 +130,8 @@ def cleanup():
 
     def create_stack_skinny_corefile(self, file):
         self.build()
-        target, process, thread, _ = lldbutil.run_to_source_breakpoint(self, "// break here", lldb.SBFileSpec("main.c"))
+        target, process, thread, _ = lldbutil.run_to_source_breakpoint(self, "// break here",
+                                                                       lldb.SBFileSpec("main.cpp"))
         self.assertTrue(process.IsValid(), "Process is invalid.")
         # FIXME: Use SBAPI to save the process corefile.
         self.runCmd("process save-core -s stack  " + file)
@@ -186,14 +187,14 @@ def cleanup():
         self.assertTrue(process, PROCESS_IS_VALID)
         self.assertEqual(process.GetProcessID(), 42)
 
-        self.assertEqual(process.GetNumThreads(), 1)
+        self.assertEqual(process.GetNumThreads(), 3)
         thread = process.GetSelectedThread()
         self.assertTrue(thread, "Invalid thread.")
-        self.assertEqual(thread.GetName(), "StackCoreScriptedThread.thread-1")
+        self.assertEqual(thread.GetName(), "StackCoreScriptedThread.thread-0")
 
-        self.assertEqual(thread.GetNumFrames(), 3)
+        self.assertEqual(thread.GetNumFrames(), 2)
         frame = thread.GetSelectedFrame()
         self.assertTrue(frame, "Invalid frame.")
-        self.assertEqual(frame.GetFunctionName(), "bar")
-        self.assertEqual(int(frame.FindValue("i", lldb.eValueTypeVariableArgument).GetValue()), 42)
-        self.assertEqual(int(frame.FindValue("j", lldb.eValueTypeVariableLocal).GetValue()), 42 * 42)
+        # self.assertEqual(frame.GetFunctionName(), "bar")
+        # self.assertEqual(int(frame.FindValue("i", lldb.eValueTypeVariableArgument).GetValue()), 42)
+        # self.assertEqual(int(frame.FindValue("j", lldb.eValueTypeVariableLocal).GetValue()), 42 * 42)
diff --git a/lldb/test/API/functionalities/scripted_process/invalid_scripted_process.py b/lldb/test/API/functionalities/scripted_process/invalid_scripted_process.py
index 7dfc55bc30c5b..0a2977c69d457 100644
--- a/lldb/test/API/functionalities/scripted_process/invalid_scripted_process.py
+++ b/lldb/test/API/functionalities/scripted_process/invalid_scripted_process.py
@@ -9,6 +9,7 @@
 class InvalidScriptedProcess(ScriptedProcess):
     def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData):
         super().__init__(target, args)
+        self.threads[0] = InvalidScriptedThread(self, None)
 
     def get_memory_region_containing_address(self, addr: int) -> lldb.SBMemoryRegionInfo:
         return None
@@ -81,4 +82,4 @@ def __lldb_init_module(debugger, dict):
                                      InvalidScriptedProcess.__name__))
     else:
         print("Name of the class that will manage the scripted process: '%s.%s'"
-                % (__name__, InvalidScriptedProcess.__name__))
\ No newline at end of file
+                % (__name__, InvalidScriptedProcess.__name__))
diff --git a/lldb/test/API/functionalities/scripted_process/main.c b/lldb/test/API/functionalities/scripted_process/main.c
deleted file mode 100644
index 67d3732441da2..0000000000000
--- a/lldb/test/API/functionalities/scripted_process/main.c
+++ /dev/null
@@ -1,8 +0,0 @@
-int bar(int i) {
-  int j = i * i;
-  return j; // break here
-}
-
-int foo(int i) { return bar(i); }
-
-int main() { return foo(42); }
diff --git a/lldb/test/API/functionalities/scripted_process/main.cpp b/lldb/test/API/functionalities/scripted_process/main.cpp
new file mode 100644
index 0000000000000..26dc123558921
--- /dev/null
+++ b/lldb/test/API/functionalities/scripted_process/main.cpp
@@ -0,0 +1,34 @@
+#include <iostream>
+#include <mutex>
+#include <thread>
+
+int bar(int i) {
+  int j = i * i;
+  return j; // break here
+}
+
+int foo(int i) { return bar(i); }
+
+void call_and_wait(int &n) {
+  std::cout << "waiting for computation!" << std::endl;
+  while (n != 42 * 42)
+    ;
+  std::cout << "finished computation!" << std::endl;
+}
+
+void compute_pow(int &n) { n = foo(n); }
+
+int main() {
+  int n = 42;
+  std::mutex mutex;
+  std::unique_lock<std::mutex> lock(mutex);
+
+  std::thread thread_1(call_and_wait, std::ref(n));
+  std::thread thread_2(compute_pow, std::ref(n));
+  lock.unlock();
+
+  thread_1.join();
+  thread_2.join();
+
+  return 0;
+}
diff --git a/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py b/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
index da7c69ee7b993..ac455fe3d2717 100644
--- a/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
+++ b/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
@@ -1,4 +1,4 @@
-import os,struct,signal
+import os,json,struct,signal
 
 from typing import Any, Dict
 
@@ -21,6 +21,14 @@ def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData):
                 idx = int(self.backing_target_idx.GetStringValue(100))
             self.corefile_target = target.GetDebugger().GetTargetAtIndex(idx)
             self.corefile_process = self.corefile_target.GetProcess()
+            for corefile_thread in self.corefile_process:
+                structured_data = lldb.SBStructuredData()
+                structured_data.SetFromJSON(json.dumps({
+                    "backing_target_idx" : idx,
+                    "thread_idx" : corefile_thread.GetIndexID()
+                }))
+
+                self.threads[corefile_thread.GetThreadID()] = StackCoreScriptedThread(self, structured_data)
 
     def get_memory_region_containing_address(self, addr: int) -> lldb.SBMemoryRegionInfo:
         mem_region = lldb.SBMemoryRegionInfo()
@@ -70,23 +78,43 @@ def get_scripted_thread_plugin(self):
 class StackCoreScriptedThread(ScriptedThread):
     def __init__(self, process, args):
         super().__init__(process, args)
-        self.backing_target_idx = args.GetValueForKey("backing_target_idx")
+        backing_target_idx = args.GetValueForKey("backing_target_idx")
+        thread_idx = args.GetValueForKey("thread_idx")
+
+        def extract_value_from_structured_data(data, default_val):
+            if data and data.IsValid():
+                if data.GetType() == lldb.eStructuredDataTypeInteger:
+                    return data.GetIntegerValue(default_val)
+                if data.GetType() == lldb.eStructuredDataTypeString:
+                    return int(data.GetStringValue(100))
+            return None
+
+        #TODO: Change to Walrus operator (:=) with oneline if assignment
+        # Requires python 3.8
+        val = extract_value_from_structured_data(thread_idx, 0)
+        if val is not None:
+            self.idx = val
 
         self.corefile_target = None
         self.corefile_process = None
-        if (self.backing_target_idx and self.backing_target_idx.IsValid()):
-            if self.backing_target_idx.GetType() == lldb.eStructuredDataTypeInteger:
-                idx = self.backing_target_idx.GetIntegerValue(42)
-            if self.backing_target_idx.GetType() == lldb.eStructuredDataTypeString:
-                idx = int(self.backing_target_idx.GetStringValue(100))
-            self.corefile_target = self.target.GetDebugger().GetTargetAtIndex(idx)
+        self.corefile_thread = None
+
+        #TODO: Change to Walrus operator (:=) with oneline if assignment
+        # Requires python 3.8
+        val = extract_value_from_structured_data(backing_target_idx, 42)
+        if val is not None:
+            self.corefile_target = self.target.GetDebugger().GetTargetAtIndex(val)
             self.corefile_process = self.corefile_target.GetProcess()
+            self.corefile_thread = self.corefile_process.GetThreadByIndexID(self.idx)
+
+        if self.corefile_thread:
+            self.id = self.corefile_thread.GetThreadID()
 
     def get_thread_id(self) -> int:
-        return 0x19
+        return self.id
 
     def get_name(self) -> str:
-        return StackCoreScriptedThread.__name__ + ".thread-1"
+        return StackCoreScriptedThread.__name__ + ".thread-" + str(self.id)
 
     def get_stop_reason(self) -> Dict[str, Any]:
         return { "type": lldb.eStopReasonSignal, "data": {
@@ -109,10 +137,9 @@ def __init__(idx, cfa, pc, symbol_ctx):
         return self.frame_zero[0:0]
 
     def get_register_context(self) -> str:
-        thread = self.corefile_process.GetSelectedThread()
-        if not thread or thread.GetNumFrames() == 0:
+        if not self.corefile_thread or self.corefile_thread.GetNumFrames() == 0:
             return None
-        frame = thread.GetFrameAtIndex(0)
+        frame = self.corefile_thread.GetFrameAtIndex(0)
 
         GPRs = None
         registerSet = frame.registers # Returns an SBValueList.

From cfa55bfe781474a30467b1bbf2e7874985171196 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Tue, 18 Jan 2022 12:46:17 +0100
Subject: [PATCH 439/946] [lldb/Plugins] Enrich ScriptedThreads Stop Reasons
 with Exceptions

This patch adds Exceptions to the list of supported stop reasons for
Scripted Threads.

The main motivation for this is that breakpoints are triggered as a
special exception class on ARM platforms, so adding it as a stop reason
allows the ScriptedProcess to selected the ScriptedThread that stopped at
a breakpoint (or crashed :p).

rdar://87430376

Differential Revision: https://reviews.llvm.org/D117074

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 .../Process/scripted/ScriptedProcess.cpp      |  1 -
 .../Process/scripted/ScriptedThread.cpp       | 19 +++++++++++++++++--
 .../scripted_process/TestScriptedProcess.py   | 12 ++++++------
 .../stack_core_scripted_process.py            | 18 +++++++++++++++---
 4 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index f01e599ad5585..e28658e33cdad 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -357,7 +357,6 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
 void ScriptedProcess::RefreshStateAfterStop() {
   // Let all threads recover from stopping and do any clean up based on the
   // previous thread state (if any).
-  m_thread_list.RefreshStateAfterStop();
 }
 
 bool ScriptedProcess::GetProcessInfo(ProcessInstanceInfo &info) {
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
index 1b9841c2048ea..14f4f99cf9c4a 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -145,6 +145,11 @@ bool ScriptedThread::CalculateStopInfo() {
   StructuredData::DictionarySP dict_sp = GetInterface()->GetStopReason();
 
   Status error;
+  if (!dict_sp)
+    return GetInterface()->ErrorWithMessage<bool>(
+        LLVM_PRETTY_FUNCTION, "Failed to get scripted thread stop info.", error,
+        LIBLLDB_LOG_THREAD);
+
   lldb::StopInfoSP stop_info_sp;
   lldb::StopReason stop_reason_type;
 
@@ -158,12 +163,12 @@ bool ScriptedThread::CalculateStopInfo() {
   if (!dict_sp->GetValueForKeyAsDictionary("data", data_dict))
     return GetInterface()->ErrorWithMessage<bool>(
         LLVM_PRETTY_FUNCTION,
-        "Couldn't find value for key 'type' in stop reason dictionary.", error,
+        "Couldn't find value for key 'data' in stop reason dictionary.", error,
         LIBLLDB_LOG_THREAD);
 
   switch (stop_reason_type) {
   case lldb::eStopReasonNone:
-    break;
+    return true;
   case lldb::eStopReasonBreakpoint: {
     lldb::break_id_t break_id;
     data_dict->GetValueForKeyAsInteger("break_id", break_id,
@@ -180,6 +185,13 @@ bool ScriptedThread::CalculateStopInfo() {
     stop_info_sp =
         StopInfo::CreateStopReasonWithSignal(*this, signal, description.data());
   } break;
+  case lldb::eStopReasonException: {
+    llvm::StringRef description;
+    data_dict->GetValueForKeyAsString("desc", description);
+
+    stop_info_sp =
+        StopInfo::CreateStopReasonWithException(*this, description.data());
+  } break;
   default:
     return GetInterface()->ErrorWithMessage<bool>(
         LLVM_PRETTY_FUNCTION,
@@ -189,6 +201,9 @@ bool ScriptedThread::CalculateStopInfo() {
         error, LIBLLDB_LOG_THREAD);
   }
 
+  if (!stop_info_sp)
+    return false;
+
   SetStopInfo(stop_info_sp);
   return true;
 }
diff --git a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
index be55771c14fb1..4831d48a0b5a9 100644
--- a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
+++ b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
@@ -188,13 +188,13 @@ def cleanup():
         self.assertEqual(process.GetProcessID(), 42)
 
         self.assertEqual(process.GetNumThreads(), 3)
-        thread = process.GetSelectedThread()
+        thread = process.GetThreadAtIndex(2)
         self.assertTrue(thread, "Invalid thread.")
-        self.assertEqual(thread.GetName(), "StackCoreScriptedThread.thread-0")
+        self.assertEqual(thread.GetName(), "StackCoreScriptedThread.thread-2")
 
-        self.assertEqual(thread.GetNumFrames(), 2)
+        self.assertEqual(thread.GetNumFrames(), 6)
         frame = thread.GetSelectedFrame()
         self.assertTrue(frame, "Invalid frame.")
-        # self.assertEqual(frame.GetFunctionName(), "bar")
-        # self.assertEqual(int(frame.FindValue("i", lldb.eValueTypeVariableArgument).GetValue()), 42)
-        # self.assertEqual(int(frame.FindValue("j", lldb.eValueTypeVariableLocal).GetValue()), 42 * 42)
+        self.assertIn("bar", frame.GetFunctionName())
+        self.assertEqual(int(frame.FindValue("i", lldb.eValueTypeVariableArgument).GetValue()), 42)
+        self.assertEqual(int(frame.FindValue("j", lldb.eValueTypeVariableLocal).GetValue()), 42 * 42)
diff --git a/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py b/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
index ac455fe3d2717..1fabcf464e7df 100644
--- a/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
+++ b/lldb/test/API/functionalities/scripted_process/stack_core_scripted_process.py
@@ -117,9 +117,21 @@ def get_name(self) -> str:
         return StackCoreScriptedThread.__name__ + ".thread-" + str(self.id)
 
     def get_stop_reason(self) -> Dict[str, Any]:
-        return { "type": lldb.eStopReasonSignal, "data": {
-            "signal": signal.SIGINT
-        } }
+        stop_reason = { "type": lldb.eStopReasonInvalid, "data": {  }}
+
+        if self.corefile_thread and self.corefile_thread.IsValid:
+            stop_reason["type"] = self.corefile_thread.GetStopReason()
+
+            if self.corefile_thread.GetStopReasonDataCount() > 0:
+                if stop_reason["type"] == lldb.eStopReasonBreakpoint:
+                    stop_reason["data"]["break_id"] = self.corefile_thread.GetStopReasonDataAtIndex(0)
+                    stop_reason["data"]["break_loc_id"] = self.corefile_thread.GetStopReasonDataAtIndex(1)
+                elif stop_reason["type"] == lldb.eStopReasonSignal:
+                    stop_reason["data"]["signal"] = signal.SIGINT
+                elif stop_reason["type"] == lldb.eStopReasonException:
+                    stop_reason["data"]["desc"] = self.corefile_thread.GetStopDescription(100)
+
+        return stop_reason
 
     def get_stackframes(self):
         class ScriptedStackFrame:

From 45148bfe8aece6ca319dcc32351e20bba26b2ea7 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Tue, 18 Jan 2022 12:52:24 +0100
Subject: [PATCH 440/946] [lldb/Plugins] Fix ScriptedThread IndexID reporting

When listing all the Scripted Threads of a ScriptedProcess, we can see that all
have the thread index set to 1. This is caused by the lldb_private::Thread
constructor, which sets the m_index_id member using the provided thread id `tid`.

Because the call to the super constructor is done before instantiating
the `ScriptedThreadInterface`, lldb can't fetch the thread id from the
script instance, so it uses `LLDB_INVALID_THREAD_ID` instead.

To mitigate this, this patch takes advantage of the `ScriptedThread::Create`
fallible constructor idiom to defer calling the `ScriptedThread` constructor
(and the `Thread` super constructor with it), until we can fetch a valid
thread id `tid` from the `ScriptedThreadInterface`.

rdar://87432065

Differential Revision: https://reviews.llvm.org/D117076

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 lldb/include/lldb/Target/Thread.h             |  2 +-
 .../Process/scripted/ScriptedProcess.cpp      | 12 +--
 .../Process/scripted/ScriptedThread.cpp       | 80 ++++++++++---------
 .../Plugins/Process/scripted/ScriptedThread.h | 12 ++-
 .../Python/ScriptedThreadPythonInterface.cpp  |  2 +-
 5 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h
index 91feed310eb97..587b29eb4c661 100644
--- a/lldb/include/lldb/Target/Thread.h
+++ b/lldb/include/lldb/Target/Thread.h
@@ -1244,7 +1244,7 @@ class Thread : public std::enable_shared_from_this<Thread>,
                                          // the stop info was checked against
                                          // the stop info override
   const uint32_t m_index_id; ///< A unique 1 based index assigned to each thread
-                             ///for easy UI/command line access.
+                             /// for easy UI/command line access.
   lldb::RegisterContextSP m_reg_context_sp; ///< The register context for this
                                             ///thread's current register state.
   lldb::StateType m_state;                  ///< The state of our process.
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index e28658e33cdad..e71a0fdf8de92 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -328,12 +328,14 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
       return true;
     }
 
-    lldb::ThreadSP thread_sp =
-        std::make_shared<ScriptedThread>(*this, error, val->GetAsGeneric());
+    auto thread_or_error = ScriptedThread::Create(*this, val->GetAsGeneric());
 
-    if (!thread_sp || error.Fail())
-      return GetInterface().ErrorWithMessage<bool>(LLVM_PRETTY_FUNCTION,
-                                                   error.AsCString(), error);
+    if (!thread_or_error)
+      return GetInterface().ErrorWithMessage<bool>(
+          LLVM_PRETTY_FUNCTION, toString(thread_or_error.takeError()), error);
+
+    ThreadSP thread_sp = thread_or_error.get();
+    lldbassert(thread_sp && "Couldn't initialize scripted thread.");
 
     RegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext();
     if (!reg_ctx_sp)
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
index 14f4f99cf9c4a..b6cbb62fd6e6a 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.cpp
@@ -28,52 +28,60 @@ void ScriptedThread::CheckInterpreterAndScriptObject() const {
   lldbassert(GetInterface() && "Invalid Scripted Thread Interface.");
 }
 
-ScriptedThread::ScriptedThread(ScriptedProcess &process, Status &error,
-                               StructuredData::Generic *script_object)
-    : Thread(process, LLDB_INVALID_THREAD_ID), m_scripted_process(process),
-      m_scripted_thread_interface_sp(
-          m_scripted_process.GetInterface().CreateScriptedThreadInterface()) {
-  if (!process.IsValid()) {
-    error.SetErrorString("Invalid scripted process");
-    return;
-  }
+llvm::Expected<std::shared_ptr<ScriptedThread>>
+ScriptedThread::Create(ScriptedProcess &process,
+                       StructuredData::Generic *script_object) {
+  if (!process.IsValid())
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Invalid scripted process.");
 
   process.CheckInterpreterAndScriptObject();
 
-  auto scripted_thread_interface = GetInterface();
-  if (!scripted_thread_interface) {
-    error.SetErrorString("Failed to get scripted thread interface.");
-    return;
-  }
-
-  llvm::Optional<std::string> class_name =
-      process.GetInterface().GetScriptedThreadPluginName();
-  if (!class_name || class_name->empty()) {
-    error.SetErrorString("Failed to get scripted thread class name.");
-    return;
+  auto scripted_thread_interface =
+      process.GetInterface().CreateScriptedThreadInterface();
+  if (!scripted_thread_interface)
+    return llvm::createStringError(
+        llvm::inconvertibleErrorCode(),
+        "Failed to create scripted thread interface.");
+
+  llvm::StringRef thread_class_name;
+  if (!script_object) {
+    llvm::Optional<std::string> class_name =
+        process.GetInterface().GetScriptedThreadPluginName();
+    if (!class_name || class_name->empty())
+      return llvm::createStringError(
+          llvm::inconvertibleErrorCode(),
+          "Failed to get scripted thread class name.");
+    thread_class_name = *class_name;
   }
 
   ExecutionContext exe_ctx(process);
-
-  m_script_object_sp = scripted_thread_interface->CreatePluginObject(
-      class_name->c_str(), exe_ctx, process.m_scripted_process_info.GetArgsSP(),
-      script_object);
-
-  if (!m_script_object_sp) {
-    error.SetErrorString("Failed to create script object");
-    return;
-  }
-
-  if (!m_script_object_sp->IsValid()) {
-    m_script_object_sp = nullptr;
-    error.SetErrorString("Created script object is invalid");
-    return;
-  }
+  StructuredData::GenericSP owned_script_object_sp =
+      scripted_thread_interface->CreatePluginObject(
+          thread_class_name, exe_ctx,
+          process.m_scripted_process_info.GetArgsSP(), script_object);
+
+  if (!owned_script_object_sp)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Failed to create script object.");
+  if (!owned_script_object_sp->IsValid())
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "Created script object is invalid.");
 
   lldb::tid_t tid = scripted_thread_interface->GetThreadID();
-  SetID(tid);
+
+  return std::make_shared<ScriptedThread>(process, scripted_thread_interface,
+                                          tid, owned_script_object_sp);
 }
 
+ScriptedThread::ScriptedThread(ScriptedProcess &process,
+                               ScriptedThreadInterfaceSP interface_sp,
+                               lldb::tid_t tid,
+                               StructuredData::GenericSP script_object_sp)
+    : Thread(process, tid), m_scripted_process(process),
+      m_scripted_thread_interface_sp(interface_sp),
+      m_script_object_sp(script_object_sp) {}
+
 ScriptedThread::~ScriptedThread() { DestroyThread(); }
 
 const char *ScriptedThread::GetName() {
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedThread.h b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
index d3cd26c57826d..8d8a7c2a3df90 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedThread.h
+++ b/lldb/source/Plugins/Process/scripted/ScriptedThread.h
@@ -25,12 +25,18 @@ class ScriptedProcess;
 namespace lldb_private {
 
 class ScriptedThread : public lldb_private::Thread {
+
 public:
-  ScriptedThread(ScriptedProcess &process, Status &error,
-                 StructuredData::Generic *script_object = nullptr);
+  ScriptedThread(ScriptedProcess &process,
+                 lldb::ScriptedThreadInterfaceSP interface_sp, lldb::tid_t tid,
+                 StructuredData::GenericSP script_object_sp = nullptr);
 
   ~ScriptedThread() override;
 
+  static llvm::Expected<std::shared_ptr<ScriptedThread>>
+  Create(ScriptedProcess &process,
+         StructuredData::Generic *script_object = nullptr);
+
   lldb::RegisterContextSP GetRegisterContext() override;
 
   lldb::RegisterContextSP
@@ -61,8 +67,8 @@ class ScriptedThread : public lldb_private::Thread {
 
   const ScriptedProcess &m_scripted_process;
   lldb::ScriptedThreadInterfaceSP m_scripted_thread_interface_sp = nullptr;
+  lldb_private::StructuredData::GenericSP m_script_object_sp = nullptr;
   std::shared_ptr<DynamicRegisterInfo> m_register_info_sp = nullptr;
-  lldb_private::StructuredData::ObjectSP m_script_object_sp = nullptr;
 };
 
 } // namespace lldb_private
diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
index 511a42fe2c26a..d471b2c5f7e3d 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/ScriptedThreadPythonInterface.cpp
@@ -32,7 +32,7 @@ ScriptedThreadPythonInterface::ScriptedThreadPythonInterface(
 StructuredData::GenericSP ScriptedThreadPythonInterface::CreatePluginObject(
     const llvm::StringRef class_name, ExecutionContext &exe_ctx,
     StructuredData::DictionarySP args_sp, StructuredData::Generic *script_obj) {
-  if (class_name.empty())
+  if (class_name.empty() && !script_obj)
     return {};
 
   ProcessSP process_sp = exe_ctx.GetProcessSP();

From 91bb116190cdc598863a7a3fda57e431dd832449 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Tue, 18 Jan 2022 12:56:42 +0100
Subject: [PATCH 441/946] [lldb/Interpreter] Make
 `ScriptedInterface::ErrorWithMessage` static (NFC)

This patch changes the `ScriptedInterface::ErrorWithMessage` method to
make it `static` which makes it easier to call.

The patch also updates its various call sites to reflect this change.

Differential Revision: https://reviews.llvm.org/D117374

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 .../lldb/Interpreter/ScriptedInterface.h      |  6 +++---
 .../Process/scripted/ScriptedProcess.cpp      | 20 +++++++++----------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/ScriptedInterface.h b/lldb/include/lldb/Interpreter/ScriptedInterface.h
index 27cf9f036e5fd..9eb11832003e6 100644
--- a/lldb/include/lldb/Interpreter/ScriptedInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedInterface.h
@@ -31,9 +31,9 @@ class ScriptedInterface {
                      StructuredData::Generic *script_obj = nullptr) = 0;
 
   template <typename Ret>
-  Ret ErrorWithMessage(llvm::StringRef caller_name, llvm::StringRef error_msg,
-                       Status &error,
-                       uint32_t log_caterogy = LIBLLDB_LOG_PROCESS) {
+  static Ret ErrorWithMessage(llvm::StringRef caller_name,
+                              llvm::StringRef error_msg, Status &error,
+                              uint32_t log_caterogy = LIBLLDB_LOG_PROCESS) {
     LLDB_LOGF(GetLogIfAllCategoriesSet(log_caterogy), "%s ERROR = %s",
               caller_name.data(), error_msg.data());
     error.SetErrorString(llvm::Twine(caller_name + llvm::Twine(" ERROR = ") +
diff --git a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
index e71a0fdf8de92..5eb7cb0e6a5c8 100644
--- a/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
+++ b/lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp
@@ -222,8 +222,8 @@ bool ScriptedProcess::IsAlive() {
 size_t ScriptedProcess::DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
                                      Status &error) {
   if (!m_interpreter)
-    return GetInterface().ErrorWithMessage<size_t>(LLVM_PRETTY_FUNCTION,
-                                                   "No interpreter.", error);
+    return ScriptedInterface::ErrorWithMessage<size_t>(
+        LLVM_PRETTY_FUNCTION, "No interpreter.", error);
 
   lldb::DataExtractorSP data_extractor_sp =
       GetInterface().ReadMemoryAtAddress(addr, size, error);
@@ -235,7 +235,7 @@ size_t ScriptedProcess::DoReadMemory(lldb::addr_t addr, void *buf, size_t size,
       0, data_extractor_sp->GetByteSize(), buf, size, GetByteOrder());
 
   if (!bytes_copied || bytes_copied == LLDB_INVALID_OFFSET)
-    return GetInterface().ErrorWithMessage<size_t>(
+    return ScriptedInterface::ErrorWithMessage<size_t>(
         LLVM_PRETTY_FUNCTION, "Failed to copy read memory to buffer.", error);
 
   return size;
@@ -293,7 +293,7 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
   ScriptLanguage language = m_interpreter->GetLanguage();
 
   if (language != eScriptLanguagePython)
-    return GetInterface().ErrorWithMessage<bool>(
+    return ScriptedInterface::ErrorWithMessage<bool>(
         LLVM_PRETTY_FUNCTION,
         llvm::Twine("ScriptInterpreter language (" +
                     llvm::Twine(m_interpreter->LanguageToString(language)) +
@@ -304,7 +304,7 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
   StructuredData::DictionarySP thread_info_sp = GetInterface().GetThreadsInfo();
 
   if (!thread_info_sp)
-    return GetInterface().ErrorWithMessage<bool>(
+    return ScriptedInterface::ErrorWithMessage<bool>(
         LLVM_PRETTY_FUNCTION,
         "Couldn't fetch thread list from Scripted Process.", error);
 
@@ -312,13 +312,13 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
       [this, &old_thread_list, &error,
        &new_thread_list](ConstString key, StructuredData::Object *val) -> bool {
     if (!val)
-      return GetInterface().ErrorWithMessage<bool>(
+      return ScriptedInterface::ErrorWithMessage<bool>(
           LLVM_PRETTY_FUNCTION, "Invalid thread info object", error);
 
     lldb::tid_t tid = LLDB_INVALID_THREAD_ID;
     if (!llvm::to_integer(key.AsCString(), tid))
-      return GetInterface().ErrorWithMessage<bool>(LLVM_PRETTY_FUNCTION,
-                                                   "Invalid thread id", error);
+      return ScriptedInterface::ErrorWithMessage<bool>(
+          LLVM_PRETTY_FUNCTION, "Invalid thread id", error);
 
     if (ThreadSP thread_sp =
             old_thread_list.FindThreadByID(tid, false /*=can_update*/)) {
@@ -331,7 +331,7 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
     auto thread_or_error = ScriptedThread::Create(*this, val->GetAsGeneric());
 
     if (!thread_or_error)
-      return GetInterface().ErrorWithMessage<bool>(
+      return ScriptedInterface::ErrorWithMessage<bool>(
           LLVM_PRETTY_FUNCTION, toString(thread_or_error.takeError()), error);
 
     ThreadSP thread_sp = thread_or_error.get();
@@ -339,7 +339,7 @@ bool ScriptedProcess::DoUpdateThreadList(ThreadList &old_thread_list,
 
     RegisterContextSP reg_ctx_sp = thread_sp->GetRegisterContext();
     if (!reg_ctx_sp)
-      return GetInterface().ErrorWithMessage<bool>(
+      return ScriptedInterface::ErrorWithMessage<bool>(
           LLVM_PRETTY_FUNCTION,
           llvm::Twine("Invalid Register Context for thread " +
                       llvm::Twine(key.AsCString()))

From bb1fe369774adb86a6bdeec3f56684b6a01c7ff3 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Wed, 19 Jan 2022 12:51:04 -0800
Subject: [PATCH 442/946] [AMDGPU] Make v8i16/v8f16 legal

Differential Revision: https://reviews.llvm.org/D117721
---
 llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp |   7 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 156 ++-
 llvm/lib/Target/AMDGPU/SIInstructions.td      |  40 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.td      |   8 +-
 .../test/Analysis/CostModel/AMDGPU/add-sub.ll |   4 +-
 .../Analysis/CostModel/AMDGPU/arith-ssat.ll   |  32 +-
 .../Analysis/CostModel/AMDGPU/arith-usat.ll   |  32 +-
 llvm/test/Analysis/CostModel/AMDGPU/cast.ll   |  76 +-
 llvm/test/Analysis/CostModel/AMDGPU/fadd.ll   |   4 +-
 llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll   |   8 +-
 llvm/test/Analysis/CostModel/AMDGPU/fma.ll    |  92 +-
 llvm/test/Analysis/CostModel/AMDGPU/fmul.ll   |   4 +-
 llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll |  12 +-
 llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll |  12 +-
 llvm/test/Analysis/CostModel/AMDGPU/fsub.ll   |   4 +-
 llvm/test/Analysis/CostModel/AMDGPU/mul.ll    |  20 +-
 .../GlobalISel/inst-select-load-constant.mir  |  24 +-
 .../GlobalISel/inst-select-load-flat.mir      |  24 +-
 .../GlobalISel/inst-select-load-global.mir    |  30 +-
 .../GlobalISel/inst-select-load-local-128.mir |  64 +-
 .../GlobalISel/inst-select-store-flat.mir     |  24 +-
 .../GlobalISel/inst-select-store-global.mir   |  30 +-
 llvm/test/CodeGen/AMDGPU/add.v2i16.ll         |   5 +-
 .../CodeGen/AMDGPU/coalesce-vgpr-alignment.ll |   1 -
 .../CodeGen/AMDGPU/extract-subvector-16bit.ll | 437 +++++++++
 .../CodeGen/AMDGPU/extract_vector_elt-f16.ll  |  13 +
 .../CodeGen/AMDGPU/extract_vector_elt-i16.ll  |  49 +
 llvm/test/CodeGen/AMDGPU/function-returns.ll  |   4 +-
 llvm/test/CodeGen/AMDGPU/idot8s.ll            | 586 ++++++------
 llvm/test/CodeGen/AMDGPU/idot8u.ll            | 896 +++++++++---------
 .../CodeGen/AMDGPU/inlineasm-illegal-type.ll  |   6 +-
 .../CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 339 +++++++
 llvm/test/CodeGen/AMDGPU/kernel-args.ll       |  11 +-
 llvm/test/CodeGen/AMDGPU/load-constant-i16.ll |   8 +-
 llvm/test/CodeGen/AMDGPU/load-global-i16.ll   |   6 +-
 llvm/test/CodeGen/AMDGPU/sub.v2i16.ll         |  10 +-
 36 files changed, 2065 insertions(+), 1013 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 63e8c85b84137..b9d0655feef72 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -360,6 +360,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f16, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i16, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f16, Custom);
+  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i16, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
   setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v3f32, Custom);
@@ -1408,6 +1410,11 @@ SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
       Start != 1)
     return Op;
 
+  if (((SrcVT == MVT::v8f16 && VT == MVT::v4f16) ||
+       (SrcVT == MVT::v8i16 && VT == MVT::v4i16)) &&
+      (Start == 0 || Start == 4))
+    return Op;
+
   DAG.ExtractVectorElements(Op.getOperand(0), Args, Start,
                             VT.getVectorNumElements());
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 4008c7e36e4f4..ec610160b2278 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -134,6 +134,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     addRegisterClass(MVT::v2f16, &AMDGPU::SReg_32RegClass);
     addRegisterClass(MVT::v4i16, &AMDGPU::SReg_64RegClass);
     addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
+    addRegisterClass(MVT::v8i16, &AMDGPU::SGPR_128RegClass);
+    addRegisterClass(MVT::v8f16, &AMDGPU::SGPR_128RegClass);
   }
 
   addRegisterClass(MVT::v32i32, &AMDGPU::VReg_1024RegClass);
@@ -269,7 +271,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
                   MVT::v2i64, MVT::v2f64, MVT::v4i16, MVT::v4f16,
                   MVT::v3i64, MVT::v3f64, MVT::v6i32, MVT::v6f32,
                   MVT::v4i64, MVT::v4f64, MVT::v8i64, MVT::v8f64,
-                  MVT::v16i64, MVT::v16f64, MVT::v32i32, MVT::v32f32 }) {
+                  MVT::v8i16, MVT::v8f16, MVT::v16i64, MVT::v16f64,
+                  MVT::v32i32, MVT::v32f32 }) {
     for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
       switch (Op) {
       case ISD::LOAD:
@@ -611,7 +614,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     if (STI.hasMadF16())
       setOperationAction(ISD::FMAD, MVT::f16, Legal);
 
-    for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16}) {
+    for (MVT VT : {MVT::v2i16, MVT::v2f16, MVT::v4i16, MVT::v4f16, MVT::v8i16,
+                   MVT::v8f16}) {
       for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
         switch (Op) {
         case ISD::LOAD:
@@ -673,6 +677,21 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::STORE, MVT::v4f16, Promote);
     AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32);
 
+    setOperationAction(ISD::LOAD, MVT::v8i16, Promote);
+    AddPromotedToType(ISD::LOAD, MVT::v8i16, MVT::v4i32);
+    setOperationAction(ISD::LOAD, MVT::v8f16, Promote);
+    AddPromotedToType(ISD::LOAD, MVT::v8f16, MVT::v4i32);
+
+    setOperationAction(ISD::STORE, MVT::v4i16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v4i16, MVT::v2i32);
+    setOperationAction(ISD::STORE, MVT::v4f16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v4f16, MVT::v2i32);
+
+    setOperationAction(ISD::STORE, MVT::v8i16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v8i16, MVT::v4i32);
+    setOperationAction(ISD::STORE, MVT::v8f16, Promote);
+    AddPromotedToType(ISD::STORE, MVT::v8f16, MVT::v4i32);
+
     setOperationAction(ISD::ANY_EXTEND, MVT::v2i32, Expand);
     setOperationAction(ISD::ZERO_EXTEND, MVT::v2i32, Expand);
     setOperationAction(ISD::SIGN_EXTEND, MVT::v2i32, Expand);
@@ -682,6 +701,10 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Expand);
     setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Expand);
 
+    setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Expand);
+    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Expand);
+    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Expand);
+
     if (!Subtarget->hasVOP3PInsts()) {
       setOperationAction(ISD::BUILD_VECTOR, MVT::v2i16, Custom);
       setOperationAction(ISD::BUILD_VECTOR, MVT::v2f16, Custom);
@@ -699,9 +722,20 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::FMINNUM_IEEE, MVT::v4f16, Custom);
     setOperationAction(ISD::FMAXNUM_IEEE, MVT::v4f16, Custom);
+    setOperationAction(ISD::FMINNUM_IEEE, MVT::v8f16, Custom);
+    setOperationAction(ISD::FMAXNUM_IEEE, MVT::v8f16, Custom);
 
     setOperationAction(ISD::FMINNUM, MVT::v4f16, Expand);
     setOperationAction(ISD::FMAXNUM, MVT::v4f16, Expand);
+    setOperationAction(ISD::FMINNUM, MVT::v8f16, Expand);
+    setOperationAction(ISD::FMAXNUM, MVT::v8f16, Expand);
+
+    for (MVT Vec16 : { MVT::v8i16, MVT::v8f16 }) {
+      setOperationAction(ISD::BUILD_VECTOR, Vec16, Custom);
+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, Vec16, Custom);
+      setOperationAction(ISD::INSERT_VECTOR_ELT, Vec16, Expand);
+      setOperationAction(ISD::SCALAR_TO_VECTOR, Vec16, Expand);
+    }
   }
 
   if (Subtarget->hasVOP3PInsts()) {
@@ -735,34 +769,42 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
 
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f16, Custom);
     setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f16, Custom);
+    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom);
 
-    setOperationAction(ISD::SHL, MVT::v4i16, Custom);
-    setOperationAction(ISD::SRA, MVT::v4i16, Custom);
-    setOperationAction(ISD::SRL, MVT::v4i16, Custom);
-    setOperationAction(ISD::ADD, MVT::v4i16, Custom);
-    setOperationAction(ISD::SUB, MVT::v4i16, Custom);
-    setOperationAction(ISD::MUL, MVT::v4i16, Custom);
+    for (MVT VT : { MVT::v4i16, MVT::v8i16 }) {
+      // Split vector operations.
+      setOperationAction(ISD::SHL, VT, Custom);
+      setOperationAction(ISD::SRA, VT, Custom);
+      setOperationAction(ISD::SRL, VT, Custom);
+      setOperationAction(ISD::ADD, VT, Custom);
+      setOperationAction(ISD::SUB, VT, Custom);
+      setOperationAction(ISD::MUL, VT, Custom);
 
-    setOperationAction(ISD::SMIN, MVT::v4i16, Custom);
-    setOperationAction(ISD::SMAX, MVT::v4i16, Custom);
-    setOperationAction(ISD::UMIN, MVT::v4i16, Custom);
-    setOperationAction(ISD::UMAX, MVT::v4i16, Custom);
+      setOperationAction(ISD::SMIN, VT, Custom);
+      setOperationAction(ISD::SMAX, VT, Custom);
+      setOperationAction(ISD::UMIN, VT, Custom);
+      setOperationAction(ISD::UMAX, VT, Custom);
 
-    setOperationAction(ISD::UADDSAT, MVT::v4i16, Custom);
-    setOperationAction(ISD::SADDSAT, MVT::v4i16, Custom);
-    setOperationAction(ISD::USUBSAT, MVT::v4i16, Custom);
-    setOperationAction(ISD::SSUBSAT, MVT::v4i16, Custom);
+      setOperationAction(ISD::UADDSAT, VT, Custom);
+      setOperationAction(ISD::SADDSAT, VT, Custom);
+      setOperationAction(ISD::USUBSAT, VT, Custom);
+      setOperationAction(ISD::SSUBSAT, VT, Custom);
+    }
 
-    setOperationAction(ISD::FADD, MVT::v4f16, Custom);
-    setOperationAction(ISD::FMUL, MVT::v4f16, Custom);
-    setOperationAction(ISD::FMA, MVT::v4f16, Custom);
+    for (MVT VT : { MVT::v4f16, MVT::v8f16 }) {
+      // Split vector operations.
+      setOperationAction(ISD::FADD, VT, Custom);
+      setOperationAction(ISD::FMUL, VT, Custom);
+      setOperationAction(ISD::FMA, VT, Custom);
+      setOperationAction(ISD::FCANONICALIZE, VT, Custom);
+    }
 
     setOperationAction(ISD::FMAXNUM, MVT::v2f16, Custom);
     setOperationAction(ISD::FMINNUM, MVT::v2f16, Custom);
 
     setOperationAction(ISD::FMINNUM, MVT::v4f16, Custom);
     setOperationAction(ISD::FMAXNUM, MVT::v4f16, Custom);
-    setOperationAction(ISD::FCANONICALIZE, MVT::v4f16, Custom);
 
     setOperationAction(ISD::FEXP, MVT::v2f16, Custom);
     setOperationAction(ISD::SELECT, MVT::v4i16, Custom);
@@ -799,7 +841,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::FABS, MVT::v2f16, Custom);
   }
 
-  for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8 }) {
+  for (MVT VT : { MVT::v4i16, MVT::v4f16, MVT::v2i8, MVT::v4i8, MVT::v8i8,
+                  MVT::v8i16, MVT::v8f16 }) {
     setOperationAction(ISD::SELECT, VT, Custom);
   }
 
@@ -4610,7 +4653,8 @@ SDValue SITargetLowering::splitBinaryVectorOp(SDValue Op,
   unsigned Opc = Op.getOpcode();
   EVT VT = Op.getValueType();
   assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
-         VT == MVT::v8f32 || VT == MVT::v16f32 || VT == MVT::v32f32);
+         VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8f32 ||
+         VT == MVT::v16f32 || VT == MVT::v32f32);
 
   SDValue Lo0, Hi0;
   std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
@@ -4631,21 +4675,26 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
                                               SelectionDAG &DAG) const {
   unsigned Opc = Op.getOpcode();
   EVT VT = Op.getValueType();
-  assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4f32 ||
-         VT == MVT::v8f32 || VT == MVT::v16f32 || VT == MVT::v32f32);
+  assert(VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v8i16 ||
+         VT == MVT::v8f16 || VT == MVT::v4f32 || VT == MVT::v8f32 ||
+         VT == MVT::v16f32 || VT == MVT::v32f32);
 
   SDValue Lo0, Hi0;
-  std::tie(Lo0, Hi0) = DAG.SplitVectorOperand(Op.getNode(), 0);
+  SDValue Op0 = Op.getOperand(0);
+  std::tie(Lo0, Hi0) = Op0.getValueType().isVector()
+                         ? DAG.SplitVectorOperand(Op.getNode(), 0)
+                         : std::make_pair(Op0, Op0);
   SDValue Lo1, Hi1;
   std::tie(Lo1, Hi1) = DAG.SplitVectorOperand(Op.getNode(), 1);
   SDValue Lo2, Hi2;
   std::tie(Lo2, Hi2) = DAG.SplitVectorOperand(Op.getNode(), 2);
 
   SDLoc SL(Op);
+  auto ResVT = DAG.GetSplitDestVTs(VT);
 
-  SDValue OpLo = DAG.getNode(Opc, SL, Lo0.getValueType(), Lo0, Lo1, Lo2,
+  SDValue OpLo = DAG.getNode(Opc, SL, ResVT.first, Lo0, Lo1, Lo2,
                              Op->getFlags());
-  SDValue OpHi = DAG.getNode(Opc, SL, Hi0.getValueType(), Hi0, Hi1, Hi2,
+  SDValue OpHi = DAG.getNode(Opc, SL, ResVT.second, Hi0, Hi1, Hi2,
                              Op->getFlags());
 
   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), VT, OpLo, OpHi);
@@ -5307,7 +5356,7 @@ SDValue SITargetLowering::lowerFMINNUM_FMAXNUM(SDValue Op,
   if (IsIEEEMode)
     return expandFMINNUM_FMAXNUM(Op.getNode(), DAG);
 
-  if (VT == MVT::v4f16)
+  if (VT == MVT::v4f16 || VT == MVT::v8f16)
     return splitBinaryVectorOp(Op, DAG);
   return Op;
 }
@@ -5709,7 +5758,6 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   EVT VecVT = Vec.getValueType();
   unsigned VecSize = VecVT.getSizeInBits();
   EVT EltVT = VecVT.getVectorElementType();
-  assert(VecSize <= 64);
 
   DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
 
@@ -5720,6 +5768,28 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
   if (SDValue Combined = performExtractVectorEltCombine(Op.getNode(), DCI))
     return Combined;
 
+  if (VecSize == 128) {
+    SDValue Lo, Hi;
+    EVT LoVT, HiVT;
+    SDValue V2 = DAG.getBitcast(MVT::v2i64, Vec);
+    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
+    Lo =
+        DAG.getBitcast(LoVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
+                                         V2, DAG.getConstant(0, SL, MVT::i32)));
+    Hi =
+        DAG.getBitcast(HiVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i64,
+                                         V2, DAG.getConstant(1, SL, MVT::i32)));
+    EVT IdxVT = Idx.getValueType();
+    unsigned NElem = VecVT.getVectorNumElements();
+    assert(isPowerOf2_32(NElem));
+    SDValue IdxMask = DAG.getConstant(NElem / 2 - 1, SL, IdxVT);
+    SDValue NewIdx = DAG.getNode(ISD::AND, SL, IdxVT, Idx, IdxMask);
+    SDValue Half = DAG.getSelectCC(SL, Idx, IdxMask, Hi, Lo, ISD::SETUGT);
+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, Half, NewIdx);
+  }
+
+  assert(VecSize <= 64);
+
   unsigned EltSize = EltVT.getSizeInBits();
   assert(isPowerOf2_32(EltSize));
 
@@ -5802,20 +5872,27 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
   SDLoc SL(Op);
   EVT VT = Op.getValueType();
 
-  if (VT == MVT::v4i16 || VT == MVT::v4f16) {
-    EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(), 2);
+  if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
+      VT == MVT::v8i16 || VT == MVT::v8f16) {
+    EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
+                                  VT.getVectorNumElements() / 2);
+    MVT HalfIntVT = MVT::getIntegerVT(HalfVT.getSizeInBits());
 
     // Turn into pair of packed build_vectors.
     // TODO: Special case for constants that can be materialized with s_mov_b64.
-    SDValue Lo = DAG.getBuildVector(HalfVT, SL,
-                                    { Op.getOperand(0), Op.getOperand(1) });
-    SDValue Hi = DAG.getBuildVector(HalfVT, SL,
-                                    { Op.getOperand(2), Op.getOperand(3) });
+    SmallVector<SDValue, 4> LoOps, HiOps;
+    for (unsigned I = 0, E = VT.getVectorNumElements() / 2; I != E; ++I) {
+      LoOps.push_back(Op.getOperand(I));
+      HiOps.push_back(Op.getOperand(I + E));
+    }
+    SDValue Lo = DAG.getBuildVector(HalfVT, SL, LoOps);
+    SDValue Hi = DAG.getBuildVector(HalfVT, SL, HiOps);
 
-    SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Lo);
-    SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::i32, Hi);
+    SDValue CastLo = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Lo);
+    SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, HalfIntVT, Hi);
 
-    SDValue Blend = DAG.getBuildVector(MVT::v2i32, SL, { CastLo, CastHi });
+    SDValue Blend = DAG.getBuildVector(MVT::getVectorVT(HalfIntVT, 2), SL,
+                                       { CastLo, CastHi });
     return DAG.getNode(ISD::BITCAST, SL, VT, Blend);
   }
 
@@ -8427,6 +8504,9 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
 SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
+  if (VT.getSizeInBits() == 128)
+    return splitTernaryVectorOp(Op, DAG);
+
   assert(VT.getSizeInBits() == 64);
 
   SDLoc DL(Op);
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index b6f19bcc7f132..cba9a77864aaf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1192,6 +1192,26 @@ def : Pat <
   (v2f16 (EXTRACT_SUBREG v4f16:$vec, sub1))
 >;
 
+def : Pat <
+  (extract_subvector v8i16:$vec, (i32 0)),
+  (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub0_sub1))
+>;
+
+def : Pat <
+  (extract_subvector v8i16:$vec, (i32 4)),
+  (v4i16 (EXTRACT_SUBREG v8i16:$vec, sub2_sub3))
+>;
+
+def : Pat <
+  (extract_subvector v8f16:$vec, (i32 0)),
+  (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub0_sub1))
+>;
+
+def : Pat <
+  (extract_subvector v8f16:$vec, (i32 4)),
+  (v4f16 (EXTRACT_SUBREG v8f16:$vec, sub2_sub3))
+>;
+
 foreach Index = 0-31 in {
   def Extract_Element_v32i32_#Index : Extract_Element <
     i32, v32i32, Index, !cast<SubRegIndex>(sub#Index)
@@ -1287,6 +1307,26 @@ def : BitConvert <v2i64, v2f64, VReg_128>;
 def : BitConvert <v2f64, v2i64, VReg_128>;
 def : BitConvert <v4f32, v2i64, VReg_128>;
 def : BitConvert <v2i64, v4f32, VReg_128>;
+def : BitConvert <v8i16, v4i32, SReg_128>;
+def : BitConvert <v4i32, v8i16, SReg_128>;
+def : BitConvert <v8f16, v4f32, VReg_128>;
+def : BitConvert <v8f16, v4i32, VReg_128>;
+def : BitConvert <v4f32, v8f16, VReg_128>;
+def : BitConvert <v4i32, v8f16, VReg_128>;
+def : BitConvert <v8i16, v8f16, VReg_128>;
+def : BitConvert <v8f16, v8i16, VReg_128>;
+def : BitConvert <v4f32, v8i16, VReg_128>;
+def : BitConvert <v8i16, v4f32, VReg_128>;
+def : BitConvert <v8i16, v8f16, SReg_128>;
+def : BitConvert <v8i16, v2i64, SReg_128>;
+def : BitConvert <v8i16, v2f64, SReg_128>;
+def : BitConvert <v8f16, v2i64, SReg_128>;
+def : BitConvert <v8f16, v2f64, SReg_128>;
+def : BitConvert <v8f16, v8i16, SReg_128>;
+def : BitConvert <v2i64, v8i16, SReg_128>;
+def : BitConvert <v2f64, v8i16, SReg_128>;
+def : BitConvert <v2i64, v8f16, SReg_128>;
+def : BitConvert <v2f64, v8f16, SReg_128>;
 
 // 160-bit bitcast
 def : BitConvert <v5i32, v5f32, SReg_160>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 340e2b48e5cd9..eb9452f4b85ee 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -617,7 +617,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16
   let HasSGPR = 1;
 }
 
-def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64], 32,
+def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
   (add PRIVATE_RSRC_REG)> {
   let isAllocatable = 0;
   let CopyCost = -1;
@@ -784,7 +784,7 @@ multiclass SRegClass<int numRegs, int priority,
 }
 
 defm "" : SRegClass<3, 14, [v3i32, v3f32], SGPR_96Regs, TTMP_96Regs>;
-defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64], SGPR_128Regs, TTMP_128Regs>;
+defm "" : SRegClass<4, 15, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], SGPR_128Regs, TTMP_128Regs>;
 defm "" : SRegClass<5, 16, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
 defm "" : SRegClass<6, 17, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
 defm "" : SRegClass<7, 18, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
@@ -824,7 +824,7 @@ multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
 defm VReg_64 : VRegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16, p0, p1, p4],
                                 (add VGPR_64)>;
 defm VReg_96 : VRegClass<3, [v3i32, v3f32], (add VGPR_96)>;
-defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64], (add VGPR_128)>;
+defm VReg_128 : VRegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add VGPR_128)>;
 defm VReg_160 : VRegClass<5, [v5i32, v5f32], (add VGPR_160)>;
 
 defm VReg_192 : VRegClass<6, [v6i32, v6f32, v3i64, v3f64], (add VGPR_192)>;
@@ -846,7 +846,7 @@ multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
 defm AReg_64 : ARegClass<2, [i64, f64, v2i32, v2f32, v4f16, v4i16],
                         (add AGPR_64)>;
 defm AReg_96 : ARegClass<3, [v3i32, v3f32], (add AGPR_96)>;
-defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64], (add AGPR_128)>;
+defm AReg_128 : ARegClass<4, [v4i32, v4f32, v2i64, v2f64, v8i16, v8f16], (add AGPR_128)>;
 defm AReg_160 : ARegClass<5, [v5i32, v5f32], (add AGPR_160)>;
 defm AReg_192 : ARegClass<6, [v6i32, v6f32, v3i64, v3f64], (add AGPR_192)>;
 defm AReg_224 : ARegClass<7, [v7i32, v7f32], (add AGPR_224)>;
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
index 2bd77e252ed01..998cdecabb72e 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/add-sub.ll
@@ -76,7 +76,7 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-LABEL: 'add_i16'
@@ -98,7 +98,7 @@ define amdgpu_kernel void @add_i16() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5i16 = add <5 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v6i16 = add <6 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16i16 = add <16 x i16> undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17i16 = add <17 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17i16 = add <17 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'add_i16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
index 54636e70502ba..2b0c5ef65f2e5 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-ssat.ll
@@ -55,10 +55,10 @@ define i32 @add(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -113,10 +113,10 @@ define i32 @add(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.sadd.sat.i16(i16 undef, i16 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V17I16 = call <17 x i16> @llvm.sadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.sadd.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.sadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -235,10 +235,10 @@ define i32 @sub(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -293,10 +293,10 @@ define i32 @sub(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.ssub.sat.i16(i16 undef, i16 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V17I16 = call <17 x i16> @llvm.ssub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.ssub.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.ssub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll b/llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll
index 3fe7583041e9a..d347d37cf02d1 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/arith-usat.ll
@@ -55,10 +55,10 @@ define i32 @add(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -113,10 +113,10 @@ define i32 @add(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.uadd.sat.i16(i16 undef, i16 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V17I16 = call <17 x i16> @llvm.uadd.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.uadd.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.uadd.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -235,10 +235,10 @@ define i32 @sub(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
@@ -293,10 +293,10 @@ define i32 @sub(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = call i16 @llvm.usub.sat.i16(i16 undef, i16 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2I16 = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> undef, <2 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> undef, <4 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> undef, <8 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> undef, <16 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V17I16 = call <17 x i16> @llvm.usub.sat.v17i16(<17 x i16> undef, <17 x i16> undef)
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> undef, <32 x i16> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = call i8 @llvm.usub.sat.i8(i8 undef, i8 undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I8 = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> undef, <2 x i8> undef)
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4I8 = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> undef, <4 x i8> undef)
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/cast.ll b/llvm/test/Analysis/CostModel/AMDGPU/cast.ll
index 308dd4c40a100..e6f6b23430a6d 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/cast.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/cast.ll
@@ -299,33 +299,19 @@ define void @sitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
 }
 
 define void @sitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
-; FAST-LABEL: 'sitofp8'
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; SLOW-LABEL: 'sitofp8'
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; FAST-SIZE-LABEL: 'sitofp8'
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-LABEL: 'sitofp8'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; SLOW-SIZE-LABEL: 'sitofp8'
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-SIZE-LABEL: 'sitofp8'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = sitofp <8 x i1> %a to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = sitofp <8 x i8> %b to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = sitofp <8 x i16> %c to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = sitofp <8 x i32> %d to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %A1 = sitofp <8 x i1> %a to <8 x float>
   %B1 = sitofp <8 x i8> %b to <8 x float>
@@ -391,33 +377,19 @@ define void @uitofp4(<4 x i1> %a, <4 x i8> %b, <4 x i16> %c, <4 x i32> %d) {
 }
 
 define void @uitofp8(<8 x i1> %a, <8 x i8> %b, <8 x i16> %c, <8 x i32> %d) {
-; FAST-LABEL: 'uitofp8'
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; SLOW-LABEL: 'uitofp8'
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; FAST-SIZE-LABEL: 'uitofp8'
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-LABEL: 'uitofp8'
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
+; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; SLOW-SIZE-LABEL: 'uitofp8'
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
-; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; ALL-SIZE-LABEL: 'uitofp8'
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %A1 = uitofp <8 x i1> %a to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %B1 = uitofp <8 x i8> %b to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %C1 = uitofp <8 x i16> %c to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %D1 = uitofp <8 x i32> %d to <8 x float>
+; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %A1 = uitofp <8 x i1> %a to <8 x float>
   %B1 = uitofp <8 x i8> %b to <8 x float>
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
index 12760920733d1..6961e4d6a4090 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fadd.ll
@@ -115,7 +115,7 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef
-; FASTF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fadd <17 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fadd_f16'
@@ -135,7 +135,7 @@ define amdgpu_kernel void @fadd_f16() #0 {
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fadd <4 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fadd <5 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fadd <16 x half> undef, undef
-; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fadd <17 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fadd <17 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOWF64-SIZE-LABEL: 'fadd_f16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
index fe5b57be5d3bf..298557b53c7e4 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fdiv.ll
@@ -165,7 +165,7 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ieee'
@@ -185,7 +185,7 @@ define amdgpu_kernel void @fdiv_f16_f32ieee() #0 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half undef, undef
@@ -216,7 +216,7 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v4f16 = fdiv <4 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v16f16 = fdiv <16 x half> undef, undef
-; FP16-NEXT:  Cost Model: Found an estimated cost of 480 for instruction: %v17f16 = fdiv <17 x half> undef, undef
+; FP16-NEXT:  Cost Model: Found an estimated cost of 576 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; NOFP16-SIZE-LABEL: 'fdiv_f16_f32ftzdaz'
@@ -236,7 +236,7 @@ define amdgpu_kernel void @fdiv_f16_f32ftzdaz() #1 {
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v4f16 = fdiv <4 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %v5f16 = fdiv <5 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %v16f16 = fdiv <16 x half> undef, undef
-; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 320 for instruction: %v17f16 = fdiv <17 x half> undef, undef
+; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 384 for instruction: %v17f16 = fdiv <17 x half> undef, undef
 ; FP16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %f16 = fdiv half undef, undef
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
index cec33c25c6157..e6d635fd5dc0f 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fma.ll
@@ -1,34 +1,34 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,SLOWF64,NOPACKEDF32 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,FASTF64,PACKEDF32 %s
-; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,SLOWF64,NOPACKEDF32 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,SLOWF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,FASTF64 %s
+; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST,SLOWF64 %s
 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,SLOWF64-SIZE,NOPACKEDF32-SIZE %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,FASTF64-SIZE,PACKEDF32-SIZE %s
-; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,SLOWF64-SIZE,NOPACKEDF32-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,SLOWF64-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,FASTF64-SIZE %s
+; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FAST-SIZE,SLOWF64-SIZE %s
 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=SLOW-SIZE %s
 ; END.
 
 define amdgpu_kernel void @fma_f32() #0 {
-; NOPACKEDF32-LABEL: 'fma_f32'
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
-; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
-;
-; PACKEDF32-LABEL: 'fma_f32'
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
-; PACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+; SLOWF64-LABEL: 'fma_f32'
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
+; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
+;
+; FASTF64-LABEL: 'fma_f32'
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
+; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fma_f32'
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
@@ -40,25 +40,25 @@ define amdgpu_kernel void @fma_f32() #0 {
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 192 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
-; NOPACKEDF32-SIZE-LABEL: 'fma_f32'
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
-; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
-;
-; PACKEDF32-SIZE-LABEL: 'fma_f32'
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
-; PACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+; SLOWF64-SIZE-LABEL: 'fma_f32'
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
+; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
+;
+; FASTF64-SIZE-LABEL: 'fma_f32'
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = call <2 x float> @llvm.fma.v2f32(<2 x float> undef, <2 x float> undef, <2 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f32 = call <3 x float> @llvm.fma.v3f32(<3 x float> undef, <3 x float> undef, <3 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v5f32 = call <5 x float> @llvm.fma.v5f32(<5 x float> undef, <5 x float> undef, <5 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = call <9 x float> @llvm.fma.v9f32(<9 x float> undef, <9 x float> undef, <9 x float> undef) #2
+; FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fma_f32'
 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f32 = call float @llvm.fma.f32(float undef, float undef, float undef) #2
@@ -145,7 +145,7 @@ define amdgpu_kernel void @fma_f16() #0 {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) #2
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v17f16 = call <17 x half> @llvm.fma.v17f16(<17 x half> undef, <17 x half> undef, <17 x half> undef) #2
+; FAST-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = call <17 x half> @llvm.fma.v17f16(<17 x half> undef, <17 x half> undef, <17 x half> undef) #2
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fma_f16'
@@ -165,7 +165,7 @@ define amdgpu_kernel void @fma_f16() #0 {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = call <4 x half> @llvm.fma.v4f16(<4 x half> undef, <4 x half> undef, <4 x half> undef) #2
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = call <5 x half> @llvm.fma.v5f16(<5 x half> undef, <5 x half> undef, <5 x half> undef) #2
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = call <16 x half> @llvm.fma.v16f16(<16 x half> undef, <16 x half> undef, <16 x half> undef) #2
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v17f16 = call <17 x half> @llvm.fma.v17f16(<17 x half> undef, <17 x half> undef, <17 x half> undef) #2
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = call <17 x half> @llvm.fma.v17f16(<17 x half> undef, <17 x half> undef, <17 x half> undef) #2
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fma_f16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
index a5906b070d58e..758d4aa874400 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fmul.ll
@@ -115,7 +115,7 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
-; GFX9-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef
+; GFX9-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fmul <17 x half> undef, undef
 ; GFX9-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW-LABEL: 'fmul_f16'
@@ -135,7 +135,7 @@ define amdgpu_kernel void @fmul_f16() #0 {
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
-; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fmul <17 x half> undef, undef
+; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fmul <17 x half> undef, undef
 ; GFX9-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOW-SIZE-LABEL: 'fmul_f16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll b/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll
index 3c24d6cdcd286..0c2cccf7083ba 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fptosi.ll
@@ -59,7 +59,7 @@ define i32 @fptosi_double_i16(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'fptosi_double_i16'
@@ -73,7 +73,7 @@ define i32 @fptosi_double_i16(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi double undef to i16
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x double> undef to <2 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x double> undef to <4 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptosi <8 x double> undef to <8 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLOW-SIZE-LABEL: 'fptosi_double_i16'
@@ -181,8 +181,8 @@ define i32 @fptosi_float_i16(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'fptosi_float_i16'
@@ -197,8 +197,8 @@ define i32 @fptosi_float_i16(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptosi float undef to i16
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptosi <2 x float> undef to <2 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptosi <4 x float> undef to <4 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptosi <8 x float> undef to <8 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = fptosi <16 x float> undef to <16 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLOW-SIZE-LABEL: 'fptosi_float_i16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll b/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll
index e2f75a9f302cf..97c463345fba0 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fptoui.ll
@@ -59,7 +59,7 @@ define i32 @fptoui_double_i16(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'fptoui_double_i16'
@@ -73,7 +73,7 @@ define i32 @fptoui_double_i16(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui double undef to i16
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x double> undef to <2 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x double> undef to <4 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptoui <8 x double> undef to <8 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLOW-SIZE-LABEL: 'fptoui_double_i16'
@@ -181,8 +181,8 @@ define i32 @fptoui_float_i16(i32 %arg) {
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; FAST-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; FAST-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 ;
 ; SLOW-LABEL: 'fptoui_float_i16'
@@ -197,8 +197,8 @@ define i32 @fptoui_float_i16(i32 %arg) {
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %I16 = fptoui float undef to i16
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2I16 = fptoui <2 x float> undef to <2 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4I16 = fptoui <4 x float> undef to <4 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
-; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 22 for instruction: %V8I16 = fptoui <8 x float> undef to <8 x i16>
+; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 44 for instruction: %V16I16 = fptoui <16 x float> undef to <16 x i16>
 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 ;
 ; SLOW-SIZE-LABEL: 'fptoui_float_i16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
index 38fec3cfe5349..a49d4e35c1ce3 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/fsub.ll
@@ -115,7 +115,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
-; FASTF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
+; FASTF16-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fsub <17 x half> undef, undef
 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOWF64-LABEL: 'fsub_f16'
@@ -135,7 +135,7 @@ define amdgpu_kernel void @fsub_f16() #0 {
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
-; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
+; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v17f16 = fsub <17 x half> undef, undef
 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
 ; SLOWF64-SIZE-LABEL: 'fsub_f16'
diff --git a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
index 8fddcbefaf764..99a2de65180d7 100644
--- a/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
+++ b/llvm/test/Analysis/CostModel/AMDGPU/mul.ll
@@ -79,7 +79,7 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4i16 = mul <4 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v5i16 = mul <5 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %v16i16 = mul <16 x i16> undef, undef
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %v17i16 = mul <17 x i16> undef, undef
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v17i16 = mul <17 x i16> undef, undef
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 ;
 ; SLOW16-SIZE-LABEL: 'mul_i16'
@@ -99,7 +99,7 @@ define amdgpu_kernel void @mul_i16() #0 {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4i16 = mul <4 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5i16 = mul <5 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16i16 = mul <16 x i16> undef, undef
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %v17i16 = mul <17 x i16> undef, undef
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v17i16 = mul <17 x i16> undef, undef
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 ;
   %i16 = mul i16 undef, undef
@@ -144,7 +144,7 @@ define i32 @mul_constpow2() {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
@@ -182,7 +182,7 @@ define i32 @mul_constpow2() {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = mul <32 x i16> undef, <i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 128, i16 256>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16, i8 2, i8 4, i8 8, i8 16>
@@ -244,7 +244,7 @@ define i32 @mul_uniformconstpow2() {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, 16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, 16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
@@ -282,7 +282,7 @@ define i32 @mul_uniformconstpow2() {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, 16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = mul <32 x i16> undef, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, 16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16>
@@ -344,7 +344,7 @@ define i32 @mul_constnegpow2() {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
@@ -382,7 +382,7 @@ define i32 @mul_constnegpow2() {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256, i16 -2, i16 -4, i16 -8, i16 -16, i16 -32, i16 -64, i16 -128, i16 -256>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16, i8 -2, i8 -4, i8 -8, i8 -16>
@@ -444,7 +444,7 @@ define i32 @mul_uniformconstnegpow2() {
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I16 = mul i16 undef, -16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-NEXT:  Cost Model: Found an estimated cost of 80 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST16-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %I8 = mul i8 undef, -16
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
 ; FAST16-NEXT:  Cost Model: Found an estimated cost of 128 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
@@ -482,7 +482,7 @@ define i32 @mul_uniformconstnegpow2() {
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I16 = mul i16 undef, -16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = mul <8 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = mul <16 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
-; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 40 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
+; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = mul <32 x i16> undef, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %I8 = mul i8 undef, -16
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16i8 = mul <16 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
 ; FAST16-SIZE-NEXT:  Cost Model: Found an estimated cost of 64 for instruction: %V32i8 = mul <32 x i8> undef, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16>
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
index 9acb09cb48ff9..2b120b6fd3b67 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-constant.mir
@@ -715,27 +715,27 @@ body: |
     ; GFX6-LABEL: name: load_constant_v8s16
     ; GFX6: liveins: $sgpr0_sgpr1
     ; GFX6-NEXT: {{  $}}
-    ; GFX6-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GFX6-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4)
-    ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX6-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+    ; GFX6-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
     ; GFX7-LABEL: name: load_constant_v8s16
     ; GFX7: liveins: $sgpr0_sgpr1
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4)
-    ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX7-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+    ; GFX7-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
     ; GFX8-LABEL: name: load_constant_v8s16
     ; GFX8: liveins: $sgpr0_sgpr1
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4)
-    ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX8-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+    ; GFX8-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
     ; GFX10-LABEL: name: load_constant_v8s16
     ; GFX10: liveins: $sgpr0_sgpr1
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:sgpr_128(<8 x s16>) = G_LOAD [[COPY]](p4) :: (load (<8 x s16>), align 4, addrspace 4)
-    ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; GFX10-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY]], 0, 0 :: (load (<8 x s16>), align 4, addrspace 4)
+    ; GFX10-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[S_LOAD_DWORDX4_IMM]]
     %0:sgpr(p4) = COPY $sgpr0_sgpr1
     %1:sgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 4)
     $sgpr0_sgpr1_sgpr2_sgpr3 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
index f4e1d172dda7e..cce37182ae7b6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-flat.mir
@@ -741,27 +741,27 @@ body: |
     ; GFX7-LABEL: name: load_flat_v8s16
     ; GFX7: liveins: $vgpr0_vgpr1
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4)
-    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
+    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX8-LABEL: name: load_flat_v8s16
     ; GFX8: liveins: $vgpr0_vgpr1
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4)
-    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX9-LABEL: name: load_flat_v8s16
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4)
-    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX10-LABEL: name: load_flat_v8s16
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4)
-    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(<8 x  s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 0)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
index d2bd5e6fcc7d1..df681f95d4e3e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir
@@ -873,33 +873,33 @@ body: |
     ; GFX7-LABEL: name: load_global_v8s16
     ; GFX7: liveins: $vgpr0_vgpr1
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX7-FLAT-LABEL: name: load_global_v8s16
     ; GFX7-FLAT: liveins: $vgpr0_vgpr1
     ; GFX7-FLAT-NEXT: {{  $}}
-    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-FLAT-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX7-FLAT-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX8-LABEL: name: load_global_v8s16
     ; GFX8: liveins: $vgpr0_vgpr1
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[FLAT_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = FLAT_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec, implicit $flat_scr :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX8-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FLAT_LOAD_DWORDX4_]]
     ; GFX9-LABEL: name: load_global_v8s16
     ; GFX9: liveins: $vgpr0_vgpr1
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
     ; GFX10-LABEL: name: load_global_v8s16
     ; GFX10: liveins: $vgpr0_vgpr1
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p1) :: (load (<8 x s16>), align 4, addrspace 1)
-    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec :: (load (<8 x s16>), align 4, addrspace 1)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[GLOBAL_LOAD_DWORDX4_]]
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 4, addrspace 1)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
index 361f1e80d25e3..1083d43381939 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-local-128.mir
@@ -1,7 +1,7 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX7 %s
 # RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX9 %s
+# RUN: llc -amdgpu-global-isel-new-legality -march=amdgcn -mcpu=gfx1010 -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=0 -o - %s | FileCheck -check-prefix=GFX10 %s
 
 ---
 
@@ -27,6 +27,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s32_align16
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128 = DS_READ_B128_gfx9 [[COPY]], 0, 0, implicit $exec :: (load (<4 x s32>), addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ_B128_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 16, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -57,6 +63,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s32_align_8
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<4 x s32>) = G_LOAD %0 :: (load (<4 x s32>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -87,6 +99,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_160
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 50, 51, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 400
     %2:vgpr(p3) = G_PTR_ADD %0, %1
@@ -123,6 +141,14 @@ body: |
     ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
     ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v4s32_align_8_offset_320
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4000, implicit $exec
+    ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[V_ADD_U32_e64_]], 0, 1, 0, implicit $exec :: (load (<4 x s32>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s32) = G_CONSTANT i32 4000
     %2:vgpr(p3) = G_PTR_ADD %0, %1
@@ -155,6 +181,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v2s64
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<2 x s64>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<2 x s64>) = G_LOAD %0 :: (load (<2 x s64>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -185,6 +217,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
+    ; GFX10-LABEL: name: load_local_v2p1
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<2 x p1>) = G_LOAD [[COPY]](p3) :: (load (<2 x p1>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<2 x p1>)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<2 x p1>) = G_LOAD %0 :: (load (<2 x p1>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -215,6 +253,12 @@ body: |
     ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
     ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
     ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
+    ; GFX10-LABEL: name: load_local_s128
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX10-NEXT: [[LOAD:%[0-9]+]]:vreg_128(s128) = G_LOAD [[COPY]](p3) :: (load (s128), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(s128) = G_LOAD %0 :: (load (s128), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
@@ -235,16 +279,22 @@ body: |
     ; GFX7-LABEL: name: load_local_v8s16
     ; GFX7: liveins: $vgpr0
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GFX7-NEXT: $m0 = S_MOV_B32 -1
-    ; GFX7-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3)
-    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX7-NEXT: [[DS_READ2_B64_:%[0-9]+]]:vreg_128 = DS_READ2_B64 [[COPY]], 0, 1, 0, implicit $m0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
+    ; GFX7-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_]]
     ; GFX9-LABEL: name: load_local_v8s16
     ; GFX9: liveins: $vgpr0
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p3) = COPY $vgpr0
-    ; GFX9-NEXT: [[LOAD:%[0-9]+]]:vreg_128(<8 x s16>) = G_LOAD [[COPY]](p3) :: (load (<8 x s16>), align 8, addrspace 3)
-    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](<8 x s16>)
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
+    ; GFX9-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
+    ; GFX10-LABEL: name: load_local_v8s16
+    ; GFX10: liveins: $vgpr0
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX10-NEXT: [[DS_READ2_B64_gfx9_:%[0-9]+]]:vreg_128 = DS_READ2_B64_gfx9 [[COPY]], 0, 1, 0, implicit $exec :: (load (<8 x s16>), align 8, addrspace 3)
+    ; GFX10-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[DS_READ2_B64_gfx9_]]
     %0:vgpr(p3) = COPY $vgpr0
     %1:vgpr(<8 x s16>) = G_LOAD %0 :: (load (<8 x s16>), align 8, addrspace 3)
     $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
index 35b7f1a684c43..a626a7af89c5b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-flat.mir
@@ -503,27 +503,27 @@ body: |
     ; GFX7-LABEL: name: store_flat_v8s16
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>))
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
     ; GFX8-LABEL: name: store_flat_v8s16
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX8-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>))
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
     ; GFX9-LABEL: name: store_flat_v8s16
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>))
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
     ; GFX10-LABEL: name: store_flat_v8s16
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX10-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>))
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>))
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 0)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
index 8e883bfcc6669..8568c0e879f15 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir
@@ -549,33 +549,33 @@ body: |
     ; GFX7-LABEL: name: store_global_v8s16
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX7-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1)
     ; GFX7-FLAT-LABEL: name: store_global_v8s16
     ; GFX7-FLAT: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX7-FLAT-NEXT: {{  $}}
-    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX7-FLAT-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX7-FLAT-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX7-FLAT-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX7-FLAT-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1)
     ; GFX8-LABEL: name: store_global_v8s16
     ; GFX8: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX8-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX8-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX8-NEXT: FLAT_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec, implicit $flat_scr :: (store (<8 x s16>), addrspace 1)
     ; GFX9-LABEL: name: store_global_v8s16
     ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX9-NEXT: {{  $}}
-    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX9-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX9-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1)
     ; GFX10-LABEL: name: store_global_v8s16
     ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4_vgpr5
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
-    ; GFX10-NEXT: G_STORE [[COPY1]](<8 x s16>), [[COPY]](p1) :: (store (<8 x s16>), addrspace 1)
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY $vgpr2_vgpr3_vgpr4_vgpr5
+    ; GFX10-NEXT: GLOBAL_STORE_DWORDX4 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (store (<8 x s16>), addrspace 1)
     %0:vgpr(p1) = COPY $vgpr0_vgpr1
     %1:vgpr(<8 x s16>) = COPY $vgpr2_vgpr3_vgpr4_vgpr5
     G_STORE %1, %0 :: (store (<8 x s16>), align 16, addrspace 1)
diff --git a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
index 56e318c96b2f9..3de1aa1dd656d 100644
--- a/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/add.v2i16.ll
@@ -200,12 +200,13 @@ define amdgpu_kernel void @v_test_add_v2i16_zext_to_v2i32(<2 x i32> addrspace(1)
 
 ; FIXME: Need to handle non-uniform case for function below (load without gep).
 ; GCN-LABEL: {{^}}v_test_add_v2i16_zext_to_v2i64:
+; GFX9PLUS: v_mov_b32_e32 [[MASK:v[0-9+]]], 0xffff
 ; GFX9PLUS: global_load_dword [[A:v[0-9]+]]
 ; GFX9PLUS: global_load_dword [[B:v[0-9]+]]
 
 ; GFX9PLUS: v_pk_add_u16 [[ADD:v[0-9]+]], [[A]], [[B]]
-; GFX9PLUS-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], 0xffff, [[ADD]]
-; GFX9PLUS-DAG: v_lshrrev_b32_e32 v[[ELT1:[0-9]+]], 16, [[ADD]]
+; GFX9PLUS-DAG: v_and_b32_e32 v[[ELT0:[0-9]+]], [[MASK]], [[ADD]]
+; GFX9PLUS-DAG: v_and_b32_sdwa v{{[0-9]+}}, [[MASK]], v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9PLUS: buffer_store_dwordx4
 
 ; VI-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
diff --git a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
index b3f581280b644..04d6b0d85b957 100644
--- a/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
+++ b/llvm/test/CodeGen/AMDGPU/coalesce-vgpr-alignment.ll
@@ -23,7 +23,6 @@ bb:
 ; GCN-LABEL: {{^}}test_vector_creation:
 ; GCN:     global_load_dwordx2 v[{{[0-9]*[02468]}}:{{[0-9]+}}],
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]*}}[[LO:[02468]]], v{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]*}}[[HI:[13579]]], v{{[0-9]+}}
 ; GCN:     global_store_dwordx4 v[{{[0-9]*[02468]:[0-9]*[13579]}}], v[{{[0-9]*[02468]:[0-9]*[13579]}}]
 define amdgpu_kernel void @test_vector_creation() {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
new file mode 100644
index 0000000000000..c686598225a25
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll
@@ -0,0 +1,437 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -verify-machineinstrs -o - %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
+
+define <4 x i16> @extract_4xi16(<8 x i16> addrspace(1) * %p0, <8 x i16> addrspace(1) * %p1) {
+; SI-LABEL: extract_4xi16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_cbranch_scc0 .LBB0_2
+; SI-NEXT:  ; %bb.1: ; %F
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; SI-NEXT:    v_or_b32_e32 v2, v6, v2
+; SI-NEXT:    v_or_b32_e32 v3, v4, v3
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; SI-NEXT:    s_mov_b64 vcc, exec
+; SI-NEXT:    s_cbranch_execz .LBB0_3
+; SI-NEXT:    s_branch .LBB0_4
+; SI-NEXT:  .LBB0_2:
+; SI-NEXT:    ; implicit-def: $vgpr3
+; SI-NEXT:    ; implicit-def: $vgpr4
+; SI-NEXT:    ; implicit-def: $vgpr2
+; SI-NEXT:    s_mov_b64 vcc, 0
+; SI-NEXT:  .LBB0_3: ; %T
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v5
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; SI-NEXT:    v_or_b32_e32 v2, v4, v0
+; SI-NEXT:    v_or_b32_e32 v3, v3, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
+; SI-NEXT:  .LBB0_4: ; %exit
+; SI-NEXT:    v_bfe_i32 v0, v3, 0, 16
+; SI-NEXT:    v_bfe_i32 v1, v4, 0, 16
+; SI-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; SI-NEXT:    s_mov_b32 s4, 0xffff
+; SI-NEXT:    v_mov_b32_e32 v3, 0x8000
+; SI-NEXT:    v_mov_b32_e32 v4, 0xffff0000
+; SI-NEXT:    v_bfrev_b32_e32 v5, 1
+; SI-NEXT:    v_mov_b32_e32 v6, 0xffff8000
+; SI-NEXT:    v_mov_b32_e32 v7, s4
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v7, v3, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; SI-NEXT:    v_cndmask_b32_e32 v1, v4, v5, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, -1, v6, vcc
+; SI-NEXT:    v_or_b32_e32 v0, v0, v1
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v2
+; SI-NEXT:    v_and_b32_e32 v2, s4, v2
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_alignbit_b32 v1, v2, v1, 16
+; SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_4xi16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_cbranch_scc0 .LBB0_2
+; GFX9-NEXT:  ; %bb.1: ; %F
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_cbranch_execz .LBB0_3
+; GFX9-NEXT:    s_branch .LBB0_4
+; GFX9-NEXT:  .LBB0_2:
+; GFX9-NEXT:    s_mov_b32 s8, 0
+; GFX9-NEXT:    s_mov_b32 s9, s8
+; GFX9-NEXT:    s_mov_b32 s10, s8
+; GFX9-NEXT:    s_mov_b32 s11, s8
+; GFX9-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-NEXT:    v_mov_b32_e32 v4, s10
+; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:  .LBB0_3: ; %T
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:  .LBB0_4: ; %exit
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v3 op_sel_hi:[0,0]
+; GFX9-NEXT:    s_movk_i32 s4, 0x8000
+; GFX9-NEXT:    v_or_b32_sdwa v1, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v3, s4, v0
+; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1]
+; GFX9-NEXT:    v_or_b32_sdwa v2, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v0, s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v0, v4, v0
+; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
+; GFX9-NEXT:    v_and_b32_e32 v2, v4, v3
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x i16>, <8 x i16> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x i16>, <8 x i16> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <4 x i16> %r2
+}
+
+define <4 x i16> @extract_4xi16_2(<8 x i16> addrspace(1) * %p0, <8 x i16> addrspace(1) * %p1) {
+; SI-LABEL: extract_4xi16_2:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_cbranch_scc0 .LBB1_2
+; SI-NEXT:  ; %bb.1: ; %F
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; SI-NEXT:    v_or_b32_e32 v2, v6, v2
+; SI-NEXT:    v_or_b32_e32 v3, v4, v3
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; SI-NEXT:    s_mov_b64 vcc, exec
+; SI-NEXT:    s_cbranch_execz .LBB1_3
+; SI-NEXT:    s_branch .LBB1_4
+; SI-NEXT:  .LBB1_2:
+; SI-NEXT:    ; implicit-def: $vgpr3
+; SI-NEXT:    ; implicit-def: $vgpr5
+; SI-NEXT:    ; implicit-def: $vgpr2
+; SI-NEXT:    ; implicit-def: $vgpr4
+; SI-NEXT:    s_mov_b64 vcc, 0
+; SI-NEXT:  .LBB1_3: ; %T
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
+; SI-NEXT:    v_or_b32_e32 v2, v4, v0
+; SI-NEXT:    v_or_b32_e32 v3, v3, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
+; SI-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
+; SI-NEXT:  .LBB1_4: ; %exit
+; SI-NEXT:    v_bfe_i32 v0, v3, 0, 16
+; SI-NEXT:    v_bfe_i32 v1, v5, 0, 16
+; SI-NEXT:    v_bfe_i32 v2, v2, 0, 16
+; SI-NEXT:    v_bfe_i32 v3, v4, 0, 16
+; SI-NEXT:    v_mov_b32_e32 v4, 0xffff
+; SI-NEXT:    v_mov_b32_e32 v5, 0x8000
+; SI-NEXT:    v_mov_b32_e32 v6, 0xffff0000
+; SI-NEXT:    v_bfrev_b32_e32 v7, 1
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v4, v5, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v1
+; SI-NEXT:    v_cndmask_b32_e32 v1, v6, v7, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, v4, v5, vcc
+; SI-NEXT:    v_cmp_lt_i32_e32 vcc, -1, v3
+; SI-NEXT:    v_cndmask_b32_e32 v3, v6, v7, vcc
+; SI-NEXT:    v_or_b32_e32 v0, v0, v1
+; SI-NEXT:    v_or_b32_e32 v2, v2, v3
+; SI-NEXT:    v_alignbit_b32 v1, v2, v1, 16
+; SI-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_4xi16_2:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_cbranch_scc0 .LBB1_2
+; GFX9-NEXT:  ; %bb.1: ; %F
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_cbranch_execz .LBB1_3
+; GFX9-NEXT:    s_branch .LBB1_4
+; GFX9-NEXT:  .LBB1_2:
+; GFX9-NEXT:    s_mov_b32 s8, 0
+; GFX9-NEXT:    s_mov_b32 s9, s8
+; GFX9-NEXT:    s_mov_b32 s10, s8
+; GFX9-NEXT:    s_mov_b32 s11, s8
+; GFX9-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-NEXT:    v_mov_b32_e32 v4, s10
+; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:  .LBB1_3: ; %T
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:  .LBB1_4: ; %exit
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v5 op_sel_hi:[0,1]
+; GFX9-NEXT:    s_movk_i32 s4, 0x8000
+; GFX9-NEXT:    v_or_b32_sdwa v1, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v2, s4, v0
+; GFX9-NEXT:    v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1]
+; GFX9-NEXT:    v_or_b32_sdwa v3, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v0, s4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
+; GFX9-NEXT:    v_and_b32_e32 v0, v4, v0
+; GFX9-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v2
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x i16>, <8 x i16> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x i16>, <8 x i16> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x i16> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <8 x i16> %m, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %b2 = icmp sgt <4 x i16> %v2, <i16 -1, i16 -1, i16 -1, i16 -1>
+  %r2 = select <4 x i1> %b2, <4 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768>, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>
+  ret <4 x i16> %r2
+}
+
+define <4 x half> @extract_4xf16(<8 x half> addrspace(1) * %p0, <8 x half> addrspace(1) * %p1) {
+; SI-LABEL: extract_4xf16:
+; SI:       ; %bb.0:
+; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT:    s_cbranch_scc0 .LBB2_2
+; SI-NEXT:  ; %bb.1: ; %F
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v4, v[2:3], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[2:3], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[2:3], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v7, v[2:3], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v8, v[2:3], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v2, v[2:3], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v5
+; SI-NEXT:    v_or_b32_e32 v2, v6, v2
+; SI-NEXT:    v_or_b32_e32 v4, v4, v3
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_cvt_f32_f16_e32 v3, v4
+; SI-NEXT:    v_lshrrev_b32_e32 v4, 16, v4
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v4
+; SI-NEXT:    s_mov_b64 vcc, exec
+; SI-NEXT:    s_cbranch_execz .LBB2_3
+; SI-NEXT:    s_branch .LBB2_4
+; SI-NEXT:  .LBB2_2:
+; SI-NEXT:    ; implicit-def: $vgpr3
+; SI-NEXT:    ; implicit-def: $vgpr4
+; SI-NEXT:    ; implicit-def: $vgpr2
+; SI-NEXT:    s_mov_b64 vcc, 0
+; SI-NEXT:  .LBB2_3: ; %T
+; SI-NEXT:    s_mov_b32 s6, 0
+; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s4, s6
+; SI-NEXT:    s_mov_b32 s5, s6
+; SI-NEXT:    buffer_load_ushort v2, v[0:1], s[4:7], 0 addr64 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v3, v[0:1], s[4:7], 0 addr64 offset:2 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v4, v[0:1], s[4:7], 0 addr64 offset:4 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v5, v[0:1], s[4:7], 0 addr64 offset:6 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:8 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:10 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v6, v[0:1], s[4:7], 0 addr64 offset:12 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64 offset:14 glc
+; SI-NEXT:    s_waitcnt vmcnt(0)
+; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v5
+; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
+; SI-NEXT:    v_or_b32_e32 v0, v4, v0
+; SI-NEXT:    v_or_b32_e32 v1, v2, v1
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v3, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
+; SI-NEXT:    v_cvt_f32_f16_e32 v4, v0
+; SI-NEXT:  .LBB2_4: ; %exit
+; SI-NEXT:    v_cvt_f16_f32_e32 v0, v3
+; SI-NEXT:    v_cvt_f16_f32_e32 v1, v4
+; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; SI-NEXT:    v_mov_b32_e32 v3, 0x3fa00000
+; SI-NEXT:    v_mov_b32_e32 v4, 0x3f200000
+; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; SI-NEXT:    v_cmp_nge_f32_e32 vcc, 0.5, v0
+; SI-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
+; SI-NEXT:    v_cmp_nge_f32_e32 vcc, 0.5, v1
+; SI-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; SI-NEXT:    v_cmp_nge_f32_e32 vcc, 0.5, v2
+; SI-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; SI-NEXT:    v_mov_b32_e32 v3, v2
+; SI-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: extract_4xf16:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT:    s_cbranch_scc0 .LBB2_2
+; GFX9-NEXT:  ; %bb.1: ; %F
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[2:3], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    s_cbranch_execz .LBB2_3
+; GFX9-NEXT:    s_branch .LBB2_4
+; GFX9-NEXT:  .LBB2_2:
+; GFX9-NEXT:    s_mov_b32 s8, 0
+; GFX9-NEXT:    s_mov_b32 s9, s8
+; GFX9-NEXT:    s_mov_b32 s10, s8
+; GFX9-NEXT:    s_mov_b32 s11, s8
+; GFX9-NEXT:    v_mov_b32_e32 v2, s8
+; GFX9-NEXT:    v_mov_b32_e32 v3, s9
+; GFX9-NEXT:    v_mov_b32_e32 v4, s10
+; GFX9-NEXT:    v_mov_b32_e32 v5, s11
+; GFX9-NEXT:  .LBB2_3: ; %T
+; GFX9-NEXT:    global_load_dwordx4 v[2:5], v[0:1], off glc
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:  .LBB2_4: ; %exit
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v3
+; GFX9-NEXT:    v_lshl_or_b32 v0, v3, 16, v0
+; GFX9-NEXT:    v_mov_b32_e32 v1, 0x3800
+; GFX9-NEXT:    v_mov_b32_e32 v3, 0x3900
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0x3d00
+; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v0
+; GFX9-NEXT:    v_cndmask_b32_e32 v5, v3, v4, vcc
+; GFX9-NEXT:    v_cmp_nle_f16_sdwa vcc, v0, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e32 v6, v4, v3, vcc
+; GFX9-NEXT:    v_cmp_ge_f16_e32 vcc, 0.5, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v3, v4, vcc
+; GFX9-NEXT:    v_cmp_le_f16_sdwa vcc, v2, v1 src0_sel:WORD_1 src1_sel:DWORD
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v3, v4, vcc
+; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
+; GFX9-NEXT:    v_pack_b32_f16 v1, v5, v6
+; GFX9-NEXT:    s_setpc_b64 s[30:31]
+  br i1 undef, label %T, label %F
+
+T:
+  %t = load volatile <8 x half>, <8 x half> addrspace(1) * %p0
+  br label %exit
+
+F:
+  %f = load volatile <8 x half>, <8 x half> addrspace(1) * %p1
+  br label %exit
+
+exit:
+  %m = phi <8 x half> [ %t, %T ], [ %f, %F ]
+  %v2 = shufflevector <8 x half> %m, <8 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
+  %b2 = fcmp ugt <4 x half> %v2, <half 0xH3800, half 0xH3800, half 0xH3800, half 0xH3800>
+  %r2 = select <4 x i1> %b2, <4 x half> <half 0xH3900, half 0xH3900, half 0xH3900, half 0xH3900>, <4 x half> <half 0xH3D00, half 0xH3D00, half 0xH3D00, half 0xH3D00>
+  ret <4 x half> %r2
+}
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
index 090a1e2674556..6fad3653e475e 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-f16.ll
@@ -166,6 +166,19 @@ define amdgpu_kernel void @reduce_load_vector_v8f16_extract_23(<16 x half> addrs
   ret void
 }
 
+; GCN-LABEL: {{^}}v_extractelement_v8f16_dynamic_sgpr:
+; GCN-COUNT-7: v_cndmask_b32_e32
+define amdgpu_kernel void @v_extractelement_v8f16_dynamic_sgpr(half addrspace(1)* %out, <8 x half> addrspace(1)* %in, i32 %n) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x half>, <8 x half> addrspace(1)* %in.gep
+  %vec.extract = extractelement <8 x half> %vec, i32 %n
+  store half %vec.extract, half addrspace(1)* %out.gep
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
index 3f7be164a0b6d..133c7e3b07875 100644
--- a/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/extract_vector_elt-i16.ll
@@ -166,6 +166,55 @@ define amdgpu_kernel void @reduce_load_vector_v8i16_extract_23(<16 x i16> addrsp
   ret void
 }
 
+; GCN-LABEL: {{^}}v_extractelement_v8i16_2:
+; SI: buffer_load_dword [[RES:v[0-9]+]], v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0 addr64 offset:4
+; SI: buffer_store_short [[RES]]
+; VI: flat_load_dword [[RES:v[0-9]+]]
+; VI: flat_store_short v[{{[0-9:]+}}], [[RES]]
+; GFX9: global_load_dword [[RES:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9:]+}}] offset:4
+; GFX9: global_store_short v{{[0-9]+}}, [[RES]]
+define amdgpu_kernel void @v_extractelement_v8i16_2(i16 addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep
+  %vec.extract = extractelement <8 x i16> %vec, i32 2
+  store i16 %vec.extract, i16 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_extractelement_v8i16_6:
+; SI: buffer_load_dword [[RES:v[0-9]+]], v[{{[0-9:]+}}], s[{{[0-9:]+}}], 0 addr64 offset:12
+; SI: buffer_store_short [[RES]]
+; VI: flat_load_dword [[RES:v[0-9]+]]
+; VI: flat_store_short v[{{[0-9:]+}}], [[RES]]
+; GFX9: global_load_dword [[RES:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9:]+}}] offset:12
+; GFX9: global_store_short v{{[0-9]+}}, [[RES]]
+define amdgpu_kernel void @v_extractelement_v8i16_6(i16 addrspace(1)* %out, <8 x i16> addrspace(1)* %in) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep
+  %vec.extract = extractelement <8 x i16> %vec, i32 6
+  store i16 %vec.extract, i16 addrspace(1)* %out.gep
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_extractelement_v8i16_dynamic_sgpr:
+; GCN-COUNT-7: v_cndmask_b32_e32
+define amdgpu_kernel void @v_extractelement_v8i16_dynamic_sgpr(i16 addrspace(1)* %out, <8 x i16> addrspace(1)* %in, i32 %n) #0 {
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds i16, i16 addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep
+  %vec.extract = extractelement <8 x i16> %vec, i32 %n
+  store i16 %vec.extract, i16 addrspace(1)* %out.gep
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll
index 22402597a192f..ff0171095b842 100644
--- a/llvm/test/CodeGen/AMDGPU/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll
@@ -388,9 +388,7 @@ define <4 x half> @v4f16_func_void() #0 {
 ; FIXME: Mixing buffer and global
 ; FIXME: Should not scalarize
 ; GCN-LABEL: {{^}}v5i16_func_void:
-; GFX9: buffer_load_dwordx2 v[0:1]
-; GFX9-NEXT: s_nop 0
-; GFX9-NEXT: global_load_short_d16 v2
+; GFX9: buffer_load_dwordx4 v[0:3]
 ; GFX9-NEXT: s_waitcnt
 ; GFX9-NEXT: s_setpc_b64
 define <5 x i16> @v5i16_func_void() #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/idot8s.ll b/llvm/test/CodeGen/AMDGPU/idot8s.ll
index d3153d12641c0..8563d321c83ae 100644
--- a/llvm/test/CodeGen/AMDGPU/idot8s.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot8s.ll
@@ -2289,58 +2289,58 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX8-NEXT:    s_add_u32 s8, s8, s3
 ; GFX8-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(2)
-; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 4, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v7, 8, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 12, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 20, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 28, v3
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v16, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
-; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 4, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 8, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 12, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v14, 20, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 28, v2
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v17, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 28, v3
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v7, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 20, v3
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v9, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 12, v3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 8, v3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 4, v3
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v3, 12, v3
-; GFX8-NEXT:    v_lshlrev_b16_sdwa v18, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX8-NEXT:    s_waitcnt vmcnt(1)
+; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 28, v2
+; GFX8-NEXT:    v_lshlrev_b16_sdwa v14, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 20, v2
 ; GFX8-NEXT:    v_lshlrev_b16_sdwa v5, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX8-NEXT:    v_lshrrev_b32_e32 v16, 12, v2
+; GFX8-NEXT:    v_lshrrev_b32_e32 v17, 8, v2
+; GFX8-NEXT:    v_lshrrev_b32_e32 v18, 4, v2
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v2, 12, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 12, v6
+; GFX8-NEXT:    v_lshlrev_b16_e32 v12, 12, v12
 ; GFX8-NEXT:    v_ashrrev_i16_e32 v3, 12, v3
-; GFX8-NEXT:    v_lshlrev_b16_e32 v11, 12, v11
+; GFX8-NEXT:    v_lshlrev_b16_e32 v18, 12, v18
 ; GFX8-NEXT:    v_ashrrev_i16_e32 v2, 12, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v7, 12, v7
-; GFX8-NEXT:    v_lshlrev_b16_e32 v12, 12, v12
-; GFX8-NEXT:    v_ashrrev_i16_e32 v6, 12, v6
-; GFX8-NEXT:    v_ashrrev_i16_e32 v11, 12, v11
+; GFX8-NEXT:    v_lshlrev_b16_e32 v11, 12, v11
+; GFX8-NEXT:    v_lshlrev_b16_e32 v17, 12, v17
+; GFX8-NEXT:    v_ashrrev_i16_e32 v12, 12, v12
+; GFX8-NEXT:    v_ashrrev_i16_e32 v18, 12, v18
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8-NEXT:    v_mad_u16 v2, v3, v2, v4
-; GFX8-NEXT:    v_lshlrev_b16_e32 v8, 12, v8
-; GFX8-NEXT:    v_lshlrev_b16_e32 v13, 12, v13
-; GFX8-NEXT:    v_ashrrev_i16_e32 v7, 12, v7
-; GFX8-NEXT:    v_ashrrev_i16_e32 v12, 12, v12
-; GFX8-NEXT:    v_mad_u16 v2, v6, v11, v2
-; GFX8-NEXT:    v_ashrrev_i16_e32 v8, 12, v8
-; GFX8-NEXT:    v_ashrrev_i16_e32 v13, 12, v13
-; GFX8-NEXT:    v_mad_u16 v2, v7, v12, v2
-; GFX8-NEXT:    v_lshlrev_b16_e32 v9, 12, v9
-; GFX8-NEXT:    v_ashrrev_i16_e32 v17, 12, v17
-; GFX8-NEXT:    v_lshlrev_b16_e32 v14, 12, v14
-; GFX8-NEXT:    v_ashrrev_i16_e32 v5, 12, v5
-; GFX8-NEXT:    v_mad_u16 v2, v8, v13, v2
-; GFX8-NEXT:    v_ashrrev_i16_e32 v9, 12, v9
-; GFX8-NEXT:    v_ashrrev_i16_e32 v14, 12, v14
-; GFX8-NEXT:    v_mad_u16 v2, v17, v5, v2
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v10, 12, v10
+; GFX8-NEXT:    v_lshlrev_b16_e32 v16, 12, v16
+; GFX8-NEXT:    v_ashrrev_i16_e32 v11, 12, v11
+; GFX8-NEXT:    v_ashrrev_i16_e32 v17, 12, v17
+; GFX8-NEXT:    v_mad_u16 v2, v12, v18, v2
+; GFX8-NEXT:    v_ashrrev_i16_e32 v10, 12, v10
 ; GFX8-NEXT:    v_ashrrev_i16_e32 v16, 12, v16
+; GFX8-NEXT:    v_mad_u16 v2, v11, v17, v2
+; GFX8-NEXT:    v_lshlrev_b16_e32 v8, 12, v8
+; GFX8-NEXT:    v_ashrrev_i16_e32 v9, 12, v9
 ; GFX8-NEXT:    v_lshlrev_b16_e32 v15, 12, v15
-; GFX8-NEXT:    v_ashrrev_i16_e32 v18, 12, v18
-; GFX8-NEXT:    v_mad_u16 v2, v9, v14, v2
-; GFX8-NEXT:    v_ashrrev_i16_e32 v10, 12, v10
+; GFX8-NEXT:    v_ashrrev_i16_e32 v5, 12, v5
+; GFX8-NEXT:    v_mad_u16 v2, v10, v16, v2
+; GFX8-NEXT:    v_ashrrev_i16_e32 v8, 12, v8
 ; GFX8-NEXT:    v_ashrrev_i16_e32 v15, 12, v15
-; GFX8-NEXT:    v_mad_u16 v2, v16, v18, v2
-; GFX8-NEXT:    v_mad_u16 v2, v10, v15, v2
+; GFX8-NEXT:    v_mad_u16 v2, v9, v5, v2
+; GFX8-NEXT:    v_lshlrev_b16_e32 v6, 12, v6
+; GFX8-NEXT:    v_ashrrev_i16_e32 v7, 12, v7
+; GFX8-NEXT:    v_lshlrev_b16_e32 v13, 12, v13
+; GFX8-NEXT:    v_ashrrev_i16_e32 v14, 12, v14
+; GFX8-NEXT:    v_mad_u16 v2, v8, v15, v2
+; GFX8-NEXT:    v_ashrrev_i16_e32 v6, 12, v6
+; GFX8-NEXT:    v_ashrrev_i16_e32 v13, 12, v13
+; GFX8-NEXT:    v_mad_u16 v2, v7, v14, v2
+; GFX8-NEXT:    v_mad_u16 v2, v6, v13, v2
 ; GFX8-NEXT:    flat_store_short v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
@@ -2354,7 +2354,7 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
+; GFX9-NEXT:    v_mov_b32_e32 v4, 12
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[6:7]
@@ -2362,68 +2362,70 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    global_load_ushort v3, v0, s[2:3]
 ; GFX9-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 28, v1
-; GFX9-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX9-NEXT:    v_bfe_u32 v7, v1, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v10, v1, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v11, v1, 4, 4
-; GFX9-NEXT:    v_and_b32_e32 v1, 15, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v5, 4, v1
+; GFX9-NEXT:    v_lshlrev_b16_e32 v6, 12, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 20, v1
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v10, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
-; GFX9-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX9-NEXT:    v_bfe_u32 v14, v2, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v15, v2, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v16, v2, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v17, v2, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v18, v2, 4, 4
-; GFX9-NEXT:    v_and_b32_e32 v2, 15, v2
-; GFX9-NEXT:    v_and_b32_e32 v1, v4, v1
-; GFX9-NEXT:    v_and_b32_e32 v2, v4, v2
-; GFX9-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
-; GFX9-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
-; GFX9-NEXT:    v_and_b32_e32 v10, v4, v10
-; GFX9-NEXT:    v_and_b32_e32 v6, v4, v6
-; GFX9-NEXT:    v_and_b32_e32 v17, v4, v17
-; GFX9-NEXT:    v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
-; GFX9-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
-; GFX9-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
-; GFX9-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_and_b32_e32 v8, v4, v8
-; GFX9-NEXT:    v_and_b32_e32 v15, v4, v15
-; GFX9-NEXT:    v_and_b32_e32 v4, v4, v13
-; GFX9-NEXT:    v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v6, 12, v6 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
-; GFX9-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
-; GFX9-NEXT:    v_lshl_or_b32 v8, v14, 16, v15
-; GFX9-NEXT:    v_lshl_or_b32 v4, v12, 16, v4
-; GFX9-NEXT:    v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v6, 12, v6 op_sel_hi:[0,1]
+; GFX9-NEXT:    v_lshrrev_b32_e32 v12, 4, v2
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX9-NEXT:    v_lshlrev_b16_e32 v13, 12, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 12, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 8, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v16, 20, v2
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v17, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-NEXT:    v_lshlrev_b16_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX9-NEXT:    v_lshlrev_b16_e32 v4, 12, v5
+; GFX9-NEXT:    v_ashrrev_i16_e32 v5, 12, v6
+; GFX9-NEXT:    v_lshlrev_b16_e32 v6, 12, v7
+; GFX9-NEXT:    v_lshlrev_b16_e32 v7, 12, v8
+; GFX9-NEXT:    v_lshlrev_b16_e32 v8, 12, v9
+; GFX9-NEXT:    v_ashrrev_i16_e32 v9, 12, v10
+; GFX9-NEXT:    v_lshlrev_b16_e32 v10, 12, v11
+; GFX9-NEXT:    v_lshlrev_b16_e32 v11, 12, v12
+; GFX9-NEXT:    v_ashrrev_i16_e32 v12, 12, v13
+; GFX9-NEXT:    v_ashrrev_i16_e32 v4, 12, v4
+; GFX9-NEXT:    v_ashrrev_i16_e32 v6, 12, v6
+; GFX9-NEXT:    v_ashrrev_i16_e32 v7, 12, v7
+; GFX9-NEXT:    v_ashrrev_i16_e32 v11, 12, v11
+; GFX9-NEXT:    v_lshlrev_b16_e32 v13, 12, v14
+; GFX9-NEXT:    v_lshlrev_b16_e32 v14, 12, v15
+; GFX9-NEXT:    v_lshl_or_b32 v6, v6, 16, v7
+; GFX9-NEXT:    v_lshl_or_b32 v7, v11, 16, v12
+; GFX9-NEXT:    v_lshl_or_b32 v4, v4, 16, v5
+; GFX9-NEXT:    v_ashrrev_i16_e32 v8, 12, v8
+; GFX9-NEXT:    v_ashrrev_i16_e32 v13, 12, v13
+; GFX9-NEXT:    v_ashrrev_i16_e32 v14, 12, v14
+; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v4, v7
+; GFX9-NEXT:    v_lshlrev_b16_e32 v15, 12, v16
+; GFX9-NEXT:    v_ashrrev_i16_e32 v16, 12, v17
+; GFX9-NEXT:    v_lshlrev_b16_e32 v17, 12, v18
+; GFX9-NEXT:    v_lshl_or_b32 v8, v8, 16, v9
+; GFX9-NEXT:    v_lshl_or_b32 v9, v13, 16, v14
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u16_e32 v2, v1, v3
-; GFX9-NEXT:    v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v5, 12, v5 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v8, 12, v8 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_lshlrev_b16 v4, 12, v4 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_mul_lo_u16 v6, v9, v6
+; GFX9-NEXT:    v_add_u16_e32 v3, v4, v3
+; GFX9-NEXT:    v_ashrrev_i16_e32 v1, 12, v1
+; GFX9-NEXT:    v_ashrrev_i16_e32 v2, 12, v2
+; GFX9-NEXT:    v_ashrrev_i16_e32 v10, 12, v10
+; GFX9-NEXT:    v_ashrrev_i16_e32 v15, 12, v15
+; GFX9-NEXT:    v_ashrrev_i16_e32 v17, 12, v17
+; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v6, v9
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshl_or_b32 v2, v17, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v1, v10, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v10, v15, 16, v16
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-NEXT:    v_pk_mul_lo_u16 v2, v8, v10
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v2, v2, v1
 ; GFX9-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v8, 12, v8 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_pk_ashrrev_i16 v4, 12, v4 op_sel_hi:[0,1]
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v6
-; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v5, v4
-; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v7, v8
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v5
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v4
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -2437,7 +2439,7 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-DL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-DL-NEXT:    v_mov_b32_e32 v4, 0xffff
+; GFX9-DL-NEXT:    v_mov_b32_e32 v4, 12
 ; GFX9-DL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DL-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX9-DL-NEXT:    global_load_dword v2, v0, s[6:7]
@@ -2445,68 +2447,70 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    global_load_ushort v3, v0, s[2:3]
 ; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v5, 28, v1
-; GFX9-DL-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v11, v1, 4, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v1, 15, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v5, 4, v1
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v6, 12, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v9, 20, v1
+; GFX9-DL-NEXT:    v_lshlrev_b16_sdwa v10, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
-; GFX9-DL-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v18, v2, 4, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v2, 15, v2
-; GFX9-DL-NEXT:    v_and_b32_e32 v1, v4, v1
-; GFX9-DL-NEXT:    v_and_b32_e32 v2, v4, v2
-; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
-; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
-; GFX9-DL-NEXT:    v_and_b32_e32 v10, v4, v10
-; GFX9-DL-NEXT:    v_and_b32_e32 v6, v4, v6
-; GFX9-DL-NEXT:    v_and_b32_e32 v17, v4, v17
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
-; GFX9-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
-; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_and_b32_e32 v8, v4, v8
-; GFX9-DL-NEXT:    v_and_b32_e32 v15, v4, v15
-; GFX9-DL-NEXT:    v_and_b32_e32 v4, v4, v13
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v6, 12, v6 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
-; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
-; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v14, 16, v15
-; GFX9-DL-NEXT:    v_lshl_or_b32 v4, v12, 16, v4
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v6, 12, v6 op_sel_hi:[0,1]
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v12, 4, v2
+; GFX9-DL-NEXT:    v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v13, 12, v2
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v14, 12, v2
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v15, 8, v2
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v16, 20, v2
+; GFX9-DL-NEXT:    v_lshlrev_b16_sdwa v17, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-DL-NEXT:    v_lshlrev_b16_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v4, 12, v5
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v5, 12, v6
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v6, 12, v7
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v7, 12, v8
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v8, 12, v9
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v9, 12, v10
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v10, 12, v11
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v11, 12, v12
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v12, 12, v13
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v4, 12, v4
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v6, 12, v6
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v7, 12, v7
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v11, 12, v11
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v13, 12, v14
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v14, 12, v15
+; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v6, 16, v7
+; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v11, 16, v12
+; GFX9-DL-NEXT:    v_lshl_or_b32 v4, v4, 16, v5
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v8, 12, v8
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v13, 12, v13
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v14, 12, v14
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v4, v7
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v15, 12, v16
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v16, 12, v17
+; GFX9-DL-NEXT:    v_lshlrev_b16_e32 v17, 12, v18
+; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v8, 16, v9
+; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v13, 16, v14
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-DL-NEXT:    v_add_u16_e32 v2, v1, v3
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v5, 12, v5 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v8, 12, v8 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_lshlrev_b16 v4, 12, v4 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v6, v9, v6
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v4, v3
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v1, 12, v1
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v2, 12, v2
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v10, 12, v10
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v15, 12, v15
+; GFX9-DL-NEXT:    v_ashrrev_i16_e32 v17, 12, v17
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v6, v9
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v17, 16, v2
+; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v10, 16, v1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v10, v15, 16, v16
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v2, v8, v10
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-DL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v2, v2, v1
 ; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v5, 12, v5 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v8, 12, v8 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_pk_ashrrev_i16 v4, 12, v4 op_sel_hi:[0,1]
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v6
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v5, v4
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v7, v8
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v5
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v4
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-DL-NEXT:    global_store_short v0, v1, s[2:3]
 ; GFX9-DL-NEXT:    s_endpgm
 ;
@@ -2529,71 +2533,85 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX10-DL-XNACK-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-DL-XNACK-NEXT:    global_load_ushort v3, v0, s[0:1]
 ; GFX10-DL-XNACK-NEXT:    s_waitcnt vmcnt(2)
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v5, 28, v1
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v7, v1, 20, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v10, v1, 8, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v11, v1, 4, 4
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v1, 15, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v5, 4, v1
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v6, 12, v1
 ; GFX10-DL-XNACK-NEXT:    s_waitcnt vmcnt(1)
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v13, 15, v2
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v16, v2, 4, 4
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v14, v2, 24, 4
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v12, 4, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v13, 12, v2
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v15, 8, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v5, 12, v5
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v6, 12, v6
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v12, 12, v12
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v13, 12, v13
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v14, 12, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v8, 12, v8
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v15, 12, v15
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v5, 12, v5
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v12, 12, v12
 ; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v13, v4, v13
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v15, v2, 20, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v17, v2, 16, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v18, v2, 12, 4
-; GFX10-DL-XNACK-NEXT:    v_bfe_u32 v2, v2, 8, 4
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v11, v16, 16, v13
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v17, 16, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v7, 12, v7
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v14, 12, v14
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v8, 12, v8
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v15, 12, v15
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v12, v12, 16, v13
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v9, 20, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v16, 20, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v10, 12, v10
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v17, 12, v17
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v7, 12, v7
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v6, 12, v14
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v13, v4, v15
 ; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v8, v4, v8
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v2, v4, v2
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v11, 12, v11 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v5, v5, v12
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v2, 24, v2
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v9, 12, v9
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v16, 12, v16
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v10, 12, v10
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v14, 12, v17
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v6, v6, 16, v13
 ; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v8, 12, v11 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v10, v4, v17
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v1, v1, v8
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v8, v15, 16, v10
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
 ; GFX10-DL-XNACK-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v3
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v3, v4, v6
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v6, 12, v8 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v4, v4, v14
-; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v2, v9, v2
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v10
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v3, v5, 16, v3
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v5, 12, v6 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v4, v12, 16, v4
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v2
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v2, 12, v3 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_lshlrev_b16 v3, 12, v4 op_sel_hi:[0,1]
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v1, 12, v1
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v9, 12, v9
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v12, 12, v16
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v2, 12, v2
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v5, v4, v14
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v6, v7, v6
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v3, v8
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v11, 12, v11
+; GFX10-DL-XNACK-NEXT:    v_lshlrev_b16 v18, 12, v18
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v1, 12, v1
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v2, 12, v2
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v5, v12, 16, v5
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v7, v9, 16, v10
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v3, v6
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v11, 12, v11
+; GFX10-DL-XNACK-NEXT:    v_ashrrev_i16 v6, 12, v18
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX10-DL-XNACK-NEXT:    v_and_b32_e32 v1, v4, v1
 ; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v4, v7, v5
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v6
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v2, 12, v2 op_sel_hi:[0,1]
-; GFX10-DL-XNACK-NEXT:    v_pk_ashrrev_i16 v3, 12, v3 op_sel_hi:[0,1]
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v3, v8
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v2, v6, 16, v2
+; GFX10-DL-XNACK-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
 ; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v4
-; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v2, v2, v3
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v5
-; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v2
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v3, v3, v4
+; GFX10-DL-XNACK-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v2, v3, v5
+; GFX10-DL-XNACK-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v2, v1
 ; GFX10-DL-XNACK-NEXT:    v_add_nc_u16 v1, v1, v3
 ; GFX10-DL-XNACK-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-DL-XNACK-NEXT:    s_endpgm
@@ -2617,71 +2635,85 @@ define amdgpu_kernel void @idot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX10-DL-NOXNACK-NEXT:    global_load_dword v0, v0, s[6:7]
 ; GFX10-DL-NOXNACK-NEXT:    global_load_ushort v3, v2, s[0:1]
 ; GFX10-DL-NOXNACK-NEXT:    s_waitcnt vmcnt(2)
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v5, 28, v1
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v7, v1, 20, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v10, v1, 8, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v11, v1, 4, 4
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v1, 15, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v5, 4, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v6, 12, v1
 ; GFX10-DL-NOXNACK-NEXT:    s_waitcnt vmcnt(1)
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v13, 15, v0
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v16, v0, 4, 4
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v12, 28, v0
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v14, v0, 24, 4
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v12, 4, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v13, 12, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v8, 8, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v15, 8, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v5, 12, v5
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v6, 12, v6
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v12, 12, v12
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v13, 12, v13
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v7, 12, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v14, 12, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v8, 12, v8
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v15, 12, v15
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v5, 12, v5
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v12, 12, v12
 ; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v13, v4, v13
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v15, v0, 20, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v17, v0, 16, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v18, v0, 12, 4
-; GFX10-DL-NOXNACK-NEXT:    v_bfe_u32 v0, v0, 8, 4
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v11, v16, 16, v13
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v17, 16, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v7, 12, v7
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v14, 12, v14
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v8, 12, v8
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v15, 12, v15
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v12, v12, 16, v13
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v9, 20, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v16, 20, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v10, 12, v10
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v17, 12, v17
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v7, 12, v7
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v6, 12, v14
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v13, v4, v15
 ; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v8, v4, v8
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v0, v4, v0
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v11, 12, v11 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v5, v5, v12
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v1, 24, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v18, 28, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v0, 24, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v9, 12, v9
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v16, 12, v16
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v10, 12, v10
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v14, 12, v17
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v6, v6, 16, v13
 ; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v0, v18, 16, v0
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v8, 12, v11 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v10, v4, v17
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v0, 12, v0 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v1, v1, v8
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v8, v15, 16, v10
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v9, 12, v9 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v0, 12, v0 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v7, 12, v7 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v10, 16, v1
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v8, 16, v5
 ; GFX10-DL-NOXNACK-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v1, v1, v3
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v3, v4, v6
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v6, 12, v8 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v4, v4, v14
-; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v0, v9, v0
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v1, v1, v10
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v3, v5, 16, v3
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v5, 12, v6 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v4, v12, 16, v4
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v1, v0
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v1, 12, v3 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_lshlrev_b16 v3, 12, v4 op_sel_hi:[0,1]
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v1, 12, v1
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v9, 12, v9
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v12, 12, v16
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v0, 12, v0
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v5, v4, v14
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v6, v7, v6
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v3, v8
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v11, 12, v11
+; GFX10-DL-NOXNACK-NEXT:    v_lshlrev_b16 v18, 12, v18
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v1, 12, v1
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v0, 12, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v5, v12, 16, v5
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v7, v9, 16, v10
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v8, 16, v6
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v3, v6
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v11, 12, v11
+; GFX10-DL-NOXNACK-NEXT:    v_ashrrev_i16 v6, 12, v18
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v0, v4, v0
+; GFX10-DL-NOXNACK-NEXT:    v_and_b32_e32 v1, v4, v1
 ; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v4, v7, v5
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v6
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v1, 12, v1 op_sel_hi:[0,1]
-; GFX10-DL-NOXNACK-NEXT:    v_pk_ashrrev_i16 v3, 12, v3 op_sel_hi:[0,1]
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v3, v8
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v0, v6, 16, v0
+; GFX10-DL-NOXNACK-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
 ; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v4
-; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v1, v1, v3
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v5
-; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
-; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v1
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v3, v3, v4
+; GFX10-DL-NOXNACK-NEXT:    v_pk_mul_lo_u16 v0, v1, v0
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v1, v3, v5
+; GFX10-DL-NOXNACK-NEXT:    v_lshrrev_b32_e32 v3, 16, v0
+; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v1, v0
 ; GFX10-DL-NOXNACK-NEXT:    v_add_nc_u16 v0, v0, v3
 ; GFX10-DL-NOXNACK-NEXT:    global_store_short v2, v0, s[0:1]
 ; GFX10-DL-NOXNACK-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/idot8u.ll b/llvm/test/CodeGen/AMDGPU/idot8u.ll
index f26a6ebeaf8cc..0b99cb5cdc813 100644
--- a/llvm/test/CodeGen/AMDGPU/idot8u.ll
+++ b/llvm/test/CodeGen/AMDGPU/idot8u.ll
@@ -2187,32 +2187,32 @@ define amdgpu_kernel void @udot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX8-NEXT:    s_add_u32 s8, s8, s3
 ; GFX8-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(2)
-; GFX8-NEXT:    v_and_b32_e32 v5, 15, v3
-; GFX8-NEXT:    v_bfe_u32 v6, v3, 4, 4
-; GFX8-NEXT:    v_bfe_u32 v7, v3, 8, 4
-; GFX8-NEXT:    v_bfe_u32 v8, v3, 12, 4
-; GFX8-NEXT:    v_bfe_u32 v9, v3, 16, 4
-; GFX8-NEXT:    v_bfe_u32 v10, v3, 20, 4
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 28, v3
+; GFX8-NEXT:    v_bfe_u32 v6, v3, 24, 4
+; GFX8-NEXT:    v_bfe_u32 v7, v3, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v8, v3, 16, 4
+; GFX8-NEXT:    v_bfe_u32 v9, v3, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v10, v3, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v11, v3, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v3, 15, v3
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_and_b32_e32 v12, 15, v2
-; GFX8-NEXT:    v_bfe_u32 v13, v2, 4, 4
-; GFX8-NEXT:    v_bfe_u32 v14, v2, 8, 4
+; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
+; GFX8-NEXT:    v_bfe_u32 v13, v2, 24, 4
+; GFX8-NEXT:    v_bfe_u32 v14, v2, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v15, v2, 16, 4
+; GFX8-NEXT:    v_bfe_u32 v16, v2, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v17, v2, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v18, v2, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_mad_u16 v4, v5, v12, v4
-; GFX8-NEXT:    v_mad_u16 v4, v6, v13, v4
-; GFX8-NEXT:    v_bfe_u32 v15, v2, 12, 4
-; GFX8-NEXT:    v_mad_u16 v4, v7, v14, v4
-; GFX8-NEXT:    v_bfe_u32 v16, v2, 16, 4
-; GFX8-NEXT:    v_mad_u16 v4, v8, v15, v4
-; GFX8-NEXT:    v_bfe_u32 v17, v2, 20, 4
-; GFX8-NEXT:    v_mad_u16 v4, v9, v16, v4
-; GFX8-NEXT:    v_bfe_u32 v11, v3, 24, 4
-; GFX8-NEXT:    v_bfe_u32 v18, v2, 24, 4
-; GFX8-NEXT:    v_mad_u16 v4, v10, v17, v4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 28, v3
-; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX8-NEXT:    v_mad_u16 v4, v11, v18, v4
 ; GFX8-NEXT:    v_mad_u16 v2, v3, v2, v4
+; GFX8-NEXT:    v_mad_u16 v2, v11, v18, v2
+; GFX8-NEXT:    v_mad_u16 v2, v10, v17, v2
+; GFX8-NEXT:    v_mad_u16 v2, v9, v16, v2
+; GFX8-NEXT:    v_mad_u16 v2, v8, v15, v2
+; GFX8-NEXT:    v_mad_u16 v2, v7, v14, v2
+; GFX8-NEXT:    v_mad_u16 v2, v6, v13, v2
+; GFX8-NEXT:    v_mad_u16 v2, v5, v12, v2
 ; GFX8-NEXT:    flat_store_short v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
@@ -2234,52 +2234,52 @@ define amdgpu_kernel void @udot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    global_load_ushort v3, v0, s[2:3]
 ; GFX9-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_bfe_u32 v5, v1, 24, 4
-; GFX9-NEXT:    v_bfe_u32 v7, v1, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v9, v1, 8, 4
-; GFX9-NEXT:    v_and_b32_e32 v11, 15, v1
+; GFX9-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v6, 15, v1
+; GFX9-NEXT:    v_bfe_u32 v7, v1, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v9, v1, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v10, v1, 16, 4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX9-NEXT:    v_bfe_u32 v1, v1, 24, 4
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_bfe_u32 v12, v2, 24, 4
-; GFX9-NEXT:    v_bfe_u32 v14, v2, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v16, v2, 8, 4
-; GFX9-NEXT:    v_and_b32_e32 v18, 15, v2
-; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 28, v1
-; GFX9-NEXT:    v_bfe_u32 v8, v1, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v10, v1, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v13, 28, v2
-; GFX9-NEXT:    v_bfe_u32 v15, v2, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v17, v2, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX9-NEXT:    v_and_b32_e32 v12, v4, v12
-; GFX9-NEXT:    v_and_b32_e32 v5, v4, v5
-; GFX9-NEXT:    v_and_b32_e32 v14, v4, v14
-; GFX9-NEXT:    v_and_b32_e32 v7, v4, v7
-; GFX9-NEXT:    v_and_b32_e32 v16, v4, v16
-; GFX9-NEXT:    v_and_b32_e32 v9, v4, v9
-; GFX9-NEXT:    v_and_b32_e32 v18, v4, v18
-; GFX9-NEXT:    v_and_b32_e32 v4, v4, v11
-; GFX9-NEXT:    v_lshl_or_b32 v2, v2, 16, v18
-; GFX9-NEXT:    v_lshl_or_b32 v1, v1, 16, v4
-; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
-; GFX9-NEXT:    v_lshl_or_b32 v11, v13, 16, v12
-; GFX9-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
-; GFX9-NEXT:    v_lshl_or_b32 v6, v15, 16, v14
-; GFX9-NEXT:    v_lshl_or_b32 v7, v8, 16, v7
-; GFX9-NEXT:    v_lshl_or_b32 v8, v17, 16, v16
-; GFX9-NEXT:    v_lshl_or_b32 v9, v10, 16, v9
+; GFX9-NEXT:    v_bfe_u32 v12, v2, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX9-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v16, v2, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v17, v2, 16, 4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-NEXT:    v_bfe_u32 v2, v2, 24, 4
+; GFX9-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX9-NEXT:    v_and_b32_e32 v17, v4, v17
+; GFX9-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX9-NEXT:    v_and_b32_e32 v15, v4, v15
+; GFX9-NEXT:    v_and_b32_e32 v8, v4, v8
+; GFX9-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX9-NEXT:    v_and_b32_e32 v4, v4, v6
+; GFX9-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
+; GFX9-NEXT:    v_lshl_or_b32 v8, v12, 16, v13
+; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v4, v8
+; GFX9-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX9-NEXT:    v_lshl_or_b32 v10, v14, 16, v15
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u16_e32 v2, v1, v3
-; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v5, v11
-; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v7, v6
-; GFX9-NEXT:    v_pk_mul_lo_u16 v6, v9, v8
+; GFX9-NEXT:    v_add_u16_e32 v3, v4, v3
+; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v7, v10
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-NEXT:    v_pk_mul_lo_u16 v2, v9, v6
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v2, v2, v1
 ; GFX9-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v6
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v5
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v4
-; GFX9-NEXT:    v_add_u16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    global_store_short v0, v1, s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -2301,52 +2301,52 @@ define amdgpu_kernel void @udot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    global_load_ushort v3, v0, s[2:3]
 ; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-DL-NEXT:    v_bfe_u32 v5, v1, 24, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 8, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v11, 15, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v6, 15, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 16, 4
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v1, v1, 24, 4
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-DL-NEXT:    v_bfe_u32 v12, v2, 24, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 8, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v18, 15, v2
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v6, 28, v1
-; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v13, 28, v2
-; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v12, v4, v12
-; GFX9-DL-NEXT:    v_and_b32_e32 v5, v4, v5
-; GFX9-DL-NEXT:    v_and_b32_e32 v14, v4, v14
-; GFX9-DL-NEXT:    v_and_b32_e32 v7, v4, v7
-; GFX9-DL-NEXT:    v_and_b32_e32 v16, v4, v16
-; GFX9-DL-NEXT:    v_and_b32_e32 v9, v4, v9
-; GFX9-DL-NEXT:    v_and_b32_e32 v18, v4, v18
-; GFX9-DL-NEXT:    v_and_b32_e32 v4, v4, v11
-; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v2, 16, v18
-; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v1, 16, v4
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
-; GFX9-DL-NEXT:    v_lshl_or_b32 v11, v13, 16, v12
-; GFX9-DL-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
-; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v15, 16, v14
-; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v8, 16, v7
-; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v17, 16, v16
-; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v10, 16, v9
+; GFX9-DL-NEXT:    v_bfe_u32 v12, v2, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 16, 4
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 24, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-DL-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX9-DL-NEXT:    v_and_b32_e32 v17, v4, v17
+; GFX9-DL-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX9-DL-NEXT:    v_and_b32_e32 v15, v4, v15
+; GFX9-DL-NEXT:    v_and_b32_e32 v8, v4, v8
+; GFX9-DL-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX9-DL-NEXT:    v_and_b32_e32 v4, v4, v6
+; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
+; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v12, 16, v13
+; GFX9-DL-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v4, v8
+; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX9-DL-NEXT:    v_lshl_or_b32 v10, v14, 16, v15
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-DL-NEXT:    v_add_u16_e32 v2, v1, v3
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v5, v11
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v7, v6
-; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v6, v9, v8
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v4, v3
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v7, v10
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
+; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v2, v9, v6
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-DL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v2, v2, v1
 ; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v6
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v5
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v4
-; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-DL-NEXT:    global_store_short v0, v1, s[2:3]
 ; GFX9-DL-NEXT:    s_endpgm
 ;
@@ -2372,52 +2372,52 @@ define amdgpu_kernel void @udot8_acc16_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX10-DL-NEXT:    v_and_b32_e32 v7, 15, v1
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(1)
 ; GFX10-DL-NEXT:    v_and_b32_e32 v6, 15, v2
-; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 4, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 4, 4
 ; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 4, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v12, v1, 8, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 8, 4
 ; GFX10-DL-NEXT:    v_and_b32_e32 v7, v4, v7
 ; GFX10-DL-NEXT:    v_and_b32_e32 v6, v4, v6
 ; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 8, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v11, v1, 16, 4
-; GFX10-DL-NEXT:    v_and_b32_e32 v12, v4, v12
-; GFX10-DL-NEXT:    v_lshl_or_b32 v7, v9, 16, v7
+; GFX10-DL-NEXT:    v_bfe_u32 v8, v1, 12, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v9, v4, v9
+; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v7
 ; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v10, 16, v6
-; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 12, 4
 ; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 12, 4
 ; GFX10-DL-NEXT:    v_and_b32_e32 v13, v4, v13
-; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 24, 4
-; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v6, v7, v6
-; GFX10-DL-NEXT:    v_bfe_u32 v7, v2, 16, 4
-; GFX10-DL-NEXT:    v_lshl_or_b32 v9, v9, 16, v12
+; GFX10-DL-NEXT:    v_bfe_u32 v12, v1, 16, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v8, v8, 16, v9
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v5, v5, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v6, v2, 16, 4
 ; GFX10-DL-NEXT:    v_lshl_or_b32 v10, v10, 16, v13
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v8, 28, v1
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v12, 16, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v11, v1, 20, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v12, v4, v12
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v6, v3
-; GFX10-DL-NEXT:    v_bfe_u32 v1, v1, 20, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v6, v2, 20, 4
-; GFX10-DL-NEXT:    v_and_b32_e32 v7, v4, v7
-; GFX10-DL-NEXT:    v_and_b32_e32 v11, v4, v11
-; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v9, v9, v10
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v12
-; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 24, 4
-; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v6, 16, v7
-; GFX10-DL-NEXT:    v_lshl_or_b32 v1, v1, 16, v11
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v7, 16, v9
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-NEXT:    v_bfe_u32 v5, v2, 20, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v8, v8, v10
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
 ; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v9
+; GFX10-DL-NEXT:    v_bfe_u32 v1, v1, 24, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v2, 24, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
+; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v11, 16, v12
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v8
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX10-DL-NEXT:    v_and_b32_e32 v9, v4, v10
-; GFX10-DL-NEXT:    v_and_b32_e32 v4, v4, v5
-; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v6
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v7
-; GFX10-DL-NEXT:    v_lshl_or_b32 v2, v2, 16, v9
-; GFX10-DL-NEXT:    v_lshl_or_b32 v4, v8, 16, v4
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v5, 16, v1
-; GFX10-DL-NEXT:    v_add_nc_u16 v1, v3, v1
-; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v2, v4, v2
-; GFX10-DL-NEXT:    v_add_nc_u16 v1, v1, v5
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
-; GFX10-DL-NEXT:    v_add_nc_u16 v1, v1, v2
+; GFX10-DL-NEXT:    v_and_b32_e32 v8, v4, v9
+; GFX10-DL-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v4, v6, v5
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v10
+; GFX10-DL-NEXT:    v_lshl_or_b32 v2, v2, 16, v8
+; GFX10-DL-NEXT:    v_lshl_or_b32 v1, v7, 16, v1
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v4
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX10-DL-NEXT:    v_add_nc_u16 v2, v3, v5
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
+; GFX10-DL-NEXT:    v_add_nc_u16 v1, v2, v1
 ; GFX10-DL-NEXT:    v_add_nc_u16 v1, v1, v3
 ; GFX10-DL-NEXT:    global_store_short v0, v1, s[0:1]
 ; GFX10-DL-NEXT:    s_endpgm
@@ -2575,52 +2575,52 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX8-NEXT:    s_add_u32 s8, s8, s3
 ; GFX8-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(2)
-; GFX8-NEXT:    v_bfe_u32 v5, v3, 16, 4
-; GFX8-NEXT:    v_bfe_u32 v6, v3, 20, 4
-; GFX8-NEXT:    v_bfe_u32 v7, v3, 24, 4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 28, v3
-; GFX8-NEXT:    v_bfe_u32 v9, v3, 8, 4
-; GFX8-NEXT:    v_bfe_u32 v10, v3, 12, 4
+; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 28, v3
+; GFX8-NEXT:    v_bfe_u32 v10, v3, 24, 4
+; GFX8-NEXT:    v_bfe_u32 v11, v3, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v7, v3, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v8, v3, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v12, v3, 16, 4
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_bfe_u32 v12, v2, 16, 4
-; GFX8-NEXT:    v_bfe_u32 v13, v2, 20, 4
-; GFX8-NEXT:    v_bfe_u32 v14, v2, 24, 4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v15, 28, v2
-; GFX8-NEXT:    v_and_b32_e32 v11, 15, v3
-; GFX8-NEXT:    v_bfe_u32 v3, v3, 4, 4
-; GFX8-NEXT:    v_bfe_u32 v16, v2, 8, 4
-; GFX8-NEXT:    v_bfe_u32 v17, v2, 12, 4
-; GFX8-NEXT:    v_and_b32_e32 v18, 15, v2
-; GFX8-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX8-NEXT:    v_mul_lo_u16_e32 v19, v5, v12
-; GFX8-NEXT:    v_mul_lo_u16_sdwa v6, v6, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_mul_lo_u16_e32 v13, v7, v14
-; GFX8-NEXT:    v_mul_lo_u16_sdwa v8, v8, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_mul_lo_u16_e32 v9, v9, v16
-; GFX8-NEXT:    v_mul_lo_u16_sdwa v10, v10, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_mul_lo_u16_sdwa v15, v3, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v3, v19, v6
-; GFX8-NEXT:    v_or_b32_e32 v6, v13, v8
-; GFX8-NEXT:    v_or_b32_e32 v8, v9, v10
-; GFX8-NEXT:    v_lshlrev_b32_e32 v10, 16, v6
-; GFX8-NEXT:    v_mul_lo_u16_e32 v11, v11, v18
-; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v8
-; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_or_b32_e32 v9, v11, v15
-; GFX8-NEXT:    v_or_b32_e32 v10, v15, v2
-; GFX8-NEXT:    v_lshrrev_b32_e32 v11, 8, v3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v16, 28, v2
+; GFX8-NEXT:    v_bfe_u32 v17, v2, 24, 4
+; GFX8-NEXT:    v_bfe_u32 v18, v2, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v19, v2, 16, 4
+; GFX8-NEXT:    v_mul_lo_u16_sdwa v11, v11, v18 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_mul_lo_u16_e32 v18, v10, v17
+; GFX8-NEXT:    v_mul_lo_u16_sdwa v9, v9, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_bfe_u32 v5, v3, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v6, 15, v3
+; GFX8-NEXT:    v_bfe_u32 v3, v2, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX8-NEXT:    v_mul_lo_u16_e32 v2, v12, v19
+; GFX8-NEXT:    v_mul_lo_u16_e32 v8, v8, v15
+; GFX8-NEXT:    v_mul_lo_u16_sdwa v7, v7, v14 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v9, v18, v9
+; GFX8-NEXT:    v_mul_lo_u16_sdwa v5, v5, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v3, v2, v11
+; GFX8-NEXT:    v_or_b32_e32 v7, v8, v7
+; GFX8-NEXT:    v_lshlrev_b32_e32 v8, 16, v9
+; GFX8-NEXT:    v_mul_lo_u16_e32 v6, v6, v13
+; GFX8-NEXT:    v_lshlrev_b32_e32 v2, 16, v7
+; GFX8-NEXT:    v_or_b32_sdwa v3, v3, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX8-NEXT:    v_or_b32_e32 v6, v6, v5
+; GFX8-NEXT:    v_or_b32_e32 v5, v5, v2
+; GFX8-NEXT:    v_lshrrev_b32_e32 v8, 8, v3
 ; GFX8-NEXT:    v_lshrrev_b64 v[2:3], 24, v[2:3]
-; GFX8-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 8, v5
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_add_u16_e32 v3, v9, v4
-; GFX8-NEXT:    v_add_u16_e32 v3, v3, v10
-; GFX8-NEXT:    v_add_u16_e32 v3, v3, v8
+; GFX8-NEXT:    v_add_u16_e32 v3, v6, v4
+; GFX8-NEXT:    v_add_u16_e32 v3, v3, v5
+; GFX8-NEXT:    v_add_u16_e32 v3, v3, v7
 ; GFX8-NEXT:    v_add_u16_e32 v2, v3, v2
-; GFX8-NEXT:    v_mad_u16 v2, v5, v12, v2
-; GFX8-NEXT:    v_add_u16_e32 v2, v2, v11
-; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 8, v6
-; GFX8-NEXT:    v_mad_u16 v2, v7, v14, v2
-; GFX8-NEXT:    v_add_u16_e32 v2, v2, v6
+; GFX8-NEXT:    v_mad_u16 v2, v12, v19, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, v2, v8
+; GFX8-NEXT:    v_lshrrev_b32_e32 v9, 8, v9
+; GFX8-NEXT:    v_mad_u16 v2, v10, v17, v2
+; GFX8-NEXT:    v_add_u16_e32 v2, v2, v9
 ; GFX8-NEXT:    flat_store_byte v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
@@ -2641,52 +2641,52 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    global_load_ubyte v4, v3, s[2:3]
 ; GFX9-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_bfe_u32 v0, v1, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 28, v1
+; GFX9-NEXT:    v_bfe_u32 v9, v1, 24, 4
+; GFX9-NEXT:    v_bfe_u32 v10, v1, 20, 4
 ; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_bfe_u32 v12, v2, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v14, 28, v2
-; GFX9-NEXT:    v_bfe_u32 v5, v1, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v8, v1, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX9-NEXT:    v_and_b32_e32 v10, 15, v1
-; GFX9-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX9-NEXT:    v_bfe_u32 v11, v2, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v15, v2, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v16, v2, 12, 4
-; GFX9-NEXT:    v_and_b32_e32 v17, 15, v2
-; GFX9-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX9-NEXT:    v_mul_lo_u16_sdwa v0, v0, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_mul_lo_u16_e32 v12, v6, v13
-; GFX9-NEXT:    v_mul_lo_u16_sdwa v7, v7, v14 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_mul_lo_u16_e32 v18, v5, v11
-; GFX9-NEXT:    v_mul_lo_u16_e32 v8, v8, v15
-; GFX9-NEXT:    v_mul_lo_u16_sdwa v9, v9, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_mul_lo_u16_e32 v10, v10, v17
-; GFX9-NEXT:    v_mul_lo_u16_sdwa v2, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-NEXT:    v_or_b32_e32 v7, v12, v7
-; GFX9-NEXT:    v_or_b32_e32 v1, v18, v0
-; GFX9-NEXT:    v_or_b32_e32 v8, v8, v9
-; GFX9-NEXT:    v_or_b32_e32 v9, v10, v2
-; GFX9-NEXT:    v_lshlrev_b32_e32 v10, 16, v7
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v8
-; GFX9-NEXT:    v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX9-NEXT:    v_or_b32_e32 v2, v2, v0
+; GFX9-NEXT:    v_lshrrev_b32_e32 v15, 28, v2
+; GFX9-NEXT:    v_bfe_u32 v16, v2, 24, 4
+; GFX9-NEXT:    v_bfe_u32 v17, v2, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v0, v1, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v5, 15, v1
+; GFX9-NEXT:    v_bfe_u32 v6, v1, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v7, v1, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v11, v1, 16, 4
+; GFX9-NEXT:    v_bfe_u32 v1, v2, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v12, 15, v2
+; GFX9-NEXT:    v_bfe_u32 v13, v2, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v14, v2, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v2, v2, 16, 4
+; GFX9-NEXT:    v_mul_lo_u16_sdwa v10, v10, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_mul_lo_u16_e32 v17, v9, v16
+; GFX9-NEXT:    v_mul_lo_u16_sdwa v8, v8, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_mul_lo_u16_e32 v18, v11, v2
+; GFX9-NEXT:    v_mul_lo_u16_e32 v7, v7, v14
+; GFX9-NEXT:    v_mul_lo_u16_sdwa v6, v6, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v8, v17, v8
+; GFX9-NEXT:    v_mul_lo_u16_e32 v5, v5, v12
+; GFX9-NEXT:    v_mul_lo_u16_sdwa v12, v0, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v1, v18, v10
+; GFX9-NEXT:    v_or_b32_e32 v6, v7, v6
+; GFX9-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
+; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 16, v6
+; GFX9-NEXT:    v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX9-NEXT:    v_or_b32_e32 v5, v5, v12
+; GFX9-NEXT:    v_or_b32_e32 v7, v12, v0
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 8, v1
 ; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 24, v[0:1]
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
+; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_add_u16_e32 v1, v9, v4
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v2
-; GFX9-NEXT:    v_add_u16_e32 v1, v1, v8
+; GFX9-NEXT:    v_add_u16_e32 v1, v5, v4
+; GFX9-NEXT:    v_add_u16_e32 v1, v1, v7
+; GFX9-NEXT:    v_add_u16_e32 v1, v1, v6
 ; GFX9-NEXT:    v_add_u16_e32 v0, v1, v0
-; GFX9-NEXT:    v_mad_legacy_u16 v0, v5, v11, v0
+; GFX9-NEXT:    v_mad_legacy_u16 v0, v11, v2, v0
 ; GFX9-NEXT:    v_add_u16_e32 v0, v0, v10
-; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
-; GFX9-NEXT:    v_mad_legacy_u16 v0, v6, v13, v0
-; GFX9-NEXT:    v_add_u16_e32 v0, v0, v7
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; GFX9-NEXT:    v_mad_legacy_u16 v0, v9, v16, v0
+; GFX9-NEXT:    v_add_u16_e32 v0, v0, v8
 ; GFX9-NEXT:    global_store_byte v3, v0, s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -2707,52 +2707,52 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    global_load_ubyte v4, v3, s[2:3]
 ; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-DL-NEXT:    v_bfe_u32 v0, v1, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v8, 28, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 24, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 20, 4
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-DL-NEXT:    v_bfe_u32 v12, v2, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v14, 28, v2
-; GFX9-DL-NEXT:    v_bfe_u32 v5, v1, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 12, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v10, 15, v1
-; GFX9-DL-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v11, v2, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 12, 4
-; GFX9-DL-NEXT:    v_and_b32_e32 v17, 15, v2
-; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 4, 4
-; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v0, v0, v12 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v12, v6, v13
-; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v7, v7, v14 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v18, v5, v11
-; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v8, v8, v15
-; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v9, v9, v16 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v10, v10, v17
-; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v2, v1, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX9-DL-NEXT:    v_or_b32_e32 v7, v12, v7
-; GFX9-DL-NEXT:    v_or_b32_e32 v1, v18, v0
-; GFX9-DL-NEXT:    v_or_b32_e32 v8, v8, v9
-; GFX9-DL-NEXT:    v_or_b32_e32 v9, v10, v2
-; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v10, 16, v7
-; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v0, 16, v8
-; GFX9-DL-NEXT:    v_or_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX9-DL-NEXT:    v_or_b32_e32 v2, v2, v0
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v15, 28, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 24, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v0, v1, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v5, 15, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v6, v1, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v11, v1, 16, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v1, v2, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v12, 15, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v13, v2, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 16, 4
+; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v10, v10, v17 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v17, v9, v16
+; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v8, v8, v15 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v18, v11, v2
+; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v7, v7, v14
+; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v6, v6, v13 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-DL-NEXT:    v_or_b32_e32 v8, v17, v8
+; GFX9-DL-NEXT:    v_mul_lo_u16_e32 v5, v5, v12
+; GFX9-DL-NEXT:    v_mul_lo_u16_sdwa v12, v0, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX9-DL-NEXT:    v_or_b32_e32 v1, v18, v10
+; GFX9-DL-NEXT:    v_or_b32_e32 v6, v7, v6
+; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
+; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v0, 16, v6
+; GFX9-DL-NEXT:    v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX9-DL-NEXT:    v_or_b32_e32 v5, v5, v12
+; GFX9-DL-NEXT:    v_or_b32_e32 v7, v12, v0
 ; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v10, 8, v1
 ; GFX9-DL-NEXT:    v_lshrrev_b64 v[0:1], 24, v[0:1]
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v9, v4
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v2
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v8
+; GFX9-DL-NEXT:    v_add_u16_e32 v1, v5, v4
+; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v7
+; GFX9-DL-NEXT:    v_add_u16_e32 v1, v1, v6
 ; GFX9-DL-NEXT:    v_add_u16_e32 v0, v1, v0
-; GFX9-DL-NEXT:    v_mad_legacy_u16 v0, v5, v11, v0
+; GFX9-DL-NEXT:    v_mad_legacy_u16 v0, v11, v2, v0
 ; GFX9-DL-NEXT:    v_add_u16_e32 v0, v0, v10
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v7, 8, v7
-; GFX9-DL-NEXT:    v_mad_legacy_u16 v0, v6, v13, v0
-; GFX9-DL-NEXT:    v_add_u16_e32 v0, v0, v7
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v8, 8, v8
+; GFX9-DL-NEXT:    v_mad_legacy_u16 v0, v9, v16, v0
+; GFX9-DL-NEXT:    v_add_u16_e32 v0, v0, v8
 ; GFX9-DL-NEXT:    global_store_byte v3, v0, s[2:3]
 ; GFX9-DL-NEXT:    s_endpgm
 ;
@@ -2774,55 +2774,55 @@ define amdgpu_kernel void @udot8_acc8_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX10-DL-NEXT:    global_load_dword v2, v0, s[6:7]
 ; GFX10-DL-NEXT:    global_load_ubyte v3, v4, s[0:1]
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 12, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v6, v1, 12, 4
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(1)
 ; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 12, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v7, v1, 8, 4
 ; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 8, 4
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v8, 28, v1
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v14, 28, v2
-; GFX10-DL-NEXT:    v_mul_lo_u16 v9, v9, v10
-; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 16, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v0, v1, 20, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v6, v1, 24, 4
-; GFX10-DL-NEXT:    v_and_b32_e32 v11, 15, v1
-; GFX10-DL-NEXT:    v_bfe_u32 v1, v1, 4, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v15, v2, 4, 4
-; GFX10-DL-NEXT:    v_mul_lo_u16 v8, v8, v13
-; GFX10-DL-NEXT:    v_lshlrev_b16 v9, 8, v9
-; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 20, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 24, 4
-; GFX10-DL-NEXT:    v_mul_lo_u16 v7, v7, v14
-; GFX10-DL-NEXT:    v_bfe_u32 v12, v2, 16, 4
-; GFX10-DL-NEXT:    v_and_b32_e32 v2, 15, v2
-; GFX10-DL-NEXT:    v_mul_lo_u16 v1, v1, v15
-; GFX10-DL-NEXT:    v_or_b32_e32 v8, v8, v9
-; GFX10-DL-NEXT:    v_mul_lo_u16 v9, v0, v10
-; GFX10-DL-NEXT:    v_mul_lo_u16 v10, v6, v13
-; GFX10-DL-NEXT:    v_lshlrev_b16 v7, 8, v7
+; GFX10-DL-NEXT:    v_mul_lo_u16 v6, v6, v10
+; GFX10-DL-NEXT:    v_bfe_u32 v0, v1, 4, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v5, 15, v1
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 24, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v11, v1, 20, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v12, v1, 16, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v1, v2, 4, 4
+; GFX10-DL-NEXT:    v_mul_lo_u16 v7, v7, v13
+; GFX10-DL-NEXT:    v_lshlrev_b16 v6, 8, v6
+; GFX10-DL-NEXT:    v_and_b32_e32 v10, 15, v2
+; GFX10-DL-NEXT:    v_bfe_u32 v15, v2, 24, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 20, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v16, v2, 16, 4
+; GFX10-DL-NEXT:    v_mul_lo_u16 v2, v8, v14
+; GFX10-DL-NEXT:    v_mul_lo_u16 v0, v0, v1
+; GFX10-DL-NEXT:    v_or_b32_e32 v6, v7, v6
+; GFX10-DL-NEXT:    v_mul_lo_u16 v1, v11, v13
+; GFX10-DL-NEXT:    v_mul_lo_u16 v7, v9, v15
+; GFX10-DL-NEXT:    v_lshlrev_b16 v2, 8, v2
+; GFX10-DL-NEXT:    v_lshlrev_b16 v8, 8, v0
+; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v0, 16, v6
+; GFX10-DL-NEXT:    v_mul_lo_u16 v5, v5, v10
+; GFX10-DL-NEXT:    v_mul_lo_u16 v10, v12, v16
 ; GFX10-DL-NEXT:    v_lshlrev_b16 v1, 8, v1
-; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v0, 16, v8
-; GFX10-DL-NEXT:    v_mul_lo_u16 v2, v11, v2
-; GFX10-DL-NEXT:    v_mul_lo_u16 v11, v5, v12
-; GFX10-DL-NEXT:    v_lshlrev_b16 v9, 8, v9
-; GFX10-DL-NEXT:    v_or_b32_e32 v7, v10, v7
-; GFX10-DL-NEXT:    v_or_b32_sdwa v10, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-DL-NEXT:    v_or_b32_e32 v1, v2, v1
-; GFX10-DL-NEXT:    v_or_b32_e32 v2, v11, v9
-; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v9, 16, v7
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v10, 8, v10
+; GFX10-DL-NEXT:    v_or_b32_e32 v7, v7, v2
+; GFX10-DL-NEXT:    v_or_b32_sdwa v2, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX10-DL-NEXT:    v_or_b32_e32 v5, v5, v8
+; GFX10-DL-NEXT:    v_or_b32_e32 v1, v10, v1
+; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v8, 16, v7
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v2, 8, v2
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v1, v3
-; GFX10-DL-NEXT:    v_or_b32_sdwa v1, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX10-DL-NEXT:    v_add_nc_u16 v9, v3, v10
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-NEXT:    v_or_b32_sdwa v1, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; GFX10-DL-NEXT:    v_add_nc_u16 v5, v3, v2
 ; GFX10-DL-NEXT:    v_lshrrev_b64 v[2:3], 24, v[0:1]
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v1, 8, v1
-; GFX10-DL-NEXT:    v_add_nc_u16 v0, v9, v8
+; GFX10-DL-NEXT:    v_add_nc_u16 v0, v5, v6
 ; GFX10-DL-NEXT:    v_add_nc_u16 v0, v0, v2
-; GFX10-DL-NEXT:    v_mad_u16 v0, v5, v12, v0
+; GFX10-DL-NEXT:    v_mad_u16 v0, v12, v16, v0
 ; GFX10-DL-NEXT:    v_add_nc_u16 v0, v0, v1
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v1, 8, v7
-; GFX10-DL-NEXT:    v_mad_u16 v0, v6, v13, v0
+; GFX10-DL-NEXT:    v_mad_u16 v0, v9, v15, v0
 ; GFX10-DL-NEXT:    v_add_nc_u16 v0, v0, v1
 ; GFX10-DL-NEXT:    global_store_byte v4, v0, s[0:1]
 ; GFX10-DL-NEXT:    s_endpgm
@@ -2941,40 +2941,32 @@ define amdgpu_kernel void @udot8_acc4_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX8-NEXT:    s_add_u32 s8, s8, s3
 ; GFX8-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX8-NEXT:    s_waitcnt vmcnt(2)
-; GFX8-NEXT:    v_and_b32_e32 v5, 15, v3
-; GFX8-NEXT:    v_bfe_u32 v6, v3, 4, 4
-; GFX8-NEXT:    v_bfe_u32 v7, v3, 8, 4
-; GFX8-NEXT:    v_bfe_u32 v8, v3, 12, 4
-; GFX8-NEXT:    v_bfe_u32 v9, v3, 16, 4
-; GFX8-NEXT:    v_bfe_u32 v10, v3, 20, 4
+; GFX8-NEXT:    v_lshrrev_b32_e32 v5, 28, v3
+; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 24, v3
+; GFX8-NEXT:    v_bfe_u32 v7, v3, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v8, v3, 16, 4
+; GFX8-NEXT:    v_bfe_u32 v9, v3, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v10, v3, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v11, v3, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v3, 15, v3
 ; GFX8-NEXT:    s_waitcnt vmcnt(1)
-; GFX8-NEXT:    v_and_b32_e32 v12, 15, v2
-; GFX8-NEXT:    v_bfe_u32 v13, v2, 4, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v5, v5, v12
-; GFX8-NEXT:    v_bfe_u32 v14, v2, 8, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v6, v6, v13
+; GFX8-NEXT:    v_lshrrev_b32_e32 v12, 28, v2
+; GFX8-NEXT:    v_lshrrev_b32_e32 v13, 24, v2
+; GFX8-NEXT:    v_bfe_u32 v14, v2, 20, 4
+; GFX8-NEXT:    v_bfe_u32 v15, v2, 16, 4
+; GFX8-NEXT:    v_bfe_u32 v16, v2, 12, 4
+; GFX8-NEXT:    v_bfe_u32 v17, v2, 8, 4
+; GFX8-NEXT:    v_bfe_u32 v18, v2, 4, 4
+; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
 ; GFX8-NEXT:    s_waitcnt vmcnt(0)
-; GFX8-NEXT:    v_add_u16_e32 v4, v5, v4
-; GFX8-NEXT:    v_bfe_u32 v15, v2, 12, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v7, v7, v14
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v6
-; GFX8-NEXT:    v_bfe_u32 v16, v2, 16, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v8, v8, v15
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v7
-; GFX8-NEXT:    v_bfe_u32 v17, v2, 20, 4
-; GFX8-NEXT:    v_mul_u32_u24_e32 v9, v9, v16
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v8
-; GFX8-NEXT:    v_bfe_u32 v11, v3, 24, 4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 28, v3
-; GFX8-NEXT:    v_bfe_u32 v18, v2, 24, 4
-; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX8-NEXT:    v_mul_u32_u24_e32 v10, v10, v17
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v9
-; GFX8-NEXT:    v_mul_u32_u24_e32 v2, v3, v2
-; GFX8-NEXT:    v_mul_u32_u24_e32 v3, v11, v18
-; GFX8-NEXT:    v_add_u16_e32 v4, v4, v10
-; GFX8-NEXT:    v_add_u16_e32 v3, v4, v3
-; GFX8-NEXT:    v_add_u16_e32 v2, v3, v2
+; GFX8-NEXT:    v_mad_u16 v2, v3, v2, v4
+; GFX8-NEXT:    v_mad_u16 v2, v11, v18, v2
+; GFX8-NEXT:    v_mad_u16 v2, v10, v17, v2
+; GFX8-NEXT:    v_mad_u16 v2, v9, v16, v2
+; GFX8-NEXT:    v_mad_u16 v2, v8, v15, v2
+; GFX8-NEXT:    v_mad_u16 v2, v7, v14, v2
+; GFX8-NEXT:    v_mad_u16 v2, v6, v13, v2
+; GFX8-NEXT:    v_mad_u16 v2, v5, v12, v2
 ; GFX8-NEXT:    v_and_b32_e32 v2, 15, v2
 ; GFX8-NEXT:    flat_store_byte v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
@@ -2989,47 +2981,60 @@ define amdgpu_kernel void @udot8_acc4_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX9-NEXT:    global_load_dword v2, v0, s[6:7]
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    global_load_ubyte v3, v0, s[2:3]
+; GFX9-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-NEXT:    v_and_b32_e32 v4, 15, v1
-; GFX9-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-NEXT:    v_and_b32_e32 v11, 15, v2
 ; GFX9-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX9-NEXT:    v_and_b32_e32 v6, 15, v1
+; GFX9-NEXT:    v_bfe_u32 v7, v1, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v9, v1, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v10, v1, 16, 4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX9-NEXT:    v_bfe_u32 v1, v1, 24, 4
+; GFX9-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-NEXT:    v_bfe_u32 v12, v2, 4, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v4, v4, v11
-; GFX9-NEXT:    v_bfe_u32 v6, v1, 8, 4
-; GFX9-NEXT:    v_bfe_u32 v13, v2, 8, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v5, v5, v12
+; GFX9-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX9-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX9-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX9-NEXT:    v_bfe_u32 v16, v2, 20, 4
+; GFX9-NEXT:    v_bfe_u32 v17, v2, 16, 4
+; GFX9-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-NEXT:    v_bfe_u32 v2, v2, 24, 4
+; GFX9-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX9-NEXT:    v_and_b32_e32 v17, v4, v17
+; GFX9-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX9-NEXT:    v_and_b32_e32 v15, v4, v15
+; GFX9-NEXT:    v_and_b32_e32 v8, v4, v8
+; GFX9-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX9-NEXT:    v_and_b32_e32 v4, v4, v6
+; GFX9-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
+; GFX9-NEXT:    v_lshl_or_b32 v8, v12, 16, v13
+; GFX9-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-NEXT:    v_pk_mul_lo_u16 v4, v4, v8
+; GFX9-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX9-NEXT:    v_lshl_or_b32 v10, v14, 16, v15
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_add_u16_e32 v3, v4, v3
-; GFX9-NEXT:    v_bfe_u32 v7, v1, 12, 4
-; GFX9-NEXT:    v_bfe_u32 v14, v2, 12, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v6, v6, v13
+; GFX9-NEXT:    v_pk_mul_lo_u16 v5, v7, v10
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
+; GFX9-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
 ; GFX9-NEXT:    v_add_u16_e32 v3, v3, v5
-; GFX9-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX9-NEXT:    v_bfe_u32 v15, v2, 16, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v7, v7, v14
-; GFX9-NEXT:    v_add_u16_e32 v3, v3, v6
-; GFX9-NEXT:    v_bfe_u32 v9, v1, 20, 4
-; GFX9-NEXT:    v_bfe_u32 v16, v2, 20, 4
-; GFX9-NEXT:    v_mul_u32_u24_e32 v8, v8, v15
-; GFX9-NEXT:    v_add_u16_e32 v3, v3, v7
-; GFX9-NEXT:    v_bfe_u32 v10, v1, 24, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v1, 28, v1
-; GFX9-NEXT:    v_bfe_u32 v17, v2, 24, 4
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX9-NEXT:    v_mul_u32_u24_e32 v9, v9, v16
-; GFX9-NEXT:    v_add_u16_e32 v3, v3, v8
-; GFX9-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX9-NEXT:    v_mul_u32_u24_e32 v2, v10, v17
-; GFX9-NEXT:    v_add_u16_e32 v3, v3, v9
-; GFX9-NEXT:    v_add_u16_e32 v2, v3, v2
-; GFX9-NEXT:    v_add_u16_e32 v1, v2, v1
+; GFX9-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-NEXT:    v_pk_mul_lo_u16 v2, v9, v6
+; GFX9-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT:    v_add_u16_e32 v2, v2, v1
+; GFX9-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    v_and_b32_e32 v1, 15, v1
 ; GFX9-NEXT:    global_store_byte v0, v1, s[2:3]
 ; GFX9-NEXT:    s_endpgm
@@ -3044,106 +3049,135 @@ define amdgpu_kernel void @udot8_acc4_vecMul(<8 x i4> addrspace(1)* %src1,
 ; GFX9-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-DL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-DL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
-; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
+; GFX9-DL-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-DL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-DL-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX9-DL-NEXT:    global_load_dword v2, v0, s[6:7]
 ; GFX9-DL-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-DL-NEXT:    global_load_ubyte v3, v0, s[2:3]
+; GFX9-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX9-DL-NEXT:    v_and_b32_e32 v4, 15, v1
-; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX9-DL-NEXT:    v_and_b32_e32 v11, 15, v2
 ; GFX9-DL-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v6, 15, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 16, 4
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v11, 28, v1
+; GFX9-DL-NEXT:    v_bfe_u32 v1, v1, 24, 4
+; GFX9-DL-NEXT:    s_waitcnt vmcnt(1)
 ; GFX9-DL-NEXT:    v_bfe_u32 v12, v2, 4, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v4, v4, v11
-; GFX9-DL-NEXT:    v_bfe_u32 v6, v1, 8, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v13, v2, 8, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v5, v5, v12
+; GFX9-DL-NEXT:    v_and_b32_e32 v13, 15, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 12, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 8, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 20, 4
+; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 16, 4
+; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v18, 28, v2
+; GFX9-DL-NEXT:    v_bfe_u32 v2, v2, 24, 4
+; GFX9-DL-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX9-DL-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX9-DL-NEXT:    v_and_b32_e32 v17, v4, v17
+; GFX9-DL-NEXT:    v_and_b32_e32 v10, v4, v10
+; GFX9-DL-NEXT:    v_and_b32_e32 v15, v4, v15
+; GFX9-DL-NEXT:    v_and_b32_e32 v8, v4, v8
+; GFX9-DL-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX9-DL-NEXT:    v_and_b32_e32 v4, v4, v6
+; GFX9-DL-NEXT:    v_lshl_or_b32 v7, v7, 16, v8
+; GFX9-DL-NEXT:    v_lshl_or_b32 v8, v12, 16, v13
+; GFX9-DL-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v4, v4, v8
+; GFX9-DL-NEXT:    v_lshl_or_b32 v9, v9, 16, v10
+; GFX9-DL-NEXT:    v_lshl_or_b32 v10, v14, 16, v15
 ; GFX9-DL-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-DL-NEXT:    v_add_u16_e32 v3, v4, v3
-; GFX9-DL-NEXT:    v_bfe_u32 v7, v1, 12, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v14, v2, 12, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v6, v6, v13
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v5, v7, v10
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v2, v18, 16, v2
+; GFX9-DL-NEXT:    v_lshl_or_b32 v1, v11, 16, v1
+; GFX9-DL-NEXT:    v_lshl_or_b32 v6, v16, 16, v17
 ; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v5
-; GFX9-DL-NEXT:    v_bfe_u32 v8, v1, 16, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v15, v2, 16, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v7, v7, v14
-; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v6
-; GFX9-DL-NEXT:    v_bfe_u32 v9, v1, 20, 4
-; GFX9-DL-NEXT:    v_bfe_u32 v16, v2, 20, 4
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v8, v8, v15
-; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v7
-; GFX9-DL-NEXT:    v_bfe_u32 v10, v1, 24, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v1, 28, v1
-; GFX9-DL-NEXT:    v_bfe_u32 v17, v2, 24, 4
-; GFX9-DL-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v9, v9, v16
-; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v8
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX9-DL-NEXT:    v_mul_u32_u24_e32 v2, v10, v17
-; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v9
-; GFX9-DL-NEXT:    v_add_u16_e32 v2, v3, v2
-; GFX9-DL-NEXT:    v_add_u16_e32 v1, v2, v1
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX9-DL-NEXT:    v_pk_mul_lo_u16 v2, v9, v6
+; GFX9-DL-NEXT:    v_add_u16_sdwa v3, v3, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v3, v3, v2
+; GFX9-DL-NEXT:    v_add_u16_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-DL-NEXT:    v_add_u16_e32 v2, v2, v1
+; GFX9-DL-NEXT:    v_add_u16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-DL-NEXT:    v_and_b32_e32 v1, 15, v1
 ; GFX9-DL-NEXT:    global_store_byte v0, v1, s[2:3]
 ; GFX9-DL-NEXT:    s_endpgm
 ;
 ; GFX10-DL-LABEL: udot8_acc4_vecMul:
 ; GFX10-DL:       ; %bb.0: ; %entry
+; GFX10-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-DL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-DL-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX10-DL-NEXT:    s_mov_b32 s8, SCRATCH_RSRC_DWORD0
 ; GFX10-DL-NEXT:    s_mov_b32 s9, SCRATCH_RSRC_DWORD1
 ; GFX10-DL-NEXT:    s_mov_b32 s10, -1
 ; GFX10-DL-NEXT:    s_mov_b32 s11, 0x31c16000
 ; GFX10-DL-NEXT:    s_add_u32 s8, s8, s3
-; GFX10-DL-NEXT:    s_clause 0x1
-; GFX10-DL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
-; GFX10-DL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX10-DL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
 ; GFX10-DL-NEXT:    s_addc_u32 s9, s9, 0
 ; GFX10-DL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-DL-NEXT:    s_clause 0x1
 ; GFX10-DL-NEXT:    global_load_dword v1, v0, s[4:5]
 ; GFX10-DL-NEXT:    global_load_dword v2, v0, s[6:7]
 ; GFX10-DL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10-DL-NEXT:    global_load_ubyte v3, v0, s[2:3]
+; GFX10-DL-NEXT:    global_load_ubyte v3, v0, s[0:1]
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(2)
-; GFX10-DL-NEXT:    v_and_b32_e32 v4, 15, v1
+; GFX10-DL-NEXT:    v_and_b32_e32 v7, 15, v1
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(1)
-; GFX10-DL-NEXT:    v_and_b32_e32 v5, 15, v2
-; GFX10-DL-NEXT:    v_bfe_u32 v6, v1, 4, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v7, v2, 4, 4
-; GFX10-DL-NEXT:    v_bfe_u32 v8, v2, 8, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v4, v4, v5
-; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 8, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v6, v6, v7
-; GFX10-DL-NEXT:    v_bfe_u32 v7, v2, 12, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v6, 15, v2
+; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 4, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 4, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v1, 8, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v7, v4, v7
+; GFX10-DL-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v13, v2, 8, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v8, v1, 12, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v9, v4, v9
+; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v7
+; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v10, 16, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v10, v2, 12, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v13, v4, v13
+; GFX10-DL-NEXT:    v_bfe_u32 v12, v1, 16, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v8, v8, 16, v9
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v5, v5, v6
+; GFX10-DL-NEXT:    v_bfe_u32 v6, v2, 16, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v10, v10, 16, v13
+; GFX10-DL-NEXT:    v_bfe_u32 v11, v1, 20, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v12, v4, v12
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
 ; GFX10-DL-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v4, v3
-; GFX10-DL-NEXT:    v_bfe_u32 v4, v1, 12, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v5, v5, v8
-; GFX10-DL-NEXT:    v_bfe_u32 v8, v2, 16, 4
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v6
-; GFX10-DL-NEXT:    v_bfe_u32 v6, v1, 16, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v4, v4, v7
-; GFX10-DL-NEXT:    v_bfe_u32 v7, v2, 20, 4
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v5
-; GFX10-DL-NEXT:    v_bfe_u32 v5, v1, 20, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v6, v6, v8
-; GFX10-DL-NEXT:    v_bfe_u32 v8, v2, 24, 4
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v5, v3
+; GFX10-DL-NEXT:    v_bfe_u32 v5, v2, 20, 4
+; GFX10-DL-NEXT:    v_and_b32_e32 v6, v4, v6
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v8, v8, v10
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v7, 28, v1
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v9
+; GFX10-DL-NEXT:    v_bfe_u32 v1, v1, 24, 4
+; GFX10-DL-NEXT:    v_bfe_u32 v9, v2, 24, 4
+; GFX10-DL-NEXT:    v_lshl_or_b32 v5, v5, 16, v6
+; GFX10-DL-NEXT:    v_lshl_or_b32 v6, v11, 16, v12
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v10, 16, v8
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v8
 ; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v2, 28, v2
+; GFX10-DL-NEXT:    v_and_b32_e32 v8, v4, v9
+; GFX10-DL-NEXT:    v_and_b32_e32 v1, v4, v1
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v4, v6, v5
+; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v10
+; GFX10-DL-NEXT:    v_lshl_or_b32 v2, v2, 16, v8
+; GFX10-DL-NEXT:    v_lshl_or_b32 v1, v7, 16, v1
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v5, 16, v4
 ; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v4
-; GFX10-DL-NEXT:    v_bfe_u32 v4, v1, 24, 4
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v5, v5, v7
-; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v1, 28, v1
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v6
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v4, v4, v8
-; GFX10-DL-NEXT:    v_mul_u32_u24_e32 v1, v1, v2
-; GFX10-DL-NEXT:    v_add_nc_u16 v3, v3, v5
-; GFX10-DL-NEXT:    v_add_nc_u16 v2, v3, v4
+; GFX10-DL-NEXT:    v_pk_mul_lo_u16 v1, v1, v2
+; GFX10-DL-NEXT:    v_add_nc_u16 v2, v3, v5
+; GFX10-DL-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
 ; GFX10-DL-NEXT:    v_add_nc_u16 v1, v2, v1
+; GFX10-DL-NEXT:    v_add_nc_u16 v1, v1, v3
 ; GFX10-DL-NEXT:    v_and_b32_e32 v1, 15, v1
-; GFX10-DL-NEXT:    global_store_byte v0, v1, s[2:3]
+; GFX10-DL-NEXT:    global_store_byte v0, v1, s[0:1]
 ; GFX10-DL-NEXT:    s_endpgm
                                              <8 x i4> addrspace(1)* %src2,
                                              i4 addrspace(1)* nocapture %dst) {
diff --git a/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll b/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
index cda3f4871381a..8db5c5754e208 100644
--- a/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
+++ b/llvm/test/CodeGen/AMDGPU/inlineasm-illegal-type.ll
@@ -20,9 +20,9 @@ define amdgpu_kernel void @v_input_output_i8() {
 
 ; GCN: error: couldn't allocate output register for constraint 's'
 ; GCN: error: couldn't allocate input reg for constraint 's'
-define amdgpu_kernel void @s_input_output_v8f16() {
-  %v = tail call <8 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
-  tail call void asm sideeffect "; use $0", "s"(<8 x half> %v)
+define amdgpu_kernel void @s_input_output_v16f16() {
+  %v = tail call <16 x half> asm sideeffect "s_mov_b32 $0, -1", "=s"()
+  tail call void asm sideeffect "; use $0", "s"(<16 x half> %v)
   ret void
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index f18d77b190e2b..a4eb3a409c47f 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -1745,6 +1745,345 @@ define amdgpu_kernel void @v_insertelement_v4f16_dynamic_sgpr(<4 x half> addrspa
   ret void
 }
 
+define amdgpu_kernel void @v_insertelement_v8f16_3(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in, i32 %val) {
+; GFX9-LABEL: v_insertelement_v8f16_3:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x10
+; GFX9-NEXT:    s_add_u32 s0, s0, s7
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[10:11]
+; GFX9-NEXT:    v_mov_b32_e32 v5, s6
+; GFX9-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
+; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], 0 offset:16
+; GFX9-NEXT:    s_waitcnt vmcnt(2)
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshl_or_b32 v1, v5, 16, v1
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_insertelement_v8f16_3:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x10
+; VI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; VI-NEXT:    s_add_u32 s0, s0, s7
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s11
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s10, v4
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v5, s4
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; VI-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
+; VI-NEXT:    buffer_load_dword v6, off, s[0:3], 0 offset:16
+; VI-NEXT:    v_mov_b32_e32 v5, s9
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s8, v4
+; VI-NEXT:    s_mov_b32 s4, 0xffff
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; VI-NEXT:    s_waitcnt vmcnt(2)
+; VI-NEXT:    v_bfi_b32 v3, s4, v3, v3
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
+; VI-NEXT:    v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT:    s_endpgm
+;
+; CI-LABEL: v_insertelement_v8f16_3:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_load_dword s4, s[4:5], 0x4
+; CI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v1, s3
+; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v4
+; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; CI-NEXT:    v_mov_b32_e32 v5, s1
+; CI-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
+; CI-NEXT:    s_lshl_b32 s0, s4, 16
+; CI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    v_and_b32_e32 v1, 0xffff, v1
+; CI-NEXT:    v_or_b32_e32 v1, s0, v1
+; CI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; CI-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x half>, <8 x half> addrspace(1)* %in.gep
+  %val.trunc = trunc i32 %val to i16
+  %val.cvt = bitcast i16 %val.trunc to half
+  %vecins = insertelement <8 x half> %vec, half %val.cvt, i32 3
+  store <8 x half> %vecins, <8 x half> addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @v_insertelement_v8i16_6(<8 x i16> addrspace(1)* %out, <8 x i16> addrspace(1)* %in, i32 %val) {
+; GFX9-LABEL: v_insertelement_v8i16_6:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x10
+; GFX9-NEXT:    s_add_u32 s0, s0, s7
+; GFX9-NEXT:    s_addc_u32 s1, s1, 0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[10:11]
+; GFX9-NEXT:    v_mov_b32_e32 v5, s6
+; GFX9-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
+; GFX9-NEXT:    buffer_load_dword v5, off, s[0:3], 0 offset:16
+; GFX9-NEXT:    v_mov_b32_e32 v6, 0xffff
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_bfi_b32 v3, v6, v5, v3
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[8:9]
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_insertelement_v8i16_6:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
+; VI-NEXT:    s_load_dword s4, s[4:5], 0x10
+; VI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; VI-NEXT:    s_add_u32 s0, s0, s7
+; VI-NEXT:    s_addc_u32 s1, s1, 0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s11
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s10, v4
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    v_mov_b32_e32 v5, s4
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; VI-NEXT:    buffer_store_short v5, off, s[0:3], 0 offset:16
+; VI-NEXT:    buffer_load_dword v6, off, s[0:3], 0 offset:16
+; VI-NEXT:    s_mov_b32 s4, 0xffff
+; VI-NEXT:    v_mov_b32_e32 v5, s9
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s8, v4
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; VI-NEXT:    s_waitcnt vmcnt(2)
+; VI-NEXT:    v_bfi_b32 v1, s4, v1, v1
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_bfi_b32 v3, s4, v6, v3
+; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT:    s_endpgm
+;
+; CI-LABEL: v_insertelement_v8i16_6:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_load_dword s4, s[4:5], 0x4
+; CI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v1, s3
+; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v4
+; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; CI-NEXT:    v_mov_b32_e32 v5, s1
+; CI-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
+; CI-NEXT:    s_mov_b32 s0, 0xffff
+; CI-NEXT:    v_mov_b32_e32 v6, s4
+; CI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    v_bfi_b32 v3, s0, v6, v3
+; CI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; CI-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds <8 x i16>, <8 x i16> addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x i16>, <8 x i16> addrspace(1)* %in.gep
+  %val.trunc = trunc i32 %val to i16
+  %val.cvt = bitcast i16 %val.trunc to i16
+  %vecins = insertelement <8 x i16> %vec, i16 %val.cvt, i32 6
+  store <8 x i16> %vecins, <8 x i16> addrspace(1)* %out.gep
+  ret void
+}
+
+define amdgpu_kernel void @v_insertelement_v8f16_dynamic(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in, i32 %val, i32 %n) {
+; GFX9-LABEL: v_insertelement_v8f16_dynamic:
+; GFX9:       ; %bb.0:
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 7
+; GFX9-NEXT:    v_mov_b32_e32 v6, s6
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 6
+; GFX9-NEXT:    s_waitcnt vmcnt(0)
+; GFX9-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 5
+; GFX9-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 4
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 3
+; GFX9-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; GFX9-NEXT:    v_and_b32_e32 v3, v5, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 2
+; GFX9-NEXT:    v_lshl_or_b32 v3, v7, 16, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 1
+; GFX9-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
+; GFX9-NEXT:    v_and_b32_e32 v2, v5, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    s_cmp_eq_u32 s7, 0
+; GFX9-NEXT:    v_lshl_or_b32 v2, v8, 16, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v8, v10, v6, vcc
+; GFX9-NEXT:    s_cselect_b64 vcc, -1, 0
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; GFX9-NEXT:    v_and_b32_e32 v1, v5, v1
+; GFX9-NEXT:    v_and_b32_e32 v0, v5, v0
+; GFX9-NEXT:    v_lshl_or_b32 v1, v7, 16, v1
+; GFX9-NEXT:    v_lshl_or_b32 v0, v8, 16, v0
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
+; GFX9-NEXT:    s_endpgm
+;
+; VI-LABEL: v_insertelement_v8f16_dynamic:
+; VI:       ; %bb.0:
+; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x10
+; VI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; VI-NEXT:    s_waitcnt lgkmcnt(0)
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
+; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v5, s1
+; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
+; VI-NEXT:    s_cmp_eq_u32 s5, 6
+; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; VI-NEXT:    v_mov_b32_e32 v6, s4
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 7
+; VI-NEXT:    s_waitcnt vmcnt(0)
+; VI-NEXT:    v_cndmask_b32_e32 v7, v3, v6, vcc
+; VI-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 4
+; VI-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 5
+; VI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; VI-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 2
+; VI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; VI-NEXT:    v_cndmask_b32_e32 v8, v8, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 3
+; VI-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; VI-NEXT:    v_or_b32_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
+; VI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 0
+; VI-NEXT:    v_or_b32_sdwa v2, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_cndmask_b32_e32 v7, v9, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    s_cmp_eq_u32 s5, 1
+; VI-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
+; VI-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; VI-NEXT:    s_cselect_b64 vcc, -1, 0
+; VI-NEXT:    v_cndmask_b32_e32 v6, v10, v6, vcc
+; VI-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
+; VI-NEXT:    v_lshlrev_b32_e32 v6, 16, v6
+; VI-NEXT:    v_or_b32_sdwa v1, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; VI-NEXT:    s_endpgm
+;
+; CI-LABEL: v_insertelement_v8f16_dynamic:
+; CI:       ; %bb.0:
+; CI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; CI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x4
+; CI-NEXT:    v_lshlrev_b32_e32 v4, 4, v0
+; CI-NEXT:    s_waitcnt lgkmcnt(0)
+; CI-NEXT:    v_mov_b32_e32 v1, s3
+; CI-NEXT:    v_add_i32_e32 v0, vcc, s2, v4
+; CI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; CI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
+; CI-NEXT:    v_mov_b32_e32 v5, s1
+; CI-NEXT:    v_add_i32_e32 v4, vcc, s0, v4
+; CI-NEXT:    v_cvt_f32_f16_e32 v6, s4
+; CI-NEXT:    s_cmp_eq_u32 s5, 7
+; CI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 6
+; CI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 5
+; CI-NEXT:    s_cselect_b64 s[2:3], -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 4
+; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
+; CI-NEXT:    v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT:    v_lshrrev_b32_e32 v9, 16, v1
+; CI-NEXT:    v_cvt_f32_f16_e32 v7, v7
+; CI-NEXT:    v_cvt_f32_f16_e32 v9, v9
+; CI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; CI-NEXT:    v_cvt_f32_f16_e32 v1, v1
+; CI-NEXT:    v_lshrrev_b32_e32 v10, 16, v0
+; CI-NEXT:    v_cvt_f32_f16_e32 v8, v8
+; CI-NEXT:    v_cvt_f32_f16_e32 v10, v10
+; CI-NEXT:    v_cndmask_b32_e64 v3, v3, v6, s[0:1]
+; CI-NEXT:    s_cselect_b64 s[0:1], -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 3
+; CI-NEXT:    v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; CI-NEXT:    v_cndmask_b32_e32 v7, v7, v6, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 2
+; CI-NEXT:    v_cndmask_b32_e32 v9, v9, v6, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 1
+; CI-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    s_cmp_eq_u32 s5, 0
+; CI-NEXT:    v_cndmask_b32_e64 v8, v8, v6, s[2:3]
+; CI-NEXT:    v_cvt_f16_f32_e32 v7, v7
+; CI-NEXT:    v_cndmask_b32_e32 v10, v10, v6, vcc
+; CI-NEXT:    s_cselect_b64 vcc, -1, 0
+; CI-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s[0:1]
+; CI-NEXT:    v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT:    v_cvt_f16_f32_e32 v8, v8
+; CI-NEXT:    v_cvt_f16_f32_e32 v9, v9
+; CI-NEXT:    v_cvt_f16_f32_e32 v10, v10
+; CI-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
+; CI-NEXT:    v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT:    v_cvt_f16_f32_e32 v1, v1
+; CI-NEXT:    v_cvt_f16_f32_e32 v0, v0
+; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v7
+; CI-NEXT:    v_lshlrev_b32_e32 v7, 16, v8
+; CI-NEXT:    v_lshlrev_b32_e32 v8, 16, v9
+; CI-NEXT:    v_or_b32_e32 v3, v3, v6
+; CI-NEXT:    v_lshlrev_b32_e32 v6, 16, v10
+; CI-NEXT:    v_or_b32_e32 v2, v2, v7
+; CI-NEXT:    v_or_b32_e32 v1, v1, v8
+; CI-NEXT:    v_or_b32_e32 v0, v0, v6
+; CI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
+; CI-NEXT:    s_endpgm
+  %tid = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %tid.ext = sext i32 %tid to i64
+  %in.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %in, i64 %tid.ext
+  %out.gep = getelementptr inbounds <8 x half>, <8 x half> addrspace(1)* %out, i64 %tid.ext
+  %vec = load <8 x half>, <8 x half> addrspace(1)* %in.gep
+  %val.trunc = trunc i32 %val to i16
+  %val.cvt = bitcast i16 %val.trunc to half
+  %vecins = insertelement <8 x half> %vec, half %val.cvt, i32 %n
+  store <8 x half> %vecins, <8 x half> addrspace(1)* %out.gep
+  ret void
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x() #1
 
 attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-args.ll b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
index ec5445f96e046..3105f4b11b91e 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernel-args.ll
@@ -1875,16 +1875,15 @@ define amdgpu_kernel void @v5i16_arg(<5 x i16> addrspace(1)* nocapture %out, <5
 ;
 ; GFX9-LABEL: v5i16_arg:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x18
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x0
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v3, s6
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9-NEXT:    global_store_short v2, v3, s[2:3] offset:8
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
+; GFX9-NEXT:    global_store_short v2, v3, s[6:7] offset:8
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; EG-LABEL: v5i16_arg:
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
index 5ebd97802f6b6..23150da4e53bc 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i16.ll
@@ -4727,10 +4727,10 @@ define amdgpu_kernel void @constant_zextload_v2i16_to_v2i64(<2 x i64> addrspace(
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
 ; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NOHSA-VI-NEXT:    s_and_b32 s0, s2, 0xffff
-; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s1, s2, 16
-; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s0
-; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s1
+; GCN-NOHSA-VI-NEXT:    s_lshr_b32 s0, s2, 16
+; GCN-NOHSA-VI-NEXT:    s_and_b32 s1, s2, 0xffff
+; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v0, s1
+; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v2, s0
 ; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GCN-NOHSA-VI-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
index 5e99dc0e60c22..29c8d6a8b21d0 100644
--- a/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i16.ll
@@ -5581,14 +5581,14 @@ define amdgpu_kernel void @global_zextload_v2i16_to_v2i64(<2 x i64> addrspace(1)
 ; GCN-NOHSA-VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s8, s2
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s9, s3
-; GCN-NOHSA-VI-NEXT:    buffer_load_dword v2, off, s[8:11], 0
+; GCN-NOHSA-VI-NEXT:    buffer_load_dword v0, off, s[8:11], 0
 ; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v1, 0
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s4, s0
 ; GCN-NOHSA-VI-NEXT:    s_mov_b32 s5, s1
 ; GCN-NOHSA-VI-NEXT:    v_mov_b32_e32 v3, v1
 ; GCN-NOHSA-VI-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GCN-NOHSA-VI-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
+; GCN-NOHSA-VI-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GCN-NOHSA-VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GCN-NOHSA-VI-NEXT:    s_endpgm
 ;
diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index 4e6608dd0bb5b..a229ce1146aba 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -571,6 +571,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_load_dword v2, v0, s[6:7] glc
@@ -580,8 +581,8 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 ; GFX9-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s6, -1
 ; GFX9-NEXT:    v_pk_sub_i16 v2, v2, v3
-; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX9-NEXT:    v_and_b32_e32 v0, v4, v2
+; GFX9-NEXT:    v_and_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX9-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX9-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GFX9-NEXT:    s_endpgm
@@ -617,6 +618,7 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    v_mov_b32_e32 v3, 0xffff
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_load_dword v1, v0, s[6:7] glc dlc
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
@@ -627,8 +629,8 @@ define amdgpu_kernel void @v_test_sub_v2i16_zext_to_v2i64(<2 x i64> addrspace(1)
 ; GFX10-NEXT:    s_mov_b32 s6, -1
 ; GFX10-NEXT:    v_pk_sub_i16 v2, v1, v2
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v2
-; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
+; GFX10-NEXT:    v_and_b32_e32 v0, v3, v2
+; GFX10-NEXT:    v_and_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
 ; GFX10-NEXT:    v_mov_b32_e32 v3, v1
 ; GFX10-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
 ; GFX10-NEXT:    s_endpgm

From 9d32847b331565eb1b38749aa7a721c6be8b64aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Bylica?= <chfast@gmail.com>
Date: Mon, 24 Jan 2022 19:22:56 +0100
Subject: [PATCH 443/946] [DAGCombine] Remove unused param in
 combineCarryDiamond(). NFC

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 77a6e7bba3660..1137f8b16977f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3058,9 +3058,8 @@ static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
 //
 // Our goal is to identify A, B, and CarryIn and produce ADDCARRY/SUBCARRY with
 // a single path for carry/borrow out propagation:
-static SDValue combineCarryDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
-                                   const TargetLowering &TLI, SDValue Carry0,
-                                   SDValue Carry1, SDNode *N) {
+static SDValue combineCarryDiamond(SelectionDAG &DAG, const TargetLowering &TLI,
+                                   SDValue Carry0, SDValue Carry1, SDNode *N) {
   if (Carry0.getResNo() != 1 || Carry1.getResNo() != 1)
     return SDValue();
   unsigned Opcode = Carry0.getOpcode();
@@ -5879,7 +5878,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     if (SDValue Shuffle = XformToShuffleWithZero(N))
       return Shuffle;
 
-  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
     return Combined;
 
   // fold (and (or x, C), D) -> D if (C & D) == D
@@ -6676,7 +6675,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   if (SDValue Combined = visitORLike(N0, N1, N))
     return Combined;
 
-  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
     return Combined;
 
   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
@@ -8173,7 +8172,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
-  if (SDValue Combined = combineCarryDiamond(*this, DAG, TLI, N0, N1, N))
+  if (SDValue Combined = combineCarryDiamond(DAG, TLI, N0, N1, N))
     return Combined;
 
   return SDValue();

From d4be9720e7e68f1316cda971e9b29dc880222200 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Mon, 24 Jan 2022 12:01:25 -0800
Subject: [PATCH 444/946] [test] Fix no-undef-type-md.ll.

There are two test issues:
- The test assumes the current directory is writeable, but it may not be. Use `%t.o`-like paths instead of implicit `a.out`.
- The `RUN llvm-nm` line is missing a colon, so the test was not being exercised.
---
 llvm/test/LTO/Resolution/X86/no-undef-type-md.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll b/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
index afee5e656df17..8748375c84315 100644
--- a/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
+++ b/llvm/test/LTO/Resolution/X86/no-undef-type-md.ll
@@ -1,19 +1,19 @@
 ; RUN: opt <%s -o %t0.o -thinlto-bc -thinlto-split-lto-unit
 ; RUN: llvm-as -o %t1.o %S/Inputs/no-undef-type-md.ll
-; RUN: llvm-lto2 run -o a.out \
+; RUN: llvm-lto2 run -o %t-obj.o \
 ; RUN: %t0.o \
 ; RUN: -r=%t0.o,a, \
 ; RUN: -r=%t0.o,b,pl \
 ; RUN: %t1.o \
 ; RUN: -r=%t1.o,a,pl \
 ; RUN: | FileCheck --allow-empty --check-prefix=ERROR %s
-; RUN llvm-nm a.out.0 a.out.1 -S | FileCheck %s
+; RUN: llvm-nm %t-obj.o.0 %t-obj.o.1 -S | FileCheck %s
 
 ; ERROR-NOT: expected a Function or null
 ; ERROR-NOT: i32 (%0*, i32*)* undef
 
-; CHECK: a.out.0:
-; CHECK: a.out.1:
+; CHECK: -obj.o.0:
+; CHECK: -obj.o.1:
 
 ; ModuleID = 'test.cpp.o'
 source_filename = "test.cpp"

From c548bc258c693df2087eda824308a8da55fd6003 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Mon, 24 Jan 2022 14:16:23 -0500
Subject: [PATCH 445/946] [InstCombine] add tests for icmp with masked mul
 operand; NFC

More coverage for D114272
---
 .../Transforms/InstCombine/icmp-mul-and.ll    | 97 +++++++++++++++++++
 1 file changed, 97 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
index b15404c0aaa2f..2d63bfac0ffcc 100644
--- a/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-mul-and.ll
@@ -1,6 +1,103 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+declare void @use(i8)
+
+define i1 @mul_mask_pow2_eq0(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_eq0(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 44
+  %and = and i8 %mul, 4
+  %cmp = icmp eq i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_pow2_ne0_use1(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_ne0_use1(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 40
+; CHECK-NEXT:    call void @use(i8 [[MUL]])
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 40
+  call void @use(i8 %mul)
+  %and = and i8 %mul, 8
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_pow2_ne0_use2(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_ne0_use2(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 40
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 8
+; CHECK-NEXT:    call void @use(i8 [[AND]])
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 40
+  %and = and i8 %mul, 8
+  call void @use(i8 %and)
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_pow2_sgt0(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_sgt0(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 44
+  %and = and i8 %mul, 4
+  %cmp = icmp sgt i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_fakepow2_ne0(i8 %x) {
+; CHECK-LABEL: @mul_mask_fakepow2_ne0(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 44
+  %and = and i8 %mul, 5
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_pow2_eq4(i8 %x) {
+; CHECK-LABEL: @mul_mask_pow2_eq4(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 44
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 44
+  %and = and i8 %mul, 4
+  %cmp = icmp eq i8 %and, 4
+  ret i1 %cmp
+}
+
+define i1 @mul_mask_notpow2_ne(i8 %x) {
+; CHECK-LABEL: @mul_mask_notpow2_ne(
+; CHECK-NEXT:    [[MUL:%.*]] = mul i8 [[X:%.*]], 60
+; CHECK-NEXT:    [[AND:%.*]] = and i8 [[MUL]], 12
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[AND]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %mul = mul i8 %x, 60
+  %and = and i8 %mul, 12
+  %cmp = icmp ne i8 %and, 0
+  ret i1 %cmp
+}
+
 define i1 @pr40493(i32 %area) {
 ; CHECK-LABEL: @pr40493(
 ; CHECK-NEXT:  entry:

From 6d020a5ac2d5e31ab8d472e139e9caec405a5006 Mon Sep 17 00:00:00 2001
From: Amir Ayupov <amir.aupov@gmail.com>
Date: Mon, 24 Jan 2022 12:03:35 -0800
Subject: [PATCH 446/946] [BOLT] Add missing <memory> in
 InstrumentationRuntimeLibrary.h

<memory> is no longer included as a result of 5f290c090a24
("Move STLFunctionalExtras out of STLExtras").

Reviewed By: maksfb

Differential Revision: https://reviews.llvm.org/D118064
---
 bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h b/bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h
index 3257deab1db45..55f86dd42fa26 100644
--- a/bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h
+++ b/bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h
@@ -16,6 +16,7 @@
 
 #include "bolt/Passes/InstrumentationSummary.h"
 #include "bolt/RuntimeLibs/RuntimeLibrary.h"
+#include <memory>
 
 namespace llvm {
 namespace bolt {

From 0407ab4114dbdbd1845df712639bdbc84ec6df2c Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Fri, 14 Jan 2022 12:30:22 -0500
Subject: [PATCH 447/946] [libc++] Make sure basic_string::reserve(n) never
 shrinks in all Standard modes

Since basic_string::reserve(n) is instantiated in the shared library but also
available to the compiler for inlining, its definition should not depend on
things like the Standard mode in use. Indeed, that flag may not match between
how the shared library is compiled and how users are compiling their own code,
resulting in ODR violations.

However, note that we retain the behavior of basic_string::reserve() to
shrink the string for backwards compatibility reasons. While it would
technically be conforming to not shrink, we believe user expectation is
for it to shrink, and so existing code might have been written based on
that assumption. We prefer to not break such code, even though that makes
basic_string::reserve() and basic_string::reserve(0) not equivalent anymore.

Fixes llvm-project#53170

Differential Revision: https://reviews.llvm.org/D117332
---
 libcxx/docs/ReleaseNotes.rst                  | 12 +++
 libcxx/include/string                         |  9 ++-
 .../string.capacity/PR53170.pass.cpp          | 79 +++++++++++++++++++
 .../string.capacity/reserve.pass.cpp          | 50 ------------
 4 files changed, 96 insertions(+), 54 deletions(-)
 create mode 100644 libcxx/test/libcxx/strings/basic.string/string.capacity/PR53170.pass.cpp
 delete mode 100644 libcxx/test/libcxx/strings/basic.string/string.capacity/reserve.pass.cpp

diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst
index b35c8a3329515..7210e1971b10f 100644
--- a/libcxx/docs/ReleaseNotes.rst
+++ b/libcxx/docs/ReleaseNotes.rst
@@ -115,6 +115,18 @@ API Changes
   You must now explicitly initialize with a ``chrono::month`` and
   ``chrono::weekday_indexed`` instead of "meh, whenever".
 
+- C++20 requires that ``std::basic_string::reserve(n)`` never reduce the capacity
+  of the string. (For that, use ``shrink_to_fit()``.) Prior to this release, libc++'s
+  ``std::basic_string::reserve(n)`` could reduce capacity in C++17 and before, but
+  not in C++20 and later. This caused ODR violations when mixing code compiled under
+  different Standard modes. After this change, libc++'s ``std::basic_string::reserve(n)``
+  never reduces capacity, even in C++17 and before.
+  C++20 deprecates the zero-argument overload of ``std::basic_string::reserve()``,
+  but specifically permits it to reduce capacity. To avoid breaking existing code
+  assuming that ``std::basic_string::reserve()`` will shrink, libc++ maintains
+  the behavior to shrink, even though that makes ``std::basic_string::reserve()`` not
+  a synonym for ``std::basic_string::reserve(0)`` in any Standard mode anymore.
+
 ABI Changes
 -----------
 
diff --git a/libcxx/include/string b/libcxx/include/string
index b2eef646f9827..6f22f02afa0ba 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -3265,10 +3265,11 @@ basic_string<_CharT, _Traits, _Allocator>::reserve(size_type __requested_capacit
     if (__requested_capacity > max_size())
         this->__throw_length_error();
 
-#if _LIBCPP_STD_VER > 17
-    // Reserve never shrinks as of C++20.
-    if (__requested_capacity <= capacity()) return;
-#endif
+    // Make sure reserve(n) never shrinks. This is technically only required in C++20
+    // and later (since P0966R1), however we provide consistent behavior in all Standard
+    // modes because this function is instantiated in the shared library.
+    if (__requested_capacity <= capacity())
+        return;
 
     size_type __target_capacity = _VSTD::max(__requested_capacity, size());
     __target_capacity = __recommend(__target_capacity);
diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/PR53170.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/PR53170.pass.cpp
new file mode 100644
index 0000000000000..1be3dc0af2675
--- /dev/null
+++ b/libcxx/test/libcxx/strings/basic.string/string.capacity/PR53170.pass.cpp
@@ -0,0 +1,79 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <string>
+
+// void reserve(); // Deprecated in C++20.
+// void reserve(size_type);
+
+// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
+
+// This test ensures that libc++ implements https://wg21.link/P0966R1 (reserve never shrinks)
+// even before C++20. This is required in order to avoid ODR violations because basic_string::reserve(size)
+// is compiled into the shared library. Hence, it needs to have the same definition in all Standard modes.
+//
+// However, note that reserve() does shrink, and it does so in all Standard modes.
+//
+// Reported as https://llvm.org/PR53170.
+
+// reserve(n) used to shrink the string until https://llvm.org/D117332 was shipped.
+// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx10.{{9|10|11|12|13|14|15}}
+// XFAIL: use_system_cxx_lib && target={{.+}}-apple-macosx{{11|12}}
+
+#include <string>
+#include <stdexcept>
+#include <cassert>
+
+#include "test_macros.h"
+#include "min_allocator.h"
+
+template <class S>
+void test() {
+    // Test that a call to reserve() does shrink the string.
+    {
+        S s(1000, 'a');
+        typename S::size_type old_cap = s.capacity();
+        s.resize(20);
+        assert(s.capacity() == old_cap);
+
+        s.reserve();
+        assert(s.capacity() < old_cap);
+    }
+
+    // Test that a call to reserve(smaller-than-capacity) never shrinks the string.
+    {
+        S s(1000, 'a');
+        typename S::size_type old_cap = s.capacity();
+        s.resize(20);
+        assert(s.capacity() == old_cap);
+
+        s.reserve(10);
+        assert(s.capacity() == old_cap);
+    }
+
+    // In particular, test that reserve(0) does NOT shrink the string.
+    {
+        S s(1000, 'a');
+        typename S::size_type old_cap = s.capacity();
+        s.resize(20);
+        assert(s.capacity() == old_cap);
+
+        s.reserve(0);
+        assert(s.capacity() == old_cap);
+    }
+}
+
+int main(int, char**) {
+    test<std::string>();
+
+#if TEST_STD_VER >= 11
+    test<std::basic_string<char, std::char_traits<char>, min_allocator<char> > >();
+#endif
+
+    return 0;
+}
diff --git a/libcxx/test/libcxx/strings/basic.string/string.capacity/reserve.pass.cpp b/libcxx/test/libcxx/strings/basic.string/string.capacity/reserve.pass.cpp
deleted file mode 100644
index 358f51fd6e4cf..0000000000000
--- a/libcxx/test/libcxx/strings/basic.string/string.capacity/reserve.pass.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-// <string>
-
-// void reserve(); // Deprecated in C++20.
-
-// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS
-
-#include <string>
-#include <stdexcept>
-#include <cassert>
-
-#include "test_macros.h"
-#include "min_allocator.h"
-
-template <class S>
-void
-test()
-{
-    // Tests that a call to reserve() on a long string is equivalent to shrink_to_fit().
-    S s(1000, 'a');
-    typename S::size_type old_cap = s.capacity();
-    s.resize(20);
-    assert(s.capacity() == old_cap);
-    s.reserve();
-    assert(s.capacity() < old_cap);
-}
-
-int main(int, char**)
-{
-    {
-    typedef std::string S;
-    test<S>();
-    }
-#if TEST_STD_VER >= 11
-    {
-    typedef min_allocator<char> A;
-    typedef std::basic_string<char, std::char_traits<char>, A> S;
-    test<S>();
-    }
-#endif
-
-    return 0;
-}

From 1f6af9c9cefd226f2edbc36040f2ffb81c68a3eb Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Sun, 9 Jan 2022 09:36:08 -0500
Subject: [PATCH 448/946] [libc++][CI] Re-enable all CI jobs

This essentially reverts commit 89f4a18f371d8 now that our CI is back
online at full capacity.

Differential Revision: https://reviews.llvm.org/D116891
---
 libcxx/utils/ci/buildkite-pipeline.yml | 467 ++++++++++++-------------
 1 file changed, 233 insertions(+), 234 deletions(-)

diff --git a/libcxx/utils/ci/buildkite-pipeline.yml b/libcxx/utils/ci/buildkite-pipeline.yml
index c6f4d700abafb..cf2b9d9c82120 100644
--- a/libcxx/utils/ci/buildkite-pipeline.yml
+++ b/libcxx/utils/ci/buildkite-pipeline.yml
@@ -160,8 +160,6 @@ steps:
   #
   - wait
 
-  # TODO: Due to ongoing CI outage on our Linux nodes, most configurations running on Linux
-  #       are disabled. We are currently running off of a much smaller fleet than normally.
   # Tests with the supported compilers.
   - label: "GCC 11 / C++11"
     command: "libcxx/utils/ci/run-buildbot generic-gcc-cxx11"
@@ -189,18 +187,18 @@ steps:
           limit: 2
     timeout_in_minutes: 120
 
-  # - label: "Clang 13"
-  #   command: "libcxx/utils/ci/run-buildbot generic-clang-13"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Clang 13"
+    command: "libcxx/utils/ci/run-buildbot generic-clang-13"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
   # Tests with the sanitizers.
   - label: "ASAN"
@@ -216,33 +214,34 @@ steps:
           limit: 2
     timeout_in_minutes: 120
 
-  # - label: "TSAN"
-  #   command: "libcxx/utils/ci/run-buildbot generic-tsan"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "TSAN"
+    command: "libcxx/utils/ci/run-buildbot generic-tsan"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "UBSAN"
-  #   command: "libcxx/utils/ci/run-buildbot generic-ubsan"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "UBSAN"
+    command: "libcxx/utils/ci/run-buildbot generic-ubsan"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
   # # Tests with the various supported ways to build libc++.
+  # TODO: Fix failures with the GDB pretty printers
   # - label: "Bootstrapping build"
   #   command: "libcxx/utils/ci/run-buildbot bootstrapping-build"
   #   artifact_paths:
@@ -256,58 +255,58 @@ steps:
   #         limit: 2
   #   timeout_in_minutes: 120
 
-  # - label: "Legacy Lit configuration"
-  #   command: "libcxx/utils/ci/run-buildbot legacy-test-config"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Legacy Lit configuration"
+    command: "libcxx/utils/ci/run-buildbot legacy-test-config"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Legacy standalone build"
-  #   command: "libcxx/utils/ci/run-buildbot legacy-standalone"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Legacy standalone build"
+    command: "libcxx/utils/ci/run-buildbot legacy-standalone"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Legacy LLVM_ENABLE_PROJECTS build"
-  #   command: "libcxx/utils/ci/run-buildbot legacy-project-build"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Legacy LLVM_ENABLE_PROJECTS build"
+    command: "libcxx/utils/ci/run-buildbot legacy-project-build"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # # Tests with various build configurations.
-  # - label: "-fno-exceptions"
-  #   command: "libcxx/utils/ci/run-buildbot generic-noexceptions"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  # Tests with various build configurations.
+  - label: "-fno-exceptions"
+    command: "libcxx/utils/ci/run-buildbot generic-noexceptions"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
   - label: "Modular build"
     command: "libcxx/utils/ci/run-buildbot generic-modules"
@@ -322,164 +321,164 @@ steps:
           limit: 2
     timeout_in_minutes: 120
 
-  # - label: "Static libraries"
-  #   command: "libcxx/utils/ci/run-buildbot generic-static"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Static libraries"
+    command: "libcxx/utils/ci/run-buildbot generic-static"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Assertions enabled"
-  #   command: "libcxx/utils/ci/run-buildbot generic-assertions"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #     - "**/*.abilist"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Assertions enabled"
+    command: "libcxx/utils/ci/run-buildbot generic-assertions"
+    artifact_paths:
+      - "**/test-results.xml"
+      - "**/*.abilist"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Debug iterators"
-  #   command: "libcxx/utils/ci/run-buildbot generic-debug-iterators"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #     - "**/*.abilist"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Debug iterators"
+    command: "libcxx/utils/ci/run-buildbot generic-debug-iterators"
+    artifact_paths:
+      - "**/test-results.xml"
+      - "**/*.abilist"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "With LLVM's libunwind"
-  #   command: "libcxx/utils/ci/run-buildbot generic-with_llvm_unwinder"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "With LLVM's libunwind"
+    command: "libcxx/utils/ci/run-buildbot generic-with_llvm_unwinder"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "Single-threaded"
-  #   command: "libcxx/utils/ci/run-buildbot generic-singlethreaded"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "Single-threaded"
+    command: "libcxx/utils/ci/run-buildbot generic-singlethreaded"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No debug mode"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-debug"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No debug mode"
+    command: "libcxx/utils/ci/run-buildbot generic-no-debug"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No filesystem"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-filesystem"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No filesystem"
+    command: "libcxx/utils/ci/run-buildbot generic-no-filesystem"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No random device"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-random_device"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No random device"
+    command: "libcxx/utils/ci/run-buildbot generic-no-random_device"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No locale"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-localization"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No locale"
+    command: "libcxx/utils/ci/run-buildbot generic-no-localization"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No Unicode"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-unicode"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No Unicode"
+    command: "libcxx/utils/ci/run-buildbot generic-no-unicode"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # - label: "No wide characters"
-  #   command: "libcxx/utils/ci/run-buildbot generic-no-wide-characters"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  - label: "No wide characters"
+    command: "libcxx/utils/ci/run-buildbot generic-no-wide-characters"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
-  # # Other non-testing CI jobs
-  # - label: "Benchmarks"
-  #   command: "libcxx/utils/ci/run-buildbot benchmarks"
-  #   artifact_paths:
-  #     - "**/test-results.xml"
-  #   agents:
-  #     queue: "libcxx-builders"
-  #     os: "linux"
-  #   retry:
-  #     automatic:
-  #       - exit_status: -1  # Agent was lost
-  #         limit: 2
-  #   timeout_in_minutes: 120
+  # Other non-testing CI jobs
+  - label: "Benchmarks"
+    command: "libcxx/utils/ci/run-buildbot benchmarks"
+    artifact_paths:
+      - "**/test-results.xml"
+    agents:
+      queue: "libcxx-builders"
+      os: "linux"
+    retry:
+      automatic:
+        - exit_status: -1  # Agent was lost
+          limit: 2
+    timeout_in_minutes: 120
 
   - label: "Documentation"
     command: "libcxx/utils/ci/run-buildbot documentation"

From c3ca2c6b14f91f8232525373c4f5b1dc504a39a1 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Mon, 24 Jan 2022 21:45:00 +0100
Subject: [PATCH 449/946] [lldb/test] Fix
 `TestScriptedProcess.test_scripted_process_and_scripted_thread`

This patch updates `dummy_scripted_process.py` to report the dummy
thread correctly to reflect the changes introduced by `d3e0f7e`.

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 .../scripted_process/TestScriptedProcess.py                | 7 +++++--
 .../scripted_process/dummy_scripted_process.py             | 1 +
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
index 4831d48a0b5a9..0c215f082c5d3 100644
--- a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
+++ b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
@@ -41,6 +41,7 @@ def test_python_plugin_package(self):
         self.expect('script dir(ScriptedProcess)',
                     substrs=["launch"])
 
+    @skipUnlessDarwin
     def test_invalid_scripted_register_context(self):
         """Test that we can launch an lldb scripted process with an invalid
         Scripted Thread, with invalid register context."""
@@ -77,7 +78,7 @@ def cleanup():
 
         self.assertIn("Failed to get scripted thread registers data.", log)
 
-    @skipIf(archs=no_match(['x86_64']))
+    @skipIf(archs=no_match(['x86_64', 'arm64', 'arm64e']))
     def test_scripted_process_and_scripted_thread(self):
         """Test that we can launch an lldb scripted process using the SBAPI,
         check its process ID, read string from memory, check scripted thread
@@ -124,8 +125,10 @@ def cleanup():
                 break
 
         self.assertTrue(GPRs, "Invalid General Purpose Registers Set")
-        self.assertEqual(GPRs.GetNumChildren(), 21)
+        self.assertGreater(GPRs.GetNumChildren(), 0)
         for idx, reg in enumerate(GPRs, start=1):
+            if idx > 21:
+                break
             self.assertEqual(idx, int(reg.value, 16))
 
     def create_stack_skinny_corefile(self, file):
diff --git a/lldb/test/API/functionalities/scripted_process/dummy_scripted_process.py b/lldb/test/API/functionalities/scripted_process/dummy_scripted_process.py
index d7f428d408457..67850cf57a73d 100644
--- a/lldb/test/API/functionalities/scripted_process/dummy_scripted_process.py
+++ b/lldb/test/API/functionalities/scripted_process/dummy_scripted_process.py
@@ -9,6 +9,7 @@
 class DummyScriptedProcess(ScriptedProcess):
     def __init__(self, target: lldb.SBTarget, args : lldb.SBStructuredData):
         super().__init__(target, args)
+        self.threads[0] = DummyScriptedThread(self, None)
 
     def get_memory_region_containing_address(self, addr: int) -> lldb.SBMemoryRegionInfo:
         return None

From 997e128e2a78f5a5434fc75997441ae1ee76f8a4 Mon Sep 17 00:00:00 2001
From: Casey Carter <Casey@Carter.net>
Date: Wed, 29 Dec 2021 17:21:52 -0800
Subject: [PATCH 450/946] [libcxx][test] the domain of == for forward iterators
 is iterator values from the same range

* Default-initialized `basic_string` iterators are not portably in the domain of `==`.
* Avoid comparing iterators from non-equal string_views which MSVCSTL considers not to be in the domain of equality.
* Don't test invalid range `[in, out + N)`.

Also silence some truncation warnings by testing with a non-narrowing conversion.

Differential Revision: https://reviews.llvm.org/D118049
---
 .../range.all/range.owning.view/begin_end.pass.cpp     |  8 ++++----
 .../range.transform/iterator/plus_minus.pass.cpp       |  4 ++--
 .../basic.string/string.iterators/iterators.pass.cpp   |  1 -
 .../format.parse.ctx/advance_to.pass.cpp               |  4 ++--
 .../ranges_uninitialized_copy.pass.cpp                 | 10 +++++-----
 .../ranges_uninitialized_copy_n.pass.cpp               |  4 ++--
 .../ranges_uninitialized_move.pass.cpp                 | 10 +++++-----
 .../ranges_uninitialized_move_n.pass.cpp               |  4 ++--
 8 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/libcxx/test/std/ranges/range.adaptors/range.all/range.owning.view/begin_end.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.all/range.owning.view/begin_end.pass.cpp
index 9319ac3b8986a..34898f0d3df94 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.all/range.owning.view/begin_end.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.all/range.owning.view/begin_end.pass.cpp
@@ -117,10 +117,10 @@ constexpr bool test()
     // Test a non-view.
     std::array<int, 2> a = {1, 2};
     auto ov = std::ranges::owning_view(std::move(a));
-    assert(ov.begin() != a.begin()); // because it points into the copy
-    assert(std::as_const(ov).begin() != a.begin());
-    assert(ov.end() != a.end());
-    assert(std::as_const(ov).end() != a.end());
+    assert(std::to_address(ov.begin()) != std::to_address(a.begin())); // because it points into the copy
+    assert(std::to_address(std::as_const(ov).begin()) != std::to_address(a.begin()));
+    assert(std::to_address(ov.end()) != std::to_address(a.end()));
+    assert(std::to_address(std::as_const(ov).end()) != std::to_address(a.end()));
   }
   return true;
 }
diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/plus_minus.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/plus_minus.pass.cpp
index 9baff10cc034d..6138df97929a2 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/plus_minus.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/plus_minus.pass.cpp
@@ -21,12 +21,12 @@ constexpr bool test() {
   std::ranges::transform_view<MoveOnlyView, PlusOneMutable> transformView1;
   auto iter1 = std::move(transformView1).begin();
   std::ranges::transform_view<MoveOnlyView, PlusOneMutable> transformView2;
-  auto iter2 = std::move(transformView2).begin();
+  [[maybe_unused]] auto iter2 = std::move(transformView2).begin();
   iter1 += 4;
   assert((iter1 + 1).base() == globalBuff + 5);
   assert((1 + iter1).base() == globalBuff + 5);
   assert((iter1 - 1).base() == globalBuff + 3);
-  assert(iter1 - iter2 == 4);
+  LIBCPP_ASSERT(iter1 - iter2 == 4);
   assert((iter1 + 2) - 2 == iter1);
   assert((iter1 - 2) + 2 == iter1);
 
diff --git a/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp b/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
index 187452b6020e0..0ab3a549dd5af 100644
--- a/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
+++ b/libcxx/test/std/strings/basic.string/string.iterators/iterators.pass.cpp
@@ -54,7 +54,6 @@ TEST_CONSTEXPR_CXX20 void test()
         C a;
         typename C::iterator i1 = a.begin();
         typename C::iterator i2;
-        assert ( i1 != i2 );
         i2 = i1;
         assert ( i1 == i2 );
     }
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/advance_to.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/advance_to.pass.cpp
index fe0405f2e534c..d392f993343a6 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/advance_to.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.parse.ctx/advance_to.pass.cpp
@@ -39,10 +39,10 @@ constexpr void test(const CharT* fmt) {
     std::basic_format_parse_context context(view);
 
     context.advance_to(context.begin() + 1);
-    assert(context.begin() == view.begin() + 1);
+    assert(std::to_address(context.begin()) == std::to_address(view.begin()) + 1);
 
     context.advance_to(context.begin() + 1);
-    assert(context.begin() == view.begin() + 2);
+    assert(std::to_address(context.begin()) == std::to_address(view.begin()) + 2);
 
     context.advance_to(context.begin() + 1);
     assert(context.begin() == context.end());
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp
index f6b40fd145635..4ce2d720aa66f 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy.pass.cpp
@@ -218,7 +218,7 @@ int main(int, char**) {
     constexpr int N = 5;
     int in[N] = {1, 2, 3, 4, 5};
     int out[N] = {6, 7, 8, 9, 10};
-    assert(!std::equal(in, in + N, in, out + N));
+    assert(!std::equal(in, in + N, out, out + N));
 
     std::ranges::uninitialized_copy(in, in + 1, out, out + N);
     assert(out[0] == 1);
@@ -314,8 +314,8 @@ int main(int, char**) {
   // Conversions, (iter, sentinel) overload.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer<int, N> out;
+    int in[N] = {1, 2, 3};
+    Buffer<double, N> out;
 
     std::ranges::uninitialized_copy(in, in + N, out.begin(), out.end());
     assert(std::equal(in, in + N, out.begin(), out.end()));
@@ -324,8 +324,8 @@ int main(int, char**) {
   // Conversions, (range) overload.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer<int, N> out;
+    int in[N] = {1, 2, 3};
+    Buffer<double, N> out;
 
     std::ranges::uninitialized_copy(in, out);
     assert(std::equal(in, in + N, out.begin(), out.end()));
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
index f32af6cf4b38b..aee2cfd1f91b5 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.copy/ranges_uninitialized_copy_n.pass.cpp
@@ -122,8 +122,8 @@ int main(int, char**) {
   // Conversions.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer<int, N> out;
+    int in[N] = {1, 2, 3};
+    Buffer<double, N> out;
 
     std::ranges::uninitialized_copy_n(in, N, out.begin(), out.end());
     assert(std::equal(in, in + N, out.begin(), out.end()));
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp
index 934ac6a4f23fe..c764440d84e77 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move.pass.cpp
@@ -250,7 +250,7 @@ int main(int, char**) {
     constexpr int N = 5;
     int in[N] = {1, 2, 3, 4, 5};
     int out[N] = {6, 7, 8, 9, 10};
-    assert(!std::equal(in, in + N, in, out + N));
+    assert(!std::equal(in, in + N, out, out + N));
 
     std::ranges::uninitialized_move(in, in + 1, out, out + N);
     assert(out[0] == 1);
@@ -350,8 +350,8 @@ int main(int, char**) {
   // Conversions, (iter, sentinel) overload.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer<int, N> out;
+    int in[N] = {1, 2, 3};
+    Buffer<double, N> out;
 
     std::ranges::uninitialized_move(in, in + N, out.begin(), out.end());
     assert(std::equal(in, in + N, out.begin(), out.end()));
@@ -360,8 +360,8 @@ int main(int, char**) {
   // Conversions, (range) overload.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer<int, N> out;
+    int in[N] = {1, 2, 3};
+    Buffer<double, N> out;
 
     std::ranges::uninitialized_move(in, out);
     assert(std::equal(in, in + N, out.begin(), out.end()));
diff --git a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
index 9c6691de92297..01f3c6ca8e00d 100644
--- a/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
+++ b/libcxx/test/std/utilities/memory/specialized.algorithms/uninitialized.move/ranges_uninitialized_move_n.pass.cpp
@@ -156,8 +156,8 @@ int main(int, char**) {
   // Conversions.
   {
     constexpr int N = 3;
-    double in[N] = {1.0, 2.0, 3.0};
-    Buffer<int, N> out;
+    int in[N] = {1, 2, 3};
+    Buffer<double, N> out;
 
     std::ranges::uninitialized_move_n(in, N, out.begin(), out.end());
     assert(std::equal(in, in + N, out.begin(), out.end()));

From 6a028296fe62252791a6b470eeb8409b17d48cd0 Mon Sep 17 00:00:00 2001
From: Quinn Pham <quinn.pham@ibm.com>
Date: Mon, 29 Nov 2021 09:12:51 -0600
Subject: [PATCH 451/946] [PowerPC] Emit warning when SP is clobbered by asm

This patch emits a warning when the stack pointer register (`R1`) is found in
the clobber list of an inline asm statement. Clobbering the stack pointer is
not supported.

Reviewed By: #powerpc, nemanjai

Differential Revision: https://reviews.llvm.org/D112073
---
 clang/lib/Basic/Targets/PPC.cpp               | 39 +++++++++++--------
 .../Misc/ppc-inline-asm-clobber-warning.c     | 38 ++++++++++++++++++
 llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp   | 12 ++++++
 llvm/lib/Target/PowerPC/PPCRegisterInfo.h     |  2 +
 .../PowerPC/inline-asm-clobber-warning.ll     | 22 +++++++++++
 5 files changed, 96 insertions(+), 17 deletions(-)
 create mode 100644 clang/test/Misc/ppc-inline-asm-clobber-warning.c
 create mode 100644 llvm/test/CodeGen/PowerPC/inline-asm-clobber-warning.ll

diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 19b7ded40402a..1eb0317af60b6 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -734,23 +734,28 @@ ArrayRef<const char *> PPCTargetInfo::getGCCRegNames() const {
 const TargetInfo::GCCRegAlias PPCTargetInfo::GCCRegAliases[] = {
     // While some of these aliases do map to different registers
     // they still share the same register name.
-    {{"0"}, "r0"},     {{"1"}, "r1"},     {{"2"}, "r2"},     {{"3"}, "r3"},
-    {{"4"}, "r4"},     {{"5"}, "r5"},     {{"6"}, "r6"},     {{"7"}, "r7"},
-    {{"8"}, "r8"},     {{"9"}, "r9"},     {{"10"}, "r10"},   {{"11"}, "r11"},
-    {{"12"}, "r12"},   {{"13"}, "r13"},   {{"14"}, "r14"},   {{"15"}, "r15"},
-    {{"16"}, "r16"},   {{"17"}, "r17"},   {{"18"}, "r18"},   {{"19"}, "r19"},
-    {{"20"}, "r20"},   {{"21"}, "r21"},   {{"22"}, "r22"},   {{"23"}, "r23"},
-    {{"24"}, "r24"},   {{"25"}, "r25"},   {{"26"}, "r26"},   {{"27"}, "r27"},
-    {{"28"}, "r28"},   {{"29"}, "r29"},   {{"30"}, "r30"},   {{"31"}, "r31"},
-    {{"fr0"}, "f0"},   {{"fr1"}, "f1"},   {{"fr2"}, "f2"},   {{"fr3"}, "f3"},
-    {{"fr4"}, "f4"},   {{"fr5"}, "f5"},   {{"fr6"}, "f6"},   {{"fr7"}, "f7"},
-    {{"fr8"}, "f8"},   {{"fr9"}, "f9"},   {{"fr10"}, "f10"}, {{"fr11"}, "f11"},
-    {{"fr12"}, "f12"}, {{"fr13"}, "f13"}, {{"fr14"}, "f14"}, {{"fr15"}, "f15"},
-    {{"fr16"}, "f16"}, {{"fr17"}, "f17"}, {{"fr18"}, "f18"}, {{"fr19"}, "f19"},
-    {{"fr20"}, "f20"}, {{"fr21"}, "f21"}, {{"fr22"}, "f22"}, {{"fr23"}, "f23"},
-    {{"fr24"}, "f24"}, {{"fr25"}, "f25"}, {{"fr26"}, "f26"}, {{"fr27"}, "f27"},
-    {{"fr28"}, "f28"}, {{"fr29"}, "f29"}, {{"fr30"}, "f30"}, {{"fr31"}, "f31"},
-    {{"cc"}, "cr0"},
+    {{"0"}, "r0"},     {{"1", "sp"}, "r1"}, {{"2"}, "r2"},
+    {{"3"}, "r3"},     {{"4"}, "r4"},       {{"5"}, "r5"},
+    {{"6"}, "r6"},     {{"7"}, "r7"},       {{"8"}, "r8"},
+    {{"9"}, "r9"},     {{"10"}, "r10"},     {{"11"}, "r11"},
+    {{"12"}, "r12"},   {{"13"}, "r13"},     {{"14"}, "r14"},
+    {{"15"}, "r15"},   {{"16"}, "r16"},     {{"17"}, "r17"},
+    {{"18"}, "r18"},   {{"19"}, "r19"},     {{"20"}, "r20"},
+    {{"21"}, "r21"},   {{"22"}, "r22"},     {{"23"}, "r23"},
+    {{"24"}, "r24"},   {{"25"}, "r25"},     {{"26"}, "r26"},
+    {{"27"}, "r27"},   {{"28"}, "r28"},     {{"29"}, "r29"},
+    {{"30"}, "r30"},   {{"31"}, "r31"},     {{"fr0"}, "f0"},
+    {{"fr1"}, "f1"},   {{"fr2"}, "f2"},     {{"fr3"}, "f3"},
+    {{"fr4"}, "f4"},   {{"fr5"}, "f5"},     {{"fr6"}, "f6"},
+    {{"fr7"}, "f7"},   {{"fr8"}, "f8"},     {{"fr9"}, "f9"},
+    {{"fr10"}, "f10"}, {{"fr11"}, "f11"},   {{"fr12"}, "f12"},
+    {{"fr13"}, "f13"}, {{"fr14"}, "f14"},   {{"fr15"}, "f15"},
+    {{"fr16"}, "f16"}, {{"fr17"}, "f17"},   {{"fr18"}, "f18"},
+    {{"fr19"}, "f19"}, {{"fr20"}, "f20"},   {{"fr21"}, "f21"},
+    {{"fr22"}, "f22"}, {{"fr23"}, "f23"},   {{"fr24"}, "f24"},
+    {{"fr25"}, "f25"}, {{"fr26"}, "f26"},   {{"fr27"}, "f27"},
+    {{"fr28"}, "f28"}, {{"fr29"}, "f29"},   {{"fr30"}, "f30"},
+    {{"fr31"}, "f31"}, {{"cc"}, "cr0"},
 };
 
 ArrayRef<TargetInfo::GCCRegAlias> PPCTargetInfo::getGCCRegAliases() const {
diff --git a/clang/test/Misc/ppc-inline-asm-clobber-warning.c b/clang/test/Misc/ppc-inline-asm-clobber-warning.c
new file mode 100644
index 0000000000000..bc323243b6e2d
--- /dev/null
+++ b/clang/test/Misc/ppc-inline-asm-clobber-warning.c
@@ -0,0 +1,38 @@
+/// This test checks that the warning includes the location in the C source
+/// file that contains the inline asm. Although this warning is emitted in llvm
+/// it cannot be tested from IR as it does not have that location information at
+/// that stage.
+
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang --target=powerpc-unknown-unknown -mcpu=pwr7 \
+// RUN:   -c %s -o /dev/null 2>&1 | FileCheck %s
+// RUN: %clang --target=powerpc64-unknown-unknown -mcpu=pwr7 \
+// RUN:   -c %s -o /dev/null 2>&1 | FileCheck %s
+
+void test_r1_clobber() {
+  __asm__("nop":::"r1");
+}
+
+// CHECK:      ppc-inline-asm-clobber-warning.c:14:11: warning: inline asm clobber list contains reserved registers: R1 [-Winline-asm]
+// CHECK-NEXT:   __asm__("nop":::"r1");
+// CHECK-NEXT:           ^
+// CHECK-NEXT: ppc-inline-asm-clobber-warning.c:14:11: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
+
+void test_1_clobber() {
+  __asm__("nop":::"1");
+}
+
+// CHECK:      ppc-inline-asm-clobber-warning.c:23:11: warning: inline asm clobber list contains reserved registers: R1 [-Winline-asm]
+// CHECK-NEXT:   __asm__("nop":::"1");
+// CHECK-NEXT:           ^
+// CHECK-NEXT: ppc-inline-asm-clobber-warning.c:23:11: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
+
+void test_sp_clobber() {
+  __asm__("nop":::"sp");
+}
+
+// CHECK:      ppc-inline-asm-clobber-warning.c:32:11: warning: inline asm clobber list contains reserved registers: R1 [-Winline-asm]
+// CHECK-NEXT:   __asm__("nop":::"sp");
+// CHECK-NEXT:           ^
+// CHECK-NEXT: ppc-inline-asm-clobber-warning.c:32:11: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 422b3db4f978a..76b016c0ee792 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -390,6 +390,18 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
+bool PPCRegisterInfo::isAsmClobberable(const MachineFunction &MF,
+                                       MCRegister PhysReg) const {
+  // We cannot use getReservedRegs() to find the registers that are not asm
+  // clobberable because there are some reserved registers which can be
+  // clobbered by inline asm. For example, when LR is clobbered, the register is
+  // saved and restored. We will hardcode the registers that are not asm
+  // cloberable in this function.
+
+  // The stack pointer (R1/X1) is not clobberable by inline asm
+  return PhysReg != PPC::R1 && PhysReg != PPC::X1;
+}
+
 bool PPCRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
   const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
   const PPCInstrInfo *InstrInfo =  Subtarget.getInstrInfo();
diff --git a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index ce2a343cfa357..114f6d0f4c666 100644
--- a/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -91,6 +91,8 @@ class PPCRegisterInfo : public PPCGenRegisterInfo {
   void adjustStackMapLiveOutMask(uint32_t *Mask) const override;
 
   BitVector getReservedRegs(const MachineFunction &MF) const override;
+  bool isAsmClobberable(const MachineFunction &MF,
+                        MCRegister PhysReg) const override;
   bool isCallerPreservedPhysReg(MCRegister PhysReg,
                                 const MachineFunction &MF) const override;
 
diff --git a/llvm/test/CodeGen/PowerPC/inline-asm-clobber-warning.ll b/llvm/test/CodeGen/PowerPC/inline-asm-clobber-warning.ll
new file mode 100644
index 0000000000000..7f13f5072d97f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/inline-asm-clobber-warning.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc-unknown-unkown \
+; RUN:   -mcpu=pwr7 2>&1 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=powerpc64-unknown-unkown \
+; RUN:   -mcpu=pwr7 2>&1 | FileCheck %s
+
+define void @test_r1_clobber() {
+entry:
+  call void asm sideeffect "nop", "~{r1}"()
+  ret void
+}
+
+; CHECK: warning: inline asm clobber list contains reserved registers: R1
+; CHECK-NEXT: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.
+
+define void @test_x1_clobber() {
+entry:
+  call void asm sideeffect "nop", "~{x1}"()
+  ret void
+}
+
+; CHECK: warning: inline asm clobber list contains reserved registers: X1
+; CHECK-NEXT: note: Reserved registers on the clobber list may not be preserved across the asm statement, and clobbering them may lead to undefined behaviour.

From d3932c690d97df83f030f527456ddadf098b3d04 Mon Sep 17 00:00:00 2001
From: Igor Kirillov <Igor.Kirillov@arm.com>
Date: Thu, 13 Jan 2022 12:57:50 +0000
Subject: [PATCH 452/946] [LoopVectorize] Add tests with reductions that are
 stored in invariant address

This patch adds tests for functionality that is to be implemented in D110235.

Differential Revision: https://reviews.llvm.org/D117213
---
 .../reductions-across-inner-and-outer-loop.ll |  38 ++
 .../AArch64/scalable-reductions.ll            |  25 ++
 .../LoopVectorize/AArch64/strict-fadd.ll      |  31 ++
 .../reduction-with-invariant-store.ll         | 333 ++++++++++++++++++
 4 files changed, 427 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll

diff --git a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
index 03bd5b71af947..52e604a00df32 100644
--- a/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
+++ b/llvm/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll
@@ -151,6 +151,44 @@ for1.loopexit:                                 ; preds = %for1.inc
   ret i64 %sum.inc.lcssa2
 }
 
+; Check that we do not interchange if reduction is stored in an invariant address inside inner loop
+; REMARKS: --- !Missed
+; REMARKS-NEXT: Pass:            loop-interchange
+; REMARKS-NEXT: Name:            UnsupportedPHIOuter
+; REMARKS-NEXT: Function:        test4
+
+define i64 @test4([100 x [100 x i64]]* %Arr, i64* %dst) {
+entry:
+  %gep.dst = getelementptr inbounds i64, i64* %dst, i64 42
+  br label %for1.header
+
+for1.header:                                         ; preds = %for1.inc, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for1.inc ]
+  %sum.outer = phi i64 [ 0, %entry ], [ %sum.inc.lcssa, %for1.inc ]
+  br label %for2
+
+for2:                                        ; preds = %for2, %for1.header
+  %indvars.iv = phi i64 [ 0, %for1.header ], [ %indvars.iv.next.3, %for2 ]
+  %sum.inner = phi i64 [ %sum.outer, %for1.header ], [ %sum.inc, %for2 ]
+  %arrayidx = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv23
+  %lv = load i64, i64* %arrayidx, align 4
+  %sum.inc = add i64 %sum.inner, %lv
+  store i64 %sum.inc, i64* %gep.dst, align 4
+  %indvars.iv.next.3 = add nuw nsw i64 %indvars.iv, 1
+  %exit1 = icmp eq i64 %indvars.iv.next.3, 100
+  br i1 %exit1, label %for1.inc, label %for2
+
+for1.inc:                                ; preds = %for2
+  %sum.inc.lcssa = phi i64 [ %sum.inc, %for2 ]
+  %indvars.iv.next24 = add nuw nsw i64 %indvars.iv23, 1
+  %exit2 = icmp eq i64 %indvars.iv.next24, 100
+  br i1 %exit2, label %for1.loopexit, label %for1.header
+
+for1.loopexit:                                 ; preds = %for1.inc
+  %sum.inc.lcssa2 = phi i64 [ %sum.inc.lcssa, %for1.inc ]
+  ret i64 %sum.inc.lcssa2
+}
+
 ; Check that we do not interchange or crash if the PHI in the outer loop gets a
 ; constant from the inner loop.
 ; REMARKS: --- !Missed
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index 8a0a71ff63365..feafd8831b668 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -318,6 +318,31 @@ for.end:
   ret float %.sroa.speculated
 }
 
+; ADD (with reduction stored in invariant address)
+
+; CHECK-REMARK: loop not vectorized: value that could not be identified as reduction is used outside the loop
+define void @invariant_store(i32* %dst, i32* readonly %src) {
+; CHECK-LABEL: @invariant_store
+; CHECK-NOT: vector.body
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 0, i32* %gep.dst, align 4
+  br label %for.body
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %indvars.iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  store i32 %add, i32* %gep.dst, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
 ; Reduction cannot be vectorized
 
 ; MUL
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
index e890468f06ddf..3aa88d7fc18c1 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -1309,6 +1309,37 @@ for.end:
 
 declare float @llvm.fmuladd.f32(float, float, float)
 
+; Test case with invariant store where fadd is strict.
+define void @reduction_store_to_invariant_address(float* %dst, float* readonly %src) {
+; CHECK-ORDERED-LABEL: @reduction_store_to_invariant_address(
+; CHECK-ORDERED-NOT: vector.body
+
+; CHECK-UNORDERED-LABEL: @reduction_store_to_invariant_address(
+; CHECK-UNORDERED-NOT: vector.body
+
+; CHECK-NOT-VECTORIZED-LABEL: @reduction_store_to_invariant_address(
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+  %arrayidx = getelementptr inbounds float, float* %dst, i64 42
+  store float 0.000000e+00, float* %arrayidx, align 4
+  br label %for.body
+
+for.body:
+  %0 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx1 = getelementptr inbounds float, float* %src, i64 %indvars.iv
+  %1 = load float, float* %arrayidx1, align 4
+  %add = fadd float %0, %1
+  store float %add, float* %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, 1000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
 !0 = distinct !{!0, !5, !9, !11}
 !1 = distinct !{!1, !5, !10, !11}
 !2 = distinct !{!2, !6, !9, !11}
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
new file mode 100644
index 0000000000000..d002b1b289ea0
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/reduction-with-invariant-store.ll
@@ -0,0 +1,333 @@
+; RUN: opt < %s -passes="loop-vectorize" -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+; This test checks that we can vectorize loop with reduction variable
+; stored in an invariant address.
+;
+; int sum = 0;
+; for(i=0..N) {
+;   sum += src[i];
+;   dst[42] = sum;
+; }
+; CHECK-LABEL: @reduc_store
+; CHECK-NOT: vector.body
+define void @reduc_store(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 0, i32* %gep.dst, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  store i32 %add, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; Same as above but with floating point numbers instead.
+;
+; float sum = 0;
+; for(i=0..N) {
+;   sum += src[i];
+;   dst[42] = sum;
+; }
+; CHECK-LABEL: @reduc_store_fadd_fast
+; CHECK-NOT: vector.body
+define void @reduc_store_fadd_fast(float* %dst, float* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds float, float* %dst, i64 42
+  store float 0.000000e+00, float* %gep.dst, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds float, float* %src, i64 %iv
+  %0 = load float, float* %gep.src, align 4
+  %add = fadd fast float %sum, %0
+  store float %add, float* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; Check that if we have a read from an invariant address, we do not vectorize.
+;
+; int sum = 0;
+; for(i=0..N) {
+;   sum += src[i];
+;   dst.2[i] = dst[42];
+;   dst[42] = sum;
+; }
+; CHECK-LABEL: @reduc_store_load
+; CHECK-NOT: vector.body
+define void @reduc_store_load(i32* %dst, i32* readonly %src, i32* noalias %dst.2) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  store i32 0, i32* %gep.dst, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %add = add nsw i32 %sum, %0
+  %lv = load i32, i32* %gep.dst
+  %gep.dst.2  = getelementptr inbounds i32, i32* %dst.2, i64 %iv
+  store i32 %lv, i32* %gep.dst.2, align 4
+  store i32 %add, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  ret void
+}
+
+; Final value is not guaranteed to be stored in an invariant address.
+; We don't vectorize in that case.
+;
+; int sum = 0;
+; for(i=0..N) {
+;   int diff = y[i] - x[i];
+;   if (diff > 0) {
+;     sum = += diff;
+;     *t = sum;
+;   }
+; }
+; CHECK-LABEL: @reduc_cond_store
+; CHECK-NOT: vector.body
+define void @reduc_cond_store(i32* %t, i32* readonly %x, i32* readonly %y) {
+entry:
+  store i32 0, i32* %t, align 4
+  br label %for.body
+
+for.body:
+  %sum = phi i32 [ 0, %entry ], [ %sum.2, %if.end ]
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %if.end ]
+  %gep.y = getelementptr inbounds i32, i32* %y, i64 %iv
+  %0 = load i32, i32* %gep.y, align 4
+  %gep.x = getelementptr inbounds i32, i32* %x, i64 %iv
+  %1 = load i32, i32* %gep.x, align 4
+  %diff = sub nsw i32 %0, %1
+  %cmp2 = icmp sgt i32 %diff, 0
+  br i1 %cmp2, label %if.then, label %if.end
+
+if.then:
+  %sum.1 = add nsw i32 %diff, %sum
+  store i32 %sum.1, i32* %t, align 4
+  br label %if.end
+
+if.end:
+  %sum.2 = phi i32 [ %sum.1, %if.then ], [ %0, %for.body ]
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+; Check that we can vectorize code with several stores to an invariant address
+; with condition that final reduction value is stored too.
+;
+;  int sum = 0;
+;  for(int i=0; i < 1000; i+=2) {
+;    sum += src[i];
+;    dst[42] = sum;
+;    sum += src[i+1];
+;    dst[42] = sum;
+;  }
+; CHECK-LABEL: @reduc_store_inside_unrolled
+; CHECK-NOT: vector.body
+define void @reduc_store_inside_unrolled(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  store i32 %sum.1, i32* %gep.dst, align 4
+  %1 = or i64 %iv, 1
+  %gep.src.1 = getelementptr inbounds i32, i32* %src, i64 %1
+  %2 = load i32, i32* %gep.src.1, align 4
+  %sum.2 = add nsw i32 %2, %sum.1
+  store i32 %sum.2, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 2
+  %cmp = icmp slt i64 %iv.next, 1000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+; We cannot vectorize if two (or more) invariant stores exist in a loop.
+;
+;  int sum = 0;
+;  for(int i=0; i < 1000; i+=2) {
+;    sum += src[i];
+;    dst[42] = sum;
+;    sum += src[i+1];
+;    other_dst[42] = sum;
+;  }
+; CHECK-LABEL: @reduc_double_invariant_store
+; CHECK-NOT: vector.body:
+define void @reduc_double_invariant_store(i32* %dst, i32* %other_dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  %gep.other_dst = getelementptr inbounds i32, i32* %other_dst, i64 42
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  store i32 %sum.1, i32* %gep.dst, align 4
+  %1 = or i64 %iv, 1
+  %arrayidx4 = getelementptr inbounds i32, i32* %src, i64 %1
+  %2 = load i32, i32* %arrayidx4, align 4
+  %sum.2 = add nsw i32 %2, %sum.1
+  store i32 %sum.2, i32* %gep.other_dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 2
+  %cmp = icmp slt i64 %iv.next, 1000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:
+  ret void
+}
+
+;  int sum = 0;
+;  for(int i=0; i < 1000; i+=2) {
+;    sum += src[i];
+;    if (src[i+1] > 0)
+;      dst[42] = sum;
+;    sum += src[i+1];
+;    dst[42] = sum;
+;  }
+; CHECK-LABEL: @reduc_store_middle_store_predicated
+; CHECK-NOT: vector.body
+define void @reduc_store_middle_store_predicated(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:                                         ; preds = %latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.2, %latch ]
+  %gep.src = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %gep.src, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  %cmp = icmp sgt i32 %0, 0
+  br i1 %cmp, label %predicated, label %latch
+
+predicated:                                       ; preds = %for.body
+  store i32 %sum.1, i32* %gep.dst, align 4
+  br label %latch
+
+latch:                                            ; preds = %predicated, %for.body
+  %1 = or i64 %iv, 1
+  %gep.src.1 = getelementptr inbounds i32, i32* %src, i64 %1
+  %2 = load i32, i32* %gep.src.1, align 4
+  %sum.2 = add nsw i32 %2, %sum.1
+  store i32 %sum.2, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 2
+  %cmp.1 = icmp slt i64 %iv.next, 1000
+  br i1 %cmp.1, label %for.body, label %exit
+
+exit:                                 ; preds = %latch
+  ret void
+}
+
+;  int sum = 0;
+;  for(int i=0; i < 1000; i+=2) {
+;    sum += src[i];
+;    dst[42] = sum;
+;    sum += src[i+1];
+;    if (src[i+1] > 0)
+;      dst[42] = sum;
+;  }
+; CHECK-LABEL: @reduc_store_final_store_predicated
+; CHECK-NOT: vector.body:
+define void @reduc_store_final_store_predicated(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:                                         ; preds = %latch, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.1, %latch ]
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  store i32 %sum.1, i32* %gep.dst, align 4
+  %1 = or i64 %iv, 1
+  %gep.src.1 = getelementptr inbounds i32, i32* %src, i64 %1
+  %2 = load i32, i32* %gep.src.1, align 4
+  %sum.2 = add nsw i32 %2, %sum.1
+  %cmp1 = icmp sgt i32 %2, 0
+  br i1 %cmp1, label %predicated, label %latch
+
+predicated:                                       ; preds = %for.body
+  store i32 %sum.2, i32* %gep.dst, align 4
+  br label %latch
+
+latch:                                            ; preds = %predicated, %for.body
+  %iv.next = add nuw nsw i64 %iv, 2
+  %cmp = icmp slt i64 %iv.next, 1000
+  br i1 %cmp, label %for.body, label %exit
+
+exit:                                 ; preds = %latch
+  ret void
+}
+
+; Final value used outside of loop does not prevent vectorization
+;
+; int sum = 0;
+; for(int i=0; i < 1000; i++) {
+;   sum += src[i];
+;   dst[42] = sum;
+; }
+; dst[43] = sum;
+; CHECK-LABEL: @reduc_store_inoutside
+; CHECK-NOT: vector.body
+define void @reduc_store_inoutside(i32* %dst, i32* readonly %src) {
+entry:
+  %gep.dst = getelementptr inbounds i32, i32* %dst, i64 42
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi i32 [ 0, %entry ], [ %sum.1, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %sum.1 = add nsw i32 %0, %sum
+  store i32 %sum.1, i32* %gep.dst, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:
+  %sum.lcssa = phi i32 [ %sum.1, %for.body ]
+  %gep.dst.1 = getelementptr inbounds i32, i32* %dst, i64 43
+  store i32 %sum.lcssa, i32* %gep.dst.1, align 4
+  ret void
+}

From adb6494660eb234d009fe333e65bf94e8becf955 Mon Sep 17 00:00:00 2001
From: Rahul Joshi <jurahul@google.com>
Date: Sat, 22 Jan 2022 06:00:29 -0800
Subject: [PATCH 453/946] [MLIR] Add generic walk support to OpState

- This allows calling the generic walkers on specific operation instances.

Differential Revision: https://reviews.llvm.org/D117949
---
 mlir/include/mlir/IR/OpDefinition.h | 32 ++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/IR/OpDefinition.h b/mlir/include/mlir/IR/OpDefinition.h
index c68c6b12bf0a4..d1400c8557834 100644
--- a/mlir/include/mlir/IR/OpDefinition.h
+++ b/mlir/include/mlir/IR/OpDefinition.h
@@ -157,10 +157,40 @@ class OpState {
   /// See Operation::walk for more details.
   template <WalkOrder Order = WalkOrder::PostOrder, typename FnT,
             typename RetT = detail::walkResultType<FnT>>
-  RetT walk(FnT &&callback) {
+  typename std::enable_if<
+      llvm::function_traits<std::decay_t<FnT>>::num_args == 1, RetT>::type
+  walk(FnT &&callback) {
     return state->walk<Order>(std::forward<FnT>(callback));
   }
 
+  /// Generic walker with a stage aware callback. Walk the operation by calling
+  /// the callback for each nested operation (including this one) N+1 times,
+  /// where N is the number of regions attached to that operation.
+  ///
+  /// The callback method can take any of the following forms:
+  ///   void(Operation *, const WalkStage &) : Walk all operation opaquely
+  ///     * op.walk([](Operation *nestedOp, const WalkStage &stage) { ...});
+  ///   void(OpT, const WalkStage &) : Walk all operations of the given derived
+  ///                                  type.
+  ///     * op.walk([](ReturnOp returnOp, const WalkStage &stage) { ...});
+  ///   WalkResult(Operation*|OpT, const WalkStage &stage) : Walk operations,
+  ///          but allow for interruption/skipping.
+  ///     * op.walk([](... op, const WalkStage &stage) {
+  ///         // Skip the walk of this op based on some invariant.
+  ///         if (some_invariant)
+  ///           return WalkResult::skip();
+  ///         // Interrupt, i.e cancel, the walk based on some invariant.
+  ///         if (another_invariant)
+  ///           return WalkResult::interrupt();
+  ///         return WalkResult::advance();
+  ///       });
+  template <typename FnT, typename RetT = detail::walkResultType<FnT>>
+  typename std::enable_if<
+      llvm::function_traits<std::decay_t<FnT>>::num_args == 2, RetT>::type
+  walk(FnT &&callback) {
+    return state->walk(std::forward<FnT>(callback));
+  }
+
   // These are default implementations of customization hooks.
 public:
   /// This hook returns any canonicalization pattern rewrites that the operation

From 8d298355ca3778a47fd6b3110aeee03ea5e8e02b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 21:41:57 +0000
Subject: [PATCH 454/946] [X86] combineSetCCMOVMSK - detect
 and(pcmpeq(),pcmpeq()) ptest pattern.

Handle cases where we've split an allof(cmpeq()) pattern to a legal vector type
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 17 +++++-
 .../test/CodeGen/X86/vector-compare-all_of.ll | 18 +++---
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 61 +++++++++----------
 3 files changed, 54 insertions(+), 42 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f8e97e63fd49..a790777bbdc2d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44460,14 +44460,29 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
   // MOVMSK(PCMPEQ(X,Y)) == -1 -> PTESTZ(SUB(X,Y),SUB(X,Y)).
   // MOVMSK(PCMPEQ(X,Y)) != -1 -> !PTESTZ(SUB(X,Y),SUB(X,Y)).
   if (IsAllOf && Subtarget.hasSSE41()) {
+    MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
     SDValue BC = peekThroughBitcasts(Vec);
     if (BC.getOpcode() == X86ISD::PCMPEQ) {
-      MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
       SDValue V = DAG.getNode(ISD::SUB, SDLoc(BC), BC.getValueType(),
                               BC.getOperand(0), BC.getOperand(1));
       V = DAG.getBitcast(TestVT, V);
       return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
     }
+    // Check for 256-bit split vector cases.
+    if (BC.getOpcode() == ISD::AND &&
+        BC.getOperand(0).getOpcode() == X86ISD::PCMPEQ &&
+        BC.getOperand(1).getOpcode() == X86ISD::PCMPEQ) {
+      SDValue LHS = BC.getOperand(0);
+      SDValue RHS = BC.getOperand(1);
+      LHS = DAG.getNode(ISD::SUB, SDLoc(LHS), LHS.getValueType(),
+                        LHS.getOperand(0), LHS.getOperand(1));
+      RHS = DAG.getNode(ISD::SUB, SDLoc(RHS), RHS.getValueType(),
+                        RHS.getOperand(0), RHS.getOperand(1));
+      LHS = DAG.getBitcast(TestVT, LHS);
+      RHS = DAG.getBitcast(TestVT, RHS);
+      SDValue V = DAG.getNode(ISD::OR, SDLoc(EFLAGS), TestVT, LHS, RHS);
+      return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+    }
   }
 
   // See if we can avoid a PACKSS by calling MOVMSK on the sources.
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index b399712553606..91fa60ef09d59 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1329,11 +1329,10 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ; SSE-LABEL: bool_reduction_v32i8:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
-; SSE-NEXT:    pand %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    psubb %xmm3, %xmm1
+; SSE-NEXT:    psubb %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    ptest %xmm0, %xmm0
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -1341,11 +1340,10 @@ define i1 @bool_reduction_v32i8(<32 x i8> %x, <32 x i8> %y) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index be9ebf466eb0d..4f485cabdb1bc 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1268,12 +1268,8 @@ define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
 ; AVX1-LABEL: icmp0_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1594,12 +1590,8 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -2097,25 +2089,33 @@ define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
 }
 
 define i1 @icmp_v32i8_v32i1(<32 x i8>, <32 x i8>) {
-; SSE-LABEL: icmp_v32i8_v32i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqb %xmm3, %xmm1
-; SSE-NEXT:    pcmpeqb %xmm2, %xmm0
-; SSE-NEXT:    pand %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v32i8_v32i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v32i8_v32i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubb %xmm3, %xmm1
+; SSE41-NEXT:    psubb %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: icmp_v32i8_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -2452,11 +2452,10 @@ define i1 @icmp_v64i8_v64i1(<64 x i8>, <64 x i8>) {
 ;
 ; AVX2-LABEL: icmp_v64i8_v64i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqb %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

From 3b64ab574d985b70cb4ec0f69c1fc1c1c4527cde Mon Sep 17 00:00:00 2001
From: Yuanfang Chen <yuanfang.chen@sony.com>
Date: Mon, 24 Jan 2022 13:42:04 -0800
Subject: [PATCH 455/946] [NFC][clangd] Use table to collect option aliases

* Suppress a lot of `-Wtautological-compare` warning
* Speed up file build a little bit

Reviewed By: kadircet

Differential Revision: https://reviews.llvm.org/D98110
---
 clang-tools-extra/clangd/CompileCommands.cpp | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp
index 7d6f612cb8b96..df5f84c894e7b 100644
--- a/clang-tools-extra/clangd/CompileCommands.cpp
+++ b/clang-tools-extra/clangd/CompileCommands.cpp
@@ -463,13 +463,26 @@ llvm::ArrayRef<ArgStripper::Rule> ArgStripper::rulesFor(llvm::StringRef Arg) {
 #define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE;
 #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
                HELP, METAVAR, VALUES)                                          \
-  if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr)  \
-    AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS);                       \
   Prefixes[DriverID::OPT_##ID] = PREFIX;
 #include "clang/Driver/Options.inc"
 #undef OPTION
 #undef PREFIX
 
+    struct {
+      DriverID ID;
+      DriverID AliasID;
+      void *AliasArgs;
+    } AliasTable[] = {
+#define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM,  \
+               HELP, METAVAR, VALUES)                                          \
+  {DriverID::OPT_##ID, DriverID::OPT_##ALIAS, (void *)ALIASARGS},
+#include "clang/Driver/Options.inc"
+#undef OPTION
+    };
+    for (auto &E : AliasTable)
+      if (E.AliasID != DriverID::OPT_INVALID && E.AliasArgs == nullptr)
+        AddAlias(E.ID, E.AliasID);
+
     auto Result = std::make_unique<TableTy>();
     // Iterate over distinct options (represented by the canonical alias).
     // Every spelling of this option will get the same set of rules.

From d87459a0b8e98afce89309459f1cc5ef33065f8e Mon Sep 17 00:00:00 2001
From: Yuanfang Chen <yuanfang.chen@sony.com>
Date: Mon, 24 Jan 2022 13:42:39 -0800
Subject: [PATCH 456/946] [CMake] Fixes /INCREMENTAL detection when considering
 adding /Brepro

/INCREMENTAL is the linker default (lld-link and MSVC link). Specifying
"/INCREMENTAL:NO" is the only way to disable it. So checking for the
negative flag instead and check exe/module/shared link flags
independently.

Reviewed By: rnk

Differential Revision: https://reviews.llvm.org/D117381
---
 llvm/cmake/modules/HandleLLVMOptions.cmake | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index 81f4f6c9de1af..c2feecc21a802 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -277,6 +277,17 @@ function(add_flag_or_print_warning flag name)
   endif()
 endfunction()
 
+function(has_msvc_incremental_no_flag flags incr_no_flag_on)
+  set(${incr_no_flag_on} OFF PARENT_SCOPE)
+  string(FIND "${flags}" "/INCREMENTAL" idx REVERSE)
+  if (${idx} GREATER -1)
+    string(SUBSTRING "${flags}" ${idx} 15 no_flag)
+    if (${no_flag} MATCHES "/INCREMENTAL:NO")
+      set(${incr_no_flag_on} ON PARENT_SCOPE)
+    endif()
+  endif()
+endfunction()
+
 if( LLVM_ENABLE_LLD )
   if ( LLVM_USE_LINKER )
     message(FATAL_ERROR "LLVM_ENABLE_LLD and LLVM_USE_LINKER can't be set at the same time")
@@ -535,11 +546,13 @@ if( MSVC )
     if (SUPPORTS_BREPRO)
       # Check if /INCREMENTAL is passed to the linker and complain that it
       # won't work with /Brepro.
-      string(FIND "${all_linker_flags_uppercase}" "/INCREMENTAL" linker_flag_idx)
-      if (${linker_flag_idx} GREATER -1)
-        message(WARNING "/Brepro not compatible with /INCREMENTAL linking - builds will be non-deterministic")
-      else()
+      has_msvc_incremental_no_flag("${CMAKE_EXE_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${CMAKE_EXE_LINKER_FLAGS}" NO_INCR_EXE)
+      has_msvc_incremental_no_flag("${CMAKE_MODULE_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${CMAKE_MODULE_LINKER_FLAGS}" NO_INCR_MODULE)
+      has_msvc_incremental_no_flag("${CMAKE_SHARED_LINKER_FLAGS_${uppercase_CMAKE_BUILD_TYPE}} ${CMAKE_SHARED_LINKER_FLAGS}" NO_INCR_SHARED)
+      if (NO_INCR_EXE AND NO_INCR_MODULE AND NO_INCR_SHARED)
         append("/Brepro" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+      else()
+        message(WARNING "/Brepro not compatible with /INCREMENTAL linking - builds will be non-deterministic")
       endif()
     endif()
   endif()

From da85307ba699ea2260252f523e089c85e863d671 Mon Sep 17 00:00:00 2001
From: Yuanfang Chen <yuanfang.chen@sony.com>
Date: Mon, 24 Jan 2022 13:42:47 -0800
Subject: [PATCH 457/946] [CMake] Pass CMAKE_C/CXX_COMPILER_LAUNCHER down to
 cross-compile and runtime build

Similar to D59032.

Reviewed By: dexonsmith

Differential Revision: https://reviews.llvm.org/D117746
---
 clang/runtime/CMakeLists.txt                      | 2 ++
 llvm/cmake/modules/CrossCompile.cmake             | 2 ++
 llvm/cmake/modules/LLVMExternalProjectUtils.cmake | 2 ++
 3 files changed, 6 insertions(+)

diff --git a/clang/runtime/CMakeLists.txt b/clang/runtime/CMakeLists.txt
index 61b1c60bf590b..0388008792511 100644
--- a/clang/runtime/CMakeLists.txt
+++ b/clang/runtime/CMakeLists.txt
@@ -78,6 +78,8 @@ if(LLVM_BUILD_EXTERNAL_COMPILER_RT AND EXISTS ${COMPILER_RT_SRC_ROOT}/)
                -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang
                -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
+               -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
+               -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
                -DLLVM_CONFIG_PATH=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-config
                -DLLVM_LIT_ARGS=${LLVM_LIT_ARGS}
                -DCOMPILER_RT_OUTPUT_DIR=${LLVM_LIBRARY_OUTPUT_INTDIR}/clang/${CLANG_VERSION}
diff --git a/llvm/cmake/modules/CrossCompile.cmake b/llvm/cmake/modules/CrossCompile.cmake
index 2d637f035a7c0..2a39b6a40a285 100644
--- a/llvm/cmake/modules/CrossCompile.cmake
+++ b/llvm/cmake/modules/CrossCompile.cmake
@@ -70,6 +70,8 @@ function(llvm_create_cross_target project_name target_name toolchain buildtype)
   add_custom_command(OUTPUT ${${project_name}_${target_name}_BUILD}/CMakeCache.txt
     COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
         -DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
+        -DCMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER}"
+        -DCMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER}"
         ${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_CURRENT_SOURCE_DIR}
         ${CROSS_TOOLCHAIN_FLAGS_${project_name}_${target_name}}
         -DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE
diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
index 7c417b41cd344..f99a50df22801 100644
--- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
+++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
@@ -314,6 +314,8 @@ function(llvm_ExternalProject_Add name source_dir)
                -DPACKAGE_VERSION=${PACKAGE_VERSION}
                -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
                -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM}
+               -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
+               -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
                -DCMAKE_EXPORT_COMPILE_COMMANDS=1
                ${cmake_args}
                ${PASSTHROUGH_VARIABLES}

From 4cfea311cb8e448e7f66feb25f7aef7f68fa59b8 Mon Sep 17 00:00:00 2001
From: Changpeng Fang <Changpeng.Fang@amd.com>
Date: Mon, 24 Jan 2022 14:33:12 -0800
Subject: [PATCH 458/946] [AMDGPU][NFC] Update to AMDGPUUsage for default Code
 Object Version

Summary:
  Update the documentation for default code object version (from v3 to v4).

Reviewers:
  kzhuravl

Differential Revision:
  https://reviews.llvm.org/D117845
---
 llvm/docs/AMDGPUUsage.rst | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 8875d178015bc..f592660f4965e 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1018,12 +1018,12 @@ The AMDGPU backend uses the following ELF header:
 
   * ``ELFABIVERSION_AMDGPU_HSA_V3`` is used to specify the version of AMD HSA
     runtime ABI for code object V3. Specify using the Clang option
-    ``-mcode-object-version=3``. This is the default code object
-    version if not specified.
+    ``-mcode-object-version=3``.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
     runtime ABI for code object V4. Specify using the Clang option
-    ``-mcode-object-version=4``.
+    ``-mcode-object-version=4``. This is the default code object
+    version if not specified.
 
   * ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
     runtime ABI.
@@ -2990,6 +2990,10 @@ non-AMD key names should be prefixed by "*vendor-name*.".
 Code Object V3 Metadata
 +++++++++++++++++++++++
 
+.. warning::
+  Code object V3 is not the default code object version emitted by this version
+  of LLVM.
+
 Code object V3 to V4 metadata is specified by the ``NT_AMDGPU_METADATA`` note
 record (see :ref:`amdgpu-note-records-v3-v4`).
 
@@ -3425,10 +3429,6 @@ same *vendor-name*.
 Code Object V4 Metadata
 +++++++++++++++++++++++
 
-.. warning::
-  Code object V4 is not the default code object version emitted by this version
-  of LLVM.
-
 Code object V4 metadata is the same as
 :ref:`amdgpu-amdhsa-code-object-metadata-v3` with the changes and additions
 defined in table :ref:`amdgpu-amdhsa-code-object-metadata-map-table-v3`.

From 11bb4a11116c4937b5f7e851189c593abf28e682 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Mon, 24 Jan 2022 22:43:16 +0000
Subject: [PATCH 459/946] [X86] combinePredicateReduction - split vXi16
 allof(cmpeq()) to vXi8 allof(cmpeq())

vXi16 patterns allof(cmp()) reduction patterns will have to be pack the comparison results to vXi8 to use PMOVMSKB.

If we're reducing cmpeq(), then we can compare the vXi8 halves directly - similar to what we already do for vXi64 -> vXi32 for cases without PCMPEQQ.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  15 ++-
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 121 +++++++++---------
 2 files changed, 68 insertions(+), 68 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a790777bbdc2d..4360bc68ffaee 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42215,18 +42215,21 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
       EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
       Movmsk = DAG.getBitcast(MovmskVT, Match);
     } else {
-      // For all_of(setcc(vec,0,eq)) - avoid vXi64 comparisons if we don't have
-      // PCMPEQQ (SSE41+), use PCMPEQD instead.
-      if (BinOp == ISD::AND && !Subtarget.hasSSE41() &&
+      // For all_of(setcc(vec,0,eq))
+      // - avoid vXi64 comparisons without PCMPEQQ (SSE41+), use PCMPEQD.
+      // - avoid vXi16 comparisons, use PMOVMSKB(PCMPEQB()).
+      if (BinOp == ISD::AND &&
           Match.getOpcode() == ISD::SETCC &&
           ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
           cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
               ISD::CondCode::SETEQ) {
         SDValue Vec = Match.getOperand(0);
-        if (Vec.getValueType().getScalarType() == MVT::i64 &&
-            (2 * NumElts) <= MaxElts) {
+        EVT VecSVT = Vec.getValueType().getScalarType();
+        if ((VecSVT == MVT::i16 && !Subtarget.hasBWI()) ||
+            (VecSVT == MVT::i64 && !Subtarget.hasSSE41())) {
           NumElts *= 2;
-          EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
+          VecSVT = VecSVT.getHalfSizedIntegerVT(*DAG.getContext());
+          EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumElts);
           MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
           Match = DAG.getSetCC(
               DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 4f485cabdb1bc..fa48b72490ebf 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -938,23 +938,24 @@ define i1 @icmp0_v4i32_v4i1(<4 x i32>) {
 }
 
 define i1 @icmp0_v8i16_v8i1(<8 x i16>) {
-; SSE-LABEL: icmp0_v8i16_v8i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pxor %xmm1, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
-; SSE-NEXT:    packsswb %xmm1, %xmm1
-; SSE-NEXT:    pmovmskb %xmm1, %eax
-; SSE-NEXT:    cmpb $-1, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp0_v8i16_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pxor %xmm1, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp0_v8i16_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp0_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    cmpb $-1, %al
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
@@ -1184,26 +1185,28 @@ define i1 @icmp0_v8i32_v8i1(<8 x i32>) {
 }
 
 define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
-; SSE-LABEL: icmp0_v16i16_v16i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pxor %xmm2, %xmm2
-; SSE-NEXT:    pcmpeqw %xmm2, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp0_v16i16_v16i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pxor %xmm2, %xmm2
+; SSE2-NEXT:    por %xmm1, %xmm0
+; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp0_v16i16_v16i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: icmp0_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1477,47 +1480,41 @@ define i1 @icmp0_v16i32_v16i1(<16 x i32>) {
 }
 
 define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
-; SSE-LABEL: icmp0_v32i16_v32i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pxor %xmm4, %xmm4
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm3
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm2
-; SSE-NEXT:    packsswb %xmm3, %xmm2
-; SSE-NEXT:    pand %xmm0, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp0_v32i16_v32i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pxor %xmm4, %xmm4
+; SSE2-NEXT:    por %xmm3, %xmm1
+; SSE2-NEXT:    por %xmm2, %xmm1
+; SSE2-NEXT:    por %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm4, %xmm1
+; SSE2-NEXT:    pmovmskb %xmm1, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp0_v32i16_v32i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    por %xmm3, %xmm1
+; SSE41-NEXT:    por %xmm2, %xmm1
+; SSE41-NEXT:    por %xmm0, %xmm1
+; SSE41-NEXT:    ptest %xmm1, %xmm1
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: icmp0_v32i16_v32i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
-; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

From 22b0fe3fd9d404f0aa96b42fe0eeb899e90ad487 Mon Sep 17 00:00:00 2001
From: Nancy Wang <wangn@ca.ibm.com>
Date: Mon, 24 Jan 2022 17:44:17 -0500
Subject: [PATCH 460/946] [SystemZ][z/OS]: fix lit tmp_dir to use - instead of
 _

Latest upstream change in https://reviews.llvm.org/D117179 causes lit regressions on z/OS, when TMPDIR is exported and contains _, ld linker fails, it doesnt recognize _ specified in SYSLIN. this seems a limitation on z/OS. we need to fix lit.

Differential Revision: https://reviews.llvm.org/D118071
---
 llvm/utils/lit/lit/main.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/utils/lit/lit/main.py b/llvm/utils/lit/lit/main.py
index 99777eef8ec02..41f124a27ad7f 100755
--- a/llvm/utils/lit/lit/main.py
+++ b/llvm/utils/lit/lit/main.py
@@ -245,7 +245,8 @@ def execute_in_tmp_dir(run, lit_config):
     tmp_dir = None
     if 'LIT_PRESERVES_TMP' not in os.environ:
         import tempfile
-        tmp_dir = tempfile.mkdtemp(prefix='lit_tmp_')
+        # z/OS linker does not support '_' in paths, so use '-'.
+        tmp_dir = tempfile.mkdtemp(prefix='lit-tmp-')
         tmp_dir_envs = {k: tmp_dir for k in ['TMP', 'TMPDIR', 'TEMP', 'TEMPDIR']}
         os.environ.update(tmp_dir_envs)
         for cfg in {t.config for t in run.tests}:

From c1562683ee9a3be4246aa2546bf41e40b9cb123c Mon Sep 17 00:00:00 2001
From: Arjun P <arjunpitchanathan@gmail.com>
Date: Tue, 25 Jan 2022 03:47:02 +0530
Subject: [PATCH 461/946] [MLIR][Presburger] LinearTransform: rename
 multiplication functions to be more intuitive

---
 mlir/include/mlir/Analysis/Presburger/LinearTransform.h   | 5 +++--
 mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp        | 2 +-
 mlir/lib/Analysis/Presburger/LinearTransform.cpp          | 8 ++++----
 .../unittests/Analysis/Presburger/LinearTransformTest.cpp | 3 ++-
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/mlir/include/mlir/Analysis/Presburger/LinearTransform.h b/mlir/include/mlir/Analysis/Presburger/LinearTransform.h
index 73bea008836f8..54d12173826d0 100644
--- a/mlir/include/mlir/Analysis/Presburger/LinearTransform.h
+++ b/mlir/include/mlir/Analysis/Presburger/LinearTransform.h
@@ -39,11 +39,12 @@ class LinearTransform {
 
   // The given vector is interpreted as a row vector v. Post-multiply v with
   // this transform, say T, and return vT.
-  SmallVector<int64_t, 8> postMultiplyRow(ArrayRef<int64_t> rowVec) const;
+  SmallVector<int64_t, 8> preMultiplyWithRow(ArrayRef<int64_t> rowVec) const;
 
   // The given vector is interpreted as a column vector v. Pre-multiply v with
   // this transform, say T, and return Tv.
-  SmallVector<int64_t, 8> preMultiplyColumn(ArrayRef<int64_t> colVec) const;
+  SmallVector<int64_t, 8>
+  postMultiplyWithColumn(ArrayRef<int64_t> colVec) const;
 
 private:
   Matrix matrix;
diff --git a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
index 0080756dfb760..96519738a1188 100644
--- a/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
+++ b/mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp
@@ -770,7 +770,7 @@ Optional<SmallVector<int64_t, 8>> IntegerPolyhedron::findIntegerSample() const {
   // 6) Return transform * concat(boundedSample, coneSample).
   SmallVector<int64_t, 8> &sample = boundedSample.getValue();
   sample.append(coneSample.begin(), coneSample.end());
-  return transform.preMultiplyColumn(sample);
+  return transform.postMultiplyWithColumn(sample);
 }
 
 /// Helper to evaluate an affine expression at a point.
diff --git a/mlir/lib/Analysis/Presburger/LinearTransform.cpp b/mlir/lib/Analysis/Presburger/LinearTransform.cpp
index 09d7eb731576e..073b64cf8c337 100644
--- a/mlir/lib/Analysis/Presburger/LinearTransform.cpp
+++ b/mlir/lib/Analysis/Presburger/LinearTransform.cpp
@@ -112,7 +112,7 @@ LinearTransform::makeTransformToColumnEchelon(Matrix m) {
 }
 
 SmallVector<int64_t, 8>
-LinearTransform::postMultiplyRow(ArrayRef<int64_t> rowVec) const {
+LinearTransform::preMultiplyWithRow(ArrayRef<int64_t> rowVec) const {
   assert(rowVec.size() == matrix.getNumRows() &&
          "row vector dimension should match transform output dimension");
 
@@ -124,7 +124,7 @@ LinearTransform::postMultiplyRow(ArrayRef<int64_t> rowVec) const {
 }
 
 SmallVector<int64_t, 8>
-LinearTransform::preMultiplyColumn(ArrayRef<int64_t> colVec) const {
+LinearTransform::postMultiplyWithColumn(ArrayRef<int64_t> colVec) const {
   assert(matrix.getNumColumns() == colVec.size() &&
          "column vector dimension should match transform input dimension");
 
@@ -144,7 +144,7 @@ LinearTransform::applyTo(const IntegerPolyhedron &poly) const {
 
     int64_t c = eq.back();
 
-    SmallVector<int64_t, 8> newEq = postMultiplyRow(eq.drop_back());
+    SmallVector<int64_t, 8> newEq = preMultiplyWithRow(eq.drop_back());
     newEq.push_back(c);
     result.addEquality(newEq);
   }
@@ -154,7 +154,7 @@ LinearTransform::applyTo(const IntegerPolyhedron &poly) const {
 
     int64_t c = ineq.back();
 
-    SmallVector<int64_t, 8> newIneq = postMultiplyRow(ineq.drop_back());
+    SmallVector<int64_t, 8> newIneq = preMultiplyWithRow(ineq.drop_back());
     newIneq.push_back(c);
     result.addInequality(newIneq);
   }
diff --git a/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp b/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp
index 8a6650f609f79..01b6b7fc960ce 100644
--- a/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp
+++ b/mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp
@@ -22,7 +22,8 @@ void testColumnEchelonForm(const Matrix &m, unsigned expectedRank) {
   // In column echelon form, each row's last non-zero value can be at most one
   // column to the right of the last non-zero column among the previous rows.
   for (unsigned row = 0, nRows = m.getNumRows(); row < nRows; ++row) {
-    SmallVector<int64_t, 8> rowVec = transform.postMultiplyRow(m.getRow(row));
+    SmallVector<int64_t, 8> rowVec =
+        transform.preMultiplyWithRow(m.getRow(row));
     for (unsigned col = lastAllowedNonZeroCol + 1, nCols = m.getNumColumns();
          col < nCols; ++col) {
       EXPECT_EQ(rowVec[col], 0);

From 0e98fadc79534dde843275fcb142cc6d3a0eee01 Mon Sep 17 00:00:00 2001
From: Arjun P <arjunpitchanathan@gmail.com>
Date: Tue, 25 Jan 2022 04:08:14 +0530
Subject: [PATCH 462/946] [MLIR][Presburger] use braces for single-line loop
 when inner if uses braces [NFC]

---
 mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
index cf68c92650548..683c46ea5c0e8 100644
--- a/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
+++ b/mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp
@@ -635,10 +635,11 @@ static void checkDivisionRepresentation(
   // Check that the `dividends` and `expectedDividends` match. If the
   // denominator for a division is zero, we ignore its dividend.
   EXPECT_TRUE(dividends.size() == expectedDividends.size());
-  for (unsigned i = 0, e = dividends.size(); i < e; ++i)
+  for (unsigned i = 0, e = dividends.size(); i < e; ++i) {
     if (denominators[i] != 0) {
       EXPECT_TRUE(expectedDividends[i] == dividends[i]);
     }
+  }
 }
 
 TEST(IntegerPolyhedronTest, computeLocalReprSimple) {

From fe0c5309c4a8bf022b2ec66a0d28513e7fb1441b Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Mon, 24 Jan 2022 13:31:58 -0800
Subject: [PATCH 463/946] [Fuchsia] Remove i386 from iossim architectures

This is no longer supported in newer SDK versions.

Differential Revision: https://reviews.llvm.org/D118075
---
 clang/cmake/caches/Fuchsia-stage2.cmake | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index de8ac89fbfaa0..aed39d0ca9fff 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -71,9 +71,8 @@ if(APPLE)
   set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
   set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
   set(DARWIN_ios_ARCHS armv7;armv7s;arm64 CACHE STRING "")
-  set(DARWIN_iossim_ARCHS i386;x86_64 CACHE STRING "")
+  set(DARWIN_iossim_ARCHS arm64;x86_64 CACHE STRING "")
   set(DARWIN_osx_ARCHS arm64;x86_64 CACHE STRING "")
-  set(SANITIZER_MIN_OSX_VERSION 10.7 CACHE STRING "")
 endif()
 
 if(WIN32)

From f1c9e7bdc921cec0cc3f61c19c4ac4a7f1bd8525 Mon Sep 17 00:00:00 2001
From: Chaoshuai Lu <lcs@fb.com>
Date: Mon, 24 Jan 2022 14:51:37 -0800
Subject: [PATCH 464/946] [ObjC Availability] Add missing const to getVersion
 function of ObjCAvailabilityCheckExpr class

Add missing const to `getVersion` function of `ObjCAvailabilityCheckExpr` class.

This feels like a bug on the original change D22171. We cannot really call this function from a const object pointer because the function is not marked as const.

This diff adds the missing const specifier to fix the issue.

Reviewed By: manmanren

Differential Revision: https://reviews.llvm.org/D112119
---
 clang/include/clang/AST/ExprObjC.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/ExprObjC.h b/clang/include/clang/AST/ExprObjC.h
index b0f057dbaa02f..3b7ad8662ad95 100644
--- a/clang/include/clang/AST/ExprObjC.h
+++ b/clang/include/clang/AST/ExprObjC.h
@@ -1706,7 +1706,7 @@ class ObjCAvailabilityCheckExpr : public Expr {
 
   /// This may be '*', in which case this should fold to true.
   bool hasVersion() const { return !VersionToCheck.empty(); }
-  VersionTuple getVersion() { return VersionToCheck; }
+  VersionTuple getVersion() const { return VersionToCheck; }
 
   child_range children() {
     return child_range(child_iterator(), child_iterator());

From 572fa9642cb50f3c2d79e138e789c4b23f3ab8cf Mon Sep 17 00:00:00 2001
From: Mogball <jeffniu22@gmail.com>
Date: Mon, 24 Jan 2022 23:00:39 +0000
Subject: [PATCH 465/946] [mlir] Add a ControlFlowSink pass.

Control-Flow Sink moves operations whose only uses are in conditionally-executed regions into those regions so that paths in which their results are not needed do not perform unnecessary computation.

Depends on D115087

Reviewed By: jpienaar, rriddle, bondhugula

Differential Revision: https://reviews.llvm.org/D115088
---
 .../mlir/Interfaces/ControlFlowInterfaces.h   |  29 +++
 .../mlir/Interfaces/ControlFlowInterfaces.td  |  30 ++-
 mlir/include/mlir/Transforms/Passes.h         |   3 +
 mlir/include/mlir/Transforms/Passes.td        |  22 ++
 mlir/include/mlir/Transforms/Utils.h          |  48 ++++
 mlir/lib/Transforms/CMakeLists.txt            |   1 +
 mlir/lib/Transforms/ControlFlowSink.cpp       |  71 ++++++
 mlir/lib/Transforms/Utils/CMakeLists.txt      |   1 +
 .../Transforms/Utils/ControlFlowSinkUtils.cpp | 152 +++++++++++++
 mlir/test/Transforms/control-flow-sink.mlir   | 210 ++++++++++++++++++
 mlir/test/lib/Dialect/Test/TestDialect.cpp    |  34 ++-
 mlir/test/lib/Dialect/Test/TestOps.td         |  13 +-
 12 files changed, 604 insertions(+), 10 deletions(-)
 create mode 100644 mlir/lib/Transforms/ControlFlowSink.cpp
 create mode 100644 mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
 create mode 100644 mlir/test/Transforms/control-flow-sink.mlir

diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
index 8ff76edb3068c..806821a988bf3 100644
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
@@ -83,6 +83,35 @@ class RegionSuccessor {
   ValueRange inputs;
 };
 
+/// This class represents upper and lower bounds on the number of times a region
+/// of a `RegionBranchOpInterface` can be invoked. The lower bound is at least
+/// zero, but the upper bound may not be known.
+class InvocationBounds {
+public:
+  /// Create invocation bounds. The lower bound must be at least 0 and only the
+  /// upper bound can be unknown.
+  InvocationBounds(unsigned lb, Optional<unsigned> ub) : lower(lb), upper(ub) {
+    assert(!ub || ub >= lb && "upper bound cannot be less than lower bound");
+  }
+
+  /// Return the lower bound.
+  unsigned getLowerBound() const { return lower; }
+
+  /// Return the upper bound.
+  Optional<unsigned> getUpperBound() const { return upper; }
+
+  /// Returns the unknown invocation bounds, i.e., there is no information on
+  /// how many times a region may be invoked.
+  static InvocationBounds getUnknown() { return {0, llvm::None}; }
+
+private:
+  /// The minimum number of times the successor region will be invoked.
+  unsigned lower;
+  /// The maximum number of times the successor region will be invoked or `None`
+  /// if an upper bound is not known.
+  Optional<unsigned> upper;
+};
+
 /// Return `true` if `a` and `b` are in mutually exclusive regions as per
 /// RegionBranchOpInterface.
 bool insideMutuallyExclusiveRegions(Operation *a, Operation *b);
diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
index 0633426cf50af..429a5356428f7 100644
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
@@ -102,9 +102,10 @@ def RegionBranchOpInterface : OpInterface<"RegionBranchOpInterface"> {
   let methods = [
     InterfaceMethod<[{
         Returns the operands of this operation used as the entry arguments when
-        entering the region at `index`, which was specified as a successor of this
-        operation by `getSuccessorRegions`. These operands should correspond 1-1
-        with the successor inputs specified in `getSuccessorRegions`.
+        entering the region at `index`, which was specified as a successor of
+        this operation by `getSuccessorRegions`. These operands should
+        correspond 1-1 with the successor inputs specified in
+        `getSuccessorRegions`.
       }],
       "::mlir::OperandRange", "getSuccessorEntryOperands",
       (ins "unsigned":$index), [{}], /*defaultImplementation=*/[{
@@ -127,9 +128,28 @@ def RegionBranchOpInterface : OpInterface<"RegionBranchOpInterface"> {
         successor region must be non-empty.
       }],
       "void", "getSuccessorRegions",
-      (ins "::mlir::Optional<unsigned>":$index, "::mlir::ArrayRef<::mlir::Attribute>":$operands,
+      (ins "::mlir::Optional<unsigned>":$index,
+           "::mlir::ArrayRef<::mlir::Attribute>":$operands,
            "::mlir::SmallVectorImpl<::mlir::RegionSuccessor> &":$regions)
-    >
+    >,
+    InterfaceMethod<[{
+        Populates `invocationBounds` with the minimum and maximum number of
+        times this operation will invoke the attached regions (assuming the
+        regions yield normally, i.e. do not abort or invoke an infinite loop).
+        The minimum number of invocations is at least 0. If the maximum number
+        of invocations cannot be statically determined, then it will not have a
+        value (i.e., it is set to `llvm::None`).
+
+        `operands` is a set of optional attributes that either correspond to a
+        constant values for each operand of this operation, or null if that
+        operand is not a constant.
+      }],
+      "void", "getRegionInvocationBounds",
+      (ins "::mlir::ArrayRef<::mlir::Attribute>":$operands,
+           "::llvm::SmallVectorImpl<::mlir::InvocationBounds> &"
+             :$invocationBounds), [{}],
+       [{ invocationBounds.append($_op->getNumRegions(), {0, ::llvm::None}); }]
+    >,
   ];
 
   let verify = [{
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 6aab120fb469f..46bab0047c1b6 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -74,6 +74,9 @@ createCanonicalizerPass(const GreedyRewriteConfig &config,
                         ArrayRef<std::string> disabledPatterns = llvm::None,
                         ArrayRef<std::string> enabledPatterns = llvm::None);
 
+/// Creates a pass to perform control-flow sinking.
+std::unique_ptr<Pass> createControlFlowSinkPass();
+
 /// Creates a pass to perform common sub expression elimination.
 std::unique_ptr<Pass> createCSEPass();
 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index ff15df372aa26..56e90644363e5 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -307,6 +307,28 @@ def Canonicalizer : Pass<"canonicalize"> {
   ] # RewritePassUtils.options;
 }
 
+def ControlFlowSink : Pass<"control-flow-sink"> {
+  let summary = "Sink operations into conditional blocks";
+  let description = [{
+    This pass implements a simple control-flow sink on operations that implement
+    `RegionBranchOpInterface` by moving dominating operations whose only uses
+    are in a single conditionally-executed region into that region so that
+    executions paths where their results are not needed do not perform
+    unnecessary computations.
+
+    This is similar (but opposite) to loop-invariant code motion, which hoists
+    operations out of regions executed more than once.
+
+    It is recommended to run canonicalization first to remove unreachable
+    blocks: ops in unreachable blocks may prevent other operations from being
+    sunk as they may contain uses of their results
+  }];
+  let constructor = "::mlir::createControlFlowSinkPass()";
+  let statistics = [
+    Statistic<"numSunk", "num-sunk", "Number of operations sunk">,
+  ];
+}
+
 def CSE : Pass<"cse"> {
   let summary = "Eliminate common sub-expressions";
   let description = [{
diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h
index 5280c2648bfae..5efbb19b08d25 100644
--- a/mlir/include/mlir/Transforms/Utils.h
+++ b/mlir/include/mlir/Transforms/Utils.h
@@ -25,6 +25,7 @@ namespace mlir {
 
 class AffineApplyOp;
 class AffineForOp;
+class DominanceInfo;
 class Location;
 class OpBuilder;
 
@@ -147,6 +148,53 @@ Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
 void createAffineComputationSlice(Operation *opInst,
                                   SmallVectorImpl<AffineApplyOp> *sliceOps);
 
+/// Given a list of regions, perform control flow sinking on them. For each
+/// region, control-flow sinking moves operations that dominate the region but
+/// whose only users are in the region into the regions so that they aren't
+/// executed on paths where their results are not needed.
+///
+/// TODO: For the moment, this is a *simple* control-flow sink, i.e., no
+/// duplicating of ops. It should be made to accept a cost model to determine
+/// whether duplicating a particular op is profitable.
+///
+/// Example:
+///
+/// ```mlir
+/// %0 = arith.addi %arg0, %arg1
+/// scf.if %cond {
+///   scf.yield %0
+/// } else {
+///   scf.yield %arg2
+/// }
+/// ```
+///
+/// After control-flow sink:
+///
+/// ```mlir
+/// scf.if %cond {
+///   %0 = arith.addi %arg0, %arg1
+///   scf.yield %0
+/// } else {
+///   scf.yield %arg2
+/// }
+/// ```
+///
+/// Users must supply a callback `shouldMoveIntoRegion` that determines whether
+/// the given operation that only has users in the given operation should be
+/// moved into that region.
+///
+/// Returns the number of operations sunk.
+size_t
+controlFlowSink(ArrayRef<Region *> regions, DominanceInfo &domInfo,
+                function_ref<bool(Operation *, Region *)> shouldMoveIntoRegion);
+
+/// Populates `regions` with regions of the provided region branch op that are
+/// executed at most once at that are reachable given the current operands of
+/// the op. These regions can be passed to `controlFlowSink` to perform sinking
+/// on the regions of the operation.
+void getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
+                                    SmallVectorImpl<Region *> &regions);
+
 } // namespace mlir
 
 #endif // MLIR_TRANSFORMS_UTILS_H
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index ae1468fa17ea3..3e10b4a321311 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -5,6 +5,7 @@ add_mlir_library(MLIRTransforms
   BufferResultsToOutParams.cpp
   BufferUtils.cpp
   Canonicalizer.cpp
+  ControlFlowSink.cpp
   CSE.cpp
   Inliner.cpp
   LocationSnapshot.cpp
diff --git a/mlir/lib/Transforms/ControlFlowSink.cpp b/mlir/lib/Transforms/ControlFlowSink.cpp
new file mode 100644
index 0000000000000..71afc5702edf0
--- /dev/null
+++ b/mlir/lib/Transforms/ControlFlowSink.cpp
@@ -0,0 +1,71 @@
+//===- ControlFlowSink.cpp - Code to perform control-flow sinking ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a basic control-flow sink pass. Control-flow sinking
+// moves operations whose only uses are in conditionally-executed blocks in to
+// those blocks so that they aren't executed on paths where their results are
+// not needed.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PassDetail.h"
+#include "mlir/IR/Dominance.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Transforms/Passes.h"
+#include "mlir/Transforms/Utils.h"
+
+using namespace mlir;
+
+namespace {
+/// A basic control-flow sink pass. This pass analyzes the regions of operations
+/// that implement `RegionBranchOpInterface` that are reachable and executed at
+/// most once and sinks candidate operations that are side-effect free.
+struct ControlFlowSink : public ControlFlowSinkBase<ControlFlowSink> {
+  void runOnOperation() override;
+};
+} // end anonymous namespace
+
+/// Returns true if the given operation is side-effect free as are all of its
+/// nested operations.
+static bool isSideEffectFree(Operation *op) {
+  if (auto memInterface = dyn_cast<MemoryEffectOpInterface>(op)) {
+    // If the op has side-effects, it cannot be moved.
+    if (!memInterface.hasNoEffect())
+      return false;
+    // If the op does not have recursive side effects, then it can be moved.
+    if (!op->hasTrait<OpTrait::HasRecursiveSideEffects>())
+      return true;
+  } else if (!op->hasTrait<OpTrait::HasRecursiveSideEffects>()) {
+    // Otherwise, if the op does not implement the memory effect interface and
+    // it does not have recursive side effects, then it cannot be known that the
+    // op is moveable.
+    return false;
+  }
+
+  // Recurse into the regions and ensure that all nested ops can also be moved.
+  for (Region &region : op->getRegions())
+    for (Operation &op : region.getOps())
+      if (!isSideEffectFree(&op))
+        return false;
+  return true;
+}
+
+void ControlFlowSink::runOnOperation() {
+  auto &domInfo = getAnalysis<DominanceInfo>();
+  getOperation()->walk([&](RegionBranchOpInterface branch) {
+    SmallVector<Region *> regionsToSink;
+    getSinglyExecutedRegionsToSink(branch, regionsToSink);
+    numSunk = mlir::controlFlowSink(
+        regionsToSink, domInfo,
+        [](Operation *op, Region *) { return isSideEffectFree(op); });
+  });
+}
+
+std::unique_ptr<Pass> mlir::createControlFlowSinkPass() {
+  return std::make_unique<ControlFlowSink>();
+}
diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt
index 33deb17fe1378..c42d45325e1b2 100644
--- a/mlir/lib/Transforms/Utils/CMakeLists.txt
+++ b/mlir/lib/Transforms/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_library(MLIRTransformUtils
+  ControlFlowSinkUtils.cpp
   DialectConversion.cpp
   FoldUtils.cpp
   GreedyPatternRewriteDriver.cpp
diff --git a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
new file mode 100644
index 0000000000000..cffbd922f88c8
--- /dev/null
+++ b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
@@ -0,0 +1,152 @@
+//===- ControlFlowSinkUtils.cpp - Code to perform control-flow sinking ----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilityies for control-flow sinking. Control-flow
+// sinking moves operations whose only uses are in conditionally-executed blocks
+// into those blocks so that they aren't executed on paths where their results
+// are not needed.
+//
+// Control-flow sinking is not implemented on BranchOpInterface because
+// sinking ops into the successors of branch operations may move ops into loops.
+// It is idiomatic MLIR to perform optimizations at IR levels that readily
+// provide the necessary information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/IR/Dominance.h"
+#include "mlir/IR/Matchers.h"
+#include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Transforms/Utils.h"
+#include <vector>
+
+#define DEBUG_TYPE "cf-sink"
+
+using namespace mlir;
+
+namespace {
+/// A helper struct for control-flow sinking.
+class Sinker {
+public:
+  /// Create an operation sinker with given dominance info.
+  Sinker(function_ref<bool(Operation *, Region *)> shouldMoveIntoRegion,
+         DominanceInfo &domInfo)
+      : shouldMoveIntoRegion(shouldMoveIntoRegion), domInfo(domInfo),
+        numSunk(0) {}
+
+  /// Given a list of regions, find operations to sink and sink them. Return the
+  /// number of operations sunk.
+  size_t sinkRegions(ArrayRef<Region *> regions) &&;
+
+private:
+  /// Given a region and an op which dominates the region, returns true if all
+  /// users of the given op are dominated by the entry block of the region, and
+  /// thus the operation can be sunk into the region.
+  bool allUsersDominatedBy(Operation *op, Region *region);
+
+  /// Given a region and a top-level op (an op whose parent region is the given
+  /// region), determine whether the defining ops of the op's operands can be
+  /// sunk into the region.
+  ///
+  /// Add moved ops to the work queue.
+  void tryToSinkPredecessors(Operation *user, Region *region,
+                             std::vector<Operation *> &stack);
+
+  /// Iterate over all the ops in a region and try to sink their predecessors.
+  /// Recurse on subgraphs using a work queue.
+  void sinkRegion(Region *region);
+
+  /// The callback to determine whether an op should be moved in to a region.
+  function_ref<bool(Operation *, Region *)> shouldMoveIntoRegion;
+  /// Dominance info to determine op user dominance with respect to regions.
+  DominanceInfo &domInfo;
+  /// The number of operations sunk.
+  size_t numSunk;
+};
+} // end anonymous namespace
+
+bool Sinker::allUsersDominatedBy(Operation *op, Region *region) {
+  assert(region->findAncestorOpInRegion(*op) == nullptr &&
+         "expected op to be defined outside the region");
+  return llvm::all_of(op->getUsers(), [&](Operation *user) {
+    // The user is dominated by the region if its containing block is dominated
+    // by the region's entry block.
+    return domInfo.dominates(&region->front(), user->getBlock());
+  });
+}
+
+void Sinker::tryToSinkPredecessors(Operation *user, Region *region,
+                                   std::vector<Operation *> &stack) {
+  LLVM_DEBUG(user->print(llvm::dbgs() << "\nContained op:\n"));
+  for (Value value : user->getOperands()) {
+    Operation *op = value.getDefiningOp();
+    // Ignore block arguments and ops that are already inside the region.
+    if (!op || op->getParentRegion() == region)
+      continue;
+    LLVM_DEBUG(op->print(llvm::dbgs() << "\nTry to sink:\n"));
+
+    // If the op's users are all in the region and it can be moved, then do so.
+    if (allUsersDominatedBy(op, region) && shouldMoveIntoRegion(op, region)) {
+      // Move the op into the region's entry block. If the op is part of a
+      // subgraph, dependee ops would have been moved first, so inserting before
+      // the start of the block will ensure dominance is preserved. Ops can only
+      // be safely moved into the entry block as the region's other blocks may
+      // for a loop.
+      op->moveBefore(&region->front(), region->front().begin());
+      ++numSunk;
+      // Add the op to the work queue.
+      stack.push_back(op);
+    }
+  }
+}
+
+void Sinker::sinkRegion(Region *region) {
+  // Initialize the work queue with all the ops in the region.
+  std::vector<Operation *> stack;
+  for (Operation &op : region->getOps())
+    stack.push_back(&op);
+
+  // Process all the ops depth-first. This ensures that nodes of subgraphs are
+  // sunk in the correct order.
+  while (!stack.empty()) {
+    Operation *op = stack.back();
+    stack.pop_back();
+    tryToSinkPredecessors(op, region, stack);
+  }
+}
+
+size_t Sinker::sinkRegions(ArrayRef<Region *> regions) && {
+  for (Region *region : regions)
+    if (!region->empty())
+      sinkRegion(region);
+  return numSunk;
+}
+
+size_t mlir::controlFlowSink(
+    ArrayRef<Region *> regions, DominanceInfo &domInfo,
+    function_ref<bool(Operation *, Region *)> shouldMoveIntoRegion) {
+  return Sinker(shouldMoveIntoRegion, domInfo).sinkRegions(regions);
+}
+
+void mlir::getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
+                                          SmallVectorImpl<Region *> &regions) {
+  // Collect constant operands.
+  SmallVector<Attribute> operands(branch->getNumOperands(), Attribute());
+  for (auto &it : llvm::enumerate(branch->getOperands()))
+    matchPattern(it.value(), m_Constant(&operands[it.index()]));
+  // Get the invocation bounds.
+  SmallVector<InvocationBounds> bounds;
+  branch.getRegionInvocationBounds(operands, bounds);
+
+  // For a simple control-flow sink, only consider regions that are executed at
+  // most once.
+  for (auto it : llvm::zip(branch->getRegions(), bounds)) {
+    const InvocationBounds &bound = std::get<1>(it);
+    if (bound.getUpperBound() && *bound.getUpperBound() <= 1)
+      regions.push_back(&std::get<0>(it));
+  }
+}
diff --git a/mlir/test/Transforms/control-flow-sink.mlir b/mlir/test/Transforms/control-flow-sink.mlir
new file mode 100644
index 0000000000000..58dffe19c0aae
--- /dev/null
+++ b/mlir/test/Transforms/control-flow-sink.mlir
@@ -0,0 +1,210 @@
+// RUN: mlir-opt -split-input-file -control-flow-sink %s | FileCheck %s
+
+// Test that operations can be sunk.
+
+// CHECK-LABEL: @test_simple_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+// CHECK-NEXT: %[[V0:.*]] = arith.subi %[[ARG2]], %[[ARG1]]
+// CHECK-NEXT: %[[V1:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:   %[[V2:.*]] = arith.subi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT:   test.region_if_yield %[[V2]]
+// CHECK-NEXT: } else {
+// CHECK-NEXT:   %[[V2:.*]] = arith.addi %[[ARG1]], %[[ARG1]]
+// CHECK-NEXT:   %[[V3:.*]] = arith.addi %[[V0]], %[[V2]]
+// CHECK-NEXT:   test.region_if_yield %[[V3]]
+// CHECK-NEXT: } join {
+// CHECK-NEXT:   %[[V2:.*]] = arith.addi %[[ARG2]], %[[ARG2]]
+// CHECK-NEXT:   %[[V3:.*]] = arith.addi %[[V2]], %[[V0]]
+// CHECK-NEXT:   test.region_if_yield %[[V3]]
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[V1]]
+func @test_simple_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
+  %0 = arith.subi %arg1, %arg2 : i32
+  %1 = arith.subi %arg2, %arg1 : i32
+  %2 = arith.addi %arg1, %arg1 : i32
+  %3 = arith.addi %arg2, %arg2 : i32
+  %4 = test.region_if %arg0: i1 -> i32 then {
+    test.region_if_yield %0 : i32
+  } else {
+    %5 = arith.addi %1, %2 : i32
+    test.region_if_yield %5 : i32
+  } join {
+    %5 = arith.addi %3, %1 : i32
+    test.region_if_yield %5 : i32
+  }
+  return %4 : i32
+}
+
+// -----
+
+// Test that a region op can be sunk.
+
+// CHECK-LABEL: @test_region_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+// CHECK-NEXT: %[[V0:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:   %[[V1:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:     test.region_if_yield %[[ARG1]]
+// CHECK-NEXT:   } else {
+// CHECK-NEXT:     %[[V2:.*]] = arith.subi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT:     test.region_if_yield %[[V2]]
+// CHECK-NEXT:   } join {
+// CHECK-NEXT:     test.region_if_yield %[[ARG2]]
+// CHECK-NEXT:   }
+// CHECK-NEXT:   test.region_if_yield %[[V1]]
+// CHECK-NEXT: } else {
+// CHECK-NEXT:   test.region_if_yield %[[ARG1]]
+// CHECK-NEXT: } join {
+// CHECK-NEXT:   test.region_if_yield %[[ARG2]]
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[V0]]
+func @test_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
+  %0 = arith.subi %arg1, %arg2 : i32
+  %1 = test.region_if %arg0: i1 -> i32 then {
+    test.region_if_yield %arg1 : i32
+  } else {
+    test.region_if_yield %0 : i32
+  } join {
+    test.region_if_yield %arg2 : i32
+  }
+  %2 = test.region_if %arg0: i1 -> i32 then {
+    test.region_if_yield %1 : i32
+  } else {
+    test.region_if_yield %arg1 : i32
+  } join {
+    test.region_if_yield %arg2 : i32
+  }
+  return %2 : i32
+}
+
+// -----
+
+// Test that an entire subgraph can be sunk.
+
+// CHECK-LABEL: @test_subgraph_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+// CHECK-NEXT: %[[V0:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:   %[[V1:.*]] = arith.subi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT:   %[[V2:.*]] = arith.addi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT:   %[[V3:.*]] = arith.subi %[[ARG2]], %[[ARG1]]
+// CHECK-NEXT:   %[[V4:.*]] = arith.muli %[[V3]], %[[V3]]
+// CHECK-NEXT:   %[[V5:.*]] = arith.muli %[[V2]], %[[V1]]
+// CHECK-NEXT:   %[[V6:.*]] = arith.addi %[[V5]], %[[V4]]
+// CHECK-NEXT:   test.region_if_yield %[[V6]]
+// CHECK-NEXT: } else {
+// CHECK-NEXT:   test.region_if_yield %[[ARG1]]
+// CHECK-NEXT: } join {
+// CHECK-NEXT:   test.region_if_yield %[[ARG2]]
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[V0]]
+func @test_subgraph_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
+  %0 = arith.addi %arg1, %arg2 : i32
+  %1 = arith.subi %arg1, %arg2 : i32
+  %2 = arith.subi %arg2, %arg1 : i32
+  %3 = arith.muli %0, %1 : i32
+  %4 = arith.muli %2, %2 : i32
+  %5 = arith.addi %3, %4 : i32
+  %6 = test.region_if %arg0: i1 -> i32 then {
+    test.region_if_yield %5 : i32
+  } else {
+    test.region_if_yield %arg1 : i32
+  } join {
+    test.region_if_yield %arg2 : i32
+  }
+  return %6 : i32
+}
+
+// -----
+
+// Test that ops can be sunk into regions with multiple blocks.
+
+// CHECK-LABEL: @test_multiblock_region_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32, %[[ARG2:.*]]: i32)
+// CHECK-NEXT: %[[V0:.*]] = arith.addi %[[ARG1]], %[[ARG2]]
+// CHECK-NEXT: %[[V1:.*]] = "test.any_cond"() ({
+// CHECK-NEXT:   %[[V3:.*]] = arith.addi %[[V0]], %[[ARG2]]
+// CHECK-NEXT:   %[[V4:.*]] = arith.addi %[[V3]], %[[ARG1]]
+// CHECK-NEXT:   br ^bb1(%[[V4]] : i32)
+// CHECK-NEXT: ^bb1(%[[V5:.*]]: i32):
+// CHECK-NEXT:   %[[V6:.*]] = arith.addi %[[V5]], %[[V4]]
+// CHECK-NEXT:   "test.yield"(%[[V6]])
+// CHECK-NEXT: })
+// CHECK-NEXT: %[[V2:.*]] = arith.addi %[[V0]], %[[V1]]
+// CHECK-NEXT: return %[[V2]]
+func @test_multiblock_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
+  %0 = arith.addi %arg1, %arg2 : i32
+  %1 = arith.addi %0, %arg2 : i32
+  %2 = arith.addi %1, %arg1 : i32
+  %3 = "test.any_cond"() ({
+    br ^bb1(%2 : i32)
+  ^bb1(%5: i32):
+    %6 = arith.addi %5, %2 : i32
+    "test.yield"(%6) : (i32) -> ()
+  }) : () -> i32
+  %4 = arith.addi %0, %3 : i32
+  return %4 : i32
+}
+
+// -----
+
+// Test that ops can be sunk recursively into nested regions.
+
+// CHECK-LABEL: @test_nested_region_sink
+// CHECK-SAME:  (%[[ARG0:.*]]: i1, %[[ARG1:.*]]: i32) -> i32 {
+// CHECK-NEXT: %[[V0:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:   %[[V1:.*]] = test.region_if %[[ARG0]]: i1 -> i32 then {
+// CHECK-NEXT:     %[[V2:.*]] = arith.addi %[[ARG1]], %[[ARG1]]
+// CHECK-NEXT:     test.region_if_yield %[[V2]]
+// CHECK-NEXT:   } else {
+// CHECK-NEXT:     test.region_if_yield %[[ARG1]]
+// CHECK-NEXT:   } join {
+// CHECK-NEXT:     test.region_if_yield %[[ARG1]]
+// CHECK-NEXT:   }
+// CHECK-NEXT:   test.region_if_yield %[[V1]]
+// CHECK-NEXT: } else {
+// CHECK-NEXT:   test.region_if_yield %[[ARG1]]
+// CHECK-NEXT: } join {
+// CHECK-NEXT:   test.region_if_yield %[[ARG1]]
+// CHECK-NEXT: }
+// CHECK-NEXT: return %[[V0]]
+func @test_nested_region_sink(%arg0: i1, %arg1: i32) -> i32 {
+  %0 = arith.addi %arg1, %arg1 : i32
+  %1 = test.region_if %arg0: i1 -> i32 then {
+    %2 = test.region_if %arg0: i1 -> i32 then {
+      test.region_if_yield %0 : i32
+    } else {
+      test.region_if_yield %arg1 : i32
+    } join {
+      test.region_if_yield %arg1 : i32
+    }
+    test.region_if_yield %2 : i32
+  } else {
+    test.region_if_yield %arg1 : i32
+  } join {
+    test.region_if_yield %arg1 : i32
+  }
+  return %1 : i32
+}
+
+// -----
+
+// Test that ops are only moved into the entry block, even when their only uses
+// are further along.
+
+// CHECK-LABEL: @test_not_sunk_deeply
+// CHECK-SAME:  (%[[ARG0:.*]]: i32) -> i32 {
+// CHECK-NEXT: %[[V0:.*]] = "test.any_cond"() ({
+// CHECK-NEXT:   %[[V1:.*]] = arith.addi %[[ARG0]], %[[ARG0]]
+// CHECK-NEXT:   br ^bb1
+// CHECK-NEXT: ^bb1:
+// CHECK-NEXT:   "test.yield"(%[[V1]]) : (i32) -> ()
+// CHECK-NEXT: })
+// CHECK-NEXT: return %[[V0]]
+func @test_not_sunk_deeply(%arg0: i32) -> i32 {
+  %0 = arith.addi %arg0, %arg0 : i32
+  %1 = "test.any_cond"() ({
+    br ^bb1
+  ^bb1:
+    "test.yield"(%0) : (i32) -> ()
+  }) : () -> i32
+  return %1 : i32
+}
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp
index 2b915a9fd6c2a..d82f7af73decb 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp
@@ -1127,15 +1127,15 @@ static void print(OpAsmPrinter &p, RegionIfOp op) {
   p.printOperands(op.getOperands());
   p << ": " << op.getOperandTypes();
   p.printArrowTypeList(op.getResultTypes());
-  p << " then";
+  p << " then ";
   p.printRegion(op.getThenRegion(),
                 /*printEntryBlockArgs=*/true,
                 /*printBlockTerminators=*/true);
-  p << " else";
+  p << " else ";
   p.printRegion(op.getElseRegion(),
                 /*printEntryBlockArgs=*/true,
                 /*printBlockTerminators=*/true);
-  p << " join";
+  p << " join ";
   p.printRegion(op.getJoinRegion(),
                 /*printEntryBlockArgs=*/true,
                 /*printBlockTerminators=*/true);
@@ -1189,6 +1189,34 @@ void RegionIfOp::getSuccessorRegions(
   regions.push_back(RegionSuccessor(&getElseRegion(), getElseArgs()));
 }
 
+void RegionIfOp::getRegionInvocationBounds(
+    ArrayRef<Attribute> operands,
+    SmallVectorImpl<InvocationBounds> &invocationBounds) {
+  // Each region is invoked at most once.
+  invocationBounds.assign(/*NumElts=*/3, /*Elt=*/{0, 1});
+}
+
+//===----------------------------------------------------------------------===//
+// AnyCondOp
+//===----------------------------------------------------------------------===//
+
+void AnyCondOp::getSuccessorRegions(Optional<unsigned> index,
+                                    ArrayRef<Attribute> operands,
+                                    SmallVectorImpl<RegionSuccessor> &regions) {
+  // The parent op branches into the only region, and the region branches back
+  // to the parent op.
+  if (index)
+    regions.emplace_back(&getRegion());
+  else
+    regions.emplace_back(getResults());
+}
+
+void AnyCondOp::getRegionInvocationBounds(
+    ArrayRef<Attribute> operands,
+    SmallVectorImpl<InvocationBounds> &invocationBounds) {
+  invocationBounds.emplace_back(1, 1);
+}
+
 //===----------------------------------------------------------------------===//
 // SingleNoTerminatorCustomAsmOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td
index cc9c950ea73fe..f171e19270069 100644
--- a/mlir/test/lib/Dialect/Test/TestOps.td
+++ b/mlir/test/lib/Dialect/Test/TestOps.td
@@ -2342,14 +2342,15 @@ def RegionIfYieldOp : TEST_Op<"region_if_yield",
 }
 
 def RegionIfOp : TEST_Op<"region_if",
-      [DeclareOpInterfaceMethods<RegionBranchOpInterface>,
+      [DeclareOpInterfaceMethods<RegionBranchOpInterface,
+                                 ["getRegionInvocationBounds"]>,
        SingleBlockImplicitTerminator<"RegionIfYieldOp">,
        RecursiveSideEffects]> {
   let description =[{
     Represents an abstract if-then-else-join pattern. In this context, the then
     and else regions jump to the join region, which finally returns to its
     parent op.
-    }];
+  }];
 
   let printer = [{ return ::print(p, *this); }];
   let parser = [{ return ::parseRegionIfOp(parser, result); }];
@@ -2372,6 +2373,14 @@ def RegionIfOp : TEST_Op<"region_if",
   }];
 }
 
+def AnyCondOp : TEST_Op<"any_cond",
+      [DeclareOpInterfaceMethods<RegionBranchOpInterface,
+                                 ["getRegionInvocationBounds"]>,
+       RecursiveSideEffects]> {
+  let results = (outs Variadic<AnyType>:$results);
+  let regions = (region AnyRegion:$region);
+}
+
 //===----------------------------------------------------------------------===//
 // Test TableGen generated build() methods
 //===----------------------------------------------------------------------===//

From 9407a701790fc8c55795f992ded9a40bd37ca7d1 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Mon, 24 Jan 2022 15:02:49 -0800
Subject: [PATCH 466/946] DWARFv5 default: Switch bolt tests to use DWARFv4
 since Bolt doesn't support v5 yet

Rough attempt to fix these, since I don't have bolt building locally.
Will see how the buildbots go with it...
---
 bolt/test/X86/asm-func-debug.test         | 2 +-
 bolt/test/X86/inline-debug-info.test      | 2 +-
 bolt/test/X86/inlined-function-mixed.test | 2 +-
 bolt/test/keep-aranges.test               | 2 +-
 bolt/test/non-empty-debug-line.test       | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bolt/test/X86/asm-func-debug.test b/bolt/test/X86/asm-func-debug.test
index 9679d79821ef6..a022ff0822b31 100644
--- a/bolt/test/X86/asm-func-debug.test
+++ b/bolt/test/X86/asm-func-debug.test
@@ -3,7 +3,7 @@
 #
 # The input test case foo() contains nops that we remove.
 
-RUN: %clang -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
+RUN: %clang -gdwarf-4 %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
 RUN: llvm-bolt %t.exe -o %t -update-debug-sections
 RUN: llvm-dwarfdump -all %t | FileCheck %s
 
diff --git a/bolt/test/X86/inline-debug-info.test b/bolt/test/X86/inline-debug-info.test
index 300fa0be78914..cab3fb9baa22e 100644
--- a/bolt/test/X86/inline-debug-info.test
+++ b/bolt/test/X86/inline-debug-info.test
@@ -3,7 +3,7 @@
 
 # REQUIRES: system-linux
 
-# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
+# RUN: %clang %cflags -O1 -gdwarf-4 %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
 # RUN:   -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -update-debug-sections -print-debug-info \
 # RUN:   -print-only=main -print-after-lowering -force-inline=foo -o %t.bolt \
diff --git a/bolt/test/X86/inlined-function-mixed.test b/bolt/test/X86/inlined-function-mixed.test
index 1b17c1932c1dd..d2ed274ae6cc5 100644
--- a/bolt/test/X86/inlined-function-mixed.test
+++ b/bolt/test/X86/inlined-function-mixed.test
@@ -2,7 +2,7 @@
 # debug info does not cause a crash.
 
 RUN: %clangxx %S/Inputs/inlined.cpp -c -o %T/inlined.o
-RUN: %clangxx %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
+RUN: %clangxx %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -gdwarf-4
 RUN: %clangxx %T/inlined.o %T/inlinee.o -o %t
 
 RUN: llvm-bolt %t -o %t.bolt -update-debug-sections -reorder-blocks=reverse \
diff --git a/bolt/test/keep-aranges.test b/bolt/test/keep-aranges.test
index 8b93f19ea31af..bd33686005048 100644
--- a/bolt/test/keep-aranges.test
+++ b/bolt/test/keep-aranges.test
@@ -4,7 +4,7 @@
 REQUIRES: system-linux
 
 RUN: %clang %S/Inputs/icf_baz.c %S/Inputs/icf_main.c -Wl,--icf=all,--gdb-index \
-RUN:   -g -o %t.exe -fuse-ld=lld
+RUN:   -gdwarf-4 -o %t.exe -fuse-ld=lld
 RUN: llvm-bolt %t.exe -o %t -update-debug-sections -keep-aranges
 RUN: llvm-dwarfdump -debug-aranges %t | FileCheck %s
 
diff --git a/bolt/test/non-empty-debug-line.test b/bolt/test/non-empty-debug-line.test
index 28c86be320e8e..677efe319bba8 100644
--- a/bolt/test/non-empty-debug-line.test
+++ b/bolt/test/non-empty-debug-line.test
@@ -3,7 +3,7 @@
 
 REQUIRES: system-linux
 
-RUN: %clang %S/Inputs/hello.c -g -o %t
+RUN: %clang %S/Inputs/hello.c -gdwarf-4 -o %t
 RUN: llvm-bolt %t -o %t1 -update-debug-sections -funcs=_start
 RUN: llvm-readobj -S %t > %t2
 RUN: llvm-readobj -S %t1 >> %t2

From cd8122b27f8fb9cbf222ef946bff3b698625e2f4 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Mon, 24 Jan 2022 15:12:18 -0800
Subject: [PATCH 467/946] [lldb] Add ConstString memory usage statistics

Add statistics about the memory usage of the string pool. I'm
particularly interested in the memory used by the allocator, i.e. the
number of bytes actually used by the allocator it self as well as the
number of bytes allocated through the allocator.

Differential revision: https://reviews.llvm.org/D117914
---
 lldb/include/lldb/Target/Statistics.h         |  6 +++
 lldb/include/lldb/Utility/ConstString.h       | 10 +++++
 lldb/source/Target/Statistics.cpp             | 14 ++++++
 lldb/source/Utility/ConstString.cpp           | 15 +++++++
 .../commands/statistics/basic/TestStats.py    | 45 +++++++++++++++++++
 5 files changed, 90 insertions(+)

diff --git a/lldb/include/lldb/Target/Statistics.h b/lldb/include/lldb/Target/Statistics.h
index 185389b2eeafe..d2b8f746a38c8 100644
--- a/lldb/include/lldb/Target/Statistics.h
+++ b/lldb/include/lldb/Target/Statistics.h
@@ -9,6 +9,7 @@
 #ifndef LLDB_TARGET_STATISTICS_H
 #define LLDB_TARGET_STATISTICS_H
 
+#include "lldb/Utility/ConstString.h"
 #include "lldb/Utility/Stream.h"
 #include "lldb/lldb-forward.h"
 #include "llvm/Support/JSON.h"
@@ -110,6 +111,11 @@ struct ModuleStats {
   bool debug_info_index_saved_to_cache = false;
 };
 
+struct ConstStringStats {
+  llvm::json::Value ToJSON() const;
+  ConstString::MemoryStats stats = ConstString::GetMemoryStats();
+};
+
 /// A class that represents statistics for a since lldb_private::Target.
 class TargetStats {
 public:
diff --git a/lldb/include/lldb/Utility/ConstString.h b/lldb/include/lldb/Utility/ConstString.h
index 2756f1fd72038..937f8271a9a8e 100644
--- a/lldb/include/lldb/Utility/ConstString.h
+++ b/lldb/include/lldb/Utility/ConstString.h
@@ -408,6 +408,16 @@ class ConstString {
   ///     in memory.
   static size_t StaticMemorySize();
 
+  struct MemoryStats {
+    size_t GetBytesTotal() const { return bytes_total; }
+    size_t GetBytesUsed() const { return bytes_used; }
+    size_t GetBytesUnused() const { return bytes_total - bytes_used; }
+    size_t bytes_total = 0;
+    size_t bytes_used = 0;
+  };
+
+  static MemoryStats GetMemoryStats();
+
 protected:
   template <typename T, typename Enable> friend struct ::llvm::DenseMapInfo;
   /// Only used by DenseMapInfo.
diff --git a/lldb/source/Target/Statistics.cpp b/lldb/source/Target/Statistics.cpp
index 8d1e982c3b988..ebddad837d14b 100644
--- a/lldb/source/Target/Statistics.cpp
+++ b/lldb/source/Target/Statistics.cpp
@@ -65,6 +65,14 @@ json::Value ModuleStats::ToJSON() const {
   return module;
 }
 
+llvm::json::Value ConstStringStats::ToJSON() const {
+  json::Object obj;
+  obj.try_emplace<int64_t>("bytesTotal", stats.GetBytesTotal());
+  obj.try_emplace<int64_t>("bytesUsed", stats.GetBytesUsed());
+  obj.try_emplace<int64_t>("bytesUnused", stats.GetBytesUnused());
+  return obj;
+}
+
 json::Value TargetStats::ToJSON(Target &target) {
   CollectStats(target);
 
@@ -212,9 +220,15 @@ llvm::json::Value DebuggerStats::ReportStatistics(Debugger &debugger,
     json_modules.emplace_back(module_stat.ToJSON());
   }
 
+  ConstStringStats const_string_stats;
+  json::Object json_memory{
+      {"strings", const_string_stats.ToJSON()},
+  };
+
   json::Object global_stats{
       {"targets", std::move(json_targets)},
       {"modules", std::move(json_modules)},
+      {"memory", std::move(json_memory)},
       {"totalSymbolTableParseTime", symtab_parse_time},
       {"totalSymbolTableIndexTime", symtab_index_time},
       {"totalSymbolTablesLoadedFromCache", symtabs_loaded},
diff --git a/lldb/source/Utility/ConstString.cpp b/lldb/source/Utility/ConstString.cpp
index e5e1b2387e64d..76270e3f53b96 100644
--- a/lldb/source/Utility/ConstString.cpp
+++ b/lldb/source/Utility/ConstString.cpp
@@ -171,6 +171,17 @@ class Pool {
     return mem_size;
   }
 
+  ConstString::MemoryStats GetMemoryStats() const {
+    ConstString::MemoryStats stats;
+    for (const auto &pool : m_string_pools) {
+      llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
+      const Allocator &alloc = pool.m_string_map.getAllocator();
+      stats.bytes_total += alloc.getTotalMemory();
+      stats.bytes_used += alloc.getBytesAllocated();
+    }
+    return stats;
+  }
+
 protected:
   uint8_t hash(const llvm::StringRef &s) const {
     uint32_t h = llvm::djbHash(s);
@@ -332,6 +343,10 @@ size_t ConstString::StaticMemorySize() {
   return StringPool().MemorySize();
 }
 
+ConstString::MemoryStats ConstString::GetMemoryStats() {
+  return StringPool().GetMemoryStats();
+}
+
 void llvm::format_provider<ConstString>::format(const ConstString &CS,
                                                 llvm::raw_ostream &OS,
                                                 llvm::StringRef Options) {
diff --git a/lldb/test/API/commands/statistics/basic/TestStats.py b/lldb/test/API/commands/statistics/basic/TestStats.py
index f69fddc27fbaa..99940ed5061f9 100644
--- a/lldb/test/API/commands/statistics/basic/TestStats.py
+++ b/lldb/test/API/commands/statistics/basic/TestStats.py
@@ -135,6 +135,7 @@ def test_default_no_run(self):
 
         (lldb) statistics dump
         {
+          "memory" : {...},
           "modules" : [...],
           "targets" : [
             {
@@ -160,6 +161,7 @@ def test_default_no_run(self):
         target = self.createTestTarget()
         debug_stats = self.get_stats()
         debug_stat_keys = [
+            'memory',
             'modules',
             'targets',
             'totalSymbolTableParseTime',
@@ -197,6 +199,7 @@ def test_default_with_run(self):
 
         (lldb) statistics dump
         {
+          "memory" : {...},
           "modules" : [...],
           "targets" : [
                 {
@@ -227,6 +230,7 @@ def test_default_with_run(self):
                                           lldb.SBFileSpec("main.c"))
         debug_stats = self.get_stats()
         debug_stat_keys = [
+            'memory',
             'modules',
             'targets',
             'totalSymbolTableParseTime',
@@ -254,6 +258,44 @@ def test_default_with_run(self):
         self.assertGreater(stats['launchOrAttachTime'], 0.0)
         self.assertGreater(stats['targetCreateTime'], 0.0)
 
+    def test_memory(self):
+        """
+            Test "statistics dump" and the memory information.
+        """
+        exe = self.getBuildArtifact("a.out")
+        target = self.createTestTarget(file_path=exe)
+        debug_stats = self.get_stats()
+        debug_stat_keys = [
+            'memory',
+            'modules',
+            'targets',
+            'totalSymbolTableParseTime',
+            'totalSymbolTableIndexTime',
+            'totalSymbolTablesLoadedFromCache',
+            'totalSymbolTablesSavedToCache',
+            'totalDebugInfoParseTime',
+            'totalDebugInfoIndexTime',
+            'totalDebugInfoIndexLoadedFromCache',
+            'totalDebugInfoIndexSavedToCache',
+            'totalDebugInfoByteSize'
+        ]
+        self.verify_keys(debug_stats, '"debug_stats"', debug_stat_keys, None)
+
+        memory = debug_stats['memory']
+        memory_keys= [
+            'strings',
+        ]
+        self.verify_keys(memory, '"memory"', memory_keys, None)
+
+        strings = memory['strings']
+        strings_keys= [
+            'bytesTotal',
+            'bytesUsed',
+            'bytesUnused',
+        ]
+        self.verify_keys(strings, '"strings"', strings_keys, None)
+
+
     def find_module_in_metrics(self, path, stats):
         modules = stats['modules']
         for module in modules:
@@ -269,6 +311,7 @@ def test_modules(self):
         target = self.createTestTarget(file_path=exe)
         debug_stats = self.get_stats()
         debug_stat_keys = [
+            'memory',
             'modules',
             'targets',
             'totalSymbolTableParseTime',
@@ -312,6 +355,7 @@ def test_breakpoints(self):
         Output expected to be something like:
 
         {
+          "memory" : {...},
           "modules" : [...],
           "targets" : [
                 {
@@ -355,6 +399,7 @@ def test_breakpoints(self):
         self.runCmd("b a_function")
         debug_stats = self.get_stats()
         debug_stat_keys = [
+            'memory',
             'modules',
             'targets',
             'totalSymbolTableParseTime',

From 52f37c24c3f891350394c30096be5e93b063f61e Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Tue, 28 Dec 2021 13:09:40 +0100
Subject: [PATCH 468/946] [libc++][NFC] remove this-> when calling member
 functions in <string>

remove `this->` when calling member functions

Reviewed By: Quuxplusone, Mordante, ldionne, #libc

Spies: libcxx-commits

Differential Revision: https://reviews.llvm.org/D116324
---
 libcxx/include/string | 71 ++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 35 deletions(-)

diff --git a/libcxx/include/string b/libcxx/include/string
index 6f22f02afa0ba..f53e1bfef75e8 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -1866,7 +1866,7 @@ void basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s,
                                                        size_type __reserve)
 {
     if (__reserve > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __p;
     if (__fits_in_sso(__reserve))
     {
@@ -1890,7 +1890,7 @@ void
 basic_string<_CharT, _Traits, _Allocator>::__init(const value_type* __s, size_type __sz)
 {
     if (__sz > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __p;
     if (__fits_in_sso(__sz))
     {
@@ -1973,7 +1973,7 @@ void basic_string<_CharT, _Traits, _Allocator>::__init_copy_ctor_external(
     __set_short_size(__sz);
   } else {
     if (__sz > max_size())
-      this->__throw_length_error();
+      __throw_length_error();
     size_t __cap = __recommend(__sz);
     __p = __alloc_traits::allocate(__alloc(), __cap + 1);
     __set_long_pointer(__p);
@@ -2029,7 +2029,7 @@ void
 basic_string<_CharT, _Traits, _Allocator>::__init(size_type __n, value_type __c)
 {
     if (__n > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __p;
     if (__fits_in_sso(__n))
     {
@@ -2074,7 +2074,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st
 {
     size_type __str_sz = __str.size();
     if (__pos > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     __init(__str.data() + __pos, _VSTD::min(__n, __str_sz - __pos));
     _VSTD::__debug_db_insert_c(this);
 }
@@ -2087,7 +2087,7 @@ basic_string<_CharT, _Traits, _Allocator>::basic_string(const basic_string& __st
 {
     size_type __str_sz = __str.size();
     if (__pos > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     __init(__str.data() + __pos, __str_sz - __pos);
     _VSTD::__debug_db_insert_c(this);
 }
@@ -2160,7 +2160,7 @@ basic_string<_CharT, _Traits, _Allocator>::__init(_ForwardIterator __first, _For
 {
     size_type __sz = static_cast<size_type>(_VSTD::distance(__first, __last));
     if (__sz > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __p;
     if (__fits_in_sso(__sz))
     {
@@ -2259,7 +2259,7 @@ basic_string<_CharT, _Traits, _Allocator>::__grow_by_and_replace
 {
     size_type __ms = max_size();
     if (__delta_cap > __ms - __old_cap - 1)
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __old_p = __get_pointer();
     size_type __cap = __old_cap < __ms / 2 - __alignment ?
                           __recommend(_VSTD::max(__old_cap + __delta_cap, 2 * __old_cap)) :
@@ -2291,7 +2291,7 @@ basic_string<_CharT, _Traits, _Allocator>::__grow_by(size_type __old_cap, size_t
 {
     size_type __ms = max_size();
     if (__delta_cap > __ms - __old_cap)
-        this->__throw_length_error();
+        __throw_length_error();
     pointer __old_p = __get_pointer();
     size_type __cap = __old_cap < __ms / 2 - __alignment ?
                           __recommend(_VSTD::max(__old_cap + __delta_cap, 2 * __old_cap)) :
@@ -2523,7 +2523,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(const basic_string& __str, siz
 {
     size_type __sz = __str.size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return assign(__str.data() + __pos, _VSTD::min(__n, __sz - __pos));
 }
 
@@ -2540,7 +2540,7 @@ basic_string<_CharT, _Traits, _Allocator>::assign(const _Tp & __t, size_type __p
     __self_view __sv = __t;
     size_type __sz = __sv.size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return assign(__sv.data() + __pos, _VSTD::min(__n, __sz - __pos));
 }
 
@@ -2709,7 +2709,7 @@ basic_string<_CharT, _Traits, _Allocator>::append(const basic_string& __str, siz
 {
     size_type __sz = __str.size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return append(__str.data() + __pos, _VSTD::min(__n, __sz - __pos));
 }
 
@@ -2725,7 +2725,7 @@ basic_string<_CharT, _Traits, _Allocator>::append(const _Tp & __t, size_type __p
     __self_view __sv = __t;
     size_type __sz = __sv.size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return append(__sv.data() + __pos, _VSTD::min(__n, __sz - __pos));
 }
 
@@ -2746,7 +2746,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, const value_t
     _LIBCPP_ASSERT(__n == 0 || __s != nullptr, "string::insert received nullptr");
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     size_type __cap = capacity();
     if (__cap - __sz >= __n)
     {
@@ -2777,7 +2777,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos, size_type __n
 {
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     if (__n)
     {
         size_type __cap = capacity();
@@ -2883,7 +2883,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const basic_
 {
     size_type __str_sz = __str.size();
     if (__pos2 > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return insert(__pos1, __str.data() + __pos2, _VSTD::min(__n, __str_sz - __pos2));
 }
 
@@ -2900,7 +2900,7 @@ basic_string<_CharT, _Traits, _Allocator>::insert(size_type __pos1, const _Tp& _
     __self_view __sv = __t;
     size_type __str_sz = __sv.size();
     if (__pos2 > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return insert(__pos1, __sv.data() + __pos2, _VSTD::min(__n, __str_sz - __pos2));
 }
 
@@ -2961,7 +2961,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __
     _LIBCPP_ASSERT(__n2 == 0 || __s != nullptr, "string::replace received nullptr");
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     __n1 = _VSTD::min(__n1, __sz - __pos);
     size_type __cap = capacity();
     if (__cap - __sz + __n1 >= __n2)
@@ -3008,7 +3008,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos, size_type __
 {
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     __n1 = _VSTD::min(__n1, __sz - __pos);
     size_type __cap = capacity();
     value_type* __p;
@@ -3042,7 +3042,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(const_iterator __i1, const_it
                                                    _InputIterator __j1, _InputIterator __j2)
 {
     const basic_string __temp(__j1, __j2, __alloc());
-    return this->replace(__i1, __i2, __temp);
+    return replace(__i1, __i2, __temp);
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -3060,7 +3060,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type _
 {
     size_type __str_sz = __str.size();
     if (__pos2 > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return replace(__pos1, __n1, __str.data() + __pos2, _VSTD::min(__n2, __str_sz - __pos2));
 }
 
@@ -3077,7 +3077,7 @@ basic_string<_CharT, _Traits, _Allocator>::replace(size_type __pos1, size_type _
     __self_view __sv = __t;
     size_type __str_sz = __sv.size();
     if (__pos2 > __str_sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return replace(__pos1, __n1, __sv.data() + __pos2, _VSTD::min(__n2, __str_sz - __pos2));
 }
 
@@ -3147,7 +3147,8 @@ template <class _CharT, class _Traits, class _Allocator>
 basic_string<_CharT, _Traits, _Allocator>&
 basic_string<_CharT, _Traits, _Allocator>::erase(size_type __pos,
                                                  size_type __n) {
-  if (__pos > size()) this->__throw_out_of_range();
+  if (__pos > size())
+    __throw_out_of_range();
   if (__n == npos) {
     __erase_to_end(__pos);
   } else {
@@ -3263,7 +3264,7 @@ void
 basic_string<_CharT, _Traits, _Allocator>::reserve(size_type __requested_capacity)
 {
     if (__requested_capacity > max_size())
-        this->__throw_length_error();
+        __throw_length_error();
 
     // Make sure reserve(n) never shrinks. This is technically only required in C++20
     // and later (since P0966R1), however we provide consistent behavior in all Standard
@@ -3370,7 +3371,7 @@ typename basic_string<_CharT, _Traits, _Allocator>::const_reference
 basic_string<_CharT, _Traits, _Allocator>::at(size_type __n) const
 {
     if (__n >= size())
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return (*this)[__n];
 }
 
@@ -3379,7 +3380,7 @@ typename basic_string<_CharT, _Traits, _Allocator>::reference
 basic_string<_CharT, _Traits, _Allocator>::at(size_type __n)
 {
     if (__n >= size())
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     return (*this)[__n];
 }
 
@@ -3425,7 +3426,7 @@ basic_string<_CharT, _Traits, _Allocator>::copy(value_type* __s, size_type __n,
 {
     size_type __sz = size();
     if (__pos > __sz)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     size_type __rlen = _VSTD::min(__n, __sz - __pos);
     traits_type::copy(__s, data() + __pos, __rlen);
     return __rlen;
@@ -3869,7 +3870,7 @@ basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
     _LIBCPP_ASSERT(__n2 == 0 || __s != nullptr, "string::compare(): received nullptr");
     size_type __sz = size();
     if (__pos1 > __sz || __n2 == npos)
-        this->__throw_out_of_range();
+        __throw_out_of_range();
     size_type __rlen = _VSTD::min(__n1, __sz - __pos1);
     int __r = traits_type::compare(data() + __pos1, __s, _VSTD::min(__rlen, __n2));
     if (__r == 0)
@@ -3933,7 +3934,7 @@ basic_string<_CharT, _Traits, _Allocator>::compare(size_type __pos1,
                                                    size_type __pos2,
                                                    size_type __n2) const
 {
-        return compare(__pos1, __n1, __self_view(__str), __pos2, __n2);
+    return compare(__pos1, __n1, __self_view(__str), __pos2, __n2);
 }
 
 template <class _CharT, class _Traits, class _Allocator>
@@ -4447,16 +4448,16 @@ template<class _CharT, class _Traits, class _Allocator>
 bool
 basic_string<_CharT, _Traits, _Allocator>::__dereferenceable(const const_iterator* __i) const
 {
-    return this->data() <= _VSTD::__to_address(__i->base()) &&
-           _VSTD::__to_address(__i->base()) < this->data() + this->size();
+    return data() <= _VSTD::__to_address(__i->base()) &&
+           _VSTD::__to_address(__i->base()) < data() + size();
 }
 
 template<class _CharT, class _Traits, class _Allocator>
 bool
 basic_string<_CharT, _Traits, _Allocator>::__decrementable(const const_iterator* __i) const
 {
-    return this->data() < _VSTD::__to_address(__i->base()) &&
-           _VSTD::__to_address(__i->base()) <= this->data() + this->size();
+    return data() < _VSTD::__to_address(__i->base()) &&
+           _VSTD::__to_address(__i->base()) <= data() + size();
 }
 
 template<class _CharT, class _Traits, class _Allocator>
@@ -4464,7 +4465,7 @@ bool
 basic_string<_CharT, _Traits, _Allocator>::__addable(const const_iterator* __i, ptrdiff_t __n) const
 {
     const value_type* __p = _VSTD::__to_address(__i->base()) + __n;
-    return this->data() <= __p && __p <= this->data() + this->size();
+    return data() <= __p && __p <= data() + size();
 }
 
 template<class _CharT, class _Traits, class _Allocator>
@@ -4472,7 +4473,7 @@ bool
 basic_string<_CharT, _Traits, _Allocator>::__subscriptable(const const_iterator* __i, ptrdiff_t __n) const
 {
     const value_type* __p = _VSTD::__to_address(__i->base()) + __n;
-    return this->data() <= __p && __p < this->data() + this->size();
+    return data() <= __p && __p < data() + size();
 }
 
 #endif // _LIBCPP_DEBUG_LEVEL == 2

From 014a673441c6050683e059a547ffcbb03004730d Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Mon, 24 Jan 2022 19:44:34 +0100
Subject: [PATCH 469/946] [libc++] Remove std::basic_string's base class in
 ABIv2

Remove `std::basic_string`'s base class in ABI version 2

Reviewed By: Quuxplusone, ldionne, #libc

Spies: libcxx-commits

Differential Revision: https://reviews.llvm.org/D116334
---
 libcxx/include/__config |  2 ++
 libcxx/include/string   | 16 ++++++----------
 libcxx/src/string.cpp   |  2 ++
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/libcxx/include/__config b/libcxx/include/__config
index 4c6edd45ff0a5..b99cdc38dc9f8 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -114,6 +114,8 @@
 // compatible. This switch removes these workarounds for platforms that don't care
 // about ABI compatibility.
 #  define _LIBCPP_ABI_NO_RANDOM_DEVICE_COMPATIBILITY_LAYOUT
+// Remove basic_string common base
+#  define _LIBCPP_ABI_NO_BASIC_STRING_BASE_CLASS
 #elif _LIBCPP_ABI_VERSION == 1
 #  if !defined(_LIBCPP_OBJECT_FORMAT_COFF)
 // Enable compiling copies of now inline methods into the dylib to support
diff --git a/libcxx/include/string b/libcxx/include/string
index f53e1bfef75e8..3616de8a214d8 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -618,6 +618,7 @@ operator+(const basic_string<_CharT, _Traits, _Allocator>& __x, _CharT __y);
 
 _LIBCPP_EXTERN_TEMPLATE(_LIBCPP_FUNC_VIS string operator+<char, char_traits<char>, allocator<char> >(char const*, string const&))
 
+#ifndef _LIBCPP_ABI_NO_BASIC_STRING_BASE_CLASS
 template <bool>
 struct __basic_string_common;
 
@@ -627,6 +628,7 @@ struct __basic_string_common<true> {
     _LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void __throw_length_error() const;
     _LIBCPP_NORETURN _LIBCPP_EXPORTED_FROM_ABI void __throw_out_of_range() const;
 };
+#endif
 
 template <class _Iter>
 struct __string_is_trivial_iterator : public false_type {};
@@ -680,7 +682,9 @@ class
     _LIBCPP_PREFERRED_NAME(u32string)
 #endif
     basic_string
+#ifndef _LIBCPP_ABI_NO_BASIC_STRING_BASE_CLASS
     : private __basic_string_common<true> // This base class is historical, but it needs to remain for ABI compatibility
+#endif
 {
 public:
     typedef basic_string                                 __self;
@@ -1729,20 +1733,12 @@ private:
 
     _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
     void __throw_length_error() const {
-#ifndef _LIBCPP_NO_EXCEPTIONS
-        __basic_string_common<true>::__throw_length_error();
-#else
-        _VSTD::abort();
-#endif
+        _VSTD::__throw_length_error("basic_string");
     }
 
     _LIBCPP_NORETURN _LIBCPP_HIDE_FROM_ABI
     void __throw_out_of_range() const {
-#ifndef _LIBCPP_NO_EXCEPTIONS
-        __basic_string_common<true>::__throw_out_of_range();
-#else
-        _VSTD::abort();
-#endif
+        _VSTD::__throw_out_of_range("basic_string");
     }
 
     friend basic_string operator+<>(const basic_string&, const basic_string&);
diff --git a/libcxx/src/string.cpp b/libcxx/src/string.cpp
index 608dcb2c5863f..3c63f408240d3 100644
--- a/libcxx/src/string.cpp
+++ b/libcxx/src/string.cpp
@@ -21,6 +21,7 @@
 
 _LIBCPP_BEGIN_NAMESPACE_STD
 
+#ifndef _LIBCPP_ABI_NO_BASIC_STRING_BASE_CLASS
 void __basic_string_common<true>::__throw_length_error() const {
     _VSTD::__throw_length_error("basic_string");
 }
@@ -28,6 +29,7 @@ void __basic_string_common<true>::__throw_length_error() const {
 void __basic_string_common<true>::__throw_out_of_range() const {
     _VSTD::__throw_out_of_range("basic_string");
 }
+#endif
 
 #define _LIBCPP_EXTERN_TEMPLATE_DEFINE(...) template __VA_ARGS__;
 #ifdef _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION

From 59eb542f6070a017f0d407df908aa53639feb0ea Mon Sep 17 00:00:00 2001
From: Siva Chandra Reddy <sivachandra@google.com>
Date: Fri, 21 Jan 2022 06:23:59 +0000
Subject: [PATCH 470/946] [libc] Let header generator generate the type header
 inclusion boiler plate.

Reviewed By: michaelrj

Differential Revision: https://reviews.llvm.org/D117855
---
 libc/config/linux/api.td                   | 227 +++------------------
 libc/config/public_api.td                  |   7 +-
 libc/utils/HdrGen/PublicAPICommand.cpp     |  21 +-
 libc/utils/LibcTableGenUtil/APIIndexer.cpp |   6 +-
 libc/utils/LibcTableGenUtil/APIIndexer.h   |   2 +-
 5 files changed, 40 insertions(+), 223 deletions(-)

diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td
index 48cd1c1113485..77e22287fe765 100644
--- a/libc/config/linux/api.td
+++ b/libc/config/linux/api.td
@@ -6,46 +6,6 @@ include "spec/llvm_libc_ext.td"
 include "spec/posix.td"
 include "spec/stdc.td"
 
-// TODO: Eliminate all TypeDecl specializations. Since we define all public
-// types in their own self contained header files, the header generator can
-// produce the boiler plate which pulls in the type definitions.
-
-def SizeT : TypeDecl<"size_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/size_t.h>
-  }];
-}
-
-def SSizeT : TypeDecl<"ssize_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/ssize_t.h>
-  }];
-}
-
-def StructTm: TypeDecl<"struct tm"> {
-  let Decl = [{
-    #include <llvm-libc-types/struct_tm.h>
-  }];
-}
-
-def TimeT: TypeDecl<"time_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/time_t.h>
-  }];
-}
-
-def OffT : TypeDecl<"off_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/off_t.h>
-  }];
-}
-
-def FILE : TypeDecl<"FILE"> {
-  let Decl = [{
-    #include <llvm-libc-types/FILE.h>
-  }];
-}
-
 def AssertMacro : MacroDef<"assert"> {
   let Defn = [{
     #undef assert
@@ -102,16 +62,8 @@ def AssertAPI : PublicAPI<"assert.h"> {
 def CTypeAPI : PublicAPI<"ctype.h"> {
 }
 
-def IMaxDivT : TypeDecl<"imaxdiv_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/imaxdiv_t.h>
-  }];
-}
-
 def IntTypesAPI : PublicAPI<"inttypes.h"> {
-  let TypeDeclarations = [
-    IMaxDivT,
-  ];
+  let Types = ["imaxdiv_t"];
 }
 
 def MathErrHandlingMacro : MacroDef<"math_errhandling"> {
@@ -146,18 +98,6 @@ def IsNanMacro : MacroDef<"isnan"> {
   }];
 }
 
-def FloatT : TypeDecl<"float_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/float_t.h>
-  }];
-}
-
-def DoubleT : TypeDecl<"double_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/double_t.h>
-  }];
-}
-
 def MathAPI : PublicAPI<"math.h"> {
   let Macros = [
     SimpleMacroDef<"MATH_ERRNO", "1">,
@@ -174,22 +114,7 @@ def MathAPI : PublicAPI<"math.h"> {
     IsInfMacro,
     IsNanMacro,
   ];
-  let TypeDeclarations = [
-    DoubleT,
-    FloatT,
-  ];
-}
-
-def FEnvT : TypeDecl<"fenv_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/fenv_t.h>
-  }];
-}
-
-def FExceptT : TypeDecl<"fexcept_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/fexcept_t.h>
-  }];
+  let Types = ["double_t", "float_t"];
 }
 
 def FenvAPI: PublicAPI<"fenv.h"> {
@@ -208,16 +133,11 @@ def FenvAPI: PublicAPI<"fenv.h"> {
 
     SimpleMacroDef<"FE_DFL_ENV", "((fenv_t *)-1)">,
   ];
-  let TypeDeclarations = [
-    FEnvT,
-    FExceptT,
-  ];
+  let Types = ["fenv_t", "fexcept_t"];
 }
 
 def StringAPI : PublicAPI<"string.h"> {
-  let TypeDeclarations = [
-    SizeT,
-  ];
+  let Types = ["size_t"];
 
   let Macros = [
     NullMacro,
@@ -225,66 +145,22 @@ def StringAPI : PublicAPI<"string.h"> {
 }
 
 def StdIOAPI : PublicAPI<"stdio.h"> {
-  let TypeDeclarations = [
-    SizeT,
-    FILE,
-  ];
-}
-
-def DivT : TypeDecl<"div_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/div_t.h>
-  }];
-}
-
-def LDivT : TypeDecl<"ldiv_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/ldiv_t.h>
-  }];
-}
-
-def LLDivT : TypeDecl<"lldiv_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/lldiv_t.h>
-  }];
-}
-
-def BSearchCompareTDefn : TypeDecl<"__bsearchcompare_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/__bsearchcompare_t.h>
-  }];
-}
-
-def QSortCompareTDefn : TypeDecl<"__qsortcompare_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/__qsortcompare_t.h>
-  }];
+  let Types = ["size_t", "FILE"];
 }
 
 def StdlibAPI : PublicAPI<"stdlib.h"> {
-  let TypeDeclarations = [
-    DivT,
-    LDivT,
-    LLDivT,
-    SizeT,
-    BSearchCompareTDefn,
-    QSortCompareTDefn,
+  let Types = [
+    "div_t",
+    "ldiv_t",
+    "lldiv_t",
+    "size_t",
+    "__bsearchcompare_t",
+    "__qsortcompare_t"
   ];
 }
 
 def TimeAPI : PublicAPI<"time.h"> {
-  let TypeDeclarations = [
-    StructTm,
-    TimeT,
-  ];
-
-  let Functions = [
-    "asctime",
-    "asctime_r",
-    "gmtime",
-    "gmtime_r",
-    "mktime",
-  ];
+  let Types = ["time_t", "struct tm"];
 }
 
 def ErrnoAPI : PublicAPI<"errno.h"> {
@@ -307,6 +183,7 @@ def ErrnoAPI : PublicAPI<"errno.h"> {
 }
 
 def SysMManAPI : PublicAPI<"sys/mman.h"> {
+  let Types = ["off_t", "size_t"];
   let Macros = [
     SimpleMacroDef<"PROT_NONE", "0">,
     SimpleMacroDef<"PROT_READ", "1">,
@@ -326,65 +203,10 @@ def SysMManAPI : PublicAPI<"sys/mman.h"> {
     // TODO: Add other MAP_* macros used by Linux.
   ];
 
-  let TypeDeclarations = [
-    SizeT,
-    OffT,
-  ];
-}
-
-def StructSigactionDefn : TypeDecl<"struct sigaction"> {
-  let Decl = [{
-    #include <llvm-libc-types/struct_sigaction.h>
-  }];
-}
-
-def SighandlerTDefn : TypeDecl<"__sighandler_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/__sighandler_t.h>
-  }];
 }
 
 def SignalAPI : PublicAPI<"signal.h"> {
-  let TypeDeclarations = [
-    StructSigactionDefn,
-    SighandlerTDefn,
-  ];
-}
-
-def OnceFlag : TypeDecl<"once_flag"> {
-  let Decl = [{
-    #include <llvm-libc-types/once_flag.h>
-  }];
-}
-
-def MtxT : TypeDecl<"mtx_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/mtx_t.h>
-  }];
-}
-
-def CndT : TypeDecl<"cnd_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/cnd_t.h>
-  }];
-}
-
-def ThrdT : TypeDecl<"thrd_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/thrd_t.h>
-  }];
-}
-
-def ThreadStartT : TypeDecl<"thrd_start_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/thrd_start_t.h>
-  }];
-}
-
-def CallOnceFuncT : TypeDecl<"__call_once_func_t"> {
-  let Decl = [{
-    #include <llvm-libc-types/__call_once_func_t.h>
-  }];
+  let Types = ["struct sigaction", "__sighandler_t"];
 }
 
 def ThreadsAPI : PublicAPI<"threads.h"> {
@@ -392,13 +214,13 @@ def ThreadsAPI : PublicAPI<"threads.h"> {
     SimpleMacroDef<"ONCE_FLAG_INIT", "0">,
   ];
 
-  let TypeDeclarations = [
-    OnceFlag,
-    CallOnceFuncT,
-    MtxT,
-    CndT,
-    ThrdT,
-    ThreadStartT,
+  let Types = [
+    "__call_once_func_t",
+    "once_flag",
+    "cnd_t",
+    "mtx_t",
+    "thrd_t",
+    "thrd_start_t",
   ];
 
   let Enumerations = [
@@ -414,8 +236,5 @@ def ThreadsAPI : PublicAPI<"threads.h"> {
 }
 
 def UniStdAPI : PublicAPI<"unistd.h"> {
-  let TypeDeclarations = [
-    SSizeT,
-    SizeT,
-  ];
+  let Types = ["size_t", "ssize_t"];
 }
diff --git a/libc/config/public_api.td b/libc/config/public_api.td
index b60836901836a..6d2f534f5a593 100644
--- a/libc/config/public_api.td
+++ b/libc/config/public_api.td
@@ -1,10 +1,5 @@
 include "spec/spec.td"
 
-class TypeDecl<string name> {
-  string Name = name;
-  string Decl = "";
-}
-
 class MacroDef<string name> {
   string Name = name;
   string Defn = "";
@@ -23,7 +18,7 @@ class MacroDefineIfNot<string name, string value> : MacroDef<name> {
 class PublicAPI<string name> {
   string HeaderName = name;
   list<MacroDef> Macros = [];
-  list<TypeDecl> TypeDeclarations = [];
+  list<string> Types = [];
   list<string> Enumerations = [];
   list<string> Structs = [];
   list<string> Functions = [];
diff --git a/libc/utils/HdrGen/PublicAPICommand.cpp b/libc/utils/HdrGen/PublicAPICommand.cpp
index 5285b0daac7d5..8f036f7a6d56e 100644
--- a/libc/utils/HdrGen/PublicAPICommand.cpp
+++ b/libc/utils/HdrGen/PublicAPICommand.cpp
@@ -10,6 +10,7 @@
 
 #include "utils/LibcTableGenUtil/APIIndexer.h"
 
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SourceMgr.h"
@@ -38,6 +39,12 @@ static void dedentAndWrite(llvm::StringRef Text, llvm::raw_ostream &OS) {
   }
 }
 
+static std::string getTypeHdrName(const std::string &Name) {
+  llvm::SmallVector<llvm::StringRef> Parts;
+  llvm::SplitString(llvm::StringRef(Name), Parts);
+  return llvm::join(Parts.begin(), Parts.end(), "_");
+}
+
 namespace llvm_libc {
 
 void writeAPIFromIndex(APIIndexer &G,
@@ -54,16 +61,12 @@ void writeAPIFromIndex(APIIndexer &G,
     OS << '\n';
   }
 
-  for (auto &Pair : G.TypeDeclsMap) {
-    const std::string &Name = Pair.first;
-    if (G.TypeSpecMap.find(Name) == G.TypeSpecMap.end())
-      llvm::PrintFatalError(Name + " not found in any standard spec.\n");
-
-    llvm::Record *TypeDecl = Pair.second;
-    dedentAndWrite(TypeDecl->getValueAsString("Decl"), OS);
-
-    OS << '\n';
+  for (auto &TypeName : G.RequiredTypes) {
+    if (G.TypeSpecMap.find(TypeName) == G.TypeSpecMap.end())
+      llvm::PrintFatalError(TypeName + " not found in any standard spec.\n");
+    OS << "#include <llvm-libc-types/" << getTypeHdrName(TypeName) << ".h>\n";
   }
+  OS << '\n';
 
   if (G.Enumerations.size() != 0)
     OS << "enum {" << '\n';
diff --git a/libc/utils/LibcTableGenUtil/APIIndexer.cpp b/libc/utils/LibcTableGenUtil/APIIndexer.cpp
index 16aef5880bffe..fd3f53c4150fc 100644
--- a/libc/utils/LibcTableGenUtil/APIIndexer.cpp
+++ b/libc/utils/LibcTableGenUtil/APIIndexer.cpp
@@ -120,9 +120,9 @@ void APIIndexer::indexPublicAPIDef(llvm::Record *PublicAPI) {
   for (llvm::Record *MacroDef : MacroDefList)
     MacroDefsMap[std::string(MacroDef->getValueAsString("Name"))] = MacroDef;
 
-  auto TypeDeclList = PublicAPI->getValueAsListOfDefs("TypeDeclarations");
-  for (llvm::Record *TypeDecl : TypeDeclList)
-    TypeDeclsMap[std::string(TypeDecl->getValueAsString("Name"))] = TypeDecl;
+  auto TypeList = PublicAPI->getValueAsListOfStrings("Types");
+  for (llvm::StringRef TypeName : TypeList)
+    RequiredTypes.insert(std::string(TypeName));
 
   auto StructList = PublicAPI->getValueAsListOfStrings("Structs");
   for (llvm::StringRef StructName : StructList)
diff --git a/libc/utils/LibcTableGenUtil/APIIndexer.h b/libc/utils/LibcTableGenUtil/APIIndexer.h
index 7b4d62a38c615..cbb7ac2e66dcf 100644
--- a/libc/utils/LibcTableGenUtil/APIIndexer.h
+++ b/libc/utils/LibcTableGenUtil/APIIndexer.h
@@ -63,10 +63,10 @@ class APIIndexer {
   NameToRecordMapping EnumerationSpecMap;
   NameToRecordMapping FunctionSpecMap;
   NameToRecordMapping MacroDefsMap;
-  NameToRecordMapping TypeDeclsMap;
 
   std::unordered_map<std::string, std::string> FunctionToHeaderMap;
 
+  NameSet RequiredTypes;
   NameSet Structs;
   NameSet Enumerations;
   NameSet Functions;

From 3628febcf8e3d88db5871c9f82a33ab98611e5a8 Mon Sep 17 00:00:00 2001
From: Mogball <jeffniu22@gmail.com>
Date: Mon, 24 Jan 2022 23:31:00 +0000
Subject: [PATCH 471/946] [mlir] NFC control-flow sink cleanup

---
 .../mlir/Interfaces/ControlFlowInterfaces.td      |  3 ++-
 mlir/include/mlir/Transforms/Passes.td            |  8 +++++---
 mlir/lib/Transforms/ControlFlowSink.cpp           | 13 +++++++------
 .../lib/Transforms/Utils/ControlFlowSinkUtils.cpp | 15 ++++++++-------
 mlir/test/Transforms/control-flow-sink.mlir       | 12 +-----------
 5 files changed, 23 insertions(+), 28 deletions(-)

diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
index 429a5356428f7..896eb501d3799 100644
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td
@@ -148,7 +148,8 @@ def RegionBranchOpInterface : OpInterface<"RegionBranchOpInterface"> {
       (ins "::mlir::ArrayRef<::mlir::Attribute>":$operands,
            "::llvm::SmallVectorImpl<::mlir::InvocationBounds> &"
              :$invocationBounds), [{}],
-       [{ invocationBounds.append($_op->getNumRegions(), {0, ::llvm::None}); }]
+       [{ invocationBounds.append($_op->getNumRegions(),
+                                  ::mlir::InvocationBounds::getUnknown()); }]
     >,
   ];
 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 56e90644363e5..4b1d6ca71e2e1 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -310,14 +310,16 @@ def Canonicalizer : Pass<"canonicalize"> {
 def ControlFlowSink : Pass<"control-flow-sink"> {
   let summary = "Sink operations into conditional blocks";
   let description = [{
-    This pass implements a simple control-flow sink on operations that implement
+    This pass implements control-flow sink on operations that implement
     `RegionBranchOpInterface` by moving dominating operations whose only uses
-    are in a single conditionally-executed region into that region so that
+    are in a conditionally-executed regions into those regions so that
     executions paths where their results are not needed do not perform
     unnecessary computations.
 
     This is similar (but opposite) to loop-invariant code motion, which hoists
-    operations out of regions executed more than once.
+    operations out of regions executed more than once. The implementation of
+    control-flow sink uses a simple and conversative cost model: operations are
+    never duplicated and are only moved into singly-executed regions.
 
     It is recommended to run canonicalization first to remove unreachable
     blocks: ops in unreachable blocks may prevent other operations from being
diff --git a/mlir/lib/Transforms/ControlFlowSink.cpp b/mlir/lib/Transforms/ControlFlowSink.cpp
index 71afc5702edf0..10d41b0f0013f 100644
--- a/mlir/lib/Transforms/ControlFlowSink.cpp
+++ b/mlir/lib/Transforms/ControlFlowSink.cpp
@@ -22,9 +22,7 @@
 using namespace mlir;
 
 namespace {
-/// A basic control-flow sink pass. This pass analyzes the regions of operations
-/// that implement `RegionBranchOpInterface` that are reachable and executed at
-/// most once and sinks candidate operations that are side-effect free.
+/// A control-flow sink pass.
 struct ControlFlowSink : public ControlFlowSinkBase<ControlFlowSink> {
   void runOnOperation() override;
 };
@@ -59,10 +57,13 @@ void ControlFlowSink::runOnOperation() {
   auto &domInfo = getAnalysis<DominanceInfo>();
   getOperation()->walk([&](RegionBranchOpInterface branch) {
     SmallVector<Region *> regionsToSink;
+    // Get the regions are that known to be executed at most once.
     getSinglyExecutedRegionsToSink(branch, regionsToSink);
-    numSunk = mlir::controlFlowSink(
-        regionsToSink, domInfo,
-        [](Operation *op, Region *) { return isSideEffectFree(op); });
+    // Sink side-effect free operations.
+    numSunk =
+        controlFlowSink(regionsToSink, domInfo, [](Operation *op, Region *) {
+          return isSideEffectFree(op);
+        });
   });
 }
 
diff --git a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
index cffbd922f88c8..868174b261c4a 100644
--- a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
+++ b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements utilityies for control-flow sinking. Control-flow
+// This file implements utilities for control-flow sinking. Control-flow
 // sinking moves operations whose only uses are in conditionally-executed blocks
 // into those blocks so that they aren't executed on paths where their results
 // are not needed.
@@ -40,7 +40,7 @@ class Sinker {
 
   /// Given a list of regions, find operations to sink and sink them. Return the
   /// number of operations sunk.
-  size_t sinkRegions(ArrayRef<Region *> regions) &&;
+  size_t sinkRegions(ArrayRef<Region *> regions);
 
 private:
   /// Given a region and an op which dominates the region, returns true if all
@@ -93,9 +93,9 @@ void Sinker::tryToSinkPredecessors(Operation *user, Region *region,
     if (allUsersDominatedBy(op, region) && shouldMoveIntoRegion(op, region)) {
       // Move the op into the region's entry block. If the op is part of a
       // subgraph, dependee ops would have been moved first, so inserting before
-      // the start of the block will ensure dominance is preserved. Ops can only
-      // be safely moved into the entry block as the region's other blocks may
-      // for a loop.
+      // the start of the block will ensure SSA dominance is preserved locally
+      // in the subgraph. Ops can only be safely moved into the entry block as
+      // the region's other blocks may for a loop.
       op->moveBefore(&region->front(), region->front().begin());
       ++numSunk;
       // Add the op to the work queue.
@@ -119,7 +119,7 @@ void Sinker::sinkRegion(Region *region) {
   }
 }
 
-size_t Sinker::sinkRegions(ArrayRef<Region *> regions) && {
+size_t Sinker::sinkRegions(ArrayRef<Region *> regions) {
   for (Region *region : regions)
     if (!region->empty())
       sinkRegion(region);
@@ -137,7 +137,8 @@ void mlir::getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
   // Collect constant operands.
   SmallVector<Attribute> operands(branch->getNumOperands(), Attribute());
   for (auto &it : llvm::enumerate(branch->getOperands()))
-    matchPattern(it.value(), m_Constant(&operands[it.index()]));
+    (void)matchPattern(it.value(), m_Constant(&operands[it.index()]));
+
   // Get the invocation bounds.
   SmallVector<InvocationBounds> bounds;
   branch.getRegionInvocationBounds(operands, bounds);
diff --git a/mlir/test/Transforms/control-flow-sink.mlir b/mlir/test/Transforms/control-flow-sink.mlir
index 58dffe19c0aae..ba895ebc493e5 100644
--- a/mlir/test/Transforms/control-flow-sink.mlir
+++ b/mlir/test/Transforms/control-flow-sink.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -split-input-file -control-flow-sink %s | FileCheck %s
+// RUN: mlir-opt -control-flow-sink %s | FileCheck %s
 
 // Test that operations can be sunk.
 
@@ -35,8 +35,6 @@ func @test_simple_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   return %4 : i32
 }
 
-// -----
-
 // Test that a region op can be sunk.
 
 // CHECK-LABEL: @test_region_sink
@@ -76,8 +74,6 @@ func @test_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   return %2 : i32
 }
 
-// -----
-
 // Test that an entire subgraph can be sunk.
 
 // CHECK-LABEL: @test_subgraph_sink
@@ -113,8 +109,6 @@ func @test_subgraph_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   return %6 : i32
 }
 
-// -----
-
 // Test that ops can be sunk into regions with multiple blocks.
 
 // CHECK-LABEL: @test_multiblock_region_sink
@@ -144,8 +138,6 @@ func @test_multiblock_region_sink(%arg0: i1, %arg1: i32, %arg2: i32) -> i32 {
   return %4 : i32
 }
 
-// -----
-
 // Test that ops can be sunk recursively into nested regions.
 
 // CHECK-LABEL: @test_nested_region_sink
@@ -185,8 +177,6 @@ func @test_nested_region_sink(%arg0: i1, %arg1: i32) -> i32 {
   return %1 : i32
 }
 
-// -----
-
 // Test that ops are only moved into the entry block, even when their only uses
 // are further along.
 

From 3e746c6d9ef0758c1e06901a99a75b638d6a5655 Mon Sep 17 00:00:00 2001
From: Rob Suderman <rob.suderman@gmail.com>
Date: Mon, 24 Jan 2022 15:38:30 -0800
Subject: [PATCH 472/946] [mlir] Add support for ExpM1 to GLSL/OpenCL SPIRV
 Backends

Adding a similar decomposition for exponential minus one to the SPIRV
backends along with the necessary tests.

Reviewed By: antiagainst

Differential Revision: https://reviews.llvm.org/D118081
---
 .../Conversion/MathToSPIRV/MathToSPIRV.cpp    | 31 +++++++++++---
 .../MathToSPIRV/math-to-glsl-spirv.mlir       | 38 +++++++++++-------
 .../MathToSPIRV/math-to-opencl-spirv.mlir     | 40 +++++++++++--------
 3 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp b/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp
index ec8402af03009..90588ed9bd5f0 100644
--- a/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp
+++ b/mlir/lib/Conversion/MathToSPIRV/MathToSPIRV.cpp
@@ -30,6 +30,28 @@ using namespace mlir;
 // normal RewritePattern.
 
 namespace {
+/// Converts math.expm1 to SPIR-V ops.
+///
+/// SPIR-V does not have a direct operations for exp(x)-1. Explicitly lower to
+/// these operations.
+template <typename ExpOp>
+class ExpM1OpPattern final : public OpConversionPattern<math::ExpM1Op> {
+public:
+  using OpConversionPattern<math::ExpM1Op>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(math::ExpM1Op operation, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    assert(adaptor.getOperands().size() == 1);
+    Location loc = operation.getLoc();
+    auto type = this->getTypeConverter()->convertType(operation.getType());
+    auto exp = rewriter.create<ExpOp>(loc, type, adaptor.getOperand());
+    auto one = spirv::ConstantOp::getOne(type, loc, rewriter);
+    rewriter.replaceOpWithNewOp<spirv::FSubOp>(operation, exp, one);
+    return success();
+  }
+};
+
 /// Converts math.log1p to SPIR-V ops.
 ///
 /// SPIR-V does not have a direct operations for log(1+x). Explicitly lower to
@@ -44,11 +66,10 @@ class Log1pOpPattern final : public OpConversionPattern<math::Log1pOp> {
                   ConversionPatternRewriter &rewriter) const override {
     assert(adaptor.getOperands().size() == 1);
     Location loc = operation.getLoc();
-    auto type =
-        this->getTypeConverter()->convertType(operation.getOperand().getType());
+    auto type = this->getTypeConverter()->convertType(operation.getType());
     auto one = spirv::ConstantOp::getOne(type, operation.getLoc(), rewriter);
     auto onePlus =
-        rewriter.create<spirv::FAddOp>(loc, one, adaptor.getOperands()[0]);
+        rewriter.create<spirv::FAddOp>(loc, one, adaptor.getOperand());
     rewriter.replaceOpWithNewOp<LogOp>(operation, type, onePlus);
     return success();
   }
@@ -65,7 +86,7 @@ void populateMathToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
 
   // GLSL patterns
   patterns
-      .add<Log1pOpPattern<spirv::GLSLLogOp>,
+      .add<Log1pOpPattern<spirv::GLSLLogOp>, ExpM1OpPattern<spirv::GLSLExpOp>,
            spirv::ElementwiseOpPattern<math::AbsOp, spirv::GLSLFAbsOp>,
            spirv::ElementwiseOpPattern<math::CeilOp, spirv::GLSLCeilOp>,
            spirv::ElementwiseOpPattern<math::CosOp, spirv::GLSLCosOp>,
@@ -81,7 +102,7 @@ void populateMathToSPIRVPatterns(SPIRVTypeConverter &typeConverter,
           typeConverter, patterns.getContext());
 
   // OpenCL patterns
-  patterns.add<Log1pOpPattern<spirv::OCLLogOp>,
+  patterns.add<Log1pOpPattern<spirv::OCLLogOp>, ExpM1OpPattern<spirv::OCLExpOp>,
                spirv::ElementwiseOpPattern<math::AbsOp, spirv::OCLFAbsOp>,
                spirv::ElementwiseOpPattern<math::CeilOp, spirv::OCLCeilOp>,
                spirv::ElementwiseOpPattern<math::CosOp, spirv::OCLCosOp>,
diff --git a/mlir/test/Conversion/MathToSPIRV/math-to-glsl-spirv.mlir b/mlir/test/Conversion/MathToSPIRV/math-to-glsl-spirv.mlir
index f0e0b7e63fdce..c996e7056783a 100644
--- a/mlir/test/Conversion/MathToSPIRV/math-to-glsl-spirv.mlir
+++ b/mlir/test/Conversion/MathToSPIRV/math-to-glsl-spirv.mlir
@@ -8,26 +8,30 @@ func @float32_unary_scalar(%arg0: f32) {
   %0 = math.cos %arg0 : f32
   // CHECK: spv.GLSL.Exp %{{.*}}: f32
   %1 = math.exp %arg0 : f32
+  // CHECK: %[[EXP:.+]] = spv.GLSL.Exp %arg0
+  // CHECK: %[[ONE:.+]] = spv.Constant 1.000000e+00 : f32
+  // CHECK: spv.FSub %[[EXP]], %[[ONE]]
+  %2 = math.expm1 %arg0 : f32
   // CHECK: spv.GLSL.Log %{{.*}}: f32
-  %2 = math.log %arg0 : f32
+  %3 = math.log %arg0 : f32
   // CHECK: %[[ONE:.+]] = spv.Constant 1.000000e+00 : f32
   // CHECK: %[[ADDONE:.+]] = spv.FAdd %[[ONE]], %{{.+}}
   // CHECK: spv.GLSL.Log %[[ADDONE]]
-  %3 = math.log1p %arg0 : f32
+  %4 = math.log1p %arg0 : f32
   // CHECK: spv.GLSL.InverseSqrt %{{.*}}: f32
-  %4 = math.rsqrt %arg0 : f32
+  %5 = math.rsqrt %arg0 : f32
   // CHECK: spv.GLSL.Sqrt %{{.*}}: f32
-  %5 = math.sqrt %arg0 : f32
+  %6 = math.sqrt %arg0 : f32
   // CHECK: spv.GLSL.Tanh %{{.*}}: f32
-  %6 = math.tanh %arg0 : f32
+  %7 = math.tanh %arg0 : f32
   // CHECK: spv.GLSL.Sin %{{.*}}: f32
-  %7 = math.sin %arg0 : f32
+  %8 = math.sin %arg0 : f32
   // CHECK: spv.GLSL.FAbs %{{.*}}: f32
-  %8 = math.abs %arg0 : f32
+  %9 = math.abs %arg0 : f32
   // CHECK: spv.GLSL.Ceil %{{.*}}: f32
-  %9 = math.ceil %arg0 : f32
+  %10 = math.ceil %arg0 : f32
   // CHECK: spv.GLSL.Floor %{{.*}}: f32
-  %10 = math.floor %arg0 : f32
+  %11 = math.floor %arg0 : f32
   return
 }
 
@@ -37,20 +41,24 @@ func @float32_unary_vector(%arg0: vector<3xf32>) {
   %0 = math.cos %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Exp %{{.*}}: vector<3xf32>
   %1 = math.exp %arg0 : vector<3xf32>
+  // CHECK: %[[EXP:.+]] = spv.GLSL.Exp %arg0
+  // CHECK: %[[ONE:.+]] = spv.Constant dense<1.000000e+00> : vector<3xf32>
+  // CHECK: spv.FSub %[[EXP]], %[[ONE]]
+  %2 = math.expm1 %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Log %{{.*}}: vector<3xf32>
-  %2 = math.log %arg0 : vector<3xf32>
+  %3 = math.log %arg0 : vector<3xf32>
   // CHECK: %[[ONE:.+]] = spv.Constant dense<1.000000e+00> : vector<3xf32>
   // CHECK: %[[ADDONE:.+]] = spv.FAdd %[[ONE]], %{{.+}}
   // CHECK: spv.GLSL.Log %[[ADDONE]]
-  %3 = math.log1p %arg0 : vector<3xf32>
+  %4 = math.log1p %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.InverseSqrt %{{.*}}: vector<3xf32>
-  %4 = math.rsqrt %arg0 : vector<3xf32>
+  %5 = math.rsqrt %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Sqrt %{{.*}}: vector<3xf32>
-  %5 = math.sqrt %arg0 : vector<3xf32>
+  %6 = math.sqrt %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Tanh %{{.*}}: vector<3xf32>
-  %6 = math.tanh %arg0 : vector<3xf32>
+  %7 = math.tanh %arg0 : vector<3xf32>
   // CHECK: spv.GLSL.Sin %{{.*}}: vector<3xf32>
-  %7 = math.sin %arg0 : vector<3xf32>
+  %8 = math.sin %arg0 : vector<3xf32>
   return
 }
 
diff --git a/mlir/test/Conversion/MathToSPIRV/math-to-opencl-spirv.mlir b/mlir/test/Conversion/MathToSPIRV/math-to-opencl-spirv.mlir
index 7580f1f733c49..d0959efc98ab2 100644
--- a/mlir/test/Conversion/MathToSPIRV/math-to-opencl-spirv.mlir
+++ b/mlir/test/Conversion/MathToSPIRV/math-to-opencl-spirv.mlir
@@ -8,28 +8,32 @@ func @float32_unary_scalar(%arg0: f32) {
   %0 = math.cos %arg0 : f32
   // CHECK: spv.OCL.exp %{{.*}}: f32
   %1 = math.exp %arg0 : f32
+  // CHECK: %[[EXP:.+]] = spv.OCL.exp %arg0
+  // CHECK: %[[ONE:.+]] = spv.Constant 1.000000e+00 : f32
+  // CHECK: spv.FSub %[[EXP]], %[[ONE]]
+  %2 = math.expm1 %arg0 : f32
   // CHECK: spv.OCL.log %{{.*}}: f32
-  %2 = math.log %arg0 : f32
+  %3 = math.log %arg0 : f32
   // CHECK: %[[ONE:.+]] = spv.Constant 1.000000e+00 : f32
   // CHECK: %[[ADDONE:.+]] = spv.FAdd %[[ONE]], %{{.+}}
   // CHECK: spv.OCL.log %[[ADDONE]]
-  %3 = math.log1p %arg0 : f32
+  %4 = math.log1p %arg0 : f32
   // CHECK: spv.OCL.rsqrt %{{.*}}: f32
-  %4 = math.rsqrt %arg0 : f32
+  %5 = math.rsqrt %arg0 : f32
   // CHECK: spv.OCL.sqrt %{{.*}}: f32
-  %5 = math.sqrt %arg0 : f32
+  %6 = math.sqrt %arg0 : f32
   // CHECK: spv.OCL.tanh %{{.*}}: f32
-  %6 = math.tanh %arg0 : f32
+  %7 = math.tanh %arg0 : f32
   // CHECK: spv.OCL.sin %{{.*}}: f32
-  %7 = math.sin %arg0 : f32
+  %8 = math.sin %arg0 : f32
   // CHECK: spv.OCL.fabs %{{.*}}: f32
-  %8 = math.abs %arg0 : f32
+  %9 = math.abs %arg0 : f32
   // CHECK: spv.OCL.ceil %{{.*}}: f32
-  %9 = math.ceil %arg0 : f32
+  %10 = math.ceil %arg0 : f32
   // CHECK: spv.OCL.floor %{{.*}}: f32
-  %10 = math.floor %arg0 : f32
+  %11 = math.floor %arg0 : f32
   // CHECK: spv.OCL.erf %{{.*}}: f32
-  %11 = math.erf %arg0 : f32
+  %12 = math.erf %arg0 : f32
   return
 }
 
@@ -39,20 +43,24 @@ func @float32_unary_vector(%arg0: vector<3xf32>) {
   %0 = math.cos %arg0 : vector<3xf32>
   // CHECK: spv.OCL.exp %{{.*}}: vector<3xf32>
   %1 = math.exp %arg0 : vector<3xf32>
+  // CHECK: %[[EXP:.+]] = spv.OCL.exp %arg0
+  // CHECK: %[[ONE:.+]] = spv.Constant dense<1.000000e+00> : vector<3xf32>
+  // CHECK: spv.FSub %[[EXP]], %[[ONE]]
+  %2 = math.expm1 %arg0 : vector<3xf32>
   // CHECK: spv.OCL.log %{{.*}}: vector<3xf32>
-  %2 = math.log %arg0 : vector<3xf32>
+  %3 = math.log %arg0 : vector<3xf32>
   // CHECK: %[[ONE:.+]] = spv.Constant dense<1.000000e+00> : vector<3xf32>
   // CHECK: %[[ADDONE:.+]] = spv.FAdd %[[ONE]], %{{.+}}
   // CHECK: spv.OCL.log %[[ADDONE]]
-  %3 = math.log1p %arg0 : vector<3xf32>
+  %4 = math.log1p %arg0 : vector<3xf32>
   // CHECK: spv.OCL.rsqrt %{{.*}}: vector<3xf32>
-  %4 = math.rsqrt %arg0 : vector<3xf32>
+  %5 = math.rsqrt %arg0 : vector<3xf32>
   // CHECK: spv.OCL.sqrt %{{.*}}: vector<3xf32>
-  %5 = math.sqrt %arg0 : vector<3xf32>
+  %6 = math.sqrt %arg0 : vector<3xf32>
   // CHECK: spv.OCL.tanh %{{.*}}: vector<3xf32>
-  %6 = math.tanh %arg0 : vector<3xf32>
+  %7 = math.tanh %arg0 : vector<3xf32>
   // CHECK: spv.OCL.sin %{{.*}}: vector<3xf32>
-  %7 = math.sin %arg0 : vector<3xf32>
+  %8 = math.sin %arg0 : vector<3xf32>
   return
 }
 

From d0d8d2d572cd1db54d0f6d90f8dd3825f9c7b36b Mon Sep 17 00:00:00 2001
From: Derek Schuff <dschuff@chromium.org>
Date: Mon, 24 Jan 2022 14:59:54 -0800
Subject: [PATCH 473/946] [clang][Driver] use DWARF4 for wasm

Opt into the old default of DWARF4 for now.

Differential Revision: https://reviews.llvm.org/D118082
---
 clang/lib/Driver/ToolChains/WebAssembly.h | 1 +
 clang/test/Driver/debug-options.c         | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/clang/lib/Driver/ToolChains/WebAssembly.h b/clang/lib/Driver/ToolChains/WebAssembly.h
index c84e596759466..b4c3082a089a0 100644
--- a/clang/lib/Driver/ToolChains/WebAssembly.h
+++ b/clang/lib/Driver/ToolChains/WebAssembly.h
@@ -51,6 +51,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly final : public ToolChain {
   bool hasBlocksRuntime() const override;
   bool SupportsProfiling() const override;
   bool HasNativeLLVMSupport() const override;
+  unsigned GetDefaultDwarfVersion() const override { return 4; }
   void
   addClangTargetOptions(const llvm::opt::ArgList &DriverArgs,
                         llvm::opt::ArgStringList &CC1Args,
diff --git a/clang/test/Driver/debug-options.c b/clang/test/Driver/debug-options.c
index 22e05140f2618..e4a49269a8c70 100644
--- a/clang/test/Driver/debug-options.c
+++ b/clang/test/Driver/debug-options.c
@@ -132,6 +132,13 @@
 // RUN: %clang -### -c -g %s -target powerpc64-ibm-aix-xcoff -gcolumn-info \
 // RUN:             2>&1 | FileCheck -check-prefix=CI %s
 
+// WebAssembly.
+// WebAssembly should default to DWARF4.
+// RUN: %clang -### -c -g %s -target wasm32 2>&1 \
+// RUN:             | FileCheck -check-prefix=G_DWARF4 %s
+// RUN: %clang -### -c -g %s -target wasm64 2>&1 \
+// RUN:             | FileCheck -check-prefix=G_DWARF4 %s
+
 // RUN: %clang -### -c -gdwarf-2 %s 2>&1 \
 // RUN:             | FileCheck -check-prefix=G_ONLY_DWARF2 %s
 //

From dd01d971aa2c4b464a295ca5c78ff93fc4441dc3 Mon Sep 17 00:00:00 2001
From: Jan Korous <jkorous@apple.com>
Date: Fri, 21 Jan 2022 17:11:05 -0800
Subject: [PATCH 474/946] [clang][dataflow] Avoid MaxIterations overflow

unsigned is technically guaranteed to be only 16 bits in which case 1 << 16 would wrap around to zero.

Differential Revision: https://reviews.llvm.org/D117938
---
 .../lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 3782f0f5f69ac..7611395cafb6b 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -210,8 +210,8 @@ runTypeErasedDataflowAnalysis(const ControlFlowContext &CFCtx,
   // FIXME: Consider making the maximum number of iterations configurable.
   // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
   // of iterations, number of functions that time out, etc.
-  unsigned Iterations = 0;
-  static constexpr unsigned MaxIterations = 1 << 16;
+  uint32_t Iterations = 0;
+  static constexpr uint32_t MaxIterations = 1 << 16;
   while (const CFGBlock *Block = Worklist.dequeue()) {
     if (++Iterations > MaxIterations) {
       llvm::errs() << "Maximum number of iterations reached, giving up.\n";

From 902184e6cc263e4c66440c95a21665b6fdffe57c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 25 Jan 2022 00:24:06 +0000
Subject: [PATCH 475/946] [X86] combinePredicateReduction - generalize
 allof(cmpeq(x,0)) handling to allof(cmpeq(x,y))

There's no further reasons to limit this to cmpeq-with-zero, the outstanding regressions with lowering to PTEST have now been addressed

Improves codegen for Issue #53379
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       |   6 +-
 .../test/CodeGen/X86/vector-compare-all_of.ll |  20 ++-
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 143 +++++++++---------
 3 files changed, 81 insertions(+), 88 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4360bc68ffaee..c68774a838a25 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -42215,12 +42215,10 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
       EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
       Movmsk = DAG.getBitcast(MovmskVT, Match);
     } else {
-      // For all_of(setcc(vec,0,eq))
+      // For all_of(setcc(x,y,eq))
       // - avoid vXi64 comparisons without PCMPEQQ (SSE41+), use PCMPEQD.
       // - avoid vXi16 comparisons, use PMOVMSKB(PCMPEQB()).
-      if (BinOp == ISD::AND &&
-          Match.getOpcode() == ISD::SETCC &&
-          ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
+      if (BinOp == ISD::AND && Match.getOpcode() == ISD::SETCC &&
           cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
               ISD::CondCode::SETEQ) {
         SDValue Vec = Match.getOperand(0);
diff --git a/llvm/test/CodeGen/X86/vector-compare-all_of.ll b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
index 91fa60ef09d59..584b941681831 100644
--- a/llvm/test/CodeGen/X86/vector-compare-all_of.ll
+++ b/llvm/test/CodeGen/X86/vector-compare-all_of.ll
@@ -1277,11 +1277,10 @@ define i1 @bool_reduction_v8i32(<8 x i32> %x, <8 x i32> %y) {
 define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; SSE-LABEL: bool_reduction_v16i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    psubb %xmm3, %xmm1
+; SSE-NEXT:    psubb %xmm2, %xmm0
+; SSE-NEXT:    por %xmm1, %xmm0
+; SSE-NEXT:    ptest %xmm0, %xmm0
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
@@ -1289,18 +1288,17 @@ define i1 @bool_reduction_v16i16(<16 x i16> %x, <16 x i16> %y) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: bool_reduction_v16i16:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index fa48b72490ebf..4be3ed18cf901 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1656,10 +1656,8 @@ define i1 @icmp_v2i64_v2i1(<2 x i64>, <2 x i64>) {
 ; SSE2-LABEL: icmp_v2i64_v2i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpeqd %xmm1, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    movmskpd %xmm1, %eax
-; SSE2-NEXT:    cmpb $3, %al
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    cmpb $15, %al
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
@@ -1769,21 +1767,25 @@ define i1 @icmp_v4i32_v4i1(<4 x i32>, <4 x i32>) {
 }
 
 define i1 @icmp_v8i16_v8i1(<8 x i16>, <8 x i16>) {
-; SSE-LABEL: icmp_v8i16_v8i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqw %xmm1, %xmm0
-; SSE-NEXT:    packsswb %xmm0, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpb $-1, %al
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v8i16_v8i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v8i16_v8i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubb %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: icmp_v8i16_v8i1:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpmovmskb %xmm0, %eax
-; AVX-NEXT:    cmpb $-1, %al
+; AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
 ;
@@ -1876,14 +1878,11 @@ define i1 @icmp_v4i64_v4i1(<4 x i64>, <4 x i64>) {
 ; SSE2-LABEL: icmp_v4i64_v4i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpeqd %xmm3, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm1[1,0,3,2]
-; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    packssdw %xmm3, %xmm1
-; SSE2-NEXT:    movmskps %xmm1, %eax
-; SSE2-NEXT:    cmpb $15, %al
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    packsswb %xmm0, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpb $-1, %al
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
@@ -2022,32 +2021,40 @@ define i1 @icmp_v8i32_v8i1(<8 x i32>, <8 x i32>) {
 }
 
 define i1 @icmp_v16i16_v16i1(<16 x i16>, <16 x i16>) {
-; SSE-LABEL: icmp_v16i16_v16i1:
-; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqw %xmm3, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm2, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pmovmskb %xmm0, %eax
-; SSE-NEXT:    cmpw $-1, %ax
-; SSE-NEXT:    sete %al
-; SSE-NEXT:    retq
+; SSE2-LABEL: icmp_v16i16_v16i1:
+; SSE2:       # %bb.0:
+; SSE2-NEXT:    pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT:    pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT:    pand %xmm1, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE41-LABEL: icmp_v16i16_v16i1:
+; SSE41:       # %bb.0:
+; SSE41-NEXT:    psubb %xmm3, %xmm1
+; SSE41-NEXT:    psubb %xmm2, %xmm0
+; SSE41-NEXT:    por %xmm1, %xmm0
+; SSE41-NEXT:    ptest %xmm0, %xmm0
+; SSE41-NEXT:    sete %al
+; SSE41-NEXT:    retq
 ;
 ; AVX1-LABEL: icmp_v16i16_v16i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm3, %xmm2
-; AVX1-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpmovmskb %xmm0, %eax
-; AVX1-NEXT:    cmpw $-1, %ax
+; AVX1-NEXT:    vpsubb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vptest %xmm0, %xmm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: icmp_v16i16_v16i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsubw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vpsubb %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
@@ -2171,23 +2178,14 @@ define i1 @icmp_v8i64_v8i1(<8 x i64>, <8 x i64>) {
 ; SSE2-LABEL: icmp_v8i64_v8i1:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    pcmpeqd %xmm7, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm7 = xmm3[1,0,3,2]
-; SSE2-NEXT:    pand %xmm3, %xmm7
 ; SSE2-NEXT:    pcmpeqd %xmm6, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,0,3,2]
-; SSE2-NEXT:    pand %xmm2, %xmm3
-; SSE2-NEXT:    packssdw %xmm7, %xmm3
+; SSE2-NEXT:    packssdw %xmm3, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm5, %xmm1
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,0,3,2]
-; SSE2-NEXT:    pand %xmm1, %xmm2
 ; SSE2-NEXT:    pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2]
-; SSE2-NEXT:    pand %xmm0, %xmm1
-; SSE2-NEXT:    packssdw %xmm2, %xmm1
-; SSE2-NEXT:    packssdw %xmm3, %xmm1
-; SSE2-NEXT:    packsswb %xmm1, %xmm1
-; SSE2-NEXT:    pmovmskb %xmm1, %eax
-; SSE2-NEXT:    cmpb $-1, %al
+; SSE2-NEXT:    packssdw %xmm1, %xmm0
+; SSE2-NEXT:    packsswb %xmm2, %xmm0
+; SSE2-NEXT:    pmovmskb %xmm0, %eax
+; SSE2-NEXT:    cmpw $-1, %ax
 ; SSE2-NEXT:    sete %al
 ; SSE2-NEXT:    retq
 ;
@@ -2329,14 +2327,14 @@ define i1 @icmp_v16i32_v16i1(<16 x i32>, <16 x i32>) {
 define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
 ; SSE-LABEL: icmp_v32i16_v32i1:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
-; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
-; SSE-NEXT:    packsswb %xmm1, %xmm0
-; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
-; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
-; SSE-NEXT:    packsswb %xmm3, %xmm2
-; SSE-NEXT:    pand %xmm0, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    pcmpeqb %xmm7, %xmm3
+; SSE-NEXT:    pcmpeqb %xmm5, %xmm1
+; SSE-NEXT:    pand %xmm3, %xmm1
+; SSE-NEXT:    pcmpeqb %xmm6, %xmm2
+; SSE-NEXT:    pand %xmm1, %xmm2
+; SSE-NEXT:    pcmpeqb %xmm4, %xmm0
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pmovmskb %xmm0, %eax
 ; SSE-NEXT:    cmpw $-1, %ax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
@@ -2345,14 +2343,14 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
-; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
-; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT:    vpcmpeqb %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm5
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT:    vpcmpeqb %xmm5, %xmm6, %xmm5
+; AVX1-NEXT:    vpand %xmm4, %xmm5, %xmm4
+; AVX1-NEXT:    vpcmpeqb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT:    vpand %xmm4, %xmm1, %xmm1
+; AVX1-NEXT:    vpcmpeqb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    cmpw $-1, %ax
@@ -2362,11 +2360,10 @@ define i1 @icmp_v32i16_v32i1(<32 x i16>, <32 x i16>) {
 ;
 ; AVX2-LABEL: icmp_v32i16_v32i1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
-; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    cmpl $-1, %eax
+; AVX2-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
+; AVX2-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX2-NEXT:    vptest %ymm0, %ymm0
 ; AVX2-NEXT:    sete %al
 ; AVX2-NEXT:    vzeroupper
 ; AVX2-NEXT:    retq

From fd0a4bc76bd93d81ca3acecb2ce07513d64060be Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Mon, 24 Jan 2022 16:56:29 -0800
Subject: [PATCH 476/946] [RISCV] Add missing space to 'clang-format on'
 directive. NFC

Without a space after the comment characters it seems to be ignored.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2ab9ab653328d..7baed2793e4e2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1729,7 +1729,7 @@ MachineInstr *RISCVInstrInfo::convertToThreeAddress(MachineInstr &MI,
     CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
     CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
     }
-    //clang-format on
+    // clang-format on
 
     MachineBasicBlock &MBB = *MI.getParent();
     MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc))

From 15f7857412aebcaa0277803f8eafbd366e236820 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Sun, 23 Jan 2022 07:51:24 -0800
Subject: [PATCH 477/946] [tests] Refresh autogen tests for SLP

---
 .../SLPVectorizer/AArch64/64-bit-vector.ll    |  8 ++---
 .../SLPVectorizer/AArch64/slp-or-reduction.ll |  1 +
 .../SLPVectorizer/AArch64/spillcost-di.ll     | 28 ++++++++--------
 .../AMDGPU/add_sub_sat-inseltpoison.ll        |  8 ++---
 .../SLPVectorizer/AMDGPU/add_sub_sat.ll       |  8 ++---
 .../address-space-ptr-sze-gep-index-assert.ll | 28 ++++++++++------
 .../SLPVectorizer/PowerPC/aggregate.ll        |  2 +-
 .../Transforms/SLPVectorizer/X86/PR34635.ll   |  2 +-
 .../Transforms/SLPVectorizer/X86/PR36280.ll   |  4 +--
 .../Transforms/SLPVectorizer/X86/aggregate.ll |  2 +-
 .../X86/alternate-cast-inseltpoison.ll        | 16 +++++-----
 .../SLPVectorizer/X86/alternate-cast.ll       | 16 +++++-----
 .../SLPVectorizer/X86/arith-add-ssat.ll       | 28 ++++++++++------
 .../SLPVectorizer/X86/arith-sub-ssat.ll       | 28 ++++++++++------
 .../test/Transforms/SLPVectorizer/X86/call.ll | 13 +++-----
 .../X86/crash_netbsd_decompress.ll            |  8 ++---
 .../X86/crash_scheduling-inseltpoison.ll      |  4 +--
 .../SLPVectorizer/X86/crash_scheduling.ll     |  4 +--
 .../SLPVectorizer/X86/crash_vectorizeTree.ll  | 27 ++++++++++------
 .../SLPVectorizer/X86/debug_info.ll           | 30 ++++++++---------
 .../SLPVectorizer/X86/external_user.ll        |  2 +-
 .../SLPVectorizer/X86/extract_in_tree_user.ll |  6 ++--
 .../SLPVectorizer/X86/gep_mismatch.ll         |  2 +-
 .../SLPVectorizer/X86/hadd-inseltpoison.ll    | 16 +++++-----
 .../test/Transforms/SLPVectorizer/X86/hadd.ll | 16 +++++-----
 .../SLPVectorizer/X86/hsub-inseltpoison.ll    | 16 +++++-----
 .../test/Transforms/SLPVectorizer/X86/hsub.ll | 16 +++++-----
 .../SLPVectorizer/X86/insertvalue.ll          |  8 ++---
 .../Transforms/SLPVectorizer/X86/metadata.ll  | 13 ++++----
 .../SLPVectorizer/X86/multi_block.ll          |  6 ++--
 .../SLPVectorizer/X86/no_alternate_divrem.ll  | 32 +++++++++----------
 .../Transforms/SLPVectorizer/X86/pr16899.ll   |  6 ++--
 .../Transforms/SLPVectorizer/X86/reduction.ll | 10 +++---
 .../SLPVectorizer/X86/remark_listcost.ll      | 14 ++++----
 .../SLPVectorizer/X86/undef_vect.ll           | 10 +++---
 35 files changed, 235 insertions(+), 203 deletions(-)

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
index ad970b2bec1bf..10883987aa758 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/64-bit-vector.ll
@@ -21,14 +21,14 @@ define void @f(float* %r, float* %w) {
 ; NO_SLP-LABEL: @f(
 ; NO_SLP-NEXT:    [[R0:%.*]] = getelementptr inbounds float, float* [[R:%.*]], i64 0
 ; NO_SLP-NEXT:    [[R1:%.*]] = getelementptr inbounds float, float* [[R]], i64 1
-; NO_SLP-NEXT:    [[F0:%.*]] = load float, float* [[R0]]
-; NO_SLP-NEXT:    [[F1:%.*]] = load float, float* [[R1]]
+; NO_SLP-NEXT:    [[F0:%.*]] = load float, float* [[R0]], align 4
+; NO_SLP-NEXT:    [[F1:%.*]] = load float, float* [[R1]], align 4
 ; NO_SLP-NEXT:    [[ADD0:%.*]] = fadd float [[F0]], [[F0]]
 ; NO_SLP-NEXT:    [[ADD1:%.*]] = fadd float [[F1]], [[F1]]
 ; NO_SLP-NEXT:    [[W0:%.*]] = getelementptr inbounds float, float* [[W:%.*]], i64 0
 ; NO_SLP-NEXT:    [[W1:%.*]] = getelementptr inbounds float, float* [[W]], i64 1
-; NO_SLP-NEXT:    store float [[ADD0]], float* [[W0]]
-; NO_SLP-NEXT:    store float [[ADD1]], float* [[W1]]
+; NO_SLP-NEXT:    store float [[ADD0]], float* [[W0]], align 4
+; NO_SLP-NEXT:    store float [[ADD1]], float* [[W1]], align 4
 ; NO_SLP-NEXT:    ret void
 ;
   %r0 = getelementptr inbounds float, float* %r, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
index 185eb8a73ffb1..53126ee407e98 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/slp-or-reduction.ll
@@ -29,6 +29,7 @@ define i8 @reduce_or(%struct.buf* %a, %struct.buf* %b) {
 ; CHECK-NEXT:    [[TMP4:%.*]] = xor <8 x i8> [[TMP3]], [[TMP1]]
 ; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP4]])
 ; CHECK-NEXT:    ret i8 [[TMP5]]
+;
 
 entry:
   %arrayidx = getelementptr inbounds %struct.buf, %struct.buf* %a, i64 0, i32 0, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
index 98a9fd482e872..39f2f885bc26b 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll
@@ -9,20 +9,20 @@ target triple = "aarch64"
 define void @patatino(i64 %n, i64 %i, %struct.S* %p) !dbg !7 {
 ; CHECK-LABEL: @patatino(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[N:%.*]], metadata !18, metadata !DIExpression()), !dbg !23
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata !19, metadata !DIExpression()), !dbg !24
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata !20, metadata !DIExpression()), !dbg !25
-; CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg !26
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata !21, metadata !DIExpression()), !dbg !27
-; CHECK-NEXT:    [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg !28
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg !26
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg !26, !tbaa !29
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata !22, metadata !DIExpression()), !dbg !33
-; CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg !34
-; CHECK-NEXT:    [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg !35
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg !36
-; CHECK-NEXT:    store <2 x i64> [[TMP1]], <2 x i64>* [[TMP2]], align 8, !dbg !36, !tbaa !29
-; CHECK-NEXT:    ret void, !dbg !37
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[N:%.*]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata %struct.S* [[P:%.*]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]]
+; CHECK-NEXT:    [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], %struct.S* [[P]], i64 [[N]], i32 0, !dbg [[DBG26:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]]
+; CHECK-NEXT:    [[Y3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[N]], i32 1, !dbg [[DBG28:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i64* [[X1]] to <2 x i64>*, !dbg [[DBG26]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 8, !dbg [[DBG26]], !tbaa [[TBAA29:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i64 undef, metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG33:![0-9]+]]
+; CHECK-NEXT:    [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 0, !dbg [[DBG34:![0-9]+]]
+; CHECK-NEXT:    [[Y7:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[P]], i64 [[I]], i32 1, !dbg [[DBG35:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64* [[X5]] to <2 x i64>*, !dbg [[DBG36:![0-9]+]]
+; CHECK-NEXT:    store <2 x i64> [[TMP1]], <2 x i64>* [[TMP2]], align 8, !dbg [[DBG36]], !tbaa [[TBAA29]]
+; CHECK-NEXT:    ret void, !dbg [[DBG37:![0-9]+]]
 ;
 entry:
   call void @llvm.dbg.value(metadata i64 %n, metadata !18, metadata !DIExpression()), !dbg !23
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll
index f2c4b7e899c12..a86fd0e9b8693 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll
@@ -248,8 +248,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
 ; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
 ; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
 ; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
-; GFX8-NEXT:    [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 undef>
-; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2
+; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 undef>
+; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
 ; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
 ;
 bb:
@@ -297,8 +297,8 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
 ; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
 ; GFX8-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
 ; GFX8-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
-; GFX8-NEXT:    [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX8-NEXT:    ret <4 x i16> [[INS_32]]
+; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
 ;
 bb:
   %arg0.0 = extractelement <4 x i16> %arg0, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll
index af1b7f42f9973..503e947ebeed2 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll
@@ -248,8 +248,8 @@ define <3 x i16> @uadd_sat_v3i16(<3 x i16> %arg0, <3 x i16> %arg1) {
 ; GFX8-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i16> [[ARG1]], <3 x i16> undef, <2 x i32> <i32 0, i32 1>
 ; GFX8-NEXT:    [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
 ; GFX8-NEXT:    [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
-; GFX8-NEXT:    [[INS_11:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 undef>
-; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[INS_11]], i16 [[ADD_2]], i64 2
+; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <3 x i32> <i32 0, i32 1, i32 undef>
+; GFX8-NEXT:    [[INS_2:%.*]] = insertelement <3 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
 ; GFX8-NEXT:    ret <3 x i16> [[INS_2]]
 ;
 bb:
@@ -297,8 +297,8 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
 ; GFX8-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
 ; GFX8-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> undef, <2 x i32> <i32 2, i32 3>
 ; GFX8-NEXT:    [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
-; GFX8-NEXT:    [[INS_32:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; GFX8-NEXT:    ret <4 x i16> [[INS_32]]
+; GFX8-NEXT:    [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; GFX8-NEXT:    ret <4 x i16> [[INS_31]]
 ;
 bb:
   %arg0.0 = extractelement <4 x i16> %arg0, i64 0
diff --git a/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll b/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
index 4f85482e3ae00..fc6347dcaa427 100644
--- a/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AMDGPU/address-space-ptr-sze-gep-index-assert.ll
@@ -12,8 +12,8 @@ define void @slp_scev_assert(i32 %idx, i64 %tmp3) #0 {
 ; CHECK-NEXT:    [[TMP:%.*]] = addrspacecast i8 addrspace(5)* undef to i8*
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, i8 addrspace(5)* undef, i32 [[IDX:%.*]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP]], i64 [[TMP3:%.*]]
-; CHECK-NEXT:    store i8 0, i8 addrspace(5)* [[TMP2]]
-; CHECK-NEXT:    store i8 0, i8* [[TMP4]]
+; CHECK-NEXT:    store i8 0, i8 addrspace(5)* [[TMP2]], align 1
+; CHECK-NEXT:    store i8 0, i8* [[TMP4]], align 1
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -39,8 +39,8 @@ define void @multi_as_reduction_different_sized(i32 addrspace(3)* %lds, i32 %idx
 ; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef
+; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -80,8 +80,8 @@ define void @multi_as_reduction_same_size(i32 addrspace(1)* %global, i64 %idx0,
 ; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_GLOBAL_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_GLOBAL_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef
+; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -122,8 +122,8 @@ define void @multi_as_reduction_different_sized_noncanon(i32 addrspace(3)* %lds,
 ; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, i32* [[FLAT_1]], align 4
 ; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
 ; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
-; CHECK-NEXT:    store i32 [[SUB0]], i32* undef
-; CHECK-NEXT:    store i32 [[SUB1]], i32* undef
+; CHECK-NEXT:    store i32 [[SUB0]], i32* undef, align 4
+; CHECK-NEXT:    store i32 [[SUB1]], i32* undef, align 4
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -148,9 +148,17 @@ bb:
   ret void
 }
 
-; CHECK-LABEL: slp_crash_on_addrspacecast
-; CHECK: ret void
 define void @slp_crash_on_addrspacecast() {
+; CHECK-LABEL: @slp_crash_on_addrspacecast(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
+; CHECK-NEXT:    [[P0:%.*]] = addrspacecast i64 addrspace(3)* [[TMP0]] to i64*
+; CHECK-NEXT:    store i64 undef, i64* [[P0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
+; CHECK-NEXT:    [[P1:%.*]] = addrspacecast i64 addrspace(3)* [[TMP1]] to i64*
+; CHECK-NEXT:    store i64 undef, i64* [[P1]], align 8
+; CHECK-NEXT:    ret void
+;
 entry:
   %0 = getelementptr inbounds i64, i64 addrspace(3)* undef, i32 undef
   %p0 = addrspacecast i64 addrspace(3)* %0 to i64*
diff --git a/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll b/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
index 99af834e5b81e..974bddc4ba2cf 100644
--- a/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
+++ b/llvm/test/Transforms/SLPVectorizer/PowerPC/aggregate.ll
@@ -9,7 +9,7 @@ define { i64, i64 } @getS() {
 ; CHECK-LABEL: @getS(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i64, i64 } [[TMP2]], i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret { i64, i64 } [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
index daa68b12b3300..33be367b35311 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR34635.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown-linux -slp-vectorizer -S -mcpu=corei7 | FileCheck %s
 
 define i32 @main() {
-; CHECK-LABEL: define {{[^@]+}}@main(
+; CHECK-LABEL: @main(
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[T:%.*]] = alloca <8 x i32>, align 32
 ; CHECK-NEXT:    [[T1:%.*]] = bitcast <8 x i32>* [[T]] to [8 x i32]*
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll b/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
index 1001468fd6f74..6968171f5db7c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/PR36280.ll
@@ -5,8 +5,8 @@ define float @jacobi(float* %p, float %x, float %y, float %z) {
 ; CHECK-LABEL: @jacobi(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr float, float* [[P:%.*]], i64 1
 ; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, float* [[P]], i64 2
-; CHECK-NEXT:    [[P1:%.*]] = load float, float* [[GEP1]]
-; CHECK-NEXT:    [[P2:%.*]] = load float, float* [[GEP2]]
+; CHECK-NEXT:    [[P1:%.*]] = load float, float* [[GEP1]], align 4
+; CHECK-NEXT:    [[P2:%.*]] = load float, float* [[GEP2]], align 4
 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[P1]], [[X:%.*]]
 ; CHECK-NEXT:    [[MUL2:%.*]] = fmul float [[P2]], [[Y:%.*]]
 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[MUL1]], [[Z:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll b/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
index f270dbf4f78e7..1af4154a8f338 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/aggregate.ll
@@ -9,7 +9,7 @@ define { i64, i64 } @getS() {
 ; CHECK-LABEL: @getS(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i64, i64* bitcast (%struct.S* @kS0 to i64*), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds (%struct.S, %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* bitcast (i8** getelementptr inbounds ([[STRUCT_S:%.*]], %struct.S* @kS0, i64 0, i32 1) to i64*), align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i64, i64 } undef, i64 [[TMP0]], 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i64, i64 } [[TMP2]], i64 [[TMP1]], 1
 ; CHECK-NEXT:    ret { i64, i64 } [[TMP3]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
index 44d0f17182694..2416d100f653f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
@@ -165,8 +165,8 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
-; CHECK-NEXT:    [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x float> [[R72]]
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x float> [[R71]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
@@ -209,12 +209,12 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16
 ; CHECK-NEXT:    [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; CHECK-NEXT:    ret <8 x float> [[R72]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; CHECK-NEXT:    ret <8 x float> [[R71]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
index 5b537ac6683f4..7d44d988143f9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
@@ -165,8 +165,8 @@ define <8 x float> @sitofp_4i32_8i16(<4 x i32> %a, <8 x i16> %b) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
 ; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[B:%.*]], <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 ; CHECK-NEXT:    [[TMP3:%.*]] = sitofp <4 x i16> [[TMP2]] to <4 x float>
-; CHECK-NEXT:    [[R72:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <8 x float> [[R72]]
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    ret <8 x float> [[R71]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
@@ -209,12 +209,12 @@ define <8 x float> @sitofp_uitofp_4i32_8i16_16i8(<4 x i32> %a, <8 x i16> %b, <16
 ; CHECK-NEXT:    [[TMP9:%.*]] = sitofp <2 x i8> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP10:%.*]] = uitofp <2 x i8> [[TMP8]] to <2 x float>
 ; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> [[TMP10]], <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    [[R31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[R53:%.*]] = shufflevector <8 x float> [[R31]], <8 x float> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[R72:%.*]] = shufflevector <8 x float> [[R53]], <8 x float> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
-; CHECK-NEXT:    ret <8 x float> [[R72]]
+; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[R52:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> [[TMP13]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 undef, i32 undef>
+; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT:    [[R71:%.*]] = shufflevector <8 x float> [[R52]], <8 x float> [[TMP14]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
+; CHECK-NEXT:    ret <8 x float> [[R71]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0
   %a1 = extractelement <4 x i32> %a, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
index a94439f348702..66154522c327d 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
@@ -96,6 +96,24 @@ define void @add_v8i64() {
 ; SLM-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
 ; SLM-NEXT:    ret void
 ;
+; AVX-LABEL: @add_v8i64(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP3]])
+; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> [[TMP2]], <4 x i64> [[TMP4]])
+; AVX-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    ret void
+;
+; AVX512-LABEL: @add_v8i64(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    ret void
+;
 ; AVX1-LABEL: @add_v8i64(
 ; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
@@ -114,7 +132,6 @@ define void @add_v8i64() {
 ; AVX1-NEXT:    store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
 ; AVX1-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
 ; AVX1-NEXT:    ret void
-;
 ; AVX2-LABEL: @add_v8i64(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -125,14 +142,6 @@ define void @add_v8i64() {
 ; AVX2-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
 ; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
 ; AVX2-NEXT:    ret void
-;
-; AVX512-LABEL: @add_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
-; AVX512-NEXT:    ret void
-;
 ; AVX256BW-LABEL: @add_v8i64(
 ; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -143,7 +152,6 @@ define void @add_v8i64() {
 ; AVX256BW-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    ret void
-;
   %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
   %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
   %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
index 3c23d0fd75f4b..88f18cba2b2ee 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
@@ -96,6 +96,24 @@ define void @sub_v8i64() {
 ; SLM-NEXT:    store i64 [[R7]], i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 7), align 8
 ; SLM-NEXT:    ret void
 ;
+; AVX-LABEL: @sub_v8i64(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP3:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @b64 to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @b64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    [[TMP5:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP1]], <4 x i64> [[TMP3]])
+; AVX-NEXT:    [[TMP6:%.*]] = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> [[TMP2]], <4 x i64> [[TMP4]])
+; AVX-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
+; AVX-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
+; AVX-NEXT:    ret void
+;
+; AVX512-LABEL: @sub_v8i64(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
+; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
+; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
+; AVX512-NEXT:    ret void
+;
 ; AVX1-LABEL: @sub_v8i64(
 ; AVX1-NEXT:    [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @a64 to <2 x i64>*), align 8
 ; AVX1-NEXT:    [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2) to <2 x i64>*), align 8
@@ -114,7 +132,6 @@ define void @sub_v8i64() {
 ; AVX1-NEXT:    store <2 x i64> [[TMP11]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <2 x i64>*), align 8
 ; AVX1-NEXT:    store <2 x i64> [[TMP12]], <2 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 6) to <2 x i64>*), align 8
 ; AVX1-NEXT:    ret void
-;
 ; AVX2-LABEL: @sub_v8i64(
 ; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
 ; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -125,14 +142,6 @@ define void @sub_v8i64() {
 ; AVX2-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
 ; AVX2-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
 ; AVX2-NEXT:    ret void
-;
-; AVX512-LABEL: @sub_v8i64(
-; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @a64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP2:%.*]] = load <8 x i64>, <8 x i64>* bitcast ([8 x i64]* @b64 to <8 x i64>*), align 8
-; AVX512-NEXT:    [[TMP3:%.*]] = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> [[TMP1]], <8 x i64> [[TMP2]])
-; AVX512-NEXT:    store <8 x i64> [[TMP3]], <8 x i64>* bitcast ([8 x i64]* @c64 to <8 x i64>*), align 8
-; AVX512-NEXT:    ret void
-;
 ; AVX256BW-LABEL: @sub_v8i64(
 ; AVX256BW-NEXT:    [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([8 x i64]* @a64 to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    [[TMP2:%.*]] = load <4 x i64>, <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 4) to <4 x i64>*), align 8
@@ -143,7 +152,6 @@ define void @sub_v8i64() {
 ; AVX256BW-NEXT:    store <4 x i64> [[TMP5]], <4 x i64>* bitcast ([8 x i64]* @c64 to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    store <4 x i64> [[TMP6]], <4 x i64>* bitcast (i64* getelementptr inbounds ([8 x i64], [8 x i64]* @c64, i32 0, i64 4) to <4 x i64>*), align 8
 ; AVX256BW-NEXT:    ret void
-;
   %a0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 0), align 8
   %a1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 1), align 8
   %a2 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @a64, i32 0, i64 2), align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/call.ll b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
index 5cca9363802b4..a0dece0a58b47 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/call.ll
@@ -125,8 +125,8 @@ define void @sqrt_libm_errno(double* %a, double* %b) {
 ; CHECK-NEXT:    [[A0:%.*]] = load double, double* [[A:%.*]], align 8
 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
 ; CHECK-NEXT:    [[A1:%.*]] = load double, double* [[IDX1]], align 8
-; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #2
-; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #2
+; CHECK-NEXT:    [[SQRT1:%.*]] = tail call nnan double @sqrt(double [[A0]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    [[SQRT2:%.*]] = tail call nnan double @sqrt(double [[A1]]) #[[ATTR2]]
 ; CHECK-NEXT:    store double [[SQRT1]], double* [[B:%.*]], align 8
 ; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
 ; CHECK-NEXT:    store double [[SQRT2]], double* [[IDX2]], align 8
@@ -149,8 +149,8 @@ define void @round_custom(i64* %a, i64* %b) {
 ; CHECK-NEXT:    [[A0:%.*]] = load i64, i64* [[A:%.*]], align 8
 ; CHECK-NEXT:    [[IDX1:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 1
 ; CHECK-NEXT:    [[A1:%.*]] = load i64, i64* [[IDX1]], align 8
-; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #3
-; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #3
+; CHECK-NEXT:    [[ROUND1:%.*]] = tail call i64 @round(i64 [[A0]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[ROUND2:%.*]] = tail call i64 @round(i64 [[A1]]) #[[ATTR3]]
 ; CHECK-NEXT:    store i64 [[ROUND1]], i64* [[B:%.*]], align 8
 ; CHECK-NEXT:    [[IDX2:%.*]] = getelementptr inbounds i64, i64* [[B]], i64 1
 ; CHECK-NEXT:    store i64 [[ROUND2]], i64* [[IDX2]], align 8
@@ -168,10 +168,5 @@ define void @round_custom(i64* %a, i64* %b) {
 }
 
 
-; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) [[ATTR0:#[0-9]+]]
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) [[ATTR0]]
-; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]]
-; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]]
 
-; CHECK: attributes [[ATTR0]] = { nofree nosync nounwind readnone speculatable willreturn }
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
index de17d44f39a04..7e40a49fb09e9 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_netbsd_decompress.ll
@@ -15,8 +15,8 @@ target triple = "x86_64-apple-macosx10.8.0"
 define i32 @fn1() {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 0), align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 1), align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_DSTATE:%.*]], %struct.DState* @b, i32 0, i32 0), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_DSTATE]], %struct.DState* @b, i32 0, i32 1), align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* @d, align 4
 ; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[SW_BB:%.*]], label [[SAVE_STATE_AND_RETURN:%.*]]
@@ -33,8 +33,8 @@ define i32 @fn1() {
 ; CHECK:       save_state_and_return:
 ; CHECK-NEXT:    [[T_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP0]], [[SW_BB]] ], [ [[TMP0]], [[SW_BB]] ]
 ; CHECK-NEXT:    [[F_0:%.*]] = phi i32 [ 0, [[IF_END]] ], [ [[TMP1]], [[ENTRY]] ], [ 0, [[SW_BB]] ], [ 0, [[SW_BB]] ]
-; CHECK-NEXT:    store i32 [[T_0]], i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 0), align 4
-; CHECK-NEXT:    store i32 [[F_0]], i32* getelementptr inbounds (%struct.DState, %struct.DState* @b, i32 0, i32 1), align 4
+; CHECK-NEXT:    store i32 [[T_0]], i32* getelementptr inbounds ([[STRUCT_DSTATE]], %struct.DState* @b, i32 0, i32 0), align 4
+; CHECK-NEXT:    store i32 [[F_0]], i32* getelementptr inbounds ([[STRUCT_DSTATE]], %struct.DState* @b, i32 0, i32 1), align 4
 ; CHECK-NEXT:    ret i32 undef
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
index 348aed4a48e5a..f529d0fc47338 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling-inseltpoison.ll
@@ -23,11 +23,11 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> poison, double [[T_0259]], i32 0
 ; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, [[TBAA0:!tbaa !.*]]
+; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> poison, double [[P3_ADDR_0258]], i32 0
 ; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, [[TBAA0]]
+; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
 ; CHECK-NEXT:    [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
index 8e93552ff3663..9265ca1731a09 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_scheduling.ll
@@ -23,11 +23,11 @@ define void @_foo(double %p1, double %p2, double %p3) #0 {
 ; CHECK-NEXT:    [[VECINIT_I_I237:%.*]] = insertelement <2 x double> undef, double [[T_0259]], i32 0
 ; CHECK-NEXT:    [[X13:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I237]])
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB1]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa !0
+; CHECK-NEXT:    store i32 [[X13]], i32* [[ARRAYIDX]], align 4, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[VECINIT_I_I:%.*]] = insertelement <2 x double> undef, double [[P3_ADDR_0258]], i32 0
 ; CHECK-NEXT:    [[X14:%.*]] = tail call i32 @_xfn(<2 x double> [[VECINIT_I_I]])
 ; CHECK-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [256 x i32], [256 x i32]* [[TAB2]], i64 0, i64 [[INDVARS_IV266]]
-; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa !0
+; CHECK-NEXT:    store i32 [[X14]], i32* [[ARRAYIDX26]], align 4, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[ADD27]] = fadd double [[MUL19]], [[T_0259]]
 ; CHECK-NEXT:    [[ADD28]] = fadd double [[MUL21]], [[P3_ADDR_0258]]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT267]] = add nuw nsw i64 [[INDVARS_IV266]], 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
index d646180f36d98..c1a67ab415fea 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_vectorizeTree.ll
@@ -25,27 +25,36 @@ define void @bar() {
 ; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[TMP0]], %0* undef, i64 0, i32 1, i32 1
 ; CHECK-NEXT:    br label [[TMP7:%.*]]
-; CHECK:         [[TMP8:%.*]] = phi <2 x double> [ <double 1.800000e+01, double 2.800000e+01>, [[TMP0]] ], [ [[TMP11:%.*]], [[TMP21:%.*]] ], [ [[TMP11]], [[TMP18:%.*]] ], [ [[TMP11]], [[TMP18]] ]
+; CHECK:       7:
+; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x double> [ <double 1.800000e+01, double 2.800000e+01>, [[TMP0]] ], [ [[TMP11:%.*]], [[TMP21:%.*]] ], [ [[TMP11]], [[TMP18:%.*]] ], [ [[TMP11]], [[TMP18]] ]
 ; CHECK-NEXT:    [[TMP9:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[TMP8]], <2 x double>* [[TMP9]], align 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
 ; CHECK-NEXT:    [[TMP11]] = load <2 x double>, <2 x double>* [[TMP10]], align 8
 ; CHECK-NEXT:    br i1 undef, label [[TMP12:%.*]], label [[TMP13:%.*]]
-; CHECK:         ret void
-; CHECK:         [[TMP14:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
+; CHECK:       12:
+; CHECK-NEXT:    ret void
+; CHECK:       13:
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast double* [[TMP5]] to <2 x double>*
 ; CHECK-NEXT:    store <2 x double> [[TMP11]], <2 x double>* [[TMP14]], align 8
 ; CHECK-NEXT:    br i1 undef, label [[TMP15:%.*]], label [[TMP16:%.*]]
-; CHECK:         br label [[TMP16]]
-; CHECK:         br i1 undef, label [[TMP17:%.*]], label [[TMP18]]
-; CHECK:         unreachable
-; CHECK:         [[TMP19:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
+; CHECK:       15:
+; CHECK-NEXT:    br label [[TMP16]]
+; CHECK:       16:
+; CHECK-NEXT:    br i1 undef, label [[TMP17:%.*]], label [[TMP18]]
+; CHECK:       17:
+; CHECK-NEXT:    unreachable
+; CHECK:       18:
+; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[TMP11]], i32 0
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x double> [[TMP11]], i32 1
 ; CHECK-NEXT:    switch i32 undef, label [[TMP21]] [
 ; CHECK-NEXT:    i32 32, label [[TMP7]]
 ; CHECK-NEXT:    i32 103, label [[TMP7]]
 ; CHECK-NEXT:    ]
-; CHECK:         br i1 undef, label [[TMP7]], label [[TMP22:%.*]]
-; CHECK:         unreachable
+; CHECK:       21:
+; CHECK-NEXT:    br i1 undef, label [[TMP7]], label [[TMP22:%.*]]
+; CHECK:       22:
+; CHECK-NEXT:    unreachable
 ;
   %1 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 0
   %2 = getelementptr inbounds %0, %0* undef, i64 0, i32 1, i32 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
index 25ff6b6ba702a..c717c5d10b30c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -16,24 +16,24 @@ target triple = "x86_64-apple-macosx10.7.0"
 define i32 @depth(double* nocapture %A, i32 %m) #0 !dbg !4 {
 ; CHECK-LABEL: @depth(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata double* [[A:%.*]], metadata !12, metadata !DIExpression()), !dbg !18
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[M:%.*]], metadata !13, metadata !DIExpression()), !dbg !18
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 0.000000e+00, metadata !14, metadata !DIExpression()), !dbg !19
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 2.000000e-01, metadata !15, metadata !DIExpression()), !dbg !19
-; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 0, metadata !16, metadata !DIExpression()), !dbg !20
-; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[M]], 0, !dbg !20
-; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]], !dbg !20
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double* [[A:%.*]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 [[M:%.*]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 0.000000e+00, metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata double 2.000000e-01, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata i32 0, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]]
+; CHECK-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[M]], 0, !dbg [[DBG20]]
+; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]], !dbg [[DBG20]]
 ; CHECK:       for.body.lr.ph:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 4, !dbg !21
-; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*, !dbg !21
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !dbg !21
-; CHECK-NEXT:    br label [[FOR_END]], !dbg !20
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[A]], i64 4, !dbg [[DBG21:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[ARRAYIDX]] to <2 x double>*, !dbg [[DBG21]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !dbg [[DBG21]]
+; CHECK-NEXT:    br label [[FOR_END]], !dbg [[DBG20]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[FOR_BODY_LR_PH]] ], [ <double 0.000000e+00, double 1.000000e+00>, [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 8, !dbg !23
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*, !dbg !23
-; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8, !dbg !23
-; CHECK-NEXT:    ret i32 undef, !dbg !24
+; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[A]], i64 8, !dbg [[DBG23:![0-9]+]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast double* [[ARRAYIDX2]] to <2 x double>*, !dbg [[DBG23]]
+; CHECK-NEXT:    store <2 x double> [[TMP2]], <2 x double>* [[TMP3]], align 8, !dbg [[DBG23]]
+; CHECK-NEXT:    ret i32 undef, !dbg [[DBG24:![0-9]+]]
 ;
 entry:
   tail call void @llvm.dbg.value(metadata double* %A, i64 0, metadata !12, metadata !DIExpression()), !dbg !19
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
index 9f14ff88cc816..012ba4da88033 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/external_user.ll
@@ -82,7 +82,7 @@ define i32 @needtogather(double *noalias %a, i32 *noalias %b,  float * noalias %
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[D:%.*]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sitofp i32 [[TMP0]] to float
-; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[C:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load float, float* [[C:%.*]], align 4
 ; CHECK-NEXT:    [[SUB:%.*]] = fsub float 0.000000e+00, [[TMP1]]
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[SUB]], 0.000000e+00
 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[CONV]], [[MUL]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
index cbf6d8ff4fa95..38b2b97a23cd0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extract_in_tree_user.ll
@@ -111,9 +111,9 @@ define void @externally_used_ptrs() {
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i64*> [[TMP3]], i32 1
 ; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
 ; CHECK-NEXT:    [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[TMP6]], align 8
-; CHECK-NEXT:    [[TMP9:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
-; CHECK-NEXT:    store <2 x i64> [[TMP9]], <2 x i64>* [[TMP10]], align 8
+; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i64> [[TMP4]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64* [[TMP5]] to <2 x i64>*
+; CHECK-NEXT:    store <2 x i64> [[TMP8]], <2 x i64>* [[TMP9]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll b/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
index f9b9995066a22..409e2e71d090a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
@@ -13,7 +13,7 @@ define void @foo() {
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[LS1_PH:%.*]] = phi float* [ [[_TMP1:%.*]], [[BB1]] ], [ undef, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[LS2_PH:%.*]] = phi float* [ [[_TMP2:%.*]], [[BB1]] ], [ undef, [[ENTRY]] ]
-; CHECK-NEXT:    store float undef, float* [[LS1_PH]]
+; CHECK-NEXT:    store float undef, float* [[LS1_PH]], align 4
 ; CHECK-NEXT:    [[_TMP1]] = getelementptr float, float* [[LS1_PH]], i32 1
 ; CHECK-NEXT:    [[_TMP2]] = getelementptr float, float* [[LS2_PH]], i64 4
 ; CHECK-NEXT:    br i1 false, label [[BB1]], label [[BB2:%.*]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
index bc6eeb1cdd7fd..8db775a352246 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
@@ -152,8 +152,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
 ; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    ret <4 x double> [[R032]]
+; SSE-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    ret <4 x double> [[R031]]
 ;
 ; SLM-LABEL: @test_v4f64(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
@@ -162,8 +162,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
 ; SLM-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT:    ret <4 x double> [[R032]]
+; SLM-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT:    ret <4 x double> [[R031]]
 ;
 ; AVX-LABEL: @test_v4f64(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -204,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
 ; SLM-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT:    ret <8 x float> [[R072]]
+; SLM-NEXT:    [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT:    ret <8 x float> [[R071]]
 ;
 ; AVX-LABEL: @test_v8f32(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -324,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
 ; SSE-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE-NEXT:    ret <16 x i16> [[RV152]]
+; SSE-NEXT:    [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE-NEXT:    ret <16 x i16> [[RV151]]
 ;
 ; SLM-LABEL: @test_v16i16(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
index b9d55ecb27ebe..3f332c14126b5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
@@ -152,8 +152,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
 ; SSE-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    ret <4 x double> [[R032]]
+; SSE-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    ret <4 x double> [[R031]]
 ;
 ; SLM-LABEL: @test_v4f64(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
@@ -162,8 +162,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
 ; SLM-NEXT:    [[TMP6:%.*]] = fadd <2 x double> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT:    ret <4 x double> [[R032]]
+; SLM-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT:    ret <4 x double> [[R031]]
 ;
 ; AVX-LABEL: @test_v4f64(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -204,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
 ; SLM-NEXT:    [[TMP6:%.*]] = fadd <4 x float> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT:    ret <8 x float> [[R072]]
+; SLM-NEXT:    [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT:    ret <8 x float> [[R071]]
 ;
 ; AVX-LABEL: @test_v8f32(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -324,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
 ; SSE-NEXT:    [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE-NEXT:    ret <16 x i16> [[RV152]]
+; SSE-NEXT:    [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE-NEXT:    ret <16 x i16> [[RV151]]
 ;
 ; SLM-LABEL: @test_v16i16(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
index 3d165eb00b90d..c123956b8856e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
@@ -152,8 +152,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
 ; SSE-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    ret <4 x double> [[R032]]
+; SSE-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    ret <4 x double> [[R031]]
 ;
 ; SLM-LABEL: @test_v4f64(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
@@ -162,8 +162,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
 ; SLM-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT:    ret <4 x double> [[R032]]
+; SLM-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT:    ret <4 x double> [[R031]]
 ;
 ; AVX-LABEL: @test_v4f64(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -204,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
 ; SLM-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT:    ret <8 x float> [[R072]]
+; SLM-NEXT:    [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT:    ret <8 x float> [[R071]]
 ;
 ; AVX-LABEL: @test_v8f32(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -324,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
 ; SSE-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE-NEXT:    ret <16 x i16> [[RV152]]
+; SSE-NEXT:    [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE-NEXT:    ret <16 x i16> [[RV151]]
 ;
 ; SLM-LABEL: @test_v16i16(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
index 9ff8142b269df..ffd320d34e869 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
@@ -152,8 +152,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
 ; SSE-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SSE-NEXT:    ret <4 x double> [[R032]]
+; SSE-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SSE-NEXT:    ret <4 x double> [[R031]]
 ;
 ; SLM-LABEL: @test_v4f64(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
@@ -162,8 +162,8 @@ define <4 x double> @test_v4f64(<4 x double> %a, <4 x double> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 2, i32 6>
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 3, i32 7>
 ; SLM-NEXT:    [[TMP6:%.*]] = fsub <2 x double> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R032:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; SLM-NEXT:    ret <4 x double> [[R032]]
+; SLM-NEXT:    [[R031:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; SLM-NEXT:    ret <4 x double> [[R031]]
 ;
 ; AVX-LABEL: @test_v4f64(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
@@ -204,8 +204,8 @@ define <8 x float> @test_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SLM-NEXT:    [[TMP4:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 4, i32 6, i32 12, i32 14>
 ; SLM-NEXT:    [[TMP5:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <4 x i32> <i32 5, i32 7, i32 13, i32 15>
 ; SLM-NEXT:    [[TMP6:%.*]] = fsub <4 x float> [[TMP4]], [[TMP5]]
-; SLM-NEXT:    [[R072:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; SLM-NEXT:    ret <8 x float> [[R072]]
+; SLM-NEXT:    [[R071:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; SLM-NEXT:    ret <8 x float> [[R071]]
 ;
 ; AVX-LABEL: @test_v8f32(
 ; AVX-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
@@ -324,8 +324,8 @@ define <16 x i16> @test_v16i16(<16 x i16> %a, <16 x i16> %b) {
 ; SSE-NEXT:    [[TMP4:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
 ; SSE-NEXT:    [[TMP5:%.*]] = shufflevector <16 x i16> [[A]], <16 x i16> [[B]], <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 25, i32 27, i32 29, i32 31>
 ; SSE-NEXT:    [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
-; SSE-NEXT:    [[RV152:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; SSE-NEXT:    ret <16 x i16> [[RV152]]
+; SSE-NEXT:    [[RV151:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP6]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; SSE-NEXT:    ret <16 x i16> [[RV151]]
 ;
 ; SLM-LABEL: @test_v16i16(
 ; SLM-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A:%.*]], <16 x i16> [[B:%.*]], <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 16, i32 18, i32 20, i32 22, i32 8, i32 10, i32 12, i32 14, i32 24, i32 26, i32 28, i32 30>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll b/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
index 7974bce183d45..ed58f407628cd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/insertvalue.ll
@@ -275,12 +275,12 @@ define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudov
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[C_STRUCT0:%.*]] = insertvalue [[PSEUDOVEC]] undef, float [[TMP5]], 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP4]], i32 1
-; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct0, float [[TMP6]], 1
+; CHECK-NEXT:    [[C_STRUCT1:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT0]], float [[TMP6]], 1
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x float> [[TMP4]], i32 2
-; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct1, float [[TMP7]], 2
+; CHECK-NEXT:    [[C_STRUCT2:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT1]], float [[TMP7]], 2
 ; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x float> [[TMP4]], i32 3
-; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] %c_struct2, float [[TMP8]], 3
-; CHECK-NEXT:    store [[PSEUDOVEC]] %c_struct3, %pseudovec* [[C:%.*]], align 4
+; CHECK-NEXT:    [[C_STRUCT3:%.*]] = insertvalue [[PSEUDOVEC]] [[C_STRUCT2]], float [[TMP8]], 3
+; CHECK-NEXT:    store [[PSEUDOVEC]] [[C_STRUCT3]], %pseudovec* [[C:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
 top:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
index 7956ab3411e64..5bfd4f0f5fcf7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/metadata.ll
@@ -8,12 +8,12 @@ define void @test1(double* %a, double* %b, double* %c) {
 ; CHECK-LABEL: @test1(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa [[TBAA0:![0-9]+]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !4
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C:%.*]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa !0
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -35,13 +35,13 @@ define void @test2(double* %a, double* %b, i8* %e) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast double* [[A:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast double* [[B:%.*]] to <2 x double>*
-; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa !0
+; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = fmul <2 x double> [[TMP1]], [[TMP3]], !fpmath !5
 ; CHECK-NEXT:    [[C:%.*]] = bitcast i8* [[E:%.*]] to double*
 ; CHECK-NEXT:    [[TMP5:%.*]] = bitcast double* [[C]] to <2 x double>*
-; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa !0
+; CHECK-NEXT:    store <2 x double> [[TMP4]], <2 x double>* [[TMP5]], align 8, !tbaa [[TBAA0]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -61,7 +61,6 @@ entry:
   ret void
 }
 
-;CHECK-DAG: !0 = !{[[TYPEC:!.*]], [[TYPEC]], i64 0}
 ;CHECK-DAG: !4 = !{float 5.000000e+00}
 ;CHECK-DAG: !5 = !{float 2.500000e+00}
 !0 = !{ float 5.0 }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
index b0cc02b649f04..1b224cb4109c1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi_block.ll
@@ -24,9 +24,11 @@ define i32 @bar(double* nocapture %A, i32 %d) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = fptrunc <2 x double> [[TMP2]] to <2 x float>
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[D:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP4]], label [[TMP7:%.*]], label [[TMP5:%.*]]
-; CHECK:         [[TMP6:%.*]] = tail call i32 (...) @foo()
+; CHECK:       5:
+; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 (...) @foo()
 ; CHECK-NEXT:    br label [[TMP7]]
-; CHECK:         [[TMP8:%.*]] = fadd <2 x float> [[TMP3]], <float 4.000000e+00, float 5.000000e+00>
+; CHECK:       7:
+; CHECK-NEXT:    [[TMP8:%.*]] = fadd <2 x float> [[TMP3]], <float 4.000000e+00, float 5.000000e+00>
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, double* [[A]], i64 8
 ; CHECK-NEXT:    [[TMP10:%.*]] = fpext <2 x float> [[TMP8]] to <2 x double>
 ; CHECK-NEXT:    [[TMP11:%.*]] = fadd <2 x double> [[TMP10]], <double 9.000000e+00, double 5.000000e+00>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
index 468cabc598f1c..c0ed976fd702f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll
@@ -12,10 +12,10 @@ define void @test_add_sdiv(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
 ; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
 ; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
-; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]]
-; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]]
-; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]]
-; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]]
+; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]], align 4
+; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]], align 4
 ; CHECK-NEXT:    [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
 ; CHECK-NEXT:    [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
 ; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
@@ -24,10 +24,10 @@ define void @test_add_sdiv(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[RES1:%.*]] = add nsw i32 [[V1]], [[Y1]]
 ; CHECK-NEXT:    [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
 ; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
-; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]]
-; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]]
-; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]]
-; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]]
+; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]], align 4
+; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]], align 4
+; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]], align 4
+; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -76,10 +76,10 @@ define void @test_urem_add(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[GEP2_1:%.*]] = getelementptr i32, i32* [[ARR2]], i32 1
 ; CHECK-NEXT:    [[GEP2_2:%.*]] = getelementptr i32, i32* [[ARR2]], i32 2
 ; CHECK-NEXT:    [[GEP2_3:%.*]] = getelementptr i32, i32* [[ARR2]], i32 3
-; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]]
-; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]]
-; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]]
-; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]]
+; CHECK-NEXT:    [[V0:%.*]] = load i32, i32* [[GEP1_0]], align 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, i32* [[GEP1_1]], align 4
+; CHECK-NEXT:    [[V2:%.*]] = load i32, i32* [[GEP1_2]], align 4
+; CHECK-NEXT:    [[V3:%.*]] = load i32, i32* [[GEP1_3]], align 4
 ; CHECK-NEXT:    [[Y0:%.*]] = add nsw i32 [[A0:%.*]], 1146
 ; CHECK-NEXT:    [[Y1:%.*]] = add nsw i32 [[A1:%.*]], 146
 ; CHECK-NEXT:    [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
@@ -88,10 +88,10 @@ define void @test_urem_add(i32 *%arr1, i32 *%arr2, i32 %a0, i32 %a1, i32 %a2, i3
 ; CHECK-NEXT:    [[RES1:%.*]] = urem i32 [[V1]], [[Y1]]
 ; CHECK-NEXT:    [[RES2:%.*]] = urem i32 [[V2]], [[Y2]]
 ; CHECK-NEXT:    [[RES3:%.*]] = add nsw i32 [[V3]], [[Y3]]
-; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]]
-; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]]
-; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]]
-; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]]
+; CHECK-NEXT:    store i32 [[RES0]], i32* [[GEP2_0]], align 4
+; CHECK-NEXT:    store i32 [[RES1]], i32* [[GEP2_1]], align 4
+; CHECK-NEXT:    store i32 [[RES2]], i32* [[GEP2_2]], align 4
+; CHECK-NEXT:    store i32 [[RES3]], i32* [[GEP2_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
index ceb48d98b744d..42c99c21c3a30 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr16899.ll
@@ -9,10 +9,10 @@ target triple = "i386--netbsd"
 define i32 @fn1() #0 {
 ; CHECK-LABEL: @fn1(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @a, align 4, !tbaa !0
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa !4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** @a, align 4, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA4:![0-9]+]]
 ; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4, !tbaa !4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4, !tbaa [[TBAA4]]
 ; CHECK-NEXT:    br label [[DO_BODY:%.*]]
 ; CHECK:       do.body:
 ; CHECK-NEXT:    [[C_0:%.*]] = phi i32 [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ADD2:%.*]], [[DO_BODY]] ]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
index 9132cf26c2218..681d30dff4a86 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction.ll
@@ -81,11 +81,11 @@ define i32 @horiz_max_multiple_uses([32 x i32]* %x, i32* %p) {
 ; CHECK-NEXT:    [[T4:%.*]] = load i32, i32* [[X4]], align 4
 ; CHECK-NEXT:    [[T5:%.*]] = load i32, i32* [[X5]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], [[T4]]
-; CHECK-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[T4]]
-; CHECK-NEXT:    [[C012345:%.*]] = icmp sgt i32 [[TMP5]], [[T5]]
-; CHECK-NEXT:    [[T17:%.*]] = select i1 [[C012345]], i32 [[TMP5]], i32 [[T5]]
-; CHECK-NEXT:    [[THREE_OR_FOUR:%.*]] = select i1 [[TMP4]], i32 3, i32 4
+; CHECK-NEXT:    [[MAX_ROOT_CMP:%.*]] = icmp sgt i32 [[TMP3]], [[T4]]
+; CHECK-NEXT:    [[MAX_ROOT_SEL:%.*]] = select i1 [[MAX_ROOT_CMP]], i32 [[TMP3]], i32 [[T4]]
+; CHECK-NEXT:    [[C012345:%.*]] = icmp sgt i32 [[MAX_ROOT_SEL]], [[T5]]
+; CHECK-NEXT:    [[T17:%.*]] = select i1 [[C012345]], i32 [[MAX_ROOT_SEL]], i32 [[T5]]
+; CHECK-NEXT:    [[THREE_OR_FOUR:%.*]] = select i1 [[MAX_ROOT_CMP]], i32 3, i32 4
 ; CHECK-NEXT:    store i32 [[THREE_OR_FOUR]], i32* [[P:%.*]], align 8
 ; CHECK-NEXT:    ret i32 [[T17]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
index ed2cd870cf6f0..930315a55dc08 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/remark_listcost.ll
@@ -5,21 +5,23 @@
 define void @vsub2_test(i32* %pin1, i32* %pin2, i32* %pout) #0 {
 ; CHECK-LABEL: @vsub2_test(
 ; CHECK-NEXT:    br label [[TMP1:%.*]]
-; CHECK:         [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[TMP1]] ]
+; CHECK:       1:
+; CHECK-NEXT:    [[IDX_04:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[TMP8:%.*]], [[TMP1]] ]
 ; CHECK-NEXT:    [[PO_03:%.*]] = phi i32* [ [[POUT:%.*]], [[TMP0]] ], [ [[TMP7:%.*]], [[TMP1]] ]
 ; CHECK-NEXT:    [[PTMPI2_02:%.*]] = phi i32* [ [[PIN2:%.*]], [[TMP0]] ], [ [[TMP4:%.*]], [[TMP1]] ]
 ; CHECK-NEXT:    [[PTMPI1_01:%.*]] = phi i32* [ [[PIN1:%.*]], [[TMP0]] ], [ [[TMP2:%.*]], [[TMP1]] ]
 ; CHECK-NEXT:    [[TMP2]] = getelementptr inbounds i32, i32* [[PTMPI1_01]], i64 1
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[PTMPI1_01]], align 4, !tbaa !1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[PTMPI1_01]], align 4, !tbaa [[TBAA1:![0-9]+]]
 ; CHECK-NEXT:    [[TMP4]] = getelementptr inbounds i32, i32* [[PTMPI2_02]], i64 1
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[PTMPI2_02]], align 4, !tbaa !1
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[PTMPI2_02]], align 4, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw i32 [[TMP3]], [[TMP5]]
 ; CHECK-NEXT:    [[TMP7]] = getelementptr inbounds i32, i32* [[PO_03]], i64 1
-; CHECK-NEXT:    store i32 [[TMP6]], i32* [[PO_03]], align 4, !tbaa !1
+; CHECK-NEXT:    store i32 [[TMP6]], i32* [[PO_03]], align 4, !tbaa [[TBAA1]]
 ; CHECK-NEXT:    [[TMP8]] = add nuw nsw i32 [[IDX_04]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TMP8]], 64
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[TMP9:%.*]], label [[TMP1]], !llvm.loop !5
-; CHECK:         ret void
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[TMP9:%.*]], label [[TMP1]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       9:
+; CHECK-NEXT:    ret void
 ;
   br label %1
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
index 3b5a53e44dec1..fefbbf1c75fd3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/undef_vect.ll
@@ -17,11 +17,11 @@ define void @_Z2azv() local_unnamed_addr {
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[DOTSROA_CAST_4]] to <8 x i32>*
 ; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP0]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp sgt i32 [[TMP2]], undef
-; CHECK-NEXT:    [[OP_EXTRA:%.*]] = select i1 [[TMP3]], i32 [[TMP2]], i32 undef
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[OP_EXTRA]], undef
-; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[TMP4]], i32 [[OP_EXTRA]], i32 undef
-; CHECK-NEXT:    [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_EXTRA1]]
+; CHECK-NEXT:    [[OP_EXTRA:%.*]] = icmp sgt i32 [[TMP2]], undef
+; CHECK-NEXT:    [[OP_EXTRA1:%.*]] = select i1 [[OP_EXTRA]], i32 [[TMP2]], i32 undef
+; CHECK-NEXT:    [[OP_EXTRA2:%.*]] = icmp sgt i32 [[OP_EXTRA1]], undef
+; CHECK-NEXT:    [[OP_EXTRA3:%.*]] = select i1 [[OP_EXTRA2]], i32 [[OP_EXTRA1]], i32 undef
+; CHECK-NEXT:    [[DOTSROA_SPECULATED_9:%.*]] = select i1 undef, i32 undef, i32 [[OP_EXTRA3]]
 ; CHECK-NEXT:    [[CMP_I1_10:%.*]] = icmp slt i32 [[DOTSROA_SPECULATED_9]], undef
 ; CHECK-NEXT:    ret void
 ;

From 6693c562f90925dd5214d86875a6f82ca5c6ef93 Mon Sep 17 00:00:00 2001
From: wlei <wlei@fb.com>
Date: Mon, 24 Jan 2022 16:55:05 -0800
Subject: [PATCH 478/946] [llvm-profgen] Support to load debug info from a
 second binary

For reducing binary size purpose, the binary's debug info and executable segment can be separated(like using objcopy --only-keep-debug). Here add support in llvm-profgen to use two binaries as input. The original one is executable binary and added for debug info only binary. Adding a flag `--debug-binary=file-path`, with this, the binary will load debug info from debug binary.

Reviewed By: hoy, wenlei

Differential Revision: https://reviews.llvm.org/D115948
---
 .../separate-debuginfo-binary.test            | 57 +++++++++++++++++++
 llvm/tools/llvm-profgen/ProfiledBinary.cpp    | 19 +++++--
 llvm/tools/llvm-profgen/ProfiledBinary.h      | 12 +++-
 llvm/tools/llvm-profgen/llvm-profgen.cpp      | 14 +++--
 4 files changed, 90 insertions(+), 12 deletions(-)
 create mode 100644 llvm/test/tools/llvm-profgen/separate-debuginfo-binary.test

diff --git a/llvm/test/tools/llvm-profgen/separate-debuginfo-binary.test b/llvm/test/tools/llvm-profgen/separate-debuginfo-binary.test
new file mode 100644
index 0000000000000..3daeac33471ce
--- /dev/null
+++ b/llvm/test/tools/llvm-profgen/separate-debuginfo-binary.test
@@ -0,0 +1,57 @@
+; RUN: llvm-objcopy --strip-debug %S/Inputs/inline-noprobe.perfbin %t1
+; RUN: llvm-objcopy --only-keep-debug %S/Inputs/inline-noprobe.perfbin %t2
+; RUN: echo -e "0\n0" > %t
+; RUN: llvm-profgen --format=text --unsymbolized-profile=%t --binary=%t1 --debug-binary=%t2 --output=%t3 --fill-zero-for-all-funcs
+; RUN: FileCheck %s --input-file %t3 --check-prefix=CHECK
+
+; RUN: llvm-objcopy --strip-debug %S/Inputs/inline-cs-pseudoprobe.perfbin %t4
+; RUN: llvm-objcopy --only-keep-debug %S/Inputs/inline-cs-pseudoprobe.perfbin %t5
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/inline-cs-pseudoprobe.perfscript --binary=%t4 --debug-binary=%t5 --output=%t6 --profile-summary-hot-count=0 --csspgo-preinliner=0
+; RUN: FileCheck %s --input-file %t6 --check-prefix=CHECK-CS-PROBE
+
+; CHECK: bar:0:0
+; CHECK:  1: 0
+; CHECK:  5: 0
+; CHECK: foo:0:0
+; CHECK:  0: 0
+; CHECK:  2.1: 0
+; CHECK:  3: 0
+; CHECK:  3.2: 0
+; CHECK:  4: 0
+; CHECK:  3.1: bar:0
+; CHECK:   1: 0
+; CHECK:   65533: 0
+; CHECK:  3.2: bar:0
+; CHECK:   1: 0
+; CHECK:   7: 0
+; CHECK: main:0:0
+; CHECK:  0: 0
+; CHECK:  2: 0
+; CHECK:  1: foo:0
+; CHECK:   2.1: 0
+; CHECK:   3: 0
+; CHECK:   3.2: 0
+; CHECK:   4: 0
+; CHECK:   65526: 0
+; CHECK:   3.1: bar:0
+; CHECK:    1: 0
+; CHECK:    65533: 0
+; CHECK:   3.2: bar:0
+; CHECK:    1: 0
+
+
+; CHECK-CS-PROBE: [main:2 @ foo]:74:0
+; CHECK-CS-PROBE:   1: 0
+; CHECK-CS-PROBE:   2: 15
+; CHECK-CS-PROBE:   3: 15
+; CHECK-CS-PROBE:   4: 14
+; CHECK-CS-PROBE:   5: 1
+; CHECK-CS-PROBE:   6: 15
+; CHECK-CS-PROBE:   7: 0
+; CHECK-CS-PROBE:   8: 14 bar:14
+; CHECK-CS-PROBE:   9: 0
+; CHECK-CS-PROBE: !CFGChecksum: 563088904013236
+; CHECK-CS-PROBE: [main:2 @ foo:8 @ bar]:28:14
+; CHECK-CS-PROBE:   1: 14
+; CHECK-CS-PROBE:   4: 14
+; CHECK-CS-PROBE: !CFGChecksum: 72617220756
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 6dc0d2604367d..a773a3c98d409 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -187,9 +187,9 @@ void ProfiledBinary::warnNoFuncEntry() {
 void ProfiledBinary::load() {
   // Attempt to open the binary.
   OwningBinary<Binary> OBinary = unwrapOrError(createBinary(Path), Path);
-  Binary &Binary = *OBinary.getBinary();
+  Binary &ExeBinary = *OBinary.getBinary();
 
-  auto *Obj = dyn_cast<ELFObjectFileBase>(&Binary);
+  auto *Obj = dyn_cast<ELFObjectFileBase>(&ExeBinary);
   if (!Obj)
     exitWithError("not a valid Elf image", Path);
 
@@ -206,7 +206,15 @@ void ProfiledBinary::load() {
   decodePseudoProbe(Obj);
 
   // Load debug info of subprograms from DWARF section.
-  loadSymbolsFromDWARF(*cast<ObjectFile>(&Binary));
+  // If path of debug info binary is specified, use the debug info from it,
+  // otherwise use the debug info from the executable binary.
+  if (!DebugBinaryPath.empty()) {
+    OwningBinary<Binary> DebugPath =
+        unwrapOrError(createBinary(DebugBinaryPath), DebugBinaryPath);
+    loadSymbolsFromDWARF(*dyn_cast<ObjectFile>(DebugPath.getBinary()));
+  } else {
+    loadSymbolsFromDWARF(*dyn_cast<ObjectFile>(&ExeBinary));
+  }
 
   // Disassemble the text sections.
   disassemble(Obj);
@@ -684,8 +692,9 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
          "Binary should only symbolize its own instruction");
   auto Addr = object::SectionedAddress{IP.Offset + getPreferredBaseAddress(),
                                        object::SectionedAddress::UndefSection};
-  DIInliningInfo InlineStack =
-      unwrapOrError(Symbolizer->symbolizeInlinedCode(Path, Addr), getName());
+  DIInliningInfo InlineStack = unwrapOrError(
+      Symbolizer->symbolizeInlinedCode(SymbolizerPath.str(), Addr),
+      SymbolizerPath);
 
   SampleContextFrameVector CallStack;
   for (int32_t I = InlineStack.getNumberOfFrames() - 1; I >= 0; I--) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index b5c985fe6ebbc..d3d1c6f1fd248 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -185,8 +185,12 @@ class BinarySizeContextTracker {
 using OffsetRange = std::pair<uint64_t, uint64_t>;
 
 class ProfiledBinary {
-  // Absolute path of the binary.
+  // Absolute path of the executable binary.
   std::string Path;
+  // Path of the debug info binary.
+  std::string DebugBinaryPath;
+  // Path of symbolizer path which should be pointed to binary with debug info.
+  StringRef SymbolizerPath;
   // The target triple.
   Triple TheTriple;
   // The runtime base address that the first executable segment is loaded at.
@@ -311,10 +315,12 @@ class ProfiledBinary {
   void load();
 
 public:
-  ProfiledBinary(const StringRef Path)
-      : Path(Path), ProEpilogTracker(this),
+  ProfiledBinary(const StringRef ExeBinPath, const StringRef DebugBinPath)
+      : Path(ExeBinPath), DebugBinaryPath(DebugBinPath), ProEpilogTracker(this),
         TrackFuncContextSize(EnableCSPreInliner &&
                              UseContextCostForPreInliner) {
+    // Point to executable binary if debug info binary is not specified.
+    SymbolizerPath = DebugBinPath.empty() ? ExeBinPath : DebugBinPath;
     setupSymbolizer();
     load();
   }
diff --git a/llvm/tools/llvm-profgen/llvm-profgen.cpp b/llvm/tools/llvm-profgen/llvm-profgen.cpp
index 0ab93ca0bd65b..f092df04d52b3 100644
--- a/llvm/tools/llvm-profgen/llvm-profgen.cpp
+++ b/llvm/tools/llvm-profgen/llvm-profgen.cpp
@@ -48,9 +48,15 @@ static cl::opt<std::string> UnsymbolizedProfFilename(
 static cl::alias UPA("up", cl::desc("Alias for --unsymbolized-profile"),
                      cl::aliasopt(UnsymbolizedProfFilename));
 
-static cl::opt<std::string> BinaryPath(
-    "binary", cl::value_desc("binary"), cl::Required,
-    cl::desc("Path of profiled binary, only one binary is supported."),
+static cl::opt<std::string>
+    BinaryPath("binary", cl::value_desc("binary"), cl::Required,
+               cl::desc("Path of profiled executable binary."),
+               cl::cat(ProfGenCategory));
+
+static cl::opt<std::string> DebugBinPath(
+    "debug-binary", cl::value_desc("debug-binary"), cl::ZeroOrMore,
+    cl::desc("Path of debug info binary, llvm-profgen will load the DWARF info "
+             "from it instead of the executable binary."),
     cl::cat(ProfGenCategory));
 
 extern cl::opt<bool> ShowDisassemblyOnly;
@@ -135,7 +141,7 @@ int main(int argc, const char *argv[]) {
 
   // Load symbols and disassemble the code of a given binary.
   std::unique_ptr<ProfiledBinary> Binary =
-      std::make_unique<ProfiledBinary>(BinaryPath);
+      std::make_unique<ProfiledBinary>(BinaryPath, DebugBinPath);
   if (ShowDisassemblyOnly)
     return EXIT_SUCCESS;
 

From 8b29b84c99ac8140c9820fd34b733bdedf5bb0f5 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 22 Jan 2022 15:13:13 -0500
Subject: [PATCH 479/946] [libc++] Fix LWG3422 "Issues of seed_seq's
 constructors"

https://cplusplus.github.io/LWG/issue3422

Also add a static_assert to check the "Mandates:" on the
iterator-pair constructor. Oddly, the `InputIterator` parameter
itself is merely preconditioned, not constrained, to satisfy the
input iterator requirements.

Also drive-by rename `init` to `__init`.

Differential Revision: https://reviews.llvm.org/D117962
---
 libcxx/docs/Status/Cxx2bIssues.csv            |  2 +-
 libcxx/include/__random/seed_seq.h            | 31 ++++++++------
 .../rand.util.seedseq/default.pass.cpp        |  3 ++
 .../rand.util.seedseq/iterator.pass.cpp       | 42 ++++++++++++++++++-
 .../rand.util.seedseq/iterator.verify.cpp     | 30 +++++++++++++
 5 files changed, 93 insertions(+), 15 deletions(-)
 create mode 100644 libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.verify.cpp

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 9baefabe05da8..0ce504816fe3b 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -108,7 +108,7 @@
 `3361 <https://wg21.link/LWG3361>`__,"``safe_range<SomeRange&>`` case","October 2021","","","|ranges|"
 `3392 <https://wg21.link/LWG3392>`__,"``ranges::distance()`` cannot be used on a move-only iterator with a sized sentinel","October 2021","","","|ranges|"
 `3407 <https://wg21.link/LWG3407>`__,"Some problems with the wording changes of P1739R4","October 2021","","","|ranges|"
-`3422 <https://wg21.link/LWG3422>`__,"Issues of ``seed_seq``'s constructors","October 2021","",""
+`3422 <https://wg21.link/LWG3422>`__,"Issues of ``seed_seq``'s constructors","October 2021","|Complete|","14.0"
 `3470 <https://wg21.link/LWG3470>`__,"``convertible-to-non-slicing`` seems to reject valid case","October 2021","","","|ranges|"
 `3480 <https://wg21.link/LWG3480>`__,"``directory_iterator`` and ``recursive_directory_iterator`` are not C++20 ranges","October 2021","|Complete|","14.0","|ranges|"
 `3498 <https://wg21.link/LWG3498>`__,"Inconsistent ``noexcept``-specifiers for ``basic_syncbuf``","October 2021","",""
diff --git a/libcxx/include/__random/seed_seq.h b/libcxx/include/__random/seed_seq.h
index bf27af6627a54..1a0877995650e 100644
--- a/libcxx/include/__random/seed_seq.h
+++ b/libcxx/include/__random/seed_seq.h
@@ -31,25 +31,24 @@ class _LIBCPP_TEMPLATE_VIS seed_seq
     // types
     typedef uint32_t result_type;
 
-private:
-    vector<result_type> __v_;
-
-    template<class _InputIterator>
-        void init(_InputIterator __first, _InputIterator __last);
-public:
     // constructors
     _LIBCPP_INLINE_VISIBILITY
     seed_seq() _NOEXCEPT {}
 #ifndef _LIBCPP_CXX03_LANG
-    template<class _Tp>
-        _LIBCPP_INLINE_VISIBILITY
-        seed_seq(initializer_list<_Tp> __il) {init(__il.begin(), __il.end());}
+    template<class _Tp, __enable_if_t<is_integral<_Tp>::value>* = nullptr>
+    _LIBCPP_INLINE_VISIBILITY
+    seed_seq(initializer_list<_Tp> __il) {
+        __init(__il.begin(), __il.end());
+    }
 #endif // _LIBCPP_CXX03_LANG
 
     template<class _InputIterator>
-        _LIBCPP_INLINE_VISIBILITY
-        seed_seq(_InputIterator __first, _InputIterator __last)
-             {init(__first, __last);}
+    _LIBCPP_INLINE_VISIBILITY
+    seed_seq(_InputIterator __first, _InputIterator __last) {
+        static_assert(is_integral<typename iterator_traits<_InputIterator>::value_type>::value,
+            "Mandates: iterator_traits<InputIterator>::value_type is an integer type");
+        __init(__first, __last);
+    }
 
     // generating functions
     template<class _RandomAccessIterator>
@@ -68,11 +67,17 @@ class _LIBCPP_TEMPLATE_VIS seed_seq
 
     _LIBCPP_INLINE_VISIBILITY
     static result_type _Tp(result_type __x) {return __x ^ (__x >> 27);}
+
+private:
+    template<class _InputIterator>
+    void __init(_InputIterator __first, _InputIterator __last);
+
+    vector<result_type> __v_;
 };
 
 template<class _InputIterator>
 void
-seed_seq::init(_InputIterator __first, _InputIterator __last)
+seed_seq::__init(_InputIterator __first, _InputIterator __last)
 {
     for (_InputIterator __s = __first; __s != __last; ++__s)
         __v_.push_back(*__s & 0xFFFFFFFF);
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
index 33f855bab30c3..c99d5e276b2b5 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/default.pass.cpp
@@ -19,8 +19,11 @@
 
 int main(int, char**)
 {
+  ASSERT_NOEXCEPT(std::seed_seq());
+  {
     std::seed_seq s;
     assert(s.size() == 0);
+  }
 
   return 0;
 }
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
index 2e2c6365eb4f0..1dd9a055f7ca3 100644
--- a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.pass.cpp
@@ -18,11 +18,13 @@
 
 #include "test_macros.h"
 
-int main(int, char**)
+void test()
 {
+  {
     unsigned a[5] = {5, 4, 3, 2, 1};
     std::seed_seq s(a, a+5);
     assert(s.size() == 5);
+
     unsigned b[5] = {0};
     s.param(b);
     assert(b[0] == 5);
@@ -30,6 +32,44 @@ int main(int, char**)
     assert(b[2] == 3);
     assert(b[3] == 2);
     assert(b[4] == 1);
+  }
+  {
+    // Test truncation to 32 bits
+    unsigned long long a[4] = {
+      0x1234000056780000uLL,
+      0x0000001234567800uLL,
+      0xFFFFFFFFFFFFFFFFuLL,
+      0x0000000180000000uLL,
+    };
+    std::seed_seq s(a, a+4);
+    assert(s.size() == 4);
+
+    unsigned b[4] = {0};
+    s.param(b);
+    assert(b[0] == 0x56780000u);
+    assert(b[1] == 0x34567800u);
+    assert(b[2] == 0xFFFFFFFFu);
+    assert(b[3] == 0x80000000u);
+  }
+#if TEST_STD_VER >= 11
+  {
+    // Test uniform initialization syntax (LWG 3422)
+    unsigned a[3] = {1, 2, 3};
+    std::seed_seq s{a, a+3};  // uniform initialization
+    assert(s.size() == 3);
+
+    unsigned b[3] = {0};
+    s.param(b);
+    assert(b[0] == 1);
+    assert(b[1] == 2);
+    assert(b[2] == 3);
+  }
+#endif // TEST_STD_VER >= 11
+}
+
+int main(int, char**)
+{
+  test();
 
   return 0;
 }
diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.verify.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.verify.cpp
new file mode 100644
index 0000000000000..d5c57841c8fa2
--- /dev/null
+++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/iterator.verify.cpp
@@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <random>
+
+// class seed_seq;
+
+// template<class InputIterator>
+//   seed_seq(InputIterator begin, InputIterator end);
+// Mandates: iterator_traits<InputIterator>::value_type is an integer type.
+
+#include <random>
+
+void test()
+{
+  {
+    bool a[2] = {true, false};
+    std::seed_seq s(a, a+2); // OK
+  }
+  {
+    double a[2] = {1, 2};
+    std::seed_seq s(a, a+2); // expected-error@*:* {{Mandates: iterator_traits<InputIterator>::value_type is an integer type}}
+        // expected-error@*:* {{invalid operands to binary expression ('double' and 'unsigned int')}}
+  }
+}

From 16bff06790a7652b282352b07250b627e5787c8c Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Mon, 24 Jan 2022 17:18:18 -0800
Subject: [PATCH 480/946] [lldb] Make PythonDataObjects work with Python 2

I considered keeping this change strictly downstream. Since we still
have a bunch of places that check for Python 2, I figured it doesn't
harm to land it upstream and avoid the conflict when I eventually do
remove them (hopefully soon!).
---
 .../Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp  | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
index 32020f983f605..68f4e90d70f6e 100644
--- a/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
+++ b/lldb/source/Plugins/ScriptInterpreter/Python/PythonDataObjects.cpp
@@ -70,7 +70,9 @@ Expected<std::string> python::As<std::string>(Expected<PythonObject> &&obj) {
 }
 
 static bool python_is_finalizing() {
-#if PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7
+#if PY_MAJOR_VERSION == 2
+  return false;
+#elif PY_MAJOR_VERSION == 3 && PY_MINOR_VERSION < 7
   return _Py_Finalizing != nullptr;
 #else
   return _Py_IsFinalizing();
@@ -279,7 +281,9 @@ PythonObject PythonObject::GetAttributeValue(llvm::StringRef attr) const {
 }
 
 StructuredData::ObjectSP PythonObject::CreateStructuredObject() const {
+#if PY_MAJOR_VERSION >= 3
   assert(PyGILState_Check());
+#endif
   switch (GetObjectType()) {
   case PyObjectType::Dictionary:
     return PythonDictionary(PyRefType::Borrowed, m_py_obj)

From 06cfdd5224bf5496e3d3dbdb9f73e77161ad5438 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Mon, 24 Jan 2022 14:02:20 -0500
Subject: [PATCH 481/946] [OpenMP][Fix] Properly inherit calling convention

Previously in OpenMPOpt we did not correctly inherit the calling
convention of the callee when creating new OpenMP runtime calls. This
created issues when the calling convention was changed during
`GlobalOpt` but a new call was creating without the correct calling
convention. This lead to the call being replaced with a poison value in
`InstCombine` due to undefined behaviour and causing large portions of
the program to be incorrectly eliminated. This patch correctly inherits
the existing calling convention from the callee.

Reviewed By: tianshilei1992, jdoerfert

Differential Revision: https://reviews.llvm.org/D118059
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp      | 57 +++++++++++++++-------
 llvm/test/Transforms/OpenMP/spmdization.ll |  6 ++-
 2 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 416c3b8b01d40..87a1f7f7045ab 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -417,6 +417,12 @@ struct OMPInformationCache : public InformationCache {
       recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
   }
 
+  // Helper function to inherit the calling convention of the function callee.
+  void setCallingConvention(FunctionCallee Callee, CallInst *CI) {
+    if (Function *Fn = dyn_cast<Function>(Callee.getCallee()))
+      CI->setCallingConv(Fn->getCallingConv());
+  }
+
   /// Helper to initialize all runtime function information for those defined
   /// in OpenMPKinds.def.
   void initializeRuntimeFunctions() {
@@ -1531,6 +1537,7 @@ struct OpenMPOpt {
 
     CallInst *IssueCallsite =
         CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
+    OMPInfoCache.setCallingConvention(IssueDecl, IssueCallsite);
     RuntimeCall.eraseFromParent();
 
     // Add "wait" runtime call declaration:
@@ -1543,7 +1550,9 @@ struct OpenMPOpt {
             OffloadArray::DeviceIDArgNum), // device_id.
         Handle                             // handle to wait on.
     };
-    CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
+    CallInst *WaitCallsite = CallInst::Create(
+        WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
+    OMPInfoCache.setCallingConvention(WaitDecl, WaitCallsite);
 
     return true;
   }
@@ -3241,8 +3250,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
       FunctionCallee HardwareTidFn =
           OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
               M, OMPRTL___kmpc_get_hardware_thread_id_in_block);
-      Value *Tid =
+      CallInst *Tid =
           OMPInfoCache.OMPBuilder.Builder.CreateCall(HardwareTidFn, {});
+      Tid->setDebugLoc(DL);
+      OMPInfoCache.setCallingConvention(HardwareTidFn, Tid);
       Value *TidCheck = OMPInfoCache.OMPBuilder.Builder.CreateIsNull(Tid);
       OMPInfoCache.OMPBuilder.Builder
           .CreateCondBr(TidCheck, RegionStartBB, RegionBarrierBB)
@@ -3255,14 +3266,18 @@ struct AAKernelInfoFunction : AAKernelInfo {
               M, OMPRTL___kmpc_barrier_simple_spmd);
       OMPInfoCache.OMPBuilder.updateToLocation(InsertPointTy(
           RegionBarrierBB, RegionBarrierBB->getFirstInsertionPt()));
-      OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid})
-          ->setDebugLoc(DL);
+      CallInst *Barrier =
+          OMPInfoCache.OMPBuilder.Builder.CreateCall(BarrierFn, {Ident, Tid});
+      Barrier->setDebugLoc(DL);
+      OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
 
       // Second barrier ensures workers have read broadcast values.
-      if (HasBroadcastValues)
-        CallInst::Create(BarrierFn, {Ident, Tid}, "",
-                         RegionBarrierBB->getTerminator())
-            ->setDebugLoc(DL);
+      if (HasBroadcastValues) {
+        CallInst *Barrier = CallInst::Create(BarrierFn, {Ident, Tid}, "",
+                                             RegionBarrierBB->getTerminator());
+        Barrier->setDebugLoc(DL);
+        OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
+      }
     };
 
     auto &AllocSharedRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
@@ -3532,10 +3547,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
     FunctionCallee WarpSizeFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_get_warp_size);
-    Instruction *BlockHwSize =
+    CallInst *BlockHwSize =
         CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+    OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
     BlockHwSize->setDebugLoc(DLoc);
-    Instruction *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+    CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+    OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
     WarpSize->setDebugLoc(DLoc);
     Instruction *BlockSize =
         BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
@@ -3575,8 +3592,10 @@ struct AAKernelInfoFunction : AAKernelInfo {
     FunctionCallee BarrierFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_barrier_simple_generic);
-    CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
-        ->setDebugLoc(DLoc);
+    CallInst *Barrier =
+        CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB);
+    OMPInfoCache.setCallingConvention(BarrierFn, Barrier);
+    Barrier->setDebugLoc(DLoc);
 
     if (WorkFnAI->getType()->getPointerAddressSpace() !=
         (unsigned int)AddressSpace::Generic) {
@@ -3592,8 +3611,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
     FunctionCallee KernelParallelFn =
         OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
             M, OMPRTL___kmpc_kernel_parallel);
-    Instruction *IsActiveWorker = CallInst::Create(
+    CallInst *IsActiveWorker = CallInst::Create(
         KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
+    OMPInfoCache.setCallingConvention(KernelParallelFn, IsActiveWorker);
     IsActiveWorker->setDebugLoc(DLoc);
     Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
                                        StateMachineBeginBB);
@@ -3673,10 +3693,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
                        StateMachineIfCascadeCurrentBB)
         ->setDebugLoc(DLoc);
 
-    CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
-                         M, OMPRTL___kmpc_kernel_end_parallel),
-                     {}, "", StateMachineEndParallelBB)
-        ->setDebugLoc(DLoc);
+    FunctionCallee EndParallelFn =
+        OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
+            M, OMPRTL___kmpc_kernel_end_parallel);
+    CallInst *EndParallel =
+        CallInst::Create(EndParallelFn, {}, "", StateMachineEndParallelBB);
+    OMPInfoCache.setCallingConvention(EndParallelFn, EndParallel);
+    EndParallel->setDebugLoc(DLoc);
     BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
         ->setDebugLoc(DLoc);
 
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 5051bce98279d..12ad3b831e3b8 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -1430,7 +1430,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*) to i32*
 ; AMDGPU-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; AMDGPU:       region.check.tid:
-; AMDGPU-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; AMDGPU-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
 ; AMDGPU-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
 ; AMDGPU-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; AMDGPU:       region.guarded:
@@ -1466,7 +1466,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32*
 ; NVPTX-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; NVPTX:       region.check.tid:
-; NVPTX-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block()
+; NVPTX-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
 ; NVPTX-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
 ; NVPTX-NEXT:    br i1 [[TMP1]], label [[REGION_GUARDED:%.*]], label [[REGION_BARRIER:%.*]]
 ; NVPTX:       region.guarded:
@@ -2328,6 +2328,8 @@ entry:
   ret void
 }
 
+declare fastcc i32 @__kmpc_get_hardware_thread_id_in_block();
+
 attributes #0 = { alwaysinline convergent norecurse nounwind }
 attributes #1 = { argmemonly mustprogress nofree nosync nounwind willreturn }
 attributes #2 = { convergent }

From 5eb49009ebe6f1672e6c72f8ea1fe07d4018f682 Mon Sep 17 00:00:00 2001
From: Joseph Huber <jhuber6@vols.utk.edu>
Date: Mon, 24 Jan 2022 15:45:27 -0500
Subject: [PATCH 482/946] [OpenMP] Add more identifier to created shared
 globals

Currenly we push some variables to a global constant containing shared
memory as an optimization. This generated constant had internal linkage
and should not have collided with any known identifiers in the
translation unit. However, there have been observed cases of this
optimiztaion unintentionally colliding with undocumented PTX
identifiers. This patch adds a suffix to the created globals to
hopefully bypass this.

Depends on D118059

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D118068
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp         |  2 +-
 .../OpenMP/replace_globalization.ll           |  8 +--
 llvm/test/Transforms/OpenMP/spmdization.ll    | 56 +++++++++----------
 3 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 87a1f7f7045ab..6288d2ff4b01b 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -2779,7 +2779,7 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
       Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
       auto *SharedMem = new GlobalVariable(
           *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
-          UndefValue::get(Int8ArrTy), CB->getName(), nullptr,
+          UndefValue::get(Int8ArrTy), CB->getName() + "_shared", nullptr,
           GlobalValue::NotThreadLocal,
           static_cast<unsigned>(AddressSpace::Shared));
       auto *NewBuffer =
diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll
index 56dd9dd20893a..8c079317bcf11 100644
--- a/llvm/test/Transforms/OpenMP/replace_globalization.ll
+++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll
@@ -119,8 +119,8 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8*
 ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c"
 ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; CHECK: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4
-; CHECK: @[[Y:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; CHECK: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4
+; CHECK: @[[Y_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ;.
 ; CHECK-LABEL: define {{[^@]+}}@foo() {
 ; CHECK-NEXT:  entry:
@@ -139,13 +139,13 @@ declare void @unknown_no_openmp() "llvm.assume"="omp_no_openmp"
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[C]], -1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]]
 ; CHECK:       master1:
-; CHECK-NEXT:    call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*)) #[[ATTR6]]
+; CHECK-NEXT:    call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
 ; CHECK-NEXT:    br label [[NEXT:%.*]]
 ; CHECK:       next:
 ; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR4]]
 ; CHECK-NEXT:    br label [[MASTER2:%.*]]
 ; CHECK:       master2:
-; CHECK-NEXT:    call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y, i32 0, i32 0) to i8*)) #[[ATTR6]]
+; CHECK-NEXT:    call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true)
diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll
index 12ad3b831e3b8..b8de4034185d1 100644
--- a/llvm/test/Transforms/OpenMP/spmdization.ll
+++ b/llvm/test/Transforms/OpenMP/spmdization.ll
@@ -108,41 +108,41 @@
 ; AMDGPU: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
 ; AMDGPU: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; AMDGPU: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
-; AMDGPU: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; AMDGPU: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; AMDGPU: @[[X_shared_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 ; NVPTX: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
 ; NVPTX: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
-; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
+; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 3
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
 ; NVPTX: @[[GLOB2:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 22, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
-; NVPTX: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
-; NVPTX: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; NVPTX: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; NVPTX: @[[X_shared1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX: @[[__OMP_OUTLINED__9_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ;.
 ; AMDGPU-DISABLED: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [23 x i8] c"
 ; AMDGPU-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; AMDGPU-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
-; AMDGPU-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
-; AMDGPU-DISABLED: @[[X_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; AMDGPU-DISABLED: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; AMDGPU-DISABLED: @[[X_shared_1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ; AMDGPU-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -153,13 +153,13 @@
 ; NVPTX-DISABLED: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_L5_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_STACK_VAR_L20_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
-; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_SHARED_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_L35_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
+; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_SEQUENTIAL_LOOP_TO_shared_VAR_GUARDED_L50_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TARGET_L65_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[__OMP_OFFLOADING_FD02_2044372E_DO_NOT_SPMDIZE_TASK_L74_EXEC_MODE:[a-zA-Z0-9_$"\\.-]+]] = weak constant i8 1
 ; NVPTX-DISABLED: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [6 x i8*] [i8* @__omp_offloading_fd02_2044372e_sequential_loop_l5_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_stack_var_l20_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_l35_exec_mode, i8* @__omp_offloading_fd02_2044372e_sequential_loop_to_shared_var_guarded_l50_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_target_l65_exec_mode, i8* @__omp_offloading_fd02_2044372e_do_not_spmdize_task_l74_exec_mode], section "llvm.metadata"
-; NVPTX-DISABLED: @[[X:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
-; NVPTX-DISABLED: @[[X1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; NVPTX-DISABLED: @[[X_shared:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
+; NVPTX-DISABLED: @[[X_shared1:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4
 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__1_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__3_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
 ; NVPTX-DISABLED: @[[__OMP_OUTLINED__5_WRAPPER_ID:[a-zA-Z0-9_$"\\.-]+]] = private constant i8 undef
@@ -1049,7 +1049,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       for.body:
 ; AMDGPU-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
 ; AMDGPU-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 1)
@@ -1070,7 +1070,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       for.body:
 ; NVPTX-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
 ; NVPTX-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 1)
@@ -1091,7 +1091,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       for.body:
 ; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1)
@@ -1112,7 +1112,7 @@ define internal void @__omp_outlined__4(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       for.body:
 ; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
+; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26:![0-9]+]]
 ; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1)
@@ -1427,7 +1427,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-NEXT:  entry:
 ; AMDGPU-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; AMDGPU-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*) to i32*
+; AMDGPU-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32*
 ; AMDGPU-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; AMDGPU:       region.check.tid:
 ; AMDGPU-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
@@ -1452,7 +1452,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-NEXT:    ret void
 ; AMDGPU:       for.body:
 ; AMDGPU-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
 ; AMDGPU-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-NEXT:    [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; AMDGPU-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP3]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 1)
@@ -1463,7 +1463,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-NEXT:  entry:
 ; NVPTX-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; NVPTX-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32*
+; NVPTX-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32*
 ; NVPTX-NEXT:    br label [[REGION_CHECK_TID:%.*]]
 ; NVPTX:       region.check.tid:
 ; NVPTX-NEXT:    [[TMP0:%.*]] = call fastcc i32 @__kmpc_get_hardware_thread_id_in_block()
@@ -1488,7 +1488,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-NEXT:    ret void
 ; NVPTX:       for.body:
 ; NVPTX-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
+; NVPTX-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP2]], align 8, !tbaa [[TBAA26]]
 ; NVPTX-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-NEXT:    [[TMP4:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; NVPTX-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP3]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 1)
@@ -1499,7 +1499,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; AMDGPU-DISABLED-NEXT:  entry:
 ; AMDGPU-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; AMDGPU-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*) to i32*
+; AMDGPU-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*) to i32*
 ; AMDGPU-DISABLED-NEXT:    store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; AMDGPU-DISABLED:       for.cond:
@@ -1511,7 +1511,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; AMDGPU-DISABLED-NEXT:    ret void
 ; AMDGPU-DISABLED:       for.body:
 ; AMDGPU-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
+; AMDGPU-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared.1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; AMDGPU-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; AMDGPU-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1)
@@ -1522,7 +1522,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-SAME: (i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] {
 ; NVPTX-DISABLED-NEXT:  entry:
 ; NVPTX-DISABLED-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8
-; NVPTX-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*) to i32*
+; NVPTX-DISABLED-NEXT:    [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*) to i32*
 ; NVPTX-DISABLED-NEXT:    store i32 42, i32* [[X_ON_STACK]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    br label [[FOR_COND:%.*]]
 ; NVPTX-DISABLED:       for.cond:
@@ -1534,7 +1534,7 @@ define internal void @__omp_outlined__6(i32* noalias %.global_tid., i32* noalias
 ; NVPTX-DISABLED-NEXT:    ret void
 ; NVPTX-DISABLED:       for.body:
 ; NVPTX-DISABLED-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0
-; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
+; NVPTX-DISABLED-NEXT:    store i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @x_shared1, i32 0, i32 0) to i8*), i8** [[TMP0]], align 8, !tbaa [[TBAA26]]
 ; NVPTX-DISABLED-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4, !tbaa [[TBAA18]]
 ; NVPTX-DISABLED-NEXT:    [[TMP2:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
 ; NVPTX-DISABLED-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 1)

From 92c1c63daeaf0b6b7abc6561133e2d3dbda80f8c Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik@google.com>
Date: Mon, 24 Jan 2022 14:23:03 -0800
Subject: [PATCH 483/946] [mlir][sparse] integration test for sparse output
 operation

Reviewed By: bixia

Differential Revision: https://reviews.llvm.org/D118079
---
 .../SparseTensor/python/test_output.py        | 127 ++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 mlir/test/Integration/Dialect/SparseTensor/python/test_output.py

diff --git a/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py
new file mode 100644
index 0000000000000..eaf39db09a2b0
--- /dev/null
+++ b/mlir/test/Integration/Dialect/SparseTensor/python/test_output.py
@@ -0,0 +1,127 @@
+# RUN: SUPPORT_LIB=%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
+# RUN:   %PYTHON %s | FileCheck %s
+
+import ctypes
+import os
+import tempfile
+
+import mlir.all_passes_registration
+
+from mlir import execution_engine
+from mlir import ir
+from mlir import passmanager
+from mlir import runtime as rt
+
+from mlir.dialects import builtin
+from mlir.dialects import sparse_tensor as st
+
+
+# TODO: move more into actual IR building.
+def boilerplate(attr: st.EncodingAttr):
+  """Returns boilerplate main method."""
+  return f"""
+func @main(%p : !llvm.ptr<i8>) -> () attributes {{ llvm.emit_c_interface }} {{
+  %d = arith.constant sparse<[[0, 0], [1, 1], [0, 9], [9, 0], [4, 4]],
+                             [1.0, 2.0, 3.0, 4.0, 5.0]> : tensor<10x10xf64>
+  %a = sparse_tensor.convert %d : tensor<10x10xf64> to tensor<10x10xf64, {attr}>
+  sparse_tensor.out %a, %p : tensor<10x10xf64, {attr}>, !llvm.ptr<i8>
+  return
+}}
+"""
+
+
+def expected():
+  """Returns expected contents of output.
+
+  Regardless of the dimension ordering, compression, and bitwidths that are
+  used in the sparse tensor, the output is always lexicographically sorted
+  by natural index order.
+  """
+  return f"""; extended FROSTT format
+2 5
+10 10
+1 1 1
+1 10 3
+2 2 2
+5 5 5
+10 1 4
+"""
+
+
+def build_compile_and_run_output(attr: st.EncodingAttr, support_lib: str,
+                                 compiler):
+  # Build and Compile.
+  module = ir.Module.parse(boilerplate(attr))
+  compiler(module)
+  engine = execution_engine.ExecutionEngine(
+      module, opt_level=0, shared_libs=[support_lib])
+
+  # Invoke the kernel and compare output.
+  with tempfile.TemporaryDirectory() as test_dir:
+    out = os.path.join(test_dir, 'out.tns')
+    buf = out.encode('utf-8')
+    mem_a = ctypes.pointer(ctypes.pointer(ctypes.create_string_buffer(buf)))
+    engine.invoke('main', mem_a)
+
+    actual = open(out).read()
+    if actual != expected():
+      quit('FAILURE')
+
+
+class SparseCompiler:
+  """Sparse compiler passes."""
+
+  def __init__(self):
+    pipeline = (
+        f'builtin.func(linalg-generalize-named-ops,linalg-fuse-elementwise-ops),'
+        f'sparsification,'
+        f'sparse-tensor-conversion,'
+        f'builtin.func(linalg-bufferize,convert-linalg-to-loops,convert-vector-to-scf),'
+        f'convert-scf-to-std,'
+        f'func-bufferize,'
+        f'tensor-constant-bufferize,'
+        f'builtin.func(tensor-bufferize,std-bufferize,finalizing-bufferize),'
+        f'convert-vector-to-llvm{{reassociate-fp-reductions=1 enable-index-optimizations=1}},'
+        f'lower-affine,'
+        f'convert-memref-to-llvm,'
+        f'convert-std-to-llvm,'
+        f'reconcile-unrealized-casts')
+    self.pipeline = pipeline
+
+  def __call__(self, module: ir.Module):
+    passmanager.PassManager.parse(self.pipeline).run(module)
+
+
+def main():
+  support_lib = os.getenv('SUPPORT_LIB')
+  assert support_lib is not None, 'SUPPORT_LIB is undefined'
+  if not os.path.exists(support_lib):
+    raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
+                            support_lib)
+
+  # CHECK-LABEL: TEST: test_output
+  print('\nTEST: test_output')
+  count = 0
+  with ir.Context() as ctx, ir.Location.unknown():
+    # Loop over various sparse types: CSR, DCSR, CSC, DCSC.
+    levels = [[st.DimLevelType.dense, st.DimLevelType.compressed],
+              [st.DimLevelType.compressed, st.DimLevelType.compressed]]
+    orderings = [
+        ir.AffineMap.get_permutation([0, 1]),
+        ir.AffineMap.get_permutation([1, 0])
+    ]
+    bitwidths = [8, 16, 32, 64]
+    for level in levels:
+      for ordering in orderings:
+        for bwidth in bitwidths:
+          attr = st.EncodingAttr.get(level, ordering, bwidth, bwidth)
+          compiler = SparseCompiler()
+          build_compile_and_run_output(attr, support_lib, compiler)
+          count = count + 1
+
+  # CHECK: Passed 16 tests
+  print('Passed', count, 'tests')
+
+
+if __name__ == '__main__':
+  main()

From ff8f7904d14d77e40f90c2f8306ecb737b02c997 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Tue, 25 Jan 2022 02:07:57 +0000
Subject: [PATCH 484/946] Remove null check after dereferencing the pointer
 (NFC)

Flagged by Coverity
---
 .../lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
index 32a77430fa3a9..73decbac7382c 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
@@ -349,7 +349,7 @@ class BufferDeallocation : public BufferPlacementTransformationBase {
     Region *argRegion = block->getParent();
     Operation *parentOp = argRegion->getParentOp();
     RegionBranchOpInterface regionInterface;
-    if (!argRegion || &argRegion->front() != block ||
+    if (&argRegion->front() != block ||
         !(regionInterface = dyn_cast<RegionBranchOpInterface>(parentOp)))
       return success();
 

From 9ea3dfa5d015f61ff282ed88d08125bb38fd19a8 Mon Sep 17 00:00:00 2001
From: jacquesguan <Jianjian.Guan@streamcomputing.com>
Date: Mon, 24 Jan 2022 15:13:46 +0800
Subject: [PATCH 485/946] [RISCV][NFC] Rename RequiredExtensions to
 RequiredFeatures.

The field 'RequiredExtensions' is used to specify the constraint for rvv builtin, and it contains something which is not a sub-extension or extension such as 'RV64'. So the word 'extension' is not accurate now, 'feature' seems better.

Differential Revision: https://reviews.llvm.org/D118015
---
 clang/include/clang/Basic/riscv_vector.td |  8 ++++----
 clang/utils/TableGen/RISCVVEmitter.cpp    | 16 ++++++++--------
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index 28c57cc6afeeb..bf268d89d19e1 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -215,8 +215,8 @@ class RVVBuiltin<string suffix, string prototype, string type_range,
   // an automatic definition in header is emitted.
   string HeaderCode = "";
 
-  // Sub extension of vector spec.
-  list<string> RequiredExtensions = [];
+  // Features required to enable for this builtin.
+  list<string> RequiredFeatures = [];
 
   // Number of fields for Load/Store Segment instructions.
   int NF = 1;
@@ -720,7 +720,7 @@ multiclass RVVIndexedLoad<string op> {
         defvar eew64 = "64";
         defvar eew64_type = "(Log2EEW:6)";
         let Name = op # eew64 # "_v", IRName = op, IRNameMask = op # "_mask",
-            RequiredExtensions = ["RV64"] in {
+            RequiredFeatures = ["RV64"] in {
             def: RVVBuiltin<"v", "vPCe" # eew64_type # "Uv", type>;
               if !not(IsFloat<type>.val) then {
                 def: RVVBuiltin<"Uv", "UvPCUe" # eew64_type # "Uv", type>;
@@ -819,7 +819,7 @@ multiclass RVVIndexedStore<string op> {
         defvar eew64 = "64";
         defvar eew64_type = "(Log2EEW:6)";
         let Name = op # eew64  # "_v", IRName = op, IRNameMask = op # "_mask",
-            RequiredExtensions = ["RV64"]  in  {
+            RequiredFeatures = ["RV64"]  in  {
           def : RVVBuiltin<"v", "0Pe" # eew64_type # "Uvv", type>;
           if !not(IsFloat<type>.val) then {
             def : RVVBuiltin<"Uv", "0PUe" # eew64_type # "UvUv", type>;
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index c063b766e4a65..67d946d73e419 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -179,7 +179,7 @@ class RVVIntrinsic {
                bool HasNoMaskedOverloaded, bool HasAutoDef,
                StringRef ManualCodegen, const RVVTypes &Types,
                const std::vector<int64_t> &IntrinsicTypes,
-               const std::vector<StringRef> &RequiredExtensions, unsigned NF);
+               const std::vector<StringRef> &RequiredFeatures, unsigned NF);
   ~RVVIntrinsic() = default;
 
   StringRef getBuiltinName() const { return BuiltinName; }
@@ -772,7 +772,7 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
                            bool HasNoMaskedOverloaded, bool HasAutoDef,
                            StringRef ManualCodegen, const RVVTypes &OutInTypes,
                            const std::vector<int64_t> &NewIntrinsicTypes,
-                           const std::vector<StringRef> &RequiredExtensions,
+                           const std::vector<StringRef> &RequiredFeatures,
                            unsigned NF)
     : IRName(IRName), IsMask(IsMask), HasVL(HasVL), HasPolicy(HasPolicy),
       HasNoMaskedOverloaded(HasNoMaskedOverloaded), HasAutoDef(HasAutoDef),
@@ -805,8 +805,8 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
     if (T->isVector(64))
       RISCVPredefinedMacros |= RISCVPredefinedMacro::VectorMaxELen64;
   }
-  for (auto Extension : RequiredExtensions) {
-    if (Extension == "RV64")
+  for (auto Feature : RequiredFeatures) {
+    if (Feature == "RV64")
       RISCVPredefinedMacros |= RISCVPredefinedMacro::RV64;
   }
 
@@ -1154,8 +1154,8 @@ void RVVEmitter::createRVVIntrinsics(
     StringRef ManualCodegenMask = R->getValueAsString("ManualCodegenMask");
     std::vector<int64_t> IntrinsicTypes =
         R->getValueAsListOfInts("IntrinsicTypes");
-    std::vector<StringRef> RequiredExtensions =
-        R->getValueAsListOfStrings("RequiredExtensions");
+    std::vector<StringRef> RequiredFeatures =
+        R->getValueAsListOfStrings("RequiredFeatures");
     StringRef IRName = R->getValueAsString("IRName");
     StringRef IRNameMask = R->getValueAsString("IRNameMask");
     unsigned NF = R->getValueAsInt("NF");
@@ -1223,7 +1223,7 @@ void RVVEmitter::createRVVIntrinsics(
             Name, SuffixStr, MangledName, MangledSuffixStr, IRName,
             /*IsMask=*/false, /*HasMaskedOffOperand=*/false, HasVL, HasPolicy,
             HasNoMaskedOverloaded, HasAutoDef, ManualCodegen, Types.getValue(),
-            IntrinsicTypes, RequiredExtensions, NF));
+            IntrinsicTypes, RequiredFeatures, NF));
         if (HasMask) {
           // Create a mask intrinsic
           Optional<RVVTypes> MaskTypes =
@@ -1232,7 +1232,7 @@ void RVVEmitter::createRVVIntrinsics(
               Name, SuffixStr, MangledName, MangledSuffixStr, IRNameMask,
               /*IsMask=*/true, HasMaskedOffOperand, HasVL, HasPolicy,
               HasNoMaskedOverloaded, HasAutoDef, ManualCodegenMask,
-              MaskTypes.getValue(), IntrinsicTypes, RequiredExtensions, NF));
+              MaskTypes.getValue(), IntrinsicTypes, RequiredFeatures, NF));
         }
       } // end for Log2LMULList
     }   // end for TypeRange

From 0e55d4fab0183b6dca82ce127b78ded3db918c27 Mon Sep 17 00:00:00 2001
From: Evgeniy Brevnov <ybrevnov@azul.com>
Date: Mon, 24 Jan 2022 16:29:46 +0700
Subject: [PATCH 486/946] [AA] Refine ModRefInfo for llvm.memcpy.* in presence
 of operand bundles

Presence of operand bundles changes semantics in respect to ModRef. In particular, spec says: "From the compilers perspective, deoptimization operand bundles make the call sites theyre attached to at least readonly. They read through all of their pointer typed operands (even if theyre not otherwise escaped) and the entire visible heap. Deoptimization operand bundles do not capture their operands except during deoptimization, in which case control will not be returned to the compiled frame". Fix handling of llvm.memcpy.* according to the spec.

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D118033
---
 llvm/lib/Analysis/BasicAliasAnalysis.cpp | 4 ++--
 llvm/test/Analysis/BasicAA/deoptimize.ll | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index fa9ccb095a21d..b4c9859628379 100644
--- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -1020,9 +1020,9 @@ ModRefInfo BasicAAResult::getModRefInfo(const CallBase *Call,
         getBestAAResults().alias(MemoryLocation::getForDest(Inst), Loc, AAQI);
     // It's also possible for Loc to alias both src and dest, or neither.
     ModRefInfo rv = ModRefInfo::NoModRef;
-    if (SrcAA != AliasResult::NoAlias)
+    if (SrcAA != AliasResult::NoAlias || Call->hasReadingOperandBundles())
       rv = setRef(rv);
-    if (DestAA != AliasResult::NoAlias)
+    if (DestAA != AliasResult::NoAlias || Call->hasClobberingOperandBundles())
       rv = setMod(rv);
     return rv;
   }
diff --git a/llvm/test/Analysis/BasicAA/deoptimize.ll b/llvm/test/Analysis/BasicAA/deoptimize.ll
index 89297e2583186..18a41bd5405ae 100644
--- a/llvm/test/Analysis/BasicAA/deoptimize.ll
+++ b/llvm/test/Analysis/BasicAA/deoptimize.ll
@@ -22,7 +22,7 @@ define i32 @test_memcpy_with_deopt() {
 ; CHECK-LABEL: Function: test_memcpy_with_deopt:
 ; CHECK: Just Mod:  Ptr: i8* %A	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
 ; CHECK: Just Ref:  Ptr: i8* %B	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
-; CHECK: NoModRef:  Ptr: i32* @G1	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
+; CHECK: Just Ref:  Ptr: i32* @G1	<->  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 -1, i1 false) [ "deopt"() ]
 
   %A = alloca i8
   %B = alloca i8

From 810f13f0ebde70e679a097a9f5dbe37fe58ffa27 Mon Sep 17 00:00:00 2001
From: Richard <legalize@xmission.com>
Date: Mon, 24 Jan 2022 20:19:03 -0700
Subject: [PATCH 487/946] [clang-tools-extra] Fix documentation build (NFC)

---
 clang-tools-extra/docs/ReleaseNotes.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index ba0e530b7fec4..cb622f9b09606 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -192,7 +192,7 @@ Changes in existing checks
   option to control whether to warn on narrowing integer to floating-point
   conversions.
 
-- Improved :doc:`performance-move-const-arg` check.
+- Improved :doc:`performance-move-const-arg <clang-tidy/checks/performance-move-const-arg>` check.
 
   Removed a wrong FixIt for trivially copyable objects wrapped by ``std::move()`` and passed to an rvalue reference parameter. Removal of ``std::move()`` would break the code.
 

From e01e4c9115ad49479d01b6b6de4e83ee454bab24 Mon Sep 17 00:00:00 2001
From: harsh <harsh@nod-labs.com>
Date: Tue, 25 Jan 2022 02:37:52 +0000
Subject: [PATCH 488/946] Fix bugs in GPUToNVVM lowering

The current lowering from GPU to NVVM does
not correctly handle the following cases when
lowering the gpu shuffle op.

1. When the active width is set to 32 (all lanes),
then the current approach computes (1 << 32) -1 which
results in poison values in the LLVM IR. We fix this by
defining the active mask as (-1) >> (32 - width).

2. In the case of shuffle up, the computation of the third
operand c has to be different from the other 3 modes due to
the op definition in the ISA reference.
(https://docs.nvidia.com/cuda/parallel-thread-execution/index.html)
Specifically, the predicate value is computed as j >= maxLane
for up and j <= maxLane for all other modes. We fix this by
computing maskAndClamp as 32 - width for this mode.

TEST: We modify the existing test and add more checks for the up mode.

Reviewed By: ThomasRaoux

Differential Revision: https://reviews.llvm.org/D118086
---
 .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp        | 32 +++++++++++++------
 .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir     | 15 +++++++--
 2 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index c1b95c71b474b..9d564ee7d19d4 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -64,8 +64,10 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
   /// the highest lane which participates in the shuffle).
   ///
   ///     %one = llvm.constant(1 : i32) : i32
-  ///     %shl = llvm.shl %one, %width : i32
-  ///     %active_mask = llvm.sub %shl, %one : i32
+  ///     %minus_one = llvm.constant(-1 : i32) : i32
+  ///     %thirty_two = llvm.constant(32 : i32) : i32
+  ///     %num_lanes = llvm.sub %thirty_two, %width : i32
+  ///     %active_mask = llvm.lshr %minus_one, %num_lanes : i32
   ///     %mask_and_clamp = llvm.sub %width, %one : i32
   ///     %shfl = nvvm.shfl.sync.bfly %active_mask, %value, %offset,
   ///         %mask_and_clamp : !llvm<"{ float, i1 }">
@@ -86,14 +88,24 @@ struct GPUShuffleOpLowering : public ConvertOpToLLVMPattern<gpu::ShuffleOp> {
 
     Value one = rewriter.create<LLVM::ConstantOp>(
         loc, int32Type, rewriter.getI32IntegerAttr(1));
-    // Bit mask of active lanes: `(1 << activeWidth) - 1`.
-    Value activeMask = rewriter.create<LLVM::SubOp>(
-        loc, int32Type,
-        rewriter.create<LLVM::ShlOp>(loc, int32Type, one, adaptor.width()),
-        one);
-    // Clamp lane: `activeWidth - 1`
-    Value maskAndClamp =
-        rewriter.create<LLVM::SubOp>(loc, int32Type, adaptor.width(), one);
+    Value minusOne = rewriter.create<LLVM::ConstantOp>(
+        loc, int32Type, rewriter.getI32IntegerAttr(-1));
+    Value thirtyTwo = rewriter.create<LLVM::ConstantOp>(
+        loc, int32Type, rewriter.getI32IntegerAttr(32));
+    Value numLeadInactiveLane = rewriter.create<LLVM::SubOp>(
+        loc, int32Type, thirtyTwo, adaptor.width());
+    // Bit mask of active lanes: `(-1) >> (32 - activeWidth)`.
+    Value activeMask = rewriter.create<LLVM::LShrOp>(loc, int32Type, minusOne,
+                                                     numLeadInactiveLane);
+    Value maskAndClamp;
+    if (op.mode() == gpu::ShuffleMode::UP) {
+      // Clamp lane: `32 - activeWidth`
+      maskAndClamp = numLeadInactiveLane;
+    } else {
+      // Clamp lane: `activeWidth - 1`
+      maskAndClamp =
+          rewriter.create<LLVM::SubOp>(loc, int32Type, adaptor.width(), one);
+    }
 
     auto returnValueAndIsValidAttr = rewriter.getUnitAttr();
     Value shfl = rewriter.create<NVVM::ShflOp>(
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index a413dae33ceb7..8219d2204772f 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -117,14 +117,23 @@ gpu.module @test_module {
     // CHECK: %[[#WIDTH:]] = llvm.mlir.constant(23 : i32) : i32
     %arg2 = arith.constant 23 : i32
     // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : i32) : i32
-    // CHECK: %[[#SHL:]] = llvm.shl %[[#ONE]], %[[#WIDTH]] : i32
-    // CHECK: %[[#MASK:]] = llvm.sub %[[#SHL]], %[[#ONE]] : i32
+    // CHECK: %[[#MINUS_ONE:]] = llvm.mlir.constant(-1 : i32) : i32
+    // CHECK: %[[#THIRTY_TWO:]] = llvm.mlir.constant(32 : i32) : i32
+    // CHECK: %[[#NUM_LANES:]] = llvm.sub %[[#THIRTY_TWO]], %[[#WIDTH]] : i32
+    // CHECK: %[[#MASK:]] = llvm.lshr %[[#MINUS_ONE]], %[[#NUM_LANES]] : i32
     // CHECK: %[[#CLAMP:]] = llvm.sub %[[#WIDTH]], %[[#ONE]] : i32
     // CHECK: %[[#SHFL:]] = nvvm.shfl.sync bfly %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#CLAMP]] {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
     // CHECK: llvm.extractvalue %[[#SHFL]][0 : index] : !llvm.struct<(f32, i1)>
     // CHECK: llvm.extractvalue %[[#SHFL]][1 : index] : !llvm.struct<(f32, i1)>
     %shfl, %pred = gpu.shuffle xor %arg0, %arg1, %arg2 : f32
-    // CHECK: nvvm.shfl.sync up {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
+    // CHECK: %[[#ONE:]] = llvm.mlir.constant(1 : i32) : i32
+    // CHECK: %[[#MINUS_ONE:]] = llvm.mlir.constant(-1 : i32) : i32
+    // CHECK: %[[#THIRTY_TWO:]] = llvm.mlir.constant(32 : i32) : i32
+    // CHECK: %[[#NUM_LANES:]] = llvm.sub %[[#THIRTY_TWO]], %[[#WIDTH]] : i32
+    // CHECK: %[[#MASK:]] = llvm.lshr %[[#MINUS_ONE]], %[[#NUM_LANES]] : i32
+    // CHECK: %[[#SHFL:]] = nvvm.shfl.sync up %[[#MASK]], %[[#VALUE]], %[[#OFFSET]], %[[#NUM_LANES]] {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
+    // CHECK: llvm.extractvalue %[[#SHFL]][0 : index] : !llvm.struct<(f32, i1)>
+    // CHECK: llvm.extractvalue %[[#SHFL]][1 : index] : !llvm.struct<(f32, i1)>
     %shflu, %predu = gpu.shuffle up %arg0, %arg1, %arg2 : f32
     // CHECK: nvvm.shfl.sync down {{.*}} {return_value_and_is_valid} : f32 -> !llvm.struct<(f32, i1)>
     %shfld, %predd = gpu.shuffle down %arg0, %arg1, %arg2 : f32

From 0e9a4a3b65363c082087864b9ff5e0da33be90da Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Thu, 20 Jan 2022 14:30:47 -0800
Subject: [PATCH 489/946] [mlir] Move the Buffer related source files out of
 Transforms/

Transforms/ should only contain dialect-independent transformations,
and these files are a much better fit for the bufferization dialect anyways.

Differential Revision: https://reviews.llvm.org/D117839
---
 .../Bufferization}/Transforms/BufferUtils.h   | 17 ++---
 .../Dialect/Bufferization/Transforms/Passes.h | 24 +++++++
 .../Bufferization/Transforms/Passes.td        | 69 +++++++++++++++++++
 .../Dialect/StandardOps/Transforms/Passes.h   |  4 +-
 mlir/include/mlir/Transforms/Passes.h         | 24 -------
 mlir/include/mlir/Transforms/Passes.td        | 69 -------------------
 .../Transforms/BufferDeallocation.cpp         |  3 +-
 .../Transforms/BufferOptimizations.cpp        | 20 +++---
 .../Transforms/BufferResultsToOutParams.cpp   |  5 +-
 .../Bufferization}/Transforms/BufferUtils.cpp | 54 +++++++++++++--
 .../Bufferization/Transforms/CMakeLists.txt   |  3 +
 .../ArithInterfaceImpl.cpp                    |  2 +-
 .../Transforms/TensorConstantBufferize.cpp    | 45 +-----------
 mlir/lib/Transforms/CMakeLists.txt            |  4 --
 mlir/lib/Transforms/PassDetail.h              |  4 --
 15 files changed, 175 insertions(+), 172 deletions(-)
 rename mlir/include/mlir/{ => Dialect/Bufferization}/Transforms/BufferUtils.h (95%)
 rename mlir/lib/{ => Dialect/Bufferization}/Transforms/BufferOptimizations.cpp (96%)
 rename mlir/lib/{ => Dialect/Bufferization}/Transforms/BufferResultsToOutParams.cpp (97%)
 rename mlir/lib/{ => Dialect/Bufferization}/Transforms/BufferUtils.cpp (74%)

diff --git a/mlir/include/mlir/Transforms/BufferUtils.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h
similarity index 95%
rename from mlir/include/mlir/Transforms/BufferUtils.h
rename to mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h
index d9d06963fb28b..681b94953f20f 100644
--- a/mlir/include/mlir/Transforms/BufferUtils.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h
@@ -11,13 +11,12 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef MLIR_TRANSFORMS_BUFFERUTILS_H
-#define MLIR_TRANSFORMS_BUFFERUTILS_H
+#ifndef MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_BUFFERUTILS_H
+#define MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_BUFFERUTILS_H
 
 #include "mlir/Analysis/BufferViewFlowAnalysis.h"
 #include "mlir/Analysis/Liveness.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dominance.h"
@@ -25,6 +24,11 @@
 #include "mlir/Transforms/DialectConversion.h"
 
 namespace mlir {
+namespace memref {
+class GlobalOp;
+} // namespace memref
+
+namespace bufferization {
 
 /// A simple analysis that detects allocation operations.
 class BufferPlacementAllocs {
@@ -117,10 +121,6 @@ class BufferPlacementTransformationBase {
   Liveness liveness;
 };
 
-namespace memref {
-class GlobalOp;
-} // namespace memref
-
 // Support class to create global ops for tensor-valued constants in the
 // program. Globals are created lazily at the top of the `moduleOp` with pretty
 // names. Duplicates are avoided.
@@ -137,6 +137,7 @@ class GlobalCreator {
   // dependence to the memref dialect for this.
   DenseMap<Attribute, Operation *> globals;
 };
+} // namespace bufferization
 } // namespace mlir
 
-#endif // MLIR_TRANSFORMS_BUFFERUTILS_H
+#endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_BUFFERUTILS_H
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index 72c5136eb1988..481eaa284d98b 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -14,10 +14,34 @@ namespace bufferization {
 /// buffers.
 std::unique_ptr<Pass> createBufferDeallocationPass();
 
+/// Creates a pass that moves allocations upwards to reduce the number of
+/// required copies that are inserted during the BufferDeallocation pass.
+std::unique_ptr<Pass> createBufferHoistingPass();
+
+/// Creates a pass that moves allocations upwards out of loops. This avoids
+/// reallocations inside of loops.
+std::unique_ptr<Pass> createBufferLoopHoistingPass();
+
+/// Creates a pass that converts memref function results to out-params.
+std::unique_ptr<Pass> createBufferResultsToOutParamsPass();
+
 /// Creates a pass that finalizes a partial bufferization by removing remaining
 /// bufferization.to_tensor and bufferization.to_memref operations.
 std::unique_ptr<OperationPass<FuncOp>> createFinalizingBufferizePass();
 
+/// Creates a pass that promotes heap-based allocations to stack-based ones.
+/// Only buffers smaller than the provided size are promoted.
+/// Dynamic shaped buffers are promoted up to the given rank.
+std::unique_ptr<Pass>
+createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024,
+                                unsigned bitwidthOfIndexType = 64,
+                                unsigned maxRankOfAllocatedMemRef = 1);
+
+/// Creates a pass that promotes heap-based allocations to stack-based ones.
+/// Only buffers smaller with `isSmallAlloc(alloc) == true` are promoted.
+std::unique_ptr<Pass>
+createPromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc);
+
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index 22db1dc2c5bf9..dcbdd52a5c81d 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -88,6 +88,51 @@ def BufferDeallocation : Pass<"buffer-deallocation", "FuncOp"> {
   let constructor = "mlir::bufferization::createBufferDeallocationPass()";
 }
 
+def BufferHoisting : Pass<"buffer-hoisting", "FuncOp"> {
+  let summary = "Optimizes placement of allocation operations by moving them "
+                "into common dominators and out of nested regions";
+  let description = [{
+    This pass implements an approach to aggressively move allocations upwards
+    into common dominators and out of nested regions.
+  }];
+  let constructor = "mlir::bufferization::createBufferHoistingPass()";
+}
+
+def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "FuncOp"> {
+  let summary = "Optimizes placement of allocation operations by moving them "
+                "out of loop nests";
+  let description = [{
+    This pass implements an approach to aggressively move allocations upwards
+    out of loop nests. It does not move allocations into common dominators.
+  }];
+  let constructor = "mlir::bufferization::createBufferLoopHoistingPass()";
+}
+
+def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp">  {
+  let summary = "Converts memref-typed function results to out-params";
+  let description = [{
+    Some calling conventions prefer to pass output memrefs as "out params". The
+    conversion to this calling convention must be done as an atomic
+    transformation of the entire program (hence this is a module pass).
+
+    For example, if a call is rewritten, the callee needs to be rewritten
+    otherwise the IR will end up invalid. Thus, this transformation
+    require an atomic change to the entire program (e.g. the whole module).
+
+    This pass is expected to run immediately after bufferization is finished.
+    At that point, tensor-typed results will have been converted to memref-typed
+    results, and can be consistently converted to out params.
+
+    All memref-typed results are appended to the function argument list.
+
+    The main issue with this pass (and the out-param calling convention) is that
+    buffers for results need to be allocated in the caller. This currently only
+    works for static shaped memrefs.
+  }];
+  let constructor = "mlir::bufferization::createBufferResultsToOutParamsPass()";
+  let dependentDialects = ["memref::MemRefDialect"];
+}
+
 def FinalizingBufferize : Pass<"finalizing-bufferize", "FuncOp"> {
   let summary = "Finalize a partial bufferization";
   let description = [{
@@ -104,4 +149,28 @@ def FinalizingBufferize : Pass<"finalizing-bufferize", "FuncOp"> {
   let constructor = "mlir::bufferization::createFinalizingBufferizePass()";
 }
 
+def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
+  let summary = "Promotes heap-based allocations to automatically managed "
+                "stack-based allocations";
+  let description = [{
+    This pass implements a simple algorithm to convert heap-based memory
+    allocations to stack-based ones. It uses a built-in heuristic to decide
+    whether it makes sense to convert an allocation. Furthermore, dynamic
+    shaped buffers that are limited by the rank of the tensor can be
+    converted. They are only transformed if they are considered to be small.
+  }];
+  let constructor = "mlir::bufferization::createPromoteBuffersToStackPass()";
+  let options = [
+    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
+           /*default=*/"1024",
+           "Maximal size in bytes to promote allocations to stack.">,
+    Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
+           /*default=*/"64",
+           "Bitwidth of the index type. Used for size estimation.">,
+    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
+           /*default=*/"1",
+           "Maximal memref rank to promote dynamic buffers.">,
+  ];
+}
+
 #endif // MLIR_DIALECT_BUFFERIZATION_TRANSFORMS_PASSES
diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
index b80cd0bf6d625..b47303c70250e 100644
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
@@ -19,9 +19,9 @@
 namespace mlir {
 namespace bufferization {
 class BufferizeTypeConverter;
+class GlobalCreator;
 } // namespace bufferization
 
-class GlobalCreator;
 class RewritePatternSet;
 using OwningRewritePatternList = RewritePatternSet;
 
@@ -38,7 +38,7 @@ std::unique_ptr<Pass> createFuncBufferizePass();
 /// Add patterns to bufferize tensor constants into global memrefs to the given
 /// pattern list.
 void populateTensorConstantBufferizePatterns(
-    GlobalCreator &globalCreator,
+    bufferization::GlobalCreator &globalCreator,
     bufferization::BufferizeTypeConverter &typeConverter,
     RewritePatternSet &patterns);
 
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 46bab0047c1b6..2224343244516 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -33,30 +33,6 @@ enum FusionMode { Greedy, ProducerConsumer, Sibling };
 // Passes
 //===----------------------------------------------------------------------===//
 
-/// Creates a pass that moves allocations upwards to reduce the number of
-/// required copies that are inserted during the BufferDeallocation pass.
-std::unique_ptr<Pass> createBufferHoistingPass();
-
-/// Creates a pass that moves allocations upwards out of loops. This avoids
-/// reallocations inside of loops.
-std::unique_ptr<Pass> createBufferLoopHoistingPass();
-
-/// Creates a pass that promotes heap-based allocations to stack-based ones.
-/// Only buffers smaller than the provided size are promoted.
-/// Dynamic shaped buffers are promoted up to the given rank.
-std::unique_ptr<Pass>
-createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024,
-                                unsigned bitwidthOfIndexType = 64,
-                                unsigned maxRankOfAllocatedMemRef = 1);
-
-/// Creates a pass that promotes heap-based allocations to stack-based ones.
-/// Only buffers smaller with `isSmallAlloc(alloc) == true` are promoted.
-std::unique_ptr<Pass>
-createPromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc);
-
-/// Creates a pass that converts memref function results to out-params.
-std::unique_ptr<Pass> createBufferResultsToOutParamsPass();
-
 /// Creates an instance of the Canonicalizer pass, configured with default
 /// settings (which can be overridden by pass options on the command line).
 std::unique_ptr<Pass> createCanonicalizerPass();
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 4b1d6ca71e2e1..9942c0fc88923 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -217,75 +217,6 @@ def AffinePipelineDataTransfer
   let constructor = "mlir::createPipelineDataTransferPass()";
 }
 
-def BufferHoisting : Pass<"buffer-hoisting", "FuncOp"> {
-  let summary = "Optimizes placement of allocation operations by moving them "
-                "into common dominators and out of nested regions";
-  let description = [{
-    This pass implements an approach to aggressively move allocations upwards
-    into common dominators and out of nested regions.
-  }];
-  let constructor = "mlir::createBufferHoistingPass()";
-}
-
-def BufferLoopHoisting : Pass<"buffer-loop-hoisting", "FuncOp"> {
-  let summary = "Optimizes placement of allocation operations by moving them "
-                "out of loop nests";
-  let description = [{
-    This pass implements an approach to aggressively move allocations upwards
-    out of loop nests. It does not move allocations into common dominators.
-  }];
-  let constructor = "mlir::createBufferLoopHoistingPass()";
-}
-
-def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
-  let summary = "Promotes heap-based allocations to automatically managed "
-                "stack-based allocations";
-  let description = [{
-    This pass implements a simple algorithm to convert heap-based memory
-    allocations to stack-based ones. It uses a built-in heuristic to decide
-    whether it makes sense to convert an allocation. Furthermore, dynamic
-    shaped buffers that are limited by the rank of the tensor can be
-    converted. They are only transformed if they are considered to be small.
-  }];
-  let constructor = "mlir::createPromoteBuffersToStackPass()";
-  let options = [
-    Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
-           /*default=*/"1024",
-           "Maximal size in bytes to promote allocations to stack.">,
-    Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
-           /*default=*/"64",
-           "Bitwidth of the index type. Used for size estimation.">,
-    Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
-           /*default=*/"1",
-           "Maximal memref rank to promote dynamic buffers.">,
-  ];
-}
-
-def BufferResultsToOutParams : Pass<"buffer-results-to-out-params", "ModuleOp">  {
-  let summary = "Converts memref-typed function results to out-params";
-  let description = [{
-    Some calling conventions prefer to pass output memrefs as "out params". The
-    conversion to this calling convention must be done as an atomic
-    transformation of the entire program (hence this is a module pass).
-
-    For example, if a call is rewritten, the callee needs to be rewritten
-    otherwise the IR will end up invalid. Thus, this transformation
-    require an atomic change to the entire program (e.g. the whole module).
-
-    This pass is expected to run immediately after bufferization is finished.
-    At that point, tensor-typed results will have been converted to memref-typed
-    results, and can be consistently converted to out params.
-
-    All memref-typed results are appended to the function argument list.
-
-    The main issue with this pass (and the out-param calling convention) is that
-    buffers for results need to be allocated in the caller. This currently only
-    works for static shaped memrefs.
-  }];
-  let constructor = "mlir::createBufferResultsToOutParamsPass()";
-  let dependentDialects = ["memref::MemRefDialect"];
-}
-
 def Canonicalizer : Pass<"canonicalize"> {
   let summary = "Canonicalize operations";
   let description = [{
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
index 73decbac7382c..ed7bd5c20d58e 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp
@@ -54,12 +54,13 @@
 
 #include "mlir/Dialect/Bufferization/IR/AllocationOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
 #include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Transforms/BufferUtils.h"
 #include "llvm/ADT/SetOperations.h"
 
 using namespace mlir;
+using namespace mlir::bufferization;
 
 /// Walks over all immediate return-like terminators in the given region.
 static LogicalResult
diff --git a/mlir/lib/Transforms/BufferOptimizations.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
similarity index 96%
rename from mlir/lib/Transforms/BufferOptimizations.cpp
rename to mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
index 4fa035a939923..158c786739342 100644
--- a/mlir/lib/Transforms/BufferOptimizations.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
@@ -12,14 +12,15 @@
 // convert heap-based allocations to stack-based allocations, if possible.
 
 #include "PassDetail.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/BufferUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
+using namespace mlir::bufferization;
 
 /// Returns true if the given operation implements a known high-level region-
 /// based control-flow interface.
@@ -422,23 +423,22 @@ class PromoteBuffersToStackPass
 
 } // namespace
 
-std::unique_ptr<Pass> mlir::createBufferHoistingPass() {
+std::unique_ptr<Pass> mlir::bufferization::createBufferHoistingPass() {
   return std::make_unique<BufferHoistingPass>();
 }
 
-std::unique_ptr<Pass> mlir::createBufferLoopHoistingPass() {
+std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() {
   return std::make_unique<BufferLoopHoistingPass>();
 }
 
-std::unique_ptr<Pass>
-mlir::createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
-                                      unsigned bitwidthOfIndexType,
-                                      unsigned maxRankOfAllocatedMemRef) {
+std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
+    unsigned maxAllocSizeInBytes, unsigned bitwidthOfIndexType,
+    unsigned maxRankOfAllocatedMemRef) {
   return std::make_unique<PromoteBuffersToStackPass>(
       maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
 }
 
-std::unique_ptr<Pass>
-mlir::createPromoteBuffersToStackPass(std::function<bool(Value)> isSmallAlloc) {
+std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
+    std::function<bool(Value)> isSmallAlloc) {
   return std::make_unique<PromoteBuffersToStackPass>(std::move(isSmallAlloc));
 }
diff --git a/mlir/lib/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
similarity index 97%
rename from mlir/lib/Transforms/BufferResultsToOutParams.cpp
rename to mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
index c462955d068a4..08780db5f94df 100644
--- a/mlir/lib/Transforms/BufferResultsToOutParams.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
@@ -7,11 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Dialect/Bufferization/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
 
@@ -139,6 +139,7 @@ struct BufferResultsToOutParamsPass
 };
 } // namespace
 
-std::unique_ptr<Pass> mlir::createBufferResultsToOutParamsPass() {
+std::unique_ptr<Pass>
+mlir::bufferization::createBufferResultsToOutParamsPass() {
   return std::make_unique<BufferResultsToOutParamsPass>();
 }
diff --git a/mlir/lib/Transforms/BufferUtils.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp
similarity index 74%
rename from mlir/lib/Transforms/BufferUtils.cpp
rename to mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp
index c24293cb4bd68..a373a8dbe86b4 100644
--- a/mlir/lib/Transforms/BufferUtils.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp
@@ -10,18 +10,18 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Transforms/BufferUtils.h"
-#include "PassDetail.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
+#include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/MemRef/Utils/MemRefUtils.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Operation.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/SetOperations.h"
 
 using namespace mlir;
+using namespace mlir::bufferization;
 
 //===----------------------------------------------------------------------===//
 // BufferPlacementAllocs
@@ -139,3 +139,49 @@ bool BufferPlacementTransformationBase::isLoop(Operation *op) {
 
   return false;
 }
+
+//===----------------------------------------------------------------------===//
+// BufferPlacementTransformationBase
+//===----------------------------------------------------------------------===//
+
+memref::GlobalOp GlobalCreator::getGlobalFor(arith::ConstantOp constantOp) {
+  auto type = constantOp.getType().cast<RankedTensorType>();
+
+  BufferizeTypeConverter typeConverter;
+
+  // If we already have a global for this constant value, no need to do
+  // anything else.
+  auto it = globals.find(constantOp.getValue());
+  if (it != globals.end())
+    return cast<memref::GlobalOp>(it->second);
+
+  // Create a builder without an insertion point. We will insert using the
+  // symbol table to guarantee unique names.
+  OpBuilder globalBuilder(moduleOp.getContext());
+  SymbolTable symbolTable(moduleOp);
+
+  // Create a pretty name.
+  SmallString<64> buf;
+  llvm::raw_svector_ostream os(buf);
+  interleave(type.getShape(), os, "x");
+  os << "x" << type.getElementType();
+
+  // Add an optional alignment to the global memref.
+  IntegerAttr memrefAlignment =
+      alignment > 0 ? IntegerAttr::get(globalBuilder.getI64Type(), alignment)
+                    : IntegerAttr();
+
+  auto global = globalBuilder.create<memref::GlobalOp>(
+      constantOp.getLoc(), (Twine("__constant_") + os.str()).str(),
+      /*sym_visibility=*/globalBuilder.getStringAttr("private"),
+      /*type=*/typeConverter.convertType(type).cast<MemRefType>(),
+      /*initial_value=*/constantOp.getValue().cast<ElementsAttr>(),
+      /*constant=*/true,
+      /*alignment=*/memrefAlignment);
+  symbolTable.insert(global);
+  // The symbol table inserts at the end of the module, but globals are a bit
+  // nicer if they are at the beginning.
+  global->moveBefore(&moduleOp.front());
+  globals[constantOp.getValue()] = global;
+  return global;
+}
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
index b212ef952a05e..56fc326b4fa92 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Bufferization/Transforms/CMakeLists.txt
@@ -1,6 +1,9 @@
 add_mlir_dialect_library(MLIRBufferizationTransforms
   Bufferize.cpp
   BufferDeallocation.cpp
+  BufferOptimizations.cpp
+  BufferResultsToOutParams.cpp
+  BufferUtils.cpp
   OneShotAnalysis.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
index 256916c5e7b32..de3fbcd8b121c 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
@@ -10,10 +10,10 @@
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Operation.h"
-#include "mlir/Transforms/BufferUtils.h"
 
 using namespace mlir::bufferization;
 
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp b/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
index c7752f592a9e8..5bae6f3f6154f 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
+++ b/mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp
@@ -12,57 +12,16 @@
 
 #include "PassDetail.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
+#include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/StandardOps/Transforms/Passes.h"
 #include "mlir/IR/BlockAndValueMapping.h"
-#include "mlir/Transforms/BufferUtils.h"
 #include "mlir/Transforms/DialectConversion.h"
 
 using namespace mlir;
-
-memref::GlobalOp GlobalCreator::getGlobalFor(arith::ConstantOp constantOp) {
-  auto type = constantOp.getType().cast<RankedTensorType>();
-
-  bufferization::BufferizeTypeConverter typeConverter;
-
-  // If we already have a global for this constant value, no need to do
-  // anything else.
-  auto it = globals.find(constantOp.getValue());
-  if (it != globals.end())
-    return cast<memref::GlobalOp>(it->second);
-
-  // Create a builder without an insertion point. We will insert using the
-  // symbol table to guarantee unique names.
-  OpBuilder globalBuilder(moduleOp.getContext());
-  SymbolTable symbolTable(moduleOp);
-
-  // Create a pretty name.
-  SmallString<64> buf;
-  llvm::raw_svector_ostream os(buf);
-  interleave(type.getShape(), os, "x");
-  os << "x" << type.getElementType();
-
-  // Add an optional alignment to the global memref.
-  IntegerAttr memrefAlignment =
-      alignment > 0 ? IntegerAttr::get(globalBuilder.getI64Type(), alignment)
-                    : IntegerAttr();
-
-  auto global = globalBuilder.create<memref::GlobalOp>(
-      constantOp.getLoc(), (Twine("__constant_") + os.str()).str(),
-      /*sym_visibility=*/globalBuilder.getStringAttr("private"),
-      /*type=*/typeConverter.convertType(type).cast<MemRefType>(),
-      /*initial_value=*/constantOp.getValue().cast<ElementsAttr>(),
-      /*constant=*/true,
-      /*alignment=*/memrefAlignment);
-  symbolTable.insert(global);
-  // The symbol table inserts at the end of the module, but globals are a bit
-  // nicer if they are at the beginning.
-  global->moveBefore(&moduleOp.front());
-  globals[constantOp.getValue()] = global;
-  return global;
-}
+using namespace mlir::bufferization;
 
 namespace {
 class BufferizeTensorConstantOp
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 3e10b4a321311..1eba1ad94e5a5 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -1,9 +1,6 @@
 add_subdirectory(Utils)
 
 add_mlir_library(MLIRTransforms
-  BufferOptimizations.cpp
-  BufferResultsToOutParams.cpp
-  BufferUtils.cpp
   Canonicalizer.cpp
   ControlFlowSink.cpp
   CSE.cpp
@@ -31,7 +28,6 @@ add_mlir_library(MLIRTransforms
   LINK_LIBS PUBLIC
   MLIRAffine
   MLIRAnalysis
-  MLIRBufferization
   MLIRCopyOpInterface
   MLIRLoopLikeInterface
   MLIRMemRef
diff --git a/mlir/lib/Transforms/PassDetail.h b/mlir/lib/Transforms/PassDetail.h
index 9b846198fc08a..4496c4223cbec 100644
--- a/mlir/lib/Transforms/PassDetail.h
+++ b/mlir/lib/Transforms/PassDetail.h
@@ -27,10 +27,6 @@ namespace memref {
 class MemRefDialect;
 } // namespace memref
 
-namespace bufferization {
-class BufferizationDialect;
-} // namespace bufferization
-
 #define GEN_PASS_CLASSES
 #include "mlir/Transforms/Passes.h.inc"
 

From 2e2c0738e80e9c2b7c1413ca4719d5be2df4c6b5 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Thu, 20 Jan 2022 15:16:17 -0800
Subject: [PATCH 490/946] [mlir:Transforms] Move NormalizeMemRefs to
 MemRef/Transforms/

Transforms/  should only contain transformations that are dialect-independent and
this pass interacts with MemRef operations (making it a better fit for living in that
dialect).

Differential Revision: https://reviews.llvm.org/D117841
---
 .../mlir/Dialect/MemRef/Transforms/Passes.h   |   4 +
 .../mlir/Dialect/MemRef/Transforms/Passes.td  | 116 ++++++++++++++++++
 mlir/include/mlir/Transforms/Passes.h         |   4 -
 mlir/include/mlir/Transforms/Passes.td        | 116 ------------------
 .../Dialect/MemRef/Transforms/CMakeLists.txt  |   1 +
 .../MemRef}/Transforms/NormalizeMemRefs.cpp   |   5 +-
 .../Dialect/MemRef/Transforms/PassDetail.h    |  43 +++++++
 .../ResolveShapedTypeResultDims.cpp           |   4 +-
 mlir/lib/Transforms/CMakeLists.txt            |   1 -
 9 files changed, 168 insertions(+), 126 deletions(-)
 rename mlir/lib/{ => Dialect/MemRef}/Transforms/NormalizeMemRefs.cpp (99%)
 create mode 100644 mlir/lib/Dialect/MemRef/Transforms/PassDetail.h

diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
index 186782c6efdb2..23d12508b65cb 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
@@ -55,6 +55,10 @@ void populateResolveShapedTypeResultDimsPatterns(RewritePatternSet &patterns);
 /// load/store ops into `patterns`.
 std::unique_ptr<Pass> createFoldSubViewOpsPass();
 
+/// Creates an interprocedural pass to normalize memrefs to have a trivial
+/// (identity) layout map.
+std::unique_ptr<OperationPass<ModuleOp>> createNormalizeMemRefsPass();
+
 /// Creates an operation pass to resolve `memref.dim` operations with values
 /// that are defined by operations that implement the
 /// `ReifyRankedShapeTypeShapeOpInterface`, in terms of shapes of its input
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
index 29984c4fc385d..d67746b9c6033 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
@@ -23,6 +23,122 @@ def FoldSubViewOps : Pass<"fold-memref-subview-ops"> {
   ];
 }
 
+def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> {
+  let summary = "Normalize memrefs";
+   let description = [{
+    This pass transforms memref types with a non-trivial
+    [layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into
+    memref types with an identity layout map, e.g. (i, j) -> (i, j). This
+    pass is inter-procedural, in the sense that it can modify function
+    interfaces and call sites that pass memref types. In order to modify
+    memref types while preserving the original behavior, users of those
+    memref types are also modified to incorporate the resulting layout map.
+    For instance, an [AffineLoadOp]
+    (https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop)
+    will be updated to compose the layout map with with the affine expression
+    contained in the op. Operations marked with the [MemRefsNormalizable]
+    (https://mlir.llvm.org/docs/Traits/#memrefsnormalizable) trait are
+    expected to be normalizable. Supported operations include affine
+    operations, memref.alloc, memref.dealloc, and std.return.
+
+    Given an appropriate layout map specified in the code, this transformation
+    can express tiled or linearized access to multi-dimensional data
+    structures, but will not modify memref types without an explicit layout
+    map.
+
+    Currently this pass is limited to only modify
+    functions where all memref types can be normalized. If a function
+    contains any operations that are not MemRefNormalizable, then the function
+    and any functions that call or call it will not be modified.
+
+    Input
+
+    ```mlir
+    #tile = affine_map<(i) -> (i floordiv 4, i mod 4)>
+    func @matmul(%A: memref<16xf64, #tile>,
+                 %B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) {
+      affine.for %arg3 = 0 to 16 {
+            %a = affine.load %A[%arg3] : memref<16xf64, #tile>
+            %p = arith.mulf %a, %a : f64
+            affine.store %p, %A[%arg3] : memref<16xf64, #tile>
+      }
+      %c = memref.alloc() : memref<16xf64, #tile>
+      %d = affine.load %c[0] : memref<16xf64, #tile>
+      return %A: memref<16xf64, #tile>
+    }
+    ```
+
+    Output
+
+    ```mlir
+    func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>)
+      -> memref<4x4xf64> {
+      affine.for %arg3 = 0 to 16 {
+        %3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
+        %4 = arith.mulf %3, %3 : f64
+        affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
+      }
+      %0 = memref.alloc() : memref<4x4xf64>
+      %1 = affine.apply #map1()
+      %2 = affine.load %0[0, 0] : memref<4x4xf64>
+      return %arg0 : memref<4x4xf64>
+    }
+    ```
+
+    Input
+
+    ```
+    #linear8 = affine_map<(i, j) -> (i * 8 + j)>
+    func @linearize(%arg0: memref<8x8xi32, #linear8>,
+                    %arg1: memref<8x8xi32, #linear8>,
+                    %arg2: memref<8x8xi32, #linear8>) {
+      %c8 = arith.constant 8 : index
+      %c0 = arith.constant 0 : index
+      %c1 = arith.constant 1 : index
+      affine.for %arg3 = %c0 to %c8  {
+      affine.for %arg4 = %c0 to %c8  {
+        affine.for %arg5 = %c0 to %c8 {
+          %0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8>
+          %1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8>
+          %2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
+          %3 = arith.muli %0, %1 : i32
+          %4 = arith.addi %2, %3 : i32
+          affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
+        }
+      }
+      }
+      return
+    }
+    ```
+
+    Output
+
+    ```mlir
+    func @linearize(%arg0: memref<64xi32>,
+                    %arg1: memref<64xi32>,
+                    %arg2: memref<64xi32>) {
+    %c8 = arith.constant 8 : index
+    %c0 = arith.constant 0 : index
+    affine.for %arg3 = %c0 to %c8 {
+      affine.for %arg4 = %c0 to %c8 {
+        affine.for %arg5 = %c0 to %c8 {
+          %0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32>
+          %1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32>
+          %2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
+          %3 = arith.muli %0, %1 : i32
+          %4 = arith.addi %2, %3 : i32
+          affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
+        }
+      }
+    }
+    return
+  }
+  ```
+  }];
+  let constructor = "mlir::memref::createNormalizeMemRefsPass()";
+  let dependentDialects = ["AffineDialect"];
+}
+
 def ResolveRankedShapeTypeResultDims :
     Pass<"resolve-ranked-shaped-type-result-dims"> {
   let summary = "Resolve memref.dim of result values of ranked shape type";
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 2224343244516..4876d705afcb9 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -113,10 +113,6 @@ std::unique_ptr<Pass> createSCCPPass();
 /// pass may *only* be scheduled on an operation that defines a SymbolTable.
 std::unique_ptr<Pass> createSymbolDCEPass();
 
-/// Creates an interprocedural pass to normalize memrefs to have a trivial
-/// (identity) layout map.
-std::unique_ptr<OperationPass<ModuleOp>> createNormalizeMemRefsPass();
-
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 9942c0fc88923..44bf475af24c9 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -351,122 +351,6 @@ def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
   let constructor = "mlir::createLoopInvariantCodeMotionPass()";
 }
 
-def NormalizeMemRefs : Pass<"normalize-memrefs", "ModuleOp"> {
-  let summary = "Normalize memrefs";
-   let description = [{
-    This pass transforms memref types with a non-trivial
-    [layout map](https://mlir.llvm.org/docs/LangRef/#layout-map) into
-    memref types with an identity layout map, e.g. (i, j) -> (i, j). This
-    pass is inter-procedural, in the sense that it can modify function
-    interfaces and call sites that pass memref types. In order to modify
-    memref types while preserving the original behavior, users of those
-    memref types are also modified to incorporate the resulting layout map.
-    For instance, an [AffineLoadOp]
-    (https://mlir.llvm.org/docs/Dialects/Affine/#affineload-affineloadop)
-    will be updated to compose the layout map with with the affine expression
-    contained in the op. Operations marked with the [MemRefsNormalizable]
-    (https://mlir.llvm.org/docs/Traits/#memrefsnormalizable) trait are
-    expected to be normalizable. Supported operations include affine
-    operations, memref.alloc, memref.dealloc, and std.return.
-
-    Given an appropriate layout map specified in the code, this transformation
-    can express tiled or linearized access to multi-dimensional data
-    structures, but will not modify memref types without an explicit layout
-    map.
-
-    Currently this pass is limited to only modify
-    functions where all memref types can be normalized. If a function
-    contains any operations that are not MemRefNormalizable, then the function
-    and any functions that call or call it will not be modified.
-
-    Input
-
-    ```mlir
-    #tile = affine_map<(i) -> (i floordiv 4, i mod 4)>
-    func @matmul(%A: memref<16xf64, #tile>,
-                 %B: index, %C: memref<16xf64>) -> (memref<16xf64, #tile>) {
-      affine.for %arg3 = 0 to 16 {
-            %a = affine.load %A[%arg3] : memref<16xf64, #tile>
-            %p = arith.mulf %a, %a : f64
-            affine.store %p, %A[%arg3] : memref<16xf64, #tile>
-      }
-      %c = memref.alloc() : memref<16xf64, #tile>
-      %d = affine.load %c[0] : memref<16xf64, #tile>
-      return %A: memref<16xf64, #tile>
-    }
-    ```
-
-    Output
-
-    ```mlir
-    func @matmul(%arg0: memref<4x4xf64>, %arg1: index, %arg2: memref<16xf64>)
-      -> memref<4x4xf64> {
-      affine.for %arg3 = 0 to 16 {
-        %3 = affine.load %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
-        %4 = arith.mulf %3, %3 : f64
-        affine.store %4, %arg0[%arg3 floordiv 4, %arg3 mod 4]: memref<4x4xf64>
-      }
-      %0 = memref.alloc() : memref<4x4xf64>
-      %1 = affine.apply #map1()
-      %2 = affine.load %0[0, 0] : memref<4x4xf64>
-      return %arg0 : memref<4x4xf64>
-    }
-    ```
-
-    Input
-
-    ```
-    #linear8 = affine_map<(i, j) -> (i * 8 + j)>
-    func @linearize(%arg0: memref<8x8xi32, #linear8>,
-                    %arg1: memref<8x8xi32, #linear8>,
-                    %arg2: memref<8x8xi32, #linear8>) {
-      %c8 = arith.constant 8 : index
-      %c0 = arith.constant 0 : index
-      %c1 = arith.constant 1 : index
-      affine.for %arg3 = %c0 to %c8  {
-      affine.for %arg4 = %c0 to %c8  {
-        affine.for %arg5 = %c0 to %c8 {
-          %0 = affine.load %arg0[%arg3, %arg5] : memref<8x8xi32, #linear8>
-          %1 = affine.load %arg1[%arg5, %arg4] : memref<8x8xi32, #linear8>
-          %2 = affine.load %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
-          %3 = arith.muli %0, %1 : i32
-          %4 = arith.addi %2, %3 : i32
-          affine.store %4, %arg2[%arg3, %arg4] : memref<8x8xi32, #linear8>
-        }
-      }
-      }
-      return
-    }
-    ```
-
-    Output
-
-    ```mlir
-    func @linearize(%arg0: memref<64xi32>,
-                    %arg1: memref<64xi32>,
-                    %arg2: memref<64xi32>) {
-    %c8 = arith.constant 8 : index
-    %c0 = arith.constant 0 : index
-    affine.for %arg3 = %c0 to %c8 {
-      affine.for %arg4 = %c0 to %c8 {
-        affine.for %arg5 = %c0 to %c8 {
-          %0 = affine.load %arg0[%arg3 * 8 + %arg5] : memref<64xi32>
-          %1 = affine.load %arg1[%arg5 * 8 + %arg4] : memref<64xi32>
-          %2 = affine.load %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
-          %3 = arith.muli %0, %1 : i32
-          %4 = arith.addi %2, %3 : i32
-          affine.store %4, %arg2[%arg3 * 8 + %arg4] : memref<64xi32>
-        }
-      }
-    }
-    return
-  }
-  ```
-  }];
-  let constructor = "mlir::createNormalizeMemRefsPass()";
-  let dependentDialects = ["AffineDialect"];
-}
-
 def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
   let summary = "Collapse parallel loops to use less induction variables";
   let constructor = "mlir::createParallelLoopCollapsingPass()";
diff --git a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
index 3eda2ded018fe..319f9bbb95a37 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
@@ -1,5 +1,6 @@
 add_mlir_dialect_library(MLIRMemRefTransforms
   FoldSubViewOps.cpp
+  NormalizeMemRefs.cpp
   ResolveShapedTypeResultDims.cpp
 
   ADDITIONAL_HEADER_DIRS
diff --git a/mlir/lib/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
similarity index 99%
rename from mlir/lib/Transforms/NormalizeMemRefs.cpp
rename to mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
index 5119c4526364a..0b5e49b2df528 100644
--- a/mlir/lib/Transforms/NormalizeMemRefs.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
@@ -14,7 +14,7 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Transforms/Passes.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/Debug.h"
@@ -43,7 +43,8 @@ struct NormalizeMemRefs : public NormalizeMemRefsBase<NormalizeMemRefs> {
 
 } // namespace
 
-std::unique_ptr<OperationPass<ModuleOp>> mlir::createNormalizeMemRefsPass() {
+std::unique_ptr<OperationPass<ModuleOp>>
+mlir::memref::createNormalizeMemRefsPass() {
   return std::make_unique<NormalizeMemRefs>();
 }
 
diff --git a/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h b/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h
new file mode 100644
index 0000000000000..d15631526817f
--- /dev/null
+++ b/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h
@@ -0,0 +1,43 @@
+//===- PassDetail.h - MemRef Pass class details -----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef DIALECT_MEMREF_TRANSFORMS_PASSDETAIL_H_
+#define DIALECT_MEMREF_TRANSFORMS_PASSDETAIL_H_
+
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+
+class AffineDialect;
+
+// Forward declaration from Dialect.h
+template <typename ConcreteDialect>
+void registerDialect(DialectRegistry &registry);
+
+namespace arith {
+class ArithmeticDialect;
+} // namespace arith
+
+namespace memref {
+class MemRefDialect;
+} // namespace memref
+
+namespace tensor {
+class TensorDialect;
+} // namespace tensor
+
+namespace vector {
+class VectorDialect;
+} // namespace vector
+
+#define GEN_PASS_CLASSES
+#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
+
+} // namespace mlir
+
+#endif // DIALECT_MEMREF_TRANSFORMS_PASSDETAIL_H_
diff --git a/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp b/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
index 60f82f3b9e4b2..3f6aeeb696414 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp
@@ -11,6 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -107,9 +108,6 @@ struct DimOfReifyRankedShapedTypeOpInterface : public OpRewritePattern<OpTy> {
 //===----------------------------------------------------------------------===//
 
 namespace {
-#define GEN_PASS_CLASSES
-#include "mlir/Dialect/MemRef/Transforms/Passes.h.inc"
-
 struct ResolveRankedShapeTypeResultDimsPass final
     : public ResolveRankedShapeTypeResultDimsBase<
           ResolveRankedShapeTypeResultDimsPass> {
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 1eba1ad94e5a5..7826650f57471 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -9,7 +9,6 @@ add_mlir_library(MLIRTransforms
   LoopCoalescing.cpp
   LoopFusion.cpp
   LoopInvariantCodeMotion.cpp
-  NormalizeMemRefs.cpp
   OpStats.cpp
   ParallelLoopCollapsing.cpp
   PipelineDataTransfer.cpp

From a70aa7bb0d9a6066831b339e0a09a2c1bc74fe2b Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Thu, 20 Jan 2022 17:32:31 -0800
Subject: [PATCH 491/946] [mlir:Transforms] Move out the remaining non-dialect
 independent transforms and utilities

This has been a major TODO for a very long time, and is necessary for establishing a proper
dialect-free dependency layering for the Transforms library. Code was moved to effectively
two main locations:

* Affine/
There was quite a bit of affine dialect related code in Transforms/ do to historical reasons
(of a time way into MLIR's past). The following headers were moved to:
Transforms/LoopFusionUtils.h -> Dialect/Affine/LoopFusionUtils.h
Transforms/LoopUtils.h -> Dialect/Affine/LoopUtils.h
Transforms/Utils.h -> Dialect/Affine/Utils.h

The following transforms were also moved:
AffineLoopFusion, AffinePipelineDataTransfer, LoopCoalescing

* SCF/
Only one SCF pass was in Transforms/ (likely accidentally placed here): ParallelLoopCollapsing
The SCF specific utilities in LoopUtils have been moved to SCF/Utils.h

* Misc:
mlir::moveLoopInvariantCode was also moved to LoopLikeInterface.h given
that it is a simple utility defined in terms of LoopLikeOpInterface.

Differential Revision: https://reviews.llvm.org/D117848
---
 .../Affine}/LoopFusionUtils.h                 |   6 +-
 .../Affine}/LoopUtils.h                       |  48 +-
 mlir/include/mlir/Dialect/Affine/Passes.h     |  21 +
 mlir/include/mlir/Dialect/Affine/Passes.td    | 208 +++++
 mlir/include/mlir/Dialect/Affine/Utils.h      | 119 +++
 mlir/include/mlir/Dialect/SCF/Passes.h        |   4 +
 mlir/include/mlir/Dialect/SCF/Passes.td       |  16 +
 mlir/include/mlir/Dialect/SCF/Utils.h         |  60 ++
 .../mlir/Interfaces/LoopLikeInterface.h       |  13 +
 .../mlir/Transforms/ControlFlowSinkUtils.h    |  70 ++
 mlir/include/mlir/Transforms/Passes.h         |  26 -
 mlir/include/mlir/Transforms/Passes.td        | 224 -----
 mlir/include/mlir/Transforms/Utils.h          | 200 -----
 mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp     |   1 -
 .../SCFToStandard/SCFToStandard.cpp           |   1 -
 .../StandardToLLVM/StandardToLLVM.cpp         |   1 -
 .../Conversion/VectorToSCF/VectorToSCF.cpp    |   1 -
 .../Transforms/AffineDataCopyGeneration.cpp   |   2 +-
 .../AffineLoopInvariantCodeMotion.cpp         |   4 +-
 .../Affine/Transforms/AffineParallelize.cpp   |   2 +-
 .../Dialect/Affine/Transforms/CMakeLists.txt  |   3 +
 .../Affine}/Transforms/LoopCoalescing.cpp     |   3 +-
 .../Affine}/Transforms/LoopFusion.cpp         |   6 +-
 .../Dialect/Affine/Transforms/LoopTiling.cpp  |   4 +-
 .../Dialect/Affine/Transforms/LoopUnroll.cpp  |   2 +-
 .../Affine/Transforms/LoopUnrollAndJam.cpp    |   2 +-
 .../Dialect/Affine/Transforms/PassDetail.h    |   5 +
 .../Transforms/PipelineDataTransfer.cpp       |   7 +-
 .../Transforms/SimplifyAffineStructures.cpp   |   2 +-
 mlir/lib/Dialect/Affine/Utils/CMakeLists.txt  |   3 +
 .../Affine}/Utils/LoopFusionUtils.cpp         |   5 +-
 .../Affine}/Utils/LoopUtils.cpp               | 663 +--------------
 mlir/lib/Dialect/Affine/Utils/Utils.cpp       | 741 ++++++++++++++++-
 .../GPU/Transforms/MemoryPromotion.cpp        |   2 +-
 .../Linalg/Transforms/CodegenStrategy.cpp     |   2 -
 .../Linalg/Transforms/HoistPadding.cpp        |   2 -
 .../Dialect/Linalg/Transforms/Hoisting.cpp    |   2 -
 .../Transforms/LinalgStrategyPasses.cpp       |  12 +-
 .../Dialect/Linalg/Transforms/Transforms.cpp  |   1 -
 mlir/lib/Dialect/Linalg/Utils/Utils.cpp       |   2 +-
 .../MemRef/Transforms/NormalizeMemRefs.cpp    |   2 +-
 .../lib/Dialect/SCF/Transforms/CMakeLists.txt |   1 +
 .../Transforms/ParallelLoopCollapsing.cpp     |   6 +-
 mlir/lib/Dialect/SCF/Transforms/Utils.cpp     | 694 +++++++++++++++-
 .../VectorTransferSplitRewritePatterns.cpp    |   1 -
 mlir/lib/Dialect/Vector/VectorTransforms.cpp  |   1 -
 .../Dialect/Vector/VectorUnrollDistribute.cpp |   1 -
 mlir/lib/Interfaces/LoopLikeInterface.cpp     |  83 ++
 mlir/lib/Transforms/CMakeLists.txt            |   9 -
 mlir/lib/Transforms/CSE.cpp                   |   1 -
 mlir/lib/Transforms/ControlFlowSink.cpp       |   2 +-
 .../Transforms/LoopInvariantCodeMotion.cpp    |  79 +-
 mlir/lib/Transforms/PassDetail.h              |  15 -
 mlir/lib/Transforms/Utils/CMakeLists.txt      |  13 -
 .../Transforms/Utils/ControlFlowSinkUtils.cpp |   2 +-
 .../Transforms/Utils/DialectConversion.cpp    |   1 -
 mlir/lib/Transforms/Utils/Utils.cpp           | 767 ------------------
 mlir/test/lib/Dialect/Affine/CMakeLists.txt   |   2 +
 .../lib/Dialect/Affine/TestAffineDataCopy.cpp |   2 +-
 .../Affine/TestAffineLoopParametricTiling.cpp |   2 +-
 .../Affine}/TestLoopFusion.cpp                |   5 +-
 .../Affine}/TestLoopMapping.cpp               |   3 +-
 .../Dialect/Affine/TestLoopPermutation.cpp    |   3 +-
 .../Dialect/Affine/TestVectorizationUtils.cpp |   2 +-
 mlir/test/lib/Dialect/SCF/CMakeLists.txt      |   2 +
 .../SCF}/TestLoopParametricTiling.cpp         |   6 +-
 .../SCF}/TestLoopUnrolling.cpp                |   3 +-
 mlir/test/lib/Transforms/CMakeLists.txt       |   4 -
 mlir/test/lib/Transforms/TestConstantFold.cpp |   1 -
 mlir/unittests/Transforms/CMakeLists.txt      |   1 +
 70 files changed, 2129 insertions(+), 2081 deletions(-)
 rename mlir/include/mlir/{Transforms => Dialect/Affine}/LoopFusionUtils.h (98%)
 rename mlir/include/mlir/{Transforms => Dialect/Affine}/LoopUtils.h (85%)
 create mode 100644 mlir/include/mlir/Transforms/ControlFlowSinkUtils.h
 delete mode 100644 mlir/include/mlir/Transforms/Utils.h
 rename mlir/lib/{ => Dialect/Affine}/Transforms/LoopCoalescing.cpp (97%)
 rename mlir/lib/{ => Dialect/Affine}/Transforms/LoopFusion.cpp (99%)
 rename mlir/lib/{ => Dialect/Affine}/Transforms/PipelineDataTransfer.cpp (99%)
 rename mlir/lib/{Transforms => Dialect/Affine}/Utils/LoopFusionUtils.cpp (99%)
 rename mlir/lib/{Transforms => Dialect/Affine}/Utils/LoopUtils.cpp (81%)
 rename mlir/lib/{ => Dialect/SCF}/Transforms/ParallelLoopCollapsing.cpp (91%)
 delete mode 100644 mlir/lib/Transforms/Utils/Utils.cpp
 rename mlir/test/lib/{Transforms => Dialect/Affine}/TestLoopFusion.cpp (98%)
 rename mlir/test/lib/{Transforms => Dialect/Affine}/TestLoopMapping.cpp (96%)
 rename mlir/test/lib/{Transforms => Dialect/SCF}/TestLoopParametricTiling.cpp (93%)
 rename mlir/test/lib/{Transforms => Dialect/SCF}/TestLoopUnrolling.cpp (97%)

diff --git a/mlir/include/mlir/Transforms/LoopFusionUtils.h b/mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
similarity index 98%
rename from mlir/include/mlir/Transforms/LoopFusionUtils.h
rename to mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
index b26d023873b45..486d142e0a20e 100644
--- a/mlir/include/mlir/Transforms/LoopFusionUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
-#define MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
+#ifndef MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
+#define MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
 
 #include "mlir/IR/Value.h"
 #include "mlir/Support/LLVM.h"
@@ -167,4 +167,4 @@ void gatherProducerConsumerMemrefs(ArrayRef<Operation *> srcOps,
                                    DenseSet<Value> &producerConsumerMemrefs);
 } // namespace mlir
 
-#endif // MLIR_TRANSFORMS_LOOPFUSIONUTILS_H
+#endif // MLIR_DIALECT_AFFINE_LOOPFUSIONUTILS_H
diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
similarity index 85%
rename from mlir/include/mlir/Transforms/LoopUtils.h
rename to mlir/include/mlir/Dialect/Affine/LoopUtils.h
index d4d0d14d73fb5..bcaf864a43331 100644
--- a/mlir/include/mlir/Transforms/LoopUtils.h
+++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h
@@ -12,8 +12,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef MLIR_TRANSFORMS_LOOPUTILS_H
-#define MLIR_TRANSFORMS_LOOPUTILS_H
+#ifndef MLIR_DIALECT_AFFINE_LOOPUTILS_H
+#define MLIR_DIALECT_AFFINE_LOOPUTILS_H
 
 #include "mlir/IR/Block.h"
 #include "mlir/Support/LLVM.h"
@@ -45,9 +45,6 @@ LogicalResult loopUnrollFull(AffineForOp forOp);
 LogicalResult loopUnrollByFactor(
     AffineForOp forOp, uint64_t unrollFactor,
     function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr);
-LogicalResult loopUnrollByFactor(
-    scf::ForOp forOp, uint64_t unrollFactor,
-    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr);
 
 /// Unrolls this loop by the specified unroll factor or its trip count,
 /// whichever is lower.
@@ -63,8 +60,6 @@ bool LLVM_ATTRIBUTE_UNUSED isPerfectlyNested(ArrayRef<AffineForOp> loops);
 /// AffineForOp, and the second op is a terminator).
 void getPerfectlyNestedLoops(SmallVectorImpl<AffineForOp> &nestedLoops,
                              AffineForOp root);
-void getPerfectlyNestedLoops(SmallVectorImpl<scf::ForOp> &nestedLoops,
-                             scf::ForOp root);
 
 /// Unrolls and jams this loop by the specified factor. `forOp` can be a loop
 /// with iteration arguments performing supported reductions and its inner loops
@@ -78,10 +73,9 @@ LogicalResult loopUnrollJamByFactor(AffineForOp forOp,
 LogicalResult loopUnrollJamUpToFactor(AffineForOp forOp,
                                       uint64_t unrollJamFactor);
 
-/// Promotes the loop body of a AffineForOp/scf::ForOp to its containing block
-/// if the loop was known to have a single iteration.
+/// Promotes the loop body of a AffineForOp to its containing block if the loop
+/// was known to have a single iteration.
 LogicalResult promoteIfSingleIteration(AffineForOp forOp);
-LogicalResult promoteIfSingleIteration(scf::ForOp forOp);
 
 /// Promotes all single iteration AffineForOp's in the Function, i.e., moves
 /// their body into the containing Block.
@@ -146,13 +140,9 @@ AffineForOp sinkSequentialLoops(AffineForOp forOp);
 /// occurrence in `forOps`, under each of the `targets`.
 /// Returns the new AffineForOps, one per each of (`forOps`, `targets`) pair,
 /// nested immediately under each of `targets`.
-using Loops = SmallVector<scf::ForOp, 8>;
-using TileLoops = std::pair<Loops, Loops>;
 SmallVector<SmallVector<AffineForOp, 8>, 8> tile(ArrayRef<AffineForOp> forOps,
                                                  ArrayRef<uint64_t> sizes,
                                                  ArrayRef<AffineForOp> targets);
-SmallVector<Loops, 8> tile(ArrayRef<scf::ForOp> forOps, ArrayRef<Value> sizes,
-                           ArrayRef<scf::ForOp> targets);
 
 /// Performs tiling (with interchange) by strip-mining the `forOps` by `sizes`
 /// and sinking them, in their order of occurrence in `forOps`, under `target`.
@@ -160,15 +150,6 @@ SmallVector<Loops, 8> tile(ArrayRef<scf::ForOp> forOps, ArrayRef<Value> sizes,
 /// `target`.
 SmallVector<AffineForOp, 8> tile(ArrayRef<AffineForOp> forOps,
                                  ArrayRef<uint64_t> sizes, AffineForOp target);
-Loops tile(ArrayRef<scf::ForOp> forOps, ArrayRef<Value> sizes,
-           scf::ForOp target);
-
-/// Tile a nest of scf::ForOp loops rooted at `rootForOp` with the given
-/// (parametric) sizes. Sizes are expected to be strictly positive values at
-/// runtime.  If more sizes than loops are provided, discard the trailing values
-/// in sizes.  Assumes the loop nest is permutable.
-/// Returns the newly created intra-tile loops.
-Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef<Value> sizes);
 
 /// Explicit copy / DMA generation options for mlir::affineDataCopyGenerate.
 struct AffineCopyOptions {
@@ -236,16 +217,6 @@ LogicalResult generateCopyForMemRegion(const MemRefRegion &memrefRegion,
                                        const AffineCopyOptions &copyOptions,
                                        CopyGenerateResult &result);
 
-/// Tile a nest of standard for loops rooted at `rootForOp` by finding such
-/// parametric tile sizes that the outer loops have a fixed number of iterations
-/// as defined in `sizes`.
-TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef<int64_t> sizes);
-
-/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
-/// `loops` contains a list of perfectly nested loops with bounds and steps
-/// independent of any loop induction variable involved in the nest.
-void coalesceLoops(MutableArrayRef<scf::ForOp> loops);
-
 /// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
 /// `loops` contains a list of perfectly nested loops outermost to innermost
 /// that are normalized (step one and lower bound of zero) and with bounds and
@@ -254,12 +225,6 @@ void coalesceLoops(MutableArrayRef<scf::ForOp> loops);
 /// be representable using affine.for.
 LogicalResult coalesceLoops(MutableArrayRef<AffineForOp> loops);
 
-/// Take the ParallelLoop and for each set of dimension indices, combine them
-/// into a single dimension. combinedDimensions must contain each index into
-/// loops exactly once.
-void collapseParallelLoops(scf::ParallelOp loops,
-                           ArrayRef<std::vector<unsigned>> combinedDimensions);
-
 /// Maps `forOp` for execution on a parallel grid of virtual `processorIds` of
 /// size given by `numProcessors`. This is achieved by embedding the SSA values
 /// corresponding to `processorIds` and `numProcessors` into the bounds and step
@@ -321,9 +286,6 @@ LogicalResult
 separateFullTiles(MutableArrayRef<AffineForOp> nest,
                   SmallVectorImpl<AffineForOp> *fullTileNest = nullptr);
 
-/// Move loop invariant code out of `looplike`.
-LogicalResult moveLoopInvariantCode(LoopLikeOpInterface looplike);
-
 } // namespace mlir
 
-#endif // MLIR_TRANSFORMS_LOOPUTILS_H
+#endif // MLIR_DIALECT_AFFINE_LOOPUTILS_H
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h
index e4f6d2034902f..8a94262a298b2 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.h
+++ b/mlir/include/mlir/Dialect/Affine/Passes.h
@@ -21,6 +21,10 @@ namespace mlir {
 
 class AffineForOp;
 
+/// Fusion mode to attempt. The default mode `Greedy` does both
+/// producer-consumer and sibling fusion.
+enum FusionMode { Greedy, ProducerConsumer, Sibling };
+
 /// Creates a simplification pass for affine structures (maps and sets). In
 /// addition, this pass also normalizes memrefs to have the trivial (identity)
 /// layout map.
@@ -53,6 +57,19 @@ std::unique_ptr<OperationPass<FuncOp>> createAffineDataCopyGenerationPass();
 /// dead allocs.
 std::unique_ptr<OperationPass<FuncOp>> createAffineScalarReplacementPass();
 
+/// Creates a pass that transforms perfectly nested loops with independent
+/// bounds into a single loop.
+std::unique_ptr<OperationPass<FuncOp>> createLoopCoalescingPass();
+
+/// Creates a loop fusion pass which fuses loops according to type of fusion
+/// specified in `fusionMode`. Buffers of size less than or equal to
+/// `localBufSizeThreshold` are promoted to memory space `fastMemorySpace`.
+std::unique_ptr<OperationPass<FuncOp>>
+createLoopFusionPass(unsigned fastMemorySpace = 0,
+                     uint64_t localBufSizeThreshold = 0,
+                     bool maximalFusion = false,
+                     enum FusionMode fusionMode = FusionMode::Greedy);
+
 /// Creates a pass to perform tiling on loop nests.
 std::unique_ptr<OperationPass<FuncOp>>
 createLoopTilingPass(uint64_t cacheSizeBytes);
@@ -76,6 +93,10 @@ std::unique_ptr<OperationPass<FuncOp>> createLoopUnrollPass(
 std::unique_ptr<OperationPass<FuncOp>>
 createLoopUnrollAndJamPass(int unrollJamFactor = -1);
 
+/// Creates a pass to pipeline explicit movement of data across levels of the
+/// memory hierarchy.
+std::unique_ptr<OperationPass<FuncOp>> createPipelineDataTransferPass();
+
 /// Creates a pass to vectorize loops, operations and data types using a
 /// target-independent, n-D super-vector abstraction.
 std::unique_ptr<OperationPass<FuncOp>>
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index 8ec4b84ea5ca0..d67bd33ef9d62 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -43,6 +43,138 @@ def AffineDataCopyGeneration : Pass<"affine-data-copy-generate", "FuncOp"> {
   ];
 }
 
+def AffineLoopFusion : Pass<"affine-loop-fusion", "FuncOp"> {
+  let summary = "Fuse affine loop nests";
+  let description = [{
+    This pass performs fusion of loop nests using a slicing-based approach. It
+    combines two fusion strategies: producer-consumer fusion and sibling fusion.
+    Producer-consumer fusion is aimed at fusing pairs of loops where the first
+    one writes to a memref that the second reads. Sibling fusion targets pairs
+    of loops that share no dependences between them but that load from the same
+    memref. The fused loop nests, when possible, are rewritten to access
+    significantly smaller local buffers instead of the original memref's, and
+    the latter are often either completely optimized away or contracted. This
+    transformation leads to enhanced locality and lower memory footprint through
+    the elimination or contraction of temporaries/intermediate memref's. These
+    benefits are sometimes achieved at the expense of redundant computation
+    through a cost model that evaluates available choices such as the depth at
+    which a source slice should be materialized in the designation slice.
+
+    Example 1: Producer-consumer fusion.
+    Input:
+    ```mlir
+    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
+      %0 = memref.alloc() : memref<10xf32>
+      %1 = memref.alloc() : memref<10xf32>
+      %cst = arith.constant 0.000000e+00 : f32
+      affine.for %arg2 = 0 to 10 {
+        affine.store %cst, %0[%arg2] : memref<10xf32>
+        affine.store %cst, %1[%arg2] : memref<10xf32>
+      }
+      affine.for %arg2 = 0 to 10 {
+        %2 = affine.load %0[%arg2] : memref<10xf32>
+        %3 = arith.addf %2, %2 : f32
+        affine.store %3, %arg0[%arg2] : memref<10xf32>
+      }
+      affine.for %arg2 = 0 to 10 {
+        %2 = affine.load %1[%arg2] : memref<10xf32>
+        %3 = arith.mulf %2, %2 : f32
+        affine.store %3, %arg1[%arg2] : memref<10xf32>
+      }
+      return
+    }
+    ```
+    Output:
+    ```mlir
+    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
+      %0 = memref.alloc() : memref<1xf32>
+      %1 = memref.alloc() : memref<1xf32>
+      %cst = arith.constant 0.000000e+00 : f32
+      affine.for %arg2 = 0 to 10 {
+        affine.store %cst, %0[0] : memref<1xf32>
+        affine.store %cst, %1[0] : memref<1xf32>
+        %2 = affine.load %1[0] : memref<1xf32>
+        %3 = arith.mulf %2, %2 : f32
+        affine.store %3, %arg1[%arg2] : memref<10xf32>
+        %4 = affine.load %0[0] : memref<1xf32>
+        %5 = arith.addf %4, %4 : f32
+        affine.store %5, %arg0[%arg2] : memref<10xf32>
+      }
+      return
+    }
+    ```
+
+    Example 2: Sibling fusion.
+    Input:
+    ```mlir
+    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
+                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
+                         %arg4: memref<10x10xf32>) {
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.mulf %0, %1 : f32
+          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.addf %0, %1 : f32
+          affine.store %2, %arg4[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      return
+    }
+    ```
+    Output:
+    ```mlir
+    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
+                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
+                         %arg4: memref<10x10xf32>) {
+      affine.for %arg5 = 0 to 3 {
+        affine.for %arg6 = 0 to 3 {
+          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
+          %2 = arith.mulf %0, %1 : f32
+          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
+          %3 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
+          %4 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
+          %5 = arith.addf %3, %4 : f32
+          affine.store %5, %arg4[%arg5, %arg6] : memref<10x10xf32>
+        }
+      }
+      return
+    }
+    ```
+  }];
+  let constructor = "mlir::createLoopFusionPass()";
+  let options = [
+    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
+           /*default=*/"0.30f", "Fractional increase in additional computation "
+                                "tolerated while fusing">,
+    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
+           /*default=*/"0",
+           "Faster memory space number to promote fusion buffers to">,
+    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
+           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
+                            "to fast memory space">,
+    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
+           "Enables maximal loop fusion">,
+    Option<"affineFusionMode", "mode", "enum FusionMode",
+           "mlir::FusionMode::Greedy", "fusion mode to attempt",
+           "llvm::cl::values(clEnumValN(mlir::FusionMode::Greedy,"
+           " \"greedy\", \"Perform greedy (both producer-consumer and sibling)  fusion\"), "
+           "clEnumValN( mlir::FusionMode::ProducerConsumer, "
+           "\"producer\", \"Perform only producer-consumer fusion\"), "
+           "clEnumValN( mlir::FusionMode::Sibling, "
+           "\"sibling\", \"Perform only sibling fusion\"))">,
+    ];
+  let dependentDialects = ["memref::MemRefDialect"];
+}
+
 def AffineLoopInvariantCodeMotion
     : Pass<"affine-loop-invariant-code-motion", "FuncOp"> {
   let summary = "Hoist loop invariant instructions outside of affine loops";
@@ -94,6 +226,75 @@ def AffineLoopUnrollAndJam : Pass<"affine-loop-unroll-jam", "FuncOp"> {
   ];
 }
 
+def AffinePipelineDataTransfer
+    : Pass<"affine-pipeline-data-transfer", "FuncOp"> {
+  let summary = "Pipeline non-blocking data transfers between explicitly "
+                "managed levels of the memory hierarchy";
+  let description = [{
+    This pass performs a transformation to overlap non-blocking DMA operations
+    in a loop with computations through double buffering. This is achieved by
+    advancing dma_start operations with respect to other operations.
+
+    Input
+
+    ```mlir
+    func @pipelinedatatransfer() {
+      %0 = memref.alloc() : memref<256xf32>
+      %1 = memref.alloc() : memref<32xf32, 1>
+      %2 = memref.alloc() : memref<1xf32>
+      %c0 = arith.constant 0 : index
+      %c128 = arith.constant 128 : index
+      affine.for %i0 = 0 to 8 {
+        affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
+        affine.dma_wait %2[%c0], %c128 : memref<1xf32>
+        %3 = affine.load %1[%i0] : memref<32xf32, 1>
+        %4 = "compute"(%3) : (f32) -> f32
+        affine.store %4, %1[%i0] : memref<32xf32, 1>
+      }
+      return
+    }
+    ```
+
+    Output
+
+    ```mlir
+    module {
+      func @pipelinedatatransfer() {
+        %c8 = arith.constant 8 : index
+        %c0 = arith.constant 0 : index
+        %0 = memref.alloc() : memref<256xf32>
+        %c0_0 = arith.constant 0 : index
+        %c128 = arith.constant 128 : index
+        %1 = memref.alloc() : memref<2x32xf32, 1>
+        %2 = memref.alloc() : memref<2x1xf32>
+        affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
+        affine.for %arg0 = 1 to 8 {
+          affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
+          %8 = affine.apply #map3(%arg0)
+          %9 = affine.apply #map4(%8)
+          %10 = affine.apply #map4(%8)
+          affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
+          %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
+          %12 = "compute"(%11) : (f32) -> f32
+          affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
+        }
+        %3 = affine.apply #map3(%c8)
+        %4 = affine.apply #map4(%3)
+        %5 = affine.apply #map4(%3)
+        affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
+        %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
+        %7 = "compute"(%6) : (f32) -> f32
+        affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
+        memref.dealloc %2 : memref<2x1xf32>
+        memref.dealloc %1 : memref<2x32xf32, 1>
+        return
+      }
+    }
+    ```
+  }];
+  let constructor = "mlir::createPipelineDataTransferPass()";
+}
+
 def AffineScalarReplacement : Pass<"affine-scalrep", "FuncOp"> {
   let summary = "Replace affine memref acceses by scalars by forwarding stores "
                 "to loads and eliminating redundant loads";
@@ -184,6 +385,13 @@ def AffineLoopNormalize : Pass<"affine-loop-normalize", "FuncOp"> {
   let constructor = "mlir::createAffineLoopNormalizePass()";
 }
 
+def LoopCoalescing : Pass<"loop-coalescing", "FuncOp"> {
+  let summary = "Coalesce nested loops with independent bounds into a single "
+                "loop";
+  let constructor = "mlir::createLoopCoalescingPass()";
+  let dependentDialects = ["arith::ArithmeticDialect"];
+}
+
 def SimplifyAffineStructures : Pass<"simplify-affine-structures", "FuncOp"> {
   let summary = "Simplify affine expressions in maps/sets and normalize "
                 "memrefs";
diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h
index a0cb63d7fb9e5..bcbb7446df548 100644
--- a/mlir/include/mlir/Dialect/Affine/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Utils.h
@@ -24,6 +24,10 @@ class DominanceInfo;
 class Operation;
 class PostDominanceInfo;
 
+namespace memref {
+class AllocOp;
+} // namespace memref
+
 struct LogicalResult;
 
 using ReductionLoopMap = DenseMap<Operation *, SmallVector<LoopReduction, 2>>;
@@ -168,6 +172,121 @@ void normalizeAffineFor(AffineForOp op);
 AffineExpr substWithMin(AffineExpr e, AffineExpr dim, AffineExpr min,
                         AffineExpr max, bool positivePath = true);
 
+/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
+/// optionally remapping the old memref's indices using the supplied affine map,
+/// `indexRemap`. The new memref could be of a different shape or rank.
+/// `extraIndices` provides any additional access indices to be added to the
+/// start.
+///
+/// `indexRemap` remaps indices of the old memref access to a new set of indices
+/// that are used to index the memref. Additional input operands to indexRemap
+/// can be optionally provided in `extraOperands`, and they occupy the start
+/// of its input list. `indexRemap`'s dimensional inputs are expected to
+/// correspond to memref's indices, and its symbolic inputs if any should be
+/// provided in `symbolOperands`.
+///
+/// `domOpFilter`, if non-null, restricts the replacement to only those
+/// operations that are dominated by the former; similarly, `postDomOpFilter`
+/// restricts replacement to only those operations that are postdominated by it.
+///
+/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
+/// uses of a memref without any requirement for access index rewrites as long
+/// as the user operation has the MemRefsNormalizable trait. The default value
+/// of this flag is false.
+///
+/// 'replaceInDeallocOp', if set, lets DeallocOp, a non-dereferencing user, to
+/// also be a candidate for replacement. The default value of this flag is
+/// false.
+///
+/// Returns true on success and false if the replacement is not possible,
+/// whenever a memref is used as an operand in a non-dereferencing context and
+/// 'allowNonDereferencingOps' is false, except for dealloc's on the memref
+/// which are left untouched. See comments at function definition for an
+/// example.
+//
+//  Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
+//  The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
+//  index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap = (d0,
+//  d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without any
+//  extra operands, note that 'indexRemap' would just be applied to existing
+//  indices (%i, %j).
+//  TODO: allow extraIndices to be added at any position.
+LogicalResult replaceAllMemRefUsesWith(
+    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices = {},
+    AffineMap indexRemap = AffineMap(), ArrayRef<Value> extraOperands = {},
+    ArrayRef<Value> symbolOperands = {}, Operation *domOpFilter = nullptr,
+    Operation *postDomOpFilter = nullptr, bool allowNonDereferencingOps = false,
+    bool replaceInDeallocOp = false);
+
+/// Performs the same replacement as the other version above but only for the
+/// dereferencing uses of `oldMemRef` in `op`, except in cases where
+/// 'allowNonDereferencingOps' is set to true where we replace the
+/// non-dereferencing uses as well.
+LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
+                                       Operation *op,
+                                       ArrayRef<Value> extraIndices = {},
+                                       AffineMap indexRemap = AffineMap(),
+                                       ArrayRef<Value> extraOperands = {},
+                                       ArrayRef<Value> symbolOperands = {},
+                                       bool allowNonDereferencingOps = false);
+
+/// Rewrites the memref defined by this alloc op to have an identity layout map
+/// and updates all its indexing uses. Returns failure if any of its uses
+/// escape (while leaving the IR in a valid state).
+LogicalResult normalizeMemRef(memref::AllocOp *op);
+
+/// Uses the old memref type map layout and computes the new memref type to have
+/// a new shape and a layout map, where the old layout map has been normalized
+/// to an identity layout map. It returns the old memref in case no
+/// normalization was needed or a failure occurs while transforming the old map
+/// layout to an identity layout map.
+MemRefType normalizeMemRefType(MemRefType memrefType, OpBuilder builder,
+                               unsigned numSymbolicOperands);
+
+/// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
+/// its results equal to the number of operands, as a composition
+/// of all other AffineApplyOps reachable from input parameter 'operands'. If
+/// different operands were drawing results from multiple affine apply ops,
+/// these will also be collected into a single (multi-result) affine apply op.
+/// The final results of the composed AffineApplyOp are returned in output
+/// parameter 'results'. Returns the affine apply op created.
+Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
+                                       ArrayRef<Value> operands,
+                                       ArrayRef<Operation *> affineApplyOps,
+                                       SmallVectorImpl<Value> *results);
+
+/// Given an operation, inserts one or more single result affine apply
+/// operations, results of which are exclusively used by this operation.
+/// The operands of these newly created affine apply ops are
+/// guaranteed to be loop iterators or terminal symbols of a function.
+///
+/// Before
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   send %A[%idx], ...
+///   %v = "compute"(%idx, ...)
+///
+/// After
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   send %A[%idx], ...
+///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
+///   %v = "compute"(%idx_, ...)
+
+/// This allows the application of different transformations on send and
+/// compute (for eg. different shifts/delays)
+///
+/// Fills `sliceOps` with the list of affine.apply operations.
+/// In the following cases, `sliceOps` remains empty:
+///   1. If none of opInst's operands were the result of an affine.apply
+///      (i.e., there was no affine computation slice to create).
+///   2. If all the affine.apply op's supplying operands to this opInst did not
+///      have any uses other than those in this opInst.
+void createAffineComputationSlice(Operation *opInst,
+                                  SmallVectorImpl<AffineApplyOp> *sliceOps);
+
 } // namespace mlir
 
 #endif // MLIR_DIALECT_AFFINE_UTILS_H
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.h b/mlir/include/mlir/Dialect/SCF/Passes.h
index e6123617f656e..bc9ed4f5b5c68 100644
--- a/mlir/include/mlir/Dialect/SCF/Passes.h
+++ b/mlir/include/mlir/Dialect/SCF/Passes.h
@@ -32,6 +32,10 @@ std::unique_ptr<Pass> createForLoopPeelingPass();
 /// inside of scf.for loops with known lower and upper bounds.
 std::unique_ptr<Pass> createSCFForLoopCanonicalizationPass();
 
+/// Creates a pass that transforms a single ParallelLoop over N induction
+/// variables into another ParallelLoop over less than N induction variables.
+std::unique_ptr<Pass> createParallelLoopCollapsingPass();
+
 /// Creates a loop fusion pass which fuses parallel loops.
 std::unique_ptr<Pass> createParallelLoopFusionPass();
 
diff --git a/mlir/include/mlir/Dialect/SCF/Passes.td b/mlir/include/mlir/Dialect/SCF/Passes.td
index 45b6f8c92ee72..9e151440792ef 100644
--- a/mlir/include/mlir/Dialect/SCF/Passes.td
+++ b/mlir/include/mlir/Dialect/SCF/Passes.td
@@ -52,6 +52,22 @@ def SCFParallelLoopFusion : Pass<"parallel-loop-fusion"> {
   let constructor = "mlir::createParallelLoopFusionPass()";
 }
 
+def SCFParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
+  let summary = "Collapse parallel loops to use less induction variables";
+  let constructor = "mlir::createParallelLoopCollapsingPass()";
+  let options = [
+    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
+               "Which loop indices to combine 0th loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
+               "Which loop indices to combine into the position 1 loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
+               "Which loop indices to combine into the position 2 loop index",
+               "llvm::cl::MiscFlags::CommaSeparated">,
+  ];
+}
+
 def SCFParallelLoopSpecialization
     : Pass<"parallel-loop-specialization", "FuncOp"> {
   let summary = "Specialize parallel loops for vectorization";
diff --git a/mlir/include/mlir/Dialect/SCF/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils.h
index a062783d0bf60..38d80a0826dc9 100644
--- a/mlir/include/mlir/Dialect/SCF/Utils.h
+++ b/mlir/include/mlir/Dialect/SCF/Utils.h
@@ -98,5 +98,65 @@ getSCFMinMaxExpr(Value value, SmallVectorImpl<Value> &dims,
                  SmallVectorImpl<Value> &symbols,
                  llvm::function_ref<bool(Operation *)> loopFilter = nullptr);
 
+/// Replace a perfect nest of "for" loops with a single linearized loop. Assumes
+/// `loops` contains a list of perfectly nested loops with bounds and steps
+/// independent of any loop induction variable involved in the nest.
+void coalesceLoops(MutableArrayRef<scf::ForOp> loops);
+
+/// Take the ParallelLoop and for each set of dimension indices, combine them
+/// into a single dimension. combinedDimensions must contain each index into
+/// loops exactly once.
+void collapseParallelLoops(scf::ParallelOp loops,
+                           ArrayRef<std::vector<unsigned>> combinedDimensions);
+
+/// Promotes the loop body of a scf::ForOp to its containing block if the loop
+/// was known to have a single iteration.
+LogicalResult promoteIfSingleIteration(scf::ForOp forOp);
+
+/// Unrolls this for operation by the specified unroll factor. Returns failure
+/// if the loop cannot be unrolled either due to restrictions or due to invalid
+/// unroll factors. Requires positive loop bounds and step. If specified,
+/// annotates the Ops in each unrolled iteration by applying `annotateFn`.
+LogicalResult loopUnrollByFactor(
+    scf::ForOp forOp, uint64_t unrollFactor,
+    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr);
+
+/// Tile a nest of standard for loops rooted at `rootForOp` by finding such
+/// parametric tile sizes that the outer loops have a fixed number of iterations
+/// as defined in `sizes`.
+using Loops = SmallVector<scf::ForOp, 8>;
+using TileLoops = std::pair<Loops, Loops>;
+TileLoops extractFixedOuterLoops(scf::ForOp rootFOrOp, ArrayRef<int64_t> sizes);
+
+/// Performs tiling fo imperfectly nested loops (with interchange) by
+/// strip-mining the `forOps` by `sizes` and sinking them, in their order of
+/// occurrence in `forOps`, under each of the `targets`.
+/// Returns the new AffineForOps, one per each of (`forOps`, `targets`) pair,
+/// nested immediately under each of `targets`.
+SmallVector<Loops, 8> tile(ArrayRef<scf::ForOp> forOps, ArrayRef<Value> sizes,
+                           ArrayRef<scf::ForOp> targets);
+
+/// Performs tiling (with interchange) by strip-mining the `forOps` by `sizes`
+/// and sinking them, in their order of occurrence in `forOps`, under `target`.
+/// Returns the new AffineForOps, one per `forOps`, nested immediately under
+/// `target`.
+Loops tile(ArrayRef<scf::ForOp> forOps, ArrayRef<Value> sizes,
+           scf::ForOp target);
+
+/// Tile a nest of scf::ForOp loops rooted at `rootForOp` with the given
+/// (parametric) sizes. Sizes are expected to be strictly positive values at
+/// runtime.  If more sizes than loops are provided, discard the trailing values
+/// in sizes.  Assumes the loop nest is permutable.
+/// Returns the newly created intra-tile loops.
+Loops tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef<Value> sizes);
+
+/// Get perfectly nested sequence of loops starting at root of loop nest
+/// (the first op being another AffineFor, and the second op - a terminator).
+/// A loop is perfectly nested iff: the first op in the loop's body is another
+/// AffineForOp, and the second op is a terminator).
+void getPerfectlyNestedLoops(SmallVectorImpl<scf::ForOp> &nestedLoops,
+                             scf::ForOp root);
+
 } // namespace mlir
+
 #endif // MLIR_DIALECT_SCF_UTILS_H_
diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.h b/mlir/include/mlir/Interfaces/LoopLikeInterface.h
index 48399ad0d53a8..df4631690a3f9 100644
--- a/mlir/include/mlir/Interfaces/LoopLikeInterface.h
+++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.h
@@ -15,7 +15,20 @@
 
 #include "mlir/IR/OpDefinition.h"
 
+//===----------------------------------------------------------------------===//
+// LoopLike Interfaces
+//===----------------------------------------------------------------------===//
+
 /// Include the generated interface declarations.
 #include "mlir/Interfaces/LoopLikeInterface.h.inc"
 
+//===----------------------------------------------------------------------===//
+// LoopLike Utilities
+//===----------------------------------------------------------------------===//
+
+namespace mlir {
+/// Move loop invariant code out of a `looplike` operation.
+LogicalResult moveLoopInvariantCode(LoopLikeOpInterface looplike);
+} // namespace mlir
+
 #endif // MLIR_INTERFACES_LOOPLIKEINTERFACE_H_
diff --git a/mlir/include/mlir/Transforms/ControlFlowSinkUtils.h b/mlir/include/mlir/Transforms/ControlFlowSinkUtils.h
new file mode 100644
index 0000000000000..f45d753564f45
--- /dev/null
+++ b/mlir/include/mlir/Transforms/ControlFlowSinkUtils.h
@@ -0,0 +1,70 @@
+//===- ControlFlowSinkUtils.h - ControlFlow Sink Utils ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_TRANSFORMS_CONTROLFLOWSINKUTILS_H
+#define MLIR_TRANSFORMS_CONTROLFLOWSINKUTILS_H
+
+#include "mlir/Support/LLVM.h"
+
+namespace mlir {
+
+class DominanceInfo;
+class Operation;
+class Region;
+class RegionBranchOpInterface;
+
+/// Given a list of regions, perform control flow sinking on them. For each
+/// region, control-flow sinking moves operations that dominate the region but
+/// whose only users are in the region into the regions so that they aren't
+/// executed on paths where their results are not needed.
+///
+/// TODO: For the moment, this is a *simple* control-flow sink, i.e., no
+/// duplicating of ops. It should be made to accept a cost model to determine
+/// whether duplicating a particular op is profitable.
+///
+/// Example:
+///
+/// ```mlir
+/// %0 = arith.addi %arg0, %arg1
+/// scf.if %cond {
+///   scf.yield %0
+/// } else {
+///   scf.yield %arg2
+/// }
+/// ```
+///
+/// After control-flow sink:
+///
+/// ```mlir
+/// scf.if %cond {
+///   %0 = arith.addi %arg0, %arg1
+///   scf.yield %0
+/// } else {
+///   scf.yield %arg2
+/// }
+/// ```
+///
+/// Users must supply a callback `shouldMoveIntoRegion` that determines whether
+/// the given operation that only has users in the given operation should be
+/// moved into that region.
+///
+/// Returns the number of operations sunk.
+size_t
+controlFlowSink(ArrayRef<Region *> regions, DominanceInfo &domInfo,
+                function_ref<bool(Operation *, Region *)> shouldMoveIntoRegion);
+
+/// Populates `regions` with regions of the provided region branch op that are
+/// executed at most once at that are reachable given the current operands of
+/// the op. These regions can be passed to `controlFlowSink` to perform sinking
+/// on the regions of the operation.
+void getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
+                                    SmallVectorImpl<Region *> &regions);
+
+} // namespace mlir
+
+#endif // MLIR_TRANSFORMS_CONTROLFLOWSINKUTILS_H
diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h
index 4876d705afcb9..e2b5f14d34adf 100644
--- a/mlir/include/mlir/Transforms/Passes.h
+++ b/mlir/include/mlir/Transforms/Passes.h
@@ -22,13 +22,8 @@
 
 namespace mlir {
 
-class AffineForOp;
 class GreedyRewriteConfig;
 
-/// Fusion mode to attempt. The default mode `Greedy` does both
-/// producer-consumer and sibling fusion.
-enum FusionMode { Greedy, ProducerConsumer, Sibling };
-
 //===----------------------------------------------------------------------===//
 // Passes
 //===----------------------------------------------------------------------===//
@@ -56,31 +51,10 @@ std::unique_ptr<Pass> createControlFlowSinkPass();
 /// Creates a pass to perform common sub expression elimination.
 std::unique_ptr<Pass> createCSEPass();
 
-/// Creates a loop fusion pass which fuses loops according to type of fusion
-/// specified in `fusionMode`. Buffers of size less than or equal to
-/// `localBufSizeThreshold` are promoted to memory space `fastMemorySpace`.
-std::unique_ptr<OperationPass<FuncOp>>
-createLoopFusionPass(unsigned fastMemorySpace = 0,
-                     uint64_t localBufSizeThreshold = 0,
-                     bool maximalFusion = false,
-                     enum FusionMode fusionMode = FusionMode::Greedy);
-
 /// Creates a loop invariant code motion pass that hoists loop invariant
 /// instructions out of the loop.
 std::unique_ptr<Pass> createLoopInvariantCodeMotionPass();
 
-/// Creates a pass to pipeline explicit movement of data across levels of the
-/// memory hierarchy.
-std::unique_ptr<OperationPass<FuncOp>> createPipelineDataTransferPass();
-
-/// Creates a pass that transforms perfectly nested loops with independent
-/// bounds into a single loop.
-std::unique_ptr<OperationPass<FuncOp>> createLoopCoalescingPass();
-
-/// Creates a pass that transforms a single ParallelLoop over N induction
-/// variables into another ParallelLoop over less than N induction variables.
-std::unique_ptr<Pass> createParallelLoopCollapsingPass();
-
 /// Creates a pass to strip debug information from a function.
 std::unique_ptr<Pass> createStripDebugInfoPass();
 
diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td
index 44bf475af24c9..3c3b43d75e440 100644
--- a/mlir/include/mlir/Transforms/Passes.td
+++ b/mlir/include/mlir/Transforms/Passes.td
@@ -16,207 +16,6 @@
 include "mlir/Pass/PassBase.td"
 include "mlir/Rewrite/PassUtil.td"
 
-def AffineLoopFusion : Pass<"affine-loop-fusion", "FuncOp"> {
-  let summary = "Fuse affine loop nests";
-  let description = [{
-    This pass performs fusion of loop nests using a slicing-based approach. It
-    combines two fusion strategies: producer-consumer fusion and sibling fusion.
-    Producer-consumer fusion is aimed at fusing pairs of loops where the first
-    one writes to a memref that the second reads. Sibling fusion targets pairs
-    of loops that share no dependences between them but that load from the same
-    memref. The fused loop nests, when possible, are rewritten to access
-    significantly smaller local buffers instead of the original memref's, and
-    the latter are often either completely optimized away or contracted. This
-    transformation leads to enhanced locality and lower memory footprint through
-    the elimination or contraction of temporaries/intermediate memref's. These
-    benefits are sometimes achieved at the expense of redundant computation
-    through a cost model that evaluates available choices such as the depth at
-    which a source slice should be materialized in the designation slice.
-
-    Example 1: Producer-consumer fusion.
-    Input:
-    ```mlir
-    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
-      %0 = memref.alloc() : memref<10xf32>
-      %1 = memref.alloc() : memref<10xf32>
-      %cst = arith.constant 0.000000e+00 : f32
-      affine.for %arg2 = 0 to 10 {
-        affine.store %cst, %0[%arg2] : memref<10xf32>
-        affine.store %cst, %1[%arg2] : memref<10xf32>
-      }
-      affine.for %arg2 = 0 to 10 {
-        %2 = affine.load %0[%arg2] : memref<10xf32>
-        %3 = arith.addf %2, %2 : f32
-        affine.store %3, %arg0[%arg2] : memref<10xf32>
-      }
-      affine.for %arg2 = 0 to 10 {
-        %2 = affine.load %1[%arg2] : memref<10xf32>
-        %3 = arith.mulf %2, %2 : f32
-        affine.store %3, %arg1[%arg2] : memref<10xf32>
-      }
-      return
-    }
-    ```
-    Output:
-    ```mlir
-    func @producer_consumer_fusion(%arg0: memref<10xf32>, %arg1: memref<10xf32>) {
-      %0 = memref.alloc() : memref<1xf32>
-      %1 = memref.alloc() : memref<1xf32>
-      %cst = arith.constant 0.000000e+00 : f32
-      affine.for %arg2 = 0 to 10 {
-        affine.store %cst, %0[0] : memref<1xf32>
-        affine.store %cst, %1[0] : memref<1xf32>
-        %2 = affine.load %1[0] : memref<1xf32>
-        %3 = arith.mulf %2, %2 : f32
-        affine.store %3, %arg1[%arg2] : memref<10xf32>
-        %4 = affine.load %0[0] : memref<1xf32>
-        %5 = arith.addf %4, %4 : f32
-        affine.store %5, %arg0[%arg2] : memref<10xf32>
-      }
-      return
-    }
-    ```
-
-    Example 2: Sibling fusion.
-    Input:
-    ```mlir
-    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
-                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
-                         %arg4: memref<10x10xf32>) {
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.mulf %0, %1 : f32
-          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.addf %0, %1 : f32
-          affine.store %2, %arg4[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      return
-    }
-    ```
-    Output:
-    ```mlir
-    func @sibling_fusion(%arg0: memref<10x10xf32>, %arg1: memref<10x10xf32>,
-                         %arg2: memref<10x10xf32>, %arg3: memref<10x10xf32>,
-                         %arg4: memref<10x10xf32>) {
-      affine.for %arg5 = 0 to 3 {
-        affine.for %arg6 = 0 to 3 {
-          %0 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %1 = affine.load %arg1[%arg5, %arg6] : memref<10x10xf32>
-          %2 = arith.mulf %0, %1 : f32
-          affine.store %2, %arg3[%arg5, %arg6] : memref<10x10xf32>
-          %3 = affine.load %arg0[%arg5, %arg6] : memref<10x10xf32>
-          %4 = affine.load %arg2[%arg5, %arg6] : memref<10x10xf32>
-          %5 = arith.addf %3, %4 : f32
-          affine.store %5, %arg4[%arg5, %arg6] : memref<10x10xf32>
-        }
-      }
-      return
-    }
-    ```
-  }];
-  let constructor = "mlir::createLoopFusionPass()";
-  let options = [
-    Option<"computeToleranceThreshold", "fusion-compute-tolerance", "double",
-           /*default=*/"0.30f", "Fractional increase in additional computation "
-                                "tolerated while fusing">,
-    Option<"fastMemorySpace", "fusion-fast-mem-space", "unsigned",
-           /*default=*/"0",
-           "Faster memory space number to promote fusion buffers to">,
-    Option<"localBufSizeThreshold", "fusion-local-buf-threshold", "uint64_t",
-           /*default=*/"0", "Threshold size (KiB) for promoting local buffers "
-                            "to fast memory space">,
-    Option<"maximalFusion", "fusion-maximal", "bool", /*default=*/"false",
-           "Enables maximal loop fusion">,
-    Option<"affineFusionMode", "mode", "enum FusionMode",
-           "mlir::FusionMode::Greedy", "fusion mode to attempt",
-           "llvm::cl::values(clEnumValN(mlir::FusionMode::Greedy,"
-           " \"greedy\", \"Perform greedy (both producer-consumer and sibling)  fusion\"), "
-           "clEnumValN( mlir::FusionMode::ProducerConsumer, "
-           "\"producer\", \"Perform only producer-consumer fusion\"), "
-           "clEnumValN( mlir::FusionMode::Sibling, "
-           "\"sibling\", \"Perform only sibling fusion\"))">,
-    ];
-  let dependentDialects = ["memref::MemRefDialect"];
-}
-
-def AffinePipelineDataTransfer
-    : Pass<"affine-pipeline-data-transfer", "FuncOp"> {
-  let summary = "Pipeline non-blocking data transfers between explicitly "
-                "managed levels of the memory hierarchy";
-  let description = [{
-    This pass performs a transformation to overlap non-blocking DMA operations
-    in a loop with computations through double buffering. This is achieved by
-    advancing dma_start operations with respect to other operations.
-
-    Input
-
-    ```mlir
-    func @pipelinedatatransfer() {
-      %0 = memref.alloc() : memref<256xf32>
-      %1 = memref.alloc() : memref<32xf32, 1>
-      %2 = memref.alloc() : memref<1xf32>
-      %c0 = arith.constant 0 : index
-      %c128 = arith.constant 128 : index
-      affine.for %i0 = 0 to 8 {
-        affine.dma_start %0[%i0], %1[%i0], %2[%c0], %c128 : memref<256xf32>, memref<32xf32, 1>, memref<1xf32>
-        affine.dma_wait %2[%c0], %c128 : memref<1xf32>
-        %3 = affine.load %1[%i0] : memref<32xf32, 1>
-        %4 = "compute"(%3) : (f32) -> f32
-        affine.store %4, %1[%i0] : memref<32xf32, 1>
-      }
-      return
-    }
-    ```
-
-    Output
-
-    ```mlir
-    module {
-      func @pipelinedatatransfer() {
-        %c8 = arith.constant 8 : index
-        %c0 = arith.constant 0 : index
-        %0 = memref.alloc() : memref<256xf32>
-        %c0_0 = arith.constant 0 : index
-        %c128 = arith.constant 128 : index
-        %1 = memref.alloc() : memref<2x32xf32, 1>
-        %2 = memref.alloc() : memref<2x1xf32>
-        affine.dma_start %0[%c0], %1[%c0 mod 2, %c0], %2[%c0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-        affine.for %arg0 = 1 to 8 {
-          affine.dma_start %0[%arg0], %1[%arg0 mod 2, %arg0], %2[%arg0 mod 2, symbol(%c0_0)], %c128 : memref<256xf32>, memref<2x32xf32, 1>, memref<2x1xf32>
-          %8 = affine.apply #map3(%arg0)
-          %9 = affine.apply #map4(%8)
-          %10 = affine.apply #map4(%8)
-          affine.dma_wait %2[%8 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
-          %11 = affine.load %1[%8 mod 2, %8] : memref<2x32xf32, 1>
-          %12 = "compute"(%11) : (f32) -> f32
-          affine.store %12, %1[%8 mod 2, %8] : memref<2x32xf32, 1>
-        }
-        %3 = affine.apply #map3(%c8)
-        %4 = affine.apply #map4(%3)
-        %5 = affine.apply #map4(%3)
-        affine.dma_wait %2[%3 mod 2, symbol(%c0_0)], %c128 : memref<2x1xf32>
-        %6 = affine.load %1[%3 mod 2, %3] : memref<2x32xf32, 1>
-        %7 = "compute"(%6) : (f32) -> f32
-        affine.store %7, %1[%3 mod 2, %3] : memref<2x32xf32, 1>
-        memref.dealloc %2 : memref<2x1xf32>
-        memref.dealloc %1 : memref<2x32xf32, 1>
-        return
-      }
-    }
-    ```
-  }];
-  let constructor = "mlir::createPipelineDataTransferPass()";
-}
-
 def Canonicalizer : Pass<"canonicalize"> {
   let summary = "Canonicalize operations";
   let description = [{
@@ -339,34 +138,11 @@ def LocationSnapshot : Pass<"snapshot-op-locations"> {
   ];
 }
 
-def LoopCoalescing : Pass<"loop-coalescing", "FuncOp"> {
-  let summary = "Coalesce nested loops with independent bounds into a single "
-                "loop";
-  let constructor = "mlir::createLoopCoalescingPass()";
-  let dependentDialects = ["arith::ArithmeticDialect"];
-}
-
 def LoopInvariantCodeMotion : Pass<"loop-invariant-code-motion"> {
   let summary = "Hoist loop invariant instructions outside of the loop";
   let constructor = "mlir::createLoopInvariantCodeMotionPass()";
 }
 
-def ParallelLoopCollapsing : Pass<"parallel-loop-collapsing"> {
-  let summary = "Collapse parallel loops to use less induction variables";
-  let constructor = "mlir::createParallelLoopCollapsingPass()";
-  let options = [
-    ListOption<"clCollapsedIndices0", "collapsed-indices-0", "unsigned",
-               "Which loop indices to combine 0th loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-    ListOption<"clCollapsedIndices1", "collapsed-indices-1", "unsigned",
-               "Which loop indices to combine into the position 1 loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-    ListOption<"clCollapsedIndices2", "collapsed-indices-2", "unsigned",
-               "Which loop indices to combine into the position 2 loop index",
-               "llvm::cl::MiscFlags::CommaSeparated">,
-  ];
-}
-
 def PrintOpStats : Pass<"print-op-stats"> {
   let summary = "Print statistics of operations";
   let constructor = "mlir::createPrintOpStatsPass()";
diff --git a/mlir/include/mlir/Transforms/Utils.h b/mlir/include/mlir/Transforms/Utils.h
deleted file mode 100644
index 5efbb19b08d25..0000000000000
--- a/mlir/include/mlir/Transforms/Utils.h
+++ /dev/null
@@ -1,200 +0,0 @@
-//===- Utils.h - General transformation utilities ---------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This header file defines prototypes for various transformation utilities for
-// memref's and non-loop IR structures. These are not passes by themselves but
-// are used either by passes, optimization sequences, or in turn by other
-// transformation utilities.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_TRANSFORMS_UTILS_H
-#define MLIR_TRANSFORMS_UTILS_H
-
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/IR/AffineMap.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-
-namespace mlir {
-
-class AffineApplyOp;
-class AffineForOp;
-class DominanceInfo;
-class Location;
-class OpBuilder;
-
-namespace memref {
-class AllocOp;
-} // namespace memref
-
-/// Replaces all "dereferencing" uses of `oldMemRef` with `newMemRef` while
-/// optionally remapping the old memref's indices using the supplied affine map,
-/// `indexRemap`. The new memref could be of a different shape or rank.
-/// `extraIndices` provides any additional access indices to be added to the
-/// start.
-///
-/// `indexRemap` remaps indices of the old memref access to a new set of indices
-/// that are used to index the memref. Additional input operands to indexRemap
-/// can be optionally provided in `extraOperands`, and they occupy the start
-/// of its input list. `indexRemap`'s dimensional inputs are expected to
-/// correspond to memref's indices, and its symbolic inputs if any should be
-/// provided in `symbolOperands`.
-///
-/// `domOpFilter`, if non-null, restricts the replacement to only those
-/// operations that are dominated by the former; similarly, `postDomOpFilter`
-/// restricts replacement to only those operations that are postdominated by it.
-///
-/// 'allowNonDereferencingOps', if set, allows replacement of non-dereferencing
-/// uses of a memref without any requirement for access index rewrites as long
-/// as the user operation has the MemRefsNormalizable trait. The default value
-/// of this flag is false.
-///
-/// 'replaceInDeallocOp', if set, lets DeallocOp, a non-dereferencing user, to
-/// also be a candidate for replacement. The default value of this flag is
-/// false.
-///
-/// Returns true on success and false if the replacement is not possible,
-/// whenever a memref is used as an operand in a non-dereferencing context and
-/// 'allowNonDereferencingOps' is false, except for dealloc's on the memref
-/// which are left untouched. See comments at function definition for an
-/// example.
-//
-//  Ex: to replace load %A[%i, %j] with load %Abuf[%t mod 2, %ii - %i, %j]:
-//  The SSA value corresponding to '%t mod 2' should be in 'extraIndices', and
-//  index remap will perform (%i, %j) -> (%ii - %i, %j), i.e., indexRemap = (d0,
-//  d1, d2) -> (d0 - d1, d2), and %ii will be the extra operand. Without any
-//  extra operands, note that 'indexRemap' would just be applied to existing
-//  indices (%i, %j).
-//  TODO: allow extraIndices to be added at any position.
-LogicalResult replaceAllMemRefUsesWith(
-    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices = {},
-    AffineMap indexRemap = AffineMap(), ArrayRef<Value> extraOperands = {},
-    ArrayRef<Value> symbolOperands = {}, Operation *domOpFilter = nullptr,
-    Operation *postDomOpFilter = nullptr, bool allowNonDereferencingOps = false,
-    bool replaceInDeallocOp = false);
-
-/// Performs the same replacement as the other version above but only for the
-/// dereferencing uses of `oldMemRef` in `op`, except in cases where
-/// 'allowNonDereferencingOps' is set to true where we replace the
-/// non-dereferencing uses as well.
-LogicalResult replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
-                                       Operation *op,
-                                       ArrayRef<Value> extraIndices = {},
-                                       AffineMap indexRemap = AffineMap(),
-                                       ArrayRef<Value> extraOperands = {},
-                                       ArrayRef<Value> symbolOperands = {},
-                                       bool allowNonDereferencingOps = false);
-
-/// Rewrites the memref defined by this alloc op to have an identity layout map
-/// and updates all its indexing uses. Returns failure if any of its uses
-/// escape (while leaving the IR in a valid state).
-LogicalResult normalizeMemRef(memref::AllocOp *op);
-
-/// Uses the old memref type map layout and computes the new memref type to have
-/// a new shape and a layout map, where the old layout map has been normalized
-/// to an identity layout map. It returns the old memref in case no
-/// normalization was needed or a failure occurs while transforming the old map
-/// layout to an identity layout map.
-MemRefType normalizeMemRefType(MemRefType memrefType, OpBuilder builder,
-                               unsigned numSymbolicOperands);
-
-/// Creates and inserts into 'builder' a new AffineApplyOp, with the number of
-/// its results equal to the number of operands, as a composition
-/// of all other AffineApplyOps reachable from input parameter 'operands'. If
-/// different operands were drawing results from multiple affine apply ops,
-/// these will also be collected into a single (multi-result) affine apply op.
-/// The final results of the composed AffineApplyOp are returned in output
-/// parameter 'results'. Returns the affine apply op created.
-Operation *createComposedAffineApplyOp(OpBuilder &builder, Location loc,
-                                       ArrayRef<Value> operands,
-                                       ArrayRef<Operation *> affineApplyOps,
-                                       SmallVectorImpl<Value> *results);
-
-/// Given an operation, inserts one or more single result affine apply
-/// operations, results of which are exclusively used by this operation.
-/// The operands of these newly created affine apply ops are
-/// guaranteed to be loop iterators or terminal symbols of a function.
-///
-/// Before
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   send %A[%idx], ...
-///   %v = "compute"(%idx, ...)
-///
-/// After
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   send %A[%idx], ...
-///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
-///   %v = "compute"(%idx_, ...)
-
-/// This allows the application of different transformations on send and
-/// compute (for eg. different shifts/delays)
-///
-/// Fills `sliceOps` with the list of affine.apply operations.
-/// In the following cases, `sliceOps` remains empty:
-///   1. If none of opInst's operands were the result of an affine.apply
-///      (i.e., there was no affine computation slice to create).
-///   2. If all the affine.apply op's supplying operands to this opInst did not
-///      have any uses other than those in this opInst.
-void createAffineComputationSlice(Operation *opInst,
-                                  SmallVectorImpl<AffineApplyOp> *sliceOps);
-
-/// Given a list of regions, perform control flow sinking on them. For each
-/// region, control-flow sinking moves operations that dominate the region but
-/// whose only users are in the region into the regions so that they aren't
-/// executed on paths where their results are not needed.
-///
-/// TODO: For the moment, this is a *simple* control-flow sink, i.e., no
-/// duplicating of ops. It should be made to accept a cost model to determine
-/// whether duplicating a particular op is profitable.
-///
-/// Example:
-///
-/// ```mlir
-/// %0 = arith.addi %arg0, %arg1
-/// scf.if %cond {
-///   scf.yield %0
-/// } else {
-///   scf.yield %arg2
-/// }
-/// ```
-///
-/// After control-flow sink:
-///
-/// ```mlir
-/// scf.if %cond {
-///   %0 = arith.addi %arg0, %arg1
-///   scf.yield %0
-/// } else {
-///   scf.yield %arg2
-/// }
-/// ```
-///
-/// Users must supply a callback `shouldMoveIntoRegion` that determines whether
-/// the given operation that only has users in the given operation should be
-/// moved into that region.
-///
-/// Returns the number of operations sunk.
-size_t
-controlFlowSink(ArrayRef<Region *> regions, DominanceInfo &domInfo,
-                function_ref<bool(Operation *, Region *)> shouldMoveIntoRegion);
-
-/// Populates `regions` with regions of the provided region branch op that are
-/// executed at most once at that are reachable given the current operands of
-/// the op. These regions can be passed to `controlFlowSink` to perform sinking
-/// on the regions of the operation.
-void getSinglyExecutedRegionsToSink(RegionBranchOpInterface branch,
-                                    SmallVectorImpl<Region *> &regions);
-
-} // namespace mlir
-
-#endif // MLIR_TRANSFORMS_UTILS_H
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
index d2faff9d32389..99a8b83882970 100644
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
@@ -27,7 +27,6 @@
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/ADT/Sequence.h"
diff --git a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
index 31e4cca72e90b..27808a0a72c61 100644
--- a/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
+++ b/mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp
@@ -23,7 +23,6 @@
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
 using namespace mlir::scf;
diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index f914bad4c6eaf..97a3be46b7ee8 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -33,7 +33,6 @@
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/IRBuilder.h"
diff --git a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
index 98968eb20d0a7..551c3ab5b2ccf 100644
--- a/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
+++ b/mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp
@@ -16,7 +16,6 @@
 
 #include "../PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/SCF.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
index f902909a7c3b2..587a0d2d67fa4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp
@@ -22,12 +22,12 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
index 19c16cf9ce2a7..bd331525b01c0 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineLoopInvariantCodeMotion.cpp
@@ -17,13 +17,13 @@
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallPtrSet.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
index 2fed4921c2a4b..61bae249bff98 100644
--- a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp
@@ -18,10 +18,10 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Dialect/Affine/Passes.h.inc"
 #include "mlir/Dialect/Affine/Utils.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/Support/Debug.h"
 #include <deque>
 
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
index 784c5d04f1a44..a99bb5789dac4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -4,9 +4,12 @@ add_mlir_dialect_library(MLIRAffineTransforms
   AffineLoopNormalize.cpp
   AffineParallelize.cpp
   AffineScalarReplacement.cpp
+  LoopCoalescing.cpp
+  LoopFusion.cpp
   LoopTiling.cpp
   LoopUnroll.cpp
   LoopUnrollAndJam.cpp
+  PipelineDataTransfer.cpp
   SuperVectorize.cpp
   SimplifyAffineStructures.cpp
 
diff --git a/mlir/lib/Transforms/LoopCoalescing.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
similarity index 97%
rename from mlir/lib/Transforms/LoopCoalescing.cpp
rename to mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
index 4b6780398103a..75f57c0d947da 100644
--- a/mlir/lib/Transforms/LoopCoalescing.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp
@@ -8,9 +8,10 @@
 
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/Transforms/Passes.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Transforms/LoopFusion.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
similarity index 99%
rename from mlir/lib/Transforms/LoopFusion.cpp
rename to mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
index 3ab98e0ee20cb..588dc63fbbff0 100644
--- a/mlir/lib/Transforms/LoopFusion.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp
@@ -16,14 +16,14 @@
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopFusionUtils.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SetVector.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
index f15571660b4d8..a66fcdae2ecd4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp
@@ -17,11 +17,11 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 using namespace mlir;
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
index fb7cd11ebe6da..702319f8ffd38 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp
@@ -12,11 +12,11 @@
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
index b763017d0837f..273ffdb579d4b 100644
--- a/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp
@@ -37,12 +37,12 @@
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CommandLine.h"
 
diff --git a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
index c3d55a60e5634..a7262d76c88d3 100644
--- a/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/Affine/Transforms/PassDetail.h
@@ -9,6 +9,7 @@
 #ifndef DIALECT_AFFINE_TRANSFORMS_PASSDETAIL_H_
 #define DIALECT_AFFINE_TRANSFORMS_PASSDETAIL_H_
 
+#include "mlir/Dialect/Affine/Passes.h"
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
@@ -16,6 +17,10 @@ namespace mlir {
 template <typename ConcreteDialect>
 void registerDialect(DialectRegistry &registry);
 
+namespace arith {
+class ArithmeticDialect;
+} // namespace arith
+
 namespace linalg {
 class LinalgDialect;
 } // namespace linalg
diff --git a/mlir/lib/Transforms/PipelineDataTransfer.cpp b/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
similarity index 99%
rename from mlir/lib/Transforms/PipelineDataTransfer.cpp
rename to mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
index 87ac14e770249..7429ecef39f38 100644
--- a/mlir/lib/Transforms/PipelineDataTransfer.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp
@@ -11,17 +11,16 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
-#include "mlir/Transforms/Passes.h"
-
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/Utils/Utils.h"
 #include "mlir/IR/Builders.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Utils.h"
+#include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
index 08ccac678113e..d90b02c6f14c4 100644
--- a/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
+++ b/mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp
@@ -14,9 +14,9 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/Passes.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/Utils.h"
 
 #define DEBUG_TYPE "simplify-affine-structure"
 
diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
index 3bc37cfa3ba23..4393e2971a031 100644
--- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
@@ -1,4 +1,6 @@
 add_mlir_dialect_library(MLIRAffineUtils
+  LoopFusionUtils.cpp
+  LoopUtils.cpp
   Utils.cpp
 
   ADDITIONAL_HEADER_DIRS
@@ -7,5 +9,6 @@ add_mlir_dialect_library(MLIRAffineUtils
   LINK_LIBS PUBLIC
   MLIRAffine
   MLIRAnalysis
+  MLIRMemRef
   MLIRTransformUtils
   )
diff --git a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
similarity index 99%
rename from mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
rename to mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
index 54adec2c09b1a..dcaa88a6dca19 100644
--- a/mlir/lib/Transforms/Utils/LoopFusionUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp
@@ -10,21 +10,20 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Transforms/LoopFusionUtils.h"
-
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/IR/AffineExpr.h"
 #include "mlir/IR/AffineMap.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Operation.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Debug.h"
diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
similarity index 81%
rename from mlir/lib/Transforms/Utils/LoopUtils.cpp
rename to mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
index b9352ac2c2b75..f0e784aeb81dc 100644
--- a/mlir/lib/Transforms/Utils/LoopUtils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp
@@ -10,14 +10,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Transforms/LoopUtils.h"
-
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/BlockAndValueMapping.h"
@@ -25,7 +25,6 @@
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 #include "mlir/Transforms/RegionUtils.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Debug.h"
@@ -108,42 +107,9 @@ getCleanupLoopLowerBound(AffineForOp forOp, unsigned unrollFactor,
     lb.erase();
 }
 
-// Build the IR that performs ceil division of a positive value by a constant:
-//    ceildiv(a, B) = divis(a + (B-1), B)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
-                             int64_t divisor) {
-  assert(divisor > 0 && "expected positive divisor");
-  assert(dividend.getType().isIndex() && "expected index-typed value");
-
-  Value divisorMinusOneCst =
-      builder.create<arith::ConstantIndexOp>(loc, divisor - 1);
-  Value divisorCst = builder.create<arith::ConstantIndexOp>(loc, divisor);
-  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
-  return builder.create<arith::DivSIOp>(loc, sum, divisorCst);
-}
-
-// Build the IR that performs ceil division of a positive value by another
-// positive value:
-//    ceildiv(a, b) = divis(a + (b - 1), b)
-// where divis is rounding-to-zero division.
-static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
-                             Value divisor) {
-  assert(dividend.getType().isIndex() && "expected index-typed value");
-
-  Value cstOne = builder.create<arith::ConstantIndexOp>(loc, 1);
-  Value divisorMinusOne = builder.create<arith::SubIOp>(loc, divisor, cstOne);
-  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOne);
-  return builder.create<arith::DivSIOp>(loc, sum, divisor);
-}
-
 /// Helper to replace uses of loop carried values (iter_args) and loop
-/// yield values while promoting single iteration affine.for and scf.for ops.
-template <typename AffineOrSCFForOp>
-static void replaceIterArgsAndYieldResults(AffineOrSCFForOp forOp) {
-  static_assert(
-      llvm::is_one_of<AffineOrSCFForOp, AffineForOp, scf::ForOp>::value,
-      "only for affine.for and scf.for ops");
+/// yield values while promoting single iteration affine.for ops.
+static void replaceIterArgsAndYieldResults(AffineForOp forOp) {
   // Replace uses of iter arguments with iter operands (initial values).
   auto iterOperands = forOp.getIterOperands();
   auto iterArgs = forOp.getRegionIterArgs();
@@ -203,46 +169,6 @@ LogicalResult mlir::promoteIfSingleIteration(AffineForOp forOp) {
   return success();
 }
 
-/// Promotes the loop body of a forOp to its containing block if the forOp
-/// it can be determined that the loop has a single iteration.
-LogicalResult mlir::promoteIfSingleIteration(scf::ForOp forOp) {
-  auto lbCstOp = forOp.getLowerBound().getDefiningOp<arith::ConstantIndexOp>();
-  auto ubCstOp = forOp.getUpperBound().getDefiningOp<arith::ConstantIndexOp>();
-  auto stepCstOp = forOp.getStep().getDefiningOp<arith::ConstantIndexOp>();
-  if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.value() < 0 ||
-      ubCstOp.value() < 0 || stepCstOp.value() < 0)
-    return failure();
-  int64_t tripCount =
-      mlir::ceilDiv(ubCstOp.value() - lbCstOp.value(), stepCstOp.value());
-  if (tripCount != 1)
-    return failure();
-  auto iv = forOp.getInductionVar();
-  iv.replaceAllUsesWith(lbCstOp);
-
-  replaceIterArgsAndYieldResults(forOp);
-
-  // Move the loop body operations, except for its terminator, to the loop's
-  // containing block.
-  auto *parentBlock = forOp->getBlock();
-  forOp.getBody()->getTerminator()->erase();
-  parentBlock->getOperations().splice(Block::iterator(forOp),
-                                      forOp.getBody()->getOperations());
-  forOp.erase();
-  return success();
-}
-
-/// Promotes all single iteration 'for' ops in `f`, i.e., moves
-/// their body into the containing Block.
-void mlir::promoteSingleIterationLoops(FuncOp f) {
-  // Gathers all innermost loops through a post order pruned walk.
-  f.walk([](Operation *op) {
-    if (auto forOp = dyn_cast<AffineForOp>(op))
-      (void)promoteIfSingleIteration(forOp);
-    else if (auto forOp = dyn_cast<scf::ForOp>(op))
-      (void)promoteIfSingleIteration(forOp);
-  });
-}
-
 /// Generates an affine.for op with the specified lower and upper bounds
 /// while generating the right IV remappings to realize shifts for operations in
 /// its body. The operations that go into the loop body are specified in
@@ -1011,38 +937,22 @@ mlir::tilePerfectlyNestedParametric(MutableArrayRef<AffineForOp> input,
   return success();
 }
 
-/// Collect perfectly nested loops starting from `rootForOps`.  Loops are
-/// perfectly nested if each loop is the first and only non-terminator operation
-/// in the parent loop.  Collect at most `maxLoops` loops and append them to
-/// `forOps`.
-template <typename T>
-static void getPerfectlyNestedLoopsImpl(
-    SmallVectorImpl<T> &forOps, T rootForOp,
-    unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
-  for (unsigned i = 0; i < maxLoops; ++i) {
-    forOps.push_back(rootForOp);
-    Block &body = rootForOp.getRegion().front();
-    if (body.begin() != std::prev(body.end(), 2))
-      return;
-
-    rootForOp = dyn_cast<T>(&body.front());
-    if (!rootForOp)
-      return;
-  }
-}
-
 /// Get perfectly nested sequence of loops starting at root of loop nest
 /// (the first op being another AffineFor, and the second op - a terminator).
 /// A loop is perfectly nested iff: the first op in the loop's body is another
 /// AffineForOp, and the second op is a terminator).
 void mlir::getPerfectlyNestedLoops(SmallVectorImpl<AffineForOp> &nestedLoops,
                                    AffineForOp root) {
-  getPerfectlyNestedLoopsImpl(nestedLoops, root);
-}
+  for (unsigned i = 0; i < std::numeric_limits<unsigned>::max(); ++i) {
+    nestedLoops.push_back(root);
+    Block &body = root.getRegion().front();
+    if (body.begin() != std::prev(body.end(), 2))
+      return;
 
-void mlir::getPerfectlyNestedLoops(SmallVectorImpl<scf::ForOp> &nestedLoops,
-                                   scf::ForOp root) {
-  getPerfectlyNestedLoopsImpl(nestedLoops, root);
+    root = dyn_cast<AffineForOp>(&body.front());
+    if (!root)
+      return;
+  }
 }
 
 /// Identify valid and profitable bands of loops to tile. This is currently just
@@ -1084,10 +994,10 @@ LogicalResult mlir::loopUnrollUpToFactor(AffineForOp forOp,
   return loopUnrollByFactor(forOp, unrollFactor);
 }
 
-/// Generates unrolled copies of AffineForOp or scf::ForOp 'loopBodyBlock', with
-/// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
-/// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
-/// unrolled iteration using annotateFn.
+/// Generates unrolled copies of AffineForOp 'loopBodyBlock', with associated
+/// 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap 'forOpIV' for each
+/// unrolled body. If specified, annotates the Ops in each unrolled iteration
+/// using annotateFn.
 static void generateUnrolledLoop(
     Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
     function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
@@ -1237,127 +1147,6 @@ LogicalResult mlir::loopUnrollByFactor(
   return success();
 }
 
-/// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
-LogicalResult mlir::loopUnrollByFactor(
-    scf::ForOp forOp, uint64_t unrollFactor,
-    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
-  assert(unrollFactor > 0 && "expected positive unroll factor");
-
-  // Return if the loop body is empty.
-  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
-    return success();
-
-  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
-  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
-  OpBuilder boundsBuilder(forOp);
-  auto loc = forOp.getLoc();
-  auto step = forOp.getStep();
-  Value upperBoundUnrolled;
-  Value stepUnrolled;
-  bool generateEpilogueLoop = true;
-
-  auto lbCstOp = forOp.getLowerBound().getDefiningOp<arith::ConstantIndexOp>();
-  auto ubCstOp = forOp.getUpperBound().getDefiningOp<arith::ConstantIndexOp>();
-  auto stepCstOp = forOp.getStep().getDefiningOp<arith::ConstantIndexOp>();
-  if (lbCstOp && ubCstOp && stepCstOp) {
-    // Constant loop bounds computation.
-    int64_t lbCst = lbCstOp.value();
-    int64_t ubCst = ubCstOp.value();
-    int64_t stepCst = stepCstOp.value();
-    assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
-           "expected positive loop bounds and step");
-    int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
-
-    if (unrollFactor == 1) {
-      if (tripCount == 1 && failed(promoteIfSingleIteration(forOp)))
-        return failure();
-      return success();
-    }
-
-    int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
-    int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
-    assert(upperBoundUnrolledCst <= ubCst);
-    int64_t stepUnrolledCst = stepCst * unrollFactor;
-
-    // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
-    generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
-    if (generateEpilogueLoop)
-      upperBoundUnrolled = boundsBuilder.create<arith::ConstantIndexOp>(
-          loc, upperBoundUnrolledCst);
-    else
-      upperBoundUnrolled = ubCstOp;
-
-    // Create constant for 'stepUnrolled'.
-    stepUnrolled = stepCst == stepUnrolledCst
-                       ? step
-                       : boundsBuilder.create<arith::ConstantIndexOp>(
-                             loc, stepUnrolledCst);
-  } else {
-    // Dynamic loop bounds computation.
-    // TODO: Add dynamic asserts for negative lb/ub/step, or
-    // consider using ceilDiv from AffineApplyExpander.
-    auto lowerBound = forOp.getLowerBound();
-    auto upperBound = forOp.getUpperBound();
-    Value diff =
-        boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
-    Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
-    Value unrollFactorCst =
-        boundsBuilder.create<arith::ConstantIndexOp>(loc, unrollFactor);
-    Value tripCountRem =
-        boundsBuilder.create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
-    // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
-    Value tripCountEvenMultiple =
-        boundsBuilder.create<arith::SubIOp>(loc, tripCount, tripCountRem);
-    // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
-    upperBoundUnrolled = boundsBuilder.create<arith::AddIOp>(
-        loc, lowerBound,
-        boundsBuilder.create<arith::MulIOp>(loc, tripCountEvenMultiple, step));
-    // Scale 'step' by 'unrollFactor'.
-    stepUnrolled =
-        boundsBuilder.create<arith::MulIOp>(loc, step, unrollFactorCst);
-  }
-
-  // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
-  if (generateEpilogueLoop) {
-    OpBuilder epilogueBuilder(forOp->getContext());
-    epilogueBuilder.setInsertionPoint(forOp->getBlock(),
-                                      std::next(Block::iterator(forOp)));
-    auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.clone(*forOp));
-    epilogueForOp.setLowerBound(upperBoundUnrolled);
-
-    // Update uses of loop results.
-    auto results = forOp.getResults();
-    auto epilogueResults = epilogueForOp.getResults();
-    auto epilogueIterOperands = epilogueForOp.getIterOperands();
-
-    for (auto e : llvm::zip(results, epilogueResults, epilogueIterOperands)) {
-      std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
-      epilogueForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));
-    }
-    (void)promoteIfSingleIteration(epilogueForOp);
-  }
-
-  // Create unrolled loop.
-  forOp.setUpperBound(upperBoundUnrolled);
-  forOp.setStep(stepUnrolled);
-
-  auto iterArgs = ValueRange(forOp.getRegionIterArgs());
-  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
-
-  generateUnrolledLoop(
-      forOp.getBody(), forOp.getInductionVar(), unrollFactor,
-      [&](unsigned i, Value iv, OpBuilder b) {
-        // iv' = iv + step * i;
-        auto stride = b.create<arith::MulIOp>(
-            loc, step, b.create<arith::ConstantIndexOp>(loc, i));
-        return b.create<arith::AddIOp>(loc, iv, stride);
-      },
-      annotateFn, iterArgs, yieldedValues);
-  // Promote the loop body up if this has turned into a single iteration loop.
-  (void)promoteIfSingleIteration(forOp);
-  return success();
-}
-
 LogicalResult mlir::loopUnrollJamUpToFactor(AffineForOp forOp,
                                             uint64_t unrollJamFactor) {
   Optional<uint64_t> mayBeConstantTripCount = getConstantTripCount(forOp);
@@ -1888,61 +1677,25 @@ stripmineSink(AffineForOp forOp, uint64_t factor,
   return innerLoops;
 }
 
-static Loops stripmineSink(scf::ForOp forOp, Value factor,
-                           ArrayRef<scf::ForOp> targets) {
-  auto originalStep = forOp.getStep();
-  auto iv = forOp.getInductionVar();
-
-  OpBuilder b(forOp);
-  forOp.setStep(b.create<arith::MulIOp>(forOp.getLoc(), originalStep, factor));
-
-  Loops innerLoops;
-  for (auto t : targets) {
-    // Save information for splicing ops out of t when done
-    auto begin = t.getBody()->begin();
-    auto nOps = t.getBody()->getOperations().size();
-
-    // Insert newForOp before the terminator of `t`.
-    auto b = OpBuilder::atBlockTerminator((t.getBody()));
-    Value stepped = b.create<arith::AddIOp>(t.getLoc(), iv, forOp.getStep());
-    Value less = b.create<arith::CmpIOp>(t.getLoc(), arith::CmpIPredicate::slt,
-                                         forOp.getUpperBound(), stepped);
-    Value ub =
-        b.create<SelectOp>(t.getLoc(), less, forOp.getUpperBound(), stepped);
-
-    // Splice [begin, begin + nOps - 1) into `newForOp` and replace uses.
-    auto newForOp = b.create<scf::ForOp>(t.getLoc(), iv, ub, originalStep);
-    newForOp.getBody()->getOperations().splice(
-        newForOp.getBody()->getOperations().begin(),
-        t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
-    replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
-                               newForOp.getRegion());
-
-    innerLoops.push_back(newForOp);
-  }
-
-  return innerLoops;
-}
-
 // Stripmines a `forOp` by `factor` and sinks it under a single `target`.
 // Returns the new AffineForOps, nested immediately under `target`.
-template <typename ForType, typename SizeType>
-static ForType stripmineSink(ForType forOp, SizeType factor, ForType target) {
+template <typename SizeType>
+static AffineForOp stripmineSink(AffineForOp forOp, SizeType factor,
+                                 AffineForOp target) {
   // TODO: Use cheap structural assertions that targets are nested under
   // forOp and that targets are not nested under each other when DominanceInfo
   // exposes the capability. It seems overkill to construct a whole function
   // dominance tree at this point.
-  auto res = stripmineSink(forOp, factor, ArrayRef<ForType>{target});
+  auto res = stripmineSink(forOp, factor, ArrayRef<AffineForOp>(target));
   assert(res.size() == 1 && "Expected 1 inner forOp");
   return res[0];
 }
 
-template <typename ForType, typename SizeType>
-static SmallVector<SmallVector<ForType, 8>, 8>
-tileImpl(ArrayRef<ForType> forOps, ArrayRef<SizeType> sizes,
-         ArrayRef<ForType> targets) {
-  SmallVector<SmallVector<ForType, 8>, 8> res;
-  SmallVector<ForType, 8> currentTargets(targets.begin(), targets.end());
+SmallVector<SmallVector<AffineForOp, 8>, 8>
+mlir::tile(ArrayRef<AffineForOp> forOps, ArrayRef<uint64_t> sizes,
+           ArrayRef<AffineForOp> targets) {
+  SmallVector<SmallVector<AffineForOp, 8>, 8> res;
+  SmallVector<AffineForOp, 8> currentTargets(targets.begin(), targets.end());
   for (auto it : llvm::zip(forOps, sizes)) {
     auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
     res.push_back(step);
@@ -1951,286 +1704,15 @@ tileImpl(ArrayRef<ForType> forOps, ArrayRef<SizeType> sizes,
   return res;
 }
 
-SmallVector<SmallVector<AffineForOp, 8>, 8>
-mlir::tile(ArrayRef<AffineForOp> forOps, ArrayRef<uint64_t> sizes,
-           ArrayRef<AffineForOp> targets) {
-  return tileImpl(forOps, sizes, targets);
-}
-
-SmallVector<Loops, 8> mlir::tile(ArrayRef<scf::ForOp> forOps,
-                                 ArrayRef<Value> sizes,
-                                 ArrayRef<scf::ForOp> targets) {
-  return tileImpl(forOps, sizes, targets);
-}
-
-template <typename ForType, typename SizeType>
-static SmallVector<ForType, 8>
-tileImpl(ArrayRef<ForType> forOps, ArrayRef<SizeType> sizes, ForType target) {
-  SmallVector<ForType, 8> res;
-  for (auto loops : tile(forOps, sizes, ArrayRef<ForType>{target})) {
-    assert(loops.size() == 1);
-    res.push_back(loops[0]);
-  }
-  return res;
-}
-
 SmallVector<AffineForOp, 8> mlir::tile(ArrayRef<AffineForOp> forOps,
                                        ArrayRef<uint64_t> sizes,
                                        AffineForOp target) {
-  return tileImpl(forOps, sizes, target);
-}
-
-Loops mlir::tile(ArrayRef<scf::ForOp> forOps, ArrayRef<Value> sizes,
-                 scf::ForOp target) {
-  return tileImpl(forOps, sizes, target);
-}
-
-Loops mlir::tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef<Value> sizes) {
-  // Collect perfectly nested loops.  If more size values provided than nested
-  // loops available, truncate `sizes`.
-  SmallVector<scf::ForOp, 4> forOps;
-  forOps.reserve(sizes.size());
-  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
-  if (forOps.size() < sizes.size())
-    sizes = sizes.take_front(forOps.size());
-
-  return ::tile(forOps, sizes, forOps.back());
-}
-
-// Hoist the ops within `outer` that appear before `inner`.
-// Such ops include the ops that have been introduced by parametric tiling.
-// Ops that come from triangular loops (i.e. that belong to the program slice
-// rooted at `outer`) and ops that have side effects cannot be hoisted.
-// Return failure when any op fails to hoist.
-static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner) {
-  SetVector<Operation *> forwardSlice;
-  getForwardSlice(
-      outer.getInductionVar(), &forwardSlice,
-      [&inner](Operation *op) { return op != inner.getOperation(); });
-  LogicalResult status = success();
-  SmallVector<Operation *, 8> toHoist;
-  for (auto &op : outer.getBody()->without_terminator()) {
-    // Stop when encountering the inner loop.
-    if (&op == inner.getOperation())
-      break;
-    // Skip over non-hoistable ops.
-    if (forwardSlice.count(&op) > 0) {
-      status = failure();
-      continue;
-    }
-    // Skip intermediate scf::ForOp, these are not considered a failure.
-    if (isa<scf::ForOp>(op))
-      continue;
-    // Skip other ops with regions.
-    if (op.getNumRegions() > 0) {
-      status = failure();
-      continue;
-    }
-    // Skip if op has side effects.
-    // TODO: loads to immutable memory regions are ok.
-    if (!MemoryEffectOpInterface::hasNoEffect(&op)) {
-      status = failure();
-      continue;
-    }
-    toHoist.push_back(&op);
-  }
-  auto *outerForOp = outer.getOperation();
-  for (auto *op : toHoist)
-    op->moveBefore(outerForOp);
-  return status;
-}
-
-// Traverse the interTile and intraTile loops and try to hoist ops such that
-// bands of perfectly nested loops are isolated.
-// Return failure if either perfect interTile or perfect intraTile bands cannot
-// be formed.
-static LogicalResult tryIsolateBands(const TileLoops &tileLoops) {
-  LogicalResult status = success();
-  const Loops &interTile = tileLoops.first;
-  const Loops &intraTile = tileLoops.second;
-  auto size = interTile.size();
-  assert(size == intraTile.size());
-  if (size <= 1)
-    return success();
-  for (unsigned s = 1; s < size; ++s)
-    status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
-                               : failure();
-  for (unsigned s = 1; s < size; ++s)
-    status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
-                               : failure();
-  return status;
-}
-
-TileLoops mlir::extractFixedOuterLoops(scf::ForOp rootForOp,
-                                       ArrayRef<int64_t> sizes) {
-  // Collect perfectly nested loops.  If more size values provided than nested
-  // loops available, truncate `sizes`.
-  SmallVector<scf::ForOp, 4> forOps;
-  forOps.reserve(sizes.size());
-  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
-  if (forOps.size() < sizes.size())
-    sizes = sizes.take_front(forOps.size());
-
-  // Compute the tile sizes such that i-th outer loop executes size[i]
-  // iterations.  Given that the loop current executes
-  //   numIterations = ceildiv((upperBound - lowerBound), step)
-  // iterations, we need to tile with size ceildiv(numIterations, size[i]).
-  SmallVector<Value, 4> tileSizes;
-  tileSizes.reserve(sizes.size());
-  for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
-    assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
-
-    auto forOp = forOps[i];
-    OpBuilder builder(forOp);
-    auto loc = forOp.getLoc();
-    Value diff = builder.create<arith::SubIOp>(loc, forOp.getUpperBound(),
-                                               forOp.getLowerBound());
-    Value numIterations = ceilDivPositive(builder, loc, diff, forOp.getStep());
-    Value iterationsPerBlock =
-        ceilDivPositive(builder, loc, numIterations, sizes[i]);
-    tileSizes.push_back(iterationsPerBlock);
-  }
-
-  // Call parametric tiling with the given sizes.
-  auto intraTile = tile(forOps, tileSizes, forOps.back());
-  TileLoops tileLoops = std::make_pair(forOps, intraTile);
-
-  // TODO: for now we just ignore the result of band isolation.
-  // In the future, mapping decisions may be impacted by the ability to
-  // isolate perfectly nested bands.
-  (void)tryIsolateBands(tileLoops);
-
-  return tileLoops;
-}
-
-/// Return the new lower bound, upper bound, and step in that order. Insert any
-/// additional bounds calculations before the given builder and any additional
-/// conversion back to the original loop induction value inside the given Block.
-static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
-                                OpBuilder &insideLoopBuilder, Location loc,
-                                Value lowerBound, Value upperBound, Value step,
-                                Value inductionVar) {
-  // Check if the loop is already known to have a constant zero lower bound or
-  // a constant one step.
-  bool isZeroBased = false;
-  if (auto ubCst = lowerBound.getDefiningOp<arith::ConstantIndexOp>())
-    isZeroBased = ubCst.value() == 0;
-
-  bool isStepOne = false;
-  if (auto stepCst = step.getDefiningOp<arith::ConstantIndexOp>())
-    isStepOne = stepCst.value() == 1;
-
-  // Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
-  // assuming the step is strictly positive.  Update the bounds and the step
-  // of the loop to go from 0 to the number of iterations, if necessary.
-  // TODO: introduce support for negative steps or emit dynamic asserts
-  // on step positivity, whatever gets implemented first.
-  if (isZeroBased && isStepOne)
-    return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound,
-            /*step=*/step};
-
-  Value diff = boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
-  Value newUpperBound = ceilDivPositive(boundsBuilder, loc, diff, step);
-
-  Value newLowerBound =
-      isZeroBased ? lowerBound
-                  : boundsBuilder.create<arith::ConstantIndexOp>(loc, 0);
-  Value newStep =
-      isStepOne ? step : boundsBuilder.create<arith::ConstantIndexOp>(loc, 1);
-
-  // Insert code computing the value of the original loop induction variable
-  // from the "normalized" one.
-  Value scaled =
-      isStepOne
-          ? inductionVar
-          : insideLoopBuilder.create<arith::MulIOp>(loc, inductionVar, step);
-  Value shifted =
-      isZeroBased
-          ? scaled
-          : insideLoopBuilder.create<arith::AddIOp>(loc, scaled, lowerBound);
-
-  SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),
-                                       shifted.getDefiningOp()};
-  inductionVar.replaceAllUsesExcept(shifted, preserve);
-  return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound,
-          /*step=*/newStep};
-}
-
-/// Transform a loop with a strictly positive step
-///   for %i = %lb to %ub step %s
-/// into a 0-based loop with step 1
-///   for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
-///     %i = %ii * %s + %lb
-/// Insert the induction variable remapping in the body of `inner`, which is
-/// expected to be either `loop` or another loop perfectly nested under `loop`.
-/// Insert the definition of new bounds immediate before `outer`, which is
-/// expected to be either `loop` or its parent in the loop nest.
-static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) {
-  OpBuilder builder(outer);
-  OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody());
-  auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(),
-                                  loop.getLowerBound(), loop.getUpperBound(),
-                                  loop.getStep(), loop.getInductionVar());
-
-  loop.setLowerBound(loopPieces.lowerBound);
-  loop.setUpperBound(loopPieces.upperBound);
-  loop.setStep(loopPieces.step);
-}
-
-void mlir::coalesceLoops(MutableArrayRef<scf::ForOp> loops) {
-  if (loops.size() < 2)
-    return;
-
-  scf::ForOp innermost = loops.back();
-  scf::ForOp outermost = loops.front();
-
-  // 1. Make sure all loops iterate from 0 to upperBound with step 1.  This
-  // allows the following code to assume upperBound is the number of iterations.
-  for (auto loop : loops)
-    normalizeLoop(loop, outermost, innermost);
-
-  // 2. Emit code computing the upper bound of the coalesced loop as product
-  // of the number of iterations of all loops.
-  OpBuilder builder(outermost);
-  Location loc = outermost.getLoc();
-  Value upperBound = outermost.getUpperBound();
-  for (auto loop : loops.drop_front())
-    upperBound =
-        builder.create<arith::MulIOp>(loc, upperBound, loop.getUpperBound());
-  outermost.setUpperBound(upperBound);
-
-  builder.setInsertionPointToStart(outermost.getBody());
-
-  // 3. Remap induction variables. For each original loop, the value of the
-  // induction variable can be obtained by dividing the induction variable of
-  // the linearized loop by the total number of iterations of the loops nested
-  // in it modulo the number of iterations in this loop (remove the values
-  // related to the outer loops):
-  //   iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
-  // Compute these iteratively from the innermost loop by creating a "running
-  // quotient" of division by the range.
-  Value previous = outermost.getInductionVar();
-  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
-    unsigned idx = loops.size() - i - 1;
-    if (i != 0)
-      previous = builder.create<arith::DivSIOp>(loc, previous,
-                                                loops[idx + 1].getUpperBound());
-
-    Value iv = (i == e - 1) ? previous
-                            : builder.create<arith::RemSIOp>(
-                                  loc, previous, loops[idx].getUpperBound());
-    replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
-                               loops.back().getRegion());
+  SmallVector<AffineForOp, 8> res;
+  for (auto loops : tile(forOps, sizes, ArrayRef<AffineForOp>(target))) {
+    assert(loops.size() == 1);
+    res.push_back(loops[0]);
   }
-
-  // 4. Move the operations from the innermost just above the second-outermost
-  // loop, delete the extra terminator and the second-outermost loop.
-  scf::ForOp second = loops[1];
-  innermost.getBody()->back().erase();
-  outermost.getBody()->getOperations().splice(
-      Block::iterator(second.getOperation()),
-      innermost.getBody()->getOperations());
-  second.erase();
+  return res;
 }
 
 LogicalResult mlir::coalesceLoops(MutableArrayRef<AffineForOp> loops) {
@@ -2347,89 +1829,6 @@ LogicalResult mlir::coalesceLoops(MutableArrayRef<AffineForOp> loops) {
   return success();
 }
 
-void mlir::collapseParallelLoops(
-    scf::ParallelOp loops, ArrayRef<std::vector<unsigned>> combinedDimensions) {
-  OpBuilder outsideBuilder(loops);
-  Location loc = loops.getLoc();
-
-  // Presort combined dimensions.
-  auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
-  for (auto &dims : sortedDimensions)
-    std::sort(dims.begin(), dims.end());
-
-  // Normalize ParallelOp's iteration pattern.
-  SmallVector<Value, 3> normalizedLowerBounds, normalizedSteps,
-      normalizedUpperBounds;
-  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
-    OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody());
-    auto resultBounds =
-        normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
-                      loops.getLowerBound()[i], loops.getUpperBound()[i],
-                      loops.getStep()[i], loops.getBody()->getArgument(i));
-
-    normalizedLowerBounds.push_back(resultBounds.lowerBound);
-    normalizedUpperBounds.push_back(resultBounds.upperBound);
-    normalizedSteps.push_back(resultBounds.step);
-  }
-
-  // Combine iteration spaces.
-  SmallVector<Value, 3> lowerBounds, upperBounds, steps;
-  auto cst0 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 0);
-  auto cst1 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
-  for (unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) {
-    Value newUpperBound = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
-    for (auto idx : sortedDimensions[i]) {
-      newUpperBound = outsideBuilder.create<arith::MulIOp>(
-          loc, newUpperBound, normalizedUpperBounds[idx]);
-    }
-    lowerBounds.push_back(cst0);
-    steps.push_back(cst1);
-    upperBounds.push_back(newUpperBound);
-  }
-
-  // Create new ParallelLoop with conversions to the original induction values.
-  // The loop below uses divisions to get the relevant range of values in the
-  // new induction value that represent each range of the original induction
-  // value. The remainders then determine based on that range, which iteration
-  // of the original induction value this represents. This is a normalized value
-  // that is un-normalized already by the previous logic.
-  auto newPloop = outsideBuilder.create<scf::ParallelOp>(
-      loc, lowerBounds, upperBounds, steps,
-      [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) {
-        for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
-          Value previous = ploopIVs[i];
-          unsigned numberCombinedDimensions = combinedDimensions[i].size();
-          // Iterate over all except the last induction value.
-          for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
-            unsigned idx = combinedDimensions[i][j];
-
-            // Determine the current induction value's current loop iteration
-            Value iv = insideBuilder.create<arith::RemSIOp>(
-                loc, previous, normalizedUpperBounds[idx]);
-            replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
-                                       loops.getRegion());
-
-            // Remove the effect of the current induction value to prepare for
-            // the next value.
-            previous = insideBuilder.create<arith::DivSIOp>(
-                loc, previous, normalizedUpperBounds[idx]);
-          }
-
-          // The final induction value is just the remaining value.
-          unsigned idx = combinedDimensions[i][0];
-          replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx),
-                                     previous, loops.getRegion());
-        }
-      });
-
-  // Replace the old loop with the new loop.
-  loops.getBody()->back().erase();
-  newPloop.getBody()->getOperations().splice(
-      Block::iterator(newPloop.getBody()->back()),
-      loops.getBody()->getOperations());
-  loops.erase();
-}
-
 void mlir::mapLoopToProcessorIds(scf::ForOp forOp, ArrayRef<Value> processorId,
                                  ArrayRef<Value> numProcessors) {
   assert(processorId.size() == numProcessors.size());
diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
index 941ff3c88778c..1e3a38f51cb61 100644
--- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp
@@ -16,12 +16,14 @@
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/IntegerSet.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
+
+#define DEBUG_TYPE "affine-utils"
 
 using namespace mlir;
 
@@ -856,3 +858,740 @@ void mlir::affineScalarReplace(FuncOp f, DominanceInfo &domInfo,
     defOp->erase();
   }
 }
+
+// Perform the replacement in `op`.
+LogicalResult mlir::replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
+                                             Operation *op,
+                                             ArrayRef<Value> extraIndices,
+                                             AffineMap indexRemap,
+                                             ArrayRef<Value> extraOperands,
+                                             ArrayRef<Value> symbolOperands,
+                                             bool allowNonDereferencingOps) {
+  unsigned newMemRefRank = newMemRef.getType().cast<MemRefType>().getRank();
+  (void)newMemRefRank; // unused in opt mode
+  unsigned oldMemRefRank = oldMemRef.getType().cast<MemRefType>().getRank();
+  (void)oldMemRefRank; // unused in opt mode
+  if (indexRemap) {
+    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
+           "symbolic operand count mismatch");
+    assert(indexRemap.getNumInputs() ==
+           extraOperands.size() + oldMemRefRank + symbolOperands.size());
+    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
+  } else {
+    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
+  }
+
+  // Assert same elemental type.
+  assert(oldMemRef.getType().cast<MemRefType>().getElementType() ==
+         newMemRef.getType().cast<MemRefType>().getElementType());
+
+  SmallVector<unsigned, 2> usePositions;
+  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
+    if (opEntry.value() == oldMemRef)
+      usePositions.push_back(opEntry.index());
+  }
+
+  // If memref doesn't appear, nothing to do.
+  if (usePositions.empty())
+    return success();
+
+  if (usePositions.size() > 1) {
+    // TODO: extend it for this case when needed (rare).
+    assert(false && "multiple dereferencing uses in a single op not supported");
+    return failure();
+  }
+
+  unsigned memRefOperandPos = usePositions.front();
+
+  OpBuilder builder(op);
+  // The following checks if op is dereferencing memref and performs the access
+  // index rewrites.
+  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
+  if (!affMapAccInterface) {
+    if (!allowNonDereferencingOps) {
+      // Failure: memref used in a non-dereferencing context (potentially
+      // escapes); no replacement in these cases unless allowNonDereferencingOps
+      // is set.
+      return failure();
+    }
+    op->setOperand(memRefOperandPos, newMemRef);
+    return success();
+  }
+  // Perform index rewrites for the dereferencing op and then replace the op
+  NamedAttribute oldMapAttrPair =
+      affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
+  AffineMap oldMap = oldMapAttrPair.getValue().cast<AffineMapAttr>().getValue();
+  unsigned oldMapNumInputs = oldMap.getNumInputs();
+  SmallVector<Value, 4> oldMapOperands(
+      op->operand_begin() + memRefOperandPos + 1,
+      op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
+
+  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
+  SmallVector<Value, 4> oldMemRefOperands;
+  SmallVector<Value, 4> affineApplyOps;
+  oldMemRefOperands.reserve(oldMemRefRank);
+  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
+    for (auto resultExpr : oldMap.getResults()) {
+      auto singleResMap = AffineMap::get(oldMap.getNumDims(),
+                                         oldMap.getNumSymbols(), resultExpr);
+      auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
+                                                oldMapOperands);
+      oldMemRefOperands.push_back(afOp);
+      affineApplyOps.push_back(afOp);
+    }
+  } else {
+    oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
+  }
+
+  // Construct new indices as a remap of the old ones if a remapping has been
+  // provided. The indices of a memref come right after it, i.e.,
+  // at position memRefOperandPos + 1.
+  SmallVector<Value, 4> remapOperands;
+  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
+                        symbolOperands.size());
+  remapOperands.append(extraOperands.begin(), extraOperands.end());
+  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
+  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
+
+  SmallVector<Value, 4> remapOutputs;
+  remapOutputs.reserve(oldMemRefRank);
+
+  if (indexRemap &&
+      indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
+    // Remapped indices.
+    for (auto resultExpr : indexRemap.getResults()) {
+      auto singleResMap = AffineMap::get(
+          indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
+      auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
+                                                remapOperands);
+      remapOutputs.push_back(afOp);
+      affineApplyOps.push_back(afOp);
+    }
+  } else {
+    // No remapping specified.
+    remapOutputs.assign(remapOperands.begin(), remapOperands.end());
+  }
+
+  SmallVector<Value, 4> newMapOperands;
+  newMapOperands.reserve(newMemRefRank);
+
+  // Prepend 'extraIndices' in 'newMapOperands'.
+  for (Value extraIndex : extraIndices) {
+    assert(extraIndex.getDefiningOp()->getNumResults() == 1 &&
+           "single result op's expected to generate these indices");
+    assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
+           "invalid memory op index");
+    newMapOperands.push_back(extraIndex);
+  }
+
+  // Append 'remapOutputs' to 'newMapOperands'.
+  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
+
+  // Create new fully composed AffineMap for new op to be created.
+  assert(newMapOperands.size() == newMemRefRank);
+  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
+  // TODO: Avoid creating/deleting temporary AffineApplyOps here.
+  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
+  newMap = simplifyAffineMap(newMap);
+  canonicalizeMapAndOperands(&newMap, &newMapOperands);
+  // Remove any affine.apply's that became dead as a result of composition.
+  for (Value value : affineApplyOps)
+    if (value.use_empty())
+      value.getDefiningOp()->erase();
+
+  OperationState state(op->getLoc(), op->getName());
+  // Construct the new operation using this memref.
+  state.operands.reserve(op->getNumOperands() + extraIndices.size());
+  // Insert the non-memref operands.
+  state.operands.append(op->operand_begin(),
+                        op->operand_begin() + memRefOperandPos);
+  // Insert the new memref value.
+  state.operands.push_back(newMemRef);
+
+  // Insert the new memref map operands.
+  state.operands.append(newMapOperands.begin(), newMapOperands.end());
+
+  // Insert the remaining operands unmodified.
+  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
+                            oldMapNumInputs,
+                        op->operand_end());
+
+  // Result types don't change. Both memref's are of the same elemental type.
+  state.types.reserve(op->getNumResults());
+  for (auto result : op->getResults())
+    state.types.push_back(result.getType());
+
+  // Add attribute for 'newMap', other Attributes do not change.
+  auto newMapAttr = AffineMapAttr::get(newMap);
+  for (auto namedAttr : op->getAttrs()) {
+    if (namedAttr.getName() == oldMapAttrPair.getName())
+      state.attributes.push_back({namedAttr.getName(), newMapAttr});
+    else
+      state.attributes.push_back(namedAttr);
+  }
+
+  // Create the new operation.
+  auto *repOp = builder.createOperation(state);
+  op->replaceAllUsesWith(repOp);
+  op->erase();
+
+  return success();
+}
+
+LogicalResult mlir::replaceAllMemRefUsesWith(
+    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
+    AffineMap indexRemap, ArrayRef<Value> extraOperands,
+    ArrayRef<Value> symbolOperands, Operation *domOpFilter,
+    Operation *postDomOpFilter, bool allowNonDereferencingOps,
+    bool replaceInDeallocOp) {
+  unsigned newMemRefRank = newMemRef.getType().cast<MemRefType>().getRank();
+  (void)newMemRefRank; // unused in opt mode
+  unsigned oldMemRefRank = oldMemRef.getType().cast<MemRefType>().getRank();
+  (void)oldMemRefRank;
+  if (indexRemap) {
+    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
+           "symbol operand count mismatch");
+    assert(indexRemap.getNumInputs() ==
+           extraOperands.size() + oldMemRefRank + symbolOperands.size());
+    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
+  } else {
+    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
+  }
+
+  // Assert same elemental type.
+  assert(oldMemRef.getType().cast<MemRefType>().getElementType() ==
+         newMemRef.getType().cast<MemRefType>().getElementType());
+
+  std::unique_ptr<DominanceInfo> domInfo;
+  std::unique_ptr<PostDominanceInfo> postDomInfo;
+  if (domOpFilter)
+    domInfo =
+        std::make_unique<DominanceInfo>(domOpFilter->getParentOfType<FuncOp>());
+
+  if (postDomOpFilter)
+    postDomInfo = std::make_unique<PostDominanceInfo>(
+        postDomOpFilter->getParentOfType<FuncOp>());
+
+  // Walk all uses of old memref; collect ops to perform replacement. We use a
+  // DenseSet since an operation could potentially have multiple uses of a
+  // memref (although rare), and the replacement later is going to erase ops.
+  DenseSet<Operation *> opsToReplace;
+  for (auto *op : oldMemRef.getUsers()) {
+    // Skip this use if it's not dominated by domOpFilter.
+    if (domOpFilter && !domInfo->dominates(domOpFilter, op))
+      continue;
+
+    // Skip this use if it's not post-dominated by postDomOpFilter.
+    if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
+      continue;
+
+    // Skip dealloc's - no replacement is necessary, and a memref replacement
+    // at other uses doesn't hurt these dealloc's.
+    if (isa<memref::DeallocOp>(op) && !replaceInDeallocOp)
+      continue;
+
+    // Check if the memref was used in a non-dereferencing context. It is fine
+    // for the memref to be used in a non-dereferencing way outside of the
+    // region where this replacement is happening.
+    if (!isa<AffineMapAccessInterface>(*op)) {
+      if (!allowNonDereferencingOps) {
+        LLVM_DEBUG(llvm::dbgs()
+                   << "Memref replacement failed: non-deferencing memref op: \n"
+                   << *op << '\n');
+        return failure();
+      }
+      // Non-dereferencing ops with the MemRefsNormalizable trait are
+      // supported for replacement.
+      if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
+        LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
+                                   "memrefs normalizable trait: \n"
+                                << *op << '\n');
+        return failure();
+      }
+    }
+
+    // We'll first collect and then replace --- since replacement erases the op
+    // that has the use, and that op could be postDomFilter or domFilter itself!
+    opsToReplace.insert(op);
+  }
+
+  for (auto *op : opsToReplace) {
+    if (failed(replaceAllMemRefUsesWith(
+            oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
+            symbolOperands, allowNonDereferencingOps)))
+      llvm_unreachable("memref replacement guaranteed to succeed here");
+  }
+
+  return success();
+}
+
+/// Given an operation, inserts one or more single result affine
+/// apply operations, results of which are exclusively used by this operation
+/// operation. The operands of these newly created affine apply ops are
+/// guaranteed to be loop iterators or terminal symbols of a function.
+///
+/// Before
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "send"(%idx, %A, ...)
+///   "compute"(%idx)
+///
+/// After
+///
+/// affine.for %i = 0 to #map(%N)
+///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "send"(%idx, %A, ...)
+///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
+///   "compute"(%idx_)
+///
+/// This allows applying different transformations on send and compute (for eg.
+/// different shifts/delays).
+///
+/// Returns nullptr either if none of opInst's operands were the result of an
+/// affine.apply and thus there was no affine computation slice to create, or if
+/// all the affine.apply op's supplying operands to this opInst did not have any
+/// uses besides this opInst; otherwise returns the list of affine.apply
+/// operations created in output argument `sliceOps`.
+void mlir::createAffineComputationSlice(
+    Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
+  // Collect all operands that are results of affine apply ops.
+  SmallVector<Value, 4> subOperands;
+  subOperands.reserve(opInst->getNumOperands());
+  for (auto operand : opInst->getOperands())
+    if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
+      subOperands.push_back(operand);
+
+  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
+  SmallVector<Operation *, 4> affineApplyOps;
+  getReachableAffineApplyOps(subOperands, affineApplyOps);
+  // Skip transforming if there are no affine maps to compose.
+  if (affineApplyOps.empty())
+    return;
+
+  // Check if all uses of the affine apply op's lie only in this op op, in
+  // which case there would be nothing to do.
+  bool localized = true;
+  for (auto *op : affineApplyOps) {
+    for (auto result : op->getResults()) {
+      for (auto *user : result.getUsers()) {
+        if (user != opInst) {
+          localized = false;
+          break;
+        }
+      }
+    }
+  }
+  if (localized)
+    return;
+
+  OpBuilder builder(opInst);
+  SmallVector<Value, 4> composedOpOperands(subOperands);
+  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
+  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
+
+  // Create an affine.apply for each of the map results.
+  sliceOps->reserve(composedMap.getNumResults());
+  for (auto resultExpr : composedMap.getResults()) {
+    auto singleResMap = AffineMap::get(composedMap.getNumDims(),
+                                       composedMap.getNumSymbols(), resultExpr);
+    sliceOps->push_back(builder.create<AffineApplyOp>(
+        opInst->getLoc(), singleResMap, composedOpOperands));
+  }
+
+  // Construct the new operands that include the results from the composed
+  // affine apply op above instead of existing ones (subOperands). So, they
+  // differ from opInst's operands only for those operands in 'subOperands', for
+  // which they will be replaced by the corresponding one from 'sliceOps'.
+  SmallVector<Value, 4> newOperands(opInst->getOperands());
+  for (unsigned i = 0, e = newOperands.size(); i < e; i++) {
+    // Replace the subOperands from among the new operands.
+    unsigned j, f;
+    for (j = 0, f = subOperands.size(); j < f; j++) {
+      if (newOperands[i] == subOperands[j])
+        break;
+    }
+    if (j < subOperands.size()) {
+      newOperands[i] = (*sliceOps)[j];
+    }
+  }
+  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++) {
+    opInst->setOperand(idx, newOperands[idx]);
+  }
+}
+
+/// Enum to set patterns of affine expr in tiled-layout map.
+/// TileFloorDiv: <dim expr> div <tile size>
+/// TileMod: <dim expr> mod <tile size>
+/// TileNone: None of the above
+/// Example:
+/// #tiled_2d_128x256 = affine_map<(d0, d1)
+///            -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
+/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
+/// "d0 mod 128" and "d1 mod 256" ==> TileMod
+enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
+
+/// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
+/// being floordiv'ed by respective tile sizes appeare in a mod with the same
+/// tile sizes, and no other expression involves those k dimensions. This
+/// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
+/// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
+/// tiled layout, an empty vector is returned.
+static LogicalResult getTileSizePos(
+    AffineMap map,
+    SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
+  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
+  // `floordiv` and its position in `map` output.
+  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
+  //                -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
+  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
+  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
+  unsigned pos = 0;
+  for (AffineExpr expr : map.getResults()) {
+    if (expr.getKind() == AffineExprKind::FloorDiv) {
+      AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+      if (binaryExpr.getRHS().isa<AffineConstantExpr>())
+        floordivExprs.emplace_back(
+            std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
+    }
+    pos++;
+  }
+  // Not tiled layout if `floordivExprs` is empty.
+  if (floordivExprs.empty()) {
+    tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
+    return success();
+  }
+
+  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
+  // not tiled layout.
+  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
+    AffineExpr floordivExprLHS = std::get<0>(fexpr);
+    AffineExpr floordivExprRHS = std::get<1>(fexpr);
+    unsigned floordivPos = std::get<2>(fexpr);
+
+    // Walk affinexpr of `map` output except `fexpr`, and check if LHS and RHS
+    // of `fexpr` are used in LHS and RHS of `mod`. If LHS of `fexpr` is used
+    // other expr, the map is not tiled layout. Example of non tiled layout:
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
+    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
+    //   256)>
+    bool found = false;
+    pos = 0;
+    for (AffineExpr expr : map.getResults()) {
+      bool notTiled = false;
+      if (pos != floordivPos) {
+        expr.walk([&](AffineExpr e) {
+          if (e == floordivExprLHS) {
+            if (expr.getKind() == AffineExprKind::Mod) {
+              AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
+              // If LHS and RHS of `mod` are the same with those of floordiv.
+              if (floordivExprLHS == binaryExpr.getLHS() &&
+                  floordivExprRHS == binaryExpr.getRHS()) {
+                // Save tile size (RHS of `mod`), and position of `floordiv` and
+                // `mod` if same expr with `mod` is not found yet.
+                if (!found) {
+                  tileSizePos.emplace_back(
+                      std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
+                  found = true;
+                } else {
+                  // Non tiled layout: Have multilpe `mod` with the same LHS.
+                  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
+                  // mod 256, d2 mod 256)>
+                  notTiled = true;
+                }
+              } else {
+                // Non tiled layout: RHS of `mod` is different from `floordiv`.
+                // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
+                // mod 128)>
+                notTiled = true;
+              }
+            } else {
+              // Non tiled layout: LHS is the same, but not `mod`.
+              // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
+              // floordiv 256)>
+              notTiled = true;
+            }
+          }
+        });
+      }
+      if (notTiled) {
+        tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
+        return success();
+      }
+      pos++;
+    }
+  }
+  return success();
+}
+
+/// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
+/// after normalization. Dimensions that include dynamic dimensions in the map
+/// output will become dynamic dimensions. Return true if `dim` is dynamic
+/// dimension.
+///
+/// Example:
+/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
+///
+/// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
+/// memref<4x?xf32, #map0>  ==>  memref<4x?x?xf32>
+static bool
+isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
+                             SmallVectorImpl<unsigned> &inMemrefTypeDynDims,
+                             MLIRContext *context) {
+  bool isDynamicDim = false;
+  AffineExpr expr = layoutMap.getResults()[dim];
+  // Check if affine expr of the dimension includes dynamic dimension of input
+  // memrefType.
+  expr.walk([&inMemrefTypeDynDims, &isDynamicDim, &context](AffineExpr e) {
+    if (e.isa<AffineDimExpr>()) {
+      for (unsigned dm : inMemrefTypeDynDims) {
+        if (e == getAffineDimExpr(dm, context)) {
+          isDynamicDim = true;
+        }
+      }
+    }
+  });
+  return isDynamicDim;
+}
+
+/// Create affine expr to calculate dimension size for a tiled-layout map.
+static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
+                                                  TileExprPattern pat) {
+  // Create map output for the patterns.
+  // "floordiv <tile size>" ==> "ceildiv <tile size>"
+  // "mod <tile size>" ==> "<tile size>"
+  AffineExpr newMapOutput;
+  AffineBinaryOpExpr binaryExpr = nullptr;
+  switch (pat) {
+  case TileExprPattern::TileMod:
+    binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
+    newMapOutput = binaryExpr.getRHS();
+    break;
+  case TileExprPattern::TileFloorDiv:
+    binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
+    newMapOutput = getAffineBinaryOpExpr(
+        AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
+    break;
+  default:
+    newMapOutput = oldMapOutput;
+  }
+  return newMapOutput;
+}
+
+/// Create new maps to calculate each dimension size of `newMemRefType`, and
+/// create `newDynamicSizes` from them by using AffineApplyOp.
+///
+/// Steps for normalizing dynamic memrefs for a tiled layout map
+/// Example:
+///    #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
+///    %0 = dim %arg0, %c1 :memref<4x?xf32>
+///    %1 = alloc(%0) : memref<4x?xf32, #map0>
+///
+/// (Before this function)
+/// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
+/// single layout map is supported.
+///
+/// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
+/// is memref<4x?x?xf32> in the above example.
+///
+/// (In this function)
+/// 3. Create new maps to calculate each dimension of the normalized memrefType
+/// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
+/// dimension size can be calculated by replacing "floordiv <tile size>" with
+/// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
+/// - New map in the above example
+///   #map0 = affine_map<(d0, d1) -> (d0)>
+///   #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
+///   #map2 = affine_map<(d0, d1) -> (32)>
+///
+/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
+/// is used in dynamicSizes of new AllocOp.
+///   %0 = dim %arg0, %c1 : memref<4x?xf32>
+///   %c4 = arith.constant 4 : index
+///   %1 = affine.apply #map1(%c4, %0)
+///   %2 = affine.apply #map2(%c4, %0)
+static void createNewDynamicSizes(MemRefType oldMemRefType,
+                                  MemRefType newMemRefType, AffineMap map,
+                                  memref::AllocOp *allocOp, OpBuilder b,
+                                  SmallVectorImpl<Value> &newDynamicSizes) {
+  // Create new input for AffineApplyOp.
+  SmallVector<Value, 4> inAffineApply;
+  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
+  unsigned dynIdx = 0;
+  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
+    if (oldMemRefShape[d] < 0) {
+      // Use dynamicSizes of allocOp for dynamic dimension.
+      inAffineApply.emplace_back(allocOp->dynamicSizes()[dynIdx]);
+      dynIdx++;
+    } else {
+      // Create ConstantOp for static dimension.
+      Attribute constantAttr =
+          b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
+      inAffineApply.emplace_back(
+          b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
+    }
+  }
+
+  // Create new map to calculate each dimension size of new memref for each
+  // original map output. Only for dynamic dimesion of `newMemRefType`.
+  unsigned newDimIdx = 0;
+  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
+  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
+  (void)getTileSizePos(map, tileSizePos);
+  for (AffineExpr expr : map.getResults()) {
+    if (newMemRefShape[newDimIdx] < 0) {
+      // Create new maps to calculate each dimension size of new memref.
+      enum TileExprPattern pat = TileExprPattern::TileNone;
+      for (auto pos : tileSizePos) {
+        if (newDimIdx == std::get<1>(pos))
+          pat = TileExprPattern::TileFloorDiv;
+        else if (newDimIdx == std::get<2>(pos))
+          pat = TileExprPattern::TileMod;
+      }
+      AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
+      AffineMap newMap =
+          AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
+      Value affineApp =
+          b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
+      newDynamicSizes.emplace_back(affineApp);
+    }
+    newDimIdx++;
+  }
+}
+
+// TODO: Currently works for static memrefs with a single layout map.
+LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) {
+  MemRefType memrefType = allocOp->getType();
+  OpBuilder b(*allocOp);
+
+  // Fetch a new memref type after normalizing the old memref to have an
+  // identity map layout.
+  MemRefType newMemRefType =
+      normalizeMemRefType(memrefType, b, allocOp->symbolOperands().size());
+  if (newMemRefType == memrefType)
+    // Either memrefType already had an identity map or the map couldn't be
+    // transformed to an identity map.
+    return failure();
+
+  Value oldMemRef = allocOp->getResult();
+
+  SmallVector<Value, 4> symbolOperands(allocOp->symbolOperands());
+  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
+  memref::AllocOp newAlloc;
+  // Check if `layoutMap` is a tiled layout. Only single layout map is
+  // supported for normalizing dynamic memrefs.
+  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
+  (void)getTileSizePos(layoutMap, tileSizePos);
+  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
+    MemRefType oldMemRefType = oldMemRef.getType().cast<MemRefType>();
+    SmallVector<Value, 4> newDynamicSizes;
+    createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
+                          newDynamicSizes);
+    // Add the new dynamic sizes in new AllocOp.
+    newAlloc =
+        b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
+                                  newDynamicSizes, allocOp->alignmentAttr());
+  } else {
+    newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
+                                         allocOp->alignmentAttr());
+  }
+  // Replace all uses of the old memref.
+  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
+                                      /*extraIndices=*/{},
+                                      /*indexRemap=*/layoutMap,
+                                      /*extraOperands=*/{},
+                                      /*symbolOperands=*/symbolOperands,
+                                      /*domOpFilter=*/nullptr,
+                                      /*postDomOpFilter=*/nullptr,
+                                      /*allowNonDereferencingOps=*/true))) {
+    // If it failed (due to escapes for example), bail out.
+    newAlloc.erase();
+    return failure();
+  }
+  // Replace any uses of the original alloc op and erase it. All remaining uses
+  // have to be dealloc's; RAMUW above would've failed otherwise.
+  assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) {
+    return isa<memref::DeallocOp>(op);
+  }));
+  oldMemRef.replaceAllUsesWith(newAlloc);
+  allocOp->erase();
+  return success();
+}
+
+MemRefType mlir::normalizeMemRefType(MemRefType memrefType, OpBuilder b,
+                                     unsigned numSymbolicOperands) {
+  unsigned rank = memrefType.getRank();
+  if (rank == 0)
+    return memrefType;
+
+  if (memrefType.getLayout().isIdentity()) {
+    // Either no maps is associated with this memref or this memref has
+    // a trivial (identity) map.
+    return memrefType;
+  }
+  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
+
+  // We don't do any checks for one-to-one'ness; we assume that it is
+  // one-to-one.
+
+  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
+  // for now.
+  // TODO: Normalize the other types of dynamic memrefs.
+  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
+  (void)getTileSizePos(layoutMap, tileSizePos);
+  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
+    return memrefType;
+
+  // We have a single map that is not an identity map. Create a new memref
+  // with the right shape and an identity layout map.
+  ArrayRef<int64_t> shape = memrefType.getShape();
+  // FlatAffineConstraint may later on use symbolicOperands.
+  FlatAffineConstraints fac(rank, numSymbolicOperands);
+  SmallVector<unsigned, 4> memrefTypeDynDims;
+  for (unsigned d = 0; d < rank; ++d) {
+    // Use constraint system only in static dimensions.
+    if (shape[d] > 0) {
+      fac.addBound(FlatAffineConstraints::LB, d, 0);
+      fac.addBound(FlatAffineConstraints::UB, d, shape[d] - 1);
+    } else {
+      memrefTypeDynDims.emplace_back(d);
+    }
+  }
+  // We compose this map with the original index (logical) space to derive
+  // the upper bounds for the new index space.
+  unsigned newRank = layoutMap.getNumResults();
+  if (failed(fac.composeMatchingMap(layoutMap)))
+    return memrefType;
+  // TODO: Handle semi-affine maps.
+  // Project out the old data dimensions.
+  fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds());
+  SmallVector<int64_t, 4> newShape(newRank);
+  for (unsigned d = 0; d < newRank; ++d) {
+    // Check if each dimension of normalized memrefType is dynamic.
+    bool isDynDim = isNormalizedMemRefDynamicDim(
+        d, layoutMap, memrefTypeDynDims, b.getContext());
+    if (isDynDim) {
+      newShape[d] = -1;
+    } else {
+      // The lower bound for the shape is always zero.
+      auto ubConst = fac.getConstantBound(FlatAffineConstraints::UB, d);
+      // For a static memref and an affine map with no symbols, this is
+      // always bounded.
+      assert(ubConst.hasValue() && "should always have an upper bound");
+      if (ubConst.getValue() < 0)
+        // This is due to an invalid map that maps to a negative space.
+        return memrefType;
+      // If dimension of new memrefType is dynamic, the value is -1.
+      newShape[d] = ubConst.getValue() + 1;
+    }
+  }
+
+  // Create the new memref type after trivializing the old layout map.
+  MemRefType newMemRefType =
+      MemRefType::Builder(memrefType)
+          .setShape(newShape)
+          .setLayout(AffineMapAttr::get(b.getMultiDimIdentityMap(newRank)));
+
+  return newMemRefType;
+}
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
index 182a24cd04444..62c11f12b3f89 100644
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/GPU/MemoryPromotion.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -19,7 +20,6 @@
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
 
 using namespace mlir;
 using namespace mlir::gpu;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
index db5da26cc9a8c..3606d9b832d1a 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Linalg/Transforms/CodegenStrategy.h"
-
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
 #include "mlir/Dialect/SCF/Transforms.h"
@@ -20,7 +19,6 @@
 #include "mlir/Dialect/Vector/VectorTransforms.h"
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
index 83c9aa4a54a01..8e863ab0c80e8 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Dialect/Linalg/Transforms/HoistPadding.h"
 #include "mlir/Analysis/SliceAnalysis.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/SCF/SCF.h"
@@ -24,7 +23,6 @@
 #include "mlir/IR/AsmState.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dominance.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
index 097500f7190c4..33b70c0dc95fd 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Hoisting.cpp
@@ -15,7 +15,6 @@
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Dialect/SCF/SCF.h"
@@ -27,7 +26,6 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
index 025adc2c56b2b..74d097ca5b766 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp
@@ -16,6 +16,8 @@
 #include "PassDetail.h"
 #include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/Linalg/Passes.h"
 #include "mlir/Dialect/Linalg/Transforms/Hoisting.h"
@@ -29,9 +31,7 @@
 #include "mlir/Pass/PassManager.h"
 #include "mlir/Support/LLVM.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
 using namespace mlir::vector;
@@ -348,7 +348,13 @@ struct LinalgStrategyEnablePass
         return signalPassFailure();
     }
 
-    promoteSingleIterationLoops(funcOp);
+    // Gathers all innermost loops through a post order pruned walk.
+    funcOp.walk([](Operation *op) {
+      if (auto forOp = dyn_cast<AffineForOp>(op))
+        (void)promoteIfSingleIteration(forOp);
+      else if (auto forOp = dyn_cast<scf::ForOp>(op))
+        (void)promoteIfSingleIteration(forOp);
+    });
     if (options.hoistRedundantVectorTransfers)
       hoistRedundantVectorTransfers(funcOp);
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
index 486a069a60ed0..ca4308851daed 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 1b0a6d7f2ba87..a26f623619212 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -16,6 +16,7 @@
 #include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
@@ -31,7 +32,6 @@
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "llvm/ADT/TypeSwitch.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
index 0b5e49b2df528..89936111fce55 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp
@@ -13,9 +13,9 @@
 
 #include "PassDetail.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/MemRef/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/Debug.h"
 
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
index f517079990015..2c723625038bf 100644
--- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -6,6 +6,7 @@ add_mlir_dialect_library(MLIRSCFTransforms
   LoopPipelining.cpp
   LoopRangeFolding.cpp
   LoopSpecialization.cpp
+  ParallelLoopCollapsing.cpp
   ParallelLoopFusion.cpp
   ParallelLoopTiling.cpp
   StructuralTypeConversions.cpp
diff --git a/mlir/lib/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
similarity index 91%
rename from mlir/lib/Transforms/ParallelLoopCollapsing.cpp
rename to mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
index 2c3329cde2218..0e5d4da310bdb 100644
--- a/mlir/lib/Transforms/ParallelLoopCollapsing.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp
@@ -7,9 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
+#include "mlir/Dialect/SCF/Passes.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
+#include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/Transforms/RegionUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -20,7 +20,7 @@ using namespace mlir;
 
 namespace {
 struct ParallelLoopCollapsing
-    : public ParallelLoopCollapsingBase<ParallelLoopCollapsing> {
+    : public SCFParallelLoopCollapsingBase<ParallelLoopCollapsing> {
   void runOnOperation() override {
     Operation *module = getOperation();
 
diff --git a/mlir/lib/Dialect/SCF/Transforms/Utils.cpp b/mlir/lib/Dialect/SCF/Transforms/Utils.cpp
index 66be70bf1a19d..04b41306c6da6 100644
--- a/mlir/lib/Dialect/SCF/Transforms/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Transforms/Utils.cpp
@@ -11,20 +11,31 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/SCF/Utils.h"
-
+#include "mlir/Analysis/SliceAnalysis.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/RegionUtils.h"
-
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 
 using namespace mlir;
 
+namespace {
+// This structure is to pass and return sets of loop parameters without
+// confusing the order.
+struct LoopParams {
+  Value lowerBound;
+  Value upperBound;
+  Value step;
+};
+} // namespace
+
 scf::ForOp mlir::cloneWithNewYields(OpBuilder &b, scf::ForOp loop,
                                     ValueRange newIterOperands,
                                     ValueRange newYieldedValues,
@@ -230,3 +241,682 @@ bool mlir::getInnermostParallelLoops(Operation *rootOp,
   }
   return rootEnclosesPloops;
 }
+
+// Build the IR that performs ceil division of a positive value by a constant:
+//    ceildiv(a, B) = divis(a + (B-1), B)
+// where divis is rounding-to-zero division.
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
+                             int64_t divisor) {
+  assert(divisor > 0 && "expected positive divisor");
+  assert(dividend.getType().isIndex() && "expected index-typed value");
+
+  Value divisorMinusOneCst =
+      builder.create<arith::ConstantIndexOp>(loc, divisor - 1);
+  Value divisorCst = builder.create<arith::ConstantIndexOp>(loc, divisor);
+  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOneCst);
+  return builder.create<arith::DivSIOp>(loc, sum, divisorCst);
+}
+
+// Build the IR that performs ceil division of a positive value by another
+// positive value:
+//    ceildiv(a, b) = divis(a + (b - 1), b)
+// where divis is rounding-to-zero division.
+static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
+                             Value divisor) {
+  assert(dividend.getType().isIndex() && "expected index-typed value");
+
+  Value cstOne = builder.create<arith::ConstantIndexOp>(loc, 1);
+  Value divisorMinusOne = builder.create<arith::SubIOp>(loc, divisor, cstOne);
+  Value sum = builder.create<arith::AddIOp>(loc, dividend, divisorMinusOne);
+  return builder.create<arith::DivSIOp>(loc, sum, divisor);
+}
+
+/// Helper to replace uses of loop carried values (iter_args) and loop
+/// yield values while promoting single iteration scf.for ops.
+static void replaceIterArgsAndYieldResults(scf::ForOp forOp) {
+  // Replace uses of iter arguments with iter operands (initial values).
+  auto iterOperands = forOp.getIterOperands();
+  auto iterArgs = forOp.getRegionIterArgs();
+  for (auto e : llvm::zip(iterOperands, iterArgs))
+    std::get<1>(e).replaceAllUsesWith(std::get<0>(e));
+
+  // Replace uses of loop results with the values yielded by the loop.
+  auto outerResults = forOp.getResults();
+  auto innerResults = forOp.getBody()->getTerminator()->getOperands();
+  for (auto e : llvm::zip(outerResults, innerResults))
+    std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
+}
+
+/// Promotes the loop body of a forOp to its containing block if the forOp
+/// it can be determined that the loop has a single iteration.
+LogicalResult mlir::promoteIfSingleIteration(scf::ForOp forOp) {
+  auto lbCstOp = forOp.getLowerBound().getDefiningOp<arith::ConstantIndexOp>();
+  auto ubCstOp = forOp.getUpperBound().getDefiningOp<arith::ConstantIndexOp>();
+  auto stepCstOp = forOp.getStep().getDefiningOp<arith::ConstantIndexOp>();
+  if (!lbCstOp || !ubCstOp || !stepCstOp || lbCstOp.value() < 0 ||
+      ubCstOp.value() < 0 || stepCstOp.value() < 0)
+    return failure();
+  int64_t tripCount =
+      mlir::ceilDiv(ubCstOp.value() - lbCstOp.value(), stepCstOp.value());
+  if (tripCount != 1)
+    return failure();
+  auto iv = forOp.getInductionVar();
+  iv.replaceAllUsesWith(lbCstOp);
+
+  replaceIterArgsAndYieldResults(forOp);
+
+  // Move the loop body operations, except for its terminator, to the loop's
+  // containing block.
+  auto *parentBlock = forOp->getBlock();
+  forOp.getBody()->getTerminator()->erase();
+  parentBlock->getOperations().splice(Block::iterator(forOp),
+                                      forOp.getBody()->getOperations());
+  forOp.erase();
+  return success();
+}
+
+/// Generates unrolled copies of scf::ForOp 'loopBodyBlock', with
+/// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
+/// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
+/// unrolled iteration using annotateFn.
+static void generateUnrolledLoop(
+    Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
+    function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
+    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
+    ValueRange iterArgs, ValueRange yieldedValues) {
+  // Builder to insert unrolled bodies just before the terminator of the body of
+  // 'forOp'.
+  auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
+
+  if (!annotateFn)
+    annotateFn = [](unsigned, Operation *, OpBuilder) {};
+
+  // Keep a pointer to the last non-terminator operation in the original block
+  // so that we know what to clone (since we are doing this in-place).
+  Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
+
+  // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
+  SmallVector<Value, 4> lastYielded(yieldedValues);
+
+  for (unsigned i = 1; i < unrollFactor; i++) {
+    BlockAndValueMapping operandMap;
+
+    // Prepare operand map.
+    operandMap.map(iterArgs, lastYielded);
+
+    // If the induction variable is used, create a remapping to the value for
+    // this unrolled instance.
+    if (!forOpIV.use_empty()) {
+      Value ivUnroll = ivRemapFn(i, forOpIV, builder);
+      operandMap.map(forOpIV, ivUnroll);
+    }
+
+    // Clone the original body of 'forOp'.
+    for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
+      Operation *clonedOp = builder.clone(*it, operandMap);
+      annotateFn(i, clonedOp, builder);
+    }
+
+    // Update yielded values.
+    for (unsigned i = 0, e = lastYielded.size(); i < e; i++)
+      lastYielded[i] = operandMap.lookup(yieldedValues[i]);
+  }
+
+  // Make sure we annotate the Ops in the original body. We do this last so that
+  // any annotations are not copied into the cloned Ops above.
+  for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++)
+    annotateFn(0, &*it, builder);
+
+  // Update operands of the yield statement.
+  loopBodyBlock->getTerminator()->setOperands(lastYielded);
+}
+
+/// Unrolls 'forOp' by 'unrollFactor', returns success if the loop is unrolled.
+LogicalResult mlir::loopUnrollByFactor(
+    scf::ForOp forOp, uint64_t unrollFactor,
+    function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn) {
+  assert(unrollFactor > 0 && "expected positive unroll factor");
+
+  // Return if the loop body is empty.
+  if (llvm::hasSingleElement(forOp.getBody()->getOperations()))
+    return success();
+
+  // Compute tripCount = ceilDiv((upperBound - lowerBound), step) and populate
+  // 'upperBoundUnrolled' and 'stepUnrolled' for static and dynamic cases.
+  OpBuilder boundsBuilder(forOp);
+  auto loc = forOp.getLoc();
+  auto step = forOp.getStep();
+  Value upperBoundUnrolled;
+  Value stepUnrolled;
+  bool generateEpilogueLoop = true;
+
+  auto lbCstOp = forOp.getLowerBound().getDefiningOp<arith::ConstantIndexOp>();
+  auto ubCstOp = forOp.getUpperBound().getDefiningOp<arith::ConstantIndexOp>();
+  auto stepCstOp = forOp.getStep().getDefiningOp<arith::ConstantIndexOp>();
+  if (lbCstOp && ubCstOp && stepCstOp) {
+    // Constant loop bounds computation.
+    int64_t lbCst = lbCstOp.value();
+    int64_t ubCst = ubCstOp.value();
+    int64_t stepCst = stepCstOp.value();
+    assert(lbCst >= 0 && ubCst >= 0 && stepCst >= 0 &&
+           "expected positive loop bounds and step");
+    int64_t tripCount = mlir::ceilDiv(ubCst - lbCst, stepCst);
+
+    if (unrollFactor == 1) {
+      if (tripCount == 1 && failed(promoteIfSingleIteration(forOp)))
+        return failure();
+      return success();
+    }
+
+    int64_t tripCountEvenMultiple = tripCount - (tripCount % unrollFactor);
+    int64_t upperBoundUnrolledCst = lbCst + tripCountEvenMultiple * stepCst;
+    assert(upperBoundUnrolledCst <= ubCst);
+    int64_t stepUnrolledCst = stepCst * unrollFactor;
+
+    // Create constant for 'upperBoundUnrolled' and set epilogue loop flag.
+    generateEpilogueLoop = upperBoundUnrolledCst < ubCst;
+    if (generateEpilogueLoop)
+      upperBoundUnrolled = boundsBuilder.create<arith::ConstantIndexOp>(
+          loc, upperBoundUnrolledCst);
+    else
+      upperBoundUnrolled = ubCstOp;
+
+    // Create constant for 'stepUnrolled'.
+    stepUnrolled = stepCst == stepUnrolledCst
+                       ? step
+                       : boundsBuilder.create<arith::ConstantIndexOp>(
+                             loc, stepUnrolledCst);
+  } else {
+    // Dynamic loop bounds computation.
+    // TODO: Add dynamic asserts for negative lb/ub/step, or
+    // consider using ceilDiv from AffineApplyExpander.
+    auto lowerBound = forOp.getLowerBound();
+    auto upperBound = forOp.getUpperBound();
+    Value diff =
+        boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
+    Value tripCount = ceilDivPositive(boundsBuilder, loc, diff, step);
+    Value unrollFactorCst =
+        boundsBuilder.create<arith::ConstantIndexOp>(loc, unrollFactor);
+    Value tripCountRem =
+        boundsBuilder.create<arith::RemSIOp>(loc, tripCount, unrollFactorCst);
+    // Compute tripCountEvenMultiple = tripCount - (tripCount % unrollFactor)
+    Value tripCountEvenMultiple =
+        boundsBuilder.create<arith::SubIOp>(loc, tripCount, tripCountRem);
+    // Compute upperBoundUnrolled = lowerBound + tripCountEvenMultiple * step
+    upperBoundUnrolled = boundsBuilder.create<arith::AddIOp>(
+        loc, lowerBound,
+        boundsBuilder.create<arith::MulIOp>(loc, tripCountEvenMultiple, step));
+    // Scale 'step' by 'unrollFactor'.
+    stepUnrolled =
+        boundsBuilder.create<arith::MulIOp>(loc, step, unrollFactorCst);
+  }
+
+  // Create epilogue clean up loop starting at 'upperBoundUnrolled'.
+  if (generateEpilogueLoop) {
+    OpBuilder epilogueBuilder(forOp->getContext());
+    epilogueBuilder.setInsertionPoint(forOp->getBlock(),
+                                      std::next(Block::iterator(forOp)));
+    auto epilogueForOp = cast<scf::ForOp>(epilogueBuilder.clone(*forOp));
+    epilogueForOp.setLowerBound(upperBoundUnrolled);
+
+    // Update uses of loop results.
+    auto results = forOp.getResults();
+    auto epilogueResults = epilogueForOp.getResults();
+    auto epilogueIterOperands = epilogueForOp.getIterOperands();
+
+    for (auto e : llvm::zip(results, epilogueResults, epilogueIterOperands)) {
+      std::get<0>(e).replaceAllUsesWith(std::get<1>(e));
+      epilogueForOp->replaceUsesOfWith(std::get<2>(e), std::get<0>(e));
+    }
+    (void)promoteIfSingleIteration(epilogueForOp);
+  }
+
+  // Create unrolled loop.
+  forOp.setUpperBound(upperBoundUnrolled);
+  forOp.setStep(stepUnrolled);
+
+  auto iterArgs = ValueRange(forOp.getRegionIterArgs());
+  auto yieldedValues = forOp.getBody()->getTerminator()->getOperands();
+
+  generateUnrolledLoop(
+      forOp.getBody(), forOp.getInductionVar(), unrollFactor,
+      [&](unsigned i, Value iv, OpBuilder b) {
+        // iv' = iv + step * i;
+        auto stride = b.create<arith::MulIOp>(
+            loc, step, b.create<arith::ConstantIndexOp>(loc, i));
+        return b.create<arith::AddIOp>(loc, iv, stride);
+      },
+      annotateFn, iterArgs, yieldedValues);
+  // Promote the loop body up if this has turned into a single iteration loop.
+  (void)promoteIfSingleIteration(forOp);
+  return success();
+}
+
+/// Return the new lower bound, upper bound, and step in that order. Insert any
+/// additional bounds calculations before the given builder and any additional
+/// conversion back to the original loop induction value inside the given Block.
+static LoopParams normalizeLoop(OpBuilder &boundsBuilder,
+                                OpBuilder &insideLoopBuilder, Location loc,
+                                Value lowerBound, Value upperBound, Value step,
+                                Value inductionVar) {
+  // Check if the loop is already known to have a constant zero lower bound or
+  // a constant one step.
+  bool isZeroBased = false;
+  if (auto ubCst = lowerBound.getDefiningOp<arith::ConstantIndexOp>())
+    isZeroBased = ubCst.value() == 0;
+
+  bool isStepOne = false;
+  if (auto stepCst = step.getDefiningOp<arith::ConstantIndexOp>())
+    isStepOne = stepCst.value() == 1;
+
+  // Compute the number of iterations the loop executes: ceildiv(ub - lb, step)
+  // assuming the step is strictly positive.  Update the bounds and the step
+  // of the loop to go from 0 to the number of iterations, if necessary.
+  // TODO: introduce support for negative steps or emit dynamic asserts
+  // on step positivity, whatever gets implemented first.
+  if (isZeroBased && isStepOne)
+    return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound,
+            /*step=*/step};
+
+  Value diff = boundsBuilder.create<arith::SubIOp>(loc, upperBound, lowerBound);
+  Value newUpperBound = ceilDivPositive(boundsBuilder, loc, diff, step);
+
+  Value newLowerBound =
+      isZeroBased ? lowerBound
+                  : boundsBuilder.create<arith::ConstantIndexOp>(loc, 0);
+  Value newStep =
+      isStepOne ? step : boundsBuilder.create<arith::ConstantIndexOp>(loc, 1);
+
+  // Insert code computing the value of the original loop induction variable
+  // from the "normalized" one.
+  Value scaled =
+      isStepOne
+          ? inductionVar
+          : insideLoopBuilder.create<arith::MulIOp>(loc, inductionVar, step);
+  Value shifted =
+      isZeroBased
+          ? scaled
+          : insideLoopBuilder.create<arith::AddIOp>(loc, scaled, lowerBound);
+
+  SmallPtrSet<Operation *, 2> preserve{scaled.getDefiningOp(),
+                                       shifted.getDefiningOp()};
+  inductionVar.replaceAllUsesExcept(shifted, preserve);
+  return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound,
+          /*step=*/newStep};
+}
+
+/// Transform a loop with a strictly positive step
+///   for %i = %lb to %ub step %s
+/// into a 0-based loop with step 1
+///   for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 {
+///     %i = %ii * %s + %lb
+/// Insert the induction variable remapping in the body of `inner`, which is
+/// expected to be either `loop` or another loop perfectly nested under `loop`.
+/// Insert the definition of new bounds immediate before `outer`, which is
+/// expected to be either `loop` or its parent in the loop nest.
+static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) {
+  OpBuilder builder(outer);
+  OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody());
+  auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(),
+                                  loop.getLowerBound(), loop.getUpperBound(),
+                                  loop.getStep(), loop.getInductionVar());
+
+  loop.setLowerBound(loopPieces.lowerBound);
+  loop.setUpperBound(loopPieces.upperBound);
+  loop.setStep(loopPieces.step);
+}
+
+void mlir::coalesceLoops(MutableArrayRef<scf::ForOp> loops) {
+  if (loops.size() < 2)
+    return;
+
+  scf::ForOp innermost = loops.back();
+  scf::ForOp outermost = loops.front();
+
+  // 1. Make sure all loops iterate from 0 to upperBound with step 1.  This
+  // allows the following code to assume upperBound is the number of iterations.
+  for (auto loop : loops)
+    normalizeLoop(loop, outermost, innermost);
+
+  // 2. Emit code computing the upper bound of the coalesced loop as product
+  // of the number of iterations of all loops.
+  OpBuilder builder(outermost);
+  Location loc = outermost.getLoc();
+  Value upperBound = outermost.getUpperBound();
+  for (auto loop : loops.drop_front())
+    upperBound =
+        builder.create<arith::MulIOp>(loc, upperBound, loop.getUpperBound());
+  outermost.setUpperBound(upperBound);
+
+  builder.setInsertionPointToStart(outermost.getBody());
+
+  // 3. Remap induction variables. For each original loop, the value of the
+  // induction variable can be obtained by dividing the induction variable of
+  // the linearized loop by the total number of iterations of the loops nested
+  // in it modulo the number of iterations in this loop (remove the values
+  // related to the outer loops):
+  //   iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i.
+  // Compute these iteratively from the innermost loop by creating a "running
+  // quotient" of division by the range.
+  Value previous = outermost.getInductionVar();
+  for (unsigned i = 0, e = loops.size(); i < e; ++i) {
+    unsigned idx = loops.size() - i - 1;
+    if (i != 0)
+      previous = builder.create<arith::DivSIOp>(loc, previous,
+                                                loops[idx + 1].getUpperBound());
+
+    Value iv = (i == e - 1) ? previous
+                            : builder.create<arith::RemSIOp>(
+                                  loc, previous, loops[idx].getUpperBound());
+    replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv,
+                               loops.back().getRegion());
+  }
+
+  // 4. Move the operations from the innermost just above the second-outermost
+  // loop, delete the extra terminator and the second-outermost loop.
+  scf::ForOp second = loops[1];
+  innermost.getBody()->back().erase();
+  outermost.getBody()->getOperations().splice(
+      Block::iterator(second.getOperation()),
+      innermost.getBody()->getOperations());
+  second.erase();
+}
+
+void mlir::collapseParallelLoops(
+    scf::ParallelOp loops, ArrayRef<std::vector<unsigned>> combinedDimensions) {
+  OpBuilder outsideBuilder(loops);
+  Location loc = loops.getLoc();
+
+  // Presort combined dimensions.
+  auto sortedDimensions = llvm::to_vector<3>(combinedDimensions);
+  for (auto &dims : sortedDimensions)
+    std::sort(dims.begin(), dims.end());
+
+  // Normalize ParallelOp's iteration pattern.
+  SmallVector<Value, 3> normalizedLowerBounds, normalizedSteps,
+      normalizedUpperBounds;
+  for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) {
+    OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody());
+    auto resultBounds =
+        normalizeLoop(outsideBuilder, insideLoopBuilder, loc,
+                      loops.getLowerBound()[i], loops.getUpperBound()[i],
+                      loops.getStep()[i], loops.getBody()->getArgument(i));
+
+    normalizedLowerBounds.push_back(resultBounds.lowerBound);
+    normalizedUpperBounds.push_back(resultBounds.upperBound);
+    normalizedSteps.push_back(resultBounds.step);
+  }
+
+  // Combine iteration spaces.
+  SmallVector<Value, 3> lowerBounds, upperBounds, steps;
+  auto cst0 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 0);
+  auto cst1 = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
+  for (unsigned i = 0, e = sortedDimensions.size(); i < e; ++i) {
+    Value newUpperBound = outsideBuilder.create<arith::ConstantIndexOp>(loc, 1);
+    for (auto idx : sortedDimensions[i]) {
+      newUpperBound = outsideBuilder.create<arith::MulIOp>(
+          loc, newUpperBound, normalizedUpperBounds[idx]);
+    }
+    lowerBounds.push_back(cst0);
+    steps.push_back(cst1);
+    upperBounds.push_back(newUpperBound);
+  }
+
+  // Create new ParallelLoop with conversions to the original induction values.
+  // The loop below uses divisions to get the relevant range of values in the
+  // new induction value that represent each range of the original induction
+  // value. The remainders then determine based on that range, which iteration
+  // of the original induction value this represents. This is a normalized value
+  // that is un-normalized already by the previous logic.
+  auto newPloop = outsideBuilder.create<scf::ParallelOp>(
+      loc, lowerBounds, upperBounds, steps,
+      [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) {
+        for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) {
+          Value previous = ploopIVs[i];
+          unsigned numberCombinedDimensions = combinedDimensions[i].size();
+          // Iterate over all except the last induction value.
+          for (unsigned j = numberCombinedDimensions - 1; j > 0; --j) {
+            unsigned idx = combinedDimensions[i][j];
+
+            // Determine the current induction value's current loop iteration
+            Value iv = insideBuilder.create<arith::RemSIOp>(
+                loc, previous, normalizedUpperBounds[idx]);
+            replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx), iv,
+                                       loops.getRegion());
+
+            // Remove the effect of the current induction value to prepare for
+            // the next value.
+            previous = insideBuilder.create<arith::DivSIOp>(
+                loc, previous, normalizedUpperBounds[idx]);
+          }
+
+          // The final induction value is just the remaining value.
+          unsigned idx = combinedDimensions[i][0];
+          replaceAllUsesInRegionWith(loops.getBody()->getArgument(idx),
+                                     previous, loops.getRegion());
+        }
+      });
+
+  // Replace the old loop with the new loop.
+  loops.getBody()->back().erase();
+  newPloop.getBody()->getOperations().splice(
+      Block::iterator(newPloop.getBody()->back()),
+      loops.getBody()->getOperations());
+  loops.erase();
+}
+
+// Hoist the ops within `outer` that appear before `inner`.
+// Such ops include the ops that have been introduced by parametric tiling.
+// Ops that come from triangular loops (i.e. that belong to the program slice
+// rooted at `outer`) and ops that have side effects cannot be hoisted.
+// Return failure when any op fails to hoist.
+static LogicalResult hoistOpsBetween(scf::ForOp outer, scf::ForOp inner) {
+  SetVector<Operation *> forwardSlice;
+  getForwardSlice(
+      outer.getInductionVar(), &forwardSlice,
+      [&inner](Operation *op) { return op != inner.getOperation(); });
+  LogicalResult status = success();
+  SmallVector<Operation *, 8> toHoist;
+  for (auto &op : outer.getBody()->without_terminator()) {
+    // Stop when encountering the inner loop.
+    if (&op == inner.getOperation())
+      break;
+    // Skip over non-hoistable ops.
+    if (forwardSlice.count(&op) > 0) {
+      status = failure();
+      continue;
+    }
+    // Skip intermediate scf::ForOp, these are not considered a failure.
+    if (isa<scf::ForOp>(op))
+      continue;
+    // Skip other ops with regions.
+    if (op.getNumRegions() > 0) {
+      status = failure();
+      continue;
+    }
+    // Skip if op has side effects.
+    // TODO: loads to immutable memory regions are ok.
+    if (!MemoryEffectOpInterface::hasNoEffect(&op)) {
+      status = failure();
+      continue;
+    }
+    toHoist.push_back(&op);
+  }
+  auto *outerForOp = outer.getOperation();
+  for (auto *op : toHoist)
+    op->moveBefore(outerForOp);
+  return status;
+}
+
+// Traverse the interTile and intraTile loops and try to hoist ops such that
+// bands of perfectly nested loops are isolated.
+// Return failure if either perfect interTile or perfect intraTile bands cannot
+// be formed.
+static LogicalResult tryIsolateBands(const TileLoops &tileLoops) {
+  LogicalResult status = success();
+  const Loops &interTile = tileLoops.first;
+  const Loops &intraTile = tileLoops.second;
+  auto size = interTile.size();
+  assert(size == intraTile.size());
+  if (size <= 1)
+    return success();
+  for (unsigned s = 1; s < size; ++s)
+    status = succeeded(status) ? hoistOpsBetween(intraTile[0], intraTile[s])
+                               : failure();
+  for (unsigned s = 1; s < size; ++s)
+    status = succeeded(status) ? hoistOpsBetween(interTile[0], interTile[s])
+                               : failure();
+  return status;
+}
+
+/// Collect perfectly nested loops starting from `rootForOps`.  Loops are
+/// perfectly nested if each loop is the first and only non-terminator operation
+/// in the parent loop.  Collect at most `maxLoops` loops and append them to
+/// `forOps`.
+template <typename T>
+static void getPerfectlyNestedLoopsImpl(
+    SmallVectorImpl<T> &forOps, T rootForOp,
+    unsigned maxLoops = std::numeric_limits<unsigned>::max()) {
+  for (unsigned i = 0; i < maxLoops; ++i) {
+    forOps.push_back(rootForOp);
+    Block &body = rootForOp.getRegion().front();
+    if (body.begin() != std::prev(body.end(), 2))
+      return;
+
+    rootForOp = dyn_cast<T>(&body.front());
+    if (!rootForOp)
+      return;
+  }
+}
+
+static Loops stripmineSink(scf::ForOp forOp, Value factor,
+                           ArrayRef<scf::ForOp> targets) {
+  auto originalStep = forOp.getStep();
+  auto iv = forOp.getInductionVar();
+
+  OpBuilder b(forOp);
+  forOp.setStep(b.create<arith::MulIOp>(forOp.getLoc(), originalStep, factor));
+
+  Loops innerLoops;
+  for (auto t : targets) {
+    // Save information for splicing ops out of t when done
+    auto begin = t.getBody()->begin();
+    auto nOps = t.getBody()->getOperations().size();
+
+    // Insert newForOp before the terminator of `t`.
+    auto b = OpBuilder::atBlockTerminator((t.getBody()));
+    Value stepped = b.create<arith::AddIOp>(t.getLoc(), iv, forOp.getStep());
+    Value less = b.create<arith::CmpIOp>(t.getLoc(), arith::CmpIPredicate::slt,
+                                         forOp.getUpperBound(), stepped);
+    Value ub =
+        b.create<SelectOp>(t.getLoc(), less, forOp.getUpperBound(), stepped);
+
+    // Splice [begin, begin + nOps - 1) into `newForOp` and replace uses.
+    auto newForOp = b.create<scf::ForOp>(t.getLoc(), iv, ub, originalStep);
+    newForOp.getBody()->getOperations().splice(
+        newForOp.getBody()->getOperations().begin(),
+        t.getBody()->getOperations(), begin, std::next(begin, nOps - 1));
+    replaceAllUsesInRegionWith(iv, newForOp.getInductionVar(),
+                               newForOp.getRegion());
+
+    innerLoops.push_back(newForOp);
+  }
+
+  return innerLoops;
+}
+
+// Stripmines a `forOp` by `factor` and sinks it under a single `target`.
+// Returns the new for operation, nested immediately under `target`.
+template <typename SizeType>
+static scf::ForOp stripmineSink(scf::ForOp forOp, SizeType factor,
+                                scf::ForOp target) {
+  // TODO: Use cheap structural assertions that targets are nested under
+  // forOp and that targets are not nested under each other when DominanceInfo
+  // exposes the capability. It seems overkill to construct a whole function
+  // dominance tree at this point.
+  auto res = stripmineSink(forOp, factor, ArrayRef<scf::ForOp>(target));
+  assert(res.size() == 1 && "Expected 1 inner forOp");
+  return res[0];
+}
+
+SmallVector<Loops, 8> mlir::tile(ArrayRef<scf::ForOp> forOps,
+                                 ArrayRef<Value> sizes,
+                                 ArrayRef<scf::ForOp> targets) {
+  SmallVector<SmallVector<scf::ForOp, 8>, 8> res;
+  SmallVector<scf::ForOp, 8> currentTargets(targets.begin(), targets.end());
+  for (auto it : llvm::zip(forOps, sizes)) {
+    auto step = stripmineSink(std::get<0>(it), std::get<1>(it), currentTargets);
+    res.push_back(step);
+    currentTargets = step;
+  }
+  return res;
+}
+
+Loops mlir::tile(ArrayRef<scf::ForOp> forOps, ArrayRef<Value> sizes,
+                 scf::ForOp target) {
+  SmallVector<scf::ForOp, 8> res;
+  for (auto loops : tile(forOps, sizes, ArrayRef<scf::ForOp>(target))) {
+    assert(loops.size() == 1);
+    res.push_back(loops[0]);
+  }
+  return res;
+}
+
+Loops mlir::tilePerfectlyNested(scf::ForOp rootForOp, ArrayRef<Value> sizes) {
+  // Collect perfectly nested loops.  If more size values provided than nested
+  // loops available, truncate `sizes`.
+  SmallVector<scf::ForOp, 4> forOps;
+  forOps.reserve(sizes.size());
+  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
+  if (forOps.size() < sizes.size())
+    sizes = sizes.take_front(forOps.size());
+
+  return ::tile(forOps, sizes, forOps.back());
+}
+
+void mlir::getPerfectlyNestedLoops(SmallVectorImpl<scf::ForOp> &nestedLoops,
+                                   scf::ForOp root) {
+  getPerfectlyNestedLoopsImpl(nestedLoops, root);
+}
+
+TileLoops mlir::extractFixedOuterLoops(scf::ForOp rootForOp,
+                                       ArrayRef<int64_t> sizes) {
+  // Collect perfectly nested loops.  If more size values provided than nested
+  // loops available, truncate `sizes`.
+  SmallVector<scf::ForOp, 4> forOps;
+  forOps.reserve(sizes.size());
+  getPerfectlyNestedLoopsImpl(forOps, rootForOp, sizes.size());
+  if (forOps.size() < sizes.size())
+    sizes = sizes.take_front(forOps.size());
+
+  // Compute the tile sizes such that i-th outer loop executes size[i]
+  // iterations.  Given that the loop current executes
+  //   numIterations = ceildiv((upperBound - lowerBound), step)
+  // iterations, we need to tile with size ceildiv(numIterations, size[i]).
+  SmallVector<Value, 4> tileSizes;
+  tileSizes.reserve(sizes.size());
+  for (unsigned i = 0, e = sizes.size(); i < e; ++i) {
+    assert(sizes[i] > 0 && "expected strictly positive size for strip-mining");
+
+    auto forOp = forOps[i];
+    OpBuilder builder(forOp);
+    auto loc = forOp.getLoc();
+    Value diff = builder.create<arith::SubIOp>(loc, forOp.getUpperBound(),
+                                               forOp.getLowerBound());
+    Value numIterations = ceilDivPositive(builder, loc, diff, forOp.getStep());
+    Value iterationsPerBlock =
+        ceilDivPositive(builder, loc, numIterations, sizes[i]);
+    tileSizes.push_back(iterationsPerBlock);
+  }
+
+  // Call parametric tiling with the given sizes.
+  auto intraTile = tile(forOps, tileSizes, forOps.back());
+  TileLoops tileLoops = std::make_pair(forOps, intraTile);
+
+  // TODO: for now we just ignore the result of band isolation.
+  // In the future, mapping decisions may be impacted by the ability to
+  // isolate perfectly nested bands.
+  (void)tryIsolateBands(tileLoops);
+
+  return tileLoops;
+}
diff --git a/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp b/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp
index cda4472695b2d..73d8d261b4d57 100644
--- a/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp
+++ b/mlir/lib/Dialect/Vector/VectorTransferSplitRewritePatterns.cpp
@@ -14,7 +14,6 @@
 #include <type_traits>
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/lib/Dialect/Vector/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
index cd26750c9eb7a..3ead467bc5181 100644
--- a/mlir/lib/Dialect/Vector/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/VectorTransforms.cpp
@@ -13,7 +13,6 @@
 #include <type_traits>
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Linalg/IR/Linalg.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
diff --git a/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp b/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp
index 319a6ab6b710e..ec502702d093b 100644
--- a/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp
+++ b/mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Vector/VectorTransforms.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Interfaces/VectorInterfaces.h"
diff --git a/mlir/lib/Interfaces/LoopLikeInterface.cpp b/mlir/lib/Interfaces/LoopLikeInterface.cpp
index 4a0c5d3a18761..8ec4b51aa757b 100644
--- a/mlir/lib/Interfaces/LoopLikeInterface.cpp
+++ b/mlir/lib/Interfaces/LoopLikeInterface.cpp
@@ -7,12 +7,95 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Interfaces/LoopLikeInterface.h"
+#include "mlir/Interfaces/SideEffectInterfaces.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Support/Debug.h"
 
 using namespace mlir;
 
+#define DEBUG_TYPE "loop-like"
+
 //===----------------------------------------------------------------------===//
 // LoopLike Interfaces
 //===----------------------------------------------------------------------===//
 
 /// Include the definitions of the loop-like interfaces.
 #include "mlir/Interfaces/LoopLikeInterface.cpp.inc"
+
+//===----------------------------------------------------------------------===//
+// LoopLike Utilities
+//===----------------------------------------------------------------------===//
+
+// Checks whether the given op can be hoisted by checking that
+// - the op and any of its contained operations do not depend on SSA values
+//   defined inside of the loop (by means of calling definedOutside).
+// - the op has no side-effects. If sideEffecting is Never, sideeffects of this
+//   op and its nested ops are ignored.
+static bool canBeHoisted(Operation *op,
+                         function_ref<bool(Value)> definedOutside) {
+  // Check that dependencies are defined outside of loop.
+  if (!llvm::all_of(op->getOperands(), definedOutside))
+    return false;
+  // Check whether this op is side-effect free. If we already know that there
+  // can be no side-effects because the surrounding op has claimed so, we can
+  // (and have to) skip this step.
+  if (auto memInterface = dyn_cast<MemoryEffectOpInterface>(op)) {
+    if (!memInterface.hasNoEffect())
+      return false;
+    // If the operation doesn't have side effects and it doesn't recursively
+    // have side effects, it can always be hoisted.
+    if (!op->hasTrait<OpTrait::HasRecursiveSideEffects>())
+      return true;
+
+    // Otherwise, if the operation doesn't provide the memory effect interface
+    // and it doesn't have recursive side effects we treat it conservatively as
+    // side-effecting.
+  } else if (!op->hasTrait<OpTrait::HasRecursiveSideEffects>()) {
+    return false;
+  }
+
+  // Recurse into the regions for this op and check whether the contained ops
+  // can be hoisted.
+  for (auto &region : op->getRegions()) {
+    for (auto &block : region) {
+      for (auto &innerOp : block)
+        if (!canBeHoisted(&innerOp, definedOutside))
+          return false;
+    }
+  }
+  return true;
+}
+
+LogicalResult mlir::moveLoopInvariantCode(LoopLikeOpInterface looplike) {
+  auto &loopBody = looplike.getLoopBody();
+
+  // We use two collections here as we need to preserve the order for insertion
+  // and this is easiest.
+  SmallPtrSet<Operation *, 8> willBeMovedSet;
+  SmallVector<Operation *, 8> opsToMove;
+
+  // Helper to check whether an operation is loop invariant wrt. SSA properties.
+  auto isDefinedOutsideOfBody = [&](Value value) {
+    auto *definingOp = value.getDefiningOp();
+    return (definingOp && !!willBeMovedSet.count(definingOp)) ||
+           looplike.isDefinedOutsideOfLoop(value);
+  };
+
+  // Do not use walk here, as we do not want to go into nested regions and hoist
+  // operations from there. These regions might have semantics unknown to this
+  // rewriting. If the nested regions are loops, they will have been processed.
+  for (auto &block : loopBody) {
+    for (auto &op : block.without_terminator()) {
+      if (canBeHoisted(&op, isDefinedOutsideOfBody)) {
+        opsToMove.push_back(&op);
+        willBeMovedSet.insert(&op);
+      }
+    }
+  }
+
+  // For all instructions that we found to be invariant, move outside of the
+  // loop.
+  LogicalResult result = looplike.moveOutOfLoop(opsToMove);
+  LLVM_DEBUG(looplike.print(llvm::dbgs() << "\n\nModified loop:\n"));
+  return result;
+}
diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt
index 7826650f57471..6e59ef2d696ac 100644
--- a/mlir/lib/Transforms/CMakeLists.txt
+++ b/mlir/lib/Transforms/CMakeLists.txt
@@ -6,12 +6,8 @@ add_mlir_library(MLIRTransforms
   CSE.cpp
   Inliner.cpp
   LocationSnapshot.cpp
-  LoopCoalescing.cpp
-  LoopFusion.cpp
   LoopInvariantCodeMotion.cpp
   OpStats.cpp
-  ParallelLoopCollapsing.cpp
-  PipelineDataTransfer.cpp
   SCCP.cpp
   StripDebugInfo.cpp
   SymbolDCE.cpp
@@ -21,18 +17,13 @@ add_mlir_library(MLIRTransforms
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms
 
   DEPENDS
-  MLIRStandardOpsIncGen
   MLIRTransformsPassIncGen
 
   LINK_LIBS PUBLIC
-  MLIRAffine
   MLIRAnalysis
   MLIRCopyOpInterface
   MLIRLoopLikeInterface
-  MLIRMemRef
-  MLIRSCF
   MLIRPass
   MLIRSupport
   MLIRTransformUtils
-  MLIRVector
   )
diff --git a/mlir/lib/Transforms/CSE.cpp b/mlir/lib/Transforms/CSE.cpp
index bc265a4a233ee..235abda14f0e8 100644
--- a/mlir/lib/Transforms/CSE.cpp
+++ b/mlir/lib/Transforms/CSE.cpp
@@ -15,7 +15,6 @@
 #include "mlir/IR/Dominance.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/ScopedHashTable.h"
diff --git a/mlir/lib/Transforms/ControlFlowSink.cpp b/mlir/lib/Transforms/ControlFlowSink.cpp
index 10d41b0f0013f..6643158ac5de0 100644
--- a/mlir/lib/Transforms/ControlFlowSink.cpp
+++ b/mlir/lib/Transforms/ControlFlowSink.cpp
@@ -16,8 +16,8 @@
 #include "PassDetail.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
+#include "mlir/Transforms/ControlFlowSinkUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
 
diff --git a/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp b/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp
index 3c8e14aa66bbd..e4e3c16a14656 100644
--- a/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp
+++ b/mlir/lib/Transforms/LoopInvariantCodeMotion.cpp
@@ -11,13 +11,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "PassDetail.h"
-#include "mlir/Transforms/Passes.h"
-
 #include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
 #include "mlir/Interfaces/LoopLikeInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Transforms/LoopUtils.h"
+#include "mlir/Transforms/Passes.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -34,80 +31,6 @@ struct LoopInvariantCodeMotion
 };
 } // namespace
 
-// Checks whether the given op can be hoisted by checking that
-// - the op and any of its contained operations do not depend on SSA values
-//   defined inside of the loop (by means of calling definedOutside).
-// - the op has no side-effects. If sideEffecting is Never, sideeffects of this
-//   op and its nested ops are ignored.
-static bool canBeHoisted(Operation *op,
-                         function_ref<bool(Value)> definedOutside) {
-  // Check that dependencies are defined outside of loop.
-  if (!llvm::all_of(op->getOperands(), definedOutside))
-    return false;
-  // Check whether this op is side-effect free. If we already know that there
-  // can be no side-effects because the surrounding op has claimed so, we can
-  // (and have to) skip this step.
-  if (auto memInterface = dyn_cast<MemoryEffectOpInterface>(op)) {
-    if (!memInterface.hasNoEffect())
-      return false;
-    // If the operation doesn't have side effects and it doesn't recursively
-    // have side effects, it can always be hoisted.
-    if (!op->hasTrait<OpTrait::HasRecursiveSideEffects>())
-      return true;
-
-    // Otherwise, if the operation doesn't provide the memory effect interface
-    // and it doesn't have recursive side effects we treat it conservatively as
-    // side-effecting.
-  } else if (!op->hasTrait<OpTrait::HasRecursiveSideEffects>()) {
-    return false;
-  }
-
-  // Recurse into the regions for this op and check whether the contained ops
-  // can be hoisted.
-  for (auto &region : op->getRegions()) {
-    for (auto &block : region) {
-      for (auto &innerOp : block)
-        if (!canBeHoisted(&innerOp, definedOutside))
-          return false;
-    }
-  }
-  return true;
-}
-
-LogicalResult mlir::moveLoopInvariantCode(LoopLikeOpInterface looplike) {
-  auto &loopBody = looplike.getLoopBody();
-
-  // We use two collections here as we need to preserve the order for insertion
-  // and this is easiest.
-  SmallPtrSet<Operation *, 8> willBeMovedSet;
-  SmallVector<Operation *, 8> opsToMove;
-
-  // Helper to check whether an operation is loop invariant wrt. SSA properties.
-  auto isDefinedOutsideOfBody = [&](Value value) {
-    auto *definingOp = value.getDefiningOp();
-    return (definingOp && !!willBeMovedSet.count(definingOp)) ||
-           looplike.isDefinedOutsideOfLoop(value);
-  };
-
-  // Do not use walk here, as we do not want to go into nested regions and hoist
-  // operations from there. These regions might have semantics unknown to this
-  // rewriting. If the nested regions are loops, they will have been processed.
-  for (auto &block : loopBody) {
-    for (auto &op : block.without_terminator()) {
-      if (canBeHoisted(&op, isDefinedOutsideOfBody)) {
-        opsToMove.push_back(&op);
-        willBeMovedSet.insert(&op);
-      }
-    }
-  }
-
-  // For all instructions that we found to be invariant, move outside of the
-  // loop.
-  auto result = looplike.moveOutOfLoop(opsToMove);
-  LLVM_DEBUG(looplike.print(llvm::dbgs() << "\n\nModified loop:\n"));
-  return result;
-}
-
 void LoopInvariantCodeMotion::runOnOperation() {
   // Walk through all loops in a function in innermost-loop-first order. This
   // way, we first LICM from the inner loop, and place the ops in
diff --git a/mlir/lib/Transforms/PassDetail.h b/mlir/lib/Transforms/PassDetail.h
index 4496c4223cbec..7e1fedc136c9e 100644
--- a/mlir/lib/Transforms/PassDetail.h
+++ b/mlir/lib/Transforms/PassDetail.h
@@ -13,23 +13,8 @@
 #include "mlir/Transforms/Passes.h"
 
 namespace mlir {
-class AffineDialect;
-
-// Forward declaration from Dialect.h
-template <typename ConcreteDialect>
-void registerDialect(DialectRegistry &registry);
-
-namespace arith {
-class ArithmeticDialect;
-} // namespace arith
-
-namespace memref {
-class MemRefDialect;
-} // namespace memref
-
 #define GEN_PASS_CLASSES
 #include "mlir/Transforms/Passes.h.inc"
-
 } // namespace mlir
 
 #endif // TRANSFORMS_PASSDETAIL_H_
diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt
index c42d45325e1b2..8f16426e11b1f 100644
--- a/mlir/lib/Transforms/Utils/CMakeLists.txt
+++ b/mlir/lib/Transforms/Utils/CMakeLists.txt
@@ -4,25 +4,12 @@ add_mlir_library(MLIRTransformUtils
   FoldUtils.cpp
   GreedyPatternRewriteDriver.cpp
   InliningUtils.cpp
-  LoopFusionUtils.cpp
-  LoopUtils.cpp
   RegionUtils.cpp
-  Utils.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Transforms
 
-  DEPENDS
-  MLIRStandardOpsIncGen
-
   LINK_LIBS PUBLIC
-  MLIRAffine
-  MLIRArithmetic
   MLIRAnalysis
-  MLIRAffineAnalysis
-  MLIRMemRef
-  MLIRSCF
-  MLIRPass
   MLIRRewrite
-  MLIRStandard
   )
diff --git a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
index 868174b261c4a..0101f9026dc71 100644
--- a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
+++ b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp
@@ -18,10 +18,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "mlir/Transforms/ControlFlowSinkUtils.h"
 #include "mlir/IR/Dominance.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Interfaces/ControlFlowInterfaces.h"
-#include "mlir/Transforms/Utils.h"
 #include <vector>
 
 #define DEBUG_TYPE "cf-sink"
diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp
index cbc8532606281..de15a23b906df 100644
--- a/mlir/lib/Transforms/Utils/DialectConversion.cpp
+++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp
@@ -13,7 +13,6 @@
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/FunctionInterfaces.h"
 #include "mlir/Rewrite/PatternApplicator.h"
-#include "mlir/Transforms/Utils.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
diff --git a/mlir/lib/Transforms/Utils/Utils.cpp b/mlir/lib/Transforms/Utils/Utils.cpp
deleted file mode 100644
index b2936bbca9620..0000000000000
--- a/mlir/lib/Transforms/Utils/Utils.cpp
+++ /dev/null
@@ -1,767 +0,0 @@
-//===- Utils.cpp ---- Misc utilities for code and data transformation -----===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements miscellaneous transformation routines for non-loop IR
-// structures.
-//
-//===----------------------------------------------------------------------===//
-
-#include "mlir/Transforms/Utils.h"
-#include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
-#include "mlir/Dialect/Affine/Analysis/AffineStructures.h"
-#include "mlir/Dialect/Affine/Analysis/Utils.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/IR/Builders.h"
-#include "mlir/IR/BuiltinOps.h"
-#include "mlir/IR/Dominance.h"
-#include "mlir/Support/MathExtras.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/TypeSwitch.h"
-
-#define DEBUG_TYPE "transforms-utils"
-
-using namespace mlir;
-
-// Perform the replacement in `op`.
-LogicalResult mlir::replaceAllMemRefUsesWith(Value oldMemRef, Value newMemRef,
-                                             Operation *op,
-                                             ArrayRef<Value> extraIndices,
-                                             AffineMap indexRemap,
-                                             ArrayRef<Value> extraOperands,
-                                             ArrayRef<Value> symbolOperands,
-                                             bool allowNonDereferencingOps) {
-  unsigned newMemRefRank = newMemRef.getType().cast<MemRefType>().getRank();
-  (void)newMemRefRank; // unused in opt mode
-  unsigned oldMemRefRank = oldMemRef.getType().cast<MemRefType>().getRank();
-  (void)oldMemRefRank; // unused in opt mode
-  if (indexRemap) {
-    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
-           "symbolic operand count mismatch");
-    assert(indexRemap.getNumInputs() ==
-           extraOperands.size() + oldMemRefRank + symbolOperands.size());
-    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
-  } else {
-    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
-  }
-
-  // Assert same elemental type.
-  assert(oldMemRef.getType().cast<MemRefType>().getElementType() ==
-         newMemRef.getType().cast<MemRefType>().getElementType());
-
-  SmallVector<unsigned, 2> usePositions;
-  for (const auto &opEntry : llvm::enumerate(op->getOperands())) {
-    if (opEntry.value() == oldMemRef)
-      usePositions.push_back(opEntry.index());
-  }
-
-  // If memref doesn't appear, nothing to do.
-  if (usePositions.empty())
-    return success();
-
-  if (usePositions.size() > 1) {
-    // TODO: extend it for this case when needed (rare).
-    assert(false && "multiple dereferencing uses in a single op not supported");
-    return failure();
-  }
-
-  unsigned memRefOperandPos = usePositions.front();
-
-  OpBuilder builder(op);
-  // The following checks if op is dereferencing memref and performs the access
-  // index rewrites.
-  auto affMapAccInterface = dyn_cast<AffineMapAccessInterface>(op);
-  if (!affMapAccInterface) {
-    if (!allowNonDereferencingOps) {
-      // Failure: memref used in a non-dereferencing context (potentially
-      // escapes); no replacement in these cases unless allowNonDereferencingOps
-      // is set.
-      return failure();
-    }
-    op->setOperand(memRefOperandPos, newMemRef);
-    return success();
-  }
-  // Perform index rewrites for the dereferencing op and then replace the op
-  NamedAttribute oldMapAttrPair =
-      affMapAccInterface.getAffineMapAttrForMemRef(oldMemRef);
-  AffineMap oldMap = oldMapAttrPair.getValue().cast<AffineMapAttr>().getValue();
-  unsigned oldMapNumInputs = oldMap.getNumInputs();
-  SmallVector<Value, 4> oldMapOperands(
-      op->operand_begin() + memRefOperandPos + 1,
-      op->operand_begin() + memRefOperandPos + 1 + oldMapNumInputs);
-
-  // Apply 'oldMemRefOperands = oldMap(oldMapOperands)'.
-  SmallVector<Value, 4> oldMemRefOperands;
-  SmallVector<Value, 4> affineApplyOps;
-  oldMemRefOperands.reserve(oldMemRefRank);
-  if (oldMap != builder.getMultiDimIdentityMap(oldMap.getNumDims())) {
-    for (auto resultExpr : oldMap.getResults()) {
-      auto singleResMap = AffineMap::get(oldMap.getNumDims(),
-                                         oldMap.getNumSymbols(), resultExpr);
-      auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
-                                                oldMapOperands);
-      oldMemRefOperands.push_back(afOp);
-      affineApplyOps.push_back(afOp);
-    }
-  } else {
-    oldMemRefOperands.assign(oldMapOperands.begin(), oldMapOperands.end());
-  }
-
-  // Construct new indices as a remap of the old ones if a remapping has been
-  // provided. The indices of a memref come right after it, i.e.,
-  // at position memRefOperandPos + 1.
-  SmallVector<Value, 4> remapOperands;
-  remapOperands.reserve(extraOperands.size() + oldMemRefRank +
-                        symbolOperands.size());
-  remapOperands.append(extraOperands.begin(), extraOperands.end());
-  remapOperands.append(oldMemRefOperands.begin(), oldMemRefOperands.end());
-  remapOperands.append(symbolOperands.begin(), symbolOperands.end());
-
-  SmallVector<Value, 4> remapOutputs;
-  remapOutputs.reserve(oldMemRefRank);
-
-  if (indexRemap &&
-      indexRemap != builder.getMultiDimIdentityMap(indexRemap.getNumDims())) {
-    // Remapped indices.
-    for (auto resultExpr : indexRemap.getResults()) {
-      auto singleResMap = AffineMap::get(
-          indexRemap.getNumDims(), indexRemap.getNumSymbols(), resultExpr);
-      auto afOp = builder.create<AffineApplyOp>(op->getLoc(), singleResMap,
-                                                remapOperands);
-      remapOutputs.push_back(afOp);
-      affineApplyOps.push_back(afOp);
-    }
-  } else {
-    // No remapping specified.
-    remapOutputs.assign(remapOperands.begin(), remapOperands.end());
-  }
-
-  SmallVector<Value, 4> newMapOperands;
-  newMapOperands.reserve(newMemRefRank);
-
-  // Prepend 'extraIndices' in 'newMapOperands'.
-  for (Value extraIndex : extraIndices) {
-    assert(extraIndex.getDefiningOp()->getNumResults() == 1 &&
-           "single result op's expected to generate these indices");
-    assert((isValidDim(extraIndex) || isValidSymbol(extraIndex)) &&
-           "invalid memory op index");
-    newMapOperands.push_back(extraIndex);
-  }
-
-  // Append 'remapOutputs' to 'newMapOperands'.
-  newMapOperands.append(remapOutputs.begin(), remapOutputs.end());
-
-  // Create new fully composed AffineMap for new op to be created.
-  assert(newMapOperands.size() == newMemRefRank);
-  auto newMap = builder.getMultiDimIdentityMap(newMemRefRank);
-  // TODO: Avoid creating/deleting temporary AffineApplyOps here.
-  fullyComposeAffineMapAndOperands(&newMap, &newMapOperands);
-  newMap = simplifyAffineMap(newMap);
-  canonicalizeMapAndOperands(&newMap, &newMapOperands);
-  // Remove any affine.apply's that became dead as a result of composition.
-  for (Value value : affineApplyOps)
-    if (value.use_empty())
-      value.getDefiningOp()->erase();
-
-  OperationState state(op->getLoc(), op->getName());
-  // Construct the new operation using this memref.
-  state.operands.reserve(op->getNumOperands() + extraIndices.size());
-  // Insert the non-memref operands.
-  state.operands.append(op->operand_begin(),
-                        op->operand_begin() + memRefOperandPos);
-  // Insert the new memref value.
-  state.operands.push_back(newMemRef);
-
-  // Insert the new memref map operands.
-  state.operands.append(newMapOperands.begin(), newMapOperands.end());
-
-  // Insert the remaining operands unmodified.
-  state.operands.append(op->operand_begin() + memRefOperandPos + 1 +
-                            oldMapNumInputs,
-                        op->operand_end());
-
-  // Result types don't change. Both memref's are of the same elemental type.
-  state.types.reserve(op->getNumResults());
-  for (auto result : op->getResults())
-    state.types.push_back(result.getType());
-
-  // Add attribute for 'newMap', other Attributes do not change.
-  auto newMapAttr = AffineMapAttr::get(newMap);
-  for (auto namedAttr : op->getAttrs()) {
-    if (namedAttr.getName() == oldMapAttrPair.getName())
-      state.attributes.push_back({namedAttr.getName(), newMapAttr});
-    else
-      state.attributes.push_back(namedAttr);
-  }
-
-  // Create the new operation.
-  auto *repOp = builder.createOperation(state);
-  op->replaceAllUsesWith(repOp);
-  op->erase();
-
-  return success();
-}
-
-LogicalResult mlir::replaceAllMemRefUsesWith(
-    Value oldMemRef, Value newMemRef, ArrayRef<Value> extraIndices,
-    AffineMap indexRemap, ArrayRef<Value> extraOperands,
-    ArrayRef<Value> symbolOperands, Operation *domOpFilter,
-    Operation *postDomOpFilter, bool allowNonDereferencingOps,
-    bool replaceInDeallocOp) {
-  unsigned newMemRefRank = newMemRef.getType().cast<MemRefType>().getRank();
-  (void)newMemRefRank; // unused in opt mode
-  unsigned oldMemRefRank = oldMemRef.getType().cast<MemRefType>().getRank();
-  (void)oldMemRefRank;
-  if (indexRemap) {
-    assert(indexRemap.getNumSymbols() == symbolOperands.size() &&
-           "symbol operand count mismatch");
-    assert(indexRemap.getNumInputs() ==
-           extraOperands.size() + oldMemRefRank + symbolOperands.size());
-    assert(indexRemap.getNumResults() + extraIndices.size() == newMemRefRank);
-  } else {
-    assert(oldMemRefRank + extraIndices.size() == newMemRefRank);
-  }
-
-  // Assert same elemental type.
-  assert(oldMemRef.getType().cast<MemRefType>().getElementType() ==
-         newMemRef.getType().cast<MemRefType>().getElementType());
-
-  std::unique_ptr<DominanceInfo> domInfo;
-  std::unique_ptr<PostDominanceInfo> postDomInfo;
-  if (domOpFilter)
-    domInfo =
-        std::make_unique<DominanceInfo>(domOpFilter->getParentOfType<FuncOp>());
-
-  if (postDomOpFilter)
-    postDomInfo = std::make_unique<PostDominanceInfo>(
-        postDomOpFilter->getParentOfType<FuncOp>());
-
-  // Walk all uses of old memref; collect ops to perform replacement. We use a
-  // DenseSet since an operation could potentially have multiple uses of a
-  // memref (although rare), and the replacement later is going to erase ops.
-  DenseSet<Operation *> opsToReplace;
-  for (auto *op : oldMemRef.getUsers()) {
-    // Skip this use if it's not dominated by domOpFilter.
-    if (domOpFilter && !domInfo->dominates(domOpFilter, op))
-      continue;
-
-    // Skip this use if it's not post-dominated by postDomOpFilter.
-    if (postDomOpFilter && !postDomInfo->postDominates(postDomOpFilter, op))
-      continue;
-
-    // Skip dealloc's - no replacement is necessary, and a memref replacement
-    // at other uses doesn't hurt these dealloc's.
-    if (isa<memref::DeallocOp>(op) && !replaceInDeallocOp)
-      continue;
-
-    // Check if the memref was used in a non-dereferencing context. It is fine
-    // for the memref to be used in a non-dereferencing way outside of the
-    // region where this replacement is happening.
-    if (!isa<AffineMapAccessInterface>(*op)) {
-      if (!allowNonDereferencingOps) {
-        LLVM_DEBUG(llvm::dbgs()
-                   << "Memref replacement failed: non-deferencing memref op: \n"
-                   << *op << '\n');
-        return failure();
-      }
-      // Non-dereferencing ops with the MemRefsNormalizable trait are
-      // supported for replacement.
-      if (!op->hasTrait<OpTrait::MemRefsNormalizable>()) {
-        LLVM_DEBUG(llvm::dbgs() << "Memref replacement failed: use without a "
-                                   "memrefs normalizable trait: \n"
-                                << *op << '\n');
-        return failure();
-      }
-    }
-
-    // We'll first collect and then replace --- since replacement erases the op
-    // that has the use, and that op could be postDomFilter or domFilter itself!
-    opsToReplace.insert(op);
-  }
-
-  for (auto *op : opsToReplace) {
-    if (failed(replaceAllMemRefUsesWith(
-            oldMemRef, newMemRef, op, extraIndices, indexRemap, extraOperands,
-            symbolOperands, allowNonDereferencingOps)))
-      llvm_unreachable("memref replacement guaranteed to succeed here");
-  }
-
-  return success();
-}
-
-/// Given an operation, inserts one or more single result affine
-/// apply operations, results of which are exclusively used by this operation
-/// operation. The operands of these newly created affine apply ops are
-/// guaranteed to be loop iterators or terminal symbols of a function.
-///
-/// Before
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   "send"(%idx, %A, ...)
-///   "compute"(%idx)
-///
-/// After
-///
-/// affine.for %i = 0 to #map(%N)
-///   %idx = affine.apply (d0) -> (d0 mod 2) (%i)
-///   "send"(%idx, %A, ...)
-///   %idx_ = affine.apply (d0) -> (d0 mod 2) (%i)
-///   "compute"(%idx_)
-///
-/// This allows applying different transformations on send and compute (for eg.
-/// different shifts/delays).
-///
-/// Returns nullptr either if none of opInst's operands were the result of an
-/// affine.apply and thus there was no affine computation slice to create, or if
-/// all the affine.apply op's supplying operands to this opInst did not have any
-/// uses besides this opInst; otherwise returns the list of affine.apply
-/// operations created in output argument `sliceOps`.
-void mlir::createAffineComputationSlice(
-    Operation *opInst, SmallVectorImpl<AffineApplyOp> *sliceOps) {
-  // Collect all operands that are results of affine apply ops.
-  SmallVector<Value, 4> subOperands;
-  subOperands.reserve(opInst->getNumOperands());
-  for (auto operand : opInst->getOperands())
-    if (isa_and_nonnull<AffineApplyOp>(operand.getDefiningOp()))
-      subOperands.push_back(operand);
-
-  // Gather sequence of AffineApplyOps reachable from 'subOperands'.
-  SmallVector<Operation *, 4> affineApplyOps;
-  getReachableAffineApplyOps(subOperands, affineApplyOps);
-  // Skip transforming if there are no affine maps to compose.
-  if (affineApplyOps.empty())
-    return;
-
-  // Check if all uses of the affine apply op's lie only in this op op, in
-  // which case there would be nothing to do.
-  bool localized = true;
-  for (auto *op : affineApplyOps) {
-    for (auto result : op->getResults()) {
-      for (auto *user : result.getUsers()) {
-        if (user != opInst) {
-          localized = false;
-          break;
-        }
-      }
-    }
-  }
-  if (localized)
-    return;
-
-  OpBuilder builder(opInst);
-  SmallVector<Value, 4> composedOpOperands(subOperands);
-  auto composedMap = builder.getMultiDimIdentityMap(composedOpOperands.size());
-  fullyComposeAffineMapAndOperands(&composedMap, &composedOpOperands);
-
-  // Create an affine.apply for each of the map results.
-  sliceOps->reserve(composedMap.getNumResults());
-  for (auto resultExpr : composedMap.getResults()) {
-    auto singleResMap = AffineMap::get(composedMap.getNumDims(),
-                                       composedMap.getNumSymbols(), resultExpr);
-    sliceOps->push_back(builder.create<AffineApplyOp>(
-        opInst->getLoc(), singleResMap, composedOpOperands));
-  }
-
-  // Construct the new operands that include the results from the composed
-  // affine apply op above instead of existing ones (subOperands). So, they
-  // differ from opInst's operands only for those operands in 'subOperands', for
-  // which they will be replaced by the corresponding one from 'sliceOps'.
-  SmallVector<Value, 4> newOperands(opInst->getOperands());
-  for (unsigned i = 0, e = newOperands.size(); i < e; i++) {
-    // Replace the subOperands from among the new operands.
-    unsigned j, f;
-    for (j = 0, f = subOperands.size(); j < f; j++) {
-      if (newOperands[i] == subOperands[j])
-        break;
-    }
-    if (j < subOperands.size()) {
-      newOperands[i] = (*sliceOps)[j];
-    }
-  }
-  for (unsigned idx = 0, e = newOperands.size(); idx < e; idx++) {
-    opInst->setOperand(idx, newOperands[idx]);
-  }
-}
-
-/// Enum to set patterns of affine expr in tiled-layout map.
-/// TileFloorDiv: <dim expr> div <tile size>
-/// TileMod: <dim expr> mod <tile size>
-/// TileNone: None of the above
-/// Example:
-/// #tiled_2d_128x256 = affine_map<(d0, d1)
-///            -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
-/// "d0 div 128" and "d1 div 256" ==> TileFloorDiv
-/// "d0 mod 128" and "d1 mod 256" ==> TileMod
-enum TileExprPattern { TileFloorDiv, TileMod, TileNone };
-
-/// Check if `map` is a tiled layout. In the tiled layout, specific k dimensions
-/// being floordiv'ed by respective tile sizes appeare in a mod with the same
-/// tile sizes, and no other expression involves those k dimensions. This
-/// function stores a vector of tuples (`tileSizePos`) including AffineExpr for
-/// tile size, positions of corresponding `floordiv` and `mod`. If it is not a
-/// tiled layout, an empty vector is returned.
-static LogicalResult getTileSizePos(
-    AffineMap map,
-    SmallVectorImpl<std::tuple<AffineExpr, unsigned, unsigned>> &tileSizePos) {
-  // Create `floordivExprs` which is a vector of tuples including LHS and RHS of
-  // `floordiv` and its position in `map` output.
-  // Example: #tiled_2d_128x256 = affine_map<(d0, d1)
-  //                -> (d0 div 128, d1 div 256, d0 mod 128, d1 mod 256)>
-  // In this example, `floordivExprs` includes {d0, 128, 0} and {d1, 256, 1}.
-  SmallVector<std::tuple<AffineExpr, AffineExpr, unsigned>, 4> floordivExprs;
-  unsigned pos = 0;
-  for (AffineExpr expr : map.getResults()) {
-    if (expr.getKind() == AffineExprKind::FloorDiv) {
-      AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
-      if (binaryExpr.getRHS().isa<AffineConstantExpr>())
-        floordivExprs.emplace_back(
-            std::make_tuple(binaryExpr.getLHS(), binaryExpr.getRHS(), pos));
-    }
-    pos++;
-  }
-  // Not tiled layout if `floordivExprs` is empty.
-  if (floordivExprs.empty()) {
-    tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
-    return success();
-  }
-
-  // Check if LHS of `floordiv` is used in LHS of `mod`. If not used, `map` is
-  // not tiled layout.
-  for (std::tuple<AffineExpr, AffineExpr, unsigned> fexpr : floordivExprs) {
-    AffineExpr floordivExprLHS = std::get<0>(fexpr);
-    AffineExpr floordivExprRHS = std::get<1>(fexpr);
-    unsigned floordivPos = std::get<2>(fexpr);
-
-    // Walk affinexpr of `map` output except `fexpr`, and check if LHS and RHS
-    // of `fexpr` are used in LHS and RHS of `mod`. If LHS of `fexpr` is used
-    // other expr, the map is not tiled layout. Example of non tiled layout:
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 floordiv 256)>
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 128)>
-    //   affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2 mod 256, d2 mod
-    //   256)>
-    bool found = false;
-    pos = 0;
-    for (AffineExpr expr : map.getResults()) {
-      bool notTiled = false;
-      if (pos != floordivPos) {
-        expr.walk([&](AffineExpr e) {
-          if (e == floordivExprLHS) {
-            if (expr.getKind() == AffineExprKind::Mod) {
-              AffineBinaryOpExpr binaryExpr = expr.cast<AffineBinaryOpExpr>();
-              // If LHS and RHS of `mod` are the same with those of floordiv.
-              if (floordivExprLHS == binaryExpr.getLHS() &&
-                  floordivExprRHS == binaryExpr.getRHS()) {
-                // Save tile size (RHS of `mod`), and position of `floordiv` and
-                // `mod` if same expr with `mod` is not found yet.
-                if (!found) {
-                  tileSizePos.emplace_back(
-                      std::make_tuple(binaryExpr.getRHS(), floordivPos, pos));
-                  found = true;
-                } else {
-                  // Non tiled layout: Have multilpe `mod` with the same LHS.
-                  // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
-                  // mod 256, d2 mod 256)>
-                  notTiled = true;
-                }
-              } else {
-                // Non tiled layout: RHS of `mod` is different from `floordiv`.
-                // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
-                // mod 128)>
-                notTiled = true;
-              }
-            } else {
-              // Non tiled layout: LHS is the same, but not `mod`.
-              // eg. affine_map<(d0, d1, d2) -> (d0, d1, d2 floordiv 256, d2
-              // floordiv 256)>
-              notTiled = true;
-            }
-          }
-        });
-      }
-      if (notTiled) {
-        tileSizePos = SmallVector<std::tuple<AffineExpr, unsigned, unsigned>>{};
-        return success();
-      }
-      pos++;
-    }
-  }
-  return success();
-}
-
-/// Check if `dim` dimension of memrefType with `layoutMap` becomes dynamic
-/// after normalization. Dimensions that include dynamic dimensions in the map
-/// output will become dynamic dimensions. Return true if `dim` is dynamic
-/// dimension.
-///
-/// Example:
-/// #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
-///
-/// If d1 is dynamic dimension, 2nd and 3rd dimension of map output are dynamic.
-/// memref<4x?xf32, #map0>  ==>  memref<4x?x?xf32>
-static bool
-isNormalizedMemRefDynamicDim(unsigned dim, AffineMap layoutMap,
-                             SmallVectorImpl<unsigned> &inMemrefTypeDynDims,
-                             MLIRContext *context) {
-  bool isDynamicDim = false;
-  AffineExpr expr = layoutMap.getResults()[dim];
-  // Check if affine expr of the dimension includes dynamic dimension of input
-  // memrefType.
-  expr.walk([&inMemrefTypeDynDims, &isDynamicDim, &context](AffineExpr e) {
-    if (e.isa<AffineDimExpr>()) {
-      for (unsigned dm : inMemrefTypeDynDims) {
-        if (e == getAffineDimExpr(dm, context)) {
-          isDynamicDim = true;
-        }
-      }
-    }
-  });
-  return isDynamicDim;
-}
-
-/// Create affine expr to calculate dimension size for a tiled-layout map.
-static AffineExpr createDimSizeExprForTiledLayout(AffineExpr oldMapOutput,
-                                                  TileExprPattern pat) {
-  // Create map output for the patterns.
-  // "floordiv <tile size>" ==> "ceildiv <tile size>"
-  // "mod <tile size>" ==> "<tile size>"
-  AffineExpr newMapOutput;
-  AffineBinaryOpExpr binaryExpr = nullptr;
-  switch (pat) {
-  case TileExprPattern::TileMod:
-    binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
-    newMapOutput = binaryExpr.getRHS();
-    break;
-  case TileExprPattern::TileFloorDiv:
-    binaryExpr = oldMapOutput.cast<AffineBinaryOpExpr>();
-    newMapOutput = getAffineBinaryOpExpr(
-        AffineExprKind::CeilDiv, binaryExpr.getLHS(), binaryExpr.getRHS());
-    break;
-  default:
-    newMapOutput = oldMapOutput;
-  }
-  return newMapOutput;
-}
-
-/// Create new maps to calculate each dimension size of `newMemRefType`, and
-/// create `newDynamicSizes` from them by using AffineApplyOp.
-///
-/// Steps for normalizing dynamic memrefs for a tiled layout map
-/// Example:
-///    #map0 = affine_map<(d0, d1) -> (d0, d1 floordiv 32, d1 mod 32)>
-///    %0 = dim %arg0, %c1 :memref<4x?xf32>
-///    %1 = alloc(%0) : memref<4x?xf32, #map0>
-///
-/// (Before this function)
-/// 1. Check if `map`(#map0) is a tiled layout using `getTileSizePos()`. Only
-/// single layout map is supported.
-///
-/// 2. Create normalized memrefType using `isNormalizedMemRefDynamicDim()`. It
-/// is memref<4x?x?xf32> in the above example.
-///
-/// (In this function)
-/// 3. Create new maps to calculate each dimension of the normalized memrefType
-/// using `createDimSizeExprForTiledLayout()`. In the tiled layout, the
-/// dimension size can be calculated by replacing "floordiv <tile size>" with
-/// "ceildiv <tile size>" and "mod <tile size>" with "<tile size>".
-/// - New map in the above example
-///   #map0 = affine_map<(d0, d1) -> (d0)>
-///   #map1 = affine_map<(d0, d1) -> (d1 ceildiv 32)>
-///   #map2 = affine_map<(d0, d1) -> (32)>
-///
-/// 4. Create AffineApplyOp to apply the new maps. The output of AffineApplyOp
-/// is used in dynamicSizes of new AllocOp.
-///   %0 = dim %arg0, %c1 : memref<4x?xf32>
-///   %c4 = arith.constant 4 : index
-///   %1 = affine.apply #map1(%c4, %0)
-///   %2 = affine.apply #map2(%c4, %0)
-static void createNewDynamicSizes(MemRefType oldMemRefType,
-                                  MemRefType newMemRefType, AffineMap map,
-                                  memref::AllocOp *allocOp, OpBuilder b,
-                                  SmallVectorImpl<Value> &newDynamicSizes) {
-  // Create new input for AffineApplyOp.
-  SmallVector<Value, 4> inAffineApply;
-  ArrayRef<int64_t> oldMemRefShape = oldMemRefType.getShape();
-  unsigned dynIdx = 0;
-  for (unsigned d = 0; d < oldMemRefType.getRank(); ++d) {
-    if (oldMemRefShape[d] < 0) {
-      // Use dynamicSizes of allocOp for dynamic dimension.
-      inAffineApply.emplace_back(allocOp->dynamicSizes()[dynIdx]);
-      dynIdx++;
-    } else {
-      // Create ConstantOp for static dimension.
-      Attribute constantAttr =
-          b.getIntegerAttr(b.getIndexType(), oldMemRefShape[d]);
-      inAffineApply.emplace_back(
-          b.create<arith::ConstantOp>(allocOp->getLoc(), constantAttr));
-    }
-  }
-
-  // Create new map to calculate each dimension size of new memref for each
-  // original map output. Only for dynamic dimesion of `newMemRefType`.
-  unsigned newDimIdx = 0;
-  ArrayRef<int64_t> newMemRefShape = newMemRefType.getShape();
-  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
-  (void)getTileSizePos(map, tileSizePos);
-  for (AffineExpr expr : map.getResults()) {
-    if (newMemRefShape[newDimIdx] < 0) {
-      // Create new maps to calculate each dimension size of new memref.
-      enum TileExprPattern pat = TileExprPattern::TileNone;
-      for (auto pos : tileSizePos) {
-        if (newDimIdx == std::get<1>(pos))
-          pat = TileExprPattern::TileFloorDiv;
-        else if (newDimIdx == std::get<2>(pos))
-          pat = TileExprPattern::TileMod;
-      }
-      AffineExpr newMapOutput = createDimSizeExprForTiledLayout(expr, pat);
-      AffineMap newMap =
-          AffineMap::get(map.getNumInputs(), map.getNumSymbols(), newMapOutput);
-      Value affineApp =
-          b.create<AffineApplyOp>(allocOp->getLoc(), newMap, inAffineApply);
-      newDynamicSizes.emplace_back(affineApp);
-    }
-    newDimIdx++;
-  }
-}
-
-// TODO: Currently works for static memrefs with a single layout map.
-LogicalResult mlir::normalizeMemRef(memref::AllocOp *allocOp) {
-  MemRefType memrefType = allocOp->getType();
-  OpBuilder b(*allocOp);
-
-  // Fetch a new memref type after normalizing the old memref to have an
-  // identity map layout.
-  MemRefType newMemRefType =
-      normalizeMemRefType(memrefType, b, allocOp->symbolOperands().size());
-  if (newMemRefType == memrefType)
-    // Either memrefType already had an identity map or the map couldn't be
-    // transformed to an identity map.
-    return failure();
-
-  Value oldMemRef = allocOp->getResult();
-
-  SmallVector<Value, 4> symbolOperands(allocOp->symbolOperands());
-  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
-  memref::AllocOp newAlloc;
-  // Check if `layoutMap` is a tiled layout. Only single layout map is
-  // supported for normalizing dynamic memrefs.
-  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
-  (void)getTileSizePos(layoutMap, tileSizePos);
-  if (newMemRefType.getNumDynamicDims() > 0 && !tileSizePos.empty()) {
-    MemRefType oldMemRefType = oldMemRef.getType().cast<MemRefType>();
-    SmallVector<Value, 4> newDynamicSizes;
-    createNewDynamicSizes(oldMemRefType, newMemRefType, layoutMap, allocOp, b,
-                          newDynamicSizes);
-    // Add the new dynamic sizes in new AllocOp.
-    newAlloc =
-        b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
-                                  newDynamicSizes, allocOp->alignmentAttr());
-  } else {
-    newAlloc = b.create<memref::AllocOp>(allocOp->getLoc(), newMemRefType,
-                                         allocOp->alignmentAttr());
-  }
-  // Replace all uses of the old memref.
-  if (failed(replaceAllMemRefUsesWith(oldMemRef, /*newMemRef=*/newAlloc,
-                                      /*extraIndices=*/{},
-                                      /*indexRemap=*/layoutMap,
-                                      /*extraOperands=*/{},
-                                      /*symbolOperands=*/symbolOperands,
-                                      /*domOpFilter=*/nullptr,
-                                      /*postDomOpFilter=*/nullptr,
-                                      /*allowNonDereferencingOps=*/true))) {
-    // If it failed (due to escapes for example), bail out.
-    newAlloc.erase();
-    return failure();
-  }
-  // Replace any uses of the original alloc op and erase it. All remaining uses
-  // have to be dealloc's; RAMUW above would've failed otherwise.
-  assert(llvm::all_of(oldMemRef.getUsers(), [](Operation *op) {
-    return isa<memref::DeallocOp>(op);
-  }));
-  oldMemRef.replaceAllUsesWith(newAlloc);
-  allocOp->erase();
-  return success();
-}
-
-MemRefType mlir::normalizeMemRefType(MemRefType memrefType, OpBuilder b,
-                                     unsigned numSymbolicOperands) {
-  unsigned rank = memrefType.getRank();
-  if (rank == 0)
-    return memrefType;
-
-  if (memrefType.getLayout().isIdentity()) {
-    // Either no maps is associated with this memref or this memref has
-    // a trivial (identity) map.
-    return memrefType;
-  }
-  AffineMap layoutMap = memrefType.getLayout().getAffineMap();
-
-  // We don't do any checks for one-to-one'ness; we assume that it is
-  // one-to-one.
-
-  // Normalize only static memrefs and dynamic memrefs with a tiled-layout map
-  // for now.
-  // TODO: Normalize the other types of dynamic memrefs.
-  SmallVector<std::tuple<AffineExpr, unsigned, unsigned>> tileSizePos;
-  (void)getTileSizePos(layoutMap, tileSizePos);
-  if (memrefType.getNumDynamicDims() > 0 && tileSizePos.empty())
-    return memrefType;
-
-  // We have a single map that is not an identity map. Create a new memref
-  // with the right shape and an identity layout map.
-  ArrayRef<int64_t> shape = memrefType.getShape();
-  // FlatAffineConstraint may later on use symbolicOperands.
-  FlatAffineConstraints fac(rank, numSymbolicOperands);
-  SmallVector<unsigned, 4> memrefTypeDynDims;
-  for (unsigned d = 0; d < rank; ++d) {
-    // Use constraint system only in static dimensions.
-    if (shape[d] > 0) {
-      fac.addBound(FlatAffineConstraints::LB, d, 0);
-      fac.addBound(FlatAffineConstraints::UB, d, shape[d] - 1);
-    } else {
-      memrefTypeDynDims.emplace_back(d);
-    }
-  }
-  // We compose this map with the original index (logical) space to derive
-  // the upper bounds for the new index space.
-  unsigned newRank = layoutMap.getNumResults();
-  if (failed(fac.composeMatchingMap(layoutMap)))
-    return memrefType;
-  // TODO: Handle semi-affine maps.
-  // Project out the old data dimensions.
-  fac.projectOut(newRank, fac.getNumIds() - newRank - fac.getNumLocalIds());
-  SmallVector<int64_t, 4> newShape(newRank);
-  for (unsigned d = 0; d < newRank; ++d) {
-    // Check if each dimension of normalized memrefType is dynamic.
-    bool isDynDim = isNormalizedMemRefDynamicDim(
-        d, layoutMap, memrefTypeDynDims, b.getContext());
-    if (isDynDim) {
-      newShape[d] = -1;
-    } else {
-      // The lower bound for the shape is always zero.
-      auto ubConst = fac.getConstantBound(FlatAffineConstraints::UB, d);
-      // For a static memref and an affine map with no symbols, this is
-      // always bounded.
-      assert(ubConst.hasValue() && "should always have an upper bound");
-      if (ubConst.getValue() < 0)
-        // This is due to an invalid map that maps to a negative space.
-        return memrefType;
-      // If dimension of new memrefType is dynamic, the value is -1.
-      newShape[d] = ubConst.getValue() + 1;
-    }
-  }
-
-  // Create the new memref type after trivializing the old layout map.
-  MemRefType newMemRefType =
-      MemRefType::Builder(memrefType)
-          .setShape(newShape)
-          .setLayout(AffineMapAttr::get(b.getMultiDimIdentityMap(newRank)));
-
-  return newMemRefType;
-}
diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
index bad8e06ef35df..6ac033edb7f3f 100644
--- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt
@@ -3,6 +3,8 @@ add_mlir_library(MLIRAffineTransformsTestPasses
   TestAffineDataCopy.cpp
   TestAffineLoopUnswitching.cpp
   TestAffineLoopParametricTiling.cpp
+  TestLoopFusion.cpp
+  TestLoopMapping.cpp
   TestLoopPermutation.cpp
   TestVectorizationUtils.cpp
 
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
index 400af2a66ba7c..b7490935b1294 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineDataCopy.cpp
@@ -13,10 +13,10 @@
 
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 
 #define PASS_NAME "test-affine-data-copy"
diff --git a/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp b/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
index d215a3a792dd6..7096e11d1ef65 100644
--- a/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestAffineLoopParametricTiling.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Passes.h"
-#include "mlir/Transforms/LoopUtils.h"
 
 using namespace mlir;
 
diff --git a/mlir/test/lib/Transforms/TestLoopFusion.cpp b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
similarity index 98%
rename from mlir/test/lib/Transforms/TestLoopFusion.cpp
rename to mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
index 70fcdeb9f5517..592f41ee547ce 100644
--- a/mlir/test/lib/Transforms/TestLoopFusion.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp
@@ -12,11 +12,10 @@
 
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopFusionUtils.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopFusionUtils.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 #define DEBUG_TYPE "test-loop-fusion"
 
diff --git a/mlir/test/lib/Transforms/TestLoopMapping.cpp b/mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
similarity index 96%
rename from mlir/test/lib/Transforms/TestLoopMapping.cpp
rename to mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
index 09ea69d68dcbe..78c2fb50d5745 100644
--- a/mlir/test/lib/Transforms/TestLoopMapping.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopMapping.cpp
@@ -12,11 +12,10 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 #include "llvm/ADT/SetVector.h"
 
diff --git a/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp b/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
index 1db0981485e84..ee8d371d33693 100644
--- a/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestLoopPermutation.cpp
@@ -12,9 +12,8 @@
 
 #include "mlir/Dialect/Affine/Analysis/Utils.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 #define PASS_NAME "test-loop-permutation"
 
diff --git a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
index 958d3621a054a..9174d67c527d4 100644
--- a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
+++ b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp
@@ -14,6 +14,7 @@
 #include "mlir/Dialect/Affine/Analysis/AffineAnalysis.h"
 #include "mlir/Dialect/Affine/Analysis/NestedMatcher.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Affine/LoopUtils.h"
 #include "mlir/Dialect/Affine/Utils.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Dialect/Vector/VectorUtils.h"
@@ -21,7 +22,6 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Diagnostics.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
 #include "mlir/Transforms/Passes.h"
 
 #include "llvm/ADT/STLExtras.h"
diff --git a/mlir/test/lib/Dialect/SCF/CMakeLists.txt b/mlir/test/lib/Dialect/SCF/CMakeLists.txt
index 31b04b1f3a98a..f2cd9db3d3fec 100644
--- a/mlir/test/lib/Dialect/SCF/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/SCF/CMakeLists.txt
@@ -1,5 +1,7 @@
 # Exclude tests from libMLIR.so
 add_mlir_library(MLIRSCFTestPasses
+  TestLoopParametricTiling.cpp
+  TestLoopUnrolling.cpp
   TestSCFUtils.cpp
 
   EXCLUDE_FROM_LIBMLIR
diff --git a/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp b/mlir/test/lib/Dialect/SCF/TestLoopParametricTiling.cpp
similarity index 93%
rename from mlir/test/lib/Transforms/TestLoopParametricTiling.cpp
rename to mlir/test/lib/Dialect/SCF/TestLoopParametricTiling.cpp
index 184a7331bc559..9a6bbad3bc521 100644
--- a/mlir/test/lib/Transforms/TestLoopParametricTiling.cpp
+++ b/mlir/test/lib/Dialect/SCF/TestLoopParametricTiling.cpp
@@ -11,10 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
 
@@ -31,8 +30,7 @@ class SimpleParametricLoopTilingPass
   }
   StringRef getDescription() const final {
     return "test application of parametric tiling to the outer loops so that "
-           "the "
-           "ranges of outer loops become static";
+           "the ranges of outer loops become static";
   }
   SimpleParametricLoopTilingPass() = default;
   SimpleParametricLoopTilingPass(const SimpleParametricLoopTilingPass &) {}
diff --git a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp b/mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp
similarity index 97%
rename from mlir/test/lib/Transforms/TestLoopUnrolling.cpp
rename to mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp
index 1063853e434ba..5749bd99074c2 100644
--- a/mlir/test/lib/Transforms/TestLoopUnrolling.cpp
+++ b/mlir/test/lib/Dialect/SCF/TestLoopUnrolling.cpp
@@ -12,11 +12,10 @@
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SCF/SCF.h"
+#include "mlir/Dialect/SCF/Utils.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/Pass/Pass.h"
-#include "mlir/Transforms/LoopUtils.h"
-#include "mlir/Transforms/Passes.h"
 
 using namespace mlir;
 
diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt
index 8d5dc53ad8d13..b8e65bf9a37c0 100644
--- a/mlir/test/lib/Transforms/CMakeLists.txt
+++ b/mlir/test/lib/Transforms/CMakeLists.txt
@@ -2,10 +2,6 @@
 add_mlir_library(MLIRTestTransforms
   TestConstantFold.cpp
   TestInlining.cpp
-  TestLoopFusion.cpp
-  TestLoopMapping.cpp
-  TestLoopParametricTiling.cpp
-  TestLoopUnrolling.cpp
 
   EXCLUDE_FROM_LIBMLIR
 
diff --git a/mlir/test/lib/Transforms/TestConstantFold.cpp b/mlir/test/lib/Transforms/TestConstantFold.cpp
index f0ea5fa888265..23daca4f26ad9 100644
--- a/mlir/test/lib/Transforms/TestConstantFold.cpp
+++ b/mlir/test/lib/Transforms/TestConstantFold.cpp
@@ -9,7 +9,6 @@
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/FoldUtils.h"
 #include "mlir/Transforms/Passes.h"
-#include "mlir/Transforms/Utils.h"
 
 using namespace mlir;
 
diff --git a/mlir/unittests/Transforms/CMakeLists.txt b/mlir/unittests/Transforms/CMakeLists.txt
index b78f3cd8cf223..3b08c8ecffbe0 100644
--- a/mlir/unittests/Transforms/CMakeLists.txt
+++ b/mlir/unittests/Transforms/CMakeLists.txt
@@ -4,4 +4,5 @@ add_mlir_unittest(MLIRTransformsTests
 )
 target_link_libraries(MLIRTransformsTests
   PRIVATE
+  MLIRParser
   MLIRTransforms)

From 88c1df64bdd379b4fb5a7946124c7f52a9c795da Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Fri, 21 Jan 2022 12:35:08 -0800
Subject: [PATCH 492/946] [mlir:ArmSVE][NFC] Remove dead code and unnecessary
 dependencies

Differential Revision: https://reviews.llvm.org/D117981
---
 mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td    |  1 -
 mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp  | 35 ++++++++++---------
 mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt     |  1 -
 .../Transforms/LegalizeForLLVMExport.cpp      | 14 --------
 4 files changed, 18 insertions(+), 33 deletions(-)

diff --git a/mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td b/mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td
index 5ffcf740496d1..19dcfabe972ba 100644
--- a/mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td
+++ b/mlir/include/mlir/Dialect/ArmSVE/ArmSVE.td
@@ -15,7 +15,6 @@
 
 include "mlir/Interfaces/SideEffectInterfaces.td"
 include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
-include "mlir/Dialect/Arithmetic/IR/ArithmeticBase.td"
 
 //===----------------------------------------------------------------------===//
 // ArmSVE dialect definition
diff --git a/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp b/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
index b3c79040c48da..1ea2fad56272a 100644
--- a/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
+++ b/mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/OpImplementation.h"
@@ -20,11 +19,26 @@
 #include "llvm/ADT/TypeSwitch.h"
 
 using namespace mlir;
-using namespace arm_sve;
+using namespace mlir::arm_sve;
 
-#include "mlir/Dialect/ArmSVE/ArmSVEDialect.cpp.inc"
+//===----------------------------------------------------------------------===//
+// ScalableVector versions of general helpers for comparison ops
+//===----------------------------------------------------------------------===//
+
+/// Return the scalable vector of the same shape and containing i1.
+static Type getI1SameShape(Type type) {
+  auto i1Type = IntegerType::get(type.getContext(), 1);
+  if (auto sVectorType = type.dyn_cast<VectorType>())
+    return VectorType::get(sVectorType.getShape(), i1Type,
+                           sVectorType.getNumScalableDims());
+  return nullptr;
+}
+
+//===----------------------------------------------------------------------===//
+// Tablegen Definitions
+//===----------------------------------------------------------------------===//
 
-static Type getI1SameShape(Type type);
+#include "mlir/Dialect/ArmSVE/ArmSVEDialect.cpp.inc"
 
 #define GET_OP_CLASSES
 #include "mlir/Dialect/ArmSVE/ArmSVE.cpp.inc"
@@ -38,16 +52,3 @@ void ArmSVEDialect::initialize() {
 #include "mlir/Dialect/ArmSVE/ArmSVE.cpp.inc"
       >();
 }
-
-//===----------------------------------------------------------------------===//
-// ScalableVector versions of general helpers for comparison ops
-//===----------------------------------------------------------------------===//
-
-// Return the scalable vector of the same shape and containing i1.
-static Type getI1SameShape(Type type) {
-  auto i1Type = IntegerType::get(type.getContext(), 1);
-  if (auto sVectorType = type.dyn_cast<VectorType>())
-    return VectorType::get(sVectorType.getShape(), i1Type,
-                           sVectorType.getNumScalableDims());
-  return nullptr;
-}
diff --git a/mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt b/mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt
index 4a2393e7ac3d9..9177b5889b948 100644
--- a/mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/ArmSVE/IR/CMakeLists.txt
@@ -10,6 +10,5 @@ add_mlir_dialect_library(MLIRArmSVE
   LINK_LIBS PUBLIC
   MLIRIR
   MLIRLLVMIR
-  MLIRStandard
   MLIRSideEffectInterfaces
   )
diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
index fdda398fa01ac..95bc3e6b29599 100644
--- a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
+++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
@@ -11,7 +11,6 @@
 #include "mlir/Dialect/ArmSVE/ArmSVEDialect.h"
 #include "mlir/Dialect/ArmSVE/Transforms.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/IR/PatternMatch.h"
 
@@ -34,19 +33,6 @@ class ForwardOperands : public OpConversionPattern<OpTy> {
   }
 };
 
-class ReturnOpTypeConversion : public OpConversionPattern<ReturnOp> {
-public:
-  using OpConversionPattern<ReturnOp>::OpConversionPattern;
-
-  LogicalResult
-  matchAndRewrite(ReturnOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const final {
-    rewriter.updateRootInPlace(
-        op, [&]() { op->setOperands(adaptor.getOperands()); });
-    return success();
-  }
-};
-
 using SdotOpLowering = OneToOneConvertToLLVMPattern<SdotOp, SdotIntrOp>;
 using SmmlaOpLowering = OneToOneConvertToLLVMPattern<SmmlaOp, SmmlaIntrOp>;
 using UdotOpLowering = OneToOneConvertToLLVMPattern<UdotOp, UdotIntrOp>;

From 65e7cd13bbc6c16804614235aaeaed2e2ed94358 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 11:41:00 -0800
Subject: [PATCH 493/946] [mlir] Remove a bunch of unnecessary dialect
 dependencies

A lot of dialects have dependencies that are unnecessary, either because of copy/paste
of files when creating things or some other means. This commit cleans up a bunch of
the simple ones:

* Copy/Paste or missed during refactoring
Most of the dependencies cleaned up here look like copy/paste errors when creating
new dialects/transformations, or because the dependency wasn't removed during a
refactoring (e.g. when splitting the standard dialect).

* Unnecessary hard coding of constant operations in matchers
There are a few instances where a dialect had a dependency because it
was hardcoding checks for constant operations instead of using the better m_Constant
approach.

Differential Revision: https://reviews.llvm.org/D118062
---
 mlir/include/mlir/Dialect/GPU/GPUOps.td        |  1 -
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp         |  1 -
 .../GPU/Transforms/AsyncRegionRewriter.cpp     |  1 -
 .../Dialect/GPU/Transforms/MemoryPromotion.cpp |  1 -
 mlir/lib/Dialect/OpenACC/CMakeLists.txt        |  2 --
 mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp        | 18 ++++++++++--------
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp   |  1 -
 mlir/lib/Dialect/Quant/CMakeLists.txt          |  1 -
 .../Dialect/Quant/Transforms/ConvertConst.cpp  |  1 -
 mlir/lib/Dialect/SCF/CMakeLists.txt            |  1 -
 mlir/lib/Dialect/SCF/SCF.cpp                   |  3 +--
 mlir/lib/Dialect/Shape/IR/CMakeLists.txt       |  1 -
 mlir/lib/Dialect/Shape/IR/Shape.cpp            |  1 -
 .../Dialect/Shape/IR/ShapeCanonicalization.td  |  1 -
 .../lib/Dialect/SparseTensor/IR/CMakeLists.txt |  2 --
 .../SparseTensor/IR/SparseTensorDialect.cpp    | 15 ++++++++-------
 16 files changed, 19 insertions(+), 32 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/GPUOps.td b/mlir/include/mlir/Dialect/GPU/GPUOps.td
index 4e54bda5ab0eb..c76772b4470d3 100644
--- a/mlir/include/mlir/Dialect/GPU/GPUOps.td
+++ b/mlir/include/mlir/Dialect/GPU/GPUOps.td
@@ -15,7 +15,6 @@
 
 include "mlir/Dialect/DLTI/DLTIBase.td"
 include "mlir/Dialect/GPU/GPUBase.td"
-include "mlir/Dialect/LLVMIR/LLVMOpBase.td"
 include "mlir/IR/EnumAttr.td"
 include "mlir/IR/FunctionInterfaces.td"
 include "mlir/IR/SymbolInterfaces.td"
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index e44832460c0e0..d195ba121c81b 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -15,7 +15,6 @@
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
index 5d7ded743da59..7b89936682477 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp
@@ -16,7 +16,6 @@
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/GPU/Passes.h"
 #include "mlir/Dialect/GPU/Utils.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/PatternMatch.h"
diff --git a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
index 62c11f12b3f89..62304fbb2cfa9 100644
--- a/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/MemoryPromotion.cpp
@@ -17,7 +17,6 @@
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Pass/Pass.h"
 
diff --git a/mlir/lib/Dialect/OpenACC/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/CMakeLists.txt
index ba1c2e9e82906..cc84ba20569c3 100644
--- a/mlir/lib/Dialect/OpenACC/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenACC/CMakeLists.txt
@@ -8,8 +8,6 @@ add_mlir_dialect_library(MLIROpenACC
   MLIROpenACCOpsIncGen
 
   LINK_LIBS PUBLIC
-  MLIRArithmetic
   MLIRIR
-  MLIRStandard
   )
 
diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
index 31bde5ec9a486..cfd1fbdf341e9 100644
--- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
+++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
@@ -7,12 +7,11 @@
 // =============================================================================
 
 #include "mlir/Dialect/OpenACC/OpenACC.h"
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/OpenACC/OpenACCOpsEnums.cpp.inc"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/DialectImplementation.h"
+#include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "llvm/ADT/TypeSwitch.h"
@@ -175,14 +174,17 @@ struct RemoveConstantIfCondition : public OpRewritePattern<OpTy> {
   LogicalResult matchAndRewrite(OpTy op,
                                 PatternRewriter &rewriter) const override {
     // Early return if there is no condition.
-    if (!op.ifCond())
+    Value ifCond = op.ifCond();
+    if (!ifCond)
       return success();
 
-    auto constOp = op.ifCond().template getDefiningOp<arith::ConstantOp>();
-    if (constOp && constOp.getValue().template cast<IntegerAttr>().getInt())
-      rewriter.updateRootInPlace(op, [&]() { op.ifCondMutable().erase(0); });
-    else if (constOp)
-      rewriter.eraseOp(op);
+    IntegerAttr constAttr;
+    if (matchPattern(ifCond, m_Constant(&constAttr))) {
+      if (constAttr.getInt())
+        rewriter.updateRootInPlace(op, [&]() { op.ifCondMutable().erase(0); });
+      else
+        rewriter.eraseOp(op);
+    }
 
     return success();
   }
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index a527954dfd8ec..f0c5cdc6eb63e 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/DialectImplementation.h"
 #include "mlir/IR/OpImplementation.h"
diff --git a/mlir/lib/Dialect/Quant/CMakeLists.txt b/mlir/lib/Dialect/Quant/CMakeLists.txt
index a122b4f626afc..5c447dcbc2e26 100644
--- a/mlir/lib/Dialect/Quant/CMakeLists.txt
+++ b/mlir/lib/Dialect/Quant/CMakeLists.txt
@@ -22,6 +22,5 @@ add_mlir_dialect_library(MLIRQuant
   MLIRPass
   MLIRSideEffectInterfaces
   MLIRSupport
-  MLIRStandard
   MLIRTransformUtils
   )
diff --git a/mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp b/mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp
index 7b66dba229541..3dc9f9f8a5365 100644
--- a/mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp
+++ b/mlir/lib/Dialect/Quant/Transforms/ConvertConst.cpp
@@ -12,7 +12,6 @@
 #include "mlir/Dialect/Quant/QuantOps.h"
 #include "mlir/Dialect/Quant/QuantizeUtils.h"
 #include "mlir/Dialect/Quant/UniformSupport.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
diff --git a/mlir/lib/Dialect/SCF/CMakeLists.txt b/mlir/lib/Dialect/SCF/CMakeLists.txt
index 2f7506c58c820..0af9b77709c16 100644
--- a/mlir/lib/Dialect/SCF/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/CMakeLists.txt
@@ -12,7 +12,6 @@ add_mlir_dialect_library(MLIRSCF
   MLIRBufferization
   MLIRIR
   MLIRLoopLikeInterface
-  MLIRMemRef
   MLIRSideEffectInterfaces
   MLIRStandard
   )
diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp
index 7276ed0cdb740..807b0b6df78d8 100644
--- a/mlir/lib/Dialect/SCF/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/SCF.cpp
@@ -9,14 +9,13 @@
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/BlockAndValueMapping.h"
 #include "mlir/IR/Matchers.h"
 #include "mlir/IR/PatternMatch.h"
 #include "mlir/Support/MathExtras.h"
 #include "mlir/Transforms/InliningUtils.h"
+
 using namespace mlir;
 using namespace mlir::scf;
 
diff --git a/mlir/lib/Dialect/Shape/IR/CMakeLists.txt b/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
index 8365d4808e2f0..e0cc5abe33438 100644
--- a/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Shape/IR/CMakeLists.txt
@@ -19,6 +19,5 @@ add_mlir_dialect_library(MLIRShape
   MLIRInferTypeOpInterface
   MLIRIR
   MLIRSideEffectInterfaces
-  MLIRStandard
   MLIRTensor
   )
diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp
index 7e97846c33572..7352d850f338d 100644
--- a/mlir/lib/Dialect/Shape/IR/Shape.cpp
+++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp
@@ -12,7 +12,6 @@
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/CommonFolders.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Dialect/Traits.h"
 #include "mlir/IR/Builders.h"
diff --git a/mlir/lib/Dialect/Shape/IR/ShapeCanonicalization.td b/mlir/lib/Dialect/Shape/IR/ShapeCanonicalization.td
index 0825f0f680979..947b28851fdf0 100644
--- a/mlir/lib/Dialect/Shape/IR/ShapeCanonicalization.td
+++ b/mlir/lib/Dialect/Shape/IR/ShapeCanonicalization.td
@@ -1,5 +1,4 @@
 include "mlir/Dialect/Shape/IR/ShapeOps.td"
-include "mlir/Dialect/StandardOps/IR/Ops.td"
 include "mlir/Dialect/Tensor/IR/TensorOps.td"
 
 def AllInputShapesEq : Constraint<CPred< [{
diff --git a/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt b/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt
index fd0599cd2c296..6b94ee010b7cd 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/SparseTensor/IR/CMakeLists.txt
@@ -9,9 +9,7 @@ add_mlir_dialect_library(MLIRSparseTensor
   MLIRSparseTensorOpsIncGen
 
   LINK_LIBS PUBLIC
-  MLIRArithmetic
   MLIRDialect
   MLIRIR
-  MLIRStandard
   MLIRSupport
   )
diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
index 8a7942c8d666a..5b0ee4656c491 100644
--- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
+++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp
@@ -6,11 +6,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/SparseTensor/IR/SparseTensor.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/DialectImplementation.h"
+#include "mlir/IR/Matchers.h"
 #include "mlir/IR/OpImplementation.h"
 #include "llvm/ADT/TypeSwitch.h"
 
@@ -194,8 +193,9 @@ mlir::sparse_tensor::getSparseTensorEncoding(Type type) {
 //===----------------------------------------------------------------------===//
 
 static LogicalResult isInBounds(Value dim, Value tensor) {
-  if (auto constantOp = dim.getDefiningOp<arith::ConstantOp>()) {
-    unsigned d = constantOp.getValue().cast<IntegerAttr>().getInt();
+  IntegerAttr constantAttr;
+  if (matchPattern(dim, m_Constant(&constantAttr))) {
+    unsigned d = constantAttr.getInt();
     if (d >= tensor.getType().cast<RankedTensorType>().getRank())
       return failure();
   }
@@ -227,11 +227,12 @@ static LogicalResult verify(InitOp op) {
   for (unsigned i = 0; i < rank; i++) {
     if (shape[i] == ShapedType::kDynamicSize)
       continue;
-    auto constantOp = op.sizes()[i].getDefiningOp<arith::ConstantOp>();
-    if (!constantOp ||
-        constantOp.getValue().cast<IntegerAttr>().getInt() != shape[i])
+    IntegerAttr constantAttr;
+    if (!matchPattern(op.sizes()[i], m_Constant(&constantAttr)) ||
+        constantAttr.getInt() != shape[i]) {
       return op.emitError("unexpected mismatch with static dimension size ")
              << shape[i];
+    }
   }
   return success();
 }

From 03e9ba274072a8921d78c678222fb0b43111125b Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed@bougacha.org>
Date: Mon, 24 Jan 2022 16:56:02 -0800
Subject: [PATCH 494/946] [ObjCARC] Remove unused RetainRVDep dependency kind.
 NFC.

---
 llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 3 ---
 llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h   | 3 +--
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
index 4921209f041b4..de0f5803b4c77 100644
--- a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
+++ b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.cpp
@@ -194,9 +194,6 @@ llvm::objcarc::Depends(DependenceKind Flavor, Instruction *Inst,
       return CanInterruptRV(Class);
     }
   }
-
-  case RetainRVDep:
-    return CanInterruptRV(GetBasicARCInstKind(Inst));
   }
 
   llvm_unreachable("Invalid dependence flavor");
diff --git a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
index cf4c05ebe91ca..dd6a1c3f97958 100644
--- a/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
+++ b/llvm/lib/Transforms/ObjCARC/DependencyAnalysis.h
@@ -46,8 +46,7 @@ enum DependenceKind {
   AutoreleasePoolBoundary,
   CanChangeRetainCount,
   RetainAutoreleaseDep,       ///< Blocks objc_retainAutorelease.
-  RetainAutoreleaseRVDep,     ///< Blocks objc_retainAutoreleaseReturnValue.
-  RetainRVDep                 ///< Blocks objc_retainAutoreleasedReturnValue.
+  RetainAutoreleaseRVDep      ///< Blocks objc_retainAutoreleaseReturnValue.
 };
 
 /// Find dependent instructions. If there is exactly one dependent instruction,

From e7298464c5d004a119583cdb8a120dc3d968508d Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed@bougacha.org>
Date: Mon, 24 Jan 2022 17:14:39 -0800
Subject: [PATCH 495/946] [ObjCARC] Use "UnsafeClaimRV" to refer to unsafeClaim
 in enums. NFC.

This matches the actual runtime function more closely.
I considered also renaming both RetainRV/UnsafeClaimRV to end with
"ARV", for AutoreleasedReturnValue, but there's less potential
for confusion there.
---
 llvm/include/llvm/Analysis/ObjCARCInstKind.h  |  2 +-
 llvm/include/llvm/Analysis/ObjCARCUtil.h      |  6 ++--
 llvm/lib/Analysis/ObjCARCInstKind.cpp         | 28 +++++++++----------
 llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp |  2 +-
 .../ObjCARC/ARCRuntimeEntryPoints.h           | 10 +++----
 .../Transforms/ObjCARC/ObjCARCContract.cpp    |  2 +-
 llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp   | 20 ++++++-------
 llvm/lib/Transforms/Utils/InlineFunction.cpp  |  4 +--
 8 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ObjCARCInstKind.h b/llvm/include/llvm/Analysis/ObjCARCInstKind.h
index 84565b9315c78..e332bcf88be79 100644
--- a/llvm/include/llvm/Analysis/ObjCARCInstKind.h
+++ b/llvm/include/llvm/Analysis/ObjCARCInstKind.h
@@ -28,7 +28,7 @@ namespace objcarc {
 enum class ARCInstKind {
   Retain,                   ///< objc_retain
   RetainRV,                 ///< objc_retainAutoreleasedReturnValue
-  ClaimRV,                  ///< objc_unsafeClaimAutoreleasedReturnValue
+  UnsafeClaimRV,            ///< objc_unsafeClaimAutoreleasedReturnValue
   RetainBlock,              ///< objc_retainBlock
   Release,                  ///< objc_release
   Autorelease,              ///< objc_autorelease
diff --git a/llvm/include/llvm/Analysis/ObjCARCUtil.h b/llvm/include/llvm/Analysis/ObjCARCUtil.h
index 362dd6c299925..1d330ca58a872 100644
--- a/llvm/include/llvm/Analysis/ObjCARCUtil.h
+++ b/llvm/include/llvm/Analysis/ObjCARCUtil.h
@@ -48,15 +48,15 @@ inline Optional<Function *> getAttachedARCFunction(const CallBase *CB) {
   return cast<Function>(B->Inputs[0]);
 }
 
-/// Check whether the function is retainRV/claimRV.
+/// Check whether the function is retainRV/unsafeClaimRV.
 inline bool isRetainOrClaimRV(ARCInstKind Kind) {
-  return Kind == ARCInstKind::RetainRV || Kind == ARCInstKind::ClaimRV;
+  return Kind == ARCInstKind::RetainRV || Kind == ARCInstKind::UnsafeClaimRV;
 }
 
 /// This function returns the ARCInstKind of the function attached to operand
 /// bundle clang_arc_attachedcall. It returns None if the call doesn't have the
 /// operand bundle or the operand is null. Otherwise it returns either RetainRV
-/// or ClaimRV.
+/// or UnsafeClaimRV.
 inline ARCInstKind getAttachedARCFunctionKind(const CallBase *CB) {
   Optional<Function *> Fn = getAttachedARCFunction(CB);
   if (!Fn.hasValue())
diff --git a/llvm/lib/Analysis/ObjCARCInstKind.cpp b/llvm/lib/Analysis/ObjCARCInstKind.cpp
index f74a9f7f104fa..d177ee056a93a 100644
--- a/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -32,8 +32,8 @@ raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS,
     return OS << "ARCInstKind::Retain";
   case ARCInstKind::RetainRV:
     return OS << "ARCInstKind::RetainRV";
-  case ARCInstKind::ClaimRV:
-    return OS << "ARCInstKind::ClaimRV";
+  case ARCInstKind::UnsafeClaimRV:
+    return OS << "ARCInstKind::UnsafeClaimRV";
   case ARCInstKind::RetainBlock:
     return OS << "ARCInstKind::RetainBlock";
   case ARCInstKind::Release:
@@ -127,7 +127,7 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
   case Intrinsic::objc_clang_arc_use:
     return ARCInstKind::IntrinsicUser;
   case Intrinsic::objc_unsafeClaimAutoreleasedReturnValue:
-    return ARCInstKind::ClaimRV;
+    return ARCInstKind::UnsafeClaimRV;
   case Intrinsic::objc_retainedObject:
     return ARCInstKind::NoopCast;
   case Intrinsic::objc_unretainedObject:
@@ -334,7 +334,7 @@ bool llvm::objcarc::IsUser(ARCInstKind Class) {
   case ARCInstKind::StoreStrong:
   case ARCInstKind::Call:
   case ARCInstKind::None:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return false;
   }
   llvm_unreachable("covered switch isn't covered?");
@@ -370,7 +370,7 @@ bool llvm::objcarc::IsRetain(ARCInstKind Class) {
   case ARCInstKind::Call:
   case ARCInstKind::User:
   case ARCInstKind::None:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return false;
   }
   llvm_unreachable("covered switch isn't covered?");
@@ -384,7 +384,7 @@ bool llvm::objcarc::IsAutorelease(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::RetainBlock:
   case ARCInstKind::Release:
   case ARCInstKind::AutoreleasepoolPush:
@@ -416,7 +416,7 @@ bool llvm::objcarc::IsForwarding(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
   case ARCInstKind::NoopCast:
@@ -451,7 +451,7 @@ bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -486,7 +486,7 @@ bool llvm::objcarc::IsNoopOnGlobal(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -522,7 +522,7 @@ bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::AutoreleaseRV:
     return true;
   case ARCInstKind::Release:
@@ -563,7 +563,7 @@ bool llvm::objcarc::IsNeverTail(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::AutoreleaseRV:
   case ARCInstKind::Release:
   case ARCInstKind::RetainBlock:
@@ -598,7 +598,7 @@ bool llvm::objcarc::IsNoThrow(ARCInstKind Class) {
   switch (Class) {
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::Autorelease:
   case ARCInstKind::AutoreleaseRV:
@@ -643,7 +643,7 @@ bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) {
     return true;
   case ARCInstKind::Retain:
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
   case ARCInstKind::Release:
   case ARCInstKind::AutoreleasepoolPush:
   case ARCInstKind::RetainBlock:
@@ -696,7 +696,7 @@ bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) {
   case ARCInstKind::StoreStrong:
   case ARCInstKind::CallOrUser:
   case ARCInstKind::Call:
-  case ARCInstKind::ClaimRV:
+  case ARCInstKind::UnsafeClaimRV:
     return true;
   }
 
diff --git a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
index e3eb3f825851d..74b903f99284d 100644
--- a/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/llvm/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -97,7 +97,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
       objcarc::ARCInstKind Kind = objcarc::getAttachedARCFunctionKind(CB);
       (void)Kind;
       assert((Kind == objcarc::ARCInstKind::RetainRV ||
-              Kind == objcarc::ARCInstKind::ClaimRV) &&
+              Kind == objcarc::ARCInstKind::UnsafeClaimRV) &&
              "use expected to be the argument of operand bundle "
              "\"clang.arc.attachedcall\"");
       U.set(FCache.getCallee());
diff --git a/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index 764dc5f927073..c11691c613ac7 100644
--- a/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -42,7 +42,7 @@ enum class ARCRuntimeEntryPointKind {
   Autorelease,
   StoreStrong,
   RetainRV,
-  ClaimRV,
+  UnsafeClaimRV,
   RetainAutorelease,
   RetainAutoreleaseRV,
 };
@@ -62,7 +62,7 @@ class ARCRuntimeEntryPoints {
     Autorelease = nullptr;
     StoreStrong = nullptr;
     RetainRV = nullptr;
-    ClaimRV = nullptr;
+    UnsafeClaimRV = nullptr;
     RetainAutorelease = nullptr;
     RetainAutoreleaseRV = nullptr;
   }
@@ -87,9 +87,9 @@ class ARCRuntimeEntryPoints {
     case ARCRuntimeEntryPointKind::RetainRV:
       return getIntrinsicEntryPoint(RetainRV,
                                 Intrinsic::objc_retainAutoreleasedReturnValue);
-    case ARCRuntimeEntryPointKind::ClaimRV:
+    case ARCRuntimeEntryPointKind::UnsafeClaimRV:
       return getIntrinsicEntryPoint(
-          ClaimRV, Intrinsic::objc_unsafeClaimAutoreleasedReturnValue);
+          UnsafeClaimRV, Intrinsic::objc_unsafeClaimAutoreleasedReturnValue);
     case ARCRuntimeEntryPointKind::RetainAutorelease:
       return getIntrinsicEntryPoint(RetainAutorelease,
                                     Intrinsic::objc_retainAutorelease);
@@ -127,7 +127,7 @@ class ARCRuntimeEntryPoints {
   Function *RetainRV = nullptr;
 
   /// Declaration for objc_unsafeClaimAutoreleasedReturnValue().
-  Function *ClaimRV = nullptr;
+  Function *UnsafeClaimRV = nullptr;
 
   /// Declaration for objc_retainAutorelease().
   Function *RetainAutorelease = nullptr;
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index c2ed94e8e1f62..9e2832827686a 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -433,7 +433,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
     // If we succeed in our optimization, fall through.
     LLVM_FALLTHROUGH;
   case ARCInstKind::RetainRV:
-  case ARCInstKind::ClaimRV: {
+  case ARCInstKind::UnsafeClaimRV: {
     bool IsInstContainedInBundle = BundledInsts->contains(Inst);
 
     // Return now if the target doesn't need a special inline-asm marker. Return
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index 0fa4904456cdb..d07fa1d118e42 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -515,7 +515,7 @@ class ObjCARCOpt {
       Function &F, DenseMap<BasicBlock *, ColorVector> &BlockColors,
       Instruction *Inst, ARCInstKind Class, const Value *Arg);
 
-  /// Try to optimize an AutoreleaseRV with a RetainRV or ClaimRV.  If the
+  /// Try to optimize an AutoreleaseRV with a RetainRV or UnsafeClaimRV.  If the
   /// optimization occurs, returns true to indicate that the caller should
   /// assume the instructions are dead.
   bool OptimizeInlinedAutoreleaseRVCall(
@@ -705,14 +705,14 @@ bool ObjCARCOpt::OptimizeInlinedAutoreleaseRVCall(
     return true;
   }
 
-  // ClaimRV is a frontend peephole for RetainRV + Release.  Since the
-  // AutoreleaseRV and RetainRV cancel out, replace the ClaimRV with a Release.
-  assert(Class == ARCInstKind::ClaimRV);
+  // UnsafeClaimRV is a frontend peephole for RetainRV + Release.  Since the
+  // AutoreleaseRV and RetainRV cancel out, replace UnsafeClaimRV with Release.
+  assert(Class == ARCInstKind::UnsafeClaimRV);
   Value *CallArg = cast<CallInst>(Inst)->getArgOperand(0);
   CallInst *Release = CallInst::Create(
       EP.get(ARCRuntimeEntryPointKind::Release), CallArg, "", Inst);
-  assert(IsAlwaysTail(ARCInstKind::ClaimRV) &&
-         "Expected ClaimRV to be safe to tail call");
+  assert(IsAlwaysTail(ARCInstKind::UnsafeClaimRV) &&
+         "Expected UnsafeClaimRV to be safe to tail call");
   Release->setTailCall();
   Inst->replaceAllUsesWith(CallArg);
   EraseInstruction(Inst);
@@ -810,7 +810,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
     BlockColors = colorEHFunclets(F);
 
   // Store any delayed AutoreleaseRV intrinsics, so they can be easily paired
-  // with RetainRV and ClaimRV.
+  // with RetainRV and UnsafeClaimRV.
   Instruction *DelayedAutoreleaseRV = nullptr;
   const Value *DelayedAutoreleaseRVArg = nullptr;
   auto setDelayedAutoreleaseRV = [&](Instruction *AutoreleaseRV) {
@@ -837,7 +837,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       return false;
 
     // Given the frontend rules for emitting AutoreleaseRV, RetainRV, and
-    // ClaimRV, it's probably safe to skip over even opaque function calls
+    // UnsafeClaimRV, it's probably safe to skip over even opaque function calls
     // here since OptimizeInlinedAutoreleaseRVCall will confirm that they
     // have the same RCIdentityRoot.  However, what really matters is
     // skipping instructions or intrinsics that the inliner could leave behind;
@@ -881,7 +881,7 @@ void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
       setDelayedAutoreleaseRV(Inst);
       continue;
     case ARCInstKind::RetainRV:
-    case ARCInstKind::ClaimRV:
+    case ARCInstKind::UnsafeClaimRV:
       if (DelayedAutoreleaseRV) {
         // We have a potential RV pair.  Check if they cancel out.
         if (OptimizeInlinedAutoreleaseRVCall(F, BlockColors, Inst, Arg, Class,
@@ -1165,7 +1165,7 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
       DepInst = findSingleDependency(AutoreleasePoolBoundary, Arg,
                                      Inst->getParent(), Inst, PA);
       break;
-    case ARCInstKind::ClaimRV:
+    case ARCInstKind::UnsafeClaimRV:
     case ARCInstKind::RetainRV:
     case ARCInstKind::AutoreleaseRV:
       // Don't move these; the RV optimization depends on the autoreleaseRV
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index fcf001be953ae..c9f872f5b7e1b 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -1667,7 +1667,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
   Module *Mod = CB.getModule();
   assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");
   bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,
-       IsClaimRV = !IsRetainRV;
+       IsUnsafeClaimRV = !IsRetainRV;
 
   for (auto *RI : Returns) {
     Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));
@@ -1694,7 +1694,7 @@ inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,
         //   and erase the autoreleaseRV call.
         // - If retainRV is attached to the call, just erase the autoreleaseRV
         //   call.
-        if (IsClaimRV) {
+        if (IsUnsafeClaimRV) {
           Builder.SetInsertPoint(II);
           Function *IFn =
               Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);

From 07be76f2ae19a3b656b01d9e630fccf944aec0b3 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <minyihh@uci.edu>
Date: Tue, 25 Jan 2022 13:07:29 +0800
Subject: [PATCH 496/946] [M68k][Disassembler][NFC] Re-organize test files

Put test cases of each instruction category into their own files. NFC.
---
 llvm/test/MC/Disassembler/M68k/arithmetic.txt | 10 +++++
 llvm/test/MC/Disassembler/M68k/bits.txt       |  4 ++
 llvm/test/MC/Disassembler/M68k/control.txt    | 14 +++++++
 llvm/test/MC/Disassembler/M68k/data.txt       |  6 +++
 .../MC/Disassembler/M68k/instructions.txt     | 38 -------------------
 .../MC/Disassembler/M68k/shift-rotate.txt     | 12 ++++++
 6 files changed, 46 insertions(+), 38 deletions(-)
 create mode 100644 llvm/test/MC/Disassembler/M68k/arithmetic.txt
 create mode 100644 llvm/test/MC/Disassembler/M68k/bits.txt
 create mode 100644 llvm/test/MC/Disassembler/M68k/control.txt
 create mode 100644 llvm/test/MC/Disassembler/M68k/data.txt
 delete mode 100644 llvm/test/MC/Disassembler/M68k/instructions.txt
 create mode 100644 llvm/test/MC/Disassembler/M68k/shift-rotate.txt

diff --git a/llvm/test/MC/Disassembler/M68k/arithmetic.txt b/llvm/test/MC/Disassembler/M68k/arithmetic.txt
new file mode 100644
index 0000000000000..670d4297dab87
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/arithmetic.txt
@@ -0,0 +1,10 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: adda.l %a0, %a1
+0xd3 0xc8
+# CHECK: sub.w %d3, %d1
+0x92 0x43
+# CHECK: cmp.w %d1, %d0
+0xb0 0x41
+# CHECK: neg.w %d0
+0x44 0x40
diff --git a/llvm/test/MC/Disassembler/M68k/bits.txt b/llvm/test/MC/Disassembler/M68k/bits.txt
new file mode 100644
index 0000000000000..c0a3001ffd265
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/bits.txt
@@ -0,0 +1,4 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: btst #0, %d3
+0x08 0x03 0x00 0x00
diff --git a/llvm/test/MC/Disassembler/M68k/control.txt b/llvm/test/MC/Disassembler/M68k/control.txt
new file mode 100644
index 0000000000000..8140e28ac7e6b
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/control.txt
@@ -0,0 +1,14 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: bra $0
+0x60 0x00 0x00 0x00
+# CHECK: jsr $0
+0x4e 0xb9 0x00 0x00 0x00 0x00
+# CHECK: rts
+0x4e 0x75
+# CHECK: seq %d0
+0x57 0xc0
+# CHECK: sgt %d0
+0x5e 0xc0
+# CHECK: nop
+0x4e 0x71
diff --git a/llvm/test/MC/Disassembler/M68k/data.txt b/llvm/test/MC/Disassembler/M68k/data.txt
new file mode 100644
index 0000000000000..c29ca08c3a41c
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/data.txt
@@ -0,0 +1,6 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: move.l %a1, %a0
+0x20 0x49
+# CHECK: lea (50,%a0), %a1
+0x43 0xe8 0x00 0x32
diff --git a/llvm/test/MC/Disassembler/M68k/instructions.txt b/llvm/test/MC/Disassembler/M68k/instructions.txt
deleted file mode 100644
index 23316503f094b..0000000000000
--- a/llvm/test/MC/Disassembler/M68k/instructions.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
-
-# CHECK: move.l %a1, %a0
-0x20 0x49
-# CHECK: adda.l %a0, %a1
-0xd3 0xc8
-# CHECK: sub.w %d3, %d1
-0x92 0x43
-# CHECK: cmp.w %d1, %d0
-0xb0 0x41
-# CHECK: neg.w %d0
-0x44 0x40
-# CHECK: btst #0, %d3
-0x08 0x03 0x00 0x00
-# CHECK: bra $0
-0x60 0x00 0x00 0x00
-# CHECK: jsr $0
-0x4e 0xb9 0x00 0x00 0x00 0x00
-# CHECK: seq %d0
-0x57 0xc0
-# CHECK: sgt %d0
-0x5e 0xc0
-# CHECK: lea (50,%a0), %a1
-0x43 0xe8 0x00 0x32
-# CHECK: lsl.l #5, %d1
-0xeb 0x89
-# CHECK: lsr.l #5, %d1
-0xea 0x89
-# CHECK: asr.l #5, %d1
-0xea 0x81
-# CHECK: rol.l #5, %d1
-0xeb 0x99
-# CHECK: ror.l #5, %d1
-0xea 0x99
-# CHECK: nop
-0x4e 0x71
-# CHECK: rts
-0x4e 0x75
diff --git a/llvm/test/MC/Disassembler/M68k/shift-rotate.txt b/llvm/test/MC/Disassembler/M68k/shift-rotate.txt
new file mode 100644
index 0000000000000..37e5e2c800f37
--- /dev/null
+++ b/llvm/test/MC/Disassembler/M68k/shift-rotate.txt
@@ -0,0 +1,12 @@
+# RUN: llvm-mc -disassemble -triple m68k %s | FileCheck %s
+
+# CHECK: lsl.l #5, %d1
+0xeb 0x89
+# CHECK: lsr.l #5, %d1
+0xea 0x89
+# CHECK: asr.l #5, %d1
+0xea 0x81
+# CHECK: rol.l #5, %d1
+0xeb 0x99
+# CHECK: ror.l #5, %d1
+0xea 0x99

From 63b8018468420a1e848068385dcfaab04f730d54 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 21:09:19 -0800
Subject: [PATCH 497/946] [mlir:LoopLikeInterface] Add missing dependency on
 SideEffectInterfaces

This was missed when moveLoopInvariantCode was added.
---
 mlir/lib/Interfaces/CMakeLists.txt | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt
index a6f8e1118ddee..d1aae8e28273d 100644
--- a/mlir/lib/Interfaces/CMakeLists.txt
+++ b/mlir/lib/Interfaces/CMakeLists.txt
@@ -36,8 +36,21 @@ add_mlir_interface_library(CopyOpInterface)
 add_mlir_interface_library(DataLayoutInterfaces)
 add_mlir_interface_library(DerivedAttributeOpInterface)
 add_mlir_interface_library(InferTypeOpInterface)
-add_mlir_interface_library(LoopLikeInterface)
 add_mlir_interface_library(SideEffectInterfaces)
 add_mlir_interface_library(TilingInterface)
 add_mlir_interface_library(VectorInterfaces)
 add_mlir_interface_library(ViewLikeInterface)
+
+add_mlir_library(MLIRLoopLikeInterface
+  LoopLikeInterface.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces
+
+  DEPENDS
+  MLIRLoopLikeInterfaceIncGen
+
+  LINK_LIBS PUBLIC
+  MLIRIR
+  MLIRSideEffectInterfaces
+  )

From e51a20e166b6c4c7e610fa323d8dcf1213ce88a3 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Tue, 25 Jan 2022 05:15:08 +0000
Subject: [PATCH 498/946] Fix python test to register all passes before using
 "normalize-memrefs"

The pass moved from mlir.transforms to the Memref dialect.
---
 mlir/test/python/pass_manager.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/test/python/pass_manager.py b/mlir/test/python/pass_manager.py
index 380dc7ca3fb28..e5a42573a0e22 100644
--- a/mlir/test/python/pass_manager.py
+++ b/mlir/test/python/pass_manager.py
@@ -71,6 +71,7 @@ def testParseFail():
 def testInvalidNesting():
   with Context():
     try:
+      import mlir.all_passes_registration
       pm = PassManager.parse("builtin.func(normalize-memrefs)")
     except ValueError as e:
       # CHECK: Can't add pass 'NormalizeMemRefs' restricted to 'builtin.module' on a PassManager intended to run on 'builtin.func', did you intend to nest?

From 61b81e0f49510918cb11d79b4636de17b014806b Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 21:23:30 -0800
Subject: [PATCH 499/946] [mlir:MLIRAffineUtils] Add missing dependency on
 MLIRAffineAnalysis

This was missed in a70aa7bb0d9a6066831b339e0a09a2c1bc74fe2b.
---
 mlir/lib/Dialect/Affine/Utils/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
index 4393e2971a031..96920e793daf3 100644
--- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt
@@ -8,6 +8,7 @@ add_mlir_dialect_library(MLIRAffineUtils
 
   LINK_LIBS PUBLIC
   MLIRAffine
+  MLIRAffineAnalysis
   MLIRAnalysis
   MLIRMemRef
   MLIRTransformUtils

From 71cb5ed03c9b564c5bf65c46ea0d89e886b2302f Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Mon, 24 Jan 2022 21:21:37 -0800
Subject: [PATCH 500/946] [bazel] Update MLIR deps

I believe this is due to D117839, D117848, maybe others.

Latest build failure: https://buildkite.com/llvm-project/upstream-bazel-rbe/builds/18053
---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index a3876c0b71f4f..436e7a79833ec 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -2186,7 +2186,11 @@ cc_library(
             "lib/Dialect/Affine/Utils/*.h",
         ],
     ),
-    hdrs = ["include/mlir/Dialect/Affine/Utils.h"],
+    hdrs = [
+        "include/mlir/Dialect/Affine/LoopFusionUtils.h",
+        "include/mlir/Dialect/Affine/LoopUtils.h",
+        "include/mlir/Dialect/Affine/Utils.h",
+    ],
     includes = ["include"],
     deps = [
         ":Affine",
@@ -2194,7 +2198,10 @@ cc_library(
         ":Analysis",
         ":IR",
         ":MemRefDialect",
+        ":SCFDialect",
+        ":Support",
         ":TransformUtils",
+        "//llvm:Support",
     ],
 )
 
@@ -2466,6 +2473,7 @@ cc_library(
     deps = [
         ":IR",
         ":LoopLikeInterfaceIncGen",
+        "//llvm:Support",
     ],
 )
 
@@ -3152,6 +3160,7 @@ cc_library(
         ":ArithmeticDialect",
         ":Async",
         ":DLTIDialect",
+        ":AffineUtils",
         ":GPUDialect",
         ":GPUPassIncGen",
         ":MemRefDialect",
@@ -6599,6 +6608,7 @@ cc_library(
     deps = [
         ":ArithmeticDialect",
         ":BufferizationDialect",
+        ":BufferizationTransforms",
         ":IR",
         ":MemRefDialect",
         ":Support",
@@ -7836,6 +7846,7 @@ cc_library(
     includes = ["include"],
     deps = [
         ":Affine",
+        ":AffineUtils",
         ":ArithmeticDialect",
         ":IR",
         ":InferTypeOpInterface",
@@ -8013,6 +8024,7 @@ cc_library(
         ":DialectUtils",
         ":IR",
         ":InferTypeOpInterface",
+        ":LoopLikeInterface",
         ":MemRefDialect",
         ":Pass",
         ":StandardOps",

From b827b6340bf821abf443eccf84756f571e2ee47e Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 21:31:29 -0800
Subject: [PATCH 501/946] [mlir] Add missing dependencies after D118062

These used to be covered transitively, but now need to be explicit.
---
 mlir/lib/Conversion/AffineToStandard/CMakeLists.txt | 1 +
 mlir/lib/Dialect/Tosa/CMakeLists.txt                | 1 +
 2 files changed, 2 insertions(+)

diff --git a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
index aae55d7e235e6..5a5189241d9db 100644
--- a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
+++ b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt
@@ -19,4 +19,5 @@ add_mlir_conversion_library(MLIRAffineToStandard
   MLIRStandard
   MLIRTransforms
   MLIRIR
+  MLIRVector
   )
diff --git a/mlir/lib/Dialect/Tosa/CMakeLists.txt b/mlir/lib/Dialect/Tosa/CMakeLists.txt
index 9a9c80f933aec..de3328fde0b7b 100644
--- a/mlir/lib/Dialect/Tosa/CMakeLists.txt
+++ b/mlir/lib/Dialect/Tosa/CMakeLists.txt
@@ -20,6 +20,7 @@ add_mlir_dialect_library(MLIRTosa
   MLIRControlFlowInterfaces
   MLIRQuant
   MLIRSideEffectInterfaces
+  MLIRTensor
   MLIRViewLikeInterface
   )
 

From e697b971487d5288b18ae261b5840665510436c6 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 21:36:31 -0800
Subject: [PATCH 502/946] [mlir] Add more missing dependencies after D118062

These used to be covered transitively, but now need to be explicit.
---
 mlir/lib/Conversion/VectorToGPU/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt b/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt
index e9f2ff83a6e0d..e8ec3041c7beb 100644
--- a/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt
+++ b/mlir/lib/Conversion/VectorToGPU/CMakeLists.txt
@@ -13,4 +13,5 @@ add_mlir_conversion_library(MLIRVectorToGPU
   MLIRLLVMIR
   MLIRMemRef
   MLIRTransforms
+  MLIRVector
   )

From 8676e10f744ce3500b491af16bd96713e9ce2803 Mon Sep 17 00:00:00 2001
From: Uday Bondhugula <uday@polymagelabs.com>
Date: Sun, 23 Jan 2022 07:39:01 +0530
Subject: [PATCH 503/946] [MLIR] Improve doc for -mlir-print-local-scope and
 unhide

This is a pretty important debugging option to stay hidden. Also,
improve its cmd-line description; the current description gives no hint
that this is the one to use to have locations printed inline.
Out-of-line locations are also unproductive to work with in many cases
where the locations are actually compact, which is also why this option
should be more visible.  This revision doesn't change the default on it
though.

Reviewed By: rriddle, jpienaar

Differential Revision: https://reviews.llvm.org/D117186
---
 mlir/lib/IR/AsmPrinter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp
index f69d147c51b38..992fd1f793923 100644
--- a/mlir/lib/IR/AsmPrinter.cpp
+++ b/mlir/lib/IR/AsmPrinter.cpp
@@ -143,8 +143,8 @@ struct AsmPrinterOptions {
 
   llvm::cl::opt<bool> printLocalScopeOpt{
       "mlir-print-local-scope", llvm::cl::init(false),
-      llvm::cl::desc("Print assuming in local scope by default"),
-      llvm::cl::Hidden};
+      llvm::cl::desc("Print with local scope and inline information (eliding "
+                     "aliases for attributes, types, and locations")};
 };
 } // namespace
 

From 633f5badbf1315e972e9abb141111648a8fd77fd Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 22:11:53 -0800
Subject: [PATCH 504/946] [mlir] Add more missing dependencies after D118062

These used to be covered transitively, but now need to be explicit.
---
 mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt    | 1 +
 mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt   | 1 +
 mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt     | 2 ++
 mlir/lib/Conversion/MathToLibm/CMakeLists.txt          | 1 +
 mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt          | 1 +
 mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt      | 1 +
 mlir/lib/Conversion/VectorToSCF/CMakeLists.txt         | 1 +
 mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt      | 2 ++
 mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt      | 1 +
 mlir/lib/Dialect/GPU/CMakeLists.txt                    | 1 +
 mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt           | 2 ++
 mlir/lib/Dialect/Math/Transforms/CMakeLists.txt        | 1 +
 mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt         | 1 +
 mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt | 1 +
 14 files changed, 17 insertions(+)

diff --git a/mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt b/mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt
index 14cf225634c12..170b63b0243fe 100644
--- a/mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/ArithmeticToLLVM/CMakeLists.txt
@@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRArithmeticToLLVM
   Core
 
   LINK_LIBS PUBLIC
+  MLIRArithmetic
   MLIRLLVMCommonConversion
   MLIRLLVMIR
   )
diff --git a/mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt
index e50d6320439e4..6c3c41b2b19f4 100644
--- a/mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/ArithmeticToSPIRV/CMakeLists.txt
@@ -11,6 +11,7 @@ add_mlir_conversion_library(MLIRArithmeticToSPIRV
   Core
 
   LINK_LIBS PUBLIC
+  MLIRArithmetic
   MLIRSPIRVConversion
   MLIRSPIRV
   )
diff --git a/mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt b/mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt
index 5c729c86373a3..438c659be4119 100644
--- a/mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt
+++ b/mlir/lib/Conversion/ArmNeon2dToIntr/CMakeLists.txt
@@ -11,8 +11,10 @@ add_mlir_conversion_library(MLIRArmNeon2dToIntr
   Core
 
   LINK_LIBS PUBLIC
+  MLIRArithmetic
   MLIRArmNeon
   MLIRPass
   MLIRTransforms
   MLIRIR
+  MLIRVector
   )
diff --git a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
index e5a52d525eeae..0b196d7394c53 100644
--- a/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
+++ b/mlir/lib/Conversion/MathToLibm/CMakeLists.txt
@@ -14,4 +14,5 @@ add_mlir_conversion_library(MLIRMathToLibm
   MLIRArithmetic
   MLIRMath
   MLIRStandardOpsTransforms
+  MLIRVector
   )
diff --git a/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt
index 3c1fc8aa9f4c9..615c6b2a6d759 100644
--- a/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/SCFToSPIRV/CMakeLists.txt
@@ -16,6 +16,7 @@ add_mlir_conversion_library(MLIRSCFToSPIRV
   MLIRStandardToSPIRV
   MLIRIR
   MLIRPass
+  MLIRSCF
   MLIRStandard
   MLIRSupport
   MLIRTransforms
diff --git a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
index d6c8b7e1247ab..4a637648ca2c0 100644
--- a/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/StandardToLLVM/CMakeLists.txt
@@ -18,5 +18,6 @@ add_mlir_conversion_library(MLIRStandardToLLVM
   MLIRLLVMCommonConversion
   MLIRLLVMIR
   MLIRMath
+  MLIRStandard
   MLIRTransforms
   )
diff --git a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
index 2b092978265ec..7bce61590a839 100644
--- a/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
+++ b/mlir/lib/Conversion/VectorToSCF/CMakeLists.txt
@@ -12,4 +12,5 @@ add_mlir_conversion_library(MLIRVectorToSCF
   MLIRLLVMIR
   MLIRMemRef
   MLIRTransforms
+  MLIRVector
   )
diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
index a99bb5789dac4..94456c2bb4054 100644
--- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt
@@ -23,11 +23,13 @@ add_mlir_dialect_library(MLIRAffineTransforms
 
   LINK_LIBS PUBLIC
   MLIRAffine
+  MLIRAffineAnalysis
   MLIRAffineUtils
   MLIRArithmetic
   MLIRIR
   MLIRMemRef
   MLIRPass
+  MLIRSCFTransforms
   MLIRSideEffectInterfaces
   MLIRStandard
   MLIRTransformUtils
diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
index 8b2fe73252ef0..80f6e305a188b 100644
--- a/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/ArmSVE/Transforms/CMakeLists.txt
@@ -9,4 +9,5 @@ add_mlir_dialect_library(MLIRArmSVETransforms
   MLIRIR
   MLIRLLVMCommonConversion
   MLIRLLVMIR
+  MLIRStandard
   )
diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 798213d2d95b0..6c14a3bcfdc49 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -66,6 +66,7 @@ add_mlir_dialect_library(MLIRGPUTransforms
   MLIRParallelLoopMapperEnumsGen
 
   LINK_LIBS PUBLIC
+  MLIRAffineUtils
   MLIRArithmetic
   MLIRAsync
   MLIRDataLayoutInterfaces
diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
index 1231f378a306d..dacabad7ddf8c 100644
--- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt
@@ -6,6 +6,8 @@ add_mlir_dialect_library(MLIRLinalgUtils
 
   LINK_LIBS PUBLIC
   MLIRAffine
+  MLIRAffineAnalysis
+  MLIRAffineUtils
   MLIRArithmetic
   MLIRIR
   MLIRLinalg
diff --git a/mlir/lib/Dialect/Math/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Math/Transforms/CMakeLists.txt
index c2182562fc244..1e8b05a2a89fd 100644
--- a/mlir/lib/Dialect/Math/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Math/Transforms/CMakeLists.txt
@@ -14,4 +14,5 @@ add_mlir_dialect_library(MLIRMathTransforms
   MLIRStandard
   MLIRTransforms
   MLIRX86Vector
+  MLIRVector
   )
diff --git a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
index 2c723625038bf..ef4dbb4ffc8e0 100644
--- a/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/SCF/Transforms/CMakeLists.txt
@@ -20,6 +20,7 @@ add_mlir_dialect_library(MLIRSCFTransforms
 
   LINK_LIBS PUBLIC
   MLIRAffine
+  MLIRAffineAnalysis
   MLIRArithmetic
   MLIRBufferizationTransforms
   MLIRIR
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
index d8d5714ab19ae..82e25923840dd 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
@@ -14,6 +14,7 @@ add_mlir_dialect_library(MLIRStandardOpsTransforms
   MLIRStandardTransformsIncGen
 
   LINK_LIBS PUBLIC
+  MLIRAffine
   MLIRArithmeticTransforms
   MLIRBufferizationTransforms
   MLIRIR

From c913dccfde69617e2357ac25532f25d2a81bca2c Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Tue, 25 Jan 2022 13:21:37 +0700
Subject: [PATCH 505/946] [SCEV] Use lshr in implications

This patch adds support for implication inference logic for the
following pattern:
```
  lhs < (y >> z) <= y, y <= rhs --> lhs < rhs
```
We should be able to use the fact that value shifted to right is
not greater than the original value (provided it is non-negative).

Differential Revision: https://reviews.llvm.org/D116150
Reviewed-By: apilipenko
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  |  9 ++++
 llvm/lib/Analysis/ScalarEvolution.cpp         | 45 +++++++++++++++++++
 .../IndVarSimplify/shift-range-checks.ll      | 20 ++-------
 3 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index fd23ba7ae3872..1e6dac44cf2b7 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1898,6 +1898,15 @@ class ScalarEvolution {
                          const SCEV *FoundLHS, const SCEV *FoundRHS,
                          unsigned Depth);
 
+  /// Test whether the condition described by Pred, LHS, and RHS is true
+  /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+  /// true.
+  ///
+  /// This routine tries to reason about shifts.
+  bool isImpliedCondOperandsViaShift(ICmpInst::Predicate Pred, const SCEV *LHS,
+                                     const SCEV *RHS, const SCEV *FoundLHS,
+                                     const SCEV *FoundRHS);
+
   /// If we know that the specified Phi is in the header of its containing
   /// loop, we know the loop executes a constant number of times, and the PHI
   /// node is just a recurrence involving constants, fold it.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3019ff526b66d..fdfbd2c3ca77f 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -11468,6 +11468,48 @@ bool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred,
   return true;
 }
 
+bool ScalarEvolution::isImpliedCondOperandsViaShift(ICmpInst::Predicate Pred,
+                                                    const SCEV *LHS,
+                                                    const SCEV *RHS,
+                                                    const SCEV *FoundLHS,
+                                                    const SCEV *FoundRHS) {
+  // We want to imply LHS < RHS from LHS < (RHS >> shiftvalue).  First, make
+  // sure that we are dealing with same LHS.
+  if (RHS == FoundRHS) {
+    std::swap(LHS, RHS);
+    std::swap(FoundLHS, FoundRHS);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+  if (LHS != FoundLHS)
+    return false;
+
+  auto *SUFoundRHS = dyn_cast<SCEVUnknown>(FoundRHS);
+  if (!SUFoundRHS)
+    return false;
+
+  Value *Shiftee, *ShiftValue;
+
+  using namespace PatternMatch;
+  if (match(SUFoundRHS->getValue(),
+            m_LShr(m_Value(Shiftee), m_Value(ShiftValue)))) {
+    auto *ShifteeS = getSCEV(Shiftee);
+    // Prove one of the following:
+    // LHS <u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <u RHS
+    // LHS <=u (shiftee >> shiftvalue) && shiftee <=u RHS ---> LHS <=u RHS
+    // LHS <s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0
+    //   ---> LHS <s RHS
+    // LHS <=s (shiftee >> shiftvalue) && shiftee <=s RHS && shiftee >=s 0
+    //   ---> LHS <=s RHS
+    if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_ULE)
+      return isKnownPredicate(ICmpInst::ICMP_ULE, ShifteeS, RHS);
+    if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+      if (isKnownNonNegative(ShifteeS))
+        return isKnownPredicate(ICmpInst::ICMP_SLE, ShifteeS, RHS);
+  }
+
+  return false;
+}
+
 bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS,
                                             const SCEV *FoundLHS,
@@ -11479,6 +11521,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
   if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS))
     return true;
 
+  if (isImpliedCondOperandsViaShift(Pred, LHS, RHS, FoundLHS, FoundRHS))
+    return true;
+
   if (isImpliedCondOperandsViaAddRecStart(Pred, LHS, RHS, FoundLHS, FoundRHS,
                                           CtxI))
     return true;
diff --git a/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll
index 1dc91d105ab3c..ab4cea2d879f5 100644
--- a/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll
+++ b/llvm/test/Transforms/IndVarSimplify/shift-range-checks.ll
@@ -5,8 +5,6 @@
 declare i1 @cond()
 declare void @exit(i32 %code)
 
-; FIXME: We can remove 2nd check here because it is implied by check
-; against the shifted value.
 define void @test_01(i32* %p, i32 %shift) {
 ; CHECK-LABEL: @test_01(
 ; CHECK-NEXT:  entry:
@@ -18,8 +16,7 @@ define void @test_01(i32* %p, i32 %shift) {
 ; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp slt i32 [[IV]], [[X_SHIFTED]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    [[LESS_THAN_X:%.*]] = icmp ult i32 [[IV]], [[X]]
-; CHECK-NEXT:    br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = call i1 @cond()
@@ -64,8 +61,6 @@ never_happens:
   unreachable
 }
 
-; FIXME: We can remove 2nd check here because it is implied by check
-; against the shifted value.
 define void @test_02(i32* %p, i32 %shift) {
 ; CHECK-LABEL: @test_02(
 ; CHECK-NEXT:  entry:
@@ -77,8 +72,7 @@ define void @test_02(i32* %p, i32 %shift) {
 ; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp sgt i32 [[X_SHIFTED]], [[IV]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    [[LESS_THAN_X:%.*]] = icmp ugt i32 [[X]], [[IV]]
-; CHECK-NEXT:    br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = call i1 @cond()
@@ -123,8 +117,6 @@ never_happens:
   unreachable
 }
 
-; FIXME: We can remove 2nd check here because it is implied by check
-; against the shifted value.
 define void @test_03(i32* %p, i32 %shift) {
 ; CHECK-LABEL: @test_03(
 ; CHECK-NEXT:  entry:
@@ -136,8 +128,7 @@ define void @test_03(i32* %p, i32 %shift) {
 ; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp ult i32 [[IV]], [[X_SHIFTED]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    [[LESS_THAN_X:%.*]] = icmp ult i32 [[IV]], [[X]]
-; CHECK-NEXT:    br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = call i1 @cond()
@@ -182,8 +173,6 @@ never_happens:
   unreachable
 }
 
-; FIXME: We can remove 2nd check here because it is implied by check
-; against the shifted value.
 define void @test_04(i32* %p, i32 %shift) {
 ; CHECK-LABEL: @test_04(
 ; CHECK-NEXT:  entry:
@@ -195,8 +184,7 @@ define void @test_04(i32* %p, i32 %shift) {
 ; CHECK-NEXT:    [[LESS_THAN_SHIFTED:%.*]] = icmp ugt i32 [[X_SHIFTED]], [[IV]]
 ; CHECK-NEXT:    br i1 [[LESS_THAN_SHIFTED]], label [[GUARDED:%.*]], label [[FAILURE:%.*]]
 ; CHECK:       guarded:
-; CHECK-NEXT:    [[LESS_THAN_X:%.*]] = icmp ugt i32 [[X]], [[IV]]
-; CHECK-NEXT:    br i1 [[LESS_THAN_X]], label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
+; CHECK-NEXT:    br i1 true, label [[BACKEDGE]], label [[NEVER_HAPPENS:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = call i1 @cond()

From f6984b299afcbeddefe53db7ee1ce62e5d68cc4f Mon Sep 17 00:00:00 2001
From: "Liu, Chen3" <chen3.liu@intel.com>
Date: Tue, 25 Jan 2022 14:08:58 +0800
Subject: [PATCH 506/946] Fix the wrong value of bit_AVXVNNI

Differential Revision: https://reviews.llvm.org/D118103
---
 clang/lib/Headers/cpuid.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index 6df1b4a111726..5d262a60735f2 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -200,7 +200,7 @@
 #define bit_AMXINT8       0x02000000
 
 /* Features in %eax for leaf 7 sub-leaf 1 */
-#define bit_AVXVNNI       0x00000008
+#define bit_AVXVNNI       0x00000010
 #define bit_AVX512BF16    0x00000020
 #define bit_HRESET        0x00400000
 

From 320dc8c4df74ccce318c2c9bdb9b2937438711ac Mon Sep 17 00:00:00 2001
From: Shraiysh Vaishay <Shraiysh.Vaishay@amd.com>
Date: Mon, 24 Jan 2022 18:42:39 +0530
Subject: [PATCH 507/946] [mlir][OpenMP] Added omp.atomic.capture operation

This patch supports the atomic construct (capture) following section 2.17.7 of OpenMP 5.0 standard. Also added tests for the same.

Reviewed By: peixin, kiranchandramohan

Differential Revision: https://reviews.llvm.org/D115851
---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  44 +++++++
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |  62 ++++++++++
 mlir/test/Dialect/OpenMP/invalid.mlir         | 116 ++++++++++++++++++
 mlir/test/Dialect/OpenMP/ops.mlir             |  36 ++++++
 4 files changed, 258 insertions(+)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 384d629c4bf1d..311513f1682a6 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -708,6 +708,50 @@ def AtomicUpdateOp : OpenMP_Op<"atomic.update"> {
   let verifier = [{ return verifyAtomicUpdateOp(*this); }];
 }
 
+def AtomicCaptureOp : OpenMP_Op<"atomic.capture",
+    [SingleBlockImplicitTerminator<"TerminatorOp">]> {
+  let summary = "performs an atomic capture";
+  let description = [{
+    This operation performs an atomic capture.
+
+    `hint` is the value of hint (as used in the hint clause). It is a compile
+    time constant. As the name suggests, this is just a hint for optimization.
+
+    `memory_order` indicates the memory ordering behavior of the construct. It
+    can be one of `seq_cst`, `acq_rel`, `release`, `acquire` or `relaxed`.
+
+    The region has the following allowed forms:
+
+    ```
+      omp.atomic.capture {
+        omp.atomic.update ...
+        omp.atomic.read ...
+        omp.terminator
+      }
+
+      omp.atomic.capture {
+        omp.atomic.read ...
+        omp.atomic.update ...
+        omp.terminator
+      }
+
+      omp.atomic.capture {
+        omp.atomic.read ...
+        omp.atomic.write ...
+        omp.terminator
+      }
+    ```
+
+  }];
+
+  let arguments = (ins DefaultValuedAttr<I64Attr, "0">:$hint,
+                       OptionalAttr<MemoryOrderKind>:$memory_order);
+  let regions = (region SizedRegion<1>:$region);
+  let parser = [{ return parseAtomicCaptureOp(parser, result); }];
+  let printer = [{ return printAtomicCaptureOp(p, *this); }];
+  let verifier = [{ return verifyAtomicCaptureOp(*this); }];
+}
+
 //===----------------------------------------------------------------------===//
 // 2.19.5.7 declare reduction Directive
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index f0c5cdc6eb63e..1eeee3f65f3ba 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -1539,6 +1539,68 @@ static LogicalResult verifyAtomicUpdateOp(AtomicUpdateOp op) {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// AtomicCaptureOp
+//===----------------------------------------------------------------------===//
+
+/// Parser for AtomicCaptureOp
+static LogicalResult parseAtomicCaptureOp(OpAsmParser &parser,
+                                          OperationState &result) {
+  SmallVector<ClauseType> clauses = {memoryOrderClause, hintClause};
+  SmallVector<int> segments;
+  if (parseClauses(parser, result, clauses, segments) ||
+      parser.parseRegion(*result.addRegion()))
+    return failure();
+  return success();
+}
+
+/// Printer for AtomicCaptureOp
+static void printAtomicCaptureOp(OpAsmPrinter &p, AtomicCaptureOp op) {
+  if (op.memory_order())
+    p << "memory_order(" << op.memory_order() << ") ";
+  if (op.hintAttr())
+    printSynchronizationHint(p, op, op.hintAttr());
+  p.printRegion(op.region());
+}
+
+/// Verifier for AtomicCaptureOp
+static LogicalResult verifyAtomicCaptureOp(AtomicCaptureOp op) {
+  Block::OpListType &ops = op.region().front().getOperations();
+  if (ops.size() != 3)
+    return emitError(op.getLoc())
+           << "expected three operations in omp.atomic.capture region (one "
+              "terminator, and two atomic ops)";
+  auto &firstOp = ops.front();
+  auto &secondOp = *ops.getNextNode(firstOp);
+  auto firstReadStmt = dyn_cast<AtomicReadOp>(firstOp);
+  auto firstUpdateStmt = dyn_cast<AtomicUpdateOp>(firstOp);
+  auto secondReadStmt = dyn_cast<AtomicReadOp>(secondOp);
+  auto secondUpdateStmt = dyn_cast<AtomicUpdateOp>(secondOp);
+  auto secondWriteStmt = dyn_cast<AtomicWriteOp>(secondOp);
+
+  if (!((firstUpdateStmt && secondReadStmt) ||
+        (firstReadStmt && secondUpdateStmt) ||
+        (firstReadStmt && secondWriteStmt)))
+    return emitError(ops.front().getLoc())
+           << "invalid sequence of operations in the capture region";
+  if (firstUpdateStmt && secondReadStmt &&
+      firstUpdateStmt.x() != secondReadStmt.x())
+    return emitError(firstUpdateStmt.getLoc())
+           << "updated variable in omp.atomic.update must be captured in "
+              "second operation";
+  if (firstReadStmt && secondUpdateStmt &&
+      firstReadStmt.x() != secondUpdateStmt.x())
+    return emitError(firstReadStmt.getLoc())
+           << "captured variable in omp.atomic.read must be updated in second "
+              "operation";
+  if (firstReadStmt && secondWriteStmt &&
+      firstReadStmt.x() != secondWriteStmt.address())
+    return emitError(firstReadStmt.getLoc())
+           << "captured variable in omp.atomic.read must be updated in "
+              "second operation";
+  return success();
+}
+
 #define GET_ATTRDEF_CLASSES
 #include "mlir/Dialect/OpenMP/OpenMPOpsAttributes.cpp.inc"
 
diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir
index 2875c61013b75..d0f66c3218cbc 100644
--- a/mlir/test/Dialect/OpenMP/invalid.mlir
+++ b/mlir/test/Dialect/OpenMP/invalid.mlir
@@ -650,6 +650,122 @@ func @omp_atomic_update5(%x: memref<i32>, %expr: i32) {
 
 // -----
 
+func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
+  // expected-error @below {{expected three operations in omp.atomic.capture region}}
+  omp.atomic.capture {
+    omp.atomic.read %v = %x : memref<i32>
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.read %v = %x : memref<i32>
+    omp.atomic.read %v = %x : memref<i32>
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.update %x = %x add %expr : memref<i32>, i32
+    omp.atomic.update %x = %x sub %expr : memref<i32>, i32
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.write %x = %expr : memref<i32>, i32
+    omp.atomic.write %x = %expr : memref<i32>, i32
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.write %x = %expr : memref<i32>, i32
+    omp.atomic.update %x = %x add %expr : memref<i32>, i32
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.update %x = %x add %expr : memref<i32>, i32
+    omp.atomic.write %x = %expr : memref<i32>, i32
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{invalid sequence of operations in the capture region}}
+    omp.atomic.write %x = %expr : memref<i32>, i32
+    omp.atomic.read %v = %x : memref<i32>
+    omp.terminator
+  }
+  return
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %y: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{updated variable in omp.atomic.update must be captured in second operation}}
+    omp.atomic.update %x = %x add %expr : memref<i32>, i32
+    omp.atomic.read %v = %y : memref<i32>
+    omp.terminator
+  }
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %y: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{captured variable in omp.atomic.read must be updated in second operation}}
+    omp.atomic.read %v = %y : memref<i32>
+    omp.atomic.update %x = %x add %expr : memref<i32>, i32
+    omp.terminator
+  }
+}
+
+// -----
+
+func @omp_atomic_capture(%x: memref<i32>, %y: memref<i32>, %v: memref<i32>, %expr: i32) {
+  omp.atomic.capture {
+    // expected-error @below {{captured variable in omp.atomic.read must be updated in second operation}}
+    omp.atomic.read %v = %x : memref<i32>
+    omp.atomic.write %y = %expr : memref<i32>, i32
+    omp.terminator
+  }
+}
+
+// -----
+
 func @omp_sections(%data_var1 : memref<i32>, %data_var2 : memref<i32>, %data_var3 : memref<i32>) -> () {
   // expected-error @below {{operand used in both private and firstprivate clauses}}
   omp.sections private(%data_var1 : memref<i32>) firstprivate(%data_var1 : memref<i32>) {
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 950f3d0d472a5..96a0b427123f3 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -584,6 +584,42 @@ func @omp_atomic_update(%x : memref<i32>, %expr : i32, %xBool : memref<i1>, %exp
   return
 }
 
+// CHECK-LABEL: omp_atomic_capture
+// CHECK-SAME: (%[[v:.*]]: memref<i32>, %[[x:.*]]: memref<i32>, %[[expr:.*]]: i32)
+func @omp_atomic_capture(%v: memref<i32>, %x: memref<i32>, %expr: i32) {
+  // CHECK: omp.atomic.capture{
+  // CHECK-NEXT: omp.atomic.update %[[x]] = %[[expr]] add %[[x]] : memref<i32>, i32
+  // CHECK-NEXT: omp.atomic.read %[[v]] = %[[x]] : memref<i32>
+  // CHECK-NEXT: omp.terminator
+  // CHECK-NEXT: }
+  omp.atomic.capture{
+    omp.atomic.update %x = %expr add %x : memref<i32>, i32
+    omp.atomic.read %v = %x : memref<i32>
+    omp.terminator
+  }
+  // CHECK: omp.atomic.capture{
+  // CHECK-NEXT: omp.atomic.read %[[v]] = %[[x]] : memref<i32>
+  // CHECK-NEXT: omp.atomic.update %[[x]] = %[[expr]] add %[[x]] : memref<i32>, i32
+  // CHECK-NEXT: omp.terminator
+  // CHECK-NEXT: }
+  omp.atomic.capture{
+    omp.atomic.read %v = %x : memref<i32>
+    omp.atomic.update %x = %expr add %x : memref<i32>, i32
+    omp.terminator
+  }
+  // CHECK: omp.atomic.capture{
+  // CHECK-NEXT: omp.atomic.read %[[v]] = %[[x]] : memref<i32>
+  // CHECK-NEXT: omp.atomic.write %[[x]] = %[[expr]] : memref<i32>, i32
+  // CHECK-NEXT: omp.terminator
+  // CHECK-NEXT: }
+  omp.atomic.capture{
+    omp.atomic.read %v = %x : memref<i32>
+    omp.atomic.write %x = %expr : memref<i32>, i32
+    omp.terminator
+  }
+  return
+}
+
 // CHECK-LABEL: omp_sectionsop
 func @omp_sectionsop(%data_var1 : memref<i32>, %data_var2 : memref<i32>,
                      %data_var3 : memref<i32>, %redn_var : !llvm.ptr<f32>) {

From 881ff4e4ebe8cc0cc045c7c167cffb01f94f27f8 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 22:59:01 -0800
Subject: [PATCH 508/946] [mlir] Remove unnecessary dependency on Tensor from
 MemRef

---
 mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt | 1 +
 mlir/lib/Dialect/Affine/IR/CMakeLists.txt          | 1 +
 mlir/lib/Dialect/MemRef/IR/CMakeLists.txt          | 1 -
 3 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
index d3b205cb79309..500f64b28ff5e 100644
--- a/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
+++ b/mlir/lib/Conversion/StandardToSPIRV/CMakeLists.txt
@@ -20,4 +20,5 @@ add_mlir_conversion_library(MLIRStandardToSPIRV
   MLIRSupport
   MLIRTransformUtils
   MLIRStandard
+  MLIRTensor
   )
diff --git a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt
index 345003912f6ee..37cf1efef61e5 100644
--- a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt
@@ -17,4 +17,5 @@ add_mlir_dialect_library(MLIRAffine
   MLIRMemRef
   MLIRSideEffectInterfaces
   MLIRStandard
+  MLIRTensor
   )
diff --git a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
index c4415da76b0e4..a03e2c41664ed 100644
--- a/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/IR/CMakeLists.txt
@@ -20,6 +20,5 @@ add_mlir_dialect_library(MLIRMemRef
   MLIRIR
   MLIRMemRefUtils
   MLIRStandard
-  MLIRTensor
   MLIRViewLikeInterface
 )

From bca2d85153dc4a7bff5f671a742e12512a2bc31f Mon Sep 17 00:00:00 2001
From: Lorenzo Chelini <l.chelini@icloud.com>
Date: Tue, 25 Jan 2022 08:50:52 +0100
Subject: [PATCH 509/946] [MLIR][Interfaces] Silence -Wparentheses warning
 (NFC)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

warning: suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses]
---
 mlir/include/mlir/Interfaces/ControlFlowInterfaces.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
index 806821a988bf3..1e8f7b54c474a 100644
--- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
+++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h
@@ -91,7 +91,7 @@ class InvocationBounds {
   /// Create invocation bounds. The lower bound must be at least 0 and only the
   /// upper bound can be unknown.
   InvocationBounds(unsigned lb, Optional<unsigned> ub) : lower(lb), upper(ub) {
-    assert(!ub || ub >= lb && "upper bound cannot be less than lower bound");
+    assert((!ub || ub >= lb) && "upper bound cannot be less than lower bound");
   }
 
   /// Return the lower bound.

From 70cb8daed45fb8a794761a9cfca8432c8ee7c70b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sat, 22 Jan 2022 01:38:07 +0200
Subject: [PATCH 510/946] [X86] [CodeView] Add codeview mappings for registers
 ST0-ST7

These can end up needed after https://reviews.llvm.org/D116821.

Suggested by Alexandre Ganea.

Differential Revision: https://reviews.llvm.org/D118072
---
 .../X86/MCTargetDesc/X86MCTargetDesc.cpp      |  9 ++++
 llvm/test/DebugInfo/COFF/x87-registers.ll     | 44 +++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 llvm/test/DebugInfo/COFF/x87-registers.ll

diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 4440bdc3d58f4..8913e405539e3 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -111,6 +111,15 @@ void X86_MC::initLLVMToSEHAndCVRegMapping(MCRegisterInfo *MRI) {
 
       {codeview::RegisterId::EFLAGS, X86::EFLAGS},
 
+      {codeview::RegisterId::ST0, X86::ST0},
+      {codeview::RegisterId::ST1, X86::ST1},
+      {codeview::RegisterId::ST2, X86::ST2},
+      {codeview::RegisterId::ST3, X86::ST3},
+      {codeview::RegisterId::ST4, X86::ST4},
+      {codeview::RegisterId::ST5, X86::ST5},
+      {codeview::RegisterId::ST6, X86::ST6},
+      {codeview::RegisterId::ST7, X86::ST7},
+
       {codeview::RegisterId::ST0, X86::FP0},
       {codeview::RegisterId::ST1, X86::FP1},
       {codeview::RegisterId::ST2, X86::FP2},
diff --git a/llvm/test/DebugInfo/COFF/x87-registers.ll b/llvm/test/DebugInfo/COFF/x87-registers.ll
new file mode 100644
index 0000000000000..02585673ffe25
--- /dev/null
+++ b/llvm/test/DebugInfo/COFF/x87-registers.ll
@@ -0,0 +1,44 @@
+; RUN: llc -experimental-debug-variable-locations=true < %s -filetype=obj | llvm-readobj - --codeview | FileCheck %s
+
+; CHECK:      DefRangeRegisterSym {
+; CHECK-NEXT:   Kind: S_DEFRANGE_REGISTER
+; CHECK-NEXT:   Register: ST0
+
+target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-w64-windows-gnu"
+
+define i32 @a() !dbg !8 {
+entry:
+  call void @llvm.dbg.declare(metadata [6 x i8]* undef, metadata !13, metadata !DIExpression(DW_OP_LLVM_fragment, 80, 48)), !dbg !15
+  %0 = tail call x86_fp80 asm sideeffect "", "={st},~{dirflag},~{fpsr},~{flags}"(), !dbg !16, !srcloc !17
+  call void @llvm.dbg.value(metadata x86_fp80 %0, metadata !13, metadata !DIExpression()), !dbg !18
+  ret i32 undef, !dbg !19
+}
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6}
+!llvm.ident = !{!7}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "codeview-fp0.c", directory: "llvm/build")
+!2 = !{i32 2, !"CodeView", i32 1}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 2}
+!5 = !{i32 7, !"PIC Level", i32 2}
+!6 = !{i32 7, !"uwtable", i32 1}
+!7 = !{!"clang version 14.0.0"}
+!8 = distinct !DISubprogram(name: "a", scope: !1, file: !1, line: 1, type: !9, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !12)
+!9 = !DISubroutineType(types: !10)
+!10 = !{!11}
+!11 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!12 = !{!13}
+!13 = !DILocalVariable(name: "b", scope: !8, file: !1, line: 2, type: !14)
+!14 = !DIBasicType(name: "long double", size: 128, encoding: DW_ATE_float)
+!15 = !DILocation(line: 2, scope: !8)
+!16 = !DILocation(line: 3, scope: !8)
+!17 = !{i64 40}
+!18 = !DILocation(line: 0, scope: !8)
+!19 = !DILocation(line: 4, scope: !8)

From 9554aaa2753bd866a00bf6fb4183656200e758e2 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Mon, 24 Jan 2022 14:29:05 +0100
Subject: [PATCH 511/946] [Dwarf] Optimize getOrCreateSourceID() for repeated
 calls on same file (NFCI)

DwarfCompileUnit::getOrCreateSourceID() is often called many times
in sequence with the same DIFile. This is currently very expensive,
because it involves creating a string from directory and file name
and looking it up in a string map. This patch remembers the last
DIFile and its ID and directly returns that.

This gives a geomean -1.3% compile-time improvement on CTMark O0-g.

Differential Revision: https://reviews.llvm.org/D118041
---
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 11 ++++++++---
 llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h   |  3 +++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index ab3c9f486670e..5913c687db48f 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -127,9 +127,14 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
   if (!File)
     return Asm->OutStreamer->emitDwarfFileDirective(0, "", "", None, None,
                                                     CUID);
-  return Asm->OutStreamer->emitDwarfFileDirective(
-      0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
-      File->getSource(), CUID);
+
+  if (LastFile != File) {
+    LastFile = File;
+    LastFileID = Asm->OutStreamer->emitDwarfFileDirective(
+        0, File->getDirectory(), File->getFilename(), DD->getMD5AsBytes(File),
+        File->getSource(), CUID);
+  }
+  return LastFileID;
 }
 
 DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index fb03982b5e4a2..f2e1f63468039 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -86,6 +86,9 @@ class DwarfCompileUnit final : public DwarfUnit {
   /// DWO ID for correlating skeleton and split units.
   uint64_t DWOId = 0;
 
+  const DIFile *LastFile = nullptr;
+  unsigned LastFileID;
+
   /// Construct a DIE for the given DbgVariable without initializing the
   /// DbgVariable's DIE reference.
   DIE *constructVariableDIEImpl(const DbgVariable &DV, bool Abstract);

From 2a14bc55c547f0fc7285b783b5320338c3ffdc42 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 25 Jan 2022 16:39:15 +0800
Subject: [PATCH 512/946] [NFC] [C++20] [Modules] Update comments for handling
 friend

There is a comment contains a FIXME for the Module TS. And now the
Module TS is merged so we should update the comment. I've checked the
implementation.
---
 clang/lib/Sema/SemaDecl.cpp | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index a29409461f575..e014500f2114f 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -1586,10 +1586,13 @@ void Sema::FilterLookupForScope(LookupResult &R, DeclContext *Ctx, Scope *S,
 /// We've determined that \p New is a redeclaration of \p Old. Check that they
 /// have compatible owning modules.
 bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) {
-  // FIXME: The Modules TS is not clear about how friend declarations are
-  // to be treated. It's not meaningful to have different owning modules for
-  // linkage in redeclarations of the same entity, so for now allow the
-  // redeclaration and change the owning modules to match.
+  // [module.interface]p7:
+  // A declaration is attached to a module as follows:
+  // - If the declaration is a non-dependent friend declaration that nominates a
+  // function with a declarator-id that is a qualified-id or template-id or that
+  // nominates a class other than with an elaborated-type-specifier with neither
+  // a nested-name-specifier nor a simple-template-id, it is attached to the
+  // module to which the friend is attached ([basic.link]).
   if (New->getFriendObjectKind() &&
       Old->getOwningModuleForLinkage() != New->getOwningModuleForLinkage()) {
     New->setLocalOwningModule(Old->getOwningModule());

From aa97bc116d343f7b6f222d7229668de5d361b312 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Fri, 21 Jan 2022 13:03:15 +0100
Subject: [PATCH 513/946] [NFC] Remove uses of PointerType::getElementType()

Instead use either Type::getPointerElementType() or
Type::getNonOpaquePointerElementType().

This is part of D117885, in preparation for deprecating the API.
---
 clang/lib/CodeGen/CGCall.cpp                  | 16 +++--
 clang/lib/CodeGen/CGClass.cpp                 |  5 +-
 clang/lib/CodeGen/CGObjCGNU.cpp               | 15 +++--
 clang/lib/CodeGen/CGOpenMPRuntime.cpp         |  7 ++-
 clang/lib/CodeGen/ItaniumCXXABI.cpp           |  3 +-
 clang/lib/CodeGen/TargetInfo.cpp              |  8 +--
 llvm/include/llvm/FuzzMutate/OpDescriptor.h   |  2 +-
 llvm/include/llvm/IR/MatrixBuilder.h          |  2 +-
 llvm/include/llvm/IR/Statepoint.h             |  2 +-
 llvm/lib/Analysis/AliasAnalysisEvaluator.cpp  |  6 +-
 llvm/lib/Analysis/ConstantFolding.cpp         |  2 +-
 llvm/lib/Analysis/IVDescriptors.cpp           |  5 +-
 llvm/lib/Analysis/ScalarEvolution.cpp         |  3 +-
 llvm/lib/AsmParser/LLParser.cpp               | 10 ++--
 llvm/lib/Bitcode/Reader/BitcodeReader.cpp     | 60 +++++++++----------
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |  2 +-
 .../CodeGen/GlobalISel/InlineAsmLowering.cpp  |  2 +-
 llvm/lib/FuzzMutate/Operations.cpp            |  2 +-
 llvm/lib/FuzzMutate/RandomIRBuilder.cpp       | 10 ++--
 llvm/lib/IR/AsmWriter.cpp                     |  2 +-
 llvm/lib/IR/AutoUpgrade.cpp                   |  2 +-
 llvm/lib/IR/ConstantFold.cpp                  | 16 ++---
 llvm/lib/IR/Core.cpp                          |  2 +-
 llvm/lib/IR/Function.cpp                      | 10 ++--
 llvm/lib/IR/IRBuilder.cpp                     |  9 ++-
 llvm/lib/IR/Verifier.cpp                      | 24 ++++----
 .../Target/AArch64/AArch64ISelLowering.cpp    | 12 ++--
 .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp      |  8 +--
 .../AMDGPU/AMDGPURewriteOutArguments.cpp      |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  8 +--
 llvm/lib/Target/BPF/BTFDebug.cpp              |  3 +-
 .../Target/Hexagon/HexagonVectorCombine.cpp   |  2 +-
 llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp     |  4 +-
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp   |  7 ++-
 llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp      |  2 +-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |  4 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  2 +-
 llvm/lib/Target/Sparc/SparcISelLowering.cpp   |  2 +-
 llvm/lib/Transforms/Coroutines/CoroFrame.cpp  |  4 +-
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp |  7 +--
 .../InstCombine/InstCombineCalls.cpp          | 14 ++---
 .../InstCombine/InstCombineCasts.cpp          | 10 ++--
 .../InstCombineLoadStoreAlloca.cpp            |  3 +-
 .../InstCombine/InstructionCombining.cpp      |  6 +-
 .../Instrumentation/AddressSanitizer.cpp      |  5 +-
 .../Instrumentation/MemProfiler.cpp           |  5 +-
 .../Instrumentation/SanitizerCoverage.cpp     |  2 +-
 llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp   |  4 +-
 llvm/lib/Transforms/Scalar/SROA.cpp           |  4 +-
 llvm/lib/Transforms/Scalar/Scalarizer.cpp     |  5 +-
 .../lib/Transforms/Utils/AMDGPUEmitPrintf.cpp |  2 +-
 .../Transforms/Utils/LowerMemIntrinsics.cpp   |  2 +-
 .../Utils/ScalarEvolutionExpander.cpp         |  8 +--
 llvm/tools/llvm-stress/llvm-stress.cpp        |  6 +-
 54 files changed, 182 insertions(+), 188 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index ccb394283c4ed..a37ff8844e885 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -3880,9 +3880,8 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
   }
 
   // Create the temporary.
-  Address temp = CGF.CreateTempAlloca(destType->getElementType(),
-                                      CGF.getPointerAlign(),
-                                      "icr.temp");
+  Address temp = CGF.CreateTempAlloca(destType->getPointerElementType(),
+                                      CGF.getPointerAlign(), "icr.temp");
   // Loading an l-value can introduce a cleanup if the l-value is __weak,
   // and that cleanup will be conditional if we can't prove that the l-value
   // isn't null, so we need to register a dominating point so that the cleanups
@@ -3892,9 +3891,8 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
   // Zero-initialize it if we're not doing a copy-initialization.
   bool shouldCopy = CRE->shouldCopy();
   if (!shouldCopy) {
-    llvm::Value *null =
-      llvm::ConstantPointerNull::get(
-        cast<llvm::PointerType>(destType->getElementType()));
+    llvm::Value *null = llvm::ConstantPointerNull::get(
+        cast<llvm::PointerType>(destType->getPointerElementType()));
     CGF.Builder.CreateStore(null, temp);
   }
 
@@ -3936,7 +3934,7 @@ static void emitWritebackArg(CodeGenFunction &CGF, CallArgList &args,
     assert(srcRV.isScalar());
 
     llvm::Value *src = srcRV.getScalarVal();
-    src = CGF.Builder.CreateBitCast(src, destType->getElementType(),
+    src = CGF.Builder.CreateBitCast(src, destType->getPointerElementType(),
                                     "icr.cast");
 
     // Use an ordinary store, not a store-to-lvalue.
@@ -5075,8 +5073,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 #ifndef NDEBUG
         // Assert that these structs have equivalent element types.
         llvm::StructType *FullTy = CallInfo.getArgStruct();
-        llvm::StructType *DeclaredTy = cast<llvm::StructType>(
-            cast<llvm::PointerType>(LastParamTy)->getElementType());
+        llvm::StructType *DeclaredTy =
+            cast<llvm::StructType>(LastParamTy->getPointerElementType());
         assert(DeclaredTy->getNumElements() == FullTy->getNumElements());
         for (llvm::StructType::element_iterator DI = DeclaredTy->element_begin(),
                                                 DE = DeclaredTy->element_end(),
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index d84956c2653e1..520e119ada26c 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -2852,9 +2852,8 @@ llvm::Value *CodeGenFunction::EmitVTableTypeCheckedLoad(
               SanitizerHandler::CFICheckFail, {}, {});
   }
 
-  return Builder.CreateBitCast(
-      Builder.CreateExtractValue(CheckedLoad, 0),
-      cast<llvm::PointerType>(VTable->getType())->getElementType());
+  return Builder.CreateBitCast(Builder.CreateExtractValue(CheckedLoad, 0),
+                               VTable->getType()->getPointerElementType());
 }
 
 void CodeGenFunction::EmitForwardingCallToLambda(
diff --git a/clang/lib/CodeGen/CGObjCGNU.cpp b/clang/lib/CodeGen/CGObjCGNU.cpp
index b2bf60d2c0fcf..52b4490908681 100644
--- a/clang/lib/CodeGen/CGObjCGNU.cpp
+++ b/clang/lib/CodeGen/CGObjCGNU.cpp
@@ -2347,9 +2347,10 @@ llvm::Value *CGObjCGNU::GetTypedSelector(CodeGenFunction &CGF, Selector Sel,
     }
   }
   if (!SelValue) {
-    SelValue = llvm::GlobalAlias::create(
-        SelectorTy->getElementType(), 0, llvm::GlobalValue::PrivateLinkage,
-        ".objc_selector_" + Sel.getAsString(), &TheModule);
+    SelValue = llvm::GlobalAlias::create(SelectorTy->getPointerElementType(), 0,
+                                         llvm::GlobalValue::PrivateLinkage,
+                                         ".objc_selector_" + Sel.getAsString(),
+                                         &TheModule);
     Types.emplace_back(TypeEncoding, SelValue);
   }
 
@@ -2576,14 +2577,16 @@ CGObjCGNU::GenerateMessageSendSuper(CodeGenFunction &CGF,
       if (IsClassMessage)  {
         if (!MetaClassPtrAlias) {
           MetaClassPtrAlias = llvm::GlobalAlias::create(
-              IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+              IdTy->getPointerElementType(), 0,
+              llvm::GlobalValue::InternalLinkage,
               ".objc_metaclass_ref" + Class->getNameAsString(), &TheModule);
         }
         ReceiverClass = MetaClassPtrAlias;
       } else {
         if (!ClassPtrAlias) {
           ClassPtrAlias = llvm::GlobalAlias::create(
-              IdTy->getElementType(), 0, llvm::GlobalValue::InternalLinkage,
+              IdTy->getPointerElementType(), 0,
+              llvm::GlobalValue::InternalLinkage,
               ".objc_class_ref" + Class->getNameAsString(), &TheModule);
         }
         ReceiverClass = ClassPtrAlias;
@@ -3706,7 +3709,7 @@ llvm::Function *CGObjCGNU::ModuleInitFunction() {
   GenerateProtocolHolderCategory();
 
   llvm::StructType *selStructTy =
-    dyn_cast<llvm::StructType>(SelectorTy->getElementType());
+      dyn_cast<llvm::StructType>(SelectorTy->getPointerElementType());
   llvm::Type *selStructPtrTy = SelectorTy;
   if (!selStructTy) {
     selStructTy = llvm::StructType::get(CGM.getLLVMContext(),
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index e4889586bb7ee..fd956aabc717f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -837,9 +837,10 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
   }
   llvm::Value *Size;
   llvm::Value *SizeInChars;
-  auto *ElemType =
-      cast<llvm::PointerType>(OrigAddresses[N].first.getPointer(CGF)->getType())
-          ->getElementType();
+  auto *ElemType = OrigAddresses[N]
+                       .first.getPointer(CGF)
+                       ->getType()
+                       ->getPointerElementType();
   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
   if (AsArraySection) {
     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp
index 6102046805503..2979d92c84172 100644
--- a/clang/lib/CodeGen/ItaniumCXXABI.cpp
+++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp
@@ -4472,8 +4472,7 @@ static void InitCatchParam(CodeGenFunction &CGF,
       // pad.  The best solution is to fix the personality function.
       } else {
         // Pull the pointer for the reference type off.
-        llvm::Type *PtrTy =
-          cast<llvm::PointerType>(LLVMCatchTy)->getElementType();
+        llvm::Type *PtrTy = LLVMCatchTy->getPointerElementType();
 
         // Create the temporary and write the adjusted pointer into it.
         Address ExnPtrTmp =
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index ebe651b5a7184..fd9a7e602833a 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -8937,8 +8937,7 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
                                        unsigned ToAS) const {
     // Single value types.
     if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS)
-      return llvm::PointerType::get(
-          cast<llvm::PointerType>(Ty)->getElementType(), ToAS);
+      return llvm::PointerType::get(Ty->getPointerElementType(), ToAS);
     return Ty;
   }
 
@@ -9334,7 +9333,7 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
     return llvm::ConstantPointerNull::get(PT);
 
   auto &Ctx = CGM.getContext();
-  auto NPT = llvm::PointerType::get(PT->getElementType(),
+  auto NPT = llvm::PointerType::get(PT->getPointerElementType(),
       Ctx.getTargetAddressSpace(LangAS::opencl_generic));
   return llvm::ConstantExpr::getAddrSpaceCast(
       llvm::ConstantPointerNull::get(NPT), PT);
@@ -10271,8 +10270,7 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
     auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
     auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
     if (LTy->isPointerTy() && LTy->getPointerAddressSpace() == DefaultAS) {
-      LTy = llvm::PointerType::get(
-          cast<llvm::PointerType>(LTy)->getElementType(), GlobalAS);
+      LTy = llvm::PointerType::get(LTy->getPointerElementType(), GlobalAS);
       return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
     }
   }
diff --git a/llvm/include/llvm/FuzzMutate/OpDescriptor.h b/llvm/include/llvm/FuzzMutate/OpDescriptor.h
index d6c98cd949a29..43c8109207665 100644
--- a/llvm/include/llvm/FuzzMutate/OpDescriptor.h
+++ b/llvm/include/llvm/FuzzMutate/OpDescriptor.h
@@ -146,7 +146,7 @@ static inline SourcePred sizedPtrType() {
       return false;
 
     if (const auto *PtrT = dyn_cast<PointerType>(V->getType()))
-      return PtrT->getElementType()->isSized();
+      return PtrT->getPointerElementType()->isSized();
     return false;
   };
   auto Make = [](ArrayRef<Value *>, ArrayRef<Type *> Ts) {
diff --git a/llvm/include/llvm/IR/MatrixBuilder.h b/llvm/include/llvm/IR/MatrixBuilder.h
index 6cc5797269e24..4c8286692ebf3 100644
--- a/llvm/include/llvm/IR/MatrixBuilder.h
+++ b/llvm/include/llvm/IR/MatrixBuilder.h
@@ -68,7 +68,7 @@ template <class IRBuilderTy> class MatrixBuilder {
 
     // Deal with the pointer
     PointerType *PtrTy = cast<PointerType>(DataPtr->getType());
-    Type *EltTy = PtrTy->getElementType();
+    Type *EltTy = PtrTy->getPointerElementType();
 
     auto *RetType = FixedVectorType::get(EltTy, Rows * Columns);
 
diff --git a/llvm/include/llvm/IR/Statepoint.h b/llvm/include/llvm/IR/Statepoint.h
index c6251b9bf5c90..a254a67e6b1f7 100644
--- a/llvm/include/llvm/IR/Statepoint.h
+++ b/llvm/include/llvm/IR/Statepoint.h
@@ -123,7 +123,7 @@ class GCStatepointInst : public CallBase {
   /// statepoint.
   Type *getActualReturnType() const {
     auto *CalleeTy =
-      cast<PointerType>(getActualCalledOperand()->getType())->getElementType();
+        getActualCalledOperand()->getType()->getPointerElementType();
     return cast<FunctionType>(CalleeTy)->getReturnType();
   }
 
diff --git a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index 0c097b2fa3020..1577f1eb70b17 100644
--- a/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -142,13 +142,13 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
   for (SetVector<Value *>::iterator I1 = Pointers.begin(), E = Pointers.end();
        I1 != E; ++I1) {
     auto I1Size = LocationSize::afterPointer();
-    Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType();
+    Type *I1ElTy = (*I1)->getType()->getPointerElementType();
     if (I1ElTy->isSized())
       I1Size = LocationSize::precise(DL.getTypeStoreSize(I1ElTy));
 
     for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {
       auto I2Size = LocationSize::afterPointer();
-      Type *I2ElTy = cast<PointerType>((*I2)->getType())->getElementType();
+      Type *I2ElTy = (*I2)->getType()->getPointerElementType();
       if (I2ElTy->isSized())
         I2Size = LocationSize::precise(DL.getTypeStoreSize(I2ElTy));
 
@@ -233,7 +233,7 @@ void AAEvaluator::runInternal(Function &F, AAResults &AA) {
   for (CallBase *Call : Calls) {
     for (auto Pointer : Pointers) {
       auto Size = LocationSize::afterPointer();
-      Type *ElTy = cast<PointerType>(Pointer->getType())->getElementType();
+      Type *ElTy = Pointer->getType()->getPointerElementType();
       if (ElTy->isSized())
         Size = LocationSize::precise(DL.getTypeStoreSize(ElTy));
 
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 772316e7469d9..7cf69f613c669 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -941,7 +941,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
   if (auto *GV = dyn_cast<GlobalValue>(Ptr))
     SrcElemTy = GV->getValueType();
   else if (!PTy->isOpaque())
-    SrcElemTy = PTy->getElementType();
+    SrcElemTy = PTy->getNonOpaquePointerElementType();
   else
     SrcElemTy = Type::getInt8Ty(Ptr->getContext());
 
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 9551eb48e2316..e40b08b322637 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -1414,8 +1414,9 @@ bool InductionDescriptor::isInductionPHI(
 
   // Always use i8 element type for opaque pointer inductions.
   PointerType *PtrTy = cast<PointerType>(PhiTy);
-  Type *ElementType = PtrTy->isOpaque() ? Type::getInt8Ty(PtrTy->getContext())
-                                        : PtrTy->getElementType();
+  Type *ElementType = PtrTy->isOpaque()
+                          ? Type::getInt8Ty(PtrTy->getContext())
+                          : PtrTy->getNonOpaquePointerElementType();
   if (!ElementType->isSized())
     return false;
 
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index fdfbd2c3ca77f..4f2123b4c5fab 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7598,8 +7598,7 @@ const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
         continue;
       // Also make sure step was increased the same with sizeof allocated
       // element type.
-      const PointerType *GEPT = dyn_cast<PointerType>(GEP->getType());
-      if (Ty->getElementType() != GEPT->getElementType())
+      if (Ty->getElementType() != GEP->getType()->getPointerElementType())
         continue;
 
       // FIXME: Since gep indices are silently zext to the indexing type,
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index cec4ffd82f818..868b8693926d7 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -990,10 +990,10 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
         ExplicitTypeLoc,
         typeComparisonErrorMessage(
             "explicit pointee type doesn't match operand's pointee type", Ty,
-            PTy->getElementType()));
+            PTy->getNonOpaquePointerElementType()));
   }
 
-  if (!IsAlias && !PTy->getElementType()->isFunctionTy()) {
+  if (!IsAlias && !PTy->getPointerElementType()->isFunctionTy()) {
     return error(ExplicitTypeLoc,
                  "explicit pointee type should be a function type");
   }
@@ -3588,7 +3588,7 @@ bool LLParser::parseValID(ValID &ID, PerFunctionState *PFS, Type *ExpectedTy) {
             ExplicitTypeLoc,
             typeComparisonErrorMessage(
                 "explicit pointee type doesn't match operand's pointee type",
-                Ty, BasePointerType->getElementType()));
+                Ty, BasePointerType->getNonOpaquePointerElementType()));
       }
 
       unsigned GEPWidth =
@@ -7205,7 +7205,7 @@ int LLParser::parseLoad(Instruction *&Inst, PerFunctionState &PFS) {
         ExplicitTypeLoc,
         typeComparisonErrorMessage(
             "explicit pointee type doesn't match operand's pointee type", Ty,
-            cast<PointerType>(Val->getType())->getElementType()));
+            Val->getType()->getNonOpaquePointerElementType()));
   }
   SmallPtrSet<Type *, 4> Visited;
   if (!Alignment && !Ty->isSized(&Visited))
@@ -7465,7 +7465,7 @@ int LLParser::parseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
         ExplicitTypeLoc,
         typeComparisonErrorMessage(
             "explicit pointee type doesn't match operand's pointee type", Ty,
-            BasePointerType->getElementType()));
+            BasePointerType->getNonOpaquePointerElementType()));
   }
 
   SmallVector<Value*, 16> Indices;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 1e60bad616979..720ab560f988c 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2702,7 +2702,7 @@ Error BitcodeReader::parseConstants() {
 
       PointerType *OrigPtrTy = cast<PointerType>(Elt0FullTy->getScalarType());
       if (!PointeeType)
-        PointeeType = OrigPtrTy->getElementType();
+        PointeeType = OrigPtrTy->getPointerElementType();
       else if (!OrigPtrTy->isOpaqueOrPointeeTypeMatches(PointeeType))
         return error("Explicit gep operator type does not match pointee type "
                      "of pointer operand");
@@ -2825,9 +2825,8 @@ Error BitcodeReader::parseConstants() {
         ConstrStr += (char)Record[3+AsmStrSize+i];
       UpgradeInlineAsmString(&AsmStr);
       // FIXME: support upgrading in opaque pointers mode.
-      V = InlineAsm::get(
-          cast<FunctionType>(cast<PointerType>(CurTy)->getElementType()),
-          AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
+      V = InlineAsm::get(cast<FunctionType>(CurTy->getPointerElementType()),
+                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack);
       break;
     }
     // This version adds support for the asm dialect keywords (e.g.,
@@ -2852,10 +2851,9 @@ Error BitcodeReader::parseConstants() {
         ConstrStr += (char)Record[3+AsmStrSize+i];
       UpgradeInlineAsmString(&AsmStr);
       // FIXME: support upgrading in opaque pointers mode.
-      V = InlineAsm::get(
-          cast<FunctionType>(cast<PointerType>(CurTy)->getElementType()),
-          AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
-          InlineAsm::AsmDialect(AsmDialect));
+      V = InlineAsm::get(cast<FunctionType>(CurTy->getPointerElementType()),
+                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
+                         InlineAsm::AsmDialect(AsmDialect));
       break;
     }
     // This version adds support for the unwind keyword.
@@ -2884,10 +2882,9 @@ Error BitcodeReader::parseConstants() {
         ConstrStr += (char)Record[OpNum + AsmStrSize + i];
       UpgradeInlineAsmString(&AsmStr);
       // FIXME: support upgrading in opaque pointers mode.
-      V = InlineAsm::get(
-          cast<FunctionType>(cast<PointerType>(CurTy)->getElementType()),
-          AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
-          InlineAsm::AsmDialect(AsmDialect), CanThrow);
+      V = InlineAsm::get(cast<FunctionType>(CurTy->getPointerElementType()),
+                         AsmStr, ConstrStr, HasSideEffects, IsAlignStack,
+                         InlineAsm::AsmDialect(AsmDialect), CanThrow);
       break;
     }
     // This version adds explicit function type.
@@ -3282,7 +3279,7 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
     if (!Ty->isPointerTy())
       return error("Invalid type for value");
     AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
-    Ty = cast<PointerType>(Ty)->getElementType();
+    Ty = Ty->getPointerElementType();
   }
 
   uint64_t RawLinkage = Record[3];
@@ -3375,7 +3372,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
   if (!FTy)
     return error("Invalid record");
   if (auto *PTy = dyn_cast<PointerType>(FTy))
-    FTy = PTy->getElementType();
+    FTy = PTy->getPointerElementType();
 
   if (!isa<FunctionType>(FTy))
     return error("Invalid type for value");
@@ -3416,7 +3413,7 @@ Error BitcodeReader::parseFunctionRecord(ArrayRef<uint64_t> Record) {
       Func->removeParamAttr(i, Kind);
 
       Type *PTy = cast<FunctionType>(FTy)->getParamType(i);
-      Type *PtrEltTy = cast<PointerType>(PTy)->getElementType();
+      Type *PtrEltTy = PTy->getPointerElementType();
       Attribute NewAttr;
       switch (Kind) {
       case Attribute::ByVal:
@@ -3539,7 +3536,7 @@ Error BitcodeReader::parseGlobalIndirectSymbolRecord(
     auto *PTy = dyn_cast<PointerType>(Ty);
     if (!PTy)
       return error("Invalid type for value");
-    Ty = PTy->getElementType();
+    Ty = PTy->getPointerElementType();
     AddrSpace = PTy->getAddressSpace();
   } else {
     AddrSpace = Record[OpNum++];
@@ -3908,7 +3905,7 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB,
 
       CB->removeParamAttr(i, Kind);
 
-      Type *PtrEltTy = cast<PointerType>(ArgsTys[i])->getElementType();
+      Type *PtrEltTy = ArgsTys[i]->getPointerElementType();
       Attribute NewAttr;
       switch (Kind) {
       case Attribute::ByVal:
@@ -3949,7 +3946,7 @@ void BitcodeReader::propagateAttributeTypes(CallBase *CB,
   case Intrinsic::preserve_array_access_index:
   case Intrinsic::preserve_struct_access_index:
     if (!CB->getAttributes().getParamElementType(0)) {
-      Type *ElTy = cast<PointerType>(ArgsTys[0])->getElementType();
+      Type *ElTy = ArgsTys[0]->getPointerElementType();
       Attribute NewAttr = Attribute::get(Context, Attribute::ElementType, ElTy);
       CB->addParamAttr(0, NewAttr);
     }
@@ -4239,8 +4236,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         return error("Invalid record");
 
       if (!Ty) {
-        Ty = cast<PointerType>(BasePtr->getType()->getScalarType())
-                 ->getElementType();
+        Ty = BasePtr->getType()->getScalarType()->getPointerElementType();
       } else if (!cast<PointerType>(BasePtr->getType()->getScalarType())
                       ->isOpaqueOrPointeeTypeMatches(Ty)) {
         return error(
@@ -4756,8 +4752,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (!CalleeTy)
         return error("Callee is not a pointer");
       if (!FTy) {
-        FTy = dyn_cast<FunctionType>(
-            cast<PointerType>(Callee->getType())->getElementType());
+        FTy =
+            dyn_cast<FunctionType>(Callee->getType()->getPointerElementType());
         if (!FTy)
           return error("Callee is not of pointer to function type");
       } else if (!CalleeTy->isOpaqueOrPointeeTypeMatches(FTy))
@@ -4837,8 +4833,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (!OpTy)
         return error("Callee is not a pointer type");
       if (!FTy) {
-        FTy = dyn_cast<FunctionType>(
-            cast<PointerType>(Callee->getType())->getElementType());
+        FTy =
+            dyn_cast<FunctionType>(Callee->getType()->getPointerElementType());
         if (!FTy)
           return error("Callee is not of pointer to function type");
       } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy))
@@ -5000,7 +4996,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
         auto *PTy = dyn_cast_or_null<PointerType>(Ty);
         if (!PTy)
           return error("Old-style alloca with a non-pointer type");
-        Ty = PTy->getElementType();
+        Ty = PTy->getPointerElementType();
       }
       Type *OpTy = getTypeByID(Record[1]);
       Value *Size = getFnValueByID(Record[2], OpTy);
@@ -5045,7 +5041,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (OpNum + 3 == Record.size()) {
         Ty = getTypeByID(Record[OpNum++]);
       } else {
-        Ty = cast<PointerType>(Op->getType())->getElementType();
+        Ty = Op->getType()->getPointerElementType();
       }
 
       if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
@@ -5078,7 +5074,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (OpNum + 5 == Record.size()) {
         Ty = getTypeByID(Record[OpNum++]);
       } else {
-        Ty = cast<PointerType>(Op->getType())->getElementType();
+        Ty = Op->getType()->getPointerElementType();
       }
 
       if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
@@ -5110,8 +5106,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
           (BitCode == bitc::FUNC_CODE_INST_STORE
                ? getValueTypePair(Record, OpNum, NextValueNo, Val)
                : popValue(Record, OpNum, NextValueNo,
-                          cast<PointerType>(Ptr->getType())->getElementType(),
-                          Val)) ||
+                          Ptr->getType()->getPointerElementType(), Val)) ||
           OpNum + 2 != Record.size())
         return error("Invalid record");
 
@@ -5139,8 +5134,7 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
           (BitCode == bitc::FUNC_CODE_INST_STOREATOMIC
                ? getValueTypePair(Record, OpNum, NextValueNo, Val)
                : popValue(Record, OpNum, NextValueNo,
-                          cast<PointerType>(Ptr->getType())->getElementType(),
-                          Val)) ||
+                          Ptr->getType()->getPointerElementType(), Val)) ||
           OpNum + 4 != Record.size())
         return error("Invalid record");
 
@@ -5391,8 +5385,8 @@ Error BitcodeReader::parseFunctionBody(Function *F) {
       if (!OpTy)
         return error("Callee is not a pointer type");
       if (!FTy) {
-        FTy = dyn_cast<FunctionType>(
-            cast<PointerType>(Callee->getType())->getElementType());
+        FTy =
+            dyn_cast<FunctionType>(Callee->getType()->getPointerElementType());
         if (!FTy)
           return error("Callee is not of pointer to function type");
       } else if (!OpTy->isOpaqueOrPointeeTypeMatches(FTy))
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index e0efdf286caff..80978471da4e3 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -948,7 +948,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
       } else {
         // POINTER: [pointee type, address space]
         Code = bitc::TYPE_CODE_POINTER;
-        TypeVals.push_back(VE.getTypeID(PTy->getElementType()));
+        TypeVals.push_back(VE.getTypeID(PTy->getNonOpaquePointerElementType()));
         TypeVals.push_back(AddressSpace);
         if (AddressSpace == 0)
           AbbrevToUse = PtrAbbrev;
diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 56a700721544c..e0503b1ed2050 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -313,7 +313,7 @@ bool InlineAsmLowering::lowerInlineAsm(
         PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
         if (!PtrTy)
           report_fatal_error("Indirect operand for inline asm not a pointer!");
-        OpTy = PtrTy->getElementType();
+        OpTy = PtrTy->getPointerElementType();
       }
 
       // FIXME: Support aggregate input operands
diff --git a/llvm/lib/FuzzMutate/Operations.cpp b/llvm/lib/FuzzMutate/Operations.cpp
index a37fd5454dd42..221a3a84b49be 100644
--- a/llvm/lib/FuzzMutate/Operations.cpp
+++ b/llvm/lib/FuzzMutate/Operations.cpp
@@ -169,7 +169,7 @@ OpDescriptor llvm::fuzzerop::splitBlockDescriptor(unsigned Weight) {
 
 OpDescriptor llvm::fuzzerop::gepDescriptor(unsigned Weight) {
   auto buildGEP = [](ArrayRef<Value *> Srcs, Instruction *Inst) {
-    Type *Ty = cast<PointerType>(Srcs[0]->getType())->getElementType();
+    Type *Ty = Srcs[0]->getType()->getPointerElementType();
     auto Indices = makeArrayRef(Srcs).drop_front(1);
     return GetElementPtrInst::Create(Ty, Srcs[0], Indices, "G", Inst);
   };
diff --git a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
index 1295714839e87..27c3bdfb22a8e 100644
--- a/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
+++ b/llvm/lib/FuzzMutate/RandomIRBuilder.cpp
@@ -53,8 +53,8 @@ Value *RandomIRBuilder::newSource(BasicBlock &BB, ArrayRef<Instruction *> Insts,
       IP = ++I->getIterator();
       assert(IP != BB.end() && "guaranteed by the findPointer");
     }
-    auto *NewLoad = new LoadInst(
-        cast<PointerType>(Ptr->getType())->getElementType(), Ptr, "L", &*IP);
+    auto *NewLoad =
+        new LoadInst(Ptr->getType()->getPointerElementType(), Ptr, "L", &*IP);
 
     // Only sample this load if it really matches the descriptor
     if (Pred.matches(Srcs, NewLoad))
@@ -141,12 +141,12 @@ Value *RandomIRBuilder::findPointer(BasicBlock &BB,
 
     if (auto PtrTy = dyn_cast<PointerType>(Inst->getType())) {
       // We can never generate loads from non first class or non sized types
-      if (!PtrTy->getElementType()->isSized() ||
-          !PtrTy->getElementType()->isFirstClassType())
+      Type *ElemTy = PtrTy->getPointerElementType();
+      if (!ElemTy->isSized() || !ElemTy->isFirstClassType())
         return false;
 
       // TODO: Check if this is horribly expensive.
-      return Pred.matches(Srcs, UndefValue::get(PtrTy->getElementType()));
+      return Pred.matches(Srcs, UndefValue::get(ElemTy));
     }
     return false;
   };
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index bbe0c97e60a2f..6631fec2032eb 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -587,7 +587,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) {
         OS << " addrspace(" << AddressSpace << ')';
       return;
     }
-    print(PTy->getElementType(), OS);
+    print(PTy->getNonOpaquePointerElementType(), OS);
     if (unsigned AddressSpace = PTy->getAddressSpace())
       OS << " addrspace(" << AddressSpace << ')';
     OS << '*';
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index b820eabf173d7..45459e200b3d5 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -4495,7 +4495,7 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
 
   if (F.getCallingConv() == CallingConv::X86_INTR &&
       !F.arg_empty() && !F.hasParamAttribute(0, Attribute::ByVal)) {
-    Type *ByValTy = cast<PointerType>(F.getArg(0)->getType())->getElementType();
+    Type *ByValTy = F.getArg(0)->getType()->getPointerElementType();
     Attribute NewAttr = Attribute::getWithByValType(F.getContext(), ByValTy);
     F.addParamAttr(0, NewAttr);
   }
diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 9f1d76b0c768a..1cd95cd5d4c8e 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -119,21 +119,21 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
     if (PointerType *DPTy = dyn_cast<PointerType>(DestTy))
       if (PTy->getAddressSpace() == DPTy->getAddressSpace() &&
           !PTy->isOpaque() && !DPTy->isOpaque() &&
-          PTy->getElementType()->isSized()) {
+          PTy->getNonOpaquePointerElementType()->isSized()) {
         SmallVector<Value*, 8> IdxList;
         Value *Zero =
           Constant::getNullValue(Type::getInt32Ty(DPTy->getContext()));
         IdxList.push_back(Zero);
-        Type *ElTy = PTy->getElementType();
-        while (ElTy && ElTy != DPTy->getElementType()) {
+        Type *ElTy = PTy->getNonOpaquePointerElementType();
+        while (ElTy && ElTy != DPTy->getNonOpaquePointerElementType()) {
           ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, (uint64_t)0);
           IdxList.push_back(Zero);
         }
 
-        if (ElTy == DPTy->getElementType())
+        if (ElTy == DPTy->getNonOpaquePointerElementType())
           // This GEP is inbounds because all indices are zero.
-          return ConstantExpr::getInBoundsGetElementPtr(PTy->getElementType(),
-                                                        V, IdxList);
+          return ConstantExpr::getInBoundsGetElementPtr(
+              PTy->getNonOpaquePointerElementType(), V, IdxList);
       }
 
   // Handle casts from one vector constant to another.  We know that the src
@@ -2098,9 +2098,9 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
       PointerType *DstPtrTy = dyn_cast<PointerType>(CE->getType());
       if (SrcPtrTy && DstPtrTy) {
         ArrayType *SrcArrayTy =
-          dyn_cast<ArrayType>(SrcPtrTy->getElementType());
+          dyn_cast<ArrayType>(SrcPtrTy->getPointerElementType());
         ArrayType *DstArrayTy =
-          dyn_cast<ArrayType>(DstPtrTy->getElementType());
+          dyn_cast<ArrayType>(DstPtrTy->getPointerElementType());
         if (SrcArrayTy && DstArrayTy
             && SrcArrayTy->getElementType() == DstArrayTy->getElementType()
             && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 3f899471843fc..47ee4cbbf5829 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -796,7 +796,7 @@ LLVMTypeRef LLVMScalableVectorType(LLVMTypeRef ElementType,
 LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) {
   auto *Ty = unwrap<Type>(WrappedTy);
   if (auto *PTy = dyn_cast<PointerType>(Ty))
-    return wrap(PTy->getElementType());
+    return wrap(PTy->getPointerElementType());
   if (auto *ATy = dyn_cast<ArrayType>(Ty))
     return wrap(ATy->getElementType());
   return wrap(cast<VectorType>(Ty)->getElementType());
diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 534e8a74d0ebf..72d8f9e1547f6 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -817,7 +817,8 @@ static std::string getMangledTypeStr(Type *Ty, bool &HasUnnamedType) {
     // Opaque pointer doesn't have pointee type information, so we just mangle
     // address space for opaque pointer.
     if (!PTyp->isOpaque())
-      Result += getMangledTypeStr(PTyp->getElementType(), HasUnnamedType);
+      Result += getMangledTypeStr(PTyp->getNonOpaquePointerElementType(),
+                                  HasUnnamedType);
   } else if (ArrayType *ATyp = dyn_cast<ArrayType>(Ty)) {
     Result += "a" + utostr(ATyp->getNumElements()) +
               getMangledTypeStr(ATyp->getElementType(), HasUnnamedType);
@@ -1465,8 +1466,8 @@ static bool matchIntrinsicType(
       if (!PT || PT->getAddressSpace() != D.Pointer_AddressSpace)
         return true;
       if (!PT->isOpaque())
-        return matchIntrinsicType(PT->getElementType(), Infos, ArgTys,
-                                  DeferredChecks, IsDeferredCheck);
+        return matchIntrinsicType(PT->getNonOpaquePointerElementType(), Infos,
+                                  ArgTys, DeferredChecks, IsDeferredCheck);
       // Consume IIT descriptors relating to the pointer element type.
       while (Infos.front().Kind == IITDescriptor::Pointer)
         Infos = Infos.slice(1);
@@ -1573,7 +1574,8 @@ static bool matchIntrinsicType(
         return IsDeferredCheck || DeferCheck(Ty);
       Type * ReferenceType = ArgTys[D.getArgumentNumber()];
       PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
-      return (!ThisArgType || ThisArgType->getElementType() != ReferenceType);
+      return (!ThisArgType ||
+              ThisArgType->getPointerElementType() != ReferenceType);
     }
     case IITDescriptor::PtrToElt: {
       if (D.getArgumentNumber() >= ArgTys.size())
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b1483a44ebf77..b91885bcfac47 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -679,7 +679,7 @@ static CallInst *CreateGCStatepointCallCommon(
     const Twine &Name) {
   // Extract out the type of the callee.
   auto *FuncPtrType = cast<PointerType>(ActualCallee->getType());
-  assert(isa<FunctionType>(FuncPtrType->getElementType()) &&
+  assert(isa<FunctionType>(FuncPtrType->getPointerElementType()) &&
          "actual callee must be a callable value");
 
   Module *M = Builder->GetInsertBlock()->getParent()->getParent();
@@ -736,7 +736,7 @@ static InvokeInst *CreateGCStatepointInvokeCommon(
     ArrayRef<T3> GCArgs, const Twine &Name) {
   // Extract out the type of the callee.
   auto *FuncPtrType = cast<PointerType>(ActualInvokee->getType());
-  assert(isa<FunctionType>(FuncPtrType->getElementType()) &&
+  assert(isa<FunctionType>(FuncPtrType->getPointerElementType()) &&
          "actual callee must be a callable value");
 
   Module *M = Builder->GetInsertBlock()->getParent()->getParent();
@@ -1002,12 +1002,11 @@ Value *IRBuilderBase::CreatePtrDiff(Value *LHS, Value *RHS,
                                     const Twine &Name) {
   assert(LHS->getType() == RHS->getType() &&
          "Pointer subtraction operand types must match!");
-  auto *ArgType = cast<PointerType>(LHS->getType());
+  auto *ArgElemType = LHS->getType()->getPointerElementType();
   Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
   Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
   Value *Difference = CreateSub(LHS_int, RHS_int);
-  return CreateExactSDiv(Difference,
-                         ConstantExpr::getSizeOf(ArgType->getElementType()),
+  return CreateExactSDiv(Difference, ConstantExpr::getSizeOf(ArgElemType),
                          Name);
 }
 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index cb689e14e6f0b..b84edb7894058 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -1826,33 +1826,34 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty,
              "Attribute 'preallocated' does not support unsized types!", V);
     }
     if (!PTy->isOpaque()) {
-      if (!isa<PointerType>(PTy->getElementType()))
+      if (!isa<PointerType>(PTy->getNonOpaquePointerElementType()))
         Assert(!Attrs.hasAttribute(Attribute::SwiftError),
                "Attribute 'swifterror' only applies to parameters "
                "with pointer to pointer type!",
                V);
       if (Attrs.hasAttribute(Attribute::ByRef)) {
-        Assert(Attrs.getByRefType() == PTy->getElementType(),
+        Assert(Attrs.getByRefType() == PTy->getNonOpaquePointerElementType(),
                "Attribute 'byref' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::ByVal) && Attrs.getByValType()) {
-        Assert(Attrs.getByValType() == PTy->getElementType(),
+        Assert(Attrs.getByValType() == PTy->getNonOpaquePointerElementType(),
                "Attribute 'byval' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::Preallocated)) {
-        Assert(Attrs.getPreallocatedType() == PTy->getElementType(),
+        Assert(Attrs.getPreallocatedType() ==
+                   PTy->getNonOpaquePointerElementType(),
                "Attribute 'preallocated' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::InAlloca)) {
-        Assert(Attrs.getInAllocaType() == PTy->getElementType(),
+        Assert(Attrs.getInAllocaType() == PTy->getNonOpaquePointerElementType(),
                "Attribute 'inalloca' type does not match parameter!", V);
       }
 
       if (Attrs.hasAttribute(Attribute::ElementType)) {
-        Assert(Attrs.getElementType() == PTy->getElementType(),
+        Assert(Attrs.getElementType() == PTy->getNonOpaquePointerElementType(),
                "Attribute 'elementtype' type does not match parameter!", V);
       }
     }
@@ -2195,9 +2196,10 @@ void Verifier::verifyStatepoint(const CallBase &Call) {
 
   const Value *Target = Call.getArgOperand(2);
   auto *PT = dyn_cast<PointerType>(Target->getType());
-  Assert(PT && PT->getElementType()->isFunctionTy(),
+  Assert(PT && PT->getPointerElementType()->isFunctionTy(),
          "gc.statepoint callee must be of function pointer type", Call, Target);
-  FunctionType *TargetFuncType = cast<FunctionType>(PT->getElementType());
+  FunctionType *TargetFuncType =
+      cast<FunctionType>(PT->getPointerElementType());
 
   const int NumCallArgs = cast<ConstantInt>(Call.getArgOperand(3))->getZExtValue();
   Assert(NumCallArgs >= 0,
@@ -5005,7 +5007,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
     // Assert that result type matches wrapped callee.
     const Value *Target = StatepointCall->getArgOperand(2);
     auto *PT = cast<PointerType>(Target->getType());
-    auto *TargetFuncType = cast<FunctionType>(PT->getElementType());
+    auto *TargetFuncType = cast<FunctionType>(PT->getPointerElementType());
     Assert(Call.getType() == TargetFuncType->getReturnType(),
            "gc.result result type does not match wrapped callee", Call);
     break;
@@ -5312,7 +5314,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
       PointerType *Op0PtrTy =
           cast<PointerType>(Call.getArgOperand(0)->getType());
       if (!Op0PtrTy->isOpaque())
-        Op0ElemTy = Op0PtrTy->getElementType();
+        Op0ElemTy = Op0PtrTy->getNonOpaquePointerElementType();
       break;
     }
     case Intrinsic::matrix_column_major_store: {
@@ -5326,7 +5328,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
       PointerType *Op1PtrTy =
           cast<PointerType>(Call.getArgOperand(1)->getType());
       if (!Op1PtrTy->isOpaque())
-        Op1ElemTy = Op1PtrTy->getElementType();
+        Op1ElemTy = Op1PtrTy->getNonOpaquePointerElementType();
       break;
     }
     default:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8f85c93e1d5f4..fd35ab2049e92 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -11781,10 +11781,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::aarch64_ldxr: {
     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
     return true;
   }
@@ -11792,10 +11792,10 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::aarch64_stxr: {
     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
   }
@@ -11823,7 +11823,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::getVT(I.getType());
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MONonTemporal;
     return true;
   }
@@ -11833,7 +11833,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.memVT = MVT::getVT(I.getOperand(0)->getType());
     Info.ptrVal = I.getArgOperand(2);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
     return true;
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 699c6c4794554..f4d4d34b698c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -331,8 +331,8 @@ void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
   if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
       // FIXME: Should report this for all address spaces
-      PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
-                                                   PtrTy->getElementType());
+      PointeeAlign = DL.getValueOrABITypeAlignment(
+          Arg.getParamAlign(), PtrTy->getPointerElementType());
     }
   }
 
@@ -732,8 +732,8 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
   // FIXME: Need to distinguish in memory alignment from pointer alignment.
   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
-      PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
-                                                   PtrTy->getElementType());
+      PointeeAlign = DL.getValueOrABITypeAlignment(
+          Arg.getParamAlign(), PtrTy->getPointerElementType());
     }
   }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
index 3d578a9b891e3..1c6c63dd5b251 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURewriteOutArguments.cpp
@@ -433,7 +433,7 @@ bool AMDGPURewriteOutArguments::runOnFunction(Function &F) {
 
     PointerType *ArgType = cast<PointerType>(Arg.getType());
 
-    auto *EltTy = ArgType->getElementType();
+    auto *EltTy = ArgType->getPointerElementType();
     const auto Align =
         DL->getValueOrABITypeAlignment(Arg.getParamAlign(), EltTy);
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index b525c1dc60389..fe4e6b24367a3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -20789,10 +20789,10 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
     return true;
   }
@@ -20801,10 +20801,10 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(1);
     Info.offset = 0;
-    Info.align = DL.getABITypeAlign(PtrTy->getElementType());
+    Info.align = DL.getABITypeAlign(PtrTy->getPointerElementType());
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
     return true;
   }
diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 0c510686a13bb..608be5160da7e 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -1366,7 +1366,8 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
 
     // Calculate symbol size
     const DataLayout &DL = Global.getParent()->getDataLayout();
-    uint32_t Size = DL.getTypeAllocSize(Global.getType()->getElementType());
+    uint32_t Size =
+        DL.getTypeAllocSize(Global.getType()->getPointerElementType());
 
     DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId,
         Asm->getSymbol(&Global), Size);
diff --git a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
index bc64d9d30a4c2..6aca8d8078725 100644
--- a/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVectorCombine.cpp
@@ -443,7 +443,7 @@ auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
   // we don't need to do pointer casts.
   auto *PtrTy = cast<PointerType>(Ptr->getType());
   if (!PtrTy->isOpaque()) {
-    Type *ElemTy = PtrTy->getElementType();
+    Type *ElemTy = PtrTy->getNonOpaquePointerElementType();
     int ElemSize = HVC.getAllocSizeOf(ElemTy);
     if (Adjust % ElemSize == 0 && Adjust != 0) {
       Value *Tmp0 =
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 96ea35bd86cd8..3a59306c4998c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -1454,7 +1454,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
 
           if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() !=
               NVPTX::CUDA) {
-            Type *ETy = PTy->getElementType();
+            Type *ETy = PTy->getPointerElementType();
             int addrSpace = PTy->getAddressSpace();
             switch (addrSpace) {
             default:
@@ -1514,7 +1514,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
     // param has byVal attribute. So should be a pointer
     auto *PTy = dyn_cast<PointerType>(Ty);
     assert(PTy && "Param with byval attribute should be a pointer type");
-    Type *ETy = PTy->getElementType();
+    Type *ETy = PTy->getPointerElementType();
 
     if (isABI || isKernelFunc) {
       // Just print .param .align <a> .b8 .param[size];
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f2c8d17816b8f..eac237bb27bb2 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -1354,7 +1354,7 @@ std::string NVPTXTargetLowering::getPrototype(
     }
     auto *PTy = dyn_cast<PointerType>(Ty);
     assert(PTy && "Param with byval attribute should be a pointer type");
-    Type *ETy = PTy->getElementType();
+    Type *ETy = PTy->getPointerElementType();
 
     Align align = Outs[OIdx].Flags.getNonZeroByValAlign();
     unsigned sz = DL.getTypeAllocSize(ETy);
@@ -1577,7 +1577,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     SmallVector<uint64_t, 16> Offsets;
     auto *PTy = dyn_cast<PointerType>(Args[i].Ty);
     assert(PTy && "Type of a byval parameter should be pointer");
-    ComputePTXValueVTs(*this, DL, PTy->getElementType(), VTs, &Offsets, 0);
+    ComputePTXValueVTs(*this, DL, PTy->getPointerElementType(), VTs, &Offsets,
+                       0);
 
     // declare .param .align <align> .b8 .param<n>[<size>];
     unsigned sz = Outs[OIdx].Flags.getByValSize();
@@ -2447,7 +2448,7 @@ static bool isImageOrSamplerVal(const Value *arg, const Module *context) {
   if (!context)
     return false;
 
-  auto *STy = dyn_cast<StructType>(PTy->getElementType());
+  auto *STy = dyn_cast<StructType>(PTy->getPointerElementType());
   if (!STy || STy->isLiteral())
     return false;
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index ddb7f097fe685..67aa49132016d 100644
--- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -233,7 +233,7 @@ void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
 
   assert(PType && "Expecting pointer type in handleByValParam");
 
-  Type *StructType = PType->getElementType();
+  Type *StructType = PType->getPointerElementType();
 
   auto IsALoadChain = [&](Value *Start) {
     SmallVector<Value *, 16> ValuesToCheck = {Start};
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index c1bfc11824d27..105a43510369d 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -17894,7 +17894,7 @@ Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
   assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
          "Only support quadword now");
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  Type *ValTy = cast<PointerType>(AlignedAddr->getType())->getElementType();
+  Type *ValTy = AlignedAddr->getType()->getPointerElementType();
   assert(ValTy->getPrimitiveSizeInBits() == 128);
   Function *RMW = Intrinsic::getDeclaration(
       M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
@@ -17919,7 +17919,7 @@ Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
   assert(EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() &&
          "Only support quadword now");
   Module *M = Builder.GetInsertBlock()->getParent()->getParent();
-  Type *ValTy = cast<PointerType>(AlignedAddr->getType())->getElementType();
+  Type *ValTy = AlignedAddr->getType()->getPointerElementType();
   assert(ValTy->getPrimitiveSizeInBits() == 128);
   Function *IntCmpXchg =
       Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7e4e36a20d15..9b427703764ea 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1105,7 +1105,7 @@ bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   case Intrinsic::riscv_masked_cmpxchg_i32: {
     PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
     Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = MVT::getVT(PtrTy->getElementType());
+    Info.memVT = MVT::getVT(PtrTy->getPointerElementType());
     Info.ptrVal = I.getArgOperand(0);
     Info.offset = 0;
     Info.align = Align(4);
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 52b120031e3f0..6d6879bc94b38 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -826,7 +826,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
       // sret only allowed on first argument
       assert(Outs[realArgIdx].OrigArgIndex == 0);
       PointerType *Ty = cast<PointerType>(CLI.getArgs()[0].Ty);
-      Type *ElementTy = Ty->getElementType();
+      Type *ElementTy = Ty->getPointerElementType();
       SRetArgSize = DAG.getDataLayout().getTypeAllocSize(ElementTy);
       continue;
     }
diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
index 0a300ce855f07..92acfb93057a1 100644
--- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -808,7 +808,7 @@ static StringRef solveTypeName(Type *Ty) {
 
   if (Ty->isPointerTy()) {
     auto *PtrTy = cast<PointerType>(Ty);
-    Type *PointeeTy = PtrTy->getElementType();
+    Type *PointeeTy = PtrTy->getPointerElementType();
     auto Name = solveTypeName(PointeeTy);
     if (Name == "UnknownType")
       return "PointerType";
@@ -2278,7 +2278,7 @@ static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
   IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
 
   auto ArgTy = cast<PointerType>(Arg.getType());
-  auto ValueTy = ArgTy->getElementType();
+  auto ValueTy = ArgTy->getPointerElementType();
 
   // Reduce to the alloca case:
 
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 3a42a2cac928b..ce3c5153bde27 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -196,8 +196,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
       for (const auto &ArgIndex : ArgIndices) {
         // not allowed to dereference ->begin() if size() is 0
         Params.push_back(GetElementPtrInst::getIndexedType(
-            cast<PointerType>(I->getType())->getElementType(),
-            ArgIndex.second));
+            I->getType()->getPointerElementType(), ArgIndex.second));
         ArgAttrVec.push_back(AttributeSet());
         assert(Params.back());
       }
@@ -298,7 +297,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
               Ops.push_back(ConstantInt::get(IdxTy, II));
               // Keep track of the type we're currently indexing.
               if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
-                ElTy = ElPTy->getElementType();
+                ElTy = ElPTy->getPointerElementType();
               else
                 ElTy = GetElementPtrInst::getTypeAtIndex(ElTy, II);
             }
@@ -928,7 +927,7 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
   SmallPtrSet<Argument *, 8> ArgsToPromote;
   SmallPtrSet<Argument *, 8> ByValArgsToTransform;
   for (Argument *PtrArg : PointerArgs) {
-    Type *AgTy = cast<PointerType>(PtrArg->getType())->getElementType();
+    Type *AgTy = PtrArg->getType()->getPointerElementType();
 
     // Replace sret attribute with noalias. This reduces register pressure by
     // avoiding a register copy.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 3a3f169d2f516..bfe09da91f53b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2520,7 +2520,7 @@ static bool isSafeToEliminateVarargsCast(const CallBase &Call,
   if (!Call.isByValArgument(ix))
     return false;
 
-  Type *SrcElemTy = SrcTy->getElementType();
+  Type *SrcElemTy = SrcTy->getNonOpaquePointerElementType();
   Type *DstElemTy = Call.getParamByValType(ix);
   if (!SrcElemTy->isSized() || !DstElemTy->isSized())
     return false;
@@ -2785,7 +2785,7 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
           Call.removeParamAttr(ix, Attribute::ByVal);
           Call.addParamAttr(
               ix, Attribute::getWithByValType(
-                      Call.getContext(), NewTy->getElementType()));
+                      Call.getContext(), NewTy->getPointerElementType()));
         }
         Changed = true;
       }
@@ -3034,12 +3034,12 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     // sized type and the sized type has to have the same size as the old type.
     if (ParamTy != ActTy && CallerPAL.hasParamAttr(i, Attribute::ByVal)) {
       PointerType *ParamPTy = dyn_cast<PointerType>(ParamTy);
-      if (!ParamPTy || !ParamPTy->getElementType()->isSized())
+      if (!ParamPTy || !ParamPTy->getPointerElementType()->isSized())
         return false;
 
       Type *CurElTy = Call.getParamByValType(i);
       if (DL.getTypeAllocSize(CurElTy) !=
-          DL.getTypeAllocSize(ParamPTy->getElementType()))
+          DL.getTypeAllocSize(ParamPTy->getPointerElementType()))
         return false;
     }
   }
@@ -3053,16 +3053,16 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     // call.  We don't want to introduce a varargs call where one doesn't
     // already exist.
     PointerType *APTy = cast<PointerType>(Call.getCalledOperand()->getType());
-    if (FT->isVarArg()!=cast<FunctionType>(APTy->getElementType())->isVarArg())
+    if (FT->isVarArg()!=cast<FunctionType>(APTy->getPointerElementType())->isVarArg())
       return false;
 
     // If both the callee and the cast type are varargs, we still have to make
     // sure the number of fixed parameters are the same or we have the same
     // ABI issues as if we introduce a varargs call.
     if (FT->isVarArg() &&
-        cast<FunctionType>(APTy->getElementType())->isVarArg() &&
+        cast<FunctionType>(APTy->getPointerElementType())->isVarArg() &&
         FT->getNumParams() !=
-        cast<FunctionType>(APTy->getElementType())->getNumParams())
+        cast<FunctionType>(APTy->getPointerElementType())->getNumParams())
       return false;
   }
 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 8df4a4529f472..10a7c1b406a57 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -91,7 +91,7 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
 
   // Get the type really allocated and the type casted to.
   Type *AllocElTy = AI.getAllocatedType();
-  Type *CastElTy = PTy->getElementType();
+  Type *CastElTy = PTy->getPointerElementType();
   if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
 
   // This optimisation does not work for cases where the cast type
@@ -2649,8 +2649,8 @@ static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
   if (SrcPTy->isOpaque() || DstPTy->isOpaque())
     return nullptr;
 
-  Type *DstElTy = DstPTy->getElementType();
-  Type *SrcElTy = SrcPTy->getElementType();
+  Type *DstElTy = DstPTy->getNonOpaquePointerElementType();
+  Type *SrcElTy = SrcPTy->getNonOpaquePointerElementType();
 
   // When the type pointed to is not sized the cast cannot be
   // turned into a gep.
@@ -2669,8 +2669,8 @@ static Instruction *convertBitCastToGEP(BitCastInst &CI, IRBuilderBase &Builder,
   // If we found a path from the src to dest, create the getelementptr now.
   if (SrcElTy == DstElTy) {
     SmallVector<Value *, 8> Idxs(NumZeros + 1, Builder.getInt32(0));
-    GetElementPtrInst *GEP =
-        GetElementPtrInst::Create(SrcPTy->getElementType(), Src, Idxs);
+    GetElementPtrInst *GEP = GetElementPtrInst::Create(
+        SrcPTy->getNonOpaquePointerElementType(), Src, Idxs);
 
     // If the source pointer is dereferenceable, then assume it points to an
     // allocated object and apply "inbounds" to the GEP.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 0dbfdba353c4d..dad5c2ca6a50c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -345,7 +345,8 @@ void PointerReplacer::replacePointer(Instruction &I, Value *V) {
 #ifndef NDEBUG
   auto *PT = cast<PointerType>(I.getType());
   auto *NT = cast<PointerType>(V->getType());
-  assert(PT != NT && PT->getElementType() == NT->getElementType() &&
+  assert(PT != NT &&
+         PT->getPointerElementType() == NT->getPointerElementType() &&
          "Invalid usage");
 #endif
   WorkMap[&I] = V;
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 32f6a980afa8d..5de1d6fa4567a 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1372,7 +1372,7 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
 Type *
 InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
                                       SmallVectorImpl<Value *> &NewIndices) {
-  Type *Ty = PtrTy->getElementType();
+  Type *Ty = PtrTy->getPointerElementType();
   if (!Ty->isSized())
     return nullptr;
 
@@ -2311,7 +2311,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // type. For now, skip these.
   if (StrippedPtr != PtrOp && !StrippedPtrTy->isOpaque()) {
     bool HasZeroPointerIndex = false;
-    Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
+    Type *StrippedPtrEltTy = StrippedPtrTy->getNonOpaquePointerElementType();
 
     if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
       HasZeroPointerIndex = C->isZero();
@@ -2498,7 +2498,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
     Value *SrcOp = BCI->getOperand(0);
     PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
-    Type *SrcEltType = SrcType->getElementType();
+    Type *SrcEltType = SrcType->getPointerElementType();
 
     // GEP directly using the source operand if this GEP is accessing an element
     // of a bitcasted pointer to vector or array of the same dimensions:
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index bd2dc8d639fc1..3bf2d16e7d209 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1559,7 +1559,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
       auto BasePtr = CI->getOperand(OpOffset);
       if (ignoreAccess(LI, BasePtr))
         return;
-      auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+      auto Ty = BasePtr->getType()->getPointerElementType();
       MaybeAlign Alignment = Align(1);
       // Otherwise no alignment guarantees. We probably got Undef.
       if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
@@ -1656,8 +1656,7 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
                                         unsigned Granularity, uint32_t TypeSize,
                                         bool IsWrite, Value *SizeArgument,
                                         bool UseCalls, uint32_t Exp) {
-  auto *VTy = cast<FixedVectorType>(
-      cast<PointerType>(Addr->getType())->getElementType());
+  auto *VTy = cast<FixedVectorType>(Addr->getType()->getPointerElementType());
   uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   unsigned Num = VTy->getNumElements();
   auto Zero = ConstantInt::get(IntptrTy, 0);
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 727672fa0605d..92ea007691b27 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -384,7 +384,7 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
       }
 
       auto *BasePtr = CI->getOperand(0 + OpOffset);
-      auto *Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+      auto *Ty = BasePtr->getType()->getPointerElementType();
       Access.TypeSize = DL.getTypeStoreSizeInBits(Ty);
       if (auto *AlignmentConstant =
               dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
@@ -419,8 +419,7 @@ void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                               Instruction *I, Value *Addr,
                                               unsigned Alignment,
                                               uint32_t TypeSize, bool IsWrite) {
-  auto *VTy = cast<FixedVectorType>(
-      cast<PointerType>(Addr->getType())->getElementType());
+  auto *VTy = cast<FixedVectorType>(Addr->getType()->getPointerElementType());
   uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   unsigned Num = VTy->getNumElements();
   auto *Zero = ConstantInt::get(IntptrTy, 0);
diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index da8ee1f15bf8e..387ea5243265d 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -918,7 +918,7 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
 void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
     Function &, ArrayRef<LoadInst *> Loads, ArrayRef<StoreInst *> Stores) {
   auto CallbackIdx = [&](const Value *Ptr) -> int {
-    auto ElementTy = cast<PointerType>(Ptr->getType())->getElementType();
+    auto *ElementTy = Ptr->getType()->getPointerElementType();
     uint64_t TypeSize = DL->getTypeStoreSizeInBits(ElementTy);
     return TypeSize == 8     ? 0
            : TypeSize == 16  ? 1
diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index d07fa1d118e42..caad91867112c 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -980,7 +980,7 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
     if (IsNullOrUndef(CI->getArgOperand(0))) {
       Changed = true;
       Type *Ty = CI->getArgOperand(0)->getType();
-      new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+      new StoreInst(UndefValue::get(Ty->getPointerElementType()),
                     Constant::getNullValue(Ty), CI);
       Value *NewValue = UndefValue::get(CI->getType());
       LLVM_DEBUG(
@@ -1000,7 +1000,7 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
         IsNullOrUndef(CI->getArgOperand(1))) {
       Changed = true;
       Type *Ty = CI->getArgOperand(0)->getType();
-      new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+      new StoreInst(UndefValue::get(Ty->getPointerElementType()),
                     Constant::getNullValue(Ty), CI);
 
       Value *NewValue = UndefValue::get(CI->getType());
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
index 2ed87ce6295b3..35497ae5ed9af 100644
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -1513,7 +1513,7 @@ static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
   if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
     return nullptr;
 
-  Type *ElementTy = Ty->getElementType();
+  Type *ElementTy = Ty->getNonOpaquePointerElementType();
   if (!ElementTy->isSized())
     return nullptr; // We can't GEP through an unsized element.
 
@@ -1572,7 +1572,7 @@ static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
   APInt Int8PtrOffset(Offset.getBitWidth(), 0);
 
   PointerType *TargetPtrTy = cast<PointerType>(PointerTy);
-  Type *TargetTy = TargetPtrTy->getElementType();
+  Type *TargetTy = TargetPtrTy->getNonOpaquePointerElementType();
 
   // As `addrspacecast` is , `Ptr` (the storage pointer) may have different
   // address space from the expected `PointerTy` (the pointer to be used).
diff --git a/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index 6b7419abe1d1f..3606c8a4b073f 100644
--- a/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -270,7 +270,7 @@ Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
   Type *Ty = V->getType();
   PtrTy = dyn_cast<PointerType>(Ty);
   if (PtrTy)
-    Ty = PtrTy->getElementType();
+    Ty = PtrTy->getPointerElementType();
   Size = cast<FixedVectorType>(Ty)->getNumElements();
   if (!CachePtr)
     Tmp.resize(Size, nullptr);
@@ -288,7 +288,8 @@ Value *Scatterer::operator[](unsigned I) {
     return CV[I];
   IRBuilder<> Builder(BB, BBI);
   if (PtrTy) {
-    Type *ElTy = cast<VectorType>(PtrTy->getElementType())->getElementType();
+    Type *ElTy =
+        cast<VectorType>(PtrTy->getPointerElementType())->getElementType();
     if (!CV[0]) {
       Type *NewPtrTy = PointerType::get(ElTy, PtrTy->getAddressSpace());
       CV[0] = Builder.CreateBitCast(V, NewPtrTy, V->getName() + ".i0");
diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index fdc914a72bfd7..b8b0bbbc7a4ed 100644
--- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -28,7 +28,7 @@ static bool isCString(const Value *Arg) {
   if (!PtrTy)
     return false;
 
-  auto IntTy = dyn_cast<IntegerType>(PtrTy->getElementType());
+  auto IntTy = dyn_cast<IntegerType>(PtrTy->getPointerElementType());
   if (!IntTy)
     return false;
 
diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
index 8dc4702993c35..3d75dd57456de 100644
--- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
+++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp
@@ -297,7 +297,7 @@ static void createMemMoveLoop(Instruction *InsertBefore, Value *SrcAddr,
   Function *F = OrigBB->getParent();
   const DataLayout &DL = F->getParent()->getDataLayout();
 
-  Type *EltTy = cast<PointerType>(SrcAddr->getType())->getElementType();
+  Type *EltTy = SrcAddr->getType()->getPointerElementType();
 
   // Create the a comparison of src and dst, based on which we jump to either
   // the forward-copy part of the function (if src >= dst) or the backwards-copy
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index fe8215d3b9916..c6044f8fdffd7 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -173,7 +173,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
     auto *PtrTy = cast<PointerType>(Ty);
     if (DL.isNonIntegralPointerType(PtrTy)) {
       auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
-      assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 &&
+      assert(DL.getTypeAllocSize(Int8PtrTy->getPointerElementType()) == 1 &&
              "alloc size of i8 must by 1 byte for the GEP to be correct");
       auto *GEP = Builder.CreateGEP(
           Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
@@ -471,7 +471,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     // indexes into the array implied by the pointer operand; the rest of
     // the indices index into the element or field type selected by the
     // preceding index.
-    Type *ElTy = PTy->getElementType();
+    Type *ElTy = PTy->getNonOpaquePointerElementType();
     for (;;) {
       // If the scale size is not 0, attempt to factor out a scale for
       // array indexing.
@@ -640,8 +640,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
     Value *Casted = V;
     if (V->getType() != PTy)
       Casted = InsertNoopCastOfTo(Casted, PTy);
-    Value *GEP = Builder.CreateGEP(PTy->getElementType(), Casted, GepIndices,
-                                   "scevgep");
+    Value *GEP = Builder.CreateGEP(PTy->getNonOpaquePointerElementType(),
+                                   Casted, GepIndices, "scevgep");
     Ops.push_back(SE.getUnknown(GEP));
   }
 
diff --git a/llvm/tools/llvm-stress/llvm-stress.cpp b/llvm/tools/llvm-stress/llvm-stress.cpp
index bb11c18b57fa4..941b529da9b2d 100644
--- a/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -346,8 +346,7 @@ struct LoadModifier: public Modifier {
   void Act() override {
     // Try to use predefined pointers. If non-exist, use undef pointer value;
     Value *Ptr = getRandomPointerValue();
-    PointerType *Tp = cast<PointerType>(Ptr->getType());
-    Value *V = new LoadInst(Tp->getElementType(), Ptr, "L",
+    Value *V = new LoadInst(Ptr->getType()->getPointerElementType(), Ptr, "L",
                             BB->getTerminator());
     PT->push_back(V);
   }
@@ -360,8 +359,7 @@ struct StoreModifier: public Modifier {
   void Act() override {
     // Try to use predefined pointers. If non-exist, use undef pointer value;
     Value *Ptr = getRandomPointerValue();
-    PointerType *Tp = cast<PointerType>(Ptr->getType());
-    Value *Val = getRandomValue(Tp->getElementType());
+    Value *Val = getRandomValue(Ptr->getType()->getPointerElementType());
     Type  *ValTy = Val->getType();
 
     // Do not store vectors of i1s because they are unsupported

From 22487280dcea8261996385b852ca4470b8e4846b Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 10:07:45 +0100
Subject: [PATCH 514/946] [NFC] Remove more uses of
 PointerType::getElementType() (NFC)

Replace more uses which I missed in the first pass with
Type::getPointerElementType().
---
 llvm/unittests/AsmParser/AsmParserTest.cpp     | 12 ++++++------
 .../Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp | 18 ++++++++----------
 mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp        |  4 ++--
 3 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 9a7d70ad1ed02..7639ff50571e6 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -252,7 +252,7 @@ TEST(AsmParserTest, TypeWithSlotMappingParsing) {
   ASSERT_TRUE(Ty->isPointerTy());
 
   PointerType *PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isIntegerTy());
   ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
 
@@ -262,11 +262,11 @@ TEST(AsmParserTest, TypeWithSlotMappingParsing) {
   ASSERT_TRUE(Ty->isPointerTy());
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isPointerTy());
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isIntegerTy());
   ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
 
@@ -386,7 +386,7 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 4);
 
   PointerType *PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isIntegerTy());
   ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
 
@@ -397,11 +397,11 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 5);
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isPointerTy());
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getElementType();
+  Ty = PT->getPointerElementType();
   ASSERT_TRUE(Ty->isIntegerTy());
   ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
index e4932e84cd28a..143630ce53ae0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
@@ -280,11 +280,10 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder,
     if (auto attr = op.getAttrOfType<FlatSymbolRefAttr>("callee"))
       return builder.CreateCall(
           moduleTranslation.lookupFunction(attr.getValue()), operandsRef);
-    auto *calleePtrType =
-        cast<llvm::PointerType>(operandsRef.front()->getType());
-    auto *calleeType =
-        cast<llvm::FunctionType>(calleePtrType->getElementType());
-    return builder.CreateCall(calleeType, operandsRef.front(),
+    auto *calleeType = operandsRef.front()->getType();
+    auto *calleeFunctionType =
+        cast<llvm::FunctionType>(calleeType->getPointerElementType());
+    return builder.CreateCall(calleeFunctionType, operandsRef.front(),
                               operandsRef.drop_front());
   };
 
@@ -349,12 +348,11 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder,
           moduleTranslation.lookupBlock(invOp.getSuccessor(0)),
           moduleTranslation.lookupBlock(invOp.getSuccessor(1)), operandsRef);
     } else {
-      auto *calleePtrType =
-          cast<llvm::PointerType>(operandsRef.front()->getType());
-      auto *calleeType =
-          cast<llvm::FunctionType>(calleePtrType->getElementType());
+      auto *calleeType = operandsRef.front()->getType();
+      auto *calleeFunctionType =
+          cast<llvm::FunctionType>(calleeType->getPointerElementType());
       result = builder.CreateInvoke(
-          calleeType, operandsRef.front(),
+          calleeFunctionType, operandsRef.front(),
           moduleTranslation.lookupBlock(invOp.getSuccessor(0)),
           moduleTranslation.lookupBlock(invOp.getSuccessor(1)),
           operandsRef.drop_front());
diff --git a/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp b/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp
index 4efb5332cced5..210ff9e4ebd05 100644
--- a/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp
+++ b/mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp
@@ -95,8 +95,8 @@ class TypeFromLLVMIRTranslatorImpl {
 
   /// Translates the given pointer type.
   Type translate(llvm::PointerType *type) {
-    return LLVM::LLVMPointerType::get(translateType(type->getElementType()),
-                                      type->getAddressSpace());
+    return LLVM::LLVMPointerType::get(
+        translateType(type->getPointerElementType()), type->getAddressSpace());
   }
 
   /// Translates the given structure type.

From 184591aeeb5a531f2315c3d7cddcd199c87ecb2c Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 09:57:26 +0100
Subject: [PATCH 515/946] [OpaquePtrs] Deprecate PointerType::getElementType()

This deprecates PointerType::getElementType() in favor of
Type::getPointerElementType(). The motivation is to make it more
apparent when code accesses the pointer element type, because
getElementType() may also also refer to at least
ArrayType::getElementType() and VectorType::getElementType().

Differential Revision: https://reviews.llvm.org/D117885
---
 llvm/include/llvm/IR/DerivedTypes.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h
index 8a1b26e699e32..f52ce3cde318d 100644
--- a/llvm/include/llvm/IR/DerivedTypes.h
+++ b/llvm/include/llvm/IR/DerivedTypes.h
@@ -667,9 +667,11 @@ class PointerType : public Type {
                                              unsigned AddressSpace) {
     if (PT->isOpaque())
       return get(PT->getContext(), AddressSpace);
-    return get(PT->getElementType(), AddressSpace);
+    return get(PT->PointeeTy, AddressSpace);
   }
 
+  [[deprecated("Pointer element types are deprecated. You can *temporarily* "
+               "use Type::getPointerElementType() instead")]]
   Type *getElementType() const {
     assert(!isOpaque() && "Attempting to get element type of opaque pointer");
     return PointeeTy;

From 13252160c3984b52a210bfa6ec64b9be4c911920 Mon Sep 17 00:00:00 2001
From: David Sherwood <david.sherwood@arm.com>
Date: Fri, 21 Jan 2022 09:56:49 +0000
Subject: [PATCH 516/946] [NFC] Move useSVEForFixedLengthVectors into
 AArch64Subtarget.h

Given how small the function is and how often it gets used it
makes more sense to live in the header file.

Differential Revision: https://reviews.llvm.org/D117883
---
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 5 -----
 llvm/lib/Target/AArch64/AArch64Subtarget.h   | 5 ++++-
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index f4d046078d68e..a4f4b85821822 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -373,9 +373,4 @@ void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
     MFI.computeMaxCallFrameSize(MF);
 }
 
-bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
-  // Prefer NEON unless larger SVE registers are available.
-  return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
-}
-
 bool AArch64Subtarget::useAA() const { return UseAA; }
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 5057dbec653e1..3e3c0f6aba15c 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -680,7 +680,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
     return MinSVEVectorSizeInBits;
   }
 
-  bool useSVEForFixedLengthVectors() const;
+  bool useSVEForFixedLengthVectors() const {
+    // Prefer NEON unless larger SVE registers are available.
+    return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+  }
 
   unsigned getVScaleForTuning() const { return VScaleForTuning; }
 };

From 2233befa5dc4c2c0ed597b295cfa353039c21e23 Mon Sep 17 00:00:00 2001
From: Victor Perez <victor.perez@codeplay.com>
Date: Tue, 25 Jan 2022 10:05:50 +0000
Subject: [PATCH 517/946] [LegalizeTypes][VP] Add splitting support for
 vp.gather and vp.scatter

Split these nodes in a similar way as their masked versions.

Reviewed By: frasercrmck, craig.topper

Differential Revision: https://reviews.llvm.org/D117760
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  10 +-
 .../SelectionDAG/LegalizeVectorTypes.cpp      | 266 +++---
 .../RISCV/rvv/fixed-vectors-vpgather.ll       | 801 ++++++++++++++++++
 .../RISCV/rvv/fixed-vectors-vpscatter.ll      | 471 +++++++++-
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 362 ++++++++
 .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll     | 368 +++++++-
 6 files changed, 2137 insertions(+), 141 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 0dccb6f3f6ac9..4d8daa82d8c02 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -821,6 +821,9 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   /// Split mask operator of a VP intrinsic.
   std::pair<SDValue, SDValue> SplitMask(SDValue Mask);
 
+  /// Split mask operator of a VP intrinsic in a given location.
+  std::pair<SDValue, SDValue> SplitMask(SDValue Mask, const SDLoc &DL);
+
   // Helper function for incrementing the pointer when splitting
   // memory operations
   void IncrementPointer(MemSDNode *N, EVT MemVT, MachinePointerInfo &MPI,
@@ -851,7 +854,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi);
-  void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_Gather(MemSDNode *VPGT, SDValue &Lo, SDValue &Hi,
+                          bool SplitSETCC = false);
   void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -879,8 +883,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo);
   SDValue SplitVecOp_MSTORE(MaskedStoreSDNode *N, unsigned OpNo);
-  SDValue SplitVecOp_MSCATTER(MaskedScatterSDNode *N, unsigned OpNo);
-  SDValue SplitVecOp_MGATHER(MaskedGatherSDNode *MGT, unsigned OpNo);
+  SDValue SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo);
+  SDValue SplitVecOp_Gather(MemSDNode *MGT, unsigned OpNo);
   SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
   SDValue SplitVecOp_VSETCC(SDNode *N);
   SDValue SplitVecOp_FP_ROUND(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 2d59a057f2031..f8f3d2ce8e45d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -944,7 +944,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
     SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
     break;
   case ISD::MGATHER:
-    SplitVecRes_MGATHER(cast<MaskedGatherSDNode>(N), Lo, Hi);
+  case ISD::VP_GATHER:
+    SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
     break;
   case ISD::SETCC:
     SplitVecRes_SETCC(N, Lo, Hi);
@@ -1118,12 +1119,17 @@ void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
 }
 
 std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) {
+  return SplitMask(Mask, SDLoc(Mask));
+}
+
+std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask,
+                                                        const SDLoc &DL) {
   SDValue MaskLo, MaskHi;
   EVT MaskVT = Mask.getValueType();
   if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
     GetSplitVector(Mask, MaskLo, MaskHi);
   else
-    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, SDLoc(Mask));
+    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
   return std::make_pair(MaskLo, MaskHi);
 }
 
@@ -1923,61 +1929,85 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
 
 }
 
-void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
-                                         SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
+                                          SDValue &Hi, bool SplitSETCC) {
   EVT LoVT, HiVT;
-  SDLoc dl(MGT);
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
-
-  SDValue Ch = MGT->getChain();
-  SDValue Ptr = MGT->getBasePtr();
-  SDValue Mask = MGT->getMask();
-  SDValue PassThru = MGT->getPassThru();
-  SDValue Index = MGT->getIndex();
-  SDValue Scale = MGT->getScale();
-  EVT MemoryVT = MGT->getMemoryVT();
-  Align Alignment = MGT->getOriginalAlign();
-  ISD::LoadExtType ExtType = MGT->getExtensionType();
+  SDLoc dl(N);
+  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+
+  SDValue Ch = N->getChain();
+  SDValue Ptr = N->getBasePtr();
+  struct Operands {
+    SDValue Mask;
+    SDValue Index;
+    SDValue Scale;
+  } Ops = [&]() -> Operands {
+    if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) {
+      return {MSC->getMask(), MSC->getIndex(), MSC->getScale()};
+    }
+    auto *VPSC = cast<VPGatherSDNode>(N);
+    return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale()};
+  }();
+
+  EVT MemoryVT = N->getMemoryVT();
+  Align Alignment = N->getOriginalAlign();
 
   // Split Mask operand
   SDValue MaskLo, MaskHi;
-  if (Mask.getOpcode() == ISD::SETCC) {
-    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
   } else {
-    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-      GetSplitVector(Mask, MaskLo, MaskHi);
-    else
-      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+    std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl);
   }
 
   EVT LoMemVT, HiMemVT;
   // Split MemoryVT
   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
-  SDValue PassThruLo, PassThruHi;
-  if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(PassThru, PassThruLo, PassThruHi);
-  else
-    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
-
   SDValue IndexHi, IndexLo;
-  if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(Index, IndexLo, IndexHi);
+  if (getTypeAction(Ops.Index.getValueType()) ==
+      TargetLowering::TypeSplitVector)
+    GetSplitVector(Ops.Index, IndexLo, IndexHi);
   else
-    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
+    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);
 
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MGT->getPointerInfo(), MachineMemOperand::MOLoad,
-      MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
-      MGT->getRanges());
+      N->getPointerInfo(), MachineMemOperand::MOLoad,
+      MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
 
-  SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
-  Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
-                           MMO, MGT->getIndexType(), ExtType);
+  if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
+    SDValue PassThru = MGT->getPassThru();
+    SDValue PassThruLo, PassThruHi;
+    if (getTypeAction(PassThru.getValueType()) ==
+        TargetLowering::TypeSplitVector)
+      GetSplitVector(PassThru, PassThruLo, PassThruHi);
+    else
+      std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
 
-  SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
-  Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
-                           MMO, MGT->getIndexType(), ExtType);
+    ISD::LoadExtType ExtType = MGT->getExtensionType();
+    ISD::MemIndexType IndexTy = MGT->getIndexType();
+
+    SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale};
+    Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
+                             OpsLo, MMO, IndexTy, ExtType);
+
+    SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale};
+    Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
+                             OpsHi, MMO, IndexTy, ExtType);
+  } else {
+    auto *VPGT = cast<VPGatherSDNode>(N);
+    SDValue EVLLo, EVLHi;
+    std::tie(EVLLo, EVLHi) =
+        DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl);
+
+    SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
+    Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
+                         MMO, VPGT->getIndexType());
+
+    SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
+    Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
+                         MMO, VPGT->getIndexType());
+  }
 
   // Build a factor node to remember that this load is independent of the
   // other one.
@@ -1986,10 +2016,9 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
 
   // Legalize the chain result - switch anything that used the old chain to
   // use the new one.
-  ReplaceValueWith(SDValue(MGT, 1), Ch);
+  ReplaceValueWith(SDValue(N, 1), Ch);
 }
 
-
 void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
   assert(N->getValueType(0).isVector() &&
          N->getOperand(0).getValueType().isVector() &&
@@ -2286,10 +2315,12 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
     Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
     break;
   case ISD::MSCATTER:
-    Res = SplitVecOp_MSCATTER(cast<MaskedScatterSDNode>(N), OpNo);
+  case ISD::VP_SCATTER:
+    Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo);
     break;
   case ISD::MGATHER:
-    Res = SplitVecOp_MGATHER(cast<MaskedGatherSDNode>(N), OpNo);
+  case ISD::VP_GATHER:
+    Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo);
     break;
   case ISD::VSELECT:
     Res = SplitVecOp_VSELECT(N, OpNo);
@@ -2663,69 +2694,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
 }
 
-SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
-                                             unsigned OpNo) {
-  EVT LoVT, HiVT;
-  SDLoc dl(MGT);
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MGT->getValueType(0));
-
-  SDValue Ch = MGT->getChain();
-  SDValue Ptr = MGT->getBasePtr();
-  SDValue Index = MGT->getIndex();
-  SDValue Scale = MGT->getScale();
-  SDValue Mask = MGT->getMask();
-  SDValue PassThru = MGT->getPassThru();
-  Align Alignment = MGT->getOriginalAlign();
-  ISD::LoadExtType ExtType = MGT->getExtensionType();
-
-  SDValue MaskLo, MaskHi;
-  if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-    // Split Mask operand
-    GetSplitVector(Mask, MaskLo, MaskHi);
-  else
-    std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
-
-  EVT MemoryVT = MGT->getMemoryVT();
-  EVT LoMemVT, HiMemVT;
-  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
-  SDValue PassThruLo, PassThruHi;
-  if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(PassThru, PassThruLo, PassThruHi);
-  else
-    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);
-
-  SDValue IndexHi, IndexLo;
-  if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(Index, IndexLo, IndexHi);
-  else
-    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, dl);
-
-  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
-      MGT->getPointerInfo(), MachineMemOperand::MOLoad,
-      MemoryLocation::UnknownSize, Alignment, MGT->getAAInfo(),
-      MGT->getRanges());
-
-  SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
-  SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
-                                   OpsLo, MMO, MGT->getIndexType(), ExtType);
-
-  SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
-  SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
-                                   OpsHi, MMO, MGT->getIndexType(), ExtType);
-
-  // Build a factor node to remember that this load is independent of the
-  // other one.
-  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
-                   Hi.getValue(1));
-
-  // Legalize the chain result - switch anything that used the old chain to
-  // use the new one.
-  ReplaceValueWith(SDValue(MGT, 1), Ch);
+SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) {
+  (void)OpNo;
+  SDValue Lo, Hi;
+  SplitVecRes_Gather(N, Lo, Hi);
 
-  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MGT->getValueType(0), Lo,
-                            Hi);
-  ReplaceValueWith(SDValue(MGT, 0), Res);
+  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi);
+  ReplaceValueWith(SDValue(N, 0), Res);
   return SDValue();
 }
 
@@ -2886,64 +2861,87 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
   return Res;
 }
 
-SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
-                                              unsigned OpNo) {
-  SDValue Ch  = N->getChain();
+SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
+  SDValue Ch = N->getChain();
   SDValue Ptr = N->getBasePtr();
-  SDValue Mask = N->getMask();
-  SDValue Index = N->getIndex();
-  SDValue Scale = N->getScale();
-  SDValue Data = N->getValue();
   EVT MemoryVT = N->getMemoryVT();
   Align Alignment = N->getOriginalAlign();
   SDLoc DL(N);
-
+  struct Operands {
+    SDValue Mask;
+    SDValue Index;
+    SDValue Scale;
+    SDValue Data;
+  } Ops = [&]() -> Operands {
+    if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
+      return {MSC->getMask(), MSC->getIndex(), MSC->getScale(),
+              MSC->getValue()};
+    }
+    auto *VPSC = cast<VPScatterSDNode>(N);
+    return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(),
+            VPSC->getValue()};
+  }();
   // Split all operands
 
   EVT LoMemVT, HiMemVT;
   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
   SDValue DataLo, DataHi;
-  if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
+  if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector)
     // Split Data operand
-    GetSplitVector(Data, DataLo, DataHi);
+    GetSplitVector(Ops.Data, DataLo, DataHi);
   else
-    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+    std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL);
 
   // Split Mask operand
   SDValue MaskLo, MaskHi;
-  if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
-    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+  if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) {
+    SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
   } else {
-    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
-      GetSplitVector(Mask, MaskLo, MaskHi);
-    else
-      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+    std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL);
   }
 
   SDValue IndexHi, IndexLo;
-  if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
-    GetSplitVector(Index, IndexLo, IndexHi);
+  if (getTypeAction(Ops.Index.getValueType()) ==
+      TargetLowering::TypeSplitVector)
+    GetSplitVector(Ops.Index, IndexLo, IndexHi);
   else
-    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
+    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
 
   SDValue Lo;
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       N->getPointerInfo(), MachineMemOperand::MOStore,
       MemoryLocation::UnknownSize, Alignment, N->getAAInfo(), N->getRanges());
 
-  SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
-  Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT,
-                            DL, OpsLo, MMO, N->getIndexType(),
-                            N->isTruncatingStore());
+  if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
+    SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
+    Lo =
+        DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
+                             MSC->getIndexType(), MSC->isTruncatingStore());
+
+    // The order of the Scatter operation after split is well defined. The "Hi"
+    // part comes after the "Lo". So these two operations should be chained one
+    // after another.
+    SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale};
+    return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi,
+                                MMO, MSC->getIndexType(),
+                                MSC->isTruncatingStore());
+  }
+  auto *VPSC = cast<VPScatterSDNode>(N);
+  SDValue EVLLo, EVLHi;
+  std::tie(EVLLo, EVLHi) =
+      DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL);
+
+  SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
+  Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
+                        VPSC->getIndexType());
 
   // The order of the Scatter operation after split is well defined. The "Hi"
   // part comes after the "Lo". So these two operations should be chained one
   // after another.
-  SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
-  return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT,
-                              DL, OpsHi, MMO, N->getIndexType(),
-                              N->isTruncatingStore());
+  SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
+  return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO,
+                          VPSC->getIndexType());
 }
 
 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index 7584a1507e706..bfc52bbce7242 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -271,6 +271,54 @@ define <8 x i8> @vpgather_baseidx_v8i8(i8* %base, <8 x i8> %idxs, <8 x i1> %m, i
   ret <8 x i8> %v
 }
 
+declare <32 x i8> @llvm.vp.gather.v32i8.v32p0i8(<32 x i8*>, <32 x i1>, i32)
+
+define <32 x i8> @vpgather_baseidx_v32i8(i8* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v16, v8
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB13_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB13_2:
+; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v16, v12
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v10, 2
+; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
+; RV64-NEXT:    vluxei64.v v12, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB13_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB13_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v16, v8
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vluxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT:    li a0, 32
+; RV64-NEXT:    vsetvli zero, a0, e8, m2, tu, mu
+; RV64-NEXT:    vslideup.vi v8, v12, 16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds i8, i8* %base, <32 x i8> %idxs
+  %v = call <32 x i8> @llvm.vp.gather.v32i8.v32p0i8(<32 x i8*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x i8> %v
+}
+
 declare <2 x i16> @llvm.vp.gather.v2i16.v2p0i16(<2 x i16*>, <2 x i1>, i32)
 
 define <2 x i16> @vpgather_v2i16(<2 x i16*> %ptrs, <2 x i1> %m, i32 zeroext %evl) {
@@ -1870,3 +1918,756 @@ define <8 x double> @vpgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x
   %v = call <8 x double> @llvm.vp.gather.v8f64.v8p0f64(<8 x double*> %ptrs, <8 x i1> %m, i32 %evl)
   ret <8 x double> %v
 }
+
+declare <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*>, <32 x i1>, i32)
+
+define <32 x double> @vpgather_v32f64(<32 x double*> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a2, a0, -16
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    bltu a0, a2, .LBB86_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB86_2:
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v1, 2
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (zero), v24, v0.t
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:    bltu a0, a1, .LBB86_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:  .LBB86_4:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v24
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a2, a0, -16
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    bltu a0, a2, .LBB86_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB86_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v24, 2
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (zero), v16, v0.t
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    bltu a0, a1, .LBB86_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:  .LBB86_4:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
+; RV64-NEXT:    ret
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32i8_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a3, .LBB87_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB87_2:
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v16, v8
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a1, a3, .LBB87_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB87_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 16
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB87_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB87_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v10, 2
+; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v16, v12
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB87_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB87_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v8
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i8> %idxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV32-NEXT:    vslidedown.vi v12, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vsext.vf8 v16, v12
+; RV32-NEXT:    bltu a1, a3, .LBB88_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB88_2:
+; RV32-NEXT:    vsext.vf8 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v10, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v12, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v12, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB88_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB88_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vsext.vf8 v16, v12
+; RV64-NEXT:    bltu a1, a3, .LBB88_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB88_2:
+; RV64-NEXT:    vsext.vf8 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v10, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB88_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB88_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext <32 x i8> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(double* %base, <32 x i8> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v10, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV32-NEXT:    vslidedown.vi v12, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vzext.vf8 v16, v12
+; RV32-NEXT:    bltu a1, a3, .LBB89_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB89_2:
+; RV32-NEXT:    vzext.vf8 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v10, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v12, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v12, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB89_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB89_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v10
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v10, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e8, m2, ta, mu
+; RV64-NEXT:    vslidedown.vi v12, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vzext.vf8 v16, v12
+; RV64-NEXT:    bltu a1, a3, .LBB89_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB89_2:
+; RV64-NEXT:    vzext.vf8 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v10, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB89_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB89_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v10
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext <32 x i8> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32i16_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a3, .LBB90_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB90_2:
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf2 v16, v8
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a1, a3, .LBB90_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB90_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 16
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32i16_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB90_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB90_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v12, 2
+; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV64-NEXT:    vslidedown.vi v16, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v16
+; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB90_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB90_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i16> %idxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vsext.vf4 v16, v24
+; RV32-NEXT:    bltu a1, a3, .LBB91_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB91_2:
+; RV32-NEXT:    vsext.vf4 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v12, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB91_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB91_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vsext.vf4 v16, v24
+; RV64-NEXT:    bltu a1, a3, .LBB91_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB91_2:
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v12, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB91_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB91_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext <32 x i16> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(double* %base, <32 x i16> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vzext.vf4 v16, v24
+; RV32-NEXT:    bltu a1, a3, .LBB92_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB92_2:
+; RV32-NEXT:    vzext.vf4 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v12, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB92_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB92_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e16, m4, ta, mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vzext.vf4 v16, v24
+; RV64-NEXT:    bltu a1, a3, .LBB92_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB92_2:
+; RV64-NEXT:    vzext.vf4 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v12, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB92_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB92_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext <32 x i16> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a3, .LBB93_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB93_2:
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v8, 3
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a1, a3, .LBB93_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB93_4:
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v16, 16
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB93_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB93_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v1, 2
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v16, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf2 v24, v16
+; RV64-NEXT:    vsll.vi v16, v24, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB93_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB93_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf2 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i32> %idxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vsext.vf2 v16, v24
+; RV32-NEXT:    bltu a1, a3, .LBB94_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB94_2:
+; RV32-NEXT:    vsext.vf2 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v1, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v8, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v4, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB94_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB94_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vsext.vf2 v16, v24
+; RV64-NEXT:    bltu a1, a3, .LBB94_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB94_2:
+; RV64-NEXT:    vsext.vf2 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v1, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB94_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB94_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext <32 x i32> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v24, v8, 16
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vzext.vf2 v16, v24
+; RV32-NEXT:    bltu a1, a3, .LBB95_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB95_2:
+; RV32-NEXT:    vzext.vf2 v24, v8
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v1, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v8, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v4, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB95_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB95_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v24, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v24, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vzext.vf2 v16, v24
+; RV64-NEXT:    bltu a1, a3, .LBB95_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB95_2:
+; RV64-NEXT:    vzext.vf2 v24, v8
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v1, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB95_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB95_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext <32 x i32> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpgather_baseidx_v32f64(double* %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi a3, a1, -16
+; RV32-NEXT:    vmv1r.v v24, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a1, a3, .LBB96_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB96_2:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v24, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v28, v16, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    bltu a1, a2, .LBB96_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB96_4:
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v28, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    vluxei32.v v8, (a0), v28, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi a3, a1, -16
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a3, .LBB96_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB96_2:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v24, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    bltu a1, a2, .LBB96_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB96_4:
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %idxs
+  %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0f64(<32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret <32 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
index 1d5cad5d40212..5fe4ad0cfe635 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare void @llvm.vp.scatter.v2i8.v2p0i8(<2 x i8>, <2 x i8*>, <2 x i1>, i32)
@@ -1716,3 +1716,470 @@ define void @vpscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64>
   call void @llvm.vp.scatter.v8f64.v8p0f64(<8 x double> %val, <8 x double*> %ptrs, <8 x i1> %m, i32 %evl)
   ret void
 }
+
+declare void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double>, <32 x double*>, <32 x i1>, i32)
+
+define void @vpscatter_v32f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a2, 32
+; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
+; RV32-NEXT:    vle32.v v24, (a0)
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a0, .LBB79_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB79_2:
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    addi a2, a1, -16
+; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB79_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a0, a2
+; RV32-NEXT:  .LBB79_4:
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v8, v24, 16
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v16, (zero), v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vle64.v v24, (a0)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    li a3, 16
+; RV64-NEXT:    addi a0, a0, 128
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:    bltu a2, a3, .LBB79_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB79_2:
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    vle64.v v16, (a0)
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    addi a0, a2, -16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
+; RV64-NEXT:    bltu a2, a0, .LBB79_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a3, a0
+; RV64-NEXT:  .LBB79_4:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v0, 2
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vle32.v v24, (a1)
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:    bltu a2, a3, .LBB80_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB80_2:
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    addi a1, a2, -16
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    bltu a2, a1, .LBB80_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB80_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v8, v24, 16
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a4, a3, 3
+; RV64-NEXT:    add a3, a4, a3
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV64-NEXT:    vle32.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vs1r.v v0, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB80_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a3, 16
+; RV64-NEXT:  .LBB80_2:
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8re8.v v0, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsext.vf2 v24, v0
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, a2, -16
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vl1r.v v0, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    bltu a2, a3, .LBB80_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a1, a3
+; RV64-NEXT:  .LBB80_4:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v0, 2
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8re8.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vslidedown.vi v8, v8, 16
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf2 v24, v8
+; RV64-NEXT:    vsll.vi v8, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a1, a0, 3
+; RV64-NEXT:    add a0, a1, a0
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i32> %idxs
+  call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    sub sp, sp, a3
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vle32.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v8, v24, 16
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    vsext.vf2 v8, v24
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:    bltu a2, a3, .LBB81_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB81_2:
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vsext.vf2 v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli a4, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    addi a1, a2, -16
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vl8re8.v v8, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    bltu a2, a1, .LBB81_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB81_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV64-NEXT:    vle32.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v8, v24, 16
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    li a3, 16
+; RV64-NEXT:    vsext.vf2 v8, v24
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:    bltu a2, a3, .LBB81_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB81_2:
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsext.vf2 v16, v24
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    addi a1, a2, -16
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 3
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
+; RV64-NEXT:    bltu a2, a1, .LBB81_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB81_4:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v0, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %eidxs = sext <32 x i32> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, double* %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    sub sp, sp, a3
+; RV32-NEXT:    li a3, 32
+; RV32-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV32-NEXT:    vle32.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV32-NEXT:    vslidedown.vi v8, v24, 16
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    li a3, 16
+; RV32-NEXT:    vzext.vf2 v8, v24
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:    bltu a2, a3, .LBB82_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB82_2:
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vzext.vf2 v16, v24
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli a4, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    addi a1, a2, -16
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vl8re8.v v8, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    bltu a2, a1, .LBB82_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB82_4:
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v0, 2
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsetvli zero, a3, e32, m8, ta, mu
+; RV64-NEXT:    vle32.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 16, e32, m8, ta, mu
+; RV64-NEXT:    vslidedown.vi v8, v24, 16
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    li a3, 16
+; RV64-NEXT:    vzext.vf2 v8, v24
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:    bltu a2, a3, .LBB82_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a1, 16
+; RV64-NEXT:  .LBB82_2:
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vzext.vf2 v16, v24
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    addi a1, a2, -16
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 3
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8re8.v v24, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
+; RV64-NEXT:    bltu a2, a1, .LBB82_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB82_4:
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v0, 2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %eidxs = zext <32 x i32> %idxs to <32 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <32 x i64> %eidxs
+  call void @llvm.vp.scatter.v32f64.v32p0f64(<32 x double> %val, <32 x double*> %ptrs, <32 x i1> %m, i32 %evl)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index dc38a17e10a25..f71a2f86fee7d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -251,6 +251,107 @@ define <vscale x 8 x i8> @vpgather_baseidx_nxv8i8(i8* %base, <vscale x 8 x i8> %
   ret <vscale x 8 x i8> %v
 }
 
+declare <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8> %idxs, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_nxv32i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    srli a5, a2, 2
+; RV32-NEXT:    vsetvli a4, zero, e8, mf2, ta, mu
+; RV32-NEXT:    slli a2, a2, 1
+; RV32-NEXT:    sub a4, a1, a2
+; RV32-NEXT:    vslidedown.vx v0, v0, a5
+; RV32-NEXT:    bltu a1, a4, .LBB12_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:  .LBB12_2:
+; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v24, v10
+; RV32-NEXT:    vsetvli zero, a3, e8, m2, ta, mu
+; RV32-NEXT:    vluxei32.v v18, (a0), v24, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB12_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB12_4:
+; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v24, v8
+; RV32-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT:    vmv4r.v v8, v16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_nxv32i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli t0, a3, 1
+; RV64-NEXT:    sub a4, a1, t0
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li t1, 0
+; RV64-NEXT:    li a7, 0
+; RV64-NEXT:    bltu a1, a4, .LBB12_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a7, a4
+; RV64-NEXT:  .LBB12_2:
+; RV64-NEXT:    sub a4, a7, a3
+; RV64-NEXT:    mv a2, t1
+; RV64-NEXT:    bltu a7, a4, .LBB12_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a2, a4
+; RV64-NEXT:  .LBB12_4:
+; RV64-NEXT:    srli a4, a3, 2
+; RV64-NEXT:    vsetvli a5, zero, e8, mf2, ta, mu
+; RV64-NEXT:    vslidedown.vx v13, v12, a4
+; RV64-NEXT:    srli a6, a3, 3
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vx v0, v13, a6
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v11
+; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
+; RV64-NEXT:    vluxei64.v v19, (a0), v24, v0.t
+; RV64-NEXT:    bltu a1, t0, .LBB12_6
+; RV64-NEXT:  # %bb.5:
+; RV64-NEXT:    mv a1, t0
+; RV64-NEXT:  .LBB12_6:
+; RV64-NEXT:    sub a2, a1, a3
+; RV64-NEXT:    bltu a1, a2, .LBB12_8
+; RV64-NEXT:  # %bb.7:
+; RV64-NEXT:    mv t1, a2
+; RV64-NEXT:  .LBB12_8:
+; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vx v0, v12, a6
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v9
+; RV64-NEXT:    vsetvli zero, t1, e8, m1, ta, mu
+; RV64-NEXT:    vluxei64.v v17, (a0), v24, v0.t
+; RV64-NEXT:    bltu a1, a3, .LBB12_10
+; RV64-NEXT:  # %bb.9:
+; RV64-NEXT:    mv a1, a3
+; RV64-NEXT:  .LBB12_10:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v8
+; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v16, (a0), v24, v0.t
+; RV64-NEXT:    bltu a7, a3, .LBB12_12
+; RV64-NEXT:  # %bb.11:
+; RV64-NEXT:    mv a7, a3
+; RV64-NEXT:  .LBB12_12:
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf8 v24, v10
+; RV64-NEXT:    vsetvli zero, a7, e8, m1, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v13
+; RV64-NEXT:    vluxei64.v v18, (a0), v24, v0.t
+; RV64-NEXT:    vmv4r.v v8, v16
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds i8, i8* %base, <vscale x 32 x i8> %idxs
+  %v = call <vscale x 32 x i8> @llvm.vp.gather.nxv32i8.nxv32p0i8(<vscale x 32 x i8*> %ptrs, <vscale x 32 x i1> %m, i32 %evl)
+  ret <vscale x 32 x i8> %v
+}
+
 declare <vscale x 1 x i16> @llvm.vp.gather.nxv1i16.nxv1p0i16(<vscale x 1 x i16*>, <vscale x 1 x i1>, i32)
 
 define <vscale x 1 x i16> @vpgather_nxv1i16(<vscale x 1 x i16*> %ptrs, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -2232,3 +2333,264 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(double* %base, <vscale x
   %v = call <vscale x 8 x double> @llvm.vp.gather.nxv8f64.nxv8p0f64(<vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x double> %v
 }
+
+declare <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x double> @vpgather_nxv16f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v24, v0
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    srli a4, a1, 3
+; RV32-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a3, a0, a1
+; RV32-NEXT:    vslidedown.vx v0, v0, a4
+; RV32-NEXT:    bltu a0, a3, .LBB102_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a3
+; RV32-NEXT:  .LBB102_2:
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (zero), v12, v0.t
+; RV32-NEXT:    bltu a0, a1, .LBB102_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a0, a1
+; RV32-NEXT:  .LBB102_4:
+; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v24
+; RV32-NEXT:    vluxei32.v v24, (zero), v8, v0.t
+; RV32-NEXT:    vmv.v.v v8, v24
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v24, v0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    srli a4, a1, 3
+; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a3, a0, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a4
+; RV64-NEXT:    bltu a0, a3, .LBB102_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a3
+; RV64-NEXT:  .LBB102_2:
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (zero), v16, v0.t
+; RV64-NEXT:    bltu a0, a1, .LBB102_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:  .LBB102_4:
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v24
+; RV64-NEXT:    vluxei64.v v8, (zero), v8, v0.t
+; RV64-NEXT:    ret
+  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x double> %v
+}
+
+define <vscale x 16 x double> @vpgather_baseidx_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    srli a5, a2, 3
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a4, a1, a2
+; RV32-NEXT:    vslidedown.vx v0, v0, a5
+; RV32-NEXT:    bltu a1, a4, .LBB103_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:  .LBB103_2:
+; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf2 v16, v8
+; RV32-NEXT:    vsll.vi v24, v16, 3
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB103_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB103_4:
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    srli a5, a2, 3
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a4, a1, a2
+; RV64-NEXT:    vslidedown.vx v0, v0, a5
+; RV64-NEXT:    bltu a1, a4, .LBB103_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a4
+; RV64-NEXT:  .LBB103_2:
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v16, v10
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    bltu a1, a2, .LBB103_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB103_4:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
+  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x double> %v
+}
+
+define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v16, v10
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    srli a5, a2, 3
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a4, a1, a2
+; RV32-NEXT:    vslidedown.vx v0, v0, a5
+; RV32-NEXT:    bltu a1, a4, .LBB104_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:  .LBB104_2:
+; RV32-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsext.vf4 v24, v8
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB104_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB104_4:
+; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v16, v10
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    srli a5, a2, 3
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a4, a1, a2
+; RV64-NEXT:    vslidedown.vx v0, v0, a5
+; RV64-NEXT:    bltu a1, a4, .LBB104_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a4
+; RV64-NEXT:  .LBB104_2:
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    bltu a1, a2, .LBB104_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB104_4:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
+  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x double> %v
+}
+
+define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vmv1r.v v12, v0
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT:    vzext.vf4 v16, v10
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    srli a5, a2, 3
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a4, a1, a2
+; RV32-NEXT:    vslidedown.vx v0, v0, a5
+; RV32-NEXT:    bltu a1, a4, .LBB105_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a4
+; RV32-NEXT:  .LBB105_2:
+; RV32-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV32-NEXT:    vzext.vf4 v24, v8
+; RV32-NEXT:    vsll.vi v16, v16, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
+; RV32-NEXT:    bltu a1, a2, .LBB105_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a1, a2
+; RV32-NEXT:  .LBB105_4:
+; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV32-NEXT:    vmv1r.v v0, v12
+; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vmv1r.v v12, v0
+; RV64-NEXT:    li a3, 0
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vzext.vf4 v16, v10
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    srli a5, a2, 3
+; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a4, a1, a2
+; RV64-NEXT:    vslidedown.vx v0, v0, a5
+; RV64-NEXT:    bltu a1, a4, .LBB105_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a4
+; RV64-NEXT:  .LBB105_2:
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vzext.vf4 v24, v8
+; RV64-NEXT:    vsll.vi v16, v16, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vluxei64.v v16, (a0), v16, v0.t
+; RV64-NEXT:    bltu a1, a2, .LBB105_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a1, a2
+; RV64-NEXT:  .LBB105_4:
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
+; RV64-NEXT:    vmv1r.v v0, v12
+; RV64-NEXT:    vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    ret
+  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
+  %v = call <vscale x 16 x double> @llvm.vp.gather.nxv16f64.nxv16p0f64(<vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
+  ret <vscale x 16 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index 62a7244de1b3f..01be81fd1912d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare void @llvm.vp.scatter.nxv1i8.nxv1p0i8(<vscale x 1 x i8>, <vscale x 1 x i8*>, <vscale x 1 x i1>, i32)
@@ -2071,3 +2071,367 @@ define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base
   call void @llvm.vp.scatter.nxv8f64.nxv8p0f64(<vscale x 8 x double> %val, <vscale x 8 x double*> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
   ret void
 }
+
+declare void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double>, <vscale x 16 x double*>, <vscale x 16 x i1>, i32)
+
+define void @vpscatter_nxv16f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vl8re32.v v24, (a0)
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    mv a2, a1
+; RV32-NEXT:    bltu a1, a0, .LBB95_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a0
+; RV32-NEXT:  .LBB95_2:
+; RV32-NEXT:    li a3, 0
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v8, (zero), v24, v0.t
+; RV32-NEXT:    srli a2, a0, 3
+; RV32-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a0, a1, a0
+; RV32-NEXT:    vslidedown.vx v0, v0, a2
+; RV32-NEXT:    bltu a1, a0, .LBB95_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a0
+; RV32-NEXT:  .LBB95_4:
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v16, (zero), v28, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vl8re64.v v16, (a0)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a3, a1, 3
+; RV64-NEXT:    add a0, a0, a3
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB95_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB95_2:
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    vl8re64.v v24, (a0)
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v8, (zero), v16, v0.t
+; RV64-NEXT:    srli a3, a1, 3
+; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a0, a2, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a3
+; RV64-NEXT:    bltu a2, a0, .LBB95_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a4, a0
+; RV64-NEXT:  .LBB95_4:
+; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8re8.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v8, (zero), v24, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vl4re16.v v4, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    bltu a2, a1, .LBB96_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB96_2:
+; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, mu
+; RV32-NEXT:    vsext.vf2 v24, v4
+; RV32-NEXT:    vsll.vi v24, v24, 3
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    srli a3, a1, 3
+; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a1, a2, a1
+; RV32-NEXT:    vslidedown.vx v0, v0, a3
+; RV32-NEXT:    bltu a2, a1, .LBB96_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:  .LBB96_4:
+; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV32-NEXT:    vsoxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vl4re16.v v4, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB96_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB96_2:
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    vsetvli a5, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v24, v4
+; RV64-NEXT:    vsll.vi v24, v24, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT:    srli a3, a1, 3
+; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a1, a2, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a3
+; RV64-NEXT:    bltu a2, a1, .LBB96_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a4, a1
+; RV64-NEXT:  .LBB96_4:
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsext.vf4 v8, v6
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    ret
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i16> %idxs
+  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    sub sp, sp, a3
+; RV32-NEXT:    vl4re16.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    vsext.vf4 v8, v24
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    bltu a2, a1, .LBB97_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB97_2:
+; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    vsext.vf4 v16, v26
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    srli a3, a1, 3
+; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a1, a2, a1
+; RV32-NEXT:    vslidedown.vx v0, v0, a3
+; RV32-NEXT:    bltu a2, a1, .LBB97_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:  .LBB97_4:
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    vl4re16.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    vsext.vf4 v8, v24
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB97_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB97_2:
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    vsext.vf4 v16, v26
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vl8re8.v v24, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
+; RV64-NEXT:    srli a3, a1, 3
+; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a1, a2, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a3
+; RV64-NEXT:    bltu a2, a1, .LBB97_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a4, a1
+; RV64-NEXT:  .LBB97_4:
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %eidxs = sext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
+  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
+  ret void
+}
+
+define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, double* %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    sub sp, sp, a3
+; RV32-NEXT:    vl4re16.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    vzext.vf4 v8, v24
+; RV32-NEXT:    mv a3, a2
+; RV32-NEXT:    bltu a2, a1, .LBB98_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB98_2:
+; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    vzext.vf4 v16, v26
+; RV32-NEXT:    vsll.vi v8, v8, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
+; RV32-NEXT:    srli a3, a1, 3
+; RV32-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV32-NEXT:    sub a1, a2, a1
+; RV32-NEXT:    vslidedown.vx v0, v0, a3
+; RV32-NEXT:    bltu a2, a1, .LBB98_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a4, a1
+; RV32-NEXT:  .LBB98_4:
+; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV32-NEXT:    vsll.vi v8, v16, 3
+; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
+; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    sub sp, sp, a3
+; RV64-NEXT:    vl4re16.v v24, (a1)
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    vzext.vf4 v8, v24
+; RV64-NEXT:    mv a3, a2
+; RV64-NEXT:    bltu a2, a1, .LBB98_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB98_2:
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    vzext.vf4 v16, v26
+; RV64-NEXT:    vsll.vi v8, v8, 3
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vl8re8.v v24, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v24, (a0), v8, v0.t
+; RV64-NEXT:    srli a3, a1, 3
+; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
+; RV64-NEXT:    sub a1, a2, a1
+; RV64-NEXT:    vslidedown.vx v0, v0, a3
+; RV64-NEXT:    bltu a2, a1, .LBB98_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a4, a1
+; RV64-NEXT:  .LBB98_4:
+; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsll.vi v8, v16, 3
+; RV64-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
+  %ptrs = getelementptr inbounds double, double* %base, <vscale x 16 x i64> %eidxs
+  call void @llvm.vp.scatter.nxv16f64.nxv16p0f64(<vscale x 16 x double> %val, <vscale x 16 x double*> %ptrs, <vscale x 16 x i1> %m, i32 %evl)
+  ret void
+}

From 48f763edb4718a2b2226d9654b96d4b69d66883e Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <hsiangkai@google.com>
Date: Fri, 21 Jan 2022 07:56:08 +0000
Subject: [PATCH 518/946] [docs] Refine the description in Set-Like and
 Map-Like container options.

In "Other Set-Like Container Options":
* Drops the references to C++ TR1 and SGI and hash_set.
* Drops the worry about portability (this was a problem with hash_set, but
std::unordered_set has worked portably since LLVM started depending
on C++11).

It is similar in "Other Map-Like Container Options" section.

Differential Revision: https://reviews.llvm.org/D117858
---
 llvm/docs/ProgrammersManual.rst | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 630893a3c11e4..8437e24c595b3 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -2198,10 +2198,9 @@ membership.
 Other Set-Like Container Options
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The STL provides several other options, such as std::multiset and the various
-"hash_set" like containers (whether from C++ TR1 or from the SGI library).  We
-never use hash_set and unordered_set because they are generally very expensive
-(each insertion requires a malloc) and very non-portable.
+The STL provides several other options, such as std::multiset and
+std::unordered_set.  We never use containers like unordered_set because
+they are generally very expensive (each insertion requires a malloc).
 
 std::multiset is useful if you're not interested in elimination of duplicates,
 but has all the drawbacks of :ref:`std::set <dss_set>`.  A sorted vector
@@ -2389,10 +2388,9 @@ operations is logarithmic in the size of the original map.
 Other Map-Like Container Options
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The STL provides several other options, such as std::multimap and the various
-"hash_map" like containers (whether from C++ TR1 or from the SGI library).  We
-never use hash_set and unordered_set because they are generally very expensive
-(each insertion requires a malloc) and very non-portable.
+The STL provides several other options, such as std::multimap and
+std::unordered_map.  We never use containers like unordered_map because
+they are generally very expensive (each insertion requires a malloc).
 
 std::multimap is useful if you want to map a key to multiple values, but has all
 the drawbacks of std::map.  A sorted vector or some other approach is almost

From 901dd53cbf6130cb157b0bae20141b5a01a8b903 Mon Sep 17 00:00:00 2001
From: Hsiangkai Wang <hsiangkai@google.com>
Date: Fri, 21 Jan 2022 01:46:34 +0000
Subject: [PATCH 519/946] [docs] There are more than three bit storage
 containers.

To avoid listing all the bit containers in the title and do not use the
specific number for the number of bit containers.

Differential Revision: https://reviews.llvm.org/D117849
---
 llvm/docs/ProgrammersManual.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst
index 8437e24c595b3..f56a3a4aa4567 100644
--- a/llvm/docs/ProgrammersManual.rst
+++ b/llvm/docs/ProgrammersManual.rst
@@ -2398,10 +2398,10 @@ always better.
 
 .. _ds_bit:
 
-Bit storage containers (BitVector, SparseBitVector, CoalescingBitVector)
+Bit storage containers
 ------------------------------------------------------------------------
 
-There are three bit storage containers, and choosing when to use each is
+There are several bit storage containers, and choosing when to use each is
 relatively straightforward.
 
 One additional option is ``std::vector<bool>``: we discourage its use for two

From e2f8d28afba0a6545284ad3b54a4b7532c3253b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Tue, 25 Jan 2022 11:11:14 +0100
Subject: [PATCH 520/946] [NFC] Added test with select with unpredictable
 metadata; regenerate x86-cmov-converter.ll

---
 llvm/test/CodeGen/X86/x86-cmov-converter.ll | 333 +++++++++++++++-----
 1 file changed, 251 insertions(+), 82 deletions(-)

diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index 59f68269381a2..91c1367da1bdf 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -106,6 +107,33 @@
 ; CHECK: jg
 
 define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
+; CHECK-LABEL: CmovInHotPath:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB0_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-NEXT:    leal 1(%rax), %r9d
+; CHECK-NEXT:    imull %esi, %eax
+; CHECK-NEXT:    movl $10, %r10d
+; CHECK-NEXT:    cmpl %edx, %eax
+; CHECK-NEXT:    jg .LBB0_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    movl %r9d, %r10d
+; CHECK-NEXT:  .LBB0_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    imull %r9d, %r10d
+; CHECK-NEXT:    movl %r10d, (%rcx,%rdi,4)
+; CHECK-NEXT:    addq $1, %rdi
+; CHECK-NEXT:    cmpq %rdi, %r8
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 0
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -136,6 +164,32 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: cmovg
 
 define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
+; CHECK-LABEL: CmovNotInHotPath:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB1_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edx, %r9d
+; CHECK-NEXT:    movl %edi, %r10d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $10, %r11d
+; CHECK-NEXT:  .LBB1_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    imull %esi, %edx
+; CHECK-NEXT:    cmpl %r9d, %edx
+; CHECK-NEXT:    cmovgl %r11d, %eax
+; CHECK-NEXT:    movl %eax, (%rcx,%rdi,4)
+; CHECK-NEXT:    movl (%r8,%rdi,4), %eax
+; CHECK-NEXT:    cltd
+; CHECK-NEXT:    idivl %r9d
+; CHECK-NEXT:    movl %eax, (%r8,%rdi,4)
+; CHECK-NEXT:    addq $1, %rdi
+; CHECK-NEXT:    cmpq %rdi, %r10
+; CHECK-NEXT:    jne .LBB1_2
+; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp18 = icmp sgt i32 %n, 0
   br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
@@ -169,6 +223,33 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: jg
 
 define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxIndex:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB2_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB2_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %r9d
+; CHECK-NEXT:    movslq %edi, %rcx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl (%rsi,%rcx,4), %r9d
+; CHECK-NEXT:    jg .LBB2_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:  .LBB2_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    cmpq %rdx, %r8
+; CHECK-NEXT:    jne .LBB2_2
+; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 1
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -202,6 +283,24 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: cmovg
 
 define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxValue:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB3_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB3_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
+; CHECK-NEXT:    cmpl %eax, %edi
+; CHECK-NEXT:    cmovgl %edi, %eax
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    cmpq %rdx, %rcx
+; CHECK-NEXT:    jne .LBB3_2
+; CHECK-NEXT:  .LBB3_3: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %0 = load i32, i32* %a, align 4
   %cmp13 = icmp sgt i32 %n, 1
@@ -231,6 +330,24 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: set
 
 define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
+; CHECK-LABEL: BinarySearch:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    jmp .LBB4_2
+; CHECK-NEXT:  .LBB4_1: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    btl %eax, %edi
+; CHECK-NEXT:    setae %cl
+; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
+; CHECK-NEXT:  .LBB4_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rdx), %ecx
+; CHECK-NEXT:    cmpl %ecx, %eax
+; CHECK-NEXT:    ja .LBB4_1
+; CHECK-NEXT:  # %bb.3: # %while.end
+; CHECK-NEXT:    retq
 entry:
   %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
   %0 = load i32, i32* %Val8, align 8
@@ -301,6 +418,39 @@ while.end:                                        ; preds = %while.body, %entry
 ; CHECK:         ja
 
 define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
+; CHECK-LABEL: Transform:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB5_5
+; CHECK-NEXT:  # %bb.1: # %while.body.preheader
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:  .LBB5_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movslq %esi, %rsi
+; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %r8d
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    movl $11, %eax
+; CHECK-NEXT:    movl %r8d, %ecx
+; CHECK-NEXT:    cmpl %r8d, %edx
+; CHECK-NEXT:    ja .LBB5_4
+; CHECK-NEXT:  # %bb.3: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    movl $22, %eax
+; CHECK-NEXT:    movl $22, %ecx
+; CHECK-NEXT:  .LBB5_4: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ecx
+; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
+; CHECK-NEXT:    addl $1, %esi
+; CHECK-NEXT:    cmpl %r9d, %esi
+; CHECK-NEXT:    ja .LBB5_2
+; CHECK-NEXT:  .LBB5_5: # %while.end
+; CHECK-NEXT:    retq
 entry:
   %cmp10 = icmp ugt i32 0, %n
   br i1 %cmp10, label %while.body, label %while.end
@@ -328,16 +478,35 @@ while.end:                                        ; preds = %while.body, %entry
 ; even outside of a loop.
 define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB6_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB6_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %load = load i32, i32* %y
   %z = select i1 %cond, i32 %x, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%rcx), %eax
-; CHECK:       [[FALSE_BB]]:
+  ret i32 %z
+}
+
+define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
+; CHECK-LABEL: test_cmov_memoperand_unpredictable:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB6_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB6_2: # %entry
+; CHECK-NEXT:    retq
+entry:
+  %cond = icmp ugt i32 %a, %b
+  %load = load i32, i32* %y
+  %z = select i1 %cond, i32 %x, i32 %load , !unpredictable !0
   ret i32 %z
 }
 
@@ -345,29 +514,25 @@ entry:
 ; operand.
 define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB8_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %r8d
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB8_2: # %entry
+; CHECK-NEXT:    addl %r8d, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z1 = select i1 %cond, i32 %x, i32 %a
   %z2 = select i1 %cond, i32 %x, i32 %y
   %z3 = select i1 %cond, i32 %x, i32 %b
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
-; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
-; CHECK-DAG:     movl %{{.*}} %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         addl
-; CHECK-DAG:       %[[R1]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK-DAG:     addl
-; CHECK-DAG:       %[[R2]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -376,29 +541,25 @@ entry:
 ; Same as before but with operands reversed in the select with a load.
 define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    jbe .LBB9_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %r8d
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB9_2: # %entry
+; CHECK-NEXT:    addl %r8d, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z2 = select i1 %cond, i32 %a, i32 %x
   %z1 = select i1 %cond, i32 %y, i32 %x
   %z3 = select i1 %cond, i32 %b, i32 %x
-; CHECK-NOT:     cmov
-; CHECK:         jbe [[FALSE_BB:.*]]
-; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
-; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
-; CHECK-DAG:     movl %{{.*}} %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         addl
-; CHECK-DAG:       %[[R1]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK-DAG:     addl
-; CHECK-DAG:       %[[R2]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -408,15 +569,19 @@ entry:
 ; loads.
 define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:    cmoval %edx, %eax
+; CHECK-NEXT:    cmoval (%r8), %edx
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         cmpl
   %y1 = load i32, i32* %y1.ptr
   %y2 = load i32, i32* %y2.ptr
   %z1 = select i1 %cond, i32 %x, i32 %y1
   %z2 = select i1 %cond, i32 %y2, i32 %x
-; CHECK:         cmoval
-; CHECK:         cmoval
   %s1 = add i32 %z1, %z2
   ret i32 %s1
 }
@@ -426,18 +591,19 @@ entry:
 ; the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB11_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB11_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %load = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%r{{..}}), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %z
 }
 
@@ -445,20 +611,21 @@ entry:
 ; uses the result of the other as part of the address.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB12_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    movl (%rax), %eax
+; CHECK-NEXT:  .LBB12_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %load1 = load i32*, i32** %y
   %p = select i1 %cond, i32* %x, i32* %load1
   %load2 = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load2
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movq (%r{{..}}), %[[R1:.*]]
-; CHECK:         movl (%[[R1]]), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %z
 }
 
@@ -467,19 +634,20 @@ entry:
 ; where that cmov gets *its* input from a prior cmov in the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB13_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB13_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %p2 = select i1 %cond, i32* %z, i32* %p
   %load = load i32, i32* %p2
   %r = select i1 %cond, i32 %a, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%r{{..}}), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %r
 }
 
@@ -495,34 +663,36 @@ define void @test_memoperand_loop(i32 %data) #0 {
 ; CHECK-NEXT:    movq (%rcx), %rdx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    movq %rax, %rcx
-entry:
-  %begin = load i32*, i32** @begin, align 8
-  %end = load i32*, i32** @end, align 8
-  br label %loop.body
-
-; CHECK-NEXT:  .LBB13_1: # %loop.body
+; CHECK-NEXT:  .LBB14_1: # %loop.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB13_3
+; CHECK-NEXT:    ja .LBB14_3
 ; CHECK-NEXT:  # %bb.2: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
 ; CHECK-NEXT:    movq (%r8), %rcx
-; CHECK-NEXT:  .LBB13_3: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:  .LBB14_3: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB13_5
+; CHECK-NEXT:    ja .LBB14_5
 ; CHECK-NEXT:  # %bb.4: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
 ; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:  .LBB13_5: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:  .LBB14_5: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addl $1, %esi
 ; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
-; CHECK-NEXT:    jl .LBB13_1
+; CHECK-NEXT:    jl .LBB14_1
+; CHECK-NEXT:  # %bb.6: # %exit
+; CHECK-NEXT:    retq
+entry:
+  %begin = load i32*, i32** @begin, align 8
+  %end = load i32*, i32** @end, align 8
+  br label %loop.body
+
 loop.body:
   %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
   %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ]
@@ -539,10 +709,9 @@ loop.body:
   %cond = icmp slt i32 %iv.next, 1024
   br i1 %cond, label %loop.body, label %exit
 
-; CHECK-NEXT:  # %bb.6: # %exit
-; CHECK-NEXT:    retq
 exit:
   ret void
 }
 
 attributes #0 = {"target-cpu"="x86-64"}
+!0 = !{}

From 9fa6ad4c589316d71a61834fefd6d411249e4843 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Tue, 25 Jan 2022 11:28:26 +0100
Subject: [PATCH 521/946] Revert "[NFC] Added test with select with
 unpredictable metadata; regenerate x86-cmov-converter.ll"

This reverts commit e2f8d28afba0a6545284ad3b54a4b7532c3253b6.
---
 llvm/test/CodeGen/X86/x86-cmov-converter.ll | 333 +++++---------------
 1 file changed, 82 insertions(+), 251 deletions(-)

diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index 91c1367da1bdf..59f68269381a2 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -107,33 +106,6 @@
 ; CHECK: jg
 
 define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
-; CHECK-LABEL: CmovInHotPath:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jle .LBB0_5
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    movl %edi, %r8d
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:  .LBB0_2: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
-; CHECK-NEXT:    leal 1(%rax), %r9d
-; CHECK-NEXT:    imull %esi, %eax
-; CHECK-NEXT:    movl $10, %r10d
-; CHECK-NEXT:    cmpl %edx, %eax
-; CHECK-NEXT:    jg .LBB0_4
-; CHECK-NEXT:  # %bb.3: # %for.body
-; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    movl %r9d, %r10d
-; CHECK-NEXT:  .LBB0_4: # %for.body
-; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    imull %r9d, %r10d
-; CHECK-NEXT:    movl %r10d, (%rcx,%rdi,4)
-; CHECK-NEXT:    addq $1, %rdi
-; CHECK-NEXT:    cmpq %rdi, %r8
-; CHECK-NEXT:    jne .LBB0_2
-; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
-; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 0
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -164,32 +136,6 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: cmovg
 
 define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
-; CHECK-LABEL: CmovNotInHotPath:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    jle .LBB1_3
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    movl %edx, %r9d
-; CHECK-NEXT:    movl %edi, %r10d
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:    movl $10, %r11d
-; CHECK-NEXT:  .LBB1_2: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    imull %esi, %edx
-; CHECK-NEXT:    cmpl %r9d, %edx
-; CHECK-NEXT:    cmovgl %r11d, %eax
-; CHECK-NEXT:    movl %eax, (%rcx,%rdi,4)
-; CHECK-NEXT:    movl (%r8,%rdi,4), %eax
-; CHECK-NEXT:    cltd
-; CHECK-NEXT:    idivl %r9d
-; CHECK-NEXT:    movl %eax, (%r8,%rdi,4)
-; CHECK-NEXT:    addq $1, %rdi
-; CHECK-NEXT:    cmpq %rdi, %r10
-; CHECK-NEXT:    jne .LBB1_2
-; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
-; CHECK-NEXT:    retq
 entry:
   %cmp18 = icmp sgt i32 %n, 0
   br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
@@ -223,33 +169,6 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: jg
 
 define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
-; CHECK-LABEL: MaxIndex:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    cmpl $2, %edi
-; CHECK-NEXT:    jl .LBB2_5
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    movl %edi, %r8d
-; CHECK-NEXT:    xorl %edi, %edi
-; CHECK-NEXT:    movl $1, %edx
-; CHECK-NEXT:  .LBB2_2: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rsi,%rdx,4), %r9d
-; CHECK-NEXT:    movslq %edi, %rcx
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    cmpl (%rsi,%rcx,4), %r9d
-; CHECK-NEXT:    jg .LBB2_4
-; CHECK-NEXT:  # %bb.3: # %for.body
-; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:  .LBB2_4: # %for.body
-; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT:    addq $1, %rdx
-; CHECK-NEXT:    movl %eax, %edi
-; CHECK-NEXT:    cmpq %rdx, %r8
-; CHECK-NEXT:    jne .LBB2_2
-; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
-; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 1
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -283,24 +202,6 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: cmovg
 
 define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
-; CHECK-LABEL: MaxValue:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl (%rsi), %eax
-; CHECK-NEXT:    cmpl $2, %edi
-; CHECK-NEXT:    jl .LBB3_3
-; CHECK-NEXT:  # %bb.1: # %for.body.preheader
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    movl $1, %edx
-; CHECK-NEXT:  .LBB3_2: # %for.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
-; CHECK-NEXT:    cmpl %eax, %edi
-; CHECK-NEXT:    cmovgl %edi, %eax
-; CHECK-NEXT:    addq $1, %rdx
-; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    jne .LBB3_2
-; CHECK-NEXT:  .LBB3_3: # %for.cond.cleanup
-; CHECK-NEXT:    retq
 entry:
   %0 = load i32, i32* %a, align 4
   %cmp13 = icmp sgt i32 %n, 1
@@ -330,24 +231,6 @@ for.body:                                         ; preds = %for.body.preheader,
 ; CHECK: set
 
 define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
-; CHECK-LABEL: BinarySearch:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl (%rsi), %eax
-; CHECK-NEXT:    jmp .LBB4_2
-; CHECK-NEXT:  .LBB4_1: # %while.body
-; CHECK-NEXT:    # in Loop: Header=BB4_2 Depth=1
-; CHECK-NEXT:    movl %ecx, %eax
-; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    btl %eax, %edi
-; CHECK-NEXT:    setae %cl
-; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
-; CHECK-NEXT:  .LBB4_2: # %while.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movl (%rdx), %ecx
-; CHECK-NEXT:    cmpl %ecx, %eax
-; CHECK-NEXT:    ja .LBB4_1
-; CHECK-NEXT:  # %bb.3: # %while.end
-; CHECK-NEXT:    retq
 entry:
   %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
   %0 = load i32, i32* %Val8, align 8
@@ -418,39 +301,6 @@ while.end:                                        ; preds = %while.body, %entry
 ; CHECK:         ja
 
 define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
-; CHECK-LABEL: Transform:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne .LBB5_5
-; CHECK-NEXT:  # %bb.1: # %while.body.preheader
-; CHECK-NEXT:    movl %edx, %r8d
-; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:  .LBB5_2: # %while.body
-; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movslq %esi, %rsi
-; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divl %r8d
-; CHECK-NEXT:    movl %eax, %edx
-; CHECK-NEXT:    movl $11, %eax
-; CHECK-NEXT:    movl %r8d, %ecx
-; CHECK-NEXT:    cmpl %r8d, %edx
-; CHECK-NEXT:    ja .LBB5_4
-; CHECK-NEXT:  # %bb.3: # %while.body
-; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    movl $22, %eax
-; CHECK-NEXT:    movl $22, %ecx
-; CHECK-NEXT:  .LBB5_4: # %while.body
-; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
-; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    divl %ecx
-; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
-; CHECK-NEXT:    addl $1, %esi
-; CHECK-NEXT:    cmpl %r9d, %esi
-; CHECK-NEXT:    ja .LBB5_2
-; CHECK-NEXT:  .LBB5_5: # %while.end
-; CHECK-NEXT:    retq
 entry:
   %cmp10 = icmp ugt i32 0, %n
   br i1 %cmp10, label %while.body, label %while.end
@@ -478,35 +328,16 @@ while.end:                                        ; preds = %while.body, %entry
 ; even outside of a loop.
 define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB6_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:  .LBB6_2: # %entry
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edx, %eax
+; CHECK:         cmpl
   %load = load i32, i32* %y
   %z = select i1 %cond, i32 %x, i32 %load
-  ret i32 %z
-}
-
-define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
-; CHECK-LABEL: test_cmov_memoperand_unpredictable:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB6_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:  .LBB6_2: # %entry
-; CHECK-NEXT:    retq
-entry:
-  %cond = icmp ugt i32 %a, %b
-  %load = load i32, i32* %y
-  %z = select i1 %cond, i32 %x, i32 %load , !unpredictable !0
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK:         movl (%rcx), %eax
+; CHECK:       [[FALSE_BB]]:
   ret i32 %z
 }
 
@@ -514,25 +345,29 @@ entry:
 ; operand.
 define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    movl %edx, %r8d
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB8_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %r8d
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:  .LBB8_2: # %entry
-; CHECK-NEXT:    addl %r8d, %eax
-; CHECK-NEXT:    addl %edx, %eax
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edx, %eax
+; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z1 = select i1 %cond, i32 %x, i32 %a
   %z2 = select i1 %cond, i32 %x, i32 %y
   %z3 = select i1 %cond, i32 %x, i32 %b
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
+; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
+; CHECK-DAG:     movl %{{.*}} %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         addl
+; CHECK-DAG:       %[[R1]]
+; CHECK-DAG:       ,
+; CHECK-DAG:       %eax
+; CHECK-DAG:     addl
+; CHECK-DAG:       %[[R2]]
+; CHECK-DAG:       ,
+; CHECK-DAG:       %eax
+; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -541,25 +376,29 @@ entry:
 ; Same as before but with operands reversed in the select with a load.
 define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edx, %eax
-; CHECK-NEXT:    movl %edx, %r8d
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    jbe .LBB9_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %r8d
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    movl %esi, %edx
-; CHECK-NEXT:  .LBB9_2: # %entry
-; CHECK-NEXT:    addl %r8d, %eax
-; CHECK-NEXT:    addl %edx, %eax
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edx, %eax
+; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z2 = select i1 %cond, i32 %a, i32 %x
   %z1 = select i1 %cond, i32 %y, i32 %x
   %z3 = select i1 %cond, i32 %b, i32 %x
+; CHECK-NOT:     cmov
+; CHECK:         jbe [[FALSE_BB:.*]]
+; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
+; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
+; CHECK-DAG:     movl %{{.*}} %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         addl
+; CHECK-DAG:       %[[R1]]
+; CHECK-DAG:       ,
+; CHECK-DAG:       %eax
+; CHECK-DAG:     addl
+; CHECK-DAG:       %[[R2]]
+; CHECK-DAG:       ,
+; CHECK-DAG:       %eax
+; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -569,19 +408,15 @@ entry:
 ; loads.
 define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:    cmoval %edx, %eax
-; CHECK-NEXT:    cmoval (%r8), %edx
-; CHECK-NEXT:    addl %edx, %eax
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         cmpl
   %y1 = load i32, i32* %y1.ptr
   %y2 = load i32, i32* %y2.ptr
   %z1 = select i1 %cond, i32 %x, i32 %y1
   %z2 = select i1 %cond, i32 %y2, i32 %x
+; CHECK:         cmoval
+; CHECK:         cmoval
   %s1 = add i32 %z1, %z2
   ret i32 %s1
 }
@@ -591,19 +426,18 @@ entry:
 ; the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB11_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:  .LBB11_2: # %entry
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edi, %eax
+; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %load = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK:         movl (%r{{..}}), %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         retq
   ret i32 %z
 }
 
@@ -611,21 +445,20 @@ entry:
 ; uses the result of the other as part of the address.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB12_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movq (%rcx), %rax
-; CHECK-NEXT:    movl (%rax), %eax
-; CHECK-NEXT:  .LBB12_2: # %entry
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edi, %eax
+; CHECK:         cmpl
   %load1 = load i32*, i32** %y
   %p = select i1 %cond, i32* %x, i32* %load1
   %load2 = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load2
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK:         movq (%r{{..}}), %[[R1:.*]]
+; CHECK:         movl (%[[R1]]), %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         retq
   ret i32 %z
 }
 
@@ -634,20 +467,19 @@ entry:
 ; where that cmov gets *its* input from a prior cmov in the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    cmpl %esi, %edi
-; CHECK-NEXT:    ja .LBB13_2
-; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    movl (%rcx), %eax
-; CHECK-NEXT:  .LBB13_2: # %entry
-; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
+; CHECK:         movl %edi, %eax
+; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %p2 = select i1 %cond, i32* %z, i32* %p
   %load = load i32, i32* %p2
   %r = select i1 %cond, i32 %a, i32 %load
+; CHECK-NOT:     cmov
+; CHECK:         ja [[FALSE_BB:.*]]
+; CHECK:         movl (%r{{..}}), %eax
+; CHECK:       [[FALSE_BB]]:
+; CHECK:         retq
   ret i32 %r
 }
 
@@ -663,36 +495,34 @@ define void @test_memoperand_loop(i32 %data) #0 {
 ; CHECK-NEXT:    movq (%rcx), %rdx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:  .LBB14_1: # %loop.body
+entry:
+  %begin = load i32*, i32** @begin, align 8
+  %end = load i32*, i32** @end, align 8
+  br label %loop.body
+
+; CHECK-NEXT:  .LBB13_1: # %loop.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB14_3
+; CHECK-NEXT:    ja .LBB13_3
 ; CHECK-NEXT:  # %bb.2: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 ; CHECK-NEXT:    movq (%r8), %rcx
-; CHECK-NEXT:  .LBB14_3: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
+; CHECK-NEXT:  .LBB13_3: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB14_5
+; CHECK-NEXT:    ja .LBB13_5
 ; CHECK-NEXT:  # %bb.4: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 ; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:  .LBB14_5: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB14_1 Depth=1
+; CHECK-NEXT:  .LBB13_5: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addl $1, %esi
 ; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
-; CHECK-NEXT:    jl .LBB14_1
-; CHECK-NEXT:  # %bb.6: # %exit
-; CHECK-NEXT:    retq
-entry:
-  %begin = load i32*, i32** @begin, align 8
-  %end = load i32*, i32** @end, align 8
-  br label %loop.body
-
+; CHECK-NEXT:    jl .LBB13_1
 loop.body:
   %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
   %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ]
@@ -709,9 +539,10 @@ loop.body:
   %cond = icmp slt i32 %iv.next, 1024
   br i1 %cond, label %loop.body, label %exit
 
+; CHECK-NEXT:  # %bb.6: # %exit
+; CHECK-NEXT:    retq
 exit:
   ret void
 }
 
 attributes #0 = {"target-cpu"="x86-64"}
-!0 = !{}

From 5f5c5603ce40e9372c108645be92828d4fe6df6f Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Mon, 24 Jan 2022 11:53:29 +0000
Subject: [PATCH 522/946] [SelectionDAG][VP] Add splitting support for VP_MERGE

This patch adds splitting support for ISD::VP_MERGE, which splits
identically to VP_SELECT and similarly to other select-like nodes.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D118032
---
 .../SelectionDAG/LegalizeTypesGeneric.cpp     |   2 +-
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   1 +
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 152 ++++++++++++++-
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 175 +++++++++++++++++-
 4 files changed, 325 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index c3a77879f225b..c6885677d6448 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -540,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi) {
       std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
   }
 
-  if (Opcode != ISD::VP_SELECT) {
+  if (Opcode != ISD::VP_SELECT && Opcode != ISD::VP_MERGE) {
     Lo = DAG.getNode(Opcode, dl, LL.getValueType(), CL, LL, RL);
     Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH);
     return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f8f3d2ce8e45d..5dd5db2ad51f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -915,6 +915,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
   case ISD::VSELECT:
   case ISD::SELECT:
+  case ISD::VP_MERGE:
   case ISD::VP_SELECT:    SplitRes_Select(N, Lo, Hi); break;
   case ISD::SELECT_CC:    SplitRes_SELECT_CC(N, Lo, Hi); break;
   case ISD::UNDEF:        SplitRes_UNDEF(N, Lo, Hi); break;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index e0cc13db7f118..e904e8a4d495c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
@@ -951,3 +951,151 @@ define <16 x double> @vpmerge_vf_v16f64(double %a, <16 x double> %vb, <16 x i1>
   %v = call <16 x double> @llvm.vp.merge.v16f64(<16 x i1> %m, <16 x double> %va, <16 x double> %vb, i32 %evl)
   ret <16 x double> %v
 }
+
+declare <32 x double> @llvm.vp.merge.v32f64(<32 x i1>, <32 x double>, <32 x double>, i32)
+
+define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_v32f64:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    addi a1, a0, 128
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV32-NEXT:    vle64.v v24, (a1)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, a2, -16
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    li a1, 0
+; RV32-NEXT:    bltu a2, a3, .LBB72_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a1, a3
+; RV32-NEXT:  .LBB72_2:
+; RV32-NEXT:    vle64.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV32-NEXT:    vslidedown.vi v0, v1, 2
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
+; RV32-NEXT:    li a0, 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
+; RV32-NEXT:    bltu a2, a0, .LBB72_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:  .LBB72_4:
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vmerge.vvm v8, v8, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vv_v32f64:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    addi a1, a0, 128
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
+; RV64-NEXT:    vle64.v v24, (a1)
+; RV64-NEXT:    addi a3, a2, -16
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    li a1, 0
+; RV64-NEXT:    bltu a2, a3, .LBB72_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a1, a3
+; RV64-NEXT:  .LBB72_2:
+; RV64-NEXT:    vle64.v v8, (a0)
+; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; RV64-NEXT:    vslidedown.vi v0, v1, 2
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
+; RV64-NEXT:    li a0, 16
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
+; RV64-NEXT:    bltu a2, a0, .LBB72_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    li a2, 16
+; RV64-NEXT:  .LBB72_4:
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v8, v8, v16, v0
+; RV64-NEXT:    vmv8r.v v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
+  ret <32 x double> %v
+}
+
+define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vf_v32f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi a2, a0, -16
+; CHECK-NEXT:    vmv1r.v v24, v0
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    bltu a0, a2, .LBB73_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:  .LBB73_2:
+; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
+; CHECK-NEXT:    vslidedown.vi v0, v24, 2
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
+; CHECK-NEXT:    li a1, 16
+; CHECK-NEXT:    vfmerge.vfm v16, v16, fa0, v0
+; CHECK-NEXT:    bltu a0, a1, .LBB73_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    li a0, 16
+; CHECK-NEXT:  .LBB73_4:
+; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <32 x double> poison, double %a, i32 0
+  %va = shufflevector <32 x double> %elt.head, <32 x double> poison, <32 x i32> zeroinitializer
+  %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl)
+  ret <32 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 46ebd0a6acf50..653217a20c54e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+v,+m -target-abi=ilp32d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
@@ -263,6 +263,177 @@ define <vscale x 64 x i8> @vpmerge_vi_nxv64i8(<vscale x 64 x i8> %vb, <vscale x
   ret <vscale x 64 x i8> %v
 }
 
+declare <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1>, <vscale x 128 x i8>, <vscale x 128 x i8>, i32)
+
+define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_nxv128i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a1, a1, a4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a4, a0, a1
+; RV32-NEXT:    vl8r.v v24, (a4)
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
+; RV32-NEXT:    vs8r.v v24, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli a4, zero, e8, m8, ta, mu
+; RV32-NEXT:    vlm.v v2, (a2)
+; RV32-NEXT:    sub a4, a3, a1
+; RV32-NEXT:    vmv1r.v v1, v0
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    li a2, 0
+; RV32-NEXT:    bltu a3, a4, .LBB21_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    mv a2, a4
+; RV32-NEXT:  .LBB21_2:
+; RV32-NEXT:    vl8r.v v8, (a0)
+; RV32-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
+; RV32-NEXT:    vmv1r.v v0, v2
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
+; RV32-NEXT:    bltu a3, a1, .LBB21_4
+; RV32-NEXT:  # %bb.3:
+; RV32-NEXT:    mv a3, a1
+; RV32-NEXT:  .LBB21_4:
+; RV32-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
+; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8re8.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vmerge.vvm v8, v8, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vv_nxv128i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a4, a0, a1
+; RV64-NEXT:    vl8r.v v24, (a4)
+; RV64-NEXT:    vsetvli a4, zero, e8, m8, ta, mu
+; RV64-NEXT:    vlm.v v2, (a2)
+; RV64-NEXT:    sub a4, a3, a1
+; RV64-NEXT:    vmv1r.v v1, v0
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a3, a4, .LBB21_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a2, a4
+; RV64-NEXT:  .LBB21_2:
+; RV64-NEXT:    vl8r.v v8, (a0)
+; RV64-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
+; RV64-NEXT:    vmv1r.v v0, v2
+; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
+; RV64-NEXT:    bltu a3, a1, .LBB21_4
+; RV64-NEXT:  # %bb.3:
+; RV64-NEXT:    mv a3, a1
+; RV64-NEXT:  .LBB21_4:
+; RV64-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
+; RV64-NEXT:    vmv1r.v v0, v1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vmerge.vvm v8, v8, v16, v0
+; RV64-NEXT:    vmv8r.v v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    ret
+  %v = call <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1> %m, <vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vpmerge_vx_nxv128i8(i8 %a, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 3
+; CHECK-NEXT:    mv a4, a2
+; CHECK-NEXT:    bltu a2, a3, .LBB22_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a4, a3
+; CHECK-NEXT:  .LBB22_2:
+; CHECK-NEXT:    li a5, 0
+; CHECK-NEXT:    vsetvli a6, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vlm.v v24, (a1)
+; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
+; CHECK-NEXT:    sub a1, a2, a3
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    bltu a2, a1, .LBB22_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a5, a1
+; CHECK-NEXT:  .LBB22_4:
+; CHECK-NEXT:    vsetvli zero, a5, e8, m8, tu, mu
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmerge.vxm v16, v16, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1> %m, <vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
+define <vscale x 128 x i8> @vpmerge_vi_nxv128i8(<vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv128i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    mv a3, a1
+; CHECK-NEXT:    bltu a1, a2, .LBB23_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    mv a3, a2
+; CHECK-NEXT:  .LBB23_2:
+; CHECK-NEXT:    li a4, 0
+; CHECK-NEXT:    vsetvli a5, zero, e8, m8, ta, mu
+; CHECK-NEXT:    vlm.v v24, (a0)
+; CHECK-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
+; CHECK-NEXT:    sub a0, a1, a2
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    bltu a1, a0, .LBB23_4
+; CHECK-NEXT:  # %bb.3:
+; CHECK-NEXT:    mv a4, a0
+; CHECK-NEXT:  .LBB23_4:
+; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
+; CHECK-NEXT:    vmv1r.v v0, v24
+; CHECK-NEXT:    vmerge.vim v16, v16, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 128 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
+  %v = call <vscale x 128 x i8> @llvm.vp.merge.nxv128i8(<vscale x 128 x i1> %m, <vscale x 128 x i8> %va, <vscale x 128 x i8> %vb, i32 %evl)
+  ret <vscale x 128 x i8> %v
+}
+
 declare <vscale x 1 x i16> @llvm.vp.merge.nxv1i16(<vscale x 1 x i1>, <vscale x 1 x i16>, <vscale x 1 x i16>, i32)
 
 define <vscale x 1 x i16> @vpmerge_vv_nxv1i16(<vscale x 1 x i16> %va, <vscale x 1 x i16> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {

From 19d3dc6e226c0714d08c5cd130d8f5ba63bbd4f2 Mon Sep 17 00:00:00 2001
From: Victor Perez <victor.perez@codeplay.com>
Date: Tue, 25 Jan 2022 10:34:58 +0000
Subject: [PATCH 523/946] [VP] Update CodeGen/RISCV/rvv/vpgather-sdnode.ll test

---
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll | 56 +++++++++----------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index f71a2f86fee7d..4c3636e315825 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -287,62 +287,62 @@ define <vscale x 32 x i8> @vpgather_baseidx_nxv32i8(i8* %base, <vscale x 32 x i8
 ; RV64-LABEL: vpgather_baseidx_nxv32i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    csrr a3, vlenb
-; RV64-NEXT:    slli t0, a3, 1
-; RV64-NEXT:    sub a4, a1, t0
+; RV64-NEXT:    slli a5, a3, 1
+; RV64-NEXT:    sub a6, a1, a5
 ; RV64-NEXT:    vmv1r.v v12, v0
-; RV64-NEXT:    li t1, 0
-; RV64-NEXT:    li a7, 0
-; RV64-NEXT:    bltu a1, a4, .LBB12_2
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    li a2, 0
+; RV64-NEXT:    bltu a1, a6, .LBB12_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a7, a4
+; RV64-NEXT:    mv a2, a6
 ; RV64-NEXT:  .LBB12_2:
-; RV64-NEXT:    sub a4, a7, a3
-; RV64-NEXT:    mv a2, t1
-; RV64-NEXT:    bltu a7, a4, .LBB12_4
+; RV64-NEXT:    sub a6, a2, a3
+; RV64-NEXT:    mv a7, a4
+; RV64-NEXT:    bltu a2, a6, .LBB12_4
 ; RV64-NEXT:  # %bb.3:
-; RV64-NEXT:    mv a2, a4
+; RV64-NEXT:    mv a7, a6
 ; RV64-NEXT:  .LBB12_4:
-; RV64-NEXT:    srli a4, a3, 2
-; RV64-NEXT:    vsetvli a5, zero, e8, mf2, ta, mu
-; RV64-NEXT:    vslidedown.vx v13, v12, a4
+; RV64-NEXT:    srli a6, a3, 2
+; RV64-NEXT:    vsetvli t0, zero, e8, mf2, ta, mu
+; RV64-NEXT:    vslidedown.vx v13, v12, a6
 ; RV64-NEXT:    srli a6, a3, 3
-; RV64-NEXT:    vsetvli a4, zero, e8, mf4, ta, mu
+; RV64-NEXT:    vsetvli t0, zero, e8, mf4, ta, mu
 ; RV64-NEXT:    vslidedown.vx v0, v13, a6
-; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsetvli t0, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vsext.vf8 v24, v11
-; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
+; RV64-NEXT:    vsetvli zero, a7, e8, m1, ta, mu
 ; RV64-NEXT:    vluxei64.v v19, (a0), v24, v0.t
-; RV64-NEXT:    bltu a1, t0, .LBB12_6
+; RV64-NEXT:    bltu a1, a5, .LBB12_6
 ; RV64-NEXT:  # %bb.5:
-; RV64-NEXT:    mv a1, t0
+; RV64-NEXT:    mv a1, a5
 ; RV64-NEXT:  .LBB12_6:
-; RV64-NEXT:    sub a2, a1, a3
-; RV64-NEXT:    bltu a1, a2, .LBB12_8
+; RV64-NEXT:    sub a5, a1, a3
+; RV64-NEXT:    bltu a1, a5, .LBB12_8
 ; RV64-NEXT:  # %bb.7:
-; RV64-NEXT:    mv t1, a2
+; RV64-NEXT:    mv a4, a5
 ; RV64-NEXT:  .LBB12_8:
-; RV64-NEXT:    vsetvli a2, zero, e8, mf4, ta, mu
+; RV64-NEXT:    vsetvli a5, zero, e8, mf4, ta, mu
 ; RV64-NEXT:    vslidedown.vx v0, v12, a6
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsetvli a5, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vsext.vf8 v24, v9
-; RV64-NEXT:    vsetvli zero, t1, e8, m1, ta, mu
+; RV64-NEXT:    vsetvli zero, a4, e8, m1, ta, mu
 ; RV64-NEXT:    vluxei64.v v17, (a0), v24, v0.t
 ; RV64-NEXT:    bltu a1, a3, .LBB12_10
 ; RV64-NEXT:  # %bb.9:
 ; RV64-NEXT:    mv a1, a3
 ; RV64-NEXT:  .LBB12_10:
-; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vsext.vf8 v24, v8
 ; RV64-NEXT:    vsetvli zero, a1, e8, m1, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v12
 ; RV64-NEXT:    vluxei64.v v16, (a0), v24, v0.t
-; RV64-NEXT:    bltu a7, a3, .LBB12_12
+; RV64-NEXT:    bltu a2, a3, .LBB12_12
 ; RV64-NEXT:  # %bb.11:
-; RV64-NEXT:    mv a7, a3
+; RV64-NEXT:    mv a2, a3
 ; RV64-NEXT:  .LBB12_12:
 ; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT:    vsext.vf8 v24, v10
-; RV64-NEXT:    vsetvli zero, a7, e8, m1, ta, mu
+; RV64-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
 ; RV64-NEXT:    vmv1r.v v0, v13
 ; RV64-NEXT:    vluxei64.v v18, (a0), v24, v0.t
 ; RV64-NEXT:    vmv4r.v v8, v16

From 2074eef5db330ab437410bfb617b58ea70f4fbff Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Tue, 25 Jan 2022 11:46:02 +0100
Subject: [PATCH 524/946] [bazel] Adjust dependencies after a70aa7bb

These are all picked up transitively, but fail with
--features=layering_check, which enforces header dependencies.
---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel      | 1 +
 utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 436e7a79833ec..03e54e2726f19 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -8018,6 +8018,7 @@ cc_library(
     deps = [
         ":AllocationOpInterface",
         ":Analysis",
+        ":ArithmeticDialect",
         ":BufferizationDialect",
         ":BufferizationPassIncGen",
         ":ControlFlowInterfaces",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 7bc92df875c46..79ba323efef07 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -332,6 +332,8 @@ cc_library(
         "//mlir:IR",
         "//mlir:MemRefDialect",
         "//mlir:Pass",
+        "//mlir:SCFDialect",
+        "//mlir:StandardOps",
         "//mlir:Support",
         "//mlir:Transforms",
         "//mlir:VectorOps",

From f302e0b5dd402e629620a58f9115a3441c65d60f Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham@arm.com>
Date: Tue, 25 Jan 2022 10:37:19 +0000
Subject: [PATCH 525/946] [AArch64] Exclude optional features from HasV8_0rOps.

The following SubtargetFeatures are removed from the definition of
HasV8_0rOps, on the grounds that they are optional in Armv8.4-A, and
therefore (by the definition of Armv8.0-R) also optional in v8.0-R:

 * performance monitoring: FeaturePerfMon
 * cryptography: FeatureSM4 and FeatureSHA3
 * half-precision FP: FeatureFullFP16, FeatureFP16FML
 * speculation control: FeatureSSBS, FeaturePredRes, FeatureSB,
   FeatureSpecRestrict

This isn't the full set of features that are listed as optional in the
spec. FeatureCCIDX and FeatureTRACEV8_4 are also optional. But LLVM
includes those in HasV8_3aOps and HasV8_4aOps respectively (I think on
the grounds that the system registers they enable are useful to be
able to access after a runtime check), and so for consistency, I've
left those in HasV8_0rOps too.

After this commit, HasV8_0rOps is a strict subset of HasV8_4aOps (but
missing features that are not in Armv8.0-R at all).

The definition of Cortex-R82 is correspondingly updated to add most of
the features that I've removed from base Armv8.0-R (with the exception
of the cryptography ones), since that particular implementation of
v8.0-R does have them.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D118045
---
 llvm/lib/Target/AArch64/AArch64.td                  | 13 ++++++-------
 llvm/test/MC/AArch64/armv8.2a-crypto.s              |  3 ++-
 .../MC/Disassembler/AArch64/armv8.3a-complex.txt    |  3 ++-
 .../MC/Disassembler/AArch64/armv8.5a-predres.txt    |  2 +-
 .../Disassembler/AArch64/armv8.5a-specrestrict.txt  |  2 +-
 llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt |  2 +-
 llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt  |  1 -
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
index 0a2d88c12338b..b87468d5c8de2 100644
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -528,16 +528,13 @@ def HasV8_0rOps : SubtargetFeature<
   [//v8.1
   FeatureCRC, FeaturePAN, FeatureRDM, FeatureLSE, FeatureCONTEXTIDREL2,
   //v8.2
-  FeaturePerfMon, FeatureRAS, FeaturePsUAO, FeatureSM4,
-  FeatureSHA3, FeatureCCPP, FeatureFullFP16, FeaturePAN_RWV,
+  FeatureRAS, FeaturePsUAO, FeatureCCPP, FeaturePAN_RWV,
   //v8.3
   FeatureComplxNum, FeatureCCIDX, FeatureJS,
   FeaturePAuth, FeatureRCPC,
   //v8.4
-  FeatureDotProd, FeatureFP16FML, FeatureTRACEV8_4,
-  FeatureTLB_RMI, FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO,
-  //v8.5
-  FeatureSSBS, FeaturePredRes, FeatureSB, FeatureSpecRestrict]>;
+  FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI,
+  FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO]>;
 
 //===----------------------------------------------------------------------===//
 // Register File Description
@@ -969,7 +966,9 @@ def ProcessorFeatures {
   list<SubtargetFeature> A710 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon,
                                  FeatureETE, FeatureMTE, FeatureFP16FML,
                                  FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8];
-  list<SubtargetFeature> R82  = [HasV8_0rOps];
+  list<SubtargetFeature> R82  = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16,
+                                 FeatureFP16FML, FeatureSSBS, FeaturePredRes,
+                                 FeatureSB, FeatureSpecRestrict];
   list<SubtargetFeature> X1   = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8,
                                  FeatureNEON, FeatureRCPC, FeaturePerfMon,
                                  FeatureSPE, FeatureFullFP16, FeatureDotProd];
diff --git a/llvm/test/MC/AArch64/armv8.2a-crypto.s b/llvm/test/MC/AArch64/armv8.2a-crypto.s
index 8a1052037c683..93e84d5830b80 100644
--- a/llvm/test/MC/AArch64/armv8.2a-crypto.s
+++ b/llvm/test/MC/AArch64/armv8.2a-crypto.s
@@ -7,7 +7,8 @@
 // RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2> %t
 // RUN: FileCheck --check-prefix=CHECK-NO-SHA --check-prefix=CHECK-NO-SM < %t %s
 
-// RUN: llvm-mc -triple aarch64 -mattr=+v8r -show-encoding -o - %s | FileCheck %s --check-prefixes=CHECK-SM,CHECK-SHA
+// RUN: not llvm-mc -triple aarch64 -mattr=+v8r -show-encoding -o - %s < %s 2> %t
+// RUN: FileCheck --check-prefix=CHECK-NO-SHA --check-prefix=CHECK-NO-SM < %t %s
 
   sha512h   q0, q1, v2.2d
   sha512h2  q0, q1, v2.2d
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.3a-complex.txt b/llvm/test/MC/Disassembler/AArch64/armv8.3a-complex.txt
index b62d424eefe00..f920639173ad2 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.3a-complex.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.3a-complex.txt
@@ -1,7 +1,8 @@
 # RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.3a,-fullfp16 --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
 # RUN: FileCheck %s < %t --check-prefix=NO-FP16
 # RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8.3a,+fullfp16 --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
-# RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8r --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK --check-prefix=FP16
+# RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8r --disassemble < %s 2>%t | FileCheck %s --check-prefix=CHECK
+# RUN: FileCheck %s < %t --check-prefix=NO-FP16
 # RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=-v8.3a,+fullfp16 --disassemble < %s 2>&1 | FileCheck %s --check-prefix=NO-V83A
 
 ###### FCMLA vector
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.5a-predres.txt b/llvm/test/MC/Disassembler/AArch64/armv8.5a-predres.txt
index 45bb17bdba840..e1b10b16a0f43 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.5a-predres.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.5a-predres.txt
@@ -1,6 +1,6 @@
 # RUN: llvm-mc -triple=aarch64 -mattr=+predres -disassemble < %s      | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a    -disassemble < %s      | FileCheck %s
-# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s --check-prefix=NOSB
 # RUN: llvm-mc -triple=aarch64 -mattr=-predres -disassemble < %s 2>&1 | FileCheck %s --check-prefix=NOSB
 
 [0x80 0x73 0x0b 0xd5]
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt b/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt
index 8662b905a3f99..3301680b1fd8d 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt
@@ -1,6 +1,6 @@
 # RUN: llvm-mc -triple=aarch64 -mattr=+specrestrict -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a        -disassemble < %s | FileCheck %s
-# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 # RUN: llvm-mc -triple=aarch64 -mattr=-specrestrict -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 
 [0x81 0x03 0x38 0xd5]
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt b/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
index 3efec07fdcb14..7698751c88076 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8.5a-ssbs.txt
@@ -2,7 +2,7 @@
 # RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mcpu=cortex-a76 -disassemble < %s | FileCheck %s
 # RUN: llvm-mc -triple=aarch64 -mcpu=cortex-a76ae -disassemble < %s | FileCheck %s
-# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 # RUN: llvm-mc -triple=aarch64 -mattr=-ssbs  -disassemble < %s | FileCheck %s --check-prefix=NOSPECID
 
 [0x3f 0x41 0x03 0xd5]
diff --git a/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt b/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt
index 57520c99cb80c..5265df1ec7920 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv8a-fpmul.txt
@@ -1,6 +1,5 @@
 # RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+fp16fml           --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FP16
 # RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=-fullfp16,+fp16fml --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FP16
-# RUN:     llvm-mc -triple aarch64-none-linux-gnu -mattr=+v8r --disassemble < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,FP16
 
 #A fullfp16 instruction, for testing the interaction of the features
 [0x41,0x08,0xe3,0x1e]

From 99adacbcb7895114a62266c8a15e794bacd2380c Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 12:04:06 +0100
Subject: [PATCH 526/946] [clang] Remove some getPointerElementType() uses

Same cases where the call can be removed in a straightforward way.
---
 clang/lib/CodeGen/CGAtomic.cpp           |  8 ++++----
 clang/lib/CodeGen/CGCall.h               |  3 ++-
 clang/lib/CodeGen/CGExprScalar.cpp       |  5 +++--
 clang/lib/CodeGen/CGOpenMPRuntime.cpp    |  5 +----
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 13 +++++++------
 clang/lib/CodeGen/TargetInfo.cpp         | 14 ++++++++------
 6 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 6532f02879612..10569ae2c3f91 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -351,12 +351,12 @@ bool AtomicInfo::requiresMemSetZero(llvm::Type *type) const {
 
 bool AtomicInfo::emitMemSetZeroIfNecessary() const {
   assert(LVal.isSimple());
-  llvm::Value *addr = LVal.getPointer(CGF);
-  if (!requiresMemSetZero(addr->getType()->getPointerElementType()))
+  Address addr = LVal.getAddress(CGF);
+  if (!requiresMemSetZero(addr.getElementType()))
     return false;
 
   CGF.Builder.CreateMemSet(
-      addr, llvm::ConstantInt::get(CGF.Int8Ty, 0),
+      addr.getPointer(), llvm::ConstantInt::get(CGF.Int8Ty, 0),
       CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits).getQuantity(),
       LVal.getAlignment().getAsAlign());
   return true;
@@ -1522,7 +1522,7 @@ RValue AtomicInfo::ConvertIntToValueOrAtomic(llvm::Value *IntVal,
        !AsValue)) {
     auto *ValTy = AsValue
                       ? CGF.ConvertTypeForMem(ValueTy)
-                      : getAtomicAddress().getType()->getPointerElementType();
+                      : getAtomicAddress().getElementType();
     if (ValTy->isIntegerTy()) {
       assert(IntVal->getType() == ValTy && "Different integer types.");
       return RValue::get(CGF.EmitFromMemory(IntVal, ValueTy));
diff --git a/clang/lib/CodeGen/CGCall.h b/clang/lib/CodeGen/CGCall.h
index d2bb104a3f9d3..af63e1bddd2d6 100644
--- a/clang/lib/CodeGen/CGCall.h
+++ b/clang/lib/CodeGen/CGCall.h
@@ -112,7 +112,8 @@ class CGCallee {
     assert(functionPtr && "configuring callee without function pointer");
     assert(functionPtr->getType()->isPointerTy());
     assert(functionPtr->getType()->isOpaquePointerTy() ||
-           functionPtr->getType()->getPointerElementType()->isFunctionTy());
+           functionPtr->getType()->getNonOpaquePointerElementType()
+               ->isFunctionTy());
   }
 
   static CGCallee forBuiltin(unsigned builtinID,
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index e32462eb635cd..4e8933fffe03b 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -1613,8 +1613,9 @@ ScalarExprEmitter::VisitSYCLUniqueStableNameExpr(SYCLUniqueStableNameExpr *E) {
   if (GlobalConstStr->getType()->getPointerAddressSpace() == ExprAS)
     return GlobalConstStr;
 
-  llvm::Type *EltTy = GlobalConstStr->getType()->getPointerElementType();
-  llvm::PointerType *NewPtrTy = llvm::PointerType::get(EltTy, ExprAS);
+  llvm::PointerType *PtrTy = cast<llvm::PointerType>(GlobalConstStr->getType());
+  llvm::PointerType *NewPtrTy =
+      llvm::PointerType::getWithSamePointeeType(PtrTy, ExprAS);
   return Builder.CreateAddrSpaceCast(GlobalConstStr, NewPtrTy, "usn_addr_cast");
 }
 
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index fd956aabc717f..c8c08060e729e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -837,10 +837,7 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
   }
   llvm::Value *Size;
   llvm::Value *SizeInChars;
-  auto *ElemType = OrigAddresses[N]
-                       .first.getPointer(CGF)
-                       ->getType()
-                       ->getPointerElementType();
+  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
   if (AsArraySection) {
     Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 594c7fef36a77..7c8e4e6b52a08 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3401,12 +3401,13 @@ CGOpenMPRuntimeGPU::getParameterAddress(CodeGenFunction &CGF,
       LocalAddr, /*Volatile=*/false, TargetTy, SourceLocation());
   // First cast to generic.
   TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
-                      /*AddrSpace=*/0));
+      TargetAddr, llvm::PointerType::getWithSamePointeeType(
+          cast<llvm::PointerType>(TargetAddr->getType()), /*AddrSpace=*/0));
   // Cast from generic to native address space.
   TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
-                      NativePointeeAddrSpace));
+      TargetAddr, llvm::PointerType::getWithSamePointeeType(
+          cast<llvm::PointerType>(TargetAddr->getType()),
+                                  NativePointeeAddrSpace));
   Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
   CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
                         NativeParamType);
@@ -3431,8 +3432,8 @@ void CGOpenMPRuntimeGPU::emitOutlinedFunctionCall(
       continue;
     }
     llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
-        NativeArg,
-        NativeArg->getType()->getPointerElementType()->getPointerTo());
+        NativeArg, llvm::PointerType::getWithSamePointeeType(
+            cast<llvm::PointerType>(NativeArg->getType()), /*AddrSpace*/ 0));
     TargetArgs.emplace_back(
         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
   }
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index fd9a7e602833a..fb81169003fc0 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -8936,8 +8936,9 @@ class AMDGPUABIInfo final : public DefaultABIInfo {
   llvm::Type *coerceKernelArgumentType(llvm::Type *Ty, unsigned FromAS,
                                        unsigned ToAS) const {
     // Single value types.
-    if (Ty->isPointerTy() && Ty->getPointerAddressSpace() == FromAS)
-      return llvm::PointerType::get(Ty->getPointerElementType(), ToAS);
+    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(Ty);
+    if (PtrTy && PtrTy->getAddressSpace() == FromAS)
+      return llvm::PointerType::getWithSamePointeeType(PtrTy, ToAS);
     return Ty;
   }
 
@@ -9333,8 +9334,8 @@ llvm::Constant *AMDGPUTargetCodeGenInfo::getNullPointer(
     return llvm::ConstantPointerNull::get(PT);
 
   auto &Ctx = CGM.getContext();
-  auto NPT = llvm::PointerType::get(PT->getPointerElementType(),
-      Ctx.getTargetAddressSpace(LangAS::opencl_generic));
+  auto NPT = llvm::PointerType::getWithSamePointeeType(
+      PT, Ctx.getTargetAddressSpace(LangAS::opencl_generic));
   return llvm::ConstantExpr::getAddrSpaceCast(
       llvm::ConstantPointerNull::get(NPT), PT);
 }
@@ -10269,8 +10270,9 @@ ABIArgInfo SPIRVABIInfo::classifyKernelArgumentType(QualType Ty) const {
     llvm::Type *LTy = CGT.ConvertType(Ty);
     auto DefaultAS = getContext().getTargetAddressSpace(LangAS::Default);
     auto GlobalAS = getContext().getTargetAddressSpace(LangAS::cuda_device);
-    if (LTy->isPointerTy() && LTy->getPointerAddressSpace() == DefaultAS) {
-      LTy = llvm::PointerType::get(LTy->getPointerElementType(), GlobalAS);
+    auto *PtrTy = llvm::dyn_cast<llvm::PointerType>(LTy);
+    if (PtrTy && PtrTy->getAddressSpace() == DefaultAS) {
+      LTy = llvm::PointerType::getWithSamePointeeType(PtrTy, GlobalAS);
       return ABIArgInfo::getDirect(LTy, 0, nullptr, false);
     }
   }

From 7cb452bfde1086f7bcddfd6de5594ebcb4c11bf5 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Mon, 24 Jan 2022 11:43:20 +0000
Subject: [PATCH 527/946] [SelectionDAG][VP] Add widening support for VP_MERGE

This patch adds widening support for ISD::VP_MERGE, which widens
identically to VP_SELECT and similarly to other select-like nodes.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D118030
---
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  3 +-
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 61 ++++++++++++----
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 69 ++++++++++++++-----
 3 files changed, 104 insertions(+), 29 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5dd5db2ad51f0..0bd44ce4c872e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3231,6 +3231,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::VSELECT:
   case ISD::SELECT:
   case ISD::VP_SELECT:
+  case ISD::VP_MERGE:
     Res = WidenVecRes_Select(N);
     break;
   case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
@@ -4782,7 +4783,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
   SDValue InOp1 = GetWidenedVector(N->getOperand(1));
   SDValue InOp2 = GetWidenedVector(N->getOperand(2));
   assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
-  return Opcode == ISD::VP_SELECT
+  return Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE
              ? DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
                            N->getOperand(3))
              : DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index e904e8a4d495c..8ac3184f02c45 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -78,6 +78,43 @@ define <4 x i8> @vpmerge_vi_v4i8(<4 x i8> %vb, <4 x i1> %m, i32 zeroext %evl) {
   ret <4 x i8> %v
 }
 
+declare <6 x i8> @llvm.vp.merge.v6i8(<6 x i1>, <6 x i8>, <6 x i8>, i32)
+
+define <6 x i8> @vpmerge_vv_v6i8(<6 x i8> %va, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
+define <6 x i8> @vpmerge_vx_v6i8(i8 %a, <6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <6 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
+define <6 x i8> @vpmerge_vi_v6i8(<6 x i8> %vb, <6 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_v6i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <6 x i8> poison, i8 2, i32 0
+  %va = shufflevector <6 x i8> %elt.head, <6 x i8> poison, <6 x i32> zeroinitializer
+  %v = call <6 x i8> @llvm.vp.merge.v6i8(<6 x i1> %m, <6 x i8> %va, <6 x i8> %vb, i32 %evl)
+  ret <6 x i8> %v
+}
+
 declare <8 x i8> @llvm.vp.merge.v8i8(<8 x i1>, <8 x i8>, <8 x i8>, i32)
 
 define <8 x i8> @vpmerge_vv_v8i8(<8 x i8> %va, <8 x i8> %vb, <8 x i1> %m, i32 zeroext %evl) {
@@ -981,10 +1018,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    bltu a2, a3, .LBB72_2
+; RV32-NEXT:    bltu a2, a3, .LBB75_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a3
-; RV32-NEXT:  .LBB72_2:
+; RV32-NEXT:  .LBB75_2:
 ; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV32-NEXT:    vslidedown.vi v0, v1, 2
@@ -1001,10 +1038,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT:    bltu a2, a0, .LBB72_4
+; RV32-NEXT:    bltu a2, a0, .LBB75_4
 ; RV32-NEXT:  # %bb.3:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB72_4:
+; RV32-NEXT:  .LBB75_4:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    addi a0, sp, 16
@@ -1037,10 +1074,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    bltu a2, a3, .LBB72_2
+; RV64-NEXT:    bltu a2, a3, .LBB75_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a1, a3
-; RV64-NEXT:  .LBB72_2:
+; RV64-NEXT:  .LBB75_2:
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV64-NEXT:    vslidedown.vi v0, v1, 2
@@ -1049,10 +1086,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV64-NEXT:    addi a1, sp, 16
 ; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT:    bltu a2, a0, .LBB72_4
+; RV64-NEXT:    bltu a2, a0, .LBB75_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB72_4:
+; RV64-NEXT:  .LBB75_4:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v1
 ; RV64-NEXT:    csrr a0, vlenb
@@ -1077,19 +1114,19 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
 ; CHECK-NEXT:    addi a2, a0, -16
 ; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bltu a0, a2, .LBB73_2
+; CHECK-NEXT:    bltu a0, a2, .LBB76_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:  .LBB73_2:
+; CHECK-NEXT:  .LBB76_2:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v0, v24, 2
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    vfmerge.vfm v16, v16, fa0, v0
-; CHECK-NEXT:    bltu a0, a1, .LBB73_4
+; CHECK-NEXT:    bltu a0, a1, .LBB76_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    li a0, 16
-; CHECK-NEXT:  .LBB73_4:
+; CHECK-NEXT:  .LBB76_4:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 653217a20c54e..6a4ac666b1101 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -78,6 +78,43 @@ define <vscale x 2 x i8> @vpmerge_vi_nxv2i8(<vscale x 2 x i8> %vb, <vscale x 2 x
   ret <vscale x 2 x i8> %v
 }
 
+declare <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1>, <vscale x 3 x i8>, <vscale x 3 x i8>, i32)
+
+define <vscale x 3 x i8> @vpmerge_vv_nxv3i8(<vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vv_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vvm v9, v9, v8, v0
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    ret
+  %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vpmerge_vx_nxv3i8(i8 %a, <vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vx_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 %a, i32 0
+  %va = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
+define <vscale x 3 x i8> @vpmerge_vi_nxv3i8(<vscale x 3 x i8> %vb, <vscale x 3 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpmerge_vi_nxv3i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf2, tu, mu
+; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT:    ret
+  %elt.head = insertelement <vscale x 3 x i8> poison, i8 2, i32 0
+  %va = shufflevector <vscale x 3 x i8> %elt.head, <vscale x 3 x i8> poison, <vscale x 3 x i32> zeroinitializer
+  %v = call <vscale x 3 x i8> @llvm.vp.merge.nxv3i8(<vscale x 3 x i1> %m, <vscale x 3 x i8> %va, <vscale x 3 x i8> %vb, i32 %evl)
+  ret <vscale x 3 x i8> %v
+}
+
 declare <vscale x 4 x i8> @llvm.vp.merge.nxv4i8(<vscale x 4 x i1>, <vscale x 4 x i8>, <vscale x 4 x i8>, i32)
 
 define <vscale x 4 x i8> @vpmerge_vv_nxv4i8(<vscale x 4 x i8> %va, <vscale x 4 x i8> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
@@ -295,10 +332,10 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; RV32-NEXT:    addi a2, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    bltu a3, a4, .LBB21_2
+; RV32-NEXT:    bltu a3, a4, .LBB24_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a2, a4
-; RV32-NEXT:  .LBB21_2:
+; RV32-NEXT:  .LBB24_2:
 ; RV32-NEXT:    vl8r.v v8, (a0)
 ; RV32-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v2
@@ -313,10 +350,10 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT:    bltu a3, a1, .LBB21_4
+; RV32-NEXT:    bltu a3, a1, .LBB24_4
 ; RV32-NEXT:  # %bb.3:
 ; RV32-NEXT:    mv a3, a1
-; RV32-NEXT:  .LBB21_4:
+; RV32-NEXT:  .LBB24_4:
 ; RV32-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    addi a0, sp, 16
@@ -347,18 +384,18 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    li a2, 0
-; RV64-NEXT:    bltu a3, a4, .LBB21_2
+; RV64-NEXT:    bltu a3, a4, .LBB24_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a2, a4
-; RV64-NEXT:  .LBB21_2:
+; RV64-NEXT:  .LBB24_2:
 ; RV64-NEXT:    vl8r.v v8, (a0)
 ; RV64-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT:    bltu a3, a1, .LBB21_4
+; RV64-NEXT:    bltu a3, a1, .LBB24_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    mv a3, a1
-; RV64-NEXT:  .LBB21_4:
+; RV64-NEXT:  .LBB24_4:
 ; RV64-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v1
 ; RV64-NEXT:    addi a0, sp, 16
@@ -380,20 +417,20 @@ define <vscale x 128 x i8> @vpmerge_vx_nxv128i8(i8 %a, <vscale x 128 x i8> %vb,
 ; CHECK-NEXT:    csrr a3, vlenb
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    bltu a2, a3, .LBB22_2
+; CHECK-NEXT:    bltu a2, a3, .LBB25_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:  .LBB22_2:
+; CHECK-NEXT:  .LBB25_2:
 ; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    vsetvli a6, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v24, (a1)
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
 ; CHECK-NEXT:    sub a1, a2, a3
 ; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT:    bltu a2, a1, .LBB22_4
+; CHECK-NEXT:    bltu a2, a1, .LBB25_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a5, a1
-; CHECK-NEXT:  .LBB22_4:
+; CHECK-NEXT:  .LBB25_4:
 ; CHECK-NEXT:    vsetvli zero, a5, e8, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vxm v16, v16, a0, v0
@@ -410,20 +447,20 @@ define <vscale x 128 x i8> @vpmerge_vi_nxv128i8(<vscale x 128 x i8> %vb, <vscale
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:    bltu a1, a2, .LBB23_2
+; CHECK-NEXT:    bltu a1, a2, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:  .LBB23_2:
+; CHECK-NEXT:  .LBB26_2:
 ; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    vsetvli a5, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; CHECK-NEXT:    sub a0, a1, a2
 ; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
-; CHECK-NEXT:    bltu a1, a0, .LBB23_4
+; CHECK-NEXT:    bltu a1, a0, .LBB26_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a4, a0
-; CHECK-NEXT:  .LBB23_4:
+; CHECK-NEXT:  .LBB26_4:
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vim v16, v16, 2, v0

From 0f08db66db93b42ff26caca2159bd548436184ae Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Mon, 17 Jan 2022 15:56:11 +0100
Subject: [PATCH 528/946] [lldb] Make logging machinery type-safe

This patch makes use of c++ type checking and scoped enums to make
logging statements shorter and harder to misuse.

Defines like LIBLLDB_LOG_PROCESS are replaces with LLDBLog::Process.
Because it now carries type information we do not need to worry about
matching a specific enum value with the right getter function -- the
compiler will now do that for us.

The main entry point for the logging machinery becomes the GetLog
(template) function, which will obtain the correct Log object based on
the enum type. It achieves this through another template function
(LogChannelFor<T>), which must be specialized for each type, and should
return the appropriate channel object.

This patch also removes the ability to log a message if multiple
categories are enabled simultaneously as it was unused and confusing.

This patch does not actually remove any of the existing interfaces. The
defines and log retrieval functions are left around as wrappers around
the new interfaces. They will be removed in follow-up patch.

Differential Revision: https://reviews.llvm.org/D117490
---
 .../lldb/Interpreter/ScriptedInterface.h      |   2 +-
 lldb/include/lldb/Utility/Log.h               |  55 +++++++--
 lldb/include/lldb/Utility/Logging.h           | 116 +++++++++++-------
 .../Plugins/Process/POSIX/ProcessPOSIXLog.cpp |  20 +--
 .../Plugins/Process/POSIX/ProcessPOSIXLog.h   |  40 +++---
 .../GDBRemoteCommunicationServerLLGS.cpp      |   2 +-
 .../Process/gdb-remote/ProcessGDBRemote.cpp   |   5 +-
 .../gdb-remote/ProcessGDBRemoteLog.cpp        |  28 +++--
 .../Process/gdb-remote/ProcessGDBRemoteLog.h  |  55 ++++++---
 .../Process/gdb-remote/ThreadGDBRemote.cpp    |   8 +-
 .../SymbolFile/DWARF/LogChannelDWARF.cpp      |  18 +--
 .../SymbolFile/DWARF/LogChannelDWARF.h        |  33 +++--
 lldb/source/Utility/Log.cpp                   |   4 +-
 lldb/source/Utility/Logging.cpp               |  97 +++++++++------
 lldb/tools/lldb-server/lldb-gdbserver.cpp     |   1 +
 lldb/unittests/Utility/LogTest.cpp            |  60 +++++----
 16 files changed, 348 insertions(+), 196 deletions(-)

diff --git a/lldb/include/lldb/Interpreter/ScriptedInterface.h b/lldb/include/lldb/Interpreter/ScriptedInterface.h
index 9eb11832003e6..9de5e60cfea32 100644
--- a/lldb/include/lldb/Interpreter/ScriptedInterface.h
+++ b/lldb/include/lldb/Interpreter/ScriptedInterface.h
@@ -33,7 +33,7 @@ class ScriptedInterface {
   template <typename Ret>
   static Ret ErrorWithMessage(llvm::StringRef caller_name,
                               llvm::StringRef error_msg, Status &error,
-                              uint32_t log_caterogy = LIBLLDB_LOG_PROCESS) {
+                              LLDBLog log_caterogy = LLDBLog::Process) {
     LLDB_LOGF(GetLogIfAllCategoriesSet(log_caterogy), "%s ERROR = %s",
               caller_name.data(), error_msg.data());
     error.SetErrorString(llvm::Twine(caller_name + llvm::Twine(" ERROR = ") +
diff --git a/lldb/include/lldb/Utility/Log.h b/lldb/include/lldb/Utility/Log.h
index 2684783939bd8..09fd2cb3a7e60 100644
--- a/lldb/include/lldb/Utility/Log.h
+++ b/lldb/include/lldb/Utility/Log.h
@@ -10,7 +10,6 @@
 #define LLDB_UTILITY_LOG_H
 
 #include "lldb/Utility/Flags.h"
-#include "lldb/Utility/Logging.h"
 #include "lldb/lldb-defines.h"
 
 #include "llvm/ADT/ArrayRef.h"
@@ -48,11 +47,31 @@ namespace lldb_private {
 
 class Log final {
 public:
+  /// The underlying type of all log channel enums. Declare them as:
+  /// enum class MyLog : MaskType {
+  ///   Channel0 = Log::ChannelFlag<0>,
+  ///   Channel1 = Log::ChannelFlag<1>,
+  ///   ...,
+  ///   LLVM_MARK_AS_BITMASK_ENUM(LastChannel),
+  /// };
+  using MaskType = uint64_t;
+
+  template <MaskType Bit>
+  static constexpr MaskType ChannelFlag = MaskType(1) << Bit;
+
   // Description of a log channel category.
   struct Category {
     llvm::StringLiteral name;
     llvm::StringLiteral description;
-    uint32_t flag;
+    MaskType flag;
+
+    template <typename Cat>
+    constexpr Category(llvm::StringLiteral name,
+                       llvm::StringLiteral description, Cat mask)
+        : name(name), description(description), flag(MaskType(mask)) {
+      static_assert(
+          std::is_same<Log::MaskType, std::underlying_type_t<Cat>>::value, "");
+    }
   };
 
   // This class describes a log channel. It also encapsulates the behavior
@@ -63,18 +82,22 @@ class Log final {
 
   public:
     const llvm::ArrayRef<Category> categories;
-    const uint32_t default_flags;
+    const MaskType default_flags;
 
+    template <typename Cat>
     constexpr Channel(llvm::ArrayRef<Log::Category> categories,
-                      uint32_t default_flags)
+                      Cat default_flags)
         : log_ptr(nullptr), categories(categories),
-          default_flags(default_flags) {}
+          default_flags(MaskType(default_flags)) {
+      static_assert(
+          std::is_same<Log::MaskType, std::underlying_type_t<Cat>>::value, "");
+    }
 
     // This function is safe to call at any time. If the channel is disabled
     // after (or concurrently with) this function returning a non-null Log
     // pointer, it is still safe to attempt to write to the Log object -- the
     // output will be discarded.
-    Log *GetLogIfAll(uint32_t mask) {
+    Log *GetLogIfAll(MaskType mask) {
       Log *log = log_ptr.load(std::memory_order_relaxed);
       if (log && log->GetMask().AllSet(mask))
         return log;
@@ -85,7 +108,7 @@ class Log final {
     // after (or concurrently with) this function returning a non-null Log
     // pointer, it is still safe to attempt to write to the Log object -- the
     // output will be discarded.
-    Log *GetLogIfAny(uint32_t mask) {
+    Log *GetLogIfAny(MaskType mask) {
       Log *log = log_ptr.load(std::memory_order_relaxed);
       if (log && log->GetMask().AnySet(mask))
         return log;
@@ -180,7 +203,7 @@ class Log final {
 
   std::shared_ptr<llvm::raw_ostream> m_stream_sp;
   std::atomic<uint32_t> m_options{0};
-  std::atomic<uint32_t> m_mask{0};
+  std::atomic<MaskType> m_mask{0};
 
   void WriteHeader(llvm::raw_ostream &OS, llvm::StringRef file,
                    llvm::StringRef function);
@@ -215,6 +238,19 @@ class Log final {
   void operator=(const Log &) = delete;
 };
 
+// Must be specialized for a particular log type.
+template <typename Cat> Log::Channel &LogChannelFor() = delete;
+
+/// Retrieve the Log object for the channel associated with the given log enum.
+///
+/// Returns a valid Log object if any of the provided categories are enabled.
+/// Otherwise, returns nullptr.
+template <typename Cat> Log *GetLog(Cat mask) {
+  static_assert(std::is_same<Log::MaskType, std::underlying_type_t<Cat>>::value,
+                "");
+  return LogChannelFor<Cat>().GetLogIfAny(Log::MaskType(mask));
+}
+
 } // namespace lldb_private
 
 /// The LLDB_LOG* macros defined below are the way to emit log messages.
@@ -272,3 +308,6 @@ class Log final {
   } while (0)
 
 #endif // LLDB_UTILITY_LOG_H
+
+// TODO: Remove this and fix includes everywhere.
+#include "lldb/Utility/Logging.h"
diff --git a/lldb/include/lldb/Utility/Logging.h b/lldb/include/lldb/Utility/Logging.h
index 1a8a1022c5c0e..db84da244954a 100644
--- a/lldb/include/lldb/Utility/Logging.h
+++ b/lldb/include/lldb/Utility/Logging.h
@@ -9,57 +9,89 @@
 #ifndef LLDB_UTILITY_LOGGING_H
 #define LLDB_UTILITY_LOGGING_H
 
+#include "lldb/Utility/Log.h"
+#include "llvm/ADT/BitmaskEnum.h"
 #include <cstdint>
 
-// Log Bits specific to logging in lldb
-#define LIBLLDB_LOG_PROCESS (1u << 1)
-#define LIBLLDB_LOG_THREAD (1u << 2)
-#define LIBLLDB_LOG_DYNAMIC_LOADER (1u << 3)
-#define LIBLLDB_LOG_EVENTS (1u << 4)
-#define LIBLLDB_LOG_BREAKPOINTS (1u << 5)
-#define LIBLLDB_LOG_WATCHPOINTS (1u << 6)
-#define LIBLLDB_LOG_STEP (1u << 7)
-#define LIBLLDB_LOG_EXPRESSIONS (1u << 8)
-#define LIBLLDB_LOG_TEMPORARY (1u << 9)
-#define LIBLLDB_LOG_STATE (1u << 10)
-#define LIBLLDB_LOG_OBJECT (1u << 11)
-#define LIBLLDB_LOG_COMMUNICATION (1u << 12)
-#define LIBLLDB_LOG_CONNECTION (1u << 13)
-#define LIBLLDB_LOG_HOST (1u << 14)
-#define LIBLLDB_LOG_UNWIND (1u << 15)
-#define LIBLLDB_LOG_API (1u << 16)
-#define LIBLLDB_LOG_SCRIPT (1u << 17)
-#define LIBLLDB_LOG_COMMANDS (1U << 18)
-#define LIBLLDB_LOG_TYPES (1u << 19)
-#define LIBLLDB_LOG_SYMBOLS (1u << 20)
-#define LIBLLDB_LOG_MODULES (1u << 21)
-#define LIBLLDB_LOG_TARGET (1u << 22)
-#define LIBLLDB_LOG_MMAP (1u << 23)
-#define LIBLLDB_LOG_OS (1u << 24)
-#define LIBLLDB_LOG_PLATFORM (1u << 25)
-#define LIBLLDB_LOG_SYSTEM_RUNTIME (1u << 26)
-#define LIBLLDB_LOG_JIT_LOADER (1u << 27)
-#define LIBLLDB_LOG_LANGUAGE (1u << 28)
-#define LIBLLDB_LOG_DATAFORMATTERS (1u << 29)
-#define LIBLLDB_LOG_DEMANGLE (1u << 30)
-#define LIBLLDB_LOG_AST (1u << 31)
-#define LIBLLDB_LOG_ALL (UINT32_MAX)
-#define LIBLLDB_LOG_DEFAULT                                                    \
-  (LIBLLDB_LOG_PROCESS | LIBLLDB_LOG_THREAD | LIBLLDB_LOG_DYNAMIC_LOADER |     \
-   LIBLLDB_LOG_BREAKPOINTS | LIBLLDB_LOG_WATCHPOINTS | LIBLLDB_LOG_STEP |      \
-   LIBLLDB_LOG_STATE | LIBLLDB_LOG_SYMBOLS | LIBLLDB_LOG_TARGET |              \
-   LIBLLDB_LOG_COMMANDS)
-
 namespace lldb_private {
 
-class Log;
+enum class LLDBLog : Log::MaskType {
+  API = Log::ChannelFlag<0>,
+  AST = Log::ChannelFlag<1>,
+  Breakpoints = Log::ChannelFlag<2>,
+  Commands = Log::ChannelFlag<3>,
+  Communication = Log::ChannelFlag<4>,
+  Connection = Log::ChannelFlag<5>,
+  DataFormatters = Log::ChannelFlag<6>,
+  Demangle = Log::ChannelFlag<7>,
+  DynamicLoader = Log::ChannelFlag<8>,
+  Events = Log::ChannelFlag<9>,
+  Expressions = Log::ChannelFlag<10>,
+  Host = Log::ChannelFlag<11>,
+  JITLoader = Log::ChannelFlag<12>,
+  Language = Log::ChannelFlag<13>,
+  MMap = Log::ChannelFlag<14>,
+  Modules = Log::ChannelFlag<15>,
+  Object = Log::ChannelFlag<16>,
+  OS = Log::ChannelFlag<17>,
+  Platform = Log::ChannelFlag<18>,
+  Process = Log::ChannelFlag<19>,
+  Script = Log::ChannelFlag<20>,
+  State = Log::ChannelFlag<21>,
+  Step = Log::ChannelFlag<22>,
+  Symbols = Log::ChannelFlag<23>,
+  SystemRuntime = Log::ChannelFlag<24>,
+  Target = Log::ChannelFlag<25>,
+  Temporary = Log::ChannelFlag<26>,
+  Thread = Log::ChannelFlag<27>,
+  Types = Log::ChannelFlag<28>,
+  Unwind = Log::ChannelFlag<29>,
+  Watchpoints = Log::ChannelFlag<30>,
+  LLVM_MARK_AS_BITMASK_ENUM(Watchpoints),
+};
+
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
+// Log Bits specific to logging in lldb
+#define LIBLLDB_LOG_PROCESS ::lldb_private::LLDBLog::Process
+#define LIBLLDB_LOG_THREAD ::lldb_private::LLDBLog::Thread
+#define LIBLLDB_LOG_DYNAMIC_LOADER ::lldb_private::LLDBLog::DynamicLoader
+#define LIBLLDB_LOG_EVENTS ::lldb_private::LLDBLog::Events
+#define LIBLLDB_LOG_BREAKPOINTS ::lldb_private::LLDBLog::Breakpoints
+#define LIBLLDB_LOG_WATCHPOINTS ::lldb_private::LLDBLog::Watchpoints
+#define LIBLLDB_LOG_STEP ::lldb_private::LLDBLog::Step
+#define LIBLLDB_LOG_EXPRESSIONS ::lldb_private::LLDBLog::Expressions
+#define LIBLLDB_LOG_TEMPORARY ::lldb_private::LLDBLog::Temporary
+#define LIBLLDB_LOG_STATE ::lldb_private::LLDBLog::State
+#define LIBLLDB_LOG_OBJECT ::lldb_private::LLDBLog::Object
+#define LIBLLDB_LOG_COMMUNICATION ::lldb_private::LLDBLog::Communication
+#define LIBLLDB_LOG_CONNECTION ::lldb_private::LLDBLog::Connection
+#define LIBLLDB_LOG_HOST ::lldb_private::LLDBLog::Host
+#define LIBLLDB_LOG_UNWIND ::lldb_private::LLDBLog::Unwind
+#define LIBLLDB_LOG_API ::lldb_private::LLDBLog::API
+#define LIBLLDB_LOG_SCRIPT ::lldb_private::LLDBLog::Script
+#define LIBLLDB_LOG_COMMANDS ::lldb_private::LLDBLog::Commands
+#define LIBLLDB_LOG_TYPES ::lldb_private::LLDBLog::Types
+#define LIBLLDB_LOG_SYMBOLS ::lldb_private::LLDBLog::Symbols
+#define LIBLLDB_LOG_MODULES ::lldb_private::LLDBLog::Modules
+#define LIBLLDB_LOG_TARGET ::lldb_private::LLDBLog::Target
+#define LIBLLDB_LOG_MMAP ::lldb_private::LLDBLog::MMap
+#define LIBLLDB_LOG_OS ::lldb_private::LLDBLog::OS
+#define LIBLLDB_LOG_PLATFORM ::lldb_private::LLDBLog::Platform
+#define LIBLLDB_LOG_SYSTEM_RUNTIME ::lldb_private::LLDBLog::SystemRuntime
+#define LIBLLDB_LOG_JIT_LOADER ::lldb_private::LLDBLog::JITLoader
+#define LIBLLDB_LOG_LANGUAGE ::lldb_private::LLDBLog::Language
+#define LIBLLDB_LOG_DATAFORMATTERS ::lldb_private::LLDBLog::DataFormatters
+#define LIBLLDB_LOG_DEMANGLE ::lldb_private::LLDBLog::Demangle
+#define LIBLLDB_LOG_AST ::lldb_private::LLDBLog::AST
 
-Log *GetLogIfAllCategoriesSet(uint32_t mask);
+Log *GetLogIfAllCategoriesSet(LLDBLog mask);
 
-Log *GetLogIfAnyCategoriesSet(uint32_t mask);
+Log *GetLogIfAnyCategoriesSet(LLDBLog mask);
 
 void InitializeLldbChannel();
 
+template <> Log::Channel &LogChannelFor<LLDBLog>();
 } // namespace lldb_private
 
 #endif // LLDB_UTILITY_LOGGING_H
diff --git a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp
index f4d0803b264a3..7ad88aabc2c03 100644
--- a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp
+++ b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp
@@ -13,16 +13,20 @@
 using namespace lldb_private;
 
 static constexpr Log::Category g_categories[] = {
-  {{"break"}, {"log breakpoints"}, POSIX_LOG_BREAKPOINTS},
-  {{"memory"}, {"log memory reads and writes"}, POSIX_LOG_MEMORY},
-  {{"process"}, {"log process events and activities"}, POSIX_LOG_PROCESS},
-  {{"ptrace"}, {"log all calls to ptrace"}, POSIX_LOG_PTRACE},
-  {{"registers"}, {"log register read/writes"}, POSIX_LOG_REGISTERS},
-  {{"thread"}, {"log thread events and activities"}, POSIX_LOG_THREAD},
-  {{"watch"}, {"log watchpoint related activities"}, POSIX_LOG_WATCHPOINTS},
+    {{"break"}, {"log breakpoints"}, POSIXLog::Breakpoints},
+    {{"memory"}, {"log memory reads and writes"}, POSIXLog::Memory},
+    {{"process"}, {"log process events and activities"}, POSIXLog::Process},
+    {{"ptrace"}, {"log all calls to ptrace"}, POSIXLog::Ptrace},
+    {{"registers"}, {"log register read/writes"}, POSIXLog::Registers},
+    {{"thread"}, {"log thread events and activities"}, POSIXLog::Thread},
+    {{"watch"}, {"log watchpoint related activities"}, POSIXLog::Watchpoints},
 };
 
-Log::Channel ProcessPOSIXLog::g_channel(g_categories, POSIX_LOG_DEFAULT);
+static Log::Channel g_channel(g_categories, POSIXLog::Process);
+
+template <> Log::Channel &lldb_private::LogChannelFor<POSIXLog>() {
+  return g_channel;
+}
 
 void ProcessPOSIXLog::Initialize() {
   static llvm::once_flag g_once_flag;
diff --git a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
index c0147c43410fb..f0807e1d4480d 100644
--- a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
+++ b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
@@ -13,27 +13,35 @@
 
 #include "lldb/Utility/Log.h"
 
-#define POSIX_LOG_PROCESS (1u << 1)
-#define POSIX_LOG_THREAD (1u << 2)
-#define POSIX_LOG_MEMORY (1u << 4) // Log memory reads/writes calls
-#define POSIX_LOG_PTRACE (1u << 5)
-#define POSIX_LOG_REGISTERS (1u << 6)
-#define POSIX_LOG_BREAKPOINTS (1u << 7)
-#define POSIX_LOG_WATCHPOINTS (1u << 8)
-#define POSIX_LOG_ALL (UINT32_MAX)
-#define POSIX_LOG_DEFAULT POSIX_LOG_PROCESS
-
 namespace lldb_private {
-class ProcessPOSIXLog {
-  static Log::Channel g_channel;
 
+enum class POSIXLog : Log::MaskType {
+  Breakpoints = Log::ChannelFlag<0>,
+  Memory = Log::ChannelFlag<1>,
+  Process = Log::ChannelFlag<2>,
+  Ptrace = Log::ChannelFlag<3>,
+  Registers = Log::ChannelFlag<4>,
+  Thread = Log::ChannelFlag<5>,
+  Watchpoints = Log::ChannelFlag<6>,
+  LLVM_MARK_AS_BITMASK_ENUM(Watchpoints)
+};
+
+#define POSIX_LOG_PROCESS ::lldb_private::POSIXLog::Process
+#define POSIX_LOG_THREAD ::lldb_private::POSIXLog::Thread
+#define POSIX_LOG_MEMORY ::lldb_private::POSIXLog::Memory
+#define POSIX_LOG_PTRACE ::lldb_private::POSIXLog::Ptrace
+#define POSIX_LOG_REGISTERS ::lldb_private::POSIXLog::Registers
+#define POSIX_LOG_BREAKPOINTS ::lldb_private::POSIXLog::Breakpoints
+#define POSIX_LOG_WATCHPOINTS ::lldb_private::POSIXLog::Watchpoints
+
+class ProcessPOSIXLog {
 public:
   static void Initialize();
 
-  static Log *GetLogIfAllCategoriesSet(uint32_t mask) {
-    return g_channel.GetLogIfAll(mask);
-  }
+  static Log *GetLogIfAllCategoriesSet(POSIXLog mask) { return GetLog(mask); }
 };
-}
+
+template <> Log::Channel &LogChannelFor<POSIXLog>();
+} // namespace lldb_private
 
 #endif // liblldb_ProcessPOSIXLog_h_
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
index a6749274ca997..123a8198a89b8 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerLLGS.cpp
@@ -1086,7 +1086,7 @@ void GDBRemoteCommunicationServerLLGS::NewSubprocess(
 }
 
 void GDBRemoteCommunicationServerLLGS::DataAvailableCallback() {
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_COMM));
+  Log *log = GetLog(GDBRLog::Comm);
 
   bool interrupt = false;
   bool done = false;
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
index 1946520d5d6cb..d8ad0b4e4e4be 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp
@@ -529,7 +529,7 @@ Status ProcessGDBRemote::WillAttachToProcessWithName(const char *process_name,
 }
 
 Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) {
-  Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS));
+  Log *log = GetLog(GDBRLog::Process);
 
   Status error(WillLaunchOrAttach());
   if (error.Fail())
@@ -606,8 +606,7 @@ Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) {
                 ReadModuleFromMemory(FileSpec(namebuf), standalone_value);
           }
 
-          Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(
-              LIBLLDB_LOG_DYNAMIC_LOADER));
+          Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_DYNAMIC_LOADER));
           if (module_sp.get()) {
             target.GetImages().AppendIfNeeded(module_sp, false);
 
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.cpp
index 40990ef664943..3322f6b8048ab 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.cpp
@@ -15,25 +15,29 @@ using namespace lldb_private;
 using namespace lldb_private::process_gdb_remote;
 
 static constexpr Log::Category g_categories[] = {
-    {{"async"}, {"log asynchronous activity"}, GDBR_LOG_ASYNC},
-    {{"break"}, {"log breakpoints"}, GDBR_LOG_BREAKPOINTS},
-    {{"comm"}, {"log communication activity"}, GDBR_LOG_COMM},
-    {{"packets"}, {"log gdb remote packets"}, GDBR_LOG_PACKETS},
-    {{"memory"}, {"log memory reads and writes"}, GDBR_LOG_MEMORY},
+    {{"async"}, {"log asynchronous activity"}, GDBRLog::Async},
+    {{"break"}, {"log breakpoints"}, GDBRLog::Breakpoints},
+    {{"comm"}, {"log communication activity"}, GDBRLog::Comm},
+    {{"packets"}, {"log gdb remote packets"}, GDBRLog::Packets},
+    {{"memory"}, {"log memory reads and writes"}, GDBRLog::Memory},
     {{"data-short"},
      {"log memory bytes for memory reads and writes for short transactions "
       "only"},
-     GDBR_LOG_MEMORY_DATA_SHORT},
+     GDBRLog::MemoryDataShort},
     {{"data-long"},
      {"log memory bytes for memory reads and writes for all transactions"},
-     GDBR_LOG_MEMORY_DATA_LONG},
-    {{"process"}, {"log process events and activities"}, GDBR_LOG_PROCESS},
-    {{"step"}, {"log step related activities"}, GDBR_LOG_STEP},
-    {{"thread"}, {"log thread events and activities"}, GDBR_LOG_THREAD},
-    {{"watch"}, {"log watchpoint related activities"}, GDBR_LOG_WATCHPOINTS},
+     GDBRLog::MemoryDataLong},
+    {{"process"}, {"log process events and activities"}, GDBRLog::Process},
+    {{"step"}, {"log step related activities"}, GDBRLog::Step},
+    {{"thread"}, {"log thread events and activities"}, GDBRLog::Thread},
+    {{"watch"}, {"log watchpoint related activities"}, GDBRLog::Watchpoints},
 };
 
-Log::Channel ProcessGDBRemoteLog::g_channel(g_categories, GDBR_LOG_DEFAULT);
+static Log::Channel g_channel(g_categories, GDBRLog::Packets);
+
+template <> Log::Channel &lldb_private::LogChannelFor<GDBRLog>() {
+  return g_channel;
+}
 
 void ProcessGDBRemoteLog::Initialize() {
   static llvm::once_flag g_once_flag;
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h
index bd3e993cf72ac..44e390ec8cadb 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h
+++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h
@@ -11,35 +11,52 @@
 
 #include "lldb/Utility/Log.h"
 
-#define GDBR_LOG_PROCESS (1u << 1)
-#define GDBR_LOG_THREAD (1u << 2)
-#define GDBR_LOG_PACKETS (1u << 3)
-#define GDBR_LOG_MEMORY (1u << 4) // Log memory reads/writes calls
-#define GDBR_LOG_MEMORY_DATA_SHORT                                             \
-  (1u << 5) // Log short memory reads/writes bytes
-#define GDBR_LOG_MEMORY_DATA_LONG (1u << 6) // Log all memory reads/writes bytes
-#define GDBR_LOG_BREAKPOINTS (1u << 7)
-#define GDBR_LOG_WATCHPOINTS (1u << 8)
-#define GDBR_LOG_STEP (1u << 9)
-#define GDBR_LOG_COMM (1u << 10)
-#define GDBR_LOG_ASYNC (1u << 11)
-#define GDBR_LOG_ALL (UINT32_MAX)
-#define GDBR_LOG_DEFAULT GDBR_LOG_PACKETS
-
 namespace lldb_private {
 namespace process_gdb_remote {
 
-class ProcessGDBRemoteLog {
-  static Log::Channel g_channel;
+enum class GDBRLog : Log::MaskType {
+  Async = Log::ChannelFlag<0>,
+  Breakpoints = Log::ChannelFlag<1>,
+  Comm = Log::ChannelFlag<2>,
+  Memory = Log::ChannelFlag<3>,          // Log memory reads/writes calls
+  MemoryDataLong = Log::ChannelFlag<4>,  // Log all memory reads/writes bytes
+  MemoryDataShort = Log::ChannelFlag<5>, // Log short memory reads/writes bytes
+  Packets = Log::ChannelFlag<6>,
+  Process = Log::ChannelFlag<7>,
+  Step = Log::ChannelFlag<8>,
+  Thread = Log::ChannelFlag<9>,
+  Watchpoints = Log::ChannelFlag<10>,
+  LLVM_MARK_AS_BITMASK_ENUM(Watchpoints)
+};
 
+#define GDBR_LOG_PROCESS ::lldb_private::process_gdb_remote::GDBRLog::Process
+#define GDBR_LOG_THREAD ::lldb_private::process_gdb_remote::GDBRLog::Thread
+#define GDBR_LOG_PACKETS ::lldb_private::process_gdb_remote::GDBRLog::Packets
+#define GDBR_LOG_MEMORY ::lldb_private::process_gdb_remote::GDBRLog::Memory
+#define GDBR_LOG_MEMORY_DATA_SHORT                                             \
+  ::lldb_private::process_gdb_remote::GDBRLog::MemoryDataShort
+#define GDBR_LOG_MEMORY_DATA_LONG                                              \
+  ::lldb_private::process_gdb_remote::GDBRLog::MemoryDataLong
+#define GDBR_LOG_BREAKPOINTS                                                   \
+  ::lldb_private::process_gdb_remote::GDBRLog::Breakpoints
+#define GDBR_LOG_WATCHPOINTS                                                   \
+  ::lldb_private::process_gdb_remote::GDBRLog::Watchpoints
+#define GDBR_LOG_STEP ::lldb_private::process_gdb_remote::GDBRLog::Step
+#define GDBR_LOG_COMM ::lldb_private::process_gdb_remote::GDBRLog::Comm
+#define GDBR_LOG_ASYNC ::lldb_private::process_gdb_remote::GDBRLog::Async
+
+class ProcessGDBRemoteLog {
 public:
   static void Initialize();
 
-  static Log *GetLogIfAllCategoriesSet(uint32_t mask) { return g_channel.GetLogIfAll(mask); }
-  static Log *GetLogIfAnyCategoryIsSet(uint32_t mask) { return g_channel.GetLogIfAny(mask); }
+  static Log *GetLogIfAllCategoriesSet(GDBRLog mask) { return GetLog(mask); }
+  static Log *GetLogIfAnyCategoryIsSet(GDBRLog mask) { return GetLog(mask); }
 };
 
 } // namespace process_gdb_remote
+
+template <> Log::Channel &LogChannelFor<process_gdb_remote::GDBRLog>();
+
 } // namespace lldb_private
 
 #endif // LLDB_SOURCE_PLUGINS_PROCESS_GDB_REMOTE_PROCESSGDBREMOTELOG_H
diff --git a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
index 2a9896e41085c..ba73115e4ad60 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
@@ -39,7 +39,7 @@ ThreadGDBRemote::ThreadGDBRemote(Process &process, lldb::tid_t tid)
       m_dispatch_queue_t(LLDB_INVALID_ADDRESS), m_queue_kind(eQueueKindUnknown),
       m_queue_serial_number(LLDB_INVALID_QUEUE_ID),
       m_associated_with_libdispatch_queue(eLazyBoolCalculate) {
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_THREAD));
+  Log *log = GetLog(GDBRLog::Thread);
   LLDB_LOG(log, "this = {0}, pid = {1}, tid = {2}", this, process.GetID(),
            GetID());
   // At this point we can clone reg_info for architectures supporting
@@ -54,7 +54,7 @@ ThreadGDBRemote::ThreadGDBRemote(Process &process, lldb::tid_t tid)
 
 ThreadGDBRemote::~ThreadGDBRemote() {
   ProcessSP process_sp(GetProcess());
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_THREAD));
+  Log *log = GetLog(GDBRLog::Thread);
   LLDB_LOG(log, "this = {0}, pid = {1}, tid = {2}", this,
            process_sp ? process_sp->GetID() : LLDB_INVALID_PROCESS_ID, GetID());
   DestroyThread();
@@ -222,7 +222,7 @@ void ThreadGDBRemote::SetAssociatedWithLibdispatchQueue(
 StructuredData::ObjectSP ThreadGDBRemote::FetchThreadExtendedInfo() {
   StructuredData::ObjectSP object_sp;
   const lldb::user_id_t tid = GetProtocolID();
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_THREAD));
+  Log *log = GetLog(GDBRLog::Thread);
   LLDB_LOGF(log, "Fetching extended information for thread %4.4" PRIx64, tid);
   ProcessSP process_sp(GetProcess());
   if (process_sp) {
@@ -236,7 +236,7 @@ StructuredData::ObjectSP ThreadGDBRemote::FetchThreadExtendedInfo() {
 void ThreadGDBRemote::WillResume(StateType resume_state) {
   int signo = GetResumeSignal();
   const lldb::user_id_t tid = GetProtocolID();
-  Log *log(GetLogIfAnyCategoriesSet(GDBR_LOG_THREAD));
+  Log *log = GetLog(GDBRLog::Thread);
   LLDB_LOGF(log, "Resuming thread: %4.4" PRIx64 " with state: %s.", tid,
             StateAsCString(resume_state));
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp
index 3f1d6677bacf2..d2b8fe19db530 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.cpp
@@ -13,18 +13,20 @@ using namespace lldb_private;
 static constexpr Log::Category g_categories[] = {
     {{"comp"},
      {"log insertions of object files into DWARF debug maps"},
-     DWARF_LOG_TYPE_COMPLETION},
-    {{"info"}, {"log the parsing of .debug_info"}, DWARF_LOG_DEBUG_INFO},
-    {{"line"}, {"log the parsing of .debug_line"}, DWARF_LOG_DEBUG_LINE},
+     DWARFLog::TypeCompletion},
+    {{"info"}, {"log the parsing of .debug_info"}, DWARFLog::DebugInfo},
+    {{"line"}, {"log the parsing of .debug_line"}, DWARFLog::DebugLine},
     {{"lookups"},
      {"log any lookups that happen by name, regex, or address"},
-     DWARF_LOG_LOOKUPS},
-    {{"map"},
-     {"log struct/unions/class type completions"},
-     DWARF_LOG_DEBUG_MAP},
+     DWARFLog::Lookups},
+    {{"map"}, {"log struct/unions/class type completions"}, DWARFLog::DebugMap},
 };
 
-Log::Channel LogChannelDWARF::g_channel(g_categories, DWARF_LOG_DEFAULT);
+static Log::Channel g_channel(g_categories, DWARFLog::DebugInfo);
+
+template <> Log::Channel &lldb_private::LogChannelFor<DWARFLog>() {
+  return g_channel;
+}
 
 void LogChannelDWARF::Initialize() {
   Log::Register("dwarf", g_channel);
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h
index 2fc23563ef938..8076c719e9c46 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h
@@ -11,25 +11,32 @@
 
 #include "lldb/Utility/Log.h"
 
-#define DWARF_LOG_DEBUG_INFO (1u << 1)
-#define DWARF_LOG_DEBUG_LINE (1u << 2)
-#define DWARF_LOG_LOOKUPS (1u << 3)
-#define DWARF_LOG_TYPE_COMPLETION (1u << 4)
-#define DWARF_LOG_DEBUG_MAP (1u << 5)
-#define DWARF_LOG_ALL (UINT32_MAX)
-#define DWARF_LOG_DEFAULT (DWARF_LOG_DEBUG_INFO)
-
 namespace lldb_private {
-class LogChannelDWARF {
-  static Log::Channel g_channel;
 
+enum class DWARFLog : Log::MaskType {
+  DebugInfo = Log::ChannelFlag<0>,
+  DebugLine = Log::ChannelFlag<1>,
+  DebugMap = Log::ChannelFlag<2>,
+  Lookups = Log::ChannelFlag<3>,
+  TypeCompletion = Log::ChannelFlag<4>,
+  LLVM_MARK_AS_BITMASK_ENUM(TypeCompletion)
+};
+#define DWARF_LOG_DEBUG_INFO ::lldb_private::DWARFLog::DebugInfo
+#define DWARF_LOG_DEBUG_LINE ::lldb_private::DWARFLog::DebugLine
+#define DWARF_LOG_LOOKUPS ::lldb_private::DWARFLog::Lookups
+#define DWARF_LOG_TYPE_COMPLETION ::lldb_private::DWARFLog::TypeCompletion
+#define DWARF_LOG_DEBUG_MAP ::lldb_private::DWARFLog::DebugMap
+
+class LogChannelDWARF {
 public:
   static void Initialize();
   static void Terminate();
 
-  static Log *GetLogIfAll(uint32_t mask) { return g_channel.GetLogIfAll(mask); }
-  static Log *GetLogIfAny(uint32_t mask) { return g_channel.GetLogIfAny(mask); }
+  static Log *GetLogIfAll(DWARFLog mask) { return GetLog(mask); }
+  static Log *GetLogIfAny(DWARFLog mask) { return GetLog(mask); }
 };
-}
+
+template <> Log::Channel &LogChannelFor<DWARFLog>();
+} // namespace lldb_private
 
 #endif // LLDB_SOURCE_PLUGINS_SYMBOLFILE_DWARF_LOGCHANNELDWARF_H
diff --git a/lldb/source/Utility/Log.cpp b/lldb/source/Utility/Log.cpp
index 26070d0740b19..d229538073d10 100644
--- a/lldb/source/Utility/Log.cpp
+++ b/lldb/source/Utility/Log.cpp
@@ -88,7 +88,7 @@ void Log::Enable(const std::shared_ptr<llvm::raw_ostream> &stream_sp,
                  uint32_t options, uint32_t flags) {
   llvm::sys::ScopedWriter lock(m_mutex);
 
-  uint32_t mask = m_mask.fetch_or(flags, std::memory_order_relaxed);
+  MaskType mask = m_mask.fetch_or(flags, std::memory_order_relaxed);
   if (mask | flags) {
     m_options.store(options, std::memory_order_relaxed);
     m_stream_sp = stream_sp;
@@ -99,7 +99,7 @@ void Log::Enable(const std::shared_ptr<llvm::raw_ostream> &stream_sp,
 void Log::Disable(uint32_t flags) {
   llvm::sys::ScopedWriter lock(m_mutex);
 
-  uint32_t mask = m_mask.fetch_and(~flags, std::memory_order_relaxed);
+  MaskType mask = m_mask.fetch_and(~flags, std::memory_order_relaxed);
   if (!(mask & ~flags)) {
     m_stream_sp.reset();
     m_channel.log_ptr.store(nullptr, std::memory_order_relaxed);
diff --git a/lldb/source/Utility/Logging.cpp b/lldb/source/Utility/Logging.cpp
index 4648bec502c54..67d5d3af2640c 100644
--- a/lldb/source/Utility/Logging.cpp
+++ b/lldb/source/Utility/Logging.cpp
@@ -16,49 +16,74 @@
 using namespace lldb_private;
 
 static constexpr Log::Category g_categories[] = {
-  {{"api"}, {"log API calls and return values"}, LIBLLDB_LOG_API},
-  {{"ast"}, {"log AST"}, LIBLLDB_LOG_AST},
-  {{"break"}, {"log breakpoints"}, LIBLLDB_LOG_BREAKPOINTS},
-  {{"commands"}, {"log command argument parsing"}, LIBLLDB_LOG_COMMANDS},
-  {{"comm"}, {"log communication activities"}, LIBLLDB_LOG_COMMUNICATION},
-  {{"conn"}, {"log connection details"}, LIBLLDB_LOG_CONNECTION},
-  {{"demangle"}, {"log mangled names to catch demangler crashes"}, LIBLLDB_LOG_DEMANGLE},
-  {{"dyld"}, {"log shared library related activities"}, LIBLLDB_LOG_DYNAMIC_LOADER},
-  {{"event"}, {"log broadcaster, listener and event queue activities"}, LIBLLDB_LOG_EVENTS},
-  {{"expr"}, {"log expressions"}, LIBLLDB_LOG_EXPRESSIONS},
-  {{"formatters"}, {"log data formatters related activities"}, LIBLLDB_LOG_DATAFORMATTERS},
-  {{"host"}, {"log host activities"}, LIBLLDB_LOG_HOST},
-  {{"jit"}, {"log JIT events in the target"}, LIBLLDB_LOG_JIT_LOADER},
-  {{"language"}, {"log language runtime events"}, LIBLLDB_LOG_LANGUAGE},
-  {{"mmap"}, {"log mmap related activities"}, LIBLLDB_LOG_MMAP},
-  {{"module"}, {"log module activities such as when modules are created, destroyed, replaced, and more"}, LIBLLDB_LOG_MODULES},
-  {{"object"}, {"log object construction/destruction for important objects"}, LIBLLDB_LOG_OBJECT},
-  {{"os"}, {"log OperatingSystem plugin related activities"}, LIBLLDB_LOG_OS},
-  {{"platform"}, {"log platform events and activities"}, LIBLLDB_LOG_PLATFORM},
-  {{"process"}, {"log process events and activities"}, LIBLLDB_LOG_PROCESS},
-  {{"script"}, {"log events about the script interpreter"}, LIBLLDB_LOG_SCRIPT},
-  {{"state"}, {"log private and public process state changes"}, LIBLLDB_LOG_STATE},
-  {{"step"}, {"log step related activities"}, LIBLLDB_LOG_STEP},
-  {{"symbol"}, {"log symbol related issues and warnings"}, LIBLLDB_LOG_SYMBOLS},
-  {{"system-runtime"}, {"log system runtime events"}, LIBLLDB_LOG_SYSTEM_RUNTIME},
-  {{"target"}, {"log target events and activities"}, LIBLLDB_LOG_TARGET},
-  {{"temp"}, {"log internal temporary debug messages"}, LIBLLDB_LOG_TEMPORARY},
-  {{"thread"}, {"log thread events and activities"}, LIBLLDB_LOG_THREAD},
-  {{"types"}, {"log type system related activities"}, LIBLLDB_LOG_TYPES},
-  {{"unwind"}, {"log stack unwind activities"}, LIBLLDB_LOG_UNWIND},
-  {{"watch"}, {"log watchpoint related activities"}, LIBLLDB_LOG_WATCHPOINTS},
+    {{"api"}, {"log API calls and return values"}, LLDBLog::API},
+    {{"ast"}, {"log AST"}, LLDBLog::AST},
+    {{"break"}, {"log breakpoints"}, LLDBLog::Breakpoints},
+    {{"commands"}, {"log command argument parsing"}, LLDBLog::Commands},
+    {{"comm"}, {"log communication activities"}, LLDBLog::Communication},
+    {{"conn"}, {"log connection details"}, LLDBLog::Connection},
+    {{"demangle"},
+     {"log mangled names to catch demangler crashes"},
+     LLDBLog::Demangle},
+    {{"dyld"},
+     {"log shared library related activities"},
+     LLDBLog::DynamicLoader},
+    {{"event"},
+     {"log broadcaster, listener and event queue activities"},
+     LLDBLog::Events},
+    {{"expr"}, {"log expressions"}, LLDBLog::Expressions},
+    {{"formatters"},
+     {"log data formatters related activities"},
+     LLDBLog::DataFormatters},
+    {{"host"}, {"log host activities"}, LLDBLog::Host},
+    {{"jit"}, {"log JIT events in the target"}, LLDBLog::JITLoader},
+    {{"language"}, {"log language runtime events"}, LLDBLog::Language},
+    {{"mmap"}, {"log mmap related activities"}, LLDBLog::MMap},
+    {{"module"},
+     {"log module activities such as when modules are created, destroyed, "
+      "replaced, and more"},
+     LLDBLog::Modules},
+    {{"object"},
+     {"log object construction/destruction for important objects"},
+     LLDBLog::Object},
+    {{"os"}, {"log OperatingSystem plugin related activities"}, LLDBLog::OS},
+    {{"platform"}, {"log platform events and activities"}, LLDBLog::Platform},
+    {{"process"}, {"log process events and activities"}, LLDBLog::Process},
+    {{"script"}, {"log events about the script interpreter"}, LLDBLog::Script},
+    {{"state"},
+     {"log private and public process state changes"},
+     LLDBLog::State},
+    {{"step"}, {"log step related activities"}, LLDBLog::Step},
+    {{"symbol"}, {"log symbol related issues and warnings"}, LLDBLog::Symbols},
+    {{"system-runtime"}, {"log system runtime events"}, LLDBLog::SystemRuntime},
+    {{"target"}, {"log target events and activities"}, LLDBLog::Target},
+    {{"temp"}, {"log internal temporary debug messages"}, LLDBLog::Temporary},
+    {{"thread"}, {"log thread events and activities"}, LLDBLog::Thread},
+    {{"types"}, {"log type system related activities"}, LLDBLog::Types},
+    {{"unwind"}, {"log stack unwind activities"}, LLDBLog::Unwind},
+    {{"watch"}, {"log watchpoint related activities"}, LLDBLog::Watchpoints},
 };
 
-static Log::Channel g_log_channel(g_categories, LIBLLDB_LOG_DEFAULT);
+static Log::Channel g_log_channel(g_categories,
+                                  LLDBLog::Process | LLDBLog::Thread |
+                                      LLDBLog::DynamicLoader |
+                                      LLDBLog::Breakpoints |
+                                      LLDBLog::Watchpoints | LLDBLog::Step |
+                                      LLDBLog::State | LLDBLog::Symbols |
+                                      LLDBLog::Target | LLDBLog::Commands);
+
+template <> Log::Channel &lldb_private::LogChannelFor<LLDBLog>() {
+  return g_log_channel;
+}
 
 void lldb_private::InitializeLldbChannel() {
   Log::Register("lldb", g_log_channel);
 }
 
-Log *lldb_private::GetLogIfAllCategoriesSet(uint32_t mask) {
-  return g_log_channel.GetLogIfAll(mask);
+Log *lldb_private::GetLogIfAllCategoriesSet(LLDBLog mask) {
+  return GetLog(mask);
 }
 
-Log *lldb_private::GetLogIfAnyCategoriesSet(uint32_t mask) {
-  return g_log_channel.GetLogIfAny(mask);
+Log *lldb_private::GetLogIfAnyCategoriesSet(LLDBLog mask) {
+  return GetLog(mask);
 }
diff --git a/lldb/tools/lldb-server/lldb-gdbserver.cpp b/lldb/tools/lldb-server/lldb-gdbserver.cpp
index 906ae4c378b6b..7648a0bb668d0 100644
--- a/lldb/tools/lldb-server/lldb-gdbserver.cpp
+++ b/lldb/tools/lldb-server/lldb-gdbserver.cpp
@@ -27,6 +27,7 @@
 #include "lldb/Host/Socket.h"
 #include "lldb/Host/common/NativeProcessProtocol.h"
 #include "lldb/Target/Process.h"
+#include "lldb/Utility/Logging.h"
 #include "lldb/Utility/Status.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Option/ArgList.h"
diff --git a/lldb/unittests/Utility/LogTest.cpp b/lldb/unittests/Utility/LogTest.cpp
index dfbb1f092146c..b3a5a7af44da3 100644
--- a/lldb/unittests/Utility/LogTest.cpp
+++ b/lldb/unittests/Utility/LogTest.cpp
@@ -18,13 +18,24 @@
 using namespace lldb;
 using namespace lldb_private;
 
-enum { FOO = 1, BAR = 2 };
+enum class TestChannel : Log::MaskType {
+  FOO = Log::ChannelFlag<0>,
+  BAR = Log::ChannelFlag<1>,
+  LLVM_MARK_AS_BITMASK_ENUM(BAR),
+};
+
+LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();
+
 static constexpr Log::Category test_categories[] = {
-    {{"foo"}, {"log foo"}, FOO}, {{"bar"}, {"log bar"}, BAR},
+    {{"foo"}, {"log foo"}, TestChannel::FOO},
+    {{"bar"}, {"log bar"}, TestChannel::BAR},
 };
-static constexpr uint32_t default_flags = FOO;
 
-static Log::Channel test_channel(test_categories, default_flags);
+static Log::Channel test_channel(test_categories, TestChannel::FOO);
+
+namespace lldb_private {
+template <> Log::Channel &LogChannelFor<TestChannel>() { return test_channel; }
+} // namespace lldb_private
 
 // Wrap enable, disable and list functions to make them easier to test.
 static bool EnableChannel(std::shared_ptr<llvm::raw_ostream> stream_sp,
@@ -93,7 +104,7 @@ void LogChannelEnabledTest::SetUp() {
   std::string error;
   ASSERT_TRUE(EnableChannel(m_stream_sp, 0, "chan", {}, error));
 
-  m_log = test_channel.GetLogIfAll(FOO);
+  m_log = GetLog(TestChannel::FOO);
   ASSERT_NE(nullptr, m_log);
 }
 
@@ -124,18 +135,18 @@ TEST(LogTest, Register) {
 TEST(LogTest, Unregister) {
   llvm::llvm_shutdown_obj obj;
   Log::Register("chan", test_channel);
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAny(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
   std::string message;
   std::shared_ptr<llvm::raw_string_ostream> stream_sp(
       new llvm::raw_string_ostream(message));
   EXPECT_TRUE(Log::EnableLogChannel(stream_sp, 0, "chan", {"foo"}, llvm::nulls()));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAny(FOO));
+  EXPECT_NE(nullptr, GetLog(TestChannel::FOO));
   Log::Unregister("chan");
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAny(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
 }
 
 TEST_F(LogChannelTest, Enable) {
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
   std::string message;
   std::shared_ptr<llvm::raw_string_ostream> stream_sp(
       new llvm::raw_string_ostream(message));
@@ -144,20 +155,22 @@ TEST_F(LogChannelTest, Enable) {
   EXPECT_EQ("Invalid log channel 'chanchan'.\n", error);
 
   EXPECT_TRUE(EnableChannel(stream_sp, 0, "chan", {}, error));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO));
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(BAR));
+  EXPECT_NE(nullptr, GetLog(TestChannel::FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::BAR));
 
   EXPECT_TRUE(EnableChannel(stream_sp, 0, "chan", {"bar"}, error));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO | BAR));
+  EXPECT_NE(nullptr, test_channel.GetLogIfAll(
+                         Log::MaskType(TestChannel::FOO | TestChannel::BAR)));
 
   EXPECT_TRUE(EnableChannel(stream_sp, 0, "chan", {"baz"}, error));
   EXPECT_NE(std::string::npos, error.find("unrecognized log category 'baz'"))
       << "error: " << error;
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO | BAR));
+  EXPECT_NE(nullptr, test_channel.GetLogIfAll(
+                         Log::MaskType(TestChannel::FOO | TestChannel::BAR)));
 }
 
 TEST_F(LogChannelTest, EnableOptions) {
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
   std::string message;
   std::shared_ptr<llvm::raw_string_ostream> stream_sp(
       new llvm::raw_string_ostream(message));
@@ -165,32 +178,33 @@ TEST_F(LogChannelTest, EnableOptions) {
   EXPECT_TRUE(
       EnableChannel(stream_sp, LLDB_LOG_OPTION_VERBOSE, "chan", {}, error));
 
-  Log *log = test_channel.GetLogIfAll(FOO);
+  Log *log = GetLog(TestChannel::FOO);
   ASSERT_NE(nullptr, log);
   EXPECT_TRUE(log->GetVerbose());
 }
 
 TEST_F(LogChannelTest, Disable) {
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO));
   std::string message;
   std::shared_ptr<llvm::raw_string_ostream> stream_sp(
       new llvm::raw_string_ostream(message));
   std::string error;
   EXPECT_TRUE(EnableChannel(stream_sp, 0, "chan", {"foo", "bar"}, error));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO | BAR));
+  EXPECT_NE(nullptr, test_channel.GetLogIfAll(
+                         Log::MaskType(TestChannel::FOO | TestChannel::BAR)));
 
   EXPECT_TRUE(DisableChannel("chan", {"bar"}, error));
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO));
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(BAR));
+  EXPECT_NE(nullptr, GetLog(TestChannel::FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::BAR));
 
   EXPECT_TRUE(DisableChannel("chan", {"baz"}, error));
   EXPECT_NE(std::string::npos, error.find("unrecognized log category 'baz'"))
       << "error: " << error;
-  EXPECT_NE(nullptr, test_channel.GetLogIfAll(FOO));
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAll(BAR));
+  EXPECT_NE(nullptr, GetLog(TestChannel::FOO));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::BAR));
 
   EXPECT_TRUE(DisableChannel("chan", {}, error));
-  EXPECT_EQ(nullptr, test_channel.GetLogIfAny(FOO | BAR));
+  EXPECT_EQ(nullptr, GetLog(TestChannel::FOO | TestChannel::BAR));
 }
 
 TEST_F(LogChannelTest, List) {
@@ -309,5 +323,5 @@ TEST_F(LogChannelEnabledTest, LogGetLogThread) {
 
   // The mask should be either zero of "FOO". In either case, we should not trip
   // any undefined behavior (run the test under TSAN to verify this).
-  EXPECT_THAT(mask, testing::AnyOf(0, FOO));
+  EXPECT_THAT(mask, testing::AnyOf(0, Log::MaskType(TestChannel::FOO)));
 }

From ce6903595b7161f881b62834c55b3099853cabd5 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Tue, 25 Jan 2022 12:10:28 +0100
Subject: [PATCH 529/946] [lldb/test] Use abspath when searching for lldb.exe

realpath is too aggressive and does not produce the desired effect if
ones build folder is a symlink farm.
---
 lldb/packages/Python/lldbsuite/test/dotest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py
index 0815188d6cde8..ce01146055b2c 100644
--- a/lldb/packages/Python/lldbsuite/test/dotest.py
+++ b/lldb/packages/Python/lldbsuite/test/dotest.py
@@ -357,7 +357,7 @@ def parseOptionsAndInitTestdirs():
 
     if args.executable:
         # lldb executable is passed explicitly
-        lldbtest_config.lldbExec = os.path.realpath(args.executable)
+        lldbtest_config.lldbExec = os.path.abspath(args.executable)
         if not is_exe(lldbtest_config.lldbExec):
             lldbtest_config.lldbExec = which(args.executable)
         if not is_exe(lldbtest_config.lldbExec):

From 109cc5adccaec4c2264c0db3d54bbec1183bf95d Mon Sep 17 00:00:00 2001
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
Date: Wed, 12 Jan 2022 00:34:11 +0100
Subject: [PATCH 530/946] [DAGCombine] Fold SRA of a load into a narrower
 sign-extending load

An sra is basically sign-extending a narrower value. Fold away the
shift by doing a sextload of a narrower value, when it is legal to
reduce the load width accordingly.

Differential Revision: https://reviews.llvm.org/D116930
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 ++++++++++++-------
 llvm/test/CodeGen/PowerPC/pr13891.ll          |  2 +-
 llvm/test/CodeGen/X86/combine-sra-load.ll     | 20 +++++++---------
 3 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1137f8b16977f..c3d2ed2dcf85c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8964,6 +8964,10 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   if (SDValue MULH = combineShiftToMULH(N, DAG, TLI))
     return MULH;
 
+  // Attempt to convert a sra of a load into a narrower sign-extending load.
+  if (SDValue NarrowLoad = reduceLoadWidth(N))
+    return NarrowLoad;
+
   return SDValue();
 }
 
@@ -12151,10 +12155,10 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
   if (Opc == ISD::SIGN_EXTEND_INREG) {
     ExtType = ISD::SEXTLOAD;
     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
-  } else if (Opc == ISD::SRL) {
-    // Another special-case: SRL is basically zero-extending a narrower value,
-    // or it may be shifting a higher subword, half or byte into the lowest
-    // bits.
+  } else if (Opc == ISD::SRL || Opc == ISD::SRA) {
+    // Another special-case: SRL/SRA is basically zero/sign-extending a narrower
+    // value, or it may be shifting a higher subword, half or byte into the
+    // lowest bits.
 
     // Only handle shift with constant shift amount, and the shiftee must be a
     // load.
@@ -12168,13 +12172,16 @@ SDValue DAGCombiner::reduceLoadWidth(SDNode *N) {
     uint64_t MemoryWidth = LN->getMemoryVT().getScalarSizeInBits();
     if (MemoryWidth <= ShAmt)
       return SDValue();
-    // Attempt to fold away the SRL by using ZEXTLOAD.
-    ExtType = ISD::ZEXTLOAD;
+    // Attempt to fold away the SRL by using ZEXTLOAD and SRA by using SEXTLOAD.
+    ExtType = Opc == ISD::SRL ? ISD::ZEXTLOAD : ISD::SEXTLOAD;
     ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShAmt);
     // If original load is a SEXTLOAD then we can't simply replace it by a
     // ZEXTLOAD (we could potentially replace it by a more narrow SEXTLOAD
-    // followed by a ZEXT, but that is not handled at the moment).
-    if (LN->getExtensionType() == ISD::SEXTLOAD)
+    // followed by a ZEXT, but that is not handled at the moment). Similarly if
+    // the original load is a ZEXTLOAD and we want to use a SEXTLOAD.
+    if ((LN->getExtensionType() == ISD::SEXTLOAD ||
+         LN->getExtensionType() == ISD::ZEXTLOAD) &&
+        LN->getExtensionType() != ExtType)
       return SDValue();
   } else if (Opc == ISD::AND) {
     // An AND with a constant mask is the same as a truncate + zero-extend.
diff --git a/llvm/test/CodeGen/PowerPC/pr13891.ll b/llvm/test/CodeGen/PowerPC/pr13891.ll
index f35a0a724bfd4..816166a20fedc 100644
--- a/llvm/test/CodeGen/PowerPC/pr13891.ll
+++ b/llvm/test/CodeGen/PowerPC/pr13891.ll
@@ -7,7 +7,7 @@ target triple = "powerpc64-unknown-linux-gnu"
 define void @_Z5check3foos(%struct.foo* nocapture byval(%struct.foo) %f, i16 signext %i) noinline {
 ; CHECK-LABEL: _Z5check3foos:
 ; CHECK: sth 3, {{[0-9]+}}(1)
-; CHECK: lha {{[0-9]+}}, {{[0-9]+}}(1)
+; CHECK: lbz {{[0-9]+}}, {{[0-9]+}}(1)
 entry:
   %0 = bitcast %struct.foo* %f to i16*
   %1 = load i16, i16* %0, align 2
diff --git a/llvm/test/CodeGen/X86/combine-sra-load.ll b/llvm/test/CodeGen/X86/combine-sra-load.ll
index 119acaa6a02b5..ba5814f0f160d 100644
--- a/llvm/test/CodeGen/X86/combine-sra-load.ll
+++ b/llvm/test/CodeGen/X86/combine-sra-load.ll
@@ -1,12 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK
 
-; FIXME: fold (sra (load i32), 16)) -> (sextload i16)
+; fold (sra (load i32), 16)) -> (sextload i16)
 define i32 @sra_half(i32* %p) {
 ; CHECK-LABEL: sra_half:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    sarl $16, %eax
+; CHECK-NEXT:    movswl 2(%rdi), %eax
 ; CHECK-NEXT:    retq
   %load = load i32, i32* %p
   %shift = ashr i32 %load, 16
@@ -25,12 +24,11 @@ define <4 x i32> @sra_half_vec(<4 x i32>* %p) {
   ret <4 x i32> %shift
 }
 
-; FIXME: fold (sra (load i64), 48)) -> (sextload i16)
+; fold (sra (load i64), 48)) -> (sextload i16)
 define i64 @sra_large_shift(i64* %r) {
 ; CHECK-LABEL: sra_large_shift:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    sarq $48, %rax
+; CHECK-NEXT:    movswq 6(%rdi), %rax
 ; CHECK-NEXT:    retq
   %t0 = load i64, i64* %r
   %conv = ashr i64 %t0, 48
@@ -61,12 +59,11 @@ define i32 @sra_of_zextload(i16* %p) {
   ret i32 %shift
 }
 
-; FIXME: fold (sra (sextload i16 to i32), 8) -> (sextload i8)
+; fold (sra (sextload i16 to i32), 8) -> (sextload i8)
 define i32 @sra_of_sextload(i16* %p) {
 ; CHECK-LABEL: sra_of_sextload:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movswl (%rdi), %eax
-; CHECK-NEXT:    sarl $8, %eax
+; CHECK-NEXT:    movsbl 1(%rdi), %eax
 ; CHECK-NEXT:    retq
   %load = load i16, i16* %p
   %sext = sext i16 %load to i32
@@ -89,12 +86,11 @@ define i32 @sra_of_sextload_no_fold(i16* %p) {
   ret i32 %shift
 }
 
-; FIXME: Fold even if SRA has multiple uses.
+; Fold even if SRA has multiple uses.
 define i32 @sra_to_sextload_multiple_sra_uses(i32* %p) {
 ; CHECK-LABEL: sra_to_sextload_multiple_sra_uses:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %ecx
-; CHECK-NEXT:    sarl $16, %ecx
+; CHECK-NEXT:    movswl 2(%rdi), %ecx
 ; CHECK-NEXT:    movl %ecx, %eax
 ; CHECK-NEXT:    xorl $6, %eax
 ; CHECK-NEXT:    orl %ecx, %eax

From dbbe0109086d8e813fbabf3114da737086250ff9 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Tue, 25 Jan 2022 19:24:49 +0800
Subject: [PATCH 531/946] [MLIR] [AsyncToLLVM] Use llvm.coro.align intrinsic

Use llvm.coro.align to align coroutine frame properly.

Reviewed By: bkramer

Differential Revision: https://reviews.llvm.org/D117978
---
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   |  4 +++
 .../Conversion/AsyncToLLVM/AsyncToLLVM.cpp    | 26 +++++++++++--------
 .../AsyncToLLVM/convert-coro-to-llvm.mlir     | 14 +++++-----
 .../test/Target/LLVMIR/llvmir-intrinsics.mlir |  9 +++++++
 4 files changed, 36 insertions(+), 17 deletions(-)

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 58d8cc0ec7362..946e674fab66f 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1542,6 +1542,10 @@ def LLVM_CoroSizeOp : LLVM_IntrOp<"coro.size", [0], [], [], 1> {
   let assemblyFormat = "attr-dict `:` type($res)";
 }
 
+def LLVM_CoroAlignOp : LLVM_IntrOp<"coro.align", [0], [], [], 1> {
+  let assemblyFormat = "attr-dict `:` type($res)";
+}
+
 def LLVM_CoroSaveOp : LLVM_IntrOp<"coro.save", [], [], [], 1> {
   let arguments = (ins LLVM_i8Ptr:$handle);
   let assemblyFormat = "$handle attr-dict `:` type($res)";
diff --git a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
index f0342f3660fe6..5e2bc50cc886f 100644
--- a/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
+++ b/mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp
@@ -335,27 +335,31 @@ class CoroBeginOpConversion : public OpConversionPattern<CoroBeginOp> {
     // Get coroutine frame size: @llvm.coro.size.i64.
     Value coroSize =
         rewriter.create<LLVM::CoroSizeOp>(loc, rewriter.getI64Type());
-    // The coroutine lowering doesn't properly account for alignment of the
-    // frame, so align everything to 64 bytes which ought to be enough for
-    // everyone. https://llvm.org/PR53148
-    constexpr int64_t coroAlign = 64;
+    // Get coroutine frame alignment: @llvm.coro.align.i64.
+    Value coroAlign =
+        rewriter.create<LLVM::CoroAlignOp>(loc, rewriter.getI64Type());
+
+    // Round up the size to be multiple of the alignment. Since aligned_alloc
+    // requires the size parameter be an integral multiple of the alignment
+    // parameter.
     auto makeConstant = [&](uint64_t c) {
       return rewriter.create<LLVM::ConstantOp>(
           op->getLoc(), rewriter.getI64Type(), rewriter.getI64IntegerAttr(c));
     };
-    // Round up the size to the alignment. This is a requirement of
-    // aligned_alloc.
-    coroSize = rewriter.create<LLVM::AddOp>(op->getLoc(), coroSize,
-                                            makeConstant(coroAlign - 1));
-    coroSize = rewriter.create<LLVM::AndOp>(op->getLoc(), coroSize,
-                                            makeConstant(-coroAlign));
+    coroSize = rewriter.create<LLVM::AddOp>(op->getLoc(), coroSize, coroAlign);
+    coroSize =
+        rewriter.create<LLVM::SubOp>(op->getLoc(), coroSize, makeConstant(1));
+    Value NegCoroAlign =
+        rewriter.create<LLVM::SubOp>(op->getLoc(), makeConstant(0), coroAlign);
+    coroSize =
+        rewriter.create<LLVM::AndOp>(op->getLoc(), coroSize, NegCoroAlign);
 
     // Allocate memory for the coroutine frame.
     auto allocFuncOp = LLVM::lookupOrCreateAlignedAllocFn(
         op->getParentOfType<ModuleOp>(), rewriter.getI64Type());
     auto coroAlloc = rewriter.create<LLVM::CallOp>(
         loc, i8Ptr, SymbolRefAttr::get(allocFuncOp),
-        ValueRange{makeConstant(coroAlign), coroSize});
+        ValueRange{coroAlign, coroSize});
 
     // Begin a coroutine: @llvm.coro.begin.
     auto coroId = CoroBeginOpAdaptor(adaptor.getOperands()).id();
diff --git a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
index 85623772249f9..f84c5960c4200 100644
--- a/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
+++ b/mlir/test/Conversion/AsyncToLLVM/convert-coro-to-llvm.mlir
@@ -14,12 +14,14 @@ func @coro_begin() {
   // CHECK: %[[ID:.*]] = llvm.intr.coro.id
   %0 = async.coro.id
   // CHECK: %[[SIZE:.*]] = llvm.intr.coro.size : i64
-  // CHECK: %[[C63:.*]] = llvm.mlir.constant(63 : i64) : i64
-  // CHECK: %[[SIZE2:.*]] = llvm.add %[[SIZE]], %[[C63]] : i64
-  // CHECK: %[[CN64:.*]] = llvm.mlir.constant(-64 : i64) : i64
-  // CHECK: %[[SIZE3:.*]] = llvm.and %[[SIZE2]], %[[CN64]] : i64
-  // CHECK: %[[ALIGN:.*]] = llvm.mlir.constant(64 : i64) : i64
-  // CHECK: %[[ALLOC:.*]] = llvm.call @aligned_alloc(%[[ALIGN]], %[[SIZE3]])
+  // CHECK: %[[ALIGN:.*]] = llvm.intr.coro.align : i64
+  // CHECK: %[[SIZE_PLUS_ALIGN:.*]] = llvm.add %[[SIZE]], %[[ALIGN]] : i64
+  // CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64
+  // CHECK: %[[SIZE_PLUS_ALIGN_MINUS_ONE:.*]] = llvm.sub %[[SIZE_PLUS_ALIGN]], %[[C1]] : i64
+  // CHECK: %[[C0:.*]] = llvm.mlir.constant(0 : i64) : i64
+  // CHECK: %[[NEGATED_ALIGN:.*]] = llvm.sub %[[C0]], %[[ALIGN]]  : i64
+  // CHECK: %[[ROUNDED_SIZE:.*]] = llvm.and %[[SIZE_PLUS_ALIGN_MINUS_ONE]], %[[NEGATED_ALIGN]] : i64
+  // CHECK: %[[ALLOC:.*]] = llvm.call @aligned_alloc(%[[ALIGN]], %[[ROUNDED_SIZE]])
   // CHECK: %[[HDL:.*]] = llvm.intr.coro.begin %[[ID]], %[[ALLOC]]
   %1 = async.coro.begin %0
   return
diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
index 752e9e961bcd1..f2f83d7425f39 100644
--- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
+++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir
@@ -446,6 +446,15 @@ llvm.func @coro_size() {
   llvm.return
 }
 
+// CHECK-LABEL: @coro_align
+llvm.func @coro_align() {
+  // CHECK: call i64 @llvm.coro.align.i64
+  %0 = llvm.intr.coro.align : i64
+  // CHECK: call i32 @llvm.coro.align.i32
+  %1 = llvm.intr.coro.align : i32
+  llvm.return
+}
+
 // CHECK-LABEL: @coro_save
 llvm.func @coro_save(%arg0: !llvm.ptr<i8>) {
   // CHECK: call token @llvm.coro.save

From a83e9266b96c4e5f92611af6f4d95fef6aeef12a Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Tue, 25 Jan 2022 12:33:42 +0100
Subject: [PATCH 532/946] [mlir][Bazel] Update BUILD.bazel file

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 03e54e2726f19..dff9389cc766b 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -3161,6 +3161,7 @@ cc_library(
         ":Async",
         ":DLTIDialect",
         ":AffineUtils",
+        ":ExecutionEngineUtils",
         ":GPUDialect",
         ":GPUPassIncGen",
         ":MemRefDialect",
@@ -8184,8 +8185,8 @@ cc_library(
     hdrs = glob(["include/mlir/Tools/PDLL/AST/*.h"]),
     includes = ["include"],
     deps = [
+        ":Support",
         "//llvm:Support",
-        "//mlir:Support",
     ],
 )
 

From caff8591eff211c41d8de8505f89754d09ca6fa7 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 12:36:04 +0100
Subject: [PATCH 533/946] [OpenMP] Simplify pointer comparison

Rather than checking ptrdiff(a, b) != 0, directly check a != b.
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp        |  3 +--
 clang/test/OpenMP/declare_mapper_codegen.cpp | 24 ++++----------------
 2 files changed, 5 insertions(+), 22 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index c8c08060e729e..233656b90095b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -10217,8 +10217,7 @@ void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
   llvm::Value *Cond;
   if (IsInit) {
     // base != begin?
-    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateIsNotNull(
-        MapperCGF.Builder.CreatePtrDiff(Base, Begin));
+    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
     // IsPtrAndObj?
     llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
         MapType,
diff --git a/clang/test/OpenMP/declare_mapper_codegen.cpp b/clang/test/OpenMP/declare_mapper_codegen.cpp
index 053aa9d2cc6df..e114a1315c79f 100644
--- a/clang/test/OpenMP/declare_mapper_codegen.cpp
+++ b/clang/test/OpenMP/declare_mapper_codegen.cpp
@@ -102,11 +102,7 @@ class C {
 // CK0-DAG: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1
 // CK0-DAG: [[PTRBEGIN:%.+]] = bitcast i8* [[BEGIN]] to %class.C*
 // CK0-DAG: [[PTREND:%.+]] = getelementptr %class.C, %class.C* [[PTRBEGIN]], i64 [[SIZE]]
-// CK0-DAG: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64
-// CK0-DAG: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64
-// CK0-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]]
-// CK0-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CK0-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0
+// CK0-DAG: [[PTRSNE:%.+]] = icmp ne i8* [[BPTR]], [[BEGIN]]
 // CK0-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16
 // CK0-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0
 // CK0-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]]
@@ -662,11 +658,7 @@ class C {
 // CK1-DAG: [[PTRBEGIN:%.+]] = bitcast i8* [[BEGIN]] to %class.C*
 // CK1-DAG: [[PTREND:%.+]] = getelementptr %class.C, %class.C* [[PTRBEGIN]], i64 [[SIZE]]
 // CK1-DAG: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1
-// CK1-DAG: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64
-// CK1-DAG: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64
-// CK1-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]]
-// CK1-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CK1-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0
+// CK1-DAG: [[PTRSNE:%.+]] = icmp ne i8* [[BPTR]], [[BEGIN]]
 // CK1-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16
 // CK1-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0
 // CK1-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]]
@@ -789,11 +781,7 @@ class C {
 // CK2-DAG: [[PTRBEGIN:%.+]] = bitcast i8* [[BEGIN]] to %class.C*
 // CK2-DAG: [[PTREND:%.+]] = getelementptr %class.C, %class.C* [[PTRBEGIN]], i64 [[SIZE]]
 // CK2-DAG: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1
-// CK2-DAG: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64
-// CK2-DAG: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64
-// CK2-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]]
-// CK2-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CK2-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0
+// CK2-DAG: [[PTRSNE:%.+]] = icmp ne i8* [[BPTR]], [[BEGIN]]
 // CK2-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16
 // CK2-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0
 // CK2-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]]
@@ -999,11 +987,7 @@ class C {
 // CK4-DAG: [[PTRBEGIN:%.+]] = bitcast i8* [[BEGIN]] to %class.C*
 // CK4-DAG: [[PTREND:%.+]] = getelementptr %class.C, %class.C* [[PTRBEGIN]], i64 [[SIZE]]
 // CK4-DAG: [[ISARRAY:%.+]] = icmp sgt i64 [[SIZE]], 1
-// CK4-DAG: [[BPTRI:%.+]] = ptrtoint i8* [[BPTR]] to i64
-// CK4-DAG: [[PTRI:%.+]] = ptrtoint i8* [[BEGIN]] to i64
-// CK4-DAG: [[DIF:%.+]] = sub i64 [[BPTRI]], [[PTRI]]
-// CK4-DAG: [[NORM:%.+]] = sdiv exact i64 [[DIF]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64)
-// CK4-DAG: [[PTRSNE:%.+]] = icmp ne i64 [[NORM]], 0
+// CK4-DAG: [[PTRSNE:%.+]] = icmp ne i8* [[BPTR]], [[BEGIN]]
 // CK4-DAG: [[PTRANDOBJ:%.+]] = and i64 [[TYPE]], 16
 // CK4-DAG: [[ISPTRANDOBJ:%.+]] = icmp ne i64 [[PTRANDOBJ]], 0
 // CK4-DAG: [[CMPA:%.+]] = and i1 [[PTRSNE]], [[ISPTRANDOBJ]]

From 91a0b464a853821734db8b1c521df03f8e2e56e7 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Tue, 25 Jan 2022 11:40:31 +0000
Subject: [PATCH 534/946] [OpenCL] Make read_write images optional for
 -fdeclare-opencl-builtins

Ensure any use of a `read_write` image is guarded behind the
`__opencl_c_read_write_images` feature macro.

Differential Revision: https://reviews.llvm.org/D117899
---
 clang/lib/Headers/opencl-c-base.h             |   1 +
 clang/lib/Sema/OpenCLBuiltins.td              | 164 +++++++++++-------
 .../SemaOpenCL/fdeclare-opencl-builtins.cl    |   1 +
 3 files changed, 108 insertions(+), 58 deletions(-)

diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index 7485386c82346..4f63e7bbdcbba 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -68,6 +68,7 @@
 // For the SPIR and SPIR-V target all features are supported.
 #if defined(__SPIR__) || defined(__SPIRV__)
 #define __opencl_c_atomic_scope_all_devices 1
+#define __opencl_c_read_write_images 1
 #endif // defined(__SPIR__)
 #endif // (__OPENCL_CPP_VERSION__ == 202100 || __OPENCL_C_VERSION__ == 300)
 
diff --git a/clang/lib/Sema/OpenCLBuiltins.td b/clang/lib/Sema/OpenCLBuiltins.td
index 38debc5aa9fc5..df2f206041c10 100644
--- a/clang/lib/Sema/OpenCLBuiltins.td
+++ b/clang/lib/Sema/OpenCLBuiltins.td
@@ -80,11 +80,14 @@ def FuncExtKhrLocalInt32ExtendedAtomics  : FunctionExtension<"cl_khr_local_int32
 def FuncExtKhrInt64BaseAtomics           : FunctionExtension<"cl_khr_int64_base_atomics">;
 def FuncExtKhrInt64ExtendedAtomics       : FunctionExtension<"cl_khr_int64_extended_atomics">;
 def FuncExtKhrMipmapImage                : FunctionExtension<"cl_khr_mipmap_image">;
+def FuncExtKhrMipmapImageReadWrite       : FunctionExtension<"cl_khr_mipmap_image __opencl_c_read_write_images">;
 def FuncExtKhrMipmapImageWrites          : FunctionExtension<"cl_khr_mipmap_image_writes">;
 def FuncExtKhrGlMsaaSharing              : FunctionExtension<"cl_khr_gl_msaa_sharing">;
+def FuncExtKhrGlMsaaSharingReadWrite     : FunctionExtension<"cl_khr_gl_msaa_sharing __opencl_c_read_write_images">;
 
 def FuncExtOpenCLCPipes                  : FunctionExtension<"__opencl_c_pipes">;
 def FuncExtOpenCLCWGCollectiveFunctions  : FunctionExtension<"__opencl_c_work_group_collective_functions">;
+def FuncExtOpenCLCReadWriteImages        : FunctionExtension<"__opencl_c_read_write_images">;
 def FuncExtFloatAtomicsFp16GlobalLoadStore  : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_global_atomic_load_store">;
 def FuncExtFloatAtomicsFp16LocalLoadStore   : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_local_atomic_load_store">;
 def FuncExtFloatAtomicsFp16GenericLoadStore : FunctionExtension<"cl_ext_float_atomics __opencl_c_ext_fp16_global_atomic_load_store __opencl_c_ext_fp16_local_atomic_load_store">;
@@ -1390,30 +1393,35 @@ foreach coordTy = [Int, Float] in {
 }
 
 // --- Table 23: Sampler-less Read Functions ---
+multiclass ImageReadSamplerless<string aQual> {
+  foreach imgTy = [Image2d, Image1dArray] in {
+    def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>], Attr.Pure>;
+  }
+  foreach imgTy = [Image3d, Image2dArray] in {
+    def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>], Attr.Pure>;
+  }
+  foreach imgTy = [Image1d, Image1dBuffer] in {
+    def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, Int], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, Int], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, Int], Attr.Pure>;
+  }
+  def : Builtin<"read_imagef", [Float, ImageType<Image2dDepth, aQual>, VectorType<Int, 2>], Attr.Pure>;
+  def : Builtin<"read_imagef", [Float, ImageType<Image2dArrayDepth, aQual>, VectorType<Int, 4>], Attr.Pure>;
+}
+
 let MinVersion = CL12 in {
-  foreach aQual = ["RO", "RW"] in {
-    foreach imgTy = [Image2d, Image1dArray] in {
-      def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>], Attr.Pure>;
-    }
-    foreach imgTy = [Image3d, Image2dArray] in {
-      def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>], Attr.Pure>;
-    }
-    foreach imgTy = [Image1d, Image1dBuffer] in {
-      def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, Int], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, Int], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, Int], Attr.Pure>;
-    }
-    def : Builtin<"read_imagef", [Float, ImageType<Image2dDepth, aQual>, VectorType<Int, 2>], Attr.Pure>;
-    def : Builtin<"read_imagef", [Float, ImageType<Image2dArrayDepth, aQual>, VectorType<Int, 4>], Attr.Pure>;
+  defm : ImageReadSamplerless<"RO">;
+  let Extension = FuncExtOpenCLCReadWriteImages in {
+    defm : ImageReadSamplerless<"RW">;
   }
 }
 
 // --- Table 24: Image Write Functions ---
-foreach aQual = ["WO", "RW"] in {
+multiclass ImageWrite<string aQual> {
   foreach imgTy = [Image2d] in {
     def : Builtin<"write_imagef", [Void, ImageType<imgTy, aQual>, VectorType<Int, 2>, VectorType<Float, 4>]>;
     def : Builtin<"write_imagei", [Void, ImageType<imgTy, aQual>, VectorType<Int, 2>, VectorType<Int, 4>]>;
@@ -1443,8 +1451,13 @@ foreach aQual = ["WO", "RW"] in {
   def : Builtin<"write_imagef", [Void, ImageType<Image2dArrayDepth, aQual>, VectorType<Int, 4>, Float]>;
 }
 
+defm : ImageWrite<"WO">;
+let Extension = FuncExtOpenCLCReadWriteImages in {
+  defm : ImageWrite<"RW">;
+}
+
 // --- Table 25: Image Query Functions ---
-foreach aQual = ["RO", "WO", "RW"] in {
+multiclass ImageQuery<string aQual> {
   foreach imgTy = [Image1d, Image1dBuffer, Image2d, Image3d,
                    Image1dArray, Image2dArray, Image2dDepth,
                    Image2dArrayDepth] in {
@@ -1468,6 +1481,12 @@ foreach aQual = ["RO", "WO", "RW"] in {
   }
 }
 
+defm : ImageQuery<"RO">;
+defm : ImageQuery<"WO">;
+let Extension = FuncExtOpenCLCReadWriteImages in {
+  defm : ImageQuery<"RW">;
+}
+
 // OpenCL extension v2.0 s5.1.9: Built-in Image Read Functions
 // --- Table 8 ---
 foreach aQual = ["RO"] in {
@@ -1488,7 +1507,7 @@ foreach aQual = ["RO"] in {
 // OpenCL extension v2.0 s5.1.10: Built-in Image Sampler-less Read Functions
 // --- Table 9 ---
 let MinVersion = CL12 in {
-  foreach aQual = ["RO", "RW"] in {
+  multiclass ImageReadHalf<string aQual> {
     foreach name = ["read_imageh"] in {
       foreach imgTy = [Image2d, Image1dArray] in {
         def : Builtin<name, [VectorType<Half, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>], Attr.Pure>;
@@ -1501,10 +1520,14 @@ let MinVersion = CL12 in {
       }
     }
   }
+  defm : ImageReadHalf<"RO">;
+  let Extension = FuncExtOpenCLCReadWriteImages in {
+    defm : ImageReadHalf<"RW">;
+  }
 }
 // OpenCL extension v2.0 s5.1.11: Built-in Image Write Functions
 // --- Table 10 ---
-foreach aQual = ["WO", "RW"] in {
+multiclass ImageWriteHalf<string aQual> {
   foreach name = ["write_imageh"] in {
     def : Builtin<name, [Void, ImageType<Image2d, aQual>, VectorType<Int, 2>, VectorType<Half, 4>]>;
     def : Builtin<name, [Void, ImageType<Image2dArray, aQual>, VectorType<Int, 4>, VectorType<Half, 4>]>;
@@ -1515,6 +1538,12 @@ foreach aQual = ["WO", "RW"] in {
   }
 }
 
+defm : ImageWriteHalf<"WO">;
+let Extension = FuncExtOpenCLCReadWriteImages in {
+  defm : ImageWriteHalf<"RW">;
+}
+
+
 
 //--------------------------------------------------------------------
 // OpenCL v2.0 s6.13.15 - Work-group Functions
@@ -1688,14 +1717,24 @@ let Extension = FuncExtKhrMipmapImage in {
       }
     }
   }
-  // Added to section 6.13.14.5
-  foreach aQual = ["RO", "WO", "RW"] in {
-    foreach imgTy = [Image1d, Image2d, Image3d, Image1dArray, Image2dArray, Image2dDepth, Image2dArrayDepth] in {
-      def : Builtin<"get_image_num_mip_levels", [Int, ImageType<imgTy, aQual>]>;
-    }
+}
+
+// Added to section 6.13.14.5
+multiclass ImageQueryNumMipLevels<string aQual> {
+  foreach imgTy = [Image1d, Image2d, Image3d, Image1dArray, Image2dArray, Image2dDepth, Image2dArrayDepth] in {
+    def : Builtin<"get_image_num_mip_levels", [Int, ImageType<imgTy, aQual>]>;
   }
 }
 
+let Extension = FuncExtKhrMipmapImage in {
+  defm : ImageQueryNumMipLevels<"RO">;
+  defm : ImageQueryNumMipLevels<"WO">;
+}
+
+let Extension = FuncExtKhrMipmapImageReadWrite in {
+  defm : ImageQueryNumMipLevels<"RW">;
+}
+
 // Write functions are enabled using a separate extension.
 let Extension = FuncExtKhrMipmapImageWrites in {
   // Added to section 6.13.14.4.
@@ -1734,39 +1773,48 @@ let Extension = FuncExtKhrMipmapImageWrites in {
 
 //--------------------------------------------------------------------
 // OpenCL Extension v2.0 s18.3 - Creating OpenCL Memory Objects from OpenGL MSAA Textures
-let Extension = FuncExtKhrGlMsaaSharing in {
-  // --- Table 6.13.14.3 ---
-  foreach aQual = ["RO", "RW"] in {
-    foreach imgTy = [Image2dMsaa] in {
-      def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>, Int], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>, Int], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>, Int], Attr.Pure>;
-    }
-    foreach imgTy = [Image2dArrayMsaa] in {
-      def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>, Int], Attr.Pure>;
-      def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>, Int], Attr.Pure>;
-      def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>, Int], Attr.Pure>;
-    }
-    foreach name = ["read_imagef"] in {
-      def : Builtin<name, [Float, ImageType<Image2dMsaaDepth, aQual>, VectorType<Int, 2>, Int], Attr.Pure>;
-      def : Builtin<name, [Float, ImageType<Image2dArrayMsaaDepth, aQual>, VectorType<Int, 4>, Int], Attr.Pure>;
-    }
-  }
-
-  // --- Table 6.13.14.5 ---
-  foreach aQual = ["RO", "WO", "RW"] in {
-    foreach imgTy = [Image2dMsaa, Image2dArrayMsaa, Image2dMsaaDepth, Image2dArrayMsaaDepth] in {
-      foreach name = ["get_image_width", "get_image_height",
-                      "get_image_channel_data_type", "get_image_channel_order",
-                      "get_image_num_samples"] in {
-        def : Builtin<name, [Int, ImageType<imgTy, aQual>], Attr.Const>;
-      }
-      def : Builtin<"get_image_dim", [VectorType<Int, 2>, ImageType<imgTy, aQual>], Attr.Const>;
-    }
-    foreach imgTy = [Image2dArrayMsaa, Image2dArrayMsaaDepth] in {
-      def : Builtin<"get_image_array_size", [Size, ImageType<imgTy, aQual>], Attr.Const>;
+// --- Table 6.13.14.3 ---
+multiclass ImageReadMsaa<string aQual> {
+  foreach imgTy = [Image2dMsaa] in {
+    def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>, Int], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>, Int], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, VectorType<Int, 2>, Int], Attr.Pure>;
+  }
+  foreach imgTy = [Image2dArrayMsaa] in {
+    def : Builtin<"read_imagef", [VectorType<Float, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>, Int], Attr.Pure>;
+    def : Builtin<"read_imagei", [VectorType<Int, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>, Int], Attr.Pure>;
+    def : Builtin<"read_imageui", [VectorType<UInt, 4>, ImageType<imgTy, aQual>, VectorType<Int, 4>, Int], Attr.Pure>;
+  }
+  foreach name = ["read_imagef"] in {
+    def : Builtin<name, [Float, ImageType<Image2dMsaaDepth, aQual>, VectorType<Int, 2>, Int], Attr.Pure>;
+    def : Builtin<name, [Float, ImageType<Image2dArrayMsaaDepth, aQual>, VectorType<Int, 4>, Int], Attr.Pure>;
+  }
+}
+
+// --- Table 6.13.14.5 ---
+multiclass ImageQueryMsaa<string aQual> {
+  foreach imgTy = [Image2dMsaa, Image2dArrayMsaa, Image2dMsaaDepth, Image2dArrayMsaaDepth] in {
+    foreach name = ["get_image_width", "get_image_height",
+                    "get_image_channel_data_type", "get_image_channel_order",
+                    "get_image_num_samples"] in {
+      def : Builtin<name, [Int, ImageType<imgTy, aQual>], Attr.Const>;
     }
+    def : Builtin<"get_image_dim", [VectorType<Int, 2>, ImageType<imgTy, aQual>], Attr.Const>;
   }
+  foreach imgTy = [Image2dArrayMsaa, Image2dArrayMsaaDepth] in {
+    def : Builtin<"get_image_array_size", [Size, ImageType<imgTy, aQual>], Attr.Const>;
+  }
+}
+
+let Extension = FuncExtKhrGlMsaaSharing in {
+  defm : ImageReadMsaa<"RO">;
+  defm : ImageQueryMsaa<"RO">;
+  defm : ImageQueryMsaa<"WO">;
+}
+
+let Extension = FuncExtKhrGlMsaaSharingReadWrite in {
+  defm : ImageReadMsaa<"RW">;
+  defm : ImageQueryMsaa<"RW">;
 }
 
 //--------------------------------------------------------------------
diff --git a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
index 5da38c121e393..be6e53a07bdf3 100644
--- a/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
+++ b/clang/test/SemaOpenCL/fdeclare-opencl-builtins.cl
@@ -74,6 +74,7 @@ typedef struct {int a;} ndrange_t;
 #define cl_khr_subgroup_ballot 1
 #define cl_khr_subgroup_non_uniform_arithmetic 1
 #define cl_khr_subgroup_clustered_reduce 1
+#define __opencl_c_read_write_images 1
 #endif
 #endif
 

From 30d4a7e2955356c69ae412bfe2de46b92a2202c1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 12:23:28 +0100
Subject: [PATCH 535/946] [IRBuilder] Require explicit element type in
 CreatePtrDiff()

For opaque pointer compatibility, we cannot derive the element
type from the pointer type.
---
 clang/lib/CodeGen/CGOpenMPRuntime.cpp          | 12 +++++++-----
 clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp       |  5 +++--
 llvm/include/llvm/IR/IRBuilder.h               |  3 ++-
 llvm/lib/IR/Core.cpp                           |  4 +++-
 llvm/lib/IR/IRBuilder.cpp                      |  8 +++++---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp |  3 ++-
 6 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index 233656b90095b..db1c3ca191ca2 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -840,7 +840,8 @@ void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
   auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
   auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
   if (AsArraySection) {
-    Size = CGF.Builder.CreatePtrDiff(OrigAddresses[N].second.getPointer(CGF),
+    Size = CGF.Builder.CreatePtrDiff(ElemType,
+                                     OrigAddresses[N].second.getPointer(CGF),
                                      OrigAddresses[N].first.getPointer(CGF));
     Size = CGF.Builder.CreateNUWAdd(
         Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
@@ -1006,7 +1007,8 @@ Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                     OriginalBaseLValue);
     Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
     llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
-        BaseLValue.getPointer(CGF), SharedAddr.getPointer());
+        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
+        SharedAddr.getPointer());
     llvm::Value *PrivatePointer =
         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
             PrivateAddr.getPointer(), SharedAddr.getType());
@@ -8119,7 +8121,7 @@ class MappableExprsHandler {
                           .getAddress(CGF);
                 }
                 Size = CGF.Builder.CreatePtrDiff(
-                    CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
+                    CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                     CGF.EmitCastToVoidPtr(LB.getPointer()));
                 break;
               }
@@ -8140,7 +8142,7 @@ class MappableExprsHandler {
           CombinedInfo.BasePointers.push_back(BP.getPointer());
           CombinedInfo.Pointers.push_back(LB.getPointer());
           Size = CGF.Builder.CreatePtrDiff(
-              CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
+              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
               CGF.EmitCastToVoidPtr(LB.getPointer()));
           CombinedInfo.Sizes.push_back(
               CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
@@ -8966,7 +8968,7 @@ class MappableExprsHandler {
         CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
     llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
     llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
-    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CHAddr, CLAddr);
+    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
     llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                   /*isSigned=*/false);
     CombinedInfo.Sizes.push_back(Size);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 7c8e4e6b52a08..e09ea5e01b1a9 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -1798,8 +1798,9 @@ static void shuffleAndStore(CodeGenFunction &CGF, Address SrcAddr,
       Ptr = Address(PhiSrc, Ptr.getAlignment());
       ElemPtr = Address(PhiDest, ElemPtr.getAlignment());
       llvm::Value *PtrDiff = Bld.CreatePtrDiff(
-          PtrEnd.getPointer(), Bld.CreatePointerBitCastOrAddrSpaceCast(
-                                   Ptr.getPointer(), CGF.VoidPtrTy));
+          CGF.Int8Ty, PtrEnd.getPointer(),
+          Bld.CreatePointerBitCastOrAddrSpaceCast(Ptr.getPointer(),
+                                                  CGF.VoidPtrTy));
       Bld.CreateCondBr(Bld.CreateICmpSGT(PtrDiff, Bld.getInt64(IntSize - 1)),
                        ThenBB, ExitBB);
       CGF.EmitBlock(ThenBB);
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 32c978f02a4d2..53f517480ca1c 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -2413,7 +2413,8 @@ class IRBuilderBase {
   /// This is intended to implement C-style pointer subtraction. As such, the
   /// pointers must be appropriately aligned for their element types and
   /// pointing into the same object.
-  Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "");
+  Value *CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS,
+                       const Twine &Name = "");
 
   /// Create a launder.invariant.group intrinsic call. If Ptr type is
   /// different from pointer to i8, it's casted to pointer to i8 in the same
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 47ee4cbbf5829..988d2affd9654 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -4019,7 +4019,9 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
 
 LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
                               LLVMValueRef RHS, const char *Name) {
-  return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name));
+  Value *L = unwrap(LHS);
+  Type *ElemTy = L->getType()->getPointerElementType();
+  return wrap(unwrap(B)->CreatePtrDiff(ElemTy, L, unwrap(RHS), Name));
 }
 
 LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp
index b91885bcfac47..27528a69be219 100644
--- a/llvm/lib/IR/IRBuilder.cpp
+++ b/llvm/lib/IR/IRBuilder.cpp
@@ -998,15 +998,17 @@ Value *IRBuilderBase::CreateSelect(Value *C, Value *True, Value *False,
   return Insert(Sel, Name);
 }
 
-Value *IRBuilderBase::CreatePtrDiff(Value *LHS, Value *RHS,
+Value *IRBuilderBase::CreatePtrDiff(Type *ElemTy, Value *LHS, Value *RHS,
                                     const Twine &Name) {
   assert(LHS->getType() == RHS->getType() &&
          "Pointer subtraction operand types must match!");
-  auto *ArgElemType = LHS->getType()->getPointerElementType();
+  assert(cast<PointerType>(LHS->getType())
+             ->isOpaqueOrPointeeTypeMatches(ElemTy) &&
+         "Pointer type must match element type");
   Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
   Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
   Value *Difference = CreateSub(LHS_int, RHS_int);
-  return CreateExactSDiv(Difference, ConstantExpr::getSizeOf(ArgElemType),
+  return CreateExactSDiv(Difference, ConstantExpr::getSizeOf(ElemTy),
                          Name);
 }
 
diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 3122fcd41c8ac..123fb0cfd1cbb 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2516,7 +2516,8 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
       // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
       // Handle mismatched pointer types (goes away with typeless pointers?).
       V = B.CreatePointerCast(V, Dest->getType());
-      Value *PtrDiff = B.CreatePtrDiff(V, Dest);
+      Value *PtrDiff = B.CreatePtrDiff(
+          Dest->getType()->getPointerElementType(), V, Dest);
       return B.CreateIntCast(PtrDiff, CI->getType(), false);
     }
 

From d8962b4139a58ba6955cd4e15dc432c674a013c0 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 12:47:50 +0100
Subject: [PATCH 536/946] [llvm-c] Deprecate LLVMBuildPtrDiff()

In favor of LLVMBuildPtrDiff2(), which accepts an explicit element
type and is compatible with opaque pointers.
---
 llvm/include/llvm-c/Core.h | 9 +++++++--
 llvm/lib/IR/Core.cpp       | 9 ++++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h
index ae2bcb8444b4b..ca3ca24487a51 100644
--- a/llvm/include/llvm-c/Core.h
+++ b/llvm/include/llvm-c/Core.h
@@ -4020,8 +4020,13 @@ LLVMValueRef LLVMBuildIsNull(LLVMBuilderRef, LLVMValueRef Val,
                              const char *Name);
 LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef, LLVMValueRef Val,
                                 const char *Name);
-LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
-                              LLVMValueRef RHS, const char *Name);
+LLVM_ATTRIBUTE_C_DEPRECATED(
+    LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef, LLVMValueRef LHS,
+                                  LLVMValueRef RHS, const char *Name),
+    "Use LLVMBuildPtrDiff2 instead to support opaque pointers");
+LLVMValueRef LLVMBuildPtrDiff2(LLVMBuilderRef, LLVMTypeRef ElemTy,
+                               LLVMValueRef LHS, LLVMValueRef RHS,
+                               const char *Name);
 LLVMValueRef LLVMBuildFence(LLVMBuilderRef B, LLVMAtomicOrdering ordering,
                             LLVMBool singleThread, const char *Name);
 LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B, LLVMAtomicRMWBinOp op,
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index 988d2affd9654..43df15e4d9328 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -4020,10 +4020,17 @@ LLVMValueRef LLVMBuildIsNotNull(LLVMBuilderRef B, LLVMValueRef Val,
 LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS,
                               LLVMValueRef RHS, const char *Name) {
   Value *L = unwrap(LHS);
-  Type *ElemTy = L->getType()->getPointerElementType();
+  Type *ElemTy = L->getType()->getNonOpaquePointerElementType();
   return wrap(unwrap(B)->CreatePtrDiff(ElemTy, L, unwrap(RHS), Name));
 }
 
+LLVMValueRef LLVMBuildPtrDiff2(LLVMBuilderRef B, LLVMTypeRef ElemTy,
+                               LLVMValueRef LHS, LLVMValueRef RHS,
+                               const char *Name) {
+  return wrap(unwrap(B)->CreatePtrDiff(unwrap(ElemTy), unwrap(LHS),
+                                       unwrap(RHS), Name));
+}
+
 LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op,
                                LLVMValueRef PTR, LLVMValueRef Val,
                                LLVMAtomicOrdering ordering,

From 157f9b68a3722101c3c2e40926dadc4aeb3e4a39 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 25 Jan 2022 11:41:15 +0000
Subject: [PATCH 537/946] [X86] combineVectorSignBitsTruncation - fix
 indentation. NFC.

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c68774a838a25..b25a170a683ab 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -48989,7 +48989,7 @@ static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
     // originally concatenated from subvectors.
     SmallVector<SDValue> ConcatOps;
     if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
-    return SDValue();
+      return SDValue();
   }
 
   unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);

From 15e2be291f7fc8a694f796dd494e4f14d7f5a982 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 25 Jan 2022 11:54:11 +0000
Subject: [PATCH 538/946] [DAG] visitMULHS/MULHU/AND - remove some redundant
 LHS constant checks

Now that we constant fold and canonicalize constants to the RHS, we don't need to check both LHS and RHS for specific constants
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c3d2ed2dcf85c..082e2508aa4ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4470,15 +4470,15 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
       return FoldedVOp;
 
     // fold (mulhs x, 0) -> 0
-    // do not return N0/N1, because undef node may exist.
-    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
-        ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+    // do not return N1, because undef node may exist.
+    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       return DAG.getConstant(0, DL, VT);
   }
 
   // fold (mulhs x, 0) -> 0
   if (isNullConstant(N1))
     return N1;
+
   // fold (mulhs x, 1) -> (sra x, size(x)-1)
   if (isOneConstant(N1))
     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
@@ -4530,18 +4530,19 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
       return FoldedVOp;
 
     // fold (mulhu x, 0) -> 0
-    // do not return N0/N1, because undef node may exist.
-    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()) ||
-        ISD::isConstantSplatVectorAllZeros(N1.getNode()))
+    // do not return N1, because undef node may exist.
+    if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       return DAG.getConstant(0, DL, VT);
   }
 
   // fold (mulhu x, 0) -> 0
   if (isNullConstant(N1))
     return N1;
+
   // fold (mulhu x, 1) -> 0
   if (isOneConstant(N1))
     return DAG.getConstant(0, DL, N0.getValueType());
+
   // fold (mulhu x, undef) -> 0
   if (N0.isUndef() || N1.isUndef())
     return DAG.getConstant(0, DL, VT);
@@ -5815,18 +5816,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
       return FoldedVOp;
 
     // fold (and x, 0) -> 0, vector edition
-    if (ISD::isConstantSplatVectorAllZeros(N0.getNode()))
-      // do not return N0, because undef node may exist in N0
-      return DAG.getConstant(APInt::getZero(N0.getScalarValueSizeInBits()),
-                             SDLoc(N), N0.getValueType());
     if (ISD::isConstantSplatVectorAllZeros(N1.getNode()))
       // do not return N1, because undef node may exist in N1
       return DAG.getConstant(APInt::getZero(N1.getScalarValueSizeInBits()),
                              SDLoc(N), N1.getValueType());
 
     // fold (and x, -1) -> x, vector edition
-    if (ISD::isConstantSplatVectorAllOnes(N0.getNode()))
-      return N1;
     if (ISD::isConstantSplatVectorAllOnes(N1.getNode()))
       return N0;
 

From 3e2ae92d3f062f47b7cc8103e9a6c15b815d9018 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 12:55:02 +0100
Subject: [PATCH 539/946] [SCEV] Remove an unnecessary GEP type check

The code already checked that the addrec step size and type alloc
size are the same. The actual pointer element type is irrelevant
here.
---
 llvm/lib/Analysis/ScalarEvolution.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4f2123b4c5fab..07aac1523b478 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -7596,10 +7596,6 @@ const SCEV *ScalarEvolution::getConstantMaxTripCountFromArray(const Loop *L) {
       auto *ArrSize = dyn_cast<ConstantInt>(AllocateInst->getArraySize());
       if (!Ty || !ArrSize || !ArrSize->isOne())
         continue;
-      // Also make sure step was increased the same with sizeof allocated
-      // element type.
-      if (Ty->getElementType() != GEP->getType()->getPointerElementType())
-        continue;
 
       // FIXME: Since gep indices are silently zext to the indexing type,
       // we will have a narrow gep index which wraps around rather than

From 4f4d071c909ef142c858a7e938aca9a99173cabe Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 13:07:36 +0100
Subject: [PATCH 540/946] [ObjCArcOpts] Regenerate test checks (NFC)

---
 llvm/test/Transforms/ObjCARC/weak.ll | 43 +++++++++++++++-------------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/llvm/test/Transforms/ObjCARC/weak.ll b/llvm/test/Transforms/ObjCARC/weak.ll
index caaeba7280e80..2009addbbdc56 100644
--- a/llvm/test/Transforms/ObjCARC/weak.ll
+++ b/llvm/test/Transforms/ObjCARC/weak.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -objc-arc -S < %s | FileCheck %s
 
 declare i8* @llvm.objc.initWeak(i8**, i8*)
@@ -10,27 +11,29 @@ declare void @llvm.objc.copyWeak(i8**, i8**)
 
 ; If the pointer-to-weak-pointer is null, it's undefined behavior.
 
-; CHECK-LABEL: define void @test0(
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: store i8* undef, i8** null
-; CHECK: ret void
 define void @test0(i8* %p, i8** %q) {
+; CHECK-LABEL: @test0(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    ret void
+;
 entry:
   call i8* @llvm.objc.storeWeak(i8** null, i8* %p)
   call i8* @llvm.objc.storeWeak(i8** undef, i8* %p)

From 78e1f70220a57bc7a7554d4240c5f15810959d7c Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 13:05:14 +0100
Subject: [PATCH 541/946] [ObjCARCOpts] Use standard non-terminator unreachable
 pattern

This is what CreateNonTerminatorUnreachable() in InstCombine uses.
Specific choice here doesn't really matter, but we should pick
one that is pointer element type independent.
---
 llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 10 +++---
 llvm/test/Transforms/ObjCARC/weak.ll        | 36 ++++++++++-----------
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index caad91867112c..b6dc97f1e43f8 100644
--- a/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -979,9 +979,8 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
     CallInst *CI = cast<CallInst>(Inst);
     if (IsNullOrUndef(CI->getArgOperand(0))) {
       Changed = true;
-      Type *Ty = CI->getArgOperand(0)->getType();
-      new StoreInst(UndefValue::get(Ty->getPointerElementType()),
-                    Constant::getNullValue(Ty), CI);
+      new StoreInst(ConstantInt::getTrue(CI->getContext()),
+                    UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
       Value *NewValue = UndefValue::get(CI->getType());
       LLVM_DEBUG(
           dbgs() << "A null pointer-to-weak-pointer is undefined behavior."
@@ -999,9 +998,8 @@ void ObjCARCOpt::OptimizeIndividualCallImpl(
     if (IsNullOrUndef(CI->getArgOperand(0)) ||
         IsNullOrUndef(CI->getArgOperand(1))) {
       Changed = true;
-      Type *Ty = CI->getArgOperand(0)->getType();
-      new StoreInst(UndefValue::get(Ty->getPointerElementType()),
-                    Constant::getNullValue(Ty), CI);
+      new StoreInst(ConstantInt::getTrue(CI->getContext()),
+                    UndefValue::get(Type::getInt1PtrTy(CI->getContext())), CI);
 
       Value *NewValue = UndefValue::get(CI->getType());
       LLVM_DEBUG(
diff --git a/llvm/test/Transforms/ObjCARC/weak.ll b/llvm/test/Transforms/ObjCARC/weak.ll
index 2009addbbdc56..00a4e9f681e7d 100644
--- a/llvm/test/Transforms/ObjCARC/weak.ll
+++ b/llvm/test/Transforms/ObjCARC/weak.ll
@@ -14,24 +14,24 @@ declare void @llvm.objc.copyWeak(i8**, i8**)
 define void @test0(i8* %p, i8** %q) {
 ; CHECK-LABEL: @test0(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
-; CHECK-NEXT:    store i8* undef, i8** null, align 8
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
+; CHECK-NEXT:    store i1 true, i1* undef, align 1
 ; CHECK-NEXT:    ret void
 ;
 entry:

From d95cf1f6cf4242ae9f045b8032b9e4c08d41a12f Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Fri, 10 Dec 2021 18:05:38 +0000
Subject: [PATCH 542/946] [SVE] Enable ISD::ABDS/U ISel for scalable vectors.

NOTE: This patch also includes tests that highlight those cases
where the existing DAG combine doesn't yet work well for SVE.

Differential Revision: https://reviews.llvm.org/D117873
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |   8 +
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   2 +
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   4 +
 llvm/test/CodeGen/AArch64/sve-abd.ll          | 267 ++++++++++++++++++
 4 files changed, 281 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-abd.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fd35ab2049e92..fc6e0b865681c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1206,6 +1206,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SRL, VT, Custom);
       setOperationAction(ISD::SRA, VT, Custom);
       setOperationAction(ISD::ABS, VT, Custom);
+      setOperationAction(ISD::ABDS, VT, Custom);
+      setOperationAction(ISD::ABDU, VT, Custom);
       setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
       setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
       setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
@@ -1994,6 +1996,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::CSINC)
     MAKE_CASE(AArch64ISD::THREAD_POINTER)
     MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
+    MAKE_CASE(AArch64ISD::ABDS_PRED)
+    MAKE_CASE(AArch64ISD::ABDU_PRED)
     MAKE_CASE(AArch64ISD::ADD_PRED)
     MAKE_CASE(AArch64ISD::MUL_PRED)
     MAKE_CASE(AArch64ISD::MULHS_PRED)
@@ -5196,6 +5200,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerFixedLengthVectorSelectToSVE(Op, DAG);
   case ISD::ABS:
     return LowerABS(Op, DAG);
+  case ISD::ABDS:
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDS_PRED);
+  case ISD::ABDU:
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABDU_PRED);
   case ISD::BITREVERSE:
     return LowerBitreverse(Op, DAG);
   case ISD::BSWAP:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9841a4c048632..df19a4729bb49 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -77,6 +77,8 @@ enum NodeType : unsigned {
   SBC, // adc, sbc instructions
 
   // Predicated instructions where inactive lanes produce undefined results.
+  ABDS_PRED,
+  ABDU_PRED,
   ADD_PRED,
   FADD_PRED,
   FDIV_PRED,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 63f8f58e76c53..63cd8f476272a 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -189,11 +189,13 @@ def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
 def AArch64lsl_p  : SDNode<"AArch64ISD::SHL_PRED",  SDT_AArch64Arith>;
 def AArch64lsr_p  : SDNode<"AArch64ISD::SRL_PRED",  SDT_AArch64Arith>;
 def AArch64mul_p  : SDNode<"AArch64ISD::MUL_PRED",  SDT_AArch64Arith>;
+def AArch64sabd_p : SDNode<"AArch64ISD::ABDS_PRED", SDT_AArch64Arith>;
 def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
 def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
 def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
 def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
 def AArch64sub_p  : SDNode<"AArch64ISD::SUB_PRED",  SDT_AArch64Arith>;
+def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>;
 def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
 def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
 def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
@@ -418,6 +420,8 @@ let Predicates = [HasSVEorStreamingSVE] in {
   defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>;
   defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
   defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
+  defm SABD_ZPZZ : sve_int_bin_pred_bhsd<AArch64sabd_p>;
+  defm UABD_ZPZZ : sve_int_bin_pred_bhsd<AArch64uabd_p>;
 
   defm FRECPE_ZZ  : sve_fp_2op_u_zd<0b110, "frecpe",  AArch64frecpe>;
   defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", AArch64frsqrte>;
diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll
new file mode 100644
index 0000000000000..affd6d5b15f79
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-abd.ll
@@ -0,0 +1,267 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; SABD
+;
+
+define <vscale x 16 x i8> @sabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: sabd_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    sabd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 16 x i8> %a to <vscale x 16 x i16>
+  %b.sext = sext <vscale x 16 x i8> %b to <vscale x 16 x i16>
+  %sub = sub <vscale x 16 x i16> %a.sext, %b.sext
+  %abs = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %sub, i1 true)
+  %trunc = trunc <vscale x 16 x i16> %abs to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %trunc
+}
+
+define <vscale x 16 x i8> @sabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: sabd_b_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ptrue p2.b
+; CHECK-NEXT:    sub z0.b, z0.b, z1.b
+; CHECK-NEXT:    abs z0.b, p2/m, z0.b
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 16 x i1> %a to <vscale x 16 x i8>
+  %b.sext = sext <vscale x 16 x i1> %b to <vscale x 16 x i8>
+  %sub = sub <vscale x 16 x i8> %a.sext, %b.sext
+  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
+  ret <vscale x 16 x i8> %abs
+}
+
+define <vscale x 8 x i16> @sabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: sabd_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 8 x i16> %a to <vscale x 8 x i32>
+  %b.sext = sext <vscale x 8 x i16> %b to <vscale x 8 x i32>
+  %sub = sub <vscale x 8 x i32> %a.sext, %b.sext
+  %abs = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %sub, i1 true)
+  %trunc = trunc <vscale x 8 x i32> %abs to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %trunc
+}
+
+define <vscale x 8 x i16> @sabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) #0 {
+; CHECK-LABEL: sabd_h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    abs z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %b.sext = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
+  %sub = sub <vscale x 8 x i16> %a.sext, %b.sext
+  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
+  ret <vscale x 8 x i16> %abs
+}
+
+define <vscale x 4 x i32> @sabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: sabd_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 4 x i32> %a to <vscale x 4 x i64>
+  %b.sext = sext <vscale x 4 x i32> %b to <vscale x 4 x i64>
+  %sub = sub <vscale x 4 x i64> %a.sext, %b.sext
+  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
+  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %trunc
+}
+
+define <vscale x 4 x i32> @sabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) #0 {
+; CHECK-LABEL: sabd_s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sxth z0.s, p0/m, z0.s
+; CHECK-NEXT:    sxth z1.s, p0/m, z1.s
+; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %b.sext = sext <vscale x 4 x i16> %b to <vscale x 4 x i32>
+  %sub = sub <vscale x 4 x i32> %a.sext, %b.sext
+  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
+  ret <vscale x 4 x i32> %abs
+}
+
+define <vscale x 2 x i64> @sabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: sabd_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 2 x i64> %a to <vscale x 2 x i128>
+  %b.sext = sext <vscale x 2 x i64> %b to <vscale x 2 x i128>
+  %sub = sub <vscale x 2 x i128> %a.sext, %b.sext
+  %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
+  %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %trunc
+}
+
+define <vscale x 2 x i64> @sabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) #0 {
+; CHECK-LABEL: sabd_d_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    sxtw z1.d, p0/m, z1.d
+; CHECK-NEXT:    sub z0.d, z0.d, z1.d
+; CHECK-NEXT:    abs z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %a.sext = sext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %b.sext = sext <vscale x 2 x i32> %b to <vscale x 2 x i64>
+  %sub = sub <vscale x 2 x i64> %a.sext, %b.sext
+  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
+  ret <vscale x 2 x i64> %abs
+}
+
+;
+; UABD
+;
+
+define <vscale x 16 x i8> @uabd_b(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: uabd_b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.b
+; CHECK-NEXT:    uabd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 16 x i8> %a to <vscale x 16 x i16>
+  %b.zext = zext <vscale x 16 x i8> %b to <vscale x 16 x i16>
+  %sub = sub <vscale x 16 x i16> %a.zext, %b.zext
+  %abs = call <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16> %sub, i1 true)
+  %trunc = trunc <vscale x 16 x i16> %abs to <vscale x 16 x i8>
+  ret <vscale x 16 x i8> %trunc
+}
+
+define <vscale x 16 x i8> @uabd_b_promoted_ops(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) #0 {
+; CHECK-LABEL: uabd_b_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
+; CHECK-NEXT:    mov z1.b, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    ptrue p2.b
+; CHECK-NEXT:    add z0.b, z0.b, z1.b
+; CHECK-NEXT:    abs z0.b, p2/m, z0.b
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 16 x i1> %a to <vscale x 16 x i8>
+  %b.zext = zext <vscale x 16 x i1> %b to <vscale x 16 x i8>
+  %sub = sub <vscale x 16 x i8> %a.zext, %b.zext
+  %abs = call <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8> %sub, i1 true)
+  ret <vscale x 16 x i8> %abs
+}
+
+define <vscale x 8 x i16> @uabd_h(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: uabd_h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    uabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 8 x i16> %a to <vscale x 8 x i32>
+  %b.zext = zext <vscale x 8 x i16> %b to <vscale x 8 x i32>
+  %sub = sub <vscale x 8 x i32> %a.zext, %b.zext
+  %abs = call <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32> %sub, i1 true)
+  %trunc = trunc <vscale x 8 x i32> %abs to <vscale x 8 x i16>
+  ret <vscale x 8 x i16> %trunc
+}
+
+define <vscale x 8 x i16> @uabd_h_promoted_ops(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) #0 {
+; CHECK-LABEL: uabd_h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.h, z0.h, #0xff
+; CHECK-NEXT:    and z1.h, z1.h, #0xff
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    sub z0.h, z0.h, z1.h
+; CHECK-NEXT:    abs z0.h, p0/m, z0.h
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 8 x i8> %a to <vscale x 8 x i16>
+  %b.zext = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
+  %sub = sub <vscale x 8 x i16> %a.zext, %b.zext
+  %abs = call <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16> %sub, i1 true)
+  ret <vscale x 8 x i16> %abs
+}
+
+define <vscale x 4 x i32> @uabd_s(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: uabd_s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    uabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 4 x i32> %a to <vscale x 4 x i64>
+  %b.zext = zext <vscale x 4 x i32> %b to <vscale x 4 x i64>
+  %sub = sub <vscale x 4 x i64> %a.zext, %b.zext
+  %abs = call <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64> %sub, i1 true)
+  %trunc = trunc <vscale x 4 x i64> %abs to <vscale x 4 x i32>
+  ret <vscale x 4 x i32> %trunc
+}
+
+define <vscale x 4 x i32> @uabd_s_promoted_ops(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) #0 {
+; CHECK-LABEL: uabd_s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.s, z0.s, #0xffff
+; CHECK-NEXT:    and z1.s, z1.s, #0xffff
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sub z0.s, z0.s, z1.s
+; CHECK-NEXT:    abs z0.s, p0/m, z0.s
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 4 x i16> %a to <vscale x 4 x i32>
+  %b.zext = zext <vscale x 4 x i16> %b to <vscale x 4 x i32>
+  %sub = sub <vscale x 4 x i32> %a.zext, %b.zext
+  %abs = call <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32> %sub, i1 true)
+  ret <vscale x 4 x i32> %abs
+}
+
+define <vscale x 2 x i64> @uabd_d(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: uabd_d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 2 x i64> %a to <vscale x 2 x i128>
+  %b.zext = zext <vscale x 2 x i64> %b to <vscale x 2 x i128>
+  %sub = sub <vscale x 2 x i128> %a.zext, %b.zext
+  %abs = call <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128> %sub, i1 true)
+  %trunc = trunc <vscale x 2 x i128> %abs to <vscale x 2 x i64>
+  ret <vscale x 2 x i64> %trunc
+}
+
+define <vscale x 2 x i64> @uabd_d_promoted_ops(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) #0 {
+; CHECK-LABEL: uabd_d_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
+; CHECK-NEXT:    and z1.d, z1.d, #0xffffffff
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sub z0.d, z0.d, z1.d
+; CHECK-NEXT:    abs z0.d, p0/m, z0.d
+; CHECK-NEXT:    ret
+  %a.zext = zext <vscale x 2 x i32> %a to <vscale x 2 x i64>
+  %b.zext = zext <vscale x 2 x i32> %b to <vscale x 2 x i64>
+  %sub = sub <vscale x 2 x i64> %a.zext, %b.zext
+  %abs = call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %sub, i1 true)
+  ret <vscale x 2 x i64> %abs
+}
+
+declare <vscale x 16 x i8> @llvm.abs.nxv16i8(<vscale x 16 x i8>, i1)
+
+declare <vscale x 8 x i16> @llvm.abs.nxv8i16(<vscale x 8 x i16>, i1)
+declare <vscale x 16 x i16> @llvm.abs.nxv16i16(<vscale x 16 x i16>, i1)
+
+declare <vscale x 4 x i32> @llvm.abs.nxv4i32(<vscale x 4 x i32>, i1)
+declare <vscale x 8 x i32> @llvm.abs.nxv8i32(<vscale x 8 x i32>, i1)
+
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+declare <vscale x 4 x i64> @llvm.abs.nxv4i64(<vscale x 4 x i64>, i1)
+
+declare <vscale x 2 x i128> @llvm.abs.nxv2i128(<vscale x 2 x i128>, i1)
+
+attributes #0 = { "target-features"="+neon,+sve" }

From 153b1e3cba1e0469bfa6c72208d91708c219e6a6 Mon Sep 17 00:00:00 2001
From: Danila Malyutin <dmalyutin@azul.com>
Date: Tue, 18 Jan 2022 20:17:22 +0300
Subject: [PATCH 543/946] [AArch64] Add patterns for relaxed atomic ld/st into
 fp registers

Adds patterns to match integer loads/stores bitcasted to fp values

Fixes https://github.com/llvm/llvm-project/issues/52927

Differential Revision: https://reviews.llvm.org/D117573
---
 .../lib/Target/AArch64/AArch64InstrAtomics.td | 60 ++++++++++++
 .../CodeGen/AArch64/relaxed-fp-atomics.ll     | 94 +++++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll

diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 84573dac7e41f..b220929514f9d 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -102,6 +102,34 @@ def : Pat<(relaxed_load<atomic_load_64>
                (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
           (LDURXi GPR64sp:$Rn, simm9:$offset)>;
 
+// FP 32-bit loads
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend32:$extend))))),
+          (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend32:$extend))))),
+          (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32> (am_indexed32 GPR64sp:$Rn,
+                                                      uimm12s8:$offset))))),
+          (LDRSui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(f32 (bitconvert (i32 (relaxed_load<atomic_load_32>
+               (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
+          (LDURSi GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit loads
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                       ro_Wextend64:$extend))))),
+          (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                       ro_Xextend64:$extend))))),
+          (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64> (am_indexed64 GPR64sp:$Rn,
+                                                      uimm12s8:$offset))))),
+          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
+def : Pat<(f64 (bitconvert (i64 (relaxed_load<atomic_load_64>
+               (am_unscaled64 GPR64sp:$Rn, simm9:$offset))))),
+          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
+
 //===----------------------------------
 // Atomic stores
 //===----------------------------------
@@ -196,6 +224,38 @@ def : Pat<(relaxed_store<atomic_store_64>
                (am_unscaled64 GPR64sp:$Rn, simm9:$offset), GPR64:$val),
           (STURXi GPR64:$val, GPR64sp:$Rn, simm9:$offset)>;
 
+// FP 32-bit stores
+def : Pat<(relaxed_store<atomic_store_32> (ro_Windexed32 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend32:$extend),
+                                          (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSroW FPR32Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32> (ro_Xindexed32 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend32:$extend),
+                                          (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSroX FPR32Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend32:$extend)>;
+def : Pat<(relaxed_store<atomic_store_32>
+              (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STRSui FPR32Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_32>
+               (am_unscaled32 GPR64sp:$Rn, simm9:$offset), (i32 (bitconvert (f32 FPR32Op:$val)))),
+          (STURSi FPR32Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
+// FP 64-bit stores
+def : Pat<(relaxed_store<atomic_store_64> (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
+                                                         ro_Wextend64:$extend),
+                                          (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDroW FPR64Op:$val, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64> (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
+                                                         ro_Xextend64:$extend),
+                                          (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDroX FPR64Op:$val, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
+def : Pat<(relaxed_store<atomic_store_64>
+              (am_indexed64 GPR64sp:$Rn, uimm12s4:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STRDui FPR64Op:$val, GPR64sp:$Rn, uimm12s4:$offset)>;
+def : Pat<(relaxed_store<atomic_store_64>
+               (am_unscaled64 GPR64sp:$Rn, simm9:$offset), (i64 (bitconvert (f64 FPR64Op:$val)))),
+          (STURDi FPR64Op:$val, GPR64sp:$Rn, simm9:$offset)>;
+
 //===----------------------------------
 // Low-level exclusive operations
 //===----------------------------------
diff --git a/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
new file mode 100644
index 0000000000000..1f8ba6da24646
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/relaxed-fp-atomics.ll
@@ -0,0 +1,94 @@
+; PR52927: Relaxed atomics can load to/store from fp regs directly
+; RUN: llc < %s -mtriple=arm64-eabi -asm-verbose=false -verify-machineinstrs -mcpu=cyclone | FileCheck %s
+
+define float @atomic_load_relaxed_f32(float* %p, i32 %off32, i64 %off64) #0 {
+; CHECK-LABEL: atomic_load_relaxed_f32:
+  %ptr_unsigned = getelementptr float, float* %p, i32 4095
+  %val_unsigned = load atomic float, float* %ptr_unsigned monotonic, align 4
+; CHECK: ldr {{s[0-9]+}}, [x0, #16380]
+
+  %ptr_regoff = getelementptr float, float* %p, i32 %off32
+  %val_regoff = load atomic float, float* %ptr_regoff unordered, align 4
+  %tot1 = fadd float %val_unsigned, %val_regoff
+; CHECK: ldr {{s[0-9]+}}, [x0, w1, sxtw #2]
+
+  %ptr_regoff64 = getelementptr float, float* %p, i64 %off64
+  %val_regoff64 = load atomic float, float* %ptr_regoff64 monotonic, align 4
+  %tot2 = fadd float %tot1, %val_regoff64
+; CHECK: ldr {{s[0-9]+}}, [x0, x2, lsl #2]
+
+  %ptr_unscaled = getelementptr float, float* %p, i32 -64
+  %val_unscaled = load atomic float, float* %ptr_unscaled unordered, align 4
+  %tot3 = fadd float %tot2, %val_unscaled
+; CHECK: ldur {{s[0-9]+}}, [x0, #-256]
+
+  ret float %tot3
+}
+
+define double @atomic_load_relaxed_f64(double* %p, i32 %off32, i64 %off64) #0 {
+; CHECK-LABEL: atomic_load_relaxed_f64:
+  %ptr_unsigned = getelementptr double, double* %p, i32 4095
+  %val_unsigned = load atomic double, double* %ptr_unsigned monotonic, align 8
+; CHECK: ldr {{d[0-9]+}}, [x0, #32760]
+
+  %ptr_regoff = getelementptr double, double* %p, i32 %off32
+  %val_regoff = load atomic double, double* %ptr_regoff unordered, align 8
+  %tot1 = fadd double %val_unsigned, %val_regoff
+; CHECK: ldr {{d[0-9]+}}, [x0, w1, sxtw #3]
+
+  %ptr_regoff64 = getelementptr double, double* %p, i64 %off64
+  %val_regoff64 = load atomic double, double* %ptr_regoff64 monotonic, align 8
+  %tot2 = fadd double %tot1, %val_regoff64
+; CHECK: ldr {{d[0-9]+}}, [x0, x2, lsl #3]
+
+  %ptr_unscaled = getelementptr double, double* %p, i32 -32
+  %val_unscaled = load atomic double, double* %ptr_unscaled unordered, align 8
+  %tot3 = fadd double %tot2, %val_unscaled
+; CHECK: ldur {{d[0-9]+}}, [x0, #-256]
+
+  ret double %tot3
+}
+
+define void @atomic_store_relaxed_f32(float* %p, i32 %off32, i64 %off64, float %val) #0 {
+; CHECK-LABEL: atomic_store_relaxed_f32:
+  %ptr_unsigned = getelementptr float, float* %p, i32 4095
+  store atomic float %val, float* %ptr_unsigned monotonic, align 4
+; CHECK: str {{s[0-9]+}}, [x0, #16380]
+
+  %ptr_regoff = getelementptr float, float* %p, i32 %off32
+  store atomic float %val, float* %ptr_regoff unordered, align 4
+; CHECK: str {{s[0-9]+}}, [x0, w1, sxtw #2]
+
+  %ptr_regoff64 = getelementptr float, float* %p, i64 %off64
+  store atomic float %val, float* %ptr_regoff64 monotonic, align 4
+; CHECK: str {{s[0-9]+}}, [x0, x2, lsl #2]
+
+  %ptr_unscaled = getelementptr float, float* %p, i32 -64
+  store atomic float %val, float* %ptr_unscaled unordered, align 4
+; CHECK: stur {{s[0-9]+}}, [x0, #-256]
+
+  ret void
+}
+
+define void @atomic_store_relaxed_f64(double* %p, i32 %off32, i64 %off64, double %val) #0 {
+; CHECK-LABEL: atomic_store_relaxed_f64:
+  %ptr_unsigned = getelementptr double, double* %p, i32 4095
+  store atomic double %val, double* %ptr_unsigned monotonic, align 8
+; CHECK: str {{d[0-9]+}}, [x0, #32760]
+
+  %ptr_regoff = getelementptr double, double* %p, i32 %off32
+  store atomic double %val, double* %ptr_regoff unordered, align 8
+; CHECK: str {{d[0-9]+}}, [x0, w1, sxtw #3]
+
+  %ptr_regoff64 = getelementptr double, double* %p, i64 %off64
+  store atomic double %val, double* %ptr_regoff64 unordered, align 8
+; CHECK: str {{d[0-9]+}}, [x0, x2, lsl #3]
+
+  %ptr_unscaled = getelementptr double, double* %p, i32 -32
+  store atomic double %val, double* %ptr_unscaled monotonic, align 8
+; CHECK: stur {{d[0-9]+}}, [x0, #-256]
+
+  ret void
+}
+
+attributes #0 = { nounwind }

From fc15ab7b1b26ebe8ac8435f0da23cc91b735ef4e Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 25 Jan 2022 12:35:00 +0000
Subject: [PATCH 544/946] [X86] Add folded load tests to PR46809 tests

---
 llvm/test/CodeGen/X86/select-lea.ll | 198 +++++++++++++++++++++++++++-
 1 file changed, 192 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/X86/select-lea.ll b/llvm/test/CodeGen/X86/select-lea.ll
index 4b50fdc2ca4e0..df4a760eb339e 100644
--- a/llvm/test/CodeGen/X86/select-lea.ll
+++ b/llvm/test/CodeGen/X86/select-lea.ll
@@ -46,6 +46,52 @@ define i32 @sadd_add_imm(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @sadd_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
+; X64-LABEL: sadd_add_load:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    addl %esi, %edi
+; X64-NEXT:    cmovnol %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: sadd_add_load:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    pushl %esi
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CMOV-NEXT:    leal (%eax,%edx), %esi
+; CMOV-NEXT:    addl (%ecx), %esi
+; CMOV-NEXT:    addl %edx, %eax
+; CMOV-NEXT:    cmovol %esi, %eax
+; CMOV-NEXT:    popl %esi
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: sadd_add_load:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
+; NOCMOV-NEXT:    addl %edx, %eax
+; NOCMOV-NEXT:    jno .LBB1_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    addl (%eax), %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB1_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %z = load i32, i32* %pz
+  %a = add i32 %v2, %z
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
 define i32 @uadd_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: uadd_add_imm:
 ; X64:       # %bb.0:
@@ -73,11 +119,11 @@ define i32 @uadd_add_imm(i32 %x, i32 %y) {
 ; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
 ; NOCMOV-NEXT:    addl %edx, %eax
-; NOCMOV-NEXT:    jae .LBB1_2
+; NOCMOV-NEXT:    jae .LBB2_2
 ; NOCMOV-NEXT:  # %bb.1:
 ; NOCMOV-NEXT:    addl $100, %ecx
 ; NOCMOV-NEXT:    movl %ecx, %eax
-; NOCMOV-NEXT:  .LBB1_2:
+; NOCMOV-NEXT:  .LBB2_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
   %v1 = extractvalue { i32, i1 } %o, 1
@@ -87,6 +133,52 @@ define i32 @uadd_add_imm(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @uadd_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
+; X64-LABEL: uadd_add_load:
+; X64:       # %bb.0:
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rdi,%rsi), %eax
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    addl %esi, %edi
+; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: uadd_add_load:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    pushl %esi
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CMOV-NEXT:    leal (%eax,%edx), %esi
+; CMOV-NEXT:    addl (%ecx), %esi
+; CMOV-NEXT:    addl %edx, %eax
+; CMOV-NEXT:    cmovbl %esi, %eax
+; CMOV-NEXT:    popl %esi
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: uadd_add_load:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    leal (%eax,%edx), %ecx
+; NOCMOV-NEXT:    addl %edx, %eax
+; NOCMOV-NEXT:    jae .LBB3_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    addl (%eax), %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB3_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %z = load i32, i32* %pz
+  %a = add i32 %v2, %z
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
 define i32 @ssub_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: ssub_add_imm:
 ; X64:       # %bb.0:
@@ -115,11 +207,11 @@ define i32 @ssub_add_imm(i32 %x, i32 %y) {
 ; NOCMOV-NEXT:    movl %eax, %ecx
 ; NOCMOV-NEXT:    subl %edx, %ecx
 ; NOCMOV-NEXT:    subl %edx, %eax
-; NOCMOV-NEXT:    jno .LBB2_2
+; NOCMOV-NEXT:    jno .LBB4_2
 ; NOCMOV-NEXT:  # %bb.1:
 ; NOCMOV-NEXT:    addl $100, %ecx
 ; NOCMOV-NEXT:    movl %ecx, %eax
-; NOCMOV-NEXT:  .LBB2_2:
+; NOCMOV-NEXT:  .LBB4_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
   %v1 = extractvalue { i32, i1 } %o, 1
@@ -129,6 +221,53 @@ define i32 @ssub_add_imm(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @ssub_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
+; X64-LABEL: ssub_add_load:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmovnol %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: ssub_add_load:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    pushl %esi
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CMOV-NEXT:    movl %eax, %esi
+; CMOV-NEXT:    subl %edx, %esi
+; CMOV-NEXT:    addl (%ecx), %esi
+; CMOV-NEXT:    subl %edx, %eax
+; CMOV-NEXT:    cmovol %esi, %eax
+; CMOV-NEXT:    popl %esi
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: ssub_add_load:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    movl %eax, %ecx
+; NOCMOV-NEXT:    subl %edx, %ecx
+; NOCMOV-NEXT:    subl %edx, %eax
+; NOCMOV-NEXT:    jno .LBB5_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    addl (%eax), %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB5_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %z = load i32, i32* %pz
+  %a = add i32 %v2, %z
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
 define i32 @usub_add_imm(i32 %x, i32 %y) {
 ; X64-LABEL: usub_add_imm:
 ; X64:       # %bb.0:
@@ -157,11 +296,11 @@ define i32 @usub_add_imm(i32 %x, i32 %y) {
 ; NOCMOV-NEXT:    movl %eax, %ecx
 ; NOCMOV-NEXT:    subl %edx, %ecx
 ; NOCMOV-NEXT:    subl %edx, %eax
-; NOCMOV-NEXT:    jae .LBB3_2
+; NOCMOV-NEXT:    jae .LBB6_2
 ; NOCMOV-NEXT:  # %bb.1:
 ; NOCMOV-NEXT:    addl $100, %ecx
 ; NOCMOV-NEXT:    movl %ecx, %eax
-; NOCMOV-NEXT:  .LBB3_2:
+; NOCMOV-NEXT:  .LBB6_2:
 ; NOCMOV-NEXT:    retl
   %o = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
   %v1 = extractvalue { i32, i1 } %o, 1
@@ -171,6 +310,53 @@ define i32 @usub_add_imm(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+define i32 @usub_add_load(i32 %x, i32 %y, i32* %pz) nounwind {
+; X64-LABEL: usub_add_load:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    addl (%rdx), %eax
+; X64-NEXT:    subl %esi, %edi
+; X64-NEXT:    cmovael %edi, %eax
+; X64-NEXT:    retq
+;
+; CMOV-LABEL: usub_add_load:
+; CMOV:       # %bb.0:
+; CMOV-NEXT:    pushl %esi
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; CMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CMOV-NEXT:    movl %eax, %esi
+; CMOV-NEXT:    subl %edx, %esi
+; CMOV-NEXT:    addl (%ecx), %esi
+; CMOV-NEXT:    subl %edx, %eax
+; CMOV-NEXT:    cmovbl %esi, %eax
+; CMOV-NEXT:    popl %esi
+; CMOV-NEXT:    retl
+;
+; NOCMOV-LABEL: usub_add_load:
+; NOCMOV:       # %bb.0:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; NOCMOV-NEXT:    movl %eax, %ecx
+; NOCMOV-NEXT:    subl %edx, %ecx
+; NOCMOV-NEXT:    subl %edx, %eax
+; NOCMOV-NEXT:    jae .LBB7_2
+; NOCMOV-NEXT:  # %bb.1:
+; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; NOCMOV-NEXT:    addl (%eax), %ecx
+; NOCMOV-NEXT:    movl %ecx, %eax
+; NOCMOV-NEXT:  .LBB7_2:
+; NOCMOV-NEXT:    retl
+  %o = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
+  %v1 = extractvalue { i32, i1 } %o, 1
+  %v2 = extractvalue { i32, i1 } %o, 0
+  %z = load i32, i32* %pz
+  %a = add i32 %v2, %z
+  %r = select i1 %v1, i32 %a, i32 %v2
+  ret i32 %r
+}
+
 declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
 declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32)

From 345d85e1240801999ed321eb13a39048a9aa1a06 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Tue, 25 Jan 2022 13:41:09 +0100
Subject: [PATCH 545/946] [lldb] Fix mac build for D117490

This is exactly that kind of a API misuse that the patch was meant to
detect.
---
 .../Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp     | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
index abb3b30e175a4..75271ca6abf49 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp
@@ -484,10 +484,8 @@ PlatformDarwinKernel::GetKernelsAndKextsInDirectoryHelper(
   ConstString file_spec_extension = file_spec.GetFileNameExtension();
 
   Log *log(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM));
-  Log *log_verbose(GetLogIfAllCategoriesSet(LIBLLDB_LOG_PLATFORM | LLDB_LOG_OPTION_VERBOSE));
 
-  LLDB_LOGF(log_verbose, "PlatformDarwinKernel examining '%s'",
-            file_spec.GetPath().c_str());
+  LLDB_LOGV(log, "PlatformDarwinKernel examining '{0}'", file_spec);
 
   PlatformDarwinKernel *thisp = (PlatformDarwinKernel *)baton;
 
@@ -567,9 +565,8 @@ PlatformDarwinKernel::GetKernelsAndKextsInDirectoryHelper(
   if (recurse && file_spec_extension != g_dsym_suffix &&
       file_spec_extension != g_kext_suffix &&
       file_spec_extension != g_bundle_suffix) {
-    LLDB_LOGF(log_verbose,
-              "PlatformDarwinKernel descending into directory '%s'",
-              file_spec.GetPath().c_str());
+    LLDB_LOGV(log, "PlatformDarwinKernel descending into directory '{0}'",
+              file_spec);
     return FileSystem::eEnumerateDirectoryResultEnter;
   } else {
     return FileSystem::eEnumerateDirectoryResultNext;

From 6b67e89b45c1e84a5ddac23f8c9c8c3961577bda Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Tue, 25 Jan 2022 13:51:53 +0100
Subject: [PATCH 546/946] [lldb] Fix windows build for D117490

I forgot to update ProcessWindowsLog to the new API.
---
 .../Windows/Common/ProcessWindowsLog.cpp      | 22 ++++++-----
 .../Windows/Common/ProcessWindowsLog.h        | 37 +++++++++++++------
 2 files changed, 38 insertions(+), 21 deletions(-)

diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.cpp
index 6f5e020e812b6..0d7649a98e8c3 100644
--- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.cpp
@@ -11,17 +11,21 @@
 using namespace lldb_private;
 
 static constexpr Log::Category g_categories[] = {
-    {{"break"}, {"log breakpoints"}, WINDOWS_LOG_BREAKPOINTS},
-    {{"event"}, {"log low level debugger events"}, WINDOWS_LOG_EVENT},
-    {{"exception"}, {"log exception information"}, WINDOWS_LOG_EXCEPTION},
-    {{"memory"}, {"log memory reads and writes"}, WINDOWS_LOG_MEMORY},
-    {{"process"}, {"log process events and activities"}, WINDOWS_LOG_PROCESS},
-    {{"registers"}, {"log register read/writes"}, WINDOWS_LOG_REGISTERS},
-    {{"step"}, {"log step related activities"}, WINDOWS_LOG_STEP},
-    {{"thread"}, {"log thread events and activities"}, WINDOWS_LOG_THREAD},
+    {{"break"}, {"log breakpoints"}, WindowsLog::Breakpoints},
+    {{"event"}, {"log low level debugger events"}, WindowsLog::Event},
+    {{"exception"}, {"log exception information"}, WindowsLog::Exception},
+    {{"memory"}, {"log memory reads and writes"}, WindowsLog::Memory},
+    {{"process"}, {"log process events and activities"}, WindowsLog::Process},
+    {{"registers"}, {"log register read/writes"}, WindowsLog::Registers},
+    {{"step"}, {"log step related activities"}, WindowsLog::Step},
+    {{"thread"}, {"log thread events and activities"}, WindowsLog::Thread},
 };
 
-Log::Channel ProcessWindowsLog::g_channel(g_categories, WINDOWS_LOG_PROCESS);
+static Log::Channel g_channel(g_categories, WindowsLog::Process);
+
+template <> Log::Channel &lldb_private::LogChannelFor<WindowsLog>() {
+  return g_channel;
+}
 
 void ProcessWindowsLog::Initialize() {
   static llvm::once_flag g_once_flag;
diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
index 66ba245c9fa88..68a9d767c227d 100644
--- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
+++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
@@ -11,25 +11,38 @@
 
 #include "lldb/Utility/Log.h"
 
-#define WINDOWS_LOG_PROCESS (1u << 1)     // Log process operations
-#define WINDOWS_LOG_EXCEPTION (1u << 1)   // Log exceptions
-#define WINDOWS_LOG_THREAD (1u << 2)      // Log thread operations
-#define WINDOWS_LOG_MEMORY (1u << 3)      // Log memory reads/writes calls
-#define WINDOWS_LOG_BREAKPOINTS (1u << 4) // Log breakpoint operations
-#define WINDOWS_LOG_STEP (1u << 5)        // Log step operations
-#define WINDOWS_LOG_REGISTERS (1u << 6)   // Log register operations
-#define WINDOWS_LOG_EVENT (1u << 7)       // Low level debug events
-
 namespace lldb_private {
-class ProcessWindowsLog {
-  static Log::Channel g_channel;
 
+enum class WindowsLog : Log::MaskType {
+  Breakpoints = Log::ChannelFlag<0>, // Log breakpoint operations
+  Event = Log::ChannelFlag<1>,       // Low level debug events
+  Exception = Log::ChannelFlag<2>,   // Log exceptions
+  Memory = Log::ChannelFlag<3>,      // Log memory reads/writes calls
+  Process = Log::ChannelFlag<4>,     // Log process operations
+  Registers = Log::ChannelFlag<5>,   // Log register operations
+  Step = Log::ChannelFlag<6>,        // Log step operations
+  Thread = Log::ChannelFlag<7>,      // Log thread operations
+  LLVM_MARK_AS_BITMASK_ENUM(Thread)
+};
+
+#define WINDOWS_LOG_PROCESS ::lldb_private::WindowsLog::Process
+#define WINDOWS_LOG_EXCEPTION ::lldb_private::WindowsLog::Exception
+#define WINDOWS_LOG_THREAD ::lldb_private::WindowsLog::Thread
+#define WINDOWS_LOG_MEMORY ::lldb_private::WindowsLog::Memory
+#define WINDOWS_LOG_BREAKPOINTS ::lldb_private::WindowsLog::Breakpoints
+#define WINDOWS_LOG_STEP ::lldb_private::WindowsLog::Step
+#define WINDOWS_LOG_REGISTERS ::lldb_private::WindowsLog::Registers
+#define WINDOWS_LOG_EVENT ::lldb_private::WindowsLog::Event
+
+class ProcessWindowsLog {
 public:
   static void Initialize();
   static void Terminate();
 
-  static Log *GetLogIfAny(uint32_t mask) { return g_channel.GetLogIfAny(mask); }
+  static Log *GetLogIfAny(WindowsLog mask) { return GetLog(mask); }
 };
+
+template <> Log::Channel &LogChannelFor<WindowsLog>();
 }
 
 #endif // liblldb_ProcessWindowsLog_h_

From 694df0f0a807e1b0c1c04edd6714955f96b9cae1 Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Tue, 25 Jan 2022 21:50:02 +0900
Subject: [PATCH 547/946] [mlir][linalg][bufferize] Fix build

This fixes a linker error related to ModuleBufferization.cpp.

Differential Revision: https://reviews.llvm.org/D118127
---
 .../ComprehensiveBufferize/ModuleBufferization.cpp     | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
index 0fe79862a69d0..e5eac1fb2765d 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
@@ -85,6 +85,16 @@ using namespace tensor;
 using namespace comprehensive_bufferize;
 using namespace mlir::bufferization;
 
+/// Attribute name used to mark the bufferization layout for region
+/// arguments during linalg comprehensive bufferization.
+constexpr const ::llvm::StringLiteral
+    bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
+
+/// Attribute name used to mark region arguments that can be bufferized
+/// in-place during linalg comprehensive bufferization.
+constexpr const ::llvm::StringLiteral
+    bufferization::BufferizableOpInterface::kInplaceableAttrName;
+
 namespace {
 /// The state of analysis of a FuncOp.
 enum class FuncOpAnalysisState { NotAnalyzed, InProgress, Analyzed };

From 4100cf2e92594342b53138bdbeeba220c7dd82c0 Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Tue, 25 Jan 2022 14:12:30 +0100
Subject: [PATCH 548/946] [Visualizers] Fix Optional visualizer.

As discussed in https://reviews.llvm.org/D118105#3268773, OptionalStorage has been changed in commit https://github.com/llvm/llvm-project/commit/fb9730575086b3c2ba38a1aabf3106b01339888b, but the visualizer still tries to use old members.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D118117
---
 llvm/utils/LLVMVisualizers/llvm.natvis | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/LLVMVisualizers/llvm.natvis b/llvm/utils/LLVMVisualizers/llvm.natvis
index 6e75ebd6f4bba..108f1912c75e8 100644
--- a/llvm/utils/LLVMVisualizers/llvm.natvis
+++ b/llvm/utils/LLVMVisualizers/llvm.natvis
@@ -197,9 +197,9 @@ For later versions of Visual Studio, no setup is required.
   
   <Type Name="llvm::Optional&lt;*&gt;">
     <DisplayString Condition="!Storage.hasVal">None</DisplayString>
-    <DisplayString Condition="Storage.hasVal">{*(($T1 *)(unsigned char *)Storage.storage.buffer)}</DisplayString>
+    <DisplayString Condition="Storage.hasVal">{Storage.value}</DisplayString>
     <Expand>
-      <Item Name="[underlying]" Condition="Storage.hasVal">*(($T1 *)(unsigned char *)Storage.storage.buffer)</Item>
+      <Item Name="[underlying]" Condition="Storage.hasVal">Storage.value</Item>
     </Expand>
   </Type>
 

From 6a008de82a8965bd8aac4456e20a51a883e4126b Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 14:18:05 +0100
Subject: [PATCH 549/946] [Evaluator] Simplify handling of bitcasted calls

When fetching the function, strip casts. When casting the result,
use the call result type. Don't actually inspect the bitcast.
---
 .../include/llvm/Transforms/Utils/Evaluator.h |  2 +-
 llvm/lib/Transforms/Utils/Evaluator.cpp       | 29 ++++++-------------
 2 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/Evaluator.h b/llvm/include/llvm/Transforms/Utils/Evaluator.h
index 9346212dd8889..99e826bf855f2 100644
--- a/llvm/include/llvm/Transforms/Utils/Evaluator.h
+++ b/llvm/include/llvm/Transforms/Utils/Evaluator.h
@@ -127,7 +127,7 @@ class Evaluator {
   }
 
   /// Casts call result to a type of bitcast call expression
-  Constant *castCallResultIfNeeded(Value *CallExpr, Constant *RV);
+  Constant *castCallResultIfNeeded(Type *ReturnType, Constant *RV);
 
   /// Given call site return callee and list of its formal arguments
   Function *getCalleeWithFormalArgs(CallBase &CB,
diff --git a/llvm/lib/Transforms/Utils/Evaluator.cpp b/llvm/lib/Transforms/Utils/Evaluator.cpp
index 34f56b1a591d9..e73287c060ae4 100644
--- a/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -245,17 +245,10 @@ static Function *getFunction(Constant *C) {
 Function *
 Evaluator::getCalleeWithFormalArgs(CallBase &CB,
                                    SmallVectorImpl<Constant *> &Formals) {
-  auto *V = CB.getCalledOperand();
+  auto *V = CB.getCalledOperand()->stripPointerCasts();
   if (auto *Fn = getFunction(getVal(V)))
     return getFormalParams(CB, Fn, Formals) ? Fn : nullptr;
-
-  auto *CE = dyn_cast<ConstantExpr>(V);
-  if (!CE || CE->getOpcode() != Instruction::BitCast ||
-      !getFormalParams(CB, getFunction(CE->getOperand(0)), Formals))
-    return nullptr;
-
-  return dyn_cast<Function>(
-      ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL));
+  return nullptr;
 }
 
 bool Evaluator::getFormalParams(CallBase &CB, Function *F,
@@ -284,17 +277,13 @@ bool Evaluator::getFormalParams(CallBase &CB, Function *F,
 
 /// If call expression contains bitcast then we may need to cast
 /// evaluated return value to a type of the call expression.
-Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) {
-  ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr);
-  if (!RV || !CE || CE->getOpcode() != Instruction::BitCast)
+Constant *Evaluator::castCallResultIfNeeded(Type *ReturnType, Constant *RV) {
+  if (!RV || RV->getType() == ReturnType)
     return RV;
 
-  if (auto *FT =
-          dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) {
-    RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL);
-    if (!RV)
-      LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
-  }
+  RV = ConstantFoldLoadThroughBitcast(RV, ReturnType, DL);
+  if (!RV)
+    LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n");
   return RV;
 }
 
@@ -540,7 +529,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
         if (Callee->isDeclaration()) {
           // If this is a function we can constant fold, do it.
           if (Constant *C = ConstantFoldCall(&CB, Callee, Formals, TLI)) {
-            InstResult = castCallResultIfNeeded(CB.getCalledOperand(), C);
+            InstResult = castCallResultIfNeeded(CB.getType(), C);
             if (!InstResult)
               return false;
             LLVM_DEBUG(dbgs() << "Constant folded function call. Result: "
@@ -564,7 +553,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst, BasicBlock *&NextBB,
             return false;
           }
           ValueStack.pop_back();
-          InstResult = castCallResultIfNeeded(CB.getCalledOperand(), RetVal);
+          InstResult = castCallResultIfNeeded(CB.getType(), RetVal);
           if (RetVal && !InstResult)
             return false;
 

From 71bbb78b8fdc72732e3c21ee6d37f3c3868a7fdc Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Tue, 25 Jan 2022 22:05:10 +0900
Subject: [PATCH 550/946] [mlir][linalg][bufferize] Support tensor.generate

This is mostly a copy of the existing tensor.generate bufferization. Once TensorInterfaceImpl.cpp is moved to the tensor dialect, the existing rewrite pattern can be deleted.

Differential Revision: https://reviews.llvm.org/D117770
---
 .../BufferizableOpInterfaceImpl.cpp           | 61 +++++++++++++++++++
 .../comprehensive-module-bufferize.mlir       | 20 ++++++
 2 files changed, 81 insertions(+)

diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index ea9d885736f90..aaa89b4ae2242 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -9,6 +9,7 @@
 #include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/Operation.h"
@@ -228,6 +229,65 @@ struct ExtractOpInterface
   }
 };
 
+/// Bufferization of tensor.generate.
+struct GenerateOpInterface
+    : public BufferizableOpInterface::ExternalModel<GenerateOpInterface,
+                                                    tensor::GenerateOp> {
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationState &state) const {
+    auto generateOp = cast<tensor::GenerateOp>(op);
+
+    // Allocate memory.
+    Location loc = op->getLoc();
+    MemRefType memrefType =
+        getContiguousMemRefType(generateOp.getType().cast<RankedTensorType>());
+    FailureOr<Value> maybeResult =
+        createAlloc(rewriter, loc, memrefType, generateOp.dynamicExtents(),
+                    /*deallocMemref=*/state.getOptions().createDeallocs,
+                    state.getOptions());
+    if (failed(maybeResult))
+      return failure();
+    Value result = *maybeResult;
+
+    // Collect loop bounds.
+    int64_t rank = memrefType.getRank();
+    Value zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
+    Value one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
+    SmallVector<Value, 4> lowerBounds(rank, zero);
+    SmallVector<Value, 4> steps(rank, one);
+    SmallVector<Value, 4> upperBounds;
+    int nextDynamicIndex = 0;
+    for (int i = 0; i < rank; i++) {
+      Value upperBound = memrefType.isDynamicDim(i)
+                             ? generateOp.dynamicExtents()[nextDynamicIndex++]
+                             : rewriter.create<arith::ConstantIndexOp>(
+                                   loc, memrefType.getDimSize(i));
+      upperBounds.push_back(upperBound);
+    }
+
+    // Generate tensor elements with a parallel loop that stores into
+    // each element of the resulting memref. We use mergeBlockBefore to "move"
+    // this op's body into the scf.parallel's body.
+    auto parallel =
+        rewriter.create<scf::ParallelOp>(loc, lowerBounds, upperBounds, steps);
+    Block *parallelBody = parallel.getBody();
+    rewriter.mergeBlockBefore(generateOp.getBody(),
+                              parallelBody->getTerminator(),
+                              parallelBody->getArguments());
+    // Replace the inlined yield op with a store op. The scf.parallel's builder
+    // already populated an scf.yield at the end, so we don't need to worry
+    // about creating that.
+    Operation *elementYield = parallelBody->getTerminator()->getPrevNode();
+    rewriter.setInsertionPointAfter(elementYield);
+    rewriter.replaceOpWithNewOp<memref::StoreOp>(
+        elementYield, elementYield->getOperands()[0], result,
+        parallelBody->getArguments());
+
+    replaceOpWithBufferizedValues(rewriter, op, result);
+    return success();
+  }
+};
+
 /// Bufferization of tensor.insert. Replace with memref.store.
 struct InsertOpInterface
     : public BufferizableOpInterface::ExternalModel<InsertOpInterface,
@@ -502,6 +562,7 @@ void mlir::tensor::registerBufferizableOpInterfaceExternalModels(
   registry.addOpInterface<DimOp, DimOpInterface>();
   registry.addOpInterface<ExtractSliceOp, ExtractSliceOpInterface>();
   registry.addOpInterface<ExtractOp, ExtractOpInterface>();
+  registry.addOpInterface<GenerateOp, GenerateOpInterface>();
   registry.addOpInterface<InsertOp, InsertOpInterface>();
   registry.addOpInterface<InsertSliceOp, InsertSliceOpInterface>();
   registry.addOpInterface<RankOp, RankOpInterface>();
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 28ee8bea2e9ec..eacb2bb9314dc 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -1359,3 +1359,23 @@ func @tensor_rank(%arg0: tensor<*xf32>) -> index {
   // CHECK: return %[[r]] : index
   return %0 : index
 }
+
+// -----
+
+// CHECK-LABEL: func @tensor_generate_static_and_dynamic(
+//  CHECK-SAME:     %[[arg0:.*]]: index
+func @tensor_generate_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> {
+  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[c16:.*]] = arith.constant 16 : index
+  // CHECK: %[[alloc:.*]] = memref.alloc(%[[arg0]]) {{.*}} : memref<16x?xindex>
+  // CHECK: scf.parallel (%[[arg1:.*]], %[[arg2:.*]]) = (%[[c0]], %[[c0]]) to (%[[c16]], %[[arg0]]) {{.*}} {
+  %result = tensor.generate %arg0 {
+  ^bb0(%i: index, %j: index):
+    %sum = arith.addi %i, %j : index
+    // CHECK: memref.store {{.*}}, %[[alloc]][%[[arg1]], %[[arg2]]]
+    // CHECK: scf.yield
+    tensor.yield %sum : index
+  } : tensor<16x?xindex>
+  // CHECK: }
+  return %result : tensor<16x?xindex>
+}

From d581c94d6bfbb336b8620ef06e4340b5ea18a23e Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Tue, 25 Jan 2022 22:05:35 +0900
Subject: [PATCH 551/946] [mlir][linalg][bufferize] Support
 tensor.from_elements

This is mostly a copy of the existing tensor.from_elements bufferization. Once TensorInterfaceImpl.cpp is moved to the tensor dialect, the existing rewrite pattern can be deleted.

Differential Revision: https://reviews.llvm.org/D117775
---
 .../BufferizableOpInterfaceImpl.cpp           | 77 +++++++++++++++++++
 .../comprehensive-module-bufferize.mlir       | 21 +++++
 2 files changed, 98 insertions(+)

diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index aaa89b4ae2242..1c1226b451688 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -229,6 +229,82 @@ struct ExtractOpInterface
   }
 };
 
+// Implements backtracking to traverse indices of the output buffer while
+// iterating over op.elements().
+static void createStores(RewriterBase &rewriter, Location loc, int dim,
+                         Value buffer, ArrayRef<int64_t> shape,
+                         ArrayRef<Value> constants,
+                         OperandRange::iterator &elementIt,
+                         SmallVectorImpl<Value> &indices) {
+  if (dim == static_cast<int>(shape.size()) - 1) {
+    for (int i = 0; i < shape.back(); ++i) {
+      indices.back() = constants[i];
+      rewriter.create<memref::StoreOp>(loc, *elementIt, buffer, indices);
+      ++elementIt;
+    }
+    return;
+  }
+  for (int i = 0; i < shape[dim]; ++i) {
+    indices[dim] = constants[i];
+    createStores(rewriter, loc, dim + 1, buffer, shape, constants, elementIt,
+                 indices);
+  }
+}
+
+/// Bufferization of tensor.from_elements.
+struct FromElementsOpInterface
+    : public BufferizableOpInterface::ExternalModel<FromElementsOpInterface,
+                                                    tensor::FromElementsOp> {
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationState &state) const {
+    auto fromElementsOp = cast<tensor::FromElementsOp>(op);
+
+    // Allocate a buffer for the result.
+    Location loc = op->getLoc();
+    auto tensorType = fromElementsOp.getType().cast<RankedTensorType>();
+    auto shape = tensorType.getShape();
+    MemRefType resultType =
+        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+    FailureOr<Value> maybeBuffer =
+        createAlloc(rewriter, loc, resultType, {},
+                    /*deallocMemref=*/state.getOptions().createDeallocs,
+                    state.getOptions());
+    if (failed(maybeBuffer))
+      return failure();
+    Value buffer = *maybeBuffer;
+
+    // Case: tensor<0xelem_type>.
+    if (fromElementsOp.elements().empty()) {
+      replaceOpWithBufferizedValues(rewriter, op, buffer);
+      return success();
+    }
+
+    // Case: tensor<elem_type>.
+    if (shape.empty()) {
+      rewriter.create<memref::StoreOp>(loc, fromElementsOp.elements().front(),
+                                       buffer);
+      replaceOpWithBufferizedValues(rewriter, op, buffer);
+      return success();
+    }
+
+    // Create constants for the range of possible indices [0, max{shape_i}).
+    auto maxDim = *std::max_element(shape.begin(), shape.end());
+    SmallVector<Value, 2> constants;
+    constants.reserve(maxDim);
+    for (int i = 0; i < maxDim; ++i)
+      constants.push_back(rewriter.create<arith::ConstantIndexOp>(loc, i));
+
+    // Traverse all `elements` and create `memref.store` ops.
+    auto elementIt = fromElementsOp.elements().begin();
+    SmallVector<Value, 2> indices(tensorType.getRank(), constants[0]);
+    createStores(rewriter, loc, /*dim=*/0, buffer, shape, constants, elementIt,
+                 indices);
+
+    replaceOpWithBufferizedValues(rewriter, op, buffer);
+    return success();
+  }
+};
+
 /// Bufferization of tensor.generate.
 struct GenerateOpInterface
     : public BufferizableOpInterface::ExternalModel<GenerateOpInterface,
@@ -562,6 +638,7 @@ void mlir::tensor::registerBufferizableOpInterfaceExternalModels(
   registry.addOpInterface<DimOp, DimOpInterface>();
   registry.addOpInterface<ExtractSliceOp, ExtractSliceOpInterface>();
   registry.addOpInterface<ExtractOp, ExtractOpInterface>();
+  registry.addOpInterface<FromElementsOp, FromElementsOpInterface>();
   registry.addOpInterface<GenerateOp, GenerateOpInterface>();
   registry.addOpInterface<InsertOp, InsertOpInterface>();
   registry.addOpInterface<InsertSliceOp, InsertSliceOpInterface>();
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index eacb2bb9314dc..c4ea9a48b8ece 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -1379,3 +1379,24 @@ func @tensor_generate_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> {
   // CHECK: }
   return %result : tensor<16x?xindex>
 }
+
+// -----
+
+// CHECK-LABEL: func @tensor_from_elements_2d(
+//  CHECK-SAME:     %[[ELEM0:.*]]: index, %[[ELEM1:.*]]: index
+func @tensor_from_elements_2d(%arg0: index, %arg1: index) -> tensor<3x2xindex> {
+  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+  // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
+  // CHECK-DAG: %[[MEMREF:.*]] = memref.alloc() {{.*}} : memref<3x2xindex>
+  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C0]], %[[C0]]]
+  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C0]], %[[C1]]]
+  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C1]], %[[C0]]]
+  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C1]], %[[C1]]]
+  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C2]], %[[C0]]]
+  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C2]], %[[C1]]]
+  %0 = tensor.from_elements %arg0, %arg1, %arg0, %arg1, %arg0, %arg1
+         : tensor<3x2xindex>
+  //     CHECK: return %[[MEMREF]]
+  return %0 : tensor<3x2xindex>
+}

From c0e3c893aa095c78156900ef11f68042aff83839 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=BChnel?= <kuhnel@google.com>
Date: Mon, 13 Dec 2021 15:14:31 +0000
Subject: [PATCH 552/946] [NFC][clangd] cleaning up llvm-qualified-auto

This is a cleanup of all llvm-qualified-auto findings.
This patch was created by automatically applying the fixes from
clang-tidy.

Differential Revision: https://reviews.llvm.org/D113898
---
 clang-tools-extra/clangd/AST.cpp              |  2 +-
 clang-tools-extra/clangd/CodeComplete.cpp     |  4 +-
 clang-tools-extra/clangd/ExpectedTypes.cpp    |  2 +-
 clang-tools-extra/clangd/FindSymbols.cpp      |  2 +-
 .../clangd/HeaderSourceSwitch.cpp             |  4 +-
 clang-tools-extra/clangd/Hover.cpp            |  2 +-
 clang-tools-extra/clangd/TUScheduler.cpp      |  2 +-
 .../clangd/index/IndexAction.cpp              |  2 +-
 .../clangd/index/SymbolCollector.cpp          |  2 +-
 .../clangd/index/dex/Iterator.cpp             |  4 +-
 .../refactor/tweaks/AnnotateHighlightings.cpp |  2 +-
 .../clangd/refactor/tweaks/DefineInline.cpp   |  4 +-
 .../clangd/refactor/tweaks/DumpAST.cpp        |  2 +-
 .../clangd/refactor/tweaks/ExpandMacro.cpp    |  2 +-
 .../clangd/unittests/ClangdTests.cpp          | 26 +++++------
 .../clangd/unittests/FileIndexTests.cpp       |  2 +-
 .../clangd/unittests/HoverTests.cpp           |  2 +-
 .../clangd/unittests/TUSchedulerTests.cpp     | 10 ++---
 .../unittests/tweaks/DefineInlineTests.cpp    | 44 +++++++++----------
 19 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/clang-tools-extra/clangd/AST.cpp b/clang-tools-extra/clangd/AST.cpp
index 53b55e1579ec4..b970325098c65 100644
--- a/clang-tools-extra/clangd/AST.cpp
+++ b/clang-tools-extra/clangd/AST.cpp
@@ -456,7 +456,7 @@ class DeducedTypeVisitor : public RecursiveASTVisitor<DeducedTypeVisitor> {
     const AutoType *AT = D->getReturnType()->getContainedAutoType();
     if (AT && !AT->getDeducedType().isNull()) {
       DeducedType = AT->getDeducedType();
-    } else if (auto DT = dyn_cast<DecltypeType>(D->getReturnType())) {
+    } else if (auto *DT = dyn_cast<DecltypeType>(D->getReturnType())) {
       // auto in a trailing return type just points to a DecltypeType and
       // getContainedAutoType does not unwrap it.
       if (!DT->getUnderlyingType().isNull())
diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp
index d9165599fb9ab..43f35232270a3 100644
--- a/clang-tools-extra/clangd/CodeComplete.cpp
+++ b/clang-tools-extra/clangd/CodeComplete.cpp
@@ -453,7 +453,7 @@ struct CodeCompletionBuilder {
   template <std::string BundledEntry::*Member>
   const std::string *onlyValue() const {
     auto B = Bundled.begin(), E = Bundled.end();
-    for (auto I = B + 1; I != E; ++I)
+    for (auto *I = B + 1; I != E; ++I)
       if (I->*Member != B->*Member)
         return nullptr;
     return &(B->*Member);
@@ -461,7 +461,7 @@ struct CodeCompletionBuilder {
 
   template <bool BundledEntry::*Member> const bool *onlyValue() const {
     auto B = Bundled.begin(), E = Bundled.end();
-    for (auto I = B + 1; I != E; ++I)
+    for (auto *I = B + 1; I != E; ++I)
       if (I->*Member != B->*Member)
         return nullptr;
     return &(B->*Member);
diff --git a/clang-tools-extra/clangd/ExpectedTypes.cpp b/clang-tools-extra/clangd/ExpectedTypes.cpp
index e0a4e472f3496..01a08c8589c18 100644
--- a/clang-tools-extra/clangd/ExpectedTypes.cpp
+++ b/clang-tools-extra/clangd/ExpectedTypes.cpp
@@ -53,7 +53,7 @@ typeOfCompletion(const CodeCompletionResult &R) {
   auto T = VD->getType();
   if (T.isNull())
     return llvm::None;
-  if (auto FuncT = T->getAs<FunctionType>()) {
+  if (auto *FuncT = T->getAs<FunctionType>()) {
     // Functions are a special case. They are completed as 'foo()' and we want
     // to match their return type rather than the function type itself.
     // FIXME(ibiryukov): in some cases, we might want to avoid completing `()`
diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index edbeeed9e2ca6..75961d3a6ea17 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -483,7 +483,7 @@ class DocumentOutline {
     if (!llvm::isa<NamedDecl>(D))
       return VisitKind::No;
 
-    if (auto Func = llvm::dyn_cast<FunctionDecl>(D)) {
+    if (auto *Func = llvm::dyn_cast<FunctionDecl>(D)) {
       // Some functions are implicit template instantiations, those should be
       // ignored.
       if (auto *Info = Func->getTemplateSpecializationInfo()) {
diff --git a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp
index e3e2ab3ea8694..ed86c2eb0d12d 100644
--- a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp
+++ b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp
@@ -25,13 +25,13 @@ llvm::Optional<Path> getCorrespondingHeaderOrSource(
   llvm::StringRef PathExt = llvm::sys::path::extension(OriginalFile);
 
   // Lookup in a list of known extensions.
-  auto SourceIter =
+  auto *SourceIter =
       llvm::find_if(SourceExtensions, [&PathExt](PathRef SourceExt) {
         return SourceExt.equals_insensitive(PathExt);
       });
   bool IsSource = SourceIter != std::end(SourceExtensions);
 
-  auto HeaderIter =
+  auto *HeaderIter =
       llvm::find_if(HeaderExtensions, [&PathExt](PathRef HeaderExt) {
         return HeaderExt.equals_insensitive(PathExt);
       });
diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index 58ef2e3feb99d..c5cb5cd3df53a 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -86,7 +86,7 @@ std::string getLocalScope(const Decl *D) {
       Policy.SuppressScope = true;
       return declaredType(D).getAsString(Policy);
     }
-    if (auto RD = dyn_cast<RecordDecl>(D))
+    if (auto *RD = dyn_cast<RecordDecl>(D))
       return ("(anonymous " + RD->getKindName() + ")").str();
     return std::string("");
   };
diff --git a/clang-tools-extra/clangd/TUScheduler.cpp b/clang-tools-extra/clangd/TUScheduler.cpp
index 43b4d8ca8881f..9b98791e748c3 100644
--- a/clang-tools-extra/clangd/TUScheduler.cpp
+++ b/clang-tools-extra/clangd/TUScheduler.cpp
@@ -1714,7 +1714,7 @@ DebouncePolicy::compute(llvm::ArrayRef<clock::duration> History) const {
   // nth_element needs a mutable array, take the chance to bound the data size.
   History = History.take_back(15);
   llvm::SmallVector<clock::duration, 15> Recent(History.begin(), History.end());
-  auto Median = Recent.begin() + Recent.size() / 2;
+  auto *Median = Recent.begin() + Recent.size() / 2;
   std::nth_element(Recent.begin(), Median, Recent.end());
 
   clock::duration Target =
diff --git a/clang-tools-extra/clangd/index/IndexAction.cpp b/clang-tools-extra/clangd/index/IndexAction.cpp
index 0d8ae93efa4ae..cd0c6dbd5ec69 100644
--- a/clang-tools-extra/clangd/index/IndexAction.cpp
+++ b/clang-tools-extra/clangd/index/IndexAction.cpp
@@ -61,7 +61,7 @@ struct IncludeGraphCollector : public PPCallbacks {
       return;
 
     const auto FileID = SM.getFileID(Loc);
-    const auto File = SM.getFileEntryForID(FileID);
+    const auto *File = SM.getFileEntryForID(FileID);
     auto URI = toURI(File);
     if (!URI)
       return;
diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp
index cccc2c4e8f3d4..3257041ffa0e3 100644
--- a/clang-tools-extra/clangd/index/SymbolCollector.cpp
+++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp
@@ -43,7 +43,7 @@ namespace {
 /// If \p ND is a template specialization, returns the described template.
 /// Otherwise, returns \p ND.
 const NamedDecl &getTemplateOrThis(const NamedDecl &ND) {
-  if (auto T = ND.getDescribedTemplate())
+  if (auto *T = ND.getDescribedTemplate())
     return *T;
   return ND;
 }
diff --git a/clang-tools-extra/clangd/index/dex/Iterator.cpp b/clang-tools-extra/clangd/index/dex/Iterator.cpp
index 8b37403ff406f..8b5e5244d3111 100644
--- a/clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ b/clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -77,7 +77,7 @@ class AndIterator : public Iterator {
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
     OS << "(& ";
-    auto Separator = "";
+    auto *Separator = "";
     for (const auto &Child : Children) {
       OS << Separator << *Child;
       Separator = " ";
@@ -206,7 +206,7 @@ class OrIterator : public Iterator {
 private:
   llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
     OS << "(| ";
-    auto Separator = "";
+    auto *Separator = "";
     for (const auto &Child : Children) {
       OS << Separator << *Child;
       Separator = " ";
diff --git a/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp b/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp
index cb53b3bedf9d0..1383560ad4656 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/AnnotateHighlightings.cpp
@@ -38,7 +38,7 @@ REGISTER_TWEAK(AnnotateHighlightings)
 
 Expected<Tweak::Effect> AnnotateHighlightings::apply(const Selection &Inputs) {
   const Decl *CommonDecl = nullptr;
-  for (auto N = Inputs.ASTSelection.commonAncestor(); N && !CommonDecl;
+  for (auto *N = Inputs.ASTSelection.commonAncestor(); N && !CommonDecl;
        N = N->Parent)
     CommonDecl = N->ASTNode.get<Decl>();
 
diff --git a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp
index 14d2d082ce0f5..ae1ceaf12cb86 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/DefineInline.cpp
@@ -342,13 +342,13 @@ renameParameters(const FunctionDecl *Dest, const FunctionDecl *Source,
 // Because canonical declaration points to template decl instead of
 // specialization.
 const FunctionDecl *findTarget(const FunctionDecl *FD) {
-  auto CanonDecl = FD->getCanonicalDecl();
+  auto *CanonDecl = FD->getCanonicalDecl();
   if (!FD->isFunctionTemplateSpecialization() || CanonDecl == FD)
     return CanonDecl;
   // For specializations CanonicalDecl is the TemplatedDecl, which is not the
   // target we want to inline into. Instead we traverse previous decls to find
   // the first forward decl for this specialization.
-  auto PrevDecl = FD;
+  auto *PrevDecl = FD;
   while (PrevDecl->getPreviousDecl() != CanonDecl) {
     PrevDecl = PrevDecl->getPreviousDecl();
     assert(PrevDecl && "Found specialization without template decl");
diff --git a/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp b/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp
index d7a7852925db5..3f340fbd2a3fd 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/DumpAST.cpp
@@ -34,7 +34,7 @@ class DumpAST : public Tweak {
   const char *id() const override final;
 
   bool prepare(const Selection &Inputs) override {
-    for (auto N = Inputs.ASTSelection.commonAncestor(); N && !Node;
+    for (auto *N = Inputs.ASTSelection.commonAncestor(); N && !Node;
          N = N->Parent)
       if (dumpable(N->ASTNode))
         Node = N->ASTNode;
diff --git a/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp b/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp
index a7e2dddf4cbaf..fad3a3c3d20df 100644
--- a/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp
+++ b/clang-tools-extra/clangd/refactor/tweaks/ExpandMacro.cpp
@@ -52,7 +52,7 @@ findTokenUnderCursor(const SourceManager &SM,
                      llvm::ArrayRef<syntax::Token> Spelled,
                      unsigned CursorOffset) {
   // Find the token that strats after the offset, then look at a previous one.
-  auto It = llvm::partition_point(Spelled, [&](const syntax::Token &T) {
+  auto *It = llvm::partition_point(Spelled, [&](const syntax::Token &T) {
     assert(T.location().isFileID());
     return SM.getFileOffset(T.location()) <= CursorOffset;
   });
diff --git a/clang-tools-extra/clangd/unittests/ClangdTests.cpp b/clang-tools-extra/clangd/unittests/ClangdTests.cpp
index 8295275835177..e6a2e377c59d2 100644
--- a/clang-tools-extra/clangd/unittests/ClangdTests.cpp
+++ b/clang-tools-extra/clangd/unittests/ClangdTests.cpp
@@ -212,7 +212,7 @@ TEST(ClangdServerTest, ParseWithHeader) {
   parseSourceAndDumpAST("foo.cpp", "#include \"foo.h\"", {{"foo.h", ""}},
                         /*ExpectErrors=*/false);
 
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #include "foo.h"
 int b = a;
 )cpp";
@@ -228,7 +228,7 @@ TEST(ClangdServerTest, Reparse) {
   MockCompilationDatabase CDB;
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #include "foo.h"
 int b = a;
 )cpp";
@@ -263,7 +263,7 @@ TEST(ClangdServerTest, ReparseOnHeaderChange) {
   MockCompilationDatabase CDB;
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #include "foo.h"
 int b = a;
 )cpp";
@@ -420,7 +420,7 @@ TEST(ClangdServerTest, SearchLibDir) {
   FS.Files[StringPath] = "class mock_string {};";
 
   auto FooCpp = testPath("foo.cpp");
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #include <string>
 mock_string x;
 )cpp";
@@ -429,7 +429,7 @@ mock_string x;
   runAddDocument(Server, FooCpp, SourceContents);
   EXPECT_FALSE(DiagConsumer.hadErrorInLastDiags());
 
-  const auto SourceContentsWithError = R"cpp(
+  const auto *SourceContentsWithError = R"cpp(
 #include <string>
 std::string x;
 )cpp";
@@ -445,11 +445,11 @@ TEST(ClangdServerTest, ForceReparseCompileCommand) {
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
   auto FooCpp = testPath("foo.cpp");
-  const auto SourceContents1 = R"cpp(
+  const auto *SourceContents1 = R"cpp(
 template <class T>
 struct foo { T x; };
 )cpp";
-  const auto SourceContents2 = R"cpp(
+  const auto *SourceContents2 = R"cpp(
 template <class T>
 struct bar { T x; };
 )cpp";
@@ -481,7 +481,7 @@ TEST(ClangdServerTest, ForceReparseCompileCommandDefines) {
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
   auto FooCpp = testPath("foo.cpp");
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 #ifdef WITH_ERROR
 this
 #endif
@@ -585,7 +585,7 @@ TEST(ClangdServerTest, FileStats) {
   ClangdServer Server(CDB, FS, ClangdServer::optsForTest(), &DiagConsumer);
 
   Path FooCpp = testPath("foo.cpp");
-  const auto SourceContents = R"cpp(
+  const auto *SourceContents = R"cpp(
 struct Something {
   int method();
 };
@@ -652,14 +652,14 @@ TEST(ClangdThreadingTest, StressTest) {
   // BlockingRequestInterval-request will be a blocking one.
   const unsigned BlockingRequestInterval = 40;
 
-  const auto SourceContentsWithoutErrors = R"cpp(
+  const auto *SourceContentsWithoutErrors = R"cpp(
 int a;
 int b;
 int c;
 int d;
 )cpp";
 
-  const auto SourceContentsWithErrors = R"cpp(
+  const auto *SourceContentsWithErrors = R"cpp(
 int a = x;
 int b;
 int c;
@@ -892,14 +892,14 @@ TEST(ClangdThreadingTest, NoConcurrentDiagnostics) {
     std::promise<void> StartSecondReparse;
   };
 
-  const auto SourceContentsWithoutErrors = R"cpp(
+  const auto *SourceContentsWithoutErrors = R"cpp(
 int a;
 int b;
 int c;
 int d;
 )cpp";
 
-  const auto SourceContentsWithErrors = R"cpp(
+  const auto *SourceContentsWithErrors = R"cpp(
 int a = x;
 int b;
 int c;
diff --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
index 7ce47e5dbed07..fe2f1c395c382 100644
--- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp
@@ -249,7 +249,7 @@ TEST(FileIndexTest, HasSystemHeaderMappingsInPreamble) {
 }
 
 TEST(FileIndexTest, TemplateParamsInLabel) {
-  auto Source = R"cpp(
+  auto *Source = R"cpp(
 template <class Ty>
 class vector {
 };
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index 740c907d7e4e4..6f5f6ba7b2669 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -2695,7 +2695,7 @@ TEST(Hover, DocsFromMostSpecial) {
 
   TestTU TU = TestTU::withCode(T.code());
   auto AST = TU.build();
-  for (auto Comment : {"doc1", "doc2", "doc3"}) {
+  for (const auto *Comment : {"doc1", "doc2", "doc3"}) {
     for (const auto &P : T.points(Comment)) {
       auto H = getHover(AST, P, format::getLLVMStyle(), nullptr);
       ASSERT_TRUE(H);
diff --git a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
index 605f398a3fd56..b63db4b8ccd87 100644
--- a/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
+++ b/clang-tools-extra/clangd/unittests/TUSchedulerTests.cpp
@@ -132,7 +132,7 @@ class TUSchedulerTests : public ::testing::Test {
     private:
       void reportDiagnostics(PathRef File, llvm::ArrayRef<Diag> Diags,
                              PublishFn Publish) {
-        auto D = Context::current().get(DiagsCallbackKey);
+        auto *D = Context::current().get(DiagsCallbackKey);
         if (!D)
           return;
         Publish([&]() {
@@ -671,11 +671,11 @@ TEST_F(TUSchedulerTests, EmptyPreamble) {
 
   FS.Files[Header] = "void foo()";
   FS.Timestamps[Header] = time_t(0);
-  auto WithPreamble = R"cpp(
+  auto *WithPreamble = R"cpp(
     #include "foo.h"
     int main() {}
   )cpp";
-  auto WithEmptyPreamble = R"cpp(int main() {})cpp";
+  auto *WithEmptyPreamble = R"cpp(int main() {})cpp";
   S.update(Foo, getInputs(Foo, WithPreamble), WantDiagnostics::Auto);
   S.runWithPreamble(
       "getNonEmptyPreamble", Foo, TUScheduler::Stale,
@@ -748,7 +748,7 @@ TEST_F(TUSchedulerTests, RunWaitsForPreamble) {
   // the same time. All reads should get the same non-null preamble.
   TUScheduler S(CDB, optsForTest());
   auto Foo = testPath("foo.cpp");
-  auto NonEmptyPreamble = R"cpp(
+  auto *NonEmptyPreamble = R"cpp(
     #define FOO 1
     #define BAR 2
 
@@ -844,7 +844,7 @@ TEST_F(TUSchedulerTests, MissingHeader) {
   auto HeaderA = testPath("a/foo.h");
   auto HeaderB = testPath("b/foo.h");
 
-  auto SourceContents = R"cpp(
+  auto *SourceContents = R"cpp(
       #include "foo.h"
       int c = b;
     )cpp";
diff --git a/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp b/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
index 4c1bd3ab55567..37d7459b16a0f 100644
--- a/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
+++ b/clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp
@@ -192,7 +192,7 @@ TEST_F(DefineInlineTest, UsingShadowDecls) {
 }
 
 TEST_F(DefineInlineTest, TransformNestedNamespaces) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       void bar();
       namespace b {
@@ -220,7 +220,7 @@ TEST_F(DefineInlineTest, TransformNestedNamespaces) {
       b::c::aux();
       a::b::c::aux();
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       void bar();
       namespace b {
@@ -252,7 +252,7 @@ TEST_F(DefineInlineTest, TransformNestedNamespaces) {
 }
 
 TEST_F(DefineInlineTest, TransformUsings) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a { namespace b { namespace c { void aux(); } } }
 
     void foo();
@@ -263,7 +263,7 @@ TEST_F(DefineInlineTest, TransformUsings) {
       using c::aux;
       namespace d = c;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a { namespace b { namespace c { void aux(); } } }
 
     void foo(){
@@ -278,7 +278,7 @@ TEST_F(DefineInlineTest, TransformUsings) {
 }
 
 TEST_F(DefineInlineTest, TransformDecls) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     void foo();
     void f^oo() {
       class Foo {
@@ -293,7 +293,7 @@ TEST_F(DefineInlineTest, TransformDecls) {
       enum class EnClass { Zero, One };
       EnClass y = EnClass::Zero;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     void foo(){
       class Foo {
       public:
@@ -312,7 +312,7 @@ TEST_F(DefineInlineTest, TransformDecls) {
 }
 
 TEST_F(DefineInlineTest, TransformTemplDecls) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       template <typename T> class Bar {
       public:
@@ -329,7 +329,7 @@ TEST_F(DefineInlineTest, TransformTemplDecls) {
       bar<Bar<int>>.bar();
       aux<Bar<int>>();
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       template <typename T> class Bar {
       public:
@@ -350,7 +350,7 @@ TEST_F(DefineInlineTest, TransformTemplDecls) {
 }
 
 TEST_F(DefineInlineTest, TransformMembers) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     class Foo {
       void foo();
     };
@@ -358,7 +358,7 @@ TEST_F(DefineInlineTest, TransformMembers) {
     void Foo::f^oo() {
       return;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     class Foo {
       void foo(){
       return;
@@ -395,7 +395,7 @@ TEST_F(DefineInlineTest, TransformMembers) {
 }
 
 TEST_F(DefineInlineTest, TransformDependentTypes) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       template <typename T> class Bar {};
     }
@@ -409,7 +409,7 @@ TEST_F(DefineInlineTest, TransformDependentTypes) {
       Bar<T> B;
       Bar<Bar<T>> q;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       template <typename T> class Bar {};
     }
@@ -511,7 +511,7 @@ TEST_F(DefineInlineTest, TransformFunctionTempls) {
 }
 
 TEST_F(DefineInlineTest, TransformTypeLocs) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       template <typename T> class Bar {
       public:
@@ -528,7 +528,7 @@ TEST_F(DefineInlineTest, TransformTypeLocs) {
       Foo foo;
       a::Bar<Bar<int>>::Baz<Bar<int>> q;
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       template <typename T> class Bar {
       public:
@@ -549,7 +549,7 @@ TEST_F(DefineInlineTest, TransformTypeLocs) {
 }
 
 TEST_F(DefineInlineTest, TransformDeclRefs) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a {
       template <typename T> class Bar {
       public:
@@ -575,7 +575,7 @@ TEST_F(DefineInlineTest, TransformDeclRefs) {
       bar();
       a::test();
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a {
       template <typename T> class Bar {
       public:
@@ -605,12 +605,12 @@ TEST_F(DefineInlineTest, TransformDeclRefs) {
 }
 
 TEST_F(DefineInlineTest, StaticMembers) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace ns { class X { static void foo(); void bar(); }; }
     void ns::X::b^ar() {
       foo();
     })cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace ns { class X { static void foo(); void bar(){
       foo();
     } }; }
@@ -654,7 +654,7 @@ est);
 }
 
 TEST_F(DefineInlineTest, TransformTemplParamNames) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     struct Foo {
       struct Bar {
         template <class, class X,
@@ -668,7 +668,7 @@ TEST_F(DefineInlineTest, TransformTemplParamNames) {
               template<typename> class V, template<typename> class W,
               int X, int Y>
     void Foo::Bar::f^oo(U, W<U>, int Q) {})cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     struct Foo {
       struct Bar {
         template <class T, class U,
@@ -683,13 +683,13 @@ TEST_F(DefineInlineTest, TransformTemplParamNames) {
 }
 
 TEST_F(DefineInlineTest, TransformInlineNamespaces) {
-  auto Test = R"cpp(
+  auto *Test = R"cpp(
     namespace a { inline namespace b { namespace { struct Foo{}; } } }
     void foo();
 
     using namespace a;
     void ^foo() {Foo foo;})cpp";
-  auto Expected = R"cpp(
+  auto *Expected = R"cpp(
     namespace a { inline namespace b { namespace { struct Foo{}; } } }
     void foo(){a::Foo foo;}
 

From a3a2239aaaf6860eaee591c70a016b7c5984edde Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 14:25:48 +0100
Subject: [PATCH 553/946] [GlobalISel] Avoid pointer element type access during
 InlineAsm lowering

Same change as has been made for the SDAG lowering.
---
 llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index e0503b1ed2050..dfcf1b80392af 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -298,7 +298,7 @@ bool InlineAsmLowering::lowerInlineAsm(
 
     // Compute the value type for each operand.
     if (OpInfo.hasArg()) {
-      OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo++));
+      OpInfo.CallOperandVal = const_cast<Value *>(Call.getArgOperand(ArgNo));
 
       if (isa<BasicBlock>(OpInfo.CallOperandVal)) {
         LLVM_DEBUG(dbgs() << "Basic block input operands not supported yet\n");
@@ -310,10 +310,8 @@ bool InlineAsmLowering::lowerInlineAsm(
       // If this is an indirect operand, the operand is a pointer to the
       // accessed type.
       if (OpInfo.isIndirect) {
-        PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
-        if (!PtrTy)
-          report_fatal_error("Indirect operand for inline asm not a pointer!");
-        OpTy = PtrTy->getPointerElementType();
+        OpTy = Call.getAttributes().getParamElementType(ArgNo);
+        assert(OpTy && "Indirect operand must have elementtype attribute");
       }
 
       // FIXME: Support aggregate input operands
@@ -325,7 +323,7 @@ bool InlineAsmLowering::lowerInlineAsm(
 
       OpInfo.ConstraintVT =
           TLI->getAsmOperandValueType(DL, OpTy, true).getSimpleVT();
-
+      ++ArgNo;
     } else if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
       assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
       if (StructType *STy = dyn_cast<StructType>(Call.getType())) {

From 475927d04606433f4ad70b9e41bbe731994ba9b6 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 14:31:51 +0100
Subject: [PATCH 554/946] [AsmParserTest] Avoid pointer element type accesses
 (NFC)

Use isOpaqueOrPointeeTypeEquals() instead.
---
 llvm/unittests/AsmParser/AsmParserTest.cpp | 26 +++++-----------------
 1 file changed, 6 insertions(+), 20 deletions(-)

diff --git a/llvm/unittests/AsmParser/AsmParserTest.cpp b/llvm/unittests/AsmParser/AsmParserTest.cpp
index 7639ff50571e6..9d2333b49bc31 100644
--- a/llvm/unittests/AsmParser/AsmParserTest.cpp
+++ b/llvm/unittests/AsmParser/AsmParserTest.cpp
@@ -252,9 +252,7 @@ TEST(AsmParserTest, TypeWithSlotMappingParsing) {
   ASSERT_TRUE(Ty->isPointerTy());
 
   PointerType *PT = cast<PointerType>(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isIntegerTy());
-  ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
+  ASSERT_TRUE(PT->isOpaqueOrPointeeTypeMatches(Type::getIntNTy(Ctx, 32)));
 
   // Two indirections.
   Ty = parseType("i32**", Error, M, &Mapping);
@@ -262,13 +260,8 @@ TEST(AsmParserTest, TypeWithSlotMappingParsing) {
   ASSERT_TRUE(Ty->isPointerTy());
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isPointerTy());
-
-  PT = cast<PointerType>(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isIntegerTy());
-  ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
+  Type *ExpectedElemTy = PointerType::getUnqual(Type::getIntNTy(Ctx, 32));
+  ASSERT_TRUE(PT->isOpaqueOrPointeeTypeMatches(ExpectedElemTy));
 
   // Check that we reject types with garbage.
   Ty = parseType("i32 garbage", Error, M, &Mapping);
@@ -386,9 +379,7 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 4);
 
   PointerType *PT = cast<PointerType>(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isIntegerTy());
-  ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
+  ASSERT_TRUE(PT->isOpaqueOrPointeeTypeMatches(Type::getIntNTy(Ctx, 32)));
 
   // Two indirections.
   Ty = parseTypeAtBeginning("i32**", Read, Error, M, &Mapping);
@@ -397,13 +388,8 @@ TEST(AsmParserTest, TypeAtBeginningWithSlotMappingParsing) {
   ASSERT_TRUE(Read == 5);
 
   PT = cast<PointerType>(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isPointerTy());
-
-  PT = cast<PointerType>(Ty);
-  Ty = PT->getPointerElementType();
-  ASSERT_TRUE(Ty->isIntegerTy());
-  ASSERT_TRUE(Ty->getPrimitiveSizeInBits() == 32);
+  Type *ExpectedElemTy = PointerType::getUnqual(Type::getIntNTy(Ctx, 32));
+  ASSERT_TRUE(PT->isOpaqueOrPointeeTypeMatches(ExpectedElemTy));
 
   // Check that we reject types with garbage.
   Ty = parseTypeAtBeginning("i32 garbage", Read, Error, M, &Mapping);

From bf00f7a64e3a37b1b9cc59a152da6ddb0accdbd9 Mon Sep 17 00:00:00 2001
From: Hans Wennborg <hans@chromium.org>
Date: Mon, 24 Jan 2022 15:25:17 +0100
Subject: [PATCH 555/946] Add llvm-dwp to LLVM_TOOLCHAIN_TOOLS

since it qualifies as a toolchain tool rather than "internal llvm tool".
This will make it part of builds which set the
LLVM_INSTALL_TOOLCHAIN_ONLY cmake option, such as the Windows installer.

Differential revision: https://reviews.llvm.org/D118042
---
 llvm/cmake/modules/AddLLVM.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake
index fed1fec7d72e8..a262d55ddfa42 100644
--- a/llvm/cmake/modules/AddLLVM.cmake
+++ b/llvm/cmake/modules/AddLLVM.cmake
@@ -1210,6 +1210,7 @@ if(NOT LLVM_TOOLCHAIN_TOOLS)
     llvm-ar
     llvm-cov
     llvm-cxxfilt
+    llvm-dwp
     llvm-ranlib
     llvm-lib
     llvm-ml

From 8e3e772f84e590ed9ba7182c7b01844afdb2dbff Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 14:39:22 +0100
Subject: [PATCH 556/946] [OpenMPIRBuilderTest] Avoid some pointer element type
 accesses (NFC)

Use isOpaqueOrPointeeTypeMatches() instead, where possible.
---
 .../Frontend/OpenMPIRBuilderTest.cpp          | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index bc2d3ec8e7abe..a4d82e07dc139 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1096,14 +1096,15 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
 
   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
   EXPECT_TRUE(Arg2Type->isPointerTy());
-  EXPECT_EQ(Arg2Type->getPointerElementType(), I32Ty);
+  EXPECT_TRUE(cast<PointerType>(Arg2Type)->isOpaqueOrPointeeTypeMatches(I32Ty));
 
   // Arguments that need to be passed through pointers and reloaded will get
   // used earlier in the functions and therefore will appear first in the
   // argument list after outlining.
   Type *Arg3Type = OutlinedFn->getArg(3)->getType();
   EXPECT_TRUE(Arg3Type->isPointerTy());
-  EXPECT_EQ(Arg3Type->getPointerElementType(), StructTy);
+  EXPECT_TRUE(
+      cast<PointerType>(Arg3Type)->isOpaqueOrPointeeTypeMatches(StructTy));
 
   Type *Arg4Type = OutlinedFn->getArg(4)->getType();
   EXPECT_EQ(Arg4Type, I32PtrTy);
@@ -3814,10 +3815,10 @@ TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
   EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
                   ->getArrayElementType()
                   ->isPointerTy());
-  EXPECT_TRUE(MapperAllocas.ArgsBase->getAllocatedType()
-                  ->getArrayElementType()
-                  ->getPointerElementType()
-                  ->isIntegerTy(8));
+  EXPECT_TRUE(
+      cast<PointerType>(
+          MapperAllocas.ArgsBase->getAllocatedType()->getArrayElementType())
+          ->isOpaqueOrPointeeTypeMatches(Builder.getInt8Ty()));
 
   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()->isArrayTy());
   ArrType = dyn_cast<ArrayType>(MapperAllocas.Args->getAllocatedType());
@@ -3825,10 +3826,9 @@ TEST_F(OpenMPIRBuilderTest, CreateMapperAllocas) {
   EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
                   ->getArrayElementType()
                   ->isPointerTy());
-  EXPECT_TRUE(MapperAllocas.Args->getAllocatedType()
-                  ->getArrayElementType()
-                  ->getPointerElementType()
-                  ->isIntegerTy(8));
+  EXPECT_TRUE(cast<PointerType>(
+                  MapperAllocas.Args->getAllocatedType()->getArrayElementType())
+                  ->isOpaqueOrPointeeTypeMatches(Builder.getInt8Ty()));
 
   EXPECT_TRUE(MapperAllocas.ArgSizes->getAllocatedType()->isArrayTy());
   ArrType = dyn_cast<ArrayType>(MapperAllocas.ArgSizes->getAllocatedType());

From 7cc3e141d7106eb753b73cb8ad7251c67c738e9f Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 14:52:01 +0100
Subject: [PATCH 557/946] [MemProf] Avoid pointer element type access

Determine the masked load/store access type from the value type
of the intrinsics, rather than the pointer element type. For
cleanliness, include the access type in InterestingMemoryAccess.
---
 .../Instrumentation/MemProfiler.cpp           | 27 +++++++++----------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 92ea007691b27..8fedefccf0e16 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -156,6 +156,7 @@ struct InterestingMemoryAccess {
   Value *Addr = nullptr;
   bool IsWrite;
   unsigned Alignment;
+  Type *AccessTy;
   uint64_t TypeSize;
   Value *MaybeMask = nullptr;
 };
@@ -181,7 +182,7 @@ class MemProfiler {
                          Value *Addr, uint32_t TypeSize, bool IsWrite);
   void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                    Instruction *I, Value *Addr,
-                                   unsigned Alignment, uint32_t TypeSize,
+                                   unsigned Alignment, Type *AccessTy,
                                    bool IsWrite);
   void instrumentMemIntrinsic(MemIntrinsic *MI);
   Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
@@ -334,36 +335,32 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
 
   InterestingMemoryAccess Access;
 
-  const DataLayout &DL = I->getModule()->getDataLayout();
   if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
     if (!ClInstrumentReads)
       return None;
     Access.IsWrite = false;
-    Access.TypeSize = DL.getTypeStoreSizeInBits(LI->getType());
+    Access.AccessTy = LI->getType();
     Access.Alignment = LI->getAlignment();
     Access.Addr = LI->getPointerOperand();
   } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
     if (!ClInstrumentWrites)
       return None;
     Access.IsWrite = true;
-    Access.TypeSize =
-        DL.getTypeStoreSizeInBits(SI->getValueOperand()->getType());
+    Access.AccessTy = SI->getValueOperand()->getType();
     Access.Alignment = SI->getAlignment();
     Access.Addr = SI->getPointerOperand();
   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
     if (!ClInstrumentAtomics)
       return None;
     Access.IsWrite = true;
-    Access.TypeSize =
-        DL.getTypeStoreSizeInBits(RMW->getValOperand()->getType());
+    Access.AccessTy = RMW->getValOperand()->getType();
     Access.Alignment = 0;
     Access.Addr = RMW->getPointerOperand();
   } else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
     if (!ClInstrumentAtomics)
       return None;
     Access.IsWrite = true;
-    Access.TypeSize =
-        DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
+    Access.AccessTy = XCHG->getCompareOperand()->getType();
     Access.Alignment = 0;
     Access.Addr = XCHG->getPointerOperand();
   } else if (auto *CI = dyn_cast<CallInst>(I)) {
@@ -376,16 +373,16 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
           return None;
         // Masked store has an initial operand for the value.
         OpOffset = 1;
+        Access.AccessTy = CI->getArgOperand(0)->getType();
         Access.IsWrite = true;
       } else {
         if (!ClInstrumentReads)
           return None;
+        Access.AccessTy = CI->getType();
         Access.IsWrite = false;
       }
 
       auto *BasePtr = CI->getOperand(0 + OpOffset);
-      auto *Ty = BasePtr->getType()->getPointerElementType();
-      Access.TypeSize = DL.getTypeStoreSizeInBits(Ty);
       if (auto *AlignmentConstant =
               dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
         Access.Alignment = (unsigned)AlignmentConstant->getZExtValue();
@@ -412,14 +409,16 @@ MemProfiler::isInterestingMemoryAccess(Instruction *I) const {
   if (Access.Addr->isSwiftError())
     return None;
 
+  const DataLayout &DL = I->getModule()->getDataLayout();
+  Access.TypeSize = DL.getTypeStoreSizeInBits(Access.AccessTy);
   return Access;
 }
 
 void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,
                                               Instruction *I, Value *Addr,
                                               unsigned Alignment,
-                                              uint32_t TypeSize, bool IsWrite) {
-  auto *VTy = cast<FixedVectorType>(Addr->getType()->getPointerElementType());
+                                              Type *AccessTy, bool IsWrite) {
+  auto *VTy = cast<FixedVectorType>(AccessTy);
   uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   unsigned Num = VTy->getNumElements();
   auto *Zero = ConstantInt::get(IntptrTy, 0);
@@ -468,7 +467,7 @@ void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,
 
   if (Access.MaybeMask) {
     instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,
-                                Access.Alignment, Access.TypeSize,
+                                Access.Alignment, Access.AccessTy,
                                 Access.IsWrite);
   } else {
     // Since the access counts will be accumulated across the entire allocation,

From 4ed7c6eec97925281f4c2a02ec7030dab750ba34 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Mon, 24 Jan 2022 18:46:33 +0100
Subject: [PATCH 558/946] [AMDGPU] Only match correct type for a16

Addresses are floats when a sampler is present and unsigned integers
when no sampler is present.

Therefore, only zext instructions, not sext instructions should match.

Also match integer constants that can be truncated.

Differential Revision: https://reviews.llvm.org/D118043
---
 .../AMDGPU/AMDGPUInstCombineIntrinsic.cpp     | 45 ++++++---
 .../InstCombine/AMDGPU/amdgcn-intrinsics.ll   | 99 +++++++++++++++++++
 2 files changed, 131 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index c3a326945557e..4f1d700bcd842 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -58,24 +58,37 @@ static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
 
 // Check if a value can be converted to a 16-bit value without losing
 // precision.
-static bool canSafelyConvertTo16Bit(Value &V) {
+// The value is expected to be either a float (IsFloat = true) or an unsigned
+// integer (IsFloat = false).
+static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
   Type *VTy = V.getType();
   if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
     // The value is already 16-bit, so we don't want to convert to 16-bit again!
     return false;
   }
-  if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
-    // We need to check that if we cast the index down to a half, we do not lose
-    // precision.
-    APFloat FloatValue(ConstFloat->getValueAPF());
-    bool LosesInfo = true;
-    FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
-    return !LosesInfo;
+  if (IsFloat) {
+    if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
+      // We need to check that if we cast the index down to a half, we do not
+      // lose precision.
+      APFloat FloatValue(ConstFloat->getValueAPF());
+      bool LosesInfo = true;
+      FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
+                         &LosesInfo);
+      return !LosesInfo;
+    }
+  } else {
+    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
+      // We need to check that if we cast the index down to an i16, we do not
+      // lose precision.
+      APInt IntValue(ConstInt->getValue());
+      return IntValue.getActiveBits() <= 16;
+    }
   }
+
   Value *CastSrc;
-  if (match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) ||
-      match(&V, m_SExt(PatternMatch::m_Value(CastSrc))) ||
-      match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)))) {
+  bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
+                       : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
+  if (IsExt) {
     Type *CastSrcTy = CastSrc->getType();
     if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
       return true;
@@ -203,6 +216,10 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
   if (!ST->hasA16() && !ST->hasG16())
     return None;
 
+  // Address is interpreted as float if the instruction has a sampler or as
+  // unsigned int if there is no sampler.
+  bool HasSampler =
+      AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
   bool FloatCoord = false;
   // true means derivatives can be converted to 16 bit, coordinates not
   bool OnlyDerivatives = false;
@@ -211,7 +228,7 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
        OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
     Value *Coord = II.getOperand(OperandIndex);
     // If the values are not derived from 16-bit values, we cannot optimize.
-    if (!canSafelyConvertTo16Bit(*Coord)) {
+    if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
       if (OperandIndex < ImageDimIntr->CoordStart ||
           ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
         return None;
@@ -232,7 +249,9 @@ simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
   // Check if there is a bias parameter and if it can be converted to f16
   if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
     Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
-    if (!canSafelyConvertTo16Bit(*Bias))
+    assert(HasSampler &&
+           "Only image instructions with a sampler can have a bias");
+    if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
       OnlyDerivatives = true;
   }
 
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
index 709d531b8cee6..894e0ef860646 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
@@ -3667,6 +3667,105 @@ define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_V2(<2 x float> addrspa
   ret void
 }
 
+define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) {
+; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const(
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f16(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], half [[S:%.*]], half 0xH3400, half [[SLICE:%.*]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+  %dsdh32 = fpext half %dsdh to float
+  %dtdh32 = fpext half %dtdh to float
+  %dsdv32 = fpext half %dsdv to float
+  %dtdv32 = fpext half %dtdv to float
+  %s32 = fpext half %s to float
+  %slice32 = fpext half %slice to float
+  %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 0.25, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <2 x float> %res, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_sample_a16_c_d_o_2darray_const_noopt(<2 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %slice) {
+; CHECK-LABEL: @image_sample_a16_c_d_o_2darray_const_noopt(
+; CHECK-NEXT:    [[S32:%.*]] = fpext half [[S:%.*]] to float
+; CHECK-NEXT:    [[SLICE32:%.*]] = fpext half [[SLICE:%.*]] to float
+; CHECK-NEXT:    [[RES:%.*]] = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 [[OFFSET:%.*]], float [[ZCOMPARE:%.*]], half [[DSDH:%.*]], half [[DTDH:%.*]], half [[DSDV:%.*]], half [[DTDV:%.*]], float [[S32]], float 1.000000e+10, float [[SLICE32]], <8 x i32> [[RSRC:%.*]], <4 x i32> [[SAMP:%.*]], i1 false, i32 0, i32 0)
+; CHECK-NEXT:    store <2 x float> [[RES]], <2 x float> addrspace(1)* [[OUT:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+  %dsdh32 = fpext half %dsdh to float
+  %dtdh32 = fpext half %dtdh to float
+  %dsdv32 = fpext half %dsdv to float
+  %dtdv32 = fpext half %dtdv to float
+  %s32 = fpext half %s to float
+  %slice32 = fpext half %slice to float
+  %res = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh32, float %dtdh32, float %dsdv32, float %dtdv32, float %s32, float 1.0e+10, float %slice32, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
+  store <2 x float> %res, <2 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_1d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) {
+; CHECK-LABEL: @image_load_a16_mip_1d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 [[S:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = zext i16 %s to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_1d_noopt(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) {
+; CHECK-LABEL: @image_load_a16_mip_1d_noopt(
+; CHECK-NEXT:    [[S32:%.*]] = sext i16 [[S:%.*]] to i32
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 [[S32]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = sext i16 %s to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_2d(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s, i16 %t) {
+; CHECK-LABEL: @image_load_a16_mip_2d(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 [[T:%.*]], <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = zext i16 %s to i32
+  %t32 = zext i16 %t to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 %t32, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_2d_const(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) {
+; CHECK-LABEL: @image_load_a16_mip_2d_const(
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 [[S:%.*]], i16 -1, <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = zext i16 %s to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65535, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
+define amdgpu_kernel void @image_load_a16_mip_2d_const_noopt(<4 x float> addrspace(1)* %out, <8 x i32> inreg %rsrc, i16 %s) {
+; CHECK-LABEL: @image_load_a16_mip_2d_const_noopt(
+; CHECK-NEXT:    [[S32:%.*]] = zext i16 [[S:%.*]] to i32
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 [[S32]], i32 65536, <8 x i32> [[RSRC:%.*]], i32 0, i32 0)
+; CHECK-NEXT:    store <4 x float> [[RES]], <4 x float> addrspace(1)* [[OUT:%.*]], align 16
+; CHECK-NEXT:    ret void
+;
+  %s32 = zext i16 %s to i32
+  %res = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s32, i32 65536, i32 0, <8 x i32> %rsrc, i32 0, i32 0)
+  store <4 x float> %res, <4 x float> addrspace(1)* %out
+  ret void
+}
+
 ; --------------------------------------------------------------------
 ; llvm.amdgcn.image.sample g16
 ; --------------------------------------------------------------------

From 2c8a77ab21ff3a41829a5d67e0b838cc7a9f5f21 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Tue, 25 Jan 2022 15:17:07 +0100
Subject: [PATCH 559/946] [mlir] Move duplicated
 BufferizableOpInterface::kBufferLayoutAttrName defs to a single place

---
 .../Bufferization/IR/BufferizableOpInterface.cpp       | 10 ++++++++++
 .../ComprehensiveBufferize/ModuleBufferization.cpp     | 10 ----------
 mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp           | 10 ----------
 3 files changed, 10 insertions(+), 20 deletions(-)

diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 9cb99db16d6a7..7d91229625cca 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -32,6 +32,16 @@ namespace bufferization {
 using namespace mlir;
 using namespace bufferization;
 
+/// Attribute name used to mark the bufferization layout for region
+/// arguments during linalg comprehensive bufferization.
+constexpr const ::llvm::StringLiteral
+    bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
+
+/// Attribute name used to mark region arguments that can be bufferized
+/// in-place during linalg comprehensive bufferization.
+constexpr const ::llvm::StringLiteral
+    bufferization::BufferizableOpInterface::kInplaceableAttrName;
+
 //===----------------------------------------------------------------------===//
 // BufferizationOptions
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
index e5eac1fb2765d..0fe79862a69d0 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
@@ -85,16 +85,6 @@ using namespace tensor;
 using namespace comprehensive_bufferize;
 using namespace mlir::bufferization;
 
-/// Attribute name used to mark the bufferization layout for region
-/// arguments during linalg comprehensive bufferization.
-constexpr const ::llvm::StringLiteral
-    bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
-
-/// Attribute name used to mark region arguments that can be bufferized
-/// in-place during linalg comprehensive bufferization.
-constexpr const ::llvm::StringLiteral
-    bufferization::BufferizableOpInterface::kInplaceableAttrName;
-
 namespace {
 /// The state of analysis of a FuncOp.
 enum class FuncOpAnalysisState { NotAnalyzed, InProgress, Analyzed };
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
index ccab0bbd6cb2c..623dd5085363d 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp
@@ -62,16 +62,6 @@ struct LinalgInlinerInterface : public DialectInlinerInterface {
 constexpr const ::llvm::StringLiteral
     LinalgDialect::kMemoizedIndexingMapsAttrName;
 
-/// Attribute name used to mark the bufferization layout for region
-/// arguments during linalg comprehensive bufferization.
-constexpr const ::llvm::StringLiteral
-    bufferization::BufferizableOpInterface::kBufferLayoutAttrName;
-
-/// Attribute name used to mark region arguments that can be bufferized
-/// in-place during linalg comprehensive bufferization.
-constexpr const ::llvm::StringLiteral
-    bufferization::BufferizableOpInterface::kInplaceableAttrName;
-
 /// Trait to check if T provides a `regionBuilder` method.
 template <typename T, typename... Args>
 using has_region_builder = decltype(T::regionBuilder);

From 98db33349bcc4d12a56313c406c1f40038258f10 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 15:20:32 +0100
Subject: [PATCH 560/946] [SLC] Fix pointer diff type in sprintf() optimization

We should always be calculating a byte-wise difference here.
Previously this calculated the pointer difference while taking
the pointer element type into account, which is incorrect.
---
 llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp |  6 +++---
 llvm/test/Transforms/InstCombine/stpcpy-1.ll   | 14 +++++---------
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index 123fb0cfd1cbb..e02d02a057521 100644
--- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -2515,9 +2515,9 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI,
     } else if (Value *V = emitStpCpy(Dest, CI->getArgOperand(2), B, TLI)) {
       // sprintf(dest, "%s", str) -> stpcpy(dest, str) - dest
       // Handle mismatched pointer types (goes away with typeless pointers?).
-      V = B.CreatePointerCast(V, Dest->getType());
-      Value *PtrDiff = B.CreatePtrDiff(
-          Dest->getType()->getPointerElementType(), V, Dest);
+      V = B.CreatePointerCast(V, B.getInt8PtrTy());
+      Dest = B.CreatePointerCast(Dest, B.getInt8PtrTy());
+      Value *PtrDiff = B.CreatePtrDiff(B.getInt8Ty(), V, Dest);
       return B.CreateIntCast(PtrDiff, CI->getType(), false);
     }
 
diff --git a/llvm/test/Transforms/InstCombine/stpcpy-1.ll b/llvm/test/Transforms/InstCombine/stpcpy-1.ll
index 5c59edc639c91..79587102af8a0 100644
--- a/llvm/test/Transforms/InstCombine/stpcpy-1.ll
+++ b/llvm/test/Transforms/InstCombine/stpcpy-1.ll
@@ -59,8 +59,8 @@ define i8* @test_no_simplify1() {
 
 define i8* @test_no_simplify2(i8* %dst, i8* %src) {
 ; CHECK-LABEL: @test_no_simplify2(
-; CHECK-NEXT:    %ret = musttail call i8* @stpcpy(i8* %dst, i8* %src)
-; CHECK-NEXT:    ret i8* %ret
+; CHECK-NEXT:    [[RET:%.*]] = musttail call i8* @stpcpy(i8* [[DST:%.*]], i8* [[SRC:%.*]])
+; CHECK-NEXT:    ret i8* [[RET]]
 ;
   %ret = musttail call i8* @stpcpy(i8* %dst, i8* %src)
   ret i8* %ret
@@ -87,13 +87,9 @@ define i32 @PR51200(i8** %p, i32* %p2) {
 ; CHECK-NEXT:    [[CSTR1:%.*]] = bitcast i32* [[P2:%.*]] to i8*
 ; CHECK-NEXT:    [[STPCPY:%.*]] = call i8* @stpcpy(i8* [[CSTR]], i8* [[CSTR1]])
 ; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i8* [[STPCPY]] to i32
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i8** [[P]] to i32
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT:    [[TMP5:%.*]] = sub nsw i64 [[TMP2]], [[TMP4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr exact i64 [[TMP5]], 2
-; CHECK-NEXT:    [[TMP7:%.*]] = trunc i64 [[TMP6]] to i32
-; CHECK-NEXT:    ret i32 [[TMP7]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i8** [[P]] to i32
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %call = call i32 (i8**, i32*, ...) @sprintf(i8** %p, i32* bitcast ([3 x i8]* @percent_s to i32*), i32* %p2)
   ret i32 %call

From f3314e3747873fdf026a28742a30f372503baf32 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim@andestech.com>
Date: Tue, 25 Jan 2022 15:05:43 +0800
Subject: [PATCH 561/946] [clang-tidy] Pop Files only if FileChangeReason is
 ExitFile

enum FileChangeReason has four possible type EnterFile, ExitFile,
SystemHeaderPragma and RenameFile,
It should pop the back element of Files only if FileChangeReason is ExitFile.
---
 .../clang-tidy/readability/DuplicateIncludeCheck.cpp            | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
index 681b8399154a7..a6e49439c8434 100644
--- a/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp
@@ -71,7 +71,7 @@ void DuplicateIncludeCallbacks::FileChanged(SourceLocation Loc,
                                             FileID PrevFID) {
   if (Reason == EnterFile)
     Files.emplace_back();
-  else
+  else if (Reason == ExitFile)
     Files.pop_back();
 }
 

From b0956a9acf73a9eef9cb426d00b8dee775682f35 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Tue, 25 Jan 2022 14:49:17 +0000
Subject: [PATCH 562/946] [GVN] Add tests for loop load PRE through select.

---
 .../GVN/PRE/pre-loop-load-through-select.ll   | 841 ++++++++++++++++++
 1 file changed, 841 insertions(+)
 create mode 100644 llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll

diff --git a/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll
new file mode 100644
index 0000000000000..58f3d64fc8f6f
--- /dev/null
+++ b/llvm/test/Transforms/GVN/PRE/pre-loop-load-through-select.ll
@@ -0,0 +1,841 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes='require<domtree>,loop(loop-simplifycfg),gvn' -S %s | FileCheck %s
+
+define i32 @test_pointer_phi_select_same_object(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_lcssa(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_lcssa(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %lcssa.min = phi i32* [ %min.select, %loop ]
+  %res = load i32, i32* %lcssa.min, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_different_objects(i32* %A, i32 *%B, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_different_objects(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[B:%.*]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %A, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %B, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_multiple_loads_1(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_multiple_loads_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %min.ptr, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_multiple_loads_2(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_multiple_loads_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %ptr.iv, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_load_after(i32* %A, i32 *%B, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_load_after(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_2_PRE:%.*]] = load i32, i32* [[B:%.*]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[L_2:%.*]] = phi i32 [ [[L_2_PRE]], [[ENTRY:%.*]] ], [ [[L_3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[A:%.*]], [[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[B]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[L_3]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i32 [[L_3]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[L_3]]
+;
+entry:
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %A, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %B, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %min.select, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i32 %l.3
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_split_edge(i32* %ptr, i32* %end, i1 %c)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_split_edge(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]]
+; CHECK:       loop.preheader:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ], [ [[START_PTR]], [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[MIN_SELECT:%.*]], [[LOOP]] ], [ [[PTR]], [[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[LOOP_EXIT:%.*]], label [[LOOP]]
+; CHECK:       loop.exit:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LCSSA_PHI_2:%.*]] = phi i32* [ [[END]], [[ENTRY:%.*]] ], [ [[MIN_SELECT]], [[LOOP_EXIT]] ]
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[LCSSA_PHI_2]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br i1 %c, label %exit, label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %loop.exit, label %loop
+
+loop.exit:
+  %lcssa.phi.1 = phi i32* [ %min.select, %loop ]
+  br label %exit
+
+exit:
+  %lcssa.phi.2 = phi i32* [ %end, %entry ], [ %lcssa.phi.1, %loop.exit ]
+  %res = load i32, i32* %lcssa.phi.2, align 4
+  ret i32 %res
+}
+
+
+declare void @may_throw() readonly
+
+define i32 @test_pointer_phi_select_load_may_not_execute_1(i32* %A, i32 *%B, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_load_may_not_execute_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[B:%.*]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    call void @may_throw()
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %A, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %B, %entry ], [ %min.select, %loop ]
+  call void @may_throw()
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_load_may_not_execute_2(i32* %A, i32 *%B, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_load_may_not_execute_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[A:%.*]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[B:%.*]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    call void @may_throw()
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %A, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %B, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  call void @may_throw()
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_store(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    store i32 0, i32* [[PTR]], align 4
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 0
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  store i32 0, i32* %min.ptr
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+declare void @may_write()
+
+define i32 @test_pointer_phi_select_same_object_may_write_call(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_may_write_call(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    call void @may_write()
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  call void @may_write()
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_header_exit(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_header_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP_LATCH]] ]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop.header
+
+loop.header:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop.latch ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop.latch ]
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop.latch
+
+loop.latch:
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  br label %loop.header
+
+exit:
+  %res = load i32, i32* %min.ptr, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_ptr_use_cycle(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_ptr_use_cycle(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT_PREHEADER:%.*]], label [[LOOP]]
+; CHECK:       exit.preheader:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[P:%.*]] = phi i32* [ [[P_NEXT:%.*]], [[EXIT]] ], [ [[MIN_SELECT]], [[EXIT_PREHEADER]] ]
+; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
+; CHECK-NEXT:    [[P_NEXT]] = getelementptr inbounds i32, i32* [[P]], i64 1
+; CHECK-NEXT:    br label [[EXIT]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %p = phi i32* [ %min.select, %loop ], [ %p.next, %exit ]
+  store i32 0, i32* %p
+  %p.next = getelementptr inbounds i32, i32* %p, i64 1
+  br label %exit
+}
+
+define i32 @test_pointer_phi_select_same_object_maybe_clobbered_in_exit(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_maybe_clobbered_in_exit(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    store i32 0, i32* [[START_PTR]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  store i32 0, i32* %start.ptr
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_same_object_maybe_clobbered_in_exit_2(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_maybe_clobbered_in_exit_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT_1:%.*]], label [[LOOP]]
+; CHECK:       exit.1:
+; CHECK-NEXT:    store i32 0, i32* [[START_PTR]], align 4
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit.1, label %loop
+
+exit.1:
+  %lcssa.min = phi i32* [ %min.select, %loop ]
+  store i32 0, i32* %start.ptr
+  br label %exit.2
+
+exit.2:
+  %res = load i32, i32* %lcssa.min, align 4
+  ret i32 %res
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+define i32 @test_pointer_phi_select_same_object_invoke_in_chain(i32* %ptr, i32* %end)  personality i32 (...)* @__CxxFrameHandler3 {
+; CHECK-LABEL: @test_pointer_phi_select_same_object_invoke_in_chain(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT_1:%.*]], label [[LOOP]]
+; CHECK:       exit.1:
+; CHECK-NEXT:    store i32 0, i32* [[START_PTR]], align 4
+; CHECK-NEXT:    invoke void @may_throw()
+; CHECK-NEXT:    to label [[EXIT_2:%.*]] unwind label [[CATCH_OBJECT:%.*]]
+; CHECK:       exit.2:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK:       catch.object:
+; CHECK-NEXT:    [[LP:%.*]] = landingpad { i8*, i32 }
+; CHECK-NEXT:    catch i8* null
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit.1, label %loop
+
+exit.1:
+  %lcssa.min = phi i32* [ %min.select, %loop ]
+  store i32 0, i32* %start.ptr
+  invoke void @may_throw()
+  to label %exit.2 unwind label %catch.object
+
+exit.2:
+  %res = load i32, i32* %lcssa.min, align 4
+  ret i32 %res
+
+catch.object:
+  %lp = landingpad { i8*, i32 }
+  catch i8* null
+  unreachable
+}
+
+define i32 @test_pointer_phi_select_used_by_others_in_loop(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_used_by_others_in_loop(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    [[L_2_PRE:%.*]] = load i32, i32* [[PTR]], align 4
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[L_2:%.*]] = phi i32 [ [[L_2_PRE]], [[ENTRY:%.*]] ], [ [[L_3:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[L_3]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i32 [[L_3]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[L_3]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %min.select, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i32 %l.3
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_used_by_others_in_loop_1(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_used_by_others_in_loop_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i32 [[L_2]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %l.3 = load i32, i32* %min.ptr, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i32 %l.3
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_used_by_others_in_loop_2(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_used_by_others_in_loop_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[PTR_IV]], align 4
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 [[L_1]], [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[GEP_MIN_PTR:%.*]] = getelementptr inbounds i32, i32* [[MIN_PTR]], i32 1
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_MIN_PTR]], align 4
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i32 [[L_3]]
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.1 = load i32, i32* %ptr.iv, align 4
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 %l.1, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %gep.min.ptr = getelementptr inbounds i32, i32* %min.ptr, i32 1
+  %l.3 = load i32, i32* %gep.min.ptr, align 4
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i32 %l.3
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}
+
+define i32 @test_pointer_phi_select_no_iter_load(i32* %ptr, i32* %end)  {
+; CHECK-LABEL: @test_pointer_phi_select_no_iter_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[START_PTR:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 1
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[PTR_IV:%.*]] = phi i32* [ [[START_PTR]], [[ENTRY:%.*]] ], [ [[PTR_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[MIN_PTR:%.*]] = phi i32* [ [[PTR]], [[ENTRY]] ], [ [[MIN_SELECT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[MIN_PTR]], align 4
+; CHECK-NEXT:    [[CMP_I_I_I:%.*]] = icmp ult i32 10, [[L_2]]
+; CHECK-NEXT:    [[MIN_SELECT]] = select i1 [[CMP_I_I_I]], i32* [[PTR_IV]], i32* [[MIN_PTR]]
+; CHECK-NEXT:    [[PTR_IV_NEXT]] = getelementptr inbounds i32, i32* [[PTR_IV]], i64 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i32* [[PTR_IV_NEXT]], [[END:%.*]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[RES:%.*]] = load i32, i32* [[MIN_SELECT]], align 4
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %start.ptr = getelementptr inbounds i32, i32* %ptr, i64 1
+  br label %loop
+
+loop:
+  %ptr.iv = phi i32* [ %start.ptr, %entry ], [ %ptr.iv.next, %loop ]
+  %min.ptr = phi i32* [ %ptr, %entry ], [ %min.select, %loop ]
+  %l.2 = load i32, i32* %min.ptr, align 4
+  %cmp.i.i.i = icmp ult i32 10, %l.2
+  %min.select  = select i1 %cmp.i.i.i, i32* %ptr.iv, i32* %min.ptr
+  %ptr.iv.next = getelementptr inbounds i32, i32* %ptr.iv, i64 1
+  %ec = icmp eq i32* %ptr.iv.next, %end
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  %res = load i32, i32* %min.select, align 4
+  ret i32 %res
+}

From 9d8c3ad94fad5dd5fb511af89c9e7c3679922ce0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= <1.int32@gmail.com>
Date: Tue, 25 Jan 2022 15:09:31 +0100
Subject: [PATCH 563/946] [clang-tidy] Change code of SignalHandlerCheck (NFC).

Using clang::CallGraph to get the called functions.
This makes a better foundation to improve support for
C++ and print the call chain.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D118016
---
 .../bugprone/SignalHandlerCheck.cpp           | 122 +++++++++---------
 .../clang-tidy/bugprone/SignalHandlerCheck.h  |   6 +-
 .../checkers/bugprone-signal-handler.c        |  35 +++++
 3 files changed, 98 insertions(+), 65 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp
index 63478a9540589..1fc19a8652e3e 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp
@@ -7,15 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "SignalHandlerCheck.h"
-#include "clang/AST/ASTContext.h"
-#include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/Analysis/CallGraph.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include <iterator>
-#include <queue>
 
 using namespace clang::ast_matchers;
 
@@ -42,7 +36,9 @@ struct OptionEnumMapping<
 
 namespace bugprone {
 
-static bool isSystemCall(const FunctionDecl *FD) {
+namespace {
+
+bool isSystemCall(const FunctionDecl *FD) {
   // Find a possible redeclaration in system header.
   // FIXME: Looking at the canonical declaration is not the most exact way
   // to do this.
@@ -73,6 +69,22 @@ static bool isSystemCall(const FunctionDecl *FD) {
       FD->getCanonicalDecl()->getLocation());
 }
 
+/// Given a call graph node of a function and another one that is called from
+/// this function, get a CallExpr of the corresponding function call.
+/// It is unspecified which call is found if multiple calls exist, but the order
+/// should be deterministic (depend only on the AST).
+Expr *findCallExpr(const CallGraphNode *Caller, const CallGraphNode *Callee) {
+  auto FoundCallee = llvm::find_if(
+      Caller->callees(), [Callee](const CallGraphNode::CallRecord &Call) {
+        return Call.Callee == Callee;
+      });
+  assert(FoundCallee != Caller->end() &&
+         "Callee should be called from the caller function here.");
+  return FoundCallee->CallExpr;
+}
+
+} // namespace
+
 AST_MATCHER(FunctionDecl, isSystemCall) { return isSystemCall(&Node); }
 
 SignalHandlerCheck::SignalHandlerCheck(StringRef Name,
@@ -117,68 +129,50 @@ void SignalHandlerCheck::check(const MatchFinder::MatchResult &Result) {
   const auto *HandlerDecl =
       Result.Nodes.getNodeAs<FunctionDecl>("handler_decl");
   const auto *HandlerExpr = Result.Nodes.getNodeAs<DeclRefExpr>("handler_expr");
+  assert(SignalCall && HandlerDecl && HandlerExpr &&
+         "All of these should exist in a match here.");
+
+  if (CG.size() <= 1) {
+    // Call graph must be populated with the entire TU at the beginning.
+    // (It is possible to add a single function but the functions called from it
+    // are not analysed in this case.)
+    CG.addToCallGraph(const_cast<TranslationUnitDecl *>(
+        HandlerDecl->getTranslationUnitDecl()));
+    assert(CG.size() > 1 &&
+           "There should be at least one function added to call graph.");
+  }
 
-  // Visit each function encountered in the callgraph only once.
-  llvm::DenseSet<const FunctionDecl *> SeenFunctions;
-
-  // The worklist of the callgraph visitation algorithm.
-  std::deque<const CallExpr *> CalledFunctions;
-
-  auto ProcessFunction = [&](const FunctionDecl *F, const Expr *CallOrRef) {
-    // Ensure that canonical declaration is used.
-    F = F->getCanonicalDecl();
-
-    // Do not visit function if already encountered.
-    if (!SeenFunctions.insert(F).second)
-      return true;
-
-    // Check if the call is allowed.
-    // Non-system calls are not considered.
-    if (isSystemCall(F)) {
-      if (isSystemCallAllowed(F))
-        return true;
-
-      reportBug(F, CallOrRef, SignalCall, HandlerDecl);
-
-      return false;
-    }
-
-    // Get the body of the encountered non-system call function.
-    const FunctionDecl *FBody;
-    if (!F->hasBody(FBody)) {
-      reportBug(F, CallOrRef, SignalCall, HandlerDecl);
-      return false;
-    }
-
-    // Collect all called functions.
-    auto Matches = match(decl(forEachDescendant(callExpr().bind("call"))),
-                         *FBody, FBody->getASTContext());
-    for (const auto &Match : Matches) {
-      const auto *CE = Match.getNodeAs<CallExpr>("call");
-      if (isa<FunctionDecl>(CE->getCalleeDecl()))
-        CalledFunctions.push_back(CE);
-    }
-
-    return true;
-  };
-
-  if (!ProcessFunction(HandlerDecl, HandlerExpr))
+  // Check for special case when the signal handler itself is an unsafe external
+  // function.
+  if (!isFunctionAsyncSafe(HandlerDecl)) {
+    reportBug(HandlerDecl, HandlerExpr, SignalCall, HandlerDecl);
     return;
+  }
 
-  // Visit the definition of every function referenced by the handler function.
-  // Check for allowed function calls.
-  while (!CalledFunctions.empty()) {
-    const CallExpr *FunctionCall = CalledFunctions.front();
-    CalledFunctions.pop_front();
-    // At insertion we have already ensured that only function calls are there.
-    const auto *F = cast<FunctionDecl>(FunctionCall->getCalleeDecl());
-
-    if (!ProcessFunction(F, FunctionCall))
-      break;
+  CallGraphNode *HandlerNode = CG.getNode(HandlerDecl);
+  // Signal handler can be external but not unsafe, no call graph in this case.
+  if (!HandlerNode)
+    return;
+  // Start from signal handler and visit every function call.
+  for (auto Itr = llvm::df_begin(HandlerNode), ItrE = llvm::df_end(HandlerNode);
+       Itr != ItrE; ++Itr) {
+    const auto *CallF = dyn_cast<FunctionDecl>((*Itr)->getDecl());
+    if (CallF && !isFunctionAsyncSafe(CallF)) {
+      assert(Itr.getPathLength() >= 2);
+      reportBug(CallF, findCallExpr(Itr.getPath(Itr.getPathLength() - 2), *Itr),
+                SignalCall, HandlerDecl);
+    }
   }
 }
 
-bool SignalHandlerCheck::isSystemCallAllowed(const FunctionDecl *FD) const {
+bool SignalHandlerCheck::isFunctionAsyncSafe(const FunctionDecl *FD) const {
+  if (isSystemCall(FD))
+    return isSystemCallAsyncSafe(FD);
+  // For external (not checkable) functions assume that these are unsafe.
+  return FD->hasBody();
+}
+
+bool SignalHandlerCheck::isSystemCallAsyncSafe(const FunctionDecl *FD) const {
   const IdentifierInfo *II = FD->getIdentifier();
   // Unnamed functions are not explicitly allowed.
   if (!II)
diff --git a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h
index 51f273ffa51ec..9ac77a5cb323f 100644
--- a/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_BUGPRONE_SIGNALHANDLERCHECK_H
 
 #include "../ClangTidyCheck.h"
+#include "clang/Analysis/CallGraph.h"
 #include "llvm/ADT/StringSet.h"
 
 namespace clang {
@@ -31,9 +32,12 @@ class SignalHandlerCheck : public ClangTidyCheck {
   void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
 
 private:
+  bool isFunctionAsyncSafe(const FunctionDecl *FD) const;
+  bool isSystemCallAsyncSafe(const FunctionDecl *FD) const;
   void reportBug(const FunctionDecl *CalledFunction, const Expr *CallOrRef,
                  const CallExpr *SignalCall, const FunctionDecl *HandlerDecl);
-  bool isSystemCallAllowed(const FunctionDecl *FD) const;
+
+  CallGraph CG;
 
   AsyncSafeFunctionSetType AsyncSafeFunctionSet;
   llvm::StringSet<> &ConformingFunctions;
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signal-handler.c b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signal-handler.c
index e0b2c24f44ed3..ac9b731cdc7a4 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-signal-handler.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-signal-handler.c
@@ -51,6 +51,37 @@ void handler_extern(int) {
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'f_extern' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
 }
 
+void test_false_condition(int) {
+  if (0)
+    printf("1234");
+  // CHECK-MESSAGES: :[[@LINE-1]]:5: warning: 'printf' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+}
+
+void test_multiple_calls(int) {
+  f_extern();
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'f_extern' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+  printf("1234");
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'printf' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+  f_extern();
+  // first 'f_extern' call found only
+}
+
+void f_recursive();
+
+void test_recursive(int) {
+  f_recursive();
+  printf("");
+  // first 'printf' call (in other function) found only
+}
+
+void f_recursive() {
+  f_extern();
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'f_extern' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+  printf("");
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: 'printf' may not be asynchronous-safe; calling it from a signal handler may be dangerous [bugprone-signal-handler]
+  f_recursive(2);
+}
+
 void test() {
   signal(SIGINT, handler_abort);
   signal(SIGINT, handler_signal);
@@ -66,4 +97,8 @@ void test() {
 
   signal(SIGINT, SIG_IGN);
   signal(SIGINT, SIG_DFL);
+
+  signal(SIGINT, test_false_condition);
+  signal(SIGINT, test_multiple_calls);
+  signal(SIGINT, test_recursive);
 }

From ea4b0489f5caf136aefe04869d650aa8d966041c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 25 Jan 2022 14:58:20 +0000
Subject: [PATCH 564/946] [X86][AVX] Add PR47194 shuffle test case

---
 .../CodeGen/X86/vector-shuffle-256-v32.ll     | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index d280580f55f1b..d6731f851d0bf 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -4785,6 +4785,60 @@ define <32 x i8> @shuffle_v32i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09_
   ret <32 x i8> %shuffle
 }
 
+; PR47194
+define <32 x i8> @shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31(<32 x i8> %a) {
+; AVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
+; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX1-NEXT:    retq
+;
+; AVX2-SLOW-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX2-SLOW:       # %bb.0:
+; AVX2-SLOW-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
+; AVX2-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
+; AVX2-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX2-SLOW-NEXT:    retq
+;
+; AVX2-FAST-ALL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX2-FAST-ALL:       # %bb.0:
+; AVX2-FAST-ALL-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
+; AVX2-FAST-ALL-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [6,6,6,6,6,6,6,6]
+; AVX2-FAST-ALL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
+; AVX2-FAST-ALL-NEXT:    retq
+;
+; AVX2-FAST-PERLANE-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX2-FAST-PERLANE:       # %bb.0:
+; AVX2-FAST-PERLANE-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,30,31,30,31,30,31,30,31,u,u,u,u,u,u,u,u]
+; AVX2-FAST-PERLANE-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; AVX2-FAST-PERLANE-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VL-NEXT:    vpermw %ymm0, %ymm1, %ymm0
+; AVX512VL-NEXT:    retq
+;
+; XOPAVX1-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; XOPAVX1:       # %bb.0:
+; XOPAVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; XOPAVX1-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
+; XOPAVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
+; XOPAVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; XOPAVX1-NEXT:    retq
+;
+; XOPAVX2-LABEL: shuffle_v32i8_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31_30_31:
+; XOPAVX2:       # %bb.0:
+; XOPAVX2-NEXT:    vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
+; XOPAVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,2,2,6,6,6,6]
+; XOPAVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,2]
+; XOPAVX2-NEXT:    retq
+  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31, i32 30, i32 31>
+  ret <32 x i8> %shuffle
+}
+
 define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62(<32 x i8> %a0, <32 x i8> %a1) {
 ; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
 ; AVX1:       # %bb.0:

From a2505bd063e7949e2f0d892d85dec360fb28c702 Mon Sep 17 00:00:00 2001
From: Sean Fertile <sd.fertile@gmail.com>
Date: Thu, 20 Jan 2022 13:30:42 -0500
Subject: [PATCH 565/946] [PowerPC][AIX] Override markFunctionEnd()

During fast-isel calling 'markFunctionEnd' in the base class will call
tidyLandingPads. This can cause an issue where we have determined that
we need ehinfo and emitted a traceback table with the bits set to
indicate that we will be emitting the ehinfo, but the tidying deletes
all landing pads. In this case we end up emitting a reference to
__ehinfo.N symbol, but not emitting a definition to said symbol and the
resulting file fails to assemble.

Differential Revision: https://reviews.llvm.org/D117040
---
 llvm/lib/CodeGen/AsmPrinter/AIXException.cpp |  2 +
 llvm/lib/CodeGen/AsmPrinter/DwarfException.h |  2 +
 llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll  | 50 ++++++++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll

diff --git a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
index 964cef75d164f..03e63321e3c4d 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AIXException.cpp
@@ -23,6 +23,8 @@ namespace llvm {
 
 AIXException::AIXException(AsmPrinter *A) : DwarfCFIExceptionBase(A) {}
 
+void AIXException::markFunctionEnd() { endFragment(); }
+
 void AIXException::emitExceptionInfoTable(const MCSymbol *LSDA,
                                           const MCSymbol *PerSym) {
   // Generate EH Info Table.
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 40898c9fc8551..4defa8a30855e 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -98,6 +98,8 @@ class LLVM_LIBRARY_VISIBILITY AIXException : public DwarfCFIExceptionBase {
 public:
   AIXException(AsmPrinter *A);
 
+  void markFunctionEnd() override;
+
   void endModule() override {}
   void beginFunction(const MachineFunction *MF) override {}
 
diff --git a/llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll b/llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll
new file mode 100644
index 0000000000000..24ead04072f14
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/aix-ehinfo-sym.ll
@@ -0,0 +1,50 @@
+; RUN: llc -mtriple powerpc64-ibm-aix -verify-machineinstrs < %s | \
+; RUN:  FileCheck %s
+
+; RUN: llc -mtriple powerpc64-ibm-aix -fast-isel -verify-machineinstrs < %s | \
+; RUN:  FileCheck %s
+
+; Function Attrs: nounwind
+declare i32 @func1() #0
+
+declare i32 @__xlcxx_personality_v1(...)
+
+; Function Attrs: mustprogress noinline optnone
+define linkonce_odr void @func2() #1 align 2 personality i8* bitcast (i32 (...)* @__xlcxx_personality_v1 to i8*) {
+entry:
+  %0 = alloca i8*, align 8
+  %1 = alloca i32, align 4
+  br label %2
+
+2:                                                ; preds = %3, %entry
+  br i1 false, label %3, label %8
+
+3:                                                ; preds = %2
+  %4 = invoke i32 @func1()
+          to label %2 unwind label %lpad
+
+lpad:                                                ; preds = %3
+  %5 = landingpad { i8*, i32 }
+          cleanup
+  %6 = extractvalue { i8*, i32 } %5, 0
+  store i8* %6, i8** %0, align 8
+  %7 = extractvalue { i8*, i32 } %5, 1
+  store i32 %7, i32* %1, align 4
+  br label %eh.resume
+
+8:                                               ; preds = 2%
+  ret void
+
+eh.resume:                                               ; preds = %lpad
+  %9 = load i8*, i8** %0, align 8
+  %10 = load i32, i32* %1, align 4
+  %11 = insertvalue { i8*, i32 } undef, i8* %9, 0
+  %12 = insertvalue { i8*, i32 } %11, i32 %10, 1
+  resume { i8*, i32 } %12
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { mustprogress noinline optnone }
+
+; CHECK: __ehinfo.0:
+; CHECK: .tc __ehinfo.0[TC],__ehinfo.0

From e581841e8cf46109acea92e1acb661c404fa62b9 Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue@google.com>
Date: Mon, 24 Jan 2022 21:35:25 -0500
Subject: [PATCH 566/946] [libc] Implement log10f correctly rounded for all
 rounding modes.

Based on RLIBM implementation similar to logf and log2f.  Most of the exceptional inputs are the exact powers of 10.

Reviewed By: sivachandra, zimmermann6, santoshn, jpl169

Differential Revision: https://reviews.llvm.org/D118093
---
 libc/config/linux/aarch64/entrypoints.txt     |   1 +
 libc/config/linux/x86_64/entrypoints.txt      |   1 +
 libc/spec/stdc.td                             |   2 +
 libc/src/math/CMakeLists.txt                  |   2 +
 libc/src/math/generic/CMakeLists.txt          |  14 ++
 libc/src/math/generic/log10f.cpp              | 182 ++++++++++++++++++
 libc/src/math/log10f.h                        |  18 ++
 libc/test/src/math/CMakeLists.txt             |  13 ++
 .../math/differential_testing/CMakeLists.txt  |  11 ++
 .../math/differential_testing/log10f_perf.cpp |  16 ++
 libc/test/src/math/exhaustive/CMakeLists.txt  |  17 ++
 libc/test/src/math/exhaustive/log10f_test.cpp |  55 ++++++
 libc/test/src/math/log10f_test.cpp            |  74 +++++++
 libc/utils/MPFRWrapper/MPFRUtils.cpp          |   8 +
 libc/utils/MPFRWrapper/MPFRUtils.h            |   1 +
 15 files changed, 415 insertions(+)
 create mode 100644 libc/src/math/generic/log10f.cpp
 create mode 100644 libc/src/math/log10f.h
 create mode 100644 libc/test/src/math/differential_testing/log10f_perf.cpp
 create mode 100644 libc/test/src/math/exhaustive/log10f_test.cpp
 create mode 100644 libc/test/src/math/log10f_test.cpp

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
index 89d3fabc9f815..65dacdef31222 100644
--- a/libc/config/linux/aarch64/entrypoints.txt
+++ b/libc/config/linux/aarch64/entrypoints.txt
@@ -145,6 +145,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.ldexp
     libc.src.math.ldexpf
     libc.src.math.ldexpl
+    libc.src.math.log10f
     libc.src.math.log2f
     libc.src.math.logf
     libc.src.math.logb
diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
index 578fa2dd30d36..1e63ed0390092 100644
--- a/libc/config/linux/x86_64/entrypoints.txt
+++ b/libc/config/linux/x86_64/entrypoints.txt
@@ -144,6 +144,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.llround
     libc.src.math.llroundf
     libc.src.math.llroundl
+    libc.src.math.log10f
     libc.src.math.log2f
     libc.src.math.logf
     libc.src.math.logb
diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
index ac8dfe425c771..5cae1ddfe0ad0 100644
--- a/libc/spec/stdc.td
+++ b/libc/spec/stdc.td
@@ -396,6 +396,8 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
           FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
 
+          FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
+
           FunctionSpec<"log2f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
           FunctionSpec<"logf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
index aa4019d63b813..0ff088e6061ea 100644
--- a/libc/src/math/CMakeLists.txt
+++ b/libc/src/math/CMakeLists.txt
@@ -116,6 +116,8 @@ add_math_entrypoint_object(ldexp)
 add_math_entrypoint_object(ldexpf)
 add_math_entrypoint_object(ldexpl)
 
+add_math_entrypoint_object(log10f)
+
 add_math_entrypoint_object(log2f)
 
 add_math_entrypoint_object(logf)
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index c3914b8c45af3..88c29eb4ba0bb 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -666,6 +666,20 @@ add_object_library(
     -Wno-c++17-extensions
 )
 
+add_entrypoint_object(
+  log10f
+  SRCS
+    log10f.cpp
+  HDRS
+    ../log10f.h
+  DEPENDS
+    .common_constants
+    libc.src.__support.FPUtil.fputil
+  COMPILE_OPTIONS
+    -O3
+    -Wno-c++17-extensions
+)
+
 add_entrypoint_object(
   log2f
   SRCS
diff --git a/libc/src/math/generic/log10f.cpp b/libc/src/math/generic/log10f.cpp
new file mode 100644
index 0000000000000..c7dbbfd494c7d
--- /dev/null
+++ b/libc/src/math/generic/log10f.cpp
@@ -0,0 +1,182 @@
+//===-- Single-precision log10(x) function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/log10f.h"
+#include "common_constants.h" // Lookup table for (1/f)
+#include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvUtils.h"
+#include "src/__support/FPUtil/FMA.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/common.h"
+
+// This is an algorithm for log10(x) in single precision which is
+// correctly rounded for all rounding modes, based on the implementation of
+// log10(x) from the RLIBM project at:
+// https://people.cs.rutgers.edu/~sn349/rlibm
+
+// Step 1 - Range reduction:
+//   For x = 2^m * 1.mant, log(x) = m * log10(2) + log10(1.m)
+//   If x is denormal, we normalize it by multiplying x by 2^23 and subtracting
+//   m by 23.
+
+// Step 2 - Another range reduction:
+//   To compute log(1.mant), let f be the highest 8 bits including the hidden
+// bit, and d be the difference (1.mant - f), i.e. the remaining 16 bits of the
+// mantissa. Then we have the following approximation formula:
+//   log10(1.mant) = log10(f) + log10(1.mant / f)
+//                 = log10(f) + log10(1 + d/f)
+//                 ~ log10(f) + P(d/f)
+// since d/f is sufficiently small.
+//   log10(f) and 1/f are then stored in two 2^7 = 128 entries look-up tables.
+
+// Step 3 - Polynomial approximation:
+//   To compute P(d/f), we use a single degree-5 polynomial in double precision
+// which provides correct rounding for all but few exception values.
+//   For more detail about how this polynomial is obtained, please refer to the
+// papers:
+//   Lim, J. and Nagarakatte, S., "One Polynomial Approximation to Produce
+// Correctly Rounded Results of an Elementary Function for Multiple
+// Representations and Rounding Modes", Proceedings of the 49th ACM SIGPLAN
+// Symposium on Principles of Programming Languages (POPL-2022), Philadelphia,
+// USA, Jan. 16-22, 2022.
+// https://people.cs.rutgers.edu/~sn349/papers/rlibmall-popl-2022.pdf
+//   Aanjaneya, M., Lim, J., and Nagarakatte, S., "RLibm-Prog: Progressive
+// Polynomial Approximations for Fast Correctly Rounded Math Libraries",
+// Dept. of Comp. Sci., Rutgets U., Technical Report DCS-TR-758, Nov. 2021.
+// https://arxiv.org/pdf/2111.12852.pdf.
+
+namespace __llvm_libc {
+
+// Exact power of 10 in float:
+
+// Lookup table for log10(f) = log10(1 + n*2^(-7)) where n = 0..127.
+static constexpr double LOG10_F[128] = {
+    0x0.0000000000000p+0, 0x1.bafd47221ed26p-9, 0x1.b9476a4fcd10fp-8,
+    0x1.49b0851443684p-7, 0x1.b5e908eb13790p-7, 0x1.10a83a8446c78p-6,
+    0x1.45f4f5acb8be0p-6, 0x1.7adc3df3b1ff8p-6, 0x1.af5f92b00e610p-6,
+    0x1.e3806acbd058fp-6, 0x1.0ba01a8170000p-5, 0x1.25502c0fc314cp-5,
+    0x1.3ed1199a5e425p-5, 0x1.58238eeb353dap-5, 0x1.71483427d2a99p-5,
+    0x1.8a3fadeb847f4p-5, 0x1.a30a9d609efeap-5, 0x1.bba9a058dfd84p-5,
+    0x1.d41d5164facb4p-5, 0x1.ec6647eb58808p-5, 0x1.02428c1f08016p-4,
+    0x1.0e3d29d81165ep-4, 0x1.1a23445501816p-4, 0x1.25f5215eb594ap-4,
+    0x1.31b3055c47118p-4, 0x1.3d5d335c53179p-4, 0x1.48f3ed1df48fbp-4,
+    0x1.5477731973e85p-4, 0x1.5fe80488af4fdp-4, 0x1.6b45df6f3e2c9p-4,
+    0x1.769140a2526fdp-4, 0x1.81ca63d05a44ap-4, 0x1.8cf183886480dp-4,
+    0x1.9806d9414a209p-4, 0x1.a30a9d609efeap-4, 0x1.adfd07416be07p-4,
+    0x1.b8de4d3ab3d98p-4, 0x1.c3aea4a5c6effp-4, 0x1.ce6e41e463da5p-4,
+    0x1.d91d5866aa99cp-4, 0x1.e3bc1ab0e19fep-4, 0x1.ee4aba610f204p-4,
+    0x1.f8c9683468191p-4, 0x1.019c2a064b486p-3, 0x1.06cbd67a6c3b6p-3,
+    0x1.0bf3d0937c41cp-3, 0x1.11142f0811357p-3, 0x1.162d082ac9d10p-3,
+    0x1.1b3e71ec94f7bp-3, 0x1.204881dee8777p-3, 0x1.254b4d35e7d3cp-3,
+    0x1.2a46e8ca7ba2ap-3, 0x1.2f3b691c5a001p-3, 0x1.3428e2540096dp-3,
+    0x1.390f6844a0b83p-3, 0x1.3def0e6dfdf85p-3, 0x1.42c7e7fe3fc02p-3,
+    0x1.479a07d3b6411p-3, 0x1.4c65807e93338p-3, 0x1.512a644296c3dp-3,
+    0x1.55e8c518b10f8p-3, 0x1.5aa0b4b0988fap-3, 0x1.5f52447255c92p-3,
+    0x1.63fd857fc49bbp-3, 0x1.68a288b60b7fcp-3, 0x1.6d415eaf0906bp-3,
+    0x1.71da17c2b7e80p-3, 0x1.766cc40889e85p-3, 0x1.7af97358b9e04p-3,
+    0x1.7f80354d952a0p-3, 0x1.84011944bcb75p-3, 0x1.887c2e605e119p-3,
+    0x1.8cf183886480dp-3, 0x1.9161276ba2978p-3, 0x1.95cb2880f45bap-3,
+    0x1.9a2f95085a45cp-3, 0x1.9e8e7b0c0d4bep-3, 0x1.a2e7e8618c2d2p-3,
+    0x1.a73beaaaa22f4p-3, 0x1.ab8a8f56677fcp-3, 0x1.afd3e3a23b680p-3,
+    0x1.b417f49ab8807p-3, 0x1.b856cf1ca3105p-3, 0x1.bc907fd5d1c40p-3,
+    0x1.c0c5134610e26p-3, 0x1.c4f495c0002a2p-3, 0x1.c91f1369eb7cap-3,
+    0x1.cd44983e9e7bdp-3, 0x1.d165300e333f7p-3, 0x1.d580e67edc43dp-3,
+    0x1.d997c70da9b47p-3, 0x1.dda9dd0f4a329p-3, 0x1.e1b733b0c7381p-3,
+    0x1.e5bfd5f83d342p-3, 0x1.e9c3cec58f807p-3, 0x1.edc328d3184afp-3,
+    0x1.f1bdeeb654901p-3, 0x1.f5b42ae08c407p-3, 0x1.f9a5e79f76ac5p-3,
+    0x1.fd932f1ddb4d6p-3, 0x1.00be05b217844p-2, 0x1.02b0432c96ff0p-2,
+    0x1.04a054e139004p-2, 0x1.068e3fa282e3dp-2, 0x1.087a0832fa7acp-2,
+    0x1.0a63b3456c819p-2, 0x1.0c4b457d3193dp-2, 0x1.0e30c36e71a7fp-2,
+    0x1.1014319e661bdp-2, 0x1.11f594839a5bdp-2, 0x1.13d4f0862b2e1p-2,
+    0x1.15b24a0004a92p-2, 0x1.178da53d1ee01p-2, 0x1.1967067bb94b8p-2,
+    0x1.1b3e71ec94f7bp-2, 0x1.1d13ebb32d7f9p-2, 0x1.1ee777e5f0dc3p-2,
+    0x1.20b91a8e76105p-2, 0x1.2288d7a9b2b64p-2, 0x1.2456b3282f786p-2,
+    0x1.2622b0ee3b79dp-2, 0x1.27ecd4d41eb67p-2, 0x1.29b522a64b609p-2,
+    0x1.2b7b9e258e422p-2, 0x1.2d404b073e27ep-2, 0x1.2f032cf56a5bep-2,
+    0x1.30c4478f0835fp-2, 0x1.32839e681fc62p-2};
+
+INLINE_FMA
+LLVM_LIBC_FUNCTION(float, log10f, (float x)) {
+  constexpr double LOG10_2 = 0x1.34413509f79ffp-2;
+
+  using FPBits = typename fputil::FPBits<float>;
+  FPBits xbits(x);
+  double m = 0.0;
+
+  // Exact powers of 10 and other hard-to-round cases.
+  switch (xbits.uintval()) {
+  case 0x4120'0000U: // x = 10
+    return 1.0f;
+  case 0x42c8'0000U: // x = 100
+    return 2.0f;
+  case 0x447a'0000U: // x = 1,000
+    return 3.0f;
+  case 0x461c'4000U: // x = 10,000
+    return 4.0f;
+  case 0x47c3'5000U: // x = 100,000
+    return 5.0f;
+  case 0x4974'2400U: // x = 1,000,000
+    return 6.0f;
+  case 0x4b18'9680U: // x = 10,000,000
+    return 7.0f;
+  case 0x4cbe'bc20U: // x = 100,000,000
+    return 8.0f;
+  case 0x4e6e'6b28U: // x = 1,000,000,000
+    return 9.0f;
+  case 0x5015'02f9U: // x = 10,000,000,000
+    return 10.0f;
+  case 0x4f13'4f83U: // x = 2471461632.0
+    if (fputil::get_round() == FE_UPWARD)
+      return 0x1.2c9314p+3f;
+    break;
+  case 0x7956'ba5eU: { // x = 69683218960000541503257137270226944.0
+    int round_mode = fputil::get_round();
+    if (round_mode == FE_DOWNWARD || round_mode == FE_TOWARDZERO)
+      return 0x1.16bebap+5f;
+    break;
+  }
+  }
+
+  if (xbits.uintval() < FPBits::MIN_NORMAL ||
+      xbits.uintval() > FPBits::MAX_NORMAL) {
+    if (xbits.is_zero()) {
+      return static_cast<float>(FPBits::neg_inf());
+    }
+    if (xbits.get_sign() && !xbits.is_nan()) {
+      return FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
+    }
+    if (xbits.is_inf_or_nan()) {
+      return x;
+    }
+    // Normalize denormal inputs.
+    xbits.val *= 0x1.0p23f;
+    m -= 23.0;
+  }
+
+  m += static_cast<double>(xbits.get_exponent());
+  // Set bits to 1.m
+  xbits.set_unbiased_exponent(0x7F);
+  int f_index = xbits.get_mantissa() >> 16;
+
+  FPBits f(xbits.val);
+  f.bits &= ~0x0000'FFFF;
+
+  double d = static_cast<float>(xbits) - static_cast<float>(f);
+  d *= ONE_OVER_F[f_index];
+
+  double extra_factor = fputil::fma(m, LOG10_2, LOG10_F[f_index]);
+
+  double r = fputil::polyeval(d, extra_factor, 0x1.bcb7b1526e4c5p-2,
+                              -0x1.bcb7b1518a5e9p-3, 0x1.287a72a6f716p-3,
+                              -0x1.bcadb40b85565p-4, 0x1.5e0bc97f97e22p-4);
+
+  return static_cast<float>(r);
+}
+
+} // namespace __llvm_libc
diff --git a/libc/src/math/log10f.h b/libc/src/math/log10f.h
new file mode 100644
index 0000000000000..d544ab5a55daa
--- /dev/null
+++ b/libc/src/math/log10f.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for log10f ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LOG10F_H
+#define LLVM_LIBC_SRC_MATH_LOG10F_H
+
+namespace __llvm_libc {
+
+float log10f(float x);
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_LOG10F_H
diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
index 7c51dc00d49db..73ecef959aba0 100644
--- a/libc/test/src/math/CMakeLists.txt
+++ b/libc/test/src/math/CMakeLists.txt
@@ -1211,6 +1211,19 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fputil
 )
 
+add_fp_unittest(
+  log10f_test
+  NEED_MPFR
+  SUITE
+    libc_math_unittests
+  SRCS
+    log10f_test.cpp
+  DEPENDS
+    libc.include.math
+    libc.src.math.log10f
+    libc.src.__support.FPUtil.fputil
+)
+
 add_subdirectory(generic)
 add_subdirectory(exhaustive)
 add_subdirectory(differential_testing)
diff --git a/libc/test/src/math/differential_testing/CMakeLists.txt b/libc/test/src/math/differential_testing/CMakeLists.txt
index f2ac818fbd3b3..56d4ff4edf749 100644
--- a/libc/test/src/math/differential_testing/CMakeLists.txt
+++ b/libc/test/src/math/differential_testing/CMakeLists.txt
@@ -233,6 +233,17 @@ add_diff_binary(
     -fno-builtin
 )
 
+add_diff_binary(
+  log10f_perf
+  SRCS
+    log10f_perf.cpp
+  DEPENDS
+    .single_input_single_output_diff
+    libc.src.math.log10f
+  COMPILE_OPTIONS
+    -fno-builtin
+)
+
 add_diff_binary(
   log2f_diff
   SRCS
diff --git a/libc/test/src/math/differential_testing/log10f_perf.cpp b/libc/test/src/math/differential_testing/log10f_perf.cpp
new file mode 100644
index 0000000000000..e890d0393e0af
--- /dev/null
+++ b/libc/test/src/math/differential_testing/log10f_perf.cpp
@@ -0,0 +1,16 @@
+//===-- Differential test for log10f --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "SingleInputSingleOutputDiff.h"
+
+#include "src/math/log10f.h"
+
+#include <math.h>
+
+SINGLE_INPUT_SINGLE_OUTPUT_PERF(float, __llvm_libc::log10f, ::log10f,
+                                "log10f_perf.log")
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 772276bb01e16..f089bfb112e1a 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -79,6 +79,23 @@ add_fp_unittest(
     -lpthread
 )
 
+add_fp_unittest(
+  log10f_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    log10f_test.cpp
+  DEPENDS
+    .exhaustive_test
+    libc.include.math
+    libc.src.math.log10f
+    libc.src.__support.FPUtil.fputil
+  LINK_OPTIONS
+    -lpthread
+)
+
 add_fp_unittest(
   log2f_test
   NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/log10f_test.cpp b/libc/test/src/math/exhaustive/log10f_test.cpp
new file mode 100644
index 0000000000000..f8739ef0de7ca
--- /dev/null
+++ b/libc/test/src/math/exhaustive/log10f_test.cpp
@@ -0,0 +1,55 @@
+//===-- Exhaustive test for log10f ----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/log10f.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+
+using FPBits = __llvm_libc::fputil::FPBits<float>;
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+struct LlvmLibcLog10fExhaustiveTest : public LlvmLibcExhaustiveTest<uint32_t> {
+  void check(uint32_t start, uint32_t stop,
+             mpfr::RoundingMode rounding) override {
+    mpfr::ForceRoundingMode r(rounding);
+    uint32_t bits = start;
+    do {
+      FPBits xbits(bits);
+      float x = float(xbits);
+      EXPECT_MPFR_MATCH(mpfr::Operation::Log10, x, __llvm_libc::log10f(x), 0.5,
+                        rounding);
+    } while (bits++ < stop);
+  }
+};
+
+// Range: All non-negative;
+static constexpr uint32_t START = 0x0000'0000U;
+static constexpr uint32_t STOP = 0x7f80'0000U;
+// Range: [1, 10];
+// static constexpr uint32_t START = 0x3f80'0000U;
+// static constexpr uint32_t STOP  = 0x41c0'0000U;
+static constexpr int NUM_THREADS = 16;
+
+TEST_F(LlvmLibcLog10fExhaustiveTest, RoundNearestTieToEven) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Nearest);
+}
+
+TEST_F(LlvmLibcLog10fExhaustiveTest, RoundUp) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Upward);
+}
+
+TEST_F(LlvmLibcLog10fExhaustiveTest, RoundDown) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::Downward);
+}
+
+TEST_F(LlvmLibcLog10fExhaustiveTest, RoundTowardZero) {
+  test_full_range(START, STOP, NUM_THREADS, mpfr::RoundingMode::TowardZero);
+}
diff --git a/libc/test/src/math/log10f_test.cpp b/libc/test/src/math/log10f_test.cpp
new file mode 100644
index 0000000000000..a43290b858894
--- /dev/null
+++ b/libc/test/src/math/log10f_test.cpp
@@ -0,0 +1,74 @@
+//===-- Unittests for log10f ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/math/log10f.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include "utils/UnitTest/FPMatcher.h"
+#include "utils/UnitTest/Test.h"
+#include <math.h>
+
+#include <errno.h>
+#include <stdint.h>
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+
+DECLARE_SPECIAL_CONSTANTS(float)
+
+TEST(LlvmLibcLog10fTest, SpecialNumbers) {
+  EXPECT_FP_EQ(aNaN, __llvm_libc::log10f(aNaN));
+  EXPECT_FP_EQ(inf, __llvm_libc::log10f(inf));
+  EXPECT_TRUE(FPBits(__llvm_libc::log10f(neg_inf)).is_nan());
+  EXPECT_FP_EQ(neg_inf, __llvm_libc::log10f(0.0f));
+  EXPECT_FP_EQ(neg_inf, __llvm_libc::log10f(-0.0f));
+  EXPECT_TRUE(FPBits(__llvm_libc::log10f(-1.0f)).is_nan());
+  EXPECT_FP_EQ(zero, __llvm_libc::log10f(1.0f));
+}
+
+TEST(LlvmLibcLog10fTest, TrickyInputs) {
+  constexpr int N = 12;
+  constexpr uint32_t INPUTS[N] = {
+      0x41200000U /*10.0f*/,
+      0x42c80000U /*100.0f*/,
+      0x447a0000U /*1,000.0f*/,
+      0x461c4000U /*10,000.0f*/,
+      0x47c35000U /*100,000.0f*/,
+      0x49742400U /*1,000,000.0f*/,
+      0x4b189680U /*10,000,000.0f*/,
+      0x4cbebc20U /*100,000,000.0f*/,
+      0x4e6e6b28U /*1,000,000,000.0f*/,
+      0x501502f9U /*10,000,000,000.0f*/,
+      0x4f134f83U /*2471461632.0f*/,
+      0x7956ba5eU /*69683218960000541503257137270226944.0f*/};
+
+  for (int i = 0; i < N; ++i) {
+    float x = float(FPBits(INPUTS[i]));
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log10, x,
+                                   __llvm_libc::log10f(x), 0.5);
+  }
+}
+
+TEST(LlvmLibcLog10fTest, InFloatRange) {
+  constexpr uint32_t COUNT = 1000000;
+  constexpr uint32_t STEP = UINT32_MAX / COUNT;
+  for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
+    float x = float(FPBits(v));
+    if (isnan(x) || isinf(x))
+      continue;
+    errno = 0;
+    float result = __llvm_libc::log10f(x);
+    // If the computation resulted in an error or did not produce valid result
+    // in the single-precision floating point range, then ignore comparing with
+    // MPFR result as MPFR can still produce valid results because of its
+    // wider precision.
+    if (isnan(result) || isinf(result) || errno != 0)
+      continue;
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log10, x,
+                                   __llvm_libc::log10f(x), 0.5);
+  }
+}
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp
index 4f293837bf4e8..c6470e23451e0 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.cpp
+++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp
@@ -273,6 +273,12 @@ class MPFRNumber {
     return result;
   }
 
+  MPFRNumber log10() const {
+    MPFRNumber result(*this);
+    mpfr_log10(result.value, value, mpfr_rounding);
+    return result;
+  }
+
   MPFRNumber remquo(const MPFRNumber &divisor, int &quotient) {
     MPFRNumber remainder(*this);
     long q;
@@ -502,6 +508,8 @@ unary_operation(Operation op, InputType input, unsigned int precision,
     return mpfrInput.log();
   case Operation::Log2:
     return mpfrInput.log2();
+  case Operation::Log10:
+    return mpfrInput.log10();
   case Operation::Mod2PI:
     return mpfrInput.mod_2pi();
   case Operation::ModPIOver2:
diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h
index 5094f08b9003e..2a13ab1d11608 100644
--- a/libc/utils/MPFRWrapper/MPFRUtils.h
+++ b/libc/utils/MPFRWrapper/MPFRUtils.h
@@ -32,6 +32,7 @@ enum class Operation : int {
   Floor,
   Log,
   Log2,
+  Log10,
   Mod2PI,
   ModPIOver2,
   ModPIOver4,

From 519810d63eb16ff6f367d8e9784a804805c30c44 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Tue, 25 Jan 2022 09:52:15 -0500
Subject: [PATCH 567/946] [NFC] Refine header dependencies of llvm/ADT/Any.h

---
 llvm/include/llvm/ADT/Any.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ADT/Any.h b/llvm/include/llvm/ADT/Any.h
index e513586845a1e..1b4f2c2fa985f 100644
--- a/llvm/include/llvm/ADT/Any.h
+++ b/llvm/include/llvm/ADT/Any.h
@@ -15,7 +15,8 @@
 #ifndef LLVM_ADT_ANY_H
 #define LLVM_ADT_ANY_H
 
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLForwardCompat.h"
+#include "llvm/Support/Compiler.h"
 
 #include <cassert>
 #include <memory>

From adc9a346d842a61c8ce97627d2928dd2384bee5c Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Tue, 25 Jan 2022 09:53:09 -0500
Subject: [PATCH 568/946] Always use df_iterator_default_set as default set
 type for [i]df_ext_iterator

This is consistent with other default values in this file.
---
 llvm/include/llvm/ADT/DepthFirstIterator.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ADT/DepthFirstIterator.h b/llvm/include/llvm/ADT/DepthFirstIterator.h
index d4f173ca7caa8..42ac61d7cf52a 100644
--- a/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -38,7 +38,6 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/iterator_range.h"
 #include <iterator>
-#include <set>
 #include <utility>
 #include <vector>
 
@@ -231,7 +230,7 @@ iterator_range<df_iterator<T>> depth_first(const T& G) {
 }
 
 // Provide global definitions of external depth first iterators...
-template <class T, class SetTy = std::set<typename GraphTraits<T>::NodeRef>>
+template <class T, class SetTy = df_iterator_default_set<typename GraphTraits<T>::NodeRef>>
 struct df_ext_iterator : public df_iterator<T, SetTy, true> {
   df_ext_iterator(const df_iterator<T, SetTy, true> &V)
     : df_iterator<T, SetTy, true>(V) {}
@@ -280,7 +279,7 @@ iterator_range<idf_iterator<T>> inverse_depth_first(const T& G) {
 }
 
 // Provide global definitions of external inverse depth first iterators...
-template <class T, class SetTy = std::set<typename GraphTraits<T>::NodeRef>>
+template <class T, class SetTy = df_iterator_default_set<typename GraphTraits<T>::NodeRef>>
 struct idf_ext_iterator : public idf_iterator<T, SetTy, true> {
   idf_ext_iterator(const idf_iterator<T, SetTy, true> &V)
     : idf_iterator<T, SetTy, true>(V) {}

From 2f02c7e1f2580c7faf9032922c5a8d3e957d0bd5 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jan 2022 17:20:26 +0100
Subject: [PATCH 569/946] [SanitizerCoverage] Avoid pointer element type access

Use the load/store type instead.
---
 llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 387ea5243265d..d3b60c7add34c 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -917,8 +917,7 @@ void ModuleSanitizerCoverage::InjectTraceForGep(
 
 void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
     Function &, ArrayRef<LoadInst *> Loads, ArrayRef<StoreInst *> Stores) {
-  auto CallbackIdx = [&](const Value *Ptr) -> int {
-    auto *ElementTy = Ptr->getType()->getPointerElementType();
+  auto CallbackIdx = [&](Type *ElementTy) -> int {
     uint64_t TypeSize = DL->getTypeStoreSizeInBits(ElementTy);
     return TypeSize == 8     ? 0
            : TypeSize == 16  ? 1
@@ -932,7 +931,7 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
   for (auto LI : Loads) {
     IRBuilder<> IRB(LI);
     auto Ptr = LI->getPointerOperand();
-    int Idx = CallbackIdx(Ptr);
+    int Idx = CallbackIdx(LI->getType());
     if (Idx < 0)
       continue;
     IRB.CreateCall(SanCovLoadFunction[Idx],
@@ -941,7 +940,7 @@ void ModuleSanitizerCoverage::InjectTraceForLoadsAndStores(
   for (auto SI : Stores) {
     IRBuilder<> IRB(SI);
     auto Ptr = SI->getPointerOperand();
-    int Idx = CallbackIdx(Ptr);
+    int Idx = CallbackIdx(SI->getValueOperand()->getType());
     if (Idx < 0)
       continue;
     IRB.CreateCall(SanCovStoreFunction[Idx],

From ef0d90f682b169b3148ff66cb1f592d78adec2f3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Tue, 25 Jan 2022 16:23:10 +0000
Subject: [PATCH 570/946] [X86] Regenerate avx-vbroadcast.ll

Remove '' around mattr to stop update script crash and use X86 prefixes instead of X32
---
 llvm/test/CodeGen/X86/avx-vbroadcast.ll | 576 ++++++++++++------------
 1 file changed, 288 insertions(+), 288 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx-vbroadcast.ll b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
index 5ebe6709a2fe4..df131f030baa3 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1,13 +1,13 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr='+avx,+mmx' | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr='+avx,+mmx' | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx,+mmx | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx,+mmx | FileCheck %s --check-prefix=X64
 
 define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: A:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: A:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: A:
 ; X64:       ## %bb.0: ## %entry
@@ -23,20 +23,20 @@ entry:
 }
 
 define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: A2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %esi, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %edx
-; X32-NEXT:    movl 4(%ecx), %esi
-; X32-NEXT:    vbroadcastsd (%ecx), %ymm0
-; X32-NEXT:    movl %edx, (%eax)
-; X32-NEXT:    movl %esi, 4(%eax)
-; X32-NEXT:    popl %esi
-; X32-NEXT:    retl
+; X86-LABEL: A2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 4(%ecx), %esi
+; X86-NEXT:    vbroadcastsd (%ecx), %ymm0
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: A2:
 ; X64:       ## %bb.0: ## %entry
@@ -57,11 +57,11 @@ entry:
 }
 
 define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: B:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: B:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: B:
 ; X64:       ## %bb.0: ## %entry
@@ -77,11 +77,11 @@ entry:
 }
 
 define <8 x i32> @B2(i32* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: B2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: B2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: B2:
 ; X64:       ## %bb.0: ## %entry
@@ -101,16 +101,16 @@ entry:
 }
 
 define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: B3:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %ecx
-; X32-NEXT:    movl %ecx, (%eax)
-; X32-NEXT:    vmovd %ecx, %xmm0
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: B3:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %ecx
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    vmovd %ecx, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: B3:
 ; X64:       ## %bb.0: ## %entry
@@ -135,11 +135,11 @@ entry:
 }
 
 define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: C:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: C:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: C:
 ; X64:       ## %bb.0: ## %entry
@@ -155,13 +155,13 @@ entry:
 }
 
 define <4 x double> @C2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: C2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastsd (%ecx), %ymm0
-; X32-NEXT:    vmovlps %xmm0, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: C2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastsd (%ecx), %ymm0
+; X86-NEXT:    vmovlps %xmm0, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: C2:
 ; X64:       ## %bb.0: ## %entry
@@ -179,11 +179,11 @@ entry:
 }
 
 define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: D:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: D:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: D:
 ; X64:       ## %bb.0: ## %entry
@@ -199,11 +199,11 @@ entry:
 }
 
 define <8 x float> @D2(float* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: D2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: D2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: D2:
 ; X64:       ## %bb.0: ## %entry
@@ -223,13 +223,13 @@ entry:
 }
 
 define <8 x float> @D3(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: D3:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastss (%ecx), %ymm0
-; X32-NEXT:    vmovss %xmm0, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: D3:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastss (%ecx), %ymm0
+; X86-NEXT:    vmovss %xmm0, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: D3:
 ; X64:       ## %bb.0: ## %entry
@@ -253,11 +253,11 @@ entry:
 ;;;; 128-bit versions
 
 define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: e:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: e:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: e:
 ; X64:       ## %bb.0: ## %entry
@@ -273,13 +273,13 @@ entry:
 }
 
 define <4 x float> @e2(float* %ptr, float* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: e2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastss (%ecx), %xmm0
-; X32-NEXT:    vmovss %xmm0, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: e2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastss (%ecx), %xmm0
+; X86-NEXT:    vmovss %xmm0, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: e2:
 ; X64:       ## %bb.0: ## %entry
@@ -298,10 +298,10 @@ entry:
 
 ; Don't broadcast constants on pre-AVX2 hardware.
 define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: _e2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
-; X32-NEXT:    retl
+; X86-LABEL: _e2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vmovaps {{.*#+}} xmm0 = [-7.8125E-3,-7.8125E-3,-7.8125E-3,-7.8125E-3]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: _e2:
 ; X64:       ## %bb.0: ## %entry
@@ -317,11 +317,11 @@ entry:
 
 
 define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: F:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: F:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: F:
 ; X64:       ## %bb.0: ## %entry
@@ -337,15 +337,15 @@ entry:
 }
 
 define <4 x i32> @F2(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: F2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %ecx
-; X32-NEXT:    movl %ecx, (%eax)
-; X32-NEXT:    vmovd %ecx, %xmm0
-; X32-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
-; X32-NEXT:    retl
+; X86-LABEL: F2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %ecx
+; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    vmovd %ecx, %xmm0
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: F2:
 ; X64:       ## %bb.0: ## %entry
@@ -367,11 +367,11 @@ entry:
 ; FIXME: Pointer adjusted broadcasts
 
 define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4i32_4i32_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4i32_4i32_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpermilps {{.*#+}} xmm0 = mem[1,1,1,1]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4i32_4i32_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -384,11 +384,11 @@ entry:
 }
 
 define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_8i32_4i32_33333333:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_8i32_4i32_33333333:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 12(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8i32_4i32_33333333:
 ; X64:       ## %bb.0: ## %entry
@@ -401,11 +401,11 @@ entry:
 }
 
 define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_8i32_8i32_55555555:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_8i32_8i32_55555555:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 20(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8i32_8i32_55555555:
 ; X64:       ## %bb.0: ## %entry
@@ -418,11 +418,11 @@ entry:
 }
 
 define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4f32_4f32_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 4(%eax), %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4f32_4f32_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 4(%eax), %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4f32_4f32_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -435,11 +435,11 @@ entry:
 }
 
 define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_8f32_4f32_33333333:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 12(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_8f32_4f32_33333333:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 12(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8f32_4f32_33333333:
 ; X64:       ## %bb.0: ## %entry
@@ -452,11 +452,11 @@ entry:
 }
 
 define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_8f32_8f32_55555555:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss 20(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_8f32_8f32_55555555:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss 20(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_8f32_8f32_55555555:
 ; X64:       ## %bb.0: ## %entry
@@ -469,11 +469,11 @@ entry:
 }
 
 define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_2i64_2i64_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_2i64_2i64_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vpermilps {{.*#+}} xmm0 = mem[2,3,2,3]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_2i64_2i64_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -486,11 +486,11 @@ entry:
 }
 
 define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4i64_2i64_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4i64_2i64_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd 8(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4i64_2i64_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -503,11 +503,11 @@ entry:
 }
 
 define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4i64_4i64_2222:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4i64_4i64_2222:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd 16(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4i64_4i64_2222:
 ; X64:       ## %bb.0: ## %entry
@@ -520,11 +520,11 @@ entry:
 }
 
 define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_2f64_2f64_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_2f64_2f64_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_2f64_2f64_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -537,11 +537,11 @@ entry:
 }
 
 define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4f64_2f64_1111:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd 8(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4f64_2f64_1111:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd 8(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4f64_2f64_1111:
 ; X64:       ## %bb.0: ## %entry
@@ -554,11 +554,11 @@ entry:
 }
 
 define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: load_splat_4f64_4f64_2222:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd 16(%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: load_splat_4f64_4f64_2222:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd 16(%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: load_splat_4f64_4f64_2222:
 ; X64:       ## %bb.0: ## %entry
@@ -573,11 +573,11 @@ entry:
 ; Unsupported vbroadcasts
 
 define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: G:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    retl
+; X86-LABEL: G:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: G:
 ; X64:       ## %bb.0: ## %entry
@@ -591,20 +591,20 @@ entry:
 }
 
 define <2 x i64> @G2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: G2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    .cfi_def_cfa_offset 8
-; X32-NEXT:    .cfi_offset %esi, -8
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movl (%ecx), %edx
-; X32-NEXT:    movl 4(%ecx), %esi
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    movl %edx, (%eax)
-; X32-NEXT:    movl %esi, 4(%eax)
-; X32-NEXT:    popl %esi
-; X32-NEXT:    retl
+; X86-LABEL: G2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    .cfi_def_cfa_offset 8
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 4(%ecx), %esi
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    movl %edx, (%eax)
+; X86-NEXT:    movl %esi, 4(%eax)
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: G2:
 ; X64:       ## %bb.0: ## %entry
@@ -622,10 +622,10 @@ entry:
 }
 
 define <4 x i32> @H(<4 x i32> %a) {
-; X32-LABEL: H:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; X32-NEXT:    retl
+; X86-LABEL: H:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: H:
 ; X64:       ## %bb.0: ## %entry
@@ -637,11 +637,11 @@ entry:
 }
 
 define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: I:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    retl
+; X86-LABEL: I:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: I:
 ; X64:       ## %bb.0: ## %entry
@@ -655,13 +655,13 @@ entry:
 }
 
 define <2 x double> @I2(double* %ptr, double* %ptr2) nounwind uwtable readnone ssp {
-; X32-LABEL: I2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    vmovlps %xmm0, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: I2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    vmovlps %xmm0, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: I2:
 ; X64:       ## %bb.0: ## %entry
@@ -677,14 +677,14 @@ entry:
 }
 
 define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
-; X32-LABEL: _RR:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastss (%ecx), %xmm0
-; X32-NEXT:    movl (%eax), %eax
-; X32-NEXT:    movl %eax, (%eax)
-; X32-NEXT:    retl
+; X86-LABEL: _RR:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastss (%ecx), %xmm0
+; X86-NEXT:    movl (%eax), %eax
+; X86-NEXT:    movl %eax, (%eax)
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: _RR:
 ; X64:       ## %bb.0: ## %entry
@@ -705,11 +705,11 @@ entry:
 }
 
 define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
-; X32-LABEL: _RR2:
-; X32:       ## %bb.0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %xmm0
-; X32-NEXT:    retl
+; X86-LABEL: _RR2:
+; X86:       ## %bb.0: ## %entry
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %xmm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: _RR2:
 ; X64:       ## %bb.0: ## %entry
@@ -727,11 +727,11 @@ entry:
 ; (via the insertelements).
 
 define <8 x float> @splat_concat1(float* %p) {
-; X32-LABEL: splat_concat1:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: splat_concat1:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_concat1:
 ; X64:       ## %bb.0:
@@ -747,11 +747,11 @@ define <8 x float> @splat_concat1(float* %p) {
 }
 
 define <8 x float> @splat_concat2(float* %p) {
-; X32-LABEL: splat_concat2:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastss (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: splat_concat2:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastss (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_concat2:
 ; X64:       ## %bb.0:
@@ -771,11 +771,11 @@ define <8 x float> @splat_concat2(float* %p) {
 }
 
 define <4 x double> @splat_concat3(double* %p) {
-; X32-LABEL: splat_concat3:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: splat_concat3:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_concat3:
 ; X64:       ## %bb.0:
@@ -789,11 +789,11 @@ define <4 x double> @splat_concat3(double* %p) {
 }
 
 define <4 x double> @splat_concat4(double* %p) {
-; X32-LABEL: splat_concat4:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: splat_concat4:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: splat_concat4:
 ; X64:       ## %bb.0:
@@ -810,11 +810,11 @@ define <4 x double> @splat_concat4(double* %p) {
 
 ; PR34041
 define <4 x double> @broadcast_shuffle_1000(double* %p) {
-; X32-LABEL: broadcast_shuffle_1000:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_shuffle_1000:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle_1000:
 ; X64:       ## %bb.0:
@@ -827,11 +827,11 @@ define <4 x double> @broadcast_shuffle_1000(double* %p) {
 }
 
 define <4 x double> @broadcast_shuffle1032(double* %p) {
-; X32-LABEL: broadcast_shuffle1032:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vbroadcastsd (%eax), %ymm0
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_shuffle1032:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    vbroadcastsd (%eax), %ymm0
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_shuffle1032:
 ; X64:       ## %bb.0:
@@ -845,15 +845,15 @@ define <4 x double> @broadcast_shuffle1032(double* %p) {
 }
 
 define void @broadcast_v16i32(i32* %a, <16 x i32>* %b) {
-; X32-LABEL: broadcast_v16i32:
-; X32:       ## %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vbroadcastss (%ecx), %ymm0
-; X32-NEXT:    vmovups %ymm0, 32(%eax)
-; X32-NEXT:    vmovups %ymm0, (%eax)
-; X32-NEXT:    vzeroupper
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_v16i32:
+; X86:       ## %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vbroadcastss (%ecx), %ymm0
+; X86-NEXT:    vmovups %ymm0, 32(%eax)
+; X86-NEXT:    vmovups %ymm0, (%eax)
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_v16i32:
 ; X64:       ## %bb.0:
@@ -874,21 +874,21 @@ define void @broadcast_v16i32(i32* %a, <16 x i32>* %b) {
 ; Broadcast scale factor for xyz vector - slp will have vectorized xy.
 ;
 define double @broadcast_scale_xyz(double* nocapture readonly, double* nocapture readonly) nounwind {
-; X32-LABEL: broadcast_scale_xyz:
-; X32:       ## %bb.0:
-; X32-NEXT:    subl $12, %esp
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-NEXT:    vmulpd (%eax), %xmm0, %xmm1
-; X32-NEXT:    vmulsd 16(%eax), %xmm0, %xmm0
-; X32-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
-; X32-NEXT:    vaddsd %xmm2, %xmm1, %xmm1
-; X32-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
-; X32-NEXT:    vmovsd %xmm0, (%esp)
-; X32-NEXT:    fldl (%esp)
-; X32-NEXT:    addl $12, %esp
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_scale_xyz:
+; X86:       ## %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X86-NEXT:    vmulpd (%eax), %xmm0, %xmm1
+; X86-NEXT:    vmulsd 16(%eax), %xmm0, %xmm0
+; X86-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm1[1,0]
+; X86-NEXT:    vaddsd %xmm2, %xmm1, %xmm1
+; X86-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vmovsd %xmm0, (%esp)
+; X86-NEXT:    fldl (%esp)
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_scale_xyz:
 ; X64:       ## %bb.0:
@@ -919,24 +919,24 @@ define double @broadcast_scale_xyz(double* nocapture readonly, double* nocapture
 ; When VBROADCAST replaces an existing load, ensure it still respects lifetime dependencies.
 ;
 define float @broadcast_lifetime() nounwind {
-; X32-LABEL: broadcast_lifetime:
-; X32:       ## %bb.0:
-; X32-NEXT:    pushl %esi
-; X32-NEXT:    subl $40, %esp
-; X32-NEXT:    leal {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl %esi, (%esp)
-; X32-NEXT:    calll _gfunc
-; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
-; X32-NEXT:    movl %esi, (%esp)
-; X32-NEXT:    calll _gfunc
-; X32-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X32-NEXT:    vsubss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 4-byte Folded Reload
-; X32-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
-; X32-NEXT:    flds {{[0-9]+}}(%esp)
-; X32-NEXT:    addl $40, %esp
-; X32-NEXT:    popl %esi
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_lifetime:
+; X86:       ## %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    subl $40, %esp
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, (%esp)
+; X86-NEXT:    calll _gfunc
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vmovss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; X86-NEXT:    movl %esi, (%esp)
+; X86-NEXT:    calll _gfunc
+; X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X86-NEXT:    vsubss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 4-byte Folded Reload
+; X86-NEXT:    vmovss %xmm0, {{[0-9]+}}(%esp)
+; X86-NEXT:    flds {{[0-9]+}}(%esp)
+; X86-NEXT:    addl $40, %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_lifetime:
 ; X64:       ## %bb.0:
@@ -973,14 +973,14 @@ define float @broadcast_lifetime() nounwind {
 }
 
 define <8 x i16> @broadcast_x86_mmx(x86_mmx %tmp) nounwind {
-; X32-LABEL: broadcast_x86_mmx:
-; X32:       ## %bb.0: ## %bb
-; X32-NEXT:    subl $12, %esp
-; X32-NEXT:    movq %mm0, (%esp)
-; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; X32-NEXT:    addl $12, %esp
-; X32-NEXT:    retl
+; X86-LABEL: broadcast_x86_mmx:
+; X86:       ## %bb.0: ## %bb
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    movq %mm0, (%esp)
+; X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
 ;
 ; X64-LABEL: broadcast_x86_mmx:
 ; X64:       ## %bb.0: ## %bb

From 0e5ea403e8deeb5374a9072aaa12292b9c0bed30 Mon Sep 17 00:00:00 2001
From: Ben Langmuir <blangmuir@apple.com>
Date: Mon, 24 Jan 2022 16:05:49 -0800
Subject: [PATCH 571/946] Fix running orc-rt tests with
 LLVM_BUILD_EXTERNAL_COMPILER_RT

Add missing dependency on llvm-jitlink when building compiler-rt with
LLVM_BUILD_EXTERNAL_COMPILER_RT. Previously we would
non-deterministically fail the tests due to the missing binary.

rdar://87247681

Differential Revision: https://reviews.llvm.org/D118087
---
 clang/runtime/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/runtime/CMakeLists.txt b/clang/runtime/CMakeLists.txt
index 0388008792511..ca7e17927ee1f 100644
--- a/clang/runtime/CMakeLists.txt
+++ b/clang/runtime/CMakeLists.txt
@@ -132,7 +132,7 @@ if(LLVM_BUILD_EXTERNAL_COMPILER_RT AND EXISTS ${COMPILER_RT_SRC_ROOT}/)
   if(LLVM_INCLUDE_TESTS)
     # Add binaries that compiler-rt tests depend on.
     set(COMPILER_RT_TEST_DEPENDENCIES
-      FileCheck count not llvm-nm llvm-objdump llvm-symbolizer)
+      FileCheck count not llvm-nm llvm-objdump llvm-symbolizer llvm-jitlink)
 
     # Add top-level targets for various compiler-rt test suites.
     set(COMPILER_RT_TEST_SUITES check-fuzzer check-asan check-hwasan check-asan-dynamic check-dfsan

From 64ba462b6e398bdb33464963f7d6274320f84370 Mon Sep 17 00:00:00 2001
From: Stanislav Gatev <sgatev@google.com>
Date: Mon, 24 Jan 2022 16:17:22 +0000
Subject: [PATCH 572/946] [clang][dataflow] Add a transfer function for
 InitListExpr

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D118119
---
 .../clang/Analysis/FlowSensitive/Value.h      |  7 +-
 clang/lib/Analysis/FlowSensitive/Transfer.cpp | 29 +++++++
 .../Analysis/FlowSensitive/TransferTest.cpp   | 87 +++++++++++++++++++
 3 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/Value.h b/clang/include/clang/Analysis/FlowSensitive/Value.h
index d1de2b64fd95a..47e4d31c832bb 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Value.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Value.h
@@ -100,15 +100,18 @@ class StructValue final : public Value {
     return Val->getKind() == Kind::Struct;
   }
 
-  /// Returns the child value for `D`.
+  /// Returns the child value that is assigned for `D`.
   Value &getChild(const ValueDecl &D) const {
     auto It = Children.find(&D);
     assert(It != Children.end());
     return *It->second;
   }
 
+  /// Assigns `Val` as the child value for `D`.
+  void setChild(const ValueDecl &D, Value &Val) { Children[&D] = &Val; }
+
 private:
-  const llvm::DenseMap<const ValueDecl *, Value *> Children;
+  llvm::DenseMap<const ValueDecl *, Value *> Children;
 };
 
 } // namespace dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 32a05333923f5..7c5e063278d78 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -22,9 +22,11 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "clang/Basic/OperatorKinds.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
 #include <cassert>
 #include <memory>
+#include <tuple>
 
 namespace clang {
 namespace dataflow {
@@ -414,6 +416,33 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
       Env.setValue(Loc, *Val);
   }
 
+  void VisitInitListExpr(const InitListExpr *S) {
+    QualType Type = S->getType();
+
+    auto &Loc = Env.createStorageLocation(*S);
+    Env.setStorageLocation(*S, Loc);
+
+    auto *Val = Env.createValue(Type);
+    if (Val == nullptr)
+      return;
+
+    Env.setValue(Loc, *Val);
+
+    if (Type->isStructureOrClassType()) {
+      for (auto IT : llvm::zip(Type->getAsRecordDecl()->fields(), S->inits())) {
+        const FieldDecl *Field = std::get<0>(IT);
+        assert(Field != nullptr);
+
+        const Expr *Init = std::get<1>(IT);
+        assert(Init != nullptr);
+
+        if (Value *InitVal = Env.getValue(*Init, SkipPast::None))
+          cast<StructValue>(Val)->setChild(*Field, *InitVal);
+      }
+    }
+    // FIXME: Implement array initialization.
+  }
+
   // FIXME: Add support for:
   // - CXXBoolLiteralExpr
 
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index c1eaf281ddc49..cd3e58207680a 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1865,4 +1865,91 @@ TEST_F(TransferTest, VarDeclInDoWhile) {
               });
 }
 
+TEST_F(TransferTest, AggregateInitialization) {
+  std::string BracesCode = R"(
+    struct A {
+      int Foo;
+    };
+
+    struct B {
+      int Bar;
+      A Baz;
+      int Qux;
+    };
+
+    void target(int BarArg, int FooArg, int QuxArg) {
+      B Quux{BarArg, {FooArg}, QuxArg};
+      /*[[p]]*/
+    }
+  )";
+  std::string BraceEllisionCode = R"(
+    struct A {
+      int Foo;
+    };
+
+    struct B {
+      int Bar;
+      A Baz;
+      int Qux;
+    };
+
+    void target(int BarArg, int FooArg, int QuxArg) {
+      B Quux = {BarArg, FooArg, QuxArg};
+      /*[[p]]*/
+    }
+  )";
+  for (const std::string &Code : {BracesCode, BraceEllisionCode}) {
+    runDataflow(
+        Code, [](llvm::ArrayRef<
+                     std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
+                     Results,
+                 ASTContext &ASTCtx) {
+          ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
+          const Environment &Env = Results[0].second.Env;
+
+          const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+          ASSERT_THAT(FooDecl, NotNull());
+
+          const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
+          ASSERT_THAT(BarDecl, NotNull());
+
+          const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz");
+          ASSERT_THAT(BazDecl, NotNull());
+
+          const ValueDecl *QuxDecl = findValueDecl(ASTCtx, "Qux");
+          ASSERT_THAT(QuxDecl, NotNull());
+
+          const ValueDecl *FooArgDecl = findValueDecl(ASTCtx, "FooArg");
+          ASSERT_THAT(FooArgDecl, NotNull());
+
+          const ValueDecl *BarArgDecl = findValueDecl(ASTCtx, "BarArg");
+          ASSERT_THAT(BarArgDecl, NotNull());
+
+          const ValueDecl *QuxArgDecl = findValueDecl(ASTCtx, "QuxArg");
+          ASSERT_THAT(QuxArgDecl, NotNull());
+
+          const ValueDecl *QuuxDecl = findValueDecl(ASTCtx, "Quux");
+          ASSERT_THAT(QuuxDecl, NotNull());
+
+          const auto *FooArgVal =
+              cast<IntegerValue>(Env.getValue(*FooArgDecl, SkipPast::None));
+          const auto *BarArgVal =
+              cast<IntegerValue>(Env.getValue(*BarArgDecl, SkipPast::None));
+          const auto *QuxArgVal =
+              cast<IntegerValue>(Env.getValue(*QuxArgDecl, SkipPast::None));
+
+          const auto *QuuxVal =
+              cast<StructValue>(Env.getValue(*QuuxDecl, SkipPast::None));
+          ASSERT_THAT(QuuxVal, NotNull());
+
+          const auto *BazVal = cast<StructValue>(&QuuxVal->getChild(*BazDecl));
+          ASSERT_THAT(BazVal, NotNull());
+
+          EXPECT_EQ(&QuuxVal->getChild(*BarDecl), BarArgVal);
+          EXPECT_EQ(&BazVal->getChild(*FooDecl), FooArgVal);
+          EXPECT_EQ(&QuuxVal->getChild(*QuxDecl), QuxArgVal);
+        });
+  }
+}
+
 } // namespace

From ce368e1aa51f3d9f0a5f6ff0be3c02a9cf1e2d2e Mon Sep 17 00:00:00 2001
From: Alex Brachet <abrachet@google.com>
Date: Tue, 25 Jan 2022 16:37:43 +0000
Subject: [PATCH 573/946] [libc][NFC] Workaround clang assertion in inline asm

The clobber list "cc" is added to inline assembly to workaround a clang assertion that triggers when building with a clang built with assertions enabled. See bug [53391](https://github.com/llvm/llvm-project/issues/53391).

See https://godbolt.org/z/z3bc6a9PM showing functionally same output assembly.

Reviewed By: sivachandra, lntue

Differential Revision: https://reviews.llvm.org/D118099
---
 libc/src/math/x86_64/cos.cpp | 2 +-
 libc/src/math/x86_64/sin.cpp | 2 +-
 libc/src/math/x86_64/tan.cpp | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/src/math/x86_64/cos.cpp b/libc/src/math/x86_64/cos.cpp
index 9e7e7227c6b1d..3b785a2f78cdf 100644
--- a/libc/src/math/x86_64/cos.cpp
+++ b/libc/src/math/x86_64/cos.cpp
@@ -13,7 +13,7 @@ namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(double, cos, (double x)) {
   double result;
-  __asm__ __volatile__("fcos" : "=t"(result) : "f"(x));
+  __asm__ __volatile__("fcos" : "=t"(result) : "f"(x) : "cc");
   return result;
 }
 
diff --git a/libc/src/math/x86_64/sin.cpp b/libc/src/math/x86_64/sin.cpp
index 22774f278956e..e94aa1a3f0925 100644
--- a/libc/src/math/x86_64/sin.cpp
+++ b/libc/src/math/x86_64/sin.cpp
@@ -13,7 +13,7 @@ namespace __llvm_libc {
 
 LLVM_LIBC_FUNCTION(double, sin, (double x)) {
   double result;
-  __asm__ __volatile__("fsin" : "=t"(result) : "f"(x));
+  __asm__ __volatile__("fsin" : "=t"(result) : "f"(x) : "cc");
   return result;
 }
 
diff --git a/libc/src/math/x86_64/tan.cpp b/libc/src/math/x86_64/tan.cpp
index 9146473741f95..0503af7a16dde 100644
--- a/libc/src/math/x86_64/tan.cpp
+++ b/libc/src/math/x86_64/tan.cpp
@@ -16,7 +16,7 @@ LLVM_LIBC_FUNCTION(double, tan, (double x)) {
   double one;
   // The fptan instruction pushes the number 1 on to the FP stack after
   // computing tan. So, we read out the one before popping the actual result.
-  __asm__ __volatile__("fptan" : "=t"(one) : "f"(x));
+  __asm__ __volatile__("fptan" : "=t"(one) : "f"(x) : "cc");
   __asm__ __volatile__("fstpl %0" : "=m"(result));
   return result;
 }

From a22d870a4e96eb6b7c481fb119f283ba56f735fa Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 25 Jan 2022 09:08:13 -0800
Subject: [PATCH 574/946] Add missing include diagnosed by moduels build.

---
 llvm/include/llvm/Support/FormatVariadicDetails.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/Support/FormatVariadicDetails.h b/llvm/include/llvm/Support/FormatVariadicDetails.h
index 08f8fc61f69bd..2cafc120c1d73 100644
--- a/llvm/include/llvm/Support/FormatVariadicDetails.h
+++ b/llvm/include/llvm/Support/FormatVariadicDetails.h
@@ -10,6 +10,7 @@
 #define LLVM_SUPPORT_FORMATVARIADICDETAILS_H
 
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/raw_ostream.h"
 
 #include <type_traits>

From f23d57a63266354902f36b79f2ad8557e201b3bc Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey@chromium.org>
Date: Mon, 24 Jan 2022 16:51:51 -0500
Subject: [PATCH 575/946] [lld-macho] Rename CallGraphSort.{h,cpp} to
 SectionPriorities

This is in preparation for moving the code that parses and processes
order files into this file.

See https://reviews.llvm.org/D117354 for context and discussion.
---
 lld/MachO/CMakeLists.txt                               | 2 +-
 lld/MachO/{CallGraphSort.cpp => SectionPriorities.cpp} | 4 ++--
 lld/MachO/{CallGraphSort.h => SectionPriorities.h}     | 6 +++---
 lld/MachO/Writer.cpp                                   | 2 +-
 llvm/utils/gn/secondary/lld/MachO/BUILD.gn             | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)
 rename lld/MachO/{CallGraphSort.cpp => SectionPriorities.cpp} (98%)
 rename lld/MachO/{CallGraphSort.h => SectionPriorities.h} (78%)

diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt
index 61f5c70005b9f..4bd0816bca66f 100644
--- a/lld/MachO/CMakeLists.txt
+++ b/lld/MachO/CMakeLists.txt
@@ -10,7 +10,6 @@ add_lld_library(lldMachO
   Arch/ARM64Common.cpp
   Arch/ARM64_32.cpp
   Arch/X86_64.cpp
-  CallGraphSort.cpp
   ConcatOutputSection.cpp
   Driver.cpp
   DriverUtils.cpp
@@ -26,6 +25,7 @@ add_lld_library(lldMachO
   OutputSection.cpp
   OutputSegment.cpp
   Relocations.cpp
+  SectionPriorities.cpp
   SymbolTable.cpp
   Symbols.cpp
   SyntheticSections.cpp
diff --git a/lld/MachO/CallGraphSort.cpp b/lld/MachO/SectionPriorities.cpp
similarity index 98%
rename from lld/MachO/CallGraphSort.cpp
rename to lld/MachO/SectionPriorities.cpp
index 7a0192ea691e0..5e63ceb34d110 100644
--- a/lld/MachO/CallGraphSort.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -1,4 +1,4 @@
-//===- CallGraphSort.cpp --------------------------------------------------===//
+//===- SectionPriorities.cpp ----------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -11,7 +11,7 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "CallGraphSort.h"
+#include "SectionPriorities.h"
 #include "Config.h"
 #include "InputFiles.h"
 #include "Symbols.h"
diff --git a/lld/MachO/CallGraphSort.h b/lld/MachO/SectionPriorities.h
similarity index 78%
rename from lld/MachO/CallGraphSort.h
rename to lld/MachO/SectionPriorities.h
index 034f54a260899..f510d315e2c45 100644
--- a/lld/MachO/CallGraphSort.h
+++ b/lld/MachO/SectionPriorities.h
@@ -1,4 +1,4 @@
-//===- CallGraphSort.h ------------------------------------------*- C++ -*-===//
+//===- SectionPriorities.h --------------------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLD_MACHO_CALL_GRAPH_SORT_H
-#define LLD_MACHO_CALL_GRAPH_SORT_H
+#ifndef LLD_MACHO_SECTION_PRIORITIES_H
+#define LLD_MACHO_SECTION_PRIORITIES_H
 
 #include "InputSection.h"
 #include "llvm/ADT/DenseMap.h"
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index c76dc691346e6..1fafda99e39d2 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -7,7 +7,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Writer.h"
-#include "CallGraphSort.h"
 #include "ConcatOutputSection.h"
 #include "Config.h"
 #include "InputFiles.h"
@@ -15,6 +14,7 @@
 #include "MapFile.h"
 #include "OutputSection.h"
 #include "OutputSegment.h"
+#include "SectionPriorities.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
diff --git a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
index 4d8a050dcd8c6..6b457a97f9981 100644
--- a/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
+++ b/llvm/utils/gn/secondary/lld/MachO/BUILD.gn
@@ -27,7 +27,6 @@ static_library("MachO") {
     "Arch/ARM64Common.cpp",
     "Arch/ARM64_32.cpp",
     "Arch/X86_64.cpp",
-    "CallGraphSort.cpp",
     "ConcatOutputSection.cpp",
     "Driver.cpp",
     "DriverUtils.cpp",
@@ -43,6 +42,7 @@ static_library("MachO") {
     "OutputSection.cpp",
     "OutputSegment.cpp",
     "Relocations.cpp",
+    "SectionPriorities.cpp",
     "SymbolTable.cpp",
     "Symbols.cpp",
     "SyntheticSections.cpp",

From a5c9d717807f2801b35f5d0e9501d6398efff42d Mon Sep 17 00:00:00 2001
From: Leonard Grey <lgrey@chromium.org>
Date: Mon, 24 Jan 2022 17:27:56 -0500
Subject: [PATCH 576/946] [lld-macho] Move order file and call graph sorting
 into SectionPriorities

See https://reviews.llvm.org/D117354 for context and discussion.
---
 lld/MachO/Driver.cpp            |  74 +-----------------
 lld/MachO/SectionPriorities.cpp | 129 +++++++++++++++++++++++++++++++-
 lld/MachO/SectionPriorities.h   |  35 ++++++++-
 lld/MachO/Writer.cpp            |  48 ------------
 4 files changed, 163 insertions(+), 123 deletions(-)

diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp
index 50f5c96c61f35..e4c9f4dd6024a 100644
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -15,6 +15,7 @@
 #include "ObjC.h"
 #include "OutputSection.h"
 #include "OutputSegment.h"
+#include "SectionPriorities.h"
 #include "SymbolTable.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
@@ -460,60 +461,6 @@ static void addFileList(StringRef path, bool isLazy) {
 // entry (the one nearest to the front of the list.)
 //
 // The file can also have line comments that start with '#'.
-static void parseOrderFile(StringRef path) {
-  Optional<MemoryBufferRef> buffer = readFile(path);
-  if (!buffer) {
-    error("Could not read order file at " + path);
-    return;
-  }
-
-  MemoryBufferRef mbref = *buffer;
-  size_t priority = std::numeric_limits<size_t>::max();
-  for (StringRef line : args::getLines(mbref)) {
-    StringRef objectFile, symbol;
-    line = line.take_until([](char c) { return c == '#'; }); // ignore comments
-    line = line.ltrim();
-
-    CPUType cpuType = StringSwitch<CPUType>(line)
-                          .StartsWith("i386:", CPU_TYPE_I386)
-                          .StartsWith("x86_64:", CPU_TYPE_X86_64)
-                          .StartsWith("arm:", CPU_TYPE_ARM)
-                          .StartsWith("arm64:", CPU_TYPE_ARM64)
-                          .StartsWith("ppc:", CPU_TYPE_POWERPC)
-                          .StartsWith("ppc64:", CPU_TYPE_POWERPC64)
-                          .Default(CPU_TYPE_ANY);
-
-    if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType)
-      continue;
-
-    // Drop the CPU type as well as the colon
-    if (cpuType != CPU_TYPE_ANY)
-      line = line.drop_until([](char c) { return c == ':'; }).drop_front();
-
-    constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"};
-    for (StringRef fileEnd : fileEnds) {
-      size_t pos = line.find(fileEnd);
-      if (pos != StringRef::npos) {
-        // Split the string around the colon
-        objectFile = line.take_front(pos + fileEnd.size() - 1);
-        line = line.drop_front(pos + fileEnd.size());
-        break;
-      }
-    }
-    symbol = line.trim();
-
-    if (!symbol.empty()) {
-      SymbolPriorityEntry &entry = config->priorities[symbol];
-      if (!objectFile.empty())
-        entry.objectFiles.insert(std::make_pair(objectFile, priority));
-      else
-        entry.anyObjectFile = std::max(entry.anyObjectFile, priority);
-    }
-
-    --priority;
-  }
-}
-
 // We expect sub-library names of the form "libfoo", which will match a dylib
 // with a path of .*/libfoo.{dylib, tbd}.
 // XXX ld64 seems to ignore the extension entirely when matching sub-libraries;
@@ -1081,25 +1028,6 @@ static void gatherInputSections() {
   assert(inputOrder <= UnspecifiedInputOrder);
 }
 
-static void extractCallGraphProfile() {
-  TimeTraceScope timeScope("Extract call graph profile");
-  for (const InputFile *file : inputFiles) {
-    auto *obj = dyn_cast_or_null<ObjFile>(file);
-    if (!obj)
-      continue;
-    for (const CallGraphEntry &entry : obj->callGraph) {
-      assert(entry.fromIndex < obj->symbols.size() &&
-             entry.toIndex < obj->symbols.size());
-      auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]);
-      auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]);
-
-      if (!fromSym || !toSym)
-        continue;
-      config->callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count;
-    }
-  }
-}
-
 static void foldIdenticalLiterals() {
   // We always create a cStringSection, regardless of whether dedupLiterals is
   // true. If it isn't, we simply create a non-deduplicating CStringSection.
diff --git a/lld/MachO/SectionPriorities.cpp b/lld/MachO/SectionPriorities.cpp
index 5e63ceb34d110..35510d7338e89 100644
--- a/lld/MachO/SectionPriorities.cpp
+++ b/lld/MachO/SectionPriorities.cpp
@@ -16,14 +16,20 @@
 #include "InputFiles.h"
 #include "Symbols.h"
 #include "Target.h"
+#include "lld/Common/Args.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/ErrorHandler.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/Path.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/raw_ostream.h"
 #include <numeric>
 
 using namespace llvm;
+using namespace llvm::MachO;
+using namespace llvm::sys;
 using namespace lld;
 using namespace lld::macho;
 
@@ -241,12 +247,133 @@ DenseMap<const InputSection *, size_t> CallGraphSort::run() {
   return orderMap;
 }
 
+static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
+                                const InputFile *f) {
+  // We don't use toString(InputFile *) here because it returns the full path
+  // for object files, and we only want the basename.
+  StringRef filename;
+  if (f->archiveName.empty())
+    filename = path::filename(f->getName());
+  else
+    filename = saver().save(path::filename(f->archiveName) + "(" +
+                            path::filename(f->getName()) + ")");
+  return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
+}
+
+void macho::extractCallGraphProfile() {
+  TimeTraceScope timeScope("Extract call graph profile");
+  for (const InputFile *file : inputFiles) {
+    auto *obj = dyn_cast_or_null<ObjFile>(file);
+    if (!obj)
+      continue;
+    for (const CallGraphEntry &entry : obj->callGraph) {
+      assert(entry.fromIndex < obj->symbols.size() &&
+             entry.toIndex < obj->symbols.size());
+      auto *fromSym = dyn_cast_or_null<Defined>(obj->symbols[entry.fromIndex]);
+      auto *toSym = dyn_cast_or_null<Defined>(obj->symbols[entry.toIndex]);
+
+      if (!fromSym || !toSym)
+        continue;
+      config->callGraphProfile[{fromSym->isec, toSym->isec}] += entry.count;
+    }
+  }
+}
+
+void macho::parseOrderFile(StringRef path) {
+  Optional<MemoryBufferRef> buffer = readFile(path);
+  if (!buffer) {
+    error("Could not read order file at " + path);
+    return;
+  }
+
+  MemoryBufferRef mbref = *buffer;
+  size_t priority = std::numeric_limits<size_t>::max();
+  for (StringRef line : args::getLines(mbref)) {
+    StringRef objectFile, symbol;
+    line = line.take_until([](char c) { return c == '#'; }); // ignore comments
+    line = line.ltrim();
+
+    CPUType cpuType = StringSwitch<CPUType>(line)
+                          .StartsWith("i386:", CPU_TYPE_I386)
+                          .StartsWith("x86_64:", CPU_TYPE_X86_64)
+                          .StartsWith("arm:", CPU_TYPE_ARM)
+                          .StartsWith("arm64:", CPU_TYPE_ARM64)
+                          .StartsWith("ppc:", CPU_TYPE_POWERPC)
+                          .StartsWith("ppc64:", CPU_TYPE_POWERPC64)
+                          .Default(CPU_TYPE_ANY);
+
+    if (cpuType != CPU_TYPE_ANY && cpuType != target->cpuType)
+      continue;
+
+    // Drop the CPU type as well as the colon
+    if (cpuType != CPU_TYPE_ANY)
+      line = line.drop_until([](char c) { return c == ':'; }).drop_front();
+
+    constexpr std::array<StringRef, 2> fileEnds = {".o:", ".o):"};
+    for (StringRef fileEnd : fileEnds) {
+      size_t pos = line.find(fileEnd);
+      if (pos != StringRef::npos) {
+        // Split the string around the colon
+        objectFile = line.take_front(pos + fileEnd.size() - 1);
+        line = line.drop_front(pos + fileEnd.size());
+        break;
+      }
+    }
+    symbol = line.trim();
+
+    if (!symbol.empty()) {
+      SymbolPriorityEntry &entry = config->priorities[symbol];
+      if (!objectFile.empty())
+        entry.objectFiles.insert(std::make_pair(objectFile, priority));
+      else
+        entry.anyObjectFile = std::max(entry.anyObjectFile, priority);
+    }
+
+    --priority;
+  }
+}
+
 // Sort sections by the profile data provided by __LLVM,__cg_profile sections.
 //
 // This first builds a call graph based on the profile data then merges sections
 // according to the C³ heuristic. All clusters are then sorted by a density
 // metric to further improve locality.
-DenseMap<const InputSection *, size_t> macho::computeCallGraphProfileOrder() {
+static DenseMap<const InputSection *, size_t> computeCallGraphProfileOrder() {
   TimeTraceScope timeScope("Call graph profile sort");
   return CallGraphSort().run();
 }
+
+// Each section gets assigned the priority of the highest-priority symbol it
+// contains.
+DenseMap<const InputSection *, size_t> macho::buildInputSectionPriorities() {
+  if (config->callGraphProfileSort)
+    return computeCallGraphProfileOrder();
+  DenseMap<const InputSection *, size_t> sectionPriorities;
+
+  if (config->priorities.empty())
+    return sectionPriorities;
+
+  auto addSym = [&](Defined &sym) {
+    if (sym.isAbsolute())
+      return;
+
+    auto it = config->priorities.find(sym.getName());
+    if (it == config->priorities.end())
+      return;
+
+    SymbolPriorityEntry &entry = it->second;
+    size_t &priority = sectionPriorities[sym.isec];
+    priority =
+        std::max(priority, getSymbolPriority(entry, sym.isec->getFile()));
+  };
+
+  // TODO: Make sure this handles weak symbols correctly.
+  for (const InputFile *file : inputFiles) {
+    if (isa<ObjFile>(file))
+      for (Symbol *sym : file->symbols)
+        if (auto *d = dyn_cast_or_null<Defined>(sym))
+          addSym(*d);
+  }
+
+  return sectionPriorities;
+}
diff --git a/lld/MachO/SectionPriorities.h b/lld/MachO/SectionPriorities.h
index f510d315e2c45..9cc4eff958cd7 100644
--- a/lld/MachO/SectionPriorities.h
+++ b/lld/MachO/SectionPriorities.h
@@ -15,7 +15,40 @@
 namespace lld {
 namespace macho {
 
-llvm::DenseMap<const InputSection *, size_t> computeCallGraphProfileOrder();
+// Reads every input section's call graph profile, and combines them into
+// config->callGraphProfile. If an order file is present, any edges where one
+// or both of the vertices are specified in the order file are discarded.
+void extractCallGraphProfile();
+
+// Reads the order file at `path` into config->priorities.
+//
+// An order file has one entry per line, in the following format:
+//
+//   <cpu>:<object file>:<symbol name>
+//
+// <cpu> and <object file> are optional. If not specified, then that entry
+// matches any symbol of that name. Parsing this format is not quite
+// straightforward because the symbol name itself can contain colons, so when
+// encountering a colon, we consider the preceding characters to decide if it
+// can be a valid CPU type or file path.
+//
+// If a symbol is matched by multiple entries, then it takes the lowest-ordered
+// entry (the one nearest to the front of the list.)
+//
+// The file can also have line comments that start with '#'.
+void parseOrderFile(StringRef path);
+
+// Returns layout priorities for some or all input sections. Sections are laid
+// out in decreasing order; that is, a higher priority section will be closer
+// to the beginning of its output section.
+//
+// If either an order file or a call graph profile are present, this is used
+// as the source of priorities. If both are present, the order file takes
+// precedence. If neither is present, an empty map is returned.
+//
+// Each section gets assigned the priority of the highest-priority symbol it
+// contains.
+llvm::DenseMap<const InputSection *, size_t> buildInputSectionPriorities();
 } // namespace macho
 } // namespace lld
 
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
index 1fafda99e39d2..2c0794e08ae3e 100644
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -849,54 +849,6 @@ template <class LP> void Writer::createLoadCommands() {
                               : 0));
 }
 
-static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
-                                const InputFile *f) {
-  // We don't use toString(InputFile *) here because it returns the full path
-  // for object files, and we only want the basename.
-  StringRef filename;
-  if (f->archiveName.empty())
-    filename = path::filename(f->getName());
-  else
-    filename = saver().save(path::filename(f->archiveName) + "(" +
-                            path::filename(f->getName()) + ")");
-  return std::max(entry.objectFiles.lookup(filename), entry.anyObjectFile);
-}
-
-// Each section gets assigned the priority of the highest-priority symbol it
-// contains.
-static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() {
-  if (config->callGraphProfileSort)
-    return computeCallGraphProfileOrder();
-  DenseMap<const InputSection *, size_t> sectionPriorities;
-
-  if (config->priorities.empty())
-    return sectionPriorities;
-
-  auto addSym = [&](Defined &sym) {
-    if (sym.isAbsolute())
-      return;
-
-    auto it = config->priorities.find(sym.getName());
-    if (it == config->priorities.end())
-      return;
-
-    SymbolPriorityEntry &entry = it->second;
-    size_t &priority = sectionPriorities[sym.isec];
-    priority =
-        std::max(priority, getSymbolPriority(entry, sym.isec->getFile()));
-  };
-
-  // TODO: Make sure this handles weak symbols correctly.
-  for (const InputFile *file : inputFiles) {
-    if (isa<ObjFile>(file))
-      for (Symbol *sym : file->symbols)
-        if (auto *d = dyn_cast_or_null<Defined>(sym))
-          addSym(*d);
-  }
-
-  return sectionPriorities;
-}
-
 // Sorting only can happen once all outputs have been collected. Here we sort
 // segments, output sections within each segment, and input sections within each
 // output segment.

From bd1fac2fafd7a1afacce05cd53a3741a2214f5f1 Mon Sep 17 00:00:00 2001
From: Ashley Hedberg <ahedberg@google.com>
Date: Tue, 25 Jan 2022 15:42:49 +0000
Subject: [PATCH 577/946] Add assert on End iteration distance to
 Rewriter::getRewrittenText.

I currently have code that is crashing in the second std::advance call,
and it was not straightforward to identify the problem, as the first line
of the stacktrace is in RopePieceBTreeIterator::operator++:

```

*** SIGILL; stack trace: ***
PC: clang/include/clang/Rewrite/Core/RewriteRope.h:119 clang::RopePieceBTreeIterator::operator++()
    ../include/c++/v1/__iterator/advance.h:35 std::__u::__advance<>()
    ../include/c++/v1/__iterator/advance.h:65 std::__u::advance<>()
    clang/lib/Rewrite/Rewriter.cpp:228 clang::Rewriter::getRewrittenText()
    clang/include/clang/Rewrite/Core/Rewriter.h:106 clang::Rewriter::getRewrittenText()
```

Adding an assertion produces a friendlier error message for the caller.

Reviewed By: gribozavr2

Differential Revision: https://reviews.llvm.org/D117579
---
 clang/lib/Rewrite/Rewriter.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Rewrite/Rewriter.cpp b/clang/lib/Rewrite/Rewriter.cpp
index 3b06afc76e16e..8950bfb7c4dcc 100644
--- a/clang/lib/Rewrite/Rewriter.cpp
+++ b/clang/lib/Rewrite/Rewriter.cpp
@@ -223,6 +223,7 @@ std::string Rewriter::getRewrittenText(CharSourceRange Range) const {
   RewriteBuffer::iterator Start = RB.begin();
   std::advance(Start, StartOff);
   RewriteBuffer::iterator End = Start;
+  assert(EndOff >= StartOff && "Invalid iteration distance");
   std::advance(End, EndOff-StartOff);
 
   return std::string(Start, End);

From dcc3e728ca018de785991d4ecb9efe4f6a18ca75 Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Wed, 22 Dec 2021 15:37:48 -0600
Subject: [PATCH 578/946] [IROutliner] Allowing Phi Nodes in exit blocks

In addition to having multiple exit locations, there can be multiple blocks leading to the same exit location, which results in a potential phi node. If we find that multiple blocks within the region branch to the same block outside the region, resulting in a phi node, the code extractor pulls this phi node into the function and uses it as an output.

We make sure that this phi node is given an output slot, and that the two values are removed from the outputs if they are not used anywhere else outside of the region. Across the extracted regions, the phi nodes are combined into a single block for each potential output block, similar to the previous patch.

Reviewers: paquette

Differential Revision: https://reviews.llvm.org/D106995
---
 llvm/include/llvm/Transforms/IPO/IROutliner.h |  13 +-
 llvm/lib/Transforms/IPO/IROutliner.cpp        | 643 ++++++++++++++++--
 .../IROutliner/gvn-output-set-overload.ll     | 122 ++++
 ...matched-phi-exits-not-in-first-outlined.ll |  85 +++
 .../IROutliner/mismatched-phi-exits.ll        |  85 +++
 .../mismatched-phi-outputs-ordering.ll        | 150 ++++
 .../outlining-branches-phi-nodes.ll           | 165 +++++
 .../IROutliner/outlining-exits-to-phi-node.ll |  48 +-
 .../IROutliner/phi-nodes-output-overload.ll   | 112 +++
 .../IROutliner/region-inputs-in-phi-nodes.ll  | 104 +++
 10 files changed, 1456 insertions(+), 71 deletions(-)
 create mode 100644 llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll
 create mode 100644 llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll
 create mode 100644 llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll
 create mode 100644 llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll
 create mode 100644 llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
 create mode 100644 llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll
 create mode 100644 llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll

diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 110c0b4dcf16b..dcae4454f8281 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -95,6 +95,10 @@ struct OutlinableRegion {
   /// required for the following basic blocks in this case.
   bool EndsInBranch = false;
 
+  /// The PHIBlocks with their corresponding return block based on the return
+  /// value as the key.
+  DenseMap<Value *, BasicBlock *> PHIBlocks;
+
   /// Mapping of the argument number in the deduplicated function
   /// to a given constant, which is used when creating the arguments to the call
   /// to the newly created deduplicated function.  This is handled separately
@@ -182,7 +186,14 @@ class IROutliner {
   IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
              function_ref<IRSimilarityIdentifier &(Module &)> GIRSI,
              function_ref<OptimizationRemarkEmitter &(Function &)> GORE)
-      : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {}
+      : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {
+    
+    // Check that the DenseMap implementation has not changed.
+    assert(DenseMapInfo<unsigned>::getEmptyKey() == (unsigned)-1 &&
+           "DenseMapInfo<unsigned>'s empty key isn't -1!");
+    assert(DenseMapInfo<unsigned>::getTombstoneKey() == (unsigned)-2 &&
+           "DenseMapInfo<unsigned>'s tombstone key isn't -2!");
+  }
   bool run(Module &M);
 
 private:
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 928b12013a545..c3104111864a4 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -106,6 +106,16 @@ struct OutlinableGroup {
   /// of the region.
   unsigned BranchesToOutside = 0;
 
+  /// Tracker counting backwards from the highest unsigned value possible to
+  /// avoid conflicting with the GVNs of assigned values.  We start at -3 since
+  /// -2 and -1 are assigned by the DenseMap.
+  unsigned PHINodeGVNTracker = -3;
+
+  DenseMap<unsigned,
+           std::pair<std::pair<unsigned, unsigned>, SmallVector<unsigned, 2>>>
+      PHINodeGVNToGVNs;
+  DenseMap<hash_code, unsigned> GVNsToPHINodeGVN;
+
   /// The number of instructions that will be outlined by extracting \ref
   /// Regions.
   InstructionCost Benefit = 0;
@@ -356,6 +366,24 @@ InstructionCost OutlinableRegion::getBenefit(TargetTransformInfo &TTI) {
   return Benefit;
 }
 
+/// Check the \p OutputMappings structure for value \p Input, if it exists
+/// it has been used as an output for outlining, and has been renamed, and we
+/// return the new value, otherwise, we return the same value.
+///
+/// \param OutputMappings [in] - The mapping of values to their renamed value
+/// after being used as an output for an outlined region.
+/// \param Input [in] - The value to find the remapped value of, if it exists.
+/// \return The remapped value if it has been renamed, and the same value if has
+/// not.
+static Value *findOutputMapping(const DenseMap<Value *, Value *> OutputMappings,
+                                Value *Input) {
+  DenseMap<Value *, Value *>::const_iterator OutputMapping =
+      OutputMappings.find(Input);
+  if (OutputMapping != OutputMappings.end())
+    return OutputMapping->second;
+  return Input;
+}
+
 /// Find whether \p Region matches the global value numbering to Constant
 /// mapping found so far.
 ///
@@ -832,6 +860,209 @@ findExtractedInputToOverallInputMapping(OutlinableRegion &Region,
   Region.NumExtractedInputs = OriginalIndex;
 }
 
+/// Check if the \p V has any uses outside of the region other than \p PN.
+///
+/// \param V [in] - The value to check.
+/// \param PHILoc [in] - The location in the PHINode of \p V.
+/// \param PN [in] - The PHINode using \p V.
+/// \param Exits [in] - The potential blocks we exit to from the outlined
+/// region.
+/// \param BlocksInRegion [in] - The basic blocks contained in the region.
+/// \returns true if \p V has any use soutside its region other than \p PN.
+static bool outputHasNonPHI(Value *V, unsigned PHILoc, PHINode &PN,
+                            SmallPtrSet<BasicBlock *, 1> &Exits,
+                            DenseSet<BasicBlock *> &BlocksInRegion) {
+  // We check to see if the value is used by the PHINode from some other
+  // predecessor not included in the region.  If it is, we make sure
+  // to keep it as an output.
+  SmallVector<unsigned, 2> IncomingNumbers(PN.getNumIncomingValues());
+  std::iota(IncomingNumbers.begin(), IncomingNumbers.end(), 0);
+  if (any_of(IncomingNumbers, [PHILoc, &PN, V, &BlocksInRegion](unsigned Idx) {
+        return (Idx != PHILoc && V == PN.getIncomingValue(Idx) &&
+                !BlocksInRegion.contains(PN.getIncomingBlock(Idx)));
+      }))
+    return true;
+
+  // Check if the value is used by any other instructions outside the region.
+  return any_of(V->users(), [&Exits, &BlocksInRegion](User *U) {
+    Instruction *I = dyn_cast<Instruction>(U);
+    if (!I)
+      return false;
+
+    // If the use of the item is inside the region, we skip it.  Uses
+    // inside the region give us useful information about how the item could be
+    // used as an output.
+    BasicBlock *Parent = I->getParent();
+    if (BlocksInRegion.contains(Parent))
+      return false;
+
+    // If it's not a PHINode then we definitely know the use matters.  This
+    // output value will not completely combined with another item in a PHINode
+    // as it is directly reference by another non-phi instruction
+    if (!isa<PHINode>(I))
+      return true;
+
+    // If we have a PHINode outside one of the exit locations, then it
+    // can be considered an outside use as well.  If there is a PHINode
+    // contained in the Exit where this values use matters, it will be
+    // caught when we analyze that PHINode.
+    if (!Exits.contains(Parent))
+      return true;
+
+    return false;
+  });
+}
+
+/// Test whether \p CurrentExitFromRegion contains any PhiNodes that should be
+/// considered outputs. A PHINodes is an output when more than one incoming
+/// value has been marked by the CodeExtractor as an output.
+///
+/// \param CurrentExitFromRegion [in] - The block to analyze.
+/// \param PotentialExitsFromRegion [in] - The potential exit blocks from the
+/// region.
+/// \param RegionBlocks [in] - The basic blocks in the region.
+/// \param Outputs [in, out] - The existing outputs for the region, we may add
+/// PHINodes to this as we find that they replace output values.
+/// \param OutputsReplacedByPHINode [out] - A set containing outputs that are
+/// totally replaced  by a PHINode.
+/// \param OutputsWithNonPhiUses [out] - A set containing outputs that are used
+/// in PHINodes, but have other uses, and should still be considered outputs.
+static void analyzeExitPHIsForOutputUses(
+    BasicBlock *CurrentExitFromRegion,
+    SmallPtrSet<BasicBlock *, 1> &PotentialExitsFromRegion,
+    DenseSet<BasicBlock *> &RegionBlocks, SetVector<Value *> &Outputs,
+    DenseSet<Value *> &OutputsReplacedByPHINode,
+    DenseSet<Value *> &OutputsWithNonPhiUses) {
+  for (PHINode &PN : CurrentExitFromRegion->phis()) {
+    // Find all incoming values from the outlining region.
+    SmallVector<unsigned, 2> IncomingVals;
+    for (unsigned I = 0, E = PN.getNumIncomingValues(); I < E; ++I)
+      if (RegionBlocks.contains(PN.getIncomingBlock(I)))
+        IncomingVals.push_back(I);
+
+    // Do not process PHI if there are no predecessors from region.
+    unsigned NumIncomingVals = IncomingVals.size();
+    if (NumIncomingVals == 0)
+      continue;
+
+    // If there is one predecessor, we mark it as a value that needs to be kept
+    // as an output.
+    if (NumIncomingVals == 1) {
+      Value *V = PN.getIncomingValue(*IncomingVals.begin());
+      OutputsWithNonPhiUses.insert(V);
+      OutputsReplacedByPHINode.erase(V);
+      continue;
+    }
+
+    // This PHINode will be used as an output value, so we add it to our list.
+    Outputs.insert(&PN);
+
+    // Not all of the incoming values should be ignored as other inputs and
+    // outputs may have uses in outlined region.  If they have other uses
+    // outside of the single PHINode we should not skip over it.
+    for (unsigned Idx : IncomingVals) {
+      Value *V = PN.getIncomingValue(Idx);
+      if (outputHasNonPHI(V, Idx, PN, PotentialExitsFromRegion, RegionBlocks)) {
+        OutputsWithNonPhiUses.insert(V);
+        OutputsReplacedByPHINode.erase(V);
+        continue;
+      }
+      if (!OutputsWithNonPhiUses.contains(V))
+        OutputsReplacedByPHINode.insert(V);
+    }
+  }
+}
+
+// Represents the type for the unsigned number denoting the output number for
+// phi node, along with the canonical number for the exit block.
+using ArgLocWithBBCanon = std::pair<unsigned, unsigned>;
+// The list of canonical numbers for the incoming values to a PHINode.
+using CanonList = SmallVector<unsigned, 2>;
+// The pair type representing the set of canonical values being combined in the
+// PHINode, along with the location data for the PHINode.
+using PHINodeData = std::pair<ArgLocWithBBCanon, CanonList>;
+
+/// Encode \p PND as an integer for easy lookup based on the argument location,
+/// the parent BasicBlock canonical numbering, and the canonical numbering of
+/// the values stored in the PHINode.
+///
+/// \param PND - The data to hash.
+/// \returns The hash code of \p PND.
+static hash_code encodePHINodeData(PHINodeData &PND) {
+  return llvm::hash_combine(
+      llvm::hash_value(PND.first.first), llvm::hash_value(PND.first.second),
+      llvm::hash_combine_range(PND.second.begin(), PND.second.end()));
+}
+
+/// Create a special GVN for PHINodes that will be used outside of
+/// the region.  We create a hash code based on the Canonical number of the
+/// parent BasicBlock, the canonical numbering of the values stored in the
+/// PHINode and the aggregate argument location.  This is used to find whether
+/// this PHINode type has been given a canonical numbering already.  If not, we
+/// assign it a value and store it for later use.  The value is returned to
+/// identify different output schemes for the set of regions.
+///
+/// \param Region - The region that \p PN is an output for.
+/// \param PN - The PHINode we are analyzing.
+/// \param AggArgIdx - The argument \p PN will be stored into.
+/// \returns An optional holding the assigned canonical number, or None if
+/// there is some attribute of the PHINode blocking it from being used.
+static Optional<unsigned> getGVNForPHINode(OutlinableRegion &Region,
+                                           PHINode *PN, unsigned AggArgIdx) {
+  OutlinableGroup &Group = *Region.Parent;
+  IRSimilarityCandidate &Cand = *Region.Candidate;
+  BasicBlock *PHIBB = PN->getParent();
+  CanonList PHIGVNs;
+  for (Value *Incoming : PN->incoming_values()) {
+    // If we cannot find a GVN, this means that the input to the PHINode is
+    // not included in the region we are trying to analyze, meaning, that if
+    // it was outlined, we would be adding an extra input.  We ignore this
+    // case for now, and so ignore the region.
+    Optional<unsigned> OGVN = Cand.getGVN(Incoming);
+    if (!OGVN.hasValue()) {
+      Region.IgnoreRegion = true;
+      return None;
+    }
+
+    // Collect the canonical numbers of the values in the PHINode.
+    unsigned GVN = OGVN.getValue();
+    OGVN = Cand.getCanonicalNum(GVN);
+    assert(OGVN.hasValue() && "No GVN found for incoming value?");
+    PHIGVNs.push_back(*OGVN);
+  }
+
+  // Now that we have the GVNs for the incoming values, we are going to combine
+  // them with the GVN of the incoming bock, and the output location of the
+  // PHINode to generate a hash value representing this instance of the PHINode.
+  DenseMap<hash_code, unsigned>::iterator GVNToPHIIt;
+  DenseMap<unsigned, PHINodeData>::iterator PHIToGVNIt;
+  Optional<unsigned> BBGVN = Cand.getGVN(PHIBB);
+  assert(BBGVN.hasValue() && "Could not find GVN for the incoming block!");
+
+  BBGVN = Cand.getCanonicalNum(BBGVN.getValue());
+  assert(BBGVN.hasValue() &&
+         "Could not find canonical number for the incoming block!");
+  // Create a pair of the exit block canonical value, and the aggregate
+  // argument location, connected to the canonical numbers stored in the
+  // PHINode.
+  PHINodeData TemporaryPair =
+      std::make_pair(std::make_pair(BBGVN.getValue(), AggArgIdx), PHIGVNs);
+  hash_code PHINodeDataHash = encodePHINodeData(TemporaryPair);
+
+  // Look for and create a new entry in our connection between canonical
+  // numbers for PHINodes, and the set of objects we just created.
+  GVNToPHIIt = Group.GVNsToPHINodeGVN.find(PHINodeDataHash);
+  if (GVNToPHIIt == Group.GVNsToPHINodeGVN.end()) {
+    bool Inserted = false;
+    std::tie(PHIToGVNIt, Inserted) = Group.PHINodeGVNToGVNs.insert(
+        std::make_pair(Group.PHINodeGVNTracker, TemporaryPair));
+    std::tie(GVNToPHIIt, Inserted) = Group.GVNsToPHINodeGVN.insert(
+        std::make_pair(PHINodeDataHash, Group.PHINodeGVNTracker--));
+  }
+
+  return GVNToPHIIt->second;
+}
+
 /// Create a mapping of the output arguments for the \p Region to the output
 /// arguments of the overall outlined function.
 ///
@@ -844,35 +1075,25 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
   IRSimilarityCandidate &C = *Region.Candidate;
 
   SmallVector<BasicBlock *> BE;
-  DenseSet<BasicBlock *> BBSet;
-  C.getBasicBlocks(BBSet, BE);
+  DenseSet<BasicBlock *> BlocksInRegion;
+  C.getBasicBlocks(BlocksInRegion, BE);
 
   // Find the exits to the region.
   SmallPtrSet<BasicBlock *, 1> Exits;
   for (BasicBlock *Block : BE)
     for (BasicBlock *Succ : successors(Block))
-      if (!BBSet.contains(Succ))
+      if (!BlocksInRegion.contains(Succ))
         Exits.insert(Succ);
 
   // After determining which blocks exit to PHINodes, we add these PHINodes to
   // the set of outputs to be processed.  We also check the incoming values of
   // the PHINodes for whether they should no longer be considered outputs.
-  for (BasicBlock *ExitBB : Exits) {
-    for (PHINode &PN : ExitBB->phis()) {
-      // Find all incoming values from the outlining region.
-      SmallVector<unsigned, 2> IncomingVals;
-      for (unsigned Idx = 0; Idx < PN.getNumIncomingValues(); ++Idx)
-        if (BBSet.contains(PN.getIncomingBlock(Idx)))
-          IncomingVals.push_back(Idx);
-
-      // Do not process PHI if there is one (or fewer) predecessor from region.
-      if (IncomingVals.size() <= 1)
-        continue;
-
-      Region.IgnoreRegion = true;
-      return;
-    }
-  }
+  DenseSet<Value *> OutputsReplacedByPHINode;
+  DenseSet<Value *> OutputsWithNonPhiUses;
+  for (BasicBlock *ExitBB : Exits)
+    analyzeExitPHIsForOutputUses(ExitBB, Exits, BlocksInRegion, Outputs,
+                                 OutputsReplacedByPHINode,
+                                 OutputsWithNonPhiUses);
 
   // This counts the argument number in the extracted function.
   unsigned OriginalIndex = Region.NumExtractedInputs;
@@ -895,9 +1116,13 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
     // do not have to be in same order, but are functionally the same, we will
     // have to use a different scheme, as one-to-one correspondence is not
     // guaranteed.
-    unsigned GlobalValue = C.getGVN(Output).getValue();
     unsigned ArgumentSize = Group.ArgumentTypes.size();
 
+    // If the output is combined in a PHINode, we make sure to skip over it.
+    if (OutputsReplacedByPHINode.contains(Output))
+      continue;
+
+    unsigned AggArgIdx = 0;
     for (unsigned Jdx = TypeIndex; Jdx < ArgumentSize; Jdx++) {
       if (Group.ArgumentTypes[Jdx] != PointerType::getUnqual(Output->getType()))
         continue;
@@ -909,7 +1134,7 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
       AggArgsUsed.insert(Jdx);
       Region.ExtractedArgToAgg.insert(std::make_pair(OriginalIndex, Jdx));
       Region.AggArgToExtracted.insert(std::make_pair(Jdx, OriginalIndex));
-      Region.GVNStores.push_back(GlobalValue);
+      AggArgIdx = Jdx;
       break;
     }
 
@@ -918,18 +1143,54 @@ findExtractedOutputToOverallOutputMapping(OutlinableRegion &Region,
     // function to handle this output and create a mapping to it.
     if (!TypeFound) {
       Group.ArgumentTypes.push_back(PointerType::getUnqual(Output->getType()));
-      AggArgsUsed.insert(Group.ArgumentTypes.size() - 1);
+      // Mark the new pointer type as the last value in the aggregate argument
+      // list.
+      unsigned ArgTypeIdx = Group.ArgumentTypes.size() - 1;
+      AggArgsUsed.insert(ArgTypeIdx);
       Region.ExtractedArgToAgg.insert(
-          std::make_pair(OriginalIndex, Group.ArgumentTypes.size() - 1));
+          std::make_pair(OriginalIndex, ArgTypeIdx));
       Region.AggArgToExtracted.insert(
-          std::make_pair(Group.ArgumentTypes.size() - 1, OriginalIndex));
-      Region.GVNStores.push_back(GlobalValue);
+          std::make_pair(ArgTypeIdx, OriginalIndex));
+      AggArgIdx = ArgTypeIdx;
+    }
+
+    // TODO: Adapt to the extra input from the PHINode.
+    PHINode *PN = dyn_cast<PHINode>(Output);
+
+    Optional<unsigned> GVN;
+    if (PN && !BlocksInRegion.contains(PN->getParent())) {
+      // Values outside the region can be combined into PHINode when we
+      // have multiple exits. We collect both of these into a list to identify
+      // which values are being used in the PHINode. Each list identifies a
+      // different PHINode, and a different output. We store the PHINode as it's
+      // own canonical value.  These canonical values are also dependent on the
+      // output argument it is saved to.
+
+      // If two PHINodes have the same canonical values, but different aggregate
+      // argument locations, then they will have distinct Canonical Values.
+      GVN = getGVNForPHINode(Region, PN, AggArgIdx);
+      if (!GVN.hasValue())
+        return; 
+    } else {
+      // If we do not have a PHINode we use the global value numbering for the
+      // output value, to find the canonical number to add to the set of stored
+      // values.
+      GVN = C.getGVN(Output);
+      GVN = C.getCanonicalNum(*GVN);
     }
 
-    stable_sort(Region.GVNStores);
+    // Each region has a potentially unique set of outputs.  We save which
+    // values are output in a list of canonical values so we can differentiate
+    // among the different store schemes.
+    Region.GVNStores.push_back(*GVN);
+
     OriginalIndex++;
     TypeIndex++;
   }
+
+  // We sort the stored values to make sure that we are not affected by analysis
+  // order when determining what combination of items were stored.
+  stable_sort(Region.GVNStores);
 }
 
 void IROutliner::findAddInputsOutputs(Module &M, OutlinableRegion &Region,
@@ -1065,6 +1326,214 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
   return Call;
 }
 
+/// Find or create a BasicBlock in the outlined function containing PhiBlocks
+/// for \p RetVal.
+///
+/// \param Group - The OutlinableGroup containing the information about the
+/// overall outlined function.
+/// \param RetVal - The return value or exit option that we are currently
+/// evaluating.
+/// \returns The found or newly created BasicBlock to contain the needed
+/// PHINodes to be used as outputs.
+static BasicBlock *findOrCreatePHIBlock(OutlinableGroup &Group, Value *RetVal) {
+  DenseMap<Value *, BasicBlock *>::iterator PhiBlockForRetVal,
+      ReturnBlockForRetVal;
+  PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
+  ReturnBlockForRetVal = Group.EndBBs.find(RetVal);
+  assert(ReturnBlockForRetVal != Group.EndBBs.end() &&
+         "Could not find output value!");
+  BasicBlock *ReturnBB = ReturnBlockForRetVal->second;
+
+  // Find if a PHIBlock exists for this return value already.  If it is
+  // the first time we are analyzing this, we will not, so we record it.
+  PhiBlockForRetVal = Group.PHIBlocks.find(RetVal);
+  if (PhiBlockForRetVal != Group.PHIBlocks.end())
+    return PhiBlockForRetVal->second;
+  
+  // If we did not find a block, we create one, and insert it into the
+  // overall function and record it.
+  bool Inserted = false;
+  BasicBlock *PHIBlock = BasicBlock::Create(ReturnBB->getContext(), "phi_block",
+                                            ReturnBB->getParent());
+  std::tie(PhiBlockForRetVal, Inserted) =
+      Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
+
+  // We find the predecessors of the return block in the newly created outlined
+  // function in order to point them to the new PHIBlock rather than the already
+  // existing return block.
+  SmallVector<BranchInst *, 2> BranchesToChange;
+  for (BasicBlock *Pred : predecessors(ReturnBB))
+    BranchesToChange.push_back(cast<BranchInst>(Pred->getTerminator()));
+
+  // Now we mark the branch instructions found, and change the references of the
+  // return block to the newly created PHIBlock.
+  for (BranchInst *BI : BranchesToChange)
+    for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ < End; Succ++) {
+      if (BI->getSuccessor(Succ) != ReturnBB)
+        continue;
+      BI->setSuccessor(Succ, PHIBlock);
+    }
+
+  BranchInst::Create(ReturnBB, PHIBlock);
+
+  return PhiBlockForRetVal->second;
+}
+
+/// For the function call now representing the \p Region, find the passed value
+/// to that call that represents Argument \p A at the call location if the
+/// call has already been replaced with a call to the  overall, aggregate
+/// function.
+///
+/// \param A - The Argument to get the passed value for.
+/// \param Region - The extracted Region corresponding to the outlined function.
+/// \returns The Value representing \p A at the call site.
+static Value *
+getPassedArgumentInAlreadyOutlinedFunction(const Argument *A,
+                                           const OutlinableRegion &Region) {
+  // If we don't need to adjust the argument number at all (since the call
+  // has already been replaced by a call to the overall outlined function)
+  // we can just get the specified argument.
+  return Region.Call->getArgOperand(A->getArgNo());
+}
+
+/// For the function call now representing the \p Region, find the passed value
+/// to that call that represents Argument \p A at the call location if the
+/// call has only been replaced by the call to the aggregate function.
+///
+/// \param A - The Argument to get the passed value for.
+/// \param Region - The extracted Region corresponding to the outlined function.
+/// \returns The Value representing \p A at the call site.
+static Value *
+getPassedArgumentAndAdjustArgumentLocation(const Argument *A,
+                                           const OutlinableRegion &Region) {
+  unsigned ArgNum = A->getArgNo();
+  
+  // If it is a constant, we can look at our mapping from when we created
+  // the outputs to figure out what the constant value is.
+  if (Region.AggArgToConstant.count(ArgNum))
+    return Region.AggArgToConstant.find(ArgNum)->second;
+  
+  // If it is not a constant, and we are not looking at the overall function, we
+  // need to adjust which argument we are looking at.
+  ArgNum = Region.AggArgToExtracted.find(ArgNum)->second;
+  return Region.Call->getArgOperand(ArgNum);
+}
+
+/// Find the canonical numbering for the incoming Values into the PHINode \p PN.
+///
+/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
+/// \param Region [in] - The OutlinableRegion containing \p PN.
+/// \param OutputMappings [in] - The mapping of output values from outlined
+/// region to their original values.
+/// \param CanonNums [out] - The canonical numbering for the incoming values to
+/// \p PN.
+/// \param ReplacedWithOutlinedCall - A flag to use the extracted function call
+/// of \p Region rather than the overall function's call.
+static void
+findCanonNumsForPHI(PHINode *PN, OutlinableRegion &Region,
+                    const DenseMap<Value *, Value *> &OutputMappings,
+                    DenseSet<unsigned> &CanonNums,
+                    bool ReplacedWithOutlinedCall = true) {
+  // Iterate over the incoming values.
+  for (unsigned Idx = 0, EIdx = PN->getNumIncomingValues(); Idx < EIdx; Idx++) {
+    Value *IVal = PN->getIncomingValue(Idx);
+    // If we have an argument as incoming value, we need to grab the passed
+    // value from the call itself.
+    if (Argument *A = dyn_cast<Argument>(IVal)) {
+      if (ReplacedWithOutlinedCall)
+        IVal = getPassedArgumentInAlreadyOutlinedFunction(A, Region);
+      else
+        IVal = getPassedArgumentAndAdjustArgumentLocation(A, Region);
+    }
+
+    // Get the original value if it has been replaced by an output value.
+    IVal = findOutputMapping(OutputMappings, IVal);
+
+    // Find and add the canonical number for the incoming value.
+    Optional<unsigned> GVN = Region.Candidate->getGVN(IVal);
+    assert(GVN.hasValue() && "No GVN for incoming value");
+    Optional<unsigned> CanonNum = Region.Candidate->getCanonicalNum(*GVN);
+    assert(CanonNum.hasValue() && "No Canonical Number for GVN");
+    CanonNums.insert(*CanonNum);
+  }
+}
+
+/// Find, or add PHINode \p PN to the combined PHINode Block \p OverallPHIBlock
+/// in order to condense the number of instructions added to the outlined
+/// function.
+///
+/// \param PN [in] - The PHINode that we are finding the canonical numbers for.
+/// \param Region [in] - The OutlinableRegion containing \p PN. 
+/// \param OverallPhiBlock [in] - The overall PHIBlock we are trying to find
+/// \p PN in.
+/// \param OutputMappings [in] - The mapping of output values from outlined
+/// region to their original values.
+/// \return the newly found or created PHINode in \p OverallPhiBlock.
+static PHINode*
+findOrCreatePHIInBlock(PHINode &PN, OutlinableRegion &Region,
+                       BasicBlock *OverallPhiBlock,
+                       const DenseMap<Value *, Value *> &OutputMappings) {
+  OutlinableGroup &Group = *Region.Parent;
+  
+  DenseSet<unsigned> PNCanonNums;
+  // We have to use the extracted function since we have merged this region into
+  // the overall function yet.  We make sure to reassign the argument numbering
+  // since it is possible that the argument ordering is different between the
+  // functions.
+  findCanonNumsForPHI(&PN, Region, OutputMappings, PNCanonNums,
+                      /* ReplacedWithOutlinedCall = */ false);
+
+  OutlinableRegion *FirstRegion = Group.Regions[0];
+  DenseSet<unsigned> CurrentCanonNums;
+  // Find the Canonical Numbering for each PHINode, if it matches, we replace
+  // the uses of the PHINode we are searching for, with the found PHINode.
+  for (PHINode &CurrPN : OverallPhiBlock->phis()) {
+    CurrentCanonNums.clear();
+    findCanonNumsForPHI(&CurrPN, *FirstRegion, OutputMappings, CurrentCanonNums,
+                        /* ReplacedWithOutlinedCall = */ true);
+
+    if (all_of(PNCanonNums, [&CurrentCanonNums](unsigned CanonNum) {
+          return CurrentCanonNums.contains(CanonNum);
+        }))
+      return &CurrPN;
+  }
+
+  // If we've made it here, it means we weren't able to replace the PHINode, so
+  // we must insert it ourselves.
+  PHINode *NewPN = cast<PHINode>(PN.clone());
+  NewPN->insertBefore(&*OverallPhiBlock->begin());
+  for (unsigned Idx = 0, Edx = NewPN->getNumIncomingValues(); Idx < Edx;
+       Idx++) {
+    Value *IncomingVal = NewPN->getIncomingValue(Idx);
+    BasicBlock *IncomingBlock = NewPN->getIncomingBlock(Idx);
+
+    // Find corresponding basic block in the overall function for the incoming
+    // block.
+    Instruction *FirstNonPHI = IncomingBlock->getFirstNonPHI();
+    assert(FirstNonPHI && "Incoming block is empty?");
+    Value *CorrespondingVal =
+        Region.findCorrespondingValueIn(*FirstRegion, FirstNonPHI);
+    assert(CorrespondingVal && "Value is nullptr?");
+    BasicBlock *BlockToUse = cast<Instruction>(CorrespondingVal)->getParent();
+    NewPN->setIncomingBlock(Idx, BlockToUse);
+
+    // If we have an argument we make sure we replace using the argument from
+    // the correct function.
+    if (Argument *A = dyn_cast<Argument>(IncomingVal)) {
+      Value *Val = Group.OutlinedFunction->getArg(A->getArgNo());
+      NewPN->setIncomingValue(Idx, Val);
+      continue;
+    }
+    
+    // Find the corresponding value in the overall function.
+    IncomingVal = findOutputMapping(OutputMappings, IncomingVal);
+    Value *Val = Region.findCorrespondingValueIn(*FirstRegion, IncomingVal);
+    assert(Val && "Value is nullptr?");
+    NewPN->setIncomingValue(Idx, Val);
+  }
+  return NewPN;
+}
+
 // Within an extracted function, replace the argument uses of the extracted
 // region with the arguments of the function for an OutlinableGroup.
 //
@@ -1077,6 +1546,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
 static void
 replaceArgumentUses(OutlinableRegion &Region,
                     DenseMap<Value *, BasicBlock *> &OutputBBs,
+                    const DenseMap<Value *, Value *> &OutputMappings,
                     bool FirstFunction = false) {
   OutlinableGroup &Group = *Region.Parent;
   assert(Region.ExtractedFunction && "Region has no extracted function?");
@@ -1146,12 +1616,46 @@ replaceArgumentUses(OutlinableRegion &Region,
       LLVM_DEBUG(dbgs() << "Move store for instruction " << *I << " to "
                         << *OutputBB << "\n");
 
-      if (FirstFunction)
+      // If this is storing a PHINode, we must make sure it is included in the
+      // overall function.
+      if (!isa<PHINode>(ValueOperand)) {
+        if (FirstFunction)
+          continue;
+        Value *CorrVal =
+            Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
+        assert(CorrVal && "Value is nullptr?");
+        NewI->setOperand(0, CorrVal);
+        continue;
+      }
+      PHINode *PN = cast<PHINode>(SI->getValueOperand());
+      // If it has a value, it was not split by the code extractor, which
+      // is what we are looking for.
+      if (Region.Candidate->getGVN(PN).hasValue())
         continue;
-      Value *CorrVal =
-          Region.findCorrespondingValueIn(*Group.Regions[0], ValueOperand);
-      assert(CorrVal && "Value is nullptr?");
-      NewI->setOperand(0, CorrVal);
+
+      // We record the parent block for the PHINode in the Region so that
+      // we can exclude it from checks later on.
+      Region.PHIBlocks.insert(std::make_pair(RetVal, PN->getParent()));
+
+      // If this is the first function, we do not need to worry about mergiing
+      // this with any other block in the overall outlined function, so we can
+      // just continue.
+      if (FirstFunction) {
+        BasicBlock *PHIBlock = PN->getParent();
+        Group.PHIBlocks.insert(std::make_pair(RetVal, PHIBlock));
+        continue;
+      }
+
+      // We look for the aggregate block that contains the PHINodes leading into
+      // this exit path. If we can't find one, we create one.
+      BasicBlock *OverallPhiBlock = findOrCreatePHIBlock(Group, RetVal);
+
+      // For our PHINode, we find the combined canonical numbering, and
+      // attempt to find a matching PHINode in the overall PHIBlock.  If we
+      // cannot, we copy the PHINode and move it into this new block.
+      PHINode *NewPN =
+          findOrCreatePHIInBlock(*PN, Region, OverallPhiBlock, OutputMappings);
+      NewI->setOperand(0, NewPN);
     }
 
     // If we added an edge for basic blocks without a predecessor, we remove it
@@ -1392,7 +1896,12 @@ void createSwitchStatement(
     Module &M, OutlinableGroup &OG, DenseMap<Value *, BasicBlock *> &EndBBs,
     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs) {
   // We only need the switch statement if there is more than one store
-  // combination.
+  // combination, or there is more than one set of output blocks.  The first
+  // will occur when we store different sets of values for two different
+  // regions.  The second will occur when we have two outputs that are combined
+  // in a PHINode outside of the region in one outlined instance, and are used
+  // seaparately in another. This will create the same set of OutputGVNs, but
+  // will generate two different output schemes.
   if (OG.OutputGVNCombinations.size() > 1) {
     Function *AggFunc = OG.OutlinedFunction;
     // Create a final block for each different return block.
@@ -1435,8 +1944,14 @@ void createSwitchStatement(
     return;
   }
 
+  assert(OutputStoreBBs.size() < 2 && "Different store sets not handled!");
+
   // If there needs to be stores, move them from the output blocks to their
-  // corresponding ending block.
+  // corresponding ending block.  We do not check that the OutputGVNCombinations
+  // is equal to 1 here since that could just been the case where there are 0
+  // outputs. Instead, we check whether there is more than one set of output
+  // blocks since this is the only case where we would have to move the
+  // stores, and erase the extraneous blocks.
   if (OutputStoreBBs.size() == 1) {
     LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
                       << *OG.OutlinedFunction << "\n");
@@ -1468,10 +1983,13 @@ void createSwitchStatement(
 /// set of stores needed for the different functions.
 /// \param [in,out] FuncsToRemove - Extracted functions to erase from module
 /// once outlining is complete.
+/// \param [in] OutputMappings - Extracted functions to erase from module
+/// once outlining is complete.
 static void fillOverallFunction(
     Module &M, OutlinableGroup &CurrentGroup,
     std::vector<DenseMap<Value *, BasicBlock *>> &OutputStoreBBs,
-    std::vector<Function *> &FuncsToRemove) {
+    std::vector<Function *> &FuncsToRemove,
+    const DenseMap<Value *, Value *> &OutputMappings) {
   OutlinableRegion *CurrentOS = CurrentGroup.Regions[0];
 
   // Move first extracted function's instructions into new function.
@@ -1491,7 +2009,7 @@ static void fillOverallFunction(
                              CurrentGroup.OutlinedFunction, "output_block_0");
   CurrentOS->OutputBlockNum = 0;
 
-  replaceArgumentUses(*CurrentOS, NewBBs, true);
+  replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings, true);
   replaceConstants(*CurrentOS);
 
   // We first identify if any output blocks are empty, if they are we remove
@@ -1525,7 +2043,8 @@ void IROutliner::deduplicateExtractedSections(
 
   OutlinableRegion *CurrentOS;
 
-  fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove);
+  fillOverallFunction(M, CurrentGroup, OutputStoreBBs, FuncsToRemove,
+                      OutputMappings);
 
   std::vector<Value *> SortedKeys;
   for (unsigned Idx = 1; Idx < CurrentGroup.Regions.size(); Idx++) {
@@ -1539,8 +2058,7 @@ void IROutliner::deduplicateExtractedSections(
     createAndInsertBasicBlocks(
         CurrentGroup.EndBBs, NewBBs, CurrentGroup.OutlinedFunction,
         "output_block_" + Twine(static_cast<unsigned>(Idx)));
-
-    replaceArgumentUses(*CurrentOS, NewBBs);
+    replaceArgumentUses(*CurrentOS, NewBBs, OutputMappings);
     alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBBs,
                                 CurrentGroup.EndBBs, OutputMappings,
                                 OutputStoreBBs);
@@ -1708,6 +2226,34 @@ IROutliner::findBenefitFromAllRegions(OutlinableGroup &CurrentGroup) {
   return RegionBenefit;
 }
 
+/// For the \p OutputCanon number passed in find the value represented by this
+/// canonical number. If it is from a PHINode, we pick the first incoming
+/// value and return that Value instead.
+///
+/// \param Region - The OutlinableRegion to get the Value from.
+/// \param OutputCanon - The canonical number to find the Value from.
+/// \returns The Value represented by a canonical number \p OutputCanon in \p
+/// Region.
+static Value *findOutputValueInRegion(OutlinableRegion &Region,
+                                      unsigned OutputCanon) {
+  OutlinableGroup &CurrentGroup = *Region.Parent;
+  // If the value is greater than the value in the tracker, we have a
+  // PHINode and will instead use one of the incoming values to find the
+  // type.
+  if (OutputCanon > CurrentGroup.PHINodeGVNTracker) {
+    auto It = CurrentGroup.PHINodeGVNToGVNs.find(OutputCanon);
+    assert(It != CurrentGroup.PHINodeGVNToGVNs.end() &&
+           "Could not find GVN set for PHINode number!");
+    assert(It->second.second.size() > 0 && "PHINode does not have any values!");
+    OutputCanon = *It->second.second.begin();
+  }
+  Optional<unsigned> OGVN = Region.Candidate->fromCanonicalNum(OutputCanon);
+  assert(OGVN.hasValue() && "Could not find GVN for Canonical Number?");
+  Optional<Value *> OV = Region.Candidate->fromGVN(*OGVN);
+  assert(OV.hasValue() && "Could not find value for GVN?");
+  return *OV;
+}
+
 InstructionCost
 IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
   InstructionCost OverallCost = 0;
@@ -1715,10 +2261,8 @@ IROutliner::findCostOutputReloads(OutlinableGroup &CurrentGroup) {
     TargetTransformInfo &TTI = getTTI(*Region->StartBB->getParent());
 
     // Each output incurs a load after the call, so we add that to the cost.
-    for (unsigned OutputGVN : Region->GVNStores) {
-      Optional<Value *> OV = Region->Candidate->fromGVN(OutputGVN);
-      assert(OV.hasValue() && "Could not find value for GVN?");
-      Value *V = OV.getValue();
+    for (unsigned OutputCanon : Region->GVNStores) {
+      Value *V = findOutputValueInRegion(*Region, OutputCanon);
       InstructionCost LoadCost =
           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
                               TargetTransformInfo::TCK_CodeSize);
@@ -1747,6 +2291,7 @@ static InstructionCost findCostForOutputBlocks(Module &M,
   InstructionCost OutputCost = 0;
   unsigned NumOutputBranches = 0;
 
+  OutlinableRegion &FirstRegion = *CurrentGroup.Regions[0];
   IRSimilarityCandidate &Candidate = *CurrentGroup.Regions[0]->Candidate;
   DenseSet<BasicBlock *> CandidateBlocks;
   Candidate.getBasicBlocks(CandidateBlocks);
@@ -1772,10 +2317,8 @@ static InstructionCost findCostForOutputBlocks(Module &M,
 
   for (const ArrayRef<unsigned> &OutputUse :
        CurrentGroup.OutputGVNCombinations) {
-    for (unsigned GVN : OutputUse) {
-      Optional<Value *> OV = Candidate.fromGVN(GVN);
-      assert(OV.hasValue() && "Could not find value for GVN?");
-      Value *V = OV.getValue();
+    for (unsigned OutputCanon : OutputUse) {
+      Value *V = findOutputValueInRegion(FirstRegion, OutputCanon);
       InstructionCost StoreCost =
           TTI.getMemoryOpCost(Instruction::Load, V->getType(), Align(1), 0,
                               TargetTransformInfo::TCK_CodeSize);
@@ -2035,8 +2578,8 @@ unsigned IROutliner::doOutline(Module &M) {
         continue;
 
       SmallVector<BasicBlock *> BE;
-      DenseSet<BasicBlock *> BBSet;
-      OS->Candidate->getBasicBlocks(BBSet, BE);
+      DenseSet<BasicBlock *> BlocksInRegion;
+      OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
       OS->CE = new (ExtractorAllocator.Allocate())
           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
                         false, "outlined");
@@ -2146,8 +2689,8 @@ unsigned IROutliner::doOutline(Module &M) {
     OutlinedRegions.clear();
     for (OutlinableRegion *OS : CurrentGroup.Regions) {
       SmallVector<BasicBlock *> BE;
-      DenseSet<BasicBlock *> BBSet;
-      OS->Candidate->getBasicBlocks(BBSet, BE);
+      DenseSet<BasicBlock *> BlocksInRegion;
+      OS->Candidate->getBasicBlocks(BlocksInRegion, BE);
       OS->CE = new (ExtractorAllocator.Allocate())
           CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
                         false, "outlined");
diff --git a/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll b/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll
new file mode 100644
index 0000000000000..494bb89008507
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/gvn-output-set-overload.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do differentiate between outputs of the region stored in PHINodes
+; versus those stored outside of PHINodes.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  ret void
+next:
+  %1 = add i32 %c, 1
+  %2 = add i32 %e, 1
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32* null, i32 0)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[C_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[C_LOC]], i32* [[E_LOC]], i32 1)
+; CHECK-NEXT:    [[C_RELOAD:%.*]] = load i32, i32* [[C_LOC]], align 4
+; CHECK-NEXT:    [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[C_RELOAD]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[E_RELOAD]], 1
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT:%.*]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP3:%.*]], label [[FINAL_BLOCK_1:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
+; CHECK-NEXT:    i32 1, label [[OUTPUT_BLOCK_1_1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP3]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_0_1:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       output_block_1_0:
+; CHECK-NEXT:    store i32 [[C]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       output_block_1_1:
+; CHECK-NEXT:    store i32 [[C]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       final_block_1:
+; CHECK-NEXT:    ret i1 true
+;
diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll
new file mode 100644
index 0000000000000..ef003bad6c0c6
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/mismatched-phi-exits-not-in-first-outlined.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to extract blocks that contain PHINodes, and selectively
+; store into it's respective block, creating a new block if needed.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* null, i32 -1)
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[PHI_BLOCK:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[PHI_BLOCK]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_1_0:
+; CHECK-NEXT:    store i32 [[TMP3:%.*]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       phi_block:
+; CHECK-NEXT:    [[TMP3]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll
new file mode 100644
index 0000000000000..e51c281bab0c9
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/mismatched-phi-exits.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to extract blocks that contain PHINodes, and selectively
+; store into it's respective block, only using if needed.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* null, i32 -1)
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void  @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_SPLIT:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_SPLIT]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_0_0:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll b/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll
new file mode 100644
index 0000000000000..4f81747d04bb6
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/mismatched-phi-outputs-ordering.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not extract similar regions that would involve the splitting
+; of phi nodes on exit.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+next:
+  %2 = add i32 %d, 1
+  %3 = add i32 %e, 1
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  ret void
+next:
+  %1 = add i32 %d, 1
+  %2 = add i32 %e, 1
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[D_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[D_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    [[LT_CAST2:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST2]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[E_LOC]], i32* [[D_LOC]], i32* [[DOTCE_LOC]], i32 0)
+; CHECK-NEXT:    [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    [[D_RELOAD:%.*]] = load i32, i32* [[D_LOC]], align 4
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST2]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    call void @outlined_ir_func_1(i32 [[D_RELOAD]], i32 [[E_RELOAD]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[D_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[LT_CAST1:%.*]] = bitcast i32* [[D_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[E_LOC]], i32* [[D_LOC]], i32* null, i32 1)
+; CHECK-NEXT:    [[E_RELOAD:%.*]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    [[D_RELOAD:%.*]] = load i32, i32* [[D_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    call void @outlined_ir_func_1(i32 [[D_RELOAD]], i32 [[E_RELOAD]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT:%.*]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP4:%.*]], label [[FINAL_BLOCK_1:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
+; CHECK-NEXT:    i32 1, label [[OUTPUT_BLOCK_1_1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP4]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_0:%.*]]
+; CHECK-NEXT:    i32 1, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_0_0:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 [[D]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       output_block_0_1:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       output_block_1_0:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    store i32 [[D]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       output_block_1_1:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       final_block_1:
+; CHECK-NEXT:    ret i1 true
+;
+;
+; CHECK-LABEL: @outlined_ir_func_1(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[NEXT_TO_OUTLINE:%.*]]
+; CHECK:       next_to_outline:
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP0:%.*]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], 1
+; CHECK-NEXT:    br label [[NEXT_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       next_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
new file mode 100644
index 0000000000000..4e777e862543e
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
@@ -0,0 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Here we have multiple exits, but the different sources, same outputs are
+; needed, this checks that they are compressed, and moved into the appropriate
+; output blocks.
+
+define void @outline_outputs1() #0 {
+entry:
+  %output = alloca i32, align 4
+  %result = alloca i32, align 4
+  %output2 = alloca i32, align 4
+  %result2 = alloca i32, align 4
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  br label %block_2
+block_1:
+  %a2 = alloca i32, align 4
+  %b2 = alloca i32, align 4
+  br label %block_2
+block_2:
+  %a2val = load i32, i32* %a
+  %b2val = load i32, i32* %b
+  %add2 = add i32 2, %a2val
+  %mul2 = mul i32 2, %b2val
+  br label %block_5
+block_3:
+  %aval = load i32, i32* %a
+  %bval = load i32, i32* %b
+  %add = add i32 2, %aval
+  %mul = mul i32 2, %bval
+  br label %block_4
+block_4:
+  store i32 %add, i32* %output, align 4
+  store i32 %mul, i32* %result, align 4
+  br label %block_6
+block_5:
+  store i32 %add2, i32* %output, align 4
+  store i32 %mul2, i32* %result, align 4
+  br label %block_6
+block_6:
+  %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
+  ret void
+}
+
+define void @outline_outputs2() #0 {
+entry:
+  %output = alloca i32, align 4
+  %result = alloca i32, align 4
+  %output2 = alloca i32, align 4
+  %result2 = alloca i32, align 4
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  br label %block_2
+block_1:
+  %a2 = alloca i32, align 4
+  %b2 = alloca i32, align 4
+  br label %block_2
+block_2:
+  %a2val = load i32, i32* %a
+  %b2val = load i32, i32* %b
+  %add2 = add i32 2, %a2val
+  %mul2 = mul i32 2, %b2val
+  br label %block_5
+block_3:
+  %aval = load i32, i32* %a
+  %bval = load i32, i32* %b
+  %add = add i32 2, %aval
+  %mul = mul i32 2, %bval
+  br label %block_4
+block_4:
+  store i32 %add, i32* %output, align 4
+  store i32 %mul, i32* %result, align 4
+  br label %block_6
+block_5:
+  store i32 %add2, i32* %output, align 4
+  store i32 %mul2, i32* %result, align 4
+  br label %block_6
+block_6:
+  %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
+  ret void
+}
+
+; CHECK-LABEL: @outline_outputs1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIFF_CE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BLOCK_2:%.*]]
+; CHECK:       block_1:
+; CHECK-NEXT:    [[A2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BLOCK_2]]
+; CHECK:       block_2:
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DIFF_CE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[DIFF_CE_LOC]])
+; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK:       block_6:
+; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @outline_outputs2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DIFF_CE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[OUTPUT2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[RESULT2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BLOCK_2:%.*]]
+; CHECK:       block_1:
+; CHECK-NEXT:    [[A2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br label [[BLOCK_2]]
+; CHECK:       block_2:
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DIFF_CE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[DIFF_CE_LOC]])
+; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK:       block_6:
+; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK:  define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[BLOCK_2_TO_OUTLINE:%.*]]
+; CHECK:       block_2_to_outline:
+; CHECK-NEXT:    [[A2VAL:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[B2VAL:%.*]] = load i32, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    [[ADD2:%.*]] = add i32 2, [[A2VAL]]
+; CHECK-NEXT:    [[MUL2:%.*]] = mul i32 2, [[B2VAL]]
+; CHECK-NEXT:    br label [[BLOCK_5:%.*]]
+; CHECK:       block_3:
+; CHECK-NEXT:    [[AVAL:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[BVAL:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 2, [[AVAL]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 2, [[BVAL]]
+; CHECK-NEXT:    br label [[BLOCK_4:%.*]]
+; CHECK:       block_4:
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    store i32 [[MUL]], i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    br label [[BLOCK_6_SPLIT:%.*]]
+; CHECK:       block_5:
+; CHECK-NEXT:    store i32 [[ADD2]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    store i32 [[MUL2]], i32* [[TMP3]], align 4
+; CHECK-NEXT:    br label [[BLOCK_6_SPLIT]]
+; CHECK:       block_6.split:
+; CHECK-NEXT:    [[DIFF_CE:%.*]] = phi i32 [ [[AVAL]], [[BLOCK_4]] ], [ [[A2VAL]], [[BLOCK_5]] ]
+; CHECK-NEXT:    br label [[BLOCK_6_EXITSTUB:%.*]]
+; CHECK:       block_6.exitStub:
+; CHECK-NEXT:    store i32 [[DIFF_CE]], i32* [[TMP4:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
index 00c99488e231d..77cb4a5bc476f 100644
--- a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
@@ -37,42 +37,50 @@ first:
 }
 ; CHECK-LABEL: @function1(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       test1:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]])
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]])
-; CHECK-NEXT:    br label [[FIRST]]
 ; CHECK:       first:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
 ;
 ;
 ; CHECK-LABEL: @function2(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       test1:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]])
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]])
-; CHECK-NEXT:    br label [[FIRST]]
 ; CHECK:       first:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
 ;
 ;
-; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
 ; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[TEST_TO_OUTLINE:%.*]]
-; CHECK:       test_to_outline:
-; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_SPLIT:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_SPLIT]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
 ; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
 ; CHECK:       first.exitStub:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll b/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll
new file mode 100644
index 0000000000000..ecc1c36d6b9e3
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-output-overload.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not extract similar regions that would involve the splitting
+; of phi nodes on exit.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  ret void
+next:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 0)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TMP1:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32* [[DOTCE_LOC]], i32 1)
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT:%.*]], label [[PHI_BLOCK:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[PHI_BLOCK]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP2:%.*]], label [[FINAL_BLOCK_1:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_0_1:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    switch i32 [[TMP2]], label [[FINAL_BLOCK_0:%.*]] [
+; CHECK-NEXT:    i32 0, label [[OUTPUT_BLOCK_1_0:%.*]]
+; CHECK-NEXT:    ]
+; CHECK:       output_block_0_1:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_1]]
+; CHECK:       output_block_1_0:
+; CHECK-NEXT:    store i32 [[TMP3:%.*]], i32* [[TMP1]], align 4
+; CHECK-NEXT:    br label [[FINAL_BLOCK_0]]
+; CHECK:       phi_block:
+; CHECK-NEXT:    [[TMP3]] = phi i32 [ [[C]], [[TEST]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    br label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       final_block_0:
+; CHECK-NEXT:    ret i1 false
+; CHECK:       final_block_1:
+; CHECK-NEXT:    ret i1 true
+;
diff --git a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
new file mode 100644
index 0000000000000..258bbfe131e3e
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to propogate inputs to the region into the split PHINode
+; outside of the region if necessary.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[DOTCE_LOC]])
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[DOTCE_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[DOTCE_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[DOTCE_LOC]])
+; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST_SPLIT:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST_SPLIT]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first.split:
+; CHECK-NEXT:    [[DOTCE:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    store i32 [[DOTCE]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    ret i1 true
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    ret i1 false
+;

From d7e183b225ecddeef2a28a59c1addb8e1825ffc6 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Thu, 18 Nov 2021 14:25:23 -0800
Subject: [PATCH 579/946] [lldb] Use new dyld SPIs to query the shared cache
 local symbols

rdar://85492172
---
 .../ObjectFile/Mach-O/ObjectFileMachO.cpp     | 311 ++++++------------
 1 file changed, 107 insertions(+), 204 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 20383d9646fdc..2e712cded5308 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -6,8 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringRef.h"
 
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#include <lldb/Host/SafeMachO.h>
+
 #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h"
 #include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h"
 #include "Plugins/Process/Utility/RegisterContextDarwin_i386.h"
@@ -155,28 +160,6 @@ struct lldb_copy_dyld_cache_header_v1 {
                     // and later
 };
 
-struct lldb_copy_dyld_cache_mapping_info {
-  uint64_t address;
-  uint64_t size;
-  uint64_t fileOffset;
-  uint32_t maxProt;
-  uint32_t initProt;
-};
-
-struct lldb_copy_dyld_cache_local_symbols_info {
-  uint32_t nlistOffset;
-  uint32_t nlistCount;
-  uint32_t stringsOffset;
-  uint32_t stringsSize;
-  uint32_t entriesOffset;
-  uint32_t entriesCount;
-};
-struct lldb_copy_dyld_cache_local_symbols_entry {
-  uint32_t dylibOffset;
-  uint32_t nlistStartIndex;
-  uint32_t nlistCount;
-};
-
 static void PrintRegisterValue(RegisterContext *reg_ctx, const char *name,
                                const char *alt_name, size_t reg_byte_size,
                                Stream &data) {
@@ -2257,6 +2240,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
   llvm::StringRef g_objc_v2_prefix_class("_OBJC_CLASS_$_");
   llvm::StringRef g_objc_v2_prefix_metaclass("_OBJC_METACLASS_$_");
   llvm::StringRef g_objc_v2_prefix_ivar("_OBJC_IVAR_$_");
+  UUID image_uuid;
 
   for (i = 0; i < m_header.ncmds; ++i) {
     const lldb::offset_t cmd_offset = offset;
@@ -2324,6 +2308,14 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                sizeof(function_starts_load_command));
       break;
 
+    case LC_UUID: {
+      const uint8_t *uuid_bytes = m_data.PeekData(offset, 16);
+
+      if (uuid_bytes)
+        image_uuid = UUID::fromOptionalData(uuid_bytes, 16);
+      break;
+    }
+
     default:
       break;
     }
@@ -2615,8 +2607,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                                              ? eh_frame_section_sp->GetID()
                                              : static_cast<user_id_t>(NO_SECT);
 
-  lldb::offset_t nlist_data_offset = 0;
-
   uint32_t N_SO_index = UINT32_MAX;
 
   MachSymtabSectionInfo section_info(section_list);
@@ -2682,26 +2672,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
     // Next we need to determine the correct path for the dyld shared cache.
 
     ArchSpec header_arch = GetArchitecture();
-    char dsc_path[PATH_MAX];
-    char dsc_path_development[PATH_MAX];
-
-    snprintf(
-        dsc_path, sizeof(dsc_path), "%s%s%s",
-        "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
-                                                   */
-        "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
-        header_arch.GetArchitectureName());
-
-    snprintf(
-        dsc_path_development, sizeof(dsc_path), "%s%s%s%s",
-        "/System/Library/Caches/com.apple.dyld/", /* IPHONE_DYLD_SHARED_CACHE_DIR
-                                                   */
-        "dyld_shared_cache_", /* DYLD_SHARED_CACHE_BASE_NAME */
-        header_arch.GetArchitectureName(), ".development");
-
-    FileSpec dsc_nondevelopment_filespec(dsc_path);
-    FileSpec dsc_development_filespec(dsc_path_development);
-    FileSpec dsc_filespec;
 
     UUID dsc_uuid;
     UUID process_shared_cache_uuid;
@@ -2712,155 +2682,99 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                                 process_shared_cache_uuid);
     }
 
-    // First see if we can find an exact match for the inferior process
-    // shared cache UUID in the development or non-development shared caches
-    // on disk.
-    if (process_shared_cache_uuid.IsValid()) {
-      if (FileSystem::Instance().Exists(dsc_development_filespec)) {
-        UUID dsc_development_uuid = GetSharedCacheUUID(
-            dsc_development_filespec, byte_order, addr_byte_size);
-        if (dsc_development_uuid.IsValid() &&
-            dsc_development_uuid == process_shared_cache_uuid) {
-          dsc_filespec = dsc_development_filespec;
-          dsc_uuid = dsc_development_uuid;
-        }
-      }
-      if (!dsc_uuid.IsValid() &&
-          FileSystem::Instance().Exists(dsc_nondevelopment_filespec)) {
-        UUID dsc_nondevelopment_uuid = GetSharedCacheUUID(
-            dsc_nondevelopment_filespec, byte_order, addr_byte_size);
-        if (dsc_nondevelopment_uuid.IsValid() &&
-            dsc_nondevelopment_uuid == process_shared_cache_uuid) {
-          dsc_filespec = dsc_nondevelopment_filespec;
-          dsc_uuid = dsc_nondevelopment_uuid;
-        }
-      }
-    }
+    __block bool found_image = false;
+    __block void *nlist_buffer = nullptr;
+    __block unsigned nlist_count = 0;
+    __block char *string_table = nullptr;
+    __block vm_offset_t vm_nlist_memory = 0;
+    __block mach_msg_type_number_t vm_nlist_bytes_read = 0;
+    __block vm_offset_t vm_string_memory = 0;
+    __block mach_msg_type_number_t vm_string_bytes_read = 0;
+
+    auto _ = llvm::make_scope_exit(^{
+      if (vm_nlist_memory)
+        vm_deallocate(mach_task_self(), vm_nlist_memory, vm_nlist_bytes_read);
+      if (vm_string_memory)
+        vm_deallocate(mach_task_self(), vm_string_memory, vm_string_bytes_read);
+    });
 
-    // Failing a UUID match, prefer the development dyld_shared cache if both
-    // are present.
-    if (!FileSystem::Instance().Exists(dsc_filespec)) {
-      if (FileSystem::Instance().Exists(dsc_development_filespec)) {
-        dsc_filespec = dsc_development_filespec;
-      } else {
-        dsc_filespec = dsc_nondevelopment_filespec;
-      }
-    }
+    typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
+    typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
+    UndefinedNameToDescMap undefined_name_to_desc;
+    SymbolIndexToName reexport_shlib_needs_fixup;
 
-    /* The dyld_cache_header has a pointer to the
-       dyld_cache_local_symbols_info structure (localSymbolsOffset).
-       The dyld_cache_local_symbols_info structure gives us three things:
-         1. The start and count of the nlist records in the dyld_shared_cache
-       file
-         2. The start and size of the strings for these nlist records
-         3. The start and count of dyld_cache_local_symbols_entry entries
-
-       There is one dyld_cache_local_symbols_entry per dylib/framework in the
-       dyld shared cache.
-       The "dylibOffset" field is the Mach-O header of this dylib/framework in
-       the dyld shared cache.
-       The dyld_cache_local_symbols_entry also lists the start of this
-       dylib/framework's nlist records
-       and the count of how many nlist records there are for this
-       dylib/framework.
-    */
-
-    // Process the dyld shared cache header to find the unmapped symbols
-
-    DataBufferSP dsc_data_sp = MapFileData(
-        dsc_filespec, sizeof(struct lldb_copy_dyld_cache_header_v1), 0);
-    if (!dsc_uuid.IsValid()) {
-      dsc_uuid = GetSharedCacheUUID(dsc_filespec, byte_order, addr_byte_size);
-    }
-    if (dsc_data_sp) {
-      DataExtractor dsc_header_data(dsc_data_sp, byte_order, addr_byte_size);
+    dyld_for_each_installed_shared_cache(^(dyld_shared_cache_t shared_cache) {
+      uuid_t cache_uuid;
+      dyld_shared_cache_copy_uuid(shared_cache, &cache_uuid);
+      if (found_image)
+        return;
 
-      bool uuid_match = true;
-      if (dsc_uuid.IsValid() && process) {
         if (process_shared_cache_uuid.IsValid() &&
-            dsc_uuid != process_shared_cache_uuid) {
-          // The on-disk dyld_shared_cache file is not the same as the one in
-          // this process' memory, don't use it.
-          uuid_match = false;
-          ModuleSP module_sp(GetModule());
-          if (module_sp)
-            module_sp->ReportWarning("process shared cache does not match "
-                                     "on-disk dyld_shared_cache file, some "
-                                     "symbol names will be missing.");
-        }
-      }
+          process_shared_cache_uuid != UUID::fromOptionalData(&cache_uuid, 16))
+        return;
 
-      offset = offsetof(struct lldb_copy_dyld_cache_header_v1, mappingOffset);
-
-      uint32_t mappingOffset = dsc_header_data.GetU32(&offset);
-
-      // If the mappingOffset points to a location inside the header, we've
-      // opened an old dyld shared cache, and should not proceed further.
-      if (uuid_match &&
-          mappingOffset >= sizeof(struct lldb_copy_dyld_cache_header_v1)) {
-
-        DataBufferSP dsc_mapping_info_data_sp = MapFileData(
-            dsc_filespec, sizeof(struct lldb_copy_dyld_cache_mapping_info),
-            mappingOffset);
-
-        DataExtractor dsc_mapping_info_data(dsc_mapping_info_data_sp,
-                                            byte_order, addr_byte_size);
-        offset = 0;
-
-        // The File addresses (from the in-memory Mach-O load commands) for
-        // the shared libraries in the shared library cache need to be
-        // adjusted by an offset to match up with the dylibOffset identifying
-        // field in the dyld_cache_local_symbol_entry's.  This offset is
-        // recorded in mapping_offset_value.
-        const uint64_t mapping_offset_value =
-            dsc_mapping_info_data.GetU64(&offset);
-
-        offset =
-            offsetof(struct lldb_copy_dyld_cache_header_v1, localSymbolsOffset);
-        uint64_t localSymbolsOffset = dsc_header_data.GetU64(&offset);
-        uint64_t localSymbolsSize = dsc_header_data.GetU64(&offset);
-
-        if (localSymbolsOffset && localSymbolsSize) {
-          // Map the local symbols
-          DataBufferSP dsc_local_symbols_data_sp =
-              MapFileData(dsc_filespec, localSymbolsSize, localSymbolsOffset);
-
-          if (dsc_local_symbols_data_sp) {
-            DataExtractor dsc_local_symbols_data(dsc_local_symbols_data_sp,
-                                                 byte_order, addr_byte_size);
-
-            offset = 0;
-
-            typedef llvm::DenseMap<ConstString, uint16_t> UndefinedNameToDescMap;
-            typedef llvm::DenseMap<uint32_t, ConstString> SymbolIndexToName;
-            UndefinedNameToDescMap undefined_name_to_desc;
-            SymbolIndexToName reexport_shlib_needs_fixup;
-
-            // Read the local_symbols_infos struct in one shot
-            struct lldb_copy_dyld_cache_local_symbols_info local_symbols_info;
-            dsc_local_symbols_data.GetU32(&offset,
-                                          &local_symbols_info.nlistOffset, 6);
-
-            SectionSP text_section_sp(
-                section_list->FindSectionByName(GetSegmentNameTEXT()));
-
-            uint32_t header_file_offset =
-                (text_section_sp->GetFileAddress() - mapping_offset_value);
-
-            offset = local_symbols_info.entriesOffset;
-            for (uint32_t entry_index = 0;
-                 entry_index < local_symbols_info.entriesCount; entry_index++) {
-              struct lldb_copy_dyld_cache_local_symbols_entry
-                  local_symbols_entry;
-              local_symbols_entry.dylibOffset =
-                  dsc_local_symbols_data.GetU32(&offset);
-              local_symbols_entry.nlistStartIndex =
-                  dsc_local_symbols_data.GetU32(&offset);
-              local_symbols_entry.nlistCount =
-                  dsc_local_symbols_data.GetU32(&offset);
-
-              if (header_file_offset == local_symbols_entry.dylibOffset) {
-                unmapped_local_symbols_found = local_symbols_entry.nlistCount;
+      dyld_shared_cache_for_each_image(shared_cache, ^(dyld_image_t image) {
+        uuid_t dsc_image_uuid;
+        if (found_image)
+          return;
+
+        dyld_image_copy_uuid(image, &dsc_image_uuid);
+        if (image_uuid != UUID::fromOptionalData(dsc_image_uuid, 16))
+          return;
+
+        found_image = true;
+
+        // Compute the size of the string table. We need to ask dyld for a
+        // new SPI to avoid this step.
+        dyld_image_local_nlist_content_4Symbolication(
+            image, ^(const void *nlistStart, uint64_t nlistCount,
+                     const char *stringTable) {
+              if (!nlistStart || !nlistCount)
+                return;
+
+              // The buffers passed here are valid only inside the block.
+              // Use vm_read to make a cheap copy of them available for our
+              // processing later.
+              kern_return_t ret =
+                  vm_read(mach_task_self(), (vm_address_t)nlistStart,
+                          nlist_byte_size * nlistCount, &vm_nlist_memory,
+                          &vm_nlist_bytes_read);
+              if (ret != KERN_SUCCESS)
+                return;
+              assert(vm_nlist_bytes_read == nlist_byte_size * nlistCount);
+
+              // We don't know the size of the string table. It's cheaper
+              // to map the whol VM region than to determine the size by
+              // parsing all teh nlist entries.
+              vm_address_t string_address = (vm_address_t)stringTable;
+              vm_size_t region_size;
+              mach_msg_type_number_t info_count = VM_REGION_BASIC_INFO_COUNT_64;
+              vm_region_basic_info_data_t info;
+              memory_object_name_t object;
+              ret = vm_region_64(mach_task_self(), &string_address,
+                                 &region_size, VM_REGION_BASIC_INFO_64,
+                                 (vm_region_info_t)&info, &info_count, &object);
+              if (ret != KERN_SUCCESS)
+                return;
+
+              ret = vm_read(mach_task_self(), (vm_address_t)stringTable,
+                            region_size -
+                                ((vm_address_t)stringTable - string_address),
+                            &vm_string_memory, &vm_string_bytes_read);
+              if (ret != KERN_SUCCESS)
+                return;
+
+              nlist_buffer = (void *)vm_nlist_memory;
+              string_table = (char *)vm_string_memory;
+              nlist_count = nlistCount;
+            });
+      });
+    });
+    if (nlist_buffer) {
+      DataExtractor dsc_local_symbols_data(nlist_buffer,
+                                           nlist_count * nlist_byte_size,
+                                           byte_order, addr_byte_size);
+      unmapped_local_symbols_found = nlist_count;
 
                 // The normal nlist code cannot correctly size the Symbols
                 // array, we need to allocate it here.
@@ -2869,13 +2783,10 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                     unmapped_local_symbols_found - m_dysymtab.nlocalsym);
                 num_syms = symtab.GetNumSymbols();
 
-                nlist_data_offset =
-                    local_symbols_info.nlistOffset +
-                    (nlist_byte_size * local_symbols_entry.nlistStartIndex);
-                uint32_t string_table_offset = local_symbols_info.stringsOffset;
+      lldb::offset_t nlist_data_offset = 0;
 
                 for (uint32_t nlist_index = 0;
-                     nlist_index < local_symbols_entry.nlistCount;
+                     nlist_index < nlist_count;
                      nlist_index++) {
                   /////////////////////////////
                   {
@@ -2887,8 +2798,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                     struct nlist_64 nlist = *nlist_maybe;
 
                     SymbolType type = eSymbolTypeInvalid;
-                    const char *symbol_name = dsc_local_symbols_data.PeekCStr(
-                        string_table_offset + nlist.n_strx);
+          const char *symbol_name = string_table + nlist.n_strx;
 
                     if (symbol_name == NULL) {
                       // No symbol should be NULL, even the symbols with no
@@ -2898,7 +2808,7 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                           Host::eSystemLogError,
                           "error: DSC unmapped local symbol[%u] has invalid "
                           "string table offset 0x%x in %s, ignoring symbol\n",
-                          entry_index, nlist.n_strx,
+                          nlist_index, nlist.n_strx,
                           module_sp->GetFileSpec().GetPath().c_str());
                       continue;
                     }
@@ -3759,8 +3669,6 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
                   }
                   /////////////////////////////
                 }
-                break; // No more entries to consider
-              }
             }
 
             for (const auto &pos : reexport_shlib_needs_fixup) {
@@ -3774,14 +3682,9 @@ void ObjectFileMachO::ParseSymtab(Symtab &symtab) {
               }
             }
           }
-        }
-      }
-    }
-  }
 
-  // Must reset this in case it was mutated above!
-  nlist_data_offset = 0;
 #endif
+  lldb::offset_t nlist_data_offset = 0;
 
   if (nlist_data.GetByteSize() > 0) {
 

From 575c5d2a99eabf4a76ab6d81e25537804aa50c9d Mon Sep 17 00:00:00 2001
From: Steven Wan <wanyu9511@gmail.com>
Date: Tue, 25 Jan 2022 12:38:42 -0500
Subject: [PATCH 580/946] Disable Go binding test on AIX

Disable the Go binding test on AIX because building the binding on AIX with Clang is currently unsupported.

Reviewed By: ZarkoCA

Differential Revision: https://reviews.llvm.org/D117505
---
 llvm/test/Bindings/Go/go.test | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Bindings/Go/go.test b/llvm/test/Bindings/Go/go.test
index 2d31c2790697a..52ed348df1252 100644
--- a/llvm/test/Bindings/Go/go.test
+++ b/llvm/test/Bindings/Go/go.test
@@ -1,4 +1,5 @@
 ; RUN: llvm-go test llvm.org/llvm/bindings/go/llvm
 
 ; REQUIRES: shell, default_triple
-; UNSUPPORTED: asan, ubsan, msan
+;; Building Go bindings with Clang is currently unsupported on AIX.
+; UNSUPPORTED: asan, ubsan, msan, -aix

From b089e4072a012dc0c8233cada37326f686ca2604 Mon Sep 17 00:00:00 2001
From: eopXD <eop.chen@sifive.com>
Date: Sat, 22 Jan 2022 00:46:36 -0800
Subject: [PATCH 581/946] [RISCV] Don't allow i64 vector div by constant to use
 mulh with Zve64x

EEW=64 of mulh and its vairants requires V extension.

Authored by: Craig Topper <craig.topper@sifive.com> @craig.topper

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117947
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  13 +-
 llvm/lib/Target/RISCV/RISCVSubtarget.h      |   1 +
 llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll  | 283 ++++++++++--------
 llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll | 242 ++++++++-------
 llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll  | 314 +++++++++++---------
 llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll | 274 +++++++++--------
 6 files changed, 628 insertions(+), 499 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9b427703764ea..483d0abc8ad76 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -614,6 +614,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SMUL_LOHI, VT, Expand);
       setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 
+      // nxvXi64 MULHS/MULHU requires the V extension instead of Zve64*.
+      if (VT.getVectorElementType() == MVT::i64 && !Subtarget.hasStdExtV()) {
+        setOperationAction(ISD::MULHU, VT, Expand);
+        setOperationAction(ISD::MULHS, VT, Expand);
+      }
+
       setOperationAction(ISD::SMIN, VT, Legal);
       setOperationAction(ISD::SMAX, VT, Legal);
       setOperationAction(ISD::UMIN, VT, Legal);
@@ -910,8 +916,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::UMAX, VT, Custom);
         setOperationAction(ISD::ABS,  VT, Custom);
 
-        setOperationAction(ISD::MULHS, VT, Custom);
-        setOperationAction(ISD::MULHU, VT, Custom);
+        // vXi64 MULHS/MULHU requires the V extension instead of Zve64*.
+        if (VT.getVectorElementType() != MVT::i64 || Subtarget.hasStdExtV()) {
+          setOperationAction(ISD::MULHS, VT, Custom);
+          setOperationAction(ISD::MULHU, VT, Custom);
+        }
 
         setOperationAction(ISD::SADDSAT, VT, Custom);
         setOperationAction(ISD::UADDSAT, VT, Custom);
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 8f32e88d57c07..044dda0a1ccc2 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -156,6 +156,7 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
   bool hasStdExtF() const { return HasStdExtF; }
   bool hasStdExtD() const { return HasStdExtD; }
   bool hasStdExtC() const { return HasStdExtC; }
+  bool hasStdExtV() const { return HasStdExtV; }
   bool hasStdExtZba() const { return HasStdExtZba; }
   bool hasStdExtZbb() const { return HasStdExtZbb; }
   bool hasStdExtZbc() const { return HasStdExtZbc; }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
index 16fe495b981cf..fc089d690863d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define <vscale x 1 x i8> @vdiv_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vdiv_vv_nxv1i8:
@@ -895,38 +897,45 @@ define <vscale x 1 x i64> @vdiv_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
 }
 
 define <vscale x 1 x i64> @vdiv_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
-; RV32-LABEL: vdiv_vi_nxv1i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    vmulh.vv v8, v8, v9
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v9, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdiv_vi_nxv1i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v9, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v8, v8, v9
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v9, v8, a0
+; RV32-V-NEXT:    vsra.vi v8, v8, 1
+; RV32-V-NEXT:    vadd.vv v8, v8, v9
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdiv_vi_nxv1i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI58_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI58_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v9, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdiv_vi_nxv1i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; ZVE64X-NEXT:    vdiv.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdiv_vi_nxv1i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI58_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI58_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-V-NEXT:    vmulh.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v9, v8, a0
+; RV64-V-NEXT:    vsra.vi v8, v8, 1
+; RV64-V-NEXT:    vadd.vv v8, v8, v9
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
   %vc = sdiv <vscale x 1 x i64> %va, %splat
@@ -969,38 +978,45 @@ define <vscale x 2 x i64> @vdiv_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
 }
 
 define <vscale x 2 x i64> @vdiv_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
-; RV32-LABEL: vdiv_vi_nxv2i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmulh.vv v8, v8, v10
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v10, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdiv_vi_nxv2i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v10, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v8, v8, v10
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v10, v8, a0
+; RV32-V-NEXT:    vsra.vi v8, v8, 1
+; RV32-V-NEXT:    vadd.vv v8, v8, v10
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdiv_vi_nxv2i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI61_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI61_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v10, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdiv_vi_nxv2i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; ZVE64X-NEXT:    vdiv.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdiv_vi_nxv2i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI61_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI61_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; RV64-V-NEXT:    vmulh.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v10, v8, a0
+; RV64-V-NEXT:    vsra.vi v8, v8, 1
+; RV64-V-NEXT:    vadd.vv v8, v8, v10
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %vc = sdiv <vscale x 2 x i64> %va, %splat
@@ -1043,38 +1059,45 @@ define <vscale x 4 x i64> @vdiv_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
 }
 
 define <vscale x 4 x i64> @vdiv_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
-; RV32-LABEL: vdiv_vi_nxv4i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmulh.vv v8, v8, v12
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v12, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdiv_vi_nxv4i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v12, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v8, v8, v12
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v12, v8, a0
+; RV32-V-NEXT:    vsra.vi v8, v8, 1
+; RV32-V-NEXT:    vadd.vv v8, v8, v12
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdiv_vi_nxv4i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI64_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI64_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v12, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdiv_vi_nxv4i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; ZVE64X-NEXT:    vdiv.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdiv_vi_nxv4i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI64_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI64_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; RV64-V-NEXT:    vmulh.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v12, v8, a0
+; RV64-V-NEXT:    vsra.vi v8, v8, 1
+; RV64-V-NEXT:    vadd.vv v8, v8, v12
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
   %vc = sdiv <vscale x 4 x i64> %va, %splat
@@ -1117,41 +1140,47 @@ define <vscale x 8 x i64> @vdiv_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
 }
 
 define <vscale x 8 x i64> @vdiv_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
-; RV32-LABEL: vdiv_vi_nxv8i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmulh.vv v8, v8, v16
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v16, v8, a0
-; RV32-NEXT:    vsra.vi v8, v8, 1
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdiv_vi_nxv8i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v16, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v8, v8, v16
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v16, v8, a0
+; RV32-V-NEXT:    vsra.vi v8, v8, 1
+; RV32-V-NEXT:    vadd.vv v8, v8, v16
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdiv_vi_nxv8i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI67_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI67_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmulh.vx v8, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v16, v8, a0
-; RV64-NEXT:    vsra.vi v8, v8, 1
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdiv_vi_nxv8i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; ZVE64X-NEXT:    vdiv.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdiv_vi_nxv8i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI67_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI67_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-V-NEXT:    vmulh.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v16, v8, a0
+; RV64-V-NEXT:    vsra.vi v8, v8, 1
+; RV64-V-NEXT:    vadd.vv v8, v8, v16
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
   %vc = sdiv <vscale x 8 x i64> %va, %splat
   ret <vscale x 8 x i64> %vc
 }
-
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
index 5bf80bbc34859..2635e14c7e06c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define <vscale x 1 x i8> @vdivu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vdivu_vv_nxv1i8:
@@ -820,33 +822,40 @@ define <vscale x 1 x i64> @vdivu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
 }
 
 define <vscale x 1 x i64> @vdivu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
-; RV32-LABEL: vdivu_vi_nxv1i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    vmulhu.vv v8, v8, v9
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdivu_vi_nxv1i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v9, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v8, v8, v9
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v8, v8, a0
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdivu_vi_nxv1i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdivu_vi_nxv1i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; ZVE64X-NEXT:    vdivu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdivu_vi_nxv1i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v8, v8, a0
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
   %vc = udiv <vscale x 1 x i64> %va, %splat
@@ -916,33 +925,40 @@ define <vscale x 2 x i64> @vdivu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
 }
 
 define <vscale x 2 x i64> @vdivu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
-; RV32-LABEL: vdivu_vi_nxv2i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmulhu.vv v8, v8, v10
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdivu_vi_nxv2i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v10, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v8, v8, v10
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v8, v8, a0
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdivu_vi_nxv2i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdivu_vi_nxv2i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; ZVE64X-NEXT:    vdivu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdivu_vi_nxv2i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v8, v8, a0
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %vc = udiv <vscale x 2 x i64> %va, %splat
@@ -1012,33 +1028,40 @@ define <vscale x 4 x i64> @vdivu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
 }
 
 define <vscale x 4 x i64> @vdivu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
-; RV32-LABEL: vdivu_vi_nxv4i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmulhu.vv v8, v8, v12
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdivu_vi_nxv4i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v12, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v8, v8, v12
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v8, v8, a0
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdivu_vi_nxv4i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdivu_vi_nxv4i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; ZVE64X-NEXT:    vdivu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdivu_vi_nxv4i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v8, v8, a0
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
   %vc = udiv <vscale x 4 x i64> %va, %splat
@@ -1108,33 +1131,40 @@ define <vscale x 8 x i64> @vdivu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
 }
 
 define <vscale x 8 x i64> @vdivu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
-; RV32-LABEL: vdivu_vi_nxv8i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmulhu.vv v8, v8, v16
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vdivu_vi_nxv8i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v16, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v8, v8, v16
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v8, v8, a0
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vdivu_vi_nxv8i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmulhu.vx v8, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v8, v8, a0
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vdivu_vi_nxv8i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; ZVE64X-NEXT:    vdivu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vdivu_vi_nxv8i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v8, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v8, v8, a0
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
   %vc = udiv <vscale x 8 x i64> %va, %splat
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
index f6e47b8272b41..9cd03815dc38e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define <vscale x 1 x i8> @vrem_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vrem_vv_nxv1i8:
@@ -929,42 +931,49 @@ define <vscale x 1 x i64> @vrem_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
 }
 
 define <vscale x 1 x i64> @vrem_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
-; RV32-LABEL: vrem_vi_nxv1i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    vmulh.vv v9, v8, v9
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v10, v9, a0
-; RV32-NEXT:    vsra.vi v9, v9, 1
-; RV32-NEXT:    vadd.vv v9, v9, v10
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vrem_vi_nxv1i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v9, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v9, v8, v9
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v10, v9, a0
+; RV32-V-NEXT:    vsra.vi v9, v9, 1
+; RV32-V-NEXT:    vadd.vv v9, v9, v10
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v9
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vrem_vi_nxv1i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI56_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI56_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vmulh.vx v9, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v10, v9, a0
-; RV64-NEXT:    vsra.vi v9, v9, 1
-; RV64-NEXT:    vadd.vv v9, v9, v10
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vrem_vi_nxv1i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; ZVE64X-NEXT:    vrem.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vrem_vi_nxv1i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI56_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI56_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-V-NEXT:    vmulh.vx v9, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v10, v9, a0
+; RV64-V-NEXT:    vsra.vi v9, v9, 1
+; RV64-V-NEXT:    vadd.vv v9, v9, v10
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v9
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
   %vc = srem <vscale x 1 x i64> %va, %splat
@@ -1007,42 +1016,49 @@ define <vscale x 2 x i64> @vrem_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
 }
 
 define <vscale x 2 x i64> @vrem_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
-; RV32-LABEL: vrem_vi_nxv2i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmulh.vv v10, v8, v10
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v12, v10, a0
-; RV32-NEXT:    vsra.vi v10, v10, 1
-; RV32-NEXT:    vadd.vv v10, v10, v12
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v10
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vrem_vi_nxv2i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v10, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v10, v8, v10
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v12, v10, a0
+; RV32-V-NEXT:    vsra.vi v10, v10, 1
+; RV32-V-NEXT:    vadd.vv v10, v10, v12
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v10
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vrem_vi_nxv2i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI59_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI59_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmulh.vx v10, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v12, v10, a0
-; RV64-NEXT:    vsra.vi v10, v10, 1
-; RV64-NEXT:    vadd.vv v10, v10, v12
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v10
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vrem_vi_nxv2i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; ZVE64X-NEXT:    vrem.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vrem_vi_nxv2i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI59_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI59_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; RV64-V-NEXT:    vmulh.vx v10, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v12, v10, a0
+; RV64-V-NEXT:    vsra.vi v10, v10, 1
+; RV64-V-NEXT:    vadd.vv v10, v10, v12
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v10
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %vc = srem <vscale x 2 x i64> %va, %splat
@@ -1085,42 +1101,49 @@ define <vscale x 4 x i64> @vrem_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
 }
 
 define <vscale x 4 x i64> @vrem_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
-; RV32-LABEL: vrem_vi_nxv4i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmulh.vv v12, v8, v12
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v16, v12, a0
-; RV32-NEXT:    vsra.vi v12, v12, 1
-; RV32-NEXT:    vadd.vv v12, v12, v16
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v12
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vrem_vi_nxv4i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v12, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v12, v8, v12
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v16, v12, a0
+; RV32-V-NEXT:    vsra.vi v12, v12, 1
+; RV32-V-NEXT:    vadd.vv v12, v12, v16
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v12
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vrem_vi_nxv4i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI62_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI62_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmulh.vx v12, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v16, v12, a0
-; RV64-NEXT:    vsra.vi v12, v12, 1
-; RV64-NEXT:    vadd.vv v12, v12, v16
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v12
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vrem_vi_nxv4i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; ZVE64X-NEXT:    vrem.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vrem_vi_nxv4i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI62_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI62_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; RV64-V-NEXT:    vmulh.vx v12, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v16, v12, a0
+; RV64-V-NEXT:    vsra.vi v12, v12, 1
+; RV64-V-NEXT:    vadd.vv v12, v12, v16
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v12
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
   %vc = srem <vscale x 4 x i64> %va, %splat
@@ -1163,42 +1186,49 @@ define <vscale x 8 x i64> @vrem_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
 }
 
 define <vscale x 8 x i64> @vrem_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
-; RV32-LABEL: vrem_vi_nxv8i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 748983
-; RV32-NEXT:    addi a0, a0, -586
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    lui a0, 898779
-; RV32-NEXT:    addi a0, a0, 1755
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmulh.vv v16, v8, v16
-; RV32-NEXT:    li a0, 63
-; RV32-NEXT:    vsrl.vx v24, v16, a0
-; RV32-NEXT:    vsra.vi v16, v16, 1
-; RV32-NEXT:    vadd.vv v16, v16, v24
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vrem_vi_nxv8i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 748983
+; RV32-V-NEXT:    addi a0, a0, -586
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    lui a0, 898779
+; RV32-V-NEXT:    addi a0, a0, 1755
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v16, (a0), zero
+; RV32-V-NEXT:    vmulh.vv v16, v8, v16
+; RV32-V-NEXT:    li a0, 63
+; RV32-V-NEXT:    vsrl.vx v24, v16, a0
+; RV32-V-NEXT:    vsra.vi v16, v16, 1
+; RV32-V-NEXT:    vadd.vv v16, v16, v24
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v16
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vrem_vi_nxv8i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI65_0)
-; RV64-NEXT:    ld a0, %lo(.LCPI65_0)(a0)
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmulh.vx v16, v8, a0
-; RV64-NEXT:    li a0, 63
-; RV64-NEXT:    vsrl.vx v24, v16, a0
-; RV64-NEXT:    vsra.vi v16, v16, 1
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v16
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vrem_vi_nxv8i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; ZVE64X-NEXT:    vrem.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vrem_vi_nxv8i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    lui a0, %hi(.LCPI65_0)
+; RV64-V-NEXT:    ld a0, %lo(.LCPI65_0)(a0)
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-V-NEXT:    vmulh.vx v16, v8, a0
+; RV64-V-NEXT:    li a0, 63
+; RV64-V-NEXT:    vsrl.vx v24, v16, a0
+; RV64-V-NEXT:    vsra.vi v16, v16, 1
+; RV64-V-NEXT:    vadd.vv v16, v16, v24
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v16
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
   %vc = srem <vscale x 8 x i64> %va, %splat
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
index e67f25b6b5c7c..c049bc6b76999 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define <vscale x 1 x i8> @vremu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vremu_vv_nxv1i8:
@@ -854,37 +856,44 @@ define <vscale x 1 x i64> @vremu_vx_nxv1i64(<vscale x 1 x i64> %va, i64 %b) {
 }
 
 define <vscale x 1 x i64> @vremu_vi_nxv1i64_0(<vscale x 1 x i64> %va) {
-; RV32-LABEL: vremu_vi_nxv1i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v9, (a0), zero
-; RV32-NEXT:    vmulhu.vv v9, v8, v9
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v9, v9, a0
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v9
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vremu_vi_nxv1i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v9, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v9, v8, v9
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v9, v9, a0
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v9
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vremu_vi_nxv1i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
-; RV64-NEXT:    vmulhu.vx v9, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v9, v9, a0
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v9
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vremu_vi_nxv1i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; ZVE64X-NEXT:    vremu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vremu_vi_nxv1i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v9, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v9, v9, a0
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v9
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 1 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> undef, <vscale x 1 x i32> zeroinitializer
   %vc = urem <vscale x 1 x i64> %va, %splat
@@ -958,37 +967,44 @@ define <vscale x 2 x i64> @vremu_vx_nxv2i64(<vscale x 2 x i64> %va, i64 %b) {
 }
 
 define <vscale x 2 x i64> @vremu_vi_nxv2i64_0(<vscale x 2 x i64> %va) {
-; RV32-LABEL: vremu_vi_nxv2i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v10, (a0), zero
-; RV32-NEXT:    vmulhu.vv v10, v8, v10
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v10, v10, a0
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v10
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vremu_vi_nxv2i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m2, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v10, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v10, v8, v10
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v10, v10, a0
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v10
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vremu_vi_nxv2i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
-; RV64-NEXT:    vmulhu.vx v10, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v10, v10, a0
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v10
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vremu_vi_nxv2i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; ZVE64X-NEXT:    vremu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vremu_vi_nxv2i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m2, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v10, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v10, v10, a0
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v10
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 2 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
   %vc = urem <vscale x 2 x i64> %va, %splat
@@ -1062,37 +1078,44 @@ define <vscale x 4 x i64> @vremu_vx_nxv4i64(<vscale x 4 x i64> %va, i64 %b) {
 }
 
 define <vscale x 4 x i64> @vremu_vi_nxv4i64_0(<vscale x 4 x i64> %va) {
-; RV32-LABEL: vremu_vi_nxv4i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v12, (a0), zero
-; RV32-NEXT:    vmulhu.vv v12, v8, v12
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v12, v12, a0
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v12
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vremu_vi_nxv4i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m4, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v12, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v12, v8, v12
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v12, v12, a0
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v12
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vremu_vi_nxv4i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
-; RV64-NEXT:    vmulhu.vx v12, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v12, v12, a0
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v12
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vremu_vi_nxv4i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; ZVE64X-NEXT:    vremu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vremu_vi_nxv4i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m4, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v12, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v12, v12, a0
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v12
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 4 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> undef, <vscale x 4 x i32> zeroinitializer
   %vc = urem <vscale x 4 x i64> %va, %splat
@@ -1166,37 +1189,44 @@ define <vscale x 8 x i64> @vremu_vx_nxv8i64(<vscale x 8 x i64> %va, i64 %b) {
 }
 
 define <vscale x 8 x i64> @vremu_vi_nxv8i64_0(<vscale x 8 x i64> %va) {
-; RV32-LABEL: vremu_vi_nxv8i64_0:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    lui a0, 131072
-; RV32-NEXT:    sw a0, 12(sp)
-; RV32-NEXT:    li a0, 1
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
-; RV32-NEXT:    addi a0, sp, 8
-; RV32-NEXT:    vlse64.v v16, (a0), zero
-; RV32-NEXT:    vmulhu.vv v16, v8, v16
-; RV32-NEXT:    li a0, 61
-; RV32-NEXT:    vsrl.vx v16, v16, a0
-; RV32-NEXT:    li a0, -7
-; RV32-NEXT:    vnmsac.vx v8, a0, v16
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32-V-LABEL: vremu_vi_nxv8i64_0:
+; RV32-V:       # %bb.0:
+; RV32-V-NEXT:    addi sp, sp, -16
+; RV32-V-NEXT:    .cfi_def_cfa_offset 16
+; RV32-V-NEXT:    lui a0, 131072
+; RV32-V-NEXT:    sw a0, 12(sp)
+; RV32-V-NEXT:    li a0, 1
+; RV32-V-NEXT:    sw a0, 8(sp)
+; RV32-V-NEXT:    vsetvli a0, zero, e64, m8, ta, mu
+; RV32-V-NEXT:    addi a0, sp, 8
+; RV32-V-NEXT:    vlse64.v v16, (a0), zero
+; RV32-V-NEXT:    vmulhu.vv v16, v8, v16
+; RV32-V-NEXT:    li a0, 61
+; RV32-V-NEXT:    vsrl.vx v16, v16, a0
+; RV32-V-NEXT:    li a0, -7
+; RV32-V-NEXT:    vnmsac.vx v8, a0, v16
+; RV32-V-NEXT:    addi sp, sp, 16
+; RV32-V-NEXT:    ret
 ;
-; RV64-LABEL: vremu_vi_nxv8i64_0:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a0, 1
-; RV64-NEXT:    slli a0, a0, 61
-; RV64-NEXT:    addi a0, a0, 1
-; RV64-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
-; RV64-NEXT:    vmulhu.vx v16, v8, a0
-; RV64-NEXT:    li a0, 61
-; RV64-NEXT:    vsrl.vx v16, v16, a0
-; RV64-NEXT:    li a0, -7
-; RV64-NEXT:    vnmsac.vx v8, a0, v16
-; RV64-NEXT:    ret
+; ZVE64X-LABEL: vremu_vi_nxv8i64_0:
+; ZVE64X:       # %bb.0:
+; ZVE64X-NEXT:    li a0, -7
+; ZVE64X-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; ZVE64X-NEXT:    vremu.vx v8, v8, a0
+; ZVE64X-NEXT:    ret
+;
+; RV64-V-LABEL: vremu_vi_nxv8i64_0:
+; RV64-V:       # %bb.0:
+; RV64-V-NEXT:    li a0, 1
+; RV64-V-NEXT:    slli a0, a0, 61
+; RV64-V-NEXT:    addi a0, a0, 1
+; RV64-V-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
+; RV64-V-NEXT:    vmulhu.vx v16, v8, a0
+; RV64-V-NEXT:    li a0, 61
+; RV64-V-NEXT:    vsrl.vx v16, v16, a0
+; RV64-V-NEXT:    li a0, -7
+; RV64-V-NEXT:    vnmsac.vx v8, a0, v16
+; RV64-V-NEXT:    ret
   %head = insertelement <vscale x 8 x i64> undef, i64 -7, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> undef, <vscale x 8 x i32> zeroinitializer
   %vc = urem <vscale x 8 x i64> %va, %splat

From 87e68cae50d7ca28975ca6fb456cf0ab2ac915a1 Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Tue, 25 Jan 2022 09:55:48 -0800
Subject: [PATCH 582/946] Improve relnotes for the DWARFv5 default change

---
 clang/docs/ReleaseNotes.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 5cd2896de54d5..2272d7197ac57 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -256,9 +256,9 @@ DWARF Support in Clang
 ----------------------
 
 - The default DWARF version has increased from DWARFv4 to DWARFv5.  You can opt
-  back in to the old behavior with -gdwarf-4. Some platforms (Darwin, Android,
-  and SCE for instance) already opt out of this version bump as is suitable for
-  the platform
+  back in to the old behavior with ``-gdwarf-4`` or ``-fdebug-default-version=4``.
+  Some platforms (Darwin, Android, and SCE for instance) already opt out of this
+  version bump as is suitable for the platform
 
 Arm and AArch64 Support in Clang
 --------------------------------

From c2cd7cc63c5084cea943a7cfa7e1d7e5c62a34b2 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Tue, 25 Jan 2022 09:58:36 -0800
Subject: [PATCH 583/946] [lldb] Only include mach headers on Darwin

---
 lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
index 2e712cded5308..36ef7b520897b 100644
--- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
+++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp
@@ -9,10 +9,6 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/StringRef.h"
 
-#include <mach/mach_init.h>
-#include <mach/vm_map.h>
-#include <lldb/Host/SafeMachO.h>
-
 #include "Plugins/Process/Utility/RegisterContextDarwin_arm.h"
 #include "Plugins/Process/Utility/RegisterContextDarwin_arm64.h"
 #include "Plugins/Process/Utility/RegisterContextDarwin_i386.h"
@@ -60,6 +56,9 @@
 #include <TargetConditionals.h>
 // GetLLDBSharedCacheUUID() needs to call dlsym()
 #include <dlfcn.h>
+#include <mach/mach_init.h>
+#include <mach/vm_map.h>
+#include <lldb/Host/SafeMachO.h>
 #endif
 
 #ifndef __APPLE__

From 19d7a0b47b6849923576845f87a3278e012e049b Mon Sep 17 00:00:00 2001
From: Zinovy Nis <zinovy.nis@gmail.com>
Date: Sat, 15 Jan 2022 16:07:51 +0300
Subject: [PATCH 584/946] [clang-tidy] [bugprone-assert-side-effect] Ignore
 list for functions/methods

A semicolon-separated list of the names of functions or methods to be considered as not having side-effects was added for bugprone-assert-side-effect. It can be used to exclude methods like iterator::begin/end from being considered as having side-effects.

Differential Revision: https://reviews.llvm.org/D116478
---
 .../bugprone/AssertSideEffectCheck.cpp        | 22 ++++++++++++++-----
 .../bugprone/AssertSideEffectCheck.h          |  1 +
 clang-tools-extra/docs/ReleaseNotes.rst       | 12 +++++++---
 .../checks/bugprone-assert-side-effect.rst    | 10 +++++++++
 .../checkers/bugprone-assert-side-effect.cpp  | 18 ++++++++++++++-
 5 files changed, 54 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
index 4e2359ff4f67b..eba6b29f56af9 100644
--- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "AssertSideEffectCheck.h"
+#include "../utils/Matchers.h"
+#include "../utils/OptionsUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/Frontend/CompilerInstance.h"
@@ -25,7 +27,9 @@ namespace bugprone {
 
 namespace {
 
-AST_MATCHER_P(Expr, hasSideEffect, bool, CheckFunctionCalls) {
+AST_MATCHER_P2(Expr, hasSideEffect, bool, CheckFunctionCalls,
+               clang::ast_matchers::internal::Matcher<NamedDecl>,
+               IgnoredFunctionsMatcher) {
   const Expr *E = &Node;
 
   if (const auto *Op = dyn_cast<UnaryOperator>(E)) {
@@ -55,7 +59,8 @@ AST_MATCHER_P(Expr, hasSideEffect, bool, CheckFunctionCalls) {
     bool Result = CheckFunctionCalls;
     if (const auto *FuncDecl = CExpr->getDirectCallee()) {
       if (FuncDecl->getDeclName().isIdentifier() &&
-          FuncDecl->getName() == "__builtin_expect") // exceptions come here
+          IgnoredFunctionsMatcher.matches(*FuncDecl, Finder,
+                                          Builder)) // exceptions come here
         Result = false;
       else if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(FuncDecl))
         Result &= !MethodDecl->isConst();
@@ -72,8 +77,9 @@ AssertSideEffectCheck::AssertSideEffectCheck(StringRef Name,
                                              ClangTidyContext *Context)
     : ClangTidyCheck(Name, Context),
       CheckFunctionCalls(Options.get("CheckFunctionCalls", false)),
-      RawAssertList(Options.get("AssertMacros",
-                                "assert,NSAssert,NSCAssert")) {
+      RawAssertList(Options.get("AssertMacros", "assert,NSAssert,NSCAssert")),
+      IgnoredFunctions(utils::options::parseStringList(
+          "__builtin_expect;" + Options.get("IgnoredFunctions", ""))) {
   StringRef(RawAssertList).split(AssertMacros, ",", -1, false);
 }
 
@@ -81,11 +87,17 @@ AssertSideEffectCheck::AssertSideEffectCheck(StringRef Name,
 void AssertSideEffectCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) {
   Options.store(Opts, "CheckFunctionCalls", CheckFunctionCalls);
   Options.store(Opts, "AssertMacros", RawAssertList);
+  Options.store(Opts, "IgnoredFunctions",
+                utils::options::serializeStringList(IgnoredFunctions));
 }
 
 void AssertSideEffectCheck::registerMatchers(MatchFinder *Finder) {
+  auto IgnoredFunctionsMatcher =
+      matchers::matchesAnyListedName(IgnoredFunctions);
+
   auto DescendantWithSideEffect =
-      traverse(TK_AsIs, hasDescendant(expr(hasSideEffect(CheckFunctionCalls))));
+      traverse(TK_AsIs, hasDescendant(expr(hasSideEffect(
+                            CheckFunctionCalls, IgnoredFunctionsMatcher))));
   auto ConditionWithSideEffect = hasCondition(DescendantWithSideEffect);
   Finder->addMatcher(
       stmt(
diff --git a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h
index 15d1a69cb8cd0..c240f362e71e6 100644
--- a/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h
+++ b/clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h
@@ -42,6 +42,7 @@ class AssertSideEffectCheck : public ClangTidyCheck {
   const bool CheckFunctionCalls;
   const std::string RawAssertList;
   SmallVector<StringRef, 5> AssertMacros;
+  const std::vector<std::string> IgnoredFunctions;
 };
 
 } // namespace bugprone
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst
index cb622f9b09606..0e9491eab9f6a 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -133,7 +133,7 @@ New checks
 - New :doc:`readability-duplicate-include
   <clang-tidy/checks/readability-duplicate-include>` check.
 
-  Looks for duplicate includes and removes them.  
+  Looks for duplicate includes and removes them.
 
 - New :doc:`readability-identifier-length
   <clang-tidy/checks/readability-identifier-length>` check.
@@ -167,7 +167,13 @@ New check aliases
 Changes in existing checks
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+- :doc:`bugprone-assert-side-effect <clang-tidy/checks/bugprone-assert-side-effect>`
+  check now supports an ``IgnoredFunctions`` option to explicitly consider
+  the specified semicolon-separated functions list as not having any
+  side-effects. Regular expressions for the list items are also accepted.
+
 - Removed default setting ``cppcoreguidelines-explicit-virtual-functions.IgnoreDestructors = "true"``,
+  from :doc:`cppcoreguidelines-explicit-virtual-functions <clang-tidy/checks/cppcoreguidelines-explicit-virtual-functions>`
   to match the current state of the C++ Core Guidelines.
 
 - Removed suggestion ``use gsl::at`` from warning message in the
@@ -185,10 +191,10 @@ Changes in existing checks
 
 - Fixed a false positive in :doc:`bugprone-throw-keyword-missing
   <clang-tidy/checks/bugprone-throw-keyword-missing>` when creating an exception object
-  using placement new
+  using placement new.
 
 - :doc:`cppcoreguidelines-narrowing-conversions <clang-tidy/checks/cppcoreguidelines-narrowing-conversions>`
-  check now supports a `WarnOnIntegerToFloatingPointNarrowingConversion`
+  check now supports a ``WarnOnIntegerToFloatingPointNarrowingConversion``
   option to control whether to warn on narrowing integer to floating-point
   conversions.
 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst
index dc7a3c9a4bd6c..8ba84ff61c6a9 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone-assert-side-effect.rst
@@ -21,3 +21,13 @@ Options
    Whether to treat non-const member and non-member functions as they produce
    side effects. Disabled by default because it can increase the number of false
    positive warnings.
+
+.. option:: IgnoredFunctions
+
+   A semicolon-separated list of the names of functions or methods to be
+   considered as not having side-effects. Regular expressions are accepted,
+   e.g. `[Rr]ef(erence)?$` matches every type with suffix `Ref`, `ref`,
+   `Reference` and `reference`. The default is empty. If a name in the list
+   contains the sequence `::` it is matched against the qualified typename
+   (i.e. `namespace::Type`, otherwise it is matched against only
+   the type name (i.e. `Type`).
diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp
index 85f471f6e9eb1..c327007651d4c 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone-assert-side-effect.cpp
@@ -1,4 +1,4 @@
-// RUN: %check_clang_tidy %s bugprone-assert-side-effect %t -- -config="{CheckOptions: [{key: bugprone-assert-side-effect.CheckFunctionCalls, value: true}, {key: bugprone-assert-side-effect.AssertMacros, value: 'assert,assert2,my_assert,convoluted_assert,msvc_assert'}]}" -- -fexceptions
+// RUN: %check_clang_tidy %s bugprone-assert-side-effect %t -- -config="{CheckOptions: [{key: bugprone-assert-side-effect.CheckFunctionCalls, value: true}, {key: bugprone-assert-side-effect.AssertMacros, value: 'assert,assert2,my_assert,convoluted_assert,msvc_assert'}, {key: bugprone-assert-side-effect.IgnoredFunctions, value: 'MyClass::badButIgnoredFunc'}]}" -- -fexceptions
 
 //===--- assert definition block ------------------------------------------===//
 int abort() { return 0; }
@@ -43,9 +43,12 @@ void print(...);
 
 //===----------------------------------------------------------------------===//
 
+bool badButIgnoredFunc(int a, int b) { return a * b > 0; }
+
 class MyClass {
 public:
   bool badFunc(int a, int b) { return a * b > 0; }
+  bool badButIgnoredFunc(int a, int b) { return a * b > 0; }
   bool goodFunc(int a, int b) const { return a * b > 0; }
 
   MyClass &operator=(const MyClass &rhs) { return *this; }
@@ -57,6 +60,11 @@ class MyClass {
   void operator delete(void *p) {}
 };
 
+class SomeoneElseClass {
+public:
+  bool badButIgnoredFunc(int a, int b) { return a * b > 0; }
+};
+
 bool freeFunction() {
   return true;
 }
@@ -85,8 +93,16 @@ int main() {
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds
 
   MyClass mc;
+  SomeoneElseClass sec;
   assert(mc.badFunc(0, 1));
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds
+  assert(mc.badButIgnoredFunc(0, 1));
+  // badButIgnoredFunc is not ignored as only class members are ignored by the config
+  assert(badButIgnoredFunc(0, 1));
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds
+  // sec.badButIgnoredFunc is not ignored as only MyClass members are ignored by the config
+  assert(sec.badButIgnoredFunc(0, 1));
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: side effect in assert() condition discarded in release builds
   assert(mc.goodFunc(0, 1));
 
   MyClass mc2;

From c415ff186dbb9891886c6ac1f03060ad537e7074 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Tue, 25 Jan 2022 18:10:09 +0000
Subject: [PATCH 585/946] [AArch64] Add extra vecreduce.add tests, including
 extending reductions. NFC

This is all the reductions from i8 -> i64 with either sign or zero
extensions.
---
 llvm/test/CodeGen/AArch64/vecreduce-add.ll | 2095 ++++++++++++++++++++
 1 file changed, 2095 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/vecreduce-add.ll

diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
new file mode 100644
index 0000000000000..06077071468e2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -0,0 +1,2095 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-none-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BASE
+; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+dotprod %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-DOT
+
+define i32 @add_v4i32_v4i32(<4 x i32> %x) {
+; CHECK-LABEL: add_v4i32_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+  ret i32 %z
+}
+
+define i64 @add_v4i32_v4i64_zext(<4 x i32> %x) {
+; CHECK-LABEL: add_v4i32_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i32_v4i64_sext(<4 x i32> %x) {
+; CHECK-LABEL: add_v4i32_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i32_v2i64_zext(<2 x i32> %x) {
+; CHECK-LABEL: add_v2i32_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i32_v2i64_sext(<2 x i32> %x) {
+; CHECK-LABEL: add_v2i32_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i32 @add_v8i16_v8i32_zext(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v8i16_v8i32_sext(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-NEXT:    saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v4i16_v4i32_zext(<4 x i16> %x) {
+; CHECK-LABEL: add_v4i16_v4i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v4i16_v4i32_sext(<4 x i16> %x) {
+; CHECK-LABEL: add_v4i16_v4i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  ret i32 %z
+}
+
+define zeroext i16 @add_v8i16_v8i16(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
+  ret i16 %z
+}
+
+define i64 @add_v8i16_v8i64_zext(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v8i16_v8i64_sext(<8 x i16> %x) {
+; CHECK-LABEL: add_v8i16_v8i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i16_v4i64_zext(<4 x i16> %x) {
+; CHECK-LABEL: add_v4i16_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i16_v4i64_sext(<4 x i16> %x) {
+; CHECK-LABEL: add_v4i16_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i16_v2i64_zext(<2 x i16> %x) {
+; CHECK-LABEL: add_v2i16_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i16> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i16_v2i64_sext(<2 x i16> %x) {
+; CHECK-LABEL: add_v2i16_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #48
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #48
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i16> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i32 @add_v16i8_v16i32_zext(<16 x i8> %x) {
+; CHECK-BASE-LABEL: add_v16i8_v16i32_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    uaddl2 v2.4s, v0.8h, v1.8h
+; CHECK-BASE-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i32_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.16b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT:    addv s0, v2.4s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v16i8_v16i32_sext(<16 x i8> %x) {
+; CHECK-BASE-LABEL: add_v16i8_v16i32_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-BASE-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i32_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.16b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT:    addv s0, v2.4s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v8i8_v8i32_zext(<8 x i8> %x) {
+; CHECK-BASE-LABEL: add_v8i8_v8i32_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v8i8_v8i32_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.8b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v2.2s, v0.8b, v1.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v8i8_v8i32_sext(<8 x i8> %x) {
+; CHECK-BASE-LABEL: add_v8i8_v8i32_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v8i8_v8i32_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.8b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v2.2s, v0.8b, v1.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v4i8_v4i32_zext(<4 x i8> %x) {
+; CHECK-LABEL: add_v4i8_v4i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  ret i32 %z
+}
+
+define i32 @add_v4i8_v4i32_sext(<4 x i8> %x) {
+; CHECK-LABEL: add_v4i8_v4i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  ret i32 %z
+}
+
+define zeroext i16 @add_v16i8_v16i16_zext(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i16_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-NEXT:    uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  ret i16 %z
+}
+
+define signext i16 @add_v16i8_v16i16_sext(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i16_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-NEXT:    saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    smov w0, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  ret i16 %z
+}
+
+define zeroext i16 @add_v8i8_v8i16_zext(<8 x i8> %x) {
+; CHECK-LABEL: add_v8i8_v8i16_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  ret i16 %z
+}
+
+define signext i16 @add_v8i8_v8i16_sext(<8 x i8> %x) {
+; CHECK-LABEL: add_v8i8_v8i16_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    smov w0, v0.h[0]
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  ret i16 %z
+}
+
+define zeroext i8 @add_v16i8_v16i8(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv b0, v0.16b
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
+  ret i8 %z
+}
+
+define i64 @add_v16i8_v16i64_zext(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT:    uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v16i8_v16i64_sext(<16 x i8> %x) {
+; CHECK-LABEL: add_v16i8_v16i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT:    saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v8i8_v8i64_zext(<8 x i8> %x) {
+; CHECK-LABEL: add_v8i8_v8i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v8i8_v8i64_sext(<8 x i8> %x) {
+; CHECK-LABEL: add_v8i8_v8i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i8_v4i64_zext(<4 x i8> %x) {
+; CHECK-LABEL: add_v4i8_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v4i8_v4i64_sext(<4 x i8> %x) {
+; CHECK-LABEL: add_v4i8_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v1.2d, v1.2d, #56
+; CHECK-NEXT:    ssra v1.2d, v0.2d, #56
+; CHECK-NEXT:    addp d0, v1.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i8_v2i64_zext(<2 x i8> %x) {
+; CHECK-LABEL: add_v2i8_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i8> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i8_v2i64_sext(<2 x i8> %x) {
+; CHECK-LABEL: add_v2i8_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i8> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  ret i64 %z
+}
+
+define i64 @add_v2i64_v2i64(<2 x i64> %x) {
+; CHECK-LABEL: add_v2i64_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
+  ret i64 %z
+}
+
+define i32 @add_v4i32_v4i32_acc(<4 x i32> %x, i32 %a) {
+; CHECK-LABEL: add_v4i32_v4i32_acc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i64 @add_v4i32_v4i64_acc_zext(<4 x i32> %x, i64 %a) {
+; CHECK-LABEL: add_v4i32_v4i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i32_v4i64_acc_sext(<4 x i32> %x, i64 %a) {
+; CHECK-LABEL: add_v4i32_v4i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i32> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i32_v2i64_acc_zext(<2 x i32> %x, i64 %a) {
+; CHECK-LABEL: add_v2i32_v2i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i32_v2i64_acc_sext(<2 x i32> %x, i64 %a) {
+; CHECK-LABEL: add_v2i32_v2i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i32> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i32 @add_v8i16_v8i32_acc_zext(<8 x i16> %x, i32 %a) {
+; CHECK-LABEL: add_v8i16_v8i32_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v8i16_v8i32_acc_sext(<8 x i16> %x, i32 %a) {
+; CHECK-LABEL: add_v8i16_v8i32_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-NEXT:    saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v4i16_v4i32_acc_zext(<4 x i16> %x, i32 %a) {
+; CHECK-LABEL: add_v4i16_v4i32_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v4i16_v4i32_acc_sext(<4 x i16> %x, i32 %a) {
+; CHECK-LABEL: add_v4i16_v4i32_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define zeroext i16 @add_v8i16_v8i16_acc(<8 x i16> %x, i16 %a) {
+; CHECK-LABEL: add_v8i16_v8i16_acc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define i64 @add_v8i16_v8i64_acc_zext(<8 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v8i16_v8i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v8i16_v8i64_acc_sext(<8 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v8i16_v8i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i16_v4i64_acc_zext(<4 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v4i16_v4i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i16_v4i64_acc_sext(<4 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v4i16_v4i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v1.2d, v0.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i16_v2i64_acc_zext(<2 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v2i16_v2i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i16> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i16_v2i64_acc_sext(<2 x i16> %x, i64 %a) {
+; CHECK-LABEL: add_v2i16_v2i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #48
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #48
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i16> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i32 @add_v16i8_v16i32_acc_zext(<16 x i8> %x, i32 %a) {
+; CHECK-BASE-LABEL: add_v16i8_v16i32_acc_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    uaddl2 v2.4s, v0.8h, v1.8h
+; CHECK-BASE-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w8, s0
+; CHECK-BASE-NEXT:    add w0, w8, w0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.16b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT:    addv s0, v2.4s
+; CHECK-DOT-NEXT:    fmov w8, s0
+; CHECK-DOT-NEXT:    add w0, w8, w0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v16i8_v16i32_acc_sext(<16 x i8> %x, i32 %a) {
+; CHECK-BASE-LABEL: add_v16i8_v16i32_acc_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    saddl2 v2.4s, v0.8h, v1.8h
+; CHECK-BASE-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w8, s0
+; CHECK-BASE-NEXT:    add w0, w8, w0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v16i8_v16i32_acc_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.16b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v2.4s, v0.16b, v1.16b
+; CHECK-DOT-NEXT:    addv s0, v2.4s
+; CHECK-DOT-NEXT:    fmov w8, s0
+; CHECK-DOT-NEXT:    add w0, w8, w0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v8i8_v8i32_acc_zext(<8 x i8> %x, i32 %a) {
+; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    ushll v1.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    uaddw2 v0.4s, v1.4s, v0.8h
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w8, s0
+; CHECK-BASE-NEXT:    add w0, w8, w0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.8b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v2.2s, v0.8b, v1.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT:    fmov w8, s0
+; CHECK-DOT-NEXT:    add w0, w8, w0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v8i8_v8i32_acc_sext(<8 x i8> %x, i32 %a) {
+; CHECK-BASE-LABEL: add_v8i8_v8i32_acc_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    sshll v1.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    saddw2 v0.4s, v1.4s, v0.8h
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w8, s0
+; CHECK-BASE-NEXT:    add w0, w8, w0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v1.8b, #1
+; CHECK-DOT-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v2.2s, v0.8b, v1.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT:    fmov w8, s0
+; CHECK-DOT-NEXT:    add w0, w8, w0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v4i8_v4i32_acc_zext(<4 x i8> %x, i32 %a) {
+; CHECK-LABEL: add_v4i8_v4i32_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define i32 @add_v4i8_v4i32_acc_sext(<4 x i8> %x, i32 %a) {
+; CHECK-LABEL: add_v4i8_v4i32_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w0, w8, w0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i32>
+  %z = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %r = add i32 %z, %a
+  ret i32 %r
+}
+
+define zeroext i16 @add_v16i8_v16i16_acc_zext(<16 x i8> %x, i16 %a) {
+; CHECK-LABEL: add_v16i8_v16i16_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v1.8h, v0.8b, #0
+; CHECK-NEXT:    uaddw2 v0.8h, v1.8h, v0.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define signext i16 @add_v16i8_v16i16_acc_sext(<16 x i8> %x, i16 %a) {
+; CHECK-LABEL: add_v16i8_v16i16_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v1.8h, v0.8b, #0
+; CHECK-NEXT:    saddw2 v0.8h, v1.8h, v0.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define zeroext i16 @add_v8i8_v8i16_acc_zext(<8 x i8> %x, i16 %a) {
+; CHECK-LABEL: add_v8i8_v8i16_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define signext i16 @add_v8i8_v8i16_acc_sext(<8 x i8> %x, i16 %a) {
+; CHECK-LABEL: add_v8i8_v8i16_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i16>
+  %z = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %r = add i16 %z, %a
+  ret i16 %r
+}
+
+define zeroext i8 @add_v16i8_v16i8_acc(<16 x i8> %x, i8 %a) {
+; CHECK-LABEL: add_v16i8_v16i8_acc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    ret
+entry:
+  %z = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
+  %r = add i8 %z, %a
+  ret i8 %r
+}
+
+define i64 @add_v16i8_v16i64_acc_zext(<16 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v16i8_v16i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT:    uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v16i8_v16i64_acc_sext(<16 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v16i8_v16i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v1.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v2.4s, v1.8h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT:    saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v8i8_v8i64_acc_zext(<8 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v8i8_v8i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    uaddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v8i8_v8i64_acc_sext(<8 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v8i8_v8i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v1.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    saddl2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i8_v4i64_acc_zext(<4 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v4i8_v4i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v1.2d, v0.4s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v4i8_v4i64_acc_sext(<4 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v4i8_v4i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.2d, v0.2s, #0
+; CHECK-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v1.2d, v1.2d, #56
+; CHECK-NEXT:    ssra v1.2d, v0.2d, #56
+; CHECK-NEXT:    addp d0, v1.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i8_v2i64_acc_zext(<2 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v2i8_v2i64_acc_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d1, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i8> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i8_v2i64_acc_sext(<2 x i8> %x, i64 %a) {
+; CHECK-LABEL: add_v2i8_v2i64_acc_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i8> %x to <2 x i64>
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i64 @add_v2i64_v2i64_acc(<2 x i64> %x, i64 %a) {
+; CHECK-LABEL: add_v2i64_v2i64_acc:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x8, d0
+; CHECK-NEXT:    add x0, x8, x0
+; CHECK-NEXT:    ret
+entry:
+  %z = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
+  %r = add i64 %z, %a
+  ret i64 %r
+}
+
+define i32 @add_pair_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_pair_v4i32_v4i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %x)
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %y)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i64 @add_pair_v4i32_v4i64_zext(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_pair_v4i32_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v3.2d, v1.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i32> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = zext <4 x i32> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i32_v4i64_sext(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: add_pair_v4i32_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v2.2d, v0.2s, #0
+; CHECK-NEXT:    sshll v3.2d, v1.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i32> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = sext <4 x i32> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i32_v2i64_zext(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: add_pair_v2i32_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i32> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = zext <2 x i32> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i32_v2i64_sext(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: add_pair_v2i32_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i32> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = sext <2 x i32> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i32 @add_pair_v8i16_v8i32_zext(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v3.4s, v1.4h, #0
+; CHECK-NEXT:    uaddw2 v0.4s, v2.4s, v0.8h
+; CHECK-NEXT:    uaddw2 v1.4s, v3.4s, v1.8h
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %yy = zext <8 x i16> %y to <8 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v8i16_v8i32_sext(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v3.4s, v1.4h, #0
+; CHECK-NEXT:    saddw2 v0.4s, v2.4s, v0.8h
+; CHECK-NEXT:    saddw2 v1.4s, v3.4s, v1.8h
+; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %yy = sext <8 x i16> %y to <8 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v4i16_v4i32_zext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: add_pair_v4i16_v4i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %yy = zext <4 x i16> %y to <4 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v4i16_v4i32_sext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: add_pair_v4i16_v4i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    saddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %yy = sext <4 x i16> %y to <4 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define zeroext i16 @add_pair_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %x)
+  %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %y)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define i64 @add_pair_v8i16_v8i64_zext(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v1.8h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    uaddl2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    uaddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i16> %x to <8 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %yy = zext <8 x i16> %y to <8 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v8i16_v8i64_sext(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: add_pair_v8i16_v8i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v1.8h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    saddl2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    saddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i16> %x to <8 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %yy = sext <8 x i16> %y to <8 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i16_v4i64_zext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: add_pair_v4i16_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v3.2d, v1.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i16> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = zext <4 x i16> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i16_v4i64_sext(<4 x i16> %x, <4 x i16> %y) {
+; CHECK-LABEL: add_pair_v4i16_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    sshll v2.2d, v0.2s, #0
+; CHECK-NEXT:    sshll v3.2d, v1.2s, #0
+; CHECK-NEXT:    saddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    saddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i16> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = sext <4 x i16> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i16_v2i64_zext(<2 x i16> %x, <2 x i16> %y) {
+; CHECK-LABEL: add_pair_v2i16_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
+; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i16> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = zext <2 x i16> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i16_v2i64_sext(<2 x i16> %x, <2 x i16> %y) {
+; CHECK-LABEL: add_pair_v2i16_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #48
+; CHECK-NEXT:    shl v1.2d, v1.2d, #48
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #48
+; CHECK-NEXT:    ssra v0.2d, v1.2d, #48
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i16> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = sext <2 x i16> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i32 @add_pair_v16i8_v16i32_zext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-BASE-LABEL: add_pair_v16i8_v16i32_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll2 v2.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    ushll2 v3.8h, v1.16b, #0
+; CHECK-BASE-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT:    uaddl2 v4.4s, v0.8h, v2.8h
+; CHECK-BASE-NEXT:    uaddl v0.4s, v0.4h, v2.4h
+; CHECK-BASE-NEXT:    uaddl2 v2.4s, v1.8h, v3.8h
+; CHECK-BASE-NEXT:    uaddl v1.4s, v1.4h, v3.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v4.4s
+; CHECK-BASE-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_pair_v16i8_v16i32_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v2.16b, #1
+; CHECK-DOT-NEXT:    movi v3.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v3.4s, v1.16b, v2.16b
+; CHECK-DOT-NEXT:    udot v3.4s, v0.16b, v2.16b
+; CHECK-DOT-NEXT:    addv s0, v3.4s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %yy = zext <16 x i8> %y to <16 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v16i8_v16i32_sext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-BASE-LABEL: add_pair_v16i8_v16i32_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll2 v2.8h, v0.16b, #0
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    sshll2 v3.8h, v1.16b, #0
+; CHECK-BASE-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT:    saddl2 v4.4s, v0.8h, v2.8h
+; CHECK-BASE-NEXT:    saddl v0.4s, v0.4h, v2.4h
+; CHECK-BASE-NEXT:    saddl2 v2.4s, v1.8h, v3.8h
+; CHECK-BASE-NEXT:    saddl v1.4s, v1.4h, v3.4h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v4.4s
+; CHECK-BASE-NEXT:    add v1.4s, v1.4s, v2.4s
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_pair_v16i8_v16i32_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v2.16b, #1
+; CHECK-DOT-NEXT:    movi v3.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v3.4s, v1.16b, v2.16b
+; CHECK-DOT-NEXT:    sdot v3.4s, v0.16b, v2.16b
+; CHECK-DOT-NEXT:    addv s0, v3.4s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %xx)
+  %yy = sext <16 x i8> %y to <16 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v8i8_v8i32_zext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_zext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT:    ushll v2.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    ushll v3.4s, v1.4h, #0
+; CHECK-BASE-NEXT:    uaddw2 v0.4s, v2.4s, v0.8h
+; CHECK-BASE-NEXT:    uaddw2 v1.4s, v3.4s, v1.8h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_zext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v2.8b, #1
+; CHECK-DOT-NEXT:    movi v3.2d, #0000000000000000
+; CHECK-DOT-NEXT:    udot v3.2s, v1.8b, v2.8b
+; CHECK-DOT-NEXT:    udot v3.2s, v0.8b, v2.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v3.2s, v3.2s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %yy = zext <8 x i8> %y to <8 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v8i8_v8i32_sext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_sext:
+; CHECK-BASE:       // %bb.0: // %entry
+; CHECK-BASE-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-BASE-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-BASE-NEXT:    sshll v2.4s, v0.4h, #0
+; CHECK-BASE-NEXT:    sshll v3.4s, v1.4h, #0
+; CHECK-BASE-NEXT:    saddw2 v0.4s, v2.4s, v0.8h
+; CHECK-BASE-NEXT:    saddw2 v1.4s, v3.4s, v1.8h
+; CHECK-BASE-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-BASE-NEXT:    addv s0, v0.4s
+; CHECK-BASE-NEXT:    fmov w0, s0
+; CHECK-BASE-NEXT:    ret
+;
+; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_sext:
+; CHECK-DOT:       // %bb.0: // %entry
+; CHECK-DOT-NEXT:    movi v2.8b, #1
+; CHECK-DOT-NEXT:    movi v3.2d, #0000000000000000
+; CHECK-DOT-NEXT:    sdot v3.2s, v1.8b, v2.8b
+; CHECK-DOT-NEXT:    sdot v3.2s, v0.8b, v2.8b
+; CHECK-DOT-NEXT:    addp v0.2s, v3.2s, v3.2s
+; CHECK-DOT-NEXT:    fmov w0, s0
+; CHECK-DOT-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %xx)
+  %yy = sext <8 x i8> %y to <8 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v4i8_v4i32_zext(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: add_pair_v4i8_v4i32_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    bic v1.4h, #255, lsl #8
+; CHECK-NEXT:    uaddl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %yy = zext <4 x i8> %y to <4 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define i32 @add_pair_v4i8_v4i32_sext(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: add_pair_v4i8_v4i32_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    shl v0.4s, v0.4s, #24
+; CHECK-NEXT:    shl v1.4s, v1.4s, #24
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #24
+; CHECK-NEXT:    ssra v0.4s, v1.4s, #24
+; CHECK-NEXT:    addv s0, v0.4s
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i32>
+  %z1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %xx)
+  %yy = sext <4 x i8> %y to <4 x i32>
+  %z2 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %yy)
+  %z = add i32 %z1, %z2
+  ret i32 %z
+}
+
+define zeroext i16 @add_pair_v16i8_v16i16_zext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i16_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v2.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v3.8h, v1.8b, #0
+; CHECK-NEXT:    uaddw2 v0.8h, v2.8h, v0.16b
+; CHECK-NEXT:    uaddw2 v1.8h, v3.8h, v1.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i16>
+  %z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %yy = zext <16 x i8> %y to <16 x i16>
+  %z2 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %yy)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define signext i16 @add_pair_v16i8_v16i16_sext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i16_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v2.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v3.8h, v1.8b, #0
+; CHECK-NEXT:    saddw2 v0.8h, v2.8h, v0.16b
+; CHECK-NEXT:    saddw2 v1.8h, v3.8h, v1.16b
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i16>
+  %z1 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %xx)
+  %yy = sext <16 x i8> %y to <16 x i16>
+  %z2 = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %yy)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define zeroext i16 @add_pair_v8i8_v8i16_zext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: add_pair_v8i8_v8i16_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0xffff
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i16>
+  %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %yy = zext <8 x i8> %y to <8 x i16>
+  %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %yy)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define signext i16 @add_pair_v8i8_v8i16_sext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: add_pair_v8i8_v8i16_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    addv h0, v0.8h
+; CHECK-NEXT:    addv h1, v1.8h
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    sxth w0, w8
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i16>
+  %z1 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %xx)
+  %yy = sext <8 x i8> %y to <8 x i16>
+  %z2 = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %yy)
+  %z = add i16 %z1, %z2
+  ret i16 %z
+}
+
+define zeroext i8 @add_pair_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i8:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    addv b0, v0.16b
+; CHECK-NEXT:    addv b1, v1.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    fmov w9, s1
+; CHECK-NEXT:    add w8, w8, w9
+; CHECK-NEXT:    and w0, w8, #0xff
+; CHECK-NEXT:    ret
+entry:
+  %z1 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %x)
+  %z2 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %y)
+  %z = add i8 %z1, %z2
+  ret i8 %z
+}
+
+define i64 @add_pair_v16i8_v16i64_zext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll2 v2.8h, v0.16b, #0
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll2 v3.8h, v1.16b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ushll2 v4.4s, v2.8h, #0
+; CHECK-NEXT:    ushll v2.4s, v2.4h, #0
+; CHECK-NEXT:    ushll2 v5.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v6.4s, v3.4h, #0
+; CHECK-NEXT:    ushll v7.4s, v1.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v3.8h, #0
+; CHECK-NEXT:    ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    uaddl2 v16.2d, v5.4s, v4.4s
+; CHECK-NEXT:    uaddl v4.2d, v5.2s, v4.2s
+; CHECK-NEXT:    uaddl2 v5.2d, v0.4s, v2.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    uaddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    uaddl2 v3.2d, v7.4s, v6.4s
+; CHECK-NEXT:    uaddl v6.2d, v7.2s, v6.2s
+; CHECK-NEXT:    add v5.2d, v5.2d, v16.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v2.2d, v3.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v6.2d, v1.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v5.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <16 x i8> %x to <16 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %yy = zext <16 x i8> %y to <16 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v16i8_v16i64_sext(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: add_pair_v16i8_v16i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll2 v2.8h, v0.16b, #0
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll2 v3.8h, v1.16b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    sshll2 v4.4s, v2.8h, #0
+; CHECK-NEXT:    sshll v2.4s, v2.4h, #0
+; CHECK-NEXT:    sshll2 v5.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll v6.4s, v3.4h, #0
+; CHECK-NEXT:    sshll v7.4s, v1.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v3.8h, #0
+; CHECK-NEXT:    sshll2 v1.4s, v1.8h, #0
+; CHECK-NEXT:    saddl2 v16.2d, v5.4s, v4.4s
+; CHECK-NEXT:    saddl v4.2d, v5.2s, v4.2s
+; CHECK-NEXT:    saddl2 v5.2d, v0.4s, v2.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    saddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    saddl2 v3.2d, v7.4s, v6.4s
+; CHECK-NEXT:    saddl v6.2d, v7.2s, v6.2s
+; CHECK-NEXT:    add v5.2d, v5.2d, v16.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v2.2d, v3.2d, v2.2d
+; CHECK-NEXT:    add v1.2d, v6.2d, v1.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v5.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <16 x i8> %x to <16 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %xx)
+  %yy = sext <16 x i8> %y to <16 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v8i8_v8i64_zext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: add_pair_v8i8_v8i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
+; CHECK-NEXT:    ushll v1.8h, v1.8b, #0
+; CHECK-NEXT:    ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll2 v3.4s, v1.8h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    uaddl2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    uaddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    uaddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <8 x i8> %x to <8 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %yy = zext <8 x i8> %y to <8 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v8i8_v8i64_sext(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: add_pair_v8i8_v8i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
+; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
+; CHECK-NEXT:    sshll2 v2.4s, v0.8h, #0
+; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
+; CHECK-NEXT:    sshll2 v3.4s, v1.8h, #0
+; CHECK-NEXT:    sshll v1.4s, v1.4h, #0
+; CHECK-NEXT:    saddl2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT:    saddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT:    saddl2 v2.2d, v1.4s, v3.4s
+; CHECK-NEXT:    saddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT:    add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT:    add v1.2d, v1.2d, v2.2d
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <8 x i8> %x to <8 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %xx)
+  %yy = sext <8 x i8> %y to <8 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i8_v4i64_zext(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: add_pair_v4i8_v4i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    bic v1.4h, #255, lsl #8
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v3.2d, v1.2s, #0
+; CHECK-NEXT:    uaddw2 v0.2d, v2.2d, v0.4s
+; CHECK-NEXT:    uaddw2 v1.2d, v3.2d, v1.4s
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <4 x i8> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = zext <4 x i8> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: add_pair_v4i8_v4i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
+; CHECK-NEXT:    ushll v1.4s, v1.4h, #0
+; CHECK-NEXT:    ushll v2.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v3.2d, v1.2s, #0
+; CHECK-NEXT:    ushll2 v0.2d, v0.4s, #0
+; CHECK-NEXT:    ushll2 v1.2d, v1.4s, #0
+; CHECK-NEXT:    shl v2.2d, v2.2d, #56
+; CHECK-NEXT:    shl v3.2d, v3.2d, #56
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-NEXT:    sshr v2.2d, v2.2d, #56
+; CHECK-NEXT:    sshr v3.2d, v3.2d, #56
+; CHECK-NEXT:    ssra v2.2d, v0.2d, #56
+; CHECK-NEXT:    ssra v3.2d, v1.2d, #56
+; CHECK-NEXT:    add v0.2d, v2.2d, v3.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <4 x i8> %x to <4 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %xx)
+  %yy = sext <4 x i8> %y to <4 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i8_v2i64_zext(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: add_pair_v2i8_v2i64_zext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d2, #0x0000ff000000ff
+; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
+; CHECK-NEXT:    uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = zext <2 x i8> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = zext <2 x i8> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i8_v2i64_sext(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: add_pair_v2i8_v2i64_sext:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ushll v1.2d, v1.2s, #0
+; CHECK-NEXT:    shl v0.2d, v0.2d, #56
+; CHECK-NEXT:    shl v1.2d, v1.2d, #56
+; CHECK-NEXT:    sshr v0.2d, v0.2d, #56
+; CHECK-NEXT:    ssra v0.2d, v1.2d, #56
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %xx = sext <2 x i8> %x to <2 x i64>
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %xx)
+  %yy = sext <2 x i8> %y to <2 x i64>
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %yy)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+define i64 @add_pair_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; CHECK-LABEL: add_pair_v2i64_v2i64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    addp d0, v0.2d
+; CHECK-NEXT:    fmov x0, d0
+; CHECK-NEXT:    ret
+entry:
+  %z1 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %x)
+  %z2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %y)
+  %z = add i64 %z1, %z2
+  ret i64 %z
+}
+
+declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
+declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
+declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
+declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
+declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
+declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
+declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
+declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
+declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)

From 970a191203e6d3d34c873beb64c333f2890b2025 Mon Sep 17 00:00:00 2001
From: eopXD <eop.chen@sifive.com>
Date: Fri, 21 Jan 2022 11:29:10 -0800
Subject: [PATCH 586/946] [Clang][RISCV] Guard vmulh, vsmul correctly

According to v-spec 1.0, `vmulh`, `vmulhu`, `vmulhsu` and `vsmul` are
NOT supported for EEW=64 in Zve64*.

This patch tries to guard it correctly.

Authored by: Craig Topper <craig.topper@sifive.com> @craig.topper
Co-Authored by: Eop Chen <eop.chen@sifive.com> @eopXD

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D117913
---
 clang/include/clang/Basic/riscv_vector.td     |   4 +
 .../rvv-intrinsics-overloaded/vmul-eew64.c    | 440 ++++++++++++++++++
 .../RISCV/rvv-intrinsics-overloaded/vmul.c    | 434 +----------------
 .../rvv-intrinsics-overloaded/vsmul-eew64.c   | 159 +++++++
 .../RISCV/rvv-intrinsics-overloaded/vsmul.c   | 154 +-----
 .../CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c | 440 ++++++++++++++++++
 .../test/CodeGen/RISCV/rvv-intrinsics/vmul.c  | 434 +----------------
 .../RISCV/rvv-intrinsics/vsmul-eew64.c        | 159 +++++++
 .../test/CodeGen/RISCV/rvv-intrinsics/vsmul.c | 154 +-----
 clang/utils/TableGen/RISCVVEmitter.cpp        |  18 +-
 10 files changed, 1219 insertions(+), 1177 deletions(-)
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c

diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index bf268d89d19e1..7e9f610a0b186 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -1668,11 +1668,13 @@ defm vmax : RVVSignedBinBuiltinSet;
 
 // 12.10. Vector Single-Width Integer Multiply Instructions
 defm vmul : RVVIntBinBuiltinSet;
+let RequiredFeatures = ["FullMultiply"] in {
 defm vmulh : RVVSignedBinBuiltinSet;
 defm vmulhu : RVVUnsignedBinBuiltinSet;
 defm vmulhsu : RVVOutOp1BuiltinSet<"vmulhsu", "csil",
                                    [["vv", "v", "vvUv"],
                                     ["vx", "v", "vvUe"]]>;
+}
 
 // 12.11. Vector Integer Divide Instructions
 defm vdivu : RVVUnsignedBinBuiltinSet;
@@ -1759,7 +1761,9 @@ defm vasubu : RVVUnsignedBinBuiltinSet;
 defm vasub : RVVSignedBinBuiltinSet;
 
 // 13.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+let RequiredFeatures = ["FullMultiply"] in {
 defm vsmul : RVVSignedBinBuiltinSet;
+}
 
 // 13.4. Vector Single-Width Scaling Shift Instructions
 defm vssrl : RVVUnsignedShiftBuiltinSet;
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c
new file mode 100644
index 0000000000000..a69e943a6a2bc
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul-eew64.c
@@ -0,0 +1,440 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// NOTE: This test file contains eew=64 of vmulh, vmulhu, vmulhsu.
+// NOTE: The purpose of separating these 3 instructions from vmul.c is that
+// eew=64 versions only enable when V extension is specified. (Not for zve)
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
+  return vmulh(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) {
+  return vmulh(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu(mask, maskedoff, op1, op2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
index 7d633ecde4265..b08b304c7ef5e 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
@@ -1120,78 +1120,6 @@ vint32m8_t test_vmulh_vx_i32m8(vint32m8_t op1, int32_t op2, size_t vl) {
   return vmulh(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
-  return vmulh(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmulhu.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], i64 [[VL:%.*]])
@@ -1516,78 +1444,6 @@ vuint32m8_t test_vmulhu_vx_u32m8(vuint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhu(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmulhsu.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], i64 [[VL:%.*]])
@@ -1912,78 +1768,6 @@ vint32m8_t test_vmulhsu_vx_i32m8(vint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhsu(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmul_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmul.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3100,78 +2884,6 @@ vint32m8_t test_vmulh_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t
   return vmulh(mask, maskedoff, op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) {
-  return vmulh(mask, maskedoff, op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmulhu.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3496,78 +3208,6 @@ vuint32m8_t test_vmulhu_vx_u32m8_m(vbool4_t mask, vuint32m8_t maskedoff, vuint32
   return vmulhu(mask, maskedoff, op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhu(mask, maskedoff, op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmulhsu.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3891,75 +3531,3 @@ vint32m8_t test_vmulhsu_vv_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8
 vint32m8_t test_vmulhsu_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhsu(mask, maskedoff, op1, op2, vl);
 }
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu(mask, maskedoff, op1, op2, vl);
-}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c
new file mode 100644
index 0000000000000..dc05b51b2a6aa
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul-eew64.c
@@ -0,0 +1,159 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// NOTE: The purpose of separating these 3 instructions from vsmul.c is that
+// eew=64 versions only enable when V extension is specified. (Not for zve)
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
+  return vsmul(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
+                                 vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
+                                 vint64m1_t op1, int64_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
+                                 vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
+                                 vint64m2_t op1, int64_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
+                                 vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
+                                 vint64m4_t op1, int64_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
+                                 vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
+                                 vint64m8_t op1, int64_t op2, size_t vl) {
+  return vsmul(mask, maskedoff, op1, op2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
index 6c1479515c36f..3905826cb0d80 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vsmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
@@ -328,78 +328,6 @@ vint32m8_t test_vsmul_vx_i32m8(vint32m8_t op1, int32_t op2, size_t vl) {
   return vsmul(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
-  return vsmul(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vsmul_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -762,83 +690,3 @@ vint32m8_t test_vsmul_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff,
                                  vint32m8_t op1, int32_t op2, size_t vl) {
   return vsmul(mask, maskedoff, op1, op2, vl);
 }
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
-                                 vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
-                                 vint64m1_t op1, int64_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
-                                 vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
-                                 vint64m2_t op1, int64_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
-                                 vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
-                                 vint64m4_t op1, int64_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
-                                 vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
-                                 vint64m8_t op1, int64_t op2, size_t vl) {
-  return vsmul(mask, maskedoff, op1, op2, vl);
-}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c
new file mode 100644
index 0000000000000..97686762db666
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul-eew64.c
@@ -0,0 +1,440 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// NOTE: This test file contains eew=64 of vmulh, vmulhu, vmulhsu.
+// NOTE: The purpose of separating these 3 instructions from vmul.c is that
+// eew=64 versions only enable when V extension is specified. (Not for zve)
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vmulh_vv_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vmulh_vv_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vmulh_vv_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vmulh_vv_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhu_vv_u64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhu_vv_u64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhu_vv_u64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhu_vv_u64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhsu_vv_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhsu_vv_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhsu_vv_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhsu_vv_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vmulh_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vmulh_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vmulh_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vmulh_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) {
+  return vmulh_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhu_vv_u64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhu_vv_u64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhu_vv_u64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhu_vv_u64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhu_vx_u64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) {
+  return vmulhsu_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) {
+  return vmulhsu_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) {
+  return vmulhsu_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) {
+  return vmulhsu_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) {
+  return vmulhsu_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
index 5d7f7e504290e..c2c4522ca174b 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
@@ -1120,78 +1120,6 @@ vint32m8_t test_vmulh_vx_i32m8(vint32m8_t op1, int32_t op2, size_t vl) {
   return vmulh_vx_i32m8(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulh_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vmulh_vv_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulh_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulh_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vmulh_vv_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulh_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulh_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vmulh_vv_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulh_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulh_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vmulh_vv_i64m8(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulh_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m8(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmulhu.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], i64 [[VL:%.*]])
@@ -1516,78 +1444,6 @@ vuint32m8_t test_vmulhu_vx_u32m8(vuint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhu_vx_u32m8(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vv_u64m1(vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhu_vv_u64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vx_u64m1(vuint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vv_u64m2(vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhu_vv_u64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vx_u64m2(vuint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vv_u64m4(vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhu_vv_u64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vx_u64m4(vuint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vv_u64m8(vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhu_vv_u64m8(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vx_u64m8(vuint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m8(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmulhsu.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], i64 [[VL:%.*]])
@@ -1912,78 +1768,6 @@ vint32m8_t test_vmulhsu_vx_i32m8(vint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhsu_vx_i32m8(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vv_i64m1(vint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhsu_vv_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vx_i64m1(vint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vv_i64m2(vint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhsu_vv_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vx_i64m2(vint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vv_i64m4(vint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhsu_vv_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vx_i64m4(vint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vv_i64m8(vint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhsu_vv_i64m8(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vx_i64m8(vint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m8(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmul_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmul.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3100,78 +2884,6 @@ vint32m8_t test_vmulh_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t
   return vmulh_vx_i32m8_m(mask, maskedoff, op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulh_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vmulh_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulh.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulh_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulh_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vmulh_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulh.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulh_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulh_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vmulh_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulh.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulh_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulh_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vmulh_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulh_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulh_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, int64_t op2, size_t vl) {
-  return vmulh_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhu_vv_u8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmulhu.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3496,78 +3208,6 @@ vuint32m8_t test_vmulhu_vx_u32m8_m(vbool4_t mask, vuint32m8_t maskedoff, vuint32
   return vmulhu_vx_u32m8_m(mask, maskedoff, op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vv_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhu_vv_u64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vuint64m1_t test_vmulhu_vx_u64m1_m(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vv_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhu_vv_u64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vuint64m2_t test_vmulhu_vx_u64m2_m(vbool32_t mask, vuint64m2_t maskedoff, vuint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vv_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhu_vv_u64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vuint64m4_t test_vmulhu_vx_u64m4_m(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vv_u64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vv_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhu_vv_u64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhu_vx_u64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vuint64m8_t test_vmulhu_vx_u64m8_m(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhu_vx_u64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vmulhsu_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vmulhsu.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -3891,75 +3531,3 @@ vint32m8_t test_vmulhsu_vv_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8
 vint32m8_t test_vmulhsu_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff, vint32m8_t op1, uint32_t op2, size_t vl) {
   return vmulhsu_vx_i32m8_m(mask, maskedoff, op1, op2, vl);
 }
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, vuint64m1_t op2, size_t vl) {
-  return vmulhsu_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vmulhsu.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vmulhsu_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff, vint64m1_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, vuint64m2_t op2, size_t vl) {
-  return vmulhsu_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vmulhsu.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vmulhsu_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff, vint64m2_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, vuint64m4_t op2, size_t vl) {
-  return vmulhsu_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vmulhsu.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vmulhsu_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff, vint64m4_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, vuint64m8_t op2, size_t vl) {
-  return vmulhsu_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vmulhsu_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vmulhsu_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff, vint64m8_t op1, uint64_t op2, size_t vl) {
-  return vmulhsu_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c
new file mode 100644
index 0000000000000..43983b15f18d5
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul-eew64.c
@@ -0,0 +1,159 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// NOTE: The purpose of separating these 3 instructions from vsmul.c is that
+// eew=64 versions only enable when V extension is specified. (Not for zve)
+
+#include <riscv_vector.h>
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vsmul_vv_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m1(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vsmul_vv_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m2(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vsmul_vv_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m4(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vsmul_vv_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m8(op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
+                                 vint64m1_t op1, vint64m1_t op2, size_t vl) {
+  return vsmul_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
+//
+vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
+                                 vint64m1_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
+                                 vint64m2_t op1, vint64m2_t op2, size_t vl) {
+  return vsmul_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
+//
+vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
+                                 vint64m2_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
+                                 vint64m4_t op1, vint64m4_t op2, size_t vl) {
+  return vsmul_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
+//
+vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
+                                 vint64m4_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
+                                 vint64m8_t op1, vint64m8_t op2, size_t vl) {
+  return vsmul_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
+
+// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m(
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
+//
+vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
+                                 vint64m8_t op1, int64_t op2, size_t vl) {
+  return vsmul_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
index dd0ba266080a1..e1db9d589ec65 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vsmul.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
+// RUN: %clang_cc1 -triple riscv64 -target-feature +zve64x -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
 
 #include <riscv_vector.h>
 
@@ -328,78 +328,6 @@ vint32m8_t test_vsmul_vx_i32m8(vint32m8_t op1, int32_t op2, size_t vl) {
   return vsmul_vx_i32m8(op1, op2, vl);
 }
 
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vv_i64m1(vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vsmul_vv_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.nxv1i64.i64.i64(<vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vx_i64m1(vint64m1_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m1(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vv_i64m2(vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vsmul_vv_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.nxv2i64.i64.i64(<vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vx_i64m2(vint64m2_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m2(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vv_i64m4(vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vsmul_vv_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.nxv4i64.i64.i64(<vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vx_i64m4(vint64m4_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m4(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vv_i64m8(vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vsmul_vv_i64m8(op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.nxv8i64.i64.i64(<vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], i64 [[VL:%.*]])
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vx_i64m8(vint64m8_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m8(op1, op2, vl);
-}
-
 // CHECK-RV64-LABEL: @test_vsmul_vv_i8mf8_m(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vsmul.mask.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> [[MASKEDOFF:%.*]], <vscale x 1 x i8> [[OP1:%.*]], <vscale x 1 x i8> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
@@ -762,83 +690,3 @@ vint32m8_t test_vsmul_vx_i32m8_m(vbool4_t mask, vint32m8_t maskedoff,
                                  vint32m8_t op1, int32_t op2, size_t vl) {
   return vsmul_vx_i32m8_m(mask, maskedoff, op1, op2, vl);
 }
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], <vscale x 1 x i64> [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vv_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
-                                 vint64m1_t op1, vint64m1_t op2, size_t vl) {
-  return vsmul_vv_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m1_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vsmul.mask.nxv1i64.i64.i64(<vscale x 1 x i64> [[MASKEDOFF:%.*]], <vscale x 1 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 1 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP0]]
-//
-vint64m1_t test_vsmul_vx_i64m1_m(vbool64_t mask, vint64m1_t maskedoff,
-                                 vint64m1_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m1_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vv_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
-                                 vint64m2_t op1, vint64m2_t op2, size_t vl) {
-  return vsmul_vv_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m2_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vsmul.mask.nxv2i64.i64.i64(<vscale x 2 x i64> [[MASKEDOFF:%.*]], <vscale x 2 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 2 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP0]]
-//
-vint64m2_t test_vsmul_vx_i64m2_m(vbool32_t mask, vint64m2_t maskedoff,
-                                 vint64m2_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m2_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], <vscale x 4 x i64> [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vv_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
-                                 vint64m4_t op1, vint64m4_t op2, size_t vl) {
-  return vsmul_vv_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m4_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vsmul.mask.nxv4i64.i64.i64(<vscale x 4 x i64> [[MASKEDOFF:%.*]], <vscale x 4 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 4 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP0]]
-//
-vint64m4_t test_vsmul_vx_i64m4_m(vbool16_t mask, vint64m4_t maskedoff,
-                                 vint64m4_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m4_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vv_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], <vscale x 8 x i64> [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vv_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
-                                 vint64m8_t op1, vint64m8_t op2, size_t vl) {
-  return vsmul_vv_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
-
-// CHECK-RV64-LABEL: @test_vsmul_vx_i64m8_m(
-// CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vsmul.mask.nxv8i64.i64.i64(<vscale x 8 x i64> [[MASKEDOFF:%.*]], <vscale x 8 x i64> [[OP1:%.*]], i64 [[OP2:%.*]], <vscale x 8 x i1> [[MASK:%.*]], i64 [[VL:%.*]], i64 0)
-// CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP0]]
-//
-vint64m8_t test_vsmul_vx_i64m8_m(vbool8_t mask, vint64m8_t maskedoff,
-                                 vint64m8_t op1, int64_t op2, size_t vl) {
-  return vsmul_vx_i64m8_m(mask, maskedoff, op1, op2, vl);
-}
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 67d946d73e419..4b80d6da72fa8 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -141,11 +141,12 @@ using RISCVPredefinedMacroT = uint8_t;
 
 enum RISCVPredefinedMacro : RISCVPredefinedMacroT {
   Basic = 0,
-  Zfh = 1 << 1,
-  RV64 = 1 << 2,
-  VectorMaxELen64 = 1 << 3,
-  VectorMaxELenFp32 = 1 << 4,
-  VectorMaxELenFp64 = 1 << 5,
+  V = 1 << 1,
+  Zfh = 1 << 2,
+  RV64 = 1 << 3,
+  VectorMaxELen64 = 1 << 4,
+  VectorMaxELenFp32 = 1 << 5,
+  VectorMaxELenFp64 = 1 << 6,
 };
 
 // TODO refactor RVVIntrinsic class design after support all intrinsic
@@ -808,6 +809,11 @@ RVVIntrinsic::RVVIntrinsic(StringRef NewName, StringRef Suffix,
   for (auto Feature : RequiredFeatures) {
     if (Feature == "RV64")
       RISCVPredefinedMacros |= RISCVPredefinedMacro::RV64;
+    // Note: Full multiply instruction (mulh, mulhu, mulhsu, smul) for EEW=64
+    // require V.
+    if (Feature == "FullMultiply" &&
+        (RISCVPredefinedMacros & RISCVPredefinedMacro::VectorMaxELen64))
+      RISCVPredefinedMacros |= RISCVPredefinedMacro::V;
   }
 
   // Init OutputType and InputTypes
@@ -1314,6 +1320,8 @@ bool RVVEmitter::emitMacroRestrictionStr(RISCVPredefinedMacroT PredefinedMacros,
     return false;
   OS << "#if ";
   ListSeparator LS(" && ");
+  if (PredefinedMacros & RISCVPredefinedMacro::V)
+    OS << LS << "defined(__riscv_v)";
   if (PredefinedMacros & RISCVPredefinedMacro::Zfh)
     OS << LS << "defined(__riscv_zfh)";
   if (PredefinedMacros & RISCVPredefinedMacro::RV64)

From ee522345ae80c78f93ef5703b380f8496642c8db Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 25 Jan 2022 19:23:48 +0100
Subject: [PATCH 587/946] [libc++][doc] Update format implementation status.

---
 libcxx/docs/Status/FormatIssues.csv |  2 +-
 libcxx/docs/Status/FormatPaper.csv  | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libcxx/docs/Status/FormatIssues.csv b/libcxx/docs/Status/FormatIssues.csv
index b979b0de3a18d..473aba8ea2050 100644
--- a/libcxx/docs/Status/FormatIssues.csv
+++ b/libcxx/docs/Status/FormatIssues.csv
@@ -1,6 +1,6 @@
 Number,Name,Assignee,Patch,Status,First released version
 `P0645 <https://wg21.link/P0645>`_,"Text Formatting",Mark de Wever,,|Complete|,Clang 14
-`P1652 <https://wg21.link/P1652>`_,"Printf corner cases in std::format",Mark de Wever,"`D103433 <https://reviews.llvm.org/D103433>`__, `D114001 <https://reviews.llvm.org/D114001>`__",|Review|,
+`P1652 <https://wg21.link/P1652>`_,"Printf corner cases in std::format",Mark de Wever,"`D103433 <https://reviews.llvm.org/D103433>`__, `D114001 <https://reviews.llvm.org/D114001>`__",|Complete|,Clang 14
 `P1892 <https://wg21.link/P1892>`_,"Extended locale-specific presentation specifiers for std::format",Mark de Wever,`D103368 <https://reviews.llvm.org/D103368>`__,|Complete|,Clang 14
 `P1868 <https://wg21.link/P1868>`_,"width: clarifying units of width and precision in std::format (Implements the unicode support.)",Mark de Wever,"`D103413 <https://reviews.llvm.org/D103413>`__ `D103425 <https://reviews.llvm.org/D103425>`__ `D103670 <https://reviews.llvm.org/D103670>`__",|Complete|,Clang 14
 `P2216 <https://wg21.link/P2216>`_,"std::format improvements",Mark de Wever,,|In Progress|,
diff --git a/libcxx/docs/Status/FormatPaper.csv b/libcxx/docs/Status/FormatPaper.csv
index f74ec44e6f607..e45777ef1cd77 100644
--- a/libcxx/docs/Status/FormatPaper.csv
+++ b/libcxx/docs/Status/FormatPaper.csv
@@ -6,21 +6,21 @@ Section,Description,Dependencies,Assignee,Patch,Status,First released version
 `[format.context] <https://wg21.link/format.context>`_,"Class template basic_format_context",,Mark de Wever,`D103357 <https://llvm.org/D103357>`__,|Complete|,Clang 14
 `[format.args] <https://wg21.link/format.args>`_,"Class template basic_format_args",,Mark de Wever,`D103357 <https://llvm.org/D103357>`__,|Complete|,Clang 14
 `[format.arg] <https://wg21.link/format.arg>`_,"Class template basic_format_arg",,Mark de Wever,`D103357 <https://llvm.org/D103357>`__,|Complete|,Clang 14
-`[format.arg] <https://wg21.link/format.arg>`_,"Class template basic_format_arg - handle",,Unassigned,,|Not Started|,
-`[format.arg] <https://wg21.link/format.arg>`_,"Class template basic_format_arg - pointers",,Mark de Wever,,|In Progress|,
+`[format.arg] <https://wg21.link/format.arg>`_,"Class template basic_format_arg - handle",,Mark de Wever,,|Complete|,Clang 14
+`[format.arg] <https://wg21.link/format.arg>`_,"Class template basic_format_arg - pointers",,Mark de Wever,,|Complete|,Clang 14
 `[format.arg.store] <https://wg21.link/format.arg.store>`_,"Class template format-arg-store",,Mark de Wever,`D103357 <https://llvm.org/D103357>`__,|Complete|,Clang 14
 `[format.formatter.spec] <https://wg21.link/format.formatter.spec>`_,"Formatter specializations - character types",,Mark de Wever,"`D96664 <https://llvm.org/D96664>`__ `D103466 <https://llvm.org/D103466>`__",|Complete|,Clang 14
 `[format.formatter.spec] <https://wg21.link/format.formatter.spec>`_,"Formatter specializations - string types",,Mark de Wever,"`D96664 <https://llvm.org/D96664>`__ `D103425 <https://reviews.llvm.org/D103425>`__",|Complete|,Clang 14
 `[format.formatter.spec] <https://wg21.link/format.formatter.spec>`_,"Formatter specializations - boolean type",,Mark de Wever,"`D96664 <https://llvm.org/D96664>`__ `D103670 <https://reviews.llvm.org/D103670>`__",|Complete|,Clang 14
 `[format.formatter.spec] <https://wg21.link/format.formatter.spec>`_,"Formatter specializations - integral types",,Mark de Wever,"`D96664 <https://llvm.org/D96664>`__ `D103433 <https://reviews.llvm.org/D103433>`__",|Complete|,Clang 14
-`[format.formatter.spec] <https://wg21.link/format.formatter.spec>`_,"Formatter specializations - floating-point types",`D70631 <https://llvm.org/D70631>`__,Mark de Wever,`D114001 <https://reviews.llvm.org/D114001>`__,|Review|,
-`[format.formatter.spec] <https://wg21.link/format.formatter.spec>`_,"Formatter specializations - pointer types",,Mark de Wever,,|In Progress|,
+`[format.formatter.spec] <https://wg21.link/format.formatter.spec>`_,"Formatter specializations - floating-point types",`D70631 <https://llvm.org/D70631>`__,Mark de Wever,`D114001 <https://reviews.llvm.org/D114001>`__,|Complete|,Clang 14
+`[format.formatter.spec] <https://wg21.link/format.formatter.spec>`_,"Formatter specializations - pointer types",,Mark de Wever,,|Complete|,Clang 14
 `[format.string.std] <https://wg21.link/format.string.std>`_,"Standard format specifiers - character types",,Mark de Wever,`D103368 <https://reviews.llvm.org/D103368>`__,|Complete|,Clang 14
 `[format.string.std] <https://wg21.link/format.string.std>`_,"Standard format specifiers - string types",`D103379 <https://reviews.llvm.org/D103379>`__,Mark de Wever,"`D103368 <https://reviews.llvm.org/D103368>`__ `D103413 <https://reviews.llvm.org/D103413>`__",|Complete|,Clang 14
 `[format.string.std] <https://wg21.link/format.string.std>`_,"Standard format specifiers - boolean type",`D103379 <https://reviews.llvm.org/D103379>`__,Mark de Wever,"`D103368 <https://reviews.llvm.org/D103368>`__ `D103413 <https://reviews.llvm.org/D103413>`__",|Complete|,Clang 14
 `[format.string.std] <https://wg21.link/format.string.std>`_,"Standard format specifiers - integral types",,Mark de Wever,`D103368 <https://reviews.llvm.org/D103368>`__,|Complete|,Clang 14
-`[format.string.std] <https://wg21.link/format.string.std>`_,"Standard format specifiers - floating-point types",,Mark de Wever,`D114001 <https://reviews.llvm.org/D114001>`__,|Review|,
-`[format.string.std] <https://wg21.link/format.string.std>`_,"Standard format specifiers - pointer types",,Mark de Wever,,|In Progress|,
+`[format.string.std] <https://wg21.link/format.string.std>`_,"Standard format specifiers - floating-point types",,Mark de Wever,`D114001 <https://reviews.llvm.org/D114001>`__,|Complete|,Clang 14
+`[format.string.std] <https://wg21.link/format.string.std>`_,"Standard format specifiers - pointer types",,Mark de Wever,,|Complete|,Clang 14
 `[format.functions] <https://wg21.link/format.functions>`_,"Format functions - format(string_view fmt, const Args&... args);",,Mark de Wever,`D96664 <https://llvm.org/D96664>`__,|Complete|,Clang 14
 `[format.functions] <https://wg21.link/format.functions>`_,"Format functions - format(wstring_view fmt, const Args&... args);",,Mark de Wever,`D96664 <https://llvm.org/D96664>`__,|Complete|,Clang 14
 `[format.functions] <https://wg21.link/format.functions>`_,"Format functions - format(const locale& loc, string_view fmt, const Args&... args);",,Mark de Wever,`D96664 <https://llvm.org/D96664>`__,|Complete|,Clang 14

From 4eb909c884721a0be5078c87d35d550ad31c9fb7 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Tue, 25 Jan 2022 09:53:40 -0500
Subject: [PATCH 588/946] Cleanup header dependencies of
 llvm/Support/Compiler.h

<new> and <cstdef> were introduced in aa60b3fd875c3 but the dependency
is now dead.

As a consequence you may need to include <new> where you use it while it
was auto-included as an implicit dependency before.

The impact on the codebase is small, as <new> is a very small header
(<100 SLOC) but it gets included everywhere, so that somehow counts (?)
---
 llvm/include/llvm/Support/Compiler.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index b31ba6bc7fc20..f4c277fae7cc2 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -17,9 +17,6 @@
 
 #include "llvm/Config/llvm-config.h"
 
-#ifdef __cplusplus
-#include <new>
-#endif
 #include <stddef.h>
 
 #if defined(_MSC_VER)

From 4cdc4416903bd4a818d70042d479442725eeebcc Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 10:29:04 -0800
Subject: [PATCH 589/946] [ELF] Parallelize --compress-debug-sections=zlib

When linking a Debug build clang (265MiB SHF_ALLOC sections, 920MiB uncompressed
debug info), in a --threads=1 link "Compress debug sections" takes 2/3 time and
in a --threads=8 link "Compress debug sections" takes ~70% time.

This patch splits a section into 1MiB shards and calls zlib `deflake` parallelly.

DEFLATE blocks are a bit sequence. We need to ensure every shard starts
at a byte boundary for concatenation. We use Z_SYNC_FLUSH for all shards
but the last to flush the output to a byte boundary. (Z_FULL_FLUSH can
be used as well, but Z_FULL_FLUSH clears the hash table which just
wastes time.)

The last block requires the BFINAL flag. We call deflate with Z_FINISH
to set the flag as well as flush the output to a byte boundary. Under
the hood, all of Z_SYNC_FLUSH, Z_FULL_FLUSH, and Z_FINISH emit a
non-compressed block (called stored block in zlib). RFC1951 says "Any
bits of input up to the next byte boundary are ignored."

In a --threads=8 link, "Compress debug sections" is 5.7x as fast and the total
speed is 2.54x. Because the hash table for one shard is not shared with the next
shard, the output is slightly larger. Better compression ratio can be achieved
by preloading the window size from the previous shard as dictionary
(`deflateSetDictionary`), but that is overkill.

```
# 1MiB shards
% bloaty clang.new -- clang.old
    FILE SIZE        VM SIZE
 --------------  --------------
  +0.3%  +129Ki  [ = ]       0    .debug_str
  +0.1%  +105Ki  [ = ]       0    .debug_info
  +0.3%  +101Ki  [ = ]       0    .debug_line
  +0.2% +2.66Ki  [ = ]       0    .debug_abbrev
  +0.0% +1.19Ki  [ = ]       0    .debug_ranges
  +0.1%  +341Ki  [ = ]       0    TOTAL

# 2MiB shards
% bloaty clang.new -- clang.old
    FILE SIZE        VM SIZE
 --------------  --------------
  +0.2% +74.2Ki  [ = ]       0    .debug_line
  +0.1% +72.3Ki  [ = ]       0    .debug_str
  +0.0% +69.9Ki  [ = ]       0    .debug_info
  +0.1%    +976  [ = ]       0    .debug_abbrev
  +0.0%    +882  [ = ]       0    .debug_ranges
  +0.0%  +218Ki  [ = ]       0    TOTAL
```

Bonus in not using zlib::compress

* we can compress a debug section larger than 4GiB
* peak memory usage is lower because for most shards the output size is less
  than 50% input size (all less than 55% for a large binary I tested, but
  decreasing the initial output size does not decrease memory usage)

Reviewed By: ikudrin

Differential Revision: https://reviews.llvm.org/D117853
---
 lld/ELF/CMakeLists.txt     |  5 ++
 lld/ELF/OutputSections.cpp | 99 ++++++++++++++++++++++++++++++++++----
 lld/ELF/OutputSections.h   |  8 ++-
 3 files changed, 101 insertions(+), 11 deletions(-)

diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt
index f85d0fb9f55e3..b37035d3e7429 100644
--- a/lld/ELF/CMakeLists.txt
+++ b/lld/ELF/CMakeLists.txt
@@ -2,6 +2,10 @@ set(LLVM_TARGET_DEFINITIONS Options.td)
 tablegen(LLVM Options.inc -gen-opt-parser-defs)
 add_public_tablegen_target(ELFOptionsTableGen)
 
+if(LLVM_ENABLE_ZLIB)
+  set(imported_libs ZLIB::ZLIB)
+endif()
+
 add_lld_library(lldELF
   AArch64ErrataFix.cpp
   Arch/AArch64.cpp
@@ -58,6 +62,7 @@ add_lld_library(lldELF
 
   LINK_LIBS
   lldCommon
+  ${imported_libs}
   ${LLVM_PTHREAD_LIB}
 
   DEPENDS
diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index 07ee7d84a2cd3..cffde5d61ac91 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -15,7 +15,7 @@
 #include "lld/Common/Memory.h"
 #include "lld/Common/Strings.h"
 #include "llvm/BinaryFormat/Dwarf.h"
-#include "llvm/Support/Compression.h"
+#include "llvm/Config/config.h" // LLVM_ENABLE_ZLIB
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/Parallel.h"
@@ -23,6 +23,9 @@
 #include "llvm/Support/TimeProfiler.h"
 #include <regex>
 #include <unordered_set>
+#if LLVM_ENABLE_ZLIB
+#include <zlib.h>
+#endif
 
 using namespace llvm;
 using namespace llvm::dwarf;
@@ -284,13 +287,45 @@ static void fill(uint8_t *buf, size_t size,
   memcpy(buf + i, filler.data(), size - i);
 }
 
+#if LLVM_ENABLE_ZLIB
+static SmallVector<uint8_t, 0> deflateShard(ArrayRef<uint8_t> in, int level,
+                                            int flush) {
+  // 15 and 8 are default. windowBits=-15 is negative to generate raw deflate
+  // data with no zlib header or trailer.
+  z_stream s = {};
+  deflateInit2(&s, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
+  s.next_in = const_cast<uint8_t *>(in.data());
+  s.avail_in = in.size();
+
+  // Allocate a buffer of half of the input size, and grow it by 1.5x if
+  // insufficient.
+  SmallVector<uint8_t, 0> out;
+  size_t pos = 0;
+  out.resize_for_overwrite(std::max<size_t>(in.size() / 2, 64));
+  do {
+    if (pos == out.size())
+      out.resize_for_overwrite(out.size() * 3 / 2);
+    s.next_out = out.data() + pos;
+    s.avail_out = out.size() - pos;
+    (void)deflate(&s, flush);
+    pos = s.next_out - out.data();
+  } while (s.avail_out == 0);
+  assert(s.avail_in == 0);
+
+  out.truncate(pos);
+  deflateEnd(&s);
+  return out;
+}
+#endif
+
 // Compress section contents if this section contains debug info.
 template <class ELFT> void OutputSection::maybeCompress() {
+#if LLVM_ENABLE_ZLIB
   using Elf_Chdr = typename ELFT::Chdr;
 
   // Compress only DWARF debug sections.
   if (!config->compressDebugSections || (flags & SHF_ALLOC) ||
-      !name.startswith(".debug_"))
+      !name.startswith(".debug_") || size == 0)
     return;
 
   llvm::TimeTraceScope timeScope("Compress debug sections");
@@ -309,13 +344,42 @@ template <class ELFT> void OutputSection::maybeCompress() {
   // -O2 is given, we use level 6 to compress debug info more by ~15%. We found
   // that level 7 to 9 doesn't make much difference (~1% more compression) while
   // they take significant amount of time (~2x), so level 6 seems enough.
-  if (Error e = zlib::compress(toStringRef(buf), compressedData,
-                               config->optimize >= 2 ? 6 : 1))
-    fatal("compress failed: " + llvm::toString(std::move(e)));
+  const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED;
+
+  // Split input into 1-MiB shards.
+  constexpr size_t shardSize = 1 << 20;
+  const size_t numShards = (size + shardSize - 1) / shardSize;
+  auto shardsIn = std::make_unique<ArrayRef<uint8_t>[]>(numShards);
+  for (size_t i = 0, start = 0, end; start != buf.size(); ++i, start = end) {
+    end = std::min(start + shardSize, buf.size());
+    shardsIn[i] = makeArrayRef<uint8_t>(buf.data() + start, end - start);
+  }
+
+  // Compress shards and compute Alder-32 checksums. Use Z_SYNC_FLUSH for all
+  // shards but the last to flush the output to a byte boundary to be
+  // concatenated with the next shard.
+  auto shardsOut = std::make_unique<SmallVector<uint8_t, 0>[]>(numShards);
+  auto shardsAdler = std::make_unique<uint32_t[]>(numShards);
+  parallelForEachN(0, numShards, [&](size_t i) {
+    shardsOut[i] = deflateShard(shardsIn[i], level,
+                                i != numShards - 1 ? Z_SYNC_FLUSH : Z_FINISH);
+    shardsAdler[i] = adler32(1, shardsIn[i].data(), shardsIn[i].size());
+  });
+
+  // Update section size and combine Alder-32 checksums.
+  uint32_t checksum = 1;       // Initial Adler-32 value
+  size = sizeof(Elf_Chdr) + 2; // Elf_Chdir and zlib header
+  for (size_t i = 0; i != numShards; ++i) {
+    size += shardsOut[i].size();
+    checksum = adler32_combine(checksum, shardsAdler[i], shardsIn[i].size());
+  }
+  size += 4; // checksum
 
-  // Update section headers.
-  size = sizeof(Elf_Chdr) + compressedData.size();
+  compressed.shards = std::move(shardsOut);
+  compressed.numShards = numShards;
+  compressed.checksum = checksum;
   flags |= SHF_COMPRESSED;
+#endif
 }
 
 static void writeInt(uint8_t *buf, uint64_t data, uint64_t size) {
@@ -339,10 +403,25 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) {
   // If --compress-debug-section is specified and if this is a debug section,
   // we've already compressed section contents. If that's the case,
   // just write it down.
-  if (!compressedData.empty()) {
+  if (compressed.shards) {
     memcpy(buf, zDebugHeader.data(), zDebugHeader.size());
-    memcpy(buf + zDebugHeader.size(), compressedData.data(),
-           compressedData.size());
+    buf += zDebugHeader.size();
+    size -= zDebugHeader.size();
+
+    // Compute shard offsets.
+    auto offsets = std::make_unique<size_t[]>(compressed.numShards);
+    offsets[0] = 2; // zlib header
+    for (size_t i = 1; i != compressed.numShards; ++i)
+      offsets[i] = offsets[i - 1] + compressed.shards[i - 1].size();
+
+    buf[0] = 0x78; // CMF
+    buf[1] = 0x01; // FLG: best speed
+    parallelForEachN(0, compressed.numShards, [&](size_t i) {
+      memcpy(buf + offsets[i], compressed.shards[i].data(),
+             compressed.shards[i].size());
+    });
+
+    write32be(buf + size - 4, compressed.checksum);
     return;
   }
 
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index 4f589d8432e43..957e6768ff6ea 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -25,6 +25,12 @@ struct PhdrEntry;
 class InputSection;
 class InputSectionBase;
 
+struct CompressedData {
+  std::unique_ptr<SmallVector<uint8_t, 0>[]> shards;
+  uint32_t numShards = 0;
+  uint32_t checksum = 0;
+};
+
 // This represents a section in an output file.
 // It is composed of multiple InputSections.
 // The writer creates multiple OutputSections and assign them unique,
@@ -113,7 +119,7 @@ class OutputSection final : public SectionCommand, public SectionBase {
 private:
   // Used for implementation of --compress-debug-sections option.
   SmallVector<uint8_t, 0> zDebugHeader;
-  SmallVector<char, 0> compressedData;
+  CompressedData compressed;
 
   std::array<uint8_t, 4> getFiller();
 };

From 93230ac1d2cf32419ce88fdd850f92a02bec5553 Mon Sep 17 00:00:00 2001
From: Casey Carter <Casey@Carter.net>
Date: Sat, 1 Jan 2022 21:53:37 -0800
Subject: [PATCH 590/946] [libcxx][test] Use bool allocators for
 vector<bool>::get_allocator test

... to be consistent with other `get_allocator` tests, and to avoid requiring `vector<T, allocator<U>>` to be valid.
---
 .../containers/sequences/vector.bool/get_allocator.pass.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp b/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp
index 566d1f81bafc7..f518b4601eef3 100644
--- a/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp
+++ b/libcxx/test/std/containers/sequences/vector.bool/get_allocator.pass.cpp
@@ -20,13 +20,13 @@
 
 int main(int, char**) {
     {
-        std::allocator<int> alloc;
+        std::allocator<bool> alloc;
         const std::vector<bool> vb(alloc);
         assert(vb.get_allocator() == alloc);
     }
     {
-        other_allocator<int> alloc(1);
-        const std::vector<bool, other_allocator<int> > vb(alloc);
+        other_allocator<bool> alloc(1);
+        const std::vector<bool, other_allocator<bool> > vb(alloc);
         assert(vb.get_allocator() == alloc);
     }
 

From e5a315f57acf5580aa8819123300d90b4f7a160a Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <ravishankarm@google.com>
Date: Tue, 25 Jan 2022 10:36:34 -0800
Subject: [PATCH 591/946] [mlir][Linalg] Disallow ops with index semantics in
 `PushExpandingReshape`.

This pattern is not written to handle operations with `linalg.index`
operations in its body, i.e. operations that have index semantics.

Differential Revision: https://reviews.llvm.org/D117856
---
 .../Linalg/Transforms/ElementwiseOpFusion.cpp |  2 +-
 .../Dialect/Linalg/fusion-push-reshape.mlir   | 27 +++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
index be34ef8bbd625..aaa5d4c386208 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -994,7 +994,7 @@ struct PushExpandingReshape : public OpRewritePattern<GenericOp> {
   LogicalResult matchAndRewrite(GenericOp genericOp,
                                 PatternRewriter &rewriter) const override {
     // Only apply to elementwise linalg on tensor.
-    if (!genericOp.hasTensorSemantics() ||
+    if (!genericOp.hasTensorSemantics() || genericOp.hasIndexSemantics() ||
         genericOp.getNumParallelLoops() != genericOp.getNumLoops())
       return failure();
     // Only support identity output maps. It could be extended to permuations if
diff --git a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
index 9e96c98e7850b..0c02ff8c54d1f 100644
--- a/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
+++ b/mlir/test/Dialect/Linalg/fusion-push-reshape.mlir
@@ -124,3 +124,30 @@ func @type_correctness(%arg0 : tensor<6x5xi32>, %arg1 : tensor<5xf32>,
 //  CHECK-SAME:   outs(%{{.+}} : tensor<6x5xf32>)
 //       CHECK:   tensor.expand_shape %[[OP]]
 //  CHECK-SAME:   tensor<6x5xf32> into tensor<2x3x5xf32>
+
+// -----
+
+func @generic_op_index_semantics(%A: tensor<?x16xi64>, %B: tensor<16xi64>, %init: tensor<?x112x16xi64>) -> tensor<?x112x16xi64> {
+  %0 = tensor.expand_shape %A [[0, 1], [2]]
+      : tensor<?x16xi64> into tensor<?x112x16xi64>
+  %2 = linalg.generic {indexing_maps = [
+    affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d2)>,
+    affine_map<(d0, d1, d2) -> (d0, d1, d2)>],
+    iterator_types = ["parallel", "parallel", "parallel"]}
+  ins(%0, %B : tensor<?x112x16xi64>, tensor<16xi64>)
+  outs(%init : tensor<?x112x16xi64>) {
+  ^bb0(%arg1: i64, %arg2: i64, %arg3: i64):  // no predecessors
+    %index = linalg.index 0 : index
+    %1 = arith.index_cast %index : index to i64
+    %add = arith.addi %arg1, %1 : i64
+    %s = arith.subi %add, %arg2 : i64
+    linalg.yield %s : i64
+  } -> tensor<?x112x16xi64>
+  return %2 : tensor<?x112x16xi64>
+}
+//      CHECK: func @generic_op_index_semantics
+// CHECK-SAME:     %[[ARG0:.+]]: tensor<?x16xi64>
+//      CHECK:   %[[RESHAPE:.+]] = tensor.expand_shape %[[ARG0]]
+//      CHECK:   %[[RESULT:.+]] = linalg.generic
+// CHECK-SAME:       ins(%[[RESHAPE]]
+//      CHECK:   return %[[RESULT]]

From ce5b04cc048aa9b82355bbea21a062462553eb47 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Tue, 25 Jan 2022 19:36:52 +0100
Subject: [PATCH 592/946] [Support] #include <new> for std::align_val_t

This is only used when aligned new is enabled.
---
 llvm/lib/Support/MemAlloc.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/MemAlloc.cpp b/llvm/lib/Support/MemAlloc.cpp
index 7aaa0dc6e205a..07a26cf26480b 100644
--- a/llvm/lib/Support/MemAlloc.cpp
+++ b/llvm/lib/Support/MemAlloc.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/MemAlloc.h"
+#include <new>
 
 // These are out of line to have __cpp_aligned_new not affect ABI.
 

From a09be08594a8b980f3cd0ad7e8aea1f9545517ae Mon Sep 17 00:00:00 2001
From: David Tenty <daltenty@ibm.com>
Date: Mon, 24 Jan 2022 23:20:18 -0500
Subject: [PATCH 593/946] [compiler-rt][profile][AIX] pass extra link opts for
 test

The AIX linker doesn't export any symbols by default, so an export list is usually used. Since clang doesn't have the tools to auto-generate an export list yet, just pass the linker an extra opt to tell it to export everything. This is  generally not recommended for real shared libs, but is fine for the purpose of this test.

Differential Revision: https://reviews.llvm.org/D118101
---
 .../test/profile/Posix/instrprof-get-filename-merge-mode.c | 2 +-
 compiler-rt/test/profile/Posix/lit.local.cfg.py            | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c b/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c
index 7e26e3e6b5dd3..afd113c5fa7c5 100644
--- a/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c
+++ b/compiler-rt/test/profile/Posix/instrprof-get-filename-merge-mode.c
@@ -1,6 +1,6 @@
 // Test __llvm_profile_get_filename when the on-line merging mode is enabled.
 //
-// RUN: %clang_pgogen -fPIC -shared -o %t.dso %p/../Inputs/instrprof-get-filename-dso.c
+// RUN: %clang_pgogen -fPIC -shared %shared_linker_xopts -o %t.dso %p/../Inputs/instrprof-get-filename-dso.c
 // RUN: %clang_pgogen -o %t %s %t.dso
 // RUN: env LLVM_PROFILE_FILE="%t-%m.profraw" %run %t
 
diff --git a/compiler-rt/test/profile/Posix/lit.local.cfg.py b/compiler-rt/test/profile/Posix/lit.local.cfg.py
index 60a9460820a62..eb79b385cc486 100644
--- a/compiler-rt/test/profile/Posix/lit.local.cfg.py
+++ b/compiler-rt/test/profile/Posix/lit.local.cfg.py
@@ -7,3 +7,10 @@ def getRoot(config):
 
 if root.host_os in ['Windows']:
   config.unsupported = True
+
+# AIX usually usually makes use of an explicit export list when linking a shared
+# object, but for the purposes of these tests just export all symbols.
+if root.host_os in ['AIX']:
+  config.substitutions.append(('%shared_linker_xopts', '-Wl,-bexpfull'))
+else:
+  config.substitutions.append(('%shared_linker_xopts', ''))

From ff8a4766ac256e3361ef66c0765ef5b041f4dd5f Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Tue, 25 Jan 2022 19:59:24 +0100
Subject: [PATCH 594/946] [libc++][nfc] Update formatting of some tests.

These tests were formatted with older clang-format settings, this
updates them to the current settings.

In order to implement P2216 a lot of changes to these tests are
required. This makes it easier to review those patches.
---
 .../format.functions/format.locale.pass.cpp   |  22 +-
 .../format/format.functions/format.pass.cpp   |  28 +-
 .../format/format.functions/format_tests.h    | 506 ++++++------------
 .../format_to.locale.pass.cpp                 |  13 +-
 .../format.functions/format_to.pass.cpp       |  13 +-
 .../format_to_n.locale.pass.cpp               |  25 +-
 .../format.functions/format_to_n.pass.cpp     |  22 +-
 .../formatted_size.locale.pass.cpp            |   7 +-
 .../format.functions/formatted_size.pass.cpp  |   7 +-
 .../locale-specific_form.pass.cpp             |  39 +-
 .../format.functions/vformat.locale.pass.cpp  |  13 +-
 .../format/format.functions/vformat.pass.cpp  |  10 +-
 .../vformat_to.locale.pass.cpp                |  29 +-
 .../format.functions/vformat_to.pass.cpp      |  28 +-
 14 files changed, 273 insertions(+), 489 deletions(-)

diff --git a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp
index 4da2f2ec6ee7f..197fddaa265f6 100644
--- a/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format.locale.pass.cpp
@@ -27,33 +27,31 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   std::basic_string<CharT> out = std::format(std::locale(), fmt, args...);
   if constexpr (std::same_as<CharT, char>)
     if (out != expected)
-      std::cerr << "\nFormat string   " << fmt << "\nExpected output "
-                << expected << "\nActual output   " << out << '\n';
+      std::cerr << "\nFormat string   " << fmt << "\nExpected output " << expected << "\nActual output   " << out
+                << '\n';
   assert(out == expected);
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::format(std::locale(), fmt, args...);
     if constexpr (std::same_as<CharT, char>)
-      std::cerr << "\nFormat string   " << fmt
-                << "\nDidn't throw an exception.\n";
+      std::cerr << "\nFormat string   " << fmt << "\nDidn't throw an exception.\n";
     assert(false);
   } catch (std::format_error& e) {
-#ifdef _LIBCPP_VERSION
+#  ifdef _LIBCPP_VERSION
     if constexpr (std::same_as<CharT, char>)
       if (e.what() != what)
-        std::cerr << "\nFormat string   " << fmt << "\nExpected exception "
-                  << what << "\nActual exception   " << e.what() << '\n';
-#endif
+        std::cerr << "\nFormat string   " << fmt << "\nExpected exception " << what << "\nActual exception   "
+                  << e.what() << '\n';
+#  endif
     LIBCPP_ASSERT(e.what() == what);
     return;
   }
diff --git a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp
index 7963b1500f1c5..cf566670501bf 100644
--- a/libcxx/test/std/utilities/format/format.functions/format.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format.pass.cpp
@@ -25,44 +25,42 @@
 #include <format>
 #include <cassert>
 #ifndef _LIBCPP_HAS_NO_LOCALIZATION
-#include <iostream>
+#  include <iostream>
 #endif
 #include <vector>
 
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   std::basic_string<CharT> out = std::format(fmt, args...);
 #ifndef _LIBCPP_HAS_NO_LOCALIZATION
   if constexpr (std::same_as<CharT, char>)
     if (out != expected)
-      std::cerr << "\nFormat string   " << fmt << "\nExpected output "
-                << expected << "\nActual output   " << out << '\n';
+      std::cerr << "\nFormat string   " << fmt << "\nExpected output " << expected << "\nActual output   " << out
+                << '\n';
 #endif
   assert(out == expected);
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::format(fmt, args...);
-#ifndef _LIBCPP_HAS_NO_LOCALIZATION
+#  ifndef _LIBCPP_HAS_NO_LOCALIZATION
     if constexpr (std::same_as<CharT, char>)
-      std::cerr << "\nFormat string   " << fmt
-                << "\nDidn't throw an exception.\n";
-#endif
+      std::cerr << "\nFormat string   " << fmt << "\nDidn't throw an exception.\n";
+#  endif
     assert(false);
   } catch (std::format_error& e) {
-#if defined(_LIBCPP_VERSION) && !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+#  if defined(_LIBCPP_VERSION) && !defined(_LIBCPP_HAS_NO_LOCALIZATION)
     if constexpr (std::same_as<CharT, char>)
       if (e.what() != what)
-        std::cerr << "\nFormat string   " << fmt << "\nExpected exception "
-                  << what << "\nActual exception   " << e.what() << '\n';
-#endif
+        std::cerr << "\nFormat string   " << fmt << "\nExpected exception " << what << "\nActual exception   "
+                  << e.what() << '\n';
+#  endif
     LIBCPP_ASSERT(e.what() == what);
     return;
   }
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 8deed4da43635..3b2d0c71f11cb 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -134,10 +134,10 @@ template <class CharT>
 std::vector<std::basic_string<CharT>> invalid_types(std::string valid) {
   std::vector<std::basic_string<CharT>> result;
 
-#define CASE(T)                                                                \
-  case #T[0]:                                                                  \
-    result.push_back(STR("Invalid formatter type {:" #T "}"));                 \
-    break;
+#define CASE(T)                                                                                                        \
+case #T[0]:                                                                                                            \
+  result.push_back(STR("Invalid formatter type {:" #T "}"));                                                           \
+  break;
 
   for (auto type : "aAbBcdeEfFgGopsxX") {
     if (valid.find(type) != std::string::npos)
@@ -173,18 +173,15 @@ std::vector<std::basic_string<CharT>> invalid_types(std::string valid) {
 }
 
 template <class CharT, class T, class TestFunction, class ExceptionTest>
-void format_test_string(T world, T universe, TestFunction check,
-                        ExceptionTest check_exception) {
+void format_test_string(T world, T universe, TestFunction check, ExceptionTest check_exception) {
 
   // *** Valid input tests ***
   // Unsed argument is ignored. TODO FMT what does the Standard mandate?
   check(STR("hello world"), STR("hello {}"), world, universe);
-  check(STR("hello world and universe"), STR("hello {} and {}"), world,
-        universe);
+  check(STR("hello world and universe"), STR("hello {} and {}"), world, universe);
   check(STR("hello world"), STR("hello {0}"), world, universe);
   check(STR("hello universe"), STR("hello {1}"), world, universe);
-  check(STR("hello universe and world"), STR("hello {1} and {0}"), world,
-        universe);
+  check(STR("hello universe and world"), STR("hello {1} and {0}"), world, universe);
 
   check(STR("hello world"), STR("hello {:_>}"), world);
   check(STR("hello    world"), STR("hello {:>8}"), world);
@@ -225,97 +222,69 @@ void format_test_string(T world, T universe, TestFunction check,
   check(STR("hello uni#####"), STR("hello {:#<8.3s}"), universe);
 
   // *** sign ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("hello {:-}"), world);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:-}"), world);
 
   // *** alternate form ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("hello {:#}"), world);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:#}"), world);
 
   // *** zero-padding ***
-  check_exception("A format-spec width field shouldn't have a leading zero",
-                  STR("hello {:0}"), world);
+  check_exception("A format-spec width field shouldn't have a leading zero", STR("hello {:0}"), world);
 
   // *** width ***
 #ifdef _LIBCPP_VERSION
   // This limit isn't specified in the Standard.
-  static_assert(std::__format::__number_max == 2'147'483'647,
-                "Update the assert and the test.");
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:2147483648}"), world);
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:5000000000}"), world);
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:10000000000}"), world);
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  check_exception("The numeric value of the format-spec is too large", STR("{:2147483648}"), world);
+  check_exception("The numeric value of the format-spec is too large", STR("{:5000000000}"), world);
+  check_exception("The numeric value of the format-spec is too large", STR("{:10000000000}"), world);
 #endif
 
-  check_exception(
-      "A format-spec width field replacement should have a positive value",
-      STR("hello {:{}}"), world, 0);
-  check_exception(
-      "A format-spec arg-id replacement shouldn't have a negative value",
-      STR("hello {:{}}"), world, -1);
-  check_exception(
-      "A format-spec arg-id replacement exceeds the maximum supported value",
-      STR("hello {:{}}"), world, unsigned(-1));
+  check_exception("A format-spec width field replacement should have a positive value", STR("hello {:{}}"), world, 0);
+  check_exception("A format-spec arg-id replacement shouldn't have a negative value", STR("hello {:{}}"), world, -1);
+  check_exception("A format-spec arg-id replacement exceeds the maximum supported value", STR("hello {:{}}"), world,
+                  unsigned(-1));
   check_exception("Argument index out of bounds", STR("hello {:{}}"), world);
-  check_exception(
-      "A format-spec arg-id replacement argument isn't an integral type",
-      STR("hello {:{}}"), world, universe);
-  check_exception(
-      "Using manual argument numbering in automatic argument numbering mode",
-      STR("hello {:{0}}"), world, 1);
-  check_exception(
-      "Using automatic argument numbering in manual argument numbering mode",
-      STR("hello {0:{}}"), world, 1);
+  check_exception("A format-spec arg-id replacement argument isn't an integral type", STR("hello {:{}}"), world,
+                  universe);
+  check_exception("Using manual argument numbering in automatic argument numbering mode", STR("hello {:{0}}"), world,
+                  1);
+  check_exception("Using automatic argument numbering in manual argument numbering mode", STR("hello {0:{}}"), world,
+                  1);
   // Arg-id may not have leading zeros.
   check_exception("Invalid arg-id", STR("hello {0:{01}}"), world, 1);
 
   // *** precision ***
 #ifdef _LIBCPP_VERSION
   // This limit isn't specified in the Standard.
-  static_assert(std::__format::__number_max == 2'147'483'647,
-                "Update the assert and the test.");
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:.2147483648}"), world);
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:.5000000000}"), world);
-  check_exception("The numeric value of the format-spec is too large",
-                  STR("{:.10000000000}"), world);
+  static_assert(std::__format::__number_max == 2'147'483'647, "Update the assert and the test.");
+  check_exception("The numeric value of the format-spec is too large", STR("{:.2147483648}"), world);
+  check_exception("The numeric value of the format-spec is too large", STR("{:.5000000000}"), world);
+  check_exception("The numeric value of the format-spec is too large", STR("{:.10000000000}"), world);
 #endif
 
   // Precision 0 allowed, but not useful for string arguments.
   check(STR("hello "), STR("hello {:.{}}"), world, 0);
   // Precision may have leading zeros. Secondly tests the value is still base 10.
   check(STR("hello 0123456789"), STR("hello {:.000010}"), STR("0123456789abcdef"));
-  check_exception(
-      "A format-spec arg-id replacement shouldn't have a negative value",
-      STR("hello {:.{}}"), world, -1);
-  check_exception(
-      "A format-spec arg-id replacement exceeds the maximum supported value",
-      STR("hello {:.{}}"), world, ~0u);
+  check_exception("A format-spec arg-id replacement shouldn't have a negative value", STR("hello {:.{}}"), world, -1);
+  check_exception("A format-spec arg-id replacement exceeds the maximum supported value", STR("hello {:.{}}"), world,
+                  ~0u);
   check_exception("Argument index out of bounds", STR("hello {:.{}}"), world);
-  check_exception(
-      "A format-spec arg-id replacement argument isn't an integral type",
-      STR("hello {:.{}}"), world, universe);
-  check_exception(
-      "Using manual argument numbering in automatic argument numbering mode",
-      STR("hello {:.{0}}"), world, 1);
-  check_exception(
-      "Using automatic argument numbering in manual argument numbering mode",
-      STR("hello {0:.{}}"), world, 1);
+  check_exception("A format-spec arg-id replacement argument isn't an integral type", STR("hello {:.{}}"), world,
+                  universe);
+  check_exception("Using manual argument numbering in automatic argument numbering mode", STR("hello {:.{0}}"), world,
+                  1);
+  check_exception("Using automatic argument numbering in manual argument numbering mode", STR("hello {0:.{}}"), world,
+                  1);
   // Arg-id may not have leading zeros.
   check_exception("Invalid arg-id", STR("hello {0:.{01}}"), world, 1);
 
   // *** locale-specific form ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("hello {:L}"), world);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("hello {:L}"), world);
 
   // *** type ***
   for (const auto& fmt : invalid_types<CharT>("s"))
-    check_exception(
-        "The format-spec type has a type not supported for a string argument",
-        fmt, world);
+    check_exception("The format-spec type has a type not supported for a string argument", fmt, world);
 }
 
 template <class CharT, class TestFunction>
@@ -364,13 +333,10 @@ void format_string_tests(TestFunction check, ExceptionTest check_exception) {
   // Testing the char const[] is a bit tricky due to array to pointer decay.
   // Since there are separate tests in format.formatter.spec the array is not
   // tested here.
-  format_test_string<CharT>(world.c_str(), universe.c_str(), check,
+  format_test_string<CharT>(world.c_str(), universe.c_str(), check, check_exception);
+  format_test_string<CharT>(const_cast<CharT*>(world.c_str()), const_cast<CharT*>(universe.c_str()), check,
                             check_exception);
-  format_test_string<CharT>(const_cast<CharT*>(world.c_str()),
-                            const_cast<CharT*>(universe.c_str()), check,
-                            check_exception);
-  format_test_string<CharT>(std::basic_string_view<CharT>(world),
-                            std::basic_string_view<CharT>(universe), check,
+  format_test_string<CharT>(std::basic_string_view<CharT>(world), std::basic_string_view<CharT>(universe), check,
                             check_exception);
   format_test_string<CharT>(world, universe, check, check_exception);
   format_test_string_unicode<CharT>(check);
@@ -399,60 +365,41 @@ void format_test_bool(TestFunction check, ExceptionTest check_exception) {
   check(STR("answer is '-false--'"), STR("answer is '{:-^8s}'"), false);
 
   // *** Sign ***
-  check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"),
-                  true);
-  check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"),
-                  true);
-  check_exception("A sign field isn't allowed in this format-spec", STR("{: }"),
-                  true);
-
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:-s}"), true);
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:+s}"), true);
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{: s}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: }"), true);
+
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-s}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+s}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: s}"), true);
 
   // *** alternate form ***
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#}"), true);
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#s}"), true);
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#}"), true);
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#s}"), true);
 
   // *** zero-padding ***
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0}"), true);
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0s}"), true);
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0}"), true);
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0s}"), true);
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42}"), true);
-
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.s}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0s}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42s}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), true);
+
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.s}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0s}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42s}"), true);
 
   // *** locale-specific form ***
   // See locale-specific_form.pass.cpp
 
   // *** type ***
   for (const auto& fmt : invalid_types<CharT>("bBcdosxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a bool argument",
-        fmt, true);
+    check_exception("The format-spec type has a type not supported for a bool argument", fmt, true);
 }
 
 template <class CharT, class TestFunction, class ExceptionTest>
-void format_test_bool_as_char(TestFunction check,
-                              ExceptionTest check_exception) {
+void format_test_bool_as_char(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '\1     '"), STR("answer is '{:6c}'"), true);
   check(STR("answer is '     \1'"), STR("answer is '{:>6c}'"), true);
@@ -463,47 +410,31 @@ void format_test_bool_as_char(TestFunction check,
   check(STR("answer is '\1-----'"), STR("answer is '{:-<6c}'"), true);
   check(STR("answer is '--\1---'"), STR("answer is '{:-^6c}'"), true);
 
-  check(std::basic_string<CharT>(CSTR("answer is '\0     '"), 18),
-        STR("answer is '{:6c}'"), false);
-  check(std::basic_string<CharT>(CSTR("answer is '\0     '"), 18),
-        STR("answer is '{:6c}'"), false);
-  check(std::basic_string<CharT>(CSTR("answer is '     \0'"), 18),
-        STR("answer is '{:>6c}'"), false);
-  check(std::basic_string<CharT>(CSTR("answer is '\0     '"), 18),
-        STR("answer is '{:<6c}'"), false);
-  check(std::basic_string<CharT>(CSTR("answer is '  \0   '"), 18),
-        STR("answer is '{:^6c}'"), false);
-
-  check(std::basic_string<CharT>(CSTR("answer is '-----\0'"), 18),
-        STR("answer is '{:->6c}'"), false);
-  check(std::basic_string<CharT>(CSTR("answer is '\0-----'"), 18),
-        STR("answer is '{:-<6c}'"), false);
-  check(std::basic_string<CharT>(CSTR("answer is '--\0---'"), 18),
-        STR("answer is '{:-^6c}'"), false);
+  check(std::basic_string<CharT>(CSTR("answer is '\0     '"), 18), STR("answer is '{:6c}'"), false);
+  check(std::basic_string<CharT>(CSTR("answer is '\0     '"), 18), STR("answer is '{:6c}'"), false);
+  check(std::basic_string<CharT>(CSTR("answer is '     \0'"), 18), STR("answer is '{:>6c}'"), false);
+  check(std::basic_string<CharT>(CSTR("answer is '\0     '"), 18), STR("answer is '{:<6c}'"), false);
+  check(std::basic_string<CharT>(CSTR("answer is '  \0   '"), 18), STR("answer is '{:^6c}'"), false);
+
+  check(std::basic_string<CharT>(CSTR("answer is '-----\0'"), 18), STR("answer is '{:->6c}'"), false);
+  check(std::basic_string<CharT>(CSTR("answer is '\0-----'"), 18), STR("answer is '{:-<6c}'"), false);
+  check(std::basic_string<CharT>(CSTR("answer is '--\0---'"), 18), STR("answer is '{:-^6c}'"), false);
 
   // *** Sign ***
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:-c}"), true);
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:+c}"), true);
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{: c}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-c}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+c}"), true);
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: c}"), true);
 
   // *** alternate form ***
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#c}"), true);
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#c}"), true);
 
   // *** zero-padding ***
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0c}"), true);
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0c}"), true);
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.c}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0c}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42c}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), true);
 
   // *** locale-specific form ***
   // Note it has no effect but it's allowed.
@@ -511,14 +442,11 @@ void format_test_bool_as_char(TestFunction check,
 
   // *** type ***
   for (const auto& fmt : invalid_types<CharT>("bBcdosxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a bool argument",
-        fmt, true);
+    check_exception("The format-spec type has a type not supported for a bool argument", fmt, true);
 }
 
 template <class CharT, class TestFunction, class ExceptionTest>
-void format_test_bool_as_integer(TestFunction check,
-                                 ExceptionTest check_exception) {
+void format_test_bool_as_integer(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '1'"), STR("answer is '{:<1d}'"), true);
   check(STR("answer is '1 '"), STR("answer is '{:<2d}'"), true);
@@ -591,26 +519,20 @@ void format_test_bool_as_integer(TestFunction check,
   check(STR("answer is 0X0000000000"), STR("answer is {:#012X}"), false);
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0}"), true);
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), true);
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), true);
 
   // *** locale-specific form ***
   // See locale-specific_form.pass.cpp
 
   // *** type ***
   for (const auto& fmt : invalid_types<CharT>("bBcdosxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a bool argument",
-        fmt, true);
+    check_exception("The format-spec type has a type not supported for a bool argument", fmt, true);
 }
 
 template <class I, class CharT, class TestFunction, class ExceptionTest>
-void format_test_integer_as_integer(TestFunction check,
-                                    ExceptionTest check_exception) {
+void format_test_integer_as_integer(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '42'"), STR("answer is '{:<1}'"), I(42));
   check(STR("answer is '42'"), STR("answer is '{:<2}'"), I(42));
@@ -729,26 +651,20 @@ void format_test_integer_as_integer(TestFunction check,
   check(STR("answer is +0X00000002A"), STR("answer is {:+#012X}"), I(42));
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.}"), I(0));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0}"), I(0));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), I(0));
 
   // *** locale-specific form ***
   // See locale-specific_form.pass.cpp
 
   // *** type ***
   for (const auto& fmt : invalid_types<CharT>("bBcdoxX"))
-    check_exception(
-        "The format-spec type has a type not supported for an integer argument",
-        fmt, 42);
+    check_exception("The format-spec type has a type not supported for an integer argument", fmt, 42);
 }
 
 template <class I, class CharT, class TestFunction, class ExceptionTest>
-void format_test_integer_as_char(TestFunction check,
-                                 ExceptionTest check_exception) {
+void format_test_integer_as_char(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '*     '"), STR("answer is '{:6c}'"), I(42));
   check(STR("answer is '     *'"), STR("answer is '{:>6c}'"), I(42));
@@ -761,28 +677,20 @@ void format_test_integer_as_char(TestFunction check,
 
   // *** Sign ***
   check(STR("answer is *"), STR("answer is {:c}"), I(42));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("answer is {:-c}"), I(42));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("answer is {:+c}"), I(42));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("answer is {: c}"), I(42));
+  check_exception("A sign field isn't allowed in this format-spec", STR("answer is {:-c}"), I(42));
+  check_exception("A sign field isn't allowed in this format-spec", STR("answer is {:+c}"), I(42));
+  check_exception("A sign field isn't allowed in this format-spec", STR("answer is {: c}"), I(42));
 
   // *** alternate form ***
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("answer is {:#c}"), I(42));
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("answer is {:#c}"), I(42));
 
   // *** zero-padding & width ***
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("answer is {:01c}"), I(42));
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("answer is {:01c}"), I(42));
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.c}"), I(0));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0c}"), I(0));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42c}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), I(0));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), I(0));
 
   // *** locale-specific form ***
   // Note it has no effect but it's allowed.
@@ -790,9 +698,7 @@ void format_test_integer_as_char(TestFunction check,
 
   // *** type ***
   for (const auto& fmt : invalid_types<CharT>("bBcdoxX"))
-    check_exception(
-        "The format-spec type has a type not supported for an integer argument",
-        fmt, I(42));
+    check_exception("The format-spec type has a type not supported for an integer argument", fmt, I(42));
 
   // *** Validate range ***
   // TODO FMT Update test after adding 128-bit support.
@@ -800,18 +706,16 @@ void format_test_integer_as_char(TestFunction check,
     // The code has some duplications to keep the if statement readable.
     if constexpr (std::signed_integral<CharT>) {
       if constexpr (std::signed_integral<I> && sizeof(I) > sizeof(CharT)) {
-        check_exception("Integral value outside the range of the char type",
-                        STR("{:c}"), std::numeric_limits<I>::min());
-        check_exception("Integral value outside the range of the char type",
-                        STR("{:c}"), std::numeric_limits<I>::max());
-      } else if constexpr (std::unsigned_integral<I> &&
-                           sizeof(I) >= sizeof(CharT)) {
-        check_exception("Integral value outside the range of the char type",
-                        STR("{:c}"), std::numeric_limits<I>::max());
+        check_exception("Integral value outside the range of the char type", STR("{:c}"),
+                        std::numeric_limits<I>::min());
+        check_exception("Integral value outside the range of the char type", STR("{:c}"),
+                        std::numeric_limits<I>::max());
+      } else if constexpr (std::unsigned_integral<I> && sizeof(I) >= sizeof(CharT)) {
+        check_exception("Integral value outside the range of the char type", STR("{:c}"),
+                        std::numeric_limits<I>::max());
       }
     } else if constexpr (sizeof(I) > sizeof(CharT)) {
-      check_exception("Integral value outside the range of the char type",
-                      STR("{:c}"), std::numeric_limits<I>::max());
+      check_exception("Integral value outside the range of the char type", STR("{:c}"), std::numeric_limits<I>::max());
     }
   }
 }
@@ -823,8 +727,7 @@ void format_test_integer(TestFunction check, ExceptionTest check_exception) {
 }
 
 template <class CharT, class TestFunction, class ExceptionTest>
-void format_test_signed_integer(TestFunction check,
-                                ExceptionTest check_exception) {
+void format_test_signed_integer(TestFunction check, ExceptionTest check_exception) {
   format_test_integer<signed char, CharT>(check, check_exception);
   format_test_integer<short, CharT>(check, check_exception);
   format_test_integer<int, CharT>(check, check_exception);
@@ -839,62 +742,49 @@ void format_test_signed_integer(TestFunction check,
   check(STR("-128"), STR("{:#}"), std::numeric_limits<int8_t>::min());
   check(STR("-0x80"), STR("{:#x}"), std::numeric_limits<int8_t>::min());
 
-  check(STR("-0b1000000000000000"), STR("{:#b}"),
-        std::numeric_limits<int16_t>::min());
+  check(STR("-0b1000000000000000"), STR("{:#b}"), std::numeric_limits<int16_t>::min());
   check(STR("-0100000"), STR("{:#o}"), std::numeric_limits<int16_t>::min());
   check(STR("-32768"), STR("{:#}"), std::numeric_limits<int16_t>::min());
   check(STR("-0x8000"), STR("{:#x}"), std::numeric_limits<int16_t>::min());
 
-  check(STR("-0b10000000000000000000000000000000"), STR("{:#b}"),
-        std::numeric_limits<int32_t>::min());
-  check(STR("-020000000000"), STR("{:#o}"),
-        std::numeric_limits<int32_t>::min());
+  check(STR("-0b10000000000000000000000000000000"), STR("{:#b}"), std::numeric_limits<int32_t>::min());
+  check(STR("-020000000000"), STR("{:#o}"), std::numeric_limits<int32_t>::min());
   check(STR("-2147483648"), STR("{:#}"), std::numeric_limits<int32_t>::min());
   check(STR("-0x80000000"), STR("{:#x}"), std::numeric_limits<int32_t>::min());
 
   check(STR("-0b100000000000000000000000000000000000000000000000000000000000000"
             "0"),
         STR("{:#b}"), std::numeric_limits<int64_t>::min());
-  check(STR("-01000000000000000000000"), STR("{:#o}"),
-        std::numeric_limits<int64_t>::min());
-  check(STR("-9223372036854775808"), STR("{:#}"),
-        std::numeric_limits<int64_t>::min());
-  check(STR("-0x8000000000000000"), STR("{:#x}"),
-        std::numeric_limits<int64_t>::min());
+  check(STR("-01000000000000000000000"), STR("{:#o}"), std::numeric_limits<int64_t>::min());
+  check(STR("-9223372036854775808"), STR("{:#}"), std::numeric_limits<int64_t>::min());
+  check(STR("-0x8000000000000000"), STR("{:#x}"), std::numeric_limits<int64_t>::min());
 
   check(STR("0b1111111"), STR("{:#b}"), std::numeric_limits<int8_t>::max());
   check(STR("0177"), STR("{:#o}"), std::numeric_limits<int8_t>::max());
   check(STR("127"), STR("{:#}"), std::numeric_limits<int8_t>::max());
   check(STR("0x7f"), STR("{:#x}"), std::numeric_limits<int8_t>::max());
 
-  check(STR("0b111111111111111"), STR("{:#b}"),
-        std::numeric_limits<int16_t>::max());
+  check(STR("0b111111111111111"), STR("{:#b}"), std::numeric_limits<int16_t>::max());
   check(STR("077777"), STR("{:#o}"), std::numeric_limits<int16_t>::max());
   check(STR("32767"), STR("{:#}"), std::numeric_limits<int16_t>::max());
   check(STR("0x7fff"), STR("{:#x}"), std::numeric_limits<int16_t>::max());
 
-  check(STR("0b1111111111111111111111111111111"), STR("{:#b}"),
-        std::numeric_limits<int32_t>::max());
+  check(STR("0b1111111111111111111111111111111"), STR("{:#b}"), std::numeric_limits<int32_t>::max());
   check(STR("017777777777"), STR("{:#o}"), std::numeric_limits<int32_t>::max());
   check(STR("2147483647"), STR("{:#}"), std::numeric_limits<int32_t>::max());
   check(STR("0x7fffffff"), STR("{:#x}"), std::numeric_limits<int32_t>::max());
 
-  check(
-      STR("0b111111111111111111111111111111111111111111111111111111111111111"),
-      STR("{:#b}"), std::numeric_limits<int64_t>::max());
-  check(STR("0777777777777777777777"), STR("{:#o}"),
-        std::numeric_limits<int64_t>::max());
-  check(STR("9223372036854775807"), STR("{:#}"),
-        std::numeric_limits<int64_t>::max());
-  check(STR("0x7fffffffffffffff"), STR("{:#x}"),
+  check(STR("0b111111111111111111111111111111111111111111111111111111111111111"), STR("{:#b}"),
         std::numeric_limits<int64_t>::max());
+  check(STR("0777777777777777777777"), STR("{:#o}"), std::numeric_limits<int64_t>::max());
+  check(STR("9223372036854775807"), STR("{:#}"), std::numeric_limits<int64_t>::max());
+  check(STR("0x7fffffffffffffff"), STR("{:#x}"), std::numeric_limits<int64_t>::max());
 
   // TODO FMT Add __int128_t test after implementing full range.
 }
 
 template <class CharT, class TestFunction, class ExceptionTest>
-void format_test_unsigned_integer(TestFunction check,
-                                  ExceptionTest check_exception) {
+void format_test_unsigned_integer(TestFunction check, ExceptionTest check_exception) {
   format_test_integer<unsigned char, CharT>(check, check_exception);
   format_test_integer<unsigned short, CharT>(check, check_exception);
   format_test_integer<unsigned, CharT>(check, check_exception);
@@ -909,28 +799,21 @@ void format_test_unsigned_integer(TestFunction check,
   check(STR("255"), STR("{:#}"), std::numeric_limits<uint8_t>::max());
   check(STR("0xff"), STR("{:#x}"), std::numeric_limits<uint8_t>::max());
 
-  check(STR("0b1111111111111111"), STR("{:#b}"),
-        std::numeric_limits<uint16_t>::max());
+  check(STR("0b1111111111111111"), STR("{:#b}"), std::numeric_limits<uint16_t>::max());
   check(STR("0177777"), STR("{:#o}"), std::numeric_limits<uint16_t>::max());
   check(STR("65535"), STR("{:#}"), std::numeric_limits<uint16_t>::max());
   check(STR("0xffff"), STR("{:#x}"), std::numeric_limits<uint16_t>::max());
 
-  check(STR("0b11111111111111111111111111111111"), STR("{:#b}"),
-        std::numeric_limits<uint32_t>::max());
-  check(STR("037777777777"), STR("{:#o}"),
-        std::numeric_limits<uint32_t>::max());
+  check(STR("0b11111111111111111111111111111111"), STR("{:#b}"), std::numeric_limits<uint32_t>::max());
+  check(STR("037777777777"), STR("{:#o}"), std::numeric_limits<uint32_t>::max());
   check(STR("4294967295"), STR("{:#}"), std::numeric_limits<uint32_t>::max());
   check(STR("0xffffffff"), STR("{:#x}"), std::numeric_limits<uint32_t>::max());
 
-  check(
-      STR("0b1111111111111111111111111111111111111111111111111111111111111111"),
-      STR("{:#b}"), std::numeric_limits<uint64_t>::max());
-  check(STR("01777777777777777777777"), STR("{:#o}"),
-        std::numeric_limits<uint64_t>::max());
-  check(STR("18446744073709551615"), STR("{:#}"),
-        std::numeric_limits<uint64_t>::max());
-  check(STR("0xffffffffffffffff"), STR("{:#x}"),
+  check(STR("0b1111111111111111111111111111111111111111111111111111111111111111"), STR("{:#b}"),
         std::numeric_limits<uint64_t>::max());
+  check(STR("01777777777777777777777"), STR("{:#o}"), std::numeric_limits<uint64_t>::max());
+  check(STR("18446744073709551615"), STR("{:#}"), std::numeric_limits<uint64_t>::max());
+  check(STR("0xffffffffffffffff"), STR("{:#x}"), std::numeric_limits<uint64_t>::max());
 
   // TODO FMT Add __uint128_t test after implementing full range.
 }
@@ -959,46 +842,30 @@ void format_test_char(TestFunction check, ExceptionTest check_exception) {
   check(STR("answer is '--*---'"), STR("answer is '{:-^6c}'"), CharT('*'));
 
   // *** Sign ***
-  check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"),
-                  CharT('*'));
-  check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"),
-                  CharT('*'));
-  check_exception("A sign field isn't allowed in this format-spec", STR("{: }"),
-                  CharT('*'));
-
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:-c}"), CharT('*'));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{:+c}"), CharT('*'));
-  check_exception("A sign field isn't allowed in this format-spec",
-                  STR("{: c}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: }"), CharT('*'));
+
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:-c}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{:+c}"), CharT('*'));
+  check_exception("A sign field isn't allowed in this format-spec", STR("{: c}"), CharT('*'));
 
   // *** alternate form ***
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#}"), CharT('*'));
-  check_exception("An alternate form field isn't allowed in this format-spec",
-                  STR("{:#c}"), CharT('*'));
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#}"), CharT('*'));
+  check_exception("An alternate form field isn't allowed in this format-spec", STR("{:#c}"), CharT('*'));
 
   // *** zero-padding ***
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0}"), CharT('*'));
-  check_exception("A zero-padding field isn't allowed in this format-spec",
-                  STR("{:0c}"), CharT('*'));
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0}"), CharT('*'));
+  check_exception("A zero-padding field isn't allowed in this format-spec", STR("{:0c}"), CharT('*'));
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42}"), CharT('*'));
-
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.c}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0c}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42c}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42}"), CharT('*'));
+
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.c}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0c}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42c}"), CharT('*'));
 
   // *** locale-specific form ***
   // Note it has no effect but it's allowed.
@@ -1007,14 +874,11 @@ void format_test_char(TestFunction check, ExceptionTest check_exception) {
 
   // *** type ***
   for (const auto& fmt : invalid_types<CharT>("bBcdoxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a char argument",
-        fmt, CharT('*'));
+    check_exception("The format-spec type has a type not supported for a char argument", fmt, CharT('*'));
 }
 
 template <class CharT, class TestFunction, class ExceptionTest>
-void format_test_char_as_integer(TestFunction check,
-                                 ExceptionTest check_exception) {
+void format_test_char_as_integer(TestFunction check, ExceptionTest check_exception) {
   // *** align-fill & width ***
   check(STR("answer is '42'"), STR("answer is '{:<1d}'"), CharT('*'));
 
@@ -1067,21 +931,16 @@ void format_test_char_as_integer(TestFunction check,
   check(STR("answer is +0X00000002A"), STR("answer is {:+#012X}"), CharT('*'));
 
   // *** precision ***
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.d}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.0d}"), CharT('*'));
-  check_exception("The format-spec should consume the input or end with a '}'",
-                  STR("{:.42d}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.d}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.0d}"), CharT('*'));
+  check_exception("The format-spec should consume the input or end with a '}'", STR("{:.42d}"), CharT('*'));
 
   // *** locale-specific form ***
   // See locale-specific_form.pass.cpp
 
   // *** type ***
   for (const auto& fmt : invalid_types<CharT>("bBcdoxX"))
-    check_exception(
-        "The format-spec type has a type not supported for a char argument",
-        fmt, '*');
+    check_exception("The format-spec type has a type not supported for a char argument", fmt, '*');
 }
 
 template <class F, class CharT, class TestFunction>
@@ -2650,42 +2509,34 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
 
   // ** Test invalid format strings ***
   check_exception("The format string terminates at a '{'", STR("{"));
-  check_exception("The replacement field misses a terminating '}'", STR("{:"),
-                  42);
+  check_exception("The replacement field misses a terminating '}'", STR("{:"), 42);
 
-  check_exception("The format string contains an invalid escape sequence",
-                  STR("}"));
-  check_exception("The format string contains an invalid escape sequence",
-                  STR("{:}-}"), 42);
+  check_exception("The format string contains an invalid escape sequence", STR("}"));
+  check_exception("The format string contains an invalid escape sequence", STR("{:}-}"), 42);
 
-  check_exception("The format string contains an invalid escape sequence",
-                  STR("} "));
+  check_exception("The format string contains an invalid escape sequence", STR("} "));
 
-  check_exception(
-      "The arg-id of the format-spec starts with an invalid character",
-      STR("{-"), 42);
+  check_exception("The arg-id of the format-spec starts with an invalid character", STR("{-"), 42);
   check_exception("Argument index out of bounds", STR("hello {}"));
   check_exception("Argument index out of bounds", STR("hello {0}"));
   check_exception("Argument index out of bounds", STR("hello {1}"), 42);
 
   // *** Test char format argument ***
   // The `char` to `wchar_t` formatting is tested separately.
-  check(STR("hello 09azAZ!"), STR("hello {}{}{}{}{}{}{}"), CharT('0'),
-        CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!'));
+  check(STR("hello 09azAZ!"), STR("hello {}{}{}{}{}{}{}"), CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'),
+        CharT('Z'), CharT('!'));
 
   format_test_char<CharT>(check, check_exception);
   format_test_char_as_integer<CharT>(check, check_exception);
 
   // *** Test string format argument ***
   {
-    CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'),
-                      CharT('A'), CharT('Z'), CharT('!'), 0};
+    CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!'), 0};
     CharT* data = buffer;
     check(STR("hello 09azAZ!"), STR("hello {}"), data);
   }
   {
-    CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'),
-                      CharT('A'), CharT('Z'), CharT('!'), 0};
+    CharT buffer[] = {CharT('0'), CharT('9'), CharT('a'), CharT('z'), CharT('A'), CharT('Z'), CharT('!'), 0};
     const CharT* data = buffer;
     check(STR("hello 09azAZ!"), STR("hello {}"), data);
   }
@@ -2718,20 +2569,14 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   {
     // Note 128-bit support is only partly implemented test the range
     // conditions here.
-    std::basic_string<CharT> min =
-        std::format(STR("{}"), std::numeric_limits<long long>::min());
-    check(min, STR("{}"),
-          static_cast<__int128_t>(std::numeric_limits<long long>::min()));
-    std::basic_string<CharT> max =
-        std::format(STR("{}"), std::numeric_limits<long long>::max());
-    check(max, STR("{}"),
-          static_cast<__int128_t>(std::numeric_limits<long long>::max()));
-    check_exception(
-        "128-bit value is outside of implemented range", STR("{}"),
-        static_cast<__int128_t>(std::numeric_limits<long long>::min()) - 1);
-    check_exception(
-        "128-bit value is outside of implemented range", STR("{}"),
-        static_cast<__int128_t>(std::numeric_limits<long long>::max()) + 1);
+    std::basic_string<CharT> min = std::format(STR("{}"), std::numeric_limits<long long>::min());
+    check(min, STR("{}"), static_cast<__int128_t>(std::numeric_limits<long long>::min()));
+    std::basic_string<CharT> max = std::format(STR("{}"), std::numeric_limits<long long>::max());
+    check(max, STR("{}"), static_cast<__int128_t>(std::numeric_limits<long long>::max()));
+    check_exception("128-bit value is outside of implemented range", STR("{}"),
+                    static_cast<__int128_t>(std::numeric_limits<long long>::min()) - 1);
+    check_exception("128-bit value is outside of implemented range", STR("{}"),
+                    static_cast<__int128_t>(std::numeric_limits<long long>::max()) + 1);
   }
 #endif
   format_test_signed_integer<CharT>(check, check_exception);
@@ -2747,15 +2592,10 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   {
     // Note 128-bit support is only partly implemented test the range
     // conditions here.
-    std::basic_string<CharT> max =
-        std::format(STR("{}"), std::numeric_limits<unsigned long long>::max());
-    check(max, STR("{}"),
-          static_cast<__uint128_t>(
-              std::numeric_limits<unsigned long long>::max()));
+    std::basic_string<CharT> max = std::format(STR("{}"), std::numeric_limits<unsigned long long>::max());
+    check(max, STR("{}"), static_cast<__uint128_t>(std::numeric_limits<unsigned long long>::max()));
     check_exception("128-bit value is outside of implemented range", STR("{}"),
-                    static_cast<__uint128_t>(
-                        std::numeric_limits<unsigned long long>::max()) +
-                        1);
+                    static_cast<__uint128_t>(std::numeric_limits<unsigned long long>::max()) + 1);
   }
 #endif
   format_test_unsigned_integer<CharT>(check, check_exception);
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp
index 98356fdd8bd66..b403105cb121d 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format_to.locale.pass.cpp
@@ -30,8 +30,7 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   {
     std::basic_string<CharT> out(expected.size(), CharT(' '));
@@ -42,14 +41,12 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   {
     std::list<CharT> out;
     std::format_to(std::back_inserter(out), std::locale(), fmt, args...);
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     std::vector<CharT> out;
     std::format_to(std::back_inserter(out), std::locale(), fmt, args...);
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     assert(expected.size() < 4096 && "Update the size of the buffer.");
@@ -61,8 +58,8 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   }
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string<CharT> out;
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp
index d8d24f92d60cc..cf46a0092eb16 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format_to.pass.cpp
@@ -31,8 +31,7 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   {
     std::basic_string<CharT> out(expected.size(), CharT(' '));
@@ -43,14 +42,12 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   {
     std::list<CharT> out;
     std::format_to(std::back_inserter(out), fmt, args...);
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     std::vector<CharT> out;
     std::format_to(std::back_inserter(out), fmt, args...);
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     assert(expected.size() < 4096 && "Update the size of the buffer.");
@@ -62,8 +59,8 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   }
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string<CharT> out;
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp
index 206450b53c466..1dc1745a1fd3f 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.locale.pass.cpp
@@ -32,13 +32,11 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   {
     std::list<CharT> out;
-    std::format_to_n_result result = std::format_to_n(
-        std::back_inserter(out), 0, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 0, std::locale(), fmt, args...);
     // To avoid signedness warnings make sure formatted_size uses the same type
     // as result.size.
     using diff_type = decltype(result.size);
@@ -49,20 +47,17 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   }
   {
     std::vector<CharT> out;
-    std::format_to_n_result result = std::format_to_n(
-        std::back_inserter(out), 5, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 5, std::locale(), fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...);
     diff_type size = std::min<diff_type>(5, formatted_size);
 
     assert(result.size == formatted_size);
-    assert(std::equal(out.begin(), out.end(), expected.begin(),
-                      expected.begin() + size));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.begin() + size));
   }
   {
     std::basic_string<CharT> out;
-    std::format_to_n_result result = std::format_to_n(
-        std::back_inserter(out), 1000, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, std::locale(), fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...);
     diff_type size = std::min<diff_type>(1000, formatted_size);
@@ -73,8 +68,7 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   {
     // Test the returned iterator.
     std::basic_string<CharT> out(10, CharT(' '));
-    std::format_to_n_result result =
-        std::format_to_n(out.begin(), 10, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(out.begin(), 10, std::locale(), fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...);
     diff_type size = std::min<diff_type>(10, formatted_size);
@@ -88,8 +82,7 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
                   "If the difference type isn't negative the test will fail "
                   "due to using a large positive value.");
     CharT buffer[1] = {CharT(0)};
-    std::format_to_n_result result =
-        std::format_to_n(buffer, -1, std::locale(), fmt, args...);
+    std::format_to_n_result result = std::format_to_n(buffer, -1, std::locale(), fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(std::locale(), fmt, args...);
 
@@ -99,8 +92,8 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   }
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string<CharT> out;
diff --git a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp
index 6f69af4d99df9..a964fc2f6edf7 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/format_to_n.pass.cpp
@@ -29,13 +29,11 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   {
     std::list<CharT> out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 0, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 0, fmt, args...);
     // To avoid signedness warnings make sure formatted_size uses the same type
     // as result.size.
     using diff_type = decltype(result.size);
@@ -46,20 +44,17 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   }
   {
     std::vector<CharT> out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 5, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 5, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(fmt, args...);
     diff_type size = std::min<diff_type>(5, formatted_size);
 
     assert(result.size == formatted_size);
-    assert(std::equal(out.begin(), out.end(), expected.begin(),
-                      expected.begin() + size));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.begin() + size));
   }
   {
     std::basic_string<CharT> out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 1000, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(fmt, args...);
     diff_type size = std::min<diff_type>(1000, formatted_size);
@@ -70,8 +65,7 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   {
     // Test the returned iterator.
     std::basic_string<CharT> out(10, CharT(' '));
-    std::format_to_n_result result =
-        std::format_to_n(out.begin(), 10, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(out.begin(), 10, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(fmt, args...);
     diff_type size = std::min<diff_type>(10, formatted_size);
@@ -95,8 +89,8 @@ auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
   }
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string<CharT> out;
diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
index b31170b62c596..34f13191f17f4 100644
--- a/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.locale.pass.cpp
@@ -28,15 +28,14 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   size_t size = std::formatted_size(std::locale(), fmt, args...);
   assert(size == expected.size());
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::formatted_size(std::locale(), fmt, args...);
diff --git a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
index befb4ce626109..4171573d1348f 100644
--- a/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/formatted_size.pass.cpp
@@ -25,15 +25,14 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   size_t size = std::formatted_size(fmt, args...);
   assert(size == expected.size());
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::formatted_size(fmt, args...);
diff --git a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
index 5d86e46be7e6b..6f21454975117 100644
--- a/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/locale-specific_form.pass.cpp
@@ -126,21 +126,19 @@ struct numpunct<wchar_t> : std::numpunct<wchar_t> {
 #endif
 
 template <class CharT, class... Args>
-void test(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
-          const Args&... args) {
+void test(std::basic_string<CharT> expected, std::basic_string<CharT> fmt, const Args&... args) {
   // *** format ***
   {
     std::basic_string<CharT> out = std::format(fmt, args...);
     if constexpr (std::same_as<CharT, char>)
       if (out != expected)
-        std::cerr << "\nFormat string   " << fmt << "\nExpected output "
-                  << expected << "\nActual output   " << out << '\n';
+        std::cerr << "\nFormat string   " << fmt << "\nExpected output " << expected << "\nActual output   " << out
+                  << '\n';
     assert(out == expected);
   }
   // *** vformat ***
   {
-    std::basic_string<CharT> out =
-        std::vformat(fmt, std::make_format_args<context_t<CharT>>(args...));
+    std::basic_string<CharT> out = std::vformat(fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(out == expected);
   }
   // *** format_to ***
@@ -153,16 +151,14 @@ void test(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
   // *** vformat_to ***
   {
     std::basic_string<CharT> out(expected.size(), CharT(' '));
-    auto it = std::vformat_to(out.begin(), fmt,
-                              std::make_format_args<context_t<CharT>>(args...));
+    auto it = std::vformat_to(out.begin(), fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(it == out.end());
     assert(out == expected);
   }
   // *** format_to_n ***
   {
     std::basic_string<CharT> out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 1000, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(fmt, args...);
     diff_type size = std::min<diff_type>(1000, formatted_size);
@@ -178,21 +174,19 @@ void test(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
 }
 
 template <class CharT, class... Args>
-void test(std::basic_string<CharT> expected, std::locale loc,
-          std::basic_string<CharT> fmt, const Args&... args) {
+void test(std::basic_string<CharT> expected, std::locale loc, std::basic_string<CharT> fmt, const Args&... args) {
   // *** format ***
   {
     std::basic_string<CharT> out = std::format(loc, fmt, args...);
     if constexpr (std::same_as<CharT, char>)
       if (out != expected)
-        std::cerr << "\nFormat string   " << fmt << "\nExpected output "
-                  << expected << "\nActual output   " << out << '\n';
+        std::cerr << "\nFormat string   " << fmt << "\nExpected output " << expected << "\nActual output   " << out
+                  << '\n';
     assert(out == expected);
   }
   // *** vformat ***
   {
-    std::basic_string<CharT> out = std::vformat(
-        loc, fmt, std::make_format_args<context_t<CharT>>(args...));
+    std::basic_string<CharT> out = std::vformat(loc, fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(out == expected);
   }
   // *** format_to ***
@@ -205,16 +199,14 @@ void test(std::basic_string<CharT> expected, std::locale loc,
   // *** vformat_to ***
   {
     std::basic_string<CharT> out(expected.size(), CharT(' '));
-    auto it = std::vformat_to(out.begin(), loc, fmt,
-                              std::make_format_args<context_t<CharT>>(args...));
+    auto it = std::vformat_to(out.begin(), loc, fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(it == out.end());
     assert(out == expected);
   }
   // *** format_to_n ***
   {
     std::basic_string<CharT> out;
-    std::format_to_n_result result =
-        std::format_to_n(std::back_inserter(out), 1000, loc, fmt, args...);
+    std::format_to_n_result result = std::format_to_n(std::back_inserter(out), 1000, loc, fmt, args...);
     using diff_type = decltype(result.size);
     diff_type formatted_size = std::formatted_size(loc, fmt, args...);
     diff_type size = std::min<diff_type>(1000, formatted_size);
@@ -239,13 +231,13 @@ struct numpunct_unicode<char> : std::numpunct<char> {
   string_type do_falsename() const override { return "ungültig"; }
 };
 
-#ifndef TEST_HAS_NO_WIDE_CHARACTERS
+#  ifndef TEST_HAS_NO_WIDE_CHARACTERS
 template <>
 struct numpunct_unicode<wchar_t> : std::numpunct<wchar_t> {
   string_type do_truename() const override { return L"gültig"; }
   string_type do_falsename() const override { return L"ungültig"; }
 };
-#endif
+#  endif
 #endif // TEST_HAS_NO_UNICODE
 
 template <class CharT>
@@ -268,8 +260,7 @@ void test_bool() {
   test(STR("false"), std::locale(LOCALE_en_US_UTF_8), STR("{:L}"), false);
 
 #ifndef TEST_HAS_NO_UNICODE
-  std::locale loc_unicode =
-      std::locale(std::locale(), new numpunct_unicode<CharT>());
+  std::locale loc_unicode = std::locale(std::locale(), new numpunct_unicode<CharT>());
 
   test(STR("gültig"), loc_unicode, STR("{:L}"), true);
   test(STR("ungültig"), loc_unicode, STR("{:L}"), false);
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp
index c9dbab1002c4e..3bad47da7af95 100644
--- a/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/vformat.locale.pass.cpp
@@ -24,20 +24,17 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
-  std::basic_string<CharT> out = std::vformat(
-      std::locale(), fmt, std::make_format_args<context_t<CharT>>(args...));
+  std::basic_string<CharT> out = std::vformat(std::locale(), fmt, std::make_format_args<context_t<CharT>>(args...));
   assert(out == expected);
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
-    (void) std::vformat(std::locale(), fmt,
-                        std::make_format_args<context_t<CharT>>(args...));
+    (void)std::vformat(std::locale(), fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(false);
   } catch ([[maybe_unused]] std::format_error& e) {
     LIBCPP_ASSERT(e.what() == what);
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp
index 8688d560c41c6..effcd722ba9d4 100644
--- a/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/vformat.pass.cpp
@@ -23,16 +23,14 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
-  std::basic_string<CharT> out =
-      std::vformat(fmt, std::make_format_args<context_t<CharT>>(args...));
+  std::basic_string<CharT> out = std::vformat(fmt, std::make_format_args<context_t<CharT>>(args...));
   assert(out == expected);
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     TEST_IGNORE_NODISCARD std::vformat(fmt, std::make_format_args<context_t<CharT>>(args...));
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp
index 737f618085591..b441656cfc599 100644
--- a/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.locale.pass.cpp
@@ -30,49 +30,40 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   {
     std::basic_string<CharT> out(expected.size(), CharT(' '));
-    auto it = std::vformat_to(out.begin(), std::locale(), fmt,
-                              std::make_format_args<context_t<CharT>>(args...));
+    auto it = std::vformat_to(out.begin(), std::locale(), fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(it == out.end());
     assert(out == expected);
   }
   {
     std::list<CharT> out;
-    std::vformat_to(std::back_inserter(out), std::locale(), fmt,
-                    std::make_format_args<context_t<CharT>>(args...));
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args<context_t<CharT>>(args...));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     std::vector<CharT> out;
-    std::vformat_to(std::back_inserter(out), std::locale(), fmt,
-                    std::make_format_args<context_t<CharT>>(args...));
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args<context_t<CharT>>(args...));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     assert(expected.size() < 4096 && "Update the size of the buffer.");
     CharT out[4096];
-    CharT* it =
-        std::vformat_to(out, std::locale(), fmt,
-                        std::make_format_args<context_t<CharT>>(args...));
+    CharT* it = std::vformat_to(out, std::locale(), fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(std::distance(out, it) == int(expected.size()));
     // Convert to std::string since output contains '\0' for boolean tests.
     assert(std::basic_string<CharT>(out, it) == expected);
   }
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string<CharT> out;
-    std::vformat_to(std::back_inserter(out), std::locale(), fmt,
-                    std::make_format_args<context_t<CharT>>(args...));
+    std::vformat_to(std::back_inserter(out), std::locale(), fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(false);
   } catch ([[maybe_unused]] std::format_error& e) {
     LIBCPP_ASSERT(e.what() == what);
diff --git a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp
index 0bec98a13b7a2..e6880d780f513 100644
--- a/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.functions/vformat_to.pass.cpp
@@ -31,48 +31,40 @@
 #include "test_macros.h"
 #include "format_tests.h"
 
-auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected,
-                                           std::basic_string<CharT> fmt,
+auto test = []<class CharT, class... Args>(std::basic_string<CharT> expected, std::basic_string<CharT> fmt,
                                            const Args&... args) {
   {
     std::basic_string<CharT> out(expected.size(), CharT(' '));
-    auto it = std::vformat_to(out.begin(), fmt,
-                              std::make_format_args<context_t<CharT>>(args...));
+    auto it = std::vformat_to(out.begin(), fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(it == out.end());
     assert(out == expected);
   }
   {
     std::list<CharT> out;
-    std::vformat_to(std::back_inserter(out), fmt,
-                    std::make_format_args<context_t<CharT>>(args...));
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    std::vformat_to(std::back_inserter(out), fmt, std::make_format_args<context_t<CharT>>(args...));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     std::vector<CharT> out;
-    std::vformat_to(std::back_inserter(out), fmt,
-                    std::make_format_args<context_t<CharT>>(args...));
-    assert(
-        std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
+    std::vformat_to(std::back_inserter(out), fmt, std::make_format_args<context_t<CharT>>(args...));
+    assert(std::equal(out.begin(), out.end(), expected.begin(), expected.end()));
   }
   {
     assert(expected.size() < 4096 && "Update the size of the buffer.");
     CharT out[4096];
-    CharT* it = std::vformat_to(
-        out, fmt, std::make_format_args<context_t<CharT>>(args...));
+    CharT* it = std::vformat_to(out, fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(std::distance(out, it) == int(expected.size()));
     // Convert to std::string since output contains '\0' for boolean tests.
     assert(std::basic_string<CharT>(out, it) == expected);
   }
 };
 
-auto test_exception = []<class CharT, class... Args>(
-    std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
+auto test_exception =
+    []<class CharT, class... Args>(std::string_view what, std::basic_string<CharT> fmt, const Args&... args) {
 #ifndef TEST_HAS_NO_EXCEPTIONS
   try {
     std::basic_string<CharT> out;
-    std::vformat_to(std::back_inserter(out), fmt,
-                    std::make_format_args<context_t<CharT>>(args...));
+    std::vformat_to(std::back_inserter(out), fmt, std::make_format_args<context_t<CharT>>(args...));
     assert(false);
   } catch ([[maybe_unused]] std::format_error& e) {
     LIBCPP_ASSERT(e.what() == what);

From 9c2891a8eddb7380a2809af297789eb07713df19 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Mon, 24 Jan 2022 20:08:35 -0800
Subject: [PATCH 595/946] [InstrProf][correlation] Read DWARFv5 `OP_addrx`
 location

Correctly read `OP_addrx` type encodings for DWARFv5 locations.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D118098
---
 .../profile/Linux/instrprof-debug-info-correlate.c | 14 ++++++++++----
 llvm/lib/ProfileData/InstrProfCorrelator.cpp       | 12 +++++++++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
index 9aa24d72376ca..4f46586a16999 100644
--- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
+++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
@@ -1,12 +1,18 @@
 // REQUIRES: zlib
 
 // Value profiling is currently not supported in lightweight mode.
-// RUN: %clang_pgogen -o %t -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
-// RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
-// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite
-
 // RUN: %clang_pgogen -o %t.normal -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal
 // RUN: llvm-profdata merge -o %t.normal.profdata %t.profraw
 
+// RUN: %clang_pgogen -o %t.d4 -g -gdwarf-4 -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: env LLVM_PROFILE_FILE=%t.d4.proflite %run %t.d4
+// RUN: llvm-profdata merge -o %t.d4.profdata --debug-info=%t.d4 %t.d4.proflite
+
+// RUN: diff %t.normal.profdata %t.d4.profdata
+
+// RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
+// RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite
+
 // RUN: diff %t.normal.profdata %t.profdata
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index 8be2cbff3a20c..5f06541916ddd 100644
--- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -167,13 +167,19 @@ DwarfInstrProfCorrelator<IntPtrT>::getLocation(const DWARFDie &Die) const {
     return {};
   }
   auto &DU = *Die.getDwarfUnit();
+  auto AddressSize = DU.getAddressByteSize();
   for (auto &Location : *Locations) {
-    auto AddressSize = DU.getAddressByteSize();
     DataExtractor Data(Location.Expr, DICtx->isLittleEndian(), AddressSize);
     DWARFExpression Expr(Data, AddressSize);
-    for (auto &Op : Expr)
-      if (Op.getCode() == dwarf::DW_OP_addr)
+    for (auto &Op : Expr) {
+      if (Op.getCode() == dwarf::DW_OP_addr) {
         return Op.getRawOperand(0);
+      } else if (Op.getCode() == dwarf::DW_OP_addrx) {
+        uint64_t Index = Op.getRawOperand(0);
+        if (auto SA = DU.getAddrOffsetSectionItem(Index))
+          return SA->Address;
+      }
+    }
   }
   return {};
 }

From 4be86d18c0fc97a5eab26628e2a830f914c591eb Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Wed, 22 Dec 2021 17:42:10 -0600
Subject: [PATCH 596/946] [IROutliner] Disallow outlining calls that return
 twice.

Functions that return twice can cause the IR Outliner to miscompile the given program. These function rely on information about the stack to be the same, and this may not necessarily be the case if called from an outlined function. So, we simply call these instructions illegal for the outliner to remove.

Reviewers: paquette

Differential Revision: https://reviews.llvm.org/D110007
---
 llvm/include/llvm/Transforms/IPO/IROutliner.h |  5 ++
 .../IROutliner/illegal-returns-twice.ll       | 66 +++++++++++++++++++
 2 files changed, 71 insertions(+)
 create mode 100644 llvm/test/Transforms/IROutliner/illegal-returns-twice.ll

diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index dcae4454f8281..16f597ab898e5 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -367,6 +367,11 @@ class IROutliner {
       Function *F = CI.getCalledFunction();
       if (!F || CI.isIndirectCall() || !F->hasName())
         return false;
+      // Returning twice can cause issues with the state of the function call
+      // that were not expected when the function was used, so we do not include
+      // the call in outlined functions.
+      if (CI.canReturnTwice())
+        return false;
       return true;
     }
     // TODO: Handle FreezeInsts.  Since a frozen value could be frozen inside
diff --git a/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll b/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll
new file mode 100644
index 0000000000000..31113ab5cd644
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/illegal-returns-twice.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we do not outline functions that are marked as returns
+; twice, since these can alter the frame of the function and affect how the
+; outliner behaves, causing miscompiles.
+
+; Function Attrs: optsize returns_twice
+declare i32 @setjmp(i32*) local_unnamed_addr #1
+@tmp_jmpb = global [37 x i32] zeroinitializer, align 16
+
+define void @function1() {
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0))
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %call = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0))
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @function2() {
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0))
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  %call = call i32 @setjmp(i32* getelementptr inbounds ([37 x i32], [37 x i32]* @tmp_jmpb, i64 0, i64 0))
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+attributes #1 = { optsize returns_twice }

From c39d22d1968cf07e54b5816ba76dccef8acaace1 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Tue, 25 Jan 2022 09:37:14 -0800
Subject: [PATCH 597/946] [CMake] Set sanitizer test C++ library on Linux

We always want to use the in-tree libc++ for tests.

Differential Revision: https://reviews.llvm.org/D118161
---
 clang/cmake/caches/Fuchsia-stage2.cmake | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index aed39d0ca9fff..d998db64d87ab 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -134,6 +134,8 @@ foreach(target aarch64-unknown-linux-gnu;armv7-unknown-linux-gnueabihf;i386-unkn
     set(RUNTIMES_${target}_LLVM_ENABLE_ASSERTIONS OFF CACHE BOOL "")
     set(RUNTIMES_${target}_SANITIZER_CXX_ABI "libc++" CACHE STRING "")
     set(RUNTIMES_${target}_SANITIZER_CXX_ABI_INTREE ON CACHE BOOL "")
+    set(RUNTIMES_${target}_SANITIZER_TEST_CXX "libc++" CACHE STRING "")
+    set(RUNTIMES_${target}_SANITIZER_TEST_CXX_INTREE ON CACHE BOOL "")
     set(RUNTIMES_${target}_COMPILER_RT_TEST_COMPILER_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "")
     set(RUNTIMES_${target}_SANITIZER_COMMON_TEST_TARGET_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "")
     set(RUNTIMES_${target}_TSAN_TEST_TARGET_CFLAGS "--unwindlib=libunwind -static-libgcc" CACHE STRING "")

From 0ad19a833177861be55fefaff725ab89c8695d01 Mon Sep 17 00:00:00 2001
From: JackAKirk <jack.kirk@codeplay.com>
Date: Mon, 24 Jan 2022 12:32:36 -0800
Subject: [PATCH 598/946] [CUDA,NVPTX] Corrected fragment size for tf32 LD B
 matrix.

Signed-off-by: JackAKirk <jack.kirk@codeplay.com>

Reviewed By: tra

Differential Revision: https://reviews.llvm.org/D118023
---
 clang/lib/CodeGen/CGBuiltin.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cd35e7cbe76f7..a80a55e054a3b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17190,7 +17190,7 @@ static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
   case NVPTX::BI__mma_tf32_m16n16k8_ld_a:
     return MMA_LDST(4, m16n16k8_load_a_tf32);
   case NVPTX::BI__mma_tf32_m16n16k8_ld_b:
-    return MMA_LDST(2, m16n16k8_load_b_tf32);
+    return MMA_LDST(4, m16n16k8_load_b_tf32);
   case NVPTX::BI__mma_tf32_m16n16k8_ld_c:
     return MMA_LDST(8, m16n16k8_load_c_f32);
 

From ea1ac183f47ebc03051e63f76c4cb3ad3c5b9f3a Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <ravishankarm@google.com>
Date: Tue, 25 Jan 2022 10:58:53 -0800
Subject: [PATCH 599/946] [mlir][Linalg] Fix incorrect fusion with reshape ops
 by linearization.

Fusion of reshape ops by linearization incorrectly inverted the
indexing map before linearizing dimensions. This leads to incorrect
indexing maps used in the fused operation.

Differential Revision: https://reviews.llvm.org/D117908
---
 .../Linalg/Transforms/ElementwiseOpFusion.cpp | 12 ++--
 .../Linalg/reshape_linearization_fusion.mlir  | 58 +++++++++++++++++++
 2 files changed, 62 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
index aaa5d4c386208..6532343830255 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -915,11 +915,9 @@ struct FoldProducerReshapeOpByLinearization
       // the operands of the consumers that arent fused are the same.
       SmallVector<AffineMap> fusedIndexMaps = genericOp.getIndexingMaps();
 
-      // Accepted consumer maps are either identity or permutation.
-      auto invMap = inversePermutation(fusedIndexMaps[en.index()]);
-
       // Compute the indexing map to use for the result of the producer.
-      AffineMap modifiedMap = linearizeCollapsedDims(invMap, reshapeOp);
+      AffineMap modifiedMap =
+          linearizeCollapsedDims(fusedIndexMaps[en.index()], reshapeOp);
       // The modified map cannot have symbols.
       if (modifiedMap.getNumSymbols())
         return failure();
@@ -1166,11 +1164,9 @@ struct FoldConsumerReshapeOpByLinearization
     // those for the operands of the producer.
     SmallVector<AffineMap> fusedIndexMaps = producer.getIndexingMaps();
 
-    auto invMap = inversePermutation(
-        producer.getTiedIndexingMap(producer.getOutputOperand(0)));
-
     // Compute the indexing map to use for the operand of the producer.
-    AffineMap modifiedMap = linearizeCollapsedDims(invMap, reshapeOp);
+    AffineMap modifiedMap = linearizeCollapsedDims(
+        producer.getTiedIndexingMap(producer.getOutputOperand(0)), reshapeOp);
     for (AffineExpr expr : modifiedMap.getResults()) {
       if (!expr.isPureAffine()) {
         return rewriter.notifyMatchFailure(
diff --git a/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir b/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir
index e2ec8ffa5c3c7..37edb04a7f587 100644
--- a/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir
+++ b/mlir/test/Dialect/Linalg/reshape_linearization_fusion.mlir
@@ -1,5 +1,11 @@
 // RUN: mlir-opt -split-input-file -linalg-fold-reshape-ops-by-linearization %s | FileCheck %s
 
+// Note: These tests fuse the reshape ops by linearization. This can create
+// indexing maps which are hard to analyse later on. These patterns are useful
+// only if the folded dimensions in the reshape op are unit extent. Tests here
+// are more general for testing purposes, but use of these pattern for non-unit
+// dimensions should be deprecated.
+
 #map0 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
 func @generic_op_reshape_producer_fusion(%arg0 : tensor<?x?x?xi32>)
   -> tensor<?x?x4x?xi32> {
@@ -227,3 +233,55 @@ func @generic_op_permultation_reshape_consumer_fusion_unused_dim(%arg0 : tensor<
 //  CHECK-SAME:     indexing_maps = [#[[MAP0]], #[[MAP1]]]
 //  CHECK-SAME:     ins(%[[ARG0]] : tensor<6x1xf32>)
 //  CHECK-SAME:     outs(%[[T1]] : tensor<6xi32>)
+
+// -----
+
+#map0 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d2, d4, d0, d6, d3, d5, d1)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>
+func @permuted_dims_fusion_expand_shape(%arg0 : tensor<3x8x7x240xf32>) -> tensor<4x6x3x8x2x5x7xf32> {
+  %0 = tensor.expand_shape %arg0 [[0], [1, 2], [3], [4, 5, 6]]
+      : tensor<3x8x7x240xf32> into tensor<3x2x4x7x8x5x6xf32>
+  %1 = linalg.init_tensor [4, 6, 3, 8, 2, 5, 7] : tensor<4x6x3x8x2x5x7xf32>
+  %2 = linalg.generic {
+      indexing_maps = [#map0, #map1],
+      iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
+      ins(%0 : tensor<3x2x4x7x8x5x6xf32>) outs(%1 : tensor<4x6x3x8x2x5x7xf32>) {
+      ^bb0(%arg1 : f32, %arg2 : f32):
+        linalg.yield %arg1 : f32
+      } -> tensor<4x6x3x8x2x5x7xf32>
+  return %2 : tensor<4x6x3x8x2x5x7xf32>
+}
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d2, d0 + d4 * 4, d6, d1 + d3 * 30 + d5 * 6)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>
+//      CHECK: func @permuted_dims_fusion_expand_shape(
+// CHECK-SAME:     %[[ARG0:.+]]: tensor<3x8x7x240xf32>)
+//      CHECK:   %[[RESULT:.+]] = linalg.generic
+// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
+// CHECK-SAME:       ins(%[[ARG0]] :
+//      CHECK:   return %[[RESULT]]
+
+// -----
+
+#map0 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d2, d4, d0, d6, d3, d5, d1)>
+#map1 = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>
+func @permuted_dims_fusion_collapse_shape(%arg0 : tensor<4x6x3x8x2x5x7xf32>) -> tensor<3x8x7x240xf32> {
+  %0 = linalg.init_tensor [3, 2, 4, 7, 8, 5, 6] : tensor<3x2x4x7x8x5x6xf32>
+  %1 = linalg.generic {
+      indexing_maps = [#map1, #map0],
+      iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "parallel"]}
+      ins(%arg0 : tensor<4x6x3x8x2x5x7xf32>) outs(%0 : tensor<3x2x4x7x8x5x6xf32>) {
+      ^bb0(%arg1 : f32, %arg2 : f32):
+        linalg.yield %arg1 : f32
+      } -> tensor<3x2x4x7x8x5x6xf32>
+  %2 = tensor.collapse_shape %1 [[0], [1, 2], [3], [4, 5, 6]]
+      : tensor<3x2x4x7x8x5x6xf32> into tensor<3x8x7x240xf32>
+  return %2 : tensor<3x8x7x240xf32>
+}
+//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d0, d1, d2, d3, d4, d5, d6)>
+//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6) -> (d2, d0 + d4 * 4, d6, d1 + d3 * 30 + d5 * 6)>
+//      CHECK: func @permuted_dims_fusion_collapse_shape(
+// CHECK-SAME:     %[[ARG0:.+]]: tensor<4x6x3x8x2x5x7xf32>)
+//      CHECK:   %[[RESULT:.+]] = linalg.generic
+// CHECK-SAME:       indexing_maps = [#[[MAP0]], #[[MAP1]]]
+// CHECK-SAME:       ins(%[[ARG0]] :
+//      CHECK:   return %[[RESULT]]

From 0944c196c58f62299540983e2d10cd7bef60691a Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm@google.com>
Date: Wed, 29 Dec 2021 15:46:43 +0000
Subject: [PATCH 600/946] [libTooling] Adds more support for constructing
 object access expressions.

This patch adds a `buildAccess` function, which constructs a string with the
proper operator to use based on the expression's form and type. It also adds two
predicates related to smart pointers, which are needed by `buildAccess` but are
also of general value.

We deprecate `buildDot` and `buildArrow` in favor of the more general
`buildAccess`. These will be removed in a future patch.

Differential Revision: https://reviews.llvm.org/D116377
---
 .../Tooling/Transformer/SourceCodeBuilders.h  |  35 ++++
 .../Transformer/SourceCodeBuilders.cpp        |  83 +++++++-
 clang/lib/Tooling/Transformer/Stencil.cpp     |  72 +------
 .../Tooling/SourceCodeBuildersTest.cpp        | 191 +++++++++++++++++-
 clang/unittests/Tooling/StencilTest.cpp       |  42 ++--
 5 files changed, 323 insertions(+), 100 deletions(-)

diff --git a/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h
index b6d9bd0e2d5d3..ab0eb71ef44e2 100644
--- a/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h
+++ b/clang/include/clang/Tooling/Transformer/SourceCodeBuilders.h
@@ -43,6 +43,15 @@ inline bool needParensBeforeDotOrArrow(const Expr &E) {
 /// Determines whether printing this expression to the right of a unary operator
 /// requires a parentheses to preserve its meaning.
 bool needParensAfterUnaryOperator(const Expr &E);
+
+// Recognizes known types (and sugared versions thereof) that overload the `*`
+// and `->` operator. Below is the list of currently included types, but it is
+// subject to change:
+//
+// * std::unique_ptr, std::shared_ptr, std::weak_ptr,
+// * std::optional, absl::optional, llvm::Optional,
+// * absl::StatusOr, llvm::Expected.
+bool isKnownPointerLikeType(QualType Ty, ASTContext &Context);
 /// @}
 
 /// \name Basic code-string generation utilities.
@@ -69,6 +78,8 @@ llvm::Optional<std::string> buildAddressOf(const Expr &E,
 ///  `x` becomes `x.`
 ///  `*a` becomes `a->`
 ///  `a+b` becomes `(a+b).`
+///
+/// DEPRECATED. Use `buildAccess`.
 llvm::Optional<std::string> buildDot(const Expr &E, const ASTContext &Context);
 
 /// Adds an arrow to the end of the given expression, but adds parentheses
@@ -77,8 +88,32 @@ llvm::Optional<std::string> buildDot(const Expr &E, const ASTContext &Context);
 ///  `x` becomes `x->`
 ///  `&a` becomes `a.`
 ///  `a+b` becomes `(a+b)->`
+///
+/// DEPRECATED. Use `buildAccess`.
 llvm::Optional<std::string> buildArrow(const Expr &E,
                                        const ASTContext &Context);
+
+/// Specifies how to classify pointer-like types -- like values or like pointers
+/// -- with regard to generating member-access syntax.
+enum class PLTClass : bool {
+  Value,
+  Pointer,
+};
+
+/// Adds an appropriate access operator (`.`, `->` or nothing, in the case of
+/// implicit `this`) to the end of the given expression. Adds parentheses when
+/// needed by the syntax and simplifies when possible. If `PLTypeClass` is
+/// `Pointer`, for known pointer-like types (see `isKnownPointerLikeType`),
+/// treats `operator->` and `operator*` like the built-in `->` and `*`
+/// operators.
+///
+///  `x` becomes `x->` or `x.`, depending on `E`'s type
+///  `a+b` becomes `(a+b)->` or `(a+b).`, depending on `E`'s type
+///  `&a` becomes `a.`
+///  `*a` becomes `a->`
+llvm::Optional<std::string>
+buildAccess(const Expr &E, ASTContext &Context,
+            PLTClass Classification = PLTClass::Pointer);
 /// @}
 
 } // namespace tooling
diff --git a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
index a1c99b60216b7..65ade4387a5eb 100644
--- a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
@@ -10,6 +10,8 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Tooling/Transformer/SourceCode.h"
 #include "llvm/ADT/Twine.h"
 #include <string>
@@ -60,6 +62,16 @@ bool tooling::needParensAfterUnaryOperator(const Expr &E) {
   return false;
 }
 
+bool tooling::isKnownPointerLikeType(QualType Ty, ASTContext &Context) {
+  using namespace ast_matchers;
+  const auto PointerLikeTy = type(hasUnqualifiedDesugaredType(
+      recordType(hasDeclaration(cxxRecordDecl(hasAnyName(
+          "::std::unique_ptr", "::std::shared_ptr", "::std::weak_ptr",
+          "::std::optional", "::absl::optional", "::llvm::Optional",
+          "absl::StatusOr", "::llvm::Expected"))))));
+  return match(PointerLikeTy, Ty, Context).size() > 0;
+}
+
 llvm::Optional<std::string> tooling::buildParens(const Expr &E,
                                                  const ASTContext &Context) {
   StringRef Text = getText(E, Context);
@@ -114,8 +126,10 @@ llvm::Optional<std::string> tooling::buildAddressOf(const Expr &E,
   return ("&" + Text).str();
 }
 
-llvm::Optional<std::string> tooling::buildDot(const Expr &E,
-                                              const ASTContext &Context) {
+// Append the appropriate access operation (syntactically) to `E`, assuming `E`
+// is a non-pointer value.
+static llvm::Optional<std::string>
+buildAccessForValue(const Expr &E, const ASTContext &Context) {
   if (const auto *Op = llvm::dyn_cast<UnaryOperator>(&E))
     if (Op->getOpcode() == UO_Deref) {
       // Strip leading '*', add following '->'.
@@ -138,8 +152,10 @@ llvm::Optional<std::string> tooling::buildDot(const Expr &E,
   return (Text + ".").str();
 }
 
-llvm::Optional<std::string> tooling::buildArrow(const Expr &E,
-                                                const ASTContext &Context) {
+// Append the appropriate access operation (syntactically) to `E`, assuming `E`
+// is a pointer value.
+static llvm::Optional<std::string>
+buildAccessForPointer(const Expr &E, const ASTContext &Context) {
   if (const auto *Op = llvm::dyn_cast<UnaryOperator>(&E))
     if (Op->getOpcode() == UO_AddrOf) {
       // Strip leading '&', add following '.'.
@@ -160,3 +176,62 @@ llvm::Optional<std::string> tooling::buildArrow(const Expr &E,
     return ("(" + Text + ")->").str();
   return (Text + "->").str();
 }
+
+llvm::Optional<std::string> tooling::buildDot(const Expr &E,
+                                              const ASTContext &Context) {
+  return buildAccessForValue(E, Context);
+}
+
+llvm::Optional<std::string> tooling::buildArrow(const Expr &E,
+                                                const ASTContext &Context) {
+  return buildAccessForPointer(E, Context);
+}
+
+// If `E` is an overloaded-operator call of kind `K` on an object `O`, returns
+// `O`. Otherwise, returns `nullptr`.
+static const Expr *maybeGetOperatorObjectArg(const Expr &E,
+                                             OverloadedOperatorKind K) {
+  if (const auto *OpCall = dyn_cast<clang::CXXOperatorCallExpr>(&E)) {
+    if (OpCall->getOperator() == K && OpCall->getNumArgs() == 1)
+      return OpCall->getArg(0);
+  }
+  return nullptr;
+}
+
+static bool treatLikePointer(QualType Ty, PLTClass C, ASTContext &Context) {
+  switch (C) {
+  case PLTClass::Value:
+    return false;
+  case PLTClass::Pointer:
+    return isKnownPointerLikeType(Ty, Context);
+  }
+}
+
+// FIXME: move over the other `maybe` functionality from Stencil. Should all be
+// in one place.
+llvm::Optional<std::string> tooling::buildAccess(const Expr &RawExpression,
+                                                 ASTContext &Context,
+                                                 PLTClass Classification) {
+  if (RawExpression.isImplicitCXXThis())
+    // Return the empty string, because `None` signifies some sort of failure.
+    return std::string();
+
+  const Expr *E = RawExpression.IgnoreImplicitAsWritten();
+
+  if (E->getType()->isAnyPointerType() ||
+      treatLikePointer(E->getType(), Classification, Context)) {
+    // Strip off operator-> calls. They can only occur inside an actual arrow
+    // member access, so we treat them as equivalent to an actual object
+    // expression.
+    if (const auto *Obj = maybeGetOperatorObjectArg(*E, clang::OO_Arrow))
+      E = Obj;
+    return buildAccessForPointer(*E, Context);
+  }
+
+  if (const auto *Obj = maybeGetOperatorObjectArg(*E, clang::OO_Star)) {
+    if (treatLikePointer(Obj->getType(), Classification, Context))
+      return buildAccessForPointer(*Obj, Context);
+  };
+
+  return buildAccessForValue(*E, Context);
+}
diff --git a/clang/lib/Tooling/Transformer/Stencil.cpp b/clang/lib/Tooling/Transformer/Stencil.cpp
index 8b20ef34c3ff2..348d04dbaf4a3 100644
--- a/clang/lib/Tooling/Transformer/Stencil.cpp
+++ b/clang/lib/Tooling/Transformer/Stencil.cpp
@@ -11,7 +11,6 @@
 #include "clang/AST/ASTTypeTraits.h"
 #include "clang/AST/Expr.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
-#include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Lex/Lexer.h"
 #include "clang/Tooling/Transformer/SourceCode.h"
@@ -56,39 +55,6 @@ static Error printNode(StringRef Id, const MatchFinder::MatchResult &Match,
   return Error::success();
 }
 
-// FIXME: Consider memoizing this function using the `ASTContext`.
-static bool isSmartPointerType(QualType Ty, ASTContext &Context) {
-  using namespace ::clang::ast_matchers;
-
-  // Optimization: hard-code common smart-pointer types. This can/should be
-  // removed if we start caching the results of this function.
-  auto KnownSmartPointer =
-      cxxRecordDecl(hasAnyName("::std::unique_ptr", "::std::shared_ptr"));
-  const auto QuacksLikeASmartPointer = cxxRecordDecl(
-      hasMethod(cxxMethodDecl(hasOverloadedOperatorName("->"),
-                              returns(qualType(pointsTo(type()))))),
-      hasMethod(cxxMethodDecl(hasOverloadedOperatorName("*"),
-                              returns(qualType(references(type()))))));
-  const auto SmartPointer = qualType(hasDeclaration(
-      cxxRecordDecl(anyOf(KnownSmartPointer, QuacksLikeASmartPointer))));
-  return match(SmartPointer, Ty, Context).size() > 0;
-}
-
-// Identifies use of `operator*` on smart pointers, and returns the underlying
-// smart-pointer expression; otherwise, returns null.
-static const Expr *isSmartDereference(const Expr &E, ASTContext &Context) {
-  using namespace ::clang::ast_matchers;
-
-  const auto HasOverloadedArrow = cxxRecordDecl(hasMethod(cxxMethodDecl(
-      hasOverloadedOperatorName("->"), returns(qualType(pointsTo(type()))))));
-  // Verify it is a smart pointer by finding `operator->` in the class
-  // declaration.
-  auto Deref = cxxOperatorCallExpr(
-      hasOverloadedOperatorName("*"), hasUnaryOperand(expr().bind("arg")),
-      callee(cxxMethodDecl(ofClass(HasOverloadedArrow))));
-  return selectFirst<Expr>("arg", match(Deref, E, Context));
-}
-
 namespace {
 // An arbitrary fragment of code within a stencil.
 class RawTextStencil : public StencilInterface {
@@ -196,7 +162,7 @@ class UnaryOperationStencil : public StencilInterface {
       break;
     case UnaryNodeOperator::MaybeDeref:
       if (E->getType()->isAnyPointerType() ||
-          isSmartPointerType(E->getType(), *Match.Context)) {
+          tooling::isKnownPointerLikeType(E->getType(), *Match.Context)) {
         // Strip off any operator->. This can only occur inside an actual arrow
         // member access, so we treat it as equivalent to an actual object
         // expression.
@@ -216,7 +182,7 @@ class UnaryOperationStencil : public StencilInterface {
       break;
     case UnaryNodeOperator::MaybeAddressOf:
       if (E->getType()->isAnyPointerType() ||
-          isSmartPointerType(E->getType(), *Match.Context)) {
+          tooling::isKnownPointerLikeType(E->getType(), *Match.Context)) {
         // Strip off any operator->. This can only occur inside an actual arrow
         // member access, so we treat it as equivalent to an actual object
         // expression.
@@ -311,34 +277,12 @@ class AccessStencil : public StencilInterface {
     if (E == nullptr)
       return llvm::make_error<StringError>(errc::invalid_argument,
                                            "Id not bound: " + BaseId);
-    if (!E->isImplicitCXXThis()) {
-      llvm::Optional<std::string> S;
-      if (E->getType()->isAnyPointerType() ||
-          isSmartPointerType(E->getType(), *Match.Context)) {
-        // Strip off any operator->. This can only occur inside an actual arrow
-        // member access, so we treat it as equivalent to an actual object
-        // expression.
-        if (const auto *OpCall = dyn_cast<clang::CXXOperatorCallExpr>(E)) {
-          if (OpCall->getOperator() == clang::OO_Arrow &&
-              OpCall->getNumArgs() == 1) {
-            E = OpCall->getArg(0);
-          }
-        }
-        S = tooling::buildArrow(*E, *Match.Context);
-      } else if (const auto *Operand = isSmartDereference(*E, *Match.Context)) {
-        // `buildDot` already handles the built-in dereference operator, so we
-        // only need to catch overloaded `operator*`.
-        S = tooling::buildArrow(*Operand, *Match.Context);
-      } else {
-        S = tooling::buildDot(*E, *Match.Context);
-      }
-      if (S.hasValue())
-        *Result += *S;
-      else
-        return llvm::make_error<StringError>(
-            errc::invalid_argument,
-            "Could not construct object text from ID: " + BaseId);
-    }
+    llvm::Optional<std::string> S = tooling::buildAccess(*E, *Match.Context);
+    if (!S.hasValue())
+      return llvm::make_error<StringError>(
+          errc::invalid_argument,
+          "Could not construct object text from ID: " + BaseId);
+    *Result += *S;
     return Member->eval(Match, Result);
   }
 };
diff --git a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp
index ce99d1e7f5217..50167881e5cd6 100644
--- a/clang/unittests/Tooling/SourceCodeBuildersTest.cpp
+++ b/clang/unittests/Tooling/SourceCodeBuildersTest.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/Transformer/SourceCodeBuilders.h"
+#include "clang/AST/Type.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Tooling/Tooling.h"
@@ -24,8 +25,23 @@ using llvm::ValueIs;
 
 // Create a valid translation unit from a statement.
 static std::string wrapSnippet(StringRef StatementCode) {
-  return ("struct S { S(); S(int); int field; };\n"
+  return ("namespace std {\n"
+          "template <typename T> struct unique_ptr {\n"
+          "  T* operator->() const;\n"
+          "  T& operator*() const;\n"
+          "};\n"
+          "template <typename T> struct shared_ptr {\n"
+          "  T* operator->() const;\n"
+          "  T& operator*() const;\n"
+          "};\n"
+          "}\n"
+          "struct A { void super(); };\n"
+          "struct S : public A { S(); S(int); int Field; };\n"
           "S operator+(const S &a, const S &b);\n"
+          "struct Smart {\n"
+          "  S* operator->() const;\n"
+          "  S& operator*() const;\n"
+          "};\n"
           "auto test_snippet = []{" +
           StatementCode + "};")
       .str();
@@ -51,7 +67,8 @@ struct TestMatch {
 // `StatementCode` may contain other statements not described by `Matcher`.
 static llvm::Optional<TestMatch> matchStmt(StringRef StatementCode,
                                            StatementMatcher Matcher) {
-  auto AstUnit = buildASTFromCode(wrapSnippet(StatementCode));
+  auto AstUnit = buildASTFromCodeWithArgs(wrapSnippet(StatementCode),
+                                          {"-Wno-unused-value"});
   if (AstUnit == nullptr) {
     ADD_FAILURE() << "AST construction failed";
     return llvm::None;
@@ -95,7 +112,7 @@ TEST(SourceCodeBuildersTest, needParensAfterUnaryOperator) {
   testPredicate(needParensAfterUnaryOperator, "int(3.0);", false);
   testPredicate(needParensAfterUnaryOperator, "void f(); f();", false);
   testPredicate(needParensAfterUnaryOperator, "int a[3]; a[0];", false);
-  testPredicate(needParensAfterUnaryOperator, "S x; x.field;", false);
+  testPredicate(needParensAfterUnaryOperator, "S x; x.Field;", false);
   testPredicate(needParensAfterUnaryOperator, "int x = 1; --x;", false);
   testPredicate(needParensAfterUnaryOperator, "int x = 1; -x;", false);
 }
@@ -117,7 +134,7 @@ TEST(SourceCodeBuildersTest, mayEverNeedParens) {
   testPredicate(mayEverNeedParens, "int(3.0);", false);
   testPredicate(mayEverNeedParens, "void f(); f();", false);
   testPredicate(mayEverNeedParens, "int a[3]; a[0];", false);
-  testPredicate(mayEverNeedParens, "S x; x.field;", false);
+  testPredicate(mayEverNeedParens, "S x; x.Field;", false);
 }
 
 TEST(SourceCodeBuildersTest, mayEverNeedParensInImplictConversion) {
@@ -126,6 +143,50 @@ TEST(SourceCodeBuildersTest, mayEverNeedParensInImplictConversion) {
   testPredicateOnArg(mayEverNeedParens, "void f(S); f(3 + 5);", true);
 }
 
+TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeUniquePtr) {
+  std::string Snippet = "std::unique_ptr<int> P; P;";
+  auto StmtMatch =
+      matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty"))));
+  ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet;
+  EXPECT_TRUE(
+      isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs<QualType>("ty"),
+                             *StmtMatch->Result.Context))
+      << "Snippet: " << Snippet;
+}
+
+TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeSharedPtr) {
+  std::string Snippet = "std::shared_ptr<int> P; P;";
+  auto StmtMatch =
+      matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty"))));
+  ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet;
+  EXPECT_TRUE(
+      isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs<QualType>("ty"),
+                             *StmtMatch->Result.Context))
+      << "Snippet: " << Snippet;
+}
+
+TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeUnknownTypeFalse) {
+  std::string Snippet = "Smart P; P;";
+  auto StmtMatch =
+      matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty"))));
+  ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet;
+  EXPECT_FALSE(
+      isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs<QualType>("ty"),
+                             *StmtMatch->Result.Context))
+      << "Snippet: " << Snippet;
+}
+
+TEST(SourceCodeBuildersTest, isKnownPointerLikeTypeNormalTypeFalse) {
+  std::string Snippet = "int *P; P;";
+  auto StmtMatch =
+      matchStmt(Snippet, declRefExpr(hasType(qualType().bind("ty"))));
+  ASSERT_TRUE(StmtMatch) << "Snippet: " << Snippet;
+  EXPECT_FALSE(
+      isKnownPointerLikeType(*StmtMatch->Result.Nodes.getNodeAs<QualType>("ty"),
+                             *StmtMatch->Result.Context))
+      << "Snippet: " << Snippet;
+}
+
 static void testBuilder(
     llvm::Optional<std::string> (*Builder)(const Expr &, const ASTContext &),
     StringRef Snippet, StringRef Expected) {
@@ -136,6 +197,15 @@ static void testBuilder(
               ValueIs(std::string(Expected)));
 }
 
+static void testBuildAccess(StringRef Snippet, StringRef Expected,
+                            PLTClass C = PLTClass::Pointer) {
+  auto StmtMatch = matchStmt(Snippet, expr().bind("expr"));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context, C),
+              ValueIs(std::string(Expected)));
+}
+
 TEST(SourceCodeBuildersTest, BuildParensUnaryOp) {
   testBuilder(buildParens, "-4;", "(-4)");
 }
@@ -245,4 +315,117 @@ TEST(SourceCodeBuildersTest, BuildArrowBinaryOperation) {
 TEST(SourceCodeBuildersTest, BuildArrowValueAddressWithParens) {
   testBuilder(buildArrow, "S x; &(true ? x : x);", "(true ? x : x).");
 }
+
+TEST(SourceCodeBuildersTest, BuildAccessValue) {
+  testBuildAccess("S x; x;", "x.");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointerDereference) {
+  testBuildAccess("S *x; *x;", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointerDereferenceIgnoresParens) {
+  testBuildAccess("S *x; *(x);", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessValueBinaryOperation) {
+  testBuildAccess("S x; x + x;", "(x + x).");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointerDereferenceExprWithParens) {
+  testBuildAccess("S *x; *(x + 1);", "(x + 1)->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointer) {
+  testBuildAccess("S *x; x;", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessValueAddress) {
+  testBuildAccess("S x; &x;", "x.");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessValueAddressIgnoresParens) {
+  testBuildAccess("S x; &(x);", "x.");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessPointerBinaryOperation) {
+  testBuildAccess("S *x; x + 1;", "(x + 1)->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessValueAddressWithParens) {
+  testBuildAccess("S x; &(true ? x : x);", "(true ? x : x).");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointer) {
+  testBuildAccess("std::unique_ptr<int> x; x;", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointerAsValue) {
+  testBuildAccess("std::unique_ptr<int> x; x;", "x.", PLTClass::Value);
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointerDeref) {
+  testBuildAccess("std::unique_ptr<int> x; *x;", "x->");
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointerDerefAsValue) {
+  testBuildAccess("std::unique_ptr<int> x; *x;", "(*x).", PLTClass::Value);
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessSmartPointerMemberCall) {
+  StringRef Snippet = R"cc(
+    Smart x;
+    x->Field;
+  )cc";
+  auto StmtMatch =
+      matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind("expr"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context),
+              ValueIs(std::string("x->")));
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessIgnoreImplicit) {
+  StringRef Snippet = R"cc(
+    S x;
+    A *a;
+    a = &x;
+  )cc";
+  auto StmtMatch =
+      matchStmt(Snippet, binaryOperator(isAssignmentOperator(),
+                                        hasRHS(expr().bind("expr"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context),
+              ValueIs(std::string("x.")));
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessImplicitThis) {
+  StringRef Snippet = R"cc(
+    struct Struct {
+      void foo() {}
+      void bar() {
+        foo();
+      }
+    };
+  )cc";
+  auto StmtMatch = matchStmt(
+      Snippet,
+      cxxMemberCallExpr(onImplicitObjectArgument(cxxThisExpr().bind("expr"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context),
+              ValueIs(std::string()));
+}
+
+TEST(SourceCodeBuildersTest, BuildAccessImplicitThisIgnoreImplicitCasts) {
+  StringRef Snippet = "struct B : public A { void f() { super(); } };";
+  auto StmtMatch = matchStmt(
+      Snippet,
+      cxxMemberCallExpr(onImplicitObjectArgument(expr().bind("expr"))));
+  ASSERT_TRUE(StmtMatch);
+  EXPECT_THAT(buildAccess(*StmtMatch->Result.Nodes.getNodeAs<Expr>("expr"),
+                          *StmtMatch->Result.Context),
+              ValueIs(std::string()));
+}
 } // namespace
diff --git a/clang/unittests/Tooling/StencilTest.cpp b/clang/unittests/Tooling/StencilTest.cpp
index 6036f8f4fa381..28cc8281485a8 100644
--- a/clang/unittests/Tooling/StencilTest.cpp
+++ b/clang/unittests/Tooling/StencilTest.cpp
@@ -36,10 +36,13 @@ static std::string wrapSnippet(StringRef ExtraPreface,
     namespace N { class C {}; }
     namespace { class AnonC {}; }
     struct S { int Field; };
-    struct Smart {
-      S* operator->() const;
-      S& operator*() const;
+    namespace std {
+    template <typename T>
+    struct unique_ptr {
+      T* operator->() const;
+      T& operator*() const;
     };
+    }
   )cc";
   return (Preface + ExtraPreface + "auto stencil_test_snippet = []{" +
           StatementCode + "};")
@@ -326,32 +329,15 @@ TEST_F(StencilTest, MaybeDerefAddressExpr) {
 TEST_F(StencilTest, MaybeDerefSmartPointer) {
   StringRef Id = "id";
   std::string Snippet = R"cc(
-    Smart x;
+    std::unique_ptr<S> x;
     x;
   )cc";
   testExpr(Id, Snippet, maybeDeref(Id), "*x");
 }
 
-// Tests that unique_ptr specifically is handled.
-TEST_F(StencilTest, MaybeDerefSmartPointerUniquePtr) {
-  StringRef Id = "id";
-  // We deliberately specify `unique_ptr` as empty to verify that it matches
-  // because of its name, rather than its contents.
-  StringRef ExtraPreface =
-      "namespace std { template <typename T> class unique_ptr {}; }\n";
-  StringRef Snippet = R"cc(
-    std::unique_ptr<int> x;
-    x;
-  )cc";
-  auto StmtMatch = matchStmt(Snippet, expr().bind(Id), ExtraPreface);
-  ASSERT_TRUE(StmtMatch);
-  EXPECT_THAT_EXPECTED(maybeDeref(Id)->eval(StmtMatch->Result),
-                       HasValue(std::string("*x")));
-}
-
 TEST_F(StencilTest, MaybeDerefSmartPointerFromMemberExpr) {
   StringRef Id = "id";
-  std::string Snippet = "Smart x; x->Field;";
+  std::string Snippet = "std::unique_ptr<S> x; x->Field;";
   auto StmtMatch =
       matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind(Id))));
   ASSERT_TRUE(StmtMatch);
@@ -381,12 +367,12 @@ TEST_F(StencilTest, MaybeAddressOfDerefExpr) {
 
 TEST_F(StencilTest, MaybeAddressOfSmartPointer) {
   StringRef Id = "id";
-  testExpr(Id, "Smart x; x;", maybeAddressOf(Id), "x");
+  testExpr(Id, "std::unique_ptr<S> x; x;", maybeAddressOf(Id), "x");
 }
 
 TEST_F(StencilTest, MaybeAddressOfSmartPointerFromMemberCall) {
   StringRef Id = "id";
-  std::string Snippet = "Smart x; x->Field;";
+  std::string Snippet = "std::unique_ptr<S> x; x->Field;";
   auto StmtMatch =
       matchStmt(Snippet, memberExpr(hasObjectExpression(expr().bind(Id))));
   ASSERT_TRUE(StmtMatch);
@@ -396,7 +382,7 @@ TEST_F(StencilTest, MaybeAddressOfSmartPointerFromMemberCall) {
 
 TEST_F(StencilTest, MaybeAddressOfSmartPointerDerefNoCancel) {
   StringRef Id = "id";
-  testExpr(Id, "Smart x; *x;", maybeAddressOf(Id), "&*x");
+  testExpr(Id, "std::unique_ptr<S> x; *x;", maybeAddressOf(Id), "&*x");
 }
 
 TEST_F(StencilTest, AccessOpValue) {
@@ -446,7 +432,7 @@ TEST_F(StencilTest, AccessOpPointerDereference) {
 
 TEST_F(StencilTest, AccessOpSmartPointer) {
   StringRef Snippet = R"cc(
-    Smart x;
+    std::unique_ptr<S> x;
     x;
   )cc";
   StringRef Id = "id";
@@ -455,7 +441,7 @@ TEST_F(StencilTest, AccessOpSmartPointer) {
 
 TEST_F(StencilTest, AccessOpSmartPointerDereference) {
   StringRef Snippet = R"cc(
-    Smart x;
+    std::unique_ptr<S> x;
     *x;
   )cc";
   StringRef Id = "id";
@@ -464,7 +450,7 @@ TEST_F(StencilTest, AccessOpSmartPointerDereference) {
 
 TEST_F(StencilTest, AccessOpSmartPointerMemberCall) {
   StringRef Snippet = R"cc(
-    Smart x;
+    std::unique_ptr<S> x;
     x->Field;
   )cc";
   StringRef Id = "id";

From f3ab0ccd00db3bb26851d3e1cf4a789fe7fa6e2c Mon Sep 17 00:00:00 2001
From: MaheshRavishankar <ravishankarm@google.com>
Date: Tue, 25 Jan 2022 11:44:54 -0800
Subject: [PATCH 601/946] [mlir][Linalg] Add couple of convenience methods to
 `LinalgInterface`.

Add methods to
- Get block argument that is tied with an opOperand
- Get the yield value that is tied with a output opOperand.

Differential Revision: https://reviews.llvm.org/D118085
---
 .../Dialect/Linalg/IR/LinalgInterfaces.td     | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
index 413c2cc18acea..f40f82ed7cd94 100644
--- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
+++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td
@@ -641,6 +641,19 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
         return !opOperand->get().getType().template isa<ShapedType>();
       }]
     >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Return the block argument for an `opOperand`.
+      }],
+      /*retTy=*/"BlockArgument",
+      /*methodName=*/"getTiedBlockArgument",
+      /*args=*/(ins "OpOperand *":$opOperand),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        assert(opOperand->getOwner() == this->getOperation());
+        return getBlock()->getArgument(opOperand->getOperandNumber());
+      }]
+    >,
     InterfaceMethod<
       /*desc=*/[{
         Return the input or output indexing map for `opOperand`.
@@ -672,6 +685,24 @@ def LinalgStructuredInterface : OpInterface<"LinalgOp"> {
         return this->getOperation()->getResult(resultIndex);
       }]
     >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Return the value yielded by the region corresponding to an output
+        `opOperand`.
+      }],
+      /*retTy=*/"OpOperand *",
+      /*methodName=*/"getTiedYieldValue",
+      /*args=*/(ins "OpOperand*":$opOperand),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        assert(opOperand->getOwner() == this->getOperation());
+        int64_t resultIndex = opOperand->getOperandNumber() - getNumInputs();
+        assert(resultIndex >= 0 &&
+               resultIndex < this->getOperation()->getNumResults());
+        Operation *yieldOp = getBlock()->getTerminator();
+        return &yieldOp->getOpOperand(resultIndex);
+      }]
+    >,
     //===------------------------------------------------------------------===//
     // Other interface methods.
     //===------------------------------------------------------------------===//

From 2868e2677b609ed0a1b0e441d5084a8956137816 Mon Sep 17 00:00:00 2001
From: Argyrios Kyrtzidis <kyrtzidis@apple.com>
Date: Mon, 24 Jan 2022 17:49:37 -0800
Subject: [PATCH 602/946] [cmake] Some NFC changes in preparation for
 accomodating `Ninja Multi-Config`

* Use `MATCHES` so that `Ninja Multi-Config` generator also satisfies the Ninja check
* Pull out a couple of values into variables, inside `function(tablegen project ofn)`, NFC

Differential Revision: https://reviews.llvm.org/D118100
---
 llvm/cmake/config-ix.cmake                 |  4 ++--
 llvm/cmake/modules/HandleLLVMOptions.cmake |  8 ++++----
 llvm/cmake/modules/TableGen.cmake          | 11 +++++++----
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index 7e9d76d6c5648..a138d372d3b29 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -602,7 +602,7 @@ find_program(GOLD_EXECUTABLE NAMES ${LLVM_DEFAULT_TARGET_TRIPLE}-ld.gold ld.gold
 set(LLVM_BINUTILS_INCDIR "" CACHE PATH
     "PATH to binutils/include containing plugin-api.h for gold plugin.")
 
-if(CMAKE_GENERATOR STREQUAL "Ninja")
+if(CMAKE_GENERATOR MATCHES "Ninja")
   execute_process(COMMAND ${CMAKE_MAKE_PROGRAM} --version
     OUTPUT_VARIABLE NINJA_VERSION
     OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -610,7 +610,7 @@ if(CMAKE_GENERATOR STREQUAL "Ninja")
   message(STATUS "Ninja version: ${NINJA_VERSION}")
 endif()
 
-if(CMAKE_GENERATOR STREQUAL "Ninja" AND
+if(CMAKE_GENERATOR MATCHES "Ninja" AND
     NOT "${NINJA_VERSION}" VERSION_LESS "1.9.0" AND
     CMAKE_HOST_APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_GREATER "15.6.0")
   set(LLVM_TOUCH_STATIC_LIBRARIES ON)
diff --git a/llvm/cmake/modules/HandleLLVMOptions.cmake b/llvm/cmake/modules/HandleLLVMOptions.cmake
index c2feecc21a802..fcaa8f20bf941 100644
--- a/llvm/cmake/modules/HandleLLVMOptions.cmake
+++ b/llvm/cmake/modules/HandleLLVMOptions.cmake
@@ -34,7 +34,7 @@ string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
 set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
   "Define the maximum number of concurrent compilation jobs (Ninja only).")
 if(LLVM_PARALLEL_COMPILE_JOBS)
-  if(NOT CMAKE_GENERATOR STREQUAL "Ninja")
+  if(NOT CMAKE_GENERATOR MATCHES "Ninja")
     message(WARNING "Job pooling is only available with Ninja generators.")
   else()
     set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
@@ -44,7 +44,7 @@ endif()
 
 set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
   "Define the maximum number of concurrent link jobs (Ninja only).")
-if(CMAKE_GENERATOR STREQUAL "Ninja")
+if(CMAKE_GENERATOR MATCHES "Ninja")
   if(NOT LLVM_PARALLEL_LINK_JOBS AND uppercase_LLVM_ENABLE_LTO STREQUAL "THIN")
     message(STATUS "ThinLTO provides its own parallel linking - limiting parallel link jobs to 2.")
     set(LLVM_PARALLEL_LINK_JOBS "2")
@@ -920,7 +920,7 @@ add_definitions( -D__STDC_LIMIT_MACROS )
 
 # clang and gcc don't default-print colored diagnostics when invoked from Ninja.
 if (UNIX AND
-    CMAKE_GENERATOR STREQUAL "Ninja" AND
+    CMAKE_GENERATOR MATCHES "Ninja" AND
     (CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR
      (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
       NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9))))
@@ -928,7 +928,7 @@ if (UNIX AND
 endif()
 
 # lld doesn't print colored diagnostics when invoked from Ninja
-if (UNIX AND CMAKE_GENERATOR STREQUAL "Ninja")
+if (UNIX AND CMAKE_GENERATOR MATCHES "Ninja")
   include(LLVMCheckLinkerFlag)
   llvm_check_linker_flag(CXX "-Wl,--color-diagnostics" LINKER_SUPPORTS_COLOR_DIAGNOSTICS)
   append_if(LINKER_SUPPORTS_COLOR_DIAGNOSTICS "-Wl,--color-diagnostics"
diff --git a/llvm/cmake/modules/TableGen.cmake b/llvm/cmake/modules/TableGen.cmake
index 6d6513c5ab459..59fd0d3b07336 100644
--- a/llvm/cmake/modules/TableGen.cmake
+++ b/llvm/cmake/modules/TableGen.cmake
@@ -10,7 +10,7 @@ function(tablegen project ofn)
   endif()
 
   # Use depfile instead of globbing arbitrary *.td(s) for Ninja.
-  if(CMAKE_GENERATOR STREQUAL "Ninja")
+  if(CMAKE_GENERATOR MATCHES "Ninja")
     # Make output path relative to build.ninja, assuming located on
     # ${CMAKE_BINARY_DIR}.
     # CMake emits build targets as relative paths but Ninja doesn't identify
@@ -93,8 +93,11 @@ function(tablegen project ofn)
   get_directory_property(tblgen_includes INCLUDE_DIRECTORIES)
   list(TRANSFORM tblgen_includes PREPEND -I)
 
+  set(tablegen_exe ${${project}_TABLEGEN_EXE})
+  set(tablegen_depends ${${project}_TABLEGEN_TARGET} ${tablegen_exe})
+
   add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}
-    COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
+    COMMAND ${tablegen_exe} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
     ${tblgen_includes}
     ${LLVM_TABLEGEN_FLAGS}
     ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
@@ -103,7 +106,7 @@ function(tablegen project ofn)
     # The file in LLVM_TARGET_DEFINITIONS may be not in the current
     # directory and local_tds may not contain it, so we must
     # explicitly list it here:
-    DEPENDS ${${project}_TABLEGEN_TARGET} ${${project}_TABLEGEN_EXE}
+    DEPENDS ${tablegen_depends}
       ${local_tds} ${global_tds}
     ${LLVM_TARGET_DEFINITIONS_ABSOLUTE}
     ${LLVM_TARGET_DEPENDS}
@@ -137,7 +140,7 @@ macro(add_tablegen target project)
   set(LLVM_LINK_COMPONENTS ${LLVM_LINK_COMPONENTS} TableGen)
 
   # CMake doesn't let compilation units depend on their dependent libraries on some generators.
-  if(NOT CMAKE_GENERATOR STREQUAL "Ninja" AND NOT XCODE)
+  if(NOT CMAKE_GENERATOR MATCHES "Ninja" AND NOT XCODE)
     # FIXME: It leaks to user, callee of add_tablegen.
     set(LLVM_ENABLE_OBJLIB ON)
   endif()

From 2a1b7aa016c0f4b5598806205bdfbab1ea2d92c4 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Tue, 25 Jan 2022 20:49:55 +0100
Subject: [PATCH 603/946] [lldb] Fix ProcessKDPLog for the logging refactor

---
 .../Process/MacOSX-Kernel/ProcessKDPLog.cpp   | 28 ++++++-----
 .../Process/MacOSX-Kernel/ProcessKDPLog.h     | 50 ++++++++++++-------
 2 files changed, 47 insertions(+), 31 deletions(-)

diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp
index 3b5f1157d5446..f741126f965bb 100644
--- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp
+++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.cpp
@@ -11,24 +11,28 @@
 using namespace lldb_private;
 
 static constexpr Log::Category g_categories[] = {
-    {{"async"}, {"log asynchronous activity"}, KDP_LOG_ASYNC},
-    {{"break"}, {"log breakpoints"}, KDP_LOG_BREAKPOINTS},
-    {{"comm"}, {"log communication activity"}, KDP_LOG_COMM},
+    {{"async"}, {"log asynchronous activity"}, KDPLog::Async},
+    {{"break"}, {"log breakpoints"}, KDPLog::Breakpoints},
+    {{"comm"}, {"log communication activity"}, KDPLog::Comm},
     {{"data-long"},
      {"log memory bytes for memory reads and writes for all transactions"},
-     KDP_LOG_MEMORY_DATA_LONG},
+     KDPLog::MemoryDataLong},
     {{"data-short"},
      {"log memory bytes for memory reads and writes for short transactions "
       "only"},
-     KDP_LOG_MEMORY_DATA_SHORT},
-    {{"memory"}, {"log memory reads and writes"}, KDP_LOG_MEMORY},
-    {{"packets"}, {"log gdb remote packets"}, KDP_LOG_PACKETS},
-    {{"process"}, {"log process events and activities"}, KDP_LOG_PROCESS},
-    {{"step"}, {"log step related activities"}, KDP_LOG_STEP},
-    {{"thread"}, {"log thread events and activities"}, KDP_LOG_THREAD},
-    {{"watch"}, {"log watchpoint related activities"}, KDP_LOG_WATCHPOINTS},
+     KDPLog::MemoryDataShort},
+    {{"memory"}, {"log memory reads and writes"}, KDPLog::Memory},
+    {{"packets"}, {"log gdb remote packets"}, KDPLog::Packets},
+    {{"process"}, {"log process events and activities"}, KDPLog::Process},
+    {{"step"}, {"log step related activities"}, KDPLog::Step},
+    {{"thread"}, {"log thread events and activities"}, KDPLog::Thread},
+    {{"watch"}, {"log watchpoint related activities"}, KDPLog::Watchpoints},
 };
 
-Log::Channel ProcessKDPLog::g_channel(g_categories, KDP_LOG_DEFAULT);
+static Log::Channel g_channel(g_categories, KDPLog::Packets);
+
+template <> Log::Channel &lldb_private::LogChannelFor<KDPLog>() {
+  return g_channel;
+}
 
 void ProcessKDPLog::Initialize() { Log::Register("kdp-remote", g_channel); }
diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.h b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.h
index 91b1b6e49b7af..f47a9f5dd0876 100644
--- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.h
+++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDPLog.h
@@ -11,32 +11,44 @@
 
 #include "lldb/Utility/Log.h"
 
-#define KDP_LOG_PROCESS (1u << 1)
-#define KDP_LOG_THREAD (1u << 2)
-#define KDP_LOG_PACKETS (1u << 3)
-#define KDP_LOG_MEMORY (1u << 4) // Log memory reads/writes calls
-#define KDP_LOG_MEMORY_DATA_SHORT                                              \
-  (1u << 5) // Log short memory reads/writes bytes
-#define KDP_LOG_MEMORY_DATA_LONG (1u << 6) // Log all memory reads/writes bytes
-#define KDP_LOG_BREAKPOINTS (1u << 7)
-#define KDP_LOG_WATCHPOINTS (1u << 8)
-#define KDP_LOG_STEP (1u << 9)
-#define KDP_LOG_COMM (1u << 10)
-#define KDP_LOG_ASYNC (1u << 11)
-#define KDP_LOG_ALL (UINT32_MAX)
-#define KDP_LOG_DEFAULT KDP_LOG_PACKETS
 
 namespace lldb_private {
-class ProcessKDPLog {
-  static Log::Channel g_channel;
 
+enum class KDPLog : Log::MaskType {
+  Async = Log::ChannelFlag<0>,
+  Breakpoints = Log::ChannelFlag<1>,
+  Comm = Log::ChannelFlag<2>,
+  MemoryDataLong = Log::ChannelFlag<3>,  // Log all memory reads/writes bytes
+  MemoryDataShort = Log::ChannelFlag<4>, // Log short memory reads/writes bytes
+  Memory = Log::ChannelFlag<5>,          // Log memory reads/writes calls
+  Packets = Log::ChannelFlag<6>,
+  Process = Log::ChannelFlag<7>,
+  Step = Log::ChannelFlag<8>,
+  Thread = Log::ChannelFlag<9>,
+  Watchpoints = Log::ChannelFlag<10>,
+  LLVM_MARK_AS_BITMASK_ENUM(Watchpoints)
+};
+#define KDP_LOG_PROCESS ::lldb_private::KDPLog::Process
+#define KDP_LOG_THREAD ::lldb_private::KDPLog::Thread
+#define KDP_LOG_PACKETS ::lldb_private::KDPLog::Packets
+#define KDP_LOG_MEMORY ::lldb_private::KDPLog::Memory
+#define KDP_LOG_MEMORY_DATA_SHORT ::lldb_private::KDPLog::MemoryDataShort
+#define KDP_LOG_MEMORY_DATA_LONG ::lldb_private::KDPLog::MemoryDataLong
+#define KDP_LOG_BREAKPOINTS ::lldb_private::KDPLog::Breakpoints
+#define KDP_LOG_WATCHPOINTS ::lldb_private::KDPLog::Watchpoints
+#define KDP_LOG_STEP ::lldb_private::KDPLog::Step
+#define KDP_LOG_COMM ::lldb_private::KDPLog::Comm
+#define KDP_LOG_ASYNC ::lldb_private::KDPLog::Async
+#define KDP_LOG_DEFAULT KDP_LOG_PACKETS
+
+class ProcessKDPLog {
 public:
   static void Initialize();
 
-  static Log *GetLogIfAllCategoriesSet(uint32_t mask) {
-    return g_channel.GetLogIfAll(mask);
-  }
+  static Log *GetLogIfAllCategoriesSet(KDPLog mask) { return GetLog(mask); }
 };
+
+template <> Log::Channel &LogChannelFor<KDPLog>();
 }
 
 #endif // LLDB_SOURCE_PLUGINS_PROCESS_MACOSX_KERNEL_PROCESSKDPLOG_H

From 491c154677bcf0fba3c91fdaa7897a48ab605327 Mon Sep 17 00:00:00 2001
From: Aleksandr Platonov <platonov.aleksandr@huawei.com>
Date: Tue, 25 Jan 2022 22:46:25 +0300
Subject: [PATCH 604/946] [analyzer] Don't specify PLUGIN_TOOL for analyzer
 plugins

Analyzer plugins explicitly export clang_registerCheckers and clang_analyzerAPIVersionString symbols, so we don't need to specify a tool to link agains.

Also, without this patch MSVC build fails with cmake flags -DLLVM_ENABLE_PLUGINS=On -DCLANG_PLUGINS_SUPPORT=On -DLLVM_EXPORT_SYMBOLS_FOR_PLUGINS=On
```
[936/936] Linking CXX shared module bin\SampleAnalyzerPlugin.dll
FAILED: bin/SampleAnalyzerPlugin.dll
cmd.exe /C "cd . && "D:\Program Files\CMake\bin\cmake.exe" -E vs_link_dll --intdir=tools\clang\lib\Analysis\plugins\SampleAnalyzer\CMakeFiles\SampleAnalyzerPlugin.dir --rc=C:\PROGRA~2\WI3CF2~1\10\bin\100183~1.0\x64\rc.exe --mt=C:\PROGRA~2\WI3CF2~1\10\bin\100183~1.0\x64\mt.exe --manifests  -- C:\PROGRA~2\MICROS~4\2019\COMMUN~1\VC\Tools\MSVC\1428~1.299\bin\Hostx64\x64\link.exe /nologo tools\clang\lib\Analysis\plugins\SampleAnalyzer\CMakeFiles\SampleAnalyzerPlugin.dir\MainCallChecker.cpp.obj  /out:bin\SampleAnalyzerPlugin.dll /implib:lib\SampleAnalyzerPlugin.lib /pdb:bin\SampleAnalyzerPlugin.pdb /dll /version:0.0 /machine:x64 /INCREMENTAL:NO  /DEF:"D:/work/llvm-project-original/build-plugins/tools/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.def"  lib\clang.lib  lib\clangAnalysis.lib  lib\clangAST.lib  lib\clangStaticAnalyzerCore.lib  lib\clangStaticAnalyzerFrontend.lib  lib\clangStaticAnalyzerCheckers.lib  lib\clangStaticAnalyzerCore.lib  lib\clangCrossTU.lib  lib\clangIndex.lib  lib\clangFormat.lib  lib\clangToolingInclusions.lib  lib\clangFrontend.lib  lib\clangDriver.lib  version.lib  lib\clangParse.lib  lib\clangSerialization.lib  lib\clangSema.lib  lib\clangAnalysis.lib  lib\clangEdit.lib  lib\LLVMOption.lib  lib\clangToolingCore.lib  lib\clangRewrite.lib  lib\clangASTMatchers.lib  lib\clangAST.lib  lib\clangLex.lib  lib\clangBasic.lib  lib\LLVMFrontendOpenMP.lib  lib\LLVMScalarOpts.lib  lib\LLVMAggressiveInstCombine.lib  lib\LLVMInstCombine.lib  lib\LLVMTransformUtils.lib  lib\LLVMAnalysis.lib  lib\LLVMProfileData.lib  lib\LLVMDebugInfoDWARF.lib  lib\LLVMObject.lib  lib\LLVMBitReader.lib  lib\LLVMCore.lib  lib\LLVMRemarks.lib  lib\LLVMBitstreamReader.lib  lib\LLVMMCParser.lib  lib\LLVMMC.lib  lib\LLVMDebugInfoCodeView.lib  lib\LLVMTextAPI.lib  lib\LLVMBinaryFormat.lib  lib\LLVMSupport.lib  psapi.lib  shell32.lib  ole32.lib  uuid.lib  advapi32.lib  delayimp.lib  -delayload:shell32.dll  -delayload:ole32.dll  lib\LLVMDemangle.lib  kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib  && cd ."
LINK: command "C:\PROGRA~2\MICROS~4\2019\COMMUN~1\VC\Tools\MSVC\1428~1.299\bin\Hostx64\x64\link.exe /nologo tools\clang\lib\Analysis\plugins\SampleAnalyzer\CMakeFiles\SampleAnalyzerPlugin.dir\MainCallChecker.cpp.obj /out:bin\SampleAnalyzerPlugin.dll /implib:lib\SampleAnalyzerPlugin.lib /pdb:bin\SampleAnalyzerPlugin.pdb /dll /version:0.0 /machine:x64 /INCREMENTAL:NO /DEF:D:/work/llvm-project-original/build-plugins/tools/clang/lib/Analysis/plugins/SampleAnalyzer/SampleAnalyzerPlugin.def lib\clang.lib lib\clangAnalysis.lib lib\clangAST.lib lib\clangStaticAnalyzerCore.lib lib\clangStaticAnalyzerFrontend.lib lib\clangStaticAnalyzerCheckers.lib lib\clangStaticAnalyzerCore.lib lib\clangCrossTU.lib lib\clangIndex.lib lib\clangFormat.lib lib\clangToolingInclusions.lib lib\clangFrontend.lib lib\clangDriver.lib version.lib lib\clangParse.lib lib\clangSerialization.lib lib\clangSema.lib lib\clangAnalysis.lib lib\clangEdit.lib lib\LLVMOption.lib lib\clangToolingCore.lib lib\clangRewrite.lib lib\clangASTMatchers.lib lib\clangAST.lib lib\clangLex.lib lib\clangBasic.lib lib\LLVMFrontendOpenMP.lib lib\LLVMScalarOpts.lib lib\LLVMAggressiveInstCombine.lib lib\LLVMInstCombine.lib lib\LLVMTransformUtils.lib lib\LLVMAnalysis.lib lib\LLVMProfileData.lib lib\LLVMDebugInfoDWARF.lib lib\LLVMObject.lib lib\LLVMBitReader.lib lib\LLVMCore.lib lib\LLVMRemarks.lib lib\LLVMBitstreamReader.lib lib\LLVMMCParser.lib lib\LLVMMC.lib lib\LLVMDebugInfoCodeView.lib lib\LLVMTextAPI.lib lib\LLVMBinaryFormat.lib lib\LLVMSupport.lib psapi.lib shell32.lib ole32.lib uuid.lib advapi32.lib delayimp.lib -delayload:shell32.dll -delayload:ole32.dll lib\LLVMDemangle.lib kernel32.lib user32.lib gdi32.lib winspool.lib shell32.lib ole32.lib oleaut32.lib uuid.lib comdlg32.lib advapi32.lib /MANIFEST /MANIFESTFILE:bin\SampleAnalyzerPlugin.dll.manifest" failed (exit code 1169) with the following output:
clangStaticAnalyzerCore.lib(BugReporter.cpp.obj) : error LNK2005: "public: __cdecl clang::ento::PathSensitiveBugReport::PathSensitiveBugReport(class clang::ento::BugType const &,class llvm::StringRef,class llvm::StringRef,class clang::ento::ExplodedNode const *,class clang::ento::PathDiagnosticLocation,class clang::Decl const *)" (??0PathSensitiveBugReport@ento@clang@@QEAA@AEBVBugType@12@VStringRef@llvm@@1PEBVExplodedNode@12@VPathDiagnosticLocation@12@PEBVDecl@2@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(BugReporter.cpp.obj) : error LNK2005: "private: virtual void __cdecl clang::ento::BugType::anchor(void)" (?anchor@BugType@ento@clang@@EEAAXXZ) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(SVals.cpp.obj) : error LNK2005: "public: class clang::FunctionDecl const * __cdecl clang::ento::SVal::getAsFunctionDecl(void)const " (?getAsFunctionDecl@SVal@ento@clang@@QEBAPEBVFunctionDecl@3@XZ) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(ProgramState.cpp.obj) : error LNK2005: "void __cdecl clang::ento::ProgramStateRelease(class clang::ento::ProgramState const *)" (?ProgramStateRelease@ento@clang@@YAXPEBVProgramState@12@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(ProgramState.cpp.obj) : error LNK2005: "void __cdecl clang::ento::ProgramStateRetain(class clang::ento::ProgramState const *)" (?ProgramStateRetain@ento@clang@@YAXPEBVProgramState@12@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(Environment.cpp.obj) : error LNK2005: "public: __cdecl clang::ento::EnvironmentEntry::EnvironmentEntry(class clang::Stmt const *,class clang::LocationContext const *)" (??0EnvironmentEntry@ento@clang@@QEAA@PEBVStmt@2@PEBVLocationContext@2@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(Environment.cpp.obj) : error LNK2005: "public: class clang::ento::SVal __cdecl clang::ento::Environment::getSVal(class clang::ento::EnvironmentEntry const &,class clang::ento::SValBuilder &)const " (?getSVal@Environment@ento@clang@@QEBA?AVSVal@23@AEBVEnvironmentEntry@23@AEAVSValBuilder@23@@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(CheckerManager.cpp.obj) : error LNK2005: "public: void __cdecl clang::ento::CheckerManager::_registerForPreStmt(class clang::ento::CheckerFn<void __cdecl(class clang::Stmt const *,class clang::ento::CheckerContext &)>,bool (__cdecl*)(class clang::Stmt const *))" (?_registerForPreStmt@CheckerManager@ento@clang@@QEAAXV?$CheckerFn@$$A6AXPEBVStmt@clang@@AEAVCheckerContext@ento@2@@Z@23@P6A_NPEBVStmt@3@@Z@Z) already defined in clang.lib(clang.exe)
clangStaticAnalyzerCore.lib(CoreEngine.cpp.obj) : error LNK2005: "protected: class clang::ento::ExplodedNode * __cdecl clang::ento::NodeBuilder::generateNodeImpl(class clang::ProgramPoint const &,class llvm::IntrusiveRefCntPtr<class clang::ento::ProgramState const >,class clang::ento::ExplodedNode *,bool)" (?generateNodeImpl@NodeBuilder@ento@clang@@IEAAPEAVExplodedNode@23@AEBVProgramPoint@3@V?$IntrusiveRefCntPtr@$$CBVProgramState@ento@clang@@@llvm@@PEAV423@_N@Z) already defined in clang.lib(clang.exe)
LLVMSupport.lib(SmallVector.cpp.obj) : error LNK2005: "protected: void __cdecl llvm::SmallVectorBase<unsigned int>::grow_pod(void *,unsigned __int64,unsigned __int64)" (?grow_pod@?$SmallVectorBase@I@llvm@@IEAAXPEAX_K1@Z) already defined in clang.lib(clang.exe)
LLVMSupport.lib(FoldingSet.cpp.obj) : error LNK2005: "protected: __cdecl llvm::FoldingSetBase::~FoldingSetBase(void)" (??1FoldingSetBase@llvm@@IEAA@XZ) already defined in clang.lib(clang.exe)
clangAST.lib(ASTImporter.cpp.obj) : error LNK2005: "public: __cdecl clang::ASTImporter::ASTImporter(class clang::ASTContext &,class clang::FileManager &,class clang::ASTContext &,class clang::FileManager &,bool,class std::shared_ptr<class clang::ASTImporterSharedState>)" (??0ASTImporter@clang@@QEAA@AEAVASTContext@1@AEAVFileManager@1@01_NV?$shared_ptr@VASTImporterSharedState@clang@@@std@@@Z) already defined in clang.lib(clang.exe)
clangAST.lib(ASTImporter.cpp.obj) : error LNK2005: "public: class llvm::Expected<class clang::Decl *> __cdecl clang::ASTImporter::Import(class clang::Decl *)" (?Import@ASTImporter@clang@@QEAA?AV?$Expected@PEAVDecl@clang@@@llvm@@PEAVDecl@2@@Z) already defined in clang.lib(clang.exe)
clangAST.lib(ExternalASTSource.cpp.obj) : error LNK2005: "public: virtual __cdecl clang::ExternalASTSource::~ExternalASTSource(void)" (??1ExternalASTSource@clang@@UEAA@XZ) already defined in clang.lib(clang.exe)
clangAST.lib(ExternalASTSource.cpp.obj) : error LNK2005: "public: virtual void __cdecl clang::ExternalASTSource::CompleteRedeclChain(class clang::Decl const *)" (?CompleteRedeclChain@ExternalASTSource@clang@@UEAAXPEBVDecl@2@@Z) already defined in clang.lib(clang.exe)
clangAST.lib(ExternalASTSource.cpp.obj) : error LNK2005: "public: virtual void __cdecl clang::ExternalASTSource::CompleteType(class clang::ObjCInterfaceDecl *)" (?CompleteType@ExternalASTSource@clang@@UEAAXPEAVObjCInterfaceDecl@2@@Z) already defined in clang.lib(clang.exe)
...
```

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D116966
---
 .../Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt   | 2 +-
 clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt | 2 +-
 clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
index 229de54814926..fc8e2bbc25e11 100644
--- a/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CheckerDependencyHandling/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerDependencyHandlingAnalyzerPlugin.exports)
-add_llvm_library(CheckerDependencyHandlingAnalyzerPlugin MODULE BUILDTREE_ONLY CheckerDependencyHandling.cpp PLUGIN_TOOL clang)
+add_llvm_library(CheckerDependencyHandlingAnalyzerPlugin MODULE BUILDTREE_ONLY CheckerDependencyHandling.cpp)
 
 clang_target_link_libraries(CheckerDependencyHandlingAnalyzerPlugin PRIVATE
   clangAnalysis
diff --git a/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt b/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
index 432383efba5cc..e8315a0513064 100644
--- a/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/CheckerOptionHandling/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/CheckerOptionHandlingAnalyzerPlugin.exports)
-add_llvm_library(CheckerOptionHandlingAnalyzerPlugin MODULE BUILDTREE_ONLY CheckerOptionHandling.cpp PLUGIN_TOOL clang)
+add_llvm_library(CheckerOptionHandlingAnalyzerPlugin MODULE BUILDTREE_ONLY CheckerOptionHandling.cpp)
 
 clang_target_link_libraries(CheckerOptionHandlingAnalyzerPlugin PRIVATE
   clangAnalysis
diff --git a/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
index d9b3f05cbd1bb..77acc47fd4832 100644
--- a/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
+++ b/clang/lib/Analysis/plugins/SampleAnalyzer/CMakeLists.txt
@@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS
   )
 
 set(LLVM_EXPORTED_SYMBOL_FILE ${CMAKE_CURRENT_SOURCE_DIR}/SampleAnalyzerPlugin.exports)
-add_llvm_library(SampleAnalyzerPlugin MODULE BUILDTREE_ONLY MainCallChecker.cpp PLUGIN_TOOL clang)
+add_llvm_library(SampleAnalyzerPlugin MODULE BUILDTREE_ONLY MainCallChecker.cpp)
 
 clang_target_link_libraries(SampleAnalyzerPlugin PRIVATE
   clangAnalysis

From 82df72cc67d4a2c07b766908a8aa0e272403421b Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue@google.com>
Date: Tue, 25 Jan 2022 09:54:01 -0500
Subject: [PATCH 605/946] [libc] Make logf function correctly rounded for all
 rounding modes.

Make logf function correctly rounded for all rounding modes.

Reviewed By: sivachandra, zimmermann6, santoshn, jpl169

Differential Revision: https://reviews.llvm.org/D118149
---
 libc/src/math/generic/logf.cpp   | 66 ++++++++++++++++++++++----------
 libc/test/src/math/logf_test.cpp | 36 ++++++++++++++---
 2 files changed, 76 insertions(+), 26 deletions(-)

diff --git a/libc/src/math/generic/logf.cpp b/libc/src/math/generic/logf.cpp
index 1c3aef8650ec2..99bd7a473c7b7 100644
--- a/libc/src/math/generic/logf.cpp
+++ b/libc/src/math/generic/logf.cpp
@@ -9,13 +9,15 @@
 #include "src/math/logf.h"
 #include "common_constants.h" // Lookup table for (1/f)
 #include "src/__support/FPUtil/BasicOperations.h"
+#include "src/__support/FPUtil/FEnvImpl.h"
 #include "src/__support/FPUtil/FMA.h"
 #include "src/__support/FPUtil/FPBits.h"
 #include "src/__support/FPUtil/PolyEval.h"
 #include "src/__support/common.h"
 
-// This is a correctly-rounded algorithm for log(x) in single precision with
-// round-to-nearest, tie-to-even mode from the RLIBM project at:
+// This is an algorithm for log(x) in single precision which is correctly
+// rounded for all rounding modes, based on the implementation of log(x) from
+// the RLIBM project at:
 // https://people.cs.rutgers.edu/~sn349/rlibm
 
 // Step 1 - Range reduction:
@@ -101,6 +103,42 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) {
   constexpr double LOG_2 = 0x1.62e42fefa39efp-1;
   using FPBits = typename fputil::FPBits<float>;
   FPBits xbits(x);
+
+  switch (FPBits(x).uintval()) {
+  case 0x41178febU: // x = 0x1.2f1fd6p+3f
+    if (fputil::get_round() == FE_TONEAREST)
+      return 0x1.1fcbcep+1f;
+    break;
+  case 0x4c5d65a5U: // x = 0x1.bacb4ap+25f
+    if (fputil::get_round() == FE_TONEAREST)
+      return 0x1.1e0696p+4f;
+    break;
+  case 0x65d890d3U: // x = 0x1.b121a6p+76f
+    if (fputil::get_round() == FE_TONEAREST)
+      return 0x1.a9a3f2p+5f;
+    break;
+  case 0x6f31a8ecU: // x = 0x1.6351d8p+95f
+    if (fputil::get_round() == FE_TONEAREST)
+      return 0x1.08b512p+6f;
+    break;
+  case 0x3f800001U: // x = 0x1.000002p+0f
+    if (fputil::get_round() == FE_UPWARD)
+      return 0x1p-23f;
+    return 0x1.fffffep-24f;
+  case 0x500ffb03U: // x = 0x1.1ff606p+33f
+    if (fputil::get_round() != FE_UPWARD)
+      return 0x1.6fdd34p+4f;
+    break;
+  case 0x7a17f30aU: // x = 0x1.2fe614p+117f
+    if (fputil::get_round() != FE_UPWARD)
+      return 0x1.451436p+6f;
+    break;
+  case 0x5cd69e88U: // x = 0x1.ad3d1p+58f
+    if (fputil::get_round() != FE_UPWARD)
+      return 0x1.45c146p+5f;
+    break;
+  }
+
   int m = 0;
 
   if (xbits.uintval() < FPBits::MIN_NORMAL ||
@@ -130,26 +168,14 @@ LLVM_LIBC_FUNCTION(float, logf, (float x)) {
   double d = static_cast<float>(xbits) - static_cast<float>(f);
   d *= ONE_OVER_F[f_index];
 
+  double extra_factor =
+      fputil::fma(static_cast<double>(m), LOG_2, LOG_F[f_index]);
+
   double r = __llvm_libc::fputil::polyeval(
-      d, 0x1.0000000008169p+0, -0x1.0000004f78405p-1, 0x1.555654d2bc769p-2,
-      -0x1.00a570d090322p-2, 0x1.e158d823f89cap-3);
+      d, extra_factor, 0x1.fffffffffffacp-1, -0x1.fffffffef9cb2p-2,
+      0x1.5555513bc679ap-2, -0x1.fff4805ea441p-3, 0x1.930180dbde91ap-3);
 
-  double extra_factor =
-      __llvm_libc::fputil::fma(static_cast<double>(m), LOG_2, LOG_F[f_index]);
-  switch (FPBits(x).uintval()) {
-  case 0x3f80d19f:
-    return 0x1.a1e82cp-8f;
-  case 0x41178feb:
-    return 0x1.1fcbcep+1f;
-  case 0x4c5d65a5:
-    return 0x1.1e0696p+4f;
-  case 0x65d890d3:
-    return 0x1.a9a3f2p+5f;
-  case 0x6f31a8ec:
-    return 0x1.08b512p+6f;
-  default:
-    return static_cast<float>(__llvm_libc::fputil::fma(d, r, extra_factor));
-  }
+  return static_cast<float>(r);
 }
 
 #pragma clang diagnostic pop
diff --git a/libc/test/src/math/logf_test.cpp b/libc/test/src/math/logf_test.cpp
index eb4da810486d3..1f2834d3cf31b 100644
--- a/libc/test/src/math/logf_test.cpp
+++ b/libc/test/src/math/logf_test.cpp
@@ -31,13 +31,37 @@ TEST(LlvmLibcLogfTest, SpecialNumbers) {
 }
 
 TEST(LlvmLibcLogfTest, TrickyInputs) {
-  constexpr int N = 24;
+  constexpr int N = 28;
   constexpr uint32_t INPUTS[N] = {
-      0x3509dcf6U, 0x3bf86ef0U, 0x3ca1c99fU, 0x3d13e105U, 0x3f7ff1f2U,
-      0x3f7fffffU, 0x3f800006U, 0x3f800014U, 0x3f80001cU, 0x3f80c777U,
-      0x3f80ce72U, 0x3f80d19fU, 0x3f80f7bfU, 0x3f80fcfeU, 0x3f81feb4U,
-      0x3f83d731U, 0x3f90cb1dU, 0x3fc55379U, 0x3fd364d7U, 0x41178febU,
-      0x4c5d65a5U, 0x4e85f412U, 0x65d890d3U, 0x6f31a8ecU};
+      0x3509dcf6U, /*0x1.13b9ecp-21f*/
+      0x3bf86ef0U, /*0x1.f0ddep-8f*/
+      0x3ca1c99fU, /*0x1.43933ep-6f*/
+      0x3d13e105U, /*0x1.27c20ap-5f*/
+      0x3f7ff1f2U, /*0x1.ffe3e4p-1f*/
+      0x3f7fffffU, /*0x1.fffffep-1f*/
+      0x3f800001U, /*0x1.000002p+0f*/
+      0x3f800006U, /*0x1.00000cp+0f*/
+      0x3f800014U, /*0x1.000028p+0f*/
+      0x3f80001cU, /*0x1.000038p+0f*/
+      0x3f80c777U, /*0x1.018eeep+0f*/
+      0x3f80ce72U, /*0x1.019ce4p+0f*/
+      0x3f80d19fU, /*0x1.01a33ep+0f*/
+      0x3f80f7bfU, /*0x1.01ef7ep+0f*/
+      0x3f80fcfeU, /*0x1.01f9fcp+0f*/
+      0x3f81feb4U, /*0x1.03fd68p+0f*/
+      0x3f83d731U, /*0x1.07ae62p+0f*/
+      0x3f90cb1dU, /*0x1.21963ap+0f*/
+      0x3fc55379U, /*0x1.8aa6f2p+0f*/
+      0x3fd364d7U, /*0x1.a6c9aep+0f*/
+      0x41178febU, /*0x1.2f1fd6p+3f*/
+      0x4c5d65a5U, /*0x1.bacb4ap+25f*/
+      0x4e85f412U, /*0x1.0be824p+30f*/
+      0x500ffb03U, /*0x1.1ff606p+33f*/
+      0x5cd69e88U, /*0x1.ad3d1p+58f*/
+      0x65d890d3U, /*0x1.b121a6p+76f*/
+      0x6f31a8ecU, /*0x1.6351d8p+95f*/
+      0x7a17f30aU, /*0x1.2fe614p+117f*/
+  };
   for (int i = 0; i < N; ++i) {
     float x = float(FPBits(INPUTS[i]));
     EXPECT_MPFR_MATCH(mpfr::Operation::Log, x, __llvm_libc::logf(x), 0.5);

From b35ef580d870fbb4e932a3e3a580860ac9d4ba49 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Tue, 25 Jan 2022 21:24:05 +0100
Subject: [PATCH 606/946] [NFC] Added test with select with unpredictable
 metadata; regenerate x86-cmov-converter.ll

---
 llvm/test/CodeGen/X86/x86-cmov-converter.ll | 420 ++++++++++++++------
 1 file changed, 307 insertions(+), 113 deletions(-)

diff --git a/llvm/test/CodeGen/X86/x86-cmov-converter.ll b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
index 59f68269381a2..c5ebe87f9754e 100644
--- a/llvm/test/CodeGen/X86/x86-cmov-converter.ll
+++ b/llvm/test/CodeGen/X86/x86-cmov-converter.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-pc-linux -x86-cmov-converter=true -verify-machineinstrs -disable-block-placement < %s | FileCheck -allow-deprecated-dag-overlap %s
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -101,11 +102,34 @@
 
 %struct.Node = type { i32, %struct.Node*, %struct.Node* }
 
-; CHECK-LABEL: CmovInHotPath
-; CHECK-NOT: cmov
-; CHECK: jg
-
 define void @CmovInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture readnone %d) #0 {
+; CHECK-LABEL: CmovInHotPath:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB0_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:  .LBB0_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-NEXT:    leal 1(%rax), %r9d
+; CHECK-NEXT:    imull %esi, %eax
+; CHECK-NEXT:    movl $10, %r10d
+; CHECK-NEXT:    cmpl %edx, %eax
+; CHECK-NEXT:    jg .LBB0_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    movl %r9d, %r10d
+; CHECK-NEXT:  .LBB0_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT:    imull %r9d, %r10d
+; CHECK-NEXT:    movl %r10d, (%rcx,%rdi,4)
+; CHECK-NEXT:    addq $1, %rdi
+; CHECK-NEXT:    cmpq %rdi, %r8
+; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:  .LBB0_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 0
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -132,10 +156,33 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: CmovNotInHotPath
-; CHECK: cmovg
-
 define void @CmovNotInHotPath(i32 %n, i32 %a, i32 %b, i32* nocapture %c, i32* nocapture %d) #0 {
+; CHECK-LABEL: CmovNotInHotPath:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    testl %edi, %edi
+; CHECK-NEXT:    jle .LBB1_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edx, %r9d
+; CHECK-NEXT:    movl %edi, %r10d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $10, %r11d
+; CHECK-NEXT:  .LBB1_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rcx,%rdi,4), %eax
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    imull %esi, %edx
+; CHECK-NEXT:    cmpl %r9d, %edx
+; CHECK-NEXT:    cmovgl %r11d, %eax
+; CHECK-NEXT:    movl %eax, (%rcx,%rdi,4)
+; CHECK-NEXT:    movl (%r8,%rdi,4), %eax
+; CHECK-NEXT:    cltd
+; CHECK-NEXT:    idivl %r9d
+; CHECK-NEXT:    movl %eax, (%r8,%rdi,4)
+; CHECK-NEXT:    addq $1, %rdi
+; CHECK-NEXT:    cmpq %rdi, %r10
+; CHECK-NEXT:    jne .LBB1_2
+; CHECK-NEXT:  .LBB1_3: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp18 = icmp sgt i32 %n, 0
   br i1 %cmp18, label %for.body.preheader, label %for.cond.cleanup
@@ -164,11 +211,34 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: MaxIndex
-; CHECK-NOT: cmov
-; CHECK: jg
-
 define i32 @MaxIndex(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxIndex:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB2_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB2_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %r9d
+; CHECK-NEXT:    movslq %edi, %rcx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl (%rsi,%rcx,4), %r9d
+; CHECK-NEXT:    jg .LBB2_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:  .LBB2_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB2_2 Depth=1
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    cmpq %rdx, %r8
+; CHECK-NEXT:    jne .LBB2_2
+; CHECK-NEXT:  .LBB2_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %cmp14 = icmp sgt i32 %n, 1
   br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
@@ -197,11 +267,82 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: MaxValue
-; CHECK-NOT: jg
-; CHECK: cmovg
+; TODO: If cmov instruction is marked as unpredicatable, do not convert it to branch.
+define i32 @MaxIndex_unpredictable(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxIndex_unpredictable:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB3_5
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %r8d
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB3_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %r9d
+; CHECK-NEXT:    movslq %edi, %rcx
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl (%rsi,%rcx,4), %r9d
+; CHECK-NEXT:    jg .LBB3_4
+; CHECK-NEXT:  # %bb.3: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:  .LBB3_4: # %for.body
+; CHECK-NEXT:    # in Loop: Header=BB3_2 Depth=1
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    cmpq %rdx, %r8
+; CHECK-NEXT:    jne .LBB3_2
+; CHECK-NEXT:  .LBB3_5: # %for.cond.cleanup
+; CHECK-NEXT:    retq
+entry:
+  %cmp14 = icmp sgt i32 %n, 1
+  br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %wide.trip.count = zext i32 %n to i64
+  br label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  %t.0.lcssa = phi i32 [ 0, %entry ], [ %i.0.t.0, %for.body ]
+  ret i32 %t.0.lcssa
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %for.body.preheader ]
+  %t.015 = phi i32 [ %i.0.t.0, %for.body ], [ 0, %for.body.preheader ]
+  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+  %0 = load i32, i32* %arrayidx, align 4
+  %idxprom1 = sext i32 %t.015 to i64
+  %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
+  %1 = load i32, i32* %arrayidx2, align 4
+  %cmp3 = icmp sgt i32 %0, %1
+  %2 = trunc i64 %indvars.iv to i32
+  %i.0.t.0 = select i1 %cmp3, i32 %2, i32 %t.015, !unpredictable !0
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
 
 define i32 @MaxValue(i32 %n, i32* nocapture readonly %a) #0 {
+; CHECK-LABEL: MaxValue:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    cmpl $2, %edi
+; CHECK-NEXT:    jl .LBB4_3
+; CHECK-NEXT:  # %bb.1: # %for.body.preheader
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    movl $1, %edx
+; CHECK-NEXT:  .LBB4_2: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rsi,%rdx,4), %edi
+; CHECK-NEXT:    cmpl %eax, %edi
+; CHECK-NEXT:    cmovgl %edi, %eax
+; CHECK-NEXT:    addq $1, %rdx
+; CHECK-NEXT:    cmpq %rdx, %rcx
+; CHECK-NEXT:    jne .LBB4_2
+; CHECK-NEXT:  .LBB4_3: # %for.cond.cleanup
+; CHECK-NEXT:    retq
 entry:
   %0 = load i32, i32* %a, align 4
   %cmp13 = icmp sgt i32 %n, 1
@@ -227,10 +368,25 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %exitcond, label %for.cond.cleanup, label %for.body
 }
 
-; CHECK-LABEL: BinarySearch
-; CHECK: set
-
 define i32 @BinarySearch(i32 %Mask, %struct.Node* nocapture readonly %Curr, %struct.Node* nocapture readonly %Next) #0 {
+; CHECK-LABEL: BinarySearch:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    jmp .LBB5_2
+; CHECK-NEXT:  .LBB5_1: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB5_2 Depth=1
+; CHECK-NEXT:    movl %ecx, %eax
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    btl %eax, %edi
+; CHECK-NEXT:    setae %cl
+; CHECK-NEXT:    movq 8(%rdx,%rcx,8), %rdx
+; CHECK-NEXT:  .LBB5_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movl (%rdx), %ecx
+; CHECK-NEXT:    cmpl %ecx, %eax
+; CHECK-NEXT:    ja .LBB5_1
+; CHECK-NEXT:  # %bb.3: # %while.end
+; CHECK-NEXT:    retq
 entry:
   %Val8 = getelementptr inbounds %struct.Node, %struct.Node* %Curr, i64 0, i32 0
   %0 = load i32, i32* %Val8, align 8
@@ -287,20 +443,40 @@ while.end:                                        ; preds = %while.body, %entry
 ;;                                          ; previous Phi instruction result
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-; CHECK-LABEL: Transform
-; CHECK-NOT: cmov
-; CHECK:         divl    [[a:%[0-9a-z]*]]
-; CHECK:         movl    $11, [[s1:%[0-9a-z]*]]
-; CHECK:         movl    [[a]], [[s2:%[0-9a-z]*]]
-; CHECK:         cmpl    [[a]], %edx
-; CHECK:         ja      [[SinkBB:.*]]
-; CHECK: [[FalseBB:.*]]:
-; CHECK:         movl    $22, [[s1]]
-; CHECK:         movl    $22, [[s2]]
-; CHECK: [[SinkBB]]:
-; CHECK:         ja
-
 define void @Transform(i32 *%arr, i32 *%arr2, i32 %a, i32 %b, i32 %c, i32 %n) #0 {
+; CHECK-LABEL: Transform:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movb $1, %al
+; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    jne .LBB6_5
+; CHECK-NEXT:  # %bb.1: # %while.body.preheader
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:  .LBB6_2: # %while.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movslq %esi, %rsi
+; CHECK-NEXT:    movl (%rdi,%rsi,4), %eax
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %r8d
+; CHECK-NEXT:    movl %eax, %edx
+; CHECK-NEXT:    movl $11, %eax
+; CHECK-NEXT:    movl %r8d, %ecx
+; CHECK-NEXT:    cmpl %r8d, %edx
+; CHECK-NEXT:    ja .LBB6_4
+; CHECK-NEXT:  # %bb.3: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT:    movl $22, %eax
+; CHECK-NEXT:    movl $22, %ecx
+; CHECK-NEXT:  .LBB6_4: # %while.body
+; CHECK-NEXT:    # in Loop: Header=BB6_2 Depth=1
+; CHECK-NEXT:    xorl %edx, %edx
+; CHECK-NEXT:    divl %ecx
+; CHECK-NEXT:    movl %edx, (%rdi,%rsi,4)
+; CHECK-NEXT:    addl $1, %esi
+; CHECK-NEXT:    cmpl %r9d, %esi
+; CHECK-NEXT:    ja .LBB6_2
+; CHECK-NEXT:  .LBB6_5: # %while.end
+; CHECK-NEXT:    retq
 entry:
   %cmp10 = icmp ugt i32 0, %n
   br i1 %cmp10, label %while.body, label %while.end
@@ -328,16 +504,36 @@ while.end:                                        ; preds = %while.body, %entry
 ; even outside of a loop.
 define i32 @test_cmov_memoperand(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB7_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB7_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %load = load i32, i32* %y
   %z = select i1 %cond, i32 %x, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%rcx), %eax
-; CHECK:       [[FALSE_BB]]:
+  ret i32 %z
+}
+
+; TODO: If cmov instruction is marked as unpredicatable, do not convert it to branch.
+define i32 @test_cmov_memoperand_unpredictable(i32 %a, i32 %b, i32 %x, i32* %y) #0 {
+; CHECK-LABEL: test_cmov_memoperand_unpredictable:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB8_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB8_2: # %entry
+; CHECK-NEXT:    retq
+entry:
+  %cond = icmp ugt i32 %a, %b
+  %load = load i32, i32* %y
+  %z = select i1 %cond, i32 %x, i32 %load, !unpredictable !0
   ret i32 %z
 }
 
@@ -345,29 +541,25 @@ entry:
 ; operand.
 define i32 @test_cmov_memoperand_in_group(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB9_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %r8d
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB9_2: # %entry
+; CHECK-NEXT:    addl %r8d, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z1 = select i1 %cond, i32 %x, i32 %a
   %z2 = select i1 %cond, i32 %x, i32 %y
   %z3 = select i1 %cond, i32 %x, i32 %b
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
-; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
-; CHECK-DAG:     movl %{{.*}} %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         addl
-; CHECK-DAG:       %[[R1]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK-DAG:     addl
-; CHECK-DAG:       %[[R2]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -376,29 +568,25 @@ entry:
 ; Same as before but with operands reversed in the select with a load.
 define i32 @test_cmov_memoperand_in_group2(i32 %a, i32 %b, i32 %x, i32* %y.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edx, %eax
+; CHECK-NEXT:    movl %edx, %r8d
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    jbe .LBB10_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %r8d
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %esi, %edx
+; CHECK-NEXT:  .LBB10_2: # %entry
+; CHECK-NEXT:    addl %r8d, %eax
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edx, %eax
-; CHECK:         cmpl
   %y = load i32, i32* %y.ptr
   %z2 = select i1 %cond, i32 %a, i32 %x
   %z1 = select i1 %cond, i32 %y, i32 %x
   %z3 = select i1 %cond, i32 %b, i32 %x
-; CHECK-NOT:     cmov
-; CHECK:         jbe [[FALSE_BB:.*]]
-; CHECK-DAG:     movl %{{.*}}, %[[R1:.*]]
-; CHECK-DAG:     movl (%r{{..}}), %[[R2:.*]]
-; CHECK-DAG:     movl %{{.*}} %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         addl
-; CHECK-DAG:       %[[R1]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK-DAG:     addl
-; CHECK-DAG:       %[[R2]]
-; CHECK-DAG:       ,
-; CHECK-DAG:       %eax
-; CHECK:         retq
   %s1 = add i32 %z1, %z2
   %s2 = add i32 %s1, %z3
   ret i32 %s2
@@ -408,15 +596,19 @@ entry:
 ; loads.
 define i32 @test_cmov_memoperand_conflicting_dir(i32 %a, i32 %b, i32 %x, i32* %y1.ptr, i32* %y2.ptr) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_conflicting_dir:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:    cmoval %edx, %eax
+; CHECK-NEXT:    cmoval (%r8), %edx
+; CHECK-NEXT:    addl %edx, %eax
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         cmpl
   %y1 = load i32, i32* %y1.ptr
   %y2 = load i32, i32* %y2.ptr
   %z1 = select i1 %cond, i32 %x, i32 %y1
   %z2 = select i1 %cond, i32 %y2, i32 %x
-; CHECK:         cmoval
-; CHECK:         cmoval
   %s1 = add i32 %z1, %z2
   ret i32 %s1
 }
@@ -426,18 +618,19 @@ entry:
 ; the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr(i32 %a, i32 %b, i32* %x, i32* %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB12_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB12_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %load = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%r{{..}}), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %z
 }
 
@@ -445,20 +638,21 @@ entry:
 ; uses the result of the other as part of the address.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr2(i32 %a, i32 %b, i32* %x, i32** %y) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB13_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movq (%rcx), %rax
+; CHECK-NEXT:    movl (%rax), %eax
+; CHECK-NEXT:  .LBB13_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %load1 = load i32*, i32** %y
   %p = select i1 %cond, i32* %x, i32* %load1
   %load2 = load i32, i32* %p
   %z = select i1 %cond, i32 %a, i32 %load2
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movq (%r{{..}}), %[[R1:.*]]
-; CHECK:         movl (%[[R1]]), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %z
 }
 
@@ -467,19 +661,20 @@ entry:
 ; where that cmov gets *its* input from a prior cmov in the group.
 define i32 @test_cmov_memoperand_in_group_reuse_for_addr3(i32 %a, i32 %b, i32* %x, i32* %y, i32* %z) #0 {
 ; CHECK-LABEL: test_cmov_memoperand_in_group_reuse_for_addr3:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    cmpl %esi, %edi
+; CHECK-NEXT:    ja .LBB14_2
+; CHECK-NEXT:  # %bb.1: # %entry
+; CHECK-NEXT:    movl (%rcx), %eax
+; CHECK-NEXT:  .LBB14_2: # %entry
+; CHECK-NEXT:    retq
 entry:
   %cond = icmp ugt i32 %a, %b
-; CHECK:         movl %edi, %eax
-; CHECK:         cmpl
   %p = select i1 %cond, i32* %x, i32* %y
   %p2 = select i1 %cond, i32* %z, i32* %p
   %load = load i32, i32* %p2
   %r = select i1 %cond, i32 %a, i32 %load
-; CHECK-NOT:     cmov
-; CHECK:         ja [[FALSE_BB:.*]]
-; CHECK:         movl (%r{{..}}), %eax
-; CHECK:       [[FALSE_BB]]:
-; CHECK:         retq
   ret i32 %r
 }
 
@@ -495,34 +690,35 @@ define void @test_memoperand_loop(i32 %data) #0 {
 ; CHECK-NEXT:    movq (%rcx), %rdx
 ; CHECK-NEXT:    xorl %esi, %esi
 ; CHECK-NEXT:    movq %rax, %rcx
-entry:
-  %begin = load i32*, i32** @begin, align 8
-  %end = load i32*, i32** @end, align 8
-  br label %loop.body
-
-; CHECK-NEXT:  .LBB13_1: # %loop.body
+; CHECK-NEXT:  .LBB15_1: # %loop.body
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB13_3
+; CHECK-NEXT:    ja .LBB15_3
 ; CHECK-NEXT:  # %bb.2: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
 ; CHECK-NEXT:    movq (%r8), %rcx
-; CHECK-NEXT:  .LBB13_3: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:  .LBB15_3: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addq $8, %rcx
 ; CHECK-NEXT:    cmpq %rdx, %rcx
-; CHECK-NEXT:    ja .LBB13_5
+; CHECK-NEXT:    ja .LBB15_5
 ; CHECK-NEXT:  # %bb.4: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
 ; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:  .LBB13_5: # %loop.body
-; CHECK-NEXT:    # in Loop: Header=BB13_1 Depth=1
+; CHECK-NEXT:  .LBB15_5: # %loop.body
+; CHECK-NEXT:    # in Loop: Header=BB15_1 Depth=1
 ; CHECK-NEXT:    movl %edi, (%rcx)
 ; CHECK-NEXT:    addl $1, %esi
 ; CHECK-NEXT:    cmpl $1024, %esi # imm = 0x400
-; CHECK-NEXT:    jl .LBB13_1
+; CHECK-NEXT:    jl .LBB15_1
+; CHECK-NEXT:  # %bb.6: # %exit
+; CHECK-NEXT:    retq
+entry:
+  %begin = load i32*, i32** @begin, align 8
+  %end = load i32*, i32** @end, align 8
+  br label %loop.body
 loop.body:
   %phi.iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.body ]
   %phi.ptr = phi i32* [ %begin, %entry ], [ %dst2, %loop.body ]
@@ -538,11 +734,9 @@ loop.body:
   %iv.next = add i32 %phi.iv, 1
   %cond = icmp slt i32 %iv.next, 1024
   br i1 %cond, label %loop.body, label %exit
-
-; CHECK-NEXT:  # %bb.6: # %exit
-; CHECK-NEXT:    retq
 exit:
   ret void
 }
 
 attributes #0 = {"target-cpu"="x86-64"}
+!0 = !{}

From 7dc705f86dd3a0fd8afd6f7db3ea132e2a36fa61 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Tue, 25 Jan 2022 15:26:32 -0500
Subject: [PATCH 607/946] [libc++][NFC] Fix typo

---
 libcxx/src/filesystem/directory_iterator.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index 90b255d9877f9..98928f06c868a 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -197,9 +197,9 @@ class __dir_stream {
       : __stream_(nullptr), __root_(root) {
     if ((__stream_ = ::opendir(root.c_str())) == nullptr) {
       ec = detail::capture_errno();
-      const bool allow_eacess =
+      const bool allow_eacces =
           bool(opts & directory_options::skip_permission_denied);
-      if (allow_eacess && ec.value() == EACCES)
+      if (allow_eacces && ec.value() == EACCES)
         ec.clear();
       return;
     }

From d2cc23a337ddf4e12c5f49664099be2874b16db8 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 12:39:32 -0800
Subject: [PATCH 608/946] [docs] HowToCrossCompileLLVM.rst: prefer --target=
 over legacy -target

---
 llvm/docs/HowToCrossCompileLLVM.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/docs/HowToCrossCompileLLVM.rst b/llvm/docs/HowToCrossCompileLLVM.rst
index d2dc7bf60e5cb..e1ad8e5f5f4ff 100644
--- a/llvm/docs/HowToCrossCompileLLVM.rst
+++ b/llvm/docs/HowToCrossCompileLLVM.rst
@@ -58,7 +58,7 @@ specific Linux distribution, version or GCC layout, so you'll need to fudge.
 
 In addition to the ones above, you'll also need:
 
- * ``'-target arm-linux-gnueabihf'`` or whatever is the triple of your cross GCC.
+ * ``--target=arm-linux-gnueabihf`` or whatever is the triple of your cross GCC.
  * ``'--sysroot=/usr/arm-linux-gnueabihf'``, ``'--sysroot=/opt/gcc/arm-linux-gnueabihf'``
    or whatever is the location of your GCC's sysroot (where /lib, /bin etc are).
  * Appropriate use of ``-I`` and ``-L``, depending on how the cross GCC is installed,

From 5b55e733a9c3ce471f28d10492f02de9cd213f54 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Tue, 25 Jan 2022 04:32:33 -0500
Subject: [PATCH 609/946] Remove unused <algorithm> include

---
 llvm/include/llvm/ADT/AllocatorList.h        | 1 -
 llvm/include/llvm/ADT/CoalescingBitVector.h  | 1 -
 llvm/include/llvm/ADT/DenseSet.h             | 1 -
 llvm/include/llvm/ADT/MapVector.h            | 1 -
 llvm/include/llvm/ADT/PriorityWorklist.h     | 1 -
 llvm/include/llvm/ADT/SetVector.h            | 1 -
 llvm/include/llvm/Support/BinaryByteStream.h | 1 -
 llvm/include/llvm/Support/Error.h            | 1 -
 llvm/include/llvm/Support/ScopedPrinter.h    | 3 +--
 9 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/llvm/include/llvm/ADT/AllocatorList.h b/llvm/include/llvm/ADT/AllocatorList.h
index 404a657f27de4..04d0afc9d076e 100644
--- a/llvm/include/llvm/ADT/AllocatorList.h
+++ b/llvm/include/llvm/ADT/AllocatorList.h
@@ -13,7 +13,6 @@
 #include "llvm/ADT/iterator.h"
 #include "llvm/ADT/simple_ilist.h"
 #include "llvm/Support/Allocator.h"
-#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <iterator>
diff --git a/llvm/include/llvm/ADT/CoalescingBitVector.h b/llvm/include/llvm/ADT/CoalescingBitVector.h
index 82e2e1a9f9e25..6935c255a099d 100644
--- a/llvm/include/llvm/ADT/CoalescingBitVector.h
+++ b/llvm/include/llvm/ADT/CoalescingBitVector.h
@@ -21,7 +21,6 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
-#include <algorithm>
 #include <initializer_list>
 
 namespace llvm {
diff --git a/llvm/include/llvm/ADT/DenseSet.h b/llvm/include/llvm/ADT/DenseSet.h
index edce7c43773cf..e767211a09000 100644
--- a/llvm/include/llvm/ADT/DenseSet.h
+++ b/llvm/include/llvm/ADT/DenseSet.h
@@ -17,7 +17,6 @@
 #include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/type_traits.h"
-#include <algorithm>
 #include <cstddef>
 #include <initializer_list>
 #include <iterator>
diff --git a/llvm/include/llvm/ADT/MapVector.h b/llvm/include/llvm/ADT/MapVector.h
index f9540999381aa..d281166b3e19b 100644
--- a/llvm/include/llvm/ADT/MapVector.h
+++ b/llvm/include/llvm/ADT/MapVector.h
@@ -18,7 +18,6 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
-#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <iterator>
diff --git a/llvm/include/llvm/ADT/PriorityWorklist.h b/llvm/include/llvm/ADT/PriorityWorklist.h
index 01dd59a2e71a3..e9fbf296973d2 100644
--- a/llvm/include/llvm/ADT/PriorityWorklist.h
+++ b/llvm/include/llvm/ADT/PriorityWorklist.h
@@ -19,7 +19,6 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
-#include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <iterator>
diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h
index 32bcd50966cca..82d5e98afb5dc 100644
--- a/llvm/include/llvm/ADT/SetVector.h
+++ b/llvm/include/llvm/ADT/SetVector.h
@@ -23,7 +23,6 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Compiler.h"
-#include <algorithm>
 #include <cassert>
 #include <iterator>
 #include <vector>
diff --git a/llvm/include/llvm/Support/BinaryByteStream.h b/llvm/include/llvm/Support/BinaryByteStream.h
index 7d8b6d2dc43d0..dc4adba26f160 100644
--- a/llvm/include/llvm/Support/BinaryByteStream.h
+++ b/llvm/include/llvm/Support/BinaryByteStream.h
@@ -17,7 +17,6 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FileOutputBuffer.h"
 #include "llvm/Support/MemoryBuffer.h"
-#include <algorithm>
 #include <cstdint>
 #include <cstring>
 #include <memory>
diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h
index 3997f0ea6db79..881049b15b0df 100644
--- a/llvm/include/llvm/Support/Error.h
+++ b/llvm/include/llvm/Support/Error.h
@@ -25,7 +25,6 @@
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 #include <cassert>
 #include <cstdint>
 #include <cstdlib>
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index 803ae47793df0..9bde4f455a2df 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -18,7 +18,6 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/JSON.h"
 #include "llvm/Support/raw_ostream.h"
-#include <algorithm>
 
 namespace llvm {
 
@@ -123,7 +122,7 @@ class ScopedPrinter {
   void indent(int Levels = 1) { IndentLevel += Levels; }
 
   void unindent(int Levels = 1) {
-    IndentLevel = std::max(0, IndentLevel - Levels);
+    IndentLevel = IndentLevel > Levels ? IndentLevel - Levels : 0;
   }
 
   void resetIndent() { IndentLevel = 0; }

From 6427f4c52c31cc36004b14825e6598cd4a43f385 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Tue, 25 Jan 2022 15:48:51 -0500
Subject: [PATCH 610/946] [NFC] Use an llvm::DenseMap instead of std::map in
 CategorizedHelpPrinter::printOptions

---
 llvm/lib/Support/CommandLine.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index ed4f01f176c2f..e517ceb05d9bd 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -45,7 +45,6 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdlib>
-#include <map>
 #include <string>
 using namespace llvm;
 using namespace cl;
@@ -2339,7 +2338,7 @@ class CategorizedHelpPrinter : public HelpPrinter {
 protected:
   void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) override {
     std::vector<OptionCategory *> SortedCategories;
-    std::map<OptionCategory *, std::vector<Option *>> CategorizedOptions;
+    DenseMap<OptionCategory *, std::vector<Option *>> CategorizedOptions;
 
     // Collect registered option categories into vector in preparation for
     // sorting.
@@ -2351,10 +2350,6 @@ class CategorizedHelpPrinter : public HelpPrinter {
     array_pod_sort(SortedCategories.begin(), SortedCategories.end(),
                    OptionCategoryCompare);
 
-    // Create map to empty vectors.
-    for (OptionCategory *Category : SortedCategories)
-      CategorizedOptions[Category] = std::vector<Option *>();
-
     // Walk through pre-sorted options and assign into categories.
     // Because the options are already alphabetically sorted the
     // options within categories will also be alphabetically sorted.

From f170595249196f1ca5c3a70118fedc1043c093b3 Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Tue, 25 Jan 2022 12:29:11 -0800
Subject: [PATCH 611/946] [InstrProf][Correlator] Do not compress names when
 reading debug info

There is no need to compress the names string when correlating with
debug info since InstrProfReader will immediately uncompress it anyway.
This also removes the dependency on zlib in this case.

Reviewed By: kyulee

Differential Revision: https://reviews.llvm.org/D118176
---
 .../Darwin/instrprof-debug-info-correlate.c     |  2 --
 .../Linux/instrprof-debug-info-correlate.c      |  2 --
 .../llvm/ProfileData/InstrProfCorrelator.h      | 17 +++++++----------
 llvm/lib/ProfileData/InstrProfCorrelator.cpp    |  8 ++++----
 llvm/lib/ProfileData/InstrProfReader.cpp        |  4 ++--
 5 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c
index 837e1f428d1e5..f628f928576a3 100644
--- a/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c
+++ b/compiler-rt/test/profile/Darwin/instrprof-debug-info-correlate.c
@@ -1,5 +1,3 @@
-// REQUIRES: zlib
-
 // Value profiling is currently not supported in lightweight mode.
 // RUN: %clang_pgogen -o %t -g -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
 // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t
diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
index 4f46586a16999..c5f2a65c000df 100644
--- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
+++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate.c
@@ -1,5 +1,3 @@
-// REQUIRES: zlib
-
 // Value profiling is currently not supported in lightweight mode.
 // RUN: %clang_pgogen -o %t.normal -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp
 // RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t.normal
diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
index 81ecbc2813ab1..e254169c447a9 100644
--- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
+++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
@@ -83,14 +83,11 @@ class InstrProfCorrelatorImpl : public InstrProfCorrelator {
   /// Return the number of ProfileData elements.
   size_t getDataSize() const { return Data.size(); }
 
-  /// Return a pointer to the compressed names string that this class
-  /// constructs.
-  const char *getCompressedNamesPointer() const {
-    return CompressedNames.c_str();
-  }
+  /// Return a pointer to the names string that this class constructs.
+  const char *getNamesPointer() const { return Names.c_str(); }
 
-  /// Return the number of bytes in the compressed names string.
-  size_t getCompressedNamesSize() const { return CompressedNames.size(); }
+  /// Return the number of bytes in the names string.
+  size_t getNamesSize() const { return Names.size(); }
 
   static llvm::Expected<std::unique_ptr<InstrProfCorrelatorImpl<IntPtrT>>>
   get(std::unique_ptr<InstrProfCorrelator::Context> Ctx,
@@ -98,7 +95,7 @@ class InstrProfCorrelatorImpl : public InstrProfCorrelator {
 
 protected:
   std::vector<RawInstrProf::ProfileData<IntPtrT>> Data;
-  std::string CompressedNames;
+  std::string Names;
 
   Error correlateProfileData() override;
   virtual void correlateProfileDataImpl() = 0;
@@ -110,7 +107,7 @@ class InstrProfCorrelatorImpl : public InstrProfCorrelator {
   InstrProfCorrelatorImpl(InstrProfCorrelatorKind Kind,
                           std::unique_ptr<InstrProfCorrelator::Context> Ctx)
       : InstrProfCorrelator(Kind, std::move(Ctx)){};
-  std::vector<std::string> Names;
+  std::vector<std::string> NamesVec;
   llvm::DenseSet<IntPtrT> CounterOffsets;
 
   // Byte-swap the value if necessary.
@@ -140,7 +137,7 @@ class DwarfInstrProfCorrelator : public InstrProfCorrelatorImpl<IntPtrT> {
   static bool isDIEOfProbe(const DWARFDie &Die);
 
   /// Iterate over DWARF DIEs to find those that symbolize instrumentation
-  /// probes and construct the ProfileData vector and CompressedNames string.
+  /// probes and construct the ProfileData vector and Names string.
   ///
   /// Here is some example DWARF for an instrumentation probe we are looking
   /// for:
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index 5f06541916ddd..4bd9a3df950d3 100644
--- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -125,12 +125,12 @@ InstrProfCorrelatorImpl<IntPtrT>::get(
 
 template <class IntPtrT>
 Error InstrProfCorrelatorImpl<IntPtrT>::correlateProfileData() {
-  assert(Data.empty() && CompressedNames.empty() && Names.empty());
+  assert(Data.empty() && Names.empty() && NamesVec.empty());
   correlateProfileDataImpl();
   auto Result =
-      collectPGOFuncNameStrings(Names, /*doCompression=*/true, CompressedNames);
+      collectPGOFuncNameStrings(NamesVec, /*doCompression=*/false, Names);
   CounterOffsets.clear();
-  Names.clear();
+  NamesVec.clear();
   return Result;
 }
 
@@ -155,7 +155,7 @@ void InstrProfCorrelatorImpl<IntPtrT>::addProbe(StringRef FunctionName,
       maybeSwap<uint32_t>(NumCounters),
       /*NumValueSites=*/{maybeSwap<uint16_t>(0), maybeSwap<uint16_t>(0)},
   });
-  Names.push_back(FunctionName.str());
+  NamesVec.push_back(FunctionName.str());
 }
 
 template <class IntPtrT>
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 2544e5bcf6476..861ff61df510f 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -411,8 +411,8 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
     assert(CountersDelta == 0 && NamesDelta == 0);
     Data = Correlator->getDataPointer();
     DataEnd = Data + Correlator->getDataSize();
-    NamesStart = Correlator->getCompressedNamesPointer();
-    NamesEnd = NamesStart + Correlator->getCompressedNamesSize();
+    NamesStart = Correlator->getNamesPointer();
+    NamesEnd = NamesStart + Correlator->getNamesSize();
   } else {
     Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
         Start + DataOffset);

From 8f5b1d9e146250affd69c08989a2885b77f8463c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Bylica?= <chfast@gmail.com>
Date: Sun, 23 Jan 2022 18:24:17 +0100
Subject: [PATCH 612/946] [test][DAGCombine] Add tests for cmp+add -> addcarry

Tests for https://reviews.llvm.org/D118037.
---
 llvm/test/CodeGen/X86/addcarry.ll | 64 +++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index f8f7d7ed2c556..1f66d2c5a6c9d 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -1033,3 +1033,67 @@ define void @add_U256_without_i128_or_recursive(%uint256* sret(%uint256) %0, %ui
   store i64 %37, i64* %41, align 8
   ret void
 }
+
+define i32 @addcarry_ult(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-LABEL: addcarry_ult:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (%rdi,%rsi), %eax
+; CHECK-NEXT:    cmpl %ecx, %edx
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    retq
+  %s = add i32 %a, %b
+  %k = icmp ult i32 %x, %y
+  %z = zext i1 %k to i32
+  %r = add i32 %s, %z
+  ret i32 %r
+}
+
+define i32 @addcarry_ugt(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-LABEL: addcarry_ugt:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (%rdi,%rsi), %eax
+; CHECK-NEXT:    cmpl %edx, %ecx
+; CHECK-NEXT:    adcl $0, %eax
+; CHECK-NEXT:    retq
+  %s = add i32 %a, %b
+  %k = icmp ugt i32 %x, %y
+  %z = zext i1 %k to i32
+  %r = add i32 %s, %z
+  ret i32 %r
+}
+
+define i32 @addcarry_ule(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-LABEL: addcarry_ule:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (%rdi,%rsi), %eax
+; CHECK-NEXT:    cmpl %edx, %ecx
+; CHECK-NEXT:    sbbl $-1, %eax
+; CHECK-NEXT:    retq
+  %s = add i32 %a, %b
+  %k = icmp ule i32 %x, %y
+  %z = zext i1 %k to i32
+  %r = add i32 %s, %z
+  ret i32 %r
+}
+
+define i32 @addcarry_uge(i32 %a, i32 %b, i32 %x, i32 %y) {
+; CHECK-LABEL: addcarry_uge:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (%rdi,%rsi), %eax
+; CHECK-NEXT:    cmpl %ecx, %edx
+; CHECK-NEXT:    sbbl $-1, %eax
+; CHECK-NEXT:    retq
+  %s = add i32 %a, %b
+  %k = icmp uge i32 %x, %y
+  %z = zext i1 %k to i32
+  %r = add i32 %s, %z
+  ret i32 %r
+}

From f5f377d1fca8ecb4ec5bd2bca656e7c719727404 Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Sun, 2 Jan 2022 14:31:47 -0600
Subject: [PATCH 613/946] [IRSim][IROutliner] Adding support for recognizing
 and outlining indirect function calls, and function calls with different
 names, but the same type

The outliner currently requires that function calls not be indirect calls, and have that the function name, and function type must match, as well as other attributes such as calling conventions. This patch treats called functions as values, and just another operand, and named function calls as constants. This allows functions to be treated like any other constant, or input and output into the outlined functions.

There are also debugging flags added to enforce the old behaviors where indirect calls not be allowed, and to enforce the old rule that function calls names must also match.

Reviewers: paquette, jroelofs

Differential Revision: https://reviews.llvm.org/D109448
---
 .../llvm/Analysis/IRSimilarityIdentifier.h    | 64 ++++++++++++-
 llvm/include/llvm/Transforms/IPO/IROutliner.h |  9 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  | 52 ++++++++---
 llvm/lib/Transforms/IPO/IROutliner.cpp        |  5 +
 .../IROutliner/illegal-indirect-calls.ll      |  7 +-
 .../IROutliner/legal-indirect-calls.ll        | 71 ++++++++++++++
 .../IROutliner/outlining-call-and-indirect.ll | 70 ++++++++++++++
 .../outlining-calls-names-must-match.ll       | 93 +++++++++++++++++++
 .../Transforms/IROutliner/outlining-calls.ll  | 83 +++++++++--------
 .../Analysis/IRSimilarityIdentifierTest.cpp   | 65 ++++++++++++-
 10 files changed, 454 insertions(+), 65 deletions(-)
 create mode 100644 llvm/test/Transforms/IROutliner/legal-indirect-calls.ll
 create mode 100644 llvm/test/Transforms/IROutliner/outlining-call-and-indirect.ll
 create mode 100644 llvm/test/Transforms/IROutliner/outlining-calls-names-must-match.ll

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 2f8ae205657d8..966bf02d128ec 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -128,6 +128,16 @@ struct IRInstructionData
   /// to a less than form.  It is None otherwise.
   Optional<CmpInst::Predicate> RevisedPredicate;
 
+  /// This is only relevant if we are wrapping a CallInst. If we are requiring
+  /// that the function calls have matching names as well as types, and the
+  /// call is not an indirect call, this will hold the name of the function.  If
+  /// it is an indirect string, it will be the empty string.  However, if this
+  /// requirement is not in place it will be the empty string regardless of the
+  /// function call type.  The value held here is used to create the hash of the
+  /// instruction, and check to make sure two instructions are close to one
+  /// another.
+  Optional<std::string> CalleeName;
+
   /// This structure holds the distances of how far "ahead of" or "behind" the
   /// target blocks of a branch, or the incoming blocks of a phi nodes are.
   /// If the value is negative, it means that the block was registered before
@@ -168,6 +178,10 @@ struct IRInstructionData
   /// instruction. the IRInstructionData must be wrapping a CmpInst.
   CmpInst::Predicate getPredicate() const;
 
+  /// Get the callee name that the call instruction is using for hashing the
+  /// instruction. The IRInstructionData must be wrapping a CallInst.
+  StringRef getCalleeName() const;
+
   /// A function that swaps the predicates to their less than form if they are
   /// in a greater than form. Otherwise, the predicate is unchanged.
   ///
@@ -185,6 +199,21 @@ struct IRInstructionData
   void
   setBranchSuccessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
 
+  /// For an IRInstructionData containing a CallInst, set the function name
+  /// appropriately.  This will be an empty string if it is an indirect call,
+  /// or we are not matching by name of the called function.  It will be the
+  /// name of the function if \p MatchByName is true and it is not an indirect
+  /// call.  We may decide not to match by name in order to expand the
+  /// size of the regions we can match.  If a function name has the same type
+  /// signature, but the different name, the region of code is still almost the
+  /// same.  Since function names can be treated as constants, the name itself
+  /// could be extrapolated away.  However, matching by name provides a
+  /// specificity and more "identical" code than not matching by name.
+  ///
+  /// \param MatchByName - A flag to mark whether we are using the called
+  /// function name as a differentiating parameter.
+  void setCalleeName(bool MatchByName = true);
+
   /// Hashes \p Value based on its opcode, types, and operand types.
   /// Two IRInstructionData instances produce the same hash when they perform
   /// the same operation.
@@ -223,12 +252,14 @@ struct IRInstructionData
           llvm::hash_value(ID.Inst->getType()),
           llvm::hash_value(ID.getPredicate()),
           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
-    else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst))
+    else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst)) {
+      std::string FunctionName = *ID.CalleeName;
       return llvm::hash_combine(
           llvm::hash_value(ID.Inst->getOpcode()),
           llvm::hash_value(ID.Inst->getType()),
-          llvm::hash_value(CI->getCalledFunction()->getName().str()),
+          llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
+    }
     return llvm::hash_combine(
         llvm::hash_value(ID.Inst->getOpcode()),
         llvm::hash_value(ID.Inst->getType()),
@@ -346,6 +377,10 @@ struct IRInstructionMapper {
   /// to be considered for similarity.
   bool HaveLegalRange = false;
 
+  /// Marks whether we should use exact function names, as well as types to
+  /// find similarity between calls.
+  bool EnableMatchCallsByName = false;
+
   /// This allocator pointer is in charge of holding on to the IRInstructionData
   /// so it is not deallocated until whatever external tool is using it is done
   /// with the information.
@@ -483,7 +518,10 @@ struct IRInstructionMapper {
     // is not an indirect call.
     InstrType visitCallInst(CallInst &CI) {
       Function *F = CI.getCalledFunction();
-      if (!F || CI.isIndirectCall() || !F->hasName())
+      bool IsIndirectCall = CI.isIndirectCall();
+      if (IsIndirectCall && !EnableIndirectCalls)
+        return Illegal;
+      if (!F && !IsIndirectCall)
         return Illegal;
       return Legal;
     }
@@ -498,6 +536,10 @@ struct IRInstructionMapper {
     // The flag variable that lets the classifier know whether we should
     // allow branches to be checked for similarity.
     bool EnableBranches = false;
+
+    // The flag variable that lets the classifier know whether we should
+    // allow indirect calls to be considered legal instructions.
+    bool EnableIndirectCalls = false;
   };
 
   /// Maps an Instruction to a member of InstrType.
@@ -882,9 +924,12 @@ typedef std::vector<SimilarityGroup> SimilarityGroupList;
 /// analyzing the module.
 class IRSimilarityIdentifier {
 public:
-  IRSimilarityIdentifier(bool MatchBranches = true)
+  IRSimilarityIdentifier(bool MatchBranches = true,
+                         bool MatchIndirectCalls = true,
+                         bool MatchCallsWithName = false)
       : Mapper(&InstDataAllocator, &InstDataListAllocator),
-        EnableBranches(MatchBranches) {}
+        EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
+        EnableMatchingCallsByName(MatchCallsWithName) {}
 
 private:
   /// Map the instructions in the module to unsigned integers, using mapping
@@ -964,6 +1009,15 @@ class IRSimilarityIdentifier {
   /// similarity, or only look within basic blocks.
   bool EnableBranches = true;
 
+  /// The flag variable that marks whether we allow indirect calls to be checked
+  /// for similarity, or exclude them as a legal instruction.
+  bool EnableIndirectCalls = true;
+
+  /// The flag variable that marks whether we allow calls to be marked as
+  /// similar if they do not have the same name, only the same calling
+  /// convention, attributes and type signature.
+  bool EnableMatchingCallsByName = true;
+
   /// The SimilarityGroups found with the most recent run of \ref
   /// findSimilarity. None if there is no recent run.
   Optional<SimilarityGroupList> SimilarityCandidates;
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 16f597ab898e5..9799737a529e3 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -365,7 +365,10 @@ class IROutliner {
     // that they have a name in these cases.
     bool visitCallInst(CallInst &CI) {
       Function *F = CI.getCalledFunction();
-      if (!F || CI.isIndirectCall() || !F->hasName())
+      bool IsIndirectCall = CI.isIndirectCall();
+      if (IsIndirectCall && !EnableIndirectCalls)
+        return false;
+      if (!F && !IsIndirectCall)
         return false;
       // Returning twice can cause issues with the state of the function call
       // that were not expected when the function was used, so we do not include
@@ -389,6 +392,10 @@ class IROutliner {
     // The flag variable that marks whether we should allow branch instructions
     // to be outlined.
     bool EnableBranches = false;
+
+    // The flag variable that marks whether we should allow indirect calls
+    // to be outlined.
+    bool EnableIndirectCalls = true;
   };
 
   /// A InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index ea08e50bf7283..8c3c0400d6015 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -31,6 +31,17 @@ cl::opt<bool>
                              "across branches for debugging purposes."));
 } // namespace llvm
 
+cl::opt<bool>
+    DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
+                         cl::ReallyHidden,
+                         cl::desc("disable outlining indirect calls."));
+
+cl::opt<bool>
+    MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
+                     cl::desc("only allow matching call instructions if the "
+                              "name and type signature match."));
+
+
 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
                                      IRInstructionDataList &IDList)
     : Inst(&I), Legal(Legality), IDL(&IDList) {
@@ -88,6 +99,15 @@ void IRInstructionData::setBranchSuccessors(
   }
 }
 
+void IRInstructionData::setCalleeName(bool MatchByName) {
+  CallInst *CI = dyn_cast<CallInst>(Inst);
+  assert(CI && "Instruction must be call");
+
+  CalleeName = "";
+  if (!CI->isIndirectCall() && MatchByName)
+    CalleeName = CI->getCalledFunction()->getName().str();
+}
+
 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
   switch (CI->getPredicate()) {
   case CmpInst::FCMP_OGT:
@@ -114,10 +134,13 @@ CmpInst::Predicate IRInstructionData::getPredicate() const {
   return cast<CmpInst>(Inst)->getPredicate();
 }
 
-static StringRef getCalledFunctionName(CallInst &CI) {
-  assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?");
+StringRef IRInstructionData::getCalleeName() const {
+  assert(isa<CallInst>(Inst) &&
+         "Can only get a name from a call instruction");
 
-  return CI.getCalledFunction()->getName();
+  assert(CalleeName.hasValue() && "CalleeName has not been set");
+
+  return *CalleeName;
 }
 
 bool IRSimilarity::isClose(const IRInstructionData &A,
@@ -172,13 +195,11 @@ bool IRSimilarity::isClose(const IRInstructionData &A,
                   });
   }
 
-  // If the instructions are functions, we make sure that the function name is
-  // the same.  We already know that the types are since is isSameOperationAs is
-  // true.
+  // If the instructions are functions calls, we make sure that the function
+  // name is the same.  We already know that the types are since is
+  // isSameOperationAs is true.
   if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
-    CallInst *CIA = cast<CallInst>(A.Inst);
-    CallInst *CIB = cast<CallInst>(B.Inst);
-    if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0)
+    if (A.getCalleeName().str().compare(B.getCalleeName().str()) != 0)
       return false;
   }
 
@@ -246,6 +267,9 @@ unsigned IRInstructionMapper::mapToLegalUnsigned(
   if (isa<BranchInst>(*It))
     ID->setBranchSuccessors(BasicBlockToInteger);
 
+  if (isa<CallInst>(*It))
+    ID->setCalleeName(EnableMatchCallsByName);
+
   // Add to the instruction list
   bool WasInserted;
   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
@@ -1077,6 +1101,8 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
   std::vector<IRInstructionData *> InstrList;
   std::vector<unsigned> IntegerMapping;
   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
+  Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
+  Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
 
   populateMapper(Modules, InstrList, IntegerMapping);
   findCandidates(InstrList, IntegerMapping);
@@ -1087,6 +1113,8 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
   resetSimilarityCandidates();
   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
+  Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
+  Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
 
   std::vector<IRInstructionData *> InstrList;
   std::vector<unsigned> IntegerMapping;
@@ -1107,7 +1135,8 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
 }
 
 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
-  IRSI.reset(new IRSimilarityIdentifier(!DisableBranches));
+  IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
+                                        MatchCallsByName));
   return false;
 }
 
@@ -1125,7 +1154,8 @@ AnalysisKey IRSimilarityAnalysis::Key;
 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
                                                  ModuleAnalysisManager &) {
 
-  auto IRSI = IRSimilarityIdentifier(!DisableBranches);
+  auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
+                                     MatchCallsByName);
   IRSI.findSimilarity(M);
   return IRSI;
 }
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index c3104111864a4..9c79972443fe6 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -40,6 +40,10 @@ namespace llvm {
 extern cl::opt<bool> DisableBranches;
 } // namespace llvm
 
+// A command flag to be used for debugging to indirect calls from similarity
+// matching and outlining.
+extern cl::opt<bool> DisableIndirectCalls;
+
 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
 // functions. This is false by default because the linker can dedupe linkonceodr
 // functions. Since the outliner is confined to a single module (modulo LTO),
@@ -2519,6 +2523,7 @@ bool IROutliner::extractSection(OutlinableRegion &Region) {
 unsigned IROutliner::doOutline(Module &M) {
   // Find the possible similarity sections.
   InstructionClassifier.EnableBranches = !DisableBranches;
+  InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
   IRSimilarityIdentifier &Identifier = getIRSI(M);
   SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
 
diff --git a/llvm/test/Transforms/IROutliner/illegal-indirect-calls.ll b/llvm/test/Transforms/IROutliner/illegal-indirect-calls.ll
index 513fa60dc625e..ba57a3c9ebea3 100644
--- a/llvm/test/Transforms/IROutliner/illegal-indirect-calls.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-indirect-calls.ll
@@ -1,9 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost --no-ir-sim-indirect-calls < %s | FileCheck %s
 
-; This test checks that we do not outline indirect calls.  We cannot guarantee
-; that we have the same name in these cases, so two indirect calls cannot
-; be considered similar.
+; This test checks that we do not outline indirect calls when it is specified
+; that we should not.
 
 declare void @f1(i32*, i32*);
 declare void @f2(i32*, i32*);
diff --git a/llvm/test/Transforms/IROutliner/legal-indirect-calls.ll b/llvm/test/Transforms/IROutliner/legal-indirect-calls.ll
new file mode 100644
index 0000000000000..489d8b59e1f15
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/legal-indirect-calls.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we do outline indirect calls when it is not specified
+; that we should not.
+
+declare void @f1(i32*, i32*);
+declare void @f2(i32*, i32*);
+
+define void @function1(void()* %func) {
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  call void %func()
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @function2(void()* %func) {
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  call void %func()
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], void ()* [[FUNC:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], void ()* [[FUNC:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void  @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    call void [[TMP3:%.*]]()
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outlining-call-and-indirect.ll b/llvm/test/Transforms/IROutliner/outlining-call-and-indirect.ll
new file mode 100644
index 0000000000000..b58564ff45932
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-call-and-indirect.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we do can outline indirect and regular function calls
+; when the type matches when it is not specified that the names must match.
+
+declare void @f1();
+
+define void @function1(void()* %func) {
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  call void %func()
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @function2() {
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  call void @f1()
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], void ()* [[FUNC:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], void ()* @f1)
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    call void [[TMP3:%.*]]()
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outlining-calls-names-must-match.ll b/llvm/test/Transforms/IROutliner/outlining-calls-names-must-match.ll
new file mode 100644
index 0000000000000..d7bc171d15ee2
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outlining-calls-names-must-match.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -ir-sim-calls-by-name < %s | FileCheck %s
+
+; This test checks that we do can outline calls, but only if they have the same
+; function type and the same name.
+
+declare void @f1(i32*, i32*);
+declare void @f2(i32*, i32*);
+
+define void @function1() {
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  call void @f1(i32* %a, i32* %b)
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @function2() {
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  call void @f1(i32* %a, i32* %b)
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+define void @function3() {
+; CHECK-LABEL: @function3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
+; CHECK-NEXT:    call void @f2(i32* [[A]], i32* [[B]])
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[A]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[B]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  store i32 4, i32* %c, align 4
+  call void @f2(i32* %a, i32* %b)
+  %al = load i32, i32* %a
+  %bl = load i32, i32* %b
+  %cl = load i32, i32* %c
+  ret void
+}
+
+; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
+; CHECK: entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
+; CHECK-NEXT:    call void @f1(i32* [[ARG0]], i32* [[ARG1]])
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
diff --git a/llvm/test/Transforms/IROutliner/outlining-calls.ll b/llvm/test/Transforms/IROutliner/outlining-calls.ll
index 2372c4f674964..f925b2d0a72f9 100644
--- a/llvm/test/Transforms/IROutliner/outlining-calls.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-calls.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
 ; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
 
 ; This test checks that we do can outline calls, but only if they have the same
@@ -8,14 +8,6 @@ declare void @f1(i32*, i32*);
 declare void @f2(i32*, i32*);
 
 define void @function1() {
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
-; CHECK-NEXT:    ret void
-;
 entry:
   %a = alloca i32, align 4
   %b = alloca i32, align 4
@@ -31,14 +23,6 @@ entry:
 }
 
 define void @function2() {
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
-; CHECK-NEXT:    ret void
-;
 entry:
   %a = alloca i32, align 4
   %b = alloca i32, align 4
@@ -54,20 +38,6 @@ entry:
 }
 
 define void @function3() {
-; CHECK-LABEL: @function3(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 2, i32* [[A]], align 4
-; CHECK-NEXT:    store i32 3, i32* [[B]], align 4
-; CHECK-NEXT:    store i32 4, i32* [[C]], align 4
-; CHECK-NEXT:    call void @f2(i32* [[A]], i32* [[B]])
-; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[A]], align 4
-; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[B]], align 4
-; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[C]], align 4
-; CHECK-NEXT:    ret void
-;
 entry:
   %a = alloca i32, align 4
   %b = alloca i32, align 4
@@ -82,12 +52,45 @@ entry:
   ret void
 }
 
-; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
-; CHECK: entry_to_outline:
-; CHECK-NEXT:    store i32 2, i32* [[ARG0]], align 4
-; CHECK-NEXT:    store i32 3, i32* [[ARG1]], align 4
-; CHECK-NEXT:    store i32 4, i32* [[ARG2]], align 4
-; CHECK-NEXT:    call void @f1(i32* [[ARG0]], i32* [[ARG1]])
-; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
-; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
-; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], void (i32*, i32*)* @f1)
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], void (i32*, i32*)* @f1)
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]], void (i32*, i32*)* @f2)
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    store i32 2, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 4, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    call void [[TMP3:%.*]](i32* [[TMP0]], i32* [[TMP1]])
+; CHECK-NEXT:    [[AL:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[BL:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    [[CL:%.*]] = load i32, i32* [[TMP2]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
index 8371028ba4354..a8781e8f78af7 100644
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -933,8 +933,8 @@ TEST(IRInstructionMapper, GetElementPtrDifferentInBounds) {
   ASSERT_NE(UnsignedVec[0], UnsignedVec[1]);
 }
 
-// Checks that indirect call instructions are mapped to be illegal since we
-// cannot guarantee the same function in two different cases.
+// Checks that indirect call instructions are mapped to be illegal when it is
+// specified to disallow them.
 TEST(IRInstructionMapper, CallsIllegalIndirect) {
   StringRef ModuleString = R"(
                           define i32 @f(void()* %func) {
@@ -951,12 +951,39 @@ TEST(IRInstructionMapper, CallsIllegalIndirect) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIndirectCalls = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
   ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
 }
 
+// Checks that indirect call instructions are mapped to be legal when it is not
+// specified to disallow them.
+TEST(IRInstructionMapper, CallsLegalIndirect) {
+  StringRef ModuleString = R"(
+                          define i32 @f(void()* %func) {
+                          bb0:
+                             call void %func()
+                             call void %func()
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIndirectCalls = true;
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+}
+
 // Checks that a call instruction is mapped to be legal.  Here we check that
 // a call with the same name, and same types are mapped to the same
 // value.
@@ -986,8 +1013,8 @@ TEST(IRInstructionMapper, CallsSameTypeSameName) {
 }
 
 // Here we check that a calls with different names, but the same arguments types
-// are mapped to different value.
-TEST(IRInstructionMapper, CallsSameArgTypeDifferentName) {
+// are mapped to different value when specified that the name must match.
+TEST(IRInstructionMapper, CallsSameArgTypeDifferentNameDisallowed) {
   StringRef ModuleString = R"(
                           declare i32 @f1(i32, i32)
                           declare i32 @f2(i32, i32)
@@ -1006,6 +1033,7 @@ TEST(IRInstructionMapper, CallsSameArgTypeDifferentName) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.EnableMatchCallsByName = true;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1013,6 +1041,35 @@ TEST(IRInstructionMapper, CallsSameArgTypeDifferentName) {
   ASSERT_NE(UnsignedVec[0], UnsignedVec[1]);
 }
 
+// Here we check that a calls with different names, but the same arguments types
+// are mapped to the same value when it is not specifed that they must match.
+TEST(IRInstructionMapper, CallsSameArgTypeDifferentName) {
+  StringRef ModuleString = R"(
+                          declare i32 @f1(i32, i32)
+                          declare i32 @f2(i32, i32)
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = call i32 @f1(i32 %a, i32 %b)
+                             %1 = call i32 @f2(i32 %a, i32 %b)
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.EnableMatchCallsByName = false;
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+  ASSERT_EQ(UnsignedVec[0], UnsignedVec[1]);
+}
+
 // Here we check that a calls with different names, and different arguments
 // types are mapped to different value.
 TEST(IRInstructionMapper, CallsDifferentArgTypeDifferentName) {

From d65a3b3265d058ce1f0ac82fa4d0826bf1b2bbaf Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Tue, 25 Jan 2022 22:25:57 +0100
Subject: [PATCH 614/946] Fix build issue in assert mode introduced by
 6427f4c52c31cc36004

After 6427f4c52c31cc36004, one should use SortedCategories to check category
validity.
---
 llvm/lib/Support/CommandLine.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e517ceb05d9bd..c61e3c00737a7 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -2356,7 +2356,9 @@ class CategorizedHelpPrinter : public HelpPrinter {
     for (size_t I = 0, E = Opts.size(); I != E; ++I) {
       Option *Opt = Opts[I].second;
       for (auto &Cat : Opt->Categories) {
-        assert(CategorizedOptions.count(Cat) > 0 &&
+        assert(std::binary_search(SortedCategories.begin(),
+                                  SortedCategories.end(), Cat,
+                                  OptionCategoryCompare) &&
                "Option has an unregistered category");
         CategorizedOptions[Cat].push_back(Opt);
       }

From 1c82fdb3d174b00ceab49e1f6e355c228f45048f Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Tue, 25 Jan 2022 16:31:57 -0500
Subject: [PATCH 615/946] Revert "Fix build issue in assert mode introduced by
 6427f4c52c31cc36004"

This reverts commit d65a3b3265d058ce1f0ac82fa4d0826bf1b2bbaf.
Breaks build everywhere, see e.g. http://45.33.8.238/linux/66344/step_4.txt
or https://lab.llvm.org/buildbot/#/builders/139/builds/16811
---
 llvm/lib/Support/CommandLine.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index c61e3c00737a7..e517ceb05d9bd 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -2356,9 +2356,7 @@ class CategorizedHelpPrinter : public HelpPrinter {
     for (size_t I = 0, E = Opts.size(); I != E; ++I) {
       Option *Opt = Opts[I].second;
       for (auto &Cat : Opt->Categories) {
-        assert(std::binary_search(SortedCategories.begin(),
-                                  SortedCategories.end(), Cat,
-                                  OptionCategoryCompare) &&
+        assert(CategorizedOptions.count(Cat) > 0 &&
                "Option has an unregistered category");
         CategorizedOptions[Cat].push_back(Opt);
       }

From a676bdb5d65bd827a17e82c8b7b89e93cc692501 Mon Sep 17 00:00:00 2001
From: Nico Weber <thakis@chromium.org>
Date: Tue, 25 Jan 2022 16:36:38 -0500
Subject: [PATCH 616/946] Revert "[NFC] Use an llvm::DenseMap instead of
 std::map in CategorizedHelpPrinter::printOptions"

This reverts commit 6427f4c52c31cc36004b14825e6598cd4a43f385.
Breaks a bunch of tests, see e.g. http://45.33.8.238/linux/66340/step_7.txt
or https://lab.llvm.org/buildbot/#/builders/139/builds/16807
---
 llvm/lib/Support/CommandLine.cpp | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index e517ceb05d9bd..ed4f01f176c2f 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -45,6 +45,7 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdlib>
+#include <map>
 #include <string>
 using namespace llvm;
 using namespace cl;
@@ -2338,7 +2339,7 @@ class CategorizedHelpPrinter : public HelpPrinter {
 protected:
   void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) override {
     std::vector<OptionCategory *> SortedCategories;
-    DenseMap<OptionCategory *, std::vector<Option *>> CategorizedOptions;
+    std::map<OptionCategory *, std::vector<Option *>> CategorizedOptions;
 
     // Collect registered option categories into vector in preparation for
     // sorting.
@@ -2350,6 +2351,10 @@ class CategorizedHelpPrinter : public HelpPrinter {
     array_pod_sort(SortedCategories.begin(), SortedCategories.end(),
                    OptionCategoryCompare);
 
+    // Create map to empty vectors.
+    for (OptionCategory *Category : SortedCategories)
+      CategorizedOptions[Category] = std::vector<Option *>();
+
     // Walk through pre-sorted options and assign into categories.
     // Because the options are already alphabetically sorted the
     // options within categories will also be alphabetically sorted.

From e37de5d36e3190283916604342b029859129e2a4 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 25 Jan 2022 13:29:22 -0800
Subject: [PATCH 617/946] Fix UB in DwarfExpression::emitLegacyZExt()

A shift-left > 63 triggers a UBSAN failure. This patch kicks the can
down the road (to the consumer) by emitting a more compact
representation of the shift computation in DWARF expressions.

Differential Revision: https://reviews.llvm.org/D118183
---
 .../CodeGen/AsmPrinter/DwarfExpression.cpp    | 22 ++++++++++++++++---
 llvm/test/DebugInfo/X86/convert-debugloc.ll   |  9 ++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 37407c98e75f8..ee932d1051079 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -681,9 +681,25 @@ void DwarfExpression::emitLegacySExt(unsigned FromBits) {
 }
 
 void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
-  // (X & (1 << FromBits - 1))
-  emitOp(dwarf::DW_OP_constu);
-  emitUnsigned((1ULL << FromBits) - 1);
+  // Heuristic to decide the most efficient encoding.
+  // A ULEB can encode 7 1-bits per byte.
+  if (FromBits / 7 < 1+1+1+1+1) {
+    // (X & (1 << FromBits - 1))
+    emitOp(dwarf::DW_OP_constu);
+    emitUnsigned((1ULL << FromBits) - 1);
+  } else {
+    // Note that the DWARF 4 stack consists of pointer-sized elements,
+    // so technically it doesn't make sense to shift left more than 64
+    // bits. We leave that for the consumer to decide though. LLDB for
+    // example uses APInt for the stack elements and can still deal
+    // with this.
+    emitOp(dwarf::DW_OP_lit1);
+    emitOp(dwarf::DW_OP_constu);
+    emitUnsigned(FromBits);
+    emitOp(dwarf::DW_OP_shl);
+    emitOp(dwarf::DW_OP_lit1);
+    emitOp(dwarf::DW_OP_minus);
+  }
   emitOp(dwarf::DW_OP_and);
 }
 
diff --git a/llvm/test/DebugInfo/X86/convert-debugloc.ll b/llvm/test/DebugInfo/X86/convert-debugloc.ll
index 21e41dcc4c2a9..2ff6e96621900 100644
--- a/llvm/test/DebugInfo/X86/convert-debugloc.ll
+++ b/llvm/test/DebugInfo/X86/convert-debugloc.ll
@@ -64,11 +64,17 @@
 ; NOCONV:       DW_AT_location (
 ; NOCONV:         {{.*}}, DW_OP_dup, DW_OP_constu 0x7, DW_OP_shr, DW_OP_lit0, DW_OP_not, DW_OP_mul, DW_OP_constu 0x8, DW_OP_shl, DW_OP_or, DW_OP_stack_value)
 ; NOCONV:       DW_AT_name ("y")
+; NOCONV:     DW_TAG_variable
+; NOCONV:       DW_AT_location (
+; NOCONV:         DW_OP_constu 0x40, DW_OP_lit0, DW_OP_plus, DW_OP_lit1, DW_OP_constu 0x40, DW_OP_shl, DW_OP_lit1, DW_OP_minus, DW_OP_and, DW_OP_stack_value)
+; NOCONV:       DW_AT_name ("z")
 ; NOCONV:     NULL
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("signed char")
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("int")
+; NOCONV:   DW_TAG_base_type
+; NOCONV:     DW_AT_name ("unsigned long long")
 ; NOCONV:   NULL
 
 
@@ -81,6 +87,7 @@ entry:
 ;; will not attempt to eliminate.  At the moment, only "convert" ops are folded.
 ;; If you have to change the expression, the expected DWO_id also changes.
   call void @llvm.dbg.value(metadata i8 32, metadata !13, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !15
+  call void @llvm.dbg.value(metadata i8 64, metadata !17, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 128, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !15
   ret i8 %x, !dbg !16
 }
 
@@ -111,3 +118,5 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !15 = !DILocation(line: 3, column: 14, scope: !7)
 !16 = !DILocation(line: 4, column: 3, scope: !7)
+!17 = !DILocalVariable(name: "z", scope: !7, file: !1, line: 3, type: !18)
+!18 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)

From 37ef303aa71a1594d42587871d2e1eb1deeeff0f Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 25 Jan 2022 13:52:44 -0800
Subject: [PATCH 618/946] Add missing include diagnosed by the modules build.

---
 llvm/include/llvm/Support/AllocatorBase.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/Support/AllocatorBase.h b/llvm/include/llvm/Support/AllocatorBase.h
index e5549d111622e..eccced1d1ff47 100644
--- a/llvm/include/llvm/Support/AllocatorBase.h
+++ b/llvm/include/llvm/Support/AllocatorBase.h
@@ -21,6 +21,7 @@
 
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/MemAlloc.h"
+#include <type_traits>
 
 namespace llvm {
 

From 3efa016d4c1af64bcae2f4d64bc141066ee8cd06 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 25 Jan 2022 13:53:14 -0800
Subject: [PATCH 619/946] Revert accidentally pushed commit. It was not yet
 reviewed.

"Fix UB in DwarfExpression::emitLegacyZExt()"

This reverts commit e37de5d36e3190283916604342b029859129e2a4.
---
 .../CodeGen/AsmPrinter/DwarfExpression.cpp    | 22 +++----------------
 llvm/test/DebugInfo/X86/convert-debugloc.ll   |  9 --------
 2 files changed, 3 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index ee932d1051079..37407c98e75f8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -681,25 +681,9 @@ void DwarfExpression::emitLegacySExt(unsigned FromBits) {
 }
 
 void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
-  // Heuristic to decide the most efficient encoding.
-  // A ULEB can encode 7 1-bits per byte.
-  if (FromBits / 7 < 1+1+1+1+1) {
-    // (X & (1 << FromBits - 1))
-    emitOp(dwarf::DW_OP_constu);
-    emitUnsigned((1ULL << FromBits) - 1);
-  } else {
-    // Note that the DWARF 4 stack consists of pointer-sized elements,
-    // so technically it doesn't make sense to shift left more than 64
-    // bits. We leave that for the consumer to decide though. LLDB for
-    // example uses APInt for the stack elements and can still deal
-    // with this.
-    emitOp(dwarf::DW_OP_lit1);
-    emitOp(dwarf::DW_OP_constu);
-    emitUnsigned(FromBits);
-    emitOp(dwarf::DW_OP_shl);
-    emitOp(dwarf::DW_OP_lit1);
-    emitOp(dwarf::DW_OP_minus);
-  }
+  // (X & (1 << FromBits - 1))
+  emitOp(dwarf::DW_OP_constu);
+  emitUnsigned((1ULL << FromBits) - 1);
   emitOp(dwarf::DW_OP_and);
 }
 
diff --git a/llvm/test/DebugInfo/X86/convert-debugloc.ll b/llvm/test/DebugInfo/X86/convert-debugloc.ll
index 2ff6e96621900..21e41dcc4c2a9 100644
--- a/llvm/test/DebugInfo/X86/convert-debugloc.ll
+++ b/llvm/test/DebugInfo/X86/convert-debugloc.ll
@@ -64,17 +64,11 @@
 ; NOCONV:       DW_AT_location (
 ; NOCONV:         {{.*}}, DW_OP_dup, DW_OP_constu 0x7, DW_OP_shr, DW_OP_lit0, DW_OP_not, DW_OP_mul, DW_OP_constu 0x8, DW_OP_shl, DW_OP_or, DW_OP_stack_value)
 ; NOCONV:       DW_AT_name ("y")
-; NOCONV:     DW_TAG_variable
-; NOCONV:       DW_AT_location (
-; NOCONV:         DW_OP_constu 0x40, DW_OP_lit0, DW_OP_plus, DW_OP_lit1, DW_OP_constu 0x40, DW_OP_shl, DW_OP_lit1, DW_OP_minus, DW_OP_and, DW_OP_stack_value)
-; NOCONV:       DW_AT_name ("z")
 ; NOCONV:     NULL
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("signed char")
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("int")
-; NOCONV:   DW_TAG_base_type
-; NOCONV:     DW_AT_name ("unsigned long long")
 ; NOCONV:   NULL
 
 
@@ -87,7 +81,6 @@ entry:
 ;; will not attempt to eliminate.  At the moment, only "convert" ops are folded.
 ;; If you have to change the expression, the expected DWO_id also changes.
   call void @llvm.dbg.value(metadata i8 32, metadata !13, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !15
-  call void @llvm.dbg.value(metadata i8 64, metadata !17, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 128, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !15
   ret i8 %x, !dbg !16
 }
 
@@ -118,5 +111,3 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !15 = !DILocation(line: 3, column: 14, scope: !7)
 !16 = !DILocation(line: 4, column: 3, scope: !7)
-!17 = !DILocalVariable(name: "z", scope: !7, file: !1, line: 3, type: !18)
-!18 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)

From ceec4383681c42bfd3d06a6913ce7554bea160b0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Tue, 25 Jan 2022 22:47:16 +0100
Subject: [PATCH 620/946] [AlwaysInliner] Enable call site inlining to make
 flatten attribute working again (#53360)

Problem: Migration to new PM broke flatten attribute.

This is one use case why LLVM should support inlining call-site with alwaysinline.  The flatten attribute is nowdays broken, so we should either land patch like this one or remove everything related to  flatten attribute from Clang.

Second use case is something like "per call site inlining intrinsics" to control inlining even more; mentioned in
https://lists.llvm.org/pipermail/cfe-dev/2018-September/059232.html

Fixes https://github.com/llvm/llvm-project/issues/53360

Reviewed By: aeubanks

Differential Revision: https://reviews.llvm.org/D117965
---
 clang/test/CodeGen/flatten.c                  |  6 -----
 clang/test/CodeGenCXX/flatten.cpp             |  4 ----
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp     |  6 ++---
 .../Coroutines/coro-retcon-once-private.ll    |  4 +---
 llvm/test/Transforms/Inline/always-inline.ll  | 24 ++++++-------------
 5 files changed, 11 insertions(+), 33 deletions(-)

diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c
index 287d4f2a46b65..4e762223de486 100644
--- a/clang/test/CodeGen/flatten.c
+++ b/clang/test/CodeGen/flatten.c
@@ -1,9 +1,3 @@
-// UNSUPPORTED: experimental-new-pass-manager
-// Currently, different code seems to be intentionally generated under the new
-// PM since we alwaysinline functions and not callsites under new PM.
-// Under new PM, f() will not be inlined from g() since f is not marked as
-// alwaysinline.
-
 // RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s
 
 void f(void) {}
diff --git a/clang/test/CodeGenCXX/flatten.cpp b/clang/test/CodeGenCXX/flatten.cpp
index 7a6484591aaa0..e988d6d726dd7 100644
--- a/clang/test/CodeGenCXX/flatten.cpp
+++ b/clang/test/CodeGenCXX/flatten.cpp
@@ -1,7 +1,3 @@
-// UNSUPPORTED: experimental-new-pass-manager
-// See the comment for CodeGen/flatten.c on why this is unsupported with the new
-// PM.
-
 // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++11 %s -emit-llvm -o - | FileCheck %s
 
 void f(void) {}
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 7acc9b266ad82..8f20f59b5e4d7 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -54,13 +54,13 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
     if (F.isPresplitCoroutine())
       continue;
 
-    if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) &&
-        isInlineViable(F).isSuccess()) {
+    if (!F.isDeclaration() && isInlineViable(F).isSuccess()) {
       Calls.clear();
 
       for (User *U : F.users())
         if (auto *CB = dyn_cast<CallBase>(U))
-          if (CB->getCalledFunction() == &F)
+          if (CB->getCalledFunction() == &F &&
+              CB->hasFnAttr(Attribute::AlwaysInline))
             Calls.insert(CB);
 
       for (CallBase *CB : Calls) {
diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-private.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-private.ll
index d805d7549231d..5d62747180233 100644
--- a/llvm/test/Transforms/Coroutines/coro-retcon-once-private.ll
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-private.ll
@@ -3,9 +3,7 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.12.0"
 
-; CHECK: define internal { i8*, i32 } @f(i8* %buffer, i32* %array)
-; CHECK-NEXT: entry:
-; CHECK-NEXT:  unreachable
+; CHECK-NOT: define {{.*}} @f(i8* %buffer, i32* %array)
 
 define internal {i8*, i32} @f(i8* %buffer, i32* %array) {
 entry:
diff --git a/llvm/test/Transforms/Inline/always-inline.ll b/llvm/test/Transforms/Inline/always-inline.ll
index 0fcf956199c46..8eb3d020f6634 100644
--- a/llvm/test/Transforms/Inline/always-inline.ll
+++ b/llvm/test/Transforms/Inline/always-inline.ll
@@ -1,14 +1,11 @@
-; RUN: opt < %s -inline-threshold=0 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
+; RUN: opt < %s -inline-threshold=0 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
 ;
 ; Ensure the threshold has no impact on these decisions.
-; RUN: opt < %s -inline-threshold=20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
-; RUN: opt < %s -inline-threshold=-20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
+; RUN: opt < %s -inline-threshold=20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
+; RUN: opt < %s -inline-threshold=-20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
 ;
 ; The new pass manager doesn't re-use any threshold based infrastructure for
-; the always inliner, but test that we get the correct result. The new PM
-; always inliner also doesn't support inlining call-site alwaysinline
-; annotations. It isn't clear that this is a reasonable use case for
-; 'alwaysinline'.
+; the always inliner, but test that we get the correct result.
 ; RUN: opt < %s -inline-threshold=0 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
 ; RUN: opt < %s -inline-threshold=20000000 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
 ; RUN: opt < %s -inline-threshold=-20000000 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
@@ -26,12 +23,6 @@ define i32 @outer1() {
    ret i32 %r
 }
 
-; The always inliner can't DCE arbitrary internal functions. PR2945
-define internal i32 @pr2945() nounwind {
-; CHECK-LABEL: @pr2945(
-  ret i32 0
-}
-
 define internal void @inner2(i32 %N) alwaysinline {
 ; CHECK-NOT: @inner2(
   %P = alloca i32, i32 %N
@@ -146,10 +137,9 @@ define i32 @inner7() {
   ret i32 1
 }
 define i32 @outer7() {
-; CHECK-CALL-LABEL: @outer7(
-; CHECK-CALL-NOT: call
-; CHECK-CALL: ret
-
+; CHECK-LABEL: @outer7(
+; CHECK-NOT: call
+; CHECK: ret
    %r = call i32 @inner7() alwaysinline
    ret i32 %r
 }

From 90f185c964d05e9f3081bb4acc734b3932db221a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Tue, 25 Jan 2022 23:13:46 +0100
Subject: [PATCH 621/946] Revert "[AlwaysInliner] Enable call site inlining to
 make flatten attribute working again (#53360)"

This reverts commit ceec4383681c42bfd3d06a6913ce7554bea160b0. Clang tests fail.
---
 clang/test/CodeGen/flatten.c                  |  6 +++++
 clang/test/CodeGenCXX/flatten.cpp             |  4 ++++
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp     |  6 ++---
 .../Coroutines/coro-retcon-once-private.ll    |  4 +++-
 llvm/test/Transforms/Inline/always-inline.ll  | 24 +++++++++++++------
 5 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c
index 4e762223de486..287d4f2a46b65 100644
--- a/clang/test/CodeGen/flatten.c
+++ b/clang/test/CodeGen/flatten.c
@@ -1,3 +1,9 @@
+// UNSUPPORTED: experimental-new-pass-manager
+// Currently, different code seems to be intentionally generated under the new
+// PM since we alwaysinline functions and not callsites under new PM.
+// Under new PM, f() will not be inlined from g() since f is not marked as
+// alwaysinline.
+
 // RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s
 
 void f(void) {}
diff --git a/clang/test/CodeGenCXX/flatten.cpp b/clang/test/CodeGenCXX/flatten.cpp
index e988d6d726dd7..7a6484591aaa0 100644
--- a/clang/test/CodeGenCXX/flatten.cpp
+++ b/clang/test/CodeGenCXX/flatten.cpp
@@ -1,3 +1,7 @@
+// UNSUPPORTED: experimental-new-pass-manager
+// See the comment for CodeGen/flatten.c on why this is unsupported with the new
+// PM.
+
 // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++11 %s -emit-llvm -o - | FileCheck %s
 
 void f(void) {}
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 8f20f59b5e4d7..7acc9b266ad82 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -54,13 +54,13 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
     if (F.isPresplitCoroutine())
       continue;
 
-    if (!F.isDeclaration() && isInlineViable(F).isSuccess()) {
+    if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) &&
+        isInlineViable(F).isSuccess()) {
       Calls.clear();
 
       for (User *U : F.users())
         if (auto *CB = dyn_cast<CallBase>(U))
-          if (CB->getCalledFunction() == &F &&
-              CB->hasFnAttr(Attribute::AlwaysInline))
+          if (CB->getCalledFunction() == &F)
             Calls.insert(CB);
 
       for (CallBase *CB : Calls) {
diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-once-private.ll b/llvm/test/Transforms/Coroutines/coro-retcon-once-private.ll
index 5d62747180233..d805d7549231d 100644
--- a/llvm/test/Transforms/Coroutines/coro-retcon-once-private.ll
+++ b/llvm/test/Transforms/Coroutines/coro-retcon-once-private.ll
@@ -3,7 +3,9 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.12.0"
 
-; CHECK-NOT: define {{.*}} @f(i8* %buffer, i32* %array)
+; CHECK: define internal { i8*, i32 } @f(i8* %buffer, i32* %array)
+; CHECK-NEXT: entry:
+; CHECK-NEXT:  unreachable
 
 define internal {i8*, i32} @f(i8* %buffer, i32* %array) {
 entry:
diff --git a/llvm/test/Transforms/Inline/always-inline.ll b/llvm/test/Transforms/Inline/always-inline.ll
index 8eb3d020f6634..0fcf956199c46 100644
--- a/llvm/test/Transforms/Inline/always-inline.ll
+++ b/llvm/test/Transforms/Inline/always-inline.ll
@@ -1,11 +1,14 @@
-; RUN: opt < %s -inline-threshold=0 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
+; RUN: opt < %s -inline-threshold=0 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
 ;
 ; Ensure the threshold has no impact on these decisions.
-; RUN: opt < %s -inline-threshold=20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
-; RUN: opt < %s -inline-threshold=-20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
+; RUN: opt < %s -inline-threshold=20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
+; RUN: opt < %s -inline-threshold=-20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
 ;
 ; The new pass manager doesn't re-use any threshold based infrastructure for
-; the always inliner, but test that we get the correct result.
+; the always inliner, but test that we get the correct result. The new PM
+; always inliner also doesn't support inlining call-site alwaysinline
+; annotations. It isn't clear that this is a reasonable use case for
+; 'alwaysinline'.
 ; RUN: opt < %s -inline-threshold=0 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
 ; RUN: opt < %s -inline-threshold=20000000 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
 ; RUN: opt < %s -inline-threshold=-20000000 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
@@ -23,6 +26,12 @@ define i32 @outer1() {
    ret i32 %r
 }
 
+; The always inliner can't DCE arbitrary internal functions. PR2945
+define internal i32 @pr2945() nounwind {
+; CHECK-LABEL: @pr2945(
+  ret i32 0
+}
+
 define internal void @inner2(i32 %N) alwaysinline {
 ; CHECK-NOT: @inner2(
   %P = alloca i32, i32 %N
@@ -137,9 +146,10 @@ define i32 @inner7() {
   ret i32 1
 }
 define i32 @outer7() {
-; CHECK-LABEL: @outer7(
-; CHECK-NOT: call
-; CHECK: ret
+; CHECK-CALL-LABEL: @outer7(
+; CHECK-CALL-NOT: call
+; CHECK-CALL: ret
+
    %r = call i32 @inner7() alwaysinline
    ret i32 %r
 }

From 8ba9c794feb30cd969b9776c39873def10c51bff Mon Sep 17 00:00:00 2001
From: Zahira Ammarguellat <zahira.ammarguellat@intel.com>
Date: Tue, 23 Nov 2021 14:58:03 -0800
Subject: [PATCH 622/946] Add support for sycl_special_class attribute.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Special classes such as accessor, sampler, and stream need additional
implementation when they are passed from host to device.

This patch is adding a new attribute “sycl_special_class” used to mark
SYCL classes/struct that need the additional compiler handling.
---
 .../clang/AST/CXXRecordDeclDefinitionBits.def |  3 +
 clang/include/clang/AST/DeclCXX.h             |  3 +
 clang/include/clang/Basic/Attr.td             |  7 ++
 clang/include/clang/Basic/AttrDocs.td         | 65 +++++++++++++++
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +
 clang/lib/AST/DeclCXX.cpp                     |  2 +-
 clang/lib/Sema/SemaDecl.cpp                   | 22 ++++-
 clang/lib/Sema/SemaDeclAttr.cpp               |  3 +
 ...a-attribute-supported-attributes-list.test |  1 +
 .../special-class-attribute-on-non-sycl.cpp   | 12 +++
 .../test/SemaSYCL/special-class-attribute.cpp | 80 +++++++++++++++++++
 11 files changed, 199 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/SemaSYCL/special-class-attribute-on-non-sycl.cpp
 create mode 100644 clang/test/SemaSYCL/special-class-attribute.cpp

diff --git a/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def b/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
index 9b270682f8cf0..cdf0804680ad0 100644
--- a/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
+++ b/clang/include/clang/AST/CXXRecordDeclDefinitionBits.def
@@ -112,6 +112,9 @@ FIELD(HasVariantMembers, 1, NO_MERGE)
 /// True if there no non-field members declared by the user.
 FIELD(HasOnlyCMembers, 1, NO_MERGE)
 
+/// True if there is an '__init' method defined by the user.
+FIELD(HasInitMethod, 1, NO_MERGE)
+
 /// True if any field has an in-class initializer, including those
 /// within anonymous unions or structs.
 FIELD(HasInClassInitializer, 1, NO_MERGE)
diff --git a/clang/include/clang/AST/DeclCXX.h b/clang/include/clang/AST/DeclCXX.h
index b54e6b0ac2171..2833df0505dac 100644
--- a/clang/include/clang/AST/DeclCXX.h
+++ b/clang/include/clang/AST/DeclCXX.h
@@ -1139,6 +1139,9 @@ class CXXRecordDecl : public RecordDecl {
   /// \note This does NOT include a check for union-ness.
   bool isEmpty() const { return data().Empty; }
 
+  void setInitMethod(bool Val) { data().HasInitMethod = Val; }
+  bool hasInitMethod() const { return data().HasInitMethod; }
+
   bool hasPrivateFields() const {
     return data().HasPrivateFields;
   }
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index b071ebb4d576b..be56373cc3ca4 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1191,6 +1191,13 @@ def SYCLKernel : InheritableAttr {
   let Documentation = [SYCLKernelDocs];
 }
 
+def SYCLSpecialClass: InheritableAttr {
+  let Spellings = [Clang<"sycl_special_class">];
+  let Subjects = SubjectList<[CXXRecord]>;
+  let LangOpts = [SYCL];
+  let Documentation = [SYCLSpecialClassDocs];
+}
+
 def C11NoReturn : InheritableAttr {
   let Spellings = [Keyword<"_Noreturn">];
   let Subjects = SubjectList<[Function], ErrorDiag>;
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index a24218a9c82b6..18fac924b1140 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -409,6 +409,71 @@ The SYCL kernel in the previous code sample meets these expectations.
   }];
 }
 
+def SYCLSpecialClassDocs : Documentation {
+  let Category = DocCatStmt;
+  let Content = [{
+SYCL defines some special classes (accessor, sampler, and stream) which require
+specific handling during the generation of the SPIR entry point.
+The ``__attribute__((sycl_special_class))`` attribute is used in SYCL
+headers to indicate that a class or a struct needs a specific handling when
+it is passed from host to device.
+Special classes will have a mandatory ``__init`` method and an optional
+``__finalize`` method (the ``__finalize`` method is used only with the
+``stream`` type). Kernel parameters types are extract from the ``__init`` method
+parameters. The kernel function arguments list is derived from the
+arguments of the ``__init`` method. The arguments of the ``__init`` method are
+copied into the kernel function argument list and the ``__init`` and
+``__finalize`` methods are called at the beginning and the end of the kernel,
+respectively.
+The ``__init`` and ``__finalize`` methods must be defined inside the
+special class.
+Please note that this is an attribute that is used as an internal
+implementation detail and not intended to be used by external users.
+
+The syntax of the attribute is as follows:
+
+.. code-block:: c++
+
+   class __attribute__((sycl_special_class)) accessor {};
+   class [[clang::sycl_special_class]] accessor {};
+
+This is a code example that illustrates the use of the attribute:
+
+.. code-block:: c++
+
+   class __attribute__((sycl_special_class)) SpecialType {
+   int F1;
+   int F2;
+   void __init(int f1) {
+     F1 = f1;
+     F2 = f1;
+   }
+   void __finalize() {}
+   public:
+     SpecialType() = default;
+     int getF2() const { return F2; }
+   };
+
+   int main () {
+   SpecialType T;
+   cgh.single_task([=] {
+     T.getF2();
+   });
+}
+
+This would trigger the following kernel entry point in the AST:
+
+.. code-block:: c++
+
+   void __sycl_kernel(int f1) {
+   SpecialType T;
+   T.__init(f1);
+   ...
+   T.__finalize()
+ }
+  }];
+}
+
 def C11NoReturnDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index db1047586a473..735d92d79d27c 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11448,6 +11448,9 @@ def warn_sycl_kernel_num_of_function_params : Warning<
 def warn_sycl_kernel_return_type : Warning<
   "function template with 'sycl_kernel' attribute must have a 'void' return type">,
   InGroup<IgnoredAttributes>;
+def err_sycl_special_type_num_init_method : Error<
+  "types with 'sycl_special_class' attribute must have one and only one '__init' "
+  "method defined">;
 
 def err_bit_int_bad_size : Error<"%select{signed|unsigned}0 _BitInt must "
                                  "have a bit size of at least %select{2|1}0">;
diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index 108113274b8e0..a15498c89d6a1 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -111,7 +111,7 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D)
       HasDeclaredCopyAssignmentWithConstParam(false),
       IsAnyDestructorNoReturn(false), IsLambda(false),
       IsParsingBaseSpecifiers(false), ComputedVisibleConversions(false),
-      HasODRHash(false), Definition(D) {}
+      HasODRHash(false), Definition(D), HasInitMethod(false) {}
 
 CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const {
   return Bases.get(Definition->getASTContext().getExternalSource());
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index e014500f2114f..673631a12eee0 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9170,6 +9170,13 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
         Diag(D.getDeclSpec().getVirtualSpecLoc(), diag::err_virtual_in_union);
         NewFD->setInvalidDecl();
       }
+      if ((Parent->isClass() || Parent->isStruct()) &&
+          Parent->hasAttr<SYCLSpecialClassAttr>() &&
+          NewFD->getKind() == Decl::Kind::CXXMethod &&
+          NewFD->getName() == "__init" && D.isFunctionDefinition()) {
+        if (auto *Def = Parent->getDefinition())
+          Def->setInitMethod(true);
+      }
     }
 
     SetNestedNameSpecifier(*this, NewFD, D);
@@ -16729,8 +16736,21 @@ void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD,
       RD->completeDefinition();
   }
 
-  if (isa<CXXRecordDecl>(Tag)) {
+  if (auto *RD = dyn_cast<CXXRecordDecl>(Tag)) {
     FieldCollector->FinishClass();
+    if (RD->hasAttr<SYCLSpecialClassAttr>()) {
+      auto *Def = RD->getDefinition();
+      assert(Def && "The record is expected to have a completed definition");
+      unsigned NumInitMethods = 0;
+      for (auto *Method : Def->methods()) {
+        if (!Method->getIdentifier())
+            continue;
+        if (Method->getName() == "__init")
+          NumInitMethods++;
+      }
+      if (NumInitMethods > 1 || !Def->hasInitMethod())
+        Diag(RD->getLocation(), diag::err_sycl_special_type_num_init_method);
+    }
   }
 
   // Exit this scope of this tag's definition.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 955f477760429..f04236ab96c31 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -8301,6 +8301,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
   case ParsedAttr::AT_SYCLKernel:
     handleSYCLKernelAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_SYCLSpecialClass:
+    handleSimpleAttribute<SYCLSpecialClassAttr>(S, D, AL);
+    break;
   case ParsedAttr::AT_Format:
     handleFormatAttr(S, D, AL);
     break;
diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
index 82d3c446c011a..9ad2b60fd319d 100644
--- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -155,6 +155,7 @@
 // CHECK-NEXT: ReturnTypestate (SubjectMatchRule_function, SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: ReturnsNonNull (SubjectMatchRule_objc_method, SubjectMatchRule_function)
 // CHECK-NEXT: ReturnsTwice (SubjectMatchRule_function)
+// CHECK-NEXT: SYCLSpecialClass (SubjectMatchRule_record)
 // CHECK-NEXT: ScopedLockable (SubjectMatchRule_record)
 // CHECK-NEXT: Section (SubjectMatchRule_function, SubjectMatchRule_variable_is_global, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property)
 // CHECK-NEXT: SetTypestate (SubjectMatchRule_function_is_member)
diff --git a/clang/test/SemaSYCL/special-class-attribute-on-non-sycl.cpp b/clang/test/SemaSYCL/special-class-attribute-on-non-sycl.cpp
new file mode 100644
index 0000000000000..3dc437c0a3bcc
--- /dev/null
+++ b/clang/test/SemaSYCL/special-class-attribute-on-non-sycl.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fsycl-is-device -verify %s
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -x c++ %s
+
+#ifndef __SYCL_DEVICE_ONLY__
+// expected-warning@+5 {{'sycl_special_class' attribute ignored}}
+#else
+// expected-no-diagnostics
+#endif
+
+class __attribute__((sycl_special_class)) special_class {
+  void __init(){}
+};
diff --git a/clang/test/SemaSYCL/special-class-attribute.cpp b/clang/test/SemaSYCL/special-class-attribute.cpp
new file mode 100644
index 0000000000000..f06a7b1007514
--- /dev/null
+++ b/clang/test/SemaSYCL/special-class-attribute.cpp
@@ -0,0 +1,80 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fsycl-is-device -verify %s
+
+// No diagnostics
+class [[clang::sycl_special_class]] class1 {
+  void __init(){}
+};
+class __attribute__((sycl_special_class)) class2 {
+  void __init(){}
+};
+
+class class3;
+class [[clang::sycl_special_class]] class3 {
+  void __init(){}
+};
+
+class class4;
+class __attribute__((sycl_special_class)) class4 {
+  void __init(){}
+};
+
+struct [[clang::sycl_special_class]] struct1 {
+  void __init(){}
+};
+struct __attribute__((sycl_special_class)) struct2 {
+  void __init(){}
+};
+
+class __attribute__((sycl_special_class)) class5;
+class class5 {
+  void __init(){}
+};
+
+// Must have one and only one __init method defined
+class __attribute__((sycl_special_class)) class6 { // expected-error {{types with 'sycl_special_class' attribute must have one and only one '__init' method defined}}
+  class6() {}
+};
+class [[clang::sycl_special_class]] class7 { // expected-error {{types with 'sycl_special_class' attribute must have one and only one '__init' method defined}}
+  void __init();
+};
+
+class [[clang::sycl_special_class]] class8 { // expected-error {{types with 'sycl_special_class' attribute must have one and only one '__init' method defined}}
+  void __init();
+  int func() {}
+  void __init(int a){}
+};
+
+struct __attribute__((sycl_special_class)) struct3;
+struct struct3 {}; // expected-error {{types with 'sycl_special_class' attribute must have one and only one '__init' method defined}}
+
+// Only classes
+[[clang::sycl_special_class]] int var1 = 0;       // expected-warning {{'sycl_special_class' attribute only applies to classes}}
+__attribute__((sycl_special_class)) int var2 = 0; // expected-warning {{'sycl_special_class' attribute only applies to classes}}
+
+[[clang::sycl_special_class]] void foo1();       // expected-warning {{'sycl_special_class' attribute only applies to classes}}
+__attribute__((sycl_special_class)) void foo2(); // expected-warning {{'sycl_special_class' attribute only applies to classes}}
+
+// Attribute takes no arguments
+class [[clang::sycl_special_class(1)]] class9{};         // expected-error {{'sycl_special_class' attribute takes no arguments}}
+class __attribute__((sycl_special_class(1))) class10 {}; // expected-error {{'sycl_special_class' attribute takes no arguments}}
+
+// __init method must be defined inside the CXXRecordDecl.
+class [[clang::sycl_special_class]] class11 { // expected-error {{types with 'sycl_special_class' attribute must have one and only one '__init' method defined}}
+  void __init();
+};
+void class11::__init(){}
+
+class __attribute__((sycl_special_class)) class12 { // expected-error {{types with 'sycl_special_class' attribute must have one and only one '__init' method defined}}
+  void __init();
+};
+void class12::__init(){}
+
+struct [[clang::sycl_special_class]] struct4 { // expected-error {{types with 'sycl_special_class' attribute must have one and only one '__init' method defined}}
+  void __init();
+};
+void struct4::__init(){}
+
+struct __attribute__((sycl_special_class)) struct5 { // expected-error {{types with 'sycl_special_class' attribute must have one and only one '__init' method defined}}
+  void __init();
+};
+void struct5::__init(){}

From 493509a40ad1653feb779861badd3ee59fc4a79c Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Tue, 25 Jan 2022 14:27:25 -0800
Subject: [PATCH 623/946] [NFC] DeclCXX: Fix -Wreorder-ctor

From 8ba9c794feb30cd969b9776c39873def10c51bff
---
 clang/lib/AST/DeclCXX.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/clang/lib/AST/DeclCXX.cpp b/clang/lib/AST/DeclCXX.cpp
index a15498c89d6a1..0cf6e60b2a6c3 100644
--- a/clang/lib/AST/DeclCXX.cpp
+++ b/clang/lib/AST/DeclCXX.cpp
@@ -79,10 +79,9 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D)
       HasBasesWithFields(false), HasBasesWithNonStaticDataMembers(false),
       HasPrivateFields(false), HasProtectedFields(false),
       HasPublicFields(false), HasMutableFields(false), HasVariantMembers(false),
-      HasOnlyCMembers(true), HasInClassInitializer(false),
+      HasOnlyCMembers(true), HasInitMethod(false), HasInClassInitializer(false),
       HasUninitializedReferenceMember(false), HasUninitializedFields(false),
-      HasInheritedConstructor(false),
-      HasInheritedDefaultConstructor(false),
+      HasInheritedConstructor(false), HasInheritedDefaultConstructor(false),
       HasInheritedAssignment(false),
       NeedOverloadResolutionForCopyConstructor(false),
       NeedOverloadResolutionForMoveConstructor(false),
@@ -111,7 +110,7 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D)
       HasDeclaredCopyAssignmentWithConstParam(false),
       IsAnyDestructorNoReturn(false), IsLambda(false),
       IsParsingBaseSpecifiers(false), ComputedVisibleConversions(false),
-      HasODRHash(false), Definition(D), HasInitMethod(false) {}
+      HasODRHash(false), Definition(D) {}
 
 CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const {
   return Bases.get(Definition->getASTContext().getExternalSource());

From 8de76bd569732acae6a10fdcb0152a49f7d4cd39 Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Wed, 22 Dec 2021 18:07:43 -0600
Subject: [PATCH 624/946] [IRSim][IROutliner] Allowing Intrinsic Calls to be
 Used in Similarity Matching and Outlined Regions

Due to some complications with lifetime, and assume-like intrinsics, intrinsics were not included as outlinable instructions. This patch opens up most intrinsics, excluding lifetime and assume-like intrinsics, to be outlined. For similarity, it is required that the intrinsic IDs, and the intrinsics names match exactly, as well as the function type. This puts intrinsics in a different class than normal call instructions (https://reviews.llvm.org/D109448), where the name will no longer have to match.

This also adds an additional command line flag debug option to disable outlining intrinsics.

Reviewers: paquette, jroelofs

Differential Revision: https://reviews.llvm.org/D109450
---
 .../llvm/Analysis/IRSimilarityIdentifier.h    | 53 ++++++++++-
 llvm/include/llvm/Transforms/IPO/IROutliner.h |  6 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  | 13 ++-
 llvm/lib/Transforms/IPO/IROutliner.cpp        |  9 +-
 .../IROutliner/different-intrinsics.ll        | 92 +++++++++++++++++++
 .../Transforms/IROutliner/illegal-memcpy.ll   |  2 +-
 .../Transforms/IROutliner/illegal-memmove.ll  |  2 +-
 .../Transforms/IROutliner/illegal-memset.ll   |  2 +-
 .../Transforms/IROutliner/illegal-vaarg.ll    |  2 +-
 .../Transforms/IROutliner/outline-memcpy.ll   | 60 ++++++++++++
 .../Transforms/IROutliner/outline-memmove.ll  | 60 ++++++++++++
 .../Transforms/IROutliner/outline-memset.ll   | 55 +++++++++++
 .../IROutliner/outline-vaarg-intrinsic.ll     | 90 ++++++++++++++++++
 .../Analysis/IRSimilarityIdentifierTest.cpp   | 52 ++++++++++-
 14 files changed, 480 insertions(+), 18 deletions(-)
 create mode 100644 llvm/test/Transforms/IROutliner/different-intrinsics.ll
 create mode 100644 llvm/test/Transforms/IROutliner/outline-memcpy.ll
 create mode 100644 llvm/test/Transforms/IROutliner/outline-memmove.ll
 create mode 100644 llvm/test/Transforms/IROutliner/outline-memset.ll
 create mode 100644 llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 966bf02d128ec..73dd3219aa132 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -252,7 +252,30 @@ struct IRInstructionData
           llvm::hash_value(ID.Inst->getType()),
           llvm::hash_value(ID.getPredicate()),
           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
-    else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst)) {
+
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
+      // To hash intrinsics, we use the opcode, and types like the other
+      // instructions, but also, the Intrinsic ID, and the Name of the
+      // intrinsic.
+      Intrinsic::ID IntrinsicID = II->getIntrinsicID();
+      FunctionType *FT = II->getFunctionType();
+      std::string Name;
+      // If there is an overloaded name, we have to use the complex version
+      // of getName to get the entire string.
+      if (Intrinsic::isOverloaded(IntrinsicID))
+        Name =
+            Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
+      // If there is not an overloaded name, we only need to use this version.
+      else
+        Name = Intrinsic::getName(IntrinsicID).str();
+      return llvm::hash_combine(
+          llvm::hash_value(ID.Inst->getOpcode()),
+          llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
+          llvm::hash_value(Name),
+          llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
+    }
+
+    if (isa<CallInst>(ID.Inst)) {
       std::string FunctionName = *ID.CalleeName;
       return llvm::hash_combine(
           llvm::hash_value(ID.Inst->getOpcode()),
@@ -260,6 +283,7 @@ struct IRInstructionData
           llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
     }
+
     return llvm::hash_combine(
         llvm::hash_value(ID.Inst->getOpcode()),
         llvm::hash_value(ID.Inst->getType()),
@@ -512,8 +536,17 @@ struct IRInstructionMapper {
     // analyzed for similarity as it has no bearing on the outcome of the
     // program.
     InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
-    // TODO: Handle specific intrinsics.
-    InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
+    InstrType visitIntrinsicInst(IntrinsicInst &II) {
+      // These are disabled due to complications in the CodeExtractor when
+      // outlining these instructions.  For instance, It is unclear what we
+      // should do when moving only the start or end lifetime instruction into
+      // an outlined function. Also, assume-like intrinsics could be removed
+      // from the region, removing arguments, causing discrepencies in the
+      // number of inputs between different regions.
+      if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
+        return Illegal;
+      return EnableIntrinsics ? Legal : Illegal;
+    }
     // We only allow call instructions where the function has a name and
     // is not an indirect call.
     InstrType visitCallInst(CallInst &CI) {
@@ -540,6 +573,10 @@ struct IRInstructionMapper {
     // The flag variable that lets the classifier know whether we should
     // allow indirect calls to be considered legal instructions.
     bool EnableIndirectCalls = false;
+
+    // Flag that lets the classifier know whether we should allow intrinsics to
+    // be checked for similarity.
+    bool EnableIntrinsics = false;
   };
 
   /// Maps an Instruction to a member of InstrType.
@@ -926,10 +963,12 @@ class IRSimilarityIdentifier {
 public:
   IRSimilarityIdentifier(bool MatchBranches = true,
                          bool MatchIndirectCalls = true,
-                         bool MatchCallsWithName = false)
+                         bool MatchCallsWithName = false,
+                         bool MatchIntrinsics = true)
       : Mapper(&InstDataAllocator, &InstDataListAllocator),
         EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
-        EnableMatchingCallsByName(MatchCallsWithName) {}
+        EnableMatchingCallsByName(MatchCallsWithName),
+        EnableIntrinsics(MatchIntrinsics) {}
 
 private:
   /// Map the instructions in the module to unsigned integers, using mapping
@@ -1018,6 +1057,10 @@ class IRSimilarityIdentifier {
   /// convention, attributes and type signature.
   bool EnableMatchingCallsByName = true;
 
+  /// The flag variable that marks whether we should check intrinsics for
+  /// similarity.
+  bool EnableIntrinsics = true;
+
   /// The SimilarityGroups found with the most recent run of \ref
   /// findSimilarity. None if there is no recent run.
   Optional<SimilarityGroupList> SimilarityCandidates;
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 9799737a529e3..0364fba86581d 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -360,7 +360,7 @@ class IROutliner {
     bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
     // TODO: Handle specific intrinsics individually from those that can be
     // handled.
-    bool IntrinsicInst(IntrinsicInst &II) { return false; }
+    bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
     // We only handle CallInsts that are not indirect, since we cannot guarantee
     // that they have a name in these cases.
     bool visitCallInst(CallInst &CI) {
@@ -396,6 +396,10 @@ class IROutliner {
     // The flag variable that marks whether we should allow indirect calls
     // to be outlined.
     bool EnableIndirectCalls = true;
+
+    // The flag variable that marks whether we should allow intrinsics
+    // instructions to be outlined.
+    bool EnableIntrinsics = false;
   };
 
   /// A InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 8c3c0400d6015..aa5aadae6032c 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -29,7 +29,6 @@ cl::opt<bool>
                     cl::ReallyHidden,
                     cl::desc("disable similarity matching, and outlining, "
                              "across branches for debugging purposes."));
-} // namespace llvm
 
 cl::opt<bool>
     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
@@ -41,6 +40,9 @@ cl::opt<bool>
                      cl::desc("only allow matching call instructions if the "
                               "name and type signature match."));
 
+cl::opt<bool>
+    DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
+                      cl::desc("Don't match or outline intrinsics"));
 
 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
                                      IRInstructionDataList &IDList)
@@ -48,6 +50,8 @@ IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
   initializeInstruction();
 }
 
+} // namespace llvm
+
 void IRInstructionData::initializeInstruction() {
   // We check for whether we have a comparison instruction.  If it is, we
   // find the "less than" version of the predicate for consistency for
@@ -1103,6 +1107,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
+  Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
 
   populateMapper(Modules, InstrList, IntegerMapping);
   findCandidates(InstrList, IntegerMapping);
@@ -1115,6 +1120,7 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
+  Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
 
   std::vector<IRInstructionData *> InstrList;
   std::vector<unsigned> IntegerMapping;
@@ -1136,7 +1142,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
 
 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
-                                        MatchCallsByName));
+                                        MatchCallsByName, !DisableIntrinsics));
   return false;
 }
 
@@ -1153,9 +1159,8 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
 AnalysisKey IRSimilarityAnalysis::Key;
 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
                                                  ModuleAnalysisManager &) {
-
   auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
-                                     MatchCallsByName);
+                                     MatchCallsByName, !DisableIntrinsics);
   IRSI.findSimilarity(M);
   return IRSI;
 }
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 9c79972443fe6..fa2dadbb2d0d5 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -38,12 +38,17 @@ using namespace IRSimilarity;
 // matching and outlining.
 namespace llvm {
 extern cl::opt<bool> DisableBranches;
-} // namespace llvm
 
 // A command flag to be used for debugging to indirect calls from similarity
 // matching and outlining.
 extern cl::opt<bool> DisableIndirectCalls;
 
+// A command flag to be used for debugging to exclude intrinsics from similarity
+// matching and outlining.
+extern cl::opt<bool> DisableIntrinsics;
+
+} // namespace llvm
+
 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
 // functions. This is false by default because the linker can dedupe linkonceodr
 // functions. Since the outliner is confined to a single module (modulo LTO),
@@ -2524,6 +2529,8 @@ unsigned IROutliner::doOutline(Module &M) {
   // Find the possible similarity sections.
   InstructionClassifier.EnableBranches = !DisableBranches;
   InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
+  InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
+
   IRSimilarityIdentifier &Identifier = getIRSI(M);
   SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
 
diff --git a/llvm/test/Transforms/IROutliner/different-intrinsics.ll b/llvm/test/Transforms/IROutliner/different-intrinsics.ll
new file mode 100644
index 0000000000000..a0fa2d2d59487
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/different-intrinsics.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we do not outline different intrinsics as the same
+; function or as a value like we would for non-intrinsic functions.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+
+define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[A_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
+; CHECK-NEXT:    [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
+; CHECK-NEXT:    [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[A_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
+; CHECK-NEXT:    [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
+; CHECK-NEXT:    [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[TMP0:%.*]], [[TMP1:%.*]]
+; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP2:%.*]], align 1
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: define internal void @outlined_ir_func_1(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
+; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    store i8 [[A]], i8* [[TMP2:%.*]], align 1
+; CHECK-NEXT:    store i8 [[B]], i8* [[TMP3:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
index 6c242d7b5e846..8bee43c77b84b 100644
--- a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
 
 ; This test checks that we do not outline memcpy intrinsics since it may require
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-memmove.ll b/llvm/test/Transforms/IROutliner/illegal-memmove.ll
index 0a9216425455d..afc626f6abd2f 100644
--- a/llvm/test/Transforms/IROutliner/illegal-memmove.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memmove.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
 
 ; This test checks that we do not outline memcpy intrinsics since it may require
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-memset.ll b/llvm/test/Transforms/IROutliner/illegal-memset.ll
index 4470d0c6d1287..ed3eeb2d01b4b 100644
--- a/llvm/test/Transforms/IROutliner/illegal-memset.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memset.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
 
 ; This test checks that we do not outline memset intrinsics since it requires
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
index 28a1e5994e703..eaffefe3d9d5f 100644
--- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
 
 ; This test ensures that we do not outline vararg instructions or intrinsics, as
 ; they may cause inconsistencies when outlining.
diff --git a/llvm/test/Transforms/IROutliner/outline-memcpy.ll b/llvm/test/Transforms/IROutliner/outline-memcpy.ll
new file mode 100644
index 0000000000000..d5d4859c318e4
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outline-memcpy.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we successfully outline identical memcpy instructions.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+
+define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
+; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false)
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[A]], [[B]]
+; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP0]], align 1
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outline-memmove.ll b/llvm/test/Transforms/IROutliner/outline-memmove.ll
new file mode 100644
index 0000000000000..45e573c52653b
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outline-memmove.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we sucecssfully outline identical memmove instructions.
+
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+
+define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
+entry:
+  %a = load i8, i8* %s
+  %b = load i8, i8* %d
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
+  %c = add i8 %a, %b
+  %ret = load i8, i8* %s
+  ret i8 %ret
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
+; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
+; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
+; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false)
+; CHECK-NEXT:    [[C:%.*]] = add i8 [[A]], [[B]]
+; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP0]], align 1
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outline-memset.ll b/llvm/test/Transforms/IROutliner/outline-memset.ll
new file mode 100644
index 0000000000000..65dd04978d0a6
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outline-memset.ll
@@ -0,0 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we successfully outline identical memset instructions.
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+define i64 @function1(i64 %x, i64 %z, i64 %n) {
+entry:
+  %pool = alloca [59 x i64], align 4
+  %tmp = bitcast [59 x i64]* %pool to i8*
+  call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+  %cmp3 = icmp eq i64 %n, 0
+  %a = add i64 %x, %z
+  %c = add i64 %x, %z
+  ret i64 0
+}
+
+define i64 @function2(i64 %x, i64 %z, i64 %n) {
+entry:
+  %pool = alloca [59 x i64], align 4
+  %tmp = bitcast [59 x i64]* %pool to i8*
+  call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+  %cmp3 = icmp eq i64 %n, 0
+  %a = add i64 %x, %z
+  %c = add i64 %x, %z
+  ret i64 0
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[POOL:%.*]] = alloca [59 x i64], align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
+; CHECK-NEXT:    ret i64 0
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[POOL:%.*]] = alloca [59 x i64], align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
+; CHECK-NEXT:    ret i64 0
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[TMP:%.*]] = bitcast [59 x i64]* [[TMP0:%.*]] to i8*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false)
+; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i64 [[TMP1:%.*]], 0
+; CHECK-NEXT:    [[A:%.*]] = add i64 [[TMP2:%.*]], [[TMP3:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = add i64 [[TMP2]], [[TMP3]]
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
new file mode 100644
index 0000000000000..8e36335b3120e
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
@@ -0,0 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; This test checks that we sucessfully outline identical memcpy var arg
+; intrinsics, but not the var arg instruction itself.
+
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+declare void @llvm.va_end(i8*)
+
+define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca double, align 8
+  %ap = alloca i8*, align 4
+  %c = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store double %b, double* %b.addr, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, i32
+  call void @llvm.va_copy(i8* %v, i8* %ap1)
+  call void @llvm.va_end(i8* %ap1)
+  store i32 %0, i32* %c, align 4
+  %tmp = load i32, i32* %c, align 4
+  ret i32 %tmp
+}
+
+define i32 @func2(i32 %a, double %b, i8* %v, ...) nounwind {
+entry:
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca double, align 8
+  %ap = alloca i8*, align 4
+  %c = alloca i32, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store double %b, double* %b.addr, align 8
+  %ap1 = bitcast i8** %ap to i8*
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, i32
+  call void @llvm.va_copy(i8* %v, i8* %ap1)
+  call void @llvm.va_end(i8* %ap1)
+  store i32 %0, i32* %c, align 4
+  %ap2 = bitcast i8** %ap to i8*
+  %tmp = load i32, i32* %c, align 4
+  ret i32 %tmp
+}
+; CHECK-LABEL: @func1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[AP:%.*]] = alloca i8*, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+; CHECK-NEXT:    [[AP1:%.*]] = bitcast i8** [[AP]] to i8*
+; CHECK-NEXT:    call void @llvm.va_start(i8* [[AP1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = va_arg i8** [[AP]], i32
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]])
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT:    ret i32 [[TMP]]
+;
+;
+; CHECK-LABEL: @func2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+; CHECK-NEXT:    [[AP:%.*]] = alloca i8*, align 4
+; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
+; CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
+; CHECK-NEXT:    [[AP1:%.*]] = bitcast i8** [[AP]] to i8*
+; CHECK-NEXT:    call void @llvm.va_start(i8* [[AP1]])
+; CHECK-NEXT:    [[TMP0:%.*]] = va_arg i8** [[AP]], i32
+; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]])
+; CHECK-NEXT:    [[AP2:%.*]] = bitcast i8** [[AP]] to i8*
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[C]], align 4
+; CHECK-NEXT:    ret i32 [[TMP]]
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    call void @llvm.va_copy(i8* [[TMP0:%.*]], i8* [[TMP1:%.*]])
+; CHECK-NEXT:    call void @llvm.va_end(i8* [[TMP1]])
+; CHECK-NEXT:    store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
index a8781e8f78af7..fb2cc07921621 100644
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -1479,7 +1479,8 @@ TEST(IRInstructionMapper, CleanuppadIllegal) {
 // are considered illegal since is extra checking needed to handle the address
 // space checking.
 
-// Checks that a memset instruction is mapped to an illegal value.
+// Checks that a memset instruction is mapped to an illegal value when
+// specified.
 TEST(IRInstructionMapper, MemSetIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1503,6 +1504,7 @@ TEST(IRInstructionMapper, MemSetIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1510,7 +1512,8 @@ TEST(IRInstructionMapper, MemSetIllegal) {
   ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]);
 }
 
-// Checks that a memcpy instruction is mapped to an illegal value.
+// Checks that a memcpy instruction is mapped to an illegal value  when
+// specified.
 TEST(IRInstructionMapper, MemCpyIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1534,6 +1537,7 @@ TEST(IRInstructionMapper, MemCpyIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1542,7 +1546,8 @@ TEST(IRInstructionMapper, MemCpyIllegal) {
   ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
 }
 
-// Checks that a memmove instruction is mapped to an illegal value.
+// Checks that a memmove instruction is mapped to an illegal value  when
+// specified.
 TEST(IRInstructionMapper, MemMoveIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1566,6 +1571,7 @@ TEST(IRInstructionMapper, MemMoveIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1573,6 +1579,45 @@ TEST(IRInstructionMapper, MemMoveIllegal) {
   ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
 }
 
+// Checks that mem* instructions are mapped to an legal value when not
+// specified, and that all the intrinsics are marked differently.
+TEST(IRInstructionMapper, MemOpsLegal) {
+  StringRef ModuleString = R"(
+  declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+  declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
+
+  define i64 @function(i64 %x, i64 %z, i64 %n) {
+  entry:
+    %pool = alloca [59 x i64], align 4
+    %tmp = bitcast [59 x i64]* %pool to i8*
+    call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+    call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+    call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
+    %cmp3 = icmp eq i64 %n, 0
+    %a = add i64 %x, %z
+    %c = add i64 %x, %z
+    ret i64 0
+  })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = true;
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(9));
+  ASSERT_LT(UnsignedVec[2], UnsignedVec[3]);
+  ASSERT_LT(UnsignedVec[3], UnsignedVec[4]);
+  ASSERT_LT(UnsignedVec[4], UnsignedVec[5]);
+}
+
 // Checks that a variable argument instructions are mapped to an illegal value.
 // We exclude variable argument instructions since variable arguments
 // requires extra checking of the argument list.
@@ -1614,6 +1659,7 @@ TEST(IRInstructionMapper, VarArgsIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());

From ce94432702bf42a0b95a2693aa47177f37dd0bb3 Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Fri, 7 Jan 2022 04:15:07 +0100
Subject: [PATCH 625/946] [clangd] Add designator inlay hints for initializer
 lists.

These make the init lists appear as if designated initialization was used.

Example:
  ExpectedHint{"param: ", "arg"}
becomes
  ExpectedHint{.Label="param: ", .RangeName="arg"}

Differential Revision: https://reviews.llvm.org/D116786
---
 clang-tools-extra/clangd/Config.h             |   1 +
 clang-tools-extra/clangd/ConfigCompile.cpp    |   4 +
 clang-tools-extra/clangd/ConfigFragment.h     |   2 +
 clang-tools-extra/clangd/ConfigYAML.cpp       |   4 +
 clang-tools-extra/clangd/InlayHints.cpp       | 202 +++++++++++++++++-
 clang-tools-extra/clangd/Protocol.cpp         |   2 +
 clang-tools-extra/clangd/Protocol.h           |   6 +
 .../clangd/unittests/InlayHintTests.cpp       | 105 ++++++++-
 8 files changed, 312 insertions(+), 14 deletions(-)

diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h
index 952626db31e42..f84b5ef1ffb58 100644
--- a/clang-tools-extra/clangd/Config.h
+++ b/clang-tools-extra/clangd/Config.h
@@ -130,6 +130,7 @@ struct Config {
     // Whether specific categories of hints are enabled.
     bool Parameters = true;
     bool DeducedTypes = true;
+    bool Designators = false;
   } InlayHints;
 };
 
diff --git a/clang-tools-extra/clangd/ConfigCompile.cpp b/clang-tools-extra/clangd/ConfigCompile.cpp
index a606b98a2dba4..268f214639b9c 100644
--- a/clang-tools-extra/clangd/ConfigCompile.cpp
+++ b/clang-tools-extra/clangd/ConfigCompile.cpp
@@ -541,6 +541,10 @@ struct FragmentCompiler {
       Out.Apply.push_back([Value(**F.DeducedTypes)](const Params &, Config &C) {
         C.InlayHints.DeducedTypes = Value;
       });
+    if (F.Designators)
+      Out.Apply.push_back([Value(**F.Designators)](const Params &, Config &C) {
+        C.InlayHints.Designators = Value;
+      });
   }
 
   constexpr static llvm::SourceMgr::DiagKind Error = llvm::SourceMgr::DK_Error;
diff --git a/clang-tools-extra/clangd/ConfigFragment.h b/clang-tools-extra/clangd/ConfigFragment.h
index d165b6305aa55..0be906036c87c 100644
--- a/clang-tools-extra/clangd/ConfigFragment.h
+++ b/clang-tools-extra/clangd/ConfigFragment.h
@@ -293,6 +293,8 @@ struct Fragment {
     llvm::Optional<Located<bool>> ParameterNames;
     /// Show deduced types for `auto`.
     llvm::Optional<Located<bool>> DeducedTypes;
+    /// Show designators in aggregate initialization.
+    llvm::Optional<Located<bool>> Designators;
   };
   InlayHintsBlock InlayHints;
 };
diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp
index 04c0c633a3bb4..0c758b6d296d4 100644
--- a/clang-tools-extra/clangd/ConfigYAML.cpp
+++ b/clang-tools-extra/clangd/ConfigYAML.cpp
@@ -229,6 +229,10 @@ class Parser {
       if (auto Value = boolValue(N, "DeducedTypes"))
         F.DeducedTypes = *Value;
     });
+    Dict.handle("Designators", [&](Node &N) {
+      if (auto Value = boolValue(N, "Designators"))
+        F.Designators = *Value;
+    });
     Dict.parse(N);
   }
 
diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp
index e534faa80c4bd..671f9a151d40f 100644
--- a/clang-tools-extra/clangd/InlayHints.cpp
+++ b/clang-tools-extra/clangd/InlayHints.cpp
@@ -13,6 +13,7 @@
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/Basic/SourceManager.h"
+#include "llvm/ADT/ScopeExit.h"
 
 namespace clang {
 namespace clangd {
@@ -21,6 +22,167 @@ namespace {
 // For now, inlay hints are always anchored at the left or right of their range.
 enum class HintSide { Left, Right };
 
+// Helper class to iterate over the designator names of an aggregate type.
+//
+// For an array type, yields [0], [1], [2]...
+// For aggregate classes, yields null for each base, then .field1, .field2, ...
+class AggregateDesignatorNames {
+public:
+  AggregateDesignatorNames(QualType T) {
+    if (!T.isNull()) {
+      T = T.getCanonicalType();
+      if (T->isArrayType()) {
+        IsArray = true;
+        Valid = true;
+        return;
+      }
+      if (const RecordDecl *RD = T->getAsRecordDecl()) {
+        Valid = true;
+        FieldsIt = RD->field_begin();
+        FieldsEnd = RD->field_end();
+        if (const auto *CRD = llvm::dyn_cast<CXXRecordDecl>(RD)) {
+          BasesIt = CRD->bases_begin();
+          BasesEnd = CRD->bases_end();
+          Valid = CRD->isAggregate();
+        }
+        OneField = Valid && BasesIt == BasesEnd && FieldsIt != FieldsEnd &&
+                   std::next(FieldsIt) == FieldsEnd;
+      }
+    }
+  }
+  // Returns false if the type was not an aggregate.
+  operator bool() { return Valid; }
+  // Advance to the next element in the aggregate.
+  void next() {
+    if (IsArray)
+      ++Index;
+    else if (BasesIt != BasesEnd)
+      ++BasesIt;
+    else if (FieldsIt != FieldsEnd)
+      ++FieldsIt;
+  }
+  // Print the designator to Out.
+  // Returns false if we could not produce a designator for this element.
+  bool append(std::string &Out, bool ForSubobject) {
+    if (IsArray) {
+      Out.push_back('[');
+      Out.append(std::to_string(Index));
+      Out.push_back(']');
+      return true;
+    }
+    if (BasesIt != BasesEnd)
+      return false; // Bases can't be designated. Should we make one up?
+    if (FieldsIt != FieldsEnd) {
+      llvm::StringRef FieldName;
+      if (const IdentifierInfo *II = FieldsIt->getIdentifier())
+        FieldName = II->getName();
+
+      // For certain objects, their subobjects may be named directly.
+      if (ForSubobject &&
+          (FieldsIt->isAnonymousStructOrUnion() ||
+           // std::array<int,3> x = {1,2,3}. Designators not strictly valid!
+           (OneField && isReservedName(FieldName))))
+        return true;
+
+      if (!FieldName.empty() && !isReservedName(FieldName)) {
+        Out.push_back('.');
+        Out.append(FieldName.begin(), FieldName.end());
+        return true;
+      }
+      return false;
+    }
+    return false;
+  }
+
+private:
+  bool Valid = false;
+  bool IsArray = false;
+  bool OneField = false; // e.g. std::array { T __elements[N]; }
+  unsigned Index = 0;
+  CXXRecordDecl::base_class_const_iterator BasesIt;
+  CXXRecordDecl::base_class_const_iterator BasesEnd;
+  RecordDecl::field_iterator FieldsIt;
+  RecordDecl::field_iterator FieldsEnd;
+};
+
+// Collect designator labels describing the elements of an init list.
+//
+// This function contributes the designators of some (sub)object, which is
+// represented by the semantic InitListExpr Sem.
+// This includes any nested subobjects, but *only* if they are part of the same
+// original syntactic init list (due to brace elision).
+// In other words, it may descend into subobjects but not written init-lists.
+//
+// For example: struct Outer { Inner a,b; }; struct Inner { int x, y; }
+//              Outer o{{1, 2}, 3};
+// This function will be called with Sem = { {1, 2}, {3, ImplicitValue} }
+// It should generate designators '.a:' and '.b.x:'.
+// '.a:' is produced directly without recursing into the written sublist.
+// (The written sublist will have a separate collectDesignators() call later).
+// Recursion with Prefix='.b' and Sem = {3, ImplicitValue} produces '.b.x:'.
+void collectDesignators(const InitListExpr *Sem,
+                        llvm::DenseMap<SourceLocation, std::string> &Out,
+                        const llvm::DenseSet<SourceLocation> &NestedBraces,
+                        std::string &Prefix) {
+  if (!Sem || Sem->isTransparent())
+    return;
+  assert(Sem->isSemanticForm());
+
+  // The elements of the semantic form all correspond to direct subobjects of
+  // the aggregate type. `Fields` iterates over these subobject names.
+  AggregateDesignatorNames Fields(Sem->getType());
+  if (!Fields)
+    return;
+  for (const Expr *Init : Sem->inits()) {
+    auto Next = llvm::make_scope_exit([&, Size(Prefix.size())] {
+      Fields.next();       // Always advance to the next subobject name.
+      Prefix.resize(Size); // Erase any designator we appended.
+    });
+    if (llvm::isa<ImplicitValueInitExpr>(Init))
+      continue; // a "hole" for a subobject that was not explicitly initialized
+
+    const auto *BraceElidedSubobject = llvm::dyn_cast<InitListExpr>(Init);
+    if (BraceElidedSubobject &&
+        NestedBraces.contains(BraceElidedSubobject->getLBraceLoc()))
+      BraceElidedSubobject = nullptr; // there were braces!
+
+    if (!Fields.append(Prefix, BraceElidedSubobject != nullptr))
+      continue; // no designator available for this subobject
+    if (BraceElidedSubobject) {
+      // If the braces were elided, this aggregate subobject is initialized
+      // inline in the same syntactic list.
+      // Descend into the semantic list describing the subobject.
+      // (NestedBraces are still correct, they're from the same syntactic list).
+      collectDesignators(BraceElidedSubobject, Out, NestedBraces, Prefix);
+      continue;
+    }
+    Out.try_emplace(Init->getBeginLoc(), Prefix);
+  }
+}
+
+// Get designators describing the elements of a (syntactic) init list.
+// This does not produce designators for any explicitly-written nested lists.
+llvm::DenseMap<SourceLocation, std::string>
+getDesignators(const InitListExpr *Syn) {
+  assert(Syn->isSyntacticForm());
+
+  // collectDesignators needs to know which InitListExprs in the semantic tree
+  // were actually written, but InitListExpr::isExplicit() lies.
+  // Instead, record where braces of sub-init-lists occur in the syntactic form.
+  llvm::DenseSet<SourceLocation> NestedBraces;
+  for (const Expr *Init : Syn->inits())
+    if (auto *Nested = llvm::dyn_cast<InitListExpr>(Init))
+      NestedBraces.insert(Nested->getLBraceLoc());
+
+  // Traverse the semantic form to find the designators.
+  // We use their SourceLocation to correlate with the syntactic form later.
+  llvm::DenseMap<SourceLocation, std::string> Designators;
+  std::string EmptyPrefix;
+  collectDesignators(Syn->isSemanticForm() ? Syn : Syn->getSemanticForm(),
+                     Designators, NestedBraces, EmptyPrefix);
+  return Designators;
+}
+
 class InlayHintVisitor : public RecursiveASTVisitor<InlayHintVisitor> {
 public:
   InlayHintVisitor(std::vector<InlayHint> &Results, ParsedAST &AST,
@@ -127,6 +289,30 @@ class InlayHintVisitor : public RecursiveASTVisitor<InlayHintVisitor> {
     return true;
   }
 
+  bool VisitInitListExpr(InitListExpr *Syn) {
+    // We receive the syntactic form here (shouldVisitImplicitCode() is false).
+    // This is the one we will ultimately attach designators to.
+    // It may have subobject initializers inlined without braces. The *semantic*
+    // form of the init-list has nested init-lists for these.
+    // getDesignators will look at the semantic form to determine the labels.
+    assert(Syn->isSyntacticForm() && "RAV should not visit implicit code!");
+    if (!Cfg.InlayHints.Designators)
+      return true;
+    if (Syn->isIdiomaticZeroInitializer(AST.getLangOpts()))
+      return true;
+    llvm::DenseMap<SourceLocation, std::string> Designators =
+        getDesignators(Syn);
+    for (const Expr *Init : Syn->inits()) {
+      if (llvm::isa<DesignatedInitExpr>(Init))
+        continue;
+      auto It = Designators.find(Init->getBeginLoc());
+      if (It != Designators.end() &&
+          !isPrecededByParamNameComment(Init, It->second))
+        addDesignatorHint(Init->getSourceRange(), It->second);
+    }
+    return true;
+  }
+
   // FIXME: Handle RecoveryExpr to try to hint some invalid calls.
 
 private:
@@ -229,12 +415,16 @@ class InlayHintVisitor : public RecursiveASTVisitor<InlayHintVisitor> {
     // Check for comment ending.
     if (!SourcePrefix.consume_back("*/"))
       return false;
-    // Allow whitespace and "=" at end of comment.
-    SourcePrefix = SourcePrefix.rtrim().rtrim('=').rtrim();
+    // Ignore some punctuation and whitespace around comment.
+    // In particular this allows designators to match nicely.
+    llvm::StringLiteral IgnoreChars = " =.";
+    SourcePrefix = SourcePrefix.rtrim(IgnoreChars);
+    ParamName = ParamName.trim(IgnoreChars);
     // Other than that, the comment must contain exactly ParamName.
     if (!SourcePrefix.consume_back(ParamName))
       return false;
-    return SourcePrefix.rtrim().endswith("/*");
+    SourcePrefix = SourcePrefix.rtrim(IgnoreChars);
+    return SourcePrefix.endswith("/*");
   }
 
   // If "E" spells a single unqualified identifier, return that name.
@@ -341,6 +531,7 @@ class InlayHintVisitor : public RecursiveASTVisitor<InlayHintVisitor> {
     break
       CHECK_KIND(ParameterHint, Parameters);
       CHECK_KIND(TypeHint, DeducedTypes);
+      CHECK_KIND(DesignatorHint, Designators);
 #undef CHECK_KIND
     }
 
@@ -378,6 +569,11 @@ class InlayHintVisitor : public RecursiveASTVisitor<InlayHintVisitor> {
                    TypeName, /*Suffix=*/"");
   }
 
+  void addDesignatorHint(SourceRange R, llvm::StringRef Text) {
+    addInlayHint(R, HintSide::Left, InlayHintKind::DesignatorHint,
+                 /*Prefix=*/"", Text, /*Suffix=*/"=");
+  }
+
   std::vector<InlayHint> &Results;
   ASTContext &AST;
   const Config &Cfg;
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index 42f452f74f970..4dee1373a1496 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -1326,6 +1326,8 @@ llvm::json::Value toJSON(InlayHintKind K) {
     return "parameter";
   case InlayHintKind::TypeHint:
     return "type";
+  case InlayHintKind::DesignatorHint:
+    return "designator";
   }
   llvm_unreachable("Unknown clang.clangd.InlayHintKind");
 }
diff --git a/clang-tools-extra/clangd/Protocol.h b/clang-tools-extra/clangd/Protocol.h
index d7ca580dceffc..1945c766608be 100644
--- a/clang-tools-extra/clangd/Protocol.h
+++ b/clang-tools-extra/clangd/Protocol.h
@@ -1538,6 +1538,12 @@ enum class InlayHintKind {
   /// which shows the deduced type of the variable.
   TypeHint,
 
+  /// A hint before an element of an aggregate braced initializer list,
+  /// indicating what it is initializing.
+  ///   Pair{^1, ^2};
+  /// Uses designator syntax, e.g. `.first:`.
+  DesignatorHint,
+
   /// Other ideas for hints that are not currently implemented:
   ///
   /// * Chaining hints, showing the types of intermediate expressions
diff --git a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp
index 992ec0e012aee..6c3ac0d62e0e5 100644
--- a/clang-tools-extra/clangd/unittests/InlayHintTests.cpp
+++ b/clang-tools-extra/clangd/unittests/InlayHintTests.cpp
@@ -13,21 +13,22 @@
 #include "TestWorkspace.h"
 #include "XRefs.h"
 #include "support/Context.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang {
 namespace clangd {
 
-std::ostream &operator<<(std::ostream &Stream, const InlayHint &Hint) {
-  return Stream << Hint.label;
+llvm::raw_ostream &operator<<(llvm::raw_ostream &Stream,
+                              const InlayHint &Hint) {
+  return Stream << Hint.label << "@" << Hint.range;
 }
 
 namespace {
 
 using ::testing::ElementsAre;
 using ::testing::IsEmpty;
-using ::testing::UnorderedElementsAre;
 
 std::vector<InlayHint> hintsOfKind(ParsedAST &AST, InlayHintKind Kind) {
   std::vector<InlayHint> Result;
@@ -45,17 +46,24 @@ struct ExpectedHint {
   std::string RangeName;
   HintSide Side = Left;
 
-  friend std::ostream &operator<<(std::ostream &Stream,
-                                  const ExpectedHint &Hint) {
-    return Stream << Hint.RangeName << ": " << Hint.Label;
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &Stream,
+                                       const ExpectedHint &Hint) {
+    return Stream << Hint.Label << "@$" << Hint.RangeName;
   }
 };
 
-MATCHER_P2(HintMatcher, Expected, Code, "") {
-  return arg.label == Expected.Label &&
-         arg.range == Code.range(Expected.RangeName) &&
-         arg.position ==
-             ((Expected.Side == Left) ? arg.range.start : arg.range.end);
+MATCHER_P2(HintMatcher, Expected, Code, llvm::to_string(Expected)) {
+  if (arg.label != Expected.Label) {
+    *result_listener << "label is " << arg.label;
+    return false;
+  }
+  if (arg.range != Code.range(Expected.RangeName)) {
+    *result_listener << "range is " << arg.label << " but $"
+                     << Expected.RangeName << " is "
+                     << llvm::to_string(Code.range(Expected.RangeName));
+    return false;
+  }
+  return true;
 }
 
 MATCHER_P(labelIs, Label, "") { return arg.label == Label; }
@@ -64,6 +72,7 @@ Config noHintsConfig() {
   Config C;
   C.InlayHints.Parameters = false;
   C.InlayHints.DeducedTypes = false;
+  C.InlayHints.Designators = false;
   return C;
 }
 
@@ -100,6 +109,15 @@ void assertTypeHints(llvm::StringRef AnnotatedSource,
   assertHints(InlayHintKind::TypeHint, AnnotatedSource, Expected...);
 }
 
+template <typename... ExpectedHints>
+void assertDesignatorHints(llvm::StringRef AnnotatedSource,
+                           ExpectedHints... Expected) {
+  Config Cfg;
+  Cfg.InlayHints.Designators = true;
+  WithContextValue WithCfg(Config::Key, std::move(Cfg));
+  assertHints(InlayHintKind::DesignatorHint, AnnotatedSource, Expected...);
+}
+
 TEST(ParameterHints, Smoke) {
   assertParameterHints(R"cpp(
     void foo(int param);
@@ -658,6 +676,71 @@ TEST(TypeHints, Deduplication) {
                   ExpectedHint{": int", "var"});
 }
 
+TEST(DesignatorHints, Basic) {
+  assertDesignatorHints(R"cpp(
+    struct S { int x, y, z; };
+    S s {$x[[1]], $y[[2+2]]};
+
+    int x[] = {$0[[0]], $1[[1]]};
+  )cpp",
+                        ExpectedHint{".x=", "x"}, ExpectedHint{".y=", "y"},
+                        ExpectedHint{"[0]=", "0"}, ExpectedHint{"[1]=", "1"});
+}
+
+TEST(DesignatorHints, Nested) {
+  assertDesignatorHints(R"cpp(
+    struct Inner { int x, y; };
+    struct Outer { Inner a, b; };
+    Outer o{ $a[[{ $x[[1]], $y[[2]] }]], $bx[[3]] };
+  )cpp",
+                        ExpectedHint{".a=", "a"}, ExpectedHint{".x=", "x"},
+                        ExpectedHint{".y=", "y"}, ExpectedHint{".b.x=", "bx"});
+}
+
+TEST(DesignatorHints, AnonymousRecord) {
+  assertDesignatorHints(R"cpp(
+    struct S {
+      union {
+        struct {
+          struct {
+            int y;
+          };
+        } x;
+      };
+    };
+    S s{$xy[[42]]};
+  )cpp",
+                        ExpectedHint{".x.y=", "xy"});
+}
+
+TEST(DesignatorHints, Suppression) {
+  assertDesignatorHints(R"cpp(
+    struct Point { int a, b, c, d, e, f, g, h; };
+    Point p{/*a=*/1, .c=2, /* .d = */3, $e[[4]]};
+  )cpp",
+                        ExpectedHint{".e=", "e"});
+}
+
+TEST(DesignatorHints, StdArray) {
+  // Designators for std::array should be [0] rather than .__elements[0].
+  // While technically correct, the designator is useless and horrible to read.
+  assertDesignatorHints(R"cpp(
+    template <typename T, int N> struct Array { T __elements[N]; };
+    Array<int, 2> x = {$0[[0]], $1[[1]]};
+  )cpp",
+                        ExpectedHint{"[0]=", "0"}, ExpectedHint{"[1]=", "1"});
+}
+
+TEST(DesignatorHints, OnlyAggregateInit) {
+  assertDesignatorHints(R"cpp(
+    struct Copyable { int x; } c;
+    Copyable d{c};
+
+    struct Constructible { Constructible(int x); };
+    Constructible x{42};
+  )cpp" /*no designator hints expected (but param hints!)*/);
+}
+
 TEST(InlayHints, RestrictRange) {
   Annotations Code(R"cpp(
     auto a = false;

From d81a3c51e7f76c4b3f7ed687f82a019168aad2da Mon Sep 17 00:00:00 2001
From: Rob Suderman <rob.suderman@gmail.com>
Date: Tue, 25 Jan 2022 15:54:49 -0800
Subject: [PATCH 626/946] [mlir] Fold tensor.reshape operations into
 tensor.from_elements.

There is not much of a benefit to reshape a from element vs reloading it.
Updated to progagate shape manipulations into the output type of
tensor.from_elements.

Reviewed By: NatashaKnk

Differential Revision: https://reviews.llvm.org/D118201
---
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp   | 29 ++++++++++++++++++++--
 mlir/test/Dialect/Tensor/canonicalize.mlir | 22 ++++++++++++++++
 2 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 42f57a9cf99bd..5ae13a613c427 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -798,20 +798,45 @@ struct FoldReshapeWithConstant : OpRewritePattern<TensorReshapeOp> {
   }
 };
 
+/// Reshape of a FromElements can be replaced with a FromElements of the result
+/// type
+template <typename TensorReshapeOp>
+struct FoldReshapeWithFromElements : OpRewritePattern<TensorReshapeOp> {
+  using OpRewritePattern<TensorReshapeOp>::OpRewritePattern;
+  LogicalResult matchAndRewrite(TensorReshapeOp reshapeOp,
+                                PatternRewriter &rewriter) const override {
+    auto fromElements =
+        reshapeOp.src().template getDefiningOp<FromElementsOp>();
+    if (!fromElements)
+      return failure();
+
+    auto shapedTy = reshapeOp.getType().template cast<ShapedType>();
+
+    if (!shapedTy.hasStaticShape())
+      return failure();
+
+    rewriter.replaceOpWithNewOp<FromElementsOp>(reshapeOp, reshapeOp.getType(),
+                                                fromElements.elements());
+    return success();
+  }
+};
+
 } // namespace
 
 void ExpandShapeOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                 MLIRContext *context) {
   results.add<CollapseReshapeOps<ExpandShapeOp>,
               CollapseMixedReshapeOps<ExpandShapeOp, CollapseShapeOp>,
-              FoldReshapeWithConstant<ExpandShapeOp>>(context);
+              FoldReshapeWithConstant<ExpandShapeOp>,
+              FoldReshapeWithFromElements<ExpandShapeOp>>(context);
 }
 
 void CollapseShapeOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                   MLIRContext *context) {
   results.add<CollapseReshapeOps<CollapseShapeOp>,
               CollapseMixedReshapeOps<CollapseShapeOp, ExpandShapeOp>,
-              FoldReshapeWithConstant<CollapseShapeOp>>(context);
+              FoldReshapeWithConstant<CollapseShapeOp>,
+              FoldReshapeWithFromElements<CollapseShapeOp>>(context);
 }
 
 OpFoldResult ExpandShapeOp::fold(ArrayRef<Attribute> operands) {
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
index 10d39132a1126..e0ea5d777acb8 100644
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -1178,3 +1178,25 @@ func @pad_nofold_static_zero(%arg0: tensor<?x?x?xf32>, %pad_value: f32) -> tenso
 
   return %0 : tensor<2x3x4xf32>
 }
+
+// -----
+
+// CHECK-LABEL: func @fold_collapse_shape_from_elements
+func @fold_collapse_shape_from_elements(%arg0: i32) -> tensor<i32> {
+  // CHECK: %[[FROM:.+]] = tensor.from_elements %arg0 : tensor<i32>
+  // CHECK: return %[[FROM]] : tensor<i32>
+  %0 = tensor.from_elements %arg0 : tensor<1xi32>
+  %1 = tensor.collapse_shape %0 [] : tensor<1xi32> into tensor<i32>
+  return %1 : tensor<i32>
+}
+
+// -----
+
+// CHECK-LABEL: func @fold_expand_shape_from_elements
+func @fold_expand_shape_from_elements(%arg0: i32) -> tensor<1xi32> {
+  // CHECK: %[[FROM:.+]] = tensor.from_elements %arg0 : tensor<1xi32>
+  // CHECK: return %[[FROM]] : tensor<1xi32>
+  %0 = tensor.from_elements %arg0 : tensor<i32>
+  %1 = tensor.expand_shape %0 [] : tensor<i32> into tensor<1xi32>
+  return %1 : tensor<1xi32>
+}

From fe30370b007e7efa1bd1f39a3189b27ae4e5fcbe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?D=C3=A1vid=20Bolvansk=C3=BD?= <david.bolvansky@gmail.com>
Date: Tue, 25 Jan 2022 23:21:15 +0100
Subject: [PATCH 627/946] Reland "[AlwaysInliner] Enable call site inlining to
 make flatten attribute working again (#53360)"

---
 clang/test/CodeGen/flatten.c                 |  6 ------
 clang/test/CodeGenCXX/flatten.cpp            |  4 ----
 llvm/lib/Transforms/IPO/AlwaysInliner.cpp    | 16 +++++++++-------
 llvm/test/Transforms/Inline/always-inline.ll | 18 +++++++-----------
 4 files changed, 16 insertions(+), 28 deletions(-)

diff --git a/clang/test/CodeGen/flatten.c b/clang/test/CodeGen/flatten.c
index 287d4f2a46b65..4e762223de486 100644
--- a/clang/test/CodeGen/flatten.c
+++ b/clang/test/CodeGen/flatten.c
@@ -1,9 +1,3 @@
-// UNSUPPORTED: experimental-new-pass-manager
-// Currently, different code seems to be intentionally generated under the new
-// PM since we alwaysinline functions and not callsites under new PM.
-// Under new PM, f() will not be inlined from g() since f is not marked as
-// alwaysinline.
-
 // RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o - | FileCheck %s
 
 void f(void) {}
diff --git a/clang/test/CodeGenCXX/flatten.cpp b/clang/test/CodeGenCXX/flatten.cpp
index 7a6484591aaa0..e988d6d726dd7 100644
--- a/clang/test/CodeGenCXX/flatten.cpp
+++ b/clang/test/CodeGenCXX/flatten.cpp
@@ -1,7 +1,3 @@
-// UNSUPPORTED: experimental-new-pass-manager
-// See the comment for CodeGen/flatten.c on why this is unsupported with the new
-// PM.
-
 // RUN: %clang_cc1 -triple=x86_64-linux-gnu -std=c++11 %s -emit-llvm -o - | FileCheck %s
 
 void f(void) {}
diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
index 7acc9b266ad82..a6d9ce1033f3c 100644
--- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
+++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -54,13 +54,13 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
     if (F.isPresplitCoroutine())
       continue;
 
-    if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) &&
-        isInlineViable(F).isSuccess()) {
+    if (!F.isDeclaration() && isInlineViable(F).isSuccess()) {
       Calls.clear();
 
       for (User *U : F.users())
         if (auto *CB = dyn_cast<CallBase>(U))
-          if (CB->getCalledFunction() == &F)
+          if (CB->getCalledFunction() == &F &&
+              CB->hasFnAttr(Attribute::AlwaysInline))
             Calls.insert(CB);
 
       for (CallBase *CB : Calls) {
@@ -92,10 +92,12 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M,
         Changed = true;
       }
 
-      // Remember to try and delete this function afterward. This both avoids
-      // re-walking the rest of the module and avoids dealing with any iterator
-      // invalidation issues while deleting functions.
-      InlinedFunctions.push_back(&F);
+      if (F.hasFnAttribute(Attribute::AlwaysInline)) {
+        // Remember to try and delete this function afterward. This both avoids
+        // re-walking the rest of the module and avoids dealing with any
+        // iterator invalidation issues while deleting functions.
+        InlinedFunctions.push_back(&F);
+      }
     }
   }
 
diff --git a/llvm/test/Transforms/Inline/always-inline.ll b/llvm/test/Transforms/Inline/always-inline.ll
index 0fcf956199c46..f947bdbd87347 100644
--- a/llvm/test/Transforms/Inline/always-inline.ll
+++ b/llvm/test/Transforms/Inline/always-inline.ll
@@ -1,14 +1,11 @@
-; RUN: opt < %s -inline-threshold=0 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
+; RUN: opt < %s -inline-threshold=0 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
 ;
 ; Ensure the threshold has no impact on these decisions.
-; RUN: opt < %s -inline-threshold=20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
-; RUN: opt < %s -inline-threshold=-20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL
+; RUN: opt < %s -inline-threshold=20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
+; RUN: opt < %s -inline-threshold=-20000000 -always-inline -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK
 ;
 ; The new pass manager doesn't re-use any threshold based infrastructure for
-; the always inliner, but test that we get the correct result. The new PM
-; always inliner also doesn't support inlining call-site alwaysinline
-; annotations. It isn't clear that this is a reasonable use case for
-; 'alwaysinline'.
+; the always inliner, but test that we get the correct result.
 ; RUN: opt < %s -inline-threshold=0 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
 ; RUN: opt < %s -inline-threshold=20000000 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
 ; RUN: opt < %s -inline-threshold=-20000000 -passes=always-inline -S | FileCheck %s --check-prefix=CHECK
@@ -146,10 +143,9 @@ define i32 @inner7() {
   ret i32 1
 }
 define i32 @outer7() {
-; CHECK-CALL-LABEL: @outer7(
-; CHECK-CALL-NOT: call
-; CHECK-CALL: ret
-
+; CHECK-LABEL: @outer7(
+; CHECK-NOT: call
+; CHECK: ret
    %r = call i32 @inner7() alwaysinline
    ret i32 %r
 }

From cf730d8ce1341ba593144df2e2bc8411238e04c3 Mon Sep 17 00:00:00 2001
From: Kirill Stoimenov <kstoimenov@google.com>
Date: Tue, 25 Jan 2022 21:33:55 +0000
Subject: [PATCH 628/946] [ASan] Not linking asan_static library for DSO.

Without this change DSOs fail to link because of missing asan_report_(load|store)n functions.

Reviewed By: kda

Differential Revision: https://reviews.llvm.org/D118184
---
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 3897f67d1fe64..5711998f90434 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -826,16 +826,16 @@ collectSanitizerRuntimes(const ToolChain &TC, const ArgList &Args,
   if (SanArgs.needsStatsRt() && SanArgs.linkRuntimes())
     StaticRuntimes.push_back("stats_client");
 
-  // Always link the static runtime regardless of DSO or executable.
-  if (SanArgs.needsAsanRt())
-    HelperStaticRuntimes.push_back("asan_static");
-
   // Collect static runtimes.
   if (Args.hasArg(options::OPT_shared)) {
     // Don't link static runtimes into DSOs.
     return;
   }
 
+  // Always link the static runtime for executable.
+  if (SanArgs.needsAsanRt())
+    HelperStaticRuntimes.push_back("asan_static");
+
   // Each static runtime that has a DSO counterpart above is excluded below,
   // but runtimes that exist only as static are not affected by needsSharedRt.
 

From 9c2daf648c9b3efb93a4361624890a2c31e8e243 Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Tue, 25 Jan 2022 18:18:27 -0600
Subject: [PATCH 629/946] Revert "[IRSim][IROutliner] Allowing Intrinsic Calls
 to be Used in Similarity Matching and Outlined Regions"

This reverts commit 8de76bd569732acae6a10fdcb0152a49f7d4cd39.

Reverting due to failure of different-intrinsics.ll on lld-x86_64-win buildbot.
---
 .../llvm/Analysis/IRSimilarityIdentifier.h    | 53 +----------
 llvm/include/llvm/Transforms/IPO/IROutliner.h |  6 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  | 13 +--
 llvm/lib/Transforms/IPO/IROutliner.cpp        |  9 +-
 .../IROutliner/different-intrinsics.ll        | 92 -------------------
 .../Transforms/IROutliner/illegal-memcpy.ll   |  2 +-
 .../Transforms/IROutliner/illegal-memmove.ll  |  2 +-
 .../Transforms/IROutliner/illegal-memset.ll   |  2 +-
 .../Transforms/IROutliner/illegal-vaarg.ll    |  2 +-
 .../Transforms/IROutliner/outline-memcpy.ll   | 60 ------------
 .../Transforms/IROutliner/outline-memmove.ll  | 60 ------------
 .../Transforms/IROutliner/outline-memset.ll   | 55 -----------
 .../IROutliner/outline-vaarg-intrinsic.ll     | 90 ------------------
 .../Analysis/IRSimilarityIdentifierTest.cpp   | 52 +----------
 14 files changed, 18 insertions(+), 480 deletions(-)
 delete mode 100644 llvm/test/Transforms/IROutliner/different-intrinsics.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/outline-memcpy.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/outline-memmove.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/outline-memset.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 73dd3219aa132..966bf02d128ec 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -252,30 +252,7 @@ struct IRInstructionData
           llvm::hash_value(ID.Inst->getType()),
           llvm::hash_value(ID.getPredicate()),
           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
-
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(ID.Inst)) {
-      // To hash intrinsics, we use the opcode, and types like the other
-      // instructions, but also, the Intrinsic ID, and the Name of the
-      // intrinsic.
-      Intrinsic::ID IntrinsicID = II->getIntrinsicID();
-      FunctionType *FT = II->getFunctionType();
-      std::string Name;
-      // If there is an overloaded name, we have to use the complex version
-      // of getName to get the entire string.
-      if (Intrinsic::isOverloaded(IntrinsicID))
-        Name =
-            Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
-      // If there is not an overloaded name, we only need to use this version.
-      else
-        Name = Intrinsic::getName(IntrinsicID).str();
-      return llvm::hash_combine(
-          llvm::hash_value(ID.Inst->getOpcode()),
-          llvm::hash_value(ID.Inst->getType()), llvm::hash_value(IntrinsicID),
-          llvm::hash_value(Name),
-          llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
-    }
-
-    if (isa<CallInst>(ID.Inst)) {
+    else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst)) {
       std::string FunctionName = *ID.CalleeName;
       return llvm::hash_combine(
           llvm::hash_value(ID.Inst->getOpcode()),
@@ -283,7 +260,6 @@ struct IRInstructionData
           llvm::hash_value(ID.Inst->getType()), llvm::hash_value(FunctionName),
           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
     }
-
     return llvm::hash_combine(
         llvm::hash_value(ID.Inst->getOpcode()),
         llvm::hash_value(ID.Inst->getType()),
@@ -536,17 +512,8 @@ struct IRInstructionMapper {
     // analyzed for similarity as it has no bearing on the outcome of the
     // program.
     InstrType visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return Invisible; }
-    InstrType visitIntrinsicInst(IntrinsicInst &II) {
-      // These are disabled due to complications in the CodeExtractor when
-      // outlining these instructions.  For instance, It is unclear what we
-      // should do when moving only the start or end lifetime instruction into
-      // an outlined function. Also, assume-like intrinsics could be removed
-      // from the region, removing arguments, causing discrepencies in the
-      // number of inputs between different regions.
-      if (II.isLifetimeStartOrEnd() || II.isAssumeLikeIntrinsic())
-        return Illegal;
-      return EnableIntrinsics ? Legal : Illegal;
-    }
+    // TODO: Handle specific intrinsics.
+    InstrType visitIntrinsicInst(IntrinsicInst &II) { return Illegal; }
     // We only allow call instructions where the function has a name and
     // is not an indirect call.
     InstrType visitCallInst(CallInst &CI) {
@@ -573,10 +540,6 @@ struct IRInstructionMapper {
     // The flag variable that lets the classifier know whether we should
     // allow indirect calls to be considered legal instructions.
     bool EnableIndirectCalls = false;
-
-    // Flag that lets the classifier know whether we should allow intrinsics to
-    // be checked for similarity.
-    bool EnableIntrinsics = false;
   };
 
   /// Maps an Instruction to a member of InstrType.
@@ -963,12 +926,10 @@ class IRSimilarityIdentifier {
 public:
   IRSimilarityIdentifier(bool MatchBranches = true,
                          bool MatchIndirectCalls = true,
-                         bool MatchCallsWithName = false,
-                         bool MatchIntrinsics = true)
+                         bool MatchCallsWithName = false)
       : Mapper(&InstDataAllocator, &InstDataListAllocator),
         EnableBranches(MatchBranches), EnableIndirectCalls(MatchIndirectCalls),
-        EnableMatchingCallsByName(MatchCallsWithName),
-        EnableIntrinsics(MatchIntrinsics) {}
+        EnableMatchingCallsByName(MatchCallsWithName) {}
 
 private:
   /// Map the instructions in the module to unsigned integers, using mapping
@@ -1057,10 +1018,6 @@ class IRSimilarityIdentifier {
   /// convention, attributes and type signature.
   bool EnableMatchingCallsByName = true;
 
-  /// The flag variable that marks whether we should check intrinsics for
-  /// similarity.
-  bool EnableIntrinsics = true;
-
   /// The SimilarityGroups found with the most recent run of \ref
   /// findSimilarity. None if there is no recent run.
   Optional<SimilarityGroupList> SimilarityCandidates;
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 0364fba86581d..9799737a529e3 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -360,7 +360,7 @@ class IROutliner {
     bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
     // TODO: Handle specific intrinsics individually from those that can be
     // handled.
-    bool IntrinsicInst(IntrinsicInst &II) { return EnableIntrinsics; }
+    bool IntrinsicInst(IntrinsicInst &II) { return false; }
     // We only handle CallInsts that are not indirect, since we cannot guarantee
     // that they have a name in these cases.
     bool visitCallInst(CallInst &CI) {
@@ -396,10 +396,6 @@ class IROutliner {
     // The flag variable that marks whether we should allow indirect calls
     // to be outlined.
     bool EnableIndirectCalls = true;
-
-    // The flag variable that marks whether we should allow intrinsics
-    // instructions to be outlined.
-    bool EnableIntrinsics = false;
   };
 
   /// A InstVisitor used to exclude certain instructions from being outlined.
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index aa5aadae6032c..8c3c0400d6015 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -29,6 +29,7 @@ cl::opt<bool>
                     cl::ReallyHidden,
                     cl::desc("disable similarity matching, and outlining, "
                              "across branches for debugging purposes."));
+} // namespace llvm
 
 cl::opt<bool>
     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
@@ -40,9 +41,6 @@ cl::opt<bool>
                      cl::desc("only allow matching call instructions if the "
                               "name and type signature match."));
 
-cl::opt<bool>
-    DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
-                      cl::desc("Don't match or outline intrinsics"));
 
 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
                                      IRInstructionDataList &IDList)
@@ -50,8 +48,6 @@ IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
   initializeInstruction();
 }
 
-} // namespace llvm
-
 void IRInstructionData::initializeInstruction() {
   // We check for whether we have a comparison instruction.  If it is, we
   // find the "less than" version of the predicate for consistency for
@@ -1107,7 +1103,6 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
-  Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
 
   populateMapper(Modules, InstrList, IntegerMapping);
   findCandidates(InstrList, IntegerMapping);
@@ -1120,7 +1115,6 @@ SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
-  Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
 
   std::vector<IRInstructionData *> InstrList;
   std::vector<unsigned> IntegerMapping;
@@ -1142,7 +1136,7 @@ IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
 
 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
-                                        MatchCallsByName, !DisableIntrinsics));
+                                        MatchCallsByName));
   return false;
 }
 
@@ -1159,8 +1153,9 @@ bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
 AnalysisKey IRSimilarityAnalysis::Key;
 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
                                                  ModuleAnalysisManager &) {
+
   auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
-                                     MatchCallsByName, !DisableIntrinsics);
+                                     MatchCallsByName);
   IRSI.findSimilarity(M);
   return IRSI;
 }
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index fa2dadbb2d0d5..9c79972443fe6 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -38,17 +38,12 @@ using namespace IRSimilarity;
 // matching and outlining.
 namespace llvm {
 extern cl::opt<bool> DisableBranches;
+} // namespace llvm
 
 // A command flag to be used for debugging to indirect calls from similarity
 // matching and outlining.
 extern cl::opt<bool> DisableIndirectCalls;
 
-// A command flag to be used for debugging to exclude intrinsics from similarity
-// matching and outlining.
-extern cl::opt<bool> DisableIntrinsics;
-
-} // namespace llvm
-
 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
 // functions. This is false by default because the linker can dedupe linkonceodr
 // functions. Since the outliner is confined to a single module (modulo LTO),
@@ -2529,8 +2524,6 @@ unsigned IROutliner::doOutline(Module &M) {
   // Find the possible similarity sections.
   InstructionClassifier.EnableBranches = !DisableBranches;
   InstructionClassifier.EnableIndirectCalls = !DisableIndirectCalls;
-  InstructionClassifier.EnableIntrinsics = !DisableIntrinsics;
-
   IRSimilarityIdentifier &Identifier = getIRSI(M);
   SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
 
diff --git a/llvm/test/Transforms/IROutliner/different-intrinsics.ll b/llvm/test/Transforms/IROutliner/different-intrinsics.ll
deleted file mode 100644
index a0fa2d2d59487..0000000000000
--- a/llvm/test/Transforms/IROutliner/different-intrinsics.ll
+++ /dev/null
@@ -1,92 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; This test checks that we do not outline different intrinsics as the same
-; function or as a value like we would for non-intrinsic functions.
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
-
-define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
-entry:
-  %a = load i8, i8* %s
-  %b = load i8, i8* %d
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
-  %c = add i8 %a, %b
-  %ret = load i8, i8* %s
-  ret i8 %ret
-}
-
-define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
-entry:
-  %a = load i8, i8* %s
-  %b = load i8, i8* %d
-  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
-  %c = add i8 %a, %b
-  %ret = load i8, i8* %s
-  ret i8 %ret
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[A_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT:    call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
-; CHECK-NEXT:    [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
-; CHECK-NEXT:    [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
-; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[B_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[A_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT:    call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
-; CHECK-NEXT:    [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
-; CHECK-NEXT:    [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
-; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
-;
-;
-; CHECK-LABEL: define internal void @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
-; CHECK:       entry_to_outline:
-; CHECK-NEXT:    [[C:%.*]] = add i8 [[TMP0:%.*]], [[TMP1:%.*]]
-; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP2:%.*]], align 1
-; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       entry_after_outline.exitStub:
-; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: define internal void @outlined_ir_func_1(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
-; CHECK:       entry_to_outline:
-; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
-; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
-; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       entry_after_outline.exitStub:
-; CHECK-NEXT:    store i8 [[A]], i8* [[TMP2:%.*]], align 1
-; CHECK-NEXT:    store i8 [[B]], i8* [[TMP3:%.*]], align 1
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
index 8bee43c77b84b..6c242d7b5e846 100644
--- a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
 
 ; This test checks that we do not outline memcpy intrinsics since it may require
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-memmove.ll b/llvm/test/Transforms/IROutliner/illegal-memmove.ll
index afc626f6abd2f..0a9216425455d 100644
--- a/llvm/test/Transforms/IROutliner/illegal-memmove.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memmove.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
 
 ; This test checks that we do not outline memcpy intrinsics since it may require
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-memset.ll b/llvm/test/Transforms/IROutliner/illegal-memset.ll
index ed3eeb2d01b4b..4470d0c6d1287 100644
--- a/llvm/test/Transforms/IROutliner/illegal-memset.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memset.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
 
 ; This test checks that we do not outline memset intrinsics since it requires
 ; extra address space checks.
diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
index eaffefe3d9d5f..28a1e5994e703 100644
--- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost -no-ir-sim-intrinsics < %s | FileCheck %s
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
 
 ; This test ensures that we do not outline vararg instructions or intrinsics, as
 ; they may cause inconsistencies when outlining.
diff --git a/llvm/test/Transforms/IROutliner/outline-memcpy.ll b/llvm/test/Transforms/IROutliner/outline-memcpy.ll
deleted file mode 100644
index d5d4859c318e4..0000000000000
--- a/llvm/test/Transforms/IROutliner/outline-memcpy.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; This test checks that we successfully outline identical memcpy instructions.
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
-
-define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
-entry:
-  %a = load i8, i8* %s
-  %b = load i8, i8* %d
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
-  %c = add i8 %a, %b
-  %ret = load i8, i8* %s
-  ret i8 %ret
-}
-
-define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
-entry:
-  %a = load i8, i8* %s
-  %b = load i8, i8* %d
-  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
-  %c = add i8 %a, %b
-  %ret = load i8, i8* %s
-  ret i8 %ret
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
-; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
-; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
-;
-;
-; CHECK: define internal void @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
-; CHECK:       entry_to_outline:
-; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
-; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
-; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false)
-; CHECK-NEXT:    [[C:%.*]] = add i8 [[A]], [[B]]
-; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP0]], align 1
-; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       entry_after_outline.exitStub:
-; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/test/Transforms/IROutliner/outline-memmove.ll b/llvm/test/Transforms/IROutliner/outline-memmove.ll
deleted file mode 100644
index 45e573c52653b..0000000000000
--- a/llvm/test/Transforms/IROutliner/outline-memmove.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; This test checks that we sucecssfully outline identical memmove instructions.
-
-declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
-
-define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
-entry:
-  %a = load i8, i8* %s
-  %b = load i8, i8* %d
-  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
-  %c = add i8 %a, %b
-  %ret = load i8, i8* %s
-  ret i8 %ret
-}
-
-define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
-entry:
-  %a = load i8, i8* %s
-  %b = load i8, i8* %d
-  call void @llvm.memmove.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %len, i1 false)
-  %c = add i8 %a, %b
-  %ret = load i8, i8* %s
-  ret i8 %ret
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
-; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RET_LOC:%.*]] = alloca i8, align 1
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[S:%.*]], i8* [[D:%.*]], i64 [[LEN:%.*]], i8* [[RET_LOC]])
-; CHECK-NEXT:    [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT:    ret i8 [[RET_RELOAD]]
-;
-;
-; CHECK: define internal void @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
-; CHECK:       entry_to_outline:
-; CHECK-NEXT:    [[A:%.*]] = load i8, i8* [[TMP0:%.*]], align 1
-; CHECK-NEXT:    [[B:%.*]] = load i8, i8* [[TMP1:%.*]], align 1
-; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* [[TMP1]], i8* [[TMP0]], i64 [[TMP2:%.*]], i1 false)
-; CHECK-NEXT:    [[C:%.*]] = add i8 [[A]], [[B]]
-; CHECK-NEXT:    [[RET:%.*]] = load i8, i8* [[TMP0]], align 1
-; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       entry_after_outline.exitStub:
-; CHECK-NEXT:    store i8 [[RET]], i8* [[TMP3:%.*]], align 1
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/test/Transforms/IROutliner/outline-memset.ll b/llvm/test/Transforms/IROutliner/outline-memset.ll
deleted file mode 100644
index 65dd04978d0a6..0000000000000
--- a/llvm/test/Transforms/IROutliner/outline-memset.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; This test checks that we successfully outline identical memset instructions.
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
-
-define i64 @function1(i64 %x, i64 %z, i64 %n) {
-entry:
-  %pool = alloca [59 x i64], align 4
-  %tmp = bitcast [59 x i64]* %pool to i8*
-  call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
-  %cmp3 = icmp eq i64 %n, 0
-  %a = add i64 %x, %z
-  %c = add i64 %x, %z
-  ret i64 0
-}
-
-define i64 @function2(i64 %x, i64 %z, i64 %n) {
-entry:
-  %pool = alloca [59 x i64], align 4
-  %tmp = bitcast [59 x i64]* %pool to i8*
-  call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
-  %cmp3 = icmp eq i64 %n, 0
-  %a = add i64 %x, %z
-  %c = add i64 %x, %z
-  ret i64 0
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[POOL:%.*]] = alloca [59 x i64], align 4
-; CHECK-NEXT:    call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
-; CHECK-NEXT:    ret i64 0
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[POOL:%.*]] = alloca [59 x i64], align 4
-; CHECK-NEXT:    call void @outlined_ir_func_0([59 x i64]* [[POOL]], i64 [[N:%.*]], i64 [[X:%.*]], i64 [[Z:%.*]])
-; CHECK-NEXT:    ret i64 0
-;
-;
-; CHECK: define internal void @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
-; CHECK:       entry_to_outline:
-; CHECK-NEXT:    [[TMP:%.*]] = bitcast [59 x i64]* [[TMP0:%.*]] to i8*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[TMP]], i8 0, i64 236, i1 false)
-; CHECK-NEXT:    [[CMP3:%.*]] = icmp eq i64 [[TMP1:%.*]], 0
-; CHECK-NEXT:    [[A:%.*]] = add i64 [[TMP2:%.*]], [[TMP3:%.*]]
-; CHECK-NEXT:    [[C:%.*]] = add i64 [[TMP2]], [[TMP3]]
-; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       entry_after_outline.exitStub:
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll b/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
deleted file mode 100644
index 8e36335b3120e..0000000000000
--- a/llvm/test/Transforms/IROutliner/outline-vaarg-intrinsic.ll
+++ /dev/null
@@ -1,90 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; This test checks that we sucessfully outline identical memcpy var arg
-; intrinsics, but not the var arg instruction itself.
-
-declare void @llvm.va_start(i8*)
-declare void @llvm.va_copy(i8*, i8*)
-declare void @llvm.va_end(i8*)
-
-define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind {
-entry:
-  %a.addr = alloca i32, align 4
-  %b.addr = alloca double, align 8
-  %ap = alloca i8*, align 4
-  %c = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store double %b, double* %b.addr, align 8
-  %ap1 = bitcast i8** %ap to i8*
-  call void @llvm.va_start(i8* %ap1)
-  %0 = va_arg i8** %ap, i32
-  call void @llvm.va_copy(i8* %v, i8* %ap1)
-  call void @llvm.va_end(i8* %ap1)
-  store i32 %0, i32* %c, align 4
-  %tmp = load i32, i32* %c, align 4
-  ret i32 %tmp
-}
-
-define i32 @func2(i32 %a, double %b, i8* %v, ...) nounwind {
-entry:
-  %a.addr = alloca i32, align 4
-  %b.addr = alloca double, align 8
-  %ap = alloca i8*, align 4
-  %c = alloca i32, align 4
-  store i32 %a, i32* %a.addr, align 4
-  store double %b, double* %b.addr, align 8
-  %ap1 = bitcast i8** %ap to i8*
-  call void @llvm.va_start(i8* %ap1)
-  %0 = va_arg i8** %ap, i32
-  call void @llvm.va_copy(i8* %v, i8* %ap1)
-  call void @llvm.va_end(i8* %ap1)
-  store i32 %0, i32* %c, align 4
-  %ap2 = bitcast i8** %ap to i8*
-  %tmp = load i32, i32* %c, align 4
-  ret i32 %tmp
-}
-; CHECK-LABEL: @func1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
-; CHECK-NEXT:    [[AP:%.*]] = alloca i8*, align 4
-; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-; CHECK-NEXT:    [[AP1:%.*]] = bitcast i8** [[AP]] to i8*
-; CHECK-NEXT:    call void @llvm.va_start(i8* [[AP1]])
-; CHECK-NEXT:    [[TMP0:%.*]] = va_arg i8** [[AP]], i32
-; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]])
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[C]], align 4
-; CHECK-NEXT:    ret i32 [[TMP]]
-;
-;
-; CHECK-LABEL: @func2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
-; CHECK-NEXT:    [[AP:%.*]] = alloca i8*, align 4
-; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-; CHECK-NEXT:    [[AP1:%.*]] = bitcast i8** [[AP]] to i8*
-; CHECK-NEXT:    call void @llvm.va_start(i8* [[AP1]])
-; CHECK-NEXT:    [[TMP0:%.*]] = va_arg i8** [[AP]], i32
-; CHECK-NEXT:    call void @outlined_ir_func_0(i8* [[V:%.*]], i8* [[AP1]], i32 [[TMP0]], i32* [[C]])
-; CHECK-NEXT:    [[AP2:%.*]] = bitcast i8** [[AP]] to i8*
-; CHECK-NEXT:    [[TMP:%.*]] = load i32, i32* [[C]], align 4
-; CHECK-NEXT:    ret i32 [[TMP]]
-;
-;
-; CHECK: define internal void @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
-; CHECK:       entry_to_outline:
-; CHECK-NEXT:    call void @llvm.va_copy(i8* [[TMP0:%.*]], i8* [[TMP1:%.*]])
-; CHECK-NEXT:    call void @llvm.va_end(i8* [[TMP1]])
-; CHECK-NEXT:    store i32 [[TMP2:%.*]], i32* [[TMP3:%.*]], align 4
-; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       entry_after_outline.exitStub:
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
index fb2cc07921621..a8781e8f78af7 100644
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -1479,8 +1479,7 @@ TEST(IRInstructionMapper, CleanuppadIllegal) {
 // are considered illegal since is extra checking needed to handle the address
 // space checking.
 
-// Checks that a memset instruction is mapped to an illegal value when
-// specified.
+// Checks that a memset instruction is mapped to an illegal value.
 TEST(IRInstructionMapper, MemSetIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1504,7 +1503,6 @@ TEST(IRInstructionMapper, MemSetIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1512,8 +1510,7 @@ TEST(IRInstructionMapper, MemSetIllegal) {
   ASSERT_TRUE(UnsignedVec[2] < UnsignedVec[0]);
 }
 
-// Checks that a memcpy instruction is mapped to an illegal value  when
-// specified.
+// Checks that a memcpy instruction is mapped to an illegal value.
 TEST(IRInstructionMapper, MemCpyIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1537,7 +1534,6 @@ TEST(IRInstructionMapper, MemCpyIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1546,8 +1542,7 @@ TEST(IRInstructionMapper, MemCpyIllegal) {
   ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
 }
 
-// Checks that a memmove instruction is mapped to an illegal value  when
-// specified.
+// Checks that a memmove instruction is mapped to an illegal value.
 TEST(IRInstructionMapper, MemMoveIllegal) {
   StringRef ModuleString = R"(
   declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
@@ -1571,7 +1566,6 @@ TEST(IRInstructionMapper, MemMoveIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
@@ -1579,45 +1573,6 @@ TEST(IRInstructionMapper, MemMoveIllegal) {
   ASSERT_LT(UnsignedVec[2], UnsignedVec[0]);
 }
 
-// Checks that mem* instructions are mapped to an legal value when not
-// specified, and that all the intrinsics are marked differently.
-TEST(IRInstructionMapper, MemOpsLegal) {
-  StringRef ModuleString = R"(
-  declare void @llvm.memmove.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
-  declare void @llvm.memcpy.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
-  declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1)
-
-  define i64 @function(i64 %x, i64 %z, i64 %n) {
-  entry:
-    %pool = alloca [59 x i64], align 4
-    %tmp = bitcast [59 x i64]* %pool to i8*
-    call void @llvm.memmove.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
-    call void @llvm.memcpy.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
-    call void @llvm.memset.p0i8.i64(i8* nonnull %tmp, i8 0, i64 236, i32 4, i1 false)
-    %cmp3 = icmp eq i64 %n, 0
-    %a = add i64 %x, %z
-    %c = add i64 %x, %z
-    ret i64 0
-  })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
-  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
-  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.InstClassifier.EnableIntrinsics = true;
-  getVectors(*M, Mapper, InstrList, UnsignedVec);
-
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(9));
-  ASSERT_LT(UnsignedVec[2], UnsignedVec[3]);
-  ASSERT_LT(UnsignedVec[3], UnsignedVec[4]);
-  ASSERT_LT(UnsignedVec[4], UnsignedVec[5]);
-}
-
 // Checks that a variable argument instructions are mapped to an illegal value.
 // We exclude variable argument instructions since variable arguments
 // requires extra checking of the argument list.
@@ -1659,7 +1614,6 @@ TEST(IRInstructionMapper, VarArgsIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.InstClassifier.EnableIntrinsics = false;
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());

From 515eec3553b02533e9a88ee84bc245d5415163da Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Sun, 2 Jan 2022 16:25:43 -0600
Subject: [PATCH 630/946] [IRSim][IROutliner] Add support for outlining
 PHINodes with the rest of the region.

---
 .../llvm/Analysis/IRSimilarityIdentifier.h    |  17 +-
 llvm/include/llvm/Transforms/IPO/IROutliner.h |   3 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  |  37 +++++
 llvm/lib/Transforms/IPO/IROutliner.cpp        | 110 ++++++++++--
 .../IROutliner/included-phi-nodes-begin.ll    |  93 +++++++++++
 .../IROutliner/included-phi-nodes-end.ll      |  94 +++++++++++
 .../must-capture-all-phi-nodes-begin.ll       | 108 ++++++++++++
 .../must-capture-all-phi-nodes-end.ll         |  88 ++++++++++
 .../outlining-branches-phi-nodes.ll           |   8 +
 .../IROutliner/outlining-exits-to-phi-node.ll |   8 +
 .../IROutliner/phi-nodes-non-constant.ll      |  74 +++++++++
 .../Transforms/IROutliner/phi-nodes-simple.ll |  58 +++++++
 .../IROutliner/region-inputs-in-phi-nodes.ll  |   8 +
 .../Analysis/IRSimilarityIdentifierTest.cpp   | 156 ++++++++++++++++--
 14 files changed, 833 insertions(+), 29 deletions(-)
 create mode 100644 llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
 create mode 100644 llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
 create mode 100644 llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
 create mode 100644 llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
 create mode 100644 llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
 create mode 100644 llvm/test/Transforms/IROutliner/phi-nodes-simple.ll

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 966bf02d128ec..c7759e0bbf0f8 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -214,6 +214,16 @@ struct IRInstructionData
   /// function name as a differentiating parameter.
   void setCalleeName(bool MatchByName = true);
 
+  /// For an IRInstructionData containing a PHINode, finds the
+  /// relative distances from the incoming basic block to the current block by
+  /// taking the difference of the number assigned to the current basic block
+  /// and the incoming basic block of the branch.
+  ///
+  /// \param BasicBlockToInteger - The mapping of basic blocks to their location
+  /// in the module.
+  void
+  setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
+
   /// Hashes \p Value based on its opcode, types, and operand types.
   /// Two IRInstructionData instances produce the same hash when they perform
   /// the same operation.
@@ -497,8 +507,11 @@ struct IRInstructionMapper {
         return Legal;
       return Illegal;
     }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    InstrType visitPHINode(PHINode &PN) { return Illegal; }
+    InstrType visitPHINode(PHINode &PN) { 
+      if (EnableBranches)
+        return Legal;
+      return Illegal;
+    }
     // TODO: Handle allocas.
     InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
     // We exclude variable argument instructions since variable arguments
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 9799737a529e3..ed74c8ed0e96c 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -342,8 +342,7 @@ class IROutliner {
     bool visitBranchInst(BranchInst &BI) { 
       return EnableBranches;
     }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    bool visitPHINode(PHINode &PN) { return false; }
+    bool visitPHINode(PHINode &PN) { return EnableBranches; }
     // TODO: Handle allocas.
     bool visitAllocaInst(AllocaInst &AI) { return false; }
     // VAArg instructions are not allowed since this could cause difficulty when
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 8c3c0400d6015..0ce3d218eb33a 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -70,6 +70,12 @@ void IRInstructionData::initializeInstruction() {
 
     OperVals.push_back(OI.get());
   }
+
+  // We capture the incoming BasicBlocks as values as well as the incoming
+  // Values in order to check for structural similarity.
+  if (PHINode *PN = dyn_cast<PHINode>(Inst))
+    for (BasicBlock *BB : PN->blocks())
+      OperVals.push_back(BB);
 }
 
 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
@@ -108,6 +114,34 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
     CalleeName = CI->getCalledFunction()->getName().str();
 }
 
+void IRInstructionData::setPHIPredecessors(
+    DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
+  assert(isa<PHINode>(Inst) && "Instruction must be phi node");
+
+  PHINode *PN = cast<PHINode>(Inst);
+  DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
+
+  BBNumIt = BasicBlockToInteger.find(PN->getParent());
+  assert(BBNumIt != BasicBlockToInteger.end() &&
+         "Could not find location for BasicBlock!");
+
+  int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
+
+  // Convert the incoming blocks of the PHINode to an integer value, based on
+  // the relative distances between the current block and the incoming block.
+  for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
+    BasicBlock *Incoming = PN->getIncomingBlock(Idx);
+    BBNumIt = BasicBlockToInteger.find(Incoming);
+    assert(BBNumIt != BasicBlockToInteger.end() &&
+           "Could not find number for BasicBlock!");
+    int OtherBlockNumber = static_cast<int>(BBNumIt->second);
+
+    int Relative = OtherBlockNumber - CurrentBlockNumber;
+    RelativeBlockLocations.push_back(Relative);
+    RelativeBlockLocations.push_back(Relative);
+  }
+}
+
 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
   switch (CI->getPredicate()) {
   case CmpInst::FCMP_OGT:
@@ -270,6 +304,9 @@ unsigned IRInstructionMapper::mapToLegalUnsigned(
   if (isa<CallInst>(*It))
     ID->setCalleeName(EnableMatchCallsByName);
 
+  if (isa<PHINode>(*It))
+    ID->setPHIPredecessors(BasicBlockToInteger);
+
   // Add to the instruction list
   bool WasInserted;
   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 9c79972443fe6..fd19b669c3c04 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -185,6 +185,44 @@ Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
   return FoundValueOpt.getValueOr(nullptr);
 }
 
+/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
+/// in \p Included to branch to BasicBlock \p Replace if they currently branch
+/// to the BasicBlock \p Find.  This is used to fix up the incoming basic blocks
+/// when PHINodes are included in outlined regions.
+///
+/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
+/// checked.
+/// \param Find - The successor block to be replaced.
+/// \param Replace - The new succesor block to branch to.
+/// \param Included - The set of blocks about to be outlined.
+static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
+                                      BasicBlock *Replace,
+                                      DenseSet<BasicBlock *> &Included) {
+  for (PHINode &PN : PHIBlock->phis()) {
+    for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
+         ++Idx) {
+      // Check if the incoming block is included in the set of blocks being
+      // outlined.
+      BasicBlock *Incoming = PN.getIncomingBlock(Idx);
+      if (!Included.contains(Incoming))
+        continue;
+
+      BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
+      assert(BI && "Not a branch instruction?");
+      // Look over the branching instructions into this block to see if we
+      // used to branch to Find in this outlined block.
+      for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
+           Succ++) {
+        // If we have found the block to replace, we do so here.
+        if (BI->getSuccessor(Succ) != Find)
+          continue;
+        BI->setSuccessor(Succ, Replace);
+      }
+    }
+  }
+}
+
+
 void OutlinableRegion::splitCandidate() {
   assert(!CandidateSplit && "Candidate already split!");
 
@@ -215,6 +253,39 @@ void OutlinableRegion::splitCandidate() {
   StartBB = StartInst->getParent();
   PrevBB = StartBB;
 
+  DenseSet<BasicBlock *> BBSet;
+  Candidate->getBasicBlocks(BBSet);
+
+  // We iterate over the instructions in the region, if we find a PHINode, we
+  // check if there are predecessors outside of the region, if there are,
+  // we ignore this region since we are unable to handle the severing of the
+  // phi node right now. 
+  BasicBlock::iterator It = StartInst->getIterator();
+  while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
+    unsigned NumPredsOutsideRegion = 0;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!BBSet.contains(PN->getIncomingBlock(i)))
+        ++NumPredsOutsideRegion;
+
+    if (NumPredsOutsideRegion > 1)
+      return;
+    
+    It++;
+  }
+
+  // If the region starts with a PHINode, but is not the initial instruction of
+  // the BasicBlock, we ignore this region for now.
+  if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
+    return;
+  
+  // If the region ends with a PHINode, but does not contain all of the phi node
+  // instructions of the region, we ignore it for now.
+  if (isa<PHINode>(BackInst)) {
+    EndBB = BackInst->getParent();
+    if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
+      return;
+  }
+
   // The basic block gets split like so:
   // block:                 block:
   //   inst1                  inst1
@@ -241,12 +312,20 @@ void OutlinableRegion::splitCandidate() {
     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
-    return;
+  } else {
+    EndBB = BackInst->getParent();
+    EndsInBranch = true;
+    FollowBB = nullptr;
   }
 
-  EndBB = BackInst->getParent();
-  EndsInBranch = true;
-  FollowBB = nullptr;
+  // Refind the basic block set.
+  BBSet.clear();
+  Candidate->getBasicBlocks(BBSet);
+  // For the phi nodes in the new starting basic block of the region, we
+  // reassign the targets of the basic blocks branching instructions.
+  replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet);
+  if (FollowBB)
+    replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet);
 }
 
 void OutlinableRegion::reattachCandidate() {
@@ -268,15 +347,21 @@ void OutlinableRegion::reattachCandidate() {
   //   inst4
   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
 
-  // StartBB should only have one predecessor since we put an unconditional
-  // branch at the end of PrevBB when we split the BasicBlock.
-  PrevBB = StartBB->getSinglePredecessor();
-  assert(PrevBB != nullptr &&
-         "No Predecessor for the region start basic block!");
-
   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
   PrevBB->getTerminator()->eraseFromParent();
 
+  // If we reattaching after outlining, we iterate over the phi nodes to
+  // the initial block, and reassign the branch instructions of the incoming
+  // blocks to the block we are remerging into.
+  if (!ExtractedFunction) {
+    DenseSet<BasicBlock *> BBSet;
+    Candidate->getBasicBlocks(BBSet);
+
+    replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet);
+    if (!EndsInBranch)
+      replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet);
+  }
+
   moveBBContents(*StartBB, *PrevBB);
 
   BasicBlock *PlacementBB = PrevBB;
@@ -1622,7 +1707,8 @@ replaceArgumentUses(OutlinableRegion &Region,
 
       // If this is storing a PHINode, we must make sure it is included in the
       // overall function.
-      if (!isa<PHINode>(ValueOperand)) {
+      if (!isa<PHINode>(ValueOperand) ||
+          Region.Candidate->getGVN(ValueOperand).hasValue()) {
         if (FirstFunction)
           continue;
         Value *CorrVal =
@@ -2161,7 +2247,7 @@ void IROutliner::pruneIncompatibleRegions(
   if (FirstCandidate.getLength() == 2) {
     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
         isa<BranchInst>(FirstCandidate.back()->Inst))
-        return;
+      return;
   }
 
   unsigned CurrentEndIdx = 0;
diff --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
new file mode 100644
index 0000000000000..e519fb318ba87
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to outline when all of the phi nodes in the starting
+; block are included in the region and there is no more than one predecessor
+; into those phi nodes from outside of the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK:       test1_to_outline:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[E:%.*]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0:%.*]], [[NEWFUNCROOT:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[E]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0]], [[NEWFUNCROOT]] ]
+; CHECK-NEXT:    [[E]] = load i32, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP2:%.*]], [[TMP2]]
+; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_TO_OUTLINE]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
new file mode 100644
index 0000000000000..0f9750576139a
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to propogate inputs to the region into the split PHINode
+; outside of the region if necessary.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
+; CHECK:       entry_after_outline:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
+; CHECK:       entry_after_outline:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret i1 false
+;
diff --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
new file mode 100644
index 0000000000000..debaef12687b7
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not outline when all of the phi nodes in the beginning
+; block are included not in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %y, %entry ], [ %e, %test1 ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[E_RELOAD]], [[TEST1]] ], [ [[Y]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[Y]], [[ENTRY]] ], [ [[E_RELOAD]], [[TEST1]] ]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK:       test1_to_outline:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_EXITSTUB:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       test1.exitStub:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    ret i1 true
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    ret i1 false
+;
diff --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
new file mode 100644
index 0000000000000..a5d0314f21359
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not  outline when all of the phi nodes in the end
+; block are not included in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %c, %entry ], [ %e, %test1 ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[C]], [[ENTRY]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
index 4e777e862543e..59b08422cec3a 100644
--- a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
@@ -38,6 +38,8 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
+dummy:
+  ret void
 block_6:
   %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -76,6 +78,8 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
+dummy:
+  ret void
 block_6:
   %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -102,6 +106,8 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void
@@ -128,6 +134,8 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
index 77cb4a5bc476f..397121f000cd9 100644
--- a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
@@ -15,6 +15,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
+dummy:
+  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -31,6 +33,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
+dummy:
+  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -45,6 +49,8 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -60,6 +66,8 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
new file mode 100644
index 0000000000000..d56ffa5730552
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do extract phi nodes from the regions.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    store i32 2, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
new file mode 100644
index 0000000000000..e5afb89a26138
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to outline the simple phi node case of constants when
+; the corresponding labels match.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  br label %test
+test:
+  br label %first
+first:
+  %0 = phi i32 [ 0, %test ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  br label %test
+test:
+  br label %first
+first:
+  %0 = phi i32 [ 0, %test ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST_TO_OUTLINE:%.*]]
+; CHECK:       test_to_outline:
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ 0, [[TEST_TO_OUTLINE]] ]
+; CHECK-NEXT:    store i32 2, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       test_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
index 258bbfe131e3e..62d936d68ff2f 100644
--- a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
@@ -17,6 +17,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
+dummy:
+  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -37,6 +39,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
+dummy:
+  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -55,6 +59,8 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -74,6 +80,8 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
index a8781e8f78af7..97d9be3d8985a 100644
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -757,26 +757,41 @@ TEST(IRInstructionMapper, BranchLegal) {
   ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]);
 }
 
-// In most cases, the illegal instructions we are collecting don't require any
-// sort of setup.  In these cases, we can just only have illegal instructions,
-// and the mapper will create 0 length vectors, and we can check that.
+// Checks that a PHINode is mapped to be legal.
+TEST(IRInstructionMapper, PhiLegal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
+                             ret i32 0
+                          bb1:
+                             ret i32 1
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
 
-// In cases where we have legal instructions needed to set up the illegal
-// instruction, to check illegal instructions are assigned unsigned integers
-// from the maximum value decreasing to 0, it will be greater than a legal
-// instruction that comes after.  So to check that we have an illegal
-// instruction, we place a legal instruction after an illegal instruction, and
-// check that the illegal unsigned integer is greater than the unsigned integer
-// of the legal instruction.
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+}
 
-// Checks that a PHINode is mapped to be illegal since there is extra checking
-// needed to ensure that a branch in one region is bin an isomorphic
-// location in a different region.
+// Checks that a PHINode is mapped to be legal.
 TEST(IRInstructionMapper, PhiIllegal) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {
                           bb0:
                              %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
                              ret i32 0
                           bb1:
                              ret i32 1
@@ -790,12 +805,25 @@ TEST(IRInstructionMapper, PhiIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.initializeForBBs(*M);
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
   ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
 }
 
+// In most cases, the illegal instructions we are collecting don't require any
+// sort of setup.  In these cases, we can just only have illegal instructions,
+// and the mapper will create 0 length vectors, and we can check that.
+
+// In cases where we have legal instructions needed to set up the illegal
+// instruction, to check illegal instructions are assigned unsigned integers
+// from the maximum value decreasing to 0, it will be greater than a legal
+// instruction that comes after.  So to check that we have an illegal
+// instruction, we place a legal instruction after an illegal instruction, and
+// check that the illegal unsigned integer is greater than the unsigned integer
+// of the legal instruction.
+
 // Checks that an alloca instruction is mapped to be illegal.
 TEST(IRInstructionMapper, AllocaIllegal) {
   StringRef ModuleString = R"(
@@ -2346,6 +2374,108 @@ TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) {
   ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6));
 }
 
+// Checks that the same structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, SamePHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(11));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList, true, 4, 0, 6));
+}
+
+// Checks that the different structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, DifferentPHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb3 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(13));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_FALSE(longSimCandCompare(InstrList, true, 5, 0, 7));
+}
+
 // Checks that two sets of identical instructions are found to be the same.
 // Both sequences of adds have the same operand ordering, and the same
 // instructions, making them strcturally equivalent.

From e50b217b4e6fb0be9b93d760e01462c117fe9318 Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Tue, 25 Jan 2022 18:23:38 -0600
Subject: [PATCH 631/946] Revert "[IRSim][IROutliner] Add support for outlining
 PHINodes with the rest of the region."

This reverts commit 515eec3553b02533e9a88ee84bc245d5415163da.

By mistake, commit message was not complete.
---
 .../llvm/Analysis/IRSimilarityIdentifier.h    |  17 +-
 llvm/include/llvm/Transforms/IPO/IROutliner.h |   3 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  |  37 -----
 llvm/lib/Transforms/IPO/IROutliner.cpp        | 110 ++----------
 .../IROutliner/included-phi-nodes-begin.ll    |  93 -----------
 .../IROutliner/included-phi-nodes-end.ll      |  94 -----------
 .../must-capture-all-phi-nodes-begin.ll       | 108 ------------
 .../must-capture-all-phi-nodes-end.ll         |  88 ----------
 .../outlining-branches-phi-nodes.ll           |   8 -
 .../IROutliner/outlining-exits-to-phi-node.ll |   8 -
 .../IROutliner/phi-nodes-non-constant.ll      |  74 ---------
 .../Transforms/IROutliner/phi-nodes-simple.ll |  58 -------
 .../IROutliner/region-inputs-in-phi-nodes.ll  |   8 -
 .../Analysis/IRSimilarityIdentifierTest.cpp   | 156 ++----------------
 14 files changed, 29 insertions(+), 833 deletions(-)
 delete mode 100644 llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
 delete mode 100644 llvm/test/Transforms/IROutliner/phi-nodes-simple.ll

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index c7759e0bbf0f8..966bf02d128ec 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -214,16 +214,6 @@ struct IRInstructionData
   /// function name as a differentiating parameter.
   void setCalleeName(bool MatchByName = true);
 
-  /// For an IRInstructionData containing a PHINode, finds the
-  /// relative distances from the incoming basic block to the current block by
-  /// taking the difference of the number assigned to the current basic block
-  /// and the incoming basic block of the branch.
-  ///
-  /// \param BasicBlockToInteger - The mapping of basic blocks to their location
-  /// in the module.
-  void
-  setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
-
   /// Hashes \p Value based on its opcode, types, and operand types.
   /// Two IRInstructionData instances produce the same hash when they perform
   /// the same operation.
@@ -507,11 +497,8 @@ struct IRInstructionMapper {
         return Legal;
       return Illegal;
     }
-    InstrType visitPHINode(PHINode &PN) { 
-      if (EnableBranches)
-        return Legal;
-      return Illegal;
-    }
+    // TODO: Determine a scheme to resolve when the labels are similar enough.
+    InstrType visitPHINode(PHINode &PN) { return Illegal; }
     // TODO: Handle allocas.
     InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
     // We exclude variable argument instructions since variable arguments
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index ed74c8ed0e96c..9799737a529e3 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -342,7 +342,8 @@ class IROutliner {
     bool visitBranchInst(BranchInst &BI) { 
       return EnableBranches;
     }
-    bool visitPHINode(PHINode &PN) { return EnableBranches; }
+    // TODO: Determine a scheme to resolve when the labels are similar enough.
+    bool visitPHINode(PHINode &PN) { return false; }
     // TODO: Handle allocas.
     bool visitAllocaInst(AllocaInst &AI) { return false; }
     // VAArg instructions are not allowed since this could cause difficulty when
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 0ce3d218eb33a..8c3c0400d6015 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -70,12 +70,6 @@ void IRInstructionData::initializeInstruction() {
 
     OperVals.push_back(OI.get());
   }
-
-  // We capture the incoming BasicBlocks as values as well as the incoming
-  // Values in order to check for structural similarity.
-  if (PHINode *PN = dyn_cast<PHINode>(Inst))
-    for (BasicBlock *BB : PN->blocks())
-      OperVals.push_back(BB);
 }
 
 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
@@ -114,34 +108,6 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
     CalleeName = CI->getCalledFunction()->getName().str();
 }
 
-void IRInstructionData::setPHIPredecessors(
-    DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
-  assert(isa<PHINode>(Inst) && "Instruction must be phi node");
-
-  PHINode *PN = cast<PHINode>(Inst);
-  DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
-
-  BBNumIt = BasicBlockToInteger.find(PN->getParent());
-  assert(BBNumIt != BasicBlockToInteger.end() &&
-         "Could not find location for BasicBlock!");
-
-  int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
-
-  // Convert the incoming blocks of the PHINode to an integer value, based on
-  // the relative distances between the current block and the incoming block.
-  for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
-    BasicBlock *Incoming = PN->getIncomingBlock(Idx);
-    BBNumIt = BasicBlockToInteger.find(Incoming);
-    assert(BBNumIt != BasicBlockToInteger.end() &&
-           "Could not find number for BasicBlock!");
-    int OtherBlockNumber = static_cast<int>(BBNumIt->second);
-
-    int Relative = OtherBlockNumber - CurrentBlockNumber;
-    RelativeBlockLocations.push_back(Relative);
-    RelativeBlockLocations.push_back(Relative);
-  }
-}
-
 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
   switch (CI->getPredicate()) {
   case CmpInst::FCMP_OGT:
@@ -304,9 +270,6 @@ unsigned IRInstructionMapper::mapToLegalUnsigned(
   if (isa<CallInst>(*It))
     ID->setCalleeName(EnableMatchCallsByName);
 
-  if (isa<PHINode>(*It))
-    ID->setPHIPredecessors(BasicBlockToInteger);
-
   // Add to the instruction list
   bool WasInserted;
   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index fd19b669c3c04..9c79972443fe6 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -185,44 +185,6 @@ Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
   return FoundValueOpt.getValueOr(nullptr);
 }
 
-/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
-/// in \p Included to branch to BasicBlock \p Replace if they currently branch
-/// to the BasicBlock \p Find.  This is used to fix up the incoming basic blocks
-/// when PHINodes are included in outlined regions.
-///
-/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
-/// checked.
-/// \param Find - The successor block to be replaced.
-/// \param Replace - The new succesor block to branch to.
-/// \param Included - The set of blocks about to be outlined.
-static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
-                                      BasicBlock *Replace,
-                                      DenseSet<BasicBlock *> &Included) {
-  for (PHINode &PN : PHIBlock->phis()) {
-    for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
-         ++Idx) {
-      // Check if the incoming block is included in the set of blocks being
-      // outlined.
-      BasicBlock *Incoming = PN.getIncomingBlock(Idx);
-      if (!Included.contains(Incoming))
-        continue;
-
-      BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
-      assert(BI && "Not a branch instruction?");
-      // Look over the branching instructions into this block to see if we
-      // used to branch to Find in this outlined block.
-      for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
-           Succ++) {
-        // If we have found the block to replace, we do so here.
-        if (BI->getSuccessor(Succ) != Find)
-          continue;
-        BI->setSuccessor(Succ, Replace);
-      }
-    }
-  }
-}
-
-
 void OutlinableRegion::splitCandidate() {
   assert(!CandidateSplit && "Candidate already split!");
 
@@ -253,39 +215,6 @@ void OutlinableRegion::splitCandidate() {
   StartBB = StartInst->getParent();
   PrevBB = StartBB;
 
-  DenseSet<BasicBlock *> BBSet;
-  Candidate->getBasicBlocks(BBSet);
-
-  // We iterate over the instructions in the region, if we find a PHINode, we
-  // check if there are predecessors outside of the region, if there are,
-  // we ignore this region since we are unable to handle the severing of the
-  // phi node right now. 
-  BasicBlock::iterator It = StartInst->getIterator();
-  while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
-    unsigned NumPredsOutsideRegion = 0;
-    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
-      if (!BBSet.contains(PN->getIncomingBlock(i)))
-        ++NumPredsOutsideRegion;
-
-    if (NumPredsOutsideRegion > 1)
-      return;
-    
-    It++;
-  }
-
-  // If the region starts with a PHINode, but is not the initial instruction of
-  // the BasicBlock, we ignore this region for now.
-  if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
-    return;
-  
-  // If the region ends with a PHINode, but does not contain all of the phi node
-  // instructions of the region, we ignore it for now.
-  if (isa<PHINode>(BackInst)) {
-    EndBB = BackInst->getParent();
-    if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
-      return;
-  }
-
   // The basic block gets split like so:
   // block:                 block:
   //   inst1                  inst1
@@ -312,20 +241,12 @@ void OutlinableRegion::splitCandidate() {
     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
-  } else {
-    EndBB = BackInst->getParent();
-    EndsInBranch = true;
-    FollowBB = nullptr;
+    return;
   }
 
-  // Refind the basic block set.
-  BBSet.clear();
-  Candidate->getBasicBlocks(BBSet);
-  // For the phi nodes in the new starting basic block of the region, we
-  // reassign the targets of the basic blocks branching instructions.
-  replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet);
-  if (FollowBB)
-    replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet);
+  EndBB = BackInst->getParent();
+  EndsInBranch = true;
+  FollowBB = nullptr;
 }
 
 void OutlinableRegion::reattachCandidate() {
@@ -347,21 +268,15 @@ void OutlinableRegion::reattachCandidate() {
   //   inst4
   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
 
+  // StartBB should only have one predecessor since we put an unconditional
+  // branch at the end of PrevBB when we split the BasicBlock.
+  PrevBB = StartBB->getSinglePredecessor();
+  assert(PrevBB != nullptr &&
+         "No Predecessor for the region start basic block!");
+
   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
   PrevBB->getTerminator()->eraseFromParent();
 
-  // If we reattaching after outlining, we iterate over the phi nodes to
-  // the initial block, and reassign the branch instructions of the incoming
-  // blocks to the block we are remerging into.
-  if (!ExtractedFunction) {
-    DenseSet<BasicBlock *> BBSet;
-    Candidate->getBasicBlocks(BBSet);
-
-    replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet);
-    if (!EndsInBranch)
-      replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet);
-  }
-
   moveBBContents(*StartBB, *PrevBB);
 
   BasicBlock *PlacementBB = PrevBB;
@@ -1707,8 +1622,7 @@ replaceArgumentUses(OutlinableRegion &Region,
 
       // If this is storing a PHINode, we must make sure it is included in the
       // overall function.
-      if (!isa<PHINode>(ValueOperand) ||
-          Region.Candidate->getGVN(ValueOperand).hasValue()) {
+      if (!isa<PHINode>(ValueOperand)) {
         if (FirstFunction)
           continue;
         Value *CorrVal =
@@ -2247,7 +2161,7 @@ void IROutliner::pruneIncompatibleRegions(
   if (FirstCandidate.getLength() == 2) {
     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
         isa<BranchInst>(FirstCandidate.back()->Inst))
-      return;
+        return;
   }
 
   unsigned CurrentEndIdx = 0;
diff --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
deleted file mode 100644
index e519fb318ba87..0000000000000
--- a/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
+++ /dev/null
@@ -1,93 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; Show that we are able to outline when all of the phi nodes in the starting
-; block are included in the region and there is no more than one predecessor
-; into those phi nodes from outside of the region.
-
-define void @function1(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  %y = add i32 %c, %c
-  br label %test1
-dummy:
-  ret void
-test1:
-  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
-  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
-  %e = load i32, i32* %0, align 4
-  %3 = add i32 %c, %c
-  br i1 true, label %test, label %test1
-test:
-  %d = load i32, i32* %0, align 4
-  br label %first
-first:
-  ret void
-}
-
-define void @function2(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  %y = mul i32 %c, %c
-  br label %test1
-dummy:
-  ret void
-test1:
-  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
-  %2 = phi i32 [ %e, %test1 ], [ %y, %entry ]
-  %e = load i32, i32* %0, align 4
-  %3 = add i32 %c, %c
-  br i1 true, label %test, label %test1
-test:
-  %d = load i32, i32* %0, align 4
-  br label %first
-first:
-  ret void
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       dummy:
-; CHECK-NEXT:    ret void
-; CHECK:       test1:
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
-; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK:       first:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       dummy:
-; CHECK-NEXT:    ret void
-; CHECK:       test1:
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
-; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK:       first:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: define internal void @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
-; CHECK:       test1_to_outline:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[E:%.*]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0:%.*]], [[NEWFUNCROOT:%.*]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[E]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0]], [[NEWFUNCROOT]] ]
-; CHECK-NEXT:    [[E]] = load i32, i32* [[TMP1:%.*]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP2:%.*]], [[TMP2]]
-; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_TO_OUTLINE]]
-; CHECK:       test:
-; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP1]], align 4
-; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
-; CHECK:       first.exitStub:
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
deleted file mode 100644
index 0f9750576139a..0000000000000
--- a/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; Show that we are able to propogate inputs to the region into the split PHINode
-; outside of the region if necessary.
-
-define void @function1(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  %z = add i32 %c, %c
-  br i1 true, label %test1, label %first
-test1:
-  %e = load i32, i32* %0, align 4
-  %1 = add i32 %c, %c
-  br i1 true, label %first, label %test
-test:
-  %d = load i32, i32* %0, align 4
-  br i1 true, label %first, label %next
-first:
-  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
-  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
-  ret void
-next:
-  ret void
-}
-
-define void @function2(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  %z = mul i32 %c, %c
-  br i1 true, label %test1, label %first
-test1:
-  %e = load i32, i32* %0, align 4
-  %1 = add i32 %c, %c
-  br i1 true, label %first, label %test
-test:
-  %d = load i32, i32* %0, align 4
-  br i1 true, label %first, label %next
-first:
-  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
-  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
-  ret void
-next:
-  ret void
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
-; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
-; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
-; CHECK:       entry_after_outline:
-; CHECK-NEXT:    ret void
-; CHECK:       next:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
-; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
-; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
-; CHECK:       entry_after_outline:
-; CHECK-NEXT:    ret void
-; CHECK:       next:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: define internal i1 @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
-; CHECK:       entry_to_outline:
-; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
-; CHECK:       test1:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
-; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT_EXITSTUB:%.*]]
-; CHECK:       first:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
-; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
-; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       next.exitStub:
-; CHECK-NEXT:    ret i1 true
-; CHECK:       entry_after_outline.exitStub:
-; CHECK-NEXT:    ret i1 false
-;
diff --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
deleted file mode 100644
index debaef12687b7..0000000000000
--- a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
+++ /dev/null
@@ -1,108 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; Show that we do not outline when all of the phi nodes in the beginning
-; block are included not in the region.
-
-define void @function1(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  %y = add i32 %c, %c
-  br label %test1
-dummy:
-  ret void
-test1:
-  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
-  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
-  %e = load i32, i32* %0, align 4
-  %3 = add i32 %c, %c
-  br i1 true, label %test, label %test1
-test:
-  %d = load i32, i32* %0, align 4
-  br label %first
-first:
-  ret void
-}
-
-define void @function2(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  %y = mul i32 %c, %c
-  br label %test1
-dummy:
-  ret void
-test1:
-  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
-  %2 = phi i32 [ %y, %entry ], [ %e, %test1 ]
-  %e = load i32, i32* %0, align 4
-  %3 = add i32 %c, %c
-  br i1 true, label %test, label %test1
-test:
-  %d = load i32, i32* %0, align 4
-  br label %first
-first:
-  ret void
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       dummy:
-; CHECK-NEXT:    ret void
-; CHECK:       test1:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[E_RELOAD]], [[TEST1]] ], [ [[Y]], [[ENTRY]] ]
-; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
-; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
-; CHECK:       first:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       dummy:
-; CHECK-NEXT:    ret void
-; CHECK:       test1:
-; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[Y]], [[ENTRY]] ], [ [[E_RELOAD]], [[TEST1]] ]
-; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
-; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
-; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
-; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
-; CHECK:       first:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: define internal i1 @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
-; CHECK:       test1_to_outline:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
-; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_EXITSTUB:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
-; CHECK:       test1.exitStub:
-; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2:%.*]], align 4
-; CHECK-NEXT:    ret i1 true
-; CHECK:       first.exitStub:
-; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2]], align 4
-; CHECK-NEXT:    ret i1 false
-;
diff --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
deleted file mode 100644
index a5d0314f21359..0000000000000
--- a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; Show that we do not  outline when all of the phi nodes in the end
-; block are not included in the region.
-
-define void @function1(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  %z = add i32 %c, %c
-  br i1 true, label %test1, label %first
-test1:
-  %e = load i32, i32* %0, align 4
-  %1 = add i32 %c, %c
-  br i1 true, label %first, label %test
-test:
-  %d = load i32, i32* %0, align 4
-  br i1 true, label %first, label %next
-first:
-  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
-  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
-  ret void
-next:
-  ret void
-}
-
-define void @function2(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  %z = mul i32 %c, %c
-  br i1 true, label %test1, label %first
-test1:
-  %e = load i32, i32* %0, align 4
-  %1 = add i32 %c, %c
-  br i1 true, label %first, label %test
-test:
-  %d = load i32, i32* %0, align 4
-  br i1 true, label %first, label %next
-first:
-  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
-  %3 = phi i32 [ %d, %test ], [ %c, %entry ], [ %e, %test1 ]
-  ret void
-next:
-  ret void
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
-; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
-; CHECK:       test1:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
-; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
-; CHECK:       first:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY]] ]
-; CHECK-NEXT:    ret void
-; CHECK:       next:
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
-; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
-; CHECK:       test1:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
-; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
-; CHECK:       first:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[C]], [[ENTRY]] ], [ [[E]], [[TEST1]] ]
-; CHECK-NEXT:    ret void
-; CHECK:       next:
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
index 59b08422cec3a..4e777e862543e 100644
--- a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
@@ -38,8 +38,6 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
-dummy:
-  ret void
 block_6:
   %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -78,8 +76,6 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
-dummy:
-  ret void
 block_6:
   %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -106,8 +102,6 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
-; CHECK: dummy:
-; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void
@@ -134,8 +128,6 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
-; CHECK: dummy:
-; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
index 397121f000cd9..77cb4a5bc476f 100644
--- a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
@@ -15,8 +15,6 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
-dummy:
-  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -33,8 +31,6 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
-dummy:
-  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -49,8 +45,6 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK: dummy:
-; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -66,8 +60,6 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK: dummy:
-; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
deleted file mode 100644
index d56ffa5730552..0000000000000
--- a/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; Show that we do extract phi nodes from the regions.
-
-define void @function1(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  br label %test1
-test1:
-  %e = load i32, i32* %0, align 4
-  br label %first
-test:
-  %d = load i32, i32* %0, align 4
-  br label %first
-first:
-  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
-  store i32 2, i32* %a, align 4
-  store i32 3, i32* %b, align 4
-  ret void
-}
-
-define void @function2(i32* %a, i32* %b) {
-entry:
-  %0 = alloca i32, align 4
-  %c = load i32, i32* %0, align 4
-  br label %test1
-test1:
-  %e = load i32, i32* %0, align 4
-  br label %first
-test:
-  %d = load i32, i32* %0, align 4
-  br label %first
-first:
-  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
-  store i32 2, i32* %a, align 4
-  store i32 3, i32* %b, align 4
-  ret void
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: define internal void @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
-; CHECK:       entry_to_outline:
-; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
-; CHECK-NEXT:    br label [[TEST1:%.*]]
-; CHECK:       test1:
-; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    br label [[FIRST]]
-; CHECK:       first:
-; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
-; CHECK-NEXT:    store i32 2, i32* [[TMP1:%.*]], align 4
-; CHECK-NEXT:    store i32 3, i32* [[TMP2:%.*]], align 4
-; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       entry_after_outline.exitStub:
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
deleted file mode 100644
index e5afb89a26138..0000000000000
--- a/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
-; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
-
-; Show that we are able to outline the simple phi node case of constants when
-; the corresponding labels match.
-
-define void @function1(i32* %a, i32* %b) {
-entry:
-  br label %test
-test:
-  br label %first
-first:
-  %0 = phi i32 [ 0, %test ]
-  store i32 2, i32* %a, align 4
-  store i32 3, i32* %b, align 4
-  ret void
-}
-
-define void @function2(i32* %a, i32* %b) {
-entry:
-  br label %test
-test:
-  br label %first
-first:
-  %0 = phi i32 [ 0, %test ]
-  store i32 2, i32* %a, align 4
-  store i32 3, i32* %b, align 4
-  ret void
-}
-; CHECK-LABEL: @function1(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[TEST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK-LABEL: @function2(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    br label [[TEST:%.*]]
-; CHECK:       test:
-; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
-; CHECK-NEXT:    ret void
-;
-;
-; CHECK: define internal void @outlined_ir_func_0(
-; CHECK-NEXT:  newFuncRoot:
-; CHECK-NEXT:    br label [[TEST_TO_OUTLINE:%.*]]
-; CHECK:       test_to_outline:
-; CHECK-NEXT:    br label [[FIRST:%.*]]
-; CHECK:       first:
-; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ 0, [[TEST_TO_OUTLINE]] ]
-; CHECK-NEXT:    store i32 2, i32* [[TMP0:%.*]], align 4
-; CHECK-NEXT:    store i32 3, i32* [[TMP1:%.*]], align 4
-; CHECK-NEXT:    br label [[TEST_AFTER_OUTLINE_EXITSTUB:%.*]]
-; CHECK:       test_after_outline.exitStub:
-; CHECK-NEXT:    ret void
-;
diff --git a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
index 62d936d68ff2f..258bbfe131e3e 100644
--- a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
@@ -17,8 +17,6 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
-dummy:
-  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -39,8 +37,6 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
-dummy:
-  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -59,8 +55,6 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
-; CHECK: dummy:
-; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -80,8 +74,6 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
-; CHECK: dummy:
-; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
index 97d9be3d8985a..a8781e8f78af7 100644
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -757,41 +757,26 @@ TEST(IRInstructionMapper, BranchLegal) {
   ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]);
 }
 
-// Checks that a PHINode is mapped to be legal.
-TEST(IRInstructionMapper, PhiLegal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
-                             %1 = add i32 %a, %b
-                             ret i32 0
-                          bb1:
-                             ret i32 1
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
-  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
-  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.InstClassifier.EnableBranches = true;
-  Mapper.initializeForBBs(*M);
-  getVectors(*M, Mapper, InstrList, UnsignedVec);
+// In most cases, the illegal instructions we are collecting don't require any
+// sort of setup.  In these cases, we can just only have illegal instructions,
+// and the mapper will create 0 length vectors, and we can check that.
 
-  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
-  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
-}
+// In cases where we have legal instructions needed to set up the illegal
+// instruction, to check illegal instructions are assigned unsigned integers
+// from the maximum value decreasing to 0, it will be greater than a legal
+// instruction that comes after.  So to check that we have an illegal
+// instruction, we place a legal instruction after an illegal instruction, and
+// check that the illegal unsigned integer is greater than the unsigned integer
+// of the legal instruction.
 
-// Checks that a PHINode is mapped to be legal.
+// Checks that a PHINode is mapped to be illegal since there is extra checking
+// needed to ensure that a branch in one region is bin an isomorphic
+// location in a different region.
 TEST(IRInstructionMapper, PhiIllegal) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {
                           bb0:
                              %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
-                             %1 = add i32 %a, %b
                              ret i32 0
                           bb1:
                              ret i32 1
@@ -805,25 +790,12 @@ TEST(IRInstructionMapper, PhiIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.initializeForBBs(*M);
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
   ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
 }
 
-// In most cases, the illegal instructions we are collecting don't require any
-// sort of setup.  In these cases, we can just only have illegal instructions,
-// and the mapper will create 0 length vectors, and we can check that.
-
-// In cases where we have legal instructions needed to set up the illegal
-// instruction, to check illegal instructions are assigned unsigned integers
-// from the maximum value decreasing to 0, it will be greater than a legal
-// instruction that comes after.  So to check that we have an illegal
-// instruction, we place a legal instruction after an illegal instruction, and
-// check that the illegal unsigned integer is greater than the unsigned integer
-// of the legal instruction.
-
 // Checks that an alloca instruction is mapped to be illegal.
 TEST(IRInstructionMapper, AllocaIllegal) {
   StringRef ModuleString = R"(
@@ -2374,108 +2346,6 @@ TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) {
   ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6));
 }
 
-// Checks that the same structure is recognized between two candidates,
-// when the phi predecessor are other blocks inside the same region,
-// the relative distance between the blocks must be the same.
-TEST(IRSimilarityCandidate, SamePHIStructureInternal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             br label %bb2
-                          bb1:
-                             br label %bb2
-                          bb2:
-                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
-                             %1 = add i32 %b, %a
-                             %2 = add i32 %a, %b
-                             ret i32 0
-                          }
-                          
-                          define i32 @f2(i32 %a, i32 %b) {
-                          bb0:
-                             br label %bb2
-                          bb1:
-                             br label %bb2
-                          bb2:
-                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
-                             %1 = add i32 %b, %a
-                             %2 = add i32 %a, %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
-  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
-  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.InstClassifier.EnableBranches = true;
-  Mapper.initializeForBBs(*M);
-  getVectors(*M, Mapper, InstrList, UnsignedVec);
-
-  // Check to make sure that we have a long enough region.
-  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(11));
-  // Check that the instructions were added correctly to both vectors.
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-
-  ASSERT_TRUE(longSimCandCompare(InstrList, true, 4, 0, 6));
-}
-
-// Checks that the different structure is recognized between two candidates,
-// when the phi predecessor are other blocks inside the same region,
-// the relative distance between the blocks must be the same.
-TEST(IRSimilarityCandidate, DifferentPHIStructureInternal) {
-  StringRef ModuleString = R"(
-                          define i32 @f(i32 %a, i32 %b) {
-                          bb0:
-                             br label %bb2
-                          bb1:
-                             br label %bb2
-                          bb3:
-                             br label %bb2
-                          bb2:
-                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
-                             %1 = add i32 %b, %a
-                             %2 = add i32 %a, %b
-                             ret i32 0
-                          }
-                          
-                          define i32 @f2(i32 %a, i32 %b) {
-                          bb0:
-                             br label %bb2
-                          bb1:
-                             br label %bb2
-                          bb3:
-                             br label %bb2
-                          bb2:
-                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb3 ] 
-                             %1 = add i32 %b, %a
-                             %2 = add i32 %a, %b
-                             ret i32 0
-                          })";
-  LLVMContext Context;
-  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
-
-  std::vector<IRInstructionData *> InstrList;
-  std::vector<unsigned> UnsignedVec;
-
-  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
-  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
-  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
-  Mapper.InstClassifier.EnableBranches = true;
-  Mapper.initializeForBBs(*M);
-  getVectors(*M, Mapper, InstrList, UnsignedVec);
-
-  // Check to make sure that we have a long enough region.
-  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(13));
-  // Check that the instructions were added correctly to both vectors.
-  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
-
-  ASSERT_FALSE(longSimCandCompare(InstrList, true, 5, 0, 7));
-}
-
 // Checks that two sets of identical instructions are found to be the same.
 // Both sequences of adds have the same operand ordering, and the same
 // instructions, making them strcturally equivalent.

From e8f4e41b6bf4627eb075eb7cad88246af5d6777e Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Sun, 2 Jan 2022 16:25:43 -0600
Subject: [PATCH 632/946] [IRSim][IROutliner] Add support for outlining
 PHINodes with the rest of the region.

We use the same similarity scheme we used for branch instructions for phi nodes, and allow them to be outlined. There is not a lot of special handling needed for these phi nodes when outlining, as they simply act as outputs. The code extractor does not currently allow for non entry blocks within the extracted region to have predecessors, so there are not conflicts to handle with respect to predecessors no longer contained in the function.

Recommit of 515eec3553b02533e9a88ee84bc245d5415163da

Reviewers: paquette

Differential Revision: https://reviews.llvm.org/D106997
---
 .../llvm/Analysis/IRSimilarityIdentifier.h    |  17 +-
 llvm/include/llvm/Transforms/IPO/IROutliner.h |   3 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp  |  37 +++++
 llvm/lib/Transforms/IPO/IROutliner.cpp        | 110 ++++++++++--
 .../IROutliner/included-phi-nodes-begin.ll    |  93 +++++++++++
 .../IROutliner/included-phi-nodes-end.ll      |  94 +++++++++++
 .../must-capture-all-phi-nodes-begin.ll       | 108 ++++++++++++
 .../must-capture-all-phi-nodes-end.ll         |  88 ++++++++++
 .../outlining-branches-phi-nodes.ll           |   8 +
 .../IROutliner/outlining-exits-to-phi-node.ll |   8 +
 .../IROutliner/phi-nodes-non-constant.ll      |  74 +++++++++
 .../Transforms/IROutliner/phi-nodes-simple.ll |  58 +++++++
 .../IROutliner/region-inputs-in-phi-nodes.ll  |   8 +
 .../Analysis/IRSimilarityIdentifierTest.cpp   | 156 ++++++++++++++++--
 14 files changed, 833 insertions(+), 29 deletions(-)
 create mode 100644 llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
 create mode 100644 llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
 create mode 100644 llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
 create mode 100644 llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
 create mode 100644 llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
 create mode 100644 llvm/test/Transforms/IROutliner/phi-nodes-simple.ll

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index 966bf02d128ec..c7759e0bbf0f8 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -214,6 +214,16 @@ struct IRInstructionData
   /// function name as a differentiating parameter.
   void setCalleeName(bool MatchByName = true);
 
+  /// For an IRInstructionData containing a PHINode, finds the
+  /// relative distances from the incoming basic block to the current block by
+  /// taking the difference of the number assigned to the current basic block
+  /// and the incoming basic block of the branch.
+  ///
+  /// \param BasicBlockToInteger - The mapping of basic blocks to their location
+  /// in the module.
+  void
+  setPHIPredecessors(DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger);
+
   /// Hashes \p Value based on its opcode, types, and operand types.
   /// Two IRInstructionData instances produce the same hash when they perform
   /// the same operation.
@@ -497,8 +507,11 @@ struct IRInstructionMapper {
         return Legal;
       return Illegal;
     }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    InstrType visitPHINode(PHINode &PN) { return Illegal; }
+    InstrType visitPHINode(PHINode &PN) { 
+      if (EnableBranches)
+        return Legal;
+      return Illegal;
+    }
     // TODO: Handle allocas.
     InstrType visitAllocaInst(AllocaInst &AI) { return Illegal; }
     // We exclude variable argument instructions since variable arguments
diff --git a/llvm/include/llvm/Transforms/IPO/IROutliner.h b/llvm/include/llvm/Transforms/IPO/IROutliner.h
index 9799737a529e3..ed74c8ed0e96c 100644
--- a/llvm/include/llvm/Transforms/IPO/IROutliner.h
+++ b/llvm/include/llvm/Transforms/IPO/IROutliner.h
@@ -342,8 +342,7 @@ class IROutliner {
     bool visitBranchInst(BranchInst &BI) { 
       return EnableBranches;
     }
-    // TODO: Determine a scheme to resolve when the labels are similar enough.
-    bool visitPHINode(PHINode &PN) { return false; }
+    bool visitPHINode(PHINode &PN) { return EnableBranches; }
     // TODO: Handle allocas.
     bool visitAllocaInst(AllocaInst &AI) { return false; }
     // VAArg instructions are not allowed since this could cause difficulty when
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 8c3c0400d6015..0ce3d218eb33a 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -70,6 +70,12 @@ void IRInstructionData::initializeInstruction() {
 
     OperVals.push_back(OI.get());
   }
+
+  // We capture the incoming BasicBlocks as values as well as the incoming
+  // Values in order to check for structural similarity.
+  if (PHINode *PN = dyn_cast<PHINode>(Inst))
+    for (BasicBlock *BB : PN->blocks())
+      OperVals.push_back(BB);
 }
 
 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
@@ -108,6 +114,34 @@ void IRInstructionData::setCalleeName(bool MatchByName) {
     CalleeName = CI->getCalledFunction()->getName().str();
 }
 
+void IRInstructionData::setPHIPredecessors(
+    DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
+  assert(isa<PHINode>(Inst) && "Instruction must be phi node");
+
+  PHINode *PN = cast<PHINode>(Inst);
+  DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
+
+  BBNumIt = BasicBlockToInteger.find(PN->getParent());
+  assert(BBNumIt != BasicBlockToInteger.end() &&
+         "Could not find location for BasicBlock!");
+
+  int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
+
+  // Convert the incoming blocks of the PHINode to an integer value, based on
+  // the relative distances between the current block and the incoming block.
+  for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
+    BasicBlock *Incoming = PN->getIncomingBlock(Idx);
+    BBNumIt = BasicBlockToInteger.find(Incoming);
+    assert(BBNumIt != BasicBlockToInteger.end() &&
+           "Could not find number for BasicBlock!");
+    int OtherBlockNumber = static_cast<int>(BBNumIt->second);
+
+    int Relative = OtherBlockNumber - CurrentBlockNumber;
+    RelativeBlockLocations.push_back(Relative);
+    RelativeBlockLocations.push_back(Relative);
+  }
+}
+
 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
   switch (CI->getPredicate()) {
   case CmpInst::FCMP_OGT:
@@ -270,6 +304,9 @@ unsigned IRInstructionMapper::mapToLegalUnsigned(
   if (isa<CallInst>(*It))
     ID->setCalleeName(EnableMatchCallsByName);
 
+  if (isa<PHINode>(*It))
+    ID->setPHIPredecessors(BasicBlockToInteger);
+
   // Add to the instruction list
   bool WasInserted;
   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index 9c79972443fe6..fd19b669c3c04 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -185,6 +185,44 @@ Value *OutlinableRegion::findCorrespondingValueIn(const OutlinableRegion &Other,
   return FoundValueOpt.getValueOr(nullptr);
 }
 
+/// Rewrite the BranchInsts in the incoming blocks to \p PHIBlock that are found
+/// in \p Included to branch to BasicBlock \p Replace if they currently branch
+/// to the BasicBlock \p Find.  This is used to fix up the incoming basic blocks
+/// when PHINodes are included in outlined regions.
+///
+/// \param PHIBlock - The BasicBlock containing the PHINodes that need to be
+/// checked.
+/// \param Find - The successor block to be replaced.
+/// \param Replace - The new succesor block to branch to.
+/// \param Included - The set of blocks about to be outlined.
+static void replaceTargetsFromPHINode(BasicBlock *PHIBlock, BasicBlock *Find,
+                                      BasicBlock *Replace,
+                                      DenseSet<BasicBlock *> &Included) {
+  for (PHINode &PN : PHIBlock->phis()) {
+    for (unsigned Idx = 0, PNEnd = PN.getNumIncomingValues(); Idx != PNEnd;
+         ++Idx) {
+      // Check if the incoming block is included in the set of blocks being
+      // outlined.
+      BasicBlock *Incoming = PN.getIncomingBlock(Idx);
+      if (!Included.contains(Incoming))
+        continue;
+
+      BranchInst *BI = dyn_cast<BranchInst>(Incoming->getTerminator());
+      assert(BI && "Not a branch instruction?");
+      // Look over the branching instructions into this block to see if we
+      // used to branch to Find in this outlined block.
+      for (unsigned Succ = 0, End = BI->getNumSuccessors(); Succ != End;
+           Succ++) {
+        // If we have found the block to replace, we do so here.
+        if (BI->getSuccessor(Succ) != Find)
+          continue;
+        BI->setSuccessor(Succ, Replace);
+      }
+    }
+  }
+}
+
+
 void OutlinableRegion::splitCandidate() {
   assert(!CandidateSplit && "Candidate already split!");
 
@@ -215,6 +253,39 @@ void OutlinableRegion::splitCandidate() {
   StartBB = StartInst->getParent();
   PrevBB = StartBB;
 
+  DenseSet<BasicBlock *> BBSet;
+  Candidate->getBasicBlocks(BBSet);
+
+  // We iterate over the instructions in the region, if we find a PHINode, we
+  // check if there are predecessors outside of the region, if there are,
+  // we ignore this region since we are unable to handle the severing of the
+  // phi node right now. 
+  BasicBlock::iterator It = StartInst->getIterator();
+  while (PHINode *PN = dyn_cast<PHINode>(&*It)) {
+    unsigned NumPredsOutsideRegion = 0;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (!BBSet.contains(PN->getIncomingBlock(i)))
+        ++NumPredsOutsideRegion;
+
+    if (NumPredsOutsideRegion > 1)
+      return;
+    
+    It++;
+  }
+
+  // If the region starts with a PHINode, but is not the initial instruction of
+  // the BasicBlock, we ignore this region for now.
+  if (isa<PHINode>(StartInst) && StartInst != &*StartBB->begin())
+    return;
+  
+  // If the region ends with a PHINode, but does not contain all of the phi node
+  // instructions of the region, we ignore it for now.
+  if (isa<PHINode>(BackInst)) {
+    EndBB = BackInst->getParent();
+    if (BackInst != &*std::prev(EndBB->getFirstInsertionPt()))
+      return;
+  }
+
   // The basic block gets split like so:
   // block:                 block:
   //   inst1                  inst1
@@ -241,12 +312,20 @@ void OutlinableRegion::splitCandidate() {
     FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
     EndBB->replaceSuccessorsPhiUsesWith(EndBB, FollowBB);
     FollowBB->replaceSuccessorsPhiUsesWith(PrevBB, FollowBB);
-    return;
+  } else {
+    EndBB = BackInst->getParent();
+    EndsInBranch = true;
+    FollowBB = nullptr;
   }
 
-  EndBB = BackInst->getParent();
-  EndsInBranch = true;
-  FollowBB = nullptr;
+  // Refind the basic block set.
+  BBSet.clear();
+  Candidate->getBasicBlocks(BBSet);
+  // For the phi nodes in the new starting basic block of the region, we
+  // reassign the targets of the basic blocks branching instructions.
+  replaceTargetsFromPHINode(StartBB, PrevBB, StartBB, BBSet);
+  if (FollowBB)
+    replaceTargetsFromPHINode(FollowBB, EndBB, FollowBB, BBSet);
 }
 
 void OutlinableRegion::reattachCandidate() {
@@ -268,15 +347,21 @@ void OutlinableRegion::reattachCandidate() {
   //   inst4
   assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
 
-  // StartBB should only have one predecessor since we put an unconditional
-  // branch at the end of PrevBB when we split the BasicBlock.
-  PrevBB = StartBB->getSinglePredecessor();
-  assert(PrevBB != nullptr &&
-         "No Predecessor for the region start basic block!");
-
   assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
   PrevBB->getTerminator()->eraseFromParent();
 
+  // If we reattaching after outlining, we iterate over the phi nodes to
+  // the initial block, and reassign the branch instructions of the incoming
+  // blocks to the block we are remerging into.
+  if (!ExtractedFunction) {
+    DenseSet<BasicBlock *> BBSet;
+    Candidate->getBasicBlocks(BBSet);
+
+    replaceTargetsFromPHINode(StartBB, StartBB, PrevBB, BBSet);
+    if (!EndsInBranch)
+      replaceTargetsFromPHINode(FollowBB, FollowBB, EndBB, BBSet);
+  }
+
   moveBBContents(*StartBB, *PrevBB);
 
   BasicBlock *PlacementBB = PrevBB;
@@ -1622,7 +1707,8 @@ replaceArgumentUses(OutlinableRegion &Region,
 
       // If this is storing a PHINode, we must make sure it is included in the
       // overall function.
-      if (!isa<PHINode>(ValueOperand)) {
+      if (!isa<PHINode>(ValueOperand) ||
+          Region.Candidate->getGVN(ValueOperand).hasValue()) {
         if (FirstFunction)
           continue;
         Value *CorrVal =
@@ -2161,7 +2247,7 @@ void IROutliner::pruneIncompatibleRegions(
   if (FirstCandidate.getLength() == 2) {
     if (isa<CallInst>(FirstCandidate.front()->Inst) &&
         isa<BranchInst>(FirstCandidate.back()->Inst))
-        return;
+      return;
   }
 
   unsigned CurrentEndIdx = 0;
diff --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
new file mode 100644
index 0000000000000..e519fb318ba87
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/included-phi-nodes-begin.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to outline when all of the phi nodes in the starting
+; block are included in the region and there is no more than one predecessor
+; into those phi nodes from outside of the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32 [[Y]], i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK:       test1_to_outline:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[E:%.*]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0:%.*]], [[NEWFUNCROOT:%.*]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[E]], [[TEST1_TO_OUTLINE]] ], [ [[TMP0]], [[NEWFUNCROOT]] ]
+; CHECK-NEXT:    [[E]] = load i32, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP2:%.*]], [[TMP2]]
+; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_TO_OUTLINE]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP1]], align 4
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
new file mode 100644
index 0000000000000..0f9750576139a
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/included-phi-nodes-end.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to propogate inputs to the region into the split PHINode
+; outside of the region if necessary.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
+; CHECK:       entry_after_outline:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[NEXT:%.*]], label [[ENTRY_AFTER_OUTLINE:%.*]]
+; CHECK:       entry_after_outline:
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT_EXITSTUB:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    [[TMP4:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[TMP1]], [[ENTRY_TO_OUTLINE]] ]
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       next.exitStub:
+; CHECK-NEXT:    ret i1 true
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret i1 false
+;
diff --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
new file mode 100644
index 0000000000000..debaef12687b7
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-begin.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not outline when all of the phi nodes in the beginning
+; block are included not in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = add i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %e, %test1 ], [ %y, %entry  ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %y = mul i32 %c, %c
+  br label %test1
+dummy:
+  ret void
+test1:
+  %1 = phi i32 [ %e, %test1 ], [ %y, %entry ]
+  %2 = phi i32 [ %y, %entry ], [ %e, %test1 ]
+  %e = load i32, i32* %0, align 4
+  %3 = add i32 %c, %c
+  br i1 true, label %test, label %test1
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[E_RELOAD]], [[TEST1]] ], [ [[Y]], [[ENTRY]] ]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[E_LOC:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Y:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       dummy:
+; CHECK-NEXT:    ret void
+; CHECK:       test1:
+; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[E_RELOAD:%.*]], [[TEST1]] ], [ [[Y]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[Y]], [[ENTRY]] ], [ [[E_RELOAD]], [[TEST1]] ]
+; CHECK-NEXT:    [[LT_CAST:%.*]] = bitcast i32* [[E_LOC]] to i8*
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    [[TARGETBLOCK:%.*]] = call i1 @outlined_ir_func_0(i32* [[TMP0]], i32 [[C]], i32* [[E_LOC]])
+; CHECK-NEXT:    [[E_RELOAD]] = load i32, i32* [[E_LOC]], align 4
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
+; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[TEST1]], label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal i1 @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST1_TO_OUTLINE:%.*]]
+; CHECK:       test1_to_outline:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1:%.*]], [[TMP1]]
+; CHECK-NEXT:    br i1 true, label [[TEST:%.*]], label [[TEST1_EXITSTUB:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST_EXITSTUB:%.*]]
+; CHECK:       test1.exitStub:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    ret i1 true
+; CHECK:       first.exitStub:
+; CHECK-NEXT:    store i32 [[E]], i32* [[TMP2]], align 4
+; CHECK-NEXT:    ret i1 false
+;
diff --git a/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
new file mode 100644
index 0000000000000..a5d0314f21359
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/must-capture-all-phi-nodes-end.ll
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do not  outline when all of the phi nodes in the end
+; block are not included in the region.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = add i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  ret void
+next:
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  %z = mul i32 %c, %c
+  br i1 true, label %test1, label %first
+test1:
+  %e = load i32, i32* %0, align 4
+  %1 = add i32 %c, %c
+  br i1 true, label %first, label %test
+test:
+  %d = load i32, i32* %0, align 4
+  br i1 true, label %first, label %next
+first:
+  %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
+  %3 = phi i32 [ %d, %test ], [ %c, %entry ], [ %e, %test1 ]
+  ret void
+next:
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[Z:%.*]] = mul i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[TEST1:%.*]], label [[FIRST:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[C]], [[C]]
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br i1 true, label [[FIRST]], label [[NEXT:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[E]], [[TEST1]] ], [ [[C]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[D]], [[TEST]] ], [ [[C]], [[ENTRY]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    ret void
+; CHECK:       next:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
index 4e777e862543e..59b08422cec3a 100644
--- a/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-branches-phi-nodes.ll
@@ -38,6 +38,8 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
+dummy:
+  ret void
 block_6:
   %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -76,6 +78,8 @@ block_5:
   store i32 %add2, i32* %output, align 4
   store i32 %mul2, i32* %result, align 4
   br label %block_6
+dummy:
+  ret void
 block_6:
   %diff = phi i32 [%aval, %block_4], [%a2val, %block_5]
   ret void
@@ -102,6 +106,8 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void
@@ -128,6 +134,8 @@ block_6:
 ; CHECK-NEXT:    [[DIFF_CE_RELOAD:%.*]] = load i32, i32* [[DIFF_CE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[BLOCK_6:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       block_6:
 ; CHECK-NEXT:    [[DIFF:%.*]] = phi i32 [ [[DIFF_CE_RELOAD]], [[BLOCK_2]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
index 77cb4a5bc476f..397121f000cd9 100644
--- a/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-exits-to-phi-node.ll
@@ -15,6 +15,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
+dummy:
+  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -31,6 +33,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br label %first
+dummy:
+  ret void
 first:
   %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
   ret void
@@ -45,6 +49,8 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -60,6 +66,8 @@ first:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
new file mode 100644
index 0000000000000..d56ffa5730552
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-non-constant.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we do extract phi nodes from the regions.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  %0 = alloca i32, align 4
+  %c = load i32, i32* %0, align 4
+  br label %test1
+test1:
+  %e = load i32, i32* %0, align 4
+  br label %first
+test:
+  %d = load i32, i32* %0, align 4
+  br label %first
+first:
+  %1 = phi i32 [ %c, %test ], [ %e, %test1 ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[TMP0]], i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[ENTRY_TO_OUTLINE:%.*]]
+; CHECK:       entry_to_outline:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST1:%.*]]
+; CHECK:       test1:
+; CHECK-NEXT:    [[E:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    [[D:%.*]] = load i32, i32* [[TMP0]], align 4
+; CHECK-NEXT:    br label [[FIRST]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP3:%.*]] = phi i32 [ [[C]], [[TEST:%.*]] ], [ [[E]], [[TEST1]] ]
+; CHECK-NEXT:    store i32 2, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP2:%.*]], align 4
+; CHECK-NEXT:    br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       entry_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
new file mode 100644
index 0000000000000..e5afb89a26138
--- /dev/null
+++ b/llvm/test/Transforms/IROutliner/phi-nodes-simple.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs
+; RUN: opt -S -verify -iroutliner -ir-outlining-no-cost < %s | FileCheck %s
+
+; Show that we are able to outline the simple phi node case of constants when
+; the corresponding labels match.
+
+define void @function1(i32* %a, i32* %b) {
+entry:
+  br label %test
+test:
+  br label %first
+first:
+  %0 = phi i32 [ 0, %test ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+
+define void @function2(i32* %a, i32* %b) {
+entry:
+  br label %test
+test:
+  br label %first
+first:
+  %0 = phi i32 [ 0, %test ]
+  store i32 2, i32* %a, align 4
+  store i32 3, i32* %b, align 4
+  ret void
+}
+; CHECK-LABEL: @function1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK-LABEL: @function2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TEST:%.*]]
+; CHECK:       test:
+; CHECK-NEXT:    call void @outlined_ir_func_0(i32* [[A:%.*]], i32* [[B:%.*]])
+; CHECK-NEXT:    ret void
+;
+;
+; CHECK: define internal void @outlined_ir_func_0(
+; CHECK-NEXT:  newFuncRoot:
+; CHECK-NEXT:    br label [[TEST_TO_OUTLINE:%.*]]
+; CHECK:       test_to_outline:
+; CHECK-NEXT:    br label [[FIRST:%.*]]
+; CHECK:       first:
+; CHECK-NEXT:    [[TMP2:%.*]] = phi i32 [ 0, [[TEST_TO_OUTLINE]] ]
+; CHECK-NEXT:    store i32 2, i32* [[TMP0:%.*]], align 4
+; CHECK-NEXT:    store i32 3, i32* [[TMP1:%.*]], align 4
+; CHECK-NEXT:    br label [[TEST_AFTER_OUTLINE_EXITSTUB:%.*]]
+; CHECK:       test_after_outline.exitStub:
+; CHECK-NEXT:    ret void
+;
diff --git a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
index 258bbfe131e3e..62d936d68ff2f 100644
--- a/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
+++ b/llvm/test/Transforms/IROutliner/region-inputs-in-phi-nodes.ll
@@ -17,6 +17,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
+dummy:
+  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -37,6 +39,8 @@ test1:
 test:
   %d = load i32, i32* %0, align 4
   br i1 true, label %first, label %next
+dummy:
+  ret void
 first:
   %2 = phi i32 [ %d, %test ], [ %e, %test1 ], [ %c, %entry ]
   ret void
@@ -55,6 +59,8 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
@@ -74,6 +80,8 @@ next:
 ; CHECK-NEXT:    [[DOTCE_RELOAD:%.*]] = load i32, i32* [[DOTCE_LOC]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
 ; CHECK-NEXT:    br i1 [[TARGETBLOCK]], label [[FIRST:%.*]], label [[NEXT:%.*]]
+; CHECK: dummy:
+; CHECK-NEXT:  ret void
 ; CHECK:       first:
 ; CHECK-NEXT:    [[TMP1:%.*]] = phi i32 [ [[DOTCE_RELOAD]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    ret void
diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
index a8781e8f78af7..97d9be3d8985a 100644
--- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
+++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp
@@ -757,26 +757,41 @@ TEST(IRInstructionMapper, BranchLegal) {
   ASSERT_TRUE(UnsignedVec[1] < UnsignedVec[2]);
 }
 
-// In most cases, the illegal instructions we are collecting don't require any
-// sort of setup.  In these cases, we can just only have illegal instructions,
-// and the mapper will create 0 length vectors, and we can check that.
+// Checks that a PHINode is mapped to be legal.
+TEST(IRInstructionMapper, PhiLegal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
+                             ret i32 0
+                          bb1:
+                             ret i32 1
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
 
-// In cases where we have legal instructions needed to set up the illegal
-// instruction, to check illegal instructions are assigned unsigned integers
-// from the maximum value decreasing to 0, it will be greater than a legal
-// instruction that comes after.  So to check that we have an illegal
-// instruction, we place a legal instruction after an illegal instruction, and
-// check that the illegal unsigned integer is greater than the unsigned integer
-// of the legal instruction.
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  ASSERT_EQ(InstrList.size(), UnsignedVec.size());
+  ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(3));
+}
 
-// Checks that a PHINode is mapped to be illegal since there is extra checking
-// needed to ensure that a branch in one region is bin an isomorphic
-// location in a different region.
+// Checks that a PHINode is mapped to be legal.
 TEST(IRInstructionMapper, PhiIllegal) {
   StringRef ModuleString = R"(
                           define i32 @f(i32 %a, i32 %b) {
                           bb0:
                              %0 = phi i1 [ 0, %bb0 ], [ %0, %bb1 ]
+                             %1 = add i32 %a, %b
                              ret i32 0
                           bb1:
                              ret i32 1
@@ -790,12 +805,25 @@ TEST(IRInstructionMapper, PhiIllegal) {
   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
   SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
   IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.initializeForBBs(*M);
   getVectors(*M, Mapper, InstrList, UnsignedVec);
 
   ASSERT_EQ(InstrList.size(), UnsignedVec.size());
   ASSERT_EQ(UnsignedVec.size(), static_cast<unsigned>(0));
 }
 
+// In most cases, the illegal instructions we are collecting don't require any
+// sort of setup.  In these cases, we can just only have illegal instructions,
+// and the mapper will create 0 length vectors, and we can check that.
+
+// In cases where we have legal instructions needed to set up the illegal
+// instruction, to check illegal instructions are assigned unsigned integers
+// from the maximum value decreasing to 0, it will be greater than a legal
+// instruction that comes after.  So to check that we have an illegal
+// instruction, we place a legal instruction after an illegal instruction, and
+// check that the illegal unsigned integer is greater than the unsigned integer
+// of the legal instruction.
+
 // Checks that an alloca instruction is mapped to be illegal.
 TEST(IRInstructionMapper, AllocaIllegal) {
   StringRef ModuleString = R"(
@@ -2346,6 +2374,108 @@ TEST(IRSimilarityCandidate, DifferentBranchStructureOutside) {
   ASSERT_TRUE(longSimCandCompare(InstrList, true, 3, 0, 6));
 }
 
+// Checks that the same structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, SamePHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ]
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(11));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_TRUE(longSimCandCompare(InstrList, true, 4, 0, 6));
+}
+
+// Checks that the different structure is recognized between two candidates,
+// when the phi predecessor are other blocks inside the same region,
+// the relative distance between the blocks must be the same.
+TEST(IRSimilarityCandidate, DifferentPHIStructureInternal) {
+  StringRef ModuleString = R"(
+                          define i32 @f(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb1 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          }
+                          
+                          define i32 @f2(i32 %a, i32 %b) {
+                          bb0:
+                             br label %bb2
+                          bb1:
+                             br label %bb2
+                          bb3:
+                             br label %bb2
+                          bb2:
+                             %0 = phi i32 [ %a, %bb0 ], [ %b, %bb3 ] 
+                             %1 = add i32 %b, %a
+                             %2 = add i32 %a, %b
+                             ret i32 0
+                          })";
+  LLVMContext Context;
+  std::unique_ptr<Module> M = makeLLVMModule(Context, ModuleString);
+
+  std::vector<IRInstructionData *> InstrList;
+  std::vector<unsigned> UnsignedVec;
+
+  SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
+  SpecificBumpPtrAllocator<IRInstructionDataList> IDLAllocator;
+  IRInstructionMapper Mapper(&InstDataAllocator, &IDLAllocator);
+  Mapper.InstClassifier.EnableBranches = true;
+  Mapper.initializeForBBs(*M);
+  getVectors(*M, Mapper, InstrList, UnsignedVec);
+
+  // Check to make sure that we have a long enough region.
+  ASSERT_EQ(InstrList.size(), static_cast<unsigned>(13));
+  // Check that the instructions were added correctly to both vectors.
+  ASSERT_TRUE(InstrList.size() == UnsignedVec.size());
+
+  ASSERT_FALSE(longSimCandCompare(InstrList, true, 5, 0, 7));
+}
+
 // Checks that two sets of identical instructions are found to be the same.
 // Both sequences of adds have the same operand ordering, and the same
 // instructions, making them strcturally equivalent.

From a13c10588cd57bcef93f2a27a74dd7c32bc6850b Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 22 Jan 2022 12:18:51 -0500
Subject: [PATCH 633/946] [libc++] [test] Fix LWG3146 "Excessive unwrapping in
 std::ref/cref"

Drive-by constexprify the existing tests, too.

Differential Revision: https://reviews.llvm.org/D117953
---
 libcxx/docs/Status/Cxx2bIssues.csv            |  2 +-
 .../include/__functional/reference_wrapper.h  |  8 ++-
 .../refwrap/refwrap.helpers/cref_1.pass.cpp   | 15 ++++-
 .../refwrap/refwrap.helpers/cref_2.pass.cpp   | 18 +++--
 .../refwrap/refwrap.helpers/lwg3146.pass.cpp  | 66 +++++++++++++++++++
 .../refwrap/refwrap.helpers/ref_1.pass.cpp    | 15 ++++-
 .../refwrap/refwrap.helpers/ref_2.pass.cpp    | 25 ++++---
 7 files changed, 127 insertions(+), 22 deletions(-)
 create mode 100644 libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/lwg3146.pass.cpp

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 0ce504816fe3b..b151393e0a50d 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -102,7 +102,7 @@
 `2762 <https://wg21.link/LWG2762>`__,"``unique_ptr operator*()`` should be ``noexcept``","October 2021","",""
 `3121 <https://wg21.link/LWG3121>`__,"``tuple`` constructor constraints for ``UTypes&&...`` overloads","October 2021","",""
 `3123 <https://wg21.link/LWG3123>`__,"``duration`` constructor from representation shouldn't be effectively non-throwing","October 2021","","","|chrono|"
-`3146 <https://wg21.link/LWG3146>`__,"Excessive unwrapping in ``std::ref/cref``","October 2021","",""
+`3146 <https://wg21.link/LWG3146>`__,"Excessive unwrapping in ``std::ref/cref``","October 2021","|Complete|","14.0"
 `3152 <https://wg21.link/LWG3152>`__,"``common_type`` and ``common_reference`` have flaws in common","October 2021","",""
 `3293 <https://wg21.link/LWG3293>`__,"``move_iterator operator+()`` has incorrect constraints","October 2021","","","|ranges|"
 `3361 <https://wg21.link/LWG3361>`__,"``safe_range<SomeRange&>`` case","October 2021","","","|ranges|"
diff --git a/libcxx/include/__functional/reference_wrapper.h b/libcxx/include/__functional/reference_wrapper.h
index e1e4abd80c236..c5de5e5f2e8df 100644
--- a/libcxx/include/__functional/reference_wrapper.h
+++ b/libcxx/include/__functional/reference_wrapper.h
@@ -176,7 +176,7 @@ class _LIBCPP_TEMPLATE_VIS reference_wrapper
 #endif // _LIBCPP_CXX03_LANG
 };
 
-#if _LIBCPP_STD_VER >= 17
+#if _LIBCPP_STD_VER > 14
 template <class _Tp>
 reference_wrapper(_Tp&) -> reference_wrapper<_Tp>;
 #endif
@@ -194,7 +194,7 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
 reference_wrapper<_Tp>
 ref(reference_wrapper<_Tp> __t) _NOEXCEPT
 {
-    return _VSTD::ref(__t.get());
+    return __t;
 }
 
 template <class _Tp>
@@ -210,7 +210,9 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
 reference_wrapper<const _Tp>
 cref(reference_wrapper<_Tp> __t) _NOEXCEPT
 {
-    return _VSTD::cref(__t.get());
+    // C++20 says "return __t", but C++03 lacks the relevant
+    // converting constructor. This should be equivalent.
+    return __t.get();
 }
 
 #ifndef _LIBCPP_CXX03_LANG
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp
index 2286d949e657f..c6bc471df2567 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_1.pass.cpp
@@ -17,11 +17,20 @@
 
 #include "test_macros.h"
 
+TEST_CONSTEXPR_CXX20 bool test()
+{
+  int i = 0;
+  std::reference_wrapper<const int> r = std::cref(i);
+  assert(&r.get() == &i);
+  return true;
+}
+
 int main(int, char**)
 {
-    int i = 0;
-    std::reference_wrapper<const int> r = std::cref(i);
-    assert(&r.get() == &i);
+  test();
+#if TEST_STD_VER > 17
+  static_assert(test());
+#endif
 
   return 0;
 }
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp
index d76bcf9245c09..1cf369201894a 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/cref_2.pass.cpp
@@ -22,19 +22,29 @@ namespace adl {
   void cref(A) {}
 }
 
-int main(int, char**)
+TEST_CONSTEXPR_CXX20 bool test()
 {
+  {
     const int i = 0;
     std::reference_wrapper<const int> r1 = std::cref(i);
     std::reference_wrapper<const int> r2 = std::cref(r1);
     assert(&r2.get() == &i);
-
-    {
+  }
+  {
     adl::A a;
     std::reference_wrapper<const adl::A> a1 = std::cref(a);
     std::reference_wrapper<const adl::A> a2 = std::cref(a1);
     assert(&a2.get() == &a);
-    }
+  }
+  return true;
+}
+
+int main(int, char**)
+{
+  test();
+#if TEST_STD_VER > 17
+  static_assert(test());
+#endif
 
   return 0;
 }
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/lwg3146.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/lwg3146.pass.cpp
new file mode 100644
index 0000000000000..66d254684b9c2
--- /dev/null
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/lwg3146.pass.cpp
@@ -0,0 +1,66 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <functional>
+
+// reference_wrapper
+
+// template <ObjectType T> reference_wrapper<T> ref(reference_wrapper<T> t);
+//   LWG 3146 "Excessive unwrapping in std::ref/cref"
+
+#include <functional>
+#include <cassert>
+
+#include "test_macros.h"
+
+TEST_CONSTEXPR_CXX20 bool test()
+{
+  {
+    int i = 0;
+    std::reference_wrapper<int> ri(i);
+    std::reference_wrapper<std::reference_wrapper<int> > rri(ri);
+    auto rrj = std::ref(rri);
+    ASSERT_SAME_TYPE(decltype(rrj), std::reference_wrapper<std::reference_wrapper<int> >);
+    assert(&rrj.get() == &ri);
+  }
+  {
+    int i = 0;
+    std::reference_wrapper<int> ri(i);
+    std::reference_wrapper<const std::reference_wrapper<int> > rri(ri);
+    auto rrj = std::ref(rri);
+    ASSERT_SAME_TYPE(decltype(rrj), std::reference_wrapper<const std::reference_wrapper<int> >);
+    assert(&rrj.get() == &ri);
+  }
+  {
+    int i = 0;
+    std::reference_wrapper<int> ri(i);
+    std::reference_wrapper<std::reference_wrapper<int> > rri(ri);
+    auto rrj = std::cref(rri);
+    ASSERT_SAME_TYPE(decltype(rrj), std::reference_wrapper<const std::reference_wrapper<int> >);
+    assert(&rrj.get() == &ri);
+  }
+  {
+    int i = 0;
+    std::reference_wrapper<int> ri(i);
+    std::reference_wrapper<const std::reference_wrapper<int> > rri(ri);
+    auto rrj = std::cref(rri);
+    ASSERT_SAME_TYPE(decltype(rrj), std::reference_wrapper<const std::reference_wrapper<int> >);
+    assert(&rrj.get() == &ri);
+  }
+  return true;
+}
+
+int main(int, char**)
+{
+  test();
+#if TEST_STD_VER > 17
+  static_assert(test());
+#endif
+
+  return 0;
+}
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp
index 41614d3b2e2a1..8424fcd52096a 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.pass.cpp
@@ -17,11 +17,20 @@
 
 #include "test_macros.h"
 
+TEST_CONSTEXPR_CXX20 bool test()
+{
+  int i = 0;
+  std::reference_wrapper<int> r = std::ref(i);
+  assert(&r.get() == &i);
+  return true;
+}
+
 int main(int, char**)
 {
-    int i = 0;
-    std::reference_wrapper<int> r = std::ref(i);
-    assert(&r.get() == &i);
+  test();
+#if TEST_STD_VER > 17
+  static_assert(test());
+#endif
 
   return 0;
 }
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp
index 9ab62a20a8731..97689fc53837c 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_2.pass.cpp
@@ -10,7 +10,7 @@
 
 // reference_wrapper
 
-// template <ObjectType T> reference_wrapper<T> ref(reference_wrapper<T>t);
+// template <ObjectType T> reference_wrapper<T> ref(reference_wrapper<T> t);
 
 #include <functional>
 #include <cassert>
@@ -29,22 +29,31 @@ namespace adl {
   void ref(A) {}
 }
 
-int main(int, char**)
+TEST_CONSTEXPR_CXX20 bool test()
 {
-    {
+  {
     int i = 0;
     std::reference_wrapper<int> r1 = std::ref(i);
     std::reference_wrapper<int> r2 = std::ref(r1);
     assert(&r2.get() == &i);
-    }
-    {
+  }
+  {
     adl::A a;
     std::reference_wrapper<adl::A> a1 = std::ref(a);
     std::reference_wrapper<adl::A> a2 = std::ref(a1);
     assert(&a2.get() == &a);
-    }
+  }
+  return true;
+}
+
+int main(int, char**)
+{
+  test();
+#if TEST_STD_VER > 17
+  static_assert(test());
+#endif
 
-    {
+  {
     unary_counting_predicate<bool(*)(int), int> cp(is5);
     assert(!cp(6));
     assert(cp.count() == 1);
@@ -52,7 +61,7 @@ int main(int, char**)
     assert(cp.count() == 1);
     assert(call_pred(std::ref(cp)));
     assert(cp.count() == 2);
-    }
+  }
 
   return 0;
 }

From ba79295c48bb362e2183361ad4bebc4ad2ab75a2 Mon Sep 17 00:00:00 2001
From: Andrew Litteken <andrew.litteken@gmail.com>
Date: Tue, 25 Jan 2022 18:41:18 -0600
Subject: [PATCH 634/946] [NFC][IROutliner] fix namespace and unused variable

---
 llvm/include/llvm/Analysis/IRSimilarityIdentifier.h | 2 +-
 llvm/lib/Analysis/IRSimilarityIdentifier.cpp        | 3 +--
 llvm/lib/Transforms/IPO/IROutliner.cpp              | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
index c7759e0bbf0f8..7b81d5754930a 100644
--- a/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
+++ b/llvm/include/llvm/Analysis/IRSimilarityIdentifier.h
@@ -262,7 +262,7 @@ struct IRInstructionData
           llvm::hash_value(ID.Inst->getType()),
           llvm::hash_value(ID.getPredicate()),
           llvm::hash_combine_range(OperTypes.begin(), OperTypes.end()));
-    else if (CallInst *CI = dyn_cast<CallInst>(ID.Inst)) {
+    else if (isa<CallInst>(ID.Inst)) {
       std::string FunctionName = *ID.CalleeName;
       return llvm::hash_combine(
           llvm::hash_value(ID.Inst->getOpcode()),
diff --git a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
index 0ce3d218eb33a..d2f0c57f6dabb 100644
--- a/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
+++ b/llvm/lib/Analysis/IRSimilarityIdentifier.cpp
@@ -29,7 +29,6 @@ cl::opt<bool>
                     cl::ReallyHidden,
                     cl::desc("disable similarity matching, and outlining, "
                              "across branches for debugging purposes."));
-} // namespace llvm
 
 cl::opt<bool>
     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
@@ -40,7 +39,7 @@ cl::opt<bool>
     MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
                      cl::desc("only allow matching call instructions if the "
                               "name and type signature match."));
-
+} // namespace llvm
 
 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
                                      IRInstructionDataList &IDList)
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index fd19b669c3c04..e064fbbef5950 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -38,11 +38,11 @@ using namespace IRSimilarity;
 // matching and outlining.
 namespace llvm {
 extern cl::opt<bool> DisableBranches;
-} // namespace llvm
 
 // A command flag to be used for debugging to indirect calls from similarity
 // matching and outlining.
 extern cl::opt<bool> DisableIndirectCalls;
+} // namespace llvm
 
 // Set to true if the user wants the ir outliner to run on linkonceodr linkage
 // functions. This is false by default because the linker can dedupe linkonceodr

From 69da422bdaa684fbd14c95b06f40ac57910642d9 Mon Sep 17 00:00:00 2001
From: Kirill Stoimenov <kstoimenov@google.com>
Date: Wed, 26 Jan 2022 00:57:40 +0000
Subject: [PATCH 635/946] [ASan] Added a unit test for D118184.

To make sure the the libraries are there for executable and not there for DSOs.

Reviewed By: kda

Differential Revision: https://reviews.llvm.org/D118205
---
 clang/test/Driver/sanitizer-ld.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/clang/test/Driver/sanitizer-ld.c b/clang/test/Driver/sanitizer-ld.c
index ea8c49f2384ad..d96cddb31d687 100644
--- a/clang/test/Driver/sanitizer-ld.c
+++ b/clang/test/Driver/sanitizer-ld.c
@@ -24,6 +24,24 @@
 //
 // CHECK-ASAN-NO-LINK-RUNTIME-LINUX-NOT: libclang_rt.asan-x86_64
 
+// RUN: %clang -fsanitize=address %s -### -o %t.o 2>&1 \
+// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-ASAN-EXECUTABLE-LINUX %s
+//
+// CHECK-ASAN-EXECUTABLE-LINUX: libclang_rt.asan_static-x86_64
+// CHECK-ASAN-EXECUTABLE-LINUX: libclang_rt.asan-x86_64
+
+// RUN: %clang -fsanitize=address -shared %s -### -o %t.o 2>&1  \
+// RUN:     -target x86_64-unknown-linux -fuse-ld=ld \
+// RUN:     -resource-dir=%S/Inputs/resource_dir \
+// RUN:     --sysroot=%S/Inputs/basic_linux_tree \
+// RUN:   | FileCheck --check-prefix=CHECK-ASAN-SHARED-LINUX %s
+//
+// CHECK-ASAN-SHARED-LINUX-NOT: libclang_rt.asan_static-x86_64
+// CHECK-ASAN-SHARED-LINUX-NOT: libclang_rt.asan-x86_64
+
 // RUN: %clang -no-canonical-prefixes %s -### -o %t.o 2>&1 \
 // RUN:     -target i386-unknown-linux -fuse-ld=ld -fsanitize=address -shared-libsan \
 // RUN:     -resource-dir=%S/Inputs/resource_dir \

From 9273378b8576a63a04b03f1557442d3a90463488 Mon Sep 17 00:00:00 2001
From: Zakk Chen <zakk.chen@sifive.com>
Date: Tue, 25 Jan 2022 00:08:21 -0800
Subject: [PATCH 636/946] [RISCV] Add the passthru operand for RVV nomask load
 intrinsics.

The goal is support tail and mask policy in RVV builtins.
We focus on IR part first.
If the passthru operand is undef, we use tail agnostic, otherwise
use tail undisturbed.

Co-Authored-by: Hsiangkai Wang <Hsiangkai@gmail.com>

Reviewers: craig.topper, frasercrmck

Differential Revision: https://reviews.llvm.org/D117647
---
 clang/include/clang/Basic/riscv_vector.td     |   4 +
 .../RISCV/rvv-intrinsics-overloaded/vloxei.c  | 382 ++++++++--------
 .../RISCV/rvv-intrinsics-overloaded/vluxei.c  | 382 ++++++++--------
 clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c | 118 ++---
 .../test/CodeGen/RISCV/rvv-intrinsics/vleff.c | 118 ++---
 .../CodeGen/RISCV/rvv-intrinsics/vloxei.c     | 424 +++++++++---------
 .../test/CodeGen/RISCV/rvv-intrinsics/vlse.c  | 118 ++---
 .../CodeGen/RISCV/rvv-intrinsics/vluxei.c     | 424 +++++++++---------
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  47 +-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |  49 +-
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h     |   2 +
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  20 +-
 .../Target/RISCV/RISCVInstrInfoVPseudos.td    |  96 +++-
 .../RISCV/rvv/rv32-vsetvli-intrinsics.ll      |   6 +-
 .../RISCV/rvv/rv64-vsetvli-intrinsics.ll      |   6 +-
 .../CodeGen/RISCV/rvv/rvv-out-arguments.ll    |   4 +-
 llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll    | 120 +++++
 llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll       |  74 +++
 llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll       |  74 +++
 llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll     |  77 ++++
 llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll     |  77 ++++
 llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll    | 212 +++++++++
 llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll    | 268 +++++++++++
 llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll      |  74 +++
 llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll      |  74 +++
 llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll    | 212 +++++++++
 llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll    | 268 +++++++++++
 .../RISCV/rvv/vsetvli-insert-crossbb.ll       |  20 +-
 .../RISCV/rvv/vsetvli-insert-crossbb.mir      |   8 +-
 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll |   4 +-
 .../test/CodeGen/RISCV/rvv/vsetvli-insert.mir |  10 +-
 31 files changed, 2709 insertions(+), 1063 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll

diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td
index 7e9f610a0b186..6451e77e77f63 100644
--- a/clang/include/clang/Basic/riscv_vector.td
+++ b/clang/include/clang/Basic/riscv_vector.td
@@ -595,6 +595,7 @@ let HasNoMaskedOverloaded = false,
     ManualCodegen = [{
       IntrinsicTypes = {ResultType, Ops[1]->getType()};
       Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
+      Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType));
     }],
     ManualCodegenMask= [{
       // Move mask to right before vl.
@@ -628,6 +629,7 @@ multiclass RVVVLEFFBuiltin<list<string> types> {
         Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
         Value *NewVL = Ops[1];
         Ops.erase(Ops.begin() + 1);
+        Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType));
         llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes);
         llvm::Value *LoadValue = Builder.CreateCall(F, Ops, "");
         llvm::Value *V = Builder.CreateExtractValue(LoadValue, {0});
@@ -677,6 +679,7 @@ multiclass RVVVLSEBuiltin<list<string> types> {
       ManualCodegen = [{
         IntrinsicTypes = {ResultType, Ops[2]->getType()};
         Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
+        Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType));
       }],
       ManualCodegenMask= [{
         // Move mask to right before vl.
@@ -698,6 +701,7 @@ multiclass RVVIndexedLoad<string op> {
   let ManualCodegen = [{
         IntrinsicTypes = {ResultType, Ops[1]->getType(), Ops[2]->getType()};
         Ops[0] = Builder.CreateBitCast(Ops[0], ResultType->getPointerTo());
+        Ops.insert(Ops.begin(), llvm::UndefValue::get(ResultType));
       }],
       ManualCodegenMask = [{
         // Move mask to right before vl.
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c
index 395c22636a51f..bc1a7ad2a3550 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vloxei.c
@@ -8,7 +8,7 @@
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vloxei8_v_i8mf8(const int8_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vloxei8_v_i8mf8(const int8_t *base, vuint8mf8_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vloxei8_v_i8mf4(const int8_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -28,7 +28,7 @@ vint8mf4_t test_vloxei8_v_i8mf4(const int8_t *base, vuint8mf4_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vloxei8_v_i8mf2(const int8_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -38,7 +38,7 @@ vint8mf2_t test_vloxei8_v_i8mf2(const int8_t *base, vuint8mf2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vloxei8_v_i8m1(const int8_t *base, vuint8m1_t bindex, size_t vl) {
@@ -48,7 +48,7 @@ vint8m1_t test_vloxei8_v_i8m1(const int8_t *base, vuint8m1_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vloxei8_v_i8m2(const int8_t *base, vuint8m2_t bindex, size_t vl) {
@@ -58,7 +58,7 @@ vint8m2_t test_vloxei8_v_i8m2(const int8_t *base, vuint8m2_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vloxei8_v_i8m4(const int8_t *base, vuint8m4_t bindex, size_t vl) {
@@ -68,7 +68,7 @@ vint8m4_t test_vloxei8_v_i8m4(const int8_t *base, vuint8m4_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vint8m8_t test_vloxei8_v_i8m8(const int8_t *base, vuint8m8_t bindex, size_t vl) {
@@ -78,7 +78,7 @@ vint8m8_t test_vloxei8_v_i8m8(const int8_t *base, vuint8m8_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vloxei16_v_i8mf8(const int8_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -88,7 +88,7 @@ vint8mf8_t test_vloxei16_v_i8mf8(const int8_t *base, vuint16mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vloxei16_v_i8mf4(const int8_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -98,7 +98,7 @@ vint8mf4_t test_vloxei16_v_i8mf4(const int8_t *base, vuint16mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vloxei16_v_i8mf2(const int8_t *base, vuint16m1_t bindex, size_t vl) {
@@ -108,7 +108,7 @@ vint8mf2_t test_vloxei16_v_i8mf2(const int8_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vloxei16_v_i8m1(const int8_t *base, vuint16m2_t bindex, size_t vl) {
@@ -118,7 +118,7 @@ vint8m1_t test_vloxei16_v_i8m1(const int8_t *base, vuint16m2_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vloxei16_v_i8m2(const int8_t *base, vuint16m4_t bindex, size_t vl) {
@@ -128,7 +128,7 @@ vint8m2_t test_vloxei16_v_i8m2(const int8_t *base, vuint16m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vloxei16_v_i8m4(const int8_t *base, vuint16m8_t bindex, size_t vl) {
@@ -138,7 +138,7 @@ vint8m4_t test_vloxei16_v_i8m4(const int8_t *base, vuint16m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vloxei32_v_i8mf8(const int8_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -148,7 +148,7 @@ vint8mf8_t test_vloxei32_v_i8mf8(const int8_t *base, vuint32mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vloxei32_v_i8mf4(const int8_t *base, vuint32m1_t bindex, size_t vl) {
@@ -158,7 +158,7 @@ vint8mf4_t test_vloxei32_v_i8mf4(const int8_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vloxei32_v_i8mf2(const int8_t *base, vuint32m2_t bindex, size_t vl) {
@@ -168,7 +168,7 @@ vint8mf2_t test_vloxei32_v_i8mf2(const int8_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vloxei32_v_i8m1(const int8_t *base, vuint32m4_t bindex, size_t vl) {
@@ -178,7 +178,7 @@ vint8m1_t test_vloxei32_v_i8m1(const int8_t *base, vuint32m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vloxei32_v_i8m2(const int8_t *base, vuint32m8_t bindex, size_t vl) {
@@ -188,7 +188,7 @@ vint8m2_t test_vloxei32_v_i8m2(const int8_t *base, vuint32m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei64_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vloxei64_v_i8mf8(const int8_t *base, vuint64m1_t bindex, size_t vl) {
@@ -198,7 +198,7 @@ vint8mf8_t test_vloxei64_v_i8mf8(const int8_t *base, vuint64m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vloxei64_v_i8mf4(const int8_t *base, vuint64m2_t bindex, size_t vl) {
@@ -208,7 +208,7 @@ vint8mf4_t test_vloxei64_v_i8mf4(const int8_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vloxei64_v_i8mf2(const int8_t *base, vuint64m4_t bindex, size_t vl) {
@@ -218,7 +218,7 @@ vint8mf2_t test_vloxei64_v_i8mf2(const int8_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vloxei64_v_i8m1(const int8_t *base, vuint64m8_t bindex, size_t vl) {
@@ -228,7 +228,7 @@ vint8m1_t test_vloxei64_v_i8m1(const int8_t *base, vuint64m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vloxei8_v_i16mf4(const int16_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -238,7 +238,7 @@ vint16mf4_t test_vloxei8_v_i16mf4(const int16_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vloxei8_v_i16mf2(const int16_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -248,7 +248,7 @@ vint16mf2_t test_vloxei8_v_i16mf2(const int16_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vloxei8_v_i16m1(const int16_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -258,7 +258,7 @@ vint16m1_t test_vloxei8_v_i16m1(const int16_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vloxei8_v_i16m2(const int16_t *base, vuint8m1_t bindex, size_t vl) {
@@ -268,7 +268,7 @@ vint16m2_t test_vloxei8_v_i16m2(const int16_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vloxei8_v_i16m4(const int16_t *base, vuint8m2_t bindex, size_t vl) {
@@ -278,7 +278,7 @@ vint16m4_t test_vloxei8_v_i16m4(const int16_t *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vloxei8_v_i16m8(const int16_t *base, vuint8m4_t bindex, size_t vl) {
@@ -288,7 +288,7 @@ vint16m8_t test_vloxei8_v_i16m8(const int16_t *base, vuint8m4_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vloxei16_v_i16mf4(const int16_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -298,7 +298,7 @@ vint16mf4_t test_vloxei16_v_i16mf4(const int16_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vloxei16_v_i16mf2(const int16_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -308,7 +308,7 @@ vint16mf2_t test_vloxei16_v_i16mf2(const int16_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vloxei16_v_i16m1(const int16_t *base, vuint16m1_t bindex, size_t vl) {
@@ -318,7 +318,7 @@ vint16m1_t test_vloxei16_v_i16m1(const int16_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vloxei16_v_i16m2(const int16_t *base, vuint16m2_t bindex, size_t vl) {
@@ -328,7 +328,7 @@ vint16m2_t test_vloxei16_v_i16m2(const int16_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vloxei16_v_i16m4(const int16_t *base, vuint16m4_t bindex, size_t vl) {
@@ -338,7 +338,7 @@ vint16m4_t test_vloxei16_v_i16m4(const int16_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vloxei16_v_i16m8(const int16_t *base, vuint16m8_t bindex, size_t vl) {
@@ -348,7 +348,7 @@ vint16m8_t test_vloxei16_v_i16m8(const int16_t *base, vuint16m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vloxei32_v_i16mf4(const int16_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -358,7 +358,7 @@ vint16mf4_t test_vloxei32_v_i16mf4(const int16_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vloxei32_v_i16mf2(const int16_t *base, vuint32m1_t bindex, size_t vl) {
@@ -368,7 +368,7 @@ vint16mf2_t test_vloxei32_v_i16mf2(const int16_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vloxei32_v_i16m1(const int16_t *base, vuint32m2_t bindex, size_t vl) {
@@ -378,7 +378,7 @@ vint16m1_t test_vloxei32_v_i16m1(const int16_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vloxei32_v_i16m2(const int16_t *base, vuint32m4_t bindex, size_t vl) {
@@ -388,7 +388,7 @@ vint16m2_t test_vloxei32_v_i16m2(const int16_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vloxei32_v_i16m4(const int16_t *base, vuint32m8_t bindex, size_t vl) {
@@ -398,7 +398,7 @@ vint16m4_t test_vloxei32_v_i16m4(const int16_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vloxei64_v_i16mf4(const int16_t *base, vuint64m1_t bindex, size_t vl) {
@@ -408,7 +408,7 @@ vint16mf4_t test_vloxei64_v_i16mf4(const int16_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vloxei64_v_i16mf2(const int16_t *base, vuint64m2_t bindex, size_t vl) {
@@ -418,7 +418,7 @@ vint16mf2_t test_vloxei64_v_i16mf2(const int16_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vloxei64_v_i16m1(const int16_t *base, vuint64m4_t bindex, size_t vl) {
@@ -428,7 +428,7 @@ vint16m1_t test_vloxei64_v_i16m1(const int16_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vloxei64_v_i16m2(const int16_t *base, vuint64m8_t bindex, size_t vl) {
@@ -438,7 +438,7 @@ vint16m2_t test_vloxei64_v_i16m2(const int16_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vloxei8_v_i32mf2(const int32_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -448,7 +448,7 @@ vint32mf2_t test_vloxei8_v_i32mf2(const int32_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vloxei8_v_i32m1(const int32_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -458,7 +458,7 @@ vint32m1_t test_vloxei8_v_i32m1(const int32_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vloxei8_v_i32m2(const int32_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -468,7 +468,7 @@ vint32m2_t test_vloxei8_v_i32m2(const int32_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vloxei8_v_i32m4(const int32_t *base, vuint8m1_t bindex, size_t vl) {
@@ -478,7 +478,7 @@ vint32m4_t test_vloxei8_v_i32m4(const int32_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vloxei8_v_i32m8(const int32_t *base, vuint8m2_t bindex, size_t vl) {
@@ -488,7 +488,7 @@ vint32m8_t test_vloxei8_v_i32m8(const int32_t *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vloxei16_v_i32mf2(const int32_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -498,7 +498,7 @@ vint32mf2_t test_vloxei16_v_i32mf2(const int32_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vloxei16_v_i32m1(const int32_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -508,7 +508,7 @@ vint32m1_t test_vloxei16_v_i32m1(const int32_t *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vloxei16_v_i32m2(const int32_t *base, vuint16m1_t bindex, size_t vl) {
@@ -518,7 +518,7 @@ vint32m2_t test_vloxei16_v_i32m2(const int32_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vloxei16_v_i32m4(const int32_t *base, vuint16m2_t bindex, size_t vl) {
@@ -528,7 +528,7 @@ vint32m4_t test_vloxei16_v_i32m4(const int32_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vloxei16_v_i32m8(const int32_t *base, vuint16m4_t bindex, size_t vl) {
@@ -538,7 +538,7 @@ vint32m8_t test_vloxei16_v_i32m8(const int32_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vloxei32_v_i32mf2(const int32_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -548,7 +548,7 @@ vint32mf2_t test_vloxei32_v_i32mf2(const int32_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vloxei32_v_i32m1(const int32_t *base, vuint32m1_t bindex, size_t vl) {
@@ -558,7 +558,7 @@ vint32m1_t test_vloxei32_v_i32m1(const int32_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vloxei32_v_i32m2(const int32_t *base, vuint32m2_t bindex, size_t vl) {
@@ -568,7 +568,7 @@ vint32m2_t test_vloxei32_v_i32m2(const int32_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vloxei32_v_i32m4(const int32_t *base, vuint32m4_t bindex, size_t vl) {
@@ -578,7 +578,7 @@ vint32m4_t test_vloxei32_v_i32m4(const int32_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vloxei32_v_i32m8(const int32_t *base, vuint32m8_t bindex, size_t vl) {
@@ -588,7 +588,7 @@ vint32m8_t test_vloxei32_v_i32m8(const int32_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vloxei64_v_i32mf2(const int32_t *base, vuint64m1_t bindex, size_t vl) {
@@ -598,7 +598,7 @@ vint32mf2_t test_vloxei64_v_i32mf2(const int32_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vloxei64_v_i32m1(const int32_t *base, vuint64m2_t bindex, size_t vl) {
@@ -608,7 +608,7 @@ vint32m1_t test_vloxei64_v_i32m1(const int32_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vloxei64_v_i32m2(const int32_t *base, vuint64m4_t bindex, size_t vl) {
@@ -618,7 +618,7 @@ vint32m2_t test_vloxei64_v_i32m2(const int32_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vloxei64_v_i32m4(const int32_t *base, vuint64m8_t bindex, size_t vl) {
@@ -628,7 +628,7 @@ vint32m4_t test_vloxei64_v_i32m4(const int32_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vloxei8_v_i64m1(const int64_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -638,7 +638,7 @@ vint64m1_t test_vloxei8_v_i64m1(const int64_t *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vloxei8_v_i64m2(const int64_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -648,7 +648,7 @@ vint64m2_t test_vloxei8_v_i64m2(const int64_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vloxei8_v_i64m4(const int64_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -658,7 +658,7 @@ vint64m4_t test_vloxei8_v_i64m4(const int64_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vloxei8_v_i64m8(const int64_t *base, vuint8m1_t bindex, size_t vl) {
@@ -668,7 +668,7 @@ vint64m8_t test_vloxei8_v_i64m8(const int64_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei16_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vloxei16_v_i64m1(const int64_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -678,7 +678,7 @@ vint64m1_t test_vloxei16_v_i64m1(const int64_t *base, vuint16mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vloxei16_v_i64m2(const int64_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -688,7 +688,7 @@ vint64m2_t test_vloxei16_v_i64m2(const int64_t *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vloxei16_v_i64m4(const int64_t *base, vuint16m1_t bindex, size_t vl) {
@@ -698,7 +698,7 @@ vint64m4_t test_vloxei16_v_i64m4(const int64_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vloxei16_v_i64m8(const int64_t *base, vuint16m2_t bindex, size_t vl) {
@@ -708,7 +708,7 @@ vint64m8_t test_vloxei16_v_i64m8(const int64_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vloxei32_v_i64m1(const int64_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -718,7 +718,7 @@ vint64m1_t test_vloxei32_v_i64m1(const int64_t *base, vuint32mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vloxei32_v_i64m2(const int64_t *base, vuint32m1_t bindex, size_t vl) {
@@ -728,7 +728,7 @@ vint64m2_t test_vloxei32_v_i64m2(const int64_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vloxei32_v_i64m4(const int64_t *base, vuint32m2_t bindex, size_t vl) {
@@ -738,7 +738,7 @@ vint64m4_t test_vloxei32_v_i64m4(const int64_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vloxei32_v_i64m8(const int64_t *base, vuint32m4_t bindex, size_t vl) {
@@ -748,7 +748,7 @@ vint64m8_t test_vloxei32_v_i64m8(const int64_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vloxei64_v_i64m1(const int64_t *base, vuint64m1_t bindex, size_t vl) {
@@ -758,7 +758,7 @@ vint64m1_t test_vloxei64_v_i64m1(const int64_t *base, vuint64m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vloxei64_v_i64m2(const int64_t *base, vuint64m2_t bindex, size_t vl) {
@@ -768,7 +768,7 @@ vint64m2_t test_vloxei64_v_i64m2(const int64_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vloxei64_v_i64m4(const int64_t *base, vuint64m4_t bindex, size_t vl) {
@@ -778,7 +778,7 @@ vint64m4_t test_vloxei64_v_i64m4(const int64_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vloxei64_v_i64m8(const int64_t *base, vuint64m8_t bindex, size_t vl) {
@@ -788,7 +788,7 @@ vint64m8_t test_vloxei64_v_i64m8(const int64_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vloxei8_v_u8mf8(const uint8_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -798,7 +798,7 @@ vuint8mf8_t test_vloxei8_v_u8mf8(const uint8_t *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vloxei8_v_u8mf4(const uint8_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -808,7 +808,7 @@ vuint8mf4_t test_vloxei8_v_u8mf4(const uint8_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vloxei8_v_u8mf2(const uint8_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -818,7 +818,7 @@ vuint8mf2_t test_vloxei8_v_u8mf2(const uint8_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vloxei8_v_u8m1(const uint8_t *base, vuint8m1_t bindex, size_t vl) {
@@ -828,7 +828,7 @@ vuint8m1_t test_vloxei8_v_u8m1(const uint8_t *base, vuint8m1_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vloxei8_v_u8m2(const uint8_t *base, vuint8m2_t bindex, size_t vl) {
@@ -838,7 +838,7 @@ vuint8m2_t test_vloxei8_v_u8m2(const uint8_t *base, vuint8m2_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vloxei8_v_u8m4(const uint8_t *base, vuint8m4_t bindex, size_t vl) {
@@ -848,7 +848,7 @@ vuint8m4_t test_vloxei8_v_u8m4(const uint8_t *base, vuint8m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vuint8m8_t test_vloxei8_v_u8m8(const uint8_t *base, vuint8m8_t bindex, size_t vl) {
@@ -858,7 +858,7 @@ vuint8m8_t test_vloxei8_v_u8m8(const uint8_t *base, vuint8m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vloxei16_v_u8mf8(const uint8_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -868,7 +868,7 @@ vuint8mf8_t test_vloxei16_v_u8mf8(const uint8_t *base, vuint16mf4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vloxei16_v_u8mf4(const uint8_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -878,7 +878,7 @@ vuint8mf4_t test_vloxei16_v_u8mf4(const uint8_t *base, vuint16mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vloxei16_v_u8mf2(const uint8_t *base, vuint16m1_t bindex, size_t vl) {
@@ -888,7 +888,7 @@ vuint8mf2_t test_vloxei16_v_u8mf2(const uint8_t *base, vuint16m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vloxei16_v_u8m1(const uint8_t *base, vuint16m2_t bindex, size_t vl) {
@@ -898,7 +898,7 @@ vuint8m1_t test_vloxei16_v_u8m1(const uint8_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vloxei16_v_u8m2(const uint8_t *base, vuint16m4_t bindex, size_t vl) {
@@ -908,7 +908,7 @@ vuint8m2_t test_vloxei16_v_u8m2(const uint8_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vloxei16_v_u8m4(const uint8_t *base, vuint16m8_t bindex, size_t vl) {
@@ -918,7 +918,7 @@ vuint8m4_t test_vloxei16_v_u8m4(const uint8_t *base, vuint16m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vloxei32_v_u8mf8(const uint8_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -928,7 +928,7 @@ vuint8mf8_t test_vloxei32_v_u8mf8(const uint8_t *base, vuint32mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vloxei32_v_u8mf4(const uint8_t *base, vuint32m1_t bindex, size_t vl) {
@@ -938,7 +938,7 @@ vuint8mf4_t test_vloxei32_v_u8mf4(const uint8_t *base, vuint32m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vloxei32_v_u8mf2(const uint8_t *base, vuint32m2_t bindex, size_t vl) {
@@ -948,7 +948,7 @@ vuint8mf2_t test_vloxei32_v_u8mf2(const uint8_t *base, vuint32m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vloxei32_v_u8m1(const uint8_t *base, vuint32m4_t bindex, size_t vl) {
@@ -958,7 +958,7 @@ vuint8m1_t test_vloxei32_v_u8m1(const uint8_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vloxei32_v_u8m2(const uint8_t *base, vuint32m8_t bindex, size_t vl) {
@@ -968,7 +968,7 @@ vuint8m2_t test_vloxei32_v_u8m2(const uint8_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vloxei64_v_u8mf8(const uint8_t *base, vuint64m1_t bindex, size_t vl) {
@@ -978,7 +978,7 @@ vuint8mf8_t test_vloxei64_v_u8mf8(const uint8_t *base, vuint64m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vloxei64_v_u8mf4(const uint8_t *base, vuint64m2_t bindex, size_t vl) {
@@ -988,7 +988,7 @@ vuint8mf4_t test_vloxei64_v_u8mf4(const uint8_t *base, vuint64m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vloxei64_v_u8mf2(const uint8_t *base, vuint64m4_t bindex, size_t vl) {
@@ -998,7 +998,7 @@ vuint8mf2_t test_vloxei64_v_u8mf2(const uint8_t *base, vuint64m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vloxei64_v_u8m1(const uint8_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1008,7 +1008,7 @@ vuint8m1_t test_vloxei64_v_u8m1(const uint8_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vloxei8_v_u16mf4(const uint16_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1018,7 +1018,7 @@ vuint16mf4_t test_vloxei8_v_u16mf4(const uint16_t *base, vuint8mf8_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vloxei8_v_u16mf2(const uint16_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1028,7 +1028,7 @@ vuint16mf2_t test_vloxei8_v_u16mf2(const uint16_t *base, vuint8mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vloxei8_v_u16m1(const uint16_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1038,7 +1038,7 @@ vuint16m1_t test_vloxei8_v_u16m1(const uint16_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vloxei8_v_u16m2(const uint16_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1048,7 +1048,7 @@ vuint16m2_t test_vloxei8_v_u16m2(const uint16_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vloxei8_v_u16m4(const uint16_t *base, vuint8m2_t bindex, size_t vl) {
@@ -1058,7 +1058,7 @@ vuint16m4_t test_vloxei8_v_u16m4(const uint16_t *base, vuint8m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vloxei8_v_u16m8(const uint16_t *base, vuint8m4_t bindex, size_t vl) {
@@ -1068,7 +1068,7 @@ vuint16m8_t test_vloxei8_v_u16m8(const uint16_t *base, vuint8m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vloxei16_v_u16mf4(const uint16_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1078,7 +1078,7 @@ vuint16mf4_t test_vloxei16_v_u16mf4(const uint16_t *base, vuint16mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vloxei16_v_u16mf2(const uint16_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1088,7 +1088,7 @@ vuint16mf2_t test_vloxei16_v_u16mf2(const uint16_t *base, vuint16mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vloxei16_v_u16m1(const uint16_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1098,7 +1098,7 @@ vuint16m1_t test_vloxei16_v_u16m1(const uint16_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vloxei16_v_u16m2(const uint16_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1108,7 +1108,7 @@ vuint16m2_t test_vloxei16_v_u16m2(const uint16_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vloxei16_v_u16m4(const uint16_t *base, vuint16m4_t bindex, size_t vl) {
@@ -1118,7 +1118,7 @@ vuint16m4_t test_vloxei16_v_u16m4(const uint16_t *base, vuint16m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vloxei16_v_u16m8(const uint16_t *base, vuint16m8_t bindex, size_t vl) {
@@ -1128,7 +1128,7 @@ vuint16m8_t test_vloxei16_v_u16m8(const uint16_t *base, vuint16m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vloxei32_v_u16mf4(const uint16_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1138,7 +1138,7 @@ vuint16mf4_t test_vloxei32_v_u16mf4(const uint16_t *base, vuint32mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vloxei32_v_u16mf2(const uint16_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1148,7 +1148,7 @@ vuint16mf2_t test_vloxei32_v_u16mf2(const uint16_t *base, vuint32m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vloxei32_v_u16m1(const uint16_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1158,7 +1158,7 @@ vuint16m1_t test_vloxei32_v_u16m1(const uint16_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vloxei32_v_u16m2(const uint16_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1168,7 +1168,7 @@ vuint16m2_t test_vloxei32_v_u16m2(const uint16_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vloxei32_v_u16m4(const uint16_t *base, vuint32m8_t bindex, size_t vl) {
@@ -1178,7 +1178,7 @@ vuint16m4_t test_vloxei32_v_u16m4(const uint16_t *base, vuint32m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vloxei64_v_u16mf4(const uint16_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1188,7 +1188,7 @@ vuint16mf4_t test_vloxei64_v_u16mf4(const uint16_t *base, vuint64m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei64_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vloxei64_v_u16mf2(const uint16_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1198,7 +1198,7 @@ vuint16mf2_t test_vloxei64_v_u16mf2(const uint16_t *base, vuint64m2_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei64_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vloxei64_v_u16m1(const uint16_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1209,7 +1209,7 @@ vuint16m1_t test_vloxei64_v_u16m1(const uint16_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vloxei64_v_u16m2(const uint16_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1219,7 +1219,7 @@ vuint16m2_t test_vloxei64_v_u16m2(const uint16_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vloxei8_v_u32mf2(const uint32_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1229,7 +1229,7 @@ vuint32mf2_t test_vloxei8_v_u32mf2(const uint32_t *base, vuint8mf8_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vloxei8_v_u32m1(const uint32_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1239,7 +1239,7 @@ vuint32m1_t test_vloxei8_v_u32m1(const uint32_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vloxei8_v_u32m2(const uint32_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1249,7 +1249,7 @@ vuint32m2_t test_vloxei8_v_u32m2(const uint32_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vloxei8_v_u32m4(const uint32_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1259,7 +1259,7 @@ vuint32m4_t test_vloxei8_v_u32m4(const uint32_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vloxei8_v_u32m8(const uint32_t *base, vuint8m2_t bindex, size_t vl) {
@@ -1269,7 +1269,7 @@ vuint32m8_t test_vloxei8_v_u32m8(const uint32_t *base, vuint8m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vloxei16_v_u32mf2(const uint32_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1279,7 +1279,7 @@ vuint32mf2_t test_vloxei16_v_u32mf2(const uint32_t *base, vuint16mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vloxei16_v_u32m1(const uint32_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1289,7 +1289,7 @@ vuint32m1_t test_vloxei16_v_u32m1(const uint32_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vloxei16_v_u32m2(const uint32_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1299,7 +1299,7 @@ vuint32m2_t test_vloxei16_v_u32m2(const uint32_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vloxei16_v_u32m4(const uint32_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1309,7 +1309,7 @@ vuint32m4_t test_vloxei16_v_u32m4(const uint32_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vloxei16_v_u32m8(const uint32_t *base, vuint16m4_t bindex, size_t vl) {
@@ -1319,7 +1319,7 @@ vuint32m8_t test_vloxei16_v_u32m8(const uint32_t *base, vuint16m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vloxei32_v_u32mf2(const uint32_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1329,7 +1329,7 @@ vuint32mf2_t test_vloxei32_v_u32mf2(const uint32_t *base, vuint32mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vloxei32_v_u32m1(const uint32_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1339,7 +1339,7 @@ vuint32m1_t test_vloxei32_v_u32m1(const uint32_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vloxei32_v_u32m2(const uint32_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1349,7 +1349,7 @@ vuint32m2_t test_vloxei32_v_u32m2(const uint32_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vloxei32_v_u32m4(const uint32_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1359,7 +1359,7 @@ vuint32m4_t test_vloxei32_v_u32m4(const uint32_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vloxei32_v_u32m8(const uint32_t *base, vuint32m8_t bindex, size_t vl) {
@@ -1369,7 +1369,7 @@ vuint32m8_t test_vloxei32_v_u32m8(const uint32_t *base, vuint32m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vloxei64_v_u32mf2(const uint32_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1379,7 +1379,7 @@ vuint32mf2_t test_vloxei64_v_u32mf2(const uint32_t *base, vuint64m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei64_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vloxei64_v_u32m1(const uint32_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1389,7 +1389,7 @@ vuint32m1_t test_vloxei64_v_u32m1(const uint32_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vloxei64_v_u32m2(const uint32_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1399,7 +1399,7 @@ vuint32m2_t test_vloxei64_v_u32m2(const uint32_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vloxei64_v_u32m4(const uint32_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1409,7 +1409,7 @@ vuint32m4_t test_vloxei64_v_u32m4(const uint32_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei8_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vloxei8_v_u64m1(const uint64_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1419,7 +1419,7 @@ vuint64m1_t test_vloxei8_v_u64m1(const uint64_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vloxei8_v_u64m2(const uint64_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1429,7 +1429,7 @@ vuint64m2_t test_vloxei8_v_u64m2(const uint64_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vloxei8_v_u64m4(const uint64_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1439,7 +1439,7 @@ vuint64m4_t test_vloxei8_v_u64m4(const uint64_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vloxei8_v_u64m8(const uint64_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1449,7 +1449,7 @@ vuint64m8_t test_vloxei8_v_u64m8(const uint64_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vloxei16_v_u64m1(const uint64_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1459,7 +1459,7 @@ vuint64m1_t test_vloxei16_v_u64m1(const uint64_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vloxei16_v_u64m2(const uint64_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1469,7 +1469,7 @@ vuint64m2_t test_vloxei16_v_u64m2(const uint64_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vloxei16_v_u64m4(const uint64_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1479,7 +1479,7 @@ vuint64m4_t test_vloxei16_v_u64m4(const uint64_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vloxei16_v_u64m8(const uint64_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1489,7 +1489,7 @@ vuint64m8_t test_vloxei16_v_u64m8(const uint64_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vloxei32_v_u64m1(const uint64_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1499,7 +1499,7 @@ vuint64m1_t test_vloxei32_v_u64m1(const uint64_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei32_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vloxei32_v_u64m2(const uint64_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1509,7 +1509,7 @@ vuint64m2_t test_vloxei32_v_u64m2(const uint64_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vloxei32_v_u64m4(const uint64_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1519,7 +1519,7 @@ vuint64m4_t test_vloxei32_v_u64m4(const uint64_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vloxei32_v_u64m8(const uint64_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1529,7 +1529,7 @@ vuint64m8_t test_vloxei32_v_u64m8(const uint64_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vloxei64_v_u64m1(const uint64_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1539,7 +1539,7 @@ vuint64m1_t test_vloxei64_v_u64m1(const uint64_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vloxei64_v_u64m2(const uint64_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1549,7 +1549,7 @@ vuint64m2_t test_vloxei64_v_u64m2(const uint64_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vloxei64_v_u64m4(const uint64_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1559,7 +1559,7 @@ vuint64m4_t test_vloxei64_v_u64m4(const uint64_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vloxei64_v_u64m8(const uint64_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1569,7 +1569,7 @@ vuint64m8_t test_vloxei64_v_u64m8(const uint64_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vloxei8_v_f32mf2(const float *base, vuint8mf8_t bindex, size_t vl) {
@@ -1579,7 +1579,7 @@ vfloat32mf2_t test_vloxei8_v_f32mf2(const float *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vloxei8_v_f32m1(const float *base, vuint8mf4_t bindex, size_t vl) {
@@ -1589,7 +1589,7 @@ vfloat32m1_t test_vloxei8_v_f32m1(const float *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vloxei8_v_f32m2(const float *base, vuint8mf2_t bindex, size_t vl) {
@@ -1599,7 +1599,7 @@ vfloat32m2_t test_vloxei8_v_f32m2(const float *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vloxei8_v_f32m4(const float *base, vuint8m1_t bindex, size_t vl) {
@@ -1609,7 +1609,7 @@ vfloat32m4_t test_vloxei8_v_f32m4(const float *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vloxei8_v_f32m8(const float *base, vuint8m2_t bindex, size_t vl) {
@@ -1619,7 +1619,7 @@ vfloat32m8_t test_vloxei8_v_f32m8(const float *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vloxei16_v_f32mf2(const float *base, vuint16mf4_t bindex, size_t vl) {
@@ -1629,7 +1629,7 @@ vfloat32mf2_t test_vloxei16_v_f32mf2(const float *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vloxei16_v_f32m1(const float *base, vuint16mf2_t bindex, size_t vl) {
@@ -1639,7 +1639,7 @@ vfloat32m1_t test_vloxei16_v_f32m1(const float *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vloxei16_v_f32m2(const float *base, vuint16m1_t bindex, size_t vl) {
@@ -1649,7 +1649,7 @@ vfloat32m2_t test_vloxei16_v_f32m2(const float *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vloxei16_v_f32m4(const float *base, vuint16m2_t bindex, size_t vl) {
@@ -1659,7 +1659,7 @@ vfloat32m4_t test_vloxei16_v_f32m4(const float *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vloxei16_v_f32m8(const float *base, vuint16m4_t bindex, size_t vl) {
@@ -1669,7 +1669,7 @@ vfloat32m8_t test_vloxei16_v_f32m8(const float *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vloxei32_v_f32mf2(const float *base, vuint32mf2_t bindex, size_t vl) {
@@ -1679,7 +1679,7 @@ vfloat32mf2_t test_vloxei32_v_f32mf2(const float *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vloxei32_v_f32m1(const float *base, vuint32m1_t bindex, size_t vl) {
@@ -1689,7 +1689,7 @@ vfloat32m1_t test_vloxei32_v_f32m1(const float *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vloxei32_v_f32m2(const float *base, vuint32m2_t bindex, size_t vl) {
@@ -1699,7 +1699,7 @@ vfloat32m2_t test_vloxei32_v_f32m2(const float *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vloxei32_v_f32m4(const float *base, vuint32m4_t bindex, size_t vl) {
@@ -1709,7 +1709,7 @@ vfloat32m4_t test_vloxei32_v_f32m4(const float *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vloxei32_v_f32m8(const float *base, vuint32m8_t bindex, size_t vl) {
@@ -1719,7 +1719,7 @@ vfloat32m8_t test_vloxei32_v_f32m8(const float *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vloxei64_v_f32mf2(const float *base, vuint64m1_t bindex, size_t vl) {
@@ -1729,7 +1729,7 @@ vfloat32mf2_t test_vloxei64_v_f32mf2(const float *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vloxei64_v_f32m1(const float *base, vuint64m2_t bindex, size_t vl) {
@@ -1739,7 +1739,7 @@ vfloat32m1_t test_vloxei64_v_f32m1(const float *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vloxei64_v_f32m2(const float *base, vuint64m4_t bindex, size_t vl) {
@@ -1749,7 +1749,7 @@ vfloat32m2_t test_vloxei64_v_f32m2(const float *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vloxei64_v_f32m4(const float *base, vuint64m8_t bindex, size_t vl) {
@@ -1759,7 +1759,7 @@ vfloat32m4_t test_vloxei64_v_f32m4(const float *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vloxei8_v_f64m1(const double *base, vuint8mf8_t bindex, size_t vl) {
@@ -1769,7 +1769,7 @@ vfloat64m1_t test_vloxei8_v_f64m1(const double *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vloxei8_v_f64m2(const double *base, vuint8mf4_t bindex, size_t vl) {
@@ -1779,7 +1779,7 @@ vfloat64m2_t test_vloxei8_v_f64m2(const double *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vloxei8_v_f64m4(const double *base, vuint8mf2_t bindex, size_t vl) {
@@ -1789,7 +1789,7 @@ vfloat64m4_t test_vloxei8_v_f64m4(const double *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vloxei8_v_f64m8(const double *base, vuint8m1_t bindex, size_t vl) {
@@ -1799,7 +1799,7 @@ vfloat64m8_t test_vloxei8_v_f64m8(const double *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vloxei16_v_f64m1(const double *base, vuint16mf4_t bindex, size_t vl) {
@@ -1809,7 +1809,7 @@ vfloat64m1_t test_vloxei16_v_f64m1(const double *base, vuint16mf4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vloxei16_v_f64m2(const double *base, vuint16mf2_t bindex, size_t vl) {
@@ -1819,7 +1819,7 @@ vfloat64m2_t test_vloxei16_v_f64m2(const double *base, vuint16mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vloxei16_v_f64m4(const double *base, vuint16m1_t bindex, size_t vl) {
@@ -1829,7 +1829,7 @@ vfloat64m4_t test_vloxei16_v_f64m4(const double *base, vuint16m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vloxei16_v_f64m8(const double *base, vuint16m2_t bindex, size_t vl) {
@@ -1839,7 +1839,7 @@ vfloat64m8_t test_vloxei16_v_f64m8(const double *base, vuint16m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vloxei32_v_f64m1(const double *base, vuint32mf2_t bindex, size_t vl) {
@@ -1849,7 +1849,7 @@ vfloat64m1_t test_vloxei32_v_f64m1(const double *base, vuint32mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vloxei32_v_f64m2(const double *base, vuint32m1_t bindex, size_t vl) {
@@ -1859,7 +1859,7 @@ vfloat64m2_t test_vloxei32_v_f64m2(const double *base, vuint32m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vloxei32_v_f64m4(const double *base, vuint32m2_t bindex, size_t vl) {
@@ -1869,7 +1869,7 @@ vfloat64m4_t test_vloxei32_v_f64m4(const double *base, vuint32m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vloxei32_v_f64m8(const double *base, vuint32m4_t bindex, size_t vl) {
@@ -1879,7 +1879,7 @@ vfloat64m8_t test_vloxei32_v_f64m8(const double *base, vuint32m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i64.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vloxei64_v_f64m1(const double *base, vuint64m1_t bindex, size_t vl) {
@@ -1889,7 +1889,7 @@ vfloat64m1_t test_vloxei64_v_f64m1(const double *base, vuint64m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i64.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i64.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vloxei64_v_f64m2(const double *base, vuint64m2_t bindex, size_t vl) {
@@ -1899,7 +1899,7 @@ vfloat64m2_t test_vloxei64_v_f64m2(const double *base, vuint64m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i64.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i64.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vloxei64_v_f64m4(const double *base, vuint64m4_t bindex, size_t vl) {
@@ -1909,7 +1909,7 @@ vfloat64m4_t test_vloxei64_v_f64m4(const double *base, vuint64m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i64.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i64.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vloxei64_v_f64m8(const double *base, vuint64m8_t bindex, size_t vl) {
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c
index f1a1081aacfb7..2aabcab68be4a 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-overloaded/vluxei.c
@@ -8,7 +8,7 @@
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vluxei8_v_i8mf8(const int8_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -18,7 +18,7 @@ vint8mf8_t test_vluxei8_v_i8mf8(const int8_t *base, vuint8mf8_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vluxei8_v_i8mf4(const int8_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -28,7 +28,7 @@ vint8mf4_t test_vluxei8_v_i8mf4(const int8_t *base, vuint8mf4_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vluxei8_v_i8mf2(const int8_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -38,7 +38,7 @@ vint8mf2_t test_vluxei8_v_i8mf2(const int8_t *base, vuint8mf2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vluxei8_v_i8m1(const int8_t *base, vuint8m1_t bindex, size_t vl) {
@@ -48,7 +48,7 @@ vint8m1_t test_vluxei8_v_i8m1(const int8_t *base, vuint8m1_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vluxei8_v_i8m2(const int8_t *base, vuint8m2_t bindex, size_t vl) {
@@ -58,7 +58,7 @@ vint8m2_t test_vluxei8_v_i8m2(const int8_t *base, vuint8m2_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vluxei8_v_i8m4(const int8_t *base, vuint8m4_t bindex, size_t vl) {
@@ -68,7 +68,7 @@ vint8m4_t test_vluxei8_v_i8m4(const int8_t *base, vuint8m4_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vint8m8_t test_vluxei8_v_i8m8(const int8_t *base, vuint8m8_t bindex, size_t vl) {
@@ -78,7 +78,7 @@ vint8m8_t test_vluxei8_v_i8m8(const int8_t *base, vuint8m8_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vluxei16_v_i8mf8(const int8_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -88,7 +88,7 @@ vint8mf8_t test_vluxei16_v_i8mf8(const int8_t *base, vuint16mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vluxei16_v_i8mf4(const int8_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -98,7 +98,7 @@ vint8mf4_t test_vluxei16_v_i8mf4(const int8_t *base, vuint16mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vluxei16_v_i8mf2(const int8_t *base, vuint16m1_t bindex, size_t vl) {
@@ -108,7 +108,7 @@ vint8mf2_t test_vluxei16_v_i8mf2(const int8_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vluxei16_v_i8m1(const int8_t *base, vuint16m2_t bindex, size_t vl) {
@@ -118,7 +118,7 @@ vint8m1_t test_vluxei16_v_i8m1(const int8_t *base, vuint16m2_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vluxei16_v_i8m2(const int8_t *base, vuint16m4_t bindex, size_t vl) {
@@ -128,7 +128,7 @@ vint8m2_t test_vluxei16_v_i8m2(const int8_t *base, vuint16m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vluxei16_v_i8m4(const int8_t *base, vuint16m8_t bindex, size_t vl) {
@@ -138,7 +138,7 @@ vint8m4_t test_vluxei16_v_i8m4(const int8_t *base, vuint16m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vluxei32_v_i8mf8(const int8_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -148,7 +148,7 @@ vint8mf8_t test_vluxei32_v_i8mf8(const int8_t *base, vuint32mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vluxei32_v_i8mf4(const int8_t *base, vuint32m1_t bindex, size_t vl) {
@@ -158,7 +158,7 @@ vint8mf4_t test_vluxei32_v_i8mf4(const int8_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vluxei32_v_i8mf2(const int8_t *base, vuint32m2_t bindex, size_t vl) {
@@ -168,7 +168,7 @@ vint8mf2_t test_vluxei32_v_i8mf2(const int8_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vluxei32_v_i8m1(const int8_t *base, vuint32m4_t bindex, size_t vl) {
@@ -178,7 +178,7 @@ vint8m1_t test_vluxei32_v_i8m1(const int8_t *base, vuint32m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vluxei32_v_i8m2(const int8_t *base, vuint32m8_t bindex, size_t vl) {
@@ -188,7 +188,7 @@ vint8m2_t test_vluxei32_v_i8m2(const int8_t *base, vuint32m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei64_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vluxei64_v_i8mf8(const int8_t *base, vuint64m1_t bindex, size_t vl) {
@@ -198,7 +198,7 @@ vint8mf8_t test_vluxei64_v_i8mf8(const int8_t *base, vuint64m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vluxei64_v_i8mf4(const int8_t *base, vuint64m2_t bindex, size_t vl) {
@@ -208,7 +208,7 @@ vint8mf4_t test_vluxei64_v_i8mf4(const int8_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vluxei64_v_i8mf2(const int8_t *base, vuint64m4_t bindex, size_t vl) {
@@ -218,7 +218,7 @@ vint8mf2_t test_vluxei64_v_i8mf2(const int8_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vluxei64_v_i8m1(const int8_t *base, vuint64m8_t bindex, size_t vl) {
@@ -228,7 +228,7 @@ vint8m1_t test_vluxei64_v_i8m1(const int8_t *base, vuint64m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vluxei8_v_i16mf4(const int16_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -238,7 +238,7 @@ vint16mf4_t test_vluxei8_v_i16mf4(const int16_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vluxei8_v_i16mf2(const int16_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -248,7 +248,7 @@ vint16mf2_t test_vluxei8_v_i16mf2(const int16_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vluxei8_v_i16m1(const int16_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -258,7 +258,7 @@ vint16m1_t test_vluxei8_v_i16m1(const int16_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vluxei8_v_i16m2(const int16_t *base, vuint8m1_t bindex, size_t vl) {
@@ -268,7 +268,7 @@ vint16m2_t test_vluxei8_v_i16m2(const int16_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vluxei8_v_i16m4(const int16_t *base, vuint8m2_t bindex, size_t vl) {
@@ -278,7 +278,7 @@ vint16m4_t test_vluxei8_v_i16m4(const int16_t *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vluxei8_v_i16m8(const int16_t *base, vuint8m4_t bindex, size_t vl) {
@@ -288,7 +288,7 @@ vint16m8_t test_vluxei8_v_i16m8(const int16_t *base, vuint8m4_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vluxei16_v_i16mf4(const int16_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -298,7 +298,7 @@ vint16mf4_t test_vluxei16_v_i16mf4(const int16_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vluxei16_v_i16mf2(const int16_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -308,7 +308,7 @@ vint16mf2_t test_vluxei16_v_i16mf2(const int16_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vluxei16_v_i16m1(const int16_t *base, vuint16m1_t bindex, size_t vl) {
@@ -318,7 +318,7 @@ vint16m1_t test_vluxei16_v_i16m1(const int16_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vluxei16_v_i16m2(const int16_t *base, vuint16m2_t bindex, size_t vl) {
@@ -328,7 +328,7 @@ vint16m2_t test_vluxei16_v_i16m2(const int16_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vluxei16_v_i16m4(const int16_t *base, vuint16m4_t bindex, size_t vl) {
@@ -338,7 +338,7 @@ vint16m4_t test_vluxei16_v_i16m4(const int16_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vluxei16_v_i16m8(const int16_t *base, vuint16m8_t bindex, size_t vl) {
@@ -348,7 +348,7 @@ vint16m8_t test_vluxei16_v_i16m8(const int16_t *base, vuint16m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vluxei32_v_i16mf4(const int16_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -358,7 +358,7 @@ vint16mf4_t test_vluxei32_v_i16mf4(const int16_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vluxei32_v_i16mf2(const int16_t *base, vuint32m1_t bindex, size_t vl) {
@@ -368,7 +368,7 @@ vint16mf2_t test_vluxei32_v_i16mf2(const int16_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vluxei32_v_i16m1(const int16_t *base, vuint32m2_t bindex, size_t vl) {
@@ -378,7 +378,7 @@ vint16m1_t test_vluxei32_v_i16m1(const int16_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vluxei32_v_i16m2(const int16_t *base, vuint32m4_t bindex, size_t vl) {
@@ -388,7 +388,7 @@ vint16m2_t test_vluxei32_v_i16m2(const int16_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vluxei32_v_i16m4(const int16_t *base, vuint32m8_t bindex, size_t vl) {
@@ -398,7 +398,7 @@ vint16m4_t test_vluxei32_v_i16m4(const int16_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vluxei64_v_i16mf4(const int16_t *base, vuint64m1_t bindex, size_t vl) {
@@ -408,7 +408,7 @@ vint16mf4_t test_vluxei64_v_i16mf4(const int16_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vluxei64_v_i16mf2(const int16_t *base, vuint64m2_t bindex, size_t vl) {
@@ -418,7 +418,7 @@ vint16mf2_t test_vluxei64_v_i16mf2(const int16_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vluxei64_v_i16m1(const int16_t *base, vuint64m4_t bindex, size_t vl) {
@@ -428,7 +428,7 @@ vint16m1_t test_vluxei64_v_i16m1(const int16_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vluxei64_v_i16m2(const int16_t *base, vuint64m8_t bindex, size_t vl) {
@@ -438,7 +438,7 @@ vint16m2_t test_vluxei64_v_i16m2(const int16_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vluxei8_v_i32mf2(const int32_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -448,7 +448,7 @@ vint32mf2_t test_vluxei8_v_i32mf2(const int32_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vluxei8_v_i32m1(const int32_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -458,7 +458,7 @@ vint32m1_t test_vluxei8_v_i32m1(const int32_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vluxei8_v_i32m2(const int32_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -468,7 +468,7 @@ vint32m2_t test_vluxei8_v_i32m2(const int32_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vluxei8_v_i32m4(const int32_t *base, vuint8m1_t bindex, size_t vl) {
@@ -478,7 +478,7 @@ vint32m4_t test_vluxei8_v_i32m4(const int32_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vluxei8_v_i32m8(const int32_t *base, vuint8m2_t bindex, size_t vl) {
@@ -488,7 +488,7 @@ vint32m8_t test_vluxei8_v_i32m8(const int32_t *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vluxei16_v_i32mf2(const int32_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -498,7 +498,7 @@ vint32mf2_t test_vluxei16_v_i32mf2(const int32_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vluxei16_v_i32m1(const int32_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -508,7 +508,7 @@ vint32m1_t test_vluxei16_v_i32m1(const int32_t *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vluxei16_v_i32m2(const int32_t *base, vuint16m1_t bindex, size_t vl) {
@@ -518,7 +518,7 @@ vint32m2_t test_vluxei16_v_i32m2(const int32_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vluxei16_v_i32m4(const int32_t *base, vuint16m2_t bindex, size_t vl) {
@@ -528,7 +528,7 @@ vint32m4_t test_vluxei16_v_i32m4(const int32_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vluxei16_v_i32m8(const int32_t *base, vuint16m4_t bindex, size_t vl) {
@@ -538,7 +538,7 @@ vint32m8_t test_vluxei16_v_i32m8(const int32_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vluxei32_v_i32mf2(const int32_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -548,7 +548,7 @@ vint32mf2_t test_vluxei32_v_i32mf2(const int32_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vluxei32_v_i32m1(const int32_t *base, vuint32m1_t bindex, size_t vl) {
@@ -558,7 +558,7 @@ vint32m1_t test_vluxei32_v_i32m1(const int32_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vluxei32_v_i32m2(const int32_t *base, vuint32m2_t bindex, size_t vl) {
@@ -568,7 +568,7 @@ vint32m2_t test_vluxei32_v_i32m2(const int32_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vluxei32_v_i32m4(const int32_t *base, vuint32m4_t bindex, size_t vl) {
@@ -578,7 +578,7 @@ vint32m4_t test_vluxei32_v_i32m4(const int32_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vluxei32_v_i32m8(const int32_t *base, vuint32m8_t bindex, size_t vl) {
@@ -588,7 +588,7 @@ vint32m8_t test_vluxei32_v_i32m8(const int32_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vluxei64_v_i32mf2(const int32_t *base, vuint64m1_t bindex, size_t vl) {
@@ -598,7 +598,7 @@ vint32mf2_t test_vluxei64_v_i32mf2(const int32_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vluxei64_v_i32m1(const int32_t *base, vuint64m2_t bindex, size_t vl) {
@@ -608,7 +608,7 @@ vint32m1_t test_vluxei64_v_i32m1(const int32_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vluxei64_v_i32m2(const int32_t *base, vuint64m4_t bindex, size_t vl) {
@@ -618,7 +618,7 @@ vint32m2_t test_vluxei64_v_i32m2(const int32_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vluxei64_v_i32m4(const int32_t *base, vuint64m8_t bindex, size_t vl) {
@@ -628,7 +628,7 @@ vint32m4_t test_vluxei64_v_i32m4(const int32_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vluxei8_v_i64m1(const int64_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -638,7 +638,7 @@ vint64m1_t test_vluxei8_v_i64m1(const int64_t *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vluxei8_v_i64m2(const int64_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -648,7 +648,7 @@ vint64m2_t test_vluxei8_v_i64m2(const int64_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vluxei8_v_i64m4(const int64_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -658,7 +658,7 @@ vint64m4_t test_vluxei8_v_i64m4(const int64_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vluxei8_v_i64m8(const int64_t *base, vuint8m1_t bindex, size_t vl) {
@@ -668,7 +668,7 @@ vint64m8_t test_vluxei8_v_i64m8(const int64_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei16_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vluxei16_v_i64m1(const int64_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -678,7 +678,7 @@ vint64m1_t test_vluxei16_v_i64m1(const int64_t *base, vuint16mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vluxei16_v_i64m2(const int64_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -688,7 +688,7 @@ vint64m2_t test_vluxei16_v_i64m2(const int64_t *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vluxei16_v_i64m4(const int64_t *base, vuint16m1_t bindex, size_t vl) {
@@ -698,7 +698,7 @@ vint64m4_t test_vluxei16_v_i64m4(const int64_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vluxei16_v_i64m8(const int64_t *base, vuint16m2_t bindex, size_t vl) {
@@ -708,7 +708,7 @@ vint64m8_t test_vluxei16_v_i64m8(const int64_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vluxei32_v_i64m1(const int64_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -718,7 +718,7 @@ vint64m1_t test_vluxei32_v_i64m1(const int64_t *base, vuint32mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vluxei32_v_i64m2(const int64_t *base, vuint32m1_t bindex, size_t vl) {
@@ -728,7 +728,7 @@ vint64m2_t test_vluxei32_v_i64m2(const int64_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vluxei32_v_i64m4(const int64_t *base, vuint32m2_t bindex, size_t vl) {
@@ -738,7 +738,7 @@ vint64m4_t test_vluxei32_v_i64m4(const int64_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vluxei32_v_i64m8(const int64_t *base, vuint32m4_t bindex, size_t vl) {
@@ -748,7 +748,7 @@ vint64m8_t test_vluxei32_v_i64m8(const int64_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vluxei64_v_i64m1(const int64_t *base, vuint64m1_t bindex, size_t vl) {
@@ -758,7 +758,7 @@ vint64m1_t test_vluxei64_v_i64m1(const int64_t *base, vuint64m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vluxei64_v_i64m2(const int64_t *base, vuint64m2_t bindex, size_t vl) {
@@ -768,7 +768,7 @@ vint64m2_t test_vluxei64_v_i64m2(const int64_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vluxei64_v_i64m4(const int64_t *base, vuint64m4_t bindex, size_t vl) {
@@ -778,7 +778,7 @@ vint64m4_t test_vluxei64_v_i64m4(const int64_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vluxei64_v_i64m8(const int64_t *base, vuint64m8_t bindex, size_t vl) {
@@ -788,7 +788,7 @@ vint64m8_t test_vluxei64_v_i64m8(const int64_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vluxei8_v_u8mf8(const uint8_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -798,7 +798,7 @@ vuint8mf8_t test_vluxei8_v_u8mf8(const uint8_t *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vluxei8_v_u8mf4(const uint8_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -808,7 +808,7 @@ vuint8mf4_t test_vluxei8_v_u8mf4(const uint8_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vluxei8_v_u8mf2(const uint8_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -818,7 +818,7 @@ vuint8mf2_t test_vluxei8_v_u8mf2(const uint8_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vluxei8_v_u8m1(const uint8_t *base, vuint8m1_t bindex, size_t vl) {
@@ -828,7 +828,7 @@ vuint8m1_t test_vluxei8_v_u8m1(const uint8_t *base, vuint8m1_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vluxei8_v_u8m2(const uint8_t *base, vuint8m2_t bindex, size_t vl) {
@@ -838,7 +838,7 @@ vuint8m2_t test_vluxei8_v_u8m2(const uint8_t *base, vuint8m2_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vluxei8_v_u8m4(const uint8_t *base, vuint8m4_t bindex, size_t vl) {
@@ -848,7 +848,7 @@ vuint8m4_t test_vluxei8_v_u8m4(const uint8_t *base, vuint8m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vuint8m8_t test_vluxei8_v_u8m8(const uint8_t *base, vuint8m8_t bindex, size_t vl) {
@@ -858,7 +858,7 @@ vuint8m8_t test_vluxei8_v_u8m8(const uint8_t *base, vuint8m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vluxei16_v_u8mf8(const uint8_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -868,7 +868,7 @@ vuint8mf8_t test_vluxei16_v_u8mf8(const uint8_t *base, vuint16mf4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vluxei16_v_u8mf4(const uint8_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -878,7 +878,7 @@ vuint8mf4_t test_vluxei16_v_u8mf4(const uint8_t *base, vuint16mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vluxei16_v_u8mf2(const uint8_t *base, vuint16m1_t bindex, size_t vl) {
@@ -888,7 +888,7 @@ vuint8mf2_t test_vluxei16_v_u8mf2(const uint8_t *base, vuint16m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vluxei16_v_u8m1(const uint8_t *base, vuint16m2_t bindex, size_t vl) {
@@ -898,7 +898,7 @@ vuint8m1_t test_vluxei16_v_u8m1(const uint8_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vluxei16_v_u8m2(const uint8_t *base, vuint16m4_t bindex, size_t vl) {
@@ -908,7 +908,7 @@ vuint8m2_t test_vluxei16_v_u8m2(const uint8_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vluxei16_v_u8m4(const uint8_t *base, vuint16m8_t bindex, size_t vl) {
@@ -918,7 +918,7 @@ vuint8m4_t test_vluxei16_v_u8m4(const uint8_t *base, vuint16m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vluxei32_v_u8mf8(const uint8_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -928,7 +928,7 @@ vuint8mf8_t test_vluxei32_v_u8mf8(const uint8_t *base, vuint32mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vluxei32_v_u8mf4(const uint8_t *base, vuint32m1_t bindex, size_t vl) {
@@ -938,7 +938,7 @@ vuint8mf4_t test_vluxei32_v_u8mf4(const uint8_t *base, vuint32m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vluxei32_v_u8mf2(const uint8_t *base, vuint32m2_t bindex, size_t vl) {
@@ -948,7 +948,7 @@ vuint8mf2_t test_vluxei32_v_u8mf2(const uint8_t *base, vuint32m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vluxei32_v_u8m1(const uint8_t *base, vuint32m4_t bindex, size_t vl) {
@@ -958,7 +958,7 @@ vuint8m1_t test_vluxei32_v_u8m1(const uint8_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vluxei32_v_u8m2(const uint8_t *base, vuint32m8_t bindex, size_t vl) {
@@ -968,7 +968,7 @@ vuint8m2_t test_vluxei32_v_u8m2(const uint8_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vluxei64_v_u8mf8(const uint8_t *base, vuint64m1_t bindex, size_t vl) {
@@ -978,7 +978,7 @@ vuint8mf8_t test_vluxei64_v_u8mf8(const uint8_t *base, vuint64m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vluxei64_v_u8mf4(const uint8_t *base, vuint64m2_t bindex, size_t vl) {
@@ -988,7 +988,7 @@ vuint8mf4_t test_vluxei64_v_u8mf4(const uint8_t *base, vuint64m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vluxei64_v_u8mf2(const uint8_t *base, vuint64m4_t bindex, size_t vl) {
@@ -998,7 +998,7 @@ vuint8mf2_t test_vluxei64_v_u8mf2(const uint8_t *base, vuint64m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vluxei64_v_u8m1(const uint8_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1008,7 +1008,7 @@ vuint8m1_t test_vluxei64_v_u8m1(const uint8_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vluxei8_v_u16mf4(const uint16_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1018,7 +1018,7 @@ vuint16mf4_t test_vluxei8_v_u16mf4(const uint16_t *base, vuint8mf8_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vluxei8_v_u16mf2(const uint16_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1028,7 +1028,7 @@ vuint16mf2_t test_vluxei8_v_u16mf2(const uint16_t *base, vuint8mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vluxei8_v_u16m1(const uint16_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1038,7 +1038,7 @@ vuint16m1_t test_vluxei8_v_u16m1(const uint16_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vluxei8_v_u16m2(const uint16_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1048,7 +1048,7 @@ vuint16m2_t test_vluxei8_v_u16m2(const uint16_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vluxei8_v_u16m4(const uint16_t *base, vuint8m2_t bindex, size_t vl) {
@@ -1058,7 +1058,7 @@ vuint16m4_t test_vluxei8_v_u16m4(const uint16_t *base, vuint8m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vluxei8_v_u16m8(const uint16_t *base, vuint8m4_t bindex, size_t vl) {
@@ -1068,7 +1068,7 @@ vuint16m8_t test_vluxei8_v_u16m8(const uint16_t *base, vuint8m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vluxei16_v_u16mf4(const uint16_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1078,7 +1078,7 @@ vuint16mf4_t test_vluxei16_v_u16mf4(const uint16_t *base, vuint16mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vluxei16_v_u16mf2(const uint16_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1088,7 +1088,7 @@ vuint16mf2_t test_vluxei16_v_u16mf2(const uint16_t *base, vuint16mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vluxei16_v_u16m1(const uint16_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1098,7 +1098,7 @@ vuint16m1_t test_vluxei16_v_u16m1(const uint16_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vluxei16_v_u16m2(const uint16_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1108,7 +1108,7 @@ vuint16m2_t test_vluxei16_v_u16m2(const uint16_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vluxei16_v_u16m4(const uint16_t *base, vuint16m4_t bindex, size_t vl) {
@@ -1118,7 +1118,7 @@ vuint16m4_t test_vluxei16_v_u16m4(const uint16_t *base, vuint16m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vluxei16_v_u16m8(const uint16_t *base, vuint16m8_t bindex, size_t vl) {
@@ -1128,7 +1128,7 @@ vuint16m8_t test_vluxei16_v_u16m8(const uint16_t *base, vuint16m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vluxei32_v_u16mf4(const uint16_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1138,7 +1138,7 @@ vuint16mf4_t test_vluxei32_v_u16mf4(const uint16_t *base, vuint32mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vluxei32_v_u16mf2(const uint16_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1148,7 +1148,7 @@ vuint16mf2_t test_vluxei32_v_u16mf2(const uint16_t *base, vuint32m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vluxei32_v_u16m1(const uint16_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1158,7 +1158,7 @@ vuint16m1_t test_vluxei32_v_u16m1(const uint16_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vluxei32_v_u16m2(const uint16_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1168,7 +1168,7 @@ vuint16m2_t test_vluxei32_v_u16m2(const uint16_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vluxei32_v_u16m4(const uint16_t *base, vuint32m8_t bindex, size_t vl) {
@@ -1178,7 +1178,7 @@ vuint16m4_t test_vluxei32_v_u16m4(const uint16_t *base, vuint32m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vluxei64_v_u16mf4(const uint16_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1188,7 +1188,7 @@ vuint16mf4_t test_vluxei64_v_u16mf4(const uint16_t *base, vuint64m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei64_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vluxei64_v_u16mf2(const uint16_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1198,7 +1198,7 @@ vuint16mf2_t test_vluxei64_v_u16mf2(const uint16_t *base, vuint64m2_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei64_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vluxei64_v_u16m1(const uint16_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1208,7 +1208,7 @@ vuint16m1_t test_vluxei64_v_u16m1(const uint16_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vluxei64_v_u16m2(const uint16_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1218,7 +1218,7 @@ vuint16m2_t test_vluxei64_v_u16m2(const uint16_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vluxei8_v_u32mf2(const uint32_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1228,7 +1228,7 @@ vuint32mf2_t test_vluxei8_v_u32mf2(const uint32_t *base, vuint8mf8_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vluxei8_v_u32m1(const uint32_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1238,7 +1238,7 @@ vuint32m1_t test_vluxei8_v_u32m1(const uint32_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vluxei8_v_u32m2(const uint32_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1248,7 +1248,7 @@ vuint32m2_t test_vluxei8_v_u32m2(const uint32_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vluxei8_v_u32m4(const uint32_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1258,7 +1258,7 @@ vuint32m4_t test_vluxei8_v_u32m4(const uint32_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vluxei8_v_u32m8(const uint32_t *base, vuint8m2_t bindex, size_t vl) {
@@ -1268,7 +1268,7 @@ vuint32m8_t test_vluxei8_v_u32m8(const uint32_t *base, vuint8m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vluxei16_v_u32mf2(const uint32_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1278,7 +1278,7 @@ vuint32mf2_t test_vluxei16_v_u32mf2(const uint32_t *base, vuint16mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vluxei16_v_u32m1(const uint32_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1288,7 +1288,7 @@ vuint32m1_t test_vluxei16_v_u32m1(const uint32_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vluxei16_v_u32m2(const uint32_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1298,7 +1298,7 @@ vuint32m2_t test_vluxei16_v_u32m2(const uint32_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vluxei16_v_u32m4(const uint32_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1308,7 +1308,7 @@ vuint32m4_t test_vluxei16_v_u32m4(const uint32_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vluxei16_v_u32m8(const uint32_t *base, vuint16m4_t bindex, size_t vl) {
@@ -1318,7 +1318,7 @@ vuint32m8_t test_vluxei16_v_u32m8(const uint32_t *base, vuint16m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vluxei32_v_u32mf2(const uint32_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1328,7 +1328,7 @@ vuint32mf2_t test_vluxei32_v_u32mf2(const uint32_t *base, vuint32mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vluxei32_v_u32m1(const uint32_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1338,7 +1338,7 @@ vuint32m1_t test_vluxei32_v_u32m1(const uint32_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vluxei32_v_u32m2(const uint32_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1348,7 +1348,7 @@ vuint32m2_t test_vluxei32_v_u32m2(const uint32_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vluxei32_v_u32m4(const uint32_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1358,7 +1358,7 @@ vuint32m4_t test_vluxei32_v_u32m4(const uint32_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vluxei32_v_u32m8(const uint32_t *base, vuint32m8_t bindex, size_t vl) {
@@ -1368,7 +1368,7 @@ vuint32m8_t test_vluxei32_v_u32m8(const uint32_t *base, vuint32m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vluxei64_v_u32mf2(const uint32_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1378,7 +1378,7 @@ vuint32mf2_t test_vluxei64_v_u32mf2(const uint32_t *base, vuint64m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei64_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vluxei64_v_u32m1(const uint32_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1388,7 +1388,7 @@ vuint32m1_t test_vluxei64_v_u32m1(const uint32_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vluxei64_v_u32m2(const uint32_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1398,7 +1398,7 @@ vuint32m2_t test_vluxei64_v_u32m2(const uint32_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vluxei64_v_u32m4(const uint32_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1408,7 +1408,7 @@ vuint32m4_t test_vluxei64_v_u32m4(const uint32_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei8_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vluxei8_v_u64m1(const uint64_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1418,7 +1418,7 @@ vuint64m1_t test_vluxei8_v_u64m1(const uint64_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vluxei8_v_u64m2(const uint64_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1428,7 +1428,7 @@ vuint64m2_t test_vluxei8_v_u64m2(const uint64_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vluxei8_v_u64m4(const uint64_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1438,7 +1438,7 @@ vuint64m4_t test_vluxei8_v_u64m4(const uint64_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vluxei8_v_u64m8(const uint64_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1448,7 +1448,7 @@ vuint64m8_t test_vluxei8_v_u64m8(const uint64_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vluxei16_v_u64m1(const uint64_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1458,7 +1458,7 @@ vuint64m1_t test_vluxei16_v_u64m1(const uint64_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vluxei16_v_u64m2(const uint64_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1468,7 +1468,7 @@ vuint64m2_t test_vluxei16_v_u64m2(const uint64_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vluxei16_v_u64m4(const uint64_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1478,7 +1478,7 @@ vuint64m4_t test_vluxei16_v_u64m4(const uint64_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vluxei16_v_u64m8(const uint64_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1488,7 +1488,7 @@ vuint64m8_t test_vluxei16_v_u64m8(const uint64_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vluxei32_v_u64m1(const uint64_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1498,7 +1498,7 @@ vuint64m1_t test_vluxei32_v_u64m1(const uint64_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei32_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vluxei32_v_u64m2(const uint64_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1508,7 +1508,7 @@ vuint64m2_t test_vluxei32_v_u64m2(const uint64_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vluxei32_v_u64m4(const uint64_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1518,7 +1518,7 @@ vuint64m4_t test_vluxei32_v_u64m4(const uint64_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vluxei32_v_u64m8(const uint64_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1528,7 +1528,7 @@ vuint64m8_t test_vluxei32_v_u64m8(const uint64_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vluxei64_v_u64m1(const uint64_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1538,7 +1538,7 @@ vuint64m1_t test_vluxei64_v_u64m1(const uint64_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vluxei64_v_u64m2(const uint64_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1548,7 +1548,7 @@ vuint64m2_t test_vluxei64_v_u64m2(const uint64_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vluxei64_v_u64m4(const uint64_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1558,7 +1558,7 @@ vuint64m4_t test_vluxei64_v_u64m4(const uint64_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vluxei64_v_u64m8(const uint64_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1568,7 +1568,7 @@ vuint64m8_t test_vluxei64_v_u64m8(const uint64_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vluxei8_v_f32mf2(const float *base, vuint8mf8_t bindex, size_t vl) {
@@ -1578,7 +1578,7 @@ vfloat32mf2_t test_vluxei8_v_f32mf2(const float *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vluxei8_v_f32m1(const float *base, vuint8mf4_t bindex, size_t vl) {
@@ -1588,7 +1588,7 @@ vfloat32m1_t test_vluxei8_v_f32m1(const float *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vluxei8_v_f32m2(const float *base, vuint8mf2_t bindex, size_t vl) {
@@ -1598,7 +1598,7 @@ vfloat32m2_t test_vluxei8_v_f32m2(const float *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vluxei8_v_f32m4(const float *base, vuint8m1_t bindex, size_t vl) {
@@ -1608,7 +1608,7 @@ vfloat32m4_t test_vluxei8_v_f32m4(const float *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vluxei8_v_f32m8(const float *base, vuint8m2_t bindex, size_t vl) {
@@ -1618,7 +1618,7 @@ vfloat32m8_t test_vluxei8_v_f32m8(const float *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vluxei16_v_f32mf2(const float *base, vuint16mf4_t bindex, size_t vl) {
@@ -1628,7 +1628,7 @@ vfloat32mf2_t test_vluxei16_v_f32mf2(const float *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vluxei16_v_f32m1(const float *base, vuint16mf2_t bindex, size_t vl) {
@@ -1638,7 +1638,7 @@ vfloat32m1_t test_vluxei16_v_f32m1(const float *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vluxei16_v_f32m2(const float *base, vuint16m1_t bindex, size_t vl) {
@@ -1648,7 +1648,7 @@ vfloat32m2_t test_vluxei16_v_f32m2(const float *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vluxei16_v_f32m4(const float *base, vuint16m2_t bindex, size_t vl) {
@@ -1658,7 +1658,7 @@ vfloat32m4_t test_vluxei16_v_f32m4(const float *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vluxei16_v_f32m8(const float *base, vuint16m4_t bindex, size_t vl) {
@@ -1668,7 +1668,7 @@ vfloat32m8_t test_vluxei16_v_f32m8(const float *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vluxei32_v_f32mf2(const float *base, vuint32mf2_t bindex, size_t vl) {
@@ -1678,7 +1678,7 @@ vfloat32mf2_t test_vluxei32_v_f32mf2(const float *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vluxei32_v_f32m1(const float *base, vuint32m1_t bindex, size_t vl) {
@@ -1688,7 +1688,7 @@ vfloat32m1_t test_vluxei32_v_f32m1(const float *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vluxei32_v_f32m2(const float *base, vuint32m2_t bindex, size_t vl) {
@@ -1698,7 +1698,7 @@ vfloat32m2_t test_vluxei32_v_f32m2(const float *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vluxei32_v_f32m4(const float *base, vuint32m4_t bindex, size_t vl) {
@@ -1708,7 +1708,7 @@ vfloat32m4_t test_vluxei32_v_f32m4(const float *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vluxei32_v_f32m8(const float *base, vuint32m8_t bindex, size_t vl) {
@@ -1718,7 +1718,7 @@ vfloat32m8_t test_vluxei32_v_f32m8(const float *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vluxei64_v_f32mf2(const float *base, vuint64m1_t bindex, size_t vl) {
@@ -1728,7 +1728,7 @@ vfloat32mf2_t test_vluxei64_v_f32mf2(const float *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vluxei64_v_f32m1(const float *base, vuint64m2_t bindex, size_t vl) {
@@ -1738,7 +1738,7 @@ vfloat32m1_t test_vluxei64_v_f32m1(const float *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vluxei64_v_f32m2(const float *base, vuint64m4_t bindex, size_t vl) {
@@ -1748,7 +1748,7 @@ vfloat32m2_t test_vluxei64_v_f32m2(const float *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vluxei64_v_f32m4(const float *base, vuint64m8_t bindex, size_t vl) {
@@ -1758,7 +1758,7 @@ vfloat32m4_t test_vluxei64_v_f32m4(const float *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vluxei8_v_f64m1(const double *base, vuint8mf8_t bindex, size_t vl) {
@@ -1768,7 +1768,7 @@ vfloat64m1_t test_vluxei8_v_f64m1(const double *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vluxei8_v_f64m2(const double *base, vuint8mf4_t bindex, size_t vl) {
@@ -1778,7 +1778,7 @@ vfloat64m2_t test_vluxei8_v_f64m2(const double *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vluxei8_v_f64m4(const double *base, vuint8mf2_t bindex, size_t vl) {
@@ -1788,7 +1788,7 @@ vfloat64m4_t test_vluxei8_v_f64m4(const double *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vluxei8_v_f64m8(const double *base, vuint8m1_t bindex, size_t vl) {
@@ -1798,7 +1798,7 @@ vfloat64m8_t test_vluxei8_v_f64m8(const double *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vluxei16_v_f64m1(const double *base, vuint16mf4_t bindex, size_t vl) {
@@ -1808,7 +1808,7 @@ vfloat64m1_t test_vluxei16_v_f64m1(const double *base, vuint16mf4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vluxei16_v_f64m2(const double *base, vuint16mf2_t bindex, size_t vl) {
@@ -1818,7 +1818,7 @@ vfloat64m2_t test_vluxei16_v_f64m2(const double *base, vuint16mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vluxei16_v_f64m4(const double *base, vuint16m1_t bindex, size_t vl) {
@@ -1828,7 +1828,7 @@ vfloat64m4_t test_vluxei16_v_f64m4(const double *base, vuint16m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vluxei16_v_f64m8(const double *base, vuint16m2_t bindex, size_t vl) {
@@ -1838,7 +1838,7 @@ vfloat64m8_t test_vluxei16_v_f64m8(const double *base, vuint16m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vluxei32_v_f64m1(const double *base, vuint32mf2_t bindex, size_t vl) {
@@ -1848,7 +1848,7 @@ vfloat64m1_t test_vluxei32_v_f64m1(const double *base, vuint32mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vluxei32_v_f64m2(const double *base, vuint32m1_t bindex, size_t vl) {
@@ -1858,7 +1858,7 @@ vfloat64m2_t test_vluxei32_v_f64m2(const double *base, vuint32m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vluxei32_v_f64m4(const double *base, vuint32m2_t bindex, size_t vl) {
@@ -1868,7 +1868,7 @@ vfloat64m4_t test_vluxei32_v_f64m4(const double *base, vuint32m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vluxei32_v_f64m8(const double *base, vuint32m4_t bindex, size_t vl) {
@@ -1878,7 +1878,7 @@ vfloat64m8_t test_vluxei32_v_f64m8(const double *base, vuint32m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i64.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vluxei64_v_f64m1(const double *base, vuint64m1_t bindex, size_t vl) {
@@ -1888,7 +1888,7 @@ vfloat64m1_t test_vluxei64_v_f64m1(const double *base, vuint64m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i64.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i64.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vluxei64_v_f64m2(const double *base, vuint64m2_t bindex, size_t vl) {
@@ -1898,7 +1898,7 @@ vfloat64m2_t test_vluxei64_v_f64m2(const double *base, vuint64m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i64.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i64.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vluxei64_v_f64m4(const double *base, vuint64m4_t bindex, size_t vl) {
@@ -1908,7 +1908,7 @@ vfloat64m4_t test_vluxei64_v_f64m4(const double *base, vuint64m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i64.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i64.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vluxei64_v_f64m8(const double *base, vuint64m8_t bindex, size_t vl) {
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c
index ca7ab81f4ec8a..14ba4f4eb7db1 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vle.c
@@ -9,7 +9,7 @@
 // CHECK-RV64-LABEL: @test_vle8_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vle8_v_i8mf8(const int8_t *base, size_t vl) {
@@ -19,7 +19,7 @@ vint8mf8_t test_vle8_v_i8mf8(const int8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vle8_v_i8mf4(const int8_t *base, size_t vl) {
@@ -29,7 +29,7 @@ vint8mf4_t test_vle8_v_i8mf4(const int8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vle8_v_i8mf2(const int8_t *base, size_t vl) {
@@ -39,7 +39,7 @@ vint8mf2_t test_vle8_v_i8mf2(const int8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vle8_v_i8m1(const int8_t *base, size_t vl) {
@@ -49,7 +49,7 @@ vint8m1_t test_vle8_v_i8m1(const int8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vle8_v_i8m2(const int8_t *base, size_t vl) {
@@ -59,7 +59,7 @@ vint8m2_t test_vle8_v_i8m2(const int8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vle8_v_i8m4(const int8_t *base, size_t vl) {
@@ -69,7 +69,7 @@ vint8m4_t test_vle8_v_i8m4(const int8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_i8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vint8m8_t test_vle8_v_i8m8(const int8_t *base, size_t vl) {
@@ -79,7 +79,7 @@ vint8m8_t test_vle8_v_i8m8(const int8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vle16_v_i16mf4(const int16_t *base, size_t vl) {
@@ -89,7 +89,7 @@ vint16mf4_t test_vle16_v_i16mf4(const int16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vle16_v_i16mf2(const int16_t *base, size_t vl) {
@@ -99,7 +99,7 @@ vint16mf2_t test_vle16_v_i16mf2(const int16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vle16_v_i16m1(const int16_t *base, size_t vl) {
@@ -109,7 +109,7 @@ vint16m1_t test_vle16_v_i16m1(const int16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vle16_v_i16m2(const int16_t *base, size_t vl) {
@@ -119,7 +119,7 @@ vint16m2_t test_vle16_v_i16m2(const int16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vle16_v_i16m4(const int16_t *base, size_t vl) {
@@ -129,7 +129,7 @@ vint16m4_t test_vle16_v_i16m4(const int16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vle16_v_i16m8(const int16_t *base, size_t vl) {
@@ -139,7 +139,7 @@ vint16m8_t test_vle16_v_i16m8(const int16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vle32_v_i32mf2(const int32_t *base, size_t vl) {
@@ -149,7 +149,7 @@ vint32mf2_t test_vle32_v_i32mf2(const int32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vle32_v_i32m1(const int32_t *base, size_t vl) {
@@ -159,7 +159,7 @@ vint32m1_t test_vle32_v_i32m1(const int32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vle32_v_i32m2(const int32_t *base, size_t vl) {
@@ -169,7 +169,7 @@ vint32m2_t test_vle32_v_i32m2(const int32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vle32_v_i32m4(const int32_t *base, size_t vl) {
@@ -179,7 +179,7 @@ vint32m4_t test_vle32_v_i32m4(const int32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vle32_v_i32m8(const int32_t *base, size_t vl) {
@@ -189,7 +189,7 @@ vint32m8_t test_vle32_v_i32m8(const int32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vle64_v_i64m1(const int64_t *base, size_t vl) {
@@ -199,7 +199,7 @@ vint64m1_t test_vle64_v_i64m1(const int64_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vle64_v_i64m2(const int64_t *base, size_t vl) {
@@ -209,7 +209,7 @@ vint64m2_t test_vle64_v_i64m2(const int64_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vle64_v_i64m4(const int64_t *base, size_t vl) {
@@ -219,7 +219,7 @@ vint64m4_t test_vle64_v_i64m4(const int64_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vle64_v_i64m8(const int64_t *base, size_t vl) {
@@ -229,7 +229,7 @@ vint64m8_t test_vle64_v_i64m8(const int64_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vle8_v_u8mf8(const uint8_t *base, size_t vl) {
@@ -239,7 +239,7 @@ vuint8mf8_t test_vle8_v_u8mf8(const uint8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vle8_v_u8mf4(const uint8_t *base, size_t vl) {
@@ -249,7 +249,7 @@ vuint8mf4_t test_vle8_v_u8mf4(const uint8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vle8_v_u8mf2(const uint8_t *base, size_t vl) {
@@ -259,7 +259,7 @@ vuint8mf2_t test_vle8_v_u8mf2(const uint8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vle8_v_u8m1(const uint8_t *base, size_t vl) {
@@ -269,7 +269,7 @@ vuint8m1_t test_vle8_v_u8m1(const uint8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vle8_v_u8m2(const uint8_t *base, size_t vl) {
@@ -279,7 +279,7 @@ vuint8m2_t test_vle8_v_u8m2(const uint8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vle8_v_u8m4(const uint8_t *base, size_t vl) {
@@ -289,7 +289,7 @@ vuint8m4_t test_vle8_v_u8m4(const uint8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8_v_u8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vuint8m8_t test_vle8_v_u8m8(const uint8_t *base, size_t vl) {
@@ -299,7 +299,7 @@ vuint8m8_t test_vle8_v_u8m8(const uint8_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vle16_v_u16mf4(const uint16_t *base, size_t vl) {
@@ -309,7 +309,7 @@ vuint16mf4_t test_vle16_v_u16mf4(const uint16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vle16_v_u16mf2(const uint16_t *base, size_t vl) {
@@ -319,7 +319,7 @@ vuint16mf2_t test_vle16_v_u16mf2(const uint16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vle16_v_u16m1(const uint16_t *base, size_t vl) {
@@ -329,7 +329,7 @@ vuint16m1_t test_vle16_v_u16m1(const uint16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vle16_v_u16m2(const uint16_t *base, size_t vl) {
@@ -339,7 +339,7 @@ vuint16m2_t test_vle16_v_u16m2(const uint16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vle16_v_u16m4(const uint16_t *base, size_t vl) {
@@ -349,7 +349,7 @@ vuint16m4_t test_vle16_v_u16m4(const uint16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vle16_v_u16m8(const uint16_t *base, size_t vl) {
@@ -359,7 +359,7 @@ vuint16m8_t test_vle16_v_u16m8(const uint16_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vle32_v_u32mf2(const uint32_t *base, size_t vl) {
@@ -369,7 +369,7 @@ vuint32mf2_t test_vle32_v_u32mf2(const uint32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vle32_v_u32m1(const uint32_t *base, size_t vl) {
@@ -379,7 +379,7 @@ vuint32m1_t test_vle32_v_u32m1(const uint32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vle32_v_u32m2(const uint32_t *base, size_t vl) {
@@ -389,7 +389,7 @@ vuint32m2_t test_vle32_v_u32m2(const uint32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vle32_v_u32m4(const uint32_t *base, size_t vl) {
@@ -399,7 +399,7 @@ vuint32m4_t test_vle32_v_u32m4(const uint32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vle32_v_u32m8(const uint32_t *base, size_t vl) {
@@ -409,7 +409,7 @@ vuint32m8_t test_vle32_v_u32m8(const uint32_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vle64_v_u64m1(const uint64_t *base, size_t vl) {
@@ -419,7 +419,7 @@ vuint64m1_t test_vle64_v_u64m1(const uint64_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vle64_v_u64m2(const uint64_t *base, size_t vl) {
@@ -429,7 +429,7 @@ vuint64m2_t test_vle64_v_u64m2(const uint64_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vle64_v_u64m4(const uint64_t *base, size_t vl) {
@@ -439,7 +439,7 @@ vuint64m4_t test_vle64_v_u64m4(const uint64_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vle64_v_u64m8(const uint64_t *base, size_t vl) {
@@ -449,7 +449,7 @@ vuint64m8_t test_vle64_v_u64m8(const uint64_t *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vle.nxv1f32.i64(<vscale x 1 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vle.nxv1f32.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vle32_v_f32mf2(const float *base, size_t vl) {
@@ -459,7 +459,7 @@ vfloat32mf2_t test_vle32_v_f32mf2(const float *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.i64(<vscale x 2 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vle32_v_f32m1(const float *base, size_t vl) {
@@ -469,7 +469,7 @@ vfloat32m1_t test_vle32_v_f32m1(const float *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vle.nxv4f32.i64(<vscale x 4 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vle.nxv4f32.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vle32_v_f32m2(const float *base, size_t vl) {
@@ -479,7 +479,7 @@ vfloat32m2_t test_vle32_v_f32m2(const float *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vle.nxv8f32.i64(<vscale x 8 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vle.nxv8f32.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vle32_v_f32m4(const float *base, size_t vl) {
@@ -489,7 +489,7 @@ vfloat32m4_t test_vle32_v_f32m4(const float *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle32_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vle32_v_f32m8(const float *base, size_t vl) {
@@ -499,7 +499,7 @@ vfloat32m8_t test_vle32_v_f32m8(const float *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vle64_v_f64m1(const double *base, size_t vl) {
@@ -509,7 +509,7 @@ vfloat64m1_t test_vle64_v_f64m1(const double *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64.i64(<vscale x 2 x double>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vle64_v_f64m2(const double *base, size_t vl) {
@@ -519,7 +519,7 @@ vfloat64m2_t test_vle64_v_f64m2(const double *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vle.nxv4f64.i64(<vscale x 4 x double>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vle.nxv4f64.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vle64_v_f64m4(const double *base, size_t vl) {
@@ -529,7 +529,7 @@ vfloat64m4_t test_vle64_v_f64m4(const double *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle64_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vle.nxv8f64.i64(<vscale x 8 x double>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vle.nxv8f64.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vle64_v_f64m8(const double *base, size_t vl) {
@@ -1069,7 +1069,7 @@ vfloat64m8_t test_vle64_v_f64m8_m(vbool8_t mask, vfloat64m8_t maskedoff, const d
 // CHECK-RV64-LABEL: @test_vle16_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vle.nxv1f16.i64(<vscale x 1 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vle.nxv1f16.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vle16_v_f16mf4(const _Float16 *base, size_t vl) {
@@ -1079,7 +1079,7 @@ vfloat16mf4_t test_vle16_v_f16mf4(const _Float16 *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vle.nxv2f16.i64(<vscale x 2 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vle.nxv2f16.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vle16_v_f16mf2(const _Float16 *base, size_t vl) {
@@ -1089,7 +1089,7 @@ vfloat16mf2_t test_vle16_v_f16mf2(const _Float16 *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vle.nxv4f16.i64(<vscale x 4 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vle.nxv4f16.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vle16_v_f16m1(const _Float16 *base, size_t vl) {
@@ -1099,7 +1099,7 @@ vfloat16m1_t test_vle16_v_f16m1(const _Float16 *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vle.nxv8f16.i64(<vscale x 8 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vle.nxv8f16.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vle16_v_f16m2(const _Float16 *base, size_t vl) {
@@ -1109,7 +1109,7 @@ vfloat16m2_t test_vle16_v_f16m2(const _Float16 *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vle.nxv16f16.i64(<vscale x 16 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vle.nxv16f16.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
 //
 vfloat16m4_t test_vle16_v_f16m4(const _Float16 *base, size_t vl) {
@@ -1119,7 +1119,7 @@ vfloat16m4_t test_vle16_v_f16m4(const _Float16 *base, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle16_v_f16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 32 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vle.nxv32f16.i64(<vscale x 32 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vle.nxv32f16.i64(<vscale x 32 x half> undef, <vscale x 32 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP1]]
 //
 vfloat16m8_t test_vle16_v_f16m8(const _Float16 *base, size_t vl) {
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c
index d88c6b40aedcb..e22efb6c34aae 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vleff.c
@@ -9,7 +9,7 @@
 // CHECK-RV64-LABEL: @test_vle8ff_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -22,7 +22,7 @@ vint8mf8_t test_vle8ff_v_i8mf8 (const int8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -35,7 +35,7 @@ vint8mf4_t test_vle8ff_v_i8mf4 (const int8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -48,7 +48,7 @@ vint8mf2_t test_vle8ff_v_i8mf2 (const int8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -61,7 +61,7 @@ vint8m1_t test_vle8ff_v_i8m1 (const int8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -74,7 +74,7 @@ vint8m2_t test_vle8ff_v_i8m2 (const int8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 32 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 32 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -87,7 +87,7 @@ vint8m4_t test_vle8ff_v_i8m4 (const int8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_i8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 64 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 64 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -100,7 +100,7 @@ vint8m8_t test_vle8ff_v_i8m8 (const int8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -113,7 +113,7 @@ vuint8mf8_t test_vle8ff_v_u8mf8 (const uint8_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle8ff_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -126,7 +126,7 @@ vuint8mf4_t test_vle8ff_v_u8mf4 (const uint8_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle8ff_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -139,7 +139,7 @@ vuint8mf2_t test_vle8ff_v_u8mf2 (const uint8_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle8ff_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -152,7 +152,7 @@ vuint8m1_t test_vle8ff_v_u8m1 (const uint8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -165,7 +165,7 @@ vuint8m2_t test_vle8ff_v_u8m2 (const uint8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 32 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 32 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -178,7 +178,7 @@ vuint8m4_t test_vle8ff_v_u8m4 (const uint8_t *base, size_t *new_vl, size_t vl) {
 // CHECK-RV64-LABEL: @test_vle8ff_v_u8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 64 x i8>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 64 x i8>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -373,7 +373,7 @@ vuint8m8_t test_vle8ff_v_u8m8_m (vbool1_t mask, vuint8m8_t maskedoff, const uint
 // CHECK-RV64-LABEL: @test_vle16ff_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -386,7 +386,7 @@ vint16mf4_t test_vle16ff_v_i16mf4 (const int16_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle16ff_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -399,7 +399,7 @@ vint16mf2_t test_vle16ff_v_i16mf2 (const int16_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle16ff_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -412,7 +412,7 @@ vint16m1_t test_vle16ff_v_i16m1 (const int16_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle16ff_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -425,7 +425,7 @@ vint16m2_t test_vle16ff_v_i16m2 (const int16_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle16ff_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -438,7 +438,7 @@ vint16m4_t test_vle16ff_v_i16m4 (const int16_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle16ff_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -451,7 +451,7 @@ vint16m8_t test_vle16ff_v_i16m8 (const int16_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle16ff_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -464,7 +464,7 @@ vuint16mf4_t test_vle16ff_v_u16mf4 (const uint16_t *base, size_t *new_vl, size_t
 // CHECK-RV64-LABEL: @test_vle16ff_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -477,7 +477,7 @@ vuint16mf2_t test_vle16ff_v_u16mf2 (const uint16_t *base, size_t *new_vl, size_t
 // CHECK-RV64-LABEL: @test_vle16ff_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -490,7 +490,7 @@ vuint16m1_t test_vle16ff_v_u16m1 (const uint16_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle16ff_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -503,7 +503,7 @@ vuint16m2_t test_vle16ff_v_u16m2 (const uint16_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle16ff_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -516,7 +516,7 @@ vuint16m4_t test_vle16ff_v_u16m4 (const uint16_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle16ff_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 32 x i16>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -685,7 +685,7 @@ vuint16m8_t test_vle16ff_v_u16m8_m (vbool2_t mask, vuint16m8_t maskedoff, const
 // CHECK-RV64-LABEL: @test_vle32ff_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -698,7 +698,7 @@ vint32mf2_t test_vle32ff_v_i32mf2 (const int32_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle32ff_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -711,7 +711,7 @@ vint32m1_t test_vle32ff_v_i32m1 (const int32_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle32ff_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -724,7 +724,7 @@ vint32m2_t test_vle32ff_v_i32m2 (const int32_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle32ff_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -737,7 +737,7 @@ vint32m4_t test_vle32ff_v_i32m4 (const int32_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle32ff_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -750,7 +750,7 @@ vint32m8_t test_vle32ff_v_i32m8 (const int32_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle32ff_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -763,7 +763,7 @@ vuint32mf2_t test_vle32ff_v_u32mf2 (const uint32_t *base, size_t *new_vl, size_t
 // CHECK-RV64-LABEL: @test_vle32ff_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -776,7 +776,7 @@ vuint32m1_t test_vle32ff_v_u32m1 (const uint32_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle32ff_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -789,7 +789,7 @@ vuint32m2_t test_vle32ff_v_u32m2 (const uint32_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle32ff_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -802,7 +802,7 @@ vuint32m4_t test_vle32ff_v_u32m4 (const uint32_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle32ff_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x i32>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x i32>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -815,7 +815,7 @@ vuint32m8_t test_vle32ff_v_u32m8 (const uint32_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle32ff_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x float>, i64 } @llvm.riscv.vleff.nxv1f32.i64(<vscale x 1 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x float>, i64 } @llvm.riscv.vleff.nxv1f32.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x float>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x float>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -828,7 +828,7 @@ vfloat32mf2_t test_vle32ff_v_f32mf2 (const float *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle32ff_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x float>, i64 } @llvm.riscv.vleff.nxv2f32.i64(<vscale x 2 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x float>, i64 } @llvm.riscv.vleff.nxv2f32.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x float>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x float>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -841,7 +841,7 @@ vfloat32m1_t test_vle32ff_v_f32m1 (const float *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle32ff_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x float>, i64 } @llvm.riscv.vleff.nxv4f32.i64(<vscale x 4 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x float>, i64 } @llvm.riscv.vleff.nxv4f32.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x float>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -854,7 +854,7 @@ vfloat32m2_t test_vle32ff_v_f32m2 (const float *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle32ff_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x float>, i64 } @llvm.riscv.vleff.nxv8f32.i64(<vscale x 8 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x float>, i64 } @llvm.riscv.vleff.nxv8f32.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x float>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x float>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -867,7 +867,7 @@ vfloat32m4_t test_vle32ff_v_f32m4 (const float *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle32ff_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x float>, i64 } @llvm.riscv.vleff.nxv16f32.i64(<vscale x 16 x float>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x float>, i64 } @llvm.riscv.vleff.nxv16f32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x float>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x float>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1075,7 +1075,7 @@ vfloat32m8_t test_vle32ff_v_f32m8_m (vbool4_t mask, vfloat32m8_t maskedoff, cons
 // CHECK-RV64-LABEL: @test_vle64ff_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x i64>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x i64>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1088,7 +1088,7 @@ vint64m1_t test_vle64ff_v_i64m1 (const int64_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle64ff_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1101,7 +1101,7 @@ vint64m2_t test_vle64ff_v_i64m2 (const int64_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle64ff_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i64>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i64>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1114,7 +1114,7 @@ vint64m4_t test_vle64ff_v_i64m4 (const int64_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle64ff_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i64>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i64>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1127,7 +1127,7 @@ vint64m8_t test_vle64ff_v_i64m8 (const int64_t *base, size_t *new_vl, size_t vl)
 // CHECK-RV64-LABEL: @test_vle64ff_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x i64>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x i64>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1140,7 +1140,7 @@ vuint64m1_t test_vle64ff_v_u64m1 (const uint64_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle64ff_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x i64>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x i64>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1153,7 +1153,7 @@ vuint64m2_t test_vle64ff_v_u64m2 (const uint64_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle64ff_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x i64>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x i64>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1166,7 +1166,7 @@ vuint64m4_t test_vle64ff_v_u64m4 (const uint64_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle64ff_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x i64>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x i64>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1179,7 +1179,7 @@ vuint64m8_t test_vle64ff_v_u64m8 (const uint64_t *base, size_t *new_vl, size_t v
 // CHECK-RV64-LABEL: @test_vle64ff_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64.i64(<vscale x 1 x double>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x double>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x double>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1192,7 +1192,7 @@ vfloat64m1_t test_vle64ff_v_f64m1 (const double *base, size_t *new_vl, size_t vl
 // CHECK-RV64-LABEL: @test_vle64ff_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x double>, i64 } @llvm.riscv.vleff.nxv2f64.i64(<vscale x 2 x double>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x double>, i64 } @llvm.riscv.vleff.nxv2f64.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x double>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x double>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1205,7 +1205,7 @@ vfloat64m2_t test_vle64ff_v_f64m2 (const double *base, size_t *new_vl, size_t vl
 // CHECK-RV64-LABEL: @test_vle64ff_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x double>, i64 } @llvm.riscv.vleff.nxv4f64.i64(<vscale x 4 x double>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x double>, i64 } @llvm.riscv.vleff.nxv4f64.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x double>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x double>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1218,7 +1218,7 @@ vfloat64m4_t test_vle64ff_v_f64m4 (const double *base, size_t *new_vl, size_t vl
 // CHECK-RV64-LABEL: @test_vle64ff_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x double>, i64 } @llvm.riscv.vleff.nxv8f64.i64(<vscale x 8 x double>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x double>, i64 } @llvm.riscv.vleff.nxv8f64.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x double>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x double>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1387,7 +1387,7 @@ vfloat64m8_t test_vle64ff_v_f64m8_m (vbool8_t mask, vfloat64m8_t maskedoff, cons
 // CHECK-RV64-LABEL: @test_vle16ff_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x half>, i64 } @llvm.riscv.vleff.nxv1f16.i64(<vscale x 1 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 1 x half>, i64 } @llvm.riscv.vleff.nxv1f16.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 1 x half>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 1 x half>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1400,7 +1400,7 @@ vfloat16mf4_t test_vle16ff_v_f16mf4 (const _Float16 *base, size_t *new_vl, size_
 // CHECK-RV64-LABEL: @test_vle16ff_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x half>, i64 } @llvm.riscv.vleff.nxv2f16.i64(<vscale x 2 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 2 x half>, i64 } @llvm.riscv.vleff.nxv2f16.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 2 x half>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 2 x half>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1413,7 +1413,7 @@ vfloat16mf2_t test_vle16ff_v_f16mf2 (const _Float16 *base, size_t *new_vl, size_
 // CHECK-RV64-LABEL: @test_vle16ff_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x half>, i64 } @llvm.riscv.vleff.nxv4f16.i64(<vscale x 4 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 4 x half>, i64 } @llvm.riscv.vleff.nxv4f16.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 4 x half>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 4 x half>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1426,7 +1426,7 @@ vfloat16m1_t test_vle16ff_v_f16m1 (const _Float16 *base, size_t *new_vl, size_t
 // CHECK-RV64-LABEL: @test_vle16ff_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x half>, i64 } @llvm.riscv.vleff.nxv8f16.i64(<vscale x 8 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 8 x half>, i64 } @llvm.riscv.vleff.nxv8f16.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 8 x half>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 8 x half>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1439,7 +1439,7 @@ vfloat16m2_t test_vle16ff_v_f16m2 (const _Float16 *base, size_t *new_vl, size_t
 // CHECK-RV64-LABEL: @test_vle16ff_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x half>, i64 } @llvm.riscv.vleff.nxv16f16.i64(<vscale x 16 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 16 x half>, i64 } @llvm.riscv.vleff.nxv16f16.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 16 x half>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 16 x half>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
@@ -1452,7 +1452,7 @@ vfloat16m4_t test_vle16ff_v_f16m4 (const _Float16 *base, size_t *new_vl, size_t
 // CHECK-RV64-LABEL: @test_vle16ff_v_f16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 32 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x half>, i64 } @llvm.riscv.vleff.nxv32f16.i64(<vscale x 32 x half>* [[TMP0]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call { <vscale x 32 x half>, i64 } @llvm.riscv.vleff.nxv32f16.i64(<vscale x 32 x half> undef, <vscale x 32 x half>* [[TMP0]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    [[TMP2:%.*]] = extractvalue { <vscale x 32 x half>, i64 } [[TMP1]], 0
 // CHECK-RV64-NEXT:    [[TMP3:%.*]] = extractvalue { <vscale x 32 x half>, i64 } [[TMP1]], 1
 // CHECK-RV64-NEXT:    store i64 [[TMP3]], i64* [[NEW_VL:%.*]], align 8
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c
index 328b3241ec2e5..21ffa76a19830 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vloxei.c
@@ -9,7 +9,7 @@
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vloxei8_v_i8mf8(const int8_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -19,7 +19,7 @@ vint8mf8_t test_vloxei8_v_i8mf8(const int8_t *base, vuint8mf8_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vloxei8_v_i8mf4(const int8_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -29,7 +29,7 @@ vint8mf4_t test_vloxei8_v_i8mf4(const int8_t *base, vuint8mf4_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vloxei8_v_i8mf2(const int8_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -39,7 +39,7 @@ vint8mf2_t test_vloxei8_v_i8mf2(const int8_t *base, vuint8mf2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vloxei8_v_i8m1(const int8_t *base, vuint8m1_t bindex, size_t vl) {
@@ -49,7 +49,7 @@ vint8m1_t test_vloxei8_v_i8m1(const int8_t *base, vuint8m1_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vloxei8_v_i8m2(const int8_t *base, vuint8m2_t bindex, size_t vl) {
@@ -59,7 +59,7 @@ vint8m2_t test_vloxei8_v_i8m2(const int8_t *base, vuint8m2_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vloxei8_v_i8m4(const int8_t *base, vuint8m4_t bindex, size_t vl) {
@@ -69,7 +69,7 @@ vint8m4_t test_vloxei8_v_i8m4(const int8_t *base, vuint8m4_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vloxei8_v_i8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vint8m8_t test_vloxei8_v_i8m8(const int8_t *base, vuint8m8_t bindex, size_t vl) {
@@ -79,7 +79,7 @@ vint8m8_t test_vloxei8_v_i8m8(const int8_t *base, vuint8m8_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vloxei16_v_i8mf8(const int8_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -89,7 +89,7 @@ vint8mf8_t test_vloxei16_v_i8mf8(const int8_t *base, vuint16mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vloxei16_v_i8mf4(const int8_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -99,7 +99,7 @@ vint8mf4_t test_vloxei16_v_i8mf4(const int8_t *base, vuint16mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vloxei16_v_i8mf2(const int8_t *base, vuint16m1_t bindex, size_t vl) {
@@ -109,7 +109,7 @@ vint8mf2_t test_vloxei16_v_i8mf2(const int8_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vloxei16_v_i8m1(const int8_t *base, vuint16m2_t bindex, size_t vl) {
@@ -119,7 +119,7 @@ vint8m1_t test_vloxei16_v_i8m1(const int8_t *base, vuint16m2_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vloxei16_v_i8m2(const int8_t *base, vuint16m4_t bindex, size_t vl) {
@@ -129,7 +129,7 @@ vint8m2_t test_vloxei16_v_i8m2(const int8_t *base, vuint16m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei16_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vloxei16_v_i8m4(const int8_t *base, vuint16m8_t bindex, size_t vl) {
@@ -139,7 +139,7 @@ vint8m4_t test_vloxei16_v_i8m4(const int8_t *base, vuint16m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vloxei32_v_i8mf8(const int8_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -149,7 +149,7 @@ vint8mf8_t test_vloxei32_v_i8mf8(const int8_t *base, vuint32mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vloxei32_v_i8mf4(const int8_t *base, vuint32m1_t bindex, size_t vl) {
@@ -159,7 +159,7 @@ vint8mf4_t test_vloxei32_v_i8mf4(const int8_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vloxei32_v_i8mf2(const int8_t *base, vuint32m2_t bindex, size_t vl) {
@@ -169,7 +169,7 @@ vint8mf2_t test_vloxei32_v_i8mf2(const int8_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vloxei32_v_i8m1(const int8_t *base, vuint32m4_t bindex, size_t vl) {
@@ -179,7 +179,7 @@ vint8m1_t test_vloxei32_v_i8m1(const int8_t *base, vuint32m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei32_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vloxei32_v_i8m2(const int8_t *base, vuint32m8_t bindex, size_t vl) {
@@ -189,7 +189,7 @@ vint8m2_t test_vloxei32_v_i8m2(const int8_t *base, vuint32m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei64_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vloxei64_v_i8mf8(const int8_t *base, vuint64m1_t bindex, size_t vl) {
@@ -199,7 +199,7 @@ vint8mf8_t test_vloxei64_v_i8mf8(const int8_t *base, vuint64m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vloxei64_v_i8mf4(const int8_t *base, vuint64m2_t bindex, size_t vl) {
@@ -209,7 +209,7 @@ vint8mf4_t test_vloxei64_v_i8mf4(const int8_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vloxei64_v_i8mf2(const int8_t *base, vuint64m4_t bindex, size_t vl) {
@@ -219,7 +219,7 @@ vint8mf2_t test_vloxei64_v_i8mf2(const int8_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vloxei64_v_i8m1(const int8_t *base, vuint64m8_t bindex, size_t vl) {
@@ -229,7 +229,7 @@ vint8m1_t test_vloxei64_v_i8m1(const int8_t *base, vuint64m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vloxei8_v_i16mf4(const int16_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -239,7 +239,7 @@ vint16mf4_t test_vloxei8_v_i16mf4(const int16_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vloxei8_v_i16mf2(const int16_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -249,7 +249,7 @@ vint16mf2_t test_vloxei8_v_i16mf2(const int16_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vloxei8_v_i16m1(const int16_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -259,7 +259,7 @@ vint16m1_t test_vloxei8_v_i16m1(const int16_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vloxei8_v_i16m2(const int16_t *base, vuint8m1_t bindex, size_t vl) {
@@ -269,7 +269,7 @@ vint16m2_t test_vloxei8_v_i16m2(const int16_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vloxei8_v_i16m4(const int16_t *base, vuint8m2_t bindex, size_t vl) {
@@ -279,7 +279,7 @@ vint16m4_t test_vloxei8_v_i16m4(const int16_t *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vloxei8_v_i16m8(const int16_t *base, vuint8m4_t bindex, size_t vl) {
@@ -289,7 +289,7 @@ vint16m8_t test_vloxei8_v_i16m8(const int16_t *base, vuint8m4_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vloxei16_v_i16mf4(const int16_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -299,7 +299,7 @@ vint16mf4_t test_vloxei16_v_i16mf4(const int16_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vloxei16_v_i16mf2(const int16_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -309,7 +309,7 @@ vint16mf2_t test_vloxei16_v_i16mf2(const int16_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vloxei16_v_i16m1(const int16_t *base, vuint16m1_t bindex, size_t vl) {
@@ -319,7 +319,7 @@ vint16m1_t test_vloxei16_v_i16m1(const int16_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vloxei16_v_i16m2(const int16_t *base, vuint16m2_t bindex, size_t vl) {
@@ -329,7 +329,7 @@ vint16m2_t test_vloxei16_v_i16m2(const int16_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vloxei16_v_i16m4(const int16_t *base, vuint16m4_t bindex, size_t vl) {
@@ -339,7 +339,7 @@ vint16m4_t test_vloxei16_v_i16m4(const int16_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vloxei16_v_i16m8(const int16_t *base, vuint16m8_t bindex, size_t vl) {
@@ -349,7 +349,7 @@ vint16m8_t test_vloxei16_v_i16m8(const int16_t *base, vuint16m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vloxei32_v_i16mf4(const int16_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -359,7 +359,7 @@ vint16mf4_t test_vloxei32_v_i16mf4(const int16_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vloxei32_v_i16mf2(const int16_t *base, vuint32m1_t bindex, size_t vl) {
@@ -369,7 +369,7 @@ vint16mf2_t test_vloxei32_v_i16mf2(const int16_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vloxei32_v_i16m1(const int16_t *base, vuint32m2_t bindex, size_t vl) {
@@ -379,7 +379,7 @@ vint16m1_t test_vloxei32_v_i16m1(const int16_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vloxei32_v_i16m2(const int16_t *base, vuint32m4_t bindex, size_t vl) {
@@ -389,7 +389,7 @@ vint16m2_t test_vloxei32_v_i16m2(const int16_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vloxei32_v_i16m4(const int16_t *base, vuint32m8_t bindex, size_t vl) {
@@ -399,7 +399,7 @@ vint16m4_t test_vloxei32_v_i16m4(const int16_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vloxei64_v_i16mf4(const int16_t *base, vuint64m1_t bindex, size_t vl) {
@@ -409,7 +409,7 @@ vint16mf4_t test_vloxei64_v_i16mf4(const int16_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vloxei64_v_i16mf2(const int16_t *base, vuint64m2_t bindex, size_t vl) {
@@ -419,7 +419,7 @@ vint16mf2_t test_vloxei64_v_i16mf2(const int16_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vloxei64_v_i16m1(const int16_t *base, vuint64m4_t bindex, size_t vl) {
@@ -429,7 +429,7 @@ vint16m1_t test_vloxei64_v_i16m1(const int16_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vloxei64_v_i16m2(const int16_t *base, vuint64m8_t bindex, size_t vl) {
@@ -439,7 +439,7 @@ vint16m2_t test_vloxei64_v_i16m2(const int16_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vloxei8_v_i32mf2(const int32_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -449,7 +449,7 @@ vint32mf2_t test_vloxei8_v_i32mf2(const int32_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vloxei8_v_i32m1(const int32_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -459,7 +459,7 @@ vint32m1_t test_vloxei8_v_i32m1(const int32_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vloxei8_v_i32m2(const int32_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -469,7 +469,7 @@ vint32m2_t test_vloxei8_v_i32m2(const int32_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vloxei8_v_i32m4(const int32_t *base, vuint8m1_t bindex, size_t vl) {
@@ -479,7 +479,7 @@ vint32m4_t test_vloxei8_v_i32m4(const int32_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vloxei8_v_i32m8(const int32_t *base, vuint8m2_t bindex, size_t vl) {
@@ -489,7 +489,7 @@ vint32m8_t test_vloxei8_v_i32m8(const int32_t *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vloxei16_v_i32mf2(const int32_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -499,7 +499,7 @@ vint32mf2_t test_vloxei16_v_i32mf2(const int32_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vloxei16_v_i32m1(const int32_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -509,7 +509,7 @@ vint32m1_t test_vloxei16_v_i32m1(const int32_t *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vloxei16_v_i32m2(const int32_t *base, vuint16m1_t bindex, size_t vl) {
@@ -519,7 +519,7 @@ vint32m2_t test_vloxei16_v_i32m2(const int32_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vloxei16_v_i32m4(const int32_t *base, vuint16m2_t bindex, size_t vl) {
@@ -529,7 +529,7 @@ vint32m4_t test_vloxei16_v_i32m4(const int32_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vloxei16_v_i32m8(const int32_t *base, vuint16m4_t bindex, size_t vl) {
@@ -539,7 +539,7 @@ vint32m8_t test_vloxei16_v_i32m8(const int32_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vloxei32_v_i32mf2(const int32_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -549,7 +549,7 @@ vint32mf2_t test_vloxei32_v_i32mf2(const int32_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vloxei32_v_i32m1(const int32_t *base, vuint32m1_t bindex, size_t vl) {
@@ -559,7 +559,7 @@ vint32m1_t test_vloxei32_v_i32m1(const int32_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vloxei32_v_i32m2(const int32_t *base, vuint32m2_t bindex, size_t vl) {
@@ -569,7 +569,7 @@ vint32m2_t test_vloxei32_v_i32m2(const int32_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vloxei32_v_i32m4(const int32_t *base, vuint32m4_t bindex, size_t vl) {
@@ -579,7 +579,7 @@ vint32m4_t test_vloxei32_v_i32m4(const int32_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vloxei32_v_i32m8(const int32_t *base, vuint32m8_t bindex, size_t vl) {
@@ -589,7 +589,7 @@ vint32m8_t test_vloxei32_v_i32m8(const int32_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vloxei64_v_i32mf2(const int32_t *base, vuint64m1_t bindex, size_t vl) {
@@ -599,7 +599,7 @@ vint32mf2_t test_vloxei64_v_i32mf2(const int32_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vloxei64_v_i32m1(const int32_t *base, vuint64m2_t bindex, size_t vl) {
@@ -609,7 +609,7 @@ vint32m1_t test_vloxei64_v_i32m1(const int32_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vloxei64_v_i32m2(const int32_t *base, vuint64m4_t bindex, size_t vl) {
@@ -619,7 +619,7 @@ vint32m2_t test_vloxei64_v_i32m2(const int32_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vloxei64_v_i32m4(const int32_t *base, vuint64m8_t bindex, size_t vl) {
@@ -629,7 +629,7 @@ vint32m4_t test_vloxei64_v_i32m4(const int32_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vloxei8_v_i64m1(const int64_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -639,7 +639,7 @@ vint64m1_t test_vloxei8_v_i64m1(const int64_t *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vloxei8_v_i64m2(const int64_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -649,7 +649,7 @@ vint64m2_t test_vloxei8_v_i64m2(const int64_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vloxei8_v_i64m4(const int64_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -659,7 +659,7 @@ vint64m4_t test_vloxei8_v_i64m4(const int64_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vloxei8_v_i64m8(const int64_t *base, vuint8m1_t bindex, size_t vl) {
@@ -669,7 +669,7 @@ vint64m8_t test_vloxei8_v_i64m8(const int64_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei16_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vloxei16_v_i64m1(const int64_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -679,7 +679,7 @@ vint64m1_t test_vloxei16_v_i64m1(const int64_t *base, vuint16mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vloxei16_v_i64m2(const int64_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -689,7 +689,7 @@ vint64m2_t test_vloxei16_v_i64m2(const int64_t *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vloxei16_v_i64m4(const int64_t *base, vuint16m1_t bindex, size_t vl) {
@@ -699,7 +699,7 @@ vint64m4_t test_vloxei16_v_i64m4(const int64_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vloxei16_v_i64m8(const int64_t *base, vuint16m2_t bindex, size_t vl) {
@@ -709,7 +709,7 @@ vint64m8_t test_vloxei16_v_i64m8(const int64_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vloxei32_v_i64m1(const int64_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -719,7 +719,7 @@ vint64m1_t test_vloxei32_v_i64m1(const int64_t *base, vuint32mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vloxei32_v_i64m2(const int64_t *base, vuint32m1_t bindex, size_t vl) {
@@ -729,7 +729,7 @@ vint64m2_t test_vloxei32_v_i64m2(const int64_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vloxei32_v_i64m4(const int64_t *base, vuint32m2_t bindex, size_t vl) {
@@ -739,7 +739,7 @@ vint64m4_t test_vloxei32_v_i64m4(const int64_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vloxei32_v_i64m8(const int64_t *base, vuint32m4_t bindex, size_t vl) {
@@ -749,7 +749,7 @@ vint64m8_t test_vloxei32_v_i64m8(const int64_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vloxei64_v_i64m1(const int64_t *base, vuint64m1_t bindex, size_t vl) {
@@ -759,7 +759,7 @@ vint64m1_t test_vloxei64_v_i64m1(const int64_t *base, vuint64m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vloxei64_v_i64m2(const int64_t *base, vuint64m2_t bindex, size_t vl) {
@@ -769,7 +769,7 @@ vint64m2_t test_vloxei64_v_i64m2(const int64_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vloxei64_v_i64m4(const int64_t *base, vuint64m4_t bindex, size_t vl) {
@@ -779,7 +779,7 @@ vint64m4_t test_vloxei64_v_i64m4(const int64_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vloxei64_v_i64m8(const int64_t *base, vuint64m8_t bindex, size_t vl) {
@@ -789,7 +789,7 @@ vint64m8_t test_vloxei64_v_i64m8(const int64_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vloxei8_v_u8mf8(const uint8_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -799,7 +799,7 @@ vuint8mf8_t test_vloxei8_v_u8mf8(const uint8_t *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vloxei8_v_u8mf4(const uint8_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -809,7 +809,7 @@ vuint8mf4_t test_vloxei8_v_u8mf4(const uint8_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vloxei8_v_u8mf2(const uint8_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -819,7 +819,7 @@ vuint8mf2_t test_vloxei8_v_u8mf2(const uint8_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vloxei8_v_u8m1(const uint8_t *base, vuint8m1_t bindex, size_t vl) {
@@ -829,7 +829,7 @@ vuint8m1_t test_vloxei8_v_u8m1(const uint8_t *base, vuint8m1_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vloxei8_v_u8m2(const uint8_t *base, vuint8m2_t bindex, size_t vl) {
@@ -839,7 +839,7 @@ vuint8m2_t test_vloxei8_v_u8m2(const uint8_t *base, vuint8m2_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vloxei8_v_u8m4(const uint8_t *base, vuint8m4_t bindex, size_t vl) {
@@ -849,7 +849,7 @@ vuint8m4_t test_vloxei8_v_u8m4(const uint8_t *base, vuint8m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei8_v_u8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vuint8m8_t test_vloxei8_v_u8m8(const uint8_t *base, vuint8m8_t bindex, size_t vl) {
@@ -859,7 +859,7 @@ vuint8m8_t test_vloxei8_v_u8m8(const uint8_t *base, vuint8m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vloxei16_v_u8mf8(const uint8_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -869,7 +869,7 @@ vuint8mf8_t test_vloxei16_v_u8mf8(const uint8_t *base, vuint16mf4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vloxei16_v_u8mf4(const uint8_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -879,7 +879,7 @@ vuint8mf4_t test_vloxei16_v_u8mf4(const uint8_t *base, vuint16mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vloxei16_v_u8mf2(const uint8_t *base, vuint16m1_t bindex, size_t vl) {
@@ -889,7 +889,7 @@ vuint8mf2_t test_vloxei16_v_u8mf2(const uint8_t *base, vuint16m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vloxei16_v_u8m1(const uint8_t *base, vuint16m2_t bindex, size_t vl) {
@@ -899,7 +899,7 @@ vuint8m1_t test_vloxei16_v_u8m1(const uint8_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vloxei16_v_u8m2(const uint8_t *base, vuint16m4_t bindex, size_t vl) {
@@ -909,7 +909,7 @@ vuint8m2_t test_vloxei16_v_u8m2(const uint8_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vloxei16_v_u8m4(const uint8_t *base, vuint16m8_t bindex, size_t vl) {
@@ -919,7 +919,7 @@ vuint8m4_t test_vloxei16_v_u8m4(const uint8_t *base, vuint16m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vloxei32_v_u8mf8(const uint8_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -929,7 +929,7 @@ vuint8mf8_t test_vloxei32_v_u8mf8(const uint8_t *base, vuint32mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vloxei32_v_u8mf4(const uint8_t *base, vuint32m1_t bindex, size_t vl) {
@@ -939,7 +939,7 @@ vuint8mf4_t test_vloxei32_v_u8mf4(const uint8_t *base, vuint32m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vloxei32_v_u8mf2(const uint8_t *base, vuint32m2_t bindex, size_t vl) {
@@ -949,7 +949,7 @@ vuint8mf2_t test_vloxei32_v_u8mf2(const uint8_t *base, vuint32m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vloxei32_v_u8m1(const uint8_t *base, vuint32m4_t bindex, size_t vl) {
@@ -959,7 +959,7 @@ vuint8m1_t test_vloxei32_v_u8m1(const uint8_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vloxei32_v_u8m2(const uint8_t *base, vuint32m8_t bindex, size_t vl) {
@@ -969,7 +969,7 @@ vuint8m2_t test_vloxei32_v_u8m2(const uint8_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vloxei64_v_u8mf8(const uint8_t *base, vuint64m1_t bindex, size_t vl) {
@@ -979,7 +979,7 @@ vuint8mf8_t test_vloxei64_v_u8mf8(const uint8_t *base, vuint64m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vloxei64_v_u8mf4(const uint8_t *base, vuint64m2_t bindex, size_t vl) {
@@ -989,7 +989,7 @@ vuint8mf4_t test_vloxei64_v_u8mf4(const uint8_t *base, vuint64m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vloxei64_v_u8mf2(const uint8_t *base, vuint64m4_t bindex, size_t vl) {
@@ -999,7 +999,7 @@ vuint8mf2_t test_vloxei64_v_u8mf2(const uint8_t *base, vuint64m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vloxei64_v_u8m1(const uint8_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1009,7 +1009,7 @@ vuint8m1_t test_vloxei64_v_u8m1(const uint8_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vloxei8_v_u16mf4(const uint16_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1019,7 +1019,7 @@ vuint16mf4_t test_vloxei8_v_u16mf4(const uint16_t *base, vuint8mf8_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vloxei8_v_u16mf2(const uint16_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1029,7 +1029,7 @@ vuint16mf2_t test_vloxei8_v_u16mf2(const uint16_t *base, vuint8mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vloxei8_v_u16m1(const uint16_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1039,7 +1039,7 @@ vuint16m1_t test_vloxei8_v_u16m1(const uint16_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vloxei8_v_u16m2(const uint16_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1049,7 +1049,7 @@ vuint16m2_t test_vloxei8_v_u16m2(const uint16_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vloxei8_v_u16m4(const uint16_t *base, vuint8m2_t bindex, size_t vl) {
@@ -1059,7 +1059,7 @@ vuint16m4_t test_vloxei8_v_u16m4(const uint16_t *base, vuint8m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vloxei8_v_u16m8(const uint16_t *base, vuint8m4_t bindex, size_t vl) {
@@ -1069,7 +1069,7 @@ vuint16m8_t test_vloxei8_v_u16m8(const uint16_t *base, vuint8m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vloxei16_v_u16mf4(const uint16_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1079,7 +1079,7 @@ vuint16mf4_t test_vloxei16_v_u16mf4(const uint16_t *base, vuint16mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vloxei16_v_u16mf2(const uint16_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1089,7 +1089,7 @@ vuint16mf2_t test_vloxei16_v_u16mf2(const uint16_t *base, vuint16mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vloxei16_v_u16m1(const uint16_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1099,7 +1099,7 @@ vuint16m1_t test_vloxei16_v_u16m1(const uint16_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vloxei16_v_u16m2(const uint16_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1109,7 +1109,7 @@ vuint16m2_t test_vloxei16_v_u16m2(const uint16_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vloxei16_v_u16m4(const uint16_t *base, vuint16m4_t bindex, size_t vl) {
@@ -1119,7 +1119,7 @@ vuint16m4_t test_vloxei16_v_u16m4(const uint16_t *base, vuint16m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vloxei16_v_u16m8(const uint16_t *base, vuint16m8_t bindex, size_t vl) {
@@ -1129,7 +1129,7 @@ vuint16m8_t test_vloxei16_v_u16m8(const uint16_t *base, vuint16m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vloxei32_v_u16mf4(const uint16_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1139,7 +1139,7 @@ vuint16mf4_t test_vloxei32_v_u16mf4(const uint16_t *base, vuint32mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vloxei32_v_u16mf2(const uint16_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1149,7 +1149,7 @@ vuint16mf2_t test_vloxei32_v_u16mf2(const uint16_t *base, vuint32m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vloxei32_v_u16m1(const uint16_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1159,7 +1159,7 @@ vuint16m1_t test_vloxei32_v_u16m1(const uint16_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vloxei32_v_u16m2(const uint16_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1169,7 +1169,7 @@ vuint16m2_t test_vloxei32_v_u16m2(const uint16_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vloxei32_v_u16m4(const uint16_t *base, vuint32m8_t bindex, size_t vl) {
@@ -1179,7 +1179,7 @@ vuint16m4_t test_vloxei32_v_u16m4(const uint16_t *base, vuint32m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vloxei64_v_u16mf4(const uint16_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1189,7 +1189,7 @@ vuint16mf4_t test_vloxei64_v_u16mf4(const uint16_t *base, vuint64m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei64_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vloxei64_v_u16mf2(const uint16_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1199,7 +1199,7 @@ vuint16mf2_t test_vloxei64_v_u16mf2(const uint16_t *base, vuint64m2_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei64_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vloxei64_v_u16m1(const uint16_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1209,7 +1209,7 @@ vuint16m1_t test_vloxei64_v_u16m1(const uint16_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vloxei64_v_u16m2(const uint16_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1219,7 +1219,7 @@ vuint16m2_t test_vloxei64_v_u16m2(const uint16_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vloxei8_v_u32mf2(const uint32_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1229,7 +1229,7 @@ vuint32mf2_t test_vloxei8_v_u32mf2(const uint32_t *base, vuint8mf8_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vloxei8_v_u32m1(const uint32_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1239,7 +1239,7 @@ vuint32m1_t test_vloxei8_v_u32m1(const uint32_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vloxei8_v_u32m2(const uint32_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1249,7 +1249,7 @@ vuint32m2_t test_vloxei8_v_u32m2(const uint32_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vloxei8_v_u32m4(const uint32_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1259,7 +1259,7 @@ vuint32m4_t test_vloxei8_v_u32m4(const uint32_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vloxei8_v_u32m8(const uint32_t *base, vuint8m2_t bindex, size_t vl) {
@@ -1269,7 +1269,7 @@ vuint32m8_t test_vloxei8_v_u32m8(const uint32_t *base, vuint8m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vloxei16_v_u32mf2(const uint32_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1279,7 +1279,7 @@ vuint32mf2_t test_vloxei16_v_u32mf2(const uint32_t *base, vuint16mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vloxei16_v_u32m1(const uint32_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1289,7 +1289,7 @@ vuint32m1_t test_vloxei16_v_u32m1(const uint32_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vloxei16_v_u32m2(const uint32_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1299,7 +1299,7 @@ vuint32m2_t test_vloxei16_v_u32m2(const uint32_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vloxei16_v_u32m4(const uint32_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1309,7 +1309,7 @@ vuint32m4_t test_vloxei16_v_u32m4(const uint32_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vloxei16_v_u32m8(const uint32_t *base, vuint16m4_t bindex, size_t vl) {
@@ -1319,7 +1319,7 @@ vuint32m8_t test_vloxei16_v_u32m8(const uint32_t *base, vuint16m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vloxei32_v_u32mf2(const uint32_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1329,7 +1329,7 @@ vuint32mf2_t test_vloxei32_v_u32mf2(const uint32_t *base, vuint32mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vloxei32_v_u32m1(const uint32_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1339,7 +1339,7 @@ vuint32m1_t test_vloxei32_v_u32m1(const uint32_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vloxei32_v_u32m2(const uint32_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1349,7 +1349,7 @@ vuint32m2_t test_vloxei32_v_u32m2(const uint32_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vloxei32_v_u32m4(const uint32_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1359,7 +1359,7 @@ vuint32m4_t test_vloxei32_v_u32m4(const uint32_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vloxei32_v_u32m8(const uint32_t *base, vuint32m8_t bindex, size_t vl) {
@@ -1369,7 +1369,7 @@ vuint32m8_t test_vloxei32_v_u32m8(const uint32_t *base, vuint32m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vloxei64_v_u32mf2(const uint32_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1379,7 +1379,7 @@ vuint32mf2_t test_vloxei64_v_u32mf2(const uint32_t *base, vuint64m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei64_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vloxei64_v_u32m1(const uint32_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1389,7 +1389,7 @@ vuint32m1_t test_vloxei64_v_u32m1(const uint32_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vloxei64_v_u32m2(const uint32_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1399,7 +1399,7 @@ vuint32m2_t test_vloxei64_v_u32m2(const uint32_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vloxei64_v_u32m4(const uint32_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1409,7 +1409,7 @@ vuint32m4_t test_vloxei64_v_u32m4(const uint32_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei8_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vloxei8_v_u64m1(const uint64_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1419,7 +1419,7 @@ vuint64m1_t test_vloxei8_v_u64m1(const uint64_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vloxei8_v_u64m2(const uint64_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1429,7 +1429,7 @@ vuint64m2_t test_vloxei8_v_u64m2(const uint64_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vloxei8_v_u64m4(const uint64_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1439,7 +1439,7 @@ vuint64m4_t test_vloxei8_v_u64m4(const uint64_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vloxei8_v_u64m8(const uint64_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1449,7 +1449,7 @@ vuint64m8_t test_vloxei8_v_u64m8(const uint64_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vloxei16_v_u64m1(const uint64_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1459,7 +1459,7 @@ vuint64m1_t test_vloxei16_v_u64m1(const uint64_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vloxei16_v_u64m2(const uint64_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1469,7 +1469,7 @@ vuint64m2_t test_vloxei16_v_u64m2(const uint64_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vloxei16_v_u64m4(const uint64_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1479,7 +1479,7 @@ vuint64m4_t test_vloxei16_v_u64m4(const uint64_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vloxei16_v_u64m8(const uint64_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1489,7 +1489,7 @@ vuint64m8_t test_vloxei16_v_u64m8(const uint64_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vloxei32_v_u64m1(const uint64_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1499,7 +1499,7 @@ vuint64m1_t test_vloxei32_v_u64m1(const uint64_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei32_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vloxei32_v_u64m2(const uint64_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1509,7 +1509,7 @@ vuint64m2_t test_vloxei32_v_u64m2(const uint64_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vloxei32_v_u64m4(const uint64_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1519,7 +1519,7 @@ vuint64m4_t test_vloxei32_v_u64m4(const uint64_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vloxei32_v_u64m8(const uint64_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1529,7 +1529,7 @@ vuint64m8_t test_vloxei32_v_u64m8(const uint64_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vloxei64_v_u64m1(const uint64_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1539,7 +1539,7 @@ vuint64m1_t test_vloxei64_v_u64m1(const uint64_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vloxei64_v_u64m2(const uint64_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1549,7 +1549,7 @@ vuint64m2_t test_vloxei64_v_u64m2(const uint64_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vloxei64_v_u64m4(const uint64_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1559,7 +1559,7 @@ vuint64m4_t test_vloxei64_v_u64m4(const uint64_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vloxei64_v_u64m8(const uint64_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1569,7 +1569,7 @@ vuint64m8_t test_vloxei64_v_u64m8(const uint64_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vloxei8_v_f32mf2(const float *base, vuint8mf8_t bindex, size_t vl) {
@@ -1579,7 +1579,7 @@ vfloat32mf2_t test_vloxei8_v_f32mf2(const float *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vloxei8_v_f32m1(const float *base, vuint8mf4_t bindex, size_t vl) {
@@ -1589,7 +1589,7 @@ vfloat32m1_t test_vloxei8_v_f32m1(const float *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vloxei8_v_f32m2(const float *base, vuint8mf2_t bindex, size_t vl) {
@@ -1599,7 +1599,7 @@ vfloat32m2_t test_vloxei8_v_f32m2(const float *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vloxei8_v_f32m4(const float *base, vuint8m1_t bindex, size_t vl) {
@@ -1609,7 +1609,7 @@ vfloat32m4_t test_vloxei8_v_f32m4(const float *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei8_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vloxei8_v_f32m8(const float *base, vuint8m2_t bindex, size_t vl) {
@@ -1619,7 +1619,7 @@ vfloat32m8_t test_vloxei8_v_f32m8(const float *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vloxei16_v_f32mf2(const float *base, vuint16mf4_t bindex, size_t vl) {
@@ -1629,7 +1629,7 @@ vfloat32mf2_t test_vloxei16_v_f32mf2(const float *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vloxei16_v_f32m1(const float *base, vuint16mf2_t bindex, size_t vl) {
@@ -1639,7 +1639,7 @@ vfloat32m1_t test_vloxei16_v_f32m1(const float *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vloxei16_v_f32m2(const float *base, vuint16m1_t bindex, size_t vl) {
@@ -1649,7 +1649,7 @@ vfloat32m2_t test_vloxei16_v_f32m2(const float *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vloxei16_v_f32m4(const float *base, vuint16m2_t bindex, size_t vl) {
@@ -1659,7 +1659,7 @@ vfloat32m4_t test_vloxei16_v_f32m4(const float *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vloxei16_v_f32m8(const float *base, vuint16m4_t bindex, size_t vl) {
@@ -1669,7 +1669,7 @@ vfloat32m8_t test_vloxei16_v_f32m8(const float *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vloxei32_v_f32mf2(const float *base, vuint32mf2_t bindex, size_t vl) {
@@ -1679,7 +1679,7 @@ vfloat32mf2_t test_vloxei32_v_f32mf2(const float *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vloxei32_v_f32m1(const float *base, vuint32m1_t bindex, size_t vl) {
@@ -1689,7 +1689,7 @@ vfloat32m1_t test_vloxei32_v_f32m1(const float *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vloxei32_v_f32m2(const float *base, vuint32m2_t bindex, size_t vl) {
@@ -1699,7 +1699,7 @@ vfloat32m2_t test_vloxei32_v_f32m2(const float *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vloxei32_v_f32m4(const float *base, vuint32m4_t bindex, size_t vl) {
@@ -1709,7 +1709,7 @@ vfloat32m4_t test_vloxei32_v_f32m4(const float *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei32_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vloxei32_v_f32m8(const float *base, vuint32m8_t bindex, size_t vl) {
@@ -1719,7 +1719,7 @@ vfloat32m8_t test_vloxei32_v_f32m8(const float *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vloxei64_v_f32mf2(const float *base, vuint64m1_t bindex, size_t vl) {
@@ -1729,7 +1729,7 @@ vfloat32mf2_t test_vloxei64_v_f32mf2(const float *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei64_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vloxei64_v_f32m1(const float *base, vuint64m2_t bindex, size_t vl) {
@@ -1739,7 +1739,7 @@ vfloat32m1_t test_vloxei64_v_f32m1(const float *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vloxei64_v_f32m2(const float *base, vuint64m4_t bindex, size_t vl) {
@@ -1749,7 +1749,7 @@ vfloat32m2_t test_vloxei64_v_f32m2(const float *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei64_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vloxei64_v_f32m4(const float *base, vuint64m8_t bindex, size_t vl) {
@@ -1759,7 +1759,7 @@ vfloat32m4_t test_vloxei64_v_f32m4(const float *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vloxei8_v_f64m1(const double *base, vuint8mf8_t bindex, size_t vl) {
@@ -1769,7 +1769,7 @@ vfloat64m1_t test_vloxei8_v_f64m1(const double *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vloxei8_v_f64m2(const double *base, vuint8mf4_t bindex, size_t vl) {
@@ -1779,7 +1779,7 @@ vfloat64m2_t test_vloxei8_v_f64m2(const double *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vloxei8_v_f64m4(const double *base, vuint8mf2_t bindex, size_t vl) {
@@ -1789,7 +1789,7 @@ vfloat64m4_t test_vloxei8_v_f64m4(const double *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei8_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vloxei8_v_f64m8(const double *base, vuint8m1_t bindex, size_t vl) {
@@ -1799,7 +1799,7 @@ vfloat64m8_t test_vloxei8_v_f64m8(const double *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vloxei16_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vloxei16_v_f64m1(const double *base, vuint16mf4_t bindex, size_t vl) {
@@ -1809,7 +1809,7 @@ vfloat64m1_t test_vloxei16_v_f64m1(const double *base, vuint16mf4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vloxei16_v_f64m2(const double *base, vuint16mf2_t bindex, size_t vl) {
@@ -1819,7 +1819,7 @@ vfloat64m2_t test_vloxei16_v_f64m2(const double *base, vuint16mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vloxei16_v_f64m4(const double *base, vuint16m1_t bindex, size_t vl) {
@@ -1829,7 +1829,7 @@ vfloat64m4_t test_vloxei16_v_f64m4(const double *base, vuint16m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei16_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vloxei16_v_f64m8(const double *base, vuint16m2_t bindex, size_t vl) {
@@ -1839,7 +1839,7 @@ vfloat64m8_t test_vloxei16_v_f64m8(const double *base, vuint16m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vloxei32_v_f64m1(const double *base, vuint32mf2_t bindex, size_t vl) {
@@ -1849,7 +1849,7 @@ vfloat64m1_t test_vloxei32_v_f64m1(const double *base, vuint32mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei32_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vloxei32_v_f64m2(const double *base, vuint32m1_t bindex, size_t vl) {
@@ -1859,7 +1859,7 @@ vfloat64m2_t test_vloxei32_v_f64m2(const double *base, vuint32m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vloxei32_v_f64m4(const double *base, vuint32m2_t bindex, size_t vl) {
@@ -1869,7 +1869,7 @@ vfloat64m4_t test_vloxei32_v_f64m4(const double *base, vuint32m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei32_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vloxei32_v_f64m8(const double *base, vuint32m4_t bindex, size_t vl) {
@@ -1879,7 +1879,7 @@ vfloat64m8_t test_vloxei32_v_f64m8(const double *base, vuint32m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i64.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vloxei64_v_f64m1(const double *base, vuint64m1_t bindex, size_t vl) {
@@ -1889,7 +1889,7 @@ vfloat64m1_t test_vloxei64_v_f64m1(const double *base, vuint64m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i64.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i64.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vloxei64_v_f64m2(const double *base, vuint64m2_t bindex, size_t vl) {
@@ -1899,7 +1899,7 @@ vfloat64m2_t test_vloxei64_v_f64m2(const double *base, vuint64m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i64.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i64.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vloxei64_v_f64m4(const double *base, vuint64m4_t bindex, size_t vl) {
@@ -1909,7 +1909,7 @@ vfloat64m4_t test_vloxei64_v_f64m4(const double *base, vuint64m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vloxei64_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i64.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i64.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vloxei64_v_f64m8(const double *base, vuint64m8_t bindex, size_t vl) {
@@ -3829,7 +3829,7 @@ vfloat64m8_t test_vloxei64_v_f64m8_m(vbool8_t mask, vfloat64m8_t maskedoff, cons
 // CHECK-RV64-LABEL: @test_vloxei8_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8.i64(<vscale x 1 x half>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vloxei8_v_f16mf4 (const _Float16 *base, vuint8mf8_t bindex, size_t vl) {
@@ -3839,7 +3839,7 @@ vfloat16mf4_t test_vloxei8_v_f16mf4 (const _Float16 *base, vuint8mf8_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei8_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8.i64(<vscale x 2 x half>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vloxei8_v_f16mf2 (const _Float16 *base, vuint8mf4_t bindex, size_t vl) {
@@ -3849,7 +3849,7 @@ vfloat16mf2_t test_vloxei8_v_f16mf2 (const _Float16 *base, vuint8mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vloxei8_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8.i64(<vscale x 4 x half>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vloxei8_v_f16m1 (const _Float16 *base, vuint8mf2_t bindex, size_t vl) {
@@ -3859,7 +3859,7 @@ vfloat16m1_t test_vloxei8_v_f16m1 (const _Float16 *base, vuint8mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vloxei8_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8.i64(<vscale x 8 x half>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vloxei8_v_f16m2 (const _Float16 *base, vuint8m1_t bindex, size_t vl) {
@@ -3869,7 +3869,7 @@ vfloat16m2_t test_vloxei8_v_f16m2 (const _Float16 *base, vuint8m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei8_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8.i64(<vscale x 16 x half>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
 //
 vfloat16m4_t test_vloxei8_v_f16m4 (const _Float16 *base, vuint8m2_t bindex, size_t vl) {
@@ -3879,7 +3879,7 @@ vfloat16m4_t test_vloxei8_v_f16m4 (const _Float16 *base, vuint8m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei8_v_f16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 32 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8.i64(<vscale x 32 x half>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8.i64(<vscale x 32 x half> undef, <vscale x 32 x half>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP1]]
 //
 vfloat16m8_t test_vloxei8_v_f16m8 (const _Float16 *base, vuint8m4_t bindex, size_t vl) {
@@ -3889,7 +3889,7 @@ vfloat16m8_t test_vloxei8_v_f16m8 (const _Float16 *base, vuint8m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vloxei16_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i16.i64(<vscale x 1 x half>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i16.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vloxei16_v_f16mf4 (const _Float16 *base, vuint16mf4_t bindex, size_t vl) {
@@ -3899,7 +3899,7 @@ vfloat16mf4_t test_vloxei16_v_f16mf4 (const _Float16 *base, vuint16mf4_t bindex,
 // CHECK-RV64-LABEL: @test_vloxei16_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i16.i64(<vscale x 2 x half>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i16.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vloxei16_v_f16mf2 (const _Float16 *base, vuint16mf2_t bindex, size_t vl) {
@@ -3909,7 +3909,7 @@ vfloat16mf2_t test_vloxei16_v_f16mf2 (const _Float16 *base, vuint16mf2_t bindex,
 // CHECK-RV64-LABEL: @test_vloxei16_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i16.i64(<vscale x 4 x half>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i16.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vloxei16_v_f16m1 (const _Float16 *base, vuint16m1_t bindex, size_t vl) {
@@ -3919,7 +3919,7 @@ vfloat16m1_t test_vloxei16_v_f16m1 (const _Float16 *base, vuint16m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei16_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i16.i64(<vscale x 8 x half>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i16.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vloxei16_v_f16m2 (const _Float16 *base, vuint16m2_t bindex, size_t vl) {
@@ -3929,7 +3929,7 @@ vfloat16m2_t test_vloxei16_v_f16m2 (const _Float16 *base, vuint16m2_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei16_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i16.i64(<vscale x 16 x half>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i16.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
 //
 vfloat16m4_t test_vloxei16_v_f16m4 (const _Float16 *base, vuint16m4_t bindex, size_t vl) {
@@ -3939,7 +3939,7 @@ vfloat16m4_t test_vloxei16_v_f16m4 (const _Float16 *base, vuint16m4_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei16_v_f16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 32 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i16.i64(<vscale x 32 x half>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i16.i64(<vscale x 32 x half> undef, <vscale x 32 x half>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP1]]
 //
 vfloat16m8_t test_vloxei16_v_f16m8 (const _Float16 *base, vuint16m8_t bindex, size_t vl) {
@@ -3949,7 +3949,7 @@ vfloat16m8_t test_vloxei16_v_f16m8 (const _Float16 *base, vuint16m8_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei32_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32.i64(<vscale x 1 x half>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vloxei32_v_f16mf4 (const _Float16 *base, vuint32mf2_t bindex, size_t vl) {
@@ -3959,7 +3959,7 @@ vfloat16mf4_t test_vloxei32_v_f16mf4 (const _Float16 *base, vuint32mf2_t bindex,
 // CHECK-RV64-LABEL: @test_vloxei32_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32.i64(<vscale x 2 x half>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vloxei32_v_f16mf2 (const _Float16 *base, vuint32m1_t bindex, size_t vl) {
@@ -3969,7 +3969,7 @@ vfloat16mf2_t test_vloxei32_v_f16mf2 (const _Float16 *base, vuint32m1_t bindex,
 // CHECK-RV64-LABEL: @test_vloxei32_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32.i64(<vscale x 4 x half>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vloxei32_v_f16m1 (const _Float16 *base, vuint32m2_t bindex, size_t vl) {
@@ -3979,7 +3979,7 @@ vfloat16m1_t test_vloxei32_v_f16m1 (const _Float16 *base, vuint32m2_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei32_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32.i64(<vscale x 8 x half>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vloxei32_v_f16m2 (const _Float16 *base, vuint32m4_t bindex, size_t vl) {
@@ -3989,7 +3989,7 @@ vfloat16m2_t test_vloxei32_v_f16m2 (const _Float16 *base, vuint32m4_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei32_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32.i64(<vscale x 16 x half>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
 //
 vfloat16m4_t test_vloxei32_v_f16m4 (const _Float16 *base, vuint32m8_t bindex, size_t vl) {
@@ -3999,7 +3999,7 @@ vfloat16m4_t test_vloxei32_v_f16m4 (const _Float16 *base, vuint32m8_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei64_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i64.i64(<vscale x 1 x half>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i64.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vloxei64_v_f16mf4 (const _Float16 *base, vuint64m1_t bindex, size_t vl) {
@@ -4009,7 +4009,7 @@ vfloat16mf4_t test_vloxei64_v_f16mf4 (const _Float16 *base, vuint64m1_t bindex,
 // CHECK-RV64-LABEL: @test_vloxei64_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i64.i64(<vscale x 2 x half>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i64.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vloxei64_v_f16mf2 (const _Float16 *base, vuint64m2_t bindex, size_t vl) {
@@ -4019,7 +4019,7 @@ vfloat16mf2_t test_vloxei64_v_f16mf2 (const _Float16 *base, vuint64m2_t bindex,
 // CHECK-RV64-LABEL: @test_vloxei64_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i64.i64(<vscale x 4 x half>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i64.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vloxei64_v_f16m1 (const _Float16 *base, vuint64m4_t bindex, size_t vl) {
@@ -4029,7 +4029,7 @@ vfloat16m1_t test_vloxei64_v_f16m1 (const _Float16 *base, vuint64m4_t bindex, si
 // CHECK-RV64-LABEL: @test_vloxei64_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i64.i64(<vscale x 8 x half>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i64.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vloxei64_v_f16m2 (const _Float16 *base, vuint64m8_t bindex, size_t vl) {
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c
index e35a9e8168bd0..0d0d5583da026 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vlse.c
@@ -9,7 +9,7 @@
 // CHECK-RV64-LABEL: @test_vlse8_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vlse8_v_i8mf8(const int8_t *base, ptrdiff_t bstride,
@@ -20,7 +20,7 @@ vint8mf8_t test_vlse8_v_i8mf8(const int8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vlse8_v_i8mf4(const int8_t *base, ptrdiff_t bstride,
@@ -31,7 +31,7 @@ vint8mf4_t test_vlse8_v_i8mf4(const int8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vlse8_v_i8mf2(const int8_t *base, ptrdiff_t bstride,
@@ -42,7 +42,7 @@ vint8mf2_t test_vlse8_v_i8mf2(const int8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vlse8_v_i8m1(const int8_t *base, ptrdiff_t bstride, size_t vl) {
@@ -52,7 +52,7 @@ vint8m1_t test_vlse8_v_i8m1(const int8_t *base, ptrdiff_t bstride, size_t vl) {
 // CHECK-RV64-LABEL: @test_vlse8_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vlse8_v_i8m2(const int8_t *base, ptrdiff_t bstride, size_t vl) {
@@ -62,7 +62,7 @@ vint8m2_t test_vlse8_v_i8m2(const int8_t *base, ptrdiff_t bstride, size_t vl) {
 // CHECK-RV64-LABEL: @test_vlse8_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vlse8_v_i8m4(const int8_t *base, ptrdiff_t bstride, size_t vl) {
@@ -72,7 +72,7 @@ vint8m4_t test_vlse8_v_i8m4(const int8_t *base, ptrdiff_t bstride, size_t vl) {
 // CHECK-RV64-LABEL: @test_vlse8_v_i8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vint8m8_t test_vlse8_v_i8m8(const int8_t *base, ptrdiff_t bstride, size_t vl) {
@@ -82,7 +82,7 @@ vint8m8_t test_vlse8_v_i8m8(const int8_t *base, ptrdiff_t bstride, size_t vl) {
 // CHECK-RV64-LABEL: @test_vlse16_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vlse16_v_i16mf4(const int16_t *base, ptrdiff_t bstride,
@@ -93,7 +93,7 @@ vint16mf4_t test_vlse16_v_i16mf4(const int16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vlse16_v_i16mf2(const int16_t *base, ptrdiff_t bstride,
@@ -104,7 +104,7 @@ vint16mf2_t test_vlse16_v_i16mf2(const int16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vlse16_v_i16m1(const int16_t *base, ptrdiff_t bstride,
@@ -115,7 +115,7 @@ vint16m1_t test_vlse16_v_i16m1(const int16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vlse16_v_i16m2(const int16_t *base, ptrdiff_t bstride,
@@ -126,7 +126,7 @@ vint16m2_t test_vlse16_v_i16m2(const int16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vlse16_v_i16m4(const int16_t *base, ptrdiff_t bstride,
@@ -137,7 +137,7 @@ vint16m4_t test_vlse16_v_i16m4(const int16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vlse16_v_i16m8(const int16_t *base, ptrdiff_t bstride,
@@ -148,7 +148,7 @@ vint16m8_t test_vlse16_v_i16m8(const int16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vlse32_v_i32mf2(const int32_t *base, ptrdiff_t bstride,
@@ -159,7 +159,7 @@ vint32mf2_t test_vlse32_v_i32mf2(const int32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vlse32_v_i32m1(const int32_t *base, ptrdiff_t bstride,
@@ -170,7 +170,7 @@ vint32m1_t test_vlse32_v_i32m1(const int32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vlse32_v_i32m2(const int32_t *base, ptrdiff_t bstride,
@@ -181,7 +181,7 @@ vint32m2_t test_vlse32_v_i32m2(const int32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vlse32_v_i32m4(const int32_t *base, ptrdiff_t bstride,
@@ -192,7 +192,7 @@ vint32m4_t test_vlse32_v_i32m4(const int32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vlse32_v_i32m8(const int32_t *base, ptrdiff_t bstride,
@@ -203,7 +203,7 @@ vint32m8_t test_vlse32_v_i32m8(const int32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vlse64_v_i64m1(const int64_t *base, ptrdiff_t bstride,
@@ -214,7 +214,7 @@ vint64m1_t test_vlse64_v_i64m1(const int64_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vlse64_v_i64m2(const int64_t *base, ptrdiff_t bstride,
@@ -225,7 +225,7 @@ vint64m2_t test_vlse64_v_i64m2(const int64_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vlse64_v_i64m4(const int64_t *base, ptrdiff_t bstride,
@@ -236,7 +236,7 @@ vint64m4_t test_vlse64_v_i64m4(const int64_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vlse64_v_i64m8(const int64_t *base, ptrdiff_t bstride,
@@ -247,7 +247,7 @@ vint64m8_t test_vlse64_v_i64m8(const int64_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vlse8_v_u8mf8(const uint8_t *base, ptrdiff_t bstride,
@@ -258,7 +258,7 @@ vuint8mf8_t test_vlse8_v_u8mf8(const uint8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vlse8_v_u8mf4(const uint8_t *base, ptrdiff_t bstride,
@@ -269,7 +269,7 @@ vuint8mf4_t test_vlse8_v_u8mf4(const uint8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vlse8_v_u8mf2(const uint8_t *base, ptrdiff_t bstride,
@@ -280,7 +280,7 @@ vuint8mf2_t test_vlse8_v_u8mf2(const uint8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vlse8_v_u8m1(const uint8_t *base, ptrdiff_t bstride,
@@ -291,7 +291,7 @@ vuint8m1_t test_vlse8_v_u8m1(const uint8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vlse8_v_u8m2(const uint8_t *base, ptrdiff_t bstride,
@@ -302,7 +302,7 @@ vuint8m2_t test_vlse8_v_u8m2(const uint8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vlse8_v_u8m4(const uint8_t *base, ptrdiff_t bstride,
@@ -313,7 +313,7 @@ vuint8m4_t test_vlse8_v_u8m4(const uint8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse8_v_u8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vuint8m8_t test_vlse8_v_u8m8(const uint8_t *base, ptrdiff_t bstride,
@@ -324,7 +324,7 @@ vuint8m8_t test_vlse8_v_u8m8(const uint8_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vlse16_v_u16mf4(const uint16_t *base, ptrdiff_t bstride,
@@ -335,7 +335,7 @@ vuint16mf4_t test_vlse16_v_u16mf4(const uint16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vlse16_v_u16mf2(const uint16_t *base, ptrdiff_t bstride,
@@ -346,7 +346,7 @@ vuint16mf2_t test_vlse16_v_u16mf2(const uint16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vlse16_v_u16m1(const uint16_t *base, ptrdiff_t bstride,
@@ -357,7 +357,7 @@ vuint16m1_t test_vlse16_v_u16m1(const uint16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vlse16_v_u16m2(const uint16_t *base, ptrdiff_t bstride,
@@ -368,7 +368,7 @@ vuint16m2_t test_vlse16_v_u16m2(const uint16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vlse16_v_u16m4(const uint16_t *base, ptrdiff_t bstride,
@@ -379,7 +379,7 @@ vuint16m4_t test_vlse16_v_u16m4(const uint16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse16_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vlse16_v_u16m8(const uint16_t *base, ptrdiff_t bstride,
@@ -390,7 +390,7 @@ vuint16m8_t test_vlse16_v_u16m8(const uint16_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vlse32_v_u32mf2(const uint32_t *base, ptrdiff_t bstride,
@@ -401,7 +401,7 @@ vuint32mf2_t test_vlse32_v_u32mf2(const uint32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vlse32_v_u32m1(const uint32_t *base, ptrdiff_t bstride,
@@ -412,7 +412,7 @@ vuint32m1_t test_vlse32_v_u32m1(const uint32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vlse32_v_u32m2(const uint32_t *base, ptrdiff_t bstride,
@@ -423,7 +423,7 @@ vuint32m2_t test_vlse32_v_u32m2(const uint32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vlse32_v_u32m4(const uint32_t *base, ptrdiff_t bstride,
@@ -434,7 +434,7 @@ vuint32m4_t test_vlse32_v_u32m4(const uint32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vlse32_v_u32m8(const uint32_t *base, ptrdiff_t bstride,
@@ -445,7 +445,7 @@ vuint32m8_t test_vlse32_v_u32m8(const uint32_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vlse64_v_u64m1(const uint64_t *base, ptrdiff_t bstride,
@@ -456,7 +456,7 @@ vuint64m1_t test_vlse64_v_u64m1(const uint64_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vlse64_v_u64m2(const uint64_t *base, ptrdiff_t bstride,
@@ -467,7 +467,7 @@ vuint64m2_t test_vlse64_v_u64m2(const uint64_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vlse64_v_u64m4(const uint64_t *base, ptrdiff_t bstride,
@@ -478,7 +478,7 @@ vuint64m4_t test_vlse64_v_u64m4(const uint64_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vlse64_v_u64m8(const uint64_t *base, ptrdiff_t bstride,
@@ -489,7 +489,7 @@ vuint64m8_t test_vlse64_v_u64m8(const uint64_t *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vlse.nxv1f32.i64(<vscale x 1 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vlse.nxv1f32.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vlse32_v_f32mf2(const float *base, ptrdiff_t bstride,
@@ -500,7 +500,7 @@ vfloat32mf2_t test_vlse32_v_f32mf2(const float *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vlse.nxv2f32.i64(<vscale x 2 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vlse.nxv2f32.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vlse32_v_f32m1(const float *base, ptrdiff_t bstride,
@@ -511,7 +511,7 @@ vfloat32m1_t test_vlse32_v_f32m1(const float *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vlse.nxv4f32.i64(<vscale x 4 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vlse.nxv4f32.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vlse32_v_f32m2(const float *base, ptrdiff_t bstride,
@@ -522,7 +522,7 @@ vfloat32m2_t test_vlse32_v_f32m2(const float *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vlse.nxv8f32.i64(<vscale x 8 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vlse.nxv8f32.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vlse32_v_f32m4(const float *base, ptrdiff_t bstride,
@@ -533,7 +533,7 @@ vfloat32m4_t test_vlse32_v_f32m4(const float *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse32_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vlse.nxv16f32.i64(<vscale x 16 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vlse.nxv16f32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vlse32_v_f32m8(const float *base, ptrdiff_t bstride,
@@ -544,7 +544,7 @@ vfloat32m8_t test_vlse32_v_f32m8(const float *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vlse.nxv1f64.i64(<vscale x 1 x double>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vlse.nxv1f64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vlse64_v_f64m1(const double *base, ptrdiff_t bstride,
@@ -555,7 +555,7 @@ vfloat64m1_t test_vlse64_v_f64m1(const double *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vlse.nxv2f64.i64(<vscale x 2 x double>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vlse.nxv2f64.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vlse64_v_f64m2(const double *base, ptrdiff_t bstride,
@@ -566,7 +566,7 @@ vfloat64m2_t test_vlse64_v_f64m2(const double *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vlse.nxv4f64.i64(<vscale x 4 x double>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vlse.nxv4f64.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vlse64_v_f64m4(const double *base, ptrdiff_t bstride,
@@ -577,7 +577,7 @@ vfloat64m4_t test_vlse64_v_f64m4(const double *base, ptrdiff_t bstride,
 // CHECK-RV64-LABEL: @test_vlse64_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vlse.nxv8f64.i64(<vscale x 8 x double>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vlse.nxv8f64.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vlse64_v_f64m8(const double *base, ptrdiff_t bstride,
@@ -1224,7 +1224,7 @@ vfloat64m8_t test_vlse64_v_f64m8_m(vbool8_t mask, vfloat64m8_t maskedoff,
 // CHECK-RV64-LABEL: @test_vlse16_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vlse.nxv1f16.i64(<vscale x 1 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vlse.nxv1f16.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vlse16_v_f16mf4 (const _Float16 *base, ptrdiff_t bstride, size_t vl) {
@@ -1234,7 +1234,7 @@ vfloat16mf4_t test_vlse16_v_f16mf4 (const _Float16 *base, ptrdiff_t bstride, siz
 // CHECK-RV64-LABEL: @test_vlse16_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vlse.nxv2f16.i64(<vscale x 2 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vlse.nxv2f16.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vlse16_v_f16mf2 (const _Float16 *base, ptrdiff_t bstride, size_t vl) {
@@ -1244,7 +1244,7 @@ vfloat16mf2_t test_vlse16_v_f16mf2 (const _Float16 *base, ptrdiff_t bstride, siz
 // CHECK-RV64-LABEL: @test_vlse16_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vlse.nxv4f16.i64(<vscale x 4 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vlse.nxv4f16.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vlse16_v_f16m1 (const _Float16 *base, ptrdiff_t bstride, size_t vl) {
@@ -1254,7 +1254,7 @@ vfloat16m1_t test_vlse16_v_f16m1 (const _Float16 *base, ptrdiff_t bstride, size_
 // CHECK-RV64-LABEL: @test_vlse16_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vlse.nxv8f16.i64(<vscale x 8 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vlse.nxv8f16.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vlse16_v_f16m2 (const _Float16 *base, ptrdiff_t bstride, size_t vl) {
@@ -1264,7 +1264,7 @@ vfloat16m2_t test_vlse16_v_f16m2 (const _Float16 *base, ptrdiff_t bstride, size_
 // CHECK-RV64-LABEL: @test_vlse16_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vlse.nxv16f16.i64(<vscale x 16 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vlse.nxv16f16.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
 //
 vfloat16m4_t test_vlse16_v_f16m4 (const _Float16 *base, ptrdiff_t bstride, size_t vl) {
@@ -1274,7 +1274,7 @@ vfloat16m4_t test_vlse16_v_f16m4 (const _Float16 *base, ptrdiff_t bstride, size_
 // CHECK-RV64-LABEL: @test_vlse16_v_f16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 32 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vlse.nxv32f16.i64(<vscale x 32 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vlse.nxv32f16.i64(<vscale x 32 x half> undef, <vscale x 32 x half>* [[TMP0]], i64 [[BSTRIDE:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP1]]
 //
 vfloat16m8_t test_vlse16_v_f16m8 (const _Float16 *base, ptrdiff_t bstride, size_t vl) {
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c
index d24890ff9c7a4..6124aac1e572f 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vluxei.c
@@ -9,7 +9,7 @@
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vluxei8_v_i8mf8(const int8_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -19,7 +19,7 @@ vint8mf8_t test_vluxei8_v_i8mf8(const int8_t *base, vuint8mf8_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vluxei8_v_i8mf4(const int8_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -29,7 +29,7 @@ vint8mf4_t test_vluxei8_v_i8mf4(const int8_t *base, vuint8mf4_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vluxei8_v_i8mf2(const int8_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -39,7 +39,7 @@ vint8mf2_t test_vluxei8_v_i8mf2(const int8_t *base, vuint8mf2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vluxei8_v_i8m1(const int8_t *base, vuint8m1_t bindex, size_t vl) {
@@ -49,7 +49,7 @@ vint8m1_t test_vluxei8_v_i8m1(const int8_t *base, vuint8m1_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vluxei8_v_i8m2(const int8_t *base, vuint8m2_t bindex, size_t vl) {
@@ -59,7 +59,7 @@ vint8m2_t test_vluxei8_v_i8m2(const int8_t *base, vuint8m2_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vluxei8_v_i8m4(const int8_t *base, vuint8m4_t bindex, size_t vl) {
@@ -69,7 +69,7 @@ vint8m4_t test_vluxei8_v_i8m4(const int8_t *base, vuint8m4_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vluxei8_v_i8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vint8m8_t test_vluxei8_v_i8m8(const int8_t *base, vuint8m8_t bindex, size_t vl) {
@@ -79,7 +79,7 @@ vint8m8_t test_vluxei8_v_i8m8(const int8_t *base, vuint8m8_t bindex, size_t vl)
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vluxei16_v_i8mf8(const int8_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -89,7 +89,7 @@ vint8mf8_t test_vluxei16_v_i8mf8(const int8_t *base, vuint16mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vluxei16_v_i8mf4(const int8_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -99,7 +99,7 @@ vint8mf4_t test_vluxei16_v_i8mf4(const int8_t *base, vuint16mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vluxei16_v_i8mf2(const int8_t *base, vuint16m1_t bindex, size_t vl) {
@@ -109,7 +109,7 @@ vint8mf2_t test_vluxei16_v_i8mf2(const int8_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vluxei16_v_i8m1(const int8_t *base, vuint16m2_t bindex, size_t vl) {
@@ -119,7 +119,7 @@ vint8m1_t test_vluxei16_v_i8m1(const int8_t *base, vuint16m2_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vluxei16_v_i8m2(const int8_t *base, vuint16m4_t bindex, size_t vl) {
@@ -129,7 +129,7 @@ vint8m2_t test_vluxei16_v_i8m2(const int8_t *base, vuint16m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei16_v_i8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vint8m4_t test_vluxei16_v_i8m4(const int8_t *base, vuint16m8_t bindex, size_t vl) {
@@ -139,7 +139,7 @@ vint8m4_t test_vluxei16_v_i8m4(const int8_t *base, vuint16m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vluxei32_v_i8mf8(const int8_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -149,7 +149,7 @@ vint8mf8_t test_vluxei32_v_i8mf8(const int8_t *base, vuint32mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vluxei32_v_i8mf4(const int8_t *base, vuint32m1_t bindex, size_t vl) {
@@ -159,7 +159,7 @@ vint8mf4_t test_vluxei32_v_i8mf4(const int8_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vluxei32_v_i8mf2(const int8_t *base, vuint32m2_t bindex, size_t vl) {
@@ -169,7 +169,7 @@ vint8mf2_t test_vluxei32_v_i8mf2(const int8_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vluxei32_v_i8m1(const int8_t *base, vuint32m4_t bindex, size_t vl) {
@@ -179,7 +179,7 @@ vint8m1_t test_vluxei32_v_i8m1(const int8_t *base, vuint32m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei32_v_i8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vint8m2_t test_vluxei32_v_i8m2(const int8_t *base, vuint32m8_t bindex, size_t vl) {
@@ -189,7 +189,7 @@ vint8m2_t test_vluxei32_v_i8m2(const int8_t *base, vuint32m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei64_v_i8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vint8mf8_t test_vluxei64_v_i8mf8(const int8_t *base, vuint64m1_t bindex, size_t vl) {
@@ -199,7 +199,7 @@ vint8mf8_t test_vluxei64_v_i8mf8(const int8_t *base, vuint64m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vint8mf4_t test_vluxei64_v_i8mf4(const int8_t *base, vuint64m2_t bindex, size_t vl) {
@@ -209,7 +209,7 @@ vint8mf4_t test_vluxei64_v_i8mf4(const int8_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vint8mf2_t test_vluxei64_v_i8mf2(const int8_t *base, vuint64m4_t bindex, size_t vl) {
@@ -219,7 +219,7 @@ vint8mf2_t test_vluxei64_v_i8mf2(const int8_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vint8m1_t test_vluxei64_v_i8m1(const int8_t *base, vuint64m8_t bindex, size_t vl) {
@@ -229,7 +229,7 @@ vint8m1_t test_vluxei64_v_i8m1(const int8_t *base, vuint64m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vluxei8_v_i16mf4(const int16_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -239,7 +239,7 @@ vint16mf4_t test_vluxei8_v_i16mf4(const int16_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vluxei8_v_i16mf2(const int16_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -249,7 +249,7 @@ vint16mf2_t test_vluxei8_v_i16mf2(const int16_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vluxei8_v_i16m1(const int16_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -259,7 +259,7 @@ vint16m1_t test_vluxei8_v_i16m1(const int16_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vluxei8_v_i16m2(const int16_t *base, vuint8m1_t bindex, size_t vl) {
@@ -269,7 +269,7 @@ vint16m2_t test_vluxei8_v_i16m2(const int16_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vluxei8_v_i16m4(const int16_t *base, vuint8m2_t bindex, size_t vl) {
@@ -279,7 +279,7 @@ vint16m4_t test_vluxei8_v_i16m4(const int16_t *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vluxei8_v_i16m8(const int16_t *base, vuint8m4_t bindex, size_t vl) {
@@ -289,7 +289,7 @@ vint16m8_t test_vluxei8_v_i16m8(const int16_t *base, vuint8m4_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vluxei16_v_i16mf4(const int16_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -299,7 +299,7 @@ vint16mf4_t test_vluxei16_v_i16mf4(const int16_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vluxei16_v_i16mf2(const int16_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -309,7 +309,7 @@ vint16mf2_t test_vluxei16_v_i16mf2(const int16_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vluxei16_v_i16m1(const int16_t *base, vuint16m1_t bindex, size_t vl) {
@@ -319,7 +319,7 @@ vint16m1_t test_vluxei16_v_i16m1(const int16_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vluxei16_v_i16m2(const int16_t *base, vuint16m2_t bindex, size_t vl) {
@@ -329,7 +329,7 @@ vint16m2_t test_vluxei16_v_i16m2(const int16_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vluxei16_v_i16m4(const int16_t *base, vuint16m4_t bindex, size_t vl) {
@@ -339,7 +339,7 @@ vint16m4_t test_vluxei16_v_i16m4(const int16_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vint16m8_t test_vluxei16_v_i16m8(const int16_t *base, vuint16m8_t bindex, size_t vl) {
@@ -349,7 +349,7 @@ vint16m8_t test_vluxei16_v_i16m8(const int16_t *base, vuint16m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vluxei32_v_i16mf4(const int16_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -359,7 +359,7 @@ vint16mf4_t test_vluxei32_v_i16mf4(const int16_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vluxei32_v_i16mf2(const int16_t *base, vuint32m1_t bindex, size_t vl) {
@@ -369,7 +369,7 @@ vint16mf2_t test_vluxei32_v_i16mf2(const int16_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vluxei32_v_i16m1(const int16_t *base, vuint32m2_t bindex, size_t vl) {
@@ -379,7 +379,7 @@ vint16m1_t test_vluxei32_v_i16m1(const int16_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vluxei32_v_i16m2(const int16_t *base, vuint32m4_t bindex, size_t vl) {
@@ -389,7 +389,7 @@ vint16m2_t test_vluxei32_v_i16m2(const int16_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vint16m4_t test_vluxei32_v_i16m4(const int16_t *base, vuint32m8_t bindex, size_t vl) {
@@ -399,7 +399,7 @@ vint16m4_t test_vluxei32_v_i16m4(const int16_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vint16mf4_t test_vluxei64_v_i16mf4(const int16_t *base, vuint64m1_t bindex, size_t vl) {
@@ -409,7 +409,7 @@ vint16mf4_t test_vluxei64_v_i16mf4(const int16_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_i16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vint16mf2_t test_vluxei64_v_i16mf2(const int16_t *base, vuint64m2_t bindex, size_t vl) {
@@ -419,7 +419,7 @@ vint16mf2_t test_vluxei64_v_i16mf2(const int16_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_i16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vint16m1_t test_vluxei64_v_i16m1(const int16_t *base, vuint64m4_t bindex, size_t vl) {
@@ -429,7 +429,7 @@ vint16m1_t test_vluxei64_v_i16m1(const int16_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vint16m2_t test_vluxei64_v_i16m2(const int16_t *base, vuint64m8_t bindex, size_t vl) {
@@ -439,7 +439,7 @@ vint16m2_t test_vluxei64_v_i16m2(const int16_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vluxei8_v_i32mf2(const int32_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -449,7 +449,7 @@ vint32mf2_t test_vluxei8_v_i32mf2(const int32_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vluxei8_v_i32m1(const int32_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -459,7 +459,7 @@ vint32m1_t test_vluxei8_v_i32m1(const int32_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vluxei8_v_i32m2(const int32_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -469,7 +469,7 @@ vint32m2_t test_vluxei8_v_i32m2(const int32_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vluxei8_v_i32m4(const int32_t *base, vuint8m1_t bindex, size_t vl) {
@@ -479,7 +479,7 @@ vint32m4_t test_vluxei8_v_i32m4(const int32_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vluxei8_v_i32m8(const int32_t *base, vuint8m2_t bindex, size_t vl) {
@@ -489,7 +489,7 @@ vint32m8_t test_vluxei8_v_i32m8(const int32_t *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vluxei16_v_i32mf2(const int32_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -499,7 +499,7 @@ vint32mf2_t test_vluxei16_v_i32mf2(const int32_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vluxei16_v_i32m1(const int32_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -509,7 +509,7 @@ vint32m1_t test_vluxei16_v_i32m1(const int32_t *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vluxei16_v_i32m2(const int32_t *base, vuint16m1_t bindex, size_t vl) {
@@ -519,7 +519,7 @@ vint32m2_t test_vluxei16_v_i32m2(const int32_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vluxei16_v_i32m4(const int32_t *base, vuint16m2_t bindex, size_t vl) {
@@ -529,7 +529,7 @@ vint32m4_t test_vluxei16_v_i32m4(const int32_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vluxei16_v_i32m8(const int32_t *base, vuint16m4_t bindex, size_t vl) {
@@ -539,7 +539,7 @@ vint32m8_t test_vluxei16_v_i32m8(const int32_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vluxei32_v_i32mf2(const int32_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -549,7 +549,7 @@ vint32mf2_t test_vluxei32_v_i32mf2(const int32_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vluxei32_v_i32m1(const int32_t *base, vuint32m1_t bindex, size_t vl) {
@@ -559,7 +559,7 @@ vint32m1_t test_vluxei32_v_i32m1(const int32_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vluxei32_v_i32m2(const int32_t *base, vuint32m2_t bindex, size_t vl) {
@@ -569,7 +569,7 @@ vint32m2_t test_vluxei32_v_i32m2(const int32_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vluxei32_v_i32m4(const int32_t *base, vuint32m4_t bindex, size_t vl) {
@@ -579,7 +579,7 @@ vint32m4_t test_vluxei32_v_i32m4(const int32_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vint32m8_t test_vluxei32_v_i32m8(const int32_t *base, vuint32m8_t bindex, size_t vl) {
@@ -589,7 +589,7 @@ vint32m8_t test_vluxei32_v_i32m8(const int32_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vint32mf2_t test_vluxei64_v_i32mf2(const int32_t *base, vuint64m1_t bindex, size_t vl) {
@@ -599,7 +599,7 @@ vint32mf2_t test_vluxei64_v_i32mf2(const int32_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_i32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vint32m1_t test_vluxei64_v_i32m1(const int32_t *base, vuint64m2_t bindex, size_t vl) {
@@ -609,7 +609,7 @@ vint32m1_t test_vluxei64_v_i32m1(const int32_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vint32m2_t test_vluxei64_v_i32m2(const int32_t *base, vuint64m4_t bindex, size_t vl) {
@@ -619,7 +619,7 @@ vint32m2_t test_vluxei64_v_i32m2(const int32_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vint32m4_t test_vluxei64_v_i32m4(const int32_t *base, vuint64m8_t bindex, size_t vl) {
@@ -629,7 +629,7 @@ vint32m4_t test_vluxei64_v_i32m4(const int32_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vluxei8_v_i64m1(const int64_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -639,7 +639,7 @@ vint64m1_t test_vluxei8_v_i64m1(const int64_t *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vluxei8_v_i64m2(const int64_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -649,7 +649,7 @@ vint64m2_t test_vluxei8_v_i64m2(const int64_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vluxei8_v_i64m4(const int64_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -659,7 +659,7 @@ vint64m4_t test_vluxei8_v_i64m4(const int64_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vluxei8_v_i64m8(const int64_t *base, vuint8m1_t bindex, size_t vl) {
@@ -669,7 +669,7 @@ vint64m8_t test_vluxei8_v_i64m8(const int64_t *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei16_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vluxei16_v_i64m1(const int64_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -679,7 +679,7 @@ vint64m1_t test_vluxei16_v_i64m1(const int64_t *base, vuint16mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vluxei16_v_i64m2(const int64_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -689,7 +689,7 @@ vint64m2_t test_vluxei16_v_i64m2(const int64_t *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vluxei16_v_i64m4(const int64_t *base, vuint16m1_t bindex, size_t vl) {
@@ -699,7 +699,7 @@ vint64m4_t test_vluxei16_v_i64m4(const int64_t *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vluxei16_v_i64m8(const int64_t *base, vuint16m2_t bindex, size_t vl) {
@@ -709,7 +709,7 @@ vint64m8_t test_vluxei16_v_i64m8(const int64_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vluxei32_v_i64m1(const int64_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -719,7 +719,7 @@ vint64m1_t test_vluxei32_v_i64m1(const int64_t *base, vuint32mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vluxei32_v_i64m2(const int64_t *base, vuint32m1_t bindex, size_t vl) {
@@ -729,7 +729,7 @@ vint64m2_t test_vluxei32_v_i64m2(const int64_t *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vluxei32_v_i64m4(const int64_t *base, vuint32m2_t bindex, size_t vl) {
@@ -739,7 +739,7 @@ vint64m4_t test_vluxei32_v_i64m4(const int64_t *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vluxei32_v_i64m8(const int64_t *base, vuint32m4_t bindex, size_t vl) {
@@ -749,7 +749,7 @@ vint64m8_t test_vluxei32_v_i64m8(const int64_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vint64m1_t test_vluxei64_v_i64m1(const int64_t *base, vuint64m1_t bindex, size_t vl) {
@@ -759,7 +759,7 @@ vint64m1_t test_vluxei64_v_i64m1(const int64_t *base, vuint64m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vint64m2_t test_vluxei64_v_i64m2(const int64_t *base, vuint64m2_t bindex, size_t vl) {
@@ -769,7 +769,7 @@ vint64m2_t test_vluxei64_v_i64m2(const int64_t *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vint64m4_t test_vluxei64_v_i64m4(const int64_t *base, vuint64m4_t bindex, size_t vl) {
@@ -779,7 +779,7 @@ vint64m4_t test_vluxei64_v_i64m4(const int64_t *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_i64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vint64m8_t test_vluxei64_v_i64m8(const int64_t *base, vuint64m8_t bindex, size_t vl) {
@@ -789,7 +789,7 @@ vint64m8_t test_vluxei64_v_i64m8(const int64_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vluxei8_v_u8mf8(const uint8_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -799,7 +799,7 @@ vuint8mf8_t test_vluxei8_v_u8mf8(const uint8_t *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vluxei8_v_u8mf4(const uint8_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -809,7 +809,7 @@ vuint8mf4_t test_vluxei8_v_u8mf4(const uint8_t *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vluxei8_v_u8mf2(const uint8_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -819,7 +819,7 @@ vuint8mf2_t test_vluxei8_v_u8mf2(const uint8_t *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vluxei8_v_u8m1(const uint8_t *base, vuint8m1_t bindex, size_t vl) {
@@ -829,7 +829,7 @@ vuint8m1_t test_vluxei8_v_u8m1(const uint8_t *base, vuint8m1_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vluxei8_v_u8m2(const uint8_t *base, vuint8m2_t bindex, size_t vl) {
@@ -839,7 +839,7 @@ vuint8m2_t test_vluxei8_v_u8m2(const uint8_t *base, vuint8m2_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vluxei8_v_u8m4(const uint8_t *base, vuint8m4_t bindex, size_t vl) {
@@ -849,7 +849,7 @@ vuint8m4_t test_vluxei8_v_u8m4(const uint8_t *base, vuint8m4_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei8_v_u8m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 64 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8.i64(<vscale x 64 x i8> undef, <vscale x 64 x i8>* [[TMP0]], <vscale x 64 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 64 x i8> [[TMP1]]
 //
 vuint8m8_t test_vluxei8_v_u8m8(const uint8_t *base, vuint8m8_t bindex, size_t vl) {
@@ -859,7 +859,7 @@ vuint8m8_t test_vluxei8_v_u8m8(const uint8_t *base, vuint8m8_t bindex, size_t vl
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vluxei16_v_u8mf8(const uint8_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -869,7 +869,7 @@ vuint8mf8_t test_vluxei16_v_u8mf8(const uint8_t *base, vuint16mf4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vluxei16_v_u8mf4(const uint8_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -879,7 +879,7 @@ vuint8mf4_t test_vluxei16_v_u8mf4(const uint8_t *base, vuint16mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vluxei16_v_u8mf2(const uint8_t *base, vuint16m1_t bindex, size_t vl) {
@@ -889,7 +889,7 @@ vuint8mf2_t test_vluxei16_v_u8mf2(const uint8_t *base, vuint16m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vluxei16_v_u8m1(const uint8_t *base, vuint16m2_t bindex, size_t vl) {
@@ -899,7 +899,7 @@ vuint8m1_t test_vluxei16_v_u8m1(const uint8_t *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vluxei16_v_u8m2(const uint8_t *base, vuint16m4_t bindex, size_t vl) {
@@ -909,7 +909,7 @@ vuint8m2_t test_vluxei16_v_u8m2(const uint8_t *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u8m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 32 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16.i64(<vscale x 32 x i8> undef, <vscale x 32 x i8>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i8> [[TMP1]]
 //
 vuint8m4_t test_vluxei16_v_u8m4(const uint8_t *base, vuint16m8_t bindex, size_t vl) {
@@ -919,7 +919,7 @@ vuint8m4_t test_vluxei16_v_u8m4(const uint8_t *base, vuint16m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vluxei32_v_u8mf8(const uint8_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -929,7 +929,7 @@ vuint8mf8_t test_vluxei32_v_u8mf8(const uint8_t *base, vuint32mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vluxei32_v_u8mf4(const uint8_t *base, vuint32m1_t bindex, size_t vl) {
@@ -939,7 +939,7 @@ vuint8mf4_t test_vluxei32_v_u8mf4(const uint8_t *base, vuint32m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vluxei32_v_u8mf2(const uint8_t *base, vuint32m2_t bindex, size_t vl) {
@@ -949,7 +949,7 @@ vuint8mf2_t test_vluxei32_v_u8mf2(const uint8_t *base, vuint32m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vluxei32_v_u8m1(const uint8_t *base, vuint32m4_t bindex, size_t vl) {
@@ -959,7 +959,7 @@ vuint8m1_t test_vluxei32_v_u8m1(const uint8_t *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_u8m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 16 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32.i64(<vscale x 16 x i8> undef, <vscale x 16 x i8>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
 //
 vuint8m2_t test_vluxei32_v_u8m2(const uint8_t *base, vuint32m8_t bindex, size_t vl) {
@@ -969,7 +969,7 @@ vuint8m2_t test_vluxei32_v_u8m2(const uint8_t *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_u8mf8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 1 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64.i64(<vscale x 1 x i8> undef, <vscale x 1 x i8>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i8> [[TMP1]]
 //
 vuint8mf8_t test_vluxei64_v_u8mf8(const uint8_t *base, vuint64m1_t bindex, size_t vl) {
@@ -979,7 +979,7 @@ vuint8mf8_t test_vluxei64_v_u8mf8(const uint8_t *base, vuint64m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_u8mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 2 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64.i64(<vscale x 2 x i8> undef, <vscale x 2 x i8>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i8> [[TMP1]]
 //
 vuint8mf4_t test_vluxei64_v_u8mf4(const uint8_t *base, vuint64m2_t bindex, size_t vl) {
@@ -989,7 +989,7 @@ vuint8mf4_t test_vluxei64_v_u8mf4(const uint8_t *base, vuint64m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_u8mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 4 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64.i64(<vscale x 4 x i8> undef, <vscale x 4 x i8>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i8> [[TMP1]]
 //
 vuint8mf2_t test_vluxei64_v_u8mf2(const uint8_t *base, vuint64m4_t bindex, size_t vl) {
@@ -999,7 +999,7 @@ vuint8mf2_t test_vluxei64_v_u8mf2(const uint8_t *base, vuint64m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_u8m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i8* [[BASE:%.*]] to <vscale x 8 x i8>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64.i64(<vscale x 8 x i8> undef, <vscale x 8 x i8>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i8> [[TMP1]]
 //
 vuint8m1_t test_vluxei64_v_u8m1(const uint8_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1009,7 +1009,7 @@ vuint8m1_t test_vluxei64_v_u8m1(const uint8_t *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vluxei8_v_u16mf4(const uint16_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1019,7 +1019,7 @@ vuint16mf4_t test_vluxei8_v_u16mf4(const uint16_t *base, vuint8mf8_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vluxei8_v_u16mf2(const uint16_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1029,7 +1029,7 @@ vuint16mf2_t test_vluxei8_v_u16mf2(const uint16_t *base, vuint8mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vluxei8_v_u16m1(const uint16_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1039,7 +1039,7 @@ vuint16m1_t test_vluxei8_v_u16m1(const uint16_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vluxei8_v_u16m2(const uint16_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1049,7 +1049,7 @@ vuint16m2_t test_vluxei8_v_u16m2(const uint16_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vluxei8_v_u16m4(const uint16_t *base, vuint8m2_t bindex, size_t vl) {
@@ -1059,7 +1059,7 @@ vuint16m4_t test_vluxei8_v_u16m4(const uint16_t *base, vuint8m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vluxei8_v_u16m8(const uint16_t *base, vuint8m4_t bindex, size_t vl) {
@@ -1069,7 +1069,7 @@ vuint16m8_t test_vluxei8_v_u16m8(const uint16_t *base, vuint8m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vluxei16_v_u16mf4(const uint16_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1079,7 +1079,7 @@ vuint16mf4_t test_vluxei16_v_u16mf4(const uint16_t *base, vuint16mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vluxei16_v_u16mf2(const uint16_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1089,7 +1089,7 @@ vuint16mf2_t test_vluxei16_v_u16mf2(const uint16_t *base, vuint16mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vluxei16_v_u16m1(const uint16_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1099,7 +1099,7 @@ vuint16m1_t test_vluxei16_v_u16m1(const uint16_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vluxei16_v_u16m2(const uint16_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1109,7 +1109,7 @@ vuint16m2_t test_vluxei16_v_u16m2(const uint16_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vluxei16_v_u16m4(const uint16_t *base, vuint16m4_t bindex, size_t vl) {
@@ -1119,7 +1119,7 @@ vuint16m4_t test_vluxei16_v_u16m4(const uint16_t *base, vuint16m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 32 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16.i64(<vscale x 32 x i16> undef, <vscale x 32 x i16>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x i16> [[TMP1]]
 //
 vuint16m8_t test_vluxei16_v_u16m8(const uint16_t *base, vuint16m8_t bindex, size_t vl) {
@@ -1129,7 +1129,7 @@ vuint16m8_t test_vluxei16_v_u16m8(const uint16_t *base, vuint16m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vluxei32_v_u16mf4(const uint16_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1139,7 +1139,7 @@ vuint16mf4_t test_vluxei32_v_u16mf4(const uint16_t *base, vuint32mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vluxei32_v_u16mf2(const uint16_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1149,7 +1149,7 @@ vuint16mf2_t test_vluxei32_v_u16mf2(const uint16_t *base, vuint32m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vluxei32_v_u16m1(const uint16_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1159,7 +1159,7 @@ vuint16m1_t test_vluxei32_v_u16m1(const uint16_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vluxei32_v_u16m2(const uint16_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1169,7 +1169,7 @@ vuint16m2_t test_vluxei32_v_u16m2(const uint16_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 16 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32.i64(<vscale x 16 x i16> undef, <vscale x 16 x i16>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i16> [[TMP1]]
 //
 vuint16m4_t test_vluxei32_v_u16m4(const uint16_t *base, vuint32m8_t bindex, size_t vl) {
@@ -1179,7 +1179,7 @@ vuint16m4_t test_vluxei32_v_u16m4(const uint16_t *base, vuint32m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 1 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64.i64(<vscale x 1 x i16> undef, <vscale x 1 x i16>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i16> [[TMP1]]
 //
 vuint16mf4_t test_vluxei64_v_u16mf4(const uint16_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1189,7 +1189,7 @@ vuint16mf4_t test_vluxei64_v_u16mf4(const uint16_t *base, vuint64m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei64_v_u16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 2 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64.i64(<vscale x 2 x i16> undef, <vscale x 2 x i16>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i16> [[TMP1]]
 //
 vuint16mf2_t test_vluxei64_v_u16mf2(const uint16_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1199,7 +1199,7 @@ vuint16mf2_t test_vluxei64_v_u16mf2(const uint16_t *base, vuint64m2_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei64_v_u16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 4 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64.i64(<vscale x 4 x i16> undef, <vscale x 4 x i16>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i16> [[TMP1]]
 //
 vuint16m1_t test_vluxei64_v_u16m1(const uint16_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1209,7 +1209,7 @@ vuint16m1_t test_vluxei64_v_u16m1(const uint16_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i16* [[BASE:%.*]] to <vscale x 8 x i16>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64.i64(<vscale x 8 x i16> undef, <vscale x 8 x i16>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
 //
 vuint16m2_t test_vluxei64_v_u16m2(const uint16_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1219,7 +1219,7 @@ vuint16m2_t test_vluxei64_v_u16m2(const uint16_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vluxei8_v_u32mf2(const uint32_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1229,7 +1229,7 @@ vuint32mf2_t test_vluxei8_v_u32mf2(const uint32_t *base, vuint8mf8_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vluxei8_v_u32m1(const uint32_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1239,7 +1239,7 @@ vuint32m1_t test_vluxei8_v_u32m1(const uint32_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vluxei8_v_u32m2(const uint32_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1249,7 +1249,7 @@ vuint32m2_t test_vluxei8_v_u32m2(const uint32_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vluxei8_v_u32m4(const uint32_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1259,7 +1259,7 @@ vuint32m4_t test_vluxei8_v_u32m4(const uint32_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vluxei8_v_u32m8(const uint32_t *base, vuint8m2_t bindex, size_t vl) {
@@ -1269,7 +1269,7 @@ vuint32m8_t test_vluxei8_v_u32m8(const uint32_t *base, vuint8m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vluxei16_v_u32mf2(const uint32_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1279,7 +1279,7 @@ vuint32mf2_t test_vluxei16_v_u32mf2(const uint32_t *base, vuint16mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vluxei16_v_u32m1(const uint32_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1289,7 +1289,7 @@ vuint32m1_t test_vluxei16_v_u32m1(const uint32_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vluxei16_v_u32m2(const uint32_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1299,7 +1299,7 @@ vuint32m2_t test_vluxei16_v_u32m2(const uint32_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vluxei16_v_u32m4(const uint32_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1309,7 +1309,7 @@ vuint32m4_t test_vluxei16_v_u32m4(const uint32_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vluxei16_v_u32m8(const uint32_t *base, vuint16m4_t bindex, size_t vl) {
@@ -1319,7 +1319,7 @@ vuint32m8_t test_vluxei16_v_u32m8(const uint32_t *base, vuint16m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vluxei32_v_u32mf2(const uint32_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1329,7 +1329,7 @@ vuint32mf2_t test_vluxei32_v_u32mf2(const uint32_t *base, vuint32mf2_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vluxei32_v_u32m1(const uint32_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1339,7 +1339,7 @@ vuint32m1_t test_vluxei32_v_u32m1(const uint32_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vluxei32_v_u32m2(const uint32_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1349,7 +1349,7 @@ vuint32m2_t test_vluxei32_v_u32m2(const uint32_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vluxei32_v_u32m4(const uint32_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1359,7 +1359,7 @@ vuint32m4_t test_vluxei32_v_u32m4(const uint32_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 16 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x i32> [[TMP1]]
 //
 vuint32m8_t test_vluxei32_v_u32m8(const uint32_t *base, vuint32m8_t bindex, size_t vl) {
@@ -1369,7 +1369,7 @@ vuint32m8_t test_vluxei32_v_u32m8(const uint32_t *base, vuint32m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 1 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i32> [[TMP1]]
 //
 vuint32mf2_t test_vluxei64_v_u32mf2(const uint32_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1379,7 +1379,7 @@ vuint32mf2_t test_vluxei64_v_u32mf2(const uint32_t *base, vuint64m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei64_v_u32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 2 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i32> [[TMP1]]
 //
 vuint32m1_t test_vluxei64_v_u32m1(const uint32_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1389,7 +1389,7 @@ vuint32m1_t test_vluxei64_v_u32m1(const uint32_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 4 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
 //
 vuint32m2_t test_vluxei64_v_u32m2(const uint32_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1399,7 +1399,7 @@ vuint32m2_t test_vluxei64_v_u32m2(const uint32_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i32* [[BASE:%.*]] to <vscale x 8 x i32>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64.i64(<vscale x 8 x i32> undef, <vscale x 8 x i32>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i32> [[TMP1]]
 //
 vuint32m4_t test_vluxei64_v_u32m4(const uint32_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1409,7 +1409,7 @@ vuint32m4_t test_vluxei64_v_u32m4(const uint32_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei8_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vluxei8_v_u64m1(const uint64_t *base, vuint8mf8_t bindex, size_t vl) {
@@ -1419,7 +1419,7 @@ vuint64m1_t test_vluxei8_v_u64m1(const uint64_t *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vluxei8_v_u64m2(const uint64_t *base, vuint8mf4_t bindex, size_t vl) {
@@ -1429,7 +1429,7 @@ vuint64m2_t test_vluxei8_v_u64m2(const uint64_t *base, vuint8mf4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vluxei8_v_u64m4(const uint64_t *base, vuint8mf2_t bindex, size_t vl) {
@@ -1439,7 +1439,7 @@ vuint64m4_t test_vluxei8_v_u64m4(const uint64_t *base, vuint8mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vluxei8_v_u64m8(const uint64_t *base, vuint8m1_t bindex, size_t vl) {
@@ -1449,7 +1449,7 @@ vuint64m8_t test_vluxei8_v_u64m8(const uint64_t *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vluxei16_v_u64m1(const uint64_t *base, vuint16mf4_t bindex, size_t vl) {
@@ -1459,7 +1459,7 @@ vuint64m1_t test_vluxei16_v_u64m1(const uint64_t *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vluxei16_v_u64m2(const uint64_t *base, vuint16mf2_t bindex, size_t vl) {
@@ -1469,7 +1469,7 @@ vuint64m2_t test_vluxei16_v_u64m2(const uint64_t *base, vuint16mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vluxei16_v_u64m4(const uint64_t *base, vuint16m1_t bindex, size_t vl) {
@@ -1479,7 +1479,7 @@ vuint64m4_t test_vluxei16_v_u64m4(const uint64_t *base, vuint16m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vluxei16_v_u64m8(const uint64_t *base, vuint16m2_t bindex, size_t vl) {
@@ -1489,7 +1489,7 @@ vuint64m8_t test_vluxei16_v_u64m8(const uint64_t *base, vuint16m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vluxei32_v_u64m1(const uint64_t *base, vuint32mf2_t bindex, size_t vl) {
@@ -1499,7 +1499,7 @@ vuint64m1_t test_vluxei32_v_u64m1(const uint64_t *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei32_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vluxei32_v_u64m2(const uint64_t *base, vuint32m1_t bindex, size_t vl) {
@@ -1509,7 +1509,7 @@ vuint64m2_t test_vluxei32_v_u64m2(const uint64_t *base, vuint32m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vluxei32_v_u64m4(const uint64_t *base, vuint32m2_t bindex, size_t vl) {
@@ -1519,7 +1519,7 @@ vuint64m4_t test_vluxei32_v_u64m4(const uint64_t *base, vuint32m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vluxei32_v_u64m8(const uint64_t *base, vuint32m4_t bindex, size_t vl) {
@@ -1529,7 +1529,7 @@ vuint64m8_t test_vluxei32_v_u64m8(const uint64_t *base, vuint32m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 1 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x i64> [[TMP1]]
 //
 vuint64m1_t test_vluxei64_v_u64m1(const uint64_t *base, vuint64m1_t bindex, size_t vl) {
@@ -1539,7 +1539,7 @@ vuint64m1_t test_vluxei64_v_u64m1(const uint64_t *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 2 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64.i64(<vscale x 2 x i64> undef, <vscale x 2 x i64>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
 //
 vuint64m2_t test_vluxei64_v_u64m2(const uint64_t *base, vuint64m2_t bindex, size_t vl) {
@@ -1549,7 +1549,7 @@ vuint64m2_t test_vluxei64_v_u64m2(const uint64_t *base, vuint64m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 4 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64.i64(<vscale x 4 x i64> undef, <vscale x 4 x i64>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x i64> [[TMP1]]
 //
 vuint64m4_t test_vluxei64_v_u64m4(const uint64_t *base, vuint64m4_t bindex, size_t vl) {
@@ -1559,7 +1559,7 @@ vuint64m4_t test_vluxei64_v_u64m4(const uint64_t *base, vuint64m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_u64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast i64* [[BASE:%.*]] to <vscale x 8 x i64>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64.i64(<vscale x 8 x i64> undef, <vscale x 8 x i64>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x i64> [[TMP1]]
 //
 vuint64m8_t test_vluxei64_v_u64m8(const uint64_t *base, vuint64m8_t bindex, size_t vl) {
@@ -1569,7 +1569,7 @@ vuint64m8_t test_vluxei64_v_u64m8(const uint64_t *base, vuint64m8_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vluxei8_v_f32mf2(const float *base, vuint8mf8_t bindex, size_t vl) {
@@ -1579,7 +1579,7 @@ vfloat32mf2_t test_vluxei8_v_f32mf2(const float *base, vuint8mf8_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vluxei8_v_f32m1(const float *base, vuint8mf4_t bindex, size_t vl) {
@@ -1589,7 +1589,7 @@ vfloat32m1_t test_vluxei8_v_f32m1(const float *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vluxei8_v_f32m2(const float *base, vuint8mf2_t bindex, size_t vl) {
@@ -1599,7 +1599,7 @@ vfloat32m2_t test_vluxei8_v_f32m2(const float *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vluxei8_v_f32m4(const float *base, vuint8m1_t bindex, size_t vl) {
@@ -1609,7 +1609,7 @@ vfloat32m4_t test_vluxei8_v_f32m4(const float *base, vuint8m1_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei8_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vluxei8_v_f32m8(const float *base, vuint8m2_t bindex, size_t vl) {
@@ -1619,7 +1619,7 @@ vfloat32m8_t test_vluxei8_v_f32m8(const float *base, vuint8m2_t bindex, size_t v
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vluxei16_v_f32mf2(const float *base, vuint16mf4_t bindex, size_t vl) {
@@ -1629,7 +1629,7 @@ vfloat32mf2_t test_vluxei16_v_f32mf2(const float *base, vuint16mf4_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vluxei16_v_f32m1(const float *base, vuint16mf2_t bindex, size_t vl) {
@@ -1639,7 +1639,7 @@ vfloat32m1_t test_vluxei16_v_f32m1(const float *base, vuint16mf2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vluxei16_v_f32m2(const float *base, vuint16m1_t bindex, size_t vl) {
@@ -1649,7 +1649,7 @@ vfloat32m2_t test_vluxei16_v_f32m2(const float *base, vuint16m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vluxei16_v_f32m4(const float *base, vuint16m2_t bindex, size_t vl) {
@@ -1659,7 +1659,7 @@ vfloat32m4_t test_vluxei16_v_f32m4(const float *base, vuint16m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vluxei16_v_f32m8(const float *base, vuint16m4_t bindex, size_t vl) {
@@ -1669,7 +1669,7 @@ vfloat32m8_t test_vluxei16_v_f32m8(const float *base, vuint16m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vluxei32_v_f32mf2(const float *base, vuint32mf2_t bindex, size_t vl) {
@@ -1679,7 +1679,7 @@ vfloat32mf2_t test_vluxei32_v_f32mf2(const float *base, vuint32mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vluxei32_v_f32m1(const float *base, vuint32m1_t bindex, size_t vl) {
@@ -1689,7 +1689,7 @@ vfloat32m1_t test_vluxei32_v_f32m1(const float *base, vuint32m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vluxei32_v_f32m2(const float *base, vuint32m2_t bindex, size_t vl) {
@@ -1699,7 +1699,7 @@ vfloat32m2_t test_vluxei32_v_f32m2(const float *base, vuint32m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vluxei32_v_f32m4(const float *base, vuint32m4_t bindex, size_t vl) {
@@ -1709,7 +1709,7 @@ vfloat32m4_t test_vluxei32_v_f32m4(const float *base, vuint32m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei32_v_f32m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 16 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32.i64(<vscale x 16 x float>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP1]]
 //
 vfloat32m8_t test_vluxei32_v_f32m8(const float *base, vuint32m8_t bindex, size_t vl) {
@@ -1719,7 +1719,7 @@ vfloat32m8_t test_vluxei32_v_f32m8(const float *base, vuint32m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_f32mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 1 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64.i64(<vscale x 1 x float>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64.i64(<vscale x 1 x float> undef, <vscale x 1 x float>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP1]]
 //
 vfloat32mf2_t test_vluxei64_v_f32mf2(const float *base, vuint64m1_t bindex, size_t vl) {
@@ -1729,7 +1729,7 @@ vfloat32mf2_t test_vluxei64_v_f32mf2(const float *base, vuint64m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei64_v_f32m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 2 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64.i64(<vscale x 2 x float>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64.i64(<vscale x 2 x float> undef, <vscale x 2 x float>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP1]]
 //
 vfloat32m1_t test_vluxei64_v_f32m1(const float *base, vuint64m2_t bindex, size_t vl) {
@@ -1739,7 +1739,7 @@ vfloat32m1_t test_vluxei64_v_f32m1(const float *base, vuint64m2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_f32m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 4 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64.i64(<vscale x 4 x float>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64.i64(<vscale x 4 x float> undef, <vscale x 4 x float>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP1]]
 //
 vfloat32m2_t test_vluxei64_v_f32m2(const float *base, vuint64m4_t bindex, size_t vl) {
@@ -1749,7 +1749,7 @@ vfloat32m2_t test_vluxei64_v_f32m2(const float *base, vuint64m4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei64_v_f32m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast float* [[BASE:%.*]] to <vscale x 8 x float>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64.i64(<vscale x 8 x float>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64.i64(<vscale x 8 x float> undef, <vscale x 8 x float>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP1]]
 //
 vfloat32m4_t test_vluxei64_v_f32m4(const float *base, vuint64m8_t bindex, size_t vl) {
@@ -1759,7 +1759,7 @@ vfloat32m4_t test_vluxei64_v_f32m4(const float *base, vuint64m8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vluxei8_v_f64m1(const double *base, vuint8mf8_t bindex, size_t vl) {
@@ -1769,7 +1769,7 @@ vfloat64m1_t test_vluxei8_v_f64m1(const double *base, vuint8mf8_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vluxei8_v_f64m2(const double *base, vuint8mf4_t bindex, size_t vl) {
@@ -1779,7 +1779,7 @@ vfloat64m2_t test_vluxei8_v_f64m2(const double *base, vuint8mf4_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vluxei8_v_f64m4(const double *base, vuint8mf2_t bindex, size_t vl) {
@@ -1789,7 +1789,7 @@ vfloat64m4_t test_vluxei8_v_f64m4(const double *base, vuint8mf2_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei8_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vluxei8_v_f64m8(const double *base, vuint8m1_t bindex, size_t vl) {
@@ -1799,7 +1799,7 @@ vfloat64m8_t test_vluxei8_v_f64m8(const double *base, vuint8m1_t bindex, size_t
 // CHECK-RV64-LABEL: @test_vluxei16_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vluxei16_v_f64m1(const double *base, vuint16mf4_t bindex, size_t vl) {
@@ -1809,7 +1809,7 @@ vfloat64m1_t test_vluxei16_v_f64m1(const double *base, vuint16mf4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vluxei16_v_f64m2(const double *base, vuint16mf2_t bindex, size_t vl) {
@@ -1819,7 +1819,7 @@ vfloat64m2_t test_vluxei16_v_f64m2(const double *base, vuint16mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vluxei16_v_f64m4(const double *base, vuint16m1_t bindex, size_t vl) {
@@ -1829,7 +1829,7 @@ vfloat64m4_t test_vluxei16_v_f64m4(const double *base, vuint16m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei16_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vluxei16_v_f64m8(const double *base, vuint16m2_t bindex, size_t vl) {
@@ -1839,7 +1839,7 @@ vfloat64m8_t test_vluxei16_v_f64m8(const double *base, vuint16m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vluxei32_v_f64m1(const double *base, vuint32mf2_t bindex, size_t vl) {
@@ -1849,7 +1849,7 @@ vfloat64m1_t test_vluxei32_v_f64m1(const double *base, vuint32mf2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei32_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vluxei32_v_f64m2(const double *base, vuint32m1_t bindex, size_t vl) {
@@ -1859,7 +1859,7 @@ vfloat64m2_t test_vluxei32_v_f64m2(const double *base, vuint32m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vluxei32_v_f64m4(const double *base, vuint32m2_t bindex, size_t vl) {
@@ -1869,7 +1869,7 @@ vfloat64m4_t test_vluxei32_v_f64m4(const double *base, vuint32m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei32_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vluxei32_v_f64m8(const double *base, vuint32m4_t bindex, size_t vl) {
@@ -1879,7 +1879,7 @@ vfloat64m8_t test_vluxei32_v_f64m8(const double *base, vuint32m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_f64m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 1 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i64.i64(<vscale x 1 x double>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i64.i64(<vscale x 1 x double> undef, <vscale x 1 x double>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x double> [[TMP1]]
 //
 vfloat64m1_t test_vluxei64_v_f64m1(const double *base, vuint64m1_t bindex, size_t vl) {
@@ -1889,7 +1889,7 @@ vfloat64m1_t test_vluxei64_v_f64m1(const double *base, vuint64m1_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_f64m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 2 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i64.i64(<vscale x 2 x double>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i64.i64(<vscale x 2 x double> undef, <vscale x 2 x double>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x double> [[TMP1]]
 //
 vfloat64m2_t test_vluxei64_v_f64m2(const double *base, vuint64m2_t bindex, size_t vl) {
@@ -1899,7 +1899,7 @@ vfloat64m2_t test_vluxei64_v_f64m2(const double *base, vuint64m2_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_f64m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 4 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i64.i64(<vscale x 4 x double>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i64.i64(<vscale x 4 x double> undef, <vscale x 4 x double>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x double> [[TMP1]]
 //
 vfloat64m4_t test_vluxei64_v_f64m4(const double *base, vuint64m4_t bindex, size_t vl) {
@@ -1909,7 +1909,7 @@ vfloat64m4_t test_vluxei64_v_f64m4(const double *base, vuint64m4_t bindex, size_
 // CHECK-RV64-LABEL: @test_vluxei64_v_f64m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast double* [[BASE:%.*]] to <vscale x 8 x double>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i64.i64(<vscale x 8 x double>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i64.i64(<vscale x 8 x double> undef, <vscale x 8 x double>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x double> [[TMP1]]
 //
 vfloat64m8_t test_vluxei64_v_f64m8(const double *base, vuint64m8_t bindex, size_t vl) {
@@ -3829,7 +3829,7 @@ vfloat64m8_t test_vluxei64_v_f64m8_m(vbool8_t mask, vfloat64m8_t maskedoff, cons
 // CHECK-RV64-LABEL: @test_vluxei8_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8.i64(<vscale x 1 x half>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], <vscale x 1 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vluxei8_v_f16mf4 (const _Float16 *base, vuint8mf8_t bindex, size_t vl) {
@@ -3839,7 +3839,7 @@ vfloat16mf4_t test_vluxei8_v_f16mf4 (const _Float16 *base, vuint8mf8_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei8_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8.i64(<vscale x 2 x half>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], <vscale x 2 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vluxei8_v_f16mf2 (const _Float16 *base, vuint8mf4_t bindex, size_t vl) {
@@ -3849,7 +3849,7 @@ vfloat16mf2_t test_vluxei8_v_f16mf2 (const _Float16 *base, vuint8mf4_t bindex, s
 // CHECK-RV64-LABEL: @test_vluxei8_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8.i64(<vscale x 4 x half>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], <vscale x 4 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vluxei8_v_f16m1 (const _Float16 *base, vuint8mf2_t bindex, size_t vl) {
@@ -3859,7 +3859,7 @@ vfloat16m1_t test_vluxei8_v_f16m1 (const _Float16 *base, vuint8mf2_t bindex, siz
 // CHECK-RV64-LABEL: @test_vluxei8_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8.i64(<vscale x 8 x half>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], <vscale x 8 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vluxei8_v_f16m2 (const _Float16 *base, vuint8m1_t bindex, size_t vl) {
@@ -3869,7 +3869,7 @@ vfloat16m2_t test_vluxei8_v_f16m2 (const _Float16 *base, vuint8m1_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei8_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8.i64(<vscale x 16 x half>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], <vscale x 16 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
 //
 vfloat16m4_t test_vluxei8_v_f16m4 (const _Float16 *base, vuint8m2_t bindex, size_t vl) {
@@ -3879,7 +3879,7 @@ vfloat16m4_t test_vluxei8_v_f16m4 (const _Float16 *base, vuint8m2_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei8_v_f16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 32 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8.i64(<vscale x 32 x half>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8.i64(<vscale x 32 x half> undef, <vscale x 32 x half>* [[TMP0]], <vscale x 32 x i8> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP1]]
 //
 vfloat16m8_t test_vluxei8_v_f16m8 (const _Float16 *base, vuint8m4_t bindex, size_t vl) {
@@ -3889,7 +3889,7 @@ vfloat16m8_t test_vluxei8_v_f16m8 (const _Float16 *base, vuint8m4_t bindex, size
 // CHECK-RV64-LABEL: @test_vluxei16_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i16.i64(<vscale x 1 x half>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i16.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], <vscale x 1 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vluxei16_v_f16mf4 (const _Float16 *base, vuint16mf4_t bindex, size_t vl) {
@@ -3899,7 +3899,7 @@ vfloat16mf4_t test_vluxei16_v_f16mf4 (const _Float16 *base, vuint16mf4_t bindex,
 // CHECK-RV64-LABEL: @test_vluxei16_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i16.i64(<vscale x 2 x half>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i16.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], <vscale x 2 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vluxei16_v_f16mf2 (const _Float16 *base, vuint16mf2_t bindex, size_t vl) {
@@ -3909,7 +3909,7 @@ vfloat16mf2_t test_vluxei16_v_f16mf2 (const _Float16 *base, vuint16mf2_t bindex,
 // CHECK-RV64-LABEL: @test_vluxei16_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i16.i64(<vscale x 4 x half>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i16.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], <vscale x 4 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vluxei16_v_f16m1 (const _Float16 *base, vuint16m1_t bindex, size_t vl) {
@@ -3919,7 +3919,7 @@ vfloat16m1_t test_vluxei16_v_f16m1 (const _Float16 *base, vuint16m1_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei16_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i16.i64(<vscale x 8 x half>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i16.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], <vscale x 8 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vluxei16_v_f16m2 (const _Float16 *base, vuint16m2_t bindex, size_t vl) {
@@ -3929,7 +3929,7 @@ vfloat16m2_t test_vluxei16_v_f16m2 (const _Float16 *base, vuint16m2_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei16_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i16.i64(<vscale x 16 x half>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i16.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], <vscale x 16 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
 //
 vfloat16m4_t test_vluxei16_v_f16m4 (const _Float16 *base, vuint16m4_t bindex, size_t vl) {
@@ -3939,7 +3939,7 @@ vfloat16m4_t test_vluxei16_v_f16m4 (const _Float16 *base, vuint16m4_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei16_v_f16m8(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 32 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i16.i64(<vscale x 32 x half>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i16.i64(<vscale x 32 x half> undef, <vscale x 32 x half>* [[TMP0]], <vscale x 32 x i16> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 32 x half> [[TMP1]]
 //
 vfloat16m8_t test_vluxei16_v_f16m8 (const _Float16 *base, vuint16m8_t bindex, size_t vl) {
@@ -3949,7 +3949,7 @@ vfloat16m8_t test_vluxei16_v_f16m8 (const _Float16 *base, vuint16m8_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei32_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32.i64(<vscale x 1 x half>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], <vscale x 1 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vluxei32_v_f16mf4 (const _Float16 *base, vuint32mf2_t bindex, size_t vl) {
@@ -3959,7 +3959,7 @@ vfloat16mf4_t test_vluxei32_v_f16mf4 (const _Float16 *base, vuint32mf2_t bindex,
 // CHECK-RV64-LABEL: @test_vluxei32_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32.i64(<vscale x 2 x half>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], <vscale x 2 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vluxei32_v_f16mf2 (const _Float16 *base, vuint32m1_t bindex, size_t vl) {
@@ -3969,7 +3969,7 @@ vfloat16mf2_t test_vluxei32_v_f16mf2 (const _Float16 *base, vuint32m1_t bindex,
 // CHECK-RV64-LABEL: @test_vluxei32_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32.i64(<vscale x 4 x half>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], <vscale x 4 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vluxei32_v_f16m1 (const _Float16 *base, vuint32m2_t bindex, size_t vl) {
@@ -3979,7 +3979,7 @@ vfloat16m1_t test_vluxei32_v_f16m1 (const _Float16 *base, vuint32m2_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei32_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32.i64(<vscale x 8 x half>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], <vscale x 8 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vluxei32_v_f16m2 (const _Float16 *base, vuint32m4_t bindex, size_t vl) {
@@ -3989,7 +3989,7 @@ vfloat16m2_t test_vluxei32_v_f16m2 (const _Float16 *base, vuint32m4_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei32_v_f16m4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 16 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32.i64(<vscale x 16 x half>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32.i64(<vscale x 16 x half> undef, <vscale x 16 x half>* [[TMP0]], <vscale x 16 x i32> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 16 x half> [[TMP1]]
 //
 vfloat16m4_t test_vluxei32_v_f16m4 (const _Float16 *base, vuint32m8_t bindex, size_t vl) {
@@ -3999,7 +3999,7 @@ vfloat16m4_t test_vluxei32_v_f16m4 (const _Float16 *base, vuint32m8_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei64_v_f16mf4(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 1 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i64.i64(<vscale x 1 x half>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i64.i64(<vscale x 1 x half> undef, <vscale x 1 x half>* [[TMP0]], <vscale x 1 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 1 x half> [[TMP1]]
 //
 vfloat16mf4_t test_vluxei64_v_f16mf4 (const _Float16 *base, vuint64m1_t bindex, size_t vl) {
@@ -4009,7 +4009,7 @@ vfloat16mf4_t test_vluxei64_v_f16mf4 (const _Float16 *base, vuint64m1_t bindex,
 // CHECK-RV64-LABEL: @test_vluxei64_v_f16mf2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 2 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i64.i64(<vscale x 2 x half>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i64.i64(<vscale x 2 x half> undef, <vscale x 2 x half>* [[TMP0]], <vscale x 2 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 2 x half> [[TMP1]]
 //
 vfloat16mf2_t test_vluxei64_v_f16mf2 (const _Float16 *base, vuint64m2_t bindex, size_t vl) {
@@ -4019,7 +4019,7 @@ vfloat16mf2_t test_vluxei64_v_f16mf2 (const _Float16 *base, vuint64m2_t bindex,
 // CHECK-RV64-LABEL: @test_vluxei64_v_f16m1(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 4 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i64.i64(<vscale x 4 x half>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i64.i64(<vscale x 4 x half> undef, <vscale x 4 x half>* [[TMP0]], <vscale x 4 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 4 x half> [[TMP1]]
 //
 vfloat16m1_t test_vluxei64_v_f16m1 (const _Float16 *base, vuint64m4_t bindex, size_t vl) {
@@ -4029,7 +4029,7 @@ vfloat16m1_t test_vluxei64_v_f16m1 (const _Float16 *base, vuint64m4_t bindex, si
 // CHECK-RV64-LABEL: @test_vluxei64_v_f16m2(
 // CHECK-RV64-NEXT:  entry:
 // CHECK-RV64-NEXT:    [[TMP0:%.*]] = bitcast half* [[BASE:%.*]] to <vscale x 8 x half>*
-// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i64.i64(<vscale x 8 x half>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
+// CHECK-RV64-NEXT:    [[TMP1:%.*]] = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i64.i64(<vscale x 8 x half> undef, <vscale x 8 x half>* [[TMP0]], <vscale x 8 x i64> [[BINDEX:%.*]], i64 [[VL:%.*]])
 // CHECK-RV64-NEXT:    ret <vscale x 8 x half> [[TMP1]]
 //
 vfloat16m2_t test_vluxei64_v_f16m2 (const _Float16 *base, vuint64m8_t bindex, size_t vl) {
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 99dd152fc0fc5..fc697f5c71e85 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -175,26 +175,37 @@ let TargetPrefix = "riscv" in {
                                            ImmArg<ArgIndex<0>>,
                                            ImmArg<ArgIndex<1>>]>;
 
-  // For unit stride load
+  // For unit stride mask load
   // Input: (pointer, vl)
-  class RISCVUSLoad
+  class RISCVUSMLoad
         : Intrinsic<[llvm_anyvector_ty],
                     [LLVMPointerType<LLVMMatchType<0>>,
                      llvm_anyint_ty],
                     [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic {
     let VLOperand = 1;
   }
+  // For unit stride load
+  // Input: (passthru, pointer, vl)
+  class RISCVUSLoad
+        : Intrinsic<[llvm_anyvector_ty],
+                    [LLVMMatchType<0>,
+                     LLVMPointerType<LLVMMatchType<0>>,
+                     llvm_anyint_ty],
+                    [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic {
+    let VLOperand = 2;
+  }
   // For unit stride fault-only-first load
-  // Input: (pointer, vl)
+  // Input: (passthru, pointer, vl)
   // Output: (data, vl)
   // NOTE: We model this with default memory properties since we model writing
   // VL as a side effect. IntrReadMem, IntrHasSideEffects does not work.
   class RISCVUSLoadFF
         : Intrinsic<[llvm_anyvector_ty, llvm_anyint_ty],
-                    [LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<1>],
-                    [NoCapture<ArgIndex<0>>]>,
+                    [LLVMMatchType<0>,
+                     LLVMPointerType<LLVMMatchType<0>>, LLVMMatchType<1>],
+                    [NoCapture<ArgIndex<1>>]>,
                     RISCVVIntrinsic {
-    let VLOperand = 1;
+    let VLOperand = 2;
   }
   // For unit stride load with mask
   // Input: (maskedoff, pointer, mask, vl, ta)
@@ -222,14 +233,15 @@ let TargetPrefix = "riscv" in {
                     [NoCapture<ArgIndex<1>>, ImmArg<ArgIndex<4>>]>, RISCVVIntrinsic {
     let VLOperand = 3;
   }
-  // For strided load
-  // Input: (pointer, stride, vl)
+  // For strided load with passthru operand
+  // Input: (passthru, pointer, stride, vl)
   class RISCVSLoad
         : Intrinsic<[llvm_anyvector_ty],
-                    [LLVMPointerType<LLVMMatchType<0>>,
+                    [LLVMMatchType<0>,
+                     LLVMPointerType<LLVMMatchType<0>>,
                      llvm_anyint_ty, LLVMMatchType<1>],
-                    [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic {
-    let VLOperand = 2;
+                    [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic {
+    let VLOperand = 3;
   }
   // For strided load with mask
   // Input: (maskedoff, pointer, stride, mask, vl, ta)
@@ -243,14 +255,15 @@ let TargetPrefix = "riscv" in {
                     RISCVVIntrinsic {
     let VLOperand = 4;
   }
-  // For indexed load
-  // Input: (pointer, index, vl)
+  // For indexed load with passthru operand
+  // Input: (passthru, pointer, index, vl)
   class RISCVILoad
         : Intrinsic<[llvm_anyvector_ty],
-                    [LLVMPointerType<LLVMMatchType<0>>,
+                    [LLVMMatchType<0>,
+                     LLVMPointerType<LLVMMatchType<0>>,
                      llvm_anyvector_ty, llvm_anyint_ty],
-                    [NoCapture<ArgIndex<0>>, IntrReadMem]>, RISCVVIntrinsic {
-    let VLOperand = 2;
+                    [NoCapture<ArgIndex<1>>, IntrReadMem]>, RISCVVIntrinsic {
+    let VLOperand = 3;
   }
   // For indexed load with mask
   // Input: (maskedoff, pointer, index, mask, vl, ta)
@@ -1124,7 +1137,7 @@ let TargetPrefix = "riscv" in {
   defm vsoxei : RISCVIStore;
   defm vsuxei : RISCVIStore;
 
-  def int_riscv_vlm : RISCVUSLoad;
+  def int_riscv_vlm : RISCVUSMLoad;
   def int_riscv_vsm : RISCVUSStore;
 
   defm vadd : RISCVBinaryAAX;
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index df4e955ef583b..5870502d74d5f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -86,8 +86,12 @@ void RISCVDAGToDAGISel::PreprocessISelDAG() {
     SDVTList VTs = CurDAG->getVTList({VT, MVT::Other});
     SDValue IntID =
         CurDAG->getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
-    SDValue Ops[] = {Chain, IntID, StackSlot,
-                     CurDAG->getRegister(RISCV::X0, MVT::i64), VL};
+    SDValue Ops[] = {Chain,
+                     IntID,
+                     CurDAG->getUNDEF(VT),
+                     StackSlot,
+                     CurDAG->getRegister(RISCV::X0, MVT::i64),
+                     VL};
 
     SDValue Result = CurDAG->getMemIntrinsicNode(
         ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64, MPI, Align(8),
@@ -1210,9 +1214,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
 
       unsigned CurOp = 2;
+      // Masked intrinsic only have TU version pseduo instructions.
+      bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
       SmallVector<SDValue, 8> Operands;
-      if (IsMasked)
+      if (IsTU)
         Operands.push_back(Node->getOperand(CurOp++));
+      else
+        // Skip the undef passthru operand for nomask TA version pseudo
+        CurOp++;
 
       MVT IndexVT;
       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
@@ -1230,7 +1239,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
                            "values when XLEN=32");
       }
       const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo(
-          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
+          IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
           static_cast<unsigned>(IndexLMUL));
       MachineSDNode *Load =
           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1255,16 +1264,25 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
 
       unsigned CurOp = 2;
+      // The riscv_vlm intrinsic are always tail agnostic and no passthru operand.
+      bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm;
+      // Masked intrinsic only have TU version pseduo instructions.
+      bool IsTU =
+          HasPassthruOperand &&
+          ((!IsMasked && !Node->getOperand(CurOp).isUndef()) || IsMasked);
       SmallVector<SDValue, 8> Operands;
-      if (IsMasked)
+      if (IsTU)
         Operands.push_back(Node->getOperand(CurOp++));
+      else if (HasPassthruOperand)
+        // Skip the undef passthru operand for nomask TA version pseudo
+        CurOp++;
 
       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided,
                                  Operands, /*IsLoad=*/true);
 
       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
       const RISCV::VLEPseudo *P =
-          RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW,
+          RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW,
                               static_cast<unsigned>(LMUL));
       MachineSDNode *Load =
           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
@@ -1283,9 +1301,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
       unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits());
 
       unsigned CurOp = 2;
+      // Masked intrinsic only have TU version pseduo instructions.
+      bool IsTU = IsMasked || (!IsMasked && !Node->getOperand(CurOp).isUndef());
       SmallVector<SDValue, 7> Operands;
-      if (IsMasked)
+      if (IsTU)
         Operands.push_back(Node->getOperand(CurOp++));
+      else
+        // Skip the undef passthru operand for nomask TA version pseudo
+        CurOp++;
 
       addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked,
                                  /*IsStridedOrIndexed*/ false, Operands,
@@ -1293,8 +1316,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
 
       RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
       const RISCV::VLEPseudo *P =
-          RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, Log2SEW,
-                              static_cast<unsigned>(LMUL));
+          RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true,
+                              Log2SEW, static_cast<unsigned>(LMUL));
       MachineSDNode *Load =
           CurDAG->getMachineNode(P->Pseudo, DL, Node->getValueType(0),
                                  MVT::Other, MVT::Glue, Operands);
@@ -1424,8 +1447,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
                            "values when XLEN=32");
       }
       const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo(
-          IsMasked, IsOrdered, IndexLog2EEW, static_cast<unsigned>(LMUL),
-          static_cast<unsigned>(IndexLMUL));
+          IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW,
+          static_cast<unsigned>(LMUL), static_cast<unsigned>(IndexLMUL));
       MachineSDNode *Store =
           CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
 
@@ -1622,8 +1645,8 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
 
     RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
     const RISCV::VLEPseudo *P = RISCV::getVLEPseudo(
-        /*IsMasked*/ false, /*IsStrided*/ true, /*FF*/ false, Log2SEW,
-        static_cast<unsigned>(LMUL));
+        /*IsMasked*/ false, /*IsTU*/ false, /*IsStrided*/ true, /*FF*/ false,
+        Log2SEW, static_cast<unsigned>(LMUL));
     MachineSDNode *Load =
         CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands);
 
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index f4d6fdddca390..c429a9298739f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -161,6 +161,7 @@ struct VSXSEGPseudo {
 
 struct VLEPseudo {
   uint16_t Masked : 1;
+  uint16_t IsTU : 1;
   uint16_t Strided : 1;
   uint16_t FF : 1;
   uint16_t Log2SEW : 3;
@@ -178,6 +179,7 @@ struct VSEPseudo {
 
 struct VLX_VSXPseudo {
   uint16_t Masked : 1;
+  uint16_t IsTU : 1;
   uint16_t Ordered : 1;
   uint16_t Log2SEW : 3;
   uint16_t LMUL : 3;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 483d0abc8ad76..b831fdcb1d477 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2452,8 +2452,12 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
           SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
           SDValue IntID =
               DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
-          SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
-                           DAG.getRegister(RISCV::X0, XLenVT), VL};
+          SDValue Ops[] = {Ld->getChain(),
+                           IntID,
+                           DAG.getUNDEF(ContainerVT),
+                           NewAddr,
+                           DAG.getRegister(RISCV::X0, XLenVT),
+                           VL};
           SDValue NewLoad = DAG.getMemIntrinsicNode(
               ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
               DAG.getMachineFunction().getMachineMemOperand(
@@ -4574,7 +4578,9 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
 
     auto *Load = cast<MemIntrinsicSDNode>(Op);
     SmallVector<SDValue, 8> Ops{Load->getChain(), IntID};
-    if (!IsUnmasked)
+    if (IsUnmasked)
+      Ops.push_back(DAG.getUNDEF(ContainerVT));
+    else
       Ops.push_back(PassThru);
     Ops.push_back(Op.getOperand(3)); // Ptr
     Ops.push_back(Op.getOperand(4)); // Stride
@@ -5432,7 +5438,9 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
   unsigned IntID =
       IsUnmasked ? Intrinsic::riscv_vle : Intrinsic::riscv_vle_mask;
   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
-  if (!IsUnmasked)
+  if (IsUnmasked)
+    Ops.push_back(DAG.getUNDEF(ContainerVT));
+  else
     Ops.push_back(PassThru);
   Ops.push_back(BasePtr);
   if (!IsUnmasked)
@@ -5813,7 +5821,9 @@ SDValue RISCVTargetLowering::lowerMaskedGather(SDValue Op,
   unsigned IntID =
       IsUnmasked ? Intrinsic::riscv_vluxei : Intrinsic::riscv_vluxei_mask;
   SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
-  if (!IsUnmasked)
+  if (IsUnmasked)
+    Ops.push_back(DAG.getUNDEF(ContainerVT));
+  else
     Ops.push_back(PassThru);
   Ops.push_back(BasePtr);
   Ops.push_back(Index);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 3d4864003b518..4e7e251bc4120 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -424,8 +424,9 @@ def RISCVVIntrinsicsTable : GenericTable {
   let PrimaryKeyName = "getRISCVVIntrinsicInfo";
 }
 
-class RISCVVLE<bit M, bit Str, bit F, bits<3> S, bits<3> L> {
+class RISCVVLE<bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> {
   bits<1> Masked = M;
+  bits<1> IsTU = TU;
   bits<1> Strided = Str;
   bits<1> FF = F;
   bits<3> Log2SEW = S;
@@ -436,8 +437,8 @@ class RISCVVLE<bit M, bit Str, bit F, bits<3> S, bits<3> L> {
 def RISCVVLETable : GenericTable {
   let FilterClass = "RISCVVLE";
   let CppTypeName = "VLEPseudo";
-  let Fields = ["Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
-  let PrimaryKey = ["Masked", "Strided", "FF", "Log2SEW", "LMUL"];
+  let Fields = ["Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"];
+  let PrimaryKey = ["Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL"];
   let PrimaryKeyName = "getVLEPseudo";
 }
 
@@ -457,8 +458,9 @@ def RISCVVSETable : GenericTable {
   let PrimaryKeyName = "getVSEPseudo";
 }
 
-class RISCVVLX_VSX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> {
+class RISCVVLX_VSX<bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> {
   bits<1> Masked = M;
+  bits<1> IsTU = TU;
   bits<1> Ordered = O;
   bits<3> Log2SEW = S;
   bits<3> LMUL = L;
@@ -466,15 +468,15 @@ class RISCVVLX_VSX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> {
   Pseudo Pseudo = !cast<Pseudo>(NAME);
 }
 
-class RISCVVLX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> :
-  RISCVVLX_VSX<M, O, S, L, IL>;
+class RISCVVLX<bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> :
+  RISCVVLX_VSX<M, TU, O, S, L, IL>;
 class RISCVVSX<bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> :
-  RISCVVLX_VSX<M, O, S, L, IL>;
+  RISCVVLX_VSX<M, /*TU*/0, O, S, L, IL>;
 
 class RISCVVLX_VSXTable : GenericTable {
   let CppTypeName = "VLX_VSXPseudo";
-  let Fields = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
-  let PrimaryKey = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
+  let Fields = ["Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"];
+  let PrimaryKey = ["Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"];
 }
 
 def RISCVVLXTable : RISCVVLX_VSXTable {
@@ -629,7 +631,7 @@ class VPseudoUSLoadNoMask<VReg RetClass, int EEW, bit isFF> :
       Pseudo<(outs RetClass:$rd),
              (ins GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
-      RISCVVLE</*Masked*/0, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+      RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
@@ -639,13 +641,29 @@ class VPseudoUSLoadNoMask<VReg RetClass, int EEW, bit isFF> :
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
+class VPseudoUSLoadNoMaskTU<VReg RetClass, int EEW, bit isFF> :
+      Pseudo<(outs RetClass:$rd),
+             (ins RetClass:$dest, GPR:$rs1, AVL:$vl, ixlenimm:$sew),[]>,
+      RISCVVPseudo,
+      RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+  let mayLoad = 1;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasDummyMask = 1;
+  let HasMergeOp = 1;
+  let Constraints = "$rd = $dest";
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
 class VPseudoUSLoadMask<VReg RetClass, int EEW, bit isFF> :
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
               (ins GetVRegNoV0<RetClass>.R:$merge,
                    GPR:$rs1,
                    VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
       RISCVVPseudo,
-      RISCVVLE</*Masked*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
+      RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/0, /*FF*/isFF, log2<EEW>.val, VLMul> {
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
@@ -661,13 +679,29 @@ class VPseudoSLoadNoMask<VReg RetClass, int EEW>:
       Pseudo<(outs RetClass:$rd),
              (ins GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
       RISCVVPseudo,
-      RISCVVLE</*Masked*/0, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+      RISCVVLE</*Masked*/0, /*TU*/0, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+  let mayLoad = 1;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasDummyMask = 1;
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
+class VPseudoSLoadNoMaskTU<VReg RetClass, int EEW>:
+      Pseudo<(outs RetClass:$rd),
+             (ins RetClass:$dest, GPR:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+      RISCVVPseudo,
+      RISCVVLE</*Masked*/0, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
   let HasDummyMask = 1;
+  let HasMergeOp = 1;
+  let Constraints = "$rd = $dest";
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
@@ -677,7 +711,7 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
                    GPR:$rs1, GPR:$rs2,
                    VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
       RISCVVPseudo,
-      RISCVVLE</*Masked*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
+      RISCVVLE</*Masked*/1, /*TU*/1, /*Strided*/1, /*FF*/0, log2<EEW>.val, VLMul> {
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
@@ -692,9 +726,10 @@ class VPseudoSLoadMask<VReg RetClass, int EEW>:
 class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
                          bit Ordered, bit EarlyClobber>:
       Pseudo<(outs RetClass:$rd),
-             (ins GPR:$rs1, IdxClass:$rs2, AVL:$vl, ixlenimm:$sew),[]>,
+             (ins GPR:$rs1, IdxClass:$rs2, AVL:$vl,
+              ixlenimm:$sew),[]>,
       RISCVVPseudo,
-      RISCVVLX</*Masked*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
+      RISCVVLX</*Masked*/0, /*TU*/0, Ordered, log2<EEW>.val, VLMul, LMUL> {
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
@@ -705,6 +740,24 @@ class VPseudoILoadNoMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
+class VPseudoILoadNoMaskTU<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
+                           bit Ordered, bit EarlyClobber>:
+      Pseudo<(outs RetClass:$rd),
+             (ins RetClass:$dest, GPR:$rs1, IdxClass:$rs2, AVL:$vl,
+              ixlenimm:$sew),[]>,
+      RISCVVPseudo,
+      RISCVVLX</*Masked*/0, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+  let mayLoad = 1;
+  let mayStore = 0;
+  let hasSideEffects = 0;
+  let HasVLOp = 1;
+  let HasSEWOp = 1;
+  let HasDummyMask = 1;
+  let HasMergeOp = 1;
+  let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest");
+  let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
 class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
                        bit Ordered, bit EarlyClobber>:
       Pseudo<(outs GetVRegNoV0<RetClass>.R:$rd),
@@ -712,7 +765,7 @@ class VPseudoILoadMask<VReg RetClass, VReg IdxClass, int EEW, bits<3> LMUL,
                    GPR:$rs1, IdxClass:$rs2,
                    VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>,
       RISCVVPseudo,
-      RISCVVLX</*Masked*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
+      RISCVVLX</*Masked*/1, /*TU*/1, Ordered, log2<EEW>.val, VLMul, LMUL> {
   let mayLoad = 1;
   let mayStore = 0;
   let hasSideEffects = 0;
@@ -1347,6 +1400,9 @@ multiclass VPseudoUSLoad {
         def "E" # eew # "_V_" # LInfo :
           VPseudoUSLoadNoMask<vreg, eew, false>,
           VLESched<eew>;
+        def "E" # eew # "_V_" # LInfo # "_TU":
+          VPseudoUSLoadNoMaskTU<vreg, eew, false>,
+          VLESched<eew>;
         def "E" # eew # "_V_" # LInfo # "_MASK" :
           VPseudoUSLoadMask<vreg, eew, false>,
           VLESched<eew>;
@@ -1364,6 +1420,9 @@ multiclass VPseudoFFLoad {
         def "E" # eew # "FF_V_" # LInfo :
           VPseudoUSLoadNoMask<vreg, eew, true>,
           VLFSched<eew>;
+        def "E" # eew # "FF_V_" # LInfo # "_TU":
+          VPseudoUSLoadNoMaskTU<vreg, eew, true>,
+          VLFSched<eew>;
         def "E" # eew # "FF_V_" # LInfo # "_MASK" :
           VPseudoUSLoadMask<vreg, eew, true>,
           VLFSched<eew>;
@@ -1388,6 +1447,8 @@ multiclass VPseudoSLoad {
       let VLMul = lmul.value in {
         def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask<vreg, eew>,
                                         VLSSched<eew>;
+        def "E" # eew # "_V_" # LInfo # "_TU": VPseudoSLoadNoMaskTU<vreg, eew>,
+                                        VLSSched<eew>;
         def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask<vreg, eew>,
                                                   VLSSched<eew>;
       }
@@ -1414,6 +1475,9 @@ multiclass VPseudoILoad<bit Ordered> {
             def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo :
               VPseudoILoadNoMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
               VLXSched<eew, Order>;
+            def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_TU":
+              VPseudoILoadNoMaskTU<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
+              VLXSched<eew, Order>;
             def "EI" # eew # "_V_" # IdxLInfo # "_" # LInfo # "_MASK" :
               VPseudoILoadMask<Vreg, IdxVreg, eew, idx_lmul.value, Ordered, HasConstraint>,
               VLXSched<eew, Order>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
index 1c3c219c13041..80425f42f55b2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-vsetvli-intrinsics.ll
@@ -95,7 +95,7 @@ define i32 @test_vsetvlimax_opt_e64m4() nounwind {
   ret i32 %vl
 }
 
-declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>*, i32)
+declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>, <vscale x 4 x i32>*, i32)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
 define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr) nounwind {
@@ -105,7 +105,7 @@ define <vscale x 4 x i32> @redundant_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr)
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
-  %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>* %ptr, i32 %vl)
+  %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32> undef, <vscale x 4 x i32>* %ptr, i32 %vl)
   ret <vscale x 4 x i32> %x
 }
 
@@ -122,6 +122,6 @@ define <vscale x 4 x i32> @repeated_vsetvli(i32 %avl, <vscale x 4 x i32>* %ptr)
 ; CHECK-NEXT:    ret
   %vl0 = call i32 @llvm.riscv.vsetvli.i32(i32 %avl, i32 2, i32 1)
   %vl1 = call i32 @llvm.riscv.vsetvli.i32(i32 %vl0, i32 2, i32 1)
-  %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32>* %ptr, i32 %vl1)
+  %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i32(<vscale x 4 x i32> undef, <vscale x 4 x i32>* %ptr, i32 %vl1)
   ret <vscale x 4 x i32> %x
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
index 26c3aeeba38fb..0dedab533e9d0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-vsetvli-intrinsics.ll
@@ -113,7 +113,7 @@ define i64 @test_vsetvlimax_opt_e64m4() nounwind {
   ret i64 %vl
 }
 
-declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>*, i64)
+declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>, <vscale x 4 x i32>*, i64)
 
 ; Check that we remove the redundant vsetvli when followed by another operation
 define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr) nounwind {
@@ -123,7 +123,7 @@ define <vscale x 4 x i32> @redundant_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr)
 ; CHECK-NEXT:    vle32.v v8, (a1)
 ; CHECK-NEXT:    ret
   %vl = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
-  %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>* %ptr, i64 %vl)
+  %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* %ptr, i64 %vl)
   ret <vscale x 4 x i32> %x
 }
 
@@ -140,6 +140,6 @@ define <vscale x 4 x i32> @repeated_vsetvli(i64 %avl, <vscale x 4 x i32>* %ptr)
 ; CHECK-NEXT:    ret
   %vl0 = call i64 @llvm.riscv.vsetvli.i64(i64 %avl, i64 2, i64 1)
   %vl1 = call i64 @llvm.riscv.vsetvli.i64(i64 %vl0, i64 2, i64 1)
-  %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32>* %ptr, i64 %vl1)
+  %x = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32.i64(<vscale x 4 x i32> undef, <vscale x 4 x i32>* %ptr, i64 %vl1)
   ret <vscale x 4 x i32> %x
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
index 8b75cc3981a63..bfcdee844ffaf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
@@ -168,7 +168,7 @@ entry:
   %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %input, i64 0, i64 0
   %2 = load i64, i64* %vl, align 8
   %3 = bitcast i32* %arraydecay to <vscale x 16 x i32>*
-  %4 = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32>* %3, i64 %2)
+  %4 = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32> undef, <vscale x 16 x i32>* %3, i64 %2)
   store <vscale x 16 x i32> %4, <vscale x 16 x i32>* %v0, align 4
   store i32 1, i32* %x0, align 4
   store i32 1, i32* %x1, align 4
@@ -211,6 +211,6 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg)
 
 declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
 
-declare <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32>* nocapture, i64)
+declare <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32.i64(<vscale x 16 x i32>, <vscale x 16 x i32>* nocapture, i64)
 
 attributes #0 = { noinline nounwind optnone "frame-pointer"="all" }
diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
new file mode 100644
index 0000000000000..3d314b1779a3e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
@@ -0,0 +1,120 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=RV32
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefix=RV64
+
+declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>*,
+  iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vle_v_tu_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8>* %1, iXLen %2) nounwind {
+; RV32-LABEL: intrinsic_vle_v_tu_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; RV32-NEXT:    vle8.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vle_v_tu_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; RV64-NEXT:    vle8.v v8, (a0)
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8>* %1,
+    iXLen %2)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vlse(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>*,
+  iXLen,
+  iXLen);
+
+
+define <vscale x 1 x i8> @intrinsic_vlse_v_tu(<vscale x 1 x i8> %0, <vscale x 1 x i8>* %1, iXLen %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vlse_v_tu:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a2, e8, mf8, tu, mu
+; RV32-NEXT:    vlse8.v v8, (a0), a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vlse_v_tu:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a2, e8, mf8, tu, mu
+; RV64-NEXT:    vlse8.v v8, (a0), a1
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vlse(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8>* %1,
+    iXLen %2,
+    iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
+
+declare { <vscale x 1 x i8>, iXLen } @llvm.riscv.vleff(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>*,
+  iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vleff_v_tu(<vscale x 1 x i8> %0, <vscale x 1 x i8>* %1, iXLen %2, iXLen* %3) nounwind {
+; RV32-LABEL: intrinsic_vleff_v_tu:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; RV32-NEXT:    vle8ff.v v8, (a0)
+; RV32-NEXT:    csrr a0, vl
+; RV32-NEXT:    sw a0, 0(a2)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vleff_v_tu:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; RV64-NEXT:    vle8ff.v v8, (a0)
+; RV64-NEXT:    csrr a0, vl
+; RV64-NEXT:    sd a0, 0(a2)
+; RV64-NEXT:    ret
+entry:
+  %a = call { <vscale x 1 x i8>, iXLen } @llvm.riscv.vleff(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8>* %1,
+    iXLen %2)
+  %b = extractvalue { <vscale x 1 x i8>, iXLen } %a, 0
+  %c = extractvalue { <vscale x 1 x i8>, iXLen } %a, 1
+  store iXLen %c, iXLen* %3
+  ret <vscale x 1 x i8> %b
+}
+
+declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8(
+  <vscale x 1 x i8>,
+  <vscale x 1 x i8>*,
+  <vscale x 1 x i8>,
+  iXLen);
+
+define <vscale x 1 x i8> @intrinsic_vloxei_v_tu_nxv1i8_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i8>* %1, <vscale x 1 x i8> %2, iXLen %3) nounwind {
+; RV32-LABEL: intrinsic_vloxei_v_tu_nxv1i8_nxv1i8:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; RV32-NEXT:    vloxei8.v v8, (a0), v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: intrinsic_vloxei_v_tu_nxv1i8_nxv1i8:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    vsetvli zero, a1, e8, mf8, tu, mu
+; RV64-NEXT:    vloxei8.v v8, (a0), v9
+; RV64-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8(
+    <vscale x 1 x i8> %0,
+    <vscale x 1 x i8>* %1,
+    <vscale x 1 x i8> %2,
+    iXLen %3)
+
+  ret <vscale x 1 x i8> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll
index 51521f31726da..a59500059fb1f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vle-rv32.ll
@@ -3,6 +3,7 @@
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   i32);
 
@@ -14,6 +15,7 @@ define <vscale x 1 x i64> @intrinsic_vle_v_nxv1i64_nxv1i64(<vscale x 1 x i64>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     i32 %1)
 
@@ -44,6 +46,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   i32);
 
@@ -55,6 +58,7 @@ define <vscale x 2 x i64> @intrinsic_vle_v_nxv2i64_nxv2i64(<vscale x 2 x i64>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     i32 %1)
 
@@ -85,6 +89,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   i32);
 
@@ -96,6 +101,7 @@ define <vscale x 4 x i64> @intrinsic_vle_v_nxv4i64_nxv4i64(<vscale x 4 x i64>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     i32 %1)
 
@@ -126,6 +132,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   i32);
 
@@ -137,6 +144,7 @@ define <vscale x 8 x i64> @intrinsic_vle_v_nxv8i64_nxv8i64(<vscale x 8 x i64>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     i32 %1)
 
@@ -167,6 +175,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vle.nxv1f64(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   i32);
 
@@ -178,6 +187,7 @@ define <vscale x 1 x double> @intrinsic_vle_v_nxv1f64_nxv1f64(<vscale x 1 x doub
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i32 %1)
 
@@ -208,6 +218,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vle.nxv2f64(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   i32);
 
@@ -219,6 +230,7 @@ define <vscale x 2 x double> @intrinsic_vle_v_nxv2f64_nxv2f64(<vscale x 2 x doub
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     i32 %1)
 
@@ -249,6 +261,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vle.nxv4f64(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   i32);
 
@@ -260,6 +273,7 @@ define <vscale x 4 x double> @intrinsic_vle_v_nxv4f64_nxv4f64(<vscale x 4 x doub
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vle.nxv4f64(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     i32 %1)
 
@@ -290,6 +304,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vle.nxv8f64(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   i32);
 
@@ -301,6 +316,7 @@ define <vscale x 8 x double> @intrinsic_vle_v_nxv8f64_nxv8f64(<vscale x 8 x doub
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vle.nxv8f64(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     i32 %1)
 
@@ -331,6 +347,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   i32);
 
@@ -342,6 +359,7 @@ define <vscale x 1 x i32> @intrinsic_vle_v_nxv1i32_nxv1i32(<vscale x 1 x i32>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     i32 %1)
 
@@ -372,6 +390,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   i32);
 
@@ -383,6 +402,7 @@ define <vscale x 2 x i32> @intrinsic_vle_v_nxv2i32_nxv2i32(<vscale x 2 x i32>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     i32 %1)
 
@@ -413,6 +433,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   i32);
 
@@ -424,6 +445,7 @@ define <vscale x 4 x i32> @intrinsic_vle_v_nxv4i32_nxv4i32(<vscale x 4 x i32>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     i32 %1)
 
@@ -454,6 +476,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   i32);
 
@@ -465,6 +488,7 @@ define <vscale x 8 x i32> @intrinsic_vle_v_nxv8i32_nxv8i32(<vscale x 8 x i32>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     i32 %1)
 
@@ -495,6 +519,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   i32);
 
@@ -506,6 +531,7 @@ define <vscale x 16 x i32> @intrinsic_vle_v_nxv16i32_nxv16i32(<vscale x 16 x i32
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     i32 %1)
 
@@ -536,6 +562,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vle.nxv1f32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   i32);
 
@@ -547,6 +574,7 @@ define <vscale x 1 x float> @intrinsic_vle_v_nxv1f32_nxv1f32(<vscale x 1 x float
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vle.nxv1f32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     i32 %1)
 
@@ -577,6 +605,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vle.nxv2f32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   i32);
 
@@ -588,6 +617,7 @@ define <vscale x 2 x float> @intrinsic_vle_v_nxv2f32_nxv2f32(<vscale x 2 x float
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     i32 %1)
 
@@ -618,6 +648,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vle.nxv4f32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   i32);
 
@@ -629,6 +660,7 @@ define <vscale x 4 x float> @intrinsic_vle_v_nxv4f32_nxv4f32(<vscale x 4 x float
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vle.nxv4f32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     i32 %1)
 
@@ -659,6 +691,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vle.nxv8f32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   i32);
 
@@ -670,6 +703,7 @@ define <vscale x 8 x float> @intrinsic_vle_v_nxv8f32_nxv8f32(<vscale x 8 x float
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vle.nxv8f32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     i32 %1)
 
@@ -700,6 +734,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   i32);
 
@@ -711,6 +746,7 @@ define <vscale x 16 x float> @intrinsic_vle_v_nxv16f32_nxv16f32(<vscale x 16 x f
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     i32 %1)
 
@@ -741,6 +777,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   i32);
 
@@ -752,6 +789,7 @@ define <vscale x 1 x i16> @intrinsic_vle_v_nxv1i16_nxv1i16(<vscale x 1 x i16>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     i32 %1)
 
@@ -782,6 +820,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   i32);
 
@@ -793,6 +832,7 @@ define <vscale x 2 x i16> @intrinsic_vle_v_nxv2i16_nxv2i16(<vscale x 2 x i16>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     i32 %1)
 
@@ -823,6 +863,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   i32);
 
@@ -834,6 +875,7 @@ define <vscale x 4 x i16> @intrinsic_vle_v_nxv4i16_nxv4i16(<vscale x 4 x i16>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     i32 %1)
 
@@ -864,6 +906,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   i32);
 
@@ -875,6 +918,7 @@ define <vscale x 8 x i16> @intrinsic_vle_v_nxv8i16_nxv8i16(<vscale x 8 x i16>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     i32 %1)
 
@@ -905,6 +949,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   i32);
 
@@ -916,6 +961,7 @@ define <vscale x 16 x i16> @intrinsic_vle_v_nxv16i16_nxv16i16(<vscale x 16 x i16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     i32 %1)
 
@@ -946,6 +992,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   i32);
 
@@ -957,6 +1004,7 @@ define <vscale x 32 x i16> @intrinsic_vle_v_nxv32i16_nxv32i16(<vscale x 32 x i16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     i32 %1)
 
@@ -987,6 +1035,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vle.nxv1f16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   i32);
 
@@ -998,6 +1047,7 @@ define <vscale x 1 x half> @intrinsic_vle_v_nxv1f16_nxv1f16(<vscale x 1 x half>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vle.nxv1f16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     i32 %1)
 
@@ -1028,6 +1078,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vle.nxv2f16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   i32);
 
@@ -1039,6 +1090,7 @@ define <vscale x 2 x half> @intrinsic_vle_v_nxv2f16_nxv2f16(<vscale x 2 x half>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vle.nxv2f16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     i32 %1)
 
@@ -1069,6 +1121,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vle.nxv4f16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   i32);
 
@@ -1080,6 +1133,7 @@ define <vscale x 4 x half> @intrinsic_vle_v_nxv4f16_nxv4f16(<vscale x 4 x half>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vle.nxv4f16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     i32 %1)
 
@@ -1110,6 +1164,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vle.nxv8f16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   i32);
 
@@ -1121,6 +1176,7 @@ define <vscale x 8 x half> @intrinsic_vle_v_nxv8f16_nxv8f16(<vscale x 8 x half>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vle.nxv8f16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     i32 %1)
 
@@ -1151,6 +1207,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vle.nxv16f16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   i32);
 
@@ -1162,6 +1219,7 @@ define <vscale x 16 x half> @intrinsic_vle_v_nxv16f16_nxv16f16(<vscale x 16 x ha
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vle.nxv16f16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     i32 %1)
 
@@ -1192,6 +1250,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vle.nxv32f16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   i32);
 
@@ -1203,6 +1262,7 @@ define <vscale x 32 x half> @intrinsic_vle_v_nxv32f16_nxv32f16(<vscale x 32 x ha
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vle.nxv32f16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     i32 %1)
 
@@ -1233,6 +1293,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   i32);
 
@@ -1244,6 +1305,7 @@ define <vscale x 1 x i8> @intrinsic_vle_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0, i
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     i32 %1)
 
@@ -1274,6 +1336,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   i32);
 
@@ -1285,6 +1348,7 @@ define <vscale x 2 x i8> @intrinsic_vle_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0, i
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     i32 %1)
 
@@ -1315,6 +1379,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   i32);
 
@@ -1326,6 +1391,7 @@ define <vscale x 4 x i8> @intrinsic_vle_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0, i
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     i32 %1)
 
@@ -1356,6 +1422,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   i32);
 
@@ -1367,6 +1434,7 @@ define <vscale x 8 x i8> @intrinsic_vle_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0, i
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     i32 %1)
 
@@ -1397,6 +1465,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   i32);
 
@@ -1408,6 +1477,7 @@ define <vscale x 16 x i8> @intrinsic_vle_v_nxv16i8_nxv16i8(<vscale x 16 x i8>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     i32 %1)
 
@@ -1438,6 +1508,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   i32);
 
@@ -1449,6 +1520,7 @@ define <vscale x 32 x i8> @intrinsic_vle_v_nxv32i8_nxv32i8(<vscale x 32 x i8>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     i32 %1)
 
@@ -1479,6 +1551,7 @@ entry:
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   i32);
 
@@ -1490,6 +1563,7 @@ define <vscale x 64 x i8> @intrinsic_vle_v_nxv64i8_nxv64i8(<vscale x 64 x i8>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     i32 %1)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll
index 78e005648d746..65fe749edbc52 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vle-rv64.ll
@@ -3,6 +3,7 @@
 ; RUN:   -mattr=+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   i64);
 
@@ -14,6 +15,7 @@ define <vscale x 1 x i64> @intrinsic_vle_v_nxv1i64_nxv1i64(<vscale x 1 x i64>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     i64 %1)
 
@@ -44,6 +46,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   i64);
 
@@ -55,6 +58,7 @@ define <vscale x 2 x i64> @intrinsic_vle_v_nxv2i64_nxv2i64(<vscale x 2 x i64>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vle.nxv2i64(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     i64 %1)
 
@@ -85,6 +89,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   i64);
 
@@ -96,6 +101,7 @@ define <vscale x 4 x i64> @intrinsic_vle_v_nxv4i64_nxv4i64(<vscale x 4 x i64>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vle.nxv4i64(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     i64 %1)
 
@@ -126,6 +132,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   i64);
 
@@ -137,6 +144,7 @@ define <vscale x 8 x i64> @intrinsic_vle_v_nxv8i64_nxv8i64(<vscale x 8 x i64>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vle.nxv8i64(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     i64 %1)
 
@@ -167,6 +175,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vle.nxv1f64(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   i64);
 
@@ -178,6 +187,7 @@ define <vscale x 1 x double> @intrinsic_vle_v_nxv1f64_nxv1f64(<vscale x 1 x doub
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vle.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i64 %1)
 
@@ -208,6 +218,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vle.nxv2f64(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   i64);
 
@@ -219,6 +230,7 @@ define <vscale x 2 x double> @intrinsic_vle_v_nxv2f64_nxv2f64(<vscale x 2 x doub
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vle.nxv2f64(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     i64 %1)
 
@@ -249,6 +261,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vle.nxv4f64(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   i64);
 
@@ -260,6 +273,7 @@ define <vscale x 4 x double> @intrinsic_vle_v_nxv4f64_nxv4f64(<vscale x 4 x doub
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vle.nxv4f64(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     i64 %1)
 
@@ -290,6 +304,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vle.nxv8f64(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   i64);
 
@@ -301,6 +316,7 @@ define <vscale x 8 x double> @intrinsic_vle_v_nxv8f64_nxv8f64(<vscale x 8 x doub
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vle.nxv8f64(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     i64 %1)
 
@@ -331,6 +347,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   i64);
 
@@ -342,6 +359,7 @@ define <vscale x 1 x i32> @intrinsic_vle_v_nxv1i32_nxv1i32(<vscale x 1 x i32>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     i64 %1)
 
@@ -372,6 +390,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   i64);
 
@@ -383,6 +402,7 @@ define <vscale x 2 x i32> @intrinsic_vle_v_nxv2i32_nxv2i32(<vscale x 2 x i32>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     i64 %1)
 
@@ -413,6 +433,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   i64);
 
@@ -424,6 +445,7 @@ define <vscale x 4 x i32> @intrinsic_vle_v_nxv4i32_nxv4i32(<vscale x 4 x i32>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vle.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     i64 %1)
 
@@ -454,6 +476,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   i64);
 
@@ -465,6 +488,7 @@ define <vscale x 8 x i32> @intrinsic_vle_v_nxv8i32_nxv8i32(<vscale x 8 x i32>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vle.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     i64 %1)
 
@@ -495,6 +519,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   i64);
 
@@ -506,6 +531,7 @@ define <vscale x 16 x i32> @intrinsic_vle_v_nxv16i32_nxv16i32(<vscale x 16 x i32
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vle.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     i64 %1)
 
@@ -536,6 +562,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vle.nxv1f32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   i64);
 
@@ -547,6 +574,7 @@ define <vscale x 1 x float> @intrinsic_vle_v_nxv1f32_nxv1f32(<vscale x 1 x float
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vle.nxv1f32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     i64 %1)
 
@@ -577,6 +605,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vle.nxv2f32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   i64);
 
@@ -588,6 +617,7 @@ define <vscale x 2 x float> @intrinsic_vle_v_nxv2f32_nxv2f32(<vscale x 2 x float
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vle.nxv2f32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     i64 %1)
 
@@ -618,6 +648,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vle.nxv4f32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   i64);
 
@@ -629,6 +660,7 @@ define <vscale x 4 x float> @intrinsic_vle_v_nxv4f32_nxv4f32(<vscale x 4 x float
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vle.nxv4f32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     i64 %1)
 
@@ -659,6 +691,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vle.nxv8f32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   i64);
 
@@ -670,6 +703,7 @@ define <vscale x 8 x float> @intrinsic_vle_v_nxv8f32_nxv8f32(<vscale x 8 x float
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vle.nxv8f32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     i64 %1)
 
@@ -700,6 +734,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   i64);
 
@@ -711,6 +746,7 @@ define <vscale x 16 x float> @intrinsic_vle_v_nxv16f32_nxv16f32(<vscale x 16 x f
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     i64 %1)
 
@@ -741,6 +777,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   i64);
 
@@ -752,6 +789,7 @@ define <vscale x 1 x i16> @intrinsic_vle_v_nxv1i16_nxv1i16(<vscale x 1 x i16>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vle.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     i64 %1)
 
@@ -782,6 +820,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   i64);
 
@@ -793,6 +832,7 @@ define <vscale x 2 x i16> @intrinsic_vle_v_nxv2i16_nxv2i16(<vscale x 2 x i16>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     i64 %1)
 
@@ -823,6 +863,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   i64);
 
@@ -834,6 +875,7 @@ define <vscale x 4 x i16> @intrinsic_vle_v_nxv4i16_nxv4i16(<vscale x 4 x i16>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vle.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     i64 %1)
 
@@ -864,6 +906,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   i64);
 
@@ -875,6 +918,7 @@ define <vscale x 8 x i16> @intrinsic_vle_v_nxv8i16_nxv8i16(<vscale x 8 x i16>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vle.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     i64 %1)
 
@@ -905,6 +949,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   i64);
 
@@ -916,6 +961,7 @@ define <vscale x 16 x i16> @intrinsic_vle_v_nxv16i16_nxv16i16(<vscale x 16 x i16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vle.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     i64 %1)
 
@@ -946,6 +992,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   i64);
 
@@ -957,6 +1004,7 @@ define <vscale x 32 x i16> @intrinsic_vle_v_nxv32i16_nxv32i16(<vscale x 32 x i16
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vle.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     i64 %1)
 
@@ -987,6 +1035,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vle.nxv1f16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   i64);
 
@@ -998,6 +1047,7 @@ define <vscale x 1 x half> @intrinsic_vle_v_nxv1f16_nxv1f16(<vscale x 1 x half>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vle.nxv1f16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     i64 %1)
 
@@ -1028,6 +1078,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vle.nxv2f16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   i64);
 
@@ -1039,6 +1090,7 @@ define <vscale x 2 x half> @intrinsic_vle_v_nxv2f16_nxv2f16(<vscale x 2 x half>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vle.nxv2f16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     i64 %1)
 
@@ -1069,6 +1121,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vle.nxv4f16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   i64);
 
@@ -1080,6 +1133,7 @@ define <vscale x 4 x half> @intrinsic_vle_v_nxv4f16_nxv4f16(<vscale x 4 x half>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vle.nxv4f16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     i64 %1)
 
@@ -1110,6 +1164,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vle.nxv8f16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   i64);
 
@@ -1121,6 +1176,7 @@ define <vscale x 8 x half> @intrinsic_vle_v_nxv8f16_nxv8f16(<vscale x 8 x half>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vle.nxv8f16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     i64 %1)
 
@@ -1151,6 +1207,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vle.nxv16f16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   i64);
 
@@ -1162,6 +1219,7 @@ define <vscale x 16 x half> @intrinsic_vle_v_nxv16f16_nxv16f16(<vscale x 16 x ha
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vle.nxv16f16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     i64 %1)
 
@@ -1192,6 +1250,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vle.nxv32f16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   i64);
 
@@ -1203,6 +1262,7 @@ define <vscale x 32 x half> @intrinsic_vle_v_nxv32f16_nxv32f16(<vscale x 32 x ha
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vle.nxv32f16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     i64 %1)
 
@@ -1233,6 +1293,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   i64);
 
@@ -1244,6 +1305,7 @@ define <vscale x 1 x i8> @intrinsic_vle_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0, i
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     i64 %1)
 
@@ -1274,6 +1336,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   i64);
 
@@ -1285,6 +1348,7 @@ define <vscale x 2 x i8> @intrinsic_vle_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0, i
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vle.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     i64 %1)
 
@@ -1315,6 +1379,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   i64);
 
@@ -1326,6 +1391,7 @@ define <vscale x 4 x i8> @intrinsic_vle_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0, i
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vle.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     i64 %1)
 
@@ -1356,6 +1422,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   i64);
 
@@ -1367,6 +1434,7 @@ define <vscale x 8 x i8> @intrinsic_vle_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0, i
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     i64 %1)
 
@@ -1397,6 +1465,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   i64);
 
@@ -1408,6 +1477,7 @@ define <vscale x 16 x i8> @intrinsic_vle_v_nxv16i8_nxv16i8(<vscale x 16 x i8>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vle.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     i64 %1)
 
@@ -1438,6 +1508,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   i64);
 
@@ -1449,6 +1520,7 @@ define <vscale x 32 x i8> @intrinsic_vle_v_nxv32i8_nxv32i8(<vscale x 32 x i8>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vle.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     i64 %1)
 
@@ -1479,6 +1551,7 @@ entry:
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   i64);
 
@@ -1490,6 +1563,7 @@ define <vscale x 64 x i8> @intrinsic_vle_v_nxv64i8_nxv64i8(<vscale x 64 x i8>* %
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vle.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     i64 %1)
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
index e5ae807e6ff22..d2241289df802 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv32.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare { <vscale x 1 x i64>, i32 } @llvm.riscv.vleff.nxv1i64(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   i32);
 
@@ -15,6 +16,7 @@ define <vscale x 1 x i64> @intrinsic_vleff_v_nxv1i64_nxv1i64(<vscale x 1 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x i64>, i32 } @llvm.riscv.vleff.nxv1i64(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x i64>, i32 } %a, 0
@@ -52,6 +54,7 @@ entry:
 }
 
 declare { <vscale x 2 x i64>, i32 } @llvm.riscv.vleff.nxv2i64(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   i32);
 
@@ -65,6 +68,7 @@ define <vscale x 2 x i64> @intrinsic_vleff_v_nxv2i64_nxv2i64(<vscale x 2 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x i64>, i32 } @llvm.riscv.vleff.nxv2i64(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 2 x i64>, i32 } %a, 0
@@ -102,6 +106,7 @@ entry:
 }
 
 declare { <vscale x 4 x i64>, i32 } @llvm.riscv.vleff.nxv4i64(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   i32);
 
@@ -115,6 +120,7 @@ define <vscale x 4 x i64> @intrinsic_vleff_v_nxv4i64_nxv4i64(<vscale x 4 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x i64>, i32 } @llvm.riscv.vleff.nxv4i64(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 4 x i64>, i32 } %a, 0
@@ -152,6 +158,7 @@ entry:
 }
 
 declare { <vscale x 8 x i64>, i32 } @llvm.riscv.vleff.nxv8i64(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   i32);
 
@@ -165,6 +172,7 @@ define <vscale x 8 x i64> @intrinsic_vleff_v_nxv8i64_nxv8i64(<vscale x 8 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x i64>, i32 } @llvm.riscv.vleff.nxv8i64(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 8 x i64>, i32 } %a, 0
@@ -202,6 +210,7 @@ entry:
 }
 
 declare { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   i32);
 
@@ -215,6 +224,7 @@ define <vscale x 1 x double> @intrinsic_vleff_v_nxv1f64_nxv1f64(<vscale x 1 x do
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x double>, i32 } %a, 0
@@ -252,6 +262,7 @@ entry:
 }
 
 declare { <vscale x 2 x double>, i32 } @llvm.riscv.vleff.nxv2f64(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   i32);
 
@@ -265,6 +276,7 @@ define <vscale x 2 x double> @intrinsic_vleff_v_nxv2f64_nxv2f64(<vscale x 2 x do
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x double>, i32 } @llvm.riscv.vleff.nxv2f64(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 2 x double>, i32 } %a, 0
@@ -302,6 +314,7 @@ entry:
 }
 
 declare { <vscale x 4 x double>, i32 } @llvm.riscv.vleff.nxv4f64(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   i32);
 
@@ -315,6 +328,7 @@ define <vscale x 4 x double> @intrinsic_vleff_v_nxv4f64_nxv4f64(<vscale x 4 x do
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x double>, i32 } @llvm.riscv.vleff.nxv4f64(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 4 x double>, i32 } %a, 0
@@ -352,6 +366,7 @@ entry:
 }
 
 declare { <vscale x 8 x double>, i32 } @llvm.riscv.vleff.nxv8f64(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   i32);
 
@@ -365,6 +380,7 @@ define <vscale x 8 x double> @intrinsic_vleff_v_nxv8f64_nxv8f64(<vscale x 8 x do
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x double>, i32 } @llvm.riscv.vleff.nxv8f64(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 8 x double>, i32 } %a, 0
@@ -402,6 +418,7 @@ entry:
 }
 
 declare { <vscale x 1 x i32>, i32 } @llvm.riscv.vleff.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   i32);
 
@@ -415,6 +432,7 @@ define <vscale x 1 x i32> @intrinsic_vleff_v_nxv1i32_nxv1i32(<vscale x 1 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x i32>, i32 } @llvm.riscv.vleff.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x i32>, i32 } %a, 0
@@ -452,6 +470,7 @@ entry:
 }
 
 declare { <vscale x 2 x i32>, i32 } @llvm.riscv.vleff.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   i32);
 
@@ -465,6 +484,7 @@ define <vscale x 2 x i32> @intrinsic_vleff_v_nxv2i32_nxv2i32(<vscale x 2 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x i32>, i32 } @llvm.riscv.vleff.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 2 x i32>, i32 } %a, 0
@@ -502,6 +522,7 @@ entry:
 }
 
 declare { <vscale x 4 x i32>, i32 } @llvm.riscv.vleff.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   i32);
 
@@ -515,6 +536,7 @@ define <vscale x 4 x i32> @intrinsic_vleff_v_nxv4i32_nxv4i32(<vscale x 4 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x i32>, i32 } @llvm.riscv.vleff.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 4 x i32>, i32 } %a, 0
@@ -552,6 +574,7 @@ entry:
 }
 
 declare { <vscale x 8 x i32>, i32 } @llvm.riscv.vleff.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   i32);
 
@@ -565,6 +588,7 @@ define <vscale x 8 x i32> @intrinsic_vleff_v_nxv8i32_nxv8i32(<vscale x 8 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x i32>, i32 } @llvm.riscv.vleff.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 8 x i32>, i32 } %a, 0
@@ -602,6 +626,7 @@ entry:
 }
 
 declare { <vscale x 16 x i32>, i32 } @llvm.riscv.vleff.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   i32);
 
@@ -615,6 +640,7 @@ define <vscale x 16 x i32> @intrinsic_vleff_v_nxv16i32_nxv16i32(<vscale x 16 x i
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x i32>, i32 } @llvm.riscv.vleff.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 16 x i32>, i32 } %a, 0
@@ -652,6 +678,7 @@ entry:
 }
 
 declare { <vscale x 1 x float>, i32 } @llvm.riscv.vleff.nxv1f32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   i32);
 
@@ -665,6 +692,7 @@ define <vscale x 1 x float> @intrinsic_vleff_v_nxv1f32_nxv1f32(<vscale x 1 x flo
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x float>, i32 } @llvm.riscv.vleff.nxv1f32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x float>, i32 } %a, 0
@@ -702,6 +730,7 @@ entry:
 }
 
 declare { <vscale x 2 x float>, i32 } @llvm.riscv.vleff.nxv2f32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   i32);
 
@@ -715,6 +744,7 @@ define <vscale x 2 x float> @intrinsic_vleff_v_nxv2f32_nxv2f32(<vscale x 2 x flo
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x float>, i32 } @llvm.riscv.vleff.nxv2f32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 2 x float>, i32 } %a, 0
@@ -752,6 +782,7 @@ entry:
 }
 
 declare { <vscale x 4 x float>, i32 } @llvm.riscv.vleff.nxv4f32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   i32);
 
@@ -765,6 +796,7 @@ define <vscale x 4 x float> @intrinsic_vleff_v_nxv4f32_nxv4f32(<vscale x 4 x flo
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x float>, i32 } @llvm.riscv.vleff.nxv4f32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 4 x float>, i32 } %a, 0
@@ -802,6 +834,7 @@ entry:
 }
 
 declare { <vscale x 8 x float>, i32 } @llvm.riscv.vleff.nxv8f32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   i32);
 
@@ -815,6 +848,7 @@ define <vscale x 8 x float> @intrinsic_vleff_v_nxv8f32_nxv8f32(<vscale x 8 x flo
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x float>, i32 } @llvm.riscv.vleff.nxv8f32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 8 x float>, i32 } %a, 0
@@ -852,6 +886,7 @@ entry:
 }
 
 declare { <vscale x 16 x float>, i32 } @llvm.riscv.vleff.nxv16f32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   i32);
 
@@ -865,6 +900,7 @@ define <vscale x 16 x float> @intrinsic_vleff_v_nxv16f32_nxv16f32(<vscale x 16 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x float>, i32 } @llvm.riscv.vleff.nxv16f32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 16 x float>, i32 } %a, 0
@@ -902,6 +938,7 @@ entry:
 }
 
 declare { <vscale x 1 x i16>, i32 } @llvm.riscv.vleff.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   i32);
 
@@ -915,6 +952,7 @@ define <vscale x 1 x i16> @intrinsic_vleff_v_nxv1i16_nxv1i16(<vscale x 1 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x i16>, i32 } @llvm.riscv.vleff.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x i16>, i32 } %a, 0
@@ -952,6 +990,7 @@ entry:
 }
 
 declare { <vscale x 2 x i16>, i32 } @llvm.riscv.vleff.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   i32);
 
@@ -965,6 +1004,7 @@ define <vscale x 2 x i16> @intrinsic_vleff_v_nxv2i16_nxv2i16(<vscale x 2 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x i16>, i32 } @llvm.riscv.vleff.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 2 x i16>, i32 } %a, 0
@@ -1002,6 +1042,7 @@ entry:
 }
 
 declare { <vscale x 4 x i16>, i32 } @llvm.riscv.vleff.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   i32);
 
@@ -1015,6 +1056,7 @@ define <vscale x 4 x i16> @intrinsic_vleff_v_nxv4i16_nxv4i16(<vscale x 4 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x i16>, i32 } @llvm.riscv.vleff.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 4 x i16>, i32 } %a, 0
@@ -1052,6 +1094,7 @@ entry:
 }
 
 declare { <vscale x 8 x i16>, i32 } @llvm.riscv.vleff.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   i32);
 
@@ -1065,6 +1108,7 @@ define <vscale x 8 x i16> @intrinsic_vleff_v_nxv8i16_nxv8i16(<vscale x 8 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x i16>, i32 } @llvm.riscv.vleff.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 8 x i16>, i32 } %a, 0
@@ -1102,6 +1146,7 @@ entry:
 }
 
 declare { <vscale x 16 x i16>, i32 } @llvm.riscv.vleff.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   i32);
 
@@ -1115,6 +1160,7 @@ define <vscale x 16 x i16> @intrinsic_vleff_v_nxv16i16_nxv16i16(<vscale x 16 x i
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x i16>, i32 } @llvm.riscv.vleff.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 16 x i16>, i32 } %a, 0
@@ -1152,6 +1198,7 @@ entry:
 }
 
 declare { <vscale x 32 x i16>, i32 } @llvm.riscv.vleff.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   i32);
 
@@ -1165,6 +1212,7 @@ define <vscale x 32 x i16> @intrinsic_vleff_v_nxv32i16_nxv32i16(<vscale x 32 x i
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 32 x i16>, i32 } @llvm.riscv.vleff.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 32 x i16>, i32 } %a, 0
@@ -1202,6 +1250,7 @@ entry:
 }
 
 declare { <vscale x 1 x half>, i32 } @llvm.riscv.vleff.nxv1f16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   i32);
 
@@ -1215,6 +1264,7 @@ define <vscale x 1 x half> @intrinsic_vleff_v_nxv1half_nxv1f16(<vscale x 1 x hal
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x half>, i32 } @llvm.riscv.vleff.nxv1f16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x half>, i32 } %a, 0
@@ -1252,6 +1302,7 @@ entry:
 }
 
 declare { <vscale x 2 x half>, i32 } @llvm.riscv.vleff.nxv2f16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   i32);
 
@@ -1265,6 +1316,7 @@ define <vscale x 2 x half> @intrinsic_vleff_v_nxv2half_nxv2f16(<vscale x 2 x hal
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x half>, i32 } @llvm.riscv.vleff.nxv2f16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 2 x half>, i32 } %a, 0
@@ -1302,6 +1354,7 @@ entry:
 }
 
 declare { <vscale x 4 x half>, i32 } @llvm.riscv.vleff.nxv4f16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   i32);
 
@@ -1315,6 +1368,7 @@ define <vscale x 4 x half> @intrinsic_vleff_v_nxv4half_nxv4f16(<vscale x 4 x hal
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x half>, i32 } @llvm.riscv.vleff.nxv4f16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 4 x half>, i32 } %a, 0
@@ -1352,6 +1406,7 @@ entry:
 }
 
 declare { <vscale x 8 x half>, i32 } @llvm.riscv.vleff.nxv8f16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   i32);
 
@@ -1365,6 +1420,7 @@ define <vscale x 8 x half> @intrinsic_vleff_v_nxv8half_nxv8f16(<vscale x 8 x hal
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x half>, i32 } @llvm.riscv.vleff.nxv8f16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 8 x half>, i32 } %a, 0
@@ -1402,6 +1458,7 @@ entry:
 }
 
 declare { <vscale x 16 x half>, i32 } @llvm.riscv.vleff.nxv16f16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   i32);
 
@@ -1415,6 +1472,7 @@ define <vscale x 16 x half> @intrinsic_vleff_v_nxv16half_nxv16f16(<vscale x 16 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x half>, i32 } @llvm.riscv.vleff.nxv16f16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 16 x half>, i32 } %a, 0
@@ -1452,6 +1510,7 @@ entry:
 }
 
 declare { <vscale x 32 x half>, i32 } @llvm.riscv.vleff.nxv32f16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   i32);
 
@@ -1465,6 +1524,7 @@ define <vscale x 32 x half> @intrinsic_vleff_v_nxv32half_nxv32f16(<vscale x 32 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 32 x half>, i32 } @llvm.riscv.vleff.nxv32f16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 32 x half>, i32 } %a, 0
@@ -1502,6 +1562,7 @@ entry:
 }
 
 declare { <vscale x 1 x i8>, i32 } @llvm.riscv.vleff.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   i32);
 
@@ -1515,6 +1576,7 @@ define <vscale x 1 x i8> @intrinsic_vleff_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x i8>, i32 } @llvm.riscv.vleff.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x i8>, i32 } %a, 0
@@ -1552,6 +1614,7 @@ entry:
 }
 
 declare { <vscale x 2 x i8>, i32 } @llvm.riscv.vleff.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   i32);
 
@@ -1565,6 +1628,7 @@ define <vscale x 2 x i8> @intrinsic_vleff_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x i8>, i32 } @llvm.riscv.vleff.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 2 x i8>, i32 } %a, 0
@@ -1602,6 +1666,7 @@ entry:
 }
 
 declare { <vscale x 4 x i8>, i32 } @llvm.riscv.vleff.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   i32);
 
@@ -1615,6 +1680,7 @@ define <vscale x 4 x i8> @intrinsic_vleff_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x i8>, i32 } @llvm.riscv.vleff.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 4 x i8>, i32 } %a, 0
@@ -1652,6 +1718,7 @@ entry:
 }
 
 declare { <vscale x 8 x i8>, i32 } @llvm.riscv.vleff.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   i32);
 
@@ -1665,6 +1732,7 @@ define <vscale x 8 x i8> @intrinsic_vleff_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x i8>, i32 } @llvm.riscv.vleff.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 8 x i8>, i32 } %a, 0
@@ -1702,6 +1770,7 @@ entry:
 }
 
 declare { <vscale x 16 x i8>, i32 } @llvm.riscv.vleff.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   i32);
 
@@ -1715,6 +1784,7 @@ define <vscale x 16 x i8> @intrinsic_vleff_v_nxv16i8_nxv16i8(<vscale x 16 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x i8>, i32 } @llvm.riscv.vleff.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 16 x i8>, i32 } %a, 0
@@ -1752,6 +1822,7 @@ entry:
 }
 
 declare { <vscale x 32 x i8>, i32 } @llvm.riscv.vleff.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   i32);
 
@@ -1765,6 +1836,7 @@ define <vscale x 32 x i8> @intrinsic_vleff_v_nxv32i8_nxv32i8(<vscale x 32 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 32 x i8>, i32 } @llvm.riscv.vleff.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 32 x i8>, i32 } %a, 0
@@ -1802,6 +1874,7 @@ entry:
 }
 
 declare { <vscale x 64 x i8>, i32 } @llvm.riscv.vleff.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   i32);
 
@@ -1815,6 +1888,7 @@ define <vscale x 64 x i8> @intrinsic_vleff_v_nxv64i8_nxv64i8(<vscale x 64 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 64 x i8>, i32 } @llvm.riscv.vleff.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 64 x i8>, i32 } %a, 0
@@ -1860,6 +1934,7 @@ define <vscale x 1 x double> @intrinsic_vleff_dead_vl(<vscale x 1 x double>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x double>, i32 } %a, 0
@@ -1894,6 +1969,7 @@ define void @intrinsic_vleff_dead_value(<vscale x 1 x double>* %0, i32 %1, i32*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i32 %1)
   %b = extractvalue { <vscale x 1 x double>, i32 } %a, 1
@@ -1930,6 +2006,7 @@ define void @intrinsic_vleff_dead_all(<vscale x 1 x double>* %0, i32 %1, i32* %2
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, i32 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i32 %1)
   ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
index d588205f18702..91b64c072e0a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vleff-rv64.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   i64);
 
@@ -15,6 +16,7 @@ define <vscale x 1 x i64> @intrinsic_vleff_v_nxv1i64_nxv1i64(<vscale x 1 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x i64>, i64 } %a, 0
@@ -52,6 +54,7 @@ entry:
 }
 
 declare { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.nxv2i64(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   i64);
 
@@ -65,6 +68,7 @@ define <vscale x 2 x i64> @intrinsic_vleff_v_nxv2i64_nxv2i64(<vscale x 2 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x i64>, i64 } @llvm.riscv.vleff.nxv2i64(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 2 x i64>, i64 } %a, 0
@@ -102,6 +106,7 @@ entry:
 }
 
 declare { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.nxv4i64(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   i64);
 
@@ -115,6 +120,7 @@ define <vscale x 4 x i64> @intrinsic_vleff_v_nxv4i64_nxv4i64(<vscale x 4 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x i64>, i64 } @llvm.riscv.vleff.nxv4i64(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 4 x i64>, i64 } %a, 0
@@ -152,6 +158,7 @@ entry:
 }
 
 declare { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.nxv8i64(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   i64);
 
@@ -165,6 +172,7 @@ define <vscale x 8 x i64> @intrinsic_vleff_v_nxv8i64_nxv8i64(<vscale x 8 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x i64>, i64 } @llvm.riscv.vleff.nxv8i64(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 8 x i64>, i64 } %a, 0
@@ -202,6 +210,7 @@ entry:
 }
 
 declare { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   i64);
 
@@ -215,6 +224,7 @@ define <vscale x 1 x double> @intrinsic_vleff_v_nxv1f64_nxv1f64(<vscale x 1 x do
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x double>, i64 } %a, 0
@@ -252,6 +262,7 @@ entry:
 }
 
 declare { <vscale x 2 x double>, i64 } @llvm.riscv.vleff.nxv2f64(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   i64);
 
@@ -265,6 +276,7 @@ define <vscale x 2 x double> @intrinsic_vleff_v_nxv2f64_nxv2f64(<vscale x 2 x do
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x double>, i64 } @llvm.riscv.vleff.nxv2f64(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 2 x double>, i64 } %a, 0
@@ -302,6 +314,7 @@ entry:
 }
 
 declare { <vscale x 4 x double>, i64 } @llvm.riscv.vleff.nxv4f64(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   i64);
 
@@ -315,6 +328,7 @@ define <vscale x 4 x double> @intrinsic_vleff_v_nxv4f64_nxv4f64(<vscale x 4 x do
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x double>, i64 } @llvm.riscv.vleff.nxv4f64(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 4 x double>, i64 } %a, 0
@@ -352,6 +366,7 @@ entry:
 }
 
 declare { <vscale x 8 x double>, i64 } @llvm.riscv.vleff.nxv8f64(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   i64);
 
@@ -365,6 +380,7 @@ define <vscale x 8 x double> @intrinsic_vleff_v_nxv8f64_nxv8f64(<vscale x 8 x do
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x double>, i64 } @llvm.riscv.vleff.nxv8f64(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 8 x double>, i64 } %a, 0
@@ -402,6 +418,7 @@ entry:
 }
 
 declare { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   i64);
 
@@ -415,6 +432,7 @@ define <vscale x 1 x i32> @intrinsic_vleff_v_nxv1i32_nxv1i32(<vscale x 1 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x i32>, i64 } @llvm.riscv.vleff.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x i32>, i64 } %a, 0
@@ -452,6 +470,7 @@ entry:
 }
 
 declare { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   i64);
 
@@ -465,6 +484,7 @@ define <vscale x 2 x i32> @intrinsic_vleff_v_nxv2i32_nxv2i32(<vscale x 2 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x i32>, i64 } @llvm.riscv.vleff.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 2 x i32>, i64 } %a, 0
@@ -502,6 +522,7 @@ entry:
 }
 
 declare { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   i64);
 
@@ -515,6 +536,7 @@ define <vscale x 4 x i32> @intrinsic_vleff_v_nxv4i32_nxv4i32(<vscale x 4 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x i32>, i64 } @llvm.riscv.vleff.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 4 x i32>, i64 } %a, 0
@@ -552,6 +574,7 @@ entry:
 }
 
 declare { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   i64);
 
@@ -565,6 +588,7 @@ define <vscale x 8 x i32> @intrinsic_vleff_v_nxv8i32_nxv8i32(<vscale x 8 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x i32>, i64 } @llvm.riscv.vleff.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 8 x i32>, i64 } %a, 0
@@ -602,6 +626,7 @@ entry:
 }
 
 declare { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   i64);
 
@@ -615,6 +640,7 @@ define <vscale x 16 x i32> @intrinsic_vleff_v_nxv16i32_nxv16i32(<vscale x 16 x i
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x i32>, i64 } @llvm.riscv.vleff.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 16 x i32>, i64 } %a, 0
@@ -652,6 +678,7 @@ entry:
 }
 
 declare { <vscale x 1 x float>, i64 } @llvm.riscv.vleff.nxv1f32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   i64);
 
@@ -665,6 +692,7 @@ define <vscale x 1 x float> @intrinsic_vleff_v_nxv1f32_nxv1f32(<vscale x 1 x flo
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x float>, i64 } @llvm.riscv.vleff.nxv1f32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x float>, i64 } %a, 0
@@ -702,6 +730,7 @@ entry:
 }
 
 declare { <vscale x 2 x float>, i64 } @llvm.riscv.vleff.nxv2f32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   i64);
 
@@ -715,6 +744,7 @@ define <vscale x 2 x float> @intrinsic_vleff_v_nxv2f32_nxv2f32(<vscale x 2 x flo
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x float>, i64 } @llvm.riscv.vleff.nxv2f32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 2 x float>, i64 } %a, 0
@@ -752,6 +782,7 @@ entry:
 }
 
 declare { <vscale x 4 x float>, i64 } @llvm.riscv.vleff.nxv4f32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   i64);
 
@@ -765,6 +796,7 @@ define <vscale x 4 x float> @intrinsic_vleff_v_nxv4f32_nxv4f32(<vscale x 4 x flo
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x float>, i64 } @llvm.riscv.vleff.nxv4f32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 4 x float>, i64 } %a, 0
@@ -802,6 +834,7 @@ entry:
 }
 
 declare { <vscale x 8 x float>, i64 } @llvm.riscv.vleff.nxv8f32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   i64);
 
@@ -815,6 +848,7 @@ define <vscale x 8 x float> @intrinsic_vleff_v_nxv8f32_nxv8f32(<vscale x 8 x flo
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x float>, i64 } @llvm.riscv.vleff.nxv8f32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 8 x float>, i64 } %a, 0
@@ -852,6 +886,7 @@ entry:
 }
 
 declare { <vscale x 16 x float>, i64 } @llvm.riscv.vleff.nxv16f32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   i64);
 
@@ -865,6 +900,7 @@ define <vscale x 16 x float> @intrinsic_vleff_v_nxv16f32_nxv16f32(<vscale x 16 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x float>, i64 } @llvm.riscv.vleff.nxv16f32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 16 x float>, i64 } %a, 0
@@ -902,6 +938,7 @@ entry:
 }
 
 declare { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   i64);
 
@@ -915,6 +952,7 @@ define <vscale x 1 x i16> @intrinsic_vleff_v_nxv1i16_nxv1i16(<vscale x 1 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x i16>, i64 } @llvm.riscv.vleff.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x i16>, i64 } %a, 0
@@ -952,6 +990,7 @@ entry:
 }
 
 declare { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   i64);
 
@@ -965,6 +1004,7 @@ define <vscale x 2 x i16> @intrinsic_vleff_v_nxv2i16_nxv2i16(<vscale x 2 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x i16>, i64 } @llvm.riscv.vleff.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 2 x i16>, i64 } %a, 0
@@ -1002,6 +1042,7 @@ entry:
 }
 
 declare { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   i64);
 
@@ -1015,6 +1056,7 @@ define <vscale x 4 x i16> @intrinsic_vleff_v_nxv4i16_nxv4i16(<vscale x 4 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x i16>, i64 } @llvm.riscv.vleff.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 4 x i16>, i64 } %a, 0
@@ -1052,6 +1094,7 @@ entry:
 }
 
 declare { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   i64);
 
@@ -1065,6 +1108,7 @@ define <vscale x 8 x i16> @intrinsic_vleff_v_nxv8i16_nxv8i16(<vscale x 8 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x i16>, i64 } @llvm.riscv.vleff.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 8 x i16>, i64 } %a, 0
@@ -1102,6 +1146,7 @@ entry:
 }
 
 declare { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   i64);
 
@@ -1115,6 +1160,7 @@ define <vscale x 16 x i16> @intrinsic_vleff_v_nxv16i16_nxv16i16(<vscale x 16 x i
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x i16>, i64 } @llvm.riscv.vleff.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 16 x i16>, i64 } %a, 0
@@ -1152,6 +1198,7 @@ entry:
 }
 
 declare { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   i64);
 
@@ -1165,6 +1212,7 @@ define <vscale x 32 x i16> @intrinsic_vleff_v_nxv32i16_nxv32i16(<vscale x 32 x i
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 32 x i16>, i64 } @llvm.riscv.vleff.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 32 x i16>, i64 } %a, 0
@@ -1202,6 +1250,7 @@ entry:
 }
 
 declare { <vscale x 1 x half>, i64 } @llvm.riscv.vleff.nxv1f16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   i64);
 
@@ -1215,6 +1264,7 @@ define <vscale x 1 x half> @intrinsic_vleff_v_nxv1half_nxv1f16(<vscale x 1 x hal
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x half>, i64 } @llvm.riscv.vleff.nxv1f16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x half>, i64 } %a, 0
@@ -1252,6 +1302,7 @@ entry:
 }
 
 declare { <vscale x 2 x half>, i64 } @llvm.riscv.vleff.nxv2f16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   i64);
 
@@ -1265,6 +1316,7 @@ define <vscale x 2 x half> @intrinsic_vleff_v_nxv2half_nxv2f16(<vscale x 2 x hal
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x half>, i64 } @llvm.riscv.vleff.nxv2f16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 2 x half>, i64 } %a, 0
@@ -1302,6 +1354,7 @@ entry:
 }
 
 declare { <vscale x 4 x half>, i64 } @llvm.riscv.vleff.nxv4f16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   i64);
 
@@ -1315,6 +1368,7 @@ define <vscale x 4 x half> @intrinsic_vleff_v_nxv4half_nxv4f16(<vscale x 4 x hal
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x half>, i64 } @llvm.riscv.vleff.nxv4f16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 4 x half>, i64 } %a, 0
@@ -1352,6 +1406,7 @@ entry:
 }
 
 declare { <vscale x 8 x half>, i64 } @llvm.riscv.vleff.nxv8f16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   i64);
 
@@ -1365,6 +1420,7 @@ define <vscale x 8 x half> @intrinsic_vleff_v_nxv8half_nxv8f16(<vscale x 8 x hal
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x half>, i64 } @llvm.riscv.vleff.nxv8f16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 8 x half>, i64 } %a, 0
@@ -1402,6 +1458,7 @@ entry:
 }
 
 declare { <vscale x 16 x half>, i64 } @llvm.riscv.vleff.nxv16f16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   i64);
 
@@ -1415,6 +1472,7 @@ define <vscale x 16 x half> @intrinsic_vleff_v_nxv16half_nxv16f16(<vscale x 16 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x half>, i64 } @llvm.riscv.vleff.nxv16f16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 16 x half>, i64 } %a, 0
@@ -1452,6 +1510,7 @@ entry:
 }
 
 declare { <vscale x 32 x half>, i64 } @llvm.riscv.vleff.nxv32f16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   i64);
 
@@ -1465,6 +1524,7 @@ define <vscale x 32 x half> @intrinsic_vleff_v_nxv32half_nxv32f16(<vscale x 32 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 32 x half>, i64 } @llvm.riscv.vleff.nxv32f16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 32 x half>, i64 } %a, 0
@@ -1502,6 +1562,7 @@ entry:
 }
 
 declare { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   i64);
 
@@ -1515,6 +1576,7 @@ define <vscale x 1 x i8> @intrinsic_vleff_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x i8>, i64 } @llvm.riscv.vleff.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x i8>, i64 } %a, 0
@@ -1552,6 +1614,7 @@ entry:
 }
 
 declare { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   i64);
 
@@ -1565,6 +1628,7 @@ define <vscale x 2 x i8> @intrinsic_vleff_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 2 x i8>, i64 } @llvm.riscv.vleff.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 2 x i8>, i64 } %a, 0
@@ -1602,6 +1666,7 @@ entry:
 }
 
 declare { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   i64);
 
@@ -1615,6 +1680,7 @@ define <vscale x 4 x i8> @intrinsic_vleff_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 4 x i8>, i64 } @llvm.riscv.vleff.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 4 x i8>, i64 } %a, 0
@@ -1652,6 +1718,7 @@ entry:
 }
 
 declare { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   i64);
 
@@ -1665,6 +1732,7 @@ define <vscale x 8 x i8> @intrinsic_vleff_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 8 x i8>, i64 } %a, 0
@@ -1702,6 +1770,7 @@ entry:
 }
 
 declare { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   i64);
 
@@ -1715,6 +1784,7 @@ define <vscale x 16 x i8> @intrinsic_vleff_v_nxv16i8_nxv16i8(<vscale x 16 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 16 x i8>, i64 } @llvm.riscv.vleff.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 16 x i8>, i64 } %a, 0
@@ -1752,6 +1822,7 @@ entry:
 }
 
 declare { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   i64);
 
@@ -1765,6 +1836,7 @@ define <vscale x 32 x i8> @intrinsic_vleff_v_nxv32i8_nxv32i8(<vscale x 32 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 32 x i8>, i64 } @llvm.riscv.vleff.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 32 x i8>, i64 } %a, 0
@@ -1802,6 +1874,7 @@ entry:
 }
 
 declare { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   i64);
 
@@ -1815,6 +1888,7 @@ define <vscale x 64 x i8> @intrinsic_vleff_v_nxv64i8_nxv64i8(<vscale x 64 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 64 x i8>, i64 } @llvm.riscv.vleff.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 64 x i8>, i64 } %a, 0
@@ -1860,6 +1934,7 @@ define <vscale x 1 x double> @intrinsic_vleff_dead_vl(<vscale x 1 x double>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x double>, i64 } %a, 0
@@ -1894,6 +1969,7 @@ define void @intrinsic_vleff_dead_value(<vscale x 1 x double>* %0, i64 %1, i64*
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i64 %1)
   %b = extractvalue { <vscale x 1 x double>, i64 } %a, 1
@@ -1930,6 +2006,7 @@ define void @intrinsic_vleff_dead_all(<vscale x 1 x double>* %0, i64 %1) nounwin
 ; CHECK-NEXT:    ret
 entry:
   %a = call { <vscale x 1 x double>, i64 } @llvm.riscv.vleff.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i64 %1)
   ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
index e3275457d2be5..dbf6c4b2848c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv32.ll
@@ -3,6 +3,7 @@
 ; RUN:   < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i32>,
   i32);
@@ -16,6 +17,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -49,6 +51,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i32>,
   i32);
@@ -62,6 +65,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -95,6 +99,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i32>,
   i32);
@@ -108,6 +113,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -141,6 +147,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i32>,
   i32);
@@ -154,6 +161,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -187,6 +195,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i32>,
   i32);
@@ -200,6 +209,7 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -233,6 +243,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i32>,
   i32);
@@ -246,6 +257,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -279,6 +291,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i32>,
   i32);
@@ -292,6 +305,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -325,6 +339,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i32>,
   i32);
@@ -338,6 +353,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -371,6 +387,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i32>,
   i32);
@@ -384,6 +401,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -417,6 +435,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i32>,
   i32);
@@ -430,6 +449,7 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -463,6 +483,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i32>,
   i32);
@@ -475,6 +496,7 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -508,6 +530,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i32>,
   i32);
@@ -520,6 +543,7 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -553,6 +577,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i32>,
   i32);
@@ -565,6 +590,7 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -598,6 +624,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i32>,
   i32);
@@ -610,6 +637,7 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -643,6 +671,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i32>,
   i32);
@@ -655,6 +684,7 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i32(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -688,6 +718,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i32>,
   i32);
@@ -701,6 +732,7 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -734,6 +766,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i32>,
   i32);
@@ -747,6 +780,7 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -780,6 +814,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i32>,
   i32);
@@ -793,6 +828,7 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -826,6 +862,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i32>,
   i32);
@@ -839,6 +876,7 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -872,6 +910,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i32>,
   i32);
@@ -885,6 +924,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -918,6 +958,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i32>,
   i32);
@@ -931,6 +972,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -964,6 +1006,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i32>,
   i32);
@@ -977,6 +1020,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -1010,6 +1054,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i32>,
   i32);
@@ -1023,6 +1068,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -1056,6 +1102,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i32>,
   i32);
@@ -1069,6 +1116,7 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -1102,6 +1150,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i32>,
   i32);
@@ -1114,6 +1163,7 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -1147,6 +1197,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i32>,
   i32);
@@ -1159,6 +1210,7 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -1192,6 +1244,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i32>,
   i32);
@@ -1204,6 +1257,7 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -1237,6 +1291,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i32>,
   i32);
@@ -1249,6 +1304,7 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -1282,6 +1338,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i32>,
   i32);
@@ -1294,6 +1351,7 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i32(<vsc
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -1327,6 +1385,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i32>,
   i32);
@@ -1340,6 +1399,7 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -1373,6 +1433,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i32>,
   i32);
@@ -1386,6 +1447,7 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -1419,6 +1481,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i32>,
   i32);
@@ -1432,6 +1495,7 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -1465,6 +1529,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i32>,
   i32);
@@ -1478,6 +1543,7 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -1511,6 +1577,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i16>,
   i32);
@@ -1524,6 +1591,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -1557,6 +1625,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i16>,
   i32);
@@ -1570,6 +1639,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -1603,6 +1673,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i16>,
   i32);
@@ -1616,6 +1687,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -1649,6 +1721,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i16>,
   i32);
@@ -1662,6 +1735,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -1695,6 +1769,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i16>,
   i32);
@@ -1708,6 +1783,7 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -1741,6 +1817,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i16>,
   i32);
@@ -1754,6 +1831,7 @@ define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     <vscale x 32 x i16> %1,
     i32 %2)
@@ -1787,6 +1865,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i16>,
   i32);
@@ -1799,6 +1878,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -1832,6 +1912,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i16>,
   i32);
@@ -1844,6 +1925,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -1877,6 +1959,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i16>,
   i32);
@@ -1889,6 +1972,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -1922,6 +2006,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i16>,
   i32);
@@ -1934,6 +2019,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -1967,6 +2053,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i16>,
   i32);
@@ -1979,6 +2066,7 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -2012,6 +2100,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i16>,
   i32);
@@ -2024,6 +2113,7 @@ define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     <vscale x 32 x i16> %1,
     i32 %2)
@@ -2057,6 +2147,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i16>,
   i32);
@@ -2070,6 +2161,7 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -2103,6 +2195,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i16>,
   i32);
@@ -2116,6 +2209,7 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -2149,6 +2243,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i16>,
   i32);
@@ -2162,6 +2257,7 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -2195,6 +2291,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i16>,
   i32);
@@ -2208,6 +2305,7 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -2241,6 +2339,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i16>,
   i32);
@@ -2254,6 +2353,7 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -2287,6 +2387,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i16>,
   i32);
@@ -2300,6 +2401,7 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -2333,6 +2435,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i16>,
   i32);
@@ -2346,6 +2449,7 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -2379,6 +2483,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i16>,
   i32);
@@ -2392,6 +2497,7 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -2425,6 +2531,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i16>,
   i32);
@@ -2438,6 +2545,7 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -2471,6 +2579,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i16>,
   i32);
@@ -2483,6 +2592,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -2516,6 +2626,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i16>,
   i32);
@@ -2528,6 +2639,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -2561,6 +2673,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i16>,
   i32);
@@ -2573,6 +2686,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -2606,6 +2720,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i16>,
   i32);
@@ -2618,6 +2733,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -2651,6 +2767,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i16>,
   i32);
@@ -2663,6 +2780,7 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i16(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -2696,6 +2814,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i16>,
   i32);
@@ -2708,6 +2827,7 @@ define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i16(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     <vscale x 32 x i16> %1,
     i32 %2)
@@ -2741,6 +2861,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i16>,
   i32);
@@ -2754,6 +2875,7 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -2787,6 +2909,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i16>,
   i32);
@@ -2800,6 +2923,7 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -2833,6 +2957,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i16>,
   i32);
@@ -2846,6 +2971,7 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -2879,6 +3005,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i16>,
   i32);
@@ -2892,6 +3019,7 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -2925,6 +3053,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i16>,
   i32);
@@ -2938,6 +3067,7 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16(<vsc
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -2971,6 +3101,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i16>,
   i32);
@@ -2984,6 +3115,7 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -3017,6 +3149,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i16>,
   i32);
@@ -3030,6 +3163,7 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -3063,6 +3197,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i16>,
   i32);
@@ -3076,6 +3211,7 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -3109,6 +3245,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i16>,
   i32);
@@ -3122,6 +3259,7 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -3155,6 +3293,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i8>,
   i32);
@@ -3167,6 +3306,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -3200,6 +3340,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i8>,
   i32);
@@ -3212,6 +3353,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -3245,6 +3387,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i8>,
   i32);
@@ -3257,6 +3400,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -3290,6 +3434,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i8>,
   i32);
@@ -3302,6 +3447,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -3335,6 +3481,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i8>,
   i32);
@@ -3347,6 +3494,7 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -3380,6 +3528,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i8>,
   i32);
@@ -3392,6 +3541,7 @@ define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     <vscale x 32 x i8> %1,
     i32 %2)
@@ -3425,6 +3575,7 @@ entry:
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   <vscale x 64 x i8>,
   i32);
@@ -3437,6 +3588,7 @@ define <vscale x 64 x i8> @intrinsic_vloxei_v_nxv64i8_nxv64i8_nxv64i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     <vscale x 64 x i8> %1,
     i32 %2)
@@ -3470,6 +3622,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i8>,
   i32);
@@ -3483,6 +3636,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -3516,6 +3670,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i8>,
   i32);
@@ -3529,6 +3684,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -3562,6 +3718,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i8>,
   i32);
@@ -3575,6 +3732,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -3608,6 +3766,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i8>,
   i32);
@@ -3621,6 +3780,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -3654,6 +3814,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i8>,
   i32);
@@ -3667,6 +3828,7 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -3700,6 +3862,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i8>,
   i32);
@@ -3713,6 +3876,7 @@ define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     <vscale x 32 x i8> %1,
     i32 %2)
@@ -3746,6 +3910,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i8>,
   i32);
@@ -3759,6 +3924,7 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -3792,6 +3958,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i8>,
   i32);
@@ -3805,6 +3972,7 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -3838,6 +4006,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i8>,
   i32);
@@ -3851,6 +4020,7 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -3884,6 +4054,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i8>,
   i32);
@@ -3897,6 +4068,7 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -3930,6 +4102,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i8>,
   i32);
@@ -3943,6 +4116,7 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -3976,6 +4150,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i8>,
   i32);
@@ -3989,6 +4164,7 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -4022,6 +4198,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i8>,
   i32);
@@ -4035,6 +4212,7 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -4068,6 +4246,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i8>,
   i32);
@@ -4081,6 +4260,7 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -4114,6 +4294,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i8>,
   i32);
@@ -4127,6 +4308,7 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -4160,6 +4342,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i8>,
   i32);
@@ -4173,6 +4356,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -4206,6 +4390,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i8>,
   i32);
@@ -4219,6 +4404,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -4252,6 +4438,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i8>,
   i32);
@@ -4265,6 +4452,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -4298,6 +4486,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i8>,
   i32);
@@ -4311,6 +4500,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -4344,6 +4534,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i8>,
   i32);
@@ -4357,6 +4548,7 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -4390,6 +4582,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i8>,
   i32);
@@ -4403,6 +4596,7 @@ define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     <vscale x 32 x i8> %1,
     i32 %2)
@@ -4436,6 +4630,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i8>,
   i32);
@@ -4449,6 +4644,7 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -4482,6 +4678,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i8>,
   i32);
@@ -4495,6 +4692,7 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -4528,6 +4726,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i8>,
   i32);
@@ -4541,6 +4740,7 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -4574,6 +4774,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i8>,
   i32);
@@ -4587,6 +4788,7 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -4620,6 +4822,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i8>,
   i32);
@@ -4633,6 +4836,7 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -4666,6 +4870,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i8>,
   i32);
@@ -4679,6 +4884,7 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -4712,6 +4918,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i8>,
   i32);
@@ -4725,6 +4932,7 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -4758,6 +4966,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i8>,
   i32);
@@ -4771,6 +4980,7 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -4804,6 +5014,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i8>,
   i32);
@@ -4817,6 +5028,7 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
index 138916c8bcb16..18d4d693ff3c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxei-rv64.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i64>,
   i64);
@@ -15,6 +16,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i64(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -48,6 +50,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i64>,
   i64);
@@ -61,6 +64,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i64(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -94,6 +98,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i64>,
   i64);
@@ -107,6 +112,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i64(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -140,6 +146,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i64>,
   i64);
@@ -153,6 +160,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i64(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -186,6 +194,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i64>,
   i64);
@@ -199,6 +208,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i64(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -232,6 +242,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i64>,
   i64);
@@ -245,6 +256,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i64(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -278,6 +290,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i64>,
   i64);
@@ -291,6 +304,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i64(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -324,6 +338,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i64>,
   i64);
@@ -337,6 +352,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i64(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -370,6 +386,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i64>,
   i64);
@@ -383,6 +400,7 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i64(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -416,6 +434,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i64>,
   i64);
@@ -429,6 +448,7 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i64(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -462,6 +482,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i64>,
   i64);
@@ -475,6 +496,7 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i64(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -508,6 +530,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i64>,
   i64);
@@ -521,6 +544,7 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i64(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -554,6 +578,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i64>,
   i64);
@@ -566,6 +591,7 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i64(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -599,6 +625,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i64>,
   i64);
@@ -611,6 +638,7 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i64(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -644,6 +672,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i64>,
   i64);
@@ -656,6 +685,7 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i64(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -689,6 +719,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i64>,
   i64);
@@ -701,6 +732,7 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i64(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -734,6 +766,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i64(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i64>,
   i64);
@@ -747,6 +780,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i64(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -780,6 +814,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i64(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i64>,
   i64);
@@ -793,6 +828,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i64(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -826,6 +862,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i64(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i64>,
   i64);
@@ -839,6 +876,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i64(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -872,6 +910,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i64(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i64>,
   i64);
@@ -885,6 +924,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i64(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -918,6 +958,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i64>,
   i64);
@@ -931,6 +972,7 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i64(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -964,6 +1006,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i64>,
   i64);
@@ -977,6 +1020,7 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i64(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -1010,6 +1054,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i64>,
   i64);
@@ -1023,6 +1068,7 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i64(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -1056,6 +1102,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i64>,
   i64);
@@ -1069,6 +1116,7 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i64(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -1102,6 +1150,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i64(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i64>,
   i64);
@@ -1114,6 +1163,7 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -1147,6 +1197,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i64(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i64>,
   i64);
@@ -1159,6 +1210,7 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i64(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -1192,6 +1244,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i64(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i64>,
   i64);
@@ -1204,6 +1257,7 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i64(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -1237,6 +1291,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i64(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i64>,
   i64);
@@ -1249,6 +1304,7 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i64(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -1282,6 +1338,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i32>,
   i64);
@@ -1295,6 +1352,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i32(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -1328,6 +1386,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i32>,
   i64);
@@ -1341,6 +1400,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i32(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -1374,6 +1434,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i32>,
   i64);
@@ -1387,6 +1448,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i32(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -1420,6 +1482,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i32>,
   i64);
@@ -1433,6 +1496,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i32(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -1466,6 +1530,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i32>,
   i64);
@@ -1479,6 +1544,7 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i32(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -1512,6 +1578,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i32>,
   i64);
@@ -1525,6 +1592,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i32(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -1558,6 +1626,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i32>,
   i64);
@@ -1571,6 +1640,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i32(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -1604,6 +1674,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i32>,
   i64);
@@ -1617,6 +1688,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i32(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -1650,6 +1722,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i32>,
   i64);
@@ -1663,6 +1736,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i32(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -1696,6 +1770,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i32>,
   i64);
@@ -1709,6 +1784,7 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i32(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i32(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -1742,6 +1818,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i32>,
   i64);
@@ -1754,6 +1831,7 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -1787,6 +1865,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i32>,
   i64);
@@ -1799,6 +1878,7 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -1832,6 +1912,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i32>,
   i64);
@@ -1844,6 +1925,7 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -1877,6 +1959,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i32>,
   i64);
@@ -1889,6 +1972,7 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -1922,6 +2006,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i32>,
   i64);
@@ -1934,6 +2019,7 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i32(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -1967,6 +2053,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i32>,
   i64);
@@ -1980,6 +2067,7 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i32(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -2013,6 +2101,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i32>,
   i64);
@@ -2026,6 +2115,7 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i32(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -2059,6 +2149,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i32>,
   i64);
@@ -2072,6 +2163,7 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i32(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -2105,6 +2197,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i32>,
   i64);
@@ -2118,6 +2211,7 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i32(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -2151,6 +2245,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i32>,
   i64);
@@ -2164,6 +2259,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i32(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -2197,6 +2293,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i32>,
   i64);
@@ -2210,6 +2307,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i32(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -2243,6 +2341,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i32>,
   i64);
@@ -2256,6 +2355,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i32(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -2289,6 +2389,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i32>,
   i64);
@@ -2302,6 +2403,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i32(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -2335,6 +2437,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i32>,
   i64);
@@ -2348,6 +2451,7 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i32(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i32(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -2381,6 +2485,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i32>,
   i64);
@@ -2393,6 +2498,7 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -2426,6 +2532,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i32>,
   i64);
@@ -2438,6 +2545,7 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -2471,6 +2579,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i32>,
   i64);
@@ -2483,6 +2592,7 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -2516,6 +2626,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i32>,
   i64);
@@ -2528,6 +2639,7 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -2561,6 +2673,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i32>,
   i64);
@@ -2573,6 +2686,7 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i32(<vsc
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -2606,6 +2720,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i32>,
   i64);
@@ -2619,6 +2734,7 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i32(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -2652,6 +2768,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i32>,
   i64);
@@ -2665,6 +2782,7 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i32(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -2698,6 +2816,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i32>,
   i64);
@@ -2711,6 +2830,7 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i32(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -2744,6 +2864,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i32>,
   i64);
@@ -2757,6 +2878,7 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i32(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -2790,6 +2912,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i16>,
   i64);
@@ -2803,6 +2926,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i16(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -2836,6 +2960,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i16>,
   i64);
@@ -2849,6 +2974,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i16(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -2882,6 +3008,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i16>,
   i64);
@@ -2895,6 +3022,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i16(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -2928,6 +3056,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i16>,
   i64);
@@ -2941,6 +3070,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i16(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -2974,6 +3104,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i16>,
   i64);
@@ -2987,6 +3118,7 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i16(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -3020,6 +3152,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i16>,
   i64);
@@ -3033,6 +3166,7 @@ define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i16(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     <vscale x 32 x i16> %1,
     i64 %2)
@@ -3066,6 +3200,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i16>,
   i64);
@@ -3078,6 +3213,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -3111,6 +3247,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i16>,
   i64);
@@ -3123,6 +3260,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -3156,6 +3294,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i16>,
   i64);
@@ -3168,6 +3307,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -3201,6 +3341,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i16>,
   i64);
@@ -3213,6 +3354,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -3246,6 +3388,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i16>,
   i64);
@@ -3258,6 +3401,7 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -3291,6 +3435,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i16>,
   i64);
@@ -3303,6 +3448,7 @@ define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     <vscale x 32 x i16> %1,
     i64 %2)
@@ -3336,6 +3482,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i16>,
   i64);
@@ -3349,6 +3496,7 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i16(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -3382,6 +3530,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i16>,
   i64);
@@ -3395,6 +3544,7 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i16(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -3428,6 +3578,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i16>,
   i64);
@@ -3441,6 +3592,7 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i16(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -3474,6 +3626,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i16>,
   i64);
@@ -3487,6 +3640,7 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i16(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -3520,6 +3674,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i16>,
   i64);
@@ -3533,6 +3688,7 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i16(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -3566,6 +3722,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i16>,
   i64);
@@ -3579,6 +3736,7 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i16(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -3612,6 +3770,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i16>,
   i64);
@@ -3625,6 +3784,7 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i16(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -3658,6 +3818,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i16>,
   i64);
@@ -3671,6 +3832,7 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i16(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -3704,6 +3866,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i16>,
   i64);
@@ -3717,6 +3880,7 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i16(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -3750,6 +3914,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i16>,
   i64);
@@ -3762,6 +3927,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -3795,6 +3961,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i16>,
   i64);
@@ -3807,6 +3974,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -3840,6 +4008,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i16>,
   i64);
@@ -3852,6 +4021,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -3885,6 +4055,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i16>,
   i64);
@@ -3897,6 +4068,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -3930,6 +4102,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i16>,
   i64);
@@ -3942,6 +4115,7 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i16(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -3975,6 +4149,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i16>,
   i64);
@@ -3987,6 +4162,7 @@ define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i16(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     <vscale x 32 x i16> %1,
     i64 %2)
@@ -4020,6 +4196,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i16>,
   i64);
@@ -4033,6 +4210,7 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i16(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -4066,6 +4244,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i16>,
   i64);
@@ -4079,6 +4258,7 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i16(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -4112,6 +4292,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i16>,
   i64);
@@ -4125,6 +4306,7 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i16(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -4158,6 +4340,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i16>,
   i64);
@@ -4171,6 +4354,7 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i16(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -4204,6 +4388,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i16>,
   i64);
@@ -4217,6 +4402,7 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i16(<vsc
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i16(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -4250,6 +4436,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i16>,
   i64);
@@ -4263,6 +4450,7 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i16(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -4296,6 +4484,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i16>,
   i64);
@@ -4309,6 +4498,7 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i16(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -4342,6 +4532,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i16>,
   i64);
@@ -4355,6 +4546,7 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i16(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -4388,6 +4580,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i16>,
   i64);
@@ -4401,6 +4594,7 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i16(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -4434,6 +4628,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i8>,
   i64);
@@ -4446,6 +4641,7 @@ define <vscale x 1 x i8> @intrinsic_vloxei_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vloxei.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -4479,6 +4675,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i8>,
   i64);
@@ -4491,6 +4688,7 @@ define <vscale x 2 x i8> @intrinsic_vloxei_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vloxei.nxv2i8.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -4524,6 +4722,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i8>,
   i64);
@@ -4536,6 +4735,7 @@ define <vscale x 4 x i8> @intrinsic_vloxei_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vloxei.nxv4i8.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -4569,6 +4769,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i8>,
   i64);
@@ -4581,6 +4782,7 @@ define <vscale x 8 x i8> @intrinsic_vloxei_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vloxei.nxv8i8.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -4614,6 +4816,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i8>,
   i64);
@@ -4626,6 +4829,7 @@ define <vscale x 16 x i8> @intrinsic_vloxei_v_nxv16i8_nxv16i8_nxv16i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vloxei.nxv16i8.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -4659,6 +4863,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i8>,
   i64);
@@ -4671,6 +4876,7 @@ define <vscale x 32 x i8> @intrinsic_vloxei_v_nxv32i8_nxv32i8_nxv32i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vloxei.nxv32i8.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     <vscale x 32 x i8> %1,
     i64 %2)
@@ -4704,6 +4910,7 @@ entry:
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   <vscale x 64 x i8>,
   i64);
@@ -4716,6 +4923,7 @@ define <vscale x 64 x i8> @intrinsic_vloxei_v_nxv64i8_nxv64i8_nxv64i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vloxei.nxv64i8.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     <vscale x 64 x i8> %1,
     i64 %2)
@@ -4749,6 +4957,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i8>,
   i64);
@@ -4762,6 +4971,7 @@ define <vscale x 1 x i16> @intrinsic_vloxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vloxei.nxv1i16.nxv1i8(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -4795,6 +5005,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i8>,
   i64);
@@ -4808,6 +5019,7 @@ define <vscale x 2 x i16> @intrinsic_vloxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vloxei.nxv2i16.nxv2i8(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -4841,6 +5053,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i8>,
   i64);
@@ -4854,6 +5067,7 @@ define <vscale x 4 x i16> @intrinsic_vloxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vloxei.nxv4i16.nxv4i8(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -4887,6 +5101,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i8>,
   i64);
@@ -4900,6 +5115,7 @@ define <vscale x 8 x i16> @intrinsic_vloxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vloxei.nxv8i16.nxv8i8(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -4933,6 +5149,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i8>,
   i64);
@@ -4946,6 +5163,7 @@ define <vscale x 16 x i16> @intrinsic_vloxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vloxei.nxv16i16.nxv16i8(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -4979,6 +5197,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i8>,
   i64);
@@ -4992,6 +5211,7 @@ define <vscale x 32 x i16> @intrinsic_vloxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vloxei.nxv32i16.nxv32i8(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     <vscale x 32 x i8> %1,
     i64 %2)
@@ -5025,6 +5245,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i8>,
   i64);
@@ -5038,6 +5259,7 @@ define <vscale x 1 x i32> @intrinsic_vloxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vloxei.nxv1i32.nxv1i8(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5071,6 +5293,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i8>,
   i64);
@@ -5084,6 +5307,7 @@ define <vscale x 2 x i32> @intrinsic_vloxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vloxei.nxv2i32.nxv2i8(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -5117,6 +5341,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i8>,
   i64);
@@ -5130,6 +5355,7 @@ define <vscale x 4 x i32> @intrinsic_vloxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vloxei.nxv4i32.nxv4i8(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -5163,6 +5389,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i8>,
   i64);
@@ -5176,6 +5403,7 @@ define <vscale x 8 x i32> @intrinsic_vloxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vloxei.nxv8i32.nxv8i8(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -5209,6 +5437,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i8>,
   i64);
@@ -5222,6 +5451,7 @@ define <vscale x 16 x i32> @intrinsic_vloxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vloxei.nxv16i32.nxv16i8(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -5255,6 +5485,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i8>,
   i64);
@@ -5268,6 +5499,7 @@ define <vscale x 1 x i64> @intrinsic_vloxei_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vloxei.nxv1i64.nxv1i8(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5301,6 +5533,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i8>,
   i64);
@@ -5314,6 +5547,7 @@ define <vscale x 2 x i64> @intrinsic_vloxei_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vloxei.nxv2i64.nxv2i8(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -5347,6 +5581,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i8>,
   i64);
@@ -5360,6 +5595,7 @@ define <vscale x 4 x i64> @intrinsic_vloxei_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vloxei.nxv4i64.nxv4i8(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -5393,6 +5629,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i8>,
   i64);
@@ -5406,6 +5643,7 @@ define <vscale x 8 x i64> @intrinsic_vloxei_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vloxei.nxv8i64.nxv8i8(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -5439,6 +5677,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i8>,
   i64);
@@ -5452,6 +5691,7 @@ define <vscale x 1 x half> @intrinsic_vloxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vloxei.nxv1f16.nxv1i8(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5485,6 +5725,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i8>,
   i64);
@@ -5498,6 +5739,7 @@ define <vscale x 2 x half> @intrinsic_vloxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vloxei.nxv2f16.nxv2i8(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -5531,6 +5773,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i8>,
   i64);
@@ -5544,6 +5787,7 @@ define <vscale x 4 x half> @intrinsic_vloxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vloxei.nxv4f16.nxv4i8(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -5577,6 +5821,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i8>,
   i64);
@@ -5590,6 +5835,7 @@ define <vscale x 8 x half> @intrinsic_vloxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vloxei.nxv8f16.nxv8i8(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -5623,6 +5869,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i8>,
   i64);
@@ -5636,6 +5883,7 @@ define <vscale x 16 x half> @intrinsic_vloxei_v_nxv16f16_nxv16f16_nxv16i8(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vloxei.nxv16f16.nxv16i8(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -5669,6 +5917,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i8>,
   i64);
@@ -5682,6 +5931,7 @@ define <vscale x 32 x half> @intrinsic_vloxei_v_nxv32f16_nxv32f16_nxv32i8(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vloxei.nxv32f16.nxv32i8(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     <vscale x 32 x i8> %1,
     i64 %2)
@@ -5715,6 +5965,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i8>,
   i64);
@@ -5728,6 +5979,7 @@ define <vscale x 1 x float> @intrinsic_vloxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vloxei.nxv1f32.nxv1i8(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5761,6 +6013,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i8>,
   i64);
@@ -5774,6 +6027,7 @@ define <vscale x 2 x float> @intrinsic_vloxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vloxei.nxv2f32.nxv2i8(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -5807,6 +6061,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i8>,
   i64);
@@ -5820,6 +6075,7 @@ define <vscale x 4 x float> @intrinsic_vloxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vloxei.nxv4f32.nxv4i8(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -5853,6 +6109,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i8>,
   i64);
@@ -5866,6 +6123,7 @@ define <vscale x 8 x float> @intrinsic_vloxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vloxei.nxv8f32.nxv8i8(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -5899,6 +6157,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i8>,
   i64);
@@ -5912,6 +6171,7 @@ define <vscale x 16 x float> @intrinsic_vloxei_v_nxv16f32_nxv16f32_nxv16i8(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vloxei.nxv16f32.nxv16i8(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -5945,6 +6205,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i8>,
   i64);
@@ -5958,6 +6219,7 @@ define <vscale x 1 x double> @intrinsic_vloxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vloxei.nxv1f64.nxv1i8(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5991,6 +6253,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i8>,
   i64);
@@ -6004,6 +6267,7 @@ define <vscale x 2 x double> @intrinsic_vloxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vloxei.nxv2f64.nxv2i8(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -6037,6 +6301,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i8>,
   i64);
@@ -6050,6 +6315,7 @@ define <vscale x 4 x double> @intrinsic_vloxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vloxei.nxv4f64.nxv4i8(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -6083,6 +6349,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i8>,
   i64);
@@ -6096,6 +6363,7 @@ define <vscale x 8 x double> @intrinsic_vloxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vloxei.nxv8f64.nxv8i8(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll
index 8b68a7f94cf94..00c098716cacb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlse-rv32.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   i32,
   i32);
@@ -14,6 +15,7 @@ define <vscale x 1 x i64> @intrinsic_vlse_v_nxv1i64_nxv1i64(<vscale x 1 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     i32 %1,
     i32 %2)
@@ -47,6 +49,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   i32,
   i32);
@@ -59,6 +62,7 @@ define <vscale x 2 x i64> @intrinsic_vlse_v_nxv2i64_nxv2i64(<vscale x 2 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     i32 %1,
     i32 %2)
@@ -92,6 +96,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   i32,
   i32);
@@ -104,6 +109,7 @@ define <vscale x 4 x i64> @intrinsic_vlse_v_nxv4i64_nxv4i64(<vscale x 4 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     i32 %1,
     i32 %2)
@@ -137,6 +143,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   i32,
   i32);
@@ -149,6 +156,7 @@ define <vscale x 8 x i64> @intrinsic_vlse_v_nxv8i64_nxv8i64(<vscale x 8 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     i32 %1,
     i32 %2)
@@ -182,6 +190,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vlse.nxv1f64(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   i32,
   i32);
@@ -194,6 +203,7 @@ define <vscale x 1 x double> @intrinsic_vlse_v_nxv1f64_nxv1f64(<vscale x 1 x dou
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vlse.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i32 %1,
     i32 %2)
@@ -227,6 +237,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vlse.nxv2f64(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   i32,
   i32);
@@ -239,6 +250,7 @@ define <vscale x 2 x double> @intrinsic_vlse_v_nxv2f64_nxv2f64(<vscale x 2 x dou
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vlse.nxv2f64(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     i32 %1,
     i32 %2)
@@ -272,6 +284,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vlse.nxv4f64(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   i32,
   i32);
@@ -284,6 +297,7 @@ define <vscale x 4 x double> @intrinsic_vlse_v_nxv4f64_nxv4f64(<vscale x 4 x dou
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vlse.nxv4f64(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     i32 %1,
     i32 %2)
@@ -317,6 +331,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vlse.nxv8f64(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   i32,
   i32);
@@ -329,6 +344,7 @@ define <vscale x 8 x double> @intrinsic_vlse_v_nxv8f64_nxv8f64(<vscale x 8 x dou
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vlse.nxv8f64(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     i32 %1,
     i32 %2)
@@ -362,6 +378,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   i32,
   i32);
@@ -374,6 +391,7 @@ define <vscale x 1 x i32> @intrinsic_vlse_v_nxv1i32_nxv1i32(<vscale x 1 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     i32 %1,
     i32 %2)
@@ -407,6 +425,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   i32,
   i32);
@@ -419,6 +438,7 @@ define <vscale x 2 x i32> @intrinsic_vlse_v_nxv2i32_nxv2i32(<vscale x 2 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     i32 %1,
     i32 %2)
@@ -452,6 +472,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   i32,
   i32);
@@ -464,6 +485,7 @@ define <vscale x 4 x i32> @intrinsic_vlse_v_nxv4i32_nxv4i32(<vscale x 4 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     i32 %1,
     i32 %2)
@@ -497,6 +519,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   i32,
   i32);
@@ -509,6 +532,7 @@ define <vscale x 8 x i32> @intrinsic_vlse_v_nxv8i32_nxv8i32(<vscale x 8 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     i32 %1,
     i32 %2)
@@ -542,6 +566,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   i32,
   i32);
@@ -554,6 +579,7 @@ define <vscale x 16 x i32> @intrinsic_vlse_v_nxv16i32_nxv16i32(<vscale x 16 x i3
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     i32 %1,
     i32 %2)
@@ -587,6 +613,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vlse.nxv1f32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   i32,
   i32);
@@ -599,6 +626,7 @@ define <vscale x 1 x float> @intrinsic_vlse_v_nxv1f32_nxv1f32(<vscale x 1 x floa
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vlse.nxv1f32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     i32 %1,
     i32 %2)
@@ -632,6 +660,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vlse.nxv2f32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   i32,
   i32);
@@ -644,6 +673,7 @@ define <vscale x 2 x float> @intrinsic_vlse_v_nxv2f32_nxv2f32(<vscale x 2 x floa
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vlse.nxv2f32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     i32 %1,
     i32 %2)
@@ -677,6 +707,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vlse.nxv4f32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   i32,
   i32);
@@ -689,6 +720,7 @@ define <vscale x 4 x float> @intrinsic_vlse_v_nxv4f32_nxv4f32(<vscale x 4 x floa
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vlse.nxv4f32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     i32 %1,
     i32 %2)
@@ -722,6 +754,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vlse.nxv8f32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   i32,
   i32);
@@ -734,6 +767,7 @@ define <vscale x 8 x float> @intrinsic_vlse_v_nxv8f32_nxv8f32(<vscale x 8 x floa
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vlse.nxv8f32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     i32 %1,
     i32 %2)
@@ -767,6 +801,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vlse.nxv16f32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   i32,
   i32);
@@ -779,6 +814,7 @@ define <vscale x 16 x float> @intrinsic_vlse_v_nxv16f32_nxv16f32(<vscale x 16 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vlse.nxv16f32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     i32 %1,
     i32 %2)
@@ -812,6 +848,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   i32,
   i32);
@@ -824,6 +861,7 @@ define <vscale x 1 x i16> @intrinsic_vlse_v_nxv1i16_nxv1i16(<vscale x 1 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     i32 %1,
     i32 %2)
@@ -857,6 +895,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   i32,
   i32);
@@ -869,6 +908,7 @@ define <vscale x 2 x i16> @intrinsic_vlse_v_nxv2i16_nxv2i16(<vscale x 2 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     i32 %1,
     i32 %2)
@@ -902,6 +942,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   i32,
   i32);
@@ -914,6 +955,7 @@ define <vscale x 4 x i16> @intrinsic_vlse_v_nxv4i16_nxv4i16(<vscale x 4 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     i32 %1,
     i32 %2)
@@ -947,6 +989,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   i32,
   i32);
@@ -959,6 +1002,7 @@ define <vscale x 8 x i16> @intrinsic_vlse_v_nxv8i16_nxv8i16(<vscale x 8 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     i32 %1,
     i32 %2)
@@ -992,6 +1036,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   i32,
   i32);
@@ -1004,6 +1049,7 @@ define <vscale x 16 x i16> @intrinsic_vlse_v_nxv16i16_nxv16i16(<vscale x 16 x i1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     i32 %1,
     i32 %2)
@@ -1037,6 +1083,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   i32,
   i32);
@@ -1049,6 +1096,7 @@ define <vscale x 32 x i16> @intrinsic_vlse_v_nxv32i16_nxv32i16(<vscale x 32 x i1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     i32 %1,
     i32 %2)
@@ -1082,6 +1130,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vlse.nxv1f16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   i32,
   i32);
@@ -1094,6 +1143,7 @@ define <vscale x 1 x half> @intrinsic_vlse_v_nxv1f16_nxv1f16(<vscale x 1 x half>
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vlse.nxv1f16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     i32 %1,
     i32 %2)
@@ -1127,6 +1177,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vlse.nxv2f16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   i32,
   i32);
@@ -1139,6 +1190,7 @@ define <vscale x 2 x half> @intrinsic_vlse_v_nxv2f16_nxv2f16(<vscale x 2 x half>
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vlse.nxv2f16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     i32 %1,
     i32 %2)
@@ -1172,6 +1224,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vlse.nxv4f16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   i32,
   i32);
@@ -1184,6 +1237,7 @@ define <vscale x 4 x half> @intrinsic_vlse_v_nxv4f16_nxv4f16(<vscale x 4 x half>
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vlse.nxv4f16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     i32 %1,
     i32 %2)
@@ -1217,6 +1271,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vlse.nxv8f16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   i32,
   i32);
@@ -1229,6 +1284,7 @@ define <vscale x 8 x half> @intrinsic_vlse_v_nxv8f16_nxv8f16(<vscale x 8 x half>
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vlse.nxv8f16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     i32 %1,
     i32 %2)
@@ -1262,6 +1318,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vlse.nxv16f16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   i32,
   i32);
@@ -1274,6 +1331,7 @@ define <vscale x 16 x half> @intrinsic_vlse_v_nxv16f16_nxv16f16(<vscale x 16 x h
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vlse.nxv16f16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     i32 %1,
     i32 %2)
@@ -1307,6 +1365,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vlse.nxv32f16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   i32,
   i32);
@@ -1319,6 +1378,7 @@ define <vscale x 32 x half> @intrinsic_vlse_v_nxv32f16_nxv32f16(<vscale x 32 x h
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vlse.nxv32f16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     i32 %1,
     i32 %2)
@@ -1352,6 +1412,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   i32,
   i32);
@@ -1364,6 +1425,7 @@ define <vscale x 1 x i8> @intrinsic_vlse_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     i32 %1,
     i32 %2)
@@ -1397,6 +1459,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   i32,
   i32);
@@ -1409,6 +1472,7 @@ define <vscale x 2 x i8> @intrinsic_vlse_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     i32 %1,
     i32 %2)
@@ -1442,6 +1506,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   i32,
   i32);
@@ -1454,6 +1519,7 @@ define <vscale x 4 x i8> @intrinsic_vlse_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     i32 %1,
     i32 %2)
@@ -1487,6 +1553,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   i32,
   i32);
@@ -1499,6 +1566,7 @@ define <vscale x 8 x i8> @intrinsic_vlse_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     i32 %1,
     i32 %2)
@@ -1532,6 +1600,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   i32,
   i32);
@@ -1544,6 +1613,7 @@ define <vscale x 16 x i8> @intrinsic_vlse_v_nxv16i8_nxv16i8(<vscale x 16 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     i32 %1,
     i32 %2)
@@ -1577,6 +1647,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   i32,
   i32);
@@ -1589,6 +1660,7 @@ define <vscale x 32 x i8> @intrinsic_vlse_v_nxv32i8_nxv32i8(<vscale x 32 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     i32 %1,
     i32 %2)
@@ -1622,6 +1694,7 @@ entry:
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   i32,
   i32);
@@ -1634,6 +1707,7 @@ define <vscale x 64 x i8> @intrinsic_vlse_v_nxv64i8_nxv64i8(<vscale x 64 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     i32 %1,
     i32 %2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll
index 62fa095e3a14b..83c5301555b21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlse-rv64.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   i64,
   i64);
@@ -14,6 +15,7 @@ define <vscale x 1 x i64> @intrinsic_vlse_v_nxv1i64_nxv1i64(<vscale x 1 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vlse.nxv1i64(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     i64 %1,
     i64 %2)
@@ -47,6 +49,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   i64,
   i64);
@@ -59,6 +62,7 @@ define <vscale x 2 x i64> @intrinsic_vlse_v_nxv2i64_nxv2i64(<vscale x 2 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vlse.nxv2i64(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     i64 %1,
     i64 %2)
@@ -92,6 +96,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   i64,
   i64);
@@ -104,6 +109,7 @@ define <vscale x 4 x i64> @intrinsic_vlse_v_nxv4i64_nxv4i64(<vscale x 4 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vlse.nxv4i64(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     i64 %1,
     i64 %2)
@@ -137,6 +143,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   i64,
   i64);
@@ -149,6 +156,7 @@ define <vscale x 8 x i64> @intrinsic_vlse_v_nxv8i64_nxv8i64(<vscale x 8 x i64>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vlse.nxv8i64(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     i64 %1,
     i64 %2)
@@ -182,6 +190,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vlse.nxv1f64(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   i64,
   i64);
@@ -194,6 +203,7 @@ define <vscale x 1 x double> @intrinsic_vlse_v_nxv1f64_nxv1f64(<vscale x 1 x dou
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vlse.nxv1f64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     i64 %1,
     i64 %2)
@@ -227,6 +237,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vlse.nxv2f64(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   i64,
   i64);
@@ -239,6 +250,7 @@ define <vscale x 2 x double> @intrinsic_vlse_v_nxv2f64_nxv2f64(<vscale x 2 x dou
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vlse.nxv2f64(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     i64 %1,
     i64 %2)
@@ -272,6 +284,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vlse.nxv4f64(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   i64,
   i64);
@@ -284,6 +297,7 @@ define <vscale x 4 x double> @intrinsic_vlse_v_nxv4f64_nxv4f64(<vscale x 4 x dou
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vlse.nxv4f64(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     i64 %1,
     i64 %2)
@@ -317,6 +331,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vlse.nxv8f64(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   i64,
   i64);
@@ -329,6 +344,7 @@ define <vscale x 8 x double> @intrinsic_vlse_v_nxv8f64_nxv8f64(<vscale x 8 x dou
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vlse.nxv8f64(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     i64 %1,
     i64 %2)
@@ -362,6 +378,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   i64,
   i64);
@@ -374,6 +391,7 @@ define <vscale x 1 x i32> @intrinsic_vlse_v_nxv1i32_nxv1i32(<vscale x 1 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vlse.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     i64 %1,
     i64 %2)
@@ -407,6 +425,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   i64,
   i64);
@@ -419,6 +438,7 @@ define <vscale x 2 x i32> @intrinsic_vlse_v_nxv2i32_nxv2i32(<vscale x 2 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vlse.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     i64 %1,
     i64 %2)
@@ -452,6 +472,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   i64,
   i64);
@@ -464,6 +485,7 @@ define <vscale x 4 x i32> @intrinsic_vlse_v_nxv4i32_nxv4i32(<vscale x 4 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vlse.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     i64 %1,
     i64 %2)
@@ -497,6 +519,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   i64,
   i64);
@@ -509,6 +532,7 @@ define <vscale x 8 x i32> @intrinsic_vlse_v_nxv8i32_nxv8i32(<vscale x 8 x i32>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vlse.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     i64 %1,
     i64 %2)
@@ -542,6 +566,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   i64,
   i64);
@@ -554,6 +579,7 @@ define <vscale x 16 x i32> @intrinsic_vlse_v_nxv16i32_nxv16i32(<vscale x 16 x i3
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vlse.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     i64 %1,
     i64 %2)
@@ -587,6 +613,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vlse.nxv1f32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   i64,
   i64);
@@ -599,6 +626,7 @@ define <vscale x 1 x float> @intrinsic_vlse_v_nxv1f32_nxv1f32(<vscale x 1 x floa
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vlse.nxv1f32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     i64 %1,
     i64 %2)
@@ -632,6 +660,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vlse.nxv2f32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   i64,
   i64);
@@ -644,6 +673,7 @@ define <vscale x 2 x float> @intrinsic_vlse_v_nxv2f32_nxv2f32(<vscale x 2 x floa
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vlse.nxv2f32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     i64 %1,
     i64 %2)
@@ -677,6 +707,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vlse.nxv4f32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   i64,
   i64);
@@ -689,6 +720,7 @@ define <vscale x 4 x float> @intrinsic_vlse_v_nxv4f32_nxv4f32(<vscale x 4 x floa
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vlse.nxv4f32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     i64 %1,
     i64 %2)
@@ -722,6 +754,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vlse.nxv8f32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   i64,
   i64);
@@ -734,6 +767,7 @@ define <vscale x 8 x float> @intrinsic_vlse_v_nxv8f32_nxv8f32(<vscale x 8 x floa
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vlse.nxv8f32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     i64 %1,
     i64 %2)
@@ -767,6 +801,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vlse.nxv16f32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   i64,
   i64);
@@ -779,6 +814,7 @@ define <vscale x 16 x float> @intrinsic_vlse_v_nxv16f32_nxv16f32(<vscale x 16 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vlse.nxv16f32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     i64 %1,
     i64 %2)
@@ -812,6 +848,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   i64,
   i64);
@@ -824,6 +861,7 @@ define <vscale x 1 x i16> @intrinsic_vlse_v_nxv1i16_nxv1i16(<vscale x 1 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vlse.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     i64 %1,
     i64 %2)
@@ -857,6 +895,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   i64,
   i64);
@@ -869,6 +908,7 @@ define <vscale x 2 x i16> @intrinsic_vlse_v_nxv2i16_nxv2i16(<vscale x 2 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vlse.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     i64 %1,
     i64 %2)
@@ -902,6 +942,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   i64,
   i64);
@@ -914,6 +955,7 @@ define <vscale x 4 x i16> @intrinsic_vlse_v_nxv4i16_nxv4i16(<vscale x 4 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vlse.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     i64 %1,
     i64 %2)
@@ -947,6 +989,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   i64,
   i64);
@@ -959,6 +1002,7 @@ define <vscale x 8 x i16> @intrinsic_vlse_v_nxv8i16_nxv8i16(<vscale x 8 x i16>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vlse.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     i64 %1,
     i64 %2)
@@ -992,6 +1036,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   i64,
   i64);
@@ -1004,6 +1049,7 @@ define <vscale x 16 x i16> @intrinsic_vlse_v_nxv16i16_nxv16i16(<vscale x 16 x i1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vlse.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     i64 %1,
     i64 %2)
@@ -1037,6 +1083,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   i64,
   i64);
@@ -1049,6 +1096,7 @@ define <vscale x 32 x i16> @intrinsic_vlse_v_nxv32i16_nxv32i16(<vscale x 32 x i1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vlse.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     i64 %1,
     i64 %2)
@@ -1082,6 +1130,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vlse.nxv1f16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   i64,
   i64);
@@ -1094,6 +1143,7 @@ define <vscale x 1 x half> @intrinsic_vlse_v_nxv1f16_nxv1f16(<vscale x 1 x half>
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vlse.nxv1f16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     i64 %1,
     i64 %2)
@@ -1127,6 +1177,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vlse.nxv2f16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   i64,
   i64);
@@ -1139,6 +1190,7 @@ define <vscale x 2 x half> @intrinsic_vlse_v_nxv2f16_nxv2f16(<vscale x 2 x half>
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vlse.nxv2f16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     i64 %1,
     i64 %2)
@@ -1172,6 +1224,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vlse.nxv4f16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   i64,
   i64);
@@ -1184,6 +1237,7 @@ define <vscale x 4 x half> @intrinsic_vlse_v_nxv4f16_nxv4f16(<vscale x 4 x half>
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vlse.nxv4f16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     i64 %1,
     i64 %2)
@@ -1217,6 +1271,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vlse.nxv8f16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   i64,
   i64);
@@ -1229,6 +1284,7 @@ define <vscale x 8 x half> @intrinsic_vlse_v_nxv8f16_nxv8f16(<vscale x 8 x half>
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vlse.nxv8f16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     i64 %1,
     i64 %2)
@@ -1262,6 +1318,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vlse.nxv16f16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   i64,
   i64);
@@ -1274,6 +1331,7 @@ define <vscale x 16 x half> @intrinsic_vlse_v_nxv16f16_nxv16f16(<vscale x 16 x h
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vlse.nxv16f16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     i64 %1,
     i64 %2)
@@ -1307,6 +1365,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vlse.nxv32f16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   i64,
   i64);
@@ -1319,6 +1378,7 @@ define <vscale x 32 x half> @intrinsic_vlse_v_nxv32f16_nxv32f16(<vscale x 32 x h
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vlse.nxv32f16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     i64 %1,
     i64 %2)
@@ -1352,6 +1412,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   i64,
   i64);
@@ -1364,6 +1425,7 @@ define <vscale x 1 x i8> @intrinsic_vlse_v_nxv1i8_nxv1i8(<vscale x 1 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vlse.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     i64 %1,
     i64 %2)
@@ -1397,6 +1459,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   i64,
   i64);
@@ -1409,6 +1472,7 @@ define <vscale x 2 x i8> @intrinsic_vlse_v_nxv2i8_nxv2i8(<vscale x 2 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vlse.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     i64 %1,
     i64 %2)
@@ -1442,6 +1506,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   i64,
   i64);
@@ -1454,6 +1519,7 @@ define <vscale x 4 x i8> @intrinsic_vlse_v_nxv4i8_nxv4i8(<vscale x 4 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vlse.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     i64 %1,
     i64 %2)
@@ -1487,6 +1553,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   i64,
   i64);
@@ -1499,6 +1566,7 @@ define <vscale x 8 x i8> @intrinsic_vlse_v_nxv8i8_nxv8i8(<vscale x 8 x i8>* %0,
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vlse.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     i64 %1,
     i64 %2)
@@ -1532,6 +1600,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   i64,
   i64);
@@ -1544,6 +1613,7 @@ define <vscale x 16 x i8> @intrinsic_vlse_v_nxv16i8_nxv16i8(<vscale x 16 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vlse.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     i64 %1,
     i64 %2)
@@ -1577,6 +1647,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   i64,
   i64);
@@ -1589,6 +1660,7 @@ define <vscale x 32 x i8> @intrinsic_vlse_v_nxv32i8_nxv32i8(<vscale x 32 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vlse.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     i64 %1,
     i64 %2)
@@ -1622,6 +1694,7 @@ entry:
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   i64,
   i64);
@@ -1634,6 +1707,7 @@ define <vscale x 64 x i8> @intrinsic_vlse_v_nxv64i8_nxv64i8(<vscale x 64 x i8>*
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vlse.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     i64 %1,
     i64 %2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
index 64d73a94e0042..ce404ddffb503 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv32.ll
@@ -3,6 +3,7 @@
 ; RUN:   < %s | FileCheck %s
 
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i32>,
   i32);
@@ -16,6 +17,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -49,6 +51,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i32>,
   i32);
@@ -62,6 +65,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -95,6 +99,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i32>,
   i32);
@@ -108,6 +113,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -141,6 +147,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i32>,
   i32);
@@ -154,6 +161,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -187,6 +195,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i32>,
   i32);
@@ -200,6 +209,7 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -233,6 +243,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i32>,
   i32);
@@ -246,6 +257,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -279,6 +291,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i32>,
   i32);
@@ -292,6 +305,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -325,6 +339,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i32>,
   i32);
@@ -338,6 +353,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -371,6 +387,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i32>,
   i32);
@@ -384,6 +401,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -417,6 +435,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i32>,
   i32);
@@ -430,6 +449,7 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -463,6 +483,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i32>,
   i32);
@@ -475,6 +496,7 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -508,6 +530,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i32>,
   i32);
@@ -520,6 +543,7 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -553,6 +577,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i32>,
   i32);
@@ -565,6 +590,7 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -598,6 +624,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i32>,
   i32);
@@ -610,6 +637,7 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -643,6 +671,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i32>,
   i32);
@@ -655,6 +684,7 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i32(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -688,6 +718,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i32>,
   i32);
@@ -701,6 +732,7 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -734,6 +766,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i32>,
   i32);
@@ -747,6 +780,7 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -780,6 +814,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i32>,
   i32);
@@ -793,6 +828,7 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -826,6 +862,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i32>,
   i32);
@@ -839,6 +876,7 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -872,6 +910,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i32>,
   i32);
@@ -885,6 +924,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -918,6 +958,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i32>,
   i32);
@@ -931,6 +972,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -964,6 +1006,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i32>,
   i32);
@@ -977,6 +1020,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -1010,6 +1054,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i32>,
   i32);
@@ -1023,6 +1068,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -1056,6 +1102,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i32>,
   i32);
@@ -1069,6 +1116,7 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -1102,6 +1150,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i32>,
   i32);
@@ -1114,6 +1163,7 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -1147,6 +1197,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i32>,
   i32);
@@ -1159,6 +1210,7 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -1192,6 +1244,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i32>,
   i32);
@@ -1204,6 +1257,7 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -1237,6 +1291,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i32>,
   i32);
@@ -1249,6 +1304,7 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -1282,6 +1338,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i32>,
   i32);
@@ -1294,6 +1351,7 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i32(<vsc
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i32> %1,
     i32 %2)
@@ -1327,6 +1385,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i32>,
   i32);
@@ -1340,6 +1399,7 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i32> %1,
     i32 %2)
@@ -1373,6 +1433,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i32>,
   i32);
@@ -1386,6 +1447,7 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i32> %1,
     i32 %2)
@@ -1419,6 +1481,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i32>,
   i32);
@@ -1432,6 +1495,7 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i32> %1,
     i32 %2)
@@ -1465,6 +1529,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i32>,
   i32);
@@ -1478,6 +1543,7 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i32> %1,
     i32 %2)
@@ -1511,6 +1577,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i16>,
   i32);
@@ -1524,6 +1591,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -1557,6 +1625,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i16>,
   i32);
@@ -1570,6 +1639,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -1603,6 +1673,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i16>,
   i32);
@@ -1616,6 +1687,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -1649,6 +1721,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i16>,
   i32);
@@ -1662,6 +1735,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -1695,6 +1769,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i16>,
   i32);
@@ -1708,6 +1783,7 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -1741,6 +1817,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i16>,
   i32);
@@ -1754,6 +1831,7 @@ define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     <vscale x 32 x i16> %1,
     i32 %2)
@@ -1787,6 +1865,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i16>,
   i32);
@@ -1799,6 +1878,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -1832,6 +1912,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i16>,
   i32);
@@ -1844,6 +1925,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -1877,6 +1959,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i16>,
   i32);
@@ -1889,6 +1972,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -1922,6 +2006,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i16>,
   i32);
@@ -1934,6 +2019,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -1967,6 +2053,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i16>,
   i32);
@@ -1979,6 +2066,7 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -2012,6 +2100,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i16>,
   i32);
@@ -2024,6 +2113,7 @@ define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     <vscale x 32 x i16> %1,
     i32 %2)
@@ -2057,6 +2147,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i16>,
   i32);
@@ -2070,6 +2161,7 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -2103,6 +2195,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i16>,
   i32);
@@ -2116,6 +2209,7 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -2149,6 +2243,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i16>,
   i32);
@@ -2162,6 +2257,7 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -2195,6 +2291,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i16>,
   i32);
@@ -2208,6 +2305,7 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -2241,6 +2339,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i16>,
   i32);
@@ -2254,6 +2353,7 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -2287,6 +2387,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i16>,
   i32);
@@ -2300,6 +2401,7 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -2333,6 +2435,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i16>,
   i32);
@@ -2346,6 +2449,7 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -2379,6 +2483,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i16>,
   i32);
@@ -2392,6 +2497,7 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -2425,6 +2531,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i16>,
   i32);
@@ -2438,6 +2545,7 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -2471,6 +2579,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i16>,
   i32);
@@ -2483,6 +2592,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -2516,6 +2626,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i16>,
   i32);
@@ -2528,6 +2639,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -2561,6 +2673,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i16>,
   i32);
@@ -2573,6 +2686,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -2606,6 +2720,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i16>,
   i32);
@@ -2618,6 +2733,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -2651,6 +2767,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i16>,
   i32);
@@ -2663,6 +2780,7 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i16(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -2696,6 +2814,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i16>,
   i32);
@@ -2708,6 +2827,7 @@ define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i16(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     <vscale x 32 x i16> %1,
     i32 %2)
@@ -2741,6 +2861,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i16>,
   i32);
@@ -2754,6 +2875,7 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -2787,6 +2909,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i16>,
   i32);
@@ -2800,6 +2923,7 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -2833,6 +2957,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i16>,
   i32);
@@ -2846,6 +2971,7 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -2879,6 +3005,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i16>,
   i32);
@@ -2892,6 +3019,7 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -2925,6 +3053,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i16>,
   i32);
@@ -2938,6 +3067,7 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16(<vsc
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i16> %1,
     i32 %2)
@@ -2971,6 +3101,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i16>,
   i32);
@@ -2984,6 +3115,7 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i16> %1,
     i32 %2)
@@ -3017,6 +3149,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i16>,
   i32);
@@ -3030,6 +3163,7 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i16> %1,
     i32 %2)
@@ -3063,6 +3197,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i16>,
   i32);
@@ -3076,6 +3211,7 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i16> %1,
     i32 %2)
@@ -3109,6 +3245,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i16>,
   i32);
@@ -3122,6 +3259,7 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i16> %1,
     i32 %2)
@@ -3155,6 +3293,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i8>,
   i32);
@@ -3167,6 +3306,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -3200,6 +3340,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i8>,
   i32);
@@ -3212,6 +3353,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -3245,6 +3387,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i8>,
   i32);
@@ -3257,6 +3400,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -3290,6 +3434,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i8>,
   i32);
@@ -3302,6 +3447,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -3335,6 +3481,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i8>,
   i32);
@@ -3347,6 +3494,7 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -3380,6 +3528,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i8>,
   i32);
@@ -3392,6 +3541,7 @@ define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     <vscale x 32 x i8> %1,
     i32 %2)
@@ -3425,6 +3575,7 @@ entry:
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   <vscale x 64 x i8>,
   i32);
@@ -3437,6 +3588,7 @@ define <vscale x 64 x i8> @intrinsic_vluxei_v_nxv64i8_nxv64i8_nxv64i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     <vscale x 64 x i8> %1,
     i32 %2)
@@ -3470,6 +3622,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i8>,
   i32);
@@ -3483,6 +3636,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -3516,6 +3670,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i8>,
   i32);
@@ -3529,6 +3684,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -3562,6 +3718,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i8>,
   i32);
@@ -3575,6 +3732,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -3608,6 +3766,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i8>,
   i32);
@@ -3621,6 +3780,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -3654,6 +3814,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i8>,
   i32);
@@ -3667,6 +3828,7 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -3700,6 +3862,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i8>,
   i32);
@@ -3713,6 +3876,7 @@ define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     <vscale x 32 x i8> %1,
     i32 %2)
@@ -3746,6 +3910,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i8>,
   i32);
@@ -3759,6 +3924,7 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -3792,6 +3958,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i8>,
   i32);
@@ -3805,6 +3972,7 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -3838,6 +4006,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i8>,
   i32);
@@ -3851,6 +4020,7 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -3884,6 +4054,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i8>,
   i32);
@@ -3897,6 +4068,7 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -3930,6 +4102,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i8>,
   i32);
@@ -3943,6 +4116,7 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -3976,6 +4150,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i8>,
   i32);
@@ -3989,6 +4164,7 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -4022,6 +4198,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i8>,
   i32);
@@ -4035,6 +4212,7 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -4068,6 +4246,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i8>,
   i32);
@@ -4081,6 +4260,7 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -4114,6 +4294,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i8>,
   i32);
@@ -4127,6 +4308,7 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -4160,6 +4342,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i8>,
   i32);
@@ -4173,6 +4356,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -4206,6 +4390,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i8>,
   i32);
@@ -4219,6 +4404,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -4252,6 +4438,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i8>,
   i32);
@@ -4265,6 +4452,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -4298,6 +4486,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i8>,
   i32);
@@ -4311,6 +4500,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -4344,6 +4534,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i8>,
   i32);
@@ -4357,6 +4548,7 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -4390,6 +4582,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i8>,
   i32);
@@ -4403,6 +4596,7 @@ define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     <vscale x 32 x i8> %1,
     i32 %2)
@@ -4436,6 +4630,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i8>,
   i32);
@@ -4449,6 +4644,7 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -4482,6 +4678,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i8>,
   i32);
@@ -4495,6 +4692,7 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -4528,6 +4726,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i8>,
   i32);
@@ -4541,6 +4740,7 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -4574,6 +4774,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i8>,
   i32);
@@ -4587,6 +4788,7 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
@@ -4620,6 +4822,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i8>,
   i32);
@@ -4633,6 +4836,7 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i8> %1,
     i32 %2)
@@ -4666,6 +4870,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i8>,
   i32);
@@ -4679,6 +4884,7 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i8> %1,
     i32 %2)
@@ -4712,6 +4918,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i8>,
   i32);
@@ -4725,6 +4932,7 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i8> %1,
     i32 %2)
@@ -4758,6 +4966,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i8>,
   i32);
@@ -4771,6 +4980,7 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i8> %1,
     i32 %2)
@@ -4804,6 +5014,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i8>,
   i32);
@@ -4817,6 +5028,7 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i8> %1,
     i32 %2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
index efec1362cb9f2..f90ff809bea29 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vluxei-rv64.ll
@@ -2,6 +2,7 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+f,+d -verify-machineinstrs \
 ; RUN:   < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i64>,
   i64);
@@ -15,6 +16,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i64(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i64(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -48,6 +50,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i64>,
   i64);
@@ -61,6 +64,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i64(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i64(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -94,6 +98,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i64>,
   i64);
@@ -107,6 +112,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i64(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i64(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -140,6 +146,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i64>,
   i64);
@@ -153,6 +160,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i64(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i64(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -186,6 +194,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i64>,
   i64);
@@ -199,6 +208,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i64(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -232,6 +242,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i64>,
   i64);
@@ -245,6 +256,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i64(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -278,6 +290,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i64>,
   i64);
@@ -291,6 +304,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i64(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -324,6 +338,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i64>,
   i64);
@@ -337,6 +352,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i64(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -370,6 +386,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i64>,
   i64);
@@ -383,6 +400,7 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i64(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -416,6 +434,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i64>,
   i64);
@@ -429,6 +448,7 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i64(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -462,6 +482,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i64>,
   i64);
@@ -475,6 +496,7 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i64(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -508,6 +530,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i64>,
   i64);
@@ -521,6 +544,7 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i64(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -554,6 +578,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i64>,
   i64);
@@ -566,6 +591,7 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i64(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -599,6 +625,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i64>,
   i64);
@@ -611,6 +638,7 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i64(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -644,6 +672,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i64>,
   i64);
@@ -656,6 +685,7 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i64(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -689,6 +719,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i64>,
   i64);
@@ -701,6 +732,7 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i64(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -734,6 +766,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i64(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i64>,
   i64);
@@ -747,6 +780,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i64(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -780,6 +814,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i64(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i64>,
   i64);
@@ -793,6 +828,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i64(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -826,6 +862,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i64(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i64>,
   i64);
@@ -839,6 +876,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i64(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -872,6 +910,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i64(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i64>,
   i64);
@@ -885,6 +924,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i64(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i64(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -918,6 +958,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i64>,
   i64);
@@ -931,6 +972,7 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i64(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -964,6 +1006,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i64>,
   i64);
@@ -977,6 +1020,7 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i64(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -1010,6 +1054,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i64>,
   i64);
@@ -1023,6 +1068,7 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i64(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -1056,6 +1102,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i64>,
   i64);
@@ -1069,6 +1116,7 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i64(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -1102,6 +1150,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i64(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i64>,
   i64);
@@ -1114,6 +1163,7 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i64(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i64> %1,
     i64 %2)
@@ -1147,6 +1197,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i64(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i64>,
   i64);
@@ -1159,6 +1210,7 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i64(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i64> %1,
     i64 %2)
@@ -1192,6 +1244,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i64(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i64>,
   i64);
@@ -1204,6 +1257,7 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i64(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i64> %1,
     i64 %2)
@@ -1237,6 +1291,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i64(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i64>,
   i64);
@@ -1249,6 +1304,7 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i64(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i64(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i64> %1,
     i64 %2)
@@ -1282,6 +1338,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i32>,
   i64);
@@ -1295,6 +1352,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i32(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i32(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -1328,6 +1386,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i32>,
   i64);
@@ -1341,6 +1400,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i32(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i32(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -1374,6 +1434,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i32>,
   i64);
@@ -1387,6 +1448,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i32(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i32(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -1420,6 +1482,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i32>,
   i64);
@@ -1433,6 +1496,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i32(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i32(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -1466,6 +1530,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i32>,
   i64);
@@ -1479,6 +1544,7 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i32(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -1512,6 +1578,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i32>,
   i64);
@@ -1525,6 +1592,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i32(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -1558,6 +1626,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i32>,
   i64);
@@ -1571,6 +1640,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i32(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -1604,6 +1674,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i32>,
   i64);
@@ -1617,6 +1688,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i32(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -1650,6 +1722,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i32>,
   i64);
@@ -1663,6 +1736,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i32(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -1696,6 +1770,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i32>,
   i64);
@@ -1709,6 +1784,7 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i32(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i32(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -1742,6 +1818,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i32>,
   i64);
@@ -1754,6 +1831,7 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i32(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -1787,6 +1865,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i32>,
   i64);
@@ -1799,6 +1878,7 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i32(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -1832,6 +1912,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i32>,
   i64);
@@ -1844,6 +1925,7 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i32(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -1877,6 +1959,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i32>,
   i64);
@@ -1889,6 +1972,7 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i32(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -1922,6 +2006,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i32>,
   i64);
@@ -1934,6 +2019,7 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i32(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i32(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -1967,6 +2053,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i32>,
   i64);
@@ -1980,6 +2067,7 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i32(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -2013,6 +2101,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i32>,
   i64);
@@ -2026,6 +2115,7 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i32(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -2059,6 +2149,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i32>,
   i64);
@@ -2072,6 +2163,7 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i32(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -2105,6 +2197,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i32>,
   i64);
@@ -2118,6 +2211,7 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i32(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -2151,6 +2245,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i32>,
   i64);
@@ -2164,6 +2259,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i32(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -2197,6 +2293,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i32>,
   i64);
@@ -2210,6 +2307,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i32(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -2243,6 +2341,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i32>,
   i64);
@@ -2256,6 +2355,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i32(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -2289,6 +2389,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i32>,
   i64);
@@ -2302,6 +2403,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i32(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i32(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -2335,6 +2437,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i32>,
   i64);
@@ -2348,6 +2451,7 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i32(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i32(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -2381,6 +2485,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i32>,
   i64);
@@ -2393,6 +2498,7 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i32(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -2426,6 +2532,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i32>,
   i64);
@@ -2438,6 +2545,7 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i32(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -2471,6 +2579,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i32>,
   i64);
@@ -2483,6 +2592,7 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i32(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -2516,6 +2626,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i32>,
   i64);
@@ -2528,6 +2639,7 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i32(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -2561,6 +2673,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i32>,
   i64);
@@ -2573,6 +2686,7 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i32(<vsc
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i32(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i32> %1,
     i64 %2)
@@ -2606,6 +2720,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i32>,
   i64);
@@ -2619,6 +2734,7 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i32(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i32> %1,
     i64 %2)
@@ -2652,6 +2768,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i32>,
   i64);
@@ -2665,6 +2782,7 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i32(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i32> %1,
     i64 %2)
@@ -2698,6 +2816,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i32>,
   i64);
@@ -2711,6 +2830,7 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i32(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i32> %1,
     i64 %2)
@@ -2744,6 +2864,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i32>,
   i64);
@@ -2757,6 +2878,7 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i32(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i32(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i32> %1,
     i64 %2)
@@ -2790,6 +2912,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i16>,
   i64);
@@ -2803,6 +2926,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i16(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i16(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -2836,6 +2960,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i16>,
   i64);
@@ -2849,6 +2974,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i16(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i16(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -2882,6 +3008,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i16>,
   i64);
@@ -2895,6 +3022,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i16(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i16(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -2928,6 +3056,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i16>,
   i64);
@@ -2941,6 +3070,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i16(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i16(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -2974,6 +3104,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i16>,
   i64);
@@ -2987,6 +3118,7 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i16(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -3020,6 +3152,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i16>,
   i64);
@@ -3033,6 +3166,7 @@ define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i16(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     <vscale x 32 x i16> %1,
     i64 %2)
@@ -3066,6 +3200,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i16>,
   i64);
@@ -3078,6 +3213,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i16(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -3111,6 +3247,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i16>,
   i64);
@@ -3123,6 +3260,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i16(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -3156,6 +3294,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i16>,
   i64);
@@ -3168,6 +3307,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i16(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -3201,6 +3341,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i16>,
   i64);
@@ -3213,6 +3354,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i16(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -3246,6 +3388,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i16>,
   i64);
@@ -3258,6 +3401,7 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i16(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -3291,6 +3435,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i16>,
   i64);
@@ -3303,6 +3448,7 @@ define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i16(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     <vscale x 32 x i16> %1,
     i64 %2)
@@ -3336,6 +3482,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i16>,
   i64);
@@ -3349,6 +3496,7 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i16(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -3382,6 +3530,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i16>,
   i64);
@@ -3395,6 +3544,7 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i16(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -3428,6 +3578,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i16>,
   i64);
@@ -3441,6 +3592,7 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i16(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -3474,6 +3626,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i16>,
   i64);
@@ -3487,6 +3640,7 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i16(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -3520,6 +3674,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i16>,
   i64);
@@ -3533,6 +3688,7 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i16(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i16(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -3566,6 +3722,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i16>,
   i64);
@@ -3579,6 +3736,7 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i16(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -3612,6 +3770,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i16>,
   i64);
@@ -3625,6 +3784,7 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i16(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -3658,6 +3818,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i16>,
   i64);
@@ -3671,6 +3832,7 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i16(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -3704,6 +3866,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i16>,
   i64);
@@ -3717,6 +3880,7 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i16(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -3750,6 +3914,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i16(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i16>,
   i64);
@@ -3762,6 +3927,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i16(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -3795,6 +3961,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i16(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i16>,
   i64);
@@ -3807,6 +3974,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i16(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -3840,6 +4008,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i16(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i16>,
   i64);
@@ -3852,6 +4021,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i16(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -3885,6 +4055,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i16(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i16>,
   i64);
@@ -3897,6 +4068,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i16(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i16(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -3930,6 +4102,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i16(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i16>,
   i64);
@@ -3942,6 +4115,7 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i16(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i16(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -3975,6 +4149,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i16(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i16>,
   i64);
@@ -3987,6 +4162,7 @@ define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i16(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i16(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     <vscale x 32 x i16> %1,
     i64 %2)
@@ -4020,6 +4196,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i16>,
   i64);
@@ -4033,6 +4210,7 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i16(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -4066,6 +4244,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i16>,
   i64);
@@ -4079,6 +4258,7 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i16(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -4112,6 +4292,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i16>,
   i64);
@@ -4125,6 +4306,7 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i16(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -4158,6 +4340,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i16>,
   i64);
@@ -4171,6 +4354,7 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i16(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -4204,6 +4388,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i16>,
   i64);
@@ -4217,6 +4402,7 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i16(<vsc
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i16(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i16> %1,
     i64 %2)
@@ -4250,6 +4436,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i16>,
   i64);
@@ -4263,6 +4450,7 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i16(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i16> %1,
     i64 %2)
@@ -4296,6 +4484,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i16>,
   i64);
@@ -4309,6 +4498,7 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i16(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i16> %1,
     i64 %2)
@@ -4342,6 +4532,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i16>,
   i64);
@@ -4355,6 +4546,7 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i16(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i16> %1,
     i64 %2)
@@ -4388,6 +4580,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i16>,
   i64);
@@ -4401,6 +4594,7 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i16(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i16(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i16> %1,
     i64 %2)
@@ -4434,6 +4628,7 @@ entry:
 }
 
 declare <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8(
+  <vscale x 1 x i8>,
   <vscale x 1 x i8>*,
   <vscale x 1 x i8>,
   i64);
@@ -4446,6 +4641,7 @@ define <vscale x 1 x i8> @intrinsic_vluxei_v_nxv1i8_nxv1i8_nxv1i8(<vscale x 1 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i8> @llvm.riscv.vluxei.nxv1i8.nxv1i8(
+    <vscale x 1 x i8> undef,
     <vscale x 1 x i8>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -4479,6 +4675,7 @@ entry:
 }
 
 declare <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8(
+  <vscale x 2 x i8>,
   <vscale x 2 x i8>*,
   <vscale x 2 x i8>,
   i64);
@@ -4491,6 +4688,7 @@ define <vscale x 2 x i8> @intrinsic_vluxei_v_nxv2i8_nxv2i8_nxv2i8(<vscale x 2 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i8> @llvm.riscv.vluxei.nxv2i8.nxv2i8(
+    <vscale x 2 x i8> undef,
     <vscale x 2 x i8>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -4524,6 +4722,7 @@ entry:
 }
 
 declare <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8(
+  <vscale x 4 x i8>,
   <vscale x 4 x i8>*,
   <vscale x 4 x i8>,
   i64);
@@ -4536,6 +4735,7 @@ define <vscale x 4 x i8> @intrinsic_vluxei_v_nxv4i8_nxv4i8_nxv4i8(<vscale x 4 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i8> @llvm.riscv.vluxei.nxv4i8.nxv4i8(
+    <vscale x 4 x i8> undef,
     <vscale x 4 x i8>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -4569,6 +4769,7 @@ entry:
 }
 
 declare <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8(
+  <vscale x 8 x i8>,
   <vscale x 8 x i8>*,
   <vscale x 8 x i8>,
   i64);
@@ -4581,6 +4782,7 @@ define <vscale x 8 x i8> @intrinsic_vluxei_v_nxv8i8_nxv8i8_nxv8i8(<vscale x 8 x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i8> @llvm.riscv.vluxei.nxv8i8.nxv8i8(
+    <vscale x 8 x i8> undef,
     <vscale x 8 x i8>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -4614,6 +4816,7 @@ entry:
 }
 
 declare <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8(
+  <vscale x 16 x i8>,
   <vscale x 16 x i8>*,
   <vscale x 16 x i8>,
   i64);
@@ -4626,6 +4829,7 @@ define <vscale x 16 x i8> @intrinsic_vluxei_v_nxv16i8_nxv16i8_nxv16i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i8> @llvm.riscv.vluxei.nxv16i8.nxv16i8(
+    <vscale x 16 x i8> undef,
     <vscale x 16 x i8>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -4659,6 +4863,7 @@ entry:
 }
 
 declare <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8(
+  <vscale x 32 x i8>,
   <vscale x 32 x i8>*,
   <vscale x 32 x i8>,
   i64);
@@ -4671,6 +4876,7 @@ define <vscale x 32 x i8> @intrinsic_vluxei_v_nxv32i8_nxv32i8_nxv32i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i8> @llvm.riscv.vluxei.nxv32i8.nxv32i8(
+    <vscale x 32 x i8> undef,
     <vscale x 32 x i8>* %0,
     <vscale x 32 x i8> %1,
     i64 %2)
@@ -4704,6 +4910,7 @@ entry:
 }
 
 declare <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8(
+  <vscale x 64 x i8>,
   <vscale x 64 x i8>*,
   <vscale x 64 x i8>,
   i64);
@@ -4716,6 +4923,7 @@ define <vscale x 64 x i8> @intrinsic_vluxei_v_nxv64i8_nxv64i8_nxv64i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vluxei.nxv64i8.nxv64i8(
+    <vscale x 64 x i8> undef,
     <vscale x 64 x i8>* %0,
     <vscale x 64 x i8> %1,
     i64 %2)
@@ -4749,6 +4957,7 @@ entry:
 }
 
 declare <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8(
+  <vscale x 1 x i16>,
   <vscale x 1 x i16>*,
   <vscale x 1 x i8>,
   i64);
@@ -4762,6 +4971,7 @@ define <vscale x 1 x i16> @intrinsic_vluxei_v_nxv1i16_nxv1i16_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i16> @llvm.riscv.vluxei.nxv1i16.nxv1i8(
+    <vscale x 1 x i16> undef,
     <vscale x 1 x i16>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -4795,6 +5005,7 @@ entry:
 }
 
 declare <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8(
+  <vscale x 2 x i16>,
   <vscale x 2 x i16>*,
   <vscale x 2 x i8>,
   i64);
@@ -4808,6 +5019,7 @@ define <vscale x 2 x i16> @intrinsic_vluxei_v_nxv2i16_nxv2i16_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i16> @llvm.riscv.vluxei.nxv2i16.nxv2i8(
+    <vscale x 2 x i16> undef,
     <vscale x 2 x i16>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -4841,6 +5053,7 @@ entry:
 }
 
 declare <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8(
+  <vscale x 4 x i16>,
   <vscale x 4 x i16>*,
   <vscale x 4 x i8>,
   i64);
@@ -4854,6 +5067,7 @@ define <vscale x 4 x i16> @intrinsic_vluxei_v_nxv4i16_nxv4i16_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i16> @llvm.riscv.vluxei.nxv4i16.nxv4i8(
+    <vscale x 4 x i16> undef,
     <vscale x 4 x i16>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -4887,6 +5101,7 @@ entry:
 }
 
 declare <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8(
+  <vscale x 8 x i16>,
   <vscale x 8 x i16>*,
   <vscale x 8 x i8>,
   i64);
@@ -4900,6 +5115,7 @@ define <vscale x 8 x i16> @intrinsic_vluxei_v_nxv8i16_nxv8i16_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i16> @llvm.riscv.vluxei.nxv8i16.nxv8i8(
+    <vscale x 8 x i16> undef,
     <vscale x 8 x i16>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -4933,6 +5149,7 @@ entry:
 }
 
 declare <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8(
+  <vscale x 16 x i16>,
   <vscale x 16 x i16>*,
   <vscale x 16 x i8>,
   i64);
@@ -4946,6 +5163,7 @@ define <vscale x 16 x i16> @intrinsic_vluxei_v_nxv16i16_nxv16i16_nxv16i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i16> @llvm.riscv.vluxei.nxv16i16.nxv16i8(
+    <vscale x 16 x i16> undef,
     <vscale x 16 x i16>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -4979,6 +5197,7 @@ entry:
 }
 
 declare <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8(
+  <vscale x 32 x i16>,
   <vscale x 32 x i16>*,
   <vscale x 32 x i8>,
   i64);
@@ -4992,6 +5211,7 @@ define <vscale x 32 x i16> @intrinsic_vluxei_v_nxv32i16_nxv32i16_nxv32i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vluxei.nxv32i16.nxv32i8(
+    <vscale x 32 x i16> undef,
     <vscale x 32 x i16>* %0,
     <vscale x 32 x i8> %1,
     i64 %2)
@@ -5025,6 +5245,7 @@ entry:
 }
 
 declare <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8(
+  <vscale x 1 x i32>,
   <vscale x 1 x i32>*,
   <vscale x 1 x i8>,
   i64);
@@ -5038,6 +5259,7 @@ define <vscale x 1 x i32> @intrinsic_vluxei_v_nxv1i32_nxv1i32_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i32> @llvm.riscv.vluxei.nxv1i32.nxv1i8(
+    <vscale x 1 x i32> undef,
     <vscale x 1 x i32>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5071,6 +5293,7 @@ entry:
 }
 
 declare <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8(
+  <vscale x 2 x i32>,
   <vscale x 2 x i32>*,
   <vscale x 2 x i8>,
   i64);
@@ -5084,6 +5307,7 @@ define <vscale x 2 x i32> @intrinsic_vluxei_v_nxv2i32_nxv2i32_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i32> @llvm.riscv.vluxei.nxv2i32.nxv2i8(
+    <vscale x 2 x i32> undef,
     <vscale x 2 x i32>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -5117,6 +5341,7 @@ entry:
 }
 
 declare <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8(
+  <vscale x 4 x i32>,
   <vscale x 4 x i32>*,
   <vscale x 4 x i8>,
   i64);
@@ -5130,6 +5355,7 @@ define <vscale x 4 x i32> @intrinsic_vluxei_v_nxv4i32_nxv4i32_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i32> @llvm.riscv.vluxei.nxv4i32.nxv4i8(
+    <vscale x 4 x i32> undef,
     <vscale x 4 x i32>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -5163,6 +5389,7 @@ entry:
 }
 
 declare <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8(
+  <vscale x 8 x i32>,
   <vscale x 8 x i32>*,
   <vscale x 8 x i8>,
   i64);
@@ -5176,6 +5403,7 @@ define <vscale x 8 x i32> @intrinsic_vluxei_v_nxv8i32_nxv8i32_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i32> @llvm.riscv.vluxei.nxv8i32.nxv8i8(
+    <vscale x 8 x i32> undef,
     <vscale x 8 x i32>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -5209,6 +5437,7 @@ entry:
 }
 
 declare <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8(
+  <vscale x 16 x i32>,
   <vscale x 16 x i32>*,
   <vscale x 16 x i8>,
   i64);
@@ -5222,6 +5451,7 @@ define <vscale x 16 x i32> @intrinsic_vluxei_v_nxv16i32_nxv16i32_nxv16i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vluxei.nxv16i32.nxv16i8(
+    <vscale x 16 x i32> undef,
     <vscale x 16 x i32>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -5255,6 +5485,7 @@ entry:
 }
 
 declare <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8(
+  <vscale x 1 x i64>,
   <vscale x 1 x i64>*,
   <vscale x 1 x i8>,
   i64);
@@ -5268,6 +5499,7 @@ define <vscale x 1 x i64> @intrinsic_vluxei_v_nxv1i64_nxv1i64_nxv1i8(<vscale x 1
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x i64> @llvm.riscv.vluxei.nxv1i64.nxv1i8(
+    <vscale x 1 x i64> undef,
     <vscale x 1 x i64>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5301,6 +5533,7 @@ entry:
 }
 
 declare <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8(
+  <vscale x 2 x i64>,
   <vscale x 2 x i64>*,
   <vscale x 2 x i8>,
   i64);
@@ -5314,6 +5547,7 @@ define <vscale x 2 x i64> @intrinsic_vluxei_v_nxv2i64_nxv2i64_nxv2i8(<vscale x 2
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x i64> @llvm.riscv.vluxei.nxv2i64.nxv2i8(
+    <vscale x 2 x i64> undef,
     <vscale x 2 x i64>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -5347,6 +5581,7 @@ entry:
 }
 
 declare <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8(
+  <vscale x 4 x i64>,
   <vscale x 4 x i64>*,
   <vscale x 4 x i8>,
   i64);
@@ -5360,6 +5595,7 @@ define <vscale x 4 x i64> @intrinsic_vluxei_v_nxv4i64_nxv4i64_nxv4i8(<vscale x 4
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x i64> @llvm.riscv.vluxei.nxv4i64.nxv4i8(
+    <vscale x 4 x i64> undef,
     <vscale x 4 x i64>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -5393,6 +5629,7 @@ entry:
 }
 
 declare <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8(
+  <vscale x 8 x i64>,
   <vscale x 8 x i64>*,
   <vscale x 8 x i8>,
   i64);
@@ -5406,6 +5643,7 @@ define <vscale x 8 x i64> @intrinsic_vluxei_v_nxv8i64_nxv8i64_nxv8i8(<vscale x 8
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vluxei.nxv8i64.nxv8i8(
+    <vscale x 8 x i64> undef,
     <vscale x 8 x i64>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -5439,6 +5677,7 @@ entry:
 }
 
 declare <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8(
+  <vscale x 1 x half>,
   <vscale x 1 x half>*,
   <vscale x 1 x i8>,
   i64);
@@ -5452,6 +5691,7 @@ define <vscale x 1 x half> @intrinsic_vluxei_v_nxv1f16_nxv1f16_nxv1i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x half> @llvm.riscv.vluxei.nxv1f16.nxv1i8(
+    <vscale x 1 x half> undef,
     <vscale x 1 x half>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5485,6 +5725,7 @@ entry:
 }
 
 declare <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8(
+  <vscale x 2 x half>,
   <vscale x 2 x half>*,
   <vscale x 2 x i8>,
   i64);
@@ -5498,6 +5739,7 @@ define <vscale x 2 x half> @intrinsic_vluxei_v_nxv2f16_nxv2f16_nxv2i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x half> @llvm.riscv.vluxei.nxv2f16.nxv2i8(
+    <vscale x 2 x half> undef,
     <vscale x 2 x half>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -5531,6 +5773,7 @@ entry:
 }
 
 declare <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8(
+  <vscale x 4 x half>,
   <vscale x 4 x half>*,
   <vscale x 4 x i8>,
   i64);
@@ -5544,6 +5787,7 @@ define <vscale x 4 x half> @intrinsic_vluxei_v_nxv4f16_nxv4f16_nxv4i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x half> @llvm.riscv.vluxei.nxv4f16.nxv4i8(
+    <vscale x 4 x half> undef,
     <vscale x 4 x half>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -5577,6 +5821,7 @@ entry:
 }
 
 declare <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8(
+  <vscale x 8 x half>,
   <vscale x 8 x half>*,
   <vscale x 8 x i8>,
   i64);
@@ -5590,6 +5835,7 @@ define <vscale x 8 x half> @intrinsic_vluxei_v_nxv8f16_nxv8f16_nxv8i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x half> @llvm.riscv.vluxei.nxv8f16.nxv8i8(
+    <vscale x 8 x half> undef,
     <vscale x 8 x half>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -5623,6 +5869,7 @@ entry:
 }
 
 declare <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8(
+  <vscale x 16 x half>,
   <vscale x 16 x half>*,
   <vscale x 16 x i8>,
   i64);
@@ -5636,6 +5883,7 @@ define <vscale x 16 x half> @intrinsic_vluxei_v_nxv16f16_nxv16f16_nxv16i8(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x half> @llvm.riscv.vluxei.nxv16f16.nxv16i8(
+    <vscale x 16 x half> undef,
     <vscale x 16 x half>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -5669,6 +5917,7 @@ entry:
 }
 
 declare <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8(
+  <vscale x 32 x half>,
   <vscale x 32 x half>*,
   <vscale x 32 x i8>,
   i64);
@@ -5682,6 +5931,7 @@ define <vscale x 32 x half> @intrinsic_vluxei_v_nxv32f16_nxv32f16_nxv32i8(<vscal
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vluxei.nxv32f16.nxv32i8(
+    <vscale x 32 x half> undef,
     <vscale x 32 x half>* %0,
     <vscale x 32 x i8> %1,
     i64 %2)
@@ -5715,6 +5965,7 @@ entry:
 }
 
 declare <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8(
+  <vscale x 1 x float>,
   <vscale x 1 x float>*,
   <vscale x 1 x i8>,
   i64);
@@ -5728,6 +5979,7 @@ define <vscale x 1 x float> @intrinsic_vluxei_v_nxv1f32_nxv1f32_nxv1i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x float> @llvm.riscv.vluxei.nxv1f32.nxv1i8(
+    <vscale x 1 x float> undef,
     <vscale x 1 x float>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5761,6 +6013,7 @@ entry:
 }
 
 declare <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8(
+  <vscale x 2 x float>,
   <vscale x 2 x float>*,
   <vscale x 2 x i8>,
   i64);
@@ -5774,6 +6027,7 @@ define <vscale x 2 x float> @intrinsic_vluxei_v_nxv2f32_nxv2f32_nxv2i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x float> @llvm.riscv.vluxei.nxv2f32.nxv2i8(
+    <vscale x 2 x float> undef,
     <vscale x 2 x float>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -5807,6 +6061,7 @@ entry:
 }
 
 declare <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8(
+  <vscale x 4 x float>,
   <vscale x 4 x float>*,
   <vscale x 4 x i8>,
   i64);
@@ -5820,6 +6075,7 @@ define <vscale x 4 x float> @intrinsic_vluxei_v_nxv4f32_nxv4f32_nxv4i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vluxei.nxv4f32.nxv4i8(
+    <vscale x 4 x float> undef,
     <vscale x 4 x float>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -5853,6 +6109,7 @@ entry:
 }
 
 declare <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8(
+  <vscale x 8 x float>,
   <vscale x 8 x float>*,
   <vscale x 8 x i8>,
   i64);
@@ -5866,6 +6123,7 @@ define <vscale x 8 x float> @intrinsic_vluxei_v_nxv8f32_nxv8f32_nxv8i8(<vscale x
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x float> @llvm.riscv.vluxei.nxv8f32.nxv8i8(
+    <vscale x 8 x float> undef,
     <vscale x 8 x float>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
@@ -5899,6 +6157,7 @@ entry:
 }
 
 declare <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8(
+  <vscale x 16 x float>,
   <vscale x 16 x float>*,
   <vscale x 16 x i8>,
   i64);
@@ -5912,6 +6171,7 @@ define <vscale x 16 x float> @intrinsic_vluxei_v_nxv16f32_nxv16f32_nxv16i8(<vsca
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vluxei.nxv16f32.nxv16i8(
+    <vscale x 16 x float> undef,
     <vscale x 16 x float>* %0,
     <vscale x 16 x i8> %1,
     i64 %2)
@@ -5945,6 +6205,7 @@ entry:
 }
 
 declare <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8(
+  <vscale x 1 x double>,
   <vscale x 1 x double>*,
   <vscale x 1 x i8>,
   i64);
@@ -5958,6 +6219,7 @@ define <vscale x 1 x double> @intrinsic_vluxei_v_nxv1f64_nxv1f64_nxv1i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 1 x double> @llvm.riscv.vluxei.nxv1f64.nxv1i8(
+    <vscale x 1 x double> undef,
     <vscale x 1 x double>* %0,
     <vscale x 1 x i8> %1,
     i64 %2)
@@ -5991,6 +6253,7 @@ entry:
 }
 
 declare <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8(
+  <vscale x 2 x double>,
   <vscale x 2 x double>*,
   <vscale x 2 x i8>,
   i64);
@@ -6004,6 +6267,7 @@ define <vscale x 2 x double> @intrinsic_vluxei_v_nxv2f64_nxv2f64_nxv2i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 2 x double> @llvm.riscv.vluxei.nxv2f64.nxv2i8(
+    <vscale x 2 x double> undef,
     <vscale x 2 x double>* %0,
     <vscale x 2 x i8> %1,
     i64 %2)
@@ -6037,6 +6301,7 @@ entry:
 }
 
 declare <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8(
+  <vscale x 4 x double>,
   <vscale x 4 x double>*,
   <vscale x 4 x i8>,
   i64);
@@ -6050,6 +6315,7 @@ define <vscale x 4 x double> @intrinsic_vluxei_v_nxv4f64_nxv4f64_nxv4i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x double> @llvm.riscv.vluxei.nxv4f64.nxv4i8(
+    <vscale x 4 x double> undef,
     <vscale x 4 x double>* %0,
     <vscale x 4 x i8> %1,
     i64 %2)
@@ -6083,6 +6349,7 @@ entry:
 }
 
 declare <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8(
+  <vscale x 8 x double>,
   <vscale x 8 x double>*,
   <vscale x 8 x i8>,
   i64);
@@ -6096,6 +6363,7 @@ define <vscale x 8 x double> @intrinsic_vluxei_v_nxv8f64_nxv8f64_nxv8i8(<vscale
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vluxei.nxv8f64.nxv8i8(
+    <vscale x 8 x double> undef,
     <vscale x 8 x double>* %0,
     <vscale x 8 x i8> %1,
     i64 %2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index d6ea430c36ca6..5c0eb6be8ddaf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -475,10 +475,10 @@ for.body:                                         ; preds = %for.body, %entry
   %x.addr.015 = phi float* [ %add.ptr, %for.body ], [ %x, %entry ]
   %y.addr.014 = phi float* [ %add.ptr1, %for.body ], [ %y, %entry ]
   %2 = bitcast float* %x.addr.015 to <vscale x 16 x float>*
-  %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>* %2, i64 %1)
+  %3 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* %2, i64 %1)
   %add.ptr = getelementptr inbounds float, float* %x.addr.015, i64 %1
   %4 = bitcast float* %y.addr.014 to <vscale x 16 x float>*
-  %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>* %4, i64 %1)
+  %5 = tail call <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float> undef, <vscale x 16 x float>* %4, i64 %1)
   %6 = tail call <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float> %5, float %a, <vscale x 16 x float> %3, i64 %1)
   tail call void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float> %6, <vscale x 16 x float>* %4, i64 %1)
   %add.ptr1 = getelementptr inbounds float, float* %y.addr.014, i64 %1
@@ -492,7 +492,7 @@ for.end:                                          ; preds = %for.body, %entry
 }
 
 declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
-declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>* nocapture, i64)
+declare <vscale x 16 x float> @llvm.riscv.vle.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64)
 declare <vscale x 16 x float> @llvm.riscv.vfmacc.nxv16f32.f32.i64(<vscale x 16 x float>, float, <vscale x 16 x float>, i64)
 declare void @llvm.riscv.vse.nxv16f32.i64(<vscale x 16 x float>, <vscale x 16 x float>* nocapture, i64)
 
@@ -515,11 +515,11 @@ define <vscale x 2 x i32> @test_vsetvli_x0_x0(<vscale x 2 x i32>* %x, <vscale x
 ; CHECK-NEXT:    vadd.vv v8, v9, v8
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
+  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %x, i64 %vl)
   br i1 %cond, label %if, label %if.end
 
 if:
-  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
+  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16>* %y, i64 %vl)
   %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16> %b, i16 0, i64 %vl)
   br label %if.end
 
@@ -528,8 +528,8 @@ if.end:
   %e = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %d, i64 %vl)
   ret <vscale x 2 x i32> %e
 }
-declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>*, i64)
-declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>*, i64)
+declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>*, i64)
+declare <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i16>*, i64)
 declare <vscale x 2 x i32> @llvm.riscv.vwadd.nxv2i32(<vscale x 2 x i16>, i16, i64)
 declare <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i64)
 
@@ -561,11 +561,11 @@ define <vscale x 2 x i32> @test_vsetvli_x0_x0_2(<vscale x 2 x i32>* %x, <vscale
 ; CHECK-NEXT:    vadd.vv v8, v9, v8
 ; CHECK-NEXT:    ret
 entry:
-  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32>* %x, i64 %vl)
+  %a = call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %x, i64 %vl)
   br i1 %cond, label %if, label %if.end
 
 if:
-  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %y, i64 %vl)
+  %b = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16>* %y, i64 %vl)
   %c = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %a, <vscale x 2 x i16> %b, i64 %vl)
   br label %if.end
 
@@ -574,7 +574,7 @@ if.end:
   br i1 %cond2, label %if2, label %if2.end
 
 if2:
-  %e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16>* %z, i64 %vl)
+  %e = call <vscale x 2 x i16> @llvm.riscv.vle.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16>* %z, i64 %vl)
   %f = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(<vscale x 2 x i32> %d, <vscale x 2 x i16> %e, i64 %vl)
   br label %if2.end
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
index 4429e0db4ecd9..a8e965d52b276 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -10,7 +10,7 @@
 
   define <vscale x 1 x i64> @load_add_or_sub(i8 zeroext %cond, <vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
   entry:
-    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
+    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* %0, i64 %2)
     %tobool = icmp eq i8 %cond, 0
     br i1 %tobool, label %if.else, label %if.then
 
@@ -29,7 +29,7 @@
 
   define void @load_zext_or_sext(i8 zeroext %cond, <vscale x 1 x i32>* %0, <vscale x 1 x i64>* %1, i64 %2) #0 {
   entry:
-    %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* %0, i64 %2)
+    %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* %0, i64 %2)
     %tobool = icmp eq i8 %cond, 0
     br i1 %tobool, label %if.else, label %if.then
 
@@ -102,10 +102,10 @@
   declare <vscale x 1 x i64> @llvm.riscv.vsub.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
 
   ; Function Attrs: nounwind readonly
-  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #3
+  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>* nocapture, i64) #3
 
   ; Function Attrs: nounwind readonly
-  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #3
+  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>, <vscale x 1 x i32>* nocapture, i64) #3
 
   ; Function Attrs: nounwind writeonly
   declare void @llvm.riscv.vse.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>* nocapture, i64) #4
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index 66878967ccb95..e708f5b75639e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -131,7 +131,7 @@ for.body:                                         ; preds = %entry, %for.body
   %i.012 = phi i64 [ %add, %for.body ], [ 0, %entry ]
   %add.ptr = getelementptr inbounds i32, i32* %A, i64 %i.012
   %2 = bitcast i32* %add.ptr to <vscale x 2 x i32>*
-  %3 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32>* %2, i64 %1)
+  %3 = tail call <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32> undef, <vscale x 2 x i32>* %2, i64 %1)
   %4 = tail call <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i32.i32.i64(<vscale x 2 x i32> %3, i32 -2, i64 %1)
   %5 = tail call <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i32.i32.i64(<vscale x 2 x i32> %3, i32 2, i64 %1)
   %6 = tail call <vscale x 2 x i1> @llvm.riscv.vmor.nxv2i1.i64(<vscale x 2 x i1> %4, <vscale x 2 x i1> %5, i64 %1)
@@ -280,7 +280,7 @@ declare <vscale x 1 x double> @llvm.riscv.vfmv.s.f.nxv1f64
   i64)
 
 declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg)
-declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32>* nocapture, i64)
+declare <vscale x 2 x i32> @llvm.riscv.vle.nxv2i32.i64(<vscale x 2 x i32>, <vscale x 2 x i32>* nocapture, i64)
 declare <vscale x 2 x i1> @llvm.riscv.vmslt.nxv2i32.i32.i64(<vscale x 2 x i32>, i32, i64)
 declare <vscale x 2 x i1> @llvm.riscv.vmsgt.nxv2i32.i32.i64(<vscale x 2 x i32>, i32, i64)
 declare <vscale x 2 x i1> @llvm.riscv.vmor.nxv2i1.i64(<vscale x 2 x i1>, <vscale x 2 x i1>, i64)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index e5c85d1908c1d..4aa61e0fd5fce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -16,14 +16,14 @@
 
   define <vscale x 1 x i64> @load_add(<vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
   entry:
-    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
+    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* %0, i64 %2)
     %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
     ret <vscale x 1 x i64> %b
   }
 
   define <vscale x 1 x i64> @load_zext(<vscale x 1 x i32>* %0, i64 %1) #0 {
   entry:
-    %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* %0, i64 %1)
+    %a = call <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32> undef, <vscale x 1 x i32>* %0, i64 %1)
     %b = call <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32> %a, i64 %1)
     ret <vscale x 1 x i64> %b
   }
@@ -66,7 +66,7 @@
 
   define <vscale x 1 x i64> @load_add_inlineasm(<vscale x 1 x i64>* %0, <vscale x 1 x i64> %1, i64 %2) #0 {
   entry:
-    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* %0, i64 %2)
+    %a = call <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64> undef, <vscale x 1 x i64>* %0, i64 %2)
     call void asm sideeffect "", ""()
     %b = call <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64> %a, <vscale x 1 x i64> %1, i64 %2)
     ret <vscale x 1 x i64> %b
@@ -76,10 +76,10 @@
   declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
 
   ; Function Attrs: nounwind readonly
-  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>* nocapture, i64) #4
+  declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>* nocapture, i64) #4
 
   ; Function Attrs: nounwind readonly
-  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>* nocapture, i64) #4
+  declare <vscale x 1 x i32> @llvm.riscv.vle.nxv1i32.i64(<vscale x 1 x i32>, <vscale x 1 x i32>* nocapture, i64) #4
 
   ; Function Attrs: nounwind readnone
   declare <vscale x 1 x i64> @llvm.riscv.vzext.nxv1i64.nxv1i32.i64(<vscale x 1 x i32>, i64) #1

From 510710d0374908e3e2bd37f9a32065da423a7e4c Mon Sep 17 00:00:00 2001
From: Zakk Chen <zakk.chen@sifive.com>
Date: Tue, 25 Jan 2022 07:54:39 -0800
Subject: [PATCH 637/946] [RISCV][NFC] Add getVLOperand for RVV intrinsics.

Use the VLOperand information to get the VL.

Differential Revision: https://reviews.llvm.org/D118156
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b831fdcb1d477..a01c76d587966 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1547,6 +1547,19 @@ bool RISCVTargetLowering::isLegalElementTypeForRVV(Type *ScalarTy) const {
   return false;
 }
 
+static SDValue getVLOperand(SDValue Op) {
+  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
+         "Unexpected opcode");
+  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
+  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
+  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
+      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
+  if (!II)
+    return SDValue();
+  return Op.getOperand(II->VLOperand + 1 + HasChain);
+}
+
 static bool useRVVForFixedLengthVectorVT(MVT VT,
                                          const RISCVSubtarget &Subtarget) {
   assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
@@ -4372,7 +4385,7 @@ static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
   // We need to convert the scalar to a splat vector.
   // FIXME: Can we implicitly truncate the scalar if it is known to
   // be sign extended?
-  SDValue VL = Op.getOperand(II->VLOperand + 1 + HasChain);
+  SDValue VL = getVLOperand(Op);
   assert(VL.getValueType() == XLenVT);
   ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
   return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
@@ -4460,7 +4473,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     //   vmerge.vvm vDest, vSrc, vVal, mMask
     MVT VT = Op.getSimpleValueType();
     SDValue Vec = Op.getOperand(1);
-    SDValue VL = Op.getOperand(3);
+    SDValue VL = getVLOperand(Op);
 
     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
     SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
@@ -4506,7 +4519,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                    DAG.getConstant(1, DL, XLenVT));
 
     // Double the VL since we halved SEW.
-    SDValue VL = Op.getOperand(NumOps - (1 + OpOffset));
+    SDValue VL = getVLOperand(Op);
     SDValue I32VL =
         DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
 

From 95b981ca2ae3915464a63d42eb53b0dde4a88227 Mon Sep 17 00:00:00 2001
From: Giorgis Georgakoudis <georgakoudis1@llnl.gov>
Date: Tue, 25 Jan 2022 20:08:19 -0500
Subject: [PATCH 638/946] [CodeExtractor] Enable partial aggregate arguments

Summary:
Enable CodeExtractor to construct output functions that partially
aggregate inputs/outputs in their argument list. A use case is the
OMPIRBuilder to create outlined functions for parallel regions that
aggregate in a struct the payload variables for the region while passing
as scalars thread and bound identifiers.

Differential Revision: https://reviews.llvm.org/D96854
---
 .../llvm/Transforms/Utils/CodeExtractor.h     |   8 +-
 llvm/lib/Transforms/Utils/CodeExtractor.cpp   | 179 +++++++++++-------
 .../Transforms/Utils/CodeExtractorTest.cpp    |  54 +++++-
 3 files changed, 169 insertions(+), 72 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index f08173e45a5bf..8aed3d0e40d93 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -168,7 +168,7 @@ class CodeExtractorAnalysisCache {
     ///
     /// Based on the blocks used when constructing the code extractor,
     /// determine whether it is eligible for extraction.
-    /// 
+    ///
     /// Checks that varargs handling (with vastart and vaend) is only done in
     /// the outlined blocks.
     bool isEligible() const;
@@ -214,6 +214,10 @@ class CodeExtractorAnalysisCache {
     /// original block will be added to the outline region.
     BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock);
 
+    /// Exclude a value from aggregate argument passing when extracting a code
+    /// region, passing it instead as a scalar.
+    void excludeArgFromAggregate(Value *Arg);
+
   private:
     struct LifetimeMarkerInfo {
       bool SinkLifeStart = false;
@@ -222,6 +226,8 @@ class CodeExtractorAnalysisCache {
       Instruction *LifeEnd = nullptr;
     };
 
+    ValueSet ExcludeArgsFromAggregate;
+
     LifetimeMarkerInfo
     getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
                        Instruction *Addr, BasicBlock *ExitBlock) const;
diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index f577643f81b0f..24cd5747c5a45 100644
--- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -829,39 +829,54 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   default: RetTy = Type::getInt16Ty(header->getContext()); break;
   }
 
-  std::vector<Type *> paramTy;
+  std::vector<Type *> ParamTy;
+  std::vector<Type *> AggParamTy;
+  ValueSet StructValues;
 
   // Add the types of the input values to the function's argument list
   for (Value *value : inputs) {
     LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");
-    paramTy.push_back(value->getType());
+    if (AggregateArgs && !ExcludeArgsFromAggregate.contains(value)) {
+      AggParamTy.push_back(value->getType());
+      StructValues.insert(value);
+    } else
+      ParamTy.push_back(value->getType());
   }
 
   // Add the types of the output values to the function's argument list.
   for (Value *output : outputs) {
     LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");
-    if (AggregateArgs)
-      paramTy.push_back(output->getType());
-    else
-      paramTy.push_back(PointerType::getUnqual(output->getType()));
+    if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) {
+      AggParamTy.push_back(output->getType());
+      StructValues.insert(output);
+    } else
+      ParamTy.push_back(PointerType::getUnqual(output->getType()));
+  }
+
+  assert(
+      (ParamTy.size() + AggParamTy.size()) ==
+          (inputs.size() + outputs.size()) &&
+      "Number of scalar and aggregate params does not match inputs, outputs");
+  assert(StructValues.empty() ||
+         AggregateArgs && "Expeced StructValues only with AggregateArgs set");
+
+  // Concatenate scalar and aggregate params in ParamTy.
+  size_t NumScalarParams = ParamTy.size();
+  StructType *StructTy = nullptr;
+  if (AggregateArgs && !AggParamTy.empty()) {
+    StructTy = StructType::get(M->getContext(), AggParamTy);
+    ParamTy.push_back(PointerType::getUnqual(StructTy));
   }
 
   LLVM_DEBUG({
     dbgs() << "Function type: " << *RetTy << " f(";
-    for (Type *i : paramTy)
+    for (Type *i : ParamTy)
       dbgs() << *i << ", ";
     dbgs() << ")\n";
   });
 
-  StructType *StructTy = nullptr;
-  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
-    StructTy = StructType::get(M->getContext(), paramTy);
-    paramTy.clear();
-    paramTy.push_back(PointerType::getUnqual(StructTy));
-  }
-  FunctionType *funcType =
-                  FunctionType::get(RetTy, paramTy,
-                                    AllowVarArgs && oldFunction->isVarArg());
+  FunctionType *funcType = FunctionType::get(
+      RetTy, ParamTy, AllowVarArgs && oldFunction->isVarArg());
 
   std::string SuffixToUse =
       Suffix.empty()
@@ -981,24 +996,27 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   }
   newFunction->getBasicBlockList().push_back(newRootNode);
 
-  // Create an iterator to name all of the arguments we inserted.
-  Function::arg_iterator AI = newFunction->arg_begin();
+  // Create scalar and aggregate iterators to name all of the arguments we
+  // inserted.
+  Function::arg_iterator ScalarAI = newFunction->arg_begin();
+  Function::arg_iterator AggAI = std::next(ScalarAI, NumScalarParams);
 
   // Rewrite all users of the inputs in the extracted region to use the
   // arguments (or appropriate addressing into struct) instead.
-  for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
+  for (unsigned i = 0, e = inputs.size(), aggIdx = 0; i != e; ++i) {
     Value *RewriteVal;
-    if (AggregateArgs) {
+    if (AggregateArgs && StructValues.contains(inputs[i])) {
       Value *Idx[2];
       Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext()));
-      Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i);
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), aggIdx);
       Instruction *TI = newFunction->begin()->getTerminator();
       GetElementPtrInst *GEP = GetElementPtrInst::Create(
-          StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI);
-      RewriteVal = new LoadInst(StructTy->getElementType(i), GEP,
+          StructTy, &*AggAI, Idx, "gep_" + inputs[i]->getName(), TI);
+      RewriteVal = new LoadInst(StructTy->getElementType(aggIdx), GEP,
                                 "loadgep_" + inputs[i]->getName(), TI);
+      ++aggIdx;
     } else
-      RewriteVal = &*AI++;
+      RewriteVal = &*ScalarAI++;
 
     std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end());
     for (User *use : Users)
@@ -1008,12 +1026,14 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
   }
 
   // Set names for input and output arguments.
-  if (!AggregateArgs) {
-    AI = newFunction->arg_begin();
-    for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI)
-      AI->setName(inputs[i]->getName());
-    for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI)
-      AI->setName(outputs[i]->getName()+".out");
+  if (NumScalarParams) {
+    ScalarAI = newFunction->arg_begin();
+    for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++ScalarAI)
+      if (!StructValues.contains(inputs[i]))
+        ScalarAI->setName(inputs[i]->getName());
+    for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++ScalarAI)
+      if (!StructValues.contains(outputs[i]))
+        ScalarAI->setName(outputs[i]->getName() + ".out");
   }
 
   // Rewrite branches to basic blocks outside of the loop to new dummy blocks
@@ -1121,7 +1141,8 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
                                                     ValueSet &outputs) {
   // Emit a call to the new function, passing in: *pointer to struct (if
   // aggregating parameters), or plan inputs and allocated memory for outputs
-  std::vector<Value *> params, StructValues, ReloadOutputs, Reloads;
+  std::vector<Value *> params, ReloadOutputs, Reloads;
+  ValueSet StructValues;
 
   Module *M = newFunction->getParent();
   LLVMContext &Context = M->getContext();
@@ -1129,23 +1150,24 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
   CallInst *call = nullptr;
 
   // Add inputs as params, or to be filled into the struct
-  unsigned ArgNo = 0;
+  unsigned ScalarInputArgNo = 0;
   SmallVector<unsigned, 1> SwiftErrorArgs;
   for (Value *input : inputs) {
-    if (AggregateArgs)
-      StructValues.push_back(input);
+    if (AggregateArgs && !ExcludeArgsFromAggregate.contains(input))
+      StructValues.insert(input);
     else {
       params.push_back(input);
       if (input->isSwiftError())
-        SwiftErrorArgs.push_back(ArgNo);
+        SwiftErrorArgs.push_back(ScalarInputArgNo);
     }
-    ++ArgNo;
+    ++ScalarInputArgNo;
   }
 
   // Create allocas for the outputs
+  unsigned ScalarOutputArgNo = 0;
   for (Value *output : outputs) {
-    if (AggregateArgs) {
-      StructValues.push_back(output);
+    if (AggregateArgs && !ExcludeArgsFromAggregate.contains(output)) {
+      StructValues.insert(output);
     } else {
       AllocaInst *alloca =
         new AllocaInst(output->getType(), DL.getAllocaAddrSpace(),
@@ -1153,12 +1175,14 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
                        &codeReplacer->getParent()->front().front());
       ReloadOutputs.push_back(alloca);
       params.push_back(alloca);
+      ++ScalarOutputArgNo;
     }
   }
 
   StructType *StructArgTy = nullptr;
   AllocaInst *Struct = nullptr;
-  if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
+  unsigned NumAggregatedInputs = 0;
+  if (AggregateArgs && !StructValues.empty()) {
     std::vector<Type *> ArgTypes;
     for (Value *V : StructValues)
       ArgTypes.push_back(V->getType());
@@ -1170,14 +1194,18 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
                             &codeReplacer->getParent()->front().front());
     params.push_back(Struct);
 
-    for (unsigned i = 0, e = inputs.size(); i != e; ++i) {
-      Value *Idx[2];
-      Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
-      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
-      GetElementPtrInst *GEP = GetElementPtrInst::Create(
-          StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
-      codeReplacer->getInstList().push_back(GEP);
-      new StoreInst(StructValues[i], GEP, codeReplacer);
+    // Store aggregated inputs in the struct.
+    for (unsigned i = 0, e = StructValues.size(); i != e; ++i) {
+      if (inputs.contains(StructValues[i])) {
+        Value *Idx[2];
+        Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
+        Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i);
+        GetElementPtrInst *GEP = GetElementPtrInst::Create(
+            StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName());
+        codeReplacer->getInstList().push_back(GEP);
+        new StoreInst(StructValues[i], GEP, codeReplacer);
+        NumAggregatedInputs++;
+      }
     }
   }
 
@@ -1200,24 +1228,24 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
     newFunction->addParamAttr(SwiftErrArgNo, Attribute::SwiftError);
   }
 
-  Function::arg_iterator OutputArgBegin = newFunction->arg_begin();
-  unsigned FirstOut = inputs.size();
-  if (!AggregateArgs)
-    std::advance(OutputArgBegin, inputs.size());
-
-  // Reload the outputs passed in by reference.
-  for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+  // Reload the outputs passed in by reference, use the struct if output is in
+  // the aggregate or reload from the scalar argument.
+  for (unsigned i = 0, e = outputs.size(), scalarIdx = 0,
+                aggIdx = NumAggregatedInputs;
+       i != e; ++i) {
     Value *Output = nullptr;
-    if (AggregateArgs) {
+    if (AggregateArgs && StructValues.contains(outputs[i])) {
       Value *Idx[2];
       Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
-      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx);
       GetElementPtrInst *GEP = GetElementPtrInst::Create(
           StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName());
       codeReplacer->getInstList().push_back(GEP);
       Output = GEP;
+      ++aggIdx;
     } else {
-      Output = ReloadOutputs[i];
+      Output = ReloadOutputs[scalarIdx];
+      ++scalarIdx;
     }
     LoadInst *load = new LoadInst(outputs[i]->getType(), Output,
                                   outputs[i]->getName() + ".reload",
@@ -1299,8 +1327,13 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
   // Store the arguments right after the definition of output value.
   // This should be proceeded after creating exit stubs to be ensure that invoke
   // result restore will be placed in the outlined function.
-  Function::arg_iterator OAI = OutputArgBegin;
-  for (unsigned i = 0, e = outputs.size(); i != e; ++i) {
+  Function::arg_iterator ScalarOutputArgBegin = newFunction->arg_begin();
+  std::advance(ScalarOutputArgBegin, ScalarInputArgNo);
+  Function::arg_iterator AggOutputArgBegin = newFunction->arg_begin();
+  std::advance(AggOutputArgBegin, ScalarInputArgNo + ScalarOutputArgNo);
+
+  for (unsigned i = 0, e = outputs.size(), aggIdx = NumAggregatedInputs; i != e;
+       ++i) {
     auto *OutI = dyn_cast<Instruction>(outputs[i]);
     if (!OutI)
       continue;
@@ -1320,23 +1353,27 @@ CallInst *CodeExtractor::emitCallAndSwitchStatement(Function *newFunction,
     assert((InsertBefore->getFunction() == newFunction ||
             Blocks.count(InsertBefore->getParent())) &&
            "InsertPt should be in new function");
-    assert(OAI != newFunction->arg_end() &&
-           "Number of output arguments should match "
-           "the amount of defined values");
-    if (AggregateArgs) {
+    if (AggregateArgs && StructValues.contains(outputs[i])) {
+      assert(AggOutputArgBegin != newFunction->arg_end() &&
+             "Number of aggregate output arguments should match "
+             "the number of defined values");
       Value *Idx[2];
       Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context));
-      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i);
+      Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), aggIdx);
       GetElementPtrInst *GEP = GetElementPtrInst::Create(
-          StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(),
+          StructArgTy, &*AggOutputArgBegin, Idx, "gep_" + outputs[i]->getName(),
           InsertBefore);
       new StoreInst(outputs[i], GEP, InsertBefore);
+      ++aggIdx;
       // Since there should be only one struct argument aggregating
-      // all the output values, we shouldn't increment OAI, which always
-      // points to the struct argument, in this case.
+      // all the output values, we shouldn't increment AggOutputArgBegin, which
+      // always points to the struct argument, in this case.
     } else {
-      new StoreInst(outputs[i], &*OAI, InsertBefore);
-      ++OAI;
+      assert(ScalarOutputArgBegin != newFunction->arg_end() &&
+             "Number of scalar output arguments should match "
+             "the number of defined values");
+      new StoreInst(outputs[i], &*ScalarOutputArgBegin, InsertBefore);
+      ++ScalarOutputArgBegin;
     }
   }
 
@@ -1835,3 +1872,7 @@ bool CodeExtractor::verifyAssumptionCache(const Function &OldFunc,
   }
   return false;
 }
+
+void CodeExtractor::excludeArgFromAggregate(Value *Arg) {
+  ExcludeArgsFromAggregate.insert(Arg);
+}
diff --git a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
index 5f7b0111c1c62..023a41c5bfd69 100644
--- a/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeExtractorTest.cpp
@@ -188,7 +188,7 @@ TEST(CodeExtractor, ExitBlockOrderingPhis) {
   EXPECT_TRUE(NextReturn);
   ConstantInt *CINext = dyn_cast<ConstantInt>(NextReturn->getReturnValue());
   EXPECT_TRUE(CINext->getLimitedValue() == 0u);
-  
+
   EXPECT_FALSE(verifyFunction(*Outlined));
   EXPECT_FALSE(verifyFunction(*Func));
 }
@@ -245,7 +245,7 @@ TEST(CodeExtractor, ExitBlockOrdering) {
   EXPECT_TRUE(NextReturn);
   ConstantInt *CINext = dyn_cast<ConstantInt>(NextReturn->getReturnValue());
   EXPECT_TRUE(CINext->getLimitedValue() == 0u);
-  
+
   EXPECT_FALSE(verifyFunction(*Outlined));
   EXPECT_FALSE(verifyFunction(*Func));
 }
@@ -504,4 +504,54 @@ TEST(CodeExtractor, RemoveBitcastUsesFromOuterLifetimeMarkers) {
   EXPECT_FALSE(verifyFunction(*Outlined));
   EXPECT_FALSE(verifyFunction(*Func));
 }
+
+TEST(CodeExtractor, PartialAggregateArgs) {
+  LLVMContext Ctx;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M(parseAssemblyString(R"ir(
+    target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+    target triple = "x86_64-unknown-linux-gnu"
+
+    declare void @use(i32)
+
+    define void @foo(i32 %a, i32 %b, i32 %c) {
+    entry:
+      br label %extract
+
+    extract:
+      call void @use(i32 %a)
+      call void @use(i32 %b)
+      call void @use(i32 %c)
+      br label %exit
+
+    exit:
+      ret void
+    }
+  )ir",
+                                                Err, Ctx));
+
+  Function *Func = M->getFunction("foo");
+  SmallVector<BasicBlock *, 1> Blocks{getBlockByName(Func, "extract")};
+
+  // Create the CodeExtractor with arguments aggregation enabled.
+  CodeExtractor CE(Blocks, /* DominatorTree */ nullptr,
+                   /* AggregateArgs */ true);
+  EXPECT_TRUE(CE.isEligible());
+
+  CodeExtractorAnalysisCache CEAC(*Func);
+  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
+  BasicBlock *CommonExit = nullptr;
+  CE.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
+  CE.findInputsOutputs(Inputs, Outputs, SinkingCands);
+  // Exclude the first input from the argument aggregate.
+  CE.excludeArgFromAggregate(Inputs[0]);
+
+  Function *Outlined = CE.extractCodeRegion(CEAC, Inputs, Outputs);
+  EXPECT_TRUE(Outlined);
+  // Expect 2 arguments in the outlined function: the excluded input and the
+  // struct aggregate for the remaining inputs.
+  EXPECT_EQ(Outlined->arg_size(), 2U);
+  EXPECT_FALSE(verifyFunction(*Outlined));
+  EXPECT_FALSE(verifyFunction(*Func));
+}
 } // end anonymous namespace

From 7cb4c2617391b80993e7c10f3a34c9e172f7ad41 Mon Sep 17 00:00:00 2001
From: Giorgis Georgakoudis <georgakoudis1@llnl.gov>
Date: Tue, 25 Jan 2022 20:31:35 -0500
Subject: [PATCH 639/946] [OMPIRBuilder] Generate aggregate argument for
 parallel region outlined functions

Summary:
This patch modifies code generation in OpenMPIRBuilder to pass arguments
to the parallel region outlined function in an aggregate (struct),
besides the global_tid and bound_tid arguments. It depends on the
updated CodeExtractor (see D96854) for support. It mirrors functionality
of Clang codegen (see D102107).

Differential Revision: https://reviews.llvm.org/D110114
---
 clang/test/OpenMP/cancel_codegen.cpp          | 292 ++++----
 .../irbuilder_nested_openmp_parallel_empty.c  |  30 +-
 .../OpenMP/irbuilder_nested_parallel_for.c    | 168 +++--
 clang/test/OpenMP/parallel_codegen.cpp        | 605 ++++++++--------
 .../llvm/Frontend/OpenMP/OMPIRBuilder.h       |   6 +-
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp     |  47 +-
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp         |   3 +-
 .../OpenMP/parallel_region_merging.ll         | 682 +++++++++++-------
 .../Frontend/OpenMPIRBuilderTest.cpp          |  94 ++-
 9 files changed, 1159 insertions(+), 768 deletions(-)

diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp
index 542d2cc6832ef..1cee2ff7e33e4 100644
--- a/clang/test/OpenMP/cancel_codegen.cpp
+++ b/clang/test/OpenMP/cancel_codegen.cpp
@@ -1321,6 +1321,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-LABEL: define {{[^@]+}}@main
 // CHECK3-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i8*** }, align 8
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
@@ -1350,7 +1351,11 @@ for (int i = 0; i < argc; ++i) {
 // CHECK3-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK3:       omp_parallel:
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
+// CHECK3-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0
+// CHECK3-NEXT:    store i32* [[ARGC_ADDR]], i32** [[GEP_ARGC_ADDR]], align 8
+// CHECK3-NEXT:    [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 1
+// CHECK3-NEXT:    store i8*** [[ARGV_ADDR]], i8**** [[GEP_ARGV_ADDR]], align 8
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i8*** }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32*, i8*** }* [[STRUCTARG]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK3:       omp.par.outlined.exit:
 // CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -1553,58 +1558,62 @@ for (int i = 0; i < argc; ++i) {
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@main..omp_par
-// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i8*** }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK3-NEXT:  omp.par.entry:
+// CHECK3-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT:    [[LOADGEP_ARGC_ADDR:%.*]] = load i32*, i32** [[GEP_ARGC_ADDR]], align 8
+// CHECK3-NEXT:    [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 1
+// CHECK3-NEXT:    [[LOADGEP_ARGV_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGV_ADDR]], align 8
 // CHECK3-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK3-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK3-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK3:       omp.par.region:
-// CHECK3-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
-// CHECK3-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
-// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
-// CHECK3:       2:
-// CHECK3-NEXT:    br label [[TMP3:%.*]]
+// CHECK3-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
+// CHECK3-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
+// CHECK3-NEXT:    br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]]
 // CHECK3:       3:
-// CHECK3-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK3-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
-// CHECK3-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
-// CHECK3-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// CHECK3-NEXT:    br label [[TMP4:%.*]]
+// CHECK3:       4:
+// CHECK3-NEXT:    [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4
+// CHECK3-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP5]] to i8
+// CHECK3-NEXT:    [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0
+// CHECK3-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0
 // CHECK3-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
 // CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-// CHECK3-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-// CHECK3-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
+// CHECK3-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+// CHECK3-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+// CHECK3-NEXT:    br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
 // CHECK3:       .cncl5:
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK3:       .cont:
-// CHECK3-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK3-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
-// CHECK3-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
-// CHECK3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
-// CHECK3-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
-// CHECK3-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
-// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
+// CHECK3-NEXT:    [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4
+// CHECK3-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8
+// CHECK3-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK3-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
+// CHECK3-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
+// CHECK3-NEXT:    [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
+// CHECK3-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP13]] to i32
+// CHECK3-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
 // CHECK3-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
 // CHECK3-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
 // CHECK3-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK3:       omp.par.pre_finalize:
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
-// CHECK3:       13:
+// CHECK3:       14:
 // CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
-// CHECK3-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
-// CHECK3-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
+// CHECK3-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
+// CHECK3-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+// CHECK3-NEXT:    br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
 // CHECK3:       .cncl:
 // CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK3-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK3-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK3:       .split:
-// CHECK3-NEXT:    br label [[TMP3]]
+// CHECK3-NEXT:    br label [[TMP4]]
 // CHECK3:       omp.par.outlined.exit.exitStub:
 // CHECK3-NEXT:    ret void
 //
@@ -1948,6 +1957,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4-LABEL: define {{[^@]+}}@main
 // CHECK4-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i8*** }, align 8
 // CHECK4-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
@@ -1977,7 +1987,11 @@ for (int i = 0; i < argc; ++i) {
 // CHECK4-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK4:       omp_parallel:
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
+// CHECK4-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0
+// CHECK4-NEXT:    store i32* [[ARGC_ADDR]], i32** [[GEP_ARGC_ADDR]], align 8
+// CHECK4-NEXT:    [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 1
+// CHECK4-NEXT:    store i8*** [[ARGV_ADDR]], i8**** [[GEP_ARGV_ADDR]], align 8
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i8*** }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32*, i8*** }* [[STRUCTARG]])
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK4:       omp.par.outlined.exit:
 // CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -2180,58 +2194,62 @@ for (int i = 0; i < argc; ++i) {
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@main..omp_par
-// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i8*** }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK4-NEXT:  omp.par.entry:
+// CHECK4-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 0
+// CHECK4-NEXT:    [[LOADGEP_ARGC_ADDR:%.*]] = load i32*, i32** [[GEP_ARGC_ADDR]], align 8
+// CHECK4-NEXT:    [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 1
+// CHECK4-NEXT:    [[LOADGEP_ARGV_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGV_ADDR]], align 8
 // CHECK4-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-// CHECK4-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK4-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK4-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK4:       omp.par.region:
-// CHECK4-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
-// CHECK4-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
-// CHECK4-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
-// CHECK4:       2:
-// CHECK4-NEXT:    br label [[TMP3:%.*]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
+// CHECK4-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
+// CHECK4-NEXT:    br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]]
 // CHECK4:       3:
-// CHECK4-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK4-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
-// CHECK4-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
-// CHECK4-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// CHECK4-NEXT:    br label [[TMP4:%.*]]
+// CHECK4:       4:
+// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4
+// CHECK4-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP5]] to i8
+// CHECK4-NEXT:    [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0
+// CHECK4-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK4-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0
 // CHECK4-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
 // CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-// CHECK4-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-// CHECK4-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
+// CHECK4-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+// CHECK4-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+// CHECK4-NEXT:    br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
 // CHECK4:       .cncl5:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK4:       .cont:
-// CHECK4-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK4-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
-// CHECK4-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
-// CHECK4-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
-// CHECK4-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
-// CHECK4-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
-// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
+// CHECK4-NEXT:    [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4
+// CHECK4-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8
+// CHECK4-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK4-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
+// CHECK4-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
+// CHECK4-NEXT:    [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
+// CHECK4-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP13]] to i32
+// CHECK4-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
 // CHECK4-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
 // CHECK4-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
 // CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK4:       omp.par.pre_finalize:
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
-// CHECK4:       13:
+// CHECK4:       14:
 // CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
-// CHECK4-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
-// CHECK4-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
+// CHECK4-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
+// CHECK4-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+// CHECK4-NEXT:    br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
 // CHECK4:       .cncl:
 // CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK4-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK4-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK4:       .split:
-// CHECK4-NEXT:    br label [[TMP3]]
+// CHECK4-NEXT:    br label [[TMP4]]
 // CHECK4:       omp.par.outlined.exit.exitStub:
 // CHECK4-NEXT:    ret void
 //
@@ -3815,6 +3833,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9-LABEL: define {{[^@]+}}@main
 // CHECK9-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK9-NEXT:  entry:
+// CHECK9-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i8*** }, align 8
 // CHECK9-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
@@ -3844,7 +3863,11 @@ for (int i = 0; i < argc; ++i) {
 // CHECK9-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK9-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK9:       omp_parallel:
-// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
+// CHECK9-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0
+// CHECK9-NEXT:    store i32* [[ARGC_ADDR]], i32** [[GEP_ARGC_ADDR]], align 8
+// CHECK9-NEXT:    [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 1
+// CHECK9-NEXT:    store i8*** [[ARGV_ADDR]], i8**** [[GEP_ARGV_ADDR]], align 8
+// CHECK9-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i8*** }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32*, i8*** }* [[STRUCTARG]])
 // CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK9:       omp.par.outlined.exit:
 // CHECK9-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4047,58 +4070,62 @@ for (int i = 0; i < argc; ++i) {
 //
 //
 // CHECK9-LABEL: define {{[^@]+}}@main..omp_par
-// CHECK9-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK9-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i8*** }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK9-NEXT:  omp.par.entry:
+// CHECK9-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 0
+// CHECK9-NEXT:    [[LOADGEP_ARGC_ADDR:%.*]] = load i32*, i32** [[GEP_ARGC_ADDR]], align 8
+// CHECK9-NEXT:    [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 1
+// CHECK9-NEXT:    [[LOADGEP_ARGV_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGV_ADDR]], align 8
 // CHECK9-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-// CHECK9-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-// CHECK9-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// CHECK9-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+// CHECK9-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK9-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK9-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK9:       omp.par.region:
-// CHECK9-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
-// CHECK9-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
-// CHECK9-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
-// CHECK9:       2:
-// CHECK9-NEXT:    br label [[TMP3:%.*]]
+// CHECK9-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
+// CHECK9-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
+// CHECK9-NEXT:    br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]]
 // CHECK9:       3:
-// CHECK9-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
-// CHECK9-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
-// CHECK9-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// CHECK9-NEXT:    br label [[TMP4:%.*]]
+// CHECK9:       4:
+// CHECK9-NEXT:    [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP5]] to i8
+// CHECK9-NEXT:    [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8
+// CHECK9-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0
+// CHECK9-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK9-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0
 // CHECK9-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
 // CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-// CHECK9-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-// CHECK9-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
+// CHECK9-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+// CHECK9-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+// CHECK9-NEXT:    br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
 // CHECK9:       .cncl5:
 // CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK9:       .cont:
-// CHECK9-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK9-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
-// CHECK9-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
-// CHECK9-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
-// CHECK9-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
-// CHECK9-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
-// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
+// CHECK9-NEXT:    [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4
+// CHECK9-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8
+// CHECK9-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK9-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
+// CHECK9-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
+// CHECK9-NEXT:    [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
+// CHECK9-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP13]] to i32
+// CHECK9-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
 // CHECK9-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
 // CHECK9-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
 // CHECK9-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK9:       omp.par.pre_finalize:
 // CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
-// CHECK9:       13:
+// CHECK9:       14:
 // CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
-// CHECK9-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
-// CHECK9-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
+// CHECK9-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
+// CHECK9-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+// CHECK9-NEXT:    br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
 // CHECK9:       .cncl:
 // CHECK9-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK9-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK9-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK9-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK9:       .split:
-// CHECK9-NEXT:    br label [[TMP3]]
+// CHECK9-NEXT:    br label [[TMP4]]
 // CHECK9:       omp.par.outlined.exit.exitStub:
 // CHECK9-NEXT:    ret void
 //
@@ -4442,6 +4469,7 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-LABEL: define {{[^@]+}}@main
 // CHECK10-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK10-NEXT:  entry:
+// CHECK10-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i8*** }, align 8
 // CHECK10-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
@@ -4471,7 +4499,11 @@ for (int i = 0; i < argc; ++i) {
 // CHECK10-NEXT:    [[P_STRIDE30:%.*]] = alloca i32, align 4
 // CHECK10-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK10:       omp_parallel:
-// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i8***)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[ARGC_ADDR]], i8*** [[ARGV_ADDR]])
+// CHECK10-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 0
+// CHECK10-NEXT:    store i32* [[ARGC_ADDR]], i32** [[GEP_ARGC_ADDR]], align 8
+// CHECK10-NEXT:    [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[STRUCTARG]], i32 0, i32 1
+// CHECK10-NEXT:    store i8*** [[ARGV_ADDR]], i8**** [[GEP_ARGV_ADDR]], align 8
+// CHECK10-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i8*** }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32*, i8*** }* [[STRUCTARG]])
 // CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK10:       omp.par.outlined.exit:
 // CHECK10-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4674,58 +4706,62 @@ for (int i = 0; i < argc; ++i) {
 //
 //
 // CHECK10-LABEL: define {{[^@]+}}@main..omp_par
-// CHECK10-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[ARGC_ADDR:%.*]], i8*** [[ARGV_ADDR:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK10-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i8*** }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK10-NEXT:  omp.par.entry:
+// CHECK10-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 0
+// CHECK10-NEXT:    [[LOADGEP_ARGC_ADDR:%.*]] = load i32*, i32** [[GEP_ARGC_ADDR]], align 8
+// CHECK10-NEXT:    [[GEP_ARGV_ADDR:%.*]] = getelementptr { i32*, i8*** }, { i32*, i8*** }* [[TMP0]], i32 0, i32 1
+// CHECK10-NEXT:    [[LOADGEP_ARGV_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGV_ADDR]], align 8
 // CHECK10-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-// CHECK10-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-// CHECK10-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// CHECK10-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+// CHECK10-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK10-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK10-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK10:       omp.par.region:
-// CHECK10-NEXT:    [[TMP1:%.*]] = load float, float* @flag, align 4
-// CHECK10-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP1]], 0.000000e+00
-// CHECK10-NEXT:    br i1 [[TOBOOL]], label [[TMP13:%.*]], label [[TMP2:%.*]]
-// CHECK10:       2:
-// CHECK10-NEXT:    br label [[TMP3:%.*]]
+// CHECK10-NEXT:    [[TMP2:%.*]] = load float, float* @flag, align 4
+// CHECK10-NEXT:    [[TOBOOL:%.*]] = fcmp une float [[TMP2]], 0.000000e+00
+// CHECK10-NEXT:    br i1 [[TOBOOL]], label [[TMP14:%.*]], label [[TMP3:%.*]]
 // CHECK10:       3:
-// CHECK10-NEXT:    [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP4]] to i8
-// CHECK10-NEXT:    [[TMP5:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK10-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP5]], i64 0
-// CHECK10-NEXT:    [[TMP6:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
-// CHECK10-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i64 0
+// CHECK10-NEXT:    br label [[TMP4:%.*]]
+// CHECK10:       4:
+// CHECK10-NEXT:    [[TMP5:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[CONV:%.*]] = trunc i32 [[TMP5]] to i8
+// CHECK10-NEXT:    [[TMP6:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8
+// CHECK10-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP6]], i64 0
+// CHECK10-NEXT:    [[TMP7:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8
+// CHECK10-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0
 // CHECK10-NEXT:    store i8 [[CONV]], i8* [[ARRAYIDX3]], align 1
 // CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    [[TMP7:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
-// CHECK10-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
-// CHECK10-NEXT:    br i1 [[TMP8]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
+// CHECK10-NEXT:    [[TMP8:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
+// CHECK10-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+// CHECK10-NEXT:    br i1 [[TMP9]], label [[DOTCONT:%.*]], label [[DOTCNCL5:%.*]]
 // CHECK10:       .cncl5:
 // CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
 // CHECK10:       .cont:
-// CHECK10-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4
-// CHECK10-NEXT:    [[TMP10:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8
-// CHECK10-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP10]], i64 0
-// CHECK10-NEXT:    [[TMP11:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
-// CHECK10-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP11]], i64 0
-// CHECK10-NEXT:    [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
-// CHECK10-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP12]] to i32
-// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP9]]
+// CHECK10-NEXT:    [[TMP10:%.*]] = load i32, i32* [[LOADGEP_ARGC_ADDR]], align 4
+// CHECK10-NEXT:    [[TMP11:%.*]] = load i8**, i8*** [[LOADGEP_ARGV_ADDR]], align 8
+// CHECK10-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i8*, i8** [[TMP11]], i64 0
+// CHECK10-NEXT:    [[TMP12:%.*]] = load i8*, i8** [[ARRAYIDX6]], align 8
+// CHECK10-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, i8* [[TMP12]], i64 0
+// CHECK10-NEXT:    [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX7]], align 1
+// CHECK10-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP13]] to i32
+// CHECK10-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV8]], [[TMP10]]
 // CHECK10-NEXT:    [[CONV9:%.*]] = trunc i32 [[ADD]] to i8
 // CHECK10-NEXT:    store i8 [[CONV9]], i8* [[ARRAYIDX7]], align 1
 // CHECK10-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK10:       omp.par.pre_finalize:
 // CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
-// CHECK10:       13:
+// CHECK10:       14:
 // CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
-// CHECK10-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 0
-// CHECK10-NEXT:    br i1 [[TMP15]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
+// CHECK10-NEXT:    [[TMP15:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM1]], i32 1)
+// CHECK10-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 0
+// CHECK10-NEXT:    br i1 [[TMP16]], label [[DOTSPLIT:%.*]], label [[DOTCNCL:%.*]]
 // CHECK10:       .cncl:
 // CHECK10-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-// CHECK10-NEXT:    [[TMP16:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
+// CHECK10-NEXT:    [[TMP17:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 // CHECK10-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB]]
 // CHECK10:       .split:
-// CHECK10-NEXT:    br label [[TMP3]]
+// CHECK10-NEXT:    br label [[TMP4]]
 // CHECK10:       omp.par.outlined.exit.exitStub:
 // CHECK10-NEXT:    ret void
 //
diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
index 158a04be16492..56aa41b0db466 100644
--- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
+++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c
@@ -33,6 +33,8 @@ void nested_parallel_0(void) {
 
 // ALL-LABEL: @_Z17nested_parallel_1Pfid(
 // ALL-NEXT:  entry:
+// ALL-NEXT:    [[STRUCTARG14:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8
+// ALL-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
 // ALL-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
 // ALL-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // ALL-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
@@ -42,7 +44,15 @@ void nested_parallel_0(void) {
 // ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // ALL:       omp_parallel:
-// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// ALL-NEXT:    [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 0
+// ALL-NEXT:    store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8
+// ALL-NEXT:    [[GEP_A_ADDR15:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 1
+// ALL-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR15]], align 8
+// ALL-NEXT:    [[GEP_B_ADDR16:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 2
+// ALL-NEXT:    store double* [[B_ADDR]], double** [[GEP_B_ADDR16]], align 8
+// ALL-NEXT:    [[GEP_R_ADDR17:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 3
+// ALL-NEXT:    store float** [[R_ADDR]], float*** [[GEP_R_ADDR17]], align 8
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]])
 // ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT13:%.*]]
 // ALL:       omp.par.outlined.exit13:
 // ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -61,6 +71,10 @@ void nested_parallel_1(float *r, int a, double b) {
 
 // ALL-LABEL: @_Z17nested_parallel_2Pfid(
 // ALL-NEXT:  entry:
+// ALL-NEXT:    [[STRUCTARG68:%.*]] = alloca { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8
+// ALL-NEXT:    [[STRUCTARG64:%.*]] = alloca { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8
+// ALL-NEXT:    [[STRUCTARG59:%.*]] = alloca { i32*, double*, float** }, align 8
+// ALL-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
 // ALL-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
 // ALL-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // ALL-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
@@ -70,7 +84,19 @@ void nested_parallel_1(float *r, int a, double b) {
 // ALL-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // ALL-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // ALL:       omp_parallel:
-// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// ALL-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 0
+// ALL-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+// ALL-NEXT:    [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 1
+// ALL-NEXT:    store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
+// ALL-NEXT:    [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 2
+// ALL-NEXT:    store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
+// ALL-NEXT:    [[GEP_STRUCTARG64:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 3
+// ALL-NEXT:    store { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG64]], { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }** [[GEP_STRUCTARG64]], align 8
+// ALL-NEXT:    [[GEP_STRUCTARG69:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 4
+// ALL-NEXT:    store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG69]], align 8
+// ALL-NEXT:    [[GEP_STRUCTARG5970:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 5
+// ALL-NEXT:    store { i32*, double*, float** }* [[STRUCTARG59]], { i32*, double*, float** }** [[GEP_STRUCTARG5970]], align 8
+// ALL-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]])
 // ALL-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT55:%.*]]
 // ALL:       omp.par.outlined.exit55:
 // ALL-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
index 7774236f9c0a9..7dde6e1f89d2a 100644
--- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c
+++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c
@@ -23,15 +23,15 @@
 //
 // CHECK-DEBUG-LABEL: @_Z14parallel_for_0v(
 // CHECK-DEBUG-NEXT:  entry:
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG12:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG13:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK-DEBUG:       omp_parallel:
-// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg [[DBG13:![0-9]+]]
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @_Z14parallel_for_0v..omp_par to void (i32*, i32*, ...)*)), !dbg [[DBG14:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK-DEBUG:       omp.par.outlined.exit:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK-DEBUG:       omp.par.exit.split:
-// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG17:![0-9]+]]
+// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG18:![0-9]+]]
 //
 void parallel_for_0(void) {
 #pragma omp parallel
@@ -44,6 +44,8 @@ void parallel_for_0(void) {
 
 // CHECK-LABEL: @_Z14parallel_for_1Pfid(
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[STRUCTARG17:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8
+// CHECK-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
 // CHECK-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
@@ -53,7 +55,15 @@ void parallel_for_0(void) {
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK:       omp_parallel:
-// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// CHECK-NEXT:    [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0
+// CHECK-NEXT:    store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8
+// CHECK-NEXT:    [[GEP_A_ADDR18:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1
+// CHECK-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8
+// CHECK-NEXT:    [[GEP_B_ADDR19:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2
+// CHECK-NEXT:    store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8
+// CHECK-NEXT:    [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3
+// CHECK-NEXT:    store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]])
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
 // CHECK:       omp.par.outlined.exit16:
 // CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -62,24 +72,34 @@ void parallel_for_0(void) {
 //
 // CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid(
 // CHECK-DEBUG-NEXT:  entry:
+// CHECK-DEBUG-NEXT:    [[STRUCTARG17:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8
+// CHECK-DEBUG-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
 // CHECK-DEBUG-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
 // CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
 // CHECK-DEBUG-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72:![0-9]+]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]]
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG77:![0-9]+]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG78:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK-DEBUG:       omp_parallel:
-// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG78:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0
+// CHECK-DEBUG-NEXT:    store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8
+// CHECK-DEBUG-NEXT:    [[GEP_A_ADDR18:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1
+// CHECK-DEBUG-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8
+// CHECK-DEBUG-NEXT:    [[GEP_B_ADDR19:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2
+// CHECK-DEBUG-NEXT:    store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8
+// CHECK-DEBUG-NEXT:    [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3
+// CHECK-DEBUG-NEXT:    store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT16:%.*]]
 // CHECK-DEBUG:       omp.par.outlined.exit16:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK-DEBUG:       omp.par.exit.split:
-// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG80:![0-9]+]]
+// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG81:![0-9]+]]
 //
 void parallel_for_1(float *r, int a, double b) {
 #pragma omp parallel
@@ -96,6 +116,10 @@ void parallel_for_1(float *r, int a, double b) {
 
 // CHECK-LABEL: @_Z14parallel_for_2Pfid(
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[STRUCTARG218:%.*]] = alloca { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8
+// CHECK-NEXT:    [[STRUCTARG214:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8
+// CHECK-NEXT:    [[STRUCTARG209:%.*]] = alloca { i32*, double*, float** }, align 8
+// CHECK-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
 // CHECK-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
@@ -113,7 +137,19 @@ void parallel_for_1(float *r, int a, double b) {
 // CHECK-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 // CHECK-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK:       omp_parallel:
-// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]])
+// CHECK-NEXT:    [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0
+// CHECK-NEXT:    store { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG214]], { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }** [[GEP_STRUCTARG214]], align 8
+// CHECK-NEXT:    [[GEP_STRUCTARG219:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 1
+// CHECK-NEXT:    store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG219]], align 8
+// CHECK-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 2
+// CHECK-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+// CHECK-NEXT:    [[GEP_B_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 3
+// CHECK-NEXT:    store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
+// CHECK-NEXT:    [[GEP_R_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 4
+// CHECK-NEXT:    store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
+// CHECK-NEXT:    [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5
+// CHECK-NEXT:    store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8
+// CHECK-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]])
 // CHECK-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
 // CHECK:       omp.par.outlined.exit184:
 // CHECK-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -169,6 +205,10 @@ void parallel_for_1(float *r, int a, double b) {
 //
 // CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid(
 // CHECK-DEBUG-NEXT:  entry:
+// CHECK-DEBUG-NEXT:    [[STRUCTARG218:%.*]] = alloca { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8
+// CHECK-DEBUG-NEXT:    [[STRUCTARG214:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8
+// CHECK-DEBUG-NEXT:    [[STRUCTARG209:%.*]] = alloca { i32*, double*, float** }, align 8
+// CHECK-DEBUG-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8
 // CHECK-DEBUG-NEXT:    [[R_ADDR:%.*]] = alloca float*, align 8
 // CHECK-DEBUG-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-DEBUG-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
@@ -181,68 +221,80 @@ void parallel_for_1(float *r, int a, double b) {
 // CHECK-DEBUG-NEXT:    [[P_UPPERBOUND205:%.*]] = alloca i32, align 4
 // CHECK-DEBUG-NEXT:    [[P_STRIDE206:%.*]] = alloca i32, align 4
 // CHECK-DEBUG-NEXT:    store float* [[R:%.*]], float** [[R_ADDR]], align 8
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store i32 [[A:%.*]], i32* [[A_ADDR]], align 4
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]]
 // CHECK-DEBUG-NEXT:    store double [[B:%.*]], double* [[B_ADDR]], align 8
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]]
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG138:![0-9]+]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK-DEBUG:       omp_parallel:
-// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, double*, float**)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), i32* [[A_ADDR]], double* [[B_ADDR]], float** [[R_ADDR]]), !dbg [[DBG139:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0
+// CHECK-DEBUG-NEXT:    store { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG214]], { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }** [[GEP_STRUCTARG214]], align 8
+// CHECK-DEBUG-NEXT:    [[GEP_STRUCTARG219:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 1
+// CHECK-DEBUG-NEXT:    store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG219]], align 8
+// CHECK-DEBUG-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 2
+// CHECK-DEBUG-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    [[GEP_B_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 3
+// CHECK-DEBUG-NEXT:    store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    [[GEP_R_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 4
+// CHECK-DEBUG-NEXT:    store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8
+// CHECK-DEBUG-NEXT:    [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5
+// CHECK-DEBUG-NEXT:    store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8
+// CHECK-DEBUG-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]]), !dbg [[DBG140:![0-9]+]]
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT184:%.*]]
 // CHECK-DEBUG:       omp.par.outlined.exit184:
 // CHECK-DEBUG-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK-DEBUG:       omp.par.exit.split:
-// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]]
-// CHECK-DEBUG-NEXT:    store i32 0, i32* [[I185]], align 4, !dbg [[DBG146]]
-// CHECK-DEBUG-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG147:![0-9]+]]
-// CHECK-DEBUG-NEXT:    store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG148:![0-9]+]]
-// CHECK-DEBUG-NEXT:    store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]]
+// CHECK-DEBUG-NEXT:    store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]]
+// CHECK-DEBUG-NEXT:    store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]]
+// CHECK-DEBUG-NEXT:    store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]]
 // CHECK-DEBUG:       omp_loop.preheader190:
-// CHECK-DEBUG-NEXT:    store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 1), !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]]
 // CHECK-DEBUG:       omp_loop.header191:
-// CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]]
 // CHECK-DEBUG:       omp_loop.cond192:
-// CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]]
 // CHECK-DEBUG:       omp_loop.body193:
-// CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG149:![0-9]+]]
-// CHECK-DEBUG-NEXT:    [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG149]]
-// CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150:![0-9]+]]
-// CHECK-DEBUG-NEXT:    [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG151:![0-9]+]]
-// CHECK-DEBUG-NEXT:    [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG149]]
-// CHECK-DEBUG-NEXT:    [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG152:![0-9]+]]
-// CHECK-DEBUG-NEXT:    store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG153:![0-9]+]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC194]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG150:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG150]]
+// CHECK-DEBUG-NEXT:    [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG151:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]]
+// CHECK-DEBUG-NEXT:    [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG150]]
+// CHECK-DEBUG-NEXT:    [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]]
+// CHECK-DEBUG-NEXT:    store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_INC194]], !dbg [[DBG148]]
 // CHECK-DEBUG:       omp_loop.inc194:
-// CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]]
 // CHECK-DEBUG:       omp_loop.exit195:
-// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG147]]
-// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]]
-// CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]]
-// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG147]]
+// CHECK-DEBUG-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]]
+// CHECK-DEBUG-NEXT:    [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG151]]
+// CHECK-DEBUG-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]]
+// CHECK-DEBUG-NEXT:    br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]]
 // CHECK-DEBUG:       omp_loop.after196:
-// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG154:![0-9]+]]
+// CHECK-DEBUG-NEXT:    ret void, !dbg [[DBG155:![0-9]+]]
 //
 void parallel_for_2(float *r, int a, double b) {
 #pragma omp parallel
diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp
index 992feea683569..842c5fc3488e2 100644
--- a/clang/test/OpenMP/parallel_codegen.cpp
+++ b/clang/test/OpenMP/parallel_codegen.cpp
@@ -311,70 +311,70 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK2-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]]
 // CHECK2-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGV_ADDR]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4, !dbg [[DBG21:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG22:![0-9]+]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG22]]
-// CHECK2-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG22]]
-// CHECK2-NEXT:    [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG22]]
-// CHECK2-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG22]]
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]]
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30:![0-9]+]]
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG31:![0-9]+]]
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG32:![0-9]+]]
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG33:![0-9]+]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG34:![0-9]+]]
-// CHECK2-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG35:![0-9]+]]
-// CHECK2-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG36:![0-9]+]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG37:![0-9]+]]
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG37]]
-// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG37]]
-// CHECK2-NEXT:    ret i32 [[TMP5]], !dbg [[DBG37]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGV_ADDR]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21:![0-9]+]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4, !dbg [[DBG22:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG23:![0-9]+]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG23]]
+// CHECK2-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG23]]
+// CHECK2-NEXT:    [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG23]]
+// CHECK2-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG23]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]]
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG32:![0-9]+]]
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB5:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined..4 to void (i32*, i32*, ...)*), i64 [[TMP1]]), !dbg [[DBG33:![0-9]+]]
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..8 to void (i32*, i32*, ...)*), i64 [[TMP1]], i32* [[VLA]]), !dbg [[DBG34:![0-9]+]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG35:![0-9]+]]
+// CHECK2-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG36:![0-9]+]]
+// CHECK2-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG37:![0-9]+]]
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG38:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG38]]
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG38]]
+// CHECK2-NEXT:    ret i32 [[TMP5]], !dbg [[DBG38]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG38:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG39:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]]
 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG52:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG52]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG53:![0-9]+]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG53]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG53:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG53]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG54:![0-9]+]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG54]]
 // CHECK2-NEXT:    invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]])
-// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG52]]
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG53]]
 // CHECK2:       invoke.cont:
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG54:![0-9]+]]
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG55:![0-9]+]]
-// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG56:![0-9]+]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG54]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG55:![0-9]+]]
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG56:![0-9]+]]
+// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG57:![0-9]+]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG55]]
 // CHECK2:       terminate.lpad:
 // CHECK2-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
-// CHECK2-NEXT:    catch i8* null, !dbg [[DBG52]]
-// CHECK2-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG52]]
-// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7:[0-9]+]], !dbg [[DBG52]]
-// CHECK2-NEXT:    unreachable, !dbg [[DBG52]]
+// CHECK2-NEXT:    catch i8* null, !dbg [[DBG53]]
+// CHECK2-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG53]]
+// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7:[0-9]+]], !dbg [[DBG53]]
+// CHECK2-NEXT:    unreachable, !dbg [[DBG53]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIiEvT_
-// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG57:![0-9]+]] {
+// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG58:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK2-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG63:![0-9]+]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG64:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG65:![0-9]+]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate
@@ -385,31 +385,31 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined.
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG65:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG66:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]]
 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG71:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG71]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG71]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG71]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG71]]
-// CHECK2-NEXT:    call void @.omp_outlined._debug__(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG71]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG71]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG72:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG72]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG72]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]]
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG72]]
+// CHECK2-NEXT:    call void @.omp_outlined._debug__(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG72]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG72]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.1
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG74:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG75:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -418,27 +418,27 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    [[SAVED_STACK:%.*]] = alloca i8*, align 8
 // CHECK2-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG81:![0-9]+]]
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[GLOBAL]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG81]]
-// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG81]]
-// CHECK2-NEXT:    [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG81]]
-// CHECK2-NEXT:    store i64 [[TMP0]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG81]]
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]]
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[VLA1]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]]
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[VLA1]], i32* [[GLOBAL]]), !dbg [[DBG81]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG85:![0-9]+]]
-// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP2]]), !dbg [[DBG85]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG87:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG82:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[GLOBAL]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG82]]
+// CHECK2-NEXT:    store i8* [[TMP1]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG82]]
+// CHECK2-NEXT:    [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG82]]
+// CHECK2-NEXT:    store i64 [[TMP0]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG82]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32* [[VLA1]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]]
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[VLA1]], i32* [[GLOBAL]]), !dbg [[DBG82]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.stackrestore(i8* [[TMP2]]), !dbg [[DBG86]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG88:![0-9]+]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.2
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG88:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG89:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -446,37 +446,37 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[GLOBAL_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]]
 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG96:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[GLOBAL]], i32** [[GLOBAL_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META97:![0-9]+]], metadata !DIExpression()), !dbg [[DBG98:![0-9]+]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG99:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG99]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG99]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG100:![0-9]+]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG100]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG99:![0-9]+]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG100:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG100]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG100]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG101:![0-9]+]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG101]]
 // CHECK2-NEXT:    invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]])
-// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG99]]
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG100]]
 // CHECK2:       invoke.cont:
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG101:![0-9]+]]
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG102:![0-9]+]]
-// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG103:![0-9]+]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG101]]
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 4, !dbg [[DBG102:![0-9]+]]
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG103:![0-9]+]]
+// CHECK2-NEXT:    store i32 [[TMP4]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG104:![0-9]+]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG102]]
 // CHECK2:       terminate.lpad:
 // CHECK2-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
-// CHECK2-NEXT:    catch i8* null, !dbg [[DBG99]]
-// CHECK2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG99]]
-// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG99]]
-// CHECK2-NEXT:    unreachable, !dbg [[DBG99]]
+// CHECK2-NEXT:    catch i8* null, !dbg [[DBG100]]
+// CHECK2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG100]]
+// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG100]]
+// CHECK2-NEXT:    unreachable, !dbg [[DBG100]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..3
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] !dbg [[DBG104:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] !dbg [[DBG105:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -484,166 +484,166 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[GLOBAL_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META105:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]]
 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]]
 // CHECK2-NEXT:    store i32* [[GLOBAL]], i32** [[GLOBAL_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG111:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG111]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG111]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG111]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG111]]
-// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG111]]
-// CHECK2-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG111]]
-// CHECK2-NEXT:    call void @.omp_outlined._debug__.2(i32* [[TMP3]], i32* [[TMP4]], i64 [[TMP0]], i32* [[TMP5]], i32* [[TMP6]]) #[[ATTR6]], !dbg [[DBG111]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG111]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[GLOBAL_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG112:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG112]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG112]]
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG112]]
+// CHECK2-NEXT:    [[TMP5:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG112]]
+// CHECK2-NEXT:    [[TMP6:%.*]] = load i32*, i32** [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]]
+// CHECK2-NEXT:    call void @.omp_outlined._debug__.2(i32* [[TMP3]], i32* [[TMP4]], i64 [[TMP0]], i32* [[TMP5]], i32* [[TMP6]]) #[[ATTR6]], !dbg [[DBG112]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG112]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..4
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG112:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG113:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG117:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG117]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG117]]
-// CHECK2-NEXT:    call void @.omp_outlined._debug__.1(i32* [[TMP1]], i32* [[TMP2]], i64 [[TMP0]]) #[[ATTR6]], !dbg [[DBG117]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG117]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG118:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG118]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG118]]
+// CHECK2-NEXT:    call void @.omp_outlined._debug__.1(i32* [[TMP1]], i32* [[TMP2]], i64 [[TMP0]]) #[[ATTR6]], !dbg [[DBG118]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG118]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.5
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG118:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG119:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]]
 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG125:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG125]]
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[TMP1]]), !dbg [[DBG125]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG126:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG126:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG126]]
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB7:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i32*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP0]], i32* [[TMP1]]), !dbg [[DBG126]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG127:![0-9]+]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.6
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG127:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG128:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]]
 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG134:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG134]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG135:![0-9]+]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG135]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG135:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG135]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG136:![0-9]+]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG136]]
 // CHECK2-NEXT:    invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]])
-// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG134]]
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG135]]
 // CHECK2:       invoke.cont:
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG136:![0-9]+]]
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG137:![0-9]+]]
-// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG138:![0-9]+]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG136]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG137:![0-9]+]]
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 1, !dbg [[DBG138:![0-9]+]]
+// CHECK2-NEXT:    store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG139:![0-9]+]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG137]]
 // CHECK2:       terminate.lpad:
 // CHECK2-NEXT:    [[TMP4:%.*]] = landingpad { i8*, i32 }
-// CHECK2-NEXT:    catch i8* null, !dbg [[DBG134]]
-// CHECK2-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG134]]
-// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7]], !dbg [[DBG134]]
-// CHECK2-NEXT:    unreachable, !dbg [[DBG134]]
+// CHECK2-NEXT:    catch i8* null, !dbg [[DBG135]]
+// CHECK2-NEXT:    [[TMP5:%.*]] = extractvalue { i8*, i32 } [[TMP4]], 0, !dbg [[DBG135]]
+// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[TMP5]]) #[[ATTR7]], !dbg [[DBG135]]
+// CHECK2-NEXT:    unreachable, !dbg [[DBG135]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..7
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG139:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG140:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]]
 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG145:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG145]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG145]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG145]]
-// CHECK2-NEXT:    call void @.omp_outlined._debug__.6(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG145]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG145]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG146:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG146]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG146]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG146]]
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG146]]
+// CHECK2-NEXT:    call void @.omp_outlined._debug__.6(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG146]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG146]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..8
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG146:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], i32* noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG147:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]]
 // CHECK2-NEXT:    store i32* [[A]], i32** [[A_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG152:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG152]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG152]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG152]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG152]]
-// CHECK2-NEXT:    call void @.omp_outlined._debug__.5(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG152]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG152]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[A_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG153:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG153]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG153]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG153]]
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8, !dbg [[DBG153]]
+// CHECK2-NEXT:    call void @.omp_outlined._debug__.5(i32* [[TMP2]], i32* [[TMP3]], i64 [[TMP0]], i32* [[TMP4]]) #[[ATTR6]], !dbg [[DBG153]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG153]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_
-// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG153:![0-9]+]] {
+// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG154:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 8
 // CHECK2-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159:![0-9]+]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG160:![0-9]+]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG160]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG160]]
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG160]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG160]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG161:![0-9]+]]
-// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG162:![0-9]+]]
-// CHECK2-NEXT:    ret i32 0, !dbg [[DBG163:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG161:![0-9]+]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG161]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG161]]
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG161]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG161]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG162:![0-9]+]]
+// CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i8***, i64)* @.omp_outlined..10 to void (i32*, i32*, ...)*), i8*** [[ARGC_ADDR]], i64 [[TMP3]]), !dbg [[DBG163:![0-9]+]]
+// CHECK2-NEXT:    ret i32 0, !dbg [[DBG164:![0-9]+]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined._debug__.9
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG164:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG165:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
@@ -651,69 +651,70 @@ int main (int argc, char **argv) {
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    [[VAR:%.*]] = alloca double*, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META168:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]]
 // CHECK2-NEXT:    store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG174:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG174]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[TMP0]], align 8, !dbg [[DBG175:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG175:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG175]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[TMP0]], align 8, !dbg [[DBG176:![0-9]+]]
 // CHECK2-NEXT:    invoke void @_Z3fooIPPcEvT_(i8** noundef [[TMP2]])
-// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG177:![0-9]+]]
+// CHECK2-NEXT:    to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG178:![0-9]+]]
 // CHECK2:       invoke.cont:
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG185:![0-9]+]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG186:![0-9]+]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG186]]
-// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]], !dbg [[DBG186]]
-// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 0, !dbg [[DBG186]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG187:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG186:![0-9]+]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG187:![0-9]+]]
+// CHECK2-NEXT:    [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG187]]
+// CHECK2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]], !dbg [[DBG187]]
+// CHECK2-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX]], i64 0, !dbg [[DBG187]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG188:![0-9]+]]
 // CHECK2:       terminate.lpad:
 // CHECK2-NEXT:    [[TMP5:%.*]] = landingpad { i8*, i32 }
-// CHECK2-NEXT:    catch i8* null, !dbg [[DBG177]]
-// CHECK2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG177]]
-// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG177]]
-// CHECK2-NEXT:    unreachable, !dbg [[DBG177]]
+// CHECK2-NEXT:    catch i8* null, !dbg [[DBG178]]
+// CHECK2-NEXT:    [[TMP6:%.*]] = extractvalue { i8*, i32 } [[TMP5]], 0, !dbg [[DBG178]]
+// CHECK2-NEXT:    call void @__clang_call_terminate(i8* [[TMP6]]) #[[ATTR7]], !dbg [[DBG178]]
+// CHECK2-NEXT:    unreachable, !dbg [[DBG178]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_
-// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG188:![0-9]+]] {
+// CHECK2-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG189:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 8
 // CHECK2-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META191:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG193:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG194:![0-9]+]]
 //
 //
 // CHECK2-LABEL: define {{[^@]+}}@.omp_outlined..10
-// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG194:![0-9]+]] {
+// CHECK2-SAME: (i32* noalias noundef [[DOTGLOBAL_TID_:%.*]], i32* noalias noundef [[DOTBOUND_TID_:%.*]], i8*** noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG195:![0-9]+]] {
 // CHECK2-NEXT:  entry:
 // CHECK2-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8
 // CHECK2-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8***, align 8
 // CHECK2-NEXT:    [[VLA_ADDR:%.*]] = alloca i64, align 8
 // CHECK2-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196:![0-9]+]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTGLOBAL_TID__ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]]
 // CHECK2-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i32** [[DOTBOUND_TID__ADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]]
 // CHECK2-NEXT:    store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i8**** [[ARGC_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]]
 // CHECK2-NEXT:    store i64 [[VLA]], i64* [[VLA_ADDR]], align 8
-// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG196]]
-// CHECK2-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG200:![0-9]+]]
-// CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG200]]
-// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG200]]
-// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG200]]
-// CHECK2-NEXT:    [[TMP4:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG200]]
-// CHECK2-NEXT:    call void @.omp_outlined._debug__.9(i32* [[TMP2]], i32* [[TMP3]], i8*** [[TMP4]], i64 [[TMP1]]) #[[ATTR6]], !dbg [[DBG200]]
-// CHECK2-NEXT:    ret void, !dbg [[DBG200]]
+// CHECK2-NEXT:    call void @llvm.dbg.declare(metadata i64* [[VLA_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]]
+// CHECK2-NEXT:    [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG201:![0-9]+]]
+// CHECK2-NEXT:    [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG201]]
+// CHECK2-NEXT:    [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG201]]
+// CHECK2-NEXT:    [[TMP3:%.*]] = load i32*, i32** [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG201]]
+// CHECK2-NEXT:    [[TMP4:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8, !dbg [[DBG201]]
+// CHECK2-NEXT:    call void @.omp_outlined._debug__.9(i32* [[TMP2]], i32* [[TMP3]], i8*** [[TMP4]], i64 [[TMP1]]) #[[ATTR6]], !dbg [[DBG201]]
+// CHECK2-NEXT:    ret void, !dbg [[DBG201]]
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@main
 // CHECK3-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 // CHECK3-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK3-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
@@ -731,7 +732,9 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]])
 // CHECK3-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK3:       omp_parallel:
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[VLA]])
+// CHECK3-NEXT:    [[GEP_VLA:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+// CHECK3-NEXT:    store i32* [[VLA]], i32** [[GEP_VLA]], align 8
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK3:       omp.par.outlined.exit:
 // CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -746,20 +749,22 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@main..omp_par
-// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[VLA:%.*]]) #[[ATTR1:[0-9]+]] {
+// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
 // CHECK3-NEXT:  omp.par.entry:
+// CHECK3-NEXT:    [[GEP_VLA:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT:    [[LOADGEP_VLA:%.*]] = load i32*, i32** [[GEP_VLA]], align 8
 // CHECK3-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK3-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK3-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK3:       omp.par.region:
-// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 1
-// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
-// CHECK3-NEXT:    call void @_Z3fooIiEvT_(i32 noundef [[TMP1]])
-// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* @global, align 4
-// CHECK3-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 1
-// CHECK3-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX1]], align 4
+// CHECK3-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1
+// CHECK3-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
+// CHECK3-NEXT:    call void @_Z3fooIiEvT_(i32 noundef [[TMP2]])
+// CHECK3-NEXT:    [[TMP3:%.*]] = load i32, i32* @global, align 4
+// CHECK3-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1
+// CHECK3-NEXT:    store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4
 // CHECK3-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK3:       omp.par.pre_finalize:
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]]
@@ -778,6 +783,7 @@ int main (int argc, char **argv) {
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_
 // CHECK3-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat {
 // CHECK3-NEXT:  entry:
+// CHECK3-NEXT:    [[STRUCTARG:%.*]] = alloca { i64*, i8*** }, align 8
 // CHECK3-NEXT:    [[DOTRELOADED:%.*]] = alloca i64, align 8
 // CHECK3-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 8
 // CHECK3-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
@@ -791,7 +797,11 @@ int main (int argc, char **argv) {
 // CHECK3-NEXT:    store i64 [[TMP3]], i64* [[DOTRELOADED]], align 8
 // CHECK3-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK3:       omp_parallel:
-// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i8***)* @_Z5tmainIPPcEiT_..omp_par to void (i32*, i32*, ...)*), i64* [[DOTRELOADED]], i8*** [[ARGC_ADDR]])
+// CHECK3-NEXT:    [[GEP__RELOADED:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[STRUCTARG]], i32 0, i32 0
+// CHECK3-NEXT:    store i64* [[DOTRELOADED]], i64** [[GEP__RELOADED]], align 8
+// CHECK3-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[STRUCTARG]], i32 0, i32 1
+// CHECK3-NEXT:    store i8*** [[ARGC_ADDR]], i8**** [[GEP_ARGC_ADDR]], align 8
+// CHECK3-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i64*, i8*** }*)* @_Z5tmainIPPcEiT_..omp_par to void (i32*, i32*, ...)*), { i64*, i8*** }* [[STRUCTARG]])
 // CHECK3-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK3:       omp.par.outlined.exit:
 // CHECK3-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -800,21 +810,25 @@ int main (int argc, char **argv) {
 //
 //
 // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i64* [[DOTRELOADED:%.*]], i8*** [[ARGC_ADDR:%.*]]) #[[ATTR1]] {
+// CHECK3-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i64*, i8*** }* [[TMP0:%.*]]) #[[ATTR1]] {
 // CHECK3-NEXT:  omp.par.entry:
+// CHECK3-NEXT:    [[GEP__RELOADED:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[TMP0]], i32 0, i32 0
+// CHECK3-NEXT:    [[LOADGEP__RELOADED:%.*]] = load i64*, i64** [[GEP__RELOADED]], align 8
+// CHECK3-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[TMP0]], i32 0, i32 1
+// CHECK3-NEXT:    [[LOADGEP_ARGC_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGC_ADDR]], align 8
 // CHECK3-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-// CHECK3-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-// CHECK3-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// CHECK3-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+// CHECK3-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK3-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-// CHECK3-NEXT:    [[TMP1:%.*]] = load i64, i64* [[DOTRELOADED]], align 8
+// CHECK3-NEXT:    [[TMP2:%.*]] = load i64, i64* [[LOADGEP__RELOADED]], align 8
 // CHECK3-NEXT:    [[VAR:%.*]] = alloca double*, align 8
 // CHECK3-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK3:       omp.par.region:
-// CHECK3-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8
-// CHECK3-NEXT:    call void @_Z3fooIPPcEvT_(i8** noundef [[TMP2]])
-// CHECK3-NEXT:    [[TMP3:%.*]] = load double*, double** [[VAR]], align 8
-// CHECK3-NEXT:    [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]]
-// CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]]
+// CHECK3-NEXT:    [[TMP3:%.*]] = load i8**, i8*** [[LOADGEP_ARGC_ADDR]], align 8
+// CHECK3-NEXT:    call void @_Z3fooIPPcEvT_(i8** noundef [[TMP3]])
+// CHECK3-NEXT:    [[TMP4:%.*]] = load double*, double** [[VAR]], align 8
+// CHECK3-NEXT:    [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]]
+// CHECK3-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]]
 // CHECK3-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0
 // CHECK3-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]]
 // CHECK3:       omp.par.pre_finalize:
@@ -834,6 +848,7 @@ int main (int argc, char **argv) {
 // CHECK4-LABEL: define {{[^@]+}}@main
 // CHECK4-SAME: (i32 noundef [[ARGC:%.*]], i8** noundef [[ARGV:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG11:![0-9]+]] {
 // CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 // CHECK4-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    [[ARGV_ADDR:%.*]] = alloca i8**, align 8
@@ -841,121 +856,133 @@ int main (int argc, char **argv) {
 // CHECK4-NEXT:    [[__VLA_EXPR0:%.*]] = alloca i64, align 8
 // CHECK4-NEXT:    store i32 0, i32* [[RETVAL]], align 4
 // CHECK4-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]]
 // CHECK4-NEXT:    store i8** [[ARGV]], i8*** [[ARGV_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGV_ADDR]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]]
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4, !dbg [[DBG20:![0-9]+]]
-// CHECK4-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG20]]
-// CHECK4-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG20]]
-// CHECK4-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG20]]
-// CHECK4-NEXT:    [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG20]]
-// CHECK4-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG20]]
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]]
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20]]
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG28:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGV_ADDR]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]]
+// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4, !dbg [[DBG21:![0-9]+]]
+// CHECK4-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG21]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG21]]
+// CHECK4-NEXT:    store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG21]]
+// CHECK4-NEXT:    [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG21]]
+// CHECK4-NEXT:    store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG21]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i64* [[__VLA_EXPR0]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[VLA]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]), !dbg [[DBG29:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK4:       omp_parallel:
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @main..omp_par to void (i32*, i32*, ...)*), i32* [[VLA]]), !dbg [[DBG29:![0-9]+]]
+// CHECK4-NEXT:    [[GEP_VLA:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+// CHECK4-NEXT:    store i32* [[VLA]], i32** [[GEP_VLA]], align 8
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @main..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]]), !dbg [[DBG30:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK4:       omp.par.outlined.exit:
 // CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK4:       omp.par.exit.split:
-// CHECK4-NEXT:    [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG30:![0-9]+]]
-// CHECK4-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG30]]
-// CHECK4-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG30]]
-// CHECK4-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG31:![0-9]+]]
-// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG31]]
-// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG31]]
-// CHECK4-NEXT:    ret i32 [[TMP5]], !dbg [[DBG31]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = load i8**, i8*** [[ARGV_ADDR]], align 8, !dbg [[DBG31:![0-9]+]]
+// CHECK4-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z5tmainIPPcEiT_(i8** noundef [[TMP3]]), !dbg [[DBG31]]
+// CHECK4-NEXT:    store i32 [[CALL]], i32* [[RETVAL]], align 4, !dbg [[DBG31]]
+// CHECK4-NEXT:    [[TMP4:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG32:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.stackrestore(i8* [[TMP4]]), !dbg [[DBG32]]
+// CHECK4-NEXT:    [[TMP5:%.*]] = load i32, i32* [[RETVAL]], align 4, !dbg [[DBG32]]
+// CHECK4-NEXT:    ret i32 [[TMP5]], !dbg [[DBG32]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@main..omp_par
-// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[VLA:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG32:![0-9]+]] {
+// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG33:![0-9]+]] {
 // CHECK4-NEXT:  omp.par.entry:
+// CHECK4-NEXT:    [[GEP_VLA:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+// CHECK4-NEXT:    [[LOADGEP_VLA:%.*]] = load i32*, i32** [[GEP_VLA]], align 8
 // CHECK4-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-// CHECK4-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK4-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK4-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK4:       omp.par.region:
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 1, !dbg [[DBG34:![0-9]+]]
-// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG34]]
-// CHECK4-NEXT:    call void @_Z3fooIiEvT_(i32 noundef [[TMP1]]), !dbg [[DBG34]]
-// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG34]]
-// CHECK4-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[VLA]], i64 1, !dbg [[DBG34]]
-// CHECK4-NEXT:    store i32 [[TMP2]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG34]]
-// CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG34]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1, !dbg [[DBG35:![0-9]+]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !dbg [[DBG35]]
+// CHECK4-NEXT:    call void @_Z3fooIiEvT_(i32 noundef [[TMP2]]), !dbg [[DBG35]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = load i32, i32* @global, align 4, !dbg [[DBG35]]
+// CHECK4-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[LOADGEP_VLA]], i64 1, !dbg [[DBG35]]
+// CHECK4-NEXT:    store i32 [[TMP3]], i32* [[ARRAYIDX1]], align 4, !dbg [[DBG35]]
+// CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG35]]
 // CHECK4:       omp.par.pre_finalize:
-// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG34]]
+// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG35]]
 // CHECK4:       omp.par.outlined.exit.exitStub:
 // CHECK4-NEXT:    ret void
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3fooIiEvT_
-// CHECK4-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat !dbg [[DBG35:![0-9]+]] {
+// CHECK4-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat !dbg [[DBG36:![0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i32, align 4
 // CHECK4-NEXT:    store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG41:![0-9]+]]
-// CHECK4-NEXT:    ret void, !dbg [[DBG41]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i32* [[ARGC_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG42:![0-9]+]]
+// CHECK4-NEXT:    ret void, !dbg [[DBG42]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_
-// CHECK4-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat !dbg [[DBG44:![0-9]+]] {
+// CHECK4-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat !dbg [[DBG45:![0-9]+]] {
 // CHECK4-NEXT:  entry:
+// CHECK4-NEXT:    [[STRUCTARG:%.*]] = alloca { i64*, i8*** }, align 8
 // CHECK4-NEXT:    [[DOTRELOADED:%.*]] = alloca i64, align 8
 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 8
 // CHECK4-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]]
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG51:![0-9]+]]
-// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG51]]
-// CHECK4-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG51]]
-// CHECK4-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG51]]
-// CHECK4-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG51]]
-// CHECK4-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG51]]
-// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG52:![0-9]+]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]]
+// CHECK4-NEXT:    [[TMP0:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG52:![0-9]+]]
+// CHECK4-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8*, i8** [[TMP0]], i64 0, !dbg [[DBG52]]
+// CHECK4-NEXT:    [[TMP1:%.*]] = load i8*, i8** [[ARRAYIDX]], align 8, !dbg [[DBG52]]
+// CHECK4-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 0, !dbg [[DBG52]]
+// CHECK4-NEXT:    [[TMP2:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1, !dbg [[DBG52]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG52]]
+// CHECK4-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]), !dbg [[DBG53:![0-9]+]]
 // CHECK4-NEXT:    store i64 [[TMP3]], i64* [[DOTRELOADED]], align 8
 // CHECK4-NEXT:    br label [[OMP_PARALLEL:%.*]]
 // CHECK4:       omp_parallel:
-// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64*, i8***)* @_Z5tmainIPPcEiT_..omp_par to void (i32*, i32*, ...)*), i64* [[DOTRELOADED]], i8*** [[ARGC_ADDR]]), !dbg [[DBG53:![0-9]+]]
+// CHECK4-NEXT:    [[GEP__RELOADED:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[STRUCTARG]], i32 0, i32 0
+// CHECK4-NEXT:    store i64* [[DOTRELOADED]], i64** [[GEP__RELOADED]], align 8
+// CHECK4-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[STRUCTARG]], i32 0, i32 1
+// CHECK4-NEXT:    store i8*** [[ARGC_ADDR]], i8**** [[GEP_ARGC_ADDR]], align 8
+// CHECK4-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i64*, i8*** }*)* @_Z5tmainIPPcEiT_..omp_par to void (i32*, i32*, ...)*), { i64*, i8*** }* [[STRUCTARG]]), !dbg [[DBG54:![0-9]+]]
 // CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 // CHECK4:       omp.par.outlined.exit:
 // CHECK4-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
 // CHECK4:       omp.par.exit.split:
-// CHECK4-NEXT:    ret i32 0, !dbg [[DBG55:![0-9]+]]
+// CHECK4-NEXT:    ret i32 0, !dbg [[DBG56:![0-9]+]]
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par
-// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i64* [[DOTRELOADED:%.*]], i8*** [[ARGC_ADDR:%.*]]) #[[ATTR1]] !dbg [[DBG56:![0-9]+]] {
+// CHECK4-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i64*, i8*** }* [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG57:![0-9]+]] {
 // CHECK4-NEXT:  omp.par.entry:
+// CHECK4-NEXT:    [[GEP__RELOADED:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[TMP0]], i32 0, i32 0
+// CHECK4-NEXT:    [[LOADGEP__RELOADED:%.*]] = load i64*, i64** [[GEP__RELOADED]], align 8
+// CHECK4-NEXT:    [[GEP_ARGC_ADDR:%.*]] = getelementptr { i64*, i8*** }, { i64*, i8*** }* [[TMP0]], i32 0, i32 1
+// CHECK4-NEXT:    [[LOADGEP_ARGC_ADDR:%.*]] = load i8***, i8**** [[GEP_ARGC_ADDR]], align 8
 // CHECK4-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-// CHECK4-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-// CHECK4-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+// CHECK4-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+// CHECK4-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 // CHECK4-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-// CHECK4-NEXT:    [[TMP1:%.*]] = load i64, i64* [[DOTRELOADED]], align 8
+// CHECK4-NEXT:    [[TMP2:%.*]] = load i64, i64* [[LOADGEP__RELOADED]], align 8
 // CHECK4-NEXT:    [[VAR:%.*]] = alloca double*, align 8
 // CHECK4-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 // CHECK4:       omp.par.region:
-// CHECK4-NEXT:    [[TMP2:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8, !dbg [[DBG57:![0-9]+]]
-// CHECK4-NEXT:    call void @_Z3fooIPPcEvT_(i8** noundef [[TMP2]]), !dbg [[DBG57]]
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META58:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]]
-// CHECK4-NEXT:    [[TMP3:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG65]]
-// CHECK4-NEXT:    [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG65]]
-// CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP3]], i64 [[TMP4]], !dbg [[DBG65]]
-// CHECK4-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0, !dbg [[DBG65]]
-// CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG66:![0-9]+]]
+// CHECK4-NEXT:    [[TMP3:%.*]] = load i8**, i8*** [[LOADGEP_ARGC_ADDR]], align 8, !dbg [[DBG58:![0-9]+]]
+// CHECK4-NEXT:    call void @_Z3fooIPPcEvT_(i8** noundef [[TMP3]]), !dbg [[DBG58]]
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata double** [[VAR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66:![0-9]+]]
+// CHECK4-NEXT:    [[TMP4:%.*]] = load double*, double** [[VAR]], align 8, !dbg [[DBG66]]
+// CHECK4-NEXT:    [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[DBG66]]
+// CHECK4-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds double, double* [[TMP4]], i64 [[TMP5]], !dbg [[DBG66]]
+// CHECK4-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX2]], i64 0, !dbg [[DBG66]]
+// CHECK4-NEXT:    br label [[OMP_PAR_PRE_FINALIZE:%.*]], !dbg [[DBG67:![0-9]+]]
 // CHECK4:       omp.par.pre_finalize:
-// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG66]]
+// CHECK4-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG67]]
 // CHECK4:       omp.par.outlined.exit.exitStub:
 // CHECK4-NEXT:    ret void
 //
 //
 // CHECK4-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_
-// CHECK4-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR5]] comdat !dbg [[DBG67:![0-9]+]] {
+// CHECK4-SAME: (i8** noundef [[ARGC:%.*]]) #[[ATTR5]] comdat !dbg [[DBG68:![0-9]+]] {
 // CHECK4-NEXT:  entry:
 // CHECK4-NEXT:    [[ARGC_ADDR:%.*]] = alloca i8**, align 8
 // CHECK4-NEXT:    store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8
-// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71:![0-9]+]]
-// CHECK4-NEXT:    ret void, !dbg [[DBG71]]
-//
+// CHECK4-NEXT:    call void @llvm.dbg.declare(metadata i8*** [[ARGC_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72:![0-9]+]]
+// CHECK4-NEXT:    ret void, !dbg [[DBG72]]
 //
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index cfebdbd7b2217..85dd28ec31596 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -41,10 +41,7 @@ class OpenMPIRBuilder {
   /// Finalize the underlying module, e.g., by outlining regions.
   /// \param Fn                    The function to be finalized. If not used,
   ///                              all functions are finalized.
-  /// \param AllowExtractorSinking Flag to include sinking instructions,
-  ///                              emitted by CodeExtractor, in the
-  ///                              outlined region. Default is false.
-  void finalize(Function *Fn = nullptr, bool AllowExtractorSinking = false);
+  void finalize(Function *Fn = nullptr);
 
   /// Add attributes known for \p FnID to \p Fn.
   void addAttributes(omp::RuntimeFunction FnID, Function &Fn);
@@ -772,6 +769,7 @@ class OpenMPIRBuilder {
     using PostOutlineCBTy = std::function<void(Function &)>;
     PostOutlineCBTy PostOutlineCB;
     BasicBlock *EntryBB, *ExitBB;
+    SmallVector<Value *, 2> ExcludeArgsFromAggregate;
 
     /// Collect all blocks in between EntryBB and ExitBB in both the given
     /// vector and set.
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 177ae9a47db75..d9b9035a54250 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -174,7 +174,7 @@ Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
 
 void OpenMPIRBuilder::initialize() { initializeTypes(M); }
 
-void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
+void OpenMPIRBuilder::finalize(Function *Fn) {
   SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
   SmallVector<BasicBlock *, 32> Blocks;
   SmallVector<OutlineInfo, 16> DeferredOutlines;
@@ -193,7 +193,7 @@ void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
     Function *OuterFn = OI.getFunction();
     CodeExtractorAnalysisCache CEAC(*OuterFn);
     CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
-                            /* AggregateArgs */ false,
+                            /* AggregateArgs */ true,
                             /* BlockFrequencyInfo */ nullptr,
                             /* BranchProbabilityInfo */ nullptr,
                             /* AssumptionCache */ nullptr,
@@ -207,6 +207,9 @@ void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
     assert(Extractor.isEligible() &&
            "Expected OpenMP outlining to be possible!");
 
+    for (auto *V : OI.ExcludeArgsFromAggregate)
+      Extractor.excludeArgFromAggregate(V);
+
     Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
 
     LLVM_DEBUG(dbgs() << "After      outlining: " << *OuterFn << "\n");
@@ -225,25 +228,25 @@ void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
       BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
       assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
       assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
-      if (AllowExtractorSinking) {
-        // Move instructions from the to-be-deleted ArtificialEntry to the entry
-        // basic block of the parallel region. CodeExtractor may have sunk
-        // allocas/bitcasts for values that are solely used in the outlined
-        // region and do not escape.
-        assert(!ArtificialEntry.empty() &&
-               "Expected instructions to sink in the outlined region");
-        for (BasicBlock::iterator It = ArtificialEntry.begin(),
-                                  End = ArtificialEntry.end();
-             It != End;) {
-          Instruction &I = *It;
-          It++;
-
-          if (I.isTerminator())
-            continue;
-
-          I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
-        }
+      // Move instructions from the to-be-deleted ArtificialEntry to the entry
+      // basic block of the parallel region. CodeExtractor generates
+      // instructions to unwrap the aggregate argument and may sink
+      // allocas/bitcasts for values that are solely used in the outlined region
+      // and do not escape.
+      assert(!ArtificialEntry.empty() &&
+             "Expected instructions to add in the outlined region entry");
+      for (BasicBlock::reverse_iterator It = ArtificialEntry.rbegin(),
+                                        End = ArtificialEntry.rend();
+           It != End;) {
+        Instruction &I = *It;
+        It++;
+
+        if (I.isTerminator())
+          continue;
+
+        I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
       }
+
       OI.EntryBB->moveBefore(&ArtificialEntry);
       ArtificialEntry.eraseFromParent();
     }
@@ -811,8 +814,10 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
       getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
 
   auto PrivHelper = [&](Value &V) {
-    if (&V == TIDAddr || &V == ZeroAddr)
+    if (&V == TIDAddr || &V == ZeroAddr) {
+      OI.ExcludeArgsFromAggregate.push_back(&V);
       return;
+    }
 
     SetVector<Use *> Uses;
     for (Use &U : V.uses())
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 6288d2ff4b01b..80f4cbe3c8a51 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1074,8 +1074,7 @@ struct OpenMPOpt {
       BranchInst::Create(AfterBB, AfterIP.getBlock());
 
       // Perform the actual outlining.
-      OMPInfoCache.OMPBuilder.finalize(OriginalFn,
-                                       /* AllowExtractorSinking */ true);
+      OMPInfoCache.OMPBuilder.finalize(OriginalFn);
 
       Function *OutlinedFn = MergableCIs.front()->getCaller();
 
diff --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
index b4ff9d9ffee6d..075e1b86b850c 100644
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -4692,12 +4692,15 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4708,20 +4711,22 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4835,12 +4840,15 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4853,29 +4861,31 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
-; CHECK1-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4886,9 +4896,9 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK1-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -4918,6 +4928,7 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float
 ; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
 ; CHECK1-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
 ; CHECK1-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK1-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
@@ -4925,7 +4936,13 @@ entry:
 ; CHECK1-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
+; CHECK1-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
+; CHECK1-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK1-NEXT:    store float* [[P]], float** [[GEP_P]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4936,30 +4953,36 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_RELOADED:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
+; CHECK1-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
+; CHECK1-NEXT:    [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK1-NEXT:    [[TMP1:%.*]] = load float, float* [[F_RELOADED]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
-; CHECK1-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -4970,8 +4993,8 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP1]], 0x40091EB860000000
-; CHECK1-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
+; CHECK1-NEXT:    store float [[ADD]], float* [[LOADGEP_P]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -5003,13 +5026,18 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i64* }, align 8
 ; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    store i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i64* }*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), { i32*, i64* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5022,29 +5050,33 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i64* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load i64*, i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
-; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK1-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
@@ -5056,11 +5088,11 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK1-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4
 ; CHECK1-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
-; CHECK1-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -5090,12 +5122,15 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5106,30 +5141,32 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
 ; CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
-; CHECK1-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5140,12 +5177,12 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
-; CHECK1-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
-; CHECK1-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
-; CHECK1-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
-; CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]])
+; CHECK1-NEXT:    [[TMP5:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK1-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK1-NEXT:    store i32 [[TMP6]], i32* [[B]], align 4
+; CHECK1-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -5175,6 +5212,7 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
 ; CHECK1-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
@@ -5183,9 +5221,15 @@ entry:
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
+; CHECK1-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK1-NEXT:    store i32* [[B]], i32** [[GEP_B]], align 8
 ; CHECK1-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5198,30 +5242,36 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK1-NEXT:    [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
-; CHECK1-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK1-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_B]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5232,9 +5282,9 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-; CHECK1-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK1-NEXT:    [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -5266,6 +5316,7 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions
 ; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
 ; CHECK1-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
@@ -5273,7 +5324,11 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5284,20 +5339,24 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5340,6 +5399,7 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
 ; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
 ; CHECK1-NEXT:    [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
@@ -5349,7 +5409,13 @@ entry:
 ; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
+; CHECK1-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK1-NEXT:    store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5360,30 +5426,36 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_RELOADED:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
+; CHECK1-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK1-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[CANCEL1_RELOADED]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
-; CHECK1-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5394,9 +5466,9 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK1-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
-; CHECK1-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -5438,12 +5510,15 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_3
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_3..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5454,23 +5529,25 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK1-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5509,6 +5586,7 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq
 ; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
 ; CHECK1-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
@@ -5518,7 +5596,15 @@ entry:
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK1-NEXT:    store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
+; CHECK1-NEXT:    store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5531,42 +5617,50 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK1-NEXT:    [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK1-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
+; CHECK1-NEXT:    [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4
+; CHECK1-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split:
-; CHECK1-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
-; CHECK1-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
-; CHECK1-NEXT:    br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK1-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK1-NEXT:    br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK1:       omp_region.end4:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split.split.split:
-; CHECK1-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5577,9 +5671,9 @@ entry:
 ; CHECK1:       omp_region.body5:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK1:       seq.par.merged2:
-; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
-; CHECK1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP1]]
-; CHECK1-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
+; CHECK1-NEXT:    store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
@@ -5589,8 +5683,8 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-; CHECK1-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK1-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
 ; CHECK1-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -5739,12 +5833,15 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK1-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5757,20 +5854,22 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
+; CHECK1-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK1-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK1-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.region:
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK1:       omp.par.merged:
-; CHECK1-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK1-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK1-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK1:       entry.split:
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5809,12 +5908,15 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5825,20 +5927,22 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined.(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..1(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -5952,12 +6056,15 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5970,29 +6077,31 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..8(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..9(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -6003,9 +6112,9 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -6035,6 +6144,7 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
 ; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { float*, float*, float* }, align 8
 ; CHECK2-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
 ; CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK2-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
@@ -6042,7 +6152,13 @@ entry:
 ; CHECK2-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
+; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store float* [[F_RELOADED]], float** [[GEP_F_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store float* [[F_ADDR]], float** [[GEP_F_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    store float* [[P]], float** [[GEP_P]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { float*, float*, float* }*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), { float*, float*, float* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6053,30 +6169,36 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_RELOADED:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { float*, float*, float* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_F_RELOADED:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_F_RELOADED:%.*]] = load float*, float** [[GEP_F_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_F_ADDR:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_F_ADDR:%.*]] = load float*, float** [[GEP_F_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_P:%.*]] = getelementptr { float*, float*, float* }, { float*, float*, float* }* [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[LOADGEP_P:%.*]] = load float*, float** [[GEP_P]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load float, float* [[F_RELOADED]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load float, float* [[LOADGEP_F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..10(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[F_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..11(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], float* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_F_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -6087,8 +6209,8 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP1]], 0x40091EB860000000
-; CHECK2-NEXT:    store float [[ADD]], float* [[P]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], 0x40091EB860000000
+; CHECK2-NEXT:    store float [[ADD]], float* [[LOADGEP_P]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -6120,13 +6242,18 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i64* }, align 8
 ; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = alloca i64, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i64* }*)* @merge_seq_firstprivate..omp_par to void (i32*, i32*, ...)*), { i32*, i64* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6139,29 +6266,33 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_firstprivate..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i64* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i64* }, { i32*, i64* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC:%.*]] = load i64*, i64** [[GEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..12(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD:%.*]] = load i64, i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK2-NEXT:    call void @.omp_outlined..13(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i64 [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_LOAD]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
@@ -6173,11 +6304,11 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 1
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[A_ADDR]], align 4
+; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[LOADGEP_A_ADDR]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_A_ADDR]], align 4
 ; CHECK2-NEXT:    [[A_CASTED_SROA_0_0_INSERT_EXT:%.*]] = zext i32 [[ADD]] to i64
-; CHECK2-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    store i64 [[A_CASTED_SROA_0_0_INSERT_EXT]], i64* [[LOADGEP_A_CASTED_SROA_0_0_INSERT_EXT_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -6207,12 +6338,15 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_seq_sink_lt..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6223,30 +6357,32 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_sink_lt..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
 ; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..14(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..15(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -6257,12 +6393,12 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
-; CHECK2-NEXT:    [[TMP4:%.*]] = ptrtoint i32* [[B]] to i64
-; CHECK2-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP4]] to i32
-; CHECK2-NEXT:    store i32 [[TMP5]], i32* [[B]], align 4
-; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP3]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]])
+; CHECK2-NEXT:    [[TMP5:%.*]] = ptrtoint i32* [[B]] to i64
+; CHECK2-NEXT:    [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK2-NEXT:    store i32 [[TMP6]], i32* [[B]], align 4
+; CHECK2-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* noundef nonnull [[TMP4]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -6292,6 +6428,7 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
 ; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
@@ -6300,9 +6437,15 @@ entry:
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
+; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    store i32* [[B]], i32** [[GEP_B]], align 8
 ; CHECK2-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6315,30 +6458,36 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_B:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[LOADGEP_B:%.*]] = load i32*, i32** [[GEP_B]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..16(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK2-NEXT:    call void @.omp_outlined..17(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_B]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -6349,9 +6498,9 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
+; CHECK2-NEXT:    [[TMP5:%.*]] = bitcast i32* [[LOADGEP_B]] to i8*
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_B]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -6383,6 +6532,7 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions
 ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32* }, align 8
 ; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
@@ -6390,7 +6540,11 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32* }*)* @merge_cancellable_regions..omp_par to void (i32*, i32*, ...)*), { i32*, i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6401,20 +6555,24 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32* }, { i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..18(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..19(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -6457,6 +6615,7 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
 ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32* }, align 8
 ; CHECK2-NEXT:    [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
@@ -6466,7 +6625,13 @@ entry:
 ; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[CANCEL1_RELOADED]], i32** [[GEP_CANCEL1_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store i32* [[CANCEL1_ADDR]], i32** [[GEP_CANCEL1_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    store i32* [[CANCEL2_ADDR]], i32** [[GEP_CANCEL2_ADDR]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32* }*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6477,30 +6642,36 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_RELOADED:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_CANCEL1_RELOADED:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_CANCEL1_RELOADED:%.*]] = load i32*, i32** [[GEP_CANCEL1_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL1_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_CANCEL1_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL1_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_CANCEL2_ADDR:%.*]] = getelementptr { i32*, i32*, i32* }, { i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[LOADGEP_CANCEL2_ADDR:%.*]] = load i32*, i32** [[GEP_CANCEL2_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[CANCEL1_RELOADED]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL1_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..20(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL1_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..21(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_CANCEL2_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -6511,9 +6682,9 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP2]], 0
 ; CHECK2-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
-; CHECK2-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[LNOT_EXT]], i32* [[LOADGEP_CANCEL2_ADDR]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -6555,12 +6726,15 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_3..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_3..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6571,23 +6745,25 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..22(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..23(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
-; CHECK2-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..24(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -6626,6 +6802,7 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
 ; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32*, i32*, i32*, i32* }, align 8
 ; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
@@ -6635,7 +6812,15 @@ entry:
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[A_RELOADED]], i32** [[GEP_A_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 1
+; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 2
+; CHECK2-NEXT:    store i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[STRUCTARG]], i32 0, i32 3
+; CHECK2-NEXT:    store i32* [[ADD1_SEQ_OUTPUT_ALLOC]], i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, i32*, i32*, i32* }*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), { i32*, i32*, i32*, i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6648,42 +6833,50 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32*, i32*, i32*, i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_A_RELOADED:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_RELOADED:%.*]] = load i32*, i32** [[GEP_A_RELOADED]], align 8
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 1
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    [[GEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 2
+; CHECK2-NEXT:    [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD_SEQ_OUTPUT_ALLOC]], align 8
+; CHECK2-NEXT:    [[GEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = getelementptr { i32*, i32*, i32*, i32* }, { i32*, i32*, i32*, i32* }* [[TMP0]], i32 0, i32 3
+; CHECK2-NEXT:    [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC:%.*]] = load i32*, i32** [[GEP_ADD1_SEQ_OUTPUT_ALLOC]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4
+; CHECK2-NEXT:    [[TMP2:%.*]] = load i32, i32* [[LOADGEP_A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..25(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split:
-; CHECK2-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..26(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
-; CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
-; CHECK2-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
-; CHECK2-NEXT:    br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK2-NEXT:    [[TMP5:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK2-NEXT:    br i1 [[TMP6]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK2:       omp_region.end4:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split.split.split:
-; CHECK2-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..27(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
@@ -6694,9 +6887,9 @@ entry:
 ; CHECK2:       omp_region.body5:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK2:       seq.par.merged2:
-; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
-; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP1]]
-; CHECK2-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP2]]
+; CHECK2-NEXT:    store i32 [[ADD1]], i32* [[LOADGEP_ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY5_SPLIT:%.*]]
@@ -6706,8 +6899,8 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
-; CHECK2-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK2-NEXT:    store i32 [[ADD]], i32* [[LOADGEP_ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
 ; CHECK2-NEXT:    br label [[OMP_REGION_BODY_SPLIT:%.*]]
@@ -6856,12 +7049,15 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[STRUCTARG:%.*]] = alloca { i32* }, align 8
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]])
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[STRUCTARG]], i32 0, i32 0
+; CHECK2-NEXT:    store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB2]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32* }*)* @merge_2_unmergable_1..omp_par to void (i32*, i32*, ...)*), { i32* }* [[STRUCTARG]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6874,20 +7070,22 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_2_unmergable_1..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], { i32* }* [[TMP0:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
+; CHECK2-NEXT:    [[GEP_A_ADDR:%.*]] = getelementptr { i32* }, { i32* }* [[TMP0]], i32 0, i32 0
+; CHECK2-NEXT:    [[LOADGEP_A_ADDR:%.*]] = load i32*, i32** [[GEP_A_ADDR]], align 8
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
-; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
-; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TID_ADDR]], align 4
+; CHECK2-NEXT:    store i32 [[TMP1]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.region:
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED:%.*]]
 ; CHECK2:       omp.par.merged:
-; CHECK2-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..37(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB3]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
+; CHECK2-NEXT:    call void @.omp_outlined..38(i32* [[TID_ADDR]], i32* [[ZERO_ADDR]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[LOADGEP_A_ADDR]])
 ; CHECK2-NEXT:    br label [[ENTRY_SPLIT:%.*]]
 ; CHECK2:       entry.split:
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION_SPLIT:%.*]]
diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index a4d82e07dc139..00aa25e8330d5 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -207,6 +207,64 @@ template <typename InstTy> static Value *findStoredValue(Value *AllocaValue) {
   return Store->getValueOperand();
 }
 
+// Returns the value stored in the aggregate argument of an outlined function,
+// or nullptr if it is not found.
+static Value *findStoredValueInAggregateAt(LLVMContext &Ctx, Value *Aggregate,
+                                           unsigned Idx) {
+  GetElementPtrInst *GEPAtIdx = nullptr;
+  // Find GEP instruction at that index.
+  for (User *Usr : Aggregate->users()) {
+    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Usr);
+    if (!GEP)
+      continue;
+
+    if (GEP->getOperand(2) != ConstantInt::get(Type::getInt32Ty(Ctx), Idx))
+      continue;
+
+    EXPECT_EQ(GEPAtIdx, nullptr);
+    GEPAtIdx = GEP;
+  }
+
+  EXPECT_NE(GEPAtIdx, nullptr);
+  EXPECT_EQ(GEPAtIdx->getNumUses(), 1U);
+
+  // Find the value stored to the aggregate.
+  StoreInst *StoreToAgg = dyn_cast<StoreInst>(*GEPAtIdx->user_begin());
+  Value *StoredAggValue = StoreToAgg->getValueOperand();
+
+  Value *StoredValue = nullptr;
+
+  // Find the value stored to the value stored in the aggregate.
+  for (User *Usr : StoredAggValue->users()) {
+    StoreInst *Store = dyn_cast<StoreInst>(Usr);
+    if (!Store)
+      continue;
+
+    if (Store->getPointerOperand() != StoredAggValue)
+      continue;
+
+    EXPECT_EQ(StoredValue, nullptr);
+    StoredValue = Store->getValueOperand();
+  }
+
+  return StoredValue;
+}
+
+// Returns the aggregate that the value is originating from.
+static Value *findAggregateFromValue(Value *V) {
+  // Expects a load instruction that loads from the aggregate.
+  LoadInst *Load = dyn_cast<LoadInst>(V);
+  EXPECT_NE(Load, nullptr);
+  // Find the GEP instruction used in the load instruction.
+  GetElementPtrInst *GEP =
+      dyn_cast<GetElementPtrInst>(Load->getPointerOperand());
+  EXPECT_NE(GEP, nullptr);
+  // Find the aggregate used in the GEP instruction.
+  Value *Aggregate = GEP->getPointerOperand();
+
+  return Aggregate;
+}
+
 TEST_F(OpenMPIRBuilderTest, CreateBarrier) {
   OpenMPIRBuilder OMPBuilder(*M);
   OMPBuilder.initialize();
@@ -581,8 +639,9 @@ TEST_F(OpenMPIRBuilderTest, ParallelSimple) {
   EXPECT_EQ(ForkCI->getArgOperand(1),
             ConstantInt::get(Type::getInt32Ty(Ctx), 1U));
   EXPECT_EQ(ForkCI->getArgOperand(2), Usr);
-  EXPECT_EQ(findStoredValue<AllocaInst>(ForkCI->getArgOperand(3)),
-            F->arg_begin());
+  Value *StoredValue =
+      findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
+  EXPECT_EQ(StoredValue, F->arg_begin());
 }
 
 TEST_F(OpenMPIRBuilderTest, ParallelNested) {
@@ -906,7 +965,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   EXPECT_TRUE(isa<GlobalVariable>(ForkCI->getArgOperand(0)));
   EXPECT_EQ(ForkCI->getArgOperand(1),
             ConstantInt::get(Type::getInt32Ty(Ctx), 1));
-  Value *StoredForkArg = findStoredValue<AllocaInst>(ForkCI->getArgOperand(3));
+  Value *StoredForkArg =
+      findStoredValueInAggregateAt(Ctx, ForkCI->getArgOperand(3), 0);
   EXPECT_EQ(StoredForkArg, F->arg_begin());
 
   EXPECT_EQ(DirectCI->getCalledFunction(), OutlinedFn);
@@ -914,7 +974,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelIfCond) {
   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(0)));
   EXPECT_TRUE(isa<AllocaInst>(DirectCI->getArgOperand(1)));
   Value *StoredDirectArg =
-      findStoredValue<AllocaInst>(DirectCI->getArgOperand(2));
+      findStoredValueInAggregateAt(Ctx, DirectCI->getArgOperand(2), 0);
   EXPECT_EQ(StoredDirectArg, F->arg_begin());
 }
 
@@ -1045,6 +1105,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
   Type *I32PtrTy = Type::getInt32PtrTy(M->getContext());
   Type *StructTy = StructType::get(I32Ty, I32PtrTy);
   Type *StructPtrTy = StructTy->getPointerTo();
+  StructType *ArgStructTy =
+      StructType::get(I32PtrTy, StructPtrTy, I32PtrTy, StructPtrTy);
   Type *VoidTy = Type::getVoidTy(M->getContext());
   FunctionCallee RetI32Func = M->getOrInsertFunction("ret_i32", I32Ty);
   FunctionCallee TakeI32Func =
@@ -1096,21 +1158,7 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
 
   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
   EXPECT_TRUE(Arg2Type->isPointerTy());
-  EXPECT_TRUE(cast<PointerType>(Arg2Type)->isOpaqueOrPointeeTypeMatches(I32Ty));
-
-  // Arguments that need to be passed through pointers and reloaded will get
-  // used earlier in the functions and therefore will appear first in the
-  // argument list after outlining.
-  Type *Arg3Type = OutlinedFn->getArg(3)->getType();
-  EXPECT_TRUE(Arg3Type->isPointerTy());
-  EXPECT_TRUE(
-      cast<PointerType>(Arg3Type)->isOpaqueOrPointeeTypeMatches(StructTy));
-
-  Type *Arg4Type = OutlinedFn->getArg(4)->getType();
-  EXPECT_EQ(Arg4Type, I32PtrTy);
-
-  Type *Arg5Type = OutlinedFn->getArg(5)->getType();
-  EXPECT_EQ(Arg5Type, StructPtrTy);
+  EXPECT_EQ(Arg2Type->getPointerElementType(), ArgStructTy);
 }
 
 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {
@@ -3031,7 +3079,7 @@ static bool isValueReducedToFuncArg(Value *V, BasicBlock *BB) {
     return false;
 
   return Store->getPointerOperand() == GlobalLoad->getPointerOperand() &&
-         isa<Argument>(GlobalLoad->getPointerOperand());
+         isa<Argument>(findAggregateFromValue(GlobalLoad->getPointerOperand()));
 }
 
 /// Finds among users of Ptr a pair of GEP instructions with indices [0, 0] and
@@ -3328,9 +3376,11 @@ TEST_F(OpenMPIRBuilderTest, CreateReductions) {
   auto *SecondAtomic =
       findSingleUserInBlock<AtomicRMWInst>(SecondLoad, AtomicBB);
   ASSERT_NE(FirstAtomic, nullptr);
-  EXPECT_TRUE(isa<Argument>(FirstAtomic->getPointerOperand()));
+  Value *AtomicStorePointer = FirstAtomic->getPointerOperand();
+  EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
   ASSERT_NE(SecondAtomic, nullptr);
-  EXPECT_TRUE(isa<Argument>(SecondAtomic->getPointerOperand()));
+  AtomicStorePointer = SecondAtomic->getPointerOperand();
+  EXPECT_TRUE(isa<Argument>(findAggregateFromValue(AtomicStorePointer)));
 
   // Check that the separate reduction function also performs (non-atomic)
   // reductions after extracting reduction variables from its arguments.

From 5c1f7b296ac0dddeca02891976e6ab5cfc006719 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Wed, 26 Jan 2022 10:53:50 +0800
Subject: [PATCH 640/946] [C++20] [Modules] Only check decls under namespace
 scope in CheckRedeclarationExported

Since only the decls inhabit in a namespace scope could be exported, it
is not meaningful to check it in CheckRedeclarationExported, which
implements [module.interface]/p6.

Reviewed By: urnathan

Differential Revision: https://reviews.llvm.org/D118120
---
 clang/lib/Sema/SemaDecl.cpp                   | 13 ++++++++++++
 clang/test/CXX/module/module.interface/p6.cpp | 21 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 673631a12eee0..7c5f6b318e973 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -1635,6 +1635,19 @@ bool Sema::CheckRedeclarationModuleOwnership(NamedDecl *New, NamedDecl *Old) {
 // A redeclaration of an entity X is implicitly exported if X was introduced by
 // an exported declaration; otherwise it shall not be exported.
 bool Sema::CheckRedeclarationExported(NamedDecl *New, NamedDecl *Old) {
+  // [module.interface]p1:
+  // An export-declaration shall inhabit a namespace scope.
+  //
+  // So it is meaningless to talk about redeclaration which is not at namespace
+  // scope.
+  if (!New->getLexicalDeclContext()
+           ->getNonTransparentContext()
+           ->isFileContext() ||
+      !Old->getLexicalDeclContext()
+           ->getNonTransparentContext()
+           ->isFileContext())
+    return false;
+
   bool IsNewExported = New->isInExportDeclContext();
   bool IsOldExported = Old->isInExportDeclContext();
 
diff --git a/clang/test/CXX/module/module.interface/p6.cpp b/clang/test/CXX/module/module.interface/p6.cpp
index a696851ccbff4..070aa62f5800a 100644
--- a/clang/test/CXX/module/module.interface/p6.cpp
+++ b/clang/test/CXX/module/module.interface/p6.cpp
@@ -91,3 +91,24 @@ template <typename T>
 T TemplVar; // expected-note {{previous declaration is here}}
 export template <typename T>
 T TemplVar; // expected-error {{cannot export redeclaration 'TemplVar' here since the previous declaration is not exported}}
+
+// Test the compiler wouldn't complain about the redeclaration of friend in exported class.
+namespace Friend {
+template <typename T>
+class bar;
+class gua;
+template <typename T>
+void hello();
+void hi();
+export class foo;
+bool operator<(const foo &a, const foo &b);
+export class foo {
+  template <typename T>
+  friend class bar;
+  friend class gua;
+  template <typename T>
+  friend void hello();
+  friend void hi();
+  friend bool operator<(const foo &a, const foo &b);
+};
+} // namespace Friend

From 08b29b175b1561af181e768587ffaf5f2bf4eb8d Mon Sep 17 00:00:00 2001
From: luxufan <932494295@qq.com>
Date: Fri, 14 Jan 2022 22:03:23 +0800
Subject: [PATCH 641/946] [MC] Put the Pending Fixups into location symbol's
 fragment

Differential Revision: https://reviews.llvm.org/D117317
---
 llvm/lib/MC/MCObjectStreamer.cpp     | 24 +++++++++++++++++++++++-
 llvm/test/MC/RISCV/reloc-directive.s | 25 +++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 6604d7988c4ca..ca462b3e558c9 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -120,7 +120,29 @@ void MCObjectStreamer::resolvePendingFixups() {
     }
     flushPendingLabels(PendingFixup.DF, PendingFixup.DF->getContents().size());
     PendingFixup.Fixup.setOffset(PendingFixup.Sym->getOffset());
-    PendingFixup.DF->getFixups().push_back(PendingFixup.Fixup);
+
+    // If the location symbol to relocate is in MCEncodedFragmentWithFixups,
+    // put the Fixup into location symbol's fragment. Otherwise
+    // put into PendingFixup.DF
+    MCFragment *SymFragment = PendingFixup.Sym->getFragment();
+    switch (SymFragment->getKind()) {
+    case MCFragment::FT_Relaxable:
+    case MCFragment::FT_Dwarf:
+    case MCFragment::FT_PseudoProbe:
+      cast<MCEncodedFragmentWithFixups<8, 1>>(SymFragment)
+          ->getFixups()
+          .push_back(PendingFixup.Fixup);
+      break;
+    case MCFragment::FT_Data:
+    case MCFragment::FT_CVDefRange:
+      cast<MCEncodedFragmentWithFixups<32, 4>>(SymFragment)
+          ->getFixups()
+          .push_back(PendingFixup.Fixup);
+      break;
+    default:
+      PendingFixup.DF->getFixups().push_back(PendingFixup.Fixup);
+      break;
+    }
   }
   PendingFixups.clear();
 }
diff --git a/llvm/test/MC/RISCV/reloc-directive.s b/llvm/test/MC/RISCV/reloc-directive.s
index 98c0efd06b44d..d4c20cde060bb 100644
--- a/llvm/test/MC/RISCV/reloc-directive.s
+++ b/llvm/test/MC/RISCV/reloc-directive.s
@@ -22,6 +22,19 @@
 # CHECK-NEXT: 0x0 R_RISCV_NONE - 0x9
 # CHECK-NEXT: 0x0 R_RISCV_32 - 0x9
 # CHECK-NEXT: 0x0 R_RISCV_64 - 0x9
+
+# CHECK:      Section ({{.*}}) .rela.data {
+# CHECK-NEXT:   0x0 R_RISCV_32 - 0x6
+# CHECK-NEXT: }
+
+# CHECK:      Section ({{.*}}) .rela.debug_line {
+# CHECK-NEXT:   0x0 R_RISCV_32 - 0x6
+# CHECK-NEXT: }
+
+# CHECK:      Section ({{.*}}) .rela.pseudoprobe {
+# CHECK-NEXT:   0x0 R_RISCV_32 - 0x6
+# CHECK-NEXT: }
+
 .text
   ret
   nop
@@ -38,9 +51,21 @@
   .reloc 0, BFD_RELOC_32, 9
   .reloc 0, BFD_RELOC_64, 9
 
+  .reloc foo, R_RISCV_32, 6
+  .reloc line, R_RISCV_32, 6
+  .reloc probe, R_RISCV_32, 6
+
 .data
 .globl foo
 foo:
   .word 0
   .word 0
   .word 0
+
+.section    .debug_line,"",@progbits
+line:
+  .word 0
+
+.section    .pseudoprobe,"",@progbits
+probe:
+  .word 0

From 227d18b3a87a7bb544b76276a77def10294d1f0a Mon Sep 17 00:00:00 2001
From: Puyan Lotfi <puyan@puyan.org>
Date: Tue, 25 Jan 2022 19:15:00 -0800
Subject: [PATCH 642/946] [lld][macho][NFC] Make MachO/start-end.s test less
 britle by checking for _main:

In start-end.s there is a lit check line `# SEG: _main` to begin the
check at the start of the function main where `_main` is the Darwin name
mangling for C main. Because the text file that FileCheck is getting as
input has the path of the compiler build in it from llvm-mc and
llvm-objdump, and because of the lack of a trailing colon in this check
line we end up inadvertently matching against the line of text with the
compiler path in it in the case where said path contains "_main" some
place. This can be very likely if the compiler branch has "main" or
"_main" in it.

To fix this I include the training : since that will match on the
function label and not the path line.
---
 lld/test/MachO/start-end.s | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/test/MachO/start-end.s b/lld/test/MachO/start-end.s
index f8f389ddf0ac0..ba02a1a643270 100644
--- a/lld/test/MachO/start-end.s
+++ b/lld/test/MachO/start-end.s
@@ -276,7 +276,7 @@
 # SEG-NEXT:   vmaddr 0x[[#%x, ASDFSTART:]]
 # SEG-NEXT:   vmsize 0x0000000000000000
 
-# SEG: _main
+# SEG: _main:
 
 ## segment$start$__TEXT / segment$end$__TEXT
 # SEG:      [[#%x, PC1:]]:

From f2c2333b6d2f0a00b02143fc3914462ebda5a015 Mon Sep 17 00:00:00 2001
From: ly <liuyang7164@163.com>
Date: Wed, 26 Jan 2022 11:49:50 +0800
Subject: [PATCH 643/946] [NFC][llvm] Align the comment

Reviewed By: luismarques

Differential Revision: https://reviews.llvm.org/D116579
---
 llvm/include/llvm/CodeGen/SDNodeProperties.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/SDNodeProperties.td b/llvm/include/llvm/CodeGen/SDNodeProperties.td
index d25e0bda26a95..3cb304f47f4b9 100644
--- a/llvm/include/llvm/CodeGen/SDNodeProperties.td
+++ b/llvm/include/llvm/CodeGen/SDNodeProperties.td
@@ -1,4 +1,4 @@
-//===- SDNodeProperties.td - Common code for DAG isels ---*- tablegen -*-===//
+//===- SDNodeProperties.td - Common code for DAG isels -----*- tablegen -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

From f65651cc8aa87ba3a505ba3edebf81a9797685a2 Mon Sep 17 00:00:00 2001
From: Micah Weston <micahsweston@gmail.com>
Date: Wed, 26 Jan 2022 04:21:48 +0000
Subject: [PATCH 644/946] [AArch64] Fixes ADD/SUB opt bug and abstracts shared
 behavior in MIPeepholeOpt for ADD, SUB, and AND.

This fixes a bug where (SUBREG_TO_REG 0 (MOVi32imm <negative-number>) sub_32)
would generate invalid code since the top 32-bits were not zeroed when inspecting the
immediate value. A new test was added for this case.

Change to abstract shared behavior in MIPeepholeOpt. Both
visitAND and visitADDSUB attempt to split an RR instruction with an immediate
operand into two RI instructions with the immediate split.

The differing behavior lies in how the immediate is split into two pieces and
how the new instructions are built. The rest of the behavior (adding new VRegs,
checking for the MOVImm, constraining reg classes, removing old intructions)
are shared between the operations.

The new helper function splitTwoPartImm implements the shared behavior and
delegates differing behavior to two function objects passed by the caller.
One function object splits the immediate into two values and returns the
opcode to use if it is a valid split. The other function object builds
the new instructions.

I felt this abstraction would help since I believe it will help reduce the
code repetition when adding new instructions of the pattern, such as
SUBS for this conditional optimization.

Tested it locally by running check all with compiler-rt, mlir, clang-tools-extra,
flang, llvm, and clang enabled.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D118000
---
 .../Target/AArch64/AArch64MIPeepholeOpt.cpp   | 275 ++++++++++--------
 llvm/test/CodeGen/AArch64/addsub.ll           |  17 ++
 2 files changed, 166 insertions(+), 126 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
index 0ddfc717e47f3..1fc5617b49f66 100644
--- a/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp
@@ -37,6 +37,7 @@
 #include "AArch64ExpandImm.h"
 #include "AArch64InstrInfo.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -55,17 +56,44 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   }
 
   const AArch64InstrInfo *TII;
+  const AArch64RegisterInfo *TRI;
   MachineLoopInfo *MLI;
   MachineRegisterInfo *MRI;
 
+  template <typename T>
+  using SplitAndOpcFunc =
+      std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
+  using BuildMIFunc =
+      std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register,
+                         Register, Register)>;
+
+  /// For instructions where an immediate operand could be split into two
+  /// separate immediate instructions, use the splitTwoPartImm two handle the
+  /// optimization.
+  ///
+  /// To implement, the following function types must be passed to
+  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
+  /// splitting the immediate is valid and returns the associated new opcode. A
+  /// BuildMIFunc must be implemented to build the two immediate instructions.
+  ///
+  /// Example Pattern (where IMM would require 2+ MOV instructions):
+  ///     %dst = <Instr>rr %src IMM [...]
+  /// becomes:
+  ///     %tmp = <Instr>ri %src (encode half IMM) [...]
+  ///     %dst = <Instr>ri %tmp (encode half IMM) [...]
+  template <typename T>
+  bool splitTwoPartImm(MachineInstr &MI,
+                       SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);
+
   bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                         MachineInstr *&SubregToRegMI);
 
   template <typename T>
-  bool visitADDSUB(MachineInstr &MI,
-                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved, bool IsAdd);
+  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
+                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
   template <typename T>
-  bool visitAND(MachineInstr &MI,
+  bool visitAND(unsigned Opc, MachineInstr &MI,
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
   bool visitORR(MachineInstr &MI,
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
@@ -129,7 +157,8 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
 
 template <typename T>
 bool AArch64MIPeepholeOpt::visitAND(
-    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
+    unsigned Opc, MachineInstr &MI,
+    SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
   // Try below transformation.
   //
   // MOVi32imm + ANDWrr ==> ANDWri + ANDWri
@@ -140,59 +169,25 @@ bool AArch64MIPeepholeOpt::visitAND(
   // bitmask immediates. It makes only two AND instructions intead of multiple
   // mov + and instructions.
 
-  unsigned RegSize = sizeof(T) * 8;
-  assert((RegSize == 32 || RegSize == 64) &&
-         "Invalid RegSize for AND bitmask peephole optimization");
-
-  // Perform several essential checks against current MI.
-  MachineInstr *MovMI = nullptr, *SubregToRegMI = nullptr;
-  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
-    return false;
-
-  // Split the bitmask immediate into two.
-  T UImm = static_cast<T>(MovMI->getOperand(1).getImm());
-  // For the 32 bit form of instruction, the upper 32 bits of the destination
-  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
-  // of UImm to zero.
-  if (SubregToRegMI)
-    UImm &= 0xFFFFFFFF;
-  T Imm1Enc;
-  T Imm2Enc;
-  if (!splitBitmaskImm(UImm, RegSize, Imm1Enc, Imm2Enc))
-    return false;
-
-  // Create new AND MIs.
-  DebugLoc DL = MI.getDebugLoc();
-  MachineBasicBlock *MBB = MI.getParent();
-  const TargetRegisterClass *ANDImmRC =
-      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  Register NewTmpReg = MRI->createVirtualRegister(ANDImmRC);
-  Register NewDstReg = MRI->createVirtualRegister(ANDImmRC);
-  unsigned Opcode = (RegSize == 32) ? AArch64::ANDWri : AArch64::ANDXri;
-
-  MRI->constrainRegClass(NewTmpReg, MRI->getRegClass(SrcReg));
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
-      .addReg(SrcReg)
-      .addImm(Imm1Enc);
-
-  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
-      .addReg(NewTmpReg)
-      .addImm(Imm2Enc);
-
-  MRI->replaceRegWith(DstReg, NewDstReg);
-  // replaceRegWith changes MI's definition register. Keep it for SSA form until
-  // deleting MI.
-  MI.getOperand(0).setReg(DstReg);
-
-  ToBeRemoved.insert(&MI);
-  if (SubregToRegMI)
-    ToBeRemoved.insert(SubregToRegMI);
-  ToBeRemoved.insert(MovMI);
-
-  return true;
+  return splitTwoPartImm<T>(
+      MI, ToBeRemoved,
+      [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
+        if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
+          return Opc;
+        return None;
+      },
+      [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
+                   Register NewDstReg) {
+        DebugLoc DL = MI.getDebugLoc();
+        MachineBasicBlock *MBB = MI.getParent();
+        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+            .addReg(SrcReg)
+            .addImm(Imm0);
+        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+            .addReg(NewTmpReg)
+            .addImm(Imm1);
+      });
 }
 
 bool AArch64MIPeepholeOpt::visitORR(
@@ -260,8 +255,8 @@ static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
 
 template <typename T>
 bool AArch64MIPeepholeOpt::visitADDSUB(
-    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
-    bool IsAdd) {
+    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
+    SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
   // Try below transformation.
   //
   // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
@@ -275,66 +270,30 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
   // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
   // multiple `mov` + `and/sub` instructions.
 
-  unsigned RegSize = sizeof(T) * 8;
-  assert((RegSize == 32 || RegSize == 64) &&
-         "Invalid RegSize for legal add/sub immediate peephole optimization");
-
-  // Perform several essential checks against current MI.
-  MachineInstr *MovMI, *SubregToRegMI;
-  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
-    return false;
-
-  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
-  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
-  unsigned Opcode;
-  if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) {
-    if (IsAdd)
-      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
-    else
-      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
-  } else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) {
-    if (IsAdd)
-      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
-    else
-      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
-  } else {
-    return false;
-  }
-
-  // Create new ADD/SUB MIs.
-  DebugLoc DL = MI.getDebugLoc();
-  MachineBasicBlock *MBB = MI.getParent();
-  const TargetRegisterClass *RC =
-      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
-  Register DstReg = MI.getOperand(0).getReg();
-  Register SrcReg = MI.getOperand(1).getReg();
-  Register NewTmpReg = MRI->createVirtualRegister(RC);
-  Register NewDstReg = MRI->createVirtualRegister(RC);
-
-  MRI->constrainRegClass(SrcReg, RC);
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
-      .addReg(SrcReg)
-      .addImm(Imm0)
-      .addImm(12);
-
-  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
-  BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
-      .addReg(NewTmpReg)
-      .addImm(Imm1)
-      .addImm(0);
-
-  MRI->replaceRegWith(DstReg, NewDstReg);
-  // replaceRegWith changes MI's definition register. Keep it for SSA form until
-  // deleting MI.
-  MI.getOperand(0).setReg(DstReg);
-
-  // Record the MIs need to be removed.
-  ToBeRemoved.insert(&MI);
-  if (SubregToRegMI)
-    ToBeRemoved.insert(SubregToRegMI);
-  ToBeRemoved.insert(MovMI);
-
-  return true;
+  return splitTwoPartImm<T>(
+      MI, ToBeRemoved,
+      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
+                       T &Imm1) -> Optional<unsigned> {
+        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
+          return PosOpc;
+        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
+          return NegOpc;
+        return None;
+      },
+      [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
+                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
+                   Register NewDstReg) {
+        DebugLoc DL = MI.getDebugLoc();
+        MachineBasicBlock *MBB = MI.getParent();
+        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
+            .addReg(SrcReg)
+            .addImm(Imm0)
+            .addImm(12);
+        BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
+            .addReg(NewTmpReg)
+            .addImm(Imm1)
+            .addImm(0);
+      });
 }
 
 // Checks if the corresponding MOV immediate instruction is applicable for
@@ -377,15 +336,75 @@ bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
   return true;
 }
 
+template <typename T>
+bool AArch64MIPeepholeOpt::splitTwoPartImm(
+    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
+  unsigned RegSize = sizeof(T) * 8;
+  assert((RegSize == 32 || RegSize == 64) &&
+         "Invalid RegSize for legal immediate peephole optimization");
+
+  // Perform several essential checks against current MI.
+  MachineInstr *MovMI, *SubregToRegMI;
+  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
+    return false;
+
+  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
+  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
+  // For the 32 bit form of instruction, the upper 32 bits of the destination
+  // register are set to zero. If there is SUBREG_TO_REG, set the upper 32 bits
+  // of Imm to zero. This is essential if the Immediate value was a negative
+  // number since it was sign extended when we assign to the 64-bit Imm.
+  if (SubregToRegMI)
+    Imm &= 0xFFFFFFFF;
+  unsigned Opcode;
+  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
+    Opcode = R.getValue();
+  else
+    return false;
+
+  // Create new ADD/SUB MIs.
+  MachineFunction *MF = MI.getMF();
+  const TargetRegisterClass *RC =
+      TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
+  const TargetRegisterClass *ORC =
+      TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register NewTmpReg = MRI->createVirtualRegister(RC);
+  Register NewDstReg = MRI->createVirtualRegister(RC);
+
+  MRI->constrainRegClass(SrcReg, RC);
+  MRI->constrainRegClass(NewTmpReg, ORC);
+  MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
+
+  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
+
+  MRI->replaceRegWith(DstReg, NewDstReg);
+  // replaceRegWith changes MI's definition register. Keep it for SSA form until
+  // deleting MI.
+  MI.getOperand(0).setReg(DstReg);
+
+  // Record the MIs need to be removed.
+  ToBeRemoved.insert(&MI);
+  if (SubregToRegMI)
+    ToBeRemoved.insert(SubregToRegMI);
+  ToBeRemoved.insert(MovMI);
+
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
+  TRI = static_cast<const AArch64RegisterInfo *>(
+      MF.getSubtarget().getRegisterInfo());
   MLI = &getAnalysis<MachineLoopInfo>();
   MRI = &MF.getRegInfo();
 
-  assert (MRI->isSSA() && "Expected to be run on SSA form!");
+  assert(MRI->isSSA() && "Expected to be run on SSA form!");
 
   bool Changed = false;
   SmallSetVector<MachineInstr *, 8> ToBeRemoved;
@@ -396,25 +415,29 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       default:
         break;
       case AArch64::ANDWrr:
-        Changed = visitAND<uint32_t>(MI, ToBeRemoved);
+        Changed = visitAND<uint32_t>(AArch64::ANDWri, MI, ToBeRemoved);
         break;
       case AArch64::ANDXrr:
-        Changed = visitAND<uint64_t>(MI, ToBeRemoved);
+        Changed = visitAND<uint64_t>(AArch64::ANDXri, MI, ToBeRemoved);
         break;
       case AArch64::ORRWrs:
         Changed = visitORR(MI, ToBeRemoved);
         break;
       case AArch64::ADDWrr:
-        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, true);
+        Changed = visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI,
+                                        ToBeRemoved);
         break;
       case AArch64::SUBWrr:
-        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, false);
+        Changed = visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI,
+                                        ToBeRemoved);
         break;
       case AArch64::ADDXrr:
-        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, true);
+        Changed = visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI,
+                                        ToBeRemoved);
         break;
       case AArch64::SUBXrr:
-        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, false);
+        Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
+                                        ToBeRemoved);
         break;
       }
     }
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
index 37c9e4c5c6fe1..b95c15ac6d073 100644
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -389,4 +389,21 @@ define i1 @uadd_add(i8 %a, i8 %b, i8* %p) {
   ret i1 %e1
 }
 
+; This is a unique edge case that will generate the following MIR
+;   MOVi32imm -1000000
+;   SUBREG_TO_REG 0, killed %1, %subreg.sub_32
+; When using a 64-bit unsigned for the "-1000000" immediate, the code
+; must make sure to zero out the top 32 bits since SUBREG_TO_REG is
+; zero extending the value
+define i64 @addl_0x80000000(i64 %a) {
+; CHECK-LABEL: addl_0x80000000:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #48576
+; CHECK-NEXT:    movk w8, #65520, lsl #16
+; CHECK-NEXT:    add x0, x0, x8
+; CHECK-NEXT:    ret
+  %b = add i64 %a, 4293967296
+  ret i64 %b
+}
+
 ; TODO: adds/subs

From ad0345aed1e76f98c6ad55372b675c5647e16d28 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Wed, 26 Jan 2022 13:23:21 +0800
Subject: [PATCH 645/946] [PowerPC] Emit gnu_attribute according to float-abi
 metadata

According to GNU as documentation, PowerPC supports some .gnu_attribute
tags to represent the vector and float ABI type in the object file.
Some linkers like GNU ld respects the attribute and will prevent objects
with conflicting ABIs being linked.

This patch emits gnu_attribute value in assembly printer according to
the float-abi metadata. More attributes for soft-fp, hard single/double
and even vector ABI need to be supported in the future.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D117193
---
 llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp  | 43 ++++++++++++++++++++++
 llvm/test/CodeGen/PowerPC/gnu-attribute.ll | 15 ++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 llvm/test/CodeGen/PowerPC/gnu-attribute.ll

diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index f26c15667a0bb..7809818069965 100644
--- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -109,6 +109,23 @@ struct DenseMapInfo<std::pair<const MCSymbol *, MCSymbolRefExpr::VariantKind>> {
 
 namespace {
 
+enum {
+  // GNU attribute tags for PowerPC ABI
+  Tag_GNU_Power_ABI_FP = 4,
+  Tag_GNU_Power_ABI_Vector = 8,
+  Tag_GNU_Power_ABI_Struct_Return = 12,
+
+  // GNU attribute values for PowerPC float ABI, as combination of two parts
+  Val_GNU_Power_ABI_NoFloat = 0b00,
+  Val_GNU_Power_ABI_HardFloat_DP = 0b01,
+  Val_GNU_Power_ABI_SoftFloat_DP = 0b10,
+  Val_GNU_Power_ABI_HardFloat_SP = 0b11,
+
+  Val_GNU_Power_ABI_LDBL_IBM128 = 0b0100,
+  Val_GNU_Power_ABI_LDBL_64 = 0b1000,
+  Val_GNU_Power_ABI_LDBL_IEEE128 = 0b1100,
+};
+
 class PPCAsmPrinter : public AsmPrinter {
 protected:
   // For TLS on AIX, we need to be able to identify TOC entries of specific
@@ -178,6 +195,8 @@ class PPCLinuxAsmPrinter : public PPCAsmPrinter {
     return "Linux PPC Assembly Printer";
   }
 
+  void emitGNUAttributes(Module &M);
+
   void emitStartOfAsmFile(Module &M) override;
   void emitEndOfAsmFile(Module &) override;
 
@@ -1388,6 +1407,28 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) {
   EmitToStreamer(*OutStreamer, TmpInst);
 }
 
+void PPCLinuxAsmPrinter::emitGNUAttributes(Module &M) {
+  // Emit float ABI into GNU attribute
+  Metadata *MD = M.getModuleFlag("float-abi");
+  MDString *FloatABI = dyn_cast_or_null<MDString>(MD);
+  if (!FloatABI)
+    return;
+  StringRef flt = FloatABI->getString();
+  // TODO: Support emitting soft-fp and hard double/single attributes.
+  if (flt == "doubledouble")
+    OutStreamer->emitGNUAttribute(Tag_GNU_Power_ABI_FP,
+                                  Val_GNU_Power_ABI_HardFloat_DP |
+                                      Val_GNU_Power_ABI_LDBL_IBM128);
+  else if (flt == "ieeequad")
+    OutStreamer->emitGNUAttribute(Tag_GNU_Power_ABI_FP,
+                                  Val_GNU_Power_ABI_HardFloat_DP |
+                                      Val_GNU_Power_ABI_LDBL_IEEE128);
+  else if (flt == "ieeedouble")
+    OutStreamer->emitGNUAttribute(Tag_GNU_Power_ABI_FP,
+                                  Val_GNU_Power_ABI_HardFloat_DP |
+                                      Val_GNU_Power_ABI_LDBL_64);
+}
+
 void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) {
   if (!Subtarget->isPPC64())
     return PPCAsmPrinter::emitInstruction(MI);
@@ -1642,6 +1683,8 @@ void PPCLinuxAsmPrinter::emitEndOfAsmFile(Module &M) {
   PPCTargetStreamer *TS =
       static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
 
+  emitGNUAttributes(M);
+
   if (!TOC.empty()) {
     const char *Name = isPPC64 ? ".toc" : ".got2";
     MCSectionELF *Section = OutContext.getELFSection(
diff --git a/llvm/test/CodeGen/PowerPC/gnu-attribute.ll b/llvm/test/CodeGen/PowerPC/gnu-attribute.ll
new file mode 100644
index 0000000000000..31747146d2345
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/gnu-attribute.ll
@@ -0,0 +1,15 @@
+; RUN: sed -e 's/FLTABI/ieeequad/' %s | llc -mtriple=powerpc64le | FileCheck %s --check-prefix=IEEE
+; RUN: sed -e 's/FLTABI/doubledouble/' %s | llc -mtriple=powerpc64le | FileCheck %s --check-prefix=IBM
+; RUN: sed -e 's/FLTABI/ieeedouble/' %s | llc -mtriple=powerpc64le | FileCheck %s --check-prefix=DBL
+; RUN: sed -e 's/FLTABI//' %s | llc -mtriple=powerpc64le | FileCheck %s --check-prefix=NONE
+
+; IEEE: .gnu_attribute 4, 13
+; IBM: .gnu_attribute 4, 5
+; DBL: .gnu_attribute 4, 9
+; NONE-NOT: .gnu_attribute 4,
+
+!llvm.module.flags = !{!0, !1, !2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"uwtable", i32 1}
+!2 = !{i32 1, !"float-abi", !"FLTABI"}

From cc4beda039ba5441b76b5b112dd5f69553965186 Mon Sep 17 00:00:00 2001
From: Hongtao Yu <hoy@fb.com>
Date: Tue, 25 Jan 2022 16:11:41 -0800
Subject: [PATCH 646/946] [CSSPGO] Adjust SampleContextFrameVector to be a
 shorter SmallVector

It appears that some memory saving can be archived by tweaking the existing `SampleContextFrameVector` setup. Below are memory usage for two benchmarks before and after this change.

1. Medium benchmark:
   - Before:

```
    note: After computeSizeForProfiledFunctions
    note: VM: 12.50 GB   RSS: 11.02 GB
    note: After generateProbeBasedProfile
    note: **VM: 30.56 GB   RSS: 29.08 GB**
    note: After postProcessProfiles
    note: VM: 31.91 GB   RSS: 30.43 GB
```
   - After:

```
    note: After computeSizeForProfiledFunctions
    note: VM: 12.26 GB   RSS: 10.80 GB
    note: After generateProbeBasedProfile
    note: **VM: 28.76 GB   RSS: 27.29 GB**
    note: After postProcessProfiles
    note: VM: 30.12 GB   RSS: 28.66 GB
```

2. Large benchmark:
   - Before:

```
    note: After computeSizeForProfiledFunctions
    note: VM: 106.28 GB   RSS: 91.25 GB
    note: After generateProbeBasedProfile
    note: **VM: 245.19 GB   RSS: 224.30 GB**
    note: After postProcessProfiles
    note: VM: 254.96 GB   RSS: 221.58 GB
```

   - After:

```
    note: After computeSizeForProfiledFunctions
    note: VM: 105.95 GB   RSS: 91.04 GB
    note: After generateProbeBasedProfile
    note: VM: **234.29 GB   RSS: 215.39 GB**
    note: After postProcessProfiles
    note: VM: 244.03 GB   RSS: 213.45 GB
```

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D118203
---
 llvm/include/llvm/ProfileData/SampleProf.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 6be54d8d78b2f..bad2139fe8f0e 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -450,7 +450,7 @@ static inline hash_code hash_value(const SampleContextFrame &arg) {
                       arg.Location.Discriminator);
 }
 
-using SampleContextFrameVector = SmallVector<SampleContextFrame, 10>;
+using SampleContextFrameVector = SmallVector<SampleContextFrame, 1>;
 using SampleContextFrames = ArrayRef<SampleContextFrame>;
 
 struct SampleContextFrameHash {

From d606e23305015f14072ed59e8e71d6b27d010b48 Mon Sep 17 00:00:00 2001
From: luxufan <932494295@qq.com>
Date: Fri, 14 Jan 2022 23:02:27 +0800
Subject: [PATCH 647/946] [MC] Support constant offset for symbol PendingFixup

This patch add support relocation offset of sym+constant(like `foo+4`) form for pending fixup.

In the past, llvm-mc ignored the constant in sym+constant form, for `foo+4`, `4` would be ignored. And test case
```
.text
  ret
  nop
  nop
  .reloc foo+4, R_RISCV_32, 6

.data
.globl foo
foo:
  .word 0
  .word 0
  .word 0
```
when run `llvm-mc -filetype=obj -triple=riscv64 %s | llvm-readobj -r`
The output is
```
Relocations [
  Section (3) .rela.text {
    0x0 R_RISCV_32 - 0x6
  }
]
```

After applying this patch, the output is
```
Relocations [
  Section (3) .rela.text {
    0x4 R_RISCV_32 - 0x6
  }
]
```

Differential Revision: https://reviews.llvm.org/D117316
---
 llvm/lib/MC/MCObjectStreamer.cpp     | 8 +++++---
 llvm/test/MC/RISCV/reloc-directive.s | 2 ++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index ca462b3e558c9..ebbbd6ad4e164 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -119,7 +119,8 @@ void MCObjectStreamer::resolvePendingFixups() {
       continue;
     }
     flushPendingLabels(PendingFixup.DF, PendingFixup.DF->getContents().size());
-    PendingFixup.Fixup.setOffset(PendingFixup.Sym->getOffset());
+    PendingFixup.Fixup.setOffset(PendingFixup.Sym->getOffset() +
+                                 PendingFixup.Fixup.getOffset());
 
     // If the location symbol to relocate is in MCEncodedFragmentWithFixups,
     // put the Fixup into location symbol's fragment. Otherwise
@@ -838,8 +839,9 @@ MCObjectStreamer::emitRelocDirective(const MCExpr &Offset, StringRef Name,
     return None;
   }
 
-  PendingFixups.emplace_back(&SRE.getSymbol(), DF,
-                             MCFixup::create(-1, Expr, Kind, Loc));
+  PendingFixups.emplace_back(
+      &SRE.getSymbol(), DF,
+      MCFixup::create(OffsetVal.getConstant(), Expr, Kind, Loc));
   return None;
 }
 
diff --git a/llvm/test/MC/RISCV/reloc-directive.s b/llvm/test/MC/RISCV/reloc-directive.s
index d4c20cde060bb..0e217fa798482 100644
--- a/llvm/test/MC/RISCV/reloc-directive.s
+++ b/llvm/test/MC/RISCV/reloc-directive.s
@@ -25,6 +25,7 @@
 
 # CHECK:      Section ({{.*}}) .rela.data {
 # CHECK-NEXT:   0x0 R_RISCV_32 - 0x6
+# CHECK-NEXT:   0x4 R_RISCV_32 - 0x6
 # CHECK-NEXT: }
 
 # CHECK:      Section ({{.*}}) .rela.debug_line {
@@ -55,6 +56,7 @@
   .reloc line, R_RISCV_32, 6
   .reloc probe, R_RISCV_32, 6
 
+  .reloc foo+4, R_RISCV_32, 6
 .data
 .globl foo
 foo:

From 8597458278027047c06a1a81579893335e761a33 Mon Sep 17 00:00:00 2001
From: wangpc <pc.wang@linux.alibaba.com>
Date: Wed, 26 Jan 2022 14:04:53 +0800
Subject: [PATCH 648/946] [regalloc] Fix assertion error when LiveInterval is
 empty

When evicting interference, it causes an asseertion error
since LiveIntervals::intervalIsInOneMBB assumes that input
is not empty.

This patch fixed bug mentioned in D118020.

Reviewed By: MatzeB

Differential Revision: https://reviews.llvm.org/D118124
---
 llvm/lib/CodeGen/LiveIntervals.cpp  | 2 ++
 llvm/lib/CodeGen/RegAllocGreedy.cpp | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 2f97386b6d189..9571afa434c12 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -827,6 +827,8 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
 
 MachineBasicBlock*
 LiveIntervals::intervalIsInOneMBB(const LiveInterval &LI) const {
+  assert(!LI.empty() && "LiveInterval is empty.");
+
   // A local live range must be fully contained inside the block, meaning it is
   // defined and killed at instructions, not at block boundaries. It is not
   // live in or out of any block.
diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 79314874093fb..6ea6dbcbbb743 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -497,7 +497,7 @@ bool DefaultEvictionAdvisor::canEvictInterferenceBasedOnCost(
   if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
     return false;
 
-  bool IsLocal = LIS->intervalIsInOneMBB(VirtReg);
+  bool IsLocal = VirtReg.empty() || LIS->intervalIsInOneMBB(VirtReg);
 
   // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
   // involved in an eviction before. If a cascade number was assigned, deny

From 223f9dea3d8b46847c635a1ce864917e48da01ae Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 22:15:44 -0800
Subject: [PATCH 649/946] [ELF] maybeCompress: replace vector<uint8_t> with
 unique_ptr<uint8_t[]>. NFC

And mention that it is zero-initialized. I do not notice a speed-up if
changed to be uninitialized by forcing the zero filler in writeTo.
---
 lld/ELF/OutputSections.cpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index cffde5d61ac91..f01f74f44d22c 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -337,9 +337,9 @@ template <class ELFT> void OutputSection::maybeCompress() {
   hdr->ch_size = size;
   hdr->ch_addralign = alignment;
 
-  // Write section contents to a temporary buffer and compress it.
-  std::vector<uint8_t> buf(size);
-  writeTo<ELFT>(buf.data());
+  // Write uncompressed data to a temporary zero-initialized buffer.
+  auto buf = std::make_unique<uint8_t[]>(size);
+  writeTo<ELFT>(buf.get());
   // We chose 1 as the default compression level because it is the fastest. If
   // -O2 is given, we use level 6 to compress debug info more by ~15%. We found
   // that level 7 to 9 doesn't make much difference (~1% more compression) while
@@ -350,9 +350,9 @@ template <class ELFT> void OutputSection::maybeCompress() {
   constexpr size_t shardSize = 1 << 20;
   const size_t numShards = (size + shardSize - 1) / shardSize;
   auto shardsIn = std::make_unique<ArrayRef<uint8_t>[]>(numShards);
-  for (size_t i = 0, start = 0, end; start != buf.size(); ++i, start = end) {
-    end = std::min(start + shardSize, buf.size());
-    shardsIn[i] = makeArrayRef<uint8_t>(buf.data() + start, end - start);
+  for (size_t i = 0, start = 0, end; start != size; ++i, start = end) {
+    end = std::min(start + shardSize, size);
+    shardsIn[i] = makeArrayRef<uint8_t>(buf.get() + start, end - start);
   }
 
   // Compress shards and compute Alder-32 checksums. Use Z_SYNC_FLUSH for all

From 7c984be21a350d2fd227fd95f36a70165a523b99 Mon Sep 17 00:00:00 2001
From: Rob Suderman <rob.suderman@gmail.com>
Date: Tue, 25 Jan 2022 22:15:55 -0800
Subject: [PATCH 650/946] [mlir] Propagate arith.index_cast past tensor.extract

If we are extracting it is more useful to push the index_cast past the
extraction. This increases the chance the tensor.extract can evaluated at
compile time.

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D118204
---
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp   | 42 +++++++++++++++++++++-
 mlir/test/Dialect/Tensor/canonicalize.mlir | 14 ++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 5ae13a613c427..bba7f9770f21a 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -425,11 +425,51 @@ struct ExtractElementFromTensorFromElements
   }
 };
 
+// Pushes the index_casts that occur before extractions to after the extract.
+// This minimizes type conversion in some cases and enables the extract
+// canonicalizer. This changes:
+//
+// %cast = arith.index_cast %tensor : tensor<1xi32> to tensor<1xindex>
+// %extract = tensor.extract %cast[%index] : tensor<1xindex>
+//
+// to the following:
+//
+// %extract = tensor.extract %tensor[%index] : tensor<1xindex>
+// %cast = arith.index_cast %extract : i32 to index
+//
+// to just %element.
+//
+// Consider expanding this to a template and handle all tensor cast operations.
+struct ExtractElementFromIndexCast
+    : public OpRewritePattern<tensor::ExtractOp> {
+  using OpRewritePattern<tensor::ExtractOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(tensor::ExtractOp extract,
+                                PatternRewriter &rewriter) const final {
+    Location loc = extract.getLoc();
+    auto indexCast = extract.tensor().getDefiningOp<arith::IndexCastOp>();
+    if (!indexCast)
+      return failure();
+
+    Type elementTy = getElementTypeOrSelf(indexCast.getIn());
+
+    auto newExtract = rewriter.create<tensor::ExtractOp>(
+        loc, elementTy, indexCast.getIn(), extract.indices());
+
+    rewriter.replaceOpWithNewOp<arith::IndexCastOp>(extract, extract.getType(),
+                                                    newExtract);
+
+    return success();
+  }
+};
+
 } // namespace
 
 void FromElementsOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                                  MLIRContext *context) {
-  results.add<ExtractElementFromTensorFromElements>(context);
+  results
+      .add<ExtractElementFromIndexCast, ExtractElementFromTensorFromElements>(
+          context);
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir
index e0ea5d777acb8..3084a262af7dd 100644
--- a/mlir/test/Dialect/Tensor/canonicalize.mlir
+++ b/mlir/test/Dialect/Tensor/canonicalize.mlir
@@ -1200,3 +1200,17 @@ func @fold_expand_shape_from_elements(%arg0: i32) -> tensor<1xi32> {
   %1 = tensor.expand_shape %0 [] : tensor<i32> into tensor<1xi32>
   return %1 : tensor<1xi32>
 }
+
+// -----
+
+// CHECK-LABEL: func @propogate_index_cast
+func @propogate_index_cast(%arg0: tensor<1xi32>) -> index {
+  // CHECK: %[[IDX:.+]] = arith.constant 0
+  // CHECK: %[[EXT:.+]] = tensor.extract %arg0[%[[IDX]]] : tensor<1xi32>
+  // CHECK: %[[CAST:.+]] = arith.index_cast %[[EXT]]
+  // CHECK: return %[[CAST]] : index
+  %c0 = arith.constant 0 : index
+  %0 = arith.index_cast %arg0 : tensor<1xi32> to tensor<1xindex>
+  %1 = tensor.extract %0[%c0] : tensor<1xindex>
+  return %1 : index
+}

From f0726ae0f9fa3fc5fadec7fdc940cc2f88759001 Mon Sep 17 00:00:00 2001
From: Richard <legalize@xmission.com>
Date: Mon, 3 Jan 2022 16:03:06 -0700
Subject: [PATCH 651/946] Refactor: Extract Class MessagePrefix (NFC)

The work is the same, the only difference is the prefix
of the strings we look for in the reference files.
---
 .../test/clang-tidy/check_clang_tidy.py       | 64 ++++++++++---------
 1 file changed, 35 insertions(+), 29 deletions(-)

diff --git a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
index 1a234029b60bd..d764f20ef2ecc 100755
--- a/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
+++ b/clang-tools-extra/test/clang-tidy/check_clang_tidy.py
@@ -51,6 +51,21 @@ def try_run(args, raise_error=True):
   return process_output
 
 
+# This class represents the appearance of a message prefix in a file.
+class MessagePrefix:
+  def __init__(self, label):
+    self.has_message = False
+    self.prefixes = []
+    self.label = label
+
+  def check(self, file_check_suffix, input_text):
+    self.prefix = self.label + file_check_suffix
+    self.has_message = self.prefix in input_text
+    if self.has_message:
+      self.prefixes.append(self.prefix)
+    return self.has_message
+
+
 class CheckRunner:
   def __init__(self, args, extra_args):
     self.resource_dir = args.resource_dir
@@ -63,12 +78,12 @@ def __init__(self, args, extra_args):
     self.std = args.std
     self.check_suffix = args.check_suffix
     self.input_text = ''
-    self.check_fixes_prefixes = []
-    self.check_messages_prefixes = []
-    self.check_notes_prefixes = []
     self.has_check_fixes = False
     self.has_check_messages = False
     self.has_check_notes = False
+    self.fixes = MessagePrefix('CHECK-FIXES')
+    self.messages = MessagePrefix('CHECK-MESSAGES')
+    self.notes = MessagePrefix('CHECK-NOTES')
 
     file_name_with_extension = self.assume_file_name or self.input_file_name
     _, extension = os.path.splitext(file_name_with_extension)
@@ -109,38 +124,29 @@ def read_input(self):
       self.input_text = input_file.read()
 
   def get_prefixes(self):
-    for check in self.check_suffix:
-      if check and not re.match('^[A-Z0-9\\-]+$', check):
+    for suffix in self.check_suffix:
+      if suffix and not re.match('^[A-Z0-9\\-]+$', suffix):
         sys.exit('Only A..Z, 0..9 and "-" are allowed in check suffixes list,'
-                 + ' but "%s" was given' % check)
+                 + ' but "%s" was given' % suffix)
 
-      file_check_suffix = ('-' + check) if check else ''
-      check_fixes_prefix = 'CHECK-FIXES' + file_check_suffix
-      check_messages_prefix = 'CHECK-MESSAGES' + file_check_suffix
-      check_notes_prefix = 'CHECK-NOTES' + file_check_suffix
+      file_check_suffix = ('-' + suffix) if suffix else ''
 
-      has_check_fix = check_fixes_prefix in self.input_text
-      has_check_message = check_messages_prefix in self.input_text
-      has_check_note = check_notes_prefix in self.input_text
+      has_check_fix = self.fixes.check(file_check_suffix, self.input_text)
+      self.has_check_fixes = self.has_check_fixes or has_check_fix
+
+      has_check_message = self.messages.check(file_check_suffix, self.input_text)
+      self.has_check_messages = self.has_check_messages or has_check_message
+
+      has_check_note = self.notes.check(file_check_suffix, self.input_text)
+      self.has_check_notes = self.has_check_notes or has_check_note
 
       if has_check_note and has_check_message:
         sys.exit('Please use either %s or %s but not both' %
-          (check_notes_prefix, check_messages_prefix))
+          (self.notes.prefix, self.messages.prefix))
 
       if not has_check_fix and not has_check_message and not has_check_note:
         sys.exit('%s, %s or %s not found in the input' %
-          (check_fixes_prefix, check_messages_prefix, check_notes_prefix))
-
-      self.has_check_fixes = self.has_check_fixes or has_check_fix
-      self.has_check_messages = self.has_check_messages or has_check_message
-      self.has_check_notes = self.has_check_notes or has_check_note
-
-      if has_check_fix:
-        self.check_fixes_prefixes.append(check_fixes_prefix)
-      if has_check_message:
-        self.check_messages_prefixes.append(check_messages_prefix)
-      if has_check_note:
-        self.check_notes_prefixes.append(check_notes_prefix)
+          (self.fixes.prefix, self.messages.prefix, self.notes.prefix))
 
     assert self.has_check_fixes or self.has_check_messages or self.has_check_notes
 
@@ -173,7 +179,7 @@ def run_clang_tidy(self):
   def check_fixes(self):
     if self.has_check_fixes:
       try_run(['FileCheck', '-input-file=' + self.temp_file_name, self.input_file_name,
-              '-check-prefixes=' + ','.join(self.check_fixes_prefixes),
+              '-check-prefixes=' + ','.join(self.fixes.prefixes),
               '-strict-whitespace'])
 
   def check_messages(self, clang_tidy_output):
@@ -181,7 +187,7 @@ def check_messages(self, clang_tidy_output):
       messages_file = self.temp_file_name + '.msg'
       write_file(messages_file, clang_tidy_output)
       try_run(['FileCheck', '-input-file=' + messages_file, self.input_file_name,
-             '-check-prefixes=' + ','.join(self.check_messages_prefixes),
+             '-check-prefixes=' + ','.join(self.messages.prefixes),
              '-implicit-check-not={{warning|error}}:'])
 
   def check_notes(self, clang_tidy_output):
@@ -191,7 +197,7 @@ def check_notes(self, clang_tidy_output):
                          if not ("note: FIX-IT applied" in line)]
       write_file(notes_file, '\n'.join(filtered_output))
       try_run(['FileCheck', '-input-file=' + notes_file, self.input_file_name,
-             '-check-prefixes=' + ','.join(self.check_notes_prefixes),
+             '-check-prefixes=' + ','.join(self.notes.prefixes),
              '-implicit-check-not={{note|warning|error}}:'])
 
   def run(self):

From 7438dbe078c654677e83a126ae07af4c463ed764 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 22:38:23 -0800
Subject: [PATCH 652/946] [ELF] Cast size to size_t. NFC

To fix

../../chromeclang/bin/../include/c++/v1/__algorithm/min.h:39:1: note: candidate template ignored: deduced conflicting types for parameter '_Tp' ('unsigned long' vs. 'unsigned long long')

on macOS arm64.
---
 lld/ELF/OutputSections.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index f01f74f44d22c..e97d23798d03a 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -351,7 +351,7 @@ template <class ELFT> void OutputSection::maybeCompress() {
   const size_t numShards = (size + shardSize - 1) / shardSize;
   auto shardsIn = std::make_unique<ArrayRef<uint8_t>[]>(numShards);
   for (size_t i = 0, start = 0, end; start != size; ++i, start = end) {
-    end = std::min(start + shardSize, size);
+    end = std::min(start + shardSize, (size_t)size);
     shardsIn[i] = makeArrayRef<uint8_t>(buf.get() + start, end - start);
   }
 

From 07bd46764335357da2eacc1b9f022a63f7273140 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 22:39:43 -0800
Subject: [PATCH 653/946] [ELF] --build-id: replace vector<uint8_t> with
 unique_ptr<uint8_t[]>. NFC

We can't use C++20 make_unique_for_overwrite yet.
---
 lld/ELF/Writer.cpp | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 2b14c07120c91..396b9c153dc21 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -2902,15 +2902,16 @@ computeHash(llvm::MutableArrayRef<uint8_t> hashBuf,
             llvm::ArrayRef<uint8_t> data,
             std::function<void(uint8_t *dest, ArrayRef<uint8_t> arr)> hashFn) {
   std::vector<ArrayRef<uint8_t>> chunks = split(data, 1024 * 1024);
-  std::vector<uint8_t> hashes(chunks.size() * hashBuf.size());
+  const size_t hashesSize = chunks.size() * hashBuf.size();
+  std::unique_ptr<uint8_t[]> hashes(new uint8_t[hashesSize]);
 
   // Compute hash values.
   parallelForEachN(0, chunks.size(), [&](size_t i) {
-    hashFn(hashes.data() + i * hashBuf.size(), chunks[i]);
+    hashFn(hashes.get() + i * hashBuf.size(), chunks[i]);
   });
 
   // Write to the final output buffer.
-  hashFn(hashBuf.data(), hashes);
+  hashFn(hashBuf.data(), makeArrayRef(hashes.get(), hashesSize));
 }
 
 template <class ELFT> void Writer<ELFT>::writeBuildId() {
@@ -2925,34 +2926,35 @@ template <class ELFT> void Writer<ELFT>::writeBuildId() {
 
   // Compute a hash of all sections of the output file.
   size_t hashSize = mainPart->buildId->hashSize;
-  std::vector<uint8_t> buildId(hashSize);
-  llvm::ArrayRef<uint8_t> buf{Out::bufferStart, size_t(fileSize)};
+  std::unique_ptr<uint8_t[]> buildId(new uint8_t[hashSize]);
+  MutableArrayRef<uint8_t> output(buildId.get(), hashSize);
+  llvm::ArrayRef<uint8_t> input{Out::bufferStart, size_t(fileSize)};
 
   switch (config->buildId) {
   case BuildIdKind::Fast:
-    computeHash(buildId, buf, [](uint8_t *dest, ArrayRef<uint8_t> arr) {
+    computeHash(output, input, [](uint8_t *dest, ArrayRef<uint8_t> arr) {
       write64le(dest, xxHash64(arr));
     });
     break;
   case BuildIdKind::Md5:
-    computeHash(buildId, buf, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
+    computeHash(output, input, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
       memcpy(dest, MD5::hash(arr).data(), hashSize);
     });
     break;
   case BuildIdKind::Sha1:
-    computeHash(buildId, buf, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
+    computeHash(output, input, [&](uint8_t *dest, ArrayRef<uint8_t> arr) {
       memcpy(dest, SHA1::hash(arr).data(), hashSize);
     });
     break;
   case BuildIdKind::Uuid:
-    if (auto ec = llvm::getRandomBytes(buildId.data(), hashSize))
+    if (auto ec = llvm::getRandomBytes(buildId.get(), hashSize))
       error("entropy source failure: " + ec.message());
     break;
   default:
     llvm_unreachable("unknown BuildIdKind");
   }
   for (Partition &part : partitions)
-    part.buildId->writeBuildId(buildId);
+    part.buildId->writeBuildId(output);
 }
 
 template void elf::createSyntheticSections<ELF32LE>();

From 2a80c3dbe17175cd99112838341411323473f68b Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 22:40:53 -0800
Subject: [PATCH 654/946] [ELF] Clarify that Z_BEST_SPEED==1 in a comment. NFC

---
 lld/ELF/OutputSections.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index e97d23798d03a..c4c11236a336d 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -340,10 +340,11 @@ template <class ELFT> void OutputSection::maybeCompress() {
   // Write uncompressed data to a temporary zero-initialized buffer.
   auto buf = std::make_unique<uint8_t[]>(size);
   writeTo<ELFT>(buf.get());
-  // We chose 1 as the default compression level because it is the fastest. If
-  // -O2 is given, we use level 6 to compress debug info more by ~15%. We found
-  // that level 7 to 9 doesn't make much difference (~1% more compression) while
-  // they take significant amount of time (~2x), so level 6 seems enough.
+  // We chose 1 (Z_BEST_SPEED) as the default compression level because it is
+  // the fastest. If -O2 is given, we use level 6 to compress debug info more by
+  // ~15%. We found that level 7 to 9 doesn't make much difference (~1% more
+  // compression) while they take significant amount of time (~2x), so level 6
+  // seems enough.
   const int level = config->optimize >= 2 ? 6 : Z_BEST_SPEED;
 
   // Split input into 1-MiB shards.

From 4fb8e0b8621a641b38cf2218cc20301da346cedf Mon Sep 17 00:00:00 2001
From: Yevgeny Rouban <yrouban@azul.com>
Date: Wed, 26 Jan 2022 12:38:17 +0700
Subject: [PATCH 655/946] [JumpThreading] Add a test for handling zero !prof
 branch_weights

The test was a part of the revision D81499 and should have been
added with commit 707836ed4ed.

Reviewed By: yamauchi, wenlei
Differential Revision: https://reviews.llvm.org/D81499
---
 .../JumpThreading/threading_prof3.ll          | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 llvm/test/Transforms/JumpThreading/threading_prof3.ll

diff --git a/llvm/test/Transforms/JumpThreading/threading_prof3.ll b/llvm/test/Transforms/JumpThreading/threading_prof3.ll
new file mode 100644
index 0000000000000..b235c9b72b938
--- /dev/null
+++ b/llvm/test/Transforms/JumpThreading/threading_prof3.ll
@@ -0,0 +1,30 @@
+; RUN: opt -jump-threading -S < %s | FileCheck %s
+; RUN: opt -passes=jump-threading -S < %s | FileCheck %s
+
+; Check that all zero branch weights do not cause a crash.
+define void @zero_branch_weights(i32 %tmp, i32 %tmp3) {
+bb:
+  %tmp1 = icmp eq i32 %tmp, 1
+  br i1 %tmp1, label %bb5, label %bb2
+; CHECK-NOT: br i1 %tmp1,{{.*}} !prof
+
+bb2:
+  %tmp4 = icmp ne i32 %tmp3, 1
+  br label %bb5
+; CHECK: br i1 %tmp4, {{.*}} !prof ![[PROF:[0-9]+]]
+
+bb5:
+  %tmp6 = phi i1 [ false, %bb ], [ %tmp4, %bb2 ]
+  br i1 %tmp6, label %bb8, label %bb7, !prof !{!"branch_weights", i32 0, i32 0}
+
+bb7:
+  br label %bb9
+
+bb8:
+  br label %bb9
+
+bb9:
+  ret void
+}
+
+;CHECK: ![[PROF]] = !{!"branch_weights", i32 0, i32 0}

From 9fac78d0e1820b132964f6c1cfe22049d890d1d8 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 22:50:03 -0800
Subject: [PATCH 656/946] [ELF] Simplify and optimize .relr.dyn NFC

---
 lld/ELF/SyntheticSections.cpp | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 87f4269b83917..9f0cbf9e84884 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2038,28 +2038,16 @@ template <class ELFT> bool RelrSection<ELFT>::updateAllocSize() {
     ++i;
 
     // Find foldable relocations to construct bitmaps.
-    while (i < e) {
+    for (;;) {
       uint64_t bitmap = 0;
-
-      while (i < e) {
-        uint64_t delta = offsets[i] - base;
-
-        // If it is too far, it cannot be folded.
-        if (delta >= nBits * wordsize)
-          break;
-
-        // If it is not a multiple of wordsize away, it cannot be folded.
-        if (delta % wordsize)
+      for (; i != e; ++i) {
+        uint64_t d = offsets[i] - base;
+        if (d >= nBits * wordsize || d % wordsize)
           break;
-
-        // Fold it.
-        bitmap |= 1ULL << (delta / wordsize);
-        ++i;
+        bitmap |= uint64_t(1) << (d / wordsize);
       }
-
       if (!bitmap)
         break;
-
       relrRelocs.push_back(Elf_Relr((bitmap << 1) | 1));
       base += nBits * wordsize;
     }

From da1cac7d19c4f3f2dcc27083dc3b28d171610508 Mon Sep 17 00:00:00 2001
From: Jim Lin <jim@andestech.com>
Date: Wed, 26 Jan 2022 09:50:00 +0800
Subject: [PATCH 657/946] [NFC] Remove duplicate include

---
 llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp  | 1 -
 llvm/lib/Target/ARM/ARMSubtarget.cpp                          | 1 -
 llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp | 1 -
 3 files changed, 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index e069caff014f4..1f546ad50d571 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -18,7 +18,6 @@
 #include "AArch64RegisterInfo.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetMachine.h"
-#include "AArch64GlobalISelUtils.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "MCTargetDesc/AArch64MCTargetDesc.h"
 #include "llvm/ADT/Optional.h"
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 16befa4fff040..2dd25234dc501 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -15,7 +15,6 @@
 #include "ARMCallLowering.h"
 #include "ARMLegalizerInfo.h"
 #include "ARMRegisterBankInfo.h"
-#include "ARMSubtarget.h"
 #include "ARMFrameLowering.h"
 #include "ARMInstrInfo.h"
 #include "ARMSubtarget.h"
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
index 0cdd1f4f701ff..bb5351af6523d 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430ELFObjectWriter.cpp
@@ -9,7 +9,6 @@
 #include "MCTargetDesc/MSP430FixupKinds.h"
 #include "MCTargetDesc/MSP430MCTargetDesc.h"
 
-#include "MCTargetDesc/MSP430MCTargetDesc.h"
 #include "llvm/MC/MCELFObjectWriter.h"
 #include "llvm/MC/MCFixup.h"
 #include "llvm/MC/MCObjectWriter.h"

From f563bd74cb9a4f0d2d3eb49d35536b648361ec53 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Wed, 26 Jan 2022 15:19:22 +0800
Subject: [PATCH 658/946] [NFC] Group PowerPC clang codegen tests into
 directory

---
 .../CodeGen/{ => PowerPC}/2009-02-13-zerosize-union-field-ppc.c   | 0
 clang/test/CodeGen/{ => PowerPC}/aix-alignment.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/aix-altivec-vaargs.c             | 0
 clang/test/CodeGen/{ => PowerPC}/aix-altivec.c                    | 0
 clang/test/CodeGen/{ => PowerPC}/aix-constructor-attribute.c      | 0
 clang/test/CodeGen/{ => PowerPC}/aix-destructor-attribute.c       | 0
 clang/test/CodeGen/{ => PowerPC}/aix-ignore-xcoff-visibility.cpp  | 0
 clang/test/CodeGen/{ => PowerPC}/aix-init-priority-attribute.cpp  | 0
 clang/test/CodeGen/{ => PowerPC}/aix-return.c                     | 0
 clang/test/CodeGen/{ => PowerPC}/aix-struct-arg.c                 | 0
 clang/test/CodeGen/{ => PowerPC}/aix-tls-model.cpp                | 0
 clang/test/CodeGen/{ => PowerPC}/aix-vaargs.c                     | 0
 clang/test/CodeGen/{ => PowerPC}/aix-vector-attr-aligned.c        | 0
 .../test/CodeGen/{ => PowerPC}/aix-visibility-inlines-hidden.cpp  | 0
 clang/test/CodeGen/{ => PowerPC}/aix32-complex-varargs.c          | 0
 clang/test/CodeGen/{ => PowerPC}/aix_alloca_align.c               | 0
 clang/test/CodeGen/{ => PowerPC}/altivec-ct.c                     | 0
 clang/test/CodeGen/{ => PowerPC}/altivec-dss.c                    | 0
 clang/test/CodeGen/{ => PowerPC}/altivec.c                        | 0
 clang/test/CodeGen/{ => PowerPC}/attr-target-ppc.c                | 0
 clang/test/CodeGen/{ => PowerPC}/bool_test.c                      | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-32bit-vec-ll.c      | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-altivec.c           | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-build-pair-mma.c    | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-cache.c             | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-crypto-disabled.c   | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-crypto.c            | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-error.c             | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-fastmath.c          | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-fma.c               | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-fpconstrained.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-htm.c               | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-int128.c            | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-ld-st-rmb.c         | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p10.c               | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p10vector-error.c   | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p10vector.c         | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p7-disabled.c       | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p7.c                | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p8vector.c          | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p9-f128.c           | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p9vector.c          | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-pair-mma.c          | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-quadword-noi128.c   | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-quadword.c          | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-vec-ins-error.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-vsx.c               | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xl-xst.c            | 0
 .../builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c       | 0
 .../{ => PowerPC}/builtins-ppc-xlcompat-LoadReseve-StoreCond.c    | 0
 .../test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-cas-error.c  | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-cas.c      | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-cipher.c   | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-cmplx.c    | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-compare.c  | 0
 .../CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-conversionfunc.c  | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-darn.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-error.c    | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-expect.c   | 0
 .../CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-fetch-error.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-fetch.c    | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-fp.c       | 0
 .../builtins-ppc-xlcompat-load-store-reversed-64bit-only.c        | 0
 .../{ => PowerPC}/builtins-ppc-xlcompat-load-store-reversed.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-macros.c   | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-math.c     | 0
 .../{ => PowerPC}/builtins-ppc-xlcompat-move-tofrom-regs.c        | 0
 .../{ => PowerPC}/builtins-ppc-xlcompat-multiply-64bit-only.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-multiply.c | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-popcnt.c   | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-prefetch.c | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr8.c     | 0
 .../test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr9-64bit.c | 0
 .../test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr9-error.c | 0
 .../CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr9-warning.c    | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr9.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-rotate.c   | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-stfiw.c    | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-swdiv.c    | 0
 .../CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-swdiv_nochk.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-sync.c     | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-test.c     | 0
 .../CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-trap-64bit-only.c | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-trap.c     | 0
 .../test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-vec-error.c  | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat.c          | 0
 clang/test/CodeGen/{ => PowerPC}/builtins-ppc.c                   | 0
 clang/test/CodeGen/{ => PowerPC}/ibm128-cast.c                    | 0
 clang/test/CodeGen/{ => PowerPC}/ignore-exceptions.cpp            | 0
 clang/test/CodeGen/{ => PowerPC}/inline-asm-matching-constraint.c | 0
 clang/test/CodeGen/{ => PowerPC}/inline-asm-matching-ppc-vsx.c    | 0
 clang/test/CodeGen/{ => PowerPC}/powerpc-c99complex.c             | 0
 clang/test/CodeGen/{ => PowerPC}/powerpc_types.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-aggregate-abi.cpp            | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-emmintrin.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-mm-malloc-le.c               | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-mm-malloc.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-mma-types.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-mmintrin.c                   | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-pmmintrin.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-sfvarargs.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-signbit.c                    | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-smmintrin.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-tmmintrin.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-varargs-struct.c             | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-vector-compare.cpp           | 0
 clang/test/CodeGen/{ => PowerPC}/ppc-xmmintrin.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc32-and-aix-struct-return.c    | 0
 clang/test/CodeGen/{ => PowerPC}/ppc32-dwarf.c                    | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-align-struct.c             | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-complex-parms.c            | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-complex-return.c           | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-dwarf.c                    | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-elf-abi.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-extend.c                   | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-f128-builtins.c            | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-inline-asm.c               | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-long-double.cpp            | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-soft-float.c               | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-struct-onefloat.c          | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-struct-onevect.c           | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-varargs-complex.c          | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64-vector.c                   | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64le-aggregates.c             | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64le-f128Aggregates.c         | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64le-varargs-complex.c        | 0
 clang/test/CodeGen/{ => PowerPC}/ppc64le-varargs-f128.c           | 0
 .../{ => PowerPC}/vector-bool-pixel-altivec-init-no-parentheses.c | 0
 clang/test/CodeGen/{ => PowerPC}/vector-bool-pixel-altivec-init.c | 0
 .../test/CodeGen/{ => PowerPC}/vector-compat-pixel-bool-ternary.c | 0
 clang/test/CodeGen/{ => PowerPC}/vector-compat-pixel-bool.c       | 0
 clang/test/CodeGen/{ => PowerPC}/vector-compat-ternary.c          | 0
 clang/test/CodeGen/{ => PowerPC}/vector-compat.c                  | 0
 clang/test/CodeGen/{ => PowerPC}/xcoff-comdat.cpp                 | 0
 134 files changed, 0 insertions(+), 0 deletions(-)
 rename clang/test/CodeGen/{ => PowerPC}/2009-02-13-zerosize-union-field-ppc.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-alignment.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-altivec-vaargs.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-altivec.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-constructor-attribute.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-destructor-attribute.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-ignore-xcoff-visibility.cpp (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-init-priority-attribute.cpp (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-return.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-struct-arg.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-tls-model.cpp (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-vaargs.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-vector-attr-aligned.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix-visibility-inlines-hidden.cpp (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix32-complex-varargs.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/aix_alloca_align.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/altivec-ct.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/altivec-dss.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/altivec.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/attr-target-ppc.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/bool_test.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-32bit-vec-ll.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-altivec.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-build-pair-mma.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-cache.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-crypto-disabled.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-crypto.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-error.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-fastmath.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-fma.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-fpconstrained.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-htm.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-int128.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-ld-st-rmb.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p10.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p10vector-error.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p10vector.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p7-disabled.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p7.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p8vector.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p9-f128.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-p9vector.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-pair-mma.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-quadword-noi128.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-quadword.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-vec-ins-error.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-vsx.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xl-xst.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-LoadReseve-StoreCond.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-cas-error.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-cas.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-cipher.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-cmplx.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-compare.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-conversionfunc.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-darn.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-error.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-expect.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-fetch-error.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-fetch.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-fp.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-load-store-reversed-64bit-only.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-load-store-reversed.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-macros.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-math.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-move-tofrom-regs.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-multiply-64bit-only.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-multiply.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-popcnt.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-prefetch.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr8.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr9-64bit.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr9-error.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr9-warning.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-pwr9.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-rotate.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-stfiw.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-swdiv.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-swdiv_nochk.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-sync.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-test.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-trap-64bit-only.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-trap.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat-vec-error.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc-xlcompat.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/builtins-ppc.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ibm128-cast.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ignore-exceptions.cpp (100%)
 rename clang/test/CodeGen/{ => PowerPC}/inline-asm-matching-constraint.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/inline-asm-matching-ppc-vsx.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/powerpc-c99complex.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/powerpc_types.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-aggregate-abi.cpp (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-emmintrin.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-mm-malloc-le.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-mm-malloc.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-mma-types.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-mmintrin.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-pmmintrin.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-sfvarargs.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-signbit.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-smmintrin.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-tmmintrin.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-varargs-struct.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-vector-compare.cpp (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc-xmmintrin.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc32-and-aix-struct-return.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc32-dwarf.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-align-struct.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-complex-parms.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-complex-return.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-dwarf.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-elf-abi.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-extend.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-f128-builtins.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-inline-asm.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-long-double.cpp (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-soft-float.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-struct-onefloat.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-struct-onevect.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-varargs-complex.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64-vector.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64le-aggregates.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64le-f128Aggregates.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64le-varargs-complex.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/ppc64le-varargs-f128.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/vector-bool-pixel-altivec-init-no-parentheses.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/vector-bool-pixel-altivec-init.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/vector-compat-pixel-bool-ternary.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/vector-compat-pixel-bool.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/vector-compat-ternary.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/vector-compat.c (100%)
 rename clang/test/CodeGen/{ => PowerPC}/xcoff-comdat.cpp (100%)

diff --git a/clang/test/CodeGen/2009-02-13-zerosize-union-field-ppc.c b/clang/test/CodeGen/PowerPC/2009-02-13-zerosize-union-field-ppc.c
similarity index 100%
rename from clang/test/CodeGen/2009-02-13-zerosize-union-field-ppc.c
rename to clang/test/CodeGen/PowerPC/2009-02-13-zerosize-union-field-ppc.c
diff --git a/clang/test/CodeGen/aix-alignment.c b/clang/test/CodeGen/PowerPC/aix-alignment.c
similarity index 100%
rename from clang/test/CodeGen/aix-alignment.c
rename to clang/test/CodeGen/PowerPC/aix-alignment.c
diff --git a/clang/test/CodeGen/aix-altivec-vaargs.c b/clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c
similarity index 100%
rename from clang/test/CodeGen/aix-altivec-vaargs.c
rename to clang/test/CodeGen/PowerPC/aix-altivec-vaargs.c
diff --git a/clang/test/CodeGen/aix-altivec.c b/clang/test/CodeGen/PowerPC/aix-altivec.c
similarity index 100%
rename from clang/test/CodeGen/aix-altivec.c
rename to clang/test/CodeGen/PowerPC/aix-altivec.c
diff --git a/clang/test/CodeGen/aix-constructor-attribute.c b/clang/test/CodeGen/PowerPC/aix-constructor-attribute.c
similarity index 100%
rename from clang/test/CodeGen/aix-constructor-attribute.c
rename to clang/test/CodeGen/PowerPC/aix-constructor-attribute.c
diff --git a/clang/test/CodeGen/aix-destructor-attribute.c b/clang/test/CodeGen/PowerPC/aix-destructor-attribute.c
similarity index 100%
rename from clang/test/CodeGen/aix-destructor-attribute.c
rename to clang/test/CodeGen/PowerPC/aix-destructor-attribute.c
diff --git a/clang/test/CodeGen/aix-ignore-xcoff-visibility.cpp b/clang/test/CodeGen/PowerPC/aix-ignore-xcoff-visibility.cpp
similarity index 100%
rename from clang/test/CodeGen/aix-ignore-xcoff-visibility.cpp
rename to clang/test/CodeGen/PowerPC/aix-ignore-xcoff-visibility.cpp
diff --git a/clang/test/CodeGen/aix-init-priority-attribute.cpp b/clang/test/CodeGen/PowerPC/aix-init-priority-attribute.cpp
similarity index 100%
rename from clang/test/CodeGen/aix-init-priority-attribute.cpp
rename to clang/test/CodeGen/PowerPC/aix-init-priority-attribute.cpp
diff --git a/clang/test/CodeGen/aix-return.c b/clang/test/CodeGen/PowerPC/aix-return.c
similarity index 100%
rename from clang/test/CodeGen/aix-return.c
rename to clang/test/CodeGen/PowerPC/aix-return.c
diff --git a/clang/test/CodeGen/aix-struct-arg.c b/clang/test/CodeGen/PowerPC/aix-struct-arg.c
similarity index 100%
rename from clang/test/CodeGen/aix-struct-arg.c
rename to clang/test/CodeGen/PowerPC/aix-struct-arg.c
diff --git a/clang/test/CodeGen/aix-tls-model.cpp b/clang/test/CodeGen/PowerPC/aix-tls-model.cpp
similarity index 100%
rename from clang/test/CodeGen/aix-tls-model.cpp
rename to clang/test/CodeGen/PowerPC/aix-tls-model.cpp
diff --git a/clang/test/CodeGen/aix-vaargs.c b/clang/test/CodeGen/PowerPC/aix-vaargs.c
similarity index 100%
rename from clang/test/CodeGen/aix-vaargs.c
rename to clang/test/CodeGen/PowerPC/aix-vaargs.c
diff --git a/clang/test/CodeGen/aix-vector-attr-aligned.c b/clang/test/CodeGen/PowerPC/aix-vector-attr-aligned.c
similarity index 100%
rename from clang/test/CodeGen/aix-vector-attr-aligned.c
rename to clang/test/CodeGen/PowerPC/aix-vector-attr-aligned.c
diff --git a/clang/test/CodeGen/aix-visibility-inlines-hidden.cpp b/clang/test/CodeGen/PowerPC/aix-visibility-inlines-hidden.cpp
similarity index 100%
rename from clang/test/CodeGen/aix-visibility-inlines-hidden.cpp
rename to clang/test/CodeGen/PowerPC/aix-visibility-inlines-hidden.cpp
diff --git a/clang/test/CodeGen/aix32-complex-varargs.c b/clang/test/CodeGen/PowerPC/aix32-complex-varargs.c
similarity index 100%
rename from clang/test/CodeGen/aix32-complex-varargs.c
rename to clang/test/CodeGen/PowerPC/aix32-complex-varargs.c
diff --git a/clang/test/CodeGen/aix_alloca_align.c b/clang/test/CodeGen/PowerPC/aix_alloca_align.c
similarity index 100%
rename from clang/test/CodeGen/aix_alloca_align.c
rename to clang/test/CodeGen/PowerPC/aix_alloca_align.c
diff --git a/clang/test/CodeGen/altivec-ct.c b/clang/test/CodeGen/PowerPC/altivec-ct.c
similarity index 100%
rename from clang/test/CodeGen/altivec-ct.c
rename to clang/test/CodeGen/PowerPC/altivec-ct.c
diff --git a/clang/test/CodeGen/altivec-dss.c b/clang/test/CodeGen/PowerPC/altivec-dss.c
similarity index 100%
rename from clang/test/CodeGen/altivec-dss.c
rename to clang/test/CodeGen/PowerPC/altivec-dss.c
diff --git a/clang/test/CodeGen/altivec.c b/clang/test/CodeGen/PowerPC/altivec.c
similarity index 100%
rename from clang/test/CodeGen/altivec.c
rename to clang/test/CodeGen/PowerPC/altivec.c
diff --git a/clang/test/CodeGen/attr-target-ppc.c b/clang/test/CodeGen/PowerPC/attr-target-ppc.c
similarity index 100%
rename from clang/test/CodeGen/attr-target-ppc.c
rename to clang/test/CodeGen/PowerPC/attr-target-ppc.c
diff --git a/clang/test/CodeGen/bool_test.c b/clang/test/CodeGen/PowerPC/bool_test.c
similarity index 100%
rename from clang/test/CodeGen/bool_test.c
rename to clang/test/CodeGen/PowerPC/bool_test.c
diff --git a/clang/test/CodeGen/builtins-ppc-32bit-vec-ll.c b/clang/test/CodeGen/PowerPC/builtins-ppc-32bit-vec-ll.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-32bit-vec-ll.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-32bit-vec-ll.c
diff --git a/clang/test/CodeGen/builtins-ppc-altivec.c b/clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-altivec.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-altivec.c
diff --git a/clang/test/CodeGen/builtins-ppc-build-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-build-pair-mma.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-build-pair-mma.c
diff --git a/clang/test/CodeGen/builtins-ppc-cache.c b/clang/test/CodeGen/PowerPC/builtins-ppc-cache.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-cache.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-cache.c
diff --git a/clang/test/CodeGen/builtins-ppc-crypto-disabled.c b/clang/test/CodeGen/PowerPC/builtins-ppc-crypto-disabled.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-crypto-disabled.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-crypto-disabled.c
diff --git a/clang/test/CodeGen/builtins-ppc-crypto.c b/clang/test/CodeGen/PowerPC/builtins-ppc-crypto.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-crypto.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-crypto.c
diff --git a/clang/test/CodeGen/builtins-ppc-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-error.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-error.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-error.c
diff --git a/clang/test/CodeGen/builtins-ppc-fastmath.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-fastmath.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-fastmath.c
diff --git a/clang/test/CodeGen/builtins-ppc-fma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-fma.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-fma.c
diff --git a/clang/test/CodeGen/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-fpconstrained.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c
diff --git a/clang/test/CodeGen/builtins-ppc-htm.c b/clang/test/CodeGen/PowerPC/builtins-ppc-htm.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-htm.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-htm.c
diff --git a/clang/test/CodeGen/builtins-ppc-int128.c b/clang/test/CodeGen/PowerPC/builtins-ppc-int128.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-int128.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-int128.c
diff --git a/clang/test/CodeGen/builtins-ppc-ld-st-rmb.c b/clang/test/CodeGen/PowerPC/builtins-ppc-ld-st-rmb.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-ld-st-rmb.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-ld-st-rmb.c
diff --git a/clang/test/CodeGen/builtins-ppc-p10.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p10.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-p10.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-p10.c
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector-error.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-p10vector-error.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-p10vector-error.c
diff --git a/clang/test/CodeGen/builtins-ppc-p10vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-p10vector.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-p10vector.c
diff --git a/clang/test/CodeGen/builtins-ppc-p7-disabled.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p7-disabled.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-p7-disabled.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-p7-disabled.c
diff --git a/clang/test/CodeGen/builtins-ppc-p7.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p7.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-p7.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-p7.c
diff --git a/clang/test/CodeGen/builtins-ppc-p8vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-p8vector.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-p8vector.c
diff --git a/clang/test/CodeGen/builtins-ppc-p9-f128.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p9-f128.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-p9-f128.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-p9-f128.c
diff --git a/clang/test/CodeGen/builtins-ppc-p9vector.c b/clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-p9vector.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-p9vector.c
diff --git a/clang/test/CodeGen/builtins-ppc-pair-mma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-pair-mma.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-pair-mma.c
diff --git a/clang/test/CodeGen/builtins-ppc-quadword-noi128.c b/clang/test/CodeGen/PowerPC/builtins-ppc-quadword-noi128.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-quadword-noi128.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-quadword-noi128.c
diff --git a/clang/test/CodeGen/builtins-ppc-quadword.c b/clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-quadword.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-quadword.c
diff --git a/clang/test/CodeGen/builtins-ppc-vec-ins-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vec-ins-error.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-vec-ins-error.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-vec-ins-error.c
diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-vsx.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c
diff --git a/clang/test/CodeGen/builtins-ppc-xl-xst.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xl-xst.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xl-xst.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xl-xst.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond-64bit-only.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-LoadReseve-StoreCond.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-cas-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas-error.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-cas-error.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas-error.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-cas.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-cas.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cas.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-cipher.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cipher.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-cipher.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cipher.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-cmplx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cmplx.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-cmplx.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-cmplx.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-compare.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-compare.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-compare.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-compare.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-conversionfunc.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-conversionfunc.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-conversionfunc.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-conversionfunc.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-darn.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-darn.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-darn.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-darn.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-error.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-error.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-expect.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-expect.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-expect.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-expect.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-fetch-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fetch-error.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-fetch-error.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fetch-error.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-fetch.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fetch.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-fetch.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fetch.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-fp.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fp.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-fp.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-fp.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed-64bit-only.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed-64bit-only.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed-64bit-only.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed-64bit-only.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-load-store-reversed.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-load-store-reversed.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-macros.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-macros.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-macros.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-macros.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-math.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-math.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-move-tofrom-regs.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-move-tofrom-regs.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-multiply-64bit-only.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-multiply-64bit-only.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-multiply-64bit-only.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-multiply-64bit-only.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-multiply.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-multiply.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-multiply.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-multiply.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-popcnt.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-popcnt.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-popcnt.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-popcnt.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-prefetch.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-prefetch.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-prefetch.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr8.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-pwr8.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr8.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-pwr9-64bit.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-64bit.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-pwr9-64bit.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-64bit.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-pwr9-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-error.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-pwr9-error.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-error.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-pwr9-warning.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-warning.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-pwr9-warning.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9-warning.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-pwr9.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-pwr9.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-pwr9.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-rotate.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-rotate.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-rotate.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-stfiw.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-stfiw.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-swdiv.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-swdiv.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-swdiv.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-swdiv.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-swdiv_nochk.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-swdiv_nochk.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-swdiv_nochk.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-swdiv_nochk.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-sync.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-sync.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-test.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-test.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-test.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-test.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-trap-64bit-only.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-trap-64bit-only.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-trap.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-trap.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-vec-error.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-vec-error.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat-vec-error.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-vec-error.c
diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc-xlcompat.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat.c
diff --git a/clang/test/CodeGen/builtins-ppc.c b/clang/test/CodeGen/PowerPC/builtins-ppc.c
similarity index 100%
rename from clang/test/CodeGen/builtins-ppc.c
rename to clang/test/CodeGen/PowerPC/builtins-ppc.c
diff --git a/clang/test/CodeGen/ibm128-cast.c b/clang/test/CodeGen/PowerPC/ibm128-cast.c
similarity index 100%
rename from clang/test/CodeGen/ibm128-cast.c
rename to clang/test/CodeGen/PowerPC/ibm128-cast.c
diff --git a/clang/test/CodeGen/ignore-exceptions.cpp b/clang/test/CodeGen/PowerPC/ignore-exceptions.cpp
similarity index 100%
rename from clang/test/CodeGen/ignore-exceptions.cpp
rename to clang/test/CodeGen/PowerPC/ignore-exceptions.cpp
diff --git a/clang/test/CodeGen/inline-asm-matching-constraint.c b/clang/test/CodeGen/PowerPC/inline-asm-matching-constraint.c
similarity index 100%
rename from clang/test/CodeGen/inline-asm-matching-constraint.c
rename to clang/test/CodeGen/PowerPC/inline-asm-matching-constraint.c
diff --git a/clang/test/CodeGen/inline-asm-matching-ppc-vsx.c b/clang/test/CodeGen/PowerPC/inline-asm-matching-ppc-vsx.c
similarity index 100%
rename from clang/test/CodeGen/inline-asm-matching-ppc-vsx.c
rename to clang/test/CodeGen/PowerPC/inline-asm-matching-ppc-vsx.c
diff --git a/clang/test/CodeGen/powerpc-c99complex.c b/clang/test/CodeGen/PowerPC/powerpc-c99complex.c
similarity index 100%
rename from clang/test/CodeGen/powerpc-c99complex.c
rename to clang/test/CodeGen/PowerPC/powerpc-c99complex.c
diff --git a/clang/test/CodeGen/powerpc_types.c b/clang/test/CodeGen/PowerPC/powerpc_types.c
similarity index 100%
rename from clang/test/CodeGen/powerpc_types.c
rename to clang/test/CodeGen/PowerPC/powerpc_types.c
diff --git a/clang/test/CodeGen/ppc-aggregate-abi.cpp b/clang/test/CodeGen/PowerPC/ppc-aggregate-abi.cpp
similarity index 100%
rename from clang/test/CodeGen/ppc-aggregate-abi.cpp
rename to clang/test/CodeGen/PowerPC/ppc-aggregate-abi.cpp
diff --git a/clang/test/CodeGen/ppc-emmintrin.c b/clang/test/CodeGen/PowerPC/ppc-emmintrin.c
similarity index 100%
rename from clang/test/CodeGen/ppc-emmintrin.c
rename to clang/test/CodeGen/PowerPC/ppc-emmintrin.c
diff --git a/clang/test/CodeGen/ppc-mm-malloc-le.c b/clang/test/CodeGen/PowerPC/ppc-mm-malloc-le.c
similarity index 100%
rename from clang/test/CodeGen/ppc-mm-malloc-le.c
rename to clang/test/CodeGen/PowerPC/ppc-mm-malloc-le.c
diff --git a/clang/test/CodeGen/ppc-mm-malloc.c b/clang/test/CodeGen/PowerPC/ppc-mm-malloc.c
similarity index 100%
rename from clang/test/CodeGen/ppc-mm-malloc.c
rename to clang/test/CodeGen/PowerPC/ppc-mm-malloc.c
diff --git a/clang/test/CodeGen/ppc-mma-types.c b/clang/test/CodeGen/PowerPC/ppc-mma-types.c
similarity index 100%
rename from clang/test/CodeGen/ppc-mma-types.c
rename to clang/test/CodeGen/PowerPC/ppc-mma-types.c
diff --git a/clang/test/CodeGen/ppc-mmintrin.c b/clang/test/CodeGen/PowerPC/ppc-mmintrin.c
similarity index 100%
rename from clang/test/CodeGen/ppc-mmintrin.c
rename to clang/test/CodeGen/PowerPC/ppc-mmintrin.c
diff --git a/clang/test/CodeGen/ppc-pmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-pmmintrin.c
similarity index 100%
rename from clang/test/CodeGen/ppc-pmmintrin.c
rename to clang/test/CodeGen/PowerPC/ppc-pmmintrin.c
diff --git a/clang/test/CodeGen/ppc-sfvarargs.c b/clang/test/CodeGen/PowerPC/ppc-sfvarargs.c
similarity index 100%
rename from clang/test/CodeGen/ppc-sfvarargs.c
rename to clang/test/CodeGen/PowerPC/ppc-sfvarargs.c
diff --git a/clang/test/CodeGen/ppc-signbit.c b/clang/test/CodeGen/PowerPC/ppc-signbit.c
similarity index 100%
rename from clang/test/CodeGen/ppc-signbit.c
rename to clang/test/CodeGen/PowerPC/ppc-signbit.c
diff --git a/clang/test/CodeGen/ppc-smmintrin.c b/clang/test/CodeGen/PowerPC/ppc-smmintrin.c
similarity index 100%
rename from clang/test/CodeGen/ppc-smmintrin.c
rename to clang/test/CodeGen/PowerPC/ppc-smmintrin.c
diff --git a/clang/test/CodeGen/ppc-tmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
similarity index 100%
rename from clang/test/CodeGen/ppc-tmmintrin.c
rename to clang/test/CodeGen/PowerPC/ppc-tmmintrin.c
diff --git a/clang/test/CodeGen/ppc-varargs-struct.c b/clang/test/CodeGen/PowerPC/ppc-varargs-struct.c
similarity index 100%
rename from clang/test/CodeGen/ppc-varargs-struct.c
rename to clang/test/CodeGen/PowerPC/ppc-varargs-struct.c
diff --git a/clang/test/CodeGen/ppc-vector-compare.cpp b/clang/test/CodeGen/PowerPC/ppc-vector-compare.cpp
similarity index 100%
rename from clang/test/CodeGen/ppc-vector-compare.cpp
rename to clang/test/CodeGen/PowerPC/ppc-vector-compare.cpp
diff --git a/clang/test/CodeGen/ppc-xmmintrin.c b/clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
similarity index 100%
rename from clang/test/CodeGen/ppc-xmmintrin.c
rename to clang/test/CodeGen/PowerPC/ppc-xmmintrin.c
diff --git a/clang/test/CodeGen/ppc32-and-aix-struct-return.c b/clang/test/CodeGen/PowerPC/ppc32-and-aix-struct-return.c
similarity index 100%
rename from clang/test/CodeGen/ppc32-and-aix-struct-return.c
rename to clang/test/CodeGen/PowerPC/ppc32-and-aix-struct-return.c
diff --git a/clang/test/CodeGen/ppc32-dwarf.c b/clang/test/CodeGen/PowerPC/ppc32-dwarf.c
similarity index 100%
rename from clang/test/CodeGen/ppc32-dwarf.c
rename to clang/test/CodeGen/PowerPC/ppc32-dwarf.c
diff --git a/clang/test/CodeGen/ppc64-align-struct.c b/clang/test/CodeGen/PowerPC/ppc64-align-struct.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-align-struct.c
rename to clang/test/CodeGen/PowerPC/ppc64-align-struct.c
diff --git a/clang/test/CodeGen/ppc64-complex-parms.c b/clang/test/CodeGen/PowerPC/ppc64-complex-parms.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-complex-parms.c
rename to clang/test/CodeGen/PowerPC/ppc64-complex-parms.c
diff --git a/clang/test/CodeGen/ppc64-complex-return.c b/clang/test/CodeGen/PowerPC/ppc64-complex-return.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-complex-return.c
rename to clang/test/CodeGen/PowerPC/ppc64-complex-return.c
diff --git a/clang/test/CodeGen/ppc64-dwarf.c b/clang/test/CodeGen/PowerPC/ppc64-dwarf.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-dwarf.c
rename to clang/test/CodeGen/PowerPC/ppc64-dwarf.c
diff --git a/clang/test/CodeGen/ppc64-elf-abi.c b/clang/test/CodeGen/PowerPC/ppc64-elf-abi.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-elf-abi.c
rename to clang/test/CodeGen/PowerPC/ppc64-elf-abi.c
diff --git a/clang/test/CodeGen/ppc64-extend.c b/clang/test/CodeGen/PowerPC/ppc64-extend.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-extend.c
rename to clang/test/CodeGen/PowerPC/ppc64-extend.c
diff --git a/clang/test/CodeGen/ppc64-f128-builtins.c b/clang/test/CodeGen/PowerPC/ppc64-f128-builtins.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-f128-builtins.c
rename to clang/test/CodeGen/PowerPC/ppc64-f128-builtins.c
diff --git a/clang/test/CodeGen/ppc64-inline-asm.c b/clang/test/CodeGen/PowerPC/ppc64-inline-asm.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-inline-asm.c
rename to clang/test/CodeGen/PowerPC/ppc64-inline-asm.c
diff --git a/clang/test/CodeGen/ppc64-long-double.cpp b/clang/test/CodeGen/PowerPC/ppc64-long-double.cpp
similarity index 100%
rename from clang/test/CodeGen/ppc64-long-double.cpp
rename to clang/test/CodeGen/PowerPC/ppc64-long-double.cpp
diff --git a/clang/test/CodeGen/ppc64-soft-float.c b/clang/test/CodeGen/PowerPC/ppc64-soft-float.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-soft-float.c
rename to clang/test/CodeGen/PowerPC/ppc64-soft-float.c
diff --git a/clang/test/CodeGen/ppc64-struct-onefloat.c b/clang/test/CodeGen/PowerPC/ppc64-struct-onefloat.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-struct-onefloat.c
rename to clang/test/CodeGen/PowerPC/ppc64-struct-onefloat.c
diff --git a/clang/test/CodeGen/ppc64-struct-onevect.c b/clang/test/CodeGen/PowerPC/ppc64-struct-onevect.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-struct-onevect.c
rename to clang/test/CodeGen/PowerPC/ppc64-struct-onevect.c
diff --git a/clang/test/CodeGen/ppc64-varargs-complex.c b/clang/test/CodeGen/PowerPC/ppc64-varargs-complex.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-varargs-complex.c
rename to clang/test/CodeGen/PowerPC/ppc64-varargs-complex.c
diff --git a/clang/test/CodeGen/ppc64-vector.c b/clang/test/CodeGen/PowerPC/ppc64-vector.c
similarity index 100%
rename from clang/test/CodeGen/ppc64-vector.c
rename to clang/test/CodeGen/PowerPC/ppc64-vector.c
diff --git a/clang/test/CodeGen/ppc64le-aggregates.c b/clang/test/CodeGen/PowerPC/ppc64le-aggregates.c
similarity index 100%
rename from clang/test/CodeGen/ppc64le-aggregates.c
rename to clang/test/CodeGen/PowerPC/ppc64le-aggregates.c
diff --git a/clang/test/CodeGen/ppc64le-f128Aggregates.c b/clang/test/CodeGen/PowerPC/ppc64le-f128Aggregates.c
similarity index 100%
rename from clang/test/CodeGen/ppc64le-f128Aggregates.c
rename to clang/test/CodeGen/PowerPC/ppc64le-f128Aggregates.c
diff --git a/clang/test/CodeGen/ppc64le-varargs-complex.c b/clang/test/CodeGen/PowerPC/ppc64le-varargs-complex.c
similarity index 100%
rename from clang/test/CodeGen/ppc64le-varargs-complex.c
rename to clang/test/CodeGen/PowerPC/ppc64le-varargs-complex.c
diff --git a/clang/test/CodeGen/ppc64le-varargs-f128.c b/clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
similarity index 100%
rename from clang/test/CodeGen/ppc64le-varargs-f128.c
rename to clang/test/CodeGen/PowerPC/ppc64le-varargs-f128.c
diff --git a/clang/test/CodeGen/vector-bool-pixel-altivec-init-no-parentheses.c b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c
similarity index 100%
rename from clang/test/CodeGen/vector-bool-pixel-altivec-init-no-parentheses.c
rename to clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init-no-parentheses.c
diff --git a/clang/test/CodeGen/vector-bool-pixel-altivec-init.c b/clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c
similarity index 100%
rename from clang/test/CodeGen/vector-bool-pixel-altivec-init.c
rename to clang/test/CodeGen/PowerPC/vector-bool-pixel-altivec-init.c
diff --git a/clang/test/CodeGen/vector-compat-pixel-bool-ternary.c b/clang/test/CodeGen/PowerPC/vector-compat-pixel-bool-ternary.c
similarity index 100%
rename from clang/test/CodeGen/vector-compat-pixel-bool-ternary.c
rename to clang/test/CodeGen/PowerPC/vector-compat-pixel-bool-ternary.c
diff --git a/clang/test/CodeGen/vector-compat-pixel-bool.c b/clang/test/CodeGen/PowerPC/vector-compat-pixel-bool.c
similarity index 100%
rename from clang/test/CodeGen/vector-compat-pixel-bool.c
rename to clang/test/CodeGen/PowerPC/vector-compat-pixel-bool.c
diff --git a/clang/test/CodeGen/vector-compat-ternary.c b/clang/test/CodeGen/PowerPC/vector-compat-ternary.c
similarity index 100%
rename from clang/test/CodeGen/vector-compat-ternary.c
rename to clang/test/CodeGen/PowerPC/vector-compat-ternary.c
diff --git a/clang/test/CodeGen/vector-compat.c b/clang/test/CodeGen/PowerPC/vector-compat.c
similarity index 100%
rename from clang/test/CodeGen/vector-compat.c
rename to clang/test/CodeGen/PowerPC/vector-compat.c
diff --git a/clang/test/CodeGen/xcoff-comdat.cpp b/clang/test/CodeGen/PowerPC/xcoff-comdat.cpp
similarity index 100%
rename from clang/test/CodeGen/xcoff-comdat.cpp
rename to clang/test/CodeGen/PowerPC/xcoff-comdat.cpp

From 571d6a7120c2b637db2bb46fe3029f9d8576ab86 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 23:33:40 -0800
Subject: [PATCH 659/946] [ELF] Optimize .relr.dyn to not grow
 vector<uint64_t>. NFC

---
 lld/ELF/SyntheticSections.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index 9f0cbf9e84884..f442acaafe85d 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2024,14 +2024,14 @@ template <class ELFT> bool RelrSection<ELFT>::updateAllocSize() {
   const size_t nBits = wordsize * 8 - 1;
 
   // Get offsets for all relative relocations and sort them.
-  std::vector<uint64_t> offsets;
-  for (const RelativeReloc &rel : relocs)
-    offsets.push_back(rel.getOffset());
-  llvm::sort(offsets);
+  std::unique_ptr<uint64_t[]> offsets(new uint64_t[relocs.size()]);
+  for (auto it : llvm::enumerate(relocs))
+    offsets[it.index()] = it.value().getOffset();
+  std::sort(offsets.get(), offsets.get() + relocs.size());
 
   // For each leading relocation, find following ones that can be folded
   // as a bitmap and fold them.
-  for (size_t i = 0, e = offsets.size(); i < e;) {
+  for (size_t i = 0, e = relocs.size(); i != e;) {
     // Add a leading relocation.
     relrRelocs.push_back(Elf_Relr(offsets[i]));
     uint64_t base = offsets[i] + wordsize;

From 74acd744d35eebe4a6716026f5f3e9ee9bf15411 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval@gmail.com>
Date: Tue, 25 Jan 2022 22:53:20 +0100
Subject: [PATCH 660/946] [flang] Change getLoweredName to
 translateNameToFrontendMangledName()

getLoweredName() is not a well suited name change it to
translateNameToFrontendMangledName()

Reviewed By: schweitz

Differential Revision: https://reviews.llvm.org/D118140
---
 flang/include/flang/Optimizer/Dialect/FIRTypes.td | 2 +-
 flang/lib/Optimizer/CodeGen/CodeGen.cpp           | 2 +-
 flang/lib/Optimizer/Dialect/FIRType.cpp           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/flang/include/flang/Optimizer/Dialect/FIRTypes.td b/flang/include/flang/Optimizer/Dialect/FIRTypes.td
index baf9afe664976..62e794cc93e25 100644
--- a/flang/include/flang/Optimizer/Dialect/FIRTypes.td
+++ b/flang/include/flang/Optimizer/Dialect/FIRTypes.td
@@ -324,7 +324,7 @@ def fir_RecordType : FIR_Type<"Record", "type"> {
     void finalize(llvm::ArrayRef<TypePair> lenPList,
                   llvm::ArrayRef<TypePair> typeList);
     
-    std::string getLoweredName() const;
+    std::string translateNameToFrontendMangledName() const;
 
     detail::RecordTypeStorage const *uniqueKey() const;
   }];
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 40d6d2017b2fa..9072cf447e1d7 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -1640,7 +1640,7 @@ struct EmboxCommonConversion : public FIROpConversion<OP> {
   mlir::Value
   getTypeDescriptor(BOX box, mlir::ConversionPatternRewriter &rewriter,
                     mlir::Location loc, fir::RecordType recType) const {
-    std::string name = recType.getLoweredName();
+    std::string name = recType.translateNameToFrontendMangledName();
     auto module = box->template getParentOfType<mlir::ModuleOp>();
     if (auto global = module.template lookupSymbol<fir::GlobalOp>(name)) {
       auto ty = mlir::LLVM::LLVMPointerType::get(
diff --git a/flang/lib/Optimizer/Dialect/FIRType.cpp b/flang/lib/Optimizer/Dialect/FIRType.cpp
index 0d087bccf57d3..7095af8a71704 100644
--- a/flang/lib/Optimizer/Dialect/FIRType.cpp
+++ b/flang/lib/Optimizer/Dialect/FIRType.cpp
@@ -642,7 +642,7 @@ unsigned fir::RecordType::getFieldIndex(llvm::StringRef ident) {
   return std::numeric_limits<unsigned>::max();
 }
 
-std::string fir::RecordType::getLoweredName() const {
+std::string fir::RecordType::translateNameToFrontendMangledName() const {
   auto split = getName().split('T');
   std::string name = (split.first + "E.dt." + split.second).str();
   return name;

From 3704abaa166bec865f56f48337a1732eab77dc68 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Tue, 25 Jan 2022 23:53:23 -0800
Subject: [PATCH 661/946] [ELF] --gdb-index: replace vector<uint8_t> with
 unique_ptr<uint8_t[]>. NFC

---
 lld/ELF/SyntheticSections.cpp | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp
index f442acaafe85d..f125e3f0a51a8 100644
--- a/lld/ELF/SyntheticSections.cpp
+++ b/lld/ELF/SyntheticSections.cpp
@@ -2806,7 +2806,7 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs,
 
   // For each chunk, compute the number of compilation units preceding it.
   uint32_t cuIdx = 0;
-  std::vector<uint32_t> cuIdxs(chunks.size());
+  std::unique_ptr<uint32_t[]> cuIdxs(new uint32_t[chunks.size()]);
   for (uint32_t i = 0, e = chunks.size(); i != e; ++i) {
     cuIdxs[i] = cuIdx;
     cuIdx += chunks[i].compilationUnits.size();
@@ -2822,11 +2822,13 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs,
                        numShards));
 
   // A sharded map to uniquify symbols by name.
-  std::vector<DenseMap<CachedHashStringRef, size_t>> map(numShards);
+  auto map =
+      std::make_unique<DenseMap<CachedHashStringRef, size_t>[]>(numShards);
   size_t shift = 32 - countTrailingZeros(numShards);
 
   // Instantiate GdbSymbols while uniqufying them by name.
-  std::vector<std::vector<GdbSymbol>> symbols(numShards);
+  auto symbols = std::make_unique<std::vector<GdbSymbol>[]>(numShards);
+
   parallelForEachN(0, concurrency, [&](size_t threadId) {
     uint32_t i = 0;
     for (ArrayRef<NameAttrEntry> entries : nameAttrs) {
@@ -2850,14 +2852,15 @@ createSymbols(ArrayRef<std::vector<GdbIndexSection::NameAttrEntry>> nameAttrs,
   });
 
   size_t numSymbols = 0;
-  for (ArrayRef<GdbSymbol> v : symbols)
+  for (ArrayRef<GdbSymbol> v : makeArrayRef(symbols.get(), numShards))
     numSymbols += v.size();
 
   // The return type is a flattened vector, so we'll copy each vector
   // contents to Ret.
   std::vector<GdbSymbol> ret;
   ret.reserve(numSymbols);
-  for (std::vector<GdbSymbol> &vec : symbols)
+  for (std::vector<GdbSymbol> &vec :
+       makeMutableArrayRef(symbols.get(), numShards))
     for (GdbSymbol &sym : vec)
       ret.push_back(std::move(sym));
 

From c816be2026af3641f9b648482c48dd1f18a73dd1 Mon Sep 17 00:00:00 2001
From: Kristof Beyls <kristof.beyls@arm.com>
Date: Wed, 26 Jan 2022 09:13:22 +0100
Subject: [PATCH 662/946] Add release note for aarch64-none-elf driver change.

---
 clang/docs/ReleaseNotes.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 2272d7197ac57..6a9b046a1427d 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -275,6 +275,9 @@ Arm and AArch64 Support in Clang
   architecture features, but will enable certain optimizations specific to
   Cortex-A57 CPUs and enable the use of a more accurate scheduling model.
 
+- The --aarch64-none-elf target now uses the BareMetal driver rather than the
+  GNU driver. Programs that depend on clang invoking GCC as the linker driver
+  should use GCC as the linker in the build system.
 
 Floating Point Support in Clang
 -------------------------------

From 72e29caf039fd81bc6948e16d6f71d1581615469 Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Wed, 26 Jan 2022 09:11:21 +0100
Subject: [PATCH 663/946] [clang-format] Fix regression in parsing pointers to
 arrays.

Fixes https://github.com/llvm/llvm-project/issues/53293.

After commit 5c2e7c9, the code:
```
template <> struct S : Template<int (*)[]> {};
```
was misformatted as:
```
template <> struct S : Template<int (*)[]>{};
```

Reviewed By: MyDeveloperDay, HazardyKnusperkeks, owenpan

Differential Revision: https://reviews.llvm.org/D118106
---
 clang/lib/Format/UnwrappedLineParser.cpp | 11 +++++++++--
 clang/unittests/Format/FormatTest.cpp    |  2 ++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index 96d227b7fe763..ff3e791822c47 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -3081,8 +3081,15 @@ void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
         if (!tryToParseBracedList())
           break;
       }
-      if (FormatTok->is(tok::l_square) && !tryToParseLambda())
-        break;
+      if (FormatTok->is(tok::l_square)) {
+        FormatToken *Previous = FormatTok->Previous;
+        if (!Previous || Previous->isNot(tok::r_paren)) {
+          // Don't try parsing a lambda if we had a closing parenthesis before,
+          // it was probably a pointer to an array: int (*)[].
+          if (!tryToParseLambda())
+            break;
+        }
+      }
       if (FormatTok->Tok.is(tok::semi))
         return;
       if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index c4e0e14ce5bcd..c45869f16ad29 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -23483,6 +23483,8 @@ TEST_F(FormatTest, EmptyShortBlock) {
 TEST_F(FormatTest, ShortTemplatedArgumentLists) {
   auto Style = getLLVMStyle();
 
+  verifyFormat("template <> struct S : Template<int (*)[]> {};\n", Style);
+  verifyFormat("template <> struct S : Template<int (*)[10]> {};\n", Style);
   verifyFormat("struct Y : X<[] { return 0; }> {};", Style);
   verifyFormat("struct Y<[] { return 0; }> {};", Style);
 

From 18778b8863522e3043a0134eb31cd60127bd962b Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Wed, 26 Jan 2022 03:35:33 +0900
Subject: [PATCH 664/946] [mlir] Fix merging of delayed registrations during
 DialectRegistry::appendTo

The existing implementation called DenseMap::insert, which is a no-op if the map already contains an entry with the same key.

Differential Revision: https://reviews.llvm.org/D118165
---
 mlir/include/mlir/IR/Dialect.h | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/mlir/include/mlir/IR/Dialect.h b/mlir/include/mlir/IR/Dialect.h
index 0ee8057d1e666..7fb298c1b9425 100644
--- a/mlir/include/mlir/IR/Dialect.h
+++ b/mlir/include/mlir/IR/Dialect.h
@@ -331,7 +331,22 @@ class DialectRegistry {
       destination.insert(nameAndRegistrationIt.second.first,
                          nameAndRegistrationIt.first,
                          nameAndRegistrationIt.second.second);
-    destination.interfaces.insert(interfaces.begin(), interfaces.end());
+    // Merge interfaces.
+    for (auto it : interfaces) {
+      TypeID dialect = it.first;
+      auto destInterfaces = destination.interfaces.find(dialect);
+      if (destInterfaces == destination.interfaces.end()) {
+        destination.interfaces[dialect] = it.second;
+        continue;
+      }
+      // The destination already has delayed interface registrations for this
+      // dialect. Merge registrations into the destination registry.
+      destInterfaces->second.dialectInterfaces.append(
+          it.second.dialectInterfaces.begin(),
+          it.second.dialectInterfaces.end());
+      destInterfaces->second.objectInterfaces.append(
+          it.second.objectInterfaces.begin(), it.second.objectInterfaces.end());
+    }
   }
 
   /// Return the names of dialects known to this registry.

From 267711e38bc7522abd2488a76e998a3a494d51d6 Mon Sep 17 00:00:00 2001
From: jacquesguan <Jianjian.Guan@streamcomputing.com>
Date: Wed, 26 Jan 2022 15:48:51 +0800
Subject: [PATCH 665/946] [RISCV] Fix support of vlen = 64.

In the Zve* extensions, the vlen could be 64. This patch change the vlen constraint of low bound to 64.

Differential Revision: https://reviews.llvm.org/D118217
---
 llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index de6b0df2df8a8..976e4ccb14228 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -149,15 +149,16 @@ unsigned RISCVSubtarget::getMaxRVVVectorSizeInBits() const {
                        "than the Zvl*b limitation");
 
   // FIXME: Change to >= 32 when VLEN = 32 is supported
-  assert(RVVVectorBitsMax >= 64 && RVVVectorBitsMax <= 65536 &&
-         isPowerOf2_32(RVVVectorBitsMax) &&
-         "V extension requires vector length to be in the range of 128 to "
-         "65536 and a power of 2!");
+  assert(
+      RVVVectorBitsMax >= 64 && RVVVectorBitsMax <= 65536 &&
+      isPowerOf2_32(RVVVectorBitsMax) &&
+      "V or Zve* extension requires vector length to be in the range of 64 to "
+      "65536 and a power of 2!");
   assert(RVVVectorBitsMax >= RVVVectorBitsMin &&
          "Minimum V extension vector length should not be larger than its "
          "maximum!");
   unsigned Max = std::max(RVVVectorBitsMin, RVVVectorBitsMax);
-  return PowerOf2Floor((Max < 128 || Max > 65536) ? 0 : Max);
+  return PowerOf2Floor((Max < 64 || Max > 65536) ? 0 : Max);
 }
 
 unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
@@ -170,18 +171,19 @@ unsigned RISCVSubtarget::getMinRVVVectorSizeInBits() const {
   assert(hasVInstructions() &&
          "Tried to get vector length without Zve or V extension support!");
   // FIXME: Change to >= 32 when VLEN = 32 is supported
-  assert((RVVVectorBitsMin == 0 ||
-          (RVVVectorBitsMin >= 64 && RVVVectorBitsMax <= 65536 &&
-           isPowerOf2_32(RVVVectorBitsMin))) &&
-         "V extension requires vector length to be in the range of 128 to "
-         "65536 and a power of 2!");
+  assert(
+      (RVVVectorBitsMin == 0 ||
+       (RVVVectorBitsMin >= 64 && RVVVectorBitsMin <= 65536 &&
+        isPowerOf2_32(RVVVectorBitsMin))) &&
+      "V or Zve* extension requires vector length to be in the range of 64 to "
+      "65536 and a power of 2!");
   assert((RVVVectorBitsMax >= RVVVectorBitsMin || RVVVectorBitsMax == 0) &&
          "Minimum V extension vector length should not be larger than its "
          "maximum!");
   unsigned Min = RVVVectorBitsMin;
   if (RVVVectorBitsMax != 0)
     Min = std::min(RVVVectorBitsMin, RVVVectorBitsMax);
-  return PowerOf2Floor((Min < 128 || Min > 65536) ? 0 : Min);
+  return PowerOf2Floor((Min < 64 || Min > 65536) ? 0 : Min);
 }
 
 unsigned RISCVSubtarget::getMaxLMULForFixedLengthVectors() const {

From d3f5ef241ae3841b6f9b70bd138a9252ed3a002e Mon Sep 17 00:00:00 2001
From: Yury Gribov <tetra2005@gmail.com>
Date: Fri, 21 Jan 2022 17:24:32 +0900
Subject: [PATCH 666/946] Add ieee_is_normal/ieee_is_negative to
 ieee_arithmetic module.

---
 flang/include/flang/Evaluate/real.h |  3 +++
 flang/lib/Evaluate/fold-logical.cpp | 14 +++++++++++
 flang/lib/Evaluate/intrinsics.cpp   |  2 ++
 flang/module/__fortran_builtins.f90 |  3 ++-
 flang/module/ieee_arithmetic.f90    | 36 +++++++++++++++++++++++++++++
 5 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/flang/include/flang/Evaluate/real.h b/flang/include/flang/Evaluate/real.h
index 77c25c4feae17..01634022d5fdb 100644
--- a/flang/include/flang/Evaluate/real.h
+++ b/flang/include/flang/Evaluate/real.h
@@ -88,6 +88,9 @@ class Real : public common::RealDetails<PREC> {
   constexpr bool IsSubnormal() const {
     return Exponent() == 0 && !GetSignificand().IsZero();
   }
+  constexpr bool IsNormal() const {
+    return !(IsInfinite() || IsNotANumber() || IsSubnormal());
+  }
 
   constexpr Real ABS() const { // non-arithmetic, no flags returned
     return {word_.IBCLR(bits - 1)};
diff --git a/flang/lib/Evaluate/fold-logical.cpp b/flang/lib/Evaluate/fold-logical.cpp
index 4ba4ce8387609..34a7639ba32d1 100644
--- a/flang/lib/Evaluate/fold-logical.cpp
+++ b/flang/lib/Evaluate/fold-logical.cpp
@@ -118,6 +118,20 @@ Expr<Type<TypeCategory::Logical, KIND>> FoldIntrinsicFunction(
         ScalarFunc<T, DefaultReal>([](const Scalar<DefaultReal> &x) {
           return Scalar<T>{x.IsNotANumber()};
         }));
+  } else if (name == "__builtin_ieee_is_negative") {
+    auto restorer{context.messages().DiscardMessages()};
+    using DefaultReal = Type<TypeCategory::Real, 4>;
+    return FoldElementalIntrinsic<T, DefaultReal>(context, std::move(funcRef),
+        ScalarFunc<T, DefaultReal>([](const Scalar<DefaultReal> &x) {
+          return Scalar<T>{x.IsNegative()};
+        }));
+  } else if (name == "__builtin_ieee_is_normal") {
+    auto restorer{context.messages().DiscardMessages()};
+    using DefaultReal = Type<TypeCategory::Real, 4>;
+    return FoldElementalIntrinsic<T, DefaultReal>(context, std::move(funcRef),
+        ScalarFunc<T, DefaultReal>([](const Scalar<DefaultReal> &x) {
+          return Scalar<T>{x.IsNormal()};
+        }));
   } else if (name == "is_contiguous") {
     if (args.at(0)) {
       if (auto *expr{args[0]->UnwrapExpr()}) {
diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp
index ae7e5e9bf7885..9dca8b4cc60a4 100644
--- a/flang/lib/Evaluate/intrinsics.cpp
+++ b/flang/lib/Evaluate/intrinsics.cpp
@@ -793,6 +793,8 @@ static const IntrinsicInterface genericIntrinsicFunction[]{
             DefaultingKIND},
         KINDInt},
     {"__builtin_ieee_is_nan", {{"a", AnyFloating}}, DefaultLogical},
+    {"__builtin_ieee_is_normal", {{"a", AnyFloating}}, DefaultLogical},
+    {"__builtin_ieee_is_negative", {{"a", AnyFloating}}, DefaultLogical},
     {"__builtin_ieee_next_after", {{"x", SameReal}, {"y", AnyReal}}, SameReal},
     {"__builtin_ieee_next_down", {{"x", SameReal}}, SameReal},
     {"__builtin_ieee_next_up", {{"x", SameReal}}, SameReal},
diff --git a/flang/module/__fortran_builtins.f90 b/flang/module/__fortran_builtins.f90
index 5d7a0008ec361..4a4c55e44e056 100644
--- a/flang/module/__fortran_builtins.f90
+++ b/flang/module/__fortran_builtins.f90
@@ -41,7 +41,8 @@
 
   procedure(type(__builtin_c_ptr)) :: __builtin_c_loc
 
-  intrinsic :: __builtin_ieee_is_nan
+  intrinsic :: __builtin_ieee_is_nan, __builtin_ieee_is_normal, &
+    __builtin_ieee_is_negative
   intrinsic :: __builtin_ieee_next_after, __builtin_ieee_next_down, &
     __builtin_ieee_next_up
   intrinsic :: scale ! for ieee_scalb
diff --git a/flang/module/ieee_arithmetic.f90 b/flang/module/ieee_arithmetic.f90
index 1d7c32e7e1167..45d3cc3f02a1e 100644
--- a/flang/module/ieee_arithmetic.f90
+++ b/flang/module/ieee_arithmetic.f90
@@ -11,6 +11,8 @@ module ieee_arithmetic
 
   use __Fortran_builtins, only: &
     ieee_is_nan => __builtin_ieee_is_nan, &
+    ieee_is_normal => __builtin_ieee_is_normal, &
+    ieee_is_negative => __builtin_ieee_is_negative, &
     ieee_next_after => __builtin_ieee_next_after, &
     ieee_next_down => __builtin_ieee_next_down, &
     ieee_next_up => __builtin_ieee_next_up, &
@@ -235,6 +237,40 @@ elemental function ieee_is_finite_a##KIND(x) result(res); \
   _IS_FINITE(16)
 #undef _IS_FINITE
 
+#define _IS_NEGATIVE(KIND) \
+  elemental function ieee_is_negative_a##KIND(x) result(res); \
+    real(kind=KIND), intent(in) :: x; \
+    logical :: res; \
+    type(ieee_class_type) :: classification; \
+    classification = ieee_class(x); \
+    res = classification == ieee_negative_zero .or. classification == ieee_negative_denormal \
+     .or. classification == ieee_negative_normal .or. classification == ieee_negative_inf; \
+  end function
+  _IS_NEGATIVE(2)
+  _IS_NEGATIVE(3)
+  _IS_NEGATIVE(4)
+  _IS_NEGATIVE(8)
+  _IS_NEGATIVE(10)
+  _IS_NEGATIVE(16)
+#undef _IS_NEGATIVE
+
+#define _IS_NORMAL(KIND) \
+  elemental function ieee_is_normal_a##KIND(x) result(res); \
+    real(kind=KIND), intent(in) :: x; \
+    logical :: res; \
+    type(ieee_class_type) :: classification; \
+    classification = ieee_class(x); \
+    res = classification == ieee_negative_normal .or. classification == ieee_positive_normal \
+      .or. classification == ieee_negative_zero .or. classification == ieee_positive_zero; \
+  end function
+  _IS_NORMAL(2)
+  _IS_NORMAL(3)
+  _IS_NORMAL(4)
+  _IS_NORMAL(8)
+  _IS_NORMAL(10)
+  _IS_NORMAL(16)
+#undef _IS_NORMAL
+
 ! TODO: handle edge cases from 17.11.31
 #define _REM(XKIND,YKIND) \
   elemental function ieee_rem_a##XKIND##_a##YKIND(x, y) result(res); \

From a5cc8f6d5e49c957a3d37670beea733941b003f9 Mon Sep 17 00:00:00 2001
From: Yury Gribov <tetra2005@gmail.com>
Date: Wed, 19 Jan 2022 19:48:54 +0900
Subject: [PATCH 667/946] [flang] Add a custom target for the "flang" wrapper
 script.

Differential Revision: https://reviews.llvm.org/D117768
---
 flang/tools/f18/CMakeLists.txt | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt
index 8c09842359e77..aa77676c6229f 100644
--- a/flang/tools/f18/CMakeLists.txt
+++ b/flang/tools/f18/CMakeLists.txt
@@ -45,8 +45,10 @@ add_custom_target(module_files ALL DEPENDS ${MODULE_FILES})
 
 # This flang shell script will only work in a POSIX shell.
 if (NOT WIN32)
-  file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/flang
-    DESTINATION ${CMAKE_BINARY_DIR}/bin
-    FILE_PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE)
+  add_custom_command(
+    OUTPUT ${CMAKE_BINARY_DIR}/bin/flang
+    DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/flang
+    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/flang ${CMAKE_BINARY_DIR}/bin)
+  add_custom_target(flang ALL DEPENDS ${CMAKE_BINARY_DIR}/bin/flang)
   install(PROGRAMS ${CMAKE_BINARY_DIR}/bin/flang DESTINATION "${CMAKE_INSTALL_BINDIR}")
 endif()

From 57356d6bb72adb1e85eb8b9ac5d9321b84e22fe6 Mon Sep 17 00:00:00 2001
From: David Green <david.green@arm.com>
Date: Wed, 26 Jan 2022 08:37:44 +0000
Subject: [PATCH 668/946] [DAG] Create fptoui.sat from clamped fptoui

This is the unsigned variant of D111976, where we convert a clamped
fptoui to a fptoui.sat. Because we are unsigned, the condition this time
is only UMIN of UINT_MAX. Similarly to D111976 it handles ISD::UMIN,
ISD::SETCC/ISD::SELECT, ISD::VSELECT or ISD::SELECT_CC nodes.

This especially helps on ARM/AArch64 where the vcvt instructions
naturally saturate the result.

Differential Revision: https://reviews.llvm.org/D114964
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  43 +++
 llvm/test/CodeGen/AArch64/fpclamptosat.ll     |  44 +---
 llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll | 216 ++++------------
 llvm/test/CodeGen/ARM/fpclamptosat.ll         | 113 ++++----
 llvm/test/CodeGen/RISCV/fpclamptosat.ll       | 148 ++++++-----
 .../CodeGen/Thumb2/mve-fpclamptosat_vec.ll    | 108 +-------
 llvm/test/CodeGen/WebAssembly/fpclamptosat.ll |  60 +----
 .../CodeGen/WebAssembly/fpclamptosat_vec.ll   | 244 ++----------------
 8 files changed, 277 insertions(+), 699 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 082e2508aa4ba..7e3dbb91f514c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4892,6 +4892,42 @@ static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
                   : DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
 }
 
+static SDValue PerformUMinFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+                                         SDValue N3, ISD::CondCode CC,
+                                         SelectionDAG &DAG) {
+  // We are looking for UMIN(FPTOUI(X), (2^n)-1), which may have come via a
+  // select/vselect/select_cc. The two operands pairs for the select (N2/N3) may
+  // be truncated versions of the the setcc (N0/N1).
+  if ((N0 != N2 &&
+       (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0))) ||
+      N0.getOpcode() != ISD::FP_TO_UINT || CC != ISD::SETULT)
+    return SDValue();
+  ConstantSDNode *N1C = isConstOrConstSplat(N1);
+  ConstantSDNode *N3C = isConstOrConstSplat(N3);
+  if (!N1C || !N3C)
+    return SDValue();
+  const APInt &C1 = N1C->getAPIntValue();
+  const APInt &C3 = N3C->getAPIntValue();
+  if (!(C1 + 1).isPowerOf2() || C1.getBitWidth() < C3.getBitWidth() ||
+      C1 != C3.zextOrSelf(C1.getBitWidth()))
+    return SDValue();
+
+  unsigned BW = (C1 + 1).exactLogBase2();
+  EVT FPVT = N0.getOperand(0).getValueType();
+  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+  if (FPVT.isVector())
+    NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+                             FPVT.getVectorElementCount());
+  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(ISD::FP_TO_UINT_SAT,
+                                                        FPVT, NewVT))
+    return SDValue();
+
+  SDValue Sat =
+      DAG.getNode(ISD::FP_TO_UINT_SAT, SDLoc(N0), NewVT, N0.getOperand(0),
+                  DAG.getValueType(NewVT.getScalarType()));
+  return DAG.getZExtOrTrunc(Sat, SDLoc(N0), N3.getValueType());
+}
+
 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4934,6 +4970,9 @@ SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
     if (SDValue S = PerformMinMaxFpToSatCombine(
             N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
       return S;
+  if (Opcode == ISD::UMIN)
+    if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N0, N1, ISD::SETULT, DAG))
+      return S;
 
   // Simplify the operands using demanded-bits information.
   if (SimplifyDemandedBits(SDValue(N, 0)))
@@ -10314,6 +10353,8 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
 
     if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
       return S;
+    if (SDValue S = PerformUMinFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+      return S;
 
     // If this select has a condition (setcc) with narrower operands than the
     // select, try to widen the compare to match the select width.
@@ -23372,6 +23413,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 
   if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
     return S;
+  if (SDValue S = PerformUMinFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+    return S;
 
   return SDValue();
 }
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat.ll b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
index ca399acc54488..829b4ddbcc5c6 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat.ll
@@ -22,10 +22,7 @@ entry:
 define i32 @utest_f64i32(double %x) {
 ; CHECK-LABEL: utest_f64i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu x8, d0
-; CHECK-NEXT:    mov w9, #-1
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csinv w0, w8, wzr, lo
+; CHECK-NEXT:    fcvtzu w0, d0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui double %x to i64
@@ -68,10 +65,7 @@ entry:
 define i32 @utest_f32i32(float %x) {
 ; CHECK-LABEL: utest_f32i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu x8, s0
-; CHECK-NEXT:    mov w9, #-1
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csinv w0, w8, wzr, lo
+; CHECK-NEXT:    fcvtzu w0, s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui float %x to i64
@@ -121,18 +115,12 @@ define i32 @utesth_f16i32(half %x) {
 ; CHECK-CVT-LABEL: utesth_f16i32:
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #-1
-; CHECK-CVT-NEXT:    fcvtzu x8, s0
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csinv w0, w8, wzr, lo
+; CHECK-CVT-NEXT:    fcvtzu w0, s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: utesth_f16i32:
 ; CHECK-FP16:       // %bb.0: // %entry
-; CHECK-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-FP16-NEXT:    mov w9, #-1
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csinv w0, w8, wzr, lo
+; CHECK-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %conv = fptoui half %x to i64
@@ -581,11 +569,7 @@ entry:
 define i32 @utest_f64i32_mm(double %x) {
 ; CHECK-LABEL: utest_f64i32_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu x8, d0
-; CHECK-NEXT:    mov w9, #-1
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x0, x8, x9, lo
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fcvtzu w0, d0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui double %x to i64
@@ -623,11 +607,7 @@ entry:
 define i32 @utest_f32i32_mm(float %x) {
 ; CHECK-LABEL: utest_f32i32_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu x8, s0
-; CHECK-NEXT:    mov w9, #-1
-; CHECK-NEXT:    cmp x8, x9
-; CHECK-NEXT:    csel x0, x8, x9, lo
-; CHECK-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT:    fcvtzu w0, s0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui float %x to i64
@@ -672,20 +652,12 @@ define i32 @utesth_f16i32_mm(half %x) {
 ; CHECK-CVT-LABEL: utesth_f16i32_mm:
 ; CHECK-CVT:       // %bb.0: // %entry
 ; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    mov w9, #-1
-; CHECK-CVT-NEXT:    fcvtzu x8, s0
-; CHECK-CVT-NEXT:    cmp x8, x9
-; CHECK-CVT-NEXT:    csel x0, x8, x9, lo
-; CHECK-CVT-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-CVT-NEXT:    fcvtzu w0, s0
 ; CHECK-CVT-NEXT:    ret
 ;
 ; CHECK-FP16-LABEL: utesth_f16i32_mm:
 ; CHECK-FP16:       // %bb.0: // %entry
-; CHECK-FP16-NEXT:    fcvtzu x8, h0
-; CHECK-FP16-NEXT:    mov w9, #-1
-; CHECK-FP16-NEXT:    cmp x8, x9
-; CHECK-FP16-NEXT:    csel x0, x8, x9, lo
-; CHECK-FP16-NEXT:    // kill: def $w0 killed $w0 killed $x0
+; CHECK-FP16-NEXT:    fcvtzu w0, h0
 ; CHECK-FP16-NEXT:    ret
 entry:
   %conv = fptoui half %x to i64
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
index b66ba95daf80e..5d06f762a7710 100644
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -27,12 +27,12 @@ entry:
 define <2 x i32> @utest_f64i32(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    mov d1, v0.d[1]
+; CHECK-NEXT:    fcvtzu w8, d0
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    fcvtzu w8, d1
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <2 x double> %x to <2 x i64>
@@ -80,18 +80,7 @@ entry:
 define <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v2.2d, v0.4s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    cmhi v3.2d, v1.2d, v2.2d
-; CHECK-NEXT:    cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT:    and v2.16b, v2.16b, v3.16b
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    orn v2.16b, v2.16b, v3.16b
-; CHECK-NEXT:    orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
@@ -133,57 +122,11 @@ entry:
 }
 
 define <4 x i32> @utesth_f16i32(<4 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i32:
-; CHECK-CVT:       // %bb.0: // %entry
-; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT:    mov h2, v0.h[2]
-; CHECK-CVT-NEXT:    mov h3, v0.h[3]
-; CHECK-CVT-NEXT:    mov h4, v0.h[1]
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvtzu x9, s0
-; CHECK-CVT-NEXT:    fcvtzu x8, s2
-; CHECK-CVT-NEXT:    fcvt s2, h4
-; CHECK-CVT-NEXT:    fmov d0, x8
-; CHECK-CVT-NEXT:    fcvtzu x8, s3
-; CHECK-CVT-NEXT:    fmov d3, x9
-; CHECK-CVT-NEXT:    fcvtzu x9, s2
-; CHECK-CVT-NEXT:    mov v0.d[1], x8
-; CHECK-CVT-NEXT:    mov v3.d[1], x9
-; CHECK-CVT-NEXT:    cmhi v2.2d, v1.2d, v0.2d
-; CHECK-CVT-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT:    and v3.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT:    orn v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT:    orn v1.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: utesth_f16i32:
-; CHECK-FP16:       // %bb.0: // %entry
-; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-FP16-NEXT:    mov h3, v0.h[3]
-; CHECK-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-FP16-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-NEXT:    fcvtzu x8, h2
-; CHECK-FP16-NEXT:    mov h2, v0.h[1]
-; CHECK-FP16-NEXT:    fmov d0, x8
-; CHECK-FP16-NEXT:    fcvtzu x8, h3
-; CHECK-FP16-NEXT:    fmov d3, x9
-; CHECK-FP16-NEXT:    fcvtzu x9, h2
-; CHECK-FP16-NEXT:    mov v0.d[1], x8
-; CHECK-FP16-NEXT:    mov v3.d[1], x9
-; CHECK-FP16-NEXT:    cmhi v2.2d, v1.2d, v0.2d
-; CHECK-FP16-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-FP16-NEXT:    and v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT:    and v3.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT:    orn v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT:    orn v1.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-FP16-NEXT:    ret
+; CHECK-LABEL: utesth_f16i32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
   %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -338,17 +281,22 @@ entry:
 }
 
 define <8 x i16> @utesth_f16i16(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    umin v2.4s, v2.4s, v1.4s
-; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    ret
+; CHECK-CVT-LABEL: utesth_f16i16:
+; CHECK-CVT:       // %bb.0: // %entry
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT:    movi v1.2d, #0x00ffff0000ffff
+; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT:    ret
+;
+; CHECK-FP16-LABEL: utesth_f16i16:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT:    ret
 entry:
   %conv = fptoui <8 x half> %x to <8 x i32>
   %0 = icmp ult <8 x i32> %conv, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
@@ -758,12 +706,12 @@ entry:
 define <2 x i32> @utest_f64i32_mm(<2 x double> %x) {
 ; CHECK-LABEL: utest_f64i32_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    xtn v0.2s, v0.2d
+; CHECK-NEXT:    mov d1, v0.d[1]
+; CHECK-NEXT:    fcvtzu w8, d0
+; CHECK-NEXT:    fmov s0, w8
+; CHECK-NEXT:    fcvtzu w8, d1
+; CHECK-NEXT:    mov v0.s[1], w8
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <2 x double> %x to <2 x i64>
@@ -806,18 +754,7 @@ entry:
 define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32_mm:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v2.2d, v0.4s
-; CHECK-NEXT:    fcvtl v0.2d, v0.2s
-; CHECK-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT:    fcvtzu v2.2d, v2.2d
-; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
-; CHECK-NEXT:    cmhi v3.2d, v1.2d, v2.2d
-; CHECK-NEXT:    cmhi v1.2d, v1.2d, v0.2d
-; CHECK-NEXT:    and v2.16b, v2.16b, v3.16b
-; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    orn v2.16b, v2.16b, v3.16b
-; CHECK-NEXT:    orn v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
 ; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
@@ -854,57 +791,11 @@ entry:
 }
 
 define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
-; CHECK-CVT-LABEL: utesth_f16i32_mm:
-; CHECK-CVT:       // %bb.0: // %entry
-; CHECK-CVT-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-CVT-NEXT:    mov h2, v0.h[2]
-; CHECK-CVT-NEXT:    mov h3, v0.h[3]
-; CHECK-CVT-NEXT:    mov h4, v0.h[1]
-; CHECK-CVT-NEXT:    fcvt s0, h0
-; CHECK-CVT-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-CVT-NEXT:    fcvt s2, h2
-; CHECK-CVT-NEXT:    fcvt s3, h3
-; CHECK-CVT-NEXT:    fcvtzu x9, s0
-; CHECK-CVT-NEXT:    fcvtzu x8, s2
-; CHECK-CVT-NEXT:    fcvt s2, h4
-; CHECK-CVT-NEXT:    fmov d0, x8
-; CHECK-CVT-NEXT:    fcvtzu x8, s3
-; CHECK-CVT-NEXT:    fmov d3, x9
-; CHECK-CVT-NEXT:    fcvtzu x9, s2
-; CHECK-CVT-NEXT:    mov v0.d[1], x8
-; CHECK-CVT-NEXT:    mov v3.d[1], x9
-; CHECK-CVT-NEXT:    cmhi v2.2d, v1.2d, v0.2d
-; CHECK-CVT-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-CVT-NEXT:    and v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT:    and v3.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT:    orn v0.16b, v0.16b, v2.16b
-; CHECK-CVT-NEXT:    orn v1.16b, v3.16b, v1.16b
-; CHECK-CVT-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-CVT-NEXT:    ret
-;
-; CHECK-FP16-LABEL: utesth_f16i32_mm:
-; CHECK-FP16:       // %bb.0: // %entry
-; CHECK-FP16-NEXT:    // kill: def $d0 killed $d0 def $q0
-; CHECK-FP16-NEXT:    mov h2, v0.h[2]
-; CHECK-FP16-NEXT:    mov h3, v0.h[3]
-; CHECK-FP16-NEXT:    fcvtzu x9, h0
-; CHECK-FP16-NEXT:    movi v1.2d, #0x000000ffffffff
-; CHECK-FP16-NEXT:    fcvtzu x8, h2
-; CHECK-FP16-NEXT:    mov h2, v0.h[1]
-; CHECK-FP16-NEXT:    fmov d0, x8
-; CHECK-FP16-NEXT:    fcvtzu x8, h3
-; CHECK-FP16-NEXT:    fmov d3, x9
-; CHECK-FP16-NEXT:    fcvtzu x9, h2
-; CHECK-FP16-NEXT:    mov v0.d[1], x8
-; CHECK-FP16-NEXT:    mov v3.d[1], x9
-; CHECK-FP16-NEXT:    cmhi v2.2d, v1.2d, v0.2d
-; CHECK-FP16-NEXT:    cmhi v1.2d, v1.2d, v3.2d
-; CHECK-FP16-NEXT:    and v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT:    and v3.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT:    orn v0.16b, v0.16b, v2.16b
-; CHECK-FP16-NEXT:    orn v1.16b, v3.16b, v1.16b
-; CHECK-FP16-NEXT:    uzp1 v0.4s, v1.4s, v0.4s
-; CHECK-FP16-NEXT:    ret
+; CHECK-LABEL: utesth_f16i32_mm:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
@@ -1044,17 +935,22 @@ entry:
 }
 
 define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) {
-; CHECK-LABEL: utesth_f16i16_mm:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtl2 v2.4s, v0.8h
-; CHECK-NEXT:    fcvtl v0.4s, v0.4h
-; CHECK-NEXT:    movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT:    fcvtzu v2.4s, v2.4s
-; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
-; CHECK-NEXT:    umin v2.4s, v2.4s, v1.4s
-; CHECK-NEXT:    umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
-; CHECK-NEXT:    ret
+; CHECK-CVT-LABEL: utesth_f16i16_mm:
+; CHECK-CVT:       // %bb.0: // %entry
+; CHECK-CVT-NEXT:    fcvtl2 v2.4s, v0.8h
+; CHECK-CVT-NEXT:    fcvtl v0.4s, v0.4h
+; CHECK-CVT-NEXT:    movi v1.2d, #0x00ffff0000ffff
+; CHECK-CVT-NEXT:    fcvtzu v2.4s, v2.4s
+; CHECK-CVT-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-CVT-NEXT:    umin v2.4s, v2.4s, v1.4s
+; CHECK-CVT-NEXT:    umin v0.4s, v0.4s, v1.4s
+; CHECK-CVT-NEXT:    uzp1 v0.8h, v0.8h, v2.8h
+; CHECK-CVT-NEXT:    ret
+;
+; CHECK-FP16-LABEL: utesth_f16i16_mm:
+; CHECK-FP16:       // %bb.0: // %entry
+; CHECK-FP16-NEXT:    fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT:    ret
 entry:
   %conv = fptoui <8 x half> %x to <8 x i32>
   %spec.store.select = call <8 x i32> @llvm.umin.v8i32(<8 x i32> %conv, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>)
diff --git a/llvm/test/CodeGen/ARM/fpclamptosat.ll b/llvm/test/CodeGen/ARM/fpclamptosat.ll
index e8bf0ed47754b..5223ae1286f53 100644
--- a/llvm/test/CodeGen/ARM/fpclamptosat.ll
+++ b/llvm/test/CodeGen/ARM/fpclamptosat.ll
@@ -104,17 +104,23 @@ define i32 @utest_f64i32(double %x) {
 ; SOFT-NEXT:  .LBB1_2: @ %entry
 ; SOFT-NEXT:    pop {r7, pc}
 ;
-; VFP-LABEL: utest_f64i32:
-; VFP:       @ %bb.0: @ %entry
-; VFP-NEXT:    .save {r7, lr}
-; VFP-NEXT:    push {r7, lr}
-; VFP-NEXT:    vmov r0, r1, d0
-; VFP-NEXT:    bl __aeabi_d2ulz
-; VFP-NEXT:    subs.w r2, r0, #-1
-; VFP-NEXT:    sbcs r1, r1, #0
-; VFP-NEXT:    it hs
-; VFP-NEXT:    movhs.w r0, #-1
-; VFP-NEXT:    pop {r7, pc}
+; VFP2-LABEL: utest_f64i32:
+; VFP2:       @ %bb.0: @ %entry
+; VFP2-NEXT:    .save {r7, lr}
+; VFP2-NEXT:    push {r7, lr}
+; VFP2-NEXT:    vmov r0, r1, d0
+; VFP2-NEXT:    bl __aeabi_d2ulz
+; VFP2-NEXT:    subs.w r2, r0, #-1
+; VFP2-NEXT:    sbcs r1, r1, #0
+; VFP2-NEXT:    it hs
+; VFP2-NEXT:    movhs.w r0, #-1
+; VFP2-NEXT:    pop {r7, pc}
+;
+; FULL-LABEL: utest_f64i32:
+; FULL:       @ %bb.0: @ %entry
+; FULL-NEXT:    vcvt.u32.f64 s0, d0
+; FULL-NEXT:    vmov r0, s0
+; FULL-NEXT:    bx lr
 entry:
   %conv = fptoui double %x to i64
   %0 = icmp ult i64 %conv, 4294967295
@@ -289,15 +295,9 @@ define i32 @utest_f32i32(float %x) {
 ;
 ; VFP-LABEL: utest_f32i32:
 ; VFP:       @ %bb.0: @ %entry
-; VFP-NEXT:    .save {r7, lr}
-; VFP-NEXT:    push {r7, lr}
+; VFP-NEXT:    vcvt.u32.f32 s0, s0
 ; VFP-NEXT:    vmov r0, s0
-; VFP-NEXT:    bl __aeabi_f2ulz
-; VFP-NEXT:    subs.w r2, r0, #-1
-; VFP-NEXT:    sbcs r1, r1, #0
-; VFP-NEXT:    it hs
-; VFP-NEXT:    movhs.w r0, #-1
-; VFP-NEXT:    pop {r7, pc}
+; VFP-NEXT:    bx lr
 entry:
   %conv = fptoui float %x to i64
   %0 = icmp ult i64 %conv, 4294967295
@@ -466,25 +466,16 @@ define i32 @utesth_f16i32(half %x) {
 ; VFP2-NEXT:    push {r7, lr}
 ; VFP2-NEXT:    vmov r0, s0
 ; VFP2-NEXT:    bl __aeabi_h2f
-; VFP2-NEXT:    bl __aeabi_f2ulz
-; VFP2-NEXT:    subs.w r2, r0, #-1
-; VFP2-NEXT:    sbcs r1, r1, #0
-; VFP2-NEXT:    it hs
-; VFP2-NEXT:    movhs.w r0, #-1
+; VFP2-NEXT:    vmov s0, r0
+; VFP2-NEXT:    vcvt.u32.f32 s0, s0
+; VFP2-NEXT:    vmov r0, s0
 ; VFP2-NEXT:    pop {r7, pc}
 ;
 ; FULL-LABEL: utesth_f16i32:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r7, lr}
-; FULL-NEXT:    push {r7, lr}
-; FULL-NEXT:    vmov.f16 r0, s0
-; FULL-NEXT:    vmov s0, r0
-; FULL-NEXT:    bl __fixunshfdi
-; FULL-NEXT:    subs.w r2, r0, #-1
-; FULL-NEXT:    sbcs r1, r1, #0
-; FULL-NEXT:    it hs
-; FULL-NEXT:    movhs.w r0, #-1
-; FULL-NEXT:    pop {r7, pc}
+; FULL-NEXT:    vcvt.u32.f16 s0, s0
+; FULL-NEXT:    vmov r0, s0
+; FULL-NEXT:    bx lr
 entry:
   %conv = fptoui half %x to i64
   %0 = icmp ult i64 %conv, 4294967295
@@ -2240,16 +2231,22 @@ define i32 @utest_f64i32_mm(double %x) {
 ; SOFT-NEXT:  .LBB28_2: @ %entry
 ; SOFT-NEXT:    pop {r7, pc}
 ;
-; VFP-LABEL: utest_f64i32_mm:
-; VFP:       @ %bb.0: @ %entry
-; VFP-NEXT:    .save {r7, lr}
-; VFP-NEXT:    push {r7, lr}
-; VFP-NEXT:    vmov r0, r1, d0
-; VFP-NEXT:    bl __aeabi_d2ulz
-; VFP-NEXT:    cmp r1, #0
-; VFP-NEXT:    it ne
-; VFP-NEXT:    movne.w r0, #-1
-; VFP-NEXT:    pop {r7, pc}
+; VFP2-LABEL: utest_f64i32_mm:
+; VFP2:       @ %bb.0: @ %entry
+; VFP2-NEXT:    .save {r7, lr}
+; VFP2-NEXT:    push {r7, lr}
+; VFP2-NEXT:    vmov r0, r1, d0
+; VFP2-NEXT:    bl __aeabi_d2ulz
+; VFP2-NEXT:    cmp r1, #0
+; VFP2-NEXT:    it ne
+; VFP2-NEXT:    movne.w r0, #-1
+; VFP2-NEXT:    pop {r7, pc}
+;
+; FULL-LABEL: utest_f64i32_mm:
+; FULL:       @ %bb.0: @ %entry
+; FULL-NEXT:    vcvt.u32.f64 s0, d0
+; FULL-NEXT:    vmov r0, s0
+; FULL-NEXT:    bx lr
 entry:
   %conv = fptoui double %x to i64
   %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
@@ -2429,14 +2426,9 @@ define i32 @utest_f32i32_mm(float %x) {
 ;
 ; VFP-LABEL: utest_f32i32_mm:
 ; VFP:       @ %bb.0: @ %entry
-; VFP-NEXT:    .save {r7, lr}
-; VFP-NEXT:    push {r7, lr}
+; VFP-NEXT:    vcvt.u32.f32 s0, s0
 ; VFP-NEXT:    vmov r0, s0
-; VFP-NEXT:    bl __aeabi_f2ulz
-; VFP-NEXT:    cmp r1, #0
-; VFP-NEXT:    it ne
-; VFP-NEXT:    movne.w r0, #-1
-; VFP-NEXT:    pop {r7, pc}
+; VFP-NEXT:    bx lr
 entry:
   %conv = fptoui float %x to i64
   %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
@@ -2609,23 +2601,16 @@ define i32 @utesth_f16i32_mm(half %x) {
 ; VFP2-NEXT:    push {r7, lr}
 ; VFP2-NEXT:    vmov r0, s0
 ; VFP2-NEXT:    bl __aeabi_h2f
-; VFP2-NEXT:    bl __aeabi_f2ulz
-; VFP2-NEXT:    cmp r1, #0
-; VFP2-NEXT:    it ne
-; VFP2-NEXT:    movne.w r0, #-1
+; VFP2-NEXT:    vmov s0, r0
+; VFP2-NEXT:    vcvt.u32.f32 s0, s0
+; VFP2-NEXT:    vmov r0, s0
 ; VFP2-NEXT:    pop {r7, pc}
 ;
 ; FULL-LABEL: utesth_f16i32_mm:
 ; FULL:       @ %bb.0: @ %entry
-; FULL-NEXT:    .save {r7, lr}
-; FULL-NEXT:    push {r7, lr}
-; FULL-NEXT:    vmov.f16 r0, s0
-; FULL-NEXT:    vmov s0, r0
-; FULL-NEXT:    bl __fixunshfdi
-; FULL-NEXT:    cmp r1, #0
-; FULL-NEXT:    it ne
-; FULL-NEXT:    movne.w r0, #-1
-; FULL-NEXT:    pop {r7, pc}
+; FULL-NEXT:    vcvt.u32.f16 s0, s0
+; FULL-NEXT:    vmov r0, s0
+; FULL-NEXT:    bx lr
 entry:
   %conv = fptoui half %x to i64
   %spec.store.select = call i64 @llvm.umin.i64(i64 %conv, i64 4294967295)
diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
index 69a646db09aa3..4643048232d85 100644
--- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll
+++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll
@@ -115,28 +115,28 @@ entry:
 }
 
 define i32 @utest_f64i32(double %x) {
-; RV32-LABEL: utest_f64i32:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    call __fixunsdfdi@plt
-; RV32-NEXT:    beqz a1, .LBB1_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    beqz a1, .LBB1_3
-; RV32-NEXT:    j .LBB1_4
-; RV32-NEXT:  .LBB1_2:
-; RV32-NEXT:    addi a1, a0, 1
-; RV32-NEXT:    snez a1, a1
-; RV32-NEXT:    bnez a1, .LBB1_4
-; RV32-NEXT:  .LBB1_3: # %entry
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:  .LBB1_4: # %entry
-; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32IF-LABEL: utest_f64i32:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    .cfi_def_cfa_offset 16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    call __fixunsdfdi@plt
+; RV32IF-NEXT:    beqz a1, .LBB1_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    li a1, 0
+; RV32IF-NEXT:    beqz a1, .LBB1_3
+; RV32IF-NEXT:    j .LBB1_4
+; RV32IF-NEXT:  .LBB1_2:
+; RV32IF-NEXT:    addi a1, a0, 1
+; RV32IF-NEXT:    snez a1, a1
+; RV32IF-NEXT:    bnez a1, .LBB1_4
+; RV32IF-NEXT:  .LBB1_3: # %entry
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:  .LBB1_4: # %entry
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: utest_f64i32:
 ; RV64IF:       # %bb.0: # %entry
@@ -155,6 +155,24 @@ define i32 @utest_f64i32(double %x) {
 ; RV64IF-NEXT:    addi sp, sp, 16
 ; RV64IF-NEXT:    ret
 ;
+; RV32IFD-LABEL: utest_f64i32:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    feq.d a0, ft0, ft0
+; RV32IFD-NEXT:    bnez a0, .LBB1_2
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB1_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
 ; RV64IFD-LABEL: utest_f64i32:
 ; RV64IFD:       # %bb.0: # %entry
 ; RV64IFD-NEXT:    fmv.d.x ft0, a0
@@ -319,25 +337,14 @@ entry:
 define i32 @utest_f32i32(float %x) {
 ; RV32-LABEL: utest_f32i32:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    call __fixunssfdi@plt
-; RV32-NEXT:    beqz a1, .LBB4_2
+; RV32-NEXT:    fmv.w.x ft0, a0
+; RV32-NEXT:    feq.s a0, ft0, ft0
+; RV32-NEXT:    bnez a0, .LBB4_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    beqz a1, .LBB4_3
-; RV32-NEXT:    j .LBB4_4
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    ret
 ; RV32-NEXT:  .LBB4_2:
-; RV32-NEXT:    addi a1, a0, 1
-; RV32-NEXT:    snez a1, a1
-; RV32-NEXT:    bnez a1, .LBB4_4
-; RV32-NEXT:  .LBB4_3: # %entry
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:  .LBB4_4: # %entry
-; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: utest_f32i32:
@@ -2071,20 +2078,20 @@ entry:
 }
 
 define i32 @utest_f64i32_mm(double %x) {
-; RV32-LABEL: utest_f64i32_mm:
-; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    call __fixunsdfdi@plt
-; RV32-NEXT:    beqz a1, .LBB28_2
-; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:  .LBB28_2: # %entry
-; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 16
-; RV32-NEXT:    ret
+; RV32IF-LABEL: utest_f64i32_mm:
+; RV32IF:       # %bb.0: # %entry
+; RV32IF-NEXT:    addi sp, sp, -16
+; RV32IF-NEXT:    .cfi_def_cfa_offset 16
+; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IF-NEXT:    .cfi_offset ra, -4
+; RV32IF-NEXT:    call __fixunsdfdi@plt
+; RV32IF-NEXT:    beqz a1, .LBB28_2
+; RV32IF-NEXT:  # %bb.1: # %entry
+; RV32IF-NEXT:    li a0, -1
+; RV32IF-NEXT:  .LBB28_2: # %entry
+; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    addi sp, sp, 16
+; RV32IF-NEXT:    ret
 ;
 ; RV64IF-LABEL: utest_f64i32_mm:
 ; RV64IF:       # %bb.0: # %entry
@@ -2103,6 +2110,24 @@ define i32 @utest_f64i32_mm(double %x) {
 ; RV64IF-NEXT:    addi sp, sp, 16
 ; RV64IF-NEXT:    ret
 ;
+; RV32IFD-LABEL: utest_f64i32_mm:
+; RV32IFD:       # %bb.0: # %entry
+; RV32IFD-NEXT:    addi sp, sp, -16
+; RV32IFD-NEXT:    .cfi_def_cfa_offset 16
+; RV32IFD-NEXT:    sw a0, 8(sp)
+; RV32IFD-NEXT:    sw a1, 12(sp)
+; RV32IFD-NEXT:    fld ft0, 8(sp)
+; RV32IFD-NEXT:    feq.d a0, ft0, ft0
+; RV32IFD-NEXT:    bnez a0, .LBB28_2
+; RV32IFD-NEXT:  # %bb.1: # %entry
+; RV32IFD-NEXT:    li a0, 0
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+; RV32IFD-NEXT:  .LBB28_2:
+; RV32IFD-NEXT:    fcvt.wu.d a0, ft0, rtz
+; RV32IFD-NEXT:    addi sp, sp, 16
+; RV32IFD-NEXT:    ret
+;
 ; RV64IFD-LABEL: utest_f64i32_mm:
 ; RV64IFD:       # %bb.0: # %entry
 ; RV64IFD-NEXT:    fmv.d.x ft0, a0
@@ -2266,17 +2291,14 @@ entry:
 define i32 @utest_f32i32_mm(float %x) {
 ; RV32-LABEL: utest_f32i32_mm:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    addi sp, sp, -16
-; RV32-NEXT:    .cfi_def_cfa_offset 16
-; RV32-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    call __fixunssfdi@plt
-; RV32-NEXT:    beqz a1, .LBB31_2
+; RV32-NEXT:    fmv.w.x ft0, a0
+; RV32-NEXT:    feq.s a0, ft0, ft0
+; RV32-NEXT:    bnez a0, .LBB31_2
 ; RV32-NEXT:  # %bb.1: # %entry
-; RV32-NEXT:    li a0, -1
-; RV32-NEXT:  .LBB31_2: # %entry
-; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 16
+; RV32-NEXT:    li a0, 0
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB31_2:
+; RV32-NEXT:    fcvt.wu.s a0, ft0, rtz
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: utest_f32i32_mm:
diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
index 08bcba9b5cd7d..1e9fbe09539c8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll
@@ -183,58 +183,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    vmov r0, r4, d9
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    subs.w r2, r5, #-1
-; CHECK-NEXT:    vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT:    sbcs r2, r6, #0
-; CHECK-NEXT:    mov.w r3, #0
-; CHECK-NEXT:    csetm r2, lo
-; CHECK-NEXT:    subs.w r0, r0, #-1
-; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    csetm r0, lo
-; CHECK-NEXT:    vmov.i64 q5, #0xffffffff
-; CHECK-NEXT:    bfi r3, r0, #8, #8
-; CHECK-NEXT:    vmov r0, r4, d8
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT:    vmsr p0, r3
-; CHECK-NEXT:    movs r7, #0
-; CHECK-NEXT:    vpsel q6, q0, q5
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    subs.w r2, r5, #-1
-; CHECK-NEXT:    vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT:    sbcs r2, r6, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT:    csetm r2, lo
-; CHECK-NEXT:    subs.w r0, r0, #-1
-; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    bfi r7, r2, #0, #8
-; CHECK-NEXT:    csetm r0, lo
-; CHECK-NEXT:    bfi r7, r0, #8, #8
-; CHECK-NEXT:    vmsr p0, r7
-; CHECK-NEXT:    vpsel q0, q0, q5
-; CHECK-NEXT:    vmov.f32 s1, s2
-; CHECK-NEXT:    vmov.f32 s2, s24
-; CHECK-NEXT:    vmov.f32 s3, s26
-; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    vcvt.u32.f32 q0, q0
+; CHECK-NEXT:    bx lr
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
   %0 = icmp ult <4 x i64> %conv, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
@@ -1396,58 +1346,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32_mm:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
-; CHECK-NEXT:    .pad #4
-; CHECK-NEXT:    sub sp, #4
-; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    vmov r0, r4, d9
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    subs.w r2, r5, #-1
-; CHECK-NEXT:    vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT:    sbcs r2, r6, #0
-; CHECK-NEXT:    mov.w r3, #0
-; CHECK-NEXT:    csetm r2, lo
-; CHECK-NEXT:    subs.w r0, r0, #-1
-; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    bfi r3, r2, #0, #8
-; CHECK-NEXT:    csetm r0, lo
-; CHECK-NEXT:    vmov.i64 q5, #0xffffffff
-; CHECK-NEXT:    bfi r3, r0, #8, #8
-; CHECK-NEXT:    vmov r0, r4, d8
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT:    vmsr p0, r3
-; CHECK-NEXT:    movs r7, #0
-; CHECK-NEXT:    vpsel q6, q0, q5
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    mov r0, r4
-; CHECK-NEXT:    mov r6, r1
-; CHECK-NEXT:    bl __aeabi_f2ulz
-; CHECK-NEXT:    subs.w r2, r5, #-1
-; CHECK-NEXT:    vmov q0[2], q0[0], r5, r0
-; CHECK-NEXT:    sbcs r2, r6, #0
-; CHECK-NEXT:    vmov q0[3], q0[1], r6, r1
-; CHECK-NEXT:    csetm r2, lo
-; CHECK-NEXT:    subs.w r0, r0, #-1
-; CHECK-NEXT:    sbcs r0, r1, #0
-; CHECK-NEXT:    bfi r7, r2, #0, #8
-; CHECK-NEXT:    csetm r0, lo
-; CHECK-NEXT:    bfi r7, r0, #8, #8
-; CHECK-NEXT:    vmsr p0, r7
-; CHECK-NEXT:    vpsel q0, q0, q5
-; CHECK-NEXT:    vmov.f32 s1, s2
-; CHECK-NEXT:    vmov.f32 s2, s24
-; CHECK-NEXT:    vmov.f32 s3, s26
-; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13}
-; CHECK-NEXT:    add sp, #4
-; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:    vcvt.u32.f32 q0, q0
+; CHECK-NEXT:    bx lr
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
   %spec.store.select = call <4 x i64> @llvm.umin.v4i64(<4 x i64> %conv, <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>)
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
index dabcae9fd3593..f8eeb2105e323 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat.ll
@@ -23,17 +23,9 @@ entry:
 define i32 @utest_f64i32(double %x) {
 ; CHECK-LABEL: utest_f64i32:
 ; CHECK:         .functype utest_f64i32 (f64) -> (i32)
-; CHECK-NEXT:    .local i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i64.trunc_sat_f64_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i32.wrap_i64
+; CHECK-NEXT:    i32.trunc_sat_f64_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui double %x to i64
@@ -80,17 +72,9 @@ entry:
 define i32 @utest_f32i32(float %x) {
 ; CHECK-LABEL: utest_f32i32:
 ; CHECK:         .functype utest_f32i32 (f32) -> (i32)
-; CHECK-NEXT:    .local i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i32.wrap_i64
+; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui float %x to i64
@@ -139,19 +123,11 @@ entry:
 define i32 @utesth_f16i32(half %x) {
 ; CHECK-LABEL: utesth_f16i32:
 ; CHECK:         .functype utesth_f16i32 (f32) -> (i32)
-; CHECK-NEXT:    .local i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i32.wrap_i64
+; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui half %x to i64
@@ -810,17 +786,9 @@ entry:
 define i32 @utest_f64i32_mm(double %x) {
 ; CHECK-LABEL: utest_f64i32_mm:
 ; CHECK:         .functype utest_f64i32_mm (f64) -> (i32)
-; CHECK-NEXT:    .local i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i64.trunc_sat_f64_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i32.wrap_i64
+; CHECK-NEXT:    i32.trunc_sat_f64_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui double %x to i64
@@ -862,17 +830,9 @@ entry:
 define i32 @utest_f32i32_mm(float %x) {
 ; CHECK-LABEL: utest_f32i32_mm:
 ; CHECK:         .functype utest_f32i32_mm (f32) -> (i32)
-; CHECK-NEXT:    .local i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i32.wrap_i64
+; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui float %x to i64
@@ -916,19 +876,11 @@ entry:
 define i32 @utesth_f16i32_mm(half %x) {
 ; CHECK-LABEL: utesth_f16i32_mm:
 ; CHECK:         .functype utesth_f16i32_mm (f32) -> (i32)
-; CHECK-NEXT:    .local i64
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i32.wrap_i64
+; CHECK-NEXT:    i32.trunc_sat_f32_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui half %x to i64
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index a595ffe51e2ed..c27da85cbf488 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -145,62 +145,9 @@ entry:
 define <4 x i32> @utest_f32i32(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32:
 ; CHECK:         .functype utest_f32i32 (v128) -> (v128)
-; CHECK-NEXT:    .local i64, i64, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    f32x4.extract_lane 0
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    f32x4.extract_lane 1
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 2
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.const 4294967295, 4294967295
-; CHECK-NEXT:    local.tee 3
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.bitselect
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    f32x4.extract_lane 2
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    f32x4.extract_lane 3
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 2
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.bitselect
-; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    i32x4.trunc_sat_f32x4_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
@@ -267,16 +214,7 @@ entry:
 define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-LABEL: utesth_f16i32:
 ; CHECK:         .functype utesth_f16i32 (f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local i64, i64, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
-; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
-; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 3
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
-; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 2
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
@@ -284,55 +222,21 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) {
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 4
-; CHECK-NEXT:    i64x2.splat
+; CHECK-NEXT:    i32.trunc_sat_f32_u
+; CHECK-NEXT:    i32x4.splat
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 5
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.const 4294967295, 4294967295
-; CHECK-NEXT:    local.tee 6
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.bitselect
+; CHECK-NEXT:    i32.trunc_sat_f32_u
+; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 4
-; CHECK-NEXT:    i64x2.splat
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    i32.trunc_sat_f32_u
+; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 5
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    local.get 6
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.bitselect
-; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    i32.trunc_sat_f32_u
+; CHECK-NEXT:    i32x4.replace_lane 3
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>
@@ -1742,62 +1646,9 @@ entry:
 define <4 x i32> @utest_f32i32_mm(<4 x float> %x) {
 ; CHECK-LABEL: utest_f32i32_mm:
 ; CHECK:         .functype utest_f32i32_mm (v128) -> (v128)
-; CHECK-NEXT:    .local i64, i64, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
 ; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    f32x4.extract_lane 0
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    f32x4.extract_lane 1
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 2
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.const 4294967295, 4294967295
-; CHECK-NEXT:    local.tee 3
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.bitselect
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    f32x4.extract_lane 2
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 1
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    local.get 0
-; CHECK-NEXT:    f32x4.extract_lane 3
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 2
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.bitselect
-; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    i32x4.trunc_sat_f32x4_u
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui <4 x float> %x to <4 x i64>
@@ -1859,16 +1710,7 @@ entry:
 define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-LABEL: utesth_f16i32_mm:
 ; CHECK:         .functype utesth_f16i32_mm (f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT:    .local i64, i64, v128
 ; CHECK-NEXT:  # %bb.0: # %entry
-; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    call __truncsfhf2
-; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 3
-; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    call __truncsfhf2
-; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    local.set 2
 ; CHECK-NEXT:    local.get 1
 ; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
@@ -1876,55 +1718,21 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) {
 ; CHECK-NEXT:    local.get 0
 ; CHECK-NEXT:    call __truncsfhf2
 ; CHECK-NEXT:    call __extendhfsf2
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 4
-; CHECK-NEXT:    i64x2.splat
+; CHECK-NEXT:    i32.trunc_sat_f32_u
+; CHECK-NEXT:    i32x4.splat
 ; CHECK-NEXT:    local.get 1
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 5
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.const 4294967295, 4294967295
-; CHECK-NEXT:    local.tee 6
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.bitselect
+; CHECK-NEXT:    i32.trunc_sat_f32_u
+; CHECK-NEXT:    i32x4.replace_lane 1
 ; CHECK-NEXT:    local.get 2
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 4
-; CHECK-NEXT:    i64x2.splat
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    i32.trunc_sat_f32_u
+; CHECK-NEXT:    i32x4.replace_lane 2
 ; CHECK-NEXT:    local.get 3
-; CHECK-NEXT:    i64.trunc_sat_f32_u
-; CHECK-NEXT:    local.tee 5
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    local.get 6
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 4
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.splat
-; CHECK-NEXT:    i64.const -1
-; CHECK-NEXT:    i64.const 0
-; CHECK-NEXT:    local.get 5
-; CHECK-NEXT:    i64.const 4294967295
-; CHECK-NEXT:    i64.lt_u
-; CHECK-NEXT:    i64.select
-; CHECK-NEXT:    i64x2.replace_lane 1
-; CHECK-NEXT:    v128.bitselect
-; CHECK-NEXT:    i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27
+; CHECK-NEXT:    call __truncsfhf2
+; CHECK-NEXT:    call __extendhfsf2
+; CHECK-NEXT:    i32.trunc_sat_f32_u
+; CHECK-NEXT:    i32x4.replace_lane 3
 ; CHECK-NEXT:    # fallthrough-return
 entry:
   %conv = fptoui <4 x half> %x to <4 x i64>

From 8bbfdf8ec320e1d0d89f4babf81f4d003ce51e32 Mon Sep 17 00:00:00 2001
From: Yury Gribov <tetra2005@gmail.com>
Date: Wed, 19 Jan 2022 17:21:44 +0900
Subject: [PATCH 669/946] [flang] Get rid of code duplication in wrapper. Fix
 checking of undefined variables.

Differential Revision: https://reviews.llvm.org/D117767
---
 flang/tools/f18/flang | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/flang/tools/f18/flang b/flang/tools/f18/flang
index 10b055b098792..fc3a3a01556ee 100755
--- a/flang/tools/f18/flang
+++ b/flang/tools/f18/flang
@@ -263,6 +263,22 @@ categorise_opts()
   done
 }
 
+# === get_external_fc_name ====================================================
+#
+# Returns the name of external Fortran compiler based on values of
+# environment variables.
+# =============================================================================
+get_external_fc_name() {
+  if [[ -v FLANG_FC ]]; then
+    echo ${FLANG_FC}
+  elif [[ -v F18_FC ]]; then
+    # We support F18_FC for backwards compatibility.
+    echo ${F18_FC}
+  else
+    echo gfortran
+  fi
+}
+
 # === preprocess ==============================================================
 #
 # Runs the preprocessing. Fortran files are preprocessed using Flang. Other
@@ -278,18 +294,8 @@ preprocess() {
   local -n other_srcs=$2
   local -n opts=$3
 
-  local ext_fc=""
-  if [[ -v $FLANG_FC ]]; then
-    ext_fc="${FLANG_FC}"
-  elif [[ -v $F18_FC ]]; then
-    # We support F18_FC for backwards compatibility.
-    ext_fc="${F18_FC}"
-  else
-    ext_fc=gfortran
-  fi
-
+  local ext_fc="$(get_external_fc_name)"
 
-  local ext_fc="${FLANG_FC:-gfortran}"
   local -r wd=$(cd "$(dirname "$0")/.." && pwd)
 
   # Use the provided output file name.
@@ -412,7 +418,7 @@ main() {
   done
 
   # STEP 2: Compile Fortran Source Files
-  readonly ext_fc="${F18_FC:-gfortran}"
+  local ext_fc="$(get_external_fc_name)"
   # Temporary object files generated by this script. To be deleted at the end.
   local temp_object_files=()
   for idx in "${!fortran_source_files[@]}"; do

From c82cb5d0006ea486446b22cafd72f9a0923f5b5a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 10:10:13 +0100
Subject: [PATCH 670/946] [AddressSanitizer] Avoid pointer element type
 accesses

Determine masked load/store type based on the value operand and
result types, rather than pointer element type.
---
 .../Instrumentation/AddressSanitizerCommon.h      |  4 +++-
 .../Instrumentation/AddressSanitizer.cpp          | 15 +++++++--------
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
index 6c351e3f8e1ff..5a0fb835606a1 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/AddressSanitizerCommon.h
@@ -26,6 +26,7 @@ class InterestingMemoryOperand {
 public:
   Use *PtrUse;
   bool IsWrite;
+  Type *OpType;
   uint64_t TypeSize;
   MaybeAlign Alignment;
   // The mask Value, if we're looking at a masked load/store.
@@ -34,7 +35,8 @@ class InterestingMemoryOperand {
   InterestingMemoryOperand(Instruction *I, unsigned OperandNo, bool IsWrite,
                            class Type *OpType, MaybeAlign Alignment,
                            Value *MaybeMask = nullptr)
-      : IsWrite(IsWrite), Alignment(Alignment), MaybeMask(MaybeMask) {
+      : IsWrite(IsWrite), OpType(OpType), Alignment(Alignment),
+        MaybeMask(MaybeMask) {
     const DataLayout &DL = I->getModule()->getDataLayout();
     TypeSize = DL.getTypeStoreSizeInBits(OpType);
     PtrUse = &I->getOperandUse(OperandNo);
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 3bf2d16e7d209..6e72255e51ae0 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -1547,10 +1547,9 @@ void AddressSanitizer::getInterestingMemoryOperands(
     Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
                              XCHG->getCompareOperand()->getType(), None);
   } else if (auto CI = dyn_cast<CallInst>(I)) {
-    auto *F = CI->getCalledFunction();
-    if (F && (F->getName().startswith("llvm.masked.load.") ||
-              F->getName().startswith("llvm.masked.store."))) {
-      bool IsWrite = F->getName().startswith("llvm.masked.store.");
+    if (CI->getIntrinsicID() == Intrinsic::masked_load ||
+        CI->getIntrinsicID() == Intrinsic::masked_store) {
+      bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_store;
       // Masked store has an initial operand for the value.
       unsigned OpOffset = IsWrite ? 1 : 0;
       if (IsWrite ? !ClInstrumentWrites : !ClInstrumentReads)
@@ -1559,7 +1558,7 @@ void AddressSanitizer::getInterestingMemoryOperands(
       auto BasePtr = CI->getOperand(OpOffset);
       if (ignoreAccess(LI, BasePtr))
         return;
-      auto Ty = BasePtr->getType()->getPointerElementType();
+      Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
       MaybeAlign Alignment = Align(1);
       // Otherwise no alignment guarantees. We probably got Undef.
       if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
@@ -1653,10 +1652,10 @@ static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
                                         const DataLayout &DL, Type *IntptrTy,
                                         Value *Mask, Instruction *I,
                                         Value *Addr, MaybeAlign Alignment,
-                                        unsigned Granularity, uint32_t TypeSize,
+                                        unsigned Granularity, Type *OpType,
                                         bool IsWrite, Value *SizeArgument,
                                         bool UseCalls, uint32_t Exp) {
-  auto *VTy = cast<FixedVectorType>(Addr->getType()->getPointerElementType());
+  auto *VTy = cast<FixedVectorType>(OpType);
   uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
   unsigned Num = VTy->getNumElements();
   auto Zero = ConstantInt::get(IntptrTy, 0);
@@ -1734,7 +1733,7 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
   unsigned Granularity = 1 << Mapping.Scale;
   if (O.MaybeMask) {
     instrumentMaskedLoadOrStore(this, DL, IntptrTy, O.MaybeMask, O.getInsn(),
-                                Addr, O.Alignment, Granularity, O.TypeSize,
+                                Addr, O.Alignment, Granularity, O.OpType,
                                 O.IsWrite, nullptr, UseCalls, Exp);
   } else {
     doInstrumentAddress(this, O.getInsn(), O.getInsn(), Addr, O.Alignment,

From c1b653bfa1cd7496f0636cc40034e08d7f6bb266 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Tue, 25 Jan 2022 15:48:51 -0500
Subject: [PATCH 671/946] [NFC] Use an llvm::DenseMap instead of std::map in
 CategorizedHelpPrinter::printOptions

It's a recommit of 6427f4c52c31cc36004 (patch included)
---
 llvm/lib/Support/CommandLine.cpp | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index ed4f01f176c2f..b1e688788c333 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -45,7 +45,6 @@
 #include "llvm/Support/VirtualFileSystem.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cstdlib>
-#include <map>
 #include <string>
 using namespace llvm;
 using namespace cl;
@@ -2339,7 +2338,7 @@ class CategorizedHelpPrinter : public HelpPrinter {
 protected:
   void printOptions(StrOptionPairVector &Opts, size_t MaxArgLen) override {
     std::vector<OptionCategory *> SortedCategories;
-    std::map<OptionCategory *, std::vector<Option *>> CategorizedOptions;
+    DenseMap<OptionCategory *, std::vector<Option *>> CategorizedOptions;
 
     // Collect registered option categories into vector in preparation for
     // sorting.
@@ -2351,17 +2350,13 @@ class CategorizedHelpPrinter : public HelpPrinter {
     array_pod_sort(SortedCategories.begin(), SortedCategories.end(),
                    OptionCategoryCompare);
 
-    // Create map to empty vectors.
-    for (OptionCategory *Category : SortedCategories)
-      CategorizedOptions[Category] = std::vector<Option *>();
-
     // Walk through pre-sorted options and assign into categories.
     // Because the options are already alphabetically sorted the
     // options within categories will also be alphabetically sorted.
     for (size_t I = 0, E = Opts.size(); I != E; ++I) {
       Option *Opt = Opts[I].second;
       for (auto &Cat : Opt->Categories) {
-        assert(CategorizedOptions.count(Cat) > 0 &&
+        assert(std::binary_search(SortedCategories.begin(), SortedCategories.end(), Cat, [](OptionCategory* A, OptionCategory* B) { return OptionCategoryCompare(&A, &B);}) &&
                "Option has an unregistered category");
         CategorizedOptions[Cat].push_back(Opt);
       }

From 9e7a2bfcf71674f9f40fb34e7d1d1ee9507ed8dd Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 10:20:39 +0100
Subject: [PATCH 672/946] [OpenMPOpt] Add const qualifier (NFC)

Make it clear that this large lambda does not modify the vector.
---
 llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 80f4cbe3c8a51..68f33410c602e 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -1012,7 +1012,8 @@ struct OpenMPOpt {
     // into a single parallel region is contained in a single basic block
     // without any other instructions. We use the OpenMPIRBuilder to outline
     // that block and call the resulting function via __kmpc_fork_call.
-    auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
+    auto Merge = [&](const SmallVectorImpl<CallInst *> &MergableCIs,
+                     BasicBlock *BB) {
       // TODO: Change the interface to allow single CIs expanded, e.g, to
       // include an outer loop.
       assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs");

From bec4e865dedfa36fc9c03139268f405e32e9be1d Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 10:34:23 +0100
Subject: [PATCH 673/946] [SCEVExpander] Remove pointer element type access in
 assertion

Assert directly on i8 rather than the element type of i8*.
---
 llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index c6044f8fdffd7..1495ea1a40882 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -173,7 +173,7 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
     auto *PtrTy = cast<PointerType>(Ty);
     if (DL.isNonIntegralPointerType(PtrTy)) {
       auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
-      assert(DL.getTypeAllocSize(Int8PtrTy->getPointerElementType()) == 1 &&
+      assert(DL.getTypeAllocSize(Builder.getInt8Ty()) == 1 &&
              "alloc size of i8 must by 1 byte for the GEP to be correct");
       auto *GEP = Builder.CreateGEP(
           Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");

From a2aea7199a5c99260d27ee37f3cda8f752478c02 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 9 Jan 2022 22:32:47 +0000
Subject: [PATCH 674/946] [libcxx] [test] Fix mismatches between
 _aligned_malloc and free() on Windows

This allows getting rid of one case of LIBCXX-WINDOWS-FIXME. The fixme
comment was inaccurate; aligned allocation functions are provided these
days, but the test kept failing as it was using mismatched allocation
and free functions.

A similar issue was fixed earlier, in
6596778b46ba69517191e7397289228168064ff4. That test was fixed by
overriding the aligned `operator new` too, and returning a dummy fixed
allocation instead. As this test is libcxx specific, it can use the
internal `std::__libcpp_aligned_free()` instead, to match libcxx's
internal aligned `operator new`.

Differential Revision: https://reviews.llvm.org/D118190
---
 .../support.dynamic/libcpp_deallocate.sh.cpp              | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
index 1f663e05fc709..a90eaf2600e98 100644
--- a/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
+++ b/libcxx/test/libcxx/language.support/support.dynamic/libcpp_deallocate.sh.cpp
@@ -9,10 +9,6 @@
 // test libc++'s implementation of align_val_t, and the relevant new/delete
 // overloads in all dialects when -faligned-allocation is present.
 
-// Libc++ defers to the underlying MSVC library to provide the new/delete
-// definitions, which does not yet provide aligned allocation
-// XFAIL: LIBCXX-WINDOWS-FIXME
-
 // XFAIL: LIBCXX-AIX-FIXME
 
 // The dylibs shipped before macosx10.13 do not contain the aligned allocation
@@ -115,14 +111,14 @@ void operator delete(void* p, size_t n)TEST_NOEXCEPT {
 
 #ifndef NO_ALIGN
 void operator delete(void* p, std::align_val_t a)TEST_NOEXCEPT {
-  ::free(p);
+  std::__libcpp_aligned_free(p);
   stats.aligned_called++;
   stats.last_align = static_cast<int>(a);
   stats.last_size = -1;
 }
 
 void operator delete(void* p, size_t n, std::align_val_t a)TEST_NOEXCEPT {
-  ::free(p);
+  std::__libcpp_aligned_free(p);
   stats.aligned_sized_called++;
   stats.last_align = static_cast<int>(a);
   stats.last_size = n;

From b61c878fc5e5c73243191962516bdeaaac4c923b Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Wed, 26 Jan 2022 10:15:44 +0100
Subject: [PATCH 675/946] [mlir][Bazel] Remove unnecessary dependencies

Differential Revision: https://reviews.llvm.org/D118221
---
 .../llvm-project-overlay/mlir/BUILD.bazel      | 18 ------------------
 1 file changed, 18 deletions(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index dff9389cc766b..1007f1af9414c 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -1919,13 +1919,9 @@ cc_library(
     hdrs = ["include/mlir/Dialect/SparseTensor/IR/SparseTensor.h"],
     includes = ["include"],
     deps = [
-        ":ArithmeticDialect",
         ":IR",
-        ":InferTypeOpInterface",
-        ":SideEffectInterfaces",
         ":SparseTensorAttrDefsIncGen",
         ":SparseTensorOpsIncGen",
-        ":StandardOps",
         "//llvm:Support",
     ],
 )
@@ -2422,13 +2418,11 @@ cc_library(
         ":ControlFlowInterfaces",
         ":IR",
         ":LoopLikeInterface",
-        ":MemRefDialect",
         ":Pass",
         ":SCFIncGen",
         ":SCFPassIncGen",
         ":StandardOps",
         ":Support",
-        ":TensorDialect",
         "//llvm:Support",
     ],
 )
@@ -2576,8 +2570,6 @@ cc_library(
     includes = ["include"],
     deps = [
         ":ArithmeticDialect",
-        ":CallOpInterfaces",
-        ":CommonFolders",
         ":ControlFlowInterfaces",
         ":Dialect",
         ":IR",
@@ -2585,8 +2577,6 @@ cc_library(
         ":MLIRShapeCanonicalizationIncGen",
         ":ShapeOpsIncGen",
         ":SideEffectInterfaces",
-        ":StandardOps",
-        ":Support",
         ":TensorDialect",
         "//llvm:Support",
     ],
@@ -3103,8 +3093,6 @@ cc_library(
         ":LLVMDialect",
         ":MemRefDialect",
         ":SideEffectInterfaces",
-        ":StandardOps",
-        ":Support",
         "//llvm:Support",
     ],
 )
@@ -3161,7 +3149,6 @@ cc_library(
         ":Async",
         ":DLTIDialect",
         ":AffineUtils",
-        ":ExecutionEngineUtils",
         ":GPUDialect",
         ":GPUPassIncGen",
         ":MemRefDialect",
@@ -6108,10 +6095,8 @@ cc_library(
     ]),
     includes = ["include"],
     deps = [
-        ":ArithmeticDialect",
         ":IR",
         ":OpenACCOpsIncGen",
-        ":StandardOps",
         ":Transforms",
         "//llvm:Support",
     ],
@@ -6256,8 +6241,6 @@ cc_library(
         ":LLVMDialect",
         ":OpenMPInterfacesIncGen",
         ":OpenMPOpsIncGen",
-        ":SideEffectInterfaces",
-        ":StandardOps",
         "//llvm:Support",
     ],
 )
@@ -6426,7 +6409,6 @@ cc_library(
         ":QuantOpsIncGen",
         ":QuantPassIncGen",
         ":SideEffectInterfaces",
-        ":StandardOps",
         ":TransformUtils",
         "//llvm:Support",
     ],

From c5263cd518689bc17690b90aa6bb0a163ef4a56c Mon Sep 17 00:00:00 2001
From: Maciej Gabka <maciej.gabka@arm.com>
Date: Wed, 26 Jan 2022 09:46:32 +0000
Subject: [PATCH 676/946] Restrict performPostLD1Combine to 64 and 128 bit
 vectors

When wider vectors are used, for example fixed width SVE,
there is no patterns to select AArch64ISD::LD1LANEpost
nodes, so we should do an early exit.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D117674
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  2 +-
 llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll  | 58 ++++++++++++++++++-
 2 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fc6e0b865681c..cce714ea918e8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15929,7 +15929,7 @@ static SDValue performPostLD1Combine(SDNode *N,
   SelectionDAG &DAG = DCI.DAG;
   EVT VT = N->getValueType(0);
 
-  if (VT.isScalableVector())
+  if (!VT.is128BitVector() && !VT.is64BitVector())
     return SDValue();
 
   unsigned LoadIdx = IsLaneOp ? 1 : 0;
diff --git a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
index 6ec720c39783f..17edf7479bdc4 100644
--- a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll
@@ -3,7 +3,7 @@
 
 ; These tests are here to ensure we don't get a selection error caused
 ; by performPostLD1Combine, which should bail out if the return
-; type is a scalable vector
+; type is not 128 or 64 bit vector.
 
 define <vscale x 4 x i32> @test_post_ld1_insert(i32* %a, i32** %ptr, i64 %inc) {
 ; CHECK-LABEL: test_post_ld1_insert:
@@ -35,4 +35,60 @@ define <vscale x 2 x double> @test_post_ld1_dup(double* %a, double** %ptr, i64 %
   ret <vscale x 2 x double> %dup
 }
 
+define <4 x i64> @test_post_ld1_int_fixed(i64* %data, i64 %idx, <4 x i64>* %addr)  #1 {
+; CHECK-LABEL: test_post_ld1_int_fixed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov w9, #2
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x2]
+; CHECK-NEXT:    ldr x10, [x0, x1, lsl #3]
+; CHECK-NEXT:    ldr x11, [x0]
+; CHECK-NEXT:    index z3.d, #0, #1
+; CHECK-NEXT:    mov z2.d, x9
+; CHECK-NEXT:    ptrue p1.d, vl1
+; CHECK-NEXT:    cmpeq p2.d, p0/z, z3.d, z2.d
+; CHECK-NEXT:    mov z1.d, z0.d
+; CHECK-NEXT:    mov z0.d, p2/m, x10
+; CHECK-NEXT:    mov z1.d, p1/m, x11
+; CHECK-NEXT:    add z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    st1d { z0.d }, p0, [x8]
+; CHECK-NEXT:    ret
+  %A = load <4 x i64>, <4 x i64>* %addr
+  %ld1 = load i64, i64* %data
+  %vec1 = insertelement <4 x i64> %A, i64 %ld1, i32 0
+  %gep = getelementptr i64, i64* %data, i64 %idx
+  %ld2 = load i64, i64* %gep
+  %vec2 = insertelement <4 x i64> %A, i64 %ld2, i32 2
+  %res = add <4 x i64> %vec1, %vec2
+  ret <4 x i64> %res
+}
+
+define <4 x double> @test_post_ld1_double_fixed(double* %data, i64 %idx, <4 x double>* %addr)  #1 {
+; CHECK-LABEL: test_post_ld1_double_fixed:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov w9, #2
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x2]
+; CHECK-NEXT:    ldr d1, [x0, x1, lsl #3]
+; CHECK-NEXT:    ldr d2, [x0]
+; CHECK-NEXT:    index z4.d, #0, #1
+; CHECK-NEXT:    mov z3.d, x9
+; CHECK-NEXT:    ptrue p1.d, vl1
+; CHECK-NEXT:    cmpeq p2.d, p0/z, z4.d, z3.d
+; CHECK-NEXT:    sel z2.d, p1, z2.d, z0.d
+; CHECK-NEXT:    mov z0.d, p2/m, d1
+; CHECK-NEXT:    fadd z0.d, z2.d, z0.d
+; CHECK-NEXT:    st1d { z0.d }, p0, [x8]
+; CHECK-NEXT:    ret
+  %A = load <4 x double>, <4 x double>* %addr
+  %ld1 = load double, double* %data
+  %vec1 = insertelement <4 x double> %A, double %ld1, i32 0
+  %gep = getelementptr double, double* %data, i64 %idx
+  %ld2 = load double, double* %gep
+  %vec2 = insertelement <4 x double> %A, double %ld2, i32 2
+  %res = fadd <4 x double> %vec1, %vec2
+  ret <4 x double> %res
+}
+attributes #1 = { vscale_range(2,2) "target-features"="+neon,+sve" }
+
 declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)

From 2461aee6dbc5b2bb9027e1e86b940a2d0220cd54 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 11:02:03 +0100
Subject: [PATCH 677/946] [OpenMPIRBuilderTest] Avoid pointer element type
 access

This was reintroduced by D110114.
---
 llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
index 00aa25e8330d5..143b053c9e61d 100644
--- a/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
+++ b/llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp
@@ -1158,7 +1158,8 @@ TEST_F(OpenMPIRBuilderTest, ParallelForwardAsPointers) {
 
   Type *Arg2Type = OutlinedFn->getArg(2)->getType();
   EXPECT_TRUE(Arg2Type->isPointerTy());
-  EXPECT_EQ(Arg2Type->getPointerElementType(), ArgStructTy);
+  EXPECT_TRUE(cast<PointerType>(Arg2Type)
+                  ->isOpaqueOrPointeeTypeMatches(ArgStructTy));
 }
 
 TEST_F(OpenMPIRBuilderTest, CanonicalLoopSimple) {

From dee0c268efadb6babafb8168838dc1060416d9af Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 11 Jan 2022 10:57:56 +0100
Subject: [PATCH 678/946] [LICM] Add additional tests for promotion with
 unwinding (NFC)

---
 .../Transforms/LICM/scalar-promote-unwind.ll  | 92 ++++++++++++++++++-
 1 file changed, 90 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
index 0662a23fe6be3..315e25e180fba 100644
--- a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
@@ -53,8 +53,8 @@ for.cond.cleanup:
 
 ; We can hoist the store out of the loop here; if f() unwinds,
 ; the lifetime of %a ends.
-define void @test2(i1 zeroext %y) uwtable {
-; CHECK-LABEL: @test2(
+define void @test_alloca(i1 zeroext %y) uwtable {
+; CHECK-LABEL: @test_alloca(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[A_PROMOTED:%.*]] = load i32, i32* [[A]], align 4
@@ -100,6 +100,94 @@ for.cond.cleanup:
   ret void
 }
 
+; TODO: byval memory cannot be accessed on unwind either.
+define void @test_byval(i32* byval(i32) %a, i1 zeroext %y) uwtable {
+; CHECK-LABEL: @test_byval(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
+; CHECK-NEXT:    br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @f()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_03]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %0 = load i32, i32* %a, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %a, align 4
+  br i1 %y, label %if.then, label %for.inc
+
+if.then:
+  tail call void @f()
+  br label %for.inc
+
+for.inc:
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 10000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+; TODO: sret could be specified to not be accessed on unwind either.
+define void @test_sret(i32* noalias sret(i32) %a, i1 zeroext %y) uwtable {
+; CHECK-LABEL: @test_sret(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
+; CHECK-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
+; CHECK-NEXT:    br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
+; CHECK:       if.then:
+; CHECK-NEXT:    tail call void @f()
+; CHECK-NEXT:    br label [[FOR_INC]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_03]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %0 = load i32, i32* %a, align 4
+  %add = add nsw i32 %0, 1
+  store i32 %add, i32* %a, align 4
+  br i1 %y, label %if.then, label %for.inc
+
+if.then:
+  tail call void @f()
+  br label %for.inc
+
+for.inc:
+  %inc = add nuw nsw i32 %i.03, 1
+  %exitcond = icmp eq i32 %inc, 10000
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
 ;; We can promote if the load can be proven safe to speculate, and the
 ;; store safe to sink, even if the the store *isn't* must execute.
 define void @test3(i1 zeroext %y) uwtable {

From 7c02776567cc9561e6691dc05235799da45e72d6 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Wed, 26 Jan 2022 05:06:12 -0500
Subject: [PATCH 679/946] Fix edb02d8c5df36bb375df7171b4ba61635564dfb4

---
 llvm/lib/Support/CommandLine.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp
index b1e688788c333..71a6ebf2a72eb 100644
--- a/llvm/lib/Support/CommandLine.cpp
+++ b/llvm/lib/Support/CommandLine.cpp
@@ -2356,7 +2356,7 @@ class CategorizedHelpPrinter : public HelpPrinter {
     for (size_t I = 0, E = Opts.size(); I != E; ++I) {
       Option *Opt = Opts[I].second;
       for (auto &Cat : Opt->Categories) {
-        assert(std::binary_search(SortedCategories.begin(), SortedCategories.end(), Cat, [](OptionCategory* A, OptionCategory* B) { return OptionCategoryCompare(&A, &B);}) &&
+        assert(find(SortedCategories, Cat) != SortedCategories.end() &&
                "Option has an unregistered category");
         CategorizedOptions[Cat].push_back(Opt);
       }

From 44cfc3a8169cfa3f6a2824ed9cee9ff269f5a8ea Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 11 Jan 2022 11:02:26 +0100
Subject: [PATCH 680/946] [LICM] Generalize unwinding check during scalar
 promotion

This extract a common isNotVisibleOnUnwind() helper into
AliasAnalysis, which handles allocas, byval arguments and noalias
calls. After D116998 this could also handle sret arguments. We
have similar logic in DSE and MemCpyOpt, which will be switched
to use this helper as well.

The noalias call case is a bit different from the others, because
it also requires that the object is not captured. The caller is
responsible for doing the appropriate check.

Differential Revision: https://reviews.llvm.org/D117000
---
 llvm/include/llvm/Analysis/AliasAnalysis.h    |  8 +++++++
 llvm/lib/Analysis/AliasAnalysis.cpp           | 23 +++++++++++++++++++
 llvm/lib/Transforms/Scalar/LICM.cpp           | 23 +++++++------------
 .../Transforms/LICM/scalar-promote-unwind.ll  | 12 ++++++----
 4 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h
index 2dcdfa6bdde37..d4febe6c1db9f 100644
--- a/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -1267,6 +1267,14 @@ bool isIdentifiedObject(const Value *V);
 /// IdentifiedObjects.
 bool isIdentifiedFunctionLocal(const Value *V);
 
+/// Return true if Object memory is not visible after an unwind, in the sense
+/// that program semantics cannot depend on Object containing any particular
+/// value on unwind. If the RequiresNoCaptureBeforeUnwind out parameter is set
+/// to true, then the memory is only not visible if the object has not been
+/// captured prior to the unwind. Otherwise it is not visible even if captured.
+bool isNotVisibleOnUnwind(const Value *Object,
+                          bool &RequiresNoCaptureBeforeUnwind);
+
 /// A manager for alias analyses.
 ///
 /// This class can have analyses registered with it and when run, it will run
diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp
index 5658499946556..a8132e5abf540 100644
--- a/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -988,6 +988,29 @@ bool llvm::isIdentifiedFunctionLocal(const Value *V) {
   return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasOrByValArgument(V);
 }
 
+bool llvm::isNotVisibleOnUnwind(const Value *Object,
+                                bool &RequiresNoCaptureBeforeUnwind) {
+  RequiresNoCaptureBeforeUnwind = false;
+
+  // Alloca goes out of scope on unwind.
+  if (isa<AllocaInst>(Object))
+    return true;
+
+  // Byval goes out of scope on unwind.
+  if (auto *A = dyn_cast<Argument>(Object))
+    return A->hasByValAttr();
+
+  // A noalias return is not accessible from any other code. If the pointer
+  // does not escape prior to the unwind, then the caller cannot access the
+  // memory either.
+  if (isNoAliasCall(Object)) {
+    RequiresNoCaptureBeforeUnwind = true;
+    return true;
+  }
+
+  return false;
+}
+
 void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) {
   // This function needs to be in sync with llvm::createLegacyPMAAResults -- if
   // more alias analyses are added to llvm::createLegacyPMAAResults, they need
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index b944ff883d8f1..7fb1a25bdf13e 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1925,21 +1925,14 @@ bool isNotCapturedBeforeOrInLoop(const Value *V, const Loop *L,
 
 /// Return true if we can prove that a caller cannot inspect the object if an
 /// unwind occurs inside the loop.
-bool isNotVisibleOnUnwind(Value *Object, const Loop *L, DominatorTree *DT) {
-  if (isa<AllocaInst>(Object))
-    // Since the alloca goes out of scope, we know the caller can't retain a
-    // reference to it and be well defined.  Thus, we don't need to check for
-    // capture.
-    return true;
+bool isNotVisibleOnUnwindInLoop(const Value *Object, const Loop *L,
+                                DominatorTree *DT) {
+  bool RequiresNoCaptureBeforeUnwind;
+  if (!isNotVisibleOnUnwind(Object, RequiresNoCaptureBeforeUnwind))
+    return false;
 
-  // For all other objects we need to know that the caller can't possibly
-  // have gotten a reference to the object prior to an unwind in the loop.
-  // There are two components of that:
-  //   1) Object can't have been captured prior to the definition site.
-  //      For this, we need to know the return value is noalias.
-  //   1) Object can't be captured before or inside the loop.  This is what
-  //      isNotCapturedBeforeOrInLoop() checks.
-  return isNoAliasCall(Object) && isNotCapturedBeforeOrInLoop(Object, L, DT);
+  return !RequiresNoCaptureBeforeUnwind ||
+         isNotCapturedBeforeOrInLoop(Object, L, DT);
 }
 
 } // namespace
@@ -2026,7 +2019,7 @@ bool llvm::promoteLoopAccessesToScalars(
     // this by proving that the caller can't have a reference to the object
     // after return and thus can't possibly load from the object.
     Value *Object = getUnderlyingObject(SomePtr);
-    if (!isNotVisibleOnUnwind(Object, CurLoop, DT))
+    if (!isNotVisibleOnUnwindInLoop(Object, CurLoop, DT))
       return false;
     // Subtlety: Alloca's aren't visible to callers, but *are* potentially
     // visible to other threads if captured and used during their lifetimes.
diff --git a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
index 315e25e180fba..506b5baaa3f72 100644
--- a/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
+++ b/llvm/test/Transforms/LICM/scalar-promote-unwind.ll
@@ -100,16 +100,16 @@ for.cond.cleanup:
   ret void
 }
 
-; TODO: byval memory cannot be accessed on unwind either.
+; byval memory cannot be accessed on unwind either.
 define void @test_byval(i32* byval(i32) %a, i1 zeroext %y) uwtable {
 ; CHECK-LABEL: @test_byval(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_PROMOTED:%.*]] = load i32, i32* [[A:%.*]], align 4
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[I_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 1
-; CHECK-NEXT:    store i32 [[ADD]], i32* [[A]], align 4
+; CHECK-NEXT:    [[ADD1:%.*]] = phi i32 [ [[A_PROMOTED]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_INC:%.*]] ]
+; CHECK-NEXT:    [[I_03:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_INC]] ]
+; CHECK-NEXT:    [[ADD]] = add nsw i32 [[ADD1]], 1
 ; CHECK-NEXT:    br i1 [[Y:%.*]], label [[IF_THEN:%.*]], label [[FOR_INC]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    tail call void @f()
@@ -119,6 +119,8 @@ define void @test_byval(i32* byval(i32) %a, i1 zeroext %y) uwtable {
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 10000
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
 ; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_INC]] ]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA]], i32* [[A]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:

From 66c602be25c15ca69f6c3a618427ba0237c0d4a9 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Wed, 26 Jan 2022 02:50:00 -0500
Subject: [PATCH 681/946] [NFC] Additional header dependency cleanup
 LLVMSupport

A few more forward-declarations, a few less headers. the impact on number of
preprocessed lines for LLVMSupport is negligible (-3K lines) but it's always
good to remove dependencies.

Related discourse thread: https://llvm.discourse.group/t/include-what-you-use-include-cleanup
---
 llvm/include/llvm/ADT/SmallVector.h         | 3 ++-
 llvm/include/llvm/Support/ARMTargetParser.h | 1 -
 llvm/include/llvm/Support/ConvertUTF.h      | 3 +++
 llvm/include/llvm/Support/TargetParser.h    | 4 +++-
 llvm/include/llvm/Support/ThreadPool.h      | 1 -
 llvm/include/llvm/Support/X86TargetParser.h | 2 +-
 llvm/include/llvm/Support/YAMLTraits.h      | 4 +++-
 llvm/lib/Support/BinaryStreamError.cpp      | 2 +-
 llvm/lib/Support/CrashRecoveryContext.cpp   | 5 ++++-
 llvm/lib/Support/FileUtilities.cpp          | 1 -
 llvm/lib/Support/MemoryBuffer.cpp           | 1 -
 llvm/lib/Support/TargetParser.cpp           | 2 +-
 llvm/lib/Support/ThreadPool.cpp             | 1 -
 llvm/lib/Support/YAMLTraits.cpp             | 1 +
 14 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 9347f01a4191f..466acb83d4668 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -13,7 +13,6 @@
 #ifndef LLVM_ADT_SMALLVECTOR_H
 #define LLVM_ADT_SMALLVECTOR_H
 
-#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/type_traits.h"
 #include <algorithm>
@@ -32,6 +31,8 @@
 
 namespace llvm {
 
+template <typename IteratorT> class iterator_range;
+
 /// This is all the stuff common to all SmallVectors.
 ///
 /// The template parameter specifies the type which should be used to hold the
diff --git a/llvm/include/llvm/Support/ARMTargetParser.h b/llvm/include/llvm/Support/ARMTargetParser.h
index b40704c24e87c..1ada6daaad3ba 100644
--- a/llvm/include/llvm/Support/ARMTargetParser.h
+++ b/llvm/include/llvm/Support/ARMTargetParser.h
@@ -14,7 +14,6 @@
 #ifndef LLVM_SUPPORT_ARMTARGETPARSER_H
 #define LLVM_SUPPORT_ARMTARGETPARSER_H
 
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/ARMBuildAttributes.h"
 #include <vector>
diff --git a/llvm/include/llvm/Support/ConvertUTF.h b/llvm/include/llvm/Support/ConvertUTF.h
index 1add185330fa0..374cdb907fdc2 100644
--- a/llvm/include/llvm/Support/ConvertUTF.h
+++ b/llvm/include/llvm/Support/ConvertUTF.h
@@ -91,7 +91,10 @@
 
 #include <cstddef>
 #include <string>
+
+#if defined(_WIN32)
 #include <system_error>
+#endif
 
 // Wrap everything in namespace llvm so that programs can link with llvm and
 // their own version of the unicode libraries.
diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h
index 1d7594ebedc22..a6f8ebe1381a6 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@@ -16,12 +16,14 @@
 
 // FIXME: vector is used because that's what clang uses for subtarget feature
 // lists, but SmallVector would probably be better
-#include "llvm/ADT/Triple.h"
 #include "llvm/Support/RISCVISAInfo.h"
 #include <vector>
 
 namespace llvm {
+
 class StringRef;
+template <typename T> class SmallVectorImpl;
+class Triple;
 
 // Target specific information in their own namespaces.
 // (ARM/AArch64/X86 are declared in ARM/AArch64/X86TargetParser.h)
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index aecff122d3cbe..868dd2819f836 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -19,7 +19,6 @@
 
 #include <future>
 
-#include <atomic>
 #include <condition_variable>
 #include <functional>
 #include <memory>
diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h
index bfa3e23dbd9db..6f5db64714e03 100644
--- a/llvm/include/llvm/Support/X86TargetParser.h
+++ b/llvm/include/llvm/Support/X86TargetParser.h
@@ -14,10 +14,10 @@
 #define LLVM_SUPPORT_X86TARGETPARSER_H
 
 #include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 
 namespace llvm {
+template <typename T> class SmallVectorImpl;;
 class StringRef;
 
 namespace X86 {
diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index 66529075e2e71..b7e41f662810e 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -20,7 +20,6 @@
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/SMLoc.h"
 #include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/VersionTuple.h"
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
@@ -34,6 +33,9 @@
 #include <vector>
 
 namespace llvm {
+
+class VersionTuple;
+
 namespace yaml {
 
 enum class NodeKind : uint8_t {
diff --git a/llvm/lib/Support/BinaryStreamError.cpp b/llvm/lib/Support/BinaryStreamError.cpp
index f22523f09ac80..9b8f6862b65c2 100644
--- a/llvm/lib/Support/BinaryStreamError.cpp
+++ b/llvm/lib/Support/BinaryStreamError.cpp
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/BinaryStreamError.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index b6aaf373a5221..2ee3074b840e4 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -9,7 +9,6 @@
 #include "llvm/Support/CrashRecoveryContext.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ExitCodes.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/ThreadLocal.h"
@@ -17,6 +16,10 @@
 #include <mutex>
 #include <setjmp.h>
 
+#if !defined(_MSC_VER) && !defined(_WIN32)
+#include "llvm/Support/ExitCodes.h"
+#endif
+
 using namespace llvm;
 
 namespace {
diff --git a/llvm/lib/Support/FileUtilities.cpp b/llvm/lib/Support/FileUtilities.cpp
index 1a14ed2673606..489b8d119e6f3 100644
--- a/llvm/lib/Support/FileUtilities.cpp
+++ b/llvm/lib/Support/FileUtilities.cpp
@@ -18,7 +18,6 @@
 #include "llvm/Support/ErrorOr.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
-#include <cmath>
 #include <cstdint>
 #include <cstdlib>
 #include <cstring>
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 7192fb1321cb6..bf34a242c4c3f 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -18,7 +18,6 @@
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Path.h"
 #include "llvm/Support/Process.h"
 #include "llvm/Support/Program.h"
 #include "llvm/Support/SmallVectorMemoryBuffer.h"
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp
index c14ddbeea5c39..0105cd2e81532 100644
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@@ -14,7 +14,7 @@
 #include "llvm/Support/TargetParser.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/ARMBuildAttributes.h"
+#include "llvm/ADT/Triple.h"
 
 using namespace llvm;
 using namespace AMDGPU;
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index 6eec368e626ff..bf2584950c4ac 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -14,7 +14,6 @@
 
 #include "llvm/Config/llvm-config.h"
 #include "llvm/Support/Threading.h"
-#include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index b79d520900d2c..939427d3fc818 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/LineIterator.h"
 #include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/VersionTuple.h"
 #include "llvm/Support/YAMLParser.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>

From 24a49e99f386432fc6fa46faf6e2ba91cfaed2df Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes@tecnico.ulisboa.pt>
Date: Wed, 26 Jan 2022 10:10:22 +0000
Subject: [PATCH 682/946] [NewGVN] FIx phi-of-ops in the presence of memory
 read operations

The phi-of-ops functionality has a function OpIsSafeForPHIOfOps
to determine when it's safe to create the new phi.
But this function only checks for the obvious dominator conditions
and ignores memory.
This patch takes the conservative approach and disables phi-of-ops
whenever there's a load that doesn't dominate the phi, as its
value may be affected by a store inside the loop.

This can be improved later to check aliasing between the
load/stores.

Fixes https://llvm.org/PR53277

Reviewed By: asbirlea

Differential Revision: https://reviews.llvm.org/D117999
---
 llvm/lib/Transforms/Scalar/NewGVN.cpp         |  9 +++
 .../Transforms/NewGVN/phi-of-ops-loads.ll     | 70 +++++++++++++++----
 llvm/test/Transforms/NewGVN/storeoverstore.ll | 33 +++++----
 3 files changed, 85 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp
index 6890ad3852353..2476e6c408b1d 100644
--- a/llvm/lib/Transforms/Scalar/NewGVN.cpp
+++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -2587,6 +2587,15 @@ bool NewGVN::OpIsSafeForPHIOfOpsHelper(
   }
 
   auto *OrigI = cast<Instruction>(V);
+  // When we hit an instruction that reads memory (load, call, etc), we must
+  // consider any store that may happen in the loop. For now, we assume the
+  // worst: there is a store in the loop that alias with this read.
+  // The case where the load is outside the loop is already covered by the
+  // dominator check above.
+  // TODO: relax this condition
+  if (OrigI->mayReadFromMemory())
+    return false;
+
   for (auto *Op : OrigI->operand_values()) {
     if (!isa<Instruction>(Op))
       continue;
diff --git a/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll b/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll
index 63a16d60dfc78..335b3b0175a1e 100644
--- a/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll
+++ b/llvm/test/Transforms/NewGVN/phi-of-ops-loads.ll
@@ -1,15 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -passes=newgvn -enable-phi-of-ops=true -S -o - %s | FileCheck %s
 
-define void @store-in-loop(i8* %p, i8* %q) {
-; CHECK-LABEL: @store-in-loop(
+define void @hoisted-load(i8* %p, i8* %q) {
+; CHECK-LABEL: @hoisted-load(
 ; CHECK-NEXT:  bb56:
+; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    br label [[BB57:%.*]]
 ; CHECK:       bb57:
 ; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ]
 ; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ]
 ; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ 0, [[BB56]] ], [ [[INC:%.*]], [[BB229]] ]
-; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
 ; CHECK-NEXT:    [[N62]] = icmp ne i8 [[N60]], 2
 ; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]]
 ; CHECK:       bb229:
@@ -19,6 +19,44 @@ define void @store-in-loop(i8* %p, i8* %q) {
 ; CHECK:       bb237:
 ; CHECK-NEXT:    ret void
 ;
+bb56:
+  %n60 = load i8, i8* %p
+  br label %bb57
+
+bb57:
+  %n59 = phi i1 [ false, %bb229 ], [ true, %bb56 ]
+  %idx = phi i8 [0, %bb56], [%inc, %bb229]
+  %n62 = icmp ne i8 %n60, 2
+  %n63 = or i1 %n59, %n62
+  br i1 %n63, label %bb229, label %bb237
+
+bb229:
+  %inc = add i8 %idx, 1
+  store i8 %inc, i8* %q
+  br label %bb57
+
+bb237:
+  ret void
+}
+
+define void @store-in-loop(i8* %p, i8* %q) {
+; CHECK-LABEL: @store-in-loop(
+; CHECK-NEXT:  bb56:
+; CHECK-NEXT:    br label [[BB57:%.*]]
+; CHECK:       bb57:
+; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229:%.*]] ], [ true, [[BB56:%.*]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ 0, [[BB56]] ], [ [[INC:%.*]], [[BB229]] ]
+; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
+; CHECK-NEXT:    [[N62:%.*]] = icmp ne i8 [[N60]], 2
+; CHECK-NEXT:    [[N63:%.*]] = or i1 [[N59]], [[N62]]
+; CHECK-NEXT:    br i1 [[N63]], label [[BB229]], label [[BB237:%.*]]
+; CHECK:       bb229:
+; CHECK-NEXT:    [[INC]] = add i8 [[IDX]], 1
+; CHECK-NEXT:    store i8 [[INC]], i8* [[Q:%.*]], align 1
+; CHECK-NEXT:    br label [[BB57]]
+; CHECK:       bb237:
+; CHECK-NEXT:    ret void
+;
 bb56:
   br label %bb57
 
@@ -39,17 +77,18 @@ bb237:
   ret void
 }
 
+; TODO: we should support this case
 define void @no-alias-store-in-loop(i8* noalias %p, i8* noalias %q) {
 ; CHECK-LABEL: @no-alias-store-in-loop(
 ; CHECK-NEXT:  bb56:
 ; CHECK-NEXT:    br label [[BB57:%.*]]
 ; CHECK:       bb57:
-; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ]
-; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ]
+; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229:%.*]] ], [ true, [[BB56:%.*]] ]
 ; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ 0, [[BB56]] ], [ [[INC:%.*]], [[BB229]] ]
 ; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
-; CHECK-NEXT:    [[N62]] = icmp ne i8 [[N60]], 2
-; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]]
+; CHECK-NEXT:    [[N62:%.*]] = icmp ne i8 [[N60]], 2
+; CHECK-NEXT:    [[N63:%.*]] = or i1 [[N59]], [[N62]]
+; CHECK-NEXT:    br i1 [[N63]], label [[BB229]], label [[BB237:%.*]]
 ; CHECK:       bb229:
 ; CHECK-NEXT:    [[INC]] = add i8 [[IDX]], 1
 ; CHECK-NEXT:    store i8 [[INC]], i8* [[Q:%.*]], align 1
@@ -82,11 +121,11 @@ define void @function-in-loop(i8* %p) {
 ; CHECK-NEXT:  bb56:
 ; CHECK-NEXT:    br label [[BB57:%.*]]
 ; CHECK:       bb57:
-; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ]
-; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ]
+; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229:%.*]] ], [ true, [[BB56:%.*]] ]
 ; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
-; CHECK-NEXT:    [[N62]] = icmp ne i8 [[N60]], 2
-; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]]
+; CHECK-NEXT:    [[N62:%.*]] = icmp ne i8 [[N60]], 2
+; CHECK-NEXT:    [[N63:%.*]] = or i1 [[N59]], [[N62]]
+; CHECK-NEXT:    br i1 [[N63]], label [[BB229]], label [[BB237:%.*]]
 ; CHECK:       bb229:
 ; CHECK-NEXT:    call void @f()
 ; CHECK-NEXT:    br label [[BB57]]
@@ -111,16 +150,17 @@ bb237:
   ret void
 }
 
+; TODO: we should support this case
 define void @nowrite-function-in-loop(i8* %p) {
 ; CHECK-LABEL: @nowrite-function-in-loop(
 ; CHECK-NEXT:  bb56:
 ; CHECK-NEXT:    br label [[BB57:%.*]]
 ; CHECK:       bb57:
-; CHECK-NEXT:    [[PHIOFOPS:%.*]] = phi i1 [ true, [[BB56:%.*]] ], [ [[N62:%.*]], [[BB229:%.*]] ]
-; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229]] ], [ true, [[BB56]] ]
+; CHECK-NEXT:    [[N59:%.*]] = phi i1 [ false, [[BB229:%.*]] ], [ true, [[BB56:%.*]] ]
 ; CHECK-NEXT:    [[N60:%.*]] = load i8, i8* [[P:%.*]], align 1
-; CHECK-NEXT:    [[N62]] = icmp ne i8 [[N60]], 2
-; CHECK-NEXT:    br i1 [[PHIOFOPS]], label [[BB229]], label [[BB237:%.*]]
+; CHECK-NEXT:    [[N62:%.*]] = icmp ne i8 [[N60]], 2
+; CHECK-NEXT:    [[N63:%.*]] = or i1 [[N59]], [[N62]]
+; CHECK-NEXT:    br i1 [[N63]], label [[BB229]], label [[BB237:%.*]]
 ; CHECK:       bb229:
 ; CHECK-NEXT:    call void @f() #[[ATTR0:[0-9]+]]
 ; CHECK-NEXT:    br label [[BB57]]
diff --git a/llvm/test/Transforms/NewGVN/storeoverstore.ll b/llvm/test/Transforms/NewGVN/storeoverstore.ll
index 385f18757784c..0e4a4ece32753 100644
--- a/llvm/test/Transforms/NewGVN/storeoverstore.ll
+++ b/llvm/test/Transforms/NewGVN/storeoverstore.ll
@@ -12,12 +12,16 @@ define i32 @foo(i32*, i32)  {
 ; CHECK-NEXT:    store i32 5, i32* [[TMP0:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
-; CHECK:         br label [[TMP5]]
-; CHECK:         [[PHIOFOPS:%.*]] = phi i32 [ 10, [[TMP2:%.*]] ], [ 15, [[TMP4]] ]
-; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP2]] ]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP6:%.*]], label [[TMP7:%.*]]
-; CHECK:         br label [[TMP7]]
-; CHECK:         [[DOT1:%.*]] = phi i32 [ [[PHIOFOPS]], [[TMP6]] ], [ [[DOT0]], [[TMP5]] ]
+; CHECK:       4:
+; CHECK-NEXT:    br label [[TMP5]]
+; CHECK:       5:
+; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP2:%.*]] ]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP6:%.*]], label [[TMP8:%.*]]
+; CHECK:       6:
+; CHECK-NEXT:    [[TMP7:%.*]] = add nsw i32 [[DOT0]], 5
+; CHECK-NEXT:    br label [[TMP8]]
+; CHECK:       8:
+; CHECK-NEXT:    [[DOT1:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[DOT0]], [[TMP5]] ]
 ; CHECK-NEXT:    ret i32 [[DOT1]]
 ;
   store i32 5, i32* %0, align 4
@@ -52,13 +56,18 @@ define i32 @foo2(i32*, i32)  {
 ; CHECK-NEXT:    store i32 5, i32* [[TMP0:%.*]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP1:%.*]], 0
 ; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP5:%.*]]
-; CHECK:         br label [[TMP6:%.*]]
-; CHECK:         br label [[TMP6]]
-; CHECK:         [[PHIOFOPS:%.*]] = phi i32 [ 10, [[TMP5]] ], [ 15, [[TMP4]] ]
+; CHECK:       4:
+; CHECK-NEXT:    br label [[TMP6:%.*]]
+; CHECK:       5:
+; CHECK-NEXT:    br label [[TMP6]]
+; CHECK:       6:
 ; CHECK-NEXT:    [[DOT0:%.*]] = phi i32 [ 10, [[TMP4]] ], [ 5, [[TMP5]] ]
-; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP8:%.*]]
-; CHECK:         br label [[TMP8]]
-; CHECK:         [[DOT1:%.*]] = phi i32 [ [[PHIOFOPS]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ]
+; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP7:%.*]], label [[TMP9:%.*]]
+; CHECK:       7:
+; CHECK-NEXT:    [[TMP8:%.*]] = add nsw i32 [[DOT0]], 5
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    [[DOT1:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[DOT0]], [[TMP6]] ]
 ; CHECK-NEXT:    ret i32 [[DOT1]]
 ;
   store i32 5, i32* %0, align 4

From aa33688cada423d987ce03ecbc13f2c554223d77 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Wed, 26 Jan 2022 11:16:11 +0100
Subject: [PATCH 683/946] [llvm][support] Replace `std::vector<bool>` use in
 YAMLTraits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LLVM Programmer’s Manual strongly discourages the use of `std::vector<bool>` and suggests `llvm::BitVector` as a possible replacement.

This patch replaces the use of `std::vector` with `llvm::BitVector` in LLVM's YAML traits and replaces the call to `Vec.insert(Vec.begin(), N, false)` on empty `Vec` with `Vec.resize(N)`, which has the same semantics but avoids using `insert` and iterators, which `llvm::BitVector` doesn't possess.

Reviewed By: dexonsmith, dblaikie

Differential Revision: https://reviews.llvm.org/D118111
---
 llvm/include/llvm/Support/YAMLTraits.h | 3 ++-
 llvm/lib/Support/YAMLTraits.cpp        | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Support/YAMLTraits.h b/llvm/include/llvm/Support/YAMLTraits.h
index b7e41f662810e..7ad73543fc6e0 100644
--- a/llvm/include/llvm/Support/YAMLTraits.h
+++ b/llvm/include/llvm/Support/YAMLTraits.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_SUPPORT_YAMLTRAITS_H
 #define LLVM_SUPPORT_YAMLTRAITS_H
 
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1521,7 +1522,7 @@ class Input : public IO {
   std::error_code                     EC;
   BumpPtrAllocator                    StringAllocator;
   document_iterator                   DocIterator;
-  std::vector<bool>                   BitValuesUsed;
+  llvm::BitVector                     BitValuesUsed;
   HNode *CurrentNode = nullptr;
   bool                                ScalarMatchFound = false;
   bool AllowUnknownKeys = false;
diff --git a/llvm/lib/Support/YAMLTraits.cpp b/llvm/lib/Support/YAMLTraits.cpp
index 939427d3fc818..8cdd03149bcfa 100644
--- a/llvm/lib/Support/YAMLTraits.cpp
+++ b/llvm/lib/Support/YAMLTraits.cpp
@@ -299,7 +299,7 @@ void Input::endEnumScalar() {
 bool Input::beginBitSetScalar(bool &DoClear) {
   BitValuesUsed.clear();
   if (SequenceHNode *SQ = dyn_cast<SequenceHNode>(CurrentNode)) {
-    BitValuesUsed.insert(BitValuesUsed.begin(), SQ->Entries.size(), false);
+    BitValuesUsed.resize(SQ->Entries.size());
   } else {
     setError(CurrentNode, "expected sequence of bit values");
   }

From 600c6714ac77915b7b656b860cf71494a7c9ec7f Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Wed, 26 Jan 2022 11:18:00 +0100
Subject: [PATCH 684/946] [clang][syntax] Replace `std::vector<bool>` use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

LLVM Programmer’s Manual strongly discourages the use of `std::vector<bool>` and suggests `llvm::BitVector` as a possible replacement.

This patch replaces `std::vector<bool>` with `llvm::BitVector` in the Syntax library and replaces range-based for loop with regular for loop. This is necessary due to `llvm::BitVector` not having `begin()` and `end()` (D117116).

Reviewed By: dexonsmith, dblaikie

Differential Revision: https://reviews.llvm.org/D118109
---
 clang/lib/Tooling/Syntax/Tree.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp
index c813865e95cd8..981bac508f733 100644
--- a/clang/lib/Tooling/Syntax/Tree.cpp
+++ b/clang/lib/Tooling/Syntax/Tree.cpp
@@ -9,6 +9,7 @@
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Tooling/Syntax/Nodes.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Casting.h"
 #include <cassert>
@@ -202,7 +203,7 @@ static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L,
 }
 
 static void dumpNode(raw_ostream &OS, const syntax::Node *N,
-                     const SourceManager &SM, std::vector<bool> IndentMask) {
+                     const SourceManager &SM, llvm::BitVector IndentMask) {
   auto DumpExtraInfo = [&OS](const syntax::Node *N) {
     if (N->getRole() != syntax::NodeRole::Unknown)
       OS << " " << N->getRole();
@@ -228,8 +229,8 @@ static void dumpNode(raw_ostream &OS, const syntax::Node *N,
   OS << "\n";
 
   for (const syntax::Node &It : T->getChildren()) {
-    for (bool Filled : IndentMask) {
-      if (Filled)
+    for (unsigned Idx = 0; Idx < IndentMask.size(); ++Idx) {
+      if (IndentMask[Idx])
         OS << "| ";
       else
         OS << "  ";

From 76cb4cd074a6816f3801fd4a2bd8854597748239 Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Wed, 26 Jan 2022 11:21:49 +0100
Subject: [PATCH 685/946] [clang] Fix serialized diagnostics edge-cases

The Clang frontend sometimes fails on the following assertion when launched with `-serialize-diagnostic-file <x>`:

```
Assertion failed: (BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"), function ~BitstreamWriter, file BitstreamWriter.h, line 125.
```

This was first noticed when passing an unknown command-line argument to `-cc1`.

It turns out the `DiagnosticConsumer::finish()` function should be called as soon as processing of all source files ends, but there are some code paths where that doesn't happen:

1. when command line parsing fails in `cc1_main()`,
2. when `!Act.PrepareToExecute(*this)` or `!createTarget()` evaluate to `true` in `CompilerInstance::ExecuteAction` and the function returns early.

This patch ensures `finish()` is called in all those code paths.

Reviewed By: Bigcheese

Differential Revision: https://reviews.llvm.org/D118150
---
 clang/lib/Frontend/CompilerInstance.cpp                  | 9 ++++++---
 .../Misc/serialized-diags-emit-header-module-misconfig.c | 4 ++++
 clang/test/Misc/serialized-diags-unknown-argument.c      | 4 ++++
 clang/test/Misc/serialized-diags-unknown-target.c        | 4 ++++
 clang/tools/driver/cc1_main.cpp                          | 4 +++-
 5 files changed, 21 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/Misc/serialized-diags-emit-header-module-misconfig.c
 create mode 100644 clang/test/Misc/serialized-diags-unknown-argument.c
 create mode 100644 clang/test/Misc/serialized-diags-unknown-target.c

diff --git a/clang/lib/Frontend/CompilerInstance.cpp b/clang/lib/Frontend/CompilerInstance.cpp
index 4ef5094931329..2465a7e2453be 100644
--- a/clang/lib/Frontend/CompilerInstance.cpp
+++ b/clang/lib/Frontend/CompilerInstance.cpp
@@ -37,6 +37,7 @@
 #include "clang/Serialization/ASTReader.h"
 #include "clang/Serialization/GlobalModuleIndex.h"
 #include "clang/Serialization/InMemoryModuleCache.h"
+#include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Support/BuryPointer.h"
 #include "llvm/Support/CrashRecoveryContext.h"
@@ -996,6 +997,11 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
   // DesiredStackSpace available.
   noteBottomOfStack();
 
+  auto FinishDiagnosticClient = llvm::make_scope_exit([&]() {
+    // Notify the diagnostic client that all files were processed.
+    getDiagnosticClient().finish();
+  });
+
   raw_ostream &OS = getVerboseOutputStream();
 
   if (!Act.PrepareToExecute(*this))
@@ -1034,9 +1040,6 @@ bool CompilerInstance::ExecuteAction(FrontendAction &Act) {
     }
   }
 
-  // Notify the diagnostic client that all files were processed.
-  getDiagnostics().getClient()->finish();
-
   if (getDiagnosticOpts().ShowCarets) {
     // We can have multiple diagnostics sharing one diagnostic client.
     // Get the total number of warnings/errors from the client.
diff --git a/clang/test/Misc/serialized-diags-emit-header-module-misconfig.c b/clang/test/Misc/serialized-diags-emit-header-module-misconfig.c
new file mode 100644
index 0000000000000..8629f293b18b9
--- /dev/null
+++ b/clang/test/Misc/serialized-diags-emit-header-module-misconfig.c
@@ -0,0 +1,4 @@
+// RUN: rm -rf %t && mkdir %t
+// RUN: not %clang_cc1 -emit-header-module %s -o %t/out.pcm -serialize-diagnostic-file %t/diag 2>&1 | FileCheck %s
+
+// CHECK: error: header module compilation requires '-fmodules', '-std=c++20', or '-fmodules-ts'
diff --git a/clang/test/Misc/serialized-diags-unknown-argument.c b/clang/test/Misc/serialized-diags-unknown-argument.c
new file mode 100644
index 0000000000000..7d788e11a51b1
--- /dev/null
+++ b/clang/test/Misc/serialized-diags-unknown-argument.c
@@ -0,0 +1,4 @@
+// RUN: rm -rf %t && mkdir %t
+// RUN: not %clang_cc1 %s -unknown-argument -serialize-diagnostic-file %t/diag -o /dev/null 2>&1 | FileCheck %s
+
+// CHECK: error: unknown argument: '-unknown-argument'
diff --git a/clang/test/Misc/serialized-diags-unknown-target.c b/clang/test/Misc/serialized-diags-unknown-target.c
new file mode 100644
index 0000000000000..040dfa4b2849e
--- /dev/null
+++ b/clang/test/Misc/serialized-diags-unknown-target.c
@@ -0,0 +1,4 @@
+// RUN: rm -rf %t && mkdir %t
+// RUN: not %clang_cc1 %s -triple blah-unknown-unknown -serialize-diagnostic-file %t/diag -o /dev/null 2>&1 | FileCheck %s
+
+// CHECK: error: unknown target triple 'blah-unknown-unknown', please use -triple or -arch
diff --git a/clang/tools/driver/cc1_main.cpp b/clang/tools/driver/cc1_main.cpp
index fd3b25ccb3cb1..f648adeba4834 100644
--- a/clang/tools/driver/cc1_main.cpp
+++ b/clang/tools/driver/cc1_main.cpp
@@ -237,8 +237,10 @@ int cc1_main(ArrayRef<const char *> Argv, const char *Argv0, void *MainAddr) {
                                   static_cast<void*>(&Clang->getDiagnostics()));
 
   DiagsBuffer->FlushDiagnostics(Clang->getDiagnostics());
-  if (!Success)
+  if (!Success) {
+    Clang->getDiagnosticClient().finish();
     return 1;
+  }
 
   // Execute the frontend actions.
   {

From e9b4239fefa657362678c063c1ba81b0eed2cab3 Mon Sep 17 00:00:00 2001
From: Alexander Batashev <alexander.batashev@intel.com>
Date: Wed, 26 Jan 2022 10:10:50 +0000
Subject: [PATCH 686/946] [mlir][openmp] Custom syntax for `omp.target`
 operation

Add a custom parser and printer for `omp.target` operation.

Reviewed By: kiranchandramohan

Differential Revision: https://reviews.llvm.org/D117539
---
 mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td |  3 +
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  | 73 +++++++++++++++++++
 mlir/test/Dialect/OpenMP/ops.mlir             | 21 +++++-
 3 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
index 311513f1682a6..d628027698d33 100644
--- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
+++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
@@ -418,6 +418,9 @@ def TargetOp : OpenMP_Op<"target",[AttrSizedOperandSegments]> {
                        UnitAttr:$nowait);
 
   let regions = (region AnyRegion:$region);
+
+  let parser = [{ return parseTargetOp(parser, result); }];
+  let printer = [{ return printTargetOp(p, *this); }];
 }
 
 
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 1eeee3f65f3ba..089d534bcdae4 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -198,6 +198,24 @@ static void printParallelOp(OpAsmPrinter &p, ParallelOp op) {
   p.printRegion(op.getRegion());
 }
 
+static void printTargetOp(OpAsmPrinter &p, TargetOp op) {
+  p << " ";
+  if (auto ifCond = op.if_expr())
+    p << "if(" << ifCond << " : " << ifCond.getType() << ") ";
+
+  if (auto device = op.device())
+    p << "device(" << device << " : " << device.getType() << ") ";
+
+  if (auto threads = op.thread_limit())
+    p << "thread_limit(" << threads << " : " << threads.getType() << ") ";
+
+  if (op.nowait()) {
+    p << "nowait ";
+  }
+
+  p.printRegion(op.getRegion());
+}
+
 //===----------------------------------------------------------------------===//
 // Parser and printer for Linear Clause
 //===----------------------------------------------------------------------===//
@@ -523,6 +541,8 @@ static LogicalResult verifySynchronizationHint(Operation *op, uint64_t hint) {
 enum ClauseType {
   ifClause,
   numThreadsClause,
+  deviceClause,
+  threadLimitClause,
   privateClause,
   firstprivateClause,
   lastprivateClause,
@@ -611,6 +631,8 @@ static ParseResult parseClauses(OpAsmParser &parser, OperationState &result,
   // Containers for storing operands, types and attributes for various clauses
   std::pair<OpAsmParser::OperandType, Type> ifCond;
   std::pair<OpAsmParser::OperandType, Type> numThreads;
+  std::pair<OpAsmParser::OperandType, Type> device;
+  std::pair<OpAsmParser::OperandType, Type> threadLimit;
 
   SmallVector<OpAsmParser::OperandType> privates, firstprivates, lastprivates,
       shareds, copyins;
@@ -681,6 +703,18 @@ static ParseResult parseClauses(OpAsmParser &parser, OperationState &result,
           parser.parseColonType(numThreads.second) || parser.parseRParen())
         return failure();
       clauseSegments[pos[numThreadsClause]] = 1;
+    } else if (clauseKeyword == "device") {
+      if (checkAllowed(deviceClause) || parser.parseLParen() ||
+          parser.parseOperand(device.first) ||
+          parser.parseColonType(device.second) || parser.parseRParen())
+        return failure();
+      clauseSegments[pos[deviceClause]] = 1;
+    } else if (clauseKeyword == "thread_limit") {
+      if (checkAllowed(threadLimitClause) || parser.parseLParen() ||
+          parser.parseOperand(threadLimit.first) ||
+          parser.parseColonType(threadLimit.second) || parser.parseRParen())
+        return failure();
+      clauseSegments[pos[threadLimitClause]] = 1;
     } else if (clauseKeyword == "private") {
       if (checkAllowed(privateClause) ||
           parseOperandAndTypeList(parser, privates, privateTypes))
@@ -812,6 +846,18 @@ static ParseResult parseClauses(OpAsmParser &parser, OperationState &result,
                                    result.operands)))
     return failure();
 
+  // Add device parameter.
+  if (done[deviceClause] && clauseSegments[pos[deviceClause]] &&
+      failed(
+          parser.resolveOperand(device.first, device.second, result.operands)))
+    return failure();
+
+  // Add thread_limit parameter.
+  if (done[threadLimitClause] && clauseSegments[pos[threadLimitClause]] &&
+      failed(parser.resolveOperand(threadLimit.first, threadLimit.second,
+                                   result.operands)))
+    return failure();
+
   // Add private parameters.
   if (done[privateClause] && clauseSegments[pos[privateClause]] &&
       failed(parser.resolveOperands(privates, privateTypes,
@@ -948,6 +994,33 @@ static ParseResult parseParallelOp(OpAsmParser &parser,
   return success();
 }
 
+/// Parses a target operation.
+///
+/// operation ::= `omp.target` clause-list
+/// clause-list ::= clause | clause clause-list
+/// clause ::= if | device | thread_limit | nowait
+///
+static ParseResult parseTargetOp(OpAsmParser &parser, OperationState &result) {
+  SmallVector<ClauseType> clauses = {ifClause, deviceClause, threadLimitClause,
+                                     nowaitClause};
+
+  SmallVector<int> segments;
+
+  if (failed(parseClauses(parser, result, clauses, segments)))
+    return failure();
+
+  result.addAttribute(
+      TargetOp::AttrSizedOperandSegments::getOperandSegmentSizeAttr(),
+      parser.getBuilder().getI32VectorAttr(segments));
+
+  Region *body = result.addRegion();
+  SmallVector<OpAsmParser::OperandType> regionArgs;
+  SmallVector<Type> regionArgTypes;
+  if (parser.parseRegion(*body, regionArgs, regionArgTypes))
+    return failure();
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // Parser, printer and verifier for SectionsOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir
index 96a0b427123f3..3586acbd9dd2d 100644
--- a/mlir/test/Dialect/OpenMP/ops.mlir
+++ b/mlir/test/Dialect/OpenMP/ops.mlir
@@ -330,12 +330,12 @@ func @omp_wsloop_pretty_multiple(%lb1 : i32, %ub1 : i32, %step1 : i32, %lb2 : i3
 // CHECK-LABEL: omp_target
 func @omp_target(%if_cond : i1, %device : si32,  %num_threads : si32) -> () {
 
-    // Test with optional operands; if_expr, device, thread_limit, and nowait.
-    // CHECK: omp.target
+    // Test with optional operands; if_expr, device, thread_limit, private, firstprivate and nowait.
+    // CHECK: omp.target if({{.*}}) device({{.*}}) thread_limit({{.*}}) nowait
     "omp.target"(%if_cond, %device, %num_threads) ({
        // CHECK: omp.terminator
        omp.terminator
-    }) {operand_segment_sizes = dense<[1,1,1]>: vector<3xi32>, nowait } : ( i1, si32, si32  ) -> ()
+    }) {operand_segment_sizes = dense<[1,1,1]>: vector<3xi32>, nowait } : ( i1, si32, si32 ) -> ()
 
     // CHECK: omp.barrier
     omp.barrier
@@ -343,6 +343,21 @@ func @omp_target(%if_cond : i1, %device : si32,  %num_threads : si32) -> () {
     return
 }
 
+// CHECK-LABEL: omp_target_pretty
+func @omp_target_pretty(%if_cond : i1, %device : si32,  %num_threads : si32) -> () {
+    // CHECK: omp.target if({{.*}}) device({{.*}})
+    omp.target if(%if_cond : i1) device(%device : si32) {
+      omp.terminator
+    }
+
+    // CHECK: omp.target if({{.*}}) device({{.*}}) nowait
+    omp.target if(%if_cond : i1) device(%device : si32) thread_limit(%num_threads : si32) nowait {
+      omp.terminator
+    }
+
+    return
+}
+
 // CHECK: omp.reduction.declare
 // CHECK-LABEL: @add_f32
 // CHECK: : f32

From 188d28f73cc7b7891c182a85fdb6e274123b5b69 Mon Sep 17 00:00:00 2001
From: Stanislav Gatev <sgatev@google.com>
Date: Wed, 26 Jan 2022 09:15:07 +0000
Subject: [PATCH 687/946] [clang][dataflow] Assign aggregate storage locations
 to union stmts

This patch ensures that the dataflow analysis framework does not crash
when it encounters access to members of union types.

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D118226
---
 .../FlowSensitive/DataflowEnvironment.cpp     |  2 +-
 .../Analysis/FlowSensitive/TransferTest.cpp   | 33 +++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 6fbc3ec42465c..512e3d991df20 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -100,7 +100,7 @@ LatticeJoinEffect Environment::join(const Environment &Other) {
 
 StorageLocation &Environment::createStorageLocation(QualType Type) {
   assert(!Type.isNull());
-  if (Type->isStructureOrClassType()) {
+  if (Type->isStructureOrClassType() || Type->isUnionType()) {
     // FIXME: Explore options to avoid eager initialization of fields as some of
     // them might not be needed for a particular analysis.
     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index cd3e58207680a..0f7ac3af74edd 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1952,4 +1952,37 @@ TEST_F(TransferTest, AggregateInitialization) {
   }
 }
 
+TEST_F(TransferTest, AssignToUnionMember) {
+  std::string Code = R"(
+    union A {
+      int Foo;
+    };
+
+    void target(int Bar) {
+      A Baz;
+      Baz.Foo = Bar;
+      // [[p]]
+    }
+  )";
+  runDataflow(Code,
+              [](llvm::ArrayRef<
+                     std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
+                     Results,
+                 ASTContext &ASTCtx) {
+                ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
+                const Environment &Env = Results[0].second.Env;
+
+                const ValueDecl *BazDecl = findValueDecl(ASTCtx, "Baz");
+                ASSERT_THAT(BazDecl, NotNull());
+                ASSERT_TRUE(BazDecl->getType()->isUnionType());
+
+                const auto *BazLoc = dyn_cast_or_null<AggregateStorageLocation>(
+                    Env.getStorageLocation(*BazDecl, SkipPast::None));
+                ASSERT_THAT(BazLoc, NotNull());
+
+                // FIXME: Add support for union types.
+                EXPECT_THAT(Env.getValue(*BazLoc), IsNull());
+              });
+}
+
 } // namespace

From 0776f6e04d8c3823c31b8fa6fa7d1376a7009af1 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 13 Jan 2022 13:12:50 +0100
Subject: [PATCH 688/946] [LSV] Vectorize loads of vectors by turning it into a
 larger vector

Use shufflevector to do the subvector extracts. This allows a lot more
load merging on AMDGPU and also on NVPTX when <2 x half> is involved.

Differential Revision: https://reviews.llvm.org/D117219
---
 .../Vectorize/LoadStoreVectorizer.cpp         |   73 +-
 .../test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll | 1664 ++++++++-------
 .../test/CodeGen/AMDGPU/GlobalISel/udivrem.ll |  958 +++++----
 .../AMDGPU/amdgpu-codegenprepare-idiv.ll      | 1839 ++++++++---------
 llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll        |   10 +-
 llvm/test/CodeGen/AMDGPU/fmax_legacy.ll       |    4 +-
 llvm/test/CodeGen/AMDGPU/fmin_legacy.ll       |    8 +-
 llvm/test/CodeGen/AMDGPU/fshl.ll              |  322 ++-
 llvm/test/CodeGen/AMDGPU/fshr.ll              |  286 ++-
 llvm/test/CodeGen/AMDGPU/half.ll              |   45 +-
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll  |    3 +-
 llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll        |  150 +-
 llvm/test/CodeGen/AMDGPU/memory_clause.ll     |   36 +-
 llvm/test/CodeGen/AMDGPU/min.ll               |    4 +-
 llvm/test/CodeGen/AMDGPU/mul_int24.ll         |   27 +-
 llvm/test/CodeGen/AMDGPU/sdiv.ll              |   63 +-
 llvm/test/CodeGen/AMDGPU/sdiv64.ll            |   34 +-
 llvm/test/CodeGen/AMDGPU/select-vectors.ll    |    9 +-
 llvm/test/CodeGen/AMDGPU/shift-i128.ll        |  203 +-
 llvm/test/CodeGen/AMDGPU/shl.ll               |  175 +-
 llvm/test/CodeGen/AMDGPU/shl.v2i16.ll         |  142 +-
 llvm/test/CodeGen/AMDGPU/sra.ll               |   74 +-
 llvm/test/CodeGen/AMDGPU/srl.ll               |   50 +-
 llvm/test/CodeGen/AMDGPU/sub.v2i16.ll         |   15 +-
 llvm/test/CodeGen/AMDGPU/udiv.ll              |   63 +-
 llvm/test/CodeGen/AMDGPU/urem64.ll            |  276 ++-
 .../LoadStoreVectorizer/NVPTX/4x2xhalf.ll     |   96 +
 27 files changed, 3247 insertions(+), 3382 deletions(-)
 create mode 100644 llvm/test/Transforms/LoadStoreVectorizer/NVPTX/4x2xhalf.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index d2e0d1d474b0e..97c2acb7d4c76 100644
--- a/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -854,13 +854,6 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
           (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
         continue;
 
-      // Make sure all the users of a vector are constant-index extracts.
-      if (isa<VectorType>(Ty) && !llvm::all_of(LI->users(), [](const User *U) {
-            const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
-            return EEI && isa<ConstantInt>(EEI->getOperand(1));
-          }))
-        continue;
-
       // Save the load locations.
       const ChainID ID = getChainID(Ptr);
       LoadRefs[ID].push_back(LI);
@@ -901,12 +894,6 @@ Vectorizer::collectInstructions(BasicBlock *BB) {
           (VecTy && TTI.getStoreVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
         continue;
 
-      if (isa<VectorType>(Ty) && !llvm::all_of(SI->users(), [](const User *U) {
-            const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
-            return EEI && isa<ConstantInt>(EEI->getOperand(1));
-          }))
-        continue;
-
       // Save store location.
       const ChainID ID = getChainID(Ptr);
       StoreRefs[ID].push_back(SI);
@@ -1290,52 +1277,32 @@ bool Vectorizer::vectorizeLoadChain(
       Builder.CreateAlignedLoad(VecTy, Bitcast, MaybeAlign(Alignment));
   propagateMetadata(LI, Chain);
 
-  if (VecLoadTy) {
-    SmallVector<Instruction *, 16> InstrsToErase;
-
-    unsigned VecWidth = VecLoadTy->getNumElements();
-    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
-      for (auto Use : Chain[I]->users()) {
-        // All users of vector loads are ExtractElement instructions with
-        // constant indices, otherwise we would have bailed before now.
-        Instruction *UI = cast<Instruction>(Use);
-        unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
-        unsigned NewIdx = Idx + I * VecWidth;
-        Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
-                                                UI->getName());
-        if (V->getType() != UI->getType())
-          V = Builder.CreateBitCast(V, UI->getType());
-
-        // Replace the old instruction.
-        UI->replaceAllUsesWith(V);
-        InstrsToErase.push_back(UI);
-      }
+  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
+    Value *CV = Chain[I];
+    Value *V;
+    if (VecLoadTy) {
+      // Extract a subvector using shufflevector.
+      unsigned VecWidth = VecLoadTy->getNumElements();
+      auto Mask =
+          llvm::to_vector<8>(llvm::seq<int>(I * VecWidth, (I + 1) * VecWidth));
+      V = Builder.CreateShuffleVector(LI, Mask, CV->getName());
+    } else {
+      V = Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
     }
 
-    // Bitcast might not be an Instruction, if the value being loaded is a
-    // constant.  In that case, no need to reorder anything.
-    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
-      reorder(BitcastInst);
-
-    for (auto I : InstrsToErase)
-      I->eraseFromParent();
-  } else {
-    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
-      Value *CV = Chain[I];
-      Value *V =
-          Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
-      if (V->getType() != CV->getType()) {
-        V = Builder.CreateBitOrPointerCast(V, CV->getType());
-      }
-
-      // Replace the old instruction.
-      CV->replaceAllUsesWith(V);
+    if (V->getType() != CV->getType()) {
+      V = Builder.CreateBitOrPointerCast(V, CV->getType());
     }
 
-    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
-      reorder(BitcastInst);
+    // Replace the old instruction.
+    CV->replaceAllUsesWith(V);
   }
 
+  // Bitcast might not be an Instruction, if the value being loaded is a
+  // constant. In that case, no need to reorder anything.
+  if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
+    reorder(BitcastInst);
+
   eraseInstructions(Chain);
 
   ++NumVectorInstructions;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
index 62b01dc0ee3e9..25eae693c1634 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll
@@ -667,69 +667,68 @@ define amdgpu_kernel void @sdivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)
 define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32> addrspace(1)* %out1, <2 x i32> %x, <2 x i32> %y) {
 ; GFX8-LABEL: sdivrem_v2i32:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x18
-; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x10
+; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
 ; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_ashr_i32 s8, s0, 31
-; GFX8-NEXT:    s_add_i32 s0, s0, s8
-; GFX8-NEXT:    s_xor_b32 s9, s0, s8
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s9
-; GFX8-NEXT:    s_ashr_i32 s11, s1, 31
-; GFX8-NEXT:    s_add_i32 s0, s1, s11
-; GFX8-NEXT:    s_sub_i32 s1, 0, s9
+; GFX8-NEXT:    s_ashr_i32 s2, s10, 31
+; GFX8-NEXT:    s_add_i32 s0, s10, s2
+; GFX8-NEXT:    s_xor_b32 s3, s0, s2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s3
+; GFX8-NEXT:    s_sub_i32 s1, 0, s3
+; GFX8-NEXT:    s_ashr_i32 s12, s11, 31
+; GFX8-NEXT:    s_add_i32 s0, s11, s12
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX8-NEXT:    s_xor_b32 s12, s0, s11
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s12
-; GFX8-NEXT:    s_ashr_i32 s10, s2, 31
+; GFX8-NEXT:    s_xor_b32 s11, s0, s12
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s11
+; GFX8-NEXT:    s_ashr_i32 s10, s8, 31
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX8-NEXT:    s_add_i32 s0, s2, s10
+; GFX8-NEXT:    s_add_i32 s0, s8, s10
 ; GFX8-NEXT:    s_xor_b32 s0, s0, s10
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; GFX8-NEXT:    v_mul_lo_u32 v1, s1, v0
-; GFX8-NEXT:    s_ashr_i32 s2, s3, 31
+; GFX8-NEXT:    s_sub_i32 s8, 0, s11
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s0, v0
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v2
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s9
+; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s9, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s9, v2
+; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s9, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s9, v2
-; GFX8-NEXT:    s_sub_i32 s0, 0, s12
+; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v3, s0, v1
-; GFX8-NEXT:    s_add_i32 s1, s3, s2
-; GFX8-NEXT:    s_xor_b32 s1, s1, s2
-; GFX8-NEXT:    s_xor_b32 s0, s10, s8
+; GFX8-NEXT:    v_mul_lo_u32 v3, s8, v1
+; GFX8-NEXT:    s_xor_b32 s0, s10, s2
+; GFX8-NEXT:    s_ashr_i32 s2, s9, 31
+; GFX8-NEXT:    s_add_i32 s1, s9, s2
 ; GFX8-NEXT:    v_mul_hi_u32 v3, v1, v3
+; GFX8-NEXT:    s_xor_b32 s1, s1, s2
 ; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_xor_b32_e32 v2, s10, v2
-; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, s0, v0
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
 ; GFX8-NEXT:    v_mul_hi_u32 v1, s1, v1
+; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, s0, v0
 ; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s10, v2
-; GFX8-NEXT:    v_mul_lo_u32 v3, v1, s12
+; GFX8-NEXT:    v_mul_lo_u32 v3, v1, s11
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s12, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s12, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s12, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s12, v3
-; GFX8-NEXT:    s_xor_b32 s0, s2, s11
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s11, v3
+; GFX8-NEXT:    s_xor_b32 s0, s2, s12
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_xor_b32_e32 v1, s0, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s4
@@ -745,74 +744,73 @@ define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 ;
 ; GFX9-LABEL: sdivrem_v2i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x18
-; GFX9-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x10
+; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_ashr_i32 s10, s6, 31
-; GFX9-NEXT:    s_add_i32 s0, s6, s10
-; GFX9-NEXT:    s_xor_b32 s6, s0, s10
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s6
+; GFX9-NEXT:    s_ashr_i32 s6, s10, 31
+; GFX9-NEXT:    s_add_i32 s0, s10, s6
+; GFX9-NEXT:    s_xor_b32 s7, s0, s6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s7
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GFX9-NEXT:    s_ashr_i32 s5, s7, 31
-; GFX9-NEXT:    s_add_i32 s7, s7, s5
+; GFX9-NEXT:    s_ashr_i32 s4, s11, 31
+; GFX9-NEXT:    s_add_i32 s5, s11, s4
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX9-NEXT:    s_xor_b32 s7, s7, s5
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s7
-; GFX9-NEXT:    s_sub_i32 s11, 0, s6
+; GFX9-NEXT:    s_xor_b32 s5, s5, s4
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s5
+; GFX9-NEXT:    s_sub_i32 s11, 0, s7
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
-; GFX9-NEXT:    s_ashr_i32 s4, s8, 31
-; GFX9-NEXT:    s_add_i32 s8, s8, s4
+; GFX9-NEXT:    s_ashr_i32 s10, s8, 31
+; GFX9-NEXT:    s_add_i32 s8, s8, s10
 ; GFX9-NEXT:    v_mul_lo_u32 v2, s11, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    s_xor_b32 s8, s8, s4
+; GFX9-NEXT:    s_xor_b32 s8, s8, s10
 ; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
-; GFX9-NEXT:    s_sub_i32 s12, 0, s7
+; GFX9-NEXT:    s_sub_i32 s11, 0, s5
+; GFX9-NEXT:    v_mul_lo_u32 v3, s11, v1
 ; GFX9-NEXT:    s_ashr_i32 s11, s9, 31
-; GFX9-NEXT:    s_add_i32 s9, s9, s11
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s8, v0
-; GFX9-NEXT:    v_mul_lo_u32 v2, s12, v1
+; GFX9-NEXT:    v_mul_hi_u32 v2, v1, v3
+; GFX9-NEXT:    s_add_i32 s9, s9, s11
 ; GFX9-NEXT:    s_xor_b32 s9, s9, s11
-; GFX9-NEXT:    v_mul_lo_u32 v3, v0, s6
-; GFX9-NEXT:    v_mul_hi_u32 v2, v1, v2
-; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_sub_u32_e32 v3, s8, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    v_mul_lo_u32 v3, v0, s7
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s6, v3
+; GFX9-NEXT:    v_add_u32_e32 v2, 1, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v1, s9, v1
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s6, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v3, v1, s7
+; GFX9-NEXT:    v_sub_u32_e32 v3, s8, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; GFX9-NEXT:    v_subrev_u32_e32 v2, s7, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
+; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GFX9-NEXT:    v_subrev_u32_e32 v3, s7, v2
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v3, v1, s5
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_xor_b32_e32 v2, s4, v2
-; GFX9-NEXT:    s_xor_b32 s6, s4, s10
+; GFX9-NEXT:    s_xor_b32 s6, s10, s6
+; GFX9-NEXT:    s_xor_b32 s4, s11, s4
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s9, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s7, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s5, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
-; GFX9-NEXT:    v_subrev_u32_e32 v2, s4, v2
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s7, v3
-; GFX9-NEXT:    s_xor_b32 s4, s11, s5
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s5, v3
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s6, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v1
 ; GFX9-NEXT:    v_subrev_u32_e32 v0, s6, v0
+; GFX9-NEXT:    v_xor_b32_e32 v2, s10, v2
 ; GFX9-NEXT:    v_subrev_u32_e32 v1, s4, v1
 ; GFX9-NEXT:    v_xor_b32_e32 v3, s11, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    v_subrev_u32_e32 v2, s10, v2
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s11, v3
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_store_dwordx2 v4, v[0:1], s[0:1]
@@ -821,34 +819,32 @@ define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 ;
 ; GFX10-LABEL: sdivrem_v2i32:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x18
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_ashr_i32 s2, s0, 31
-; GFX10-NEXT:    s_ashr_i32 s3, s1, 31
-; GFX10-NEXT:    s_add_i32 s0, s0, s2
-; GFX10-NEXT:    s_add_i32 s1, s1, s3
-; GFX10-NEXT:    s_xor_b32 s8, s0, s2
-; GFX10-NEXT:    s_xor_b32 s9, s1, s3
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s8
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s9
-; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
-; GFX10-NEXT:    s_sub_i32 s6, 0, s8
-; GFX10-NEXT:    s_sub_i32 s7, 0, s9
+; GFX10-NEXT:    s_ashr_i32 s8, s2, 31
+; GFX10-NEXT:    s_ashr_i32 s9, s3, 31
+; GFX10-NEXT:    s_add_i32 s2, s2, s8
+; GFX10-NEXT:    s_add_i32 s3, s3, s9
+; GFX10-NEXT:    s_xor_b32 s2, s2, s8
+; GFX10-NEXT:    s_xor_b32 s3, s3, s9
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s3
+; GFX10-NEXT:    s_sub_i32 s6, 0, s2
+; GFX10-NEXT:    s_sub_i32 s7, 0, s3
+; GFX10-NEXT:    s_ashr_i32 s10, s0, 31
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; GFX10-NEXT:    s_ashr_i32 s11, s1, 31
+; GFX10-NEXT:    s_add_i32 s0, s0, s10
+; GFX10-NEXT:    s_add_i32 s1, s1, s11
+; GFX10-NEXT:    s_xor_b32 s0, s0, s10
+; GFX10-NEXT:    s_xor_b32 s1, s1, s11
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_ashr_i32 s10, s0, 31
-; GFX10-NEXT:    s_ashr_i32 s11, s1, 31
-; GFX10-NEXT:    s_add_i32 s0, s0, s10
 ; GFX10-NEXT:    v_mul_lo_u32 v2, s6, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v3, s7, v1
-; GFX10-NEXT:    s_add_i32 s1, s1, s11
-; GFX10-NEXT:    s_xor_b32 s0, s0, s10
-; GFX10-NEXT:    s_xor_b32 s1, s1, s11
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX10-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v1, v3
@@ -856,32 +852,32 @@ define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v3
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s0, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v1, s1, v1
-; GFX10-NEXT:    v_mul_lo_u32 v2, v0, s8
-; GFX10-NEXT:    v_mul_lo_u32 v3, v1, s9
+; GFX10-NEXT:    v_mul_lo_u32 v2, v0, s2
+; GFX10-NEXT:    v_mul_lo_u32 v3, v1, s3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s0, v2
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s1, v3
-; GFX10-NEXT:    s_xor_b32 s1, s10, s2
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s8, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s9, v3
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s8, v2
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s9, v3
+; GFX10-NEXT:    s_xor_b32 s1, s10, s8
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s2, v2
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s3, v3
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s2, v2
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s3, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s8, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s9, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s8, v2
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s9, v3
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s2, v2
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s3, v3
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s2, v2
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s3, v3
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT:    s_xor_b32 s0, s11, s3
+; GFX10-NEXT:    s_xor_b32 s0, s11, s9
 ; GFX10-NEXT:    v_xor_b32_e32 v0, s1, v0
 ; GFX10-NEXT:    v_xor_b32_e32 v1, s0, v1
 ; GFX10-NEXT:    v_xor_b32_e32 v2, s10, v2
@@ -905,142 +901,141 @@ define amdgpu_kernel void @sdivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %x, <4 x i32> %y) {
 ; GFX8-LABEL: sdivrem_v4i32:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x20
-; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
-; GFX8-NEXT:    v_mov_b32_e32 v3, 0x4f7ffffe
+; GFX8-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
+; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4f7ffffe
 ; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_ashr_i32 s12, s0, 31
-; GFX8-NEXT:    s_add_i32 s0, s0, s12
-; GFX8-NEXT:    s_xor_b32 s13, s0, s12
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s13
-; GFX8-NEXT:    s_ashr_i32 s15, s1, 31
-; GFX8-NEXT:    s_add_i32 s0, s1, s15
-; GFX8-NEXT:    s_sub_i32 s1, 0, s13
+; GFX8-NEXT:    s_ashr_i32 s2, s12, 31
+; GFX8-NEXT:    s_add_i32 s0, s12, s2
+; GFX8-NEXT:    s_xor_b32 s3, s0, s2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s3
+; GFX8-NEXT:    s_sub_i32 s1, 0, s3
+; GFX8-NEXT:    s_ashr_i32 s16, s13, 31
+; GFX8-NEXT:    s_add_i32 s0, s13, s16
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX8-NEXT:    s_xor_b32 s16, s0, s15
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s16
-; GFX8-NEXT:    s_ashr_i32 s14, s8, 31
+; GFX8-NEXT:    s_xor_b32 s13, s0, s16
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v3, s13
+; GFX8-NEXT:    s_ashr_i32 s12, s8, 31
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX8-NEXT:    s_add_i32 s0, s8, s14
-; GFX8-NEXT:    s_xor_b32 s0, s0, s14
-; GFX8-NEXT:    v_rcp_iflag_f32_e32 v2, v2
+; GFX8-NEXT:    s_add_i32 s0, s8, s12
+; GFX8-NEXT:    s_xor_b32 s0, s0, s12
+; GFX8-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; GFX8-NEXT:    v_mul_lo_u32 v1, s1, v0
-; GFX8-NEXT:    s_ashr_i32 s8, s9, 31
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s0, v0
-; GFX8-NEXT:    v_mul_f32_e32 v1, v2, v3
+; GFX8-NEXT:    v_mul_f32_e32 v1, v3, v2
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s13
+; GFX8-NEXT:    v_mul_lo_u32 v3, v0, s3
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v2
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s0, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s13, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s13, v2
-; GFX8-NEXT:    s_sub_i32 s0, 0, s16
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
+; GFX8-NEXT:    s_sub_i32 s0, 0, s13
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_mul_lo_u32 v4, s0, v1
-; GFX8-NEXT:    s_add_i32 s1, s9, s8
-; GFX8-NEXT:    s_xor_b32 s1, s1, s8
-; GFX8-NEXT:    s_xor_b32 s0, s14, s12
+; GFX8-NEXT:    s_xor_b32 s0, s12, s2
+; GFX8-NEXT:    s_ashr_i32 s2, s9, 31
+; GFX8-NEXT:    s_add_i32 s1, s9, s2
 ; GFX8-NEXT:    v_mul_hi_u32 v4, v1, v4
+; GFX8-NEXT:    s_xor_b32 s1, s1, s2
 ; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX8-NEXT:    v_xor_b32_e32 v2, s14, v2
-; GFX8-NEXT:    s_ashr_i32 s9, s2, 31
+; GFX8-NEXT:    v_xor_b32_e32 v3, s12, v3
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v4
 ; GFX8-NEXT:    v_mul_hi_u32 v1, s1, v1
+; GFX8-NEXT:    s_ashr_i32 s3, s14, 31
 ; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, s0, v0
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s14, v2
-; GFX8-NEXT:    v_mul_lo_u32 v5, v1, s16
-; GFX8-NEXT:    s_add_i32 s0, s2, s9
-; GFX8-NEXT:    s_xor_b32 s2, s0, s9
-; GFX8-NEXT:    s_ashr_i32 s12, s10, 31
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s1, v5
+; GFX8-NEXT:    v_mul_lo_u32 v5, v1, s13
+; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s12, v3
+; GFX8-NEXT:    s_add_i32 s0, s14, s3
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v5
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s16, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
+; GFX8-NEXT:    s_xor_b32 s8, s0, s3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v5, s2
-; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s16, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v5, s8
+; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s13, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s16, v2
-; GFX8-NEXT:    v_mul_f32_e32 v5, v5, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
+; GFX8-NEXT:    v_mul_f32_e32 v5, v5, v2
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s16, v2
-; GFX8-NEXT:    s_sub_i32 s0, 0, s2
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s13, v3
+; GFX8-NEXT:    s_sub_i32 s0, 0, s8
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
 ; GFX8-NEXT:    v_mul_lo_u32 v6, s0, v5
-; GFX8-NEXT:    s_add_i32 s1, s10, s12
-; GFX8-NEXT:    s_xor_b32 s1, s1, s12
-; GFX8-NEXT:    s_xor_b32 s0, s8, s15
+; GFX8-NEXT:    s_ashr_i32 s9, s10, 31
+; GFX8-NEXT:    s_add_i32 s1, s10, s9
+; GFX8-NEXT:    s_xor_b32 s1, s1, s9
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v5, v6
-; GFX8-NEXT:    v_xor_b32_e32 v2, s8, v2
+; GFX8-NEXT:    s_xor_b32 s0, s2, s16
+; GFX8-NEXT:    v_xor_b32_e32 v3, s2, v3
 ; GFX8-NEXT:    v_xor_b32_e32 v1, s0, v1
-; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, s0, v1
 ; GFX8-NEXT:    v_add_u32_e32 v5, vcc, v5, v6
 ; GFX8-NEXT:    v_mul_hi_u32 v6, s1, v5
-; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, s8, v2
-; GFX8-NEXT:    s_ashr_i32 s8, s3, 31
-; GFX8-NEXT:    v_mul_lo_u32 v7, v6, s2
-; GFX8-NEXT:    s_add_i32 s0, s3, s8
-; GFX8-NEXT:    s_xor_b32 s3, s0, s8
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s1, v7
+; GFX8-NEXT:    v_subrev_u32_e32 v5, vcc, s2, v3
+; GFX8-NEXT:    s_ashr_i32 s2, s15, 31
+; GFX8-NEXT:    v_mul_lo_u32 v7, v6, s8
+; GFX8-NEXT:    v_subrev_u32_e32 v1, vcc, s0, v1
+; GFX8-NEXT:    s_add_i32 s0, s15, s2
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v7
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 1, v6
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; GFX8-NEXT:    s_xor_b32 s10, s0, s2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v7, s3
-; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s2, v2
-; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v8, vcc
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v7, s10
+; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s8, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v7, v7
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v6
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
-; GFX8-NEXT:    v_mul_f32_e32 v3, v7, v3
-; GFX8-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX8-NEXT:    v_subrev_u32_e64 v7, s[0:1], s2, v2
-; GFX8-NEXT:    s_sub_i32 s0, 0, s3
-; GFX8-NEXT:    v_cndmask_b32_e32 v7, v2, v7, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v2, s0, v3
-; GFX8-NEXT:    s_ashr_i32 s2, s11, 31
-; GFX8-NEXT:    s_add_i32 s1, s11, s2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; GFX8-NEXT:    v_mul_f32_e32 v2, v7, v2
+; GFX8-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX8-NEXT:    v_subrev_u32_e64 v7, s[0:1], s8, v3
+; GFX8-NEXT:    s_sub_i32 s0, 0, s10
+; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
+; GFX8-NEXT:    v_mul_lo_u32 v7, s0, v2
+; GFX8-NEXT:    s_xor_b32 s0, s9, s3
+; GFX8-NEXT:    s_ashr_i32 s3, s11, 31
+; GFX8-NEXT:    s_add_i32 s1, s11, s3
+; GFX8-NEXT:    v_mul_hi_u32 v7, v2, v7
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v6, v8, vcc
-; GFX8-NEXT:    v_mul_hi_u32 v2, v3, v2
-; GFX8-NEXT:    s_xor_b32 s1, s1, s2
-; GFX8-NEXT:    s_xor_b32 s0, s12, s9
+; GFX8-NEXT:    s_xor_b32 s1, s1, s3
 ; GFX8-NEXT:    v_xor_b32_e32 v6, s0, v6
-; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_mul_hi_u32 v3, s1, v2
+; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v7
+; GFX8-NEXT:    v_mul_hi_u32 v7, s1, v2
+; GFX8-NEXT:    v_xor_b32_e32 v3, s9, v3
 ; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s0, v6
-; GFX8-NEXT:    v_xor_b32_e32 v6, s12, v7
-; GFX8-NEXT:    v_mul_lo_u32 v7, v3, s3
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s12, v6
-; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v3
-; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s1, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v7
-; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s3, v7
+; GFX8-NEXT:    v_mul_lo_u32 v8, v7, s10
+; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s9, v3
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v8
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v7
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
-; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v7
+; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s10, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s3, v7
-; GFX8-NEXT:    s_xor_b32 s0, s2, s8
+; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v7
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
-; GFX8-NEXT:    v_xor_b32_e32 v3, s0, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s10, v3
+; GFX8-NEXT:    v_cndmask_b32_e32 v8, v3, v8, vcc
+; GFX8-NEXT:    s_xor_b32 s0, s3, s2
+; GFX8-NEXT:    v_xor_b32_e32 v3, s0, v7
+; GFX8-NEXT:    v_xor_b32_e32 v7, s3, v8
 ; GFX8-NEXT:    v_mov_b32_e32 v9, s5
 ; GFX8-NEXT:    v_subrev_u32_e32 v3, vcc, s0, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v8, s4
-; GFX8-NEXT:    v_xor_b32_e32 v7, s2, v7
 ; GFX8-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
-; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s2, v7
+; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s3, v7
 ; GFX8-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX8-NEXT:    v_mov_b32_e32 v1, s7
 ; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
@@ -1048,7 +1043,7 @@ define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 ;
 ; GFX9-LABEL: sdivrem_v4i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x20
+; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0x4f7ffffe
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_ashr_i32 s6, s12, 31
@@ -1056,116 +1051,114 @@ define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 ; GFX9-NEXT:    s_xor_b32 s7, s0, s6
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s7
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
-; GFX9-NEXT:    s_ashr_i32 s4, s13, 31
-; GFX9-NEXT:    s_add_i32 s5, s13, s4
+; GFX9-NEXT:    s_ashr_i32 s5, s13, 31
+; GFX9-NEXT:    s_add_i32 s12, s13, s5
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX9-NEXT:    s_sub_i32 s12, 0, s7
-; GFX9-NEXT:    s_xor_b32 s5, s5, s4
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s5
+; GFX9-NEXT:    s_xor_b32 s12, s12, s5
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s12
+; GFX9-NEXT:    s_sub_i32 s13, 0, s7
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    s_sub_i32 s13, 0, s5
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
-; GFX9-NEXT:    v_mul_lo_u32 v3, s12, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_ashr_i32 s12, s8, 31
-; GFX9-NEXT:    s_add_i32 s8, s8, s12
-; GFX9-NEXT:    s_xor_b32 s8, s8, s12
-; GFX9-NEXT:    v_mul_hi_u32 v3, v0, v3
+; GFX9-NEXT:    s_ashr_i32 s4, s8, 31
+; GFX9-NEXT:    s_add_i32 s8, s8, s4
+; GFX9-NEXT:    v_mul_lo_u32 v3, s13, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    s_xor_b32 s6, s12, s6
-; GFX9-NEXT:    v_add_u32_e32 v0, v0, v3
-; GFX9-NEXT:    v_mul_hi_u32 v0, s8, v0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s13, v1
+; GFX9-NEXT:    s_xor_b32 s8, s8, s4
+; GFX9-NEXT:    v_mul_hi_u32 v3, v0, v3
+; GFX9-NEXT:    s_sub_i32 s16, 0, s12
 ; GFX9-NEXT:    s_ashr_i32 s13, s9, 31
 ; GFX9-NEXT:    s_add_i32 s9, s9, s13
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v3
+; GFX9-NEXT:    v_mul_hi_u32 v0, s8, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s16, v1
+; GFX9-NEXT:    s_xor_b32 s9, s9, s13
+; GFX9-NEXT:    s_xor_b32 s6, s4, s6
 ; GFX9-NEXT:    v_mul_lo_u32 v4, v0, s7
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v0
-; GFX9-NEXT:    s_xor_b32 s4, s13, s4
+; GFX9-NEXT:    s_xor_b32 s5, s13, s5
 ; GFX9-NEXT:    v_sub_u32_e32 v4, s8, v4
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v5, s7, v4
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; GFX9-NEXT:    v_mul_hi_u32 v1, s9, v1
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v5, s7, v4
-; GFX9-NEXT:    s_xor_b32 s7, s9, s13
-; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
-; GFX9-NEXT:    v_mul_hi_u32 v1, s7, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v5, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v5, v1, s12
+; GFX9-NEXT:    v_xor_b32_e32 v3, s4, v3
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s6, v0
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s4, v3
+; GFX9-NEXT:    s_ashr_i32 s4, s14, 31
 ; GFX9-NEXT:    v_subrev_u32_e32 v0, s6, v0
-; GFX9-NEXT:    v_mul_lo_u32 v5, v1, s5
-; GFX9-NEXT:    v_xor_b32_e32 v3, s12, v3
-; GFX9-NEXT:    s_ashr_i32 s6, s14, 31
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s12, v3
-; GFX9-NEXT:    v_sub_u32_e32 v3, s7, v5
-; GFX9-NEXT:    s_add_i32 s7, s14, s6
-; GFX9-NEXT:    s_xor_b32 s7, s7, s6
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GFX9-NEXT:    s_add_i32 s6, s14, s4
+; GFX9-NEXT:    s_xor_b32 s6, s6, s4
+; GFX9-NEXT:    v_sub_u32_e32 v3, s9, v5
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s6
 ; GFX9-NEXT:    v_add_u32_e32 v6, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s12, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GFX9-NEXT:    v_subrev_u32_e32 v6, s5, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v6, s12, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v6, 1, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v5, v5, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
-; GFX9-NEXT:    s_sub_i32 s8, 0, s7
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s12, v3
+; GFX9-NEXT:    s_sub_i32 s7, 0, s6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, s8, v5
-; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v1
-; GFX9-NEXT:    v_subrev_u32_e32 v1, s4, v1
-; GFX9-NEXT:    s_ashr_i32 s4, s15, 31
-; GFX9-NEXT:    s_add_i32 s9, s15, s4
+; GFX9-NEXT:    v_mul_lo_u32 v6, s7, v5
+; GFX9-NEXT:    v_xor_b32_e32 v1, s5, v1
+; GFX9-NEXT:    v_subrev_u32_e32 v1, s5, v1
+; GFX9-NEXT:    s_ashr_i32 s5, s15, 31
+; GFX9-NEXT:    s_add_i32 s9, s15, s5
 ; GFX9-NEXT:    v_mul_hi_u32 v6, v5, v6
-; GFX9-NEXT:    s_xor_b32 s9, s9, s4
+; GFX9-NEXT:    s_xor_b32 s9, s9, s5
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v8, s9
-; GFX9-NEXT:    v_subrev_u32_e32 v7, s5, v3
-; GFX9-NEXT:    s_ashr_i32 s5, s10, 31
-; GFX9-NEXT:    s_add_i32 s8, s10, s5
-; GFX9-NEXT:    s_xor_b32 s8, s8, s5
+; GFX9-NEXT:    s_ashr_i32 s7, s10, 31
+; GFX9-NEXT:    s_add_i32 s8, s10, s7
+; GFX9-NEXT:    s_xor_b32 s8, s8, s7
 ; GFX9-NEXT:    v_add_u32_e32 v5, v5, v6
 ; GFX9-NEXT:    v_mul_hi_u32 v6, s8, v5
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v8, v8
+; GFX9-NEXT:    v_subrev_u32_e32 v7, s12, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v3, s13, v3
-; GFX9-NEXT:    v_mul_lo_u32 v7, v6, s7
+; GFX9-NEXT:    v_mul_lo_u32 v7, v6, s6
 ; GFX9-NEXT:    v_mul_f32_e32 v2, v8, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    v_xor_b32_e32 v3, s13, v3
 ; GFX9-NEXT:    v_subrev_u32_e32 v5, s13, v3
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s8, v7
 ; GFX9-NEXT:    s_sub_i32 s8, 0, s9
 ; GFX9-NEXT:    v_mul_lo_u32 v8, s8, v2
 ; GFX9-NEXT:    v_add_u32_e32 v7, 1, v6
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v7, s7, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v7, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
 ; GFX9-NEXT:    v_mul_hi_u32 v8, v2, v8
 ; GFX9-NEXT:    v_add_u32_e32 v7, 1, v6
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v7, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v7, s7, v3
-; GFX9-NEXT:    s_ashr_i32 s7, s11, 31
-; GFX9-NEXT:    s_add_i32 s8, s11, s7
-; GFX9-NEXT:    s_xor_b32 s8, s8, s7
+; GFX9-NEXT:    v_subrev_u32_e32 v7, s6, v3
+; GFX9-NEXT:    s_ashr_i32 s6, s11, 31
+; GFX9-NEXT:    s_add_i32 s8, s11, s6
+; GFX9-NEXT:    s_xor_b32 s8, s8, s6
 ; GFX9-NEXT:    v_add_u32_e32 v2, v2, v8
 ; GFX9-NEXT:    v_mul_hi_u32 v8, s8, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc
-; GFX9-NEXT:    s_xor_b32 s6, s5, s6
-; GFX9-NEXT:    v_xor_b32_e32 v3, s5, v3
-; GFX9-NEXT:    v_mul_lo_u32 v7, v8, s9
-; GFX9-NEXT:    v_xor_b32_e32 v2, s6, v6
-; GFX9-NEXT:    v_subrev_u32_e32 v6, s5, v3
 ; GFX9-NEXT:    s_xor_b32 s4, s7, s4
+; GFX9-NEXT:    v_xor_b32_e32 v3, s7, v3
+; GFX9-NEXT:    v_mul_lo_u32 v7, v8, s9
+; GFX9-NEXT:    v_xor_b32_e32 v2, s4, v6
+; GFX9-NEXT:    v_subrev_u32_e32 v6, s7, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v2, s4, v2
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s8, v7
 ; GFX9-NEXT:    v_add_u32_e32 v7, 1, v8
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s9, v3
@@ -1176,148 +1169,147 @@ define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s9, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v8, s9, v3
+; GFX9-NEXT:    s_xor_b32 s4, s6, s5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v3, v8, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v3, s4, v7
-; GFX9-NEXT:    v_subrev_u32_e32 v2, s6, v2
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v3
-; GFX9-NEXT:    v_xor_b32_e32 v7, s7, v8
+; GFX9-NEXT:    v_xor_b32_e32 v7, s6, v8
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0
-; GFX9-NEXT:    v_subrev_u32_e32 v7, s7, v7
+; GFX9-NEXT:    v_subrev_u32_e32 v7, s6, v7
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1]
 ; GFX9-NEXT:    global_store_dwordx4 v8, v[4:7], s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: sdivrem_v4i32:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x20
-; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0x4f7ffffe
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_ashr_i32 s12, s8, 31
-; GFX10-NEXT:    s_ashr_i32 s14, s10, 31
-; GFX10-NEXT:    s_add_i32 s6, s8, s12
-; GFX10-NEXT:    s_add_i32 s8, s10, s14
-; GFX10-NEXT:    s_xor_b32 s10, s6, s12
-; GFX10-NEXT:    s_ashr_i32 s13, s9, 31
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s10
-; GFX10-NEXT:    s_ashr_i32 s15, s11, 31
-; GFX10-NEXT:    s_add_i32 s7, s9, s13
-; GFX10-NEXT:    s_add_i32 s9, s11, s15
-; GFX10-NEXT:    s_xor_b32 s11, s7, s13
-; GFX10-NEXT:    s_xor_b32 s8, s8, s14
+; GFX10-NEXT:    s_ashr_i32 s0, s12, 31
+; GFX10-NEXT:    s_ashr_i32 s2, s14, 31
+; GFX10-NEXT:    s_add_i32 s6, s12, s0
+; GFX10-NEXT:    s_add_i32 s12, s14, s2
+; GFX10-NEXT:    s_xor_b32 s14, s6, s0
+; GFX10-NEXT:    s_ashr_i32 s1, s13, 31
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s14
+; GFX10-NEXT:    s_ashr_i32 s3, s15, 31
+; GFX10-NEXT:    s_add_i32 s7, s13, s1
+; GFX10-NEXT:    s_add_i32 s13, s15, s3
+; GFX10-NEXT:    s_xor_b32 s15, s7, s1
+; GFX10-NEXT:    s_xor_b32 s12, s12, s2
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s15
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s12
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s11
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s8
-; GFX10-NEXT:    s_xor_b32 s9, s9, s15
-; GFX10-NEXT:    s_sub_i32 s6, 0, s10
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s9
+; GFX10-NEXT:    s_xor_b32 s13, s13, s3
+; GFX10-NEXT:    s_sub_i32 s6, 0, s14
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s13
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; GFX10-NEXT:    s_sub_i32 s7, 0, s11
-; GFX10-NEXT:    s_sub_i32 s19, 0, s8
+; GFX10-NEXT:    s_sub_i32 s7, 0, s15
+; GFX10-NEXT:    s_sub_i32 s19, 0, s12
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
+; GFX10-NEXT:    s_ashr_i32 s16, s8, 31
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
-; GFX10-NEXT:    s_ashr_i32 s16, s0, 31
-; GFX10-NEXT:    s_ashr_i32 s17, s1, 31
-; GFX10-NEXT:    s_add_i32 s0, s0, s16
-; GFX10-NEXT:    s_ashr_i32 s18, s2, 31
-; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT:    s_ashr_i32 s18, s10, 31
+; GFX10-NEXT:    s_ashr_i32 s17, s9, 31
+; GFX10-NEXT:    s_xor_b32 s20, s16, s0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v4
 ; GFX10-NEXT:    v_mul_f32_e32 v2, v2, v4
-; GFX10-NEXT:    s_xor_b32 s0, s0, s16
+; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
+; GFX10-NEXT:    s_xor_b32 s21, s17, s1
 ; GFX10-NEXT:    v_mul_f32_e32 v3, v3, v4
-; GFX10-NEXT:    v_mul_lo_u32 v4, s6, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX10-NEXT:    s_sub_i32 s6, 0, s9
+; GFX10-NEXT:    v_mul_lo_u32 v4, s6, v0
+; GFX10-NEXT:    s_sub_i32 s6, 0, s13
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX10-NEXT:    s_add_i32 s1, s1, s17
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s7, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v6, s19, v2
-; GFX10-NEXT:    v_mul_hi_u32 v4, v0, v4
+; GFX10-NEXT:    s_ashr_i32 s19, s11, 31
+; GFX10-NEXT:    s_add_i32 s7, s9, s17
 ; GFX10-NEXT:    v_mul_lo_u32 v7, s6, v3
-; GFX10-NEXT:    s_add_i32 s2, s2, s18
-; GFX10-NEXT:    s_ashr_i32 s19, s3, 31
-; GFX10-NEXT:    s_xor_b32 s1, s1, s17
-; GFX10-NEXT:    s_xor_b32 s2, s2, s18
+; GFX10-NEXT:    v_mul_hi_u32 v4, v0, v4
+; GFX10-NEXT:    s_add_i32 s6, s8, s16
+; GFX10-NEXT:    s_add_i32 s8, s10, s18
 ; GFX10-NEXT:    v_mul_hi_u32 v5, v1, v5
 ; GFX10-NEXT:    v_mul_hi_u32 v6, v2, v6
-; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v4
+; GFX10-NEXT:    s_xor_b32 s10, s6, s16
+; GFX10-NEXT:    s_add_i32 s9, s11, s19
 ; GFX10-NEXT:    v_mul_hi_u32 v7, v3, v7
-; GFX10-NEXT:    s_add_i32 s3, s3, s19
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX10-NEXT:    s_xor_b32 s3, s3, s19
-; GFX10-NEXT:    v_mul_hi_u32 v0, s0, v0
+; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v4
+; GFX10-NEXT:    s_xor_b32 s11, s7, s17
+; GFX10-NEXT:    s_xor_b32 s8, s8, s18
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v5
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, v2, v6
-; GFX10-NEXT:    s_xor_b32 s12, s16, s12
+; GFX10-NEXT:    v_mul_hi_u32 v0, s10, v0
+; GFX10-NEXT:    s_xor_b32 s9, s9, s19
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v7
-; GFX10-NEXT:    s_xor_b32 s13, s17, s13
-; GFX10-NEXT:    v_mul_hi_u32 v1, s1, v1
-; GFX10-NEXT:    v_mul_hi_u32 v2, s2, v2
-; GFX10-NEXT:    v_mul_lo_u32 v4, v0, s10
-; GFX10-NEXT:    v_mul_hi_u32 v3, s3, v3
+; GFX10-NEXT:    v_mul_hi_u32 v1, s11, v1
+; GFX10-NEXT:    v_mul_hi_u32 v2, s8, v2
+; GFX10-NEXT:    s_xor_b32 s22, s18, s2
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX10-NEXT:    v_mul_hi_u32 v3, s9, v3
+; GFX10-NEXT:    v_mul_lo_u32 v4, v0, s14
 ; GFX10-NEXT:    v_add_nc_u32_e32 v8, 1, v0
-; GFX10-NEXT:    s_xor_b32 s14, s18, s14
-; GFX10-NEXT:    v_mul_lo_u32 v5, v1, s11
-; GFX10-NEXT:    v_mul_lo_u32 v6, v2, s8
-; GFX10-NEXT:    v_sub_nc_u32_e32 v4, s0, v4
-; GFX10-NEXT:    v_mul_lo_u32 v7, v3, s9
+; GFX10-NEXT:    v_mul_lo_u32 v5, v1, s15
+; GFX10-NEXT:    v_mul_lo_u32 v6, v2, s12
 ; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v2
+; GFX10-NEXT:    v_mul_lo_u32 v7, v3, s13
+; GFX10-NEXT:    v_sub_nc_u32_e32 v4, s10, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v3
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s10, v4
-; GFX10-NEXT:    v_sub_nc_u32_e32 v5, s1, v5
-; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s2, v6
-; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s3, v7
+; GFX10-NEXT:    v_sub_nc_u32_e32 v5, s11, v5
+; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s8, v6
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s14, v4
+; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s9, v7
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s15, v5
+; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s12, v6
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, s10, v4
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s11, v5
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s8, v6
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s9, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, s14, v4
+; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s13, v7
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s11, v5
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s15, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s1
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s8, v6
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s12, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s2
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v11, s9, v7
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v11, s13, v7
 ; GFX10-NEXT:    v_add_nc_u32_e32 v8, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s10, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s1
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s14, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v11, s2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v1
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, s10, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s11, v5
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s8, v6
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s15, v5
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, s14, v4
+; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s12, v6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v3
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s9, v7
+; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s13, v7
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v9, s0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s11, v5
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s15, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s1
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s8, v6
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v12, s9, v7
-; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v11, vcc_lo
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s12, v6
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v8, vcc_lo
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v8, s13, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v11, s2
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v9, s0
-; GFX10-NEXT:    s_xor_b32 s0, s19, s15
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v10, s1
-; GFX10-NEXT:    v_cndmask_b32_e32 v7, v7, v12, vcc_lo
-; GFX10-NEXT:    v_xor_b32_e32 v0, s12, v0
-; GFX10-NEXT:    v_xor_b32_e32 v1, s13, v1
-; GFX10-NEXT:    v_xor_b32_e32 v2, s14, v2
+; GFX10-NEXT:    s_xor_b32 s0, s19, s3
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v8, s2
+; GFX10-NEXT:    v_xor_b32_e32 v0, s20, v0
+; GFX10-NEXT:    v_xor_b32_e32 v1, s21, v1
+; GFX10-NEXT:    v_xor_b32_e32 v2, s22, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v3, s0, v3
 ; GFX10-NEXT:    v_xor_b32_e32 v4, s16, v4
 ; GFX10-NEXT:    v_xor_b32_e32 v5, s17, v5
 ; GFX10-NEXT:    v_xor_b32_e32 v6, s18, v6
 ; GFX10-NEXT:    v_xor_b32_e32 v7, s19, v7
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v0, s12, v0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, s13, v1
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, s14, v2
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v0, s20, v0
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v1, s21, v1
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, s22, v2
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v3, s0, v3
 ; GFX10-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX10-NEXT:    v_subrev_nc_u32_e32 v4, s16, v4
@@ -1338,27 +1330,26 @@ define amdgpu_kernel void @sdivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64> addrspace(1)* %out1, <2 x i64> %x, <2 x i64> %y) {
 ; GFX8-LABEL: sdivrem_v2i64:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
-; GFX8-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x20
+; GFX8-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_ashr_i32 s6, s9, 31
-; GFX8-NEXT:    s_ashr_i32 s12, s1, 31
-; GFX8-NEXT:    s_add_u32 s14, s8, s6
-; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX8-NEXT:    s_and_b32 s7, s7, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX8-NEXT:    s_addc_u32 s15, s9, s6
-; GFX8-NEXT:    s_add_u32 s0, s0, s12
-; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX8-NEXT:    s_and_b32 s7, s7, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX8-NEXT:    s_mov_b32 s13, s12
-; GFX8-NEXT:    s_addc_u32 s1, s1, s12
-; GFX8-NEXT:    s_xor_b64 s[8:9], s[0:1], s[12:13]
+; GFX8-NEXT:    s_ashr_i32 s2, s9, 31
+; GFX8-NEXT:    s_ashr_i32 s6, s13, 31
+; GFX8-NEXT:    s_add_u32 s0, s8, s2
+; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX8-NEXT:    s_and_b32 s1, s1, 1
+; GFX8-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX8-NEXT:    s_addc_u32 s1, s9, s2
+; GFX8-NEXT:    s_add_u32 s8, s12, s6
+; GFX8-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX8-NEXT:    s_and_b32 s3, s3, 1
+; GFX8-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX8-NEXT:    s_mov_b32 s7, s6
+; GFX8-NEXT:    s_addc_u32 s9, s13, s6
+; GFX8-NEXT:    s_xor_b64 s[8:9], s[8:9], s[6:7]
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s9
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s8
-; GFX8-NEXT:    s_mov_b32 s7, s6
-; GFX8-NEXT:    s_xor_b64 s[14:15], s[14:15], s[6:7]
+; GFX8-NEXT:    s_mov_b32 s3, s2
+; GFX8-NEXT:    s_xor_b64 s[12:13], s[0:1], s[2:3]
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX8-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
@@ -1433,24 +1424,24 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v2, s15, v0
-; GFX8-NEXT:    v_mul_lo_u32 v3, s14, v1
-; GFX8-NEXT:    v_mul_hi_u32 v5, s14, v0
-; GFX8-NEXT:    v_mul_hi_u32 v0, s15, v0
-; GFX8-NEXT:    v_mov_b32_e32 v4, s15
+; GFX8-NEXT:    v_mul_lo_u32 v2, s13, v0
+; GFX8-NEXT:    v_mul_lo_u32 v3, s12, v1
+; GFX8-NEXT:    v_mul_hi_u32 v5, s12, v0
+; GFX8-NEXT:    v_mul_hi_u32 v0, s13, v0
+; GFX8-NEXT:    v_mov_b32_e32 v4, s13
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v5, s15, v1
+; GFX8-NEXT:    v_mul_lo_u32 v5, s13, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_mul_hi_u32 v3, s14, v1
+; GFX8-NEXT:    v_mul_hi_u32 v3, s12, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v5, v0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
 ; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v3
-; GFX8-NEXT:    v_mul_hi_u32 v1, s15, v1
+; GFX8-NEXT:    v_mul_hi_u32 v1, s13, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
@@ -1461,9 +1452,9 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_mul_lo_u32 v5, s8, v0
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v7
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s14, v5
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s12, v5
 ; GFX8-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v2, vcc
-; GFX8-NEXT:    v_sub_u32_e64 v2, s[0:1], s15, v2
+; GFX8-NEXT:    v_sub_u32_e64 v2, s[0:1], s13, v2
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v4
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[0:1]
 ; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v3
@@ -1494,37 +1485,37 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, v7, v6, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, v8, v2, s[0:1]
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[6:7], s[12:13]
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[6:7]
 ; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
-; GFX8-NEXT:    s_ashr_i32 s8, s11, 31
-; GFX8-NEXT:    s_ashr_i32 s12, s3, 31
+; GFX8-NEXT:    s_ashr_i32 s6, s11, 31
+; GFX8-NEXT:    s_ashr_i32 s8, s15, 31
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, s0, v0
-; GFX8-NEXT:    s_add_u32 s0, s10, s8
+; GFX8-NEXT:    s_add_u32 s0, s10, s6
 ; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v1
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s1
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    s_and_b32 s1, s1, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s1, 0
-; GFX8-NEXT:    s_addc_u32 s1, s11, s8
-; GFX8-NEXT:    s_add_u32 s2, s2, s12
-; GFX8-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX8-NEXT:    s_and_b32 s7, s7, 1
-; GFX8-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX8-NEXT:    s_mov_b32 s13, s12
-; GFX8-NEXT:    s_addc_u32 s3, s3, s12
-; GFX8-NEXT:    s_xor_b64 s[2:3], s[2:3], s[12:13]
-; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v4, s3
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v5, s2
+; GFX8-NEXT:    s_addc_u32 s1, s11, s6
+; GFX8-NEXT:    s_add_u32 s10, s14, s8
+; GFX8-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX8-NEXT:    s_and_b32 s3, s3, 1
+; GFX8-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX8-NEXT:    s_mov_b32 s9, s8
-; GFX8-NEXT:    s_xor_b64 s[10:11], s[0:1], s[8:9]
+; GFX8-NEXT:    s_addc_u32 s11, s15, s8
+; GFX8-NEXT:    s_xor_b64 s[10:11], s[10:11], s[8:9]
+; GFX8-NEXT:    v_subb_u32_e32 v1, vcc, v1, v4, vcc
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v4, s11
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v5, s10
+; GFX8-NEXT:    s_mov_b32 s7, s6
+; GFX8-NEXT:    s_xor_b64 s[12:13], s[0:1], s[6:7]
 ; GFX8-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v4
 ; GFX8-NEXT:    v_add_f32_e32 v4, v4, v5
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GFX8-NEXT:    s_sub_u32 s0, 0, s2
+; GFX8-NEXT:    s_sub_u32 s0, 0, s10
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    s_and_b32 s1, s1, 1
 ; GFX8-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
@@ -1535,19 +1526,19 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v7, v4
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; GFX8-NEXT:    s_cmp_lg_u32 s1, 0
-; GFX8-NEXT:    s_subb_u32 s1, 0, s3
+; GFX8-NEXT:    s_subb_u32 s1, 0, s11
 ; GFX8-NEXT:    v_mul_lo_u32 v4, s1, v7
 ; GFX8-NEXT:    v_mul_lo_u32 v8, s0, v6
 ; GFX8-NEXT:    v_mul_hi_u32 v10, s0, v7
 ; GFX8-NEXT:    v_mul_lo_u32 v9, s0, v7
-; GFX8-NEXT:    v_xor_b32_e32 v3, s6, v3
+; GFX8-NEXT:    v_xor_b32_e32 v3, s2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v8
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, v4, v10
-; GFX8-NEXT:    v_xor_b32_e32 v2, s6, v2
-; GFX8-NEXT:    v_mov_b32_e32 v5, s6
+; GFX8-NEXT:    v_xor_b32_e32 v2, s2, v2
+; GFX8-NEXT:    v_mov_b32_e32 v5, s2
 ; GFX8-NEXT:    v_mul_lo_u32 v10, v6, v9
 ; GFX8-NEXT:    v_mul_lo_u32 v11, v7, v8
-; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s6, v3
+; GFX8-NEXT:    v_subrev_u32_e32 v4, vcc, s2, v3
 ; GFX8-NEXT:    v_subb_u32_e32 v5, vcc, v2, v5, vcc
 ; GFX8-NEXT:    v_mul_hi_u32 v2, v7, v9
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v10, v11
@@ -1574,14 +1565,13 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_mul_lo_u32 v7, s0, v3
 ; GFX8-NEXT:    v_mul_hi_u32 v9, s0, v2
 ; GFX8-NEXT:    v_mul_lo_u32 v8, s0, v2
-; GFX8-NEXT:    v_mov_b32_e32 v10, s3
+; GFX8-NEXT:    v_mov_b32_e32 v10, s11
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v9
 ; GFX8-NEXT:    v_mul_lo_u32 v7, v3, v8
 ; GFX8-NEXT:    v_mul_lo_u32 v9, v2, v6
 ; GFX8-NEXT:    v_mul_hi_u32 v11, v2, v8
 ; GFX8-NEXT:    v_mul_hi_u32 v8, v3, v8
-; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v9
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v7, v11
@@ -1601,51 +1591,51 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v7
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v6, s11, v2
-; GFX8-NEXT:    v_mul_lo_u32 v7, s10, v3
-; GFX8-NEXT:    v_mul_hi_u32 v9, s10, v2
-; GFX8-NEXT:    v_mul_hi_u32 v2, s11, v2
-; GFX8-NEXT:    v_mov_b32_e32 v8, s11
+; GFX8-NEXT:    v_mul_lo_u32 v6, s13, v2
+; GFX8-NEXT:    v_mul_lo_u32 v7, s12, v3
+; GFX8-NEXT:    v_mul_hi_u32 v9, s12, v2
+; GFX8-NEXT:    v_mul_hi_u32 v2, s13, v2
+; GFX8-NEXT:    v_mov_b32_e32 v8, s13
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v9
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v9, s11, v3
+; GFX8-NEXT:    v_mul_lo_u32 v9, s13, v3
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v7, v6
-; GFX8-NEXT:    v_mul_hi_u32 v7, s10, v3
+; GFX8-NEXT:    v_mul_hi_u32 v7, s12, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v9, v2
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v9, v7
-; GFX8-NEXT:    v_mul_hi_u32 v3, s11, v3
+; GFX8-NEXT:    v_mul_hi_u32 v3, s13, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v7, v6
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v6
-; GFX8-NEXT:    v_mul_lo_u32 v6, s3, v2
-; GFX8-NEXT:    v_mul_lo_u32 v7, s2, v3
-; GFX8-NEXT:    v_mul_hi_u32 v11, s2, v2
-; GFX8-NEXT:    v_mul_lo_u32 v9, s2, v2
+; GFX8-NEXT:    v_mul_lo_u32 v6, s11, v2
+; GFX8-NEXT:    v_mul_lo_u32 v7, s10, v3
+; GFX8-NEXT:    v_mul_hi_u32 v11, s10, v2
+; GFX8-NEXT:    v_mul_lo_u32 v9, s10, v2
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v11
-; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s10, v9
+; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s12, v9
 ; GFX8-NEXT:    v_subb_u32_e64 v8, s[0:1], v8, v6, vcc
-; GFX8-NEXT:    v_sub_u32_e64 v6, s[0:1], s11, v6
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v8
+; GFX8-NEXT:    v_sub_u32_e64 v6, s[0:1], s13, v6
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v7
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v8
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v8
 ; GFX8-NEXT:    v_subb_u32_e32 v6, vcc, v6, v10, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[0:1]
-; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, s2, v7
+; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, s10, v7
 ; GFX8-NEXT:    v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v12
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v11
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v12
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[0:1]
 ; GFX8-NEXT:    v_add_u32_e64 v14, s[0:1], 1, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v6, vcc, v6, v10, vcc
@@ -1653,61 +1643,61 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 1, v14
 ; GFX8-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; GFX8-NEXT:    v_subrev_u32_e64 v13, s[0:1], s2, v11
+; GFX8-NEXT:    v_subrev_u32_e64 v13, s[0:1], s10, v11
 ; GFX8-NEXT:    v_subbrev_u32_e64 v6, s[0:1], 0, v6, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v10, v14, v10, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v14, v15, v16, vcc
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v9
 ; GFX8-NEXT:    v_cndmask_b32_e32 v9, v11, v13, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v12, v6, vcc
+; GFX8-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, v2, v10, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, v8, v6, s[0:1]
-; GFX8-NEXT:    s_xor_b64 s[0:1], s[8:9], s[12:13]
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[6:7], s[8:9]
 ; GFX8-NEXT:    v_xor_b32_e32 v2, s0, v2
 ; GFX8-NEXT:    v_xor_b32_e32 v3, s1, v3
 ; GFX8-NEXT:    v_mov_b32_e32 v8, s1
 ; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s0, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v3, vcc, v3, v8, vcc
-; GFX8-NEXT:    v_xor_b32_e32 v7, s8, v7
-; GFX8-NEXT:    v_xor_b32_e32 v8, s8, v6
-; GFX8-NEXT:    v_mov_b32_e32 v9, s8
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s8, v7
+; GFX8-NEXT:    v_xor_b32_e32 v7, s6, v7
+; GFX8-NEXT:    v_xor_b32_e32 v8, s6, v6
+; GFX8-NEXT:    v_mov_b32_e32 v9, s6
+; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s6, v7
 ; GFX8-NEXT:    v_subb_u32_e32 v7, vcc, v8, v9, vcc
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_mov_b32_e32 v9, s5
-; GFX8-NEXT:    v_mov_b32_e32 v8, s4
+; GFX8-NEXT:    v_mov_b32_e32 v8, s12
+; GFX8-NEXT:    v_mov_b32_e32 v9, s13
 ; GFX8-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
 ; GFX8-NEXT:    s_nop 0
-; GFX8-NEXT:    v_mov_b32_e32 v0, s6
-; GFX8-NEXT:    v_mov_b32_e32 v1, s7
+; GFX8-NEXT:    v_mov_b32_e32 v0, s14
+; GFX8-NEXT:    v_mov_b32_e32 v1, s15
 ; GFX8-NEXT:    flat_store_dwordx4 v[0:1], v[4:7]
 ; GFX8-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: sdivrem_v2i64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x20
+; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_ashr_i32 s6, s9, 31
-; GFX9-NEXT:    s_ashr_i32 s12, s1, 31
-; GFX9-NEXT:    s_add_u32 s14, s8, s6
-; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX9-NEXT:    s_and_b32 s7, s7, 1
-; GFX9-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX9-NEXT:    s_addc_u32 s15, s9, s6
-; GFX9-NEXT:    s_add_u32 s0, s0, s12
-; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX9-NEXT:    s_and_b32 s7, s7, 1
-; GFX9-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX9-NEXT:    s_mov_b32 s13, s12
-; GFX9-NEXT:    s_addc_u32 s1, s1, s12
-; GFX9-NEXT:    s_xor_b64 s[8:9], s[0:1], s[12:13]
+; GFX9-NEXT:    s_ashr_i32 s2, s9, 31
+; GFX9-NEXT:    s_ashr_i32 s6, s13, 31
+; GFX9-NEXT:    s_add_u32 s0, s8, s2
+; GFX9-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX9-NEXT:    s_and_b32 s1, s1, 1
+; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX9-NEXT:    s_addc_u32 s1, s9, s2
+; GFX9-NEXT:    s_add_u32 s8, s12, s6
+; GFX9-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX9-NEXT:    s_and_b32 s3, s3, 1
+; GFX9-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX9-NEXT:    s_mov_b32 s7, s6
+; GFX9-NEXT:    s_addc_u32 s9, s13, s6
+; GFX9-NEXT:    s_xor_b64 s[8:9], s[8:9], s[6:7]
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s9
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s8
-; GFX9-NEXT:    s_mov_b32 s7, s6
-; GFX9-NEXT:    s_xor_b64 s[14:15], s[14:15], s[6:7]
+; GFX9-NEXT:    s_mov_b32 s3, s2
+; GFX9-NEXT:    s_xor_b64 s[12:13], s[0:1], s[2:3]
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
@@ -1727,7 +1717,7 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s1, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v4, s0, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v5, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v7, s15
+; GFX9-NEXT:    v_mov_b32_e32 v7, s13
 ; GFX9-NEXT:    v_add3_u32 v2, v3, v2, v4
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v1, v5
 ; GFX9-NEXT:    v_mul_lo_u32 v4, v0, v2
@@ -1778,19 +1768,19 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add3_u32 v2, v5, v4, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v2, s15, v0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s14, v1
-; GFX9-NEXT:    v_mul_hi_u32 v4, s14, v0
-; GFX9-NEXT:    v_mul_hi_u32 v0, s15, v0
+; GFX9-NEXT:    v_mul_lo_u32 v2, s13, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s12, v1
+; GFX9-NEXT:    v_mul_hi_u32 v4, s12, v0
+; GFX9-NEXT:    v_mul_hi_u32 v0, s13, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s9
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v4, s15, v1
+; GFX9-NEXT:    v_mul_lo_u32 v4, s13, v1
 ; GFX9-NEXT:    v_add_u32_e32 v2, v3, v2
-; GFX9-NEXT:    v_mul_hi_u32 v3, s14, v1
-; GFX9-NEXT:    v_mul_hi_u32 v1, s15, v1
+; GFX9-NEXT:    v_mul_hi_u32 v3, s12, v1
+; GFX9-NEXT:    v_mul_hi_u32 v1, s13, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v4, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
@@ -1804,10 +1794,10 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_mul_hi_u32 v4, s8, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v6, s8, v0
 ; GFX9-NEXT:    v_add3_u32 v2, v2, v3, v4
-; GFX9-NEXT:    v_sub_co_u32_e32 v3, vcc, s14, v6
+; GFX9-NEXT:    v_sub_co_u32_e32 v3, vcc, s12, v6
 ; GFX9-NEXT:    v_subb_co_u32_e64 v4, s[0:1], v7, v2, vcc
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v4
-; GFX9-NEXT:    v_sub_u32_e32 v2, s15, v2
+; GFX9-NEXT:    v_sub_u32_e32 v2, s13, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
 ; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
@@ -1832,35 +1822,35 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, v7, v5, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, v8, v2, s[0:1]
-; GFX9-NEXT:    s_xor_b64 s[0:1], s[6:7], s[12:13]
-; GFX9-NEXT:    s_ashr_i32 s8, s11, 31
-; GFX9-NEXT:    s_ashr_i32 s12, s3, 31
-; GFX9-NEXT:    s_add_u32 s10, s10, s8
-; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX9-NEXT:    s_and_b32 s7, s7, 1
-; GFX9-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX9-NEXT:    s_addc_u32 s11, s11, s8
-; GFX9-NEXT:    s_add_u32 s2, s2, s12
-; GFX9-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX9-NEXT:    s_and_b32 s7, s7, 1
-; GFX9-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX9-NEXT:    s_mov_b32 s13, s12
-; GFX9-NEXT:    s_addc_u32 s3, s3, s12
+; GFX9-NEXT:    s_xor_b64 s[0:1], s[2:3], s[6:7]
+; GFX9-NEXT:    s_ashr_i32 s6, s11, 31
+; GFX9-NEXT:    s_ashr_i32 s8, s15, 31
+; GFX9-NEXT:    s_add_u32 s12, s10, s6
+; GFX9-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX9-NEXT:    s_and_b32 s3, s3, 1
+; GFX9-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX9-NEXT:    s_addc_u32 s13, s11, s6
+; GFX9-NEXT:    s_add_u32 s10, s14, s8
+; GFX9-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX9-NEXT:    s_and_b32 s3, s3, 1
+; GFX9-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX9-NEXT:    s_mov_b32 s9, s8
+; GFX9-NEXT:    s_addc_u32 s11, s15, s8
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
-; GFX9-NEXT:    s_xor_b64 s[2:3], s[2:3], s[12:13]
+; GFX9-NEXT:    s_xor_b64 s[10:11], s[10:11], s[8:9]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v9, v9, v12, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v10, v10, v13, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v6, s3
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, s2
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v6, s11
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, s10
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
 ; GFX9-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v6
 ; GFX9-NEXT:    v_add_f32_e32 v4, v4, v7
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v4
-; GFX9-NEXT:    s_mov_b32 s9, s8
-; GFX9-NEXT:    s_xor_b64 s[10:11], s[10:11], s[8:9]
-; GFX9-NEXT:    s_sub_u32 s7, 0, s2
+; GFX9-NEXT:    s_mov_b32 s7, s6
+; GFX9-NEXT:    s_xor_b64 s[12:13], s[12:13], s[6:7]
+; GFX9-NEXT:    s_sub_u32 s3, 0, s10
 ; GFX9-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GFX9-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
@@ -1871,12 +1861,12 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    s_cselect_b32 s14, 1, 0
 ; GFX9-NEXT:    s_and_b32 s14, s14, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s14, 0
-; GFX9-NEXT:    s_subb_u32 s14, 0, s3
+; GFX9-NEXT:    s_subb_u32 s14, 0, s11
 ; GFX9-NEXT:    v_mul_lo_u32 v6, s14, v4
-; GFX9-NEXT:    v_mul_lo_u32 v7, s7, v5
-; GFX9-NEXT:    v_mul_hi_u32 v8, s7, v4
+; GFX9-NEXT:    v_mul_lo_u32 v7, s3, v5
+; GFX9-NEXT:    v_mul_hi_u32 v8, s3, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v9, s7, v4
+; GFX9-NEXT:    v_mul_lo_u32 v9, s3, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX9-NEXT:    v_add3_u32 v6, v6, v7, v8
@@ -1907,17 +1897,17 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v7
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v5, v6, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v6, s14, v4
-; GFX9-NEXT:    v_mul_lo_u32 v7, s7, v5
-; GFX9-NEXT:    v_mul_hi_u32 v8, s7, v4
-; GFX9-NEXT:    v_mul_lo_u32 v9, s7, v4
-; GFX9-NEXT:    v_xor_b32_e32 v3, s6, v3
-; GFX9-NEXT:    v_xor_b32_e32 v2, s6, v2
+; GFX9-NEXT:    v_mul_lo_u32 v7, s3, v5
+; GFX9-NEXT:    v_mul_hi_u32 v8, s3, v4
+; GFX9-NEXT:    v_mul_lo_u32 v9, s3, v4
+; GFX9-NEXT:    v_xor_b32_e32 v3, s2, v3
+; GFX9-NEXT:    v_xor_b32_e32 v2, s2, v2
 ; GFX9-NEXT:    v_add3_u32 v6, v6, v7, v8
 ; GFX9-NEXT:    v_mul_lo_u32 v7, v5, v9
 ; GFX9-NEXT:    v_mul_lo_u32 v8, v4, v6
 ; GFX9-NEXT:    v_mul_hi_u32 v11, v4, v9
 ; GFX9-NEXT:    v_mul_hi_u32 v9, v5, v9
-; GFX9-NEXT:    v_mov_b32_e32 v10, s6
+; GFX9-NEXT:    v_mov_b32_e32 v10, s2
 ; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v8
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v11
@@ -1936,20 +1926,20 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add3_u32 v6, v9, v8, v6
 ; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v4, v7
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v5, v6, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v8, s11, v7
-; GFX9-NEXT:    v_mul_lo_u32 v9, s10, v6
-; GFX9-NEXT:    v_subrev_co_u32_e32 v4, vcc, s6, v3
+; GFX9-NEXT:    v_mul_lo_u32 v8, s13, v7
+; GFX9-NEXT:    v_mul_lo_u32 v9, s12, v6
+; GFX9-NEXT:    v_subrev_co_u32_e32 v4, vcc, s2, v3
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v2, v10, vcc
-; GFX9-NEXT:    v_mul_hi_u32 v2, s10, v7
+; GFX9-NEXT:    v_mul_hi_u32 v2, s12, v7
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v8, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v3, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v3, s11, v6
-; GFX9-NEXT:    v_mul_hi_u32 v7, s11, v7
+; GFX9-NEXT:    v_mul_lo_u32 v3, s13, v6
+; GFX9-NEXT:    v_mul_hi_u32 v7, s13, v7
 ; GFX9-NEXT:    v_add_u32_e32 v2, v8, v2
-; GFX9-NEXT:    v_mul_hi_u32 v8, s10, v6
-; GFX9-NEXT:    v_mul_hi_u32 v6, s11, v6
+; GFX9-NEXT:    v_mul_hi_u32 v8, s12, v6
+; GFX9-NEXT:    v_mul_hi_u32 v6, s13, v6
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v8
@@ -1958,30 +1948,30 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add_u32_e32 v7, v7, v8
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX9-NEXT:    v_add3_u32 v3, v7, v3, v6
-; GFX9-NEXT:    v_mul_lo_u32 v6, s3, v2
-; GFX9-NEXT:    v_mul_lo_u32 v7, s2, v3
-; GFX9-NEXT:    v_mul_hi_u32 v8, s2, v2
-; GFX9-NEXT:    v_mul_lo_u32 v10, s2, v2
-; GFX9-NEXT:    v_mov_b32_e32 v11, s11
-; GFX9-NEXT:    v_mov_b32_e32 v9, s3
+; GFX9-NEXT:    v_mul_lo_u32 v6, s11, v2
+; GFX9-NEXT:    v_mul_lo_u32 v7, s10, v3
+; GFX9-NEXT:    v_mul_hi_u32 v8, s10, v2
+; GFX9-NEXT:    v_mul_lo_u32 v10, s10, v2
+; GFX9-NEXT:    v_mov_b32_e32 v11, s13
+; GFX9-NEXT:    v_mov_b32_e32 v9, s11
 ; GFX9-NEXT:    v_add3_u32 v6, v6, v7, v8
-; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, s10, v10
+; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, s12, v10
 ; GFX9-NEXT:    v_subb_co_u32_e64 v8, s[0:1], v11, v6, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v8
-; GFX9-NEXT:    v_sub_u32_e32 v6, s11, v6
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v8
+; GFX9-NEXT:    v_sub_u32_e32 v6, s13, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v7
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v8
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v8
 ; GFX9-NEXT:    v_subb_co_u32_e32 v6, vcc, v6, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[0:1]
-; GFX9-NEXT:    v_subrev_co_u32_e32 v11, vcc, s2, v7
+; GFX9-NEXT:    v_subrev_co_u32_e32 v11, vcc, s10, v7
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v12, s[0:1], 0, v6, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v12
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v11
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v12
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v14, s[0:1], 1, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v6, vcc, v6, v9, vcc
@@ -1991,96 +1981,94 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
 ; GFX9-NEXT:    v_cndmask_b32_e32 v9, v14, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v14, v15, v16, vcc
-; GFX9-NEXT:    v_subrev_co_u32_e64 v15, s[0:1], s2, v11
+; GFX9-NEXT:    v_subrev_co_u32_e64 v15, s[0:1], s10, v11
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v6, s[0:1], 0, v6, s[0:1]
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v10
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, v2, v9, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v9, v11, v15, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v12, v6, vcc
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v14, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, v8, v6, s[0:1]
-; GFX9-NEXT:    s_xor_b64 s[0:1], s[8:9], s[12:13]
+; GFX9-NEXT:    s_xor_b64 s[0:1], s[6:7], s[8:9]
 ; GFX9-NEXT:    v_xor_b32_e32 v2, s0, v2
 ; GFX9-NEXT:    v_xor_b32_e32 v3, s1, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v8, s1
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v2, vcc, s0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v3, v8, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v7, s8, v7
+; GFX9-NEXT:    v_xor_b32_e32 v7, s6, v7
 ; GFX9-NEXT:    v_mov_b32_e32 v13, 0
-; GFX9-NEXT:    v_xor_b32_e32 v8, s8, v6
-; GFX9-NEXT:    v_mov_b32_e32 v9, s8
-; GFX9-NEXT:    v_subrev_co_u32_e32 v6, vcc, s8, v7
+; GFX9-NEXT:    v_xor_b32_e32 v8, s6, v6
+; GFX9-NEXT:    v_mov_b32_e32 v9, s6
+; GFX9-NEXT:    v_subrev_co_u32_e32 v6, vcc, s6, v7
 ; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v8, v9, vcc
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_store_dwordx4 v13, v[0:3], s[4:5]
-; GFX9-NEXT:    global_store_dwordx4 v13, v[4:7], s[6:7]
+; GFX9-NEXT:    global_store_dwordx4 v13, v[0:3], s[12:13]
+; GFX9-NEXT:    global_store_dwordx4 v13, v[4:7], s[14:15]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: sdivrem_v2i64:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
-; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x20
+; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_ashr_i32 s12, s9, 31
-; GFX10-NEXT:    s_ashr_i32 s6, s1, 31
-; GFX10-NEXT:    s_add_u32 s14, s8, s12
-; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX10-NEXT:    s_mov_b32 s13, s12
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
-; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX10-NEXT:    s_addc_u32 s15, s9, s12
-; GFX10-NEXT:    s_add_u32 s0, s0, s6
-; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
-; GFX10-NEXT:    s_and_b32 s8, s7, 1
+; GFX10-NEXT:    s_ashr_i32 s2, s9, 31
+; GFX10-NEXT:    s_ashr_i32 s6, s13, 31
+; GFX10-NEXT:    s_add_u32 s0, s8, s2
+; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX10-NEXT:    s_mov_b32 s7, s6
-; GFX10-NEXT:    s_cmp_lg_u32 s8, 0
-; GFX10-NEXT:    s_addc_u32 s1, s1, s6
-; GFX10-NEXT:    s_xor_b64 s[8:9], s[0:1], s[6:7]
-; GFX10-NEXT:    s_xor_b64 s[0:1], s[14:15], s[12:13]
+; GFX10-NEXT:    s_and_b32 s1, s1, 1
+; GFX10-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX10-NEXT:    s_addc_u32 s1, s9, s2
+; GFX10-NEXT:    s_add_u32 s8, s12, s6
+; GFX10-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX10-NEXT:    s_and_b32 s3, s3, 1
+; GFX10-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX10-NEXT:    s_mov_b32 s3, s2
+; GFX10-NEXT:    s_addc_u32 s9, s13, s6
+; GFX10-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT:    s_xor_b64 s[8:9], s[8:9], s[6:7]
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s9
 ; GFX10-NEXT:    s_sub_u32 s20, 0, s8
-; GFX10-NEXT:    s_cselect_b32 s14, 1, 0
+; GFX10-NEXT:    s_cselect_b32 s12, 1, 0
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s8
-; GFX10-NEXT:    s_and_b32 s14, s14, 1
+; GFX10-NEXT:    s_and_b32 s12, s12, 1
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
-; GFX10-NEXT:    s_cmp_lg_u32 s14, 0
+; GFX10-NEXT:    s_cmp_lg_u32 s12, 0
 ; GFX10-NEXT:    s_subb_u32 s21, 0, s9
-; GFX10-NEXT:    s_ashr_i32 s14, s11, 31
-; GFX10-NEXT:    s_xor_b64 s[18:19], s[12:13], s[6:7]
-; GFX10-NEXT:    s_ashr_i32 s16, s3, 31
+; GFX10-NEXT:    s_ashr_i32 s12, s11, 31
+; GFX10-NEXT:    s_xor_b64 s[18:19], s[2:3], s[6:7]
+; GFX10-NEXT:    s_ashr_i32 s16, s15, 31
 ; GFX10-NEXT:    v_add_f32_e32 v0, v1, v0
-; GFX10-NEXT:    s_add_u32 s6, s10, s14
-; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
+; GFX10-NEXT:    s_add_u32 s6, s10, s12
+; GFX10-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX10-NEXT:    s_mov_b32 s17, s16
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
+; GFX10-NEXT:    s_and_b32 s3, s3, 1
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
-; GFX10-NEXT:    s_mov_b32 s15, s14
-; GFX10-NEXT:    s_addc_u32 s7, s11, s14
-; GFX10-NEXT:    s_add_u32 s2, s2, s16
-; GFX10-NEXT:    s_cselect_b32 s10, 1, 0
-; GFX10-NEXT:    s_and_b32 s10, s10, 1
-; GFX10-NEXT:    s_cmp_lg_u32 s10, 0
+; GFX10-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX10-NEXT:    s_mov_b32 s13, s12
+; GFX10-NEXT:    s_addc_u32 s7, s11, s12
+; GFX10-NEXT:    s_add_u32 s10, s14, s16
+; GFX10-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX10-NEXT:    s_and_b32 s3, s3, 1
+; GFX10-NEXT:    s_cmp_lg_u32 s3, 0
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
-; GFX10-NEXT:    s_addc_u32 s3, s3, s16
-; GFX10-NEXT:    s_xor_b64 s[10:11], s[6:7], s[14:15]
-; GFX10-NEXT:    s_xor_b64 s[2:3], s[2:3], s[16:17]
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s3
+; GFX10-NEXT:    s_addc_u32 s11, s15, s16
+; GFX10-NEXT:    s_xor_b64 s[14:15], s[6:7], s[12:13]
+; GFX10-NEXT:    s_xor_b64 s[10:11], s[10:11], s[16:17]
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s11
 ; GFX10-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s2
-; GFX10-NEXT:    s_sub_u32 s6, 0, s2
-; GFX10-NEXT:    s_cselect_b32 s7, 1, 0
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s10
+; GFX10-NEXT:    s_sub_u32 s3, 0, s10
+; GFX10-NEXT:    s_cselect_b32 s6, 1, 0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v1
 ; GFX10-NEXT:    v_trunc_f32_e32 v2, v2
-; GFX10-NEXT:    s_and_b32 s7, s7, 1
-; GFX10-NEXT:    s_cmp_lg_u32 s7, 0
+; GFX10-NEXT:    s_and_b32 s6, s6, 1
+; GFX10-NEXT:    s_cmp_lg_u32 s6, 0
 ; GFX10-NEXT:    v_add_f32_e32 v1, v1, v3
 ; GFX10-NEXT:    v_mul_f32_e32 v3, 0xcf800000, v2
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX10-NEXT:    s_subb_u32 s7, 0, s3
+; GFX10-NEXT:    s_subb_u32 s6, 0, s11
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX10-NEXT:    v_add_f32_e32 v0, v3, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v3, s20, v2
@@ -2102,23 +2090,23 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX10-NEXT:    v_add_f32_e32 v1, v9, v1
-; GFX10-NEXT:    v_add_co_u32 v5, s13, v5, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s13
-; GFX10-NEXT:    v_add_co_u32 v6, s13, v10, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s13
-; GFX10-NEXT:    v_add_co_u32 v5, s13, v5, v7
+; GFX10-NEXT:    v_add_co_u32 v5, s7, v5, v8
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s7
+; GFX10-NEXT:    v_add_co_u32 v6, s7, v10, v6
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s7
+; GFX10-NEXT:    v_add_co_u32 v5, s7, v5, v7
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s13
-; GFX10-NEXT:    v_mul_lo_u32 v9, s6, v4
-; GFX10-NEXT:    v_add_co_u32 v6, s13, v6, v11
-; GFX10-NEXT:    v_mul_lo_u32 v12, s7, v1
-; GFX10-NEXT:    v_mul_hi_u32 v13, s6, v1
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s7
+; GFX10-NEXT:    v_mul_lo_u32 v9, s3, v4
+; GFX10-NEXT:    v_add_co_u32 v6, s7, v6, v11
+; GFX10-NEXT:    v_mul_lo_u32 v12, s6, v1
+; GFX10-NEXT:    v_mul_hi_u32 v13, s3, v1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, v8, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s13
-; GFX10-NEXT:    v_mul_lo_u32 v11, s6, v1
-; GFX10-NEXT:    v_add_co_u32 v5, s13, v6, v5
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s7
+; GFX10-NEXT:    v_mul_lo_u32 v11, s3, v1
+; GFX10-NEXT:    v_add_co_u32 v5, s7, v6, v5
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, v10, v7
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s7
 ; GFX10-NEXT:    v_add3_u32 v8, v12, v9, v13
 ; GFX10-NEXT:    v_mul_lo_u32 v9, v4, v11
 ; GFX10-NEXT:    v_mul_hi_u32 v10, v1, v11
@@ -2130,218 +2118,218 @@ define amdgpu_kernel void @sdivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v3, vcc_lo
 ; GFX10-NEXT:    v_mul_hi_u32 v5, v1, v8
 ; GFX10-NEXT:    v_mul_lo_u32 v12, s21, v0
-; GFX10-NEXT:    v_add_co_u32 v6, s13, v9, v6
+; GFX10-NEXT:    v_add_co_u32 v6, s7, v9, v6
 ; GFX10-NEXT:    v_mul_hi_u32 v13, s20, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v14, s20, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s13
-; GFX10-NEXT:    v_add_co_u32 v7, s13, v7, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s13
-; GFX10-NEXT:    v_add_co_u32 v6, s13, v6, v10
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s7
+; GFX10-NEXT:    v_add_co_u32 v7, s7, v7, v11
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s7
+; GFX10-NEXT:    v_add_co_u32 v6, s7, v6, v10
 ; GFX10-NEXT:    v_mul_lo_u32 v3, s20, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s13
-; GFX10-NEXT:    v_add_co_u32 v5, s13, v7, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s7
+; GFX10-NEXT:    v_add_co_u32 v5, s7, v7, v5
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s7
 ; GFX10-NEXT:    v_add3_u32 v12, v12, v14, v13
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, v9, v6
 ; GFX10-NEXT:    v_mul_hi_u32 v8, v4, v8
 ; GFX10-NEXT:    v_mul_lo_u32 v10, v2, v3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, v11, v7
 ; GFX10-NEXT:    v_mul_lo_u32 v11, v0, v12
-; GFX10-NEXT:    v_add_co_u32 v5, s13, v5, v6
+; GFX10-NEXT:    v_add_co_u32 v5, s7, v5, v6
 ; GFX10-NEXT:    v_mul_hi_u32 v9, v0, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s13
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s7
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; GFX10-NEXT:    v_mul_lo_u32 v13, v2, v12
 ; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v5
-; GFX10-NEXT:    v_add_co_u32 v5, s13, v10, v11
-; GFX10-NEXT:    v_mul_hi_u32 v14, v0, v12
+; GFX10-NEXT:    v_add_co_u32 v5, s7, v10, v11
 ; GFX10-NEXT:    v_add3_u32 v6, v7, v6, v8
-; GFX10-NEXT:    v_mul_lo_u32 v10, s7, v1
-; GFX10-NEXT:    v_add_co_u32 v5, s7, v5, v9
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s13
-; GFX10-NEXT:    v_add_co_u32 v3, s13, v13, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s7
+; GFX10-NEXT:    v_mul_hi_u32 v14, v0, v12
+; GFX10-NEXT:    v_mul_lo_u32 v10, s6, v1
+; GFX10-NEXT:    v_add_co_u32 v5, s6, v5, v9
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s7
+; GFX10-NEXT:    v_add_co_u32 v3, s7, v13, v3
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v4, v6, vcc_lo
-; GFX10-NEXT:    v_add_co_u32 v3, s7, v3, v14
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s6
+; GFX10-NEXT:    v_mul_hi_u32 v11, s3, v1
+; GFX10-NEXT:    v_add_co_u32 v3, s6, v3, v14
+; GFX10-NEXT:    v_mul_lo_u32 v13, s3, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, v7, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s13
-; GFX10-NEXT:    v_mul_hi_u32 v11, s6, v1
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s7
-; GFX10-NEXT:    v_mul_lo_u32 v13, s6, v4
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s7
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s6
 ; GFX10-NEXT:    v_mul_hi_u32 v7, v2, v12
-; GFX10-NEXT:    v_mul_lo_u32 v6, s6, v1
-; GFX10-NEXT:    v_add_co_u32 v3, s6, v3, v5
+; GFX10-NEXT:    v_mul_lo_u32 v6, s3, v1
+; GFX10-NEXT:    v_add_co_u32 v3, s3, v3, v5
 ; GFX10-NEXT:    v_add_nc_u32_e32 v8, v8, v9
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s3
 ; GFX10-NEXT:    v_add3_u32 v9, v10, v13, v11
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v3
-; GFX10-NEXT:    v_add3_u32 v5, v8, v5, v7
 ; GFX10-NEXT:    v_mul_lo_u32 v10, v4, v6
+; GFX10-NEXT:    v_add3_u32 v5, v8, v5, v7
 ; GFX10-NEXT:    v_mul_lo_u32 v7, v1, v9
 ; GFX10-NEXT:    v_mul_hi_u32 v11, v1, v6
 ; GFX10-NEXT:    v_mul_hi_u32 v6, v4, v6
+; GFX10-NEXT:    v_mul_lo_u32 v8, v4, v9
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v5, vcc_lo
-; GFX10-NEXT:    v_mul_lo_u32 v3, v4, v9
+; GFX10-NEXT:    v_mul_hi_u32 v3, v1, v9
 ; GFX10-NEXT:    v_mul_lo_u32 v5, s1, v0
-; GFX10-NEXT:    v_mul_hi_u32 v12, s0, v0
+; GFX10-NEXT:    v_add_co_u32 v7, s3, v10, v7
 ; GFX10-NEXT:    v_mul_lo_u32 v13, s0, v2
-; GFX10-NEXT:    v_add_co_u32 v7, s6, v10, v7
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s3
+; GFX10-NEXT:    v_add_co_u32 v6, s3, v8, v6
+; GFX10-NEXT:    v_mul_hi_u32 v12, s0, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s1, v0
-; GFX10-NEXT:    v_add_co_u32 v3, s6, v3, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v7, s6, v7, v11
 ; GFX10-NEXT:    v_mul_lo_u32 v14, s1, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v5, s6, v5, v13
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s3
+; GFX10-NEXT:    v_add_co_u32 v7, s3, v7, v11
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s3
+; GFX10-NEXT:    v_add_co_u32 v3, s3, v6, v3
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s3
+; GFX10-NEXT:    v_add_co_u32 v5, s3, v5, v13
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s3
+; GFX10-NEXT:    v_add_co_u32 v0, s3, v14, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v15, s0, v2
-; GFX10-NEXT:    v_mul_hi_u32 v8, v1, v9
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v5, s7, v5, v12
-; GFX10-NEXT:    v_add_co_u32 v0, s6, v14, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s7
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s3
+; GFX10-NEXT:    v_add_co_u32 v5, s3, v5, v12
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s3
 ; GFX10-NEXT:    v_mul_hi_u32 v2, s1, v2
-; GFX10-NEXT:    v_add_co_u32 v0, s6, v0, v15
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v11, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v3, s6, v3, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v0, s6, v0, v5
-; GFX10-NEXT:    v_add_nc_u32_e32 v11, v12, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, v10, v7
+; GFX10-NEXT:    v_add_nc_u32_e32 v6, v8, v6
+; GFX10-NEXT:    v_add_co_u32 v0, s3, v0, v15
+; GFX10-NEXT:    v_add_nc_u32_e32 v5, v11, v5
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s3
 ; GFX10-NEXT:    v_mul_hi_u32 v9, v4, v9
-; GFX10-NEXT:    v_add_nc_u32_e32 v6, v6, v8
-; GFX10-NEXT:    v_mul_hi_u32 v8, s8, v0
-; GFX10-NEXT:    v_add3_u32 v2, v11, v5, v2
-; GFX10-NEXT:    v_add_co_u32 v3, s6, v3, v7
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s6
-; GFX10-NEXT:    v_mul_lo_u32 v7, s9, v0
-; GFX10-NEXT:    v_mul_lo_u32 v10, s8, v2
-; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v3
-; GFX10-NEXT:    v_add3_u32 v5, v6, v5, v9
-; GFX10-NEXT:    v_mul_lo_u32 v6, s8, v0
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX10-NEXT:    v_mov_b32_e32 v9, 0
-; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v4, v5, vcc_lo
-; GFX10-NEXT:    v_add3_u32 v4, v7, v10, v8
-; GFX10-NEXT:    v_mul_lo_u32 v5, s11, v1
-; GFX10-NEXT:    v_sub_co_u32 v6, vcc_lo, s0, v6
-; GFX10-NEXT:    v_mul_lo_u32 v14, s10, v3
-; GFX10-NEXT:    v_sub_nc_u32_e32 v8, s1, v4
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v4, s0, s1, v4, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s8, v6
-; GFX10-NEXT:    v_mul_hi_u32 v7, s11, v1
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v8, vcc_lo, s9, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s9, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s0
-; GFX10-NEXT:    v_mul_lo_u32 v15, s11, v3
-; GFX10-NEXT:    v_mul_hi_u32 v1, s10, v1
-; GFX10-NEXT:    v_mul_hi_u32 v17, s10, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, -1, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v12, vcc_lo, v6, s8
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v13, s0, 0, v8, vcc_lo
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s9, v4
-; GFX10-NEXT:    v_mul_hi_u32 v3, s11, v3
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v8, vcc_lo, s9, v8, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v11, v10, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s9, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s8, v12
+; GFX10-NEXT:    v_add_co_u32 v0, s3, v0, v5
+; GFX10-NEXT:    v_add_nc_u32_e32 v8, v13, v12
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s3
+; GFX10-NEXT:    v_add_co_u32 v3, s3, v3, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s3
+; GFX10-NEXT:    v_add3_u32 v2, v8, v5, v2
+; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v3
+; GFX10-NEXT:    v_add3_u32 v5, v6, v7, v9
+; GFX10-NEXT:    v_mul_lo_u32 v6, s9, v0
+; GFX10-NEXT:    v_mul_hi_u32 v7, s8, v0
+; GFX10-NEXT:    v_mul_lo_u32 v9, s8, v2
+; GFX10-NEXT:    v_mul_lo_u32 v3, s8, v0
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v4, vcc_lo, v4, v5, vcc_lo
+; GFX10-NEXT:    v_mul_lo_u32 v5, s15, v1
+; GFX10-NEXT:    v_mul_hi_u32 v10, s15, v1
+; GFX10-NEXT:    v_mul_hi_u32 v1, s14, v1
+; GFX10-NEXT:    v_mul_hi_u32 v17, s14, v4
+; GFX10-NEXT:    v_add3_u32 v6, v6, v9, v7
+; GFX10-NEXT:    v_sub_co_u32 v3, vcc_lo, s0, v3
+; GFX10-NEXT:    v_mul_lo_u32 v7, s14, v4
+; GFX10-NEXT:    v_mul_lo_u32 v9, s15, v4
+; GFX10-NEXT:    v_sub_nc_u32_e32 v11, s1, v6
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v6, s0, s1, v6, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s8, v3
+; GFX10-NEXT:    v_mul_hi_u32 v4, s15, v4
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v11, vcc_lo, s9, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s9, v6
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
+; GFX10-NEXT:    v_mov_b32_e32 v8, 0
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v14, vcc_lo, v3, s8
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v15, s0, 0, v11, vcc_lo
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s9, v6
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v11, vcc_lo, s9, v11, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, v13, v12, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s8, v14
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s9, v15
 ; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, -1, s0
-; GFX10-NEXT:    v_add_co_u32 v5, s0, v5, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v7, s0, v15, v7
+; GFX10-NEXT:    v_add_co_u32 v5, s0, v5, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v9, s0, v9, v10
 ; GFX10-NEXT:    v_add_co_u32 v1, s1, v5, v1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v7, s0, v7, v17
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v9, s0, v9, v17
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s0
 ; GFX10-NEXT:    v_add_co_u32 v17, s0, v0, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v18, s0, 0, v2, s0
-; GFX10-NEXT:    v_add_nc_u32_e32 v1, v14, v1
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s9, v13
-; GFX10-NEXT:    v_add_nc_u32_e32 v5, v5, v15
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v11, v16, s0
-; GFX10-NEXT:    v_add_co_u32 v7, s0, v7, v1
+; GFX10-NEXT:    v_add_nc_u32_e32 v1, v7, v1
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s9, v15
+; GFX10-NEXT:    v_add_nc_u32_e32 v5, v5, v10
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, v16, v13, s0
+; GFX10-NEXT:    v_add_co_u32 v9, s0, v9, v1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v14, s0, v17, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v15, s0, 0, v18, s0
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
-; GFX10-NEXT:    v_add3_u32 v3, v5, v1, v3
-; GFX10-NEXT:    v_sub_co_u32 v1, s0, v12, s8
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v5, s0, 0, v8, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v8, v17, v14, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v14, v18, v15, vcc_lo
-; GFX10-NEXT:    v_mul_lo_u32 v15, s3, v7
-; GFX10-NEXT:    v_mul_lo_u32 v16, s2, v3
-; GFX10-NEXT:    v_mul_hi_u32 v17, s2, v7
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v10
-; GFX10-NEXT:    v_mul_lo_u32 v10, s2, v7
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v11
-; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v8, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v14, vcc_lo
-; GFX10-NEXT:    v_add3_u32 v8, v15, v16, v17
-; GFX10-NEXT:    v_cndmask_b32_e64 v1, v12, v1, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v13, v5, s0
-; GFX10-NEXT:    v_sub_co_u32 v10, s0, s10, v10
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v11, s1, s11, v8, s0
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, v6, v1, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc_lo
-; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s11, v8
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s3, v11
+; GFX10-NEXT:    v_add_co_u32 v10, s0, v17, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v13, s0, 0, v18, s0
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v7
+; GFX10-NEXT:    v_add3_u32 v4, v5, v1, v4
+; GFX10-NEXT:    v_sub_co_u32 v1, s0, v14, s8
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v5, s0, 0, v11, s0
+; GFX10-NEXT:    v_cndmask_b32_e32 v10, v17, v10, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v11, v18, v13, vcc_lo
+; GFX10-NEXT:    v_mul_lo_u32 v13, s11, v9
+; GFX10-NEXT:    v_mul_lo_u32 v16, s10, v4
+; GFX10-NEXT:    v_mul_hi_u32 v17, s10, v9
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v12
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v7
+; GFX10-NEXT:    v_mul_lo_u32 v7, s10, v9
+; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v10, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v1, v14, v1, s0
+; GFX10-NEXT:    v_add3_u32 v10, v13, v16, v17
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, v15, v5, s0
+; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v11, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v7, s0, s14, v7
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v11, s1, s15, v10, s0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v1, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v5, v6, v5, vcc_lo
+; GFX10-NEXT:    v_sub_nc_u32_e32 v1, s15, v10
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s11, v11
 ; GFX10-NEXT:    v_xor_b32_e32 v0, s18, v0
 ; GFX10-NEXT:    v_xor_b32_e32 v2, s19, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc_lo
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v8, vcc_lo, s3, v1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v10
+; GFX10-NEXT:    v_xor_b32_e32 v5, s2, v5
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc_lo
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v10, vcc_lo, s11, v1, s0
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s10, v7
 ; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v13, vcc_lo, v10, s2
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v14, s0, 0, v8, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v13, vcc_lo, v7, s10
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v14, s0, 0, v10, vcc_lo
 ; GFX10-NEXT:    v_sub_co_u32 v0, s0, v0, s18
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v1, s0, s19, v2, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s3, v11
-; GFX10-NEXT:    v_xor_b32_e32 v2, s12, v6
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v8, vcc_lo, s3, v8, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v12, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v14
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s11, v11
+; GFX10-NEXT:    v_xor_b32_e32 v2, s2, v3
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v10, vcc_lo, s11, v10, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v6, v12, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s11, v14
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s2, v13
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s10, v13
 ; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
-; GFX10-NEXT:    v_add_co_u32 v15, s0, v7, 1
-; GFX10-NEXT:    v_add_co_ci_u32_e64 v16, s0, 0, v3, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s3, v14
+; GFX10-NEXT:    v_add_co_u32 v15, s0, v9, 1
+; GFX10-NEXT:    v_add_co_ci_u32_e64 v16, s0, 0, v4, s0
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s11, v14
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s0
 ; GFX10-NEXT:    v_add_co_u32 v12, s0, v15, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v17, s0, 0, v16, s0
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
-; GFX10-NEXT:    v_sub_co_u32 v6, s0, v13, s2
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v8, s0, 0, v8, s0
+; GFX10-NEXT:    v_sub_co_u32 v6, s0, v13, s10
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v10, s0, 0, v10, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v12, v15, v12, vcc_lo
-; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v5
+; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v15, v16, v17, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v5, v13, v6, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e32 v6, v14, v8, vcc_lo
-; GFX10-NEXT:    v_xor_b32_e32 v8, s12, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v12, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, v10, v5, s0
+; GFX10-NEXT:    v_cndmask_b32_e32 v3, v13, v6, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e32 v6, v14, v10, vcc_lo
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, v9, v12, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v4, v15, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v3, v7, v3, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v11, v6, s0
-; GFX10-NEXT:    s_xor_b64 s[0:1], s[14:15], s[16:17]
-; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v2, s12
-; GFX10-NEXT:    v_xor_b32_e32 v2, s0, v7
-; GFX10-NEXT:    v_xor_b32_e32 v3, s1, v3
-; GFX10-NEXT:    v_xor_b32_e32 v7, s14, v10
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v5, vcc_lo, s12, v8, vcc_lo
-; GFX10-NEXT:    v_xor_b32_e32 v8, s14, v6
+; GFX10-NEXT:    s_xor_b64 s[0:1], s[12:13], s[16:17]
+; GFX10-NEXT:    v_sub_co_u32 v4, vcc_lo, v2, s2
+; GFX10-NEXT:    v_xor_b32_e32 v2, s0, v9
+; GFX10-NEXT:    v_xor_b32_e32 v7, s1, v10
+; GFX10-NEXT:    v_xor_b32_e32 v9, s12, v3
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v5, vcc_lo, s2, v5, vcc_lo
+; GFX10-NEXT:    v_xor_b32_e32 v10, s12, v6
 ; GFX10-NEXT:    v_sub_co_u32 v2, vcc_lo, v2, s0
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v3, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v6, vcc_lo, v7, s14
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s14, v8, vcc_lo
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v3, vcc_lo, s1, v7, vcc_lo
+; GFX10-NEXT:    v_sub_co_u32 v6, vcc_lo, v9, s12
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s12, v10, vcc_lo
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_store_dwordx4 v9, v[0:3], s[4:5]
-; GFX10-NEXT:    global_store_dwordx4 v9, v[4:7], s[6:7]
+; GFX10-NEXT:    global_store_dwordx4 v8, v[0:3], s[4:5]
+; GFX10-NEXT:    global_store_dwordx4 v8, v[4:7], s[6:7]
 ; GFX10-NEXT:    s_endpgm
   %div = sdiv <2 x i64> %x, %y
   store <2 x i64> %div, <2 x i64> addrspace(1)* %out0
@@ -2909,63 +2897,62 @@ define amdgpu_kernel void @sdiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* %
 define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %x, <2 x i16> %y) {
 ; GFX8-LABEL: sdivrem_v2i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s0, s[4:5], 0x14
-; GFX8-NEXT:    s_load_dword s8, s[4:5], 0x10
-; GFX8-NEXT:    s_mov_b32 s9, 0x100010
+; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x10
+; GFX8-NEXT:    s_mov_b32 s10, 0x100010
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_sext_i32_i16 s1, s0
-; GFX8-NEXT:    s_ashr_i32 s2, s1, 31
-; GFX8-NEXT:    s_add_i32 s1, s1, s2
-; GFX8-NEXT:    s_xor_b32 s3, s1, s2
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s3
-; GFX8-NEXT:    s_sub_i32 s6, 0, s3
-; GFX8-NEXT:    s_sext_i32_i16 s1, s8
-; GFX8-NEXT:    s_bfe_i32 s0, s0, s9
+; GFX8-NEXT:    s_sext_i32_i16 s0, s3
+; GFX8-NEXT:    s_ashr_i32 s8, s0, 31
+; GFX8-NEXT:    s_add_i32 s0, s0, s8
+; GFX8-NEXT:    s_xor_b32 s9, s0, s8
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s9
+; GFX8-NEXT:    s_sub_i32 s6, 0, s9
+; GFX8-NEXT:    s_sext_i32_i16 s0, s2
+; GFX8-NEXT:    s_bfe_i32 s1, s3, s10
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX8-NEXT:    s_ashr_i32 s10, s1, 31
-; GFX8-NEXT:    s_ashr_i32 s11, s0, 31
-; GFX8-NEXT:    s_add_i32 s1, s1, s10
+; GFX8-NEXT:    s_ashr_i32 s3, s0, 31
+; GFX8-NEXT:    s_ashr_i32 s11, s1, 31
+; GFX8-NEXT:    s_add_i32 s0, s0, s3
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX8-NEXT:    s_add_i32 s0, s0, s11
-; GFX8-NEXT:    s_xor_b32 s1, s1, s10
-; GFX8-NEXT:    s_xor_b32 s12, s0, s11
+; GFX8-NEXT:    s_add_i32 s1, s1, s11
+; GFX8-NEXT:    s_xor_b32 s0, s0, s3
+; GFX8-NEXT:    s_xor_b32 s12, s1, s11
 ; GFX8-NEXT:    v_mul_lo_u32 v1, s6, v0
 ; GFX8-NEXT:    v_cvt_f32_u32_e32 v2, s12
 ; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX8-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
-; GFX8-NEXT:    v_mul_hi_u32 v0, s1, v0
+; GFX8-NEXT:    v_mul_hi_u32 v0, s0, v0
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v1, v2
-; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s3
+; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s9
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
-; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s1, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
+; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s9, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s3, v2
+; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s9, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s9, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s3, v2
+; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s9, v2
 ; GFX8-NEXT:    s_sub_i32 s1, 0, s12
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s1, v1
-; GFX8-NEXT:    s_bfe_i32 s1, s8, s9
-; GFX8-NEXT:    s_xor_b32 s0, s10, s2
+; GFX8-NEXT:    s_bfe_i32 s1, s2, s10
 ; GFX8-NEXT:    s_ashr_i32 s2, s1, 31
-; GFX8-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX8-NEXT:    s_add_i32 s1, s1, s2
+; GFX8-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX8-NEXT:    s_xor_b32 s1, s1, s2
+; GFX8-NEXT:    s_xor_b32 s0, s3, s8
 ; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
 ; GFX8-NEXT:    v_mul_hi_u32 v1, s1, v1
-; GFX8-NEXT:    v_xor_b32_e32 v2, s10, v2
+; GFX8-NEXT:    v_xor_b32_e32 v2, s3, v2
 ; GFX8-NEXT:    v_subrev_u32_e32 v0, vcc, s0, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v1, s12
-; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s10, v2
+; GFX8-NEXT:    v_subrev_u32_e32 v2, vcc, s3, v2
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s1, v3
 ; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s12, v3
@@ -3000,106 +2987,104 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16
 ;
 ; GFX9-LABEL: sdivrem_v2i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s6, s[4:5], 0x14
+; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[4:5], 0x10
+; GFX9-NEXT:    s_mov_b32 s10, 0x100010
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_sext_i32_i16 s0, s6
-; GFX9-NEXT:    s_ashr_i32 s7, s0, 31
-; GFX9-NEXT:    s_add_i32 s0, s0, s7
-; GFX9-NEXT:    s_xor_b32 s8, s0, s7
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s8
-; GFX9-NEXT:    s_load_dword s9, s[4:5], 0x10
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
-; GFX9-NEXT:    s_mov_b32 s4, 0x100010
-; GFX9-NEXT:    s_bfe_i32 s6, s6, s4
+; GFX9-NEXT:    s_sext_i32_i16 s0, s7
+; GFX9-NEXT:    s_ashr_i32 s8, s0, 31
+; GFX9-NEXT:    s_add_i32 s0, s0, s8
+; GFX9-NEXT:    s_xor_b32 s9, s0, s8
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s9
+; GFX9-NEXT:    s_bfe_i32 s1, s7, s10
+; GFX9-NEXT:    s_ashr_i32 s7, s1, 31
+; GFX9-NEXT:    s_add_i32 s1, s1, s7
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX9-NEXT:    s_ashr_i32 s10, s6, 31
-; GFX9-NEXT:    s_add_i32 s6, s6, s10
-; GFX9-NEXT:    s_sub_i32 s11, 0, s8
+; GFX9-NEXT:    s_xor_b32 s11, s1, s7
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s11
+; GFX9-NEXT:    s_sub_i32 s1, 0, s9
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    s_xor_b32 s6, s6, s10
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, s6
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_sext_i32_i16 s5, s9
-; GFX9-NEXT:    v_mul_lo_u32 v1, s11, v0
-; GFX9-NEXT:    s_ashr_i32 s11, s5, 31
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; GFX9-NEXT:    s_add_i32 s5, s5, s11
-; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
-; GFX9-NEXT:    s_xor_b32 s5, s5, s11
-; GFX9-NEXT:    s_bfe_i32 s4, s9, s4
-; GFX9-NEXT:    s_sub_i32 s9, 0, s6
-; GFX9-NEXT:    v_add_u32_e32 v0, v0, v1
-; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v2
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; GFX9-NEXT:    s_sext_i32_i16 s0, s6
+; GFX9-NEXT:    s_ashr_i32 s12, s0, 31
+; GFX9-NEXT:    v_mul_lo_u32 v2, s1, v0
+; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_mul_hi_u32 v0, s5, v0
-; GFX9-NEXT:    s_xor_b32 s7, s11, s7
-; GFX9-NEXT:    v_mul_lo_u32 v3, s9, v1
-; GFX9-NEXT:    v_mul_lo_u32 v2, v0, s8
-; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GFX9-NEXT:    v_sub_u32_e32 v2, s5, v2
+; GFX9-NEXT:    s_add_i32 s0, s0, s12
+; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
+; GFX9-NEXT:    s_xor_b32 s13, s0, s12
+; GFX9-NEXT:    s_sub_i32 s0, 0, s11
+; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
+; GFX9-NEXT:    v_mul_hi_u32 v0, s13, v0
+; GFX9-NEXT:    v_mul_lo_u32 v2, s0, v1
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
+; GFX9-NEXT:    s_bfe_i32 s4, s6, s10
+; GFX9-NEXT:    v_mul_lo_u32 v3, v0, s9
+; GFX9-NEXT:    v_mul_hi_u32 v2, v1, v2
 ; GFX9-NEXT:    s_ashr_i32 s5, s4, 31
 ; GFX9-NEXT:    s_add_i32 s4, s4, s5
+; GFX9-NEXT:    v_sub_u32_e32 v3, s13, v3
+; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s9, v3
 ; GFX9-NEXT:    s_xor_b32 s4, s4, s5
-; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
-; GFX9-NEXT:    v_mul_hi_u32 v1, s4, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s8, v2
+; GFX9-NEXT:    v_add_u32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s8, v2
-; GFX9-NEXT:    v_mul_lo_u32 v3, v1, s6
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s9, v3
+; GFX9-NEXT:    v_mul_hi_u32 v1, s4, v1
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s8, v2
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s9, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s8, v2
-; GFX9-NEXT:    v_sub_u32_e32 v3, s4, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s9, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v3, v1, s11
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    s_xor_b32 s6, s12, s8
+; GFX9-NEXT:    v_xor_b32_e32 v0, s6, v0
+; GFX9-NEXT:    v_sub_u32_e32 v3, s4, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s6, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s11, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s6, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s11, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX9-NEXT:    s_xor_b32 s4, s5, s10
-; GFX9-NEXT:    v_xor_b32_e32 v0, s7, v0
-; GFX9-NEXT:    v_xor_b32_e32 v2, s11, v2
+; GFX9-NEXT:    s_xor_b32 s4, s5, s7
+; GFX9-NEXT:    v_xor_b32_e32 v2, s12, v2
 ; GFX9-NEXT:    v_xor_b32_e32 v1, s4, v1
 ; GFX9-NEXT:    v_xor_b32_e32 v3, s5, v3
-; GFX9-NEXT:    v_subrev_u32_e32 v0, s7, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v2, s11, v2
+; GFX9-NEXT:    v_subrev_u32_e32 v0, s6, v0
+; GFX9-NEXT:    v_subrev_u32_e32 v2, s12, v2
 ; GFX9-NEXT:    v_sub_u32_sdwa v1, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_sub_u32_sdwa v3, v3, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-NEXT:    v_and_or_b32 v0, v0, v4, v1
 ; GFX9-NEXT:    v_and_or_b32 v1, v2, v4, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX9-NEXT:    global_store_dword v2, v1, s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: sdivrem_v2i16:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x14
-; GFX10-NEXT:    s_mov_b32 s1, 0x100010
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
+; GFX10-NEXT:    s_mov_b32 s2, 0x100010
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_sext_i32_i16 s2, s0
-; GFX10-NEXT:    s_bfe_i32 s0, s0, s1
-; GFX10-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX10-NEXT:    s_ashr_i32 s8, s0, 31
-; GFX10-NEXT:    s_add_i32 s2, s2, s3
-; GFX10-NEXT:    s_add_i32 s0, s0, s8
-; GFX10-NEXT:    s_xor_b32 s2, s2, s3
-; GFX10-NEXT:    s_xor_b32 s9, s0, s8
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s2
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s9
-; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x10
-; GFX10-NEXT:    s_sub_i32 s6, 0, s2
-; GFX10-NEXT:    s_sub_i32 s7, 0, s9
+; GFX10-NEXT:    s_sext_i32_i16 s3, s1
+; GFX10-NEXT:    s_bfe_i32 s1, s1, s2
+; GFX10-NEXT:    s_ashr_i32 s8, s3, 31
+; GFX10-NEXT:    s_ashr_i32 s9, s1, 31
+; GFX10-NEXT:    s_add_i32 s3, s3, s8
+; GFX10-NEXT:    s_add_i32 s1, s1, s9
+; GFX10-NEXT:    s_xor_b32 s3, s3, s8
+; GFX10-NEXT:    s_xor_b32 s1, s1, s9
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s3
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s1
+; GFX10-NEXT:    s_sub_i32 s6, 0, s3
+; GFX10-NEXT:    s_sub_i32 s7, 0, s1
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -3108,56 +3093,55 @@ define amdgpu_kernel void @sdivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v2, s6, v0
 ; GFX10-NEXT:    v_mul_lo_u32 v3, s7, v1
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_sext_i32_i16 s6, s0
-; GFX10-NEXT:    s_bfe_i32 s0, s0, s1
-; GFX10-NEXT:    s_ashr_i32 s1, s6, 31
+; GFX10-NEXT:    s_bfe_i32 s0, s0, s2
+; GFX10-NEXT:    s_ashr_i32 s2, s6, 31
 ; GFX10-NEXT:    s_ashr_i32 s10, s0, 31
-; GFX10-NEXT:    s_add_i32 s6, s6, s1
+; GFX10-NEXT:    s_add_i32 s6, s6, s2
 ; GFX10-NEXT:    s_add_i32 s0, s0, s10
 ; GFX10-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GFX10-NEXT:    s_xor_b32 s6, s6, s1
+; GFX10-NEXT:    s_xor_b32 s6, s6, s2
 ; GFX10-NEXT:    s_xor_b32 s0, s0, s10
 ; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v3
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s6, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v1, s0, v1
-; GFX10-NEXT:    v_mul_lo_u32 v2, v0, s2
-; GFX10-NEXT:    v_mul_lo_u32 v3, v1, s9
+; GFX10-NEXT:    v_mul_lo_u32 v2, v0, s3
+; GFX10-NEXT:    v_mul_lo_u32 v3, v1, s1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s6, v2
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s0, v3
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s2, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s9, v3
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s2, v2
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s9, v3
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s3, v2
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v3
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v2
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, 1, v1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s2, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s9, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s2, v2
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s9, v3
-; GFX10-NEXT:    s_xor_b32 s2, s1, s3
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v2
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v3
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v6, s3, v2
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s1, v3
+; GFX10-NEXT:    s_xor_b32 s1, s2, s8
 ; GFX10-NEXT:    v_cndmask_b32_e64 v0, v0, v4, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
-; GFX10-NEXT:    s_xor_b32 s0, s10, s8
-; GFX10-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX10-NEXT:    s_xor_b32 s0, s10, s9
+; GFX10-NEXT:    v_xor_b32_e32 v0, s1, v0
 ; GFX10-NEXT:    v_xor_b32_e32 v1, s0, v1
-; GFX10-NEXT:    v_xor_b32_e32 v2, s1, v2
+; GFX10-NEXT:    v_xor_b32_e32 v2, s2, v2
 ; GFX10-NEXT:    v_xor_b32_e32 v3, s10, v3
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v0, s2, v0
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v0, s1, v0
 ; GFX10-NEXT:    v_sub_nc_u32_sdwa v1, v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, s1, v2
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v2, s2, v2
 ; GFX10-NEXT:    v_sub_nc_u32_sdwa v3, v3, s10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 ; GFX10-NEXT:    v_and_or_b32 v0, v0, v4, v1
 ; GFX10-NEXT:    v_mov_b32_e32 v1, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
index 5bd27e6655054..106ca06a30191 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll
@@ -555,13 +555,12 @@ define amdgpu_kernel void @udivrem_i64(i64 addrspace(1)* %out0, i64 addrspace(1)
 define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32> addrspace(1)* %out1, <2 x i32> %x, <2 x i32> %y) {
 ; GFX8-LABEL: udivrem_v2i32:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x18
-; GFX8-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x10
+; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
 ; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s2
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s3
-; GFX8-NEXT:    s_sub_i32 s0, 0, s2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s10
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s11
+; GFX8-NEXT:    s_sub_i32 s0, 0, s10
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -569,7 +568,7 @@ define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX8-NEXT:    v_mul_lo_u32 v2, s0, v0
-; GFX8-NEXT:    s_sub_i32 s0, 0, s3
+; GFX8-NEXT:    s_sub_i32 s0, 0, s11
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s0, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v3, v1, v3
@@ -577,29 +576,29 @@ define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s8, v0
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
 ; GFX8-NEXT:    v_mul_hi_u32 v1, s9, v1
-; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s2
+; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s10
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_mul_lo_u32 v4, v1, s3
+; GFX8-NEXT:    v_mul_lo_u32 v4, v1, s11
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s8, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s2, v2
+; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s2, v2
+; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s10, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s9, v4
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s11, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v4, s4
 ; GFX8-NEXT:    v_mov_b32_e32 v5, s5
@@ -611,26 +610,24 @@ define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 ;
 ; GFX9-LABEL: udivrem_v2i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x18
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s2
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s3
-; GFX9-NEXT:    s_sub_i32 s0, 0, s2
-; GFX9-NEXT:    s_sub_i32 s1, 0, s3
+; GFX9-NEXT:    s_sub_i32 s6, 0, s2
+; GFX9-NEXT:    s_sub_i32 s7, 0, s3
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_mul_lo_u32 v2, s0, v0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s1, v1
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
-; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
+; GFX9-NEXT:    v_mul_lo_u32 v2, s6, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s7, v1
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s0, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
 ; GFX9-NEXT:    v_mul_hi_u32 v1, s1, v1
@@ -652,41 +649,40 @@ define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, v3, v5, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s2, v2
+; GFX9-NEXT:    v_add_u32_e32 v5, 1, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s3, v3
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_store_dwordx2 v4, v[0:1], s[4:5]
 ; GFX9-NEXT:    global_store_dwordx2 v4, v[2:3], s[6:7]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: udivrem_v2i32:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x18
+; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
 ; GFX10-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s2
 ; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s3
-; GFX10-NEXT:    s_sub_i32 s0, 0, s2
-; GFX10-NEXT:    s_sub_i32 s1, 0, s3
+; GFX10-NEXT:    s_sub_i32 s6, 0, s2
+; GFX10-NEXT:    s_sub_i32 s7, 0, s3
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT:    v_mul_lo_u32 v2, s0, v0
-; GFX10-NEXT:    v_mul_lo_u32 v3, s1, v1
-; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
+; GFX10-NEXT:    v_mul_lo_u32 v2, s6, v0
+; GFX10-NEXT:    v_mul_lo_u32 v3, s7, v1
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX10-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v3
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s0, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v1, s1, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v2, v0, s2
@@ -713,6 +709,7 @@ define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v1, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v3, v3, v7, vcc_lo
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_store_dwordx2 v8, v[0:1], s[4:5]
 ; GFX10-NEXT:    global_store_dwordx2 v8, v[2:3], s[6:7]
 ; GFX10-NEXT:    s_endpgm
@@ -726,15 +723,14 @@ define amdgpu_kernel void @udivrem_v2i32(<2 x i32> addrspace(1)* %out0, <2 x i32
 define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32> addrspace(1)* %out1, <4 x i32> %x, <4 x i32> %y) {
 ; GFX8-LABEL: udivrem_v4i32:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x20
-; GFX8-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x10
+; GFX8-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4f7ffffe
 ; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s8
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s9
-; GFX8-NEXT:    s_sub_i32 s0, 0, s8
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v6, s10
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s12
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s13
+; GFX8-NEXT:    s_sub_i32 s0, 0, s12
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v6, s14
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -742,78 +738,78 @@ define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 ; GFX8-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s0, v0
-; GFX8-NEXT:    s_sub_i32 s0, 0, s9
+; GFX8-NEXT:    s_sub_i32 s0, 0, s13
 ; GFX8-NEXT:    v_mul_lo_u32 v4, s0, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v3, v0, v3
 ; GFX8-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
-; GFX8-NEXT:    v_mul_hi_u32 v0, s12, v0
+; GFX8-NEXT:    v_mul_hi_u32 v0, s8, v0
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v4
-; GFX8-NEXT:    v_mul_hi_u32 v1, s13, v1
-; GFX8-NEXT:    v_mul_lo_u32 v3, v0, s8
+; GFX8-NEXT:    v_mul_hi_u32 v1, s9, v1
+; GFX8-NEXT:    v_mul_lo_u32 v3, v0, s12
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
-; GFX8-NEXT:    v_mul_lo_u32 v5, v1, s9
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s12, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; GFX8-NEXT:    v_mul_lo_u32 v5, v1, s13
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s8, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s12, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s8, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s12, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s12, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s8, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s12, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v4, v3, v4, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s13, v5
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s9, v5
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v5, v6
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s9, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
 ; GFX8-NEXT:    v_mul_f32_e32 v5, v5, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s9, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s13, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v6, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s9, v3
-; GFX8-NEXT:    s_sub_i32 s0, 0, s10
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
+; GFX8-NEXT:    s_sub_i32 s0, 0, s14
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v6, s11
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v6, s15
 ; GFX8-NEXT:    v_mul_lo_u32 v7, s0, v5
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v6, v6
 ; GFX8-NEXT:    v_mul_hi_u32 v7, v5, v7
 ; GFX8-NEXT:    v_mul_f32_e32 v2, v6, v2
 ; GFX8-NEXT:    v_add_u32_e64 v5, s[0:1], v5, v7
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX8-NEXT:    v_mul_hi_u32 v7, s14, v5
-; GFX8-NEXT:    v_subrev_u32_e64 v5, s[0:1], s9, v3
-; GFX8-NEXT:    s_sub_i32 s0, 0, s11
+; GFX8-NEXT:    v_mul_hi_u32 v7, s10, v5
+; GFX8-NEXT:    v_subrev_u32_e64 v5, s[0:1], s13, v3
+; GFX8-NEXT:    s_sub_i32 s0, 0, s15
 ; GFX8-NEXT:    v_mul_lo_u32 v6, s0, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v5, v3, v5, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v3, v7, s10
+; GFX8-NEXT:    v_mul_lo_u32 v3, v7, s14
 ; GFX8-NEXT:    v_add_u32_e32 v8, vcc, 1, v7
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v2, v6
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s14, v3
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s10, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s14, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s10, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s14, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
-; GFX8-NEXT:    v_mul_hi_u32 v8, s15, v2
+; GFX8-NEXT:    v_mul_hi_u32 v8, s11, v2
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 1, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s10, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s14, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v7, v2, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v7, v8, s11
-; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s10, v3
+; GFX8-NEXT:    v_mul_lo_u32 v7, v8, s15
+; GFX8-NEXT:    v_subrev_u32_e64 v6, s[0:1], s14, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v6, v3, v6, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s15, v7
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s11, v7
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, 1, v8
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s15, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s11, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v8, s[0:1], s15, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v8, v3, v8, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v7
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s11, v8
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s15, v8
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v7, s[0:1], s11, v8
+; GFX8-NEXT:    v_subrev_u32_e64 v7, s[0:1], s15, v8
 ; GFX8-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
 ; GFX8-NEXT:    v_mov_b32_e32 v9, s5
 ; GFX8-NEXT:    v_mov_b32_e32 v8, s4
@@ -826,120 +822,118 @@ define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 ;
 ; GFX9-LABEL: udivrem_v4i32:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x20
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x10
+; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0x4f7ffffe
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s0
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s1
-; GFX9-NEXT:    s_sub_i32 s6, 0, s0
-; GFX9-NEXT:    s_sub_i32 s7, 0, s1
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s12
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s13
+; GFX9-NEXT:    s_sub_i32 s0, 0, s12
+; GFX9-NEXT:    s_sub_i32 s1, 0, s13
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s2
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s14
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, v1, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v5, v5
-; GFX9-NEXT:    v_mul_lo_u32 v3, s6, v0
-; GFX9-NEXT:    v_mul_lo_u32 v4, s7, v1
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s0, v0
+; GFX9-NEXT:    v_mul_lo_u32 v4, s1, v1
+; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v0, v3
+; GFX9-NEXT:    s_sub_i32 s4, 0, s14
 ; GFX9-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v3
 ; GFX9-NEXT:    v_mul_hi_u32 v0, s8, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v1, s9, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v3, v5, v2
-; GFX9-NEXT:    v_mul_lo_u32 v4, v0, s0
+; GFX9-NEXT:    v_mul_lo_u32 v4, v0, s12
 ; GFX9-NEXT:    v_add_u32_e32 v6, 1, v0
-; GFX9-NEXT:    v_mul_lo_u32 v5, v1, s1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX9-NEXT:    v_mul_lo_u32 v5, v1, s13
+; GFX9-NEXT:    v_add_u32_e32 v7, 1, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v4, s8, v4
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s0, v4
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s12, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v6, s0, v4
+; GFX9-NEXT:    v_subrev_u32_e32 v6, s12, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v6, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s0, v4
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s12, v4
 ; GFX9-NEXT:    v_sub_u32_e32 v5, s9, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v6, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v6, s0, v4
+; GFX9-NEXT:    v_subrev_u32_e32 v6, s12, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT:    v_add_u32_e32 v6, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s1, v5
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v6, s3
-; GFX9-NEXT:    s_sub_i32 s0, 0, s2
-; GFX9-NEXT:    v_mul_lo_u32 v7, s0, v3
-; GFX9-NEXT:    s_sub_i32 s0, 0, s3
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v6
-; GFX9-NEXT:    v_subrev_u32_e32 v8, s1, v5
-; GFX9-NEXT:    v_mul_hi_u32 v7, v3, v7
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s13, v5
+; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v7, vcc
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, s15
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v8, s13, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
-; GFX9-NEXT:    v_mul_f32_e32 v2, v6, v2
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
-; GFX9-NEXT:    v_add_u32_e32 v3, v3, v7
-; GFX9-NEXT:    v_mul_hi_u32 v3, s10, v3
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v7, v7
+; GFX9-NEXT:    v_mul_lo_u32 v6, s4, v3
+; GFX9-NEXT:    s_sub_i32 s4, 0, s15
 ; GFX9-NEXT:    v_add_u32_e32 v8, 1, v1
-; GFX9-NEXT:    v_mul_lo_u32 v7, s0, v2
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s1, v5
-; GFX9-NEXT:    v_mul_lo_u32 v6, v3, s2
+; GFX9-NEXT:    v_mul_f32_e32 v2, v7, v2
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    v_mul_hi_u32 v6, v3, v6
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s13, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v8, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v7, s4, v2
+; GFX9-NEXT:    v_add_u32_e32 v3, v3, v6
+; GFX9-NEXT:    v_mul_hi_u32 v3, s10, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v8, s13, v5
 ; GFX9-NEXT:    v_mul_hi_u32 v7, v2, v7
-; GFX9-NEXT:    v_subrev_u32_e32 v8, s1, v5
-; GFX9-NEXT:    v_sub_u32_e32 v6, s10, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v8, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v6, v3, s14
+; GFX9-NEXT:    v_add_u32_e32 v8, 1, v3
 ; GFX9-NEXT:    v_add_u32_e32 v2, v2, v7
 ; GFX9-NEXT:    v_mul_hi_u32 v7, s11, v2
-; GFX9-NEXT:    v_add_u32_e32 v8, 1, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s2, v6
+; GFX9-NEXT:    v_sub_u32_e32 v6, s10, v6
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s14, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v8, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v2, s2, v6
-; GFX9-NEXT:    v_mul_lo_u32 v8, v7, s3
+; GFX9-NEXT:    v_subrev_u32_e32 v2, s14, v6
+; GFX9-NEXT:    v_mul_lo_u32 v8, v7, s15
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v2, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v2, 1, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s2, v6
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s14, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v2, v3, v2, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v3, s2, v6
+; GFX9-NEXT:    v_subrev_u32_e32 v3, s14, v6
 ; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v3, vcc
 ; GFX9-NEXT:    v_sub_u32_e32 v3, s11, v8
 ; GFX9-NEXT:    v_add_u32_e32 v8, 1, v7
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s15, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v8, s3, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v8, s15, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v8, v3, v8, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v7
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s3, v8
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s15, v8
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v3, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v7, s3, v8
+; GFX9-NEXT:    v_subrev_u32_e32 v7, s15, v8
 ; GFX9-NEXT:    v_cndmask_b32_e32 v7, v8, v7, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_store_dwordx4 v8, v[0:3], s[4:5]
-; GFX9-NEXT:    global_store_dwordx4 v8, v[4:7], s[6:7]
+; GFX9-NEXT:    global_store_dwordx4 v8, v[0:3], s[0:1]
+; GFX9-NEXT:    global_store_dwordx4 v8, v[4:7], s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: udivrem_v4i32:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x20
-; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0x4f7ffffe
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX10-NEXT:    v_mov_b32_e32 v8, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s8
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s9
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s10
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s11
-; GFX10-NEXT:    s_sub_i32 s6, 0, s8
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s12
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s13
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s14
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s15
+; GFX10-NEXT:    s_sub_i32 s0, 0, s12
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v3, v3
-; GFX10-NEXT:    s_sub_i32 s7, 0, s9
-; GFX10-NEXT:    s_sub_i32 s12, 0, s10
+; GFX10-NEXT:    s_sub_i32 s1, 0, s13
+; GFX10-NEXT:    s_sub_i32 s2, 0, s14
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, v1, v4
 ; GFX10-NEXT:    v_mul_f32_e32 v2, v2, v4
@@ -948,12 +942,11 @@ define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX10-NEXT:    v_mul_lo_u32 v4, s6, v0
-; GFX10-NEXT:    v_mul_lo_u32 v5, s7, v1
-; GFX10-NEXT:    v_mul_lo_u32 v6, s12, v2
-; GFX10-NEXT:    s_sub_i32 s6, 0, s11
-; GFX10-NEXT:    v_mul_lo_u32 v7, s6, v3
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
+; GFX10-NEXT:    v_mul_lo_u32 v4, s0, v0
+; GFX10-NEXT:    v_mul_lo_u32 v5, s1, v1
+; GFX10-NEXT:    v_mul_lo_u32 v6, s2, v2
+; GFX10-NEXT:    s_sub_i32 s0, 0, s15
+; GFX10-NEXT:    v_mul_lo_u32 v7, s0, v3
 ; GFX10-NEXT:    v_mul_hi_u32 v4, v0, v4
 ; GFX10-NEXT:    v_mul_hi_u32 v5, v1, v5
 ; GFX10-NEXT:    v_mul_hi_u32 v6, v2, v6
@@ -962,34 +955,34 @@ define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v5
 ; GFX10-NEXT:    v_add_nc_u32_e32 v2, v2, v6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v3, v3, v7
-; GFX10-NEXT:    v_mul_hi_u32 v0, s0, v0
-; GFX10-NEXT:    v_mul_hi_u32 v1, s1, v1
-; GFX10-NEXT:    v_mul_hi_u32 v2, s2, v2
-; GFX10-NEXT:    v_mul_hi_u32 v3, s3, v3
-; GFX10-NEXT:    v_mul_lo_u32 v4, v0, s8
-; GFX10-NEXT:    v_mul_lo_u32 v5, v1, s9
-; GFX10-NEXT:    v_mul_lo_u32 v6, v2, s10
+; GFX10-NEXT:    v_mul_hi_u32 v0, s8, v0
+; GFX10-NEXT:    v_mul_hi_u32 v1, s9, v1
+; GFX10-NEXT:    v_mul_hi_u32 v2, s10, v2
+; GFX10-NEXT:    v_mul_hi_u32 v3, s11, v3
+; GFX10-NEXT:    v_mul_lo_u32 v4, v0, s12
+; GFX10-NEXT:    v_mul_lo_u32 v5, v1, s13
+; GFX10-NEXT:    v_mul_lo_u32 v6, v2, s14
 ; GFX10-NEXT:    v_add_nc_u32_e32 v9, 1, v0
-; GFX10-NEXT:    v_mul_lo_u32 v7, v3, s11
+; GFX10-NEXT:    v_mul_lo_u32 v7, v3, s15
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v12, 1, v3
-; GFX10-NEXT:    v_sub_nc_u32_e32 v4, s0, v4
-; GFX10-NEXT:    v_sub_nc_u32_e32 v5, s1, v5
-; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s2, v6
-; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s3, v7
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s8, v4
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s9, v5
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s10, v6
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s11, v7
+; GFX10-NEXT:    v_sub_nc_u32_e32 v4, s8, v4
+; GFX10-NEXT:    v_sub_nc_u32_e32 v5, s9, v5
+; GFX10-NEXT:    v_sub_nc_u32_e32 v6, s10, v6
+; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s11, v7
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s12, v4
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s13, v5
+; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s14, v6
+; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s15, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s8, v4
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s12, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v10, s0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s9, v5
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s13, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v11, s1
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v11, s10, v6
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v11, s14, v6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v12, s2
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v12, s11, v7
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v12, s15, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s1
@@ -997,24 +990,23 @@ define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v12, s2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, 1, v1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v11, 1, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s8, v4
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s9, v5
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s10, v6
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s12, v4
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s13, v5
+; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s14, v6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v12, 1, v3
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s11, v7
+; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s15, v7
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc_lo
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s8, v4
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v9, s12, v4
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v10, s0
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s9, v5
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v10, s13, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v2, v2, v11, s1
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v11, s10, v6
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v13, s11, v7
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v11, s14, v6
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v13, s15, v7
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v12, s2
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v4, v9, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v10, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, v6, v11, s1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, v7, v13, s2
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    global_store_dwordx4 v8, v[0:3], s[4:5]
 ; GFX10-NEXT:    global_store_dwordx4 v8, v[4:7], s[6:7]
 ; GFX10-NEXT:    s_endpgm
@@ -1028,20 +1020,19 @@ define amdgpu_kernel void @udivrem_v4i32(<4 x i32> addrspace(1)* %out0, <4 x i32
 define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64> addrspace(1)* %out1, <2 x i64> %x, <2 x i64> %y) {
 ; GFX8-LABEL: udivrem_v2i64:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x20
-; GFX8-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x10
+; GFX8-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s9
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s8
-; GFX8-NEXT:    s_sub_u32 s0, 0, s8
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s13
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s12
+; GFX8-NEXT:    s_sub_u32 s0, 0, s12
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX8-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX8-NEXT:    s_and_b32 s1, s1, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s1, 0
-; GFX8-NEXT:    s_subb_u32 s1, 0, s9
+; GFX8-NEXT:    s_subb_u32 s1, 0, s13
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX8-NEXT:    v_trunc_f32_e32 v1, v1
@@ -1049,7 +1040,7 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_add_f32_e32 v0, v2, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX8-NEXT:    s_sub_u32 s2, 0, s10
+; GFX8-NEXT:    s_sub_u32 s2, 0, s14
 ; GFX8-NEXT:    v_mul_lo_u32 v2, s0, v1
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s1, v0
 ; GFX8-NEXT:    v_mul_hi_u32 v5, s0, v0
@@ -1059,15 +1050,15 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v1, v4
 ; GFX8-NEXT:    v_mul_lo_u32 v5, v0, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v6, v0, v4
+; GFX8-NEXT:    v_mul_lo_u32 v7, v1, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v4, v1, v4
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v5
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v6
 ; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v6, v1, v2
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v3
 ; GFX8-NEXT:    v_mul_hi_u32 v5, v0, v2
-; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v6, v4
+; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v7, v4
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, v4, v5
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
@@ -1083,7 +1074,7 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s0, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v5, s0, v0
 ; GFX8-NEXT:    v_mul_lo_u32 v4, s0, v0
-; GFX8-NEXT:    v_mov_b32_e32 v6, s9
+; GFX8-NEXT:    v_mov_b32_e32 v6, s13
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
 ; GFX8-NEXT:    v_mul_lo_u32 v3, v1, v4
@@ -1109,64 +1100,64 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
 ; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v1, v2, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v2, s13, v0
-; GFX8-NEXT:    v_mul_lo_u32 v3, s12, v1
-; GFX8-NEXT:    v_mul_hi_u32 v5, s12, v0
-; GFX8-NEXT:    v_mul_hi_u32 v0, s13, v0
-; GFX8-NEXT:    v_mov_b32_e32 v4, s13
+; GFX8-NEXT:    v_mul_lo_u32 v2, s9, v0
+; GFX8-NEXT:    v_mul_lo_u32 v3, s8, v1
+; GFX8-NEXT:    v_mul_hi_u32 v5, s8, v0
+; GFX8-NEXT:    v_mul_hi_u32 v0, s9, v0
+; GFX8-NEXT:    v_mov_b32_e32 v4, s9
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v5
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v5, s13, v1
+; GFX8-NEXT:    v_mul_lo_u32 v5, s9, v1
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
-; GFX8-NEXT:    v_mul_hi_u32 v3, s12, v1
+; GFX8-NEXT:    v_mul_hi_u32 v3, s8, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v5, v0
 ; GFX8-NEXT:    v_cndmask_b32_e64 v5, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v3
 ; GFX8-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v5, v3
-; GFX8-NEXT:    v_mul_hi_u32 v1, s13, v1
+; GFX8-NEXT:    v_mul_hi_u32 v1, s9, v1
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; GFX8-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v3, v2
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v2
-; GFX8-NEXT:    v_mul_lo_u32 v2, s9, v0
-; GFX8-NEXT:    v_mul_lo_u32 v3, s8, v1
-; GFX8-NEXT:    v_mul_hi_u32 v7, s8, v0
-; GFX8-NEXT:    v_mul_lo_u32 v5, s8, v0
+; GFX8-NEXT:    v_mul_lo_u32 v2, s13, v0
+; GFX8-NEXT:    v_mul_lo_u32 v3, s12, v1
+; GFX8-NEXT:    v_mul_hi_u32 v7, s12, v0
+; GFX8-NEXT:    v_mul_lo_u32 v5, s12, v0
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v7
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s12, v5
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s8, v5
 ; GFX8-NEXT:    v_subb_u32_e64 v5, s[0:1], v4, v2, vcc
-; GFX8-NEXT:    v_sub_u32_e64 v2, s[0:1], s13, v2
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v5
+; GFX8-NEXT:    v_sub_u32_e64 v2, s[0:1], s9, v2
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v5
 ; GFX8-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v3
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v3
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v5
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v5
 ; GFX8-NEXT:    v_subb_u32_e32 v2, vcc, v2, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[0:1]
-; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s8, v3
+; GFX8-NEXT:    v_subrev_u32_e32 v7, vcc, s12, v3
 ; GFX8-NEXT:    v_subbrev_u32_e64 v8, s[0:1], 0, v2, vcc
 ; GFX8-NEXT:    v_add_u32_e64 v9, s[0:1], 1, v0
 ; GFX8-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v8
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v7
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v7
 ; GFX8-NEXT:    v_subb_u32_e32 v2, vcc, v2, v6, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v8
-; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s8, v7
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v8
+; GFX8-NEXT:    v_subrev_u32_e32 v6, vcc, s12, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[0:1]
 ; GFX8-NEXT:    v_add_u32_e64 v12, s[0:1], 1, v9
 ; GFX8-NEXT:    v_subbrev_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX8-NEXT:    v_addc_u32_e64 v13, s[0:1], 0, v10, s[0:1]
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v11
 ; GFX8-NEXT:    v_cndmask_b32_e32 v10, v10, v13, vcc
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v13, s11
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v13, s15
 ; GFX8-NEXT:    v_cndmask_b32_e32 v9, v9, v12, vcc
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v12, s10
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v12, s14
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
 ; GFX8-NEXT:    v_mul_f32_e32 v4, 0x4f800000, v13
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v9, vcc
@@ -1186,7 +1177,7 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX8-NEXT:    s_subb_u32 s3, 0, s11
+; GFX8-NEXT:    s_subb_u32 s3, 0, s15
 ; GFX8-NEXT:    v_mul_lo_u32 v7, s3, v3
 ; GFX8-NEXT:    v_mul_lo_u32 v8, s2, v6
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v10, vcc
@@ -1222,7 +1213,7 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_mul_lo_u32 v7, s2, v3
 ; GFX8-NEXT:    v_mul_hi_u32 v9, s2, v2
 ; GFX8-NEXT:    v_mul_lo_u32 v8, s2, v2
-; GFX8-NEXT:    v_mov_b32_e32 v10, s11
+; GFX8-NEXT:    v_mov_b32_e32 v10, s15
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v9
 ; GFX8-NEXT:    v_mul_lo_u32 v7, v3, v8
@@ -1248,51 +1239,51 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v8
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v7
 ; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v6, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v6, s15, v2
-; GFX8-NEXT:    v_mul_lo_u32 v7, s14, v3
-; GFX8-NEXT:    v_mul_hi_u32 v9, s14, v2
-; GFX8-NEXT:    v_mul_hi_u32 v2, s15, v2
-; GFX8-NEXT:    v_mov_b32_e32 v8, s15
+; GFX8-NEXT:    v_mul_lo_u32 v6, s11, v2
+; GFX8-NEXT:    v_mul_lo_u32 v7, s10, v3
+; GFX8-NEXT:    v_mul_hi_u32 v9, s10, v2
+; GFX8-NEXT:    v_mul_hi_u32 v2, s11, v2
+; GFX8-NEXT:    v_mov_b32_e32 v8, s11
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v9
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v9, s15, v3
+; GFX8-NEXT:    v_mul_lo_u32 v9, s11, v3
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v7, v6
-; GFX8-NEXT:    v_mul_hi_u32 v7, s14, v3
+; GFX8-NEXT:    v_mul_hi_u32 v7, s10, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v9, v2
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v7, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v7, vcc, v9, v7
-; GFX8-NEXT:    v_mul_hi_u32 v3, s15, v3
+; GFX8-NEXT:    v_mul_hi_u32 v3, s11, v3
 ; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v6
 ; GFX8-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v7, v6
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, v3, v6
-; GFX8-NEXT:    v_mul_lo_u32 v6, s11, v2
-; GFX8-NEXT:    v_mul_lo_u32 v7, s10, v3
-; GFX8-NEXT:    v_mul_hi_u32 v11, s10, v2
-; GFX8-NEXT:    v_mul_lo_u32 v9, s10, v2
+; GFX8-NEXT:    v_mul_lo_u32 v6, s15, v2
+; GFX8-NEXT:    v_mul_lo_u32 v7, s14, v3
+; GFX8-NEXT:    v_mul_hi_u32 v11, s14, v2
+; GFX8-NEXT:    v_mul_lo_u32 v9, s14, v2
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v7
 ; GFX8-NEXT:    v_add_u32_e32 v6, vcc, v6, v11
-; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s14, v9
+; GFX8-NEXT:    v_sub_u32_e32 v7, vcc, s10, v9
 ; GFX8-NEXT:    v_subb_u32_e64 v8, s[0:1], v8, v6, vcc
-; GFX8-NEXT:    v_sub_u32_e64 v6, s[0:1], s15, v6
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v8
+; GFX8-NEXT:    v_sub_u32_e64 v6, s[0:1], s11, v6
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v8
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v7
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v7
 ; GFX8-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v8
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v8
 ; GFX8-NEXT:    v_subb_u32_e32 v6, vcc, v6, v10, vcc
 ; GFX8-NEXT:    v_cndmask_b32_e64 v9, v9, v11, s[0:1]
-; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, s10, v7
+; GFX8-NEXT:    v_subrev_u32_e32 v11, vcc, s14, v7
 ; GFX8-NEXT:    v_subbrev_u32_e64 v12, s[0:1], 0, v6, vcc
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v12
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v11
+; GFX8-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v11
 ; GFX8-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[0:1]
-; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v12
+; GFX8-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v12
 ; GFX8-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[0:1]
 ; GFX8-NEXT:    v_add_u32_e64 v14, s[0:1], 1, v2
 ; GFX8-NEXT:    v_subb_u32_e32 v6, vcc, v6, v10, vcc
@@ -1300,7 +1291,7 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX8-NEXT:    v_add_u32_e32 v10, vcc, 1, v14
 ; GFX8-NEXT:    v_addc_u32_e32 v16, vcc, 0, v15, vcc
 ; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
-; GFX8-NEXT:    v_subrev_u32_e64 v13, s[0:1], s10, v11
+; GFX8-NEXT:    v_subrev_u32_e64 v13, s[0:1], s14, v11
 ; GFX8-NEXT:    v_subbrev_u32_e64 v6, s[0:1], 0, v6, s[0:1]
 ; GFX8-NEXT:    v_cndmask_b32_e32 v10, v14, v10, vcc
 ; GFX8-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v9
@@ -1322,19 +1313,18 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ;
 ; GFX9-LABEL: udivrem_v2i64:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x20
-; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[4:5], 0x10
+; GFX9-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s9
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s8
-; GFX9-NEXT:    s_sub_u32 s0, 0, s8
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s13
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s12
+; GFX9-NEXT:    s_sub_u32 s0, 0, s12
 ; GFX9-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX9-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX9-NEXT:    s_and_b32 s1, s1, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
-; GFX9-NEXT:    s_subb_u32 s1, 0, s9
+; GFX9-NEXT:    s_subb_u32 s1, 0, s13
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
@@ -1342,8 +1332,8 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add_f32_e32 v0, v2, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v14, s11
-; GFX9-NEXT:    s_sub_u32 s2, 0, s10
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v14, s15
+; GFX9-NEXT:    s_sub_u32 s2, 0, s14
 ; GFX9-NEXT:    v_mul_lo_u32 v2, s0, v1
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s1, v0
 ; GFX9-NEXT:    v_mul_hi_u32 v4, s0, v0
@@ -1377,7 +1367,7 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s0, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v4, s0, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v5, s0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v7, s13
+; GFX9-NEXT:    v_mov_b32_e32 v7, s9
 ; GFX9-NEXT:    v_add3_u32 v2, v2, v3, v4
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v1, v5
 ; GFX9-NEXT:    v_mul_lo_u32 v4, v0, v2
@@ -1401,19 +1391,19 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add3_u32 v2, v5, v4, v2
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v2, s13, v0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s12, v1
-; GFX9-NEXT:    v_mul_hi_u32 v4, s12, v0
-; GFX9-NEXT:    v_mul_hi_u32 v0, s13, v0
-; GFX9-NEXT:    v_mov_b32_e32 v5, s9
+; GFX9-NEXT:    v_mul_lo_u32 v2, s9, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s8, v1
+; GFX9-NEXT:    v_mul_hi_u32 v4, s8, v0
+; GFX9-NEXT:    v_mul_hi_u32 v0, s9, v0
+; GFX9-NEXT:    v_mov_b32_e32 v5, s13
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v4, s13, v1
+; GFX9-NEXT:    v_mul_lo_u32 v4, s9, v1
 ; GFX9-NEXT:    v_add_u32_e32 v2, v3, v2
-; GFX9-NEXT:    v_mul_hi_u32 v3, s12, v1
-; GFX9-NEXT:    v_mul_hi_u32 v1, s13, v1
+; GFX9-NEXT:    v_mul_hi_u32 v3, s8, v1
+; GFX9-NEXT:    v_mul_hi_u32 v1, s9, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v4, v0
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v3
@@ -1422,35 +1412,35 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add_u32_e32 v3, v4, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
 ; GFX9-NEXT:    v_add3_u32 v1, v3, v2, v1
-; GFX9-NEXT:    v_mul_lo_u32 v2, s9, v0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s8, v1
-; GFX9-NEXT:    v_mul_hi_u32 v4, s8, v0
-; GFX9-NEXT:    v_mul_lo_u32 v6, s8, v0
+; GFX9-NEXT:    v_mul_lo_u32 v2, s13, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s12, v1
+; GFX9-NEXT:    v_mul_hi_u32 v4, s12, v0
+; GFX9-NEXT:    v_mul_lo_u32 v6, s12, v0
 ; GFX9-NEXT:    v_add3_u32 v2, v2, v3, v4
-; GFX9-NEXT:    v_sub_co_u32_e32 v3, vcc, s12, v6
+; GFX9-NEXT:    v_sub_co_u32_e32 v3, vcc, s8, v6
 ; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[0:1], v7, v2, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v6
-; GFX9-NEXT:    v_sub_u32_e32 v2, s13, v2
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v6
+; GFX9-NEXT:    v_sub_u32_e32 v2, s9, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v3
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v6
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v6
 ; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[0:1]
-; GFX9-NEXT:    v_subrev_co_u32_e32 v7, vcc, s8, v3
+; GFX9-NEXT:    v_subrev_co_u32_e32 v7, vcc, s12, v3
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v8, s[0:1], 0, v2, vcc
 ; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v5, vcc
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s10
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s14
 ; GFX9-NEXT:    v_add_co_u32_e64 v9, s[0:1], 1, v0
 ; GFX9-NEXT:    v_addc_co_u32_e64 v10, s[0:1], 0, v1, s[0:1]
 ; GFX9-NEXT:    v_add_f32_e32 v5, v14, v5
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v8
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v8
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v7
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v8
-; GFX9-NEXT:    v_subrev_co_u32_e32 v15, vcc, s8, v7
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v8
+; GFX9-NEXT:    v_subrev_co_u32_e32 v15, vcc, s12, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, v11, v12, s[0:1]
 ; GFX9-NEXT:    v_subbrev_co_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX9-NEXT:    v_add_co_u32_e64 v12, s[0:1], 1, v9
@@ -1468,7 +1458,7 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX9-NEXT:    s_subb_u32 s3, 0, s11
+; GFX9-NEXT:    s_subb_u32 s3, 0, s15
 ; GFX9-NEXT:    v_mul_lo_u32 v13, s3, v5
 ; GFX9-NEXT:    v_mul_lo_u32 v14, s2, v12
 ; GFX9-NEXT:    v_mul_hi_u32 v16, s2, v5
@@ -1530,19 +1520,19 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add3_u32 v7, v10, v7, v8
 ; GFX9-NEXT:    v_add_co_u32_e64 v3, s[0:1], v5, v3
 ; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[0:1], v9, v7, s[0:1]
-; GFX9-NEXT:    v_mul_lo_u32 v8, s15, v3
-; GFX9-NEXT:    v_mul_lo_u32 v9, s14, v7
+; GFX9-NEXT:    v_mul_lo_u32 v8, s11, v3
+; GFX9-NEXT:    v_mul_lo_u32 v9, s10, v7
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v6, v2, vcc
-; GFX9-NEXT:    v_mul_hi_u32 v2, s14, v3
-; GFX9-NEXT:    v_mul_hi_u32 v3, s15, v3
+; GFX9-NEXT:    v_mul_hi_u32 v2, s10, v3
+; GFX9-NEXT:    v_mul_hi_u32 v3, s11, v3
 ; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v8, v9
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v6, v2
 ; GFX9-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, s15, v7
+; GFX9-NEXT:    v_mul_lo_u32 v6, s11, v7
 ; GFX9-NEXT:    v_add_u32_e32 v2, v8, v2
-; GFX9-NEXT:    v_mul_hi_u32 v8, s14, v7
-; GFX9-NEXT:    v_mul_hi_u32 v7, s15, v7
+; GFX9-NEXT:    v_mul_hi_u32 v8, s10, v7
+; GFX9-NEXT:    v_mul_hi_u32 v7, s11, v7
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, 1, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v8
@@ -1551,30 +1541,30 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_add_u32_e32 v6, v6, v8
 ; GFX9-NEXT:    v_cndmask_b32_e64 v3, 0, 1, vcc
 ; GFX9-NEXT:    v_add3_u32 v3, v6, v3, v7
-; GFX9-NEXT:    v_mul_lo_u32 v6, s11, v2
-; GFX9-NEXT:    v_mul_lo_u32 v7, s10, v3
-; GFX9-NEXT:    v_mul_hi_u32 v8, s10, v2
-; GFX9-NEXT:    v_mul_lo_u32 v10, s10, v2
-; GFX9-NEXT:    v_mov_b32_e32 v11, s15
-; GFX9-NEXT:    v_mov_b32_e32 v9, s11
+; GFX9-NEXT:    v_mul_lo_u32 v6, s15, v2
+; GFX9-NEXT:    v_mul_lo_u32 v7, s14, v3
+; GFX9-NEXT:    v_mul_hi_u32 v8, s14, v2
+; GFX9-NEXT:    v_mul_lo_u32 v10, s14, v2
+; GFX9-NEXT:    v_mov_b32_e32 v11, s11
+; GFX9-NEXT:    v_mov_b32_e32 v9, s15
 ; GFX9-NEXT:    v_add3_u32 v6, v6, v7, v8
-; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, s14, v10
+; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, s10, v10
 ; GFX9-NEXT:    v_subb_co_u32_e64 v8, s[0:1], v11, v6, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v8
-; GFX9-NEXT:    v_sub_u32_e32 v6, s15, v6
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v8
+; GFX9-NEXT:    v_sub_u32_e32 v6, s11, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v7
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v8
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v8
 ; GFX9-NEXT:    v_subb_co_u32_e32 v6, vcc, v6, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e64 v10, v10, v11, s[0:1]
-; GFX9-NEXT:    v_subrev_co_u32_e32 v11, vcc, s10, v7
+; GFX9-NEXT:    v_subrev_co_u32_e32 v11, vcc, s14, v7
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v12, s[0:1], 0, v6, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v12
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s15, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v11
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s14, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v14, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v12
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s15, v12
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, v13, v14, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v14, s[0:1], 1, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v6, vcc, v6, v9, vcc
@@ -1584,7 +1574,7 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v13
 ; GFX9-NEXT:    v_cndmask_b32_e32 v9, v14, v9, vcc
 ; GFX9-NEXT:    v_cndmask_b32_e32 v14, v15, v16, vcc
-; GFX9-NEXT:    v_subrev_co_u32_e64 v15, s[0:1], s10, v11
+; GFX9-NEXT:    v_subrev_co_u32_e64 v15, s[0:1], s14, v11
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v6, s[0:1], 0, v6, s[0:1]
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v10
 ; GFX9-NEXT:    v_mov_b32_e32 v13, 0
@@ -1601,28 +1591,28 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ;
 ; GFX10-LABEL: udivrem_v2i64:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x20
+; GFX10-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x10
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s9
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s11
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s8
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s10
-; GFX10-NEXT:    s_sub_u32 s6, 0, s8
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s13
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v2, s15
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s12
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v3, s14
+; GFX10-NEXT:    s_sub_u32 s0, 0, s12
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f800000, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v2, 0x4f800000, v2
-; GFX10-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX10-NEXT:    s_and_b32 s0, s0, 1
+; GFX10-NEXT:    s_cselect_b32 s1, 1, 0
+; GFX10-NEXT:    s_and_b32 s1, s1, 1
 ; GFX10-NEXT:    v_add_f32_e32 v0, v0, v1
 ; GFX10-NEXT:    v_add_f32_e32 v1, v2, v3
-; GFX10-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10-NEXT:    s_subb_u32 s7, 0, s9
+; GFX10-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX10-NEXT:    s_subb_u32 s1, 0, s13
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
-; GFX10-NEXT:    s_sub_u32 s12, 0, s10
-; GFX10-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX10-NEXT:    s_and_b32 s0, s0, 1
-; GFX10-NEXT:    s_cmp_lg_u32 s0, 0
-; GFX10-NEXT:    s_subb_u32 s13, 0, s11
+; GFX10-NEXT:    s_sub_u32 s2, 0, s14
+; GFX10-NEXT:    s_cselect_b32 s3, 1, 0
+; GFX10-NEXT:    s_and_b32 s3, s3, 1
+; GFX10-NEXT:    s_cmp_lg_u32 s3, 0
+; GFX10-NEXT:    s_subb_u32 s3, 0, s15
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX10-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
 ; GFX10-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
@@ -1635,16 +1625,16 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GFX10-NEXT:    v_add_f32_e32 v0, v4, v0
 ; GFX10-NEXT:    v_add_f32_e32 v1, v5, v1
-; GFX10-NEXT:    v_mul_lo_u32 v4, s6, v2
-; GFX10-NEXT:    v_mul_lo_u32 v8, s12, v3
+; GFX10-NEXT:    v_mul_lo_u32 v4, s0, v2
+; GFX10-NEXT:    v_mul_lo_u32 v8, s2, v3
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX10-NEXT:    v_mul_lo_u32 v5, s7, v0
-; GFX10-NEXT:    v_mul_hi_u32 v6, s6, v0
-; GFX10-NEXT:    v_mul_lo_u32 v9, s13, v1
-; GFX10-NEXT:    v_mul_hi_u32 v10, s12, v1
-; GFX10-NEXT:    v_mul_lo_u32 v7, s6, v0
-; GFX10-NEXT:    v_mul_lo_u32 v11, s12, v1
+; GFX10-NEXT:    v_mul_lo_u32 v5, s1, v0
+; GFX10-NEXT:    v_mul_hi_u32 v6, s0, v0
+; GFX10-NEXT:    v_mul_lo_u32 v9, s3, v1
+; GFX10-NEXT:    v_mul_hi_u32 v10, s2, v1
+; GFX10-NEXT:    v_mul_lo_u32 v7, s0, v0
+; GFX10-NEXT:    v_mul_lo_u32 v11, s2, v1
 ; GFX10-NEXT:    v_add3_u32 v4, v5, v4, v6
 ; GFX10-NEXT:    v_add3_u32 v8, v9, v8, v10
 ; GFX10-NEXT:    v_mul_lo_u32 v5, v2, v7
@@ -1657,49 +1647,49 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_mul_hi_u32 v10, v1, v11
 ; GFX10-NEXT:    v_mul_hi_u32 v11, v3, v11
 ; GFX10-NEXT:    v_mul_lo_u32 v16, v3, v8
-; GFX10-NEXT:    v_add_co_u32 v5, s0, v5, v12
+; GFX10-NEXT:    v_add_co_u32 v5, s6, v5, v12
 ; GFX10-NEXT:    v_mul_hi_u32 v14, v0, v4
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v7, s0, v13, v7
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v9, s0, v9, v15
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v11, s0, v16, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v5, s0, v5, v6
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s6
+; GFX10-NEXT:    v_add_co_u32 v7, s6, v13, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s6
+; GFX10-NEXT:    v_add_co_u32 v9, s6, v9, v15
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s6
+; GFX10-NEXT:    v_add_co_u32 v11, s6, v16, v11
+; GFX10-NEXT:    v_cndmask_b32_e64 v16, 0, 1, s6
+; GFX10-NEXT:    v_add_co_u32 v5, s6, v5, v6
 ; GFX10-NEXT:    v_mul_hi_u32 v17, v1, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v6, s0, v7, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v9, s0, v9, v10
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s6
+; GFX10-NEXT:    v_add_co_u32 v6, s6, v7, v14
+; GFX10-NEXT:    v_cndmask_b32_e64 v7, 0, 1, s6
+; GFX10-NEXT:    v_add_co_u32 v9, s6, v9, v10
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s6
 ; GFX10-NEXT:    v_add_nc_u32_e32 v5, v12, v5
-; GFX10-NEXT:    v_add_co_u32 v10, s0, v11, v17
+; GFX10-NEXT:    v_add_co_u32 v10, s6, v11, v17
 ; GFX10-NEXT:    v_mul_hi_u32 v4, v2, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v9, v15, v9
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s0
-; GFX10-NEXT:    v_add_co_u32 v5, s0, v6, v5
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s6
+; GFX10-NEXT:    v_add_co_u32 v5, s6, v6, v5
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, v13, v7
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s6
 ; GFX10-NEXT:    v_mul_hi_u32 v8, v3, v8
-; GFX10-NEXT:    v_add_co_u32 v9, s0, v10, v9
+; GFX10-NEXT:    v_add_co_u32 v9, s6, v10, v9
 ; GFX10-NEXT:    v_add_nc_u32_e32 v11, v16, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s6
 ; GFX10-NEXT:    v_add3_u32 v4, v7, v6, v4
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v5
-; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x10
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX10-NEXT:    v_add3_u32 v5, v11, v10, v8
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v4, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v9
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
-; GFX10-NEXT:    v_mul_lo_u32 v6, s7, v0
-; GFX10-NEXT:    v_mul_hi_u32 v7, s6, v0
-; GFX10-NEXT:    v_mul_lo_u32 v5, s6, v2
-; GFX10-NEXT:    v_mul_lo_u32 v9, s13, v1
-; GFX10-NEXT:    v_mul_hi_u32 v10, s12, v1
-; GFX10-NEXT:    v_mul_lo_u32 v11, s12, v3
-; GFX10-NEXT:    v_mul_lo_u32 v4, s6, v0
-; GFX10-NEXT:    v_mul_lo_u32 v8, s12, v1
+; GFX10-NEXT:    v_mul_lo_u32 v6, s1, v0
+; GFX10-NEXT:    v_mul_hi_u32 v7, s0, v0
+; GFX10-NEXT:    v_mul_lo_u32 v5, s0, v2
+; GFX10-NEXT:    v_mul_lo_u32 v9, s3, v1
+; GFX10-NEXT:    v_mul_hi_u32 v10, s2, v1
+; GFX10-NEXT:    v_mul_lo_u32 v11, s2, v3
+; GFX10-NEXT:    v_mul_lo_u32 v4, s0, v0
+; GFX10-NEXT:    v_mul_lo_u32 v8, s2, v1
 ; GFX10-NEXT:    v_add3_u32 v5, v6, v5, v7
 ; GFX10-NEXT:    v_add3_u32 v9, v9, v11, v10
 ; GFX10-NEXT:    v_mul_lo_u32 v12, v2, v4
@@ -1711,168 +1701,166 @@ define amdgpu_kernel void @udivrem_v2i64(<2 x i64> addrspace(1)* %out0, <2 x i64
 ; GFX10-NEXT:    v_mul_lo_u32 v15, v1, v9
 ; GFX10-NEXT:    v_mul_hi_u32 v7, v1, v8
 ; GFX10-NEXT:    v_mul_hi_u32 v8, v3, v8
-; GFX10-NEXT:    v_add_co_u32 v10, s6, v12, v10
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v4, s6, v11, v4
+; GFX10-NEXT:    v_add_co_u32 v10, s0, v12, v10
+; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v4, s0, v11, v4
 ; GFX10-NEXT:    v_mul_lo_u32 v16, v3, v9
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v6, s6, v6, v15
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v6, s0, v6, v15
 ; GFX10-NEXT:    v_mul_hi_u32 v14, v0, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v10, s6, v10, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v8, s6, v16, v8
-; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v4, s6, v4, v14
+; GFX10-NEXT:    v_cndmask_b32_e64 v15, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v10, s0, v10, v13
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v8, s0, v16, v8
+; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v4, s0, v4, v14
 ; GFX10-NEXT:    v_add_nc_u32_e32 v10, v12, v10
-; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v6, s6, v6, v7
+; GFX10-NEXT:    v_cndmask_b32_e64 v14, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v6, s0, v6, v7
 ; GFX10-NEXT:    v_mul_hi_u32 v5, v2, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v4, s6, v4, v10
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v4, s0, v4, v10
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, v11, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s6
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, 1, s0
 ; GFX10-NEXT:    v_mul_hi_u32 v17, v1, v9
 ; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v4
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, v15, v6
 ; GFX10-NEXT:    v_add3_u32 v5, v7, v10, v5
 ; GFX10-NEXT:    v_mul_hi_u32 v9, v3, v9
-; GFX10-NEXT:    v_mov_b32_e32 v10, 0
-; GFX10-NEXT:    v_add_co_u32 v8, s6, v8, v17
+; GFX10-NEXT:    v_mul_hi_u32 v10, s9, v0
+; GFX10-NEXT:    v_mov_b32_e32 v12, 0
+; GFX10-NEXT:    v_add_co_u32 v8, s0, v8, v17
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v2, vcc_lo, v2, v5, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s6
-; GFX10-NEXT:    v_add_co_u32 v4, s12, v8, v6
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_mul_lo_u32 v6, s1, v0
-; GFX10-NEXT:    v_mul_lo_u32 v8, s0, v2
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v4, s0, v8, v6
+; GFX10-NEXT:    v_mul_lo_u32 v6, s9, v0
+; GFX10-NEXT:    v_mul_lo_u32 v8, s8, v2
+; GFX10-NEXT:    v_mul_hi_u32 v0, s8, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v7, v13, v11
-; GFX10-NEXT:    v_mul_hi_u32 v11, s1, v0
-; GFX10-NEXT:    v_mul_hi_u32 v0, s0, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s12
-; GFX10-NEXT:    v_mul_lo_u32 v12, s1, v2
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
+; GFX10-NEXT:    v_mul_lo_u32 v11, s9, v2
 ; GFX10-NEXT:    v_add_co_u32 v1, vcc_lo, v1, v4
-; GFX10-NEXT:    v_add_co_u32 v6, s12, v6, v8
+; GFX10-NEXT:    v_add_co_u32 v6, s0, v6, v8
 ; GFX10-NEXT:    v_add3_u32 v5, v7, v5, v9
-; GFX10-NEXT:    v_mul_hi_u32 v7, s0, v2
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s12
-; GFX10-NEXT:    v_add_co_u32 v0, s13, v6, v0
-; GFX10-NEXT:    v_add_co_u32 v9, s12, v12, v11
-; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s13
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s12
-; GFX10-NEXT:    v_mul_hi_u32 v2, s1, v2
-; GFX10-NEXT:    v_add_co_u32 v7, s12, v9, v7
+; GFX10-NEXT:    v_mul_hi_u32 v7, s8, v2
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s0
+; GFX10-NEXT:    v_add_co_u32 v0, s1, v6, v0
+; GFX10-NEXT:    v_add_co_u32 v9, s0, v11, v10
+; GFX10-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s1
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
+; GFX10-NEXT:    v_mul_hi_u32 v2, s9, v2
+; GFX10-NEXT:    v_add_co_u32 v7, s0, v9, v7
 ; GFX10-NEXT:    v_add_nc_u32_e32 v0, v8, v0
-; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s12
+; GFX10-NEXT:    v_cndmask_b32_e64 v9, 0, 1, s0
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
-; GFX10-NEXT:    v_add_co_u32 v0, s12, v7, v0
+; GFX10-NEXT:    v_add_co_u32 v0, s0, v7, v0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, v6, v9
-; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s12
-; GFX10-NEXT:    v_mul_lo_u32 v6, s3, v1
-; GFX10-NEXT:    v_mul_lo_u32 v8, s2, v3
-; GFX10-NEXT:    v_mul_lo_u32 v9, s9, v0
-; GFX10-NEXT:    v_mul_hi_u32 v11, s8, v0
+; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
+; GFX10-NEXT:    v_mul_lo_u32 v6, s11, v1
+; GFX10-NEXT:    v_mul_lo_u32 v8, s10, v3
+; GFX10-NEXT:    v_mul_lo_u32 v9, s13, v0
+; GFX10-NEXT:    v_mul_hi_u32 v10, s12, v0
 ; GFX10-NEXT:    v_add3_u32 v2, v4, v5, v2
-; GFX10-NEXT:    v_mul_hi_u32 v7, s2, v1
-; GFX10-NEXT:    v_mul_lo_u32 v13, s8, v0
-; GFX10-NEXT:    v_mul_hi_u32 v1, s3, v1
-; GFX10-NEXT:    v_mul_lo_u32 v4, s3, v3
-; GFX10-NEXT:    v_mul_lo_u32 v12, s8, v2
-; GFX10-NEXT:    v_add_co_u32 v6, s12, v6, v8
-; GFX10-NEXT:    v_mul_hi_u32 v5, s2, v3
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s12
-; GFX10-NEXT:    v_mul_hi_u32 v3, s3, v3
-; GFX10-NEXT:    v_add_co_u32 v1, s12, v4, v1
-; GFX10-NEXT:    v_add3_u32 v9, v9, v12, v11
-; GFX10-NEXT:    v_sub_co_u32 v11, vcc_lo, s0, v13
+; GFX10-NEXT:    v_mul_hi_u32 v7, s10, v1
+; GFX10-NEXT:    v_mul_hi_u32 v1, s11, v1
+; GFX10-NEXT:    v_mul_lo_u32 v4, s11, v3
+; GFX10-NEXT:    v_mul_lo_u32 v13, s12, v0
+; GFX10-NEXT:    v_mul_lo_u32 v11, s12, v2
+; GFX10-NEXT:    v_add_co_u32 v6, s0, v6, v8
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, 1, s0
+; GFX10-NEXT:    v_mul_hi_u32 v5, s10, v3
+; GFX10-NEXT:    v_mul_hi_u32 v3, s11, v3
+; GFX10-NEXT:    v_add_co_u32 v1, s0, v4, v1
+; GFX10-NEXT:    v_add3_u32 v9, v9, v11, v10
+; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s0
+; GFX10-NEXT:    v_sub_co_u32 v10, vcc_lo, s8, v13
 ; GFX10-NEXT:    v_add_co_u32 v6, s0, v6, v7
-; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s1, v9
+; GFX10-NEXT:    v_sub_nc_u32_e32 v7, s9, v9
 ; GFX10-NEXT:    v_cndmask_b32_e64 v6, 0, 1, s0
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v9, s0, s1, v9, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s8, v11
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s9, v7, vcc_lo
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s9, v9
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v9, s0, s9, v9, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s12, v10
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s13, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s13, v9
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, v8, v6
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s0
-; GFX10-NEXT:    v_cndmask_b32_e64 v4, 0, 1, s12
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, -1, vcc_lo
-; GFX10-NEXT:    v_sub_co_u32 v14, vcc_lo, v11, s8
+; GFX10-NEXT:    v_sub_co_u32 v14, vcc_lo, v10, s12
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v15, s0, 0, v7, vcc_lo
 ; GFX10-NEXT:    v_add_co_u32 v1, s0, v1, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, 1, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s9, v9
-; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s9, v7, vcc_lo
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s13, v9
+; GFX10-NEXT:    v_subrev_co_ci_u32_e32 v7, vcc_lo, s13, v7, vcc_lo
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, v4, v5
-; GFX10-NEXT:    v_cndmask_b32_e64 v12, v13, v12, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s9, v15
+; GFX10-NEXT:    v_cndmask_b32_e64 v11, v13, v11, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s13, v15
 ; GFX10-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s8, v14
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s12, v14
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s0
 ; GFX10-NEXT:    v_add_co_u32 v6, s0, v1, v6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s0
 ; GFX10-NEXT:    v_add_co_u32 v5, s0, v0, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v16, s0, 0, v2, s0
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s9, v15
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s0, s13, v15
 ; GFX10-NEXT:    v_add3_u32 v3, v4, v1, v3
-; GFX10-NEXT:    v_mul_hi_u32 v18, s10, v6
+; GFX10-NEXT:    v_mul_hi_u32 v18, s14, v6
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, v13, v8, s0
-; GFX10-NEXT:    v_mul_lo_u32 v13, s11, v6
-; GFX10-NEXT:    v_mul_lo_u32 v17, s10, v3
+; GFX10-NEXT:    v_mul_lo_u32 v13, s15, v6
+; GFX10-NEXT:    v_mul_lo_u32 v17, s14, v3
 ; GFX10-NEXT:    v_add_co_u32 v1, s0, v5, 1
 ; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v8
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v4, s0, 0, v16, s0
-; GFX10-NEXT:    v_sub_co_u32 v19, s0, v14, s8
+; GFX10-NEXT:    v_sub_co_u32 v19, s0, v14, s12
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v5, v1, vcc_lo
-; GFX10-NEXT:    v_mul_lo_u32 v5, s10, v6
+; GFX10-NEXT:    v_mul_lo_u32 v5, s14, v6
 ; GFX10-NEXT:    v_cndmask_b32_e32 v4, v16, v4, vcc_lo
 ; GFX10-NEXT:    v_add3_u32 v13, v13, v17, v18
-; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v12
+; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v11
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v7, s0, 0, v7, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v1, v2, v4, vcc_lo
-; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s3, v13
-; GFX10-NEXT:    v_sub_co_u32 v12, s0, s2, v5
-; GFX10-NEXT:    v_sub_co_ci_u32_e64 v16, s1, s3, v13, s0
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, s0, s11, v2, s0
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s10, v12
+; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s11, v13
+; GFX10-NEXT:    v_sub_co_u32 v11, s0, s10, v5
+; GFX10-NEXT:    v_sub_co_ci_u32_e64 v16, s1, s11, v13, s0
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, s0, s15, v2, s0
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s14, v11
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s1, 0, v8
-; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s11, v16
+; GFX10-NEXT:    v_cmp_le_u32_e64 s2, s15, v16
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s0
-; GFX10-NEXT:    v_sub_co_u32 v13, s0, v12, s10
+; GFX10-NEXT:    v_sub_co_u32 v13, s0, v11, s14
 ; GFX10-NEXT:    v_cndmask_b32_e64 v4, v14, v19, s1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s2
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v14, s2, 0, v2, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, v15, v7, s1
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, s11, v16
-; GFX10-NEXT:    v_cndmask_b32_e32 v4, v11, v4, vcc_lo
-; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, s0, s11, v2, s0
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, s15, v16
+; GFX10-NEXT:    v_cndmask_b32_e32 v4, v10, v4, vcc_lo
+; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, s0, s15, v2, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v5, v5, v8, s1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s11, v14
+; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s15, v14
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s10, v13
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s1
+; GFX10-NEXT:    v_cmp_le_u32_e64 s1, s14, v13
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, 0, -1, s1
 ; GFX10-NEXT:    v_add_co_u32 v15, s1, v6, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v17, s1, 0, v3, s1
-; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, s11, v14
-; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v11, s1
-; GFX10-NEXT:    v_add_co_u32 v11, s1, v15, 1
+; GFX10-NEXT:    v_cmp_eq_u32_e64 s1, s15, v14
+; GFX10-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s1
+; GFX10-NEXT:    v_add_co_u32 v10, s1, v15, 1
 ; GFX10-NEXT:    v_add_co_ci_u32_e64 v18, s1, 0, v17, s1
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s0, 0, v8
-; GFX10-NEXT:    v_sub_co_u32 v8, s1, v13, s10
+; GFX10-NEXT:    v_sub_co_u32 v8, s1, v13, s14
 ; GFX10-NEXT:    v_subrev_co_ci_u32_e64 v2, s1, 0, v2, s1
-; GFX10-NEXT:    v_cndmask_b32_e64 v11, v15, v11, s0
+; GFX10-NEXT:    v_cndmask_b32_e64 v10, v15, v10, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v15, v17, v18, s0
 ; GFX10-NEXT:    v_cmp_ne_u32_e64 s1, 0, v5
 ; GFX10-NEXT:    v_cndmask_b32_e64 v8, v13, v8, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v13, v14, v2, s0
 ; GFX10-NEXT:    v_cndmask_b32_e32 v5, v9, v7, vcc_lo
-; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, v11, s1
+; GFX10-NEXT:    v_cndmask_b32_e64 v2, v6, v10, s1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v15, s1
-; GFX10-NEXT:    v_cndmask_b32_e64 v6, v12, v8, s1
+; GFX10-NEXT:    v_cndmask_b32_e64 v6, v11, v8, s1
 ; GFX10-NEXT:    v_cndmask_b32_e64 v7, v16, v13, s1
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    global_store_dwordx4 v10, v[0:3], s[4:5]
-; GFX10-NEXT:    global_store_dwordx4 v10, v[4:7], s[6:7]
+; GFX10-NEXT:    global_store_dwordx4 v12, v[0:3], s[4:5]
+; GFX10-NEXT:    global_store_dwordx4 v12, v[4:7], s[6:7]
 ; GFX10-NEXT:    s_endpgm
   %div = udiv <2 x i64> %x, %y
   store <2 x i64> %div, <2 x i64> addrspace(1)* %out0
@@ -2307,57 +2295,55 @@ define amdgpu_kernel void @udiv_i16(i16 addrspace(1)* %out0, i16 addrspace(1)* %
 define amdgpu_kernel void @udivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16> addrspace(1)* %out1, <2 x i16> %x, <2 x i16> %y) {
 ; GFX8-LABEL: udivrem_v2i16:
 ; GFX8:       ; %bb.0:
-; GFX8-NEXT:    s_load_dword s0, s[4:5], 0x14
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
 ; GFX8-NEXT:    s_mov_b32 s2, 0xffff
+; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_and_b32 s6, s0, s2
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s6
-; GFX8-NEXT:    s_lshr_b32 s3, s0, 16
-; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s3
-; GFX8-NEXT:    s_sub_i32 s1, 0, s6
+; GFX8-NEXT:    s_and_b32 s3, s1, s2
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v0, s3
+; GFX8-NEXT:    s_lshr_b32 s8, s1, 16
+; GFX8-NEXT:    v_cvt_f32_u32_e32 v1, s8
+; GFX8-NEXT:    s_sub_i32 s1, 0, s3
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX8-NEXT:    s_load_dwordx4 s[8:11], s[4:5], 0x0
-; GFX8-NEXT:    s_load_dword s0, s[4:5], 0x10
+; GFX8-NEXT:    s_lshr_b32 s9, s0, 16
 ; GFX8-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; GFX8-NEXT:    s_and_b32 s0, s0, s2
 ; GFX8-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX8-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX8-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8-NEXT:    s_lshr_b32 s4, s0, 16
 ; GFX8-NEXT:    v_mul_lo_u32 v2, s1, v0
-; GFX8-NEXT:    s_sub_i32 s1, 0, s3
-; GFX8-NEXT:    s_and_b32 s0, s0, s2
+; GFX8-NEXT:    s_sub_i32 s1, 0, s8
 ; GFX8-NEXT:    v_mul_lo_u32 v3, s1, v1
 ; GFX8-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
 ; GFX8-NEXT:    v_mul_hi_u32 v0, s0, v0
 ; GFX8-NEXT:    v_add_u32_e32 v1, vcc, v1, v3
-; GFX8-NEXT:    v_mul_hi_u32 v1, s4, v1
-; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s6
+; GFX8-NEXT:    v_mul_hi_u32 v1, s9, v1
+; GFX8-NEXT:    v_mul_lo_u32 v2, v0, s3
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
 ; GFX8-NEXT:    v_sub_u32_e32 v2, vcc, s0, v2
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s6, v2
+; GFX8-NEXT:    v_subrev_u32_e64 v3, s[0:1], s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v3, vcc, 1, v0
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s6, v2
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX8-NEXT:    v_mul_lo_u32 v3, v1, s3
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s6, v2
+; GFX8-NEXT:    v_mul_lo_u32 v3, v1, s8
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v2
 ; GFX8-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s4, v3
+; GFX8-NEXT:    v_sub_u32_e32 v3, vcc, s9, v3
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s8, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_add_u32_e32 v4, vcc, 1, v1
-; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s3, v3
+; GFX8-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
 ; GFX8-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s3, v3
+; GFX8-NEXT:    v_subrev_u32_e64 v4, s[0:1], s8, v3
 ; GFX8-NEXT:    v_and_b32_e32 v1, s2, v1
 ; GFX8-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
@@ -2365,36 +2351,34 @@ define amdgpu_kernel void @udivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16
 ; GFX8-NEXT:    v_and_b32_e32 v0, s2, v3
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
 ; GFX8-NEXT:    v_or_b32_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; GFX8-NEXT:    v_mov_b32_e32 v0, s8
-; GFX8-NEXT:    v_mov_b32_e32 v1, s9
+; GFX8-NEXT:    v_mov_b32_e32 v0, s4
+; GFX8-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX8-NEXT:    flat_store_dword v[0:1], v4
-; GFX8-NEXT:    v_mov_b32_e32 v0, s10
-; GFX8-NEXT:    v_mov_b32_e32 v1, s11
+; GFX8-NEXT:    v_mov_b32_e32 v0, s6
+; GFX8-NEXT:    v_mov_b32_e32 v1, s7
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: udivrem_v2i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x14
-; GFX9-NEXT:    s_mov_b32 s1, 0xffff
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
+; GFX9-NEXT:    s_mov_b32 s2, 0xffff
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_and_b32 s7, s0, s1
+; GFX9-NEXT:    s_and_b32 s7, s1, s2
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s7
-; GFX9-NEXT:    s_lshr_b32 s6, s0, 16
+; GFX9-NEXT:    s_lshr_b32 s6, s1, 16
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s6
-; GFX9-NEXT:    s_sub_i32 s2, 0, s7
+; GFX9-NEXT:    s_sub_i32 s1, 0, s7
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX9-NEXT:    s_load_dword s0, s[4:5], 0x10
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX9-NEXT:    s_sub_i32 s3, 0, s6
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
+; GFX9-NEXT:    s_and_b32 s9, s0, s2
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_and_b32 s9, s0, s1
-; GFX9-NEXT:    v_mul_lo_u32 v2, s2, v0
 ; GFX9-NEXT:    s_lshr_b32 s8, s0, 16
+; GFX9-NEXT:    v_mul_lo_u32 v2, s1, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s3, v1
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
@@ -2440,15 +2424,14 @@ define amdgpu_kernel void @udivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16
 ;
 ; GFX10-LABEL: udivrem_v2i16:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x14
-; GFX10-NEXT:    s_mov_b32 s2, 0xffff
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x10
+; GFX10-NEXT:    s_mov_b32 s3, 0xffff
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_lshr_b32 s1, s0, 16
-; GFX10-NEXT:    s_and_b32 s3, s0, s2
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s1
-; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s3
-; GFX10-NEXT:    s_sub_i32 s6, 0, s1
-; GFX10-NEXT:    s_load_dword s0, s[4:5], 0x10
+; GFX10-NEXT:    s_lshr_b32 s2, s1, 16
+; GFX10-NEXT:    s_and_b32 s1, s1, s3
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX10-NEXT:    v_cvt_f32_u32_e32 v1, s1
+; GFX10-NEXT:    s_sub_i32 s6, 0, s2
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX10-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX10-NEXT:    v_mul_f32_e32 v0, 0x4f7ffffe, v0
@@ -2456,38 +2439,37 @@ define amdgpu_kernel void @udivrem_v2i16(<2 x i16> addrspace(1)* %out0, <2 x i16
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX10-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX10-NEXT:    v_mul_lo_u32 v2, s6, v0
-; GFX10-NEXT:    s_sub_i32 s6, 0, s3
+; GFX10-NEXT:    s_sub_i32 s6, 0, s1
 ; GFX10-NEXT:    v_mul_lo_u32 v3, s6, v1
-; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX10-NEXT:    s_lshr_b32 s6, s0, 16
-; GFX10-NEXT:    s_and_b32 s0, s0, s2
+; GFX10-NEXT:    s_and_b32 s0, s0, s3
 ; GFX10-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX10-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX10-NEXT:    v_add_nc_u32_e32 v0, v0, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v1, v1, v3
 ; GFX10-NEXT:    v_mul_hi_u32 v0, s6, v0
 ; GFX10-NEXT:    v_mul_hi_u32 v1, s0, v1
-; GFX10-NEXT:    v_mul_lo_u32 v2, v0, s1
+; GFX10-NEXT:    v_mul_lo_u32 v2, v0, s2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT:    v_mul_lo_u32 v3, v1, s3
+; GFX10-NEXT:    v_mul_lo_u32 v3, v1, s1
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 1, v1
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v2, s6, v2
 ; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[4:5], 0x0
 ; GFX10-NEXT:    v_sub_nc_u32_e32 v3, s0, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s1, v2
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v2
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s3, v3
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s2, v2
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s0
 ; GFX10-NEXT:    v_cndmask_b32_e64 v3, v3, v7, s0
 ; GFX10-NEXT:    v_add_nc_u32_e32 v4, 1, v0
-; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s1, v2
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s1, v2
+; GFX10-NEXT:    v_cmp_le_u32_e32 vcc_lo, s2, v2
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v5, s2, v2
 ; GFX10-NEXT:    v_add_nc_u32_e32 v6, 1, v1
-; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s3, v3
-; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s3, v3
+; GFX10-NEXT:    v_cmp_le_u32_e64 s0, s1, v3
+; GFX10-NEXT:    v_subrev_nc_u32_e32 v7, s1, v3
 ; GFX10-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc_lo
 ; GFX10-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc_lo
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index e7a0847383b8e..741602f8f0338 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -2528,18 +2528,16 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GFX6-NEXT:    s_mov_b32 s8, 0xffff
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
-; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_and_b32 s9, s6, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s9
-; GFX6-NEXT:    s_lshr_b32 s9, s4, 16
-; GFX6-NEXT:    s_and_b32 s4, s4, s8
+; GFX6-NEXT:    s_and_b32 s2, s6, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX6-NEXT:    s_and_b32 s9, s4, s8
 ; GFX6-NEXT:    s_lshr_b32 s6, s6, 16
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s4
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s9
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v0
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, s6
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s9
-; GFX6-NEXT:    s_and_b32 s6, s7, s8
+; GFX6-NEXT:    s_lshr_b32 s4, s4, 16
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s4
 ; GFX6-NEXT:    v_mul_f32_e32 v2, v1, v2
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v5, v3
 ; GFX6-NEXT:    v_trunc_f32_e32 v2, v2
@@ -2548,36 +2546,38 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, v4, v5
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
+; GFX6-NEXT:    s_and_b32 s4, s7, s8
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, 0, v2, vcc
 ; GFX6-NEXT:    v_mad_f32 v2, -v1, v3, v4
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s6
-; GFX6-NEXT:    s_lshr_b32 s4, s5, 16
-; GFX6-NEXT:    s_lshr_b32 s10, s7, 16
-; GFX6-NEXT:    s_and_b32 s5, s5, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s4
+; GFX6-NEXT:    s_and_b32 s4, s5, s8
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, s5
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, s4
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v6, v4
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v3
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, s10
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v1, vcc
 ; GFX6-NEXT:    v_mul_f32_e32 v1, v5, v6
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v6, s4
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v7, v3
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mad_f32 v5, -v1, v4, v5
+; GFX6-NEXT:    s_lshr_b32 s4, s7, 16
+; GFX6-NEXT:    v_mad_f32 v3, -v1, v4, v5
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, s4
+; GFX6-NEXT:    s_lshr_b32 s4, s5, 16
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v6, s4
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v5|, v4
-; GFX6-NEXT:    v_mul_f32_e32 v4, v6, v7
-; GFX6-NEXT:    v_trunc_f32_e32 v4, v4
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v5, v4
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v7, v5
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v4
+; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX6-NEXT:    v_mad_f32 v4, -v4, v3, v6
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v3
-; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; GFX6-NEXT:    v_mul_f32_e32 v3, v6, v7
+; GFX6-NEXT:    v_trunc_f32_e32 v3, v3
+; GFX6-NEXT:    v_cvt_u32_f32_e32 v4, v3
+; GFX6-NEXT:    v_mad_f32 v3, -v3, v5, v6
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v5
 ; GFX6-NEXT:    v_and_b32_e32 v1, s8, v1
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
+; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; GFX6-NEXT:    v_and_b32_e32 v0, s8, v0
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -2592,49 +2592,49 @@ define amdgpu_kernel void @udiv_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_and_b32 s8, s6, s0
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s8
+; GFX9-NEXT:    s_lshr_b32 s6, s6, 16
+; GFX9-NEXT:    s_and_b32 s1, s4, s0
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s1
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v0
 ; GFX9-NEXT:    s_lshr_b32 s1, s4, 16
-; GFX9-NEXT:    s_and_b32 s4, s4, s0
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s4
-; GFX9-NEXT:    s_lshr_b32 s4, s6, 16
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v0
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s4
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s1
-; GFX9-NEXT:    s_and_b32 s1, s7, s0
-; GFX9-NEXT:    v_mul_f32_e32 v3, v1, v3
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v4
-; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX9-NEXT:    v_mad_f32 v1, -v3, v0, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v3
+; GFX9-NEXT:    v_mul_f32_e32 v4, v1, v4
+; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX9-NEXT:    v_mad_f32 v1, -v4, v0, v1
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, v5, v6
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_mad_f32 v3, -v1, v4, v5
+; GFX9-NEXT:    s_and_b32 s1, s7, s0
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; GFX9-NEXT:    v_mad_f32 v4, -v1, v3, v5
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s1
-; GFX9-NEXT:    s_lshr_b32 s6, s7, 16
 ; GFX9-NEXT:    s_and_b32 s0, s5, s0
+; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v7, vcc
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v6, s0
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v7, v5
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v4
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s6
-; GFX9-NEXT:    s_lshr_b32 s8, s5, 16
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v3
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
 ; GFX9-NEXT:    v_mul_f32_e32 v1, v6, v7
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, s8
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v8, v4
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_mad_f32 v6, -v1, v5, v6
+; GFX9-NEXT:    s_lshr_b32 s0, s7, 16
+; GFX9-NEXT:    v_mad_f32 v4, -v1, v5, v6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v6, s0
+; GFX9-NEXT:    s_lshr_b32 s0, s5, 16
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, s0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v6|, v5
-; GFX9-NEXT:    v_mul_f32_e32 v5, v7, v8
-; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v5
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v8, v6
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v5
 ; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT:    v_mad_f32 v5, -v5, v4, v7
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v5|, v4
+; GFX9-NEXT:    v_mul_f32_e32 v4, v7, v8
+; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v5, v4
+; GFX9-NEXT:    v_mad_f32 v4, -v4, v6, v7
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v6
+; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v5, vcc
 ; GFX9-NEXT:    v_mov_b32_e32 v5, 0xffff
-; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v6, vcc
 ; GFX9-NEXT:    v_and_b32_e32 v1, v5, v1
 ; GFX9-NEXT:    v_and_b32_e32 v0, v5, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v1, v4, 16, v1
@@ -2745,65 +2745,69 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GFX6-NEXT:    s_mov_b32 s8, 0xffff
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
-; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_and_b32 s9, s6, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s9
-; GFX6-NEXT:    s_and_b32 s10, s4, s8
-; GFX6-NEXT:    s_lshr_b32 s11, s6, 16
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s10
+; GFX6-NEXT:    s_and_b32 s2, s6, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX6-NEXT:    v_mov_b32_e32 v4, s6
+; GFX6-NEXT:    v_alignbit_b32 v4, s7, v4, 16
+; GFX6-NEXT:    s_and_b32 s9, s4, s8
+; GFX6-NEXT:    v_and_b32_e32 v5, s8, v4
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s9
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, s11
-; GFX6-NEXT:    s_lshr_b32 s9, s4, 16
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s9
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, v5
+; GFX6-NEXT:    v_mov_b32_e32 v3, s4
+; GFX6-NEXT:    v_alignbit_b32 v3, s5, v3, 16
+; GFX6-NEXT:    v_and_b32_e32 v6, s8, v3
 ; GFX6-NEXT:    v_mul_f32_e32 v2, v1, v2
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v5, v3
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v6, v6
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v7, v5
 ; GFX6-NEXT:    v_trunc_f32_e32 v2, v2
 ; GFX6-NEXT:    v_mad_f32 v1, -v2, v0, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, v4, v5
+; GFX6-NEXT:    v_mul_f32_e32 v1, v6, v7
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, 0, v2, vcc
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v1
-; GFX6-NEXT:    v_mad_f32 v1, -v1, v3, v4
+; GFX6-NEXT:    v_mad_f32 v1, -v1, v5, v6
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s6
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v3
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v5
 ; GFX6-NEXT:    s_and_b32 s6, s7, s8
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v2, vcc
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, s6
 ; GFX6-NEXT:    s_and_b32 s6, s5, s8
-; GFX6-NEXT:    v_mul_lo_u32 v1, v1, s11
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, s6
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v4, v2
-; GFX6-NEXT:    s_lshr_b32 s12, s7, 16
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s9, v1
-; GFX6-NEXT:    v_mul_f32_e32 v1, v3, v4
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s12
-; GFX6-NEXT:    s_lshr_b32 s10, s5, 16
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v6, s10
-; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v7, v4
+; GFX6-NEXT:    v_mul_lo_u32 v1, v1, v4
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s6
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v5, v2
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
-; GFX6-NEXT:    v_mad_f32 v3, -v1, v2, v3
+; GFX6-NEXT:    s_lshr_b32 s4, s7, 16
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, v3, v1
+; GFX6-NEXT:    v_mul_f32_e32 v1, v4, v5
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, s4
+; GFX6-NEXT:    s_lshr_b32 s6, s5, 16
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v6, s6
+; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v7, v5
+; GFX6-NEXT:    v_mad_f32 v4, -v1, v2, v4
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v2
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v2
 ; GFX6-NEXT:    v_mul_f32_e32 v2, v6, v7
 ; GFX6-NEXT:    v_trunc_f32_e32 v2, v2
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v3, v2
+; GFX6-NEXT:    v_cvt_u32_f32_e32 v4, v2
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; GFX6-NEXT:    v_mad_f32 v2, -v2, v4, v6
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v4
-; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
+; GFX6-NEXT:    v_mad_f32 v2, -v2, v5, v6
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v5
+; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
 ; GFX6-NEXT:    v_mul_lo_u32 v1, v1, s7
-; GFX6-NEXT:    v_mul_lo_u32 v2, v2, s12
+; GFX6-NEXT:    v_mul_lo_u32 v2, v2, s4
 ; GFX6-NEXT:    v_and_b32_e32 v0, s8, v0
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s5, v1
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s10, v2
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s6, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
 ; GFX6-NEXT:    v_and_b32_e32 v1, s8, v1
 ; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
+; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
@@ -2817,56 +2821,56 @@ define amdgpu_kernel void @urem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    s_and_b32 s8, s6, s0
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s8
-; GFX9-NEXT:    s_and_b32 s9, s4, s0
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s9
-; GFX9-NEXT:    s_lshr_b32 s9, s6, 16
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v0
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s9
-; GFX9-NEXT:    s_lshr_b32 s1, s4, 16
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s1
-; GFX9-NEXT:    v_mul_f32_e32 v3, v1, v3
-; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX9-NEXT:    v_mad_f32 v1, -v3, v0, v1
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v4
+; GFX9-NEXT:    s_and_b32 s1, s4, s0
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s1
+; GFX9-NEXT:    s_lshr_b32 s6, s6, 16
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v0
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; GFX9-NEXT:    s_lshr_b32 s4, s4, 16
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s4
+; GFX9-NEXT:    v_mul_f32_e32 v4, v1, v4
+; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v3
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; GFX9-NEXT:    v_mad_f32 v1, -v4, v0, v1
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
-; GFX9-NEXT:    s_lshr_b32 s10, s7, 16
-; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
 ; GFX9-NEXT:    v_mul_f32_e32 v1, v5, v6
-; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s6
+; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v7, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s8
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    s_and_b32 s6, s7, s0
-; GFX9-NEXT:    v_mad_f32 v3, -v1, v4, v5
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s6
+; GFX9-NEXT:    s_and_b32 s8, s7, s0
+; GFX9-NEXT:    v_mad_f32 v4, -v1, v3, v5
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s8
 ; GFX9-NEXT:    s_and_b32 s0, s5, s0
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v6, s0
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v4
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v7, v5
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s10
-; GFX9-NEXT:    s_lshr_b32 s8, s5, 16
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v3
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    v_mul_f32_e32 v3, v6, v7
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, s8
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v8, v4
+; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s6
 ; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
-; GFX9-NEXT:    v_mad_f32 v6, -v3, v5, v6
+; GFX9-NEXT:    s_lshr_b32 s6, s7, 16
+; GFX9-NEXT:    v_mad_f32 v4, -v3, v5, v6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v6, s6
+; GFX9-NEXT:    s_lshr_b32 s5, s5, 16
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v7, s5
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v6|, v5
-; GFX9-NEXT:    v_mul_f32_e32 v5, v7, v8
-; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v5
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v8, v6
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v5
+; GFX9-NEXT:    v_sub_u32_e32 v0, s1, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_mad_f32 v5, -v5, v4, v7
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v5|, v4
-; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v6, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s9
-; GFX9-NEXT:    v_mul_lo_u32 v3, v3, s7
-; GFX9-NEXT:    v_mul_lo_u32 v4, v4, s10
-; GFX9-NEXT:    v_sub_u32_e32 v0, s4, v0
-; GFX9-NEXT:    v_sub_u32_e32 v5, s1, v1
-; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v3
-; GFX9-NEXT:    v_sub_u32_e32 v3, s8, v4
+; GFX9-NEXT:    v_mul_f32_e32 v4, v7, v8
+; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v5, v4
+; GFX9-NEXT:    v_mad_f32 v4, -v4, v6, v7
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v6
+; GFX9-NEXT:    v_mul_lo_u32 v3, v3, s8
+; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v5, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v4, v4, s6
+; GFX9-NEXT:    v_sub_u32_e32 v5, s4, v1
+; GFX9-NEXT:    v_sub_u32_e32 v1, s0, v3
+; GFX9-NEXT:    v_sub_u32_e32 v3, s5, v4
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-NEXT:    v_and_b32_e32 v1, v4, v1
 ; GFX9-NEXT:    v_and_b32_e32 v0, v4, v0
@@ -3274,70 +3278,75 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; GFX6-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
+; GFX6-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX6-NEXT:    v_mov_b32_e32 v2, s6
+; GFX6-NEXT:    v_alignbit_b32 v2, s7, v2, 16
+; GFX6-NEXT:    v_bfe_i32 v3, v2, 0, 16
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v4, v3
+; GFX6-NEXT:    v_alignbit_b32 v1, s5, v1, 16
+; GFX6-NEXT:    v_bfe_i32 v5, v1, 0, 16
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v6, v5
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v7, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s6
-; GFX6-NEXT:    s_ashr_i32 s6, s6, 16
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, s6
+; GFX6-NEXT:    v_xor_b32_e32 v3, v5, v3
+; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 30, v3
+; GFX6-NEXT:    v_mul_f32_e32 v5, v6, v7
+; GFX6-NEXT:    v_trunc_f32_e32 v5, v5
+; GFX6-NEXT:    v_mad_f32 v6, -v5, v4, v6
+; GFX6-NEXT:    v_cvt_i32_f32_e32 v5, v5
 ; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
-; GFX6-NEXT:    s_ashr_i32 s4, s4, 16
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, s4
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v1
-; GFX6-NEXT:    s_xor_b32 s8, s4, s6
-; GFX6-NEXT:    s_ashr_i32 s8, s8, 30
-; GFX6-NEXT:    s_or_b32 s8, s8, 1
-; GFX6-NEXT:    v_mul_f32_e32 v3, v2, v3
-; GFX6-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX6-NEXT:    v_mad_f32 v2, -v3, v1, v2
-; GFX6-NEXT:    v_cvt_i32_f32_e32 v3, v3
-; GFX6-NEXT:    v_mov_b32_e32 v4, s8
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, |v1|
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
-; GFX6-NEXT:    v_mul_lo_u32 v1, v1, s6
-; GFX6-NEXT:    s_sext_i32_i16 s6, s7
+; GFX6-NEXT:    v_or_b32_e32 v3, 1, v3
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v6|, |v4|
+; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GFX6-NEXT:    s_sext_i32_i16 s4, s7
+; GFX6-NEXT:    v_mul_lo_u32 v2, v3, v2
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, s4
+; GFX6-NEXT:    s_sext_i32_i16 s6, s5
+; GFX6-NEXT:    s_xor_b32 s4, s6, s4
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, s6
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s4, v1
-; GFX6-NEXT:    s_sext_i32_i16 s4, s5
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, s4
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v4, v2
-; GFX6-NEXT:    s_xor_b32 s4, s4, s6
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v4, v3
 ; GFX6-NEXT:    s_ashr_i32 s4, s4, 30
 ; GFX6-NEXT:    s_or_b32 s4, s4, 1
-; GFX6-NEXT:    v_mul_f32_e32 v4, v1, v4
-; GFX6-NEXT:    v_trunc_f32_e32 v4, v4
-; GFX6-NEXT:    v_mad_f32 v1, -v4, v2, v1
 ; GFX6-NEXT:    v_mov_b32_e32 v5, s4
-; GFX6-NEXT:    s_ashr_i32 s4, s7, 16
+; GFX6-NEXT:    v_mul_f32_e32 v4, v2, v4
+; GFX6-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX6-NEXT:    v_mad_f32 v2, -v4, v3, v2
 ; GFX6-NEXT:    v_cvt_i32_f32_e32 v4, v4
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v2|
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, s4
-; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v5, vcc
-; GFX6-NEXT:    s_ashr_i32 s6, s5, 16
-; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v4
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v4, s6
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v5, v2
-; GFX6-NEXT:    v_mul_lo_u32 v1, v1, s7
-; GFX6-NEXT:    s_xor_b32 s7, s6, s4
-; GFX6-NEXT:    s_ashr_i32 s7, s7, 30
+; GFX6-NEXT:    s_ashr_i32 s4, s7, 16
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, |v3|
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, s4
+; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
+; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
+; GFX6-NEXT:    v_mul_lo_u32 v2, v2, s7
+; GFX6-NEXT:    s_lshr_b32 s6, s7, 16
+; GFX6-NEXT:    s_ashr_i32 s7, s5, 16
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v4, s7
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v5, v3
+; GFX6-NEXT:    s_xor_b32 s4, s7, s4
+; GFX6-NEXT:    s_ashr_i32 s4, s4, 30
+; GFX6-NEXT:    s_or_b32 s4, s4, 1
 ; GFX6-NEXT:    v_mul_f32_e32 v5, v4, v5
 ; GFX6-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX6-NEXT:    v_mad_f32 v4, -v5, v2, v4
+; GFX6-NEXT:    v_mad_f32 v4, -v5, v3, v4
 ; GFX6-NEXT:    v_cvt_i32_f32_e32 v5, v5
-; GFX6-NEXT:    s_or_b32 s7, s7, 1
-; GFX6-NEXT:    v_mov_b32_e32 v6, s7
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, |v2|
-; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v6, vcc
-; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v5
-; GFX6-NEXT:    v_mul_lo_u32 v2, v2, s4
-; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s5, v1
+; GFX6-NEXT:    v_mov_b32_e32 v6, s4
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, |v3|
+; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v6, vcc
+; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
+; GFX6-NEXT:    v_mul_lo_u32 v3, v3, s6
+; GFX6-NEXT:    s_lshr_b32 s4, s5, 16
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s5, v2
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s4, v3
 ; GFX6-NEXT:    s_mov_b32 s4, 0xffff
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s6, v2
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
-; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
-; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
-; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v3
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
-; GFX6-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
+; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
+; GFX6-NEXT:    v_and_b32_e32 v2, s4, v2
+; GFX6-NEXT:    v_or_b32_e32 v1, v2, v1
 ; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
@@ -3347,78 +3356,78 @@ define amdgpu_kernel void @srem_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %x
 ; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_sext_i32_i16 s0, s6
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s0
-; GFX9-NEXT:    s_sext_i32_i16 s1, s4
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, s1
-; GFX9-NEXT:    s_xor_b32 s0, s1, s0
+; GFX9-NEXT:    s_sext_i32_i16 s8, s6
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s8
+; GFX9-NEXT:    s_sext_i32_i16 s9, s4
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, s9
+; GFX9-NEXT:    s_xor_b32 s0, s9, s8
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v0
 ; GFX9-NEXT:    s_ashr_i32 s0, s0, 30
-; GFX9-NEXT:    s_or_b32 s8, s0, 1
+; GFX9-NEXT:    s_or_b32 s10, s0, 1
 ; GFX9-NEXT:    v_mul_f32_e32 v3, v1, v3
 ; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
 ; GFX9-NEXT:    v_mad_f32 v1, -v3, v0, v1
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 s[0:1], |v1|, |v0|
 ; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GFX9-NEXT:    s_cselect_b32 s0, s8, 0
-; GFX9-NEXT:    s_ashr_i32 s9, s6, 16
+; GFX9-NEXT:    s_cselect_b32 s0, s10, 0
+; GFX9-NEXT:    s_ashr_i32 s6, s6, 16
 ; GFX9-NEXT:    v_cvt_i32_f32_e32 v3, v3
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s9
-; GFX9-NEXT:    s_ashr_i32 s8, s4, 16
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s6
+; GFX9-NEXT:    s_ashr_i32 s4, s4, 16
 ; GFX9-NEXT:    v_add_u32_e32 v1, s0, v3
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, s8
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, s4
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v0
-; GFX9-NEXT:    s_xor_b32 s0, s8, s9
+; GFX9-NEXT:    s_xor_b32 s0, s4, s6
 ; GFX9-NEXT:    s_ashr_i32 s0, s0, 30
-; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s6
+; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s8
 ; GFX9-NEXT:    v_mul_f32_e32 v4, v3, v4
 ; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
 ; GFX9-NEXT:    v_mad_f32 v3, -v4, v0, v3
-; GFX9-NEXT:    v_cvt_i32_f32_e32 v4, v4
-; GFX9-NEXT:    s_or_b32 s6, s0, 1
+; GFX9-NEXT:    s_or_b32 s8, s0, 1
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 s[0:1], |v3|, |v0|
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v4, v4
 ; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GFX9-NEXT:    s_cselect_b32 s0, s6, 0
+; GFX9-NEXT:    s_cselect_b32 s0, s8, 0
+; GFX9-NEXT:    s_sext_i32_i16 s8, s7
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, s8
 ; GFX9-NEXT:    v_add_u32_e32 v0, s0, v4
-; GFX9-NEXT:    s_sext_i32_i16 s0, s7
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, s0
-; GFX9-NEXT:    s_sext_i32_i16 s1, s5
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v4, s1
-; GFX9-NEXT:    s_xor_b32 s0, s1, s0
+; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s6
+; GFX9-NEXT:    s_sext_i32_i16 s6, s5
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v4, s6
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v5, v3
+; GFX9-NEXT:    s_xor_b32 s0, s6, s8
 ; GFX9-NEXT:    s_ashr_i32 s0, s0, 30
-; GFX9-NEXT:    s_or_b32 s6, s0, 1
-; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s9
+; GFX9-NEXT:    s_or_b32 s10, s0, 1
 ; GFX9-NEXT:    v_mul_f32_e32 v5, v4, v5
 ; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
 ; GFX9-NEXT:    v_mad_f32 v4, -v5, v3, v4
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 s[0:1], |v4|, |v3|
-; GFX9-NEXT:    v_cvt_i32_f32_e32 v5, v5
 ; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GFX9-NEXT:    s_cselect_b32 s0, s6, 0
-; GFX9-NEXT:    s_ashr_i32 s6, s7, 16
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v4, s6
+; GFX9-NEXT:    s_cselect_b32 s0, s10, 0
+; GFX9-NEXT:    s_ashr_i32 s7, s7, 16
+; GFX9-NEXT:    v_cvt_i32_f32_e32 v5, v5
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v4, s7
+; GFX9-NEXT:    s_ashr_i32 s5, s5, 16
+; GFX9-NEXT:    v_sub_u32_e32 v0, s4, v0
 ; GFX9-NEXT:    v_add_u32_e32 v3, s0, v5
-; GFX9-NEXT:    v_mul_lo_u32 v3, v3, s7
-; GFX9-NEXT:    s_ashr_i32 s7, s5, 16
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v5, s7
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v5, s5
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v4
-; GFX9-NEXT:    s_xor_b32 s0, s7, s6
+; GFX9-NEXT:    s_xor_b32 s0, s5, s7
 ; GFX9-NEXT:    s_ashr_i32 s0, s0, 30
-; GFX9-NEXT:    s_or_b32 s9, s0, 1
+; GFX9-NEXT:    v_mul_lo_u32 v3, v3, s8
 ; GFX9-NEXT:    v_mul_f32_e32 v6, v5, v6
 ; GFX9-NEXT:    v_trunc_f32_e32 v6, v6
 ; GFX9-NEXT:    v_mad_f32 v5, -v6, v4, v5
 ; GFX9-NEXT:    v_cvt_i32_f32_e32 v6, v6
+; GFX9-NEXT:    s_or_b32 s8, s0, 1
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 s[0:1], |v5|, |v4|
 ; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GFX9-NEXT:    s_cselect_b32 s0, s9, 0
+; GFX9-NEXT:    s_cselect_b32 s0, s8, 0
 ; GFX9-NEXT:    v_add_u32_e32 v4, s0, v6
-; GFX9-NEXT:    v_mul_lo_u32 v4, v4, s6
-; GFX9-NEXT:    v_sub_u32_e32 v5, s4, v1
-; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v3
-; GFX9-NEXT:    v_sub_u32_e32 v0, s8, v0
-; GFX9-NEXT:    v_sub_u32_e32 v3, s7, v4
+; GFX9-NEXT:    v_mul_lo_u32 v4, v4, s7
+; GFX9-NEXT:    v_sub_u32_e32 v5, s9, v1
+; GFX9-NEXT:    v_sub_u32_e32 v1, s6, v3
+; GFX9-NEXT:    v_sub_u32_e32 v3, s5, v4
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
 ; GFX9-NEXT:    v_and_b32_e32 v1, v4, v1
 ; GFX9-NEXT:    v_lshl_or_b32 v1, v3, 16, v1
@@ -3820,21 +3829,20 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ;
 ; GFX6-LABEL: udiv_v3i16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GFX6-NEXT:    s_mov_b32 s8, 0xffff
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_and_b32 s6, s2, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s6
-; GFX6-NEXT:    s_and_b32 s6, s0, s8
-; GFX6-NEXT:    s_lshr_b32 s2, s2, 16
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s6
+; GFX6-NEXT:    s_and_b32 s2, s6, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX6-NEXT:    s_and_b32 s9, s4, s8
+; GFX6-NEXT:    s_lshr_b32 s6, s6, 16
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s9
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, s2
-; GFX6-NEXT:    s_lshr_b32 s0, s0, 16
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s0
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; GFX6-NEXT:    s_lshr_b32 s4, s4, 16
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s4
 ; GFX6-NEXT:    v_mul_f32_e32 v2, v1, v2
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v5, v3
 ; GFX6-NEXT:    v_trunc_f32_e32 v2, v2
@@ -3843,16 +3851,16 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, v4, v5
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    s_and_b32 s0, s3, s8
+; GFX6-NEXT:    s_and_b32 s4, s7, s8
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, 0, v2, vcc
 ; GFX6-NEXT:    v_mad_f32 v2, -v1, v3, v4
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s0
-; GFX6-NEXT:    s_and_b32 s0, s1, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, s0
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v4, s4
+; GFX6-NEXT:    s_and_b32 s4, s5, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, s4
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v6, v4
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v3
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX6-NEXT:    v_mul_f32_e32 v2, v5, v6
 ; GFX6-NEXT:    v_trunc_f32_e32 v2, v2
@@ -3863,55 +3871,54 @@ define amdgpu_kernel void @udiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
 ; GFX6-NEXT:    v_and_b32_e32 v0, s8, v0
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    buffer_store_short v2, off, s[4:7], 0 offset:4
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_short v2, off, s[0:3], 0 offset:4
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: udiv_v3i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    s_mov_b32 s0, 0xffff
-; GFX9-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_and_b32 s1, s2, s0
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s1
-; GFX9-NEXT:    s_and_b32 s1, s6, s0
-; GFX9-NEXT:    s_lshr_b32 s2, s2, 16
+; GFX9-NEXT:    s_and_b32 s8, s6, s0
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s8
+; GFX9-NEXT:    s_lshr_b32 s6, s6, 16
+; GFX9-NEXT:    s_and_b32 s1, s4, s0
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, s6
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, s1
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v0
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s2
-; GFX9-NEXT:    s_lshr_b32 s1, s6, 16
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v1
+; GFX9-NEXT:    s_lshr_b32 s1, s4, 16
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s1
-; GFX9-NEXT:    v_mul_f32_e32 v3, v2, v3
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v4
-; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX9-NEXT:    v_mad_f32 v2, -v3, v0, v2
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v3
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v0
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v3
+; GFX9-NEXT:    v_mul_f32_e32 v4, v2, v4
+; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX9-NEXT:    v_mad_f32 v2, -v4, v1, v2
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v2, v5, v6
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
-; GFX9-NEXT:    s_and_b32 s1, s3, s0
-; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_mad_f32 v3, -v2, v4, v5
+; GFX9-NEXT:    s_and_b32 s1, s7, s0
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; GFX9-NEXT:    v_mad_f32 v4, -v2, v3, v5
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s1
-; GFX9-NEXT:    s_and_b32 s0, s7, s0
+; GFX9-NEXT:    s_and_b32 s0, s5, s0
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v7, vcc
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v6, s0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v7, v5
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v4
-; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
-; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v4|, v3
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v3, v6, v7
 ; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v4, v3
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v2, vcc
 ; GFX9-NEXT:    v_mad_f32 v3, -v3, v5, v6
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v5
-; GFX9-NEXT:    v_lshl_or_b32 v0, v2, 16, v0
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
-; GFX9-NEXT:    global_store_short v1, v3, s[4:5] offset:4
-; GFX9-NEXT:    global_store_dword v1, v0, s[4:5]
+; GFX9-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
+; GFX9-NEXT:    global_store_short v0, v3, s[2:3] offset:4
+; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
 ; GFX9-NEXT:    s_endpgm
   %r = udiv <3 x i16> %x, %y
   store <3 x i16> %r, <3 x i16> addrspace(1)* %out
@@ -3991,114 +3998,112 @@ define amdgpu_kernel void @urem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ;
 ; GFX6-LABEL: urem_v3i16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GFX6-NEXT:    s_mov_b32 s8, 0xffff
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_and_b32 s6, s2, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s6
-; GFX6-NEXT:    s_and_b32 s6, s0, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, s6
-; GFX6-NEXT:    v_mov_b32_e32 v4, s2
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v0
-; GFX6-NEXT:    v_alignbit_b32 v4, s3, v4, 16
-; GFX6-NEXT:    v_and_b32_e32 v5, s8, v4
-; GFX6-NEXT:    v_mov_b32_e32 v1, s0
-; GFX6-NEXT:    v_mul_f32_e32 v3, v2, v3
-; GFX6-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v6, v3
-; GFX6-NEXT:    v_mad_f32 v2, -v3, v0, v2
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v0
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, v5
+; GFX6-NEXT:    s_and_b32 s9, s6, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s9
+; GFX6-NEXT:    s_and_b32 s2, s4, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, s2
+; GFX6-NEXT:    v_mov_b32_e32 v2, s6
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v4, v0
+; GFX6-NEXT:    v_alignbit_b32 v2, s7, v2, 16
+; GFX6-NEXT:    v_and_b32_e32 v5, s8, v2
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v5, v5
+; GFX6-NEXT:    v_mul_f32_e32 v4, v3, v4
+; GFX6-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX6-NEXT:    v_cvt_u32_f32_e32 v6, v4
+; GFX6-NEXT:    v_mad_f32 v3, -v4, v0, v3
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v0
+; GFX6-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, 0, v6, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s2
-; GFX6-NEXT:    v_alignbit_b32 v1, s1, v1, 16
+; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s6
+; GFX6-NEXT:    v_alignbit_b32 v1, s5, v1, 16
 ; GFX6-NEXT:    v_and_b32_e32 v3, s8, v1
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v3, v3
-; GFX6-NEXT:    v_rcp_iflag_f32_e32 v5, v2
-; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
-; GFX6-NEXT:    s_and_b32 s0, s3, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v6, s0
-; GFX6-NEXT:    v_mul_f32_e32 v5, v3, v5
-; GFX6-NEXT:    s_and_b32 s0, s1, s8
-; GFX6-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v7, s0
+; GFX6-NEXT:    v_rcp_iflag_f32_e32 v4, v5
+; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
+; GFX6-NEXT:    s_and_b32 s4, s7, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v6, s4
+; GFX6-NEXT:    v_mul_f32_e32 v4, v3, v4
+; GFX6-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX6-NEXT:    v_mad_f32 v3, -v4, v5, v3
+; GFX6-NEXT:    v_cvt_u32_f32_e32 v4, v4
+; GFX6-NEXT:    s_and_b32 s4, s5, s8
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v7, s4
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v8, v6
-; GFX6-NEXT:    v_mad_f32 v3, -v5, v2, v3
-; GFX6-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v2
+; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v5
+; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
+; GFX6-NEXT:    v_mul_lo_u32 v2, v3, v2
 ; GFX6-NEXT:    v_mul_f32_e32 v3, v7, v8
 ; GFX6-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, v2, v4
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v4, v3
 ; GFX6-NEXT:    v_mad_f32 v3, -v3, v6, v7
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v6
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, 0, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v3, v3, s3
+; GFX6-NEXT:    v_mul_lo_u32 v3, v3, s7
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s1, v3
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s5, v3
 ; GFX6-NEXT:    v_and_b32_e32 v0, s8, v0
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    buffer_store_short v2, off, s[4:7], 0 offset:4
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_short v2, off, s[0:3], 0 offset:4
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: urem_v3i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x24
-; GFX9-NEXT:    s_mov_b32 s0, 0xffff
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_mov_b32 s2, 0xffff
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_and_b32 s1, s2, s0
-; GFX9-NEXT:    s_and_b32 s8, s4, s0
+; GFX9-NEXT:    s_and_b32 s8, s6, s2
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s8
+; GFX9-NEXT:    s_and_b32 s3, s4, s2
+; GFX9-NEXT:    s_lshr_b32 s6, s6, 16
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, s3
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v0
 ; GFX9-NEXT:    s_lshr_b32 s4, s4, 16
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, s4
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s1
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v0
-; GFX9-NEXT:    s_lshr_b32 s2, s2, 16
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s2
-; GFX9-NEXT:    v_rcp_iflag_f32_e32 v5, v2
-; GFX9-NEXT:    v_mul_f32_e32 v3, v1, v3
-; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX9-NEXT:    v_mad_f32 v1, -v3, v0, v1
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
-; GFX9-NEXT:    v_mul_f32_e32 v1, v4, v5
-; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    s_and_b32 s5, s5, s0
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v3
-; GFX9-NEXT:    v_mad_f32 v3, -v1, v2, v4
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s5
-; GFX9-NEXT:    s_and_b32 s0, s3, s0
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, s4
+; GFX9-NEXT:    v_rcp_iflag_f32_e32 v5, v1
+; GFX9-NEXT:    v_mul_f32_e32 v4, v2, v4
+; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
+; GFX9-NEXT:    s_and_b32 s7, s7, s2
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v4
+; GFX9-NEXT:    v_mad_f32 v2, -v4, v0, v2
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, s7
+; GFX9-NEXT:    v_mul_f32_e32 v5, v3, v5
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v0
+; GFX9-NEXT:    v_trunc_f32_e32 v2, v5
+; GFX9-NEXT:    s_and_b32 s2, s5, s2
 ; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v6, vcc
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s0
+; GFX9-NEXT:    v_mad_f32 v3, -v2, v1, v3
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, s2
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v6, v4
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v2
-; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s8
+; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, v1
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v2, vcc
 ; GFX9-NEXT:    v_mul_f32_e32 v2, v5, v6
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v2
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
 ; GFX9-NEXT:    v_mad_f32 v2, -v2, v4, v5
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v4
+; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s8
 ; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s4
-; GFX9-NEXT:    v_mul_lo_u32 v2, v2, s5
-; GFX9-NEXT:    v_sub_u32_e32 v0, s1, v0
+; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s6
+; GFX9-NEXT:    v_mul_lo_u32 v2, v2, s7
+; GFX9-NEXT:    v_sub_u32_e32 v0, s3, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v3, 0
-; GFX9-NEXT:    v_sub_u32_e32 v1, s2, v1
-; GFX9-NEXT:    v_sub_u32_e32 v2, s0, v2
+; GFX9-NEXT:    v_sub_u32_e32 v1, s4, v1
+; GFX9-NEXT:    v_sub_u32_e32 v2, s2, v2
 ; GFX9-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
-; GFX9-NEXT:    global_store_short v3, v2, s[6:7] offset:4
-; GFX9-NEXT:    global_store_dword v3, v0, s[6:7]
+; GFX9-NEXT:    global_store_short v3, v2, s[0:1] offset:4
+; GFX9-NEXT:    global_store_dword v3, v0, s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %r = urem <3 x i16> %x, %y
   store <3 x i16> %r, <3 x i16> addrspace(1)* %out
@@ -4184,19 +4189,18 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ;
 ; GFX6-LABEL: sdiv_v3i16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_sext_i32_i16 s8, s2
+; GFX6-NEXT:    s_sext_i32_i16 s8, s6
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, s8
-; GFX6-NEXT:    s_sext_i32_i16 s9, s0
+; GFX6-NEXT:    s_sext_i32_i16 s9, s4
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, s9
 ; GFX6-NEXT:    s_xor_b32 s8, s9, s8
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GFX6-NEXT:    s_ashr_i32 s2, s2, 16
+; GFX6-NEXT:    s_ashr_i32 s6, s6, 16
 ; GFX6-NEXT:    s_ashr_i32 s8, s8, 30
 ; GFX6-NEXT:    s_or_b32 s8, s8, 1
 ; GFX6-NEXT:    v_mul_f32_e32 v2, v1, v2
@@ -4204,55 +4208,54 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; GFX6-NEXT:    v_mad_f32 v1, -v2, v0, v1
 ; GFX6-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, s2
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, s6
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s8
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
-; GFX6-NEXT:    s_ashr_i32 s0, s0, 16
+; GFX6-NEXT:    s_ashr_i32 s4, s4, 16
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, s0
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, s4
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v3, v1
-; GFX6-NEXT:    s_xor_b32 s0, s0, s2
-; GFX6-NEXT:    s_ashr_i32 s0, s0, 30
-; GFX6-NEXT:    s_or_b32 s0, s0, 1
+; GFX6-NEXT:    s_xor_b32 s4, s4, s6
+; GFX6-NEXT:    s_ashr_i32 s4, s4, 30
+; GFX6-NEXT:    s_or_b32 s4, s4, 1
 ; GFX6-NEXT:    v_mul_f32_e32 v3, v2, v3
 ; GFX6-NEXT:    v_trunc_f32_e32 v3, v3
 ; GFX6-NEXT:    v_mad_f32 v2, -v3, v1, v2
-; GFX6-NEXT:    v_mov_b32_e32 v4, s0
-; GFX6-NEXT:    s_sext_i32_i16 s0, s3
+; GFX6-NEXT:    v_mov_b32_e32 v4, s4
+; GFX6-NEXT:    s_sext_i32_i16 s4, s7
 ; GFX6-NEXT:    v_cvt_i32_f32_e32 v3, v3
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, |v1|
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, s0
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v2, s4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, 0, v4, vcc
-; GFX6-NEXT:    s_sext_i32_i16 s1, s1
+; GFX6-NEXT:    s_sext_i32_i16 s5, s5
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v1, v3
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, s1
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, s5
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v4, v2
-; GFX6-NEXT:    s_xor_b32 s0, s1, s0
-; GFX6-NEXT:    s_ashr_i32 s0, s0, 30
-; GFX6-NEXT:    s_or_b32 s0, s0, 1
+; GFX6-NEXT:    s_xor_b32 s4, s5, s4
+; GFX6-NEXT:    s_ashr_i32 s4, s4, 30
+; GFX6-NEXT:    s_or_b32 s4, s4, 1
 ; GFX6-NEXT:    v_mul_f32_e32 v4, v3, v4
 ; GFX6-NEXT:    v_trunc_f32_e32 v4, v4
 ; GFX6-NEXT:    v_mad_f32 v3, -v4, v2, v3
 ; GFX6-NEXT:    v_cvt_i32_f32_e32 v4, v4
-; GFX6-NEXT:    v_mov_b32_e32 v5, s0
+; GFX6-NEXT:    v_mov_b32_e32 v5, s4
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
-; GFX6-NEXT:    buffer_store_short v2, off, s[4:7], 0 offset:4
-; GFX6-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_short v2, off, s[0:3], 0 offset:4
+; GFX6-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: sdiv_v3i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_sext_i32_i16 s0, s2
+; GFX9-NEXT:    s_sext_i32_i16 s0, s6
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s0
 ; GFX9-NEXT:    s_sext_i32_i16 s1, s4
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v2, s1
@@ -4266,44 +4269,44 @@ define amdgpu_kernel void @sdiv_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 s[0:1], |v2|, |v0|
 ; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], exec
 ; GFX9-NEXT:    s_cselect_b32 s0, s8, 0
-; GFX9-NEXT:    s_ashr_i32 s1, s2, 16
+; GFX9-NEXT:    s_ashr_i32 s1, s6, 16
 ; GFX9-NEXT:    v_cvt_i32_f32_e32 v3, v3
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s1
-; GFX9-NEXT:    s_ashr_i32 s2, s4, 16
+; GFX9-NEXT:    s_ashr_i32 s4, s4, 16
 ; GFX9-NEXT:    v_add_u32_e32 v2, s0, v3
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, s2
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, s4
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v0
-; GFX9-NEXT:    s_xor_b32 s0, s2, s1
+; GFX9-NEXT:    s_xor_b32 s0, s4, s1
 ; GFX9-NEXT:    s_ashr_i32 s0, s0, 30
-; GFX9-NEXT:    s_or_b32 s2, s0, 1
+; GFX9-NEXT:    s_or_b32 s4, s0, 1
 ; GFX9-NEXT:    v_mul_f32_e32 v4, v3, v4
 ; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
 ; GFX9-NEXT:    v_mad_f32 v3, -v4, v0, v3
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 s[0:1], |v3|, |v0|
 ; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], exec
 ; GFX9-NEXT:    v_cvt_i32_f32_e32 v4, v4
-; GFX9-NEXT:    s_sext_i32_i16 s1, s3
+; GFX9-NEXT:    s_sext_i32_i16 s1, s7
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s1
-; GFX9-NEXT:    s_cselect_b32 s0, s2, 0
+; GFX9-NEXT:    s_cselect_b32 s0, s4, 0
 ; GFX9-NEXT:    v_add_u32_e32 v3, s0, v4
 ; GFX9-NEXT:    s_sext_i32_i16 s0, s5
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v4, s0
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v5, v0
 ; GFX9-NEXT:    s_xor_b32 s0, s0, s1
 ; GFX9-NEXT:    s_ashr_i32 s0, s0, 30
-; GFX9-NEXT:    s_or_b32 s2, s0, 1
+; GFX9-NEXT:    s_or_b32 s4, s0, 1
 ; GFX9-NEXT:    v_mul_f32_e32 v5, v4, v5
 ; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
 ; GFX9-NEXT:    v_mad_f32 v4, -v5, v0, v4
 ; GFX9-NEXT:    v_cvt_i32_f32_e32 v5, v5
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 s[0:1], |v4|, |v0|
 ; GFX9-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GFX9-NEXT:    s_cselect_b32 s0, s2, 0
+; GFX9-NEXT:    s_cselect_b32 s0, s4, 0
 ; GFX9-NEXT:    v_add_u32_e32 v0, s0, v5
 ; GFX9-NEXT:    v_and_b32_e32 v2, 0xffff, v2
 ; GFX9-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
-; GFX9-NEXT:    global_store_short v1, v0, s[6:7] offset:4
-; GFX9-NEXT:    global_store_dword v1, v2, s[6:7]
+; GFX9-NEXT:    global_store_short v1, v0, s[2:3] offset:4
+; GFX9-NEXT:    global_store_dword v1, v2, s[2:3]
 ; GFX9-NEXT:    s_endpgm
   %r = sdiv <3 x i16> %x, %y
   store <3 x i16> %r, <3 x i16> addrspace(1)* %out
@@ -4395,51 +4398,51 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ;
 ; GFX6-LABEL: srem_v3i16:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
-; GFX6-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_sext_i32_i16 s2, s4
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, s2
 ; GFX6-NEXT:    s_sext_i32_i16 s8, s6
-; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, s8
-; GFX6-NEXT:    s_xor_b32 s2, s8, s2
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v0, s8
+; GFX6-NEXT:    s_sext_i32_i16 s9, s4
+; GFX6-NEXT:    v_cvt_f32_i32_e32 v1, s9
+; GFX6-NEXT:    s_xor_b32 s8, s9, s8
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GFX6-NEXT:    s_ashr_i32 s2, s2, 30
-; GFX6-NEXT:    s_or_b32 s2, s2, 1
-; GFX6-NEXT:    v_mov_b32_e32 v3, s2
+; GFX6-NEXT:    s_ashr_i32 s8, s8, 30
+; GFX6-NEXT:    s_or_b32 s8, s8, 1
+; GFX6-NEXT:    v_mov_b32_e32 v3, s8
 ; GFX6-NEXT:    v_mul_f32_e32 v2, v1, v2
 ; GFX6-NEXT:    v_trunc_f32_e32 v2, v2
 ; GFX6-NEXT:    v_mad_f32 v1, -v2, v0, v1
 ; GFX6-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
-; GFX6-NEXT:    v_mov_b32_e32 v1, s6
+; GFX6-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
-; GFX6-NEXT:    v_mov_b32_e32 v2, s4
-; GFX6-NEXT:    v_alignbit_b32 v2, s5, v2, 16
+; GFX6-NEXT:    v_mov_b32_e32 v2, s6
+; GFX6-NEXT:    v_alignbit_b32 v2, s7, v2, 16
 ; GFX6-NEXT:    v_bfe_i32 v3, v2, 0, 16
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v4, v3
-; GFX6-NEXT:    v_alignbit_b32 v1, s7, v1, 16
+; GFX6-NEXT:    v_alignbit_b32 v1, s5, v1, 16
 ; GFX6-NEXT:    v_bfe_i32 v5, v1, 0, 16
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v6, v5
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v7, v4
-; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s4
+; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s6
 ; GFX6-NEXT:    v_xor_b32_e32 v3, v5, v3
-; GFX6-NEXT:    s_sext_i32_i16 s4, s5
+; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 30, v3
 ; GFX6-NEXT:    v_mul_f32_e32 v5, v6, v7
 ; GFX6-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s6, v0
+; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
 ; GFX6-NEXT:    v_mad_f32 v6, -v5, v4, v6
 ; GFX6-NEXT:    v_cvt_i32_f32_e32 v5, v5
-; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 30, v3
+; GFX6-NEXT:    s_sext_i32_i16 s4, s7
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v6|, |v4|
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v4, s4
 ; GFX6-NEXT:    v_or_b32_e32 v3, 1, v3
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    s_sext_i32_i16 s6, s7
+; GFX6-NEXT:    s_sext_i32_i16 s6, s5
 ; GFX6-NEXT:    v_mul_lo_u32 v2, v3, v2
 ; GFX6-NEXT:    v_cvt_f32_i32_e32 v3, s6
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v5, v4
@@ -4454,11 +4457,10 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ; GFX6-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v4|
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, 0, v6, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    v_mul_lo_u32 v3, v3, s5
+; GFX6-NEXT:    v_mul_lo_u32 v3, v3, s7
 ; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, v1, v2
-; GFX6-NEXT:    s_mov_b32 s2, -1
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s7, v3
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s5, v3
 ; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
 ; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
 ; GFX6-NEXT:    buffer_store_short v2, off, s[0:3], 0 offset:4
@@ -4467,66 +4469,65 @@ define amdgpu_kernel void @srem_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %x
 ;
 ; GFX9-LABEL: srem_v3i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_sext_i32_i16 s8, s2
+; GFX9-NEXT:    s_sext_i32_i16 s8, s6
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s8
 ; GFX9-NEXT:    s_sext_i32_i16 s9, s4
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v1, s9
-; GFX9-NEXT:    s_xor_b32 s6, s9, s8
+; GFX9-NEXT:    s_xor_b32 s2, s9, s8
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GFX9-NEXT:    s_ashr_i32 s6, s6, 30
-; GFX9-NEXT:    s_or_b32 s10, s6, 1
-; GFX9-NEXT:    s_sext_i32_i16 s5, s5
+; GFX9-NEXT:    s_ashr_i32 s2, s2, 30
+; GFX9-NEXT:    s_or_b32 s10, s2, 1
+; GFX9-NEXT:    s_sext_i32_i16 s7, s7
 ; GFX9-NEXT:    v_mul_f32_e32 v2, v1, v2
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
 ; GFX9-NEXT:    v_mad_f32 v1, -v2, v0, v1
-; GFX9-NEXT:    v_cmp_ge_f32_e64 s[6:7], |v1|, |v0|
-; GFX9-NEXT:    s_and_b64 s[6:7], s[6:7], exec
-; GFX9-NEXT:    s_cselect_b32 s6, s10, 0
-; GFX9-NEXT:    s_ashr_i32 s2, s2, 16
+; GFX9-NEXT:    v_cmp_ge_f32_e64 s[2:3], |v1|, |v0|
+; GFX9-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; GFX9-NEXT:    s_cselect_b32 s2, s10, 0
+; GFX9-NEXT:    s_ashr_i32 s6, s6, 16
 ; GFX9-NEXT:    v_cvt_i32_f32_e32 v2, v2
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s2
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v0, s6
 ; GFX9-NEXT:    s_ashr_i32 s4, s4, 16
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9-NEXT:    v_add_u32_e32 v1, s6, v2
+; GFX9-NEXT:    s_sext_i32_i16 s5, s5
+; GFX9-NEXT:    v_add_u32_e32 v1, s2, v2
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v2, s4
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v3, v0
-; GFX9-NEXT:    s_xor_b32 s6, s4, s2
-; GFX9-NEXT:    s_ashr_i32 s6, s6, 30
+; GFX9-NEXT:    s_xor_b32 s2, s4, s6
+; GFX9-NEXT:    s_ashr_i32 s2, s2, 30
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s8
 ; GFX9-NEXT:    v_mul_f32_e32 v3, v2, v3
 ; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
 ; GFX9-NEXT:    v_mad_f32 v2, -v3, v0, v2
+; GFX9-NEXT:    s_or_b32 s8, s2, 1
 ; GFX9-NEXT:    v_cvt_i32_f32_e32 v3, v3
-; GFX9-NEXT:    s_or_b32 s8, s6, 1
-; GFX9-NEXT:    v_cmp_ge_f32_e64 s[6:7], |v2|, |v0|
-; GFX9-NEXT:    s_and_b64 s[6:7], s[6:7], exec
-; GFX9-NEXT:    s_cselect_b32 s6, s8, 0
-; GFX9-NEXT:    v_add_u32_e32 v0, s6, v3
-; GFX9-NEXT:    s_sext_i32_i16 s6, s3
-; GFX9-NEXT:    v_cvt_f32_i32_e32 v2, s6
+; GFX9-NEXT:    v_cmp_ge_f32_e64 s[2:3], |v2|, |v0|
+; GFX9-NEXT:    v_cvt_f32_i32_e32 v2, s7
+; GFX9-NEXT:    s_and_b64 s[2:3], s[2:3], exec
+; GFX9-NEXT:    s_cselect_b32 s2, s8, 0
+; GFX9-NEXT:    v_add_u32_e32 v0, s2, v3
 ; GFX9-NEXT:    v_cvt_f32_i32_e32 v3, s5
-; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s2
-; GFX9-NEXT:    s_xor_b32 s2, s5, s6
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v4, v2
+; GFX9-NEXT:    s_xor_b32 s2, s5, s7
 ; GFX9-NEXT:    s_ashr_i32 s2, s2, 30
-; GFX9-NEXT:    s_or_b32 s7, s2, 1
-; GFX9-NEXT:    v_sub_u32_e32 v1, s9, v1
+; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s6
 ; GFX9-NEXT:    v_mul_f32_e32 v4, v3, v4
 ; GFX9-NEXT:    v_trunc_f32_e32 v4, v4
 ; GFX9-NEXT:    v_mad_f32 v3, -v4, v2, v3
 ; GFX9-NEXT:    v_cvt_i32_f32_e32 v4, v4
+; GFX9-NEXT:    s_or_b32 s6, s2, 1
 ; GFX9-NEXT:    v_cmp_ge_f32_e64 s[2:3], |v3|, |v2|
 ; GFX9-NEXT:    s_and_b64 s[2:3], s[2:3], exec
-; GFX9-NEXT:    s_cselect_b32 s2, s7, 0
+; GFX9-NEXT:    s_cselect_b32 s2, s6, 0
 ; GFX9-NEXT:    v_add_u32_e32 v2, s2, v4
-; GFX9-NEXT:    v_mul_lo_u32 v2, v2, s6
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX9-NEXT:    v_mul_lo_u32 v2, v2, s7
+; GFX9-NEXT:    v_sub_u32_e32 v1, s9, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX9-NEXT:    v_sub_u32_e32 v0, s4, v0
-; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX9-NEXT:    v_sub_u32_e32 v2, s5, v2
+; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
 ; GFX9-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9-NEXT:    global_store_short v3, v2, s[0:1] offset:4
@@ -5680,109 +5681,106 @@ define amdgpu_kernel void @udiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ;
 ; GFX6-LABEL: udiv_v2i32_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
-; GFX6-NEXT:    s_movk_i32 s4, 0x1000
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
+; GFX6-NEXT:    s_movk_i32 s2, 0x1000
+; GFX6-NEXT:    s_mov_b32 s0, 0x4f7ffffe
+; GFX6-NEXT:    s_mov_b32 s11, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s8, s4, s2
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s8
-; GFX6-NEXT:    s_lshl_b32 s9, s4, s3
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s9
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GFX6-NEXT:    s_lshl_b32 s3, s2, s6
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s3
+; GFX6-NEXT:    s_lshl_b32 s2, s2, s7
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s2
+; GFX6-NEXT:    s_mov_b32 s10, -1
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX6-NEXT:    s_mov_b32 s0, 0x4f7ffffe
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX6-NEXT:    v_mul_f32_e32 v0, s0, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, s0, v1
-; GFX6-NEXT:    s_sub_i32 s0, 0, s8
+; GFX6-NEXT:    s_sub_i32 s0, 0, s3
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_mul_lo_u32 v2, s0, v0
-; GFX6-NEXT:    s_sub_i32 s0, 0, s9
+; GFX6-NEXT:    s_sub_i32 s0, 0, s2
 ; GFX6-NEXT:    v_mul_lo_u32 v3, s0, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    v_mul_hi_u32 v0, s2, v0
+; GFX6-NEXT:    v_mul_hi_u32 v0, s4, v0
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
-; GFX6-NEXT:    v_mul_hi_u32 v1, s3, v1
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s8
+; GFX6-NEXT:    v_mul_hi_u32 v1, s5, v1
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s3
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
-; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s9
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s2, v2
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v2
+; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s2
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s4, v2
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[0:1]
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s8, v2
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s8, v2
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s3, v4
+; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s5, v4
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v2
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s9, v2
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s9, v2
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: udiv_v2i32_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_movk_i32 s4, 0x1000
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_movk_i32 s2, 0x1000
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshl_b32 s5, s4, s3
-; GFX9-NEXT:    s_lshl_b32 s4, s4, s2
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s4
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s5
+; GFX9-NEXT:    s_lshl_b32 s6, s2, s6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s6
+; GFX9-NEXT:    s_lshl_b32 s7, s2, s7
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s7
 ; GFX9-NEXT:    s_mov_b32 s2, 0x4f7ffffe
-; GFX9-NEXT:    s_sub_i32 s3, 0, s5
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
+; GFX9-NEXT:    s_sub_i32 s3, 0, s7
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v0, s2, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, s2, v1
-; GFX9-NEXT:    s_sub_i32 s2, 0, s4
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT:    s_sub_i32 s2, 0, s6
 ; GFX9-NEXT:    v_mul_lo_u32 v2, s2, v0
 ; GFX9-NEXT:    v_mul_lo_u32 v3, s3, v1
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
+; GFX9-NEXT:    v_mul_hi_u32 v0, s4, v0
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
-; GFX9-NEXT:    v_mul_hi_u32 v1, s3, v1
+; GFX9-NEXT:    v_mul_hi_u32 v1, s5, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mul_lo_u32 v3, v0, s4
+; GFX9-NEXT:    v_mul_lo_u32 v3, v0, s6
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v0
-; GFX9-NEXT:    v_mul_lo_u32 v4, v1, s5
+; GFX9-NEXT:    v_mul_lo_u32 v4, v1, s7
 ; GFX9-NEXT:    v_add_u32_e32 v6, 1, v1
-; GFX9-NEXT:    v_sub_u32_e32 v3, s2, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
+; GFX9-NEXT:    v_sub_u32_e32 v3, s4, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v5, s4, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v5, s6, v3
+; GFX9-NEXT:    v_sub_u32_e32 v4, s5, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX9-NEXT:    v_sub_u32_e32 v4, s3, v4
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s7, v4
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s6, v3
+; GFX9-NEXT:    v_subrev_u32_e32 v3, s7, v4
+; GFX9-NEXT:    v_cndmask_b32_e64 v1, v1, v6, s[0:1]
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
+; GFX9-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v4
-; GFX9-NEXT:    v_subrev_u32_e32 v3, s5, v4
-; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v6, vcc
-; GFX9-NEXT:    v_cndmask_b32_e32 v3, v4, v3, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s7, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; GFX9-NEXT:    s_endpgm
   %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
   %r = udiv <2 x i32> %x, %shl.y
@@ -6020,18 +6018,18 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ;
 ; GFX6-LABEL: urem_v2i32_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
-; GFX6-NEXT:    s_movk_i32 s4, 0x1000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
+; GFX6-NEXT:    s_movk_i32 s2, 0x1000
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s6, s4, s2
+; GFX6-NEXT:    s_lshl_b32 s6, s2, s6
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s6
-; GFX6-NEXT:    s_lshl_b32 s7, s4, s3
+; GFX6-NEXT:    s_lshl_b32 s7, s2, s7
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s7
 ; GFX6-NEXT:    s_mov_b32 s2, 0x4f7ffffe
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v1, v1
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GFX6-NEXT:    v_mul_f32_e32 v0, s2, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, s2, v1
@@ -6039,13 +6037,11 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_mul_lo_u32 v2, s2, v0
 ; GFX6-NEXT:    s_sub_i32 s2, 0, s7
-; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    v_mul_lo_u32 v3, s2, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v2, v0
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s4, v0
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v1, s5, v1
@@ -6070,49 +6066,47 @@ define amdgpu_kernel void @urem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ;
 ; GFX9-LABEL: urem_v2i32_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_movk_i32 s4, 0x1000
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_movk_i32 s2, 0x1000
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshl_b32 s5, s4, s3
-; GFX9-NEXT:    s_lshl_b32 s4, s4, s2
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s4
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s5
-; GFX9-NEXT:    s_mov_b32 s2, 0x4f7ffffe
-; GFX9-NEXT:    s_sub_i32 s3, 0, s5
+; GFX9-NEXT:    s_lshl_b32 s3, s2, s7
+; GFX9-NEXT:    s_lshl_b32 s2, s2, s6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s2
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s3
+; GFX9-NEXT:    s_mov_b32 s6, 0x4f7ffffe
+; GFX9-NEXT:    s_sub_i32 s7, 0, s3
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
-; GFX9-NEXT:    v_mul_f32_e32 v0, s2, v0
-; GFX9-NEXT:    v_mul_f32_e32 v1, s2, v1
+; GFX9-NEXT:    v_mul_f32_e32 v0, s6, v0
+; GFX9-NEXT:    v_mul_f32_e32 v1, s6, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    s_sub_i32 s2, 0, s4
-; GFX9-NEXT:    v_mul_lo_u32 v2, s2, v0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s3, v1
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_sub_i32 s6, 0, s2
+; GFX9-NEXT:    v_mul_lo_u32 v2, s6, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s7, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
-; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v2
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mul_hi_u32 v0, s2, v0
-; GFX9-NEXT:    v_mul_hi_u32 v1, s3, v1
+; GFX9-NEXT:    v_mul_hi_u32 v0, s4, v0
+; GFX9-NEXT:    v_mul_hi_u32 v1, s5, v1
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s4
-; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s5
-; GFX9-NEXT:    v_sub_u32_e32 v0, s2, v0
-; GFX9-NEXT:    v_sub_u32_e32 v1, s3, v1
-; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s5, v1
+; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s2
+; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s3
+; GFX9-NEXT:    v_sub_u32_e32 v0, s4, v0
+; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
+; GFX9-NEXT:    v_subrev_u32_e32 v3, s2, v0
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s3, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v3, s4, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s4, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v4, s5, v1
+; GFX9-NEXT:    v_subrev_u32_e32 v3, s2, v0
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
+; GFX9-NEXT:    v_subrev_u32_e32 v4, s3, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s5, v1
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9-NEXT:    s_endpgm
@@ -6492,133 +6486,130 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ;
 ; GFX6-LABEL: sdiv_v2i32_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
-; GFX6-NEXT:    s_movk_i32 s10, 0x1000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
+; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
+; GFX6-NEXT:    s_movk_i32 s2, 0x1000
 ; GFX6-NEXT:    s_mov_b32 s12, 0x4f7ffffe
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_mov_b32 s11, 0xf000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s2, s10, s2
-; GFX6-NEXT:    s_ashr_i32 s11, s2, 31
-; GFX6-NEXT:    s_add_i32 s2, s2, s11
-; GFX6-NEXT:    s_xor_b32 s2, s2, s11
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s2
-; GFX6-NEXT:    s_lshl_b32 s0, s10, s3
-; GFX6-NEXT:    s_sub_i32 s10, 0, s2
-; GFX6-NEXT:    s_ashr_i32 s3, s0, 31
+; GFX6-NEXT:    s_lshl_b32 s3, s2, s6
+; GFX6-NEXT:    s_ashr_i32 s6, s3, 31
+; GFX6-NEXT:    s_add_i32 s3, s3, s6
+; GFX6-NEXT:    s_xor_b32 s3, s3, s6
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s3
+; GFX6-NEXT:    s_lshl_b32 s0, s2, s7
+; GFX6-NEXT:    s_sub_i32 s7, 0, s3
+; GFX6-NEXT:    s_ashr_i32 s2, s0, 31
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX6-NEXT:    s_add_i32 s0, s0, s3
-; GFX6-NEXT:    s_ashr_i32 s1, s8, 31
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_add_i32 s0, s0, s2
+; GFX6-NEXT:    s_ashr_i32 s1, s4, 31
+; GFX6-NEXT:    s_mov_b32 s10, -1
 ; GFX6-NEXT:    v_mul_f32_e32 v0, s12, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT:    v_mul_lo_u32 v1, s10, v0
-; GFX6-NEXT:    s_xor_b32 s10, s0, s3
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, s10
-; GFX6-NEXT:    s_add_i32 s0, s8, s1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s7, v0
+; GFX6-NEXT:    s_xor_b32 s7, s0, s2
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, s7
+; GFX6-NEXT:    s_add_i32 s0, s4, s1
 ; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; GFX6-NEXT:    s_xor_b32 s0, s0, s1
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; GFX6-NEXT:    s_xor_b32 s8, s1, s11
+; GFX6-NEXT:    s_xor_b32 s4, s1, s6
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
 ; GFX6-NEXT:    v_mul_hi_u32 v0, s0, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, s12, v2
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s2
+; GFX6-NEXT:    v_mul_lo_u32 v2, v0, s3
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s0, v2
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v2
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v0, v0, v3, s[0:1]
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v2
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GFX6-NEXT:    s_sub_i32 s0, 0, s10
+; GFX6-NEXT:    s_sub_i32 s0, 0, s7
 ; GFX6-NEXT:    v_mul_lo_u32 v3, s0, v1
-; GFX6-NEXT:    s_ashr_i32 s0, s9, 31
-; GFX6-NEXT:    s_add_i32 s1, s9, s0
+; GFX6-NEXT:    s_ashr_i32 s0, s5, 31
+; GFX6-NEXT:    s_add_i32 s1, s5, s0
 ; GFX6-NEXT:    s_xor_b32 s1, s1, s0
 ; GFX6-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
+; GFX6-NEXT:    s_xor_b32 s2, s0, s2
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v3, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v1, s1, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s2, v2
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
-; GFX6-NEXT:    s_xor_b32 s2, s0, s3
-; GFX6-NEXT:    v_mul_lo_u32 v2, v1, s10
+; GFX6-NEXT:    v_xor_b32_e32 v0, s4, v0
+; GFX6-NEXT:    v_mul_lo_u32 v2, v1, s7
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
-; GFX6-NEXT:    v_xor_b32_e32 v0, s8, v0
+; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s4, v0
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, s1, v2
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v2
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[0:1]
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s10, v2
-; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s8, v0
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, 1, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s10, v2
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s7, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v1, s2, v1
 ; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, s2, v1
-; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: sdiv_v2i32_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_movk_i32 s8, 0x1000
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_movk_i32 s0, 0x1000
 ; GFX9-NEXT:    s_mov_b32 s10, 0x4f7ffffe
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshl_b32 s2, s8, s2
-; GFX9-NEXT:    s_ashr_i32 s9, s2, 31
-; GFX9-NEXT:    s_add_i32 s2, s2, s9
-; GFX9-NEXT:    s_xor_b32 s2, s2, s9
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s2
-; GFX9-NEXT:    s_lshl_b32 s0, s8, s3
-; GFX9-NEXT:    s_ashr_i32 s1, s0, 31
-; GFX9-NEXT:    s_add_i32 s0, s0, s1
+; GFX9-NEXT:    s_lshl_b32 s1, s0, s6
+; GFX9-NEXT:    s_ashr_i32 s6, s1, 31
+; GFX9-NEXT:    s_add_i32 s1, s1, s6
+; GFX9-NEXT:    s_xor_b32 s1, s1, s6
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s1
+; GFX9-NEXT:    s_lshl_b32 s0, s0, s7
+; GFX9-NEXT:    s_ashr_i32 s8, s0, 31
+; GFX9-NEXT:    s_add_i32 s0, s0, s8
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX9-NEXT:    s_xor_b32 s0, s0, s1
+; GFX9-NEXT:    s_xor_b32 s0, s0, s8
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s0
-; GFX9-NEXT:    s_sub_i32 s3, 0, s2
+; GFX9-NEXT:    s_sub_i32 s9, 0, s1
 ; GFX9-NEXT:    v_mul_f32_e32 v0, s10, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
-; GFX9-NEXT:    s_sub_i32 s8, 0, s0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s3, v0
+; GFX9-NEXT:    s_ashr_i32 s7, s4, 31
+; GFX9-NEXT:    s_add_i32 s4, s4, s7
+; GFX9-NEXT:    v_mul_lo_u32 v3, s9, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, s10, v1
-; GFX9-NEXT:    s_ashr_i32 s3, s6, 31
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
+; GFX9-NEXT:    s_xor_b32 s4, s4, s7
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v0, v3
-; GFX9-NEXT:    s_add_i32 s6, s6, s3
-; GFX9-NEXT:    s_xor_b32 s6, s6, s3
-; GFX9-NEXT:    s_xor_b32 s3, s3, s9
+; GFX9-NEXT:    s_sub_i32 s10, 0, s0
+; GFX9-NEXT:    s_ashr_i32 s9, s5, 31
+; GFX9-NEXT:    s_add_i32 s5, s5, s9
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v3
-; GFX9-NEXT:    v_mul_hi_u32 v0, s6, v0
-; GFX9-NEXT:    v_mul_lo_u32 v3, s8, v1
-; GFX9-NEXT:    s_ashr_i32 s8, s7, 31
-; GFX9-NEXT:    s_xor_b32 s1, s8, s1
-; GFX9-NEXT:    v_mul_lo_u32 v4, v0, s2
+; GFX9-NEXT:    v_mul_hi_u32 v0, s4, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s10, v1
+; GFX9-NEXT:    s_xor_b32 s6, s7, s6
+; GFX9-NEXT:    v_mul_lo_u32 v4, v0, s1
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
 ; GFX9-NEXT:    v_add_u32_e32 v5, 1, v0
-; GFX9-NEXT:    v_sub_u32_e32 v4, s6, v4
-; GFX9-NEXT:    s_add_i32 s6, s7, s8
-; GFX9-NEXT:    s_xor_b32 s6, s6, s8
+; GFX9-NEXT:    v_sub_u32_e32 v4, s4, v4
+; GFX9-NEXT:    s_xor_b32 s4, s5, s9
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s2, v4
-; GFX9-NEXT:    v_mul_hi_u32 v1, s6, v1
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s1, v4
+; GFX9-NEXT:    v_mul_hi_u32 v1, s4, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
-; GFX9-NEXT:    v_subrev_u32_e32 v5, s2, v4
+; GFX9-NEXT:    v_subrev_u32_e32 v5, s1, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v3, 1, v0
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s2, v4
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s1, v4
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v3, v1, s0
 ; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
-; GFX9-NEXT:    v_xor_b32_e32 v0, s3, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v0, s3, v0
-; GFX9-NEXT:    v_sub_u32_e32 v3, s6, v3
+; GFX9-NEXT:    s_xor_b32 s1, s9, s8
+; GFX9-NEXT:    v_xor_b32_e32 v0, s6, v0
+; GFX9-NEXT:    v_sub_u32_e32 v3, s4, v3
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s0, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v4, s0, v3
@@ -6627,8 +6618,9 @@ define amdgpu_kernel void @sdiv_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s0, v3
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v1, s1, v1
+; GFX9-NEXT:    v_subrev_u32_e32 v0, s6, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v1, s1, v1
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; GFX9-NEXT:    s_endpgm
   %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
   %r = sdiv <2 x i32> %x, %shl.y
@@ -6951,120 +6943,117 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ;
 ; GFX6-LABEL: srem_v2i32_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
-; GFX6-NEXT:    s_movk_i32 s6, 0x1000
-; GFX6-NEXT:    s_mov_b32 s10, 0x4f7ffffe
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
+; GFX6-NEXT:    s_movk_i32 s8, 0x1000
+; GFX6-NEXT:    s_mov_b32 s11, 0x4f7ffffe
+; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b32 s2, s6, s2
-; GFX6-NEXT:    s_ashr_i32 s4, s2, 31
-; GFX6-NEXT:    s_add_i32 s2, s2, s4
-; GFX6-NEXT:    s_xor_b32 s2, s2, s4
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s2
-; GFX6-NEXT:    s_lshl_b32 s3, s6, s3
-; GFX6-NEXT:    s_ashr_i32 s6, s3, 31
-; GFX6-NEXT:    s_add_i32 s3, s3, s6
+; GFX6-NEXT:    s_lshl_b32 s2, s8, s6
+; GFX6-NEXT:    s_ashr_i32 s3, s2, 31
+; GFX6-NEXT:    s_add_i32 s2, s2, s3
+; GFX6-NEXT:    s_xor_b32 s6, s2, s3
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s6
+; GFX6-NEXT:    s_lshl_b32 s7, s8, s7
+; GFX6-NEXT:    s_ashr_i32 s9, s7, 31
+; GFX6-NEXT:    s_add_i32 s7, s7, s9
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v0, v0
-; GFX6-NEXT:    s_sub_i32 s9, 0, s2
-; GFX6-NEXT:    s_xor_b32 s3, s3, s6
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, s3
-; GFX6-NEXT:    v_mul_f32_e32 v0, s10, v0
+; GFX6-NEXT:    s_sub_i32 s10, 0, s6
+; GFX6-NEXT:    s_xor_b32 s7, s7, s9
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v2, s7
+; GFX6-NEXT:    v_mul_f32_e32 v0, s11, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
+; GFX6-NEXT:    s_ashr_i32 s8, s4, 31
 ; GFX6-NEXT:    v_rcp_iflag_f32_e32 v2, v2
-; GFX6-NEXT:    s_mov_b32 s6, -1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s9, v0
-; GFX6-NEXT:    s_sub_i32 s9, 0, s3
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_ashr_i32 s8, s0, 31
-; GFX6-NEXT:    s_add_i32 s0, s0, s8
+; GFX6-NEXT:    s_add_i32 s4, s4, s8
+; GFX6-NEXT:    v_mul_lo_u32 v1, s10, v0
+; GFX6-NEXT:    s_xor_b32 s4, s4, s8
+; GFX6-NEXT:    s_sub_i32 s10, 0, s7
+; GFX6-NEXT:    s_ashr_i32 s9, s5, 31
 ; GFX6-NEXT:    v_mul_hi_u32 v1, v0, v1
-; GFX6-NEXT:    s_xor_b32 s0, s0, s8
+; GFX6-NEXT:    s_mov_b32 s3, 0xf000
+; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v1, v0
-; GFX6-NEXT:    v_mul_f32_e32 v1, s10, v2
+; GFX6-NEXT:    v_mul_f32_e32 v1, s11, v2
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    v_mul_hi_u32 v0, s0, v0
-; GFX6-NEXT:    v_mul_lo_u32 v2, s9, v1
-; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s2
+; GFX6-NEXT:    v_mul_hi_u32 v0, s4, v0
+; GFX6-NEXT:    v_mul_lo_u32 v2, s10, v1
+; GFX6-NEXT:    v_mul_lo_u32 v0, v0, s6
 ; GFX6-NEXT:    v_mul_hi_u32 v2, v1, v2
-; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s0, v0
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v0
-; GFX6-NEXT:    s_ashr_i32 s0, s1, 31
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
-; GFX6-NEXT:    s_add_i32 s1, s1, s0
+; GFX6-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s6, v0
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; GFX6-NEXT:    s_add_i32 s4, s5, s9
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; GFX6-NEXT:    s_xor_b32 s1, s1, s0
+; GFX6-NEXT:    s_xor_b32 s4, s4, s9
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
-; GFX6-NEXT:    v_mul_hi_u32 v1, s1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s2, v0
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s2, v0
-; GFX6-NEXT:    v_mul_lo_u32 v1, v1, s3
+; GFX6-NEXT:    v_mul_hi_u32 v1, s4, v1
+; GFX6-NEXT:    v_subrev_i32_e32 v3, vcc, s6, v0
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s6, v0
+; GFX6-NEXT:    v_mul_lo_u32 v1, v1, s7
 ; GFX6-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX6-NEXT:    v_xor_b32_e32 v0, s8, v0
 ; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s8, v0
-; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s1, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s3, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s4, v1
+; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s7, v1
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s7, v1
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s3, v1
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s3, v1
+; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s7, v1
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s7, v1
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX6-NEXT:    v_xor_b32_e32 v1, s0, v1
-; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, s0, v1
-; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX6-NEXT:    v_xor_b32_e32 v1, s9, v1
+; GFX6-NEXT:    v_subrev_i32_e32 v1, vcc, s9, v1
+; GFX6-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: srem_v2i32_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    s_movk_i32 s0, 0x1000
 ; GFX9-NEXT:    s_mov_b32 s8, 0x4f7ffffe
+; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshl_b32 s1, s0, s2
-; GFX9-NEXT:    s_ashr_i32 s2, s1, 31
-; GFX9-NEXT:    s_add_i32 s1, s1, s2
-; GFX9-NEXT:    s_xor_b32 s1, s1, s2
+; GFX9-NEXT:    s_lshl_b32 s1, s0, s6
+; GFX9-NEXT:    s_ashr_i32 s6, s1, 31
+; GFX9-NEXT:    s_add_i32 s1, s1, s6
+; GFX9-NEXT:    s_xor_b32 s1, s1, s6
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s1
-; GFX9-NEXT:    s_lshl_b32 s0, s0, s3
-; GFX9-NEXT:    s_ashr_i32 s2, s0, 31
-; GFX9-NEXT:    s_add_i32 s0, s0, s2
-; GFX9-NEXT:    s_xor_b32 s0, s0, s2
+; GFX9-NEXT:    s_lshl_b32 s0, s0, s7
+; GFX9-NEXT:    s_ashr_i32 s6, s0, 31
+; GFX9-NEXT:    s_add_i32 s0, s0, s6
+; GFX9-NEXT:    s_xor_b32 s0, s0, s6
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s0
-; GFX9-NEXT:    s_sub_i32 s3, 0, s1
-; GFX9-NEXT:    s_ashr_i32 s2, s6, 31
+; GFX9-NEXT:    s_sub_i32 s7, 0, s1
+; GFX9-NEXT:    s_ashr_i32 s6, s4, 31
 ; GFX9-NEXT:    v_mul_f32_e32 v0, s8, v0
 ; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
-; GFX9-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9-NEXT:    s_add_i32 s4, s4, s6
+; GFX9-NEXT:    s_xor_b32 s4, s4, s6
 ; GFX9-NEXT:    v_mul_f32_e32 v1, s8, v1
-; GFX9-NEXT:    v_mul_lo_u32 v3, s3, v0
+; GFX9-NEXT:    v_mul_lo_u32 v3, s7, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX9-NEXT:    s_add_i32 s3, s6, s2
-; GFX9-NEXT:    s_sub_i32 s6, 0, s0
+; GFX9-NEXT:    s_sub_i32 s7, 0, s0
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v0, v3
-; GFX9-NEXT:    v_mul_lo_u32 v4, s6, v1
-; GFX9-NEXT:    s_xor_b32 s3, s3, s2
-; GFX9-NEXT:    s_ashr_i32 s6, s7, 31
+; GFX9-NEXT:    v_mul_lo_u32 v4, s7, v1
+; GFX9-NEXT:    s_ashr_i32 s7, s5, 31
+; GFX9-NEXT:    s_add_i32 s5, s5, s7
 ; GFX9-NEXT:    v_add_u32_e32 v0, v0, v3
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v4
-; GFX9-NEXT:    v_mul_hi_u32 v0, s3, v0
-; GFX9-NEXT:    s_add_i32 s7, s7, s6
-; GFX9-NEXT:    s_xor_b32 s7, s7, s6
+; GFX9-NEXT:    v_mul_hi_u32 v0, s4, v0
+; GFX9-NEXT:    s_xor_b32 s5, s5, s7
 ; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
-; GFX9-NEXT:    v_mul_hi_u32 v1, s7, v1
+; GFX9-NEXT:    v_mul_hi_u32 v1, s5, v1
 ; GFX9-NEXT:    v_mul_lo_u32 v0, v0, s1
 ; GFX9-NEXT:    v_mul_lo_u32 v1, v1, s0
-; GFX9-NEXT:    v_sub_u32_e32 v0, s3, v0
+; GFX9-NEXT:    v_sub_u32_e32 v0, s4, v0
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s1, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s1, v0
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s1, v0
-; GFX9-NEXT:    v_sub_u32_e32 v1, s7, v1
+; GFX9-NEXT:    v_sub_u32_e32 v1, s5, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s0, v1
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s0, v1
@@ -7072,11 +7061,11 @@ define amdgpu_kernel void @srem_v2i32_pow2_shl_denom(<2 x i32> addrspace(1)* %ou
 ; GFX9-NEXT:    v_subrev_u32_e32 v3, s0, v1
 ; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s0, v1
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v0, s2, v0
-; GFX9-NEXT:    v_xor_b32_e32 v1, s6, v1
-; GFX9-NEXT:    v_subrev_u32_e32 v0, s2, v0
-; GFX9-NEXT:    v_subrev_u32_e32 v1, s6, v1
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[4:5]
+; GFX9-NEXT:    v_xor_b32_e32 v0, s6, v0
+; GFX9-NEXT:    v_xor_b32_e32 v1, s7, v1
+; GFX9-NEXT:    v_subrev_u32_e32 v0, s6, v0
+; GFX9-NEXT:    v_subrev_u32_e32 v1, s7, v1
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; GFX9-NEXT:    s_endpgm
   %shl.y = shl <2 x i32> <i32 4096, i32 4096>, %y
   %r = srem <2 x i32> %x, %shl.y
@@ -7703,16 +7692,15 @@ define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ;
 ; GFX6-LABEL: udiv_v2i64_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; GFX6-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0xd
+; GFX6-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
 ; GFX6-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GFX6-NEXT:    s_mov_b32 s3, 0xf000
 ; GFX6-NEXT:    s_mov_b32 s2, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_add_i32 s4, s4, 12
-; GFX6-NEXT:    s_add_i32 s6, s6, 12
-; GFX6-NEXT:    s_lshr_b64 s[4:5], s[8:9], s4
-; GFX6-NEXT:    s_lshr_b64 s[6:7], s[10:11], s6
+; GFX6-NEXT:    s_add_i32 s8, s8, 12
+; GFX6-NEXT:    s_add_i32 s9, s10, 12
+; GFX6-NEXT:    s_lshr_b64 s[4:5], s[4:5], s8
+; GFX6-NEXT:    s_lshr_b64 s[6:7], s[6:7], s9
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s4
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s5
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s6
@@ -7722,20 +7710,19 @@ define amdgpu_kernel void @udiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ;
 ; GFX9-LABEL: udiv_v2i64_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_add_i32 s0, s4, 12
-; GFX9-NEXT:    s_add_i32 s4, s6, 12
-; GFX9-NEXT:    s_lshr_b64 s[0:1], s[8:9], s0
-; GFX9-NEXT:    s_lshr_b64 s[4:5], s[10:11], s4
-; GFX9-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    s_add_i32 s2, s8, 12
+; GFX9-NEXT:    s_add_i32 s8, s10, 12
+; GFX9-NEXT:    s_lshr_b64 s[2:3], s[4:5], s2
+; GFX9-NEXT:    s_lshr_b64 s[4:5], s[6:7], s8
+; GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s5
-; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
   %r = udiv <2 x i64> %x, %shl.y
@@ -8135,49 +8122,47 @@ define amdgpu_kernel void @urem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ;
 ; GFX6-LABEL: urem_v2i64_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0xd
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x11
+; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx8 s[0:7], s[0:1], 0xd
 ; GFX6-NEXT:    s_mov_b64 s[12:13], 0x1000
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_mov_b32 s11, 0xf000
+; GFX6-NEXT:    s_mov_b32 s10, -1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b64 s[2:3], s[12:13], s2
-; GFX6-NEXT:    s_lshl_b64 s[0:1], s[12:13], s0
-; GFX6-NEXT:    s_add_u32 s0, s0, -1
-; GFX6-NEXT:    s_addc_u32 s1, s1, -1
-; GFX6-NEXT:    s_and_b64 s[0:1], s[8:9], s[0:1]
-; GFX6-NEXT:    s_add_u32 s2, s2, -1
-; GFX6-NEXT:    s_addc_u32 s3, s3, -1
-; GFX6-NEXT:    s_and_b64 s[2:3], s[10:11], s[2:3]
+; GFX6-NEXT:    s_lshl_b64 s[6:7], s[12:13], s6
+; GFX6-NEXT:    s_lshl_b64 s[4:5], s[12:13], s4
+; GFX6-NEXT:    s_add_u32 s4, s4, -1
+; GFX6-NEXT:    s_addc_u32 s5, s5, -1
+; GFX6-NEXT:    s_and_b64 s[0:1], s[0:1], s[4:5]
+; GFX6-NEXT:    s_add_u32 s4, s6, -1
+; GFX6-NEXT:    s_addc_u32 s5, s7, -1
+; GFX6-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
 ; GFX6-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX6-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX6-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX6-NEXT:    v_mov_b32_e32 v3, s3
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: urem_v2i64_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
-; GFX9-NEXT:    s_mov_b64 s[0:1], 0x1000
+; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
+; GFX9-NEXT:    s_mov_b64 s[2:3], 0x1000
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshl_b64 s[6:7], s[0:1], s6
-; GFX9-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
-; GFX9-NEXT:    s_add_u32 s0, s0, -1
-; GFX9-NEXT:    s_addc_u32 s1, s1, -1
-; GFX9-NEXT:    s_and_b64 s[0:1], s[8:9], s[0:1]
-; GFX9-NEXT:    s_add_u32 s4, s6, -1
-; GFX9-NEXT:    s_addc_u32 s5, s7, -1
-; GFX9-NEXT:    s_and_b64 s[4:5], s[10:11], s[4:5]
-; GFX9-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9-NEXT:    v_mov_b32_e32 v1, s1
+; GFX9-NEXT:    s_lshl_b64 s[10:11], s[2:3], s10
+; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
+; GFX9-NEXT:    s_add_u32 s2, s2, -1
+; GFX9-NEXT:    s_addc_u32 s3, s3, -1
+; GFX9-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
+; GFX9-NEXT:    s_add_u32 s4, s10, -1
+; GFX9-NEXT:    s_addc_u32 s5, s11, -1
+; GFX9-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX9-NEXT:    v_mov_b32_e32 v2, s4
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s5
-; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
 ; GFX9-NEXT:    s_endpgm
   %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
   %r = urem <2 x i64> %x, %shl.y
@@ -9091,42 +9076,42 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ;
 ; GFX6-LABEL: sdiv_v2i64_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; GFX6-NEXT:    s_mov_b64 s[2:3], 0x1000
+; GFX6-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
+; GFX6-NEXT:    s_mov_b64 s[12:13], 0x1000
 ; GFX6-NEXT:    s_mov_b32 s18, 0x4f800000
 ; GFX6-NEXT:    s_mov_b32 s19, 0x5f7ffffc
 ; GFX6-NEXT:    s_mov_b32 s20, 0x2f800000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b64 s[8:9], s[2:3], s6
-; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
-; GFX6-NEXT:    s_ashr_i32 s12, s3, 31
-; GFX6-NEXT:    s_add_u32 s2, s2, s12
-; GFX6-NEXT:    s_mov_b32 s13, s12
-; GFX6-NEXT:    s_addc_u32 s3, s3, s12
-; GFX6-NEXT:    s_xor_b64 s[10:11], s[2:3], s[12:13]
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s10
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s11
+; GFX6-NEXT:    s_lshl_b64 s[8:9], s[12:13], s8
+; GFX6-NEXT:    s_lshl_b64 s[2:3], s[12:13], s10
+; GFX6-NEXT:    s_ashr_i32 s14, s9, 31
+; GFX6-NEXT:    s_add_u32 s8, s8, s14
+; GFX6-NEXT:    s_mov_b32 s15, s14
+; GFX6-NEXT:    s_addc_u32 s9, s9, s14
+; GFX6-NEXT:    s_xor_b64 s[12:13], s[8:9], s[14:15]
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s12
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s13
 ; GFX6-NEXT:    s_mov_b32 s21, 0xcf800000
-; GFX6-NEXT:    s_sub_u32 s6, 0, s10
-; GFX6-NEXT:    s_subb_u32 s7, 0, s11
+; GFX6-NEXT:    s_sub_u32 s10, 0, s12
+; GFX6-NEXT:    s_subb_u32 s11, 0, s13
 ; GFX6-NEXT:    v_mac_f32_e32 v0, s18, v1
 ; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_ashr_i32 s16, s5, 31
+; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
+; GFX6-NEXT:    s_add_u32 s0, s4, s16
 ; GFX6-NEXT:    v_mul_f32_e32 v0, s19, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, s20, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
 ; GFX6-NEXT:    v_mac_f32_e32 v0, s21, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v0
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_ashr_i32 s14, s1, 31
-; GFX6-NEXT:    s_add_u32 s0, s0, s14
-; GFX6-NEXT:    v_mul_lo_u32 v0, s6, v1
-; GFX6-NEXT:    v_mul_hi_u32 v3, s6, v2
-; GFX6-NEXT:    v_mul_lo_u32 v4, s7, v2
-; GFX6-NEXT:    v_mul_lo_u32 v5, s6, v2
-; GFX6-NEXT:    s_mov_b32 s15, s14
+; GFX6-NEXT:    s_mov_b32 s17, s16
+; GFX6-NEXT:    s_addc_u32 s1, s5, s16
+; GFX6-NEXT:    v_mul_lo_u32 v0, s10, v1
+; GFX6-NEXT:    v_mul_hi_u32 v3, s10, v2
+; GFX6-NEXT:    v_mul_lo_u32 v4, s11, v2
+; GFX6-NEXT:    v_mul_lo_u32 v5, s10, v2
+; GFX6-NEXT:    s_xor_b64 s[4:5], s[0:1], s[16:17]
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v2, v3
@@ -9138,8 +9123,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
 ; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v5
 ; GFX6-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GFX6-NEXT:    s_addc_u32 s1, s1, s14
-; GFX6-NEXT:    s_xor_b64 s[16:17], s[0:1], s[14:15]
+; GFX6-NEXT:    s_xor_b64 s[14:15], s[16:17], s[14:15]
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v0, 0
@@ -9148,13 +9132,12 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v3, s6, v1
-; GFX6-NEXT:    v_mul_hi_u32 v4, s6, v2
-; GFX6-NEXT:    v_mul_lo_u32 v5, s7, v2
-; GFX6-NEXT:    s_xor_b64 s[14:15], s[14:15], s[12:13]
-; GFX6-NEXT:    s_ashr_i32 s12, s9, 31
+; GFX6-NEXT:    v_mul_lo_u32 v3, s10, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, s10, v2
+; GFX6-NEXT:    v_mul_lo_u32 v5, s11, v2
+; GFX6-NEXT:    s_mov_b32 s11, 0xf000
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GFX6-NEXT:    v_mul_lo_u32 v4, s6, v2
+; GFX6-NEXT:    v_mul_lo_u32 v4, s10, v2
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v5, v3
 ; GFX6-NEXT:    v_mul_lo_u32 v7, v2, v3
 ; GFX6-NEXT:    v_mul_hi_u32 v8, v2, v4
@@ -9172,57 +9155,59 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v3, s16, v1
-; GFX6-NEXT:    v_mul_hi_u32 v4, s16, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, s16, v1
-; GFX6-NEXT:    v_mul_hi_u32 v6, s17, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s17, v1
+; GFX6-NEXT:    v_mul_lo_u32 v3, s4, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, s4, v2
+; GFX6-NEXT:    v_mul_hi_u32 v5, s4, v1
+; GFX6-NEXT:    v_mul_hi_u32 v6, s5, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s5, v1
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v5, s17, v2
-; GFX6-NEXT:    v_mul_hi_u32 v2, s17, v2
-; GFX6-NEXT:    s_add_u32 s8, s8, s12
-; GFX6-NEXT:    s_mov_b32 s13, s12
+; GFX6-NEXT:    v_mul_lo_u32 v5, s5, v2
+; GFX6-NEXT:    v_mul_hi_u32 v2, s5, v2
+; GFX6-NEXT:    s_mov_b32 s10, -1
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v4, v2, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v6, v0, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v1, vcc, v2, v1
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, 0, v3, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v3, s10, v2
-; GFX6-NEXT:    v_mul_hi_u32 v4, s10, v1
-; GFX6-NEXT:    v_mul_lo_u32 v5, s11, v1
-; GFX6-NEXT:    v_mov_b32_e32 v6, s11
-; GFX6-NEXT:    s_addc_u32 s9, s9, s12
+; GFX6-NEXT:    v_mul_lo_u32 v3, s12, v2
+; GFX6-NEXT:    v_mul_hi_u32 v4, s12, v1
+; GFX6-NEXT:    v_mul_lo_u32 v5, s13, v1
+; GFX6-NEXT:    v_mov_b32_e32 v6, s13
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
-; GFX6-NEXT:    v_mul_lo_u32 v4, s10, v1
+; GFX6-NEXT:    v_mul_lo_u32 v4, s12, v1
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s17, v3
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s16, v4
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s5, v3
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s4, v4
 ; GFX6-NEXT:    v_subb_u32_e64 v5, s[0:1], v5, v6, vcc
-; GFX6-NEXT:    v_subrev_i32_e64 v6, s[0:1], s10, v4
+; GFX6-NEXT:    v_subrev_i32_e64 v6, s[0:1], s12, v4
 ; GFX6-NEXT:    v_subbrev_u32_e64 v5, s[0:1], 0, v5, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v5
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s13, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v6
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s12, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v5
+; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], s13, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, v7, v6, s[0:1]
 ; GFX6-NEXT:    v_add_i32_e64 v6, s[0:1], 2, v1
 ; GFX6-NEXT:    v_addc_u32_e64 v7, s[0:1], 0, v2, s[0:1]
 ; GFX6-NEXT:    v_add_i32_e64 v8, s[0:1], 1, v1
 ; GFX6-NEXT:    v_addc_u32_e64 v9, s[0:1], 0, v2, s[0:1]
+; GFX6-NEXT:    s_ashr_i32 s4, s3, 31
 ; GFX6-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v5
-; GFX6-NEXT:    s_xor_b64 s[8:9], s[8:9], s[12:13]
+; GFX6-NEXT:    s_add_u32 s2, s2, s4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, v9, v7, s[0:1]
-; GFX6-NEXT:    v_mov_b32_e32 v7, s17
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v9, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v10, s9
+; GFX6-NEXT:    v_mov_b32_e32 v7, s5
+; GFX6-NEXT:    s_mov_b32 s5, s4
+; GFX6-NEXT:    s_addc_u32 s3, s3, s4
+; GFX6-NEXT:    s_xor_b64 s[2:3], s[2:3], s[4:5]
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v9, s2
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v10, s3
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v7, v3, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s13, v3
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s10, v4
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s12, v4
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v3
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s13, v3
 ; GFX6-NEXT:    v_mac_f32_e32 v9, s18, v10
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v7, v4, vcc
 ; GFX6-NEXT:    v_rcp_f32_e32 v4, v9
@@ -9235,13 +9220,13 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_mac_f32_e32 v4, s21, v5
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GFX6-NEXT:    s_sub_u32 s0, 0, s8
+; GFX6-NEXT:    s_sub_u32 s0, 0, s2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_mul_hi_u32 v3, s0, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v6, s0, v5
-; GFX6-NEXT:    s_subb_u32 s1, 0, s9
+; GFX6-NEXT:    s_subb_u32 s1, 0, s3
 ; GFX6-NEXT:    v_mul_lo_u32 v7, s1, v4
-; GFX6-NEXT:    s_ashr_i32 s10, s3, 31
+; GFX6-NEXT:    s_ashr_i32 s12, s7, 31
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
 ; GFX6-NEXT:    v_mul_lo_u32 v6, s0, v4
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
@@ -9254,7 +9239,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e32 v8, vcc, 0, v9, vcc
 ; GFX6-NEXT:    v_mul_lo_u32 v9, v5, v6
 ; GFX6-NEXT:    v_mul_hi_u32 v6, v5, v6
-; GFX6-NEXT:    s_mov_b32 s11, s10
+; GFX6-NEXT:    s_mov_b32 s13, s12
 ; GFX6-NEXT:    v_xor_b32_e32 v1, s14, v1
 ; GFX6-NEXT:    v_add_i32_e32 v7, vcc, v7, v9
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, v8, v6, vcc
@@ -9267,7 +9252,6 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_mul_hi_u32 v6, s0, v3
 ; GFX6-NEXT:    v_mul_lo_u32 v7, s1, v3
 ; GFX6-NEXT:    v_xor_b32_e32 v2, s15, v2
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
 ; GFX6-NEXT:    v_mul_lo_u32 v6, s0, v3
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v7, v5
@@ -9285,46 +9269,45 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, v7, v0, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
-; GFX6-NEXT:    s_add_u32 s0, s2, s10
+; GFX6-NEXT:    s_add_u32 s0, s6, s12
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    s_addc_u32 s1, s3, s10
+; GFX6-NEXT:    s_addc_u32 s1, s7, s12
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
-; GFX6-NEXT:    s_xor_b64 s[2:3], s[0:1], s[10:11]
-; GFX6-NEXT:    v_mul_lo_u32 v5, s2, v4
-; GFX6-NEXT:    v_mul_hi_u32 v6, s2, v3
-; GFX6-NEXT:    v_mul_hi_u32 v8, s2, v4
-; GFX6-NEXT:    v_mul_hi_u32 v9, s3, v4
-; GFX6-NEXT:    v_mul_lo_u32 v4, s3, v4
+; GFX6-NEXT:    s_xor_b64 s[6:7], s[0:1], s[12:13]
+; GFX6-NEXT:    v_mul_lo_u32 v5, s6, v4
+; GFX6-NEXT:    v_mul_hi_u32 v6, s6, v3
+; GFX6-NEXT:    v_mul_hi_u32 v8, s6, v4
+; GFX6-NEXT:    v_mul_hi_u32 v9, s7, v4
+; GFX6-NEXT:    v_mul_lo_u32 v4, s7, v4
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v8, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v8, s3, v3
-; GFX6-NEXT:    v_mul_hi_u32 v3, s3, v3
+; GFX6-NEXT:    v_mul_lo_u32 v8, s7, v3
+; GFX6-NEXT:    v_mul_hi_u32 v3, s7, v3
 ; GFX6-NEXT:    v_mov_b32_e32 v7, s15
-; GFX6-NEXT:    s_mov_b32 s6, -1
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v6, v3, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v9, v0, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v0, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v5, s8, v4
-; GFX6-NEXT:    v_mul_hi_u32 v6, s8, v3
+; GFX6-NEXT:    v_mul_lo_u32 v5, s2, v4
+; GFX6-NEXT:    v_mul_hi_u32 v6, s2, v3
 ; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s14, v1
 ; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v2, v7, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, s9, v3
+; GFX6-NEXT:    v_mul_lo_u32 v2, s3, v3
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
-; GFX6-NEXT:    v_mov_b32_e32 v7, s9
+; GFX6-NEXT:    v_mov_b32_e32 v7, s3
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v5, v2
-; GFX6-NEXT:    v_mul_lo_u32 v5, s8, v3
-; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, s3, v2
-; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s2, v5
+; GFX6-NEXT:    v_mul_lo_u32 v5, s2, v3
+; GFX6-NEXT:    v_sub_i32_e32 v6, vcc, s7, v2
+; GFX6-NEXT:    v_sub_i32_e32 v5, vcc, s6, v5
 ; GFX6-NEXT:    v_subb_u32_e64 v6, s[0:1], v6, v7, vcc
-; GFX6-NEXT:    v_subrev_i32_e64 v7, s[0:1], s8, v5
+; GFX6-NEXT:    v_subrev_i32_e64 v7, s[0:1], s2, v5
 ; GFX6-NEXT:    v_subbrev_u32_e64 v6, s[0:1], 0, v6, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v6
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s3, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v7
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[0:1], s2, v7
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
-; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v6
+; GFX6-NEXT:    v_cmp_eq_u32_e64 s[0:1], s3, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, v8, v7, s[0:1]
 ; GFX6-NEXT:    v_add_i32_e64 v7, s[0:1], 2, v3
 ; GFX6-NEXT:    v_addc_u32_e64 v8, s[0:1], 0, v4, s[0:1]
@@ -9332,59 +9315,58 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e64 v10, s[0:1], 0, v4, s[0:1]
 ; GFX6-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v6, v10, v8, s[0:1]
-; GFX6-NEXT:    v_mov_b32_e32 v8, s3
+; GFX6-NEXT:    v_mov_b32_e32 v8, s7
 ; GFX6-NEXT:    v_subb_u32_e32 v2, vcc, v8, v2, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s9, v2
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s8, v5
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s2, v5
 ; GFX6-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v2
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s3, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v8, v5, vcc
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, v9, v7, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; GFX6-NEXT:    s_xor_b64 s[0:1], s[10:11], s[12:13]
+; GFX6-NEXT:    s_xor_b64 s[0:1], s[12:13], s[4:5]
 ; GFX6-NEXT:    v_xor_b32_e32 v3, s0, v3
 ; GFX6-NEXT:    v_xor_b32_e32 v4, s1, v2
 ; GFX6-NEXT:    v_mov_b32_e32 v5, s1
 ; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s0, v3
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v4, v5, vcc
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: sdiv_v2i64_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
+; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX9-NEXT:    s_mov_b64 s[2:3], 0x1000
 ; GFX9-NEXT:    s_mov_b32 s16, 0x4f800000
 ; GFX9-NEXT:    s_mov_b32 s17, 0x5f7ffffc
 ; GFX9-NEXT:    s_mov_b32 s18, 0x2f800000
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshl_b64 s[8:9], s[2:3], s6
-; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
+; GFX9-NEXT:    s_lshl_b64 s[10:11], s[2:3], s10
+; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
 ; GFX9-NEXT:    s_ashr_i32 s12, s3, 31
 ; GFX9-NEXT:    s_add_u32 s2, s2, s12
 ; GFX9-NEXT:    s_mov_b32 s13, s12
 ; GFX9-NEXT:    s_addc_u32 s3, s3, s12
-; GFX9-NEXT:    s_xor_b64 s[10:11], s[2:3], s[12:13]
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s10
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s11
+; GFX9-NEXT:    s_xor_b64 s[8:9], s[2:3], s[12:13]
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s8
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s9
 ; GFX9-NEXT:    s_mov_b32 s19, 0xcf800000
-; GFX9-NEXT:    s_sub_u32 s2, 0, s10
-; GFX9-NEXT:    s_subb_u32 s3, 0, s11
+; GFX9-NEXT:    s_sub_u32 s2, 0, s8
+; GFX9-NEXT:    s_subb_u32 s3, 0, s9
 ; GFX9-NEXT:    v_mac_f32_e32 v0, s16, v1
 ; GFX9-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_ashr_i32 s14, s5, 31
+; GFX9-NEXT:    s_mov_b32 s15, s14
 ; GFX9-NEXT:    v_mul_f32_e32 v0, s17, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, s18, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
 ; GFX9-NEXT:    v_mac_f32_e32 v0, s19, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_ashr_i32 s14, s5, 31
-; GFX9-NEXT:    s_mov_b32 s15, s14
 ; GFX9-NEXT:    v_mul_lo_u32 v0, s2, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v3, s2, v2
 ; GFX9-NEXT:    v_mul_lo_u32 v5, s3, v2
@@ -9449,44 +9431,44 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v6, v0, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v1, vcc, v2, v1
 ; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v3, s10, v2
-; GFX9-NEXT:    v_mul_hi_u32 v4, s10, v1
-; GFX9-NEXT:    v_mul_lo_u32 v5, s11, v1
-; GFX9-NEXT:    v_mov_b32_e32 v6, s11
+; GFX9-NEXT:    v_mul_lo_u32 v3, s8, v2
+; GFX9-NEXT:    v_mul_hi_u32 v4, s8, v1
+; GFX9-NEXT:    v_mul_lo_u32 v5, s9, v1
+; GFX9-NEXT:    v_mov_b32_e32 v6, s9
 ; GFX9-NEXT:    v_add_u32_e32 v3, v4, v3
-; GFX9-NEXT:    v_mul_lo_u32 v4, s10, v1
+; GFX9-NEXT:    v_mul_lo_u32 v4, s8, v1
 ; GFX9-NEXT:    v_add_u32_e32 v3, v3, v5
 ; GFX9-NEXT:    v_sub_u32_e32 v5, s5, v3
 ; GFX9-NEXT:    v_sub_co_u32_e32 v4, vcc, s4, v4
 ; GFX9-NEXT:    v_subb_co_u32_e64 v5, s[0:1], v5, v6, vcc
-; GFX9-NEXT:    v_subrev_co_u32_e64 v6, s[0:1], s10, v4
+; GFX9-NEXT:    v_subrev_co_u32_e64 v6, s[0:1], s8, v4
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v5, s[0:1], 0, v5, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v5
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v5
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v6
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v5
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v5
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, v7, v6, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v6, s[0:1], 2, v1
 ; GFX9-NEXT:    v_addc_co_u32_e64 v7, s[0:1], 0, v2, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v8, s[0:1], 1, v1
 ; GFX9-NEXT:    v_addc_co_u32_e64 v9, s[0:1], 0, v2, s[0:1]
-; GFX9-NEXT:    s_ashr_i32 s4, s9, 31
+; GFX9-NEXT:    s_ashr_i32 s4, s11, 31
 ; GFX9-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v5
-; GFX9-NEXT:    s_add_u32 s8, s8, s4
+; GFX9-NEXT:    s_add_u32 s10, s10, s4
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, v9, v7, s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v7, s5
 ; GFX9-NEXT:    s_mov_b32 s5, s4
-; GFX9-NEXT:    s_addc_u32 s9, s9, s4
-; GFX9-NEXT:    s_xor_b64 s[8:9], s[8:9], s[4:5]
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v9, s8
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v10, s9
+; GFX9-NEXT:    s_addc_u32 s11, s11, s4
+; GFX9-NEXT:    s_xor_b64 s[10:11], s[10:11], s[4:5]
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v9, s10
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v10, s11
 ; GFX9-NEXT:    v_subb_co_u32_e32 v3, vcc, v7, v3, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s11, v3
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s9, v3
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s10, v4
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s8, v4
 ; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v3
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v3
 ; GFX9-NEXT:    v_mac_f32_e32 v9, s16, v10
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v7, v4, vcc
 ; GFX9-NEXT:    v_rcp_f32_e32 v4, v9
@@ -9499,8 +9481,8 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX9-NEXT:    v_mac_f32_e32 v4, s19, v5
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GFX9-NEXT:    s_sub_u32 s0, 0, s8
-; GFX9-NEXT:    s_subb_u32 s1, 0, s9
+; GFX9-NEXT:    s_sub_u32 s0, 0, s10
+; GFX9-NEXT:    s_subb_u32 s1, 0, s11
 ; GFX9-NEXT:    v_mul_hi_u32 v6, s0, v4
 ; GFX9-NEXT:    v_mul_lo_u32 v7, s0, v5
 ; GFX9-NEXT:    v_mul_lo_u32 v8, s1, v4
@@ -9517,8 +9499,8 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v9, vcc
 ; GFX9-NEXT:    v_mul_lo_u32 v9, v5, v3
 ; GFX9-NEXT:    v_mul_hi_u32 v3, v5, v3
-; GFX9-NEXT:    s_ashr_i32 s10, s7, 31
-; GFX9-NEXT:    s_mov_b32 s11, s10
+; GFX9-NEXT:    s_ashr_i32 s8, s7, 31
+; GFX9-NEXT:    s_mov_b32 s9, s8
 ; GFX9-NEXT:    v_add_co_u32_e32 v7, vcc, v7, v9
 ; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v8, v3, vcc
 ; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v10, v0, vcc
@@ -9530,7 +9512,7 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX9-NEXT:    v_mul_hi_u32 v6, s0, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v7, s1, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v8, s0, v3
-; GFX9-NEXT:    s_add_u32 s0, s6, s10
+; GFX9-NEXT:    s_add_u32 s0, s6, s8
 ; GFX9-NEXT:    v_add_u32_e32 v5, v6, v5
 ; GFX9-NEXT:    v_add_u32_e32 v5, v5, v7
 ; GFX9-NEXT:    v_mul_lo_u32 v9, v3, v5
@@ -9548,9 +9530,9 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX9-NEXT:    v_add_co_u32_e32 v5, vcc, v7, v5
 ; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v5
-; GFX9-NEXT:    s_addc_u32 s1, s7, s10
+; GFX9-NEXT:    s_addc_u32 s1, s7, s8
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v4, v6, vcc
-; GFX9-NEXT:    s_xor_b64 s[6:7], s[0:1], s[10:11]
+; GFX9-NEXT:    s_xor_b64 s[6:7], s[0:1], s[8:9]
 ; GFX9-NEXT:    v_mul_lo_u32 v5, s6, v4
 ; GFX9-NEXT:    v_mul_hi_u32 v6, s6, v3
 ; GFX9-NEXT:    v_mul_hi_u32 v8, s6, v4
@@ -9567,26 +9549,26 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v9, v0, vcc
 ; GFX9-NEXT:    v_add_co_u32_e32 v3, vcc, v3, v4
 ; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v5, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v5, s8, v4
-; GFX9-NEXT:    v_mul_hi_u32 v6, s8, v3
-; GFX9-NEXT:    v_mul_lo_u32 v8, s9, v3
+; GFX9-NEXT:    v_mul_lo_u32 v5, s10, v4
+; GFX9-NEXT:    v_mul_hi_u32 v6, s10, v3
+; GFX9-NEXT:    v_mul_lo_u32 v8, s11, v3
 ; GFX9-NEXT:    v_mov_b32_e32 v7, s13
 ; GFX9-NEXT:    v_subrev_co_u32_e32 v1, vcc, s12, v1
 ; GFX9-NEXT:    v_add_u32_e32 v5, v6, v5
-; GFX9-NEXT:    v_mul_lo_u32 v6, s8, v3
+; GFX9-NEXT:    v_mul_lo_u32 v6, s10, v3
 ; GFX9-NEXT:    v_subb_co_u32_e32 v2, vcc, v2, v7, vcc
 ; GFX9-NEXT:    v_add_u32_e32 v5, v5, v8
 ; GFX9-NEXT:    v_sub_u32_e32 v7, s7, v5
-; GFX9-NEXT:    v_mov_b32_e32 v8, s9
+; GFX9-NEXT:    v_mov_b32_e32 v8, s11
 ; GFX9-NEXT:    v_sub_co_u32_e32 v6, vcc, s6, v6
 ; GFX9-NEXT:    v_subb_co_u32_e64 v7, s[0:1], v7, v8, vcc
-; GFX9-NEXT:    v_subrev_co_u32_e64 v8, s[0:1], s8, v6
+; GFX9-NEXT:    v_subrev_co_u32_e64 v8, s[0:1], s10, v6
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v7, s[0:1], 0, v7, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s9, v7
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s11, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s8, v8
+; GFX9-NEXT:    v_cmp_le_u32_e64 s[0:1], s10, v8
 ; GFX9-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[0:1]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s9, v7
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[0:1], s11, v7
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, v9, v8, s[0:1]
 ; GFX9-NEXT:    v_add_co_u32_e64 v8, s[0:1], 2, v3
 ; GFX9-NEXT:    v_addc_co_u32_e64 v9, s[0:1], 0, v4, s[0:1]
@@ -9596,16 +9578,16 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX9-NEXT:    v_cndmask_b32_e64 v7, v11, v9, s[0:1]
 ; GFX9-NEXT:    v_mov_b32_e32 v9, s7
 ; GFX9-NEXT:    v_subb_co_u32_e32 v5, vcc, v9, v5, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s9, v5
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s11, v5
 ; GFX9-NEXT:    v_cndmask_b32_e64 v9, 0, -1, vcc
-; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s8, v6
+; GFX9-NEXT:    v_cmp_le_u32_e32 vcc, s10, v6
 ; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v5
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, s11, v5
 ; GFX9-NEXT:    v_cndmask_b32_e32 v5, v9, v6, vcc
 ; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
 ; GFX9-NEXT:    v_cndmask_b32_e64 v5, v10, v8, s[0:1]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
-; GFX9-NEXT:    s_xor_b64 s[0:1], s[10:11], s[4:5]
+; GFX9-NEXT:    s_xor_b64 s[0:1], s[8:9], s[4:5]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v7, vcc
 ; GFX9-NEXT:    v_xor_b32_e32 v3, s0, v3
 ; GFX9-NEXT:    v_xor_b32_e32 v4, s1, v4
@@ -10272,19 +10254,19 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ;
 ; GFX6-LABEL: srem_v2i64_pow2_shl_denom:
 ; GFX6:       ; %bb.0:
-; GFX6-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
+; GFX6-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
 ; GFX6-NEXT:    s_mov_b64 s[2:3], 0x1000
 ; GFX6-NEXT:    s_mov_b32 s18, 0x4f800000
 ; GFX6-NEXT:    s_mov_b32 s19, 0x5f7ffffc
 ; GFX6-NEXT:    s_mov_b32 s20, 0x2f800000
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshl_b64 s[14:15], s[2:3], s6
-; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
-; GFX6-NEXT:    s_ashr_i32 s4, s3, 31
-; GFX6-NEXT:    s_add_u32 s2, s2, s4
-; GFX6-NEXT:    s_mov_b32 s5, s4
-; GFX6-NEXT:    s_addc_u32 s3, s3, s4
-; GFX6-NEXT:    s_xor_b64 s[16:17], s[2:3], s[4:5]
+; GFX6-NEXT:    s_lshl_b64 s[14:15], s[2:3], s10
+; GFX6-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
+; GFX6-NEXT:    s_ashr_i32 s8, s3, 31
+; GFX6-NEXT:    s_add_u32 s2, s2, s8
+; GFX6-NEXT:    s_mov_b32 s9, s8
+; GFX6-NEXT:    s_addc_u32 s3, s3, s8
+; GFX6-NEXT:    s_xor_b64 s[16:17], s[2:3], s[8:9]
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v0, s16
 ; GFX6-NEXT:    v_cvt_f32_u32_e32 v1, s17
 ; GFX6-NEXT:    s_mov_b32 s21, 0xcf800000
@@ -10292,24 +10274,22 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    s_subb_u32 s3, 0, s17
 ; GFX6-NEXT:    v_mac_f32_e32 v0, s18, v1
 ; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0xd
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
-; GFX6-NEXT:    s_mov_b32 s6, -1
+; GFX6-NEXT:    s_ashr_i32 s12, s5, 31
+; GFX6-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
+; GFX6-NEXT:    s_add_u32 s0, s4, s12
 ; GFX6-NEXT:    v_mul_f32_e32 v0, s19, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, s20, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
 ; GFX6-NEXT:    v_mac_f32_e32 v0, s21, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v2, v0
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_ashr_i32 s12, s9, 31
-; GFX6-NEXT:    s_add_u32 s0, s8, s12
+; GFX6-NEXT:    s_mov_b32 s13, s12
+; GFX6-NEXT:    s_addc_u32 s1, s5, s12
 ; GFX6-NEXT:    v_mul_lo_u32 v0, s2, v1
 ; GFX6-NEXT:    v_mul_hi_u32 v3, s2, v2
 ; GFX6-NEXT:    v_mul_lo_u32 v4, s3, v2
 ; GFX6-NEXT:    v_mul_lo_u32 v5, s2, v2
-; GFX6-NEXT:    s_mov_b32 s13, s12
+; GFX6-NEXT:    s_xor_b64 s[4:5], s[0:1], s[12:13]
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v3, v0
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v0, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v0, v2, v3
@@ -10321,8 +10301,8 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v6, vcc
 ; GFX6-NEXT:    v_mul_lo_u32 v6, v1, v5
 ; GFX6-NEXT:    v_mul_hi_u32 v5, v1, v5
-; GFX6-NEXT:    s_addc_u32 s1, s9, s12
-; GFX6-NEXT:    s_xor_b64 s[8:9], s[0:1], s[12:13]
+; GFX6-NEXT:    s_mov_b32 s11, 0xf000
+; GFX6-NEXT:    s_mov_b32 s10, -1
 ; GFX6-NEXT:    v_add_i32_e32 v0, vcc, v0, v6
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, v4, v5, vcc
 ; GFX6-NEXT:    v_mov_b32_e32 v0, 0
@@ -10353,15 +10333,15 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; GFX6-NEXT:    v_addc_u32_e32 v1, vcc, v1, v4, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v3, s8, v1
-; GFX6-NEXT:    v_mul_hi_u32 v4, s8, v2
-; GFX6-NEXT:    v_mul_hi_u32 v5, s8, v1
-; GFX6-NEXT:    v_mul_hi_u32 v6, s9, v1
-; GFX6-NEXT:    v_mul_lo_u32 v1, s9, v1
+; GFX6-NEXT:    v_mul_lo_u32 v3, s4, v1
+; GFX6-NEXT:    v_mul_hi_u32 v4, s4, v2
+; GFX6-NEXT:    v_mul_hi_u32 v5, s4, v1
+; GFX6-NEXT:    v_mul_hi_u32 v6, s5, v1
+; GFX6-NEXT:    v_mul_lo_u32 v1, s5, v1
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v4, v3
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, 0, v5, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v5, s9, v2
-; GFX6-NEXT:    v_mul_hi_u32 v2, s9, v2
+; GFX6-NEXT:    v_mul_lo_u32 v5, s5, v2
+; GFX6-NEXT:    v_mul_hi_u32 v2, s5, v2
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
 ; GFX6-NEXT:    v_addc_u32_e32 v2, vcc, v4, v2, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v6, v0, vcc
@@ -10373,9 +10353,9 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_mul_lo_u32 v1, s16, v1
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v3, v2
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s9, v2
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s5, v2
 ; GFX6-NEXT:    v_mov_b32_e32 v4, s17
-; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s8, v1
+; GFX6-NEXT:    v_sub_i32_e32 v1, vcc, s4, v1
 ; GFX6-NEXT:    v_subb_u32_e64 v3, s[0:1], v3, v4, vcc
 ; GFX6-NEXT:    v_subrev_i32_e64 v5, s[0:1], s16, v1
 ; GFX6-NEXT:    v_subbrev_u32_e64 v6, s[2:3], 0, v3, s[0:1]
@@ -10390,14 +10370,14 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1]
 ; GFX6-NEXT:    s_ashr_i32 s2, s15, 31
 ; GFX6-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v7
-; GFX6-NEXT:    s_add_u32 s8, s14, s2
+; GFX6-NEXT:    s_add_u32 s4, s14, s2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v3, v6, v3, s[0:1]
-; GFX6-NEXT:    v_mov_b32_e32 v6, s9
+; GFX6-NEXT:    v_mov_b32_e32 v6, s5
 ; GFX6-NEXT:    s_mov_b32 s3, s2
-; GFX6-NEXT:    s_addc_u32 s9, s15, s2
-; GFX6-NEXT:    s_xor_b64 s[8:9], s[8:9], s[2:3]
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v7, s8
-; GFX6-NEXT:    v_cvt_f32_u32_e32 v8, s9
+; GFX6-NEXT:    s_addc_u32 s5, s15, s2
+; GFX6-NEXT:    s_xor_b64 s[4:5], s[4:5], s[2:3]
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v7, s4
+; GFX6-NEXT:    v_cvt_f32_u32_e32 v8, s5
 ; GFX6-NEXT:    v_subb_u32_e32 v2, vcc, v6, v2, vcc
 ; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s17, v2
 ; GFX6-NEXT:    v_mac_f32_e32 v7, s18, v8
@@ -10416,13 +10396,13 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_mac_f32_e32 v4, s21, v5
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v5, v5
-; GFX6-NEXT:    s_sub_u32 s0, 0, s8
+; GFX6-NEXT:    s_sub_u32 s0, 0, s4
 ; GFX6-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
 ; GFX6-NEXT:    v_mul_hi_u32 v3, s0, v4
 ; GFX6-NEXT:    v_mul_lo_u32 v6, s0, v5
-; GFX6-NEXT:    s_subb_u32 s1, 0, s9
+; GFX6-NEXT:    s_subb_u32 s1, 0, s5
 ; GFX6-NEXT:    v_mul_lo_u32 v7, s1, v4
-; GFX6-NEXT:    s_ashr_i32 s14, s11, 31
+; GFX6-NEXT:    s_ashr_i32 s14, s7, 31
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v6
 ; GFX6-NEXT:    v_mul_lo_u32 v6, s0, v4
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v7
@@ -10465,58 +10445,58 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_addc_u32_e32 v7, vcc, v7, v0, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v7, vcc
-; GFX6-NEXT:    s_add_u32 s0, s10, s14
+; GFX6-NEXT:    s_add_u32 s0, s6, s14
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v5
-; GFX6-NEXT:    s_addc_u32 s1, s11, s14
+; GFX6-NEXT:    s_addc_u32 s1, s7, s14
 ; GFX6-NEXT:    v_addc_u32_e32 v4, vcc, v4, v6, vcc
-; GFX6-NEXT:    s_xor_b64 s[10:11], s[0:1], s[14:15]
-; GFX6-NEXT:    v_mul_lo_u32 v5, s10, v4
-; GFX6-NEXT:    v_mul_hi_u32 v6, s10, v3
-; GFX6-NEXT:    v_mul_hi_u32 v8, s10, v4
-; GFX6-NEXT:    v_mul_hi_u32 v9, s11, v4
-; GFX6-NEXT:    v_mul_lo_u32 v4, s11, v4
+; GFX6-NEXT:    s_xor_b64 s[6:7], s[0:1], s[14:15]
+; GFX6-NEXT:    v_mul_lo_u32 v5, s6, v4
+; GFX6-NEXT:    v_mul_hi_u32 v6, s6, v3
+; GFX6-NEXT:    v_mul_hi_u32 v8, s6, v4
+; GFX6-NEXT:    v_mul_hi_u32 v9, s7, v4
+; GFX6-NEXT:    v_mul_lo_u32 v4, s7, v4
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v6, v5
 ; GFX6-NEXT:    v_addc_u32_e32 v6, vcc, 0, v8, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v8, s11, v3
-; GFX6-NEXT:    v_mul_hi_u32 v3, s11, v3
+; GFX6-NEXT:    v_mul_lo_u32 v8, s7, v3
+; GFX6-NEXT:    v_mul_hi_u32 v3, s7, v3
 ; GFX6-NEXT:    v_mov_b32_e32 v7, s12
 ; GFX6-NEXT:    v_add_i32_e32 v5, vcc, v5, v8
 ; GFX6-NEXT:    v_addc_u32_e32 v3, vcc, v6, v3, vcc
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, v9, v0, vcc
 ; GFX6-NEXT:    v_add_i32_e32 v3, vcc, v3, v4
 ; GFX6-NEXT:    v_addc_u32_e32 v0, vcc, 0, v0, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v4, s8, v0
-; GFX6-NEXT:    v_mul_hi_u32 v5, s8, v3
+; GFX6-NEXT:    v_mul_lo_u32 v4, s4, v0
+; GFX6-NEXT:    v_mul_hi_u32 v5, s4, v3
 ; GFX6-NEXT:    v_subrev_i32_e32 v0, vcc, s12, v1
 ; GFX6-NEXT:    v_subb_u32_e32 v1, vcc, v2, v7, vcc
-; GFX6-NEXT:    v_mul_lo_u32 v2, s9, v3
-; GFX6-NEXT:    v_mul_lo_u32 v3, s8, v3
+; GFX6-NEXT:    v_mul_lo_u32 v2, s5, v3
+; GFX6-NEXT:    v_mul_lo_u32 v3, s4, v3
 ; GFX6-NEXT:    v_add_i32_e32 v4, vcc, v5, v4
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v4, v2
-; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s11, v2
-; GFX6-NEXT:    v_mov_b32_e32 v5, s9
-; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s10, v3
+; GFX6-NEXT:    v_sub_i32_e32 v4, vcc, s7, v2
+; GFX6-NEXT:    v_mov_b32_e32 v5, s5
+; GFX6-NEXT:    v_sub_i32_e32 v3, vcc, s6, v3
 ; GFX6-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, vcc
-; GFX6-NEXT:    v_subrev_i32_e64 v6, s[0:1], s8, v3
+; GFX6-NEXT:    v_subrev_i32_e64 v6, s[0:1], s4, v3
 ; GFX6-NEXT:    v_subbrev_u32_e64 v7, s[2:3], 0, v4, s[0:1]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s9, v7
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s5, v7
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, -1, s[2:3]
-; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s8, v6
+; GFX6-NEXT:    v_cmp_le_u32_e64 s[2:3], s4, v6
 ; GFX6-NEXT:    v_subb_u32_e64 v4, s[0:1], v4, v5, s[0:1]
 ; GFX6-NEXT:    v_cndmask_b32_e64 v9, 0, -1, s[2:3]
-; GFX6-NEXT:    v_cmp_eq_u32_e64 s[2:3], s9, v7
-; GFX6-NEXT:    v_subrev_i32_e64 v5, s[0:1], s8, v6
+; GFX6-NEXT:    v_cmp_eq_u32_e64 s[2:3], s5, v7
+; GFX6-NEXT:    v_subrev_i32_e64 v5, s[0:1], s4, v6
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, v8, v9, s[2:3]
 ; GFX6-NEXT:    v_subbrev_u32_e64 v4, s[0:1], 0, v4, s[0:1]
 ; GFX6-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v8
 ; GFX6-NEXT:    v_cndmask_b32_e64 v4, v7, v4, s[0:1]
-; GFX6-NEXT:    v_mov_b32_e32 v7, s11
+; GFX6-NEXT:    v_mov_b32_e32 v7, s7
 ; GFX6-NEXT:    v_subb_u32_e32 v2, vcc, v7, v2, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s9, v2
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s5, v2
 ; GFX6-NEXT:    v_cndmask_b32_e64 v7, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s8, v3
+; GFX6-NEXT:    v_cmp_le_u32_e32 vcc, s4, v3
 ; GFX6-NEXT:    v_cndmask_b32_e64 v8, 0, -1, vcc
-; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s9, v2
+; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, s5, v2
 ; GFX6-NEXT:    v_cndmask_b32_e32 v7, v7, v8, vcc
 ; GFX6-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
 ; GFX6-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
@@ -10527,24 +10507,25 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX6-NEXT:    v_mov_b32_e32 v5, s14
 ; GFX6-NEXT:    v_subrev_i32_e32 v2, vcc, s14, v3
 ; GFX6-NEXT:    v_subb_u32_e32 v3, vcc, v4, v5, vcc
-; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[4:7], 0
+; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; GFX6-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: srem_v2i64_pow2_shl_denom:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
+; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX9-NEXT:    s_mov_b64 s[2:3], 0x1000
 ; GFX9-NEXT:    s_mov_b32 s16, 0x4f800000
 ; GFX9-NEXT:    s_mov_b32 s17, 0x5f7ffffc
 ; GFX9-NEXT:    s_mov_b32 s18, 0x2f800000
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_lshl_b64 s[10:11], s[2:3], s6
-; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s4
-; GFX9-NEXT:    s_ashr_i32 s4, s3, 31
-; GFX9-NEXT:    s_add_u32 s2, s2, s4
-; GFX9-NEXT:    s_mov_b32 s5, s4
-; GFX9-NEXT:    s_addc_u32 s3, s3, s4
-; GFX9-NEXT:    s_xor_b64 s[12:13], s[2:3], s[4:5]
+; GFX9-NEXT:    s_lshl_b64 s[10:11], s[2:3], s10
+; GFX9-NEXT:    s_lshl_b64 s[2:3], s[2:3], s8
+; GFX9-NEXT:    s_ashr_i32 s8, s3, 31
+; GFX9-NEXT:    s_add_u32 s2, s2, s8
+; GFX9-NEXT:    s_mov_b32 s9, s8
+; GFX9-NEXT:    s_addc_u32 s3, s3, s8
+; GFX9-NEXT:    s_xor_b64 s[12:13], s[2:3], s[8:9]
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, s12
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s13
 ; GFX9-NEXT:    s_mov_b32 s19, 0xcf800000
@@ -10552,16 +10533,14 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(<2 x i64> addrspace(1)* %ou
 ; GFX9-NEXT:    s_subb_u32 s3, 0, s13
 ; GFX9-NEXT:    v_mac_f32_e32 v0, s16, v1
 ; GFX9-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_ashr_i32 s8, s5, 31
+; GFX9-NEXT:    s_mov_b32 s9, s8
 ; GFX9-NEXT:    v_mul_f32_e32 v0, s17, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, s18, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
 ; GFX9-NEXT:    v_mac_f32_e32 v0, s19, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v0
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_ashr_i32 s8, s5, 31
-; GFX9-NEXT:    s_mov_b32 s9, s8
 ; GFX9-NEXT:    v_mul_lo_u32 v0, s2, v1
 ; GFX9-NEXT:    v_mul_hi_u32 v3, s2, v2
 ; GFX9-NEXT:    v_mul_lo_u32 v5, s3, v2
diff --git a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
index baff889c521e8..654aac83b9ccc 100644
--- a/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/ashr.v2i16.ll
@@ -29,16 +29,15 @@ define amdgpu_kernel void @s_ashr_v2i16(<2 x i16> addrspace(1)* %out, i32, <2 x
 }
 
 ; GCN-LABEL: {{^}}v_ashr_v2i16:
-; GCN: {{buffer|flat|global}}_load_dword [[LHS:v[0-9]+]]
-; GCN: {{buffer|flat|global}}_load_dword [[RHS:v[0-9]+]]
-; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], [[RHS]], [[LHS]]
+; GCN: {{buffer|flat|global}}_load_dwordx2 v[[[LHS:[0-9]+]]:[[RHS:[0-9]+]]]
+; GFX9: v_pk_ashrrev_i16 [[RESULT:v[0-9]+]], v[[RHS]], v[[LHS]]
 
 ; VI: v_ashrrev_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; VI: v_ashrrev_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
 ; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
 ; CI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 16
-; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, 16, [[LHS]]
+; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, 16, v[[LHS]]
 ; CI: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, v{{[0-9]+}}
 ; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; CI: v_ashrrev_i32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
@@ -117,8 +116,7 @@ define amdgpu_kernel void @ashr_v_imm_v2i16(<2 x i16> addrspace(1)* %out, <2 x i
 }
 
 ; GCN-LABEL: {{^}}v_ashr_v4i16:
-; GCN: {{buffer|flat|global}}_load_dwordx2
-; GCN: {{buffer|flat|global}}_load_dwordx2
+; GCN: {{buffer|flat|global}}_load_dwordx4
 ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 ; GFX9: v_pk_ashrrev_i16 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
 
diff --git a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
index 6aa3d0bc91cbe..b3ae7e0530bb3 100644
--- a/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmax_legacy.ll
@@ -157,8 +157,8 @@ define amdgpu_kernel void @test_fmax_legacy_ogt_v1f32(<1 x float> addrspace(1)*
   %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
 
-  %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
-  %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1
+  %a = load volatile <1 x float>, <1 x float> addrspace(1)* %gep.0
+  %b = load volatile <1 x float>, <1 x float> addrspace(1)* %gep.1
 
   %cmp = fcmp ogt <1 x float> %a, %b
   %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
diff --git a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
index 051cb5b40269d..9435b8278f300 100644
--- a/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -187,8 +187,8 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v1f32(<1 x float> addrspace(1)*
   %gep.0 = getelementptr <1 x float>, <1 x float> addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr <1 x float>, <1 x float> addrspace(1)* %gep.0, i32 1
 
-  %a = load <1 x float>, <1 x float> addrspace(1)* %gep.0
-  %b = load <1 x float>, <1 x float> addrspace(1)* %gep.1
+  %a = load volatile <1 x float>, <1 x float> addrspace(1)* %gep.0
+  %b = load volatile <1 x float>, <1 x float> addrspace(1)* %gep.1
 
   %cmp = fcmp ult <1 x float> %a, %b
   %val = select <1 x i1> %cmp, <1 x float> %a, <1 x float> %b
@@ -214,8 +214,8 @@ define amdgpu_kernel void @test_fmin_legacy_ult_v2f32(<2 x float> addrspace(1)*
   %gep.0 = getelementptr <2 x float>, <2 x float> addrspace(1)* %in, i32 %tid
   %gep.1 = getelementptr <2 x float>, <2 x float> addrspace(1)* %gep.0, i32 1
 
-  %a = load <2 x float>, <2 x float> addrspace(1)* %gep.0
-  %b = load <2 x float>, <2 x float> addrspace(1)* %gep.1
+  %a = load volatile <2 x float>, <2 x float> addrspace(1)* %gep.0
+  %b = load volatile <2 x float>, <2 x float> addrspace(1)* %gep.1
 
   %cmp = fcmp ult <2 x float> %a, %b
   %val = select <2 x i1> %cmp, <2 x float> %a, <2 x float> %b
diff --git a/llvm/test/CodeGen/AMDGPU/fshl.ll b/llvm/test/CodeGen/AMDGPU/fshl.ll
index f3d220b684be5..edba47666197f 100644
--- a/llvm/test/CodeGen/AMDGPU/fshl.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshl.ll
@@ -161,47 +161,45 @@ entry:
 define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
 ; SI-LABEL: fshl_v2i32:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xf
-; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_mov_b32 s11, 0xf000
+; SI-NEXT:    s_mov_b32 s10, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s3
-; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_alignbit_b32 v0, s9, v0, 1
+; SI-NEXT:    v_mov_b32_e32 v0, s7
+; SI-NEXT:    v_alignbit_b32 v0, s5, v0, 1
 ; SI-NEXT:    s_not_b32 s1, s1
-; SI-NEXT:    s_lshr_b32 s3, s9, 1
+; SI-NEXT:    s_lshr_b32 s2, s5, 1
 ; SI-NEXT:    v_mov_b32_e32 v1, s1
-; SI-NEXT:    v_alignbit_b32 v1, s3, v0, v1
-; SI-NEXT:    v_mov_b32_e32 v0, s2
+; SI-NEXT:    v_alignbit_b32 v1, s2, v0, v1
+; SI-NEXT:    v_mov_b32_e32 v0, s6
 ; SI-NEXT:    s_not_b32 s0, s0
-; SI-NEXT:    v_alignbit_b32 v0, s8, v0, 1
-; SI-NEXT:    s_lshr_b32 s1, s8, 1
+; SI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
+; SI-NEXT:    s_lshr_b32 s1, s4, 1
 ; SI-NEXT:    v_mov_b32_e32 v2, s0
 ; SI-NEXT:    v_alignbit_b32 v0, s1, v0, v2
-; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fshl_v2i32:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
-; VI-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x3c
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x3c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s3
-; VI-NEXT:    s_lshr_b32 s3, s5, 1
+; VI-NEXT:    v_mov_b32_e32 v0, s7
+; VI-NEXT:    s_not_b32 s3, s3
+; VI-NEXT:    s_lshr_b32 s7, s5, 1
 ; VI-NEXT:    v_alignbit_b32 v0, s5, v0, 1
-; VI-NEXT:    s_not_b32 s5, s7
-; VI-NEXT:    v_mov_b32_e32 v1, s5
-; VI-NEXT:    v_alignbit_b32 v1, s3, v0, v1
-; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    s_not_b32 s3, s6
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_alignbit_b32 v1, s7, v0, v1
+; VI-NEXT:    v_mov_b32_e32 v0, s6
+; VI-NEXT:    s_not_b32 s2, s2
 ; VI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
-; VI-NEXT:    s_lshr_b32 s2, s4, 1
-; VI-NEXT:    v_mov_b32_e32 v2, s3
-; VI-NEXT:    v_alignbit_b32 v0, s2, v0, v2
+; VI-NEXT:    s_lshr_b32 s3, s4, 1
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_alignbit_b32 v0, s3, v0, v2
 ; VI-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
@@ -209,25 +207,24 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
 ;
 ; GFX9-LABEL: fshl_v2i32:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x3c
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_alignbit_b32 v0, s5, v0, 1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s7
 ; GFX9-NEXT:    s_lshr_b32 s0, s5, 1
 ; GFX9-NEXT:    s_not_b32 s1, s9
+; GFX9-NEXT:    v_alignbit_b32 v0, s5, v0, 1
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_alignbit_b32 v1, s0, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-NEXT:    v_mov_b32_e32 v0, s6
 ; GFX9-NEXT:    s_not_b32 s1, s8
 ; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 1
 ; GFX9-NEXT:    s_lshr_b32 s0, s4, 1
 ; GFX9-NEXT:    v_mov_b32_e32 v3, s1
 ; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v3
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; R600-LABEL: fshl_v2i32:
@@ -250,21 +247,20 @@ define amdgpu_kernel void @fshl_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
 ;
 ; GFX10-LABEL: fshl_v2i32:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_clause 0x3
-; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x3c
+; GFX10-NEXT:    s_clause 0x2
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x3c
 ; GFX10-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_lshr_b32 s0, s3, 1
-; GFX10-NEXT:    v_alignbit_b32 v0, s3, s5, 1
-; GFX10-NEXT:    v_alignbit_b32 v3, s2, s4, 1
-; GFX10-NEXT:    s_not_b32 s1, s7
-; GFX10-NEXT:    s_lshr_b32 s2, s2, 1
-; GFX10-NEXT:    s_not_b32 s3, s6
+; GFX10-NEXT:    v_alignbit_b32 v0, s5, s7, 1
+; GFX10-NEXT:    v_alignbit_b32 v3, s4, s6, 1
+; GFX10-NEXT:    s_lshr_b32 s0, s5, 1
+; GFX10-NEXT:    s_not_b32 s1, s3
+; GFX10-NEXT:    s_lshr_b32 s3, s4, 1
+; GFX10-NEXT:    s_not_b32 s2, s2
 ; GFX10-NEXT:    v_alignbit_b32 v1, s0, v0, s1
-; GFX10-NEXT:    v_alignbit_b32 v0, s2, v3, s3
+; GFX10-NEXT:    v_alignbit_b32 v0, s3, v3, s2
 ; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX10-NEXT:    s_endpgm
 entry:
@@ -276,27 +272,25 @@ entry:
 define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
 ; SI-LABEL: fshl_v2i32_imm:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
-; SI-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    v_alignbit_b32 v1, s7, v0, 23
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    v_alignbit_b32 v0, s6, v0, 25
+; SI-NEXT:    v_mov_b32_e32 v0, s7
+; SI-NEXT:    v_mov_b32_e32 v2, s6
+; SI-NEXT:    v_alignbit_b32 v1, s5, v0, 23
+; SI-NEXT:    v_alignbit_b32 v0, s4, v2, 25
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fshl_v2i32_imm:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s3
-; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v0, s7
+; VI-NEXT:    v_mov_b32_e32 v2, s6
 ; VI-NEXT:    v_alignbit_b32 v1, s5, v0, 23
 ; VI-NEXT:    v_alignbit_b32 v0, s4, v2, 25
 ; VI-NEXT:    v_mov_b32_e32 v3, s1
@@ -306,16 +300,15 @@ define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
 ;
 ; GFX9-LABEL: fshl_v2i32_imm:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-NEXT:    v_mov_b32_e32 v3, s6
 ; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, 23
 ; GFX9-NEXT:    v_alignbit_b32 v0, s4, v3, 25
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; R600-LABEL: fshl_v2i32_imm:
@@ -334,15 +327,14 @@ define amdgpu_kernel void @fshl_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
 ;
 ; GFX10-LABEL: fshl_v2i32_imm:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_clause 0x2
-; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x24
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_alignbit_b32 v1, s3, s5, 23
-; GFX10-NEXT:    v_alignbit_b32 v0, s2, s4, 25
-; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX10-NEXT:    v_alignbit_b32 v1, s5, s7, 23
+; GFX10-NEXT:    v_alignbit_b32 v0, s4, s6, 25
+; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; GFX10-NEXT:    s_endpgm
 entry:
   %0 = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>)
@@ -353,69 +345,67 @@ entry:
 define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; SI-LABEL: fshl_v4i32:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
-; SI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0xd
-; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x15
-; SI-NEXT:    s_mov_b32 s11, 0xf000
+; SI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
+; SI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x15
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s7
-; SI-NEXT:    s_mov_b32 s10, -1
-; SI-NEXT:    v_alignbit_b32 v0, s15, v0, 1
-; SI-NEXT:    s_not_b32 s3, s3
-; SI-NEXT:    s_lshr_b32 s7, s15, 1
-; SI-NEXT:    v_mov_b32_e32 v1, s3
+; SI-NEXT:    v_mov_b32_e32 v0, s11
+; SI-NEXT:    s_not_b32 s11, s15
+; SI-NEXT:    v_alignbit_b32 v0, s7, v0, 1
+; SI-NEXT:    s_lshr_b32 s7, s7, 1
+; SI-NEXT:    v_mov_b32_e32 v1, s11
 ; SI-NEXT:    v_alignbit_b32 v3, s7, v0, v1
-; SI-NEXT:    v_mov_b32_e32 v0, s6
-; SI-NEXT:    s_not_b32 s2, s2
-; SI-NEXT:    v_alignbit_b32 v0, s14, v0, 1
-; SI-NEXT:    s_lshr_b32 s3, s14, 1
-; SI-NEXT:    v_mov_b32_e32 v1, s2
-; SI-NEXT:    v_alignbit_b32 v2, s3, v0, v1
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    s_not_b32 s1, s1
-; SI-NEXT:    v_alignbit_b32 v0, s13, v0, 1
-; SI-NEXT:    s_lshr_b32 s2, s13, 1
-; SI-NEXT:    v_mov_b32_e32 v1, s1
-; SI-NEXT:    v_alignbit_b32 v1, s2, v0, v1
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    s_not_b32 s0, s0
-; SI-NEXT:    v_alignbit_b32 v0, s12, v0, 1
-; SI-NEXT:    s_lshr_b32 s1, s12, 1
-; SI-NEXT:    v_mov_b32_e32 v4, s0
-; SI-NEXT:    v_alignbit_b32 v0, s1, v0, v4
-; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; SI-NEXT:    v_mov_b32_e32 v0, s10
+; SI-NEXT:    s_not_b32 s7, s14
+; SI-NEXT:    v_alignbit_b32 v0, s6, v0, 1
+; SI-NEXT:    s_lshr_b32 s6, s6, 1
+; SI-NEXT:    v_mov_b32_e32 v1, s7
+; SI-NEXT:    v_alignbit_b32 v2, s6, v0, v1
+; SI-NEXT:    v_mov_b32_e32 v0, s9
+; SI-NEXT:    s_not_b32 s6, s13
+; SI-NEXT:    v_alignbit_b32 v0, s5, v0, 1
+; SI-NEXT:    s_lshr_b32 s5, s5, 1
+; SI-NEXT:    v_mov_b32_e32 v1, s6
+; SI-NEXT:    v_alignbit_b32 v1, s5, v0, v1
+; SI-NEXT:    v_mov_b32_e32 v0, s8
+; SI-NEXT:    s_not_b32 s5, s12
+; SI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
+; SI-NEXT:    s_lshr_b32 s4, s4, 1
+; SI-NEXT:    v_mov_b32_e32 v4, s5
+; SI-NEXT:    v_alignbit_b32 v0, s4, v0, v4
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fshl_v4i32:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; VI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s7
-; VI-NEXT:    s_lshr_b32 s2, s11, 1
+; VI-NEXT:    v_mov_b32_e32 v0, s11
 ; VI-NEXT:    s_not_b32 s3, s15
-; VI-NEXT:    v_alignbit_b32 v0, s11, v0, 1
+; VI-NEXT:    s_lshr_b32 s2, s7, 1
+; VI-NEXT:    v_alignbit_b32 v0, s7, v0, 1
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
 ; VI-NEXT:    v_alignbit_b32 v3, s2, v0, v1
-; VI-NEXT:    v_mov_b32_e32 v0, s6
+; VI-NEXT:    v_mov_b32_e32 v0, s10
 ; VI-NEXT:    s_not_b32 s3, s14
-; VI-NEXT:    v_alignbit_b32 v0, s10, v0, 1
-; VI-NEXT:    s_lshr_b32 s2, s10, 1
+; VI-NEXT:    v_alignbit_b32 v0, s6, v0, 1
+; VI-NEXT:    s_lshr_b32 s2, s6, 1
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
 ; VI-NEXT:    v_alignbit_b32 v2, s2, v0, v1
-; VI-NEXT:    v_mov_b32_e32 v0, s5
+; VI-NEXT:    v_mov_b32_e32 v0, s9
 ; VI-NEXT:    s_not_b32 s3, s13
-; VI-NEXT:    v_alignbit_b32 v0, s9, v0, 1
-; VI-NEXT:    s_lshr_b32 s2, s9, 1
+; VI-NEXT:    v_alignbit_b32 v0, s5, v0, 1
+; VI-NEXT:    s_lshr_b32 s2, s5, 1
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
 ; VI-NEXT:    v_alignbit_b32 v1, s2, v0, v1
-; VI-NEXT:    v_mov_b32_e32 v0, s4
+; VI-NEXT:    v_mov_b32_e32 v0, s8
 ; VI-NEXT:    s_not_b32 s3, s12
-; VI-NEXT:    v_alignbit_b32 v0, s8, v0, 1
-; VI-NEXT:    s_lshr_b32 s2, s8, 1
+; VI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
+; VI-NEXT:    s_lshr_b32 s2, s4, 1
 ; VI-NEXT:    v_mov_b32_e32 v4, s3
 ; VI-NEXT:    v_alignbit_b32 v0, s2, v0, v4
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
@@ -425,34 +415,33 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
 ;
 ; GFX9-LABEL: fshl_v4i32:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s7
-; GFX9-NEXT:    v_alignbit_b32 v0, s11, v0, 1
-; GFX9-NEXT:    s_lshr_b32 s0, s11, 1
 ; GFX9-NEXT:    s_not_b32 s1, s15
+; GFX9-NEXT:    v_mov_b32_e32 v0, s11
+; GFX9-NEXT:    s_lshr_b32 s0, s7, 1
+; GFX9-NEXT:    v_alignbit_b32 v0, s7, v0, 1
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_alignbit_b32 v3, s0, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-NEXT:    v_mov_b32_e32 v0, s10
 ; GFX9-NEXT:    s_not_b32 s1, s14
-; GFX9-NEXT:    v_alignbit_b32 v0, s10, v0, 1
-; GFX9-NEXT:    s_lshr_b32 s0, s10, 1
+; GFX9-NEXT:    v_alignbit_b32 v0, s6, v0, 1
+; GFX9-NEXT:    s_lshr_b32 s0, s6, 1
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_alignbit_b32 v2, s0, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s5
+; GFX9-NEXT:    v_mov_b32_e32 v0, s9
 ; GFX9-NEXT:    s_not_b32 s1, s13
-; GFX9-NEXT:    v_alignbit_b32 v0, s9, v0, 1
-; GFX9-NEXT:    s_lshr_b32 s0, s9, 1
+; GFX9-NEXT:    v_alignbit_b32 v0, s5, v0, 1
+; GFX9-NEXT:    s_lshr_b32 s0, s5, 1
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX9-NEXT:    v_alignbit_b32 v1, s0, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s4
+; GFX9-NEXT:    v_mov_b32_e32 v0, s8
 ; GFX9-NEXT:    s_not_b32 s1, s12
-; GFX9-NEXT:    v_alignbit_b32 v0, s8, v0, 1
-; GFX9-NEXT:    s_lshr_b32 s0, s8, 1
+; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 1
+; GFX9-NEXT:    s_lshr_b32 s0, s4, 1
 ; GFX9-NEXT:    v_mov_b32_e32 v5, s1
 ; GFX9-NEXT:    v_alignbit_b32 v0, s0, v0, v5
 ; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
@@ -486,30 +475,29 @@ define amdgpu_kernel void @fshl_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
 ;
 ; GFX10-LABEL: fshl_v4i32:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_clause 0x3
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x44
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
-; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_lshr_b32 s0, s7, 1
 ; GFX10-NEXT:    v_alignbit_b32 v0, s7, s11, 1
 ; GFX10-NEXT:    v_alignbit_b32 v1, s6, s10, 1
 ; GFX10-NEXT:    v_alignbit_b32 v5, s5, s9, 1
 ; GFX10-NEXT:    v_alignbit_b32 v6, s4, s8, 1
-; GFX10-NEXT:    s_not_b32 s1, s15
+; GFX10-NEXT:    s_lshr_b32 s2, s7, 1
+; GFX10-NEXT:    s_not_b32 s3, s15
 ; GFX10-NEXT:    s_lshr_b32 s6, s6, 1
 ; GFX10-NEXT:    s_not_b32 s7, s14
 ; GFX10-NEXT:    s_lshr_b32 s5, s5, 1
 ; GFX10-NEXT:    s_not_b32 s9, s13
 ; GFX10-NEXT:    s_lshr_b32 s4, s4, 1
 ; GFX10-NEXT:    s_not_b32 s8, s12
-; GFX10-NEXT:    v_alignbit_b32 v3, s0, v0, s1
+; GFX10-NEXT:    v_alignbit_b32 v3, s2, v0, s3
 ; GFX10-NEXT:    v_alignbit_b32 v2, s6, v1, s7
 ; GFX10-NEXT:    v_alignbit_b32 v1, s5, v5, s9
 ; GFX10-NEXT:    v_alignbit_b32 v0, s4, v6, s8
-; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX10-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
 ; GFX10-NEXT:    s_endpgm
 entry:
   %0 = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z)
@@ -520,58 +508,55 @@ entry:
 define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
 ; SI-LABEL: fshl_v4i32_imm:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s7
-; SI-NEXT:    v_alignbit_b32 v3, s11, v0, 31
-; SI-NEXT:    v_mov_b32_e32 v0, s6
-; SI-NEXT:    v_alignbit_b32 v2, s10, v0, 23
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    v_alignbit_b32 v1, s9, v0, 25
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    v_alignbit_b32 v0, s8, v0, 31
+; SI-NEXT:    v_mov_b32_e32 v0, s11
+; SI-NEXT:    v_mov_b32_e32 v1, s10
+; SI-NEXT:    v_alignbit_b32 v3, s7, v0, 31
+; SI-NEXT:    v_mov_b32_e32 v0, s9
+; SI-NEXT:    v_alignbit_b32 v2, s6, v1, 23
+; SI-NEXT:    v_alignbit_b32 v1, s5, v0, 25
+; SI-NEXT:    v_mov_b32_e32 v0, s8
+; SI-NEXT:    v_alignbit_b32 v0, s4, v0, 31
 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fshl_v4i32_imm:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s7
-; VI-NEXT:    v_mov_b32_e32 v1, s6
-; VI-NEXT:    v_alignbit_b32 v3, s11, v0, 31
-; VI-NEXT:    v_mov_b32_e32 v0, s5
-; VI-NEXT:    v_alignbit_b32 v2, s10, v1, 23
-; VI-NEXT:    v_alignbit_b32 v1, s9, v0, 25
-; VI-NEXT:    v_mov_b32_e32 v0, s4
+; VI-NEXT:    v_mov_b32_e32 v0, s11
+; VI-NEXT:    v_mov_b32_e32 v1, s10
+; VI-NEXT:    v_mov_b32_e32 v4, s9
+; VI-NEXT:    v_alignbit_b32 v3, s7, v0, 31
+; VI-NEXT:    v_alignbit_b32 v2, s6, v1, 23
+; VI-NEXT:    v_alignbit_b32 v1, s5, v4, 25
+; VI-NEXT:    v_mov_b32_e32 v0, s8
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
-; VI-NEXT:    v_alignbit_b32 v0, s8, v0, 31
+; VI-NEXT:    v_alignbit_b32 v0, s4, v0, 31
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: fshl_v4i32_imm:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s7
-; GFX9-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-NEXT:    v_alignbit_b32 v3, s11, v0, 31
-; GFX9-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-NEXT:    v_alignbit_b32 v2, s10, v1, 23
-; GFX9-NEXT:    v_alignbit_b32 v1, s9, v0, 25
-; GFX9-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9-NEXT:    v_alignbit_b32 v0, s8, v0, 31
-; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s11
+; GFX9-NEXT:    v_mov_b32_e32 v1, s10
+; GFX9-NEXT:    v_alignbit_b32 v3, s7, v0, 31
+; GFX9-NEXT:    v_mov_b32_e32 v0, s9
+; GFX9-NEXT:    v_alignbit_b32 v2, s6, v1, 23
+; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, 25
+; GFX9-NEXT:    v_mov_b32_e32 v0, s8
+; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 31
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; R600-LABEL: fshl_v4i32_imm:
@@ -594,9 +579,8 @@ define amdgpu_kernel void @fshl_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
 ;
 ; GFX10-LABEL: fshl_v4i32_imm:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_clause 0x2
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x44
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll
index 5214a02b9f74f..1c041d0c7d700 100644
--- a/llvm/test/CodeGen/AMDGPU/fshr.ll
+++ b/llvm/test/CodeGen/AMDGPU/fshr.ll
@@ -156,35 +156,33 @@ entry:
 define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y, <2 x i32> %z) {
 ; SI-LABEL: fshr_v2i32:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0xd
+; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
 ; SI-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0xf
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xb
-; SI-NEXT:    s_mov_b32 s7, 0xf000
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s3
+; SI-NEXT:    v_mov_b32_e32 v0, s7
 ; SI-NEXT:    v_mov_b32_e32 v1, s9
+; SI-NEXT:    v_alignbit_b32 v1, s5, v0, v1
+; SI-NEXT:    v_mov_b32_e32 v0, s6
 ; SI-NEXT:    v_mov_b32_e32 v2, s8
-; SI-NEXT:    v_alignbit_b32 v1, s1, v0, v1
-; SI-NEXT:    v_mov_b32_e32 v0, s2
-; SI-NEXT:    s_mov_b32 s6, -1
-; SI-NEXT:    v_alignbit_b32 v0, s0, v0, v2
-; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT:    v_alignbit_b32 v0, s4, v0, v2
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fshr_v2i32:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x3c
-; VI-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x3c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s3
-; VI-NEXT:    v_mov_b32_e32 v1, s5
-; VI-NEXT:    v_alignbit_b32 v1, s7, v0, v1
+; VI-NEXT:    v_mov_b32_e32 v0, s7
+; VI-NEXT:    v_mov_b32_e32 v1, s3
+; VI-NEXT:    v_mov_b32_e32 v2, s6
+; VI-NEXT:    v_alignbit_b32 v1, s5, v0, v1
 ; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    v_mov_b32_e32 v2, s4
-; VI-NEXT:    v_alignbit_b32 v0, s6, v0, v2
+; VI-NEXT:    v_alignbit_b32 v0, s4, v2, v0
 ; VI-NEXT:    v_mov_b32_e32 v3, s1
 ; VI-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
@@ -192,18 +190,17 @@ define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
 ;
 ; GFX9-LABEL: fshr_v2i32:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x3c
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x3c
 ; GFX9-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_mov_b32_e32 v1, s5
-; GFX9-NEXT:    v_alignbit_b32 v1, s7, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_mov_b32_e32 v3, s4
-; GFX9-NEXT:    v_alignbit_b32 v0, s6, v0, v3
+; GFX9-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, v1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s6
+; GFX9-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, v3
 ; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
 ; GFX9-NEXT:    s_endpgm
 ;
@@ -223,10 +220,9 @@ define amdgpu_kernel void @fshr_v2i32(<2 x i32> addrspace(1)* %in, <2 x i32> %x,
 ;
 ; GFX10-LABEL: fshr_v2i32:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_clause 0x3
+; GFX10-NEXT:    s_clause 0x2
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x3c
-; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
-; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x34
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
 ; GFX10-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v3, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
@@ -245,27 +241,25 @@ entry:
 define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32> %x, <2 x i32> %y) {
 ; SI-LABEL: fshr_v2i32_imm:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xd
-; SI-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0xb
+; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xb
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    v_alignbit_b32 v1, s7, v0, 9
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    v_alignbit_b32 v0, s6, v0, 7
+; SI-NEXT:    v_mov_b32_e32 v0, s7
+; SI-NEXT:    v_mov_b32_e32 v2, s6
+; SI-NEXT:    v_alignbit_b32 v1, s5, v0, 9
+; SI-NEXT:    v_alignbit_b32 v0, s4, v2, 7
 ; SI-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fshr_v2i32_imm:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s3
-; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v0, s7
+; VI-NEXT:    v_mov_b32_e32 v2, s6
 ; VI-NEXT:    v_alignbit_b32 v1, s5, v0, 9
 ; VI-NEXT:    v_alignbit_b32 v0, s4, v2, 7
 ; VI-NEXT:    v_mov_b32_e32 v3, s1
@@ -275,16 +269,15 @@ define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
 ;
 ; GFX9-LABEL: fshr_v2i32_imm:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s3
-; GFX9-NEXT:    v_mov_b32_e32 v3, s2
+; GFX9-NEXT:    v_mov_b32_e32 v0, s7
+; GFX9-NEXT:    v_mov_b32_e32 v3, s6
 ; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, 9
 ; GFX9-NEXT:    v_alignbit_b32 v0, s4, v3, 7
-; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; R600-LABEL: fshr_v2i32_imm:
@@ -303,15 +296,14 @@ define amdgpu_kernel void @fshr_v2i32_imm(<2 x i32> addrspace(1)* %in, <2 x i32>
 ;
 ; GFX10-LABEL: fshr_v2i32_imm:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_clause 0x2
-; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
-; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dwordx2 s[6:7], s[0:1], 0x24
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x2c
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_alignbit_b32 v1, s3, s5, 9
-; GFX10-NEXT:    v_alignbit_b32 v0, s2, s4, 7
-; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[6:7]
+; GFX10-NEXT:    v_alignbit_b32 v1, s5, s7, 9
+; GFX10-NEXT:    v_alignbit_b32 v0, s4, s6, 7
+; GFX10-NEXT:    global_store_dwordx2 v2, v[0:1], s[2:3]
 ; GFX10-NEXT:    s_endpgm
 entry:
   %0 = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> <i32 7, i32 9>)
@@ -322,47 +314,45 @@ entry:
 define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; SI-LABEL: fshr_v4i32:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x15
-; SI-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x9
-; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
-; SI-NEXT:    s_mov_b32 s15, 0xf000
+; SI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
+; SI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x15
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s7
-; SI-NEXT:    v_mov_b32_e32 v1, s11
-; SI-NEXT:    v_mov_b32_e32 v4, s8
-; SI-NEXT:    v_alignbit_b32 v3, s3, v0, v1
-; SI-NEXT:    v_mov_b32_e32 v0, s6
-; SI-NEXT:    v_mov_b32_e32 v1, s10
-; SI-NEXT:    v_alignbit_b32 v2, s2, v0, v1
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    v_mov_b32_e32 v1, s9
-; SI-NEXT:    v_alignbit_b32 v1, s1, v0, v1
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    s_mov_b32 s14, -1
-; SI-NEXT:    v_alignbit_b32 v0, s0, v0, v4
-; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[12:15], 0
+; SI-NEXT:    v_mov_b32_e32 v0, s11
+; SI-NEXT:    v_mov_b32_e32 v1, s15
+; SI-NEXT:    v_alignbit_b32 v3, s7, v0, v1
+; SI-NEXT:    v_mov_b32_e32 v0, s10
+; SI-NEXT:    v_mov_b32_e32 v1, s14
+; SI-NEXT:    v_alignbit_b32 v2, s6, v0, v1
+; SI-NEXT:    v_mov_b32_e32 v0, s9
+; SI-NEXT:    v_mov_b32_e32 v1, s13
+; SI-NEXT:    v_alignbit_b32 v1, s5, v0, v1
+; SI-NEXT:    v_mov_b32_e32 v0, s8
+; SI-NEXT:    v_mov_b32_e32 v4, s12
+; SI-NEXT:    v_alignbit_b32 v0, s4, v0, v4
+; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fshr_v4i32:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x54
-; VI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s7
-; VI-NEXT:    v_mov_b32_e32 v1, s11
-; VI-NEXT:    v_alignbit_b32 v3, s15, v0, v1
-; VI-NEXT:    v_mov_b32_e32 v0, s6
-; VI-NEXT:    v_mov_b32_e32 v1, s10
-; VI-NEXT:    v_alignbit_b32 v2, s14, v0, v1
-; VI-NEXT:    v_mov_b32_e32 v0, s5
-; VI-NEXT:    v_mov_b32_e32 v1, s9
-; VI-NEXT:    v_alignbit_b32 v1, s13, v0, v1
-; VI-NEXT:    v_mov_b32_e32 v0, s4
-; VI-NEXT:    v_mov_b32_e32 v4, s8
-; VI-NEXT:    v_alignbit_b32 v0, s12, v0, v4
+; VI-NEXT:    v_mov_b32_e32 v0, s11
+; VI-NEXT:    v_mov_b32_e32 v1, s15
+; VI-NEXT:    v_mov_b32_e32 v2, s10
+; VI-NEXT:    v_alignbit_b32 v3, s7, v0, v1
+; VI-NEXT:    v_mov_b32_e32 v0, s14
+; VI-NEXT:    v_alignbit_b32 v2, s6, v2, v0
+; VI-NEXT:    v_mov_b32_e32 v0, s9
+; VI-NEXT:    v_mov_b32_e32 v1, s13
+; VI-NEXT:    v_alignbit_b32 v1, s5, v0, v1
+; VI-NEXT:    v_mov_b32_e32 v0, s8
+; VI-NEXT:    v_mov_b32_e32 v4, s12
+; VI-NEXT:    v_alignbit_b32 v0, s4, v0, v4
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
@@ -370,25 +360,24 @@ define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
 ;
 ; GFX9-LABEL: fshr_v4i32:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x54
-; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s7
-; GFX9-NEXT:    v_mov_b32_e32 v1, s11
-; GFX9-NEXT:    v_alignbit_b32 v3, s15, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s6
-; GFX9-NEXT:    v_mov_b32_e32 v1, s10
-; GFX9-NEXT:    v_alignbit_b32 v2, s14, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-NEXT:    v_mov_b32_e32 v1, s9
-; GFX9-NEXT:    v_alignbit_b32 v1, s13, v0, v1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9-NEXT:    v_mov_b32_e32 v5, s8
-; GFX9-NEXT:    v_alignbit_b32 v0, s12, v0, v5
-; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s11
+; GFX9-NEXT:    v_mov_b32_e32 v1, s15
+; GFX9-NEXT:    v_alignbit_b32 v3, s7, v0, v1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s10
+; GFX9-NEXT:    v_mov_b32_e32 v1, s14
+; GFX9-NEXT:    v_alignbit_b32 v2, s6, v0, v1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s9
+; GFX9-NEXT:    v_mov_b32_e32 v1, s13
+; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, v1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s8
+; GFX9-NEXT:    v_mov_b32_e32 v5, s12
+; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, v5
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; R600-LABEL: fshr_v4i32:
@@ -411,21 +400,20 @@ define amdgpu_kernel void @fshr_v4i32(<4 x i32> addrspace(1)* %in, <4 x i32> %x,
 ;
 ; GFX10-LABEL: fshr_v4i32:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_clause 0x3
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x54
-; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x44
-; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x34
+; GFX10-NEXT:    s_clause 0x2
+; GFX10-NEXT:    s_load_dwordx4 s[12:15], s[0:1], 0x54
+; GFX10-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v6, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_mov_b32_e32 v0, s7
-; GFX10-NEXT:    v_mov_b32_e32 v1, s6
-; GFX10-NEXT:    v_mov_b32_e32 v4, s5
-; GFX10-NEXT:    v_mov_b32_e32 v5, s4
-; GFX10-NEXT:    v_alignbit_b32 v3, s15, s11, v0
-; GFX10-NEXT:    v_alignbit_b32 v2, s14, s10, v1
-; GFX10-NEXT:    v_alignbit_b32 v1, s13, s9, v4
-; GFX10-NEXT:    v_alignbit_b32 v0, s12, s8, v5
+; GFX10-NEXT:    v_mov_b32_e32 v0, s15
+; GFX10-NEXT:    v_mov_b32_e32 v1, s14
+; GFX10-NEXT:    v_mov_b32_e32 v4, s13
+; GFX10-NEXT:    v_mov_b32_e32 v5, s12
+; GFX10-NEXT:    v_alignbit_b32 v3, s7, s11, v0
+; GFX10-NEXT:    v_alignbit_b32 v2, s6, s10, v1
+; GFX10-NEXT:    v_alignbit_b32 v1, s5, s9, v4
+; GFX10-NEXT:    v_alignbit_b32 v0, s4, s8, v5
 ; GFX10-NEXT:    global_store_dwordx4 v6, v[0:3], s[2:3]
 ; GFX10-NEXT:    s_endpgm
 entry:
@@ -437,58 +425,55 @@ entry:
 define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32> %x, <4 x i32> %y) {
 ; SI-LABEL: fshr_v4i32_imm:
 ; SI:       ; %bb.0: ; %entry
-; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; SI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0xd
+; SI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    v_mov_b32_e32 v0, s7
-; SI-NEXT:    v_alignbit_b32 v3, s11, v0, 1
-; SI-NEXT:    v_mov_b32_e32 v0, s6
-; SI-NEXT:    v_alignbit_b32 v2, s10, v0, 9
-; SI-NEXT:    v_mov_b32_e32 v0, s5
-; SI-NEXT:    v_alignbit_b32 v1, s9, v0, 7
-; SI-NEXT:    v_mov_b32_e32 v0, s4
-; SI-NEXT:    v_alignbit_b32 v0, s8, v0, 1
+; SI-NEXT:    v_mov_b32_e32 v0, s11
+; SI-NEXT:    v_mov_b32_e32 v1, s10
+; SI-NEXT:    v_alignbit_b32 v3, s7, v0, 1
+; SI-NEXT:    v_mov_b32_e32 v0, s9
+; SI-NEXT:    v_alignbit_b32 v2, s6, v1, 9
+; SI-NEXT:    v_alignbit_b32 v1, s5, v0, 7
+; SI-NEXT:    v_mov_b32_e32 v0, s8
+; SI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
 ; SI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: fshr_v4i32_imm:
 ; VI:       ; %bb.0: ; %entry
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
+; VI-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    v_mov_b32_e32 v0, s7
-; VI-NEXT:    v_mov_b32_e32 v1, s6
-; VI-NEXT:    v_alignbit_b32 v3, s11, v0, 1
-; VI-NEXT:    v_mov_b32_e32 v0, s5
-; VI-NEXT:    v_alignbit_b32 v2, s10, v1, 9
-; VI-NEXT:    v_alignbit_b32 v1, s9, v0, 7
-; VI-NEXT:    v_mov_b32_e32 v0, s4
+; VI-NEXT:    v_mov_b32_e32 v0, s11
+; VI-NEXT:    v_mov_b32_e32 v1, s10
+; VI-NEXT:    v_mov_b32_e32 v4, s9
+; VI-NEXT:    v_alignbit_b32 v3, s7, v0, 1
+; VI-NEXT:    v_alignbit_b32 v2, s6, v1, 9
+; VI-NEXT:    v_alignbit_b32 v1, s5, v4, 7
+; VI-NEXT:    v_mov_b32_e32 v0, s8
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
-; VI-NEXT:    v_alignbit_b32 v0, s8, v0, 1
+; VI-NEXT:    v_alignbit_b32 v0, s4, v0, 1
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; VI-NEXT:    s_endpgm
 ;
 ; GFX9-LABEL: fshr_v4i32_imm:
 ; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x44
-; GFX9-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x34
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX9-NEXT:    v_mov_b32_e32 v4, 0
+; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s7
-; GFX9-NEXT:    v_mov_b32_e32 v1, s6
-; GFX9-NEXT:    v_alignbit_b32 v3, s11, v0, 1
-; GFX9-NEXT:    v_mov_b32_e32 v0, s5
-; GFX9-NEXT:    v_alignbit_b32 v2, s10, v1, 9
-; GFX9-NEXT:    v_alignbit_b32 v1, s9, v0, 7
-; GFX9-NEXT:    v_mov_b32_e32 v0, s4
-; GFX9-NEXT:    v_alignbit_b32 v0, s8, v0, 1
-; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v0, s11
+; GFX9-NEXT:    v_mov_b32_e32 v1, s10
+; GFX9-NEXT:    v_alignbit_b32 v3, s7, v0, 1
+; GFX9-NEXT:    v_mov_b32_e32 v0, s9
+; GFX9-NEXT:    v_alignbit_b32 v2, s6, v1, 9
+; GFX9-NEXT:    v_alignbit_b32 v1, s5, v0, 7
+; GFX9-NEXT:    v_mov_b32_e32 v0, s8
+; GFX9-NEXT:    v_alignbit_b32 v0, s4, v0, 1
+; GFX9-NEXT:    global_store_dwordx4 v4, v[0:3], s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; R600-LABEL: fshr_v4i32_imm:
@@ -509,9 +494,8 @@ define amdgpu_kernel void @fshr_v4i32_imm(<4 x i32> addrspace(1)* %in, <4 x i32>
 ;
 ; GFX10-LABEL: fshr_v4i32_imm:
 ; GFX10:       ; %bb.0: ; %entry
-; GFX10-NEXT:    s_clause 0x2
-; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
-; GFX10-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x44
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0x34
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/half.ll b/llvm/test/CodeGen/AMDGPU/half.ll
index dc402e13068c5..9669f01ea3e27 100644
--- a/llvm/test/CodeGen/AMDGPU/half.ll
+++ b/llvm/test/CodeGen/AMDGPU/half.ll
@@ -1897,8 +1897,7 @@ define amdgpu_kernel void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #
 define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
 ; SI-LABEL: fadd_v2f16:
 ; SI:       ; %bb.0:
-; SI-NEXT:    s_load_dword s0, s[4:5], 0x2
-; SI-NEXT:    s_load_dword s1, s[4:5], 0x3
+; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x2
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_lshr_b32 s2, s0, 16
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, s0
@@ -1921,20 +1920,19 @@ define amdgpu_kernel void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half>
 ;
 ; VI-LABEL: fadd_v2f16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[4:5], 0xc
-; VI-NEXT:    s_load_dword s3, s[4:5], 0x8
-; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x8
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_lshr_b32 s4, s2, 16
-; VI-NEXT:    s_lshr_b32 s5, s3, 16
-; VI-NEXT:    v_mov_b32_e32 v0, s4
-; VI-NEXT:    v_mov_b32_e32 v1, s5
-; VI-NEXT:    v_add_f16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NEXT:    v_mov_b32_e32 v1, s2
-; VI-NEXT:    v_add_f16_e32 v1, s3, v1
-; VI-NEXT:    v_or_b32_e32 v2, v1, v0
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    s_lshr_b32 s4, s1, 16
+; VI-NEXT:    s_lshr_b32 s5, s0, 16
+; VI-NEXT:    v_mov_b32_e32 v0, s1
+; VI-NEXT:    v_mov_b32_e32 v1, s4
+; VI-NEXT:    v_mov_b32_e32 v2, s5
+; VI-NEXT:    v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT:    v_add_f16_e32 v0, s0, v0
+; VI-NEXT:    v_or_b32_e32 v2, v0, v1
+; VI-NEXT:    v_mov_b32_e32 v0, s2
+; VI-NEXT:    v_mov_b32_e32 v1, s3
 ; VI-NEXT:    flat_store_dword v[0:1], v2
 ; VI-NEXT:    s_endpgm
   %add = fadd <2 x half> %a, %b
@@ -1949,20 +1947,14 @@ define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half>
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    v_mov_b32_e32 v0, s2
 ; SI-NEXT:    v_mov_b32_e32 v1, s3
-; SI-NEXT:    s_add_u32 s2, s2, 8
-; SI-NEXT:    s_addc_u32 s3, s3, 0
-; SI-NEXT:    v_mov_b32_e32 v2, s2
-; SI-NEXT:    v_mov_b32_e32 v3, s3
-; SI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
-; SI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
+; SI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; SI-NEXT:    v_mov_b32_e32 v4, s0
 ; SI-NEXT:    v_mov_b32_e32 v5, s1
-; SI-NEXT:    s_waitcnt vmcnt(1)
+; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cvt_f32_f16_e32 v6, v0
 ; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
 ; SI-NEXT:    v_cvt_f32_f16_e32 v7, v1
 ; SI-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
-; SI-NEXT:    s_waitcnt vmcnt(0)
 ; SI-NEXT:    v_cvt_f32_f16_e32 v8, v2
 ; SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
 ; SI-NEXT:    v_cvt_f32_f16_e32 v9, v3
@@ -1990,14 +1982,9 @@ define amdgpu_kernel void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half>
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_add_u32 s4, s2, 8
 ; VI-NEXT:    v_mov_b32_e32 v0, s2
-; VI-NEXT:    s_addc_u32 s5, s3, 0
-; VI-NEXT:    v_mov_b32_e32 v2, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    v_mov_b32_e32 v3, s5
-; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
-; VI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v4, s0
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 30762f36ed552..870f4fcb8996d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -186,8 +186,7 @@ bb:
 
 ; GCN-LABEL: {{^}}test_mfma_f32_4x4x4f16:
 ; GCN:              s_load_dwordx4
-; GCN:              s_load_dwordx2
-; GCN:              s_load_dwordx2
+; GCN:              s_load_dwordx4
 ; GFX908_A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
 ; GFX908_A:         v_mfma_f32_4x4x4f16 [[RES:a\[[0-9]+:[0-9]+\]]], {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX908-COUNT-4:   v_accvgpr_read_b32
diff --git a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
index 0ced63b67d91f..6ad048b4e2fae 100644
--- a/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/lshr.v2i16.ll
@@ -7,20 +7,18 @@
 define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
 ; GFX9-LABEL: s_lshr_v2i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s4, s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dword s5, s[0:1], 0x30
-; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v1, s4
-; GFX9-NEXT:    v_pk_lshrrev_b16 v1, s5, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX9-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9-NEXT:    v_pk_lshrrev_b16 v1, s3, v1
+; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; VI-LABEL: s_lshr_v2i16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dword s3, s[0:1], 0x30
+; VI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_and_b32 s4, s2, 0xffff
@@ -38,33 +36,31 @@ define amdgpu_kernel void @s_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
 ;
 ; CI-LABEL: s_lshr_v2i16:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s2, s[0:1], 0xb
-; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; CI-NEXT:    s_load_dword s0, s[0:1], 0xc
-; CI-NEXT:    s_mov_b32 s7, 0xf000
-; CI-NEXT:    s_mov_b32 s6, -1
+; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; CI-NEXT:    s_mov_b32 s3, 0xf000
+; CI-NEXT:    s_mov_b32 s2, -1
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
-; CI-NEXT:    s_and_b32 s1, s2, 0xffff
-; CI-NEXT:    s_lshr_b32 s2, s2, 16
-; CI-NEXT:    s_lshr_b32 s3, s0, 16
-; CI-NEXT:    s_lshr_b32 s2, s2, s3
-; CI-NEXT:    s_lshl_b32 s2, s2, 16
-; CI-NEXT:    s_lshr_b32 s0, s1, s0
-; CI-NEXT:    s_or_b32 s0, s0, s2
-; CI-NEXT:    v_mov_b32_e32 v0, s0
-; CI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; CI-NEXT:    s_and_b32 s6, s4, 0xffff
+; CI-NEXT:    s_lshr_b32 s4, s4, 16
+; CI-NEXT:    s_lshr_b32 s7, s5, 16
+; CI-NEXT:    s_lshr_b32 s4, s4, s7
+; CI-NEXT:    s_lshl_b32 s4, s4, 16
+; CI-NEXT:    s_lshr_b32 s5, s6, s5
+; CI-NEXT:    s_or_b32 s4, s5, s4
+; CI-NEXT:    v_mov_b32_e32 v0, s4
+; CI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; CI-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: s_lshr_v2i16:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_clause 0x2
-; GFX10-NEXT:    s_load_dword s4, s[0:1], 0x2c
-; GFX10-NEXT:    s_load_dword s5, s[0:1], 0x30
-; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
 ; GFX10-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_pk_lshrrev_b16 v1, s5, s4
-; GFX10-NEXT:    global_store_dword v0, v1, s[2:3]
+; GFX10-NEXT:    v_pk_lshrrev_b16 v1, s3, s2
+; GFX10-NEXT:    global_store_dword v0, v1, s[4:5]
 ; GFX10-NEXT:    s_endpgm
   %result = lshr <2 x i16> %lhs, %rhs
   store <2 x i16> %result, <2 x i16> addrspace(1)* %out
@@ -75,35 +71,31 @@ define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
 ; GFX9-LABEL: v_lshr_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX9-NEXT:    global_load_dword v2, v0, s[2:3] offset:4
+; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v1, v0
+; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; VI-LABEL: v_lshr_v2i16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_add_u32_e32 v2, vcc, 4, v0
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v5, v[0:1]
-; VI-NEXT:    flat_load_dword v2, v[2:3]
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_lshrrev_b16_e32 v3, v2, v5
-; VI-NEXT:    v_lshrrev_b16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_or_b32_e32 v2, v3, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    v_lshrrev_b16_e32 v4, v1, v0
+; VI-NEXT:    v_lshrrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    v_or_b32_e32 v0, v4, v0
+; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; CI-LABEL: v_lshr_v2i16:
@@ -115,13 +107,11 @@ define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; CI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4
+; CI-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
 ; CI-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; CI-NEXT:    s_waitcnt vmcnt(1)
+; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
 ; CI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
-; CI-NEXT:    s_waitcnt vmcnt(0)
 ; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
 ; CI-NEXT:    v_lshrrev_b32_e32 v2, v3, v2
 ; CI-NEXT:    v_lshrrev_b32_e32 v3, v5, v4
@@ -133,14 +123,12 @@ define amdgpu_kernel void @v_lshr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16>
 ; GFX10-LABEL: v_lshr_v2i16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX10-NEXT:    global_load_dword v2, v0, s[2:3] offset:4
+; GFX10-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v2, v1
-; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT:    v_pk_lshrrev_b16 v0, v1, v0
+; GFX10-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -458,8 +446,7 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx2 v[0:1], v4, s[2:3]
-; GFX9-NEXT:    global_load_dwordx2 v[2:3], v4, s[2:3] offset:8
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
 ; GFX9-NEXT:    v_pk_lshrrev_b16 v0, v2, v0
@@ -474,10 +461,7 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_add_u32_e32 v2, vcc, 8, v0
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
-; VI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
@@ -496,31 +480,29 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
-; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; CI-NEXT:    v_mov_b32_e32 v1, 0
+; CI-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
+; CI-NEXT:    v_mov_b32_e32 v5, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; CI-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 offset:8
+; CI-NEXT:    buffer_load_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
 ; CI-NEXT:    s_mov_b32 s4, 0xffff
 ; CI-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; CI-NEXT:    s_waitcnt vmcnt(1)
-; CI-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; CI-NEXT:    v_and_b32_e32 v2, s4, v2
-; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; CI-NEXT:    v_and_b32_e32 v3, s4, v3
 ; CI-NEXT:    s_waitcnt vmcnt(0)
-; CI-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
-; CI-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
-; CI-NEXT:    v_lshrrev_b32_e32 v3, v5, v3
-; CI-NEXT:    v_lshrrev_b32_e32 v5, v9, v7
-; CI-NEXT:    v_lshrrev_b32_e32 v2, v4, v2
-; CI-NEXT:    v_lshrrev_b32_e32 v4, v8, v6
-; CI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; CI-NEXT:    v_or_b32_e32 v3, v3, v5
-; CI-NEXT:    v_or_b32_e32 v2, v2, v4
-; CI-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
+; CI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; CI-NEXT:    v_and_b32_e32 v0, s4, v0
+; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; CI-NEXT:    v_and_b32_e32 v1, s4, v1
+; CI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; CI-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
+; CI-NEXT:    v_lshrrev_b32_e32 v1, v3, v1
+; CI-NEXT:    v_lshrrev_b32_e32 v3, v9, v7
+; CI-NEXT:    v_lshrrev_b32_e32 v0, v2, v0
+; CI-NEXT:    v_lshrrev_b32_e32 v2, v8, v6
+; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; CI-NEXT:    v_or_b32_e32 v1, v1, v3
+; CI-NEXT:    v_or_b32_e32 v0, v0, v2
+; CI-NEXT:    buffer_store_dwordx2 v[0:1], v[4:5], s[0:3], 0 addr64
 ; CI-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: v_lshr_v4i16:
@@ -528,9 +510,7 @@ define amdgpu_kernel void @v_lshr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16>
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    global_load_dwordx2 v[0:1], v4, s[2:3]
-; GFX10-NEXT:    global_load_dwordx2 v[2:3], v4, s[2:3] offset:8
+; GFX10-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_pk_lshrrev_b16 v1, v3, v1
 ; GFX10-NEXT:    v_pk_lshrrev_b16 v0, v2, v0
diff --git a/llvm/test/CodeGen/AMDGPU/memory_clause.ll b/llvm/test/CodeGen/AMDGPU/memory_clause.ll
index 1fe61b2f9c9b3..75139bfa1cf1f 100644
--- a/llvm/test/CodeGen/AMDGPU/memory_clause.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory_clause.ll
@@ -71,50 +71,42 @@ bb:
 define amdgpu_kernel void @scalar_clause(<4 x i32> addrspace(1)* noalias nocapture readonly %arg, <4 x i32> addrspace(1)* noalias nocapture %arg1) {
 ; GCN-LABEL: scalar_clause:
 ; GCN:       ; %bb.0: ; %bb
-; GCN-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x24
+; GCN-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GCN-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x2c
-; GCN-NEXT:    v_mov_b32_e32 v12, 0
+; GCN-NEXT:    v_mov_b32_e32 v16, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_load_dwordx4 s[0:3], s[12:13], 0x0
-; GCN-NEXT:    s_load_dwordx4 s[4:7], s[12:13], 0x10
-; GCN-NEXT:    s_load_dwordx4 s[8:11], s[12:13], 0x20
-; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    s_load_dwordx4 s[12:15], s[12:13], 0x30
+; GCN-NEXT:    s_load_dwordx16 s[0:15], s[2:3], 0x0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
 ; GCN-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN-NEXT:    v_mov_b32_e32 v4, s4
-; GCN-NEXT:    v_mov_b32_e32 v8, s8
 ; GCN-NEXT:    v_mov_b32_e32 v5, s5
 ; GCN-NEXT:    v_mov_b32_e32 v6, s6
 ; GCN-NEXT:    v_mov_b32_e32 v7, s7
+; GCN-NEXT:    v_mov_b32_e32 v8, s8
 ; GCN-NEXT:    v_mov_b32_e32 v9, s9
 ; GCN-NEXT:    v_mov_b32_e32 v10, s10
 ; GCN-NEXT:    v_mov_b32_e32 v11, s11
-; GCN-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17]
-; GCN-NEXT:    global_store_dwordx4 v12, v[4:7], s[16:17] offset:16
-; GCN-NEXT:    global_store_dwordx4 v12, v[8:11], s[16:17] offset:32
-; GCN-NEXT:    v_mov_b32_e32 v0, s12
-; GCN-NEXT:    v_mov_b32_e32 v1, s13
-; GCN-NEXT:    v_mov_b32_e32 v2, s14
-; GCN-NEXT:    v_mov_b32_e32 v3, s15
-; GCN-NEXT:    global_store_dwordx4 v12, v[0:3], s[16:17] offset:48
+; GCN-NEXT:    v_mov_b32_e32 v12, s12
+; GCN-NEXT:    v_mov_b32_e32 v13, s13
+; GCN-NEXT:    v_mov_b32_e32 v14, s14
+; GCN-NEXT:    v_mov_b32_e32 v15, s15
+; GCN-NEXT:    global_store_dwordx4 v16, v[0:3], s[16:17]
+; GCN-NEXT:    global_store_dwordx4 v16, v[4:7], s[16:17] offset:16
+; GCN-NEXT:    global_store_dwordx4 v16, v[8:11], s[16:17] offset:32
+; GCN-NEXT:    global_store_dwordx4 v16, v[12:15], s[16:17] offset:48
 ; GCN-NEXT:    s_endpgm
 ;
 ; GCN-SCRATCH-LABEL: scalar_clause:
 ; GCN-SCRATCH:       ; %bb.0: ; %bb
 ; GCN-SCRATCH-NEXT:    s_clause 0x1
-; GCN-SCRATCH-NEXT:    s_load_dwordx2 s[12:13], s[0:1], 0x24
+; GCN-SCRATCH-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
 ; GCN-SCRATCH-NEXT:    s_load_dwordx2 s[16:17], s[0:1], 0x2c
 ; GCN-SCRATCH-NEXT:    v_mov_b32_e32 v16, 0
 ; GCN-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-SCRATCH-NEXT:    s_clause 0x3
-; GCN-SCRATCH-NEXT:    s_load_dwordx4 s[0:3], s[12:13], 0x0
-; GCN-SCRATCH-NEXT:    s_load_dwordx4 s[4:7], s[12:13], 0x10
-; GCN-SCRATCH-NEXT:    s_load_dwordx4 s[8:11], s[12:13], 0x20
-; GCN-SCRATCH-NEXT:    s_load_dwordx4 s[12:15], s[12:13], 0x30
+; GCN-SCRATCH-NEXT:    s_load_dwordx16 s[0:15], s[2:3], 0x0
 ; GCN-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-SCRATCH-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-SCRATCH-NEXT:    v_mov_b32_e32 v1, s1
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index 27366601cd09b..c5e83127166bb 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -107,8 +107,8 @@ define amdgpu_kernel void @s_test_imin_sle_v4i8(<4 x i8> addrspace(1)* %out, [8
 }
 
 ; FUNC-LABEL: {{^}}s_test_imin_sle_v2i16:
-; GCN: s_load_dword s
-; GCN: s_load_dword s
+; GCN: s_load_dwordx2 s
+; GCN: s_load_dwordx2 s
 
 ; SI: s_ashr_i32
 ; SI: s_sext_i32_i16
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
index 475655b34772a..478701cc97486 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -709,13 +709,12 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
 ; SI-NEXT:  ; %bb.1: ; %bb7
 ; SI-NEXT:    s_endpgm
 ; SI-NEXT:  .LBB8_2: ; %bb11
-; SI-NEXT:    s_load_dword s2, s[0:1], 0xd
-; SI-NEXT:    s_load_dword s4, s[0:1], 0xf
+; SI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
 ; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s3, 0xf000
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
-; SI-NEXT:    s_bfe_i32 s2, s2, 0x180000
-; SI-NEXT:    s_bfe_i32 s4, s4, 0x180000
+; SI-NEXT:    s_bfe_i32 s2, s4, 0x180000
+; SI-NEXT:    s_bfe_i32 s4, s6, 0x180000
 ; SI-NEXT:    s_mul_i32 s4, s2, s4
 ; SI-NEXT:    s_mov_b32 s2, -1
 ; SI-NEXT:    v_mov_b32_e32 v0, s4
@@ -732,14 +731,13 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
 ; VI-NEXT:  ; %bb.1: ; %bb7
 ; VI-NEXT:    s_endpgm
 ; VI-NEXT:  .LBB8_2: ; %bb11
-; VI-NEXT:    s_load_dword s4, s[0:1], 0x34
-; VI-NEXT:    s_load_dword s5, s[0:1], 0x3c
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    s_bfe_i32 s4, s4, 0x180000
-; VI-NEXT:    s_bfe_i32 s5, s5, 0x180000
+; VI-NEXT:    s_bfe_i32 s5, s6, 0x180000
 ; VI-NEXT:    s_mul_i32 s4, s4, s5
 ; VI-NEXT:    v_mov_b32_e32 v0, s4
 ; VI-NEXT:    v_mov_b32_e32 v1, s4
@@ -755,18 +753,17 @@ define amdgpu_kernel void @simplify_i24_crash(<2 x i32> addrspace(1)* %out, i32
 ; GFX9-NEXT:  ; %bb.1: ; %bb7
 ; GFX9-NEXT:    s_endpgm
 ; GFX9-NEXT:  .LBB8_2: ; %bb11
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x34
-; GFX9-NEXT:    s_load_dword s3, s[0:1], 0x3c
-; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
-; GFX9-NEXT:    s_mov_b32 s7, 0xf000
-; GFX9-NEXT:    s_mov_b32 s6, -1
+; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x34
+; GFX9-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x24
+; GFX9-NEXT:    s_mov_b32 s11, 0xf000
+; GFX9-NEXT:    s_mov_b32 s10, -1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_bfe_i32 s0, s2, 0x180000
-; GFX9-NEXT:    s_bfe_i32 s1, s3, 0x180000
+; GFX9-NEXT:    s_bfe_i32 s0, s4, 0x180000
+; GFX9-NEXT:    s_bfe_i32 s1, s6, 0x180000
 ; GFX9-NEXT:    s_mul_i32 s0, s0, s1
 ; GFX9-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX9-NEXT:    v_mov_b32_e32 v1, s0
-; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-NEXT:    buffer_store_dwordx2 v[0:1], off, s[8:11], 0
 ; GFX9-NEXT:    s_endpgm
 ;
 ; EG-LABEL: simplify_i24_crash:
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv.ll b/llvm/test/CodeGen/AMDGPU/sdiv.ll
index f255a4547fa10..a0421e5adc73b 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv.ll
@@ -440,15 +440,15 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ; GCN-NEXT:    v_mul_lo_u32 v6, v4, v2
 ; GCN-NEXT:    v_mul_lo_u32 v10, v5, v3
 ; GCN-NEXT:    v_add_i32_e32 v7, vcc, 1, v4
-; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, v6, v0
-; GCN-NEXT:    v_subrev_i32_e32 v1, vcc, v10, v1
+; GCN-NEXT:    v_sub_i32_e32 v0, vcc, v0, v6
+; GCN-NEXT:    v_sub_i32_e32 v1, vcc, v1, v10
 ; GCN-NEXT:    v_add_i32_e32 v11, vcc, 1, v5
 ; GCN-NEXT:    v_cmp_ge_u32_e64 s[0:1], v0, v2
 ; GCN-NEXT:    v_cmp_ge_u32_e64 s[2:3], v1, v3
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[0:1]
-; GCN-NEXT:    v_subrev_i32_e32 v6, vcc, v2, v0
+; GCN-NEXT:    v_sub_i32_e32 v6, vcc, v0, v2
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[2:3]
-; GCN-NEXT:    v_subrev_i32_e32 v7, vcc, v3, v1
+; GCN-NEXT:    v_sub_i32_e32 v7, vcc, v1, v3
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s[0:1]
 ; GCN-NEXT:    v_add_i32_e32 v6, vcc, 1, v4
 ; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[2:3]
@@ -514,15 +514,15 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ; TONGA-NEXT:    v_mul_lo_u32 v6, v4, v2
 ; TONGA-NEXT:    v_mul_lo_u32 v10, v5, v3
 ; TONGA-NEXT:    v_add_u32_e32 v7, vcc, 1, v4
-; TONGA-NEXT:    v_subrev_u32_e32 v0, vcc, v6, v0
-; TONGA-NEXT:    v_subrev_u32_e32 v1, vcc, v10, v1
+; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v6
+; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, v1, v10
 ; TONGA-NEXT:    v_add_u32_e32 v11, vcc, 1, v5
 ; TONGA-NEXT:    v_cmp_ge_u32_e64 s[0:1], v0, v2
 ; TONGA-NEXT:    v_cmp_ge_u32_e64 s[2:3], v1, v3
 ; TONGA-NEXT:    v_cndmask_b32_e64 v4, v4, v7, s[0:1]
-; TONGA-NEXT:    v_subrev_u32_e32 v6, vcc, v2, v0
+; TONGA-NEXT:    v_sub_u32_e32 v6, vcc, v0, v2
 ; TONGA-NEXT:    v_cndmask_b32_e64 v5, v5, v11, s[2:3]
-; TONGA-NEXT:    v_subrev_u32_e32 v7, vcc, v3, v1
+; TONGA-NEXT:    v_sub_u32_e32 v7, vcc, v1, v3
 ; TONGA-NEXT:    v_cndmask_b32_e64 v0, v0, v6, s[0:1]
 ; TONGA-NEXT:    v_add_u32_e32 v6, vcc, 1, v4
 ; TONGA-NEXT:    v_cndmask_b32_e64 v1, v1, v7, s[2:3]
@@ -533,8 +533,8 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ; TONGA-NEXT:    v_cndmask_b32_e32 v1, v5, v7, vcc
 ; TONGA-NEXT:    v_xor_b32_e32 v0, v0, v8
 ; TONGA-NEXT:    v_xor_b32_e32 v1, v1, v9
-; TONGA-NEXT:    v_sub_u32_e32 v0, vcc, v0, v8
-; TONGA-NEXT:    v_sub_u32_e32 v1, vcc, v1, v9
+; TONGA-NEXT:    v_subrev_u32_e32 v0, vcc, v8, v0
+; TONGA-NEXT:    v_subrev_u32_e32 v1, vcc, v9, v1
 ; TONGA-NEXT:    buffer_store_dwordx2 v[0:1], off, s[4:7], 0
 ; TONGA-NEXT:    s_endpgm
 ;
@@ -614,27 +614,26 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ;
 ; EG-LABEL: sdiv_v2i32:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 51, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 51, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T1.XY, T0.X, 8, #1
-; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
-; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T0.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     SETGT_INT * T0.W, 0.0, T1.Y,
-; EG-NEXT:     ADD_INT T1.W, T1.Y, PV.W,
-; EG-NEXT:     SETGT_INT * T2.W, 0.0, T1.X,
-; EG-NEXT:     XOR_INT * T1.W, PV.W, T0.W,
-; EG-NEXT:     SUB_INT T0.Z, 0.0, PV.W,
-; EG-NEXT:     ADD_INT T3.W, T1.X, T2.W,
-; EG-NEXT:     RECIP_UINT * T1.X, PV.W,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     SETGT_INT * T1.W, 0.0, T0.W,
+; EG-NEXT:     ADD_INT T0.W, T0.W, PV.W,
+; EG-NEXT:     SETGT_INT * T2.W, 0.0, T0.Z,
+; EG-NEXT:     XOR_INT * T0.W, PV.W, T1.W,
+; EG-NEXT:     SUB_INT T1.Z, 0.0, PV.W,
+; EG-NEXT:     ADD_INT T3.W, T0.Z, T2.W,
+; EG-NEXT:     RECIP_UINT * T0.Z, PV.W,
 ; EG-NEXT:     XOR_INT T3.W, PV.W, T2.W,
-; EG-NEXT:     MULLO_INT * T0.Z, PV.Z, PS,
+; EG-NEXT:     MULLO_INT * T1.X, PV.Z, PS,
 ; EG-NEXT:     SUB_INT T4.W, 0.0, PV.W,
 ; EG-NEXT:     RECIP_UINT * T1.Y, PV.W,
 ; EG-NEXT:     SETGT_INT T5.W, 0.0, T0.X,
@@ -645,18 +644,18 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ; EG-NEXT:     ADD_INT T1.Y, T1.Y, PS,
 ; EG-NEXT:     XOR_INT T1.Z, PV.W, T5.W,
 ; EG-NEXT:     ADD_INT T4.W, T0.Y, PV.Z, BS:VEC_120/SCL_212
-; EG-NEXT:     MULHI * T0.X, T1.X, T0.Z,
-; EG-NEXT:     ADD_INT T0.Z, T1.X, PS,
+; EG-NEXT:     MULHI * T0.X, T0.Z, T1.X,
+; EG-NEXT:     ADD_INT T0.Z, T0.Z, PS,
 ; EG-NEXT:     XOR_INT T4.W, PV.W, T2.Z,
 ; EG-NEXT:     MULHI * T0.X, PV.Z, PV.Y,
 ; EG-NEXT:     MULHI * T0.Y, PV.W, PV.Z,
-; EG-NEXT:     MULLO_INT * T0.Z, PS, T1.W,
+; EG-NEXT:     MULLO_INT * T0.Z, PS, T0.W,
 ; EG-NEXT:     SUB_INT T4.W, T4.W, PS,
 ; EG-NEXT:     MULLO_INT * T0.Z, T0.X, T3.W,
 ; EG-NEXT:     SUB_INT T1.Y, T1.Z, PS,
 ; EG-NEXT:     ADD_INT T0.Z, T0.Y, 1,
-; EG-NEXT:     SETGE_UINT T6.W, PV.W, T1.W,
-; EG-NEXT:     SUB_INT * T7.W, PV.W, T1.W,
+; EG-NEXT:     SETGE_UINT T6.W, PV.W, T0.W,
+; EG-NEXT:     SUB_INT * T7.W, PV.W, T0.W,
 ; EG-NEXT:     CNDE_INT T1.X, PV.W, T4.W, PS, BS:VEC_021/SCL_122
 ; EG-NEXT:     CNDE_INT T0.Y, PV.W, T0.Y, PV.Z,
 ; EG-NEXT:     ADD_INT T0.Z, T0.X, 1,
@@ -665,9 +664,9 @@ define amdgpu_kernel void @sdiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ; EG-NEXT:     CNDE_INT T1.Y, PV.W, T1.Y, PS,
 ; EG-NEXT:     CNDE_INT T0.Z, PV.W, T0.X, PV.Z,
 ; EG-NEXT:     ADD_INT T4.W, PV.Y, 1,
-; EG-NEXT:     SETGE_UINT * T1.W, PV.X, T1.W,
+; EG-NEXT:     SETGE_UINT * T0.W, PV.X, T0.W,
 ; EG-NEXT:     CNDE_INT T0.Y, PS, T0.Y, PV.W,
-; EG-NEXT:     XOR_INT T1.Z, T2.Z, T0.W, BS:VEC_021/SCL_122
+; EG-NEXT:     XOR_INT T1.Z, T2.Z, T1.W, BS:VEC_021/SCL_122
 ; EG-NEXT:     ADD_INT T0.W, PV.Z, 1,
 ; EG-NEXT:     SETGE_UINT * T1.W, PV.Y, T3.W,
 ; EG-NEXT:     CNDE_INT T0.Z, PS, T0.Z, PV.W,
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index dd65453e6fb3a..19f57b603723f 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -854,94 +854,92 @@ define amdgpu_kernel void @s_test_sdiv25_64(i64 addrspace(1)* %out, i64 %x, i64
 define amdgpu_kernel void @s_test_sdiv24_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) {
 ; GCN-LABEL: s_test_sdiv24_v2i64:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GCN-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x11
+; GCN-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
 ; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GCN-NEXT:    s_mov_b32 s3, 0xf000
 ; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_ashr_i64 s[4:5], s[4:5], 40
 ; GCN-NEXT:    s_ashr_i64 s[8:9], s[8:9], 40
 ; GCN-NEXT:    v_cvt_f32_i32_e32 v0, s8
+; GCN-NEXT:    s_ashr_i64 s[4:5], s[4:5], 40
 ; GCN-NEXT:    v_cvt_f32_i32_e32 v1, s4
 ; GCN-NEXT:    s_xor_b32 s4, s4, s8
-; GCN-NEXT:    s_ashr_i32 s4, s4, 30
 ; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
+; GCN-NEXT:    s_ashr_i32 s4, s4, 30
 ; GCN-NEXT:    s_or_b32 s4, s4, 1
 ; GCN-NEXT:    v_mov_b32_e32 v3, s4
-; GCN-NEXT:    s_ashr_i64 s[10:11], s[10:11], 40
 ; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
 ; GCN-NEXT:    v_trunc_f32_e32 v2, v2
 ; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
+; GCN-NEXT:    s_ashr_i64 s[10:11], s[10:11], 40
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-NEXT:    s_ashr_i64 s[6:7], s[6:7], 40
 ; GCN-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-NEXT:    v_cvt_f32_i32_e32 v2, s10
+; GCN-NEXT:    s_ashr_i64 s[6:7], s[6:7], 40
 ; GCN-NEXT:    v_cvt_f32_i32_e32 v3, s6
 ; GCN-NEXT:    s_xor_b32 s4, s6, s10
-; GCN-NEXT:    s_ashr_i32 s4, s4, 30
 ; GCN-NEXT:    v_rcp_iflag_f32_e32 v4, v2
+; GCN-NEXT:    s_ashr_i32 s4, s4, 30
 ; GCN-NEXT:    s_or_b32 s4, s4, 1
 ; GCN-NEXT:    v_mov_b32_e32 v5, s4
-; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
 ; GCN-NEXT:    v_mul_f32_e32 v4, v3, v4
 ; GCN-NEXT:    v_trunc_f32_e32 v4, v4
 ; GCN-NEXT:    v_mad_f32 v3, -v4, v2, v3
 ; GCN-NEXT:    v_cvt_i32_f32_e32 v4, v4
 ; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
-; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 24
 ; GCN-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GCN-NEXT:    v_bfe_i32 v2, v2, 0, 24
+; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-NEXT:    s_endpgm
 ;
 ; GCN-IR-LABEL: s_test_sdiv24_v2i64:
 ; GCN-IR:       ; %bb.0:
-; GCN-IR-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0xd
-; GCN-IR-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x11
+; GCN-IR-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
 ; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
 ; GCN-IR-NEXT:    s_mov_b32 s2, -1
 ; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT:    s_ashr_i64 s[4:5], s[4:5], 40
 ; GCN-IR-NEXT:    s_ashr_i64 s[8:9], s[8:9], 40
 ; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v0, s8
+; GCN-IR-NEXT:    s_ashr_i64 s[4:5], s[4:5], 40
 ; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v1, s4
 ; GCN-IR-NEXT:    s_xor_b32 s4, s4, s8
-; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
 ; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
+; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
 ; GCN-IR-NEXT:    s_or_b32 s4, s4, 1
 ; GCN-IR-NEXT:    v_mov_b32_e32 v3, s4
-; GCN-IR-NEXT:    s_ashr_i64 s[10:11], s[10:11], 40
 ; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
 ; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
 ; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
 ; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v2, v2
 ; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, |v0|
+; GCN-IR-NEXT:    s_ashr_i64 s[10:11], s[10:11], 40
 ; GCN-IR-NEXT:    v_cndmask_b32_e32 v0, 0, v3, vcc
-; GCN-IR-NEXT:    s_ashr_i64 s[6:7], s[6:7], 40
 ; GCN-IR-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
 ; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v2, s10
+; GCN-IR-NEXT:    s_ashr_i64 s[6:7], s[6:7], 40
 ; GCN-IR-NEXT:    v_cvt_f32_i32_e32 v3, s6
 ; GCN-IR-NEXT:    s_xor_b32 s4, s6, s10
-; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
 ; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v4, v2
+; GCN-IR-NEXT:    s_ashr_i32 s4, s4, 30
 ; GCN-IR-NEXT:    s_or_b32 s4, s4, 1
 ; GCN-IR-NEXT:    v_mov_b32_e32 v5, s4
-; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
 ; GCN-IR-NEXT:    v_mul_f32_e32 v4, v3, v4
 ; GCN-IR-NEXT:    v_trunc_f32_e32 v4, v4
 ; GCN-IR-NEXT:    v_mad_f32 v3, -v4, v2, v3
 ; GCN-IR-NEXT:    v_cvt_i32_f32_e32 v4, v4
 ; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v3|, |v2|
 ; GCN-IR-NEXT:    v_cndmask_b32_e32 v2, 0, v5, vcc
-; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
+; GCN-IR-NEXT:    v_bfe_i32 v0, v0, 0, 24
 ; GCN-IR-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GCN-IR-NEXT:    v_bfe_i32 v2, v2, 0, 24
+; GCN-IR-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
 ; GCN-IR-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GCN-IR-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-IR-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/select-vectors.ll b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
index 176f972e4d4f0..c7d6301de3419 100644
--- a/llvm/test/CodeGen/AMDGPU/select-vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-vectors.ll
@@ -216,16 +216,14 @@ define amdgpu_kernel void @select_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32>
 }
 
 ; GCN-LABEL: {{^}}s_select_v2f32:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[ALO:[0-9]+]]:[[AHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[BLO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xd|0x34}}
+; GCN-DAG: s_load_dwordx4 s{{\[}}[[ALO:[0-9]+]]:[[BHI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
 
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[AHI]]
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BHI]]
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[ALO]]
 ; GCN-DAG: s_cmp_eq_u32 s{{[0-9]+}}, 0{{$}}
 
 ; GCN-DAG: v_cndmask_b32_e32
-; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s[[BLO]]
+; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_cndmask_b32_e32
 ; GCN: buffer_store_dwordx2
 define amdgpu_kernel void @s_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b, i32 %c) #0 {
@@ -251,8 +249,7 @@ define amdgpu_kernel void @s_select_v3f32(<3 x float> addrspace(1)* %out, <3 x f
 }
 
 ; GCN-LABEL: {{^}}s_select_v4f32:
-; GCN: s_load_dwordx4
-; GCN: s_load_dwordx4
+; GCN: s_load_dwordx8
 ; GCN: s_cmp_eq_u32 s{{[0-9]+}}, 0{{$}}
 
 ; GCN: v_cndmask_b32_e32
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
index 206c62d683dee..b6457168d4eb2 100644
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -451,64 +451,63 @@ define <2 x i128> @v_ashr_v2i128_vv(<2 x i128> %lhs, <2 x i128> %rhs) {
 define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-LABEL: s_shl_v2i128ss:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x8
-; GCN-NEXT:    s_load_dwordx8 s[16:23], s[4:5], 0x0
+; GCN-NEXT:    s_load_dwordx16 s[4:19], s[4:5], 0x0
 ; GCN-NEXT:    v_mov_b32_e32 v10, 16
 ; GCN-NEXT:    v_mov_b32_e32 v8, 0
 ; GCN-NEXT:    v_mov_b32_e32 v11, 0
+; GCN-NEXT:    v_mov_b32_e32 v9, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[8:9], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[10:11], 0
-; GCN-NEXT:    s_sub_i32 s6, 64, s8
-; GCN-NEXT:    s_sub_i32 s4, s8, 64
-; GCN-NEXT:    s_lshr_b64 s[6:7], s[16:17], s6
-; GCN-NEXT:    s_lshl_b64 s[24:25], s[18:19], s8
-; GCN-NEXT:    s_lshl_b64 s[4:5], s[16:17], s4
-; GCN-NEXT:    s_or_b64 s[6:7], s[24:25], s[6:7]
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[14:15], 0
+; GCN-NEXT:    s_sub_i32 s22, 64, s12
+; GCN-NEXT:    s_sub_i32 s20, s12, 64
+; GCN-NEXT:    s_lshr_b64 s[22:23], s[4:5], s22
+; GCN-NEXT:    s_lshl_b64 s[24:25], s[6:7], s12
+; GCN-NEXT:    s_lshl_b64 s[20:21], s[4:5], s20
+; GCN-NEXT:    s_or_b64 s[22:23], s[24:25], s[22:23]
 ; GCN-NEXT:    s_and_b64 vcc, s[2:3], s[0:1]
-; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[10:11]
-; GCN-NEXT:    v_mov_b32_e32 v0, s5
-; GCN-NEXT:    v_mov_b32_e32 v1, s7
+; GCN-NEXT:    s_or_b64 s[0:1], s[12:13], s[14:15]
+; GCN-NEXT:    v_mov_b32_e32 v0, s21
+; GCN-NEXT:    v_mov_b32_e32 v1, s23
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[0:1], 0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GCN-NEXT:    v_mov_b32_e32 v1, s19
+; GCN-NEXT:    v_mov_b32_e32 v1, s7
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, v0, v1, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NEXT:    v_mov_b32_e32 v1, s6
+; GCN-NEXT:    v_mov_b32_e32 v0, s20
+; GCN-NEXT:    v_mov_b32_e32 v1, s22
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GCN-NEXT:    v_mov_b32_e32 v1, s18
+; GCN-NEXT:    v_mov_b32_e32 v1, s6
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v0, v1, s[0:1]
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[14:15], 0
-; GCN-NEXT:    s_sub_i32 s6, 64, s12
-; GCN-NEXT:    s_sub_i32 s4, s12, 64
-; GCN-NEXT:    s_lshr_b64 s[6:7], s[20:21], s6
-; GCN-NEXT:    s_lshl_b64 s[10:11], s[22:23], s12
-; GCN-NEXT:    s_lshl_b64 s[4:5], s[20:21], s4
-; GCN-NEXT:    s_or_b64 s[6:7], s[10:11], s[6:7]
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[16:17], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[18:19], 0
+; GCN-NEXT:    s_sub_i32 s13, 64, s16
+; GCN-NEXT:    s_sub_i32 s6, s16, 64
+; GCN-NEXT:    s_lshr_b64 s[14:15], s[8:9], s13
+; GCN-NEXT:    s_lshl_b64 s[20:21], s[10:11], s16
+; GCN-NEXT:    s_lshl_b64 s[6:7], s[8:9], s6
+; GCN-NEXT:    s_or_b64 s[14:15], s[20:21], s[14:15]
 ; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
-; GCN-NEXT:    s_or_b64 s[2:3], s[12:13], s[14:15]
-; GCN-NEXT:    v_mov_b32_e32 v0, s5
-; GCN-NEXT:    v_mov_b32_e32 v1, s7
+; GCN-NEXT:    s_or_b64 s[2:3], s[16:17], s[18:19]
+; GCN-NEXT:    v_mov_b32_e32 v0, s7
+; GCN-NEXT:    v_mov_b32_e32 v1, s15
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], 0
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v1, s23
+; GCN-NEXT:    v_mov_b32_e32 v1, s11
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, v0, v1, s[2:3]
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NEXT:    v_mov_b32_e32 v1, s6
+; GCN-NEXT:    v_mov_b32_e32 v0, s6
+; GCN-NEXT:    v_mov_b32_e32 v1, s14
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v1, s22
+; GCN-NEXT:    v_mov_b32_e32 v1, s10
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, v0, v1, s[2:3]
-; GCN-NEXT:    s_lshl_b64 s[2:3], s[16:17], s8
+; GCN-NEXT:    s_lshl_b64 s[2:3], s[4:5], s12
 ; GCN-NEXT:    v_mov_b32_e32 v0, s3
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v0, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v0, s2
-; GCN-NEXT:    s_lshl_b64 s[2:3], s[20:21], s12
+; GCN-NEXT:    s_lshl_b64 s[2:3], s[8:9], s16
 ; GCN-NEXT:    v_mov_b32_e32 v4, s3
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, 0, v4, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v4, s2
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v9, 0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GCN-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
 ; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
@@ -521,64 +520,63 @@ define amdgpu_kernel void @s_shl_v2i128ss(<2 x i128> %lhs, <2 x i128> %rhs) {
 define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-LABEL: s_lshr_v2i128_ss:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x8
-; GCN-NEXT:    s_load_dwordx8 s[16:23], s[4:5], 0x0
+; GCN-NEXT:    s_load_dwordx16 s[4:19], s[4:5], 0x0
 ; GCN-NEXT:    v_mov_b32_e32 v10, 16
 ; GCN-NEXT:    v_mov_b32_e32 v8, 0
 ; GCN-NEXT:    v_mov_b32_e32 v11, 0
+; GCN-NEXT:    v_mov_b32_e32 v9, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[8:9], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[10:11], 0
-; GCN-NEXT:    s_sub_i32 s6, 64, s8
-; GCN-NEXT:    s_sub_i32 s4, s8, 64
-; GCN-NEXT:    s_lshl_b64 s[6:7], s[18:19], s6
-; GCN-NEXT:    s_lshr_b64 s[24:25], s[16:17], s8
-; GCN-NEXT:    s_lshr_b64 s[4:5], s[18:19], s4
-; GCN-NEXT:    s_or_b64 s[6:7], s[24:25], s[6:7]
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[14:15], 0
+; GCN-NEXT:    s_sub_i32 s22, 64, s12
+; GCN-NEXT:    s_sub_i32 s20, s12, 64
+; GCN-NEXT:    s_lshl_b64 s[22:23], s[6:7], s22
+; GCN-NEXT:    s_lshr_b64 s[24:25], s[4:5], s12
+; GCN-NEXT:    s_lshr_b64 s[20:21], s[6:7], s20
+; GCN-NEXT:    s_or_b64 s[22:23], s[24:25], s[22:23]
 ; GCN-NEXT:    s_and_b64 vcc, s[2:3], s[0:1]
-; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[10:11]
-; GCN-NEXT:    v_mov_b32_e32 v0, s5
-; GCN-NEXT:    v_mov_b32_e32 v1, s7
+; GCN-NEXT:    s_or_b64 s[0:1], s[12:13], s[14:15]
+; GCN-NEXT:    v_mov_b32_e32 v0, s21
+; GCN-NEXT:    v_mov_b32_e32 v1, s23
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[0:1], 0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GCN-NEXT:    v_mov_b32_e32 v1, s17
+; GCN-NEXT:    v_mov_b32_e32 v1, s5
 ; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, v1, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NEXT:    v_mov_b32_e32 v2, s6
+; GCN-NEXT:    v_mov_b32_e32 v0, s20
+; GCN-NEXT:    v_mov_b32_e32 v2, s22
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GCN-NEXT:    v_mov_b32_e32 v2, s16
+; GCN-NEXT:    v_mov_b32_e32 v2, s4
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[14:15], 0
-; GCN-NEXT:    s_sub_i32 s6, 64, s12
-; GCN-NEXT:    s_sub_i32 s4, s12, 64
-; GCN-NEXT:    s_lshl_b64 s[6:7], s[22:23], s6
-; GCN-NEXT:    s_lshr_b64 s[10:11], s[20:21], s12
-; GCN-NEXT:    s_lshr_b64 s[4:5], s[22:23], s4
-; GCN-NEXT:    s_or_b64 s[6:7], s[10:11], s[6:7]
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[16:17], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[18:19], 0
+; GCN-NEXT:    s_sub_i32 s13, 64, s16
+; GCN-NEXT:    s_sub_i32 s4, s16, 64
+; GCN-NEXT:    s_lshl_b64 s[14:15], s[10:11], s13
+; GCN-NEXT:    s_lshr_b64 s[20:21], s[8:9], s16
+; GCN-NEXT:    s_lshr_b64 s[4:5], s[10:11], s4
+; GCN-NEXT:    s_or_b64 s[14:15], s[20:21], s[14:15]
 ; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
-; GCN-NEXT:    s_or_b64 s[2:3], s[12:13], s[14:15]
+; GCN-NEXT:    s_or_b64 s[2:3], s[16:17], s[18:19]
 ; GCN-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NEXT:    v_mov_b32_e32 v3, s7
+; GCN-NEXT:    v_mov_b32_e32 v3, s15
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], 0
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v3, s21
+; GCN-NEXT:    v_mov_b32_e32 v3, s9
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, v2, v3, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v2, s4
-; GCN-NEXT:    v_mov_b32_e32 v3, s6
+; GCN-NEXT:    v_mov_b32_e32 v3, s14
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v3, s20
+; GCN-NEXT:    v_mov_b32_e32 v3, s8
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v2, v3, s[2:3]
-; GCN-NEXT:    s_lshr_b64 s[2:3], s[18:19], s8
+; GCN-NEXT:    s_lshr_b64 s[2:3], s[6:7], s12
 ; GCN-NEXT:    v_mov_b32_e32 v2, s3
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v2, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
-; GCN-NEXT:    s_lshr_b64 s[2:3], s[22:23], s12
+; GCN-NEXT:    s_lshr_b64 s[2:3], s[10:11], s16
 ; GCN-NEXT:    v_mov_b32_e32 v6, s3
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, 0, v6, s[0:1]
 ; GCN-NEXT:    v_mov_b32_e32 v6, s2
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v9, 0
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GCN-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
 ; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
@@ -591,61 +589,61 @@ define amdgpu_kernel void @s_lshr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
 define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-LABEL: s_ashr_v2i128_ss:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx8 s[8:15], s[4:5], 0x8
-; GCN-NEXT:    s_load_dwordx8 s[16:23], s[4:5], 0x0
+; GCN-NEXT:    s_load_dwordx16 s[4:19], s[4:5], 0x0
 ; GCN-NEXT:    v_mov_b32_e32 v10, 16
 ; GCN-NEXT:    v_mov_b32_e32 v8, 0
 ; GCN-NEXT:    v_mov_b32_e32 v11, 0
+; GCN-NEXT:    v_mov_b32_e32 v9, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[8:9], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[10:11], 0
-; GCN-NEXT:    s_sub_i32 s6, 64, s8
-; GCN-NEXT:    s_sub_i32 s4, s8, 64
-; GCN-NEXT:    s_lshl_b64 s[6:7], s[18:19], s6
-; GCN-NEXT:    s_lshr_b64 s[24:25], s[16:17], s8
-; GCN-NEXT:    s_ashr_i64 s[4:5], s[18:19], s4
-; GCN-NEXT:    s_or_b64 s[6:7], s[24:25], s[6:7]
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[14:15], 0
+; GCN-NEXT:    s_sub_i32 s22, 64, s12
+; GCN-NEXT:    s_sub_i32 s20, s12, 64
+; GCN-NEXT:    s_lshl_b64 s[22:23], s[6:7], s22
+; GCN-NEXT:    s_lshr_b64 s[24:25], s[4:5], s12
+; GCN-NEXT:    s_ashr_i64 s[20:21], s[6:7], s20
+; GCN-NEXT:    s_or_b64 s[22:23], s[24:25], s[22:23]
 ; GCN-NEXT:    s_and_b64 vcc, s[2:3], s[0:1]
-; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[10:11]
-; GCN-NEXT:    v_mov_b32_e32 v0, s5
-; GCN-NEXT:    v_mov_b32_e32 v1, s7
+; GCN-NEXT:    s_or_b64 s[0:1], s[12:13], s[14:15]
+; GCN-NEXT:    v_mov_b32_e32 v0, s21
+; GCN-NEXT:    v_mov_b32_e32 v1, s23
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[0:1], s[0:1], 0
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; GCN-NEXT:    v_mov_b32_e32 v1, s17
+; GCN-NEXT:    v_mov_b32_e32 v1, s5
 ; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, v1, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v0, s4
-; GCN-NEXT:    v_mov_b32_e32 v2, s6
+; GCN-NEXT:    v_mov_b32_e32 v0, s20
+; GCN-NEXT:    v_mov_b32_e32 v2, s22
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GCN-NEXT:    v_mov_b32_e32 v2, s16
+; GCN-NEXT:    v_mov_b32_e32 v2, s4
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[14:15], 0
-; GCN-NEXT:    s_sub_i32 s6, 64, s12
-; GCN-NEXT:    s_sub_i32 s4, s12, 64
-; GCN-NEXT:    s_lshl_b64 s[6:7], s[22:23], s6
-; GCN-NEXT:    s_lshr_b64 s[10:11], s[20:21], s12
-; GCN-NEXT:    s_ashr_i64 s[4:5], s[22:23], s4
-; GCN-NEXT:    s_or_b64 s[6:7], s[10:11], s[6:7]
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[16:17], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[18:19], 0
+; GCN-NEXT:    s_sub_i32 s13, 64, s16
+; GCN-NEXT:    s_sub_i32 s4, s16, 64
+; GCN-NEXT:    s_lshl_b64 s[14:15], s[10:11], s13
+; GCN-NEXT:    s_lshr_b64 s[20:21], s[8:9], s16
+; GCN-NEXT:    s_ashr_i64 s[4:5], s[10:11], s4
+; GCN-NEXT:    s_or_b64 s[14:15], s[20:21], s[14:15]
 ; GCN-NEXT:    s_and_b64 s[0:1], s[2:3], s[0:1]
-; GCN-NEXT:    s_or_b64 s[2:3], s[12:13], s[14:15]
+; GCN-NEXT:    s_or_b64 s[2:3], s[16:17], s[18:19]
 ; GCN-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NEXT:    v_mov_b32_e32 v3, s7
+; GCN-NEXT:    v_mov_b32_e32 v3, s15
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[2:3], s[2:3], 0
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v3, s21
+; GCN-NEXT:    v_mov_b32_e32 v3, s9
 ; GCN-NEXT:    v_cndmask_b32_e64 v5, v2, v3, s[2:3]
 ; GCN-NEXT:    v_mov_b32_e32 v2, s4
-; GCN-NEXT:    v_mov_b32_e32 v3, s6
+; GCN-NEXT:    v_mov_b32_e32 v3, s14
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v2, v3, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v3, s20
+; GCN-NEXT:    v_mov_b32_e32 v3, s8
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, v2, v3, s[2:3]
-; GCN-NEXT:    s_ashr_i32 s4, s19, 31
-; GCN-NEXT:    s_ashr_i64 s[2:3], s[18:19], s8
+; GCN-NEXT:    s_ashr_i32 s4, s7, 31
+; GCN-NEXT:    s_ashr_i64 s[2:3], s[6:7], s12
 ; GCN-NEXT:    v_mov_b32_e32 v2, s4
 ; GCN-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN-NEXT:    v_mov_b32_e32 v6, s2
-; GCN-NEXT:    s_ashr_i32 s4, s23, 31
-; GCN-NEXT:    s_ashr_i64 s[2:3], s[22:23], s12
+; GCN-NEXT:    s_ashr_i32 s4, s11, 31
+; GCN-NEXT:    s_ashr_i64 s[2:3], s[10:11], s16
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, v2, v3, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
 ; GCN-NEXT:    v_mov_b32_e32 v6, s4
@@ -653,7 +651,6 @@ define amdgpu_kernel void @s_ashr_v2i128_ss(<2 x i128> %lhs, <2 x i128> %rhs) {
 ; GCN-NEXT:    v_mov_b32_e32 v12, s2
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, v6, v7, s[0:1]
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, v6, v12, s[0:1]
-; GCN-NEXT:    v_mov_b32_e32 v9, 0
 ; GCN-NEXT:    flat_store_dwordx4 v[10:11], v[4:7]
 ; GCN-NEXT:    flat_store_dwordx4 v[8:9], v[0:3]
 ; GCN-NEXT:    s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/shl.ll b/llvm/test/CodeGen/AMDGPU/shl.ll
index 43e15f1f58d12..dbe7fb6cc0859 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.ll
@@ -31,8 +31,7 @@ define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> add
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
-; VI-NEXT:    s_load_dwordx2 s[6:7], s[2:3], 0x8
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -45,20 +44,19 @@ define amdgpu_kernel void @shl_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> add
 ;
 ; EG-LABEL: shl_v2i32:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 3, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T1.XY, T0.X, 8, #1
-; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
-; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T0.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     LSHL * T0.Y, T0.Y, T1.Y,
-; EG-NEXT:     LSHL T0.X, T0.X, T1.X,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     LSHL * T0.Y, T0.Y, T0.W,
+; EG-NEXT:     LSHL T0.X, T0.X, T0.Z,
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -94,22 +92,21 @@ define amdgpu_kernel void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> add
 ;
 ; VI-LABEL: shl_v4i32:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x10
-; VI-NEXT:    s_mov_b32 s3, 0xf000
-; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
+; VI-NEXT:    s_mov_b32 s11, 0xf000
+; VI-NEXT:    s_mov_b32 s10, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_lshl_b32 s7, s7, s11
-; VI-NEXT:    s_lshl_b32 s6, s6, s10
-; VI-NEXT:    s_lshl_b32 s5, s5, s9
-; VI-NEXT:    s_lshl_b32 s4, s4, s8
-; VI-NEXT:    v_mov_b32_e32 v0, s4
-; VI-NEXT:    v_mov_b32_e32 v1, s5
-; VI-NEXT:    v_mov_b32_e32 v2, s6
-; VI-NEXT:    v_mov_b32_e32 v3, s7
-; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-NEXT:    s_lshl_b32 s3, s3, s7
+; VI-NEXT:    s_lshl_b32 s2, s2, s6
+; VI-NEXT:    s_lshl_b32 s1, s1, s5
+; VI-NEXT:    s_lshl_b32 s0, s0, s4
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
 ; EG-LABEL: shl_v4i32:
@@ -637,31 +634,29 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
 ; SI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; SI-NEXT:    s_mov_b32 s7, 0xf000
 ; SI-NEXT:    s_mov_b32 s6, 0
-; SI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; SI-NEXT:    v_mov_b32_e32 v1, 0
+; SI-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
+; SI-NEXT:    v_mov_b32_e32 v5, 0
 ; SI-NEXT:    s_waitcnt lgkmcnt(0)
 ; SI-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; SI-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
-; SI-NEXT:    buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 offset:8
+; SI-NEXT:    buffer_load_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
 ; SI-NEXT:    s_mov_b32 s4, 0xffff
 ; SI-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; SI-NEXT:    s_waitcnt vmcnt(1)
-; SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
 ; SI-NEXT:    s_waitcnt vmcnt(0)
-; SI-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
-; SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
-; SI-NEXT:    v_lshlrev_b32_e32 v3, v5, v3
-; SI-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
-; SI-NEXT:    v_lshlrev_b32_e32 v4, v9, v7
-; SI-NEXT:    v_lshlrev_b32_e32 v5, v8, v6
-; SI-NEXT:    v_and_b32_e32 v3, s4, v3
-; SI-NEXT:    v_and_b32_e32 v2, s4, v2
-; SI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; SI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; SI-NEXT:    v_or_b32_e32 v3, v3, v4
-; SI-NEXT:    v_or_b32_e32 v2, v2, v5
-; SI-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
+; SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; SI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; SI-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
+; SI-NEXT:    v_lshlrev_b32_e32 v1, v3, v1
+; SI-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
+; SI-NEXT:    v_lshlrev_b32_e32 v2, v9, v7
+; SI-NEXT:    v_lshlrev_b32_e32 v3, v8, v6
+; SI-NEXT:    v_and_b32_e32 v1, s4, v1
+; SI-NEXT:    v_and_b32_e32 v0, s4, v0
+; SI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; SI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; SI-NEXT:    v_or_b32_e32 v1, v1, v2
+; SI-NEXT:    v_or_b32_e32 v0, v0, v3
+; SI-NEXT:    buffer_store_dwordx2 v[0:1], v[4:5], s[0:3], 0 addr64
 ; SI-NEXT:    s_endpgm
 ;
 ; VI-LABEL: shl_v4i16:
@@ -672,10 +667,7 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_add_u32_e32 v2, vcc, 8, v0
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
-; VI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
@@ -691,45 +683,40 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
 ;
 ; EG-LABEL: shl_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 2, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 0 @8
-; EG-NEXT:    ALU 3, @15, KC0[], KC1[]
-; EG-NEXT:    TEX 0 @10
-; EG-NEXT:    ALU 49, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 2, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 53, @11, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XY, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
-; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_64 T10.XY, T0.X, 0, #1
-; EG-NEXT:    Fetch clause starting at 10:
-; EG-NEXT:     VTX_READ_64 T10.XY, T0.X, 8, #1
-; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_128 T10.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     LSHL * T0.W, T0.X, literal.x,
 ; EG-NEXT:    3(4.203895e-45), 0(0.000000e+00)
 ; EG-NEXT:     ADD_INT * T0.X, KC0[2].Z, PV.W,
-; EG-NEXT:    ALU clause starting at 15:
+; EG-NEXT:    ALU clause starting at 11:
 ; EG-NEXT:     MOV T4.X, T10.X,
 ; EG-NEXT:     MOV * T5.X, T10.Y,
-; EG-NEXT:     MOV T0.Y, PV.X,
-; EG-NEXT:     MOV * T0.Z, PS,
-; EG-NEXT:    ALU clause starting at 19:
-; EG-NEXT:     MOV T2.X, T10.X,
-; EG-NEXT:     MOV * T3.X, T10.Y,
-; EG-NEXT:     MOV T0.X, T6.X,
-; EG-NEXT:     MOV * T1.Y, PV.X,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     MOV T0.Y, PS,
+; EG-NEXT:     MOV * T2.X, T10.Z,
+; EG-NEXT:     MOV T3.X, T10.W,
+; EG-NEXT:     MOV * T0.Z, T6.X,
+; EG-NEXT:     MOV * T1.Y, T2.X,
 ; EG-NEXT:     AND_INT T1.W, PV.Y, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T0.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T0.X, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     LSHL * T1.W, PS, PV.W,
 ; EG-NEXT:     AND_INT T1.W, PV.W, literal.x,
-; EG-NEXT:     AND_INT * T2.W, T0.X, literal.y,
+; EG-NEXT:     AND_INT * T2.W, T0.Z, literal.y,
 ; EG-NEXT:    65535(9.183409e-41), -65536(nan)
 ; EG-NEXT:     OR_INT * T1.W, PS, PV.W,
-; EG-NEXT:     MOV T0.X, T3.X,
-; EG-NEXT:     MOV * T6.X, PV.W,
-; EG-NEXT:     MOV T1.Z, PS,
+; EG-NEXT:     MOV * T0.Z, T3.X,
+; EG-NEXT:     MOV * T6.X, T1.W,
+; EG-NEXT:     MOV T1.Z, PV.X,
 ; EG-NEXT:     LSHR T1.W, T1.Y, literal.x,
-; EG-NEXT:     LSHR * T2.W, T0.Y, literal.x,
+; EG-NEXT:     LSHR * T2.W, T0.X, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     LSHL T1.W, PS, PV.W,
 ; EG-NEXT:     AND_INT * T2.W, PV.Z, literal.x,
@@ -738,23 +725,23 @@ define amdgpu_kernel void @shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> add
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     OR_INT * T1.W, T2.W, PV.W,
 ; EG-NEXT:     MOV T6.X, PV.W,
-; EG-NEXT:     MOV T0.Y, T7.X,
-; EG-NEXT:     AND_INT T1.W, T0.X, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     AND_INT * T2.W, T0.Z, literal.x,
+; EG-NEXT:     MOV * T0.X, T7.X,
+; EG-NEXT:     AND_INT T1.W, T0.Z, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T0.Y, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     LSHL T1.W, PS, PV.W,
-; EG-NEXT:     AND_INT * T2.W, PV.Y, literal.x,
+; EG-NEXT:     AND_INT * T2.W, T0.X, literal.x,
 ; EG-NEXT:    -65536(nan), 0(0.000000e+00)
 ; EG-NEXT:     AND_INT * T1.W, PV.W, literal.x,
 ; EG-NEXT:    65535(9.183409e-41), 0(0.000000e+00)
 ; EG-NEXT:     OR_INT * T1.W, T2.W, PV.W,
 ; EG-NEXT:     MOV * T7.X, PV.W,
-; EG-NEXT:     MOV T0.Y, PV.X,
-; EG-NEXT:     LSHR T1.W, T0.X, literal.x,
-; EG-NEXT:     LSHR * T2.W, T0.Z, literal.x,
+; EG-NEXT:     MOV T0.X, PV.X,
+; EG-NEXT:     LSHR T1.W, T0.Z, literal.x,
+; EG-NEXT:     LSHR * T2.W, T0.Y, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     LSHL * T1.W, PS, PV.W,
-; EG-NEXT:     AND_INT T0.Z, T0.Y, literal.x,
+; EG-NEXT:     AND_INT T0.Z, T0.X, literal.x,
 ; EG-NEXT:     LSHL T1.W, PV.W, literal.y,
 ; EG-NEXT:     ADD_INT * T0.W, KC0[2].Y, T0.W,
 ; EG-NEXT:    65535(9.183409e-41), 16(2.242078e-44)
@@ -864,20 +851,19 @@ define amdgpu_kernel void @shl_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> add
 ;
 ; VI-LABEL: shl_v2i64:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x10
-; VI-NEXT:    s_mov_b32 s3, 0xf000
-; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
+; VI-NEXT:    s_mov_b32 s11, 0xf000
+; VI-NEXT:    s_mov_b32 s10, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_lshl_b64 s[6:7], s[6:7], s10
-; VI-NEXT:    s_lshl_b64 s[4:5], s[4:5], s8
-; VI-NEXT:    v_mov_b32_e32 v0, s4
-; VI-NEXT:    v_mov_b32_e32 v1, s5
-; VI-NEXT:    v_mov_b32_e32 v2, s6
-; VI-NEXT:    v_mov_b32_e32 v3, s7
-; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-NEXT:    s_lshl_b64 s[2:3], s[2:3], s6
+; VI-NEXT:    s_lshl_b64 s[0:1], s[0:1], s4
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
 ; EG-LABEL: shl_v2i64:
@@ -956,8 +942,7 @@ define amdgpu_kernel void @shl_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> add
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx8 s[0:7], s[18:19], 0x0
-; VI-NEXT:    s_load_dwordx8 s[8:15], s[18:19], 0x20
+; VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
 ; VI-NEXT:    s_mov_b32 s19, 0xf000
 ; VI-NEXT:    s_mov_b32 s18, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
index 0134ff6c319be..08b045e4a623f 100644
--- a/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/shl.v2i16.ll
@@ -7,8 +7,7 @@
 define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %lhs, <2 x i16> %rhs) #0 {
 ; GFX9-LABEL: s_shl_v2i16:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; GFX9-NEXT:    s_load_dword s3, s[0:1], 0x30
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
 ; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
 ; GFX9-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s6, -1
@@ -20,29 +19,27 @@ define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
 ;
 ; VI-LABEL: s_shl_v2i16:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
-; VI-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; VI-NEXT:    s_load_dword s0, s[0:1], 0x30
-; VI-NEXT:    s_mov_b32 s1, 0xffff
-; VI-NEXT:    s_mov_b32 s7, 0xf000
-; VI-NEXT:    s_mov_b32 s6, -1
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
+; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; VI-NEXT:    s_mov_b32 s6, 0xffff
+; VI-NEXT:    s_mov_b32 s3, 0xf000
+; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_and_b32 s3, s2, s1
-; VI-NEXT:    s_lshr_b32 s2, s2, 16
-; VI-NEXT:    s_lshr_b32 s8, s0, 16
-; VI-NEXT:    s_lshl_b32 s2, s2, s8
-; VI-NEXT:    s_lshl_b32 s0, s3, s0
-; VI-NEXT:    s_lshl_b32 s2, s2, 16
-; VI-NEXT:    s_and_b32 s0, s0, s1
-; VI-NEXT:    s_or_b32 s0, s0, s2
-; VI-NEXT:    v_mov_b32_e32 v0, s0
-; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT:    s_and_b32 s7, s4, s6
+; VI-NEXT:    s_lshr_b32 s4, s4, 16
+; VI-NEXT:    s_lshr_b32 s8, s5, 16
+; VI-NEXT:    s_lshl_b32 s4, s4, s8
+; VI-NEXT:    s_lshl_b32 s5, s7, s5
+; VI-NEXT:    s_lshl_b32 s4, s4, 16
+; VI-NEXT:    s_and_b32 s5, s5, s6
+; VI-NEXT:    s_or_b32 s4, s5, s4
+; VI-NEXT:    v_mov_b32_e32 v0, s4
+; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; VI-NEXT:    s_endpgm
 ;
 ; CI-LABEL: s_shl_v2i16:
 ; CI:       ; %bb.0:
-; CI-NEXT:    s_load_dword s4, s[0:1], 0xb
-; CI-NEXT:    s_load_dword s5, s[0:1], 0xc
+; CI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
 ; CI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s3, 0xf000
 ; CI-NEXT:    s_mov_b32 s2, -1
@@ -60,9 +57,8 @@ define amdgpu_kernel void @s_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %
 ;
 ; GFX10-LABEL: s_shl_v2i16:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_clause 0x2
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; GFX10-NEXT:    s_load_dword s3, s[0:1], 0x30
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
 ; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX10-NEXT:    s_mov_b32 s6, -1
@@ -79,35 +75,31 @@ define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> a
 ; GFX9-LABEL: v_shl_v2i16:
 ; GFX9:       ; %bb.0:
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX9-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX9-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX9-NEXT:    global_load_dword v2, v0, s[2:3] offset:4
+; GFX9-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
-; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v2, v1
-; GFX9-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v1, v0
+; GFX9-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX9-NEXT:    s_endpgm
 ;
 ; VI-LABEL: v_shl_v2i16:
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; VI-NEXT:    v_lshlrev_b32_e32 v4, 2, v0
+; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
-; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_add_u32_e32 v2, vcc, 4, v0
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dword v5, v[0:1]
-; VI-NEXT:    flat_load_dword v2, v[2:3]
-; VI-NEXT:    v_mov_b32_e32 v1, s1
-; VI-NEXT:    v_add_u32_e32 v0, vcc, s0, v4
+; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
+; VI-NEXT:    v_mov_b32_e32 v3, s1
+; VI-NEXT:    v_add_u32_e32 v2, vcc, s0, v2
+; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
 ; VI-NEXT:    s_waitcnt vmcnt(0)
-; VI-NEXT:    v_lshlrev_b16_e32 v3, v2, v5
-; VI-NEXT:    v_lshlrev_b16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
-; VI-NEXT:    v_or_b32_e32 v2, v3, v2
-; VI-NEXT:    flat_store_dword v[0:1], v2
+; VI-NEXT:    v_lshlrev_b16_e32 v4, v1, v0
+; VI-NEXT:    v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
+; VI-NEXT:    v_or_b32_e32 v0, v4, v0
+; VI-NEXT:    flat_store_dword v[2:3], v0
 ; VI-NEXT:    s_endpgm
 ;
 ; CI-LABEL: v_shl_v2i16:
@@ -119,17 +111,15 @@ define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> a
 ; CI-NEXT:    v_mov_b32_e32 v1, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; CI-NEXT:    buffer_load_dword v2, v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    buffer_load_dword v3, v[0:1], s[4:7], 0 addr64 offset:4
+; CI-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
 ; CI-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; CI-NEXT:    s_waitcnt vmcnt(1)
-; CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
 ; CI-NEXT:    s_waitcnt vmcnt(0)
+; CI-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
 ; CI-NEXT:    v_lshrrev_b32_e32 v5, 16, v3
 ; CI-NEXT:    v_lshlrev_b32_e32 v2, v3, v2
 ; CI-NEXT:    v_lshlrev_b32_e32 v3, v5, v4
-; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; CI-NEXT:    v_and_b32_e32 v2, 0xffff, v2
+; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
 ; CI-NEXT:    v_or_b32_e32 v2, v2, v3
 ; CI-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
 ; CI-NEXT:    s_endpgm
@@ -137,14 +127,12 @@ define amdgpu_kernel void @v_shl_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> a
 ; GFX10-LABEL: v_shl_v2i16:
 ; GFX10:       ; %bb.0:
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
-; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
+; GFX10-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    global_load_dword v1, v0, s[2:3]
-; GFX10-NEXT:    global_load_dword v2, v0, s[2:3] offset:4
+; GFX10-NEXT:    global_load_dwordx2 v[0:1], v2, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v2, v1
-; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v1, v0
+; GFX10-NEXT:    global_store_dword v2, v0, s[0:1]
 ; GFX10-NEXT:    s_endpgm
   %tid = call i32 @llvm.amdgcn.workitem.id.x()
   %tid.ext = sext i32 %tid to i64
@@ -464,8 +452,7 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> a
 ; GFX9-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX9-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    global_load_dwordx2 v[0:1], v4, s[2:3]
-; GFX9-NEXT:    global_load_dwordx2 v[2:3], v4, s[2:3] offset:8
+; GFX9-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:    v_pk_lshlrev_b16 v1, v3, v1
 ; GFX9-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
@@ -480,10 +467,7 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> a
 ; VI-NEXT:    v_mov_b32_e32 v1, s3
 ; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v4
 ; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
-; VI-NEXT:    v_add_u32_e32 v2, vcc, 8, v0
-; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v1, vcc
-; VI-NEXT:    flat_load_dwordx2 v[0:1], v[0:1]
-; VI-NEXT:    flat_load_dwordx2 v[2:3], v[2:3]
+; VI-NEXT:    flat_load_dwordx4 v[0:3], v[0:1]
 ; VI-NEXT:    v_mov_b32_e32 v5, s1
 ; VI-NEXT:    v_add_u32_e32 v4, vcc, s0, v4
 ; VI-NEXT:    v_addc_u32_e32 v5, vcc, 0, v5, vcc
@@ -502,31 +486,29 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> a
 ; CI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
 ; CI-NEXT:    s_mov_b32 s7, 0xf000
 ; CI-NEXT:    s_mov_b32 s6, 0
-; CI-NEXT:    v_lshlrev_b32_e32 v0, 3, v0
-; CI-NEXT:    v_mov_b32_e32 v1, 0
+; CI-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
+; CI-NEXT:    v_mov_b32_e32 v5, 0
 ; CI-NEXT:    s_waitcnt lgkmcnt(0)
 ; CI-NEXT:    s_mov_b64 s[4:5], s[2:3]
-; CI-NEXT:    buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64
-; CI-NEXT:    buffer_load_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 offset:8
+; CI-NEXT:    buffer_load_dwordx4 v[0:3], v[4:5], s[4:7], 0 addr64
 ; CI-NEXT:    s_mov_b32 s4, 0xffff
 ; CI-NEXT:    s_mov_b64 s[2:3], s[6:7]
-; CI-NEXT:    s_waitcnt vmcnt(1)
-; CI-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
-; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
 ; CI-NEXT:    s_waitcnt vmcnt(0)
-; CI-NEXT:    v_lshrrev_b32_e32 v8, 16, v4
-; CI-NEXT:    v_lshrrev_b32_e32 v9, 16, v5
-; CI-NEXT:    v_lshlrev_b32_e32 v3, v5, v3
-; CI-NEXT:    v_lshlrev_b32_e32 v2, v4, v2
-; CI-NEXT:    v_lshlrev_b32_e32 v4, v9, v7
-; CI-NEXT:    v_lshlrev_b32_e32 v5, v8, v6
-; CI-NEXT:    v_and_b32_e32 v3, s4, v3
-; CI-NEXT:    v_and_b32_e32 v2, s4, v2
-; CI-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
-; CI-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
-; CI-NEXT:    v_or_b32_e32 v3, v3, v4
-; CI-NEXT:    v_or_b32_e32 v2, v2, v5
-; CI-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
+; CI-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
+; CI-NEXT:    v_lshrrev_b32_e32 v7, 16, v1
+; CI-NEXT:    v_lshrrev_b32_e32 v8, 16, v2
+; CI-NEXT:    v_lshrrev_b32_e32 v9, 16, v3
+; CI-NEXT:    v_lshlrev_b32_e32 v1, v3, v1
+; CI-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
+; CI-NEXT:    v_lshlrev_b32_e32 v2, v9, v7
+; CI-NEXT:    v_lshlrev_b32_e32 v3, v8, v6
+; CI-NEXT:    v_and_b32_e32 v1, s4, v1
+; CI-NEXT:    v_and_b32_e32 v0, s4, v0
+; CI-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
+; CI-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT:    v_or_b32_e32 v1, v1, v2
+; CI-NEXT:    v_or_b32_e32 v0, v0, v3
+; CI-NEXT:    buffer_store_dwordx2 v[0:1], v[4:5], s[0:3], 0 addr64
 ; CI-NEXT:    s_endpgm
 ;
 ; GFX10-LABEL: v_shl_v4i16:
@@ -534,9 +516,7 @@ define amdgpu_kernel void @v_shl_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> a
 ; GFX10-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; GFX10-NEXT:    v_lshlrev_b32_e32 v4, 3, v0
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    s_clause 0x1
-; GFX10-NEXT:    global_load_dwordx2 v[0:1], v4, s[2:3]
-; GFX10-NEXT:    global_load_dwordx2 v[2:3], v4, s[2:3] offset:8
+; GFX10-NEXT:    global_load_dwordx4 v[0:3], v4, s[2:3]
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    v_pk_lshlrev_b16 v1, v3, v1
 ; GFX10-NEXT:    v_pk_lshlrev_b16 v0, v2, v0
diff --git a/llvm/test/CodeGen/AMDGPU/sra.ll b/llvm/test/CodeGen/AMDGPU/sra.ll
index 491b1c742b8b3..090ac93961235 100644
--- a/llvm/test/CodeGen/AMDGPU/sra.ll
+++ b/llvm/test/CodeGen/AMDGPU/sra.ll
@@ -46,20 +46,19 @@ define amdgpu_kernel void @ashr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ;
 ; EG-LABEL: ashr_v2i32:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 3, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T1.XY, T0.X, 8, #1
-; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
-; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T0.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     ASHR * T0.Y, T0.Y, T1.Y,
-; EG-NEXT:     ASHR T0.X, T0.X, T1.X,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     ASHR * T0.Y, T0.Y, T0.W,
+; EG-NEXT:     ASHR T0.X, T0.X, T0.Z,
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -193,25 +192,23 @@ define amdgpu_kernel void @ashr_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> ad
 ;
 ; EG-LABEL: ashr_v2i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 1, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 14, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 14, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T6.X, T7.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_32 T7.X, T7.X, 0, #1
-; EG-NEXT:     VTX_READ_32 T6.X, T6.X, 4, #1
-; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     VTX_READ_64 T6.XY, T6.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T6.X, KC0[2].Z,
-; EG-NEXT:     MOV * T7.X, PV.X,
-; EG-NEXT:    ALU clause starting at 12:
-; EG-NEXT:     LSHR * T0.W, T7.X, literal.x,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     LSHR * T0.W, T6.X, literal.x,
 ; EG-NEXT:    16(2.242078e-44), 0(0.000000e+00)
 ; EG-NEXT:     BFE_INT T0.Y, PV.W, 0.0, literal.x,
-; EG-NEXT:     LSHR T0.Z, T6.X, literal.x,
-; EG-NEXT:     BFE_INT T0.W, T7.X, 0.0, literal.x, BS:VEC_120/SCL_212
-; EG-NEXT:     AND_INT * T1.W, T6.X, literal.y,
+; EG-NEXT:     LSHR T0.Z, T6.Y, literal.x,
+; EG-NEXT:     BFE_INT T0.W, T6.X, 0.0, literal.x,
+; EG-NEXT:     AND_INT * T1.W, T6.Y, literal.y,
 ; EG-NEXT:    16(2.242078e-44), 65535(9.183409e-41)
 ; EG-NEXT:     ASHR T0.W, PV.W, PS,
 ; EG-NEXT:     ASHR * T1.W, PV.Y, PV.Z,
@@ -253,10 +250,10 @@ define amdgpu_kernel void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> ad
 ; SI-NEXT:    v_ashrrev_i32_e32 v1, 16, v1
 ; SI-NEXT:    v_lshrrev_b32_e32 v6, 16, v2
 ; SI-NEXT:    v_lshrrev_b32_e32 v7, 16, v3
-; SI-NEXT:    v_ashrrev_i32_e32 v1, v7, v1
-; SI-NEXT:    v_ashrrev_i32_e32 v3, v3, v5
-; SI-NEXT:    v_ashrrev_i32_e32 v0, v6, v0
-; SI-NEXT:    v_ashrrev_i32_e32 v2, v2, v4
+; SI-NEXT:    v_ashr_i32_e32 v1, v1, v7
+; SI-NEXT:    v_ashr_i32_e32 v3, v5, v3
+; SI-NEXT:    v_ashr_i32_e32 v0, v0, v6
+; SI-NEXT:    v_ashr_i32_e32 v2, v4, v2
 ; SI-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
 ; SI-NEXT:    v_and_b32_e32 v3, s2, v3
 ; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
@@ -291,28 +288,23 @@ define amdgpu_kernel void @ashr_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> ad
 ;
 ; EG-LABEL: ashr_v4i16:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @12, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 0 @8
-; EG-NEXT:    ALU 3, @13, KC0[], KC1[]
-; EG-NEXT:    TEX 0 @10
-; EG-NEXT:    ALU 54, @17, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 58, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T10.XY, T9.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
-; EG-NEXT:    Fetch clause starting at 8:
-; EG-NEXT:     VTX_READ_64 T10.XY, T9.X, 0, #1
-; EG-NEXT:    Fetch clause starting at 10:
-; EG-NEXT:     VTX_READ_64 T9.XY, T9.X, 8, #1
-; EG-NEXT:    ALU clause starting at 12:
+; EG-NEXT:    Fetch clause starting at 6:
+; EG-NEXT:     VTX_READ_128 T9.XYZW, T9.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T9.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 13:
-; EG-NEXT:     MOV T4.X, T10.X,
-; EG-NEXT:     MOV * T5.X, T10.Y,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     MOV T4.X, T9.X,
+; EG-NEXT:     MOV * T5.X, T9.Y,
 ; EG-NEXT:     MOV T0.Y, PV.X,
 ; EG-NEXT:     MOV * T0.Z, PS,
-; EG-NEXT:    ALU clause starting at 17:
-; EG-NEXT:     MOV T2.X, T9.X,
-; EG-NEXT:     MOV * T3.X, T9.Y,
+; EG-NEXT:     MOV T2.X, T9.Z,
+; EG-NEXT:     MOV * T3.X, T9.W,
 ; EG-NEXT:     MOV * T0.W, T6.X,
 ; EG-NEXT:     MOV T1.Y, T2.X,
 ; EG-NEXT:     BFE_INT * T1.W, T0.Y, 0.0, literal.x,
diff --git a/llvm/test/CodeGen/AMDGPU/srl.ll b/llvm/test/CodeGen/AMDGPU/srl.ll
index 88cd9b00c9859..f2d392144344c 100644
--- a/llvm/test/CodeGen/AMDGPU/srl.ll
+++ b/llvm/test/CodeGen/AMDGPU/srl.ll
@@ -85,8 +85,7 @@ define amdgpu_kernel void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx2 s[4:5], s[2:3], 0x0
-; VI-NEXT:    s_load_dwordx2 s[6:7], s[2:3], 0x8
+; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
@@ -99,20 +98,19 @@ define amdgpu_kernel void @lshr_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ;
 ; EG-LABEL: lshr_v2i32:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 3, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T1.XY, T0.X, 8, #1
-; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
-; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T0.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     LSHR * T0.Y, T0.Y, T1.Y,
-; EG-NEXT:     LSHR T0.X, T0.X, T1.X,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     LSHR * T0.Y, T0.Y, T0.W,
+; EG-NEXT:     LSHR T0.X, T0.X, T0.Z,
 ; EG-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
@@ -148,22 +146,21 @@ define amdgpu_kernel void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> ad
 ;
 ; VI-LABEL: lshr_v4i32:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
+; VI-NEXT:    s_load_dwordx4 s[8:11], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx4 s[4:7], s[2:3], 0x0
-; VI-NEXT:    s_load_dwordx4 s[8:11], s[2:3], 0x10
-; VI-NEXT:    s_mov_b32 s3, 0xf000
-; VI-NEXT:    s_mov_b32 s2, -1
+; VI-NEXT:    s_load_dwordx8 s[0:7], s[10:11], 0x0
+; VI-NEXT:    s_mov_b32 s11, 0xf000
+; VI-NEXT:    s_mov_b32 s10, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_lshr_b32 s7, s7, s11
-; VI-NEXT:    s_lshr_b32 s6, s6, s10
-; VI-NEXT:    s_lshr_b32 s5, s5, s9
-; VI-NEXT:    s_lshr_b32 s4, s4, s8
-; VI-NEXT:    v_mov_b32_e32 v0, s4
-; VI-NEXT:    v_mov_b32_e32 v1, s5
-; VI-NEXT:    v_mov_b32_e32 v2, s6
-; VI-NEXT:    v_mov_b32_e32 v3, s7
-; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-NEXT:    s_lshr_b32 s3, s3, s7
+; VI-NEXT:    s_lshr_b32 s2, s2, s6
+; VI-NEXT:    s_lshr_b32 s1, s1, s5
+; VI-NEXT:    s_lshr_b32 s0, s0, s4
+; VI-NEXT:    v_mov_b32_e32 v0, s0
+; VI-NEXT:    v_mov_b32_e32 v1, s1
+; VI-NEXT:    v_mov_b32_e32 v2, s2
+; VI-NEXT:    v_mov_b32_e32 v3, s3
+; VI-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
 ; VI-NEXT:    s_endpgm
 ;
 ; EG-LABEL: lshr_v4i32:
@@ -289,8 +286,7 @@ define amdgpu_kernel void @lshr_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> ad
 ; VI:       ; %bb.0:
 ; VI-NEXT:    s_load_dwordx4 s[16:19], s[0:1], 0x24
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
-; VI-NEXT:    s_load_dwordx8 s[0:7], s[18:19], 0x0
-; VI-NEXT:    s_load_dwordx8 s[8:15], s[18:19], 0x20
+; VI-NEXT:    s_load_dwordx16 s[0:15], s[18:19], 0x0
 ; VI-NEXT:    s_mov_b32 s19, 0xf000
 ; VI-NEXT:    s_mov_b32 s18, -1
 ; VI-NEXT:    s_waitcnt lgkmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
index a229ce1146aba..4c5e61ca626ce 100644
--- a/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
+++ b/llvm/test/CodeGen/AMDGPU/sub.v2i16.ll
@@ -165,21 +165,19 @@ define amdgpu_kernel void @s_test_sub_self_v2i16(<2 x i16> addrspace(1)* %out, <
 define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out, <2 x i16> %a, <2 x i16> %b) #1 {
 ; GFX9-LABEL: s_test_sub_v2i16_kernarg:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    s_load_dword s2, s[0:1], 0x30
-; GFX9-NEXT:    s_load_dword s3, s[0:1], 0x2c
+; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
 ; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
 ; GFX9-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX9-NEXT:    s_mov_b32 s6, -1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9-NEXT:    v_pk_sub_i16 v0, s3, v0
+; GFX9-NEXT:    v_mov_b32_e32 v0, s3
+; GFX9-NEXT:    v_pk_sub_i16 v0, s2, v0
 ; GFX9-NEXT:    buffer_store_dword v0, off, s[4:7], 0
 ; GFX9-NEXT:    s_endpgm
 ;
 ; VI-LABEL: s_test_sub_v2i16_kernarg:
 ; VI:       ; %bb.0:
-; VI-NEXT:    s_load_dword s4, s[0:1], 0x2c
-; VI-NEXT:    s_load_dword s5, s[0:1], 0x30
+; VI-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x2c
 ; VI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
 ; VI-NEXT:    s_mov_b32 s3, 0xf000
 ; VI-NEXT:    s_mov_b32 s2, -1
@@ -197,9 +195,8 @@ define amdgpu_kernel void @s_test_sub_v2i16_kernarg(<2 x i16> addrspace(1)* %out
 ;
 ; GFX10-LABEL: s_test_sub_v2i16_kernarg:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    s_clause 0x2
-; GFX10-NEXT:    s_load_dword s2, s[0:1], 0x2c
-; GFX10-NEXT:    s_load_dword s3, s[0:1], 0x30
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
 ; GFX10-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x24
 ; GFX10-NEXT:    s_mov_b32 s7, 0x31016000
 ; GFX10-NEXT:    s_mov_b32 s6, -1
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index d678b64f54b69..574c7671de936 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -547,50 +547,49 @@ define amdgpu_kernel void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> ad
 ;
 ; EG-LABEL: udiv_v2i32:
 ; EG:       ; %bb.0:
-; EG-NEXT:    ALU 0, @10, KC0[CB0:0-32], KC1[]
-; EG-NEXT:    TEX 1 @6
-; EG-NEXT:    ALU 33, @11, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
+; EG-NEXT:    TEX 0 @6
+; EG-NEXT:    ALU 33, @9, KC0[CB0:0-32], KC1[]
 ; EG-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.XY, T0.X, 1
 ; EG-NEXT:    CF_END
 ; EG-NEXT:    PAD
 ; EG-NEXT:    Fetch clause starting at 6:
-; EG-NEXT:     VTX_READ_64 T1.XY, T0.X, 8, #1
-; EG-NEXT:     VTX_READ_64 T0.XY, T0.X, 0, #1
-; EG-NEXT:    ALU clause starting at 10:
+; EG-NEXT:     VTX_READ_128 T0.XYZW, T0.X, 0, #1
+; EG-NEXT:    ALU clause starting at 8:
 ; EG-NEXT:     MOV * T0.X, KC0[2].Z,
-; EG-NEXT:    ALU clause starting at 11:
-; EG-NEXT:     SUB_INT T0.W, 0.0, T1.Y,
-; EG-NEXT:     RECIP_UINT * T0.Z, T1.Y,
-; EG-NEXT:     MULLO_INT * T0.W, PV.W, PS,
-; EG-NEXT:     SUB_INT T1.W, 0.0, T1.X,
-; EG-NEXT:     RECIP_UINT * T1.Z, T1.X,
+; EG-NEXT:    ALU clause starting at 9:
+; EG-NEXT:     SUB_INT T1.W, 0.0, T0.W,
+; EG-NEXT:     RECIP_UINT * T1.X, T0.W,
+; EG-NEXT:     MULLO_INT * T1.Y, PV.W, PS,
+; EG-NEXT:     SUB_INT T1.W, 0.0, T0.Z,
+; EG-NEXT:     RECIP_UINT * T1.Z, T0.Z,
 ; EG-NEXT:     MULLO_INT * T1.W, PV.W, PS,
 ; EG-NEXT:     MULHI * T1.W, T1.Z, PS,
 ; EG-NEXT:     ADD_INT T1.W, T1.Z, PS,
-; EG-NEXT:     MULHI * T0.W, T0.Z, T0.W,
-; EG-NEXT:     ADD_INT T0.W, T0.Z, PS,
-; EG-NEXT:     MULHI * T0.Z, T0.X, PV.W,
-; EG-NEXT:     MULHI * T0.W, T0.Y, PV.W,
-; EG-NEXT:     MULLO_INT * T1.Z, PS, T1.Y,
+; EG-NEXT:     MULHI * T1.Y, T1.X, T1.Y,
+; EG-NEXT:     ADD_INT T2.W, T1.X, PS,
+; EG-NEXT:     MULHI * T1.X, T0.X, PV.W,
+; EG-NEXT:     MULHI * T1.Y, T0.Y, PV.W,
+; EG-NEXT:     MULLO_INT * T1.Z, PS, T0.W,
 ; EG-NEXT:     SUB_INT T1.W, T0.Y, PS,
-; EG-NEXT:     MULLO_INT * T0.Y, T0.Z, T1.X,
+; EG-NEXT:     MULLO_INT * T0.Y, T1.X, T0.Z,
 ; EG-NEXT:     SUB_INT T0.Y, T0.X, PS,
-; EG-NEXT:     ADD_INT T1.Z, T0.W, 1,
-; EG-NEXT:     SETGE_UINT T2.W, PV.W, T1.Y,
-; EG-NEXT:     SUB_INT * T3.W, PV.W, T1.Y,
+; EG-NEXT:     ADD_INT T1.Z, T1.Y, 1,
+; EG-NEXT:     SETGE_UINT T2.W, PV.W, T0.W,
+; EG-NEXT:     SUB_INT * T3.W, PV.W, T0.W,
 ; EG-NEXT:     CNDE_INT T0.X, PV.W, T1.W, PS,
-; EG-NEXT:     CNDE_INT T2.Y, PV.W, T0.W, PV.Z, BS:VEC_021/SCL_122
-; EG-NEXT:     ADD_INT T1.Z, T0.Z, 1,
-; EG-NEXT:     SETGE_UINT T0.W, PV.Y, T1.X,
-; EG-NEXT:     SUB_INT * T1.W, PV.Y, T1.X,
-; EG-NEXT:     CNDE_INT T0.Y, PV.W, T0.Y, PS, BS:VEC_021/SCL_122
-; EG-NEXT:     CNDE_INT T0.Z, PV.W, T0.Z, PV.Z,
-; EG-NEXT:     ADD_INT T0.W, PV.Y, 1,
-; EG-NEXT:     SETGE_UINT * T1.W, PV.X, T1.Y,
-; EG-NEXT:     CNDE_INT T1.Y, PS, T2.Y, PV.W,
+; EG-NEXT:     CNDE_INT T1.Y, PV.W, T1.Y, PV.Z,
+; EG-NEXT:     ADD_INT T1.Z, T1.X, 1,
+; EG-NEXT:     SETGE_UINT T1.W, PV.Y, T0.Z,
+; EG-NEXT:     SUB_INT * T2.W, PV.Y, T0.Z,
+; EG-NEXT:     CNDE_INT T0.Y, PV.W, T0.Y, PS,
+; EG-NEXT:     CNDE_INT T1.Z, PV.W, T1.X, PV.Z,
+; EG-NEXT:     ADD_INT T1.W, PV.Y, 1,
+; EG-NEXT:     SETGE_UINT * T0.W, PV.X, T0.W,
+; EG-NEXT:     CNDE_INT T1.Y, PS, T1.Y, PV.W,
 ; EG-NEXT:     ADD_INT T0.W, PV.Z, 1,
-; EG-NEXT:     SETGE_UINT * T1.W, PV.Y, T1.X,
-; EG-NEXT:     CNDE_INT T1.X, PS, T0.Z, PV.W,
+; EG-NEXT:     SETGE_UINT * T1.W, PV.Y, T0.Z,
+; EG-NEXT:     CNDE_INT T1.X, PS, T1.Z, PV.W,
 ; EG-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
 ; EG-NEXT:    2(2.802597e-45), 0(0.000000e+00)
   %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 00bae7afa10fa..d7d5546e93fa9 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -491,86 +491,84 @@ define amdgpu_kernel void @s_test_urem31_i64(i64 addrspace(1)* %out, i64 %x, i64
 define amdgpu_kernel void @s_test_urem31_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) {
 ; GCN-LABEL: s_test_urem31_v2i64:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; GCN-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
-; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
-; GCN-NEXT:    s_mov_b32 s11, 0xf000
-; GCN-NEXT:    s_mov_b32 s10, -1
+; GCN-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_lshr_b32 s0, s5, 1
-; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s0
-; GCN-NEXT:    s_lshr_b32 s1, s1, 1
-; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s1
-; GCN-NEXT:    s_lshr_b32 s2, s3, 1
-; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GCN-NEXT:    s_lshr_b32 s3, s7, 1
-; GCN-NEXT:    v_cvt_f32_u32_e32 v4, s3
-; GCN-NEXT:    v_cvt_f32_u32_e32 v3, s2
-; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
+; GCN-NEXT:    s_lshr_b32 s4, s5, 1
+; GCN-NEXT:    s_lshr_b32 s5, s9, 1
+; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s5
+; GCN-NEXT:    v_cvt_f32_u32_e32 v2, s4
+; GCN-NEXT:    s_lshr_b32 s6, s11, 1
+; GCN-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v4, v0
+; GCN-NEXT:    s_lshr_b32 s7, s7, 1
+; GCN-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v6, v3
+; GCN-NEXT:    v_mul_f32_e32 v4, v2, v4
+; GCN-NEXT:    v_trunc_f32_e32 v4, v4
+; GCN-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; GCN-NEXT:    v_mad_f32 v2, -v4, v0, v2
+; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v0
+; GCN-NEXT:    v_mul_f32_e32 v2, v5, v6
+; GCN-NEXT:    v_addc_u32_e32 v0, vcc, 0, v7, vcc
+; GCN-NEXT:    v_mul_lo_u32 v0, v0, s5
 ; GCN-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
-; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v4
-; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
-; GCN-NEXT:    v_addc_u32_e32 v0, vcc, 0, v5, vcc
-; GCN-NEXT:    v_mul_lo_u32 v0, v0, s0
-; GCN-NEXT:    v_mul_f32_e32 v2, v3, v2
-; GCN-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; GCN-NEXT:    v_sub_i32_e32 v0, vcc, s1, v0
-; GCN-NEXT:    v_mad_f32 v2, -v2, v4, v3
-; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v4
-; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
-; GCN-NEXT:    v_mul_lo_u32 v2, v2, s3
-; GCN-NEXT:    v_mov_b32_e32 v1, 0
-; GCN-NEXT:    s_brev_b32 s0, -2
-; GCN-NEXT:    v_and_b32_e32 v0, s0, v0
-; GCN-NEXT:    v_sub_i32_e32 v2, vcc, s2, v2
-; GCN-NEXT:    v_and_b32_e32 v2, s0, v2
+; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v2
+; GCN-NEXT:    v_mad_f32 v2, -v2, v3, v5
+; GCN-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
+; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v3
+; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
+; GCN-NEXT:    v_mul_lo_u32 v2, v2, s6
+; GCN-NEXT:    s_brev_b32 s4, -2
+; GCN-NEXT:    v_and_b32_e32 v0, s4, v0
 ; GCN-NEXT:    v_mov_b32_e32 v3, v1
-; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; GCN-NEXT:    v_sub_i32_e32 v2, vcc, s7, v2
+; GCN-NEXT:    v_and_b32_e32 v2, s4, v2
+; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-NEXT:    s_endpgm
 ;
 ; GCN-IR-LABEL: s_test_urem31_v2i64:
 ; GCN-IR:       ; %bb.0:
-; GCN-IR-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; GCN-IR-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
-; GCN-IR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
-; GCN-IR-NEXT:    s_mov_b32 s11, 0xf000
-; GCN-IR-NEXT:    s_mov_b32 s10, -1
+; GCN-IR-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
+; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-IR-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-IR-NEXT:    s_mov_b32 s2, -1
 ; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT:    s_lshr_b32 s0, s5, 1
-; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v0, s0
-; GCN-IR-NEXT:    s_lshr_b32 s1, s1, 1
-; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v1, s1
-; GCN-IR-NEXT:    s_lshr_b32 s2, s3, 1
-; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GCN-IR-NEXT:    s_lshr_b32 s3, s7, 1
-; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v4, s3
-; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v3, s2
-; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
+; GCN-IR-NEXT:    s_lshr_b32 s4, s5, 1
+; GCN-IR-NEXT:    s_lshr_b32 s5, s9, 1
+; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v0, s5
+; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v2, s4
+; GCN-IR-NEXT:    s_lshr_b32 s6, s11, 1
+; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v4, v0
+; GCN-IR-NEXT:    s_lshr_b32 s7, s7, 1
+; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v6, v3
+; GCN-IR-NEXT:    v_mul_f32_e32 v4, v2, v4
+; GCN-IR-NEXT:    v_trunc_f32_e32 v4, v4
+; GCN-IR-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; GCN-IR-NEXT:    v_mad_f32 v2, -v4, v0, v2
+; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v0
+; GCN-IR-NEXT:    v_mul_f32_e32 v2, v5, v6
+; GCN-IR-NEXT:    v_addc_u32_e32 v0, vcc, 0, v7, vcc
+; GCN-IR-NEXT:    v_mul_lo_u32 v0, v0, s5
 ; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-IR-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
-; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v4
-; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
-; GCN-IR-NEXT:    v_addc_u32_e32 v0, vcc, 0, v5, vcc
-; GCN-IR-NEXT:    v_mul_lo_u32 v0, v0, s0
-; GCN-IR-NEXT:    v_mul_f32_e32 v2, v3, v2
-; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-IR-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, s1, v0
-; GCN-IR-NEXT:    v_mad_f32 v2, -v2, v4, v3
-; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v4
-; GCN-IR-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
-; GCN-IR-NEXT:    v_mul_lo_u32 v2, v2, s3
-; GCN-IR-NEXT:    v_mov_b32_e32 v1, 0
-; GCN-IR-NEXT:    s_brev_b32 s0, -2
-; GCN-IR-NEXT:    v_and_b32_e32 v0, s0, v0
-; GCN-IR-NEXT:    v_sub_i32_e32 v2, vcc, s2, v2
-; GCN-IR-NEXT:    v_and_b32_e32 v2, s0, v2
+; GCN-IR-NEXT:    v_cvt_u32_f32_e32 v4, v2
+; GCN-IR-NEXT:    v_mad_f32 v2, -v2, v3, v5
+; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
+; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v3
+; GCN-IR-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
+; GCN-IR-NEXT:    v_mul_lo_u32 v2, v2, s6
+; GCN-IR-NEXT:    s_brev_b32 s4, -2
+; GCN-IR-NEXT:    v_and_b32_e32 v0, s4, v0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v3, v1
-; GCN-IR-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; GCN-IR-NEXT:    v_sub_i32_e32 v2, vcc, s7, v2
+; GCN-IR-NEXT:    v_and_b32_e32 v2, s4, v2
+; GCN-IR-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-IR-NEXT:    s_endpgm
   %1 = lshr <2 x i64> %x, <i64 33, i64 33>
   %2 = lshr <2 x i64> %y, <i64 33, i64 33>
@@ -639,86 +637,84 @@ define amdgpu_kernel void @s_test_urem24_i64(i64 addrspace(1)* %out, i64 %x, i64
 define amdgpu_kernel void @s_test_urem23_64_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %x, <2 x i64> %y) {
 ; GCN-LABEL: s_test_urem23_64_v2i64:
 ; GCN:       ; %bb.0:
-; GCN-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; GCN-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
-; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
-; GCN-NEXT:    s_mov_b32 s11, 0xf000
-; GCN-NEXT:    s_mov_b32 s10, -1
+; GCN-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
+; GCN-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-NEXT:    s_mov_b32 s2, -1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_lshr_b32 s0, s5, 1
-; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s0
-; GCN-NEXT:    s_lshr_b32 s1, s1, 1
-; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s1
-; GCN-NEXT:    s_lshr_b32 s2, s3, 9
-; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GCN-NEXT:    s_lshr_b32 s3, s7, 9
-; GCN-NEXT:    v_cvt_f32_u32_e32 v4, s3
-; GCN-NEXT:    v_cvt_f32_u32_e32 v3, s2
-; GCN-NEXT:    v_mul_f32_e32 v2, v1, v2
+; GCN-NEXT:    s_lshr_b32 s4, s5, 1
+; GCN-NEXT:    s_lshr_b32 s5, s9, 1
+; GCN-NEXT:    v_cvt_f32_u32_e32 v0, s5
+; GCN-NEXT:    v_cvt_f32_u32_e32 v2, s4
+; GCN-NEXT:    s_lshr_b32 s6, s11, 9
+; GCN-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v4, v0
+; GCN-NEXT:    s_lshr_b32 s7, s7, 9
+; GCN-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GCN-NEXT:    v_rcp_iflag_f32_e32 v6, v3
+; GCN-NEXT:    v_mul_f32_e32 v4, v2, v4
+; GCN-NEXT:    v_trunc_f32_e32 v4, v4
+; GCN-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; GCN-NEXT:    v_mad_f32 v2, -v4, v0, v2
+; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v0
+; GCN-NEXT:    v_mul_f32_e32 v2, v5, v6
+; GCN-NEXT:    v_addc_u32_e32 v0, vcc, 0, v7, vcc
+; GCN-NEXT:    v_mul_lo_u32 v0, v0, s5
 ; GCN-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; GCN-NEXT:    v_mad_f32 v1, -v2, v0, v1
-; GCN-NEXT:    v_rcp_iflag_f32_e32 v2, v4
-; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
-; GCN-NEXT:    v_addc_u32_e32 v0, vcc, 0, v5, vcc
-; GCN-NEXT:    v_mul_lo_u32 v0, v0, s0
-; GCN-NEXT:    v_mul_f32_e32 v2, v3, v2
-; GCN-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; GCN-NEXT:    v_sub_i32_e32 v0, vcc, s1, v0
-; GCN-NEXT:    v_mad_f32 v2, -v2, v4, v3
-; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v4
-; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
-; GCN-NEXT:    v_mul_lo_u32 v2, v2, s3
-; GCN-NEXT:    v_mov_b32_e32 v1, 0
-; GCN-NEXT:    s_brev_b32 s0, -2
-; GCN-NEXT:    v_and_b32_e32 v0, s0, v0
-; GCN-NEXT:    v_sub_i32_e32 v2, vcc, s2, v2
-; GCN-NEXT:    v_and_b32_e32 v2, s0, v2
+; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v2
+; GCN-NEXT:    v_mad_f32 v2, -v2, v3, v5
+; GCN-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
+; GCN-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v3
+; GCN-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
+; GCN-NEXT:    v_mul_lo_u32 v2, v2, s6
+; GCN-NEXT:    s_brev_b32 s4, -2
+; GCN-NEXT:    v_and_b32_e32 v0, s4, v0
 ; GCN-NEXT:    v_mov_b32_e32 v3, v1
-; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; GCN-NEXT:    v_sub_i32_e32 v2, vcc, s7, v2
+; GCN-NEXT:    v_and_b32_e32 v2, s4, v2
+; GCN-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-NEXT:    s_endpgm
 ;
 ; GCN-IR-LABEL: s_test_urem23_64_v2i64:
 ; GCN-IR:       ; %bb.0:
-; GCN-IR-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x11
-; GCN-IR-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x9
-; GCN-IR-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
-; GCN-IR-NEXT:    s_mov_b32 s11, 0xf000
-; GCN-IR-NEXT:    s_mov_b32 s10, -1
+; GCN-IR-NEXT:    s_load_dwordx8 s[4:11], s[0:1], 0xd
+; GCN-IR-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GCN-IR-NEXT:    v_mov_b32_e32 v1, 0
+; GCN-IR-NEXT:    s_mov_b32 s3, 0xf000
+; GCN-IR-NEXT:    s_mov_b32 s2, -1
 ; GCN-IR-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-IR-NEXT:    s_lshr_b32 s0, s5, 1
-; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v0, s0
-; GCN-IR-NEXT:    s_lshr_b32 s1, s1, 1
-; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v1, s1
-; GCN-IR-NEXT:    s_lshr_b32 s2, s3, 9
-; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v0
-; GCN-IR-NEXT:    s_lshr_b32 s3, s7, 9
-; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v4, s3
-; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v3, s2
-; GCN-IR-NEXT:    v_mul_f32_e32 v2, v1, v2
+; GCN-IR-NEXT:    s_lshr_b32 s4, s5, 1
+; GCN-IR-NEXT:    s_lshr_b32 s5, s9, 1
+; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v0, s5
+; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v2, s4
+; GCN-IR-NEXT:    s_lshr_b32 s6, s11, 9
+; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v3, s6
+; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v4, v0
+; GCN-IR-NEXT:    s_lshr_b32 s7, s7, 9
+; GCN-IR-NEXT:    v_cvt_f32_u32_e32 v5, s7
+; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v6, v3
+; GCN-IR-NEXT:    v_mul_f32_e32 v4, v2, v4
+; GCN-IR-NEXT:    v_trunc_f32_e32 v4, v4
+; GCN-IR-NEXT:    v_cvt_u32_f32_e32 v7, v4
+; GCN-IR-NEXT:    v_mad_f32 v2, -v4, v0, v2
+; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v0
+; GCN-IR-NEXT:    v_mul_f32_e32 v2, v5, v6
+; GCN-IR-NEXT:    v_addc_u32_e32 v0, vcc, 0, v7, vcc
+; GCN-IR-NEXT:    v_mul_lo_u32 v0, v0, s5
 ; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-IR-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; GCN-IR-NEXT:    v_mad_f32 v1, -v2, v0, v1
-; GCN-IR-NEXT:    v_rcp_iflag_f32_e32 v2, v4
-; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
-; GCN-IR-NEXT:    v_addc_u32_e32 v0, vcc, 0, v5, vcc
-; GCN-IR-NEXT:    v_mul_lo_u32 v0, v0, s0
-; GCN-IR-NEXT:    v_mul_f32_e32 v2, v3, v2
-; GCN-IR-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-IR-NEXT:    v_cvt_u32_f32_e32 v5, v2
-; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, s1, v0
-; GCN-IR-NEXT:    v_mad_f32 v2, -v2, v4, v3
-; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v4
-; GCN-IR-NEXT:    v_addc_u32_e32 v2, vcc, 0, v5, vcc
-; GCN-IR-NEXT:    v_mul_lo_u32 v2, v2, s3
-; GCN-IR-NEXT:    v_mov_b32_e32 v1, 0
-; GCN-IR-NEXT:    s_brev_b32 s0, -2
-; GCN-IR-NEXT:    v_and_b32_e32 v0, s0, v0
-; GCN-IR-NEXT:    v_sub_i32_e32 v2, vcc, s2, v2
-; GCN-IR-NEXT:    v_and_b32_e32 v2, s0, v2
+; GCN-IR-NEXT:    v_cvt_u32_f32_e32 v4, v2
+; GCN-IR-NEXT:    v_mad_f32 v2, -v2, v3, v5
+; GCN-IR-NEXT:    v_sub_i32_e32 v0, vcc, s4, v0
+; GCN-IR-NEXT:    v_cmp_ge_f32_e64 vcc, |v2|, v3
+; GCN-IR-NEXT:    v_addc_u32_e32 v2, vcc, 0, v4, vcc
+; GCN-IR-NEXT:    v_mul_lo_u32 v2, v2, s6
+; GCN-IR-NEXT:    s_brev_b32 s4, -2
+; GCN-IR-NEXT:    v_and_b32_e32 v0, s4, v0
 ; GCN-IR-NEXT:    v_mov_b32_e32 v3, v1
-; GCN-IR-NEXT:    buffer_store_dwordx4 v[0:3], off, s[8:11], 0
+; GCN-IR-NEXT:    v_sub_i32_e32 v2, vcc, s7, v2
+; GCN-IR-NEXT:    v_and_b32_e32 v2, s4, v2
+; GCN-IR-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
 ; GCN-IR-NEXT:    s_endpgm
   %1 = lshr <2 x i64> %x, <i64 33, i64 41>
   %2 = lshr <2 x i64> %y, <i64 33, i64 41>
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/4x2xhalf.ll b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/4x2xhalf.ll
new file mode 100644
index 0000000000000..41b10f087caa9
--- /dev/null
+++ b/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/4x2xhalf.ll
@@ -0,0 +1,96 @@
+; RUN: opt -mtriple=nvptx64-nvidia-cuda -load-store-vectorizer -S -o - %s | FileCheck %s
+
+define void @ldg_f16(half* nocapture align 16 %rd0) {
+  %in1b = bitcast half* %rd0 to <2 x half>*
+  %load1 = load <2 x half>, <2 x half>* %in1b, align 4
+  %p1 = fcmp ogt <2 x half> %load1, zeroinitializer
+  %s1 = select <2 x i1> %p1, <2 x half> %load1, <2 x half> zeroinitializer
+  store <2 x half> %s1, <2 x half>* %in1b, align 4
+  %in2 = getelementptr half, half* %rd0, i64 2
+  %in2b = bitcast half* %in2 to <2 x half>*
+  %load2 = load <2 x half>, <2 x half>* %in2b, align 4
+  %p2 = fcmp ogt <2 x half> %load2, zeroinitializer
+  %s2 = select <2 x i1> %p2, <2 x half> %load2, <2 x half> zeroinitializer
+  store <2 x half> %s2, <2 x half>* %in2b, align 4
+  %in3 = getelementptr half, half* %rd0, i64 4
+  %in3b = bitcast half* %in3 to <2 x half>*
+  %load3 = load <2 x half>, <2 x half>* %in3b, align 4
+  %p3 = fcmp ogt <2 x half> %load3, zeroinitializer
+  %s3 = select <2 x i1> %p3, <2 x half> %load3, <2 x half> zeroinitializer
+  store <2 x half> %s3, <2 x half>* %in3b, align 4
+  %in4 = getelementptr half, half* %rd0, i64 6
+  %in4b = bitcast half* %in4 to <2 x half>*
+  %load4 = load <2 x half>, <2 x half>* %in4b, align 4
+  %p4 = fcmp ogt <2 x half> %load4, zeroinitializer
+  %s4 = select <2 x i1> %p4, <2 x half> %load4, <2 x half> zeroinitializer
+  store <2 x half> %s4, <2 x half>* %in4b, align 4
+  ret void
+
+; CHECK-LABEL: @ldg_f16
+; CHECK: %[[LD:.*]] = load <8 x half>, <8 x half>*
+; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 0, i32 1>
+; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 2, i32 3>
+; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 4, i32 5>
+; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 6, i32 7>
+; CHECK: store <8 x half>
+}
+
+define void @no_nonpow2_vector(half* nocapture align 16 %rd0) {
+  %in1b = bitcast half* %rd0 to <3 x half>*
+  %load1 = load <3 x half>, <3 x half>* %in1b, align 4
+  %p1 = fcmp ogt <3 x half> %load1, zeroinitializer
+  %s1 = select <3 x i1> %p1, <3 x half> %load1, <3 x half> zeroinitializer
+  store <3 x half> %s1, <3 x half>* %in1b, align 4
+  %in2 = getelementptr half, half* %rd0, i64 3
+  %in2b = bitcast half* %in2 to <3 x half>*
+  %load2 = load <3 x half>, <3 x half>* %in2b, align 4
+  %p2 = fcmp ogt <3 x half> %load2, zeroinitializer
+  %s2 = select <3 x i1> %p2, <3 x half> %load2, <3 x half> zeroinitializer
+  store <3 x half> %s2, <3 x half>* %in2b, align 4
+  %in3 = getelementptr half, half* %rd0, i64 6
+  %in3b = bitcast half* %in3 to <3 x half>*
+  %load3 = load <3 x half>, <3 x half>* %in3b, align 4
+  %p3 = fcmp ogt <3 x half> %load3, zeroinitializer
+  %s3 = select <3 x i1> %p3, <3 x half> %load3, <3 x half> zeroinitializer
+  store <3 x half> %s3, <3 x half>* %in3b, align 4
+  %in4 = getelementptr half, half* %rd0, i64 9
+  %in4b = bitcast half* %in4 to <3 x half>*
+  %load4 = load <3 x half>, <3 x half>* %in4b, align 4
+  %p4 = fcmp ogt <3 x half> %load4, zeroinitializer
+  %s4 = select <3 x i1> %p4, <3 x half> %load4, <3 x half> zeroinitializer
+  store <3 x half> %s4, <3 x half>* %in4b, align 4
+  ret void
+
+; CHECK-LABEL: @no_nonpow2_vector
+; CHECK-NOT: shufflevector
+}
+
+define void @no_pointer_vector(half** nocapture align 16 %rd0) {
+  %in1b = bitcast half** %rd0 to <2 x half*>*
+  %load1 = load <2 x half*>, <2 x half*>* %in1b, align 4
+  %p1 = icmp ne <2 x half*> %load1, zeroinitializer
+  %s1 = select <2 x i1> %p1, <2 x half*> %load1, <2 x half*> zeroinitializer
+  store <2 x half*> %s1, <2 x half*>* %in1b, align 4
+  %in2 = getelementptr half*, half** %rd0, i64 2
+  %in2b = bitcast half** %in2 to <2 x half*>*
+  %load2 = load <2 x half*>, <2 x half*>* %in2b, align 4
+  %p2 = icmp ne <2 x half*> %load2, zeroinitializer
+  %s2 = select <2 x i1> %p2, <2 x half*> %load2, <2 x half*> zeroinitializer
+  store <2 x half*> %s2, <2 x half*>* %in2b, align 4
+  %in3 = getelementptr half*, half** %rd0, i64 4
+  %in3b = bitcast half** %in3 to <2 x half*>*
+  %load3 = load <2 x half*>, <2 x half*>* %in3b, align 4
+  %p3 = icmp ne <2 x half*> %load3, zeroinitializer
+  %s3 = select <2 x i1> %p3, <2 x half*> %load3, <2 x half*> zeroinitializer
+  store <2 x half*> %s3, <2 x half*>* %in3b, align 4
+  %in4 = getelementptr half*, half** %rd0, i64 6
+  %in4b = bitcast half** %in4 to <2 x half*>*
+  %load4 = load <2 x half*>, <2 x half*>* %in4b, align 4
+  %p4 = icmp ne <2 x half*> %load4, zeroinitializer
+  %s4 = select <2 x i1> %p4, <2 x half*> %load4, <2 x half*> zeroinitializer
+  store <2 x half*> %s4, <2 x half*>* %in4b, align 4
+  ret void
+
+; CHECK-LABEL: @no_pointer_vector
+; CHECK-NOT: shufflevector
+}

From 37d690b330198ca602ea83d2e1798e30c4c3e037 Mon Sep 17 00:00:00 2001
From: Nuno Lopes <nuno.lopes@tecnico.ulisboa.pt>
Date: Wed, 26 Jan 2022 10:48:43 +0000
Subject: [PATCH 689/946] remove spurious comma [NFC]

---
 llvm/include/llvm/Support/X86TargetParser.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h
index 6f5db64714e03..612046f3b2d9c 100644
--- a/llvm/include/llvm/Support/X86TargetParser.h
+++ b/llvm/include/llvm/Support/X86TargetParser.h
@@ -17,7 +17,7 @@
 #include "llvm/ADT/StringMap.h"
 
 namespace llvm {
-template <typename T> class SmallVectorImpl;;
+template <typename T> class SmallVectorImpl;
 class StringRef;
 
 namespace X86 {

From ed4efee2a3d45058aa1a6ffb463485046e981dc6 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 11:48:11 +0100
Subject: [PATCH 690/946] [MemCpyOpt] Add additiona call slot unwind tests
 (NFC)

Test a possibly unwinding call with a byval and sret argument.
---
 .../Transforms/MemCpyOpt/callslot_throw.ll    | 38 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll b/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll
index a3e2665120a91..ce0d575ab21b9 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll
@@ -25,7 +25,7 @@ define void @test2(i32* nocapture noalias dereferenceable(4) %x) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[T:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @may_throw(i32* nonnull [[T]]) [[ATTR0:#.*]]
+; CHECK-NEXT:    call void @may_throw(i32* nonnull [[T]]) #[[ATTR0:[0-9]+]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[T]], align 4
 ; CHECK-NEXT:    call void @always_throws()
 ; CHECK-NEXT:    store i32 [[LOAD]], i32* [[X:%.*]], align 4
@@ -39,3 +39,39 @@ entry:
   store i32 %load, i32* %x, align 4
   ret void
 }
+
+; TODO: byval argument is not visible on unwind.
+define void @test_byval(i32* nocapture noalias dereferenceable(4) byval(i32) %x) {
+; CHECK-LABEL: @test_byval(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @may_throw(i32* nonnull [[T]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[T]], align 4
+; CHECK-NEXT:    store i32 [[LOAD]], i32* [[X:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t = alloca i32, align 4
+  call void @may_throw(i32* nonnull %t)
+  %load = load i32, i32* %t, align 4
+  store i32 %load, i32* %x, align 4
+  ret void
+}
+
+; TODO: With updated semantics, sret could also be invisible on unwind.
+define void @test_sret(i32* nocapture noalias dereferenceable(4) sret(i32) %x) {
+; CHECK-LABEL: @test_sret(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[T:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @may_throw(i32* nonnull [[T]])
+; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[T]], align 4
+; CHECK-NEXT:    store i32 [[LOAD]], i32* [[X:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %t = alloca i32, align 4
+  call void @may_throw(i32* nonnull %t)
+  %load = load i32, i32* %t, align 4
+  store i32 %load, i32* %x, align 4
+  ret void
+}

From 9b6c2ea30219c16c264eaa38609e324470e2ad07 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <ntv@google.com>
Date: Wed, 26 Jan 2022 05:19:53 -0500
Subject: [PATCH 691/946] [mlir][Linalg] Add GenericOp self-copy on buffers
 folding

Reviewed By: pifon2a

Differential Revision: https://reviews.llvm.org/D118116
---
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp   | 14 ++++++++++++--
 mlir/test/Dialect/Linalg/canonicalize.mlir | 16 ++++++++++++++++
 mlir/test/Dialect/Linalg/inlining.mlir     |  3 ++-
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index 3ca3932a44eec..b3067109b4365 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -843,8 +843,6 @@ struct EraseIdentityGenericOp : public OpRewritePattern<GenericOp> {
 
   LogicalResult matchAndRewrite(GenericOp genericOp,
                                 PatternRewriter &rewriter) const override {
-    if (!genericOp.hasTensorSemantics())
-      return failure();
     // Check all indexing maps are identity.
     if (llvm::any_of(genericOp.getIndexingMaps(),
                      [](AffineMap map) { return !map.isIdentity(); }))
@@ -859,6 +857,17 @@ struct EraseIdentityGenericOp : public OpRewritePattern<GenericOp> {
     if (!yieldOp)
       return failure();
 
+    // In the buffer case, we need to check exact buffer equality.
+    if (genericOp.hasBufferSemantics()) {
+      if (genericOp.getNumInputs() == 1 && genericOp.getNumOutputs() == 1 &&
+          genericOp.getInputOperand(0)->get() ==
+              genericOp.getOutputOperand(0)->get()) {
+        rewriter.eraseOp(genericOp);
+        return success();
+      }
+      return failure();
+    }
+
     // Get the argument number of the returned values. That is the operand
     // number to use for replacing uses of this operation.
     SmallVector<Value> returnedArgs;
@@ -876,6 +885,7 @@ struct EraseIdentityGenericOp : public OpRewritePattern<GenericOp> {
                                                       resultType, returnedArg);
       returnedArgs.push_back(returnedArg);
     }
+
     if (returnedArgs.size() != genericOp->getNumResults())
       return failure();
     rewriter.replaceOp(genericOp, returnedArgs);
diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir
index 44cb18f11d152..96d3aa26deafa 100644
--- a/mlir/test/Dialect/Linalg/canonicalize.mlir
+++ b/mlir/test/Dialect/Linalg/canonicalize.mlir
@@ -583,3 +583,19 @@ func @dim_of_tiled_loop_result_no_canonicalize(%arg0: tensor<?x?xf32>, %arg1: te
   %r2 = tensor.dim %r, %c0 : tensor<?x?xf32>
   return %r2 : index
 }
+
+// -----
+
+// CHECK: func @fold_self_copy
+func @fold_self_copy(%0 : memref<4x16xf32>) {
+// CHECK-NEXT: return
+  linalg.generic {indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>, 
+                                   affine_map<(d0, d1) -> (d0, d1)>], 
+                  iterator_types = ["parallel", "parallel"]} 
+    ins(%0 : memref<4x16xf32>) 
+    outs(%0 : memref<4x16xf32>) {
+      ^bb0(%arg4: f32, %arg5: f32):
+        linalg.yield %arg4 : f32
+    }
+  return 
+}
diff --git a/mlir/test/Dialect/Linalg/inlining.mlir b/mlir/test/Dialect/Linalg/inlining.mlir
index 527b044fa2499..033213c2a954c 100644
--- a/mlir/test/Dialect/Linalg/inlining.mlir
+++ b/mlir/test/Dialect/Linalg/inlining.mlir
@@ -25,7 +25,8 @@ func @inlined_fn(%arg0: memref<?xf32>) {
      ins(%arg0 : memref<?xf32>)
     outs(%arg0 : memref<?xf32>) {
     ^bb(%0 : f32, %1 : f32) :
-      linalg.yield %0 : f32
+      %2 = arith.addf %0, %0: f32
+      linalg.yield %2 : f32
   }
   return
 }

From 19eaad94c47f92dd23989b0b6832bb6751dde979 Mon Sep 17 00:00:00 2001
From: Salman Javed <mail@salmanjaved.org>
Date: Wed, 26 Jan 2022 23:56:27 +1300
Subject: [PATCH 692/946] [clang-tidy] Cache the locations of NOLINTBEGIN/END
 blocks

Support for NOLINT(BEGIN/END) blocks (implemented in D108560) is
currently costly. This patch aims to improve the performance with the
following changes:

- The use of tokenized NOLINTs instead of a series of repetitive ad-hoc
string operations (`find()`, `split()`, `slice()`, regex matching etc).
- The caching of NOLINT(BEGIN/END) block locations. Determining these
locations each time a new diagnostic is raised is wasteful as it
requires reading and parsing the entire source file.

Move NOLINT-specific code from `ClangTidyDiagnosticConsumer` to new
purpose-built class `NoLintDirectiveHandler`.

Differential Revision: https://reviews.llvm.org/D116085
---
 clang-tools-extra/clang-tidy/CMakeLists.txt   |   1 +
 .../ClangTidyDiagnosticConsumer.cpp           | 230 +---------
 .../clang-tidy/ClangTidyDiagnosticConsumer.h  |  47 +-
 .../clang-tidy/NoLintDirectiveHandler.cpp     | 415 ++++++++++++++++++
 .../clang-tidy/NoLintDirectiveHandler.h       |  51 +++
 clang-tools-extra/clangd/ParsedAST.cpp        |  12 +-
 .../nolintbeginend/1st-translation-unit.cpp   |   5 +
 .../nolintbeginend/2nd-translation-unit.cpp   |   6 +
 .../test/clang-tidy/infrastructure/nolint.cpp |  51 ++-
 .../infrastructure/nolintbeginend-LIFO.cpp    |  19 +
 .../nolintbeginend-begin-all-end-glob.cpp     |  16 +
 .../nolintbeginend-begin-all-end-specific.cpp |  16 +
 .../nolintbeginend-begin-glob-end-all.cpp     |  16 +
 ...nolintbeginend-begin-glob-end-specific.cpp |  16 +
 ...lintbeginend-begin-global-end-specific.cpp |  25 --
 ...lintbeginend-begin-multiple-end-single.cpp |  22 +
 ...lintbeginend-begin-single-end-multiple.cpp |  22 +
 .../nolintbeginend-begin-specific-end-all.cpp |  16 +
 ...nolintbeginend-begin-specific-end-glob.cpp |  16 +
 ...lintbeginend-begin-specific-end-global.cpp |  25 --
 .../nolintbeginend-mismatched-check-names.cpp |  21 -
 .../nolintbeginend-mismatched-delims.cpp      |   1 -
 .../nolintbeginend-multiple-TUs.cpp           |   6 +
 .../nolintbeginend-typo-in-check-name.cpp     |   3 +
 .../infrastructure/nolintbeginend.cpp         |  28 +-
 25 files changed, 752 insertions(+), 334 deletions(-)
 create mode 100644 clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
 create mode 100644 clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp

diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt
index 075e9f9909d65..8a953eeea2759 100644
--- a/clang-tools-extra/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/CMakeLists.txt
@@ -17,6 +17,7 @@ add_clang_library(clangTidy
   ClangTidyProfiling.cpp
   ExpandModularHeadersPPCallbacks.cpp
   GlobList.cpp
+  NoLintDirectiveHandler.cpp
 
   DEPENDS
   ClangSACheckers
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
index 66f60ec60b605..04721a2d3a020 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
@@ -18,6 +18,7 @@
 #include "ClangTidyDiagnosticConsumer.h"
 #include "ClangTidyOptions.h"
 #include "GlobList.h"
+#include "NoLintDirectiveHandler.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTDiagnostic.h"
 #include "clang/AST/Attr.h"
@@ -188,7 +189,7 @@ DiagnosticBuilder ClangTidyContext::diag(
   return DiagEngine->Report(ID);
 }
 
-DiagnosticBuilder ClangTidyContext::diag(const ClangTidyError &Error) {
+DiagnosticBuilder ClangTidyContext::diag(const tooling::Diagnostic &Error) {
   SourceManager &SM = DiagEngine->getSourceManager();
   llvm::ErrorOr<const FileEntry *> File =
       SM.getFileManager().getFile(Error.Message.FilePath);
@@ -206,6 +207,15 @@ DiagnosticBuilder ClangTidyContext::configurationDiag(
   return diag("clang-tidy-config", Message, Level);
 }
 
+bool ClangTidyContext::shouldSuppressDiagnostic(
+    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
+    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
+    bool EnableNoLintBlocks) {
+  std::string CheckName = getCheckName(Info.getID());
+  return NoLintHandler.shouldSuppress(DiagLevel, Info, CheckName, NoLintErrors,
+                                      AllowIO, EnableNoLintBlocks);
+}
+
 void ClangTidyContext::setSourceManager(SourceManager *SourceMgr) {
   DiagEngine->setSourceManager(SourceMgr);
 }
@@ -307,218 +317,9 @@ void ClangTidyDiagnosticConsumer::finalizeLastError() {
   LastErrorPassesLineFilter = false;
 }
 
-static bool isNOLINTFound(StringRef NolintDirectiveText, StringRef CheckName,
-                          StringRef Line, size_t *FoundNolintIndex = nullptr,
-                          StringRef *FoundNolintChecksStr = nullptr) {
-  if (FoundNolintIndex)
-    *FoundNolintIndex = StringRef::npos;
-  if (FoundNolintChecksStr)
-    *FoundNolintChecksStr = StringRef();
-
-  size_t NolintIndex = Line.find(NolintDirectiveText);
-  if (NolintIndex == StringRef::npos)
-    return false;
-
-  size_t BracketIndex = NolintIndex + NolintDirectiveText.size();
-  if (BracketIndex < Line.size() && isalnum(Line[BracketIndex])) {
-    // Reject this search result, otherwise it will cause false positives when
-    // NOLINT is found as a substring of NOLINT(NEXTLINE/BEGIN/END).
-    return false;
-  }
-
-  // Check if specific checks are specified in brackets.
-  if (BracketIndex < Line.size() && Line[BracketIndex] == '(') {
-    ++BracketIndex;
-    const size_t BracketEndIndex = Line.find(')', BracketIndex);
-    if (BracketEndIndex != StringRef::npos) {
-      StringRef ChecksStr =
-          Line.substr(BracketIndex, BracketEndIndex - BracketIndex);
-      if (FoundNolintChecksStr)
-        *FoundNolintChecksStr = ChecksStr;
-      // Allow specifying a few checks with a glob expression, ignoring
-      // negative globs (which would effectively disable the suppression).
-      GlobList Globs(ChecksStr, /*KeepNegativeGlobs=*/false);
-      if (!Globs.contains(CheckName))
-        return false;
-    }
-  }
-
-  if (FoundNolintIndex)
-    *FoundNolintIndex = NolintIndex;
-
-  return true;
-}
-
-static llvm::Optional<StringRef> getBuffer(const SourceManager &SM, FileID File,
-                                           bool AllowIO) {
-  return AllowIO ? SM.getBufferDataOrNone(File)
-                 : SM.getBufferDataIfLoaded(File);
-}
-
-static ClangTidyError createNolintError(const ClangTidyContext &Context,
-                                        const SourceManager &SM,
-                                        SourceLocation Loc,
-                                        bool IsNolintBegin) {
-  ClangTidyError Error("clang-tidy-nolint", ClangTidyError::Error,
-                       Context.getCurrentBuildDirectory(), false);
-  StringRef Message =
-      IsNolintBegin
-          ? ("unmatched 'NOLINTBEGIN' comment without a subsequent 'NOLINT"
-             "END' comment")
-          : ("unmatched 'NOLINTEND' comment without a previous 'NOLINT"
-             "BEGIN' comment");
-  Error.Message = tooling::DiagnosticMessage(Message, SM, Loc);
-  return Error;
-}
-
-static Optional<ClangTidyError> tallyNolintBegins(
-    const ClangTidyContext &Context, const SourceManager &SM,
-    StringRef CheckName, SmallVector<StringRef> Lines, SourceLocation LinesLoc,
-    SmallVector<std::pair<SourceLocation, StringRef>> &NolintBegins) {
-  // Keep a running total of how many NOLINT(BEGIN...END) blocks are active, as
-  // well as the bracket expression (if any) that was used in the NOLINT
-  // expression.
-  size_t NolintIndex;
-  StringRef NolintChecksStr;
-  for (const auto &Line : Lines) {
-    if (isNOLINTFound("NOLINTBEGIN", CheckName, Line, &NolintIndex,
-                      &NolintChecksStr)) {
-      // Check if a new block is being started.
-      NolintBegins.emplace_back(std::make_pair(
-          LinesLoc.getLocWithOffset(NolintIndex), NolintChecksStr));
-    } else if (isNOLINTFound("NOLINTEND", CheckName, Line, &NolintIndex,
-                             &NolintChecksStr)) {
-      // Check if the previous block is being closed.
-      if (!NolintBegins.empty() &&
-          NolintBegins.back().second == NolintChecksStr) {
-        NolintBegins.pop_back();
-      } else {
-        // Trying to close a nonexistent block. Return a diagnostic about this
-        // misuse that can be displayed along with the original clang-tidy check
-        // that the user was attempting to suppress.
-        return createNolintError(Context, SM,
-                                 LinesLoc.getLocWithOffset(NolintIndex), false);
-      }
-    }
-    // Advance source location to the next line.
-    LinesLoc = LinesLoc.getLocWithOffset(Line.size() + sizeof('\n'));
-  }
-  return None; // All NOLINT(BEGIN/END) use has been consistent so far.
-}
-
-static bool
-lineIsWithinNolintBegin(const ClangTidyContext &Context,
-                        SmallVectorImpl<ClangTidyError> &SuppressionErrors,
-                        const SourceManager &SM, SourceLocation Loc,
-                        StringRef CheckName, StringRef TextBeforeDiag,
-                        StringRef TextAfterDiag) {
-  Loc = SM.getExpansionRange(Loc).getBegin();
-  SourceLocation FileStartLoc = SM.getLocForStartOfFile(SM.getFileID(Loc));
-  SmallVector<std::pair<SourceLocation, StringRef>> NolintBegins;
-
-  // Check if there's an open NOLINT(BEGIN...END) block on the previous lines.
-  SmallVector<StringRef> PrevLines;
-  TextBeforeDiag.split(PrevLines, '\n');
-  auto Error = tallyNolintBegins(Context, SM, CheckName, PrevLines,
-                                 FileStartLoc, NolintBegins);
-  if (Error) {
-    SuppressionErrors.emplace_back(Error.getValue());
-  }
-  bool WithinNolintBegin = !NolintBegins.empty();
-
-  // Check that every block is terminated correctly on the following lines.
-  SmallVector<StringRef> FollowingLines;
-  TextAfterDiag.split(FollowingLines, '\n');
-  Error = tallyNolintBegins(Context, SM, CheckName, FollowingLines, Loc,
-                            NolintBegins);
-  if (Error) {
-    SuppressionErrors.emplace_back(Error.getValue());
-  }
-
-  // The following blocks were never closed. Return diagnostics for each
-  // instance that can be displayed along with the original clang-tidy check
-  // that the user was attempting to suppress.
-  for (const auto &NolintBegin : NolintBegins) {
-    SuppressionErrors.emplace_back(
-        createNolintError(Context, SM, NolintBegin.first, true));
-  }
-
-  return WithinNolintBegin && SuppressionErrors.empty();
-}
-
-static bool
-lineIsMarkedWithNOLINT(const ClangTidyContext &Context,
-                       SmallVectorImpl<ClangTidyError> &SuppressionErrors,
-                       bool AllowIO, const SourceManager &SM,
-                       SourceLocation Loc, StringRef CheckName,
-                       bool EnableNolintBlocks) {
-  // Get source code for this location.
-  FileID File;
-  unsigned Offset;
-  std::tie(File, Offset) = SM.getDecomposedSpellingLoc(Loc);
-  Optional<StringRef> Buffer = getBuffer(SM, File, AllowIO);
-  if (!Buffer)
-    return false;
-
-  // Check if there's a NOLINT on this line.
-  StringRef TextAfterDiag = Buffer->substr(Offset);
-  StringRef RestOfThisLine, FollowingLines;
-  std::tie(RestOfThisLine, FollowingLines) = TextAfterDiag.split('\n');
-  if (isNOLINTFound("NOLINT", CheckName, RestOfThisLine))
-    return true;
-
-  // Check if there's a NOLINTNEXTLINE on the previous line.
-  StringRef TextBeforeDiag = Buffer->substr(0, Offset);
-  size_t LastNewLinePos = TextBeforeDiag.rfind('\n');
-  StringRef PrevLines = (LastNewLinePos == StringRef::npos)
-                            ? StringRef()
-                            : TextBeforeDiag.slice(0, LastNewLinePos);
-  LastNewLinePos = PrevLines.rfind('\n');
-  StringRef PrevLine = (LastNewLinePos == StringRef::npos)
-                           ? PrevLines
-                           : PrevLines.substr(LastNewLinePos + 1);
-  if (isNOLINTFound("NOLINTNEXTLINE", CheckName, PrevLine))
-    return true;
-
-  // Check if this line is within a NOLINT(BEGIN...END) block.
-  return EnableNolintBlocks &&
-         lineIsWithinNolintBegin(Context, SuppressionErrors, SM, Loc, CheckName,
-                                 TextBeforeDiag, TextAfterDiag);
-}
-
-static bool lineIsMarkedWithNOLINTinMacro(
-    const Diagnostic &Info, const ClangTidyContext &Context,
-    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO,
-    bool EnableNolintBlocks) {
-  const SourceManager &SM = Info.getSourceManager();
-  SourceLocation Loc = Info.getLocation();
-  std::string CheckName = Context.getCheckName(Info.getID());
-  while (true) {
-    if (lineIsMarkedWithNOLINT(Context, SuppressionErrors, AllowIO, SM, Loc,
-                               CheckName, EnableNolintBlocks))
-      return true;
-    if (!Loc.isMacroID())
-      return false;
-    Loc = SM.getImmediateExpansionRange(Loc).getBegin();
-  }
-  return false;
-}
-
 namespace clang {
 namespace tidy {
 
-bool shouldSuppressDiagnostic(
-    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
-    ClangTidyContext &Context,
-    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO,
-    bool EnableNolintBlocks) {
-  return Info.getLocation().isValid() &&
-         DiagLevel != DiagnosticsEngine::Error &&
-         DiagLevel != DiagnosticsEngine::Fatal &&
-         lineIsMarkedWithNOLINTinMacro(Info, Context, SuppressionErrors,
-                                       AllowIO, EnableNolintBlocks);
-}
-
 const llvm::StringMap<tooling::Replacements> *
 getFixIt(const tooling::Diagnostic &Diagnostic, bool GetFixFromNotes) {
   if (!Diagnostic.Message.Fix.empty())
@@ -545,12 +346,15 @@ void ClangTidyDiagnosticConsumer::HandleDiagnostic(
   if (LastErrorWasIgnored && DiagLevel == DiagnosticsEngine::Note)
     return;
 
-  SmallVector<ClangTidyError, 1> SuppressionErrors;
-  if (shouldSuppressDiagnostic(DiagLevel, Info, Context, SuppressionErrors,
-                               EnableNolintBlocks)) {
+  SmallVector<tooling::Diagnostic, 1> SuppressionErrors;
+  if (Context.shouldSuppressDiagnostic(DiagLevel, Info, SuppressionErrors,
+                                       EnableNolintBlocks)) {
     ++Context.Stats.ErrorsIgnoredNOLINT;
     // Ignored a warning, should ignore related notes as well
     LastErrorWasIgnored = true;
+    Context.DiagEngine->Clear();
+    for (const auto &Error : SuppressionErrors)
+      Context.diag(Error);
     return;
   }
 
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
index 69f1ceddd9bd2..e41003262cccf 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
@@ -11,6 +11,7 @@
 
 #include "ClangTidyOptions.h"
 #include "ClangTidyProfiling.h"
+#include "NoLintDirectiveHandler.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Tooling/Core/Diagnostic.h"
 #include "llvm/ADT/DenseMap.h"
@@ -95,13 +96,33 @@ class ClangTidyContext {
   DiagnosticBuilder diag(StringRef CheckName, StringRef Message,
                          DiagnosticIDs::Level Level = DiagnosticIDs::Warning);
 
-  DiagnosticBuilder diag(const ClangTidyError &Error);
+  DiagnosticBuilder diag(const tooling::Diagnostic &Error);
 
   /// Report any errors to do with reading the configuration using this method.
   DiagnosticBuilder
   configurationDiag(StringRef Message,
                     DiagnosticIDs::Level Level = DiagnosticIDs::Warning);
 
+  /// Check whether a given diagnostic should be suppressed due to the presence
+  /// of a "NOLINT" suppression comment.
+  /// This is exposed so that other tools that present clang-tidy diagnostics
+  /// (such as clangd) can respect the same suppression rules as clang-tidy.
+  /// This does not handle suppression of notes following a suppressed
+  /// diagnostic; that is left to the caller as it requires maintaining state in
+  /// between calls to this function.
+  /// If any NOLINT is malformed, e.g. a BEGIN without a subsequent END, an
+  /// error about it will be returned in output \param NoLintErrors.
+  /// If \param AllowIO is false, the function does not attempt to read source
+  /// files from disk which are not already mapped into memory; such files are
+  /// treated as not containing a suppression comment.
+  /// \param EnableNoLintBlocks controls whether to honor NOLINTBEGIN/NOLINTEND
+  /// blocks; if false, only considers line-level disabling.
+  bool
+  shouldSuppressDiagnostic(DiagnosticsEngine::Level DiagLevel,
+                           const Diagnostic &Info,
+                           SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                           bool AllowIO = true, bool EnableNoLintBlocks = true);
+
   /// Sets the \c SourceManager of the used \c DiagnosticsEngine.
   ///
   /// This is called from the \c ClangTidyCheck base class.
@@ -208,29 +229,9 @@ class ClangTidyContext {
   std::string ProfilePrefix;
 
   bool AllowEnablingAnalyzerAlphaCheckers;
-};
 
-/// Check whether a given diagnostic should be suppressed due to the presence
-/// of a "NOLINT" suppression comment.
-/// This is exposed so that other tools that present clang-tidy diagnostics
-/// (such as clangd) can respect the same suppression rules as clang-tidy.
-/// This does not handle suppression of notes following a suppressed diagnostic;
-/// that is left to the caller as it requires maintaining state in between calls
-/// to this function.
-/// If `AllowIO` is false, the function does not attempt to read source files
-/// from disk which are not already mapped into memory; such files are treated
-/// as not containing a suppression comment.
-/// \param EnableNolintBlocks controls whether to honor NOLINTBEGIN/NOLINTEND
-/// blocks; if false, only considers line-level disabling.
-/// If suppression is not possible due to improper use of "NOLINT" comments -
-/// for example, the use of a "NOLINTBEGIN" comment that is not followed by a
-/// "NOLINTEND" comment - a diagnostic regarding the improper use is returned
-/// via the output argument `SuppressionErrors`.
-bool shouldSuppressDiagnostic(
-    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
-    ClangTidyContext &Context,
-    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO = true,
-    bool EnableNolintBlocks = true);
+  NoLintDirectiveHandler NoLintHandler;
+};
 
 /// Gets the Fix attached to \p Diagnostic.
 /// If there isn't a Fix attached to the diagnostic and \p AnyFix is true, Check
diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
new file mode 100644
index 0000000000000..6723b752fc848
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
@@ -0,0 +1,415 @@
+//===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+///  \file This file implements the NoLintDirectiveHandler class, which is used
+///  to locate NOLINT comments in the file being analyzed, to decide whether a
+///  diagnostic should be suppressed.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NoLintDirectiveHandler.h"
+#include "GlobList.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Tooling/Core/Diagnostic.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace clang {
+namespace tidy {
+
+//===----------------------------------------------------------------------===//
+// NoLintType
+//===----------------------------------------------------------------------===//
+
+// The type - one of NOLINT[NEXTLINE/BEGIN/END].
+enum class NoLintType { NoLint, NoLintNextLine, NoLintBegin, NoLintEnd };
+
+// Convert a string like "NOLINTNEXTLINE" to its enum `Type::NoLintNextLine`.
+// Return `None` if the string is unrecognized.
+static Optional<NoLintType> strToNoLintType(StringRef Str) {
+  auto Type = llvm::StringSwitch<Optional<NoLintType>>(Str)
+                  .Case("NOLINT", NoLintType::NoLint)
+                  .Case("NOLINTNEXTLINE", NoLintType::NoLintNextLine)
+                  .Case("NOLINTBEGIN", NoLintType::NoLintBegin)
+                  .Case("NOLINTEND", NoLintType::NoLintEnd)
+                  .Default(None);
+  return Type;
+}
+
+//===----------------------------------------------------------------------===//
+// NoLintToken
+//===----------------------------------------------------------------------===//
+
+// Whitespace within a NOLINT's check list shall be ignored.
+// "NOLINT( check1, check2 )" is equivalent to "NOLINT(check1,check2)".
+// Return the check list with all extraneous whitespace removed.
+static std::string trimWhitespace(StringRef Checks) {
+  SmallVector<StringRef> Split;
+  Checks.split(Split, ',');
+  for (StringRef &Check : Split)
+    Check = Check.trim();
+  return llvm::join(Split, ",");
+}
+
+namespace {
+
+// Record the presence of a NOLINT comment - its type, location, checks -
+// as parsed from the file's character contents.
+class NoLintToken {
+public:
+  // \param Checks:
+  // - If unspecified (i.e. `None`) then ALL checks are suppressed - equivalent
+  //   to NOLINT(*).
+  // - An empty string means nothing is suppressed - equivalent to NOLINT().
+  // - Negative globs ignored (which would effectively disable the suppression).
+  NoLintToken(NoLintType Type, size_t Pos, const Optional<std::string> &Checks)
+      : Type(Type), Pos(Pos), ChecksGlob(std::make_unique<CachedGlobList>(
+                                  Checks.getValueOr("*"),
+                                  /*KeepNegativeGlobs=*/false)) {
+    if (Checks)
+      this->Checks = trimWhitespace(*Checks);
+  }
+
+  // The type - one of NOLINT[NEXTLINE/BEGIN/END].
+  NoLintType Type;
+
+  // The location of the first character, "N", in "NOLINT".
+  size_t Pos;
+
+  // If this NOLINT specifies checks, return the checks.
+  Optional<std::string> checks() const { return Checks; }
+
+  // Whether this NOLINT applies to the provided check.
+  bool suppresses(StringRef Check) const { return ChecksGlob->contains(Check); }
+
+private:
+  Optional<std::string> Checks;
+  std::unique_ptr<CachedGlobList> ChecksGlob;
+};
+
+} // namespace
+
+// Consume the entire buffer and return all `NoLintToken`s that were found.
+static SmallVector<NoLintToken> getNoLints(StringRef Buffer) {
+  static constexpr llvm::StringLiteral NOLINT = "NOLINT";
+  SmallVector<NoLintToken> NoLints;
+
+  size_t Pos = 0;
+  while (Pos < Buffer.size()) {
+    // Find NOLINT:
+    const size_t NoLintPos = Buffer.find(NOLINT, Pos);
+    if (NoLintPos == StringRef::npos)
+      break; // Buffer exhausted
+
+    // Read [A-Z] characters immediately after "NOLINT", e.g. the "NEXTLINE" in
+    // "NOLINTNEXTLINE".
+    Pos = NoLintPos + NOLINT.size();
+    while (Pos < Buffer.size() && llvm::isAlpha(Buffer[Pos]))
+      ++Pos;
+
+    // Is this a recognized NOLINT type?
+    const Optional<NoLintType> NoLintType =
+        strToNoLintType(Buffer.slice(NoLintPos, Pos));
+    if (!NoLintType)
+      continue;
+
+    // Get checks, if specified.
+    Optional<std::string> Checks;
+    if (Pos < Buffer.size() && Buffer[Pos] == '(') {
+      size_t ClosingBracket = Buffer.find_first_of("\n)", ++Pos);
+      if (ClosingBracket != StringRef::npos && Buffer[ClosingBracket] == ')') {
+        Checks = Buffer.slice(Pos, ClosingBracket).str();
+        Pos = ClosingBracket + 1;
+      }
+    }
+
+    NoLints.emplace_back(*NoLintType, NoLintPos, Checks);
+  }
+
+  return NoLints;
+}
+
+//===----------------------------------------------------------------------===//
+// NoLintBlockToken
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// Represents a source range within a pair of NOLINT(BEGIN/END) comments.
+class NoLintBlockToken {
+public:
+  NoLintBlockToken(NoLintToken Begin, const NoLintToken &End)
+      : Begin(std::move(Begin)), EndPos(End.Pos) {
+    assert(this->Begin.Type == NoLintType::NoLintBegin);
+    assert(End.Type == NoLintType::NoLintEnd);
+    assert(this->Begin.Pos < End.Pos);
+    assert(this->Begin.checks() == End.checks());
+  }
+
+  // Whether the provided diagnostic is within and is suppressible by this block
+  // of NOLINT(BEGIN/END) comments.
+  bool suppresses(size_t DiagPos, StringRef DiagName) const {
+    return (Begin.Pos < DiagPos) && (DiagPos < EndPos) &&
+           Begin.suppresses(DiagName);
+  }
+
+private:
+  NoLintToken Begin;
+  size_t EndPos;
+};
+
+} // namespace
+
+// Match NOLINTBEGINs with their corresponding NOLINTENDs and move them into
+// `NoLintBlockToken`s. If any BEGINs or ENDs are left over, they are moved to
+// `UnmatchedTokens`.
+static SmallVector<NoLintBlockToken>
+formNoLintBlocks(SmallVector<NoLintToken> NoLints,
+                 SmallVectorImpl<NoLintToken> &UnmatchedTokens) {
+  SmallVector<NoLintBlockToken> CompletedBlocks;
+  SmallVector<NoLintToken> Stack;
+
+  // Nested blocks must be fully contained within their parent block. What this
+  // means is that when you have a series of nested BEGIN tokens, the END tokens
+  // shall appear in the reverse order, starting with the closing of the
+  // inner-most block first, then the next level up, and so on. This is
+  // essentially a last-in-first-out/stack system.
+  for (NoLintToken &NoLint : NoLints) {
+    if (NoLint.Type == NoLintType::NoLintBegin)
+      // A new block is being started. Add it to the stack.
+      Stack.emplace_back(std::move(NoLint));
+    else if (NoLint.Type == NoLintType::NoLintEnd) {
+      if (!Stack.empty() && Stack.back().checks() == NoLint.checks())
+        // The previous block is being closed. Pop one element off the stack.
+        CompletedBlocks.emplace_back(std::move(Stack.pop_back_val()), NoLint);
+      else
+        // Trying to close the wrong block.
+        UnmatchedTokens.emplace_back(std::move(NoLint));
+    }
+  }
+
+  llvm::move(Stack, std::back_inserter(UnmatchedTokens));
+  return CompletedBlocks;
+}
+
+//===----------------------------------------------------------------------===//
+// NoLintDirectiveHandler::Impl
+//===----------------------------------------------------------------------===//
+
+class NoLintDirectiveHandler::Impl {
+public:
+  bool shouldSuppress(DiagnosticsEngine::Level DiagLevel,
+                      const Diagnostic &Diag, StringRef DiagName,
+                      SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                      bool AllowIO, bool EnableNoLintBlocks);
+
+private:
+  bool diagHasNoLintInMacro(const Diagnostic &Diag, StringRef DiagName,
+                            SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                            bool AllowIO, bool EnableNoLintBlocks);
+
+  bool diagHasNoLint(StringRef DiagName, SourceLocation DiagLoc,
+                     const SourceManager &SrcMgr,
+                     SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                     bool AllowIO, bool EnableNoLintBlocks);
+
+  void generateCache(const SourceManager &SrcMgr, StringRef FileName,
+                     FileID File, StringRef Buffer,
+                     SmallVectorImpl<tooling::Diagnostic> &NoLintErrors);
+
+  llvm::StringMap<SmallVector<NoLintBlockToken>> Cache;
+};
+
+bool NoLintDirectiveHandler::Impl::shouldSuppress(
+    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Diag,
+    StringRef DiagName, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+    bool AllowIO, bool EnableNoLintBlocks) {
+  if (DiagLevel >= DiagnosticsEngine::Error)
+    return false;
+  return diagHasNoLintInMacro(Diag, DiagName, NoLintErrors, AllowIO,
+                              EnableNoLintBlocks);
+}
+
+// Look at the macro's spelling location for a NOLINT. If none is found, keep
+// looking up the call stack.
+bool NoLintDirectiveHandler::Impl::diagHasNoLintInMacro(
+    const Diagnostic &Diag, StringRef DiagName,
+    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
+    bool EnableNoLintBlocks) {
+  SourceLocation DiagLoc = Diag.getLocation();
+  if (DiagLoc.isInvalid())
+    return false;
+  const SourceManager &SrcMgr = Diag.getSourceManager();
+  while (true) {
+    if (diagHasNoLint(DiagName, DiagLoc, SrcMgr, NoLintErrors, AllowIO,
+                      EnableNoLintBlocks))
+      return true;
+    if (!DiagLoc.isMacroID())
+      return false;
+    DiagLoc = SrcMgr.getImmediateMacroCallerLoc(DiagLoc);
+  }
+  return false;
+}
+
+// Look behind and ahead for '\n' characters. These mark the start and end of
+// this line.
+static std::pair<size_t, size_t> getLineStartAndEnd(StringRef Buffer,
+                                                    size_t From) {
+  size_t StartPos = Buffer.find_last_of('\n', From) + 1;
+  size_t EndPos = std::min(Buffer.find('\n', From), Buffer.size());
+  return std::make_pair(StartPos, EndPos);
+}
+
+// Whether the line has a NOLINT of type = `Type` that can suppress the
+// diagnostic `DiagName`.
+static bool lineHasNoLint(StringRef Buffer,
+                          std::pair<size_t, size_t> LineStartAndEnd,
+                          NoLintType Type, StringRef DiagName) {
+  // Get all NOLINTs on the line.
+  Buffer = Buffer.slice(LineStartAndEnd.first, LineStartAndEnd.second);
+  SmallVector<NoLintToken> NoLints = getNoLints(Buffer);
+
+  // Do any of these NOLINTs match the desired type and diag name?
+  return llvm::any_of(NoLints, [&](const NoLintToken &NoLint) {
+    return NoLint.Type == Type && NoLint.suppresses(DiagName);
+  });
+}
+
+// Whether the provided diagnostic is located within and is suppressible by a
+// block of NOLINT(BEGIN/END) comments.
+static bool withinNoLintBlock(ArrayRef<NoLintBlockToken> NoLintBlocks,
+                              size_t DiagPos, StringRef DiagName) {
+  return llvm::any_of(NoLintBlocks, [&](const NoLintBlockToken &NoLintBlock) {
+    return NoLintBlock.suppresses(DiagPos, DiagName);
+  });
+}
+
+// Get the file contents as a string.
+static Optional<StringRef> getBuffer(const SourceManager &SrcMgr, FileID File,
+                                     bool AllowIO) {
+  return AllowIO ? SrcMgr.getBufferDataOrNone(File)
+                 : SrcMgr.getBufferDataIfLoaded(File);
+}
+
+// We will check for NOLINTs and NOLINTNEXTLINEs first. Checking for these is
+// not so expensive (just need to parse the current and previous lines). Only if
+// that fails do we look for NOLINT(BEGIN/END) blocks (which requires reading
+// the entire file).
+bool NoLintDirectiveHandler::Impl::diagHasNoLint(
+    StringRef DiagName, SourceLocation DiagLoc, const SourceManager &SrcMgr,
+    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
+    bool EnableNoLintBlocks) {
+  // Translate the diagnostic's SourceLocation to a raw file + offset pair.
+  FileID File;
+  unsigned int Pos = 0;
+  std::tie(File, Pos) = SrcMgr.getDecomposedSpellingLoc(DiagLoc);
+
+  // We will only see NOLINTs in user-authored sources. No point reading the
+  // file if it is a <built-in>.
+  Optional<StringRef> FileName = SrcMgr.getNonBuiltinFilenameForID(File);
+  if (!FileName)
+    return false;
+
+  // Get file contents.
+  Optional<StringRef> Buffer = getBuffer(SrcMgr, File, AllowIO);
+  if (!Buffer)
+    return false;
+
+  // Check if there's a NOLINT on this line.
+  auto ThisLine = getLineStartAndEnd(*Buffer, Pos);
+  if (lineHasNoLint(*Buffer, ThisLine, NoLintType::NoLint, DiagName))
+    return true;
+
+  // Check if there's a NOLINTNEXTLINE on the previous line.
+  if (ThisLine.first > 0) {
+    auto PrevLine = getLineStartAndEnd(*Buffer, ThisLine.first - 1);
+    if (lineHasNoLint(*Buffer, PrevLine, NoLintType::NoLintNextLine, DiagName))
+      return true;
+  }
+
+  // Check if this line is within a NOLINT(BEGIN/END) block.
+  if (!EnableNoLintBlocks)
+    return false;
+
+  // Do we have cached NOLINT block locations for this file?
+  if (Cache.count(*FileName) == 0)
+    // Warning: heavy operation - need to read entire file.
+    generateCache(SrcMgr, *FileName, File, *Buffer, NoLintErrors);
+
+  return withinNoLintBlock(Cache[*FileName], Pos, DiagName);
+}
+
+// Construct a [clang-tidy-nolint] diagnostic to do with the unmatched
+// NOLINT(BEGIN/END) pair.
+static tooling::Diagnostic makeNoLintError(const SourceManager &SrcMgr,
+                                           FileID File,
+                                           const NoLintToken &NoLint) {
+  tooling::Diagnostic Error;
+  Error.DiagLevel = tooling::Diagnostic::Error;
+  Error.DiagnosticName = "clang-tidy-nolint";
+  StringRef Message =
+      (NoLint.Type == NoLintType::NoLintBegin)
+          ? ("unmatched 'NOLINTBEGIN' comment without a subsequent 'NOLINT"
+             "END' comment")
+          : ("unmatched 'NOLINTEND' comment without a previous 'NOLINT"
+             "BEGIN' comment");
+  SourceLocation Loc = SrcMgr.getComposedLoc(File, NoLint.Pos);
+  Error.Message = tooling::DiagnosticMessage(Message, SrcMgr, Loc);
+  return Error;
+}
+
+// Find all NOLINT(BEGIN/END) blocks in a file and store in the cache.
+void NoLintDirectiveHandler::Impl::generateCache(
+    const SourceManager &SrcMgr, StringRef FileName, FileID File,
+    StringRef Buffer, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors) {
+  // Read entire file to get all NOLINTs.
+  SmallVector<NoLintToken> NoLints = getNoLints(Buffer);
+
+  // Match each BEGIN with its corresponding END.
+  SmallVector<NoLintToken> UnmatchedTokens;
+  Cache[FileName] = formNoLintBlocks(std::move(NoLints), UnmatchedTokens);
+
+  // Raise error for any BEGIN/END left over.
+  for (const NoLintToken &NoLint : UnmatchedTokens)
+    NoLintErrors.emplace_back(makeNoLintError(SrcMgr, File, NoLint));
+}
+
+//===----------------------------------------------------------------------===//
+// NoLintDirectiveHandler
+//===----------------------------------------------------------------------===//
+
+NoLintDirectiveHandler::NoLintDirectiveHandler()
+    : PImpl(std::make_unique<Impl>()) {}
+
+NoLintDirectiveHandler::~NoLintDirectiveHandler() = default;
+
+bool NoLintDirectiveHandler::shouldSuppress(
+    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Diag,
+    StringRef DiagName, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+    bool AllowIO, bool EnableNoLintBlocks) {
+  return PImpl->shouldSuppress(DiagLevel, Diag, DiagName, NoLintErrors, AllowIO,
+                               EnableNoLintBlocks);
+}
+
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
new file mode 100644
index 0000000000000..db9fd593283c7
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
@@ -0,0 +1,51 @@
+//===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h ----*- C++ *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+
+namespace clang {
+namespace tooling {
+struct Diagnostic;
+} // namespace tooling
+} // namespace clang
+
+namespace llvm {
+template <typename T> class SmallVectorImpl;
+} // namespace llvm
+
+namespace clang {
+namespace tidy {
+
+/// This class is used to locate NOLINT comments in the file being analyzed, to
+/// decide whether a diagnostic should be suppressed.
+/// This class keeps a cache of every NOLINT comment found so that files do not
+/// have to be repeatedly parsed each time a new diagnostic is raised.
+class NoLintDirectiveHandler {
+public:
+  NoLintDirectiveHandler();
+  ~NoLintDirectiveHandler();
+
+  bool shouldSuppress(DiagnosticsEngine::Level DiagLevel,
+                      const Diagnostic &Diag, llvm::StringRef DiagName,
+                      llvm::SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                      bool AllowIO, bool EnableNoLintBlocks);
+
+private:
+  class Impl;
+  std::unique_ptr<Impl> PImpl;
+};
+
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 9c64c645130bf..86c4b2151243c 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -10,6 +10,7 @@
 #include "../clang-tidy/ClangTidyCheck.h"
 #include "../clang-tidy/ClangTidyDiagnosticConsumer.h"
 #include "../clang-tidy/ClangTidyModuleRegistry.h"
+#include "../clang-tidy/NoLintDirectiveHandler.h"
 #include "AST.h"
 #include "Compiler.h"
 #include "Config.h"
@@ -468,12 +469,11 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
           bool IsInsideMainFile =
               Info.hasSourceManager() &&
               isInsideMainFile(Info.getLocation(), Info.getSourceManager());
-          SmallVector<tidy::ClangTidyError, 1> TidySuppressedErrors;
-          if (IsInsideMainFile &&
-              tidy::shouldSuppressDiagnostic(DiagLevel, Info, *CTContext,
-                                             TidySuppressedErrors,
-                                             /*AllowIO=*/false,
-                                             /*EnableNolintBlocks=*/false)) {
+          SmallVector<tooling::Diagnostic, 1> TidySuppressedErrors;
+          if (IsInsideMainFile && CTContext->shouldSuppressDiagnostic(
+                                      DiagLevel, Info, TidySuppressedErrors,
+                                      /*AllowIO=*/false,
+                                      /*EnableNolintBlocks=*/false)) {
             // FIXME: should we expose the suppression error (invalid use of
             // NOLINT comments)?
             return DiagnosticsEngine::Ignored;
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
new file mode 100644
index 0000000000000..d90fc069c9789
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
@@ -0,0 +1,5 @@
+// NOLINTBEGIN
+class A { A(int i); };
+// NOLINTEND
+
+class B { B(int i); };
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
new file mode 100644
index 0000000000000..0497037d31b82
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
@@ -0,0 +1,6 @@
+
+class A { A(int i); };
+
+// NOLINTBEGIN
+class B { B(int i); };
+// NOLINTEND
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
index 4b80bbe4eb3ca..9ef194232b6b7 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
@@ -28,11 +28,56 @@ class C4 { C4(int i); }; // NOLINT(google-explicit-constructor)
 
 class C5 { C5(int i); }; // NOLINT(some-check, google-explicit-constructor)
 
-class C6 { C6(int i); }; // NOLINT without-brackets-skip-all, another-check
+class C6 { C6(int i); }; // NOLINT without-brackets-skip-all
 
-class C7 { C7(int i); }; // NOLINTNEXTLINE doesn't get misconstrued as a NOLINT
+// Other NOLINT* types (e.g. NEXTLINE) should not be misconstrued as a NOLINT:
+class C7 { C7(int i); }; // NOLINTNEXTLINE
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
 
+// NOLINT must be UPPERCASE:
+// NOLINTnextline
+class C8 { C8(int i); }; // nolint
+// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
+
+// Unrecognized marker:
+// NOLINTNEXTLINEXYZ
+class C9 { C9(int i); }; // NOLINTXYZ
+// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
+
+// C-style comments are supported:
+class C10 { C10(int i); }; /* NOLINT */
+/* NOLINT */ class C11 { C11(int i); };
+
+// Multiple NOLINTs in the same comment:
+class C12 { C12(int i); }; // NOLINT(some-other-check) NOLINT(google-explicit-constructor)
+class C13 { C13(int i); }; // NOLINT(google-explicit-constructor) NOLINT(some-other-check)
+class C14 { C14(int i); }; // NOLINTNEXTLINE(some-other-check) NOLINT(google-explicit-constructor)
+
+// NOLINTNEXTLINE(google-explicit-constructor) NOLINT(some-other-check)
+class C15 { C15(int i); }; 
+
+// Any text after a NOLINT expression is treated as a comment:
+class C16 { C16(int i); }; // NOLINT: suppress check because <reason>
+class C17 { C17(int i); }; // NOLINT(google-explicit-constructor): suppress check because <reason>
+
+// NOLINT must appear in its entirety on one line:
+class C18 { C18(int i); }; /* NOL
+INT */
+// CHECK-MESSAGES: :[[@LINE-2]]:13: warning: single-argument constructors must be marked explicit
+
+/* NO
+LINT */ class C19 { C19(int i); };
+// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: single-argument constructors must be marked explicit
+
+// Spaces between items in the comma-separated check list are ignroed:
+class C20 { C20(int i); }; // NOLINT( google-explicit-constructor )
+class C21 { C21(int i); }; // NOLINT( google-explicit-constructor , some-other-check )
+class C22 { C22(int i); }; // NOLINT(google-explicit- constructor)
+// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: single-argument constructors must be marked explicit
+
+// If there is a space between "NOLINT" and the bracket, it is treated as a regular NOLINT: 
+class C23 { C23(int i); }; // NOLINT (some-other-check)
+
 void f() {
   int i;
 // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: unused variable 'i' [clang-diagnostic-unused-variable]
@@ -71,4 +116,4 @@ int array2[10];  // NOLINT(cppcoreguidelines-avoid-c-arrays)
 int array3[10];  // NOLINT(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
 int array4[10];  // NOLINT(*-avoid-c-arrays)
 
-// CHECK-MESSAGES: Suppressed 23 warnings (23 NOLINT)
+// CHECK-MESSAGES: Suppressed 34 warnings (34 NOLINT)
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
new file mode 100644
index 0000000000000..ee5b1cca755ab
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
@@ -0,0 +1,19 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+// NOLINTBEGIN(google-readability-casting)
+class A { A(int i); };
+auto Num = (unsigned int)(-1);
+// NOLINTEND(google-explicit-constructor)
+// NOLINTEND(google-readability-casting)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
new file mode 100644
index 0000000000000..90b9fa9883024
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN
+class B { B(int i); };
+// NOLINTEND(*)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
new file mode 100644
index 0000000000000..6ffa914e4ef0b
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN
+class A { A(int i); };
+// NOLINTEND(google-explicit-constructor)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
new file mode 100644
index 0000000000000..3697d5c11e2e2
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(*)
+class B { B(int i); };
+// NOLINTEND
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
new file mode 100644
index 0000000000000..5bdb117f20242
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(*)
+class B { B(int i); };
+// NOLINTEND(google-explicit-constructor)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
deleted file mode 100644
index 01e69ddf6352a..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
-
-// NOLINTBEGIN
-class A { A(int i); };
-// NOLINTEND(google-explicit-constructor)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
-
-// NOLINTBEGIN
-class B { B(int i); };
-// NOLINTEND(*)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
new file mode 100644
index 0000000000000..150913ce0ec69
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
@@ -0,0 +1,22 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor,google-readability-casting)
+class B { B(int i); };
+// NOLINTEND(google-explicit-constructor)
+auto Num2 = (unsigned int)(-1);
+// NOLINTEND(google-readability-casting)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
+// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
new file mode 100644
index 0000000000000..f9a915c890cbb
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
@@ -0,0 +1,22 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+// NOLINTBEGIN(google-readability-casting)
+class B { B(int i); };
+auto Num2 = (unsigned int)(-1);
+// NOLINTEND(google-explicit-constructor,google-readability-casting)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-13]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
+// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
new file mode 100644
index 0000000000000..decfe2dd5a4c1
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+class A { A(int i); };
+// NOLINTEND
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
new file mode 100644
index 0000000000000..a9f904ccce138
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+class A { A(int i); };
+// NOLINTEND(*)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
deleted file mode 100644
index c6fcfddd09ba8..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(google-explicit-constructor)
-class A { A(int i); };
-// NOLINTEND
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
-
-// NOLINTBEGIN(*)
-class B { B(int i); };
-// NOLINTEND
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
index 957b0b341a3cc..8d7786fb8c712 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
@@ -16,24 +16,3 @@ auto Num = (unsigned int)(-1);
 // CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
 // CHECK: TEND' comment without a previous 'NOLIN
 // CHECK: TBEGIN' comment [clang-tidy-nolint]
-
-// NOLINTBEGIN(google-explicit-constructor,google-readability-casting)
-class B { B(int i); };
-// NOLINTEND(google-explicit-constructor)
-auto Num2 = (unsigned int)(-1);
-// NOLINTEND(google-readability-casting)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
-// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
index 7ed5fb820e509..4b8947e369f92 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
@@ -11,4 +11,3 @@ class A { A(int i); };
 // CHECK: :[[@LINE-8]]:4: error: unmatched 'NOLIN
 // CHECK: TBEGIN' comment without a subsequent 'NOLIN
 // CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp
new file mode 100644
index 0000000000000..773e023cb22c9
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp
@@ -0,0 +1,6 @@
+// RUN: clang-tidy %S/Inputs/nolintbeginend/1st-translation-unit.cpp %S/Inputs/nolintbeginend/2nd-translation-unit.cpp --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
+
+// CHECK-NOT: 1st-translation-unit.cpp:2:11: warning: single-argument constructors must be marked explicit
+// CHECK: 1st-translation-unit.cpp:5:11: warning: single-argument constructors must be marked explicit
+// CHECK: 2nd-translation-unit.cpp:2:11: warning: single-argument constructors must be marked explicit
+// CHECK-NOT: 2nd-translation-unit.cpp:5:11: warning: single-argument constructors must be marked explicit
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
index 0f2f9994c1fa0..57e1ff331c8ba 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
@@ -11,3 +11,6 @@ class A { A(int i); };
 // CHECK: TBEGIN' comment without a subsequent 'NOLIN
 // CHECK: TEND' comment [clang-tidy-nolint]
 // CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
index 8379128530180..ae97bc5b51441 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
@@ -58,30 +58,24 @@ class C6 { C6(int i); };
 // NOLINTEND(some-other-check)
 // NOLINTEND(google-explicit-constructor)
 
-// NOLINTBEGIN(google-explicit-constructor)
-// NOLINTBEGIN(some-other-check)
-class C7 { C7(int i); };
-// NOLINTEND(google-explicit-constructor)
-// NOLINTEND(some-other-check)
-
 // NOLINTBEGIN(google-explicit-constructor)
 // NOLINTBEGIN
-class C8 { C8(int i); };
+class C7 { C7(int i); };
 // NOLINTEND
 // NOLINTEND(google-explicit-constructor)
 
 // NOLINTBEGIN
 // NOLINTBEGIN(google-explicit-constructor)
-class C9 { C9(int i); };
+class C8 { C8(int i); };
 // NOLINTEND(google-explicit-constructor)
 // NOLINTEND
 
 // NOLINTBEGIN(not-closed-bracket-is-treated-as-skip-all
-class C10 { C10(int i); };
+class C9 { C9(int i); };
 // NOLINTEND(not-closed-bracket-is-treated-as-skip-all
 
 // NOLINTBEGIN without-brackets-skip-all, another-check
-class C11 { C11(int i); };
+class C10 { C10(int i); };
 // NOLINTEND without-brackets-skip-all, another-check
 
 #define MACRO(X) class X { X(int i); };
@@ -114,28 +108,28 @@ MACRO_WRAPPED_WITH_NO_LINT
 MACRO_NO_LINT_INSIDE_MACRO
 
 // NOLINTBEGIN(google*)
-class C12 { C12(int i); };
+class C11 { C11(int i); };
 // NOLINTEND(google*)
 
 // NOLINTBEGIN(*explicit-constructor)
-class C15 { C15(int i); };
+class C12 { C12(int i); };
 // NOLINTEND(*explicit-constructor)
 
 // NOLINTBEGIN(*explicit*)
-class C16 { C16(int i); };
+class C13 { C13(int i); };
 // NOLINTEND(*explicit*)
 
 // NOLINTBEGIN(-explicit-constructor)
-class C17 { C17(int x); };
+class C14 { C14(int x); };
 // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: single-argument constructors must be marked explicit
 // NOLINTEND(-explicit-constructor)
 
 // NOLINTBEGIN(google*,-google*)
-class C18 { C18(int x); };
+class C15 { C15(int x); };
 // NOLINTEND(google*,-google*)
 
 // NOLINTBEGIN(*,-google*)
-class C19 { C19(int x); };
+class C16 { C16(int x); };
 // NOLINTEND(*,-google*)
 
 int array1[10];
@@ -154,4 +148,4 @@ int array3[10];
 int array4[10];
 // NOLINTEND(*-avoid-c-arrays)
 
-// CHECK-MESSAGES: Suppressed 27 warnings (27 NOLINT).
+// CHECK-MESSAGES: Suppressed 26 warnings (26 NOLINT).

From 3271f43680da8126eada63340a19a79fe8b3943b Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Wed, 26 Jan 2022 17:48:32 +0700
Subject: [PATCH 693/946] [Test] Add test for PR53419

These tests demonstrate how suboptimal is the lowering of
equality checks for short vectors.
---
 llvm/test/CodeGen/X86/pr53419.ll | 81 ++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr53419.ll

diff --git a/llvm/test/CodeGen/X86/pr53419.ll b/llvm/test/CodeGen/X86/pr53419.ll
new file mode 100644
index 0000000000000..b5431c9dcb406
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr53419.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+
+; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
+define i1 @intrinsic_version(i8* align 1 %arg, i8* align 1 %arg1, i32 %arg2) {
+; AVX-LABEL: intrinsic_version:
+; AVX:       # %bb.0: # %bb
+; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+bb:
+  %ptr1 = bitcast i8* %arg1 to <4 x i8>*
+  %ptr2 = bitcast i8* %arg to <4 x i8>*
+  %lhs = load <4 x i8>, <4 x i8>* %ptr1, align 1
+  %rhs = load <4 x i8>, <4 x i8>* %ptr2, align 1
+  %cmp = icmp eq <4 x i8> %lhs, %rhs
+  %all_eq = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %cmp)
+  ret i1 %all_eq
+}
+
+define i1 @vector_version(i8* align 1 %arg, i8* align 1 %arg1) {
+; AVX-LABEL: vector_version:
+; AVX:       # %bb.0: # %bb
+; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vptest %xmm0, %xmm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+bb:
+  %ptr1 = bitcast i8* %arg1 to <4 x i8>*
+  %ptr2 = bitcast i8* %arg to <4 x i8>*
+  %lhs = load <4 x i8>, <4 x i8>* %ptr1, align 1
+  %rhs = load <4 x i8>, <4 x i8>* %ptr2, align 1
+  %any_ne = icmp ne <4 x i8> %lhs, %rhs
+  %any_ne_scalar = bitcast <4 x i1> %any_ne to i4
+  %all_eq = icmp eq i4 %any_ne_scalar, 0
+  ret i1 %all_eq
+}
+
+define i1 @mixed_version(i8* align 1 %arg, i8* align 1 %arg1) {
+; AVX-LABEL: mixed_version:
+; AVX:       # %bb.0: # %bb
+; AVX-NEXT:    movl (%rsi), %eax
+; AVX-NEXT:    cmpl (%rdi), %eax
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+bb:
+  %ptr1 = bitcast i8* %arg1 to <4 x i8>*
+  %ptr2 = bitcast i8* %arg to <4 x i8>*
+  %lhs = load <4 x i8>, <4 x i8>* %ptr1, align 1
+  %rhs = load <4 x i8>, <4 x i8>* %ptr2, align 1
+  %lhs_s = bitcast <4 x i8> %lhs to i32
+  %rhs_s = bitcast <4 x i8> %rhs to i32
+  %all_eq = icmp eq i32 %lhs_s, %rhs_s
+  ret i1 %all_eq
+}
+
+define i1 @scalar_version(i8* align 1 %arg, i8* align 1 %arg1) {
+; AVX-LABEL: scalar_version:
+; AVX:       # %bb.0: # %bb
+; AVX-NEXT:    movl (%rsi), %eax
+; AVX-NEXT:    cmpl (%rdi), %eax
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+bb:
+  %ptr1 = bitcast i8* %arg1 to i32*
+  %ptr2 = bitcast i8* %arg to i32*
+  %lhs = load i32, i32* %ptr1, align 1
+  %rhs = load i32, i32* %ptr2, align 1
+  %all_eq = icmp eq i32 %lhs, %rhs
+  ret i1 %all_eq
+}

From ee0c3820f8862aaf39d7ad542672092c35dda266 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Fri, 29 Oct 2021 16:09:03 +0100
Subject: [PATCH 694/946] [lldb][AArch64] Add MakeTaggedRanges to
 MemoryTagManager

This is to be used when you want to know what subranges
of a larger range have memory tagging. Like MakeTaggedRange
but memory without tags is skipped and you get a list of ranges back.

Will be used later by DumpDataExtractor to show memory tags.

MakeTaggedRanges assumes that the memory regions it is
given are sorted in ascending order and do not overlap.
For the current use case where you get regions from
GetMemoryRegions and are on some Linux like OS, this is
reasonable to assume.

I've used asserts to check those conditions. In future
any API binding will check them up front to prevent a crash.

Reviewed By: omjavaid

Differential Revision: https://reviews.llvm.org/D112824
---
 lldb/include/lldb/Target/MemoryTagManager.h   |  19 ++-
 .../Utility/MemoryTagManagerAArch64MTE.cpp    | 105 ++++++++++++++-
 .../Utility/MemoryTagManagerAArch64MTE.h      |   4 +
 .../MemoryTagManagerAArch64MTETest.cpp        | 124 ++++++++++++++++++
 4 files changed, 244 insertions(+), 8 deletions(-)

diff --git a/lldb/include/lldb/Target/MemoryTagManager.h b/lldb/include/lldb/Target/MemoryTagManager.h
index ed4d04f712ab5..859c43cb437a1 100644
--- a/lldb/include/lldb/Target/MemoryTagManager.h
+++ b/lldb/include/lldb/Target/MemoryTagManager.h
@@ -64,7 +64,7 @@ class MemoryTagManager {
   //   (which may include one or more memory regions)
   //
   // If so, return a modified range which will have been expanded
-  // to be granule aligned.
+  // to be granule aligned. Otherwise return an error.
   //
   // Tags in the input addresses are ignored and not present
   // in the returned range.
@@ -72,6 +72,23 @@ class MemoryTagManager {
       lldb::addr_t addr, lldb::addr_t end_addr,
       const lldb_private::MemoryRegionInfos &memory_regions) const = 0;
 
+  // Given a range addr to end_addr, check that end_addr >= addr.
+  // If it is not, return an error saying so.
+  // Otherwise, granule align it and return a set of ranges representing
+  // subsections of the aligned range that have memory tagging enabled.
+  //
+  // Basically a sparse version of MakeTaggedRange. Use this when you
+  // want to know which parts of a larger range have memory tagging.
+  //
+  // Regions in memory_regions should be sorted in ascending order and
+  // not overlap. (use Process GetMemoryRegions)
+  //
+  // Tags in the input addresses are ignored and not present
+  // in the returned ranges.
+  virtual llvm::Expected<std::vector<TagRange>> MakeTaggedRanges(
+      lldb::addr_t addr, lldb::addr_t end_addr,
+      const lldb_private::MemoryRegionInfos &memory_regions) const = 0;
+
   // Return the type value to use in GDB protocol qMemTags packets to read
   // allocation tags. This is named "Allocation" specifically because the spec
   // allows for logical tags to be read the same way, though we do not use that.
diff --git a/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp b/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp
index fba23401c88f9..b71de4cadb185 100644
--- a/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp
+++ b/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "MemoryTagManagerAArch64MTE.h"
+#include "llvm/Support/Error.h"
+#include <assert.h>
 
 using namespace lldb_private;
 
@@ -66,6 +68,15 @@ MemoryTagManagerAArch64MTE::ExpandToGranule(TagRange range) const {
   return TagRange(new_start, new_len);
 }
 
+static llvm::Error MakeInvalidRangeErr(lldb::addr_t addr,
+                                       lldb::addr_t end_addr) {
+  return llvm::createStringError(
+      llvm::inconvertibleErrorCode(),
+      "End address (0x%" PRIx64
+      ") must be greater than the start address (0x%" PRIx64 ")",
+      end_addr, addr);
+}
+
 llvm::Expected<MemoryTagManager::TagRange>
 MemoryTagManagerAArch64MTE::MakeTaggedRange(
     lldb::addr_t addr, lldb::addr_t end_addr,
@@ -74,13 +85,8 @@ MemoryTagManagerAArch64MTE::MakeTaggedRange(
   // We must remove tags here otherwise an address with a higher
   // tag value will always be > the other.
   ptrdiff_t len = AddressDiff(end_addr, addr);
-  if (len <= 0) {
-    return llvm::createStringError(
-        llvm::inconvertibleErrorCode(),
-        "End address (0x%" PRIx64
-        ") must be greater than the start address (0x%" PRIx64 ")",
-        end_addr, addr);
-  }
+  if (len <= 0)
+    return MakeInvalidRangeErr(addr, end_addr);
 
   // Region addresses will not have memory tags. So when searching
   // we must use an untagged address.
@@ -123,6 +129,91 @@ MemoryTagManagerAArch64MTE::MakeTaggedRange(
   return tag_range;
 }
 
+llvm::Expected<std::vector<MemoryTagManager::TagRange>>
+MemoryTagManagerAArch64MTE::MakeTaggedRanges(
+    lldb::addr_t addr, lldb::addr_t end_addr,
+    const lldb_private::MemoryRegionInfos &memory_regions) const {
+  // First check that the range is not inverted.
+  // We must remove tags here otherwise an address with a higher
+  // tag value will always be > the other.
+  ptrdiff_t len = AddressDiff(end_addr, addr);
+  if (len <= 0)
+    return MakeInvalidRangeErr(addr, end_addr);
+
+  std::vector<MemoryTagManager::TagRange> tagged_ranges;
+  // No memory regions means no tagged memory at all
+  if (memory_regions.empty())
+    return tagged_ranges;
+
+  // For the logic to work regions must be in ascending order
+  // which is what you'd have if you used GetMemoryRegions.
+  assert(std::is_sorted(
+      memory_regions.begin(), memory_regions.end(),
+      [](const MemoryRegionInfo &lhs, const MemoryRegionInfo &rhs) {
+        return lhs.GetRange().GetRangeBase() < rhs.GetRange().GetRangeBase();
+      }));
+
+  // If we're debugging userspace in an OS like Linux that uses an MMU,
+  // the only reason we'd get overlapping regions is incorrect data.
+  // It is possible that won't hold for embedded with memory protection
+  // units (MPUs) that allow overlaps.
+  //
+  // For now we're going to assume the former, as there is no good way
+  // to handle overlaps. For example:
+  // < requested range >
+  // [--  region 1   --]
+  //           [-- region 2--]
+  // Where the first region will reduce the requested range to nothing
+  // and exit early before it sees the overlap.
+  MemoryRegionInfos::const_iterator overlap = std::adjacent_find(
+      memory_regions.begin(), memory_regions.end(),
+      [](const MemoryRegionInfo &lhs, const MemoryRegionInfo &rhs) {
+        return rhs.GetRange().DoesIntersect(lhs.GetRange());
+      });
+  UNUSED_IF_ASSERT_DISABLED(overlap);
+  assert(overlap == memory_regions.end());
+
+  // Region addresses will not have memory tags so when searching
+  // we must use an untagged address.
+  MemoryRegionInfo::RangeType range(RemoveTagBits(addr), len);
+  range = ExpandToGranule(range);
+
+  // While there are regions to check and the range has non zero length
+  for (const MemoryRegionInfo &region : memory_regions) {
+    // If range we're checking has been reduced to zero length, exit early
+    if (!range.IsValid())
+      break;
+
+    // If the region doesn't overlap the range at all, ignore it.
+    if (!region.GetRange().DoesIntersect(range))
+      continue;
+
+    // If it's tagged record this sub-range.
+    // (assuming that it's already granule aligned)
+    if (region.GetMemoryTagged()) {
+      // The region found may extend outside the requested range.
+      // For example the first region might start before the range.
+      // We must only add what covers the requested range.
+      lldb::addr_t start =
+          std::max(range.GetRangeBase(), region.GetRange().GetRangeBase());
+      lldb::addr_t end =
+          std::min(range.GetRangeEnd(), region.GetRange().GetRangeEnd());
+      tagged_ranges.push_back(MemoryTagManager::TagRange(start, end - start));
+    }
+
+    // Move the range up to start at the end of the region.
+    lldb::addr_t old_end = range.GetRangeEnd();
+    // This "slides" the range so it moves the end as well.
+    range.SetRangeBase(region.GetRange().GetRangeEnd());
+    // So we set the end back to the original end address after sliding it up.
+    range.SetRangeEnd(old_end);
+    // (if the above were to try to set end < begin the range will just be set
+    // to 0 size)
+  }
+
+  return tagged_ranges;
+}
+
 llvm::Expected<std::vector<lldb::addr_t>>
 MemoryTagManagerAArch64MTE::UnpackTagsData(const std::vector<uint8_t> &tags,
                                            size_t granules /*=0*/) const {
diff --git a/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h b/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h
index 7825ba0a92490..7cda728b140f0 100644
--- a/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h
+++ b/lldb/source/Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h
@@ -36,6 +36,10 @@ class MemoryTagManagerAArch64MTE : public MemoryTagManager {
       lldb::addr_t addr, lldb::addr_t end_addr,
       const lldb_private::MemoryRegionInfos &memory_regions) const override;
 
+  llvm::Expected<std::vector<TagRange>> MakeTaggedRanges(
+      lldb::addr_t addr, lldb::addr_t end_addr,
+      const lldb_private::MemoryRegionInfos &memory_regions) const override;
+
   llvm::Expected<std::vector<lldb::addr_t>>
   UnpackTagsData(const std::vector<uint8_t> &tags,
                  size_t granules = 0) const override;
diff --git a/lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp b/lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp
index 1a34b05ac671a..78688aef3fd8a 100644
--- a/lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp
+++ b/lldb/unittests/Process/Utility/MemoryTagManagerAArch64MTETest.cpp
@@ -165,6 +165,14 @@ TEST(MemoryTagManagerAArch64MTETest, MakeTaggedRange) {
       llvm::FailedWithMessage(
           "End address (0x0) must be greater than the start address (0x1)"));
 
+  // The inversion check ignores tags in the addresses (MTE tags start at bit
+  // 56).
+  ASSERT_THAT_EXPECTED(
+      manager.MakeTaggedRange((lldb::addr_t)1 << 56,
+                              ((lldb::addr_t)2 << 56) + 0x10, memory_regions),
+      llvm::FailedWithMessage(
+          "Address range 0x0:0x10 is not in a memory tagged region"));
+
   // Adding a single region to cover the whole range
   memory_regions.push_back(MakeRegionInfo(0, 0x1000, true));
 
@@ -247,6 +255,122 @@ TEST(MemoryTagManagerAArch64MTETest, MakeTaggedRange) {
   ASSERT_EQ(*got, expected_range);
 }
 
+TEST(MemoryTagManagerAArch64MTETest, MakeTaggedRanges) {
+  MemoryTagManagerAArch64MTE manager;
+  MemoryRegionInfos memory_regions;
+
+  // Note that MakeTaggedRanges takes start/end address.
+  // Whereas TagRanges and regions take start address and size.
+
+  // Range must not be inverted
+  ASSERT_THAT_EXPECTED(
+      manager.MakeTaggedRanges(1, 0, memory_regions),
+      llvm::FailedWithMessage(
+          "End address (0x0) must be greater than the start address (0x1)"));
+
+  // We remove tags before doing the inversion check, so this is not an error.
+  // Also no regions means no tagged regions returned.
+  // (bit 56 is where MTE tags begin)
+  llvm::Expected<std::vector<MemoryTagManager::TagRange>> got =
+      manager.MakeTaggedRanges((lldb::addr_t)2 << 56,
+                               ((lldb::addr_t)1 << 56) + 0x10, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+  ASSERT_EQ(*got, std::vector<MemoryTagManager::TagRange>{});
+
+  // Cover whole range, untagged. No ranges returned.
+  memory_regions.push_back(MakeRegionInfo(0, 0x20, false));
+  got = manager.MakeTaggedRanges(0, 0x20, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+  ASSERT_EQ(*got, std::vector<MemoryTagManager::TagRange>{});
+
+  // Make the region tagged and it'll be the one range returned.
+  memory_regions.back().SetMemoryTagged(MemoryRegionInfo::eYes);
+  got = manager.MakeTaggedRanges(0, 0x20, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+  ASSERT_EQ(*got, std::vector<MemoryTagManager::TagRange>{
+                      MemoryTagManager::TagRange(0, 0x20)});
+
+  // This region will be trimmed if it's larger than the whole range.
+  memory_regions.clear();
+  memory_regions.push_back(MakeRegionInfo(0, 0x40, true));
+  got = manager.MakeTaggedRanges(0x10, 0x30, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+  ASSERT_EQ(*got, std::vector<MemoryTagManager::TagRange>{
+                      MemoryTagManager::TagRange(0x10, 0x20)});
+
+  memory_regions.clear();
+
+  // For the following tests we keep the input regions
+  // in ascending order as MakeTaggedRanges expects.
+
+  // Only start of range is tagged, only that is returned.
+  // Start the region just before the requested range to check
+  // we limit the result to the requested range.
+  memory_regions.push_back(MakeRegionInfo(0, 0x20, true));
+  got = manager.MakeTaggedRanges(0x10, 0x100, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+  ASSERT_EQ(*got, std::vector<MemoryTagManager::TagRange>{
+                      MemoryTagManager::TagRange(0x10, 0x10)});
+
+  // Add a tagged region at the end, now we get both
+  // and the middle is untagged.
+  // <tagged: [0x0, 0x20)>
+  // <...>
+  // <tagged: [0xE0, 0x120)>
+  // The range added here is deliberately over the end of the
+  // requested range to show that we trim the end.
+  memory_regions.push_back(MakeRegionInfo(0xE0, 0x40, true));
+  got = manager.MakeTaggedRanges(0x10, 0x110, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+
+  std::vector<MemoryTagManager::TagRange> expected{
+      MemoryTagManager::TagRange(0x10, 0x10),
+      MemoryTagManager::TagRange(0xE0, 0x30)};
+  ASSERT_EQ(*got, expected);
+
+  // Now add a middle tagged region.
+  // <tagged: [0x0, 0x20)>
+  // <...>
+  // <tagged: [0x90, 0xB0)>
+  // <...>
+  // <tagged: [0xE0, 0x120)>
+  memory_regions.insert(std::next(memory_regions.begin()),
+                        MakeRegionInfo(0x90, 0x20, true));
+
+  // As the given regions are in ascending order, the resulting
+  // tagged ranges are also. So this new range goes in the middle.
+  expected.insert(std::next(expected.begin()),
+                  MemoryTagManager::TagRange(0x90, 0x20));
+  got = manager.MakeTaggedRanges(0x10, 0x110, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+  ASSERT_EQ(*got, expected);
+
+  // Then if we add untagged regions in between the tagged,
+  // the output should stay the same.
+  // <tagged:   [0x0, 0x20)>
+  // <untagged: [0x20, 0x90)>
+  // <tagged:   [0x90, 0xB0)>
+  // <untagged: [0xB0, 0xE0)>
+  // <tagged:   [0xE0, 0x120)>
+  memory_regions.insert(std::next(memory_regions.begin()),
+                        MakeRegionInfo(0x20, 0x70, false));
+  memory_regions.insert(std::prev(memory_regions.end()),
+                        MakeRegionInfo(0xB0, 0x30, false));
+  got = manager.MakeTaggedRanges(0x10, 0x110, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+  ASSERT_EQ(*got, expected);
+
+  // Finally check that we handle only having the end of the range.
+  memory_regions.clear();
+  expected.clear();
+
+  memory_regions.push_back(MakeRegionInfo(0x100, 0x10, true));
+  expected.push_back(MemoryTagManager::TagRange(0x100, 0x10));
+  got = manager.MakeTaggedRanges(0x10, 0x110, memory_regions);
+  ASSERT_THAT_EXPECTED(got, llvm::Succeeded());
+  ASSERT_EQ(*got, expected);
+}
+
 TEST(MemoryTagManagerAArch64MTETest, RemoveTagBits) {
   MemoryTagManagerAArch64MTE manager;
 

From 66804666638292406d0b7039514642d0a968dcfd Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Tue, 25 Jan 2022 15:22:28 +0100
Subject: [PATCH 695/946] [AMDGPU][NFC] Pre-commit regenerated test

---
 .../postlegalizercombiner-select.mir          | 48 ++++++++++---------
 1 file changed, 26 insertions(+), 22 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
index 75840bfda5631..4262fd9e8f2d2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
@@ -10,13 +10,14 @@ body:             |
 
     ; GCN-LABEL: name: select_from_different_results_of_unmerge_values
     ; GCN: liveins: $vgpr0
-    ; GCN: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; GCN: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GCN: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
-    ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; GCN: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV]], [[UV1]]
-    ; GCN: $vgpr0 = COPY [[SELECT]](s32)
-    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
+    ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
+    ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV]], [[UV1]]
+    ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
     %0:_(<2 x s32>) = G_IMPLICIT_DEF
     %1:_(s32) = COPY $vgpr0
     %2:_(s1) = G_TRUNC %1:_(s32)
@@ -36,11 +37,12 @@ body:             |
 
     ; GCN-LABEL: name: select_from_same_results_of_unmerge_values
     ; GCN: liveins: $vgpr0
-    ; GCN: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; GCN: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[DEF]](<2 x s32>)
-    ; GCN: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
-    ; GCN: $vgpr0 = COPY [[TRUNC]](s32)
-    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
+    ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[DEF]](<2 x s32>)
+    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
+    ; GCN-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
     %0:_(<2 x s32>) = G_IMPLICIT_DEF
     %1:_(s32) = COPY $vgpr0
     %2:_(s1) = G_TRUNC %1:_(s32)
@@ -60,13 +62,14 @@ body: |
 
     ; GCN-LABEL: name: select_different_result_from_different_unmerge_values_with_the_same_source
     ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
-    ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
-    ; GCN: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
-    ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; GCN: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; GCN: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV1]], [[UV7]]
-    ; GCN: $vgpr0 = COPY [[SELECT]](s32)
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4
+    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
+    ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GCN-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV1]], [[UV7]]
+    ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](s32)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s1) = G_TRUNC %1:_(s32)
@@ -85,9 +88,10 @@ body: |
 
     ; GCN-LABEL: name: select_same_result_from_different_unmerge_values_with_the_same_source
     ; GCN: liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4
-    ; GCN: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
-    ; GCN: $vgpr0 = COPY [[UV1]](s32)
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+    ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
+    ; GCN-NEXT: $vgpr0 = COPY [[UV1]](s32)
     %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
     %1:_(s32) = COPY $vgpr4
     %2:_(s1) = G_TRUNC %1:_(s32)

From 4723f3cf03a90473dc9ae006e65d9019f00cb771 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Tue, 25 Jan 2022 15:20:42 +0100
Subject: [PATCH 696/946] [AMDGPU][GlobalISel] Combine unmerge of undef

Fold (unmerge undef) -> undef, undef, ...

Differential Revision: https://reviews.llvm.org/D118138
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  5 +++++
 .../include/llvm/Target/GlobalISel/Combine.td | 11 +++++++++-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 14 ++++++++++++
 .../GlobalISel/call-outgoing-stack-args.ll    |  4 ----
 .../postlegalizer-combiner-unmerge-undef.mir  | 22 +++++++++++++++++++
 .../postlegalizercombiner-select.mir          | 14 ++++--------
 6 files changed, 55 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 1d07d7d6e7aed..45c27c25aea0b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -323,6 +323,11 @@ class CombinerHelper {
   void applyCombineUnmergeConstant(MachineInstr &MI,
                                    SmallVectorImpl<APInt> &Csts);
 
+  /// Transform G_UNMERGE G_IMPLICIT_DEF -> G_IMPLICIT_DEF, G_IMPLICIT_DEF, ...
+  bool
+  matchCombineUnmergeUndef(MachineInstr &MI,
+                           std::function<void(MachineIRBuilder &)> &MatchInfo);
+
   /// Transform X, Y<dead> = G_UNMERGE Z -> X = G_TRUNC Z.
   bool matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
   void applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 9736e52a7b5bb..4859cf6b57b71 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -535,6 +535,14 @@ def unmerge_cst : GICombineRule<
   (apply [{ Helper.applyCombineUnmergeConstant(*${d}, ${info}); }])
 >;
 
+// Fold (unmerge undef) -> undef, undef, ...
+def unmerge_undef : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$info),
+  (match (wip_match_opcode G_UNMERGE_VALUES): $root,
+         [{ return Helper.matchCombineUnmergeUndef(*${root}, ${info}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])
+>;
+
 // Transform x,y<dead> = unmerge z -> x = trunc z.
 def unmerge_dead_to_trunc : GICombineRule<
   (defs root:$d),
@@ -844,7 +852,8 @@ def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      propagate_undef_any_op,
                                      propagate_undef_all_ops,
                                      propagate_undef_shuffle_mask,
-                                     erase_undef_store]>;
+                                     erase_undef_store,
+                                     unmerge_undef]>;
 
 def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         binop_same_val, binop_left_to_zero,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 4b5a19155c672..d6a009744161d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1748,6 +1748,20 @@ void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
   MI.eraseFromParent();
 }
 
+bool CombinerHelper::matchCombineUnmergeUndef(
+    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+  unsigned SrcIdx = MI.getNumOperands() - 1;
+  Register SrcReg = MI.getOperand(SrcIdx).getReg();
+  MatchInfo = [&MI](MachineIRBuilder &B) {
+    unsigned NumElems = MI.getNumOperands() - 1;
+    for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
+      Register DstReg = MI.getOperand(Idx).getReg();
+      B.buildUndef(DstReg);
+    }
+  };
+  return isa<GImplicitDef>(MRI.getVRegDef(SrcReg));
+}
+
 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
          "Expected an unmerge");
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 0d8f48c1291c4..0519a9c7db321 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -25,7 +25,6 @@ define amdgpu_kernel void @kernel_caller_stack() {
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 11
 ; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 12
-; MUBUF-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
@@ -38,7 +37,6 @@ define amdgpu_kernel void @kernel_caller_stack() {
 ; FLATSCR-NEXT:    s_add_u32 flat_scratch_lo, s0, s3
 ; FLATSCR-NEXT:    s_mov_b32 s32, 0
 ; FLATSCR-NEXT:    s_addc_u32 flat_scratch_hi, s1, 0
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s32
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 9
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 10
@@ -256,7 +254,6 @@ define void @func_caller_stack() {
 ; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
 ; MUBUF-NEXT:    v_mov_b32_e32 v1, 12
 ; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
-; MUBUF-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
@@ -281,7 +278,6 @@ define void @func_caller_stack() {
 ; FLATSCR-NEXT:    v_writelane_b32 v40, s33, 2
 ; FLATSCR-NEXT:    s_mov_b32 s33, s32
 ; FLATSCR-NEXT:    s_add_i32 s32, s32, 16
-; FLATSCR-NEXT:    scratch_store_dword off, v0, s32
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 9
 ; FLATSCR-NEXT:    scratch_store_dword off, v0, s32 offset:4
 ; FLATSCR-NEXT:    v_mov_b32_e32 v0, 10
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
new file mode 100644
index 0000000000000..f0aa0b09a9544
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizer-combiner-unmerge-undef.mir
@@ -0,0 +1,22 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s
+
+---
+name: split_unmerge_undef
+tracksRegLiveness: true
+legalized: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-LABEL: name: split_unmerge_undef
+    ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-NEXT: {{  $}}
+    %ptr1:_(p1) = COPY $vgpr0_vgpr1
+    %ptr2:_(p1) = COPY $vgpr2_vgpr3
+    %ptr3:_(p1) = COPY $vgpr4_vgpr5
+    %vec:_(<3 x s32>) = G_IMPLICIT_DEF
+    %p1:_(s32), %p2:_(s32), %p3:_(s32) = G_UNMERGE_VALUES %vec
+    G_STORE %p1:_(s32), %ptr1:_(p1) :: (store (s32), addrspace 1, align 4)
+    G_STORE %p2:_(s32), %ptr2:_(p1) :: (store (s32), addrspace 1, align 4)
+    G_STORE %p3:_(s32), %ptr3:_(p1) :: (store (s32), addrspace 1, align 4)
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
index 4262fd9e8f2d2..c0f96d210f588 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/postlegalizercombiner-select.mir
@@ -11,12 +11,8 @@ body:             |
     ; GCN-LABEL: name: select_from_different_results_of_unmerge_values
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
-    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32)
-    ; GCN-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<2 x s32>)
-    ; GCN-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[TRUNC]](s1), [[UV]], [[UV1]]
-    ; GCN-NEXT: $vgpr0 = COPY [[SELECT]](s32)
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
     ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
     %0:_(<2 x s32>) = G_IMPLICIT_DEF
     %1:_(s32) = COPY $vgpr0
@@ -38,10 +34,8 @@ body:             |
     ; GCN-LABEL: name: select_from_same_results_of_unmerge_values
     ; GCN: liveins: $vgpr0
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
-    ; GCN-NEXT: [[BITCAST:%[0-9]+]]:_(s64) = G_BITCAST [[DEF]](<2 x s32>)
-    ; GCN-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[BITCAST]](s64)
-    ; GCN-NEXT: $vgpr0 = COPY [[TRUNC]](s32)
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; GCN-NEXT: $vgpr0 = COPY [[DEF]](s32)
     ; GCN-NEXT: SI_RETURN_TO_EPILOG $vgpr0
     %0:_(<2 x s32>) = G_IMPLICIT_DEF
     %1:_(s32) = COPY $vgpr0

From 04754af925053efdc91fd0cbe045feb7578ad1ae Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 26 Jan 2022 11:33:08 +0000
Subject: [PATCH 697/946] Fix MSVC 'not all control paths return a value'
 warning. NFC.

---
 clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
index 65ade4387a5eb..7496e968469ce 100644
--- a/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
+++ b/clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp
@@ -205,6 +205,7 @@ static bool treatLikePointer(QualType Ty, PLTClass C, ASTContext &Context) {
   case PLTClass::Pointer:
     return isKnownPointerLikeType(Ty, Context);
   }
+  llvm_unreachable("Unknown PLTClass enum");
 }
 
 // FIXME: move over the other `maybe` functionality from Stencil. Should all be

From de3f81557ae33a01862dd19b9db451d00737b67c Mon Sep 17 00:00:00 2001
From: OCHyams <orlando.hyams@sony.com>
Date: Wed, 26 Jan 2022 11:09:21 +0000
Subject: [PATCH 698/946] [Dexter] Remove false requirement of lldb for dexter
 regression tests on Windows

Not quite NFC because a little work was required to configure some tests to run
on Windows at all.

Before this patch on Windows:

    $ llvm-lit cross-project-tests\debuginfo-tests\dexter\feature-tests
       Unsupported: 49
       Passed     : 23

After this patch on Windows:

    $ llvm-lit cross-project-tests\debuginfo-tests\dexter\feature-tests
       Unsupported      : 27
       Passed           : 39
       Expectedly failed:  6

There are 3 main changes here. The first is to add a few more substitutions in
cross-project-tests/lit.cfg.py so that tests need to use specific flags can
still use the dexter regression test defaults for the native platform. These
are:

     %dexter_regression_test_debugger
     %dexter_regression_test_builder
     %dexter_regression_test_cflags
     %dexter_regression_test_ldflags

Tests that now use these options and therefore can be run on Windows too
(though the second is still failing for unknown reasons):

    cross-project-tests/debuginfo-tests/dexte/feature_tests
        /subtools/clang-opt-bisect/clang-opt-bisect.cpp
        /subtools/test/source-root-dir.cpp

The second change is to remove spurious `REQUIRES: system-linux, lldb` and
`UNSUPPORTED: system-windows` directives, and make changes to lit.local.cfg
files that have the same effect. I've also added comments to the genuine
REQUIRES, UNSUPPORTED, and XFAIL directives so it's easier to understand
requirements at a glance. The most common reason for a test to not be supported
on Windows is that it uses DexLimitSteps, DexDeclareAddress, or DexCommandLine,
none of which are supported in the dbgeng driver.

There are two failures on Windows that were previously hidden, which I've
XFAILed:

    cross-project-tests/debuginfo-tests/dexter/feature_tests
        /commands/perfect/dex_finish_test/default_conditional.cpp
        /commands/perfect/dex_finish_test/default_conditional_hit_count.cpp

And two that were easy to fix:

    cross-project-tests/debuginfo-tests/dexter/feature_tests
        /commands/perfect/dex_finish_test/default_simple.cpp
        /commands/perfect/dex_finish_test/default_hit_count.cpp

Lastly, I've set three directories as unsupported.

    cross-project-tests/debuginfo-tests/dexter/feature_tests
        /commands/perfect/limit_steps
        /commands/perfect/dex_declare_address
        /commands/perfect/dex_declare_file

The first two are unsupported on Windows because they contains tests for the
DexLimitSteps and DexDeclareAddress commands which aren't supported in the
dbgeng driver. The third is unsupported on all platforms as the tests involve
invoking clang directly, which isn't currently a supported way of building
tests for dexter in lit (it can cause problems for cross compilers that can
target the host, as the tests use the default triple and linker, which may
be aligned for the default target, not host).

Tested on Windows and Linux.

Reviewed By: jmorse

Differential Revision: https://reviews.llvm.org/D118048
---
 .../commands/penalty/missing_dex_address.cpp  |  3 ++-
 .../commands/perfect/command_line.c           |  3 ++-
 .../dex_declare_address/address_after_ref.cpp |  2 --
 .../dex_declare_address/address_hit_count.cpp |  2 --
 .../expression_address.cpp                    |  2 --
 .../dex_declare_address/identical_address.cpp |  2 --
 .../perfect/dex_declare_address/lit.local.cfg |  3 +++
 .../dex_declare_address/multiple_address.cpp  |  2 --
 .../dex_declare_address/offset_address.cpp    |  2 --
 .../dex_declare_address/self_comparison.cpp   |  2 --
 .../perfect/dex_declare_file/lit.local.cfg    |  3 +++
 .../dex_commands/source_root_dir.dex          |  1 -
 .../dex_finish_test/default_conditional.cpp   |  6 ++---
 .../default_conditional_hit_count.cpp         |  4 +--
 .../dex_finish_test/default_hit_count.cpp     |  4 +--
 .../dex_finish_test/default_simple.cpp        |  8 +++---
 .../limit_steps_conditional.cpp               |  3 ++-
 .../limit_steps_conditional_hit_count.cpp     |  3 ++-
 .../dex_finish_test/limit_steps_hit_count.cpp |  3 ++-
 .../dex_finish_test/limit_steps_simple.cpp    |  3 ++-
 .../perfect/limit_steps/hit_count.cpp         |  2 --
 .../limit_steps_check_json_step_count.cpp     |  2 --
 .../limit_steps/limit_steps_expect_loop.cpp   |  2 --
 .../limit_steps/limit_steps_expect_value.cpp  |  2 --
 .../limit_steps/limit_steps_line_mismatch.cpp |  5 ----
 .../limit_steps_overlapping_ranges.cpp        |  2 --
 .../limit_steps_same_line_conditional.cpp     |  2 --
 .../perfect/limit_steps/lit.local.cfg         |  4 +++
 .../perfect/limit_steps/unconditional.cpp     |  2 --
 .../commands/perfect/lit.local.cfg            |  2 --
 .../clang-opt-bisect/clang-opt-bisect.cpp     | 12 +++++----
 .../subtools/test/address_printing.cpp        |  3 ++-
 .../test/err_limit_steps_no_values.cpp        |  3 +--
 .../subtools/test/label_another_line.cpp      |  5 +---
 .../subtools/test/label_offset.cpp            |  1 -
 .../subtools/test/source-root-dir.cpp         | 12 ++++-----
 .../dexter/feature_tests/subtools/view.cpp    |  1 -
 cross-project-tests/lit.cfg.py                | 27 +++++++++++--------
 38 files changed, 63 insertions(+), 87 deletions(-)
 create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/lit.local.cfg
 create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/lit.local.cfg
 create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/lit.local.cfg
 delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/lit.local.cfg

diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/missing_dex_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/missing_dex_address.cpp
index 0d3cc0978f640..07a7557e87c84 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/missing_dex_address.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty/missing_dex_address.cpp
@@ -2,7 +2,8 @@
 //      Test that when a \DexDeclareAddress never resolves to a value, it is
 //      counted as a missing value in any \DexExpectWatchValues.
 //
-// REQUIRES: system-linux
+// The dbgeng driver doesn't support \DexDeclareAddress yet.
+// UNSUPPORTED: system-windows
 //
 // RUN: not %dexter_regression_test -- %s | FileCheck %s
 // CHECK: missing_dex_address.cpp
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/command_line.c b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/command_line.c
index 06887707ab858..7ec1050e9baae 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/command_line.c
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/command_line.c
@@ -1,4 +1,5 @@
-// UNSUPPORTED: dbgeng
+// The dbgeng driver doesn't support \DexCommandLine yet.
+// UNSUPPORTED: system-windows
 //
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: command_line.c:
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/address_after_ref.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/address_after_ref.cpp
index 6ce9b8ac2e03c..a91bed42960e1 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/address_after_ref.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/address_after_ref.cpp
@@ -2,8 +2,6 @@
 //      Test that a \DexDeclareAddress value can have its value defined after
 //      the first reference to that value.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: address_after_ref.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/address_hit_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/address_hit_count.cpp
index c064ea90d39aa..4538880352944 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/address_hit_count.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/address_hit_count.cpp
@@ -4,8 +4,6 @@
 //      expression after the target line has been stepped on a given number of
 //      times.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: address_hit_count.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/expression_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/expression_address.cpp
index 72ad1196ec581..9cea93eb06d37 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/expression_address.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/expression_address.cpp
@@ -2,8 +2,6 @@
 //      Test that a \DexDeclareAddress value can be used to compare the
 //      addresses of two local variables that refer to the same address.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: expression_address.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/identical_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/identical_address.cpp
index ea495d31bdfab..4ce590dd7ad17 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/identical_address.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/identical_address.cpp
@@ -2,8 +2,6 @@
 //      Test that a \DexDeclareAddress value can be used to compare two equal
 //      pointer variables.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: identical_address.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/lit.local.cfg b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/lit.local.cfg
new file mode 100644
index 0000000000000..95de055175d07
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/lit.local.cfg
@@ -0,0 +1,3 @@
+# The dbgeng driver doesn't support DexDeclareAddress yet.
+if config.is_msvc:
+    config.unsupported = True
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/multiple_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/multiple_address.cpp
index c8bcbea24b3b3..b0ec7958e9709 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/multiple_address.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/multiple_address.cpp
@@ -2,8 +2,6 @@
 //      Test that multiple \DexDeclareAddress references that point to different
 //      addresses can be used within a single \DexExpectWatchValue.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: multiple_address.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/offset_address.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/offset_address.cpp
index e778cd1075473..6d4f928786e48 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/offset_address.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/offset_address.cpp
@@ -2,8 +2,6 @@
 //      Test that a \DexDeclareAddress value can be used to compare two pointer
 //      variables that have a fixed offset between them.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: offset_address.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/self_comparison.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/self_comparison.cpp
index 77801b70e4fb6..93d9787b59768 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/self_comparison.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address/self_comparison.cpp
@@ -2,8 +2,6 @@
 //      Test that a \DexDeclareAddress value can be used to check the change in
 //      value of a variable over time, relative to its initial value.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: self_comparison.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/lit.local.cfg b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/lit.local.cfg
new file mode 100644
index 0000000000000..4fa170f984801
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/lit.local.cfg
@@ -0,0 +1,3 @@
+# FIXME: These tests compile code with %clang substitution which needs to run
+# natively but doesn't specify target triple (i.e. default triple is used).
+config.unsupported = True
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex
index 115182315df5e..e186cabfd25bb 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/dex_commands/source_root_dir.dex
@@ -2,7 +2,6 @@
 ##    Check that \DexDeclareFile's file declaration can be made relative to the
 ##    --source-root-dir path.
 
-# REQUIRES: lldb
 # UNSUPPORTED: system-darwin
 
 # RUN: %clang %S/../source/test.cpp -O0 -g -o %t
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_conditional.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_conditional.cpp
index 977f3fa872b56..4dacdbd4f4e95 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_conditional.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_conditional.cpp
@@ -1,11 +1,11 @@
+// FIXME: Feature appears to be broken on Windows with dbgeng.
+// XFAIL: system-windows
 // Purpose:
 //      Test that \DexFinishTest can be used with a condition, so the test exits
 //      when the line referenced by \DexFinishTest is stepped on and the given
 //      condition (x == 5) is satisfied.
 //      Tests using the default controller (no \DexLimitSteps).
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: default_conditional.cpp
 
@@ -15,4 +15,4 @@ int main() {
 }
 
 // DexFinishTest('x', 5, on_line=ref('finish_line'))
-// DexExpectWatchValue('x', 0, 1, 2, 3, 4, 5)
+// DexExpectWatchValue('x', 0, 1, 2, 3, 4, 5, on_line=ref('finish_line'))
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_conditional_hit_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_conditional_hit_count.cpp
index 3bda2ed3c0bd6..680dbd0ca1a6a 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_conditional_hit_count.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_conditional_hit_count.cpp
@@ -1,3 +1,5 @@
+// FIXME: Feature appears to be broken on Windows with dbgeng.
+// XFAIL: system-windows
 // Purpose:
 //      Test that \DexFinishTest can be used with a combination of a hit_count
 //      and a condition, so that the test exits after the line referenced
@@ -5,8 +7,6 @@
 //      given number of times.
 //      Tests using the default controller (no \DexLimitSteps).
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: default_conditional_hit_count.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_hit_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_hit_count.cpp
index b434dffdfbfc8..1d700d74ba01c 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_hit_count.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_hit_count.cpp
@@ -4,8 +4,6 @@
 //      specific number of times.
 //      Tests using the default controller (no \DexLimitSteps).
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: default_hit_count.cpp
 
@@ -15,4 +13,4 @@ int main() {
 }
 
 // DexFinishTest(on_line=ref('finish_line'), hit_count=5)
-// DexExpectWatchValue('x', 0, 1, 2, 3, 4, 5)
+// DexExpectWatchValue('x', 0, 1, 2, 3, 4, 5, on_line=ref('finish_line'))
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_simple.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_simple.cpp
index dc569f606ff7f..953ba394d8746 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_simple.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/default_simple.cpp
@@ -4,16 +4,14 @@
 //      is stepped on.
 //      Tests using the default controller (no \DexLimitSteps).
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: default_simple.cpp
 
 int main() {
-    int x = 0; // DexLabel('start_line')
-    x = 1;
+    int x = 0;
+    x = 1; // DexLabel('start_line')
     x = 2; // DexLabel('finish_line')
 }
 
 // DexFinishTest(on_line=ref('finish_line'))
-// DexExpectWatchValue('x', 0, 1)
+// DexExpectWatchValue('x', 0, 1, from_line=ref('start_line'), to_line=ref('finish_line'))
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_conditional.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_conditional.cpp
index 91506f069feee..f7a03c1f23e62 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_conditional.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_conditional.cpp
@@ -4,7 +4,8 @@
 //      condition (x == 5) is satisfied.
 //      Test using the conditional controller (using \DexLimitSteps).
 //
-// REQUIRES: system-linux
+// The dbgeng driver doesn't support \DexLimitSteps yet.
+// UNSUPPORTED: system-windows
 //
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_conditional.cpp
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_conditional_hit_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_conditional_hit_count.cpp
index 88dedf30bb1da..766ae531d44a9 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_conditional_hit_count.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_conditional_hit_count.cpp
@@ -5,7 +5,8 @@
 //      given number of times.
 //      Test using the conditional controller (using \DexLimitSteps).
 //
-// REQUIRES: system-linux
+// The dbgeng driver doesn't support \DexLimitSteps yet.
+// UNSUPPORTED: system-windows
 //
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_conditional_hit_count.cpp
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_hit_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_hit_count.cpp
index c88f5f4d925c5..7c57b1726111c 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_hit_count.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_hit_count.cpp
@@ -4,7 +4,8 @@
 //      specific number of times.
 //      Test using the conditional controller (using \DexLimitSteps).
 //
-// REQUIRES: system-linux
+// The dbgeng driver doesn't support \DexLimitSteps yet.
+// UNSUPPORTED: system-windows
 //
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_hit_count.cpp
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_simple.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_simple.cpp
index 5187b5084d4bc..06836a31e51f7 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_simple.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_finish_test/limit_steps_simple.cpp
@@ -4,7 +4,8 @@
 //      is stepped on.
 //      Test using the conditional controller (using \DexLimitSteps).
 //
-// REQUIRES: system-linux
+// The dbgeng driver doesn't support \DexLimitSteps yet.
+// UNSUPPORTED: system-windows
 //
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_simple.cpp
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp
index e7cd903c756c0..059d501a394f6 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/hit_count.cpp
@@ -2,8 +2,6 @@
 //      Test that \DexLimitSteps keyword argument hit_count correctly limits
 //      the number of times the command can trigger.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: hit_count.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp
index 3c96035b9bc49..ac5d2e91a66c7 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_check_json_step_count.cpp
@@ -1,8 +1,6 @@
 // Purpose:
 //      Check number of step lines are correctly reported in json output.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test --verbose -- %s | FileCheck %s
 // CHECK: limit_steps_check_json_step_count.cpp
 // CHECK: ## BEGIN ##
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp
index 5dc7cd2037826..f2ec583523556 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_loop.cpp
@@ -2,8 +2,6 @@
 //      Check the DexLimit steps only gathers step info for 2 iterations of a
 //      for loop.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_expect_loop.cpp:
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp
index 056531317a36d..8808df640da8c 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_expect_value.cpp
@@ -1,8 +1,6 @@
 // Purpose:
 //      Ensure that limited stepping breaks for all expected values.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_expect_value.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp
index 711c006e3a8df..e56d4e42e2cc7 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_line_mismatch.cpp
@@ -3,11 +3,6 @@
 //      doesn't exist. This can happen due to optimisations or label is on an
 //      empty line.
 //
-// FIXME: Windows regression tests run with dbgeng. \DexLimitSteps isn't yet
-// supported with dbgeng.
-//
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_line_mismatch.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp
index 251eef776ac65..6801b249d759e 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_overlapping_ranges.cpp
@@ -1,8 +1,6 @@
 // Purpose:
 //      Ensure that multiple overlapping \DexLimitSteps ranges do not interfere.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_overlapping_ranges.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp
index f71b54acb080d..71c13b76c3992 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/limit_steps_same_line_conditional.cpp
@@ -1,8 +1,6 @@
 // Purpose:
 //      Test that LimitStep commands can exist on the same from line.
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: limit_steps_same_line_conditional.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/lit.local.cfg b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/lit.local.cfg
new file mode 100644
index 0000000000000..4f3bbe2165a4a
--- /dev/null
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/lit.local.cfg
@@ -0,0 +1,4 @@
+# The dbgeng driver doesn't support DexLimitSteps yet.
+if config.is_msvc:
+    config.unsupported = True
+
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp
index af3ead4b0b24b..d97d51190b9f3 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/limit_steps/unconditional.cpp
@@ -2,8 +2,6 @@
 //      Test that \DexLimitSteps can be used without a condition (i.e. the
 //      breakpoint range is set any time from_line is stepped on).
 //
-// REQUIRES: system-linux
-//
 // RUN: %dexter_regression_test -- %s | FileCheck %s
 // CHECK: unconditional.cpp
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/lit.local.cfg b/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/lit.local.cfg
deleted file mode 100644
index 7d1dd8b7b9026..0000000000000
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/lit.local.cfg
+++ /dev/null
@@ -1,2 +0,0 @@
-if not 'lldb' in config.available_features:
-    config.unsupported = True
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp
index b0e1aabbe8df6..f3cc20ec45bcf 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect/clang-opt-bisect.cpp
@@ -1,12 +1,14 @@
 // Purpose:
 //     Check the `clang-opt-bisect` tool runs with typical input.
 //
-// REQUIRES: system-linux, lldb
-//
 // RUN: true
-// RUN: %dexter_base clang-opt-bisect --debugger 'lldb' --builder 'clang' \
-// RUN:     --cflags "-O0 -g" -- %s \
-// RUN:     | FileCheck %s
+// RUN: %dexter_base clang-opt-bisect \
+// RUN:     --debugger %dexter_regression_test_debugger \
+// RUN:     --builder %dexter_regression_test_builder \
+// RUN:     --cflags "%dexter_regression_test_cflags" \
+// RUN:     --ldflags "%dexter_regression_test_ldflags" \
+// RUN:     -- %s \
+// RUN: | FileCheck %s
 // CHECK: running pass 0
 // CHECK: wrote{{.*}}per_pass_score
 // CHECK: wrote{{.*}}pass-summary
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/address_printing.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/address_printing.cpp
index ecd7cd2d49584..ffb4defac6624 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/address_printing.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/address_printing.cpp
@@ -8,7 +8,8 @@
 //      display the address properly; if it is implemented, this test should be
 //      updated.
 //
-// REQUIRES: lldb, system-linux
+// The dbgeng driver doesn't support \DexLimitSteps yet.
+// UNSUPPORTED: system-windows
 //
 // RUN: not %dexter_regression_test -v -- %s | FileCheck %s
 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp
index 17e59942bee59..64e41495ece10 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/err_limit_steps_no_values.cpp
@@ -3,8 +3,7 @@
 //    in a \DexLimitSteps command results in a useful error message.
 //    Use --binary switch to trick dexter into skipping the build step.
 //
-// REQUIRES: system-linux
 // RUN: not %dexter_base test --binary %s --debugger 'lldb' -- %s | FileCheck %s
-// CHECK: parser error:{{.*}}err_limit_steps_no_values.cpp(10): expected 0 or at least 2 positional arguments
+// CHECK: parser error:{{.*}}err_limit_steps_no_values.cpp(9): expected 0 or at least 2 positional arguments
 
 // DexLimitSteps('test')
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_another_line.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_another_line.cpp
index c7cbe7ca44e08..0a95336475153 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_another_line.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_another_line.cpp
@@ -1,6 +1,3 @@
-// REQUIRES: lldb
-// UNSUPPORTED: system-windows
-//
 // Purpose:
 //    Check that the optional keyword argument 'on_line' makes a \DexLabel label
 //    that line instead of the line the command is found on.
@@ -13,5 +10,5 @@ int main() {
   return result;
 }
 
-// DexLabel('test', on_line=13)
+// DexLabel('test', on_line=10)
 // DexExpectWatchValue('result', '0', on_line=ref('test'))
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_offset.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_offset.cpp
index cad7d0cd3ecea..81d4ca47b1144 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_offset.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/label_offset.cpp
@@ -1,4 +1,3 @@
-// REQUIRES: lldb
 // Purpose:
 //      Check that we can use label-relative line numbers.
 //
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/source-root-dir.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/source-root-dir.cpp
index 94738b053f2d3..77273912d1b2e 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/source-root-dir.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/test/source-root-dir.cpp
@@ -1,12 +1,10 @@
-// XFAIL:*
 // This test started failing recently for unknown reasons.
-
-// REQUIRES: lldb
-// UNSUPPORTED: system-windows
-//
+// XFAIL:*
 // RUN: %dexter --fail-lt 1.0 -w \
-// RUN:     --builder 'clang' --debugger 'lldb' \
-// RUN:     --cflags "-O0 -glldb -fdebug-prefix-map=%S=/changed" \
+// RUN:     --builder %dexter_regression_test_builder \
+// RUN:     --debugger %dexter_regression_test_debugger \
+// RUN:     --cflags "%dexter_regression_test_cflags -fdebug-prefix-map=%S=/changed" \
+// RUN:     --ldflags "%dexter_regression_test_ldflags" \
 // RUN:     --source-root-dir=%S --debugger-use-relative-paths -- %s
 
 #include <stdio.h>
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/view.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/view.cpp
index a48410a53e10c..d5b04b3fe8eff 100644
--- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/view.cpp
+++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/view.cpp
@@ -1,4 +1,3 @@
-// REQUIRES: lldb
 // Purpose:
 //      Check the `view` subtool works with typical inputs.
 //
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index 6b3284d10cecf..51a6dd514a875 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -111,18 +111,23 @@ def configure_dexter_substitutions():
   if platform.system() == 'Windows':
     # The Windows builder script uses lld.
     dependencies = ['clang', 'lld-link']
-    dexter_regression_test_builder = '--builder clang-cl_vs2015'
-    dexter_regression_test_debugger = '--debugger dbgeng'
-    dexter_regression_test_cflags = '--cflags "/Zi /Od"'
-    dexter_regression_test_ldflags = '--ldflags "/Zi"'
+    dexter_regression_test_builder = 'clang-cl_vs2015'
+    dexter_regression_test_debugger = 'dbgeng'
+    dexter_regression_test_cflags = '/Zi /Od'
+    dexter_regression_test_ldflags = '/Zi'
   else:
     # Use lldb as the debugger on non-Windows platforms.
     dependencies = ['clang', 'lldb']
-    dexter_regression_test_builder = '--builder clang'
-    dexter_regression_test_debugger = "--debugger lldb"
-    dexter_regression_test_cflags = '--cflags "-O0 -glldb"'
+    dexter_regression_test_builder = 'clang'
+    dexter_regression_test_debugger = 'lldb'
+    dexter_regression_test_cflags = '-O0 -glldb'
     dexter_regression_test_ldflags = ''
 
+  tools.append(ToolSubst('%dexter_regression_test_builder', dexter_regression_test_builder))
+  tools.append(ToolSubst('%dexter_regression_test_debugger', dexter_regression_test_debugger))
+  tools.append(ToolSubst('%dexter_regression_test_cflags', dexter_regression_test_cflags))
+  tools.append(ToolSubst('%dexter_regression_test_ldflags', dexter_regression_test_cflags))
+
   # Typical command would take the form:
   # ./path_to_py/python.exe ./path_to_dex/dexter.py test --fail-lt 1.0 -w --builder clang --debugger lldb --cflags '-O0 -g'
   # Exclude build flags for %dexter_regression_base.
@@ -132,15 +137,15 @@ def configure_dexter_substitutions():
     '"{}"'.format(dexter_path),
     'test',
     '--fail-lt 1.0 -w',
-    dexter_regression_test_debugger])
+    '--debugger', dexter_regression_test_debugger])
   tools.append(ToolSubst('%dexter_regression_base', dexter_regression_test_base))
 
   # Include build flags for %dexter_regression_test.
   dexter_regression_test_build = ' '.join([
     dexter_regression_test_base,
-    dexter_regression_test_builder,
-    dexter_regression_test_cflags,
-    dexter_regression_test_ldflags])
+    '--builder', dexter_regression_test_builder,
+    '--cflags "',  dexter_regression_test_cflags + '"',
+    '--ldflags "', dexter_regression_test_ldflags + '"'])
   tools.append(ToolSubst('%dexter_regression_test', dexter_regression_test_build))
   return dependencies
 

From d3597ec0aaad11a670f45b42428628531d4b2c05 Mon Sep 17 00:00:00 2001
From: Stanislav Gatev <sgatev@google.com>
Date: Mon, 24 Jan 2022 13:29:06 +0000
Subject: [PATCH 699/946] [clang][dataflow] Enable merging distinct values in
 Environment::join

Make specializations of `DataflowAnalysis` extendable with domain-specific
logic for merging distinct values when joining environments. This could be
a strict lattice join or a more general widening operation.

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D118038
---
 .../Analysis/FlowSensitive/DataflowAnalysis.h |   5 +
 .../FlowSensitive/DataflowAnalysisContext.h   |  25 ++-
 .../FlowSensitive/DataflowEnvironment.h       |  54 ++++-
 .../TypeErasedDataflowAnalysis.h              |   2 +-
 .../clang/Analysis/FlowSensitive/Value.h      |  26 ++-
 .../FlowSensitive/DataflowEnvironment.cpp     |  40 ++--
 .../TypeErasedDataflowAnalysis.cpp            |   2 +-
 .../Analysis/FlowSensitive/TestingSupport.cpp |  12 +-
 .../Analysis/FlowSensitive/TestingSupport.h   |   7 +
 .../Analysis/FlowSensitive/TransferTest.cpp   |  17 +-
 .../TypeErasedDataflowAnalysisTest.cpp        | 191 +++++++++++++++++-
 11 files changed, 338 insertions(+), 43 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
index 7402e42749ee2..38a64a277412b 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
@@ -42,6 +42,11 @@ namespace dataflow {
 ///   * `void transfer(const Stmt *, LatticeT &, Environment &)` - applies the
 ///     analysis transfer function for a given statement and lattice element.
 ///
+///  `Derived` can optionally override the following members:
+///   * `bool merge(QualType, const Value &, const Value &, Value &,
+///     Environment &)` -  joins distinct values. This could be a strict
+///     lattice join or a more general widening operation.
+///
 ///  `LatticeT` is a bounded join-semilattice that is used by `Derived` and must
 ///  provide the following public members:
 ///   * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
index d1a06f7ef8d3a..5c1b41d538921 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include <cassert>
 #include <memory>
+#include <type_traits>
 #include <utility>
 #include <vector>
 
@@ -32,15 +33,21 @@ namespace dataflow {
 /// is used during dataflow analysis.
 class DataflowAnalysisContext {
 public:
+  DataflowAnalysisContext()
+      : TrueVal(&takeOwnership(std::make_unique<BoolValue>())),
+        FalseVal(&takeOwnership(std::make_unique<BoolValue>())) {}
+
   /// Takes ownership of `Loc` and returns a reference to it.
   ///
   /// Requirements:
   ///
   ///  `Loc` must not be null.
-  StorageLocation &takeOwnership(std::unique_ptr<StorageLocation> Loc) {
+  template <typename T>
+  typename std::enable_if<std::is_base_of<StorageLocation, T>::value, T &>::type
+  takeOwnership(std::unique_ptr<T> Loc) {
     assert(Loc != nullptr);
     Locs.push_back(std::move(Loc));
-    return *Locs.back().get();
+    return *cast<T>(Locs.back().get());
   }
 
   /// Takes ownership of `Val` and returns a reference to it.
@@ -48,10 +55,12 @@ class DataflowAnalysisContext {
   /// Requirements:
   ///
   ///  `Val` must not be null.
-  Value &takeOwnership(std::unique_ptr<Value> Val) {
+  template <typename T>
+  typename std::enable_if<std::is_base_of<Value, T>::value, T &>::type
+  takeOwnership(std::unique_ptr<T> Val) {
     assert(Val != nullptr);
     Vals.push_back(std::move(Val));
-    return *Vals.back().get();
+    return *cast<T>(Vals.back().get());
   }
 
   /// Assigns `Loc` as the storage location of `D`.
@@ -104,6 +113,12 @@ class DataflowAnalysisContext {
     return ThisPointeeLoc;
   }
 
+  /// Returns a symbolic boolean value that models a boolean literal equal to
+  /// `Value`.
+  BoolValue &getBoolLiteralValue(bool Value) const {
+    return Value ? *TrueVal : *FalseVal;
+  }
+
 private:
   // Storage for the state of a program.
   std::vector<std::unique_ptr<StorageLocation>> Locs;
@@ -120,6 +135,8 @@ class DataflowAnalysisContext {
   StorageLocation *ThisPointeeLoc = nullptr;
 
   // FIXME: Add support for boolean expressions.
+  BoolValue *TrueVal;
+  BoolValue *FalseVal;
 };
 
 } // namespace dataflow
diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
index 5082181ecf3e6..e560305cf5ca2 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -26,6 +26,9 @@
 #include "clang/Analysis/FlowSensitive/Value.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include <memory>
+#include <type_traits>
+#include <utility>
 
 namespace clang {
 namespace dataflow {
@@ -48,6 +51,25 @@ enum class SkipPast {
 /// Holds the state of the program (store and heap) at a given program point.
 class Environment {
 public:
+  /// Supplements `Environment` with non-standard join operations.
+  class Merger {
+  public:
+    virtual ~Merger() = default;
+
+    /// Given distinct `Val1` and `Val2`, modifies `MergedVal` to approximate
+    /// both `Val1` and `Val2`. This could be a strict lattice join or a more
+    /// general widening operation. If this function returns true, `MergedVal`
+    /// will be assigned to a storage location of type `Type` in `Env`.
+    ///
+    /// Requirements:
+    ///
+    ///  `Val1` and `Val2` must be distinct.
+    virtual bool merge(QualType Type, const Value &Val1, const Value &Val2,
+                       Value &MergedVal, Environment &Env) {
+      return false;
+    }
+  };
+
   /// Creates an environment that uses `DACtx` to store objects that encompass
   /// the state of a program.
   explicit Environment(DataflowAnalysisContext &DACtx) : DACtx(&DACtx) {}
@@ -64,7 +86,7 @@ class Environment {
 
   bool operator==(const Environment &) const;
 
-  LatticeJoinEffect join(const Environment &);
+  LatticeJoinEffect join(const Environment &, Environment::Merger &);
 
   // FIXME: Rename `createOrGetStorageLocation` to `getOrCreateStorageLocation`,
   // `getStableStorageLocation`, or something more appropriate.
@@ -142,12 +164,34 @@ class Environment {
   Value *getValue(const Expr &E, SkipPast SP) const;
 
   /// Transfers ownership of `Loc` to the analysis context and returns a
-  /// reference to `Loc`.
-  StorageLocation &takeOwnership(std::unique_ptr<StorageLocation> Loc);
+  /// reference to it.
+  ///
+  /// Requirements:
+  ///
+  ///  `Loc` must not be null.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<StorageLocation, T>::value, T &>::type
+  takeOwnership(std::unique_ptr<T> Loc) {
+    return DACtx->takeOwnership(std::move(Loc));
+  }
 
   /// Transfers ownership of `Val` to the analysis context and returns a
-  /// reference to `Val`.
-  Value &takeOwnership(std::unique_ptr<Value> Val);
+  /// reference to it.
+  ///
+  /// Requirements:
+  ///
+  ///  `Val` must not be null.
+  template <typename T>
+  typename std::enable_if<std::is_base_of<Value, T>::value, T &>::type
+  takeOwnership(std::unique_ptr<T> Val) {
+    return DACtx->takeOwnership(std::move(Val));
+  }
+
+  /// Returns a symbolic boolean value that models a boolean literal equal to
+  /// `Value`
+  BoolValue &getBoolLiteralValue(bool Value) const {
+    return DACtx->getBoolLiteralValue(Value);
+  }
 
 private:
   /// Creates a value appropriate for `Type`, if `Type` is supported, otherwise
diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
index c0490140d0ce5..3c25c8fc2f28f 100644
--- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -40,7 +40,7 @@ struct TypeErasedLattice {
 };
 
 /// Type-erased base class for dataflow analyses built on a single lattice type.
-class TypeErasedDataflowAnalysis {
+class TypeErasedDataflowAnalysis : public Environment::Merger {
 public:
   virtual ~TypeErasedDataflowAnalysis() {}
 
diff --git a/clang/include/clang/Analysis/FlowSensitive/Value.h b/clang/include/clang/Analysis/FlowSensitive/Value.h
index 47e4d31c832bb..da04f926c597b 100644
--- a/clang/include/clang/Analysis/FlowSensitive/Value.h
+++ b/clang/include/clang/Analysis/FlowSensitive/Value.h
@@ -17,6 +17,8 @@
 #include "clang/AST/Decl.h"
 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
 #include <cassert>
 #include <utility>
 
@@ -26,7 +28,7 @@ namespace dataflow {
 /// Base class for all values computed by abstract interpretation.
 class Value {
 public:
-  enum class Kind { Integer, Reference, Pointer, Struct };
+  enum class Kind { Bool, Integer, Reference, Pointer, Struct };
 
   explicit Value(Kind ValKind) : ValKind(ValKind) {}
 
@@ -38,6 +40,14 @@ class Value {
   Kind ValKind;
 };
 
+/// Models a boolean.
+class BoolValue : public Value {
+public:
+  explicit BoolValue() : Value(Kind::Bool) {}
+
+  static bool classof(const Value *Val) { return Val->getKind() == Kind::Bool; }
+};
+
 /// Models an integer.
 class IntegerValue : public Value {
 public:
@@ -110,8 +120,22 @@ class StructValue final : public Value {
   /// Assigns `Val` as the child value for `D`.
   void setChild(const ValueDecl &D, Value &Val) { Children[&D] = &Val; }
 
+  /// Returns the value of the synthetic property with the given `Name` or null
+  /// if the property isn't assigned a value.
+  Value *getProperty(llvm::StringRef Name) const {
+    auto It = Properties.find(Name);
+    return It == Properties.end() ? nullptr : It->second;
+  }
+
+  /// Assigns `Val` as the value of the synthetic property with the given
+  /// `Name`.
+  void setProperty(llvm::StringRef Name, Value &Val) {
+    Properties.insert_or_assign(Name, &Val);
+  }
+
 private:
   llvm::DenseMap<const ValueDecl *, Value *> Children;
+  llvm::StringMap<Value *> Properties;
 };
 
 } // namespace dataflow
diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
index 512e3d991df20..938f7338b6403 100644
--- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
+++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
@@ -73,7 +73,8 @@ bool Environment::operator==(const Environment &Other) const {
   return DeclToLoc == Other.DeclToLoc && LocToVal == Other.LocToVal;
 }
 
-LatticeJoinEffect Environment::join(const Environment &Other) {
+LatticeJoinEffect Environment::join(const Environment &Other,
+                                    Environment::Merger &Merger) {
   assert(DACtx == Other.DACtx);
 
   auto Effect = LatticeJoinEffect::Unchanged;
@@ -88,10 +89,32 @@ LatticeJoinEffect Environment::join(const Environment &Other) {
   if (ExprToLocSizeBefore != ExprToLoc.size())
     Effect = LatticeJoinEffect::Changed;
 
-  // FIXME: Add support for joining distinct values that are assigned to the
-  // same storage locations in `LocToVal` and `Other.LocToVal`.
+  llvm::DenseMap<const StorageLocation *, Value *> MergedLocToVal;
+  for (auto &Entry : LocToVal) {
+    const StorageLocation *Loc = Entry.first;
+    assert(Loc != nullptr);
+
+    Value *Val = Entry.second;
+    assert(Val != nullptr);
+
+    auto It = Other.LocToVal.find(Loc);
+    if (It == Other.LocToVal.end())
+      continue;
+    assert(It->second != nullptr);
+
+    if (It->second == Val) {
+      MergedLocToVal.insert({Loc, Val});
+      continue;
+    }
+
+    // FIXME: Consider destroying `MergedValue` immediately if `Merger::merge`
+    // returns false to avoid storing unneeded values in `DACtx`.
+    if (Value *MergedVal = createValue(Loc->getType()))
+      if (Merger.merge(Loc->getType(), *Val, *It->second, *MergedVal, *this))
+        MergedLocToVal.insert({Loc, MergedVal});
+  }
   const unsigned LocToValSizeBefore = LocToVal.size();
-  LocToVal = intersectDenseMaps(LocToVal, Other.LocToVal);
+  LocToVal = std::move(MergedLocToVal);
   if (LocToValSizeBefore != LocToVal.size())
     Effect = LatticeJoinEffect::Changed;
 
@@ -267,15 +290,6 @@ Value *Environment::createValueUnlessSelfReferential(
   return nullptr;
 }
 
-StorageLocation &
-Environment::takeOwnership(std::unique_ptr<StorageLocation> Loc) {
-  return DACtx->takeOwnership(std::move(Loc));
-}
-
-Value &Environment::takeOwnership(std::unique_ptr<Value> Val) {
-  return DACtx->takeOwnership(std::move(Val));
-}
-
 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
   switch (SP) {
   case SkipPast::None:
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 7611395cafb6b..0a5f89ce41ad2 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -93,7 +93,7 @@ static TypeErasedDataflowAnalysisState computeBlockInputState(
         MaybePredState.getValue();
     if (MaybeState.hasValue()) {
       Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice);
-      MaybeState->Env.join(PredState.Env);
+      MaybeState->Env.join(PredState.Env, Analysis);
     } else {
       MaybeState = PredState;
     }
diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp
index cbaf544a87c2d..4c5efa7504048 100644
--- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp
@@ -18,8 +18,10 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Testing/Support/Annotations.h"
+#include <cassert>
 #include <functional>
 #include <memory>
 #include <string>
@@ -29,6 +31,7 @@
 
 using namespace clang;
 using namespace dataflow;
+using namespace ast_matchers;
 
 static bool
 isAnnotationDirectlyAfterStatement(const Stmt *Stmt, unsigned AnnotationBegin,
@@ -55,7 +58,6 @@ test::buildStatementToAnnotationMapping(const FunctionDecl *Func,
                                         llvm::Annotations AnnotatedCode) {
   llvm::DenseMap<const Stmt *, std::string> Result;
 
-  using namespace ast_matchers; // NOLINT: Too many names
   auto StmtMatcher =
       findAll(stmt(unless(anyOf(hasParent(expr()), hasParent(returnStmt()))))
                   .bind("stmt"));
@@ -121,3 +123,11 @@ test::buildStatementToAnnotationMapping(const FunctionDecl *Func,
 
   return Result;
 }
+
+const ValueDecl *test::findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name) {
+  auto TargetNodes = match(valueDecl(hasName(Name)).bind("v"), ASTCtx);
+  assert(TargetNodes.size() == 1 && "Name must be unique");
+  auto *const Result = selectFirst<ValueDecl>("v", TargetNodes);
+  assert(Result != nullptr);
+  return Result;
+}
diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
index 349db46a73814..276441359d05d 100644
--- a/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
+++ b/clang/unittests/Analysis/FlowSensitive/TestingSupport.h
@@ -165,6 +165,13 @@ llvm::Error checkDataflow(
                        VirtualMappedFiles);
 }
 
+/// Returns the `ValueDecl` for the given identifier.
+///
+/// Requirements:
+///
+///  `Name` must be unique in `ASTCtx`.
+const ValueDecl *findValueDecl(ASTContext &ASTCtx, llvm::StringRef Name);
+
 } // namespace test
 } // namespace dataflow
 } // namespace clang
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 0f7ac3af74edd..dc365d5bfe75f 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -22,7 +22,6 @@
 #include "llvm/Testing/Support/Error.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
-#include <cassert>
 #include <string>
 #include <utility>
 
@@ -30,6 +29,7 @@ namespace {
 
 using namespace clang;
 using namespace dataflow;
+using namespace test;
 using ::testing::_;
 using ::testing::ElementsAre;
 using ::testing::IsNull;
@@ -58,21 +58,6 @@ class TransferTest : public ::testing::Test {
   }
 };
 
-/// Returns the `ValueDecl` for the given identifier.
-///
-/// Requirements:
-///
-///  `Name` must be unique in `ASTCtx`.
-static const ValueDecl *findValueDecl(ASTContext &ASTCtx,
-                                      llvm::StringRef Name) {
-  auto TargetNodes = ast_matchers::match(
-      ast_matchers::valueDecl(ast_matchers::hasName(Name)).bind("v"), ASTCtx);
-  assert(TargetNodes.size() == 1 && "Name must be unique");
-  auto *const Result = ast_matchers::selectFirst<ValueDecl>("v", TargetNodes);
-  assert(Result != nullptr);
-  return Result;
-}
-
 TEST_F(TransferTest, IntVarDecl) {
   std::string Code = R"(
     void target() {
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index a11a130013917..efb1cb728fa48 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -9,6 +9,7 @@
 #include "NoopAnalysis.h"
 #include "TestingSupport.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
 #include "clang/Analysis/CFG.h"
@@ -16,6 +17,7 @@
 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h"
 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
+#include "clang/Analysis/FlowSensitive/Value.h"
 #include "clang/Tooling/Tooling.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
@@ -35,8 +37,15 @@ namespace {
 
 using namespace clang;
 using namespace dataflow;
+using namespace test;
+using namespace ast_matchers;
+using ::testing::_;
+using ::testing::ElementsAre;
 using ::testing::IsEmpty;
+using ::testing::IsNull;
+using ::testing::NotNull;
 using ::testing::Pair;
+using ::testing::Test;
 using ::testing::UnorderedElementsAre;
 
 template <typename AnalysisT>
@@ -174,7 +183,7 @@ class FunctionCallAnalysis
   }
 };
 
-class NoreturnDestructorTest : public ::testing::Test {
+class NoreturnDestructorTest : public Test {
 protected:
   template <typename Matcher>
   void runDataflow(llvm::StringRef Code, Matcher Expectations) {
@@ -300,4 +309,184 @@ TEST_F(NoreturnDestructorTest, ConditionalOperatorNestedBranchReturns) {
   // FIXME: Called functions at point `p` should contain only "foo".
 }
 
+class OptionalIntAnalysis
+    : public DataflowAnalysis<OptionalIntAnalysis, NoopLattice> {
+public:
+  explicit OptionalIntAnalysis(ASTContext &Context)
+      : DataflowAnalysis<OptionalIntAnalysis, NoopLattice>(Context) {}
+
+  static NoopLattice initialElement() { return {}; }
+
+  void transfer(const Stmt *S, NoopLattice &, Environment &Env) {
+    auto OptionalIntRecordDecl = recordDecl(hasName("OptionalInt"));
+    auto HasOptionalIntType = hasType(OptionalIntRecordDecl);
+
+    if (const auto *E = selectFirst<CXXConstructExpr>(
+            "call", match(cxxConstructExpr(HasOptionalIntType).bind("call"), *S,
+                          getASTContext()))) {
+      auto &ConstructorVal = *cast<StructValue>(Env.createValue(E->getType()));
+      ConstructorVal.setProperty("has_value", Env.getBoolLiteralValue(false));
+      Env.setValue(*Env.getStorageLocation(*E, SkipPast::None), ConstructorVal);
+    } else if (const auto *E = selectFirst<CXXOperatorCallExpr>(
+                   "call",
+                   match(cxxOperatorCallExpr(callee(cxxMethodDecl(ofClass(
+                                                 OptionalIntRecordDecl))))
+                             .bind("call"),
+                         *S, getASTContext()))) {
+      assert(E->getNumArgs() > 0);
+      auto *Object = E->getArg(0);
+      assert(Object != nullptr);
+
+      auto *ObjectLoc =
+          Env.getStorageLocation(*Object, SkipPast::ReferenceThenPointer);
+      assert(ObjectLoc != nullptr);
+
+      auto &ConstructorVal =
+          *cast<StructValue>(Env.createValue(Object->getType()));
+      ConstructorVal.setProperty("has_value", Env.getBoolLiteralValue(true));
+      Env.setValue(*ObjectLoc, ConstructorVal);
+    }
+  }
+
+  bool merge(QualType Type, const Value &Val1, const Value &Val2,
+             Value &MergedVal, Environment &Env) final {
+    if (!Type->isRecordType() ||
+        Type->getAsCXXRecordDecl()->getQualifiedNameAsString() != "OptionalInt")
+      return false;
+
+    auto *HasValue1 = cast_or_null<BoolValue>(
+        cast<StructValue>(&Val1)->getProperty("has_value"));
+    if (HasValue1 == nullptr)
+      return false;
+
+    auto *HasValue2 = cast_or_null<BoolValue>(
+        cast<StructValue>(&Val2)->getProperty("has_value"));
+    if (HasValue2 == nullptr)
+      return false;
+
+    if (HasValue1 != HasValue2)
+      return false;
+
+    cast<StructValue>(&MergedVal)->setProperty("has_value", *HasValue1);
+    return true;
+  }
+};
+
+class WideningTest : public Test {
+protected:
+  template <typename Matcher>
+  void runDataflow(llvm::StringRef Code, Matcher Match) {
+    tooling::FileContentMappings FilesContents;
+    FilesContents.push_back(
+        std::make_pair<std::string, std::string>("widening_test_defs.h", R"(
+      struct OptionalInt {
+        OptionalInt() = default;
+        OptionalInt& operator=(int);
+      };
+    )"));
+    ASSERT_THAT_ERROR(
+        test::checkDataflow<OptionalIntAnalysis>(
+            Code, "target",
+            [](ASTContext &Context, Environment &Env) {
+              return OptionalIntAnalysis(Context);
+            },
+            [&Match](
+                llvm::ArrayRef<
+                    std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
+                    Results,
+                ASTContext &ASTCtx) { Match(Results, ASTCtx); },
+            {"-fsyntax-only", "-std=c++17"}, FilesContents),
+        llvm::Succeeded());
+  }
+};
+
+TEST_F(WideningTest, JoinDistinctValuesWithDistinctProperties) {
+  std::string Code = R"(
+    #include "widening_test_defs.h"
+
+    void target(bool Cond) {
+      OptionalInt Foo;
+      /*[[p1]]*/
+      if (Cond) {
+        Foo = 1;
+        /*[[p2]]*/
+      }
+      (void)0;
+      /*[[p3]]*/
+    }
+  )";
+  runDataflow(
+      Code, [](llvm::ArrayRef<
+                   std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
+                   Results,
+               ASTContext &ASTCtx) {
+        ASSERT_THAT(Results,
+                    ElementsAre(Pair("p3", _), Pair("p2", _), Pair("p1", _)));
+        const Environment &Env1 = Results[2].second.Env;
+        const Environment &Env2 = Results[1].second.Env;
+        const Environment &Env3 = Results[0].second.Env;
+
+        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+        ASSERT_THAT(FooDecl, NotNull());
+
+        auto GetFooValue = [FooDecl](const Environment &Env) {
+          return cast<StructValue>(Env.getValue(*FooDecl, SkipPast::None));
+        };
+
+        EXPECT_EQ(GetFooValue(Env1)->getProperty("has_value"),
+                  &Env1.getBoolLiteralValue(false));
+        EXPECT_EQ(GetFooValue(Env2)->getProperty("has_value"),
+                  &Env2.getBoolLiteralValue(true));
+        EXPECT_THAT(Env3.getValue(*FooDecl, SkipPast::None), IsNull());
+      });
+}
+
+TEST_F(WideningTest, JoinDistinctValuesWithSameProperties) {
+  std::string Code = R"(
+    #include "widening_test_defs.h"
+
+    void target(bool Cond) {
+      OptionalInt Foo;
+      /*[[p1]]*/
+      if (Cond) {
+        Foo = 1;
+        /*[[p2]]*/
+      } else {
+        Foo = 2;
+        /*[[p3]]*/
+      }
+      (void)0;
+      /*[[p4]]*/
+    }
+  )";
+  runDataflow(
+      Code, [](llvm::ArrayRef<
+                   std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
+                   Results,
+               ASTContext &ASTCtx) {
+        ASSERT_THAT(Results, ElementsAre(Pair("p4", _), Pair("p3", _),
+                                         Pair("p2", _), Pair("p1", _)));
+        const Environment &Env1 = Results[3].second.Env;
+        const Environment &Env2 = Results[2].second.Env;
+        const Environment &Env3 = Results[1].second.Env;
+        const Environment &Env4 = Results[0].second.Env;
+
+        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+        ASSERT_THAT(FooDecl, NotNull());
+
+        auto GetFooValue = [FooDecl](const Environment &Env) {
+          return cast<StructValue>(Env.getValue(*FooDecl, SkipPast::None));
+        };
+
+        EXPECT_EQ(GetFooValue(Env1)->getProperty("has_value"),
+                  &Env1.getBoolLiteralValue(false));
+        EXPECT_EQ(GetFooValue(Env2)->getProperty("has_value"),
+                  &Env2.getBoolLiteralValue(true));
+        EXPECT_EQ(GetFooValue(Env3)->getProperty("has_value"),
+                  &Env3.getBoolLiteralValue(true));
+        EXPECT_EQ(GetFooValue(Env4)->getProperty("has_value"),
+                  &Env4.getBoolLiteralValue(true));
+      });
+}
+
 } // namespace

From 6b69985da42c6bdbe83127de9653ee795024c8d0 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 11:27:44 +0100
Subject: [PATCH 700/946] [MemCpyOpt] Use helper for unwind check

This extends support to byval arguments. It would be further
extended to handle the case of non-captured noalias returns.
---
 .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 24 ++++++++++++-------
 .../Transforms/MemCpyOpt/callslot_throw.ll    |  6 ++---
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 4959125e1055e..6698db26626b7 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -313,15 +313,21 @@ INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
 static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
                                          Instruction *End) {
   assert(Start->getParent() == End->getParent() && "Must be in same block");
-  if (!Start->getFunction()->doesNotThrow() &&
-      !isa<AllocaInst>(getUnderlyingObject(V))) {
-    for (const Instruction &I :
-         make_range(Start->getIterator(), End->getIterator())) {
-      if (I.mayThrow())
-        return true;
-    }
-  }
-  return false;
+  // Function can't unwind, so it also can't be visible through unwinding.
+  if (Start->getFunction()->doesNotThrow())
+    return false;
+
+  // Object is not visible on unwind.
+  // TODO: Support RequiresNoCaptureBeforeUnwind case.
+  bool RequiresNoCaptureBeforeUnwind;
+  if (isNotVisibleOnUnwind(getUnderlyingObject(V),
+                           RequiresNoCaptureBeforeUnwind) &&
+      !RequiresNoCaptureBeforeUnwind)
+    return false;
+
+  // Check whether there are any unwinding instructions in the range.
+  return any_of(make_range(Start->getIterator(), End->getIterator()),
+                [](const Instruction &I) { return I.mayThrow(); });
 }
 
 void MemCpyOptPass::eraseInstruction(Instruction *I) {
diff --git a/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll b/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll
index ce0d575ab21b9..62b6fcfe4d5a3 100644
--- a/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll
+++ b/llvm/test/Transforms/MemCpyOpt/callslot_throw.ll
@@ -40,14 +40,12 @@ entry:
   ret void
 }
 
-; TODO: byval argument is not visible on unwind.
+; byval argument is not visible on unwind.
 define void @test_byval(i32* nocapture noalias dereferenceable(4) byval(i32) %x) {
 ; CHECK-LABEL: @test_byval(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[T:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @may_throw(i32* nonnull [[T]])
-; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[T]], align 4
-; CHECK-NEXT:    store i32 [[LOAD]], i32* [[X:%.*]], align 4
+; CHECK-NEXT:    call void @may_throw(i32* nonnull [[X:%.*]])
 ; CHECK-NEXT:    ret void
 ;
 entry:

From c5907f8a722342a09eada1454b79f0b6904b6d1e Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Wed, 26 Jan 2022 11:47:14 +0000
Subject: [PATCH 701/946] [gn build] Port 19eaad94c47f

---
 llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
index cd65d60831cc2..cd987b6d87ed2 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
@@ -58,6 +58,7 @@ static_library("clang-tidy") {
     "ClangTidyProfiling.cpp",
     "ExpandModularHeadersPPCallbacks.cpp",
     "GlobList.cpp",
+    "NoLintDirectiveHandler.cpp",
   ]
 }
 

From 8e29d19b8d29db1ad60af3a8921aad7bbfc24435 Mon Sep 17 00:00:00 2001
From: Salman Javed <mail@salmanjaved.org>
Date: Thu, 27 Jan 2022 00:52:25 +1300
Subject: [PATCH 702/946] Revert "[clang-tidy] Cache the locations of
 NOLINTBEGIN/END blocks"

Build warning here:
https://lab.llvm.org/buildbot/#/builders/57/builds/14322
---
 clang-tools-extra/clang-tidy/CMakeLists.txt   |   1 -
 .../ClangTidyDiagnosticConsumer.cpp           | 230 +++++++++-
 .../clang-tidy/ClangTidyDiagnosticConsumer.h  |  47 +-
 .../clang-tidy/NoLintDirectiveHandler.cpp     | 415 ------------------
 .../clang-tidy/NoLintDirectiveHandler.h       |  51 ---
 clang-tools-extra/clangd/ParsedAST.cpp        |  12 +-
 .../nolintbeginend/1st-translation-unit.cpp   |   5 -
 .../nolintbeginend/2nd-translation-unit.cpp   |   6 -
 .../test/clang-tidy/infrastructure/nolint.cpp |  51 +--
 .../infrastructure/nolintbeginend-LIFO.cpp    |  19 -
 .../nolintbeginend-begin-all-end-glob.cpp     |  16 -
 .../nolintbeginend-begin-all-end-specific.cpp |  16 -
 .../nolintbeginend-begin-glob-end-all.cpp     |  16 -
 ...nolintbeginend-begin-glob-end-specific.cpp |  16 -
 ...lintbeginend-begin-global-end-specific.cpp |  25 ++
 ...lintbeginend-begin-multiple-end-single.cpp |  22 -
 ...lintbeginend-begin-single-end-multiple.cpp |  22 -
 .../nolintbeginend-begin-specific-end-all.cpp |  16 -
 ...nolintbeginend-begin-specific-end-glob.cpp |  16 -
 ...lintbeginend-begin-specific-end-global.cpp |  25 ++
 .../nolintbeginend-mismatched-check-names.cpp |  21 +
 .../nolintbeginend-mismatched-delims.cpp      |   1 +
 .../nolintbeginend-multiple-TUs.cpp           |   6 -
 .../nolintbeginend-typo-in-check-name.cpp     |   3 -
 .../infrastructure/nolintbeginend.cpp         |  28 +-
 25 files changed, 334 insertions(+), 752 deletions(-)
 delete mode 100644 clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
 delete mode 100644 clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp

diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt
index 8a953eeea2759..075e9f9909d65 100644
--- a/clang-tools-extra/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/CMakeLists.txt
@@ -17,7 +17,6 @@ add_clang_library(clangTidy
   ClangTidyProfiling.cpp
   ExpandModularHeadersPPCallbacks.cpp
   GlobList.cpp
-  NoLintDirectiveHandler.cpp
 
   DEPENDS
   ClangSACheckers
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
index 04721a2d3a020..66f60ec60b605 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
@@ -18,7 +18,6 @@
 #include "ClangTidyDiagnosticConsumer.h"
 #include "ClangTidyOptions.h"
 #include "GlobList.h"
-#include "NoLintDirectiveHandler.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTDiagnostic.h"
 #include "clang/AST/Attr.h"
@@ -189,7 +188,7 @@ DiagnosticBuilder ClangTidyContext::diag(
   return DiagEngine->Report(ID);
 }
 
-DiagnosticBuilder ClangTidyContext::diag(const tooling::Diagnostic &Error) {
+DiagnosticBuilder ClangTidyContext::diag(const ClangTidyError &Error) {
   SourceManager &SM = DiagEngine->getSourceManager();
   llvm::ErrorOr<const FileEntry *> File =
       SM.getFileManager().getFile(Error.Message.FilePath);
@@ -207,15 +206,6 @@ DiagnosticBuilder ClangTidyContext::configurationDiag(
   return diag("clang-tidy-config", Message, Level);
 }
 
-bool ClangTidyContext::shouldSuppressDiagnostic(
-    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
-    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
-    bool EnableNoLintBlocks) {
-  std::string CheckName = getCheckName(Info.getID());
-  return NoLintHandler.shouldSuppress(DiagLevel, Info, CheckName, NoLintErrors,
-                                      AllowIO, EnableNoLintBlocks);
-}
-
 void ClangTidyContext::setSourceManager(SourceManager *SourceMgr) {
   DiagEngine->setSourceManager(SourceMgr);
 }
@@ -317,9 +307,218 @@ void ClangTidyDiagnosticConsumer::finalizeLastError() {
   LastErrorPassesLineFilter = false;
 }
 
+static bool isNOLINTFound(StringRef NolintDirectiveText, StringRef CheckName,
+                          StringRef Line, size_t *FoundNolintIndex = nullptr,
+                          StringRef *FoundNolintChecksStr = nullptr) {
+  if (FoundNolintIndex)
+    *FoundNolintIndex = StringRef::npos;
+  if (FoundNolintChecksStr)
+    *FoundNolintChecksStr = StringRef();
+
+  size_t NolintIndex = Line.find(NolintDirectiveText);
+  if (NolintIndex == StringRef::npos)
+    return false;
+
+  size_t BracketIndex = NolintIndex + NolintDirectiveText.size();
+  if (BracketIndex < Line.size() && isalnum(Line[BracketIndex])) {
+    // Reject this search result, otherwise it will cause false positives when
+    // NOLINT is found as a substring of NOLINT(NEXTLINE/BEGIN/END).
+    return false;
+  }
+
+  // Check if specific checks are specified in brackets.
+  if (BracketIndex < Line.size() && Line[BracketIndex] == '(') {
+    ++BracketIndex;
+    const size_t BracketEndIndex = Line.find(')', BracketIndex);
+    if (BracketEndIndex != StringRef::npos) {
+      StringRef ChecksStr =
+          Line.substr(BracketIndex, BracketEndIndex - BracketIndex);
+      if (FoundNolintChecksStr)
+        *FoundNolintChecksStr = ChecksStr;
+      // Allow specifying a few checks with a glob expression, ignoring
+      // negative globs (which would effectively disable the suppression).
+      GlobList Globs(ChecksStr, /*KeepNegativeGlobs=*/false);
+      if (!Globs.contains(CheckName))
+        return false;
+    }
+  }
+
+  if (FoundNolintIndex)
+    *FoundNolintIndex = NolintIndex;
+
+  return true;
+}
+
+static llvm::Optional<StringRef> getBuffer(const SourceManager &SM, FileID File,
+                                           bool AllowIO) {
+  return AllowIO ? SM.getBufferDataOrNone(File)
+                 : SM.getBufferDataIfLoaded(File);
+}
+
+static ClangTidyError createNolintError(const ClangTidyContext &Context,
+                                        const SourceManager &SM,
+                                        SourceLocation Loc,
+                                        bool IsNolintBegin) {
+  ClangTidyError Error("clang-tidy-nolint", ClangTidyError::Error,
+                       Context.getCurrentBuildDirectory(), false);
+  StringRef Message =
+      IsNolintBegin
+          ? ("unmatched 'NOLINTBEGIN' comment without a subsequent 'NOLINT"
+             "END' comment")
+          : ("unmatched 'NOLINTEND' comment without a previous 'NOLINT"
+             "BEGIN' comment");
+  Error.Message = tooling::DiagnosticMessage(Message, SM, Loc);
+  return Error;
+}
+
+static Optional<ClangTidyError> tallyNolintBegins(
+    const ClangTidyContext &Context, const SourceManager &SM,
+    StringRef CheckName, SmallVector<StringRef> Lines, SourceLocation LinesLoc,
+    SmallVector<std::pair<SourceLocation, StringRef>> &NolintBegins) {
+  // Keep a running total of how many NOLINT(BEGIN...END) blocks are active, as
+  // well as the bracket expression (if any) that was used in the NOLINT
+  // expression.
+  size_t NolintIndex;
+  StringRef NolintChecksStr;
+  for (const auto &Line : Lines) {
+    if (isNOLINTFound("NOLINTBEGIN", CheckName, Line, &NolintIndex,
+                      &NolintChecksStr)) {
+      // Check if a new block is being started.
+      NolintBegins.emplace_back(std::make_pair(
+          LinesLoc.getLocWithOffset(NolintIndex), NolintChecksStr));
+    } else if (isNOLINTFound("NOLINTEND", CheckName, Line, &NolintIndex,
+                             &NolintChecksStr)) {
+      // Check if the previous block is being closed.
+      if (!NolintBegins.empty() &&
+          NolintBegins.back().second == NolintChecksStr) {
+        NolintBegins.pop_back();
+      } else {
+        // Trying to close a nonexistent block. Return a diagnostic about this
+        // misuse that can be displayed along with the original clang-tidy check
+        // that the user was attempting to suppress.
+        return createNolintError(Context, SM,
+                                 LinesLoc.getLocWithOffset(NolintIndex), false);
+      }
+    }
+    // Advance source location to the next line.
+    LinesLoc = LinesLoc.getLocWithOffset(Line.size() + sizeof('\n'));
+  }
+  return None; // All NOLINT(BEGIN/END) use has been consistent so far.
+}
+
+static bool
+lineIsWithinNolintBegin(const ClangTidyContext &Context,
+                        SmallVectorImpl<ClangTidyError> &SuppressionErrors,
+                        const SourceManager &SM, SourceLocation Loc,
+                        StringRef CheckName, StringRef TextBeforeDiag,
+                        StringRef TextAfterDiag) {
+  Loc = SM.getExpansionRange(Loc).getBegin();
+  SourceLocation FileStartLoc = SM.getLocForStartOfFile(SM.getFileID(Loc));
+  SmallVector<std::pair<SourceLocation, StringRef>> NolintBegins;
+
+  // Check if there's an open NOLINT(BEGIN...END) block on the previous lines.
+  SmallVector<StringRef> PrevLines;
+  TextBeforeDiag.split(PrevLines, '\n');
+  auto Error = tallyNolintBegins(Context, SM, CheckName, PrevLines,
+                                 FileStartLoc, NolintBegins);
+  if (Error) {
+    SuppressionErrors.emplace_back(Error.getValue());
+  }
+  bool WithinNolintBegin = !NolintBegins.empty();
+
+  // Check that every block is terminated correctly on the following lines.
+  SmallVector<StringRef> FollowingLines;
+  TextAfterDiag.split(FollowingLines, '\n');
+  Error = tallyNolintBegins(Context, SM, CheckName, FollowingLines, Loc,
+                            NolintBegins);
+  if (Error) {
+    SuppressionErrors.emplace_back(Error.getValue());
+  }
+
+  // The following blocks were never closed. Return diagnostics for each
+  // instance that can be displayed along with the original clang-tidy check
+  // that the user was attempting to suppress.
+  for (const auto &NolintBegin : NolintBegins) {
+    SuppressionErrors.emplace_back(
+        createNolintError(Context, SM, NolintBegin.first, true));
+  }
+
+  return WithinNolintBegin && SuppressionErrors.empty();
+}
+
+static bool
+lineIsMarkedWithNOLINT(const ClangTidyContext &Context,
+                       SmallVectorImpl<ClangTidyError> &SuppressionErrors,
+                       bool AllowIO, const SourceManager &SM,
+                       SourceLocation Loc, StringRef CheckName,
+                       bool EnableNolintBlocks) {
+  // Get source code for this location.
+  FileID File;
+  unsigned Offset;
+  std::tie(File, Offset) = SM.getDecomposedSpellingLoc(Loc);
+  Optional<StringRef> Buffer = getBuffer(SM, File, AllowIO);
+  if (!Buffer)
+    return false;
+
+  // Check if there's a NOLINT on this line.
+  StringRef TextAfterDiag = Buffer->substr(Offset);
+  StringRef RestOfThisLine, FollowingLines;
+  std::tie(RestOfThisLine, FollowingLines) = TextAfterDiag.split('\n');
+  if (isNOLINTFound("NOLINT", CheckName, RestOfThisLine))
+    return true;
+
+  // Check if there's a NOLINTNEXTLINE on the previous line.
+  StringRef TextBeforeDiag = Buffer->substr(0, Offset);
+  size_t LastNewLinePos = TextBeforeDiag.rfind('\n');
+  StringRef PrevLines = (LastNewLinePos == StringRef::npos)
+                            ? StringRef()
+                            : TextBeforeDiag.slice(0, LastNewLinePos);
+  LastNewLinePos = PrevLines.rfind('\n');
+  StringRef PrevLine = (LastNewLinePos == StringRef::npos)
+                           ? PrevLines
+                           : PrevLines.substr(LastNewLinePos + 1);
+  if (isNOLINTFound("NOLINTNEXTLINE", CheckName, PrevLine))
+    return true;
+
+  // Check if this line is within a NOLINT(BEGIN...END) block.
+  return EnableNolintBlocks &&
+         lineIsWithinNolintBegin(Context, SuppressionErrors, SM, Loc, CheckName,
+                                 TextBeforeDiag, TextAfterDiag);
+}
+
+static bool lineIsMarkedWithNOLINTinMacro(
+    const Diagnostic &Info, const ClangTidyContext &Context,
+    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO,
+    bool EnableNolintBlocks) {
+  const SourceManager &SM = Info.getSourceManager();
+  SourceLocation Loc = Info.getLocation();
+  std::string CheckName = Context.getCheckName(Info.getID());
+  while (true) {
+    if (lineIsMarkedWithNOLINT(Context, SuppressionErrors, AllowIO, SM, Loc,
+                               CheckName, EnableNolintBlocks))
+      return true;
+    if (!Loc.isMacroID())
+      return false;
+    Loc = SM.getImmediateExpansionRange(Loc).getBegin();
+  }
+  return false;
+}
+
 namespace clang {
 namespace tidy {
 
+bool shouldSuppressDiagnostic(
+    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
+    ClangTidyContext &Context,
+    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO,
+    bool EnableNolintBlocks) {
+  return Info.getLocation().isValid() &&
+         DiagLevel != DiagnosticsEngine::Error &&
+         DiagLevel != DiagnosticsEngine::Fatal &&
+         lineIsMarkedWithNOLINTinMacro(Info, Context, SuppressionErrors,
+                                       AllowIO, EnableNolintBlocks);
+}
+
 const llvm::StringMap<tooling::Replacements> *
 getFixIt(const tooling::Diagnostic &Diagnostic, bool GetFixFromNotes) {
   if (!Diagnostic.Message.Fix.empty())
@@ -346,15 +545,12 @@ void ClangTidyDiagnosticConsumer::HandleDiagnostic(
   if (LastErrorWasIgnored && DiagLevel == DiagnosticsEngine::Note)
     return;
 
-  SmallVector<tooling::Diagnostic, 1> SuppressionErrors;
-  if (Context.shouldSuppressDiagnostic(DiagLevel, Info, SuppressionErrors,
-                                       EnableNolintBlocks)) {
+  SmallVector<ClangTidyError, 1> SuppressionErrors;
+  if (shouldSuppressDiagnostic(DiagLevel, Info, Context, SuppressionErrors,
+                               EnableNolintBlocks)) {
     ++Context.Stats.ErrorsIgnoredNOLINT;
     // Ignored a warning, should ignore related notes as well
     LastErrorWasIgnored = true;
-    Context.DiagEngine->Clear();
-    for (const auto &Error : SuppressionErrors)
-      Context.diag(Error);
     return;
   }
 
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
index e41003262cccf..69f1ceddd9bd2 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
@@ -11,7 +11,6 @@
 
 #include "ClangTidyOptions.h"
 #include "ClangTidyProfiling.h"
-#include "NoLintDirectiveHandler.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Tooling/Core/Diagnostic.h"
 #include "llvm/ADT/DenseMap.h"
@@ -96,33 +95,13 @@ class ClangTidyContext {
   DiagnosticBuilder diag(StringRef CheckName, StringRef Message,
                          DiagnosticIDs::Level Level = DiagnosticIDs::Warning);
 
-  DiagnosticBuilder diag(const tooling::Diagnostic &Error);
+  DiagnosticBuilder diag(const ClangTidyError &Error);
 
   /// Report any errors to do with reading the configuration using this method.
   DiagnosticBuilder
   configurationDiag(StringRef Message,
                     DiagnosticIDs::Level Level = DiagnosticIDs::Warning);
 
-  /// Check whether a given diagnostic should be suppressed due to the presence
-  /// of a "NOLINT" suppression comment.
-  /// This is exposed so that other tools that present clang-tidy diagnostics
-  /// (such as clangd) can respect the same suppression rules as clang-tidy.
-  /// This does not handle suppression of notes following a suppressed
-  /// diagnostic; that is left to the caller as it requires maintaining state in
-  /// between calls to this function.
-  /// If any NOLINT is malformed, e.g. a BEGIN without a subsequent END, an
-  /// error about it will be returned in output \param NoLintErrors.
-  /// If \param AllowIO is false, the function does not attempt to read source
-  /// files from disk which are not already mapped into memory; such files are
-  /// treated as not containing a suppression comment.
-  /// \param EnableNoLintBlocks controls whether to honor NOLINTBEGIN/NOLINTEND
-  /// blocks; if false, only considers line-level disabling.
-  bool
-  shouldSuppressDiagnostic(DiagnosticsEngine::Level DiagLevel,
-                           const Diagnostic &Info,
-                           SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
-                           bool AllowIO = true, bool EnableNoLintBlocks = true);
-
   /// Sets the \c SourceManager of the used \c DiagnosticsEngine.
   ///
   /// This is called from the \c ClangTidyCheck base class.
@@ -229,10 +208,30 @@ class ClangTidyContext {
   std::string ProfilePrefix;
 
   bool AllowEnablingAnalyzerAlphaCheckers;
-
-  NoLintDirectiveHandler NoLintHandler;
 };
 
+/// Check whether a given diagnostic should be suppressed due to the presence
+/// of a "NOLINT" suppression comment.
+/// This is exposed so that other tools that present clang-tidy diagnostics
+/// (such as clangd) can respect the same suppression rules as clang-tidy.
+/// This does not handle suppression of notes following a suppressed diagnostic;
+/// that is left to the caller as it requires maintaining state in between calls
+/// to this function.
+/// If `AllowIO` is false, the function does not attempt to read source files
+/// from disk which are not already mapped into memory; such files are treated
+/// as not containing a suppression comment.
+/// \param EnableNolintBlocks controls whether to honor NOLINTBEGIN/NOLINTEND
+/// blocks; if false, only considers line-level disabling.
+/// If suppression is not possible due to improper use of "NOLINT" comments -
+/// for example, the use of a "NOLINTBEGIN" comment that is not followed by a
+/// "NOLINTEND" comment - a diagnostic regarding the improper use is returned
+/// via the output argument `SuppressionErrors`.
+bool shouldSuppressDiagnostic(
+    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
+    ClangTidyContext &Context,
+    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO = true,
+    bool EnableNolintBlocks = true);
+
 /// Gets the Fix attached to \p Diagnostic.
 /// If there isn't a Fix attached to the diagnostic and \p AnyFix is true, Check
 /// to see if exactly one note has a Fix and return it. Otherwise return
diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
deleted file mode 100644
index 6723b752fc848..0000000000000
--- a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
+++ /dev/null
@@ -1,415 +0,0 @@
-//===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp -----------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-///
-///  \file This file implements the NoLintDirectiveHandler class, which is used
-///  to locate NOLINT comments in the file being analyzed, to decide whether a
-///  diagnostic should be suppressed.
-///
-//===----------------------------------------------------------------------===//
-
-#include "NoLintDirectiveHandler.h"
-#include "GlobList.h"
-#include "clang/Basic/LLVM.h"
-#include "clang/Basic/SourceLocation.h"
-#include "clang/Basic/SourceManager.h"
-#include "clang/Tooling/Core/Diagnostic.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/None.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringSwitch.h"
-#include <cassert>
-#include <cstddef>
-#include <iterator>
-#include <string>
-#include <tuple>
-#include <type_traits>
-#include <utility>
-
-namespace clang {
-namespace tidy {
-
-//===----------------------------------------------------------------------===//
-// NoLintType
-//===----------------------------------------------------------------------===//
-
-// The type - one of NOLINT[NEXTLINE/BEGIN/END].
-enum class NoLintType { NoLint, NoLintNextLine, NoLintBegin, NoLintEnd };
-
-// Convert a string like "NOLINTNEXTLINE" to its enum `Type::NoLintNextLine`.
-// Return `None` if the string is unrecognized.
-static Optional<NoLintType> strToNoLintType(StringRef Str) {
-  auto Type = llvm::StringSwitch<Optional<NoLintType>>(Str)
-                  .Case("NOLINT", NoLintType::NoLint)
-                  .Case("NOLINTNEXTLINE", NoLintType::NoLintNextLine)
-                  .Case("NOLINTBEGIN", NoLintType::NoLintBegin)
-                  .Case("NOLINTEND", NoLintType::NoLintEnd)
-                  .Default(None);
-  return Type;
-}
-
-//===----------------------------------------------------------------------===//
-// NoLintToken
-//===----------------------------------------------------------------------===//
-
-// Whitespace within a NOLINT's check list shall be ignored.
-// "NOLINT( check1, check2 )" is equivalent to "NOLINT(check1,check2)".
-// Return the check list with all extraneous whitespace removed.
-static std::string trimWhitespace(StringRef Checks) {
-  SmallVector<StringRef> Split;
-  Checks.split(Split, ',');
-  for (StringRef &Check : Split)
-    Check = Check.trim();
-  return llvm::join(Split, ",");
-}
-
-namespace {
-
-// Record the presence of a NOLINT comment - its type, location, checks -
-// as parsed from the file's character contents.
-class NoLintToken {
-public:
-  // \param Checks:
-  // - If unspecified (i.e. `None`) then ALL checks are suppressed - equivalent
-  //   to NOLINT(*).
-  // - An empty string means nothing is suppressed - equivalent to NOLINT().
-  // - Negative globs ignored (which would effectively disable the suppression).
-  NoLintToken(NoLintType Type, size_t Pos, const Optional<std::string> &Checks)
-      : Type(Type), Pos(Pos), ChecksGlob(std::make_unique<CachedGlobList>(
-                                  Checks.getValueOr("*"),
-                                  /*KeepNegativeGlobs=*/false)) {
-    if (Checks)
-      this->Checks = trimWhitespace(*Checks);
-  }
-
-  // The type - one of NOLINT[NEXTLINE/BEGIN/END].
-  NoLintType Type;
-
-  // The location of the first character, "N", in "NOLINT".
-  size_t Pos;
-
-  // If this NOLINT specifies checks, return the checks.
-  Optional<std::string> checks() const { return Checks; }
-
-  // Whether this NOLINT applies to the provided check.
-  bool suppresses(StringRef Check) const { return ChecksGlob->contains(Check); }
-
-private:
-  Optional<std::string> Checks;
-  std::unique_ptr<CachedGlobList> ChecksGlob;
-};
-
-} // namespace
-
-// Consume the entire buffer and return all `NoLintToken`s that were found.
-static SmallVector<NoLintToken> getNoLints(StringRef Buffer) {
-  static constexpr llvm::StringLiteral NOLINT = "NOLINT";
-  SmallVector<NoLintToken> NoLints;
-
-  size_t Pos = 0;
-  while (Pos < Buffer.size()) {
-    // Find NOLINT:
-    const size_t NoLintPos = Buffer.find(NOLINT, Pos);
-    if (NoLintPos == StringRef::npos)
-      break; // Buffer exhausted
-
-    // Read [A-Z] characters immediately after "NOLINT", e.g. the "NEXTLINE" in
-    // "NOLINTNEXTLINE".
-    Pos = NoLintPos + NOLINT.size();
-    while (Pos < Buffer.size() && llvm::isAlpha(Buffer[Pos]))
-      ++Pos;
-
-    // Is this a recognized NOLINT type?
-    const Optional<NoLintType> NoLintType =
-        strToNoLintType(Buffer.slice(NoLintPos, Pos));
-    if (!NoLintType)
-      continue;
-
-    // Get checks, if specified.
-    Optional<std::string> Checks;
-    if (Pos < Buffer.size() && Buffer[Pos] == '(') {
-      size_t ClosingBracket = Buffer.find_first_of("\n)", ++Pos);
-      if (ClosingBracket != StringRef::npos && Buffer[ClosingBracket] == ')') {
-        Checks = Buffer.slice(Pos, ClosingBracket).str();
-        Pos = ClosingBracket + 1;
-      }
-    }
-
-    NoLints.emplace_back(*NoLintType, NoLintPos, Checks);
-  }
-
-  return NoLints;
-}
-
-//===----------------------------------------------------------------------===//
-// NoLintBlockToken
-//===----------------------------------------------------------------------===//
-
-namespace {
-
-// Represents a source range within a pair of NOLINT(BEGIN/END) comments.
-class NoLintBlockToken {
-public:
-  NoLintBlockToken(NoLintToken Begin, const NoLintToken &End)
-      : Begin(std::move(Begin)), EndPos(End.Pos) {
-    assert(this->Begin.Type == NoLintType::NoLintBegin);
-    assert(End.Type == NoLintType::NoLintEnd);
-    assert(this->Begin.Pos < End.Pos);
-    assert(this->Begin.checks() == End.checks());
-  }
-
-  // Whether the provided diagnostic is within and is suppressible by this block
-  // of NOLINT(BEGIN/END) comments.
-  bool suppresses(size_t DiagPos, StringRef DiagName) const {
-    return (Begin.Pos < DiagPos) && (DiagPos < EndPos) &&
-           Begin.suppresses(DiagName);
-  }
-
-private:
-  NoLintToken Begin;
-  size_t EndPos;
-};
-
-} // namespace
-
-// Match NOLINTBEGINs with their corresponding NOLINTENDs and move them into
-// `NoLintBlockToken`s. If any BEGINs or ENDs are left over, they are moved to
-// `UnmatchedTokens`.
-static SmallVector<NoLintBlockToken>
-formNoLintBlocks(SmallVector<NoLintToken> NoLints,
-                 SmallVectorImpl<NoLintToken> &UnmatchedTokens) {
-  SmallVector<NoLintBlockToken> CompletedBlocks;
-  SmallVector<NoLintToken> Stack;
-
-  // Nested blocks must be fully contained within their parent block. What this
-  // means is that when you have a series of nested BEGIN tokens, the END tokens
-  // shall appear in the reverse order, starting with the closing of the
-  // inner-most block first, then the next level up, and so on. This is
-  // essentially a last-in-first-out/stack system.
-  for (NoLintToken &NoLint : NoLints) {
-    if (NoLint.Type == NoLintType::NoLintBegin)
-      // A new block is being started. Add it to the stack.
-      Stack.emplace_back(std::move(NoLint));
-    else if (NoLint.Type == NoLintType::NoLintEnd) {
-      if (!Stack.empty() && Stack.back().checks() == NoLint.checks())
-        // The previous block is being closed. Pop one element off the stack.
-        CompletedBlocks.emplace_back(std::move(Stack.pop_back_val()), NoLint);
-      else
-        // Trying to close the wrong block.
-        UnmatchedTokens.emplace_back(std::move(NoLint));
-    }
-  }
-
-  llvm::move(Stack, std::back_inserter(UnmatchedTokens));
-  return CompletedBlocks;
-}
-
-//===----------------------------------------------------------------------===//
-// NoLintDirectiveHandler::Impl
-//===----------------------------------------------------------------------===//
-
-class NoLintDirectiveHandler::Impl {
-public:
-  bool shouldSuppress(DiagnosticsEngine::Level DiagLevel,
-                      const Diagnostic &Diag, StringRef DiagName,
-                      SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
-                      bool AllowIO, bool EnableNoLintBlocks);
-
-private:
-  bool diagHasNoLintInMacro(const Diagnostic &Diag, StringRef DiagName,
-                            SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
-                            bool AllowIO, bool EnableNoLintBlocks);
-
-  bool diagHasNoLint(StringRef DiagName, SourceLocation DiagLoc,
-                     const SourceManager &SrcMgr,
-                     SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
-                     bool AllowIO, bool EnableNoLintBlocks);
-
-  void generateCache(const SourceManager &SrcMgr, StringRef FileName,
-                     FileID File, StringRef Buffer,
-                     SmallVectorImpl<tooling::Diagnostic> &NoLintErrors);
-
-  llvm::StringMap<SmallVector<NoLintBlockToken>> Cache;
-};
-
-bool NoLintDirectiveHandler::Impl::shouldSuppress(
-    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Diag,
-    StringRef DiagName, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
-    bool AllowIO, bool EnableNoLintBlocks) {
-  if (DiagLevel >= DiagnosticsEngine::Error)
-    return false;
-  return diagHasNoLintInMacro(Diag, DiagName, NoLintErrors, AllowIO,
-                              EnableNoLintBlocks);
-}
-
-// Look at the macro's spelling location for a NOLINT. If none is found, keep
-// looking up the call stack.
-bool NoLintDirectiveHandler::Impl::diagHasNoLintInMacro(
-    const Diagnostic &Diag, StringRef DiagName,
-    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
-    bool EnableNoLintBlocks) {
-  SourceLocation DiagLoc = Diag.getLocation();
-  if (DiagLoc.isInvalid())
-    return false;
-  const SourceManager &SrcMgr = Diag.getSourceManager();
-  while (true) {
-    if (diagHasNoLint(DiagName, DiagLoc, SrcMgr, NoLintErrors, AllowIO,
-                      EnableNoLintBlocks))
-      return true;
-    if (!DiagLoc.isMacroID())
-      return false;
-    DiagLoc = SrcMgr.getImmediateMacroCallerLoc(DiagLoc);
-  }
-  return false;
-}
-
-// Look behind and ahead for '\n' characters. These mark the start and end of
-// this line.
-static std::pair<size_t, size_t> getLineStartAndEnd(StringRef Buffer,
-                                                    size_t From) {
-  size_t StartPos = Buffer.find_last_of('\n', From) + 1;
-  size_t EndPos = std::min(Buffer.find('\n', From), Buffer.size());
-  return std::make_pair(StartPos, EndPos);
-}
-
-// Whether the line has a NOLINT of type = `Type` that can suppress the
-// diagnostic `DiagName`.
-static bool lineHasNoLint(StringRef Buffer,
-                          std::pair<size_t, size_t> LineStartAndEnd,
-                          NoLintType Type, StringRef DiagName) {
-  // Get all NOLINTs on the line.
-  Buffer = Buffer.slice(LineStartAndEnd.first, LineStartAndEnd.second);
-  SmallVector<NoLintToken> NoLints = getNoLints(Buffer);
-
-  // Do any of these NOLINTs match the desired type and diag name?
-  return llvm::any_of(NoLints, [&](const NoLintToken &NoLint) {
-    return NoLint.Type == Type && NoLint.suppresses(DiagName);
-  });
-}
-
-// Whether the provided diagnostic is located within and is suppressible by a
-// block of NOLINT(BEGIN/END) comments.
-static bool withinNoLintBlock(ArrayRef<NoLintBlockToken> NoLintBlocks,
-                              size_t DiagPos, StringRef DiagName) {
-  return llvm::any_of(NoLintBlocks, [&](const NoLintBlockToken &NoLintBlock) {
-    return NoLintBlock.suppresses(DiagPos, DiagName);
-  });
-}
-
-// Get the file contents as a string.
-static Optional<StringRef> getBuffer(const SourceManager &SrcMgr, FileID File,
-                                     bool AllowIO) {
-  return AllowIO ? SrcMgr.getBufferDataOrNone(File)
-                 : SrcMgr.getBufferDataIfLoaded(File);
-}
-
-// We will check for NOLINTs and NOLINTNEXTLINEs first. Checking for these is
-// not so expensive (just need to parse the current and previous lines). Only if
-// that fails do we look for NOLINT(BEGIN/END) blocks (which requires reading
-// the entire file).
-bool NoLintDirectiveHandler::Impl::diagHasNoLint(
-    StringRef DiagName, SourceLocation DiagLoc, const SourceManager &SrcMgr,
-    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
-    bool EnableNoLintBlocks) {
-  // Translate the diagnostic's SourceLocation to a raw file + offset pair.
-  FileID File;
-  unsigned int Pos = 0;
-  std::tie(File, Pos) = SrcMgr.getDecomposedSpellingLoc(DiagLoc);
-
-  // We will only see NOLINTs in user-authored sources. No point reading the
-  // file if it is a <built-in>.
-  Optional<StringRef> FileName = SrcMgr.getNonBuiltinFilenameForID(File);
-  if (!FileName)
-    return false;
-
-  // Get file contents.
-  Optional<StringRef> Buffer = getBuffer(SrcMgr, File, AllowIO);
-  if (!Buffer)
-    return false;
-
-  // Check if there's a NOLINT on this line.
-  auto ThisLine = getLineStartAndEnd(*Buffer, Pos);
-  if (lineHasNoLint(*Buffer, ThisLine, NoLintType::NoLint, DiagName))
-    return true;
-
-  // Check if there's a NOLINTNEXTLINE on the previous line.
-  if (ThisLine.first > 0) {
-    auto PrevLine = getLineStartAndEnd(*Buffer, ThisLine.first - 1);
-    if (lineHasNoLint(*Buffer, PrevLine, NoLintType::NoLintNextLine, DiagName))
-      return true;
-  }
-
-  // Check if this line is within a NOLINT(BEGIN/END) block.
-  if (!EnableNoLintBlocks)
-    return false;
-
-  // Do we have cached NOLINT block locations for this file?
-  if (Cache.count(*FileName) == 0)
-    // Warning: heavy operation - need to read entire file.
-    generateCache(SrcMgr, *FileName, File, *Buffer, NoLintErrors);
-
-  return withinNoLintBlock(Cache[*FileName], Pos, DiagName);
-}
-
-// Construct a [clang-tidy-nolint] diagnostic to do with the unmatched
-// NOLINT(BEGIN/END) pair.
-static tooling::Diagnostic makeNoLintError(const SourceManager &SrcMgr,
-                                           FileID File,
-                                           const NoLintToken &NoLint) {
-  tooling::Diagnostic Error;
-  Error.DiagLevel = tooling::Diagnostic::Error;
-  Error.DiagnosticName = "clang-tidy-nolint";
-  StringRef Message =
-      (NoLint.Type == NoLintType::NoLintBegin)
-          ? ("unmatched 'NOLINTBEGIN' comment without a subsequent 'NOLINT"
-             "END' comment")
-          : ("unmatched 'NOLINTEND' comment without a previous 'NOLINT"
-             "BEGIN' comment");
-  SourceLocation Loc = SrcMgr.getComposedLoc(File, NoLint.Pos);
-  Error.Message = tooling::DiagnosticMessage(Message, SrcMgr, Loc);
-  return Error;
-}
-
-// Find all NOLINT(BEGIN/END) blocks in a file and store in the cache.
-void NoLintDirectiveHandler::Impl::generateCache(
-    const SourceManager &SrcMgr, StringRef FileName, FileID File,
-    StringRef Buffer, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors) {
-  // Read entire file to get all NOLINTs.
-  SmallVector<NoLintToken> NoLints = getNoLints(Buffer);
-
-  // Match each BEGIN with its corresponding END.
-  SmallVector<NoLintToken> UnmatchedTokens;
-  Cache[FileName] = formNoLintBlocks(std::move(NoLints), UnmatchedTokens);
-
-  // Raise error for any BEGIN/END left over.
-  for (const NoLintToken &NoLint : UnmatchedTokens)
-    NoLintErrors.emplace_back(makeNoLintError(SrcMgr, File, NoLint));
-}
-
-//===----------------------------------------------------------------------===//
-// NoLintDirectiveHandler
-//===----------------------------------------------------------------------===//
-
-NoLintDirectiveHandler::NoLintDirectiveHandler()
-    : PImpl(std::make_unique<Impl>()) {}
-
-NoLintDirectiveHandler::~NoLintDirectiveHandler() = default;
-
-bool NoLintDirectiveHandler::shouldSuppress(
-    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Diag,
-    StringRef DiagName, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
-    bool AllowIO, bool EnableNoLintBlocks) {
-  return PImpl->shouldSuppress(DiagLevel, Diag, DiagName, NoLintErrors, AllowIO,
-                               EnableNoLintBlocks);
-}
-
-} // namespace tidy
-} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
deleted file mode 100644
index db9fd593283c7..0000000000000
--- a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
+++ /dev/null
@@ -1,51 +0,0 @@
-//===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h ----*- C++ *-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
-#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
-
-#include "clang/Basic/Diagnostic.h"
-#include "llvm/ADT/StringRef.h"
-#include <memory>
-
-namespace clang {
-namespace tooling {
-struct Diagnostic;
-} // namespace tooling
-} // namespace clang
-
-namespace llvm {
-template <typename T> class SmallVectorImpl;
-} // namespace llvm
-
-namespace clang {
-namespace tidy {
-
-/// This class is used to locate NOLINT comments in the file being analyzed, to
-/// decide whether a diagnostic should be suppressed.
-/// This class keeps a cache of every NOLINT comment found so that files do not
-/// have to be repeatedly parsed each time a new diagnostic is raised.
-class NoLintDirectiveHandler {
-public:
-  NoLintDirectiveHandler();
-  ~NoLintDirectiveHandler();
-
-  bool shouldSuppress(DiagnosticsEngine::Level DiagLevel,
-                      const Diagnostic &Diag, llvm::StringRef DiagName,
-                      llvm::SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
-                      bool AllowIO, bool EnableNoLintBlocks);
-
-private:
-  class Impl;
-  std::unique_ptr<Impl> PImpl;
-};
-
-} // namespace tidy
-} // namespace clang
-
-#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 86c4b2151243c..9c64c645130bf 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -10,7 +10,6 @@
 #include "../clang-tidy/ClangTidyCheck.h"
 #include "../clang-tidy/ClangTidyDiagnosticConsumer.h"
 #include "../clang-tidy/ClangTidyModuleRegistry.h"
-#include "../clang-tidy/NoLintDirectiveHandler.h"
 #include "AST.h"
 #include "Compiler.h"
 #include "Config.h"
@@ -469,11 +468,12 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
           bool IsInsideMainFile =
               Info.hasSourceManager() &&
               isInsideMainFile(Info.getLocation(), Info.getSourceManager());
-          SmallVector<tooling::Diagnostic, 1> TidySuppressedErrors;
-          if (IsInsideMainFile && CTContext->shouldSuppressDiagnostic(
-                                      DiagLevel, Info, TidySuppressedErrors,
-                                      /*AllowIO=*/false,
-                                      /*EnableNolintBlocks=*/false)) {
+          SmallVector<tidy::ClangTidyError, 1> TidySuppressedErrors;
+          if (IsInsideMainFile &&
+              tidy::shouldSuppressDiagnostic(DiagLevel, Info, *CTContext,
+                                             TidySuppressedErrors,
+                                             /*AllowIO=*/false,
+                                             /*EnableNolintBlocks=*/false)) {
             // FIXME: should we expose the suppression error (invalid use of
             // NOLINT comments)?
             return DiagnosticsEngine::Ignored;
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
deleted file mode 100644
index d90fc069c9789..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
+++ /dev/null
@@ -1,5 +0,0 @@
-// NOLINTBEGIN
-class A { A(int i); };
-// NOLINTEND
-
-class B { B(int i); };
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
deleted file mode 100644
index 0497037d31b82..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-
-class A { A(int i); };
-
-// NOLINTBEGIN
-class B { B(int i); };
-// NOLINTEND
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
index 9ef194232b6b7..4b80bbe4eb3ca 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
@@ -28,56 +28,11 @@ class C4 { C4(int i); }; // NOLINT(google-explicit-constructor)
 
 class C5 { C5(int i); }; // NOLINT(some-check, google-explicit-constructor)
 
-class C6 { C6(int i); }; // NOLINT without-brackets-skip-all
+class C6 { C6(int i); }; // NOLINT without-brackets-skip-all, another-check
 
-// Other NOLINT* types (e.g. NEXTLINE) should not be misconstrued as a NOLINT:
-class C7 { C7(int i); }; // NOLINTNEXTLINE
+class C7 { C7(int i); }; // NOLINTNEXTLINE doesn't get misconstrued as a NOLINT
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
 
-// NOLINT must be UPPERCASE:
-// NOLINTnextline
-class C8 { C8(int i); }; // nolint
-// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
-
-// Unrecognized marker:
-// NOLINTNEXTLINEXYZ
-class C9 { C9(int i); }; // NOLINTXYZ
-// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
-
-// C-style comments are supported:
-class C10 { C10(int i); }; /* NOLINT */
-/* NOLINT */ class C11 { C11(int i); };
-
-// Multiple NOLINTs in the same comment:
-class C12 { C12(int i); }; // NOLINT(some-other-check) NOLINT(google-explicit-constructor)
-class C13 { C13(int i); }; // NOLINT(google-explicit-constructor) NOLINT(some-other-check)
-class C14 { C14(int i); }; // NOLINTNEXTLINE(some-other-check) NOLINT(google-explicit-constructor)
-
-// NOLINTNEXTLINE(google-explicit-constructor) NOLINT(some-other-check)
-class C15 { C15(int i); }; 
-
-// Any text after a NOLINT expression is treated as a comment:
-class C16 { C16(int i); }; // NOLINT: suppress check because <reason>
-class C17 { C17(int i); }; // NOLINT(google-explicit-constructor): suppress check because <reason>
-
-// NOLINT must appear in its entirety on one line:
-class C18 { C18(int i); }; /* NOL
-INT */
-// CHECK-MESSAGES: :[[@LINE-2]]:13: warning: single-argument constructors must be marked explicit
-
-/* NO
-LINT */ class C19 { C19(int i); };
-// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: single-argument constructors must be marked explicit
-
-// Spaces between items in the comma-separated check list are ignroed:
-class C20 { C20(int i); }; // NOLINT( google-explicit-constructor )
-class C21 { C21(int i); }; // NOLINT( google-explicit-constructor , some-other-check )
-class C22 { C22(int i); }; // NOLINT(google-explicit- constructor)
-// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: single-argument constructors must be marked explicit
-
-// If there is a space between "NOLINT" and the bracket, it is treated as a regular NOLINT: 
-class C23 { C23(int i); }; // NOLINT (some-other-check)
-
 void f() {
   int i;
 // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: unused variable 'i' [clang-diagnostic-unused-variable]
@@ -116,4 +71,4 @@ int array2[10];  // NOLINT(cppcoreguidelines-avoid-c-arrays)
 int array3[10];  // NOLINT(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
 int array4[10];  // NOLINT(*-avoid-c-arrays)
 
-// CHECK-MESSAGES: Suppressed 34 warnings (34 NOLINT)
+// CHECK-MESSAGES: Suppressed 23 warnings (23 NOLINT)
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
deleted file mode 100644
index ee5b1cca755ab..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
+++ /dev/null
@@ -1,19 +0,0 @@
-// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(google-explicit-constructor)
-// NOLINTBEGIN(google-readability-casting)
-class A { A(int i); };
-auto Num = (unsigned int)(-1);
-// NOLINTEND(google-explicit-constructor)
-// NOLINTEND(google-readability-casting)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
deleted file mode 100644
index 90b9fa9883024..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
-
-// NOLINTBEGIN
-class B { B(int i); };
-// NOLINTEND(*)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
deleted file mode 100644
index 6ffa914e4ef0b..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
-
-// NOLINTBEGIN
-class A { A(int i); };
-// NOLINTEND(google-explicit-constructor)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
deleted file mode 100644
index 3697d5c11e2e2..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(*)
-class B { B(int i); };
-// NOLINTEND
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
deleted file mode 100644
index 5bdb117f20242..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(*)
-class B { B(int i); };
-// NOLINTEND(google-explicit-constructor)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
new file mode 100644
index 0000000000000..01e69ddf6352a
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
@@ -0,0 +1,25 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN
+class A { A(int i); };
+// NOLINTEND(google-explicit-constructor)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
+
+// NOLINTBEGIN
+class B { B(int i); };
+// NOLINTEND(*)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
deleted file mode 100644
index 150913ce0ec69..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(google-explicit-constructor,google-readability-casting)
-class B { B(int i); };
-// NOLINTEND(google-explicit-constructor)
-auto Num2 = (unsigned int)(-1);
-// NOLINTEND(google-readability-casting)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
-// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
deleted file mode 100644
index f9a915c890cbb..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
+++ /dev/null
@@ -1,22 +0,0 @@
-// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(google-explicit-constructor)
-// NOLINTBEGIN(google-readability-casting)
-class B { B(int i); };
-auto Num2 = (unsigned int)(-1);
-// NOLINTEND(google-explicit-constructor,google-readability-casting)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-13]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
-// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
deleted file mode 100644
index decfe2dd5a4c1..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(google-explicit-constructor)
-class A { A(int i); };
-// NOLINTEND
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
deleted file mode 100644
index a9f904ccce138..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
+++ /dev/null
@@ -1,16 +0,0 @@
-// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(google-explicit-constructor)
-class A { A(int i); };
-// NOLINTEND(*)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
new file mode 100644
index 0000000000000..c6fcfddd09ba8
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
@@ -0,0 +1,25 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+class A { A(int i); };
+// NOLINTEND
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
+
+// NOLINTBEGIN(*)
+class B { B(int i); };
+// NOLINTEND
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
index 8d7786fb8c712..957b0b341a3cc 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
@@ -16,3 +16,24 @@ auto Num = (unsigned int)(-1);
 // CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
 // CHECK: TEND' comment without a previous 'NOLIN
 // CHECK: TBEGIN' comment [clang-tidy-nolint]
+
+// NOLINTBEGIN(google-explicit-constructor,google-readability-casting)
+class B { B(int i); };
+// NOLINTEND(google-explicit-constructor)
+auto Num2 = (unsigned int)(-1);
+// NOLINTEND(google-readability-casting)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
+// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
index 4b8947e369f92..7ed5fb820e509 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
@@ -11,3 +11,4 @@ class A { A(int i); };
 // CHECK: :[[@LINE-8]]:4: error: unmatched 'NOLIN
 // CHECK: TBEGIN' comment without a subsequent 'NOLIN
 // CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp
deleted file mode 100644
index 773e023cb22c9..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-// RUN: clang-tidy %S/Inputs/nolintbeginend/1st-translation-unit.cpp %S/Inputs/nolintbeginend/2nd-translation-unit.cpp --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
-
-// CHECK-NOT: 1st-translation-unit.cpp:2:11: warning: single-argument constructors must be marked explicit
-// CHECK: 1st-translation-unit.cpp:5:11: warning: single-argument constructors must be marked explicit
-// CHECK: 2nd-translation-unit.cpp:2:11: warning: single-argument constructors must be marked explicit
-// CHECK-NOT: 2nd-translation-unit.cpp:5:11: warning: single-argument constructors must be marked explicit
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
index 57e1ff331c8ba..0f2f9994c1fa0 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
@@ -11,6 +11,3 @@ class A { A(int i); };
 // CHECK: TBEGIN' comment without a subsequent 'NOLIN
 // CHECK: TEND' comment [clang-tidy-nolint]
 // CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
index ae97bc5b51441..8379128530180 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
@@ -59,23 +59,29 @@ class C6 { C6(int i); };
 // NOLINTEND(google-explicit-constructor)
 
 // NOLINTBEGIN(google-explicit-constructor)
-// NOLINTBEGIN
+// NOLINTBEGIN(some-other-check)
 class C7 { C7(int i); };
+// NOLINTEND(google-explicit-constructor)
+// NOLINTEND(some-other-check)
+
+// NOLINTBEGIN(google-explicit-constructor)
+// NOLINTBEGIN
+class C8 { C8(int i); };
 // NOLINTEND
 // NOLINTEND(google-explicit-constructor)
 
 // NOLINTBEGIN
 // NOLINTBEGIN(google-explicit-constructor)
-class C8 { C8(int i); };
+class C9 { C9(int i); };
 // NOLINTEND(google-explicit-constructor)
 // NOLINTEND
 
 // NOLINTBEGIN(not-closed-bracket-is-treated-as-skip-all
-class C9 { C9(int i); };
+class C10 { C10(int i); };
 // NOLINTEND(not-closed-bracket-is-treated-as-skip-all
 
 // NOLINTBEGIN without-brackets-skip-all, another-check
-class C10 { C10(int i); };
+class C11 { C11(int i); };
 // NOLINTEND without-brackets-skip-all, another-check
 
 #define MACRO(X) class X { X(int i); };
@@ -108,28 +114,28 @@ MACRO_WRAPPED_WITH_NO_LINT
 MACRO_NO_LINT_INSIDE_MACRO
 
 // NOLINTBEGIN(google*)
-class C11 { C11(int i); };
+class C12 { C12(int i); };
 // NOLINTEND(google*)
 
 // NOLINTBEGIN(*explicit-constructor)
-class C12 { C12(int i); };
+class C15 { C15(int i); };
 // NOLINTEND(*explicit-constructor)
 
 // NOLINTBEGIN(*explicit*)
-class C13 { C13(int i); };
+class C16 { C16(int i); };
 // NOLINTEND(*explicit*)
 
 // NOLINTBEGIN(-explicit-constructor)
-class C14 { C14(int x); };
+class C17 { C17(int x); };
 // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: single-argument constructors must be marked explicit
 // NOLINTEND(-explicit-constructor)
 
 // NOLINTBEGIN(google*,-google*)
-class C15 { C15(int x); };
+class C18 { C18(int x); };
 // NOLINTEND(google*,-google*)
 
 // NOLINTBEGIN(*,-google*)
-class C16 { C16(int x); };
+class C19 { C19(int x); };
 // NOLINTEND(*,-google*)
 
 int array1[10];
@@ -148,4 +154,4 @@ int array3[10];
 int array4[10];
 // NOLINTEND(*-avoid-c-arrays)
 
-// CHECK-MESSAGES: Suppressed 26 warnings (26 NOLINT).
+// CHECK-MESSAGES: Suppressed 27 warnings (27 NOLINT).

From 3d8fa00b2d24799d234fbfdca97c84954b629ac3 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Wed, 26 Jan 2022 11:52:58 +0000
Subject: [PATCH 703/946] [gn build] Port 8e29d19b8d29

---
 llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
index cd987b6d87ed2..cd65d60831cc2 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
@@ -58,7 +58,6 @@ static_library("clang-tidy") {
     "ClangTidyProfiling.cpp",
     "ExpandModularHeadersPPCallbacks.cpp",
     "GlobList.cpp",
-    "NoLintDirectiveHandler.cpp",
   ]
 }
 

From 5da7c040030c4af72dcc21220f579098469c554e Mon Sep 17 00:00:00 2001
From: Salman Javed <mail@salmanjaved.org>
Date: Thu, 27 Jan 2022 01:02:35 +1300
Subject: [PATCH 704/946] Re-land "Cache the locations of NOLINTBEGIN/END
 blocks" with fix for build bot

---
 clang-tools-extra/clang-tidy/CMakeLists.txt   |   1 +
 .../ClangTidyDiagnosticConsumer.cpp           | 230 +---------
 .../clang-tidy/ClangTidyDiagnosticConsumer.h  |  47 +-
 .../clang-tidy/NoLintDirectiveHandler.cpp     | 415 ++++++++++++++++++
 .../clang-tidy/NoLintDirectiveHandler.h       |  51 +++
 clang-tools-extra/clangd/ParsedAST.cpp        |  12 +-
 .../nolintbeginend/1st-translation-unit.cpp   |   5 +
 .../nolintbeginend/2nd-translation-unit.cpp   |   6 +
 .../test/clang-tidy/infrastructure/nolint.cpp |  51 ++-
 .../infrastructure/nolintbeginend-LIFO.cpp    |  19 +
 .../nolintbeginend-begin-all-end-glob.cpp     |  16 +
 .../nolintbeginend-begin-all-end-specific.cpp |  16 +
 .../nolintbeginend-begin-glob-end-all.cpp     |  16 +
 ...nolintbeginend-begin-glob-end-specific.cpp |  16 +
 ...lintbeginend-begin-global-end-specific.cpp |  25 --
 ...lintbeginend-begin-multiple-end-single.cpp |  22 +
 ...lintbeginend-begin-single-end-multiple.cpp |  22 +
 .../nolintbeginend-begin-specific-end-all.cpp |  16 +
 ...nolintbeginend-begin-specific-end-glob.cpp |  16 +
 ...lintbeginend-begin-specific-end-global.cpp |  25 --
 .../nolintbeginend-mismatched-check-names.cpp |  21 -
 .../nolintbeginend-mismatched-delims.cpp      |   1 -
 .../nolintbeginend-multiple-TUs.cpp           |   6 +
 .../nolintbeginend-typo-in-check-name.cpp     |   3 +
 .../infrastructure/nolintbeginend.cpp         |  28 +-
 25 files changed, 752 insertions(+), 334 deletions(-)
 create mode 100644 clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
 create mode 100644 clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
 delete mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
 create mode 100644 clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp

diff --git a/clang-tools-extra/clang-tidy/CMakeLists.txt b/clang-tools-extra/clang-tidy/CMakeLists.txt
index 075e9f9909d65..8a953eeea2759 100644
--- a/clang-tools-extra/clang-tidy/CMakeLists.txt
+++ b/clang-tools-extra/clang-tidy/CMakeLists.txt
@@ -17,6 +17,7 @@ add_clang_library(clangTidy
   ClangTidyProfiling.cpp
   ExpandModularHeadersPPCallbacks.cpp
   GlobList.cpp
+  NoLintDirectiveHandler.cpp
 
   DEPENDS
   ClangSACheckers
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
index 66f60ec60b605..04721a2d3a020 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
@@ -18,6 +18,7 @@
 #include "ClangTidyDiagnosticConsumer.h"
 #include "ClangTidyOptions.h"
 #include "GlobList.h"
+#include "NoLintDirectiveHandler.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/ASTDiagnostic.h"
 #include "clang/AST/Attr.h"
@@ -188,7 +189,7 @@ DiagnosticBuilder ClangTidyContext::diag(
   return DiagEngine->Report(ID);
 }
 
-DiagnosticBuilder ClangTidyContext::diag(const ClangTidyError &Error) {
+DiagnosticBuilder ClangTidyContext::diag(const tooling::Diagnostic &Error) {
   SourceManager &SM = DiagEngine->getSourceManager();
   llvm::ErrorOr<const FileEntry *> File =
       SM.getFileManager().getFile(Error.Message.FilePath);
@@ -206,6 +207,15 @@ DiagnosticBuilder ClangTidyContext::configurationDiag(
   return diag("clang-tidy-config", Message, Level);
 }
 
+bool ClangTidyContext::shouldSuppressDiagnostic(
+    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
+    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
+    bool EnableNoLintBlocks) {
+  std::string CheckName = getCheckName(Info.getID());
+  return NoLintHandler.shouldSuppress(DiagLevel, Info, CheckName, NoLintErrors,
+                                      AllowIO, EnableNoLintBlocks);
+}
+
 void ClangTidyContext::setSourceManager(SourceManager *SourceMgr) {
   DiagEngine->setSourceManager(SourceMgr);
 }
@@ -307,218 +317,9 @@ void ClangTidyDiagnosticConsumer::finalizeLastError() {
   LastErrorPassesLineFilter = false;
 }
 
-static bool isNOLINTFound(StringRef NolintDirectiveText, StringRef CheckName,
-                          StringRef Line, size_t *FoundNolintIndex = nullptr,
-                          StringRef *FoundNolintChecksStr = nullptr) {
-  if (FoundNolintIndex)
-    *FoundNolintIndex = StringRef::npos;
-  if (FoundNolintChecksStr)
-    *FoundNolintChecksStr = StringRef();
-
-  size_t NolintIndex = Line.find(NolintDirectiveText);
-  if (NolintIndex == StringRef::npos)
-    return false;
-
-  size_t BracketIndex = NolintIndex + NolintDirectiveText.size();
-  if (BracketIndex < Line.size() && isalnum(Line[BracketIndex])) {
-    // Reject this search result, otherwise it will cause false positives when
-    // NOLINT is found as a substring of NOLINT(NEXTLINE/BEGIN/END).
-    return false;
-  }
-
-  // Check if specific checks are specified in brackets.
-  if (BracketIndex < Line.size() && Line[BracketIndex] == '(') {
-    ++BracketIndex;
-    const size_t BracketEndIndex = Line.find(')', BracketIndex);
-    if (BracketEndIndex != StringRef::npos) {
-      StringRef ChecksStr =
-          Line.substr(BracketIndex, BracketEndIndex - BracketIndex);
-      if (FoundNolintChecksStr)
-        *FoundNolintChecksStr = ChecksStr;
-      // Allow specifying a few checks with a glob expression, ignoring
-      // negative globs (which would effectively disable the suppression).
-      GlobList Globs(ChecksStr, /*KeepNegativeGlobs=*/false);
-      if (!Globs.contains(CheckName))
-        return false;
-    }
-  }
-
-  if (FoundNolintIndex)
-    *FoundNolintIndex = NolintIndex;
-
-  return true;
-}
-
-static llvm::Optional<StringRef> getBuffer(const SourceManager &SM, FileID File,
-                                           bool AllowIO) {
-  return AllowIO ? SM.getBufferDataOrNone(File)
-                 : SM.getBufferDataIfLoaded(File);
-}
-
-static ClangTidyError createNolintError(const ClangTidyContext &Context,
-                                        const SourceManager &SM,
-                                        SourceLocation Loc,
-                                        bool IsNolintBegin) {
-  ClangTidyError Error("clang-tidy-nolint", ClangTidyError::Error,
-                       Context.getCurrentBuildDirectory(), false);
-  StringRef Message =
-      IsNolintBegin
-          ? ("unmatched 'NOLINTBEGIN' comment without a subsequent 'NOLINT"
-             "END' comment")
-          : ("unmatched 'NOLINTEND' comment without a previous 'NOLINT"
-             "BEGIN' comment");
-  Error.Message = tooling::DiagnosticMessage(Message, SM, Loc);
-  return Error;
-}
-
-static Optional<ClangTidyError> tallyNolintBegins(
-    const ClangTidyContext &Context, const SourceManager &SM,
-    StringRef CheckName, SmallVector<StringRef> Lines, SourceLocation LinesLoc,
-    SmallVector<std::pair<SourceLocation, StringRef>> &NolintBegins) {
-  // Keep a running total of how many NOLINT(BEGIN...END) blocks are active, as
-  // well as the bracket expression (if any) that was used in the NOLINT
-  // expression.
-  size_t NolintIndex;
-  StringRef NolintChecksStr;
-  for (const auto &Line : Lines) {
-    if (isNOLINTFound("NOLINTBEGIN", CheckName, Line, &NolintIndex,
-                      &NolintChecksStr)) {
-      // Check if a new block is being started.
-      NolintBegins.emplace_back(std::make_pair(
-          LinesLoc.getLocWithOffset(NolintIndex), NolintChecksStr));
-    } else if (isNOLINTFound("NOLINTEND", CheckName, Line, &NolintIndex,
-                             &NolintChecksStr)) {
-      // Check if the previous block is being closed.
-      if (!NolintBegins.empty() &&
-          NolintBegins.back().second == NolintChecksStr) {
-        NolintBegins.pop_back();
-      } else {
-        // Trying to close a nonexistent block. Return a diagnostic about this
-        // misuse that can be displayed along with the original clang-tidy check
-        // that the user was attempting to suppress.
-        return createNolintError(Context, SM,
-                                 LinesLoc.getLocWithOffset(NolintIndex), false);
-      }
-    }
-    // Advance source location to the next line.
-    LinesLoc = LinesLoc.getLocWithOffset(Line.size() + sizeof('\n'));
-  }
-  return None; // All NOLINT(BEGIN/END) use has been consistent so far.
-}
-
-static bool
-lineIsWithinNolintBegin(const ClangTidyContext &Context,
-                        SmallVectorImpl<ClangTidyError> &SuppressionErrors,
-                        const SourceManager &SM, SourceLocation Loc,
-                        StringRef CheckName, StringRef TextBeforeDiag,
-                        StringRef TextAfterDiag) {
-  Loc = SM.getExpansionRange(Loc).getBegin();
-  SourceLocation FileStartLoc = SM.getLocForStartOfFile(SM.getFileID(Loc));
-  SmallVector<std::pair<SourceLocation, StringRef>> NolintBegins;
-
-  // Check if there's an open NOLINT(BEGIN...END) block on the previous lines.
-  SmallVector<StringRef> PrevLines;
-  TextBeforeDiag.split(PrevLines, '\n');
-  auto Error = tallyNolintBegins(Context, SM, CheckName, PrevLines,
-                                 FileStartLoc, NolintBegins);
-  if (Error) {
-    SuppressionErrors.emplace_back(Error.getValue());
-  }
-  bool WithinNolintBegin = !NolintBegins.empty();
-
-  // Check that every block is terminated correctly on the following lines.
-  SmallVector<StringRef> FollowingLines;
-  TextAfterDiag.split(FollowingLines, '\n');
-  Error = tallyNolintBegins(Context, SM, CheckName, FollowingLines, Loc,
-                            NolintBegins);
-  if (Error) {
-    SuppressionErrors.emplace_back(Error.getValue());
-  }
-
-  // The following blocks were never closed. Return diagnostics for each
-  // instance that can be displayed along with the original clang-tidy check
-  // that the user was attempting to suppress.
-  for (const auto &NolintBegin : NolintBegins) {
-    SuppressionErrors.emplace_back(
-        createNolintError(Context, SM, NolintBegin.first, true));
-  }
-
-  return WithinNolintBegin && SuppressionErrors.empty();
-}
-
-static bool
-lineIsMarkedWithNOLINT(const ClangTidyContext &Context,
-                       SmallVectorImpl<ClangTidyError> &SuppressionErrors,
-                       bool AllowIO, const SourceManager &SM,
-                       SourceLocation Loc, StringRef CheckName,
-                       bool EnableNolintBlocks) {
-  // Get source code for this location.
-  FileID File;
-  unsigned Offset;
-  std::tie(File, Offset) = SM.getDecomposedSpellingLoc(Loc);
-  Optional<StringRef> Buffer = getBuffer(SM, File, AllowIO);
-  if (!Buffer)
-    return false;
-
-  // Check if there's a NOLINT on this line.
-  StringRef TextAfterDiag = Buffer->substr(Offset);
-  StringRef RestOfThisLine, FollowingLines;
-  std::tie(RestOfThisLine, FollowingLines) = TextAfterDiag.split('\n');
-  if (isNOLINTFound("NOLINT", CheckName, RestOfThisLine))
-    return true;
-
-  // Check if there's a NOLINTNEXTLINE on the previous line.
-  StringRef TextBeforeDiag = Buffer->substr(0, Offset);
-  size_t LastNewLinePos = TextBeforeDiag.rfind('\n');
-  StringRef PrevLines = (LastNewLinePos == StringRef::npos)
-                            ? StringRef()
-                            : TextBeforeDiag.slice(0, LastNewLinePos);
-  LastNewLinePos = PrevLines.rfind('\n');
-  StringRef PrevLine = (LastNewLinePos == StringRef::npos)
-                           ? PrevLines
-                           : PrevLines.substr(LastNewLinePos + 1);
-  if (isNOLINTFound("NOLINTNEXTLINE", CheckName, PrevLine))
-    return true;
-
-  // Check if this line is within a NOLINT(BEGIN...END) block.
-  return EnableNolintBlocks &&
-         lineIsWithinNolintBegin(Context, SuppressionErrors, SM, Loc, CheckName,
-                                 TextBeforeDiag, TextAfterDiag);
-}
-
-static bool lineIsMarkedWithNOLINTinMacro(
-    const Diagnostic &Info, const ClangTidyContext &Context,
-    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO,
-    bool EnableNolintBlocks) {
-  const SourceManager &SM = Info.getSourceManager();
-  SourceLocation Loc = Info.getLocation();
-  std::string CheckName = Context.getCheckName(Info.getID());
-  while (true) {
-    if (lineIsMarkedWithNOLINT(Context, SuppressionErrors, AllowIO, SM, Loc,
-                               CheckName, EnableNolintBlocks))
-      return true;
-    if (!Loc.isMacroID())
-      return false;
-    Loc = SM.getImmediateExpansionRange(Loc).getBegin();
-  }
-  return false;
-}
-
 namespace clang {
 namespace tidy {
 
-bool shouldSuppressDiagnostic(
-    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
-    ClangTidyContext &Context,
-    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO,
-    bool EnableNolintBlocks) {
-  return Info.getLocation().isValid() &&
-         DiagLevel != DiagnosticsEngine::Error &&
-         DiagLevel != DiagnosticsEngine::Fatal &&
-         lineIsMarkedWithNOLINTinMacro(Info, Context, SuppressionErrors,
-                                       AllowIO, EnableNolintBlocks);
-}
-
 const llvm::StringMap<tooling::Replacements> *
 getFixIt(const tooling::Diagnostic &Diagnostic, bool GetFixFromNotes) {
   if (!Diagnostic.Message.Fix.empty())
@@ -545,12 +346,15 @@ void ClangTidyDiagnosticConsumer::HandleDiagnostic(
   if (LastErrorWasIgnored && DiagLevel == DiagnosticsEngine::Note)
     return;
 
-  SmallVector<ClangTidyError, 1> SuppressionErrors;
-  if (shouldSuppressDiagnostic(DiagLevel, Info, Context, SuppressionErrors,
-                               EnableNolintBlocks)) {
+  SmallVector<tooling::Diagnostic, 1> SuppressionErrors;
+  if (Context.shouldSuppressDiagnostic(DiagLevel, Info, SuppressionErrors,
+                                       EnableNolintBlocks)) {
     ++Context.Stats.ErrorsIgnoredNOLINT;
     // Ignored a warning, should ignore related notes as well
     LastErrorWasIgnored = true;
+    Context.DiagEngine->Clear();
+    for (const auto &Error : SuppressionErrors)
+      Context.diag(Error);
     return;
   }
 
diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
index 69f1ceddd9bd2..e41003262cccf 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
@@ -11,6 +11,7 @@
 
 #include "ClangTidyOptions.h"
 #include "ClangTidyProfiling.h"
+#include "NoLintDirectiveHandler.h"
 #include "clang/Basic/Diagnostic.h"
 #include "clang/Tooling/Core/Diagnostic.h"
 #include "llvm/ADT/DenseMap.h"
@@ -95,13 +96,33 @@ class ClangTidyContext {
   DiagnosticBuilder diag(StringRef CheckName, StringRef Message,
                          DiagnosticIDs::Level Level = DiagnosticIDs::Warning);
 
-  DiagnosticBuilder diag(const ClangTidyError &Error);
+  DiagnosticBuilder diag(const tooling::Diagnostic &Error);
 
   /// Report any errors to do with reading the configuration using this method.
   DiagnosticBuilder
   configurationDiag(StringRef Message,
                     DiagnosticIDs::Level Level = DiagnosticIDs::Warning);
 
+  /// Check whether a given diagnostic should be suppressed due to the presence
+  /// of a "NOLINT" suppression comment.
+  /// This is exposed so that other tools that present clang-tidy diagnostics
+  /// (such as clangd) can respect the same suppression rules as clang-tidy.
+  /// This does not handle suppression of notes following a suppressed
+  /// diagnostic; that is left to the caller as it requires maintaining state in
+  /// between calls to this function.
+  /// If any NOLINT is malformed, e.g. a BEGIN without a subsequent END, an
+  /// error about it will be returned in output \param NoLintErrors.
+  /// If \param AllowIO is false, the function does not attempt to read source
+  /// files from disk which are not already mapped into memory; such files are
+  /// treated as not containing a suppression comment.
+  /// \param EnableNoLintBlocks controls whether to honor NOLINTBEGIN/NOLINTEND
+  /// blocks; if false, only considers line-level disabling.
+  bool
+  shouldSuppressDiagnostic(DiagnosticsEngine::Level DiagLevel,
+                           const Diagnostic &Info,
+                           SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                           bool AllowIO = true, bool EnableNoLintBlocks = true);
+
   /// Sets the \c SourceManager of the used \c DiagnosticsEngine.
   ///
   /// This is called from the \c ClangTidyCheck base class.
@@ -208,29 +229,9 @@ class ClangTidyContext {
   std::string ProfilePrefix;
 
   bool AllowEnablingAnalyzerAlphaCheckers;
-};
 
-/// Check whether a given diagnostic should be suppressed due to the presence
-/// of a "NOLINT" suppression comment.
-/// This is exposed so that other tools that present clang-tidy diagnostics
-/// (such as clangd) can respect the same suppression rules as clang-tidy.
-/// This does not handle suppression of notes following a suppressed diagnostic;
-/// that is left to the caller as it requires maintaining state in between calls
-/// to this function.
-/// If `AllowIO` is false, the function does not attempt to read source files
-/// from disk which are not already mapped into memory; such files are treated
-/// as not containing a suppression comment.
-/// \param EnableNolintBlocks controls whether to honor NOLINTBEGIN/NOLINTEND
-/// blocks; if false, only considers line-level disabling.
-/// If suppression is not possible due to improper use of "NOLINT" comments -
-/// for example, the use of a "NOLINTBEGIN" comment that is not followed by a
-/// "NOLINTEND" comment - a diagnostic regarding the improper use is returned
-/// via the output argument `SuppressionErrors`.
-bool shouldSuppressDiagnostic(
-    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Info,
-    ClangTidyContext &Context,
-    SmallVectorImpl<ClangTidyError> &SuppressionErrors, bool AllowIO = true,
-    bool EnableNolintBlocks = true);
+  NoLintDirectiveHandler NoLintHandler;
+};
 
 /// Gets the Fix attached to \p Diagnostic.
 /// If there isn't a Fix attached to the diagnostic and \p AnyFix is true, Check
diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
new file mode 100644
index 0000000000000..667ffdb5fab60
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp
@@ -0,0 +1,415 @@
+//===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+///  \file This file implements the NoLintDirectiveHandler class, which is used
+///  to locate NOLINT comments in the file being analyzed, to decide whether a
+///  diagnostic should be suppressed.
+///
+//===----------------------------------------------------------------------===//
+
+#include "NoLintDirectiveHandler.h"
+#include "GlobList.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
+#include "clang/Tooling/Core/Diagnostic.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <utility>
+
+namespace clang {
+namespace tidy {
+
+//===----------------------------------------------------------------------===//
+// NoLintType
+//===----------------------------------------------------------------------===//
+
+// The type - one of NOLINT[NEXTLINE/BEGIN/END].
+enum class NoLintType { NoLint, NoLintNextLine, NoLintBegin, NoLintEnd };
+
+// Convert a string like "NOLINTNEXTLINE" to its enum `Type::NoLintNextLine`.
+// Return `None` if the string is unrecognized.
+static Optional<NoLintType> strToNoLintType(StringRef Str) {
+  auto Type = llvm::StringSwitch<Optional<NoLintType>>(Str)
+                  .Case("NOLINT", NoLintType::NoLint)
+                  .Case("NOLINTNEXTLINE", NoLintType::NoLintNextLine)
+                  .Case("NOLINTBEGIN", NoLintType::NoLintBegin)
+                  .Case("NOLINTEND", NoLintType::NoLintEnd)
+                  .Default(None);
+  return Type;
+}
+
+//===----------------------------------------------------------------------===//
+// NoLintToken
+//===----------------------------------------------------------------------===//
+
+// Whitespace within a NOLINT's check list shall be ignored.
+// "NOLINT( check1, check2 )" is equivalent to "NOLINT(check1,check2)".
+// Return the check list with all extraneous whitespace removed.
+static std::string trimWhitespace(StringRef Checks) {
+  SmallVector<StringRef> Split;
+  Checks.split(Split, ',');
+  for (StringRef &Check : Split)
+    Check = Check.trim();
+  return llvm::join(Split, ",");
+}
+
+namespace {
+
+// Record the presence of a NOLINT comment - its type, location, checks -
+// as parsed from the file's character contents.
+class NoLintToken {
+public:
+  // \param Checks:
+  // - If unspecified (i.e. `None`) then ALL checks are suppressed - equivalent
+  //   to NOLINT(*).
+  // - An empty string means nothing is suppressed - equivalent to NOLINT().
+  // - Negative globs ignored (which would effectively disable the suppression).
+  NoLintToken(NoLintType Type, size_t Pos, const Optional<std::string> &Checks)
+      : Type(Type), Pos(Pos), ChecksGlob(std::make_unique<CachedGlobList>(
+                                  Checks.getValueOr("*"),
+                                  /*KeepNegativeGlobs=*/false)) {
+    if (Checks)
+      this->Checks = trimWhitespace(*Checks);
+  }
+
+  // The type - one of NOLINT[NEXTLINE/BEGIN/END].
+  NoLintType Type;
+
+  // The location of the first character, "N", in "NOLINT".
+  size_t Pos;
+
+  // If this NOLINT specifies checks, return the checks.
+  Optional<std::string> checks() const { return Checks; }
+
+  // Whether this NOLINT applies to the provided check.
+  bool suppresses(StringRef Check) const { return ChecksGlob->contains(Check); }
+
+private:
+  Optional<std::string> Checks;
+  std::unique_ptr<CachedGlobList> ChecksGlob;
+};
+
+} // namespace
+
+// Consume the entire buffer and return all `NoLintToken`s that were found.
+static SmallVector<NoLintToken> getNoLints(StringRef Buffer) {
+  static constexpr llvm::StringLiteral NOLINT = "NOLINT";
+  SmallVector<NoLintToken> NoLints;
+
+  size_t Pos = 0;
+  while (Pos < Buffer.size()) {
+    // Find NOLINT:
+    const size_t NoLintPos = Buffer.find(NOLINT, Pos);
+    if (NoLintPos == StringRef::npos)
+      break; // Buffer exhausted
+
+    // Read [A-Z] characters immediately after "NOLINT", e.g. the "NEXTLINE" in
+    // "NOLINTNEXTLINE".
+    Pos = NoLintPos + NOLINT.size();
+    while (Pos < Buffer.size() && llvm::isAlpha(Buffer[Pos]))
+      ++Pos;
+
+    // Is this a recognized NOLINT type?
+    const Optional<NoLintType> NoLintType =
+        strToNoLintType(Buffer.slice(NoLintPos, Pos));
+    if (!NoLintType)
+      continue;
+
+    // Get checks, if specified.
+    Optional<std::string> Checks;
+    if (Pos < Buffer.size() && Buffer[Pos] == '(') {
+      size_t ClosingBracket = Buffer.find_first_of("\n)", ++Pos);
+      if (ClosingBracket != StringRef::npos && Buffer[ClosingBracket] == ')') {
+        Checks = Buffer.slice(Pos, ClosingBracket).str();
+        Pos = ClosingBracket + 1;
+      }
+    }
+
+    NoLints.emplace_back(*NoLintType, NoLintPos, Checks);
+  }
+
+  return NoLints;
+}
+
+//===----------------------------------------------------------------------===//
+// NoLintBlockToken
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// Represents a source range within a pair of NOLINT(BEGIN/END) comments.
+class NoLintBlockToken {
+public:
+  NoLintBlockToken(NoLintToken Begin, const NoLintToken &End)
+      : Begin(std::move(Begin)), EndPos(End.Pos) {
+    assert(this->Begin.Type == NoLintType::NoLintBegin);
+    assert(End.Type == NoLintType::NoLintEnd);
+    assert(this->Begin.Pos < End.Pos);
+    assert(this->Begin.checks() == End.checks());
+  }
+
+  // Whether the provided diagnostic is within and is suppressible by this block
+  // of NOLINT(BEGIN/END) comments.
+  bool suppresses(size_t DiagPos, StringRef DiagName) const {
+    return (Begin.Pos < DiagPos) && (DiagPos < EndPos) &&
+           Begin.suppresses(DiagName);
+  }
+
+private:
+  NoLintToken Begin;
+  size_t EndPos;
+};
+
+} // namespace
+
+// Match NOLINTBEGINs with their corresponding NOLINTENDs and move them into
+// `NoLintBlockToken`s. If any BEGINs or ENDs are left over, they are moved to
+// `UnmatchedTokens`.
+static SmallVector<NoLintBlockToken>
+formNoLintBlocks(SmallVector<NoLintToken> NoLints,
+                 SmallVectorImpl<NoLintToken> &UnmatchedTokens) {
+  SmallVector<NoLintBlockToken> CompletedBlocks;
+  SmallVector<NoLintToken> Stack;
+
+  // Nested blocks must be fully contained within their parent block. What this
+  // means is that when you have a series of nested BEGIN tokens, the END tokens
+  // shall appear in the reverse order, starting with the closing of the
+  // inner-most block first, then the next level up, and so on. This is
+  // essentially a last-in-first-out/stack system.
+  for (NoLintToken &NoLint : NoLints) {
+    if (NoLint.Type == NoLintType::NoLintBegin)
+      // A new block is being started. Add it to the stack.
+      Stack.emplace_back(std::move(NoLint));
+    else if (NoLint.Type == NoLintType::NoLintEnd) {
+      if (!Stack.empty() && Stack.back().checks() == NoLint.checks())
+        // The previous block is being closed. Pop one element off the stack.
+        CompletedBlocks.emplace_back(Stack.pop_back_val(), NoLint);
+      else
+        // Trying to close the wrong block.
+        UnmatchedTokens.emplace_back(std::move(NoLint));
+    }
+  }
+
+  llvm::move(Stack, std::back_inserter(UnmatchedTokens));
+  return CompletedBlocks;
+}
+
+//===----------------------------------------------------------------------===//
+// NoLintDirectiveHandler::Impl
+//===----------------------------------------------------------------------===//
+
+class NoLintDirectiveHandler::Impl {
+public:
+  bool shouldSuppress(DiagnosticsEngine::Level DiagLevel,
+                      const Diagnostic &Diag, StringRef DiagName,
+                      SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                      bool AllowIO, bool EnableNoLintBlocks);
+
+private:
+  bool diagHasNoLintInMacro(const Diagnostic &Diag, StringRef DiagName,
+                            SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                            bool AllowIO, bool EnableNoLintBlocks);
+
+  bool diagHasNoLint(StringRef DiagName, SourceLocation DiagLoc,
+                     const SourceManager &SrcMgr,
+                     SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                     bool AllowIO, bool EnableNoLintBlocks);
+
+  void generateCache(const SourceManager &SrcMgr, StringRef FileName,
+                     FileID File, StringRef Buffer,
+                     SmallVectorImpl<tooling::Diagnostic> &NoLintErrors);
+
+  llvm::StringMap<SmallVector<NoLintBlockToken>> Cache;
+};
+
+bool NoLintDirectiveHandler::Impl::shouldSuppress(
+    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Diag,
+    StringRef DiagName, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+    bool AllowIO, bool EnableNoLintBlocks) {
+  if (DiagLevel >= DiagnosticsEngine::Error)
+    return false;
+  return diagHasNoLintInMacro(Diag, DiagName, NoLintErrors, AllowIO,
+                              EnableNoLintBlocks);
+}
+
+// Look at the macro's spelling location for a NOLINT. If none is found, keep
+// looking up the call stack.
+bool NoLintDirectiveHandler::Impl::diagHasNoLintInMacro(
+    const Diagnostic &Diag, StringRef DiagName,
+    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
+    bool EnableNoLintBlocks) {
+  SourceLocation DiagLoc = Diag.getLocation();
+  if (DiagLoc.isInvalid())
+    return false;
+  const SourceManager &SrcMgr = Diag.getSourceManager();
+  while (true) {
+    if (diagHasNoLint(DiagName, DiagLoc, SrcMgr, NoLintErrors, AllowIO,
+                      EnableNoLintBlocks))
+      return true;
+    if (!DiagLoc.isMacroID())
+      return false;
+    DiagLoc = SrcMgr.getImmediateMacroCallerLoc(DiagLoc);
+  }
+  return false;
+}
+
+// Look behind and ahead for '\n' characters. These mark the start and end of
+// this line.
+static std::pair<size_t, size_t> getLineStartAndEnd(StringRef Buffer,
+                                                    size_t From) {
+  size_t StartPos = Buffer.find_last_of('\n', From) + 1;
+  size_t EndPos = std::min(Buffer.find('\n', From), Buffer.size());
+  return std::make_pair(StartPos, EndPos);
+}
+
+// Whether the line has a NOLINT of type = `Type` that can suppress the
+// diagnostic `DiagName`.
+static bool lineHasNoLint(StringRef Buffer,
+                          std::pair<size_t, size_t> LineStartAndEnd,
+                          NoLintType Type, StringRef DiagName) {
+  // Get all NOLINTs on the line.
+  Buffer = Buffer.slice(LineStartAndEnd.first, LineStartAndEnd.second);
+  SmallVector<NoLintToken> NoLints = getNoLints(Buffer);
+
+  // Do any of these NOLINTs match the desired type and diag name?
+  return llvm::any_of(NoLints, [&](const NoLintToken &NoLint) {
+    return NoLint.Type == Type && NoLint.suppresses(DiagName);
+  });
+}
+
+// Whether the provided diagnostic is located within and is suppressible by a
+// block of NOLINT(BEGIN/END) comments.
+static bool withinNoLintBlock(ArrayRef<NoLintBlockToken> NoLintBlocks,
+                              size_t DiagPos, StringRef DiagName) {
+  return llvm::any_of(NoLintBlocks, [&](const NoLintBlockToken &NoLintBlock) {
+    return NoLintBlock.suppresses(DiagPos, DiagName);
+  });
+}
+
+// Get the file contents as a string.
+static Optional<StringRef> getBuffer(const SourceManager &SrcMgr, FileID File,
+                                     bool AllowIO) {
+  return AllowIO ? SrcMgr.getBufferDataOrNone(File)
+                 : SrcMgr.getBufferDataIfLoaded(File);
+}
+
+// We will check for NOLINTs and NOLINTNEXTLINEs first. Checking for these is
+// not so expensive (just need to parse the current and previous lines). Only if
+// that fails do we look for NOLINT(BEGIN/END) blocks (which requires reading
+// the entire file).
+bool NoLintDirectiveHandler::Impl::diagHasNoLint(
+    StringRef DiagName, SourceLocation DiagLoc, const SourceManager &SrcMgr,
+    SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
+    bool EnableNoLintBlocks) {
+  // Translate the diagnostic's SourceLocation to a raw file + offset pair.
+  FileID File;
+  unsigned int Pos = 0;
+  std::tie(File, Pos) = SrcMgr.getDecomposedSpellingLoc(DiagLoc);
+
+  // We will only see NOLINTs in user-authored sources. No point reading the
+  // file if it is a <built-in>.
+  Optional<StringRef> FileName = SrcMgr.getNonBuiltinFilenameForID(File);
+  if (!FileName)
+    return false;
+
+  // Get file contents.
+  Optional<StringRef> Buffer = getBuffer(SrcMgr, File, AllowIO);
+  if (!Buffer)
+    return false;
+
+  // Check if there's a NOLINT on this line.
+  auto ThisLine = getLineStartAndEnd(*Buffer, Pos);
+  if (lineHasNoLint(*Buffer, ThisLine, NoLintType::NoLint, DiagName))
+    return true;
+
+  // Check if there's a NOLINTNEXTLINE on the previous line.
+  if (ThisLine.first > 0) {
+    auto PrevLine = getLineStartAndEnd(*Buffer, ThisLine.first - 1);
+    if (lineHasNoLint(*Buffer, PrevLine, NoLintType::NoLintNextLine, DiagName))
+      return true;
+  }
+
+  // Check if this line is within a NOLINT(BEGIN/END) block.
+  if (!EnableNoLintBlocks)
+    return false;
+
+  // Do we have cached NOLINT block locations for this file?
+  if (Cache.count(*FileName) == 0)
+    // Warning: heavy operation - need to read entire file.
+    generateCache(SrcMgr, *FileName, File, *Buffer, NoLintErrors);
+
+  return withinNoLintBlock(Cache[*FileName], Pos, DiagName);
+}
+
+// Construct a [clang-tidy-nolint] diagnostic to do with the unmatched
+// NOLINT(BEGIN/END) pair.
+static tooling::Diagnostic makeNoLintError(const SourceManager &SrcMgr,
+                                           FileID File,
+                                           const NoLintToken &NoLint) {
+  tooling::Diagnostic Error;
+  Error.DiagLevel = tooling::Diagnostic::Error;
+  Error.DiagnosticName = "clang-tidy-nolint";
+  StringRef Message =
+      (NoLint.Type == NoLintType::NoLintBegin)
+          ? ("unmatched 'NOLINTBEGIN' comment without a subsequent 'NOLINT"
+             "END' comment")
+          : ("unmatched 'NOLINTEND' comment without a previous 'NOLINT"
+             "BEGIN' comment");
+  SourceLocation Loc = SrcMgr.getComposedLoc(File, NoLint.Pos);
+  Error.Message = tooling::DiagnosticMessage(Message, SrcMgr, Loc);
+  return Error;
+}
+
+// Find all NOLINT(BEGIN/END) blocks in a file and store in the cache.
+void NoLintDirectiveHandler::Impl::generateCache(
+    const SourceManager &SrcMgr, StringRef FileName, FileID File,
+    StringRef Buffer, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors) {
+  // Read entire file to get all NOLINTs.
+  SmallVector<NoLintToken> NoLints = getNoLints(Buffer);
+
+  // Match each BEGIN with its corresponding END.
+  SmallVector<NoLintToken> UnmatchedTokens;
+  Cache[FileName] = formNoLintBlocks(std::move(NoLints), UnmatchedTokens);
+
+  // Raise error for any BEGIN/END left over.
+  for (const NoLintToken &NoLint : UnmatchedTokens)
+    NoLintErrors.emplace_back(makeNoLintError(SrcMgr, File, NoLint));
+}
+
+//===----------------------------------------------------------------------===//
+// NoLintDirectiveHandler
+//===----------------------------------------------------------------------===//
+
+NoLintDirectiveHandler::NoLintDirectiveHandler()
+    : PImpl(std::make_unique<Impl>()) {}
+
+NoLintDirectiveHandler::~NoLintDirectiveHandler() = default;
+
+bool NoLintDirectiveHandler::shouldSuppress(
+    DiagnosticsEngine::Level DiagLevel, const Diagnostic &Diag,
+    StringRef DiagName, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+    bool AllowIO, bool EnableNoLintBlocks) {
+  return PImpl->shouldSuppress(DiagLevel, Diag, DiagName, NoLintErrors, AllowIO,
+                               EnableNoLintBlocks);
+}
+
+} // namespace tidy
+} // namespace clang
diff --git a/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
new file mode 100644
index 0000000000000..db9fd593283c7
--- /dev/null
+++ b/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h
@@ -0,0 +1,51 @@
+//===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h ----*- C++ *-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
+
+#include "clang/Basic/Diagnostic.h"
+#include "llvm/ADT/StringRef.h"
+#include <memory>
+
+namespace clang {
+namespace tooling {
+struct Diagnostic;
+} // namespace tooling
+} // namespace clang
+
+namespace llvm {
+template <typename T> class SmallVectorImpl;
+} // namespace llvm
+
+namespace clang {
+namespace tidy {
+
+/// This class is used to locate NOLINT comments in the file being analyzed, to
+/// decide whether a diagnostic should be suppressed.
+/// This class keeps a cache of every NOLINT comment found so that files do not
+/// have to be repeatedly parsed each time a new diagnostic is raised.
+class NoLintDirectiveHandler {
+public:
+  NoLintDirectiveHandler();
+  ~NoLintDirectiveHandler();
+
+  bool shouldSuppress(DiagnosticsEngine::Level DiagLevel,
+                      const Diagnostic &Diag, llvm::StringRef DiagName,
+                      llvm::SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
+                      bool AllowIO, bool EnableNoLintBlocks);
+
+private:
+  class Impl;
+  std::unique_ptr<Impl> PImpl;
+};
+
+} // namespace tidy
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_NOLINTDIRECTIVEHANDLER_H
diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp
index 9c64c645130bf..86c4b2151243c 100644
--- a/clang-tools-extra/clangd/ParsedAST.cpp
+++ b/clang-tools-extra/clangd/ParsedAST.cpp
@@ -10,6 +10,7 @@
 #include "../clang-tidy/ClangTidyCheck.h"
 #include "../clang-tidy/ClangTidyDiagnosticConsumer.h"
 #include "../clang-tidy/ClangTidyModuleRegistry.h"
+#include "../clang-tidy/NoLintDirectiveHandler.h"
 #include "AST.h"
 #include "Compiler.h"
 #include "Config.h"
@@ -468,12 +469,11 @@ ParsedAST::build(llvm::StringRef Filename, const ParseInputs &Inputs,
           bool IsInsideMainFile =
               Info.hasSourceManager() &&
               isInsideMainFile(Info.getLocation(), Info.getSourceManager());
-          SmallVector<tidy::ClangTidyError, 1> TidySuppressedErrors;
-          if (IsInsideMainFile &&
-              tidy::shouldSuppressDiagnostic(DiagLevel, Info, *CTContext,
-                                             TidySuppressedErrors,
-                                             /*AllowIO=*/false,
-                                             /*EnableNolintBlocks=*/false)) {
+          SmallVector<tooling::Diagnostic, 1> TidySuppressedErrors;
+          if (IsInsideMainFile && CTContext->shouldSuppressDiagnostic(
+                                      DiagLevel, Info, TidySuppressedErrors,
+                                      /*AllowIO=*/false,
+                                      /*EnableNolintBlocks=*/false)) {
             // FIXME: should we expose the suppression error (invalid use of
             // NOLINT comments)?
             return DiagnosticsEngine::Ignored;
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
new file mode 100644
index 0000000000000..d90fc069c9789
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/1st-translation-unit.cpp
@@ -0,0 +1,5 @@
+// NOLINTBEGIN
+class A { A(int i); };
+// NOLINTEND
+
+class B { B(int i); };
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
new file mode 100644
index 0000000000000..0497037d31b82
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/Inputs/nolintbeginend/2nd-translation-unit.cpp
@@ -0,0 +1,6 @@
+
+class A { A(int i); };
+
+// NOLINTBEGIN
+class B { B(int i); };
+// NOLINTEND
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
index 4b80bbe4eb3ca..9ef194232b6b7 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolint.cpp
@@ -28,11 +28,56 @@ class C4 { C4(int i); }; // NOLINT(google-explicit-constructor)
 
 class C5 { C5(int i); }; // NOLINT(some-check, google-explicit-constructor)
 
-class C6 { C6(int i); }; // NOLINT without-brackets-skip-all, another-check
+class C6 { C6(int i); }; // NOLINT without-brackets-skip-all
 
-class C7 { C7(int i); }; // NOLINTNEXTLINE doesn't get misconstrued as a NOLINT
+// Other NOLINT* types (e.g. NEXTLINE) should not be misconstrued as a NOLINT:
+class C7 { C7(int i); }; // NOLINTNEXTLINE
 // CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
 
+// NOLINT must be UPPERCASE:
+// NOLINTnextline
+class C8 { C8(int i); }; // nolint
+// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
+
+// Unrecognized marker:
+// NOLINTNEXTLINEXYZ
+class C9 { C9(int i); }; // NOLINTXYZ
+// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: single-argument constructors must be marked explicit
+
+// C-style comments are supported:
+class C10 { C10(int i); }; /* NOLINT */
+/* NOLINT */ class C11 { C11(int i); };
+
+// Multiple NOLINTs in the same comment:
+class C12 { C12(int i); }; // NOLINT(some-other-check) NOLINT(google-explicit-constructor)
+class C13 { C13(int i); }; // NOLINT(google-explicit-constructor) NOLINT(some-other-check)
+class C14 { C14(int i); }; // NOLINTNEXTLINE(some-other-check) NOLINT(google-explicit-constructor)
+
+// NOLINTNEXTLINE(google-explicit-constructor) NOLINT(some-other-check)
+class C15 { C15(int i); }; 
+
+// Any text after a NOLINT expression is treated as a comment:
+class C16 { C16(int i); }; // NOLINT: suppress check because <reason>
+class C17 { C17(int i); }; // NOLINT(google-explicit-constructor): suppress check because <reason>
+
+// NOLINT must appear in its entirety on one line:
+class C18 { C18(int i); }; /* NOL
+INT */
+// CHECK-MESSAGES: :[[@LINE-2]]:13: warning: single-argument constructors must be marked explicit
+
+/* NO
+LINT */ class C19 { C19(int i); };
+// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: single-argument constructors must be marked explicit
+
+// Spaces between items in the comma-separated check list are ignroed:
+class C20 { C20(int i); }; // NOLINT( google-explicit-constructor )
+class C21 { C21(int i); }; // NOLINT( google-explicit-constructor , some-other-check )
+class C22 { C22(int i); }; // NOLINT(google-explicit- constructor)
+// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: single-argument constructors must be marked explicit
+
+// If there is a space between "NOLINT" and the bracket, it is treated as a regular NOLINT: 
+class C23 { C23(int i); }; // NOLINT (some-other-check)
+
 void f() {
   int i;
 // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: unused variable 'i' [clang-diagnostic-unused-variable]
@@ -71,4 +116,4 @@ int array2[10];  // NOLINT(cppcoreguidelines-avoid-c-arrays)
 int array3[10];  // NOLINT(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
 int array4[10];  // NOLINT(*-avoid-c-arrays)
 
-// CHECK-MESSAGES: Suppressed 23 warnings (23 NOLINT)
+// CHECK-MESSAGES: Suppressed 34 warnings (34 NOLINT)
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
new file mode 100644
index 0000000000000..ee5b1cca755ab
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-LIFO.cpp
@@ -0,0 +1,19 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+// NOLINTBEGIN(google-readability-casting)
+class A { A(int i); };
+auto Num = (unsigned int)(-1);
+// NOLINTEND(google-explicit-constructor)
+// NOLINTEND(google-readability-casting)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
new file mode 100644
index 0000000000000..90b9fa9883024
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-glob.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN
+class B { B(int i); };
+// NOLINTEND(*)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
new file mode 100644
index 0000000000000..6ffa914e4ef0b
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-all-end-specific.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN
+class A { A(int i); };
+// NOLINTEND(google-explicit-constructor)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
new file mode 100644
index 0000000000000..3697d5c11e2e2
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-all.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(*)
+class B { B(int i); };
+// NOLINTEND
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
new file mode 100644
index 0000000000000..5bdb117f20242
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-glob-end-specific.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(*)
+class B { B(int i); };
+// NOLINTEND(google-explicit-constructor)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
deleted file mode 100644
index 01e69ddf6352a..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-global-end-specific.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
-
-// NOLINTBEGIN
-class A { A(int i); };
-// NOLINTEND(google-explicit-constructor)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
-
-// NOLINTBEGIN
-class B { B(int i); };
-// NOLINTEND(*)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
new file mode 100644
index 0000000000000..150913ce0ec69
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-multiple-end-single.cpp
@@ -0,0 +1,22 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor,google-readability-casting)
+class B { B(int i); };
+// NOLINTEND(google-explicit-constructor)
+auto Num2 = (unsigned int)(-1);
+// NOLINTEND(google-readability-casting)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
+// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
new file mode 100644
index 0000000000000..f9a915c890cbb
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-single-end-multiple.cpp
@@ -0,0 +1,22 @@
+// RUN: not clang-tidy %s --checks='-*,google-explicit-constructor,google-readability-casting' 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+// NOLINTBEGIN(google-readability-casting)
+class B { B(int i); };
+auto Num2 = (unsigned int)(-1);
+// NOLINTEND(google-explicit-constructor,google-readability-casting)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-13]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
+// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
new file mode 100644
index 0000000000000..decfe2dd5a4c1
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-all.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+class A { A(int i); };
+// NOLINTEND
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
new file mode 100644
index 0000000000000..a9f904ccce138
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-glob.cpp
@@ -0,0 +1,16 @@
+// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
+
+// NOLINTBEGIN(google-explicit-constructor)
+class A { A(int i); };
+// NOLINTEND(*)
+
+// Note: the expected output has been split over several lines so that clang-tidy
+//       does not see the "no lint" suppression comment and mistakenly assume it
+//       is meant for itself.
+// CHECK: :[[@LINE-7]]:4: error: unmatched 'NOLIN
+// CHECK: TBEGIN' comment without a subsequent 'NOLIN
+// CHECK: TEND' comment [clang-tidy-nolint]
+// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
deleted file mode 100644
index c6fcfddd09ba8..0000000000000
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-begin-specific-end-global.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-// RUN: not clang-tidy %s --checks="-*,google-explicit-constructor" 2>&1 | FileCheck %s
-
-// NOLINTBEGIN(google-explicit-constructor)
-class A { A(int i); };
-// NOLINTEND
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
-
-// NOLINTBEGIN(*)
-class B { B(int i); };
-// NOLINTEND
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-6]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-6]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
index 957b0b341a3cc..8d7786fb8c712 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-check-names.cpp
@@ -16,24 +16,3 @@ auto Num = (unsigned int)(-1);
 // CHECK: :[[@LINE-10]]:4: error: unmatched 'NOLIN
 // CHECK: TEND' comment without a previous 'NOLIN
 // CHECK: TBEGIN' comment [clang-tidy-nolint]
-
-// NOLINTBEGIN(google-explicit-constructor,google-readability-casting)
-class B { B(int i); };
-// NOLINTEND(google-explicit-constructor)
-auto Num2 = (unsigned int)(-1);
-// NOLINTEND(google-readability-casting)
-
-// Note: the expected output has been split over several lines so that clang-tidy
-//       does not see the "no lint" suppression comment and mistakenly assume it
-//       is meant for itself.
-// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
-// CHECK: TBEGIN' comment without a subsequent 'NOLIN
-// CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-11]]:11: warning: single-argument constructors must be marked explicit
-// CHECK: :[[@LINE-11]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-13]]:13: warning: C-style casts are discouraged; use static_cast
-// CHECK: :[[@LINE-13]]:4: error: unmatched 'NOLIN
-// CHECK: TEND' comment without a previous 'NOLIN
-// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
index 7ed5fb820e509..4b8947e369f92 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-mismatched-delims.cpp
@@ -11,4 +11,3 @@ class A { A(int i); };
 // CHECK: :[[@LINE-8]]:4: error: unmatched 'NOLIN
 // CHECK: TBEGIN' comment without a subsequent 'NOLIN
 // CHECK: TEND' comment [clang-tidy-nolint]
-// CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp
new file mode 100644
index 0000000000000..773e023cb22c9
--- /dev/null
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-multiple-TUs.cpp
@@ -0,0 +1,6 @@
+// RUN: clang-tidy %S/Inputs/nolintbeginend/1st-translation-unit.cpp %S/Inputs/nolintbeginend/2nd-translation-unit.cpp --checks='-*,google-explicit-constructor' 2>&1 | FileCheck %s
+
+// CHECK-NOT: 1st-translation-unit.cpp:2:11: warning: single-argument constructors must be marked explicit
+// CHECK: 1st-translation-unit.cpp:5:11: warning: single-argument constructors must be marked explicit
+// CHECK: 2nd-translation-unit.cpp:2:11: warning: single-argument constructors must be marked explicit
+// CHECK-NOT: 2nd-translation-unit.cpp:5:11: warning: single-argument constructors must be marked explicit
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
index 0f2f9994c1fa0..57e1ff331c8ba 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend-typo-in-check-name.cpp
@@ -11,3 +11,6 @@ class A { A(int i); };
 // CHECK: TBEGIN' comment without a subsequent 'NOLIN
 // CHECK: TEND' comment [clang-tidy-nolint]
 // CHECK: :[[@LINE-9]]:11: warning: single-argument constructors must be marked explicit
+// CHECK: :[[@LINE-9]]:4: error: unmatched 'NOLIN
+// CHECK: TEND' comment without a previous 'NOLIN
+// CHECK: TBEGIN' comment [clang-tidy-nolint]
diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
index 8379128530180..ae97bc5b51441 100644
--- a/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
+++ b/clang-tools-extra/test/clang-tidy/infrastructure/nolintbeginend.cpp
@@ -58,30 +58,24 @@ class C6 { C6(int i); };
 // NOLINTEND(some-other-check)
 // NOLINTEND(google-explicit-constructor)
 
-// NOLINTBEGIN(google-explicit-constructor)
-// NOLINTBEGIN(some-other-check)
-class C7 { C7(int i); };
-// NOLINTEND(google-explicit-constructor)
-// NOLINTEND(some-other-check)
-
 // NOLINTBEGIN(google-explicit-constructor)
 // NOLINTBEGIN
-class C8 { C8(int i); };
+class C7 { C7(int i); };
 // NOLINTEND
 // NOLINTEND(google-explicit-constructor)
 
 // NOLINTBEGIN
 // NOLINTBEGIN(google-explicit-constructor)
-class C9 { C9(int i); };
+class C8 { C8(int i); };
 // NOLINTEND(google-explicit-constructor)
 // NOLINTEND
 
 // NOLINTBEGIN(not-closed-bracket-is-treated-as-skip-all
-class C10 { C10(int i); };
+class C9 { C9(int i); };
 // NOLINTEND(not-closed-bracket-is-treated-as-skip-all
 
 // NOLINTBEGIN without-brackets-skip-all, another-check
-class C11 { C11(int i); };
+class C10 { C10(int i); };
 // NOLINTEND without-brackets-skip-all, another-check
 
 #define MACRO(X) class X { X(int i); };
@@ -114,28 +108,28 @@ MACRO_WRAPPED_WITH_NO_LINT
 MACRO_NO_LINT_INSIDE_MACRO
 
 // NOLINTBEGIN(google*)
-class C12 { C12(int i); };
+class C11 { C11(int i); };
 // NOLINTEND(google*)
 
 // NOLINTBEGIN(*explicit-constructor)
-class C15 { C15(int i); };
+class C12 { C12(int i); };
 // NOLINTEND(*explicit-constructor)
 
 // NOLINTBEGIN(*explicit*)
-class C16 { C16(int i); };
+class C13 { C13(int i); };
 // NOLINTEND(*explicit*)
 
 // NOLINTBEGIN(-explicit-constructor)
-class C17 { C17(int x); };
+class C14 { C14(int x); };
 // CHECK-MESSAGES: :[[@LINE-1]]:13: warning: single-argument constructors must be marked explicit
 // NOLINTEND(-explicit-constructor)
 
 // NOLINTBEGIN(google*,-google*)
-class C18 { C18(int x); };
+class C15 { C15(int x); };
 // NOLINTEND(google*,-google*)
 
 // NOLINTBEGIN(*,-google*)
-class C19 { C19(int x); };
+class C16 { C16(int x); };
 // NOLINTEND(*,-google*)
 
 int array1[10];
@@ -154,4 +148,4 @@ int array3[10];
 int array4[10];
 // NOLINTEND(*-avoid-c-arrays)
 
-// CHECK-MESSAGES: Suppressed 27 warnings (27 NOLINT).
+// CHECK-MESSAGES: Suppressed 26 warnings (26 NOLINT).

From 66bd7ebdf76ab1758469145a5194b6fa833dd3a9 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Mon, 24 Jan 2022 12:35:18 +0000
Subject: [PATCH 705/946] [SVE] Use DUPM to handling more splat immediate
 cases.

NOTE: Only considers i64 based vectors at this time because smaller
element types require extra isel operand parsing.

Differential Revision: https://reviews.llvm.org/D118040
---
 llvm/lib/Target/AArch64/SVEInstrFormats.td    |  3 +++
 .../test/CodeGen/AArch64/sve-int-arith-imm.ll | 15 +++++--------
 .../AArch64/sve-intrinsics-int-arith-imm.ll   |  9 +++-----
 llvm/test/CodeGen/AArch64/sve-vector-splat.ll | 21 +++++++++++++------
 llvm/test/CodeGen/AArch64/sve-vselect-imm.ll  | 10 ++++-----
 llvm/test/CodeGen/AArch64/sve2-int-mul.ll     |  3 +--
 6 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 466c785557805..359f5afa3b54c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1708,6 +1708,9 @@ multiclass sve_int_dup_mask_imm<string asm> {
                   (!cast<Instruction>(NAME) ZPR32:$Zd, sve_preferred_logical_imm32:$imm), 6>;
   def : InstAlias<"mov $Zd, $imm",
                   (!cast<Instruction>(NAME) ZPR64:$Zd, sve_preferred_logical_imm64:$imm), 5>;
+
+  def : Pat<(nxv2i64 (AArch64dup (i64 logical_imm64:$imm))),
+            (!cast<Instruction>(NAME) logical_imm64:$imm)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
index 4a355e929f3fc..3f0aa281b3a16 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll
@@ -133,9 +133,8 @@ define <vscale x 2 x i64> @smax_i64_neg(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: smax_i64_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65535
+; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@@ -277,9 +276,8 @@ define <vscale x 2 x i64> @smin_i64_neg(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: smin_i64_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65535
+; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    smin z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@@ -385,9 +383,8 @@ define <vscale x 2 x i64> @umax_i64_pos(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: umax_i64_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65535
+; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@@ -493,9 +490,8 @@ define <vscale x 2 x i64> @umin_i64_pos(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: umin_i64_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65535
+; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
@@ -627,9 +623,8 @@ define <vscale x 4 x i32> @mul_i32_range(<vscale x 4 x i32> %a) {
 define <vscale x 2 x i64> @mul_i64_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: mul_i64_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #255
+; CHECK-NEXT:    mov z1.d, #255 // =0xff
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.d, x8
 ; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
index 30309dbbfe3cc..8b57622c749dc 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll
@@ -514,9 +514,8 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: smax_i64_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65535
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
 ; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -832,9 +831,8 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: umax_i64_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65535
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
 ; CHECK-NEXT:    umax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
@@ -991,9 +989,8 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
 define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: umin_i64_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #65535
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z1.d, #65535 // =0xffff
 ; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
index 58821738f3a3a..641ee2f308747 100644
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -73,8 +73,8 @@ define <vscale x 4 x i32> @sve_splat_4xi32_imm() {
   ret <vscale x 4 x i32> %splat
 }
 
-define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
-; CHECK-LABEL: sve_splat_2xi64_imm:
+define <vscale x 2 x i64> @sve_splat_2xi64_dup_imm() {
+; CHECK-LABEL: sve_splat_2xi64_dup_imm:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov z0.d, #1 // =0x1
 ; CHECK-NEXT:    ret
@@ -83,6 +83,16 @@ define <vscale x 2 x i64> @sve_splat_2xi64_imm() {
   ret <vscale x 2 x i64> %splat
 }
 
+define <vscale x 2 x i64> @sve_splat_2xi64_dupm_imm() {
+; CHECK-LABEL: sve_splat_2xi64_dupm_imm:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, #0xffff00000000
+; CHECK-NEXT:    ret
+  %ins = insertelement <vscale x 2 x i64> undef, i64 281470681743360, i32 0 ; 0xffff00000000
+  %splat = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
+  ret <vscale x 2 x i64> %splat
+}
+
 ;; Promote splats of smaller illegal integer vector types
 
 define <vscale x 2 x i8> @sve_splat_2xi8(i8 %val) {
@@ -173,8 +183,7 @@ define <vscale x 2 x i32> @sve_splat_2xi32(i32 %val) {
 define <vscale x 2 x i32> @sve_splat_2xi32_imm() {
 ; CHECK-LABEL: sve_splat_2xi32_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #-1
-; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    mov z0.d, #0xffffffff
 ; CHECK-NEXT:    ret
   %ins = insertelement <vscale x 2 x i32> undef, i32 -1, i32 0
   %splat = shufflevector <vscale x 2 x i32> %ins, <vscale x 2 x i32> undef, <vscale x 2 x i32> zeroinitializer
@@ -530,9 +539,9 @@ define <vscale x 4 x float> @splat_nxv4f32_imm_out_of_range() {
 define <vscale x 2 x double> @splat_nxv2f64_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    adrp x8, .LCPI50_0
+; CHECK-NEXT:    adrp x8, .LCPI51_0
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    add x8, x8, :lo12:.LCPI50_0
+; CHECK-NEXT:    add x8, x8, :lo12:.LCPI51_0
 ; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x8]
 ; CHECK-NEXT:    ret
   %1 = insertelement <vscale x 2 x double> undef, double 3.33, i32 0
diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
index 63d95f3d19bfe..18024ebbb30f5 100644
--- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
@@ -144,10 +144,9 @@ ret <vscale x 4 x i32> %sel
 define <vscale x 2 x i64> @sel_64_illegal_wrong_extension(<vscale x 2 x i1> %p) {
 ; CHECK-LABEL: sel_64_illegal_wrong_extension:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #128
-; CHECK-NEXT:    mov z1.d, #0 // =0x0
-; CHECK-NEXT:    mov z0.d, x8
-; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT:    mov z0.d, #0 // =0x0
+; CHECK-NEXT:    mov z1.d, #128 // =0x80
+; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
 %sel = select <vscale x 2 x i1> %p, <vscale x 2 x i64> %vec, <vscale x 2 x i64> zeroinitializer
@@ -370,8 +369,7 @@ ret <vscale x 4 x i32> %sel
 define <vscale x 2 x i64> @sel_merge_64_illegal_wrong_extension(<vscale x 2 x i1> %p, <vscale x 2 x i64> %in) {
 ; CHECK-LABEL: sel_merge_64_illegal_wrong_extension:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #128
-; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z1.d, #128 // =0x80
 ; CHECK-NEXT:    mov z0.d, p0/m, z1.d
 ; CHECK-NEXT:    ret
 %vec = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 128, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
index 57d5775f7c39e..4bf30e1dbbd79 100644
--- a/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-int-mul.ll
@@ -59,8 +59,7 @@ define <vscale x 4 x i32> @mul_i32_imm_neg(<vscale x 4 x i32> %a) {
 define <vscale x 2 x i64> @mul_i64_imm(<vscale x 2 x i64> %a) {
 ; CHECK-LABEL: mul_i64_imm:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #255
-; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z1.d, #255 // =0xff
 ; CHECK-NEXT:    mul z0.d, z0.d, z1.d
 ; CHECK-NEXT:    ret
   %elt = insertelement <vscale x 2 x i64> undef, i64 255, i32 0

From 4fa1ad05215d6242c4f195139fd4af7450cfd18e Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Wed, 26 Jan 2022 13:02:48 +0100
Subject: [PATCH 706/946] [lldb] Convert POSIXLog to use the new API

---
 .../Process/FreeBSD/NativeProcessFreeBSD.cpp  | 30 +++++-----
 .../NativeRegisterContextFreeBSD_arm64.cpp    |  2 +-
 .../Process/FreeBSD/NativeThreadFreeBSD.cpp   |  8 +--
 .../Plugins/Process/Linux/IntelPTManager.cpp  |  8 +--
 .../Process/Linux/NativeProcessLinux.cpp      | 58 +++++++++----------
 .../Linux/NativeRegisterContextLinux.cpp      |  4 +-
 .../Linux/NativeRegisterContextLinux_arm.cpp  | 26 ++++-----
 .../NativeRegisterContextLinux_ppc64le.cpp    | 16 ++---
 .../Plugins/Process/Linux/SingleStepCheck.cpp |  6 +-
 .../Process/NetBSD/NativeProcessNetBSD.cpp    | 30 +++++-----
 .../Process/NetBSD/NativeThreadNetBSD.cpp     |  6 +-
 .../Plugins/Process/POSIX/ProcessPOSIXLog.h   | 10 ----
 12 files changed, 97 insertions(+), 107 deletions(-)

diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp
index 25fc7028216e3..21c9ead0eca41 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeProcessFreeBSD.cpp
@@ -54,7 +54,7 @@ llvm::Expected<std::unique_ptr<NativeProcessProtocol>>
 NativeProcessFreeBSD::Factory::Launch(ProcessLaunchInfo &launch_info,
                                       NativeDelegate &native_delegate,
                                       MainLoop &mainloop) const {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   Status status;
   ::pid_t pid = ProcessLauncherPosixFork()
@@ -108,7 +108,7 @@ llvm::Expected<std::unique_ptr<NativeProcessProtocol>>
 NativeProcessFreeBSD::Factory::Attach(
     lldb::pid_t pid, NativeProcessProtocol::NativeDelegate &native_delegate,
     MainLoop &mainloop) const {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid = {0:x}", pid);
 
   // Retrieve the architecture for the running process.
@@ -171,7 +171,7 @@ void NativeProcessFreeBSD::MonitorCallback(lldb::pid_t pid, int signal) {
 }
 
 void NativeProcessFreeBSD::MonitorExited(lldb::pid_t pid, WaitStatus status) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   LLDB_LOG(log, "got exit signal({0}) , pid = {1}", status, pid);
 
@@ -194,7 +194,7 @@ void NativeProcessFreeBSD::MonitorSIGSTOP(lldb::pid_t pid) {
 }
 
 void NativeProcessFreeBSD::MonitorSIGTRAP(lldb::pid_t pid) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   struct ptrace_lwpinfo info;
 
   const auto siginfo_err = PtraceWrapper(PT_LWPINFO, pid, &info, sizeof(info));
@@ -357,7 +357,7 @@ void NativeProcessFreeBSD::MonitorSIGTRAP(lldb::pid_t pid) {
 }
 
 void NativeProcessFreeBSD::MonitorSignal(lldb::pid_t pid, int signal) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   struct ptrace_lwpinfo info;
 
   const auto siginfo_err = PtraceWrapper(PT_LWPINFO, pid, &info, sizeof(info));
@@ -386,7 +386,7 @@ void NativeProcessFreeBSD::MonitorSignal(lldb::pid_t pid, int signal) {
 
 Status NativeProcessFreeBSD::PtraceWrapper(int req, lldb::pid_t pid, void *addr,
                                            int data, int *result) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PTRACE));
+  Log *log = GetLog(POSIXLog::Ptrace);
   Status error;
   int ret;
 
@@ -430,7 +430,7 @@ NativeProcessFreeBSD::GetSoftwareBreakpointTrapOpcode(size_t size_hint) {
 }
 
 Status NativeProcessFreeBSD::Resume(const ResumeActionList &resume_actions) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid {0}", GetID());
 
   Status ret;
@@ -527,7 +527,7 @@ Status NativeProcessFreeBSD::Signal(int signo) {
 Status NativeProcessFreeBSD::Interrupt() { return Halt(); }
 
 Status NativeProcessFreeBSD::Kill() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid {0}", GetID());
 
   Status error;
@@ -614,7 +614,7 @@ Status NativeProcessFreeBSD::GetMemoryRegionInfo(lldb::addr_t load_addr,
 }
 
 Status NativeProcessFreeBSD::PopulateMemoryRegionCache() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   // If our cache is empty, pull the latest.  There should always be at least
   // one memory region if memory region handling is supported.
   if (!m_mem_region_cache.empty()) {
@@ -741,7 +741,7 @@ NativeProcessFreeBSD::GetFileLoadAddress(const llvm::StringRef &file_name,
 }
 
 void NativeProcessFreeBSD::SigchldHandler() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   int status;
   ::pid_t wait_pid =
       llvm::sys::RetryAfterSignal(-1, waitpid, GetID(), &status, WNOHANG);
@@ -786,7 +786,7 @@ bool NativeProcessFreeBSD::HasThreadNoLock(lldb::tid_t thread_id) {
 }
 
 NativeThreadFreeBSD &NativeProcessFreeBSD::AddThread(lldb::tid_t thread_id) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "pid {0} adding thread with tid {1}", GetID(), thread_id);
 
   assert(thread_id > 0);
@@ -802,7 +802,7 @@ NativeThreadFreeBSD &NativeProcessFreeBSD::AddThread(lldb::tid_t thread_id) {
 }
 
 void NativeProcessFreeBSD::RemoveThread(lldb::tid_t thread_id) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "pid {0} removing thread with tid {1}", GetID(), thread_id);
 
   assert(thread_id > 0);
@@ -854,7 +854,7 @@ Status NativeProcessFreeBSD::ReadMemory(lldb::addr_t addr, void *buf,
   unsigned char *dst = static_cast<unsigned char *>(buf);
   struct ptrace_io_desc io;
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_MEMORY));
+  Log *log = GetLog(POSIXLog::Memory);
   LLDB_LOG(log, "addr = {0}, buf = {1}, size = {2}", addr, buf, size);
 
   bytes_read = 0;
@@ -882,7 +882,7 @@ Status NativeProcessFreeBSD::WriteMemory(lldb::addr_t addr, const void *buf,
   Status error;
   struct ptrace_io_desc io;
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_MEMORY));
+  Log *log = GetLog(POSIXLog::Memory);
   LLDB_LOG(log, "addr = {0}, buf = {1}, size = {2}", addr, buf, size);
 
   bytes_written = 0;
@@ -962,7 +962,7 @@ bool NativeProcessFreeBSD::SupportHardwareSingleStepping() const {
 
 void NativeProcessFreeBSD::MonitorClone(::pid_t child_pid, bool is_vfork,
                                         NativeThreadFreeBSD &parent_thread) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "fork, child_pid={0}", child_pid);
 
   int status;
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp
index 4578138a89b39..143d94069bc61 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeRegisterContextFreeBSD_arm64.cpp
@@ -227,7 +227,7 @@ llvm::Error NativeRegisterContextFreeBSD_arm64::CopyHardwareWatchpointsFrom(
 
 llvm::Error NativeRegisterContextFreeBSD_arm64::ReadHardwareDebugInfo() {
 #ifdef LLDB_HAS_FREEBSD_WATCHPOINT
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_REGISTERS));
+  Log *log = GetLog(POSIXLog::Registers);
 
   // we're fully stateful, so no need to reread control registers ever
   if (m_read_dbreg)
diff --git a/lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.cpp b/lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.cpp
index 20e9d6cfcd7f4..a75668f3b5c7f 100644
--- a/lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.cpp
+++ b/lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.cpp
@@ -75,7 +75,7 @@ Status NativeThreadFreeBSD::Suspend() {
 
 void NativeThreadFreeBSD::SetStoppedBySignal(uint32_t signo,
                                              const siginfo_t *info) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "tid = {0} in called with signal {1}", GetID(), signo);
 
   SetStopped();
@@ -178,7 +178,7 @@ void NativeThreadFreeBSD::SetStepping() {
 }
 
 std::string NativeThreadFreeBSD::GetName() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
 
   std::vector<struct kinfo_proc> kp;
   int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
@@ -213,7 +213,7 @@ lldb::StateType NativeThreadFreeBSD::GetState() { return m_state; }
 
 bool NativeThreadFreeBSD::GetStopReason(ThreadStopInfo &stop_info,
                                         std::string &description) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   description.clear();
 
   switch (m_state) {
@@ -316,7 +316,7 @@ NativeThreadFreeBSD::CopyWatchpointsFrom(NativeThreadFreeBSD &source) {
 
 llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
 NativeThreadFreeBSD::GetSiginfo() const {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   struct ptrace_lwpinfo info;
   const auto siginfo_err = NativeProcessFreeBSD::PtraceWrapper(
diff --git a/lldb/source/Plugins/Process/Linux/IntelPTManager.cpp b/lldb/source/Plugins/Process/Linux/IntelPTManager.cpp
index c5575fb30c98a..794689b1d3791 100644
--- a/lldb/source/Plugins/Process/Linux/IntelPTManager.cpp
+++ b/lldb/source/Plugins/Process/Linux/IntelPTManager.cpp
@@ -182,7 +182,7 @@ Error IntelPTThreadTrace::StartTrace(lldb::pid_t pid, lldb::tid_t tid,
 #ifndef PERF_ATTR_SIZE_VER5
   llvm_unreachable("Intel PT Linux perf event not supported");
 #else
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PTRACE));
+  Log *log = GetLog(POSIXLog::Ptrace);
 
   m_tid = tid;
   LLDB_LOG(log, "called thread id {0}", tid);
@@ -348,7 +348,7 @@ IntelPTThreadTrace::ReadPerfTraceAux(llvm::MutableArrayRef<uint8_t> &buffer,
   // in the man page of perf_event_open.
   ioctl(*m_fd, PERF_EVENT_IOC_DISABLE);
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PTRACE));
+  Log *log = GetLog(POSIXLog::Ptrace);
   Status error;
   uint64_t head = m_mmap_meta->aux_head;
 
@@ -381,7 +381,7 @@ IntelPTThreadTrace::ReadPerfTraceData(llvm::MutableArrayRef<uint8_t> &buffer,
 #ifndef PERF_ATTR_SIZE_VER5
   llvm_unreachable("perf event not supported");
 #else
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PTRACE));
+  Log *log = GetLog(POSIXLog::Ptrace);
   uint64_t bytes_remaining = buffer.size();
   Status error;
 
@@ -427,7 +427,7 @@ void IntelPTThreadTrace::ReadCyclicBuffer(llvm::MutableArrayRef<uint8_t> &dst,
                                           llvm::MutableArrayRef<uint8_t> src,
                                           size_t src_cyc_index, size_t offset) {
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PTRACE));
+  Log *log = GetLog(POSIXLog::Ptrace);
 
   if (dst.empty() || src.empty()) {
     dst = dst.drop_back(dst.size());
diff --git a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp
index b1ac247346a79..e4dec0e00ebe4 100644
--- a/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp
+++ b/lldb/source/Plugins/Process/Linux/NativeProcessLinux.cpp
@@ -79,7 +79,7 @@ static bool ProcessVmReadvSupported() {
   static llvm::once_flag flag;
 
   llvm::call_once(flag, [] {
-    Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+    Log *log = GetLog(POSIXLog::Process);
 
     uint32_t source = 0x47424742;
     uint32_t dest = 0;
@@ -108,7 +108,7 @@ static bool ProcessVmReadvSupported() {
 }
 
 static void MaybeLogLaunchInfo(const ProcessLaunchInfo &info) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   if (!log)
     return;
 
@@ -143,7 +143,7 @@ static void DisplayBytes(StreamString &s, void *bytes, uint32_t count) {
 }
 
 static void PtraceDisplayBytes(int &req, void *data, size_t data_size) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PTRACE));
+  Log *log = GetLog(POSIXLog::Ptrace);
   if (!log)
     return;
   StreamString buf;
@@ -218,7 +218,7 @@ llvm::Expected<std::unique_ptr<NativeProcessProtocol>>
 NativeProcessLinux::Factory::Launch(ProcessLaunchInfo &launch_info,
                                     NativeDelegate &native_delegate,
                                     MainLoop &mainloop) const {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   MaybeLogLaunchInfo(launch_info);
 
@@ -270,7 +270,7 @@ llvm::Expected<std::unique_ptr<NativeProcessProtocol>>
 NativeProcessLinux::Factory::Attach(
     lldb::pid_t pid, NativeProcessProtocol::NativeDelegate &native_delegate,
     MainLoop &mainloop) const {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid = {0:x}", pid);
 
   // Retrieve the architecture for the running process.
@@ -336,7 +336,7 @@ NativeProcessLinux::NativeProcessLinux(::pid_t pid, int terminal_fd,
 }
 
 llvm::Expected<std::vector<::pid_t>> NativeProcessLinux::Attach(::pid_t pid) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   Status status;
   // Use a map to keep track of the threads which we have attached/need to
@@ -501,7 +501,7 @@ void NativeProcessLinux::MonitorCallback(NativeThreadLinux &thread,
 }
 
 void NativeProcessLinux::WaitForCloneNotification(::pid_t pid) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   // The PID is not tracked yet, let's wait for it to appear.
   int status = -1;
@@ -523,7 +523,7 @@ void NativeProcessLinux::WaitForCloneNotification(::pid_t pid) {
 
 void NativeProcessLinux::MonitorSIGTRAP(const siginfo_t &info,
                                         NativeThreadLinux &thread) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   const bool is_main_thread = (thread.GetID() == GetID());
 
   assert(info.si_signo == SIGTRAP && "Unexpected child signal!");
@@ -704,7 +704,7 @@ void NativeProcessLinux::MonitorSIGTRAP(const siginfo_t &info,
 }
 
 void NativeProcessLinux::MonitorTrace(NativeThreadLinux &thread) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "received trace event, pid = {0}", thread.GetID());
 
   // This thread is currently stopped.
@@ -750,7 +750,7 @@ void NativeProcessLinux::MonitorSignal(const siginfo_t &info,
   const int signo = info.si_signo;
   const bool is_from_llgs = info.si_pid == getpid();
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   // POSIX says that process behaviour is undefined after it ignores a SIGFPE,
   // SIGILL, SIGSEGV, or SIGBUS *unless* that signal was generated by a kill(2)
@@ -834,7 +834,7 @@ void NativeProcessLinux::MonitorSignal(const siginfo_t &info,
 
 bool NativeProcessLinux::MonitorClone(NativeThreadLinux &parent,
                                       lldb::pid_t child_pid, int event) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "parent_tid={0}, child_pid={1}, event={2}", parent.GetID(),
            child_pid, event);
 
@@ -895,7 +895,7 @@ bool NativeProcessLinux::SupportHardwareSingleStepping() const {
 }
 
 Status NativeProcessLinux::Resume(const ResumeActionList &resume_actions) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid {0}", GetID());
 
   bool software_single_step = !SupportHardwareSingleStepping();
@@ -992,7 +992,7 @@ Status NativeProcessLinux::Detach() {
 Status NativeProcessLinux::Signal(int signo) {
   Status error;
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "sending signal {0} ({1}) to pid {1}", signo,
            Host::GetSignalAsCString(signo), GetID());
 
@@ -1005,7 +1005,7 @@ Status NativeProcessLinux::Signal(int signo) {
 Status NativeProcessLinux::Interrupt() {
   // Pick a running thread (or if none, a not-dead stopped thread) as the
   // chosen thread that will be the stop-reason thread.
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   NativeThreadProtocol *running_thread = nullptr;
   NativeThreadProtocol *stopped_thread = nullptr;
@@ -1046,7 +1046,7 @@ Status NativeProcessLinux::Interrupt() {
 }
 
 Status NativeProcessLinux::Kill() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid {0}", GetID());
 
   Status error;
@@ -1151,7 +1151,7 @@ Status NativeProcessLinux::GetMemoryRegionInfo(lldb::addr_t load_addr,
 }
 
 Status NativeProcessLinux::PopulateMemoryRegionCache() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   // If our cache is empty, pull the latest.  There should always be at least
   // one memory region if memory region handling is supported.
@@ -1214,7 +1214,7 @@ Status NativeProcessLinux::PopulateMemoryRegionCache() {
 }
 
 void NativeProcessLinux::DoStopIDBumped(uint32_t newBumpId) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "newBumpId={0}", newBumpId);
   LLDB_LOG(log, "clearing {0} entries from memory region cache",
            m_mem_region_cache.size());
@@ -1537,7 +1537,7 @@ Status NativeProcessLinux::ReadMemory(lldb::addr_t addr, void *buf, size_t size,
     bytes_read = process_vm_readv(pid, &local_iov, 1, &remote_iov, 1, 0);
     const bool success = bytes_read == size;
 
-    Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+    Log *log = GetLog(POSIXLog::Process);
     LLDB_LOG(log,
              "using process_vm_readv to read {0} bytes from inferior "
              "address {1:x}: {2}",
@@ -1553,7 +1553,7 @@ Status NativeProcessLinux::ReadMemory(lldb::addr_t addr, void *buf, size_t size,
   size_t remainder;
   long data;
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_MEMORY));
+  Log *log = GetLog(POSIXLog::Memory);
   LLDB_LOG(log, "addr = {0}, buf = {1}, size = {2}", addr, buf, size);
 
   for (bytes_read = 0; bytes_read < size; bytes_read += remainder) {
@@ -1581,7 +1581,7 @@ Status NativeProcessLinux::WriteMemory(lldb::addr_t addr, const void *buf,
   size_t remainder;
   Status error;
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_MEMORY));
+  Log *log = GetLog(POSIXLog::Memory);
   LLDB_LOG(log, "addr = {0}, buf = {1}, size = {2}", addr, buf, size);
 
   for (bytes_written = 0; bytes_written < size; bytes_written += remainder) {
@@ -1651,7 +1651,7 @@ bool NativeProcessLinux::HasThreadNoLock(lldb::tid_t thread_id) {
 }
 
 void NativeProcessLinux::StopTrackingThread(NativeThreadLinux &thread) {
-  Log *const log = ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD);
+  Log *const log = GetLog(POSIXLog::Thread);
   lldb::tid_t thread_id = thread.GetID();
   LLDB_LOG(log, "tid: {0}", thread_id);
 
@@ -1666,7 +1666,7 @@ void NativeProcessLinux::StopTrackingThread(NativeThreadLinux &thread) {
 }
 
 Status NativeProcessLinux::NotifyTracersOfNewThread(lldb::tid_t tid) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   Status error(m_intel_pt_manager.OnThreadCreated(tid));
   if (error.Fail())
     LLDB_LOG(log, "Failed to trace a new thread with intel-pt, tid = {0}. {1}",
@@ -1675,7 +1675,7 @@ Status NativeProcessLinux::NotifyTracersOfNewThread(lldb::tid_t tid) {
 }
 
 Status NativeProcessLinux::NotifyTracersOfThreadDestroyed(lldb::tid_t tid) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   Status error(m_intel_pt_manager.OnThreadDestroyed(tid));
   if (error.Fail())
     LLDB_LOG(log,
@@ -1686,7 +1686,7 @@ Status NativeProcessLinux::NotifyTracersOfThreadDestroyed(lldb::tid_t tid) {
 
 NativeThreadLinux &NativeProcessLinux::AddThread(lldb::tid_t thread_id,
                                                  bool resume) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "pid {0} adding thread with tid {1}", GetID(), thread_id);
 
   assert(!HasThreadNoLock(thread_id) &&
@@ -1761,7 +1761,7 @@ NativeThreadLinux *NativeProcessLinux::GetCurrentThread() {
 
 Status NativeProcessLinux::ResumeThread(NativeThreadLinux &thread,
                                         lldb::StateType state, int signo) {
-  Log *const log = ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD);
+  Log *const log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "tid: {0}", thread.GetID());
 
   // Before we do the resume below, first check if we have a pending stop
@@ -1801,7 +1801,7 @@ Status NativeProcessLinux::ResumeThread(NativeThreadLinux &thread,
 //===----------------------------------------------------------------------===//
 
 void NativeProcessLinux::StopRunningThreads(const lldb::tid_t triggering_tid) {
-  Log *const log = ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD);
+  Log *const log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "about to process event: (triggering_tid: {0})",
            triggering_tid);
 
@@ -1848,7 +1848,7 @@ void NativeProcessLinux::SignalIfAllThreadsStopped() {
 }
 
 void NativeProcessLinux::ThreadWasCreated(NativeThreadLinux &thread) {
-  Log *const log = ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD);
+  Log *const log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "tid: {0}", thread.GetID());
 
   if (m_pending_notification_tid != LLDB_INVALID_THREAD_ID &&
@@ -1860,7 +1860,7 @@ void NativeProcessLinux::ThreadWasCreated(NativeThreadLinux &thread) {
 }
 
 void NativeProcessLinux::SigchldHandler() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   // Threads can appear or disappear as a result of event processing, so gather
   // the events upfront.
@@ -1910,7 +1910,7 @@ Status NativeProcessLinux::PtraceWrapper(int req, lldb::pid_t pid, void *addr,
   Status error;
   long int ret;
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PTRACE));
+  Log *log = GetLog(POSIXLog::Ptrace);
 
   PtraceDisplayBytes(req, data, data_size);
 
diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux.cpp
index 90a6d8dcba042..920de58e07d61 100644
--- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux.cpp
+++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux.cpp
@@ -130,7 +130,7 @@ Status NativeRegisterContextLinux::DoReadRegisterValue(uint32_t offset,
                                                        const char *reg_name,
                                                        uint32_t size,
                                                        RegisterValue &value) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_REGISTERS));
+  Log *log = GetLog(POSIXLog::Registers);
 
   long data;
   Status error = NativeProcessLinux::PtraceWrapper(
@@ -147,7 +147,7 @@ Status NativeRegisterContextLinux::DoReadRegisterValue(uint32_t offset,
 
 Status NativeRegisterContextLinux::DoWriteRegisterValue(
     uint32_t offset, const char *reg_name, const RegisterValue &value) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_REGISTERS));
+  Log *log = GetLog(POSIXLog::Registers);
 
   void *buf = reinterpret_cast<void *>(value.GetAsUInt64());
   LLDB_LOG(log, "{0}: {1}", reg_name, buf);
diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp
index 7e798ac3cb56a..399cd699a040f 100644
--- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp
+++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm.cpp
@@ -270,7 +270,7 @@ bool NativeRegisterContextLinux_arm::IsFPR(unsigned reg) const {
 }
 
 uint32_t NativeRegisterContextLinux_arm::NumSupportedHardwareBreakpoints() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_BREAKPOINTS));
+  Log *log = GetLog(POSIXLog::Breakpoints);
 
   LLDB_LOGF(log, "NativeRegisterContextLinux_arm::%s()", __FUNCTION__);
 
@@ -289,7 +289,7 @@ uint32_t NativeRegisterContextLinux_arm::NumSupportedHardwareBreakpoints() {
 uint32_t
 NativeRegisterContextLinux_arm::SetHardwareBreakpoint(lldb::addr_t addr,
                                                       size_t size) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_BREAKPOINTS));
+  Log *log = GetLog(POSIXLog::Breakpoints);
   LLDB_LOG(log, "addr: {0:x}, size: {1:x}", addr, size);
 
   // Read hardware breakpoint and watchpoint information.
@@ -347,7 +347,7 @@ NativeRegisterContextLinux_arm::SetHardwareBreakpoint(lldb::addr_t addr,
 }
 
 bool NativeRegisterContextLinux_arm::ClearHardwareBreakpoint(uint32_t hw_idx) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_BREAKPOINTS));
+  Log *log = GetLog(POSIXLog::Breakpoints);
   LLDB_LOG(log, "hw_idx: {0}", hw_idx);
 
   // Read hardware breakpoint and watchpoint information.
@@ -381,7 +381,7 @@ bool NativeRegisterContextLinux_arm::ClearHardwareBreakpoint(uint32_t hw_idx) {
 
 Status NativeRegisterContextLinux_arm::GetHardwareBreakHitIndex(
     uint32_t &bp_index, lldb::addr_t trap_addr) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_BREAKPOINTS));
+  Log *log = GetLog(POSIXLog::Breakpoints);
 
   LLDB_LOGF(log, "NativeRegisterContextLinux_arm::%s()", __FUNCTION__);
 
@@ -401,7 +401,7 @@ Status NativeRegisterContextLinux_arm::GetHardwareBreakHitIndex(
 }
 
 Status NativeRegisterContextLinux_arm::ClearAllHardwareBreakpoints() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_BREAKPOINTS));
+  Log *log = GetLog(POSIXLog::Breakpoints);
 
   LLDB_LOGF(log, "NativeRegisterContextLinux_arm::%s()", __FUNCTION__);
 
@@ -442,7 +442,7 @@ Status NativeRegisterContextLinux_arm::ClearAllHardwareBreakpoints() {
 }
 
 uint32_t NativeRegisterContextLinux_arm::NumSupportedHardwareWatchpoints() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
 
   // Read hardware breakpoint and watchpoint information.
   Status error = ReadHardwareDebugInfo();
@@ -456,7 +456,7 @@ uint32_t NativeRegisterContextLinux_arm::NumSupportedHardwareWatchpoints() {
 
 uint32_t NativeRegisterContextLinux_arm::SetHardwareWatchpoint(
     lldb::addr_t addr, size_t size, uint32_t watch_flags) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "addr: {0:x}, size: {1:x} watch_flags: {2:x}", addr, size,
            watch_flags);
 
@@ -561,7 +561,7 @@ uint32_t NativeRegisterContextLinux_arm::SetHardwareWatchpoint(
 
 bool NativeRegisterContextLinux_arm::ClearHardwareWatchpoint(
     uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   // Read hardware breakpoint and watchpoint information.
@@ -630,7 +630,7 @@ Status NativeRegisterContextLinux_arm::ClearAllHardwareWatchpoints() {
 }
 
 uint32_t NativeRegisterContextLinux_arm::GetWatchpointSize(uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   switch ((m_hwp_regs[wp_index].control >> 5) & 0x0f) {
@@ -647,7 +647,7 @@ uint32_t NativeRegisterContextLinux_arm::GetWatchpointSize(uint32_t wp_index) {
   }
 }
 bool NativeRegisterContextLinux_arm::WatchpointIsEnabled(uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   if ((m_hwp_regs[wp_index].control & 0x1) == 0x1)
@@ -659,7 +659,7 @@ bool NativeRegisterContextLinux_arm::WatchpointIsEnabled(uint32_t wp_index) {
 Status
 NativeRegisterContextLinux_arm::GetWatchpointHitIndex(uint32_t &wp_index,
                                                       lldb::addr_t trap_addr) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}, trap_addr: {1:x}", wp_index, trap_addr);
 
   uint32_t watch_size;
@@ -682,7 +682,7 @@ NativeRegisterContextLinux_arm::GetWatchpointHitIndex(uint32_t &wp_index,
 
 lldb::addr_t
 NativeRegisterContextLinux_arm::GetWatchpointAddress(uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   if (wp_index >= m_max_hwp_supported)
@@ -696,7 +696,7 @@ NativeRegisterContextLinux_arm::GetWatchpointAddress(uint32_t wp_index) {
 
 lldb::addr_t
 NativeRegisterContextLinux_arm::GetWatchpointHitAddress(uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   if (wp_index >= m_max_hwp_supported)
diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_ppc64le.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_ppc64le.cpp
index 5af5aa548f2eb..1c447175033e9 100644
--- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_ppc64le.cpp
+++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_ppc64le.cpp
@@ -499,7 +499,7 @@ bool NativeRegisterContextLinux_ppc64le::IsVSX(unsigned reg) {
 }
 
 uint32_t NativeRegisterContextLinux_ppc64le::NumSupportedHardwareWatchpoints() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
 
   // Read hardware breakpoint and watchpoint information.
   Status error = ReadHardwareDebugInfo();
@@ -513,7 +513,7 @@ uint32_t NativeRegisterContextLinux_ppc64le::NumSupportedHardwareWatchpoints() {
 
 uint32_t NativeRegisterContextLinux_ppc64le::SetHardwareWatchpoint(
     lldb::addr_t addr, size_t size, uint32_t watch_flags) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "addr: {0:x}, size: {1:x} watch_flags: {2:x}", addr, size,
            watch_flags);
 
@@ -600,7 +600,7 @@ uint32_t NativeRegisterContextLinux_ppc64le::SetHardwareWatchpoint(
 
 bool NativeRegisterContextLinux_ppc64le::ClearHardwareWatchpoint(
     uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   // Read hardware breakpoint and watchpoint information.
@@ -640,7 +640,7 @@ bool NativeRegisterContextLinux_ppc64le::ClearHardwareWatchpoint(
 
 uint32_t
 NativeRegisterContextLinux_ppc64le::GetWatchpointSize(uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   unsigned control = (m_hwp_regs[wp_index].control >> 5) & 0xff;
@@ -653,7 +653,7 @@ NativeRegisterContextLinux_ppc64le::GetWatchpointSize(uint32_t wp_index) {
 
 bool NativeRegisterContextLinux_ppc64le::WatchpointIsEnabled(
     uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   return !!((m_hwp_regs[wp_index].control & 0x1) == 0x1);
@@ -661,7 +661,7 @@ bool NativeRegisterContextLinux_ppc64le::WatchpointIsEnabled(
 
 Status NativeRegisterContextLinux_ppc64le::GetWatchpointHitIndex(
     uint32_t &wp_index, lldb::addr_t trap_addr) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}, trap_addr: {1:x}", wp_index, trap_addr);
 
   uint32_t watch_size;
@@ -684,7 +684,7 @@ Status NativeRegisterContextLinux_ppc64le::GetWatchpointHitIndex(
 
 lldb::addr_t
 NativeRegisterContextLinux_ppc64le::GetWatchpointAddress(uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   if (wp_index >= m_max_hwp_supported)
@@ -698,7 +698,7 @@ NativeRegisterContextLinux_ppc64le::GetWatchpointAddress(uint32_t wp_index) {
 
 lldb::addr_t
 NativeRegisterContextLinux_ppc64le::GetWatchpointHitAddress(uint32_t wp_index) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_WATCHPOINTS));
+  Log *log = GetLog(POSIXLog::Watchpoints);
   LLDB_LOG(log, "wp_index: {0}", wp_index);
 
   if (wp_index >= m_max_hwp_supported)
diff --git a/lldb/source/Plugins/Process/Linux/SingleStepCheck.cpp b/lldb/source/Plugins/Process/Linux/SingleStepCheck.cpp
index 5f8057d2fef63..e7124ef8e5498 100644
--- a/lldb/source/Plugins/Process/Linux/SingleStepCheck.cpp
+++ b/lldb/source/Plugins/Process/Linux/SingleStepCheck.cpp
@@ -65,7 +65,7 @@ bool WorkaroundNeeded() {
   // turn, and verify that single-stepping works on that cpu. A workaround is
   // needed if we find at least one broken cpu.
 
-  Log *log = ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD);
+  Log *log = GetLog(POSIXLog::Thread);
   ::pid_t child_pid = fork();
   if (child_pid == -1) {
     LLDB_LOG(log, "failed to fork(): {0}", Status(errno, eErrorTypePOSIX));
@@ -144,7 +144,7 @@ bool WorkaroundNeeded() {
 } // end anonymous namespace
 
 std::unique_ptr<SingleStepWorkaround> SingleStepWorkaround::Get(::pid_t tid) {
-  Log *log = ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD);
+  Log *log = GetLog(POSIXLog::Thread);
 
   static bool workaround_needed = WorkaroundNeeded();
   if (!workaround_needed) {
@@ -176,7 +176,7 @@ std::unique_ptr<SingleStepWorkaround> SingleStepWorkaround::Get(::pid_t tid) {
 }
 
 SingleStepWorkaround::~SingleStepWorkaround() {
-  Log *log = ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD);
+  Log *log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "Removing workaround");
   if (sched_setaffinity(m_tid, sizeof m_original_set, &m_original_set) != 0) {
     LLDB_LOG(log, "Unable to reset cpu affinity for thread {0}: {1}", m_tid,
diff --git a/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp b/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp
index 0420d00e39d61..182eefb7bee7f 100644
--- a/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp
+++ b/lldb/source/Plugins/Process/NetBSD/NativeProcessNetBSD.cpp
@@ -59,7 +59,7 @@ llvm::Expected<std::unique_ptr<NativeProcessProtocol>>
 NativeProcessNetBSD::Factory::Launch(ProcessLaunchInfo &launch_info,
                                      NativeDelegate &native_delegate,
                                      MainLoop &mainloop) const {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   Status status;
   ::pid_t pid = ProcessLauncherPosixFork()
@@ -113,7 +113,7 @@ llvm::Expected<std::unique_ptr<NativeProcessProtocol>>
 NativeProcessNetBSD::Factory::Attach(
     lldb::pid_t pid, NativeProcessProtocol::NativeDelegate &native_delegate,
     MainLoop &mainloop) const {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid = {0:x}", pid);
 
   // Retrieve the architecture for the running process.
@@ -172,7 +172,7 @@ void NativeProcessNetBSD::MonitorCallback(lldb::pid_t pid, int signal) {
 }
 
 void NativeProcessNetBSD::MonitorExited(lldb::pid_t pid, WaitStatus status) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
 
   LLDB_LOG(log, "got exit signal({0}) , pid = {1}", status, pid);
 
@@ -207,7 +207,7 @@ void NativeProcessNetBSD::MonitorSIGSTOP(lldb::pid_t pid) {
 }
 
 void NativeProcessNetBSD::MonitorSIGTRAP(lldb::pid_t pid) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   ptrace_siginfo_t info;
 
   const auto siginfo_err =
@@ -359,7 +359,7 @@ void NativeProcessNetBSD::MonitorSIGTRAP(lldb::pid_t pid) {
 }
 
 void NativeProcessNetBSD::MonitorSignal(lldb::pid_t pid, int signal) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   ptrace_siginfo_t info;
 
   const auto siginfo_err =
@@ -383,7 +383,7 @@ void NativeProcessNetBSD::MonitorSignal(lldb::pid_t pid, int signal) {
 
 Status NativeProcessNetBSD::PtraceWrapper(int req, lldb::pid_t pid, void *addr,
                                           int data, int *result) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PTRACE));
+  Log *log = GetLog(POSIXLog::Ptrace);
   Status error;
   int ret;
 
@@ -459,7 +459,7 @@ static llvm::Expected<ptrace_siginfo_t> ComputeSignalInfo(
 }
 
 Status NativeProcessNetBSD::Resume(const ResumeActionList &resume_actions) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid {0}", GetID());
 
   Status ret;
@@ -562,7 +562,7 @@ Status NativeProcessNetBSD::Interrupt() {
 }
 
 Status NativeProcessNetBSD::Kill() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "pid {0}", GetID());
 
   Status error;
@@ -654,7 +654,7 @@ Status NativeProcessNetBSD::GetMemoryRegionInfo(lldb::addr_t load_addr,
 }
 
 Status NativeProcessNetBSD::PopulateMemoryRegionCache() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   // If our cache is empty, pull the latest.  There should always be at least
   // one memory region if memory region handling is supported.
   if (!m_mem_region_cache.empty()) {
@@ -772,7 +772,7 @@ Status NativeProcessNetBSD::GetFileLoadAddress(const llvm::StringRef &file_name,
 }
 
 void NativeProcessNetBSD::SigchldHandler() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   int status;
   ::pid_t wait_pid = llvm::sys::RetryAfterSignal(-1, waitpid, GetID(), &status,
                                                  WALLSIG | WNOHANG);
@@ -817,7 +817,7 @@ bool NativeProcessNetBSD::HasThreadNoLock(lldb::tid_t thread_id) {
 }
 
 NativeThreadNetBSD &NativeProcessNetBSD::AddThread(lldb::tid_t thread_id) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "pid {0} adding thread with tid {1}", GetID(), thread_id);
 
   assert(thread_id > 0);
@@ -833,7 +833,7 @@ NativeThreadNetBSD &NativeProcessNetBSD::AddThread(lldb::tid_t thread_id) {
 }
 
 void NativeProcessNetBSD::RemoveThread(lldb::tid_t thread_id) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "pid {0} removing thread with tid {1}", GetID(), thread_id);
 
   assert(thread_id > 0);
@@ -882,7 +882,7 @@ Status NativeProcessNetBSD::ReadMemory(lldb::addr_t addr, void *buf,
   unsigned char *dst = static_cast<unsigned char *>(buf);
   struct ptrace_io_desc io;
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_MEMORY));
+  Log *log = GetLog(POSIXLog::Memory);
   LLDB_LOG(log, "addr = {0}, buf = {1}, size = {2}", addr, buf, size);
 
   bytes_read = 0;
@@ -910,7 +910,7 @@ Status NativeProcessNetBSD::WriteMemory(lldb::addr_t addr, const void *buf,
   Status error;
   struct ptrace_io_desc io;
 
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_MEMORY));
+  Log *log = GetLog(POSIXLog::Memory);
   LLDB_LOG(log, "addr = {0}, buf = {1}, size = {2}", addr, buf, size);
 
   bytes_written = 0;
@@ -1013,7 +1013,7 @@ Status NativeProcessNetBSD::ReinitializeThreads() {
 
 void NativeProcessNetBSD::MonitorClone(::pid_t child_pid, bool is_vfork,
                                        NativeThreadNetBSD &parent_thread) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_PROCESS));
+  Log *log = GetLog(POSIXLog::Process);
   LLDB_LOG(log, "clone, child_pid={0}", child_pid);
 
   int status;
diff --git a/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.cpp b/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.cpp
index 400b89a5fddf0..3e8cf9fd9f23e 100644
--- a/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.cpp
+++ b/lldb/source/Plugins/Process/NetBSD/NativeThreadNetBSD.cpp
@@ -75,7 +75,7 @@ Status NativeThreadNetBSD::Suspend() {
 
 void NativeThreadNetBSD::SetStoppedBySignal(uint32_t signo,
                                             const siginfo_t *info) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   LLDB_LOG(log, "tid = {0} in called with signal {1}", GetID(), signo);
 
   SetStopped();
@@ -178,7 +178,7 @@ void NativeThreadNetBSD::SetStepping() {
 }
 
 std::string NativeThreadNetBSD::GetName() {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
 
 #ifdef PT_LWPSTATUS
   struct ptrace_lwpstatus info = {};
@@ -225,7 +225,7 @@ lldb::StateType NativeThreadNetBSD::GetState() { return m_state; }
 
 bool NativeThreadNetBSD::GetStopReason(ThreadStopInfo &stop_info,
                                        std::string &description) {
-  Log *log(ProcessPOSIXLog::GetLogIfAllCategoriesSet(POSIX_LOG_THREAD));
+  Log *log = GetLog(POSIXLog::Thread);
   description.clear();
 
   switch (m_state) {
diff --git a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
index f0807e1d4480d..7b8b6cdbf2557 100644
--- a/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
+++ b/lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h
@@ -26,19 +26,9 @@ enum class POSIXLog : Log::MaskType {
   LLVM_MARK_AS_BITMASK_ENUM(Watchpoints)
 };
 
-#define POSIX_LOG_PROCESS ::lldb_private::POSIXLog::Process
-#define POSIX_LOG_THREAD ::lldb_private::POSIXLog::Thread
-#define POSIX_LOG_MEMORY ::lldb_private::POSIXLog::Memory
-#define POSIX_LOG_PTRACE ::lldb_private::POSIXLog::Ptrace
-#define POSIX_LOG_REGISTERS ::lldb_private::POSIXLog::Registers
-#define POSIX_LOG_BREAKPOINTS ::lldb_private::POSIXLog::Breakpoints
-#define POSIX_LOG_WATCHPOINTS ::lldb_private::POSIXLog::Watchpoints
-
 class ProcessPOSIXLog {
 public:
   static void Initialize();
-
-  static Log *GetLogIfAllCategoriesSet(POSIXLog mask) { return GetLog(mask); }
 };
 
 template <> Log::Channel &LogChannelFor<POSIXLog>();

From bfd5696b55cb80011162346ce7e234ebfe0447c3 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Wed, 26 Jan 2022 12:14:21 +0000
Subject: [PATCH 707/946] [gn build] Port 5da7c040030c

---
 llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
index cd65d60831cc2..cd987b6d87ed2 100644
--- a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
@@ -58,6 +58,7 @@ static_library("clang-tidy") {
     "ClangTidyProfiling.cpp",
     "ExpandModularHeadersPPCallbacks.cpp",
     "GlobList.cpp",
+    "NoLintDirectiveHandler.cpp",
   ]
 }
 

From 5157f984ae2c5e6fe9a8bd3a5dae99d5a96a276d Mon Sep 17 00:00:00 2001
From: alex-t <alexander.timofeev@amd.com>
Date: Fri, 24 Dec 2021 17:40:49 +0300
Subject: [PATCH 708/946] [AMDGPU] Enable divergence-driven XNOR selection

Currently not (xor_one_use) pattern is always selected to S_XNOR irrelative od the node divergence.
This relies on further custom selection pass which converts to VALU if necessary and replaces with V_NOT_B32 ( V_XOR_B32)
on those targets which have no V_XNOR.
Current change enables the patterns which explicitly select the not (xor_one_use) to appropriate form.
We assume that xor (not) is already turned into the not (xor) by the combiner.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D116270
---
 llvm/include/llvm/CodeGen/TargetLowering.h    | 11 +++++
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  2 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 36 ++++++++++++---
 llvm/lib/Target/AMDGPU/SIISelLowering.h       |  5 +++
 llvm/lib/Target/AMDGPU/SOPInstructions.td     |  4 +-
 llvm/lib/Target/AMDGPU/VOP2Instructions.td    | 36 +++++++++++++--
 .../CodeGen/AMDGPU/divergence-driven-xnor.ll  | 44 +++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll    |  2 +-
 llvm/test/CodeGen/AMDGPU/permute.ll           | 17 ++++---
 .../test/CodeGen/AMDGPU/wwm-reserved-spill.ll |  4 +-
 llvm/test/CodeGen/AMDGPU/xnor.ll              | 10 ++---
 llvm/test/CodeGen/AMDGPU/xor3.ll              |  6 +--
 12 files changed, 149 insertions(+), 28 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/divergence-driven-xnor.ll

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 0497847e74316..bec1915705947 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3292,6 +3292,17 @@ class TargetLowering : public TargetLoweringBase {
     return false;
   }
 
+  // Lets target to control the following reassociation of operands: (op (op x,
+  // c1), y) -> (op (op x, y), c1) where N0 is (op x, c1) and N1 is y. By
+  // default consider profitable any case where N0 has single use.  This
+  // behavior reflects the condition replaced by this target hook call in the
+  // DAGCombiner.  Any particular target can implement its own heuristic to
+  // restrict common combiner.
+  virtual bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+                                   SDValue N1) const {
+    return N0.hasOneUse();
+  }
+
   virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
     return false;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7e3dbb91f514c..932f263d2558c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1070,7 +1070,7 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
         return DAG.getNode(Opc, DL, VT, N00, OpNode);
       return SDValue();
     }
-    if (N0.hasOneUse()) {
+    if (TLI.isReassocProfitable(DAG, N0, N1)) {
       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
       //              iff (op x, c1) has one use
       if (SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index ec610160b2278..47c9db6627e7b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -9639,6 +9639,9 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
 
 SDValue SITargetLowering::performXorCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
+  if (SDValue RV = reassociateScalarOps(N, DCI.DAG))
+    return RV;
+
   EVT VT = N->getValueType(0);
   if (VT != MVT::i64)
     return SDValue();
@@ -10551,6 +10554,9 @@ SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
   if (VT != MVT::i32 && VT != MVT::i64)
     return SDValue();
 
+  if (DAG.isBaseWithConstantOffset(SDValue(N, 0)))
+    return SDValue();
+
   unsigned Opc = N->getOpcode();
   SDValue Op0 = N->getOperand(0);
   SDValue Op1 = N->getOperand(1);
@@ -10572,12 +10578,6 @@ SDValue SITargetLowering::reassociateScalarOps(SDNode *N,
   if (Op1->isDivergent())
     std::swap(Op1, Op2);
 
-  // If either operand is constant this will conflict with
-  // DAGCombiner::ReassociateOps().
-  if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
-      DAG.isConstantIntBuildVectorOrConstantInt(Op1))
-    return SDValue();
-
   SDLoc SL(N);
   SDValue Add1 = DAG.getNode(Opc, SL, VT, Op0, Op1);
   return DAG.getNode(Opc, SL, VT, Add1, Op2);
@@ -12578,3 +12578,27 @@ SITargetLowering::getTypeLegalizationCost(const DataLayout &DL,
   Cost.first += (Size + 255) / 256;
   return Cost;
 }
+
+bool SITargetLowering::hasMemSDNodeUser(SDNode *N) const {
+  SDNode::use_iterator I = N->use_begin(), E = N->use_end();
+  for (; I != E; ++I) {
+    if (MemSDNode *M = dyn_cast<MemSDNode>(*I)) {
+      if (getBasePtrIndex(M) == I.getOperandNo())
+        return true;
+    }
+  }
+  return false;
+}
+
+bool SITargetLowering::isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+                                           SDValue N1) const {
+  if (!N0.hasOneUse())
+    return false;
+  // Take care of the oportunity to keep N0 uniform
+  if (N0->isDivergent() || !N1->isDivergent())
+    return true;
+  // Check if we have a good chance to form the memory access pattern with the
+  // base and offset
+  return (DAG.isBaseWithConstantOffset(N0) &&
+          hasMemSDNodeUser(*N0->use_begin()));
+}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1315cc15dd021..bf81e082b478e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -449,6 +449,11 @@ class SITargetLowering final : public AMDGPUTargetLowering {
   bool isSDNodeSourceOfDivergence(const SDNode *N,
     FunctionLoweringInfo *FLI, LegacyDivergenceAnalysis *DA) const override;
 
+  bool hasMemSDNodeUser(SDNode *N) const;
+
+  bool isReassocProfitable(SelectionDAG &DAG, SDValue N0,
+                           SDValue N1) const override;
+
   bool isCanonicalized(SelectionDAG &DAG, SDValue Op,
                        unsigned MaxDepth = 5) const;
   bool isCanonicalized(Register Reg, MachineFunction &MF,
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 324d36091827c..3f7837f7dbf11 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -550,11 +550,11 @@ def S_XOR_B64 : SOP2_64 <"s_xor_b64",
 >;
 
 def S_XNOR_B32 : SOP2_32 <"s_xnor_b32",
-  [(set i32:$sdst, (not (xor_oneuse i32:$src0, i32:$src1)))]
+  [(set i32:$sdst, (UniformUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1)))]
 >;
 
 def S_XNOR_B64 : SOP2_64 <"s_xnor_b64",
-  [(set i64:$sdst, (not (xor_oneuse i64:$src0, i64:$src1)))]
+  [(set i64:$sdst, (UniformUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1)))]
 >;
 
 def S_NAND_B32 : SOP2_32 <"s_nand_b32",
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 8d232ffe41141..b9ff814a4dc5c 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -637,9 +637,9 @@ class divergent_i64_BinOp <SDPatternOperator Op, Instruction Inst> :
       )
   >;
 
-def :  divergent_i64_BinOp <and, V_AND_B32_e32>;
-def :  divergent_i64_BinOp <or,  V_OR_B32_e32>;
-def :  divergent_i64_BinOp <xor, V_XOR_B32_e32>;
+def :  divergent_i64_BinOp <and, V_AND_B32_e64>;
+def :  divergent_i64_BinOp <or,  V_OR_B32_e64>;
+def :  divergent_i64_BinOp <xor, V_XOR_B32_e64>;
 
 let SubtargetPredicate = Has16BitInsts in {
 
@@ -688,6 +688,36 @@ let SubtargetPredicate = HasDLInsts in {
 let isReMaterializable = 1 in
 defm V_XNOR_B32 : VOP2Inst <"v_xnor_b32", VOP_I32_I32_I32, xnor>;
 
+def : GCNPat<
+  (i32 (DivergentUnaryFrag<not> (xor_oneuse i32:$src0, i32:$src1))),
+  (i32 (V_XNOR_B32_e64 $src0, $src1))
+>;
+
+def : GCNPat<
+  (i32 (DivergentBinFrag<xor_oneuse> (not i32:$src0), i32:$src1)),
+  (i32 (V_XNOR_B32_e64 $src0, $src1))
+>;
+
+def : GCNPat<
+  (i64 (DivergentUnaryFrag<not> (xor_oneuse i64:$src0, i64:$src1))),
+  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
+                            (i32 (EXTRACT_SUBREG $src0, sub0)),
+                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
+                     (i32 (V_XNOR_B32_e64
+                            (i32 (EXTRACT_SUBREG $src0, sub1)),
+                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
+>;
+
+def : GCNPat<
+  (i64 (DivergentBinFrag<xor_oneuse> (not i64:$src0), i64:$src1)),
+  (REG_SEQUENCE VReg_64, (i32 (V_XNOR_B32_e64
+                            (i32 (EXTRACT_SUBREG $src0, sub0)),
+                            (i32 (EXTRACT_SUBREG $src1, sub0)))), sub0,
+                     (i32 (V_XNOR_B32_e64
+                            (i32 (EXTRACT_SUBREG $src0, sub1)),
+                            (i32 (EXTRACT_SUBREG $src1, sub1)))), sub1)
+>;
+
 let Constraints = "$vdst = $src2",
     DisableEncoding = "$src2",
     isConvertibleToThreeAddress = 1,
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-xnor.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-xnor.ll
new file mode 100644
index 0000000000000..ec5979c119835
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-xnor.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx906 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN_DL %s
+
+; GCN-LABEL: name:            uniform_xnor_i64
+; GCN: S_XNOR_B64
+define amdgpu_kernel void @uniform_xnor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %xor = xor i64 %a, %b
+  %res = xor i64 %xor, -1
+  store i64 %res, i64 addrspace(1)* %out
+  ret void
+}
+; GCN-LABEL: name:            divergent_xnor_i64
+; GCN: V_XOR_B32_e64
+; GCN: V_XOR_B32_e64
+; GCN: V_NOT_B32_e32
+; GCN: V_NOT_B32_e32
+; GCN_DL: V_XNOR_B32_e64
+; GCN_DL: V_XNOR_B32_e64
+define i64 @divergent_xnor_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
+  %xor = xor i64 %a, %b
+  %res = xor i64 %xor, -1
+  ret i64 %res
+}
+
+; GCN-LABEL: name:            uniform_xnor_i32
+; GCN: S_XNOR_B32
+define amdgpu_kernel void @uniform_xnor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+  %xor = xor i32 %a, %b
+  %res = xor i32 %xor, -1
+  store i32 %res, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: name:            divergent_xnor_i32
+; GCN: V_XOR_B32_e64
+; GCN: V_NOT_B32_e32
+; GCN_DL: V_XNOR_B32_e64
+define i32 @divergent_xnor_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+  %xor = xor i32 %a, %b
+  %res = xor i32 %xor, -1
+  ret i32 %res
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
diff --git a/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll b/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
index d4b7b7d9cf2f5..6ac24a99fbb14 100644
--- a/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
+++ b/llvm/test/CodeGen/AMDGPU/fused-bitlogic.ll
@@ -163,8 +163,8 @@ define amdgpu_kernel void @divergent_xor3_b64(<3 x i64> addrspace(1)* %arg) {
 ; GCN-NEXT:    v_xor_b32_e32 v1, v3, v1
 ; GCN-NEXT:    v_xor_b32_e32 v0, v2, v0
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_xnor_b32_e32 v0, v0, v4
 ; GCN-NEXT:    v_xnor_b32_e32 v1, v1, v5
+; GCN-NEXT:    v_xnor_b32_e32 v0, v0, v4
 ; GCN-NEXT:    global_store_dwordx2 v6, v[0:1], s[0:1]
 ; GCN-NEXT:    s_endpgm
 bb:
diff --git a/llvm/test/CodeGen/AMDGPU/permute.ll b/llvm/test/CodeGen/AMDGPU/permute.ll
index d8c20acc666b1..26af6f4d0f2bc 100644
--- a/llvm/test/CodeGen/AMDGPU/permute.ll
+++ b/llvm/test/CodeGen/AMDGPU/permute.ll
@@ -106,8 +106,11 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}and_or_or_and:
-; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
-; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
+; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff00
+; GCN: s_or_b32 [[SREG:s[0-9]+]], s{{[0-9]+}}, 0xffff0000
+; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, v{{[0-9]+}}
+; GCN: v_or_b32_e32 v{{[0-9]+}}, [[SREG]], [[VREG]]
+; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
 define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
 bb:
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -153,10 +156,14 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}known_ffff0500:
-; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
-; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
-; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
+; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
+; GCN: s_and_b32 [[SREG:s[0-9]+]], [[SREG]], 0xff00
+; GCN: s_or_b32 [[SREG]], [[SREG]], 0xffff0000
+; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, [[VREG]]
+; GCN: v_or_b32_e32 [[VREG]], [[SREG]], [[VREG]]
+; GCN: store_dword v[{{[0-9:]+}}], [[VREG]]{{$}}
 ; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+; FIXME here should have been "v_perm_b32" with 0xffff0500 mask.
 define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
 bb:
   %id = tail call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
index 6a079bc4e52d8..516c154f996d0 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
@@ -472,10 +472,10 @@ define amdgpu_gfx i64 @strict_wwm_called_i64(i64 %a) noinline {
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v0, v7
-; GFX9-O0-NEXT:    v_or_b32_e32 v0, v0, v3
+; GFX9-O0-NEXT:    v_or_b32_e64 v0, v0, v3
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v2, v1
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
-; GFX9-O0-NEXT:    v_or_b32_e32 v6, v1, v2
+; GFX9-O0-NEXT:    v_or_b32_e64 v6, v1, v2
 ; GFX9-O0-NEXT:    ; kill: def $vgpr6 killed $vgpr6 def $vgpr6_vgpr7 killed $exec
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v7, v0
 ; GFX9-O0-NEXT:    v_mov_b32_e32 v1, v6
diff --git a/llvm/test/CodeGen/AMDGPU/xnor.ll b/llvm/test/CodeGen/AMDGPU/xnor.ll
index 4b2fde48d1a47..bf5cb3ece02cd 100644
--- a/llvm/test/CodeGen/AMDGPU/xnor.ll
+++ b/llvm/test/CodeGen/AMDGPU/xnor.ll
@@ -61,8 +61,8 @@ entry:
 
 ; GCN-LABEL: {{^}}vector_xnor_i32_one_use
 ; GCN-NOT: s_xnor_b32
-; GCN: v_not_b32
 ; GCN: v_xor_b32
+; GCN: v_not_b32
 ; GCN-DL: v_xnor_b32
 define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) {
 entry:
@@ -73,10 +73,10 @@ entry:
 
 ; GCN-LABEL: {{^}}vector_xnor_i64_one_use
 ; GCN-NOT: s_xnor_b64
-; GCN: v_not_b32
-; GCN: v_not_b32
 ; GCN: v_xor_b32
 ; GCN: v_xor_b32
+; GCN: v_not_b32
+; GCN: v_not_b32
 ; GCN-DL: v_xnor_b32
 ; GCN-DL: v_xnor_b32
 define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) {
@@ -150,8 +150,8 @@ entry:
 
 ; GCN-LABEL: {{^}}vector_xor_na_b_i32_one_use
 ; GCN-NOT: s_xnor_b32
-; GCN: v_not_b32
 ; GCN: v_xor_b32
+; GCN: v_not_b32
 ; GCN-DL: v_xnor_b32
 define i32 @vector_xor_na_b_i32_one_use(i32 %a, i32 %b) {
 entry:
@@ -162,8 +162,8 @@ entry:
 
 ; GCN-LABEL: {{^}}vector_xor_a_nb_i32_one_use
 ; GCN-NOT: s_xnor_b32
-; GCN: v_not_b32
 ; GCN: v_xor_b32
+; GCN: v_not_b32
 ; GCN-DL: v_xnor_b32
 define i32 @vector_xor_a_nb_i32_one_use(i32 %a, i32 %b) {
 entry:
diff --git a/llvm/test/CodeGen/AMDGPU/xor3.ll b/llvm/test/CodeGen/AMDGPU/xor3.ll
index b43fb96de2c16..813a096f1ca34 100644
--- a/llvm/test/CodeGen/AMDGPU/xor3.ll
+++ b/llvm/test/CodeGen/AMDGPU/xor3.ll
@@ -26,13 +26,13 @@ define amdgpu_ps float @xor3(i32 %a, i32 %b, i32 %c) {
 define amdgpu_ps float @xor3_vgpr_b(i32 inreg %a, i32 %b, i32 inreg %c) {
 ; GFX9-LABEL: xor3_vgpr_b:
 ; GFX9:       ; %bb.0:
-; GFX9-NEXT:    v_xor_b32_e32 v0, s2, v0
-; GFX9-NEXT:    v_xor_b32_e32 v0, s3, v0
+; GFX9-NEXT:    s_xor_b32 s0, s3, s2
+; GFX9-NEXT:    v_xor_b32_e32 v0, s0, v0
 ; GFX9-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-LABEL: xor3_vgpr_b:
 ; GFX10:       ; %bb.0:
-; GFX10-NEXT:    v_xor3_b32 v0, s2, v0, s3
+; GFX10-NEXT:    v_xor3_b32 v0, s3, s2, v0
 ; GFX10-NEXT:    ; return to shader part epilog
   %x = xor i32 %a, %b
   %result = xor i32 %x, %c

From ff64327a801902460de3a7176beb0282d56a08be Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 26 Jan 2022 12:32:38 +0000
Subject: [PATCH 709/946] [X86] Extend PR53419 test coverage

Test on SSE2/SSE41/AVX1 targets to compare PMOVMSK vs PTEST codegen paths

Add v8i8 reduction case and test on X64 and X86 targets to check 32-bit handling
---
 llvm/test/CodeGen/X86/pr53419.ll | 197 ++++++++++++++++++++++++++++---
 1 file changed, 178 insertions(+), 19 deletions(-)

diff --git a/llvm/test/CodeGen/X86/pr53419.ll b/llvm/test/CodeGen/X86/pr53419.ll
index b5431c9dcb406..49b9809a9ff4c 100644
--- a/llvm/test/CodeGen/X86/pr53419.ll
+++ b/llvm/test/CodeGen/X86/pr53419.ll
@@ -1,14 +1,38 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2   | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx    | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2   | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=i686-unknown   -mattr=+avx2   | FileCheck %s --check-prefixes=X86
 
 declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
+declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
 
 ; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
-define i1 @intrinsic_version(i8* align 1 %arg, i8* align 1 %arg1, i32 %arg2) {
-; AVX-LABEL: intrinsic_version:
+
+define i1 @intrinsic_v4i8(i8* align 1 %arg, i8* align 1 %arg1) {
+; SSE2-LABEL: intrinsic_v4i8:
+; SSE2:       # %bb.0: # %bb
+; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    cmpb $15, %al
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: intrinsic_v4i8:
+; SSE42:       # %bb.0: # %bb
+; SSE42-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE42-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE42-NEXT:    psubd %xmm1, %xmm0
+; SSE42-NEXT:    ptest %xmm0, %xmm0
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    retq
+;
+; AVX-LABEL: intrinsic_v4i8:
 ; AVX:       # %bb.0: # %bb
 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
@@ -16,6 +40,17 @@ define i1 @intrinsic_version(i8* align 1 %arg, i8* align 1 %arg1, i32 %arg2) {
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
+;
+; X86-LABEL: intrinsic_v4i8:
+; X86:       # %bb.0: # %bb
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X86-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X86-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vptest %xmm0, %xmm0
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
 bb:
   %ptr1 = bitcast i8* %arg1 to <4 x i8>*
   %ptr2 = bitcast i8* %arg to <4 x i8>*
@@ -26,7 +61,72 @@ bb:
   ret i1 %all_eq
 }
 
+define i1 @intrinsic_v8i8(i8* align 1 %arg, i8* align 1 %arg1) {
+; SSE-LABEL: intrinsic_v8i8:
+; SSE:       # %bb.0: # %bb
+; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; SSE-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; SSE-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    cmpb $-1, %al
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: intrinsic_v8i8:
+; AVX:       # %bb.0: # %bb
+; AVX-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpmovmskb %xmm0, %eax
+; AVX-NEXT:    cmpb $-1, %al
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    retq
+;
+; X86-LABEL: intrinsic_v8i8:
+; X86:       # %bb.0: # %bb
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
+; X86-NEXT:    vmovq {{.*#+}} xmm1 = mem[0],zero
+; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vpmovmskb %xmm0, %eax
+; X86-NEXT:    cmpb $-1, %al
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+bb:
+  %ptr1 = bitcast i8* %arg1 to <8 x i8>*
+  %ptr2 = bitcast i8* %arg to <8 x i8>*
+  %lhs = load <8 x i8>, <8 x i8>* %ptr1, align 1
+  %rhs = load <8 x i8>, <8 x i8>* %ptr2, align 1
+  %cmp = icmp eq <8 x i8> %lhs, %rhs
+  %all_eq = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
+  ret i1 %all_eq
+}
+
 define i1 @vector_version(i8* align 1 %arg, i8* align 1 %arg1) {
+; SSE2-LABEL: vector_version:
+; SSE2:       # %bb.0: # %bb
+; SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE2-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT:    pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    movmskps %xmm0, %eax
+; SSE2-NEXT:    testl %eax, %eax
+; SSE2-NEXT:    sete %al
+; SSE2-NEXT:    retq
+;
+; SSE42-LABEL: vector_version:
+; SSE42:       # %bb.0: # %bb
+; SSE42-NEXT:    pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE42-NEXT:    pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; SSE42-NEXT:    psubd %xmm1, %xmm0
+; SSE42-NEXT:    ptest %xmm0, %xmm0
+; SSE42-NEXT:    sete %al
+; SSE42-NEXT:    retq
+;
 ; AVX-LABEL: vector_version:
 ; AVX:       # %bb.0: # %bb
 ; AVX-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
@@ -35,6 +135,17 @@ define i1 @vector_version(i8* align 1 %arg, i8* align 1 %arg1) {
 ; AVX-NEXT:    vptest %xmm0, %xmm0
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
+;
+; X86-LABEL: vector_version:
+; X86:       # %bb.0: # %bb
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X86-NEXT:    vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
+; X86-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; X86-NEXT:    vptest %xmm0, %xmm0
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
 bb:
   %ptr1 = bitcast i8* %arg1 to <4 x i8>*
   %ptr2 = bitcast i8* %arg to <4 x i8>*
@@ -46,13 +157,22 @@ bb:
   ret i1 %all_eq
 }
 
-define i1 @mixed_version(i8* align 1 %arg, i8* align 1 %arg1) {
-; AVX-LABEL: mixed_version:
-; AVX:       # %bb.0: # %bb
-; AVX-NEXT:    movl (%rsi), %eax
-; AVX-NEXT:    cmpl (%rdi), %eax
-; AVX-NEXT:    sete %al
-; AVX-NEXT:    retq
+define i1 @mixed_version_v4i8(i8* align 1 %arg, i8* align 1 %arg1) {
+; CHECK-LABEL: mixed_version_v4i8:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    cmpl (%rdi), %eax
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+;
+; X86-LABEL: mixed_version_v4i8:
+; X86:       # %bb.0: # %bb
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %ecx
+; X86-NEXT:    cmpl (%eax), %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
 bb:
   %ptr1 = bitcast i8* %arg1 to <4 x i8>*
   %ptr2 = bitcast i8* %arg to <4 x i8>*
@@ -64,13 +184,52 @@ bb:
   ret i1 %all_eq
 }
 
+define i1 @mixed_version_v8i8(i8* align 1 %arg, i8* align 1 %arg1) {
+; CHECK-LABEL: mixed_version_v8i8:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    movq (%rsi), %rax
+; CHECK-NEXT:    cmpq (%rdi), %rax
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+;
+; X86-LABEL: mixed_version_v8i8:
+; X86:       # %bb.0: # %bb
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %edx
+; X86-NEXT:    movl 4(%ecx), %ecx
+; X86-NEXT:    xorl 4(%eax), %ecx
+; X86-NEXT:    xorl (%eax), %edx
+; X86-NEXT:    orl %ecx, %edx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+bb:
+  %ptr1 = bitcast i8* %arg1 to <8 x i8>*
+  %ptr2 = bitcast i8* %arg to <8 x i8>*
+  %lhs = load <8 x i8>, <8 x i8>* %ptr1, align 1
+  %rhs = load <8 x i8>, <8 x i8>* %ptr2, align 1
+  %lhs_s = bitcast <8 x i8> %lhs to i64
+  %rhs_s = bitcast <8 x i8> %rhs to i64
+  %all_eq = icmp eq i64 %lhs_s, %rhs_s
+  ret i1 %all_eq
+}
+
 define i1 @scalar_version(i8* align 1 %arg, i8* align 1 %arg1) {
-; AVX-LABEL: scalar_version:
-; AVX:       # %bb.0: # %bb
-; AVX-NEXT:    movl (%rsi), %eax
-; AVX-NEXT:    cmpl (%rdi), %eax
-; AVX-NEXT:    sete %al
-; AVX-NEXT:    retq
+; CHECK-LABEL: scalar_version:
+; CHECK:       # %bb.0: # %bb
+; CHECK-NEXT:    movl (%rsi), %eax
+; CHECK-NEXT:    cmpl (%rdi), %eax
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    retq
+;
+; X86-LABEL: scalar_version:
+; X86:       # %bb.0: # %bb
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl (%ecx), %ecx
+; X86-NEXT:    cmpl (%eax), %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
 bb:
   %ptr1 = bitcast i8* %arg1 to i32*
   %ptr2 = bitcast i8* %arg to i32*

From e6ce2c0b8d5f8253791bf87145669c58328c30db Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache <nicolas.vasilache@gmail.com>
Date: Wed, 26 Jan 2022 05:57:09 -0500
Subject: [PATCH 710/946] [mlir][LLVM] Add support for operand_attrs to
 InlineAsmOp

This revision adds enough support to allow InlineAsmOp to work properly with indirect memory constraints "*m".
These require an explicit "elementtype" TypeAttr on the operands to pass LLVM verification and need to be provided.

Reviewed By: bkramer

Differential Revision: https://reviews.llvm.org/D118006
---
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   | 10 +++-
 .../X86Vector/Transforms/AVXTranspose.cpp     |  5 +-
 .../LLVMIR/LLVMToLLVMIRTranslation.cpp        | 25 +++++++++-
 .../test-inline-asm-vector-avx512.mlir        | 49 +++++++++++++++++++
 4 files changed, 85 insertions(+), 4 deletions(-)
 create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 946e674fab66f..0a7f4c9ff82bc 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1934,7 +1934,8 @@ def LLVM_InlineAsmOp : LLVM_Op<"inline_asm", []> {
         UnitAttr:$has_side_effects,
         UnitAttr:$is_align_stack,
         OptionalAttr<
-          DefaultValuedAttr<AsmATTOrIntel, "AsmDialect::AD_ATT">>:$asm_dialect);
+          DefaultValuedAttr<AsmATTOrIntel, "AsmDialect::AD_ATT">>:$asm_dialect,
+        OptionalAttr<ArrayAttr>:$operand_attrs);
 
   let results = (outs Optional<LLVM_Type>:$res);
 
@@ -1942,9 +1943,16 @@ def LLVM_InlineAsmOp : LLVM_Op<"inline_asm", []> {
     (`has_side_effects` $has_side_effects^)?
     (`is_align_stack` $is_align_stack^)?
     (`asm_dialect` `=` $asm_dialect^)?
+    (`operand_attrs` `=` $operand_attrs^)?
     attr-dict
     $asm_string `,` $constraints
     operands `:` functional-type(operands, results)
    }];
+
+  let extraClassDeclaration = [{
+    static StringRef getElementTypeAttrName() {
+      return "elementtype";
+    }
+  }];
 }
 #endif // LLVMIR_OPS
diff --git a/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp b/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
index 3f2db00b388dc..20d9d4411535e 100644
--- a/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
+++ b/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
@@ -37,7 +37,10 @@ Value mlir::x86vector::avx2::inline_asm::mm256BlendPsAsm(
   SmallVector<Value> asmVals{v1, v2};
   auto asmStr = llvm::formatv(asmTp, llvm::format_hex(mask, /*width=*/2)).str();
   auto asmOp = b.create<LLVM::InlineAsmOp>(
-      v1.getType(), asmVals, asmStr, asmCstr, false, false, asmDialectAttr);
+      v1.getType(), /*operands=*/asmVals, /*asm_string=*/asmStr,
+      /*constraints=*/asmCstr, /*has_side_effects=*/false,
+      /*is_align_stack=*/false, /*asm_dialect=*/asmDialectAttr,
+      /*operand_attrs=*/ArrayAttr());
   return asmOp.getResult(0);
 }
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
index 143630ce53ae0..6701ccb830d53 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
@@ -330,11 +330,32 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder,
                                    inlineAsmOp.getConstraints(),
                                    inlineAsmOp.getHasSideEffects(),
                                    inlineAsmOp.getIsAlignStack());
-    llvm::Value *result = builder.CreateCall(
+    llvm::CallInst *inst = builder.CreateCall(
         inlineAsmInst,
         moduleTranslation.lookupValues(inlineAsmOp.getOperands()));
+    if (auto maybeOperandAttrs = inlineAsmOp.getOperandAttrs()) {
+      llvm::AttributeList attrList;
+      for (const auto &it : llvm::enumerate(*maybeOperandAttrs)) {
+        Attribute attr = it.value();
+        if (!attr)
+          continue;
+        DictionaryAttr dAttr = attr.cast<DictionaryAttr>();
+        TypeAttr tAttr =
+            dAttr.get(InlineAsmOp::getElementTypeAttrName()).cast<TypeAttr>();
+        llvm::AttrBuilder b(moduleTranslation.getLLVMContext());
+        llvm::Type *ty = moduleTranslation.convertType(tAttr.getValue());
+        b.addTypeAttr(llvm::Attribute::ElementType, ty);
+        // shift to account for the returned value (this is always 1 aggregate
+        // value in LLVM).
+        int shift = (opInst.getNumResults() > 0) ? 1 : 0;
+        attrList = attrList.addAttributesAtIndex(
+            moduleTranslation.getLLVMContext(), it.index() + shift, b);
+      }
+      inst->setAttributes(attrList);
+    }
+
     if (opInst.getNumResults() != 0)
-      moduleTranslation.mapValue(opInst.getResult(0), result);
+      moduleTranslation.mapValue(opInst.getResult(0), inst);
     return success();
   }
 
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir
new file mode 100644
index 0000000000000..7ce9c1a98f331
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir
@@ -0,0 +1,49 @@
+// RUN: mlir-opt %s -convert-linalg-to-loops -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm  -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -convert-arith-to-llvm -reconcile-unrealized-casts |\
+// RUN: mlir-translate --mlir-to-llvmir |\
+// RUN: %lli --entry-function=entry --mattr="avx512f" | \
+// RUN: FileCheck %s
+
+module {
+
+  // printf format string "%i\n", char by char:   %    i  \n  0
+  llvm.mlir.global private @pct_i_newline(dense<[37, 105, 10, 0]> : tensor<4xi8>)
+    : !llvm.array<4xi8>
+  // an array of 16 i32 of values [0..15]
+  llvm.mlir.global private @const16(
+    dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : tensor<16 x i32>)
+      : !llvm.array<16 x i32>
+
+  // declare void @printf(i8*, ...)
+  llvm.func @printf(!llvm.ptr<i8>, ...)
+
+  llvm.func @entry() {
+    %c0 = llvm.mlir.constant(0 : index) : i64
+
+    %1 = llvm.mlir.addressof @const16 : !llvm.ptr<array<16 x i32>>
+    %ptr = llvm.getelementptr %1[%c0, %c0]
+      : (!llvm.ptr<array<16 x i32>>, i64, i64) -> !llvm.ptr<i32>
+    %ptr2 = llvm.bitcast %ptr :  !llvm.ptr<i32> to !llvm.ptr<vector<16xi32>>
+    // operand_attrs of *m operands need to be piped through to LLVM for
+    // verification to pass.
+    %v = llvm.inline_asm
+        asm_dialect = intel
+        operand_attrs = [{ elementtype = vector<16xi32> }]
+        "vmovdqu32 $0, $1", "=x,*m" %ptr2
+      : (!llvm.ptr<vector<16xi32>>) -> vector<16xi32>
+
+    %2 = llvm.mlir.addressof @pct_i_newline : !llvm.ptr<array<4xi8>>
+    %ptrfmt = llvm.getelementptr %2[%c0, %c0]
+      : (!llvm.ptr<array<4xi8>>, i64, i64) -> !llvm.ptr<i8>
+
+    // CHECK: 0
+    %v0 = vector.extract %v[0]: vector<16xi32>
+    llvm.call @printf(%ptrfmt, %v0) : (!llvm.ptr<i8>, i32) -> ()
+
+    // CHECK: 9
+    %v9 = vector.extract %v[9]: vector<16xi32>
+    llvm.call @printf(%ptrfmt, %v9) : (!llvm.ptr<i8>, i32) -> ()
+
+    llvm.return
+  }
+}
+

From b58174d6248805c8994cfe685c826d3f14961202 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Wed, 26 Jan 2022 06:22:41 -0500
Subject: [PATCH 711/946] Cleanup headers for BinaryFormat

A few header removal, some forward declarations. As usual, this can
break your build due to false dependencies, the most notable change are:

- "llvm/BinaryFormat/AMDGPUMetadataVerifier.h" no longer includes "llvm/BinaryFormat/MsgPackDocument.h"

The impact on generated preprocessed lines for LLVMBinaryFormat is
pretty nice:

$ clang++ -E  -Iinclude -I../llvm/include ../llvm/lib/BinaryFormat/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l
before this patch: 705281
after this patch: 751456

Discourse thread on the topic: https://llvm.discourse.group/t/include-what-you-use-include-cleanup
---
 .../llvm/BinaryFormat/AMDGPUMetadataVerifier.h     | 14 +++++++++++++-
 llvm/include/llvm/BinaryFormat/Dwarf.h             |  3 ++-
 llvm/include/llvm/BinaryFormat/ELF.h               |  1 -
 llvm/include/llvm/BinaryFormat/MsgPackReader.h     |  3 +--
 llvm/include/llvm/BinaryFormat/MsgPackWriter.h     |  7 ++++---
 llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp   |  8 +++++++-
 llvm/lib/BinaryFormat/ELF.cpp                      |  3 +--
 llvm/lib/BinaryFormat/Magic.cpp                    |  2 --
 llvm/tools/llvm-readobj/ELFDumper.cpp              |  1 +
 9 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h b/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
index 7332b2a7ea891..c94cafe7458b3 100644
--- a/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
+++ b/llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h
@@ -16,9 +16,21 @@
 #ifndef LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
 #define LLVM_BINARYFORMAT_AMDGPUMETADATAVERIFIER_H
 
-#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/BinaryFormat/MsgPackReader.h"
+
+#include <cstddef>
 
 namespace llvm {
+
+namespace msgpack {
+  class DocNode;
+  class MapDocNode;
+}
+
 namespace AMDGPU {
 namespace HSAMD {
 namespace V3 {
diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.h b/llvm/include/llvm/BinaryFormat/Dwarf.h
index 7ce3a97f9906f..4473f506d3711 100644
--- a/llvm/include/llvm/BinaryFormat/Dwarf.h
+++ b/llvm/include/llvm/BinaryFormat/Dwarf.h
@@ -19,7 +19,6 @@
 #ifndef LLVM_BINARYFORMAT_DWARF_H
 #define LLVM_BINARYFORMAT_DWARF_H
 
-#include "llvm/ADT/Optional.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -31,9 +30,11 @@
 
 namespace llvm {
 class StringRef;
+template<typename T> class Optional;
 
 namespace dwarf {
 
+
 //===----------------------------------------------------------------------===//
 // DWARF constants as gleaned from the DWARF Debugging Information Format V.5
 // reference manual http://www.dwarfstd.org/.
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 7942af073176c..8840929174d67 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -22,7 +22,6 @@
 #include "llvm/ADT/StringRef.h"
 #include <cstdint>
 #include <cstring>
-#include <string>
 
 namespace llvm {
 namespace ELF {
diff --git a/llvm/include/llvm/BinaryFormat/MsgPackReader.h b/llvm/include/llvm/BinaryFormat/MsgPackReader.h
index 5035de03cc1bd..5123fb65cf095 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackReader.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackReader.h
@@ -34,8 +34,7 @@
 #define LLVM_BINARYFORMAT_MSGPACKREADER_H
 
 #include "llvm/Support/Error.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MemoryBufferRef.h"
 #include <cstdint>
 
 namespace llvm {
diff --git a/llvm/include/llvm/BinaryFormat/MsgPackWriter.h b/llvm/include/llvm/BinaryFormat/MsgPackWriter.h
index 254b52f18bd3b..b3d3c3bcdef0e 100644
--- a/llvm/include/llvm/BinaryFormat/MsgPackWriter.h
+++ b/llvm/include/llvm/BinaryFormat/MsgPackWriter.h
@@ -28,12 +28,13 @@
 #ifndef LLVM_BINARYFORMAT_MSGPACKWRITER_H
 #define LLVM_BINARYFORMAT_MSGPACKWRITER_H
 
-#include "llvm/BinaryFormat/MsgPack.h"
 #include "llvm/Support/EndianStream.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/MemoryBufferRef.h"
 
 namespace llvm {
+
+class raw_ostream;
+
 namespace msgpack {
 
 /// Writes MessagePack objects to an output stream, one at a time.
diff --git a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
index 284e469a1d2fe..99d2c82212811 100644
--- a/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
+++ b/llvm/lib/BinaryFormat/AMDGPUMetadataVerifier.cpp
@@ -12,8 +12,14 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
+
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/AMDGPUMetadata.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+
+#include <map>
+#include <utility>
 
 namespace llvm {
 namespace AMDGPU {
diff --git a/llvm/lib/BinaryFormat/ELF.cpp b/llvm/lib/BinaryFormat/ELF.cpp
index 2ede63f464d3a..e2e601b6d90f9 100644
--- a/llvm/lib/BinaryFormat/ELF.cpp
+++ b/llvm/lib/BinaryFormat/ELF.cpp
@@ -7,9 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/BinaryFormat/ELF.h"
-#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/Error.h"
 
 using namespace llvm;
 using namespace ELF;
diff --git a/llvm/lib/BinaryFormat/Magic.cpp b/llvm/lib/BinaryFormat/Magic.cpp
index 0c40bce22412d..044e4840cb3b0 100644
--- a/llvm/lib/BinaryFormat/Magic.cpp
+++ b/llvm/lib/BinaryFormat/Magic.cpp
@@ -10,10 +10,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/COFF.h"
-#include "llvm/BinaryFormat/ELF.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/Endian.h"
-#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 #if !defined(_MSC_VER) && !defined(__MINGW32__)
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index c95ba7cec704a..cfb618117d2bf 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -31,6 +31,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/AMDGPUMetadataVerifier.h"
 #include "llvm/BinaryFormat/ELF.h"
+#include "llvm/BinaryFormat/MsgPackDocument.h"
 #include "llvm/Demangle/Demangle.h"
 #include "llvm/Object/Archive.h"
 #include "llvm/Object/ELF.h"

From 52c7faeae8fce49cd7e3853a29bb38e3ce5be892 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Mon, 24 Jan 2022 11:07:29 -0800
Subject: [PATCH 712/946] [demangler] improve test harness

The demangler test harness is a little unclear.  The failed demangling
message always causes me to think about 'reality', changing to a
simple 'Found' seems clearer.

The expected-to-fail tests abort as soon as one passes, rather than
continue, and then abort if any passed.  This changes that loop to
fail at the end, in a similar manner to the expected-to-work loop.

Reviewed By: ChuanqiXu

Differential Revision: https://reviews.llvm.org/D118130
---
 libcxxabi/test/test_demangle.pass.cpp | 44 +++++++++++----------------
 1 file changed, 18 insertions(+), 26 deletions(-)

diff --git a/libcxxabi/test/test_demangle.pass.cpp b/libcxxabi/test/test_demangle.pass.cpp
index 7bbb17c581cc1..41ee1100d12d2 100644
--- a/libcxxabi/test/test_demangle.pass.cpp
+++ b/libcxxabi/test/test_demangle.pass.cpp
@@ -29946,51 +29946,43 @@ void test()
     for (unsigned i = 0; i < N; ++i)
     {
         int status;
-        char* demang = __cxxabiv1::__cxa_demangle(cases[i][0], buf, &len, &status);
-        if (demang == 0 || std::strcmp(demang, cases[i][1]) != 0)
+        char* demang =
+            __cxxabiv1::__cxa_demangle(cases[i][0], buf, &len, &status);
+        if (!demang || std::strcmp(demang, cases[i][1]) != 0)
         {
-            std::printf("ERROR demangling %s\nexpected: %s\n", cases[i][0], cases[i][1]);
-            if (demang)
-            {
-                std::printf(" reality: %s\n", demang);
-                buf = demang;
-                failed = true;
-            }
-            else
-            {
-                std::printf("Got instead: NULL, %d\n", status);
-                failed = true;
-            }
-        }
-        else
-        {
-            buf = demang;
+            std::printf("ERROR demangling %s\nexpected: %s\n",
+                        cases[i][0], cases[i][1]);
+            std::printf("Got: %d, %s\n", status, demang ? demang : "(null)");
+            failed = true;
         }
+        if (demang)
+          buf = demang;
     }
-    assert(!failed);
     free(buf);
+    assert(!failed && "demangle failed");
 }
 
 void test_invalid_cases()
 {
     std::size_t len = 0;
     char* buf = nullptr;
+    bool passed = false;
     for (unsigned i = 0; i < NI; ++i)
     {
         int status;
-        char* demang = __cxxabiv1::__cxa_demangle(invalid_cases[i], buf, &len, &status);
+        char* demang =
+            __cxxabiv1::__cxa_demangle(invalid_cases[i], buf, &len, &status);
         if (status != -2)
         {
             std::printf("%s should be invalid but is not\n", invalid_cases[i]);
-            std::printf("Got status %d\n", status);
-            assert(status == -2);
-        }
-        else
-        {
-            buf = demang;
+            std::printf("Got: %d, %s\n", status, demang ? demang : "(null)");
+            passed = true;
         }
+        if (demang)
+          buf = demang;
     }
     free(buf);
+    assert(!passed && "demangle did not fail");
 }
 
 const char *xfail_cases[] = {

From 4e5fce58485cc69ee69da10e26a338ceeef280a0 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Mon, 24 Jan 2022 07:59:57 -0800
Subject: [PATCH 713/946] [demangler] refactor SpecialSubKind

Code generating the special substitutions in std is a switch statement
with each case block containing the same conststruction template.  It
is more efficient to commonize that after the switch, having
determined which SubKind to create.  Also, let's sort the cases.

Reviewed By: ChuanqiXu

Differential Revision: https://reviews.llvm.org/D118131
---
 libcxxabi/src/demangle/ItaniumDemangle.h     | 27 +++++++++-----------
 llvm/include/llvm/Demangle/ItaniumDemangle.h | 27 +++++++++-----------
 2 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/libcxxabi/src/demangle/ItaniumDemangle.h b/libcxxabi/src/demangle/ItaniumDemangle.h
index 45bb741f5629f..98489e4dc802f 100644
--- a/libcxxabi/src/demangle/ItaniumDemangle.h
+++ b/libcxxabi/src/demangle/ItaniumDemangle.h
@@ -5448,37 +5448,34 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSubstitution() {
     return nullptr;
 
   if (look() >= 'a' && look() <= 'z') {
-    Node *SpecialSub;
+    SpecialSubKind Kind;
     switch (look()) {
     case 'a':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::allocator);
+      Kind = SpecialSubKind::allocator;
       break;
     case 'b':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::basic_string);
+      Kind = SpecialSubKind::basic_string;
       break;
-    case 's':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::string);
+    case 'd':
+      Kind = SpecialSubKind::iostream;
       break;
     case 'i':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::istream);
+      Kind = SpecialSubKind::istream;
       break;
     case 'o':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::ostream);
+      Kind = SpecialSubKind::ostream;
       break;
-    case 'd':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::iostream);
+    case 's':
+      Kind = SpecialSubKind::string;
       break;
     default:
       return nullptr;
     }
+    ++First;
+    auto *SpecialSub = make<SpecialSubstitution>(Kind);
     if (!SpecialSub)
       return nullptr;
+
     // Itanium C++ ABI 5.1.2: If a name that would use a built-in <substitution>
     // has ABI tags, the tags are appended to the substitution; the result is a
     // substitutable component.
diff --git a/llvm/include/llvm/Demangle/ItaniumDemangle.h b/llvm/include/llvm/Demangle/ItaniumDemangle.h
index a3a0381edf97d..28545ed068367 100644
--- a/llvm/include/llvm/Demangle/ItaniumDemangle.h
+++ b/llvm/include/llvm/Demangle/ItaniumDemangle.h
@@ -5450,37 +5450,34 @@ Node *AbstractManglingParser<Derived, Alloc>::parseSubstitution() {
     return nullptr;
 
   if (look() >= 'a' && look() <= 'z') {
-    Node *SpecialSub;
+    SpecialSubKind Kind;
     switch (look()) {
     case 'a':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::allocator);
+      Kind = SpecialSubKind::allocator;
       break;
     case 'b':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::basic_string);
+      Kind = SpecialSubKind::basic_string;
       break;
-    case 's':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::string);
+    case 'd':
+      Kind = SpecialSubKind::iostream;
       break;
     case 'i':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::istream);
+      Kind = SpecialSubKind::istream;
       break;
     case 'o':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::ostream);
+      Kind = SpecialSubKind::ostream;
       break;
-    case 'd':
-      ++First;
-      SpecialSub = make<SpecialSubstitution>(SpecialSubKind::iostream);
+    case 's':
+      Kind = SpecialSubKind::string;
       break;
     default:
       return nullptr;
     }
+    ++First;
+    auto *SpecialSub = make<SpecialSubstitution>(Kind);
     if (!SpecialSub)
       return nullptr;
+
     // Itanium C++ ABI 5.1.2: If a name that would use a built-in <substitution>
     // has ABI tags, the tags are appended to the substitution; the result is a
     // substitutable component.

From 718562a469f98a580975a5662f381cca9f1b3de5 Mon Sep 17 00:00:00 2001
From: Andrzej Warzynski <andrzej.warzynski@arm.com>
Date: Tue, 25 Jan 2022 09:42:35 +0000
Subject: [PATCH 714/946] [flang][tco] Remove unneeded dependencies

`tco` does not generate machine code, so it does not require (machine)
code-gen related dependencies.

Differential Revision: https://reviews.llvm.org/D118112
---
 flang/tools/tco/CMakeLists.txt | 4 ----
 flang/tools/tco/tco.cpp        | 2 --
 2 files changed, 6 deletions(-)

diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt
index 49ba32f01dd91..f986ea1a64cf8 100644
--- a/flang/tools/tco/CMakeLists.txt
+++ b/flang/tools/tco/CMakeLists.txt
@@ -1,7 +1,3 @@
-set(LLVM_LINK_COMPONENTS
-  ${LLVM_TARGETS_TO_BUILD}
-)
-
 add_flang_tool(tco tco.cpp)
 llvm_update_compile_flags(tco)
 get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp
index db01ea545580c..e363394f23289 100644
--- a/flang/tools/tco/tco.cpp
+++ b/flang/tools/tco/tco.cpp
@@ -136,8 +136,6 @@ int main(int argc, char **argv) {
   fir::support::registerMLIRPassesForFortranTools();
   fir::registerOptCodeGenPasses();
   fir::registerOptTransformPasses();
-  InitializeAllTargets();
-  mlir::registerAsmPrinterCLOptions();
   mlir::registerMLIRContextCLOptions();
   mlir::registerPassManagerCLOptions();
   mlir::PassPipelineCLParser passPipe("", "Compiler passes to run");

From e30525b6a5c46a44ae7bef2f9ac60614396ee9b3 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker@arm.com>
Date: Wed, 26 Jan 2022 12:55:15 +0000
Subject: [PATCH 715/946] [AArch64] Add NEON test cases for ISD::ABDS/U.

---
 llvm/test/CodeGen/AArch64/neon-abd.ll | 381 ++++++++++++++++++++++++++
 1 file changed, 381 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/neon-abd.ll

diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll
new file mode 100644
index 0000000000000..94c25e945f630
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/neon-abd.ll
@@ -0,0 +1,381 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+;
+; SABD
+;
+
+define <8 x i8> @sabd_8b(<8 x i8> %a, <8 x i8> %b) #0 {
+; CHECK-LABEL: sabd_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+  %a.sext = sext <8 x i8> %a to <8 x i16>
+  %b.sext = sext <8 x i8> %b to <8 x i16>
+  %sub = sub <8 x i16> %a.sext, %b.sext
+  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+  %trunc = trunc <8 x i16> %abs to <8 x i8>
+  ret <8 x i8> %trunc
+}
+
+define <16 x i8> @sabd_16b(<16 x i8> %a, <16 x i8> %b) #0 {
+; CHECK-LABEL: sabd_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %a.sext = sext <16 x i8> %a to <16 x i16>
+  %b.sext = sext <16 x i8> %b to <16 x i16>
+  %sub = sub <16 x i16> %a.sext, %b.sext
+  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
+  %trunc = trunc <16 x i16> %abs to <16 x i8>
+  ret <16 x i8> %trunc
+}
+
+define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
+; CHECK-LABEL: sabd_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+  %a.sext = sext <4 x i16> %a to <4 x i32>
+  %b.sext = sext <4 x i16> %b to <4 x i32>
+  %sub = sub <4 x i32> %a.sext, %b.sext
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+  %trunc = trunc <4 x i32> %abs to <4 x i16>
+  ret <4 x i16> %trunc
+}
+
+define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
+; CHECK-LABEL: sabd_4h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.4h, v0.4h, #8
+; CHECK-NEXT:    shl v1.4h, v1.4h, #8
+; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
+; CHECK-NEXT:    sshr v1.4h, v1.4h, #8
+; CHECK-NEXT:    sub v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+  %a.sext = sext <4 x i8> %a to <4 x i16>
+  %b.sext = sext <4 x i8> %b to <4 x i16>
+  %sub = sub <4 x i16> %a.sext, %b.sext
+  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
+  ret <4 x i16> %abs
+}
+
+define <8 x i16> @sabd_8h(<8 x i16> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: sabd_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %a.sext = sext <8 x i16> %a to <8 x i32>
+  %b.sext = sext <8 x i16> %b to <8 x i32>
+  %sub = sub <8 x i32> %a.sext, %b.sext
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
+  %trunc = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %trunc
+}
+
+define <8 x i16> @sabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) #0 {
+; CHECK-LABEL: sabd_8h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+  %a.sext = sext <8 x i8> %a to <8 x i16>
+  %b.sext = sext <8 x i8> %b to <8 x i16>
+  %sub = sub <8 x i16> %a.sext, %b.sext
+  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+  ret <8 x i16> %abs
+}
+
+define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: sabd_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+  %a.sext = sext <2 x i32> %a to <2 x i64>
+  %b.sext = sext <2 x i32> %b to <2 x i64>
+  %sub = sub <2 x i64> %a.sext, %b.sext
+  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+  %trunc = trunc <2 x i64> %abs to <2 x i32>
+  ret <2 x i32> %trunc
+}
+
+define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
+; CHECK-LABEL: sabd_2s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.2s, v0.2s, #16
+; CHECK-NEXT:    shl v1.2s, v1.2s, #16
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #16
+; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    abs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %a.sext = sext <2 x i16> %a to <2 x i32>
+  %b.sext = sext <2 x i16> %b to <2 x i32>
+  %sub = sub <2 x i32> %a.sext, %b.sext
+  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
+  ret <2 x i32> %abs
+}
+
+define <4 x i32> @sabd_4s(<4 x i32> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: sabd_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %a.sext = sext <4 x i32> %a to <4 x i64>
+  %b.sext = sext <4 x i32> %b to <4 x i64>
+  %sub = sub <4 x i64> %a.sext, %b.sext
+  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
+  %trunc = trunc <4 x i64> %abs to <4 x i32>
+  ret <4 x i32> %trunc
+}
+
+define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
+; CHECK-LABEL: sabd_4s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+  %a.sext = sext <4 x i16> %a to <4 x i32>
+  %b.sext = sext <4 x i16> %b to <4 x i32>
+  %sub = sub <4 x i32> %a.sext, %b.sext
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+  ret <4 x i32> %abs
+}
+
+define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
+; CHECK-LABEL: sabd_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    fmov x10, d0
+; CHECK-NEXT:    mov x9, v1.d[1]
+; CHECK-NEXT:    asr x11, x10, #63
+; CHECK-NEXT:    asr x12, x8, #63
+; CHECK-NEXT:    asr x13, x9, #63
+; CHECK-NEXT:    subs x8, x8, x9
+; CHECK-NEXT:    fmov x9, d1
+; CHECK-NEXT:    sbcs x12, x12, x13
+; CHECK-NEXT:    asr x13, x9, #63
+; CHECK-NEXT:    subs x9, x10, x9
+; CHECK-NEXT:    sbcs x10, x11, x13
+; CHECK-NEXT:    cmp x10, #0
+; CHECK-NEXT:    cneg x9, x9, lt
+; CHECK-NEXT:    cmp x12, #0
+; CHECK-NEXT:    cneg x8, x8, lt
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    fmov d1, x8
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ret
+  %a.sext = sext <2 x i64> %a to <2 x i128>
+  %b.sext = sext <2 x i64> %b to <2 x i128>
+  %sub = sub <2 x i128> %a.sext, %b.sext
+  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
+  %trunc = trunc <2 x i128> %abs to <2 x i64>
+  ret <2 x i64> %trunc
+}
+
+define <2 x i64> @sabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: sabd_2d_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sabdl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+  %a.sext = sext <2 x i32> %a to <2 x i64>
+  %b.sext = sext <2 x i32> %b to <2 x i64>
+  %sub = sub <2 x i64> %a.sext, %b.sext
+  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+  ret <2 x i64> %abs
+}
+
+;
+; UABD
+;
+
+define <8 x i8> @uabd_8b(<8 x i8> %a, <8 x i8> %b) #0 {
+; CHECK-LABEL: uabd_8b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8b, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+  %a.zext = zext <8 x i8> %a to <8 x i16>
+  %b.zext = zext <8 x i8> %b to <8 x i16>
+  %sub = sub <8 x i16> %a.zext, %b.zext
+  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+  %trunc = trunc <8 x i16> %abs to <8 x i8>
+  ret <8 x i8> %trunc
+}
+
+define <16 x i8> @uabd_16b(<16 x i8> %a, <16 x i8> %b) #0 {
+; CHECK-LABEL: uabd_16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ret
+  %a.zext = zext <16 x i8> %a to <16 x i16>
+  %b.zext = zext <16 x i8> %b to <16 x i16>
+  %sub = sub <16 x i16> %a.zext, %b.zext
+  %abs = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %sub, i1 true)
+  %trunc = trunc <16 x i16> %abs to <16 x i8>
+  ret <16 x i8> %trunc
+}
+
+define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
+; CHECK-LABEL: uabd_4h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+  %a.zext = zext <4 x i16> %a to <4 x i32>
+  %b.zext = zext <4 x i16> %b to <4 x i32>
+  %sub = sub <4 x i32> %a.zext, %b.zext
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+  %trunc = trunc <4 x i32> %abs to <4 x i16>
+  ret <4 x i16> %trunc
+}
+
+define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
+; CHECK-LABEL: uabd_4h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bic v0.4h, #255, lsl #8
+; CHECK-NEXT:    bic v1.4h, #255, lsl #8
+; CHECK-NEXT:    sub v0.4h, v0.4h, v1.4h
+; CHECK-NEXT:    abs v0.4h, v0.4h
+; CHECK-NEXT:    ret
+  %a.zext = zext <4 x i8> %a to <4 x i16>
+  %b.zext = zext <4 x i8> %b to <4 x i16>
+  %sub = sub <4 x i16> %a.zext, %b.zext
+  %abs = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %sub, i1 true)
+  ret <4 x i16> %abs
+}
+
+define <8 x i16> @uabd_8h(<8 x i16> %a, <8 x i16> %b) #0 {
+; CHECK-LABEL: uabd_8h:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    ret
+  %a.zext = zext <8 x i16> %a to <8 x i32>
+  %b.zext = zext <8 x i16> %b to <8 x i32>
+  %sub = sub <8 x i32> %a.zext, %b.zext
+  %abs = call <8 x i32> @llvm.abs.v8i32(<8 x i32> %sub, i1 true)
+  %trunc = trunc <8 x i32> %abs to <8 x i16>
+  ret <8 x i16> %trunc
+}
+
+define <8 x i16> @uabd_8h_promoted_ops(<8 x i8> %a, <8 x i8> %b) #0 {
+; CHECK-LABEL: uabd_8h_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT:    ret
+  %a.zext = zext <8 x i8> %a to <8 x i16>
+  %b.zext = zext <8 x i8> %b to <8 x i16>
+  %sub = sub <8 x i16> %a.zext, %b.zext
+  %abs = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %sub, i1 true)
+  ret <8 x i16> %abs
+}
+
+define <2 x i32> @uabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: uabd_2s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+  %a.zext = zext <2 x i32> %a to <2 x i64>
+  %b.zext = zext <2 x i32> %b to <2 x i64>
+  %sub = sub <2 x i64> %a.zext, %b.zext
+  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+  %trunc = trunc <2 x i64> %abs to <2 x i32>
+  ret <2 x i32> %trunc
+}
+
+define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
+; CHECK-LABEL: uabd_2s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d2, #0x00ffff0000ffff
+; CHECK-NEXT:    and v0.8b, v0.8b, v2.8b
+; CHECK-NEXT:    and v1.8b, v1.8b, v2.8b
+; CHECK-NEXT:    sub v0.2s, v0.2s, v1.2s
+; CHECK-NEXT:    abs v0.2s, v0.2s
+; CHECK-NEXT:    ret
+  %a.zext = zext <2 x i16> %a to <2 x i32>
+  %b.zext = zext <2 x i16> %b to <2 x i32>
+  %sub = sub <2 x i32> %a.zext, %b.zext
+  %abs = call <2 x i32> @llvm.abs.v2i32(<2 x i32> %sub, i1 true)
+  ret <2 x i32> %abs
+}
+
+define <4 x i32> @uabd_4s(<4 x i32> %a, <4 x i32> %b) #0 {
+; CHECK-LABEL: uabd_4s:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
+  %a.zext = zext <4 x i32> %a to <4 x i64>
+  %b.zext = zext <4 x i32> %b to <4 x i64>
+  %sub = sub <4 x i64> %a.zext, %b.zext
+  %abs = call <4 x i64> @llvm.abs.v4i64(<4 x i64> %sub, i1 true)
+  %trunc = trunc <4 x i64> %abs to <4 x i32>
+  ret <4 x i32> %trunc
+}
+
+define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
+; CHECK-LABEL: uabd_4s_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdl v0.4s, v0.4h, v1.4h
+; CHECK-NEXT:    ret
+  %a.zext = zext <4 x i16> %a to <4 x i32>
+  %b.zext = zext <4 x i16> %b to <4 x i32>
+  %sub = sub <4 x i32> %a.zext, %b.zext
+  %abs = call <4 x i32> @llvm.abs.v4i32(<4 x i32> %sub, i1 true)
+  ret <4 x i32> %abs
+}
+
+define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
+; CHECK-LABEL: uabd_2d:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, v0.d[1]
+; CHECK-NEXT:    fmov x10, d0
+; CHECK-NEXT:    mov x9, v1.d[1]
+; CHECK-NEXT:    subs x8, x8, x9
+; CHECK-NEXT:    fmov x9, d1
+; CHECK-NEXT:    ngcs x11, xzr
+; CHECK-NEXT:    subs x9, x10, x9
+; CHECK-NEXT:    ngcs x10, xzr
+; CHECK-NEXT:    cmp x10, #0
+; CHECK-NEXT:    cneg x9, x9, lt
+; CHECK-NEXT:    cmp x11, #0
+; CHECK-NEXT:    cneg x8, x8, lt
+; CHECK-NEXT:    fmov d0, x9
+; CHECK-NEXT:    fmov d1, x8
+; CHECK-NEXT:    mov v0.d[1], v1.d[0]
+; CHECK-NEXT:    ret
+  %a.zext = zext <2 x i64> %a to <2 x i128>
+  %b.zext = zext <2 x i64> %b to <2 x i128>
+  %sub = sub <2 x i128> %a.zext, %b.zext
+  %abs = call <2 x i128> @llvm.abs.v2i128(<2 x i128> %sub, i1 true)
+  %trunc = trunc <2 x i128> %abs to <2 x i64>
+  ret <2 x i64> %trunc
+}
+
+define <2 x i64> @uabd_2d_promoted_ops(<2 x i32> %a, <2 x i32> %b) #0 {
+; CHECK-LABEL: uabd_2d_promoted_ops:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uabdl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT:    ret
+  %a.zext = zext <2 x i32> %a to <2 x i64>
+  %b.zext = zext <2 x i32> %b to <2 x i64>
+  %sub = sub <2 x i64> %a.zext, %b.zext
+  %abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
+  ret <2 x i64> %abs
+}
+
+declare <8 x i8> @llvm.abs.v8i8(<8 x i8>, i1)
+declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
+
+declare <4 x i16> @llvm.abs.v4i16(<4 x i16>, i1)
+declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1)
+
+declare <2 x i32> @llvm.abs.v2i32(<2 x i32>, i1)
+declare <4 x i32> @llvm.abs.v4i32(<4 x i32>, i1)
+declare <8 x i32> @llvm.abs.v8i32(<8 x i32>, i1)
+
+declare <2 x i64> @llvm.abs.v2i64(<2 x i64>, i1)
+declare <4 x i64> @llvm.abs.v4i64(<4 x i64>, i1)
+
+declare <2 x i128> @llvm.abs.v2i128(<2 x i128>, i1)
+
+attributes #0 = { "target-features"="+neon" }

From 03d0acc545653fc65d5707424446563aa9d735a4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 13:08:39 +0100
Subject: [PATCH 716/946] [DSE] Use helper for unwind check (NFCI)

This should be no functional change, as the cases supported by the
helper and the cases supported by DSE are currently the same, the
code structure is just slightly different.
---
 .../Scalar/DeadStoreElimination.cpp           | 38 +++++++++----------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 9ed6082903d94..ae636e7b61f72 100644
--- a/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -756,9 +756,8 @@ struct DSEState {
   SmallVector<MemoryDef *, 64> MemDefs;
   // Any that should be skipped as they are already deleted
   SmallPtrSet<MemoryAccess *, 4> SkipStores;
-  // Keep track of all of the objects that are invisible to the caller before
-  // the function returns.
-  DenseMap<const Value *, bool> InvisibleToCallerBeforeRet;
+  // Keep track whether a given object is captured before return or not.
+  DenseMap<const Value *, bool> CapturedBeforeReturn;
   // Keep track of all of the objects that are invisible to the caller after
   // the function returns.
   DenseMap<const Value *, bool> InvisibleToCallerAfterRet;
@@ -801,12 +800,8 @@ struct DSEState {
     // Treat byval or inalloca arguments the same as Allocas, stores to them are
     // dead at the end of the function.
     for (Argument &AI : F.args())
-      if (AI.hasPassPointeeByValueCopyAttr()) {
-        // For byval, the caller doesn't know the address of the allocation.
-        if (AI.hasByValAttr())
-          InvisibleToCallerBeforeRet.insert({&AI, true});
+      if (AI.hasPassPointeeByValueCopyAttr())
         InvisibleToCallerAfterRet.insert({&AI, true});
-      }
 
     // Collect whether there is any irreducible control flow in the function.
     ContainsIrreducibleLoops = mayContainIrreducibleControl(F, &LI);
@@ -953,7 +948,7 @@ struct DSEState {
       return true;
     auto I = InvisibleToCallerAfterRet.insert({V, false});
     if (I.second) {
-      if (!isInvisibleToCallerBeforeRet(V)) {
+      if (!isInvisibleToCallerOnUnwind(V)) {
         I.first->second = false;
       } else if (isNoAliasCall(V)) {
         I.first->second = !PointerMayBeCaptured(V, true, false);
@@ -962,17 +957,21 @@ struct DSEState {
     return I.first->second;
   }
 
-  bool isInvisibleToCallerBeforeRet(const Value *V) {
-    if (isa<AllocaInst>(V))
+  bool isInvisibleToCallerOnUnwind(const Value *V) {
+    bool RequiresNoCaptureBeforeUnwind;
+    if (!isNotVisibleOnUnwind(V, RequiresNoCaptureBeforeUnwind))
+      return false;
+    if (!RequiresNoCaptureBeforeUnwind)
       return true;
-    auto I = InvisibleToCallerBeforeRet.insert({V, false});
-    if (I.second && isNoAliasCall(V))
+
+    auto I = CapturedBeforeReturn.insert({V, true});
+    if (I.second)
       // NOTE: This could be made more precise by PointerMayBeCapturedBefore
       // with the killing MemoryDef. But we refrain from doing so for now to
       // limit compile-time and this does not cause any changes to the number
       // of stores removed on a large test set in practice.
-      I.first->second = !PointerMayBeCaptured(V, false, true);
-    return I.first->second;
+      I.first->second = PointerMayBeCaptured(V, false, true);
+    return !I.first->second;
   }
 
   Optional<MemoryLocation> getLocForWrite(Instruction *I) const {
@@ -1260,8 +1259,7 @@ struct DSEState {
       MemoryDef *CurrentDef = cast<MemoryDef>(Current);
       Instruction *CurrentI = CurrentDef->getMemoryInst();
 
-      if (canSkipDef(CurrentDef,
-                     !isInvisibleToCallerBeforeRet(KillingUndObj))) {
+      if (canSkipDef(CurrentDef, !isInvisibleToCallerOnUnwind(KillingUndObj))) {
         CanOptimize = false;
         continue;
       }
@@ -1433,7 +1431,7 @@ struct DSEState {
         continue;
       }
 
-      if (UseInst->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj)) {
+      if (UseInst->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj)) {
         LLVM_DEBUG(dbgs() << "  ... found throwing instruction\n");
         return None;
       }
@@ -1614,7 +1612,7 @@ struct DSEState {
     // First see if we can ignore it by using the fact that KillingI is an
     // alloca/alloca like object that is not visible to the caller during
     // execution of the function.
-    if (KillingUndObj && isInvisibleToCallerBeforeRet(KillingUndObj))
+    if (KillingUndObj && isInvisibleToCallerOnUnwind(KillingUndObj))
       return false;
 
     if (KillingI->getParent() == DeadI->getParent())
@@ -1630,7 +1628,7 @@ struct DSEState {
   bool isDSEBarrier(const Value *KillingUndObj, Instruction *DeadI) {
     // If DeadI may throw it acts as a barrier, unless we are to an
     // alloca/alloca like object that does not escape.
-    if (DeadI->mayThrow() && !isInvisibleToCallerBeforeRet(KillingUndObj))
+    if (DeadI->mayThrow() && !isInvisibleToCallerOnUnwind(KillingUndObj))
       return true;
 
     // If DeadI is an atomic load/store stronger than monotonic, do not try to

From 0984aa70da1047677577c3a1eb0445fca90d8614 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Wed, 26 Jan 2022 14:15:14 +0100
Subject: [PATCH 717/946] Fix conditional include in ThreadPool

Should fix  https://lab.llvm.org/buildbot#builders/37/builds/10259
---
 llvm/lib/Support/ThreadPool.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index bf2584950c4ac..9f92ae1c7a7c1 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -13,7 +13,12 @@
 #include "llvm/Support/ThreadPool.h"
 
 #include "llvm/Config/llvm-config.h"
+
+#if LLVM_ENABLE_THREADS
 #include "llvm/Support/Threading.h"
+#else
+#include "llvm/Support/raw_ostream.h"
+#endif
 
 using namespace llvm;
 

From 5ceb0bc7eaccb318eb299ee308e01210a7da1d1e Mon Sep 17 00:00:00 2001
From: Simon Moll <simon.moll@emea.nec.com>
Date: Wed, 26 Jan 2022 10:32:26 +0100
Subject: [PATCH 718/946] [VE] Packed 32/64bit broadcast isel and tests

Packed-mode broadcast of f32/i32 requires the subregister to be
replicated to the full I64 register prior. Add repl_i32 and repl_f32 to
faciliate this.

Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D117878
---
 llvm/lib/Target/VE/VECustomDAG.cpp           | 24 ++++++++
 llvm/lib/Target/VE/VECustomDAG.h             |  2 +
 llvm/lib/Target/VE/VEISelLowering.cpp        | 21 ++++---
 llvm/lib/Target/VE/VEISelLowering.h          |  2 +
 llvm/lib/Target/VE/VEInstrInfo.td            | 16 +++++
 llvm/lib/Target/VE/VEInstrPatternsVec.td     | 16 +++++
 llvm/test/CodeGen/VE/Packed/vec_broadcast.ll | 65 ++++++++++++++++++++
 7 files changed, 136 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/VE/Packed/vec_broadcast.ll

diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
index 2f9976e426129..af3e4af138142 100644
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -19,6 +19,14 @@
 
 namespace llvm {
 
+static const int StandardVectorWidth = 256;
+
+bool isPackedVectorType(EVT SomeVT) {
+  if (!SomeVT.isVector())
+    return false;
+  return SomeVT.getVectorNumElements() > StandardVectorWidth;
+}
+
 /// \returns the VVP_* SDNode opcode corresponsing to \p OC.
 Optional<unsigned> getVVPOpcode(unsigned Opcode) {
   switch (Opcode) {
@@ -51,6 +59,22 @@ SDValue VECustomDAG::getConstant(uint64_t Val, EVT VT, bool IsTarget,
 
 SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar,
                                   SDValue AVL) const {
+  assert(ResultVT.isVector());
+  auto ScaVT = Scalar.getValueType();
+  assert(ScaVT != MVT::i1 && "TODO: Mask broadcasts");
+
+  if (isPackedVectorType(ResultVT)) {
+    // v512x packed mode broadcast
+    // Replicate the scalar reg (f32 or i32) onto the opposing half of the full
+    // scalar register. If it's an I64 type, assume that this has already
+    // happened.
+    if (ScaVT == MVT::f32) {
+      Scalar = getNode(VEISD::REPL_F32, MVT::i64, Scalar);
+    } else if (ScaVT == MVT::i32) {
+      Scalar = getNode(VEISD::REPL_I32, MVT::i64, Scalar);
+    }
+  }
+
   return getNode(VEISD::VEC_BROADCAST, ResultVT, {Scalar, AVL});
 }
 
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
index e78b5dda6828c..ddd6ce7833664 100644
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -25,6 +25,8 @@ Optional<unsigned> getVVPOpcode(unsigned Opcode);
 
 bool isVVPBinaryOp(unsigned Opcode);
 
+bool isPackedVectorType(EVT SomeVT);
+
 class VECustomDAG {
   SelectionDAG &DAG;
   SDLoc DL;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 3ab876aa05c99..9137c476777e8 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -11,9 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "VECustomDAG.h"
 #include "VEISelLowering.h"
 #include "MCTargetDesc/VEMCExpr.h"
+#include "VECustomDAG.h"
 #include "VEInstrBuilder.h"
 #include "VEMachineFunctionInfo.h"
 #include "VERegisterInfo.h"
@@ -899,6 +899,8 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(TS1AM)
     TARGET_NODE_CASE(VEC_BROADCAST)
+    TARGET_NODE_CASE(REPL_I32)
+    TARGET_NODE_CASE(REPL_F32)
 
     // Register the VVP_* SDNodes.
 #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
@@ -1642,8 +1644,7 @@ static SDValue getSplatValue(SDNode *N) {
 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
   VECustomDAG CDAG(DAG, Op);
-  unsigned NumEls = Op.getValueType().getVectorNumElements();
-  MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
+  MVT ResultVT = Op.getSimpleValueType();
 
   // If there is just one element, expand to INSERT_VECTOR_ELT.
   unsigned UniqueIdx;
@@ -1651,17 +1652,17 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
     SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
     auto ElemV = Op->getOperand(UniqueIdx);
     SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
-    return CDAG.getNode(ISD::INSERT_VECTOR_ELT, Op.getValueType(),
-                        {AccuV, ElemV, IdxV});
+    return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
   }
 
   // Else emit a broadcast.
   if (SDValue ScalarV = getSplatValue(Op.getNode())) {
-    // lower to VEC_BROADCAST
-    MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
-
-    auto AVL = CDAG.getConstant(NumEls, MVT::i32);
-    return CDAG.getBroadcast(LegalResVT, Op.getOperand(0), AVL);
+    unsigned NumEls = ResultVT.getVectorNumElements();
+    // TODO: Legalize packed-mode AVL.
+    //       For now, cap the AVL at 256.
+    auto CappedLength = std::min<unsigned>(256, NumEls);
+    auto AVL = CDAG.getConstant(CappedLength, MVT::i32);
+    return CDAG.getBroadcast(ResultVT, Op.getOperand(0), AVL);
   }
 
   // Expand
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index ec0f58b7a3f6d..09bd19e837172 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -40,6 +40,8 @@ enum NodeType : unsigned {
   TS1AM,                  // A TS1AM instruction used for 1/2 bytes swap.
   VEC_BROADCAST,          // A vector broadcast instruction.
                           //   0: scalar value, 1: VL
+  REPL_I32,
+  REPL_F32, // Replicate subregister to other half.
 
 // VVP_* nodes.
 #define ADD_VVP_OP(VVP_NAME, ...) VVP_NAME,
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index c3abbe2cafab6..717427c3f48da 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -1576,6 +1576,12 @@ def f2l : OutPatFrag<(ops node:$exp),
 def l2f : OutPatFrag<(ops node:$exp),
                      (EXTRACT_SUBREG $exp, sub_f32)>;
 
+// Zero out subregisters.
+def zero_i32 : OutPatFrag<(ops node:$expr),
+                          (ANDrm $expr, 32)>;
+def zero_f32 : OutPatFrag<(ops node:$expr),
+                          (ANDrm $expr, !add(32, 64))>;
+
 // Small immediates.
 def : Pat<(i32 simm7:$val), (EXTRACT_SUBREG (ORim (LO7 $val), 0), sub_i32)>;
 def : Pat<(i64 simm7:$val), (ORim (LO7 $val), 0)>;
@@ -2287,6 +2293,16 @@ class IsVLVT<int OpIdx> : SDTCisVT<OpIdx,i32>;
 def vec_broadcast       : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
                                  [SDTCisVec<0>, IsVLVT<2>]>>;
 
+// replicate lower 32bit to upper 32bit (f32 scalar replication).
+def repl_f32            : SDNode<"VEISD::REPL_F32",
+                            SDTypeProfile<1, 1,
+                              [SDTCisInt<0>, SDTCisFP<1>]>>;
+// replicate upper 32bit to lower 32 bit (i32 scalar replication).
+def repl_i32            : SDNode<"VEISD::REPL_I32",
+                            SDTypeProfile<1, 1,
+                              [SDTCisInt<0>, SDTCisInt<1>]>>;
+
+
 // Whether this is an all-true mask (assuming undef-bits above VL are all-true).
 def true_mask           : PatLeaf<
                             (vec_broadcast (i32 nonzero), (i32 srcvalue))>;
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
index dc3c913c918af..6c5b80315efb5 100644
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -15,6 +15,17 @@
 // Instruction format superclass
 //===----------------------------------------------------------------------===//
 
+// Sub-register replication for packed broadcast.
+def: Pat<(i64 (repl_f32 f32:$val)),
+            (ORrr
+              (SRLri (f2l $val), 32),
+              (zero_i32 (f2l $val)))>;
+def: Pat<(i64 (repl_i32 i32:$val)),
+            (ORrr
+              (zero_f32 (i2l $val)),
+              (SLLri (i2l $val), 32))>;
+
+
 multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
                        SDNodeXForm ImmCast, OutPatFrag SuperRegCast> {
   // VBRDil
@@ -89,3 +100,8 @@ defm : patterns_elem32<v256f32, f32, simm7fp, LO7FP, l2f, f2l>;
 
 defm : patterns_elem64<v256i64, i64, simm7, LO7>;
 defm : patterns_elem64<v256f64, f64, simm7fp, LO7FP>;
+
+defm : vbrd_elem64<v512i32, i64, simm7, LO7>;
+defm : vbrd_elem64<v512f32, i64, simm7, LO7>;
+defm : vbrd_elem64<v512i32, f64, simm7fp, LO7FP>;
+defm : vbrd_elem64<v512f32, f64, simm7fp, LO7FP>;
diff --git a/llvm/test/CodeGen/VE/Packed/vec_broadcast.ll b/llvm/test/CodeGen/VE/Packed/vec_broadcast.ll
new file mode 100644
index 0000000000000..ed41f4fcb9747
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/vec_broadcast.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+define fastcc <512 x i32> @brd_v512i32(i32 %s) {
+; CHECK-LABEL: brd_v512i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    sll %s1, %s0, 32
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    or %s0, %s0, %s1
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <512 x i32> undef, i32 %s, i32 0
+  %ret = shufflevector <512 x i32> %val, <512 x i32> undef, <512 x i32> zeroinitializer
+  ret <512 x i32> %ret
+}
+
+define fastcc <512 x i32> @brdi_v512i32() {
+; CHECK-LABEL: brdi_v512i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    or %s0, 17, (0)1
+; CHECK-NEXT:    sll %s1, %s0, 32
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    or %s0, %s0, %s1
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <512 x i32> undef, i32 17, i32 0
+  %ret = shufflevector <512 x i32> %val, <512 x i32> undef, <512 x i32> zeroinitializer
+  ret <512 x i32> %ret
+}
+
+define fastcc <512 x float> @brd_v512f32(float %s) {
+; CHECK-LABEL: brd_v512f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s0, (32)1
+; CHECK-NEXT:    srl %s0, %s0, 32
+; CHECK-NEXT:    or %s0, %s0, %s1
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <512 x float> undef, float %s, i32 0
+  %ret = shufflevector <512 x float> %val, <512 x float> undef, <512 x i32> zeroinitializer
+  ret <512 x float> %ret
+}
+
+define fastcc <512 x float> @brdi_v512f32() {
+; CHECK-LABEL: brdi_v512f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea.sl %s0, 0
+; CHECK-NEXT:    and %s1, %s0, (32)1
+; CHECK-NEXT:    srl %s0, %s0, 32
+; CHECK-NEXT:    or %s0, %s0, %s1
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <512 x float> undef, float 0.e+00, i32 0
+  %ret = shufflevector <512 x float> %val, <512 x float> undef, <512 x i32> zeroinitializer
+  ret <512 x float> %ret
+}

From c283c8dfb5a9770126f06f55644c6208b5582c19 Mon Sep 17 00:00:00 2001
From: Salman Javed <mail@salmanjaved.org>
Date: Thu, 27 Jan 2022 01:30:38 +1300
Subject: [PATCH 719/946] Rewrite Doxygen comment to resolve -Wdocumentation
 warning (NFC)

Comment change only, no functional change intended.
Example of warning:
https://lab.llvm.org/buildbot/#/builders/188/builds/8696/steps/4/logs/warnings__2_
---
 clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
index e41003262cccf..3b5f82033d253 100644
--- a/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
+++ b/clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h
@@ -110,8 +110,8 @@ class ClangTidyContext {
   /// This does not handle suppression of notes following a suppressed
   /// diagnostic; that is left to the caller as it requires maintaining state in
   /// between calls to this function.
-  /// If any NOLINT is malformed, e.g. a BEGIN without a subsequent END, an
-  /// error about it will be returned in output \param NoLintErrors.
+  /// If any NOLINT is malformed, e.g. a BEGIN without a subsequent END, output
+  /// \param NoLintErrors will return an error about it.
   /// If \param AllowIO is false, the function does not attempt to read source
   /// files from disk which are not already mapped into memory; such files are
   /// treated as not containing a suppression comment.

From b48e3782182639d4e59624290f166a84e53300bb Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Wed, 26 Jan 2022 08:02:49 -0500
Subject: [PATCH 720/946] Cleanup LLVMTextAPI headers

Based on the output of iwyu. A full rebuild of llvm-project doesn't exhibit any
significant false dependencies.

The impact on preprocessed output is larger than expected, given the small
amount of changes

$ clang++ -E  -Iinclude -I../llvm/include ../llvm/lib/TextAPI/*.cpp -std=c++14 -fno-rtti -fno-exceptions | wc -l
before: 635319
After: 643716

Discourse thread on the topic: https://llvm.discourse.group/t/include-what-you-use-include-cleanup
---
 llvm/include/llvm/TextAPI/InterfaceFile.h | 4 ----
 llvm/include/llvm/TextAPI/Target.h        | 4 +++-
 llvm/lib/TextAPI/Architecture.cpp         | 2 +-
 llvm/lib/TextAPI/PackedVersion.cpp        | 1 -
 llvm/lib/TextAPI/Target.cpp               | 5 +----
 llvm/lib/TextAPI/TextStubCommon.h         | 9 +++++++--
 6 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/llvm/include/llvm/TextAPI/InterfaceFile.h b/llvm/include/llvm/TextAPI/InterfaceFile.h
index 6ef4db2ae158e..5f07397adacaa 100644
--- a/llvm/include/llvm/TextAPI/InterfaceFile.h
+++ b/llvm/include/llvm/TextAPI/InterfaceFile.h
@@ -19,11 +19,7 @@
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator.h"
-#include "llvm/BinaryFormat/MachO.h"
-#include "llvm/BinaryFormat/Magic.h"
 #include "llvm/Support/Allocator.h"
-#include "llvm/Support/Error.h"
-#include "llvm/TextAPI/Architecture.h"
 #include "llvm/TextAPI/ArchitectureSet.h"
 #include "llvm/TextAPI/PackedVersion.h"
 #include "llvm/TextAPI/Platform.h"
diff --git a/llvm/include/llvm/TextAPI/Target.h b/llvm/include/llvm/TextAPI/Target.h
index e35afcc831439..fbb76295f706a 100644
--- a/llvm/include/llvm/TextAPI/Target.h
+++ b/llvm/include/llvm/TextAPI/Target.h
@@ -9,13 +9,15 @@
 #ifndef LLVM_TEXTAPI_TARGET_H
 #define LLVM_TEXTAPI_TARGET_H
 
-#include "llvm/ADT/Triple.h"
 #include "llvm/Support/Error.h"
 #include "llvm/TextAPI/Architecture.h"
 #include "llvm/TextAPI/ArchitectureSet.h"
 #include "llvm/TextAPI/Platform.h"
 
 namespace llvm {
+
+class Triple;
+
 namespace MachO {
 
 // This is similar to a llvm Triple, but the triple doesn't have all the
diff --git a/llvm/lib/TextAPI/Architecture.cpp b/llvm/lib/TextAPI/Architecture.cpp
index e1901d5c0ce52..bb349b21774ed 100644
--- a/llvm/lib/TextAPI/Architecture.cpp
+++ b/llvm/lib/TextAPI/Architecture.cpp
@@ -15,7 +15,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/BinaryFormat/MachO.h"
 #include "llvm/Support/raw_ostream.h"
-#include "llvm/TextAPI/ArchitectureSet.h"
+#include "llvm/Support/ErrorHandling.h"
 
 namespace llvm {
 namespace MachO {
diff --git a/llvm/lib/TextAPI/PackedVersion.cpp b/llvm/lib/TextAPI/PackedVersion.cpp
index f8171e02b6d3e..67fb30aeb127b 100644
--- a/llvm/lib/TextAPI/PackedVersion.cpp
+++ b/llvm/lib/TextAPI/PackedVersion.cpp
@@ -11,7 +11,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/TextAPI/PackedVersion.h"
-#include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Format.h"
diff --git a/llvm/lib/TextAPI/Target.cpp b/llvm/lib/TextAPI/Target.cpp
index feeb5c671a9cb..c54c3bd66b9dd 100644
--- a/llvm/lib/TextAPI/Target.cpp
+++ b/llvm/lib/TextAPI/Target.cpp
@@ -7,11 +7,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/TextAPI/Target.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
-#include "llvm/Support/Format.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace llvm {
diff --git a/llvm/lib/TextAPI/TextStubCommon.h b/llvm/lib/TextAPI/TextStubCommon.h
index 89ae5d56297c0..aac27221b5fff 100644
--- a/llvm/lib/TextAPI/TextStubCommon.h
+++ b/llvm/lib/TextAPI/TextStubCommon.h
@@ -16,9 +16,9 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/YAMLTraits.h"
 #include "llvm/TextAPI/Architecture.h"
-#include "llvm/TextAPI/ArchitectureSet.h"
 #include "llvm/TextAPI/InterfaceFile.h"
-#include "llvm/TextAPI/PackedVersion.h"
+#include "llvm/TextAPI/Platform.h"
+#include "llvm/TextAPI/Target.h"
 
 using UUID = std::pair<llvm::MachO::Target, std::string>;
 
@@ -28,6 +28,11 @@ LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(UUID)
 LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(FlowStringRef)
 
 namespace llvm {
+
+namespace MachO {
+    class ArchitectureSet;
+    class PackedVersion;
+}
 namespace yaml {
 
 template <> struct ScalarTraits<FlowStringRef> {

From 31c1842a7b5ef4897fe3cbec21d261dae6ed5a74 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 14:25:05 +0100
Subject: [PATCH 721/946] [DSE] Add test with sret argument (NFC)

---
 .../test/Transforms/DeadStoreElimination/simple.ll | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index 14716ee9b0e6a..88197ee2cdbd6 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -532,6 +532,20 @@ define void @test34(i32* noalias %p) {
   store i32 0, i32* %p
   ret void
 }
+; Same as previous case, but with an sret argument.
+; TODO: The first store could be eliminated if sret is not visible on unwind.
+define void @test34_sret(i32* noalias sret(i32) %p) {
+; CHECK-LABEL: @test34_sret(
+; CHECK-NEXT:    store i32 1, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    call void @unknown_func()
+; CHECK-NEXT:    store i32 0, i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+  store i32 1, i32* %p
+  call void @unknown_func()
+  store i32 0, i32* %p
+  ret void
+}
 
 ; Remove redundant store even with an unwinding function in the same block
 define void @test35(i32* noalias %p) {

From 37c4bd0fdbc6bc5ec6145765d6f4dcd06540201d Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Fri, 29 Oct 2021 16:10:36 +0100
Subject: [PATCH 722/946] [lldb] Add MemoryTagMap class

The tag map holds a sparse set of memory tags and allows
you to query ranges for tags.

Granules that do not have tags will be set to llvm::None.
to keep the ordering intact. If there are no tags for the
requested range we'll just return an empty result so that
callers don't need to check that all values are llvm::None.

This will be combined with MemoryTagManager's MakeTaggedRanges:
* MakeTaggedRanges
* Read from all those ranges
* Insert the results into the tag map
* Give the tag map to whatever needs to print tags

Which in this case will be "memory read"/DumpDataExtractor.

Reviewed By: JDevlieghere

Differential Revision: https://reviews.llvm.org/D112825
---
 lldb/include/lldb/Target/MemoryTagMap.h    | 98 ++++++++++++++++++++++
 lldb/source/Target/CMakeLists.txt          |  1 +
 lldb/source/Target/MemoryTagMap.cpp        | 64 ++++++++++++++
 lldb/unittests/Target/CMakeLists.txt       |  1 +
 lldb/unittests/Target/MemoryTagMapTest.cpp | 81 ++++++++++++++++++
 5 files changed, 245 insertions(+)
 create mode 100644 lldb/include/lldb/Target/MemoryTagMap.h
 create mode 100644 lldb/source/Target/MemoryTagMap.cpp
 create mode 100644 lldb/unittests/Target/MemoryTagMapTest.cpp

diff --git a/lldb/include/lldb/Target/MemoryTagMap.h b/lldb/include/lldb/Target/MemoryTagMap.h
new file mode 100644
index 0000000000000..acf3825bfb3f3
--- /dev/null
+++ b/lldb/include/lldb/Target/MemoryTagMap.h
@@ -0,0 +1,98 @@
+//===-- MemoryTagMap.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_TARGET_MEMORYTAGMAP_H
+#define LLDB_TARGET_MEMORYTAGMAP_H
+
+#include "lldb/Target/MemoryTagManager.h"
+#include "lldb/lldb-private.h"
+#include "llvm/ADT/Optional.h"
+#include <map>
+
+namespace lldb_private {
+
+/// MemoryTagMap provides a way to give a sparse read result
+/// when reading memory tags for a range. This is useful when
+/// you want to annotate some large memory dump that might include
+/// tagged memory but you don't know that it is all tagged.
+class MemoryTagMap {
+public:
+  /// Init an empty tag map
+  ///
+  /// \param [in] manager
+  ///     Non-null pointer to a memory tag manager.
+  MemoryTagMap(const MemoryTagManager *manager);
+
+  /// Insert tags into the map starting from addr.
+  ///
+  /// \param [in] addr
+  ///     Start address of the range to insert tags for.
+  ///     This address should be granule aligned and have had
+  ///     any non address bits removed.
+  ///     (ideally you would use the base of the range you used
+  ///     to read the tags in the first place)
+  ///
+  /// \param [in] tags
+  ///     Vector of tags to insert. The first tag will be inserted
+  ///     at addr, the next at addr+granule size and so on until
+  ///     all tags have been inserted.
+  void InsertTags(lldb::addr_t addr, const std::vector<lldb::addr_t> tags);
+
+  bool Empty() const;
+
+  /// Lookup memory tags for a range of memory from addr to addr+len.
+  ///
+  /// \param [in] addr
+  ///    The start of the range. This may include non address bits and
+  ///    does not have to be granule aligned.
+  ///
+  /// \param [in] len
+  ///    The length in bytes of the range to read tags for. This does
+  ///    not need to be multiple of the granule size.
+  ///
+  /// \return
+  ///    A vector containing the tags found for the granules in the
+  ///    range. (which is the result of granule aligning the given range)
+  ///
+  ///    Each item in the vector is an optional tag. Meaning that if
+  ///    it is valid then the granule had a tag and if not, it didn't.
+  ///
+  ///    If the range had no tags at all, the vector will be empty.
+  ///    If some of the range was tagged it will have items and some
+  ///    of them may be llvm::None.
+  ///    (this saves the caller checking whether all items are llvm::None)
+  std::vector<llvm::Optional<lldb::addr_t>> GetTags(lldb::addr_t addr,
+                                                    size_t len) const;
+
+private:
+  /// Lookup the tag for address
+  ///
+  /// \param [in] address
+  ///     The address to lookup a tag for. This should be aligned
+  ///     to a granule boundary.
+  ///
+  /// \return
+  ///     The tag for the granule that address refers to, or llvm::None
+  ///     if it has no memory tag.
+  llvm::Optional<lldb::addr_t> GetTag(lldb::addr_t addr) const;
+
+  // A map of granule aligned addresses to their memory tag
+  std::map<lldb::addr_t, lldb::addr_t> m_addr_to_tag;
+
+  // Memory tag manager used to align addresses and get granule size.
+  // Ideally this would be a const& but only certain architectures will
+  // have a memory tag manager class to provide here. So for a method
+  // returning a MemoryTagMap, Optional<MemoryTagMap> allows it to handle
+  // architectures without memory tagging. Optionals cannot hold references
+  // so we go with a pointer that we assume will be not be null.
+  const MemoryTagManager *m_manager;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_TARGET_MEMORYTAGMAP_H
diff --git a/lldb/source/Target/CMakeLists.txt b/lldb/source/Target/CMakeLists.txt
index 2353ddcba2495..ef28de7285fea 100644
--- a/lldb/source/Target/CMakeLists.txt
+++ b/lldb/source/Target/CMakeLists.txt
@@ -20,6 +20,7 @@ add_lldb_library(lldbTarget
   Memory.cpp
   MemoryHistory.cpp
   MemoryRegionInfo.cpp
+  MemoryTagMap.cpp
   ModuleCache.cpp
   OperatingSystem.cpp
   PathMappingList.cpp
diff --git a/lldb/source/Target/MemoryTagMap.cpp b/lldb/source/Target/MemoryTagMap.cpp
new file mode 100644
index 0000000000000..846eef9209da3
--- /dev/null
+++ b/lldb/source/Target/MemoryTagMap.cpp
@@ -0,0 +1,64 @@
+//===-- MemoryTagMap.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Target/MemoryTagMap.h"
+
+using namespace lldb_private;
+
+MemoryTagMap::MemoryTagMap(const MemoryTagManager *manager)
+    : m_manager(manager) {
+  assert(m_manager && "valid tag manager required to construct a MemoryTagMap");
+}
+
+void MemoryTagMap::InsertTags(lldb::addr_t addr,
+                              const std::vector<lldb::addr_t> tags) {
+  // We're assuming that addr has no non address bits and is granule aligned.
+  size_t granule_size = m_manager->GetGranuleSize();
+  for (auto tag : tags) {
+    m_addr_to_tag[addr] = tag;
+    addr += granule_size;
+  }
+}
+
+bool MemoryTagMap::Empty() const { return m_addr_to_tag.empty(); }
+
+std::vector<llvm::Optional<lldb::addr_t>>
+MemoryTagMap::GetTags(lldb::addr_t addr, size_t len) const {
+  // Addr and len might be unaligned
+  addr = m_manager->RemoveTagBits(addr);
+  MemoryTagManager::TagRange range(addr, len);
+  range = m_manager->ExpandToGranule(range);
+
+  std::vector<llvm::Optional<lldb::addr_t>> tags;
+  lldb::addr_t end_addr = range.GetRangeEnd();
+  addr = range.GetRangeBase();
+  bool got_valid_tags = false;
+  size_t granule_size = m_manager->GetGranuleSize();
+
+  for (; addr < end_addr; addr += granule_size) {
+    llvm::Optional<lldb::addr_t> tag = GetTag(addr);
+    tags.push_back(tag);
+    if (tag)
+      got_valid_tags = true;
+  }
+
+  // To save the caller checking if every item is llvm::None,
+  // we return an empty vector if we got no tags at all.
+  if (got_valid_tags)
+    return tags;
+  return {};
+}
+
+llvm::Optional<lldb::addr_t> MemoryTagMap::GetTag(lldb::addr_t addr) const {
+  // Here we assume that addr is granule aligned, just like when the tags
+  // were inserted.
+  auto found = m_addr_to_tag.find(addr);
+  if (found == m_addr_to_tag.end())
+    return llvm::None;
+  return found->second;
+}
diff --git a/lldb/unittests/Target/CMakeLists.txt b/lldb/unittests/Target/CMakeLists.txt
index 3b23550feaf9c..c16f0fcfabe15 100644
--- a/lldb/unittests/Target/CMakeLists.txt
+++ b/lldb/unittests/Target/CMakeLists.txt
@@ -3,6 +3,7 @@ add_lldb_unittest(TargetTests
   DynamicRegisterInfoTest.cpp
   ExecutionContextTest.cpp
   MemoryRegionInfoTest.cpp
+  MemoryTagMapTest.cpp
   ModuleCacheTest.cpp
   PathMappingListTest.cpp
   RemoteAwarePlatformTest.cpp
diff --git a/lldb/unittests/Target/MemoryTagMapTest.cpp b/lldb/unittests/Target/MemoryTagMapTest.cpp
new file mode 100644
index 0000000000000..831d4337a2896
--- /dev/null
+++ b/lldb/unittests/Target/MemoryTagMapTest.cpp
@@ -0,0 +1,81 @@
+//===-- MemoryTagMapTest.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Target/MemoryTagMap.h"
+#include "Plugins/Process/Utility/MemoryTagManagerAArch64MTE.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace lldb_private;
+using namespace lldb;
+
+// In these tests we use the AArch64 MTE tag manager because it is the only
+// implementation of a memory tag manager. MemoryTagMap itself is generic.
+
+TEST(MemoryTagMapTest, EmptyTagMap) {
+  MemoryTagManagerAArch64MTE manager;
+  MemoryTagMap tag_map(&manager);
+
+  tag_map.InsertTags(0, {});
+  ASSERT_TRUE(tag_map.Empty());
+  tag_map.InsertTags(0, {0});
+  ASSERT_FALSE(tag_map.Empty());
+}
+
+TEST(MemoryTagMapTest, GetTags) {
+  using TagsVec = std::vector<llvm::Optional<lldb::addr_t>>;
+
+  MemoryTagManagerAArch64MTE manager;
+  MemoryTagMap tag_map(&manager);
+
+  // No tags for an address not in the map
+  ASSERT_TRUE(tag_map.GetTags(0, 16).empty());
+
+  tag_map.InsertTags(0, {0, 1});
+
+  // No tags if you read zero length
+  ASSERT_TRUE(tag_map.GetTags(0, 0).empty());
+
+  EXPECT_THAT(tag_map.GetTags(0, 16), ::testing::ContainerEq(TagsVec{0}));
+
+  EXPECT_THAT(tag_map.GetTags(0, 32), ::testing::ContainerEq(TagsVec{0, 1}));
+
+  // Last granule of the range is not tagged
+  EXPECT_THAT(tag_map.GetTags(0, 48),
+              ::testing::ContainerEq(TagsVec{0, 1, llvm::None}));
+
+  EXPECT_THAT(tag_map.GetTags(16, 32),
+              ::testing::ContainerEq(TagsVec{1, llvm::None}));
+
+  // Reading beyond that address gives you no tags at all
+  EXPECT_THAT(tag_map.GetTags(32, 16), ::testing::ContainerEq(TagsVec{}));
+
+  // Address is granule aligned for you
+  // The length here is set such that alignment doesn't produce a 2 granule
+  // range.
+  EXPECT_THAT(tag_map.GetTags(8, 8), ::testing::ContainerEq(TagsVec{0}));
+
+  EXPECT_THAT(tag_map.GetTags(30, 2), ::testing::ContainerEq(TagsVec{1}));
+
+  // Here the length pushes the range into the next granule. When aligned
+  // this produces 2 granules.
+  EXPECT_THAT(tag_map.GetTags(30, 4),
+              ::testing::ContainerEq(TagsVec{1, llvm::None}));
+
+  // A range can also have gaps at the beginning or in the middle.
+  // Add more tags, 1 granule away from the first range.
+  tag_map.InsertTags(48, {3, 4});
+
+  // Untagged first granule
+  EXPECT_THAT(tag_map.GetTags(32, 32),
+              ::testing::ContainerEq(TagsVec{llvm::None, 3}));
+
+  // Untagged middle granule
+  EXPECT_THAT(tag_map.GetTags(16, 48),
+              ::testing::ContainerEq(TagsVec{1, llvm::None, 3}));
+}

From 8700b22cf622fdcefab0f9bf8377bcf6dda39ac7 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Wed, 26 Jan 2022 13:56:09 +0000
Subject: [PATCH 723/946] [gn build] Port 37c4bd0fdbc6

---
 llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
index 306d556f5078b..87df088be9c01 100644
--- a/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Target/BUILD.gn
@@ -43,6 +43,7 @@ static_library("Target") {
     "Memory.cpp",
     "MemoryHistory.cpp",
     "MemoryRegionInfo.cpp",
+    "MemoryTagMap.cpp",
     "ModuleCache.cpp",
     "OperatingSystem.cpp",
     "PathMappingList.cpp",

From 360af60e17283020e160609d82268ecf806e2178 Mon Sep 17 00:00:00 2001
From: Abhina Sreeskantharajan <Abhina.Sreeskantharajan@ibm.com>
Date: Wed, 26 Jan 2022 09:01:28 -0500
Subject: [PATCH 724/946] [SystemZ][z/OS] Add AutoConvert.h header to
 MemoryBuffer.cpp

This commit https://github.com/llvm/llvm-project/commit/75e164f61d391979b4829bf2746a5d74b94e95f2 removed the AutoConvert.h header causing a build break on z/OS. This patch adds it back to fix it.

Reviewed By: zibi

Differential Revision: https://reviews.llvm.org/D118129
---
 llvm/lib/Support/MemoryBuffer.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index bf34a242c4c3f..7816779cca1da 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/config.h"
+#include "llvm/Support/AutoConvert.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Errc.h"

From 5d2f90cbcd5f64d3b47ba5d2738939be66f05936 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Wed, 26 Jan 2022 13:35:47 +0000
Subject: [PATCH 725/946] [lldb] Correct some uses of \b in Doxygen
 documentation

---
 lldb/include/lldb/Breakpoint/BreakpointOptions.h | 4 ++--
 lldb/include/lldb/Interpreter/Options.h          | 2 +-
 lldb/include/lldb/Symbol/CompileUnit.h           | 4 ++--
 lldb/include/lldb/Target/Process.h               | 4 ++--
 lldb/include/lldb/Target/Target.h                | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/lldb/include/lldb/Breakpoint/BreakpointOptions.h b/lldb/include/lldb/Breakpoint/BreakpointOptions.h
index ec39c976f7ac0..c2456024d9194 100644
--- a/lldb/include/lldb/Breakpoint/BreakpointOptions.h
+++ b/lldb/include/lldb/Breakpoint/BreakpointOptions.h
@@ -194,8 +194,8 @@ friend class Breakpoint;
   ///    The commands will be appended to this list.
   ///
   /// \return
-  ///    \btrue if the command callback is a command-line callback,
-  ///    \bfalse otherwise.
+  ///    \b true if the command callback is a command-line callback,
+  ///    \b false otherwise.
   bool GetCommandLineCallbacks(StringList &command_list);
 
   /// Remove the callback from this option set.
diff --git a/lldb/include/lldb/Interpreter/Options.h b/lldb/include/lldb/Interpreter/Options.h
index 6bf5c21fe98e7..5899a9edc47f9 100644
--- a/lldb/include/lldb/Interpreter/Options.h
+++ b/lldb/include/lldb/Interpreter/Options.h
@@ -169,7 +169,7 @@ class Options {
   /// user wants returned.
   ///
   /// \return
-  ///     \btrue if we were in an option, \bfalse otherwise.
+  ///     \b true if we were in an option, \b false otherwise.
   bool HandleOptionCompletion(lldb_private::CompletionRequest &request,
                               OptionElementVector &option_map,
                               CommandInterpreter &interpreter);
diff --git a/lldb/include/lldb/Symbol/CompileUnit.h b/lldb/include/lldb/Symbol/CompileUnit.h
index 34e34e5514df3..44e1d673f1fd1 100644
--- a/lldb/include/lldb/Symbol/CompileUnit.h
+++ b/lldb/include/lldb/Symbol/CompileUnit.h
@@ -208,9 +208,9 @@ class CompileUnit : public std::enable_shared_from_this<CompileUnit>,
   ///     unit file.
   ///
   /// \param[in] exact
-  ///     If \btrue match only if there is a line table entry for this line
+  ///     If \b true match only if there is a line table entry for this line
   ///     number.
-  ///     If \bfalse, find the line table entry equal to or after this line
+  ///     If \b false, find the line table entry equal to or after this line
   ///     number.
   ///
   /// \param[out] line_entry
diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h
index 255a8b4baaca6..12ed1e09227cf 100644
--- a/lldb/include/lldb/Target/Process.h
+++ b/lldb/include/lldb/Target/Process.h
@@ -1884,7 +1884,7 @@ class Process : public std::enable_shared_from_this<Process>,
   ///     want to deallocate.
   ///
   /// \return
-  ///     \btrue if the memory was deallocated, \bfalse otherwise.
+  ///     \b true if the memory was deallocated, \b false otherwise.
   virtual Status DoDeallocateMemory(lldb::addr_t ptr) {
     Status error;
     error.SetErrorStringWithFormatv(
@@ -1903,7 +1903,7 @@ class Process : public std::enable_shared_from_this<Process>,
   ///     want to deallocate.
   ///
   /// \return
-  ///     \btrue if the memory was deallocated, \bfalse otherwise.
+  ///     \b true if the memory was deallocated, \b false otherwise.
   Status DeallocateMemory(lldb::addr_t ptr);
 
   /// Get any available STDOUT.
diff --git a/lldb/include/lldb/Target/Target.h b/lldb/include/lldb/Target/Target.h
index b85839c15b2f9..2c8b36d1e3d9c 100644
--- a/lldb/include/lldb/Target/Target.h
+++ b/lldb/include/lldb/Target/Target.h
@@ -991,7 +991,7 @@ class Target : public std::enable_shared_from_this<Target>,
   ///     manually set following this function call).
   ///
   /// \return
-  ///     \b true if the architecture was successfully set, \bfalse otherwise.
+  ///     \b true if the architecture was successfully set, \b false otherwise.
   bool SetArchitecture(const ArchSpec &arch_spec, bool set_platform = false);
 
   bool MergeArchitecture(const ArchSpec &arch_spec);

From 8d714e4ad592eee351afa95a2cb3d38335a70fb6 Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Wed, 26 Jan 2022 13:41:03 +0000
Subject: [PATCH 726/946] [lldb] Correct \params to \param in StackFrame
 Doxygen comments

---
 lldb/include/lldb/Target/StackFrame.h | 14 +++++++-------
 lldb/source/Target/StackFrame.cpp     |  6 +++---
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/lldb/include/lldb/Target/StackFrame.h b/lldb/include/lldb/Target/StackFrame.h
index 1a9aaad1a4db6..1b0485b22cacb 100644
--- a/lldb/include/lldb/Target/StackFrame.h
+++ b/lldb/include/lldb/Target/StackFrame.h
@@ -171,7 +171,7 @@ class StackFrame : public ExecutionContextScope,
   /// functions looking up symbolic information for a given pc value multiple
   /// times.
   ///
-  /// \params [in] resolve_scope
+  /// \param [in] resolve_scope
   ///   Flags from the SymbolContextItem enumerated type which specify what
   ///   type of symbol context is needed by this caller.
   ///
@@ -408,10 +408,10 @@ class StackFrame : public ExecutionContextScope,
 
   /// Create a ValueObject for a given Variable in this StackFrame.
   ///
-  /// \params [in] variable_sp
+  /// \param [in] variable_sp
   ///   The Variable to base this ValueObject on
   ///
-  /// \params [in] use_dynamic
+  /// \param [in] use_dynamic
   ///     Whether the correct dynamic type of the variable should be
   ///     determined before creating the ValueObject, or if the static type
   ///     is sufficient.  One of the DynamicValueType enumerated values.
@@ -437,7 +437,7 @@ class StackFrame : public ExecutionContextScope,
   /// the current instruction.  The ExpressionPath should indicate how to get
   /// to this value using "frame variable."
   ///
-  /// \params [in] addr
+  /// \param [in] addr
   ///   The raw address.
   ///
   /// \return
@@ -448,10 +448,10 @@ class StackFrame : public ExecutionContextScope,
   /// given register plus an offset.  The ExpressionPath should indicate how
   /// to get to this value using "frame variable."
   ///
-  /// \params [in] reg
+  /// \param [in] reg
   ///   The name of the register.
   ///
-  /// \params [in] offset
+  /// \param [in] offset
   ///   The offset from the register.  Particularly important for sp...
   ///
   /// \return
@@ -465,7 +465,7 @@ class StackFrame : public ExecutionContextScope,
   /// PC in the stack frame and traverse through all parent blocks stopping at
   /// inlined function boundaries.
   ///
-  /// \params [in] name
+  /// \param [in] name
   ///   The name of the variable.
   ///
   /// \return
diff --git a/lldb/source/Target/StackFrame.cpp b/lldb/source/Target/StackFrame.cpp
index 7b4295158425e..58de26b23b657 100644
--- a/lldb/source/Target/StackFrame.cpp
+++ b/lldb/source/Target/StackFrame.cpp
@@ -1433,13 +1433,13 @@ ValueObjectSP GetValueForDereferincingOffset(StackFrame &frame,
 /// Attempt to reconstruct the ValueObject for the address contained in a
 /// given register plus an offset.
 ///
-/// \params [in] frame
+/// \param [in] frame
 ///   The current stack frame.
 ///
-/// \params [in] reg
+/// \param [in] reg
 ///   The register.
 ///
-/// \params [in] offset
+/// \param [in] offset
 ///   The offset from the register.
 ///
 /// \param [in] disassembler

From ecf7a0e33883fc8f001445c68a1128866cb81d38 Mon Sep 17 00:00:00 2001
From: Sebastian Neubauer <Sebastian.Neubauer@amd.com>
Date: Mon, 24 Jan 2022 20:39:54 +0100
Subject: [PATCH 727/946] [CMake] Disable mvsc warning for new versions

Starting with VS 2019, CMake defaults to the x64 host toolchain, so the
warning does not apply anymore.

References:
VS 2017 defaults to x86
https://cmake.org/cmake/help/latest/generator/Visual%20Studio%2015%202017.html?highlight=host#toolset-selection
VS 2019 and 2022 default to x64 for x64 targets
https://cmake.org/cmake/help/latest/generator/Visual%20Studio%2016%202019.html?highlight=host#toolset-selection
https://cmake.org/cmake/help/latest/generator/Visual%20Studio%2017%202022.html?highlight=host#toolset-selection

Differential Revision: https://reviews.llvm.org/D118228
---
 llvm/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 76c918306b225..a5245c22040ee 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -28,7 +28,7 @@ if (NOT PACKAGE_VERSION)
     "${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX}")
 endif()
 
-if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (CMAKE_GENERATOR_TOOLSET STREQUAL ""))
+if ((CMAKE_GENERATOR MATCHES "Visual Studio") AND (MSVC_TOOLSET_VERSION LESS 142) AND (CMAKE_GENERATOR_TOOLSET STREQUAL ""))
   message(WARNING "Visual Studio generators use the x86 host compiler by "
                   "default, even for 64-bit targets. This can result in linker "
                   "instability and out of memory errors. To use the 64-bit "

From 2feddb37b48ea55f0d586d2710b9bc17f607e3e1 Mon Sep 17 00:00:00 2001
From: Alban Bridonneau <alban.bridonneau@arm.com>
Date: Wed, 26 Jan 2022 13:33:38 +0000
Subject: [PATCH 728/946] Implement correct cost for SVE bitcasts

We have some bitcasts which we know will be simplified,
so their cost is zero.

Reviewed By: david-arm, sdesmalen

Differential Revision: https://reviews.llvm.org/D118019
---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  9 +++
 .../Analysis/CostModel/AArch64/sve-bitcast.ll | 57 ++++++++++++++++++-
 2 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 2df5f091a9db7..a4d666a0a3c29 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1590,6 +1590,15 @@ InstructionCost AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
     { ISD::FP_EXTEND, MVT::nxv4f64, MVT::nxv4f32, 2},
     { ISD::FP_EXTEND, MVT::nxv8f64, MVT::nxv8f32, 6},
 
+    // Bitcasts from float to integer
+    { ISD::BITCAST, MVT::nxv2f16, MVT::nxv2i16, 0 },
+    { ISD::BITCAST, MVT::nxv4f16, MVT::nxv4i16, 0 },
+    { ISD::BITCAST, MVT::nxv2f32, MVT::nxv2i32, 0 },
+
+    // Bitcasts from integer to float
+    { ISD::BITCAST, MVT::nxv2i16, MVT::nxv2f16, 0 },
+    { ISD::BITCAST, MVT::nxv4i16, MVT::nxv4f16, 0 },
+    { ISD::BITCAST, MVT::nxv2i32, MVT::nxv2f32, 0 },
   };
 
   if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll b/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll
index a0069e71bb6d6..1e09a402fbea0 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-bitcast.ll
@@ -1,8 +1,61 @@
 ; RUN: opt -mtriple=aarch64-linux-gnu -mattr=+sve -cost-model -analyze < %s | FileCheck %s
 
-; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x double> %a to <vscale x 2 x i64>
+; Integer to float bitcasts
+
+define <vscale x 2 x double> @test_nxv2f64(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: test_nxv2f64
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x i64> %a to <vscale x 2 x double>
+  %b = bitcast <vscale x 2 x i64> %a to <vscale x 2 x double>
+  ret <vscale x 2 x double> %b
+}
+
+define <vscale x 2 x half> @test_nxv2f16(<vscale x 2 x i16> %a) {
+; CHECK-LABEL: test_nxv2f16
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x i16> %a to <vscale x 2 x half>
+  %b = bitcast <vscale x 2 x i16> %a to <vscale x 2 x half>
+  ret <vscale x 2 x half> %b
+}
+
+define <vscale x 4 x half> @test_nxv4f16(<vscale x 4 x i16> %a) {
+; CHECK-LABEL: test_nxv4f16
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 4 x i16> %a to <vscale x 4 x half>
+  %b = bitcast <vscale x 4 x i16> %a to <vscale x 4 x half>
+  ret <vscale x 4 x half> %b
+}
+
+define <vscale x 2 x float> @test_nxv2f32(<vscale x 2 x i32> %a) {
+; CHECK-LABEL: test_nxv2f32
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x i32> %a to <vscale x 2 x float>
+  %b = bitcast <vscale x 2 x i32> %a to <vscale x 2 x float>
+  ret <vscale x 2 x float> %b
+}
 
-define <vscale x 2 x i64> @foo(<vscale x 2 x double> %a, i32 %x) {
+; Float to integer bitcasts
+
+define <vscale x 2 x i64> @test_nxv2i64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: test_nxv2i64
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x double> %a to <vscale x 2 x i64>
   %b = bitcast <vscale x 2 x double> %a to <vscale x 2 x i64>
   ret <vscale x 2 x i64> %b
 }
+
+define <vscale x 2 x i16> @test_nxv2i16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: test_nxv2i16
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x half> %a to <vscale x 2 x i16>
+  %b = bitcast <vscale x 2 x half> %a to <vscale x 2 x i16>
+  ret <vscale x 2 x i16> %b
+}
+
+define <vscale x 4 x i16> @test_nxv4i16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: test_nxv4i16
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 4 x half> %a to <vscale x 4 x i16>
+  %b = bitcast <vscale x 4 x half> %a to <vscale x 4 x i16>
+  ret <vscale x 4 x i16> %b
+}
+
+define <vscale x 2 x i32> @test_nxv2i32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: test_nxv2i32
+; CHECK: Found an estimated cost of 0 for instruction:   %b = bitcast <vscale x 2 x float> %a to <vscale x 2 x i32>
+  %b = bitcast <vscale x 2 x float> %a to <vscale x 2 x i32>
+  ret <vscale x 2 x i32> %b
+}

From 903c3d2863b9d0ae760a2f45e0e934f1d12cd1d4 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 15:32:11 +0100
Subject: [PATCH 729/946] [SCEVExpander] Always use i8 GEP for reused value
 offset

We could keep the non-i8 GEP code for non-opaque pointers, but
there's two reasons I'm dropping it: First, this actually appears
to be dead code, at least it isn't hit in any of our tests. I
expect that this is because we usually expand trip counts, and
those are never pointers (anymore). Second, the non-i8 GEP was
actually incorrect in multiple ways, because it used SCEV type
sizes, which don't match DL type sizes (for pointers) and certainly
don't match type alloc sizes (which is what GEPs actually use).
As such, I'm simplifying the code to always use the i8 GEP code
path if it does get hit.
---
 .../Utils/ScalarEvolutionExpander.cpp         | 22 ++++++-------------
 1 file changed, 7 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 1495ea1a40882..5363a851fc273 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1981,22 +1981,14 @@ Value *SCEVExpander::expand(const SCEV *S) {
 
     if (VO.second) {
       if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
-        Type *Ety = Vty->getPointerElementType();
         int64_t Offset = VO.second->getSExtValue();
-        int64_t ESize = SE.getTypeSizeInBits(Ety);
-        if ((Offset * 8) % ESize == 0) {
-          ConstantInt *Idx =
-            ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
-          V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
-        } else {
-          ConstantInt *Idx =
-            ConstantInt::getSigned(VO.second->getType(), -Offset);
-          unsigned AS = Vty->getAddressSpace();
-          V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
-          V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
-                                "uglygep");
-          V = Builder.CreateBitCast(V, Vty);
-        }
+        ConstantInt *Idx =
+          ConstantInt::getSigned(VO.second->getType(), -Offset);
+        unsigned AS = Vty->getAddressSpace();
+        V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
+        V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
+                              "uglygep");
+        V = Builder.CreateBitCast(V, Vty);
       } else {
         V = Builder.CreateSub(V, VO.second);
       }

From 070090d08eb506187f7203bd3b321ca1e74d9d5c Mon Sep 17 00:00:00 2001
From: David Spickett <david.spickett@linaro.org>
Date: Fri, 29 Oct 2021 16:11:14 +0100
Subject: [PATCH 730/946] [lldb] Add option to show memory tags in memory read
 output

This adds an option --show-tags to "memory read".

(lldb) memory read mte_buf mte_buf+32 -f "x" -s8 --show-tags
0x900fffff7ff8000: 0x0000000000000000 0x0000000000000000 (tag: 0x0)
0x900fffff7ff8010: 0x0000000000000000 0x0000000000000000 (tag: 0x1)

Tags are printed on the end of each line, if that
line has any tags associated with it. Meaning that
untagged memory output is unchanged.

Tags are printed based on the granule(s) of memory that
a line covers. So you may have lines with 1 tag, with many
tags, no tags or partially tagged lines.

In the case of partially tagged lines, untagged granules
will show "<no tag>" so that the ordering is obvious.
For example, a line that covers 2 granules where the first
is not tagged:

(lldb) memory read mte_buf-16 mte_buf+16 -l32 -f"x" --show-tags
0x900fffff7ff7ff0: 0x00000000 <...> (tags: <no tag> 0x0)

Untagged lines will just not have the "(tags: ..." at all.
Though they may be part of a larger output that does have
some tagged lines.

To do this I've extended DumpDataExtractor to also print
memory tags where it has a valid execution context and
is asked to print them.

There are no special alignment requirements, simply
use "memory read" as usual. All alignment is handled
in DumpDataExtractor.

We use MakeTaggedRanges to find all the tagged memory
in the current dump, then read all that into a MemoryTagMap.

The tag map is populated once in DumpDataExtractor and re-used
for each subsequently printed line (or recursive call of
DumpDataExtractor, which some formats do).

Reviewed By: omjavaid

Differential Revision: https://reviews.llvm.org/D107140
---
 lldb/include/lldb/Core/DumpDataExtractor.h    |  12 +-
 lldb/source/Commands/CommandObjectMemory.cpp  |   8 +-
 lldb/source/Commands/Options.td               |   2 +
 lldb/source/Core/DumpDataExtractor.cpp        | 129 ++++++++++++++++--
 .../TestAArch64LinuxMTEMemoryTagAccess.py     | 125 +++++++++++++++++
 llvm/docs/ReleaseNotes.rst                    |   3 +
 6 files changed, 269 insertions(+), 10 deletions(-)

diff --git a/lldb/include/lldb/Core/DumpDataExtractor.h b/lldb/include/lldb/Core/DumpDataExtractor.h
index 12188609e8c03..08501b512a2b7 100644
--- a/lldb/include/lldb/Core/DumpDataExtractor.h
+++ b/lldb/include/lldb/Core/DumpDataExtractor.h
@@ -76,6 +76,15 @@ class Stream;
 ///     same integer value. If the items being displayed are not
 ///     bitfields, this value should be zero.
 ///
+/// \param[in] exe_scope
+///     If provided, this will be used to lookup language specific
+///     information, address information and memory tags.
+///     (if they are requested by the other options)
+///
+/// \param[in] show_memory_tags
+///     If exe_scope and base_addr are valid, include memory tags
+///     in the output. This does not apply to certain formats.
+///
 /// \return
 ///     The offset at which dumping ended.
 lldb::offset_t
@@ -83,7 +92,8 @@ DumpDataExtractor(const DataExtractor &DE, Stream *s, lldb::offset_t offset,
                   lldb::Format item_format, size_t item_byte_size,
                   size_t item_count, size_t num_per_line, uint64_t base_addr,
                   uint32_t item_bit_size, uint32_t item_bit_offset,
-                  ExecutionContextScope *exe_scope = nullptr);
+                  ExecutionContextScope *exe_scope = nullptr,
+                  bool show_memory_tags = false);
 
 void DumpHexBytes(Stream *s, const void *src, size_t src_len,
                   uint32_t bytes_per_line, lldb::addr_t base_addr);
diff --git a/lldb/source/Commands/CommandObjectMemory.cpp b/lldb/source/Commands/CommandObjectMemory.cpp
index f5e2cb4ed44ea..1b44a1bd709a9 100644
--- a/lldb/source/Commands/CommandObjectMemory.cpp
+++ b/lldb/source/Commands/CommandObjectMemory.cpp
@@ -91,6 +91,10 @@ class OptionGroupReadMemory : public OptionGroup {
       error = m_offset.SetValueFromString(option_value);
       break;
 
+    case '\x01':
+      m_show_tags = true;
+      break;
+
     default:
       llvm_unreachable("Unimplemented option");
     }
@@ -104,6 +108,7 @@ class OptionGroupReadMemory : public OptionGroup {
     m_force = false;
     m_offset.Clear();
     m_language_for_type.Clear();
+    m_show_tags = false;
   }
 
   Status FinalizeSettings(Target *target, OptionGroupFormat &format_options) {
@@ -277,6 +282,7 @@ class OptionGroupReadMemory : public OptionGroup {
   bool m_force;
   OptionValueUInt64 m_offset;
   OptionValueLanguage m_language_for_type;
+  bool m_show_tags = false;
 };
 
 // Read memory from the inferior process
@@ -860,7 +866,7 @@ class CommandObjectMemoryRead : public CommandObjectParsed {
     size_t bytes_dumped = DumpDataExtractor(
         data, output_stream_p, 0, format, item_byte_size, item_count,
         num_per_line / target->GetArchitecture().GetDataByteSize(), addr, 0, 0,
-        exe_scope);
+        exe_scope, m_memory_options.m_show_tags);
     m_next_addr = addr + bytes_dumped;
     output_stream_p->EOL();
     return true;
diff --git a/lldb/source/Commands/Options.td b/lldb/source/Commands/Options.td
index 7fbf0ab039953..269c937296d5d 100644
--- a/lldb/source/Commands/Options.td
+++ b/lldb/source/Commands/Options.td
@@ -488,6 +488,8 @@ let Command = "memory read" in {
     "display data.">;
   def memory_read_force : Option<"force", "r">, Groups<[1,2,3]>,
     Desc<"Necessary if reading over target.max-memory-read-size bytes.">;
+  def memory_read_show_tags : Option<"show-tags", "\\x01">, Group<1>,
+    Desc<"Include memory tags in output (does not apply to binary output).">;
 }
 
 let Command = "memory find" in {
diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp
index 175ffef04a812..211e16a2e0337 100644
--- a/lldb/source/Core/DumpDataExtractor.cpp
+++ b/lldb/source/Core/DumpDataExtractor.cpp
@@ -17,6 +17,9 @@
 #include "lldb/Target/ABI.h"
 #include "lldb/Target/ExecutionContext.h"
 #include "lldb/Target/ExecutionContextScope.h"
+#include "lldb/Target/MemoryRegionInfo.h"
+#include "lldb/Target/MemoryTagManager.h"
+#include "lldb/Target/MemoryTagMap.h"
 #include "lldb/Target/Process.h"
 #include "lldb/Target/SectionLoadList.h"
 #include "lldb/Target/Target.h"
@@ -253,6 +256,85 @@ void DumpFloatingPoint(std::ostringstream &ss, FloatT f) {
   ss << f;
 }
 
+static llvm::Optional<MemoryTagMap>
+GetMemoryTags(lldb::addr_t addr, size_t length,
+              ExecutionContextScope *exe_scope) {
+  assert(addr != LLDB_INVALID_ADDRESS);
+
+  if (!exe_scope)
+    return llvm::None;
+
+  TargetSP target_sp = exe_scope->CalculateTarget();
+  if (!target_sp)
+    return llvm::None;
+
+  ProcessSP process_sp = target_sp->CalculateProcess();
+  if (!process_sp)
+    return llvm::None;
+
+  llvm::Expected<const MemoryTagManager *> tag_manager_or_err =
+      process_sp->GetMemoryTagManager();
+  if (!tag_manager_or_err) {
+    llvm::consumeError(tag_manager_or_err.takeError());
+    return llvm::None;
+  }
+
+  MemoryRegionInfos memory_regions;
+  // Don't check return status, list will be just empty if an error happened.
+  process_sp->GetMemoryRegions(memory_regions);
+
+  llvm::Expected<std::vector<MemoryTagManager::TagRange>> tagged_ranges_or_err =
+      (*tag_manager_or_err)
+          ->MakeTaggedRanges(addr, addr + length, memory_regions);
+  // Here we know that our range will not be inverted but we must still check
+  // for an error.
+  if (!tagged_ranges_or_err) {
+    llvm::consumeError(tagged_ranges_or_err.takeError());
+    return llvm::None;
+  }
+  if (tagged_ranges_or_err->empty())
+    return llvm::None;
+
+  MemoryTagMap memory_tag_map(*tag_manager_or_err);
+  for (const MemoryTagManager::TagRange &range : *tagged_ranges_or_err) {
+    llvm::Expected<std::vector<lldb::addr_t>> tags_or_err =
+        process_sp->ReadMemoryTags(range.GetRangeBase(), range.GetByteSize());
+
+    if (tags_or_err)
+      memory_tag_map.InsertTags(range.GetRangeBase(), *tags_or_err);
+    else
+      llvm::consumeError(tags_or_err.takeError());
+  }
+
+  if (memory_tag_map.Empty())
+    return llvm::None;
+
+  return memory_tag_map;
+}
+
+static void
+printMemoryTags(const DataExtractor &DE, Stream *s, lldb::addr_t addr,
+                size_t len,
+                const llvm::Optional<MemoryTagMap> &memory_tag_map) {
+  std::vector<llvm::Optional<lldb::addr_t>> tags =
+      memory_tag_map->GetTags(addr, len);
+
+  // Only print if there is at least one tag for this line
+  if (tags.empty())
+    return;
+
+  s->Printf(" (tag%s:", tags.size() > 1 ? "s" : "");
+  // Some granules may not be tagged but print something for them
+  // so that the ordering remains intact.
+  for (auto tag : tags) {
+    if (tag)
+      s->Printf(" 0x%" PRIx64, *tag);
+    else
+      s->PutCString(" <no tag>");
+  }
+  s->PutCString(")");
+}
+
 lldb::offset_t lldb_private::DumpDataExtractor(
     const DataExtractor &DE, Stream *s, offset_t start_offset,
     lldb::Format item_format, size_t item_byte_size, size_t item_count,
@@ -261,7 +343,7 @@ lldb::offset_t lldb_private::DumpDataExtractor(
                               // non-zero, the value is a bitfield
     uint32_t item_bit_offset, // If "item_bit_size" is non-zero, this is the
                               // shift amount to apply to a bitfield
-    ExecutionContextScope *exe_scope) {
+    ExecutionContextScope *exe_scope, bool show_memory_tags) {
   if (s == nullptr)
     return start_offset;
 
@@ -272,6 +354,11 @@ lldb::offset_t lldb_private::DumpDataExtractor(
 
   offset_t offset = start_offset;
 
+  llvm::Optional<MemoryTagMap> memory_tag_map = llvm::None;
+  if (show_memory_tags && base_addr != LLDB_INVALID_ADDRESS)
+    memory_tag_map =
+        GetMemoryTags(base_addr, DE.GetByteSize() - offset, exe_scope);
+
   if (item_format == eFormatInstruction)
     return DumpInstructions(DE, s, exe_scope, start_offset, base_addr,
                             item_count);
@@ -283,7 +370,10 @@ lldb::offset_t lldb_private::DumpDataExtractor(
   lldb::offset_t line_start_offset = start_offset;
   for (uint32_t count = 0; DE.ValidOffset(offset) && count < item_count;
        ++count) {
+    // If we are at the beginning or end of a line
+    // Note that the last line is handled outside this for loop.
     if ((count % num_per_line) == 0) {
+      // If we are at the end of a line
       if (count > 0) {
         if (item_format == eFormatBytesWithASCII &&
             offset > line_start_offset) {
@@ -295,6 +385,15 @@ lldb::offset_t lldb_private::DumpDataExtractor(
                             offset - line_start_offset, SIZE_MAX,
                             LLDB_INVALID_ADDRESS, 0, 0);
         }
+
+        if (base_addr != LLDB_INVALID_ADDRESS && memory_tag_map) {
+          size_t line_len = offset - line_start_offset;
+          lldb::addr_t line_base =
+              base_addr +
+              (offset - start_offset - line_len) / DE.getTargetByteSize();
+          printMemoryTags(DE, s, line_base, line_len, memory_tag_map);
+        }
+
         s->EOL();
       }
       if (base_addr != LLDB_INVALID_ADDRESS)
@@ -796,14 +895,28 @@ lldb::offset_t lldb_private::DumpDataExtractor(
     }
   }
 
-  if (item_format == eFormatBytesWithASCII && offset > line_start_offset) {
-    s->Printf("%*s", static_cast<int>(
-                         (num_per_line - (offset - line_start_offset)) * 3 + 2),
-              "");
-    DumpDataExtractor(DE, s, line_start_offset, eFormatCharPrintable, 1,
-                      offset - line_start_offset, SIZE_MAX,
-                      LLDB_INVALID_ADDRESS, 0, 0);
+  // If anything was printed we want to catch the end of the last line.
+  // Since we will exit the for loop above before we get a chance to append to
+  // it normally.
+  if (offset > line_start_offset) {
+    if (item_format == eFormatBytesWithASCII) {
+      s->Printf("%*s",
+                static_cast<int>(
+                    (num_per_line - (offset - line_start_offset)) * 3 + 2),
+                "");
+      DumpDataExtractor(DE, s, line_start_offset, eFormatCharPrintable, 1,
+                        offset - line_start_offset, SIZE_MAX,
+                        LLDB_INVALID_ADDRESS, 0, 0);
+    }
+
+    if (base_addr != LLDB_INVALID_ADDRESS && memory_tag_map) {
+      size_t line_len = offset - line_start_offset;
+      lldb::addr_t line_base = base_addr + (offset - start_offset - line_len) /
+                                               DE.getTargetByteSize();
+      printMemoryTags(DE, s, line_base, line_len, memory_tag_map);
+    }
   }
+
   return offset; // Return the offset at which we ended up
 }
 
diff --git a/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py b/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py
index 983f283444a46..c95c2ae327533 100644
--- a/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py
+++ b/lldb/test/API/linux/aarch64/mte_tag_access/TestAArch64LinuxMTEMemoryTagAccess.py
@@ -286,3 +286,128 @@ def test_mte_tag_write(self):
                           "\[0x[0-9A-Fa-f]+10, 0x[0-9A-Fa-f]+20\): 0x3 \(mismatch\)\n"
                           "\[0x[0-9A-Fa-f]+20, 0x[0-9A-Fa-f]+30\): 0x3 \(mismatch\)\n"
                           "\[0x[0-9A-Fa-f]+30, 0x[0-9A-Fa-f]+40\): 0x0$"])
+
+    @skipUnlessArch("aarch64")
+    @skipUnlessPlatform(["linux"])
+    @skipUnlessAArch64MTELinuxCompiler
+    def test_mte_memory_read_tag_display(self):
+        self.setup_mte_test()
+
+        # Reading from an untagged range should not be any different
+        self.expect("memory read non_mte_buf non_mte_buf+16",
+                substrs=["tag"], matching=False)
+
+        # show-tags option is required
+        self.expect("memory read mte_buf mte_buf+32 -f \"x\" -l 1 -s 16",
+                patterns=["tag"], matching=False)
+
+        # Reading 16 bytes per line means 1 granule and so 1 tag per line
+        self.expect("memory read mte_buf mte_buf+32 -f \"x\" -l 1 -s 16 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tag: 0x0\)\n"
+                    "0x[0-9A-fa-f]+10: 0x0+ \(tag: 0x1\)"
+                    ])
+
+        # If bytes per line is > granule size then you get multiple tags
+        # per line.
+        self.expect("memory read mte_buf mte_buf+32 -f \"x\" -l 1 -s 32 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tags: 0x0 0x1\)\n"
+                    ])
+
+        # Reading half a granule still shows you the tag for that granule
+        self.expect("memory read mte_buf mte_buf+8 -f \"x\" -l 1 -s 8 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tag: 0x0\)\n"
+                    ])
+
+        # We can read a whole number of granules but split them over more lines
+        # than there are granules. Tags are shown repeated for each applicable line.
+        self.expect("memory read mte_buf+32 mte_buf+64 -f \"x\" -l 1 -s 8 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+20: 0x0+ \(tag: 0x2\)\n"
+                    "0x[0-9A-fa-f]+28: 0x0+ \(tag: 0x2\)\n"
+                    "0x[0-9A-fa-f]+30: 0x0+ \(tag: 0x3\)\n"
+                    "0x[0-9A-fa-f]+38: 0x0+ \(tag: 0x3\)"
+                    ])
+
+        # Also works if we misalign the start address. Note the first tag is shown
+        # only once here and we have a new tag on the last line.
+        # (bytes per line == the misalignment here)
+        self.expect("memory read mte_buf+32+8 mte_buf+64+8 -f \"x\" -l 1 -s 8 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+28: 0x0+ \(tag: 0x2\)\n"
+                    "0x[0-9A-fa-f]+30: 0x0+ \(tag: 0x3\)\n"
+                    "0x[0-9A-fa-f]+38: 0x0+ \(tag: 0x3\)\n"
+                    "0x[0-9A-fa-f]+40: 0x0+ \(tag: 0x4\)"
+                    ])
+
+        # We can do the same thing but where the misaligment isn't equal to
+        # bytes per line. This time, some lines cover multiple granules and
+        # so show multiple tags.
+        self.expect("memory read mte_buf+32+4 mte_buf+64+4 -f \"x\" -l 1 -s 8 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+24: 0x0+ \(tag: 0x2\)\n"
+                    "0x[0-9A-fa-f]+2c: 0x0+ \(tags: 0x2 0x3\)\n"
+                    "0x[0-9A-fa-f]+34: 0x0+ \(tag: 0x3\)\n"
+                    "0x[0-9A-fa-f]+3c: 0x0+ \(tags: 0x3 0x4\)"
+                    ])
+
+        # If you read a range that includes non tagged areas those areas
+        # simply aren't annotated.
+
+        # Initial part of range is untagged
+        self.expect("memory read mte_buf-16 mte_buf+32 -f \"x\" -l 1 -s 16 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+f0: 0x0+\n"
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tag: 0x0\)\n"
+                    "0x[0-9A-fa-f]+10: 0x0+ \(tag: 0x1\)"
+                    ])
+
+        # End of range is untagged
+        self.expect("memory read mte_buf+page_size-16 mte_buf+page_size+16 -f \"x\" -l 1 -s 16 --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+f0: 0x0+ \(tag: 0xf\)\n"
+                    "0x[0-9A-fa-f]+00: 0x0+"
+                    ])
+
+        # The smallest MTE range we can get is a single page so we just check
+        # parts of this result. Where we read from before the tagged page to after it.
+        # Add --force here because we're reading just over 4k.
+        self.expect(
+                "memory read mte_read_only-16 mte_read_only+page_size+16 -f \"x\" -l 1 -s 16 --force --show-tags",
+                patterns=[
+                    "0x[0-9A-fa-f]+f0: 0x0+\n"
+                    "0x[0-9A-fa-f]+00: 0x0+ \(tag: 0x0\)\n",
+                    "0x[0-9A-fa-f]+f0: 0x0+ \(tag: 0x0\)\n"
+                    "0x[0-9A-fa-f]+00: 0x0+"
+                    ])
+
+        # Some parts of a line might be tagged and others untagged.
+        # <no tag> is shown in where the tag would be, to keep the order intact.
+        self.expect("memory read mte_buf-16 mte_buf+32 -f \"x\" -l 1 -s 32 --show-tags",
+                patterns=["0x[0-9A-fa-f]+f0: 0x0+ \(tags: <no tag> 0x0\)"])
+        self.expect(
+                "memory read mte_read_only+page_size-16 mte_read_only+page_size+16 -f \"x\" -l 1 -s 32  --show-tags",
+                patterns=["0x[0-9A-fa-f]+f0: 0x0+ \(tags: 0x0 <no tag>\)"])
+
+        # Here the start address is unaligned so we cover 3 granules instead of 2
+        self.expect("memory read mte_buf-16+4 mte_buf+32+4 -f \"x\" -l 1 -s 32 --show-tags",
+                patterns=["0x[0-9A-fa-f]+f4: 0x0+ \(tags: <no tag> 0x0 0x1\)"])
+        self.expect(
+                "memory read mte_read_only+page_size-16+4 mte_read_only+page_size+16+4 -f \"x\" -l 1 -s 32 --show-tags",
+                patterns=["0x[0-9A-fa-f]+f4: 0x0+ \(tags: 0x0 <no tag> <no tag>\)"])
+
+        # Some formats call DumpDataExtractor multiple times,
+        # check that those print tags only once per line.
+        self.expect("memory read mte_buf mte_buf+32 -f \"x\" --show-tags",
+                patterns=["0x[0-9A-fa-f]+00: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x0\)\n",
+                          "0x[0-9A-fa-f]+10: 0x0+ 0x0+ 0x0+ 0x0+ \(tag: 0x1\)"])
+
+        self.expect("memory read mte_buf mte_buf+32 -f \"bytes with ASCII\" --show-tags",
+                patterns=["0x[0-9A-fa-f]+00: (00 ){16} \.{16} \(tag: 0x0\)\n",
+                          "0x[0-9A-fa-f]+10: (00 ){16} \.{16} \(tag: 0x1\)"])
+
+        self.expect("memory read mte_buf mte_buf+32 -f \"uint8_t[]\" -s 16 -l 1 --show-tags",
+                patterns=["0x[0-9A-Fa-f]+00: \{(0x00 ){15}0x00\} \(tag: 0x0\)\n"
+                          "0x[0-9A-Fa-f]+10: \{(0x00 ){15}0x00\} \(tag: 0x1\)"])
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 15f7428a7fde2..7718bcc959103 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -172,6 +172,9 @@ Changes to LLDB
   * ``memory tag read``
   * ``memory tag write``
 
+* The ``memory read`` command has a new option ``--show-tags``. Use this option
+  to show memory tags beside the contents of tagged memory ranges.
+
 Changes to Sanitizers
 ---------------------
 

From a5e324e3e2edcc72d9e402b4d846cae615192ad8 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 10:53:21 +0100
Subject: [PATCH 731/946] [AMDGPUHSAMetadataStreamer] Do not assume ABI
 alignment for pointers

AMDGPUHSAMetadataStreamer currently assumes that pointer arguments
without align attribute have ABI alignment of the pointee type.
This is incompatible with opaque pointers, but also plain incorrect:
Pointer arguments without explicit alignment have alignment 1. It is
the responsibility of the frontent to add correct align annotations.

Differential Revision: https://reviews.llvm.org/D118229
---
 llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp |  9 +++------
 .../AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll      | 12 ++++++------
 .../CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll | 12 ++++++------
 3 files changed, 15 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index f4d4d34b698c5..3ac7c45b32759 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -331,8 +331,7 @@ void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
   if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
     if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
       // FIXME: Should report this for all address spaces
-      PointeeAlign = DL.getValueOrABITypeAlignment(
-          Arg.getParamAlign(), PtrTy->getPointerElementType());
+      PointeeAlign = Arg.getParamAlign().valueOrOne();
     }
   }
 
@@ -731,10 +730,8 @@ void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
 
   // FIXME: Need to distinguish in memory alignment from pointer alignment.
   if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
-    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
-      PointeeAlign = DL.getValueOrABITypeAlignment(
-          Arg.getParamAlign(), PtrTy->getPointerElementType());
-    }
+    if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
+      PointeeAlign = Arg.getParamAlign().valueOrOne();
   }
 
   // There's no distinction between byval aggregates and raw aggregates.
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
index 45c8282739fce..e219652c7bc78 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
@@ -881,7 +881,7 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0
 ; CHECK:          .symbol:         test_addr_space.kd
 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
                                            i32 addrspace(4)* %c,
-                                           i32 addrspace(3)* %l) #0
+                                           i32 addrspace(3)* align 4 %l) #0
     !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
     !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
   ret void
@@ -1679,11 +1679,11 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type(
 ; CHECK:          .symbol:         test_pointee_align.kd
 define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
                                               i8 addrspace(3)* %b,
-                                              <2 x i8> addrspace(3)* %c,
-                                              <3 x i8> addrspace(3)* %d,
-                                              <4 x i8> addrspace(3)* %e,
-                                              <8 x i8> addrspace(3)* %f,
-                                              <16 x i8> addrspace(3)* %g,
+                                              <2 x i8> addrspace(3)* align 2 %c,
+                                              <3 x i8> addrspace(3)* align 4 %d,
+                                              <4 x i8> addrspace(3)* align 4 %e,
+                                              <8 x i8> addrspace(3)* align 8 %f,
+                                              <16 x i8> addrspace(3)* align 16 %g,
                                               {} addrspace(3)* %h) #0
     !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
     !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
index f19fbc14cc554..f44681a6fcf66 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
@@ -873,7 +873,7 @@ define amdgpu_kernel void @test_multi_arg(i32 %a, <2 x i16> %b, <3 x i8> %c) #0
 ; CHECK-NEXT:       AddrSpaceQual: Global
 define amdgpu_kernel void @test_addr_space(i32 addrspace(1)* %g,
                                            i32 addrspace(4)* %c,
-                                           i32 addrspace(3)* %l) #0
+                                           i32 addrspace(3)* align 4 %l) #0
     !kernel_arg_addr_space !50 !kernel_arg_access_qual !23 !kernel_arg_type !51
     !kernel_arg_base_type !51 !kernel_arg_type_qual !25 {
   ret void
@@ -1653,11 +1653,11 @@ define amdgpu_kernel void @test_arg_unknown_builtin_type(
 ; CHECK-NEXT:       AddrSpaceQual: Global
 define amdgpu_kernel void @test_pointee_align(i64 addrspace(1)* %a,
                                               i8 addrspace(3)* %b,
-                                              <2 x i8> addrspace(3)* %c,
-                                              <3 x i8> addrspace(3)* %d,
-                                              <4 x i8> addrspace(3)* %e,
-                                              <8 x i8> addrspace(3)* %f,
-                                              <16 x i8> addrspace(3)* %g,
+                                              <2 x i8> addrspace(3)* align 2 %c,
+                                              <3 x i8> addrspace(3)* align 4 %d,
+                                              <4 x i8> addrspace(3)* align 4 %e,
+                                              <8 x i8> addrspace(3)* align 8 %f,
+                                              <16 x i8> addrspace(3)* align 16 %g,
                                               {} addrspace(3)* %h) #0
     !kernel_arg_addr_space !91 !kernel_arg_access_qual !92 !kernel_arg_type !93
     !kernel_arg_base_type !93 !kernel_arg_type_qual !94 {

From 33c3ef2fbeec4ede5fc303b09cdca99ae2c0522a Mon Sep 17 00:00:00 2001
From: Sam McCall <sam.mccall@gmail.com>
Date: Thu, 30 Dec 2021 01:57:47 +0100
Subject: [PATCH 732/946] [CodeCompletion][clangd] Clean __uglified parameter
 names in completion & hover

Underscore-uglified identifiers are used in standard library implementations to
guard against collisions with macros, and they hurt readability considerably.
(Consider `push_back(Tp_ &&__value)` vs `push_back(Tp value)`.
When we're describing an interface, the exact names of parameters are not
critical so we can drop these prefixes.

This patch adds a new PrintingPolicy flag that can applies this stripping
when recursively printing pieces of AST.
We set it in code completion/signature help, and in clangd's hover display.
All three features also do a bit of manual poking at names, so fix up those too.

Fixes https://github.com/clangd/clangd/issues/736

Differential Revision: https://reviews.llvm.org/D116387
---
 .../clangd/unittests/SymbolCollectorTests.cpp |  7 ++--
 clang/include/clang/AST/PrettyPrinter.h       |  8 ++++-
 clang/include/clang/Basic/IdentifierTable.h   |  4 +++
 clang/lib/AST/DeclPrinter.cpp                 | 24 +++++++++----
 clang/lib/AST/StmtPrinter.cpp                 | 12 +++++--
 clang/lib/AST/TemplateName.cpp                |  8 +++--
 clang/lib/AST/TypePrinter.cpp                 |  3 +-
 clang/lib/Basic/IdentifierTable.cpp           |  8 +++++
 clang/lib/Sema/SemaCodeComplete.cpp           | 15 ++++----
 clang/test/CodeCompletion/deuglify.cpp        | 25 +++++++++++++
 clang/unittests/AST/DeclPrinterTest.cpp       | 35 +++++++++++++++++++
 clang/unittests/AST/StmtPrinterTest.cpp       | 19 ++++++++++
 clang/unittests/AST/TypePrinterTest.cpp       | 19 +++++++++-
 13 files changed, 164 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/CodeCompletion/deuglify.cpp

diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
index 24cd8b7313bd3..a21c085299b2f 100644
--- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp
@@ -1457,8 +1457,8 @@ TEST_F(SymbolCollectorTest, CanonicalSTLHeader) {
       namespace std {
         class string {};
         // Move overloads have special handling.
-        template <typename T> T&& move(T&&);
-        template <typename I, typename O> O move(I, I, O);
+        template <typename _T> T&& move(_T&& __value);
+        template <typename _I, typename _O> _O move(_I, _I, _O);
       }
       )cpp",
       /*Main=*/"");
@@ -1468,7 +1468,8 @@ TEST_F(SymbolCollectorTest, CanonicalSTLHeader) {
           QName("std"),
           AllOf(QName("std::string"), DeclURI(TestHeaderURI),
                 IncludeHeader("<string>")),
-          AllOf(Labeled("move(T &&)"), IncludeHeader("<utility>")),
+          // Parameter names are demangled.
+          AllOf(Labeled("move(T &&value)"), IncludeHeader("<utility>")),
           AllOf(Labeled("move(I, I, O)"), IncludeHeader("<algorithm>"))));
 }
 
diff --git a/clang/include/clang/AST/PrettyPrinter.h b/clang/include/clang/AST/PrettyPrinter.h
index 01dc8e0002703..fd40328d8dcf7 100644
--- a/clang/include/clang/AST/PrettyPrinter.h
+++ b/clang/include/clang/AST/PrettyPrinter.h
@@ -73,7 +73,8 @@ struct PrintingPolicy {
         MSVCFormatting(false), ConstantsAsWritten(false),
         SuppressImplicitBase(false), FullyQualifiedName(false),
         PrintCanonicalTypes(false), PrintInjectedClassNameWithArguments(true),
-        UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false) {}
+        UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false),
+        CleanUglifiedParameters(false) {}
 
   /// Adjust this printing policy for cases where it's known that we're
   /// printing C++ code (for instance, if AST dumping reaches a C++-only
@@ -280,6 +281,11 @@ struct PrintingPolicy {
   /// parameters.
   unsigned AlwaysIncludeTypeForTemplateArgument : 1;
 
+  /// Whether to strip underscores when printing reserved parameter names.
+  /// e.g. std::vector<class _Tp> becomes std::vector<class Tp>.
+  /// This only affects parameter names, and so describes a compatible API.
+  unsigned CleanUglifiedParameters : 1;
+
   /// Callbacks to use to allow the behavior of printing to be customized.
   const PrintingCallbacks *Callbacks = nullptr;
 };
diff --git a/clang/include/clang/Basic/IdentifierTable.h b/clang/include/clang/Basic/IdentifierTable.h
index 19c967efcc424..aaf1297c1a648 100644
--- a/clang/include/clang/Basic/IdentifierTable.h
+++ b/clang/include/clang/Basic/IdentifierTable.h
@@ -458,6 +458,10 @@ class alignas(IdentifierInfoAlignment) IdentifierInfo {
   /// 7.1.3, C++ [lib.global.names]).
   ReservedIdentifierStatus isReserved(const LangOptions &LangOpts) const;
 
+  /// If the identifier is an "uglified" reserved name, return a cleaned form.
+  /// e.g. _Foo => Foo. Otherwise, just returns the name.
+  StringRef deuglifiedName() const;
+
   /// Provide less than operator for lexicographical sorting.
   bool operator<(const IdentifierInfo &RHS) const {
     return getName() < RHS.getName();
diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp
index 044eb8f8f8e55..698b1c3ffd348 100644
--- a/clang/lib/AST/DeclPrinter.cpp
+++ b/clang/lib/AST/DeclPrinter.cpp
@@ -885,7 +885,10 @@ void DeclPrinter::VisitVarDecl(VarDecl *D) {
     }
   }
 
-  printDeclType(T, D->getName());
+  printDeclType(T, (isa<ParmVarDecl>(D) && Policy.CleanUglifiedParameters &&
+                    D->getIdentifier())
+                       ? D->getIdentifier()->deuglifiedName()
+                       : D->getName());
   Expr *Init = D->getInit();
   if (!Policy.SuppressInitializers && Init) {
     bool ImplicitInit = false;
@@ -1131,8 +1134,12 @@ void DeclPrinter::VisitTemplateDecl(const TemplateDecl *D) {
     else if (TTP->getDeclName())
       Out << ' ';
 
-    if (TTP->getDeclName())
-      Out << TTP->getDeclName();
+    if (TTP->getDeclName()) {
+      if (Policy.CleanUglifiedParameters && TTP->getIdentifier())
+        Out << TTP->getIdentifier()->deuglifiedName();
+      else
+        Out << TTP->getDeclName();
+    }
   } else if (auto *TD = D->getTemplatedDecl())
     Visit(TD);
   else if (const auto *Concept = dyn_cast<ConceptDecl>(D)) {
@@ -1742,8 +1749,12 @@ void DeclPrinter::VisitTemplateTypeParmDecl(const TemplateTypeParmDecl *TTP) {
   else if (TTP->getDeclName())
     Out << ' ';
 
-  if (TTP->getDeclName())
-    Out << TTP->getDeclName();
+  if (TTP->getDeclName()) {
+    if (Policy.CleanUglifiedParameters && TTP->getIdentifier())
+      Out << TTP->getIdentifier()->deuglifiedName();
+    else
+      Out << TTP->getDeclName();
+  }
 
   if (TTP->hasDefaultArgument()) {
     Out << " = ";
@@ -1755,7 +1766,8 @@ void DeclPrinter::VisitNonTypeTemplateParmDecl(
     const NonTypeTemplateParmDecl *NTTP) {
   StringRef Name;
   if (IdentifierInfo *II = NTTP->getIdentifier())
-    Name = II->getName();
+    Name =
+        Policy.CleanUglifiedParameters ? II->deuglifiedName() : II->getName();
   printDeclType(NTTP->getType(), Name, NTTP->isParameterPack());
 
   if (NTTP->hasDefaultArgument()) {
diff --git a/clang/lib/AST/StmtPrinter.cpp b/clang/lib/AST/StmtPrinter.cpp
index b65a38d1e5665..adc0720fe0008 100644
--- a/clang/lib/AST/StmtPrinter.cpp
+++ b/clang/lib/AST/StmtPrinter.cpp
@@ -1030,7 +1030,12 @@ void StmtPrinter::VisitDeclRefExpr(DeclRefExpr *Node) {
     Qualifier->print(OS, Policy);
   if (Node->hasTemplateKeyword())
     OS << "template ";
-  OS << Node->getNameInfo();
+  if (Policy.CleanUglifiedParameters &&
+      isa<ParmVarDecl, NonTypeTemplateParmDecl>(Node->getDecl()) &&
+      Node->getDecl()->getIdentifier())
+    OS << Node->getDecl()->getIdentifier()->deuglifiedName();
+  else
+    Node->getNameInfo().printName(OS, Policy);
   if (Node->hasExplicitTemplateArgs()) {
     const TemplateParameterList *TPL = nullptr;
     if (!Node->hadMultipleCandidates())
@@ -2069,7 +2074,10 @@ void StmtPrinter::VisitLambdaExpr(LambdaExpr *Node) {
       } else {
         NeedComma = true;
       }
-      std::string ParamStr = P->getNameAsString();
+      std::string ParamStr =
+          (Policy.CleanUglifiedParameters && P->getIdentifier())
+              ? P->getIdentifier()->deuglifiedName().str()
+              : P->getNameAsString();
       P->getOriginalType().print(OS, Policy, ParamStr);
     }
     if (Method->isVariadic()) {
diff --git a/clang/lib/AST/TemplateName.cpp b/clang/lib/AST/TemplateName.cpp
index c8bd74f0b5bb4..05d7d58b71c4d 100644
--- a/clang/lib/AST/TemplateName.cpp
+++ b/clang/lib/AST/TemplateName.cpp
@@ -223,8 +223,12 @@ bool TemplateName::containsUnexpandedParameterPack() const {
 void TemplateName::print(raw_ostream &OS, const PrintingPolicy &Policy,
                          Qualified Qual) const {
   if (TemplateDecl *Template = Storage.dyn_cast<TemplateDecl *>())
-    if (Qual == Qualified::Fully &&
-        getDependence() != TemplateNameDependenceScope::DependentInstantiation)
+    if (Policy.CleanUglifiedParameters &&
+        isa<TemplateTemplateParmDecl>(Template) && Template->getIdentifier())
+      OS << Template->getIdentifier()->deuglifiedName();
+    else if (Qual == Qualified::Fully &&
+             getDependence() !=
+                 TemplateNameDependenceScope::DependentInstantiation)
       Template->printQualifiedName(OS, Policy);
     else
       OS << *Template;
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
index cf520fcb037ed..bba323f651aad 100644
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -1418,7 +1418,8 @@ void TypePrinter::printTemplateTypeParmBefore(const TemplateTypeParmType *T,
     }
     OS << "auto";
   } else if (IdentifierInfo *Id = T->getIdentifier())
-    OS << Id->getName();
+    OS << (Policy.CleanUglifiedParameters ? Id->deuglifiedName()
+                                          : Id->getName());
   else
     OS << "type-parameter-" << T->getDepth() << '-' << T->getIndex();
 
diff --git a/clang/lib/Basic/IdentifierTable.cpp b/clang/lib/Basic/IdentifierTable.cpp
index d811aeec84a0f..b86cb7af69bdf 100644
--- a/clang/lib/Basic/IdentifierTable.cpp
+++ b/clang/lib/Basic/IdentifierTable.cpp
@@ -309,6 +309,14 @@ IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
   return ReservedIdentifierStatus::NotReserved;
 }
 
+StringRef IdentifierInfo::deuglifiedName() const {
+  StringRef Name = getName();
+  if (Name.size() >= 2 && Name.front() == '_' &&
+      (Name[1] == '_' || (Name[1] >= 'A' && Name[1] <= 'Z')))
+    return Name.ltrim('_');
+  return Name;
+}
+
 tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
   // We use a perfect hash function here involving the length of the keyword,
   // the first and third character.  For preprocessor ID's there are no
diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp
index b86bfe869c69d..01fdf51c60c38 100644
--- a/clang/lib/Sema/SemaCodeComplete.cpp
+++ b/clang/lib/Sema/SemaCodeComplete.cpp
@@ -1896,6 +1896,7 @@ static PrintingPolicy getCompletionPrintingPolicy(const ASTContext &Context,
   Policy.SuppressStrongLifetime = true;
   Policy.SuppressUnwrittenScope = true;
   Policy.SuppressScope = true;
+  Policy.CleanUglifiedParameters = true;
   return Policy;
 }
 
@@ -2837,7 +2838,7 @@ FormatFunctionParameter(const PrintingPolicy &Policy,
     std::string Result;
 
     if (Param->getIdentifier() && !ObjCMethodParam && !SuppressName)
-      Result = std::string(Param->getIdentifier()->getName());
+      Result = std::string(Param->getIdentifier()->deuglifiedName());
 
     QualType Type = Param->getType();
     if (ObjCSubsts)
@@ -2847,7 +2848,7 @@ FormatFunctionParameter(const PrintingPolicy &Policy,
       Result = "(" + formatObjCParamQualifiers(ObjCQual, Type);
       Result += Type.getAsString(Policy) + ")";
       if (Param->getIdentifier() && !SuppressName)
-        Result += Param->getIdentifier()->getName();
+        Result += Param->getIdentifier()->deuglifiedName();
     } else {
       Type.getAsStringInternal(Result, Policy);
     }
@@ -2875,7 +2876,7 @@ FormatFunctionParameter(const PrintingPolicy &Policy,
     // for the block; just use the parameter type as a placeholder.
     std::string Result;
     if (!ObjCMethodParam && Param->getIdentifier())
-      Result = std::string(Param->getIdentifier()->getName());
+      Result = std::string(Param->getIdentifier()->deuglifiedName());
 
     QualType Type = Param->getType().getUnqualifiedType();
 
@@ -2887,7 +2888,7 @@ FormatFunctionParameter(const PrintingPolicy &Policy,
       if (Result.back() != ')')
         Result += " ";
       if (Param->getIdentifier())
-        Result += Param->getIdentifier()->getName();
+        Result += Param->getIdentifier()->deuglifiedName();
     } else {
       Type.getAsStringInternal(Result, Policy);
     }
@@ -3082,14 +3083,14 @@ static void AddTemplateParameterChunks(
 
       if (TTP->getIdentifier()) {
         PlaceholderStr += ' ';
-        PlaceholderStr += TTP->getIdentifier()->getName();
+        PlaceholderStr += TTP->getIdentifier()->deuglifiedName();
       }
 
       HasDefaultArg = TTP->hasDefaultArgument();
     } else if (NonTypeTemplateParmDecl *NTTP =
                    dyn_cast<NonTypeTemplateParmDecl>(*P)) {
       if (NTTP->getIdentifier())
-        PlaceholderStr = std::string(NTTP->getIdentifier()->getName());
+        PlaceholderStr = std::string(NTTP->getIdentifier()->deuglifiedName());
       NTTP->getType().getAsStringInternal(PlaceholderStr, Policy);
       HasDefaultArg = NTTP->hasDefaultArgument();
     } else {
@@ -3101,7 +3102,7 @@ static void AddTemplateParameterChunks(
       PlaceholderStr = "template<...> class";
       if (TTP->getIdentifier()) {
         PlaceholderStr += ' ';
-        PlaceholderStr += TTP->getIdentifier()->getName();
+        PlaceholderStr += TTP->getIdentifier()->deuglifiedName();
       }
 
       HasDefaultArg = TTP->hasDefaultArgument();
diff --git a/clang/test/CodeCompletion/deuglify.cpp b/clang/test/CodeCompletion/deuglify.cpp
new file mode 100644
index 0000000000000..c5f801736376c
--- /dev/null
+++ b/clang/test/CodeCompletion/deuglify.cpp
@@ -0,0 +1,25 @@
+// Fake standard library with uglified names.
+// Parameters (including template params) get ugliness stripped.
+namespace std {
+
+template <typename _Tp>
+class __vector_base {};
+
+template <typename _Tp>
+class vector : private __vector_base<_Tp> {
+public:
+  _Tp &at(unsigned __index) const;
+  int __stays_ugly();
+};
+
+} // namespace std
+
+int x = std::vector<int>{}.at(42);
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:17:14 %s -o - | FileCheck -check-prefix=CHECK-CC1 %s
+// CHECK-CC1: COMPLETION: __vector_base : __vector_base<<#typename Tp#>>
+// CHECK-CC1: COMPLETION: vector : vector<<#typename Tp#>>
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:17:28 %s -o - | FileCheck -check-prefix=CHECK-CC2 %s
+// CHECK-CC2: COMPLETION: __stays_ugly : [#int#]__stays_ugly()
+// CHECK-CC2: COMPLETION: at : [#int &#]at(<#unsigned int index#>)[# const#]
+// RUN: %clang_cc1 -fsyntax-only -code-completion-at=%s:17:31 %s -o - | FileCheck -check-prefix=CHECK-CC3 %s
+// CHECK-CC3: OVERLOAD: [#int &#]at(<#unsigned int index#>)
diff --git a/clang/unittests/AST/DeclPrinterTest.cpp b/clang/unittests/AST/DeclPrinterTest.cpp
index bdc23f33f39b0..0b579565f5b4a 100644
--- a/clang/unittests/AST/DeclPrinterTest.cpp
+++ b/clang/unittests/AST/DeclPrinterTest.cpp
@@ -1336,6 +1336,41 @@ TEST(DeclPrinter, TestTemplateArgumentList16) {
   ASSERT_TRUE(PrintedDeclCXX11Matches(Code, "NT2", "int NT2 = 5"));
 }
 
+TEST(DeclPrinter, TestFunctionParamUglified) {
+  llvm::StringLiteral Code = R"cpp(
+    class __c;
+    void _A(__c *__param);
+  )cpp";
+  auto Clean = [](PrintingPolicy &Policy) {
+    Policy.CleanUglifiedParameters = true;
+  };
+
+  ASSERT_TRUE(PrintedDeclCXX17Matches(Code, namedDecl(hasName("_A")).bind("id"),
+                                      "void _A(__c *__param)"));
+  ASSERT_TRUE(PrintedDeclCXX17Matches(Code, namedDecl(hasName("_A")).bind("id"),
+                                      "void _A(__c *param)", Clean));
+}
+
+TEST(DeclPrinter, TestTemplateParamUglified) {
+  llvm::StringLiteral Code = R"cpp(
+    template <typename _Tp, int __n, template <typename> class _Container>
+    struct _A{};
+  )cpp";
+  auto Clean = [](PrintingPolicy &Policy) {
+    Policy.CleanUglifiedParameters = true;
+  };
+
+  ASSERT_TRUE(PrintedDeclCXX17Matches(
+      Code, classTemplateDecl(hasName("_A")).bind("id"),
+      "template <typename _Tp, int __n, template <typename> class _Container> "
+      "struct _A {}"));
+  ASSERT_TRUE(PrintedDeclCXX17Matches(
+      Code, classTemplateDecl(hasName("_A")).bind("id"),
+      "template <typename Tp, int n, template <typename> class Container> "
+      "struct _A {}",
+      Clean));
+}
+
 TEST(DeclPrinter, TestStaticAssert1) {
   ASSERT_TRUE(PrintedDeclCXX17Matches("static_assert(true);",
                                       staticAssertDecl().bind("id"),
diff --git a/clang/unittests/AST/StmtPrinterTest.cpp b/clang/unittests/AST/StmtPrinterTest.cpp
index 65dfec4cc5b4a..f3ea74d65696f 100644
--- a/clang/unittests/AST/StmtPrinterTest.cpp
+++ b/clang/unittests/AST/StmtPrinterTest.cpp
@@ -263,3 +263,22 @@ TEST(StmtPrinter, TerseOutputWithLambdas) {
 
       [](PrintingPolicy &PP) { PP.TerseOutput = true; }));
 }
+
+TEST(StmtPrinter, ParamsUglified) {
+  llvm::StringLiteral Code = R"cpp(
+    template <typename _T, int _I, template <typename> class _C>
+    auto foo(int __j) {
+      return typename _C<_T>::_F(_I, __j);
+    }
+  )cpp";
+  auto Clean = [](PrintingPolicy &Policy) {
+    Policy.CleanUglifiedParameters = true;
+  };
+
+  ASSERT_TRUE(PrintedStmtCXXMatches(StdVer::CXX14, Code,
+                                    returnStmt().bind("id"),
+                                    "return typename _C<_T>::_F(_I, __j);\n"));
+  ASSERT_TRUE(
+      PrintedStmtCXXMatches(StdVer::CXX14, Code, returnStmt().bind("id"),
+                            "return typename C<T>::_F(I, j);\n", Clean));
+}
diff --git a/clang/unittests/AST/TypePrinterTest.cpp b/clang/unittests/AST/TypePrinterTest.cpp
index 07dc21a88fba1..8500d518d25fe 100644
--- a/clang/unittests/AST/TypePrinterTest.cpp
+++ b/clang/unittests/AST/TypePrinterTest.cpp
@@ -62,4 +62,21 @@ TEST(TypePrinter, TemplateId) {
   ASSERT_TRUE(PrintedTypeMatches(
       Code, {}, Matcher, "const N::Type<T> &",
       [](PrintingPolicy &Policy) { Policy.FullyQualifiedName = true; }));
-}
\ No newline at end of file
+}
+
+TEST(TypePrinter, ParamsUglified) {
+  llvm::StringLiteral Code = R"cpp(
+    template <typename _Tp, template <typename> class __f>
+    const __f<_Tp&> *A = nullptr;
+  )cpp";
+  auto Clean = [](PrintingPolicy &Policy) {
+    Policy.CleanUglifiedParameters = true;
+  };
+
+  ASSERT_TRUE(PrintedTypeMatches(Code, {},
+                                 varDecl(hasType(qualType().bind("id"))),
+                                 "const __f<_Tp &> *", nullptr));
+  ASSERT_TRUE(PrintedTypeMatches(Code, {},
+                                 varDecl(hasType(qualType().bind("id"))),
+                                 "const f<Tp &> *", Clean));
+}

From f7202723304461c4f94399b906333d6ede85579a Mon Sep 17 00:00:00 2001
From: Jan Svoboda <jan_svoboda@apple.com>
Date: Wed, 26 Jan 2022 15:20:19 +0100
Subject: [PATCH 733/946] [clang][lex] Include tracking: simplify and move to
 preprocessor

This patch replaces the exact include count of each file in `HeaderFileInfo` with a set of included files in `Preprocessor`.

The number of includes isn't a property of a header file but rather a preprocessor state. The exact number of includes is not used anywhere except statistic tracking.

Reviewed By: vsapsai

Differential Revision: https://reviews.llvm.org/D114095
---
 clang/include/clang/Lex/HeaderSearch.h        | 11 +----
 clang/include/clang/Lex/Preprocessor.h        | 21 ++++++++++
 .../include/clang/Serialization/ASTBitCodes.h |  3 ++
 clang/include/clang/Serialization/ASTReader.h |  1 +
 clang/include/clang/Serialization/ASTWriter.h |  1 +
 clang/lib/Lex/HeaderSearch.cpp                | 20 +++------
 clang/lib/Lex/PPDirectives.cpp                |  2 +-
 clang/lib/Lex/Preprocessor.cpp                |  2 +-
 clang/lib/Serialization/ASTReader.cpp         | 24 +++++++++--
 clang/lib/Serialization/ASTWriter.cpp         | 41 ++++++++++++++++++-
 10 files changed, 94 insertions(+), 32 deletions(-)

diff --git a/clang/include/clang/Lex/HeaderSearch.h b/clang/include/clang/Lex/HeaderSearch.h
index 9b9d28433c080..74768717470bf 100644
--- a/clang/include/clang/Lex/HeaderSearch.h
+++ b/clang/include/clang/Lex/HeaderSearch.h
@@ -57,6 +57,8 @@ class TargetInfo;
 /// The preprocessor keeps track of this information for each
 /// file that is \#included.
 struct HeaderFileInfo {
+  // TODO: Whether the file was imported is not a property of the file itself.
+  // It's a preprocessor state, move it there.
   /// True if this is a \#import'd file.
   unsigned isImport : 1;
 
@@ -95,9 +97,6 @@ struct HeaderFileInfo {
   /// Whether this file has been looked up as a header.
   unsigned IsValid : 1;
 
-  /// The number of times the file has been included already.
-  unsigned short NumIncludes = 0;
-
   /// The ID number of the controlling macro.
   ///
   /// This ID number will be non-zero when there is a controlling
@@ -469,12 +468,6 @@ class HeaderSearch {
                             ModuleMap::ModuleHeaderRole Role,
                             bool isCompilingModuleHeader);
 
-  /// Increment the count for the number of times the specified
-  /// FileEntry has been entered.
-  void IncrementIncludeCount(const FileEntry *File) {
-    ++getFileInfo(File).NumIncludes;
-  }
-
   /// Mark the specified file as having a controlling macro.
   ///
   /// This is used by the multiple-include optimization to eliminate
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
index c62bf0c4ceb6f..e567f6391531d 100644
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -450,6 +450,8 @@ class Preprocessor {
           ElseLoc(ElseLoc) {}
   };
 
+  using IncludedFilesSet = llvm::DenseSet<const FileEntry *>;
+
 private:
   friend class ASTReader;
   friend class MacroArgs;
@@ -765,6 +767,9 @@ class Preprocessor {
   /// in a submodule.
   SubmoduleState *CurSubmoduleState;
 
+  /// The files that have been included.
+  IncludedFilesSet IncludedFiles;
+
   /// The set of known macros exported from modules.
   llvm::FoldingSet<ModuleMacro> ModuleMacros;
 
@@ -1224,6 +1229,22 @@ class Preprocessor {
 
   /// \}
 
+  /// Mark the file as included.
+  /// Returns true if this is the first time the file was included.
+  bool markIncluded(const FileEntry *File) {
+    HeaderInfo.getFileInfo(File);
+    return IncludedFiles.insert(File).second;
+  }
+
+  /// Return true if this header has already been included.
+  bool alreadyIncluded(const FileEntry *File) const {
+    return IncludedFiles.count(File);
+  }
+
+  /// Get the set of included files.
+  IncludedFilesSet &getIncludedFiles() { return IncludedFiles; }
+  const IncludedFilesSet &getIncludedFiles() const { return IncludedFiles; }
+
   /// Return the name of the macro defined before \p Loc that has
   /// spelling \p Tokens.  If there are multiple macros with same spelling,
   /// return the last one defined.
diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h
index 341da5bd1d62e..f98e173b158c1 100644
--- a/clang/include/clang/Serialization/ASTBitCodes.h
+++ b/clang/include/clang/Serialization/ASTBitCodes.h
@@ -695,6 +695,9 @@ enum ASTRecordTypes {
 
   /// Record code for \#pragma float_control options.
   FLOAT_CONTROL_PRAGMA_OPTIONS = 65,
+
+  /// Record code for included files.
+  PP_INCLUDED_FILES = 66,
 };
 
 /// Record types used within a source manager block.
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h
index a36c8ba20a10a..d46a6c4500f4e 100644
--- a/clang/include/clang/Serialization/ASTReader.h
+++ b/clang/include/clang/Serialization/ASTReader.h
@@ -1329,6 +1329,7 @@ class ASTReader
   llvm::Error ReadSourceManagerBlock(ModuleFile &F);
   llvm::BitstreamCursor &SLocCursorForID(int ID);
   SourceLocation getImportLocation(ModuleFile *F);
+  void readIncludedFiles(ModuleFile &F, StringRef Blob, Preprocessor &PP);
   ASTReadResult ReadModuleMapFileBlock(RecordData &Record, ModuleFile &F,
                                        const ModuleFile *ImportedBy,
                                        unsigned ClientLoadCapabilities);
diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h
index 27a8770d7f267..e455e4d4d96a5 100644
--- a/clang/include/clang/Serialization/ASTWriter.h
+++ b/clang/include/clang/Serialization/ASTWriter.h
@@ -465,6 +465,7 @@ class ASTWriter : public ASTDeserializationListener,
                        std::set<const FileEntry *> &AffectingModuleMaps);
   void WriteSourceManagerBlock(SourceManager &SourceMgr,
                                const Preprocessor &PP);
+  void writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP);
   void WritePreprocessor(const Preprocessor &PP, bool IsModule);
   void WriteHeaderSearch(const HeaderSearch &HS);
   void WritePreprocessorDetail(PreprocessingRecord &PPRec,
diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp
index fcd759af6d5ea..39c125c395ef8 100644
--- a/clang/lib/Lex/HeaderSearch.cpp
+++ b/clang/lib/Lex/HeaderSearch.cpp
@@ -90,16 +90,10 @@ HeaderSearch::HeaderSearch(std::shared_ptr<HeaderSearchOptions> HSOpts,
 void HeaderSearch::PrintStats() {
   llvm::errs() << "\n*** HeaderSearch Stats:\n"
                << FileInfo.size() << " files tracked.\n";
-  unsigned NumOnceOnlyFiles = 0, MaxNumIncludes = 0, NumSingleIncludedFiles = 0;
-  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i) {
+  unsigned NumOnceOnlyFiles = 0;
+  for (unsigned i = 0, e = FileInfo.size(); i != e; ++i)
     NumOnceOnlyFiles += (FileInfo[i].isPragmaOnce || FileInfo[i].isImport);
-    if (MaxNumIncludes < FileInfo[i].NumIncludes)
-      MaxNumIncludes = FileInfo[i].NumIncludes;
-    NumSingleIncludedFiles += FileInfo[i].NumIncludes == 1;
-  }
-  llvm::errs() << "  " << NumOnceOnlyFiles << " #import/#pragma once files.\n"
-               << "  " << NumSingleIncludedFiles << " included exactly once.\n"
-               << "  " << MaxNumIncludes << " max times a file is included.\n";
+  llvm::errs() << "  " << NumOnceOnlyFiles << " #import/#pragma once files.\n";
 
   llvm::errs() << "  " << NumIncluded << " #include/#include_next/#import.\n"
                << "    " << NumMultiIncludeFileOptzn
@@ -1243,7 +1237,6 @@ static void mergeHeaderFileInfo(HeaderFileInfo &HFI,
   HFI.isImport |= OtherHFI.isImport;
   HFI.isPragmaOnce |= OtherHFI.isPragmaOnce;
   HFI.isModuleHeader |= OtherHFI.isModuleHeader;
-  HFI.NumIncludes += OtherHFI.NumIncludes;
 
   if (!HFI.ControllingMacro && !HFI.ControllingMacroID) {
     HFI.ControllingMacro = OtherHFI.ControllingMacro;
@@ -1404,7 +1397,7 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
     FileInfo.isImport = true;
 
     // Has this already been #import'ed or #include'd?
-    if (FileInfo.NumIncludes && !TryEnterImported())
+    if (PP.alreadyIncluded(File) && !TryEnterImported())
       return false;
   } else {
     // Otherwise, if this is a #include of a file that was previously #import'd
@@ -1427,10 +1420,7 @@ bool HeaderSearch::ShouldEnterIncludeFile(Preprocessor &PP,
     }
   }
 
-  // Increment the number of times this file has been included.
-  ++FileInfo.NumIncludes;
-
-  IsFirstIncludeOfFile = FileInfo.NumIncludes == 1;
+  IsFirstIncludeOfFile = PP.markIncluded(File);
 
   return true;
 }
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index 29fc8b3aa7a0c..f3aefdd22b514 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -2058,7 +2058,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
   // include cycle. Don't enter already processed files again as it can lead to
   // reaching the max allowed include depth again.
   if (Action == Enter && HasReachedMaxIncludeDepth && File &&
-      HeaderInfo.getFileInfo(&File->getFileEntry()).NumIncludes)
+      alreadyIncluded(*File))
     Action = IncludeLimitReached;
 
   // Determine whether we should try to import the module for this #include, if
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
index 3eea0be7b762c..3c338a2b81235 100644
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -549,7 +549,7 @@ void Preprocessor::EnterMainSourceFile() {
     // Tell the header info that the main file was entered.  If the file is later
     // #imported, it won't be re-entered.
     if (const FileEntry *FE = SourceMgr.getFileEntryForID(MainFileID))
-      HeaderInfo.IncrementIncludeCount(FE);
+      markIncluded(FE);
   }
 
   // Preprocess Predefines to populate the initial preprocessor state.
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index 9056f00978c8f..d806fb9e19494 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -1887,10 +1887,6 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d,
   HFI.isPragmaOnce |= (Flags >> 4) & 0x01;
   HFI.DirInfo = (Flags >> 1) & 0x07;
   HFI.IndexHeaderMapHeader = Flags & 0x01;
-  // FIXME: Find a better way to handle this. Maybe just store a
-  // "has been included" flag?
-  HFI.NumIncludes = std::max(endian::readNext<uint16_t, little, unaligned>(d),
-                             HFI.NumIncludes);
   HFI.ControllingMacroID = Reader.getGlobalIdentifierID(
       M, endian::readNext<uint32_t, little, unaligned>(d));
   if (unsigned FrameworkOffset =
@@ -2962,6 +2958,22 @@ ASTReader::ReadControlBlock(ModuleFile &F,
   }
 }
 
+void ASTReader::readIncludedFiles(ModuleFile &F, StringRef Blob,
+                                  Preprocessor &PP) {
+  using namespace llvm::support;
+
+  const unsigned char *D = (const unsigned char *)Blob.data();
+  unsigned FileCount = endian::readNext<uint32_t, little, unaligned>(D);
+
+  for (unsigned I = 0; I < FileCount; ++I) {
+    size_t ID = endian::readNext<uint32_t, little, unaligned>(D);
+    InputFileInfo IFI = readInputFileInfo(F, ID);
+    if (llvm::ErrorOr<const FileEntry *> File =
+            PP.getFileManager().getFile(IFI.Filename))
+      PP.getIncludedFiles().insert(*File);
+  }
+}
+
 llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
                                     unsigned ClientLoadCapabilities) {
   BitstreamCursor &Stream = F.Stream;
@@ -3700,6 +3712,10 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
       break;
     }
 
+    case PP_INCLUDED_FILES:
+      readIncludedFiles(F, Blob, PP);
+      break;
+
     case LATE_PARSED_TEMPLATE:
       LateParsedTemplates.emplace_back(
           std::piecewise_construct, std::forward_as_tuple(&F),
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index c2bee93b077e6..763fc9537c04b 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -862,6 +862,7 @@ void ASTWriter::WriteBlockInfoBlock() {
   RECORD(CUDA_PRAGMA_FORCE_HOST_DEVICE_DEPTH);
   RECORD(PP_CONDITIONAL_STACK);
   RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
+  RECORD(PP_INCLUDED_FILES);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -1773,7 +1774,7 @@ namespace {
     std::pair<unsigned, unsigned>
     EmitKeyDataLength(raw_ostream& Out, key_type_ref key, data_type_ref Data) {
       unsigned KeyLen = key.Filename.size() + 1 + 8 + 8;
-      unsigned DataLen = 1 + 2 + 4 + 4;
+      unsigned DataLen = 1 + 4 + 4;
       for (auto ModInfo : Data.KnownHeaders)
         if (Writer.getLocalOrImportedSubmoduleID(ModInfo.getModule()))
           DataLen += 4;
@@ -1805,7 +1806,6 @@ namespace {
                           | (Data.HFI.DirInfo << 1)
                           | Data.HFI.IndexHeaderMapHeader;
       LE.write<uint8_t>(Flags);
-      LE.write<uint16_t>(Data.HFI.NumIncludes);
 
       if (!Data.HFI.ControllingMacro)
         LE.write<uint32_t>(Data.HFI.ControllingMacroID);
@@ -2254,6 +2254,29 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule,
   return false;
 }
 
+void ASTWriter::writeIncludedFiles(raw_ostream &Out, const Preprocessor &PP) {
+  using namespace llvm::support;
+
+  const Preprocessor::IncludedFilesSet &IncludedFiles = PP.getIncludedFiles();
+
+  std::vector<uint32_t> IncludedInputFileIDs;
+  IncludedInputFileIDs.reserve(IncludedFiles.size());
+
+  for (const FileEntry *File : IncludedFiles) {
+    auto InputFileIt = InputFileIDs.find(File);
+    if (InputFileIt == InputFileIDs.end())
+      continue;
+    IncludedInputFileIDs.push_back(InputFileIt->second);
+  }
+
+  llvm::sort(IncludedInputFileIDs);
+
+  endian::Writer LE(Out, little);
+  LE.write<uint32_t>(IncludedInputFileIDs.size());
+  for (uint32_t ID : IncludedInputFileIDs)
+    LE.write<uint32_t>(ID);
+}
+
 /// Writes the block containing the serialized form of the
 /// preprocessor.
 void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
@@ -2462,6 +2485,20 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
                                        MacroOffsetsBase - ASTBlockStartOffset};
     Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets));
   }
+
+  {
+    auto Abbrev = std::make_shared<BitCodeAbbrev>();
+    Abbrev->Add(BitCodeAbbrevOp(PP_INCLUDED_FILES));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+    unsigned IncludedFilesAbbrev = Stream.EmitAbbrev(std::move(Abbrev));
+
+    SmallString<2048> Buffer;
+    raw_svector_ostream Out(Buffer);
+    writeIncludedFiles(Out, PP);
+    RecordData::value_type Record[] = {PP_INCLUDED_FILES};
+    Stream.EmitRecordWithBlob(IncludedFilesAbbrev, Record, Buffer.data(),
+                              Buffer.size());
+  }
 }
 
 void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec,

From dc441d776f9f8c3bbde4e234c606abffe7d5a95e Mon Sep 17 00:00:00 2001
From: Christian Sigg <csigg@google.com>
Date: Wed, 26 Jan 2022 15:52:25 +0100
Subject: [PATCH 734/946] [NVPTX] NFC: Remove unused arguments and attribute
 from test

---
 llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll b/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll
index 673dd6f44314b..8f702a1434618 100644
--- a/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/NVPTX/fminimum-fmaximum.ll
@@ -34,7 +34,7 @@ define double @minimum_double(double %a) #0 {
 }
 
 ; CHECK-LABEL: minimum_v2half
-define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) #0 {
+define <2 x half> @minimum_v2half(<2 x half> %a) #0 {
   ; CHECK-NONAN-DAG: setp
   ; CHECK-NONAN-DAG: setp
   ; CHECK-NONAN-DAG: selp.b16
@@ -78,7 +78,7 @@ define double @maximum_double(double %a) #0 {
 }
 
 ; CHECK-LABEL: maximum_v2half
-define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) #0 {
+define <2 x half> @maximum_v2half(<2 x half> %a) #0 {
   ; CHECK-NONAN-DAG: setp
   ; CHECK-NONAN-DAG: setp
   ; CHECK-NONAN-DAG: selp.b16
@@ -88,5 +88,3 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) #0 {
   %x = select <2 x i1> %p, <2 x half> %a, <2 x half> zeroinitializer
   ret <2 x half> %x
 }
-
-attributes #0 = { "no-signed-zeros-fp-math"="true" }

From 0c56bc92e4b9cf949f431d1c9e11e9b509ef6dbd Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Mon, 24 Jan 2022 13:19:20 -0600
Subject: [PATCH 735/946] [PowerPC] Fix eq/ne comparison of v2i64 pre-Power8

In commit 1674d9b6b2da, I fixed the bug where we didn't consider
both words of the result of the comparison. However, the logic
needs to be different for eq and ne.
Namely for eq, we need both words of the doubleword to equal so it
is an AND. OTOH for ne, we need either word to be unequal so it
is an OR.
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   |   5 +-
 .../CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll    | 114 +++++++++++++++++-
 .../PowerPC/vector-popcnt-128-ult-ugt.ll      |   2 +-
 llvm/test/CodeGen/PowerPC/vsx.ll              |   6 +-
 4 files changed, 117 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 105a43510369d..25cc34badda04 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3508,8 +3508,9 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
       int ShuffV[] = {1, 0, 3, 2};
       SDValue Shuff =
           DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
-      return DAG.getBitcast(
-          MVT::v2i64, DAG.getNode(ISD::AND, dl, MVT::v4i32, Shuff, SetCC32));
+      return DAG.getBitcast(MVT::v2i64,
+                            DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
+                                        dl, MVT::v4i32, Shuff, SetCC32));
     }
 
     // We handle most of these in the usual way.
diff --git a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
index b63b142793eec..e9d792c6a4408 100644
--- a/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
+++ b/llvm/test/CodeGen/PowerPC/vec-icmpeq-v2i64-p7.ll
@@ -5,8 +5,8 @@
 ; RUN:   FileCheck %s --check-prefix=CHECK_LE
 ; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64le-- < %s | \
 ; RUN:   FileCheck %s --check-prefix=CHECK_P8LE
-define i1 @foo(<2 x i64> %a) #0 {
-; CHECK-LABEL: foo:
+define i1 @shufeq(<2 x i64> %a) #0 {
+; CHECK-LABEL: shufeq:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    xxswapd 35, 34
 ; CHECK-NEXT:    lwz 3, L..C0(2) # %const.0
@@ -19,7 +19,7 @@ define i1 @foo(<2 x i64> %a) #0 {
 ; CHECK-NEXT:    lwz 3, -12(1)
 ; CHECK-NEXT:    blr
 ;
-; CHECK_LE-LABEL: foo:
+; CHECK_LE-LABEL: shufeq:
 ; CHECK_LE:       # %bb.0: # %entry
 ; CHECK_LE-NEXT:    xxswapd 35, 34
 ; CHECK_LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
@@ -33,7 +33,7 @@ define i1 @foo(<2 x i64> %a) #0 {
 ; CHECK_LE-NEXT:    ld 3, -16(1)
 ; CHECK_LE-NEXT:    blr
 ;
-; CHECK_P8LE-LABEL: foo:
+; CHECK_P8LE-LABEL: shufeq:
 ; CHECK_P8LE:       # %bb.0: # %entry
 ; CHECK_P8LE-NEXT:    xxswapd 35, 34
 ; CHECK_P8LE-NEXT:    vcmpequd 2, 2, 3
@@ -46,3 +46,109 @@ entry:
   %2 = extractelement <2 x i1> %1, i32 0
   ret i1 %2
 }
+
+define i1 @shufne(<2 x i64> %a) #0 {
+; CHECK-LABEL: shufne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxswapd 35, 34
+; CHECK-NEXT:    lwz 3, L..C1(2) # %const.0
+; CHECK-NEXT:    vcmpequw 2, 2, 3
+; CHECK-NEXT:    lxvw4x 35, 0, 3
+; CHECK-NEXT:    addi 3, 1, -16
+; CHECK-NEXT:    vperm 3, 2, 2, 3
+; CHECK-NEXT:    xxland 0, 35, 34
+; CHECK-NEXT:    stxvw4x 0, 0, 3
+; CHECK-NEXT:    lwz 3, -12(1)
+; CHECK-NEXT:    blr
+;
+; CHECK_LE-LABEL: shufne:
+; CHECK_LE:       # %bb.0: # %entry
+; CHECK_LE-NEXT:    xxswapd 35, 34
+; CHECK_LE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
+; CHECK_LE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
+; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
+; CHECK_LE-NEXT:    lvx 3, 0, 3
+; CHECK_LE-NEXT:    addi 3, 1, -16
+; CHECK_LE-NEXT:    vperm 3, 2, 2, 3
+; CHECK_LE-NEXT:    xxland 34, 35, 34
+; CHECK_LE-NEXT:    stvx 2, 0, 3
+; CHECK_LE-NEXT:    ld 3, -16(1)
+; CHECK_LE-NEXT:    blr
+;
+; CHECK_P8LE-LABEL: shufne:
+; CHECK_P8LE:       # %bb.0: # %entry
+; CHECK_P8LE-NEXT:    xxswapd 35, 34
+; CHECK_P8LE-NEXT:    vcmpequd 2, 2, 3
+; CHECK_P8LE-NEXT:    xxswapd 0, 34
+; CHECK_P8LE-NEXT:    mffprd 3, 0
+; CHECK_P8LE-NEXT:    blr
+entry:
+  %0 = shufflevector <2 x i64> %a, <2 x i64> poison, <2 x i32> <i32 1, i32 undef>
+  %1 = icmp eq <2 x i64> %a, %0
+  %2 = extractelement <2 x i1> %1, i32 0
+  ret i1 %2
+}
+
+define <2 x i64> @cmpeq(<2 x i64> noundef %a, <2 x i64> noundef %b) {
+; CHECK-LABEL: cmpeq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    lwz 3, L..C2(2) # %const.0
+; CHECK-NEXT:    vcmpequw 2, 2, 3
+; CHECK-NEXT:    lxvw4x 35, 0, 3
+; CHECK-NEXT:    vperm 3, 2, 2, 3
+; CHECK-NEXT:    xxland 34, 35, 34
+; CHECK-NEXT:    blr
+;
+; CHECK_LE-LABEL: cmpeq:
+; CHECK_LE:       # %bb.0: # %entry
+; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
+; CHECK_LE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
+; CHECK_LE-NEXT:    addi 3, 3, .LCPI2_0@toc@l
+; CHECK_LE-NEXT:    lvx 3, 0, 3
+; CHECK_LE-NEXT:    vperm 3, 2, 2, 3
+; CHECK_LE-NEXT:    xxland 34, 35, 34
+; CHECK_LE-NEXT:    blr
+;
+; CHECK_P8LE-LABEL: cmpeq:
+; CHECK_P8LE:       # %bb.0: # %entry
+; CHECK_P8LE-NEXT:    vcmpequd 2, 2, 3
+; CHECK_P8LE-NEXT:    blr
+entry:
+  %cmp = icmp eq <2 x i64> %a, %b
+  %sext = sext <2 x i1> %cmp to <2 x i64>
+  ret <2 x i64> %sext
+}
+
+define <2 x i64> @cmpne(<2 x i64> noundef %a, <2 x i64> noundef %b) {
+; CHECK-LABEL: cmpne:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vcmpequw 2, 2, 3
+; CHECK-NEXT:    lwz 3, L..C3(2) # %const.0
+; CHECK-NEXT:    lxvw4x 35, 0, 3
+; CHECK-NEXT:    xxlnor 34, 34, 34
+; CHECK-NEXT:    vperm 3, 2, 2, 3
+; CHECK-NEXT:    xxlor 34, 35, 34
+; CHECK-NEXT:    blr
+;
+; CHECK_LE-LABEL: cmpne:
+; CHECK_LE:       # %bb.0: # %entry
+; CHECK_LE-NEXT:    vcmpequw 2, 2, 3
+; CHECK_LE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
+; CHECK_LE-NEXT:    addi 3, 3, .LCPI3_0@toc@l
+; CHECK_LE-NEXT:    lvx 3, 0, 3
+; CHECK_LE-NEXT:    xxlnor 34, 34, 34
+; CHECK_LE-NEXT:    vperm 3, 2, 2, 3
+; CHECK_LE-NEXT:    xxlor 34, 35, 34
+; CHECK_LE-NEXT:    blr
+;
+; CHECK_P8LE-LABEL: cmpne:
+; CHECK_P8LE:       # %bb.0: # %entry
+; CHECK_P8LE-NEXT:    vcmpequd 2, 2, 3
+; CHECK_P8LE-NEXT:    xxlnor 34, 34, 34
+; CHECK_P8LE-NEXT:    blr
+entry:
+  %cmp = icmp ne <2 x i64> %a, %b
+  %sext = sext <2 x i1> %cmp to <2 x i64>
+  ret <2 x i64> %sext
+}
+
diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
index eee3ff42bd133..c55eae88a39b8 100644
--- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
@@ -11986,7 +11986,7 @@ define <2 x i64> @ugt_1_v2i64(<2 x i64> %0) {
 ; PWR7-NEXT:    lxvw4x 35, 0, 3
 ; PWR7-NEXT:    xxlnor 34, 34, 34
 ; PWR7-NEXT:    vperm 3, 2, 2, 3
-; PWR7-NEXT:    xxland 34, 35, 34
+; PWR7-NEXT:    xxlor 34, 35, 34
 ; PWR7-NEXT:    blr
 ;
 ; PWR8-LABEL: ugt_1_v2i64:
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index d882050b2e5ad..10dbd4d363a62 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -2079,7 +2079,7 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-NEXT:    xxlnor v2, v2, v2
 ; CHECK-NEXT:    vperm v3, v2, v2, v3
-; CHECK-NEXT:    xxland v2, v3, v2
+; CHECK-NEXT:    xxlor v2, v3, v2
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-REG-LABEL: test66:
@@ -2090,7 +2090,7 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-REG-NEXT:    lxvw4x v3, 0, r3
 ; CHECK-REG-NEXT:    xxlnor v2, v2, v2
 ; CHECK-REG-NEXT:    vperm v3, v2, v2, v3
-; CHECK-REG-NEXT:    xxland v2, v3, v2
+; CHECK-REG-NEXT:    xxlor v2, v3, v2
 ; CHECK-REG-NEXT:    blr
 ;
 ; CHECK-FISL-LABEL: test66:
@@ -2101,7 +2101,7 @@ define <2 x i1> @test66(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-FISL-NEXT:    addi r3, r3, .LCPI60_0@toc@l
 ; CHECK-FISL-NEXT:    lxvw4x v2, 0, r3
 ; CHECK-FISL-NEXT:    vperm v2, v3, v3, v2
-; CHECK-FISL-NEXT:    xxland v2, v2, v3
+; CHECK-FISL-NEXT:    xxlor v2, v2, v3
 ; CHECK-FISL-NEXT:    blr
 ;
 ; CHECK-LE-LABEL: test66:

From 1f3aa2af9d79daa6431443caac04839e54644a8f Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Wed, 26 Jan 2022 15:59:21 +0100
Subject: [PATCH 736/946] [Visualizers] Fix SmallVector<T> visualizer for T
 inside an anonymous namespace. Use `value_type` instead of `$T1`.

At least on MSVC 2022, using $T1 does not work.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D118105
---
 llvm/utils/LLVMVisualizers/llvm.natvis | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/utils/LLVMVisualizers/llvm.natvis b/llvm/utils/LLVMVisualizers/llvm.natvis
index 108f1912c75e8..622ea7788c54b 100644
--- a/llvm/utils/LLVMVisualizers/llvm.natvis
+++ b/llvm/utils/LLVMVisualizers/llvm.natvis
@@ -10,13 +10,13 @@ For later versions of Visual Studio, no setup is required.
 <AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
   <Type Name="llvm::SmallVectorImpl&lt;*&gt;">
     <DisplayString IncludeView ="elt0" Condition="Size == 0"></DisplayString>
-    <DisplayString IncludeView ="elt0">{(($T1*)BeginX)[0]}{*this,view(elt1)}</DisplayString>
+    <DisplayString IncludeView ="elt0">{((value_type*)BeginX)[0]}{*this,view(elt1)}</DisplayString>
     <DisplayString IncludeView ="elt1" Condition="Size == 1"></DisplayString>
-    <DisplayString IncludeView ="elt1">, {(($T1*)BeginX)[1]}{*this,view(elt2)}</DisplayString>
+    <DisplayString IncludeView ="elt1">, {((value_type*)BeginX)[1]}{*this,view(elt2)}</DisplayString>
     <DisplayString IncludeView ="elt2" Condition="Size == 2"></DisplayString>
-    <DisplayString IncludeView ="elt2">, {(($T1*)BeginX)[2]}{*this,view(elt3)}</DisplayString>
+    <DisplayString IncludeView ="elt2">, {((value_type*)BeginX)[2]}{*this,view(elt3)}</DisplayString>
     <DisplayString IncludeView ="elt3" Condition="Size == 3"></DisplayString>
-    <DisplayString IncludeView ="elt3">, {(($T1*)BeginX)[3]}{*this,view(elt4)}</DisplayString>
+    <DisplayString IncludeView ="elt3">, {((value_type*)BeginX)[3]}{*this,view(elt4)}</DisplayString>
     <DisplayString IncludeView ="elt4" Condition="Size == 4"></DisplayString>
     <DisplayString IncludeView ="elt4">, /* {Size - 4} more*/ </DisplayString>
     <DisplayString Condition="Size == 0">empty</DisplayString>
@@ -27,7 +27,7 @@ For later versions of Visual Studio, no setup is required.
       <Item Name="[capacity]">Capacity</Item>
       <ArrayItems>
         <Size>Size</Size>
-        <ValuePointer>($T1*)BeginX</ValuePointer>
+        <ValuePointer>(value_type*)BeginX</ValuePointer>
       </ArrayItems>
     </Expand>
   </Type>

From 268524238e903261231c6dafca65d9831e3ca34c Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Wed, 26 Jan 2022 23:53:55 +0900
Subject: [PATCH 737/946] [mlir][bufferization] Add an option to use memref
 types without layout maps

This is for compatibility with existing bufferization passes. Also clean up memref type generation a bit.

Differential Revision: https://reviews.llvm.org/D118243
---
 .../IR/BufferizableOpInterface.h              | 18 +++---
 mlir/include/mlir/Dialect/Linalg/Passes.td    |  3 +
 .../IR/BufferizableOpInterface.cpp            | 60 +++++++++++--------
 .../ModuleBufferization.cpp                   | 21 +++----
 .../SCFInterfaceImpl.cpp                      | 14 ++---
 .../Transforms/ComprehensiveBufferizePass.cpp |  1 +
 .../BufferizableOpInterfaceImpl.cpp           | 13 +---
 ...omprehensive-module-bufferize-partial.mlir | 19 ++++--
 .../comprehensive-module-bufferize.mlir       |  7 +++
 .../Linalg/TestComprehensiveBufferize.cpp     |  5 ++
 10 files changed, 94 insertions(+), 67 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index bbac6e59aeeb2..5107710413ead 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -98,6 +98,10 @@ struct BufferizationOptions {
   /// Should be used only with `testAnalysisOnly = true`.
   unsigned analysisFuzzerSeed = 0;
 
+  /// Specifies whether fully dynamic layout maps should be used on ranked
+  /// MemRef types. If false, MemRef types will have no layout maps.
+  bool fullyDynamicLayoutMaps = true;
+
   /// If set to `true`, does not modify the IR apart from adding attributes (for
   /// checking the results of the analysis) and post analysis steps.
   bool testAnalysisOnly = false;
@@ -282,21 +286,17 @@ OpTy replaceOpWithNewBufferizedOp(RewriterBase &rewriter, Operation *op,
 }
 
 /// Return a contiguous MemRefType (i.e. with canonical/empty layout map)
-/// with the same shape as `shapedType` and specified `layout` and
-/// `addressSpace`.
+/// with the same shape as `shapedType` and specified `addressSpace`.
 MemRefType getContiguousMemRefType(ShapedType shapedType,
-                                   MemRefLayoutAttrInterface layout = {},
                                    Attribute memorySpace = {});
 
-/// Return an UnrankedMemRefType with the given element type and memory space.
-UnrankedMemRefType getUnrankedMemRefType(Type elementType,
-                                         Attribute memorySpace = {});
-
 /// Return a MemRefType to which the `tensorType` can be bufferized in a
 /// composable fashion. The layout must be the most dynamic possible and
 /// canonicalize away once bufferization is finished.
-MemRefType getDynamicMemRefType(RankedTensorType tensorType,
-                                unsigned addressSpace = 0);
+BaseMemRefType getMemRefType(TensorType tensorType,
+                             const BufferizationOptions &options,
+                             MemRefLayoutAttrInterface layout = {},
+                             Attribute memorySpace = {});
 
 /// Creates a memref allocation with the given type and dynamic extents.
 FailureOr<Value> createAlloc(OpBuilder &b, Location loc, MemRefType type,
diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td
index c67ebc84a5cf9..fac60e2fd2b8c 100644
--- a/mlir/include/mlir/Dialect/Linalg/Passes.td
+++ b/mlir/include/mlir/Dialect/Linalg/Passes.td
@@ -55,6 +55,9 @@ def LinalgComprehensiveModuleBufferize :
     Option<"useLinalgCopy", "use-linalg-copy", "bool",
            /*default=*/"false",
            "Use a copy operation implemented as a Linalg op.">,
+    Option<"fullyDynamicLayoutMaps", "fully-dynamic-layout-maps", "bool",
+           /*default=*/"true",
+           "Generate MemRef types with dynamic offset+strides by default.">,
     Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
            /*default=*/"0",
            "Analyze ops in random order with a given seed (fuzzer)">,
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 7d91229625cca..276950d4ef19e 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -210,8 +210,10 @@ static void ensureToMemrefOpIsValid(Value tensor, Type memrefType) {
 #endif
 }
 
-static Value lookupBuffer(RewriterBase &rewriter, Value tensor) {
-  assert(tensor.getType().isa<TensorType>() && "unexpected non-tensor type");
+static Value lookupBuffer(RewriterBase &rewriter, Value tensor,
+                          const BufferizationOptions &options) {
+  auto tensorType = tensor.getType().dyn_cast<TensorType>();
+  assert(tensorType && "unexpected non-tensor type");
 
   // Replace "%t = to_tensor %m" with %m.
   if (auto toTensorOp = tensor.getDefiningOp<bufferization::ToTensorOp>())
@@ -220,13 +222,7 @@ static Value lookupBuffer(RewriterBase &rewriter, Value tensor) {
   // Insert to_memref op.
   OpBuilder::InsertionGuard g(rewriter);
   setInsertionPointAfter(rewriter, tensor);
-  Type memrefType;
-  if (auto rankedTensorType = tensor.getType().dyn_cast<RankedTensorType>()) {
-    memrefType = getDynamicMemRefType(rankedTensorType);
-  } else {
-    memrefType = getUnrankedMemRefType(
-        tensor.getType().cast<TensorType>().getElementType());
-  }
+  Type memrefType = getMemRefType(tensorType, options);
   ensureToMemrefOpIsValid(tensor, memrefType);
   return rewriter.create<bufferization::ToMemrefOp>(tensor.getLoc(), memrefType,
                                                     tensor);
@@ -242,7 +238,7 @@ FailureOr<Value> BufferizationState::getBuffer(
   Operation *op = opOperand.getOwner();
   Location loc = op->getLoc();
   Value operand = opOperand.get();
-  Value operandBuffer = lookupBuffer(rewriter, operand);
+  Value operandBuffer = lookupBuffer(rewriter, operand, options);
 
   if (forceInPlace || isInPlace(opOperand))
     return operandBuffer;
@@ -513,27 +509,43 @@ bool bufferization::isFunctionArgument(Value value) {
   return isa<FuncOp>(bbArg.getOwner()->getParentOp());
 }
 
-MemRefType
-bufferization::getContiguousMemRefType(ShapedType shapedType,
-                                       MemRefLayoutAttrInterface layout,
-                                       Attribute memorySpace) {
+MemRefType bufferization::getContiguousMemRefType(ShapedType shapedType,
+                                                  Attribute memorySpace) {
+  MemRefLayoutAttrInterface layout = {};
   return MemRefType::get(shapedType.getShape(), shapedType.getElementType(),
                          layout, memorySpace);
 }
 
-UnrankedMemRefType bufferization::getUnrankedMemRefType(Type elementType,
-                                                        Attribute memorySpace) {
-  return UnrankedMemRefType::get(elementType, memorySpace);
-}
+BaseMemRefType bufferization::getMemRefType(TensorType tensorType,
+                                            const BufferizationOptions &options,
+                                            MemRefLayoutAttrInterface layout,
+                                            Attribute memorySpace) {
+  // Case 1: Unranked memref type.
+  if (auto unrankedTensorType = tensorType.dyn_cast<UnrankedTensorType>()) {
+    assert(!layout && "UnrankedTensorType cannot have a layout map");
+    return UnrankedMemRefType::get(unrankedTensorType.getElementType(),
+                                   memorySpace);
+  }
+
+  // Case 2: Ranked memref type with specified layout. If fully dynamic layout
+  // maps are not requested, generate a type with `layout`, which is empty (no
+  // layout map) by default.
+  auto rankedTensorType = tensorType.cast<RankedTensorType>();
+  if (layout || !options.fullyDynamicLayoutMaps) {
+    return MemRefType::get(rankedTensorType.getShape(),
+                           rankedTensorType.getElementType(), layout,
+                           memorySpace);
+  }
 
-MemRefType bufferization::getDynamicMemRefType(RankedTensorType tensorType,
-                                               unsigned addressSpace) {
+  // Case 3: Ranked memref type with unspecified layout. Choose the most dynamic
+  // one.
   // TODO: address space decisions to connect with the actual alloc.
   int64_t dynamicOffset = ShapedType::kDynamicStrideOrOffset;
-  SmallVector<int64_t> dynamicStrides(tensorType.getRank(),
+  SmallVector<int64_t> dynamicStrides(rankedTensorType.getRank(),
                                       ShapedType::kDynamicStrideOrOffset);
   AffineMap stridedLayout = makeStridedLinearLayoutMap(
-      dynamicStrides, dynamicOffset, tensorType.getContext());
-  return MemRefType::get(tensorType.getShape(), tensorType.getElementType(),
-                         stridedLayout, addressSpace);
+      dynamicStrides, dynamicOffset, rankedTensorType.getContext());
+  return MemRefType::get(rankedTensorType.getShape(),
+                         rankedTensorType.getElementType(), stridedLayout,
+                         memorySpace);
 }
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
index 0fe79862a69d0..63ffb09320076 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp
@@ -308,16 +308,15 @@ static FuncOp getCalledFunction(CallOpInterface callOp) {
 /// dynamic buffer type supported.
 /// A later pass across all CallOps in the module can decide whether to simplify
 /// the types of to version according to some cost model.
-static FunctionType getBufferizedFunctionType(MLIRContext *ctx,
-                                              TypeRange argumentTypes,
-                                              TypeRange resultTypes) {
-  auto rewrite = [](Type t) -> Type {
+static FunctionType
+getBufferizedFunctionType(MLIRContext *ctx, TypeRange argumentTypes,
+                          TypeRange resultTypes,
+                          const BufferizationOptions &options) {
+  auto rewrite = [&](Type t) -> Type {
     // TODO: non-zero address space.
     // TODO: layout information if relevant.
-    if (auto rankedTensorType = t.dyn_cast<RankedTensorType>())
-      return getDynamicMemRefType(rankedTensorType);
     if (auto tensorType = t.dyn_cast<TensorType>())
-      return getUnrankedMemRefType(tensorType.getElementType());
+      return getMemRefType(tensorType, options);
     return t;
   };
   auto argTypes = llvm::to_vector<4>(llvm::map_range(argumentTypes, rewrite));
@@ -398,7 +397,8 @@ static LogicalResult bufferizeFuncOpBoundary(FuncOp funcOp,
       return funcOp->emitError() << "cannot bufferize bodiless function that "
                                  << "returns a tensor";
     FunctionType bufferizedFuncType = getBufferizedFunctionType(
-        funcOp.getContext(), funcOp.getType().getInputs(), TypeRange{});
+        funcOp.getContext(), funcOp.getType().getInputs(), TypeRange{},
+        state.getOptions());
     funcOp.setType(bufferizedFuncType);
     return success();
   }
@@ -431,7 +431,8 @@ static LogicalResult bufferizeFuncOpBoundary(FuncOp funcOp,
   // 2. Rewrite the terminator without the inPlace bufferizable values.
   ValueRange retValues{returnValues};
   FunctionType bufferizedFuncType = getBufferizedFunctionType(
-      funcOp.getContext(), funcOp.getType().getInputs(), retValues.getTypes());
+      funcOp.getContext(), funcOp.getType().getInputs(), retValues.getTypes(),
+      state.getOptions());
   OpBuilder b(returnOp);
   b.create<ReturnOp>(returnOp.getLoc(), returnValues);
   returnOp->erase();
@@ -822,7 +823,7 @@ struct CallOpInterface
     // Get the bufferized FunctionType for funcOp or construct it if not yet
     // available.
     FunctionType bufferizedFuncType = getBufferizedFunctionType(
-        funcOp.getContext(), argumentTypes, resultTypes);
+        funcOp.getContext(), argumentTypes, resultTypes, state.getOptions());
 
     // 3. Rewrite tensor operands as memrefs based on `bufferizedFuncType`.
     for (OpOperand &opOperand : callOp->getOpOperands()) {
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp
index 87dd5b09773dc..cc8517f8f119f 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp
@@ -74,11 +74,8 @@ struct ExecuteRegionOpInterface
     // Compute new result types.
     SmallVector<Type> newResultTypes;
     for (Type type : executeRegionOp->getResultTypes()) {
-      if (auto rankedTensorType = type.dyn_cast<RankedTensorType>()) {
-        newResultTypes.push_back(getDynamicMemRefType(rankedTensorType));
-      } else if (auto tensorType = type.dyn_cast<TensorType>()) {
-        newResultTypes.push_back(
-            getUnrankedMemRefType(tensorType.getElementType()));
+      if (auto tensorType = type.dyn_cast<TensorType>()) {
+        newResultTypes.push_back(getMemRefType(tensorType, state.getOptions()));
       } else {
         newResultTypes.push_back(type);
       }
@@ -186,11 +183,8 @@ struct IfOpInterface
     // Compute new types of the bufferized scf.if op.
     SmallVector<Type> newTypes;
     for (Type returnType : ifOp->getResultTypes()) {
-      if (returnType.isa<TensorType>()) {
-        assert(returnType.isa<RankedTensorType>() &&
-               "unsupported unranked tensor");
-        newTypes.push_back(
-            getDynamicMemRefType(returnType.cast<RankedTensorType>()));
+      if (auto tensorType = returnType.dyn_cast<TensorType>()) {
+        newTypes.push_back(getMemRefType(tensorType, state.getOptions()));
       } else {
         newTypes.push_back(returnType);
       }
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 12d43300aacf7..3b3b1e4e76ec9 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -120,6 +120,7 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
   options->allowUnknownOps = allowUnknownOps;
   options->analysisFuzzerSeed = analysisFuzzerSeed;
   options->createDeallocs = createDeallocs;
+  options->fullyDynamicLayoutMaps = fullyDynamicLayoutMaps;
   options->printConflicts = printConflicts;
   options->testAnalysisOnly = testAnalysisOnly;
 
diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
index 1c1226b451688..f3c9fb5aeb48f 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -65,14 +65,8 @@ struct CastOpInterface
         layout = rankedMemRefType.getLayout();
 
     // Compute the new memref type.
-    Type resultMemRefType;
-    if (resultTensorType.isa<RankedTensorType>()) {
-      resultMemRefType =
-          getContiguousMemRefType(resultTensorType, layout, memorySpace);
-    } else {
-      resultMemRefType =
-          getUnrankedMemRefType(resultTensorType.getElementType(), memorySpace);
-    }
+    Type resultMemRefType = getMemRefType(resultTensorType, state.getOptions(),
+                                          layout, memorySpace);
 
     // Replace the op with a memref.cast.
     assert(memref::CastOp::areCastCompatible(resultBuffer->getType(),
@@ -263,8 +257,7 @@ struct FromElementsOpInterface
     Location loc = op->getLoc();
     auto tensorType = fromElementsOp.getType().cast<RankedTensorType>();
     auto shape = tensorType.getShape();
-    MemRefType resultType =
-        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
+    MemRefType resultType = getContiguousMemRefType(tensorType);
     FailureOr<Value> maybeBuffer =
         createAlloc(rewriter, loc, resultType, {},
                     /*deallocMemref=*/state.getOptions().createDeallocs,
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir
index aadbeaff86ff8..ea1251fc080b2 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize-partial.mlir
@@ -1,5 +1,8 @@
 // RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops" -split-input-file | FileCheck %s
 
+// Test bufferization using memref types that have no layout map.
+// RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref allow-unknown-ops fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
+
 // Run fuzzer with different seeds.
 // RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=23" -split-input-file -o /dev/null
 // RUN: mlir-opt %s -allow-unregistered-dialect -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
@@ -8,20 +11,28 @@
 // RUN: mlir-opt %s -allow-unregistered-dialect -test-comprehensive-function-bufferize="dialect-filter=tensor allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-TENSOR
 // RUN: mlir-opt %s -allow-unregistered-dialect -test-comprehensive-function-bufferize="dialect-filter=scf allow-unknown-ops allow-return-memref" -canonicalize -split-input-file | FileCheck %s --check-prefix=CHECK-SCF
 
+// CHECK: #[[$MAP:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
 // CHECK-LABEL: func @use_of_unknown_op_1(
-//  CHECK-SAME:     %[[m1:.*]]: memref<?xf32
+//  CHECK-SAME:     %[[m1:.*]]: memref<?xf32, #[[$MAP]]>
+// CHECK-NO-LAYOUT-MAP-LABEL: func @use_of_unknown_op_1(
+//  CHECK-NO-LAYOUT-MAP-SAME:     %[[m1:.*]]: memref<?xf32>)
 func @use_of_unknown_op_1(%t1: tensor<?xf32> {linalg.inplaceable = true})
     -> vector<5xf32> {
   // ToTensorOp is generated because the function is bufferized and has a
   // memref block argument.
-  // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]]
+  // CHECK: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] : memref<?xf32, #[[$MAP]]>
   // CHECK: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]])
+  // CHECK-NO-LAYOUT-MAP: %[[m1_tensor:.*]] = bufferization.to_tensor %[[m1]] : memref<?xf32>
+  // CHECK-NO-LAYOUT-MAP: %[[dummy:.*]] = "test.dummy_op"(%[[m1_tensor]])
   %0 = "test.dummy_op"(%t1) : (tensor<?xf32>) -> tensor<?xf32>
 
   %idx = arith.constant 0 : index
   %cst = arith.constant 0.0 : f32
-  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]]
-  // CHECK: vector.transfer_read %[[dummy_memref]]
+  // CHECK: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref<?xf32, #[[$MAP]]>
+  // CHECK: vector.transfer_read %[[dummy_memref]][%{{.*}}], %{{.*}} : memref<?xf32, #[[$MAP]]>
+  // CHECK-NO-LAYOUT-MAP: %[[dummy_memref:.*]] = bufferization.to_memref %[[dummy]] : memref<?xf32>
+  // CHECK-NO-LAYOUT-MAP: vector.transfer_read %[[dummy_memref]][%{{.*}}], %{{.*}} : memref<?xf32>
   %1 = vector.transfer_read %0[%idx], %cst : tensor<?xf32>, vector<5xf32>
   return %1 : vector<5xf32>
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index c4ea9a48b8ece..42734ae1e9ad5 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -5,7 +5,11 @@
 // RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=59" -split-input-file -o /dev/null
 // RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-memref test-analysis-only analysis-fuzzer-seed=91" -split-input-file -o /dev/null
 
+// Test bufferization using memref types that have no layout map.
+// RUN: mlir-opt %s -linalg-comprehensive-module-bufferize="allow-return-memref fully-dynamic-layout-maps=0" -split-input-file | FileCheck %s --check-prefix=CHECK-NO-LAYOUT-MAP
+
 // CHECK-LABEL: func @transfer_read(%{{.*}}: memref<?xf32, #map>) -> vector<4xf32> {
+// CHECK-NO-LAYOUT-MAP-LABEL: func @transfer_read(%{{.*}}: memref<?xf32>) -> vector<4xf32>
 func @transfer_read(
     %A : tensor<?xf32> {linalg.inplaceable = false})
   -> (vector<4xf32>)
@@ -26,6 +30,7 @@ func @transfer_read(
 
 // CHECK-LABEL: func @fill_inplace(
 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>
+// CHECK-NO-LAYOUT-MAP-LABEL: func @fill_inplace(%{{.*}}: memref<?xf32>) {
 func @fill_inplace(
     %A : tensor<?xf32> {linalg.inplaceable = true})
   -> tensor<?xf32>
@@ -63,6 +68,7 @@ func @tensor_extract(%A : tensor<?xf32> {linalg.inplaceable = false}) -> (f32) {
 /// No linalg.inplaceable flag, must allocate.
 // CHECK-LABEL: func @not_inplace(
 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: memref<?xf32, #[[$map_1d_dyn]]>) -> memref<?xf32> {
+// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?xf32>) -> memref<?xf32>
 func @not_inplace(
     %A : tensor<?xf32> {linalg.inplaceable = false})
   -> tensor<?xf32>
@@ -86,6 +92,7 @@ func @not_inplace(
 
 // CHECK-LABEL: func @not_inplace
 //  CHECK-SAME:   %[[A:[a-zA-Z0-9]*]]: memref<?x?xf32, #[[$map_2d_dyn]]>) {
+// CHECK-NO-LAYOUT-MAP-LABEL: func @not_inplace(%{{.*}}: memref<?x?xf32>) {
 func @not_inplace(
     %A : tensor<?x?xf32> {linalg.inplaceable = true})
   -> tensor<?x?xf32>
diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
index a9b5ab206d42f..6e72f77ee3493 100644
--- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
@@ -91,6 +91,10 @@ struct TestComprehensiveFunctionBufferize
       *this, "dialect-filter",
       llvm::cl::desc("Bufferize only ops from the specified dialects"),
       llvm::cl::ZeroOrMore, llvm::cl::MiscFlags::CommaSeparated};
+  Option<bool> fullyDynamicLayoutMaps{
+      *this, "fully-dynamic-layout-maps",
+      llvm::cl::desc("Use fully dynamic layout maps on memref types"),
+      llvm::cl::init(true)};
   Option<bool> createDeallocs{
       *this, "create-deallocs",
       llvm::cl::desc("Specify if buffers should be deallocated"),
@@ -108,6 +112,7 @@ void TestComprehensiveFunctionBufferize::runOnOperation() {
   options->allowUnknownOps = allowUnknownOps;
   options->testAnalysisOnly = testAnalysisOnly;
   options->analysisFuzzerSeed = analysisFuzzerSeed;
+  options->fullyDynamicLayoutMaps = fullyDynamicLayoutMaps;
   options->createDeallocs = createDeallocs;
 
   if (dialectFilter.hasValue()) {

From b777d354f6703afd58b1803fdfb6d43a607d8fd6 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Wed, 26 Jan 2022 14:50:40 +0100
Subject: [PATCH 738/946] [clang][DeclPrinter] Fix printing for noexcept
 expressions

We are already building into the final result, no need to append it
again.

Fixes https://github.com/clangd/vscode-clangd/issues/290.

Differential Revision: https://reviews.llvm.org/D118245
---
 clang/lib/AST/DeclPrinter.cpp           | 1 -
 clang/unittests/AST/DeclPrinterTest.cpp | 9 +++------
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp
index 698b1c3ffd348..715da7e0d90c5 100644
--- a/clang/lib/AST/DeclPrinter.cpp
+++ b/clang/lib/AST/DeclPrinter.cpp
@@ -731,7 +731,6 @@ void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) {
         FT->getNoexceptExpr()->printPretty(EOut, nullptr, SubPolicy,
                                            Indentation, "\n", &Context);
         EOut.flush();
-        Proto += EOut.str();
         Proto += ")";
       }
     }
diff --git a/clang/unittests/AST/DeclPrinterTest.cpp b/clang/unittests/AST/DeclPrinterTest.cpp
index 0b579565f5b4a..c2d7d78738f96 100644
--- a/clang/unittests/AST/DeclPrinterTest.cpp
+++ b/clang/unittests/AST/DeclPrinterTest.cpp
@@ -909,8 +909,7 @@ TEST(DeclPrinter, TestFunctionDecl_ExceptionSpecification5) {
     "  void A(int a) noexcept(true);"
     "};",
     "A",
-    "void A(int a) noexcept(trueA(int a) noexcept(true)"));
-    // WRONG; Should be: "void A(int a) noexcept(true);"
+    "void A(int a) noexcept(true)"));
 }
 
 TEST(DeclPrinter, TestFunctionDecl_ExceptionSpecification6) {
@@ -919,8 +918,7 @@ TEST(DeclPrinter, TestFunctionDecl_ExceptionSpecification6) {
     "  void A(int a) noexcept(1 < 2);"
     "};",
     "A",
-    "void A(int a) noexcept(1 < 2A(int a) noexcept(1 < 2)"));
-    // WRONG; Should be: "void A(int a) noexcept(1 < 2);"
+    "void A(int a) noexcept(1 < 2)"));
 }
 
 TEST(DeclPrinter, TestFunctionDecl_ExceptionSpecification7) {
@@ -930,8 +928,7 @@ TEST(DeclPrinter, TestFunctionDecl_ExceptionSpecification7) {
     "  void A(int a) noexcept(N < 2);"
     "};",
     "A",
-    "void A(int a) noexcept(N < 2A(int a) noexcept(N < 2)"));
-    // WRONG; Should be: "void A(int a) noexcept(N < 2);"
+    "void A(int a) noexcept(N < 2)"));
 }
 
 TEST(DeclPrinter, TestVarDecl1) {

From 93948c5299d7ee446aa707221751a0af2b87c12b Mon Sep 17 00:00:00 2001
From: Marek Kurdej <marek.kurdej+llvm.org@gmail.com>
Date: Wed, 26 Jan 2022 16:00:26 +0100
Subject: [PATCH 739/946] [clang-format] Correctly format lambdas with variadic
 template parameters.

Fixes https://github.com/llvm/llvm-project/issues/53405.

Reviewed By: MyDeveloperDay, owenpan

Differential Revision: https://reviews.llvm.org/D118220
---
 clang/lib/Format/UnwrappedLineParser.cpp | 14 +++++++----
 clang/unittests/Format/FormatTest.cpp    | 30 ++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp
index ff3e791822c47..b0588d92ab001 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -1856,6 +1856,7 @@ bool UnwrappedLineParser::tryToParseLambda() {
     return false;
 
   bool SeenArrow = false;
+  bool InTemplateParameterList = false;
 
   while (FormatTok->isNot(tok::l_brace)) {
     if (FormatTok->isSimpleTypeSpecifier()) {
@@ -1871,6 +1872,14 @@ bool UnwrappedLineParser::tryToParseLambda() {
     case tok::l_square:
       parseSquare();
       break;
+    case tok::kw_class:
+    case tok::kw_template:
+    case tok::kw_typename:
+      assert(FormatTok->Previous);
+      if (FormatTok->Previous->is(tok::less))
+        InTemplateParameterList = true;
+      nextToken();
+      break;
     case tok::amp:
     case tok::star:
     case tok::kw_const:
@@ -1880,11 +1889,8 @@ bool UnwrappedLineParser::tryToParseLambda() {
     case tok::identifier:
     case tok::numeric_constant:
     case tok::coloncolon:
-    case tok::kw_class:
     case tok::kw_mutable:
     case tok::kw_noexcept:
-    case tok::kw_template:
-    case tok::kw_typename:
       nextToken();
       break;
     // Specialization of a template with an integer parameter can contain
@@ -1921,7 +1927,7 @@ bool UnwrappedLineParser::tryToParseLambda() {
     case tok::ellipsis:
     case tok::kw_true:
     case tok::kw_false:
-      if (SeenArrow) {
+      if (SeenArrow || InTemplateParameterList) {
         nextToken();
         break;
       }
diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp
index c45869f16ad29..9185de023ccf9 100644
--- a/clang/unittests/Format/FormatTest.cpp
+++ b/clang/unittests/Format/FormatTest.cpp
@@ -20560,6 +20560,36 @@ TEST_F(FormatTest, FormatsLambdas) {
   // Lambdas with explicit template argument lists.
   verifyFormat(
       "auto L = []<template <typename> class T, class U>(T<U> &&a) {};\n");
+  verifyFormat("auto L = []<class T>(T) {\n"
+               "  {\n"
+               "    f();\n"
+               "    g();\n"
+               "  }\n"
+               "};\n");
+  verifyFormat("auto L = []<class... T>(T...) {\n"
+               "  {\n"
+               "    f();\n"
+               "    g();\n"
+               "  }\n"
+               "};\n");
+  verifyFormat("auto L = []<typename... T>(T...) {\n"
+               "  {\n"
+               "    f();\n"
+               "    g();\n"
+               "  }\n"
+               "};\n");
+  verifyFormat("auto L = []<template <typename...> class T>(T...) {\n"
+               "  {\n"
+               "    f();\n"
+               "    g();\n"
+               "  }\n"
+               "};\n");
+  verifyFormat("auto L = []</*comment*/ class... T>(T...) {\n"
+               "  {\n"
+               "    f();\n"
+               "    g();\n"
+               "  }\n"
+               "};\n");
 
   // Multiple lambdas in the same parentheses change indentation rules. These
   // lambdas are forced to start on new lines.

From de8867a0b6407c9b6d95a2ecb830256f0e224b52 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 26 Jan 2022 16:15:09 +0100
Subject: [PATCH 740/946] [AMDGPUEmitPrintf] Don't require specific pointer
 element type

Rather than checking for i8*, simply add a bitcast to i8*, so the
appendString() code sees the expected type.
---
 llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp | 17 +++--------------
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
index b8b0bbbc7a4ed..c734611836eb6 100644
--- a/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
+++ b/llvm/lib/Transforms/Utils/AMDGPUEmitPrintf.cpp
@@ -22,19 +22,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-emit-printf"
 
-static bool isCString(const Value *Arg) {
-  auto Ty = Arg->getType();
-  auto PtrTy = dyn_cast<PointerType>(Ty);
-  if (!PtrTy)
-    return false;
-
-  auto IntTy = dyn_cast<IntegerType>(PtrTy->getPointerElementType());
-  if (!IntTy)
-    return false;
-
-  return IntTy->getBitWidth() == 8;
-}
-
 static Value *fitArgInto64Bits(IRBuilder<> &Builder, Value *Arg) {
   auto Int64Ty = Builder.getInt64Ty();
   auto Ty = Arg->getType();
@@ -176,13 +163,15 @@ static Value *callAppendStringN(IRBuilder<> &Builder, Value *Desc, Value *Str,
 
 static Value *appendString(IRBuilder<> &Builder, Value *Desc, Value *Arg,
                            bool IsLast) {
+  Arg = Builder.CreateBitCast(
+      Arg, Builder.getInt8PtrTy(Arg->getType()->getPointerAddressSpace()));
   auto Length = getStrlenWithNull(Builder, Arg);
   return callAppendStringN(Builder, Desc, Arg, Length, IsLast);
 }
 
 static Value *processArg(IRBuilder<> &Builder, Value *Desc, Value *Arg,
                          bool SpecIsCString, bool IsLast) {
-  if (SpecIsCString && isCString(Arg)) {
+  if (SpecIsCString && isa<PointerType>(Arg->getType())) {
     return appendString(Builder, Desc, Arg, IsLast);
   }
   // If the format specifies a string but the argument is not, the frontend will

From 99ae5c13f64e138d6b17c00bd01c87c3ce58cb6b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Wed, 26 Jan 2022 15:17:09 +0000
Subject: [PATCH 741/946] [X86] Add 'getSplitVectorSrc' helper to determine if
 subvectors all come from the same source

Helps determine if the subvector ops come from the same larger vector and match the lower/upper extractions
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 114 ++++++++++++++----------
 1 file changed, 65 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b25a170a683ab..ba606d7a80edb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -6146,6 +6146,29 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
   return DAG.getBitcast(VT, Vec);
 }
 
+// Helper to determine if the ops are all the extracted subvectors come from a
+// single source. If we allow commute they don't have to be in order (Lo/Hi).
+static SDValue getSplitVectorSrc(SDValue LHS, SDValue RHS, bool AllowCommute) {
+  if (LHS.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      RHS.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+      LHS.getValueType() != RHS.getValueType() ||
+      LHS.getOperand(0) != RHS.getOperand(0))
+    return SDValue();
+
+  SDValue Src = LHS.getOperand(0);
+  if (Src.getValueSizeInBits() != (LHS.getValueSizeInBits() * 2))
+    return SDValue();
+
+  unsigned NumElts = LHS.getValueType().getVectorNumElements();
+  if ((LHS.getConstantOperandAPInt(1) == 0 &&
+       RHS.getConstantOperandAPInt(1) == NumElts) ||
+      (AllowCommute && RHS.getConstantOperandAPInt(1) == 0 &&
+       LHS.getConstantOperandAPInt(1) == NumElts))
+    return Src;
+
+  return SDValue();
+}
+
 static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                 const SDLoc &dl, unsigned vectorWidth) {
   EVT VT = Vec.getValueType();
@@ -44512,30 +44535,28 @@ static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
     // PMOVMSKB(PACKSSBW(LO(X), HI(X)))
     // -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA.
     if (CmpBits >= 16 && Subtarget.hasInt256() &&
-        VecOp0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
-        VecOp1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
-        VecOp0.getOperand(0) == VecOp1.getOperand(0) &&
-        VecOp0.getConstantOperandAPInt(1) == 0 &&
-        VecOp1.getConstantOperandAPInt(1) == 8 &&
         (IsAnyOf || (SignExt0 && SignExt1))) {
-      SDLoc DL(EFLAGS);
-      SDValue Result = peekThroughBitcasts(VecOp0.getOperand(0));
-      if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ) {
-        SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
-                                Result.getOperand(0), Result.getOperand(1));
-        V = DAG.getBitcast(MVT::v4i64, V);
-        return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
-      }
-      Result = DAG.getBitcast(MVT::v32i8, Result);
-      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
-      unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
-      if (!SignExt0 || !SignExt1) {
-        assert(IsAnyOf && "Only perform v16i16 signmasks for any_of patterns");
-        Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
-                             DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
+      if (SDValue Src = getSplitVectorSrc(VecOp0, VecOp1, true)) {
+        SDLoc DL(EFLAGS);
+        SDValue Result = peekThroughBitcasts(Src);
+        if (IsAllOf && Result.getOpcode() == X86ISD::PCMPEQ) {
+          SDValue V = DAG.getNode(ISD::SUB, DL, Result.getValueType(),
+                                  Result.getOperand(0), Result.getOperand(1));
+          V = DAG.getBitcast(MVT::v4i64, V);
+          return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
+        }
+        Result = DAG.getBitcast(MVT::v32i8, Result);
+        Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
+        unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
+        if (!SignExt0 || !SignExt1) {
+          assert(IsAnyOf &&
+                 "Only perform v16i16 signmasks for any_of patterns");
+          Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
+                               DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
+        }
+        return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
+                           DAG.getConstant(CmpMask, DL, MVT::i32));
       }
-      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
-                         DAG.getConstant(CmpMask, DL, MVT::i32));
     }
   }
 
@@ -45582,33 +45603,28 @@ static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
   // truncation trees that help us avoid lane crossing shuffles.
   // TODO: There's a lot more we can do for PACK/HADD style shuffle combines.
   // TODO: We don't handle vXf64 shuffles yet.
-  if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32 &&
-      BC0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
-      BC1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
-      BC0.getOperand(0) == BC1.getOperand(0) &&
-      BC0.getOperand(0).getValueType().is256BitVector() &&
-      BC0.getConstantOperandAPInt(1) == 0 &&
-      BC1.getConstantOperandAPInt(1) ==
-          BC0.getValueType().getVectorNumElements()) {
-    SmallVector<SDValue> ShuffleOps;
-    SmallVector<int> ShuffleMask, ScaledMask;
-    SDValue Vec = peekThroughBitcasts(BC0.getOperand(0));
-    if (getTargetShuffleInputs(Vec, ShuffleOps, ShuffleMask, DAG)) {
-      resolveTargetShuffleInputsAndMask(ShuffleOps, ShuffleMask);
-      // To keep the HOP LHS/RHS coherency, we must be able to scale the unary
-      // shuffle to a v4X64 width - we can probably relax this in the future.
-      if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
-          ShuffleOps[0].getValueType().is256BitVector() &&
-          scaleShuffleElements(ShuffleMask, 4, ScaledMask)) {
-        SDValue Lo, Hi;
-        MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
-        std::tie(Lo, Hi) = DAG.SplitVector(ShuffleOps[0], DL);
-        Lo = DAG.getBitcast(SrcVT, Lo);
-        Hi = DAG.getBitcast(SrcVT, Hi);
-        SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
-        Res = DAG.getBitcast(ShufVT, Res);
-        Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ScaledMask);
-        return DAG.getBitcast(VT, Res);
+  if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {
+    if (SDValue BCSrc = getSplitVectorSrc(BC0, BC1, false)) {
+      SmallVector<SDValue> ShuffleOps;
+      SmallVector<int> ShuffleMask, ScaledMask;
+      SDValue Vec = peekThroughBitcasts(BCSrc);
+      if (getTargetShuffleInputs(Vec, ShuffleOps, ShuffleMask, DAG)) {
+        resolveTargetShuffleInputsAndMask(ShuffleOps, ShuffleMask);
+        // To keep the HOP LHS/RHS coherency, we must be able to scale the unary
+        // shuffle to a v4X64 width - we can probably relax this in the future.
+        if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
+            ShuffleOps[0].getValueType().is256BitVector() &&
+            scaleShuffleElements(ShuffleMask, 4, ScaledMask)) {
+          SDValue Lo, Hi;
+          MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
+          std::tie(Lo, Hi) = DAG.SplitVector(ShuffleOps[0], DL);
+          Lo = DAG.getBitcast(SrcVT, Lo);
+          Hi = DAG.getBitcast(SrcVT, Hi);
+          SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
+          Res = DAG.getBitcast(ShufVT, Res);
+          Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ScaledMask);
+          return DAG.getBitcast(VT, Res);
+        }
       }
     }
   }

From ef8206320769ad31422a803a0d6de6077fd231d2 Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Wed, 26 Jan 2022 10:11:12 -0500
Subject: [PATCH 742/946] Rename llvm::array_lengthof into llvm::size to match
 std::size from C++17

As a conquence move llvm::array_lengthof from STLExtras.h to
STLForwardCompat.h (which is included by STLExtras.h so no build
breakage expected).
---
 clang/include/clang/AST/OpenMPClause.h        | 12 ++---
 clang/lib/AST/ASTContext.cpp                  |  2 +-
 clang/lib/AST/AttrDocTable.cpp                |  2 +-
 clang/lib/AST/CommentCommandTraits.cpp        |  6 +--
 clang/lib/Basic/DiagnosticIDs.cpp             |  4 +-
 clang/lib/Basic/Targets/NVPTX.h               |  2 +-
 clang/lib/CodeGen/CGExpr.cpp                  |  2 +-
 clang/lib/CodeGen/CGObjC.cpp                  |  2 +-
 clang/lib/Driver/Compilation.cpp              |  2 +-
 clang/lib/Driver/ToolChain.cpp                |  2 +-
 clang/lib/Driver/ToolChains/Darwin.cpp        |  8 ++--
 clang/lib/Driver/Types.cpp                    |  2 +-
 .../lib/Frontend/PrintPreprocessedOutput.cpp  |  2 +-
 clang/lib/Parse/ParseDecl.cpp                 |  4 +-
 clang/lib/Parse/ParseOpenMP.cpp               |  2 +-
 clang/lib/Sema/DeclSpec.cpp                   |  4 +-
 clang/lib/Sema/ParsedAttr.cpp                 |  2 +-
 clang/lib/Sema/SemaLookup.cpp                 |  6 +--
 clang/lib/Sema/TreeTransform.h                |  2 +-
 clang/test/Analysis/templates.cpp             |  4 +-
 clang/test/SemaTemplate/instantiate-init.cpp  | 10 ++--
 clang/unittests/AST/CommentLexer.cpp          | 48 +++++++++----------
 clang/unittests/AST/CommentParser.cpp         | 36 +++++++-------
 clang/unittests/AST/DeclPrinterTest.cpp       |  4 +-
 .../Tooling/CompilationDatabaseTest.cpp       |  2 +-
 lld/COFF/Chunks.cpp                           |  2 +-
 lldb/source/DataFormatters/FormatManager.cpp  |  2 +-
 lldb/source/Host/common/MainLoop.cpp          |  2 +-
 .../windows/ConnectionGenericFileWindows.cpp  |  2 +-
 .../source/Interpreter/CommandInterpreter.cpp |  2 +-
 lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp |  4 +-
 lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp   |  4 +-
 lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp |  4 +-
 .../Plugins/ABI/Mips/ABISysV_mips64.cpp       |  2 +-
 .../Plugins/ABI/PowerPC/ABISysV_ppc.cpp       |  2 +-
 .../Plugins/ABI/PowerPC/ABISysV_ppc64.cpp     |  4 +-
 .../Plugins/ABI/SystemZ/ABISysV_s390x.cpp     |  2 +-
 .../DynamicLoaderDarwinKernel.cpp             |  2 +-
 .../Instruction/ARM/EmulateInstructionARM.cpp |  4 +-
 .../ARM64/EmulateInstructionARM64.cpp         |  4 +-
 .../MIPS/EmulateInstructionMIPS.cpp           |  2 +-
 .../MIPS64/EmulateInstructionMIPS64.cpp       |  2 +-
 .../PPC64/EmulateInstructionPPC64.cpp         |  4 +-
 .../AppleObjCTrampolineHandler.cpp            |  4 +-
 .../ObjectFile/PECOFF/ObjectFilePECOFF.cpp    |  2 +-
 .../ObjectFile/PECOFF/PECallFrameInfo.cpp     |  4 +-
 .../Platform/MacOSX/PlatformDarwin.cpp        |  4 +-
 .../Utility/RegisterContextDarwin_arm.cpp     | 10 ++--
 .../Utility/RegisterContextDarwin_arm64.cpp   | 10 ++--
 .../Utility/RegisterContextDarwin_i386.cpp    | 10 ++--
 .../Utility/RegisterContextDarwin_x86_64.cpp  | 10 ++--
 .../Utility/RegisterContextWindows_i386.cpp   |  4 +-
 .../Utility/RegisterContextWindows_x86_64.cpp |  4 +-
 .../NativeRegisterContextWindows_WoW64.cpp    |  2 +-
 .../NativeRegisterContextWindows_arm.cpp      |  4 +-
 .../NativeRegisterContextWindows_arm64.cpp    |  4 +-
 .../NativeRegisterContextWindows_i386.cpp     |  2 +-
 .../NativeRegisterContextWindows_x86_64.cpp   |  4 +-
 .../Common/arm/RegisterContextWindows_arm.cpp | 10 ++--
 .../arm64/RegisterContextWindows_arm64.cpp    | 10 ++--
 .../Common/x64/RegisterContextWindows_x64.cpp | 10 ++--
 .../Common/x86/RegisterContextWindows_x86.cpp |  8 ++--
 .../minidump/RegisterContextMinidump_ARM.cpp  | 12 ++---
 .../RegisterContextMinidump_ARM64.cpp         |  8 ++--
 lldb/source/Utility/ArchSpec.cpp              | 16 +++----
 lldb/unittests/Utility/StatusTest.cpp         |  2 +-
 llvm/include/llvm/ADT/STLExtras.h             |  6 ---
 llvm/include/llvm/ADT/STLForwardCompat.h      | 10 ++++
 llvm/include/llvm/CodeGen/TargetLowering.h    | 24 +++++-----
 llvm/include/llvm/MC/SubtargetFeature.h       |  6 +--
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  8 ++--
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  8 ++--
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  2 +-
 llvm/lib/DWP/DWP.cpp                          |  2 +-
 llvm/lib/IR/DebugInfoMetadata.cpp             |  6 +--
 llvm/lib/MC/MCAsmBackend.cpp                  |  2 +-
 llvm/lib/MC/MCDwarf.cpp                       |  2 +-
 llvm/lib/Object/MachOObjectFile.cpp           |  2 +-
 llvm/lib/Support/ARMAttributeParser.cpp       |  6 +--
 llvm/lib/Support/CrashRecoveryContext.cpp     |  2 +-
 llvm/lib/Support/NativeFormatting.cpp         |  2 +-
 llvm/lib/Support/RISCVAttributeParser.cpp     |  2 +-
 llvm/lib/Support/Triple.cpp                   | 10 ++--
 llvm/lib/Support/Unix/Signals.inc             | 10 ++--
 llvm/lib/Support/Windows/Path.inc             |  2 +-
 llvm/lib/Support/Windows/Signals.inc          |  2 +-
 llvm/lib/Support/X86TargetParser.cpp          | 14 +++---
 llvm/lib/Support/raw_ostream.cpp              |  4 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  4 +-
 .../MCTargetDesc/AArch64InstPrinter.cpp       |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp      |  4 +-
 llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp   |  2 +-
 llvm/lib/Target/ARC/ARCISelLowering.cpp       |  8 ++--
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp      |  4 +-
 llvm/lib/Target/ARM/ARMFrameLowering.cpp      |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  2 +-
 llvm/lib/Target/ARM/Thumb2SizeReduction.cpp   |  2 +-
 llvm/lib/Target/AVR/AVRISelLowering.cpp       |  4 +-
 .../Disassembler/HexagonDisassembler.cpp      |  8 ++--
 .../lib/Target/Hexagon/HexagonFrameLowering.h |  2 +-
 .../Target/Hexagon/HexagonHardwareLoops.cpp   |  4 +-
 .../Target/Hexagon/HexagonISelDAGToDAG.cpp    |  2 +-
 .../Target/Hexagon/HexagonISelLowering.cpp    |  2 +-
 .../MCTargetDesc/HexagonMCCodeEmitter.cpp     |  2 +-
 .../MSP430/MCTargetDesc/MSP430AsmBackend.cpp  |  2 +-
 llvm/lib/Target/MSP430/MSP430ISelLowering.cpp |  4 +-
 .../Mips/MCTargetDesc/MipsAsmBackend.cpp      |  4 +-
 llvm/lib/Target/Mips/Mips16ISelLowering.cpp   |  2 +-
 llvm/lib/Target/PowerPC/PPCCallingConv.cpp    |  6 +--
 llvm/lib/Target/PowerPC/PPCFrameLowering.cpp  |  8 ++--
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 20 ++++----
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      |  4 +-
 .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp    |  2 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  4 +-
 .../Sparc/Disassembler/SparcDisassembler.cpp  |  2 +-
 .../Target/SystemZ/SystemZFrameLowering.cpp   |  4 +-
 llvm/lib/Target/VE/VEFrameLowering.h          |  2 +-
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp |  4 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  4 +-
 llvm/lib/Target/XCore/XCoreISelLowering.cpp   |  4 +-
 .../InstCombine/InstCombineAddSub.cpp         |  2 +-
 .../Transforms/Scalar/LoopStrengthReduce.cpp  |  2 +-
 llvm/lib/Transforms/Utils/MetaRenamer.cpp     |  2 +-
 llvm/tools/llvm-config/llvm-config.cpp        |  4 +-
 llvm/tools/llvm-objdump/COFFDump.cpp          |  2 +-
 llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp   |  4 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp         |  2 +-
 llvm/unittests/ADT/APFloatTest.cpp            | 20 ++++----
 llvm/unittests/ADT/StringRefTest.cpp          | 12 ++---
 llvm/unittests/ADT/TinyPtrVectorTest.cpp      |  2 +-
 .../ProfileData/CoverageMappingTest.cpp       |  4 +-
 llvm/unittests/Support/BinaryStreamTest.cpp   |  2 +-
 llvm/unittests/Support/CommandLineTest.cpp    | 10 ++--
 llvm/unittests/Support/FormatVariadicTest.cpp |  2 +-
 llvm/unittests/Support/TargetParserTest.cpp   | 12 ++---
 llvm/utils/KillTheDoctor/KillTheDoctor.cpp    |  6 +--
 llvm/utils/TableGen/AsmWriterEmitter.cpp      |  2 +-
 llvm/utils/TableGen/CodeGenTarget.cpp         |  2 +-
 llvm/utils/TableGen/RegisterInfoEmitter.cpp   |  4 +-
 llvm/utils/TableGen/X86DisassemblerTables.cpp |  2 +-
 .../mlir/Dialect/SPIRV/IR/SPIRVBase.td        |  4 +-
 mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp      |  6 +--
 mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp      | 22 ++++-----
 143 files changed, 386 insertions(+), 382 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index 3ecc1d40fafc6..e3f975c906433 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -5772,11 +5772,11 @@ class OMPMapClause final : public OMPMappableExprListClause<OMPMapClause>,
                                   /*SupportsMapper=*/true, &MapperQualifierLoc,
                                   &MapperIdInfo),
         MapType(MapType), MapTypeIsImplicit(MapTypeIsImplicit), MapLoc(MapLoc) {
-    assert(llvm::array_lengthof(MapTypeModifiers) == MapModifiers.size() &&
+    assert(llvm::size(MapTypeModifiers) == MapModifiers.size() &&
            "Unexpected number of map type modifiers.");
     llvm::copy(MapModifiers, std::begin(MapTypeModifiers));
 
-    assert(llvm::array_lengthof(MapTypeModifiersLoc) ==
+    assert(llvm::size(MapTypeModifiersLoc) ==
                MapModifiersLoc.size() &&
            "Unexpected number of map type modifier locations.");
     llvm::copy(MapModifiersLoc, std::begin(MapTypeModifiersLoc));
@@ -6694,11 +6694,11 @@ class OMPToClause final : public OMPMappableExprListClause<OMPToClause>,
       : OMPMappableExprListClause(llvm::omp::OMPC_to, Locs, Sizes,
                                   /*SupportsMapper=*/true, &MapperQualifierLoc,
                                   &MapperIdInfo) {
-    assert(llvm::array_lengthof(MotionModifiers) == TheMotionModifiers.size() &&
+    assert(llvm::size(MotionModifiers) == TheMotionModifiers.size() &&
            "Unexpected number of motion modifiers.");
     llvm::copy(TheMotionModifiers, std::begin(MotionModifiers));
 
-    assert(llvm::array_lengthof(MotionModifiersLoc) ==
+    assert(llvm::size(MotionModifiersLoc) ==
                TheMotionModifiersLoc.size() &&
            "Unexpected number of motion modifier locations.");
     llvm::copy(TheMotionModifiersLoc, std::begin(MotionModifiersLoc));
@@ -6896,11 +6896,11 @@ class OMPFromClause final
       : OMPMappableExprListClause(llvm::omp::OMPC_from, Locs, Sizes,
                                   /*SupportsMapper=*/true, &MapperQualifierLoc,
                                   &MapperIdInfo) {
-    assert(llvm::array_lengthof(MotionModifiers) == TheMotionModifiers.size() &&
+    assert(llvm::size(MotionModifiers) == TheMotionModifiers.size() &&
            "Unexpected number of motion modifiers.");
     llvm::copy(TheMotionModifiers, std::begin(MotionModifiers));
 
-    assert(llvm::array_lengthof(MotionModifiersLoc) ==
+    assert(llvm::size(MotionModifiersLoc) ==
                TheMotionModifiersLoc.size() &&
            "Unexpected number of motion modifier locations.");
     llvm::copy(TheMotionModifiersLoc, std::begin(MotionModifiersLoc));
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 9d63724c919a0..2111e9380167c 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -6594,7 +6594,7 @@ QualType ASTContext::getPromotedIntegerType(QualType Promotable) const {
       uint64_t FromSize = getTypeSize(BT);
       QualType PromoteTypes[] = { IntTy, UnsignedIntTy, LongTy, UnsignedLongTy,
                                   LongLongTy, UnsignedLongLongTy };
-      for (size_t Idx = 0; Idx < llvm::array_lengthof(PromoteTypes); ++Idx) {
+      for (size_t Idx = 0; Idx < llvm::size(PromoteTypes); ++Idx) {
         uint64_t ToSize = getTypeSize(PromoteTypes[Idx]);
         if (FromSize < ToSize ||
             (FromSize == ToSize &&
diff --git a/clang/lib/AST/AttrDocTable.cpp b/clang/lib/AST/AttrDocTable.cpp
index 3bfedac8b8f13..8f7e7eaf0452c 100644
--- a/clang/lib/AST/AttrDocTable.cpp
+++ b/clang/lib/AST/AttrDocTable.cpp
@@ -21,7 +21,7 @@ static const llvm::StringRef AttrDoc[] = {
 };
 
 llvm::StringRef clang::Attr::getDocumentation(clang::attr::Kind K) {
-  if(K < llvm::array_lengthof(AttrDoc))
+  if(K < llvm::size(AttrDoc))
     return AttrDoc[K];
   return "";
 }
diff --git a/clang/lib/AST/CommentCommandTraits.cpp b/clang/lib/AST/CommentCommandTraits.cpp
index bdc0dd47fb7d2..68f8006261f14 100644
--- a/clang/lib/AST/CommentCommandTraits.cpp
+++ b/clang/lib/AST/CommentCommandTraits.cpp
@@ -17,7 +17,7 @@ namespace comments {
 
 CommandTraits::CommandTraits(llvm::BumpPtrAllocator &Allocator,
                              const CommentOptions &CommentOptions) :
-    NextID(llvm::array_lengthof(Commands)), Allocator(Allocator) {
+    NextID(llvm::size(Commands)), Allocator(Allocator) {
   registerCommentOptions(CommentOptions);
 }
 
@@ -115,7 +115,7 @@ const CommandInfo *CommandTraits::registerBlockCommand(StringRef CommandName) {
 
 const CommandInfo *CommandTraits::getBuiltinCommandInfo(
                                                   unsigned CommandID) {
-  if (CommandID < llvm::array_lengthof(Commands))
+  if (CommandID < llvm::size(Commands))
     return &Commands[CommandID];
   return nullptr;
 }
@@ -131,7 +131,7 @@ const CommandInfo *CommandTraits::getRegisteredCommandInfo(
 
 const CommandInfo *CommandTraits::getRegisteredCommandInfo(
                                                   unsigned CommandID) const {
-  return RegisteredCommands[CommandID - llvm::array_lengthof(Commands)];
+  return RegisteredCommands[CommandID - llvm::size(Commands)];
 }
 
 } // end namespace comments
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index 87db131992e47..3e6204a07c297 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -202,7 +202,7 @@ const StaticDiagInfoRec StaticDiagInfo[] = {
 
 } // namespace
 
-static const unsigned StaticDiagInfoSize = llvm::array_lengthof(StaticDiagInfo);
+static const unsigned StaticDiagInfoSize = llvm::size(StaticDiagInfo);
 
 /// GetDiagInfo - Return the StaticDiagInfoRec entry for the specified DiagID,
 /// or null if the ID is invalid.
@@ -317,7 +317,7 @@ static const StaticDiagCategoryRec CategoryNameTable[] = {
 
 /// getNumberOfCategories - Return the number of categories
 unsigned DiagnosticIDs::getNumberOfCategories() {
-  return llvm::array_lengthof(CategoryNameTable) - 1;
+  return llvm::size(CategoryNameTable) - 1;
 }
 
 /// getCategoryNameFromID - Given a category ID, return the name of the
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 589f24f4bb03e..83f220dcb8b35 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -159,7 +159,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   /// DWARF.
   Optional<unsigned>
   getDWARFAddressSpace(unsigned AddressSpace) const override {
-    if (AddressSpace >= llvm::array_lengthof(NVPTXDWARFAddrSpaceMap) ||
+    if (AddressSpace >= llvm::size(NVPTXDWARFAddrSpaceMap) ||
         NVPTXDWARFAddrSpaceMap[AddressSpace] < 0)
       return llvm::None;
     return NVPTXDWARFAddrSpaceMap[AddressSpace];
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 0fb7ec26a85e5..888f9e275dd15 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3206,7 +3206,7 @@ void CodeGenFunction::EmitCheck(
   assert(IsSanitizerScope);
   assert(Checked.size() > 0);
   assert(CheckHandler >= 0 &&
-         size_t(CheckHandler) < llvm::array_lengthof(SanitizerHandlers));
+         size_t(CheckHandler) < llvm::size(SanitizerHandlers));
   const StringRef CheckName = SanitizerHandlers[CheckHandler].Name;
 
   llvm::Value *FatalCond = nullptr;
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index 8cc609186f9e9..a2c8f8ef10665 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -1749,7 +1749,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
     &CGM.getContext().Idents.get("count")
   };
   Selector FastEnumSel =
-    CGM.getContext().Selectors.getSelector(llvm::array_lengthof(II), &II[0]);
+    CGM.getContext().Selectors.getSelector(llvm::size(II), &II[0]);
 
   QualType ItemsTy =
     getContext().getConstantArrayType(getContext().getObjCIdType(),
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index 67d941c6c2ab9..c479375157c26 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -278,7 +278,7 @@ void Compilation::initCompilationForDiagnostics() {
       options::OPT_o,  options::OPT_MD, options::OPT_MMD, options::OPT_M,
       options::OPT_MM, options::OPT_MF, options::OPT_MG,  options::OPT_MJ,
       options::OPT_MQ, options::OPT_MT, options::OPT_MV};
-  for (unsigned i = 0, e = llvm::array_lengthof(OutputOpts); i != e; ++i) {
+  for (unsigned i = 0, e = llvm::size(OutputOpts); i != e; ++i) {
     if (TranslatedArgs->hasArg(OutputOpts[i]))
       TranslatedArgs->eraseArg(OutputOpts[i]);
   }
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 7551ee4aeb79f..cc6f7210a8d07 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -151,7 +151,7 @@ static const DriverSuffix *FindDriverSuffix(StringRef ProgName, size_t &Pos) {
       {"flang", "--driver-mode=flang"},
   };
 
-  for (size_t i = 0; i < llvm::array_lengthof(DriverSuffixes); ++i) {
+  for (size_t i = 0; i < llvm::size(DriverSuffixes); ++i) {
     StringRef Suffix(DriverSuffixes[i].Suffix);
     if (ProgName.endswith(Suffix)) {
       Pos = ProgName.size() - Suffix.size();
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index f7da3f187814f..8fed21586959d 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -1694,7 +1694,7 @@ getDeploymentTargetFromEnvironmentVariables(const Driver &TheDriver,
       "TVOS_DEPLOYMENT_TARGET",
       "WATCHOS_DEPLOYMENT_TARGET",
   };
-  static_assert(llvm::array_lengthof(EnvVars) == Darwin::LastDarwinPlatform + 1,
+  static_assert(llvm::size(EnvVars) == Darwin::LastDarwinPlatform + 1,
                 "Missing platform");
   for (const auto &I : llvm::enumerate(llvm::makeArrayRef(EnvVars))) {
     if (char *Env = ::getenv(I.value()))
@@ -1715,11 +1715,11 @@ getDeploymentTargetFromEnvironmentVariables(const Driver &TheDriver,
           Targets[Darwin::TvOS] = "";
   } else {
     // Don't allow conflicts in any other platform.
-    unsigned FirstTarget = llvm::array_lengthof(Targets);
-    for (unsigned I = 0; I != llvm::array_lengthof(Targets); ++I) {
+    unsigned FirstTarget = llvm::size(Targets);
+    for (unsigned I = 0; I != llvm::size(Targets); ++I) {
       if (Targets[I].empty())
         continue;
-      if (FirstTarget == llvm::array_lengthof(Targets))
+      if (FirstTarget == llvm::size(Targets))
         FirstTarget = I;
       else
         TheDriver.Diag(diag::err_drv_conflicting_deployment_targets)
diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
index 1bd187ad2fc0a..c5c17e1378fbc 100644
--- a/clang/lib/Driver/Types.cpp
+++ b/clang/lib/Driver/Types.cpp
@@ -42,7 +42,7 @@ static constexpr TypeInfo TypeInfos[] = {
 #include "clang/Driver/Types.def"
 #undef TYPE
 };
-static const unsigned numTypes = llvm::array_lengthof(TypeInfos);
+static const unsigned numTypes = llvm::size(TypeInfos);
 
 static const TypeInfo &getInfo(unsigned id) {
   assert(id > 0 && id - 1 < numTypes && "Invalid Type ID.");
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 1d0022bda474c..23d2bec11dcf9 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -863,7 +863,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
     } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
                Tok.getLiteralData()) {
       OS.write(Tok.getLiteralData(), Tok.getLength());
-    } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
+    } else if (Tok.getLength() < llvm::size(Buffer)) {
       const char *TokPtr = Buffer;
       unsigned Len = PP.getSpelling(Tok, TokPtr);
       OS.write(TokPtr, Len);
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index f21938c81689b..d05169988f2f8 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -1365,7 +1365,7 @@ void Parser::ParseExternalSourceSymbolAttribute(
   ArgsUnion Args[] = {Language.get(), DefinedInExpr.get(),
                       GeneratedDeclaration};
   Attrs.addNew(&ExternalSourceSymbol, SourceRange(Loc, T.getCloseLocation()),
-               ScopeName, ScopeLoc, Args, llvm::array_lengthof(Args), Syntax);
+               ScopeName, ScopeLoc, Args, llvm::size(Args), Syntax);
 }
 
 /// Parse the contents of the "objc_bridge_related" attribute.
@@ -1493,7 +1493,7 @@ void Parser::ParseSwiftNewTypeAttribute(
 
   ArgsUnion Args[] = {SwiftType};
   Attrs.addNew(&AttrName, SourceRange(AttrNameLoc, T.getCloseLocation()),
-               ScopeName, ScopeLoc, Args, llvm::array_lengthof(Args), Syntax);
+               ScopeName, ScopeLoc, Args, llvm::size(Args), Syntax);
 }
 
 
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index de3d58baf84c9..d13f6e080630e 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -188,7 +188,7 @@ static OpenMPDirectiveKindExWrapper parseOpenMPDirectiveKind(Parser &P) {
   if (DKind == OMPD_unknown)
     return OMPD_unknown;
 
-  for (unsigned I = 0; I < llvm::array_lengthof(F); ++I) {
+  for (unsigned I = 0; I < llvm::size(F); ++I) {
     if (DKind != F[I][0])
       continue;
 
diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp
index d4dc790c008a2..c595237401e64 100644
--- a/clang/lib/Sema/DeclSpec.cpp
+++ b/clang/lib/Sema/DeclSpec.cpp
@@ -238,7 +238,7 @@ DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto,
     // is already used (consider a function returning a function pointer) or too
     // small (function with too many parameters), go to the heap.
     if (!TheDeclarator.InlineStorageUsed &&
-        NumParams <= llvm::array_lengthof(TheDeclarator.InlineParams)) {
+        NumParams <= llvm::size(TheDeclarator.InlineParams)) {
       I.Fun.Params = TheDeclarator.InlineParams;
       new (I.Fun.Params) ParamInfo[NumParams];
       I.Fun.DeleteParams = false;
@@ -308,7 +308,7 @@ void Declarator::setDecompositionBindings(
   // Allocate storage for bindings and stash them away.
   if (Bindings.size()) {
     if (!InlineStorageUsed &&
-        Bindings.size() <= llvm::array_lengthof(InlineBindings)) {
+        Bindings.size() <= llvm::size(InlineBindings)) {
       BindingGroup.Bindings = InlineBindings;
       BindingGroup.DeleteBindings = false;
       InlineStorageUsed = true;
diff --git a/clang/lib/Sema/ParsedAttr.cpp b/clang/lib/Sema/ParsedAttr.cpp
index 045847d0ce0fd..5c533f11bf324 100644
--- a/clang/lib/Sema/ParsedAttr.cpp
+++ b/clang/lib/Sema/ParsedAttr.cpp
@@ -111,7 +111,7 @@ namespace {
 
 const ParsedAttrInfo &ParsedAttrInfo::get(const AttributeCommonInfo &A) {
   // If we have a ParsedAttrInfo for this ParsedAttr then return that.
-  if ((size_t)A.getParsedKind() < llvm::array_lengthof(AttrInfoMap))
+  if ((size_t)A.getParsedKind() < llvm::size(AttrInfoMap))
     return *AttrInfoMap[A.getParsedKind()];
 
   // If this is an ignored attribute then return an appropriate ParsedAttrInfo.
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index af6ee24240ceb..219f2c0f8a31f 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -4726,7 +4726,7 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
       "extern", "inline", "static", "typedef"
     };
 
-    const unsigned NumCTypeSpecs = llvm::array_lengthof(CTypeSpecs);
+    const unsigned NumCTypeSpecs = llvm::size(CTypeSpecs);
     for (unsigned I = 0; I != NumCTypeSpecs; ++I)
       Consumer.addKeywordResult(CTypeSpecs[I]);
 
@@ -4780,7 +4780,7 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
       static const char *const CXXExprs[] = {
         "delete", "new", "operator", "throw", "typeid"
       };
-      const unsigned NumCXXExprs = llvm::array_lengthof(CXXExprs);
+      const unsigned NumCXXExprs = llvm::size(CXXExprs);
       for (unsigned I = 0; I != NumCXXExprs; ++I)
         Consumer.addKeywordResult(CXXExprs[I]);
 
@@ -4806,7 +4806,7 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
       // Statements.
       static const char *const CStmts[] = {
         "do", "else", "for", "goto", "if", "return", "switch", "while" };
-      const unsigned NumCStmts = llvm::array_lengthof(CStmts);
+      const unsigned NumCStmts = llvm::size(CStmts);
       for (unsigned I = 0; I != NumCStmts; ++I)
         Consumer.addKeywordResult(CStmts[I]);
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index e43b3ca968ebb..8a0e26fca91e7 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -14357,7 +14357,7 @@ TreeTransform<Derived>::RebuildArrayType(QualType ElementType,
     SemaRef.Context.UnsignedIntTy, SemaRef.Context.UnsignedLongTy,
     SemaRef.Context.UnsignedLongLongTy, SemaRef.Context.UnsignedInt128Ty
   };
-  const unsigned NumTypes = llvm::array_lengthof(Types);
+  const unsigned NumTypes = llvm::size(Types);
   QualType SizeType;
   for (unsigned I = 0; I != NumTypes; ++I)
     if (Size->getBitWidth() == SemaRef.Context.getIntWidth(Types[I])) {
diff --git a/clang/test/Analysis/templates.cpp b/clang/test/Analysis/templates.cpp
index e7c30a764f72d..2c5b22d24119c 100644
--- a/clang/test/Analysis/templates.cpp
+++ b/clang/test/Analysis/templates.cpp
@@ -34,13 +34,13 @@ int main(){
 
 // <rdar://problem/11949235>
 template<class T, unsigned N>
-inline unsigned array_lengthof(T (&)[N]) {
+inline unsigned size(T (&)[N]) {
   return N;
 }
 
 void testNonTypeTemplateInstantiation() {
   const char *S[] = { "a", "b" };
-  clang_analyzer_eval(array_lengthof(S) == 2);
+  clang_analyzer_eval(size(S) == 2);
 #ifndef NO_INLINE
   // expected-warning@-2 {{TRUE}}
 #else
diff --git a/clang/test/SemaTemplate/instantiate-init.cpp b/clang/test/SemaTemplate/instantiate-init.cpp
index 6db33a972f86d..52f2b416e64a3 100644
--- a/clang/test/SemaTemplate/instantiate-init.cpp
+++ b/clang/test/SemaTemplate/instantiate-init.cpp
@@ -86,7 +86,7 @@ namespace PR7985 {
   template<int N> struct integral_c { };
 
   template <typename T, int N>
-  integral_c<N> array_lengthof(T (&x)[N]) { return integral_c<N>(); } // expected-note 2{{candidate template ignored: could not match 'T[N]' against 'const Data<}}
+  integral_c<N> size(T (&x)[N]) { return integral_c<N>(); } // expected-note 2{{candidate template ignored: could not match 'T[N]' against 'const Data<}}
 
   template<typename T>
   struct Data {
@@ -105,14 +105,14 @@ namespace PR7985 {
   const Data<float*> Description<float*>::data[];
 
   void test() {
-    integral_c<1> ic1 = array_lengthof(Description<int>::data);
-    (void)sizeof(array_lengthof(Description<float>::data));
+    integral_c<1> ic1 = size(Description<int>::data);
+    (void)sizeof(size(Description<float>::data));
 
-    (void)sizeof(array_lengthof( // expected-error{{no matching function for call to 'array_lengthof'}}
+    (void)sizeof(size( // expected-error{{no matching function for call to 'size'}}
                           Description<int*>::data // expected-note{{in instantiation of static data member 'PR7985::Description<int *>::data' requested here}}
                           ));
 
-    array_lengthof(Description<float*>::data); // expected-error{{no matching function for call to 'array_lengthof'}}
+    size(Description<float*>::data); // expected-error{{no matching function for call to 'size'}}
   }
 }
 
diff --git a/clang/unittests/AST/CommentLexer.cpp b/clang/unittests/AST/CommentLexer.cpp
index 456473e1753b6..9540ebfe32e52 100644
--- a/clang/unittests/AST/CommentLexer.cpp
+++ b/clang/unittests/AST/CommentLexer.cpp
@@ -91,7 +91,7 @@ TEST_F(CommentLexerTest, Basic2) {
   const char *Sources[] = {
     "//", "///", "//!", "///<", "//!<"
   };
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -107,7 +107,7 @@ TEST_F(CommentLexerTest, Basic3) {
   const char *Sources[] = {
     "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
   };
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -126,7 +126,7 @@ TEST_F(CommentLexerTest, Basic4) {
     "// Meow\n", "// Meow\r\n", "//! Meow\r",
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -146,7 +146,7 @@ TEST_F(CommentLexerTest, Basic5) {
     "/* Meow*/", "/** Meow*/",  "/*! Meow*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -169,7 +169,7 @@ TEST_F(CommentLexerTest, Basic6) {
     "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -248,7 +248,7 @@ TEST_F(CommentLexerTest, Basic7) {
 // A command marker followed by comment end.
 TEST_F(CommentLexerTest, DoxygenCommand1) {
   const char *Sources[] = { "//@", "///@", "//!@" };
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -265,7 +265,7 @@ TEST_F(CommentLexerTest, DoxygenCommand1) {
 // A command marker followed by comment end.
 TEST_F(CommentLexerTest, DoxygenCommand2) {
   const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -283,7 +283,7 @@ TEST_F(CommentLexerTest, DoxygenCommand2) {
 // A command marker followed by comment end.
 TEST_F(CommentLexerTest, DoxygenCommand3) {
   const char *Sources[] = { "/*\\*/", "/**\\*/" };
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -311,12 +311,12 @@ TEST_F(CommentLexerTest, DoxygenCommand4) {
     "::", ""
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
 
-    ASSERT_EQ(array_lengthof(Text), Toks.size());
+    ASSERT_EQ(size(Text), Toks.size());
 
     for (size_t j = 0, e = Toks.size(); j != e; j++) {
       if(Toks[j].is(tok::text)) {
@@ -578,7 +578,7 @@ TEST_F(CommentLexerTest, VerbatimBlock1) {
     "/** \\verbatim\\endverbatim*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -645,7 +645,7 @@ TEST_F(CommentLexerTest, VerbatimBlock4) {
     "/** Meow \\verbatim aaa \\endverbatim*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -676,7 +676,7 @@ TEST_F(CommentLexerTest, VerbatimBlock5) {
     "/** Meow \\verbatim aaa */"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -901,7 +901,7 @@ TEST_F(CommentLexerTest, VerbatimLine1) {
     "/** \\fn*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -926,7 +926,7 @@ TEST_F(CommentLexerTest, VerbatimLine2) {
     "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1052,7 +1052,7 @@ TEST_F(CommentLexerTest, HTML4) {
     "// <img "
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1169,7 +1169,7 @@ TEST_F(CommentLexerTest, HTML9) {
     "// <img src "
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1195,7 +1195,7 @@ TEST_F(CommentLexerTest, HTML10) {
     "// <img src ="
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1225,7 +1225,7 @@ TEST_F(CommentLexerTest, HTML11) {
     "// <img src = \'"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1284,7 +1284,7 @@ TEST_F(CommentLexerTest, HTML13) {
     "// <img src=\'val\\\"\\'val\'"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1315,7 +1315,7 @@ TEST_F(CommentLexerTest, HTML14) {
     "// <img src=\'val\\\"\\'val\'>"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1348,7 +1348,7 @@ TEST_F(CommentLexerTest, HTML15) {
     "// <img />"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1373,7 +1373,7 @@ TEST_F(CommentLexerTest, HTML16) {
     "// <img / Aaa"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1797,7 +1797,7 @@ TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
     "// &#X3D;"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index 62c461a00d16c..0de20dd32cb0b 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -664,7 +664,7 @@ TEST_F(CommentParserTest, ParagraphSplitting1) {
     "*/"),
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -801,7 +801,7 @@ TEST_F(CommentParserTest, ParamCommand3) {
     "// Bbb\n")
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -831,7 +831,7 @@ TEST_F(CommentParserTest, ParamCommand4) {
     "// Bbb\n"),
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -861,7 +861,7 @@ TEST_F(CommentParserTest, ParamCommand5) {
     "// Bbb\n"),
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -892,7 +892,7 @@ TEST_F(CommentParserTest, ParamCommand6) {
     "// Bbb\n"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -947,7 +947,7 @@ TEST_F(CommentParserTest, TParamCommand1) {
     "// Bbb\n"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1081,7 +1081,7 @@ TEST_F(CommentParserTest, HTML1) {
     "// <a >"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1103,7 +1103,7 @@ TEST_F(CommentParserTest, HTML2) {
     "// <br />"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1127,7 +1127,7 @@ TEST_F(CommentParserTest, HTML3) {
     "// <a href >",
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1149,7 +1149,7 @@ TEST_F(CommentParserTest, HTML4) {
     "// <a href=\"bbb\">",
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1172,7 +1172,7 @@ TEST_F(CommentParserTest, HTML5) {
     "// </a >"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1285,7 +1285,7 @@ TEST_F(CommentParserTest, VerbatimBlock5) {
     " *\\endverbatim*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1309,7 +1309,7 @@ TEST_F(CommentParserTest, VerbatimBlock6) {
     " * \\endverbatim*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1336,7 +1336,7 @@ TEST_F(CommentParserTest, VerbatimBlock7) {
     " * \\endverbatim*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1364,7 +1364,7 @@ TEST_F(CommentParserTest, VerbatimBlock8) {
     " * Bbb\n"
     " * \\endverbatim*/"
   };
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1387,7 +1387,7 @@ TEST_F(CommentParserTest, VerbatimLine1) {
     "// \\fn\n"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1405,7 +1405,7 @@ TEST_F(CommentParserTest, VerbatimLine2) {
     "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1424,7 +1424,7 @@ TEST_F(CommentParserTest, Deprecated) {
     "/// @deprecated\n"
   };
 
-  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
+  for (size_t i = 0, e = size(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
diff --git a/clang/unittests/AST/DeclPrinterTest.cpp b/clang/unittests/AST/DeclPrinterTest.cpp
index c2d7d78738f96..628b948c72e48 100644
--- a/clang/unittests/AST/DeclPrinterTest.cpp
+++ b/clang/unittests/AST/DeclPrinterTest.cpp
@@ -731,7 +731,7 @@ TEST(DeclPrinter, TestCXXMethodDecl_Operator1) {
     "()", "[]"
   };
 
-  for (unsigned i = 0, e = llvm::array_lengthof(OperatorNames); i != e; ++i) {
+  for (unsigned i = 0, e = llvm::size(OperatorNames); i != e; ++i) {
     SmallString<128> Code;
     Code.append("struct Z { void operator");
     Code.append(OperatorNames[i]);
@@ -754,7 +754,7 @@ TEST(DeclPrinter, TestCXXMethodDecl_Operator2) {
     "~", "!", "++", "--", "->"
   };
 
-  for (unsigned i = 0, e = llvm::array_lengthof(OperatorNames); i != e; ++i) {
+  for (unsigned i = 0, e = llvm::size(OperatorNames); i != e; ++i) {
     SmallString<128> Code;
     Code.append("struct Z { void operator");
     Code.append(OperatorNames[i]);
diff --git a/clang/unittests/Tooling/CompilationDatabaseTest.cpp b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
index adb9de0c1cbaa..94ccdcafda6d2 100644
--- a/clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -642,7 +642,7 @@ TEST(ParseFixedCompilationDatabase, HandlesPositionalArgsSyntaxOnly) {
   // Adjust the given command line arguments to ensure that any positional
   // arguments in them are stripped.
   const char *Argv[] = {"--", "somefile.cpp", "-fsyntax-only", "-DDEF3"};
-  int Argc = llvm::array_lengthof(Argv);
+  int Argc = llvm::size(Argv);
   std::string ErrorMessage;
   std::unique_ptr<CompilationDatabase> Database =
       FixedCompilationDatabase::loadFromCommandLine(Argc, Argv, ErrorMessage);
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index 6cabb22d98cf2..e26679ceb730a 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -947,7 +947,7 @@ MergeChunk::MergeChunk(uint32_t alignment)
 void MergeChunk::addSection(COFFLinkerContext &ctx, SectionChunk *c) {
   assert(isPowerOf2_32(c->getAlignment()));
   uint8_t p2Align = llvm::Log2_32(c->getAlignment());
-  assert(p2Align < array_lengthof(ctx.mergeChunkInstances));
+  assert(p2Align < size(ctx.mergeChunkInstances));
   auto *&mc = ctx.mergeChunkInstances[p2Align];
   if (!mc)
     mc = make<MergeChunk>(c->getAlignment());
diff --git a/lldb/source/DataFormatters/FormatManager.cpp b/lldb/source/DataFormatters/FormatManager.cpp
index f07bb9a7136a3..dfc9edefd7c6c 100644
--- a/lldb/source/DataFormatters/FormatManager.cpp
+++ b/lldb/source/DataFormatters/FormatManager.cpp
@@ -77,7 +77,7 @@ static_assert((sizeof(g_format_infos) / sizeof(g_format_infos[0])) ==
                   kNumFormats,
               "All formats must have a corresponding info entry.");
 
-static uint32_t g_num_format_infos = llvm::array_lengthof(g_format_infos);
+static uint32_t g_num_format_infos = llvm::size(g_format_infos);
 
 static bool GetFormatFromFormatChar(char format_char, Format &format) {
   for (uint32_t i = 0; i < g_num_format_infos; ++i) {
diff --git a/lldb/source/Host/common/MainLoop.cpp b/lldb/source/Host/common/MainLoop.cpp
index d36587ce2346e..c7e0abdf7f4e8 100644
--- a/lldb/source/Host/common/MainLoop.cpp
+++ b/lldb/source/Host/common/MainLoop.cpp
@@ -108,7 +108,7 @@ Status MainLoop::RunImpl::Poll() {
     EV_SET(&in_events[i++], fd.first, EVFILT_READ, EV_ADD, 0, 0, 0);
 
   num_events = kevent(loop.m_kqueue, in_events.data(), in_events.size(),
-                      out_events, llvm::array_lengthof(out_events), nullptr);
+                      out_events, llvm::size(out_events), nullptr);
 
   if (num_events < 0) {
     if (errno == EINTR) {
diff --git a/lldb/source/Host/windows/ConnectionGenericFileWindows.cpp b/lldb/source/Host/windows/ConnectionGenericFileWindows.cpp
index 817663d6212e5..bbd471dea076f 100644
--- a/lldb/source/Host/windows/ConnectionGenericFileWindows.cpp
+++ b/lldb/source/Host/windows/ConnectionGenericFileWindows.cpp
@@ -188,7 +188,7 @@ size_t ConnectionGenericFile::Read(void *dst, size_t dst_len,
                     .count()
               : INFINITE;
       DWORD wait_result =
-          ::WaitForMultipleObjects(llvm::array_lengthof(m_event_handles),
+          ::WaitForMultipleObjects(llvm::size(m_event_handles),
                                    m_event_handles, FALSE, milliseconds);
       // All of the events are manual reset events, so make sure we reset them
       // to non-signalled.
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 59c23716bf89c..9df7c578b6699 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -549,7 +549,7 @@ void CommandInterpreter::LoadCommandDictionary() {
        "breakpoint set --name '%1'"}};
   // clang-format on
 
-  size_t num_regexes = llvm::array_lengthof(break_regexes);
+  size_t num_regexes = llvm::size(break_regexes);
 
   std::unique_ptr<CommandObjectRegexCommand> break_regex_cmd_up(
       new CommandObjectRegexCommand(
diff --git a/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp b/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
index a8d1cbc675e36..b21f5d4341fbd 100644
--- a/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
+++ b/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
@@ -1187,7 +1187,7 @@ static const RegisterInfo g_register_infos[] = {
      }};
 
 static const uint32_t k_num_register_infos =
-    llvm::array_lengthof(g_register_infos);
+    llvm::size(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABIMacOSX_arm::GetRegisterInfoArray(uint32_t &count) {
@@ -1235,7 +1235,7 @@ bool ABIMacOSX_arm::PrepareTrivialCall(Thread &thread, addr_t sp,
 
   llvm::ArrayRef<addr_t>::iterator ai = args.begin(), ae = args.end();
 
-  for (size_t i = 0; i < llvm::array_lengthof(reg_names); ++i) {
+  for (size_t i = 0; i < llvm::size(reg_names); ++i) {
     if (ai == ae)
       break;
 
diff --git a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
index 9ed042df4e501..87a643f77407f 100644
--- a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
+++ b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
@@ -1190,7 +1190,7 @@ static const RegisterInfo g_register_infos[] = {
      }};
 
 static const uint32_t k_num_register_infos =
-    llvm::array_lengthof(g_register_infos);
+    llvm::size(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABISysV_arm::GetRegisterInfoArray(uint32_t &count) {
@@ -1240,7 +1240,7 @@ bool ABISysV_arm::PrepareTrivialCall(Thread &thread, addr_t sp,
 
   llvm::ArrayRef<addr_t>::iterator ai = args.begin(), ae = args.end();
 
-  for (size_t i = 0; i < llvm::array_lengthof(reg_names); ++i) {
+  for (size_t i = 0; i < llvm::size(reg_names); ++i) {
     if (ai == ae)
       break;
 
diff --git a/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp b/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
index 662689ca06159..f06a3258f1e19 100644
--- a/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
+++ b/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
@@ -503,7 +503,7 @@ static const RegisterInfo g_register_infos[] = {
 };
 
 static const uint32_t k_num_register_infos =
-    llvm::array_lengthof(g_register_infos);
+    llvm::size(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABISysV_mips::GetRegisterInfoArray(uint32_t &count) {
@@ -559,7 +559,7 @@ bool ABISysV_mips::PrepareTrivialCall(Thread &thread, addr_t sp,
   llvm::ArrayRef<addr_t>::iterator ai = args.begin(), ae = args.end();
 
   // Write arguments to registers
-  for (size_t i = 0; i < llvm::array_lengthof(reg_names); ++i) {
+  for (size_t i = 0; i < llvm::size(reg_names); ++i) {
     if (ai == ae)
       break;
 
diff --git a/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp b/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
index 7e272265e15d4..7003667bf9dd1 100644
--- a/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
+++ b/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
@@ -503,7 +503,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
 };
 
 static const uint32_t k_num_register_infos =
-    llvm::array_lengthof(g_register_infos_mips64);
+    llvm::size(g_register_infos_mips64);
 
 const lldb_private::RegisterInfo *
 ABISysV_mips64::GetRegisterInfoArray(uint32_t &count) {
diff --git a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
index 70b9c46d353f6..fc95965620235 100644
--- a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
+++ b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
@@ -204,7 +204,7 @@ static const RegisterInfo g_register_infos[] = {
      }};
 
 static const uint32_t k_num_register_infos =
-    llvm::array_lengthof(g_register_infos);
+    llvm::size(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABISysV_ppc::GetRegisterInfoArray(uint32_t &count) {
diff --git a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
index f9a2851d3949f..9270ae57ac4e3 100644
--- a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
+++ b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
@@ -52,10 +52,10 @@ LLDB_PLUGIN_DEFINE(ABISysV_ppc64)
 const lldb_private::RegisterInfo *
 ABISysV_ppc64::GetRegisterInfoArray(uint32_t &count) {
   if (GetByteOrder() == lldb::eByteOrderLittle) {
-    count = llvm::array_lengthof(g_register_infos_ppc64le);
+    count = llvm::size(g_register_infos_ppc64le);
     return g_register_infos_ppc64le;
   } else {
-    count = llvm::array_lengthof(g_register_infos_ppc64);
+    count = llvm::size(g_register_infos_ppc64);
     return g_register_infos_ppc64;
   }
 }
diff --git a/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp b/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
index f8156deb7e306..986225ecf67a1 100644
--- a/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
+++ b/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
@@ -172,7 +172,7 @@ static const RegisterInfo g_register_infos[] = {
 };
 
 static const uint32_t k_num_register_infos =
-    llvm::array_lengthof(g_register_infos);
+    llvm::size(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABISysV_s390x::GetRegisterInfoArray(uint32_t &count) {
diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
index a5abce7de3aef..633c7e8c604dd 100644
--- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
@@ -405,7 +405,7 @@ DynamicLoaderDarwinKernel::ReadMachHeader(addr_t addr, Process *process, llvm::M
   const uint32_t magicks[] = { llvm::MachO::MH_MAGIC_64, llvm::MachO::MH_MAGIC, llvm::MachO::MH_CIGAM, llvm::MachO::MH_CIGAM_64};
 
   bool found_matching_pattern = false;
-  for (size_t i = 0; i < llvm::array_lengthof (magicks); i++)
+  for (size_t i = 0; i < llvm::size (magicks); i++)
     if (::memcmp (&header.magic, &magicks[i], sizeof (uint32_t)) == 0)
         found_matching_pattern = true;
 
diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
index 5a238c5d4ac78..4b3a45e05dd01 100644
--- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
+++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
@@ -13198,7 +13198,7 @@ EmulateInstructionARM::GetARMOpcodeForInstruction(const uint32_t opcode,
        &EmulateInstructionARM::EmulateRFE, "rfe{<amode>} <Rn>{!}"}
 
   };
-  static const size_t k_num_arm_opcodes = llvm::array_lengthof(g_arm_opcodes);
+  static const size_t k_num_arm_opcodes = llvm::size(g_arm_opcodes);
 
   for (size_t i = 0; i < k_num_arm_opcodes; ++i) {
     if ((g_arm_opcodes[i].mask & opcode) == g_arm_opcodes[i].value &&
@@ -13749,7 +13749,7 @@ EmulateInstructionARM::GetThumbOpcodeForInstruction(const uint32_t opcode,
        &EmulateInstructionARM::EmulateUXTH, "uxth<c>.w <Rd>,<Rm>{,<rotation>}"},
   };
 
-  const size_t k_num_thumb_opcodes = llvm::array_lengthof(g_thumb_opcodes);
+  const size_t k_num_thumb_opcodes = llvm::size(g_thumb_opcodes);
   for (size_t i = 0; i < k_num_thumb_opcodes; ++i) {
     if ((g_thumb_opcodes[i].mask & opcode) == g_thumb_opcodes[i].value &&
         (g_thumb_opcodes[i].variants & arm_isa) != 0)
diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
index f86609f3c5c1c..3cac8259ae28e 100644
--- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
+++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
@@ -52,7 +52,7 @@ using namespace lldb_private;
 LLDB_PLUGIN_DEFINE_ADV(EmulateInstructionARM64, InstructionARM64)
 
 static bool LLDBTableGetRegisterInfo(uint32_t reg_num, RegisterInfo &reg_info) {
-  if (reg_num >= llvm::array_lengthof(g_register_infos_arm64_le))
+  if (reg_num >= llvm::size(g_register_infos_arm64_le))
     return false;
   reg_info = g_register_infos_arm64_le[reg_num];
   return true;
@@ -360,7 +360,7 @@ EmulateInstructionARM64::GetOpcodeForInstruction(const uint32_t opcode) {
        "TBNZ <R><t>, #<imm>, <label>"},
 
   };
-  static const size_t k_num_arm_opcodes = llvm::array_lengthof(g_opcodes);
+  static const size_t k_num_arm_opcodes = llvm::size(g_opcodes);
 
   for (size_t i = 0; i < k_num_arm_opcodes; ++i) {
     if ((g_opcodes[i].mask & opcode) == g_opcodes[i].value)
diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
index 4ef0a034b6dd4..fd5ac54722a4c 100644
--- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
@@ -954,7 +954,7 @@ EmulateInstructionMIPS::GetOpcodeForInstruction(const char *op_name) {
       {"JALRS_MM", &EmulateInstructionMIPS::Emulate_JALRS, "JALRS rt, rs"},
   };
 
-  static const size_t k_num_mips_opcodes = llvm::array_lengthof(g_opcodes);
+  static const size_t k_num_mips_opcodes = llvm::size(g_opcodes);
 
   for (size_t i = 0; i < k_num_mips_opcodes; ++i) {
     if (!strcasecmp(g_opcodes[i].op_name, op_name))
diff --git a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
index 26736f4c58baa..f9770cb494e5f 100644
--- a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
@@ -919,7 +919,7 @@ EmulateInstructionMIPS64::GetOpcodeForInstruction(const char *op_name) {
       {"BZ_V", &EmulateInstructionMIPS64::Emulate_BZV, "BZ.V wt,s16"},
   };
 
-  static const size_t k_num_mips_opcodes = llvm::array_lengthof(g_opcodes);
+  static const size_t k_num_mips_opcodes = llvm::size(g_opcodes);
 
   for (size_t i = 0; i < k_num_mips_opcodes; ++i) {
     if (!strcasecmp(g_opcodes[i].op_name, op_name))
diff --git a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
index dcfad8e106aab..292e909b3ca5c 100644
--- a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
+++ b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
@@ -59,7 +59,7 @@ bool EmulateInstructionPPC64::SetTargetTriple(const ArchSpec &arch) {
 }
 
 static bool LLDBTableGetRegisterInfo(uint32_t reg_num, RegisterInfo &reg_info) {
-  if (reg_num >= llvm::array_lengthof(g_register_infos_ppc64le))
+  if (reg_num >= llvm::size(g_register_infos_ppc64le))
     return false;
   reg_info = g_register_infos_ppc64le[reg_num];
   return true;
@@ -147,7 +147,7 @@ EmulateInstructionPPC64::GetOpcodeForInstruction(uint32_t opcode) {
        "addi RT, RA, SI"},
       {0xfc000003, 0xe8000000, &EmulateInstructionPPC64::EmulateLD,
        "ld RT, DS(RA)"}};
-  static const size_t k_num_ppc_opcodes = llvm::array_lengthof(g_opcodes);
+  static const size_t k_num_ppc_opcodes = llvm::size(g_opcodes);
 
   for (size_t i = 0; i < k_num_ppc_opcodes; ++i) {
     if ((g_opcodes[i].mask & opcode) == g_opcodes[i].value)
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
index ff41f187ba9d0..536869b1b3b64 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
@@ -749,7 +749,7 @@ AppleObjCTrampolineHandler::AppleObjCTrampolineHandler(
   // array into a template table, and populate the DispatchFunction
   // map from there.
 
-  for (size_t i = 0; i != llvm::array_lengthof(g_dispatch_functions); i++) {
+  for (size_t i = 0; i != llvm::size(g_dispatch_functions); i++) {
     ConstString name_const_str(g_dispatch_functions[i].name);
     const Symbol *msgSend_symbol =
         m_objc_module_sp->FindFirstSymbolWithNameAndType(name_const_str,
@@ -769,7 +769,7 @@ AppleObjCTrampolineHandler::AppleObjCTrampolineHandler(
   }
   
   // Similarly, cache the addresses of the "optimized dispatch" function.
-  for (size_t i = 0; i != llvm::array_lengthof(g_opt_dispatch_names); i++) {
+  for (size_t i = 0; i != llvm::size(g_opt_dispatch_names); i++) {
     ConstString name_const_str(g_opt_dispatch_names[i]);
     const Symbol *msgSend_symbol =
         m_objc_module_sp->FindFirstSymbolWithNameAndType(name_const_str,
diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
index f69f3c300ae78..71e28378872c2 100644
--- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
+++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
@@ -574,7 +574,7 @@ bool ObjectFilePECOFF::ParseSectionHeaders(
 }
 
 llvm::StringRef ObjectFilePECOFF::GetSectionName(const section_header_t &sect) {
-  llvm::StringRef hdr_name(sect.name, llvm::array_lengthof(sect.name));
+  llvm::StringRef hdr_name(sect.name, llvm::size(sect.name));
   hdr_name = hdr_name.split('\0').first;
   if (hdr_name.consume_front("/")) {
     lldb::offset_t stroff;
diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp
index 522c9a65e8682..ae66bfb65d80e 100644
--- a/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp
+++ b/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp
@@ -169,7 +169,7 @@ uint32_t EHProgramBuilder::ConvertMachineToLLDBRegister(uint8_t machine_reg) {
       lldb_r8_x86_64,  lldb_r9_x86_64,  lldb_r10_x86_64, lldb_r11_x86_64,
       lldb_r12_x86_64, lldb_r13_x86_64, lldb_r14_x86_64, lldb_r15_x86_64};
 
-  if (machine_reg >= llvm::array_lengthof(machine_to_lldb_register))
+  if (machine_reg >= llvm::size(machine_to_lldb_register))
     return LLDB_INVALID_REGNUM;
 
   return machine_to_lldb_register[machine_reg];
@@ -184,7 +184,7 @@ uint32_t EHProgramBuilder::ConvertXMMToLLDBRegister(uint8_t xmm_reg) {
       lldb_xmm12_x86_64, lldb_xmm13_x86_64, lldb_xmm14_x86_64,
       lldb_xmm15_x86_64};
 
-  if (xmm_reg >= llvm::array_lengthof(xmm_to_lldb_register))
+  if (xmm_reg >= llvm::size(xmm_to_lldb_register))
     return LLDB_INVALID_REGNUM;
 
   return xmm_to_lldb_register[xmm_reg];
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
index 461a21f44421b..733d1ee5d14f5 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
@@ -686,7 +686,7 @@ BreakpointSP PlatformDarwin::SetThreadCreationBreakpoint(Target &target) {
                                        "libSystem.B.dylib"};
 
   FileSpecList bp_modules;
-  for (size_t i = 0; i < llvm::array_lengthof(g_bp_modules); i++) {
+  for (size_t i = 0; i < llvm::size(g_bp_modules); i++) {
     const char *bp_module = g_bp_modules[i];
     bp_modules.EmplaceBack(bp_module);
   }
@@ -695,7 +695,7 @@ BreakpointSP PlatformDarwin::SetThreadCreationBreakpoint(Target &target) {
   bool hardware = false;
   LazyBool skip_prologue = eLazyBoolNo;
   bp_sp = target.CreateBreakpoint(&bp_modules, nullptr, g_bp_names,
-                                  llvm::array_lengthof(g_bp_names),
+                                  llvm::size(g_bp_names),
                                   eFunctionNameTypeFull, eLanguageTypeUnknown,
                                   0, skip_prologue, internal, hardware);
   bp_sp->SetBreakpointKind("thread-creation");
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
index e1d3b6ecd7d09..598a55286cc40 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
@@ -856,7 +856,7 @@ static uint32_t g_exc_regnums[] = {
     exc_exception, exc_fsr, exc_far,
 };
 
-static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
+static size_t k_num_register_infos = llvm::size(g_register_infos);
 
 RegisterContextDarwin_arm::RegisterContextDarwin_arm(
     Thread &thread, uint32_t concrete_frame_idx)
@@ -897,9 +897,9 @@ const RegisterInfo *RegisterContextDarwin_arm::GetRegisterInfos() {
 }
 
 // Number of registers in each register set
-const size_t k_num_gpr_registers = llvm::array_lengthof(g_gpr_regnums);
-const size_t k_num_fpu_registers = llvm::array_lengthof(g_fpu_regnums);
-const size_t k_num_exc_registers = llvm::array_lengthof(g_exc_regnums);
+const size_t k_num_gpr_registers = llvm::size(g_gpr_regnums);
+const size_t k_num_fpu_registers = llvm::size(g_fpu_regnums);
+const size_t k_num_exc_registers = llvm::size(g_exc_regnums);
 
 // Register set definitions. The first definitions at register set index of
 // zero is for all registers, followed by other registers sets. The register
@@ -911,7 +911,7 @@ static const RegisterSet g_reg_sets[] = {
     {"Floating Point Registers", "fpu", k_num_fpu_registers, g_fpu_regnums},
     {"Exception State Registers", "exc", k_num_exc_registers, g_exc_regnums}};
 
-const size_t k_num_regsets = llvm::array_lengthof(g_reg_sets);
+const size_t k_num_regsets = llvm::size(g_reg_sets);
 
 size_t RegisterContextDarwin_arm::GetRegisterSetCount() {
   return k_num_regsets;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
index 50f710e268152..f23a116fbb188 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
@@ -91,7 +91,7 @@ static uint32_t g_fpu_regnums[] = {
 static uint32_t g_exc_regnums[] = {exc_far, exc_esr, exc_exception};
 
 static size_t k_num_register_infos =
-    llvm::array_lengthof(g_register_infos_arm64_le);
+    llvm::size(g_register_infos_arm64_le);
 
 RegisterContextDarwin_arm64::RegisterContextDarwin_arm64(
     Thread &thread, uint32_t concrete_frame_idx)
@@ -132,9 +132,9 @@ const RegisterInfo *RegisterContextDarwin_arm64::GetRegisterInfos() {
 }
 
 // Number of registers in each register set
-const size_t k_num_gpr_registers = llvm::array_lengthof(g_gpr_regnums);
-const size_t k_num_fpu_registers = llvm::array_lengthof(g_fpu_regnums);
-const size_t k_num_exc_registers = llvm::array_lengthof(g_exc_regnums);
+const size_t k_num_gpr_registers = llvm::size(g_gpr_regnums);
+const size_t k_num_fpu_registers = llvm::size(g_fpu_regnums);
+const size_t k_num_exc_registers = llvm::size(g_exc_regnums);
 
 // Register set definitions. The first definitions at register set index of
 // zero is for all registers, followed by other registers sets. The register
@@ -146,7 +146,7 @@ static const RegisterSet g_reg_sets[] = {
     {"Floating Point Registers", "fpu", k_num_fpu_registers, g_fpu_regnums},
     {"Exception State Registers", "exc", k_num_exc_registers, g_exc_regnums}};
 
-const size_t k_num_regsets = llvm::array_lengthof(g_reg_sets);
+const size_t k_num_regsets = llvm::size(g_reg_sets);
 
 size_t RegisterContextDarwin_arm64::GetRegisterSetCount() {
   return k_num_regsets;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
index 5f56e6f1636a2..34c7b4fd64eff 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
@@ -363,7 +363,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      }};
 
-static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
+static size_t k_num_register_infos = llvm::size(g_register_infos);
 
 RegisterContextDarwin_i386::RegisterContextDarwin_i386(
     Thread &thread, uint32_t concrete_frame_idx)
@@ -421,9 +421,9 @@ static uint32_t g_fpu_regnums[] = {
 static uint32_t g_exc_regnums[] = {exc_trapno, exc_err, exc_faultvaddr};
 
 // Number of registers in each register set
-const size_t k_num_gpr_registers = llvm::array_lengthof(g_gpr_regnums);
-const size_t k_num_fpu_registers = llvm::array_lengthof(g_fpu_regnums);
-const size_t k_num_exc_registers = llvm::array_lengthof(g_exc_regnums);
+const size_t k_num_gpr_registers = llvm::size(g_gpr_regnums);
+const size_t k_num_fpu_registers = llvm::size(g_fpu_regnums);
+const size_t k_num_exc_registers = llvm::size(g_exc_regnums);
 
 // Register set definitions. The first definitions at register set index of
 // zero is for all registers, followed by other registers sets. The register
@@ -435,7 +435,7 @@ static const RegisterSet g_reg_sets[] = {
     {"Floating Point Registers", "fpu", k_num_fpu_registers, g_fpu_regnums},
     {"Exception State Registers", "exc", k_num_exc_registers, g_exc_regnums}};
 
-const size_t k_num_regsets = llvm::array_lengthof(g_reg_sets);
+const size_t k_num_regsets = llvm::size(g_reg_sets);
 
 size_t RegisterContextDarwin_i386::GetRegisterSetCount() {
   return k_num_regsets;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
index 567df8fc980ce..1551281e8d8d4 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
@@ -420,7 +420,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      }};
 
-static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
+static size_t k_num_register_infos = llvm::size(g_register_infos);
 
 RegisterContextDarwin_x86_64::RegisterContextDarwin_x86_64(
     Thread &thread, uint32_t concrete_frame_idx)
@@ -477,9 +477,9 @@ static uint32_t g_fpu_regnums[] = {
 static uint32_t g_exc_regnums[] = {exc_trapno, exc_err, exc_faultvaddr};
 
 // Number of registers in each register set
-const size_t k_num_gpr_registers = llvm::array_lengthof(g_gpr_regnums);
-const size_t k_num_fpu_registers = llvm::array_lengthof(g_fpu_regnums);
-const size_t k_num_exc_registers = llvm::array_lengthof(g_exc_regnums);
+const size_t k_num_gpr_registers = llvm::size(g_gpr_regnums);
+const size_t k_num_fpu_registers = llvm::size(g_fpu_regnums);
+const size_t k_num_exc_registers = llvm::size(g_exc_regnums);
 
 // Register set definitions. The first definitions at register set index of
 // zero is for all registers, followed by other registers sets. The register
@@ -491,7 +491,7 @@ static const RegisterSet g_reg_sets[] = {
     {"Floating Point Registers", "fpu", k_num_fpu_registers, g_fpu_regnums},
     {"Exception State Registers", "exc", k_num_exc_registers, g_exc_regnums}};
 
-const size_t k_num_regsets = llvm::array_lengthof(g_reg_sets);
+const size_t k_num_regsets = llvm::size(g_reg_sets);
 
 size_t RegisterContextDarwin_x86_64::GetRegisterSetCount() {
   return k_num_regsets;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
index 066d50d9c149f..cfd4d21763e49 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
@@ -78,11 +78,11 @@ const RegisterInfo *RegisterContextWindows_i386::GetRegisterInfo() const {
 }
 
 uint32_t RegisterContextWindows_i386::GetRegisterCount() const {
-  return llvm::array_lengthof(g_register_infos_i386);
+  return llvm::size(g_register_infos_i386);
 }
 
 uint32_t RegisterContextWindows_i386::GetUserRegisterCount() const {
-  return llvm::array_lengthof(g_register_infos_i386);
+  return llvm::size(g_register_infos_i386);
 }
 
 size_t RegisterContextWindows_i386::GetGPRSize() const { return sizeof(GPR); }
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
index a35ccace5d5be..f0fd6fec24fd8 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
@@ -141,11 +141,11 @@ const RegisterInfo *RegisterContextWindows_x86_64::GetRegisterInfo() const {
 }
 
 uint32_t RegisterContextWindows_x86_64::GetRegisterCount() const {
-  return llvm::array_lengthof(g_register_infos_x86_64);
+  return llvm::size(g_register_infos_x86_64);
 }
 
 uint32_t RegisterContextWindows_x86_64::GetUserRegisterCount() const {
-  return llvm::array_lengthof(g_register_infos_x86_64);
+  return llvm::size(g_register_infos_x86_64);
 }
 
 size_t RegisterContextWindows_x86_64::GetGPRSize() const { return sizeof(GPR); }
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
index d562c7df8bfa2..ea4485e1e075f 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
@@ -38,7 +38,7 @@ static const uint32_t g_gpr_regnums_WoW64[] = {
 
 static const RegisterSet g_reg_sets_WoW64[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_regnums_WoW64) - 1, g_gpr_regnums_WoW64},
+     llvm::size(g_gpr_regnums_WoW64) - 1, g_gpr_regnums_WoW64},
 };
 enum { k_num_register_sets = 1 };
 
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
index 5cfd790c624a3..1707a10530a95 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
@@ -69,9 +69,9 @@ static_assert(((sizeof g_fpr_regnums_arm / sizeof g_fpr_regnums_arm[0]) - 1) ==
 
 static const RegisterSet g_reg_sets_arm[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_regnums_arm) - 1, g_gpr_regnums_arm},
+     llvm::size(g_gpr_regnums_arm) - 1, g_gpr_regnums_arm},
     {"Floating Point Registers", "fpr",
-     llvm::array_lengthof(g_fpr_regnums_arm) - 1, g_fpr_regnums_arm},
+     llvm::size(g_fpr_regnums_arm) - 1, g_fpr_regnums_arm},
 };
 
 enum { k_num_register_sets = 2 };
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
index fc65945723218..792b2d59bcb40 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
@@ -85,9 +85,9 @@ static_assert(((sizeof g_fpr_regnums_arm64 / sizeof g_fpr_regnums_arm64[0]) -
 
 static const RegisterSet g_reg_sets_arm64[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_regnums_arm64) - 1, g_gpr_regnums_arm64},
+     llvm::size(g_gpr_regnums_arm64) - 1, g_gpr_regnums_arm64},
     {"Floating Point Registers", "fpr",
-     llvm::array_lengthof(g_fpr_regnums_arm64) - 1, g_fpr_regnums_arm64},
+     llvm::size(g_fpr_regnums_arm64) - 1, g_fpr_regnums_arm64},
 };
 
 enum { k_num_register_sets = 2 };
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
index 45da4cd02cec3..b0102e613de65 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
@@ -38,7 +38,7 @@ static const uint32_t g_gpr_regnums_i386[] = {
 
 static const RegisterSet g_reg_sets_i386[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_regnums_i386) - 1, g_gpr_regnums_i386},
+     llvm::size(g_gpr_regnums_i386) - 1, g_gpr_regnums_i386},
 };
 
 enum { k_num_register_sets = 1 };
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
index 819cd8eb383bf..bf7564966c787 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
@@ -49,9 +49,9 @@ static const uint32_t g_fpr_regnums_x86_64[] = {
 
 static const RegisterSet g_reg_sets_x86_64[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_regnums_x86_64) - 1, g_gpr_regnums_x86_64},
+     llvm::size(g_gpr_regnums_x86_64) - 1, g_gpr_regnums_x86_64},
     {"Floating Point Registers", "fpr",
-     llvm::array_lengthof(g_fpr_regnums_x86_64) - 1, g_fpr_regnums_x86_64}};
+     llvm::size(g_fpr_regnums_x86_64) - 1, g_fpr_regnums_x86_64}};
 
 enum { k_num_register_sets = 2 };
 
diff --git a/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp b/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp
index 2dc678a9efcff..368b7fcab3857 100644
--- a/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp
@@ -41,7 +41,7 @@ using namespace lldb_private;
 #include "Plugins/Process/Utility/RegisterInfos_arm.h"
 #undef DECLARE_REGISTER_INFOS_ARM_STRUCT
 
-static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos_arm);
+static size_t k_num_register_infos = llvm::size(g_register_infos_arm);
 
 // Array of lldb register numbers used to define the set of all General Purpose
 // Registers
@@ -69,8 +69,8 @@ uint32_t g_fpu_reg_indices[] = {
 
 RegisterSet g_register_sets[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
-    {"Floating Point Registers", "fpu", llvm::array_lengthof(g_fpu_reg_indices),
+     llvm::size(g_gpr_reg_indices), g_gpr_reg_indices},
+    {"Floating Point Registers", "fpu", llvm::size(g_fpu_reg_indices),
      g_fpu_reg_indices},
 };
 
@@ -82,7 +82,7 @@ RegisterContextWindows_arm::RegisterContextWindows_arm(
 RegisterContextWindows_arm::~RegisterContextWindows_arm() {}
 
 size_t RegisterContextWindows_arm::GetRegisterCount() {
-  return llvm::array_lengthof(g_register_infos_arm);
+  return llvm::size(g_register_infos_arm);
 }
 
 const RegisterInfo *
@@ -93,7 +93,7 @@ RegisterContextWindows_arm::GetRegisterInfoAtIndex(size_t reg) {
 }
 
 size_t RegisterContextWindows_arm::GetRegisterSetCount() {
-  return llvm::array_lengthof(g_register_sets);
+  return llvm::size(g_register_sets);
 }
 
 const RegisterSet *RegisterContextWindows_arm::GetRegisterSet(size_t reg_set) {
diff --git a/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp b/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp
index 3ce288597c86a..c57b4fb8e8935 100644
--- a/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp
@@ -45,7 +45,7 @@ using namespace lldb_private;
 #undef DECLARE_REGISTER_INFOS_ARM64_STRUCT
 
 static size_t k_num_register_infos =
-    llvm::array_lengthof(g_register_infos_arm64_le);
+    llvm::size(g_register_infos_arm64_le);
 
 // Array of lldb register numbers used to define the set of all General Purpose
 // Registers
@@ -83,8 +83,8 @@ uint32_t g_fpu_reg_indices[] = {
 
 RegisterSet g_register_sets[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
-    {"Floating Point Registers", "fpu", llvm::array_lengthof(g_fpu_reg_indices),
+     llvm::size(g_gpr_reg_indices), g_gpr_reg_indices},
+    {"Floating Point Registers", "fpu", llvm::size(g_fpu_reg_indices),
      g_fpu_reg_indices},
 };
 
@@ -96,7 +96,7 @@ RegisterContextWindows_arm64::RegisterContextWindows_arm64(
 RegisterContextWindows_arm64::~RegisterContextWindows_arm64() {}
 
 size_t RegisterContextWindows_arm64::GetRegisterCount() {
-  return llvm::array_lengthof(g_register_infos_arm64_le);
+  return llvm::size(g_register_infos_arm64_le);
 }
 
 const RegisterInfo *
@@ -107,7 +107,7 @@ RegisterContextWindows_arm64::GetRegisterInfoAtIndex(size_t reg) {
 }
 
 size_t RegisterContextWindows_arm64::GetRegisterSetCount() {
-  return llvm::array_lengthof(g_register_sets);
+  return llvm::size(g_register_sets);
 }
 
 const RegisterSet *
diff --git a/lldb/source/Plugins/Process/Windows/Common/x64/RegisterContextWindows_x64.cpp b/lldb/source/Plugins/Process/Windows/Common/x64/RegisterContextWindows_x64.cpp
index 41e0d429fe52b..ed0b1b43bda74 100644
--- a/lldb/source/Plugins/Process/Windows/Common/x64/RegisterContextWindows_x64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/x64/RegisterContextWindows_x64.cpp
@@ -212,7 +212,7 @@ RegisterInfo g_register_infos[] = {
     {DEFINE_FPU_XMM(xmm15)}
 };
 
-static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
+static size_t k_num_register_infos = llvm::size(g_register_infos);
 
 // Array of lldb register numbers used to define the set of all General Purpose
 // Registers
@@ -235,9 +235,9 @@ uint32_t g_fpu_reg_indices[] = {
 
 RegisterSet g_register_sets[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
+     llvm::size(g_gpr_reg_indices), g_gpr_reg_indices},
     {"Floating Point Registers", "fpu",
-     llvm::array_lengthof(g_fpu_reg_indices), g_fpu_reg_indices}};
+     llvm::size(g_fpu_reg_indices), g_fpu_reg_indices}};
 }
 
 // Constructors and Destructors
@@ -248,7 +248,7 @@ RegisterContextWindows_x64::RegisterContextWindows_x64(
 RegisterContextWindows_x64::~RegisterContextWindows_x64() {}
 
 size_t RegisterContextWindows_x64::GetRegisterCount() {
-  return llvm::array_lengthof(g_register_infos);
+  return llvm::size(g_register_infos);
 }
 
 const RegisterInfo *
@@ -259,7 +259,7 @@ RegisterContextWindows_x64::GetRegisterInfoAtIndex(size_t reg) {
 }
 
 size_t RegisterContextWindows_x64::GetRegisterSetCount() {
-  return llvm::array_lengthof(g_register_sets);
+  return llvm::size(g_register_sets);
 }
 
 const RegisterSet *RegisterContextWindows_x64::GetRegisterSet(size_t reg_set) {
diff --git a/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp b/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
index e4f1806597517..33c1257798ed8 100644
--- a/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
@@ -118,7 +118,7 @@ RegisterInfo g_register_infos[] = {
      nullptr,
     },
 };
-static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
+static size_t k_num_register_infos = llvm::size(g_register_infos);
 
 // Array of lldb register numbers used to define the set of all General Purpose
 // Registers
@@ -130,7 +130,7 @@ uint32_t g_gpr_reg_indices[] = {eRegisterIndexEax, eRegisterIndexEbx,
 
 RegisterSet g_register_sets[] = {
     {"General Purpose Registers", "gpr",
-     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
+     llvm::size(g_gpr_reg_indices), g_gpr_reg_indices},
 };
 }
 
@@ -142,7 +142,7 @@ RegisterContextWindows_x86::RegisterContextWindows_x86(
 RegisterContextWindows_x86::~RegisterContextWindows_x86() {}
 
 size_t RegisterContextWindows_x86::GetRegisterCount() {
-  return llvm::array_lengthof(g_register_infos);
+  return llvm::size(g_register_infos);
 }
 
 const RegisterInfo *
@@ -153,7 +153,7 @@ RegisterContextWindows_x86::GetRegisterInfoAtIndex(size_t reg) {
 }
 
 size_t RegisterContextWindows_x86::GetRegisterSetCount() {
-  return llvm::array_lengthof(g_register_sets);
+  return llvm::size(g_register_sets);
 }
 
 const RegisterSet *RegisterContextWindows_x86::GetRegisterSet(size_t reg_set) {
diff --git a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
index 7184dbacb08d6..a6eef5d8e9bb3 100644
--- a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
+++ b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
@@ -336,7 +336,7 @@ static RegisterInfo g_reg_infos[] = {
     DEF_Q(14),
     DEF_Q(15)};
 
-constexpr size_t k_num_reg_infos = llvm::array_lengthof(g_reg_infos);
+constexpr size_t k_num_reg_infos = llvm::size(g_reg_infos);
 
 // ARM general purpose registers.
 const uint32_t g_gpr_regnums[] = {
@@ -445,26 +445,26 @@ const uint32_t g_fpu_regnums[] = {
 };
 
 // Skip the last LLDB_INVALID_REGNUM in each count below by subtracting 1
-constexpr size_t k_num_gpr_regs = llvm::array_lengthof(g_gpr_regnums) - 1;
-constexpr size_t k_num_fpu_regs = llvm::array_lengthof(g_fpu_regnums) - 1;
+constexpr size_t k_num_gpr_regs = llvm::size(g_gpr_regnums) - 1;
+constexpr size_t k_num_fpu_regs = llvm::size(g_fpu_regnums) - 1;
 
 static RegisterSet g_reg_sets[] = {
     {"General Purpose Registers", "gpr", k_num_gpr_regs, g_gpr_regnums},
     {"Floating Point Registers", "fpu", k_num_fpu_regs, g_fpu_regnums},
 };
 
-constexpr size_t k_num_reg_sets = llvm::array_lengthof(g_reg_sets);
+constexpr size_t k_num_reg_sets = llvm::size(g_reg_sets);
 
 RegisterContextMinidump_ARM::RegisterContextMinidump_ARM(
     lldb_private::Thread &thread, const DataExtractor &data, bool apple)
     : RegisterContext(thread, 0), m_apple(apple) {
   lldb::offset_t offset = 0;
   m_regs.context_flags = data.GetU32(&offset);
-  for (unsigned i = 0; i < llvm::array_lengthof(m_regs.r); ++i)
+  for (unsigned i = 0; i < llvm::size(m_regs.r); ++i)
     m_regs.r[i] = data.GetU32(&offset);
   m_regs.cpsr = data.GetU32(&offset);
   m_regs.fpscr = data.GetU64(&offset);
-  for (unsigned i = 0; i < llvm::array_lengthof(m_regs.d); ++i)
+  for (unsigned i = 0; i < llvm::size(m_regs.d); ++i)
     m_regs.d[i] = data.GetU64(&offset);
   lldbassert(k_num_regs == k_num_reg_infos);
 }
diff --git a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
index e606ec9c3b644..d139d68c741c7 100644
--- a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
+++ b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
@@ -544,7 +544,7 @@ static RegisterInfo g_reg_infos[] = {
     DEF_H(31),
 };
 
-constexpr size_t k_num_reg_infos = llvm::array_lengthof(g_reg_infos);
+constexpr size_t k_num_reg_infos = llvm::size(g_reg_infos);
 
 // ARM64 general purpose registers.
 const uint32_t g_gpr_regnums[] = {
@@ -751,15 +751,15 @@ const uint32_t g_fpu_regnums[] = {
 };
 
 // Skip the last LLDB_INVALID_REGNUM in each count below by subtracting 1
-constexpr size_t k_num_gpr_regs = llvm::array_lengthof(g_gpr_regnums) - 1;
-constexpr size_t k_num_fpu_regs = llvm::array_lengthof(g_fpu_regnums) - 1;
+constexpr size_t k_num_gpr_regs = llvm::size(g_gpr_regnums) - 1;
+constexpr size_t k_num_fpu_regs = llvm::size(g_fpu_regnums) - 1;
 
 static RegisterSet g_reg_sets[] = {
     {"General Purpose Registers", "gpr", k_num_gpr_regs, g_gpr_regnums},
     {"Floating Point Registers", "fpu", k_num_fpu_regs, g_fpu_regnums},
 };
 
-constexpr size_t k_num_reg_sets = llvm::array_lengthof(g_reg_sets);
+constexpr size_t k_num_reg_sets = llvm::size(g_reg_sets);
 
 RegisterContextMinidump_ARM64::RegisterContextMinidump_ARM64(
     lldb_private::Thread &thread, const DataExtractor &data)
diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp
index 74a4370423458..de63568ed7ecc 100644
--- a/lldb/source/Utility/ArchSpec.cpp
+++ b/lldb/source/Utility/ArchSpec.cpp
@@ -254,12 +254,12 @@ struct ArchDefinition {
 };
 
 void ArchSpec::ListSupportedArchNames(StringList &list) {
-  for (uint32_t i = 0; i < llvm::array_lengthof(g_core_definitions); ++i)
+  for (uint32_t i = 0; i < llvm::size(g_core_definitions); ++i)
     list.AppendString(g_core_definitions[i].name);
 }
 
 void ArchSpec::AutoComplete(CompletionRequest &request) {
-  for (uint32_t i = 0; i < llvm::array_lengthof(g_core_definitions); ++i)
+  for (uint32_t i = 0; i < llvm::size(g_core_definitions); ++i)
     request.TryCompleteCurrentArg(g_core_definitions[i].name);
 }
 
@@ -340,7 +340,7 @@ static const ArchDefinitionEntry g_macho_arch_entries[] = {
 // clang-format on
 
 static const ArchDefinition g_macho_arch_def = {
-    eArchTypeMachO, llvm::array_lengthof(g_macho_arch_entries),
+    eArchTypeMachO, llvm::size(g_macho_arch_entries),
     g_macho_arch_entries, "mach-o"};
 
 //===----------------------------------------------------------------------===//
@@ -409,7 +409,7 @@ static const ArchDefinitionEntry g_elf_arch_entries[] = {
 
 static const ArchDefinition g_elf_arch_def = {
     eArchTypeELF,
-    llvm::array_lengthof(g_elf_arch_entries),
+    llvm::size(g_elf_arch_entries),
     g_elf_arch_entries,
     "elf",
 };
@@ -435,7 +435,7 @@ static const ArchDefinitionEntry g_coff_arch_entries[] = {
 
 static const ArchDefinition g_coff_arch_def = {
     eArchTypeCOFF,
-    llvm::array_lengthof(g_coff_arch_entries),
+    llvm::size(g_coff_arch_entries),
     g_coff_arch_entries,
     "pe-coff",
 };
@@ -446,7 +446,7 @@ static const ArchDefinition *g_arch_definitions[] = {
     &g_macho_arch_def, &g_elf_arch_def, &g_coff_arch_def};
 
 static const size_t k_num_arch_definitions =
-    llvm::array_lengthof(g_arch_definitions);
+    llvm::size(g_arch_definitions);
 
 //===----------------------------------------------------------------------===//
 // Static helper functions.
@@ -463,7 +463,7 @@ static const ArchDefinition *FindArchDefinition(ArchitectureType arch_type) {
 
 // Get an architecture definition by name.
 static const CoreDefinition *FindCoreDefinition(llvm::StringRef name) {
-  for (unsigned int i = 0; i < llvm::array_lengthof(g_core_definitions); ++i) {
+  for (unsigned int i = 0; i < llvm::size(g_core_definitions); ++i) {
     if (name.equals_insensitive(g_core_definitions[i].name))
       return &g_core_definitions[i];
   }
@@ -471,7 +471,7 @@ static const CoreDefinition *FindCoreDefinition(llvm::StringRef name) {
 }
 
 static inline const CoreDefinition *FindCoreDefinition(ArchSpec::Core core) {
-  if (core < llvm::array_lengthof(g_core_definitions))
+  if (core < llvm::size(g_core_definitions))
     return &g_core_definitions[core];
   return nullptr;
 }
diff --git a/lldb/unittests/Utility/StatusTest.cpp b/lldb/unittests/Utility/StatusTest.cpp
index 9b9d870cd12a3..f6bc11058c6a5 100644
--- a/lldb/unittests/Utility/StatusTest.cpp
+++ b/lldb/unittests/Utility/StatusTest.cpp
@@ -72,7 +72,7 @@ TEST(StatusTest, ErrorWin32) {
   EXPECT_TRUE(success.Success());
 
   WCHAR name[128]{};
-  ULONG nameLen = llvm::array_lengthof(name);
+  ULONG nameLen = llvm::size(name);
   ULONG langs = 0;
   GetUserPreferredUILanguages(MUI_LANGUAGE_NAME, &langs,
                               reinterpret_cast<PZZWSTR>(&name), &nameLen);
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index c3200c9265182..b0a8a436f9c7a 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -1430,12 +1430,6 @@ void shuffle(Iterator first, Iterator last, RNG &&g) {
   }
 }
 
-/// Find the length of an array.
-template <class T, std::size_t N>
-constexpr inline size_t array_lengthof(T (&)[N]) {
-  return N;
-}
-
 /// Adapt std::less<T> for array_pod_sort.
 template<typename T>
 inline int array_pod_sort_comparator(const void *P1, const void *P2) {
diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index 440b29df260ca..49e96234a752f 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -63,6 +63,16 @@ struct in_place_index_t // NOLINT(readability-identifier-naming)
   explicit in_place_index_t() = default;
 };
 
+template <class C>
+constexpr auto size(const C& c) -> decltype(c.size()) { // NOLINT(readability-identifier-naming)
+  return c.size();
+}
+
+template <class T, std::size_t N>
+constexpr std::size_t size(const T (&array)[N]) noexcept { // NOLINT(readability-identifier-naming)
+  return N;
+}
+
 //===----------------------------------------------------------------------===//
 //     Features from C++20
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index bec1915705947..792b3ebe87256 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -924,7 +924,7 @@ class TargetLoweringBase {
   /// promotions or expansions.
   bool isTypeLegal(EVT VT) const {
     assert(!VT.isSimple() ||
-           (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
+           (unsigned)VT.getSimpleVT().SimpleTy < size(RegClassForVT));
     return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
   }
 
@@ -1078,7 +1078,7 @@ class TargetLoweringBase {
     if (VT.isExtended()) return Expand;
     // If a target-specific SDNode requires legalization, require the target
     // to provide custom legalization for it.
-    if (Op >= array_lengthof(OpActions[0])) return Custom;
+    if (Op >= size(OpActions[0])) return Custom;
     return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
   }
 
@@ -1379,8 +1379,8 @@ class TargetLoweringBase {
   /// expander for it.
   LegalizeAction
   getCondCodeAction(ISD::CondCode CC, MVT VT) const {
-    assert((unsigned)CC < array_lengthof(CondCodeActions) &&
-           ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
+    assert((unsigned)CC < size(CondCodeActions) &&
+           ((unsigned)VT.SimpleTy >> 3) < size(CondCodeActions[0]) &&
            "Table isn't big enough!");
     // See setCondCodeAction for how this is encoded.
     uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
@@ -1488,7 +1488,7 @@ class TargetLoweringBase {
 
   /// Return the type of registers that this ValueType will eventually require.
   MVT getRegisterType(MVT VT) const {
-    assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
+    assert((unsigned)VT.SimpleTy < size(RegisterTypeForVT));
     return RegisterTypeForVT[VT.SimpleTy];
   }
 
@@ -1496,7 +1496,7 @@ class TargetLoweringBase {
   MVT getRegisterType(LLVMContext &Context, EVT VT) const {
     if (VT.isSimple()) {
       assert((unsigned)VT.getSimpleVT().SimpleTy <
-                array_lengthof(RegisterTypeForVT));
+                size(RegisterTypeForVT));
       return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
     }
     if (VT.isVector()) {
@@ -1529,7 +1529,7 @@ class TargetLoweringBase {
                   Optional<MVT> RegisterVT = None) const {
     if (VT.isSimple()) {
       assert((unsigned)VT.getSimpleVT().SimpleTy <
-                array_lengthof(NumRegistersForVT));
+                size(NumRegistersForVT));
       return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
     }
     if (VT.isVector()) {
@@ -1597,7 +1597,7 @@ class TargetLoweringBase {
   /// If true, the target has custom DAG combine transformations that it can
   /// perform for the specified node.
   bool hasTargetDAGCombine(ISD::NodeType NT) const {
-    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
+    assert(unsigned(NT >> 3) < size(TargetDAGCombineArray));
     return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
   }
 
@@ -2212,7 +2212,7 @@ class TargetLoweringBase {
   /// specified value type. This indicates the selector can handle values of
   /// that class natively.
   void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
-    assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
+    assert((unsigned)VT.SimpleTy < size(RegClassForVT));
     RegClassForVT[VT.SimpleTy] = RC;
   }
 
@@ -2229,7 +2229,7 @@ class TargetLoweringBase {
   /// type and indicate what to do about it. Note that VT may refer to either
   /// the type of a result or that of an operand of Op.
   void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) {
-    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
+    assert(Op < size(OpActions[0]) && "Table isn't big enough!");
     OpActions[(unsigned)VT.SimpleTy][Op] = Action;
   }
 
@@ -2294,7 +2294,7 @@ class TargetLoweringBase {
   /// target and indicate what to do about it.
   void setCondCodeAction(ISD::CondCode CC, MVT VT,
                          LegalizeAction Action) {
-    assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
+    assert(VT.isValid() && (unsigned)CC < size(CondCodeActions) &&
            "Table isn't big enough!");
     assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
     /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit
@@ -2324,7 +2324,7 @@ class TargetLoweringBase {
   /// they want to provide a custom DAG combiner for by implementing the
   /// PerformDAGCombine virtual method.
   void setTargetDAGCombine(ISD::NodeType NT) {
-    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
+    assert(unsigned(NT >> 3) < size(TargetDAGCombineArray));
     TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
   }
 
diff --git a/llvm/include/llvm/MC/SubtargetFeature.h b/llvm/include/llvm/MC/SubtargetFeature.h
index 032e2a7df1f2d..6eb6c13efe3e9 100644
--- a/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/llvm/include/llvm/MC/SubtargetFeature.h
@@ -104,7 +104,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator^=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = llvm::size(Bits); I != E; ++I) {
       Bits[I] ^= RHS.Bits[I];
     }
     return *this;
@@ -116,7 +116,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator&=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = llvm::size(Bits); I != E; ++I) {
       Bits[I] &= RHS.Bits[I];
     }
     return *this;
@@ -128,7 +128,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator|=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = llvm::size(Bits); I != E; ++I) {
       Bits[I] |= RHS.Bits[I];
     }
     return *this;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 37bc8a65dc7c3..5d43a9523182b 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4329,7 +4329,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
       // The input vector this mask element indexes into.
       unsigned Input = (unsigned)Idx / NewElts;
 
-      if (Input >= array_lengthof(Inputs)) {
+      if (Input >= size(Inputs)) {
         // The mask element does not index into any input vector.
         Ops.push_back(-1);
         continue;
@@ -4340,7 +4340,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
 
       // Find or create a shuffle vector operand to hold this input.
       unsigned OpNo;
-      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+      for (OpNo = 0; OpNo < size(InputUsed); ++OpNo) {
         if (InputUsed[OpNo] == Input) {
           // This input vector is already an operand.
           break;
@@ -4351,7 +4351,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
         }
       }
 
-      if (OpNo >= array_lengthof(InputUsed)) {
+      if (OpNo >= size(InputUsed)) {
         // More than two input vectors used!  Give up on trying to create a
         // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
         UseBuildVector = true;
@@ -4374,7 +4374,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
         // The input vector this mask element indexes into.
         unsigned Input = (unsigned)Idx / NewElts;
 
-        if (Input >= array_lengthof(Inputs)) {
+        if (Input >= size(Inputs)) {
           // The mask element is "undef" or indexes off the end of the input.
           SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
           continue;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 0bd44ce4c872e..997f1de7ef3a1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2153,7 +2153,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
       // The input vector this mask element indexes into.
       unsigned Input = (unsigned)Idx / NewElts;
 
-      if (Input >= array_lengthof(Inputs)) {
+      if (Input >= size(Inputs)) {
         // The mask element does not index into any input vector.
         Ops.push_back(-1);
         continue;
@@ -2164,7 +2164,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
 
       // Find or create a shuffle vector operand to hold this input.
       unsigned OpNo;
-      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
+      for (OpNo = 0; OpNo < size(InputUsed); ++OpNo) {
         if (InputUsed[OpNo] == Input) {
           // This input vector is already an operand.
           break;
@@ -2175,7 +2175,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
         }
       }
 
-      if (OpNo >= array_lengthof(InputUsed)) {
+      if (OpNo >= size(InputUsed)) {
         // More than two input vectors used!  Give up on trying to create a
         // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
         useBuildVector = true;
@@ -2198,7 +2198,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
         // The input vector this mask element indexes into.
         unsigned Input = (unsigned)Idx / NewElts;
 
-        if (Input >= array_lengthof(Inputs)) {
+        if (Input >= size(Inputs)) {
           // The mask element is "undef" or indexes off the end of the input.
           SVOps.push_back(DAG.getUNDEF(EltVT));
           continue;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index ab574232e3677..f34a917bf4484 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -966,7 +966,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
   // If this is a simple type, use the ComputeRegisterProp mechanism.
   if (VT.isSimple()) {
     MVT SVT = VT.getSimpleVT();
-    assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
+    assert((unsigned)SVT.SimpleTy < size(TransformToType));
     MVT NVT = TransformToType[SVT.SimpleTy];
     LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
 
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index f6538c0549d0b..fd6ab9f7f0f4d 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -394,7 +394,7 @@ void writeIndexTable(
     const MapVector<uint64_t, UnitIndexEntry> &IndexEntries,
     uint32_t DWARFUnitIndex::Entry::SectionContribution::*Field) {
   for (const auto &E : IndexEntries)
-    for (size_t I = 0; I != array_lengthof(E.second.Contributions); ++I)
+    for (size_t I = 0; I != size(E.second.Contributions); ++I)
       if (ContributionOffsets[I])
         Out.emitIntValue(E.second.Contributions[I].*Field, 4);
 }
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index b20e581d283aa..dc6197a4338af 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -326,14 +326,14 @@ void GenericDINode::recalculateHash() {
     }                                                                          \
   } while (false)
 #define DEFINE_GETIMPL_STORE(CLASS, ARGS, OPS)                                 \
-  return storeImpl(new (array_lengthof(OPS))                                   \
+  return storeImpl(new (size(OPS))                                   \
                        CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS),        \
                    Storage, Context.pImpl->CLASS##s)
 #define DEFINE_GETIMPL_STORE_NO_OPS(CLASS, ARGS)                               \
   return storeImpl(new (0u) CLASS(Context, Storage, UNWRAP_ARGS(ARGS)),        \
                    Storage, Context.pImpl->CLASS##s)
 #define DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(CLASS, OPS)                   \
-  return storeImpl(new (array_lengthof(OPS)) CLASS(Context, Storage, OPS),     \
+  return storeImpl(new (size(OPS)) CLASS(Context, Storage, OPS),     \
                    Storage, Context.pImpl->CLASS##s)
 #define DEFINE_GETIMPL_STORE_N(CLASS, ARGS, OPS, NUM_OPS)                      \
   return storeImpl(new (NUM_OPS)                                               \
@@ -772,7 +772,7 @@ DICompileUnit *DICompileUnit::getImpl(
                      Macros,
                      SysRoot,
                      SDK};
-  return storeImpl(new (array_lengthof(Ops)) DICompileUnit(
+  return storeImpl(new (size(Ops)) DICompileUnit(
                        Context, Storage, SourceLanguage, IsOptimized,
                        RuntimeVersion, EmissionKind, DWOId, SplitDebugInlining,
                        DebugInfoForProfiling, NameTableKind, RangesBaseAddress,
diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp
index 7989dd57907ca..8bf0e76b08594 100644
--- a/llvm/lib/MC/MCAsmBackend.cpp
+++ b/llvm/lib/MC/MCAsmBackend.cpp
@@ -97,7 +97,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"FK_SecRel_8", 0, 64, 0},
   };
 
-  assert((size_t)Kind <= array_lengthof(Builtins) && "Unknown fixup kind");
+  assert((size_t)Kind <= size(Builtins) && "Unknown fixup kind");
   return Builtins[Kind];
 }
 
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 2cb5a000f88a7..cd6b5325caac5 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -306,7 +306,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
       0, // length of DW_LNS_set_epilogue_begin
       1  // DW_LNS_set_isa
   };
-  assert(array_lengthof(StandardOpcodeLengths) >=
+  assert(size(StandardOpcodeLengths) >=
          (Params.DWARF2LineOpcodeBase - 1U));
   return Emit(
       MCOS, Params,
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 42e257516f4e0..96e1fb137aa8c 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -2288,7 +2288,7 @@ void MachOObjectFile::getRelocationTypeName(
         "ARM64_RELOC_ADDEND"
       };
 
-      if (RType >= array_lengthof(Table))
+      if (RType >= size(Table))
         res = "Unknown";
       else
         res = Table[RType];
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index 908e56319025d..f9185a5b5e8bb 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -211,7 +211,7 @@ Error ARMAttributeParser::ABI_align_needed(AttrType tag) {
   uint64_t value = de.getULEB128(cursor);
 
   std::string description;
-  if (value < array_lengthof(strings))
+  if (value < size(strings))
     description = strings[value];
   else if (value <= 12)
     description = "8-byte alignment, " + utostr(1ULL << value) +
@@ -230,7 +230,7 @@ Error ARMAttributeParser::ABI_align_preserved(AttrType tag) {
   uint64_t value = de.getULEB128(cursor);
 
   std::string description;
-  if (value < array_lengthof(strings))
+  if (value < size(strings))
     description = std::string(strings[value]);
   else if (value <= 12)
     description = std::string("8-byte stack alignment, ") +
@@ -382,7 +382,7 @@ Error ARMAttributeParser::nodefaults(AttrType tag) {
 
 Error ARMAttributeParser::handler(uint64_t tag, bool &handled) {
   handled = false;
-  for (unsigned AHI = 0, AHE = array_lengthof(displayRoutines); AHI != AHE;
+  for (unsigned AHI = 0, AHE = size(displayRoutines); AHI != AHE;
        ++AHI) {
     if (uint64_t(displayRoutines[AHI].attribute) == tag) {
       if (Error e =
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index 2ee3074b840e4..b55e8a548d0bb 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -348,7 +348,7 @@ static void uninstallExceptionOrSignalHandlers() {
 
 static const int Signals[] =
     { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
-static const unsigned NumSignals = array_lengthof(Signals);
+static const unsigned NumSignals = size(Signals);
 static struct sigaction PrevActions[NumSignals];
 
 static void CrashRecoverySignalHandler(int Signal) {
diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp
index 0a797046bb684..fc7199ee9df12 100644
--- a/llvm/lib/Support/NativeFormatting.cpp
+++ b/llvm/lib/Support/NativeFormatting.cpp
@@ -145,7 +145,7 @@ void llvm::write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style,
       std::max(static_cast<unsigned>(W), std::max(1u, Nibbles) + PrefixChars);
 
   char NumberBuffer[kMaxWidth];
-  ::memset(NumberBuffer, '0', llvm::array_lengthof(NumberBuffer));
+  ::memset(NumberBuffer, '0', llvm::size(NumberBuffer));
   if (Prefix)
     NumberBuffer[1] = 'x';
   char *EndPtr = NumberBuffer + NumChars;
diff --git a/llvm/lib/Support/RISCVAttributeParser.cpp b/llvm/lib/Support/RISCVAttributeParser.cpp
index 393861c73a4a9..7629719a55e41 100644
--- a/llvm/lib/Support/RISCVAttributeParser.cpp
+++ b/llvm/lib/Support/RISCVAttributeParser.cpp
@@ -53,7 +53,7 @@ Error RISCVAttributeParser::stackAlign(unsigned tag) {
 
 Error RISCVAttributeParser::handler(uint64_t tag, bool &handled) {
   handled = false;
-  for (unsigned AHI = 0, AHE = array_lengthof(displayRoutines); AHI != AHE;
+  for (unsigned AHI = 0, AHE = size(displayRoutines); AHI != AHE;
        ++AHI) {
     if (uint64_t(displayRoutines[AHI].attribute) == tag) {
       if (Error e = (this->*displayRoutines[AHI].routine)(tag))
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index 20dea8c302a5e..925ff1be369fc 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -898,13 +898,13 @@ std::string Triple::normalize(StringRef Str) {
   // If they are not there already, permute the components into their canonical
   // positions by seeing if they parse as a valid architecture, and if so moving
   // the component to the architecture position etc.
-  for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) {
+  for (unsigned Pos = 0; Pos != size(Found); ++Pos) {
     if (Found[Pos])
       continue; // Already in the canonical position.
 
     for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
       // Do not reparse any components that already matched.
-      if (Idx < array_lengthof(Found) && Found[Idx])
+      if (Idx < size(Found) && Found[Idx])
         continue;
 
       // Does this component parse as valid for the target position?
@@ -952,7 +952,7 @@ std::string Triple::normalize(StringRef Str) {
         // components to the right.
         for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
           // Skip over any fixed components.
-          while (i < array_lengthof(Found) && Found[i])
+          while (i < size(Found) && Found[i])
             ++i;
           // Place the component at the new position, getting the component
           // that was at this position - it will be moved right.
@@ -973,7 +973,7 @@ std::string Triple::normalize(StringRef Str) {
             if (CurrentComponent.empty())
               break;
             // Advance to the next component, skipping any fixed components.
-            while (++i < array_lengthof(Found) && Found[i])
+            while (++i < size(Found) && Found[i])
               ;
           }
           // The last component was pushed off the end - append it.
@@ -981,7 +981,7 @@ std::string Triple::normalize(StringRef Str) {
             Components.push_back(CurrentComponent);
 
           // Advance Idx to the component's new position.
-          while (++Idx < array_lengthof(Found) && Found[Idx])
+          while (++Idx < size(Found) && Found[Idx])
             ;
         } while (Idx < Pos); // Add more until the final position is reached.
       }
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 575e2aab1eab2..29176e312c7d7 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -239,8 +239,8 @@ static const int InfoSigs[] = {
 };
 
 static const size_t NumSigs =
-    array_lengthof(IntSigs) + array_lengthof(KillSigs) +
-    array_lengthof(InfoSigs) + 1 /* SIGPIPE */;
+    size(IntSigs) + size(KillSigs) +
+    size(InfoSigs) + 1 /* SIGPIPE */;
 
 
 static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0);
@@ -298,7 +298,7 @@ static void RegisterHandlers() { // Not signal-safe.
   enum class SignalKind { IsKill, IsInfo };
   auto registerHandler = [&](int Signal, SignalKind Kind) {
     unsigned Index = NumRegisteredSignals.load();
-    assert(Index < array_lengthof(RegisteredSignalInfo) &&
+    assert(Index < size(RegisteredSignalInfo) &&
            "Out of space for signal handlers!");
 
     struct sigaction NewHandler;
@@ -562,13 +562,13 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
 #if defined(HAVE_BACKTRACE)
   // Use backtrace() to output a backtrace on Linux systems with glibc.
   if (!depth)
-    depth = backtrace(StackTrace, static_cast<int>(array_lengthof(StackTrace)));
+    depth = backtrace(StackTrace, static_cast<int>(size(StackTrace)));
 #endif
 #if defined(HAVE__UNWIND_BACKTRACE)
   // Try _Unwind_Backtrace() if backtrace() failed.
   if (!depth)
     depth = unwindBacktrace(StackTrace,
-                        static_cast<int>(array_lengthof(StackTrace)));
+                        static_cast<int>(size(StackTrace)));
 #endif
   if (!depth)
     return;
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index 5f1a364ea1a8e..544b9157a92a9 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -687,7 +687,7 @@ static bool isReservedName(StringRef path) {
     return true;
 
   // Then compare against the list of ancient reserved names.
-  for (size_t i = 0; i < array_lengthof(sReservedNames); ++i) {
+  for (size_t i = 0; i < size(sReservedNames); ++i) {
     if (path.equals_insensitive(sReservedNames[i]))
       return true;
   }
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index 32186bbe51607..c05b743b334e8 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -242,7 +242,7 @@ static bool printStackTraceWithLLVMSymbolizer(llvm::raw_ostream &OS,
     if (StackFrame.AddrFrame.Offset == 0)
       break;
     StackTrace[Depth++] = (void *)(uintptr_t)StackFrame.AddrPC.Offset;
-    if (Depth >= array_lengthof(StackTrace))
+    if (Depth >= size(StackTrace))
       break;
   }
 
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 10f9692d217e9..4c3013af6fb44 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -54,7 +54,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator&=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = size(Bits); I != E; ++I) {
       // GCC <6.2 crashes if this is written in a single statement.
       uint32_t NewBits = Bits[I] & RHS.Bits[I];
       Bits[I] = NewBits;
@@ -63,7 +63,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator|=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = size(Bits); I != E; ++I) {
       // GCC <6.2 crashes if this is written in a single statement.
       uint32_t NewBits = Bits[I] | RHS.Bits[I];
       Bits[I] = NewBits;
@@ -74,7 +74,7 @@ class FeatureBitset {
   // gcc 5.3 miscompiles this if we try to write this using operator&=.
   constexpr FeatureBitset operator&(const FeatureBitset &RHS) const {
     FeatureBitset Result;
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+    for (unsigned I = 0, E = size(Bits); I != E; ++I)
       Result.Bits[I] = Bits[I] & RHS.Bits[I];
     return Result;
   }
@@ -82,20 +82,20 @@ class FeatureBitset {
   // gcc 5.3 miscompiles this if we try to write this using operator&=.
   constexpr FeatureBitset operator|(const FeatureBitset &RHS) const {
     FeatureBitset Result;
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+    for (unsigned I = 0, E = size(Bits); I != E; ++I)
       Result.Bits[I] = Bits[I] | RHS.Bits[I];
     return Result;
   }
 
   constexpr FeatureBitset operator~() const {
     FeatureBitset Result;
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+    for (unsigned I = 0, E = size(Bits); I != E; ++I)
       Result.Bits[I] = ~Bits[I];
     return Result;
   }
 
   constexpr bool operator!=(const FeatureBitset &RHS) const {
-    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
+    for (unsigned I = 0, E = size(Bits); I != E; ++I)
       if (Bits[I] != RHS.Bits[I])
         return true;
     return false;
@@ -690,7 +690,7 @@ unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
 #include "llvm/Support/X86TargetParser.def"
       std::numeric_limits<unsigned>::max() // Need to consume last comma.
   };
-  std::array<unsigned, array_lengthof(Priorities) - 1> HelperList;
+  std::array<unsigned, size(Priorities) - 1> HelperList;
   std::iota(HelperList.begin(), HelperList.end(), 0);
   assert(std::is_permutation(HelperList.begin(), HelperList.end(),
                              std::begin(Priorities),
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index e4b747b68beaa..96e7466e4e4d0 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -480,12 +480,12 @@ static raw_ostream &write_padding(raw_ostream &OS, unsigned NumChars) {
                                C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C};
 
   // Usually the indentation is small, handle it with a fastpath.
-  if (NumChars < array_lengthof(Chars))
+  if (NumChars < size(Chars))
     return OS.write(Chars, NumChars);
 
   while (NumChars) {
     unsigned NumToWrite = std::min(NumChars,
-                                   (unsigned)array_lengthof(Chars)-1);
+                                   (unsigned)size(Chars)-1);
     OS.write(Chars, NumToWrite);
     NumChars -= NumToWrite;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cce714ea918e8..13dfea3edaf43 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5640,7 +5640,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
   static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
                                           AArch64::X3, AArch64::X4, AArch64::X5,
                                           AArch64::X6, AArch64::X7 };
-  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
+  static const unsigned NumGPRArgRegs = size(GPRArgRegs);
   unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
 
   unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
@@ -5676,7 +5676,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
     static const MCPhysReg FPRArgRegs[] = {
         AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
         AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
-    static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
+    static const unsigned NumFPRArgRegs = size(FPRArgRegs);
     unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
 
     unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index ee0870d9ef7ae..462176155b7b6 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -706,7 +706,7 @@ static const LdStNInstrDesc LdStNInstInfo[] = {
 
 static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
   unsigned Idx;
-  for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx)
+  for (Idx = 0; Idx != size(LdStNInstInfo); ++Idx)
     if (LdStNInstInfo[Idx].Opcode == Opcode)
       return &LdStNInstInfo[Idx];
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index aa7c7ff2e388d..5c7f2b5aee29b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -347,7 +347,7 @@ const UnmangledFuncInfo UnmangledFuncInfo::Table[] = {
 };
 
 const unsigned UnmangledFuncInfo::TableSize =
-    array_lengthof(UnmangledFuncInfo::Table);
+    size(UnmangledFuncInfo::Table);
 
 static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
                                        const AMDGPULibFunc::Param (&Leads)[2]) {
@@ -555,7 +555,7 @@ static AMDGPULibFunc::ENamePrefix parseNamePrefix(StringRef& mangledName) {
 }
 
 StringMap<int> ManglingRule::buildManglingRulesMap() {
-  StringMap<int> Map(array_lengthof(manglingRules));
+  StringMap<int> Map(size(manglingRules));
   int Id = 0;
   for (auto Rule : manglingRules)
     Map.insert({Rule.Name, Id++});
diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index c329bae50f92e..ba86df621fa0b 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -29,7 +29,7 @@ unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) {
     R600::sub12, R600::sub13, R600::sub14, R600::sub15
   };
 
-  assert(Channel < array_lengthof(SubRegFromChannelTable));
+  assert(Channel < size(SubRegFromChannelTable));
   return SubRegFromChannelTable[Channel];
 }
 
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.cpp b/llvm/lib/Target/ARC/ARCISelLowering.cpp
index 7fd08f70ea3bb..ce57140d8dbd3 100644
--- a/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -560,18 +560,18 @@ SDValue ARCTargetLowering::LowerCallArguments(
                                         ARC::R4, ARC::R5, ARC::R6, ARC::R7};
     auto *AFI = MF.getInfo<ARCFunctionInfo>();
     unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs);
-    if (FirstVAReg < array_lengthof(ArgRegs)) {
+    if (FirstVAReg < size(ArgRegs)) {
       int Offset = 0;
       // Save remaining registers, storing higher register numbers at a higher
       // address
-      // There are (array_lengthof(ArgRegs) - FirstVAReg) registers which
+      // There are (size(ArgRegs) - FirstVAReg) registers which
       // need to be saved.
       int VarFI =
-          MFI.CreateFixedObject((array_lengthof(ArgRegs) - FirstVAReg) * 4,
+          MFI.CreateFixedObject((size(ArgRegs) - FirstVAReg) * 4,
                                 CCInfo.getNextStackOffset(), true);
       AFI->setVarArgsFrameIndex(VarFI);
       SDValue FIN = DAG.getFrameIndex(VarFI, MVT::i32);
-      for (unsigned i = FirstVAReg; i < array_lengthof(ArgRegs); i++) {
+      for (unsigned i = FirstVAReg; i < size(ArgRegs); i++) {
         // Move argument from phys reg -> virt reg
         unsigned VReg = RegInfo.createVirtualRegister(&ARC::GPR32RegClass);
         RegInfo.addLiveIn(ArgRegs[i], VReg);
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index cde7158803765..d8343e070ccb2 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -113,7 +113,7 @@ static const ARM_MLxEntry ARM_MLxTable[] = {
 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
   : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
     Subtarget(STI) {
-  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
+  for (unsigned i = 0, e = size(ARM_MLxTable); i != e; ++i) {
     if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
       llvm_unreachable("Duplicated entries?");
     MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
@@ -2466,7 +2466,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
 };
 
 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
-  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
+  for (unsigned i = 0, e = size(AddSubFlagsOpcodeMap); i != e; ++i)
     if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
       return AddSubFlagsOpcodeMap[i].MachineOpc;
   return 0;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 1f2f6f7497e02..adfdfb8a3404f 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2367,7 +2367,7 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
 const TargetFrameLowering::SpillSlot *
 ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
   static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
-  NumEntries = array_lengthof(FixedSpillOffsets);
+  NumEntries = size(FixedSpillOffsets);
   return FixedSpillOffsets;
 }
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index fe4e6b24367a3..c0ce1fca9b157 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4465,7 +4465,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
   int lastInsIndex = -1;
   if (isVarArg && MFI.hasVAStart()) {
     unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
-    if (RegIdx != array_lengthof(GPRArgRegs))
+    if (RegIdx != size(GPRArgRegs))
       ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
   }
 
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 1cc5422523f17..98a5ae28efd85 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -244,7 +244,7 @@ INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
 Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
     : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
   OptimizeSize = MinimizeSize = false;
-  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
+  for (unsigned i = 0, e = size(ReduceTable); i != e; ++i) {
     unsigned FromOpc = ReduceTable[i].WideOpc;
     if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
       llvm_unreachable("Duplicated entries?");
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index a58fedf6cd364..3ab79a16b5ab3 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -1033,7 +1033,7 @@ static const MCPhysReg RegList16[] = {
     AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
     AVR::R11R10, AVR::R10R9,  AVR::R9R8};
 
-static_assert(array_lengthof(RegList8) == array_lengthof(RegList16),
+static_assert(size(RegList8) == size(RegList16),
               "8-bit and 16-bit register arrays must be of equal length");
 
 /// Analyze incoming and outgoing function arguments. We need custom C++ code
@@ -1074,7 +1074,7 @@ analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F,
     unsigned RegIdx = RegLastIdx + TotalBytes;
     RegLastIdx = RegIdx;
     // If there are not enough registers, use the stack
-    if (RegIdx >= array_lengthof(RegList8)) {
+    if (RegIdx >= size(RegList8)) {
       UseStack = true;
     }
     for (; i != j; ++i) {
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 3c742c98077bc..5944ca48e6e26 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -675,7 +675,7 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
     /* 28 */  0,          0,          UTIMERLO,   UTIMERHI
   };
 
-  if (RegNo >= array_lengthof(CtrlRegDecoderTable))
+  if (RegNo >= size(CtrlRegDecoderTable))
     return MCDisassembler::Fail;
 
   static_assert(NoRegister == 0, "Expecting NoRegister to be 0");
@@ -703,7 +703,7 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
     /* 28 */  0,          0,          UTIMER,     0
   };
 
-  if (RegNo >= array_lengthof(CtrlReg64DecoderTable))
+  if (RegNo >= size(CtrlReg64DecoderTable))
     return MCDisassembler::Fail;
 
   static_assert(NoRegister == 0, "Expecting NoRegister to be 0");
@@ -855,7 +855,7 @@ static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
     /* 28 */ GPMUCNT2,  GPMUCNT3,   G30,        G31
   };
 
-  if (RegNo >= array_lengthof(GuestRegDecoderTable))
+  if (RegNo >= size(GuestRegDecoderTable))
     return MCDisassembler::Fail;
   if (GuestRegDecoderTable[RegNo] == Hexagon::NoRegister)
     return MCDisassembler::Fail;
@@ -881,7 +881,7 @@ static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
     /* 28 */ G29_28,    0,          G31_30,     0
   };
 
-  if (RegNo >= array_lengthof(GuestReg64DecoderTable))
+  if (RegNo >= size(GuestReg64DecoderTable))
     return MCDisassembler::Fail;
   if (GuestReg64DecoderTable[RegNo] == Hexagon::NoRegister)
     return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 4ffd31b670e40..036e4cfcd1473 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -97,7 +97,7 @@ class HexagonFrameLowering : public TargetFrameLowering {
       { Hexagon::R25, -36 }, { Hexagon::R24, -40 }, { Hexagon::D12, -40 },
       { Hexagon::R27, -44 }, { Hexagon::R26, -48 }, { Hexagon::D13, -48 }
     };
-    NumEntries = array_lengthof(Offsets);
+    NumEntries = size(Offsets);
     return Offsets;
   }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 43afae441457a..2bbccfe766254 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -998,8 +998,8 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
 
   static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
   static const unsigned Regs1[]  = { LC1, SA1 };
-  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
-                                 : makeArrayRef(Regs1, array_lengthof(Regs1));
+  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, size(Regs01))
+                                 : makeArrayRef(Regs1, size(Regs1));
   for (unsigned R : CheckRegs)
     if (MI->modifiesRegister(R, TRI))
       return true;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 161768b8dc226..0b23ba2944327 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -304,7 +304,7 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
     SDNode *S = StoreInstrForLoadIntrinsic(L, C);
     SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
     SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
-    ReplaceUses(F, T, array_lengthof(T));
+    ReplaceUses(F, T, size(T));
     // This transformation will leave the intrinsic dead. If it remains in
     // the DAG, the selection code will see it again, but without the load,
     // and it will generate a store that is normally required for it.
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index d7ca934a23e64..8012c03bfc258 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -144,7 +144,7 @@ static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
     Hexagon::R0, Hexagon::R1, Hexagon::R2,
     Hexagon::R3, Hexagon::R4, Hexagon::R5
   };
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+  const unsigned NumArgRegs = size(ArgRegs);
   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
 
   // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index f8ac35aed7c03..26054d066616d 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -643,7 +643,7 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
           Hexagon::fixup_Hexagon_GPREL16_0, Hexagon::fixup_Hexagon_GPREL16_1,
           Hexagon::fixup_Hexagon_GPREL16_2, Hexagon::fixup_Hexagon_GPREL16_3
         };
-        assert(Shift < array_lengthof(GPRelFixups));
+        assert(Shift < size(GPRelFixups));
         auto UsesGP = [] (const MCInstrDesc &D) {
           for (const MCPhysReg *U = D.getImplicitUses(); U && *U; ++U)
             if (*U == Hexagon::GP)
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index 953916776c572..144c004dbf90a 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -81,7 +81,7 @@ class MSP430AsmBackend : public MCAsmBackend {
       {"fixup_8",             0,  8, 0},
       {"fixup_sym_diff",      0, 32, 0},
     };
-    static_assert((array_lengthof(Infos)) == MSP430::NumTargetFixupKinds,
+    static_assert((size(Infos)) == MSP430::NumTargetFixupKinds,
                   "Not all fixup kinds added to Infos array");
   
     if (Kind < FirstTargetFixupKind)
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index aebfc6b0ae2ea..b6382a51a0943 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -458,12 +458,12 @@ static void AnalyzeArguments(CCState &State,
   static const MCPhysReg CRegList[] = {
     MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
   };
-  static const unsigned CNbRegs = array_lengthof(CRegList);
+  static const unsigned CNbRegs = size(CRegList);
   static const MCPhysReg BuiltinRegList[] = {
     MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
     MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
   };
-  static const unsigned BuiltinNbRegs = array_lengthof(BuiltinRegList);
+  static const unsigned BuiltinNbRegs = size(BuiltinRegList);
 
   ArrayRef<MCPhysReg> RegList;
   unsigned NbRegs;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index a3dbe6f84a1e3..72578974cbe81 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -420,7 +420,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
     { "fixup_Mips_JALR",                 0,     32,   0 },
     { "fixup_MICROMIPS_JALR",            0,     32,   0 }
   };
-  static_assert(array_lengthof(LittleEndianInfos) == Mips::NumTargetFixupKinds,
+  static_assert(size(LittleEndianInfos) == Mips::NumTargetFixupKinds,
                 "Not all MIPS little endian fixup kinds added!");
 
   const static MCFixupKindInfo BigEndianInfos[] = {
@@ -499,7 +499,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
     { "fixup_Mips_JALR",                  0,     32,   0 },
     { "fixup_MICROMIPS_JALR",             0,     32,   0 }
   };
-  static_assert(array_lengthof(BigEndianInfos) == Mips::NumTargetFixupKinds,
+  static_assert(size(BigEndianInfos) == Mips::NumTargetFixupKinds,
                 "Not all MIPS big endian fixup kinds added!");
 
   if (Kind < FirstTargetFixupKind)
diff --git a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 78ffe00c020c0..1e98b8ac54a14 100644
--- a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -246,7 +246,7 @@ bool Mips16TargetLowering::isEligibleForTailCallOptimization(
 }
 
 void Mips16TargetLowering::setMips16HardFloatLibCalls() {
-  for (unsigned I = 0; I != array_lengthof(HardFloatLibCalls); ++I) {
+  for (unsigned I = 0; I != size(HardFloatLibCalls); ++I) {
     assert((I == 0 || HardFloatLibCalls[I - 1] < HardFloatLibCalls[I]) &&
            "Array not sorted!");
     if (HardFloatLibCalls[I].Libcall != RTLIB::UNKNOWN_LIBCALL)
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
index 77cdf5c939dcc..c90bd04370c67 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
@@ -37,7 +37,7 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   };
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+  const unsigned NumArgRegs = size(ArgRegs);
 
   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
 
@@ -62,7 +62,7 @@ static bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(
     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   };
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+  const unsigned NumArgRegs = size(ArgRegs);
 
   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
   int RegsLeft = NumArgRegs - RegNum;
@@ -88,7 +88,7 @@ static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
     PPC::F8
   };
 
-  const unsigned NumArgRegs = array_lengthof(ArgRegs);
+  const unsigned NumArgRegs = size(ArgRegs);
 
   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
 
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 65c969c196e1d..9a04a201d2e68 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -228,23 +228,23 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
 
   if (Subtarget.is64BitELFABI()) {
-    NumEntries = array_lengthof(ELFOffsets64);
+    NumEntries = size(ELFOffsets64);
     return ELFOffsets64;
   }
 
   if (Subtarget.is32BitELFABI()) {
-    NumEntries = array_lengthof(ELFOffsets32);
+    NumEntries = size(ELFOffsets32);
     return ELFOffsets32;
   }
 
   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
 
   if (Subtarget.isPPC64()) {
-    NumEntries = array_lengthof(AIXOffsets64);
+    NumEntries = size(AIXOffsets64);
     return AIXOffsets64;
   }
 
-  NumEntries = array_lengthof(AIXOffsets32);
+  NumEntries = size(AIXOffsets32);
   return AIXOffsets32;
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 25cc34badda04..650b9e9774f49 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4149,13 +4149,13 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
     };
-    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
+    const unsigned NumGPArgRegs = size(GPArgRegs);
 
     static const MCPhysReg FPArgRegs[] = {
       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
       PPC::F8
     };
-    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
+    unsigned NumFPArgRegs = size(FPArgRegs);
 
     if (useSoftFloat() || hasSPE())
        NumFPArgRegs = 0;
@@ -4267,9 +4267,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
 
-  const unsigned Num_GPR_Regs = array_lengthof(GPR);
+  const unsigned Num_GPR_Regs = size(GPR);
   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
-  const unsigned Num_VR_Regs  = array_lengthof(VR);
+  const unsigned Num_VR_Regs  = size(VR);
 
   // Do a first pass over the arguments to determine whether the ABI
   // guarantees that our caller has allocated the parameter save area
@@ -4724,9 +4724,9 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
 
-  const unsigned NumGPRs = array_lengthof(GPR);
+  const unsigned NumGPRs = size(GPR);
   const unsigned NumFPRs = 13;
-  const unsigned NumVRs = array_lengthof(VR);
+  const unsigned NumVRs = size(VR);
   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
 
   unsigned NumBytes = LinkageSize;
@@ -5977,9 +5977,9 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
 
-  const unsigned NumGPRs = array_lengthof(GPR);
+  const unsigned NumGPRs = size(GPR);
   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
-  const unsigned NumVRs  = array_lengthof(VR);
+  const unsigned NumVRs  = size(VR);
 
   // On ELFv2, we can avoid allocating the parameter area if all the arguments
   // can be passed to the callee in registers.
@@ -7148,7 +7148,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
 
     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
-    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
+    const unsigned NumGPArgRegs = size(IsPPC64 ? GPR_64 : GPR_32);
 
     // The fixed integer arguments of a variadic function are stored to the
     // VarArgsFrameIndex on the stack so that they may be loaded by
@@ -9354,7 +9354,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
   };
 
-  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
+  for (unsigned idx = 0; idx < size(SplatCsts); ++idx) {
     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
     int i = SplatCsts[idx];
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index eada872c2a7db..3d91879389705 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -310,7 +310,7 @@ static const uint16_t FMAOpIdxInfo[][6] = {
 // Check if an opcode is a FMA instruction. If it is, return the index in array
 // FMAOpIdxInfo. Otherwise, return -1.
 int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
-  for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++)
+  for (unsigned I = 0; I < size(FMAOpIdxInfo); I++)
     if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
       return I;
   return -1;
@@ -2331,7 +2331,7 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
 
   bool Found = false;
   for (const MachineOperand &MO : MI.operands()) {
-    for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
+    for (unsigned c = 0; c < size(RCs) && !Found; ++c) {
       const TargetRegisterClass *RC = RCs[c];
       if (MO.isReg()) {
         if (MO.isDef() && RC->contains(MO.getReg())) {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 514789b3f6459..4305d1716b156 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -94,7 +94,7 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_riscv_set_6b", 2, 6, 0},
       {"fixup_riscv_sub_6b", 2, 6, 0},
   };
-  static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
+  static_assert((size(Infos)) == RISCV::NumTargetFixupKinds,
                 "Not all fixup kinds added to Infos array");
 
   // Fixup kinds from .reloc directive are like R_RISCV_NONE. They
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a01c76d587966..b96dae7f3c950 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8786,7 +8786,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
   }
 
   // FPR16, FPR32, and FPR64 alias each other.
-  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
+  if (State.getFirstUnallocated(ArgFPR32s) == size(ArgFPR32s)) {
     UseGPRForF16_F32 = true;
     UseGPRForF64 = true;
   }
@@ -8815,7 +8815,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
     // Skip 'odd' register if necessary.
-    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
+    if (RegIdx != size(ArgGPRs) && RegIdx % 2 == 1)
       State.AllocateReg(ArgGPRs);
   }
 
diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 142124a8e0d9d..a7d0029afc44a 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -235,7 +235,7 @@ static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
 static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
                                                uint64_t Address,
                                                const void *Decoder) {
-  if (RegNo >= array_lengthof(PRRegDecoderTable))
+  if (RegNo >= size(PRRegDecoderTable))
     return MCDisassembler::Fail;
   Inst.addOperand(MCOperand::createReg(PRRegDecoderTable[RegNo]));
   return MCDisassembler::Success;
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index ccc7d0737f531..f33d5cda8c8c1 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -219,7 +219,7 @@ SystemZELFFrameLowering::SystemZELFFrameLowering()
   // Create a mapping from register number to save slot offset.
   // These offsets are relative to the start of the register save area.
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-  for (unsigned I = 0, E = array_lengthof(ELFSpillOffsetTable); I != E; ++I)
+  for (unsigned I = 0, E = size(ELFSpillOffsetTable); I != E; ++I)
     RegSpillOffsets[ELFSpillOffsetTable[I].Reg] = ELFSpillOffsetTable[I].Offset;
 }
 
@@ -824,7 +824,7 @@ SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
   // Create a mapping from register number to save slot offset.
   // These offsets are relative to the start of the local are area.
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-  for (unsigned I = 0, E = array_lengthof(XPLINKSpillOffsetTable); I != E; ++I)
+  for (unsigned I = 0, E = size(XPLINKSpillOffsetTable); I != E; ++I)
     RegSpillOffsets[XPLINKSpillOffsetTable[I].Reg] =
         XPLINKSpillOffsetTable[I].Offset;
 }
diff --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h
index 99eb41189b250..71ce75b8eafb0 100644
--- a/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/llvm/lib/Target/VE/VEFrameLowering.h
@@ -62,7 +62,7 @@ class VEFrameLowering : public TargetFrameLowering {
         {VE::SX25, 104}, {VE::SX26, 112}, {VE::SX27, 120}, {VE::SX28, 128},
         {VE::SX29, 136}, {VE::SX30, 144}, {VE::SX31, 152}, {VE::SX32, 160},
         {VE::SX33, 168}};
-    NumEntries = array_lengthof(Offsets);
+    NumEntries = size(Offsets);
     return Offsets;
   }
 
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e9ecff3bf514d..6a0d36bca1b77 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -4408,7 +4408,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
     }
   }
 
-  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
+  for (unsigned I = 0, E = size(Match); I != E; ++I) {
     Tmp.back() = Suffixes[I];
     if (MemOp && HasVectorReg)
       MemOp->Mem.Size = MemSize[I];
@@ -4454,7 +4454,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
   if (NumSuccessfulMatches > 1) {
     char MatchChars[4];
     unsigned NumMatches = 0;
-    for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
+    for (unsigned I = 0, E = size(Match); I != E; ++I)
       if (Match[I] == Match_Success)
         MatchChars[NumMatches++] = Suffixes[I];
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ba606d7a80edb..fcf0fc1b74d1d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12262,7 +12262,7 @@ static int canLowerByDroppingElements(ArrayRef<int> Mask, bool MatchEven,
       continue;
 
     bool IsAnyViable = false;
-    for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
+    for (unsigned j = 0; j != size(ViableForN); ++j)
       if (ViableForN[j]) {
         uint64_t N = j + 1;
 
@@ -12277,7 +12277,7 @@ static int canLowerByDroppingElements(ArrayRef<int> Mask, bool MatchEven,
       break;
   }
 
-  for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
+  for (unsigned j = 0; j != size(ViableForN); ++j)
     if (ViableForN[j])
       return j + 1;
 
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 7c86262269fcd..47634634cb0a0 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1347,11 +1347,11 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
     };
     XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
     unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs);
-    if (FirstVAReg < array_lengthof(ArgRegs)) {
+    if (FirstVAReg < size(ArgRegs)) {
       int offset = 0;
       // Save remaining registers, storing higher register numbers at a higher
       // address
-      for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
+      for (int i = size(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
         // Create a stack slot
         int FI = MFI.CreateFixedObject(4, offset, true);
         if (i == (int)FirstVAReg) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 0598f751febe2..db6df996dfe07 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -576,7 +576,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
     }
   }
 
-  assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) &&
+  assert((NextTmpIdx <= size(TmpResult) + 1) &&
          "out-of-bound access");
 
   Value *Result;
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 654f0d2a03a89..b1f515dc232dd 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4254,7 +4254,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
       ImmMapTy::const_iterator OtherImms[] = {
           Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Avg)};
-      for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
+      for (size_t i = 0, e = size(OtherImms); i != e; ++i) {
         ImmMapTy::const_iterator M = OtherImms[i];
         if (M == J || M == JE) continue;
 
diff --git a/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 9fba2f3f86b58..679bc9e386472 100644
--- a/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -87,7 +87,7 @@ struct Renamer {
   Renamer(unsigned int seed) { prng.srand(seed); }
 
   const char *newName() {
-    return metaNames[prng.rand() % array_lengthof(metaNames)];
+    return metaNames[prng.rand() % size(metaNames)];
   }
 
   PRNG prng;
diff --git a/llvm/tools/llvm-config/llvm-config.cpp b/llvm/tools/llvm-config/llvm-config.cpp
index 8ed88f33ead46..a663b4c45951f 100644
--- a/llvm/tools/llvm-config/llvm-config.cpp
+++ b/llvm/tools/llvm-config/llvm-config.cpp
@@ -170,7 +170,7 @@ static std::vector<std::string> ComputeLibsForComponents(
 
   // Build a map of component names to information.
   StringMap<AvailableComponent *> ComponentMap;
-  for (unsigned i = 0; i != array_lengthof(AvailableComponents); ++i) {
+  for (unsigned i = 0; i != size(AvailableComponents); ++i) {
     AvailableComponent *AC = &AvailableComponents[i];
     ComponentMap[AC->Name] = AC;
   }
@@ -540,7 +540,7 @@ int main(int argc, char **argv) {
         /// built, print LLVM_DYLIB_COMPONENTS instead of everything
         /// in the manifest.
         std::vector<std::string> Components;
-        for (unsigned j = 0; j != array_lengthof(AvailableComponents); ++j) {
+        for (unsigned j = 0; j != size(AvailableComponents); ++j) {
           // Only include non-installed components when in a development tree.
           if (!AvailableComponents[j].IsInstalled && !IsInDevelopmentTree)
             continue;
diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp
index 32fdd1a4d5c31..c3d5ff0d3f9ee 100644
--- a/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -173,7 +173,7 @@ void COFFDumper::printPEHeader(const PEHeader &Hdr) const {
       "Reserved",
   };
   outs() << "\nThe Data Directory\n";
-  for (uint32_t I = 0; I != array_lengthof(DirName); ++I) {
+  for (uint32_t I = 0; I != size(DirName); ++I) {
     uint32_t Addr = 0, Size = 0;
     if (const data_directory *Data = Obj.getDataDirectory(I)) {
       Addr = Data->RelativeVirtualAddress;
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 78be632f21530..691ded4ce943b 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -883,8 +883,8 @@ void Decoder::decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
   bool Terminated = false;
   for (unsigned OI = Offset, OE = Opcodes.size(); !Terminated && OI < OE; ) {
     for (unsigned DI = 0;; ++DI) {
-      if ((isAArch64 && (DI >= array_lengthof(Ring64))) ||
-          (!isAArch64 && (DI >= array_lengthof(Ring)))) {
+      if ((isAArch64 && (DI >= size(Ring64))) ||
+          (!isAArch64 && (DI >= size(Ring)))) {
         SW.startLine() << format("0x%02x                ; Bad opcode!\n",
                                  Opcodes.data()[OI]);
         ++OI;
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index cfb618117d2bf..ae1b1212a6f8e 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -4988,7 +4988,7 @@ template <typename ELFT> static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
       "Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl",
   };
   StringRef OSName = "Unknown";
-  if (Words[0] < array_lengthof(OSNames))
+  if (Words[0] < size(OSNames))
     OSName = OSNames[Words[0]];
   uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3];
   std::string str;
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 1683a9d671734..35fa362eca71e 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -937,13 +937,13 @@ TEST(APFloatTest, fromStringSpecials) {
   };
 
   // Convert payload integer to decimal string representation.
-  std::string NaNPayloadDecStrings[array_lengthof(NaNPayloads)];
-  for (size_t I = 0; I < array_lengthof(NaNPayloads); ++I)
+  std::string NaNPayloadDecStrings[size(NaNPayloads)];
+  for (size_t I = 0; I < size(NaNPayloads); ++I)
     NaNPayloadDecStrings[I] = utostr(NaNPayloads[I]);
 
   // Convert payload integer to hexadecimal string representation.
-  std::string NaNPayloadHexStrings[array_lengthof(NaNPayloads)];
-  for (size_t I = 0; I < array_lengthof(NaNPayloads); ++I)
+  std::string NaNPayloadHexStrings[size(NaNPayloads)];
+  for (size_t I = 0; I < size(NaNPayloads); ++I)
     NaNPayloadHexStrings[I] = "0x" + utohexstr(NaNPayloads[I]);
 
   // Fix payloads to expected result.
@@ -967,7 +967,7 @@ TEST(APFloatTest, fromStringSpecials) {
     for (char TypeChar : NaNTypes) {
       bool Signaling = (TypeChar == 's' || TypeChar == 'S');
 
-      for (size_t J = 0; J < array_lengthof(NaNPayloads); ++J) {
+      for (size_t J = 0; J < size(NaNPayloads); ++J) {
         uint64_t Payload = (Signaling && !NaNPayloads[J]) ? SNaNDefaultPayload
                                                           : NaNPayloads[J];
         std::string &PayloadDec = NaNPayloadDecStrings[J];
@@ -2220,7 +2220,7 @@ TEST(APFloatTest, add) {
     { MSmallestNormalized, MSmallestNormalized, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }
   };
 
-  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.add(y, APFloat::rmNearestTiesToEven);
@@ -2460,7 +2460,7 @@ TEST(APFloatTest, subtract) {
     { MSmallestNormalized, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }
   };
 
-  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.subtract(y, APFloat::rmNearestTiesToEven);
@@ -2764,7 +2764,7 @@ TEST(APFloatTest, multiply) {
      APFloat::rmNearestTiesToAway},
   };
 
-  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.multiply(y, SpecialCaseTests[i].roundingMode);
@@ -3046,7 +3046,7 @@ TEST(APFloatTest, divide) {
      APFloat::rmNearestTiesToAway},
   };
 
-  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.divide(y, SpecialCaseTests[i].roundingMode);
@@ -4006,7 +4006,7 @@ TEST(APFloatTest, remainder) {
     { MVal6, MVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
   };
 
-  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.remainder(y);
diff --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp
index e80a25a19969c..114c097b3db57 100644
--- a/llvm/unittests/ADT/StringRefTest.cpp
+++ b/llvm/unittests/ADT/StringRefTest.cpp
@@ -659,7 +659,7 @@ TEST(StringRefTest, getAsInteger) {
   uint32_t U32;
   uint64_t U64;
 
-  for (size_t i = 0; i < array_lengthof(Unsigned); ++i) {
+  for (size_t i = 0; i < size(Unsigned); ++i) {
     bool U8Success = StringRef(Unsigned[i].Str).getAsInteger(0, U8);
     if (static_cast<uint8_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
       ASSERT_FALSE(U8Success);
@@ -691,7 +691,7 @@ TEST(StringRefTest, getAsInteger) {
   int32_t S32;
   int64_t S64;
 
-  for (size_t i = 0; i < array_lengthof(Signed); ++i) {
+  for (size_t i = 0; i < size(Signed); ++i) {
     bool S8Success = StringRef(Signed[i].Str).getAsInteger(0, S8);
     if (static_cast<int8_t>(Signed[i].Expected) == Signed[i].Expected) {
       ASSERT_FALSE(S8Success);
@@ -737,7 +737,7 @@ static const char* BadStrings[] = {
 
 TEST(StringRefTest, getAsUnsignedIntegerBadStrings) {
   unsigned long long U64;
-  for (size_t i = 0; i < array_lengthof(BadStrings); ++i) {
+  for (size_t i = 0; i < size(BadStrings); ++i) {
     bool IsBadNumber = StringRef(BadStrings[i]).getAsInteger(0, U64);
     ASSERT_TRUE(IsBadNumber);
   }
@@ -820,7 +820,7 @@ TEST(StringRefTest, consumeIntegerUnsigned) {
   uint32_t U32;
   uint64_t U64;
 
-  for (size_t i = 0; i < array_lengthof(ConsumeUnsigned); ++i) {
+  for (size_t i = 0; i < size(ConsumeUnsigned); ++i) {
     StringRef Str = ConsumeUnsigned[i].Str;
     bool U8Success = Str.consumeInteger(0, U8);
     if (static_cast<uint8_t>(ConsumeUnsigned[i].Expected) ==
@@ -868,7 +868,7 @@ TEST(StringRefTest, consumeIntegerSigned) {
   int32_t S32;
   int64_t S64;
 
-  for (size_t i = 0; i < array_lengthof(ConsumeSigned); ++i) {
+  for (size_t i = 0; i < size(ConsumeSigned); ++i) {
     StringRef Str = ConsumeSigned[i].Str;
     bool S8Success = Str.consumeInteger(0, S8);
     if (static_cast<int8_t>(ConsumeSigned[i].Expected) ==
@@ -945,7 +945,7 @@ static const char join_result3[] = "a::b::c";
 TEST(StringRefTest, joinStrings) {
   std::vector<StringRef> v1;
   std::vector<std::string> v2;
-  for (size_t i = 0; i < array_lengthof(join_input); ++i) {
+  for (size_t i = 0; i < size(join_input); ++i) {
     v1.push_back(join_input[i]);
     v2.push_back(join_input[i]);
   }
diff --git a/llvm/unittests/ADT/TinyPtrVectorTest.cpp b/llvm/unittests/ADT/TinyPtrVectorTest.cpp
index a78f5b0d32aac..aa6d53b40fa5f 100644
--- a/llvm/unittests/ADT/TinyPtrVectorTest.cpp
+++ b/llvm/unittests/ADT/TinyPtrVectorTest.cpp
@@ -45,7 +45,7 @@ class TinyPtrVectorTest : public testing::Test {
   std::vector<PtrT> TestPtrs;
 
   TinyPtrVectorTest() {
-    for (size_t i = 0, e = array_lengthof(TestValues); i != e; ++i)
+    for (size_t i = 0, e = size(TestValues); i != e; ++i)
       TestPtrs.push_back(PtrT(&TestValues[i]));
 
     std::shuffle(TestPtrs.begin(), TestPtrs.end(), std::mt19937{});
diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp
index cc4c953e65351..b9fd4d3979e66 100644
--- a/llvm/unittests/ProfileData/CoverageMappingTest.cpp
+++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp
@@ -292,7 +292,7 @@ TEST_P(CoverageMappingTest, basic_write_read) {
 
 TEST_P(CoverageMappingTest, correct_deserialize_for_more_than_two_files) {
   const char *FileNames[] = {"bar", "baz", "foo"};
-  static const unsigned N = array_lengthof(FileNames);
+  static const unsigned N = size(FileNames);
 
   startFunction("func", 0x1234);
   for (unsigned I = 0; I < N; ++I)
@@ -321,7 +321,7 @@ TEST_P(CoverageMappingTest, load_coverage_for_more_than_two_files) {
   ProfileWriter.addRecord({"func", 0x1234, {0}}, Err);
 
   const char *FileNames[] = {"bar", "baz", "foo"};
-  static const unsigned N = array_lengthof(FileNames);
+  static const unsigned N = size(FileNames);
 
   startFunction("func", 0x1234);
   for (unsigned I = 0; I < N; ++I)
diff --git a/llvm/unittests/Support/BinaryStreamTest.cpp b/llvm/unittests/Support/BinaryStreamTest.cpp
index f98b7f327682b..f3841b1eac594 100644
--- a/llvm/unittests/Support/BinaryStreamTest.cpp
+++ b/llvm/unittests/Support/BinaryStreamTest.cpp
@@ -103,7 +103,7 @@ class BrokenStream : public WritableBinaryStream {
 };
 
 constexpr endianness Endians[] = {big, little, native};
-constexpr uint32_t NumEndians = llvm::array_lengthof(Endians);
+constexpr uint32_t NumEndians = llvm::size(Endians);
 constexpr uint32_t NumStreams = 2 * NumEndians;
 
 class BinaryStreamTest : public testing::Test {
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index 8032d0e7bf403..695bb7043c5ef 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -345,7 +345,7 @@ TEST(CommandLineTest, AliasesWithArguments) {
     { "-tool", "-alias", "x" }
   };
 
-  for (size_t i = 0, e = array_lengthof(Inputs); i < e; ++i) {
+  for (size_t i = 0, e = size(Inputs); i < e; ++i) {
     StackOption<std::string> Actual("actual");
     StackOption<bool> Extra("extra");
     StackOption<std::string> Input(cl::Positional);
@@ -374,8 +374,8 @@ void testAliasRequired(int argc, const char *const *argv) {
 TEST(CommandLineTest, AliasRequired) {
   const char *opts1[] = { "-tool", "-option=x" };
   const char *opts2[] = { "-tool", "-o", "x" };
-  testAliasRequired(array_lengthof(opts1), opts1);
-  testAliasRequired(array_lengthof(opts2), opts2);
+  testAliasRequired(size(opts1), opts1);
+  testAliasRequired(size(opts2), opts2);
 }
 
 TEST(CommandLineTest, HideUnrelatedOptions) {
@@ -987,8 +987,8 @@ TEST(CommandLineTest, ResponseFileEOLs) {
                                       /*CurrentDir=*/StringRef(TestRoot), FS));
   const char *Expected[] = {"clang", "-Xclang", "-Wno-whatever", nullptr,
                             "input.cpp"};
-  ASSERT_EQ(array_lengthof(Expected), Argv.size());
-  for (size_t I = 0, E = array_lengthof(Expected); I < E; ++I) {
+  ASSERT_EQ(size(Expected), Argv.size());
+  for (size_t I = 0, E = size(Expected); I < E; ++I) {
     if (Expected[I] == nullptr) {
       ASSERT_EQ(Argv[I], nullptr);
     } else {
diff --git a/llvm/unittests/Support/FormatVariadicTest.cpp b/llvm/unittests/Support/FormatVariadicTest.cpp
index 37946165e3651..e905d259dbd86 100644
--- a/llvm/unittests/Support/FormatVariadicTest.cpp
+++ b/llvm/unittests/Support/FormatVariadicTest.cpp
@@ -529,7 +529,7 @@ TEST(FormatVariadicTest, BigTest) {
   std::string S;
   llvm::raw_string_ostream Stream(S);
   Stream << formatv(Intro, std::tuple_size<Tuple>::value,
-                    llvm::array_lengthof(Ts))
+                    llvm::size(Ts))
          << "\n";
   Stream << formatv(Header, "Char", "HexInt", "Str", "Ref", "std::str",
                     "double", "float", "pointer", "comma", "exp", "bigint",
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index a9a8de6d291a3..7165f2d916179 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -733,7 +733,7 @@ TEST(TargetParserTest, ARMArchExtFeature) {
                               {"mve", "nomve", "+mve", "-mve"},
                               {"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}};
 
-  for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
+  for (unsigned i = 0; i < size(ArchExt); i++) {
     EXPECT_EQ(StringRef(ArchExt[i][2]), ARM::getArchExtFeature(ArchExt[i][0]));
     EXPECT_EQ(StringRef(ArchExt[i][3]), ARM::getArchExtFeature(ArchExt[i][1]));
   }
@@ -764,7 +764,7 @@ TEST(TargetParserTest, ARMArchExtDependencies) {
 TEST(TargetParserTest, ARMparseHWDiv) {
   const char *hwdiv[] = {"thumb", "arm", "arm,thumb", "thumb,arm"};
 
-  for (unsigned i = 0; i < array_lengthof(hwdiv); i++)
+  for (unsigned i = 0; i < size(hwdiv); i++)
     EXPECT_NE(ARM::AEK_INVALID, ARM::parseHWDiv((StringRef)hwdiv[i]));
 }
 
@@ -782,7 +782,7 @@ TEST(TargetParserTest, ARMparseArchEndianAndISA) {
       "v8.7a",     "v8.8-a", "v8.8a", "v8-r",   "v8m.base", "v8m.main",
       "v8.1m.main"};
 
-  for (unsigned i = 0; i < array_lengthof(Arch); i++) {
+  for (unsigned i = 0; i < size(Arch); i++) {
     std::string arm_1 = "armeb" + (std::string)(Arch[i]);
     std::string arm_2 = "arm" + (std::string)(Arch[i]) + "eb";
     std::string arm_3 = "arm" + (std::string)(Arch[i]);
@@ -821,7 +821,7 @@ TEST(TargetParserTest, ARMparseArchEndianAndISA) {
 }
 
 TEST(TargetParserTest, ARMparseArchProfile) {
-  for (unsigned i = 0; i < array_lengthof(ARMArch); i++) {
+  for (unsigned i = 0; i < size(ARMArch); i++) {
     switch (ARM::parseArch(ARMArch[i])) {
     case ARM::ArchKind::ARMV6M:
     case ARM::ArchKind::ARMV7M:
@@ -861,7 +861,7 @@ TEST(TargetParserTest, ARMparseArchProfile) {
 }
 
 TEST(TargetParserTest, ARMparseArchVersion) {
-  for (unsigned i = 0; i < array_lengthof(ARMArch); i++)
+  for (unsigned i = 0; i < size(ARMArch); i++)
     if (((std::string)ARMArch[i]).substr(0, 4) == "armv")
       EXPECT_EQ((ARMArch[i][4] - 48u), ARM::parseArchVersion(ARMArch[i]));
     else
@@ -1526,7 +1526,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"pmuv3", "nopmuv3", "+perfmon", "-perfmon"},
   };
 
-  for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
+  for (unsigned i = 0; i < size(ArchExt); i++) {
     EXPECT_EQ(StringRef(ArchExt[i][2]),
               AArch64::getArchExtFeature(ArchExt[i][0]));
     EXPECT_EQ(StringRef(ArchExt[i][3]),
diff --git a/llvm/utils/KillTheDoctor/KillTheDoctor.cpp b/llvm/utils/KillTheDoctor/KillTheDoctor.cpp
index 358ef165cf63a..be36176ae8ec0 100644
--- a/llvm/utils/KillTheDoctor/KillTheDoctor.cpp
+++ b/llvm/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -207,7 +207,7 @@ static std::error_code GetFileNameFromHandle(HANDLE FileHandle,
   Success = ::GetMappedFileNameA(::GetCurrentProcess(),
                                 MappedFile,
                                 Filename,
-                                array_lengthof(Filename) - 1);
+                                size(Filename) - 1);
 
   if (!Success)
     return windows_error(::GetLastError());
@@ -242,12 +242,12 @@ static std::string FindProgram(const std::string &Program,
     DWORD length = ::SearchPathA(NULL,
                                  Program.c_str(),
                                  Extension,
-                                 array_lengthof(PathName),
+                                 size(PathName),
                                  PathName,
                                  NULL);
     if (length == 0)
       ec = windows_error(::GetLastError());
-    else if (length > array_lengthof(PathName)) {
+    else if (length > size(PathName)) {
       // This may have been the file, return with error.
       ec = windows_error(ERROR_BUFFER_OVERFLOW);
       break;
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 9283ceeb31e08..36ad66f506ce8 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -1175,7 +1175,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
   O.indent(2) << "  makeArrayRef(OpToPatterns),\n";
   O.indent(2) << "  makeArrayRef(Patterns),\n";
   O.indent(2) << "  makeArrayRef(Conds),\n";
-  O.indent(2) << "  StringRef(AsmStrings, array_lengthof(AsmStrings)),\n";
+  O.indent(2) << "  StringRef(AsmStrings, size(AsmStrings)),\n";
   if (MCOpPredicates.empty())
     O.indent(2) << "  nullptr,\n";
   else
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index 2c1583f7979db..b88f2514ee264 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -477,7 +477,7 @@ static const char *const FixedInstrs[] = {
     nullptr};
 
 unsigned CodeGenTarget::getNumFixedInstructions() {
-  return array_lengthof(FixedInstrs) - 1;
+  return size(FixedInstrs) - 1;
 }
 
 /// Return all of the instructions defined by the target, ordered by
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 1ed7bc103f9ce..93a59f1956672 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -440,7 +440,7 @@ void RegisterInfoEmitter::EmitRegMappingTables(
       OS << "extern const unsigned " << Namespace
          << (j == 0 ? "DwarfFlavour" : "EHFlavour") << I << "Dwarf2LSize";
       if (!isCtor)
-        OS << " = array_lengthof(" << Namespace
+        OS << " = size(" << Namespace
            << (j == 0 ? "DwarfFlavour" : "EHFlavour") << I << "Dwarf2L);\n\n";
       else
         OS << ";\n\n";
@@ -498,7 +498,7 @@ void RegisterInfoEmitter::EmitRegMappingTables(
       OS << "extern const unsigned " << Namespace
          << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "L2DwarfSize";
       if (!isCtor)
-        OS << " = array_lengthof(" << Namespace
+        OS << " = size(" << Namespace
            << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "L2Dwarf);\n\n";
       else
         OS << ";\n\n";
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index d1a9ecb06a2b0..474dcbdb328f8 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -657,7 +657,7 @@ static const char* stringForDecisionType(ModRMDecisionType dt) {
 }
 
 DisassemblerTables::DisassemblerTables() {
-  for (unsigned i = 0; i < array_lengthof(Tables); i++)
+  for (unsigned i = 0; i < size(Tables); i++)
     Tables[i] = std::make_unique<ContextDecision>();
 
   HasConflicts = false;
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
index 5431baa0f9f41..3036a7268f141 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
@@ -186,7 +186,7 @@ class Extension<list<I32EnumAttrCase> extensions> : Availability {
     "}; " #
     // The following manual ArrayRef constructor call is to satisfy GCC 5.
     "ArrayRef<::mlir::spirv::Extension> " #
-      "ref(exts, ::llvm::array_lengthof(exts));");
+      "ref(exts, ::llvm::size(exts));");
   let instance = "ref";
 }
 
@@ -228,7 +228,7 @@ class Capability<list<I32EnumAttrCase> capabilities> : Availability {
     "}; " #
     // The following manual ArrayRef constructor call is to satisfy GCC 5.
     "ArrayRef<::mlir::spirv::Capability> " #
-      "ref(caps, ::llvm::array_lengthof(caps));");
+      "ref(caps, ::llvm::size(caps));");
   let instance = "ref";
 }
 
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp
index 0562d42cfca43..b5559df49d2f4 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp
@@ -59,16 +59,16 @@ ArrayRef<spirv::Extension> spirv::getImpliedExtensions(spirv::Version version) {
   case Version::V_1_3: {
     // The following manual ArrayRef constructor call is to satisfy GCC 5.
     static const Extension exts[] = {V_1_3_IMPLIED_EXTS};
-    return ArrayRef<spirv::Extension>(exts, llvm::array_lengthof(exts));
+    return ArrayRef<spirv::Extension>(exts, llvm::size(exts));
   }
   case Version::V_1_4: {
     static const Extension exts[] = {V_1_3_IMPLIED_EXTS, V_1_4_IMPLIED_EXTS};
-    return ArrayRef<spirv::Extension>(exts, llvm::array_lengthof(exts));
+    return ArrayRef<spirv::Extension>(exts, llvm::size(exts));
   }
   case Version::V_1_5: {
     static const Extension exts[] = {V_1_3_IMPLIED_EXTS, V_1_4_IMPLIED_EXTS,
                                      V_1_5_IMPLIED_EXTS};
-    return ArrayRef<spirv::Extension>(exts, llvm::array_lengthof(exts));
+    return ArrayRef<spirv::Extension>(exts, llvm::size(exts));
   }
   }
 
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
index b66f569c352d9..0084b1242d2d4 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
@@ -168,7 +168,7 @@ void CompositeType::getCapabilities(
         auto vecSize = getNumElements();
         if (vecSize == 8 || vecSize == 16) {
           static const Capability caps[] = {Capability::Vector16};
-          ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
+          ArrayRef<Capability> ref(caps, llvm::size(caps));
           capabilities.push_back(ref);
         }
         return type.getElementType().cast<ScalarType>().getCapabilities(
@@ -242,7 +242,7 @@ void CooperativeMatrixNVType::getExtensions(
     Optional<StorageClass> storage) {
   getElementType().cast<SPIRVType>().getExtensions(extensions, storage);
   static const Extension exts[] = {Extension::SPV_NV_cooperative_matrix};
-  ArrayRef<Extension> ref(exts, llvm::array_lengthof(exts));
+  ArrayRef<Extension> ref(exts, llvm::size(exts));
   extensions.push_back(ref);
 }
 
@@ -251,7 +251,7 @@ void CooperativeMatrixNVType::getCapabilities(
     Optional<StorageClass> storage) {
   getElementType().cast<SPIRVType>().getCapabilities(capabilities, storage);
   static const Capability caps[] = {Capability::CooperativeMatrixNV};
-  ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
+  ArrayRef<Capability> ref(caps, llvm::size(caps));
   capabilities.push_back(ref);
 }
 
@@ -468,7 +468,7 @@ void RuntimeArrayType::getCapabilities(
     Optional<StorageClass> storage) {
   {
     static const Capability caps[] = {Capability::Shader};
-    ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
+    ArrayRef<Capability> ref(caps, llvm::size(caps));
     capabilities.push_back(ref);
   }
   getElementType().cast<SPIRVType>().getCapabilities(capabilities, storage);
@@ -517,7 +517,7 @@ void ScalarType::getExtensions(SPIRVType::ExtensionArrayRefVector &extensions,
   case StorageClass::Uniform:
     if (getIntOrFloatBitWidth() == 8) {
       static const Extension exts[] = {Extension::SPV_KHR_8bit_storage};
-      ArrayRef<Extension> ref(exts, llvm::array_lengthof(exts));
+      ArrayRef<Extension> ref(exts, llvm::size(exts));
       extensions.push_back(ref);
     }
     LLVM_FALLTHROUGH;
@@ -525,7 +525,7 @@ void ScalarType::getExtensions(SPIRVType::ExtensionArrayRefVector &extensions,
   case StorageClass::Output:
     if (getIntOrFloatBitWidth() == 16) {
       static const Extension exts[] = {Extension::SPV_KHR_16bit_storage};
-      ArrayRef<Extension> ref(exts, llvm::array_lengthof(exts));
+      ArrayRef<Extension> ref(exts, llvm::size(exts));
       extensions.push_back(ref);
     }
     break;
@@ -547,11 +547,11 @@ void ScalarType::getCapabilities(
   case StorageClass::storage: {                                                \
     if (bitwidth == 8) {                                                       \
       static const Capability caps[] = {Capability::cap8};                     \
-      ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));              \
+      ArrayRef<Capability> ref(caps, llvm::size(caps));              \
       capabilities.push_back(ref);                                             \
     } else if (bitwidth == 16) {                                               \
       static const Capability caps[] = {Capability::cap16};                    \
-      ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));              \
+      ArrayRef<Capability> ref(caps, llvm::size(caps));              \
       capabilities.push_back(ref);                                             \
     }                                                                          \
     /* No requirements for other bitwidths */                                  \
@@ -571,7 +571,7 @@ void ScalarType::getCapabilities(
     case StorageClass::Output: {
       if (bitwidth == 16) {
         static const Capability caps[] = {Capability::StorageInputOutput16};
-        ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
+        ArrayRef<Capability> ref(caps, llvm::size(caps));
         capabilities.push_back(ref);
       }
       return;
@@ -588,7 +588,7 @@ void ScalarType::getCapabilities(
 #define WIDTH_CASE(type, width)                                                \
   case width: {                                                                \
     static const Capability caps[] = {Capability::type##width};                \
-    ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));                \
+    ArrayRef<Capability> ref(caps, llvm::size(caps));                \
     capabilities.push_back(ref);                                               \
   } break
 
@@ -1147,7 +1147,7 @@ void MatrixType::getCapabilities(
     Optional<StorageClass> storage) {
   {
     static const Capability caps[] = {Capability::Matrix};
-    ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
+    ArrayRef<Capability> ref(caps, llvm::size(caps));
     capabilities.push_back(ref);
   }
   // Add any capabilities associated with the underlying vectors (i.e., columns)

From d70d9977999b8a08fe5635fb2c58588e6ffb7997 Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 26 Jan 2022 09:55:17 -0500
Subject: [PATCH 743/946] [x86] add test for miscompile from wrong min signbits
 ( #53401 ); NFC

---
 llvm/test/CodeGen/X86/vselect-constants.ll | 34 ++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/llvm/test/CodeGen/X86/vselect-constants.ll b/llvm/test/CodeGen/X86/vselect-constants.ll
index 9422b836d39dd..8aba81824d121 100644
--- a/llvm/test/CodeGen/X86/vselect-constants.ll
+++ b/llvm/test/CodeGen/X86/vselect-constants.ll
@@ -273,3 +273,37 @@ BB:
   %smax96 = select <2 x i1> %c0, <2 x i37> %xor_x, <2 x i37> zeroinitializer
   ret <2 x i37> %smax96
 }
+
+; PR53401
+
+define i32 @wrong_min_signbits(<2 x i16> %x) {
+; SSE-LABEL: wrong_min_signbits:
+; SSE:       # %bb.0:
+; SSE-NEXT:    pxor %xmm1, %xmm1
+; SSE-NEXT:    pcmpeqw %xmm0, %xmm1
+; SSE-NEXT:    movdqa {{.*#+}} xmm0 = [1,0,0,0]
+; SSE-NEXT:    pandn %xmm0, %xmm1
+; SSE-NEXT:    psllw $15, %xmm1
+; SSE-NEXT:    psraw $15, %xmm1
+; SSE-NEXT:    movdqa %xmm1, %xmm2
+; SSE-NEXT:    pandn %xmm0, %xmm2
+; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE-NEXT:    por %xmm2, %xmm1
+; SSE-NEXT:    movd %xmm1, %eax
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: wrong_min_signbits:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,0,0,0]
+; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    retq
+  %true_true = icmp ne <2 x i16> %x, zeroinitializer
+  %true_false = and <2 x i1> %true_true, <i1 true, i1 false>
+  %sel = select <2 x i1> %true_false, <2 x i16> <i16 2, i16 0>, <2 x i16> <i16 1, i16 0>
+  %t1 = bitcast <2 x i16> %sel to i32
+  ret i32 %t1
+}

From 63daea8b35cdb48d6061dc8ad72e5445b808dbce Mon Sep 17 00:00:00 2001
From: Sanjay Patel <spatel@rotateright.com>
Date: Wed, 26 Jan 2022 09:59:51 -0500
Subject: [PATCH 744/946] [SDAG] fix bug in ComputeNumSignBits of target
 constant

The loop below the changed line assumes that the element
width of the target constant is the same as the element
width of the loaded value, but that is not always true.

We could try harder to do some kind of min/max calc even
if the sizes don't match, but that can be another patch
if needed. This fixes #53401 (miscompile) and does not
change the motivating cases added when this analysis
was introduced:
ad298f86b7ad2a
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 3 ++-
 llvm/test/CodeGen/X86/vselect-constants.ll     | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index e305041f7490d..28363e2b9e88e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4282,7 +4282,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
           // scalar cases.
           Type *CstTy = Cst->getType();
           if (CstTy->isVectorTy() &&
-              (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+              (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits() &&
+              VTBits == CstTy->getScalarSizeInBits()) {
             Tmp = VTBits;
             for (unsigned i = 0; i != NumElts; ++i) {
               if (!DemandedElts[i])
diff --git a/llvm/test/CodeGen/X86/vselect-constants.ll b/llvm/test/CodeGen/X86/vselect-constants.ll
index 8aba81824d121..f95eccae188a9 100644
--- a/llvm/test/CodeGen/X86/vselect-constants.ll
+++ b/llvm/test/CodeGen/X86/vselect-constants.ll
@@ -298,6 +298,8 @@ define i32 @wrong_min_signbits(<2 x i16> %x) {
 ; AVX-NEXT:    vpcmpeqw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [1,0,0,0]
 ; AVX-NEXT:    vpandn %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
+; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
 ; AVX-NEXT:    vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
 ; AVX-NEXT:    vmovd %xmm0, %eax
 ; AVX-NEXT:    retq

From df597bf000b5c566e0f9218a53927daf9bc60f6d Mon Sep 17 00:00:00 2001
From: dongAxis <dongaxis1944@outlook.com>
Date: Wed, 26 Jan 2022 23:22:09 +0800
Subject: [PATCH 745/946] [NFC][ORC][AArch64] use isInt<N> to replace
 fitsRangeSignedInt on aarch64

Summary:
This is the first path to support more relocation types on aarch64.
 The patch just uses the isInt<N> to replace fitsRangeSignedInt.

Test Plan:
check-all

Differential Revision: https://reviews.llvm.org/D118231
---
 llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
index a228c2ca0b233..dd3eb97c21a0e 100644
--- a/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
+++ b/llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp
@@ -16,6 +16,7 @@
 #include "llvm/BinaryFormat/ELF.h"
 #include "llvm/ExecutionEngine/JITLink/aarch64.h"
 #include "llvm/Object/ELFObjectFile.h"
+#include "llvm/Support/MathExtras.h"
 
 #define DEBUG_TYPE "jitlink"
 
@@ -51,7 +52,7 @@ class ELFJITLinker_aarch64 : public JITLinker<ELFJITLinker_aarch64> {
       if (static_cast<uint64_t>(Value) & 0x3)
         return make_error<JITLinkError>("Call target is not 32-bit aligned");
 
-      if (!fitsRangeSignedInt<27>(Value))
+      if (!isInt<28>(Value))
         return makeTargetOutOfRangeError(G, B, E);
 
       uint32_t RawInstr = *(little32_t *)FixupPtr;
@@ -65,10 +66,6 @@ class ELFJITLinker_aarch64 : public JITLinker<ELFJITLinker_aarch64> {
     }
     return Error::success();
   }
-
-  template <uint8_t Bits> static bool fitsRangeSignedInt(int64_t Value) {
-    return Value >= -(1ll << Bits) && Value < (1ll << Bits);
-  }
 };
 
 template <typename ELFT>

From 75c22b382f2a7b0bb9499215a3d64e146e3f02cc Mon Sep 17 00:00:00 2001
From: Stanislav Gatev <sgatev@google.com>
Date: Wed, 26 Jan 2022 12:10:38 +0000
Subject: [PATCH 746/946] [clang][dataflow] Add a transfer function for
 CXXBoolLiteralExpr

This is part of the implementation of the dataflow analysis framework.
See "[RFC] A dataflow analysis framework for Clang AST" on cfe-dev.

Reviewed-by: xazax.hun

Differential Revision: https://reviews.llvm.org/D118236
---
 clang/lib/Analysis/FlowSensitive/Transfer.cpp |  7 ++--
 .../Analysis/FlowSensitive/TransferTest.cpp   | 35 +++++++++++++++++++
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Analysis/FlowSensitive/Transfer.cpp b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
index 7c5e063278d78..51a86b727e339 100644
--- a/clang/lib/Analysis/FlowSensitive/Transfer.cpp
+++ b/clang/lib/Analysis/FlowSensitive/Transfer.cpp
@@ -443,8 +443,11 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
     // FIXME: Implement array initialization.
   }
 
-  // FIXME: Add support for:
-  // - CXXBoolLiteralExpr
+  void VisitCXXBoolLiteralExpr(const CXXBoolLiteralExpr *S) {
+    auto &Loc = Env.createStorageLocation(*S);
+    Env.setStorageLocation(*S, Loc);
+    Env.setValue(Loc, Env.getBoolLiteralValue(S->getValue()));
+  }
 
 private:
   Environment &Env;
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index dc365d5bfe75f..30238a40fec59 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1970,4 +1970,39 @@ TEST_F(TransferTest, AssignToUnionMember) {
               });
 }
 
+TEST_F(TransferTest, AssignFromBoolLiteral) {
+  std::string Code = R"(
+    void target() {
+      bool Foo = true;
+      bool Bar = false;
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code, [](llvm::ArrayRef<
+                   std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
+                   Results,
+               ASTContext &ASTCtx) {
+        ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
+        const Environment &Env = Results[0].second.Env;
+
+        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+        ASSERT_THAT(FooDecl, NotNull());
+
+        const auto *FooVal =
+            dyn_cast_or_null<BoolValue>(Env.getValue(*FooDecl, SkipPast::None));
+        ASSERT_THAT(FooVal, NotNull());
+
+        const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar");
+        ASSERT_THAT(BarDecl, NotNull());
+
+        const auto *BarVal =
+            dyn_cast_or_null<BoolValue>(Env.getValue(*BarDecl, SkipPast::None));
+        ASSERT_THAT(BarVal, NotNull());
+
+        EXPECT_EQ(FooVal, &Env.getBoolLiteralValue(true));
+        EXPECT_EQ(BarVal, &Env.getBoolLiteralValue(false));
+      });
+}
+
 } // namespace

From 297bbf106288b0d673709ca02917d82e7bebdf1a Mon Sep 17 00:00:00 2001
From: serge-sans-paille <sguelton@redhat.com>
Date: Wed, 26 Jan 2022 16:53:38 +0100
Subject: [PATCH 747/946] Fix ambiguous call to llvm::size introduced in
 ef8206320769ad31422

---
 llvm/utils/TableGen/X86DisassemblerTables.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 474dcbdb328f8..c8d33833b17a6 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -657,7 +657,7 @@ static const char* stringForDecisionType(ModRMDecisionType dt) {
 }
 
 DisassemblerTables::DisassemblerTables() {
-  for (unsigned i = 0; i < size(Tables); i++)
+  for (unsigned i = 0; i < llvm::size(Tables); i++)
     Tables[i] = std::make_unique<ContextDecision>();
 
   HasConflicts = false;

From f15014ff549a8686671a599f7b49ce9963769eaf Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Wed, 26 Jan 2022 16:55:53 +0100
Subject: [PATCH 748/946] Revert "Rename llvm::array_lengthof into llvm::size
 to match std::size from C++17"

This reverts commit ef8206320769ad31422a803a0d6de6077fd231d2.

- It conflicts with the existing llvm::size in STLExtras, which will now
  never be called.
- Calling it without llvm:: breaks C++17 compat
---
 clang/include/clang/AST/OpenMPClause.h        | 12 ++---
 clang/lib/AST/ASTContext.cpp                  |  2 +-
 clang/lib/AST/AttrDocTable.cpp                |  2 +-
 clang/lib/AST/CommentCommandTraits.cpp        |  6 +--
 clang/lib/Basic/DiagnosticIDs.cpp             |  4 +-
 clang/lib/Basic/Targets/NVPTX.h               |  2 +-
 clang/lib/CodeGen/CGExpr.cpp                  |  2 +-
 clang/lib/CodeGen/CGObjC.cpp                  |  2 +-
 clang/lib/Driver/Compilation.cpp              |  2 +-
 clang/lib/Driver/ToolChain.cpp                |  2 +-
 clang/lib/Driver/ToolChains/Darwin.cpp        |  8 ++--
 clang/lib/Driver/Types.cpp                    |  2 +-
 .../lib/Frontend/PrintPreprocessedOutput.cpp  |  2 +-
 clang/lib/Parse/ParseDecl.cpp                 |  4 +-
 clang/lib/Parse/ParseOpenMP.cpp               |  2 +-
 clang/lib/Sema/DeclSpec.cpp                   |  4 +-
 clang/lib/Sema/ParsedAttr.cpp                 |  2 +-
 clang/lib/Sema/SemaLookup.cpp                 |  6 +--
 clang/lib/Sema/TreeTransform.h                |  2 +-
 clang/test/Analysis/templates.cpp             |  4 +-
 clang/test/SemaTemplate/instantiate-init.cpp  | 10 ++--
 clang/unittests/AST/CommentLexer.cpp          | 48 +++++++++----------
 clang/unittests/AST/CommentParser.cpp         | 36 +++++++-------
 clang/unittests/AST/DeclPrinterTest.cpp       |  4 +-
 .../Tooling/CompilationDatabaseTest.cpp       |  2 +-
 lld/COFF/Chunks.cpp                           |  2 +-
 lldb/source/DataFormatters/FormatManager.cpp  |  2 +-
 lldb/source/Host/common/MainLoop.cpp          |  2 +-
 .../windows/ConnectionGenericFileWindows.cpp  |  2 +-
 .../source/Interpreter/CommandInterpreter.cpp |  2 +-
 lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp |  4 +-
 lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp   |  4 +-
 lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp |  4 +-
 .../Plugins/ABI/Mips/ABISysV_mips64.cpp       |  2 +-
 .../Plugins/ABI/PowerPC/ABISysV_ppc.cpp       |  2 +-
 .../Plugins/ABI/PowerPC/ABISysV_ppc64.cpp     |  4 +-
 .../Plugins/ABI/SystemZ/ABISysV_s390x.cpp     |  2 +-
 .../DynamicLoaderDarwinKernel.cpp             |  2 +-
 .../Instruction/ARM/EmulateInstructionARM.cpp |  4 +-
 .../ARM64/EmulateInstructionARM64.cpp         |  4 +-
 .../MIPS/EmulateInstructionMIPS.cpp           |  2 +-
 .../MIPS64/EmulateInstructionMIPS64.cpp       |  2 +-
 .../PPC64/EmulateInstructionPPC64.cpp         |  4 +-
 .../AppleObjCTrampolineHandler.cpp            |  4 +-
 .../ObjectFile/PECOFF/ObjectFilePECOFF.cpp    |  2 +-
 .../ObjectFile/PECOFF/PECallFrameInfo.cpp     |  4 +-
 .../Platform/MacOSX/PlatformDarwin.cpp        |  4 +-
 .../Utility/RegisterContextDarwin_arm.cpp     | 10 ++--
 .../Utility/RegisterContextDarwin_arm64.cpp   | 10 ++--
 .../Utility/RegisterContextDarwin_i386.cpp    | 10 ++--
 .../Utility/RegisterContextDarwin_x86_64.cpp  | 10 ++--
 .../Utility/RegisterContextWindows_i386.cpp   |  4 +-
 .../Utility/RegisterContextWindows_x86_64.cpp |  4 +-
 .../NativeRegisterContextWindows_WoW64.cpp    |  2 +-
 .../NativeRegisterContextWindows_arm.cpp      |  4 +-
 .../NativeRegisterContextWindows_arm64.cpp    |  4 +-
 .../NativeRegisterContextWindows_i386.cpp     |  2 +-
 .../NativeRegisterContextWindows_x86_64.cpp   |  4 +-
 .../Common/arm/RegisterContextWindows_arm.cpp | 10 ++--
 .../arm64/RegisterContextWindows_arm64.cpp    | 10 ++--
 .../Common/x64/RegisterContextWindows_x64.cpp | 10 ++--
 .../Common/x86/RegisterContextWindows_x86.cpp |  8 ++--
 .../minidump/RegisterContextMinidump_ARM.cpp  | 12 ++---
 .../RegisterContextMinidump_ARM64.cpp         |  8 ++--
 lldb/source/Utility/ArchSpec.cpp              | 16 +++----
 lldb/unittests/Utility/StatusTest.cpp         |  2 +-
 llvm/include/llvm/ADT/STLExtras.h             |  6 +++
 llvm/include/llvm/ADT/STLForwardCompat.h      | 10 ----
 llvm/include/llvm/CodeGen/TargetLowering.h    | 24 +++++-----
 llvm/include/llvm/MC/SubtargetFeature.h       |  6 +--
 .../CodeGen/GlobalISel/LegalizerHelper.cpp    |  8 ++--
 .../SelectionDAG/LegalizeVectorTypes.cpp      |  8 ++--
 llvm/lib/CodeGen/TargetLoweringBase.cpp       |  2 +-
 llvm/lib/DWP/DWP.cpp                          |  2 +-
 llvm/lib/IR/DebugInfoMetadata.cpp             |  6 +--
 llvm/lib/MC/MCAsmBackend.cpp                  |  2 +-
 llvm/lib/MC/MCDwarf.cpp                       |  2 +-
 llvm/lib/Object/MachOObjectFile.cpp           |  2 +-
 llvm/lib/Support/ARMAttributeParser.cpp       |  6 +--
 llvm/lib/Support/CrashRecoveryContext.cpp     |  2 +-
 llvm/lib/Support/NativeFormatting.cpp         |  2 +-
 llvm/lib/Support/RISCVAttributeParser.cpp     |  2 +-
 llvm/lib/Support/Triple.cpp                   | 10 ++--
 llvm/lib/Support/Unix/Signals.inc             | 10 ++--
 llvm/lib/Support/Windows/Path.inc             |  2 +-
 llvm/lib/Support/Windows/Signals.inc          |  2 +-
 llvm/lib/Support/X86TargetParser.cpp          | 14 +++---
 llvm/lib/Support/raw_ostream.cpp              |  4 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  4 +-
 .../MCTargetDesc/AArch64InstPrinter.cpp       |  2 +-
 llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp      |  4 +-
 llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp   |  2 +-
 llvm/lib/Target/ARC/ARCISelLowering.cpp       |  8 ++--
 llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp      |  4 +-
 llvm/lib/Target/ARM/ARMFrameLowering.cpp      |  2 +-
 llvm/lib/Target/ARM/ARMISelLowering.cpp       |  2 +-
 llvm/lib/Target/ARM/Thumb2SizeReduction.cpp   |  2 +-
 llvm/lib/Target/AVR/AVRISelLowering.cpp       |  4 +-
 .../Disassembler/HexagonDisassembler.cpp      |  8 ++--
 .../lib/Target/Hexagon/HexagonFrameLowering.h |  2 +-
 .../Target/Hexagon/HexagonHardwareLoops.cpp   |  4 +-
 .../Target/Hexagon/HexagonISelDAGToDAG.cpp    |  2 +-
 .../Target/Hexagon/HexagonISelLowering.cpp    |  2 +-
 .../MCTargetDesc/HexagonMCCodeEmitter.cpp     |  2 +-
 .../MSP430/MCTargetDesc/MSP430AsmBackend.cpp  |  2 +-
 llvm/lib/Target/MSP430/MSP430ISelLowering.cpp |  4 +-
 .../Mips/MCTargetDesc/MipsAsmBackend.cpp      |  4 +-
 llvm/lib/Target/Mips/Mips16ISelLowering.cpp   |  2 +-
 llvm/lib/Target/PowerPC/PPCCallingConv.cpp    |  6 +--
 llvm/lib/Target/PowerPC/PPCFrameLowering.cpp  |  8 ++--
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp   | 20 ++++----
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp      |  4 +-
 .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp    |  2 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  4 +-
 .../Sparc/Disassembler/SparcDisassembler.cpp  |  2 +-
 .../Target/SystemZ/SystemZFrameLowering.cpp   |  4 +-
 llvm/lib/Target/VE/VEFrameLowering.h          |  2 +-
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp |  4 +-
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  4 +-
 llvm/lib/Target/XCore/XCoreISelLowering.cpp   |  4 +-
 .../InstCombine/InstCombineAddSub.cpp         |  2 +-
 .../Transforms/Scalar/LoopStrengthReduce.cpp  |  2 +-
 llvm/lib/Transforms/Utils/MetaRenamer.cpp     |  2 +-
 llvm/tools/llvm-config/llvm-config.cpp        |  4 +-
 llvm/tools/llvm-objdump/COFFDump.cpp          |  2 +-
 llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp   |  4 +-
 llvm/tools/llvm-readobj/ELFDumper.cpp         |  2 +-
 llvm/unittests/ADT/APFloatTest.cpp            | 20 ++++----
 llvm/unittests/ADT/StringRefTest.cpp          | 12 ++---
 llvm/unittests/ADT/TinyPtrVectorTest.cpp      |  2 +-
 .../ProfileData/CoverageMappingTest.cpp       |  4 +-
 llvm/unittests/Support/BinaryStreamTest.cpp   |  2 +-
 llvm/unittests/Support/CommandLineTest.cpp    | 10 ++--
 llvm/unittests/Support/FormatVariadicTest.cpp |  2 +-
 llvm/unittests/Support/TargetParserTest.cpp   | 12 ++---
 llvm/utils/KillTheDoctor/KillTheDoctor.cpp    |  6 +--
 llvm/utils/TableGen/AsmWriterEmitter.cpp      |  2 +-
 llvm/utils/TableGen/CodeGenTarget.cpp         |  2 +-
 llvm/utils/TableGen/RegisterInfoEmitter.cpp   |  4 +-
 llvm/utils/TableGen/X86DisassemblerTables.cpp |  2 +-
 .../mlir/Dialect/SPIRV/IR/SPIRVBase.td        |  4 +-
 mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp      |  6 +--
 mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp      | 22 ++++-----
 143 files changed, 382 insertions(+), 386 deletions(-)

diff --git a/clang/include/clang/AST/OpenMPClause.h b/clang/include/clang/AST/OpenMPClause.h
index e3f975c906433..3ecc1d40fafc6 100644
--- a/clang/include/clang/AST/OpenMPClause.h
+++ b/clang/include/clang/AST/OpenMPClause.h
@@ -5772,11 +5772,11 @@ class OMPMapClause final : public OMPMappableExprListClause<OMPMapClause>,
                                   /*SupportsMapper=*/true, &MapperQualifierLoc,
                                   &MapperIdInfo),
         MapType(MapType), MapTypeIsImplicit(MapTypeIsImplicit), MapLoc(MapLoc) {
-    assert(llvm::size(MapTypeModifiers) == MapModifiers.size() &&
+    assert(llvm::array_lengthof(MapTypeModifiers) == MapModifiers.size() &&
            "Unexpected number of map type modifiers.");
     llvm::copy(MapModifiers, std::begin(MapTypeModifiers));
 
-    assert(llvm::size(MapTypeModifiersLoc) ==
+    assert(llvm::array_lengthof(MapTypeModifiersLoc) ==
                MapModifiersLoc.size() &&
            "Unexpected number of map type modifier locations.");
     llvm::copy(MapModifiersLoc, std::begin(MapTypeModifiersLoc));
@@ -6694,11 +6694,11 @@ class OMPToClause final : public OMPMappableExprListClause<OMPToClause>,
       : OMPMappableExprListClause(llvm::omp::OMPC_to, Locs, Sizes,
                                   /*SupportsMapper=*/true, &MapperQualifierLoc,
                                   &MapperIdInfo) {
-    assert(llvm::size(MotionModifiers) == TheMotionModifiers.size() &&
+    assert(llvm::array_lengthof(MotionModifiers) == TheMotionModifiers.size() &&
            "Unexpected number of motion modifiers.");
     llvm::copy(TheMotionModifiers, std::begin(MotionModifiers));
 
-    assert(llvm::size(MotionModifiersLoc) ==
+    assert(llvm::array_lengthof(MotionModifiersLoc) ==
                TheMotionModifiersLoc.size() &&
            "Unexpected number of motion modifier locations.");
     llvm::copy(TheMotionModifiersLoc, std::begin(MotionModifiersLoc));
@@ -6896,11 +6896,11 @@ class OMPFromClause final
       : OMPMappableExprListClause(llvm::omp::OMPC_from, Locs, Sizes,
                                   /*SupportsMapper=*/true, &MapperQualifierLoc,
                                   &MapperIdInfo) {
-    assert(llvm::size(MotionModifiers) == TheMotionModifiers.size() &&
+    assert(llvm::array_lengthof(MotionModifiers) == TheMotionModifiers.size() &&
            "Unexpected number of motion modifiers.");
     llvm::copy(TheMotionModifiers, std::begin(MotionModifiers));
 
-    assert(llvm::size(MotionModifiersLoc) ==
+    assert(llvm::array_lengthof(MotionModifiersLoc) ==
                TheMotionModifiersLoc.size() &&
            "Unexpected number of motion modifier locations.");
     llvm::copy(TheMotionModifiersLoc, std::begin(MotionModifiersLoc));
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 2111e9380167c..9d63724c919a0 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -6594,7 +6594,7 @@ QualType ASTContext::getPromotedIntegerType(QualType Promotable) const {
       uint64_t FromSize = getTypeSize(BT);
       QualType PromoteTypes[] = { IntTy, UnsignedIntTy, LongTy, UnsignedLongTy,
                                   LongLongTy, UnsignedLongLongTy };
-      for (size_t Idx = 0; Idx < llvm::size(PromoteTypes); ++Idx) {
+      for (size_t Idx = 0; Idx < llvm::array_lengthof(PromoteTypes); ++Idx) {
         uint64_t ToSize = getTypeSize(PromoteTypes[Idx]);
         if (FromSize < ToSize ||
             (FromSize == ToSize &&
diff --git a/clang/lib/AST/AttrDocTable.cpp b/clang/lib/AST/AttrDocTable.cpp
index 8f7e7eaf0452c..3bfedac8b8f13 100644
--- a/clang/lib/AST/AttrDocTable.cpp
+++ b/clang/lib/AST/AttrDocTable.cpp
@@ -21,7 +21,7 @@ static const llvm::StringRef AttrDoc[] = {
 };
 
 llvm::StringRef clang::Attr::getDocumentation(clang::attr::Kind K) {
-  if(K < llvm::size(AttrDoc))
+  if(K < llvm::array_lengthof(AttrDoc))
     return AttrDoc[K];
   return "";
 }
diff --git a/clang/lib/AST/CommentCommandTraits.cpp b/clang/lib/AST/CommentCommandTraits.cpp
index 68f8006261f14..bdc0dd47fb7d2 100644
--- a/clang/lib/AST/CommentCommandTraits.cpp
+++ b/clang/lib/AST/CommentCommandTraits.cpp
@@ -17,7 +17,7 @@ namespace comments {
 
 CommandTraits::CommandTraits(llvm::BumpPtrAllocator &Allocator,
                              const CommentOptions &CommentOptions) :
-    NextID(llvm::size(Commands)), Allocator(Allocator) {
+    NextID(llvm::array_lengthof(Commands)), Allocator(Allocator) {
   registerCommentOptions(CommentOptions);
 }
 
@@ -115,7 +115,7 @@ const CommandInfo *CommandTraits::registerBlockCommand(StringRef CommandName) {
 
 const CommandInfo *CommandTraits::getBuiltinCommandInfo(
                                                   unsigned CommandID) {
-  if (CommandID < llvm::size(Commands))
+  if (CommandID < llvm::array_lengthof(Commands))
     return &Commands[CommandID];
   return nullptr;
 }
@@ -131,7 +131,7 @@ const CommandInfo *CommandTraits::getRegisteredCommandInfo(
 
 const CommandInfo *CommandTraits::getRegisteredCommandInfo(
                                                   unsigned CommandID) const {
-  return RegisteredCommands[CommandID - llvm::size(Commands)];
+  return RegisteredCommands[CommandID - llvm::array_lengthof(Commands)];
 }
 
 } // end namespace comments
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index 3e6204a07c297..87db131992e47 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -202,7 +202,7 @@ const StaticDiagInfoRec StaticDiagInfo[] = {
 
 } // namespace
 
-static const unsigned StaticDiagInfoSize = llvm::size(StaticDiagInfo);
+static const unsigned StaticDiagInfoSize = llvm::array_lengthof(StaticDiagInfo);
 
 /// GetDiagInfo - Return the StaticDiagInfoRec entry for the specified DiagID,
 /// or null if the ID is invalid.
@@ -317,7 +317,7 @@ static const StaticDiagCategoryRec CategoryNameTable[] = {
 
 /// getNumberOfCategories - Return the number of categories
 unsigned DiagnosticIDs::getNumberOfCategories() {
-  return llvm::size(CategoryNameTable) - 1;
+  return llvm::array_lengthof(CategoryNameTable) - 1;
 }
 
 /// getCategoryNameFromID - Given a category ID, return the name of the
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index 83f220dcb8b35..589f24f4bb03e 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -159,7 +159,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
   /// DWARF.
   Optional<unsigned>
   getDWARFAddressSpace(unsigned AddressSpace) const override {
-    if (AddressSpace >= llvm::size(NVPTXDWARFAddrSpaceMap) ||
+    if (AddressSpace >= llvm::array_lengthof(NVPTXDWARFAddrSpaceMap) ||
         NVPTXDWARFAddrSpaceMap[AddressSpace] < 0)
       return llvm::None;
     return NVPTXDWARFAddrSpaceMap[AddressSpace];
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 888f9e275dd15..0fb7ec26a85e5 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -3206,7 +3206,7 @@ void CodeGenFunction::EmitCheck(
   assert(IsSanitizerScope);
   assert(Checked.size() > 0);
   assert(CheckHandler >= 0 &&
-         size_t(CheckHandler) < llvm::size(SanitizerHandlers));
+         size_t(CheckHandler) < llvm::array_lengthof(SanitizerHandlers));
   const StringRef CheckName = SanitizerHandlers[CheckHandler].Name;
 
   llvm::Value *FatalCond = nullptr;
diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp
index a2c8f8ef10665..8cc609186f9e9 100644
--- a/clang/lib/CodeGen/CGObjC.cpp
+++ b/clang/lib/CodeGen/CGObjC.cpp
@@ -1749,7 +1749,7 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){
     &CGM.getContext().Idents.get("count")
   };
   Selector FastEnumSel =
-    CGM.getContext().Selectors.getSelector(llvm::size(II), &II[0]);
+    CGM.getContext().Selectors.getSelector(llvm::array_lengthof(II), &II[0]);
 
   QualType ItemsTy =
     getContext().getConstantArrayType(getContext().getObjCIdType(),
diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp
index c479375157c26..67d941c6c2ab9 100644
--- a/clang/lib/Driver/Compilation.cpp
+++ b/clang/lib/Driver/Compilation.cpp
@@ -278,7 +278,7 @@ void Compilation::initCompilationForDiagnostics() {
       options::OPT_o,  options::OPT_MD, options::OPT_MMD, options::OPT_M,
       options::OPT_MM, options::OPT_MF, options::OPT_MG,  options::OPT_MJ,
       options::OPT_MQ, options::OPT_MT, options::OPT_MV};
-  for (unsigned i = 0, e = llvm::size(OutputOpts); i != e; ++i) {
+  for (unsigned i = 0, e = llvm::array_lengthof(OutputOpts); i != e; ++i) {
     if (TranslatedArgs->hasArg(OutputOpts[i]))
       TranslatedArgs->eraseArg(OutputOpts[i]);
   }
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index cc6f7210a8d07..7551ee4aeb79f 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -151,7 +151,7 @@ static const DriverSuffix *FindDriverSuffix(StringRef ProgName, size_t &Pos) {
       {"flang", "--driver-mode=flang"},
   };
 
-  for (size_t i = 0; i < llvm::size(DriverSuffixes); ++i) {
+  for (size_t i = 0; i < llvm::array_lengthof(DriverSuffixes); ++i) {
     StringRef Suffix(DriverSuffixes[i].Suffix);
     if (ProgName.endswith(Suffix)) {
       Pos = ProgName.size() - Suffix.size();
diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp
index 8fed21586959d..f7da3f187814f 100644
--- a/clang/lib/Driver/ToolChains/Darwin.cpp
+++ b/clang/lib/Driver/ToolChains/Darwin.cpp
@@ -1694,7 +1694,7 @@ getDeploymentTargetFromEnvironmentVariables(const Driver &TheDriver,
       "TVOS_DEPLOYMENT_TARGET",
       "WATCHOS_DEPLOYMENT_TARGET",
   };
-  static_assert(llvm::size(EnvVars) == Darwin::LastDarwinPlatform + 1,
+  static_assert(llvm::array_lengthof(EnvVars) == Darwin::LastDarwinPlatform + 1,
                 "Missing platform");
   for (const auto &I : llvm::enumerate(llvm::makeArrayRef(EnvVars))) {
     if (char *Env = ::getenv(I.value()))
@@ -1715,11 +1715,11 @@ getDeploymentTargetFromEnvironmentVariables(const Driver &TheDriver,
           Targets[Darwin::TvOS] = "";
   } else {
     // Don't allow conflicts in any other platform.
-    unsigned FirstTarget = llvm::size(Targets);
-    for (unsigned I = 0; I != llvm::size(Targets); ++I) {
+    unsigned FirstTarget = llvm::array_lengthof(Targets);
+    for (unsigned I = 0; I != llvm::array_lengthof(Targets); ++I) {
       if (Targets[I].empty())
         continue;
-      if (FirstTarget == llvm::size(Targets))
+      if (FirstTarget == llvm::array_lengthof(Targets))
         FirstTarget = I;
       else
         TheDriver.Diag(diag::err_drv_conflicting_deployment_targets)
diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
index c5c17e1378fbc..1bd187ad2fc0a 100644
--- a/clang/lib/Driver/Types.cpp
+++ b/clang/lib/Driver/Types.cpp
@@ -42,7 +42,7 @@ static constexpr TypeInfo TypeInfos[] = {
 #include "clang/Driver/Types.def"
 #undef TYPE
 };
-static const unsigned numTypes = llvm::size(TypeInfos);
+static const unsigned numTypes = llvm::array_lengthof(TypeInfos);
 
 static const TypeInfo &getInfo(unsigned id) {
   assert(id > 0 && id - 1 < numTypes && "Invalid Type ID.");
diff --git a/clang/lib/Frontend/PrintPreprocessedOutput.cpp b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
index 23d2bec11dcf9..1d0022bda474c 100644
--- a/clang/lib/Frontend/PrintPreprocessedOutput.cpp
+++ b/clang/lib/Frontend/PrintPreprocessedOutput.cpp
@@ -863,7 +863,7 @@ static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
     } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
                Tok.getLiteralData()) {
       OS.write(Tok.getLiteralData(), Tok.getLength());
-    } else if (Tok.getLength() < llvm::size(Buffer)) {
+    } else if (Tok.getLength() < llvm::array_lengthof(Buffer)) {
       const char *TokPtr = Buffer;
       unsigned Len = PP.getSpelling(Tok, TokPtr);
       OS.write(TokPtr, Len);
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index d05169988f2f8..f21938c81689b 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -1365,7 +1365,7 @@ void Parser::ParseExternalSourceSymbolAttribute(
   ArgsUnion Args[] = {Language.get(), DefinedInExpr.get(),
                       GeneratedDeclaration};
   Attrs.addNew(&ExternalSourceSymbol, SourceRange(Loc, T.getCloseLocation()),
-               ScopeName, ScopeLoc, Args, llvm::size(Args), Syntax);
+               ScopeName, ScopeLoc, Args, llvm::array_lengthof(Args), Syntax);
 }
 
 /// Parse the contents of the "objc_bridge_related" attribute.
@@ -1493,7 +1493,7 @@ void Parser::ParseSwiftNewTypeAttribute(
 
   ArgsUnion Args[] = {SwiftType};
   Attrs.addNew(&AttrName, SourceRange(AttrNameLoc, T.getCloseLocation()),
-               ScopeName, ScopeLoc, Args, llvm::size(Args), Syntax);
+               ScopeName, ScopeLoc, Args, llvm::array_lengthof(Args), Syntax);
 }
 
 
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index d13f6e080630e..de3d58baf84c9 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -188,7 +188,7 @@ static OpenMPDirectiveKindExWrapper parseOpenMPDirectiveKind(Parser &P) {
   if (DKind == OMPD_unknown)
     return OMPD_unknown;
 
-  for (unsigned I = 0; I < llvm::size(F); ++I) {
+  for (unsigned I = 0; I < llvm::array_lengthof(F); ++I) {
     if (DKind != F[I][0])
       continue;
 
diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp
index c595237401e64..d4dc790c008a2 100644
--- a/clang/lib/Sema/DeclSpec.cpp
+++ b/clang/lib/Sema/DeclSpec.cpp
@@ -238,7 +238,7 @@ DeclaratorChunk DeclaratorChunk::getFunction(bool hasProto,
     // is already used (consider a function returning a function pointer) or too
     // small (function with too many parameters), go to the heap.
     if (!TheDeclarator.InlineStorageUsed &&
-        NumParams <= llvm::size(TheDeclarator.InlineParams)) {
+        NumParams <= llvm::array_lengthof(TheDeclarator.InlineParams)) {
       I.Fun.Params = TheDeclarator.InlineParams;
       new (I.Fun.Params) ParamInfo[NumParams];
       I.Fun.DeleteParams = false;
@@ -308,7 +308,7 @@ void Declarator::setDecompositionBindings(
   // Allocate storage for bindings and stash them away.
   if (Bindings.size()) {
     if (!InlineStorageUsed &&
-        Bindings.size() <= llvm::size(InlineBindings)) {
+        Bindings.size() <= llvm::array_lengthof(InlineBindings)) {
       BindingGroup.Bindings = InlineBindings;
       BindingGroup.DeleteBindings = false;
       InlineStorageUsed = true;
diff --git a/clang/lib/Sema/ParsedAttr.cpp b/clang/lib/Sema/ParsedAttr.cpp
index 5c533f11bf324..045847d0ce0fd 100644
--- a/clang/lib/Sema/ParsedAttr.cpp
+++ b/clang/lib/Sema/ParsedAttr.cpp
@@ -111,7 +111,7 @@ namespace {
 
 const ParsedAttrInfo &ParsedAttrInfo::get(const AttributeCommonInfo &A) {
   // If we have a ParsedAttrInfo for this ParsedAttr then return that.
-  if ((size_t)A.getParsedKind() < llvm::size(AttrInfoMap))
+  if ((size_t)A.getParsedKind() < llvm::array_lengthof(AttrInfoMap))
     return *AttrInfoMap[A.getParsedKind()];
 
   // If this is an ignored attribute then return an appropriate ParsedAttrInfo.
diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp
index 219f2c0f8a31f..af6ee24240ceb 100644
--- a/clang/lib/Sema/SemaLookup.cpp
+++ b/clang/lib/Sema/SemaLookup.cpp
@@ -4726,7 +4726,7 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
       "extern", "inline", "static", "typedef"
     };
 
-    const unsigned NumCTypeSpecs = llvm::size(CTypeSpecs);
+    const unsigned NumCTypeSpecs = llvm::array_lengthof(CTypeSpecs);
     for (unsigned I = 0; I != NumCTypeSpecs; ++I)
       Consumer.addKeywordResult(CTypeSpecs[I]);
 
@@ -4780,7 +4780,7 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
       static const char *const CXXExprs[] = {
         "delete", "new", "operator", "throw", "typeid"
       };
-      const unsigned NumCXXExprs = llvm::size(CXXExprs);
+      const unsigned NumCXXExprs = llvm::array_lengthof(CXXExprs);
       for (unsigned I = 0; I != NumCXXExprs; ++I)
         Consumer.addKeywordResult(CXXExprs[I]);
 
@@ -4806,7 +4806,7 @@ static void AddKeywordsToConsumer(Sema &SemaRef,
       // Statements.
       static const char *const CStmts[] = {
         "do", "else", "for", "goto", "if", "return", "switch", "while" };
-      const unsigned NumCStmts = llvm::size(CStmts);
+      const unsigned NumCStmts = llvm::array_lengthof(CStmts);
       for (unsigned I = 0; I != NumCStmts; ++I)
         Consumer.addKeywordResult(CStmts[I]);
 
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 8a0e26fca91e7..e43b3ca968ebb 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -14357,7 +14357,7 @@ TreeTransform<Derived>::RebuildArrayType(QualType ElementType,
     SemaRef.Context.UnsignedIntTy, SemaRef.Context.UnsignedLongTy,
     SemaRef.Context.UnsignedLongLongTy, SemaRef.Context.UnsignedInt128Ty
   };
-  const unsigned NumTypes = llvm::size(Types);
+  const unsigned NumTypes = llvm::array_lengthof(Types);
   QualType SizeType;
   for (unsigned I = 0; I != NumTypes; ++I)
     if (Size->getBitWidth() == SemaRef.Context.getIntWidth(Types[I])) {
diff --git a/clang/test/Analysis/templates.cpp b/clang/test/Analysis/templates.cpp
index 2c5b22d24119c..e7c30a764f72d 100644
--- a/clang/test/Analysis/templates.cpp
+++ b/clang/test/Analysis/templates.cpp
@@ -34,13 +34,13 @@ int main(){
 
 // <rdar://problem/11949235>
 template<class T, unsigned N>
-inline unsigned size(T (&)[N]) {
+inline unsigned array_lengthof(T (&)[N]) {
   return N;
 }
 
 void testNonTypeTemplateInstantiation() {
   const char *S[] = { "a", "b" };
-  clang_analyzer_eval(size(S) == 2);
+  clang_analyzer_eval(array_lengthof(S) == 2);
 #ifndef NO_INLINE
   // expected-warning@-2 {{TRUE}}
 #else
diff --git a/clang/test/SemaTemplate/instantiate-init.cpp b/clang/test/SemaTemplate/instantiate-init.cpp
index 52f2b416e64a3..6db33a972f86d 100644
--- a/clang/test/SemaTemplate/instantiate-init.cpp
+++ b/clang/test/SemaTemplate/instantiate-init.cpp
@@ -86,7 +86,7 @@ namespace PR7985 {
   template<int N> struct integral_c { };
 
   template <typename T, int N>
-  integral_c<N> size(T (&x)[N]) { return integral_c<N>(); } // expected-note 2{{candidate template ignored: could not match 'T[N]' against 'const Data<}}
+  integral_c<N> array_lengthof(T (&x)[N]) { return integral_c<N>(); } // expected-note 2{{candidate template ignored: could not match 'T[N]' against 'const Data<}}
 
   template<typename T>
   struct Data {
@@ -105,14 +105,14 @@ namespace PR7985 {
   const Data<float*> Description<float*>::data[];
 
   void test() {
-    integral_c<1> ic1 = size(Description<int>::data);
-    (void)sizeof(size(Description<float>::data));
+    integral_c<1> ic1 = array_lengthof(Description<int>::data);
+    (void)sizeof(array_lengthof(Description<float>::data));
 
-    (void)sizeof(size( // expected-error{{no matching function for call to 'size'}}
+    (void)sizeof(array_lengthof( // expected-error{{no matching function for call to 'array_lengthof'}}
                           Description<int*>::data // expected-note{{in instantiation of static data member 'PR7985::Description<int *>::data' requested here}}
                           ));
 
-    size(Description<float*>::data); // expected-error{{no matching function for call to 'size'}}
+    array_lengthof(Description<float*>::data); // expected-error{{no matching function for call to 'array_lengthof'}}
   }
 }
 
diff --git a/clang/unittests/AST/CommentLexer.cpp b/clang/unittests/AST/CommentLexer.cpp
index 9540ebfe32e52..456473e1753b6 100644
--- a/clang/unittests/AST/CommentLexer.cpp
+++ b/clang/unittests/AST/CommentLexer.cpp
@@ -91,7 +91,7 @@ TEST_F(CommentLexerTest, Basic2) {
   const char *Sources[] = {
     "//", "///", "//!", "///<", "//!<"
   };
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -107,7 +107,7 @@ TEST_F(CommentLexerTest, Basic3) {
   const char *Sources[] = {
     "/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"
   };
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -126,7 +126,7 @@ TEST_F(CommentLexerTest, Basic4) {
     "// Meow\n", "// Meow\r\n", "//! Meow\r",
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -146,7 +146,7 @@ TEST_F(CommentLexerTest, Basic5) {
     "/* Meow*/", "/** Meow*/",  "/*! Meow*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -169,7 +169,7 @@ TEST_F(CommentLexerTest, Basic6) {
     "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -248,7 +248,7 @@ TEST_F(CommentLexerTest, Basic7) {
 // A command marker followed by comment end.
 TEST_F(CommentLexerTest, DoxygenCommand1) {
   const char *Sources[] = { "//@", "///@", "//!@" };
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -265,7 +265,7 @@ TEST_F(CommentLexerTest, DoxygenCommand1) {
 // A command marker followed by comment end.
 TEST_F(CommentLexerTest, DoxygenCommand2) {
   const char *Sources[] = { "/*@*/", "/**@*/", "/*!@*/"};
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -283,7 +283,7 @@ TEST_F(CommentLexerTest, DoxygenCommand2) {
 // A command marker followed by comment end.
 TEST_F(CommentLexerTest, DoxygenCommand3) {
   const char *Sources[] = { "/*\\*/", "/**\\*/" };
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -311,12 +311,12 @@ TEST_F(CommentLexerTest, DoxygenCommand4) {
     "::", ""
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
 
-    ASSERT_EQ(size(Text), Toks.size());
+    ASSERT_EQ(array_lengthof(Text), Toks.size());
 
     for (size_t j = 0, e = Toks.size(); j != e; j++) {
       if(Toks[j].is(tok::text)) {
@@ -578,7 +578,7 @@ TEST_F(CommentLexerTest, VerbatimBlock1) {
     "/** \\verbatim\\endverbatim*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -645,7 +645,7 @@ TEST_F(CommentLexerTest, VerbatimBlock4) {
     "/** Meow \\verbatim aaa \\endverbatim*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -676,7 +676,7 @@ TEST_F(CommentLexerTest, VerbatimBlock5) {
     "/** Meow \\verbatim aaa */"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -901,7 +901,7 @@ TEST_F(CommentLexerTest, VerbatimLine1) {
     "/** \\fn*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -926,7 +926,7 @@ TEST_F(CommentLexerTest, VerbatimLine2) {
     "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1052,7 +1052,7 @@ TEST_F(CommentLexerTest, HTML4) {
     "// <img "
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1169,7 +1169,7 @@ TEST_F(CommentLexerTest, HTML9) {
     "// <img src "
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1195,7 +1195,7 @@ TEST_F(CommentLexerTest, HTML10) {
     "// <img src ="
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1225,7 +1225,7 @@ TEST_F(CommentLexerTest, HTML11) {
     "// <img src = \'"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1284,7 +1284,7 @@ TEST_F(CommentLexerTest, HTML13) {
     "// <img src=\'val\\\"\\'val\'"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1315,7 +1315,7 @@ TEST_F(CommentLexerTest, HTML14) {
     "// <img src=\'val\\\"\\'val\'>"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1348,7 +1348,7 @@ TEST_F(CommentLexerTest, HTML15) {
     "// <img />"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1373,7 +1373,7 @@ TEST_F(CommentLexerTest, HTML16) {
     "// <img / Aaa"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
@@ -1797,7 +1797,7 @@ TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
     "// &#X3D;"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     std::vector<Token> Toks;
 
     lexString(Sources[i], Toks);
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index 0de20dd32cb0b..62c461a00d16c 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -664,7 +664,7 @@ TEST_F(CommentParserTest, ParagraphSplitting1) {
     "*/"),
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -801,7 +801,7 @@ TEST_F(CommentParserTest, ParamCommand3) {
     "// Bbb\n")
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -831,7 +831,7 @@ TEST_F(CommentParserTest, ParamCommand4) {
     "// Bbb\n"),
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -861,7 +861,7 @@ TEST_F(CommentParserTest, ParamCommand5) {
     "// Bbb\n"),
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -892,7 +892,7 @@ TEST_F(CommentParserTest, ParamCommand6) {
     "// Bbb\n"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -947,7 +947,7 @@ TEST_F(CommentParserTest, TParamCommand1) {
     "// Bbb\n"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1081,7 +1081,7 @@ TEST_F(CommentParserTest, HTML1) {
     "// <a >"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1103,7 +1103,7 @@ TEST_F(CommentParserTest, HTML2) {
     "// <br />"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1127,7 +1127,7 @@ TEST_F(CommentParserTest, HTML3) {
     "// <a href >",
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1149,7 +1149,7 @@ TEST_F(CommentParserTest, HTML4) {
     "// <a href=\"bbb\">",
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1172,7 +1172,7 @@ TEST_F(CommentParserTest, HTML5) {
     "// </a >"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1285,7 +1285,7 @@ TEST_F(CommentParserTest, VerbatimBlock5) {
     " *\\endverbatim*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 1));
 
@@ -1309,7 +1309,7 @@ TEST_F(CommentParserTest, VerbatimBlock6) {
     " * \\endverbatim*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1336,7 +1336,7 @@ TEST_F(CommentParserTest, VerbatimBlock7) {
     " * \\endverbatim*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1364,7 +1364,7 @@ TEST_F(CommentParserTest, VerbatimBlock8) {
     " * Bbb\n"
     " * \\endverbatim*/"
   };
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1387,7 +1387,7 @@ TEST_F(CommentParserTest, VerbatimLine1) {
     "// \\fn\n"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1405,7 +1405,7 @@ TEST_F(CommentParserTest, VerbatimLine2) {
     "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
@@ -1424,7 +1424,7 @@ TEST_F(CommentParserTest, Deprecated) {
     "/// @deprecated\n"
   };
 
-  for (size_t i = 0, e = size(Sources); i != e; i++) {
+  for (size_t i = 0, e = array_lengthof(Sources); i != e; i++) {
     FullComment *FC = parseString(Sources[i]);
     ASSERT_TRUE(HasChildCount(FC, 2));
 
diff --git a/clang/unittests/AST/DeclPrinterTest.cpp b/clang/unittests/AST/DeclPrinterTest.cpp
index 628b948c72e48..c2d7d78738f96 100644
--- a/clang/unittests/AST/DeclPrinterTest.cpp
+++ b/clang/unittests/AST/DeclPrinterTest.cpp
@@ -731,7 +731,7 @@ TEST(DeclPrinter, TestCXXMethodDecl_Operator1) {
     "()", "[]"
   };
 
-  for (unsigned i = 0, e = llvm::size(OperatorNames); i != e; ++i) {
+  for (unsigned i = 0, e = llvm::array_lengthof(OperatorNames); i != e; ++i) {
     SmallString<128> Code;
     Code.append("struct Z { void operator");
     Code.append(OperatorNames[i]);
@@ -754,7 +754,7 @@ TEST(DeclPrinter, TestCXXMethodDecl_Operator2) {
     "~", "!", "++", "--", "->"
   };
 
-  for (unsigned i = 0, e = llvm::size(OperatorNames); i != e; ++i) {
+  for (unsigned i = 0, e = llvm::array_lengthof(OperatorNames); i != e; ++i) {
     SmallString<128> Code;
     Code.append("struct Z { void operator");
     Code.append(OperatorNames[i]);
diff --git a/clang/unittests/Tooling/CompilationDatabaseTest.cpp b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
index 94ccdcafda6d2..adb9de0c1cbaa 100644
--- a/clang/unittests/Tooling/CompilationDatabaseTest.cpp
+++ b/clang/unittests/Tooling/CompilationDatabaseTest.cpp
@@ -642,7 +642,7 @@ TEST(ParseFixedCompilationDatabase, HandlesPositionalArgsSyntaxOnly) {
   // Adjust the given command line arguments to ensure that any positional
   // arguments in them are stripped.
   const char *Argv[] = {"--", "somefile.cpp", "-fsyntax-only", "-DDEF3"};
-  int Argc = llvm::size(Argv);
+  int Argc = llvm::array_lengthof(Argv);
   std::string ErrorMessage;
   std::unique_ptr<CompilationDatabase> Database =
       FixedCompilationDatabase::loadFromCommandLine(Argc, Argv, ErrorMessage);
diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp
index e26679ceb730a..6cabb22d98cf2 100644
--- a/lld/COFF/Chunks.cpp
+++ b/lld/COFF/Chunks.cpp
@@ -947,7 +947,7 @@ MergeChunk::MergeChunk(uint32_t alignment)
 void MergeChunk::addSection(COFFLinkerContext &ctx, SectionChunk *c) {
   assert(isPowerOf2_32(c->getAlignment()));
   uint8_t p2Align = llvm::Log2_32(c->getAlignment());
-  assert(p2Align < size(ctx.mergeChunkInstances));
+  assert(p2Align < array_lengthof(ctx.mergeChunkInstances));
   auto *&mc = ctx.mergeChunkInstances[p2Align];
   if (!mc)
     mc = make<MergeChunk>(c->getAlignment());
diff --git a/lldb/source/DataFormatters/FormatManager.cpp b/lldb/source/DataFormatters/FormatManager.cpp
index dfc9edefd7c6c..f07bb9a7136a3 100644
--- a/lldb/source/DataFormatters/FormatManager.cpp
+++ b/lldb/source/DataFormatters/FormatManager.cpp
@@ -77,7 +77,7 @@ static_assert((sizeof(g_format_infos) / sizeof(g_format_infos[0])) ==
                   kNumFormats,
               "All formats must have a corresponding info entry.");
 
-static uint32_t g_num_format_infos = llvm::size(g_format_infos);
+static uint32_t g_num_format_infos = llvm::array_lengthof(g_format_infos);
 
 static bool GetFormatFromFormatChar(char format_char, Format &format) {
   for (uint32_t i = 0; i < g_num_format_infos; ++i) {
diff --git a/lldb/source/Host/common/MainLoop.cpp b/lldb/source/Host/common/MainLoop.cpp
index c7e0abdf7f4e8..d36587ce2346e 100644
--- a/lldb/source/Host/common/MainLoop.cpp
+++ b/lldb/source/Host/common/MainLoop.cpp
@@ -108,7 +108,7 @@ Status MainLoop::RunImpl::Poll() {
     EV_SET(&in_events[i++], fd.first, EVFILT_READ, EV_ADD, 0, 0, 0);
 
   num_events = kevent(loop.m_kqueue, in_events.data(), in_events.size(),
-                      out_events, llvm::size(out_events), nullptr);
+                      out_events, llvm::array_lengthof(out_events), nullptr);
 
   if (num_events < 0) {
     if (errno == EINTR) {
diff --git a/lldb/source/Host/windows/ConnectionGenericFileWindows.cpp b/lldb/source/Host/windows/ConnectionGenericFileWindows.cpp
index bbd471dea076f..817663d6212e5 100644
--- a/lldb/source/Host/windows/ConnectionGenericFileWindows.cpp
+++ b/lldb/source/Host/windows/ConnectionGenericFileWindows.cpp
@@ -188,7 +188,7 @@ size_t ConnectionGenericFile::Read(void *dst, size_t dst_len,
                     .count()
               : INFINITE;
       DWORD wait_result =
-          ::WaitForMultipleObjects(llvm::size(m_event_handles),
+          ::WaitForMultipleObjects(llvm::array_lengthof(m_event_handles),
                                    m_event_handles, FALSE, milliseconds);
       // All of the events are manual reset events, so make sure we reset them
       // to non-signalled.
diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp
index 9df7c578b6699..59c23716bf89c 100644
--- a/lldb/source/Interpreter/CommandInterpreter.cpp
+++ b/lldb/source/Interpreter/CommandInterpreter.cpp
@@ -549,7 +549,7 @@ void CommandInterpreter::LoadCommandDictionary() {
        "breakpoint set --name '%1'"}};
   // clang-format on
 
-  size_t num_regexes = llvm::size(break_regexes);
+  size_t num_regexes = llvm::array_lengthof(break_regexes);
 
   std::unique_ptr<CommandObjectRegexCommand> break_regex_cmd_up(
       new CommandObjectRegexCommand(
diff --git a/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp b/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
index b21f5d4341fbd..a8d1cbc675e36 100644
--- a/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
+++ b/lldb/source/Plugins/ABI/ARM/ABIMacOSX_arm.cpp
@@ -1187,7 +1187,7 @@ static const RegisterInfo g_register_infos[] = {
      }};
 
 static const uint32_t k_num_register_infos =
-    llvm::size(g_register_infos);
+    llvm::array_lengthof(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABIMacOSX_arm::GetRegisterInfoArray(uint32_t &count) {
@@ -1235,7 +1235,7 @@ bool ABIMacOSX_arm::PrepareTrivialCall(Thread &thread, addr_t sp,
 
   llvm::ArrayRef<addr_t>::iterator ai = args.begin(), ae = args.end();
 
-  for (size_t i = 0; i < llvm::size(reg_names); ++i) {
+  for (size_t i = 0; i < llvm::array_lengthof(reg_names); ++i) {
     if (ai == ae)
       break;
 
diff --git a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
index 87a643f77407f..9ed042df4e501 100644
--- a/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
+++ b/lldb/source/Plugins/ABI/ARM/ABISysV_arm.cpp
@@ -1190,7 +1190,7 @@ static const RegisterInfo g_register_infos[] = {
      }};
 
 static const uint32_t k_num_register_infos =
-    llvm::size(g_register_infos);
+    llvm::array_lengthof(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABISysV_arm::GetRegisterInfoArray(uint32_t &count) {
@@ -1240,7 +1240,7 @@ bool ABISysV_arm::PrepareTrivialCall(Thread &thread, addr_t sp,
 
   llvm::ArrayRef<addr_t>::iterator ai = args.begin(), ae = args.end();
 
-  for (size_t i = 0; i < llvm::size(reg_names); ++i) {
+  for (size_t i = 0; i < llvm::array_lengthof(reg_names); ++i) {
     if (ai == ae)
       break;
 
diff --git a/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp b/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
index f06a3258f1e19..662689ca06159 100644
--- a/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
+++ b/lldb/source/Plugins/ABI/Mips/ABISysV_mips.cpp
@@ -503,7 +503,7 @@ static const RegisterInfo g_register_infos[] = {
 };
 
 static const uint32_t k_num_register_infos =
-    llvm::size(g_register_infos);
+    llvm::array_lengthof(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABISysV_mips::GetRegisterInfoArray(uint32_t &count) {
@@ -559,7 +559,7 @@ bool ABISysV_mips::PrepareTrivialCall(Thread &thread, addr_t sp,
   llvm::ArrayRef<addr_t>::iterator ai = args.begin(), ae = args.end();
 
   // Write arguments to registers
-  for (size_t i = 0; i < llvm::size(reg_names); ++i) {
+  for (size_t i = 0; i < llvm::array_lengthof(reg_names); ++i) {
     if (ai == ae)
       break;
 
diff --git a/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp b/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
index 7003667bf9dd1..7e272265e15d4 100644
--- a/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
+++ b/lldb/source/Plugins/ABI/Mips/ABISysV_mips64.cpp
@@ -503,7 +503,7 @@ static const RegisterInfo g_register_infos_mips64[] = {
 };
 
 static const uint32_t k_num_register_infos =
-    llvm::size(g_register_infos_mips64);
+    llvm::array_lengthof(g_register_infos_mips64);
 
 const lldb_private::RegisterInfo *
 ABISysV_mips64::GetRegisterInfoArray(uint32_t &count) {
diff --git a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
index fc95965620235..70b9c46d353f6 100644
--- a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
+++ b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc.cpp
@@ -204,7 +204,7 @@ static const RegisterInfo g_register_infos[] = {
      }};
 
 static const uint32_t k_num_register_infos =
-    llvm::size(g_register_infos);
+    llvm::array_lengthof(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABISysV_ppc::GetRegisterInfoArray(uint32_t &count) {
diff --git a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
index 9270ae57ac4e3..f9a2851d3949f 100644
--- a/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
+++ b/lldb/source/Plugins/ABI/PowerPC/ABISysV_ppc64.cpp
@@ -52,10 +52,10 @@ LLDB_PLUGIN_DEFINE(ABISysV_ppc64)
 const lldb_private::RegisterInfo *
 ABISysV_ppc64::GetRegisterInfoArray(uint32_t &count) {
   if (GetByteOrder() == lldb::eByteOrderLittle) {
-    count = llvm::size(g_register_infos_ppc64le);
+    count = llvm::array_lengthof(g_register_infos_ppc64le);
     return g_register_infos_ppc64le;
   } else {
-    count = llvm::size(g_register_infos_ppc64);
+    count = llvm::array_lengthof(g_register_infos_ppc64);
     return g_register_infos_ppc64;
   }
 }
diff --git a/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp b/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
index 986225ecf67a1..f8156deb7e306 100644
--- a/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
+++ b/lldb/source/Plugins/ABI/SystemZ/ABISysV_s390x.cpp
@@ -172,7 +172,7 @@ static const RegisterInfo g_register_infos[] = {
 };
 
 static const uint32_t k_num_register_infos =
-    llvm::size(g_register_infos);
+    llvm::array_lengthof(g_register_infos);
 
 const lldb_private::RegisterInfo *
 ABISysV_s390x::GetRegisterInfoArray(uint32_t &count) {
diff --git a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
index 633c7e8c604dd..a5abce7de3aef 100644
--- a/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
+++ b/lldb/source/Plugins/DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernel.cpp
@@ -405,7 +405,7 @@ DynamicLoaderDarwinKernel::ReadMachHeader(addr_t addr, Process *process, llvm::M
   const uint32_t magicks[] = { llvm::MachO::MH_MAGIC_64, llvm::MachO::MH_MAGIC, llvm::MachO::MH_CIGAM, llvm::MachO::MH_CIGAM_64};
 
   bool found_matching_pattern = false;
-  for (size_t i = 0; i < llvm::size (magicks); i++)
+  for (size_t i = 0; i < llvm::array_lengthof (magicks); i++)
     if (::memcmp (&header.magic, &magicks[i], sizeof (uint32_t)) == 0)
         found_matching_pattern = true;
 
diff --git a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
index 4b3a45e05dd01..5a238c5d4ac78 100644
--- a/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
+++ b/lldb/source/Plugins/Instruction/ARM/EmulateInstructionARM.cpp
@@ -13198,7 +13198,7 @@ EmulateInstructionARM::GetARMOpcodeForInstruction(const uint32_t opcode,
        &EmulateInstructionARM::EmulateRFE, "rfe{<amode>} <Rn>{!}"}
 
   };
-  static const size_t k_num_arm_opcodes = llvm::size(g_arm_opcodes);
+  static const size_t k_num_arm_opcodes = llvm::array_lengthof(g_arm_opcodes);
 
   for (size_t i = 0; i < k_num_arm_opcodes; ++i) {
     if ((g_arm_opcodes[i].mask & opcode) == g_arm_opcodes[i].value &&
@@ -13749,7 +13749,7 @@ EmulateInstructionARM::GetThumbOpcodeForInstruction(const uint32_t opcode,
        &EmulateInstructionARM::EmulateUXTH, "uxth<c>.w <Rd>,<Rm>{,<rotation>}"},
   };
 
-  const size_t k_num_thumb_opcodes = llvm::size(g_thumb_opcodes);
+  const size_t k_num_thumb_opcodes = llvm::array_lengthof(g_thumb_opcodes);
   for (size_t i = 0; i < k_num_thumb_opcodes; ++i) {
     if ((g_thumb_opcodes[i].mask & opcode) == g_thumb_opcodes[i].value &&
         (g_thumb_opcodes[i].variants & arm_isa) != 0)
diff --git a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
index 3cac8259ae28e..f86609f3c5c1c 100644
--- a/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
+++ b/lldb/source/Plugins/Instruction/ARM64/EmulateInstructionARM64.cpp
@@ -52,7 +52,7 @@ using namespace lldb_private;
 LLDB_PLUGIN_DEFINE_ADV(EmulateInstructionARM64, InstructionARM64)
 
 static bool LLDBTableGetRegisterInfo(uint32_t reg_num, RegisterInfo &reg_info) {
-  if (reg_num >= llvm::size(g_register_infos_arm64_le))
+  if (reg_num >= llvm::array_lengthof(g_register_infos_arm64_le))
     return false;
   reg_info = g_register_infos_arm64_le[reg_num];
   return true;
@@ -360,7 +360,7 @@ EmulateInstructionARM64::GetOpcodeForInstruction(const uint32_t opcode) {
        "TBNZ <R><t>, #<imm>, <label>"},
 
   };
-  static const size_t k_num_arm_opcodes = llvm::size(g_opcodes);
+  static const size_t k_num_arm_opcodes = llvm::array_lengthof(g_opcodes);
 
   for (size_t i = 0; i < k_num_arm_opcodes; ++i) {
     if ((g_opcodes[i].mask & opcode) == g_opcodes[i].value)
diff --git a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
index fd5ac54722a4c..4ef0a034b6dd4 100644
--- a/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS/EmulateInstructionMIPS.cpp
@@ -954,7 +954,7 @@ EmulateInstructionMIPS::GetOpcodeForInstruction(const char *op_name) {
       {"JALRS_MM", &EmulateInstructionMIPS::Emulate_JALRS, "JALRS rt, rs"},
   };
 
-  static const size_t k_num_mips_opcodes = llvm::size(g_opcodes);
+  static const size_t k_num_mips_opcodes = llvm::array_lengthof(g_opcodes);
 
   for (size_t i = 0; i < k_num_mips_opcodes; ++i) {
     if (!strcasecmp(g_opcodes[i].op_name, op_name))
diff --git a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
index f9770cb494e5f..26736f4c58baa 100644
--- a/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
+++ b/lldb/source/Plugins/Instruction/MIPS64/EmulateInstructionMIPS64.cpp
@@ -919,7 +919,7 @@ EmulateInstructionMIPS64::GetOpcodeForInstruction(const char *op_name) {
       {"BZ_V", &EmulateInstructionMIPS64::Emulate_BZV, "BZ.V wt,s16"},
   };
 
-  static const size_t k_num_mips_opcodes = llvm::size(g_opcodes);
+  static const size_t k_num_mips_opcodes = llvm::array_lengthof(g_opcodes);
 
   for (size_t i = 0; i < k_num_mips_opcodes; ++i) {
     if (!strcasecmp(g_opcodes[i].op_name, op_name))
diff --git a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
index 292e909b3ca5c..dcfad8e106aab 100644
--- a/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
+++ b/lldb/source/Plugins/Instruction/PPC64/EmulateInstructionPPC64.cpp
@@ -59,7 +59,7 @@ bool EmulateInstructionPPC64::SetTargetTriple(const ArchSpec &arch) {
 }
 
 static bool LLDBTableGetRegisterInfo(uint32_t reg_num, RegisterInfo &reg_info) {
-  if (reg_num >= llvm::size(g_register_infos_ppc64le))
+  if (reg_num >= llvm::array_lengthof(g_register_infos_ppc64le))
     return false;
   reg_info = g_register_infos_ppc64le[reg_num];
   return true;
@@ -147,7 +147,7 @@ EmulateInstructionPPC64::GetOpcodeForInstruction(uint32_t opcode) {
        "addi RT, RA, SI"},
       {0xfc000003, 0xe8000000, &EmulateInstructionPPC64::EmulateLD,
        "ld RT, DS(RA)"}};
-  static const size_t k_num_ppc_opcodes = llvm::size(g_opcodes);
+  static const size_t k_num_ppc_opcodes = llvm::array_lengthof(g_opcodes);
 
   for (size_t i = 0; i < k_num_ppc_opcodes; ++i) {
     if ((g_opcodes[i].mask & opcode) == g_opcodes[i].value)
diff --git a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
index 536869b1b3b64..ff41f187ba9d0 100644
--- a/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
+++ b/lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTrampolineHandler.cpp
@@ -749,7 +749,7 @@ AppleObjCTrampolineHandler::AppleObjCTrampolineHandler(
   // array into a template table, and populate the DispatchFunction
   // map from there.
 
-  for (size_t i = 0; i != llvm::size(g_dispatch_functions); i++) {
+  for (size_t i = 0; i != llvm::array_lengthof(g_dispatch_functions); i++) {
     ConstString name_const_str(g_dispatch_functions[i].name);
     const Symbol *msgSend_symbol =
         m_objc_module_sp->FindFirstSymbolWithNameAndType(name_const_str,
@@ -769,7 +769,7 @@ AppleObjCTrampolineHandler::AppleObjCTrampolineHandler(
   }
   
   // Similarly, cache the addresses of the "optimized dispatch" function.
-  for (size_t i = 0; i != llvm::size(g_opt_dispatch_names); i++) {
+  for (size_t i = 0; i != llvm::array_lengthof(g_opt_dispatch_names); i++) {
     ConstString name_const_str(g_opt_dispatch_names[i]);
     const Symbol *msgSend_symbol =
         m_objc_module_sp->FindFirstSymbolWithNameAndType(name_const_str,
diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
index 71e28378872c2..f69f3c300ae78 100644
--- a/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
+++ b/lldb/source/Plugins/ObjectFile/PECOFF/ObjectFilePECOFF.cpp
@@ -574,7 +574,7 @@ bool ObjectFilePECOFF::ParseSectionHeaders(
 }
 
 llvm::StringRef ObjectFilePECOFF::GetSectionName(const section_header_t &sect) {
-  llvm::StringRef hdr_name(sect.name, llvm::size(sect.name));
+  llvm::StringRef hdr_name(sect.name, llvm::array_lengthof(sect.name));
   hdr_name = hdr_name.split('\0').first;
   if (hdr_name.consume_front("/")) {
     lldb::offset_t stroff;
diff --git a/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp b/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp
index ae66bfb65d80e..522c9a65e8682 100644
--- a/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp
+++ b/lldb/source/Plugins/ObjectFile/PECOFF/PECallFrameInfo.cpp
@@ -169,7 +169,7 @@ uint32_t EHProgramBuilder::ConvertMachineToLLDBRegister(uint8_t machine_reg) {
       lldb_r8_x86_64,  lldb_r9_x86_64,  lldb_r10_x86_64, lldb_r11_x86_64,
       lldb_r12_x86_64, lldb_r13_x86_64, lldb_r14_x86_64, lldb_r15_x86_64};
 
-  if (machine_reg >= llvm::size(machine_to_lldb_register))
+  if (machine_reg >= llvm::array_lengthof(machine_to_lldb_register))
     return LLDB_INVALID_REGNUM;
 
   return machine_to_lldb_register[machine_reg];
@@ -184,7 +184,7 @@ uint32_t EHProgramBuilder::ConvertXMMToLLDBRegister(uint8_t xmm_reg) {
       lldb_xmm12_x86_64, lldb_xmm13_x86_64, lldb_xmm14_x86_64,
       lldb_xmm15_x86_64};
 
-  if (xmm_reg >= llvm::size(xmm_to_lldb_register))
+  if (xmm_reg >= llvm::array_lengthof(xmm_to_lldb_register))
     return LLDB_INVALID_REGNUM;
 
   return xmm_to_lldb_register[xmm_reg];
diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
index 733d1ee5d14f5..461a21f44421b 100644
--- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
+++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwin.cpp
@@ -686,7 +686,7 @@ BreakpointSP PlatformDarwin::SetThreadCreationBreakpoint(Target &target) {
                                        "libSystem.B.dylib"};
 
   FileSpecList bp_modules;
-  for (size_t i = 0; i < llvm::size(g_bp_modules); i++) {
+  for (size_t i = 0; i < llvm::array_lengthof(g_bp_modules); i++) {
     const char *bp_module = g_bp_modules[i];
     bp_modules.EmplaceBack(bp_module);
   }
@@ -695,7 +695,7 @@ BreakpointSP PlatformDarwin::SetThreadCreationBreakpoint(Target &target) {
   bool hardware = false;
   LazyBool skip_prologue = eLazyBoolNo;
   bp_sp = target.CreateBreakpoint(&bp_modules, nullptr, g_bp_names,
-                                  llvm::size(g_bp_names),
+                                  llvm::array_lengthof(g_bp_names),
                                   eFunctionNameTypeFull, eLanguageTypeUnknown,
                                   0, skip_prologue, internal, hardware);
   bp_sp->SetBreakpointKind("thread-creation");
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
index 598a55286cc40..e1d3b6ecd7d09 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm.cpp
@@ -856,7 +856,7 @@ static uint32_t g_exc_regnums[] = {
     exc_exception, exc_fsr, exc_far,
 };
 
-static size_t k_num_register_infos = llvm::size(g_register_infos);
+static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
 
 RegisterContextDarwin_arm::RegisterContextDarwin_arm(
     Thread &thread, uint32_t concrete_frame_idx)
@@ -897,9 +897,9 @@ const RegisterInfo *RegisterContextDarwin_arm::GetRegisterInfos() {
 }
 
 // Number of registers in each register set
-const size_t k_num_gpr_registers = llvm::size(g_gpr_regnums);
-const size_t k_num_fpu_registers = llvm::size(g_fpu_regnums);
-const size_t k_num_exc_registers = llvm::size(g_exc_regnums);
+const size_t k_num_gpr_registers = llvm::array_lengthof(g_gpr_regnums);
+const size_t k_num_fpu_registers = llvm::array_lengthof(g_fpu_regnums);
+const size_t k_num_exc_registers = llvm::array_lengthof(g_exc_regnums);
 
 // Register set definitions. The first definitions at register set index of
 // zero is for all registers, followed by other registers sets. The register
@@ -911,7 +911,7 @@ static const RegisterSet g_reg_sets[] = {
     {"Floating Point Registers", "fpu", k_num_fpu_registers, g_fpu_regnums},
     {"Exception State Registers", "exc", k_num_exc_registers, g_exc_regnums}};
 
-const size_t k_num_regsets = llvm::size(g_reg_sets);
+const size_t k_num_regsets = llvm::array_lengthof(g_reg_sets);
 
 size_t RegisterContextDarwin_arm::GetRegisterSetCount() {
   return k_num_regsets;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
index f23a116fbb188..50f710e268152 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_arm64.cpp
@@ -91,7 +91,7 @@ static uint32_t g_fpu_regnums[] = {
 static uint32_t g_exc_regnums[] = {exc_far, exc_esr, exc_exception};
 
 static size_t k_num_register_infos =
-    llvm::size(g_register_infos_arm64_le);
+    llvm::array_lengthof(g_register_infos_arm64_le);
 
 RegisterContextDarwin_arm64::RegisterContextDarwin_arm64(
     Thread &thread, uint32_t concrete_frame_idx)
@@ -132,9 +132,9 @@ const RegisterInfo *RegisterContextDarwin_arm64::GetRegisterInfos() {
 }
 
 // Number of registers in each register set
-const size_t k_num_gpr_registers = llvm::size(g_gpr_regnums);
-const size_t k_num_fpu_registers = llvm::size(g_fpu_regnums);
-const size_t k_num_exc_registers = llvm::size(g_exc_regnums);
+const size_t k_num_gpr_registers = llvm::array_lengthof(g_gpr_regnums);
+const size_t k_num_fpu_registers = llvm::array_lengthof(g_fpu_regnums);
+const size_t k_num_exc_registers = llvm::array_lengthof(g_exc_regnums);
 
 // Register set definitions. The first definitions at register set index of
 // zero is for all registers, followed by other registers sets. The register
@@ -146,7 +146,7 @@ static const RegisterSet g_reg_sets[] = {
     {"Floating Point Registers", "fpu", k_num_fpu_registers, g_fpu_regnums},
     {"Exception State Registers", "exc", k_num_exc_registers, g_exc_regnums}};
 
-const size_t k_num_regsets = llvm::size(g_reg_sets);
+const size_t k_num_regsets = llvm::array_lengthof(g_reg_sets);
 
 size_t RegisterContextDarwin_arm64::GetRegisterSetCount() {
   return k_num_regsets;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
index 34c7b4fd64eff..5f56e6f1636a2 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_i386.cpp
@@ -363,7 +363,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      }};
 
-static size_t k_num_register_infos = llvm::size(g_register_infos);
+static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
 
 RegisterContextDarwin_i386::RegisterContextDarwin_i386(
     Thread &thread, uint32_t concrete_frame_idx)
@@ -421,9 +421,9 @@ static uint32_t g_fpu_regnums[] = {
 static uint32_t g_exc_regnums[] = {exc_trapno, exc_err, exc_faultvaddr};
 
 // Number of registers in each register set
-const size_t k_num_gpr_registers = llvm::size(g_gpr_regnums);
-const size_t k_num_fpu_registers = llvm::size(g_fpu_regnums);
-const size_t k_num_exc_registers = llvm::size(g_exc_regnums);
+const size_t k_num_gpr_registers = llvm::array_lengthof(g_gpr_regnums);
+const size_t k_num_fpu_registers = llvm::array_lengthof(g_fpu_regnums);
+const size_t k_num_exc_registers = llvm::array_lengthof(g_exc_regnums);
 
 // Register set definitions. The first definitions at register set index of
 // zero is for all registers, followed by other registers sets. The register
@@ -435,7 +435,7 @@ static const RegisterSet g_reg_sets[] = {
     {"Floating Point Registers", "fpu", k_num_fpu_registers, g_fpu_regnums},
     {"Exception State Registers", "exc", k_num_exc_registers, g_exc_regnums}};
 
-const size_t k_num_regsets = llvm::size(g_reg_sets);
+const size_t k_num_regsets = llvm::array_lengthof(g_reg_sets);
 
 size_t RegisterContextDarwin_i386::GetRegisterSetCount() {
   return k_num_regsets;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
index 1551281e8d8d4..567df8fc980ce 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextDarwin_x86_64.cpp
@@ -420,7 +420,7 @@ static RegisterInfo g_register_infos[] = {
      nullptr,
      }};
 
-static size_t k_num_register_infos = llvm::size(g_register_infos);
+static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
 
 RegisterContextDarwin_x86_64::RegisterContextDarwin_x86_64(
     Thread &thread, uint32_t concrete_frame_idx)
@@ -477,9 +477,9 @@ static uint32_t g_fpu_regnums[] = {
 static uint32_t g_exc_regnums[] = {exc_trapno, exc_err, exc_faultvaddr};
 
 // Number of registers in each register set
-const size_t k_num_gpr_registers = llvm::size(g_gpr_regnums);
-const size_t k_num_fpu_registers = llvm::size(g_fpu_regnums);
-const size_t k_num_exc_registers = llvm::size(g_exc_regnums);
+const size_t k_num_gpr_registers = llvm::array_lengthof(g_gpr_regnums);
+const size_t k_num_fpu_registers = llvm::array_lengthof(g_fpu_regnums);
+const size_t k_num_exc_registers = llvm::array_lengthof(g_exc_regnums);
 
 // Register set definitions. The first definitions at register set index of
 // zero is for all registers, followed by other registers sets. The register
@@ -491,7 +491,7 @@ static const RegisterSet g_reg_sets[] = {
     {"Floating Point Registers", "fpu", k_num_fpu_registers, g_fpu_regnums},
     {"Exception State Registers", "exc", k_num_exc_registers, g_exc_regnums}};
 
-const size_t k_num_regsets = llvm::size(g_reg_sets);
+const size_t k_num_regsets = llvm::array_lengthof(g_reg_sets);
 
 size_t RegisterContextDarwin_x86_64::GetRegisterSetCount() {
   return k_num_regsets;
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
index cfd4d21763e49..066d50d9c149f 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_i386.cpp
@@ -78,11 +78,11 @@ const RegisterInfo *RegisterContextWindows_i386::GetRegisterInfo() const {
 }
 
 uint32_t RegisterContextWindows_i386::GetRegisterCount() const {
-  return llvm::size(g_register_infos_i386);
+  return llvm::array_lengthof(g_register_infos_i386);
 }
 
 uint32_t RegisterContextWindows_i386::GetUserRegisterCount() const {
-  return llvm::size(g_register_infos_i386);
+  return llvm::array_lengthof(g_register_infos_i386);
 }
 
 size_t RegisterContextWindows_i386::GetGPRSize() const { return sizeof(GPR); }
diff --git a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
index f0fd6fec24fd8..a35ccace5d5be 100644
--- a/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Utility/RegisterContextWindows_x86_64.cpp
@@ -141,11 +141,11 @@ const RegisterInfo *RegisterContextWindows_x86_64::GetRegisterInfo() const {
 }
 
 uint32_t RegisterContextWindows_x86_64::GetRegisterCount() const {
-  return llvm::size(g_register_infos_x86_64);
+  return llvm::array_lengthof(g_register_infos_x86_64);
 }
 
 uint32_t RegisterContextWindows_x86_64::GetUserRegisterCount() const {
-  return llvm::size(g_register_infos_x86_64);
+  return llvm::array_lengthof(g_register_infos_x86_64);
 }
 
 size_t RegisterContextWindows_x86_64::GetGPRSize() const { return sizeof(GPR); }
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
index ea4485e1e075f..d562c7df8bfa2 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
@@ -38,7 +38,7 @@ static const uint32_t g_gpr_regnums_WoW64[] = {
 
 static const RegisterSet g_reg_sets_WoW64[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_regnums_WoW64) - 1, g_gpr_regnums_WoW64},
+     llvm::array_lengthof(g_gpr_regnums_WoW64) - 1, g_gpr_regnums_WoW64},
 };
 enum { k_num_register_sets = 1 };
 
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
index 1707a10530a95..5cfd790c624a3 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
@@ -69,9 +69,9 @@ static_assert(((sizeof g_fpr_regnums_arm / sizeof g_fpr_regnums_arm[0]) - 1) ==
 
 static const RegisterSet g_reg_sets_arm[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_regnums_arm) - 1, g_gpr_regnums_arm},
+     llvm::array_lengthof(g_gpr_regnums_arm) - 1, g_gpr_regnums_arm},
     {"Floating Point Registers", "fpr",
-     llvm::size(g_fpr_regnums_arm) - 1, g_fpr_regnums_arm},
+     llvm::array_lengthof(g_fpr_regnums_arm) - 1, g_fpr_regnums_arm},
 };
 
 enum { k_num_register_sets = 2 };
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
index 792b2d59bcb40..fc65945723218 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
@@ -85,9 +85,9 @@ static_assert(((sizeof g_fpr_regnums_arm64 / sizeof g_fpr_regnums_arm64[0]) -
 
 static const RegisterSet g_reg_sets_arm64[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_regnums_arm64) - 1, g_gpr_regnums_arm64},
+     llvm::array_lengthof(g_gpr_regnums_arm64) - 1, g_gpr_regnums_arm64},
     {"Floating Point Registers", "fpr",
-     llvm::size(g_fpr_regnums_arm64) - 1, g_fpr_regnums_arm64},
+     llvm::array_lengthof(g_fpr_regnums_arm64) - 1, g_fpr_regnums_arm64},
 };
 
 enum { k_num_register_sets = 2 };
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
index b0102e613de65..45da4cd02cec3 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
@@ -38,7 +38,7 @@ static const uint32_t g_gpr_regnums_i386[] = {
 
 static const RegisterSet g_reg_sets_i386[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_regnums_i386) - 1, g_gpr_regnums_i386},
+     llvm::array_lengthof(g_gpr_regnums_i386) - 1, g_gpr_regnums_i386},
 };
 
 enum { k_num_register_sets = 1 };
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
index bf7564966c787..819cd8eb383bf 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
@@ -49,9 +49,9 @@ static const uint32_t g_fpr_regnums_x86_64[] = {
 
 static const RegisterSet g_reg_sets_x86_64[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_regnums_x86_64) - 1, g_gpr_regnums_x86_64},
+     llvm::array_lengthof(g_gpr_regnums_x86_64) - 1, g_gpr_regnums_x86_64},
     {"Floating Point Registers", "fpr",
-     llvm::size(g_fpr_regnums_x86_64) - 1, g_fpr_regnums_x86_64}};
+     llvm::array_lengthof(g_fpr_regnums_x86_64) - 1, g_fpr_regnums_x86_64}};
 
 enum { k_num_register_sets = 2 };
 
diff --git a/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp b/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp
index 368b7fcab3857..2dc678a9efcff 100644
--- a/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/arm/RegisterContextWindows_arm.cpp
@@ -41,7 +41,7 @@ using namespace lldb_private;
 #include "Plugins/Process/Utility/RegisterInfos_arm.h"
 #undef DECLARE_REGISTER_INFOS_ARM_STRUCT
 
-static size_t k_num_register_infos = llvm::size(g_register_infos_arm);
+static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos_arm);
 
 // Array of lldb register numbers used to define the set of all General Purpose
 // Registers
@@ -69,8 +69,8 @@ uint32_t g_fpu_reg_indices[] = {
 
 RegisterSet g_register_sets[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_reg_indices), g_gpr_reg_indices},
-    {"Floating Point Registers", "fpu", llvm::size(g_fpu_reg_indices),
+     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
+    {"Floating Point Registers", "fpu", llvm::array_lengthof(g_fpu_reg_indices),
      g_fpu_reg_indices},
 };
 
@@ -82,7 +82,7 @@ RegisterContextWindows_arm::RegisterContextWindows_arm(
 RegisterContextWindows_arm::~RegisterContextWindows_arm() {}
 
 size_t RegisterContextWindows_arm::GetRegisterCount() {
-  return llvm::size(g_register_infos_arm);
+  return llvm::array_lengthof(g_register_infos_arm);
 }
 
 const RegisterInfo *
@@ -93,7 +93,7 @@ RegisterContextWindows_arm::GetRegisterInfoAtIndex(size_t reg) {
 }
 
 size_t RegisterContextWindows_arm::GetRegisterSetCount() {
-  return llvm::size(g_register_sets);
+  return llvm::array_lengthof(g_register_sets);
 }
 
 const RegisterSet *RegisterContextWindows_arm::GetRegisterSet(size_t reg_set) {
diff --git a/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp b/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp
index c57b4fb8e8935..3ce288597c86a 100644
--- a/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/arm64/RegisterContextWindows_arm64.cpp
@@ -45,7 +45,7 @@ using namespace lldb_private;
 #undef DECLARE_REGISTER_INFOS_ARM64_STRUCT
 
 static size_t k_num_register_infos =
-    llvm::size(g_register_infos_arm64_le);
+    llvm::array_lengthof(g_register_infos_arm64_le);
 
 // Array of lldb register numbers used to define the set of all General Purpose
 // Registers
@@ -83,8 +83,8 @@ uint32_t g_fpu_reg_indices[] = {
 
 RegisterSet g_register_sets[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_reg_indices), g_gpr_reg_indices},
-    {"Floating Point Registers", "fpu", llvm::size(g_fpu_reg_indices),
+     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
+    {"Floating Point Registers", "fpu", llvm::array_lengthof(g_fpu_reg_indices),
      g_fpu_reg_indices},
 };
 
@@ -96,7 +96,7 @@ RegisterContextWindows_arm64::RegisterContextWindows_arm64(
 RegisterContextWindows_arm64::~RegisterContextWindows_arm64() {}
 
 size_t RegisterContextWindows_arm64::GetRegisterCount() {
-  return llvm::size(g_register_infos_arm64_le);
+  return llvm::array_lengthof(g_register_infos_arm64_le);
 }
 
 const RegisterInfo *
@@ -107,7 +107,7 @@ RegisterContextWindows_arm64::GetRegisterInfoAtIndex(size_t reg) {
 }
 
 size_t RegisterContextWindows_arm64::GetRegisterSetCount() {
-  return llvm::size(g_register_sets);
+  return llvm::array_lengthof(g_register_sets);
 }
 
 const RegisterSet *
diff --git a/lldb/source/Plugins/Process/Windows/Common/x64/RegisterContextWindows_x64.cpp b/lldb/source/Plugins/Process/Windows/Common/x64/RegisterContextWindows_x64.cpp
index ed0b1b43bda74..41e0d429fe52b 100644
--- a/lldb/source/Plugins/Process/Windows/Common/x64/RegisterContextWindows_x64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/x64/RegisterContextWindows_x64.cpp
@@ -212,7 +212,7 @@ RegisterInfo g_register_infos[] = {
     {DEFINE_FPU_XMM(xmm15)}
 };
 
-static size_t k_num_register_infos = llvm::size(g_register_infos);
+static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
 
 // Array of lldb register numbers used to define the set of all General Purpose
 // Registers
@@ -235,9 +235,9 @@ uint32_t g_fpu_reg_indices[] = {
 
 RegisterSet g_register_sets[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_reg_indices), g_gpr_reg_indices},
+     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
     {"Floating Point Registers", "fpu",
-     llvm::size(g_fpu_reg_indices), g_fpu_reg_indices}};
+     llvm::array_lengthof(g_fpu_reg_indices), g_fpu_reg_indices}};
 }
 
 // Constructors and Destructors
@@ -248,7 +248,7 @@ RegisterContextWindows_x64::RegisterContextWindows_x64(
 RegisterContextWindows_x64::~RegisterContextWindows_x64() {}
 
 size_t RegisterContextWindows_x64::GetRegisterCount() {
-  return llvm::size(g_register_infos);
+  return llvm::array_lengthof(g_register_infos);
 }
 
 const RegisterInfo *
@@ -259,7 +259,7 @@ RegisterContextWindows_x64::GetRegisterInfoAtIndex(size_t reg) {
 }
 
 size_t RegisterContextWindows_x64::GetRegisterSetCount() {
-  return llvm::size(g_register_sets);
+  return llvm::array_lengthof(g_register_sets);
 }
 
 const RegisterSet *RegisterContextWindows_x64::GetRegisterSet(size_t reg_set) {
diff --git a/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp b/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
index 33c1257798ed8..e4f1806597517 100644
--- a/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
@@ -118,7 +118,7 @@ RegisterInfo g_register_infos[] = {
      nullptr,
     },
 };
-static size_t k_num_register_infos = llvm::size(g_register_infos);
+static size_t k_num_register_infos = llvm::array_lengthof(g_register_infos);
 
 // Array of lldb register numbers used to define the set of all General Purpose
 // Registers
@@ -130,7 +130,7 @@ uint32_t g_gpr_reg_indices[] = {eRegisterIndexEax, eRegisterIndexEbx,
 
 RegisterSet g_register_sets[] = {
     {"General Purpose Registers", "gpr",
-     llvm::size(g_gpr_reg_indices), g_gpr_reg_indices},
+     llvm::array_lengthof(g_gpr_reg_indices), g_gpr_reg_indices},
 };
 }
 
@@ -142,7 +142,7 @@ RegisterContextWindows_x86::RegisterContextWindows_x86(
 RegisterContextWindows_x86::~RegisterContextWindows_x86() {}
 
 size_t RegisterContextWindows_x86::GetRegisterCount() {
-  return llvm::size(g_register_infos);
+  return llvm::array_lengthof(g_register_infos);
 }
 
 const RegisterInfo *
@@ -153,7 +153,7 @@ RegisterContextWindows_x86::GetRegisterInfoAtIndex(size_t reg) {
 }
 
 size_t RegisterContextWindows_x86::GetRegisterSetCount() {
-  return llvm::size(g_register_sets);
+  return llvm::array_lengthof(g_register_sets);
 }
 
 const RegisterSet *RegisterContextWindows_x86::GetRegisterSet(size_t reg_set) {
diff --git a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
index a6eef5d8e9bb3..7184dbacb08d6 100644
--- a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
+++ b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM.cpp
@@ -336,7 +336,7 @@ static RegisterInfo g_reg_infos[] = {
     DEF_Q(14),
     DEF_Q(15)};
 
-constexpr size_t k_num_reg_infos = llvm::size(g_reg_infos);
+constexpr size_t k_num_reg_infos = llvm::array_lengthof(g_reg_infos);
 
 // ARM general purpose registers.
 const uint32_t g_gpr_regnums[] = {
@@ -445,26 +445,26 @@ const uint32_t g_fpu_regnums[] = {
 };
 
 // Skip the last LLDB_INVALID_REGNUM in each count below by subtracting 1
-constexpr size_t k_num_gpr_regs = llvm::size(g_gpr_regnums) - 1;
-constexpr size_t k_num_fpu_regs = llvm::size(g_fpu_regnums) - 1;
+constexpr size_t k_num_gpr_regs = llvm::array_lengthof(g_gpr_regnums) - 1;
+constexpr size_t k_num_fpu_regs = llvm::array_lengthof(g_fpu_regnums) - 1;
 
 static RegisterSet g_reg_sets[] = {
     {"General Purpose Registers", "gpr", k_num_gpr_regs, g_gpr_regnums},
     {"Floating Point Registers", "fpu", k_num_fpu_regs, g_fpu_regnums},
 };
 
-constexpr size_t k_num_reg_sets = llvm::size(g_reg_sets);
+constexpr size_t k_num_reg_sets = llvm::array_lengthof(g_reg_sets);
 
 RegisterContextMinidump_ARM::RegisterContextMinidump_ARM(
     lldb_private::Thread &thread, const DataExtractor &data, bool apple)
     : RegisterContext(thread, 0), m_apple(apple) {
   lldb::offset_t offset = 0;
   m_regs.context_flags = data.GetU32(&offset);
-  for (unsigned i = 0; i < llvm::size(m_regs.r); ++i)
+  for (unsigned i = 0; i < llvm::array_lengthof(m_regs.r); ++i)
     m_regs.r[i] = data.GetU32(&offset);
   m_regs.cpsr = data.GetU32(&offset);
   m_regs.fpscr = data.GetU64(&offset);
-  for (unsigned i = 0; i < llvm::size(m_regs.d); ++i)
+  for (unsigned i = 0; i < llvm::array_lengthof(m_regs.d); ++i)
     m_regs.d[i] = data.GetU64(&offset);
   lldbassert(k_num_regs == k_num_reg_infos);
 }
diff --git a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
index d139d68c741c7..e606ec9c3b644 100644
--- a/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
+++ b/lldb/source/Plugins/Process/minidump/RegisterContextMinidump_ARM64.cpp
@@ -544,7 +544,7 @@ static RegisterInfo g_reg_infos[] = {
     DEF_H(31),
 };
 
-constexpr size_t k_num_reg_infos = llvm::size(g_reg_infos);
+constexpr size_t k_num_reg_infos = llvm::array_lengthof(g_reg_infos);
 
 // ARM64 general purpose registers.
 const uint32_t g_gpr_regnums[] = {
@@ -751,15 +751,15 @@ const uint32_t g_fpu_regnums[] = {
 };
 
 // Skip the last LLDB_INVALID_REGNUM in each count below by subtracting 1
-constexpr size_t k_num_gpr_regs = llvm::size(g_gpr_regnums) - 1;
-constexpr size_t k_num_fpu_regs = llvm::size(g_fpu_regnums) - 1;
+constexpr size_t k_num_gpr_regs = llvm::array_lengthof(g_gpr_regnums) - 1;
+constexpr size_t k_num_fpu_regs = llvm::array_lengthof(g_fpu_regnums) - 1;
 
 static RegisterSet g_reg_sets[] = {
     {"General Purpose Registers", "gpr", k_num_gpr_regs, g_gpr_regnums},
     {"Floating Point Registers", "fpu", k_num_fpu_regs, g_fpu_regnums},
 };
 
-constexpr size_t k_num_reg_sets = llvm::size(g_reg_sets);
+constexpr size_t k_num_reg_sets = llvm::array_lengthof(g_reg_sets);
 
 RegisterContextMinidump_ARM64::RegisterContextMinidump_ARM64(
     lldb_private::Thread &thread, const DataExtractor &data)
diff --git a/lldb/source/Utility/ArchSpec.cpp b/lldb/source/Utility/ArchSpec.cpp
index de63568ed7ecc..74a4370423458 100644
--- a/lldb/source/Utility/ArchSpec.cpp
+++ b/lldb/source/Utility/ArchSpec.cpp
@@ -254,12 +254,12 @@ struct ArchDefinition {
 };
 
 void ArchSpec::ListSupportedArchNames(StringList &list) {
-  for (uint32_t i = 0; i < llvm::size(g_core_definitions); ++i)
+  for (uint32_t i = 0; i < llvm::array_lengthof(g_core_definitions); ++i)
     list.AppendString(g_core_definitions[i].name);
 }
 
 void ArchSpec::AutoComplete(CompletionRequest &request) {
-  for (uint32_t i = 0; i < llvm::size(g_core_definitions); ++i)
+  for (uint32_t i = 0; i < llvm::array_lengthof(g_core_definitions); ++i)
     request.TryCompleteCurrentArg(g_core_definitions[i].name);
 }
 
@@ -340,7 +340,7 @@ static const ArchDefinitionEntry g_macho_arch_entries[] = {
 // clang-format on
 
 static const ArchDefinition g_macho_arch_def = {
-    eArchTypeMachO, llvm::size(g_macho_arch_entries),
+    eArchTypeMachO, llvm::array_lengthof(g_macho_arch_entries),
     g_macho_arch_entries, "mach-o"};
 
 //===----------------------------------------------------------------------===//
@@ -409,7 +409,7 @@ static const ArchDefinitionEntry g_elf_arch_entries[] = {
 
 static const ArchDefinition g_elf_arch_def = {
     eArchTypeELF,
-    llvm::size(g_elf_arch_entries),
+    llvm::array_lengthof(g_elf_arch_entries),
     g_elf_arch_entries,
     "elf",
 };
@@ -435,7 +435,7 @@ static const ArchDefinitionEntry g_coff_arch_entries[] = {
 
 static const ArchDefinition g_coff_arch_def = {
     eArchTypeCOFF,
-    llvm::size(g_coff_arch_entries),
+    llvm::array_lengthof(g_coff_arch_entries),
     g_coff_arch_entries,
     "pe-coff",
 };
@@ -446,7 +446,7 @@ static const ArchDefinition *g_arch_definitions[] = {
     &g_macho_arch_def, &g_elf_arch_def, &g_coff_arch_def};
 
 static const size_t k_num_arch_definitions =
-    llvm::size(g_arch_definitions);
+    llvm::array_lengthof(g_arch_definitions);
 
 //===----------------------------------------------------------------------===//
 // Static helper functions.
@@ -463,7 +463,7 @@ static const ArchDefinition *FindArchDefinition(ArchitectureType arch_type) {
 
 // Get an architecture definition by name.
 static const CoreDefinition *FindCoreDefinition(llvm::StringRef name) {
-  for (unsigned int i = 0; i < llvm::size(g_core_definitions); ++i) {
+  for (unsigned int i = 0; i < llvm::array_lengthof(g_core_definitions); ++i) {
     if (name.equals_insensitive(g_core_definitions[i].name))
       return &g_core_definitions[i];
   }
@@ -471,7 +471,7 @@ static const CoreDefinition *FindCoreDefinition(llvm::StringRef name) {
 }
 
 static inline const CoreDefinition *FindCoreDefinition(ArchSpec::Core core) {
-  if (core < llvm::size(g_core_definitions))
+  if (core < llvm::array_lengthof(g_core_definitions))
     return &g_core_definitions[core];
   return nullptr;
 }
diff --git a/lldb/unittests/Utility/StatusTest.cpp b/lldb/unittests/Utility/StatusTest.cpp
index f6bc11058c6a5..9b9d870cd12a3 100644
--- a/lldb/unittests/Utility/StatusTest.cpp
+++ b/lldb/unittests/Utility/StatusTest.cpp
@@ -72,7 +72,7 @@ TEST(StatusTest, ErrorWin32) {
   EXPECT_TRUE(success.Success());
 
   WCHAR name[128]{};
-  ULONG nameLen = llvm::size(name);
+  ULONG nameLen = llvm::array_lengthof(name);
   ULONG langs = 0;
   GetUserPreferredUILanguages(MUI_LANGUAGE_NAME, &langs,
                               reinterpret_cast<PZZWSTR>(&name), &nameLen);
diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h
index b0a8a436f9c7a..c3200c9265182 100644
--- a/llvm/include/llvm/ADT/STLExtras.h
+++ b/llvm/include/llvm/ADT/STLExtras.h
@@ -1430,6 +1430,12 @@ void shuffle(Iterator first, Iterator last, RNG &&g) {
   }
 }
 
+/// Find the length of an array.
+template <class T, std::size_t N>
+constexpr inline size_t array_lengthof(T (&)[N]) {
+  return N;
+}
+
 /// Adapt std::less<T> for array_pod_sort.
 template<typename T>
 inline int array_pod_sort_comparator(const void *P1, const void *P2) {
diff --git a/llvm/include/llvm/ADT/STLForwardCompat.h b/llvm/include/llvm/ADT/STLForwardCompat.h
index 49e96234a752f..440b29df260ca 100644
--- a/llvm/include/llvm/ADT/STLForwardCompat.h
+++ b/llvm/include/llvm/ADT/STLForwardCompat.h
@@ -63,16 +63,6 @@ struct in_place_index_t // NOLINT(readability-identifier-naming)
   explicit in_place_index_t() = default;
 };
 
-template <class C>
-constexpr auto size(const C& c) -> decltype(c.size()) { // NOLINT(readability-identifier-naming)
-  return c.size();
-}
-
-template <class T, std::size_t N>
-constexpr std::size_t size(const T (&array)[N]) noexcept { // NOLINT(readability-identifier-naming)
-  return N;
-}
-
 //===----------------------------------------------------------------------===//
 //     Features from C++20
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 792b3ebe87256..bec1915705947 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -924,7 +924,7 @@ class TargetLoweringBase {
   /// promotions or expansions.
   bool isTypeLegal(EVT VT) const {
     assert(!VT.isSimple() ||
-           (unsigned)VT.getSimpleVT().SimpleTy < size(RegClassForVT));
+           (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
     return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
   }
 
@@ -1078,7 +1078,7 @@ class TargetLoweringBase {
     if (VT.isExtended()) return Expand;
     // If a target-specific SDNode requires legalization, require the target
     // to provide custom legalization for it.
-    if (Op >= size(OpActions[0])) return Custom;
+    if (Op >= array_lengthof(OpActions[0])) return Custom;
     return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
   }
 
@@ -1379,8 +1379,8 @@ class TargetLoweringBase {
   /// expander for it.
   LegalizeAction
   getCondCodeAction(ISD::CondCode CC, MVT VT) const {
-    assert((unsigned)CC < size(CondCodeActions) &&
-           ((unsigned)VT.SimpleTy >> 3) < size(CondCodeActions[0]) &&
+    assert((unsigned)CC < array_lengthof(CondCodeActions) &&
+           ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
            "Table isn't big enough!");
     // See setCondCodeAction for how this is encoded.
     uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
@@ -1488,7 +1488,7 @@ class TargetLoweringBase {
 
   /// Return the type of registers that this ValueType will eventually require.
   MVT getRegisterType(MVT VT) const {
-    assert((unsigned)VT.SimpleTy < size(RegisterTypeForVT));
+    assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
     return RegisterTypeForVT[VT.SimpleTy];
   }
 
@@ -1496,7 +1496,7 @@ class TargetLoweringBase {
   MVT getRegisterType(LLVMContext &Context, EVT VT) const {
     if (VT.isSimple()) {
       assert((unsigned)VT.getSimpleVT().SimpleTy <
-                size(RegisterTypeForVT));
+                array_lengthof(RegisterTypeForVT));
       return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
     }
     if (VT.isVector()) {
@@ -1529,7 +1529,7 @@ class TargetLoweringBase {
                   Optional<MVT> RegisterVT = None) const {
     if (VT.isSimple()) {
       assert((unsigned)VT.getSimpleVT().SimpleTy <
-                size(NumRegistersForVT));
+                array_lengthof(NumRegistersForVT));
       return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
     }
     if (VT.isVector()) {
@@ -1597,7 +1597,7 @@ class TargetLoweringBase {
   /// If true, the target has custom DAG combine transformations that it can
   /// perform for the specified node.
   bool hasTargetDAGCombine(ISD::NodeType NT) const {
-    assert(unsigned(NT >> 3) < size(TargetDAGCombineArray));
+    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
     return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
   }
 
@@ -2212,7 +2212,7 @@ class TargetLoweringBase {
   /// specified value type. This indicates the selector can handle values of
   /// that class natively.
   void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
-    assert((unsigned)VT.SimpleTy < size(RegClassForVT));
+    assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
     RegClassForVT[VT.SimpleTy] = RC;
   }
 
@@ -2229,7 +2229,7 @@ class TargetLoweringBase {
   /// type and indicate what to do about it. Note that VT may refer to either
   /// the type of a result or that of an operand of Op.
   void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) {
-    assert(Op < size(OpActions[0]) && "Table isn't big enough!");
+    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
     OpActions[(unsigned)VT.SimpleTy][Op] = Action;
   }
 
@@ -2294,7 +2294,7 @@ class TargetLoweringBase {
   /// target and indicate what to do about it.
   void setCondCodeAction(ISD::CondCode CC, MVT VT,
                          LegalizeAction Action) {
-    assert(VT.isValid() && (unsigned)CC < size(CondCodeActions) &&
+    assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
            "Table isn't big enough!");
     assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
     /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit
@@ -2324,7 +2324,7 @@ class TargetLoweringBase {
   /// they want to provide a custom DAG combiner for by implementing the
   /// PerformDAGCombine virtual method.
   void setTargetDAGCombine(ISD::NodeType NT) {
-    assert(unsigned(NT >> 3) < size(TargetDAGCombineArray));
+    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
     TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
   }
 
diff --git a/llvm/include/llvm/MC/SubtargetFeature.h b/llvm/include/llvm/MC/SubtargetFeature.h
index 6eb6c13efe3e9..032e2a7df1f2d 100644
--- a/llvm/include/llvm/MC/SubtargetFeature.h
+++ b/llvm/include/llvm/MC/SubtargetFeature.h
@@ -104,7 +104,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator^=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = llvm::size(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
       Bits[I] ^= RHS.Bits[I];
     }
     return *this;
@@ -116,7 +116,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator&=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = llvm::size(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
       Bits[I] &= RHS.Bits[I];
     }
     return *this;
@@ -128,7 +128,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator|=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = llvm::size(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
       Bits[I] |= RHS.Bits[I];
     }
     return *this;
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 5d43a9523182b..37bc8a65dc7c3 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4329,7 +4329,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
       // The input vector this mask element indexes into.
       unsigned Input = (unsigned)Idx / NewElts;
 
-      if (Input >= size(Inputs)) {
+      if (Input >= array_lengthof(Inputs)) {
         // The mask element does not index into any input vector.
         Ops.push_back(-1);
         continue;
@@ -4340,7 +4340,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
 
       // Find or create a shuffle vector operand to hold this input.
       unsigned OpNo;
-      for (OpNo = 0; OpNo < size(InputUsed); ++OpNo) {
+      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
         if (InputUsed[OpNo] == Input) {
           // This input vector is already an operand.
           break;
@@ -4351,7 +4351,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
         }
       }
 
-      if (OpNo >= size(InputUsed)) {
+      if (OpNo >= array_lengthof(InputUsed)) {
         // More than two input vectors used!  Give up on trying to create a
         // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
         UseBuildVector = true;
@@ -4374,7 +4374,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
         // The input vector this mask element indexes into.
         unsigned Input = (unsigned)Idx / NewElts;
 
-        if (Input >= size(Inputs)) {
+        if (Input >= array_lengthof(Inputs)) {
           // The mask element is "undef" or indexes off the end of the input.
           SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
           continue;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 997f1de7ef3a1..0bd44ce4c872e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2153,7 +2153,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
       // The input vector this mask element indexes into.
       unsigned Input = (unsigned)Idx / NewElts;
 
-      if (Input >= size(Inputs)) {
+      if (Input >= array_lengthof(Inputs)) {
         // The mask element does not index into any input vector.
         Ops.push_back(-1);
         continue;
@@ -2164,7 +2164,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
 
       // Find or create a shuffle vector operand to hold this input.
       unsigned OpNo;
-      for (OpNo = 0; OpNo < size(InputUsed); ++OpNo) {
+      for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
         if (InputUsed[OpNo] == Input) {
           // This input vector is already an operand.
           break;
@@ -2175,7 +2175,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
         }
       }
 
-      if (OpNo >= size(InputUsed)) {
+      if (OpNo >= array_lengthof(InputUsed)) {
         // More than two input vectors used!  Give up on trying to create a
         // shuffle vector.  Insert all elements into a BUILD_VECTOR instead.
         useBuildVector = true;
@@ -2198,7 +2198,7 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
         // The input vector this mask element indexes into.
         unsigned Input = (unsigned)Idx / NewElts;
 
-        if (Input >= size(Inputs)) {
+        if (Input >= array_lengthof(Inputs)) {
           // The mask element is "undef" or indexes off the end of the input.
           SVOps.push_back(DAG.getUNDEF(EltVT));
           continue;
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index f34a917bf4484..ab574232e3677 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -966,7 +966,7 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
   // If this is a simple type, use the ComputeRegisterProp mechanism.
   if (VT.isSimple()) {
     MVT SVT = VT.getSimpleVT();
-    assert((unsigned)SVT.SimpleTy < size(TransformToType));
+    assert((unsigned)SVT.SimpleTy < array_lengthof(TransformToType));
     MVT NVT = TransformToType[SVT.SimpleTy];
     LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
 
diff --git a/llvm/lib/DWP/DWP.cpp b/llvm/lib/DWP/DWP.cpp
index fd6ab9f7f0f4d..f6538c0549d0b 100644
--- a/llvm/lib/DWP/DWP.cpp
+++ b/llvm/lib/DWP/DWP.cpp
@@ -394,7 +394,7 @@ void writeIndexTable(
     const MapVector<uint64_t, UnitIndexEntry> &IndexEntries,
     uint32_t DWARFUnitIndex::Entry::SectionContribution::*Field) {
   for (const auto &E : IndexEntries)
-    for (size_t I = 0; I != size(E.second.Contributions); ++I)
+    for (size_t I = 0; I != array_lengthof(E.second.Contributions); ++I)
       if (ContributionOffsets[I])
         Out.emitIntValue(E.second.Contributions[I].*Field, 4);
 }
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index dc6197a4338af..b20e581d283aa 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -326,14 +326,14 @@ void GenericDINode::recalculateHash() {
     }                                                                          \
   } while (false)
 #define DEFINE_GETIMPL_STORE(CLASS, ARGS, OPS)                                 \
-  return storeImpl(new (size(OPS))                                   \
+  return storeImpl(new (array_lengthof(OPS))                                   \
                        CLASS(Context, Storage, UNWRAP_ARGS(ARGS), OPS),        \
                    Storage, Context.pImpl->CLASS##s)
 #define DEFINE_GETIMPL_STORE_NO_OPS(CLASS, ARGS)                               \
   return storeImpl(new (0u) CLASS(Context, Storage, UNWRAP_ARGS(ARGS)),        \
                    Storage, Context.pImpl->CLASS##s)
 #define DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(CLASS, OPS)                   \
-  return storeImpl(new (size(OPS)) CLASS(Context, Storage, OPS),     \
+  return storeImpl(new (array_lengthof(OPS)) CLASS(Context, Storage, OPS),     \
                    Storage, Context.pImpl->CLASS##s)
 #define DEFINE_GETIMPL_STORE_N(CLASS, ARGS, OPS, NUM_OPS)                      \
   return storeImpl(new (NUM_OPS)                                               \
@@ -772,7 +772,7 @@ DICompileUnit *DICompileUnit::getImpl(
                      Macros,
                      SysRoot,
                      SDK};
-  return storeImpl(new (size(Ops)) DICompileUnit(
+  return storeImpl(new (array_lengthof(Ops)) DICompileUnit(
                        Context, Storage, SourceLanguage, IsOptimized,
                        RuntimeVersion, EmissionKind, DWOId, SplitDebugInlining,
                        DebugInfoForProfiling, NameTableKind, RangesBaseAddress,
diff --git a/llvm/lib/MC/MCAsmBackend.cpp b/llvm/lib/MC/MCAsmBackend.cpp
index 8bf0e76b08594..7989dd57907ca 100644
--- a/llvm/lib/MC/MCAsmBackend.cpp
+++ b/llvm/lib/MC/MCAsmBackend.cpp
@@ -97,7 +97,7 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"FK_SecRel_8", 0, 64, 0},
   };
 
-  assert((size_t)Kind <= size(Builtins) && "Unknown fixup kind");
+  assert((size_t)Kind <= array_lengthof(Builtins) && "Unknown fixup kind");
   return Builtins[Kind];
 }
 
diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index cd6b5325caac5..2cb5a000f88a7 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -306,7 +306,7 @@ MCDwarfLineTableHeader::Emit(MCStreamer *MCOS, MCDwarfLineTableParams Params,
       0, // length of DW_LNS_set_epilogue_begin
       1  // DW_LNS_set_isa
   };
-  assert(size(StandardOpcodeLengths) >=
+  assert(array_lengthof(StandardOpcodeLengths) >=
          (Params.DWARF2LineOpcodeBase - 1U));
   return Emit(
       MCOS, Params,
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 96e1fb137aa8c..42e257516f4e0 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -2288,7 +2288,7 @@ void MachOObjectFile::getRelocationTypeName(
         "ARM64_RELOC_ADDEND"
       };
 
-      if (RType >= size(Table))
+      if (RType >= array_lengthof(Table))
         res = "Unknown";
       else
         res = Table[RType];
diff --git a/llvm/lib/Support/ARMAttributeParser.cpp b/llvm/lib/Support/ARMAttributeParser.cpp
index f9185a5b5e8bb..908e56319025d 100644
--- a/llvm/lib/Support/ARMAttributeParser.cpp
+++ b/llvm/lib/Support/ARMAttributeParser.cpp
@@ -211,7 +211,7 @@ Error ARMAttributeParser::ABI_align_needed(AttrType tag) {
   uint64_t value = de.getULEB128(cursor);
 
   std::string description;
-  if (value < size(strings))
+  if (value < array_lengthof(strings))
     description = strings[value];
   else if (value <= 12)
     description = "8-byte alignment, " + utostr(1ULL << value) +
@@ -230,7 +230,7 @@ Error ARMAttributeParser::ABI_align_preserved(AttrType tag) {
   uint64_t value = de.getULEB128(cursor);
 
   std::string description;
-  if (value < size(strings))
+  if (value < array_lengthof(strings))
     description = std::string(strings[value]);
   else if (value <= 12)
     description = std::string("8-byte stack alignment, ") +
@@ -382,7 +382,7 @@ Error ARMAttributeParser::nodefaults(AttrType tag) {
 
 Error ARMAttributeParser::handler(uint64_t tag, bool &handled) {
   handled = false;
-  for (unsigned AHI = 0, AHE = size(displayRoutines); AHI != AHE;
+  for (unsigned AHI = 0, AHE = array_lengthof(displayRoutines); AHI != AHE;
        ++AHI) {
     if (uint64_t(displayRoutines[AHI].attribute) == tag) {
       if (Error e =
diff --git a/llvm/lib/Support/CrashRecoveryContext.cpp b/llvm/lib/Support/CrashRecoveryContext.cpp
index b55e8a548d0bb..2ee3074b840e4 100644
--- a/llvm/lib/Support/CrashRecoveryContext.cpp
+++ b/llvm/lib/Support/CrashRecoveryContext.cpp
@@ -348,7 +348,7 @@ static void uninstallExceptionOrSignalHandlers() {
 
 static const int Signals[] =
     { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP };
-static const unsigned NumSignals = size(Signals);
+static const unsigned NumSignals = array_lengthof(Signals);
 static struct sigaction PrevActions[NumSignals];
 
 static void CrashRecoverySignalHandler(int Signal) {
diff --git a/llvm/lib/Support/NativeFormatting.cpp b/llvm/lib/Support/NativeFormatting.cpp
index fc7199ee9df12..0a797046bb684 100644
--- a/llvm/lib/Support/NativeFormatting.cpp
+++ b/llvm/lib/Support/NativeFormatting.cpp
@@ -145,7 +145,7 @@ void llvm::write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style,
       std::max(static_cast<unsigned>(W), std::max(1u, Nibbles) + PrefixChars);
 
   char NumberBuffer[kMaxWidth];
-  ::memset(NumberBuffer, '0', llvm::size(NumberBuffer));
+  ::memset(NumberBuffer, '0', llvm::array_lengthof(NumberBuffer));
   if (Prefix)
     NumberBuffer[1] = 'x';
   char *EndPtr = NumberBuffer + NumChars;
diff --git a/llvm/lib/Support/RISCVAttributeParser.cpp b/llvm/lib/Support/RISCVAttributeParser.cpp
index 7629719a55e41..393861c73a4a9 100644
--- a/llvm/lib/Support/RISCVAttributeParser.cpp
+++ b/llvm/lib/Support/RISCVAttributeParser.cpp
@@ -53,7 +53,7 @@ Error RISCVAttributeParser::stackAlign(unsigned tag) {
 
 Error RISCVAttributeParser::handler(uint64_t tag, bool &handled) {
   handled = false;
-  for (unsigned AHI = 0, AHE = size(displayRoutines); AHI != AHE;
+  for (unsigned AHI = 0, AHE = array_lengthof(displayRoutines); AHI != AHE;
        ++AHI) {
     if (uint64_t(displayRoutines[AHI].attribute) == tag) {
       if (Error e = (this->*displayRoutines[AHI].routine)(tag))
diff --git a/llvm/lib/Support/Triple.cpp b/llvm/lib/Support/Triple.cpp
index 925ff1be369fc..20dea8c302a5e 100644
--- a/llvm/lib/Support/Triple.cpp
+++ b/llvm/lib/Support/Triple.cpp
@@ -898,13 +898,13 @@ std::string Triple::normalize(StringRef Str) {
   // If they are not there already, permute the components into their canonical
   // positions by seeing if they parse as a valid architecture, and if so moving
   // the component to the architecture position etc.
-  for (unsigned Pos = 0; Pos != size(Found); ++Pos) {
+  for (unsigned Pos = 0; Pos != array_lengthof(Found); ++Pos) {
     if (Found[Pos])
       continue; // Already in the canonical position.
 
     for (unsigned Idx = 0; Idx != Components.size(); ++Idx) {
       // Do not reparse any components that already matched.
-      if (Idx < size(Found) && Found[Idx])
+      if (Idx < array_lengthof(Found) && Found[Idx])
         continue;
 
       // Does this component parse as valid for the target position?
@@ -952,7 +952,7 @@ std::string Triple::normalize(StringRef Str) {
         // components to the right.
         for (unsigned i = Pos; !CurrentComponent.empty(); ++i) {
           // Skip over any fixed components.
-          while (i < size(Found) && Found[i])
+          while (i < array_lengthof(Found) && Found[i])
             ++i;
           // Place the component at the new position, getting the component
           // that was at this position - it will be moved right.
@@ -973,7 +973,7 @@ std::string Triple::normalize(StringRef Str) {
             if (CurrentComponent.empty())
               break;
             // Advance to the next component, skipping any fixed components.
-            while (++i < size(Found) && Found[i])
+            while (++i < array_lengthof(Found) && Found[i])
               ;
           }
           // The last component was pushed off the end - append it.
@@ -981,7 +981,7 @@ std::string Triple::normalize(StringRef Str) {
             Components.push_back(CurrentComponent);
 
           // Advance Idx to the component's new position.
-          while (++Idx < size(Found) && Found[Idx])
+          while (++Idx < array_lengthof(Found) && Found[Idx])
             ;
         } while (Idx < Pos); // Add more until the final position is reached.
       }
diff --git a/llvm/lib/Support/Unix/Signals.inc b/llvm/lib/Support/Unix/Signals.inc
index 29176e312c7d7..575e2aab1eab2 100644
--- a/llvm/lib/Support/Unix/Signals.inc
+++ b/llvm/lib/Support/Unix/Signals.inc
@@ -239,8 +239,8 @@ static const int InfoSigs[] = {
 };
 
 static const size_t NumSigs =
-    size(IntSigs) + size(KillSigs) +
-    size(InfoSigs) + 1 /* SIGPIPE */;
+    array_lengthof(IntSigs) + array_lengthof(KillSigs) +
+    array_lengthof(InfoSigs) + 1 /* SIGPIPE */;
 
 
 static std::atomic<unsigned> NumRegisteredSignals = ATOMIC_VAR_INIT(0);
@@ -298,7 +298,7 @@ static void RegisterHandlers() { // Not signal-safe.
   enum class SignalKind { IsKill, IsInfo };
   auto registerHandler = [&](int Signal, SignalKind Kind) {
     unsigned Index = NumRegisteredSignals.load();
-    assert(Index < size(RegisteredSignalInfo) &&
+    assert(Index < array_lengthof(RegisteredSignalInfo) &&
            "Out of space for signal handlers!");
 
     struct sigaction NewHandler;
@@ -562,13 +562,13 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS, int Depth) {
 #if defined(HAVE_BACKTRACE)
   // Use backtrace() to output a backtrace on Linux systems with glibc.
   if (!depth)
-    depth = backtrace(StackTrace, static_cast<int>(size(StackTrace)));
+    depth = backtrace(StackTrace, static_cast<int>(array_lengthof(StackTrace)));
 #endif
 #if defined(HAVE__UNWIND_BACKTRACE)
   // Try _Unwind_Backtrace() if backtrace() failed.
   if (!depth)
     depth = unwindBacktrace(StackTrace,
-                        static_cast<int>(size(StackTrace)));
+                        static_cast<int>(array_lengthof(StackTrace)));
 #endif
   if (!depth)
     return;
diff --git a/llvm/lib/Support/Windows/Path.inc b/llvm/lib/Support/Windows/Path.inc
index 544b9157a92a9..5f1a364ea1a8e 100644
--- a/llvm/lib/Support/Windows/Path.inc
+++ b/llvm/lib/Support/Windows/Path.inc
@@ -687,7 +687,7 @@ static bool isReservedName(StringRef path) {
     return true;
 
   // Then compare against the list of ancient reserved names.
-  for (size_t i = 0; i < size(sReservedNames); ++i) {
+  for (size_t i = 0; i < array_lengthof(sReservedNames); ++i) {
     if (path.equals_insensitive(sReservedNames[i]))
       return true;
   }
diff --git a/llvm/lib/Support/Windows/Signals.inc b/llvm/lib/Support/Windows/Signals.inc
index c05b743b334e8..32186bbe51607 100644
--- a/llvm/lib/Support/Windows/Signals.inc
+++ b/llvm/lib/Support/Windows/Signals.inc
@@ -242,7 +242,7 @@ static bool printStackTraceWithLLVMSymbolizer(llvm::raw_ostream &OS,
     if (StackFrame.AddrFrame.Offset == 0)
       break;
     StackTrace[Depth++] = (void *)(uintptr_t)StackFrame.AddrPC.Offset;
-    if (Depth >= size(StackTrace))
+    if (Depth >= array_lengthof(StackTrace))
       break;
   }
 
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 4c3013af6fb44..10f9692d217e9 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -54,7 +54,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator&=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = size(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
       // GCC <6.2 crashes if this is written in a single statement.
       uint32_t NewBits = Bits[I] & RHS.Bits[I];
       Bits[I] = NewBits;
@@ -63,7 +63,7 @@ class FeatureBitset {
   }
 
   constexpr FeatureBitset &operator|=(const FeatureBitset &RHS) {
-    for (unsigned I = 0, E = size(Bits); I != E; ++I) {
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I) {
       // GCC <6.2 crashes if this is written in a single statement.
       uint32_t NewBits = Bits[I] | RHS.Bits[I];
       Bits[I] = NewBits;
@@ -74,7 +74,7 @@ class FeatureBitset {
   // gcc 5.3 miscompiles this if we try to write this using operator&=.
   constexpr FeatureBitset operator&(const FeatureBitset &RHS) const {
     FeatureBitset Result;
-    for (unsigned I = 0, E = size(Bits); I != E; ++I)
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
       Result.Bits[I] = Bits[I] & RHS.Bits[I];
     return Result;
   }
@@ -82,20 +82,20 @@ class FeatureBitset {
   // gcc 5.3 miscompiles this if we try to write this using operator&=.
   constexpr FeatureBitset operator|(const FeatureBitset &RHS) const {
     FeatureBitset Result;
-    for (unsigned I = 0, E = size(Bits); I != E; ++I)
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
       Result.Bits[I] = Bits[I] | RHS.Bits[I];
     return Result;
   }
 
   constexpr FeatureBitset operator~() const {
     FeatureBitset Result;
-    for (unsigned I = 0, E = size(Bits); I != E; ++I)
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
       Result.Bits[I] = ~Bits[I];
     return Result;
   }
 
   constexpr bool operator!=(const FeatureBitset &RHS) const {
-    for (unsigned I = 0, E = size(Bits); I != E; ++I)
+    for (unsigned I = 0, E = array_lengthof(Bits); I != E; ++I)
       if (Bits[I] != RHS.Bits[I])
         return true;
     return false;
@@ -690,7 +690,7 @@ unsigned llvm::X86::getFeaturePriority(ProcessorFeatures Feat) {
 #include "llvm/Support/X86TargetParser.def"
       std::numeric_limits<unsigned>::max() // Need to consume last comma.
   };
-  std::array<unsigned, size(Priorities) - 1> HelperList;
+  std::array<unsigned, array_lengthof(Priorities) - 1> HelperList;
   std::iota(HelperList.begin(), HelperList.end(), 0);
   assert(std::is_permutation(HelperList.begin(), HelperList.end(),
                              std::begin(Priorities),
diff --git a/llvm/lib/Support/raw_ostream.cpp b/llvm/lib/Support/raw_ostream.cpp
index 96e7466e4e4d0..e4b747b68beaa 100644
--- a/llvm/lib/Support/raw_ostream.cpp
+++ b/llvm/lib/Support/raw_ostream.cpp
@@ -480,12 +480,12 @@ static raw_ostream &write_padding(raw_ostream &OS, unsigned NumChars) {
                                C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C};
 
   // Usually the indentation is small, handle it with a fastpath.
-  if (NumChars < size(Chars))
+  if (NumChars < array_lengthof(Chars))
     return OS.write(Chars, NumChars);
 
   while (NumChars) {
     unsigned NumToWrite = std::min(NumChars,
-                                   (unsigned)size(Chars)-1);
+                                   (unsigned)array_lengthof(Chars)-1);
     OS.write(Chars, NumToWrite);
     NumChars -= NumToWrite;
   }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 13dfea3edaf43..cce714ea918e8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5640,7 +5640,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
   static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
                                           AArch64::X3, AArch64::X4, AArch64::X5,
                                           AArch64::X6, AArch64::X7 };
-  static const unsigned NumGPRArgRegs = size(GPRArgRegs);
+  static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
   unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
 
   unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
@@ -5676,7 +5676,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
     static const MCPhysReg FPRArgRegs[] = {
         AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
         AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
-    static const unsigned NumFPRArgRegs = size(FPRArgRegs);
+    static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
     unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
 
     unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
index 462176155b7b6..ee0870d9ef7ae 100644
--- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
+++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp
@@ -706,7 +706,7 @@ static const LdStNInstrDesc LdStNInstInfo[] = {
 
 static const LdStNInstrDesc *getLdStNInstrDesc(unsigned Opcode) {
   unsigned Idx;
-  for (Idx = 0; Idx != size(LdStNInstInfo); ++Idx)
+  for (Idx = 0; Idx != array_lengthof(LdStNInstInfo); ++Idx)
     if (LdStNInstInfo[Idx].Opcode == Opcode)
       return &LdStNInstInfo[Idx];
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
index 5c7f2b5aee29b..aa7c7ff2e388d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULibFunc.cpp
@@ -347,7 +347,7 @@ const UnmangledFuncInfo UnmangledFuncInfo::Table[] = {
 };
 
 const unsigned UnmangledFuncInfo::TableSize =
-    size(UnmangledFuncInfo::Table);
+    array_lengthof(UnmangledFuncInfo::Table);
 
 static AMDGPULibFunc::Param getRetType(AMDGPULibFunc::EFuncId id,
                                        const AMDGPULibFunc::Param (&Leads)[2]) {
@@ -555,7 +555,7 @@ static AMDGPULibFunc::ENamePrefix parseNamePrefix(StringRef& mangledName) {
 }
 
 StringMap<int> ManglingRule::buildManglingRulesMap() {
-  StringMap<int> Map(size(manglingRules));
+  StringMap<int> Map(array_lengthof(manglingRules));
   int Id = 0;
   for (auto Rule : manglingRules)
     Map.insert({Rule.Name, Id++});
diff --git a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
index ba86df621fa0b..c329bae50f92e 100644
--- a/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600RegisterInfo.cpp
@@ -29,7 +29,7 @@ unsigned R600RegisterInfo::getSubRegFromChannel(unsigned Channel) {
     R600::sub12, R600::sub13, R600::sub14, R600::sub15
   };
 
-  assert(Channel < size(SubRegFromChannelTable));
+  assert(Channel < array_lengthof(SubRegFromChannelTable));
   return SubRegFromChannelTable[Channel];
 }
 
diff --git a/llvm/lib/Target/ARC/ARCISelLowering.cpp b/llvm/lib/Target/ARC/ARCISelLowering.cpp
index ce57140d8dbd3..7fd08f70ea3bb 100644
--- a/llvm/lib/Target/ARC/ARCISelLowering.cpp
+++ b/llvm/lib/Target/ARC/ARCISelLowering.cpp
@@ -560,18 +560,18 @@ SDValue ARCTargetLowering::LowerCallArguments(
                                         ARC::R4, ARC::R5, ARC::R6, ARC::R7};
     auto *AFI = MF.getInfo<ARCFunctionInfo>();
     unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs);
-    if (FirstVAReg < size(ArgRegs)) {
+    if (FirstVAReg < array_lengthof(ArgRegs)) {
       int Offset = 0;
       // Save remaining registers, storing higher register numbers at a higher
       // address
-      // There are (size(ArgRegs) - FirstVAReg) registers which
+      // There are (array_lengthof(ArgRegs) - FirstVAReg) registers which
       // need to be saved.
       int VarFI =
-          MFI.CreateFixedObject((size(ArgRegs) - FirstVAReg) * 4,
+          MFI.CreateFixedObject((array_lengthof(ArgRegs) - FirstVAReg) * 4,
                                 CCInfo.getNextStackOffset(), true);
       AFI->setVarArgsFrameIndex(VarFI);
       SDValue FIN = DAG.getFrameIndex(VarFI, MVT::i32);
-      for (unsigned i = FirstVAReg; i < size(ArgRegs); i++) {
+      for (unsigned i = FirstVAReg; i < array_lengthof(ArgRegs); i++) {
         // Move argument from phys reg -> virt reg
         unsigned VReg = RegInfo.createVirtualRegister(&ARC::GPR32RegClass);
         RegInfo.addLiveIn(ArgRegs[i], VReg);
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index d8343e070ccb2..cde7158803765 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -113,7 +113,7 @@ static const ARM_MLxEntry ARM_MLxTable[] = {
 ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
   : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
     Subtarget(STI) {
-  for (unsigned i = 0, e = size(ARM_MLxTable); i != e; ++i) {
+  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
     if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
       llvm_unreachable("Duplicated entries?");
     MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
@@ -2466,7 +2466,7 @@ static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
 };
 
 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
-  for (unsigned i = 0, e = size(AddSubFlagsOpcodeMap); i != e; ++i)
+  for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
     if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
       return AddSubFlagsOpcodeMap[i].MachineOpc;
   return 0;
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index adfdfb8a3404f..1f2f6f7497e02 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -2367,7 +2367,7 @@ bool ARMFrameLowering::assignCalleeSavedSpillSlots(
 const TargetFrameLowering::SpillSlot *
 ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
   static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
-  NumEntries = size(FixedSpillOffsets);
+  NumEntries = array_lengthof(FixedSpillOffsets);
   return FixedSpillOffsets;
 }
 
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index c0ce1fca9b157..fe4e6b24367a3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -4465,7 +4465,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
   int lastInsIndex = -1;
   if (isVarArg && MFI.hasVAStart()) {
     unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
-    if (RegIdx != size(GPRArgRegs))
+    if (RegIdx != array_lengthof(GPRArgRegs))
       ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
   }
 
diff --git a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index 98a5ae28efd85..1cc5422523f17 100644
--- a/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -244,7 +244,7 @@ INITIALIZE_PASS(Thumb2SizeReduce, DEBUG_TYPE, THUMB2_SIZE_REDUCE_NAME, false,
 Thumb2SizeReduce::Thumb2SizeReduce(std::function<bool(const Function &)> Ftor)
     : MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
   OptimizeSize = MinimizeSize = false;
-  for (unsigned i = 0, e = size(ReduceTable); i != e; ++i) {
+  for (unsigned i = 0, e = array_lengthof(ReduceTable); i != e; ++i) {
     unsigned FromOpc = ReduceTable[i].WideOpc;
     if (!ReduceOpcodeMap.insert(std::make_pair(FromOpc, i)).second)
       llvm_unreachable("Duplicated entries?");
diff --git a/llvm/lib/Target/AVR/AVRISelLowering.cpp b/llvm/lib/Target/AVR/AVRISelLowering.cpp
index 3ab79a16b5ab3..a58fedf6cd364 100644
--- a/llvm/lib/Target/AVR/AVRISelLowering.cpp
+++ b/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -1033,7 +1033,7 @@ static const MCPhysReg RegList16[] = {
     AVR::R16R15, AVR::R15R14, AVR::R14R13, AVR::R13R12, AVR::R12R11,
     AVR::R11R10, AVR::R10R9,  AVR::R9R8};
 
-static_assert(size(RegList8) == size(RegList16),
+static_assert(array_lengthof(RegList8) == array_lengthof(RegList16),
               "8-bit and 16-bit register arrays must be of equal length");
 
 /// Analyze incoming and outgoing function arguments. We need custom C++ code
@@ -1074,7 +1074,7 @@ analyzeArguments(TargetLowering::CallLoweringInfo *CLI, const Function *F,
     unsigned RegIdx = RegLastIdx + TotalBytes;
     RegLastIdx = RegIdx;
     // If there are not enough registers, use the stack
-    if (RegIdx >= size(RegList8)) {
+    if (RegIdx >= array_lengthof(RegList8)) {
       UseStack = true;
     }
     for (; i != j; ++i) {
diff --git a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 5944ca48e6e26..3c742c98077bc 100644
--- a/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -675,7 +675,7 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo,
     /* 28 */  0,          0,          UTIMERLO,   UTIMERHI
   };
 
-  if (RegNo >= size(CtrlRegDecoderTable))
+  if (RegNo >= array_lengthof(CtrlRegDecoderTable))
     return MCDisassembler::Fail;
 
   static_assert(NoRegister == 0, "Expecting NoRegister to be 0");
@@ -703,7 +703,7 @@ static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
     /* 28 */  0,          0,          UTIMER,     0
   };
 
-  if (RegNo >= size(CtrlReg64DecoderTable))
+  if (RegNo >= array_lengthof(CtrlReg64DecoderTable))
     return MCDisassembler::Fail;
 
   static_assert(NoRegister == 0, "Expecting NoRegister to be 0");
@@ -855,7 +855,7 @@ static DecodeStatus DecodeGuestRegsRegisterClass(MCInst &Inst, unsigned RegNo,
     /* 28 */ GPMUCNT2,  GPMUCNT3,   G30,        G31
   };
 
-  if (RegNo >= size(GuestRegDecoderTable))
+  if (RegNo >= array_lengthof(GuestRegDecoderTable))
     return MCDisassembler::Fail;
   if (GuestRegDecoderTable[RegNo] == Hexagon::NoRegister)
     return MCDisassembler::Fail;
@@ -881,7 +881,7 @@ static DecodeStatus DecodeGuestRegs64RegisterClass(MCInst &Inst, unsigned RegNo,
     /* 28 */ G29_28,    0,          G31_30,     0
   };
 
-  if (RegNo >= size(GuestReg64DecoderTable))
+  if (RegNo >= array_lengthof(GuestReg64DecoderTable))
     return MCDisassembler::Fail;
   if (GuestReg64DecoderTable[RegNo] == Hexagon::NoRegister)
     return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 036e4cfcd1473..4ffd31b670e40 100644
--- a/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -97,7 +97,7 @@ class HexagonFrameLowering : public TargetFrameLowering {
       { Hexagon::R25, -36 }, { Hexagon::R24, -40 }, { Hexagon::D12, -40 },
       { Hexagon::R27, -44 }, { Hexagon::R26, -48 }, { Hexagon::D13, -48 }
     };
-    NumEntries = size(Offsets);
+    NumEntries = array_lengthof(Offsets);
     return Offsets;
   }
 
diff --git a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index 2bbccfe766254..43afae441457a 100644
--- a/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -998,8 +998,8 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
 
   static const unsigned Regs01[] = { LC0, SA0, LC1, SA1 };
   static const unsigned Regs1[]  = { LC1, SA1 };
-  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, size(Regs01))
-                                 : makeArrayRef(Regs1, size(Regs1));
+  auto CheckRegs = IsInnerHWLoop ? makeArrayRef(Regs01, array_lengthof(Regs01))
+                                 : makeArrayRef(Regs1, array_lengthof(Regs1));
   for (unsigned R : CheckRegs)
     if (MI->modifiesRegister(R, TRI))
       return true;
diff --git a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 0b23ba2944327..161768b8dc226 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -304,7 +304,7 @@ bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
     SDNode *S = StoreInstrForLoadIntrinsic(L, C);
     SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
     SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
-    ReplaceUses(F, T, size(T));
+    ReplaceUses(F, T, array_lengthof(T));
     // This transformation will leave the intrinsic dead. If it remains in
     // the DAG, the selection code will see it again, but without the load,
     // and it will generate a store that is normally required for it.
diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index 8012c03bfc258..d7ca934a23e64 100644
--- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -144,7 +144,7 @@ static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
     Hexagon::R0, Hexagon::R1, Hexagon::R2,
     Hexagon::R3, Hexagon::R4, Hexagon::R5
   };
-  const unsigned NumArgRegs = size(ArgRegs);
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
 
   // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 26054d066616d..f8ac35aed7c03 100644
--- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -643,7 +643,7 @@ unsigned HexagonMCCodeEmitter::getExprOpValue(const MCInst &MI,
           Hexagon::fixup_Hexagon_GPREL16_0, Hexagon::fixup_Hexagon_GPREL16_1,
           Hexagon::fixup_Hexagon_GPREL16_2, Hexagon::fixup_Hexagon_GPREL16_3
         };
-        assert(Shift < size(GPRelFixups));
+        assert(Shift < array_lengthof(GPRelFixups));
         auto UsesGP = [] (const MCInstrDesc &D) {
           for (const MCPhysReg *U = D.getImplicitUses(); U && *U; ++U)
             if (*U == Hexagon::GP)
diff --git a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
index 144c004dbf90a..953916776c572 100644
--- a/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
+++ b/llvm/lib/Target/MSP430/MCTargetDesc/MSP430AsmBackend.cpp
@@ -81,7 +81,7 @@ class MSP430AsmBackend : public MCAsmBackend {
       {"fixup_8",             0,  8, 0},
       {"fixup_sym_diff",      0, 32, 0},
     };
-    static_assert((size(Infos)) == MSP430::NumTargetFixupKinds,
+    static_assert((array_lengthof(Infos)) == MSP430::NumTargetFixupKinds,
                   "Not all fixup kinds added to Infos array");
   
     if (Kind < FirstTargetFixupKind)
diff --git a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index b6382a51a0943..aebfc6b0ae2ea 100644
--- a/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -458,12 +458,12 @@ static void AnalyzeArguments(CCState &State,
   static const MCPhysReg CRegList[] = {
     MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
   };
-  static const unsigned CNbRegs = size(CRegList);
+  static const unsigned CNbRegs = array_lengthof(CRegList);
   static const MCPhysReg BuiltinRegList[] = {
     MSP430::R8, MSP430::R9, MSP430::R10, MSP430::R11,
     MSP430::R12, MSP430::R13, MSP430::R14, MSP430::R15
   };
-  static const unsigned BuiltinNbRegs = size(BuiltinRegList);
+  static const unsigned BuiltinNbRegs = array_lengthof(BuiltinRegList);
 
   ArrayRef<MCPhysReg> RegList;
   unsigned NbRegs;
diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 72578974cbe81..a3dbe6f84a1e3 100644
--- a/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -420,7 +420,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
     { "fixup_Mips_JALR",                 0,     32,   0 },
     { "fixup_MICROMIPS_JALR",            0,     32,   0 }
   };
-  static_assert(size(LittleEndianInfos) == Mips::NumTargetFixupKinds,
+  static_assert(array_lengthof(LittleEndianInfos) == Mips::NumTargetFixupKinds,
                 "Not all MIPS little endian fixup kinds added!");
 
   const static MCFixupKindInfo BigEndianInfos[] = {
@@ -499,7 +499,7 @@ getFixupKindInfo(MCFixupKind Kind) const {
     { "fixup_Mips_JALR",                  0,     32,   0 },
     { "fixup_MICROMIPS_JALR",             0,     32,   0 }
   };
-  static_assert(size(BigEndianInfos) == Mips::NumTargetFixupKinds,
+  static_assert(array_lengthof(BigEndianInfos) == Mips::NumTargetFixupKinds,
                 "Not all MIPS big endian fixup kinds added!");
 
   if (Kind < FirstTargetFixupKind)
diff --git a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index 1e98b8ac54a14..78ffe00c020c0 100644
--- a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -246,7 +246,7 @@ bool Mips16TargetLowering::isEligibleForTailCallOptimization(
 }
 
 void Mips16TargetLowering::setMips16HardFloatLibCalls() {
-  for (unsigned I = 0; I != size(HardFloatLibCalls); ++I) {
+  for (unsigned I = 0; I != array_lengthof(HardFloatLibCalls); ++I) {
     assert((I == 0 || HardFloatLibCalls[I - 1] < HardFloatLibCalls[I]) &&
            "Array not sorted!");
     if (HardFloatLibCalls[I].Libcall != RTLIB::UNKNOWN_LIBCALL)
diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
index c90bd04370c67..77cdf5c939dcc 100644
--- a/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
+++ b/llvm/lib/Target/PowerPC/PPCCallingConv.cpp
@@ -37,7 +37,7 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   };
-  const unsigned NumArgRegs = size(ArgRegs);
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
 
   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
 
@@ -62,7 +62,7 @@ static bool CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(
     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
     PPC::R7, PPC::R8, PPC::R9, PPC::R10,
   };
-  const unsigned NumArgRegs = size(ArgRegs);
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
 
   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
   int RegsLeft = NumArgRegs - RegNum;
@@ -88,7 +88,7 @@ static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
     PPC::F8
   };
 
-  const unsigned NumArgRegs = size(ArgRegs);
+  const unsigned NumArgRegs = array_lengthof(ArgRegs);
 
   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
 
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index 9a04a201d2e68..65c969c196e1d 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -228,23 +228,23 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
       CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};
 
   if (Subtarget.is64BitELFABI()) {
-    NumEntries = size(ELFOffsets64);
+    NumEntries = array_lengthof(ELFOffsets64);
     return ELFOffsets64;
   }
 
   if (Subtarget.is32BitELFABI()) {
-    NumEntries = size(ELFOffsets32);
+    NumEntries = array_lengthof(ELFOffsets32);
     return ELFOffsets32;
   }
 
   assert(Subtarget.isAIXABI() && "Unexpected ABI.");
 
   if (Subtarget.isPPC64()) {
-    NumEntries = size(AIXOffsets64);
+    NumEntries = array_lengthof(AIXOffsets64);
     return AIXOffsets64;
   }
 
-  NumEntries = size(AIXOffsets32);
+  NumEntries = array_lengthof(AIXOffsets32);
   return AIXOffsets32;
 }
 
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 650b9e9774f49..25cc34badda04 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -4149,13 +4149,13 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
       PPC::R3, PPC::R4, PPC::R5, PPC::R6,
       PPC::R7, PPC::R8, PPC::R9, PPC::R10,
     };
-    const unsigned NumGPArgRegs = size(GPArgRegs);
+    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
 
     static const MCPhysReg FPArgRegs[] = {
       PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
       PPC::F8
     };
-    unsigned NumFPArgRegs = size(FPArgRegs);
+    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
 
     if (useSoftFloat() || hasSPE())
        NumFPArgRegs = 0;
@@ -4267,9 +4267,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
 
-  const unsigned Num_GPR_Regs = size(GPR);
+  const unsigned Num_GPR_Regs = array_lengthof(GPR);
   const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
-  const unsigned Num_VR_Regs  = size(VR);
+  const unsigned Num_VR_Regs  = array_lengthof(VR);
 
   // Do a first pass over the arguments to determine whether the ABI
   // guarantees that our caller has allocated the parameter save area
@@ -4724,9 +4724,9 @@ needStackSlotPassParameters(const PPCSubtarget &Subtarget,
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
 
-  const unsigned NumGPRs = size(GPR);
+  const unsigned NumGPRs = array_lengthof(GPR);
   const unsigned NumFPRs = 13;
-  const unsigned NumVRs = size(VR);
+  const unsigned NumVRs = array_lengthof(VR);
   const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
 
   unsigned NumBytes = LinkageSize;
@@ -5977,9 +5977,9 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
     PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
   };
 
-  const unsigned NumGPRs = size(GPR);
+  const unsigned NumGPRs = array_lengthof(GPR);
   const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
-  const unsigned NumVRs  = size(VR);
+  const unsigned NumVRs  = array_lengthof(VR);
 
   // On ELFv2, we can avoid allocating the parameter area if all the arguments
   // can be passed to the callee in registers.
@@ -7148,7 +7148,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_AIX(
 
     static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                        PPC::X7, PPC::X8, PPC::X9, PPC::X10};
-    const unsigned NumGPArgRegs = size(IsPPC64 ? GPR_64 : GPR_32);
+    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);
 
     // The fixed integer arguments of a variadic function are stored to the
     // VarArgsFrameIndex on the stack so that they may be loaded by
@@ -9354,7 +9354,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
     -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
   };
 
-  for (unsigned idx = 0; idx < size(SplatCsts); ++idx) {
+  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
     // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
     // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
     int i = SplatCsts[idx];
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 3d91879389705..eada872c2a7db 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -310,7 +310,7 @@ static const uint16_t FMAOpIdxInfo[][6] = {
 // Check if an opcode is a FMA instruction. If it is, return the index in array
 // FMAOpIdxInfo. Otherwise, return -1.
 int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {
-  for (unsigned I = 0; I < size(FMAOpIdxInfo); I++)
+  for (unsigned I = 0; I < array_lengthof(FMAOpIdxInfo); I++)
     if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)
       return I;
   return -1;
@@ -2331,7 +2331,7 @@ bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,
 
   bool Found = false;
   for (const MachineOperand &MO : MI.operands()) {
-    for (unsigned c = 0; c < size(RCs) && !Found; ++c) {
+    for (unsigned c = 0; c < array_lengthof(RCs) && !Found; ++c) {
       const TargetRegisterClass *RC = RCs[c];
       if (MO.isReg()) {
         if (MO.isDef() && RC->contains(MO.getReg())) {
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
index 4305d1716b156..514789b3f6459 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -94,7 +94,7 @@ RISCVAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       {"fixup_riscv_set_6b", 2, 6, 0},
       {"fixup_riscv_sub_6b", 2, 6, 0},
   };
-  static_assert((size(Infos)) == RISCV::NumTargetFixupKinds,
+  static_assert((array_lengthof(Infos)) == RISCV::NumTargetFixupKinds,
                 "Not all fixup kinds added to Infos array");
 
   // Fixup kinds from .reloc directive are like R_RISCV_NONE. They
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index b96dae7f3c950..a01c76d587966 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8786,7 +8786,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
   }
 
   // FPR16, FPR32, and FPR64 alias each other.
-  if (State.getFirstUnallocated(ArgFPR32s) == size(ArgFPR32s)) {
+  if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
     UseGPRForF16_F32 = true;
     UseGPRForF64 = true;
   }
@@ -8815,7 +8815,7 @@ static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
     // Skip 'odd' register if necessary.
-    if (RegIdx != size(ArgGPRs) && RegIdx % 2 == 1)
+    if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
       State.AllocateReg(ArgGPRs);
   }
 
diff --git a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index a7d0029afc44a..142124a8e0d9d 100644
--- a/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -235,7 +235,7 @@ static DecodeStatus DecodeASRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
 static DecodeStatus DecodePRRegsRegisterClass(MCInst &Inst, unsigned RegNo,
                                                uint64_t Address,
                                                const void *Decoder) {
-  if (RegNo >= size(PRRegDecoderTable))
+  if (RegNo >= array_lengthof(PRRegDecoderTable))
     return MCDisassembler::Fail;
   Inst.addOperand(MCOperand::createReg(PRRegDecoderTable[RegNo]));
   return MCDisassembler::Success;
diff --git a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index f33d5cda8c8c1..ccc7d0737f531 100644
--- a/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -219,7 +219,7 @@ SystemZELFFrameLowering::SystemZELFFrameLowering()
   // Create a mapping from register number to save slot offset.
   // These offsets are relative to the start of the register save area.
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-  for (unsigned I = 0, E = size(ELFSpillOffsetTable); I != E; ++I)
+  for (unsigned I = 0, E = array_lengthof(ELFSpillOffsetTable); I != E; ++I)
     RegSpillOffsets[ELFSpillOffsetTable[I].Reg] = ELFSpillOffsetTable[I].Offset;
 }
 
@@ -824,7 +824,7 @@ SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
   // Create a mapping from register number to save slot offset.
   // These offsets are relative to the start of the local are area.
   RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS);
-  for (unsigned I = 0, E = size(XPLINKSpillOffsetTable); I != E; ++I)
+  for (unsigned I = 0, E = array_lengthof(XPLINKSpillOffsetTable); I != E; ++I)
     RegSpillOffsets[XPLINKSpillOffsetTable[I].Reg] =
         XPLINKSpillOffsetTable[I].Offset;
 }
diff --git a/llvm/lib/Target/VE/VEFrameLowering.h b/llvm/lib/Target/VE/VEFrameLowering.h
index 71ce75b8eafb0..99eb41189b250 100644
--- a/llvm/lib/Target/VE/VEFrameLowering.h
+++ b/llvm/lib/Target/VE/VEFrameLowering.h
@@ -62,7 +62,7 @@ class VEFrameLowering : public TargetFrameLowering {
         {VE::SX25, 104}, {VE::SX26, 112}, {VE::SX27, 120}, {VE::SX28, 128},
         {VE::SX29, 136}, {VE::SX30, 144}, {VE::SX31, 152}, {VE::SX32, 160},
         {VE::SX33, 168}};
-    NumEntries = size(Offsets);
+    NumEntries = array_lengthof(Offsets);
     return Offsets;
   }
 
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6a0d36bca1b77..e9ecff3bf514d 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -4408,7 +4408,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
     }
   }
 
-  for (unsigned I = 0, E = size(Match); I != E; ++I) {
+  for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
     Tmp.back() = Suffixes[I];
     if (MemOp && HasVectorReg)
       MemOp->Mem.Size = MemSize[I];
@@ -4454,7 +4454,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
   if (NumSuccessfulMatches > 1) {
     char MatchChars[4];
     unsigned NumMatches = 0;
-    for (unsigned I = 0, E = size(Match); I != E; ++I)
+    for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
       if (Match[I] == Match_Success)
         MatchChars[NumMatches++] = Suffixes[I];
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index fcf0fc1b74d1d..ba606d7a80edb 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12262,7 +12262,7 @@ static int canLowerByDroppingElements(ArrayRef<int> Mask, bool MatchEven,
       continue;
 
     bool IsAnyViable = false;
-    for (unsigned j = 0; j != size(ViableForN); ++j)
+    for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
       if (ViableForN[j]) {
         uint64_t N = j + 1;
 
@@ -12277,7 +12277,7 @@ static int canLowerByDroppingElements(ArrayRef<int> Mask, bool MatchEven,
       break;
   }
 
-  for (unsigned j = 0; j != size(ViableForN); ++j)
+  for (unsigned j = 0; j != array_lengthof(ViableForN); ++j)
     if (ViableForN[j])
       return j + 1;
 
diff --git a/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 47634634cb0a0..7c86262269fcd 100644
--- a/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1347,11 +1347,11 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
     };
     XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
     unsigned FirstVAReg = CCInfo.getFirstUnallocated(ArgRegs);
-    if (FirstVAReg < size(ArgRegs)) {
+    if (FirstVAReg < array_lengthof(ArgRegs)) {
       int offset = 0;
       // Save remaining registers, storing higher register numbers at a higher
       // address
-      for (int i = size(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
+      for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
         // Create a stack slot
         int FI = MFI.CreateFixedObject(4, offset, true);
         if (i == (int)FirstVAReg) {
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index db6df996dfe07..0598f751febe2 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -576,7 +576,7 @@ Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) {
     }
   }
 
-  assert((NextTmpIdx <= size(TmpResult) + 1) &&
+  assert((NextTmpIdx <= array_lengthof(TmpResult) + 1) &&
          "out-of-bound access");
 
   Value *Result;
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b1f515dc232dd..654f0d2a03a89 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4254,7 +4254,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
       ImmMapTy::const_iterator OtherImms[] = {
           Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Avg)};
-      for (size_t i = 0, e = size(OtherImms); i != e; ++i) {
+      for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
         ImmMapTy::const_iterator M = OtherImms[i];
         if (M == J || M == JE) continue;
 
diff --git a/llvm/lib/Transforms/Utils/MetaRenamer.cpp b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
index 679bc9e386472..9fba2f3f86b58 100644
--- a/llvm/lib/Transforms/Utils/MetaRenamer.cpp
+++ b/llvm/lib/Transforms/Utils/MetaRenamer.cpp
@@ -87,7 +87,7 @@ struct Renamer {
   Renamer(unsigned int seed) { prng.srand(seed); }
 
   const char *newName() {
-    return metaNames[prng.rand() % size(metaNames)];
+    return metaNames[prng.rand() % array_lengthof(metaNames)];
   }
 
   PRNG prng;
diff --git a/llvm/tools/llvm-config/llvm-config.cpp b/llvm/tools/llvm-config/llvm-config.cpp
index a663b4c45951f..8ed88f33ead46 100644
--- a/llvm/tools/llvm-config/llvm-config.cpp
+++ b/llvm/tools/llvm-config/llvm-config.cpp
@@ -170,7 +170,7 @@ static std::vector<std::string> ComputeLibsForComponents(
 
   // Build a map of component names to information.
   StringMap<AvailableComponent *> ComponentMap;
-  for (unsigned i = 0; i != size(AvailableComponents); ++i) {
+  for (unsigned i = 0; i != array_lengthof(AvailableComponents); ++i) {
     AvailableComponent *AC = &AvailableComponents[i];
     ComponentMap[AC->Name] = AC;
   }
@@ -540,7 +540,7 @@ int main(int argc, char **argv) {
         /// built, print LLVM_DYLIB_COMPONENTS instead of everything
         /// in the manifest.
         std::vector<std::string> Components;
-        for (unsigned j = 0; j != size(AvailableComponents); ++j) {
+        for (unsigned j = 0; j != array_lengthof(AvailableComponents); ++j) {
           // Only include non-installed components when in a development tree.
           if (!AvailableComponents[j].IsInstalled && !IsInDevelopmentTree)
             continue;
diff --git a/llvm/tools/llvm-objdump/COFFDump.cpp b/llvm/tools/llvm-objdump/COFFDump.cpp
index c3d5ff0d3f9ee..32fdd1a4d5c31 100644
--- a/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -173,7 +173,7 @@ void COFFDumper::printPEHeader(const PEHeader &Hdr) const {
       "Reserved",
   };
   outs() << "\nThe Data Directory\n";
-  for (uint32_t I = 0; I != size(DirName); ++I) {
+  for (uint32_t I = 0; I != array_lengthof(DirName); ++I) {
     uint32_t Addr = 0, Size = 0;
     if (const data_directory *Data = Obj.getDataDirectory(I)) {
       Addr = Data->RelativeVirtualAddress;
diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
index 691ded4ce943b..78be632f21530 100644
--- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
+++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp
@@ -883,8 +883,8 @@ void Decoder::decodeOpcodes(ArrayRef<uint8_t> Opcodes, unsigned Offset,
   bool Terminated = false;
   for (unsigned OI = Offset, OE = Opcodes.size(); !Terminated && OI < OE; ) {
     for (unsigned DI = 0;; ++DI) {
-      if ((isAArch64 && (DI >= size(Ring64))) ||
-          (!isAArch64 && (DI >= size(Ring)))) {
+      if ((isAArch64 && (DI >= array_lengthof(Ring64))) ||
+          (!isAArch64 && (DI >= array_lengthof(Ring)))) {
         SW.startLine() << format("0x%02x                ; Bad opcode!\n",
                                  Opcodes.data()[OI]);
         ++OI;
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index ae1b1212a6f8e..cfb618117d2bf 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -4988,7 +4988,7 @@ template <typename ELFT> static GNUAbiTag getGNUAbiTag(ArrayRef<uint8_t> Desc) {
       "Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl",
   };
   StringRef OSName = "Unknown";
-  if (Words[0] < size(OSNames))
+  if (Words[0] < array_lengthof(OSNames))
     OSName = OSNames[Words[0]];
   uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3];
   std::string str;
diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp
index 35fa362eca71e..1683a9d671734 100644
--- a/llvm/unittests/ADT/APFloatTest.cpp
+++ b/llvm/unittests/ADT/APFloatTest.cpp
@@ -937,13 +937,13 @@ TEST(APFloatTest, fromStringSpecials) {
   };
 
   // Convert payload integer to decimal string representation.
-  std::string NaNPayloadDecStrings[size(NaNPayloads)];
-  for (size_t I = 0; I < size(NaNPayloads); ++I)
+  std::string NaNPayloadDecStrings[array_lengthof(NaNPayloads)];
+  for (size_t I = 0; I < array_lengthof(NaNPayloads); ++I)
     NaNPayloadDecStrings[I] = utostr(NaNPayloads[I]);
 
   // Convert payload integer to hexadecimal string representation.
-  std::string NaNPayloadHexStrings[size(NaNPayloads)];
-  for (size_t I = 0; I < size(NaNPayloads); ++I)
+  std::string NaNPayloadHexStrings[array_lengthof(NaNPayloads)];
+  for (size_t I = 0; I < array_lengthof(NaNPayloads); ++I)
     NaNPayloadHexStrings[I] = "0x" + utohexstr(NaNPayloads[I]);
 
   // Fix payloads to expected result.
@@ -967,7 +967,7 @@ TEST(APFloatTest, fromStringSpecials) {
     for (char TypeChar : NaNTypes) {
       bool Signaling = (TypeChar == 's' || TypeChar == 'S');
 
-      for (size_t J = 0; J < size(NaNPayloads); ++J) {
+      for (size_t J = 0; J < array_lengthof(NaNPayloads); ++J) {
         uint64_t Payload = (Signaling && !NaNPayloads[J]) ? SNaNDefaultPayload
                                                           : NaNPayloads[J];
         std::string &PayloadDec = NaNPayloadDecStrings[J];
@@ -2220,7 +2220,7 @@ TEST(APFloatTest, add) {
     { MSmallestNormalized, MSmallestNormalized, "-0x1p-125", APFloat::opOK, APFloat::fcNormal }
   };
 
-  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.add(y, APFloat::rmNearestTiesToEven);
@@ -2460,7 +2460,7 @@ TEST(APFloatTest, subtract) {
     { MSmallestNormalized, MSmallestNormalized, "0x0p+0", APFloat::opOK, APFloat::fcZero }
   };
 
-  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.subtract(y, APFloat::rmNearestTiesToEven);
@@ -2764,7 +2764,7 @@ TEST(APFloatTest, multiply) {
      APFloat::rmNearestTiesToAway},
   };
 
-  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.multiply(y, SpecialCaseTests[i].roundingMode);
@@ -3046,7 +3046,7 @@ TEST(APFloatTest, divide) {
      APFloat::rmNearestTiesToAway},
   };
 
-  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.divide(y, SpecialCaseTests[i].roundingMode);
@@ -4006,7 +4006,7 @@ TEST(APFloatTest, remainder) {
     { MVal6, MVal6, "-0x0p+0", APFloat::opOK, APFloat::fcZero },
   };
 
-  for (size_t i = 0; i < size(SpecialCaseTests); ++i) {
+  for (size_t i = 0; i < array_lengthof(SpecialCaseTests); ++i) {
     APFloat x(SpecialCaseTests[i].x);
     APFloat y(SpecialCaseTests[i].y);
     APFloat::opStatus status = x.remainder(y);
diff --git a/llvm/unittests/ADT/StringRefTest.cpp b/llvm/unittests/ADT/StringRefTest.cpp
index 114c097b3db57..e80a25a19969c 100644
--- a/llvm/unittests/ADT/StringRefTest.cpp
+++ b/llvm/unittests/ADT/StringRefTest.cpp
@@ -659,7 +659,7 @@ TEST(StringRefTest, getAsInteger) {
   uint32_t U32;
   uint64_t U64;
 
-  for (size_t i = 0; i < size(Unsigned); ++i) {
+  for (size_t i = 0; i < array_lengthof(Unsigned); ++i) {
     bool U8Success = StringRef(Unsigned[i].Str).getAsInteger(0, U8);
     if (static_cast<uint8_t>(Unsigned[i].Expected) == Unsigned[i].Expected) {
       ASSERT_FALSE(U8Success);
@@ -691,7 +691,7 @@ TEST(StringRefTest, getAsInteger) {
   int32_t S32;
   int64_t S64;
 
-  for (size_t i = 0; i < size(Signed); ++i) {
+  for (size_t i = 0; i < array_lengthof(Signed); ++i) {
     bool S8Success = StringRef(Signed[i].Str).getAsInteger(0, S8);
     if (static_cast<int8_t>(Signed[i].Expected) == Signed[i].Expected) {
       ASSERT_FALSE(S8Success);
@@ -737,7 +737,7 @@ static const char* BadStrings[] = {
 
 TEST(StringRefTest, getAsUnsignedIntegerBadStrings) {
   unsigned long long U64;
-  for (size_t i = 0; i < size(BadStrings); ++i) {
+  for (size_t i = 0; i < array_lengthof(BadStrings); ++i) {
     bool IsBadNumber = StringRef(BadStrings[i]).getAsInteger(0, U64);
     ASSERT_TRUE(IsBadNumber);
   }
@@ -820,7 +820,7 @@ TEST(StringRefTest, consumeIntegerUnsigned) {
   uint32_t U32;
   uint64_t U64;
 
-  for (size_t i = 0; i < size(ConsumeUnsigned); ++i) {
+  for (size_t i = 0; i < array_lengthof(ConsumeUnsigned); ++i) {
     StringRef Str = ConsumeUnsigned[i].Str;
     bool U8Success = Str.consumeInteger(0, U8);
     if (static_cast<uint8_t>(ConsumeUnsigned[i].Expected) ==
@@ -868,7 +868,7 @@ TEST(StringRefTest, consumeIntegerSigned) {
   int32_t S32;
   int64_t S64;
 
-  for (size_t i = 0; i < size(ConsumeSigned); ++i) {
+  for (size_t i = 0; i < array_lengthof(ConsumeSigned); ++i) {
     StringRef Str = ConsumeSigned[i].Str;
     bool S8Success = Str.consumeInteger(0, S8);
     if (static_cast<int8_t>(ConsumeSigned[i].Expected) ==
@@ -945,7 +945,7 @@ static const char join_result3[] = "a::b::c";
 TEST(StringRefTest, joinStrings) {
   std::vector<StringRef> v1;
   std::vector<std::string> v2;
-  for (size_t i = 0; i < size(join_input); ++i) {
+  for (size_t i = 0; i < array_lengthof(join_input); ++i) {
     v1.push_back(join_input[i]);
     v2.push_back(join_input[i]);
   }
diff --git a/llvm/unittests/ADT/TinyPtrVectorTest.cpp b/llvm/unittests/ADT/TinyPtrVectorTest.cpp
index aa6d53b40fa5f..a78f5b0d32aac 100644
--- a/llvm/unittests/ADT/TinyPtrVectorTest.cpp
+++ b/llvm/unittests/ADT/TinyPtrVectorTest.cpp
@@ -45,7 +45,7 @@ class TinyPtrVectorTest : public testing::Test {
   std::vector<PtrT> TestPtrs;
 
   TinyPtrVectorTest() {
-    for (size_t i = 0, e = size(TestValues); i != e; ++i)
+    for (size_t i = 0, e = array_lengthof(TestValues); i != e; ++i)
       TestPtrs.push_back(PtrT(&TestValues[i]));
 
     std::shuffle(TestPtrs.begin(), TestPtrs.end(), std::mt19937{});
diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp
index b9fd4d3979e66..cc4c953e65351 100644
--- a/llvm/unittests/ProfileData/CoverageMappingTest.cpp
+++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp
@@ -292,7 +292,7 @@ TEST_P(CoverageMappingTest, basic_write_read) {
 
 TEST_P(CoverageMappingTest, correct_deserialize_for_more_than_two_files) {
   const char *FileNames[] = {"bar", "baz", "foo"};
-  static const unsigned N = size(FileNames);
+  static const unsigned N = array_lengthof(FileNames);
 
   startFunction("func", 0x1234);
   for (unsigned I = 0; I < N; ++I)
@@ -321,7 +321,7 @@ TEST_P(CoverageMappingTest, load_coverage_for_more_than_two_files) {
   ProfileWriter.addRecord({"func", 0x1234, {0}}, Err);
 
   const char *FileNames[] = {"bar", "baz", "foo"};
-  static const unsigned N = size(FileNames);
+  static const unsigned N = array_lengthof(FileNames);
 
   startFunction("func", 0x1234);
   for (unsigned I = 0; I < N; ++I)
diff --git a/llvm/unittests/Support/BinaryStreamTest.cpp b/llvm/unittests/Support/BinaryStreamTest.cpp
index f3841b1eac594..f98b7f327682b 100644
--- a/llvm/unittests/Support/BinaryStreamTest.cpp
+++ b/llvm/unittests/Support/BinaryStreamTest.cpp
@@ -103,7 +103,7 @@ class BrokenStream : public WritableBinaryStream {
 };
 
 constexpr endianness Endians[] = {big, little, native};
-constexpr uint32_t NumEndians = llvm::size(Endians);
+constexpr uint32_t NumEndians = llvm::array_lengthof(Endians);
 constexpr uint32_t NumStreams = 2 * NumEndians;
 
 class BinaryStreamTest : public testing::Test {
diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp
index 695bb7043c5ef..8032d0e7bf403 100644
--- a/llvm/unittests/Support/CommandLineTest.cpp
+++ b/llvm/unittests/Support/CommandLineTest.cpp
@@ -345,7 +345,7 @@ TEST(CommandLineTest, AliasesWithArguments) {
     { "-tool", "-alias", "x" }
   };
 
-  for (size_t i = 0, e = size(Inputs); i < e; ++i) {
+  for (size_t i = 0, e = array_lengthof(Inputs); i < e; ++i) {
     StackOption<std::string> Actual("actual");
     StackOption<bool> Extra("extra");
     StackOption<std::string> Input(cl::Positional);
@@ -374,8 +374,8 @@ void testAliasRequired(int argc, const char *const *argv) {
 TEST(CommandLineTest, AliasRequired) {
   const char *opts1[] = { "-tool", "-option=x" };
   const char *opts2[] = { "-tool", "-o", "x" };
-  testAliasRequired(size(opts1), opts1);
-  testAliasRequired(size(opts2), opts2);
+  testAliasRequired(array_lengthof(opts1), opts1);
+  testAliasRequired(array_lengthof(opts2), opts2);
 }
 
 TEST(CommandLineTest, HideUnrelatedOptions) {
@@ -987,8 +987,8 @@ TEST(CommandLineTest, ResponseFileEOLs) {
                                       /*CurrentDir=*/StringRef(TestRoot), FS));
   const char *Expected[] = {"clang", "-Xclang", "-Wno-whatever", nullptr,
                             "input.cpp"};
-  ASSERT_EQ(size(Expected), Argv.size());
-  for (size_t I = 0, E = size(Expected); I < E; ++I) {
+  ASSERT_EQ(array_lengthof(Expected), Argv.size());
+  for (size_t I = 0, E = array_lengthof(Expected); I < E; ++I) {
     if (Expected[I] == nullptr) {
       ASSERT_EQ(Argv[I], nullptr);
     } else {
diff --git a/llvm/unittests/Support/FormatVariadicTest.cpp b/llvm/unittests/Support/FormatVariadicTest.cpp
index e905d259dbd86..37946165e3651 100644
--- a/llvm/unittests/Support/FormatVariadicTest.cpp
+++ b/llvm/unittests/Support/FormatVariadicTest.cpp
@@ -529,7 +529,7 @@ TEST(FormatVariadicTest, BigTest) {
   std::string S;
   llvm::raw_string_ostream Stream(S);
   Stream << formatv(Intro, std::tuple_size<Tuple>::value,
-                    llvm::size(Ts))
+                    llvm::array_lengthof(Ts))
          << "\n";
   Stream << formatv(Header, "Char", "HexInt", "Str", "Ref", "std::str",
                     "double", "float", "pointer", "comma", "exp", "bigint",
diff --git a/llvm/unittests/Support/TargetParserTest.cpp b/llvm/unittests/Support/TargetParserTest.cpp
index 7165f2d916179..a9a8de6d291a3 100644
--- a/llvm/unittests/Support/TargetParserTest.cpp
+++ b/llvm/unittests/Support/TargetParserTest.cpp
@@ -733,7 +733,7 @@ TEST(TargetParserTest, ARMArchExtFeature) {
                               {"mve", "nomve", "+mve", "-mve"},
                               {"mve.fp", "nomve.fp", "+mve.fp", "-mve.fp"}};
 
-  for (unsigned i = 0; i < size(ArchExt); i++) {
+  for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
     EXPECT_EQ(StringRef(ArchExt[i][2]), ARM::getArchExtFeature(ArchExt[i][0]));
     EXPECT_EQ(StringRef(ArchExt[i][3]), ARM::getArchExtFeature(ArchExt[i][1]));
   }
@@ -764,7 +764,7 @@ TEST(TargetParserTest, ARMArchExtDependencies) {
 TEST(TargetParserTest, ARMparseHWDiv) {
   const char *hwdiv[] = {"thumb", "arm", "arm,thumb", "thumb,arm"};
 
-  for (unsigned i = 0; i < size(hwdiv); i++)
+  for (unsigned i = 0; i < array_lengthof(hwdiv); i++)
     EXPECT_NE(ARM::AEK_INVALID, ARM::parseHWDiv((StringRef)hwdiv[i]));
 }
 
@@ -782,7 +782,7 @@ TEST(TargetParserTest, ARMparseArchEndianAndISA) {
       "v8.7a",     "v8.8-a", "v8.8a", "v8-r",   "v8m.base", "v8m.main",
       "v8.1m.main"};
 
-  for (unsigned i = 0; i < size(Arch); i++) {
+  for (unsigned i = 0; i < array_lengthof(Arch); i++) {
     std::string arm_1 = "armeb" + (std::string)(Arch[i]);
     std::string arm_2 = "arm" + (std::string)(Arch[i]) + "eb";
     std::string arm_3 = "arm" + (std::string)(Arch[i]);
@@ -821,7 +821,7 @@ TEST(TargetParserTest, ARMparseArchEndianAndISA) {
 }
 
 TEST(TargetParserTest, ARMparseArchProfile) {
-  for (unsigned i = 0; i < size(ARMArch); i++) {
+  for (unsigned i = 0; i < array_lengthof(ARMArch); i++) {
     switch (ARM::parseArch(ARMArch[i])) {
     case ARM::ArchKind::ARMV6M:
     case ARM::ArchKind::ARMV7M:
@@ -861,7 +861,7 @@ TEST(TargetParserTest, ARMparseArchProfile) {
 }
 
 TEST(TargetParserTest, ARMparseArchVersion) {
-  for (unsigned i = 0; i < size(ARMArch); i++)
+  for (unsigned i = 0; i < array_lengthof(ARMArch); i++)
     if (((std::string)ARMArch[i]).substr(0, 4) == "armv")
       EXPECT_EQ((ARMArch[i][4] - 48u), ARM::parseArchVersion(ARMArch[i]));
     else
@@ -1526,7 +1526,7 @@ TEST(TargetParserTest, AArch64ArchExtFeature) {
       {"pmuv3", "nopmuv3", "+perfmon", "-perfmon"},
   };
 
-  for (unsigned i = 0; i < size(ArchExt); i++) {
+  for (unsigned i = 0; i < array_lengthof(ArchExt); i++) {
     EXPECT_EQ(StringRef(ArchExt[i][2]),
               AArch64::getArchExtFeature(ArchExt[i][0]));
     EXPECT_EQ(StringRef(ArchExt[i][3]),
diff --git a/llvm/utils/KillTheDoctor/KillTheDoctor.cpp b/llvm/utils/KillTheDoctor/KillTheDoctor.cpp
index be36176ae8ec0..358ef165cf63a 100644
--- a/llvm/utils/KillTheDoctor/KillTheDoctor.cpp
+++ b/llvm/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -207,7 +207,7 @@ static std::error_code GetFileNameFromHandle(HANDLE FileHandle,
   Success = ::GetMappedFileNameA(::GetCurrentProcess(),
                                 MappedFile,
                                 Filename,
-                                size(Filename) - 1);
+                                array_lengthof(Filename) - 1);
 
   if (!Success)
     return windows_error(::GetLastError());
@@ -242,12 +242,12 @@ static std::string FindProgram(const std::string &Program,
     DWORD length = ::SearchPathA(NULL,
                                  Program.c_str(),
                                  Extension,
-                                 size(PathName),
+                                 array_lengthof(PathName),
                                  PathName,
                                  NULL);
     if (length == 0)
       ec = windows_error(::GetLastError());
-    else if (length > size(PathName)) {
+    else if (length > array_lengthof(PathName)) {
       // This may have been the file, return with error.
       ec = windows_error(ERROR_BUFFER_OVERFLOW);
       break;
diff --git a/llvm/utils/TableGen/AsmWriterEmitter.cpp b/llvm/utils/TableGen/AsmWriterEmitter.cpp
index 36ad66f506ce8..9283ceeb31e08 100644
--- a/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -1175,7 +1175,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
   O.indent(2) << "  makeArrayRef(OpToPatterns),\n";
   O.indent(2) << "  makeArrayRef(Patterns),\n";
   O.indent(2) << "  makeArrayRef(Conds),\n";
-  O.indent(2) << "  StringRef(AsmStrings, size(AsmStrings)),\n";
+  O.indent(2) << "  StringRef(AsmStrings, array_lengthof(AsmStrings)),\n";
   if (MCOpPredicates.empty())
     O.indent(2) << "  nullptr,\n";
   else
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index b88f2514ee264..2c1583f7979db 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -477,7 +477,7 @@ static const char *const FixedInstrs[] = {
     nullptr};
 
 unsigned CodeGenTarget::getNumFixedInstructions() {
-  return size(FixedInstrs) - 1;
+  return array_lengthof(FixedInstrs) - 1;
 }
 
 /// Return all of the instructions defined by the target, ordered by
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 93a59f1956672..1ed7bc103f9ce 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -440,7 +440,7 @@ void RegisterInfoEmitter::EmitRegMappingTables(
       OS << "extern const unsigned " << Namespace
          << (j == 0 ? "DwarfFlavour" : "EHFlavour") << I << "Dwarf2LSize";
       if (!isCtor)
-        OS << " = size(" << Namespace
+        OS << " = array_lengthof(" << Namespace
            << (j == 0 ? "DwarfFlavour" : "EHFlavour") << I << "Dwarf2L);\n\n";
       else
         OS << ";\n\n";
@@ -498,7 +498,7 @@ void RegisterInfoEmitter::EmitRegMappingTables(
       OS << "extern const unsigned " << Namespace
          << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "L2DwarfSize";
       if (!isCtor)
-        OS << " = size(" << Namespace
+        OS << " = array_lengthof(" << Namespace
            << (j == 0 ? "DwarfFlavour" : "EHFlavour") << i << "L2Dwarf);\n\n";
       else
         OS << ";\n\n";
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index c8d33833b17a6..90e71a354d175 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -657,7 +657,7 @@ static const char* stringForDecisionType(ModRMDecisionType dt) {
 }
 
 DisassemblerTables::DisassemblerTables() {
-  for (unsigned i = 0; i < llvm::size(Tables); i++)
+  for (unsigned i = 0; i < llvm::array_lengthof(Tables); i++)
     Tables[i] = std::make_unique<ContextDecision>();
 
   HasConflicts = false;
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
index 3036a7268f141..5431baa0f9f41 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
@@ -186,7 +186,7 @@ class Extension<list<I32EnumAttrCase> extensions> : Availability {
     "}; " #
     // The following manual ArrayRef constructor call is to satisfy GCC 5.
     "ArrayRef<::mlir::spirv::Extension> " #
-      "ref(exts, ::llvm::size(exts));");
+      "ref(exts, ::llvm::array_lengthof(exts));");
   let instance = "ref";
 }
 
@@ -228,7 +228,7 @@ class Capability<list<I32EnumAttrCase> capabilities> : Availability {
     "}; " #
     // The following manual ArrayRef constructor call is to satisfy GCC 5.
     "ArrayRef<::mlir::spirv::Capability> " #
-      "ref(caps, ::llvm::size(caps));");
+      "ref(caps, ::llvm::array_lengthof(caps));");
   let instance = "ref";
 }
 
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp
index b5559df49d2f4..0562d42cfca43 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVEnums.cpp
@@ -59,16 +59,16 @@ ArrayRef<spirv::Extension> spirv::getImpliedExtensions(spirv::Version version) {
   case Version::V_1_3: {
     // The following manual ArrayRef constructor call is to satisfy GCC 5.
     static const Extension exts[] = {V_1_3_IMPLIED_EXTS};
-    return ArrayRef<spirv::Extension>(exts, llvm::size(exts));
+    return ArrayRef<spirv::Extension>(exts, llvm::array_lengthof(exts));
   }
   case Version::V_1_4: {
     static const Extension exts[] = {V_1_3_IMPLIED_EXTS, V_1_4_IMPLIED_EXTS};
-    return ArrayRef<spirv::Extension>(exts, llvm::size(exts));
+    return ArrayRef<spirv::Extension>(exts, llvm::array_lengthof(exts));
   }
   case Version::V_1_5: {
     static const Extension exts[] = {V_1_3_IMPLIED_EXTS, V_1_4_IMPLIED_EXTS,
                                      V_1_5_IMPLIED_EXTS};
-    return ArrayRef<spirv::Extension>(exts, llvm::size(exts));
+    return ArrayRef<spirv::Extension>(exts, llvm::array_lengthof(exts));
   }
   }
 
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
index 0084b1242d2d4..b66f569c352d9 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp
@@ -168,7 +168,7 @@ void CompositeType::getCapabilities(
         auto vecSize = getNumElements();
         if (vecSize == 8 || vecSize == 16) {
           static const Capability caps[] = {Capability::Vector16};
-          ArrayRef<Capability> ref(caps, llvm::size(caps));
+          ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
           capabilities.push_back(ref);
         }
         return type.getElementType().cast<ScalarType>().getCapabilities(
@@ -242,7 +242,7 @@ void CooperativeMatrixNVType::getExtensions(
     Optional<StorageClass> storage) {
   getElementType().cast<SPIRVType>().getExtensions(extensions, storage);
   static const Extension exts[] = {Extension::SPV_NV_cooperative_matrix};
-  ArrayRef<Extension> ref(exts, llvm::size(exts));
+  ArrayRef<Extension> ref(exts, llvm::array_lengthof(exts));
   extensions.push_back(ref);
 }
 
@@ -251,7 +251,7 @@ void CooperativeMatrixNVType::getCapabilities(
     Optional<StorageClass> storage) {
   getElementType().cast<SPIRVType>().getCapabilities(capabilities, storage);
   static const Capability caps[] = {Capability::CooperativeMatrixNV};
-  ArrayRef<Capability> ref(caps, llvm::size(caps));
+  ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
   capabilities.push_back(ref);
 }
 
@@ -468,7 +468,7 @@ void RuntimeArrayType::getCapabilities(
     Optional<StorageClass> storage) {
   {
     static const Capability caps[] = {Capability::Shader};
-    ArrayRef<Capability> ref(caps, llvm::size(caps));
+    ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
     capabilities.push_back(ref);
   }
   getElementType().cast<SPIRVType>().getCapabilities(capabilities, storage);
@@ -517,7 +517,7 @@ void ScalarType::getExtensions(SPIRVType::ExtensionArrayRefVector &extensions,
   case StorageClass::Uniform:
     if (getIntOrFloatBitWidth() == 8) {
       static const Extension exts[] = {Extension::SPV_KHR_8bit_storage};
-      ArrayRef<Extension> ref(exts, llvm::size(exts));
+      ArrayRef<Extension> ref(exts, llvm::array_lengthof(exts));
       extensions.push_back(ref);
     }
     LLVM_FALLTHROUGH;
@@ -525,7 +525,7 @@ void ScalarType::getExtensions(SPIRVType::ExtensionArrayRefVector &extensions,
   case StorageClass::Output:
     if (getIntOrFloatBitWidth() == 16) {
       static const Extension exts[] = {Extension::SPV_KHR_16bit_storage};
-      ArrayRef<Extension> ref(exts, llvm::size(exts));
+      ArrayRef<Extension> ref(exts, llvm::array_lengthof(exts));
       extensions.push_back(ref);
     }
     break;
@@ -547,11 +547,11 @@ void ScalarType::getCapabilities(
   case StorageClass::storage: {                                                \
     if (bitwidth == 8) {                                                       \
       static const Capability caps[] = {Capability::cap8};                     \
-      ArrayRef<Capability> ref(caps, llvm::size(caps));              \
+      ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));              \
       capabilities.push_back(ref);                                             \
     } else if (bitwidth == 16) {                                               \
       static const Capability caps[] = {Capability::cap16};                    \
-      ArrayRef<Capability> ref(caps, llvm::size(caps));              \
+      ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));              \
       capabilities.push_back(ref);                                             \
     }                                                                          \
     /* No requirements for other bitwidths */                                  \
@@ -571,7 +571,7 @@ void ScalarType::getCapabilities(
     case StorageClass::Output: {
       if (bitwidth == 16) {
         static const Capability caps[] = {Capability::StorageInputOutput16};
-        ArrayRef<Capability> ref(caps, llvm::size(caps));
+        ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
         capabilities.push_back(ref);
       }
       return;
@@ -588,7 +588,7 @@ void ScalarType::getCapabilities(
 #define WIDTH_CASE(type, width)                                                \
   case width: {                                                                \
     static const Capability caps[] = {Capability::type##width};                \
-    ArrayRef<Capability> ref(caps, llvm::size(caps));                \
+    ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));                \
     capabilities.push_back(ref);                                               \
   } break
 
@@ -1147,7 +1147,7 @@ void MatrixType::getCapabilities(
     Optional<StorageClass> storage) {
   {
     static const Capability caps[] = {Capability::Matrix};
-    ArrayRef<Capability> ref(caps, llvm::size(caps));
+    ArrayRef<Capability> ref(caps, llvm::array_lengthof(caps));
     capabilities.push_back(ref);
   }
   // Add any capabilities associated with the underlying vectors (i.e., columns)

From dcd0926ad018f50a3d91011424e063838888a4e0 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Wed, 26 Jan 2022 16:46:19 +0100
Subject: [PATCH 749/946] [lldb] Fix a couple of use-of-uninit-var errors in
 Materializer.cpp

These were probably not picked up before because they only run when
logging is enabled.
---
 lldb/source/Expression/Materializer.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lldb/source/Expression/Materializer.cpp b/lldb/source/Expression/Materializer.cpp
index 3945f3a70f756..d40365d60fb3b 100644
--- a/lldb/source/Expression/Materializer.cpp
+++ b/lldb/source/Expression/Materializer.cpp
@@ -700,7 +700,7 @@ class EntityVariable : public Materializer::Entity {
         DumpHexBytes(&dump_stream, data.GetBytes(), data.GetByteSize(), 16,
                      load_addr);
 
-        lldb::offset_t offset;
+        lldb::offset_t offset = 0;
 
         ptr = extractor.GetAddress(&offset);
 
@@ -978,7 +978,7 @@ class EntityResultVariable : public Materializer::Entity {
         DumpHexBytes(&dump_stream, data.GetBytes(), data.GetByteSize(), 16,
                      load_addr);
 
-        lldb::offset_t offset;
+        lldb::offset_t offset = 0;
 
         ptr = extractor.GetAddress(&offset);
 

From e6ebd2c72ddb35caf2bf612279e22e040364054f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Wed, 26 Jan 2022 16:26:19 +0000
Subject: [PATCH 750/946] [AArch64] Add float vector compare/select cost-model
 tests.

---
 .../CostModel/AArch64/vector-select.ll        | 573 +++++++++++++++++-
 1 file changed, 571 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index 2149fe2282961..b1639a02b869e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefix=COST
-; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s --check-prefix=CODE
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=COST,COST-NOFP16
+; RUN: opt < %s -mtriple=aarch64--linux-gnu -cost-model -analyze -mattr=+fullfp16 | FileCheck %s --check-prefixes=COST,COST-FULLFP16
+; RUN: llc < %s -mtriple=aarch64--linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefix=CODE
 
 ; COST-LABEL: v8i8_select_eq
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = icmp eq <8 x i8> %a, %b
@@ -151,3 +152,571 @@ define <2 x i64> @v2i64_select_no_cmp(<2 x i64> %a, <2 x i64> %b, <2 x i1> %cond
   %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b
   ret <2 x i64> %s.1
 }
+
+define <4 x half> @v4f16_select_ogt(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; COST-LABEL: v4f16_select_ogt
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %cmp.1 = fcmp ogt <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp ogt <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+;
+; CODE-LABEL: v4f16_select_ogt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ogt <4 x half> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+  ret <4 x half> %s.1
+}
+
+define <8 x half> @v8f16_select_ogt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; COST-LABEL: v8f16_select_ogt
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp ogt <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp ogt <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+;
+; CODE-LABEL: v8f16_select_ogt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ogt <8 x half> %a, %b
+  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+  ret <8 x half> %s.1
+}
+
+define <2 x float> @v2f32_select_ogt(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; COST-LABEL: v2f32_select_ogt
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ogt <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+;
+; CODE-LABEL: v2f32_select_ogt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ogt <2 x float> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+  ret <2 x float> %s.1
+}
+
+define <4 x float> @v4f32_select_ogt(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; COST-LABEL: v4f32_select_ogt
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ogt <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+;
+; CODE-LABEL: v4f32_select_ogt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ogt <4 x float> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+  ret <4 x float> %s.1
+}
+
+define <2 x double> @v2f64_select_ogt(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; COST-LABEL: v2f64_select_ogt
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ogt <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+;
+; CODE-LABEL: v2f64_select_ogt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ogt <2 x double> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+  ret <2 x double> %s.1
+}
+
+define <4 x half> @v4f16_select_oge(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; COST-LABEL: v4f16_select_oge
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %cmp.1 = fcmp oge <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp oge <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+;
+; CODE-LABEL: v4f16_select_oge
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oge <4 x half> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+  ret <4 x half> %s.1
+}
+
+define <8 x half> @v8f16_select_oge(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; COST-LABEL: v8f16_select_oge
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp oge <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp oge <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+;
+; CODE-LABEL: v8f16_select_oge
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oge <8 x half> %a, %b
+  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+  ret <8 x half> %s.1
+}
+
+define <2 x float> @v2f32_select_oge(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; COST-LABEL: v2f32_select_oge
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp oge <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+;
+; CODE-LABEL: v2f32_select_oge
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oge <2 x float> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+  ret <2 x float> %s.1
+}
+
+define <4 x float> @v4f32_select_oge(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; COST-LABEL: v4f32_select_oge
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp oge <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+;
+; CODE-LABEL: v4f32_select_oge
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oge <4 x float> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+  ret <4 x float> %s.1
+}
+
+define <2 x double> @v2f64_select_oge(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; COST-LABEL: v2f64_select_oge
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp oge <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+;
+; CODE-LABEL: v2f64_select_oge
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oge <2 x double> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+  ret <2 x double> %s.1
+}
+
+define <4 x half> @v4f16_select_olt(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; COST-LABEL: v4f16_select_olt
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %cmp.1 = fcmp olt <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp olt <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+;
+; CODE-LABEL: v4f16_select_olt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp olt <4 x half> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+  ret <4 x half> %s.1
+}
+
+define <8 x half> @v8f16_select_olt(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; COST-LABEL: v8f16_select_olt
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp olt <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp olt <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+;
+; CODE-LABEL: v8f16_select_olt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp olt <8 x half> %a, %b
+  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+  ret <8 x half> %s.1
+}
+
+define <2 x float> @v2f32_select_olt(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; COST-LABEL: v2f32_select_olt
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp olt <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+;
+; CODE-LABEL: v2f32_select_olt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp olt <2 x float> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+  ret <2 x float> %s.1
+}
+
+define <4 x float> @v4f32_select_olt(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; COST-LABEL: v4f32_select_olt
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp olt <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+;
+; CODE-LABEL: v4f32_select_olt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp olt <4 x float> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+  ret <4 x float> %s.1
+}
+
+define <2 x double> @v2f64_select_olt(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; COST-LABEL: v2f64_select_olt
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp olt <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+;
+; CODE-LABEL: v2f64_select_olt
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp olt <2 x double> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+  ret <2 x double> %s.1
+}
+
+define <4 x half> @v4f16_select_ole(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; COST-LABEL: v4f16_select_ole
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %cmp.1 = fcmp ole <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp ole <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+;
+; CODE-LABEL: v4f16_select_ole
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ole <4 x half> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+  ret <4 x half> %s.1
+}
+
+define <8 x half> @v8f16_select_ole(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; COST-LABEL: v8f16_select_ole
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp ole <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp ole <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+;
+; CODE-LABEL: v8f16_select_ole
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ole <8 x half> %a, %b
+  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+  ret <8 x half> %s.1
+}
+
+define <2 x float> @v2f32_select_ole(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; COST-LABEL: v2f32_select_ole
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ole <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+;
+; CODE-LABEL: v2f32_select_ole
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ole <2 x float> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+  ret <2 x float> %s.1
+}
+
+define <4 x float> @v4f32_select_ole(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; COST-LABEL: v4f32_select_ole
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ole <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+;
+; CODE-LABEL: v4f32_select_ole
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ole <4 x float> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+  ret <4 x float> %s.1
+}
+
+define <2 x double> @v2f64_select_ole(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; COST-LABEL: v2f64_select_ole
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ole <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+;
+; CODE-LABEL: v2f64_select_ole
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ole <2 x double> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+  ret <2 x double> %s.1
+}
+
+define <4 x half> @v4f16_select_oeq(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; COST-LABEL: v4f16_select_oeq
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %cmp.1 = fcmp oeq <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp oeq <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+;
+; CODE-LABEL: v4f16_select_oeq
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oeq <4 x half> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+  ret <4 x half> %s.1
+}
+
+define <8 x half> @v8f16_select_oeq(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; COST-LABEL: v8f16_select_oeq
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp oeq <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp oeq <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+;
+; CODE-LABEL: v8f16_select_oeq
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oeq <8 x half> %a, %b
+  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+  ret <8 x half> %s.1
+}
+
+define <2 x float> @v2f32_select_oeq(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; COST-LABEL: v2f32_select_oeq
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp oeq <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+;
+; CODE-LABEL: v2f32_select_oeq
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oeq <2 x float> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+  ret <2 x float> %s.1
+}
+
+define <4 x float> @v4f32_select_oeq(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; COST-LABEL: v4f32_select_oeq
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp oeq <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+;
+; CODE-LABEL: v4f32_select_oeq
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oeq <4 x float> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+  ret <4 x float> %s.1
+}
+
+define <2 x double> @v2f64_select_oeq(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; COST-LABEL: v2f64_select_oeq
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp oeq <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+;
+; CODE-LABEL: v2f64_select_oeq
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp oeq <2 x double> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+  ret <2 x double> %s.1
+}
+
+define <4 x half> @v4f16_select_one(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; COST-LABEL: v4f16_select_one
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %cmp.1 = fcmp one <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp one <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+;
+; CODE-LABEL: v4f16_select_one
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h
+; CODE-NEXT:    fcmgt v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h
+; CODE-NEXT:    orr   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp one <4 x half> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+  ret <4 x half> %s.1
+}
+
+define <8 x half> @v8f16_select_one(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; COST-LABEL: v8f16_select_one
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:  %cmp.1 = fcmp one <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp one <8 x half> %a, %b
+; COST-FULLFP16-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+;
+; CODE-LABEL: v8f16_select_one
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h
+; CODE-NEXT:    fcmgt v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h
+; CODE-NEXT:    orr   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp one <8 x half> %a, %b
+  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+  ret <8 x half> %s.1
+}
+
+define <2 x float> @v2f32_select_one(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; COST-LABEL: v2f32_select_one
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp one <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+
+; CODE-LABEL: v2f32_select_one
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    fcmgt v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    orr   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+
+  %cmp.1 = fcmp one <2 x float> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+  ret <2 x float> %s.1
+}
+
+define <4 x float> @v4f32_select_one(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; COST-LABEL: v4f32_select_one
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp one <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+
+; CODE-LABEL: v4f32_select_one
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    fcmgt v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    orr   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+
+  %cmp.1 = fcmp one <4 x float> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+  ret <4 x float> %s.1
+}
+
+define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; COST-LABEL: v2f64_select_one
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp one <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+;
+; CODE-LABEL: v2f64_select_one
+; CODE:       bb.0
+; CODE-NEXT:    fcmgt v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    fcmgt v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    orr   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp one <2 x double> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+  ret <2 x double> %s.1
+}
+
+define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; COST-LABEL: v2f32_select_ord
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ord <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+;
+; CODE-LABEL: v2f32_select_ord
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    fcmgt v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    orr   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    bif   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ord <2 x float> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+  ret <2 x float> %s.1
+}
+
+define <4 x float> @v4f32_select_ord(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; COST-LABEL: v4f32_select_ord
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ord <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+
+; CODE-LABEL: v4f32_select_ord
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    fcmgt v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    orr   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+
+  %cmp.1 = fcmp ord <4 x float> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+  ret <4 x float> %s.1
+}
+
+define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; COST-LABEL: v2f64_select_ord
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ord <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:   %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+;
+; CODE-LABEL: v2f64_select_ord
+; CODE:       bb.0
+; CODE-NEXT:    fcmge v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    fcmgt v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    orr   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    bif   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp ord <2 x double> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+  ret <2 x double> %s.1
+}

From 82f987fdd046252711696e3febecc79d0e0a48cf Mon Sep 17 00:00:00 2001
From: Alex Tsao <alextsao1999@outlook.com>
Date: Wed, 26 Jan 2022 22:02:32 +0530
Subject: [PATCH 751/946] FIx typo in comment

Reviewed By: xgupta

Differential Revision: https://reviews.llvm.org/D118219
---
 llvm/include/llvm/Target/Target.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 7ae690b83770b..d8faa63ee8778 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -993,7 +993,7 @@ class InstrInfo {
   // by default, and TableGen will infer their value from the instruction
   // pattern when possible.
   //
-  // Normally, TableGen will issue an error it it can't infer the value of a
+  // Normally, TableGen will issue an error if it can't infer the value of a
   // property that hasn't been set explicitly. When guessInstructionProperties
   // is set, it will guess a safe value instead.
   //

From 4e077c0a0b849e56d23d25d0789a4a57960c61d0 Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Tue, 25 Jan 2022 14:17:04 -0800
Subject: [PATCH 752/946] [AMDGPU] Remove feature register-banking

Since RegBankReassign pass was removed this feature is not
use for anything.

Differential Revision: https://reviews.llvm.org/D118195
---
 llvm/lib/Target/AMDGPU/AMDGPU.td           | 8 +-------
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 -
 llvm/lib/Target/AMDGPU/GCNSubtarget.h      | 5 -----
 3 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index e606f0e8fc3ce..806c0b18637ad 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -610,12 +610,6 @@ def FeatureDsSrc2Insts : SubtargetFeature<"ds-src2-insts",
   "Has ds_*_src2 instructions"
 >;
 
-def FeatureRegisterBanking : SubtargetFeature<"register-banking",
-  "HasRegisterBanking",
-  "true",
-  "Has register banking"
->;
-
 def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
   "HasVOP3Literal",
   "true",
@@ -826,7 +820,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
    FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
    FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
    FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
-   FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
+   FeatureNoSdstCMPX, FeatureVscnt,
    FeatureVOP3Literal, FeatureDPP8, FeatureExtendedImageInsts,
    FeatureNoDataDepHazard, FeaturePkFmacF16Inst,
    FeatureGFX10A16, FeatureSMemTimeInst, FeatureFastDenormalF32, FeatureG16,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 65296ad42f746..e82f9232b1140 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -269,7 +269,6 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
     HasGetWaveIdInst(false),
     HasSMemTimeInst(false),
     HasShaderCyclesRegister(false),
-    HasRegisterBanking(false),
     HasVOP3Literal(false),
     HasNoDataDepHazard(false),
     FlatAddressSpace(false),
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index d81f52b737996..0cd2cfa2f0e7b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -153,7 +153,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasGetWaveIdInst;
   bool HasSMemTimeInst;
   bool HasShaderCyclesRegister;
-  bool HasRegisterBanking;
   bool HasVOP3Literal;
   bool HasNoDataDepHazard;
   bool FlatAddressSpace;
@@ -723,10 +722,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return HasShaderCyclesRegister;
   }
 
-  bool hasRegisterBanking() const {
-    return HasRegisterBanking;
-  }
-
   bool hasVOP3Literal() const {
     return HasVOP3Literal;
   }

From b797d5e6b21b3af3d581642c9a535327aa0764a7 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan <qiucofan@cn.ibm.com>
Date: Thu, 27 Jan 2022 00:49:17 +0800
Subject: [PATCH 753/946] [CMake] [Clang] Add option to specify PowerPC long
 double format

This method introduces new CMake variable
PPC_LINUX_DEFAULT_IEEELONGDOUBLE (false by default) to enable fp128 as
default long double format.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D118110
---
 clang/CMakeLists.txt                      | 3 +++
 clang/include/clang/Config/config.h.cmake | 3 +++
 clang/include/clang/Driver/ToolChain.h    | 3 +++
 clang/lib/Driver/ToolChain.cpp            | 4 ++++
 clang/lib/Driver/ToolChains/Clang.cpp     | 2 +-
 clang/test/Analysis/builtin_signbit.cpp   | 9 ++++++---
 clang/test/Driver/ppc-abi.c               | 3 ---
 7 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index 49150fa7c5612..2bb395ffece53 100644
--- a/clang/CMakeLists.txt
+++ b/clang/CMakeLists.txt
@@ -237,6 +237,9 @@ set(ENABLE_LINKER_BUILD_ID OFF CACHE BOOL "pass --build-id to ld")
 set(ENABLE_X86_RELAX_RELOCATIONS ON CACHE BOOL
     "enable x86 relax relocations by default")
 
+set(PPC_LINUX_DEFAULT_IEEELONGDOUBLE OFF CACHE BOOL
+    "Enable IEEE binary128 as default long double format on PowerPC Linux.")
+
 set(CLANG_SPAWN_CC1 OFF CACHE BOOL
     "Whether clang should use a new process for the CC1 invocation")
 
diff --git a/clang/include/clang/Config/config.h.cmake b/clang/include/clang/Config/config.h.cmake
index 53386ef94129b..10a93293c0512 100644
--- a/clang/include/clang/Config/config.h.cmake
+++ b/clang/include/clang/Config/config.h.cmake
@@ -78,6 +78,9 @@
 /* enable x86 relax relocations by default */
 #cmakedefine01 ENABLE_X86_RELAX_RELOCATIONS
 
+/* Enable IEEE binary128 as default long double format on PowerPC Linux. */
+#cmakedefine01 PPC_LINUX_DEFAULT_IEEELONGDOUBLE
+
 /* Enable each functionality of modules */
 #cmakedefine01 CLANG_ENABLE_ARCMT
 #cmakedefine01 CLANG_ENABLE_OBJC_REWRITER
diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h
index 37011de6bd6d7..329833bb13be7 100644
--- a/clang/include/clang/Driver/ToolChain.h
+++ b/clang/include/clang/Driver/ToolChain.h
@@ -409,6 +409,9 @@ class ToolChain {
   /// Check whether to enable x86 relax relocations by default.
   virtual bool useRelaxRelocations() const;
 
+  /// Check whether use IEEE binary128 as long double format by default.
+  bool defaultToIEEELongDouble() const;
+
   /// GetDefaultStackProtectorLevel - Get the default stack protector level for
   /// this tool chain.
   virtual LangOptions::StackProtectorMode
diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp
index 7551ee4aeb79f..5fef1fb2ee5af 100644
--- a/clang/lib/Driver/ToolChain.cpp
+++ b/clang/lib/Driver/ToolChain.cpp
@@ -109,6 +109,10 @@ bool ToolChain::useRelaxRelocations() const {
   return ENABLE_X86_RELAX_RELOCATIONS;
 }
 
+bool ToolChain::defaultToIEEELongDouble() const {
+  return PPC_LINUX_DEFAULT_IEEELONGDOUBLE && getTriple().isOSLinux();
+}
+
 SanitizerArgs
 ToolChain::getSanitizerArgs(const llvm::opt::ArgList &JobArgs) const {
   SanitizerArgs SanArgs(*this, JobArgs, !SanitizerArgsChecked);
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 52d576345c027..9e10d6d890747 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -2061,7 +2061,7 @@ void Clang::AddPPCTargetArgs(const ArgList &Args,
     }
   }
 
-  bool IEEELongDouble = false;
+  bool IEEELongDouble = getToolChain().defaultToIEEELongDouble();
   for (const Arg *A : Args.filtered(options::OPT_mabi_EQ)) {
     StringRef V = A->getValue();
     if (V == "ieeelongdouble")
diff --git a/clang/test/Analysis/builtin_signbit.cpp b/clang/test/Analysis/builtin_signbit.cpp
index bf91511c43ce8..251391952f9c5 100644
--- a/clang/test/Analysis/builtin_signbit.cpp
+++ b/clang/test/Analysis/builtin_signbit.cpp
@@ -1,6 +1,9 @@
-// RUN: %clang -target powerpc-linux-gnu     -emit-llvm -S -O0 %s -o - | FileCheck %s --check-prefix=CHECK-BE --check-prefix=CHECK
-// RUN: %clang -target powerpc64-linux-gnu   -emit-llvm -S -O0 %s -o - | FileCheck %s --check-prefix=CHECK-BE --check-prefix=CHECK
-// RUN: %clang -target powerpc64le-linux-gnu -emit-llvm -S -O0 %s -o - | FileCheck %s --check-prefix=CHECK-LE --check-prefix=CHECK
+// RUN: %clang -target powerpc-linux-gnu -emit-llvm -S -mabi=ibmlongdouble \
+// RUN:   -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+// RUN: %clang -target powerpc64-linux-gnu -emit-llvm -S -mabi=ibmlongdouble \
+// RUN:   -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE
+// RUN: %clang -target powerpc64le-linux-gnu -emit-llvm -S -mabi=ibmlongdouble \
+// RUN:   -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
 
 bool b;
 double d = -1.0;
diff --git a/clang/test/Driver/ppc-abi.c b/clang/test/Driver/ppc-abi.c
index 764e1fe9cd8bf..a3840c655011d 100644
--- a/clang/test/Driver/ppc-abi.c
+++ b/clang/test/Driver/ppc-abi.c
@@ -63,9 +63,6 @@
 // CHECK-ELFv1-IEEE: "-mabi=ieeelongdouble"
 // CHECK-ELFv1-IEEE: "-target-abi" "elfv1"
 
-// Check -mabi=ibmlongdouble is the default.
-// RUN: %clang -target powerpc64le-linux-gnu %s -### 2>&1 \
-// RUN:   | FileCheck -check-prefix=CHECK-ELFv2-IBM128 %s
 // RUN: %clang -target powerpc64le-linux-gnu %s -mabi=ibmlongdouble -### 2>&1 \
 // RUN:   | FileCheck -check-prefix=CHECK-ELFv2-IBM128 %s
 

From aa418b91332ca9ea0686ae53ef916456dffcc31c Mon Sep 17 00:00:00 2001
From: Konstantina <Konstantina.Mitropoulou@amd.com>
Date: Mon, 24 Jan 2022 20:10:20 -0800
Subject: [PATCH 754/946] [AMDGPU][SIWholeQuadMode] Use the right VCC register
 to activate the correct lanes.

Reviewed By: critson

Differential Revision: https://reviews.llvm.org/D118096
---
 llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp |  9 +++--
 llvm/test/CodeGen/AMDGPU/wqm.ll            | 45 ++++++++++++++++++++++
 2 files changed, 50 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 77ee3c0ff0e46..46efb3c605c69 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -861,12 +861,16 @@ MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
   MachineInstr *VcmpMI;
   const MachineOperand &Op0 = MI.getOperand(0);
   const MachineOperand &Op1 = MI.getOperand(1);
+
+  // VCC represents lanes killed.
+  Register VCC = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
+
   if (TRI->isVGPR(*MRI, Op0.getReg())) {
     Opcode = AMDGPU::getVOPe32(Opcode);
     VcmpMI = BuildMI(MBB, &MI, DL, TII->get(Opcode)).add(Op1).add(Op0);
   } else {
     VcmpMI = BuildMI(MBB, &MI, DL, TII->get(Opcode))
-                 .addReg(AMDGPU::VCC, RegState::Define)
+                 .addReg(VCC, RegState::Define)
                  .addImm(0) // src0 modifiers
                  .add(Op1)
                  .addImm(0) // src1 modifiers
@@ -874,9 +878,6 @@ MachineInstr *SIWholeQuadMode::lowerKillF32(MachineBasicBlock &MBB,
                  .addImm(0); // omod
   }
 
-  // VCC represents lanes killed.
-  Register VCC = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
-
   MachineInstr *MaskUpdateMI =
       BuildMI(MBB, MI, DL, TII->get(AndN2Opc), LiveMaskReg)
           .addReg(LiveMaskReg)
diff --git a/llvm/test/CodeGen/AMDGPU/wqm.ll b/llvm/test/CodeGen/AMDGPU/wqm.ll
index bd2f5459003c7..8addc51e8eaa1 100644
--- a/llvm/test/CodeGen/AMDGPU/wqm.ll
+++ b/llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -3330,6 +3330,49 @@ main_body:
   ret float %out
 }
 
+; Check if the correct VCC register is selected. WQM pass incorrectly uses VCC for
+; vector comparisons in Wave32 mode.
+define amdgpu_ps void @test_for_deactivating_lanes_in_wave32(float addrspace(6)* inreg %0) {
+; GFX9-W64-LABEL: test_for_deactivating_lanes_in_wave32:
+; GFX9-W64:       ; %bb.0: ; %main_body
+; GFX9-W64-NEXT:    s_mov_b32 s3, 0x31016fac
+; GFX9-W64-NEXT:    s_mov_b32 s2, 32
+; GFX9-W64-NEXT:    s_mov_b32 s1, 0x8000
+; GFX9-W64-NEXT:    s_buffer_load_dword s0, s[0:3], 0x0
+; GFX9-W64-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX9-W64-NEXT:    v_cmp_le_f32_e64 vcc, s0, 0
+; GFX9-W64-NEXT:    s_andn2_b64 s[4:5], exec, vcc
+; GFX9-W64-NEXT:    s_cbranch_scc0 .LBB50_1
+; GFX9-W64-NEXT:    s_endpgm
+; GFX9-W64-NEXT:  .LBB50_1:
+; GFX9-W64-NEXT:    s_mov_b64 exec, 0
+; GFX9-W64-NEXT:    exp null off, off, off, off done vm
+; GFX9-W64-NEXT:    s_endpgm
+;
+; GFX10-W32-LABEL: test_for_deactivating_lanes_in_wave32:
+; GFX10-W32:       ; %bb.0: ; %main_body
+; GFX10-W32-NEXT:    s_mov_b32 s3, 0x31016fac
+; GFX10-W32-NEXT:    s_mov_b32 s2, 32
+; GFX10-W32-NEXT:    s_mov_b32 s1, 0x8000
+; GFX10-W32-NEXT:    s_buffer_load_dword s0, s[0:3], 0x0
+; GFX10-W32-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-W32-NEXT:    v_cmp_le_f32_e64 vcc_lo, s0, 0
+; GFX10-W32-NEXT:    s_andn2_b32 s4, exec_lo, vcc_lo
+; GFX10-W32-NEXT:    s_cbranch_scc0 .LBB50_1
+; GFX10-W32-NEXT:    s_endpgm
+; GFX10-W32-NEXT:  .LBB50_1:
+; GFX10-W32-NEXT:    s_mov_b32 exec_lo, 0
+; GFX10-W32-NEXT:    exp null off, off, off, off done vm
+; GFX10-W32-NEXT:    s_endpgm
+main_body:
+  %1 = ptrtoint float addrspace(6)* %0 to i32
+  %2 = insertelement <4 x i32> <i32 poison, i32 32768, i32 32, i32 822177708>, i32 %1, i32 0
+  %3 = call nsz arcp float @llvm.amdgcn.s.buffer.load.f32(<4 x i32> %2, i32 0, i32 0) #3
+  %4 = fcmp nsz arcp ugt float %3, 0.000000e+00
+  call void @llvm.amdgcn.kill(i1 %4) #1
+  ret void
+}
+
 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
 declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #1
 
@@ -3361,6 +3404,7 @@ declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1,
 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #2
 declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #2
 declare i32 @llvm.amdgcn.ds.swizzle(i32, i32)
+declare float @llvm.amdgcn.s.buffer.load.f32(<4 x i32>, i32, i32 immarg) #7
 
 attributes #1 = { nounwind }
 attributes #2 = { nounwind readonly }
@@ -3368,3 +3412,4 @@ attributes #3 = { nounwind readnone }
 attributes #4 = { nounwind readnone convergent }
 attributes #5 = { "amdgpu-ps-wqm-outputs" }
 attributes #6 = { nounwind "InitialPSInputAddr"="2" }
+attributes #7 = { nounwind readnone willreturn }

From 28bfa57a7315c3161124c90b4d52f467616dc92e Mon Sep 17 00:00:00 2001
From: Chih-Ping Chen <chih-ping.chen@intel.com>
Date: Tue, 18 Jan 2022 14:54:01 -0500
Subject: [PATCH 755/946] [DebugInfo] Add stringLocationExp field to
 DIStringType

DIStringType is used to encode the debug info of a character object
in Fortran. A Fortran deferred-length character object is typically
implemented as a pair of the following two pieces of info: An address
of the raw storage of the characters, and the length of the object.
The stringLocationExp field contains the DIExpression to get to the
raw storage.

This patch also enables the emission of DW_AT_data_location attribute
in a DW_TAG_string_type debug info entry based on stringLocationExp
in DIStringType.

A test is also added to ensure that the bitcode reader is backward
compatible with the old DIStringType format.

Differential Revision: https://reviews.llvm.org/D117586
---
 llvm/include/llvm/IR/DebugInfoMetadata.h      |  48 +++++++++++-------
 llvm/lib/AsmParser/LLParser.cpp               |   9 ++--
 llvm/lib/Bitcode/Reader/MetadataLoader.cpp    |  10 +++-
 llvm/lib/Bitcode/Writer/BitcodeWriter.cpp     |   1 +
 llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp     |  10 ++++
 llvm/lib/IR/AsmWriter.cpp                     |   2 +
 llvm/lib/IR/DebugInfoMetadata.cpp             |   9 ++--
 llvm/lib/IR/LLVMContextImpl.h                 |  10 ++--
 llvm/test/Bitcode/distringtype-backward.ll    |  47 +++++++++++++++++
 llvm/test/Bitcode/distringtype-backward.ll.bc | Bin 0 -> 2492 bytes
 llvm/test/DebugInfo/X86/distringtype.ll       |   7 ++-
 llvm/test/DebugInfo/fortran-string-type.ll    |   4 +-
 12 files changed, 123 insertions(+), 34 deletions(-)
 create mode 100644 llvm/test/Bitcode/distringtype-backward.ll
 create mode 100644 llvm/test/Bitcode/distringtype-backward.ll.bc

diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index c04f07c534afa..ba2568042c414 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -852,42 +852,48 @@ class DIStringType : public DIType {
 
   static DIStringType *getImpl(LLVMContext &Context, unsigned Tag,
                                StringRef Name, Metadata *StringLength,
-                               Metadata *StrLenExp, uint64_t SizeInBits,
-                               uint32_t AlignInBits, unsigned Encoding,
-                               StorageType Storage, bool ShouldCreate = true) {
+                               Metadata *StrLenExp, Metadata *StrLocationExp,
+                               uint64_t SizeInBits, uint32_t AlignInBits,
+                               unsigned Encoding, StorageType Storage,
+                               bool ShouldCreate = true) {
     return getImpl(Context, Tag, getCanonicalMDString(Context, Name),
-                   StringLength, StrLenExp, SizeInBits, AlignInBits, Encoding,
-                   Storage, ShouldCreate);
+                   StringLength, StrLenExp, StrLocationExp, SizeInBits,
+                   AlignInBits, Encoding, Storage, ShouldCreate);
   }
   static DIStringType *getImpl(LLVMContext &Context, unsigned Tag,
                                MDString *Name, Metadata *StringLength,
-                               Metadata *StrLenExp, uint64_t SizeInBits,
-                               uint32_t AlignInBits, unsigned Encoding,
-                               StorageType Storage, bool ShouldCreate = true);
+                               Metadata *StrLenExp, Metadata *StrLocationExp,
+                               uint64_t SizeInBits, uint32_t AlignInBits,
+                               unsigned Encoding, StorageType Storage,
+                               bool ShouldCreate = true);
 
   TempDIStringType cloneImpl() const {
     return getTemporary(getContext(), getTag(), getRawName(),
                         getRawStringLength(), getRawStringLengthExp(),
-                        getSizeInBits(), getAlignInBits(), getEncoding());
+                        getRawStringLocationExp(), getSizeInBits(),
+                        getAlignInBits(), getEncoding());
   }
 
 public:
   DEFINE_MDNODE_GET(DIStringType,
                     (unsigned Tag, StringRef Name, uint64_t SizeInBits,
                      uint32_t AlignInBits),
-                    (Tag, Name, nullptr, nullptr, SizeInBits, AlignInBits, 0))
+                    (Tag, Name, nullptr, nullptr, nullptr, SizeInBits,
+                     AlignInBits, 0))
   DEFINE_MDNODE_GET(DIStringType,
                     (unsigned Tag, MDString *Name, Metadata *StringLength,
-                     Metadata *StringLengthExp, uint64_t SizeInBits,
-                     uint32_t AlignInBits, unsigned Encoding),
-                    (Tag, Name, StringLength, StringLengthExp, SizeInBits,
-                     AlignInBits, Encoding))
+                     Metadata *StringLengthExp, Metadata *StringLocationExp,
+                     uint64_t SizeInBits, uint32_t AlignInBits,
+                     unsigned Encoding),
+                    (Tag, Name, StringLength, StringLengthExp,
+                     StringLocationExp, SizeInBits, AlignInBits, Encoding))
   DEFINE_MDNODE_GET(DIStringType,
                     (unsigned Tag, StringRef Name, Metadata *StringLength,
-                     Metadata *StringLengthExp, uint64_t SizeInBits,
-                     uint32_t AlignInBits, unsigned Encoding),
-                    (Tag, Name, StringLength, StringLengthExp, SizeInBits,
-                     AlignInBits, Encoding))
+                     Metadata *StringLengthExp, Metadata *StringLocationExp,
+                     uint64_t SizeInBits, uint32_t AlignInBits,
+                     unsigned Encoding),
+                    (Tag, Name, StringLength, StringLengthExp,
+                     StringLocationExp, SizeInBits, AlignInBits, Encoding))
 
   TempDIStringType clone() const { return cloneImpl(); }
 
@@ -903,11 +909,17 @@ class DIStringType : public DIType {
     return cast_or_null<DIExpression>(getRawStringLengthExp());
   }
 
+  DIExpression *getStringLocationExp() const {
+    return cast_or_null<DIExpression>(getRawStringLocationExp());
+  }
+
   unsigned getEncoding() const { return Encoding; }
 
   Metadata *getRawStringLength() const { return getOperand(3); }
 
   Metadata *getRawStringLengthExp() const { return getOperand(4); }
+
+  Metadata *getRawStringLocationExp() const { return getOperand(5); }
 };
 
 /// Derived types.
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 868b8693926d7..d93d41cd08467 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -4550,16 +4550,17 @@ bool LLParser::parseDIStringType(MDNode *&Result, bool IsDistinct) {
   OPTIONAL(name, MDStringField, );                                             \
   OPTIONAL(stringLength, MDField, );                                           \
   OPTIONAL(stringLengthExpression, MDField, );                                 \
+  OPTIONAL(stringLocationExpression, MDField, );                               \
   OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX));                            \
   OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));                           \
   OPTIONAL(encoding, DwarfAttEncodingField, );
   PARSE_MD_FIELDS();
 #undef VISIT_MD_FIELDS
 
-  Result = GET_OR_DISTINCT(DIStringType,
-                           (Context, tag.Val, name.Val, stringLength.Val,
-                            stringLengthExpression.Val, size.Val, align.Val,
-                            encoding.Val));
+  Result = GET_OR_DISTINCT(
+      DIStringType,
+      (Context, tag.Val, name.Val, stringLength.Val, stringLengthExpression.Val,
+       stringLocationExpression.Val, size.Val, align.Val, encoding.Val));
   return false;
 }
 
diff --git a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
index 67144d432d41d..0f41115140576 100644
--- a/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
+++ b/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1423,15 +1423,21 @@ Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
     break;
   }
   case bitc::METADATA_STRING_TYPE: {
-    if (Record.size() != 8)
+    if (Record.size() > 9 || Record.size() < 8)
       return error("Invalid record");
 
     IsDistinct = Record[0];
+    bool SizeIs8 = Record.size() == 8;
+    // StringLocationExp (i.e. Record[5]) is added at a later time
+    // than the other fields. The code here enables backward compatibility.
+    Metadata *StringLocationExp = SizeIs8 ? nullptr : getMDOrNull(Record[5]);
+    unsigned Offset = SizeIs8 ? 5 : 6;
     MetadataList.assignValue(
         GET_OR_DISTINCT(DIStringType,
                         (Context, Record[1], getMDString(Record[2]),
                          getMDOrNull(Record[3]), getMDOrNull(Record[4]),
-                         Record[5], Record[6], Record[7])),
+                         StringLocationExp, Record[Offset], Record[Offset + 1],
+                         Record[Offset + 2])),
         NextMetadataNo);
     NextMetadataNo++;
     break;
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 80978471da4e3..eb4e09ea3a265 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1657,6 +1657,7 @@ void ModuleBitcodeWriter::writeDIStringType(const DIStringType *N,
   Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
   Record.push_back(VE.getMetadataOrNullID(N->getStringLength()));
   Record.push_back(VE.getMetadataOrNullID(N->getStringLengthExp()));
+  Record.push_back(VE.getMetadataOrNullID(N->getStringLocationExp()));
   Record.push_back(N->getSizeInBits());
   Record.push_back(N->getAlignInBits());
   Record.push_back(N->getEncoding());
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index bfc98580002c5..15d90c54adfcb 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -742,6 +742,16 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIStringType *STy) {
     addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
   }
 
+  if (DIExpression *Expr = STy->getStringLocationExp()) {
+    DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+    DIEDwarfExpression DwarfExpr(*Asm, getCU(), *Loc);
+    // This is to describe the memory location of the
+    // string, so lock it down as such.
+    DwarfExpr.setMemoryLocationKind();
+    DwarfExpr.addExpression(Expr);
+    addBlock(Buffer, dwarf::DW_AT_data_location, DwarfExpr.finalize());
+  }
+
   if (STy->getEncoding()) {
     // For eventual Unicode support.
     addUInt(Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 6631fec2032eb..179754e275b03 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -1986,6 +1986,8 @@ static void writeDIStringType(raw_ostream &Out, const DIStringType *N,
   Printer.printString("name", N->getName());
   Printer.printMetadata("stringLength", N->getRawStringLength());
   Printer.printMetadata("stringLengthExpression", N->getRawStringLengthExp());
+  Printer.printMetadata("stringLocationExpression",
+                        N->getRawStringLocationExp());
   Printer.printInt("size", N->getSizeInBits());
   Printer.printInt("align", N->getAlignInBits());
   Printer.printDwarfEnum("encoding", N->getEncoding(),
diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp
index b20e581d283aa..59afb844eb899 100644
--- a/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -567,13 +567,16 @@ Optional<DIBasicType::Signedness> DIBasicType::getSignedness() const {
 DIStringType *DIStringType::getImpl(LLVMContext &Context, unsigned Tag,
                                     MDString *Name, Metadata *StringLength,
                                     Metadata *StringLengthExp,
+                                    Metadata *StringLocationExp,
                                     uint64_t SizeInBits, uint32_t AlignInBits,
                                     unsigned Encoding, StorageType Storage,
                                     bool ShouldCreate) {
   assert(isCanonical(Name) && "Expected canonical MDString");
-  DEFINE_GETIMPL_LOOKUP(DIStringType, (Tag, Name, StringLength, StringLengthExp,
-                                       SizeInBits, AlignInBits, Encoding));
-  Metadata *Ops[] = {nullptr, nullptr, Name, StringLength, StringLengthExp};
+  DEFINE_GETIMPL_LOOKUP(DIStringType,
+                        (Tag, Name, StringLength, StringLengthExp,
+                         StringLocationExp, SizeInBits, AlignInBits, Encoding));
+  Metadata *Ops[] = {nullptr,      nullptr,         Name,
+                     StringLength, StringLengthExp, StringLocationExp};
   DEFINE_GETIMPL_STORE(DIStringType, (Tag, SizeInBits, AlignInBits, Encoding),
                        Ops);
 }
diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 24c4a348f4da5..0b5f928165e87 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -428,20 +428,22 @@ template <> struct MDNodeKeyImpl<DIStringType> {
   MDString *Name;
   Metadata *StringLength;
   Metadata *StringLengthExp;
+  Metadata *StringLocationExp;
   uint64_t SizeInBits;
   uint32_t AlignInBits;
   unsigned Encoding;
 
   MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *StringLength,
-                Metadata *StringLengthExp, uint64_t SizeInBits,
-                uint32_t AlignInBits, unsigned Encoding)
+                Metadata *StringLengthExp, Metadata *StringLocationExp,
+                uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding)
       : Tag(Tag), Name(Name), StringLength(StringLength),
-        StringLengthExp(StringLengthExp), SizeInBits(SizeInBits),
-        AlignInBits(AlignInBits), Encoding(Encoding) {}
+        StringLengthExp(StringLengthExp), StringLocationExp(StringLocationExp),
+        SizeInBits(SizeInBits), AlignInBits(AlignInBits), Encoding(Encoding) {}
   MDNodeKeyImpl(const DIStringType *N)
       : Tag(N->getTag()), Name(N->getRawName()),
         StringLength(N->getRawStringLength()),
         StringLengthExp(N->getRawStringLengthExp()),
+        StringLocationExp(N->getRawStringLocationExp()),
         SizeInBits(N->getSizeInBits()), AlignInBits(N->getAlignInBits()),
         Encoding(N->getEncoding()) {}
 
diff --git a/llvm/test/Bitcode/distringtype-backward.ll b/llvm/test/Bitcode/distringtype-backward.ll
new file mode 100644
index 0000000000000..1136a021d4e9f
--- /dev/null
+++ b/llvm/test/Bitcode/distringtype-backward.ll
@@ -0,0 +1,47 @@
+; This test verifies the backward compatibility of DIStringType.
+;; Specifically, it makes sure that bitcode for DIStringType without
+;; the StringLocationExp field can be disassembled.
+; REQUIRES: x86_64-linux
+
+; RUN: llvm-dis -o - %s.bc | FileCheck %s
+
+; CHECK: !DIStringType(name: ".str.DEFERRED", stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8))
+
+; ModuleID = 'distringtype-backward.bc'
+source_filename = "distringtype.f90"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%"QNCA_a0$i8*$rank0$" = type { i8*, i64, i64, i64, i64, i64 }
+
+@"assumedlength_$DEFERRED" = internal global %"QNCA_a0$i8*$rank0$" zeroinitializer, !dbg !0
+@0 = internal unnamed_addr constant i32 2
+
+; Function Attrs: noinline nounwind optnone uwtable
+define void @MAIN__() #0 !dbg !2 {
+alloca_0:
+  ret void
+}
+
+declare i32 @for_set_reentrancy(i32* nocapture readonly)
+
+declare i32 @for_alloc_allocatable_handle(i64, i8** nocapture, i32, i8*)
+
+attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "intel-lang"="fortran" "target-cpu"="x86-64" }
+
+!llvm.module.flags = !{!10, !11}
+!llvm.dbg.cu = !{!6}
+!omp_offload.info = !{}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "deferred", linkageName: "assumedlength_$DEFERRED", scope: !2, file: !3, line: 2, type: !9, isLocal: true, isDefinition: true)
+!2 = distinct !DISubprogram(name: "assumedlength", linkageName: "MAIN__", scope: !3, file: !3, line: 1, type: !4, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagMainSubprogram, unit: !6, retainedNodes: !8)
+!3 = !DIFile(filename: "test2.f90", directory: "/iusers/cchen15/examples/tests/jr33383")
+!4 = !DISubroutineType(types: !5)
+!5 = !{null}
+!6 = distinct !DICompileUnit(language: DW_LANG_Fortran95, file: !3, producer: "Intel(R) Fortran 22.0-1258", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, globals: !7, splitDebugInlining: false, nameTableKind: None)
+!7 = !{!0}
+!8 = !{}
+!9 = !DIStringType(name: ".str.DEFERRED", stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8))
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/llvm/test/Bitcode/distringtype-backward.ll.bc b/llvm/test/Bitcode/distringtype-backward.ll.bc
new file mode 100644
index 0000000000000000000000000000000000000000..a454faeac3b32f0c7bca10068d4be4b5d24adba8
GIT binary patch
literal 2492
zcmZ`*VN6@s6~4AH?-9(iLr`M}+j*|Z=5__1p8?x3RDdBhD(wi3CZuGtAMhM#z?f$f
z8`DiTcJ^pWD<@@|HD!`f(<srpiEPotk4Q5($ynw_ByBphMG-V4Ar;Bc!m3D%w4G~0
zrTy5E&OP^@d)~eGoco<~U$ZlFx3&zS9SEUr1yj*W*X_Fa>}_Y?g|*Ntzc}~$Lq_>K
z)y<(V{_yy(uUEE6SUK1k6bOAQL8w@2KvEdL1Wd$?r<xWD+6q;y{j?;Z*e547`hpL-
zqh+nq=F`Q^XH{&Oja0w6gYBGf`L7s^Ev4e5s_C?}cY(ZEMmrBo(0`V&oeA};rdN~Z
z_hycqqOUH%y6@8nX+W|syNc-U1IX}$id30lA2gcpD^8qEdOCZL&=veR`lYdv%<YWM
zw|BlaJ8w$NS0`lJWYhlT*q&spJ`s0(hKKC9s+rsqDDX7$1(^adx$~r<b*`*+ru2MC
z0YV4W2$jQL<u1ds{{Tt+sK_Lv6=WbO`ujwoPZkb~K|c{O5u<26rdyKGx8`(f)7qu!
zk|l%o&WaWV!o+Br7$u1iA|oU*Y7>RDSf?h!=LxiTBSZZyL#?4D4=I^Jw+p*fQF$Be
zDT6(CjeDBucCrw<XczNdM&@`XcckNW=ZR1k5ekcAezDF2n~RZ*SZ5<b=f#q99n|0=
z6@eBh1wY|W69JltFodvJxIq&kRlc8BZW#z^;&>ww@eskq!bi~Nhl3s}jEIPe5TeB3
zG8Ik}!=4u(g7F|zJWLbe^k!6L(DW|2id2op1UD@?b&l+xEE*&Kxb{d_<}VpH<6{HO
z$x(*-MtX7uyu#3kE=!TeHKHLA0=#TdMy|N9zfB@-d@)97Hfp$C?k`d01CiHe<gNjg
z>G@uSl5!~`K*#D+D$UOJ<YdwJ-ndra++#$CPNo$*@~d8i-7bQSg^X&itZ2Pul7<PV
zEs>pUjuN9`B9IzZJV46FDrz%ISrpCs`8nN<X|0!$=gs2fIa$Mkvtd%!kPA=3^^U>4
zxkNwQvtM>REc$6tliNwHC#e-i>0^}lHmHpb%A&fWyKB%c8FcfU-pjo2xEyyZI_;Na
z^_S!I37I3eQFh!zjP(>?B575xAr4{_W?9DgQ<WkXTXO^U;6QAmlSsn}~zHmE1d
z)W0&+IwR{|nAWaNYjgSb4yV0t&|Zn@u5*Q7C7t#Kr#+Q#XS22`r)@&!n95eerq9P6
zU&<UaS;e3FMSqtl_~XGoA`F;Oi@~(0x$%zj2~z&EgIY~e_ZpQeG_?+1&?eoTIsJT-
z{ubajroXeSU0u<ABhh=+2W>OX{b%BiTt~$VM?7LEOoUSh+c%R+90C2i`s<u_&7iwG
zr~9cyj}L6nZE%H$PQ-1wtYgO600DVEU%%>fOu#TZd36VPDu&=ZB+elr6+3cIMXhJ3
z`yJF%#yEGYNx!<Xduc^?7Y^XkIA-Dvm*VA#tZmY1&t>ap=j;nv@UcGUx-?7@aH?Sl
zV3(-*`!#CaqkOy!uvgw^l=o@nw;hyEMcrd47(QJte6&ohc2Lh4<Jaq)7ANk??sbFi
zfkAi0po1G+Gwk)!zj9o5Hq6TGQ*nE)>oVLvI13pZafu<nsJUfTZZ=XrhWf`cwW*?D
zLD<EIl$*5DQpKc*2u)bkU<se#Bo&fy<3t2nSH4=LE+(4x&&nF6WR6QOz^OQ(_^x;?
zH|g)rX*V#KM`@_TAO*dPc1|cXMFQ)hjj_p7r*5{SRi@s0Gu!-8((uW7CLrnjI2HR!
z?F!)UTKMjfqja0kJ4FA87Xtl5zLpRtc((^ZJ`iN9ymqF#e<T3&)jd6Zyw6-)&4;-`
z{{SDT#ukC<cLj^ZVzadQg8YD~^A-BQkPs9&AI-8=Or@EvwQ+&K$ROW4!26B{``&75
zd)w{q<-NQh@V!-mpitG)dZ4wlv$f@a?A#UGYGfo&n8!P`MpDk+4z^!2%ezpTM2#pZ
zLCoQIY!htJVHLQe;N>?CfrP5u1tN-2Mv*OLEQ${n6^D>ag1(Sno0>!QUn-hvoha8r
zX5=~4{)XCWE?hV(k-Cb~5(Hr@x<2vY=fC~@)5jlv_R(*0S0^ji2Yh9clLFWC`(pva
zE=NG~>+*f!(#Wy+zath)&secB&=#<amCMzv8b$Hjb^&UO0c7zG!2~%=M7sPbz)2Re
z!@iguf1tfKU9#4MOA5egD;Hj>x#yuCc0h_LS8<{#A9hfiGtn3^YRXrDun6eI`0w(c
z$=WVS9B1PVQ(5UTC?@y~xkGDLIoz(r^z)o{aay~u1zO2Xhzx!ph+O^IxZ?~YzN{e~
zuZJR#%wi01^wvARt049>2vNSA5>fP*ugT00@f#z7GL7vH7eI7?hUqOEmQuZ(kUqsG
zdE()Hw?eN}HZ_9{@x+_JEO0w8*5O;hgeCqbSi}RMp8!M1(9ePKMeYX1v^|ge*d7y}
z!~Seb{N|zxgmj=Q1>TlePlCQ0bXwqTxvdA)cl+SAupa9^1uh2OJ_FWA@u7fz3_jv@
zz(+s!=l|e!3*p`&!5!d(Zh_~0I30RMv4$HM80y(-xggg)z`Of6A7qBPri!Uzj`jy>
z#>#rlmRgoGb5<)PUyrr6r-tXPY%OcA>1JwdRtsBKTg!)Sb?&;F$`Ri&-%!X`IneJL
h30EHVjr8_IO7%mgjrw_<yl(f1&&NUfx$_V1e*sAU2;Tqz

literal 0
HcmV?d00001

diff --git a/llvm/test/DebugInfo/X86/distringtype.ll b/llvm/test/DebugInfo/X86/distringtype.ll
index 5d53de29f82d9..5fd8c94f41922 100644
--- a/llvm/test/DebugInfo/X86/distringtype.ll
+++ b/llvm/test/DebugInfo/X86/distringtype.ll
@@ -5,11 +5,16 @@
 ;; generates DW_AT_string_length attribute
 ;; !DIStringType(name: "character(*)", stringLength: !{{[0-9]+}})
 ;; !DIStringType(name: "character(*)", stringLengthExpr: !DIExpression(...))
+;;
+;; !DIStringType has an optional stringLocationExpr field. This
+;; tests also verifies that field gets emitted as DW_AT_data_location
+;; in the DIE.
 
 ; RUN: llc -filetype=obj  %s -o - | llvm-dwarfdump - | FileCheck %s
 ; CHECK:       DW_TAG_string_type
 ; CHECK:                          DW_AT_name  (".str.DEFERRED")
 ; CHECK-NEXT:                     DW_AT_string_length (DW_OP_push_object_address, DW_OP_plus_uconst 0x8)
+; CHECK-NEXT:                     DW_AT_data_location (DW_OP_push_object_address, DW_OP_deref)
 ; CHECK:       DW_TAG_string_type
 ; CHECK:                          DW_AT_name  ("character(*)!2")
 ; CHECK-NEXT:                     DW_AT_string_length
@@ -150,7 +155,7 @@ attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }
 !6 = distinct !DICompileUnit(language: DW_LANG_Fortran95, file: !3, producer: "Intel(R) Fortran 21.0-2142", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !7, globals: !8, splitDebugInlining: false, nameTableKind: None)
 !7 = !{}
 !8 = !{!0}
-!9 = !DIStringType(name: ".str.DEFERRED", stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8))
+!9 = !DIStringType(name: ".str.DEFERRED", stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8), stringLocationExpression: !DIExpression(DW_OP_push_object_address, DW_OP_deref))
 !10 = !{i32 2, !"Debug Info Version", i32 3}
 !11 = !{i32 2, !"Dwarf Version", i32 4}
 !12 = !DILocation(line: 1, column: 9, scope: !2)
diff --git a/llvm/test/DebugInfo/fortran-string-type.ll b/llvm/test/DebugInfo/fortran-string-type.ll
index 937f065d1d6d5..e6fe3c5096d34 100644
--- a/llvm/test/DebugInfo/fortran-string-type.ll
+++ b/llvm/test/DebugInfo/fortran-string-type.ll
@@ -7,7 +7,7 @@
 ; CHECK: !DIStringType(name: "character(*)", stringLength: !{{[0-9]+}}, stringLengthExpression: !DIExpression(), size: 32)
 ; CHECK: !DIStringType(name: "character(10)", size: 80, align: 8)
 ; CHECK: !DIBasicType(tag: DW_TAG_string_type
-; CHECK: !DIStringType(name: ".str.DEFERRED", stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8))
+; CHECK: !DIStringType(name: ".str.DEFERRED", stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8), stringLocationExpression: !DIExpression(DW_OP_push_object_address, DW_OP_deref))
 
 !llvm.module.flags = !{!0, !1}
 !llvm.dbg.cu = !{!2}
@@ -26,4 +26,4 @@
 !11 = !DIBasicType(name: "integer*8", size: 64, align: 64, encoding: DW_ATE_signed)
 !12 = !DIStringType(name: "character(10)", size: 80, align: 8)
 !13 = !DIBasicType(tag: DW_TAG_string_type, name: "character")
-!14 = !DIStringType(name: ".str.DEFERRED", stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8))
+!14 = !DIStringType(name: ".str.DEFERRED", stringLengthExpression: !DIExpression(DW_OP_push_object_address, DW_OP_plus_uconst, 8), stringLocationExpression: !DIExpression(DW_OP_push_object_address, DW_OP_deref))

From cbd0822f2730e2b0d5f1c44b645609fe9554f58e Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere <jonas@devlieghere.com>
Date: Wed, 26 Jan 2022 09:10:50 -0800
Subject: [PATCH 756/946] [lldb] Remove ConstString::StaticMemorySize

Remove ConstString::StaticMemorySize as it is unused and superseded by
GetMemoryStats. It is referenced in a bunch of doc comments but I don't
really understand why. My best guess it that the comments were
copy-pasted from ConstString::MemorySize() even though it didn't make
sense there either. The implementation of StaticMemorySize was being
called on the MemoryPool, not on the ConstString itself.

Differential revision: https://reviews.llvm.org/D118091
---
 lldb/include/lldb/Core/Declaration.h    |  2 --
 lldb/include/lldb/Core/FileSpecList.h   |  2 --
 lldb/include/lldb/Core/Mangled.h        |  2 --
 lldb/include/lldb/Symbol/Function.h     |  6 ------
 lldb/include/lldb/Utility/ConstString.h | 12 ------------
 lldb/include/lldb/Utility/FileSpec.h    |  2 --
 lldb/source/Utility/ConstString.cpp     | 17 -----------------
 7 files changed, 43 deletions(-)

diff --git a/lldb/include/lldb/Core/Declaration.h b/lldb/include/lldb/Core/Declaration.h
index 67ab09f21c8d5..6ae21eb65eb36 100644
--- a/lldb/include/lldb/Core/Declaration.h
+++ b/lldb/include/lldb/Core/Declaration.h
@@ -152,8 +152,6 @@ class Declaration {
   ///     The number of bytes that this object occupies in memory.
   ///     The returned value does not include the bytes for any
   ///     shared string values.
-  ///
-  /// \see ConstString::StaticMemorySize ()
   size_t MemorySize() const;
 
   /// Set accessor for the declaration file specification.
diff --git a/lldb/include/lldb/Core/FileSpecList.h b/lldb/include/lldb/Core/FileSpecList.h
index cab8e9b9b43fc..117c6b691498c 100644
--- a/lldb/include/lldb/Core/FileSpecList.h
+++ b/lldb/include/lldb/Core/FileSpecList.h
@@ -152,8 +152,6 @@ class FileSpecList {
   ///
   /// \return
   ///     The number of bytes that this object occupies in memory.
-  ///
-  /// \see ConstString::StaticMemorySize ()
   size_t MemorySize() const;
 
   bool IsEmpty() const { return m_files.empty(); }
diff --git a/lldb/include/lldb/Core/Mangled.h b/lldb/include/lldb/Core/Mangled.h
index 35705b0319ab4..4e4f75e18e34d 100644
--- a/lldb/include/lldb/Core/Mangled.h
+++ b/lldb/include/lldb/Core/Mangled.h
@@ -183,8 +183,6 @@ class Mangled {
   ///
   /// \return
   ///     The number of bytes that this object occupies in memory.
-  ///
-  /// \see ConstString::StaticMemorySize ()
   size_t MemorySize() const;
 
   /// Set the string value in this object.
diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h
index b703617773ff9..83f9979c3c529 100644
--- a/lldb/include/lldb/Symbol/Function.h
+++ b/lldb/include/lldb/Symbol/Function.h
@@ -110,8 +110,6 @@ class FunctionInfo {
   ///     The number of bytes that this object occupies in memory.
   ///     The returned value does not include the bytes for any
   ///     shared string values.
-  ///
-  /// \see ConstString::StaticMemorySize ()
   virtual size_t MemorySize() const;
 
 protected:
@@ -238,8 +236,6 @@ class InlineFunctionInfo : public FunctionInfo {
   ///     The number of bytes that this object occupies in memory.
   ///     The returned value does not include the bytes for any
   ///     shared string values.
-  ///
-  /// \see ConstString::StaticMemorySize ()
   size_t MemorySize() const override;
 
 private:
@@ -595,8 +591,6 @@ class Function : public UserID, public SymbolContextScope {
   ///     The number of bytes that this object occupies in memory.
   ///     The returned value does not include the bytes for any
   ///     shared string values.
-  ///
-  /// \see ConstString::StaticMemorySize ()
   size_t MemorySize() const;
 
   /// Get whether compiler optimizations were enabled for this function
diff --git a/lldb/include/lldb/Utility/ConstString.h b/lldb/include/lldb/Utility/ConstString.h
index 937f8271a9a8e..31887d7f6115d 100644
--- a/lldb/include/lldb/Utility/ConstString.h
+++ b/lldb/include/lldb/Utility/ConstString.h
@@ -394,20 +394,8 @@ class ConstString {
   ///
   /// \return
   ///     The number of bytes that this object occupies in memory.
-  ///
-  /// \see ConstString::StaticMemorySize ()
   size_t MemorySize() const { return sizeof(ConstString); }
 
-  /// Get the size in bytes of the current global string pool.
-  ///
-  /// Reports the size in bytes of all shared C string values, containers and
-  /// any other values as a byte size for the entire string pool.
-  ///
-  /// \return
-  ///     The number of bytes that the global string pool occupies
-  ///     in memory.
-  static size_t StaticMemorySize();
-
   struct MemoryStats {
     size_t GetBytesTotal() const { return bytes_total; }
     size_t GetBytesUsed() const { return bytes_used; }
diff --git a/lldb/include/lldb/Utility/FileSpec.h b/lldb/include/lldb/Utility/FileSpec.h
index 305cd04f95c01..ec473a2afeec7 100644
--- a/lldb/include/lldb/Utility/FileSpec.h
+++ b/lldb/include/lldb/Utility/FileSpec.h
@@ -334,8 +334,6 @@ class FileSpec {
   ///
   /// \return
   ///     The number of bytes that this object occupies in memory.
-  ///
-  /// \see ConstString::StaticMemorySize ()
   size_t MemorySize() const;
 
   /// Change the file specified with a new path.
diff --git a/lldb/source/Utility/ConstString.cpp b/lldb/source/Utility/ConstString.cpp
index 76270e3f53b96..142c335ddbbe7 100644
--- a/lldb/source/Utility/ConstString.cpp
+++ b/lldb/source/Utility/ConstString.cpp
@@ -159,18 +159,6 @@ class Pool {
     return nullptr;
   }
 
-  // Return the size in bytes that this object and any items in its collection
-  // of uniqued strings + data count values takes in memory.
-  size_t MemorySize() const {
-    size_t mem_size = sizeof(Pool);
-    for (const auto &pool : m_string_pools) {
-      llvm::sys::SmartScopedReader<false> rlock(pool.m_mutex);
-      for (const auto &entry : pool.m_string_map)
-        mem_size += sizeof(StringPoolEntryType) + entry.getKey().size();
-    }
-    return mem_size;
-  }
-
   ConstString::MemoryStats GetMemoryStats() const {
     ConstString::MemoryStats stats;
     for (const auto &pool : m_string_pools) {
@@ -338,11 +326,6 @@ void ConstString::SetTrimmedCStringWithLength(const char *cstr,
   m_string = StringPool().GetConstTrimmedCStringWithLength(cstr, cstr_len);
 }
 
-size_t ConstString::StaticMemorySize() {
-  // Get the size of the static string pool
-  return StringPool().MemorySize();
-}
-
 ConstString::MemoryStats ConstString::GetMemoryStats() {
   return StringPool().GetMemoryStats();
 }

From a2fe81f32c3a2772e49cb5555d5978db4e5812a3 Mon Sep 17 00:00:00 2001
From: Kirill Bobyrev <kbobyrev@google.com>
Date: Wed, 26 Jan 2022 18:24:37 +0100
Subject: [PATCH 757/946] [clang] NFC: Use flush() idiomatically

Using both `raw_ostream::flush()` and `raw_ostream::str()` consecutively is
redundant. The alternatives are:

- Use `raw_ostream::str()` without `raw_ostream::flush()`
- Use `raw_ostream::flush()` and then use the destination for `raw_ostream`
  writer

The latter is more idiomatic, so the fix resolves this particular case in its
favor.

Reviewed By: kadircet

Differential Revision: https://reviews.llvm.org/D118247
---
 clang/lib/AST/DeclPrinter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/AST/DeclPrinter.cpp b/clang/lib/AST/DeclPrinter.cpp
index 715da7e0d90c5..c3f1d1544f79a 100644
--- a/clang/lib/AST/DeclPrinter.cpp
+++ b/clang/lib/AST/DeclPrinter.cpp
@@ -588,7 +588,7 @@ static void printExplicitSpecifier(ExplicitSpecifier ES, llvm::raw_ostream &Out,
   }
   EOut << " ";
   EOut.flush();
-  Out << EOut.str();
+  Out << Proto;
 }
 
 void DeclPrinter::VisitFunctionDecl(FunctionDecl *D) {

From 3595189217e684fcdaa87a78077d97f29549f560 Mon Sep 17 00:00:00 2001
From: Yitzhak Mandelbaum <yitzhakm@google.com>
Date: Tue, 25 Jan 2022 20:36:14 +0000
Subject: [PATCH 758/946] [clang][dataflow] Allow clients to disable built-in
 transfer functions.

These built-in functions build the (sophisticated) model of the code's
memory. This model isn't used by all analyses, so we provide for disabling it to
avoid incurring the costs associated with its construction.

Differential Revision: https://reviews.llvm.org/D118178
---
 .../Analysis/FlowSensitive/DataflowAnalysis.h |  2 ++
 .../TypeErasedDataflowAnalysis.h              | 14 ++++++++
 .../TypeErasedDataflowAnalysis.cpp            |  6 ++--
 .../Analysis/FlowSensitive/NoopAnalysis.h     |  9 ++++--
 .../FlowSensitive/TestingSupportTest.cpp      | 12 +++----
 .../Analysis/FlowSensitive/TransferTest.cpp   | 32 +++++++++++++++++--
 .../TypeErasedDataflowAnalysisTest.cpp        | 25 +++++++++------
 7 files changed, 78 insertions(+), 22 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
index 38a64a277412b..f327abe63751f 100644
--- a/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
@@ -62,6 +62,8 @@ class DataflowAnalysis : public TypeErasedDataflowAnalysis {
   using Lattice = LatticeT;
 
   explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {}
+  explicit DataflowAnalysis(ASTContext &Context, bool ApplyBuiltinTransfer)
+      : TypeErasedDataflowAnalysis(ApplyBuiltinTransfer), Context(Context) {}
 
   ASTContext &getASTContext() final { return Context; }
 
diff --git a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
index 3c25c8fc2f28f..9f44475b14ba1 100644
--- a/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
+++ b/clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h
@@ -41,7 +41,17 @@ struct TypeErasedLattice {
 
 /// Type-erased base class for dataflow analyses built on a single lattice type.
 class TypeErasedDataflowAnalysis : public Environment::Merger {
+  /// Determines whether to apply the built-in transfer functions.
+  // FIXME: Remove this option once the framework supports composing analyses
+  // (at which point the built-in transfer functions can be simply a standalone
+  // analysis).
+  bool ApplyBuiltinTransfer;
+
 public:
+  TypeErasedDataflowAnalysis() : ApplyBuiltinTransfer(true) {}
+  TypeErasedDataflowAnalysis(bool ApplyBuiltinTransfer)
+      : ApplyBuiltinTransfer(ApplyBuiltinTransfer) {}
+
   virtual ~TypeErasedDataflowAnalysis() {}
 
   /// Returns the `ASTContext` that is used by the analysis.
@@ -66,6 +76,10 @@ class TypeErasedDataflowAnalysis : public Environment::Merger {
   /// type-erased lattice element.
   virtual void transferTypeErased(const Stmt *, TypeErasedLattice &,
                                   Environment &) = 0;
+
+  /// Determines whether to apply the built-in transfer functions, which model
+  /// the heap and stack in the `Environment`.
+  bool applyBuiltinTransfer() const { return ApplyBuiltinTransfer; }
 };
 
 /// Type-erased model of the program at a given program point.
diff --git a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
index 0a5f89ce41ad2..aaf6a834f5b3a 100644
--- a/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
+++ b/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp
@@ -119,7 +119,8 @@ transferCFGStmt(const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis,
   const Stmt *S = CfgStmt.getStmt();
   assert(S != nullptr);
 
-  transfer(*S, State.Env);
+  if (Analysis.applyBuiltinTransfer())
+    transfer(*S, State.Env);
   Analysis.transferTypeErased(S, State.Lattice, State.Env);
 
   if (HandleTransferredStmt != nullptr)
@@ -178,7 +179,8 @@ TypeErasedDataflowAnalysisState transferBlock(
                       HandleTransferredStmt);
       break;
     case CFGElement::Initializer:
-      transferCFGInitializer(*Element.getAs<CFGInitializer>(), State);
+      if (Analysis.applyBuiltinTransfer())
+        transferCFGInitializer(*Element.getAs<CFGInitializer>(), State);
       break;
     default:
       // FIXME: Evaluate other kinds of `CFGElement`.
diff --git a/clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h b/clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h
index fc24a2b71421b..eab5782095bbc 100644
--- a/clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h
+++ b/clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h
@@ -39,8 +39,13 @@ inline std::ostream &operator<<(std::ostream &OS, const NoopLattice &) {
 
 class NoopAnalysis : public DataflowAnalysis<NoopAnalysis, NoopLattice> {
 public:
-  NoopAnalysis(ASTContext &Context)
-      : DataflowAnalysis<NoopAnalysis, NoopLattice>(Context) {}
+  /// `ApplyBuiltinTransfer` controls whether to run the built-in transfer
+  /// functions that model memory during the analysis. Their results are not
+  /// used by `NoopAnalysis`, but tests that need to inspect the environment
+  /// should enable them.
+  NoopAnalysis(ASTContext &Context, bool ApplyBuiltinTransfer)
+      : DataflowAnalysis<NoopAnalysis, NoopLattice>(Context,
+                                                    ApplyBuiltinTransfer) {}
 
   static NoopLattice initialElement() { return {}; }
 
diff --git a/clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp b/clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp
index aa3b6f9dc9663..960879025307f 100644
--- a/clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp
@@ -79,12 +79,12 @@ void checkDataflow(
                        ASTContext &)>
         Expectations) {
   ASSERT_THAT_ERROR(
-      test::checkDataflow<NoopAnalysis>(Code, Target,
-                                        [](ASTContext &Context, Environment &) {
-                                          return NoopAnalysis(Context);
-                                        },
-                                        std::move(Expectations),
-                                        {"-fsyntax-only", "-std=c++17"}),
+      test::checkDataflow<NoopAnalysis>(
+          Code, Target,
+          [](ASTContext &Context, Environment &) {
+            return NoopAnalysis(Context, /*ApplyBuiltinTransfer=*/false);
+          },
+          std::move(Expectations), {"-fsyntax-only", "-std=c++17"}),
       llvm::Succeeded());
 }
 
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 30238a40fec59..978768333c386 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -40,11 +40,14 @@ class TransferTest : public ::testing::Test {
 protected:
   template <typename Matcher>
   void runDataflow(llvm::StringRef Code, Matcher Match,
-                   LangStandard::Kind Std = LangStandard::lang_cxx17) {
+                   LangStandard::Kind Std = LangStandard::lang_cxx17,
+                   bool ApplyBuiltinTransfer = true) {
     ASSERT_THAT_ERROR(
         test::checkDataflow<NoopAnalysis>(
             Code, "target",
-            [](ASTContext &C, Environment &) { return NoopAnalysis(C); },
+            [ApplyBuiltinTransfer](ASTContext &C, Environment &) {
+              return NoopAnalysis(C, ApplyBuiltinTransfer);
+            },
             [&Match](
                 llvm::ArrayRef<
                     std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
@@ -58,6 +61,31 @@ class TransferTest : public ::testing::Test {
   }
 };
 
+TEST_F(TransferTest, IntVarDeclNotTrackedWhenTransferDisabled) {
+  std::string Code = R"(
+    void target() {
+      int Foo;
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](llvm::ArrayRef<
+             std::pair<std::string, DataflowAnalysisState<NoopLattice>>>
+             Results,
+         ASTContext &ASTCtx) {
+        ASSERT_THAT(Results, ElementsAre(Pair("p", _)));
+        const Environment &Env = Results[0].second.Env;
+
+        const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo");
+        ASSERT_THAT(FooDecl, NotNull());
+
+        EXPECT_EQ(Env.getStorageLocation(*FooDecl, SkipPast::None), nullptr);
+      },
+      LangStandard::lang_cxx17,
+      /*ApplyBuiltinTransfer=*/false);
+}
+
 TEST_F(TransferTest, IntVarDecl) {
   std::string Code = R"(
     void target() {
diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
index efb1cb728fa48..ee0bc3ed5e251 100644
--- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
@@ -51,6 +51,8 @@ using ::testing::UnorderedElementsAre;
 template <typename AnalysisT>
 class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback {
 public:
+  AnalysisCallback(AnalysisT (*MakeAnalysis)(ASTContext &))
+      : MakeAnalysis(MakeAnalysis) {}
   void run(const ast_matchers::MatchFinder::MatchResult &Result) override {
     assert(BlockStates.empty());
 
@@ -63,12 +65,13 @@ class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback {
     auto CFCtx = llvm::cantFail(
         ControlFlowContext::build(nullptr, Body, Result.Context));
 
-    AnalysisT Analysis(*Result.Context);
+    AnalysisT Analysis = MakeAnalysis(*Result.Context);
     DataflowAnalysisContext DACtx;
     Environment Env(DACtx);
     BlockStates = runDataflowAnalysis(CFCtx, Analysis, Env);
   }
 
+  AnalysisT (*MakeAnalysis)(ASTContext &);
   std::vector<
       llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
       BlockStates;
@@ -76,11 +79,11 @@ class AnalysisCallback : public ast_matchers::MatchFinder::MatchCallback {
 
 template <typename AnalysisT>
 std::vector<llvm::Optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>
-runAnalysis(llvm::StringRef Code) {
+runAnalysis(llvm::StringRef Code, AnalysisT (*MakeAnalysis)(ASTContext &)) {
   std::unique_ptr<ASTUnit> AST =
       tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
 
-  AnalysisCallback<AnalysisT> Callback;
+  AnalysisCallback<AnalysisT> Callback(MakeAnalysis);
   ast_matchers::MatchFinder Finder;
   Finder.addMatcher(
       ast_matchers::functionDecl(ast_matchers::hasName("target")).bind("func"),
@@ -91,9 +94,8 @@ runAnalysis(llvm::StringRef Code) {
 }
 
 TEST(DataflowAnalysisTest, NoopAnalysis) {
-  auto BlockStates = runAnalysis<NoopAnalysis>(R"(
-    void target() {}
-  )");
+  auto BlockStates = runAnalysis<NoopAnalysis>(
+      "void target() {}", [](ASTContext &C) { return NoopAnalysis(C, false); });
   EXPECT_EQ(BlockStates.size(), 2u);
   EXPECT_TRUE(BlockStates[0].hasValue());
   EXPECT_TRUE(BlockStates[1].hasValue());
@@ -118,8 +120,9 @@ class NonConvergingAnalysis
     : public DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice> {
 public:
   explicit NonConvergingAnalysis(ASTContext &Context)
-      : DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice>(Context) {
-  }
+      : DataflowAnalysis<NonConvergingAnalysis, NonConvergingLattice>(
+            Context,
+            /*ApplyBuiltinTransfer=*/false) {}
 
   static NonConvergingLattice initialElement() { return {0}; }
 
@@ -129,11 +132,13 @@ class NonConvergingAnalysis
 };
 
 TEST(DataflowAnalysisTest, NonConvergingAnalysis) {
-  auto BlockStates = runAnalysis<NonConvergingAnalysis>(R"(
+  auto BlockStates = runAnalysis<NonConvergingAnalysis>(
+      R"(
     void target() {
       while(true) {}
     }
-  )");
+  )",
+      [](ASTContext &C) { return NonConvergingAnalysis(C); });
   EXPECT_EQ(BlockStates.size(), 4u);
   EXPECT_TRUE(BlockStates[0].hasValue());
   EXPECT_TRUE(BlockStates[1].hasValue());

From 1a6e60d01f758014a66387f104cf8a79a6eb958a Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Wed, 26 Jan 2022 18:28:14 +0100
Subject: [PATCH 759/946] [Bazel] Update config.h to add the new define.

---
 .../llvm-project-overlay/clang/include/clang/Config/config.h   | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
index ba28d8606265f..ccf47729f3adb 100644
--- a/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
+++ b/utils/bazel/llvm-project-overlay/clang/include/clang/Config/config.h
@@ -92,6 +92,9 @@
 /* enable x86 relax relocations by default */
 #define ENABLE_X86_RELAX_RELOCATIONS 1
 
+/* enable IEEE binary128 as default long double format on PowerPC Linux. */
+#define PPC_LINUX_DEFAULT_IEEELONGDOUBLE 0
+
 /* Enable the experimental new pass manager by default */
 #define ENABLE_EXPERIMENTAL_NEW_PASS_MANAGER 0
 

From 0b9ee8ec1675ec52fb87d210a3f69945c33df717 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Wed, 26 Jan 2022 17:09:29 +0000
Subject: [PATCH 760/946] [AMDGPU] SILoadStoreOptimizer: Precommit tests for
 merging across a swizzled access

---
 llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
index 5c8fd612574be..38239e0315e1e 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
@@ -779,6 +779,25 @@ body:             |
 ---
 
 
+# GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzle
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 12, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+name: gfx9_tbuffer_load_merge_across_swizzle
+body:             |
+  bb.0.entry:
+    %0:sgpr_32 = COPY $sgpr0
+    %1:sgpr_32 = COPY $sgpr1
+    %2:sgpr_32 = COPY $sgpr2
+    %3:sgpr_32 = COPY $sgpr3
+    %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+    %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+...
+---
+
+
 #
 # GFX10 tests
 #
@@ -1557,3 +1576,21 @@ body:             |
 ...
 ---
 
+
+# GFX10-LABEL: name: gfx10_tbuffer_load_merge_across_swizzle
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+name: gfx10_tbuffer_load_merge_across_swizzle
+body:             |
+  bb.0.entry:
+    %0:sgpr_32 = COPY $sgpr0
+    %1:sgpr_32 = COPY $sgpr1
+    %2:sgpr_32 = COPY $sgpr2
+    %3:sgpr_32 = COPY $sgpr3
+    %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+    %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+...
+---

From db0631096e59c71b6f6b034306327b42d9184184 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 09:49:21 -0800
Subject: [PATCH 761/946] [gn build] Manually port D118110

---
 llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn b/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn
index 3182c4c1c0264..4a5692662684b 100644
--- a/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn
@@ -32,6 +32,7 @@ write_cmake_config("Config") {
     "ENABLE_X86_RELAX_RELOCATIONS=1",
     "CLANG_ENABLE_OBJC_REWRITER=1",  # FIXME: flag?
     "CLANG_SYSTEMZ_DEFAULT_ARCH=z10",
+    "PPC_LINUX_DEFAULT_IEEELONGDOUBLE="
   ]
 
   if (clang_enable_arcmt) {

From 913914f0f83bfc0ac1a04fe15bf98cb8ed5e118e Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 26 Jan 2022 10:23:56 -0800
Subject: [PATCH 762/946] [ELF] Simplify writing the Elf_Chdr header. NFC

And avoiding changing `size` in `writeTo`.
---
 lld/ELF/OutputSections.cpp | 18 +++++++-----------
 lld/ELF/OutputSections.h   |  2 +-
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/lld/ELF/OutputSections.cpp b/lld/ELF/OutputSections.cpp
index c4c11236a336d..c73d6e439238a 100644
--- a/lld/ELF/OutputSections.cpp
+++ b/lld/ELF/OutputSections.cpp
@@ -330,13 +330,6 @@ template <class ELFT> void OutputSection::maybeCompress() {
 
   llvm::TimeTraceScope timeScope("Compress debug sections");
 
-  // Create a section header.
-  zDebugHeader.resize(sizeof(Elf_Chdr));
-  auto *hdr = reinterpret_cast<Elf_Chdr *>(zDebugHeader.data());
-  hdr->ch_type = ELFCOMPRESS_ZLIB;
-  hdr->ch_size = size;
-  hdr->ch_addralign = alignment;
-
   // Write uncompressed data to a temporary zero-initialized buffer.
   auto buf = std::make_unique<uint8_t[]>(size);
   writeTo<ELFT>(buf.get());
@@ -369,6 +362,7 @@ template <class ELFT> void OutputSection::maybeCompress() {
 
   // Update section size and combine Alder-32 checksums.
   uint32_t checksum = 1;       // Initial Adler-32 value
+  compressed.uncompressedSize = size;
   size = sizeof(Elf_Chdr) + 2; // Elf_Chdir and zlib header
   for (size_t i = 0; i != numShards; ++i) {
     size += shardsOut[i].size();
@@ -405,9 +399,11 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) {
   // we've already compressed section contents. If that's the case,
   // just write it down.
   if (compressed.shards) {
-    memcpy(buf, zDebugHeader.data(), zDebugHeader.size());
-    buf += zDebugHeader.size();
-    size -= zDebugHeader.size();
+    auto *chdr = reinterpret_cast<typename ELFT::Chdr *>(buf);
+    chdr->ch_type = ELFCOMPRESS_ZLIB;
+    chdr->ch_size = compressed.uncompressedSize;
+    chdr->ch_addralign = alignment;
+    buf += sizeof(*chdr);
 
     // Compute shard offsets.
     auto offsets = std::make_unique<size_t[]>(compressed.numShards);
@@ -422,7 +418,7 @@ template <class ELFT> void OutputSection::writeTo(uint8_t *buf) {
              compressed.shards[i].size());
     });
 
-    write32be(buf + size - 4, compressed.checksum);
+    write32be(buf + (size - sizeof(*chdr) - 4), compressed.checksum);
     return;
   }
 
diff --git a/lld/ELF/OutputSections.h b/lld/ELF/OutputSections.h
index 957e6768ff6ea..ad656e49ceffa 100644
--- a/lld/ELF/OutputSections.h
+++ b/lld/ELF/OutputSections.h
@@ -29,6 +29,7 @@ struct CompressedData {
   std::unique_ptr<SmallVector<uint8_t, 0>[]> shards;
   uint32_t numShards = 0;
   uint32_t checksum = 0;
+  uint64_t uncompressedSize;
 };
 
 // This represents a section in an output file.
@@ -118,7 +119,6 @@ class OutputSection final : public SectionCommand, public SectionBase {
 
 private:
   // Used for implementation of --compress-debug-sections option.
-  SmallVector<uint8_t, 0> zDebugHeader;
   CompressedData compressed;
 
   std::array<uint8_t, 4> getFiller();

From cbc623c767c0a0363f32bb09fc8c18139649b2c7 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 09:43:44 -0800
Subject: [PATCH 763/946] [gn build] Make HAVE_MALLINFO2 a gn arg, default to
 false

D117916 broke some people because some distros are still using a glibc
older than 2.33. Add gn arg llvm_have_mallinfo2 and default to false for
now.

Differential Revision: https://reviews.llvm.org/D118269
---
 .../gn/secondary/llvm/include/llvm/Config/BUILD.gn   | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
index 07b6453ea9b8a..1a2efb698a419 100644
--- a/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/include/llvm/Config/BUILD.gn
@@ -46,6 +46,10 @@ declare_args() {
 
   # Iterate unordered llvm containers in reverse.
   llvm_enable_reverse_iteration = false
+
+  # glibc is at least 2.33 which has mallinfo2.
+  # TODO: remove this once nobody using the gn build is building against an old glibc.
+  llvm_have_mallinfo2 = false
 }
 
 write_cmake_config("abi-breaking") {
@@ -142,7 +146,6 @@ write_cmake_config("config") {
       "HAVE_LINK_H=1",
       "HAVE_LSEEK64=1",
       "HAVE_MALLINFO=1",
-      "HAVE_MALLINFO2=1",
       "HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1",
     ]
   } else {
@@ -151,7 +154,6 @@ write_cmake_config("config") {
       "HAVE_LINK_H=",
       "HAVE_LSEEK64=",
       "HAVE_MALLINFO=",
-      "HAVE_MALLINFO2=",
       "HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=",
     ]
   }
@@ -322,6 +324,12 @@ write_cmake_config("config") {
   } else {
     values += [ "LLVM_ENABLE_LIBXML2=" ]
   }
+
+  if (llvm_have_mallinfo2) {
+    values += [ "HAVE_MALLINFO2=1" ]
+  } else {
+    values += [ "HAVE_MALLINFO2=" ]
+  }
 }
 
 write_cmake_config("llvm-config") {

From 092f6ae29276f7b71f24744773fe1fda50d872bb Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Tue, 25 Jan 2022 17:46:29 -0800
Subject: [PATCH 764/946] [test][ASan][Win] Print more info when LoadLibrary
 fails

Reviewed By: hans

Differential Revision: https://reviews.llvm.org/D118206
---
 .../test/asan/TestCases/Windows/dll_host.cpp     | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/test/asan/TestCases/Windows/dll_host.cpp b/compiler-rt/test/asan/TestCases/Windows/dll_host.cpp
index 2a30a15589f5a..8032201684cd2 100644
--- a/compiler-rt/test/asan/TestCases/Windows/dll_host.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/dll_host.cpp
@@ -59,8 +59,20 @@ int main(int argc, char **argv) {
 
   HMODULE h = LoadLibrary(dll_name);
   if (!h) {
-    printf("Could not load DLL: %s (code: %lu)!\n",
-           dll_name, GetLastError());
+    DWORD err = GetLastError();
+    printf("Could not load DLL: %s (code: %lu)!\n", dll_name, err);
+
+    LPSTR buf;
+
+    FormatMessageA(FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM |
+                       FORMAT_MESSAGE_IGNORE_INSERTS,
+                   NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buf, 0,
+                   NULL);
+
+    printf("Error: %s\n", buf);
+
+    LocalFree(buf);
+
     return 102;
   }
 

From b1613f05ae0ce4efc6b6475ea4459957ebcb0150 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Thu, 13 Jan 2022 16:30:29 -0800
Subject: [PATCH 765/946] [NFC] Store Address's alignment into PointerIntPairs

This mitigates the extra memory caused by D115725.

On 32-bit arches where we only have 2 bits per PointerIntPair we fall
back to simply storing alignment separately.

Reviewed By: rnk, nikic

Differential Revision: https://reviews.llvm.org/D117262
---
 clang/lib/CodeGen/Address.h | 83 +++++++++++++++++++++++++++++--------
 1 file changed, 66 insertions(+), 17 deletions(-)

diff --git a/clang/lib/CodeGen/Address.h b/clang/lib/CodeGen/Address.h
index 37c20291c0e80..06b82e404cce4 100644
--- a/clang/lib/CodeGen/Address.h
+++ b/clang/lib/CodeGen/Address.h
@@ -14,30 +14,79 @@
 #ifndef LLVM_CLANG_LIB_CODEGEN_ADDRESS_H
 #define LLVM_CLANG_LIB_CODEGEN_ADDRESS_H
 
-#include "llvm/IR/Constants.h"
 #include "clang/AST/CharUnits.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Support/MathExtras.h"
 
 namespace clang {
 namespace CodeGen {
 
-/// An aligned address.
-class Address {
+namespace {
+// We try to save some space by using 6 bits over two PointerIntPairs to store
+// the alignment. However, some arches don't support 3 bits in a PointerIntPair
+// so we fallback to storing the alignment separately.
+template <typename T, bool = alignof(llvm::Value *) >= 8> class AddressImpl {};
+
+template <typename T> class AddressImpl<T, false> {
   llvm::Value *Pointer;
   llvm::Type *ElementType;
   CharUnits Alignment;
 
+public:
+  AddressImpl(llvm::Value *Pointer, llvm::Type *ElementType,
+              CharUnits Alignment)
+      : Pointer(Pointer), ElementType(ElementType), Alignment(Alignment) {}
+  llvm::Value *getPointer() const { return Pointer; }
+  llvm::Type *getElementType() const { return ElementType; }
+  CharUnits getAlignment() const { return Alignment; }
+};
+
+template <typename T> class AddressImpl<T, true> {
+  // Int portion stores upper 3 bits of the log of the alignment.
+  llvm::PointerIntPair<llvm::Value *, 3, unsigned> Pointer;
+  // Int portion stores lower 3 bits of the log of the alignment.
+  llvm::PointerIntPair<llvm::Type *, 3, unsigned> ElementType;
+
+public:
+  AddressImpl(llvm::Value *Pointer, llvm::Type *ElementType,
+              CharUnits Alignment)
+      : Pointer(Pointer), ElementType(ElementType) {
+    if (Alignment.isZero())
+      return;
+    // Currently the max supported alignment is much less than 1 << 63 and is
+    // guaranteed to be a power of 2, so we can store the log of the alignment
+    // into 6 bits.
+    assert(Alignment.isPowerOfTwo() && "Alignment cannot be zero");
+    auto AlignLog = llvm::Log2_64(Alignment.getQuantity());
+    assert(AlignLog < (1 << 6) && "cannot fit alignment into 6 bits");
+    this->Pointer.setInt(AlignLog >> 3);
+    this->ElementType.setInt(AlignLog & 7);
+  }
+  llvm::Value *getPointer() const { return Pointer.getPointer(); }
+  llvm::Type *getElementType() const { return ElementType.getPointer(); }
+  CharUnits getAlignment() const {
+    unsigned AlignLog = (Pointer.getInt() << 3) | ElementType.getInt();
+    return CharUnits::fromQuantity(1UL << AlignLog);
+  }
+};
+} // namespace
+
+/// An aligned address.
+class Address {
+  AddressImpl<void> A;
+
 protected:
-  Address(std::nullptr_t) : Pointer(nullptr), ElementType(nullptr) {}
+  Address(std::nullptr_t) : A(nullptr, nullptr, CharUnits::Zero()) {}
 
 public:
-  Address(llvm::Value *pointer, llvm::Type *elementType, CharUnits alignment)
-      : Pointer(pointer), ElementType(elementType), Alignment(alignment) {
-    assert(pointer != nullptr && "Pointer cannot be null");
-    assert(elementType != nullptr && "Element type cannot be null");
-    assert(llvm::cast<llvm::PointerType>(pointer->getType())
-               ->isOpaqueOrPointeeTypeMatches(elementType) &&
+  Address(llvm::Value *Pointer, llvm::Type *ElementType, CharUnits Alignment)
+      : A(Pointer, ElementType, Alignment) {
+    assert(Pointer != nullptr && "Pointer cannot be null");
+    assert(ElementType != nullptr && "Element type cannot be null");
+    assert(llvm::cast<llvm::PointerType>(Pointer->getType())
+               ->isOpaqueOrPointeeTypeMatches(ElementType) &&
            "Incorrect pointer element type");
-    assert(!alignment.isZero() && "Alignment cannot be zero");
   }
 
   // Deprecated: Use constructor with explicit element type instead.
@@ -46,11 +95,11 @@ class Address {
                 Alignment) {}
 
   static Address invalid() { return Address(nullptr); }
-  bool isValid() const { return Pointer != nullptr; }
+  bool isValid() const { return A.getPointer() != nullptr; }
 
   llvm::Value *getPointer() const {
     assert(isValid());
-    return Pointer;
+    return A.getPointer();
   }
 
   /// Return the type of the pointer value.
@@ -61,7 +110,7 @@ class Address {
   /// Return the type of the values stored in this address.
   llvm::Type *getElementType() const {
     assert(isValid());
-    return ElementType;
+    return A.getElementType();
   }
 
   /// Return the address space that this address resides in.
@@ -77,19 +126,19 @@ class Address {
   /// Return the alignment of this pointer.
   CharUnits getAlignment() const {
     assert(isValid());
-    return Alignment;
+    return A.getAlignment();
   }
 
   /// Return address with different pointer, but same element type and
   /// alignment.
   Address withPointer(llvm::Value *NewPointer) const {
-    return Address(NewPointer, ElementType, Alignment);
+    return Address(NewPointer, getElementType(), getAlignment());
   }
 
   /// Return address with different alignment, but same pointer and element
   /// type.
   Address withAlignment(CharUnits NewAlignment) const {
-    return Address(Pointer, ElementType, NewAlignment);
+    return Address(getPointer(), getElementType(), NewAlignment);
   }
 };
 

From e6564f39c787d6aa06ad5573e55db3439ed3a263 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Fri, 21 Jan 2022 16:23:09 -0500
Subject: [PATCH 766/946] AMDGPU: Emit user sgpr count directives in text asm

We were emitting these in the object file but not printing them.
---
 llvm/docs/AMDGPUUsage.rst                     |  9 +-
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      | 31 +++++--
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |  4 +
 llvm/test/CodeGen/AMDGPU/code-object-v3.ll    |  2 +
 .../AMDGPU/indirect-call-known-callees.ll     |  1 +
 llvm/test/CodeGen/AMDGPU/kernarg-size.ll      |  1 +
 .../CodeGen/AMDGPU/stack-realign-kernel.ll    |  6 ++
 llvm/test/MC/AMDGPU/hsa-gfx10-v3.s            |  1 +
 llvm/test/MC/AMDGPU/hsa-v3.s                  |  5 +-
 llvm/test/MC/AMDGPU/hsa-v4.s                  |  6 +-
 llvm/test/MC/AMDGPU/user-sgpr-count-diag.s    | 17 ++++
 llvm/test/MC/AMDGPU/user-sgpr-count.s         | 87 +++++++++++++++++++
 12 files changed, 155 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
 create mode 100644 llvm/test/MC/AMDGPU/user-sgpr-count.s

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index f592660f4965e..5f0abbec54fb5 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4114,9 +4114,10 @@ The fields used by CP for code objects before V3 also match those specified in
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.SCRATCH_EN``.
      5:1     5 bits  USER_SGPR_COUNT                 The total number of SGPR
-                                                     user data registers
-                                                     requested. This number must
-                                                     match the number of user
+                                                     user data
+                                                     registers requested. This
+                                                     number must be greater than
+                                                     or equal to the number of user
                                                      data registers enabled.
 
                                                      Used by CP to set up
@@ -12107,6 +12108,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
                                                                                                :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
      ``.amdhsa_kernarg_size``                                 0                   GFX6-GFX10   Controls KERNARG_SIZE in
                                                                                                :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+     ``.amdhsa_user_sgpr_count``                              0                   GFX6-GFX10   Controls USER_SGPR_COUNT in COMPUTE_PGM_RSRC2
+                                                                                               :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`
      ``.amdhsa_user_sgpr_private_segment_buffer``             0                   GFX6-GFX10   Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in
                                                                                                :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
      ``.amdhsa_user_sgpr_dispatch_ptr``                       0                   GFX6-GFX10   Controls ENABLE_SGPR_DISPATCH_PTR in
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index c71205b17a1a5..f6c54fe3e7bf9 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -4568,7 +4568,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   uint64_t AccumOffset = 0;
   SMRange SGPRRange;
   uint64_t NextFreeSGPR = 0;
-  unsigned UserSGPRCount = 0;
+
+  // Count the number of user SGPRs implied from the enabled feature bits.
+  unsigned ImpliedUserSGPRCount = 0;
+
+  // Track if the asm explicitly contains the directive for the user SGPR
+  // count.
+  Optional<unsigned> ExplicitUserSGPRCount;
   bool ReserveVCC = true;
   bool ReserveFlatScr = true;
   Optional<bool> EnableWavefrontSize32;
@@ -4617,6 +4623,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
         return OutOfRangeError(ValRange);
       KD.kernarg_size = Val;
+    } else if (ID == ".amdhsa_user_sgpr_count") {
+      ExplicitUserSGPRCount = Val;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,
@@ -4626,31 +4634,31 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                        Val, ValRange);
       if (Val)
-        UserSGPRCount += 4;
+        ImpliedUserSGPRCount += 4;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                        ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                        ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                        Val, ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                        ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
       if (hasArchitectedFlatScratch())
         return Error(IDRange.Start,
@@ -4660,13 +4668,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                        ValRange);
       if (Val)
-        UserSGPRCount += 2;
+        ImpliedUserSGPRCount += 2;
     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
       PARSE_BITS_ENTRY(KD.kernel_code_properties,
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                        Val, ValRange);
       if (Val)
-        UserSGPRCount += 1;
+        ImpliedUserSGPRCount += 1;
     } else if (ID == ".amdhsa_wavefront_size32") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
@@ -4850,6 +4858,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                   SGPRBlocks);
 
+  if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
+    return TokError("amdgpu_user_sgpr_count smaller than than implied by "
+                    "enabled user SGPRs");
+
+  unsigned UserSGPRCount =
+      ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
+
   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
     return TokError("too many user SGPRs enabled");
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 9a9a2c973f448..9578bdb0bad07 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -319,6 +319,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
      << KD.private_segment_fixed_size << '\n';
   OS << "\t\t.amdhsa_kernarg_size " << KD.kernarg_size << '\n';
 
+  PRINT_FIELD(OS, ".amdhsa_user_sgpr_count", KD,
+              compute_pgm_rsrc2,
+              amdhsa::COMPUTE_PGM_RSRC2_USER_SGPR_COUNT);
+
   if (!hasArchitectedFlatScratch(STI))
     PRINT_FIELD(
         OS, ".amdhsa_user_sgpr_private_segment_buffer", KD,
diff --git a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
index 8389bbd1bfb56..2bc34cedd1e03 100644
--- a/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/code-object-v3.ll
@@ -12,6 +12,7 @@
 ; OSABI-AMDHSA-ASM: .section .rodata,#alloc
 ; OSABI-AMDHSA-ASM: .p2align 6
 ; OSABI-AMDHSA-ASM: .amdhsa_kernel fadd
+; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_count 6
 ; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
 ; OSABI-AMDHSA-ASM:     .amdhsa_next_free_vgpr 3
@@ -30,6 +31,7 @@
 ; OSABI-AMDHSA-ASM: .section .rodata,#alloc
 ; OSABI-AMDHSA-ASM: .p2align 6
 ; OSABI-AMDHSA-ASM: .amdhsa_kernel fsub
+; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_count 6
 ; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; OSABI-AMDHSA-ASM:     .amdhsa_user_sgpr_kernarg_segment_ptr 1
 ; OSABI-AMDHSA-ASM:     .amdhsa_next_free_vgpr 3
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
index 2afce5352b60e..aebbe33fd236c 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-known-callees.ll
@@ -57,6 +57,7 @@ define amdgpu_kernel void @indirect_call_known_no_special_inputs() {
 ; CHECK-NEXT:    s_endpgm
 
 ; CHECK: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_user_sgpr_count 6
 ; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
 ; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
 ; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
index ff409b938319b..d9fdfb0d101af 100644
--- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
@@ -9,6 +9,7 @@ declare void @llvm.debugtrap() #1
 ; HSA-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; HSA-NEXT:     .amdhsa_private_segment_fixed_size 0
 ; HSA-NEXT:     .amdhsa_kernarg_size 8
+; HSA-NEXT:     .amdhsa_user_sgpr_count 8
 ; HSA-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; HSA:      .end_amdhsa_kernel
 
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
index 8e3e7776fb95e..7cc2b8214a36d 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -18,6 +18,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
 ; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; VI-NEXT:     .amdhsa_private_segment_fixed_size 256
 ; VI-NEXT:     .amdhsa_kernarg_size 0
+; VI-NEXT:     .amdhsa_user_sgpr_count 6
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -65,6 +66,7 @@ define amdgpu_kernel void @max_alignment_128() #0 {
 ; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 256
 ; GFX9-NEXT:     .amdhsa_kernarg_size 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -119,6 +121,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
 ; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; VI-NEXT:     .amdhsa_private_segment_fixed_size 8
 ; VI-NEXT:     .amdhsa_kernarg_size 0
+; VI-NEXT:     .amdhsa_user_sgpr_count 6
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -166,6 +169,7 @@ define amdgpu_kernel void @stackrealign_attr() #1 {
 ; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 8
 ; GFX9-NEXT:     .amdhsa_kernarg_size 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -220,6 +224,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
 ; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; VI-NEXT:     .amdhsa_private_segment_fixed_size 128
 ; VI-NEXT:     .amdhsa_kernarg_size 0
+; VI-NEXT:     .amdhsa_user_sgpr_count 6
 ; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
@@ -267,6 +272,7 @@ define amdgpu_kernel void @alignstack_attr() #2 {
 ; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 128
 ; GFX9-NEXT:     .amdhsa_kernarg_size 0
+; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
 ; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
 ; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
 ; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
index b7a7ad824851e..690a86acbccc3 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
@@ -123,6 +123,7 @@ special_sgpr:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s
index 76b82d4ae3321..9f854986d7bc4 100644
--- a/llvm/test/MC/AMDGPU/hsa-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-v3.s
@@ -132,6 +132,7 @@ disabled_user_sgpr:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
@@ -276,7 +277,7 @@ v_mov_b32_e32 v16, s3
 .end_amdgpu_metadata
 
 // ASM:      	.amdgpu_metadata
-// ASM:      amdhsa.kernels:  
+// ASM:      amdhsa.kernels:
 // ASM:        - .group_segment_fixed_size: 16
 // ASM:          .kernarg_segment_align: 64
 // ASM:          .kernarg_segment_size: 8
@@ -297,7 +298,7 @@ v_mov_b32_e32 v16, s3
 // ASM:          .symbol:         'amd_kernel_code_t_minimal@kd'
 // ASM:          .vgpr_count:     40
 // ASM:          .wavefront_size: 128
-// ASM:      amdhsa.version:  
+// ASM:      amdhsa.version:
 // ASM-NEXT:   - 3
 // ASM-NEXT:   - 0
 // ASM:      	.end_amdgpu_metadata
diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s
index 61dbf75c3fa93..6a824b8bcc7b9 100644
--- a/llvm/test/MC/AMDGPU/hsa-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-v4.s
@@ -94,6 +94,7 @@ disabled_user_sgpr:
   .amdhsa_group_segment_fixed_size 1
   .amdhsa_private_segment_fixed_size 1
   .amdhsa_kernarg_size 8
+  .amdhsa_user_sgpr_count 15
   .amdhsa_user_sgpr_private_segment_buffer 1
   .amdhsa_user_sgpr_dispatch_ptr 1
   .amdhsa_user_sgpr_queue_ptr 1
@@ -132,6 +133,7 @@ disabled_user_sgpr:
 // ASM-NEXT: .amdhsa_group_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_private_segment_fixed_size 1
 // ASM-NEXT: .amdhsa_kernarg_size 8
+// ASM-NEXT: .amdhsa_user_sgpr_count 15
 // ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
 // ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
 // ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
@@ -276,7 +278,7 @@ v_mov_b32_e32 v16, s3
 .end_amdgpu_metadata
 
 // ASM:      	.amdgpu_metadata
-// ASM:      amdhsa.kernels:  
+// ASM:      amdhsa.kernels:
 // ASM:        - .group_segment_fixed_size: 16
 // ASM:          .kernarg_segment_align: 64
 // ASM:          .kernarg_segment_size: 8
@@ -297,7 +299,7 @@ v_mov_b32_e32 v16, s3
 // ASM:          .symbol:         'amd_kernel_code_t_minimal@kd'
 // ASM:          .vgpr_count:     40
 // ASM:          .wavefront_size: 128
-// ASM:      amdhsa.version:  
+// ASM:      amdhsa.version:
 // ASM-NEXT:   - 3
 // ASM-NEXT:   - 0
 // ASM:      	.end_amdgpu_metadata
diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
new file mode 100644
index 0000000000000..edaeafae22efd
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
@@ -0,0 +1,17 @@
+// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s
+
+.amdhsa_kernel implied_count_too_low_0
+  .amdhsa_user_sgpr_count 0
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+// ERR: [[@LINE+1]]:19: error: amdgpu_user_sgpr_count smaller than than implied by enabled user SGPRs
+.end_amdhsa_kernel
+
+.amdhsa_kernel implied_count_too_low_1
+  .amdhsa_user_sgpr_count 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+// ERR: [[@LINE+1]]:19: error: amdgpu_user_sgpr_count smaller than than implied by enabled user SGPRs
+.end_amdhsa_kernel
diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s
new file mode 100644
index 0000000000000..ab363f91d334b
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s
@@ -0,0 +1,87 @@
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
+
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count
+// ASM: .amdhsa_user_sgpr_count 15
+.amdhsa_kernel user_sgprs_implied_count_all
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_user_sgpr_dispatch_ptr 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_user_sgpr_dispatch_id 1
+  .amdhsa_user_sgpr_flat_scratch_init 1
+  .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_0
+// ASM: .amdhsa_user_sgpr_count 7
+.amdhsa_kernel user_sgprs_implied_count_0
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_user_sgpr_flat_scratch_init 1
+  .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_1
+// ASM: .amdhsa_user_sgpr_count 9
+.amdhsa_kernel user_sgprs_implied_count_1
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count_private_segment_buffer
+// ASM: .amdhsa_user_sgpr_count 4
+  .amdhsa_kernel user_sgprs_implied_count_private_segment_buffer
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_16
+.amdhsa_kernel explicit_user_sgpr_count_16
+  .amdhsa_user_sgpr_count 16
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_0
+// ASM: .amdhsa_user_sgpr_count 0
+  .amdhsa_kernel explicit_user_sgpr_count_0
+  .amdhsa_user_sgpr_count 0
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+// ASM-LABEL: .amdhsa_kernel explicit_user_sgpr_count_1
+// ASM: .amdhsa_user_sgpr_count 1
+.amdhsa_kernel explicit_user_sgpr_count_1
+  .amdhsa_user_sgpr_count 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel
+
+.amdhsa_kernel explicit_user_sgpr_count_larger_than_implied
+  .amdhsa_user_sgpr_count 12
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+.end_amdhsa_kernel

From 216002c4bb708e6d6fd1895c8ea636470961f824 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 25 Jan 2022 13:29:22 -0800
Subject: [PATCH 767/946] Fix UB in DwarfExpression::emitLegacyZExt()

A shift-left > 63 triggers a UBSAN failure. This patch kicks the can
down the road (to the consumer) by emitting a more compact
representation of the shift computation in DWARF expressions.

Differential Revision: https://reviews.llvm.org/D118183
---
 .../CodeGen/AsmPrinter/DwarfExpression.cpp    | 22 ++++++++++++++++---
 llvm/test/DebugInfo/X86/convert-debugloc.ll   |  9 ++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 37407c98e75f8..ee932d1051079 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -681,9 +681,25 @@ void DwarfExpression::emitLegacySExt(unsigned FromBits) {
 }
 
 void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
-  // (X & (1 << FromBits - 1))
-  emitOp(dwarf::DW_OP_constu);
-  emitUnsigned((1ULL << FromBits) - 1);
+  // Heuristic to decide the most efficient encoding.
+  // A ULEB can encode 7 1-bits per byte.
+  if (FromBits / 7 < 1+1+1+1+1) {
+    // (X & (1 << FromBits - 1))
+    emitOp(dwarf::DW_OP_constu);
+    emitUnsigned((1ULL << FromBits) - 1);
+  } else {
+    // Note that the DWARF 4 stack consists of pointer-sized elements,
+    // so technically it doesn't make sense to shift left more than 64
+    // bits. We leave that for the consumer to decide though. LLDB for
+    // example uses APInt for the stack elements and can still deal
+    // with this.
+    emitOp(dwarf::DW_OP_lit1);
+    emitOp(dwarf::DW_OP_constu);
+    emitUnsigned(FromBits);
+    emitOp(dwarf::DW_OP_shl);
+    emitOp(dwarf::DW_OP_lit1);
+    emitOp(dwarf::DW_OP_minus);
+  }
   emitOp(dwarf::DW_OP_and);
 }
 
diff --git a/llvm/test/DebugInfo/X86/convert-debugloc.ll b/llvm/test/DebugInfo/X86/convert-debugloc.ll
index 21e41dcc4c2a9..2ff6e96621900 100644
--- a/llvm/test/DebugInfo/X86/convert-debugloc.ll
+++ b/llvm/test/DebugInfo/X86/convert-debugloc.ll
@@ -64,11 +64,17 @@
 ; NOCONV:       DW_AT_location (
 ; NOCONV:         {{.*}}, DW_OP_dup, DW_OP_constu 0x7, DW_OP_shr, DW_OP_lit0, DW_OP_not, DW_OP_mul, DW_OP_constu 0x8, DW_OP_shl, DW_OP_or, DW_OP_stack_value)
 ; NOCONV:       DW_AT_name ("y")
+; NOCONV:     DW_TAG_variable
+; NOCONV:       DW_AT_location (
+; NOCONV:         DW_OP_constu 0x40, DW_OP_lit0, DW_OP_plus, DW_OP_lit1, DW_OP_constu 0x40, DW_OP_shl, DW_OP_lit1, DW_OP_minus, DW_OP_and, DW_OP_stack_value)
+; NOCONV:       DW_AT_name ("z")
 ; NOCONV:     NULL
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("signed char")
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("int")
+; NOCONV:   DW_TAG_base_type
+; NOCONV:     DW_AT_name ("unsigned long long")
 ; NOCONV:   NULL
 
 
@@ -81,6 +87,7 @@ entry:
 ;; will not attempt to eliminate.  At the moment, only "convert" ops are folded.
 ;; If you have to change the expression, the expected DWO_id also changes.
   call void @llvm.dbg.value(metadata i8 32, metadata !13, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !15
+  call void @llvm.dbg.value(metadata i8 64, metadata !17, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 128, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !15
   ret i8 %x, !dbg !16
 }
 
@@ -111,3 +118,5 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !15 = !DILocation(line: 3, column: 14, scope: !7)
 !16 = !DILocation(line: 4, column: 3, scope: !7)
+!17 = !DILocalVariable(name: "z", scope: !7, file: !1, line: 3, type: !18)
+!18 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)

From 4691f00a63750b14970b71bc9dbb9bd04c8335a8 Mon Sep 17 00:00:00 2001
From: jonmeow <46229924+jonmeow@users.noreply.github.com>
Date: Wed, 26 Jan 2022 11:02:20 -0800
Subject: [PATCH 768/946] Initialize terminfo.bzl linkopts to None

The underlying issue here is that line 125 checks if linkopts was assigned (by line 120) but it's not initialized, so that becomes a crash. This was noticed by someone trying to use Docker, so no terminfo library installed.

Reviewed By: GMNGeoffrey

Differential Revision: https://reviews.llvm.org/D118270
---
 utils/bazel/terminfo.bzl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/terminfo.bzl b/utils/bazel/terminfo.bzl
index 2505f76272398..70281e1119656 100644
--- a/utils/bazel/terminfo.bzl
+++ b/utils/bazel/terminfo.bzl
@@ -113,6 +113,7 @@ def _llvm_terminfo_system_impl(repository_ctx):
         # these would be provided as lists, but Bazel doesn't currently
         # support that. See: https://github.com/bazelbuild/bazel/issues/12178
         linkopts_candidates = [[x] for x in repository_ctx.attr.candidate_system_linkopts]
+        linkopts = None
 
         # For each candidate, try to use it to link our test source file.
         for linkopts_candidate in linkopts_candidates:

From b3d94b199c22fc662a897054b4dcad4e33649592 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 26 Jan 2022 10:35:53 -0800
Subject: [PATCH 769/946] [RISCV] Remove references to 'B' extension from
 AssemblerPredicate and SubtargetFeature strings.

For Zba/Zbb/Zbc/Zbs I've removed the 'B' completely and used the
extension names as presented at the start of Chapter 1 of the
1.0.0 Bitmanipulation spec.

For the unratified extensions, I've replaced 'B' with 'Zb' and
otherwise left them unchanged.

Reviewed By: asb

Differential Revision: https://reviews.llvm.org/D117822
---
 llvm/lib/Target/RISCV/RISCV.td        | 52 +++++++++++++--------------
 llvm/test/MC/RISCV/rv32i-invalid.s    |  4 +++
 llvm/test/MC/RISCV/rv32zbkc-invalid.s |  2 +-
 llvm/test/MC/RISCV/rv32zbkx-invalid.s |  2 +-
 4 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index aea082d0a0f04..0462a26a0c55e 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -66,82 +66,82 @@ def HasStdExtC : Predicate<"Subtarget->hasStdExtC()">,
 
 def FeatureStdExtZba
     : SubtargetFeature<"zba", "HasStdExtZba", "true",
-                       "'Zba' (Address calculation 'B' Instructions)">;
+                       "'Zba' (Address Generation Instructions)">;
 def HasStdExtZba : Predicate<"Subtarget->hasStdExtZba()">,
                              AssemblerPredicate<(all_of FeatureStdExtZba),
-                             "'Zba' (Address calculation 'B' Instructions)">;
+                             "'Zba' (Address Generation Instructions)">;
 def NotHasStdExtZba : Predicate<"!Subtarget->hasStdExtZba()">;
 
 def FeatureStdExtZbb
     : SubtargetFeature<"zbb", "HasStdExtZbb", "true",
-                       "'Zbb' (Base 'B' Instructions)">;
+                       "'Zbb' (Basic Bit-Manipulation)">;
 def HasStdExtZbb : Predicate<"Subtarget->hasStdExtZbb()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbb),
-                             "'Zbb' (Base 'B' Instructions)">;
+                             "'Zbb' (Basic Bit-Manipulation)">;
 
 def FeatureStdExtZbc
     : SubtargetFeature<"zbc", "HasStdExtZbc", "true",
-                       "'Zbc' (Carry-Less 'B' Instructions)">;
+                       "'Zbc' (Carry-Less Multiplication)">;
 def HasStdExtZbc : Predicate<"Subtarget->hasStdExtZbc()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbc),
-                             "'Zbc' (Carry-Less 'B' Instructions)">;
+                             "'Zbc' (Carry-Less Multiplication)">;
 
 def FeatureStdExtZbe
     : SubtargetFeature<"experimental-zbe", "HasStdExtZbe", "true",
-                       "'Zbe' (Extract-Deposit 'B' Instructions)">;
+                       "'Zbe' (Extract-Deposit 'Zb' Instructions)">;
 def HasStdExtZbe : Predicate<"Subtarget->hasStdExtZbe()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbe),
-                             "'Zbe' (Extract-Deposit 'B' Instructions)">;
+                             "'Zbe' (Extract-Deposit 'Zb' Instructions)">;
 
 def FeatureStdExtZbf
     : SubtargetFeature<"experimental-zbf", "HasStdExtZbf", "true",
-                       "'Zbf' (Bit-Field 'B' Instructions)">;
+                       "'Zbf' (Bit-Field 'Zb' Instructions)">;
 def HasStdExtZbf : Predicate<"Subtarget->hasStdExtZbf()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbf),
-                             "'Zbf' (Bit-Field 'B' Instructions)">;
+                             "'Zbf' (Bit-Field 'Zb' Instructions)">;
 
 def FeatureStdExtZbm
     : SubtargetFeature<"experimental-zbm", "HasStdExtZbm", "true",
-                       "'Zbm' (Matrix 'B' Instructions)">;
+                       "'Zbm' (Matrix 'Zb' Instructions)">;
 def HasStdExtZbm : Predicate<"Subtarget->hasStdExtZbm()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbm),
-                             "'Zbm' (Matrix 'B' Instructions)">;
+                             "'Zbm' (Matrix 'Zb' Instructions)">;
 
 def FeatureStdExtZbp
     : SubtargetFeature<"experimental-zbp", "HasStdExtZbp", "true",
-                       "'Zbp' (Permutation 'B' Instructions)">;
+                       "'Zbp' (Permutation 'Zb' Instructions)">;
 def HasStdExtZbp : Predicate<"Subtarget->hasStdExtZbp()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbp),
-                             "'Zbp' (Permutation 'B' Instructions)">;
+                             "'Zbp' (Permutation 'Zb' Instructions)">;
 
 def FeatureStdExtZbr
     : SubtargetFeature<"experimental-zbr", "HasStdExtZbr", "true",
-                       "'Zbr' (Polynomial Reduction 'B' Instructions)">;
+                       "'Zbr' (Polynomial Reduction 'Zb' Instructions)">;
 def HasStdExtZbr : Predicate<"Subtarget->hasStdExtZbr()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbr),
-                             "'Zbr' (Polynomial Reduction 'B' Instructions)">;
+                             "'Zbr' (Polynomial Reduction 'Zb' Instructions)">;
 
 def FeatureStdExtZbs
     : SubtargetFeature<"zbs", "HasStdExtZbs", "true",
-                       "'Zbs' (Single-Bit 'B' Instructions)">;
+                       "'Zbs' (Single-Bit Instructions)">;
 def HasStdExtZbs : Predicate<"Subtarget->hasStdExtZbs()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbs),
-                             "'Zbs' (Single-Bit 'B' Instructions)">;
+                             "'Zbs' (Single-Bit Instructions)">;
 
 def FeatureStdExtZbt
     : SubtargetFeature<"experimental-zbt", "HasStdExtZbt", "true",
-                       "'Zbt' (Ternary 'B' Instructions)">;
+                       "'Zbt' (Ternary 'Zb' Instructions)">;
 def HasStdExtZbt : Predicate<"Subtarget->hasStdExtZbt()">,
                              AssemblerPredicate<(all_of FeatureStdExtZbt),
-                             "'Zbt' (Ternary 'B' Instructions)">;
+                             "'Zbt' (Ternary 'Zb' Instructions)">;
 
 // Some instructions belong to both the basic and the permutation
 // subextensions. They should be enabled if either has been specified.
 def HasStdExtZbbOrZbp
     : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp()">,
                 AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp),
-                                   "'Zbb' (Base 'B' Instructions) or "
-                                   "'Zbp' (Permutation 'B' Instructions)">;
+                                   "'Zbb' (Basic Bit-Manipulation) or "
+                                   "'Zbp' (Permutation 'Zb' Instructions)">;
 
 def FeatureStdExtZbkb
     : SubtargetFeature<"zbkb", "HasStdExtZbkb", "true",
@@ -166,14 +166,14 @@ def HasStdExtZbpOrZbkx
 def HasStdExtZbpOrZbkb
     : Predicate<"Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">,
                 AssemblerPredicate<(any_of FeatureStdExtZbp, FeatureStdExtZbkb),
-                                   "'Zbp' (Permutation 'B' Instructions) or "
+                                   "'Zbp' (Permutation 'Zb' Instructions) or "
                                    "'Zbkb' (Bitmanip instructions for Cryptography)">;
 
 def HasStdExtZbbOrZbpOrZbkb
     : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">,
                 AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp, FeatureStdExtZbkb),
-                                   "'Zbb' (Base 'B' Instructions) or "
-                                   "'Zbp' (Permutation 'B' Instructions) or "
+                                   "'Zbb' (Basic Bit-Manipulation) or "
+                                   "'Zbp' (Permutation 'Zb' Instructions) or "
                                    "'Zbkb' (Bitmanip instructions for Cryptography)">;
 
 // The Carry-less multiply subextension for cryptography is a subset of basic carry-less multiply subextension. The former should be enabled if the latter is enabled.
@@ -188,7 +188,7 @@ def HasStdExtZbkc
 def HasStdExtZbcOrZbkc
     : Predicate<"Subtarget->hasStdExtZbc() || Subtarget->hasStdExtZbkc()">,
                 AssemblerPredicate<(any_of FeatureStdExtZbc, FeatureStdExtZbkc),
-                                   "'Zbc' (Carry-Less 'B' Instructions) or "
+                                   "'Zbc' (Carry-Less Multiplication) or "
                                    "'Zbkc' (Carry-less multiply instructions for Cryptography)">;
 
 def FeatureStdExtZknd
diff --git a/llvm/test/MC/RISCV/rv32i-invalid.s b/llvm/test/MC/RISCV/rv32i-invalid.s
index 5206259b4f692..ce7581dac5d5d 100644
--- a/llvm/test/MC/RISCV/rv32i-invalid.s
+++ b/llvm/test/MC/RISCV/rv32i-invalid.s
@@ -173,6 +173,10 @@ mul a4, ra, s0 # CHECK: :[[@LINE]]:1: error: instruction requires the following:
 amomaxu.w s5, s4, (s3) # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'A' (Atomic Instructions)
 fadd.s ft0, ft1, ft2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'F' (Single-Precision Floating-Point)
 fadd.h ft0, ft1, ft2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zfh' (Half-Precision Floating-Point)
+sh1add a0, a1, a2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zba' (Address Generation Instructions)
+clz a0, a1 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbb' (Basic Bit-Manipulation)
+clmul a0, a1, a2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbc' (Carry-Less Multiplication)
+bset a0, a1, a2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbs' (Single-Bit Instructions)
 
 # Using floating point registers when integer registers are expected
 addi a2, ft0, 24 # CHECK: :[[@LINE]]:10: error: invalid operand for instruction
diff --git a/llvm/test/MC/RISCV/rv32zbkc-invalid.s b/llvm/test/MC/RISCV/rv32zbkc-invalid.s
index dba625fe20c2a..81eb4e58ae154 100644
--- a/llvm/test/MC/RISCV/rv32zbkc-invalid.s
+++ b/llvm/test/MC/RISCV/rv32zbkc-invalid.s
@@ -6,4 +6,4 @@ clmul t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
 clmulh t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
 
 # Undefined zbc instruction in zbkc
-clmulr t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbc' (Carry-Less 'B' Instructions)
+clmulr t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbc' (Carry-Less Multiplication)
diff --git a/llvm/test/MC/RISCV/rv32zbkx-invalid.s b/llvm/test/MC/RISCV/rv32zbkx-invalid.s
index d79a2afb391a2..94af3ac65766a 100644
--- a/llvm/test/MC/RISCV/rv32zbkx-invalid.s
+++ b/llvm/test/MC/RISCV/rv32zbkx-invalid.s
@@ -6,4 +6,4 @@ xperm8 t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
 xperm4 t0, t1 # CHECK: :[[@LINE]]:1: error: too few operands for instruction
 
 # Undefined Zbp instruction in Zbkx
-xperm.h t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbp' (Permutation 'B' Instructions)
\ No newline at end of file
+xperm.h t0, t1, t2 # CHECK: :[[@LINE]]:1: error: instruction requires the following: 'Zbp' (Permutation 'Zb' Instructions)

From e08f3bfe587d8dc9f177ef67b1201837b3c90523 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfield@gmail.com>
Date: Wed, 26 Jan 2022 19:17:30 +0000
Subject: [PATCH 770/946] [openmp] Disable build of old runtimes by default

The old runtime is not tested by CI. Disable the build prior to the llvm-14 branch.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D118268
---
 openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt | 4 ++--
 openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt  | 2 +-
 openmp/libomptarget/plugins/cuda/CMakeLists.txt      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
index 5ff5dde1c45e7..4060130730248 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/CMakeLists.txt
@@ -10,8 +10,8 @@
 #
 ##===----------------------------------------------------------------------===##
 
-set(LIBOMPTARGET_BUILD_AMDGCN_BCLIB TRUE CACHE BOOL
-  "Can be set to false to disable building this library.")
+set(LIBOMPTARGET_BUILD_AMDGCN_BCLIB FALSE CACHE BOOL
+  "Can be set to true to enable building this library.")
 
 if (NOT LIBOMPTARGET_BUILD_AMDGCN_BCLIB)
   libomptarget_say("Not building AMDGCN device RTL: Disabled by LIBOMPTARGET_BUILD_AMDGCN_BCLIB")
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
index 16126891b6521..bbfa400bb2c8d 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -11,7 +11,7 @@
 ##===----------------------------------------------------------------------===##
 
 # By default we will build NVPTX deviceRTL on a CUDA free system
-set(LIBOMPTARGET_BUILD_NVPTX_BCLIB TRUE CACHE BOOL
+set(LIBOMPTARGET_BUILD_NVPTX_BCLIB FALSE CACHE BOOL
   "Whether build NVPTX deviceRTL on CUDA free system.")
 
 if (NOT (LIBOMPTARGET_DEP_CUDA_FOUND OR LIBOMPTARGET_BUILD_NVPTX_BCLIB))
diff --git a/openmp/libomptarget/plugins/cuda/CMakeLists.txt b/openmp/libomptarget/plugins/cuda/CMakeLists.txt
index b438d69af66cc..02311a2f918cb 100644
--- a/openmp/libomptarget/plugins/cuda/CMakeLists.txt
+++ b/openmp/libomptarget/plugins/cuda/CMakeLists.txt
@@ -72,7 +72,7 @@ target_link_libraries(omptarget.rtl.cuda
 # Otherwise this plugin is being built speculatively and there may be no cuda available
 if (LIBOMPTARGET_CAN_LINK_LIBCUDA OR LIBOMPTARGET_FORCE_DLOPEN_LIBCUDA)
   libomptarget_say("Enable tests using CUDA plugin")
-  set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} nvptx64-nvidia-cuda nvptx64-nvidia-cuda-newRTL" PARENT_SCOPE)
+  set(LIBOMPTARGET_SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS} nvptx64-nvidia-cuda-newRTL" PARENT_SCOPE)
 else()
   libomptarget_say("Disabling tests using CUDA plugin as cuda may not be available")
 endif()

From b1d946cbf780f1769b3a3a39ce68e462a181869e Mon Sep 17 00:00:00 2001
From: Zixu Wang <zixu_wang@apple.com>
Date: Thu, 20 Jan 2022 10:30:56 -0800
Subject: [PATCH 771/946] [clang] Add an extract-api driver option

This is the initial commit for the clang-extract-api RFC
<https://lists.llvm.org/pipermail/cfe-dev/2021-September/068768.html>
Add a new driver option `-extract-api` and associate it with a dummy
(for now) frontend action to set up the initial structure for
incremental works.

Differential Revision: https://reviews.llvm.org/D117809
---
 clang/include/clang/Driver/Options.td         |  2 +
 clang/include/clang/Driver/Types.def          |  1 +
 .../include/clang/Frontend/FrontendActions.h  |  7 ++++
 .../include/clang/Frontend/FrontendOptions.h  |  3 ++
 clang/lib/Driver/Driver.cpp                   |  9 ++++-
 clang/lib/Driver/ToolChains/Clang.cpp         |  3 ++
 clang/lib/Driver/Types.cpp                    |  1 +
 clang/lib/Frontend/CMakeLists.txt             |  1 +
 clang/lib/Frontend/CompilerInvocation.cpp     |  2 +
 clang/lib/Frontend/ExtractAPIConsumer.cpp     | 39 +++++++++++++++++++
 .../ExecuteCompilerInvocation.cpp             |  2 +
 clang/test/Driver/extract-api.c               | 16 ++++++++
 12 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 clang/lib/Frontend/ExtractAPIConsumer.cpp
 create mode 100644 clang/test/Driver/extract-api.c

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 49ceebcb51cf5..a57787a4f4c38 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -1080,6 +1080,8 @@ def end_no_unused_arguments : Flag<["--"], "end-no-unused-arguments">, Flags<[Co
   HelpText<"Start emitting warnings for unused driver arguments">;
 def interface_stub_version_EQ : JoinedOrSeparate<["-"], "interface-stub-version=">, Flags<[CC1Option]>;
 def exported__symbols__list : Separate<["-"], "exported_symbols_list">;
+def extract_api : Flag<["-"], "extract-api">, Flags<[CC1Option]>, Group<Action_Group>,
+  HelpText<"Extract API information">;
 def e : JoinedOrSeparate<["-"], "e">, Flags<[LinkerInput]>, Group<Link_Group>;
 def fmax_tokens_EQ : Joined<["-"], "fmax-tokens=">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Max total number of preprocessed tokens for -Wmax-tokens.">,
diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def
index 997eea445c225..7adf59ca5c992 100644
--- a/clang/include/clang/Driver/Types.def
+++ b/clang/include/clang/Driver/Types.def
@@ -100,4 +100,5 @@ TYPE("dSYM",                     dSYM,         INVALID,         "dSYM",   phases
 TYPE("dependencies",             Dependencies, INVALID,         "d",      phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 TYPE("cuda-fatbin",              CUDA_FATBIN,  INVALID,         "fatbin", phases::Compile, phases::Backend, phases::Assemble, phases::Link)
 TYPE("hip-fatbin",               HIP_FATBIN,   INVALID,         "hipfb",  phases::Compile, phases::Backend, phases::Assemble, phases::Link)
+TYPE("api-information",          API_INFO,     INVALID,         "json",   phases::Compile)
 TYPE("none",                     Nothing,      INVALID,         nullptr,  phases::Compile, phases::Backend, phases::Assemble, phases::Link)
diff --git a/clang/include/clang/Frontend/FrontendActions.h b/clang/include/clang/Frontend/FrontendActions.h
index 8bd6509559f70..8eceb81723d23 100644
--- a/clang/include/clang/Frontend/FrontendActions.h
+++ b/clang/include/clang/Frontend/FrontendActions.h
@@ -10,6 +10,7 @@
 #define LLVM_CLANG_FRONTEND_FRONTENDACTIONS_H
 
 #include "clang/Frontend/FrontendAction.h"
+#include <memory>
 #include <string>
 #include <vector>
 
@@ -270,6 +271,12 @@ class PrintDependencyDirectivesSourceMinimizerAction : public FrontendAction {
   bool usesPreprocessorOnly() const override { return true; }
 };
 
+class ExtractAPIAction : public ASTFrontendAction {
+protected:
+  std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI,
+                                                 StringRef InFile) override;
+};
+
 //===----------------------------------------------------------------------===//
 // Preprocessor Actions
 //===----------------------------------------------------------------------===//
diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h
index 1d9d89a28c6c4..7ce8076a3ee41 100644
--- a/clang/include/clang/Frontend/FrontendOptions.h
+++ b/clang/include/clang/Frontend/FrontendOptions.h
@@ -75,6 +75,9 @@ enum ActionKind {
   /// Emit a .o file.
   EmitObj,
 
+  // Extract API information
+  ExtractAPI,
+
   /// Parse and apply any fixits to the source.
   FixIt,
 
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp
index c7314e11c7865..2e4ebc10e9ba3 100644
--- a/clang/lib/Driver/Driver.cpp
+++ b/clang/lib/Driver/Driver.cpp
@@ -62,6 +62,7 @@
 #include "clang/Driver/SanitizerArgs.h"
 #include "clang/Driver/Tool.h"
 #include "clang/Driver/ToolChain.h"
+#include "clang/Driver/Types.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
@@ -326,7 +327,8 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL,
              (PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) ||
              (PhaseArg = DAL.getLastArg(options::OPT__migrate)) ||
              (PhaseArg = DAL.getLastArg(options::OPT__analyze)) ||
-             (PhaseArg = DAL.getLastArg(options::OPT_emit_ast))) {
+             (PhaseArg = DAL.getLastArg(options::OPT_emit_ast)) ||
+             (PhaseArg = DAL.getLastArg(options::OPT_extract_api))) {
     FinalPhase = phases::Compile;
 
   // -S only runs up to the backend.
@@ -4069,7 +4071,8 @@ Action *Driver::ConstructPhaseAction(
         OutputTy = types::TY_ModuleFile;
     }
 
-    if (Args.hasArg(options::OPT_fsyntax_only)) {
+    if (Args.hasArg(options::OPT_fsyntax_only) ||
+        Args.hasArg(options::OPT_extract_api)) {
       // Syntax checks should not emit a PCH file
       OutputTy = types::TY_Nothing;
     }
@@ -4097,6 +4100,8 @@ Action *Driver::ConstructPhaseAction(
       return C.MakeAction<CompileJobAction>(Input, types::TY_ModuleFile);
     if (Args.hasArg(options::OPT_verify_pch))
       return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing);
+    if (Args.hasArg(options::OPT_extract_api))
+      return C.MakeAction<CompileJobAction>(Input, types::TY_API_INFO);
     return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC);
   }
   case phases::Backend: {
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index 9e10d6d890747..ea83551845070 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -33,6 +33,7 @@
 #include "clang/Driver/InputInfo.h"
 #include "clang/Driver/Options.h"
 #include "clang/Driver/SanitizerArgs.h"
+#include "clang/Driver/Types.h"
 #include "clang/Driver/XRayArgs.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/llvm-config.h"
@@ -4597,6 +4598,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
     } else if (JA.getType() == types::TY_RewrittenLegacyObjC) {
       CmdArgs.push_back("-rewrite-objc");
       rewriteKind = RK_Fragile;
+    } else if (JA.getType() == types::TY_API_INFO) {
+      CmdArgs.push_back("-extract-api");
     } else {
       assert(JA.getType() == types::TY_PP_Asm && "Unexpected output type!");
     }
diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp
index 1bd187ad2fc0a..8f6adc6c2ad1e 100644
--- a/clang/lib/Driver/Types.cpp
+++ b/clang/lib/Driver/Types.cpp
@@ -143,6 +143,7 @@ bool types::isAcceptedByClang(ID Id) {
   case TY_CXXModule: case TY_PP_CXXModule:
   case TY_AST: case TY_ModuleFile: case TY_PCH:
   case TY_LLVM_IR: case TY_LLVM_BC:
+  case TY_API_INFO:
     return true;
   }
 }
diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt
index ca4ad8b2dab2e..e147a1341e064 100644
--- a/clang/lib/Frontend/CMakeLists.txt
+++ b/clang/lib/Frontend/CMakeLists.txt
@@ -20,6 +20,7 @@ add_clang_library(clangFrontend
   DependencyFile.cpp
   DependencyGraph.cpp
   DiagnosticRenderer.cpp
+  ExtractAPIConsumer.cpp
   FrontendAction.cpp
   FrontendActions.cpp
   FrontendOptions.cpp
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index eaca1fbb9def8..7f1ce3da7e7eb 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2408,6 +2408,7 @@ static const auto &getFrontendActionTable() {
       {frontend::EmitCodeGenOnly, OPT_emit_codegen_only},
       {frontend::EmitCodeGenOnly, OPT_emit_codegen_only},
       {frontend::EmitObj, OPT_emit_obj},
+      {frontend::ExtractAPI, OPT_extract_api},
 
       {frontend::FixIt, OPT_fixit_EQ},
       {frontend::FixIt, OPT_fixit},
@@ -4147,6 +4148,7 @@ static bool isStrictlyPreprocessorAction(frontend::ActionKind Action) {
   case frontend::EmitLLVMOnly:
   case frontend::EmitCodeGenOnly:
   case frontend::EmitObj:
+  case frontend::ExtractAPI:
   case frontend::FixIt:
   case frontend::GenerateModule:
   case frontend::GenerateModuleInterface:
diff --git a/clang/lib/Frontend/ExtractAPIConsumer.cpp b/clang/lib/Frontend/ExtractAPIConsumer.cpp
new file mode 100644
index 0000000000000..92a0385c96cc0
--- /dev/null
+++ b/clang/lib/Frontend/ExtractAPIConsumer.cpp
@@ -0,0 +1,39 @@
+#include "clang/AST/ASTConsumer.h"
+#include "clang/AST/RecursiveASTVisitor.h"
+#include "clang/Frontend/ASTConsumers.h"
+#include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/FrontendActions.h"
+
+using namespace clang;
+
+namespace {
+class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
+public:
+  explicit ExtractAPIVisitor(ASTContext *Context) : Context(Context) {}
+
+  bool VisitNamedDecl(NamedDecl *Decl) {
+    llvm::outs() << Decl->getName() << "\n";
+    return true;
+  }
+
+private:
+  ASTContext *Context;
+};
+
+class ExtractAPIConsumer : public ASTConsumer {
+public:
+  explicit ExtractAPIConsumer(ASTContext *Context) : Visitor(Context) {}
+
+  void HandleTranslationUnit(ASTContext &Context) override {
+    Visitor.TraverseDecl(Context.getTranslationUnitDecl());
+  }
+
+private:
+  ExtractAPIVisitor Visitor;
+};
+} // namespace
+
+std::unique_ptr<ASTConsumer>
+ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
+  return std::make_unique<ExtractAPIConsumer>(&CI.getASTContext());
+}
diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index 8e18f33af0cbc..8a8a13743762e 100644
--- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -57,6 +57,8 @@ CreateFrontendBaseAction(CompilerInstance &CI) {
   case EmitLLVMOnly:           return std::make_unique<EmitLLVMOnlyAction>();
   case EmitCodeGenOnly:        return std::make_unique<EmitCodeGenOnlyAction>();
   case EmitObj:                return std::make_unique<EmitObjAction>();
+  case ExtractAPI:
+    return std::make_unique<ExtractAPIAction>();
   case FixIt:                  return std::make_unique<FixItAction>();
   case GenerateModule:
     return std::make_unique<GenerateModuleFromModuleMapAction>();
diff --git a/clang/test/Driver/extract-api.c b/clang/test/Driver/extract-api.c
new file mode 100644
index 0000000000000..f58d3a42ee241
--- /dev/null
+++ b/clang/test/Driver/extract-api.c
@@ -0,0 +1,16 @@
+// RUN: %clang -target x86_64-unknown-unknown -ccc-print-phases -extract-api %s 2> %t
+// RUN: echo 'END' >> %t
+// RUN: FileCheck -check-prefix EXTRACT-API-PHASES -input-file %t %s
+
+// EXTRACT-API-PHASES: 0: input,
+// EXTRACT-API-PHASES: , c
+// EXTRACT-API-PHASES: 1: preprocessor, {0}, cpp-output
+// EXTRACT-API-PHASES: 2: compiler, {1}, api-information
+// EXTRACT-API-PHASES-NOT: 3:
+// EXTRACT-API-PHASES: END
+
+// FIXME: Check for the dummy output now to verify that the custom action was executed.
+// RUN: %clang -extract-api %s | FileCheck -check-prefix DUMMY-OUTPUT %s
+
+void dummy_function();
+// DUMMY-OUTPUT: dummy_function

From 6d5239113c1461118055166a8c300c2e8e57791b Mon Sep 17 00:00:00 2001
From: Ellis Hoag <ellis.sparky.hoag@gmail.com>
Date: Tue, 25 Jan 2022 12:35:23 -0800
Subject: [PATCH 772/946] [InstrProf][Correlate] Improve error messages

Improve the error messages when using `llvm-profdata` to correlate profiles with debug info.

Reviewed By: kyulee, phosek

Differential Revision: https://reviews.llvm.org/D118166
---
 llvm/include/llvm/ProfileData/InstrProf.h    |  1 -
 llvm/lib/ProfileData/InstrProf.cpp           |  3 ---
 llvm/lib/ProfileData/InstrProfCorrelator.cpp | 19 +++++++++++++------
 3 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 6c5efb2f6d5de..56198ea978eeb 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -293,7 +293,6 @@ enum class instrprof_error {
   missing_debug_info_for_correlation,
   unexpected_debug_info_for_correlation,
   unable_to_correlate_profile,
-  unsupported_debug_format,
   unknown_function,
   invalid_prof,
   hash_mismatch,
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 34e0c5ebcd584..051655e1fed6b 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -119,9 +119,6 @@ static std::string getInstrProfErrString(instrprof_error Err,
   case instrprof_error::unable_to_correlate_profile:
     OS << "unable to correlate profile";
     break;
-  case instrprof_error::unsupported_debug_format:
-    OS << "unsupported debug info format (only DWARF is supported)";
-    break;
   case instrprof_error::invalid_prof:
     OS << "invalid profile created. Please file a bug "
           "at: " BUG_REPORT_URL
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index 4bd9a3df950d3..7a5791eedb610 100644
--- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -23,7 +23,8 @@ Expected<object::SectionRef> getCountersSection(const object::ObjectFile &Obj) {
       if (SectionName.get() == INSTR_PROF_CNTS_SECT_NAME)
         return Section;
   return make_error<InstrProfError>(
-      instrprof_error::unable_to_correlate_profile);
+      instrprof_error::unable_to_correlate_profile,
+      "could not find counter section (" INSTR_PROF_CNTS_SECT_NAME ")");
 }
 
 const char *InstrProfCorrelator::FunctionNameAttributeName = "Function Name";
@@ -54,9 +55,9 @@ InstrProfCorrelator::get(StringRef DebugInfoFilename) {
     // TODO: Enable profile correlation when there are multiple objects in a
     // dSYM bundle.
     if (DsymObjectsOrErr->size() > 1)
-      return createStringError(
-          std::error_code(),
-          "Profile correlation using multiple objects is not yet supported");
+      return make_error<InstrProfError>(
+          instrprof_error::unable_to_correlate_profile,
+          "using multiple objects is not yet supported");
     DebugInfoFilename = *DsymObjectsOrErr->begin();
   }
   auto BufferOrErr =
@@ -84,7 +85,7 @@ InstrProfCorrelator::get(std::unique_ptr<MemoryBuffer> Buffer) {
       return InstrProfCorrelatorImpl<uint32_t>::get(std::move(*CtxOrErr), *Obj);
   }
   return make_error<InstrProfError>(
-      instrprof_error::unable_to_correlate_profile);
+      instrprof_error::unable_to_correlate_profile, "not an object file");
 }
 
 namespace llvm {
@@ -120,13 +121,19 @@ InstrProfCorrelatorImpl<IntPtrT>::get(
     return std::make_unique<DwarfInstrProfCorrelator<IntPtrT>>(std::move(DICtx),
                                                                std::move(Ctx));
   }
-  return make_error<InstrProfError>(instrprof_error::unsupported_debug_format);
+  return make_error<InstrProfError>(
+      instrprof_error::unable_to_correlate_profile,
+      "unsupported debug info format (only DWARF is supported)");
 }
 
 template <class IntPtrT>
 Error InstrProfCorrelatorImpl<IntPtrT>::correlateProfileData() {
   assert(Data.empty() && Names.empty() && NamesVec.empty());
   correlateProfileDataImpl();
+  if (Data.empty() || NamesVec.empty())
+    return make_error<InstrProfError>(
+        instrprof_error::unable_to_correlate_profile,
+        "could not find any profile metadata in debug info");
   auto Result =
       collectPGOFuncNameStrings(NamesVec, /*doCompression=*/false, Names);
   CounterOffsets.clear();

From ff52ef334beb20a90bdb438419000dce4aacba9d Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Wed, 26 Jan 2022 20:40:24 +0100
Subject: [PATCH 773/946] [lldb/API] Add ability to check if module is backed
 by a file on disk

This patch introduces a new SBAPI method: `SBModule::IsFileBacked`

As the name suggests, it tells the user if the module's object file is
on disk or in memory.

rdar://68538278

Differential Revision: https://reviews.llvm.org/D118261

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 lldb/bindings/interface/SBModule.i            |  7 +++
 lldb/include/lldb/API/SBModule.h              |  2 +
 lldb/source/API/SBModule.cpp                  | 14 +++++
 lldb/test/API/python_api/sbmodule/Makefile    |  3 +
 .../API/python_api/sbmodule/TestSBModule.py   | 58 +++++++++++++++++++
 lldb/test/API/python_api/sbmodule/main.c      |  5 ++
 6 files changed, 89 insertions(+)
 create mode 100644 lldb/test/API/python_api/sbmodule/Makefile
 create mode 100644 lldb/test/API/python_api/sbmodule/TestSBModule.py
 create mode 100644 lldb/test/API/python_api/sbmodule/main.c

diff --git a/lldb/bindings/interface/SBModule.i b/lldb/bindings/interface/SBModule.i
index 606c9a5bbd0cf..bda602d15690e 100644
--- a/lldb/bindings/interface/SBModule.i
+++ b/lldb/bindings/interface/SBModule.i
@@ -137,6 +137,13 @@ public:
     void
     Clear();
 
+    %feature("docstring", "Check if the module is file backed.
+    @return
+        True, if the module is backed by an object file on disk.
+        False, if the module is backed by an object file in memory.") IsFileBacked;
+    bool
+    IsFileBacked() const;
+
     %feature("docstring", "
     Get const accessor for the module file specification.
 
diff --git a/lldb/include/lldb/API/SBModule.h b/lldb/include/lldb/API/SBModule.h
index dd783fe4107db..7200a1ef53fd8 100644
--- a/lldb/include/lldb/API/SBModule.h
+++ b/lldb/include/lldb/API/SBModule.h
@@ -37,6 +37,8 @@ class LLDB_API SBModule {
 
   void Clear();
 
+  bool IsFileBacked() const;
+
   /// Get const accessor for the module file specification.
   ///
   /// This function returns the file for the module on the host system
diff --git a/lldb/source/API/SBModule.cpp b/lldb/source/API/SBModule.cpp
index 2483495b97db0..1454012d3eb9a 100644
--- a/lldb/source/API/SBModule.cpp
+++ b/lldb/source/API/SBModule.cpp
@@ -88,6 +88,20 @@ void SBModule::Clear() {
   m_opaque_sp.reset();
 }
 
+bool SBModule::IsFileBacked() const {
+  LLDB_INSTRUMENT_VA(this);
+
+  ModuleSP module_sp(GetSP());
+  if (!module_sp)
+    return false;
+
+  ObjectFile *obj_file = module_sp->GetObjectFile();
+  if (!obj_file)
+    return false;
+
+  return !obj_file->IsInMemory();
+}
+
 SBFileSpec SBModule::GetFileSpec() const {
   LLDB_INSTRUMENT_VA(this);
 
diff --git a/lldb/test/API/python_api/sbmodule/Makefile b/lldb/test/API/python_api/sbmodule/Makefile
new file mode 100644
index 0000000000000..10495940055b6
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/Makefile
@@ -0,0 +1,3 @@
+C_SOURCES := main.c
+
+include Makefile.rules
diff --git a/lldb/test/API/python_api/sbmodule/TestSBModule.py b/lldb/test/API/python_api/sbmodule/TestSBModule.py
new file mode 100644
index 0000000000000..ab6a9a20884a3
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/TestSBModule.py
@@ -0,0 +1,58 @@
+"""Test the SBDModule APIs."""
+
+import lldb
+from lldbsuite.test.decorators import *
+from lldbsuite.test.lldbtest import *
+from lldbsuite.test import lldbutil
+
+import os, signal, subprocess
+
+class SBModuleAPICase(TestBase):
+
+    mydir = TestBase.compute_mydir(__file__)
+
+    def setUp(self):
+        TestBase.setUp(self)
+        self.background_pid = None
+
+    def tearDown(self):
+        TestBase.tearDown(self)
+        if self.background_pid:
+            os.kill(self.background_pid, signal.SIGKILL)
+
+
+    def test_module_is_file_backed(self):
+        """Test the SBModule::IsFileBacked() method"""
+        self.build()
+        target, _, _, _  = lldbutil.run_to_source_breakpoint(self, "// break here",
+                                                    lldb.SBFileSpec("main.c"))
+
+        self.assertGreater(target.GetNumModules(), 0)
+        main_module = target.GetModuleAtIndex(0)
+        self.assertEqual(main_module.GetFileSpec().GetFilename(), "a.out")
+        self.assertTrue(main_module.IsFileBacked(),
+                         "The module should be backed by a file on disk")
+
+        self.dbg.DeleteTarget(target)
+        self.assertEqual(self.dbg.GetNumTargets(), 0)
+
+        exe = self.getBuildArtifact("a.out")
+        background_process = subprocess.Popen([exe])
+        self.assertTrue(background_process, "process is not valid")
+        self.background_pid = background_process.pid
+        os.unlink(exe)
+
+        target = self.dbg.CreateTarget('')
+        self.assertEqual(self.dbg.GetNumTargets(), 1)
+        error = lldb.SBError()
+        process = target.AttachToProcessWithID(self.dbg.GetListener(),
+                                               self.background_pid, error)
+        self.assertTrue(error.Success() and process,  PROCESS_IS_VALID)
+        main_module = target.GetModuleAtIndex(0)
+        self.assertEqual(main_module.GetFileSpec().GetFilename(), "a.out")
+        self.assertFalse(main_module.IsFileBacked(),
+                         "The module should not be backed by a file on disk.")
+
+        error = process.Destroy()
+        self.assertTrue(error.Success(), "couldn't destroy process %s" % background_process.pid)
+
diff --git a/lldb/test/API/python_api/sbmodule/main.c b/lldb/test/API/python_api/sbmodule/main.c
new file mode 100644
index 0000000000000..101d495698f45
--- /dev/null
+++ b/lldb/test/API/python_api/sbmodule/main.c
@@ -0,0 +1,5 @@
+int main() {
+  while (1) // break here
+    ;
+  return 42;
+}

From 6a953d931c4de6f4b5bdc5466d15da62ede00b95 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 11:41:47 -0800
Subject: [PATCH 774/946] [clang] Fix -Wsubobject-linkage after D117262

/home/buildbot/llvm-avr-linux/llvm-avr-linux/llvm/clang/lib/CodeGen/Address.h:76:7: warning: 'clang::CodeGen::Address' has a field 'clang::CodeGen::Address::A' whose type uses the anonymous namespace [-Wsubobject-linkage]

https://lab.llvm.org/buildbot/#/builders/112/builds/12047
---
 clang/lib/CodeGen/Address.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/lib/CodeGen/Address.h b/clang/lib/CodeGen/Address.h
index 06b82e404cce4..6a22e567d399a 100644
--- a/clang/lib/CodeGen/Address.h
+++ b/clang/lib/CodeGen/Address.h
@@ -22,7 +22,6 @@
 namespace clang {
 namespace CodeGen {
 
-namespace {
 // We try to save some space by using 6 bits over two PointerIntPairs to store
 // the alignment. However, some arches don't support 3 bits in a PointerIntPair
 // so we fallback to storing the alignment separately.
@@ -70,7 +69,6 @@ template <typename T> class AddressImpl<T, true> {
     return CharUnits::fromQuantity(1UL << AlignLog);
   }
 };
-} // namespace
 
 /// An aligned address.
 class Address {

From b88a4d72d91cdd5dbee88aa8491e1a87150189d8 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Tue, 25 Jan 2022 12:00:46 -0800
Subject: [PATCH 775/946] [mlir:GPU] Replace reference to LLVMFuncOp with
 FuncOpInterface

The GPU dialect currently contains an explicit reference to  LLVMFuncOp
during verification to handle the situation where the kernel has already been
converted. This commit changes that reference to instead use FunctionOpInterface,
which has two main benefits:

* It allows for removing an otherwise unnecessary dependency on the LLVM dialect
* It removes hardcoded assumptions about the lowering path and use of the GPU dialect

Differential Revision: https://reviews.llvm.org/D118172
---
 mlir/lib/Dialect/GPU/CMakeLists.txt    |  1 -
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 24 +++++++++++++++---------
 mlir/test/Dialect/GPU/invalid.mlir     | 15 +++++++++++++++
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/mlir/lib/Dialect/GPU/CMakeLists.txt b/mlir/lib/Dialect/GPU/CMakeLists.txt
index 6c14a3bcfdc49..69db5ca0e1b0b 100644
--- a/mlir/lib/Dialect/GPU/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/CMakeLists.txt
@@ -38,7 +38,6 @@ add_mlir_dialect_library(MLIRGPUOps
   MLIRMemRef
   MLIRSideEffectInterfaces
   MLIRSupport
-  MLIRLLVMIR
   )
 
 add_mlir_dialect_library(MLIRGPUTransforms
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index d195ba121c81b..b048491f5d1db 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -13,7 +13,6 @@
 #include "mlir/Dialect/GPU/GPUDialect.h"
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
@@ -226,21 +225,28 @@ LogicalResult GPUDialect::verifyOperationAttribute(Operation *op,
 
     // Check that `launch_func` refers to a well-formed kernel function.
     Operation *kernelFunc = module.lookupSymbol(launchOp.kernelAttr());
-    auto kernelGPUFunction = dyn_cast_or_null<gpu::GPUFuncOp>(kernelFunc);
-    auto kernelLLVMFunction = dyn_cast_or_null<LLVM::LLVMFuncOp>(kernelFunc);
-    if (!kernelGPUFunction && !kernelLLVMFunction)
+    if (!kernelFunc)
       return launchOp.emitOpError("kernel function '")
              << launchOp.kernel() << "' is undefined";
+    auto kernelConvertedFunction = dyn_cast<FunctionOpInterface>(kernelFunc);
+    if (!kernelConvertedFunction) {
+      InFlightDiagnostic diag = launchOp.emitOpError()
+                                << "referenced kernel '" << launchOp.kernel()
+                                << "' is not a function";
+      diag.attachNote(kernelFunc->getLoc()) << "see the kernel definition here";
+      return diag;
+    }
+
     if (!kernelFunc->getAttrOfType<mlir::UnitAttr>(
             GPUDialect::getKernelFuncAttrName()))
       return launchOp.emitOpError("kernel function is missing the '")
              << GPUDialect::getKernelFuncAttrName() << "' attribute";
 
-    // TODO: if the kernel function has been converted to
-    // the LLVM dialect but the caller hasn't (which happens during the
-    // separate compilation), do not check type correspondence as it would
-    // require the verifier to be aware of the LLVM type conversion.
-    if (kernelLLVMFunction)
+    // TODO: If the kernel isn't a GPU function (which happens during separate
+    // compilation), do not check type correspondence as it would require the
+    // verifier to be aware of the type conversion.
+    auto kernelGPUFunction = dyn_cast<gpu::GPUFuncOp>(kernelFunc);
+    if (!kernelGPUFunction)
       return success();
 
     unsigned actualNumArguments = launchOp.getNumKernelOperands();
diff --git a/mlir/test/Dialect/GPU/invalid.mlir b/mlir/test/Dialect/GPU/invalid.mlir
index be498e0f8f44e..9ddda4e1dcf37 100644
--- a/mlir/test/Dialect/GPU/invalid.mlir
+++ b/mlir/test/Dialect/GPU/invalid.mlir
@@ -120,6 +120,21 @@ module attributes {gpu.container_module} {
 
 // -----
 
+module attributes {gpu.container_module} {
+  gpu.module @kernels {
+    // expected-note@+1 {{see the kernel definition here}}
+    memref.global "private" @kernel_1 : memref<4xi32>
+  }
+
+  func @launch_func_undefined_function(%sz : index) {
+    // expected-error@+1 {{referenced kernel '@kernels::@kernel_1' is not a function}}
+    gpu.launch_func @kernels::@kernel_1 blocks in (%sz, %sz, %sz) threads in (%sz, %sz, %sz)
+    return
+  }
+}
+
+// -----
+
 module attributes {gpu.container_module} {
   module @kernels {
     gpu.func @kernel_1(%arg1 : !llvm.ptr<f32>) kernel {

From 480cd4cb8560532e544fc0c234749912dde759c6 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Tue, 25 Jan 2022 13:20:01 -0800
Subject: [PATCH 776/946] [mlir] Move the complex support of std.constant to a
 new complex.constant operation

This is part of splitting up the standard dialect.

Differential Revision: https://reviews.llvm.org/D118182
---
 .../include/mlir/Dialect/Complex/IR/Complex.h |  3 +-
 .../mlir/Dialect/Complex/IR/ComplexBase.td    |  2 +-
 .../mlir/Dialect/Complex/IR/ComplexOps.td     | 35 ++++++++++
 mlir/lib/Dialect/Complex/IR/CMakeLists.txt    |  2 +-
 .../lib/Dialect/Complex/IR/ComplexDialect.cpp |  8 +--
 mlir/lib/Dialect/Complex/IR/ComplexOps.cpp    | 64 ++++++++++++++++++-
 mlir/lib/Dialect/StandardOps/IR/Ops.cpp       | 42 ++----------
 mlir/test/Dialect/Complex/canonicalize.mlir   |  4 +-
 mlir/test/Dialect/Complex/invalid.mlir        | 23 +++++++
 mlir/test/Dialect/Complex/ops.mlir            |  7 +-
 mlir/test/Dialect/Standard/invalid.mlir       | 24 -------
 mlir/test/Dialect/Standard/ops.mlir           | 12 ----
 12 files changed, 140 insertions(+), 86 deletions(-)
 create mode 100644 mlir/test/Dialect/Complex/invalid.mlir

diff --git a/mlir/include/mlir/Dialect/Complex/IR/Complex.h b/mlir/include/mlir/Dialect/Complex/IR/Complex.h
index 6f3026a5affb4..2a8a8e7a18332 100644
--- a/mlir/include/mlir/Dialect/Complex/IR/Complex.h
+++ b/mlir/include/mlir/Dialect/Complex/IR/Complex.h
@@ -10,13 +10,12 @@
 #define MLIR_DIALECT_COMPLEX_IR_COMPLEX_H_
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Dialect.h"
 #include "mlir/IR/OpDefinition.h"
 #include "mlir/IR/OpImplementation.h"
+#include "mlir/Interfaces/InferTypeOpInterface.h"
 #include "mlir/Interfaces/SideEffectInterfaces.h"
-#include "mlir/Interfaces/VectorInterfaces.h"
 
 //===----------------------------------------------------------------------===//
 // Complex Dialect
diff --git a/mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td b/mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td
index 4382183254ac4..6cb7d9d92b58c 100644
--- a/mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td
+++ b/mlir/include/mlir/Dialect/Complex/IR/ComplexBase.td
@@ -19,7 +19,7 @@ def Complex_Dialect : Dialect {
     arithmetic ops.
   }];
 
-  let dependentDialects = ["arith::ArithmeticDialect", "StandardOpsDialect"];
+  let dependentDialects = ["arith::ArithmeticDialect"];
   let hasConstantMaterializer = 1;
   let emitAccessorPrefix = kEmitAccessorPrefix_Prefixed;
 }
diff --git a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td
index 02b44ff16f561..a79d7ac8a2157 100644
--- a/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td
+++ b/mlir/include/mlir/Dialect/Complex/IR/ComplexOps.td
@@ -10,6 +10,7 @@
 #define COMPLEX_OPS
 
 include "mlir/Dialect/Complex/IR/ComplexBase.td"
+include "mlir/IR/OpAsmInterface.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
@@ -76,6 +77,40 @@ def AddOp : ComplexArithmeticOp<"add"> {
   }];
 }
 
+//===----------------------------------------------------------------------===//
+// ConstantOp
+//===----------------------------------------------------------------------===//
+
+def ConstantOp : Complex_Op<"constant", [
+    ConstantLike, NoSideEffect,
+    DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>
+  ]> {
+  let summary = "complex number constant operation";
+  let description = [{
+    The `complex.constant` operation creates a constant complex number from an
+    attribute containing the real and imaginary parts.
+
+    Example:
+
+    ```mlir
+    %a = complex.constant [0.1, -1.0] : complex<f64>
+    ```
+  }];
+
+  let arguments = (ins ArrayAttr:$value);
+  let results = (outs Complex<AnyFloat>:$complex);
+
+  let assemblyFormat = "$value attr-dict `:` type($complex)";
+  let hasFolder = 1;
+  let verifier = [{ return ::verify(*this); }];
+  
+  let extraClassDeclaration = [{
+    /// Returns true if a constant operation can be built with the given value
+    /// and result type.
+    static bool isBuildableWith(Attribute value, Type type);
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // CreateOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Complex/IR/CMakeLists.txt b/mlir/lib/Dialect/Complex/IR/CMakeLists.txt
index fdb7e748a01b1..da419d9b25994 100644
--- a/mlir/lib/Dialect/Complex/IR/CMakeLists.txt
+++ b/mlir/lib/Dialect/Complex/IR/CMakeLists.txt
@@ -11,6 +11,6 @@ add_mlir_dialect_library(MLIRComplex
   LINK_LIBS PUBLIC
   MLIRArithmetic
   MLIRDialect
+  MLIRInferTypeOpInterface
   MLIRIR
-  MLIRStandard
   )
diff --git a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
index a5aa9799f5c16..f189c2b2666d0 100644
--- a/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
+++ b/mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp
@@ -8,7 +8,6 @@
 
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
-#include "mlir/Dialect/StandardOps/IR/Ops.h"
 
 using namespace mlir;
 
@@ -25,9 +24,10 @@ Operation *complex::ComplexDialect::materializeConstant(OpBuilder &builder,
                                                         Attribute value,
                                                         Type type,
                                                         Location loc) {
-  // TODO complex.constant
-  if (type.isa<ComplexType>())
-    return builder.create<ConstantOp>(loc, type, value);
+  if (complex::ConstantOp::isBuildableWith(value, type)) {
+    return builder.create<complex::ConstantOp>(loc, type,
+                                               value.cast<ArrayAttr>());
+  }
   if (arith::ConstantOp::isBuildableWith(value, type))
     return builder.create<arith::ConstantOp>(loc, type, value);
   return nullptr;
diff --git a/mlir/lib/Dialect/Complex/IR/ComplexOps.cpp b/mlir/lib/Dialect/Complex/IR/ComplexOps.cpp
index 58412d37605b0..36745c5264b86 100644
--- a/mlir/lib/Dialect/Complex/IR/ComplexOps.cpp
+++ b/mlir/lib/Dialect/Complex/IR/ComplexOps.cpp
@@ -13,11 +13,54 @@ using namespace mlir;
 using namespace mlir::complex;
 
 //===----------------------------------------------------------------------===//
-// TableGen'd op method definitions
+// ConstantOp
 //===----------------------------------------------------------------------===//
 
-#define GET_OP_CLASSES
-#include "mlir/Dialect/Complex/IR/ComplexOps.cpp.inc"
+OpFoldResult ConstantOp::fold(ArrayRef<Attribute> operands) {
+  assert(operands.empty() && "constant has no operands");
+  return getValue();
+}
+
+void ConstantOp::getAsmResultNames(
+    function_ref<void(Value, StringRef)> setNameFn) {
+  setNameFn(getResult(), "cst");
+}
+
+bool ConstantOp::isBuildableWith(Attribute value, Type type) {
+  if (auto arrAttr = value.dyn_cast<ArrayAttr>()) {
+    auto complexTy = type.dyn_cast<ComplexType>();
+    if (!complexTy)
+      return false;
+    auto complexEltTy = complexTy.getElementType();
+    return arrAttr.size() == 2 && arrAttr[0].getType() == complexEltTy &&
+           arrAttr[1].getType() == complexEltTy;
+  }
+  return false;
+}
+
+static LogicalResult verify(ConstantOp op) {
+  ArrayAttr arrayAttr = op.getValue();
+  if (arrayAttr.size() != 2) {
+    return op.emitOpError(
+        "requires 'value' to be a complex constant, represented as array of "
+        "two values");
+  }
+
+  auto complexEltTy = op.getType().getElementType();
+  if (complexEltTy != arrayAttr[0].getType() ||
+      complexEltTy != arrayAttr[1].getType()) {
+    return op.emitOpError()
+           << "requires attribute's element types (" << arrayAttr[0].getType()
+           << ", " << arrayAttr[1].getType()
+           << ") to match the element type of the op's return type ("
+           << complexEltTy << ")";
+  }
+  return success();
+}
+
+//===----------------------------------------------------------------------===//
+// CreateOp
+//===----------------------------------------------------------------------===//
 
 OpFoldResult CreateOp::fold(ArrayRef<Attribute> operands) {
   assert(operands.size() == 2 && "binary op takes two operands");
@@ -32,6 +75,10 @@ OpFoldResult CreateOp::fold(ArrayRef<Attribute> operands) {
   return {};
 }
 
+//===----------------------------------------------------------------------===//
+// ImOp
+//===----------------------------------------------------------------------===//
+
 OpFoldResult ImOp::fold(ArrayRef<Attribute> operands) {
   assert(operands.size() == 1 && "unary op takes 1 operand");
   ArrayAttr arrayAttr = operands[0].dyn_cast_or_null<ArrayAttr>();
@@ -42,6 +89,10 @@ OpFoldResult ImOp::fold(ArrayRef<Attribute> operands) {
   return {};
 }
 
+//===----------------------------------------------------------------------===//
+// ReOp
+//===----------------------------------------------------------------------===//
+
 OpFoldResult ReOp::fold(ArrayRef<Attribute> operands) {
   assert(operands.size() == 1 && "unary op takes 1 operand");
   ArrayAttr arrayAttr = operands[0].dyn_cast_or_null<ArrayAttr>();
@@ -51,3 +102,10 @@ OpFoldResult ReOp::fold(ArrayRef<Attribute> operands) {
     return createOp.getOperand(0);
   return {};
 }
+
+//===----------------------------------------------------------------------===//
+// TableGen'd op method definitions
+//===----------------------------------------------------------------------===//
+
+#define GET_OP_CLASSES
+#include "mlir/Dialect/Complex/IR/ComplexOps.cpp.inc"
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
index 1110879395259..7487aea0902de 100644
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -669,8 +669,8 @@ static void print(OpAsmPrinter &p, ConstantOp &op) {
     p << ' ';
   p << op.getValue();
 
-  // If the value is a symbol reference or Array, print a trailing type.
-  if (op.getValue().isa<SymbolRefAttr, ArrayAttr>())
+  // If the value is a symbol reference, print a trailing type.
+  if (op.getValue().isa<SymbolRefAttr>())
     p << " : " << op.getType();
 }
 
@@ -681,10 +681,9 @@ static ParseResult parseConstantOp(OpAsmParser &parser,
       parser.parseAttribute(valueAttr, "value", result.attributes))
     return failure();
 
-  // If the attribute is a symbol reference or array, then we expect a trailing
-  // type.
+  // If the attribute is a symbol reference, then we expect a trailing type.
   Type type;
-  if (!valueAttr.isa<SymbolRefAttr, ArrayAttr>())
+  if (!valueAttr.isa<SymbolRefAttr>())
     type = valueAttr.getType();
   else if (parser.parseColonType(type))
     return failure();
@@ -705,24 +704,6 @@ static LogicalResult verify(ConstantOp &op) {
     return op.emitOpError() << "requires attribute's type (" << value.getType()
                             << ") to match op's return type (" << type << ")";
 
-  if (auto complexTy = type.dyn_cast<ComplexType>()) {
-    auto arrayAttr = value.dyn_cast<ArrayAttr>();
-    if (!complexTy || arrayAttr.size() != 2)
-      return op.emitOpError(
-          "requires 'value' to be a complex constant, represented as array of "
-          "two values");
-    auto complexEltTy = complexTy.getElementType();
-    if (complexEltTy != arrayAttr[0].getType() ||
-        complexEltTy != arrayAttr[1].getType()) {
-      return op.emitOpError()
-             << "requires attribute's element types (" << arrayAttr[0].getType()
-             << ", " << arrayAttr[1].getType()
-             << ") to match the element type of the op's return type ("
-             << complexEltTy << ")";
-    }
-    return success();
-  }
-
   if (type.isa<FunctionType>()) {
     auto fnAttr = value.dyn_cast<FlatSymbolRefAttr>();
     if (!fnAttr)
@@ -769,19 +750,8 @@ bool ConstantOp::isBuildableWith(Attribute value, Type type) {
   // SymbolRefAttr can only be used with a function type.
   if (value.isa<SymbolRefAttr>())
     return type.isa<FunctionType>();
-  // The attribute must have the same type as 'type'.
-  if (!value.getType().isa<NoneType>() && value.getType() != type)
-    return false;
-  // Finally, check that the attribute kind is handled.
-  if (auto arrAttr = value.dyn_cast<ArrayAttr>()) {
-    auto complexTy = type.dyn_cast<ComplexType>();
-    if (!complexTy)
-      return false;
-    auto complexEltTy = complexTy.getElementType();
-    return arrAttr.size() == 2 && arrAttr[0].getType() == complexEltTy &&
-           arrAttr[1].getType() == complexEltTy;
-  }
-  return value.isa<UnitAttr>();
+  // Otherwise, this must be a UnitAttr.
+  return value.isa<UnitAttr>() && type.isa<NoneType>();
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/Complex/canonicalize.mlir b/mlir/test/Dialect/Complex/canonicalize.mlir
index 038de9908cf2a..c68d87e8c0773 100644
--- a/mlir/test/Dialect/Complex/canonicalize.mlir
+++ b/mlir/test/Dialect/Complex/canonicalize.mlir
@@ -27,7 +27,7 @@ func @create_of_real_and_imag_different_operand(
 func @real_of_const() -> f32 {
   // CHECK: %[[CST:.*]] = arith.constant 1.000000e+00 : f32
   // CHECK-NEXT: return %[[CST]] : f32
-  %complex = constant [1.0 : f32, 0.0 : f32] : complex<f32>
+  %complex = complex.constant [1.0 : f32, 0.0 : f32] : complex<f32>
   %1 = complex.re %complex : complex<f32>
   return %1 : f32
 }
@@ -47,7 +47,7 @@ func @real_of_create_op() -> f32 {
 func @imag_of_const() -> f32 {
   // CHECK: %[[CST:.*]] = arith.constant 0.000000e+00 : f32
   // CHECK-NEXT: return %[[CST]] : f32
-  %complex = constant [1.0 : f32, 0.0 : f32] : complex<f32>
+  %complex = complex.constant [1.0 : f32, 0.0 : f32] : complex<f32>
   %1 = complex.im %complex : complex<f32>
   return %1 : f32
 }
diff --git a/mlir/test/Dialect/Complex/invalid.mlir b/mlir/test/Dialect/Complex/invalid.mlir
new file mode 100644
index 0000000000000..ec046effacf8c
--- /dev/null
+++ b/mlir/test/Dialect/Complex/invalid.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-opt -split-input-file %s -verify-diagnostics
+
+func @complex_constant_wrong_array_attribute_length() {
+  // expected-error @+1 {{requires 'value' to be a complex constant, represented as array of two values}}
+  %0 = complex.constant [1.0 : f32] : complex<f32>
+  return
+}
+
+// -----
+
+func @complex_constant_wrong_element_types() {
+  // expected-error @+1 {{requires attribute's element types ('f32', 'f32') to match the element type of the op's return type ('f64')}}
+  %0 = complex.constant [1.0 : f32, -1.0 : f32] : complex<f64>
+  return
+}
+
+// -----
+
+func @complex_constant_two_different_element_types() {
+  // expected-error @+1 {{requires attribute's element types ('f32', 'f64') to match the element type of the op's return type ('f64')}}
+  %0 = complex.constant [1.0 : f32, -1.0 : f64] : complex<f64>
+  return
+}
diff --git a/mlir/test/Dialect/Complex/ops.mlir b/mlir/test/Dialect/Complex/ops.mlir
index 3fc0e9299c0fd..75bb082efb2ab 100644
--- a/mlir/test/Dialect/Complex/ops.mlir
+++ b/mlir/test/Dialect/Complex/ops.mlir
@@ -5,6 +5,12 @@
 // CHECK-LABEL: func @ops(
 // CHECK-SAME:            %[[F:.*]]: f32) {
 func @ops(%f: f32) {
+  // CHECK: complex.constant [1.{{.*}}, -1.{{.*}}] : complex<f64>
+  %cst_f64 = complex.constant [0.1, -1.0] : complex<f64>
+
+  // CHECK: complex.constant [1.{{.*}} : f32, -1.{{.*}} : f32] : complex<f32>
+  %cst_f32 = complex.constant [0.1 : f32, -1.0 : f32] : complex<f32>
+
   // CHECK: %[[C:.*]] = complex.create %[[F]], %[[F]] : complex<f32>
   %complex = complex.create %f, %f : complex<f32>
 
@@ -51,4 +57,3 @@ func @ops(%f: f32) {
   %diff = complex.sub %complex, %complex : complex<f32>
   return
 }
-
diff --git a/mlir/test/Dialect/Standard/invalid.mlir b/mlir/test/Dialect/Standard/invalid.mlir
index defd1aed97b1a..836158dd2160b 100644
--- a/mlir/test/Dialect/Standard/invalid.mlir
+++ b/mlir/test/Dialect/Standard/invalid.mlir
@@ -8,30 +8,6 @@ func @unsupported_attribute() {
 
 // -----
 
-func @complex_constant_wrong_array_attribute_length() {
-  // expected-error @+1 {{requires 'value' to be a complex constant, represented as array of two values}}
-  %0 = constant [1.0 : f32] : complex<f32>
-  return
-}
-
-// -----
-
-func @complex_constant_wrong_element_types() {
-  // expected-error @+1 {{requires attribute's element types ('f32', 'f32') to match the element type of the op's return type ('f64')}}
-  %0 = constant [1.0 : f32, -1.0 : f32] : complex<f64>
-  return
-}
-
-// -----
-
-func @complex_constant_two_different_element_types() {
-  // expected-error @+1 {{requires attribute's element types ('f32', 'f64') to match the element type of the op's return type ('f64')}}
-  %0 = constant [1.0 : f32, -1.0 : f64] : complex<f64>
-  return
-}
-
-// -----
-
 func @return_i32_f32() -> (i32, f32) {
   %0 = arith.constant 1 : i32
   %1 = arith.constant 1. : f32
diff --git a/mlir/test/Dialect/Standard/ops.mlir b/mlir/test/Dialect/Standard/ops.mlir
index 64322f066b354..1c3570eedb7ea 100644
--- a/mlir/test/Dialect/Standard/ops.mlir
+++ b/mlir/test/Dialect/Standard/ops.mlir
@@ -51,18 +51,6 @@ func @switch_i64(%flag : i64, %caseOperand : i32) {
     return
 }
 
-// CHECK-LABEL: func @constant_complex_f32(
-func @constant_complex_f32() -> complex<f32> {
-  %result = constant [0.1 : f32, -1.0 : f32] : complex<f32>
-  return %result : complex<f32>
-}
-
-// CHECK-LABEL: func @constant_complex_f64(
-func @constant_complex_f64() -> complex<f64> {
-  %result = constant [0.1 : f64, -1.0 : f64] : complex<f64>
-  return %result : complex<f64>
-}
-
 // CHECK-LABEL: func @vector_splat_0d(
 func @vector_splat_0d(%a: f32) -> vector<f32> {
   // CHECK: splat %{{.*}} : vector<f32>

From 632a4f8829425325c9d5b110a78bfc6e0bb855e9 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Tue, 25 Jan 2022 18:41:02 -0800
Subject: [PATCH 777/946] [mlir] Move std.generic_atomic_rmw to the memref
 dialect

This is part of splitting up the standard dialect. The move makes sense anyways,
given that the memref dialect already holds memref.atomic_rmw which is the non-region
sibling operation of std.generic_atomic_rmw (the relationship is even more clear given
they have nearly the same description % how they represent the inner computation).

Differential Revision: https://reviews.llvm.org/D118209
---
 .../mlir/Dialect/MemRef/IR/MemRefOps.td       |  79 +++++++++-
 .../mlir/Dialect/StandardOps/IR/Ops.td        |  70 --------
 .../Conversion/MemRefToLLVM/MemRefToLLVM.cpp  | 149 ++++++++++++++++--
 .../StandardToLLVM/StandardToLLVM.cpp         | 135 ----------------
 mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp      |  83 ++++++++++
 mlir/lib/Dialect/StandardOps/IR/Ops.cpp       |  82 ----------
 .../StandardOps/Transforms/ExpandOps.cpp      |  12 +-
 .../MemRefToLLVM/memref-to-llvm.mlir          |  19 +++
 .../StandardToLLVM/standard-to-llvm.mlir      |  27 ----
 mlir/test/Dialect/MemRef/invalid.mlir         |  60 +++++++
 mlir/test/Dialect/MemRef/ops.mlir             |  14 ++
 mlir/test/Dialect/Standard/expand-ops.mlir    |   4 +-
 mlir/test/IR/core-ops.mlir                    |  14 --
 mlir/test/IR/invalid-ops.mlir                 |  60 -------
 14 files changed, 395 insertions(+), 413 deletions(-)

diff --git a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
index 49f716dbf9b23..b4a7002726ce6 100644
--- a/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
+++ b/mlir/include/mlir/Dialect/MemRef/IR/MemRefOps.td
@@ -698,6 +698,81 @@ def MemRef_DmaWaitOp : MemRef_Op<"dma_wait"> {
   let hasFolder = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// GenericAtomicRMWOp
+//===----------------------------------------------------------------------===//
+
+def GenericAtomicRMWOp : MemRef_Op<"generic_atomic_rmw", [
+      SingleBlockImplicitTerminator<"AtomicYieldOp">,
+      TypesMatchWith<"result type matches element type of memref",
+                     "memref", "result",
+                     "$_self.cast<MemRefType>().getElementType()">
+    ]> {
+  let summary = "atomic read-modify-write operation with a region";
+  let description = [{
+    The `memref.generic_atomic_rmw` operation provides a way to perform a
+    read-modify-write sequence that is free from data races. The memref operand
+    represents the buffer that the read and write will be performed against, as
+    accessed by the specified indices. The arity of the indices is the rank of
+    the memref. The result represents the latest value that was stored. The
+    region contains the code for the modification itself. The entry block has
+    a single argument that represents the value stored in `memref[indices]`
+    before the write is performed. No side-effecting ops are allowed in the
+    body of `GenericAtomicRMWOp`.
+
+    Example:
+
+    ```mlir
+    %x = memref.generic_atomic_rmw %I[%i] : memref<10xf32> {
+      ^bb0(%current_value : f32):
+        %c1 = arith.constant 1.0 : f32
+        %inc = arith.addf %c1, %current_value : f32
+        memref.atomic_yield %inc : f32
+    }
+    ```
+  }];
+
+  let arguments = (ins
+      MemRefOf<[AnySignlessInteger, AnyFloat]>:$memref,
+      Variadic<Index>:$indices);
+
+  let results = (outs
+      AnyTypeOf<[AnySignlessInteger, AnyFloat]>:$result);
+
+  let regions = (region AnyRegion:$atomic_body);
+
+  let skipDefaultBuilders = 1;
+  let builders = [OpBuilder<(ins "Value":$memref, "ValueRange":$ivs)>];
+
+  let extraClassDeclaration = [{
+    // TODO: remove post migrating callers.
+    Region &body() { return getRegion(); }
+
+    // The value stored in memref[ivs].
+    Value getCurrentValue() {
+      return getRegion().getArgument(0);
+    }
+    MemRefType getMemRefType() {
+      return memref().getType().cast<MemRefType>();
+    }
+  }];
+}
+
+def AtomicYieldOp : MemRef_Op<"atomic_yield", [
+      HasParent<"GenericAtomicRMWOp">,
+      NoSideEffect,
+      Terminator
+    ]> {
+  let summary = "yield operation for GenericAtomicRMWOp";
+  let description = [{
+    "memref.atomic_yield" yields an SSA value from a
+    GenericAtomicRMWOp region.
+  }];
+
+  let arguments = (ins AnyType:$result);
+  let assemblyFormat = "$result attr-dict `:` type($result)";
+}
+
 //===----------------------------------------------------------------------===//
 // GetGlobalOp
 //===----------------------------------------------------------------------===//
@@ -1687,7 +1762,7 @@ def AtomicRMWOp : MemRef_Op<"atomic_rmw", [
     ]> {
   let summary = "atomic read-modify-write operation";
   let description = [{
-    The `atomic_rmw` operation provides a way to perform a read-modify-write
+    The `memref.atomic_rmw` operation provides a way to perform a read-modify-write
     sequence that is free from data races. The kind enumeration specifies the
     modification to perform. The value operand represents the new value to be
     applied during the modification. The memref operand represents the buffer
@@ -1698,7 +1773,7 @@ def AtomicRMWOp : MemRef_Op<"atomic_rmw", [
     Example:
 
     ```mlir
-    %x = arith.atomic_rmw "addf" %value, %I[%i] : (f32, memref<10xf32>) -> f32
+    %x = memref.atomic_rmw "addf" %value, %I[%i] : (f32, memref<10xf32>) -> f32
     ```
   }];
 
diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
index 794f0157ef144..deedafb66118f 100644
--- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
+++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td
@@ -178,76 +178,6 @@ def AssertOp : Std_Op<"assert"> {
   let hasCanonicalizeMethod = 1;
 }
 
-def GenericAtomicRMWOp : Std_Op<"generic_atomic_rmw", [
-      SingleBlockImplicitTerminator<"AtomicYieldOp">,
-      TypesMatchWith<"result type matches element type of memref",
-                     "memref", "result",
-                     "$_self.cast<MemRefType>().getElementType()">
-    ]> {
-  let summary = "atomic read-modify-write operation with a region";
-  let description = [{
-    The `generic_atomic_rmw` operation provides a way to perform a read-modify-write
-    sequence that is free from data races. The memref operand represents the
-    buffer that the read and write will be performed against, as accessed by
-    the specified indices. The arity of the indices is the rank of the memref.
-    The result represents the latest value that was stored. The region contains
-    the code for the modification itself. The entry block has a single argument
-    that represents the value stored in `memref[indices]` before the write is
-    performed. No side-effecting ops are allowed in the body of
-    `GenericAtomicRMWOp`.
-
-    Example:
-
-    ```mlir
-    %x = generic_atomic_rmw %I[%i] : memref<10xf32> {
-      ^bb0(%current_value : f32):
-        %c1 = arith.constant 1.0 : f32
-        %inc = arith.addf %c1, %current_value : f32
-        atomic_yield %inc : f32
-    }
-    ```
-  }];
-
-  let arguments = (ins
-      MemRefOf<[AnySignlessInteger, AnyFloat]>:$memref,
-      Variadic<Index>:$indices);
-
-  let results = (outs
-      AnyTypeOf<[AnySignlessInteger, AnyFloat]>:$result);
-
-  let regions = (region AnyRegion:$atomic_body);
-
-  let skipDefaultBuilders = 1;
-  let builders = [OpBuilder<(ins "Value":$memref, "ValueRange":$ivs)>];
-
-  let extraClassDeclaration = [{
-    // TODO: remove post migrating callers.
-    Region &body() { return getRegion(); }
-
-    // The value stored in memref[ivs].
-    Value getCurrentValue() {
-      return getRegion().getArgument(0);
-    }
-    MemRefType getMemRefType() {
-      return getMemref().getType().cast<MemRefType>();
-    }
-  }];
-}
-
-def AtomicYieldOp : Std_Op<"atomic_yield", [
-      HasParent<"GenericAtomicRMWOp">,
-      NoSideEffect,
-      Terminator
-    ]> {
-  let summary = "yield operation for GenericAtomicRMWOp";
-  let description = [{
-    "atomic_yield" yields an SSA value from a GenericAtomicRMWOp region.
-  }];
-
-  let arguments = (ins AnyType:$result);
-  let assemblyFormat = "$result attr-dict `:` type($result)";
-}
-
 //===----------------------------------------------------------------------===//
 // BranchOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
index 1a3384ca2c040..81f842e80e352 100644
--- a/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
+++ b/mlir/lib/Conversion/MemRefToLLVM/MemRefToLLVM.cpp
@@ -412,6 +412,139 @@ struct DimOpLowering : public ConvertOpToLLVMPattern<memref::DimOp> {
   }
 };
 
+/// Common base for load and store operations on MemRefs. Restricts the match
+/// to supported MemRef types. Provides functionality to emit code accessing a
+/// specific element of the underlying data buffer.
+template <typename Derived>
+struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
+  using ConvertOpToLLVMPattern<Derived>::ConvertOpToLLVMPattern;
+  using ConvertOpToLLVMPattern<Derived>::isConvertibleAndHasIdentityMaps;
+  using Base = LoadStoreOpLowering<Derived>;
+
+  LogicalResult match(Derived op) const override {
+    MemRefType type = op.getMemRefType();
+    return isConvertibleAndHasIdentityMaps(type) ? success() : failure();
+  }
+};
+
+/// Wrap a llvm.cmpxchg operation in a while loop so that the operation can be
+/// retried until it succeeds in atomically storing a new value into memory.
+///
+///      +---------------------------------+
+///      |   <code before the AtomicRMWOp> |
+///      |   <compute initial %loaded>     |
+///      |   br loop(%loaded)              |
+///      +---------------------------------+
+///             |
+///  -------|   |
+///  |      v   v
+///  |   +--------------------------------+
+///  |   | loop(%loaded):                 |
+///  |   |   <body contents>              |
+///  |   |   %pair = cmpxchg              |
+///  |   |   %ok = %pair[0]               |
+///  |   |   %new = %pair[1]              |
+///  |   |   cond_br %ok, end, loop(%new) |
+///  |   +--------------------------------+
+///  |          |        |
+///  |-----------        |
+///                      v
+///      +--------------------------------+
+///      | end:                           |
+///      |   <code after the AtomicRMWOp> |
+///      +--------------------------------+
+///
+struct GenericAtomicRMWOpLowering
+    : public LoadStoreOpLowering<memref::GenericAtomicRMWOp> {
+  using Base::Base;
+
+  LogicalResult
+  matchAndRewrite(memref::GenericAtomicRMWOp atomicOp, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    auto loc = atomicOp.getLoc();
+    Type valueType = typeConverter->convertType(atomicOp.getResult().getType());
+
+    // Split the block into initial, loop, and ending parts.
+    auto *initBlock = rewriter.getInsertionBlock();
+    auto *loopBlock = rewriter.createBlock(
+        initBlock->getParent(), std::next(Region::iterator(initBlock)),
+        valueType, loc);
+    auto *endBlock = rewriter.createBlock(
+        loopBlock->getParent(), std::next(Region::iterator(loopBlock)));
+
+    // Operations range to be moved to `endBlock`.
+    auto opsToMoveStart = atomicOp->getIterator();
+    auto opsToMoveEnd = initBlock->back().getIterator();
+
+    // Compute the loaded value and branch to the loop block.
+    rewriter.setInsertionPointToEnd(initBlock);
+    auto memRefType = atomicOp.memref().getType().cast<MemRefType>();
+    auto dataPtr = getStridedElementPtr(loc, memRefType, adaptor.memref(),
+                                        adaptor.indices(), rewriter);
+    Value init = rewriter.create<LLVM::LoadOp>(loc, dataPtr);
+    rewriter.create<LLVM::BrOp>(loc, init, loopBlock);
+
+    // Prepare the body of the loop block.
+    rewriter.setInsertionPointToStart(loopBlock);
+
+    // Clone the GenericAtomicRMWOp region and extract the result.
+    auto loopArgument = loopBlock->getArgument(0);
+    BlockAndValueMapping mapping;
+    mapping.map(atomicOp.getCurrentValue(), loopArgument);
+    Block &entryBlock = atomicOp.body().front();
+    for (auto &nestedOp : entryBlock.without_terminator()) {
+      Operation *clone = rewriter.clone(nestedOp, mapping);
+      mapping.map(nestedOp.getResults(), clone->getResults());
+    }
+    Value result = mapping.lookup(entryBlock.getTerminator()->getOperand(0));
+
+    // Prepare the epilog of the loop block.
+    // Append the cmpxchg op to the end of the loop block.
+    auto successOrdering = LLVM::AtomicOrdering::acq_rel;
+    auto failureOrdering = LLVM::AtomicOrdering::monotonic;
+    auto boolType = IntegerType::get(rewriter.getContext(), 1);
+    auto pairType = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
+                                                     {valueType, boolType});
+    auto cmpxchg = rewriter.create<LLVM::AtomicCmpXchgOp>(
+        loc, pairType, dataPtr, loopArgument, result, successOrdering,
+        failureOrdering);
+    // Extract the %new_loaded and %ok values from the pair.
+    Value newLoaded = rewriter.create<LLVM::ExtractValueOp>(
+        loc, valueType, cmpxchg, rewriter.getI64ArrayAttr({0}));
+    Value ok = rewriter.create<LLVM::ExtractValueOp>(
+        loc, boolType, cmpxchg, rewriter.getI64ArrayAttr({1}));
+
+    // Conditionally branch to the end or back to the loop depending on %ok.
+    rewriter.create<LLVM::CondBrOp>(loc, ok, endBlock, ArrayRef<Value>(),
+                                    loopBlock, newLoaded);
+
+    rewriter.setInsertionPointToEnd(endBlock);
+    moveOpsRange(atomicOp.getResult(), newLoaded, std::next(opsToMoveStart),
+                 std::next(opsToMoveEnd), rewriter);
+
+    // The 'result' of the atomic_rmw op is the newly loaded value.
+    rewriter.replaceOp(atomicOp, {newLoaded});
+
+    return success();
+  }
+
+private:
+  // Clones a segment of ops [start, end) and erases the original.
+  void moveOpsRange(ValueRange oldResult, ValueRange newResult,
+                    Block::iterator start, Block::iterator end,
+                    ConversionPatternRewriter &rewriter) const {
+    BlockAndValueMapping mapping;
+    mapping.map(oldResult, newResult);
+    SmallVector<Operation *, 2> opsToErase;
+    for (auto it = start; it != end; ++it) {
+      rewriter.clone(*it, mapping);
+      opsToErase.push_back(&*it);
+    }
+    for (auto *it : opsToErase)
+      rewriter.eraseOp(it);
+  }
+};
+
 /// Returns the LLVM type of the global variable given the memref type `type`.
 static Type convertGlobalMemrefTypeToLLVM(MemRefType type,
                                           LLVMTypeConverter &typeConverter) {
@@ -520,21 +653,6 @@ struct GetGlobalMemrefOpLowering : public AllocLikeOpLLVMLowering {
   }
 };
 
-// Common base for load and store operations on MemRefs. Restricts the match
-// to supported MemRef types. Provides functionality to emit code accessing a
-// specific element of the underlying data buffer.
-template <typename Derived>
-struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
-  using ConvertOpToLLVMPattern<Derived>::ConvertOpToLLVMPattern;
-  using ConvertOpToLLVMPattern<Derived>::isConvertibleAndHasIdentityMaps;
-  using Base = LoadStoreOpLowering<Derived>;
-
-  LogicalResult match(Derived op) const override {
-    MemRefType type = op.getMemRefType();
-    return isConvertibleAndHasIdentityMaps(type) ? success() : failure();
-  }
-};
-
 // Load operation is lowered to obtaining a pointer to the indexed element
 // and loading it.
 struct LoadOpLowering : public LoadStoreOpLowering<memref::LoadOp> {
@@ -1683,6 +1801,7 @@ void mlir::populateMemRefToLLVMConversionPatterns(LLVMTypeConverter &converter,
       AtomicRMWOpLowering,
       AssumeAlignmentOpLowering,
       DimOpLowering,
+      GenericAtomicRMWOpLowering,
       GlobalMemrefOpLowering,
       GetGlobalMemrefOpLowering,
       LoadOpLowering,
diff --git a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
index 97a3be46b7ee8..b68587d43c777 100644
--- a/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
+++ b/mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp
@@ -565,21 +565,6 @@ struct UnrealizedConversionCastOpLowering
   }
 };
 
-// Common base for load and store operations on MemRefs.  Restricts the match
-// to supported MemRef types. Provides functionality to emit code accessing a
-// specific element of the underlying data buffer.
-template <typename Derived>
-struct LoadStoreOpLowering : public ConvertOpToLLVMPattern<Derived> {
-  using ConvertOpToLLVMPattern<Derived>::ConvertOpToLLVMPattern;
-  using ConvertOpToLLVMPattern<Derived>::isConvertibleAndHasIdentityMaps;
-  using Base = LoadStoreOpLowering<Derived>;
-
-  LogicalResult match(Derived op) const override {
-    MemRefType type = op.getMemRefType();
-    return isConvertibleAndHasIdentityMaps(type) ? success() : failure();
-  }
-};
-
 // Base class for LLVM IR lowering terminator operations with successors.
 template <typename SourceOp, typename TargetOp>
 struct OneToOneLLVMTerminatorLowering
@@ -771,125 +756,6 @@ struct SplatNdOpLowering : public ConvertOpToLLVMPattern<SplatOp> {
   }
 };
 
-/// Wrap a llvm.cmpxchg operation in a while loop so that the operation can be
-/// retried until it succeeds in atomically storing a new value into memory.
-///
-///      +---------------------------------+
-///      |   <code before the AtomicRMWOp> |
-///      |   <compute initial %loaded>     |
-///      |   br loop(%loaded)              |
-///      +---------------------------------+
-///             |
-///  -------|   |
-///  |      v   v
-///  |   +--------------------------------+
-///  |   | loop(%loaded):                 |
-///  |   |   <body contents>              |
-///  |   |   %pair = cmpxchg              |
-///  |   |   %ok = %pair[0]               |
-///  |   |   %new = %pair[1]              |
-///  |   |   cond_br %ok, end, loop(%new) |
-///  |   +--------------------------------+
-///  |          |        |
-///  |-----------        |
-///                      v
-///      +--------------------------------+
-///      | end:                           |
-///      |   <code after the AtomicRMWOp> |
-///      +--------------------------------+
-///
-struct GenericAtomicRMWOpLowering
-    : public LoadStoreOpLowering<GenericAtomicRMWOp> {
-  using Base::Base;
-
-  LogicalResult
-  matchAndRewrite(GenericAtomicRMWOp atomicOp, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-
-    auto loc = atomicOp.getLoc();
-    Type valueType = typeConverter->convertType(atomicOp.getResult().getType());
-
-    // Split the block into initial, loop, and ending parts.
-    auto *initBlock = rewriter.getInsertionBlock();
-    auto *loopBlock = rewriter.createBlock(
-        initBlock->getParent(), std::next(Region::iterator(initBlock)),
-        valueType, loc);
-    auto *endBlock = rewriter.createBlock(
-        loopBlock->getParent(), std::next(Region::iterator(loopBlock)));
-
-    // Operations range to be moved to `endBlock`.
-    auto opsToMoveStart = atomicOp->getIterator();
-    auto opsToMoveEnd = initBlock->back().getIterator();
-
-    // Compute the loaded value and branch to the loop block.
-    rewriter.setInsertionPointToEnd(initBlock);
-    auto memRefType = atomicOp.getMemref().getType().cast<MemRefType>();
-    auto dataPtr = getStridedElementPtr(loc, memRefType, adaptor.getMemref(),
-                                        adaptor.getIndices(), rewriter);
-    Value init = rewriter.create<LLVM::LoadOp>(loc, dataPtr);
-    rewriter.create<LLVM::BrOp>(loc, init, loopBlock);
-
-    // Prepare the body of the loop block.
-    rewriter.setInsertionPointToStart(loopBlock);
-
-    // Clone the GenericAtomicRMWOp region and extract the result.
-    auto loopArgument = loopBlock->getArgument(0);
-    BlockAndValueMapping mapping;
-    mapping.map(atomicOp.getCurrentValue(), loopArgument);
-    Block &entryBlock = atomicOp.body().front();
-    for (auto &nestedOp : entryBlock.without_terminator()) {
-      Operation *clone = rewriter.clone(nestedOp, mapping);
-      mapping.map(nestedOp.getResults(), clone->getResults());
-    }
-    Value result = mapping.lookup(entryBlock.getTerminator()->getOperand(0));
-
-    // Prepare the epilog of the loop block.
-    // Append the cmpxchg op to the end of the loop block.
-    auto successOrdering = LLVM::AtomicOrdering::acq_rel;
-    auto failureOrdering = LLVM::AtomicOrdering::monotonic;
-    auto boolType = IntegerType::get(rewriter.getContext(), 1);
-    auto pairType = LLVM::LLVMStructType::getLiteral(rewriter.getContext(),
-                                                     {valueType, boolType});
-    auto cmpxchg = rewriter.create<LLVM::AtomicCmpXchgOp>(
-        loc, pairType, dataPtr, loopArgument, result, successOrdering,
-        failureOrdering);
-    // Extract the %new_loaded and %ok values from the pair.
-    Value newLoaded = rewriter.create<LLVM::ExtractValueOp>(
-        loc, valueType, cmpxchg, rewriter.getI64ArrayAttr({0}));
-    Value ok = rewriter.create<LLVM::ExtractValueOp>(
-        loc, boolType, cmpxchg, rewriter.getI64ArrayAttr({1}));
-
-    // Conditionally branch to the end or back to the loop depending on %ok.
-    rewriter.create<LLVM::CondBrOp>(loc, ok, endBlock, ArrayRef<Value>(),
-                                    loopBlock, newLoaded);
-
-    rewriter.setInsertionPointToEnd(endBlock);
-    moveOpsRange(atomicOp.getResult(), newLoaded, std::next(opsToMoveStart),
-                 std::next(opsToMoveEnd), rewriter);
-
-    // The 'result' of the atomic_rmw op is the newly loaded value.
-    rewriter.replaceOp(atomicOp, {newLoaded});
-
-    return success();
-  }
-
-private:
-  // Clones a segment of ops [start, end) and erases the original.
-  void moveOpsRange(ValueRange oldResult, ValueRange newResult,
-                    Block::iterator start, Block::iterator end,
-                    ConversionPatternRewriter &rewriter) const {
-    BlockAndValueMapping mapping;
-    mapping.map(oldResult, newResult);
-    SmallVector<Operation *, 2> opsToErase;
-    for (auto it = start; it != end; ++it) {
-      rewriter.clone(*it, mapping);
-      opsToErase.push_back(&*it);
-    }
-    for (auto *it : opsToErase)
-      rewriter.eraseOp(it);
-  }
-};
-
 } // namespace
 
 void mlir::populateStdToLLVMFuncOpConversionPattern(
@@ -911,7 +777,6 @@ void mlir::populateStdToLLVMConversionPatterns(LLVMTypeConverter &converter,
       CallOpLowering,
       CondBranchOpLowering,
       ConstantOpLowering,
-      GenericAtomicRMWOpLowering,
       ReturnOpLowering,
       SelectOpLowering,
       SplatOpLowering,
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 211af3045b9d8..0c29607d4601f 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -945,6 +945,89 @@ static LogicalResult verify(DmaWaitOp op) {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// GenericAtomicRMWOp
+//===----------------------------------------------------------------------===//
+
+void GenericAtomicRMWOp::build(OpBuilder &builder, OperationState &result,
+                               Value memref, ValueRange ivs) {
+  result.addOperands(memref);
+  result.addOperands(ivs);
+
+  if (auto memrefType = memref.getType().dyn_cast<MemRefType>()) {
+    Type elementType = memrefType.getElementType();
+    result.addTypes(elementType);
+
+    Region *bodyRegion = result.addRegion();
+    bodyRegion->push_back(new Block());
+    bodyRegion->addArgument(elementType, memref.getLoc());
+  }
+}
+
+static LogicalResult verify(GenericAtomicRMWOp op) {
+  auto &body = op.getRegion();
+  if (body.getNumArguments() != 1)
+    return op.emitOpError("expected single number of entry block arguments");
+
+  if (op.getResult().getType() != body.getArgument(0).getType())
+    return op.emitOpError(
+        "expected block argument of the same type result type");
+
+  bool hasSideEffects =
+      body.walk([&](Operation *nestedOp) {
+            if (MemoryEffectOpInterface::hasNoEffect(nestedOp))
+              return WalkResult::advance();
+            nestedOp->emitError(
+                "body of 'memref.generic_atomic_rmw' should contain "
+                "only operations with no side effects");
+            return WalkResult::interrupt();
+          })
+          .wasInterrupted();
+  return hasSideEffects ? failure() : success();
+}
+
+static ParseResult parseGenericAtomicRMWOp(OpAsmParser &parser,
+                                           OperationState &result) {
+  OpAsmParser::OperandType memref;
+  Type memrefType;
+  SmallVector<OpAsmParser::OperandType, 4> ivs;
+
+  Type indexType = parser.getBuilder().getIndexType();
+  if (parser.parseOperand(memref) ||
+      parser.parseOperandList(ivs, OpAsmParser::Delimiter::Square) ||
+      parser.parseColonType(memrefType) ||
+      parser.resolveOperand(memref, memrefType, result.operands) ||
+      parser.resolveOperands(ivs, indexType, result.operands))
+    return failure();
+
+  Region *body = result.addRegion();
+  if (parser.parseRegion(*body, llvm::None, llvm::None) ||
+      parser.parseOptionalAttrDict(result.attributes))
+    return failure();
+  result.types.push_back(memrefType.cast<MemRefType>().getElementType());
+  return success();
+}
+
+static void print(OpAsmPrinter &p, GenericAtomicRMWOp op) {
+  p << ' ' << op.memref() << "[" << op.indices()
+    << "] : " << op.memref().getType() << ' ';
+  p.printRegion(op.getRegion());
+  p.printOptionalAttrDict(op->getAttrs());
+}
+
+//===----------------------------------------------------------------------===//
+// AtomicYieldOp
+//===----------------------------------------------------------------------===//
+
+static LogicalResult verify(AtomicYieldOp op) {
+  Type parentType = op->getParentOp()->getResultTypes().front();
+  Type resultType = op.result().getType();
+  if (parentType != resultType)
+    return op.emitOpError() << "types mismatch between yield op: " << resultType
+                            << " and its parent: " << parentType;
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // GlobalOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
index 7487aea0902de..ffe155ad214e5 100644
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -131,88 +131,6 @@ LogicalResult AssertOp::canonicalize(AssertOp op, PatternRewriter &rewriter) {
   return failure();
 }
 
-//===----------------------------------------------------------------------===//
-// GenericAtomicRMWOp
-//===----------------------------------------------------------------------===//
-
-void GenericAtomicRMWOp::build(OpBuilder &builder, OperationState &result,
-                               Value memref, ValueRange ivs) {
-  result.addOperands(memref);
-  result.addOperands(ivs);
-
-  if (auto memrefType = memref.getType().dyn_cast<MemRefType>()) {
-    Type elementType = memrefType.getElementType();
-    result.addTypes(elementType);
-
-    Region *bodyRegion = result.addRegion();
-    bodyRegion->push_back(new Block());
-    bodyRegion->addArgument(elementType, memref.getLoc());
-  }
-}
-
-static LogicalResult verify(GenericAtomicRMWOp op) {
-  auto &body = op.getRegion();
-  if (body.getNumArguments() != 1)
-    return op.emitOpError("expected single number of entry block arguments");
-
-  if (op.getResult().getType() != body.getArgument(0).getType())
-    return op.emitOpError(
-        "expected block argument of the same type result type");
-
-  bool hasSideEffects =
-      body.walk([&](Operation *nestedOp) {
-            if (MemoryEffectOpInterface::hasNoEffect(nestedOp))
-              return WalkResult::advance();
-            nestedOp->emitError("body of 'generic_atomic_rmw' should contain "
-                                "only operations with no side effects");
-            return WalkResult::interrupt();
-          })
-          .wasInterrupted();
-  return hasSideEffects ? failure() : success();
-}
-
-static ParseResult parseGenericAtomicRMWOp(OpAsmParser &parser,
-                                           OperationState &result) {
-  OpAsmParser::OperandType memref;
-  Type memrefType;
-  SmallVector<OpAsmParser::OperandType, 4> ivs;
-
-  Type indexType = parser.getBuilder().getIndexType();
-  if (parser.parseOperand(memref) ||
-      parser.parseOperandList(ivs, OpAsmParser::Delimiter::Square) ||
-      parser.parseColonType(memrefType) ||
-      parser.resolveOperand(memref, memrefType, result.operands) ||
-      parser.resolveOperands(ivs, indexType, result.operands))
-    return failure();
-
-  Region *body = result.addRegion();
-  if (parser.parseRegion(*body, llvm::None, llvm::None) ||
-      parser.parseOptionalAttrDict(result.attributes))
-    return failure();
-  result.types.push_back(memrefType.cast<MemRefType>().getElementType());
-  return success();
-}
-
-static void print(OpAsmPrinter &p, GenericAtomicRMWOp op) {
-  p << ' ' << op.getMemref() << "[" << op.getIndices()
-    << "] : " << op.getMemref().getType() << ' ';
-  p.printRegion(op.getRegion());
-  p.printOptionalAttrDict(op->getAttrs());
-}
-
-//===----------------------------------------------------------------------===//
-// AtomicYieldOp
-//===----------------------------------------------------------------------===//
-
-static LogicalResult verify(AtomicYieldOp op) {
-  Type parentType = op->getParentOp()->getResultTypes().front();
-  Type resultType = op.getResult().getType();
-  if (parentType != resultType)
-    return op.emitOpError() << "types mismatch between yield op: " << resultType
-                            << " and its parent: " << parentType;
-  return success();
-}
-
 //===----------------------------------------------------------------------===//
 // BranchOp
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp
index d7c7755e24029..e5cf08da4904a 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp
+++ b/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp
@@ -28,17 +28,17 @@ namespace {
 
 /// Converts `atomic_rmw` that cannot be lowered to a simple atomic op with
 /// AtomicRMWOpLowering pattern, e.g. with "minf" or "maxf" attributes, to
-/// `generic_atomic_rmw` with the expanded code.
+/// `memref.generic_atomic_rmw` with the expanded code.
 ///
 /// %x = atomic_rmw "maxf" %fval, %F[%i] : (f32, memref<10xf32>) -> f32
 ///
 /// will be lowered to
 ///
-/// %x = std.generic_atomic_rmw %F[%i] : memref<10xf32> {
+/// %x = memref.generic_atomic_rmw %F[%i] : memref<10xf32> {
 /// ^bb0(%current: f32):
 ///   %cmp = arith.cmpf "ogt", %current, %fval : f32
 ///   %new_value = select %cmp, %current, %fval : f32
-///   atomic_yield %new_value : f32
+///   memref.atomic_yield %new_value : f32
 /// }
 struct AtomicRMWOpConverter : public OpRewritePattern<memref::AtomicRMWOp> {
 public:
@@ -59,8 +59,8 @@ struct AtomicRMWOpConverter : public OpRewritePattern<memref::AtomicRMWOp> {
     }
 
     auto loc = op.getLoc();
-    auto genericOp =
-        rewriter.create<GenericAtomicRMWOp>(loc, op.memref(), op.indices());
+    auto genericOp = rewriter.create<memref::GenericAtomicRMWOp>(
+        loc, op.memref(), op.indices());
     OpBuilder bodyBuilder =
         OpBuilder::atBlockEnd(genericOp.getBody(), rewriter.getListener());
 
@@ -68,7 +68,7 @@ struct AtomicRMWOpConverter : public OpRewritePattern<memref::AtomicRMWOp> {
     Value rhs = op.value();
     Value cmp = bodyBuilder.create<arith::CmpFOp>(loc, predicate, lhs, rhs);
     Value select = bodyBuilder.create<SelectOp>(loc, cmp, lhs, rhs);
-    bodyBuilder.create<AtomicYieldOp>(loc, select);
+    bodyBuilder.create<memref::AtomicYieldOp>(loc, select);
 
     rewriter.replaceOp(op, genericOp.getResult());
     return success();
diff --git a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
index 221f019db8752..ee7d36052c4ae 100644
--- a/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
+++ b/mlir/test/Conversion/MemRefToLLVM/memref-to-llvm.mlir
@@ -892,3 +892,22 @@ func @collapse_static_shape_with_non_identity_layout(%arg: memref<1x1x8x8xf32, a
   %1 = memref.collapse_shape %arg [[0, 1, 2, 3]] : memref<1x1x8x8xf32, affine_map<(d0, d1, d2, d3)[s0] -> (d0 * 64 + s0 + d1 * 64 + d2 * 8 + d3)>> into memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
   return %1 : memref<64xf32, affine_map<(d0)[s0] -> (d0 + s0)>>
 }
+
+// -----
+
+// CHECK-LABEL: func @generic_atomic_rmw
+func @generic_atomic_rmw(%I : memref<10xi32>, %i : index) {
+  %x = memref.generic_atomic_rmw %I[%i] : memref<10xi32> {
+    ^bb0(%old_value : i32):
+      memref.atomic_yield %old_value : i32
+  }
+  // CHECK: [[init:%.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
+  // CHECK-NEXT: llvm.br ^bb1([[init]] : i32)
+  // CHECK-NEXT: ^bb1([[loaded:%.*]]: i32):
+  // CHECK-NEXT: [[pair:%.*]] = llvm.cmpxchg %{{.*}}, [[loaded]], [[loaded]]
+  // CHECK-SAME:                    acq_rel monotonic : i32
+  // CHECK-NEXT: [[new:%.*]] = llvm.extractvalue [[pair]][0]
+  // CHECK-NEXT: [[ok:%.*]] = llvm.extractvalue [[pair]][1]
+  // CHECK-NEXT: llvm.cond_br [[ok]], ^bb2, ^bb1([[new]] : i32)
+  llvm.return
+}
diff --git a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
index 0dc6bf10dc5ea..26f064fb03122 100644
--- a/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
+++ b/mlir/test/Conversion/StandardToLLVM/standard-to-llvm.mlir
@@ -486,33 +486,6 @@ func @splat(%a: vector<4xf32>, %b: f32) -> vector<4xf32> {
 
 // -----
 
-// CHECK-LABEL: func @generic_atomic_rmw
-func @generic_atomic_rmw(%I : memref<10xi32>, %i : index) -> i32 {
-  %x = generic_atomic_rmw %I[%i] : memref<10xi32> {
-    ^bb0(%old_value : i32):
-      %c1 = arith.constant 1 : i32
-      atomic_yield %c1 : i32
-  }
-  // CHECK: [[init:%.*]] = llvm.load %{{.*}} : !llvm.ptr<i32>
-  // CHECK-NEXT: llvm.br ^bb1([[init]] : i32)
-  // CHECK-NEXT: ^bb1([[loaded:%.*]]: i32):
-  // CHECK-NEXT: [[c1:%.*]] = llvm.mlir.constant(1 : i32)
-  // CHECK-NEXT: [[pair:%.*]] = llvm.cmpxchg %{{.*}}, [[loaded]], [[c1]]
-  // CHECK-SAME:                    acq_rel monotonic : i32
-  // CHECK-NEXT: [[new:%.*]] = llvm.extractvalue [[pair]][0]
-  // CHECK-NEXT: [[ok:%.*]] = llvm.extractvalue [[pair]][1]
-  // CHECK-NEXT: llvm.cond_br [[ok]], ^bb2, ^bb1([[new]] : i32)
-  // CHECK-NEXT: ^bb2:
-  %c2 = arith.constant 2 : i32
-  %add = arith.addi %c2, %x : i32
-  return %add : i32
-  // CHECK-NEXT: [[c2:%.*]] = llvm.mlir.constant(2 : i32)
-  // CHECK-NEXT: [[add:%.*]] = llvm.add [[c2]], [[new]] : i32
-  // CHECK-NEXT: llvm.return [[add]]
-}
-
-// -----
-
 // CHECK-LABEL: func @ceilf(
 // CHECK-SAME: f32
 func @ceilf(%arg0 : f32) {
diff --git a/mlir/test/Dialect/MemRef/invalid.mlir b/mlir/test/Dialect/MemRef/invalid.mlir
index 6d0cef1efc7dd..54e405b3f2ad1 100644
--- a/mlir/test/Dialect/MemRef/invalid.mlir
+++ b/mlir/test/Dialect/MemRef/invalid.mlir
@@ -910,3 +910,63 @@ func @atomic_rmw_expects_int(%I: memref<16x10xf32>, %i : index, %val : f32) {
   %x = memref.atomic_rmw addi %val, %I[%i, %i] : (f32, memref<16x10xf32>) -> f32
   return
 }
+
+// -----
+
+func @generic_atomic_rmw_wrong_arg_num(%I: memref<10xf32>, %i : index) {
+  // expected-error@+1 {{expected single number of entry block arguments}}
+  %x = memref.generic_atomic_rmw %I[%i] : memref<10xf32> {
+    ^bb0(%arg0 : f32, %arg1 : f32):
+      %c1 = arith.constant 1.0 : f32
+      memref.atomic_yield %c1 : f32
+  }
+  return
+}
+
+// -----
+
+func @generic_atomic_rmw_wrong_arg_type(%I: memref<10xf32>, %i : index) {
+  // expected-error@+1 {{expected block argument of the same type result type}}
+  %x = memref.generic_atomic_rmw %I[%i] : memref<10xf32> {
+    ^bb0(%old_value : i32):
+      %c1 = arith.constant 1.0 : f32
+      memref.atomic_yield %c1 : f32
+  }
+  return
+}
+
+// -----
+
+func @generic_atomic_rmw_result_type_mismatch(%I: memref<10xf32>, %i : index) {
+ // expected-error@+1 {{failed to verify that result type matches element type of memref}}
+ %0 = "memref.generic_atomic_rmw"(%I, %i) ({
+    ^bb0(%old_value: f32):
+      %c1 = arith.constant 1.0 : f32
+      memref.atomic_yield %c1 : f32
+    }) : (memref<10xf32>, index) -> i32
+  return
+}
+
+// -----
+
+func @generic_atomic_rmw_has_side_effects(%I: memref<10xf32>, %i : index) {
+  // expected-error@+4 {{should contain only operations with no side effects}}
+  %x = memref.generic_atomic_rmw %I[%i] : memref<10xf32> {
+    ^bb0(%old_value : f32):
+      %c1 = arith.constant 1.0 : f32
+      %buf = memref.alloc() : memref<2048xf32>
+      memref.atomic_yield %c1 : f32
+  }
+}
+
+// -----
+
+func @atomic_yield_type_mismatch(%I: memref<10xf32>, %i : index) {
+  // expected-error@+4 {{op types mismatch between yield op: 'i32' and its parent: 'f32'}}
+  %x = memref.generic_atomic_rmw %I[%i] : memref<10xf32> {
+    ^bb0(%old_value : f32):
+      %c1 = arith.constant 1 : i32
+      memref.atomic_yield %c1 : i32
+  }
+  return
+}
diff --git a/mlir/test/Dialect/MemRef/ops.mlir b/mlir/test/Dialect/MemRef/ops.mlir
index 1303a896e7eca..6191cdab02e2e 100644
--- a/mlir/test/Dialect/MemRef/ops.mlir
+++ b/mlir/test/Dialect/MemRef/ops.mlir
@@ -246,3 +246,17 @@ func @atomic_rmw(%I: memref<10xf32>, %val: f32, %i : index) {
   // CHECK: memref.atomic_rmw addf [[VAL]], [[BUF]]{{\[}}[[I]]]
   return
 }
+
+// CHECK-LABEL: func @generic_atomic_rmw
+// CHECK-SAME: ([[BUF:%.*]]: memref<1x2xf32>, [[I:%.*]]: index, [[J:%.*]]: index)
+func @generic_atomic_rmw(%I: memref<1x2xf32>, %i : index, %j : index) {
+  %x = memref.generic_atomic_rmw %I[%i, %j] : memref<1x2xf32> {
+  // CHECK-NEXT: memref.generic_atomic_rmw [[BUF]]{{\[}}[[I]], [[J]]] : memref
+    ^bb0(%old_value : f32):
+      %c1 = arith.constant 1.0 : f32
+      %out = arith.addf %c1, %old_value : f32
+      memref.atomic_yield %out : f32
+  // CHECK: index_attr = 8 : index
+  } { index_attr = 8 : index }
+  return
+}
diff --git a/mlir/test/Dialect/Standard/expand-ops.mlir b/mlir/test/Dialect/Standard/expand-ops.mlir
index cb650ffd11bd5..2a1c367ff80f0 100644
--- a/mlir/test/Dialect/Standard/expand-ops.mlir
+++ b/mlir/test/Dialect/Standard/expand-ops.mlir
@@ -6,11 +6,11 @@ func @atomic_rmw_to_generic(%F: memref<10xf32>, %f: f32, %i: index) -> f32 {
   %x = memref.atomic_rmw maxf %f, %F[%i] : (f32, memref<10xf32>) -> f32
   return %x : f32
 }
-// CHECK: %0 = generic_atomic_rmw %arg0[%arg2] : memref<10xf32> {
+// CHECK: %0 = memref.generic_atomic_rmw %arg0[%arg2] : memref<10xf32> {
 // CHECK: ^bb0([[CUR_VAL:%.*]]: f32):
 // CHECK:   [[CMP:%.*]] = arith.cmpf ogt, [[CUR_VAL]], [[f]] : f32
 // CHECK:   [[SELECT:%.*]] = select [[CMP]], [[CUR_VAL]], [[f]] : f32
-// CHECK:   atomic_yield [[SELECT]] : f32
+// CHECK:   memref.atomic_yield [[SELECT]] : f32
 // CHECK: }
 // CHECK: return %0 : f32
 
diff --git a/mlir/test/IR/core-ops.mlir b/mlir/test/IR/core-ops.mlir
index 351e8a6b39c1b..81fdac964f76a 100644
--- a/mlir/test/IR/core-ops.mlir
+++ b/mlir/test/IR/core-ops.mlir
@@ -325,20 +325,6 @@ func @unranked_tensor_load_store(%0 : memref<*xi32>, %1 : tensor<*xi32>) {
   return
 }
 
-// CHECK-LABEL: func @generic_atomic_rmw
-// CHECK-SAME: ([[BUF:%.*]]: memref<1x2xf32>, [[I:%.*]]: index, [[J:%.*]]: index)
-func @generic_atomic_rmw(%I: memref<1x2xf32>, %i : index, %j : index) {
-  %x = generic_atomic_rmw %I[%i, %j] : memref<1x2xf32> {
-  // CHECK-NEXT: generic_atomic_rmw [[BUF]]{{\[}}[[I]], [[J]]] : memref
-    ^bb0(%old_value : f32):
-      %c1 = arith.constant 1.0 : f32
-      %out = arith.addf %c1, %old_value : f32
-      atomic_yield %out : f32
-  // CHECK: index_attr = 8 : index
-  } { index_attr = 8 : index }
-  return
-}
-
 // CHECK-LABEL: func @assume_alignment
 // CHECK-SAME: %[[MEMREF:.*]]: memref<4x4xf16>
 func @assume_alignment(%0: memref<4x4xf16>) {
diff --git a/mlir/test/IR/invalid-ops.mlir b/mlir/test/IR/invalid-ops.mlir
index 9950262a481a0..6650330c7eb7c 100644
--- a/mlir/test/IR/invalid-ops.mlir
+++ b/mlir/test/IR/invalid-ops.mlir
@@ -127,63 +127,3 @@ func @invalid_splat(%v : f32) { // expected-note {{prior use here}}
   // expected-error@-1 {{expects different type than prior uses}}
   return
 }
-
-// -----
-
-func @generic_atomic_rmw_wrong_arg_num(%I: memref<10xf32>, %i : index) {
-  // expected-error@+1 {{expected single number of entry block arguments}}
-  %x = generic_atomic_rmw %I[%i] : memref<10xf32> {
-    ^bb0(%arg0 : f32, %arg1 : f32):
-      %c1 = arith.constant 1.0 : f32
-      atomic_yield %c1 : f32
-  }
-  return
-}
-
-// -----
-
-func @generic_atomic_rmw_wrong_arg_type(%I: memref<10xf32>, %i : index) {
-  // expected-error@+1 {{expected block argument of the same type result type}}
-  %x = generic_atomic_rmw %I[%i] : memref<10xf32> {
-    ^bb0(%old_value : i32):
-      %c1 = arith.constant 1.0 : f32
-      atomic_yield %c1 : f32
-  }
-  return
-}
-
-// -----
-
-func @generic_atomic_rmw_result_type_mismatch(%I: memref<10xf32>, %i : index) {
- // expected-error@+1 {{failed to verify that result type matches element type of memref}}
- %0 = "std.generic_atomic_rmw"(%I, %i) ({
-    ^bb0(%old_value: f32):
-      %c1 = arith.constant 1.0 : f32
-      atomic_yield %c1 : f32
-    }) : (memref<10xf32>, index) -> i32
-  return
-}
-
-// -----
-
-func @generic_atomic_rmw_has_side_effects(%I: memref<10xf32>, %i : index) {
-  // expected-error@+4 {{should contain only operations with no side effects}}
-  %x = generic_atomic_rmw %I[%i] : memref<10xf32> {
-    ^bb0(%old_value : f32):
-      %c1 = arith.constant 1.0 : f32
-      %buf = memref.alloc() : memref<2048xf32>
-      atomic_yield %c1 : f32
-  }
-}
-
-// -----
-
-func @atomic_yield_type_mismatch(%I: memref<10xf32>, %i : index) {
-  // expected-error@+4 {{op types mismatch between yield op: 'i32' and its parent: 'f32'}}
-  %x = generic_atomic_rmw %I[%i] : memref<10xf32> {
-    ^bb0(%old_value : f32):
-      %c1 = arith.constant 1 : i32
-      atomic_yield %c1 : i32
-  }
-  return
-}

From 2f33396e4e5db0277d0ea328c5708b65d823d695 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 13:21:58 -0500
Subject: [PATCH 778/946] AMDGPU: Switch bfi pattern test to generated checks
 and add gfx10

---
 llvm/test/CodeGen/AMDGPU/bfi_int.ll | 639 +++++++++++++++++++++++++---
 1 file changed, 573 insertions(+), 66 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
index 93f1572d70f8a..a046170f79e38 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
@@ -1,17 +1,68 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=R600,FUNC %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7 %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefixes=R600 %s
 
 ; BFI_INT Definition pattern from ISA docs
 ; (y & x) | (z & ~x)
 ;
-; FUNC-LABEL: {{^}}bfi_def:
-; R600: BFI_INT
-
-; GCN-DAG:   s_andn2_b32
-; GCN-DAG:   s_and_b32
-; GCN:   s_or_b32
 define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: bfi_def:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_andn2_b32 s6, s6, s4
+; GFX7-NEXT:    s_and_b32 s4, s5, s4
+; GFX7-NEXT:    s_or_b32 s4, s6, s4
+; GFX7-NEXT:    v_mov_b32_e32 v0, s4
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: bfi_def:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_andn2_b32 s4, s4, s2
+; GFX8-NEXT:    s_and_b32 s2, s3, s2
+; GFX8-NEXT:    s_or_b32 s2, s4, s2
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    flat_store_dword v[0:1], v2
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: bfi_def:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_clause 0x2
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX10-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_andn2_b32 s4, s4, s2
+; GFX10-NEXT:    s_and_b32 s2, s3, s2
+; GFX10-NEXT:    s_or_b32 s2, s4, s2
+; GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT:    s_endpgm
+;
+; R600-LABEL: bfi_def:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT:     BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
 entry:
   %0 = xor i32 %x, -1
   %1 = and i32 %z, %0
@@ -23,13 +74,62 @@ entry:
 
 ; SHA-256 Ch function
 ; z ^ (x & (y ^ z))
-; FUNC-LABEL: {{^}}bfi_sha256_ch:
-; R600: BFI_INT
-
-; GCN:   s_xor_b32
-; GCN:   s_and_b32
-; GCN:   s_xor_b32
 define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: bfi_sha256_ch:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_xor_b32 s5, s5, s6
+; GFX7-NEXT:    s_and_b32 s4, s4, s5
+; GFX7-NEXT:    s_xor_b32 s4, s6, s4
+; GFX7-NEXT:    v_mov_b32_e32 v0, s4
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: bfi_sha256_ch:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_xor_b32 s3, s3, s4
+; GFX8-NEXT:    s_and_b32 s2, s2, s3
+; GFX8-NEXT:    s_xor_b32 s2, s4, s2
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    flat_store_dword v[0:1], v2
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: bfi_sha256_ch:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_clause 0x2
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX10-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_xor_b32 s3, s3, s4
+; GFX10-NEXT:    s_and_b32 s2, s2, s3
+; GFX10-NEXT:    s_xor_b32 s2, s4, s2
+; GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT:    s_endpgm
+;
+; R600-LABEL: bfi_sha256_ch:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT:     BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
 entry:
   %0 = xor i32 %y, %z
   %1 = and i32 %x, %0
@@ -40,15 +140,66 @@ entry:
 
 ; SHA-256 Ma function
 ; ((x & z) | (y & (x | z)))
-; FUNC-LABEL: {{^}}bfi_sha256_ma:
-; R600: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
-; R600: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-
-; GCN: s_and_b32
-; GCN: s_or_b32
-; GCN: s_and_b32
-; GCN: s_or_b32
 define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: bfi_sha256_ma:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
+; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_and_b32 s7, s4, s6
+; GFX7-NEXT:    s_or_b32 s4, s4, s6
+; GFX7-NEXT:    s_and_b32 s4, s5, s4
+; GFX7-NEXT:    s_or_b32 s4, s7, s4
+; GFX7-NEXT:    v_mov_b32_e32 v0, s4
+; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: bfi_sha256_ma:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_and_b32 s5, s2, s4
+; GFX8-NEXT:    s_or_b32 s2, s2, s4
+; GFX8-NEXT:    s_and_b32 s2, s3, s2
+; GFX8-NEXT:    s_or_b32 s2, s5, s2
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    flat_store_dword v[0:1], v2
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: bfi_sha256_ma:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_clause 0x2
+; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX10-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_or_b32 s5, s2, s4
+; GFX10-NEXT:    s_and_b32 s2, s2, s4
+; GFX10-NEXT:    s_and_b32 s3, s3, s5
+; GFX10-NEXT:    s_or_b32 s2, s2, s3
+; GFX10-NEXT:    v_mov_b32_e32 v1, s2
+; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT:    s_endpgm
+;
+; R600-LABEL: bfi_sha256_ma:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     XOR_INT * T0.W, KC0[2].Z, KC0[2].W,
+; R600-NEXT:     BFI_INT * T0.X, PV.W, KC0[3].X, KC0[2].W,
+; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 entry:
   %0 = and i32 %x, %z
   %1 = or i32 %x, %z
@@ -58,24 +209,66 @@ entry:
   ret void
 }
 
-; FUNC-LABEL: {{^}}v_bitselect_v2i32_pat1:
-; GCN: s_waitcnt
-; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
-; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
-; GCN-NEXT: s_setpc_b64
 define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
+; GFX7-LABEL: v_bitselect_v2i32_pat1:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX7-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bitselect_v2i32_pat1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX8-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_bitselect_v2i32_pat1:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_bitselect_v2i32_pat1:
+; R600:       ; %bb.0:
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
   %xor.0 = xor <2 x i32> %a, %mask
   %and = and <2 x i32> %xor.0, %b
   %bitselect = xor <2 x i32> %and, %mask
   ret <2 x i32> %bitselect
 }
 
-; FUNC-LABEL: {{^}}v_bitselect_i64_pat_0:
-; GCN: s_waitcnt
-; GCN-NEXT: v_bfi_b32 v1, v1, v3, v5
-; GCN-NEXT: v_bfi_b32 v0, v0, v2, v4
-; GCN-NEXT: s_setpc_b64
 define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
+; GFX7-LABEL: v_bitselect_i64_pat_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfi_b32 v1, v1, v3, v5
+; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, v4
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bitselect_i64_pat_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_bfi_b32 v1, v1, v3, v5
+; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, v4
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_bitselect_i64_pat_0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v4
+; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, v5
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_bitselect_i64_pat_0:
+; R600:       ; %bb.0:
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -83,36 +276,105 @@ define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
   ret i64 %bitselect
 }
 
-; FUNC-LABEL: {{^}}v_bitselect_i64_pat_1:
-; GCN: s_waitcnt
-; GCN-NEXT: v_bfi_b32 v1, v3, v1, v5
-; GCN-NEXT: v_bfi_b32 v0, v2, v0, v4
-; GCN-NEXT: s_setpc_b64
 define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
+; GFX7-LABEL: v_bitselect_i64_pat_1:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bitselect_i64_pat_1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_bitselect_i64_pat_1:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_bitselect_i64_pat_1:
+; R600:       ; %bb.0:
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
   ret i64 %bitselect
 }
 
-; FUNC-LABEL: {{^}}v_bitselect_i64_pat_2:
-; GCN: s_waitcnt
-; GCN-DAG: v_bfi_b32 v0, v2, v0, v4
-; GCN-DAG: v_bfi_b32 v1, v3, v1, v5
-; GCN-NEXT: s_setpc_b64
 define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
+; GFX7-LABEL: v_bitselect_i64_pat_2:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bitselect_i64_pat_2:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_bitselect_i64_pat_2:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
+; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_bitselect_i64_pat_2:
+; R600:       ; %bb.0:
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
   ret i64 %bitselect
 }
 
-; FUNC-LABEL: {{^}}v_bfi_sha256_ma_i64:
-; GCN-DAG: v_xor_b32_e32 v1, v1, v3
-; GCN-DAG: v_xor_b32_e32 v0, v0, v2
-; GCN-DAG: v_bfi_b32 v1, v1, v5, v3
-; GCN-DAG: v_bfi_b32 v0, v0, v4, v2
 define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
+; GFX7-LABEL: v_bfi_sha256_ma_i64:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX7-NEXT:    v_bfi_b32 v1, v1, v5, v3
+; GFX7-NEXT:    v_bfi_b32 v0, v0, v4, v2
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bfi_sha256_ma_i64:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX8-NEXT:    v_bfi_b32 v1, v1, v5, v3
+; GFX8-NEXT:    v_bfi_b32 v0, v0, v4, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_bfi_sha256_ma_i64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v2
+; GFX10-NEXT:    v_xor_b32_e32 v1, v1, v3
+; GFX10-NEXT:    v_bfi_b32 v0, v0, v4, v2
+; GFX10-NEXT:    v_bfi_b32 v1, v1, v5, v3
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; R600-LABEL: v_bfi_sha256_ma_i64:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z
@@ -121,12 +383,72 @@ entry:
   ret i64 %or1
 }
 
-; FIXME: Should leave as 64-bit SALU ops
-; FUNC-LABEL: {{^}}s_bitselect_i64_pat_0:
-; GCN: s_and_b64
-; GCN: s_andn2_b64
-; GCN: s_or_b64
 define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
+; GFX7-LABEL: s_bitselect_i64_pat_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_and_b64 s[6:7], s[4:5], s[6:7]
+; GFX7-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
+; GFX7-NEXT:    s_or_b64 s[0:1], s[6:7], s[0:1]
+; GFX7-NEXT:    s_add_u32 s0, s0, 10
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    v_mov_b32_e32 v0, s0
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: s_bitselect_i64_pat_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_and_b64 s[2:3], s[4:5], s[6:7]
+; GFX8-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
+; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-NEXT:    s_add_u32 s0, s0, 10
+; GFX8-NEXT:    s_addc_u32 s1, s1, 0
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: s_bitselect_i64_pat_0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_and_b64 s[2:3], s[4:5], s[6:7]
+; GFX10-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
+; GFX10-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-NEXT:    s_add_u32 s0, s0, 10
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT:    s_endpgm
+;
+; R600-LABEL: s_bitselect_i64_pat_0:
+; R600:       ; %bb.0:
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV * T0.W, KC0[3].Y,
+; R600-NEXT:     BFI_INT * T0.W, KC0[2].Y, KC0[2].W, PV.W,
+; R600-NEXT:     MOV * T1.W, KC0[3].Z,
+; R600-NEXT:     BFI_INT T1.W, KC0[2].Z, KC0[3].X, PV.W,
+; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
+; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
+; R600-NEXT:     MOV * T1.X, literal.y,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -136,11 +458,72 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
   ret void
 }
 
-; FUNC-LABEL: {{^}}s_bitselect_i64_pat_1:
-; GCN: s_xor_b64
-; GCN: s_and_b64
-; GCN: s_xor_b64
 define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
+; GFX7-LABEL: s_bitselect_i64_pat_1:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], s[0:1]
+; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; GFX7-NEXT:    s_xor_b64 s[0:1], s[4:5], s[0:1]
+; GFX7-NEXT:    s_add_u32 s0, s0, 10
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    v_mov_b32_e32 v0, s0
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: s_bitselect_i64_pat_1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
+; GFX8-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-NEXT:    s_add_u32 s0, s0, 10
+; GFX8-NEXT:    s_addc_u32 s1, s1, 0
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: s_bitselect_i64_pat_1:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
+; GFX10-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX10-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-NEXT:    s_add_u32 s0, s0, 10
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT:    s_endpgm
+;
+; R600-LABEL: s_bitselect_i64_pat_1:
+; R600:       ; %bb.0:
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV * T0.W, KC0[3].Y,
+; R600-NEXT:     BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
+; R600-NEXT:     MOV * T1.W, KC0[3].Z,
+; R600-NEXT:     BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
+; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
+; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
+; R600-NEXT:     MOV * T1.X, literal.y,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -150,11 +533,72 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
   ret void
 }
 
-; FUNC-LABEL: {{^}}s_bitselect_i64_pat_2:
-; GCN: s_xor_b64
-; GCN: s_and_b64
-; GCN: s_xor_b64
 define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
+; GFX7-LABEL: s_bitselect_i64_pat_2:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_xor_b64 s[4:5], s[4:5], s[0:1]
+; GFX7-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
+; GFX7-NEXT:    s_xor_b64 s[0:1], s[4:5], s[0:1]
+; GFX7-NEXT:    s_add_u32 s0, s0, 10
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    v_mov_b32_e32 v0, s0
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: s_bitselect_i64_pat_2:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
+; GFX8-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX8-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-NEXT:    s_add_u32 s0, s0, 10
+; GFX8-NEXT:    s_addc_u32 s1, s1, 0
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: s_bitselect_i64_pat_2:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
+; GFX10-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX10-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-NEXT:    s_add_u32 s0, s0, 10
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT:    s_endpgm
+;
+; R600-LABEL: s_bitselect_i64_pat_2:
+; R600:       ; %bb.0:
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV * T0.W, KC0[3].Y,
+; R600-NEXT:     BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
+; R600-NEXT:     MOV * T1.W, KC0[3].Z,
+; R600-NEXT:     BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
+; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
+; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
+; R600-NEXT:     MOV * T1.X, literal.y,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -164,12 +608,75 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
   ret void
 }
 
-; FUNC-LABEL: {{^}}s_bfi_sha256_ma_i64:
-; GCN: s_and_b64
-; GCN: s_or_b64
-; GCN: s_and_b64
-; GCN: s_or_b64
 define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
+; GFX7-LABEL: s_bfi_sha256_ma_i64:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x9
+; GFX7-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0xd
+; GFX7-NEXT:    s_mov_b32 s3, 0xf000
+; GFX7-NEXT:    s_mov_b32 s2, -1
+; GFX7-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX7-NEXT:    s_and_b64 s[8:9], s[4:5], s[0:1]
+; GFX7-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
+; GFX7-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
+; GFX7-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
+; GFX7-NEXT:    s_add_u32 s0, s0, 10
+; GFX7-NEXT:    s_addc_u32 s1, s1, 0
+; GFX7-NEXT:    v_mov_b32_e32 v0, s0
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX7-NEXT:    s_endpgm
+;
+; GFX8-LABEL: s_bfi_sha256_ma_i64:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX8-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-NEXT:    s_and_b64 s[2:3], s[4:5], s[0:1]
+; GFX8-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
+; GFX8-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
+; GFX8-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-NEXT:    s_add_u32 s0, s0, 10
+; GFX8-NEXT:    s_addc_u32 s1, s1, 0
+; GFX8-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX8-NEXT:    s_endpgm
+;
+; GFX10-LABEL: s_bfi_sha256_ma_i64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_clause 0x1
+; GFX10-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-NEXT:    s_or_b64 s[2:3], s[4:5], s[0:1]
+; GFX10-NEXT:    s_and_b64 s[0:1], s[4:5], s[0:1]
+; GFX10-NEXT:    s_and_b64 s[2:3], s[6:7], s[2:3]
+; GFX10-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-NEXT:    s_add_u32 s0, s0, 10
+; GFX10-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GFX10-NEXT:    s_endpgm
+;
+; R600-LABEL: s_bfi_sha256_ma_i64:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     XOR_INT * T0.W, KC0[2].Y, KC0[2].W,
+; R600-NEXT:     BFI_INT T0.W, PV.W, KC0[3].Y, KC0[2].W,
+; R600-NEXT:     XOR_INT * T1.W, KC0[2].Z, KC0[3].X,
+; R600-NEXT:     BFI_INT T1.W, PS, KC0[3].Z, KC0[3].X,
+; R600-NEXT:     ADDC_UINT * T2.W, PV.W, literal.x,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
+; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
+; R600-NEXT:     MOV * T1.X, literal.y,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z

From eb88e793ff579071e03766970e46a5d60d77cf7c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 13:45:30 -0500
Subject: [PATCH 779/946] AMDGPU: Add some additional test coverage for BFI
 matching

Try to stress constant bus restriction enforcement since some of these
are broken for GlobalISel. Split the r600 test because some of these
cases don't compile (and all the ones using return values are
discarded).
---
 llvm/test/CodeGen/AMDGPU/bfi_int.ll      | 778 ++++++++++++++++++-----
 llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll | 237 +++++++
 2 files changed, 871 insertions(+), 144 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll

diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
index a046170f79e38..35b13804990a0 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
@@ -2,13 +2,12 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7 %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefixes=R600 %s
 
 ; BFI_INT Definition pattern from ISA docs
 ; (y & x) | (z & ~x)
 ;
-define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
-; GFX7-LABEL: bfi_def:
+define amdgpu_kernel void @s_bfi_def_i32(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: s_bfi_def_i32:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
 ; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
@@ -23,7 +22,7 @@ define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %
 ; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: bfi_def:
+; GFX8-LABEL: s_bfi_def_i32:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
 ; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
@@ -38,7 +37,7 @@ define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
-; GFX10-LABEL: bfi_def:
+; GFX10-LABEL: s_bfi_def_i32:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_clause 0x2
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
@@ -52,17 +51,6 @@ define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
-;
-; R600-LABEL: bfi_def:
-; R600:       ; %bb.0: ; %entry
-; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
-; R600-NEXT:    ALU clause starting at 4:
-; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
-; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; R600-NEXT:     BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
 entry:
   %0 = xor i32 %x, -1
   %1 = and i32 %z, %0
@@ -72,10 +60,37 @@ entry:
   ret void
 }
 
+define i32 @v_bfi_def_i32(i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: v_bfi_def_i32:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bfi_def_i32:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_bfi_def_i32:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = xor i32 %x, -1
+  %1 = and i32 %z, %0
+  %2 = and i32 %y, %x
+  %3 = or i32 %1, %2
+  ret i32 %3
+}
+
 ; SHA-256 Ch function
 ; z ^ (x & (y ^ z))
-define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
-; GFX7-LABEL: bfi_sha256_ch:
+define amdgpu_kernel void @s_bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: s_bfi_sha256_ch:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
 ; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
@@ -90,7 +105,7 @@ define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y,
 ; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: bfi_sha256_ch:
+; GFX8-LABEL: s_bfi_sha256_ch:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
 ; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
@@ -105,7 +120,7 @@ define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y,
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
-; GFX10-LABEL: bfi_sha256_ch:
+; GFX10-LABEL: s_bfi_sha256_ch:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_clause 0x2
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
@@ -119,17 +134,6 @@ define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y,
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
-;
-; R600-LABEL: bfi_sha256_ch:
-; R600:       ; %bb.0: ; %entry
-; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
-; R600-NEXT:    ALU clause starting at 4:
-; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
-; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
-; R600-NEXT:     BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
 entry:
   %0 = xor i32 %y, %z
   %1 = and i32 %x, %0
@@ -138,10 +142,180 @@ entry:
   ret void
 }
 
+define i32 @v_bfi_sha256_ch(i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: v_bfi_sha256_ch:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bfi_sha256_ch:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_bfi_sha256_ch:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = xor i32 %y, %z
+  %1 = and i32 %x, %0
+  %2 = xor i32 %z, %1
+  ret i32 %2
+}
+
+define amdgpu_ps float @v_s_s_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 inreg %z) {
+; GFX7-LABEL: v_s_s_bfi_sha256_ch:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v1
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_s_s_bfi_sha256_ch:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_s_s_bfi_sha256_ch:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s1
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %xor0 = xor i32 %y, %z
+  %and = and i32 %x, %xor0
+  %xor1 = xor i32 %z, %and
+  %cast = bitcast i32 %xor1 to float
+  ret float %cast
+}
+
+define amdgpu_ps float @s_v_s_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 inreg %z) {
+; GFX7-LABEL: s_v_s_bfi_sha256_ch:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v1
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_v_s_bfi_sha256_ch:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_v_s_bfi_sha256_ch:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s1
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %xor0 = xor i32 %y, %z
+  %and = and i32 %x, %xor0
+  %xor1 = xor i32 %z, %and
+  %cast = bitcast i32 %xor1 to float
+  ret float %cast
+}
+
+define amdgpu_ps float @s_s_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z) {
+; GFX7-LABEL: s_s_v_bfi_sha256_ch:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_mov_b32_e32 v1, s1
+; GFX7-NEXT:    v_bfi_b32 v0, s0, v1, v0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_s_v_bfi_sha256_ch:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-NEXT:    v_bfi_b32 v0, s0, v1, v0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_s_v_bfi_sha256_ch:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_bfi_b32 v0, s0, s1, v0
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %xor0 = xor i32 %y, %z
+  %and = and i32 %x, %xor0
+  %xor1 = xor i32 %z, %and
+  %cast = bitcast i32 %xor1 to float
+  ret float %cast
+}
+
+define amdgpu_ps float @s_v_v_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 %z) {
+; GFX7-LABEL: s_v_v_bfi_sha256_ch:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v1
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_v_v_bfi_sha256_ch:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_v_v_bfi_sha256_ch:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, v1
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %xor0 = xor i32 %y, %z
+  %and = and i32 %x, %xor0
+  %xor1 = xor i32 %z, %and
+  %cast = bitcast i32 %xor1 to float
+  ret float %cast
+}
+
+define amdgpu_ps float @v_s_v_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 %z) {
+; GFX7-LABEL: v_s_v_bfi_sha256_ch:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v1
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_s_v_bfi_sha256_ch:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v1
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_s_v_bfi_sha256_ch:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, v1
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %xor0 = xor i32 %y, %z
+  %and = and i32 %x, %xor0
+  %xor1 = xor i32 %z, %and
+  %cast = bitcast i32 %xor1 to float
+  ret float %cast
+}
+
+define amdgpu_ps float @v_v_s_bfi_sha256_ch(i32 %x, i32 %y, i32 inreg %z) {
+; GFX7-LABEL: v_v_s_bfi_sha256_ch:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_bfi_b32 v0, v0, v1, s0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_v_s_bfi_sha256_ch:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_bfi_b32 v0, v0, v1, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_v_s_bfi_sha256_ch:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, s0
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %xor0 = xor i32 %y, %z
+  %and = and i32 %x, %xor0
+  %xor1 = xor i32 %z, %and
+  %cast = bitcast i32 %xor1 to float
+  ret float %cast
+}
+
 ; SHA-256 Ma function
 ; ((x & z) | (y & (x | z)))
-define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
-; GFX7-LABEL: bfi_sha256_ma:
+define amdgpu_kernel void @s_bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: s_bfi_sha256_ma:
 ; GFX7:       ; %bb.0: ; %entry
 ; GFX7-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0xb
 ; GFX7-NEXT:    s_load_dword s6, s[0:1], 0xd
@@ -157,7 +331,7 @@ define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y,
 ; GFX7-NEXT:    buffer_store_dword v0, off, s[0:3], 0
 ; GFX7-NEXT:    s_endpgm
 ;
-; GFX8-LABEL: bfi_sha256_ma:
+; GFX8-LABEL: s_bfi_sha256_ma:
 ; GFX8:       ; %bb.0: ; %entry
 ; GFX8-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
 ; GFX8-NEXT:    s_load_dword s4, s[0:1], 0x34
@@ -173,7 +347,7 @@ define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y,
 ; GFX8-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8-NEXT:    s_endpgm
 ;
-; GFX10-LABEL: bfi_sha256_ma:
+; GFX10-LABEL: s_bfi_sha256_ma:
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    s_clause 0x2
 ; GFX10-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
@@ -188,18 +362,6 @@ define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y,
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
-;
-; R600-LABEL: bfi_sha256_ma:
-; R600:       ; %bb.0: ; %entry
-; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
-; R600-NEXT:    ALU clause starting at 4:
-; R600-NEXT:     XOR_INT * T0.W, KC0[2].Z, KC0[2].W,
-; R600-NEXT:     BFI_INT * T0.X, PV.W, KC0[3].X, KC0[2].W,
-; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
-; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
 entry:
   %0 = and i32 %x, %z
   %1 = or i32 %x, %z
@@ -209,6 +371,36 @@ entry:
   ret void
 }
 
+define i32 @v_bfi_sha256_ma(i32 %x, i32 %y, i32 %z) {
+; GFX7-LABEL: v_bfi_sha256_ma:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX7-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, v1
+; GFX7-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: v_bfi_sha256_ma:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, v1
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_bfi_sha256_ma:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v1
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %0 = and i32 %x, %z
+  %1 = or i32 %x, %z
+  %2 = and i32 %y, %1
+  %3 = or i32 %0, %2
+  ret i32 %3
+}
+
 define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
 ; GFX7-LABEL: v_bitselect_v2i32_pat1:
 ; GFX7:       ; %bb.0:
@@ -231,11 +423,6 @@ define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %
 ; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
 ; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_bitselect_v2i32_pat1:
-; R600:       ; %bb.0:
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
   %xor.0 = xor <2 x i32> %a, %mask
   %and = and <2 x i32> %xor.0, %b
   %bitselect = xor <2 x i32> %and, %mask
@@ -264,11 +451,6 @@ define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v4
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_bitselect_i64_pat_0:
-; R600:       ; %bb.0:
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -276,6 +458,174 @@ define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
   ret i64 %bitselect
 }
 
+define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 inreg %mask) {
+; GFX7-LABEL: v_s_s_bitselect_i64_pat_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_mov_b32_e32 v2, s3
+; GFX7-NEXT:    v_bfi_b32 v1, v1, s1, v2
+; GFX7-NEXT:    v_mov_b32_e32 v2, s2
+; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v2
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_s_s_bitselect_i64_pat_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_mov_b32_e32 v2, s3
+; GFX8-NEXT:    v_bfi_b32 v1, v1, s1, v2
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_s_s_bitselect_i64_pat_0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s2
+; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, s3
+; GFX10-NEXT:    ; return to shader part epilog
+  %and0 = and i64 %a, %b
+  %not.a = xor i64 %a, -1
+  %and1 = and i64 %not.a, %mask
+  %bitselect = or i64 %and0, %and1
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 inreg %mask) {
+; GFX7-LABEL: s_v_s_bitselect_i64_pat_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_mov_b32_e32 v2, s3
+; GFX7-NEXT:    v_bfi_b32 v1, s1, v1, v2
+; GFX7-NEXT:    v_mov_b32_e32 v2, s2
+; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v2
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_v_s_bitselect_i64_pat_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_mov_b32_e32 v2, s3
+; GFX8-NEXT:    v_bfi_b32 v1, s1, v1, v2
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_v_s_bitselect_i64_pat_0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s2
+; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, s3
+; GFX10-NEXT:    ; return to shader part epilog
+  %and0 = and i64 %a, %b
+  %not.a = xor i64 %a, -1
+  %and1 = and i64 %not.a, %mask
+  %bitselect = or i64 %and0, %and1
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_0(i64 inreg %a, i64 inreg %b, i64 %mask) {
+; GFX7-LABEL: s_s_v_bitselect_i64_pat_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_mov_b32_e32 v2, s3
+; GFX7-NEXT:    v_bfi_b32 v1, s1, v2, v1
+; GFX7-NEXT:    v_mov_b32_e32 v2, s2
+; GFX7-NEXT:    v_bfi_b32 v0, s0, v2, v0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_s_v_bitselect_i64_pat_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_mov_b32_e32 v2, s3
+; GFX8-NEXT:    v_bfi_b32 v1, s1, v2, v1
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    v_bfi_b32 v0, s0, v2, v0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_s_v_bitselect_i64_pat_0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, s0, s2, v0
+; GFX10-NEXT:    v_bfi_b32 v1, s1, s3, v1
+; GFX10-NEXT:    ; return to shader part epilog
+  %and0 = and i64 %a, %b
+  %not.a = xor i64 %a, -1
+  %and1 = and i64 %not.a, %mask
+  %bitselect = or i64 %and0, %and1
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @v_v_s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 inreg %mask) {
+; GFX7-LABEL: v_v_s_bitselect_i64_pat_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_bfi_b32 v1, v1, v3, s1
+; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, s0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_v_s_bitselect_i64_pat_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_bfi_b32 v1, v1, v3, s1
+; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_v_s_bitselect_i64_pat_0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, s0
+; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, s1
+; GFX10-NEXT:    ; return to shader part epilog
+  %and0 = and i64 %a, %b
+  %not.a = xor i64 %a, -1
+  %and1 = and i64 %not.a, %mask
+  %bitselect = or i64 %and0, %and1
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @v_s_v_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i64 %mask) {
+; GFX7-LABEL: v_s_v_bitselect_i64_pat_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_bfi_b32 v1, v1, s1, v3
+; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v2
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_s_v_bitselect_i64_pat_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_bfi_b32 v1, v1, s1, v3
+; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_s_v_bitselect_i64_pat_0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, v2
+; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, v3
+; GFX10-NEXT:    ; return to shader part epilog
+  %and0 = and i64 %a, %b
+  %not.a = xor i64 %a, -1
+  %and1 = and i64 %not.a, %mask
+  %bitselect = or i64 %and0, %and1
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_v_v_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i64 %mask) {
+; GFX7-LABEL: s_v_v_bitselect_i64_pat_0:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_bfi_b32 v1, s1, v1, v3
+; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v2
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_v_v_bitselect_i64_pat_0:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_bfi_b32 v1, s1, v1, v3
+; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_v_v_bitselect_i64_pat_0:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, v2
+; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, v3
+; GFX10-NEXT:    ; return to shader part epilog
+  %and0 = and i64 %a, %b
+  %not.a = xor i64 %a, -1
+  %and1 = and i64 %not.a, %mask
+  %bitselect = or i64 %and0, %and1
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
 define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
 ; GFX7-LABEL: v_bitselect_i64_pat_1:
 ; GFX7:       ; %bb.0:
@@ -298,17 +648,99 @@ define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
 ; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_bitselect_i64_pat_1:
-; R600:       ; %bb.0:
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
   ret i64 %bitselect
 }
 
+define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_1(i64 %a, i64 inreg %b, i64 inreg %mask) {
+; GFX7-LABEL: v_s_s_bitselect_i64_pat_1:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_mov_b32_e32 v2, s3
+; GFX7-NEXT:    v_bfi_b32 v1, s1, v1, v2
+; GFX7-NEXT:    v_mov_b32_e32 v2, s2
+; GFX7-NEXT:    v_bfi_b32 v0, s0, v0, v2
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_s_s_bitselect_i64_pat_1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_mov_b32_e32 v2, s3
+; GFX8-NEXT:    v_bfi_b32 v1, s1, v1, v2
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    v_bfi_b32 v0, s0, v0, v2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_s_s_bitselect_i64_pat_1:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s2
+; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, s3
+; GFX10-NEXT:    ; return to shader part epilog
+  %xor.0 = xor i64 %a, %mask
+  %and = and i64 %xor.0, %b
+  %bitselect = xor i64 %and, %mask
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_1(i64 inreg %a, i64 inreg %b, i64 %mask) {
+; GFX7-LABEL: s_s_v_bitselect_i64_pat_1:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_mov_b32_e32 v2, s1
+; GFX7-NEXT:    v_bfi_b32 v1, s3, v2, v1
+; GFX7-NEXT:    v_mov_b32_e32 v2, s0
+; GFX7-NEXT:    v_bfi_b32 v0, s2, v2, v0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_s_v_bitselect_i64_pat_1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_mov_b32_e32 v2, s1
+; GFX8-NEXT:    v_bfi_b32 v1, s3, v2, v1
+; GFX8-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8-NEXT:    v_bfi_b32 v0, s2, v2, v0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_s_v_bitselect_i64_pat_1:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, s2, s0, v0
+; GFX10-NEXT:    v_bfi_b32 v1, s3, s1, v1
+; GFX10-NEXT:    ; return to shader part epilog
+  %xor.0 = xor i64 %a, %mask
+  %and = and i64 %xor.0, %b
+  %bitselect = xor i64 %and, %mask
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_1(i64 inreg %a, i64 %b, i64 inreg %mask) {
+; GFX7-LABEL: s_v_s_bitselect_i64_pat_1:
+; GFX7:       ; %bb.0:
+; GFX7-NEXT:    v_mov_b32_e32 v2, s3
+; GFX7-NEXT:    v_bfi_b32 v1, v1, s1, v2
+; GFX7-NEXT:    v_mov_b32_e32 v2, s2
+; GFX7-NEXT:    v_bfi_b32 v0, v0, s0, v2
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_v_s_bitselect_i64_pat_1:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    v_mov_b32_e32 v2, s3
+; GFX8-NEXT:    v_bfi_b32 v1, v1, s1, v2
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    v_bfi_b32 v0, v0, s0, v2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_v_s_bitselect_i64_pat_1:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s2
+; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, s3
+; GFX10-NEXT:    ; return to shader part epilog
+  %xor.0 = xor i64 %a, %mask
+  %and = and i64 %xor.0, %b
+  %bitselect = xor i64 %and, %mask
+  %cast = bitcast i64 %bitselect to <2 x float>
+  ret <2 x float> %cast
+}
+
 define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
 ; GFX7-LABEL: v_bitselect_i64_pat_2:
 ; GFX7:       ; %bb.0:
@@ -331,11 +763,6 @@ define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
 ; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_bitselect_i64_pat_2:
-; R600:       ; %bb.0:
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -370,11 +797,6 @@ define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v4, v2
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, v5, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
-;
-; R600-LABEL: v_bfi_sha256_ma_i64:
-; R600:       ; %bb.0: ; %entry
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z
@@ -383,6 +805,146 @@ entry:
   ret i64 %or1
 }
 
+define amdgpu_ps <2 x float> @v_s_s_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 inreg %z) {
+; GFX7-LABEL: v_s_s_bfi_sha256_ma_i64:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_xor_b32_e32 v1, s1, v1
+; GFX7-NEXT:    v_mov_b32_e32 v2, s1
+; GFX7-NEXT:    v_bfi_b32 v1, v1, s3, v2
+; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT:    v_mov_b32_e32 v2, s0
+; GFX7-NEXT:    v_bfi_b32 v0, v0, s2, v2
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_s_s_bfi_sha256_ma_i64:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v1
+; GFX8-NEXT:    v_mov_b32_e32 v2, s1
+; GFX8-NEXT:    v_bfi_b32 v1, v1, s3, v2
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8-NEXT:    v_bfi_b32 v0, v0, s2, v2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_s_s_bfi_sha256_ma_i64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX10-NEXT:    v_xor_b32_e32 v1, s1, v1
+; GFX10-NEXT:    v_bfi_b32 v0, v0, s2, s0
+; GFX10-NEXT:    v_bfi_b32 v1, v1, s3, s1
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %and0 = and i64 %x, %z
+  %or0 = or i64 %x, %z
+  %and1 = and i64 %y, %or0
+  %or1 = or i64 %and0, %and1
+  %cast = bitcast i64 %or1 to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_v_s_bfi_sha256_ma_i64(i64 inreg %x, i64 %y, i64 inreg %z) {
+; GFX7-LABEL: s_v_s_bfi_sha256_ma_i64:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_xor_b32_e32 v2, s1, v1
+; GFX7-NEXT:    v_bfi_b32 v1, v2, s3, v1
+; GFX7-NEXT:    v_xor_b32_e32 v2, s0, v0
+; GFX7-NEXT:    v_bfi_b32 v0, v2, s2, v0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_v_s_bfi_sha256_ma_i64:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_xor_b32_e32 v2, s1, v1
+; GFX8-NEXT:    v_bfi_b32 v1, v2, s3, v1
+; GFX8-NEXT:    v_xor_b32_e32 v2, s0, v0
+; GFX8-NEXT:    v_bfi_b32 v0, v2, s2, v0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_v_s_bfi_sha256_ma_i64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_xor_b32_e32 v2, s0, v0
+; GFX10-NEXT:    v_xor_b32_e32 v3, s1, v1
+; GFX10-NEXT:    v_bfi_b32 v0, v2, s2, v0
+; GFX10-NEXT:    v_bfi_b32 v1, v3, s3, v1
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %and0 = and i64 %x, %z
+  %or0 = or i64 %x, %z
+  %and1 = and i64 %y, %or0
+  %or1 = or i64 %and0, %and1
+  %cast = bitcast i64 %or1 to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @s_s_v_bfi_sha256_ma_i64(i64 inreg %x, i64 inreg %y, i64 %z) {
+; GFX7-LABEL: s_s_v_bfi_sha256_ma_i64:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_mov_b32_e32 v2, s3
+; GFX7-NEXT:    v_xor_b32_e32 v2, s1, v2
+; GFX7-NEXT:    v_bfi_b32 v1, v2, v1, s3
+; GFX7-NEXT:    v_mov_b32_e32 v2, s2
+; GFX7-NEXT:    v_xor_b32_e32 v2, s0, v2
+; GFX7-NEXT:    v_bfi_b32 v0, v2, v0, s2
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: s_s_v_bfi_sha256_ma_i64:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_mov_b32_e32 v2, s3
+; GFX8-NEXT:    v_xor_b32_e32 v2, s1, v2
+; GFX8-NEXT:    v_bfi_b32 v1, v2, v1, s3
+; GFX8-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-NEXT:    v_xor_b32_e32 v2, s0, v2
+; GFX8-NEXT:    v_bfi_b32 v0, v2, v0, s2
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: s_s_v_bfi_sha256_ma_i64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_xor_b32_e64 v2, s0, s2
+; GFX10-NEXT:    v_xor_b32_e64 v3, s1, s3
+; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, s2
+; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, s3
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %and0 = and i64 %x, %z
+  %or0 = or i64 %x, %z
+  %and1 = and i64 %y, %or0
+  %or1 = or i64 %and0, %and1
+  %cast = bitcast i64 %or1 to <2 x float>
+  ret <2 x float> %cast
+}
+
+define amdgpu_ps <2 x float> @v_s_v_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64 %z) {
+; GFX7-LABEL: v_s_v_bfi_sha256_ma_i64:
+; GFX7:       ; %bb.0: ; %entry
+; GFX7-NEXT:    v_xor_b32_e32 v1, s1, v1
+; GFX7-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX7-NEXT:    v_bfi_b32 v1, v1, v3, s1
+; GFX7-NEXT:    v_bfi_b32 v0, v0, v2, s0
+; GFX7-NEXT:    ; return to shader part epilog
+;
+; GFX8-LABEL: v_s_v_bfi_sha256_ma_i64:
+; GFX8:       ; %bb.0: ; %entry
+; GFX8-NEXT:    v_xor_b32_e32 v1, s1, v1
+; GFX8-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX8-NEXT:    v_bfi_b32 v1, v1, v3, s1
+; GFX8-NEXT:    v_bfi_b32 v0, v0, v2, s0
+; GFX8-NEXT:    ; return to shader part epilog
+;
+; GFX10-LABEL: v_s_v_bfi_sha256_ma_i64:
+; GFX10:       ; %bb.0: ; %entry
+; GFX10-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX10-NEXT:    v_xor_b32_e32 v1, s1, v1
+; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, s0
+; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, s1
+; GFX10-NEXT:    ; return to shader part epilog
+entry:
+  %and0 = and i64 %x, %z
+  %or0 = or i64 %x, %z
+  %and1 = and i64 %y, %or0
+  %or1 = or i64 %and0, %and1
+  %cast = bitcast i64 %or1 to <2 x float>
+  ret <2 x float> %cast
+}
+
 define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX7-LABEL: s_bitselect_i64_pat_0:
 ; GFX7:       ; %bb.0:
@@ -431,24 +993,6 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
 ; GFX10-NEXT:    s_endpgm
-;
-; R600-LABEL: s_bitselect_i64_pat_0:
-; R600:       ; %bb.0:
-; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
-; R600-NEXT:    ALU clause starting at 4:
-; R600-NEXT:     MOV * T0.W, KC0[3].Y,
-; R600-NEXT:     BFI_INT * T0.W, KC0[2].Y, KC0[2].W, PV.W,
-; R600-NEXT:     MOV * T1.W, KC0[3].Z,
-; R600-NEXT:     BFI_INT T1.W, KC0[2].Z, KC0[3].X, PV.W,
-; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
-; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
-; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
-; R600-NEXT:     MOV * T1.X, literal.y,
-; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -506,24 +1050,6 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
 ; GFX10-NEXT:    s_endpgm
-;
-; R600-LABEL: s_bitselect_i64_pat_1:
-; R600:       ; %bb.0:
-; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
-; R600-NEXT:    ALU clause starting at 4:
-; R600-NEXT:     MOV * T0.W, KC0[3].Y,
-; R600-NEXT:     BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
-; R600-NEXT:     MOV * T1.W, KC0[3].Z,
-; R600-NEXT:     BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
-; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
-; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
-; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
-; R600-NEXT:     MOV * T1.X, literal.y,
-; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -581,24 +1107,6 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
 ; GFX10-NEXT:    s_endpgm
-;
-; R600-LABEL: s_bitselect_i64_pat_2:
-; R600:       ; %bb.0:
-; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
-; R600-NEXT:    ALU clause starting at 4:
-; R600-NEXT:     MOV * T0.W, KC0[3].Y,
-; R600-NEXT:     BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
-; R600-NEXT:     MOV * T1.W, KC0[3].Z,
-; R600-NEXT:     BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
-; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
-; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
-; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
-; R600-NEXT:     MOV * T1.X, literal.y,
-; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -659,24 +1167,6 @@ define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
 ; GFX10-NEXT:    s_endpgm
-;
-; R600-LABEL: s_bfi_sha256_ma_i64:
-; R600:       ; %bb.0: ; %entry
-; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
-; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
-; R600-NEXT:    CF_END
-; R600-NEXT:    PAD
-; R600-NEXT:    ALU clause starting at 4:
-; R600-NEXT:     XOR_INT * T0.W, KC0[2].Y, KC0[2].W,
-; R600-NEXT:     BFI_INT T0.W, PV.W, KC0[3].Y, KC0[2].W,
-; R600-NEXT:     XOR_INT * T1.W, KC0[2].Z, KC0[3].X,
-; R600-NEXT:     BFI_INT T1.W, PS, KC0[3].Z, KC0[3].X,
-; R600-NEXT:     ADDC_UINT * T2.W, PV.W, literal.x,
-; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
-; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
-; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
-; R600-NEXT:     MOV * T1.X, literal.y,
-; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z
diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
new file mode 100644
index 0000000000000..34eb088b16f49
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.r600.ll
@@ -0,0 +1,237 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -enable-var-scope -check-prefixes=R600 %s
+
+; BFI_INT Definition pattern from ISA docs
+; (y & x) | (z & ~x)
+;
+define amdgpu_kernel void @bfi_def(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; R600-LABEL: bfi_def:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT:     BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
+entry:
+  %0 = xor i32 %x, -1
+  %1 = and i32 %z, %0
+  %2 = and i32 %y, %x
+  %3 = or i32 %1, %2
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+; SHA-256 Ch function
+; z ^ (x & (y ^ z))
+define amdgpu_kernel void @bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; R600-LABEL: bfi_sha256_ch:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
+; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+; R600-NEXT:     BFI_INT * T1.X, KC0[2].Z, KC0[2].W, KC0[3].X,
+entry:
+  %0 = xor i32 %y, %z
+  %1 = and i32 %x, %0
+  %2 = xor i32 %z, %1
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; SHA-256 Ma function
+; ((x & z) | (y & (x | z)))
+define amdgpu_kernel void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
+; R600-LABEL: bfi_sha256_ma:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 3, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     XOR_INT * T0.W, KC0[2].Z, KC0[2].W,
+; R600-NEXT:     BFI_INT * T0.X, PV.W, KC0[3].X, KC0[2].W,
+; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
+; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
+entry:
+  %0 = and i32 %x, %z
+  %1 = or i32 %x, %z
+  %2 = and i32 %y, %1
+  %3 = or i32 %0, %2
+  store i32 %3, i32 addrspace(1)* %out
+  ret void
+}
+
+define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %mask) {
+; R600-LABEL: v_bitselect_v2i32_pat1:
+; R600:       ; %bb.0:
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+  %xor.0 = xor <2 x i32> %a, %mask
+  %and = and <2 x i32> %xor.0, %b
+  %bitselect = xor <2 x i32> %and, %mask
+  ret <2 x i32> %bitselect
+}
+
+define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
+; R600-LABEL: v_bitselect_i64_pat_0:
+; R600:       ; %bb.0:
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+  %and0 = and i64 %a, %b
+  %not.a = xor i64 %a, -1
+  %and1 = and i64 %not.a, %mask
+  %bitselect = or i64 %and0, %and1
+  ret i64 %bitselect
+}
+
+define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
+; R600-LABEL: v_bitselect_i64_pat_1:
+; R600:       ; %bb.0:
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+  %xor.0 = xor i64 %a, %mask
+  %and = and i64 %xor.0, %b
+  %bitselect = xor i64 %and, %mask
+  ret i64 %bitselect
+}
+
+define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
+; R600-LABEL: v_bitselect_i64_pat_2:
+; R600:       ; %bb.0:
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+  %xor.0 = xor i64 %a, %mask
+  %and = and i64 %xor.0, %b
+  %bitselect = xor i64 %and, %mask
+  ret i64 %bitselect
+}
+
+define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
+; R600-LABEL: v_bfi_sha256_ma_i64:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+entry:
+  %and0 = and i64 %x, %z
+  %or0 = or i64 %x, %z
+  %and1 = and i64 %y, %or0
+  %or1 = or i64 %and0, %and1
+  ret i64 %or1
+}
+
+define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
+; R600-LABEL: s_bitselect_i64_pat_0:
+; R600:       ; %bb.0:
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV * T0.W, KC0[3].Y,
+; R600-NEXT:     BFI_INT * T0.W, KC0[2].Y, KC0[2].W, PV.W,
+; R600-NEXT:     MOV * T1.W, KC0[3].Z,
+; R600-NEXT:     BFI_INT T1.W, KC0[2].Z, KC0[3].X, PV.W,
+; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
+; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
+; R600-NEXT:     MOV * T1.X, literal.y,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+  %and0 = and i64 %a, %b
+  %not.a = xor i64 %a, -1
+  %and1 = and i64 %not.a, %mask
+  %bitselect = or i64 %and0, %and1
+  %scalar.use = add i64 %bitselect, 10
+  store i64 %scalar.use, i64 addrspace(1)* undef
+  ret void
+}
+
+define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
+; R600-LABEL: s_bitselect_i64_pat_1:
+; R600:       ; %bb.0:
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV * T0.W, KC0[3].Y,
+; R600-NEXT:     BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
+; R600-NEXT:     MOV * T1.W, KC0[3].Z,
+; R600-NEXT:     BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
+; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
+; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
+; R600-NEXT:     MOV * T1.X, literal.y,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+  %xor.0 = xor i64 %a, %mask
+  %and = and i64 %xor.0, %b
+  %bitselect = xor i64 %and, %mask
+
+  %scalar.use = add i64 %bitselect, 10
+  store i64 %scalar.use, i64 addrspace(1)* undef
+  ret void
+}
+
+define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
+; R600-LABEL: s_bitselect_i64_pat_2:
+; R600:       ; %bb.0:
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     MOV * T0.W, KC0[3].Y,
+; R600-NEXT:     BFI_INT * T0.W, KC0[2].W, KC0[2].Y, PV.W,
+; R600-NEXT:     MOV * T1.W, KC0[3].Z,
+; R600-NEXT:     BFI_INT T1.W, KC0[3].X, KC0[2].Z, PV.W,
+; R600-NEXT:     ADDC_UINT * T2.W, T0.W, literal.x,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
+; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
+; R600-NEXT:     MOV * T1.X, literal.y,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+  %xor.0 = xor i64 %a, %mask
+  %and = and i64 %xor.0, %b
+  %bitselect = xor i64 %and, %mask
+
+  %scalar.use = add i64 %bitselect, 10
+  store i64 %scalar.use, i64 addrspace(1)* undef
+  ret void
+}
+
+define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
+; R600-LABEL: s_bfi_sha256_ma_i64:
+; R600:       ; %bb.0: ; %entry
+; R600-NEXT:    ALU 9, @4, KC0[CB0:0-32], KC1[]
+; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; R600-NEXT:    CF_END
+; R600-NEXT:    PAD
+; R600-NEXT:    ALU clause starting at 4:
+; R600-NEXT:     XOR_INT * T0.W, KC0[2].Y, KC0[2].W,
+; R600-NEXT:     BFI_INT T0.W, PV.W, KC0[3].Y, KC0[2].W,
+; R600-NEXT:     XOR_INT * T1.W, KC0[2].Z, KC0[3].X,
+; R600-NEXT:     BFI_INT T1.W, PS, KC0[3].Z, KC0[3].X,
+; R600-NEXT:     ADDC_UINT * T2.W, PV.W, literal.x,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+; R600-NEXT:     ADD_INT * T0.Y, PV.W, PS,
+; R600-NEXT:     ADD_INT T0.X, T0.W, literal.x,
+; R600-NEXT:     MOV * T1.X, literal.y,
+; R600-NEXT:    10(1.401298e-44), 0(0.000000e+00)
+entry:
+  %and0 = and i64 %x, %z
+  %or0 = or i64 %x, %z
+  %and1 = and i64 %y, %or0
+  %or1 = or i64 %and0, %and1
+
+  %scalar.use = add i64 %or1, 10
+  store i64 %scalar.use, i64 addrspace(1)* undef
+  ret void
+}

From 09fc311af702e06fbb7a89cdee13a61face102ed Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 12:36:39 -0500
Subject: [PATCH 780/946] AMDGPU/GlobalISel: Mostly fix BFI patterns

Most importantly, fixes constant bus errors in the 64-bit cases. It's
surprising to me these were even passing the selection test using
SReg_* sources. Also fixes pattern matching in the 32-bit cases, with
simple operands.

These patterns aren't working in a few cases, like with mixed SGPR
inputs. The patterns aren't looking through the SGPR->VGPR copies like
they need to. The vector cases also have some unmerges of build_vector
which are obscuring the inputs.
---
 llvm/lib/Target/AMDGPU/SIInstructions.td |  54 +-
 llvm/test/CodeGen/AMDGPU/bfi_int.ll      | 763 +++++++++++++++++++++++
 2 files changed, 790 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index cba9a77864aaf..7be63ae6964bf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -1810,44 +1810,44 @@ def BFIImm32 : PatFrag<
 // (y & x) | (z & ~x)
 def : AMDGPUPat <
   (DivergentBinFrag<or> (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
-  (V_BFI_B32_e64 $x, $y, $z)
+  (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
 >;
 
 // (y & C) | (z & ~C)
 def : AMDGPUPat <
   (BFIImm32 i32:$x, i32:$y, i32:$z),
-  (V_BFI_B32_e64 $x, $y, $z)
+  (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
 >;
 
 // 64-bit version
 def : AMDGPUPat <
   (DivergentBinFrag<or> (and i64:$y, i64:$x), (and i64:$z, (not i64:$x))),
-  (REG_SEQUENCE SReg_64,
-    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
-               (i32 (EXTRACT_SUBREG SReg_64:$y, sub0)),
-               (i32 (EXTRACT_SUBREG SReg_64:$z, sub0))), sub0,
-    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
-               (i32 (EXTRACT_SUBREG SReg_64:$y, sub1)),
-               (i32 (EXTRACT_SUBREG SReg_64:$z, sub1))), sub1)
+  (REG_SEQUENCE VReg_64,
+    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+               (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+               (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+               (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+               (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
 >;
 
 // SHA-256 Ch function
 // z ^ (x & (y ^ z))
 def : AMDGPUPat <
   (DivergentBinFrag<xor> i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
-  (V_BFI_B32_e64 $x, $y, $z)
+  (V_BFI_B32_e64 VSrc_b32:$x, VSrc_b32:$y, VSrc_b32:$z)
 >;
 
 // 64-bit version
 def : AMDGPUPat <
   (DivergentBinFrag<xor> i64:$z, (and i64:$x, (xor i64:$y, i64:$z))),
-  (REG_SEQUENCE SReg_64,
-    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
-               (i32 (EXTRACT_SUBREG SReg_64:$y, sub0)),
-               (i32 (EXTRACT_SUBREG SReg_64:$z, sub0))), sub0,
-    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
-               (i32 (EXTRACT_SUBREG SReg_64:$y, sub1)),
-               (i32 (EXTRACT_SUBREG SReg_64:$z, sub1))), sub1)
+  (REG_SEQUENCE VReg_64,
+    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+               (i32 (EXTRACT_SUBREG VReg_64:$y, sub0)),
+               (i32 (EXTRACT_SUBREG VReg_64:$z, sub0))), sub0,
+    (V_BFI_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+               (i32 (EXTRACT_SUBREG VReg_64:$y, sub1)),
+               (i32 (EXTRACT_SUBREG VReg_64:$z, sub1))), sub1)
 >;
 
 def : AMDGPUPat <
@@ -2773,21 +2773,21 @@ def : AMDGPUPat <
 def : AMDGPUPat <
   (DivergentBinFrag<or> (and i32:$x, i32:$z),
                         (and i32:$y, (or i32:$x, i32:$z))),
-  (V_BFI_B32_e64 (V_XOR_B32_e64 i32:$x, i32:$y), i32:$z, i32:$y)
+  (V_BFI_B32_e64 (V_XOR_B32_e64 VSrc_b32:$x, VSrc_b32:$y), VSrc_b32:$z, VSrc_b32:$y)
 >;
 
 def : AMDGPUPat <
   (DivergentBinFrag<or> (and i64:$x, i64:$z),
                         (and i64:$y, (or i64:$x, i64:$z))),
-  (REG_SEQUENCE SReg_64,
-    (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub0)),
-                    (i32 (EXTRACT_SUBREG SReg_64:$y, sub0))),
-               (i32 (EXTRACT_SUBREG SReg_64:$z, sub0)),
-               (i32 (EXTRACT_SUBREG SReg_64:$y, sub0))), sub0,
-    (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG SReg_64:$x, sub1)),
-                    (i32 (EXTRACT_SUBREG SReg_64:$y, sub1))),
-               (i32 (EXTRACT_SUBREG SReg_64:$z, sub1)),
-               (i32 (EXTRACT_SUBREG SReg_64:$y, sub1))), sub1)
+  (REG_SEQUENCE VReg_64,
+    (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub0)),
+                    (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))),
+               (i32 (EXTRACT_SUBREG VReg_64:$z, sub0)),
+               (i32 (EXTRACT_SUBREG VReg_64:$y, sub0))), sub0,
+    (V_BFI_B32_e64 (V_XOR_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$x, sub1)),
+                    (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))),
+               (i32 (EXTRACT_SUBREG VReg_64:$z, sub1)),
+               (i32 (EXTRACT_SUBREG VReg_64:$y, sub1))), sub1)
 >;
 
 multiclass IntMed3Pat<Instruction med3Inst,
diff --git a/llvm/test/CodeGen/AMDGPU/bfi_int.ll b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
index 35b13804990a0..ab8648f198853 100644
--- a/llvm/test/CodeGen/AMDGPU/bfi_int.ll
+++ b/llvm/test/CodeGen/AMDGPU/bfi_int.ll
@@ -2,6 +2,8 @@
 ; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX7 %s
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-GISEL %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10-GISEL %s
 
 ; BFI_INT Definition pattern from ISA docs
 ; (y & x) | (z & ~x)
@@ -51,6 +53,36 @@ define amdgpu_kernel void @s_bfi_def_i32(i32 addrspace(1)* %out, i32 %x, i32 %y,
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX8-GISEL-LABEL: s_bfi_def_i32:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX8-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_andn2_b32 s4, s4, s2
+; GFX8-GISEL-NEXT:    s_and_b32 s2, s3, s2
+; GFX8-GISEL-NEXT:    s_or_b32 s2, s4, s2
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: s_bfi_def_i32:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_clause 0x2
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX10-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_andn2_b32 s4, s4, s2
+; GFX10-GISEL-NEXT:    s_and_b32 s2, s3, s2
+; GFX10-GISEL-NEXT:    s_or_b32 s2, s4, s2
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10-GISEL-NEXT:    s_endpgm
 entry:
   %0 = xor i32 %x, -1
   %1 = and i32 %z, %0
@@ -79,6 +111,19 @@ define i32 @v_bfi_def_i32(i32 %x, i32 %y, i32 %z) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_bfi_def_i32:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_bfi_def_i32:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = xor i32 %x, -1
   %1 = and i32 %z, %0
@@ -134,6 +179,36 @@ define amdgpu_kernel void @s_bfi_sha256_ch(i32 addrspace(1)* %out, i32 %x, i32 %
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX8-GISEL-LABEL: s_bfi_sha256_ch:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX8-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_xor_b32 s3, s3, s4
+; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, s3
+; GFX8-GISEL-NEXT:    s_xor_b32 s2, s4, s2
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: s_bfi_sha256_ch:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_clause 0x2
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX10-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_xor_b32 s3, s3, s4
+; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, s3
+; GFX10-GISEL-NEXT:    s_xor_b32 s2, s4, s2
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10-GISEL-NEXT:    s_endpgm
 entry:
   %0 = xor i32 %y, %z
   %1 = and i32 %x, %0
@@ -161,6 +236,19 @@ define i32 @v_bfi_sha256_ch(i32 %x, i32 %y, i32 %z) {
 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, v2
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_bfi_sha256_ch:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_bfi_sha256_ch:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v1, v2
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = xor i32 %y, %z
   %1 = and i32 %x, %0
@@ -185,6 +273,20 @@ define amdgpu_ps float @v_s_s_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 inreg %z)
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ch:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_xor_b32 s0, s0, s1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ch:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, s1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %xor0 = xor i32 %y, %z
   %and = and i32 %x, %xor0
@@ -210,6 +312,20 @@ define amdgpu_ps float @s_v_s_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 inreg %z)
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ch:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ch:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s1, v0
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %xor0 = xor i32 %y, %z
   %and = and i32 %x, %xor0
@@ -235,6 +351,17 @@ define amdgpu_ps float @s_s_v_bfi_sha256_ch(i32 inreg %x, i32 inreg %y, i32 %z)
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    v_bfi_b32 v0, s0, s1, v0
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ch:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s0
+; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v1, s1, v0
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ch:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    v_bfi_b32 v0, s0, s1, v0
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %xor0 = xor i32 %y, %z
   %and = and i32 %x, %xor0
@@ -258,6 +385,16 @@ define amdgpu_ps float @s_v_v_bfi_sha256_ch(i32 inreg %x, i32 %y, i32 %z) {
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, v1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_v_v_bfi_sha256_ch:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    v_bfi_b32 v0, s0, v0, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_v_v_bfi_sha256_ch:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    v_bfi_b32 v0, s0, v0, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %xor0 = xor i32 %y, %z
   %and = and i32 %x, %xor0
@@ -281,6 +418,16 @@ define amdgpu_ps float @v_s_v_bfi_sha256_ch(i32 %x, i32 inreg %y, i32 %z) {
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, v1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ch:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, s0, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ch:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, s0, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %xor0 = xor i32 %y, %z
   %and = and i32 %x, %xor0
@@ -304,6 +451,20 @@ define amdgpu_ps float @v_v_s_bfi_sha256_ch(i32 %x, i32 %y, i32 inreg %z) {
 ; GFX10:       ; %bb.0: ; %entry
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v1, s0
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_v_s_bfi_sha256_ch:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s0, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_v_s_bfi_sha256_ch:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s0, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %xor0 = xor i32 %y, %z
   %and = and i32 %x, %xor0
@@ -362,6 +523,38 @@ define amdgpu_kernel void @s_bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s2
 ; GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX8-GISEL-LABEL: s_bfi_sha256_ma:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX8-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_and_b32 s5, s2, s4
+; GFX8-GISEL-NEXT:    s_or_b32 s2, s2, s4
+; GFX8-GISEL-NEXT:    s_and_b32 s2, s3, s2
+; GFX8-GISEL-NEXT:    s_or_b32 s2, s5, s2
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT:    flat_store_dword v[0:1], v2
+; GFX8-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: s_bfi_sha256_ma:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_clause 0x2
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
+; GFX10-GISEL-NEXT:    s_load_dword s4, s[0:1], 0x34
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_or_b32 s5, s2, s4
+; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, s4
+; GFX10-GISEL-NEXT:    s_and_b32 s3, s3, s5
+; GFX10-GISEL-NEXT:    s_or_b32 s2, s2, s3
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10-GISEL-NEXT:    s_endpgm
 entry:
   %0 = and i32 %x, %z
   %1 = or i32 %x, %z
@@ -393,6 +586,21 @@ define i32 @v_bfi_sha256_ma(i32 %x, i32 %y, i32 %z) {
 ; GFX10-NEXT:    v_xor_b32_e32 v0, v0, v1
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_bfi_sha256_ma:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX8-GISEL-NEXT:    v_bfi_b32 v0, v0, v2, v1
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_bfi_sha256_ma:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v1
+; GFX10-GISEL-NEXT:    v_bfi_b32 v0, v0, v2, v1
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %0 = and i32 %x, %z
   %1 = or i32 %x, %z
@@ -423,6 +631,29 @@ define <2 x i32> @v_bitselect_v2i32_pat1(<2 x i32> %a, <2 x i32> %b, <2 x i32> %
 ; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
 ; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_bitselect_v2i32_pat1:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_bitselect_v2i32_pat1:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %xor.0 = xor <2 x i32> %a, %mask
   %and = and <2 x i32> %xor.0, %b
   %bitselect = xor <2 x i32> %and, %mask
@@ -451,6 +682,33 @@ define i64 @v_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, v4
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_bitselect_i64_pat_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, v0, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, v1, v3
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_bitselect_i64_pat_0:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v6, -1, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v7, -1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, v6, v4
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, v7, v5
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -480,6 +738,30 @@ define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i6
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s2
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, s3
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s1, v1
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s2, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s3, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_0:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, -1, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v3, -1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -510,6 +792,24 @@ define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i6
 ; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s2
 ; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, s3
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    s_andn2_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_0:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    s_andn2_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -540,6 +840,26 @@ define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_0(i64 inreg %a, i64 inreg
 ; GFX10-NEXT:    v_bfi_b32 v0, s0, s2, v0
 ; GFX10-NEXT:    v_bfi_b32 v1, s1, s3, v1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
+; GFX8-GISEL-NEXT:    s_not_b64 s[0:1], s[0:1]
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s2, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s3, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_0:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_not_b64 s[4:5], s[0:1]
+; GFX10-GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s5, v1
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -566,6 +886,30 @@ define amdgpu_ps <2 x float> @v_v_s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 inre
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, s0
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, s1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, v0, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, v1, v3
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_v_s_bitselect_i64_pat_0:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, -1, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, -1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s0, v4
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s1, v5
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -592,6 +936,30 @@ define amdgpu_ps <2 x float> @v_s_v_bitselect_i64_pat_0(i64 %a, i64 inreg %b, i6
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, v2
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, v3
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v4, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v5, s1, v1
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, -1, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, -1, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v5, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_s_v_bitselect_i64_pat_0:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v4, -1, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v5, -1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, v4, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, v5, v3
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -618,6 +986,28 @@ define amdgpu_ps <2 x float> @s_v_v_bitselect_i64_pat_0(i64 inreg %a, i64 %b, i6
 ; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, v2
 ; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, v3
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    s_not_b64 s[0:1], s[0:1]
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s0, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s1, v3
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_v_v_bitselect_i64_pat_0:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_not_b64 s[2:3], s[0:1]
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -648,6 +1038,29 @@ define i64 @v_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
 ; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_bitselect_i64_pat_1:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_bitselect_i64_pat_1:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -676,6 +1089,26 @@ define amdgpu_ps <2 x float> @v_s_s_bitselect_i64_pat_1(i64 %a, i64 inreg %b, i6
 ; GFX10-NEXT:    v_bfi_b32 v0, s0, v0, s2
 ; GFX10-NEXT:    v_bfi_b32 v1, s1, v1, s3
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_s_s_bitselect_i64_pat_1:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_s_s_bitselect_i64_pat_1:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -705,6 +1138,26 @@ define amdgpu_ps <2 x float> @s_s_v_bitselect_i64_pat_1(i64 inreg %a, i64 inreg
 ; GFX10-NEXT:    v_bfi_b32 v0, s2, s0, v0
 ; GFX10-NEXT:    v_bfi_b32 v1, s3, s1, v1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_s_v_bitselect_i64_pat_1:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v2, s0, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v3, s1, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_s_v_bitselect_i64_pat_1:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v2, s0, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v3, s1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v2, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v3, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -734,6 +1187,24 @@ define amdgpu_ps <2 x float> @s_v_s_bitselect_i64_pat_1(i64 inreg %a, i64 %b, i6
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, s0, s2
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, s1, s3
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_v_s_bitselect_i64_pat_1:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_v_s_bitselect_i64_pat_1:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_xor_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, s2, v0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, s3, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -763,6 +1234,29 @@ define i64 @v_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, v4
 ; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, v5
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_bitselect_i64_pat_2:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_bitselect_i64_pat_2:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT:    v_xor_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -797,6 +1291,33 @@ define i64 @v_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v4, v2
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, v5, v3
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-GISEL-LABEL: v_bfi_sha256_ma_i64:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v6, v0, v4
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v7, v1, v5
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v4
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v1, v5
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v6, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v7, v1
+; GFX8-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_bfi_sha256_ma_i64:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_waitcnt_vscnt null, 0x0
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v6, v0, v4
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v7, v1, v5
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v4
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v5
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, v2, v6
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, v3, v7
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z
@@ -833,6 +1354,30 @@ define amdgpu_ps <2 x float> @v_s_s_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, s2, s0
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, s3, s1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s2, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s3, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s2, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s3, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_s_s_bfi_sha256_ma_i64:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v2, s2, v0
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v3, s3, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s2, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s3, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s0, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s1, v3
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z
@@ -866,6 +1411,26 @@ define amdgpu_ps <2 x float> @s_v_s_bfi_sha256_ma_i64(i64 inreg %x, i64 %y, i64
 ; GFX10-NEXT:    v_bfi_b32 v0, v2, s2, v0
 ; GFX10-NEXT:    v_bfi_b32 v1, v3, s3, v1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_and_b64 s[4:5], s[0:1], s[2:3]
+; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s4, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s5, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_v_s_bfi_sha256_ma_i64:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_or_b64 s[4:5], s[0:1], s[2:3]
+; GFX10-GISEL-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s4, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s5, v1
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z
@@ -903,6 +1468,30 @@ define amdgpu_ps <2 x float> @s_s_v_bfi_sha256_ma_i64(i64 inreg %x, i64 inreg %y
 ; GFX10-NEXT:    v_bfi_b32 v0, v2, v0, s2
 ; GFX10-NEXT:    v_bfi_b32 v1, v3, v1, s3
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v2, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v3, s1, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s2, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s3, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v2, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v3, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: s_s_v_bfi_sha256_ma_i64:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v2, s0, v0
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v3, s1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s2, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s3, v3
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z
@@ -936,6 +1525,30 @@ define amdgpu_ps <2 x float> @v_s_v_bfi_sha256_ma_i64(i64 %x, i64 inreg %y, i64
 ; GFX10-NEXT:    v_bfi_b32 v0, v0, v2, s0
 ; GFX10-NEXT:    v_bfi_b32 v1, v1, v3, s1
 ; GFX10-NEXT:    ; return to shader part epilog
+;
+; GFX8-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v4, v0, v2
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v5, v1, v3
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v0, s0, v0
+; GFX8-GISEL-NEXT:    v_and_b32_e32 v1, s1, v1
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v0, v4, v0
+; GFX8-GISEL-NEXT:    v_or_b32_e32 v1, v5, v1
+; GFX8-GISEL-NEXT:    ; return to shader part epilog
+;
+; GFX10-GISEL-LABEL: v_s_v_bfi_sha256_ma_i64:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v4, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v5, v1, v3
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v2, s0, v4
+; GFX10-GISEL-NEXT:    v_and_b32_e32 v3, s1, v5
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v0, v0, v2
+; GFX10-GISEL-NEXT:    v_or_b32_e32 v1, v1, v3
+; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z
@@ -993,6 +1606,43 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX8-GISEL-LABEL: s_bitselect_i64_pat_0:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[4:5], s[6:7]
+; GFX8-GISEL-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
+; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
+; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: s_bitselect_i64_pat_0:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_clause 0x1
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[4:5], s[6:7]
+; GFX10-GISEL-NEXT:    s_andn2_b64 s[0:1], s[0:1], s[4:5]
+; GFX10-GISEL-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
+; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT:    s_endpgm
   %and0 = and i64 %a, %b
   %not.a = xor i64 %a, -1
   %and1 = and i64 %not.a, %mask
@@ -1050,6 +1700,43 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX8-GISEL-LABEL: s_bitselect_i64_pat_1:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
+; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX8-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
+; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: s_bitselect_i64_pat_1:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_clause 0x1
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
+; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX10-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
+; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT:    s_endpgm
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -1107,6 +1794,43 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX8-GISEL-LABEL: s_bitselect_i64_pat_2:
+; GFX8-GISEL:       ; %bb.0:
+; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
+; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX8-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
+; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: s_bitselect_i64_pat_2:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_clause 0x1
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_xor_b64 s[2:3], s[4:5], s[0:1]
+; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[2:3], s[6:7]
+; GFX10-GISEL-NEXT:    s_xor_b64 s[0:1], s[2:3], s[0:1]
+; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
+; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT:    s_endpgm
   %xor.0 = xor i64 %a, %mask
   %and = and i64 %xor.0, %b
   %bitselect = xor i64 %and, %mask
@@ -1167,6 +1891,45 @@ define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
 ; GFX10-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX10-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
 ; GFX10-NEXT:    s_endpgm
+;
+; GFX8-GISEL-LABEL: s_bfi_sha256_ma_i64:
+; GFX8-GISEL:       ; %bb.0: ; %entry
+; GFX8-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX8-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX8-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX8-GISEL-NEXT:    s_and_b64 s[2:3], s[4:5], s[0:1]
+; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[4:5], s[0:1]
+; GFX8-GISEL-NEXT:    s_and_b64 s[0:1], s[6:7], s[0:1]
+; GFX8-GISEL-NEXT:    s_or_b64 s[0:1], s[2:3], s[0:1]
+; GFX8-GISEL-NEXT:    s_add_u32 s0, s0, 10
+; GFX8-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX8-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX8-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX8-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX8-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX8-GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
+; GFX8-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: s_bfi_sha256_ma_i64:
+; GFX10-GISEL:       ; %bb.0: ; %entry
+; GFX10-GISEL-NEXT:    s_clause 0x1
+; GFX10-GISEL-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
+; GFX10-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x34
+; GFX10-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT:    s_or_b64 s[2:3], s[4:5], s[0:1]
+; GFX10-GISEL-NEXT:    s_and_b64 s[0:1], s[4:5], s[0:1]
+; GFX10-GISEL-NEXT:    s_and_b64 s[2:3], s[6:7], s[2:3]
+; GFX10-GISEL-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
+; GFX10-GISEL-NEXT:    s_add_u32 s0, s0, 10
+; GFX10-GISEL-NEXT:    s_cselect_b32 s2, 1, 0
+; GFX10-GISEL-NEXT:    s_and_b32 s2, s2, 1
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX10-GISEL-NEXT:    s_addc_u32 s1, s1, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, s1
+; GFX10-GISEL-NEXT:    global_store_dwordx2 v[0:1], v[0:1], off
+; GFX10-GISEL-NEXT:    s_endpgm
 entry:
   %and0 = and i64 %x, %z
   %or0 = or i64 %x, %z

From 810752aaa1eb904654e543e96f76e004562304ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 23 Jan 2022 23:16:46 +0000
Subject: [PATCH 781/946] [libcxx] [test] Fix the locale.time.put.byname/put1
 testcase on Linux and Windows

The Windows and Glibc abbreviated form of Saturday in French locale
is "sam." with a trailing period included. Account for this in the
test reference.

Differential Revision: https://reviews.llvm.org/D118240
---
 .../locale.time.put.byname/put1.pass.cpp              | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp
index db55a939f6eef..92b10ea9d0af3 100644
--- a/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp
+++ b/libcxx/test/std/localization/locale.categories/category.time/locale.time.put.byname/put1.pass.cpp
@@ -9,7 +9,6 @@
 // NetBSD does not support LC_TIME at the moment
 // XFAIL: netbsd
 
-// XFAIL: LIBCXX-WINDOWS-FIXME
 // XFAIL: LIBCXX-AIX-FIXME
 
 // REQUIRES: locale.en_US.UTF-8
@@ -29,9 +28,6 @@
 //     ~time_put_byname();
 // };
 
-// TODO: investigation needed
-// XFAIL: target={{.*}}-linux-gnu{{.*}}
-
 #include <locale>
 #include <cassert>
 #include "test_macros.h"
@@ -74,12 +70,13 @@ int main(int, char**)
     }
     {
         const my_facet f(LOCALE_fr_FR_UTF_8, 1);
-        std::string pat("Today is %A which is abbreviated %a.");
+        std::string pat("Today is %A which is abbreviated '%a'.");
         iter = f.put(output_iterator<char*>(str), ios, '*', &t,
                      pat.data(), pat.data() + pat.size());
         std::string ex(str, iter.base());
-        assert((ex == "Today is Samedi which is abbreviated Sam.")||
-               (ex == "Today is samedi which is abbreviated sam." ));
+        assert((ex == "Today is Samedi which is abbreviated 'Sam'.")||
+               (ex == "Today is samedi which is abbreviated 'sam'." )||
+               (ex == "Today is samedi which is abbreviated 'sam.'."));
     }
 
   return 0;

From b5bada6f85ce6789415d8b9844b6fd3a26d5408f Mon Sep 17 00:00:00 2001
From: Casey Carter <Casey@Carter.net>
Date: Mon, 3 Jan 2022 17:01:16 -0800
Subject: [PATCH 782/946] [libcxx][test] Narrow XFAIL for tests that pass with
 `msvc && stdlib=msvc`

... but fail with `msvc && stdlib=libc++`.

Differential Review: https://reviews.llvm.org/D118194
---
 .../alloc.errors/set.new.handler/get_new_handler.pass.cpp       | 2 +-
 .../func.wrap.func/func.wrap.func.con/copy_move.pass.cpp        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/get_new_handler.pass.cpp b/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/get_new_handler.pass.cpp
index 4a8a610dca114..c5725bde6e6c3 100644
--- a/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/get_new_handler.pass.cpp
+++ b/libcxx/test/std/language.support/support.dynamic/alloc.errors/set.new.handler/get_new_handler.pass.cpp
@@ -13,7 +13,7 @@
 // are provided by vcruntime/UCRT's new.h. However, that header actually only
 // declares set_new_handler - it's missing a declaration of get_new_handler.
 
-// XFAIL: msvc
+// XFAIL: msvc && stdlib=libc++
 
 #include <new>
 #include <cassert>
diff --git a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp
index 521c968476607..0c478d02cbf7e 100644
--- a/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/func.wrap/func.wrap.func/func.wrap.func.con/copy_move.pass.cpp
@@ -8,7 +8,7 @@
 
 // FIXME: In MSVC mode, even "std::function<int(int)> f(aref);" causes
 // allocations.
-// XFAIL: msvc
+// XFAIL: msvc && stdlib=libc++
 
 // <functional>
 

From 2d670de84c485b514a1950dda3602f899b6f0f58 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 19 Jan 2022 17:23:57 -0500
Subject: [PATCH 783/946] GlobalISel: Avoid crash on asm with lying result
 types

The physical register in the asm has the wrong type for the declared
IR. It seems to work in the DAG by extracting the 4 elements that are
defined in the IR from the register, but that isn't handled here. This
doesn't seem to be a well tested path since other mismatched cases are
crashing the DAG asm handling.
---
 .../CodeGen/GlobalISel/InlineAsmLowering.cpp  |  10 +-
 .../GlobalISel/inline-asm-mismatched-size.ll  | 201 ++++++++++++++++++
 2 files changed, 209 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll

diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index dfcf1b80392af..e5f95ca5aa731 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -623,7 +623,8 @@ bool InlineAsmLowering::lowerInlineAsm(
 
       Register SrcReg = OpInfo.Regs[0];
       unsigned SrcSize = TRI->getRegSizeInBits(SrcReg, *MRI);
-      if (MRI->getType(ResRegs[i]).getSizeInBits() < SrcSize) {
+      LLT ResTy = MRI->getType(ResRegs[i]);
+      if (ResTy.isScalar() && ResTy.getSizeInBits() < SrcSize) {
         // First copy the non-typed virtual register into a generic virtual
         // register
         Register Tmp1Reg =
@@ -631,9 +632,14 @@ bool InlineAsmLowering::lowerInlineAsm(
         MIRBuilder.buildCopy(Tmp1Reg, SrcReg);
         // Need to truncate the result of the register
         MIRBuilder.buildTrunc(ResRegs[i], Tmp1Reg);
-      } else {
+      } else if (ResTy.getSizeInBits() == SrcSize) {
         MIRBuilder.buildCopy(ResRegs[i], SrcReg);
+      } else {
+        LLVM_DEBUG(dbgs() << "Unhandled output operand with "
+                             "mismatched register size\n");
+        return false;
       }
+
       break;
     }
     case TargetLowering::C_Immediate:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
new file mode 100644
index 0000000000000..2d3ec011220fb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm-mismatched-size.ll
@@ -0,0 +1,201 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -march=amdgcn -mcpu=fiji -stop-after=irtranslator -verify-machineinstrs %s -o - 2>%t | FileCheck %s
+; RUN: FileCheck -check-prefix=ERR %s < %t
+
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}"()' (in function: return_type_is_too_big_vector)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %reg = call i64 asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_scalar)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %reg = call i8 addrspace(1)* asm sideeffect "; def $0", "={v8}"()' (in function: return_type_is_too_big_pointer)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  %reg = call i8 addrspace(3)* asm sideeffect "; def $0", "={v[8:9]}"()' (in function: return_type_is_too_small_pointer)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg)' (in function: use_vector_too_big)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v0}"(i64 %arg)' (in function: use_scalar_too_small)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v0}"(i8 addrspace(1)* %arg)' (in function: use_pointer_too_small)
+; ERR: remark: <unknown>:0:0: unable to translate instruction: call: '  call void asm sideeffect "; use $0", "{v[0:1]}"(i32 addrspace(3)* %arg)' (in function: use_pointer_too_big)
+
+
+; This asm is broken because it's using a 5 element wide physical
+; register for a 4 element wide value. Make sure we don't crash, and
+; take the IR type as truth.
+define amdgpu_kernel void @return_type_is_too_big_vector() {
+  ; CHECK-LABEL: name: return_type_is_too_big_vector
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
+  %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:12]}" ()
+  call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
+  ret void
+}
+
+; FIXME: This is crashing in the DAG
+; define amdgpu_kernel void @return_type_is_too_small_vector() {
+;   %sgpr = call <4 x i32> asm sideeffect "; def $0", "={s[8:10]}" ()
+;   call void asm sideeffect "; use $0", "s"(<4 x i32> %sgpr) #0
+;   ret void
+; }
+
+define i64 @return_type_is_too_big_scalar() {
+  ; CHECK-LABEL: name: return_type_is_too_big_scalar
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8
+  %reg = call i64 asm sideeffect "; def $0", "={v8}" ()
+  ret i64 %reg
+}
+
+define i32 @return_type_is_too_small_scalar() {
+  ; CHECK-LABEL: name: return_type_is_too_small_scalar
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9
+  ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+  ; CHECK-NEXT:   $vgpr0 = COPY [[TRUNC]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]], implicit $vgpr0
+  %reg = call i32 asm sideeffect "; def $0", "={v[8:9]}" ()
+  ret i32 %reg
+}
+
+define i8 addrspace(1)* @return_type_is_too_big_pointer() {
+  ; CHECK-LABEL: name: return_type_is_too_big_pointer
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8
+  %reg = call i8 addrspace(1)* asm sideeffect "; def $0", "={v8}" ()
+  ret i8 addrspace(1)* %reg
+}
+
+define i8 addrspace(3)* @return_type_is_too_small_pointer() {
+  ; CHECK-LABEL: name: return_type_is_too_small_pointer
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vgpr8_vgpr9
+  %reg = call i8 addrspace(3)* asm sideeffect "; def $0", "={v[8:9]}" ()
+  ret i8 addrspace(3)* %reg
+}
+
+define void @use_vector_too_small(<8 x i32> %arg) {
+  ; CHECK-LABEL: name: use_vector_too_small
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[BUILD_VECTOR]](<8 x s32>)
+  ; CHECK-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
+  ; CHECK-NEXT:   [[COPY9:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY8]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY9]]
+  call void asm sideeffect "; use $0", "{v[0:7]}"(<8 x i32> %arg)
+  ret void
+}
+
+define void @use_vector_too_big(<8 x i32> %arg) {
+  ; CHECK-LABEL: name: use_vector_too_big
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+  ; CHECK-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+  ; CHECK-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+  ; CHECK-NEXT:   [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+  ; CHECK-NEXT:   [[COPY8:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  call void asm sideeffect "; use $0", "{v[0:9]}"(<8 x i32> %arg)
+  ret void
+}
+
+define void @use_scalar_too_small(i64 %arg) {
+  ; CHECK-LABEL: name: use_scalar_too_small
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  call void asm sideeffect "; use $0", "{v0}"(i64 %arg)
+  ret void
+}
+
+define void @use_scalar_too_big(i32 %arg) {
+  ; CHECK-LABEL: name: use_scalar_too_big
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK-NEXT:   liveins: $vgpr0, $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY]](s32)
+  ; CHECK-NEXT:   $vgpr0_vgpr1 = COPY [[ANYEXT]](s64)
+  ; CHECK-NEXT:   INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 9 /* reguse */, $vgpr0_vgpr1
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY1]]
+  ; CHECK-NEXT:   S_SETPC_B64_return [[COPY2]]
+  call void asm sideeffect "; use $0", "{v[0:1]}"(i32 %arg)
+  ret void
+}
+
+define void @use_pointer_too_small(i8 addrspace(1)* %arg) {
+  ; CHECK-LABEL: name: use_pointer_too_small
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31, $vgpr0, $vgpr1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+  ; CHECK-NEXT:   [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  call void asm sideeffect "; use $0", "{v0}"(i8 addrspace(1)* %arg)
+  ret void
+}
+
+define void @use_pointer_too_big(i32 addrspace(3)* %arg) {
+  ; CHECK-LABEL: name: use_pointer_too_big
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $sgpr30_sgpr31, $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1 (%ir-block.0):
+  call void asm sideeffect "; use $0", "{v[0:1]}"(i32 addrspace(3)* %arg)
+  ret void
+}

From 045be6ff36dfcbf4a1146ea06ad90909f0508f9b Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 12 Jan 2022 12:02:40 -0500
Subject: [PATCH 784/946] AMDGPU/GlobalISel: Fold wave address into mubuf
 addressing modes

---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |  49 +++++-
 .../GlobalISel/call-outgoing-stack-args.ll    | 102 ++++++-------
 .../GlobalISel/inst-select-store-private.mir  | 141 ++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/amdpal-callable.ll   |  12 +-
 4 files changed, 238 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index e48dca3cc9572..4883b6a86ef8f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -3905,20 +3905,59 @@ bool AMDGPUInstructionSelector::isUnneededShiftMask(const MachineInstr &MI,
   return (LHSKnownZeros | *RHS).countTrailingOnes() >= ShAmtBits;
 }
 
+// Return the wave level SGPR base address if this is a wave address.
+static Register getWaveAddress(const MachineInstr *Def) {
+  return Def->getOpcode() == AMDGPU::G_AMDGPU_WAVE_ADDRESS
+             ? Def->getOperand(1).getReg()
+             : Register();
+}
+
 InstructionSelector::ComplexRendererFns
 AMDGPUInstructionSelector::selectMUBUFScratchOffset(
     MachineOperand &Root) const {
-  MachineInstr *MI = Root.getParent();
-  MachineBasicBlock *MBB = MI->getParent();
+  Register Reg = Root.getReg();
+  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
+
+  const MachineInstr *Def = MRI->getVRegDef(Reg);
+  if (Register WaveBase = getWaveAddress(Def)) {
+    return {{
+        [=](MachineInstrBuilder &MIB) { // rsrc
+          MIB.addReg(Info->getScratchRSrcReg());
+        },
+        [=](MachineInstrBuilder &MIB) { // soffset
+          MIB.addReg(WaveBase);
+        },
+        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); } // offset
+    }};
+  }
 
   int64_t Offset = 0;
+
+  // FIXME: Copy check is a hack
+  Register BasePtr;
+  if (mi_match(Reg, *MRI, m_GPtrAdd(m_Reg(BasePtr), m_Copy(m_ICst(Offset))))) {
+    if (!SIInstrInfo::isLegalMUBUFImmOffset(Offset))
+      return {};
+    const MachineInstr *BasePtrDef = MRI->getVRegDef(BasePtr);
+    Register WaveBase = getWaveAddress(BasePtrDef);
+    if (!WaveBase)
+      return {};
+
+    return {{
+        [=](MachineInstrBuilder &MIB) { // rsrc
+          MIB.addReg(Info->getScratchRSrcReg());
+        },
+        [=](MachineInstrBuilder &MIB) { // soffset
+          MIB.addReg(WaveBase);
+        },
+        [=](MachineInstrBuilder &MIB) { MIB.addImm(Offset); } // offset
+    }};
+  }
+
   if (!mi_match(Root.getReg(), *MRI, m_ICst(Offset)) ||
       !SIInstrInfo::isLegalMUBUFImmOffset(Offset))
     return {};
 
-  const MachineFunction *MF = MBB->getParent();
-  const SIMachineFunctionInfo *Info = MF->getInfo<SIMachineFunctionInfo>();
-
   return {{
       [=](MachineInstrBuilder &MIB) { // rsrc
         MIB.addReg(Info->getScratchRSrcReg());
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
index 0519a9c7db321..8e0f3fc479891 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/call-outgoing-stack-args.ll
@@ -14,21 +14,20 @@ define amdgpu_kernel void @kernel_caller_stack() {
 ; MUBUF:       ; %bb.0:
 ; MUBUF-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
 ; MUBUF-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
-; MUBUF-NEXT:    s_mov_b32 s32, 0
 ; MUBUF-NEXT:    s_add_u32 s0, s0, s7
+; MUBUF-NEXT:    s_mov_b32 s32, 0
 ; MUBUF-NEXT:    s_addc_u32 s1, s1, 0
-; MUBUF-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 9
-; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 10
-; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 11
-; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 12
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 9
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 10
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 11
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 12
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
-; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
 ; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; MUBUF-NEXT:    s_endpgm
 ;
@@ -112,42 +111,41 @@ define amdgpu_kernel void @kernel_caller_byval() {
 ; MUBUF-NEXT:    buffer_load_dword v14, off, s[0:3], 0 offset:64
 ; MUBUF-NEXT:    buffer_load_dword v15, off, s[0:3], 0 offset:68
 ; MUBUF-NEXT:    s_movk_i32 s32, 0x1400
-; MUBUF-NEXT:    v_lshrrev_b32_e64 v16, 6, s32
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_byval@rel32@hi+12
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v0, v16, s[0:3], 0 offen
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v1, v16, s[0:3], 0 offen offset:4
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:4
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v2, v16, s[0:3], 0 offen offset:8
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:8
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v3, v16, s[0:3], 0 offen offset:12
+; MUBUF-NEXT:    buffer_store_dword v3, off, s[0:3], s32 offset:12
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v4, v16, s[0:3], 0 offen offset:16
+; MUBUF-NEXT:    buffer_store_dword v4, off, s[0:3], s32 offset:16
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v5, v16, s[0:3], 0 offen offset:20
+; MUBUF-NEXT:    buffer_store_dword v5, off, s[0:3], s32 offset:20
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v6, v16, s[0:3], 0 offen offset:24
+; MUBUF-NEXT:    buffer_store_dword v6, off, s[0:3], s32 offset:24
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v7, v16, s[0:3], 0 offen offset:28
+; MUBUF-NEXT:    buffer_store_dword v7, off, s[0:3], s32 offset:28
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v8, v16, s[0:3], 0 offen offset:32
+; MUBUF-NEXT:    buffer_store_dword v8, off, s[0:3], s32 offset:32
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v9, v16, s[0:3], 0 offen offset:36
+; MUBUF-NEXT:    buffer_store_dword v9, off, s[0:3], s32 offset:36
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v10, v16, s[0:3], 0 offen offset:40
+; MUBUF-NEXT:    buffer_store_dword v10, off, s[0:3], s32 offset:40
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v11, v16, s[0:3], 0 offen offset:44
+; MUBUF-NEXT:    buffer_store_dword v11, off, s[0:3], s32 offset:44
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v12, v16, s[0:3], 0 offen offset:48
+; MUBUF-NEXT:    buffer_store_dword v12, off, s[0:3], s32 offset:48
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v13, v16, s[0:3], 0 offen offset:52
+; MUBUF-NEXT:    buffer_store_dword v13, off, s[0:3], s32 offset:52
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v14, v16, s[0:3], 0 offen offset:56
+; MUBUF-NEXT:    buffer_store_dword v14, off, s[0:3], s32 offset:56
 ; MUBUF-NEXT:    s_waitcnt vmcnt(15)
-; MUBUF-NEXT:    buffer_store_dword v15, v16, s[0:3], 0 offen offset:60
+; MUBUF-NEXT:    buffer_store_dword v15, off, s[0:3], s32 offset:60
 ; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; MUBUF-NEXT:    s_endpgm
 ;
@@ -244,20 +242,19 @@ define void @func_caller_stack() {
 ; MUBUF-NEXT:    v_writelane_b32 v40, s33, 2
 ; MUBUF-NEXT:    s_mov_b32 s33, s32
 ; MUBUF-NEXT:    s_addk_i32 s32, 0x400
-; MUBUF-NEXT:    v_lshrrev_b32_e64 v0, 6, s32
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 9
-; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 10
-; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:8
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 11
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 9
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:4
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 10
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:8
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 11
 ; MUBUF-NEXT:    v_writelane_b32 v40, s30, 0
-; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:12
-; MUBUF-NEXT:    v_mov_b32_e32 v1, 12
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:12
+; MUBUF-NEXT:    v_mov_b32_e32 v0, 12
 ; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_v16i32_v16i32_v4i32@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_v16i32_v16i32_v4i32@rel32@hi+12
-; MUBUF-NEXT:    buffer_store_dword v1, v0, s[0:3], 0 offen offset:16
+; MUBUF-NEXT:    buffer_store_dword v0, off, s[0:3], s32 offset:16
 ; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; MUBUF-NEXT:    v_readlane_b32 s4, v40, 0
 ; MUBUF-NEXT:    v_readlane_b32 s5, v40, 1
@@ -317,65 +314,64 @@ define void @func_caller_byval([16 x i32] addrspace(5)* %argptr) {
 ; MUBUF-NEXT:    v_writelane_b32 v40, s33, 2
 ; MUBUF-NEXT:    s_mov_b32 s33, s32
 ; MUBUF-NEXT:    s_addk_i32 s32, 0x400
-; MUBUF-NEXT:    v_lshrrev_b32_e64 v3, 6, s32
 ; MUBUF-NEXT:    v_writelane_b32 v40, s30, 0
 ; MUBUF-NEXT:    v_writelane_b32 v40, s31, 1
 ; MUBUF-NEXT:    s_getpc_b64 s[4:5]
 ; MUBUF-NEXT:    s_add_u32 s4, s4, external_void_func_byval@rel32@lo+4
 ; MUBUF-NEXT:    s_addc_u32 s5, s5, external_void_func_byval@rel32@hi+12
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v1, v3, s[0:3], 0 offen
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:4
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:4
 ; MUBUF-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:8
 ; MUBUF-NEXT:    s_nop 0
 ; MUBUF-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen offset:12
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v1, v3, s[0:3], 0 offen offset:8
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:8
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:12
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:12
 ; MUBUF-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:16
 ; MUBUF-NEXT:    s_nop 0
 ; MUBUF-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen offset:20
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v1, v3, s[0:3], 0 offen offset:16
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:16
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:20
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:20
 ; MUBUF-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:24
 ; MUBUF-NEXT:    s_nop 0
 ; MUBUF-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen offset:28
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v1, v3, s[0:3], 0 offen offset:24
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:24
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:28
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:28
 ; MUBUF-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:32
 ; MUBUF-NEXT:    s_nop 0
 ; MUBUF-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen offset:36
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v1, v3, s[0:3], 0 offen offset:32
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:32
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:36
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:36
 ; MUBUF-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:40
 ; MUBUF-NEXT:    s_nop 0
 ; MUBUF-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen offset:44
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v1, v3, s[0:3], 0 offen offset:40
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:40
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:44
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:44
 ; MUBUF-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:48
 ; MUBUF-NEXT:    s_nop 0
 ; MUBUF-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen offset:52
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v1, v3, s[0:3], 0 offen offset:48
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:48
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:52
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:52
 ; MUBUF-NEXT:    buffer_load_dword v1, v0, s[0:3], 0 offen offset:56
 ; MUBUF-NEXT:    s_nop 0
 ; MUBUF-NEXT:    buffer_load_dword v2, v0, s[0:3], 0 offen offset:60
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v1, v3, s[0:3], 0 offen offset:56
+; MUBUF-NEXT:    buffer_store_dword v1, off, s[0:3], s32 offset:56
 ; MUBUF-NEXT:    s_waitcnt vmcnt(1)
-; MUBUF-NEXT:    buffer_store_dword v2, v3, s[0:3], 0 offen offset:60
+; MUBUF-NEXT:    buffer_store_dword v2, off, s[0:3], s32 offset:60
 ; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[4:5]
 ; MUBUF-NEXT:    v_readlane_b32 s4, v40, 0
 ; MUBUF-NEXT:    v_readlane_b32 s5, v40, 1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
index 11b7c28860aea..246c7686d646d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-private.mir
@@ -581,3 +581,144 @@ body: |
     G_STORE %1, %0 :: (store (s8), align 1, addrspace 5)
 
 ...
+
+---
+name: function_store_private_s32_to_4_wave_address
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: false
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
+    ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    G_STORE %0, %1 :: (store (s32), align 4, addrspace 5)
+
+...
+
+# Has regbank copy of constant
+---
+name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: false
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
+    ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4095
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    %2:sgpr(s32) = G_CONSTANT i32 4095
+    %3:vgpr(s32) = COPY %2
+    %4:vgpr(p5) = G_PTR_ADD %1, %3
+    G_STORE %0, %4 :: (store (s32), align 4, addrspace 5)
+
+...
+
+---
+name: function_store_private_s32_to_4_wave_address_offset_4095
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: false
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX6-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
+    ; GFX6-NEXT: %3:vgpr_32, dead %4:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[V_MOV_B32_e32_]], 0, implicit $exec
+    ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], %3, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
+    ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_4095
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_LSHRREV_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4095, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    %2:vgpr(s32) = G_CONSTANT i32 4095
+    %3:vgpr(p5) = G_PTR_ADD %1, %2
+    G_STORE %0, %3 :: (store (s32), align 4, addrspace 5)
+
+...
+
+---
+name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: false
+  scratchRSrcReg: $sgpr0_sgpr1_sgpr2_sgpr3
+  stackPtrOffsetReg: $sgpr32
+
+body: |
+  bb.0:
+    liveins: $vgpr0, $vgpr1
+
+    ; GFX6-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096
+    ; GFX6: liveins: $vgpr0, $vgpr1
+    ; GFX6-NEXT: {{  $}}
+    ; GFX6-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX6-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX6-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+    ; GFX6-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+    ; GFX6-NEXT: %4:vgpr_32, dead %5:sreg_64_xexec = V_ADD_CO_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec
+    ; GFX6-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], %4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
+    ; GFX9-LABEL: name: function_store_private_s32_to_4_wave_address_offset_copy_constant_4096
+    ; GFX9: liveins: $vgpr0, $vgpr1
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GFX9-NEXT: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
+    ; GFX9-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
+    ; GFX9-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_LSHRREV_B32_e64_]], [[COPY1]], 0, implicit $exec
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store (s32), addrspace 5)
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(p5) = G_AMDGPU_WAVE_ADDRESS $sgpr32
+    %2:sgpr(s32) = G_CONSTANT i32 4096
+    %3:vgpr(s32) = COPY %2
+    %4:vgpr(p5) = G_PTR_ADD %1, %3
+    G_STORE %0, %4 :: (store (s32), align 4, addrspace 5)
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
index 6950088bd71b5..3c32a431bc01d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-callable.ll
@@ -144,8 +144,7 @@ attributes #0 = { nounwind }
 
 ; GCN: amdpal.pipelines:
 ; GCN-NEXT:  - .registers:
-; SDAG-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
-; GISEL-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
+; GCN-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
 ; GCN-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
 ; GCN-NEXT:    .shader_functions:
 ; GCN-NEXT:      dynamic_stack:
@@ -187,15 +186,13 @@ attributes #0 = { nounwind }
 ; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
 ; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x90{{$}}
-; SDAG-NEXT:        .vgpr_count:     0x2a{{$}}
-; GISEL-NEXT:        .vgpr_count:     0x34{{$}}
+; GCN-NEXT:        .vgpr_count:     0x2a{{$}}
 ; GCN-NEXT:      no_stack_indirect_call:
 ; GCN-NEXT:        .lds_size:       0{{$}}
 ; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
 ; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
-; SDAG-NEXT:        .vgpr_count:     0x2a{{$}}
-; GISEL-NEXT:        .vgpr_count:     0x34{{$}}
+; GCN-NEXT:        .vgpr_count:     0x2a{{$}}
 ; GCN-NEXT:      simple_lds:
 ; GCN-NEXT:        .lds_size:       0x100{{$}}
 ; GCN-NEXT:        .sgpr_count:     0x20{{$}}
@@ -227,8 +224,7 @@ attributes #0 = { nounwind }
 ; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
 ; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
 ; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
-; SDAG-NEXT:        .vgpr_count:     0x2b{{$}}
-; GISEL-NEXT:        .vgpr_count:     0x34{{$}}
+; GCN-NEXT:        .vgpr_count:     0x2b{{$}}
 ; GCN-NEXT:      simple_stack_recurse:
 ; GCN-NEXT:        .lds_size:       0{{$}}
 ; GCN-NEXT:        .sgpr_count:     0x26{{$}}

From f400a6012c668dfaa73462caf067ceb074e66c47 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Wed, 26 Jan 2022 12:46:07 -0800
Subject: [PATCH 785/946] Revert "Fix UB in DwarfExpression::emitLegacyZExt()"

This reverts commit 216002c4bb708e6d6fd1895c8ea636470961f824
while investigating bot breakage.
---
 .../CodeGen/AsmPrinter/DwarfExpression.cpp    | 22 +++----------------
 llvm/test/DebugInfo/X86/convert-debugloc.ll   |  9 --------
 2 files changed, 3 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index ee932d1051079..37407c98e75f8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -681,25 +681,9 @@ void DwarfExpression::emitLegacySExt(unsigned FromBits) {
 }
 
 void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
-  // Heuristic to decide the most efficient encoding.
-  // A ULEB can encode 7 1-bits per byte.
-  if (FromBits / 7 < 1+1+1+1+1) {
-    // (X & (1 << FromBits - 1))
-    emitOp(dwarf::DW_OP_constu);
-    emitUnsigned((1ULL << FromBits) - 1);
-  } else {
-    // Note that the DWARF 4 stack consists of pointer-sized elements,
-    // so technically it doesn't make sense to shift left more than 64
-    // bits. We leave that for the consumer to decide though. LLDB for
-    // example uses APInt for the stack elements and can still deal
-    // with this.
-    emitOp(dwarf::DW_OP_lit1);
-    emitOp(dwarf::DW_OP_constu);
-    emitUnsigned(FromBits);
-    emitOp(dwarf::DW_OP_shl);
-    emitOp(dwarf::DW_OP_lit1);
-    emitOp(dwarf::DW_OP_minus);
-  }
+  // (X & (1 << FromBits - 1))
+  emitOp(dwarf::DW_OP_constu);
+  emitUnsigned((1ULL << FromBits) - 1);
   emitOp(dwarf::DW_OP_and);
 }
 
diff --git a/llvm/test/DebugInfo/X86/convert-debugloc.ll b/llvm/test/DebugInfo/X86/convert-debugloc.ll
index 2ff6e96621900..21e41dcc4c2a9 100644
--- a/llvm/test/DebugInfo/X86/convert-debugloc.ll
+++ b/llvm/test/DebugInfo/X86/convert-debugloc.ll
@@ -64,17 +64,11 @@
 ; NOCONV:       DW_AT_location (
 ; NOCONV:         {{.*}}, DW_OP_dup, DW_OP_constu 0x7, DW_OP_shr, DW_OP_lit0, DW_OP_not, DW_OP_mul, DW_OP_constu 0x8, DW_OP_shl, DW_OP_or, DW_OP_stack_value)
 ; NOCONV:       DW_AT_name ("y")
-; NOCONV:     DW_TAG_variable
-; NOCONV:       DW_AT_location (
-; NOCONV:         DW_OP_constu 0x40, DW_OP_lit0, DW_OP_plus, DW_OP_lit1, DW_OP_constu 0x40, DW_OP_shl, DW_OP_lit1, DW_OP_minus, DW_OP_and, DW_OP_stack_value)
-; NOCONV:       DW_AT_name ("z")
 ; NOCONV:     NULL
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("signed char")
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("int")
-; NOCONV:   DW_TAG_base_type
-; NOCONV:     DW_AT_name ("unsigned long long")
 ; NOCONV:   NULL
 
 
@@ -87,7 +81,6 @@ entry:
 ;; will not attempt to eliminate.  At the moment, only "convert" ops are folded.
 ;; If you have to change the expression, the expected DWO_id also changes.
   call void @llvm.dbg.value(metadata i8 32, metadata !13, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !15
-  call void @llvm.dbg.value(metadata i8 64, metadata !17, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 128, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !15
   ret i8 %x, !dbg !16
 }
 
@@ -118,5 +111,3 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !15 = !DILocation(line: 3, column: 14, scope: !7)
 !16 = !DILocation(line: 4, column: 3, scope: !7)
-!17 = !DILocalVariable(name: "z", scope: !7, file: !1, line: 3, type: !18)
-!18 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)

From 2ebf3263e7a5b9c9be6bee284c1bc30dad38ca88 Mon Sep 17 00:00:00 2001
From: LLVM GN Syncbot <llvmgnsyncbot@gmail.com>
Date: Wed, 26 Jan 2022 20:46:39 +0000
Subject: [PATCH 786/946] [gn build] Port b1d946cbf780

---
 llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn
index 86559329127e2..a362529e6b818 100644
--- a/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Frontend/BUILD.gn
@@ -29,6 +29,7 @@ static_library("Frontend") {
     "DependencyFile.cpp",
     "DependencyGraph.cpp",
     "DiagnosticRenderer.cpp",
+    "ExtractAPIConsumer.cpp",
     "FrontendAction.cpp",
     "FrontendActions.cpp",
     "FrontendOptions.cpp",

From f3e22946e5c573eaa9b05d197d0c1036b21978fb Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Wed, 26 Jan 2022 15:54:52 -0500
Subject: [PATCH 787/946] Update the Bug Life Cycle docs for the switch to
 GitHub issues

This updates the Bug Life Cycle docs now that we've switched to GitHub
issues. The intent is to retain the same general process we used to
use for triaging bugs under Bugzilla, but with the facilities we have
available in GitHub.
---
 llvm/docs/BugLifeCycle.rst | 125 ++++++++++++++++++++-----------------
 1 file changed, 67 insertions(+), 58 deletions(-)

diff --git a/llvm/docs/BugLifeCycle.rst b/llvm/docs/BugLifeCycle.rst
index f3ed32a209fa0..004b677758652 100644
--- a/llvm/docs/BugLifeCycle.rst
+++ b/llvm/docs/BugLifeCycle.rst
@@ -16,9 +16,10 @@ consistency helps reporters, developers and others to gain a better
 understanding of what a particular bug state actually means and what to expect
 might happen next.
 
-At the same time, we aim to not over-specify the life cycle of bugs in the
-`the LLVM Bug Tracking System <https://bugs.llvm.org/enter_bug.cgi>`_, as the
-overall goal is to make it easier to work with and understand the bug reports.
+At the same time, we aim to not over-specify the life cycle of bugs in
+`the LLVM Bug Tracking System <https://github.com/llvm/llvm-project/issues>`_,
+as the overall goal is to make it easier to work with and understand the bug
+reports.
 
 The main parts of the life cycle documented here are:
 
@@ -27,12 +28,10 @@ The main parts of the life cycle documented here are:
 #. `Actively working on fixing`_
 #. `Closing`_
 
-Furthermore, some of the metadata in the bug tracker, such as who to notify on
-newly reported bugs or what the breakdown into products & components is we use,
-needs to be maintained. See the following for details:
+Furthermore, some of the metadata in the bug tracker, such as what labels we
+use, needs to be maintained. See the following for details:
 
-#. `Maintenance of Bug products/component metadata`_
-#. `Maintenance of cc-by-default settings`_
+#. `Maintenance of metadata`_
 
 
 .. _Reporting:
@@ -42,48 +41,56 @@ Reporting bugs
 
 See :doc:`HowToSubmitABug` on further details on how to submit good bug reports.
 
-Make sure that you have one or more people on cc on the bug report that you
-think will react to it. We aim to automatically add specific people on cc for
-most products/components, but may not always succeed in doing so.
-
-If you know the area of LLVM code the root cause of the bug is in, good
-candidates to add as cc may be the same people you'd ask for a code review in
-that area. See :ref:`finding-potential-reviewers` for more details.
-
+You can apply `labels <https://docs.github.com/en/issues/using-labels-and-milestones-to-track-work/managing-labels>`_
+to the bug to provide extra information to make the bug easier to discover, such
+as a label for the part of the project the bug pertains to.
 
 .. _Triaging:
 
 Triaging bugs
 =============
 
-Bugs with status NEW indicate that they still need to be triaged.
-When triage is complete, the status of the bug is moved to CONFIRMED.
+Open bugs that have not been marked with the ``confirmed`` label are bugs that
+still need to be triaged. When triage is complete, the ``confirmed`` label
+should be added along with any other labels that help to classify the report,
+unless the issue is being :ref:`closed<Closing>`.
 
 The goal of triaging a bug is to make sure a newly reported bug ends up in a
-good, actionable, state. Try to answer the following questions while triaging.
+good, actionable state. Try to answer the following questions while triaging:
 
 * Is the reported behavior actually wrong?
 
   * E.g. does a miscompile example depend on undefined behavior?
 
-* Can you easily reproduce the bug?
+* Can you reproduce the bug from the details in the report?
 
-  * If not, are there reasonable excuses why it cannot easily be reproduced?
+  * If not, is there a reasonable explanation why it cannot be reproduced?
 
 * Is it related to an already reported bug?
 
-  * Use the "See also"/"depends on"/"blocks" fields if so.
-  * Close it as a duplicate if so, pointing to the issue it duplicates.
-
 * Are the following fields filled in correctly?
 
-  * Product
-  * Component
   * Title
+  * Description
+  * Labels
+
+* When able to do so, please add the appropriate labels to classify the bug,
+  such as the tool (``clang``, ``clang-format``, ``clang-tidy``, etc) or
+  component (``backend:<name>``, ``compiler-rt:<name>``, ``clang:<name>``, etc).
+
+* If the issue is with a particular revision of the C or C++ standard, please
+  add the appropriate language standard label (``c++20``, ``c99``, etc).
 
-* CC others not already cc’ed that you happen to know would be good to pull in.
-* Add the "beginner" keyword if you think this would be a good bug to be fixed
-  by someone new to LLVM.
+* Please don't use both a general and a specific label. For example, bugs
+  labeled ``c++17`` shouldn't also have ``c++``, and bugs labeled
+  ``clang:codegen`` shouldn't also have ``clang``.
+
+* Add the ``good first issue`` label if you think this would be a good bug to
+  be fixed by someone new to LLVM. This label feeds into `the landing page
+  for new contributors <https://github.com/llvm/llvm-project/contribute>`_.
+
+* If you are unsure of what a label is intended to be used for, please see the
+  `documentation for our labels <https://github.com/llvm/llvm-project/labels>`_.
 
 .. _Actively working on fixing:
 
@@ -92,49 +99,51 @@ Actively working on fixing bugs
 
 Please remember to assign the bug to yourself if you're actively working on
 fixing it and to unassign it when you're no longer actively working on it.  You
-unassign a bug by setting the Assignee field to "unassignedbugs@nondot.org".
+unassign a bug by removing the person from the the ``Assignees`` field.
 
 .. _Closing:
 
 Resolving/Closing bugs
 ======================
 
-For simplicity, we only have 1 status for all resolved or closed bugs:
-RESOLVED.
-
 Resolving bugs is good! Make sure to properly record the reason for resolving.
 Examples of reasons for resolving are:
 
-* Revision NNNNNN fixed the bug.
-* The bug cannot be reproduced with revision NNNNNN.
-* The circumstances for the bug don't apply anymore.
-* There is a sound reason for not fixing it (WONTFIX).
-* There is a specific and plausible reason to think that a given bug is
-  otherwise inapplicable or obsolete.
-
-  * One example is an old open bug that doesn't contain enough information to
-    clearly understand the problem being reported (e.g. not reproducible). It is
-    fine to resolve such a bug e.g. with resolution WORKSFORME and leaving a
-    comment to encourage the reporter to reopen the bug with more information
-    if it's still reproducible on their end.
-
-If a bug is resolved, please fill in the revision number it was fixed in in the
-"Fixed by Commit(s)" field.
+  * If the issue has been resolved by a particular commit, close the issue with
+    a brief comment mentioning which commit(s) fixed it. If you are authoring
+    the fix yourself, your git commit message may include the phrase
+    ``Fixes #<issue number>`` on a line by itself. GitHub recognizes such commit
+    messages and will automatically close the specified issue with a reference
+    to your commit.
 
+  * If the reported behavior is not a bug, it is appropriate to close the issue
+    with a comment explaining why you believe it is not a bug, and adding the
+    ``invalid`` tag.
 
-.. _Maintenance of Bug products/component metadata:
+  * If the bug duplicates another issue, close it as a duplicate by adding the
+    ``duplicate`` label with a comment pointing to the issue it duplicates.
 
-Maintenance of products/components metadata
-===========================================
+  * If there is a sound reason for not fixing the issue (difficulty, ABI, open
+    research questions, etc), add the ``wontfix`` label and a comment explaining
+    why no changes are expected.
 
-Please raise a bug against "Bugzilla Admin"/"Products" to request any changes
-to be made to the breakdown of products & components modeled in Bugzilla.
+  * If there is a specific and plausible reason to think that a given bug is
+    otherwise inapplicable or obsolete. One example is an open bug that doesn't
+    contain enough information to clearly understand the problem being reported
+    (e.g. not reproducible). It is fine to close such a bug, adding with the
+    ``worksforme`` label and leaving a comment to encourage the reporter to
+    reopen the bug with more information if it's still reproducible for them.
 
 
-.. _Maintenance of cc-by-default settings:
+.. _Maintenance of metadata:
 
-Maintenance of cc-by-default settings
-=====================================
+Maintenance of metadata
+=======================
 
-Please raise a bug against "Bugzilla Admin"/"Products" to request any changes
-to be made to the cc-by-default settings for specific components.
+Project member with write access to the project can create new labels, but we
+discourage adding ad hoc labels because we want to control the proliferation of
+labels and avoid single-use labels. If you would like a new label added, please
+open an issue asking to create an issue label and add the ``infrastructure``
+label to the issue. The request should include a description of what the label
+is for. Alternatively, you can ask for the label to be created on the
+``#infrastructure`` channel on the LLVM Discord.

From 33185e66f24187e9573154090556967f71265b7c Mon Sep 17 00:00:00 2001
From: Jeremy Furtek <jfurtek@nvidia.com>
Date: Wed, 26 Jan 2022 21:00:21 +0000
Subject: [PATCH 788/946] [mlir] Add ODS support for enum attributes with
 grouped bit cases

This diff modifies the tablegen specification and code generation for
BitEnumAttr attributes in MLIR Operation Definition Specification (ODS) files.
Specifically:

- there is a new tablegen class for "none" values (i.e. no bits set)
- single-bit enum cases are specified via bit index (i.e. [0, 31]) instead of
  the resulting enum integer value
- there is a new tablegen class to represent a "grouped" bitwise OR of other
  enum values

This diff is intended as an initial step towards improving "fastmath"
optimization support in MLIR, to allow more precise control of whether certain
floating point optimizations are applied in MLIR passes. "Fast" math options
for floating point MLIR operations would (following subsequent RFC and
discussion) be specified by using the improved enum bit support in this diff.
For example, a "fast" enum value would act as an alias for a group of other
cases (e.g. finite-math-only, no-signed-zeros, etc.), in a way that is similar
to support in C/C++ compilers (clang, gcc).

Reviewed By: rriddle

Differential Revision: https://reviews.llvm.org/D117029
---
 mlir/docs/OpDefinitions.md                    |  42 +++---
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   |  16 +--
 .../mlir/Dialect/SPIRV/IR/SPIRVBase.td        | 130 +++++++++---------
 mlir/include/mlir/Dialect/Vector/VectorOps.td |  22 +--
 mlir/include/mlir/IR/OpBase.td                |  35 ++++-
 mlir/tools/mlir-tblgen/EnumsGen.cpp           |  15 +-
 mlir/unittests/TableGen/EnumsGenTest.cpp      |  50 +++++--
 mlir/unittests/TableGen/enums.td              |  20 ++-
 8 files changed, 196 insertions(+), 134 deletions(-)

diff --git a/mlir/docs/OpDefinitions.md b/mlir/docs/OpDefinitions.md
index 9de4671727028..f26afa5666e23 100644
--- a/mlir/docs/OpDefinitions.md
+++ b/mlir/docs/OpDefinitions.md
@@ -1245,7 +1245,8 @@ several mechanisms: `StrEnumAttr`, `IntEnumAttr`, and `BitEnumAttr`.
     [`StringAttr`][StringAttr] in the op.
 *   `IntEnumAttr`: each enum case is an integer, the attribute is stored as a
     [`IntegerAttr`][IntegerAttr] in the op.
-*   `BitEnumAttr`: each enum case is a bit, the attribute is stored as a
+*   `BitEnumAttr`: each enum case is a either the empty case, a single bit,
+    or a group of single bits, and the attribute is stored as a
     [`IntegerAttr`][IntegerAttr] in the op.
 
 All these `*EnumAttr` attributes require fully specifying all of the allowed
@@ -1349,13 +1350,14 @@ llvm::Optional<MyIntEnum> symbolizeMyIntEnum(uint32_t value) {
 Similarly for the following `BitEnumAttr` definition:
 
 ```tablegen
-def None: BitEnumAttrCase<"None", 0x0000>;
-def Bit1: BitEnumAttrCase<"Bit1", 0x0001>;
-def Bit2: BitEnumAttrCase<"Bit2", 0x0002>;
-def Bit3: BitEnumAttrCase<"Bit3", 0x0004>;
+def None: BitEnumAttrCaseNone<"None">;
+def Bit0: BitEnumAttrCaseBit<"Bit0", 0>;
+def Bit1: BitEnumAttrCaseBit<"Bit1", 1>;
+def Bit2: BitEnumAttrCaseBit<"Bit2", 2>;
+def Bit3: BitEnumAttrCaseBit<"Bit3", 3>;
 
 def MyBitEnum: BitEnumAttr<"MyBitEnum", "An example bit enum",
-                           [None, Bit1, Bit2, Bit3]>;
+                           [None, Bit0, Bit1, Bit2, Bit3]>;
 ```
 
 We can have:
@@ -1364,9 +1366,10 @@ We can have:
 // An example bit enum
 enum class MyBitEnum : uint32_t {
   None = 0,
-  Bit1 = 1,
-  Bit2 = 2,
-  Bit3 = 4,
+  Bit0 = 1,
+  Bit1 = 2,
+  Bit2 = 4,
+  Bit3 = 8,
 };
 
 llvm::Optional<MyBitEnum> symbolizeMyBitEnum(uint32_t);
@@ -1407,15 +1410,15 @@ template<> struct DenseMapInfo<::MyBitEnum> {
 ```c++
 std::string stringifyMyBitEnum(MyBitEnum symbol) {
   auto val = static_cast<uint32_t>(symbol);
+  assert(15u == (15u | val) && "invalid bits set in bit enum");
   // Special case for all bits unset.
   if (val == 0) return "None";
-
   llvm::SmallVector<llvm::StringRef, 2> strs;
-  if (1u & val) { strs.push_back("Bit1"); val &= ~1u; }
-  if (2u & val) { strs.push_back("Bit2"); val &= ~2u; }
-  if (4u & val) { strs.push_back("Bit3"); val &= ~4u; }
-
-  if (val) return "";
+  if (1u == (1u & val)) { strs.push_back("Bit0"); }
+  if (2u == (2u & val)) { strs.push_back("Bit1"); }
+  if (4u == (4u & val)) { strs.push_back("Bit2"); }
+  if (8u == (8u & val)) { strs.push_back("Bit3"); }
+  
   return llvm::join(strs, "|");
 }
 
@@ -1429,9 +1432,10 @@ llvm::Optional<MyBitEnum> symbolizeMyBitEnum(llvm::StringRef str) {
   uint32_t val = 0;
   for (auto symbol : symbols) {
     auto bit = llvm::StringSwitch<llvm::Optional<uint32_t>>(symbol)
-      .Case("Bit1", 1)
-      .Case("Bit2", 2)
-      .Case("Bit3", 4)
+      .Case("Bit0", 1)
+      .Case("Bit1", 2)
+      .Case("Bit2", 4)
+      .Case("Bit3", 8)
       .Default(llvm::None);
     if (bit) { val |= *bit; } else { return llvm::None; }
   }
@@ -1442,7 +1446,7 @@ llvm::Optional<MyBitEnum> symbolizeMyBitEnum(uint32_t value) {
   // Special case for all bits unset.
   if (value == 0) return MyBitEnum::None;
 
-  if (value & ~(1u | 2u | 4u)) return llvm::None;
+  if (value & ~(1u | 2u | 4u | 8u)) return llvm::None;
   return static_cast<MyBitEnum>(value);
 }
 ```
diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 0a7f4c9ff82bc..90cd333de468b 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -21,14 +21,14 @@ include "mlir/Interfaces/ControlFlowInterfaces.td"
 include "mlir/Interfaces/InferTypeOpInterface.td"
 include "mlir/Interfaces/SideEffectInterfaces.td"
 
-def FMFnnan     : BitEnumAttrCase<"nnan", 0x1>;
-def FMFninf     : BitEnumAttrCase<"ninf", 0x2>;
-def FMFnsz      : BitEnumAttrCase<"nsz", 0x4>;
-def FMFarcp     : BitEnumAttrCase<"arcp", 0x8>;
-def FMFcontract : BitEnumAttrCase<"contract", 0x10>;
-def FMFafn      : BitEnumAttrCase<"afn", 0x20>;
-def FMFreassoc  : BitEnumAttrCase<"reassoc", 0x40>;
-def FMFfast     : BitEnumAttrCase<"fast", 0x80>;
+def FMFnnan     : BitEnumAttrCaseBit<"nnan", 0>;
+def FMFninf     : BitEnumAttrCaseBit<"ninf", 1>;
+def FMFnsz      : BitEnumAttrCaseBit<"nsz", 2>;
+def FMFarcp     : BitEnumAttrCaseBit<"arcp", 3>;
+def FMFcontract : BitEnumAttrCaseBit<"contract", 4>;
+def FMFafn      : BitEnumAttrCaseBit<"afn", 5>;
+def FMFreassoc  : BitEnumAttrCaseBit<"reassoc", 6>;
+def FMFfast     : BitEnumAttrCaseBit<"fast", 7>;
 
 def FastmathFlags_DoNotUse : BitEnumAttr<
     "FastmathFlags",
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
index 5431baa0f9f41..577d4fca9f352 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
@@ -3082,12 +3082,12 @@ def SPV_ExecutionModelAttr :
       SPV_EM_AnyHitKHR, SPV_EM_ClosestHitKHR, SPV_EM_MissKHR, SPV_EM_CallableKHR
     ]>;
 
-def SPV_FC_None         : BitEnumAttrCase<"None", 0x0000>;
-def SPV_FC_Inline       : BitEnumAttrCase<"Inline", 0x0001>;
-def SPV_FC_DontInline   : BitEnumAttrCase<"DontInline", 0x0002>;
-def SPV_FC_Pure         : BitEnumAttrCase<"Pure", 0x0004>;
-def SPV_FC_Const        : BitEnumAttrCase<"Const", 0x0008>;
-def SPV_FC_OptNoneINTEL : BitEnumAttrCase<"OptNoneINTEL", 0x10000> {
+def SPV_FC_None         : BitEnumAttrCaseNone<"None">;
+def SPV_FC_Inline       : BitEnumAttrCaseBit<"Inline", 0>;
+def SPV_FC_DontInline   : BitEnumAttrCaseBit<"DontInline", 1>;
+def SPV_FC_Pure         : BitEnumAttrCaseBit<"Pure", 2>;
+def SPV_FC_Const        : BitEnumAttrCaseBit<"Const", 3>;
+def SPV_FC_OptNoneINTEL : BitEnumAttrCaseBit<"OptNoneINTEL", 16> {
   list<Availability> availability = [
     Capability<[SPV_C_OptNoneINTEL]>
   ];
@@ -3366,62 +3366,62 @@ def SPV_ImageFormatAttr :
       SPV_IF_R8ui, SPV_IF_R64ui, SPV_IF_R64i
     ]>;
 
-def SPV_IO_None               : BitEnumAttrCase<"None", 0x0000>;
-def SPV_IO_Bias               : BitEnumAttrCase<"Bias", 0x0001> {
+def SPV_IO_None               : BitEnumAttrCaseNone<"None">;
+def SPV_IO_Bias               : BitEnumAttrCaseBit<"Bias", 0> {
   list<Availability> availability = [
     Capability<[SPV_C_Shader]>
   ];
 }
-def SPV_IO_Lod                : BitEnumAttrCase<"Lod", 0x0002>;
-def SPV_IO_Grad               : BitEnumAttrCase<"Grad", 0x0004>;
-def SPV_IO_ConstOffset        : BitEnumAttrCase<"ConstOffset", 0x0008>;
-def SPV_IO_Offset             : BitEnumAttrCase<"Offset", 0x0010> {
+def SPV_IO_Lod                : BitEnumAttrCaseBit<"Lod", 1>;
+def SPV_IO_Grad               : BitEnumAttrCaseBit<"Grad", 2>;
+def SPV_IO_ConstOffset        : BitEnumAttrCaseBit<"ConstOffset", 3>;
+def SPV_IO_Offset             : BitEnumAttrCaseBit<"Offset", 4> {
   list<Availability> availability = [
     Capability<[SPV_C_ImageGatherExtended]>
   ];
 }
-def SPV_IO_ConstOffsets       : BitEnumAttrCase<"ConstOffsets", 0x0020> {
+def SPV_IO_ConstOffsets       : BitEnumAttrCaseBit<"ConstOffsets", 5> {
   list<Availability> availability = [
     Capability<[SPV_C_ImageGatherExtended]>
   ];
 }
-def SPV_IO_Sample             : BitEnumAttrCase<"Sample", 0x0040>;
-def SPV_IO_MinLod             : BitEnumAttrCase<"MinLod", 0x0080> {
+def SPV_IO_Sample             : BitEnumAttrCaseBit<"Sample", 6>;
+def SPV_IO_MinLod             : BitEnumAttrCaseBit<"MinLod", 7> {
   list<Availability> availability = [
     Capability<[SPV_C_MinLod]>
   ];
 }
-def SPV_IO_MakeTexelAvailable : BitEnumAttrCase<"MakeTexelAvailable", 0x0100> {
+def SPV_IO_MakeTexelAvailable : BitEnumAttrCaseBit<"MakeTexelAvailable", 8> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_IO_MakeTexelVisible   : BitEnumAttrCase<"MakeTexelVisible", 0x0200> {
+def SPV_IO_MakeTexelVisible   : BitEnumAttrCaseBit<"MakeTexelVisible", 9> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_IO_NonPrivateTexel    : BitEnumAttrCase<"NonPrivateTexel", 0x0400> {
+def SPV_IO_NonPrivateTexel    : BitEnumAttrCaseBit<"NonPrivateTexel", 10> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_IO_VolatileTexel      : BitEnumAttrCase<"VolatileTexel", 0x0800> {
+def SPV_IO_VolatileTexel      : BitEnumAttrCaseBit<"VolatileTexel", 11> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_IO_SignExtend         : BitEnumAttrCase<"SignExtend", 0x1000> {
+def SPV_IO_SignExtend         : BitEnumAttrCaseBit<"SignExtend", 12> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_4>
   ];
 }
-def SPV_IO_Offsets            : BitEnumAttrCase<"Offsets", 0x10000>;
-def SPV_IO_ZeroExtend         : BitEnumAttrCase<"ZeroExtend", 0x2000> {
+def SPV_IO_Offsets            : BitEnumAttrCaseBit<"Offsets", 16>;
+def SPV_IO_ZeroExtend         : BitEnumAttrCaseBit<"ZeroExtend", 13> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_4>
   ];
@@ -3457,87 +3457,87 @@ def SPV_LinkageTypeAttr :
       SPV_LT_Export, SPV_LT_Import, SPV_LT_LinkOnceODR
     ]>;
 
-def SPV_LC_None                      : BitEnumAttrCase<"None", 0x0000>;
-def SPV_LC_Unroll                    : BitEnumAttrCase<"Unroll", 0x0001>;
-def SPV_LC_DontUnroll                : BitEnumAttrCase<"DontUnroll", 0x0002>;
-def SPV_LC_DependencyInfinite        : BitEnumAttrCase<"DependencyInfinite", 0x0004> {
+def SPV_LC_None                      : BitEnumAttrCaseNone<"None">;
+def SPV_LC_Unroll                    : BitEnumAttrCaseBit<"Unroll", 0>;
+def SPV_LC_DontUnroll                : BitEnumAttrCaseBit<"DontUnroll", 1>;
+def SPV_LC_DependencyInfinite        : BitEnumAttrCaseBit<"DependencyInfinite", 2> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_1>
   ];
 }
-def SPV_LC_DependencyLength          : BitEnumAttrCase<"DependencyLength", 0x0008> {
+def SPV_LC_DependencyLength          : BitEnumAttrCaseBit<"DependencyLength", 3> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_1>
   ];
 }
-def SPV_LC_MinIterations             : BitEnumAttrCase<"MinIterations", 0x0010> {
+def SPV_LC_MinIterations             : BitEnumAttrCaseBit<"MinIterations", 4> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_4>
   ];
 }
-def SPV_LC_MaxIterations             : BitEnumAttrCase<"MaxIterations", 0x0020> {
+def SPV_LC_MaxIterations             : BitEnumAttrCaseBit<"MaxIterations", 5> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_4>
   ];
 }
-def SPV_LC_IterationMultiple         : BitEnumAttrCase<"IterationMultiple", 0x0040> {
+def SPV_LC_IterationMultiple         : BitEnumAttrCaseBit<"IterationMultiple", 6> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_4>
   ];
 }
-def SPV_LC_PeelCount                 : BitEnumAttrCase<"PeelCount", 0x0080> {
+def SPV_LC_PeelCount                 : BitEnumAttrCaseBit<"PeelCount", 7> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_4>
   ];
 }
-def SPV_LC_PartialCount              : BitEnumAttrCase<"PartialCount", 0x0100> {
+def SPV_LC_PartialCount              : BitEnumAttrCaseBit<"PartialCount", 8> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_4>
   ];
 }
-def SPV_LC_InitiationIntervalINTEL   : BitEnumAttrCase<"InitiationIntervalINTEL", 0x10000> {
+def SPV_LC_InitiationIntervalINTEL   : BitEnumAttrCaseBit<"InitiationIntervalINTEL", 16> {
   list<Availability> availability = [
     Extension<[SPV_INTEL_fpga_loop_controls]>,
     Capability<[SPV_C_FPGALoopControlsINTEL]>
   ];
 }
-def SPV_LC_LoopCoalesceINTEL         : BitEnumAttrCase<"LoopCoalesceINTEL", 0x100000> {
+def SPV_LC_LoopCoalesceINTEL         : BitEnumAttrCaseBit<"LoopCoalesceINTEL", 20> {
   list<Availability> availability = [
     Extension<[SPV_INTEL_fpga_loop_controls]>,
     Capability<[SPV_C_FPGALoopControlsINTEL]>
   ];
 }
-def SPV_LC_MaxConcurrencyINTEL       : BitEnumAttrCase<"MaxConcurrencyINTEL", 0x20000> {
+def SPV_LC_MaxConcurrencyINTEL       : BitEnumAttrCaseBit<"MaxConcurrencyINTEL", 17> {
   list<Availability> availability = [
     Extension<[SPV_INTEL_fpga_loop_controls]>,
     Capability<[SPV_C_FPGALoopControlsINTEL]>
   ];
 }
-def SPV_LC_MaxInterleavingINTEL      : BitEnumAttrCase<"MaxInterleavingINTEL", 0x200000> {
+def SPV_LC_MaxInterleavingINTEL      : BitEnumAttrCaseBit<"MaxInterleavingINTEL", 21> {
   list<Availability> availability = [
     Extension<[SPV_INTEL_fpga_loop_controls]>,
     Capability<[SPV_C_FPGALoopControlsINTEL]>
   ];
 }
-def SPV_LC_DependencyArrayINTEL      : BitEnumAttrCase<"DependencyArrayINTEL", 0x40000> {
+def SPV_LC_DependencyArrayINTEL      : BitEnumAttrCaseBit<"DependencyArrayINTEL", 18> {
   list<Availability> availability = [
     Extension<[SPV_INTEL_fpga_loop_controls]>,
     Capability<[SPV_C_FPGALoopControlsINTEL]>
   ];
 }
-def SPV_LC_SpeculatedIterationsINTEL : BitEnumAttrCase<"SpeculatedIterationsINTEL", 0x400000> {
+def SPV_LC_SpeculatedIterationsINTEL : BitEnumAttrCaseBit<"SpeculatedIterationsINTEL", 22> {
   list<Availability> availability = [
     Extension<[SPV_INTEL_fpga_loop_controls]>,
     Capability<[SPV_C_FPGALoopControlsINTEL]>
   ];
 }
-def SPV_LC_PipelineEnableINTEL       : BitEnumAttrCase<"PipelineEnableINTEL", 0x80000> {
+def SPV_LC_PipelineEnableINTEL       : BitEnumAttrCaseBit<"PipelineEnableINTEL", 19> {
   list<Availability> availability = [
     Extension<[SPV_INTEL_fpga_loop_controls]>,
     Capability<[SPV_C_FPGALoopControlsINTEL]>
   ];
 }
-def SPV_LC_NoFusionINTEL             : BitEnumAttrCase<"NoFusionINTEL", 0x800000> {
+def SPV_LC_NoFusionINTEL             : BitEnumAttrCaseBit<"NoFusionINTEL", 23> {
   list<Availability> availability = [
     Extension<[SPV_INTEL_fpga_loop_controls]>,
     Capability<[SPV_C_FPGALoopControlsINTEL]>
@@ -3555,23 +3555,23 @@ def SPV_LoopControlAttr :
       SPV_LC_PipelineEnableINTEL, SPV_LC_NoFusionINTEL
     ]>;
 
-def SPV_MA_None                 : BitEnumAttrCase<"None", 0x0000>;
-def SPV_MA_Volatile             : BitEnumAttrCase<"Volatile", 0x0001>;
-def SPV_MA_Aligned              : BitEnumAttrCase<"Aligned", 0x0002>;
-def SPV_MA_Nontemporal          : BitEnumAttrCase<"Nontemporal", 0x0004>;
-def SPV_MA_MakePointerAvailable : BitEnumAttrCase<"MakePointerAvailable", 0x0008> {
+def SPV_MA_None                 : BitEnumAttrCaseNone<"None">;
+def SPV_MA_Volatile             : BitEnumAttrCaseBit<"Volatile", 0>;
+def SPV_MA_Aligned              : BitEnumAttrCaseBit<"Aligned", 1>;
+def SPV_MA_Nontemporal          : BitEnumAttrCaseBit<"Nontemporal", 2>;
+def SPV_MA_MakePointerAvailable : BitEnumAttrCaseBit<"MakePointerAvailable", 3> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_MA_MakePointerVisible   : BitEnumAttrCase<"MakePointerVisible", 0x0010> {
+def SPV_MA_MakePointerVisible   : BitEnumAttrCaseBit<"MakePointerVisible", 4> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_MA_NonPrivatePointer    : BitEnumAttrCase<"NonPrivatePointer", 0x0020> {
+def SPV_MA_NonPrivatePointer    : BitEnumAttrCaseBit<"NonPrivatePointer", 5> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
@@ -3612,44 +3612,44 @@ def SPV_MemoryModelAttr :
       SPV_MM_Simple, SPV_MM_GLSL450, SPV_MM_OpenCL, SPV_MM_Vulkan
     ]>;
 
-def SPV_MS_None                   : BitEnumAttrCase<"None", 0x0000>;
-def SPV_MS_Acquire                : BitEnumAttrCase<"Acquire", 0x0002>;
-def SPV_MS_Release                : BitEnumAttrCase<"Release", 0x0004>;
-def SPV_MS_AcquireRelease         : BitEnumAttrCase<"AcquireRelease", 0x0008>;
-def SPV_MS_SequentiallyConsistent : BitEnumAttrCase<"SequentiallyConsistent", 0x0010>;
-def SPV_MS_UniformMemory          : BitEnumAttrCase<"UniformMemory", 0x0040> {
+def SPV_MS_None                   : BitEnumAttrCaseNone<"None">;
+def SPV_MS_Acquire                : BitEnumAttrCaseBit<"Acquire", 1>;
+def SPV_MS_Release                : BitEnumAttrCaseBit<"Release", 2>;
+def SPV_MS_AcquireRelease         : BitEnumAttrCaseBit<"AcquireRelease", 3>;
+def SPV_MS_SequentiallyConsistent : BitEnumAttrCaseBit<"SequentiallyConsistent", 4>;
+def SPV_MS_UniformMemory          : BitEnumAttrCaseBit<"UniformMemory", 6> {
   list<Availability> availability = [
     Capability<[SPV_C_Shader]>
   ];
 }
-def SPV_MS_SubgroupMemory         : BitEnumAttrCase<"SubgroupMemory", 0x0080>;
-def SPV_MS_WorkgroupMemory        : BitEnumAttrCase<"WorkgroupMemory", 0x0100>;
-def SPV_MS_CrossWorkgroupMemory   : BitEnumAttrCase<"CrossWorkgroupMemory", 0x0200>;
-def SPV_MS_AtomicCounterMemory    : BitEnumAttrCase<"AtomicCounterMemory", 0x0400> {
+def SPV_MS_SubgroupMemory         : BitEnumAttrCaseBit<"SubgroupMemory", 7>;
+def SPV_MS_WorkgroupMemory        : BitEnumAttrCaseBit<"WorkgroupMemory", 8>;
+def SPV_MS_CrossWorkgroupMemory   : BitEnumAttrCaseBit<"CrossWorkgroupMemory", 9>;
+def SPV_MS_AtomicCounterMemory    : BitEnumAttrCaseBit<"AtomicCounterMemory", 10> {
   list<Availability> availability = [
     Capability<[SPV_C_AtomicStorage]>
   ];
 }
-def SPV_MS_ImageMemory            : BitEnumAttrCase<"ImageMemory", 0x0800>;
-def SPV_MS_OutputMemory           : BitEnumAttrCase<"OutputMemory", 0x1000> {
+def SPV_MS_ImageMemory            : BitEnumAttrCaseBit<"ImageMemory", 11>;
+def SPV_MS_OutputMemory           : BitEnumAttrCaseBit<"OutputMemory", 12> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_MS_MakeAvailable          : BitEnumAttrCase<"MakeAvailable", 0x2000> {
+def SPV_MS_MakeAvailable          : BitEnumAttrCaseBit<"MakeAvailable", 13> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_MS_MakeVisible            : BitEnumAttrCase<"MakeVisible", 0x4000> {
+def SPV_MS_MakeVisible            : BitEnumAttrCaseBit<"MakeVisible", 14> {
   list<Availability> availability = [
     MinVersion<SPV_V_1_5>,
     Capability<[SPV_C_VulkanMemoryModel]>
   ];
 }
-def SPV_MS_Volatile               : BitEnumAttrCase<"Volatile", 0x8000> {
+def SPV_MS_Volatile               : BitEnumAttrCaseBit<"Volatile", 15> {
   list<Availability> availability = [
     Extension<[SPV_KHR_vulkan_memory_model]>,
     Capability<[SPV_C_VulkanMemoryModel]>
@@ -3688,9 +3688,9 @@ def SPV_ScopeAttr :
       SPV_S_Invocation, SPV_S_QueueFamily, SPV_S_ShaderCallKHR
     ]>;
 
-def SPV_SC_None        : BitEnumAttrCase<"None", 0x0000>;
-def SPV_SC_Flatten     : BitEnumAttrCase<"Flatten", 0x0001>;
-def SPV_SC_DontFlatten : BitEnumAttrCase<"DontFlatten", 0x0002>;
+def SPV_SC_None        : BitEnumAttrCaseNone<"None">;
+def SPV_SC_Flatten     : BitEnumAttrCaseBit<"Flatten", 0>;
+def SPV_SC_DontFlatten : BitEnumAttrCaseBit<"DontFlatten", 1>;
 
 def SPV_SelectionControlAttr :
     SPV_BitEnumAttr<"SelectionControl", "valid SPIR-V SelectionControl", [
diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.td b/mlir/include/mlir/Dialect/Vector/VectorOps.td
index 826c7d0338f0b..1f501ac6b89ea 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorOps.td
+++ b/mlir/include/mlir/Dialect/Vector/VectorOps.td
@@ -39,17 +39,17 @@ class Vector_Op<string mnemonic, list<OpTrait> traits = []> :
 }
 
 // The "kind" of combining function for contractions and reductions.
-def COMBINING_KIND_ADD : BitEnumAttrCase<"ADD", 0x1, "add">;
-def COMBINING_KIND_MUL : BitEnumAttrCase<"MUL", 0x2, "mul">;
-def COMBINING_KIND_MINUI : BitEnumAttrCase<"MINUI", 0x4, "minui">;
-def COMBINING_KIND_MINSI : BitEnumAttrCase<"MINSI", 0x8, "minsi">;
-def COMBINING_KIND_MINF : BitEnumAttrCase<"MINF", 0x10, "minf">;
-def COMBINING_KIND_MAXUI : BitEnumAttrCase<"MAXUI", 0x20, "maxui">;
-def COMBINING_KIND_MAXSI : BitEnumAttrCase<"MAXSI", 0x40, "maxsi">;
-def COMBINING_KIND_MAXF : BitEnumAttrCase<"MAXF", 0x80, "maxf">;
-def COMBINING_KIND_AND : BitEnumAttrCase<"AND", 0x100, "and">;
-def COMBINING_KIND_OR  : BitEnumAttrCase<"OR", 0x200, "or">;
-def COMBINING_KIND_XOR : BitEnumAttrCase<"XOR", 0x400, "xor">;
+def COMBINING_KIND_ADD : BitEnumAttrCaseBit<"ADD", 0, "add">;
+def COMBINING_KIND_MUL : BitEnumAttrCaseBit<"MUL", 1, "mul">;
+def COMBINING_KIND_MINUI : BitEnumAttrCaseBit<"MINUI", 2, "minui">;
+def COMBINING_KIND_MINSI : BitEnumAttrCaseBit<"MINSI", 3, "minsi">;
+def COMBINING_KIND_MINF : BitEnumAttrCaseBit<"MINF", 4, "minf">;
+def COMBINING_KIND_MAXUI : BitEnumAttrCaseBit<"MAXUI", 5, "maxui">;
+def COMBINING_KIND_MAXSI : BitEnumAttrCaseBit<"MAXSI", 6, "maxsi">;
+def COMBINING_KIND_MAXF : BitEnumAttrCaseBit<"MAXF", 7, "maxf">;
+def COMBINING_KIND_AND : BitEnumAttrCaseBit<"AND", 8, "and">;
+def COMBINING_KIND_OR  : BitEnumAttrCaseBit<"OR", 9, "or">;
+def COMBINING_KIND_XOR : BitEnumAttrCaseBit<"XOR", 10, "xor">;
 
 def CombiningKind : BitEnumAttr<
     "CombiningKind",
diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td
index 0d99a9f8d5cb3..6bdf1b5fbc7b8 100644
--- a/mlir/include/mlir/IR/OpBase.td
+++ b/mlir/include/mlir/IR/OpBase.td
@@ -1323,12 +1323,30 @@ class I64EnumAttrCase<string sym, int val, string str = sym>
 // A bit enum case stored with 32-bit IntegerAttr. `val` here is *not* the
 // ordinal number of the bit that is set. It is the 32-bit integer with only
 // one bit set.
-class BitEnumAttrCase<string sym, int val, string str = sym> :
-    EnumAttrCaseInfo<sym, val, str>,
-    SignlessIntegerAttrBase<I32, "case " # str> {
-  let predicate = CPred<
-    "$_self.cast<::mlir::IntegerAttr>().getValue().getZExtValue() & "
-    # val # "u">;
+class BitEnumAttrCase<string sym, int val, string str = sym>
+    : EnumAttrCaseInfo<sym, val, str>,
+      SignlessIntegerAttrBase<I32, "case " #str>;
+
+// The special bit enum case for no bits set (i.e. value = 0).
+class BitEnumAttrCaseNone<string sym, string str = sym>
+    : BitEnumAttrCase<sym, 0, str>;
+
+// The bit enum case for a single bit, specified by the bit position.
+// The pos argument refers to the index of the bit, and is currently
+// limited to be in the range [0, 31].
+class BitEnumAttrCaseBit<string sym, int pos, string str = sym>
+    : BitEnumAttrCase<sym, !shl(1, pos), str> {
+  assert !and(!ge(pos, 0), !le(pos, 31)),
+      "bit position must be between 0 and 31";
+}
+
+// A bit enum case for a group/list of previously declared single bits,
+// providing a convenient alias for that group.
+class BitEnumAttrCaseGroup<string sym, list<BitEnumAttrCaseBit> cases,
+                           string str = sym>
+    : BitEnumAttrCase<
+          sym, !foldl(0, cases, value, bitcase, !or(value, bitcase.value)),
+          str> {
 }
 
 // Additional information for an enum attribute.
@@ -1452,7 +1470,7 @@ class I64EnumAttr<string name, string summary, list<I64EnumAttrCase> cases> :
 // A bit enum stored with 32-bit IntegerAttr.
 //
 // Op attributes of this kind are stored as IntegerAttr. Extra verification will
-// be generated on the integer to make sure only allowed bit are set. Besides,
+// be generated on the integer to make sure only allowed bits are set. Besides,
 // helper methods are generated to parse a string separated with a specified
 // delimiter to a symbol and vice versa.
 class BitEnumAttrBase<list<BitEnumAttrCase> cases, string summary> :
@@ -1470,6 +1488,9 @@ class BitEnumAttr<string name, string summary, list<BitEnumAttrCase> cases> :
     EnumAttrInfo<name, cases, BitEnumAttrBase<cases, summary>> {
   let underlyingType = "uint32_t";
 
+  // Determine "valid" bits from enum cases for error checking
+  int validBits = !foldl(0, cases, value, bitcase, !or(value, bitcase.value));
+
   // We need to return a string because we may concatenate symbols for multiple
   // bits together.
   let symbolToStringFnRetType = "std::string";
diff --git a/mlir/tools/mlir-tblgen/EnumsGen.cpp b/mlir/tools/mlir-tblgen/EnumsGen.cpp
index aa8841abfd73b..722ab9dcdf04d 100644
--- a/mlir/tools/mlir-tblgen/EnumsGen.cpp
+++ b/mlir/tools/mlir-tblgen/EnumsGen.cpp
@@ -193,6 +193,11 @@ static void emitSymToStrFnForBitEnum(const Record &enumDef, raw_ostream &os) {
 
   os << formatv("  auto val = static_cast<{0}>(symbol);\n",
                 enumAttr.getUnderlyingType());
+  // If we have unknown bit set, return an empty string to signal errors.
+  int64_t validBits = enumDef.getValueAsInt("validBits");
+  os << formatv("  assert({0}u == ({0}u | val) && \"invalid bits set in bit "
+                "enum\");\n",
+                validBits);
   if (allBitsUnsetCase) {
     os << "  // Special case for all bits unset.\n";
     os << formatv("  if (val == 0) return \"{0}\";\n\n",
@@ -201,13 +206,11 @@ static void emitSymToStrFnForBitEnum(const Record &enumDef, raw_ostream &os) {
   os << "  ::llvm::SmallVector<::llvm::StringRef, 2> strs;\n";
   for (const auto &enumerant : enumerants) {
     // Skip the special enumerant for None.
-    if (auto val = enumerant.getValue())
-      os << formatv("  if ({0}u & val) {{ strs.push_back(\"{1}\"); "
-                    "val &= ~{0}u; }\n",
-                    val, enumerant.getStr());
+    if (int64_t val = enumerant.getValue())
+      os << formatv(
+          "  if ({0}u == ({0}u & val)) {{ strs.push_back(\"{1}\"); }\n ", val,
+          enumerant.getStr());
   }
-  // If we have unknown bit set, return an empty string to signal errors.
-  os << "\n  if (val) return \"\";\n";
   os << formatv("  return ::llvm::join(strs, \"{0}\");\n", separator);
 
   os << "}\n\n";
diff --git a/mlir/unittests/TableGen/EnumsGenTest.cpp b/mlir/unittests/TableGen/EnumsGenTest.cpp
index a873658cdc3ab..764954242d55d 100644
--- a/mlir/unittests/TableGen/EnumsGenTest.cpp
+++ b/mlir/unittests/TableGen/EnumsGenTest.cpp
@@ -68,25 +68,25 @@ TEST(EnumsGenTest, GeneratedUnderlyingType) {
 
 TEST(EnumsGenTest, GeneratedBitEnumDefinition) {
   EXPECT_EQ(0u, static_cast<uint32_t>(BitEnumWithNone::None));
-  EXPECT_EQ(1u, static_cast<uint32_t>(BitEnumWithNone::Bit1));
-  EXPECT_EQ(4u, static_cast<uint32_t>(BitEnumWithNone::Bit3));
+  EXPECT_EQ(1u, static_cast<uint32_t>(BitEnumWithNone::Bit0));
+  EXPECT_EQ(8u, static_cast<uint32_t>(BitEnumWithNone::Bit3));
 }
 
 TEST(EnumsGenTest, GeneratedSymbolToStringFnForBitEnum) {
   EXPECT_EQ(stringifyBitEnumWithNone(BitEnumWithNone::None), "None");
-  EXPECT_EQ(stringifyBitEnumWithNone(BitEnumWithNone::Bit1), "Bit1");
+  EXPECT_EQ(stringifyBitEnumWithNone(BitEnumWithNone::Bit0), "Bit0");
   EXPECT_EQ(stringifyBitEnumWithNone(BitEnumWithNone::Bit3), "Bit3");
   EXPECT_EQ(
-      stringifyBitEnumWithNone(BitEnumWithNone::Bit1 | BitEnumWithNone::Bit3),
-      "Bit1|Bit3");
+      stringifyBitEnumWithNone(BitEnumWithNone::Bit0 | BitEnumWithNone::Bit3),
+      "Bit0|Bit3");
 }
 
 TEST(EnumsGenTest, GeneratedStringToSymbolForBitEnum) {
   EXPECT_EQ(symbolizeBitEnumWithNone("None"), BitEnumWithNone::None);
-  EXPECT_EQ(symbolizeBitEnumWithNone("Bit1"), BitEnumWithNone::Bit1);
+  EXPECT_EQ(symbolizeBitEnumWithNone("Bit0"), BitEnumWithNone::Bit0);
   EXPECT_EQ(symbolizeBitEnumWithNone("Bit3"), BitEnumWithNone::Bit3);
-  EXPECT_EQ(symbolizeBitEnumWithNone("Bit3|Bit1"),
-            BitEnumWithNone::Bit3 | BitEnumWithNone::Bit1);
+  EXPECT_EQ(symbolizeBitEnumWithNone("Bit3|Bit0"),
+            BitEnumWithNone::Bit3 | BitEnumWithNone::Bit0);
 
   EXPECT_EQ(symbolizeBitEnumWithNone("Bit2"), llvm::None);
   EXPECT_EQ(symbolizeBitEnumWithNone("Bit3|Bit4"), llvm::None);
@@ -94,11 +94,31 @@ TEST(EnumsGenTest, GeneratedStringToSymbolForBitEnum) {
   EXPECT_EQ(symbolizeBitEnumWithoutNone("None"), llvm::None);
 }
 
+TEST(EnumsGenTest, GeneratedSymbolToStringFnForGroupedBitEnum) {
+  EXPECT_EQ(stringifyBitEnumWithGroup(BitEnumWithGroup::Bit0), "Bit0");
+  EXPECT_EQ(stringifyBitEnumWithGroup(BitEnumWithGroup::Bit3), "Bit3");
+  EXPECT_EQ(stringifyBitEnumWithGroup(BitEnumWithGroup::Bits0To3),
+            "Bit0|Bit1|Bit2|Bit3|Bits0To3");
+  EXPECT_EQ(stringifyBitEnumWithGroup(BitEnumWithGroup::Bit4), "Bit4");
+  EXPECT_EQ(stringifyBitEnumWithGroup(
+                BitEnumWithGroup::Bit0 | BitEnumWithGroup::Bit1 |
+                BitEnumWithGroup::Bit2 | BitEnumWithGroup::Bit4),
+            "Bit0|Bit1|Bit2|Bit4");
+}
+
+TEST(EnumsGenTest, GeneratedStringToSymbolForGroupedBitEnum) {
+  EXPECT_EQ(symbolizeBitEnumWithGroup("Bit0"), BitEnumWithGroup::Bit0);
+  EXPECT_EQ(symbolizeBitEnumWithGroup("Bit3"), BitEnumWithGroup::Bit3);
+  EXPECT_EQ(symbolizeBitEnumWithGroup("Bit5"), llvm::None);
+  EXPECT_EQ(symbolizeBitEnumWithGroup("Bit3|Bit0"),
+            BitEnumWithGroup::Bit3 | BitEnumWithGroup::Bit0);
+}
+
 TEST(EnumsGenTest, GeneratedOperator) {
-  EXPECT_TRUE(bitEnumContains(BitEnumWithNone::Bit1 | BitEnumWithNone::Bit3,
-                              BitEnumWithNone::Bit1));
-  EXPECT_FALSE(bitEnumContains(BitEnumWithNone::Bit1 & BitEnumWithNone::Bit3,
-                               BitEnumWithNone::Bit1));
+  EXPECT_TRUE(bitEnumContains(BitEnumWithNone::Bit0 | BitEnumWithNone::Bit3,
+                              BitEnumWithNone::Bit0));
+  EXPECT_FALSE(bitEnumContains(BitEnumWithNone::Bit0 & BitEnumWithNone::Bit3,
+                               BitEnumWithNone::Bit0));
 }
 
 TEST(EnumsGenTest, GeneratedSymbolToCustomStringFn) {
@@ -152,7 +172,11 @@ TEST(EnumsGenTest, GeneratedBitAttributeClass) {
   mlir::Type intType = mlir::IntegerType::get(&ctx, 32);
   mlir::Attribute intAttr = mlir::IntegerAttr::get(
       intType,
-      static_cast<uint32_t>(BitEnumWithNone::Bit1 | BitEnumWithNone::Bit3));
+      static_cast<uint32_t>(BitEnumWithNone::Bit0 | BitEnumWithNone::Bit3));
   EXPECT_TRUE(intAttr.isa<BitEnumWithNoneAttr>());
   EXPECT_TRUE(intAttr.isa<BitEnumWithoutNoneAttr>());
+
+  intAttr = mlir::IntegerAttr::get(
+      intType, static_cast<uint32_t>(BitEnumWithGroup::Bits0To3) | (1u << 6));
+  EXPECT_FALSE(intAttr.isa<BitEnumWithGroupAttr>());
 }
diff --git a/mlir/unittests/TableGen/enums.td b/mlir/unittests/TableGen/enums.td
index cdcc18254bd92..9dbb1b47870ce 100644
--- a/mlir/unittests/TableGen/enums.td
+++ b/mlir/unittests/TableGen/enums.td
@@ -23,15 +23,25 @@ def Case10: I32EnumAttrCase<"Case10", 10>;
 
 def I32Enum: I32EnumAttr<"I32Enum", "A test enum", [Case5, Case10]>;
 
-def Bit0 : BitEnumAttrCase<"None", 0x0000>;
-def Bit1 : BitEnumAttrCase<"Bit1", 0x0001>;
-def Bit3 : BitEnumAttrCase<"Bit3", 0x0004>;
+def NoBits : BitEnumAttrCaseNone<"None">;
+def Bit0 : BitEnumAttrCaseBit<"Bit0", 0>;
+def Bit1 : BitEnumAttrCaseBit<"Bit1", 1>;
+def Bit2 : BitEnumAttrCaseBit<"Bit2", 2>;
+def Bit3 : BitEnumAttrCaseBit<"Bit3", 3>;
+def Bit4 : BitEnumAttrCaseBit<"Bit4", 4>;
+def Bit5 : BitEnumAttrCaseBit<"Bit5", 5>;
 
 def BitEnumWithNone : BitEnumAttr<"BitEnumWithNone", "A test enum",
-                                  [Bit0, Bit1, Bit3]>;
+                                  [NoBits, Bit0, Bit3]>;
 
 def BitEnumWithoutNone : BitEnumAttr<"BitEnumWithoutNone", "A test enum",
-                                     [Bit1, Bit3]>;
+                                     [Bit0, Bit3]>;
+
+def Bits0To3 : BitEnumAttrCaseGroup<"Bits0To3",
+                                    [Bit0, Bit1, Bit2, Bit3]>;
+
+def BitEnumWithGroup : BitEnumAttr<"BitEnumWithGroup", "A test enum",
+                                   [Bit0, Bit1, Bit2, Bit3, Bit4, Bits0To3]>;
 
 def PrettyIntEnumCase1: I32EnumAttrCase<"Case1", 1, "case_one">;
 def PrettyIntEnumCase2: I32EnumAttrCase<"Case2", 2, "case_two">;

From ee72b173869fd78c49bdd5b85a94fc35b8332ba6 Mon Sep 17 00:00:00 2001
From: Adrian Prantl <aprantl@apple.com>
Date: Tue, 25 Jan 2022 13:29:22 -0800
Subject: [PATCH 789/946] Fix UB in DwarfExpression::emitLegacyZExt()

A shift-left > 63 triggers a UBSAN failure. This patch kicks the can
down the road (to the consumer) by emitting a more compact
representation of the shift computation in DWARF expressions.

Relanding (I accidentally pushed an earlier version of the patch previously).

Differential Revision: https://reviews.llvm.org/D118183
---
 .../CodeGen/AsmPrinter/DwarfExpression.cpp    | 22 ++++++++++++++++---
 llvm/test/DebugInfo/X86/convert-debugloc.ll   | 16 +++++++++++++-
 2 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 37407c98e75f8..ee932d1051079 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -681,9 +681,25 @@ void DwarfExpression::emitLegacySExt(unsigned FromBits) {
 }
 
 void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
-  // (X & (1 << FromBits - 1))
-  emitOp(dwarf::DW_OP_constu);
-  emitUnsigned((1ULL << FromBits) - 1);
+  // Heuristic to decide the most efficient encoding.
+  // A ULEB can encode 7 1-bits per byte.
+  if (FromBits / 7 < 1+1+1+1+1) {
+    // (X & (1 << FromBits - 1))
+    emitOp(dwarf::DW_OP_constu);
+    emitUnsigned((1ULL << FromBits) - 1);
+  } else {
+    // Note that the DWARF 4 stack consists of pointer-sized elements,
+    // so technically it doesn't make sense to shift left more than 64
+    // bits. We leave that for the consumer to decide though. LLDB for
+    // example uses APInt for the stack elements and can still deal
+    // with this.
+    emitOp(dwarf::DW_OP_lit1);
+    emitOp(dwarf::DW_OP_constu);
+    emitUnsigned(FromBits);
+    emitOp(dwarf::DW_OP_shl);
+    emitOp(dwarf::DW_OP_lit1);
+    emitOp(dwarf::DW_OP_minus);
+  }
   emitOp(dwarf::DW_OP_and);
 }
 
diff --git a/llvm/test/DebugInfo/X86/convert-debugloc.ll b/llvm/test/DebugInfo/X86/convert-debugloc.ll
index 21e41dcc4c2a9..de0857d538327 100644
--- a/llvm/test/DebugInfo/X86/convert-debugloc.ll
+++ b/llvm/test/DebugInfo/X86/convert-debugloc.ll
@@ -27,7 +27,7 @@
 ; RUN:   | FileCheck %s --check-prefix=VERBOSE --check-prefix=CONV "--implicit-check-not={{DW_TAG|NULL}}"
 
 
-; SPLITCONV: Compile Unit:{{.*}} DWO_id = 0x62f17241069b1fa3
+; SPLITCONV: Compile Unit:{{.*}} DWO_id = 0x24191746f389535f
 ; SPLIT: DW_TAG_skeleton_unit
 
 ; CONV: DW_TAG_compile_unit
@@ -41,6 +41,8 @@
 ; CONV-NEXT:DW_AT_encoding {{.*}}DW_ATE_signed)
 ; CONV-NEXT:DW_AT_byte_size {{.*}}0x04)
 ; CONV-NOT: DW_AT
+; CONV:   DW_TAG_base_type
+; CONV:   DW_TAG_base_type
 ; CONV:   DW_TAG_subprogram
 ; CONV:     DW_TAG_formal_parameter
 ; CONV:     DW_TAG_variable
@@ -50,11 +52,14 @@
 ; VERBOSE-SAME: [[SIG32]] ->
 ; CONV-SAME: [[SIG32]]) "DW_ATE_signed_32", DW_OP_stack_value)
 ; CONV:       DW_AT_name {{.*}}"y")
+; CONV:     DW_TAG_variable
 ; CONV:     NULL
 ; CONV:   DW_TAG_base_type
 ; CONV:     DW_AT_name {{.*}}"signed char")
 ; CONV:   DW_TAG_base_type
 ; CONV:     DW_AT_name {{.*}}"int")
+; CONV:   DW_TAG_base_type
+; CONV:     DW_AT_name {{.*}}"unsigned long long")
 ; CONV:   NULL
 
 ; NOCONV: DW_TAG_compile_unit
@@ -64,11 +69,17 @@
 ; NOCONV:       DW_AT_location (
 ; NOCONV:         {{.*}}, DW_OP_dup, DW_OP_constu 0x7, DW_OP_shr, DW_OP_lit0, DW_OP_not, DW_OP_mul, DW_OP_constu 0x8, DW_OP_shl, DW_OP_or, DW_OP_stack_value)
 ; NOCONV:       DW_AT_name ("y")
+; NOCONV:     DW_TAG_variable
+; NOCONV:       DW_AT_location (
+; NOCONV:         DW_OP_constu 0x40, DW_OP_lit0, DW_OP_plus, DW_OP_lit1, DW_OP_constu 0x40, DW_OP_shl, DW_OP_lit1, DW_OP_minus, DW_OP_and, DW_OP_stack_value)
+; NOCONV:       DW_AT_name ("z")
 ; NOCONV:     NULL
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("signed char")
 ; NOCONV:   DW_TAG_base_type
 ; NOCONV:     DW_AT_name ("int")
+; NOCONV:   DW_TAG_base_type
+; NOCONV:     DW_AT_name ("unsigned long long")
 ; NOCONV:   NULL
 
 
@@ -81,6 +92,7 @@ entry:
 ;; will not attempt to eliminate.  At the moment, only "convert" ops are folded.
 ;; If you have to change the expression, the expected DWO_id also changes.
   call void @llvm.dbg.value(metadata i8 32, metadata !13, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !15
+  call void @llvm.dbg.value(metadata i8 64, metadata !17, metadata !DIExpression(DW_OP_lit0, DW_OP_plus, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 128, DW_ATE_unsigned, DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !15
   ret i8 %x, !dbg !16
 }
 
@@ -111,3 +123,5 @@ declare void @llvm.dbg.value(metadata, metadata, metadata)
 !14 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
 !15 = !DILocation(line: 3, column: 14, scope: !7)
 !16 = !DILocation(line: 4, column: 3, scope: !7)
+!17 = !DILocalVariable(name: "z", scope: !7, file: !1, line: 3, type: !18)
+!18 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned)

From 0606817d0b02321eaaf905e9c8246c63a1c0482f Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Tue, 25 Jan 2022 09:42:28 -0800
Subject: [PATCH 790/946] [CMake] Handle in-tree libc++ for sanitizer tests

We have been relying on the logic for C++ ABI library for sanitizer
tests but that's incorrect since most tests require a full C++ library
and not just C++ ABI. This change tries to address this by using the
dependency on libc++ if available.

Differential Revision: https://reviews.llvm.org/D118163
---
 compiler-rt/CMakeLists.txt | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 12946d74c797b..063d41bfc0fdd 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -231,6 +231,15 @@ option(SANITIZER_USE_STATIC_CXX_ABI
   "Use static libc++abi." ${DEFAULT_SANITIZER_USE_STATIC_CXX_ABI})
 pythonize_bool(SANITIZER_USE_STATIC_CXX_ABI)
 
+set(DEFAULT_SANITIZER_USE_STATIC_TEST_CXX OFF)
+if (DEFINED LIBCXX_ENABLE_SHARED AND NOT LIBCXX_ENABLE_SHARED)
+  set(DEFAULT_SANITIZER_USE_STATIC_TEST_CXX ON)
+endif()
+
+option(SANITIZER_USE_STATIC_TEST_CXX
+  "Use static libc++ for tests." ${DEFAULT_SANITIZER_USE_STATIC_TEST_CXX})
+pythonize_bool(SANITIZER_USE_STATIC_TEST_CXX)
+
 set(DEFAULT_COMPILER_RT_USE_BUILTINS_LIBRARY OFF)
 if (FUCHSIA)
   set(DEFAULT_COMPILER_RT_USE_BUILTINS_LIBRARY ON)
@@ -522,7 +531,15 @@ elseif (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libstdc++")
 endif()
 
 if (SANITIZER_TEST_CXX_LIBNAME STREQUAL "libc++")
-  append_libcxx_libs(SANITIZER_TEST_CXX)
+  if (SANITIZER_TEST_CXX_INTREE)
+    if (SANITIZER_USE_STATIC_TEST_CXX AND (TARGET cxx_static OR HAVE_LIBCXX))
+      list(APPEND SANITIZER_TEST_CXX_LIBRARIES cxx_static)
+    elseif (TARGET cxx_shared OR HAVE_LIBCXX)
+      list(APPEND SANITIZER_TEST_CXX_LIBRARIES cxx_shared)
+    endif()
+  else()
+    append_list_if(COMPILER_RT_HAS_LIBCXX c++ SANITIZER_TEST_CXX_LIBRARIES)
+  endif()
 elseif (SANITIZER_TEST_CXX_LIBNAME STREQUAL "libstdc++")
   append_list_if(COMPILER_RT_HAS_LIBSTDCXX stdc++ SANITIZER_TEST_CXX_LIBRARIES)
 endif()

From 20e9d4caf0cb37188bd218730f781728fe779c50 Mon Sep 17 00:00:00 2001
From: Vladislav Khmelevsky <och95@yandex.ru>
Date: Wed, 26 Jan 2022 23:45:46 +0300
Subject: [PATCH 791/946] [BOLT] Prepare BOLT for unit-testing

This patch adds unit testing support for BOLT. In order to do this we will need at least do this changes on the code level:
* Make createMCPlusBuilder accessible externally
* Remove positional InputFilename argument to bolt utlity sources
And prepare the cmake and lit for the new tests.

Vladislav Khmelevsky,
Advanced Software Technology Lab, Huawei

Reviewed By: maksfb, Amir

Differential Revision: https://reviews.llvm.org/D118271
---
 bolt/CMakeLists.txt                         |  4 +++
 bolt/include/bolt/Core/MCPlusBuilder.h      |  8 +++++
 bolt/include/bolt/Rewrite/RewriteInstance.h |  5 +++
 bolt/include/bolt/Utils/CommandLineOpts.h   |  1 -
 bolt/lib/Rewrite/RewriteInstance.cpp        | 35 +++++++++------------
 bolt/lib/Utils/CommandLineOpts.cpp          |  8 -----
 bolt/test/CMakeLists.txt                    |  6 ++++
 bolt/test/Unit/CMakeLists.txt               | 17 ++++++++++
 bolt/test/Unit/lit.cfg.py                   | 22 +++++++++++++
 bolt/test/Unit/lit.site.cfg.py.in           | 28 +++++++++++++++++
 bolt/tools/driver/llvm-bolt.cpp             |  5 +++
 bolt/unittests/CMakeLists.txt               |  6 ++++
 12 files changed, 115 insertions(+), 30 deletions(-)
 create mode 100644 bolt/test/Unit/CMakeLists.txt
 create mode 100644 bolt/test/Unit/lit.cfg.py
 create mode 100644 bolt/test/Unit/lit.site.cfg.py.in
 create mode 100644 bolt/unittests/CMakeLists.txt

diff --git a/bolt/CMakeLists.txt b/bolt/CMakeLists.txt
index 2fea004d72249..d7290927abe41 100644
--- a/bolt/CMakeLists.txt
+++ b/bolt/CMakeLists.txt
@@ -91,6 +91,10 @@ add_subdirectory(lib)
 add_subdirectory(tools)
 
 if (BOLT_INCLUDE_TESTS)
+  if (EXISTS ${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include/gtest/gtest.h)
+    add_subdirectory(unittests)
+    list(APPEND BOLT_TEST_DEPS BoltUnitTests)
+  endif()
   add_subdirectory(test)
 endif()
 
diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h
index f527860188e2b..3876b6e379daf 100644
--- a/bolt/include/bolt/Core/MCPlusBuilder.h
+++ b/bolt/include/bolt/Core/MCPlusBuilder.h
@@ -1898,6 +1898,14 @@ class MCPlusBuilder {
   }
 };
 
+MCPlusBuilder *createX86MCPlusBuilder(const MCInstrAnalysis *,
+                                      const MCInstrInfo *,
+                                      const MCRegisterInfo *);
+
+MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *,
+                                          const MCInstrInfo *,
+                                          const MCRegisterInfo *);
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/include/bolt/Rewrite/RewriteInstance.h b/bolt/include/bolt/Rewrite/RewriteInstance.h
index 047e33fca27c6..d4b1b6e5f6034 100644
--- a/bolt/include/bolt/Rewrite/RewriteInstance.h
+++ b/bolt/include/bolt/Rewrite/RewriteInstance.h
@@ -545,6 +545,11 @@ class RewriteInstance {
   friend class RewriteInstanceDiff;
 };
 
+MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
+                                   const MCInstrAnalysis *Analysis,
+                                   const MCInstrInfo *Info,
+                                   const MCRegisterInfo *RegInfo);
+
 } // namespace bolt
 } // namespace llvm
 
diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h
index e0f0ea306c3d3..9aab682dc30b9 100644
--- a/bolt/include/bolt/Utils/CommandLineOpts.h
+++ b/bolt/include/bolt/Utils/CommandLineOpts.h
@@ -44,7 +44,6 @@ extern llvm::cl::opt<unsigned long long> HeatmapMinAddress;
 extern llvm::cl::opt<bool> HotData;
 extern llvm::cl::opt<bool> HotFunctionsAtEnd;
 extern llvm::cl::opt<bool> HotText;
-extern llvm::cl::opt<std::string> InputFilename;
 extern llvm::cl::opt<bool> Instrument;
 extern llvm::cl::opt<std::string> OutputFilename;
 extern llvm::cl::opt<std::string> PerfData;
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 934f79749e88d..e8971ca132696 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -319,27 +319,6 @@ namespace bolt {
 
 extern const char *BoltRevision;
 
-extern MCPlusBuilder *createX86MCPlusBuilder(const MCInstrAnalysis *,
-                                             const MCInstrInfo *,
-                                             const MCRegisterInfo *);
-extern MCPlusBuilder *createAArch64MCPlusBuilder(const MCInstrAnalysis *,
-                                                 const MCInstrInfo *,
-                                                 const MCRegisterInfo *);
-
-} // namespace bolt
-} // namespace llvm
-
-namespace {
-
-bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
-  auto Itr =
-      std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(),
-                   [&](const std::string &SectionName) {
-                     return (Section && Section->getName() == SectionName);
-                   });
-  return Itr != opts::ReorderData.end();
-}
-
 MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
                                    const MCInstrAnalysis *Analysis,
                                    const MCInstrInfo *Info,
@@ -357,6 +336,20 @@ MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch,
   llvm_unreachable("architecture unsupported by MCPlusBuilder");
 }
 
+} // namespace bolt
+} // namespace llvm
+
+namespace {
+
+bool refersToReorderedSection(ErrorOr<BinarySection &> Section) {
+  auto Itr =
+      std::find_if(opts::ReorderData.begin(), opts::ReorderData.end(),
+                   [&](const std::string &SectionName) {
+                     return (Section && Section->getName() == SectionName);
+                   });
+  return Itr != opts::ReorderData.end();
+}
+
 } // anonymous namespace
 
 RewriteInstance::RewriteInstance(ELFObjectFileBase *File, const int Argc,
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 054b2d2b07daf..eb3b8a4b7ad7c 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -131,14 +131,6 @@ cl::opt<bool> HotText(
         "will put hot code into 2M pages. This requires relocation."),
     cl::ZeroOrMore, cl::cat(BoltCategory));
 
-cl::opt<std::string>
-InputFilename(
-  cl::Positional,
-  cl::desc("<executable>"),
-  cl::Required,
-  cl::cat(BoltCategory),
-  cl::sub(*cl::AllSubCommands));
-
 cl::opt<bool>
     Instrument("instrument",
                cl::desc("instrument code to generate accurate profile data"),
diff --git a/bolt/test/CMakeLists.txt b/bolt/test/CMakeLists.txt
index 1d776f166dd27..eb14f9b74624b 100644
--- a/bolt/test/CMakeLists.txt
+++ b/bolt/test/CMakeLists.txt
@@ -8,6 +8,12 @@ configure_lit_site_cfg(
   MAIN_CONFIG
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
   )
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg.py
+  MAIN_CONFIG
+  ${CMAKE_CURRENT_SOURCE_DIR}/Unit/lit.cfg.py
+  )
 
 set(BOLT_TEST_PARAMS
   bolt_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
diff --git a/bolt/test/Unit/CMakeLists.txt b/bolt/test/Unit/CMakeLists.txt
new file mode 100644
index 0000000000000..4af46ee83c6fc
--- /dev/null
+++ b/bolt/test/Unit/CMakeLists.txt
@@ -0,0 +1,17 @@
+add_custom_target(bolt-unit-test-deps)
+add_dependencies(bolt-unit-test-deps bolt-test-depends)
+
+add_lit_testsuites(BOLT-UNIT
+  ${CMAKE_CURRENT_SOURCE_DIR}
+  DEPENDS bolt-unit-test-deps)
+
+configure_lit_site_cfg(
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in
+  ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py
+  MAIN_CONFIG
+  ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py)
+
+add_lit_testsuite(check-bolt-unit "Running bolt unit test suite"
+  ${CMAKE_CURRENT_BINARY_DIR}
+  EXCLUDE_FROM_CHECK_ALL
+  DEPENDS bolt-unit-test-deps)
diff --git a/bolt/test/Unit/lit.cfg.py b/bolt/test/Unit/lit.cfg.py
new file mode 100644
index 0000000000000..52fd802dd8c47
--- /dev/null
+++ b/bolt/test/Unit/lit.cfg.py
@@ -0,0 +1,22 @@
+# -*- Python -*-
+
+# Configuration file for the 'lit' test runner.
+
+import os
+import subprocess
+
+import lit.formats
+
+# name: The name of this test suite.
+config.name = 'BOLT-Unit'
+
+# suffixes: A list of file extensions to treat as test files.
+config.suffixes = []
+
+# test_source_root: The root path where tests are located.
+# test_exec_root: The root path where tests should be run.
+config.test_exec_root = os.path.join(config.bolt_obj_root, 'unittests')
+config.test_source_root = config.test_exec_root
+
+# testFormat: The test format to use to interpret tests.
+config.test_format = lit.formats.GoogleTest(config.llvm_build_mode, 'Tests')
diff --git a/bolt/test/Unit/lit.site.cfg.py.in b/bolt/test/Unit/lit.site.cfg.py.in
new file mode 100644
index 0000000000000..92f9c91104766
--- /dev/null
+++ b/bolt/test/Unit/lit.site.cfg.py.in
@@ -0,0 +1,28 @@
+@LIT_SITE_CFG_IN_HEADER@
+
+config.llvm_src_root = "@LLVM_SOURCE_DIR@"
+config.llvm_obj_root = "@LLVM_BINARY_DIR@"
+config.llvm_tools_dir = "@LLVM_TOOLS_DIR@"
+config.llvm_libs_dir = "@LLVM_LIBS_DIR@"
+config.llvm_build_mode = "@LLVM_BUILD_MODE@"
+config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
+config.bolt_obj_root = "@BOLT_BINARY_DIR@"
+config.bolt_src_root = "@BOLT_SOURCE_DIR@"
+config.target_triple = "@TARGET_TRIPLE@"
+config.python_executable = "@Python3_EXECUTABLE@"
+
+# Support substitution of the tools and libs dirs with user parameters. This is
+# used when we can't determine the tool dir at configuration time.
+try:
+    config.llvm_tools_dir = config.llvm_tools_dir % lit_config.params
+    config.llvm_libs_dir = config.llvm_libs_dir % lit_config.params
+    config.llvm_build_mode = config.llvm_build_mode % lit_config.params
+except KeyError as e:
+    key, = e.args
+    lit_config.fatal("unable to find %r parameter, use '--param=%s=VALUE'" % (key,key))
+
+import lit.llvm
+lit.llvm.initialize(lit_config, config)
+
+# Let the main config do the real work.
+lit_config.load_config(config, "@BOLT_SOURCE_DIR@/test/Unit/lit.cfg.py")
diff --git a/bolt/tools/driver/llvm-bolt.cpp b/bolt/tools/driver/llvm-bolt.cpp
index 30d6cf820ac14..717f2762809a2 100644
--- a/bolt/tools/driver/llvm-bolt.cpp
+++ b/bolt/tools/driver/llvm-bolt.cpp
@@ -45,6 +45,11 @@ static cl::OptionCategory *BoltDiffCategories[] = {&BoltDiffCategory};
 static cl::OptionCategory *Perf2BoltCategories[] = {&AggregatorCategory,
                                                     &BoltOutputCategory};
 
+static cl::opt<std::string> InputFilename(cl::Positional,
+                                          cl::desc("<executable>"),
+                                          cl::Required, cl::cat(BoltCategory),
+                                          cl::sub(*cl::AllSubCommands));
+
 static cl::opt<std::string>
 InputDataFilename("data",
   cl::desc("<data file>"),
diff --git a/bolt/unittests/CMakeLists.txt b/bolt/unittests/CMakeLists.txt
new file mode 100644
index 0000000000000..5b0f75d1c88ca
--- /dev/null
+++ b/bolt/unittests/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_custom_target(BoltUnitTests)
+set_target_properties(BoltUnitTests PROPERTIES FOLDER "BOLT tests")
+
+function(add_bolt_unittest test_dirname)
+  add_unittest(BoltUnitTests ${test_dirname} ${ARGN})
+endfunction()

From 35d15222c05d3409076e170e4851fd3d58a64e35 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 26 Jan 2022 13:26:51 -0800
Subject: [PATCH 792/946] [Driver] Remove obsoleted -gz=zlib-gnu

GCC added -gz=zlib-gnu in 2014 for -gz meaning change (.zdebug =>
SHF_COMPRESSED) and the legacy zlib-gnu hasn't gain adoption.

According to Debian Code Search (`gz=zlib-gnu`), no project uses -gz=zlib-gnu
(valgrind has a configure to use -gz=zlib). Any possible -gz=zlib-gnu user can
switch to -gz smoothly (supported by integrated assemblers for many years;
binutils 2.26).

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D117744
---
 clang/include/clang/Driver/Options.td      | 4 ++--
 clang/lib/Driver/ToolChains/Clang.cpp      | 2 +-
 clang/lib/Driver/ToolChains/CommonArgs.cpp | 8 ++++----
 clang/test/Driver/amdgcn-gz-options.cl     | 5 -----
 clang/test/Driver/compress.c               | 5 -----
 5 files changed, 7 insertions(+), 17 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index a57787a4f4c38..b3de12e8c7b52 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4974,8 +4974,8 @@ def record_command_line : Separate<["-"], "record-command-line">,
   HelpText<"The string to embed in the .LLVM.command.line section.">,
   MarshallingInfoString<CodeGenOpts<"RecordCommandLine">>;
 def compress_debug_sections_EQ : Joined<["-", "--"], "compress-debug-sections=">,
-    HelpText<"DWARF debug sections compression type">, Values<"none,zlib,zlib-gnu">,
-    NormalizedValuesScope<"llvm::DebugCompressionType">, NormalizedValues<["None", "Z", "GNU"]>,
+    HelpText<"DWARF debug sections compression type">, Values<"none,zlib">,
+    NormalizedValuesScope<"llvm::DebugCompressionType">, NormalizedValues<["None", "Z"]>,
     MarshallingInfoEnum<CodeGenOpts<"CompressDebugSections">, "None">;
 def compress_debug_sections : Flag<["-", "--"], "compress-debug-sections">,
   Alias<compress_debug_sections_EQ>, AliasArgs<["zlib"]>;
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index ea83551845070..4386e395bc6ce 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -1117,7 +1117,7 @@ static void RenderDebugInfoCompressionArgs(const ArgList &Args,
     StringRef Value = A->getValue();
     if (Value == "none") {
       CmdArgs.push_back("--compress-debug-sections=none");
-    } else if (Value == "zlib" || Value == "zlib-gnu") {
+    } else if (Value == "zlib") {
       if (llvm::zlib::isAvailable()) {
         CmdArgs.push_back(
             Args.MakeArgString("--compress-debug-sections=" + Twine(Value)));
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 5711998f90434..1d30090ca21c2 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -286,13 +286,13 @@ void tools::addLinkerCompressDebugSectionsOption(
     const ToolChain &TC, const llvm::opt::ArgList &Args,
     llvm::opt::ArgStringList &CmdArgs) {
   // GNU ld supports --compress-debug-sections=none|zlib|zlib-gnu|zlib-gabi
-  // whereas zlib is an alias to zlib-gabi. Therefore -gz=none|zlib|zlib-gnu
-  // are translated to --compress-debug-sections=none|zlib|zlib-gnu.
-  // -gz is not translated since ld --compress-debug-sections option requires an
+  // whereas zlib is an alias to zlib-gabi and zlib-gnu is obsoleted. Therefore
+  // -gz=none|zlib are translated to --compress-debug-sections=none|zlib. -gz
+  // is not translated since ld --compress-debug-sections option requires an
   // argument.
   if (const Arg *A = Args.getLastArg(options::OPT_gz_EQ)) {
     StringRef V = A->getValue();
-    if (V == "none" || V == "zlib" || V == "zlib-gnu")
+    if (V == "none" || V == "zlib")
       CmdArgs.push_back(Args.MakeArgString("--compress-debug-sections=" + V));
     else
       TC.getDriver().Diag(diag::err_drv_unsupported_option_argument)
diff --git a/clang/test/Driver/amdgcn-gz-options.cl b/clang/test/Driver/amdgcn-gz-options.cl
index 40fe9cfcc50df..0d09a714b0326 100644
--- a/clang/test/Driver/amdgcn-gz-options.cl
+++ b/clang/test/Driver/amdgcn-gz-options.cl
@@ -9,8 +9,3 @@
 // RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s
 // CHECK-OPT_GZ_EQ_ZLIB: {{.* "-cc1(as)?".* "--compress-debug-sections=zlib"}}
 // CHECK-OPT_GZ_EQ_ZLIB: "--compress-debug-sections=zlib"
-
-// RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib-gnu -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
-// RUN: %clang -### -target amdgcn-amd-amdhsa -gz=zlib-gnu %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
-// CHECK-OPT_GZ_EQ_ZLIB_GNU: {{.* "-cc1(as)?".* "--compress-debug-sections=zlib-gnu"}}
-// CHECK-OPT_GZ_EQ_ZLIB_GNU: "--compress-debug-sections=zlib-gnu"
diff --git a/clang/test/Driver/compress.c b/clang/test/Driver/compress.c
index e6eff3f54dab0..48f8dfa09ba5e 100644
--- a/clang/test/Driver/compress.c
+++ b/clang/test/Driver/compress.c
@@ -29,11 +29,6 @@
 // CHECK-OPT_GZ_EQ_ZLIB: {{.* "-cc1(as)?".* "--compress-debug-sections=zlib"}}
 // CHECK-OPT_GZ_EQ_ZLIB: "--compress-debug-sections=zlib"
 
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib-gnu -x assembler %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
-// RUN: %clang -### -target x86_64-unknown-linux-gnu -gz=zlib-gnu %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
-// CHECK-OPT_GZ_EQ_ZLIB_GNU: {{.* "-cc1(as)?".* "--compress-debug-sections=zlib-gnu"}}
-// CHECK-OPT_GZ_EQ_ZLIB_GNU: "--compress-debug-sections=zlib-gnu"
-
 // RUN: %clang -### -fintegrated-as -gz=invalid -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s
 // RUN: %clang -### -fintegrated-as -gz=invalid -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s
 // CHECK-OPT_GZ_EQ_INVALID: error: unsupported argument 'invalid' to option 'gz='

From 6bc20eb13470514c67e9ee5bbd515c2197ce8ea8 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 26 Jan 2022 13:28:51 -0800
Subject: [PATCH 793/946] [cc1as] Remove -Wa,--compress-debug-sections=zlib-gnu

It's obsoleted and unlikely used. See D117744.
---
 clang/lib/Driver/ToolChains/Gnu.cpp | 2 +-
 clang/test/Driver/compress-noias.c  | 4 ----
 clang/tools/driver/cc1as_main.cpp   | 1 -
 3 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index fbb1af4dbe22b..7a9570a686f4b 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -694,7 +694,7 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C,
       CmdArgs.push_back("--compress-debug-sections");
     } else {
       StringRef Value = A->getValue();
-      if (Value == "none" || Value == "zlib" || Value == "zlib-gnu") {
+      if (Value == "none" || Value == "zlib") {
         CmdArgs.push_back(
             Args.MakeArgString("--compress-debug-sections=" + Twine(Value)));
       } else {
diff --git a/clang/test/Driver/compress-noias.c b/clang/test/Driver/compress-noias.c
index 40ec80acd761e..d0d7ef34e70b8 100644
--- a/clang/test/Driver/compress-noias.c
+++ b/clang/test/Driver/compress-noias.c
@@ -27,10 +27,6 @@
 // RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=zlib -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB %s
 // CHECK-OPT_GZ_EQ_ZLIB: "--compress-debug-sections=zlib"
 
-// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=zlib-gnu -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
-// RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=zlib-gnu -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_ZLIB_GNU %s
-// CHECK-OPT_GZ_EQ_ZLIB_GNU: "--compress-debug-sections=zlib-gnu"
-
 // RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=invalid -x assembler -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s
 // RUN: %clang -### -target i686-unknown-linux-gnu -fno-integrated-as -gz=invalid -c %s 2>&1 | FileCheck -check-prefix CHECK-OPT_GZ_EQ_INVALID %s
 // CHECK-OPT_GZ_EQ_INVALID: error: unsupported argument 'invalid' to option 'gz='
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp
index db3288d752815..6459d1534b39e 100644
--- a/clang/tools/driver/cc1as_main.cpp
+++ b/clang/tools/driver/cc1as_main.cpp
@@ -228,7 +228,6 @@ bool AssemblerInvocation::CreateFromArgs(AssemblerInvocation &Opts,
         llvm::StringSwitch<llvm::DebugCompressionType>(A->getValue())
             .Case("none", llvm::DebugCompressionType::None)
             .Case("zlib", llvm::DebugCompressionType::Z)
-            .Case("zlib-gnu", llvm::DebugCompressionType::GNU)
             .Default(llvm::DebugCompressionType::None);
   }
 

From 7e73fd95a0ee5fa05f3426068a9f709853b24e54 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 26 Jan 2022 12:50:38 -0800
Subject: [PATCH 794/946] [RISCV] Minor code cleanups in RISCVISAInfo. NFCI

Pass StringRef by value instead of const reference.

Replace if () llvm_unreachable with an assert.

Replace size() == 0 with empty()
---
 llvm/lib/Support/RISCVISAInfo.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 55a644e98aacd..a78c7057c881b 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -110,9 +110,9 @@ static bool stripExperimentalPrefix(StringRef &Ext) {
 // NOTE: This function is NOT able to take empty strings or strings that only
 // have version numbers and no extension name. It assumes the extension name
 // will be at least more than one character.
-static size_t findFirstNonVersionCharacter(const StringRef &Ext) {
-  if (Ext.size() == 0)
-    llvm_unreachable("Already guarded by if-statement in ::parseArchString");
+static size_t findFirstNonVersionCharacter(StringRef Ext) {
+   assert(!Ext.empty() &&
+          "Already guarded by if-statement in ::parseArchString");
 
   int Pos = Ext.size() - 1;
   while (Pos > 0 && isDigit(Ext[Pos]))

From e7cf10958703f4fee8f8d0f9cbe3c4ed4a25b0d9 Mon Sep 17 00:00:00 2001
From: Jordan Rupprecht <rupprecht@google.com>
Date: Wed, 26 Jan 2022 13:37:42 -0800
Subject: [PATCH 795/946] [bazel] Enable layering_check for MLIR test directory

This would have enabled me to notice the MLIR test file needed updating too, preventing the test file of 2074eef5db330ab437410bfb617b58ea70f4fbff from being necessary.

layering_check is already enabled in mlir/BUILD.bazel. I don't know why I didn't see the other breakage there.

Differential Revision: https://reviews.llvm.org/D118125
---
 utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 79ba323efef07..49cde53a8c54b 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -6,6 +6,7 @@ load("//mlir:tblgen.bzl", "gentbl_cc_library", "td_library")
 
 package(
     default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
     licenses = ["notice"],
 )
 

From f487a76430a0e79d017519328e808b812f8186ad Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 26 Jan 2022 13:39:36 -0800
Subject: [PATCH 796/946] [RISCV] Add hasStdExtZbp() to hasAndNotCompare.

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  3 ++-
 llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll      |  6 ++----
 llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll      | 12 ++++--------
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a01c76d587966..c71f1a698563d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1232,7 +1232,8 @@ bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
   if (VT.isVector())
     return false;
 
-  return Subtarget.hasStdExtZbb() && !isa<ConstantSDNode>(Y);
+  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) &&
+         !isa<ConstantSDNode>(Y);
 }
 
 /// Check if sinking \p I's operands to I's basic block is profitable, because
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
index 68b355981e2c6..ac75dfd0773b3 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
@@ -580,8 +580,7 @@ define i1 @andn_seqz_i32(i32 %a, i32 %b) nounwind {
 ;
 ; RV32ZBP-LABEL: andn_seqz_i32:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    and a0, a0, a1
-; RV32ZBP-NEXT:    xor a0, a0, a1
+; RV32ZBP-NEXT:    andn a0, a1, a0
 ; RV32ZBP-NEXT:    seqz a0, a0
 ; RV32ZBP-NEXT:    ret
   %and = and i32 %a, %b
@@ -636,8 +635,7 @@ define i1 @andn_snez_i32(i32 %a, i32 %b) nounwind {
 ;
 ; RV32ZBP-LABEL: andn_snez_i32:
 ; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    and a0, a0, a1
-; RV32ZBP-NEXT:    xor a0, a0, a1
+; RV32ZBP-NEXT:    andn a0, a1, a0
 ; RV32ZBP-NEXT:    snez a0, a0
 ; RV32ZBP-NEXT:    ret
   %and = and i32 %a, %b
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
index dd28c772f5974..d166cbdad8875 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
@@ -651,8 +651,7 @@ define i1 @andn_seqz_i32(i32 signext %a, i32 signext %b) nounwind {
 ;
 ; RV64ZBP-LABEL: andn_seqz_i32:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    and a0, a0, a1
-; RV64ZBP-NEXT:    xor a0, a0, a1
+; RV64ZBP-NEXT:    andn a0, a1, a0
 ; RV64ZBP-NEXT:    seqz a0, a0
 ; RV64ZBP-NEXT:    ret
   %and = and i32 %a, %b
@@ -676,8 +675,7 @@ define i1 @andn_seqz_i64(i64 %a, i64 %b) nounwind {
 ;
 ; RV64ZBP-LABEL: andn_seqz_i64:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    and a0, a0, a1
-; RV64ZBP-NEXT:    xor a0, a0, a1
+; RV64ZBP-NEXT:    andn a0, a1, a0
 ; RV64ZBP-NEXT:    seqz a0, a0
 ; RV64ZBP-NEXT:    ret
   %and = and i64 %a, %b
@@ -701,8 +699,7 @@ define i1 @andn_snez_i32(i32 signext %a, i32 signext %b) nounwind {
 ;
 ; RV64ZBP-LABEL: andn_snez_i32:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    and a0, a0, a1
-; RV64ZBP-NEXT:    xor a0, a0, a1
+; RV64ZBP-NEXT:    andn a0, a1, a0
 ; RV64ZBP-NEXT:    snez a0, a0
 ; RV64ZBP-NEXT:    ret
   %and = and i32 %a, %b
@@ -726,8 +723,7 @@ define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
 ;
 ; RV64ZBP-LABEL: andn_snez_i64:
 ; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    and a0, a0, a1
-; RV64ZBP-NEXT:    xor a0, a0, a1
+; RV64ZBP-NEXT:    andn a0, a1, a0
 ; RV64ZBP-NEXT:    snez a0, a0
 ; RV64ZBP-NEXT:    ret
   %and = and i64 %a, %b

From 48a38954c92dfff0083bdf094f3616fb274b5f8a Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Tue, 25 Jan 2022 16:10:51 -0800
Subject: [PATCH 797/946] [CMake] Use generator expression to get in-tree
 libc++ path

When using the in-tree libc++, we should be using the full path to
ensure that we're using the right library and not accidentally pick up
the system library.

Differential Revision: https://reviews.llvm.org/D118200
---
 compiler-rt/CMakeLists.txt                          | 13 +++++++------
 compiler-rt/lib/memprof/tests/CMakeLists.txt        |  7 +++----
 compiler-rt/lib/orc/unittests/CMakeLists.txt        |  5 +----
 .../lib/scudo/standalone/tests/CMakeLists.txt       |  8 ++++----
 compiler-rt/lib/tsan/tests/CMakeLists.txt           |  8 ++++----
 compiler-rt/lib/xray/tests/CMakeLists.txt           |  5 +----
 6 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/compiler-rt/CMakeLists.txt b/compiler-rt/CMakeLists.txt
index 063d41bfc0fdd..3a41aa43e4066 100644
--- a/compiler-rt/CMakeLists.txt
+++ b/compiler-rt/CMakeLists.txt
@@ -532,16 +532,17 @@ endif()
 
 if (SANITIZER_TEST_CXX_LIBNAME STREQUAL "libc++")
   if (SANITIZER_TEST_CXX_INTREE)
-    if (SANITIZER_USE_STATIC_TEST_CXX AND (TARGET cxx_static OR HAVE_LIBCXX))
-      list(APPEND SANITIZER_TEST_CXX_LIBRARIES cxx_static)
-    elseif (TARGET cxx_shared OR HAVE_LIBCXX)
-      list(APPEND SANITIZER_TEST_CXX_LIBRARIES cxx_shared)
+    if (SANITIZER_USE_STATIC_TEST_CXX)
+      set(cxx_target cxx_static)
+    else()
+      set(cxx_target cxx_shared)
     endif()
+    list(APPEND SANITIZER_TEST_CXX_LIBRARIES "$<$<BOOL:$<TARGET_NAME_IF_EXISTS:${cxx_target}>>:$<TARGET_LINKER_FILE:${cxx_target}>>")
   else()
-    append_list_if(COMPILER_RT_HAS_LIBCXX c++ SANITIZER_TEST_CXX_LIBRARIES)
+    append_list_if(COMPILER_RT_HAS_LIBCXX -lc++ SANITIZER_TEST_CXX_LIBRARIES)
   endif()
 elseif (SANITIZER_TEST_CXX_LIBNAME STREQUAL "libstdc++")
-  append_list_if(COMPILER_RT_HAS_LIBSTDCXX stdc++ SANITIZER_TEST_CXX_LIBRARIES)
+  append_list_if(COMPILER_RT_HAS_LIBSTDCXX -lstdc++ SANITIZER_TEST_CXX_LIBRARIES)
 endif()
 
 # TODO: There's a lot of duplication across lib/*/tests/CMakeLists.txt files,
diff --git a/compiler-rt/lib/memprof/tests/CMakeLists.txt b/compiler-rt/lib/memprof/tests/CMakeLists.txt
index 688983e3a3ca4..9dcad644d0984 100644
--- a/compiler-rt/lib/memprof/tests/CMakeLists.txt
+++ b/compiler-rt/lib/memprof/tests/CMakeLists.txt
@@ -36,9 +36,8 @@ if(NOT WIN32)
   list(APPEND MEMPROF_UNITTEST_LINK_FLAGS -pthread)
 endif()
 
-foreach(lib ${SANITIZER_TEST_CXX_LIBRARIES})
-  list(APPEND MEMPROF_UNITTEST_LINK_FLAGS -l${lib})
-endforeach()
+set(MEMPROF_UNITTEST_LINK_LIBRARIES ${SANITIZER_TEST_CXX_LIBRARIES})
+list(APPEND MEMPROF_UNITTEST_LINK_LIBRARIES "dl")
 
 if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST MEMPROF_SUPPORTED_ARCH)
   # MemProf unit tests are only run on the host machine.
@@ -55,7 +54,7 @@ if(COMPILER_RT_DEFAULT_TARGET_ARCH IN_LIST MEMPROF_SUPPORTED_ARCH)
     $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>)
   set_target_compile_flags(MemProfUnitTests ${MEMPROF_UNITTEST_CFLAGS})
   set_target_link_flags(MemProfUnitTests ${MEMPROF_UNITTEST_LINK_FLAGS})
-  target_link_libraries(MemProfUnitTests dl)
+  target_link_libraries(MemProfUnitTests ${MEMPROF_UNITTEST_LINK_LIBRARIES})
 
   if (TARGET cxx-headers OR HAVE_LIBCXX)
     add_dependencies(MemProfUnitTests cxx-headers)
diff --git a/compiler-rt/lib/orc/unittests/CMakeLists.txt b/compiler-rt/lib/orc/unittests/CMakeLists.txt
index 95431e026d3be..3b492196dd620 100644
--- a/compiler-rt/lib/orc/unittests/CMakeLists.txt
+++ b/compiler-rt/lib/orc/unittests/CMakeLists.txt
@@ -37,6 +37,7 @@ set(ORC_TEST_ARCH ${ORC_SUPPORTED_ARCH})
 set(ORC_UNITTEST_LINK_FLAGS
   ${COMPILER_RT_UNITTEST_LINK_FLAGS}
   ${CMAKE_THREAD_LIBS_INIT}
+  ${SANITIZER_TEST_CXX_LIBRARIES}
   )
 
 if(APPLE)
@@ -51,10 +52,6 @@ else()
   append_list_if(COMPILER_RT_HAS_LIBEXECINFO -lexecinfo ORC_UNITTEST_LINK_FLAGS)
 endif()
 
-foreach(lib ${SANITIZER_TEST_CXX_LIBRARIES})
-  list(APPEND ORC_UNITTEST_LINK_FLAGS -l${lib})
-endforeach()
-
 set(ORC_DEPS gtest orc)
 # ORC uses C++ standard library headers.
 if (TARGET cxx-headers OR HAVE_LIBCXX)
diff --git a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
index eaa47a04a179a..6d0936cbb5c1e 100644
--- a/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
+++ b/compiler-rt/lib/scudo/standalone/tests/CMakeLists.txt
@@ -33,10 +33,10 @@ endif()
 set(SCUDO_TEST_ARCH ${SCUDO_STANDALONE_SUPPORTED_ARCH})
 
 # gtests requires c++
-set(LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS})
-foreach(lib ${SANITIZER_TEST_CXX_LIBRARIES})
-  list(APPEND LINK_FLAGS -l${lib})
-endforeach()
+set(LINK_FLAGS
+  ${COMPILER_RT_UNITTEST_LINK_FLAGS}
+  ${SANITIZER_TEST_CXX_LIBRARIES}
+  )
 list(APPEND LINK_FLAGS -pthread)
 # Linking against libatomic is required with some compilers
 check_library_exists(atomic __atomic_load_8 "" COMPILER_RT_HAS_LIBATOMIC)
diff --git a/compiler-rt/lib/tsan/tests/CMakeLists.txt b/compiler-rt/lib/tsan/tests/CMakeLists.txt
index d487660da935e..0a49c238403f4 100644
--- a/compiler-rt/lib/tsan/tests/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/tests/CMakeLists.txt
@@ -24,10 +24,10 @@ append_list_if(COMPILER_RT_HAS_MSSE4_2_FLAG -msse4.2 TSAN_UNITTEST_CFLAGS)
 
 set(TSAN_TEST_ARCH ${TSAN_SUPPORTED_ARCH})
 
-set(LINK_FLAGS ${COMPILER_RT_UNITTEST_LINK_FLAGS})
-foreach(lib ${SANITIZER_TEST_CXX_LIBRARIES})
-  list(APPEND LINK_FLAGS -l${lib})
-endforeach()
+set(LINK_FLAGS
+  ${COMPILER_RT_UNITTEST_LINK_FLAGS}
+  ${SANITIZER_TEST_CXX_LIBRARIES}
+  )
 
 if(APPLE)
 
diff --git a/compiler-rt/lib/xray/tests/CMakeLists.txt b/compiler-rt/lib/xray/tests/CMakeLists.txt
index 96a9db1ef8777..99884b05d77e4 100644
--- a/compiler-rt/lib/xray/tests/CMakeLists.txt
+++ b/compiler-rt/lib/xray/tests/CMakeLists.txt
@@ -50,6 +50,7 @@ set(XRAY_TEST_ARCH ${XRAY_SUPPORTED_ARCH})
 set(XRAY_UNITTEST_LINK_FLAGS
   ${COMPILER_RT_UNITTEST_LINK_FLAGS}
   ${CMAKE_THREAD_LIBS_INIT}
+  ${SANITIZER_TEST_CXX_LIBRARIES}
   )
 
 if (NOT APPLE)
@@ -82,10 +83,6 @@ if (NOT APPLE)
   append_list_if(COMPILER_RT_HAS_LIBEXECINFO -lexecinfo XRAY_UNITTEST_LINK_FLAGS)
 endif()
 
-foreach(lib ${SANITIZER_TEST_CXX_LIBRARIES})
-  list(APPEND XRAY_UNITTEST_LINK_FLAGS -l${lib})
-endforeach()
-
 macro(add_xray_unittest testname)
   cmake_parse_arguments(TEST "" "" "SOURCES;HEADERS" ${ARGN})
   if(UNIX AND NOT APPLE)

From a5257ae277732261dc3b9de210b8d49cca2cf021 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik@google.com>
Date: Wed, 26 Jan 2022 11:58:17 -0800
Subject: [PATCH 798/946] [mlir][amx] add a full tile matrix mult example to
 integration tests

Rationale:
Demonstrates the maximum tile size allowed for the f32 <= bf16 x bf16 op

Reviewed By: dcaballe

Differential Revision: https://reviews.llvm.org/D118277
---
 .../Vector/CPU/AMX/test-mulf-full.mlir        | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
new file mode 100644
index 0000000000000..bc1d6333bd721
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
@@ -0,0 +1,141 @@
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-translate -mlir-to-llvmir | \
+// RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// Note: To run this test, your CPU must support AMX.
+
+// Multiply full size tiles into zero destination.
+func @kernel(%arg0: memref<16x32xbf16>,
+             %arg1: memref<16x32xbf16>,
+             %arg2: memref<16x16xf32>) {
+  %0 = arith.constant 0 : index
+  %1 = amx.tile_load %arg0[%0, %0] : memref<16x32xbf16>  into vector<16x32xbf16>
+  %2 = amx.tile_load %arg1[%0, %0] : memref<16x32xbf16>  into vector<16x32xbf16>
+  %3 = amx.tile_zero : vector<16x16xf32>
+  %4 = amx.tile_mulf %1, %2, %3 : vector<16x32xbf16>, vector<16x32xbf16>, vector<16x16xf32>
+  amx.tile_store %arg2[%0, %0], %4 : memref<16x16xf32>, vector<16x16xf32>
+  return
+}
+
+func @entry() -> i32 {
+  %fu = arith.constant -1.0: f32
+  %c0 = arith.constant 0: index
+  %c1 = arith.constant 1: index
+  %c16 = arith.constant 16: index
+  %c32 = arith.constant 32: index
+
+  // Setup simple test data. Note that bf16 does not seem to work well with the
+  // tensor type yet, which is why we use vectors and transfer the data into memref.
+  // TODO: use tensors and bufferization.to_memref instead
+  %0 = arith.constant dense<[
+     [ 1.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.3, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.4, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.6, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.7, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.7, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.6, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.4, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.3, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.1,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
+  ]> : vector<16x32xbf16>
+  %1 = arith.constant dense<[
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.3, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.4, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.6, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.7, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.8, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.7, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.6, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.5, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.4, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.3, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.2, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
+     [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.1, 1.0, 1.0, 1.0, 1.0, 1.0,
+       1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
+  ]> : vector<16x32xbf16>
+
+  // Set up memory.
+  %a = memref.alloc() : memref<16x32xbf16>
+  %b = memref.alloc() : memref<16x32xbf16>
+  %c = memref.alloc() : memref<16x16xf32>
+  vector.transfer_write %0, %a[%c0, %c0] : vector<16x32xbf16>, memref<16x32xbf16>
+  vector.transfer_write %1, %b[%c0, %c0] : vector<16x32xbf16>, memref<16x32xbf16>
+
+  // Call kernel.
+  call @kernel(%a, %b, %c) : (memref<16x32xbf16>, memref<16x32xbf16>, memref<16x16xf32>) -> ()
+
+  //
+  // Print and verify the 16x16 result.
+  //
+  // CHECK:      ( 32.1016, 34.3984, 34.5078, 33.6953, 32.9062, 32.2031, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016 )
+  // CHECK-NEXT: ( 32.2031, 34.5, 34.6094, 33.7969, 33.0284, 32.3047, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031 )
+  // CHECK-NEXT: ( 32.2969, 34.5938, 34.7031, 33.8906, 33.1619, 32.3984, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969 )
+  // CHECK-NEXT: ( 32.3984, 34.6953, 34.8047, 33.9922, 33.2031, 32.5, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984 )
+  // CHECK-NEXT: ( 32.5, 34.7969, 34.9062, 34.0938, 33.3047, 32.6016, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5 )
+  // CHECK-NEXT: ( 32.6016, 34.8984, 35.0078, 34.3739, 33.4062, 32.7031, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016 )
+  // CHECK-NEXT: ( 32.7031, 35, 35.1094, 34.577, 33.5078, 32.8047, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031 )
+  // CHECK-NEXT: ( 32.7969, 35.0938, 35.2031, 34.3906, 33.6016, 32.8984, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969 )
+  // CHECK-NEXT: ( 32.7969, 35.0938, 35.2031, 34.3906, 33.6016, 32.8984, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969, 32.7969 )
+  // CHECK-NEXT: ( 32.7031, 35, 35.4609, 34.2969, 33.5078, 32.8047, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031, 32.7031 )
+  // CHECK-NEXT: ( 32.6016, 34.8984, 35.3697, 34.1953, 33.4062, 32.7031, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016, 32.6016 )
+  // CHECK-NEXT: ( 32.5, 34.7969, 34.9062, 34.0938, 33.3047, 32.6016, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5, 32.5 )
+  // CHECK-NEXT: ( 32.3984, 34.6953, 34.8047, 33.9922, 33.2031, 32.5, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984, 32.3984 )
+  // CHECK-NEXT: ( 32.2969, 34.8025, 34.7031, 33.8906, 33.1016, 32.3984, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969, 32.2969 )
+  // CHECK-NEXT: ( 32.2031, 34.6619, 34.6094, 33.7969, 33.0078, 32.3047, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031, 32.2031 )
+  // CHECK-NEXT: ( 32.1016, 34.3984, 34.5078, 33.6953, 32.9062, 32.2031, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016, 32.1016 )
+  //
+  scf.for %i = %c0 to %c16 step %c1 {
+    %v = vector.transfer_read %c[%i, %c0], %fu: memref<16x16xf32>, vector<16xf32>
+    vector.print %v : vector<16xf32>
+  }
+
+  // Release resources.
+  memref.dealloc %a : memref<16x32xbf16>
+  memref.dealloc %b : memref<16x32xbf16>
+  memref.dealloc %c : memref<16x16xf32>
+  %i0 = arith.constant 0 : i32
+  return %i0 : i32
+}

From 50f50f2582993a079dbcfb8e7ba48920f41e6be0 Mon Sep 17 00:00:00 2001
From: Shubham Sandeep Rastogi <srastogi22@apple.com>
Date: Wed, 17 Nov 2021 15:05:58 -0800
Subject: [PATCH 799/946] Emit swift5 reflection section data in dsym bundle
 generated by dsymutil in the Dwarf section.

Add support for Swift reflection metadata to dsymutil.

This patch adds support for copying Swift reflection metadata (__swift5_.* sections) from .o files to into the symbol-rich binary in the output .dSYM. The functionality is automatically enabled only if a .o file has reflection metadata sections and the binary doesn't. When copying dsymutil moves the section from the __TEXT segment to the __DWARF segment.

rdar://76973336

Differential Revision: https://reviews.llvm.org/D115007
---
 llvm/include/llvm/BinaryFormat/Swift.def      |  26 +
 llvm/include/llvm/BinaryFormat/Swift.h        |  24 +
 llvm/include/llvm/DWARFLinker/DWARFStreamer.h |   8 +-
 llvm/include/llvm/MC/MCContext.h              |  10 +-
 llvm/include/llvm/MC/MCObjectFileInfo.h       |  13 +
 llvm/include/llvm/Object/MachO.h              |   4 +
 llvm/include/llvm/Object/ObjectFile.h         |   6 +
 llvm/lib/DWARFLinker/DWARFStreamer.cpp        |  20 +-
 llvm/lib/MC/MCContext.cpp                     |   8 +-
 llvm/lib/MC/MCObjectFileInfo.cpp              |  11 +
 llvm/lib/Object/MachOObjectFile.cpp           |  12 +
 llvm/test/tools/dsymutil/Inputs/main.yaml     | 886 ++++++++++++++++++
 .../dsymutil/Inputs/reflection_metadata.yaml  | 436 +++++++++
 llvm/test/tools/dsymutil/Inputs/test.yaml     | 254 +++++
 .../tools/dsymutil/X86/reflection-dump.test   |  42 +
 llvm/tools/dsymutil/DwarfLinkerForBinary.cpp  |  87 +-
 16 files changed, 1836 insertions(+), 11 deletions(-)
 create mode 100644 llvm/include/llvm/BinaryFormat/Swift.def
 create mode 100644 llvm/include/llvm/BinaryFormat/Swift.h
 create mode 100644 llvm/test/tools/dsymutil/Inputs/main.yaml
 create mode 100644 llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
 create mode 100644 llvm/test/tools/dsymutil/Inputs/test.yaml
 create mode 100644 llvm/test/tools/dsymutil/X86/reflection-dump.test

diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def
new file mode 100644
index 0000000000000..39931bec70e57
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/Swift.def
@@ -0,0 +1,26 @@
+//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Macros for running through Swift enumerators.
+//
+//===----------------------------------------------------------------------===//
+
+#if !(defined HANDLE_SWIFT_SECTION)
+#error "Missing macro definition of HANDLE_SWIFT_SECTION"
+#endif
+
+#ifndef HANDLE_SWIFT_SECTION
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)
+#endif
+
+HANDLE_SWIFT_SECTION(Fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd")
+HANDLE_SWIFT_SECTION(Assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty")
+HANDLE_SWIFT_SECTION(Builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn")
+HANDLE_SWIFT_SECTION(Capture, "__swift5_capture", "swift5_capture", ".sw5cptr")
+HANDLE_SWIFT_SECTION(Typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf")
+HANDLE_SWIFT_SECTION(Reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst")
diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h
new file mode 100644
index 0000000000000..63bbfe08c86d4
--- /dev/null
+++ b/llvm/include/llvm/BinaryFormat/Swift.h
@@ -0,0 +1,24 @@
+//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+
+#ifndef LLVM_BINARYFORMAT_SWIFT_H
+#define LLVM_BINARYFORMAT_SWIFT_H
+
+namespace llvm {
+namespace swift {
+
+enum Swift5ReflectionSectionKind {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND,
+#include "llvm/BinaryFormat/Swift.def"
+#undef HANDLE_SWIFT_SECTION
+  Unknown,
+  Last = Unknown
+};
+} // end of namespace swift
+} // end of namespace llvm
+
+#endif
diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index 9a5c6bcaf83f3..8e845ee91b9f7 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H
 #define LLVM_DWARFLINKER_DWARFSTREAMER_H
 
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/DWARFLinker/DWARFLinker.h"
@@ -48,7 +49,7 @@ class DwarfStreamer : public DwarfEmitter {
       : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
         ErrorHandler(Error), WarningHandler(Warning) {}
 
-  bool init(Triple TheTriple);
+  bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
 
   /// Dump the file to the disk.
   void finish();
@@ -85,6 +86,11 @@ class DwarfStreamer : public DwarfEmitter {
   /// Emit the swift_ast section stored in \p Buffer.
   void emitSwiftAST(StringRef Buffer);
 
+  /// Emit the swift reflection section stored in \p Buffer.
+  void emitSwiftReflectionSection(
+      llvm::swift::Swift5ReflectionSectionKind ReflSectionKind,
+      StringRef Buffer, uint32_t Alignment, uint32_t Size);
+
   /// Emit debug_ranges for \p FuncRange by translating the
   /// original \p Entries.
   void emitRangesEntries(
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 88d86d5b675ac..d2307d6922780 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -80,6 +80,10 @@ namespace llvm {
   private:
     Environment Env;
 
+    /// The name of the Segment where Swift5 Reflection Section data will be
+    /// outputted
+    StringRef Swift5ReflectionSegmentName;
+
     /// The triple for this object.
     Triple TT;
 
@@ -399,13 +403,17 @@ namespace llvm {
                        const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI,
                        const SourceMgr *Mgr = nullptr,
                        MCTargetOptions const *TargetOpts = nullptr,
-                       bool DoAutoReset = true);
+                       bool DoAutoReset = true,
+                       StringRef Swift5ReflSegmentName = {});
     MCContext(const MCContext &) = delete;
     MCContext &operator=(const MCContext &) = delete;
     ~MCContext();
 
     Environment getObjectFileType() const { return Env; }
 
+    const StringRef &getSwift5ReflectionSegmentName() const {
+      return Swift5ReflectionSegmentName;
+    }
     const Triple &getTargetTriple() const { return TT; }
     const SourceMgr *getSourceManager() const { return SrcMgr; }
 
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 5e0cccaba77fa..1b4804db783b4 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -15,6 +15,7 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/VersionTuple.h"
@@ -228,6 +229,10 @@ class MCObjectFileInfo {
   MCSection *ReadOnly8Section = nullptr;
   MCSection *ReadOnly16Section = nullptr;
 
+  // Swift5 Reflection Data Sections
+  std::array<MCSection *, swift::Swift5ReflectionSectionKind::Last>
+      Swift5ReflectionSections = {};
+
 public:
   void initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
                             bool LargeCodeModel = false);
@@ -423,6 +428,14 @@ class MCObjectFileInfo {
 
   bool isPositionIndependent() const { return PositionIndependent; }
 
+  // Swift5 Reflection Data Sections
+  MCSection *getSwift5ReflectionSection(
+      llvm::swift::Swift5ReflectionSectionKind ReflSectionKind) {
+    return ReflSectionKind != llvm::swift::Swift5ReflectionSectionKind::Unknown
+               ? Swift5ReflectionSections[ReflSectionKind]
+               : nullptr;
+  }
+
 private:
   bool PositionIndependent = false;
   MCContext *Ctx = nullptr;
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index ede742c47f971..09b6454bb0c14 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -22,6 +22,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
@@ -583,6 +584,9 @@ class MachOObjectFile : public ObjectFile {
 
   StringRef mapDebugSectionName(StringRef Name) const override;
 
+  llvm::swift::Swift5ReflectionSectionKind
+  mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
+
   bool hasPageZeroSegment() const { return HasPageZeroSegment; }
 
   static bool classof(const Binary *v) {
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 12704b1fc88e7..29919db772f0e 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -18,6 +18,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/Magic.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/SymbolicFile.h"
@@ -290,6 +291,11 @@ class ObjectFile : public SymbolicFile {
   virtual void getRelocationTypeName(DataRefImpl Rel,
                                      SmallVectorImpl<char> &Result) const = 0;
 
+  virtual llvm::swift::Swift5ReflectionSectionKind
+  mapReflectionSectionNameToEnumValue(StringRef SectionName) const {
+    return llvm::swift::Swift5ReflectionSectionKind::Unknown;
+  };
+
   Expected<uint64_t> getSymbolValue(DataRefImpl Symb) const;
 
 public:
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 1ab6ead3b5f66..7f9b9a9bc793c 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -27,7 +27,8 @@
 
 namespace llvm {
 
-bool DwarfStreamer::init(Triple TheTriple) {
+bool DwarfStreamer::init(Triple TheTriple,
+                         StringRef Swift5ReflectionSegmentName) {
   std::string ErrorStr;
   std::string TripleName;
   StringRef Context = "dwarf streamer init";
@@ -54,8 +55,9 @@ bool DwarfStreamer::init(Triple TheTriple) {
   if (!MSTI)
     return error("no subtarget info for target " + TripleName, Context), false;
 
-  MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get()));
-  MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false));
+  MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
+                         nullptr, true, Swift5ReflectionSegmentName));
+  MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false));
   MC->setObjectFileInfo(MOFI.get());
 
   MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions);
@@ -302,6 +304,18 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) {
   MS->emitBytes(Buffer);
 }
 
+void DwarfStreamer::emitSwiftReflectionSection(
+    llvm::swift::Swift5ReflectionSectionKind ReflSectionKind, StringRef Buffer,
+    uint32_t Alignment, uint32_t Size) {
+  MCSection *ReflectionSection =
+      MOFI->getSwift5ReflectionSection(ReflSectionKind);
+  if (ReflectionSection == nullptr)
+    return;
+  ReflectionSection->setAlignment(Align(Alignment));
+  MS->SwitchSection(ReflectionSection);
+  MS->emitBytes(Buffer);
+}
+
 /// Emit the debug_range section contents for \p FuncRange by
 /// translating the original \p Entries. The debug_range section
 /// format is totally trivial, consisting just of pairs of address
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index 7f639e9c408fe..eafcee1e0607b 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &,
 MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
                      const MCRegisterInfo *mri, const MCSubtargetInfo *msti,
                      const SourceMgr *mgr, MCTargetOptions const *TargetOpts,
-                     bool DoAutoReset)
-    : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr),
-      DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti),
-      Symbols(Allocator), UsedNames(Allocator),
+                     bool DoAutoReset, StringRef Swift5ReflSegmentName)
+    : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple),
+      SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler),
+      MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator),
       InlineAsmUsedLabelNames(Allocator),
       CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
       AutoReset(DoAutoReset), TargetOptions(TargetOpts) {
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index d7f85f793c55f..77b0b0ee687cb 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -299,6 +299,17 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
   RemarksSection = Ctx->getMachOSection(
       "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata());
 
+  // The architecture of dsymutil makes it very difficult to copy the Swift
+  // reflection metadata sections into the __TEXT segment, so dsymutil creates
+  // these sections in the __DWARF segment instead.
+  if (!Ctx->getSwift5ReflectionSegmentName().empty()) {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
+  Swift5ReflectionSections[llvm::swift::Swift5ReflectionSectionKind::KIND] =   \
+      Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(),       \
+                           MACHO, 0, SectionKind::getMetadata());
+#include "llvm/BinaryFormat/Swift.def"
+  }
+
   TLSExtraDataSection = TLSTLVSection;
 }
 
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 42e257516f4e0..83bc74ff31c40 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
@@ -4765,3 +4766,14 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) {
                              Path.str().c_str());
   return ObjectPaths;
 }
+
+llvm::swift::Swift5ReflectionSectionKind
+MachOObjectFile::mapReflectionSectionNameToEnumValue(
+    StringRef SectionName) const {
+#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
+  .Case(MACHO, llvm::swift::Swift5ReflectionSectionKind::KIND)
+  return StringSwitch<llvm::swift::Swift5ReflectionSectionKind>(SectionName)
+#include "llvm/BinaryFormat/Swift.def"
+      .Default(llvm::swift::Swift5ReflectionSectionKind::Unknown);
+#undef HANDLE_SWIFT_SECTION
+}
diff --git a/llvm/test/tools/dsymutil/Inputs/main.yaml b/llvm/test/tools/dsymutil/Inputs/main.yaml
new file mode 100644
index 0000000000000..d81931d7861c3
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/main.yaml
@@ -0,0 +1,886 @@
+# How to generate this file:
+# 1. First take a swift file and run xcrun swiftc -g -v test.swift 
+# reflection_metadata.swift, make sure the two swift files are in a short path 
+# like /tmp/
+
+# 2. Now you can see what the driver does, generate the object files in the 
+# tmp directory and link them to create the input binary
+
+# 3. Run obj2yaml on the input binary to create a yaml file and strip out the 
+# swift5 reflection sections from the load commands in the text segment
+
+# 4. I ran delta to reduce this file.
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x1000007
+  cpusubtype:      0x3
+  filetype:        0x2
+  ncmds:           18
+  sizeofcmds:      2848
+  flags:           0x200085
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __PAGEZERO
+    vmaddr:          0
+    vmsize:          4294967296
+    fileoff:         0
+    filesize:        0
+    maxprot:         0
+    initprot:        0
+    nsects:          0
+    flags:           0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         952
+    segname:         __TEXT
+    vmaddr:          4294967296
+    vmsize:          16384
+    fileoff:         0
+    filesize:        16384
+    maxprot:         5
+    initprot:        5
+    nsects:          11
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x100003EB0
+        size:            336
+        offset:          0x3EB0
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         392
+    segname:         __DATA_CONST
+    vmaddr:          4294983680
+    vmsize:          16384
+    fileoff:         16384
+    filesize:        16384
+    maxprot:         3
+    initprot:        3
+    nsects:          4
+    flags:           16
+    Sections:
+      - sectname:        __got
+        segname:         __DATA_CONST
+        addr:            0x100004000
+        size:            48
+        offset:          0x4000
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x6
+        reserved1:       0x11
+        reserved2:       0x0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         392
+    segname:         __DATA
+    vmaddr:          4295000064
+    vmsize:          16384
+    fileoff:         32768
+    filesize:        16384
+    maxprot:         3
+    initprot:        3
+    nsects:          4
+    flags:           0
+    Sections:
+      - sectname:        __la_symbol_ptr
+        segname:         __DATA
+        addr:            0x100008000
+        size:            384
+        offset:          0x8088
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x0
+        reserved1:       0x0
+        reserved2:       0x0
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         72
+    segname:         __LINKEDIT
+    vmaddr:          4295016448
+    vmsize:          32768
+    fileoff:         49152
+    filesize:        23584
+    maxprot:         1
+    initprot:        1
+    nsects:          0
+    flags:           0
+  - cmd:             LC_DYLD_INFO_ONLY
+    cmdsize:         48
+    rebase_off:      49152
+    rebase_size:     64
+    bind_off:        49216
+    bind_size:       216
+    weak_bind_off:   0
+    weak_bind_size:  0
+    lazy_bind_off:   49432
+    lazy_bind_size:  600
+    export_off:      50032
+    export_size:     1000
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          51136
+    nsyms:           638
+    stroff:          61504
+    strsize:         11232
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       560
+    iextdefsym:      560
+    nextdefsym:      52
+    iundefsym:       612
+    nundefsym:       26
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  61344
+    nindirectsyms:   40
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_LOAD_DYLINKER
+    cmdsize:         32
+    name:            12
+  - cmd:             LC_UUID
+    cmdsize:         24
+    uuid:            AA0A51FA-8B29-3A7B-85AA-FA6A457B2211
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         32
+    platform:        1
+    minos:           786432
+    sdk:             786688
+    ntools:          1
+  - cmd:             LC_SOURCE_VERSION
+    cmdsize:         16
+    version:         0
+  - cmd:             LC_MAIN
+    cmdsize:         24
+    entryoff:        9376
+    stacksize:       0
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 14942208
+      compatibility_version: 65536
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         56
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 85917696
+      compatibility_version: 65536
+  - cmd:             LC_LOAD_DYLIB
+    cmdsize:         64
+    dylib:
+      name:            24
+      timestamp:       2
+      current_version: 85196845
+      compatibility_version: 65536
+  - cmd:             LC_FUNCTION_STARTS
+    cmdsize:         16
+    dataoff:         51032
+    datasize:        104
+  - cmd:             LC_DATA_IN_CODE
+    cmdsize:         16
+    dataoff:         51136
+    datasize:        0
+LinkEditData:
+  NameList:
+    - n_strx:          2355
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976208
+    - n_strx:          2398
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976224
+    - n_strx:          2440
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976240
+    - n_strx:          2479
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976256
+    - n_strx:          2509
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976272
+    - n_strx:          2570
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976320
+    - n_strx:          2590
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976512
+    - n_strx:          2635
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976576
+    - n_strx:          2683
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976608
+    - n_strx:          2731
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976640
+    - n_strx:          2751
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976656
+    - n_strx:          2775
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976704
+    - n_strx:          2791
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976720
+    - n_strx:          2814
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976752
+    - n_strx:          2838
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976768
+    - n_strx:          2873
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294976784
+    - n_strx:          2906
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294976832
+    - n_strx:          2926
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294977104
+    - n_strx:          2946
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294977200
+    - n_strx:          2966
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977376
+    - n_strx:          3008
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977392
+    - n_strx:          3049
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977408
+    - n_strx:          3087
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977424
+    - n_strx:          3116
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294977440
+    - n_strx:          3176
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977488
+    - n_strx:          3201
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977504
+    - n_strx:          3232
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977552
+    - n_strx:          3270
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977648
+    - n_strx:          3318
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294977664
+    - n_strx:          3364
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294978352
+    - n_strx:          3411
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294978464
+    - n_strx:          3447
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294978688
+    - n_strx:          3506
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294978832
+    - n_strx:          3567
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294978944
+    - n_strx:          3587
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979024
+    - n_strx:          3607
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979056
+    - n_strx:          3627
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979136
+    - n_strx:          3647
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294979232
+    - n_strx:          3666
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979264
+    - n_strx:          3686
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979328
+    - n_strx:          3706
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979536
+    - n_strx:          3726
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979856
+    - n_strx:          3746
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979872
+    - n_strx:          3766
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979888
+    - n_strx:          3786
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979920
+    - n_strx:          3814
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294979936
+    - n_strx:          3842
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294980240
+    - n_strx:          3871
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294980288
+    - n_strx:          3898
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980320
+    - n_strx:          3927
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980368
+    - n_strx:          3951
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980384
+    - n_strx:          3982
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980448
+    - n_strx:          4001
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294980464
+    - n_strx:          4032
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294980512
+    - n_strx:          4060
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294980800
+    - n_strx:          4088
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981120
+    - n_strx:          4116
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981136
+    - n_strx:          4144
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981152
+    - n_strx:          4172
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294981184
+    - n_strx:          4208
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294981248
+    - n_strx:          4225
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          128
+      n_value:         4294981280
+    - n_strx:          4253
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981328
+    - n_strx:          4276
+      n_type:          0x1E
+      n_sect:          1
+      n_desc:          0
+      n_value:         4294981376
+    - n_strx:          4294
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          128
+      n_value:         4294981764
+    - n_strx:          4306
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294981824
+    - n_strx:          4322
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294981952
+    - n_strx:          4349
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294981960
+    - n_strx:          4387
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294981968
+    - n_strx:          4423
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294982160
+    - n_strx:          4474
+      n_type:          0xE
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294982352
+    - n_strx:          4503
+      n_type:          0xE
+      n_sect:          5
+      n_desc:          0
+      n_value:         4294982448
+    - n_strx:          4530
+      n_type:          0x1E
+      n_sect:          5
+      n_desc:          128
+      n_value:         4294982464
+    - n_strx:          4558
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982466
+    - n_strx:          4571
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982470
+    - n_strx:          4608
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982476
+    - n_strx:          4639
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982498
+    - n_strx:          4666
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982506
+    - n_strx:          4691
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982510
+    - n_strx:          4727
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982516
+    - n_strx:          4758
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982522
+    - n_strx:          4790
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982528
+    - n_strx:          4820
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982534
+    - n_strx:          4859
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982540
+    - n_strx:          4902
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982554
+    - n_strx:          4945
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         4294982564
+    - n_strx:          4986
+      n_type:          0x1E
+      n_sect:          6
+      n_desc:          128
+      n_value:         0
+    - n_strx:          5987
+      n_type:          0x66
+      n_sect:          3
+      n_desc:          1
+      n_value:         1638431181
+    - n_strx:          7104
+      n_type:          0x66
+      n_sect:          3
+      n_desc:          1
+      n_value:         1638431191
+  StringTable:
+    - ' '
+    - '_$s4main10MyProtocolMp'
+    - '_$s4main10MyProtocolTL'
+    - '_$s4main11ConformanceV5innerSivM'
+    - '_$s4main11ConformanceV5innerSivg'
+    - '_$s4main11ConformanceV5innerSivpMV'
+    - '_$s4main11ConformanceV5innerSivpfi'
+    - '_$s4main11ConformanceV5innerSivs'
+    - '_$s4main11ConformanceVAA10MyProtocolAAMc'
+    - '_$s4main11ConformanceVAA10MyProtocolAAWP'
+    - '_$s4main11ConformanceVMa'
+    - '_$s4main11ConformanceVMn'
+    - '_$s4main11ConformanceVN'
+    - '_$s4main12Conformance2V5innerSivM'
+    - '_$s4main12Conformance2V5innerSivg'
+    - '_$s4main12Conformance2V5innerSivpMV'
+    - '_$s4main12Conformance2V5innerSivpfi'
+    - '_$s4main12Conformance2V5innerSivs'
+    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
+    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
+    - '_$s4main12Conformance2VMa'
+    - '_$s4main12Conformance2VMn'
+    - '_$s4main12Conformance2VN'
+    - '_$s4main13MyGenericEnumOMa'
+    - '_$s4main13MyGenericEnumOMn'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc'
+    - '_$s4main14MyGenericClassCMa'
+    - '_$s4main14MyGenericClassCMn'
+    - '_$s4main14MyGenericClassCfD'
+    - '_$s4main14MyGenericClassCfd'
+    - '_$s4main15MyGenericStructVMa'
+    - '_$s4main15MyGenericStructVMn'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF'
+    - '_$s4main6MyEnumOMa'
+    - '_$s4main6MyEnumOMn'
+    - '_$s4main6MyEnumON'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc'
+    - '_$s4main7MyClassCMa'
+    - '_$s4main7MyClassCMm'
+    - '_$s4main7MyClassCMn'
+    - '_$s4main7MyClassCN'
+    - '_$s4main7MyClassCfD'
+    - '_$s4main7MyClassCfd'
+    - '_$s4main8MyStructVMa'
+    - '_$s4main8MyStructVMn'
+    - '_$s4main8MyStructVN'
+    - '_$s5Inner4main10MyProtocolPTl'
+    - __mh_execute_header
+    - _main
+    - '_$sBi64_WV'
+    - '_$sBoWV'
+    - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC'
+    - '_$sSSN'
+    - '_$sSaMa'
+    - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF'
+    - '_$ss5print_9separator10terminatoryypd_S2StF'
+    - '_$sypN'
+    - '_$sytWV'
+    - '_OBJC_CLASS_$__TtCs12_SwiftObject'
+    - '_OBJC_METACLASS_$__TtCs12_SwiftObject'
+    - __objc_empty_cache
+    - _objc_opt_self
+    - _swift_allocObject
+    - _swift_allocateGenericClassMetadata
+    - _swift_allocateGenericValueMetadata
+    - _swift_bridgeObjectRelease
+    - _swift_checkMetadataState
+    - _swift_deallocClassInstance
+    - _swift_deallocObject
+    - _swift_getAssociatedTypeWitness
+    - _swift_getGenericMetadata
+    - _swift_initClassMetadata2
+    - _swift_release
+    - _swift_retain
+    - dyld_stub_binder
+    - '_$s4main12Conformance2V5innerSivM.resume.0'
+    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
+    - '_$s4main12Conformance2V5innerACSi_tcfC'
+    - '_$s4main12Conformance2VACycfC'
+    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - '_$s4main3AppVAAyyFZ'
+    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
+    - '_$s4main3AppVACycfC'
+    - '_$s4main3AppV5$mainyyFZ'
+    - '_$s4main3AppVMa'
+    - '_$sSa12_endMutationyyF'
+    - '_$s4main7MyClassC1iSivg'
+    - '_$s4main7MyClassC2msAA0B6StructVvg'
+    - '_$s4main7MyClassC2meAA0B4EnumOvg'
+    - '_$s4main6MyEnumOWOy'
+    - '_$s4main6MyEnumOWOe'
+    - '_$s4main6MyEnumOWOh'
+    - '_$s4main11ConformanceV5innerSivM.resume.0'
+    - '_$s4main11ConformanceV5innerACSi_tcfcfA_'
+    - '_$s4main11ConformanceV5innerACSi_tcfC'
+    - '_$s4main11ConformanceVACycfC'
+    - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - '_$s4main8MyStructVACycfC'
+    - '_$s4main14MyGenericClassC1txvg'
+    - '_$s4main14MyGenericClassC1i5InnerQzvg'
+    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg'
+    - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg'
+    - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh'
+    - '_$s4main15MyGenericStructVACyxGycfC'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA'
+    - '_$s4main6MyEnumOwCP'
+    - '_$s4main6MyEnumOwxx'
+    - '_$s4main6MyEnumOwcp'
+    - '_$s4main6MyEnumOwca'
+    - ___swift_memcpy9_8
+    - '_$s4main6MyEnumOwta'
+    - '_$s4main6MyEnumOwet'
+    - '_$s4main6MyEnumOwst'
+    - '_$s4main6MyEnumOwug'
+    - '_$s4main6MyEnumOwup'
+    - '_$s4main6MyEnumOwui'
+    - '_$s4main14MyGenericClassCMi'
+    - '_$s4main14MyGenericClassCMr'
+    - '_$s4main15MyGenericStructVMi'
+    - '_$s4main13MyGenericEnumOMi'
+    - ___swift_initWithCopy_strong
+    - ___swift_destroy_strong
+    - ___swift_assignWithCopy_strong
+    - ___swift_memcpy8_8
+    - ___swift_assignWithTake_strong
+    - '_$s4main13MyGenericEnumOwet'
+    - '_$s4main13MyGenericEnumOwst'
+    - '_$s4main13MyGenericEnumOwug'
+    - '_$s4main13MyGenericEnumOwup'
+    - '_$s4main13MyGenericEnumOwui'
+    - ___swift_instantiateGenericMetadata
+    - ___chkstk_darwin
+    - ___chkstk_darwin_llvm_probe
+    - ___chkstk_darwin_probe
+    - ____chkstk_darwin
+    - '_$s4mainMXM'
+    - '_$s4main3AppVMn'
+    - '_$s4main7MyClassC1iSivpWvd'
+    - '_$s4main7MyClassC2msAA0B6StructVvpWvd'
+    - '_$s4main7MyClassC2meAA0B4EnumOvpWvd'
+    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd'
+    - '_$s4main15MyGenericStructVMP'
+    - '_$s4main13MyGenericEnumOMP'
+    - ___swift_reflection_version
+    - _symbolic Si
+    - _symbolic _____ 4main12Conformance2V
+    - '_symbolic $s4main10MyProtocolP'
+    - _symbolic _____ 4main3AppV
+    - _symbolic x
+    - _symbolic B0
+    - _symbolic _____ 4main11ConformanceV
+    - _symbolic _____ 4main7MyClassC
+    - _symbolic _____ 4main8MyStructV
+    - _symbolic _____ 4main6MyEnumO
+    - _symbolic _____ 4main14MyGenericClassC
+    - _symbolic 5Inner_____Qz 4main10MyProtocolP
+    - _symbolic _____yxG 4main15MyGenericStructV
+    - _symbolic _____yxG 4main13MyGenericEnumO
+    - _symbolic _____ 4main15MyGenericStructV
+    - _symbolic _____ 4main13MyGenericEnumO
+    - _symbolic _____yxG 4main14MyGenericClassC
+    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
+    - '_$s4main11ConformanceVAA10MyProtocolAAMA'
+    - '_$s4main12Conformance2VMF'
+    - '_$s4main3AppVMF'
+    - '_$s4main10MyProtocol_pMF'
+    - '_$s4main7MyClassCMF'
+    - '_$s4main11ConformanceVMF'
+    - '_$s4main8MyStructVMF'
+    - '_$s4main6MyEnumOMF'
+    - '_$s4main14MyGenericClassCMF'
+    - '_$s4main15MyGenericStructVMF'
+    - '_$s4main13MyGenericEnumOMF'
+    - '_$s4main6MyEnumOMB'
+    - '_$s4main12Conformance2VMf'
+    - '_$s4main3AppVMf'
+    - '_$s4main3AppVN'
+    - '_$s4main11ConformanceVMf'
+    - '_$s4main8MyStructVMf'
+    - '_$s4main6MyEnumOWV'
+    - '_$s4main6MyEnumOMf'
+    - ___unnamed_23
+    - '_$s4main14MyGenericClassCMP'
+    - '_$s4main13MyGenericEnumOWV'
+    - __METACLASS_DATA__TtC4main7MyClass
+    - __IVARS__TtC4main7MyClass
+    - __DATA__TtC4main7MyClass
+    - __IVARS__TtC4main14MyGenericClass
+    - __dyld_private
+    - '_$s4main7MyClassCMf'
+    - '_$s4main14MyGenericClassCMI'
+    - '_$s4main15MyGenericStructVMI'
+    - '_$s4main13MyGenericEnumOMI'
+    - '/tmp/main-1.swiftmodule'
+    - '/Users/shubham/Development/test76973336/final2objfiletest/'
+    - test.swift
+    - '/tmp/test-1.o'
+    - '_$s4main12Conformance2V5innerSivpfi'
+    - '_$s4main12Conformance2V5innerSivg'
+    - '_$s4main12Conformance2V5innerSivs'
+    - '_$s4main12Conformance2V5innerSivM'
+    - '_$s4main12Conformance2V5innerSivM.resume.0'
+    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
+    - '_$s4main12Conformance2V5innerACSi_tcfC'
+    - '_$s4main12Conformance2VACycfC'
+    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - '_$s4main3AppVAAyyFZ'
+    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
+    - '_$s4main3AppVACycfC'
+    - '_$s4main3AppV5$mainyyFZ'
+    - _main
+    - '_$s4main12Conformance2VMa'
+    - '_$s4main3AppVMa'
+    - '_$sSa12_endMutationyyF'
+    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
+    - '_$s4main12Conformance2V5innerSivpMV'
+    - '_$s4mainMXM'
+    - '_$s4main12Conformance2VMn'
+    - '_$s4main3AppVMn'
+    - _symbolic Si
+    - _symbolic _____ 4main12Conformance2V
+    - '_symbolic $s4main10MyProtocolP'
+    - _symbolic _____ 4main3AppV
+    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
+    - '_$s4main12Conformance2VMF'
+    - '_$s4main3AppVMF'
+    - '_$s4main12Conformance2VMf'
+    - '_$s4main12Conformance2VN'
+    - '_$s4main3AppVMf'
+    - '_$s4main3AppVN'
+    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
+    - reflection_metadata.swift
+    - '/tmp/reflection_metadata-1.o'
diff --git a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
new file mode 100644
index 0000000000000..b2179a23bf28d
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
@@ -0,0 +1,436 @@
+# How to generate this file:
+# 1. First take a swift file and run xcrun swiftc -g -v file.swift 
+# secondfile.swift, make sure the two swift files are in a short path like /tmp/
+
+# 2. Now you can see what the driver does, generate the object files in the 
+# tmp directory
+
+# 3. Run obj2yaml on object file to create a yaml file
+
+# 4. I ran delta to reduce this file.
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x1000007
+  cpusubtype:      0x3
+  filetype:        0x1
+  ncmds:           8
+  sizeofcmds:      2800
+  flags:           0x2000
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         2552
+    segname:         ''
+    vmaddr:          0
+    vmsize:          21352
+    fileoff:         2832
+    filesize:        20967
+    maxprot:         7
+    initprot:        7
+    nsects:          31
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0
+        size:            4571
+        offset:          0xB10
+        align:           4
+        reloff:          0x5CF8
+        nreloc:          74
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        relocations:
+          - address:         0x11A1
+            symbolnum:       142
+            pcrel:           true
+            length:          2
+            extern:          true
+            type:            1
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_typeref
+        segname:         __TEXT
+        addr:            0x11DC
+        size:            117
+        offset:          0x1CEC
+        align:           1
+        reloff:          0x5F48
+        nreloc:          22
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         7800423000005369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0035496E6E657201F9FFFFFF517A0001FFFFFFFF797847000001FFFFFFFF797847000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF79784700
+        relocations:
+          - address:         0x6D
+            symbolnum:       163
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_capture
+        segname:         __TEXT
+        addr:            0x1254
+        size:            24
+        offset:          0x1D64
+        align:           2
+        reloff:          0x5FF8
+        nreloc:          6
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         010000000100000002000000F4FFFFFFF0FFFFFFECFFFFFF
+        relocations:
+          - address:         0x14
+            symbolnum:       29
+            pcrel:           false
+            length:          3
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_reflstr
+        segname:         __TEXT
+        addr:            0x17D8
+        size:            37
+        offset:          0x22E8
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         496E6E65720069006D73006D6500696E6E6572004300490074006D6773006D676500474300
+      - sectname:        __swift5_assocty
+        segname:         __TEXT
+        addr:            0x1800
+        size:            24
+        offset:          0x2310
+        align:           2
+        reloff:          0x6530
+        nreloc:          8
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF
+        relocations:
+          - address:         0x14
+            symbolnum:       31
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            5
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_fieldmd
+        segname:         __TEXT
+        addr:            0x1818
+        size:            260
+        offset:          0x2328
+        align:           2
+        reloff:          0x6570
+        nreloc:          60
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         000000000000000004000C0000000000000000000000000001000C000300000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000000000000000000003000C000200000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF000000000000000001000C000400000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF00000000C8FFFFFFC4FFFFFF000000000000000000000C0000000000000000000000000002000C000100000000000000ECFFFFFFE8FFFFFF
+        relocations:
+          - address:         0x100
+            symbolnum:       71
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_builtin
+        segname:         __TEXT
+        addr:            0x1AC8
+        size:            20
+        offset:          0x25D8
+        align:           2
+        reloff:          0x67F8
+        nreloc:          2
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         00000000090000000800010010000000FE000000
+        relocations:
+          - address:         0x0
+            symbolnum:       52
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            5
+            scattered:       false
+            value:           0
+      - sectname:        __bss
+        segname:         __DATA
+        addr:            0x3372
+        size:            2084
+        offset:          0x50B0
+        align:           3
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x6800000B
+        reserved1:       0x0
+        reserved2:       0x0
+        relocations:
+          - address:         0x56
+            symbolnum:       1
+            pcrel:           false
+            length:          3
+            extern:          false
+            type:            0
+            scattered:       false
+            value:           0
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         24
+    platform:        1
+    minos:           786432
+    sdk:             786688
+    ntools:          0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          27888
+    nsyms:           185
+    stroff:          30848
+    strsize:         5056
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       79
+    iextdefsym:      79
+    nextdefsym:      87
+    iundefsym:       166
+    nundefsym:       19
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         40
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, 
+                       0x0, 0x0, 0x0, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         24
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, 
+                       0x6F, 0x72, 0x65, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         32
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, 
+                       0x6E, 0x63, 0x79, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         24
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, 
+                       0x0, 0x0, 0x0 ]
+LinkEditData:
+  NameList:
+    - n_strx:          5014
+      n_type:          0xE
+      n_sect:          1
+      n_desc:          0
+      n_value:         5600
+  StringTable:
+    - ''
+    - l_objectdestroy
+    - '_$s4main6MyEnumOWOy'
+    - '_$s4main6MyEnumOwxx'
+    - _symbolic x
+    - '_$s4main6MyEnumOwst'
+    - '_$s4main13MyGenericEnumOwst'
+    - '_$s4main6MyEnumOwet'
+    - '_$s4main13MyGenericEnumOwet'
+    - '_OBJC_CLASS_$__TtCs12_SwiftObject'
+    - '_OBJC_METACLASS_$__TtCs12_SwiftObject'
+    - _swift_deallocObject
+    - _swift_allocObject
+    - '_$s4main11ConformanceV5innerSivs'
+    - _swift_getAssociatedTypeWitness
+    - __IVARS__TtC4main7MyClass
+    - __DATA__TtC4main7MyClass
+    - __METACLASS_DATA__TtC4main7MyClass
+    - __IVARS__TtC4main14MyGenericClass
+    - l_protocols
+    - _objc_classes
+    - l_protocol_conformances
+    - l__swift5_reflection_descriptor
+    - l_coro.devirt.trigger
+    - '_$s4main14MyGenericClassCMr'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq'
+    - '_$s4main6MyEnumOwup'
+    - '_$s4main13MyGenericEnumOwup'
+    - '_$s4main6MyEnumOwcp'
+    - '_$s4main10MyProtocolMp'
+    - ___swift_reflection_version
+    - ____chkstk_darwin
+    - _swift_retain
+    - '_$s4main8MyStructVMn'
+    - '_$s4main15MyGenericStructVMn'
+    - '_$s4main11ConformanceVMn'
+    - '_$s4main6MyEnumOMn'
+    - '_$s4main13MyGenericEnumOMn'
+    - '_$s4main7MyClassCMn'
+    - '_$s4main14MyGenericClassCMn'
+    - '_$s4main7MyClassCMm'
+    - '_$s5Inner4main10MyProtocolPTl'
+    - '_$s4main6MyEnumOwui'
+    - '_$s4main13MyGenericEnumOwui'
+    - '_$s4main11ConformanceV5innerSivpfi'
+    - _symbolic Si
+    - '_$s4main15MyGenericStructVMi'
+    - '_$s4main13MyGenericEnumOMi'
+    - '_$s4main14MyGenericClassCMi'
+    - l_llvm.swift_module_hash
+    - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh'
+    - '_$s4main6MyEnumOWOh'
+    - '_$s4main14MyGenericClassC1i5InnerQzvg'
+    - '_$s4main14MyGenericClassC1txvg'
+    - '_$s4main11ConformanceV5innerSivg'
+    - '_$s4main7MyClassC1iSivg'
+    - '_$s4main7MyClassC2msAA0B6StructVvg'
+    - '_$s4main7MyClassC2meAA0B4EnumOvg'
+    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg'
+    - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg'
+    - '_$s4main6MyEnumOwug'
+    - '_$s4main13MyGenericEnumOwug'
+    - ___swift_initWithCopy_strong
+    - ___swift_assignWithCopy_strong
+    - ___swift_destroy_strong
+    - ___swift_assignWithTake_strong
+    - _objc_opt_self
+    - '_$s4main8MyStructVMf'
+    - '_$s4main11ConformanceVMf'
+    - '_$s4main6MyEnumOMf'
+    - '_$s4main7MyClassCMf'
+    - _swift_checkMetadataState
+    - _swift_release
+    - l_type_metadata_table
+    - __objc_empty_cache
+    - _swift_deallocClassInstance
+    - ___chkstk_darwin_llvm_probe
+    - '_$s4main6MyEnumOWOe'
+    - '_$s4main7MyClassC1iSivpWvd'
+    - '_$s4main7MyClassC2msAA0B6StructVvpWvd'
+    - '_$s4main7MyClassC2meAA0B4EnumOvpWvd'
+    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd'
+    - '_$s4main7MyClassCfd'
+    - '_$s4main14MyGenericClassCfd'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc'
+    - '_$s4main11ConformanceVAA10MyProtocolAAMc'
+    - '_$s4main6MyEnumOwta'
+    - l_metadata
+    - _swift_allocateGenericClassMetadata
+    - _swift_allocateGenericValueMetadata
+    - _swift_getGenericMetadata
+    - ___swift_instantiateGenericMetadata
+    - '_$s4main6MyEnumOwca'
+    - '_$s4main8MyStructVMa'
+    - '_$s4main15MyGenericStructVMa'
+    - '_$s4main11ConformanceVMa'
+    - '_$s4main6MyEnumOMa'
+    - '_$s4main13MyGenericEnumOMa'
+    - '_$s4main7MyClassCMa'
+    - '_$s4main14MyGenericClassCMa'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_'
+    - '_$s4main11ConformanceV5innerACSi_tcfcfA_'
+    - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - _symbolic _____ 4main8MyStructV
+    - _symbolic _____ 4main15MyGenericStructV
+    - _symbolic _____yxG 4main15MyGenericStructV
+    - _symbolic _____ 4main11ConformanceV
+    - '_$sytWV'
+    - '_$sBoWV'
+    - '_$sBi64_WV'
+    - '_$s4main6MyEnumOWV'
+    - '_$s4main13MyGenericEnumOWV'
+    - '_$s4main11ConformanceV5innerSivpMV'
+    - '_symbolic $s4main10MyProtocolP'
+    - _symbolic 5Inner_____Qz 4main10MyProtocolP
+    - '_$s4main11ConformanceVAA10MyProtocolAAWP'
+    - '_$s4main15MyGenericStructVMP'
+    - '_$s4main13MyGenericEnumOMP'
+    - '_$s4main14MyGenericClassCMP'
+    - '_$s4main6MyEnumOwCP'
+    - _symbolic _____ 4main6MyEnumO
+    - _symbolic _____ 4main13MyGenericEnumO
+    - _symbolic _____yxG 4main13MyGenericEnumO
+    - '_$s4main8MyStructVN'
+    - '_$s4main11ConformanceVN'
+    - '_$s4main6MyEnumON'
+    - '_$s4main7MyClassCN'
+    - '_$s4main11ConformanceV5innerSivM'
+    - '_$s4mainMXM'
+    - '_$s4main10MyProtocolTL'
+    - '_$s4main15MyGenericStructVMI'
+    - '_$s4main13MyGenericEnumOMI'
+    - '_$s4main14MyGenericClassCMI'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF'
+    - '_$s4main10MyProtocol_pMF'
+    - '_$s4main8MyStructVMF'
+    - '_$s4main15MyGenericStructVMF'
+    - '_$s4main11ConformanceVMF'
+    - '_$s4main6MyEnumOMF'
+    - '_$s4main13MyGenericEnumOMF'
+    - '_$s4main7MyClassCMF'
+    - '_$s4main14MyGenericClassCMF'
+    - '_$s4main7MyClassCfD'
+    - '_$s4main14MyGenericClassCfD'
+    - _symbolic _____ 4main7MyClassC
+    - _symbolic _____ 4main14MyGenericClassC
+    - _symbolic _____yxG 4main14MyGenericClassC
+    - '_$s4main15MyGenericStructVACyxGycfC'
+    - '_$s4main8MyStructVACycfC'
+    - '_$s4main11ConformanceVACycfC'
+    - '_$s4main11ConformanceV5innerACSi_tcfC'
+    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC'
+    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC'
+    - '_$s4main6MyEnumOMB'
+    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA'
+    - '_$s4main11ConformanceVAA10MyProtocolAAMA'
+    - l___unnamed_29
+    - l___unnamed_19
+    - ___swift_memcpy9_8
+    - ___swift_memcpy8_8
+    - l___unnamed_28
+    - l___unnamed_18
+    - l___unnamed_27
+    - l___unnamed_17
+    - l___unnamed_26
+    - l___unnamed_16
+    - l___unnamed_25
+    - l___unnamed_15
+    - l___unnamed_4
+    - l___unnamed_24
+    - l___unnamed_14
+    - l___unnamed_3
+    - ___unnamed_23
+    - l___unnamed_13
+    - _swift_initClassMetadata2
+    - l___unnamed_2
+    - l___unnamed_12
+    - l___unnamed_1
+    - l___unnamed_11
+    - _symbolic B0
+    - l___unnamed_30
+    - l___unnamed_10
+    - '_$s4main11ConformanceV5innerSivM.resume.0'
diff --git a/llvm/test/tools/dsymutil/Inputs/test.yaml b/llvm/test/tools/dsymutil/Inputs/test.yaml
new file mode 100644
index 0000000000000..da3aa9a8aaf34
--- /dev/null
+++ b/llvm/test/tools/dsymutil/Inputs/test.yaml
@@ -0,0 +1,254 @@
+# How to generate this file:
+# 1. First take a swift file and run xcrun swiftc -g -v file.swift 
+# secondfile.swift, make sure the two swift files are in a short path like /tmp/
+
+# 2. Now you can see what the driver does, generate the object files in the 
+# tmp directory
+
+# 3. Run obj2yaml on object file to create a yaml file
+
+# 4. I ran delta to reduce this file.
+
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+  cputype:         0x1000007
+  cpusubtype:      0x3
+  filetype:        0x1
+  ncmds:           8
+  sizeofcmds:      2240
+  flags:           0x2000
+  reserved:        0x0
+LoadCommands:
+  - cmd:             LC_SEGMENT_64
+    cmdsize:         1992
+    segname:         ''
+    vmaddr:          0
+    vmsize:          6592
+    fileoff:         2272
+    filesize:        6592
+    maxprot:         7
+    initprot:        7
+    nsects:          24
+    flags:           0
+    Sections:
+      - sectname:        __text
+        segname:         __TEXT
+        addr:            0x0
+        size:            593
+        offset:          0x8E0
+        align:           4
+        reloff:          0x22A0
+        nreloc:          24
+        flags:           0x80000400
+        reserved1:       0x0
+        reserved2:       0x0
+        relocations:
+          - address:         0x233
+            symbolnum:       2
+            pcrel:           true
+            length:          2
+            extern:          true
+            type:            4
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_typeref
+        segname:         __TEXT
+        addr:            0x2D6
+        size:            38
+        offset:          0xBB6
+        align:           1
+        reloff:          0x2418
+        nreloc:          4
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         5369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF00
+        relocations:
+          - address:         0x21
+            symbolnum:       46
+            pcrel:           false
+            length:          3
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_reflstr
+        segname:         __TEXT
+        addr:            0x318
+        size:            12
+        offset:          0xBF8
+        align:           0
+        reloff:          0x0
+        nreloc:          0
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         496E6E657200696E6E657200
+      - sectname:        __swift5_assocty
+        segname:         __TEXT
+        addr:            0x324
+        size:            24
+        offset:          0xC04
+        align:           2
+        reloff:          0x2450
+        nreloc:          8
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF
+        relocations:
+          - address:         0x14
+            symbolnum:       5
+            pcrel:           false
+            length:          2
+            extern:          true
+            type:            0
+            scattered:       false
+            value:           0
+      - sectname:        __swift5_fieldmd
+        segname:         __TEXT
+        addr:            0x378
+        size:            44
+        offset:          0xC58
+        align:           2
+        reloff:          0x24C0
+        nreloc:          8
+        flags:           0x10000000
+        reserved1:       0x0
+        reserved2:       0x0
+        content:         000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000
+        relocations:
+          - address:         0x1C
+            symbolnum:       12
+            pcrel:           false
+            length:          3
+            extern:          false
+            type:            0
+            scattered:       false
+            value:           0
+  - cmd:             LC_BUILD_VERSION
+    cmdsize:         24
+    platform:        1
+    minos:           786432
+    sdk:             786688
+    ntools:          0
+  - cmd:             LC_SYMTAB
+    cmdsize:         24
+    symoff:          9824
+    nsyms:           57
+    stroff:          10736
+    strsize:         1544
+  - cmd:             LC_DYSYMTAB
+    cmdsize:         80
+    ilocalsym:       0
+    nlocalsym:       16
+    iextdefsym:      16
+    nextdefsym:      31
+    iundefsym:       47
+    nundefsym:       10
+    tocoff:          0
+    ntoc:            0
+    modtaboff:       0
+    nmodtab:         0
+    extrefsymoff:    0
+    nextrefsyms:     0
+    indirectsymoff:  0
+    nindirectsyms:   0
+    extreloff:       0
+    nextrel:         0
+    locreloff:       0
+    nlocrel:         0
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         40
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, 
+                       0x0, 0x0, 0x0, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         24
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, 
+                       0x6F, 0x72, 0x65, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         32
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, 
+                       0x6E, 0x63, 0x79, 0x0 ]
+  - cmd:             LC_LINKER_OPTION
+    cmdsize:         24
+    count:           1
+    PayloadBytes:    [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, 
+                       0x0, 0x0, 0x0 ]
+LinkEditData:
+  NameList:
+    - n_strx:          1494
+      n_type:          0xE
+      n_sect:          9
+      n_desc:          0
+      n_value:         0
+  StringTable:
+    - ''
+    - l_entry_point
+    - '_$s4main12Conformance2V5innerSivs'
+    - l_protocol_conformances
+    - l_coro.devirt.trigger
+    - '_$s4main10MyProtocolMp'
+    - ___swift_reflection_version
+    - _main
+    - '_$s4main3AppVMn'
+    - '_$s4main12Conformance2VMn'
+    - '_$s4main12Conformance2V5innerSivpfi'
+    - _symbolic Si
+    - l_llvm.swift_module_hash
+    - '_$s4main12Conformance2V5innerSivg'
+    - '_$s4main3AppVMf'
+    - '_$s4main12Conformance2VMf'
+    - _swift_bridgeObjectRelease
+    - l_type_metadata_table
+    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
+    - '_$sSaMa'
+    - '_$s4main3AppVMa'
+    - '_$s4main12Conformance2VMa'
+    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
+    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
+    - '_$s4main3AppV5$mainyyFZ'
+    - '_$s4main3AppVAAyyFZ'
+    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
+    - _symbolic _____ 4main3AppV
+    - '_$sytWV'
+    - '_$sBi64_WV'
+    - '_$s4main12Conformance2V5innerSivpMV'
+    - _symbolic _____ 4main12Conformance2V
+    - '_symbolic $s4main10MyProtocolP'
+    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
+    - '_$sypN'
+    - '_$s4main3AppVN'
+    - '_$s4main12Conformance2VN'
+    - '_$sSSN'
+    - '_$s4main12Conformance2V5innerSivM'
+    - '_$s4mainMXM'
+    - '_$sSa12_endMutationyyF'
+    - '_$ss5print_9separator10terminatoryypd_S2StF'
+    - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF'
+    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
+    - '_$s4main3AppVMF'
+    - '_$s4main12Conformance2VMF'
+    - '_$s4main3AppVACycfC'
+    - '_$s4main12Conformance2VACycfC'
+    - '_$s4main12Conformance2V5innerACSi_tcfC'
+    - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC'
+    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
+    - l___unnamed_8
+    - l___unnamed_7
+    - l___unnamed_6
+    - l___unnamed_5
+    - l___unnamed_4
+    - '_$s4main12Conformance2V5innerSivM.resume.0'
+    - ''
+    - ''
+    - ''
+    - ''
+    - ''
+    - ''
+    - ''
diff --git a/llvm/test/tools/dsymutil/X86/reflection-dump.test b/llvm/test/tools/dsymutil/X86/reflection-dump.test
new file mode 100644
index 0000000000000..55f300c8bb9aa
--- /dev/null
+++ b/llvm/test/tools/dsymutil/X86/reflection-dump.test
@@ -0,0 +1,42 @@
+RUN: rm -rf %t.dir && mkdir -p %t.dir/tmp
+RUN: cp %p/../Inputs/main.yaml %t.dir
+RUN: cp %p/../Inputs/test.yaml %t.dir
+RUN: cp %p/../Inputs/reflection_metadata.yaml %t.dir
+RUN: yaml2obj %p/../Inputs/main.yaml -o %t.dir/main
+RUN: yaml2obj %p/../Inputs/test.yaml -o %t.dir/tmp/test-1.o
+RUN: yaml2obj %p/../Inputs/reflection_metadata.yaml -o %t.dir/tmp/reflection_metadata-1.o
+
+RUN: dsymutil -oso-prepend-path=%t.dir %t.dir/main -o %t.dir/main.dSYM
+RUN: llvm-objdump -s %t.dir/main.dSYM/Contents/Resources/DWARF/main | FileCheck %s
+
+CHECK: Contents of section __DWARF,__swift5_typeref:
+CHECK-NEXT:  10000e000 53690000 01ffffff ff002473 346d6169  Si........$s4mai
+CHECK-NEXT:  10000e010 6e31304d 7950726f 746f636f 6c500000  n10MyProtocolP..
+CHECK-NEXT:  10000e020 01ffffff ff007800 42300000 53690000  ......x.B0..Si..
+CHECK-NEXT:  10000e030 01ffffff ff002473 346d6169 6e31304d  ......$s4main10M
+CHECK-NEXT:  10000e040 7950726f 746f636f 6c500000 01ffffff  yProtocolP......
+
+CHECK: Contents of section __DWARF,__swift5_reflstr:
+CHECK-NEXT:  10000e09b 496e6e65 7200696e 6e657200 496e6e65  Inner.inner.Inne
+CHECK-NEXT:  10000e0ab 72006900 6d73006d 6500696e 6e657200  r.i.ms.me.inner.
+CHECK-NEXT:  10000e0bb 43004900 74006d67 73006d67 65004743  C.I.t.mgs.mge.GC
+CHECK-NEXT:  10000e0cb 00
+
+CHECK: Contents of section __DWARF,__swift5_assocty:
+CHECK-NEXT:  10000e0cc 00000000 fcffffff 01000000 08000000  ................
+CHECK-NEXT:  10000e0dc f0ffffff ecffffff 00000000 fcffffff  ................
+CHECK-NEXT:  10000e0ec 01000000 08000000 f0ffffff ecffffff  ................
+
+CHECK: Contents of section __DWARF,__swift5_fieldmd:
+CHECK-NEXT:  10000e0fc 00000000 00000000 00000c00 01000000  ................
+CHECK-NEXT:  10000e10c 02000000 ecffffff e8ffffff 00000000  ................
+CHECK-NEXT:  10000e11c 00000000 00000c00 00000000 00000000  ................
+CHECK-NEXT:  10000e12c 00000000 04000c00 00000000 00000000  ................
+
+CHECK: Contents of section __DWARF,__swift5_capture:
+CHECK-NEXT:  10000e22c 01000000 01000000 02000000 f4ffffff  ................
+CHECK-NEXT:	 10000e23c f0ffffff ecffffff                    ........
+
+CHECK: Contents of section __DWARF,__swift5_builtin:
+CHECK-NEXT:  10000e244 00000000 09000000 08000100 10000000  ................
+CHECK-NEXT:  10000e254 fe000000                             ....
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index a8dfde0865377..37c84691eb5a0 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -30,6 +30,7 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/MachO.h"
+#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
@@ -174,7 +175,7 @@ bool DwarfLinkerForBinary::createStreamer(const Triple &TheTriple,
       [&](const Twine &Warning, StringRef Context, const DWARFDie *) {
         warn(Warning, Context);
       });
-  return Streamer->init(TheTriple);
+  return Streamer->init(TheTriple, "__DWARF");
 }
 
 ErrorOr<const object::ObjectFile &>
@@ -295,6 +296,77 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj,
   return ErrorOrObj.getError();
 }
 
+static bool binaryHasSwiftReflectionSections(const DebugMap &Map,
+                                             const LinkOptions &Options,
+                                             BinaryHolder &BinHolder) {
+  // If the input binary has swift5 reflection sections, there is no need to
+  // copy them to the .dSYM. Only copy them for binaries where the linker
+  // omitted the reflection metadata.
+  if (!Map.getBinaryPath().empty() &&
+      Options.FileType == OutputFileType::Object) {
+
+    auto ObjectEntry = BinHolder.getObjectEntry(Map.getBinaryPath());
+    // If ObjectEntry or Object has an error, no binary exists, therefore no
+    // reflection sections exist.
+    if (!ObjectEntry) {
+      // Any errors will be diagnosed later in the main loop, ignore them here.
+      llvm::consumeError(ObjectEntry.takeError());
+      return false;
+    }
+
+    auto Object =
+        ObjectEntry->getObjectAs<object::MachOObjectFile>(Map.getTriple());
+    if (!Object) {
+      // Any errors will be diagnosed later in the main loop, ignore them here.
+      llvm::consumeError(Object.takeError());
+      return false;
+    }
+
+    for (auto &Section : Object->sections()) {
+      llvm::Expected<llvm::StringRef> NameOrErr =
+          Object->getSectionName(Section.getRawDataRefImpl());
+      if (!NameOrErr) {
+        llvm::consumeError(NameOrErr.takeError());
+        continue;
+      }
+      NameOrErr->consume_back("__TEXT");
+      if (Object->mapReflectionSectionNameToEnumValue(*NameOrErr) !=
+          llvm::swift::Swift5ReflectionSectionKind::Unknown) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+static void
+copySwiftReflectionMetadata(const llvm::dsymutil::DebugMapObject *Obj,
+                            DwarfStreamer *Streamer) {
+  auto OF =
+      llvm::object::ObjectFile::createObjectFile(Obj->getObjectFilename());
+  if (!OF) {
+    llvm::consumeError(OF.takeError());
+    return;
+  } else if (auto *MO =
+                 dyn_cast<llvm::object::MachOObjectFile>(OF->getBinary())) {
+    for (auto &Section : OF->getBinary()->sections()) {
+      llvm::Expected<llvm::StringRef> NameOrErr =
+          MO->getSectionName(Section.getRawDataRefImpl());
+      if (!NameOrErr) {
+        llvm::consumeError(NameOrErr.takeError());
+        continue;
+      }
+      llvm::Expected<llvm::StringRef> SectionContents = Section.getContents();
+      if (SectionContents) {
+        NameOrErr->consume_back("__TEXT");
+        Streamer->emitSwiftReflectionSection(
+            MO->mapReflectionSectionNameToEnumValue(*NameOrErr),
+            *SectionContents, Section.getAlignment(), Section.getSize());
+      }
+    }
+  }
+}
+
 bool DwarfLinkerForBinary::link(const DebugMap &Map) {
   if (!createStreamer(Map.getTriple(), OutFile))
     return false;
@@ -389,8 +461,20 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
         llvm_unreachable("Unhandled DebugMap object");
       });
   GeneralLinker.setSwiftInterfacesMap(&ParseableSwiftInterfaces);
+  bool ReflectionSectionsPresentInBinary = false;
+  // If there is no output specified, no point in checking the binary for swift5
+  // reflection sections.
+  if (!Options.NoOutput) {
+    ReflectionSectionsPresentInBinary =
+        binaryHasSwiftReflectionSections(Map, Options, BinHolder);
+  }
 
   for (const auto &Obj : Map.objects()) {
+    // If there is no output specified or the reflection sections are present in
+    // the Input binary, there is no need to copy the Swift Reflection Metadata
+    if (!Options.NoOutput && !ReflectionSectionsPresentInBinary)
+      copySwiftReflectionMetadata(Obj.get(), Streamer.get());
+
     // N_AST objects (swiftmodule files) should get dumped directly into the
     // appropriate DWARF section.
     if (Obj->getType() == MachO::N_AST) {
@@ -431,7 +515,6 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
 
       continue;
     }
-
     if (auto ErrorOrObj = loadObject(*Obj, Map, RL))
       GeneralLinker.addObjectFile(*ErrorOrObj);
     else {

From d97fcf3df211c14c1c0ed1c779310b5237cf1be1 Mon Sep 17 00:00:00 2001
From: Maksim Panchenko <maks@fb.com>
Date: Wed, 26 Jan 2022 13:52:40 -0800
Subject: [PATCH 800/946] [BOLT][docs] Add note regarding DWARF v5 support to
 README.md

Reviewed By: Amir, yota9

Differential Revision: https://reviews.llvm.org/D118284
---
 bolt/README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/bolt/README.md b/bolt/README.md
index 22a583dfe65da..22bf2a65b67c2 100644
--- a/bolt/README.md
+++ b/bolt/README.md
@@ -30,6 +30,14 @@ compiler option. Since GCC8 enables this option by default, you have to
 explicitly disable it by adding `-fno-reorder-blocks-and-partition` flag if
 you are compiling with GCC8 or above.
 
+NOTE2: DWARF v5 is the new debugging format generated by the latest LLVM and GCC
+compilers. It offers several benefits over the previous DWARF v4. Currently, the
+support for v5 is a work in progress for BOLT. While you will be able to
+optimize binaries produced by the latest compilers, until the support is
+complete, you will not be able to update the debug info with
+`-update-debug-sections`. To temporarily work around the issue, we recommend
+compiling binaries with `-gdwarf-4` option that forces DWARF v4 output.
+
 PIE and .so support has been added recently. Please report bugs if you
 encounter any issues.
 

From f32dccb9a43b02ce4e540d6ba5dbbdb188f2dc7d Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Wed, 26 Jan 2022 23:24:45 +0100
Subject: [PATCH 801/946] [bazel] Port 480cd4cb

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 1007f1af9414c..080b20c8782c0 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -7367,8 +7367,8 @@ cc_library(
         ":ComplexBaseIncGen",
         ":ComplexOpsIncGen",
         ":IR",
+        ":InferTypeOpInterface",
         ":SideEffectInterfaces",
-        ":StandardOps",
         ":Support",
         ":VectorInterfaces",
         "//llvm:Support",

From dbf278b984eebf17a8977e79b67cdb960e9af5db Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Fri, 21 Jan 2022 12:12:31 -0800
Subject: [PATCH 802/946] [AMDGPU] Prevent aliasing of SrcC and Dst in MAI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Form the MAI spec: It’s ok that Src_C and vDst are the exact same VGPRs
or Src_C and vDst are completely separated. The case that Src_C and vDst
are overlapping should be avoid as new value could be written to accumulator
input before it gets read.

Note that this inevitably increases register pressure to the point where
some programs will become uncompilable.

This patch separates MAC and FMA versions of MFMA instructions using either
tied dst and src2 or earlyclobber dst.

Fixes: SWDEV-318900

Differential Revision: https://reviews.llvm.org/D117844
---
 .../lib/Target/AMDGPU/GCNHazardRecognizer.cpp |  8 ++-
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     |  7 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 27 +++++++-
 llvm/lib/Target/AMDGPU/SIInstrInfo.h          |  4 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.td         |  8 +++
 llvm/lib/Target/AMDGPU/VOP3PInstructions.td   | 41 ++++++++++--
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll  |  7 +-
 .../AMDGPU/mfma-no-register-aliasing.ll       | 66 +++++++++++++++++++
 llvm/test/CodeGen/AMDGPU/spill-agpr.ll        | 32 ---------
 9 files changed, 156 insertions(+), 44 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index d0ad13bb75064..c0592f6f3c7af 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -95,7 +95,9 @@ static bool isDGEMM(unsigned Opcode) {
   return Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
          Opcode == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64 ||
          Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_e64 ||
-         Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64;
+         Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64 ||
+         Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64 ||
+         Opcode == AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64;
 }
 
 static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
@@ -1477,6 +1479,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
         switch (Opc1) {
         case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
         case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
+        case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
+        case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
           if (!isXDL(ST, *MI))
             NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
           break;
@@ -1509,6 +1513,8 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
       switch (Opc1) {
       case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
       case AMDGPU::V_MFMA_F64_16X16X4F64_vgprcd_e64:
+      case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
+      case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
         NeedWaitStates = DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates;
         break;
       case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1f93284fc7eeb..33954e11d6c6c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -300,6 +300,13 @@ static bool updateOperand(FoldCandidate &Fold,
   assert(!Fold.needsShrink() && "not handled");
 
   if (Fold.isImm()) {
+    if (Old.isTied()) {
+      int NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(MI->getOpcode());
+      if (NewMFMAOpc == -1)
+        return false;
+      MI->setDesc(TII.get(NewMFMAOpc));
+      MI->untieRegOperand(0);
+    }
     Old.ChangeToImmediate(Fold.ImmToFold);
     return true;
   }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f89f109d0d3a2..0a2f9381e71fd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3198,10 +3198,14 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
                Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
                Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
   bool IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
+  int NewMFMAOpc = -1;
 
   switch (Opc) {
   default:
-    return nullptr;
+    NewMFMAOpc = AMDGPU::getMFMAEarlyClobberOp(Opc);
+    if (NewMFMAOpc == -1)
+      return nullptr;
+    break;
   case AMDGPU::V_MAC_F16_e64:
   case AMDGPU::V_FMAC_F16_e64:
     IsF16 = true;
@@ -3230,6 +3234,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
   }
   }
 
+  MachineInstrBuilder MIB;
+  MachineBasicBlock &MBB = *MI.getParent();
+
+  if (NewMFMAOpc != -1) {
+    MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewMFMAOpc));
+    for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I)
+      MIB.add(MI.getOperand(I));
+    updateLiveVariables(LV, MI, *MIB);
+    if (LIS)
+      LIS->ReplaceMachineInstrInMaps(MI, *MIB);
+    return MIB;
+  }
+
   const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
   const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
   const MachineOperand *Src0Mods =
@@ -3240,8 +3257,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
   const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
   const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
   const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
-  MachineInstrBuilder MIB;
-  MachineBasicBlock &MBB = *MI.getParent();
 
   if (!Src0Mods && !Src1Mods && !Clamp && !Omod && !IsF64 &&
       // If we have an SGPR input, we will violate the constant bus restriction.
@@ -7750,6 +7765,12 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
     }
   }
 
+  if (isMAI(Opcode)) {
+    int MFMAOp = AMDGPU::getMFMAEarlyClobberOp(Opcode);
+    if (MFMAOp != -1)
+      Opcode = MFMAOp;
+  }
+
   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
 
   // -1 means that Opcode is already a native instruction.
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 70a48cd58e387..e551d6c7223fd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1246,6 +1246,10 @@ namespace AMDGPU {
   LLVM_READONLY
   int getFlatScratchInstSVfromSS(uint16_t Opcode);
 
+  /// \returns earlyclobber version of a MAC MFMA is exists.
+  LLVM_READONLY
+  int getMFMAEarlyClobberOp(uint16_t Opcode);
+
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index dda92d3d25ff2..713a08907e99b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2588,6 +2588,14 @@ def getFlatScratchInstSVfromSS : InstrMapping {
   let ValueCols = [["SV"]];
 }
 
+def getMFMAEarlyClobberOp : InstrMapping {
+  let FilterClass = "MFMATable";
+  let RowFields = ["FMAOp"];
+  let ColFields = ["IsMac"];
+  let KeyCol = ["1"];
+  let ValueCols = [["0"]];
+}
+
 include "SIInstructions.td"
 
 include "DSInstructions.td"
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 32222b3eb93cd..707475ceccee1 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -388,6 +388,12 @@ class VOPProfileMAI<VOPProfile P, RegisterOperand _SrcRC, RegisterOperand _DstRC
   let HasModifiers = 0;
   let Asm64 = "$vdst, $src0, $src1, $src2$cbsz$abid$blgp";
   let Ins64 = (ins Src0RC64:$src0, Src1RC64:$src1, Src2RC64:$src2, cbsz:$cbsz, abid:$abid, blgp:$blgp);
+  // Dst and SrcC cannot partially overlap if SrcC/Dst is bigger than 4 VGPRs.
+  // We then create two versions of the instruction: with tied dst and src2
+  // and with the eralyclobber flag on the dst. This is strciter than the
+  // actual HW restriction. In particular earlyclobber also affects src0 and
+  // src1 allocation which is not required.
+  bit NoDstOverlap = !gt(DstVT.Size, 128);
 }
 
 def VOPProfileMAI_F32_F32_X4    : VOPProfileMAI<VOP_V4F32_F32_F32_V4F32,       AISrc_128_f32,  ADst_128>;
@@ -426,6 +432,11 @@ def VOPProfileMAI_F32_V4I16_X32_VCD  : VOPProfileMAI<VOP_V32F32_V4I16_V4I16_V32F
 def VOPProfileMAI_F64_16X16X4F64_VCD : VOPProfileMAI<VOP_V4F64_F64_F64_V4F64,       VISrc_256_f64,  VDst_256,  AVSrc_64>;
 def VOPProfileMAI_F64_4X4X4F64_VCD   : VOPProfileMAI<VOP_F64_F64_F64_F64,           VISrc_64_f64,   VDst_64,   AVSrc_64>;
 
+class MFMATable <bit is_mac, string Name> {
+  bit IsMac = is_mac;
+  string FMAOp = Name;
+}
+
 let Predicates = [HasMAIInsts] in {
 
 let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
@@ -435,13 +446,31 @@ let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
   } // End isMoveImm = 1
 } // End isAsCheapAsAMove = 1, isReMaterializable = 1
 
-multiclass MAIInst<string OpName, string P, SDPatternOperator node> {
+multiclass MAIInst<string OpName, string P, SDPatternOperator node,
+                   bit NoDstOverlap = !cast<VOPProfileMAI>("VOPProfileMAI_" # P).NoDstOverlap> {
   let isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1 in {
     // FP32 denorm mode is respected, rounding mode is not. Exceptions are not supported.
-    defm "" : VOP3Inst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P), node>;
-
-    let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
-    defm _vgprcd : VOP3Inst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>;
+    let Constraints = !if(NoDstOverlap, "@earlyclobber $vdst", "") in {
+      defm "" : VOP3Inst<OpName, !cast<VOPProfileMAI>("VOPProfileMAI_" # P), !if(NoDstOverlap, null_frag, node)>,
+                MFMATable<0, NAME # "_e64">;
+
+      let SubtargetPredicate = isGFX90APlus, Mnemonic = OpName in
+      defm _vgprcd : VOP3Inst<OpName # "_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>,
+                     MFMATable<0, NAME # "_vgprcd_e64">;
+    }
+
+    foreach _ = BoolToList<NoDstOverlap>.ret in {
+      let Constraints = !if(NoDstOverlap, "$vdst = $src2", ""),
+          isConvertibleToThreeAddress = NoDstOverlap,
+          Mnemonic = OpName in {
+        defm "_mac" : VOP3Inst<OpName # "_mac", !cast<VOPProfileMAI>("VOPProfileMAI_" # P), node>,
+                      MFMATable<1, NAME # "_e64">;
+
+        let SubtargetPredicate = isGFX90APlus in
+        defm _mac_vgprcd : VOP3Inst<OpName # "_mac_vgprcd", !cast<VOPProfileMAI>("VOPProfileMAI_" # P # "_VCD")>,
+                           MFMATable<1, NAME # "_vgprcd_e64">;
+      }
+    }
   } // End isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1
 }
 
@@ -517,6 +546,7 @@ multiclass VOP3P_Real_MAI<bits<7> op> {
   }
 }
 
+let Constraints = "" in {
 multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
   let SubtargetPredicate = isGFX90AOnly,
       AssemblerPredicate = isGFX90AOnly, DecoderNamespace = "GFX90A" in {
@@ -536,6 +566,7 @@ multiclass VOP3P_Real_MFMA<bits<7> op> :
     let DecoderNamespace = "GFX8";
   }
 }
+}
 
 defm V_PK_MAD_I16 : VOP3P_Real_vi <0x00>;
 defm V_PK_MUL_LO_U16 : VOP3P_Real_vi <0x01>;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
index 870f4fcb8996d..bc88f44ca2265 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll
@@ -601,14 +601,15 @@ bb:
 
 ; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32_vecarg:
 ; GFX90A-DAG:      v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
-; GCN-DAG:         v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GFX90A-DAG:      v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
 ; GCN-COUNT-8:     global_load_dwordx4
 ; GFX908-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
-; GFX908-DAG:      v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
 ; GFX90A-NOT:      v_accvgpr_write
+; GFX908-DAG:      v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GFX908-DAG:      v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
 ; GFX908:          v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
 ; GFX90A:          v_mfma_f32_32x32x1f32 a[{{[0-9]+:[0-9]+}}], [[ONE]], [[TWO]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
-; GFX908-COUNT-32: v_accvgpr_read_b32
+; GFX908:          v_accvgpr_read_b32
 ; GFX908-COUNT-8:  global_store_dwordx4
 ; GFX90A-NOT:      v_accvgpr_read_b32
 ; GFX90A-COUNT-5:  global_store_dwordx4 v{{[0-9:]+}}, a[{{[0-9:]+}}], s[{{[0-9:]+}}]
diff --git a/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
new file mode 100644
index 0000000000000..feea8e6edf9bb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mfma-no-register-aliasing.ll
@@ -0,0 +1,66 @@
+; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY %s
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GREEDY %s
+; RUN: llc -march=amdgcn -mcpu=gfx90a -sgpr-regalloc=fast -vgpr-regalloc=fast -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,FAST %s
+
+; Check that Dst and SrcC of MFMA instructions reading more than 4 registers as SrcC
+; is either completely disjoint or exactly the same, but does not alias.
+
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
+declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
+declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
+
+; GCN-LABEL: {{^}}test_mfma_f32_32x32x1f32:
+; GREEDY: v_mfma_f32_32x32x1f32 a[0:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
+; GREEDY: v_mfma_f32_32x32x1f32 a[32:63], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
+; FAST:   v_mfma_f32_32x32x1f32 a[64:95], v{{[0-9]+}}, v{{[0-9]+}}, a[64:95]
+; FAST:   v_mfma_f32_32x32x1f32 a[32:63], v{{[0-9]+}}, v{{[0-9]+}}, a[64:95]
+; GCN:    v_mfma_f32_32x32x1f32 a[0:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:31]
+define amdgpu_kernel void @test_mfma_f32_32x32x1f32(<32 x float> addrspace(1)* %arg) #0 {
+bb:
+  %in.1 = load <32 x float>, <32 x float> addrspace(1)* %arg
+  %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.2 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %mai.1, i32 0, i32 0, i32 0)
+  %tmp.1 = shufflevector <32 x float> %mai.2, <32 x float> %mai.1, <32 x i32> <i32 32, i32 33, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29>
+  %mai.3 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %tmp.1, i32 0, i32 0, i32 0)
+  store <32 x float> %mai.3, <32 x float> addrspace(1)* %arg
+  ret void
+}
+
+; GCN-LABEL: {{^}}test_mfma_f32_16x16x1f32:
+; GREEDY: v_mfma_f32_16x16x1f32 a[0:15], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
+; GREEDY: v_mfma_f32_16x16x1f32 a[16:31], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
+; FAST:   v_mfma_f32_16x16x1f32 a[32:47], v{{[0-9]+}}, v{{[0-9]+}}, a[32:47]
+; FAST:   v_mfma_f32_16x16x1f32 a[16:31], v{{[0-9]+}}, v{{[0-9]+}}, a[32:47]
+; GCN:    v_mfma_f32_16x16x1f32 a[0:15], v{{[0-9]+}}, v{{[0-9]+}}, a[0:15]
+define amdgpu_kernel void @test_mfma_f32_16x16x1f32(<16 x float> addrspace(1)* %arg) #0 {
+bb:
+  %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg
+  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.2 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %mai.1, i32 0, i32 0, i32 0)
+  %tmp.1 = shufflevector <16 x float> %mai.2, <16 x float> %mai.1, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13>
+  %mai.3 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 2.0, <16 x float> %tmp.1, i32 0, i32 0, i32 0)
+  store <16 x float> %mai.3, <16 x float> addrspace(1)* %arg
+  ret void
+}
+
+; This instruction allows the overlap since it only read 4 registers.
+
+; GCN-LABEL: {{^}}test_mfma_f32_4x4x1f32:
+; GREEDY: v_mfma_f32_4x4x1f32 a[0:3], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
+; GREEDY: v_mfma_f32_4x4x1f32 a[2:5], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
+; FAST:   v_mfma_f32_4x4x1f32 a[8:11], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
+; FAST:   v_mfma_f32_4x4x1f32 a[4:7], v{{[0-9]+}}, v{{[0-9]+}}, a[8:11]
+; GCN:    v_mfma_f32_4x4x1f32 a[0:3], v{{[0-9]+}}, v{{[0-9]+}}, a[0:3]
+define amdgpu_kernel void @test_mfma_f32_4x4x1f32(<4 x float> addrspace(1)* %arg) #0 {
+bb:
+  %in.1 = load <4 x float>, <4 x float> addrspace(1)* %arg
+  %mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %in.1, i32 0, i32 0, i32 0)
+  %mai.2 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %mai.1, i32 0, i32 0, i32 0)
+  %tmp.1 = shufflevector <4 x float> %mai.1, <4 x float> %mai.2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  %mai.3 = tail call <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float 1.0, float 2.0, <4 x float> %tmp.1, i32 0, i32 0, i32 0)
+  store <4 x float> %mai.3, <4 x float> addrspace(1)* %arg
+  ret void
+}
+
+attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
index 5085a9ece5e64..c51f3e369ba84 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.ll
@@ -1,37 +1,6 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX908 %s
 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A %s
 
-; GCN-LABEL: {{^}}max_24regs_32a_used:
-; GCN-NOT:     s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
-; GCN-NOT:     s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
-; GCN-DAG:     v_mfma_f32_16x16x1f32
-; GCN-DAG:     v_mfma_f32_16x16x1f32
-; GCN-DAG:     v_accvgpr_read_b32
-; GCN-NOT:     buffer_store_dword
-; GCN-NOT:     buffer_load_dword
-; GFX908-NOT:  v_accvgpr_write_b32
-; GFX90A:      v_accvgpr_write_b32
-; GCN:         ScratchSize: 0
-define amdgpu_kernel void @max_24regs_32a_used(<16 x float> addrspace(1)* %arg, float addrspace(1)* %out) #0 {
-bb:
-  %in.1 = load <16 x float>, <16 x float> addrspace(1)* %arg
-  %mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 1.0, <16 x float> %in.1, i32 0, i32 0, i32 0)
-  %mai.2 = tail call <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float 1.0, float 1.0, <16 x float> %mai.1, i32 0, i32 0, i32 0)
-  %elt1 = extractelement <16 x float> %mai.2, i32 0
-  %elt2 = extractelement <16 x float> %mai.1, i32 15
-  %elt3 = extractelement <16 x float> %mai.1, i32 14
-  %elt4 = extractelement <16 x float> %mai.2, i32 1
-  store float %elt1, float addrspace(1)* %out
-  %gep1 = getelementptr float, float addrspace(1)* %out, i64 1
-  store float %elt2, float addrspace(1)* %gep1
-  %gep2 = getelementptr float, float addrspace(1)* %out, i64 2
-  store float %elt3, float addrspace(1)* %gep2
-  %gep3 = getelementptr float, float addrspace(1)* %out, i64 3
-  store float %elt4, float addrspace(1)* %gep3
-
-  ret void
-}
-
 ; GCN-LABEL: {{^}}max_12regs_13a_used:
 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
@@ -152,7 +121,6 @@ declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>
 declare <4 x float> @llvm.amdgcn.mfma.f32.4x4x1f32(float, float, <4 x float>, i32, i32, i32)
 declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
 
-attributes #0 = { nounwind "amdgpu-num-vgpr"="24" }
 attributes #1 = { nounwind "amdgpu-num-vgpr"="10" }
 attributes #2 = { nounwind "amdgpu-num-vgpr"="12" }
 attributes #3 = { nounwind "amdgpu-num-vgpr"="32" }

From 10ce1eed47ef8b9985c635acb05894cad717bf49 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 15:07:20 -0800
Subject: [PATCH 803/946] [gn build] Remove incorrect interception dependency

---
 llvm/utils/gn/secondary/compiler-rt/lib/interception/BUILD.gn | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/interception/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/interception/BUILD.gn
index 92f65d789f1d4..5426f5c6d1bf0 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/interception/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/interception/BUILD.gn
@@ -1,7 +1,6 @@
 source_set("sources") {
   configs -= [ "//llvm/utils/gn/build:llvm_code" ]
   configs += [ "//llvm/utils/gn/build:crt_code" ]
-  deps = [ "//compiler-rt/lib/sanitizer_common:sources" ]
   sources = [
     "interception.h",
     "interception_linux.cpp",

From 1d085f1147346fb4030f4bd467f8447f3cd16c36 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 14:50:27 -0800
Subject: [PATCH 804/946] [gn build] Don't pass -fuse-ld=lld to compiler-rt
 tests

This was done for check-hwasan, but compiler-rt/test/hwasan/lit.cfg.py
already does that.

This makes check-asan (to be submitted) fail on Windows due to using
lld-link (as opposed to MSVC link.exe) in tests. That seems like a
problem that should be fixed, but that's orthogonal to this patch.
---
 llvm/utils/gn/secondary/compiler-rt/test/test.gni | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/utils/gn/secondary/compiler-rt/test/test.gni b/llvm/utils/gn/secondary/compiler-rt/test/test.gni
index 72d5bc202fbc0..2d1aa0721248e 100644
--- a/llvm/utils/gn/secondary/compiler-rt/test/test.gni
+++ b/llvm/utils/gn/secondary/compiler-rt/test/test.gni
@@ -10,7 +10,7 @@ declare_args() {
 
 target_flags_string = ""
 
-foreach(flag, target_flags + target_ldflags + [ "-fuse-ld=lld" ]) {
+foreach(flag, target_flags + target_ldflags) {
   if (target_flags_string != "") {
     target_flags_string += " "
   }

From 409c4436f9564883cac324b4371ec2b0919f721b Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin <Stanislav.Mekhanoshin@amd.com>
Date: Mon, 24 Jan 2022 16:22:49 -0800
Subject: [PATCH 805/946] [AMDGPU] Validate dst and src2 non-overlapping
 restriction in asm

Differential Revision: https://reviews.llvm.org/D118089
---
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  38 +
 llvm/test/MC/AMDGPU/accvgpr-altnames.s        |   4 +-
 llvm/test/MC/AMDGPU/gfx90a_err_pos.s          |   5 +
 llvm/test/MC/AMDGPU/mai-err.s                 |  16 +-
 llvm/test/MC/AMDGPU/mai-gfx90a.s              | 992 +++++++++---------
 llvm/test/MC/AMDGPU/mai.s                     | 390 +++----
 .../MC/AMDGPU/misaligned-vgpr-tuples-err.s    |   4 +-
 7 files changed, 753 insertions(+), 696 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index f6c54fe3e7bf9..c1c88d9a74626 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1548,6 +1548,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
   bool validateVccOperand(unsigned Reg) const;
   bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
+  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
   bool validateAGPRLdSt(const MCInst &Inst) const;
   bool validateVGPRAlign(const MCInst &Inst) const;
   bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
@@ -3613,6 +3614,40 @@ bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
   return true;
 }
 
+bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst,
+                                   const OperandVector &Operands) {
+  const unsigned Opc = Inst.getOpcode();
+  const MCInstrDesc &Desc = MII.get(Opc);
+
+  if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0)
+    return true;
+
+  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2);
+  if (Src2Idx == -1)
+    return true;
+
+  const MCOperand &Src2 = Inst.getOperand(Src2Idx);
+  if (!Src2.isReg())
+    return true;
+
+  MCRegister Src2Reg = Src2.getReg();
+  MCRegister DstReg = Inst.getOperand(0).getReg();
+  if (Src2Reg == DstReg)
+    return true;
+
+  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
+  if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
+    return true;
+
+  if (isRegIntersect(Src2Reg, DstReg, TRI)) {
+    Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
+          "source 2 operand must not partially overlap with dst");
+    return false;
+  }
+
+  return true;
+}
+
 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
   switch (Inst.getOpcode()) {
   default:
@@ -4297,6 +4332,9 @@ bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
   if (!validateMAIAccWrite(Inst, Operands)) {
     return false;
   }
+  if (!validateMFMA(Inst, Operands)) {
+    return false;
+  }
   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
     return false;
   }
diff --git a/llvm/test/MC/AMDGPU/accvgpr-altnames.s b/llvm/test/MC/AMDGPU/accvgpr-altnames.s
index cc52162480a5d..b0445f0192860 100644
--- a/llvm/test/MC/AMDGPU/accvgpr-altnames.s
+++ b/llvm/test/MC/AMDGPU/accvgpr-altnames.s
@@ -6,5 +6,5 @@ v_accvgpr_read_b32 v2, acc0
 v_accvgpr_write_b32 acc2, -2.0
 // GFX908: v_accvgpr_write_b32 a2, -2.0    ; encoding: [0x02,0x40,0xd9,0xd3,0xf5,0x00,0x00,0x18]
 
-v_mfma_f32_32x32x1f32 acc[0:31], acc0, acc1, acc[1:32]
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_32x32x1f32 acc[0:31], acc0, acc1, acc[32:63]
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[32:63] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x82,0x1c]
diff --git a/llvm/test/MC/AMDGPU/gfx90a_err_pos.s b/llvm/test/MC/AMDGPU/gfx90a_err_pos.s
index e7377c51df13e..69f4f1ad5d5de 100644
--- a/llvm/test/MC/AMDGPU/gfx90a_err_pos.s
+++ b/llvm/test/MC/AMDGPU/gfx90a_err_pos.s
@@ -7,3 +7,8 @@ ds_gws_init a1 offset:65535 gds
 // CHECK: error: vgpr must be even aligned
 // CHECK-NEXT:{{^}}ds_gws_init a1 offset:65535 gds
 // CHECK-NEXT:{{^}}            ^
+
+v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[2:9]
+// CHECK: error: source 2 operand must not partially overlap with dst
+// CHECK-NEXT:{{^}}v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[2:9]
+// CHECK-NEXT:{{^}}                                              ^
diff --git a/llvm/test/MC/AMDGPU/mai-err.s b/llvm/test/MC/AMDGPU/mai-err.s
index 5a1c6e7045a52..9048583569193 100644
--- a/llvm/test/MC/AMDGPU/mai-err.s
+++ b/llvm/test/MC/AMDGPU/mai-err.s
@@ -39,19 +39,19 @@ v_accvgpr_write_b32 a0, 65
 v_accvgpr_write_b32 a0, v0
 // GFX900: error: instruction not supported on this GPU
 
-v_mfma_f32_32x32x1f32 v[0:31], v0, v1, a[1:32]
+v_mfma_f32_32x32x1f32 v[0:31], v0, v1, a[0:31]
 // GFX908: error: invalid operand for instruction
 // GFX900: error: instruction not supported on this GPU
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, v1, v[1:32]
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, v[0:31]
 // GFX908: error: invalid operand for instruction
 // GFX900: error: instruction not supported on this GPU
 
-v_mfma_f32_32x32x1f32 a[0:31], s0, v1, a[1:32]
+v_mfma_f32_32x32x1f32 a[0:31], s0, v1, a[0:31]
 // GFX908: error: invalid operand for instruction
 // GFX900: error: instruction not supported on this GPU
 
-v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[1:32]
+v_mfma_f32_32x32x1f32 a[0:31], 1, v1, a[0:31]
 // GFX908: error: invalid operand for instruction
 // GFX900: error: instruction not supported on this GPU
 
@@ -698,3 +698,11 @@ v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0
 v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 // GFX908: error: inline constants are not allowed for this operand
 // GFX900: error: instruction not supported on this GPU
+
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[2:33]
+// GFX908: error: source 2 operand must not partially overlap with dst
+// GFX900: error: instruction not supported on this GPU
+
+v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[2:17]
+// GFX908: error: source 2 operand must not partially overlap with dst
+// GFX900: error: instruction not supported on this GPU
diff --git a/llvm/test/MC/AMDGPU/mai-gfx90a.s b/llvm/test/MC/AMDGPU/mai-gfx90a.s
index 14cec6a05730e..db116f8a20cd2 100644
--- a/llvm/test/MC/AMDGPU/mai-gfx90a.s
+++ b/llvm/test/MC/AMDGPU/mai-gfx90a.s
@@ -27,53 +27,53 @@ v_accvgpr_write a2, v255
 v_accvgpr_mov_b32 a1, a2
 // GFX90A: v_accvgpr_mov_b32 a1, a2        ; encoding: [0x02,0xa5,0x02,0x7e]
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[2:33]
-// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[2:33] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65]
+// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0x04]
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc0,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc0,0xd3,0x00,0x03,0x8a,0xe4]
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[2:33]
-// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[2:33] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[34:65]
+// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[34:65] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0x14]
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc0,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc0,0xd3,0x00,0x03,0x8a,0xf4]
 
-v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[2:33]
-// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[2:33] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[34:65]
+// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[34:65] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0x0c]
 
-v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc0,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc0,0xd3,0x00,0x03,0x8a,0xec]
 
-v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[2:33]
-// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[2:33] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[34:65]
+// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[34:65] ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0x8a,0x1c]
 
-v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc0,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc0,0xd3,0x00,0x03,0x8a,0xfc]
 
-v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[2:33]
-// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[2:33] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[34:65]
+// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[34:65] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0x04]
 
-v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], v0, v1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x8a,0xe4]
 
-v_mfma_f32_32x32x1f32 v[0:31], v0, a1, v[2:33]
-// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], v0, a1, v[2:33] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_32x32x1f32 v[0:31], v0, a1, v[34:65]
+// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], v0, a1, v[34:65] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0x14]
 
-v_mfma_f32_32x32x1f32 v[0:31], v0, a1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], v0, a1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_32x32x1f32 v[0:31], v0, a1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], v0, a1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x8a,0xf4]
 
-v_mfma_f32_32x32x1f32 v[0:31], a0, v1, v[2:33]
-// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, v1, v[2:33] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_32x32x1f32 v[0:31], a0, v1, v[34:65]
+// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, v1, v[34:65] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0x0c]
 
-v_mfma_f32_32x32x1f32 v[0:31], a0, v1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, v1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_32x32x1f32 v[0:31], a0, v1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, v1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x8a,0xec]
 
-v_mfma_f32_32x32x1f32 v[0:31], a0, a1, v[2:33]
-// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, a1, v[2:33] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_32x32x1f32 v[0:31], a0, a1, v[34:65]
+// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, a1, v[34:65] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x8a,0x1c]
 
-v_mfma_f32_32x32x1f32 v[0:31], a0, a1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, a1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_32x32x1f32 v[0:31], a0, a1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, a1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x8a,0xfc]
 
 v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0
 // GFX90A: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, -2.0 ; encoding: [0x00,0x80,0xc0,0xd3,0x00,0x03,0xd6,0x03]
@@ -123,53 +123,53 @@ v_mfma_f32_32x32x1f32 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_32x32x1f32 v[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_32x32x1f32 v[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0xd6,0xfb]
 
-v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[2:17]
-// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[2:17] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[18:33]
+// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc1,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc1,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[2:17]
-// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[2:17] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[18:33]
+// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc1,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc1,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[2:17]
-// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[2:17] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[18:33]
+// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc1,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc1,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[2:17]
-// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[2:17] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[18:33]
+// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[18:33] ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc1,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc1,0xd3,0x00,0x03,0x4a,0xfc]
 
-v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[2:17]
-// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[2:17] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[18:33]
+// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_f32_16x16x1f32 v[0:15], v0, a1, v[2:17]
-// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], v0, a1, v[2:17] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_16x16x1f32 v[0:15], v0, a1, v[18:33]
+// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], v0, a1, v[18:33] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_f32_16x16x1f32 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_16x16x1f32 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_f32_16x16x1f32 v[0:15], a0, v1, v[2:17]
-// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], a0, v1, v[2:17] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_16x16x1f32 v[0:15], a0, v1, v[18:33]
+// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], a0, v1, v[18:33] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_f32_16x16x1f32 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_16x16x1f32 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_f32_16x16x1f32 v[0:15], a0, a1, v[2:17]
-// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], a0, a1, v[2:17] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_16x16x1f32 v[0:15], a0, a1, v[18:33]
+// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], a0, a1, v[18:33] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_f32_16x16x1f32 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_16x16x1f32 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x1f32 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x4a,0xfc]
 
 v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0
 // GFX90A: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x80,0xc1,0xd3,0x00,0x03,0xd6,0x03]
@@ -315,53 +315,53 @@ v_mfma_f32_4x4x1f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_4x4x1f32 v[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_4x4x1f32 v[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0xd6,0xfb]
 
-v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[2:17]
-// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[2:17] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[18:33]
+// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc4,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc4,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[2:17]
-// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[2:17] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[18:33]
+// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[18:33] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc4,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc4,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[2:17]
-// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[2:17] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[18:33]
+// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[18:33] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc4,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc4,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[2:17]
-// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[2:17] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[18:33]
+// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[18:33] ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc4,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc4,0xd3,0x00,0x03,0x4a,0xfc]
 
-v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[2:17]
-// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[2:17] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[18:33]
+// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_f32_32x32x2f32 v[0:15], v0, a1, v[2:17]
-// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], v0, a1, v[2:17] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_32x32x2f32 v[0:15], v0, a1, v[18:33]
+// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], v0, a1, v[18:33] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_f32_32x32x2f32 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_32x32x2f32 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_f32_32x32x2f32 v[0:15], a0, v1, v[2:17]
-// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], a0, v1, v[2:17] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_32x32x2f32 v[0:15], a0, v1, v[18:33]
+// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], a0, v1, v[18:33] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_f32_32x32x2f32 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_32x32x2f32 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_f32_32x32x2f32 v[0:15], a0, a1, v[2:17]
-// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], a0, a1, v[2:17] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_32x32x2f32 v[0:15], a0, a1, v[18:33]
+// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], a0, a1, v[18:33] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_f32_32x32x2f32 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_32x32x2f32 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2f32 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x4a,0xfc]
 
 v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0
 // GFX90A: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x80,0xc4,0xd3,0x00,0x03,0xd6,0x03]
@@ -507,53 +507,53 @@ v_mfma_f32_16x16x4f32 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_16x16x4f32 v[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_16x16x4f32 v[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0xd6,0xfb]
 
-v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[2:33]
-// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[2:33] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[34:65]
+// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[34:65] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x8a,0x04]
 
-v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc8,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc8,0xd3,0x00,0x05,0x8a,0xe4]
 
-v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[2:3], a[2:33]
-// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[2:3], a[2:33] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[2:3], a[34:65]
+// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[2:3], a[34:65] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x8a,0x14]
 
-v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[2:3], a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[2:3], a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc8,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[2:3], a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[2:3], a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc8,0xd3,0x00,0x05,0x8a,0xf4]
 
-v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[2:3], a[2:33]
-// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[2:3], a[2:33] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[2:3], a[34:65]
+// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[2:3], a[34:65] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x8a,0x0c]
 
-v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[2:3], a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[2:3], a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc8,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[2:3], a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[2:3], a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc8,0xd3,0x00,0x05,0x8a,0xec]
 
-v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], a[2:33]
-// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], a[2:33] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], a[34:65]
+// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], a[34:65] ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0x8a,0x1c]
 
-v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc8,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc8,0xd3,0x00,0x05,0x8a,0xfc]
 
-v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[2:33]
-// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[2:33] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[34:65]
+// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[34:65] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x8a,0x04]
 
-v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], v[0:1], v[2:3], v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0x8a,0xe4]
 
-v_mfma_f32_32x32x4f16 v[0:31], v[0:1], a[2:3], v[2:33]
-// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], v[0:1], a[2:3], v[2:33] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_32x32x4f16 v[0:31], v[0:1], a[2:3], v[34:65]
+// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], v[0:1], a[2:3], v[34:65] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x8a,0x14]
 
-v_mfma_f32_32x32x4f16 v[0:31], v[0:1], a[2:3], v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], v[0:1], a[2:3], v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_32x32x4f16 v[0:31], v[0:1], a[2:3], v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], v[0:1], a[2:3], v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0x8a,0xf4]
 
-v_mfma_f32_32x32x4f16 v[0:31], a[0:1], v[2:3], v[2:33]
-// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], v[2:3], v[2:33] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_32x32x4f16 v[0:31], a[0:1], v[2:3], v[34:65]
+// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], v[2:3], v[34:65] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x8a,0x0c]
 
-v_mfma_f32_32x32x4f16 v[0:31], a[0:1], v[2:3], v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], v[2:3], v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_32x32x4f16 v[0:31], a[0:1], v[2:3], v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], v[2:3], v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0x8a,0xec]
 
-v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], v[2:33]
-// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], v[2:33] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], v[34:65]
+// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], v[34:65] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x05,0x8a,0x1c]
 
-v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0x8a,0xfc]
 
 v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], -2.0
 // GFX90A: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[2:3], -2.0 ; encoding: [0x00,0x80,0xc8,0xd3,0x00,0x05,0xd6,0x03]
@@ -603,53 +603,53 @@ v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_32x32x4f16 v[0:31], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x05,0xd6,0xfb]
 
-v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[2:17]
-// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[2:17] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[18:33]
+// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x4a,0x04]
 
-v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc9,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc9,0xd3,0x00,0x05,0x4a,0xe4]
 
-v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[2:3], a[2:17]
-// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[2:3], a[2:17] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[2:3], a[18:33]
+// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[2:3], a[18:33] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x4a,0x14]
 
-v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc9,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc9,0xd3,0x00,0x05,0x4a,0xf4]
 
-v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[2:3], a[2:17]
-// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[2:3], a[2:17] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[2:3], a[18:33]
+// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x4a,0x0c]
 
-v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc9,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc9,0xd3,0x00,0x05,0x4a,0xec]
 
-v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[2:3], a[2:17]
-// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[2:3], a[2:17] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[2:3], a[18:33]
+// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[2:3], a[18:33] ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0x4a,0x1c]
 
-v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc9,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xc9,0xd3,0x00,0x05,0x4a,0xfc]
 
-v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[2:17]
-// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[2:17] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[18:33]
+// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x4a,0x04]
 
-v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], v[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x05,0x4a,0xe4]
 
-v_mfma_f32_16x16x4f16 v[0:15], v[0:1], a[2:3], v[2:17]
-// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], v[0:1], a[2:3], v[2:17] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_16x16x4f16 v[0:15], v[0:1], a[2:3], v[18:33]
+// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], v[0:1], a[2:3], v[18:33] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x4a,0x14]
 
-v_mfma_f32_16x16x4f16 v[0:15], v[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], v[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_16x16x4f16 v[0:15], v[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], v[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x05,0x4a,0xf4]
 
-v_mfma_f32_16x16x4f16 v[0:15], a[0:1], v[2:3], v[2:17]
-// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], a[0:1], v[2:3], v[2:17] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_16x16x4f16 v[0:15], a[0:1], v[2:3], v[18:33]
+// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], a[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x4a,0x0c]
 
-v_mfma_f32_16x16x4f16 v[0:15], a[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], a[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_16x16x4f16 v[0:15], a[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], a[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x05,0x4a,0xec]
 
-v_mfma_f32_16x16x4f16 v[0:15], a[0:1], a[2:3], v[2:17]
-// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], a[0:1], a[2:3], v[2:17] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_16x16x4f16 v[0:15], a[0:1], a[2:3], v[18:33]
+// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], a[0:1], a[2:3], v[18:33] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x05,0x4a,0x1c]
 
-v_mfma_f32_16x16x4f16 v[0:15], a[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], a[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_16x16x4f16 v[0:15], a[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4f16 v[0:15], a[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x05,0x4a,0xfc]
 
 v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], -2.0
 // GFX90A: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[2:3], -2.0 ; encoding: [0x00,0x80,0xc9,0xd3,0x00,0x05,0xd6,0x03]
@@ -795,53 +795,53 @@ v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_4x4x4f16 v[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_4x4x4f16 v[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x05,0xd6,0xfb]
 
-v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[2:17]
-// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[2:17] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[18:33]
+// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x4a,0x04]
 
-v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xcc,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xcc,0xd3,0x00,0x05,0x4a,0xe4]
 
-v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[2:3], a[2:17]
-// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[2:3], a[2:17] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[2:3], a[18:33]
+// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[2:3], a[18:33] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x4a,0x14]
 
-v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xcc,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xcc,0xd3,0x00,0x05,0x4a,0xf4]
 
-v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[2:3], a[2:17]
-// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[2:3], a[2:17] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[2:3], a[18:33]
+// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x4a,0x0c]
 
-v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xcc,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xcc,0xd3,0x00,0x05,0x4a,0xec]
 
-v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[2:3], a[2:17]
-// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[2:3], a[2:17] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[2:3], a[18:33]
+// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[2:3], a[18:33] ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0x4a,0x1c]
 
-v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xcc,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xcc,0xd3,0x00,0x05,0x4a,0xfc]
 
-v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[2:17]
-// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[2:17] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[18:33]
+// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x4a,0x04]
 
-v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], v[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x05,0x4a,0xe4]
 
-v_mfma_f32_32x32x8f16 v[0:15], v[0:1], a[2:3], v[2:17]
-// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], v[0:1], a[2:3], v[2:17] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_32x32x8f16 v[0:15], v[0:1], a[2:3], v[18:33]
+// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], v[0:1], a[2:3], v[18:33] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x4a,0x14]
 
-v_mfma_f32_32x32x8f16 v[0:15], v[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], v[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_32x32x8f16 v[0:15], v[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], v[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x05,0x4a,0xf4]
 
-v_mfma_f32_32x32x8f16 v[0:15], a[0:1], v[2:3], v[2:17]
-// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], a[0:1], v[2:3], v[2:17] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_32x32x8f16 v[0:15], a[0:1], v[2:3], v[18:33]
+// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], a[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x4a,0x0c]
 
-v_mfma_f32_32x32x8f16 v[0:15], a[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], a[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_32x32x8f16 v[0:15], a[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], a[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x05,0x4a,0xec]
 
-v_mfma_f32_32x32x8f16 v[0:15], a[0:1], a[2:3], v[2:17]
-// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], a[0:1], a[2:3], v[2:17] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_32x32x8f16 v[0:15], a[0:1], a[2:3], v[18:33]
+// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], a[0:1], a[2:3], v[18:33] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x05,0x4a,0x1c]
 
-v_mfma_f32_32x32x8f16 v[0:15], a[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], a[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_32x32x8f16 v[0:15], a[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8f16 v[0:15], a[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x05,0x4a,0xfc]
 
 v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], -2.0
 // GFX90A: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[2:3], -2.0 ; encoding: [0x00,0x80,0xcc,0xd3,0x00,0x05,0xd6,0x03]
@@ -987,53 +987,53 @@ v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_16x16x16f16 v[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_16x16x16f16 v[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x05,0xd6,0xfb]
 
-v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[2:33]
-// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[2:33] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[34:65]
+// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x8a,0x04]
 
-v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd0,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd0,0xd3,0x00,0x03,0x8a,0xe4]
 
-v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[2:33]
-// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[2:33] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[34:65]
+// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[34:65] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x8a,0x14]
 
-v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd0,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd0,0xd3,0x00,0x03,0x8a,0xf4]
 
-v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[2:33]
-// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[2:33] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[34:65]
+// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[34:65] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x8a,0x0c]
 
-v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd0,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd0,0xd3,0x00,0x03,0x8a,0xec]
 
-v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[2:33]
-// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[2:33] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[34:65]
+// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[34:65] ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x8a,0x1c]
 
-v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd0,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd0,0xd3,0x00,0x03,0x8a,0xfc]
 
-v_mfma_i32_32x32x4i8 v[0:31], v0, v1, v[2:33]
-// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], v0, v1, v[2:33] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_i32_32x32x4i8 v[0:31], v0, v1, v[34:65]
+// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], v0, v1, v[34:65] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x8a,0x04]
 
-v_mfma_i32_32x32x4i8 v[0:31], v0, v1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], v0, v1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_i32_32x32x4i8 v[0:31], v0, v1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], v0, v1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x8a,0xe4]
 
-v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[2:33]
-// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[2:33] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[34:65]
+// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[34:65] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x8a,0x14]
 
-v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], v0, a1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x8a,0xf4]
 
-v_mfma_i32_32x32x4i8 v[0:31], a0, v1, v[2:33]
-// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, v1, v[2:33] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_i32_32x32x4i8 v[0:31], a0, v1, v[34:65]
+// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, v1, v[34:65] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x8a,0x0c]
 
-v_mfma_i32_32x32x4i8 v[0:31], a0, v1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, v1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_i32_32x32x4i8 v[0:31], a0, v1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, v1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x8a,0xec]
 
-v_mfma_i32_32x32x4i8 v[0:31], a0, a1, v[2:33]
-// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, a1, v[2:33] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_i32_32x32x4i8 v[0:31], a0, a1, v[34:65]
+// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, a1, v[34:65] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x8a,0x1c]
 
-v_mfma_i32_32x32x4i8 v[0:31], a0, a1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, a1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_i32_32x32x4i8 v[0:31], a0, a1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, a1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x8a,0xfc]
 
 v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2
 // GFX90A: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, 2 ; encoding: [0x00,0x80,0xd0,0xd3,0x00,0x03,0x0a,0x02]
@@ -1083,53 +1083,53 @@ v_mfma_i32_32x32x4i8 a[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7
 v_mfma_i32_32x32x4i8 v[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_i32_32x32x4i8 v[0:31], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x0a,0xfa]
 
-v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[2:17]
-// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[2:17] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[18:33]
+// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd1,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd1,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[2:17]
-// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[2:17] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[18:33]
+// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[18:33] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd1,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd1,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[2:17]
-// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[2:17] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[18:33]
+// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[18:33] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd1,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd1,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[2:17]
-// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[2:17] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[18:33]
+// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[18:33] ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd1,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd1,0xd3,0x00,0x03,0x4a,0xfc]
 
-v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[2:17]
-// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[2:17] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[18:33]
+// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_i32_16x16x4i8 v[0:15], v0, a1, v[2:17]
-// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], v0, a1, v[2:17] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_i32_16x16x4i8 v[0:15], v0, a1, v[18:33]
+// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], v0, a1, v[18:33] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_i32_16x16x4i8 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_i32_16x16x4i8 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_i32_16x16x4i8 v[0:15], a0, v1, v[2:17]
-// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], a0, v1, v[2:17] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_i32_16x16x4i8 v[0:15], a0, v1, v[18:33]
+// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], a0, v1, v[18:33] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_i32_16x16x4i8 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_i32_16x16x4i8 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_i32_16x16x4i8 v[0:15], a0, a1, v[2:17]
-// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], a0, a1, v[2:17] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_i32_16x16x4i8 v[0:15], a0, a1, v[18:33]
+// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], a0, a1, v[18:33] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_i32_16x16x4i8 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_i32_16x16x4i8 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_16x16x4i8 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x4a,0xfc]
 
 v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2
 // GFX90A: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, 2 ; encoding: [0x00,0x80,0xd1,0xd3,0x00,0x03,0x0a,0x02]
@@ -1275,53 +1275,53 @@ v_mfma_i32_4x4x4i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7
 v_mfma_i32_4x4x4i8 v[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_i32_4x4x4i8 v[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x0a,0xfa]
 
-v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[2:17]
-// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[2:17] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[18:33]
+// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd4,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd4,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[2:17]
-// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[2:17] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[18:33]
+// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[18:33] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd4,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd4,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[2:17]
-// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[2:17] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[18:33]
+// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[18:33] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd4,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd4,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[2:17]
-// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[2:17] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[18:33]
+// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[18:33] ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd4,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xd4,0xd3,0x00,0x03,0x4a,0xfc]
 
-v_mfma_i32_32x32x8i8 v[0:15], v0, v1, v[2:17]
-// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], v0, v1, v[2:17] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_i32_32x32x8i8 v[0:15], v0, v1, v[18:33]
+// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_i32_32x32x8i8 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_i32_32x32x8i8 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_i32_32x32x8i8 v[0:15], v0, a1, v[2:17]
-// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], v0, a1, v[2:17] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_i32_32x32x8i8 v[0:15], v0, a1, v[18:33]
+// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], v0, a1, v[18:33] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_i32_32x32x8i8 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_i32_32x32x8i8 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_i32_32x32x8i8 v[0:15], a0, v1, v[2:17]
-// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], a0, v1, v[2:17] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_i32_32x32x8i8 v[0:15], a0, v1, v[18:33]
+// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], a0, v1, v[18:33] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_i32_32x32x8i8 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_i32_32x32x8i8 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_i32_32x32x8i8 v[0:15], a0, a1, v[2:17]
-// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], a0, a1, v[2:17] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_i32_32x32x8i8 v[0:15], a0, a1, v[18:33]
+// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], a0, a1, v[18:33] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_i32_32x32x8i8 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_i32_32x32x8i8 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_i32_32x32x8i8 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x4a,0xfc]
 
 v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2
 // GFX90A: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, 2 ; encoding: [0x00,0x80,0xd4,0xd3,0x00,0x03,0x0a,0x02]
@@ -1467,53 +1467,53 @@ v_mfma_i32_16x16x16i8 a[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7
 v_mfma_i32_16x16x16i8 v[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_i32_16x16x16i8 v[0:3], a0, a1, 2 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x0a,0xfa]
 
-v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[2:33]
-// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[2:33] ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[34:65]
+// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[34:65] ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0x8a,0x04]
 
-v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe8,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe8,0xd3,0x00,0x03,0x8a,0xe4]
 
-v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[2:33]
-// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[2:33] ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[34:65]
+// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[34:65] ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0x8a,0x14]
 
-v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe8,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe8,0xd3,0x00,0x03,0x8a,0xf4]
 
-v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[2:33]
-// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[2:33] ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[34:65]
+// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[34:65] ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0x8a,0x0c]
 
-v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe8,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe8,0xd3,0x00,0x03,0x8a,0xec]
 
-v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[2:33]
-// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[2:33] ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[34:65]
+// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[34:65] ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0x8a,0x1c]
 
-v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe8,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe8,0xd3,0x00,0x03,0x8a,0xfc]
 
-v_mfma_f32_32x32x2bf16 v[0:31], v0, v1, v[2:33]
-// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], v0, v1, v[2:33] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_32x32x2bf16 v[0:31], v0, v1, v[34:65]
+// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], v0, v1, v[34:65] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x8a,0x04]
 
-v_mfma_f32_32x32x2bf16 v[0:31], v0, v1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], v0, v1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_32x32x2bf16 v[0:31], v0, v1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], v0, v1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x8a,0xe4]
 
-v_mfma_f32_32x32x2bf16 v[0:31], v0, a1, v[2:33]
-// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], v0, a1, v[2:33] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_32x32x2bf16 v[0:31], v0, a1, v[34:65]
+// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], v0, a1, v[34:65] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x8a,0x14]
 
-v_mfma_f32_32x32x2bf16 v[0:31], v0, a1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], v0, a1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_32x32x2bf16 v[0:31], v0, a1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], v0, a1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x8a,0xf4]
 
-v_mfma_f32_32x32x2bf16 v[0:31], a0, v1, v[2:33]
-// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, v1, v[2:33] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_32x32x2bf16 v[0:31], a0, v1, v[34:65]
+// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, v1, v[34:65] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x8a,0x0c]
 
-v_mfma_f32_32x32x2bf16 v[0:31], a0, v1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, v1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_32x32x2bf16 v[0:31], a0, v1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, v1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x8a,0xec]
 
-v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, v[2:33]
-// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, v[2:33] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, v[34:65]
+// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, v[34:65] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x8a,0x1c]
 
-v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x8a,0xfc]
 
 v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0
 // GFX90A: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, -2.0 ; encoding: [0x00,0x80,0xe8,0xd3,0x00,0x03,0xd6,0x03]
@@ -1563,53 +1563,53 @@ v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_32x32x2bf16 v[0:31], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0xd6,0xfb]
 
-v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[2:17]
-// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[2:17] ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[18:33]
+// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe9,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe9,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[2:17]
-// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[2:17] ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[18:33]
+// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[18:33] ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe9,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe9,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[2:17]
-// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[2:17] ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[18:33]
+// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[18:33] ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe9,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe9,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[2:17]
-// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[2:17] ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[18:33]
+// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[18:33] ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe9,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe9,0xd3,0x00,0x03,0x4a,0xfc]
 
-v_mfma_f32_16x16x2bf16 v[0:15], v0, v1, v[2:17]
-// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], v0, v1, v[2:17] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_16x16x2bf16 v[0:15], v0, v1, v[18:33]
+// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_f32_16x16x2bf16 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_16x16x2bf16 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_f32_16x16x2bf16 v[0:15], v0, a1, v[2:17]
-// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], v0, a1, v[2:17] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_16x16x2bf16 v[0:15], v0, a1, v[18:33]
+// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], v0, a1, v[18:33] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_f32_16x16x2bf16 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_16x16x2bf16 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_f32_16x16x2bf16 v[0:15], a0, v1, v[2:17]
-// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], a0, v1, v[2:17] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_16x16x2bf16 v[0:15], a0, v1, v[18:33]
+// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], a0, v1, v[18:33] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_f32_16x16x2bf16 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_16x16x2bf16 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_f32_16x16x2bf16 v[0:15], a0, a1, v[2:17]
-// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], a0, a1, v[2:17] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_16x16x2bf16 v[0:15], a0, a1, v[18:33]
+// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], a0, a1, v[18:33] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_f32_16x16x2bf16 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_16x16x2bf16 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x2bf16 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x4a,0xfc]
 
 v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0
 // GFX90A: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x80,0xe9,0xd3,0x00,0x03,0xd6,0x03]
@@ -1755,53 +1755,53 @@ v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_4x4x2bf16 v[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_4x4x2bf16 v[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0xd6,0xfb]
 
-v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[2:17]
-// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[2:17] ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[18:33]
+// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[18:33] ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xec,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xec,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[2:17]
-// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[2:17] ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[18:33]
+// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[18:33] ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xec,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xec,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[2:17]
-// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[2:17] ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[18:33]
+// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[18:33] ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xec,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xec,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[2:17]
-// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[2:17] ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[18:33]
+// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[18:33] ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xec,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xec,0xd3,0x00,0x03,0x4a,0xfc]
 
-v_mfma_f32_32x32x4bf16 v[0:15], v0, v1, v[2:17]
-// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], v0, v1, v[2:17] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x0a,0x04]
+v_mfma_f32_32x32x4bf16 v[0:15], v0, v1, v[18:33]
+// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], v0, v1, v[18:33] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x4a,0x04]
 
-v_mfma_f32_32x32x4bf16 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], v0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x0a,0xe4]
+v_mfma_f32_32x32x4bf16 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], v0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x4a,0xe4]
 
-v_mfma_f32_32x32x4bf16 v[0:15], v0, a1, v[2:17]
-// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], v0, a1, v[2:17] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x0a,0x14]
+v_mfma_f32_32x32x4bf16 v[0:15], v0, a1, v[18:33]
+// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], v0, a1, v[18:33] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x4a,0x14]
 
-v_mfma_f32_32x32x4bf16 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], v0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x0a,0xf4]
+v_mfma_f32_32x32x4bf16 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], v0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x4a,0xf4]
 
-v_mfma_f32_32x32x4bf16 v[0:15], a0, v1, v[2:17]
-// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], a0, v1, v[2:17] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x0a,0x0c]
+v_mfma_f32_32x32x4bf16 v[0:15], a0, v1, v[18:33]
+// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], a0, v1, v[18:33] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x4a,0x0c]
 
-v_mfma_f32_32x32x4bf16 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], a0, v1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x0a,0xec]
+v_mfma_f32_32x32x4bf16 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], a0, v1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x4a,0xec]
 
-v_mfma_f32_32x32x4bf16 v[0:15], a0, a1, v[2:17]
-// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], a0, a1, v[2:17] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x0a,0x1c]
+v_mfma_f32_32x32x4bf16 v[0:15], a0, a1, v[18:33]
+// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], a0, a1, v[18:33] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x4a,0x1c]
 
-v_mfma_f32_32x32x4bf16 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], a0, a1, v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x0a,0xfc]
+v_mfma_f32_32x32x4bf16 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16 v[0:15], a0, a1, v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x4a,0xfc]
 
 v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0
 // GFX90A: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, -2.0 ; encoding: [0x00,0x80,0xec,0xd3,0x00,0x03,0xd6,0x03]
@@ -1947,53 +1947,53 @@ v_mfma_f32_16x16x8bf16 a[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_16x16x8bf16 v[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_16x16x8bf16 v[0:3], a0, a1, -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xed,0xd3,0x00,0x03,0xd6,0xfb]
 
-v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[2:33]
-// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[2:33] ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[34:65]
+// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[34:65] ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0x8a,0x04]
 
-v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe3,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe3,0xd3,0x00,0x05,0x8a,0xe4]
 
-v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], a[2:3], a[2:33]
-// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], a[2:3], a[2:33] ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], a[2:3], a[34:65]
+// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], a[2:3], a[34:65] ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0x8a,0x14]
 
-v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], a[2:3], a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], a[2:3], a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe3,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], a[2:3], a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], a[2:3], a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe3,0xd3,0x00,0x05,0x8a,0xf4]
 
-v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], v[2:3], a[2:33]
-// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], v[2:3], a[2:33] ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], v[2:3], a[34:65]
+// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], v[2:3], a[34:65] ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0x8a,0x0c]
 
-v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], v[2:3], a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], v[2:3], a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe3,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], v[2:3], a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], v[2:3], a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe3,0xd3,0x00,0x05,0x8a,0xec]
 
-v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], a[2:33]
-// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], a[2:33] ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], a[34:65]
+// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], a[34:65] ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0x8a,0x1c]
 
-v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], a[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], a[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe3,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], a[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], a[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe3,0xd3,0x00,0x05,0x8a,0xfc]
 
-v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], v[2:3], v[2:33]
-// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], v[2:3], v[2:33] ; encoding: [0x00,0x00,0xe3,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], v[2:3], v[34:65]
+// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], v[2:3], v[34:65] ; encoding: [0x00,0x00,0xe3,0xd3,0x00,0x05,0x8a,0x04]
 
-v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], v[2:3], v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], v[2:3], v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], v[2:3], v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], v[2:3], v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0x8a,0xe4]
 
-v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], a[2:3], v[2:33]
-// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], a[2:3], v[2:33] ; encoding: [0x00,0x00,0xe3,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], a[2:3], v[34:65]
+// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], a[2:3], v[34:65] ; encoding: [0x00,0x00,0xe3,0xd3,0x00,0x05,0x8a,0x14]
 
-v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], a[2:3], v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], a[2:3], v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], a[2:3], v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], v[0:1], a[2:3], v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0x8a,0xf4]
 
-v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], v[2:3], v[2:33]
-// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], v[2:3], v[2:33] ; encoding: [0x00,0x00,0xe3,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], v[2:3], v[34:65]
+// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], v[2:3], v[34:65] ; encoding: [0x00,0x00,0xe3,0xd3,0x00,0x05,0x8a,0x0c]
 
-v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], v[2:3], v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], v[2:3], v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], v[2:3], v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], v[2:3], v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0x8a,0xec]
 
-v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], v[2:33]
-// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], v[2:33] ; encoding: [0x00,0x00,0xe3,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], v[34:65]
+// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], v[34:65] ; encoding: [0x00,0x00,0xe3,0xd3,0x00,0x05,0x8a,0x1c]
 
-v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], v[2:33] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], v[2:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], v[34:65] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], v[34:65] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0x8a,0xfc]
 
 v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], -2.0
 // GFX90A: v_mfma_f32_32x32x4bf16_1k a[0:31], v[0:1], v[2:3], -2.0 ; encoding: [0x00,0x80,0xe3,0xd3,0x00,0x05,0xd6,0x03]
@@ -2043,53 +2043,53 @@ v_mfma_f32_32x32x4bf16_1k a[0:31], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_32x32x4bf16_1k v[0:31], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe3,0xd3,0x00,0x05,0xd6,0xfb]
 
-v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[2:17]
-// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[2:17] ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[18:33]
+// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0x4a,0x04]
 
-v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe4,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe4,0xd3,0x00,0x05,0x4a,0xe4]
 
-v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], a[2:3], a[2:17]
-// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], a[2:3], a[2:17] ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], a[2:3], a[18:33]
+// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], a[2:3], a[18:33] ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0x4a,0x14]
 
-v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe4,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe4,0xd3,0x00,0x05,0x4a,0xf4]
 
-v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], v[2:3], a[2:17]
-// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], v[2:3], a[2:17] ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], v[2:3], a[18:33]
+// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0x4a,0x0c]
 
-v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe4,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe4,0xd3,0x00,0x05,0x4a,0xec]
 
-v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], a[2:3], a[2:17]
-// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], a[2:3], a[2:17] ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], a[2:3], a[18:33]
+// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], a[2:3], a[18:33] ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0x4a,0x1c]
 
-v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe4,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], a[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe4,0xd3,0x00,0x05,0x4a,0xfc]
 
-v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], v[2:3], v[2:17]
-// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], v[2:3], v[2:17] ; encoding: [0x00,0x00,0xe4,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], v[2:3], v[18:33]
+// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xe4,0xd3,0x00,0x05,0x4a,0x04]
 
-v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe4,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe4,0xd3,0x00,0x05,0x4a,0xe4]
 
-v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], a[2:3], v[2:17]
-// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], a[2:3], v[2:17] ; encoding: [0x00,0x00,0xe4,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], a[2:3], v[18:33]
+// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], a[2:3], v[18:33] ; encoding: [0x00,0x00,0xe4,0xd3,0x00,0x05,0x4a,0x14]
 
-v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe4,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], v[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe4,0xd3,0x00,0x05,0x4a,0xf4]
 
-v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], v[2:3], v[2:17]
-// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], v[2:3], v[2:17] ; encoding: [0x00,0x00,0xe4,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], v[2:3], v[18:33]
+// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xe4,0xd3,0x00,0x05,0x4a,0x0c]
 
-v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe4,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe4,0xd3,0x00,0x05,0x4a,0xec]
 
-v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], a[2:3], v[2:17]
-// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], a[2:3], v[2:17] ; encoding: [0x00,0x00,0xe4,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], a[2:3], v[18:33]
+// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], a[2:3], v[18:33] ; encoding: [0x00,0x00,0xe4,0xd3,0x00,0x05,0x4a,0x1c]
 
-v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe4,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_16x16x4bf16_1k v[0:15], a[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe4,0xd3,0x00,0x05,0x4a,0xfc]
 
 v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], -2.0
 // GFX90A: v_mfma_f32_16x16x4bf16_1k a[0:15], v[0:1], v[2:3], -2.0 ; encoding: [0x00,0x80,0xe4,0xd3,0x00,0x05,0xd6,0x03]
@@ -2235,53 +2235,53 @@ v_mfma_f32_4x4x4bf16_1k a[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_4x4x4bf16_1k v[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_4x4x4bf16_1k v[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe5,0xd3,0x00,0x05,0xd6,0xfb]
 
-v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[2:17]
-// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[2:17] ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[18:33]
+// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0x4a,0x04]
 
-v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe6,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe6,0xd3,0x00,0x05,0x4a,0xe4]
 
-v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], a[2:3], a[2:17]
-// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], a[2:3], a[2:17] ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], a[2:3], a[18:33]
+// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], a[2:3], a[18:33] ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0x4a,0x14]
 
-v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe6,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe6,0xd3,0x00,0x05,0x4a,0xf4]
 
-v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], v[2:3], a[2:17]
-// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], v[2:3], a[2:17] ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], v[2:3], a[18:33]
+// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], v[2:3], a[18:33] ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0x4a,0x0c]
 
-v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], v[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe6,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], v[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe6,0xd3,0x00,0x05,0x4a,0xec]
 
-v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], a[2:3], a[2:17]
-// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], a[2:3], a[2:17] ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], a[2:3], a[18:33]
+// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], a[2:3], a[18:33] ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0x4a,0x1c]
 
-v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], a[2:3], a[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe6,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], a[0:1], a[2:3], a[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xe6,0xd3,0x00,0x05,0x4a,0xfc]
 
-v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], v[2:3], v[2:17]
-// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], v[2:3], v[2:17] ; encoding: [0x00,0x00,0xe6,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], v[2:3], v[18:33]
+// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xe6,0xd3,0x00,0x05,0x4a,0x04]
 
-v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe6,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe6,0xd3,0x00,0x05,0x4a,0xe4]
 
-v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], a[2:3], v[2:17]
-// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], a[2:3], v[2:17] ; encoding: [0x00,0x00,0xe6,0xd3,0x00,0x05,0x0a,0x14]
+v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], a[2:3], v[18:33]
+// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], a[2:3], v[18:33] ; encoding: [0x00,0x00,0xe6,0xd3,0x00,0x05,0x4a,0x14]
 
-v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe6,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], v[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe6,0xd3,0x00,0x05,0x4a,0xf4]
 
-v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], v[2:3], v[2:17]
-// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], v[2:3], v[2:17] ; encoding: [0x00,0x00,0xe6,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], v[2:3], v[18:33]
+// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], v[2:3], v[18:33] ; encoding: [0x00,0x00,0xe6,0xd3,0x00,0x05,0x4a,0x0c]
 
-v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], v[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe6,0xd3,0x00,0x05,0x0a,0xec]
+v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], v[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe6,0xd3,0x00,0x05,0x4a,0xec]
 
-v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], a[2:3], v[2:17]
-// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], a[2:3], v[2:17] ; encoding: [0x00,0x00,0xe6,0xd3,0x00,0x05,0x0a,0x1c]
+v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], a[2:3], v[18:33]
+// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], a[2:3], v[18:33] ; encoding: [0x00,0x00,0xe6,0xd3,0x00,0x05,0x4a,0x1c]
 
-v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], a[2:3], v[2:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe6,0xd3,0x00,0x05,0x0a,0xfc]
+v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f32_32x32x8bf16_1k v[0:15], a[0:1], a[2:3], v[18:33] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe6,0xd3,0x00,0x05,0x4a,0xfc]
 
 v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], -2.0
 // GFX90A: v_mfma_f32_32x32x8bf16_1k a[0:15], v[0:1], v[2:3], -2.0 ; encoding: [0x00,0x80,0xe6,0xd3,0x00,0x05,0xd6,0x03]
@@ -2427,11 +2427,11 @@ v_mfma_f32_16x16x16bf16_1k a[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 v_mfma_f32_16x16x16bf16_1k v[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f32_16x16x16bf16_1k v[0:3], a[0:1], a[2:3], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe7,0xd3,0x00,0x05,0xd6,0xfb]
 
-v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[2:9]
-// GFX90A: v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[2:9] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[10:17]
+// GFX90A: v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[10:17] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x2a,0x04]
 
-v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[2:9] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[2:9] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xee,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[10:17] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], v[10:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xee,0xd3,0x00,0x05,0x2a,0xe4]
 
 v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], -2.0
 // GFX90A: v_mfma_f64_16x16x4f64 v[0:7], v[0:1], v[2:3], -2.0 ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0xd6,0x03]
@@ -2451,11 +2451,11 @@ v_mfma_f64_4x4x4f64 v[0:1], v[0:1], v[2:3], -2.0
 v_mfma_f64_4x4x4f64 v[0:1], v[0:1], v[2:3], -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f64_4x4x4f64 v[0:1], v[0:1], v[2:3], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xef,0xd3,0x00,0x05,0xd6,0xe3]
 
-v_mfma_f64_16x16x4f64 v[0:7], a[0:1], v[2:3], v[2:9]
-// GFX90A: v_mfma_f64_16x16x4f64 v[0:7], a[0:1], v[2:3], v[2:9] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f64_16x16x4f64 v[0:7], a[0:1], v[2:3], v[10:17]
+// GFX90A: v_mfma_f64_16x16x4f64 v[0:7], a[0:1], v[2:3], v[10:17] ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0x2a,0x0c]
 
-v_mfma_f64_16x16x4f64 v[0:7], v[0:1], a[2:3], v[2:9] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f64_16x16x4f64 v[0:7], v[0:1], a[2:3], v[2:9] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xee,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f64_16x16x4f64 v[0:7], v[0:1], a[2:3], v[10:17] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f64_16x16x4f64 v[0:7], v[0:1], a[2:3], v[10:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xee,0xd3,0x00,0x05,0x2a,0xf4]
 
 v_mfma_f64_16x16x4f64 v[0:7], a[0:1], a[2:3], -2.0
 // GFX90A: v_mfma_f64_16x16x4f64 v[0:7], a[0:1], a[2:3], -2.0 ; encoding: [0x00,0x00,0xee,0xd3,0x00,0x05,0xd6,0x1b]
@@ -2469,11 +2469,11 @@ v_mfma_f64_4x4x4f64 v[0:1], v[0:1], a[2:3], v[2:3] cbsz:3 abid:2 blgp:7
 v_mfma_f64_4x4x4f64 v[0:1], a[0:1], a[2:3], -2.0
 // GFX90A: v_mfma_f64_4x4x4f64 v[0:1], a[0:1], a[2:3], -2.0 ; encoding: [0x00,0x00,0xef,0xd3,0x00,0x05,0xd6,0x1b]
 
-v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[2:9]
-// GFX90A: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[2:9] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x0a,0x04]
+v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[10:17]
+// GFX90A: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[10:17] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x2a,0x04]
 
-v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[2:9] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[2:9] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xee,0xd3,0x00,0x05,0x0a,0xe4]
+v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[10:17] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], a[10:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xee,0xd3,0x00,0x05,0x2a,0xe4]
 
 v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], -2.0
 // GFX90A: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], v[2:3], -2.0 ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0xd6,0x03]
@@ -2496,11 +2496,11 @@ v_mfma_f64_4x4x4f64 a[0:1], v[0:1], v[2:3], 0
 v_mfma_f64_4x4x4f64 a[0:1], v[0:1], v[2:3], -2.0 cbsz:3 abid:2 blgp:7
 // GFX90A: v_mfma_f64_4x4x4f64 a[0:1], v[0:1], v[2:3], -2.0 cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xef,0xd3,0x00,0x05,0xd6,0xe3]
 
-v_mfma_f64_16x16x4f64 a[0:7], a[0:1], v[2:3], a[2:9]
-// GFX90A: v_mfma_f64_16x16x4f64 a[0:7], a[0:1], v[2:3], a[2:9] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x0a,0x0c]
+v_mfma_f64_16x16x4f64 a[0:7], a[0:1], v[2:3], a[10:17]
+// GFX90A: v_mfma_f64_16x16x4f64 a[0:7], a[0:1], v[2:3], a[10:17] ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0x2a,0x0c]
 
-v_mfma_f64_16x16x4f64 a[0:7], v[0:1], a[2:3], a[2:9] cbsz:3 abid:2 blgp:7
-// GFX90A: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], a[2:3], a[2:9] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xee,0xd3,0x00,0x05,0x0a,0xf4]
+v_mfma_f64_16x16x4f64 a[0:7], v[0:1], a[2:3], a[10:17] cbsz:3 abid:2 blgp:7
+// GFX90A: v_mfma_f64_16x16x4f64 a[0:7], v[0:1], a[2:3], a[10:17] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x93,0xee,0xd3,0x00,0x05,0x2a,0xf4]
 
 v_mfma_f64_16x16x4f64 a[0:7], a[0:1], a[2:3], -2.0
 // GFX90A: v_mfma_f64_16x16x4f64 a[0:7], a[0:1], a[2:3], -2.0 ; encoding: [0x00,0x80,0xee,0xd3,0x00,0x05,0xd6,0x1b]
diff --git a/llvm/test/MC/AMDGPU/mai.s b/llvm/test/MC/AMDGPU/mai.s
index 5629df067590b..221d3b3f029da 100644
--- a/llvm/test/MC/AMDGPU/mai.s
+++ b/llvm/test/MC/AMDGPU/mai.s
@@ -43,53 +43,59 @@ v_accvgpr_write a2, shared_base
 v_accvgpr_write a2, pops_exiting_wave_id
 // NOGFX908: error: source operand must be either a VGPR or an inline constant
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32]
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31]
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x02,0x04]
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[33:64]
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[33:64] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x86,0x04]
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32]
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x86,0xe4]
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[33:64]
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[33:64] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x86,0x14]
 
-v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32]
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], v0, a1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x86,0xf4]
 
-v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[33:64]
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[33:64] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x86,0x0c]
 
-v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32]
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, v1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x86,0xec]
 
-v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[33:64]
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[33:64] ; encoding: [0x00,0x00,0xc0,0xd3,0x00,0x03,0x86,0x1c]
 
-v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16]
-// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x1f32 a[0:31], a0, a1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc0,0xd3,0x00,0x03,0x86,0xfc]
 
-v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[17:32]
+// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[17:32] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x46,0x04]
 
-v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16]
-// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x46,0xe4]
 
-v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[17:32]
+// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[17:32] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x46,0x14]
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16]
-// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x1f32 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x46,0xf4]
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[17:32]
+// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[17:32] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x46,0x0c]
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16]
-// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x46,0xec]
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[17:32]
+// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[17:32] ; encoding: [0x00,0x00,0xc1,0xd3,0x00,0x03,0x46,0x1c]
+
+v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x1f32 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc1,0xd3,0x00,0x03,0x46,0xfc]
+
+v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4]
+// GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x04]
 
 v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4]
 // GFX908: v_mfma_f32_4x4x1f32 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xc2,0xd3,0x00,0x03,0x06,0x04]
@@ -115,29 +121,29 @@ v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4]
 v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7
 // GFX908: v_mfma_f32_4x4x1f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc2,0xd3,0x00,0x03,0x06,0xfc]
 
-v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16]
-// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[17:32]
+// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[17:32] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x46,0x04]
 
-v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x46,0xe4]
 
-v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16]
-// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[17:32]
+// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[17:32] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x46,0x14]
 
-v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x2f32 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x46,0xf4]
 
-v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16]
-// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[17:32]
+// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[17:32] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x46,0x0c]
 
-v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x46,0xec]
 
-v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16]
-// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[17:32]
+// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[17:32] ; encoding: [0x00,0x00,0xc4,0xd3,0x00,0x03,0x46,0x1c]
 
-v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x2f32 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc4,0xd3,0x00,0x03,0x46,0xfc]
 
 v_mfma_f32_16x16x4f32 a[0:3], v0, v1, a[1:4]
 // GFX908: v_mfma_f32_16x16x4f32 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xc5,0xd3,0x00,0x03,0x06,0x04]
@@ -163,53 +169,53 @@ v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4]
 v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7
 // GFX908: v_mfma_f32_16x16x4f32 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc5,0xd3,0x00,0x03,0x06,0xfc]
 
-v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32]
-// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[33:64]
+// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[33:64] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x86,0x04]
 
-v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], v[1:2], a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x86,0xe4]
 
-v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32]
-// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[33:64]
+// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[33:64] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x86,0x14]
 
-v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x4f16 a[0:31], v[0:1], a[1:2], a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x86,0xf4]
 
-v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32]
-// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[33:64]
+// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[33:64] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x86,0x0c]
 
-v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], v[1:2], a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x86,0xec]
 
-v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32]
-// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[33:64]
+// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[33:64] ; encoding: [0x00,0x00,0xc8,0xd3,0x00,0x03,0x86,0x1c]
 
-v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x4f16 a[0:31], a[0:1], a[1:2], a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc8,0xd3,0x00,0x03,0x86,0xfc]
 
-v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16]
-// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[17:32]
+// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[17:32] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x46,0x04]
 
-v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], v[1:2], a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x46,0xe4]
 
-v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16]
-// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[17:32]
+// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[17:32] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x46,0x14]
 
-v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x4f16 a[0:15], v[0:1], a[1:2], a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x46,0xf4]
 
-v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16]
-// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[17:32]
+// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[17:32] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x46,0x0c]
 
-v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], v[1:2], a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x46,0xec]
 
-v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16]
-// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[17:32]
+// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[17:32] ; encoding: [0x00,0x00,0xc9,0xd3,0x00,0x03,0x46,0x1c]
 
-v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x4f16 a[0:15], a[0:1], a[1:2], a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xc9,0xd3,0x00,0x03,0x46,0xfc]
 
 v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], a[1:4]
 // GFX908: v_mfma_f32_4x4x4f16 a[0:3], v[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xca,0xd3,0x00,0x03,0x06,0x04]
@@ -235,29 +241,29 @@ v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4]
 v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7
 // GFX908: v_mfma_f32_4x4x4f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xca,0xd3,0x00,0x03,0x06,0xfc]
 
-v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16]
-// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[17:32]
+// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[17:32] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x46,0x04]
 
-v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], v[1:2], a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x46,0xe4]
 
-v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16]
-// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[17:32]
+// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[17:32] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x46,0x14]
 
-v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x8f16 a[0:15], v[0:1], a[1:2], a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x46,0xf4]
 
-v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16]
-// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[17:32]
+// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[17:32] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x46,0x0c]
 
-v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], v[1:2], a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x46,0xec]
 
-v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16]
-// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[17:32]
+// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[17:32] ; encoding: [0x00,0x00,0xcc,0xd3,0x00,0x03,0x46,0x1c]
 
-v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x8f16 a[0:15], a[0:1], a[1:2], a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcc,0xd3,0x00,0x03,0x46,0xfc]
 
 v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], a[1:4]
 // GFX908: v_mfma_f32_16x16x16f16 a[0:3], v[0:1], v[1:2], a[1:4] ; encoding: [0x00,0x00,0xcd,0xd3,0x00,0x03,0x06,0x04]
@@ -283,53 +289,53 @@ v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4]
 v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7
 // GFX908: v_mfma_f32_16x16x16f16 a[0:3], a[0:1], a[1:2], a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xcd,0xd3,0x00,0x03,0x06,0xfc]
 
-v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32]
-// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[33:64]
+// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[33:64] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x86,0x04]
 
-v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, v1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x86,0xe4]
 
-v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32]
-// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[33:64]
+// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[33:64] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x86,0x14]
 
-v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_32x32x4i8 a[0:31], v0, a1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x86,0xf4]
 
-v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32]
-// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[33:64]
+// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[33:64] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x86,0x0c]
 
-v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, v1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x86,0xec]
 
-v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32]
-// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[33:64]
+// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[33:64] ; encoding: [0x00,0x00,0xd0,0xd3,0x00,0x03,0x86,0x1c]
 
-v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_32x32x4i8 a[0:31], a0, a1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd0,0xd3,0x00,0x03,0x86,0xfc]
 
-v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16]
-// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[17:32]
+// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[17:32] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x46,0x04]
 
-v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x46,0xe4]
 
-v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16]
-// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[17:32]
+// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[17:32] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x46,0x14]
 
-v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_16x16x4i8 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x46,0xf4]
 
-v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16]
-// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[17:32]
+// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[17:32] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x46,0x0c]
 
-v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x46,0xec]
 
-v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16]
-// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[17:32]
+// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[17:32] ; encoding: [0x00,0x00,0xd1,0xd3,0x00,0x03,0x46,0x1c]
 
-v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_16x16x4i8 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd1,0xd3,0x00,0x03,0x46,0xfc]
 
 v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[1:4]
 // GFX908: v_mfma_i32_4x4x4i8 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xd2,0xd3,0x00,0x03,0x06,0x04]
@@ -355,29 +361,29 @@ v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4]
 v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7
 // GFX908: v_mfma_i32_4x4x4i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd2,0xd3,0x00,0x03,0x06,0xfc]
 
-v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16]
-// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[17:32]
+// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[17:32] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x46,0x04]
 
-v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x46,0xe4]
 
-v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16]
-// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[17:32]
+// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[17:32] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x46,0x14]
 
-v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_32x32x8i8 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x46,0xf4]
 
-v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16]
-// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[17:32]
+// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[17:32] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x46,0x0c]
 
-v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x46,0xec]
 
-v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16]
-// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[17:32]
+// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[17:32] ; encoding: [0x00,0x00,0xd4,0xd3,0x00,0x03,0x46,0x1c]
 
-v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_i32_32x32x8i8 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd4,0xd3,0x00,0x03,0x46,0xfc]
 
 v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[1:4]
 // GFX908: v_mfma_i32_16x16x16i8 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xd5,0xd3,0x00,0x03,0x06,0x04]
@@ -403,53 +409,53 @@ v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4]
 v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7
 // GFX908: v_mfma_i32_16x16x16i8 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xd5,0xd3,0x00,0x03,0x06,0xfc]
 
-v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32]
-// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[33:64]
+// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[33:64] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x86,0x04]
 
-v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, v1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x86,0xe4]
 
-v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32]
-// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[33:64]
+// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[33:64] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x86,0x14]
 
-v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], v0, a1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x86,0xf4]
 
-v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32]
-// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[33:64]
+// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[33:64] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x86,0x0c]
 
-v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, v1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x86,0xec]
 
-v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32]
-// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[33:64]
+// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[33:64] ; encoding: [0x00,0x00,0xe8,0xd3,0x00,0x03,0x86,0x1c]
 
-v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[1:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[33:64] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x2bf16 a[0:31], a0, a1, a[33:64] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe8,0xd3,0x00,0x03,0x86,0xfc]
 
-v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16]
-// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[17:32]
+// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[17:32] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x46,0x04]
 
-v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x46,0xe4]
 
-v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16]
-// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[17:32]
+// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[17:32] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x46,0x14]
 
-v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x46,0xf4]
 
-v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16]
-// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[17:32]
+// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[17:32] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x46,0x0c]
 
-v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x46,0xec]
 
-v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16]
-// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[17:32]
+// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[17:32] ; encoding: [0x00,0x00,0xe9,0xd3,0x00,0x03,0x46,0x1c]
 
-v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_16x16x2bf16 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xe9,0xd3,0x00,0x03,0x46,0xfc]
 
 v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, a[1:4]
 // GFX908: v_mfma_f32_4x4x2bf16 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xeb,0xd3,0x00,0x03,0x06,0x04]
@@ -475,29 +481,29 @@ v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4]
 v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7
 // GFX908: v_mfma_f32_4x4x2bf16 a[0:3], a0, a1, a[1:4] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xeb,0xd3,0x00,0x03,0x06,0xfc]
 
-v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16]
-// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x04]
+v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[17:32]
+// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[17:32] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x46,0x04]
 
-v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xe4]
+v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x46,0xe4]
 
-v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16]
-// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x14]
+v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[17:32]
+// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[17:32] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x46,0x14]
 
-v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xf4]
+v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], v0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x46,0xf4]
 
-v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16]
-// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x0c]
+v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[17:32]
+// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[17:32] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x46,0x0c]
 
-v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xec]
+v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, v1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x46,0xec]
 
-v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16]
-// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x06,0x1c]
+v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[17:32]
+// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[17:32] ; encoding: [0x00,0x00,0xec,0xd3,0x00,0x03,0x46,0x1c]
 
-v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7
-// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[1:16] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x06,0xfc]
+v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7
+// GFX908: v_mfma_f32_32x32x4bf16 a[0:15], a0, a1, a[17:32] cbsz:3 abid:2 blgp:7 ; encoding: [0x00,0x13,0xec,0xd3,0x00,0x03,0x46,0xfc]
 
 v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, a[1:4]
 // GFX908: v_mfma_f32_16x16x8bf16 a[0:3], v0, v1, a[1:4] ; encoding: [0x00,0x00,0xed,0xd3,0x00,0x03,0x06,0x04]
diff --git a/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s b/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s
index d2cd4383bb417..4056a8ac90333 100644
--- a/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s
+++ b/llvm/test/MC/AMDGPU/misaligned-vgpr-tuples-err.s
@@ -18,8 +18,8 @@ v_mfma_f32_32x32x8f16 a[0:15], a[1:2], v[0:1], a[0:15]
 v_mfma_i32_4x4x4i8 a[1:4], a0, v1, 2
 // GFX90A: error: invalid register class: vgpr tuples must be 64 bit aligned
 
-v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[1:16]
+v_mfma_f32_16x16x1f32 a[0:15], a0, v1, a[17:32]
 // GFX90A: error: invalid register class: vgpr tuples must be 64 bit aligned
 
-v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[1:32]
+v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[33:64]
 // GFX90A: error: invalid register class: vgpr tuples must be 64 bit aligned

From dcc595ea3cc7958f0fc3819e73b1c8b2c7c72802 Mon Sep 17 00:00:00 2001
From: Vladislav Khmelevsky <och95@yandex.ru>
Date: Thu, 27 Jan 2022 01:59:10 +0300
Subject: [PATCH 806/946] [BOLT] Fix DWARFv5 for aarch64

This patch reverts patch "DWARFv5 default: Switch bolt tests to use
DWARFv4 since Bolt doesn't support v5 yet" and places the -gdwarf-4 flag
to the global cflags config file.

Reviewed By: Amir

Differential Revision: https://reviews.llvm.org/D118283
---
 bolt/test/AArch64/asm-func-debug.test     | 2 +-
 bolt/test/AArch64/update-debug-reloc.test | 2 +-
 bolt/test/X86/asm-func-debug.test         | 2 +-
 bolt/test/X86/inline-debug-info.test      | 2 +-
 bolt/test/X86/inlined-function-mixed.test | 2 +-
 bolt/test/keep-aranges.test               | 4 ++--
 bolt/test/lit.cfg.py                      | 4 ++--
 bolt/test/non-empty-debug-line.test       | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/bolt/test/AArch64/asm-func-debug.test b/bolt/test/AArch64/asm-func-debug.test
index 57b43904371ba..8504eda026153 100644
--- a/bolt/test/AArch64/asm-func-debug.test
+++ b/bolt/test/AArch64/asm-func-debug.test
@@ -3,7 +3,7 @@
 #
 # The input test case foo() contains nops that we remove.
 
-RUN: %clang -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
+RUN: %clang %cflags -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
 RUN: llvm-bolt %t.exe -o %t -update-debug-sections
 RUN: llvm-dwarfdump -all %t | FileCheck %s
 
diff --git a/bolt/test/AArch64/update-debug-reloc.test b/bolt/test/AArch64/update-debug-reloc.test
index 88eff91655f57..873a886ae1ee8 100644
--- a/bolt/test/AArch64/update-debug-reloc.test
+++ b/bolt/test/AArch64/update-debug-reloc.test
@@ -1,7 +1,7 @@
 # Verify that bolt correctly handle AArch64 relocations in case of enabling
 # update-debug-sections option.
 
-RUN: %clang -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
+RUN: %clang %cflags -g %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
 RUN: llvm-bolt %t.exe -o %t -update-debug-sections
 
 CHECK: BOLT-INFO: Target architecture: aarch64
diff --git a/bolt/test/X86/asm-func-debug.test b/bolt/test/X86/asm-func-debug.test
index a022ff0822b31..9590701969712 100644
--- a/bolt/test/X86/asm-func-debug.test
+++ b/bolt/test/X86/asm-func-debug.test
@@ -3,7 +3,7 @@
 #
 # The input test case foo() contains nops that we remove.
 
-RUN: %clang -gdwarf-4 %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
+RUN: %clang -g %cflags %p/../Inputs/asm_foo.s %p/../Inputs/asm_main.c -o %t.exe
 RUN: llvm-bolt %t.exe -o %t -update-debug-sections
 RUN: llvm-dwarfdump -all %t | FileCheck %s
 
diff --git a/bolt/test/X86/inline-debug-info.test b/bolt/test/X86/inline-debug-info.test
index cab3fb9baa22e..300fa0be78914 100644
--- a/bolt/test/X86/inline-debug-info.test
+++ b/bolt/test/X86/inline-debug-info.test
@@ -3,7 +3,7 @@
 
 # REQUIRES: system-linux
 
-# RUN: %clang %cflags -O1 -gdwarf-4 %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
+# RUN: %clang %cflags -O1 -g %p/Inputs/inline-main.c %p/Inputs/inline-foo.c \
 # RUN:   -o %t.exe -Wl,-q
 # RUN: llvm-bolt %t.exe -update-debug-sections -print-debug-info \
 # RUN:   -print-only=main -print-after-lowering -force-inline=foo -o %t.bolt \
diff --git a/bolt/test/X86/inlined-function-mixed.test b/bolt/test/X86/inlined-function-mixed.test
index d2ed274ae6cc5..5b35496e85eed 100644
--- a/bolt/test/X86/inlined-function-mixed.test
+++ b/bolt/test/X86/inlined-function-mixed.test
@@ -2,7 +2,7 @@
 # debug info does not cause a crash.
 
 RUN: %clangxx %S/Inputs/inlined.cpp -c -o %T/inlined.o
-RUN: %clangxx %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -gdwarf-4
+RUN: %clangxx %cxxflags %S/Inputs/inlinee.cpp -c -o %T/inlinee.o -g
 RUN: %clangxx %T/inlined.o %T/inlinee.o -o %t
 
 RUN: llvm-bolt %t -o %t.bolt -update-debug-sections -reorder-blocks=reverse \
diff --git a/bolt/test/keep-aranges.test b/bolt/test/keep-aranges.test
index bd33686005048..ada08b54f0105 100644
--- a/bolt/test/keep-aranges.test
+++ b/bolt/test/keep-aranges.test
@@ -3,8 +3,8 @@
 
 REQUIRES: system-linux
 
-RUN: %clang %S/Inputs/icf_baz.c %S/Inputs/icf_main.c -Wl,--icf=all,--gdb-index \
-RUN:   -gdwarf-4 -o %t.exe -fuse-ld=lld
+RUN: %clang %cflags %S/Inputs/icf_baz.c %S/Inputs/icf_main.c -Wl,--icf=all,--gdb-index \
+RUN:   -g -o %t.exe -fuse-ld=lld
 RUN: llvm-bolt %t.exe -o %t -update-debug-sections -keep-aranges
 RUN: llvm-dwarfdump -debug-aranges %t | FileCheck %s
 
diff --git a/bolt/test/lit.cfg.py b/bolt/test/lit.cfg.py
index 0d8583c6f7142..68b4fc2a247ec 100644
--- a/bolt/test/lit.cfg.py
+++ b/bolt/test/lit.cfg.py
@@ -60,8 +60,8 @@
 llvm_config.use_clang()
 llvm_config.use_llvm_tool('lld', required=True, search_env='LLD')
 
-config.substitutions.append(('%cflags', '-no-pie'))
-config.substitutions.append(('%cxxflags', '-no-pie'))
+config.substitutions.append(('%cflags', '-no-pie -gdwarf-4'))
+config.substitutions.append(('%cxxflags', '-no-pie -gdwarf-4'))
 
 link_fdata_cmd = os.path.join(config.test_source_root, 'link_fdata.py')
 
diff --git a/bolt/test/non-empty-debug-line.test b/bolt/test/non-empty-debug-line.test
index 677efe319bba8..50c215ab91d19 100644
--- a/bolt/test/non-empty-debug-line.test
+++ b/bolt/test/non-empty-debug-line.test
@@ -3,7 +3,7 @@
 
 REQUIRES: system-linux
 
-RUN: %clang %S/Inputs/hello.c -gdwarf-4 -o %t
+RUN: %clang %cflags %S/Inputs/hello.c -g -o %t
 RUN: llvm-bolt %t -o %t1 -update-debug-sections -funcs=_start
 RUN: llvm-readobj -S %t > %t2
 RUN: llvm-readobj -S %t1 >> %t2

From bdb7837481c22b492f43e92865e83244671b3395 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20Bylica?= <chfast@gmail.com>
Date: Wed, 26 Jan 2022 15:57:42 +0100
Subject: [PATCH 807/946] [test][DAGCombine] Add more tests for carry diamond.
 NFC

---
 llvm/test/CodeGen/RISCV/addcarry.ll |  36 +++
 llvm/test/CodeGen/X86/addcarry.ll   | 332 ++++++++++++++++++++++++++++
 llvm/test/CodeGen/X86/subcarry.ll   | 253 +++++++++++++++++++++
 3 files changed, 621 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/addcarry.ll b/llvm/test/CodeGen/RISCV/addcarry.ll
index 18d1364f08d98..44d1f96758535 100644
--- a/llvm/test/CodeGen/RISCV/addcarry.ll
+++ b/llvm/test/CodeGen/RISCV/addcarry.ll
@@ -5,6 +5,7 @@
 ; Signed fixed point multiplication eventually expands down to an ADDCARRY.
 
 declare  i64 @llvm.smul.fix.i64  (i64, i64, i32)
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
 
 define i64 @addcarry(i64 %x, i64 %y) nounwind {
 ; RISCV32-LABEL: addcarry:
@@ -42,3 +43,38 @@ define i64 @addcarry(i64 %x, i64 %y) nounwind {
   %tmp = call i64 @llvm.smul.fix.i64(i64 %x, i64 %y, i32 2);
   ret i64 %tmp;
 }
+
+; negative test for combineCarryDiamond(): ADDCARRY not legal
+define { i32, i32, i1 } @addcarry_2x32(i32 %x0, i32 %x1, i32 %y0, i32 %y1) nounwind {
+; RISCV32-LABEL: addcarry_2x32:
+; RISCV32:       # %bb.0:
+; RISCV32-NEXT:    add a3, a1, a3
+; RISCV32-NEXT:    sltu a1, a3, a1
+; RISCV32-NEXT:    add a4, a2, a4
+; RISCV32-NEXT:    sltu a2, a4, a2
+; RISCV32-NEXT:    add a1, a4, a1
+; RISCV32-NEXT:    sltu a4, a1, a4
+; RISCV32-NEXT:    or a2, a2, a4
+; RISCV32-NEXT:    sw a3, 0(a0)
+; RISCV32-NEXT:    sw a1, 4(a0)
+; RISCV32-NEXT:    sb a2, 8(a0)
+; RISCV32-NEXT:    ret
+  %t0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x0, i32 %y0)
+  %s0 = extractvalue { i32, i1 } %t0, 0
+  %k0 = extractvalue { i32, i1 } %t0, 1
+
+  %t1 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x1, i32 %y1)
+  %s1 = extractvalue { i32, i1 } %t1, 0
+  %k1 = extractvalue { i32, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i32
+  %t2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %s1, i32 %zk0)
+  %s2 = extractvalue { i32, i1 } %t2, 0
+  %k2 = extractvalue { i32, i1 } %t2, 1
+  %k = or i1 %k1, %k2
+
+  %r0 = insertvalue { i32, i32, i1 } poison, i32 %s0, 0
+  %r1 = insertvalue { i32, i32, i1 } %r0, i32 %s2, 1
+  %r = insertvalue { i32, i32, i1 } %r1, i1 %k, 2
+  ret { i32, i32, i1 } %r
+}
diff --git a/llvm/test/CodeGen/X86/addcarry.ll b/llvm/test/CodeGen/X86/addcarry.ll
index 1f66d2c5a6c9d..02c2110b37b9c 100644
--- a/llvm/test/CodeGen/X86/addcarry.ll
+++ b/llvm/test/CodeGen/X86/addcarry.ll
@@ -3,6 +3,7 @@
 
 declare { i8, i64 } @llvm.x86.addcarry.64(i8, i64, i64)
 declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64) #1
+declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64) #1
 
 define i128 @add128(i128 %a, i128 %b) nounwind {
 ; CHECK-LABEL: add128:
@@ -415,6 +416,337 @@ define i128 @addcarry_to_subcarry(i64 %a, i64 %b) {
   ret i128 %sub2
 }
 
+; basic test for combineCarryDiamond()
+define { i64, i64, i1 } @addcarry_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k1, %k2
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with carries behind zext/and/trunc
+define { i64, i64, i1 } @addcarry_hidden_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_hidden_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rcx, %rsi
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq $0, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    orb %dil, %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+  %k1i8 = zext i1 %k1 to i8
+  %k1and = and i8 %k1i8, 1
+  %k1hidden = trunc i8 %k1and to i1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+
+  %k = or i1 %k1hidden, %k2
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with carries behind zext/and/trunc
+define { i64, i64, i1 } @addcarry_hidden2_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_hidden2_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k2i8 = zext i1 %k2 to i8
+  %k2and = and i8 %k2i8, 1
+  %k2hidden = trunc i8 %k2and to i1
+
+  %k = or i1 %k1, %k2hidden
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with or operands reversed
+define { i64, i64, i1 } @addcarry_2x64_or_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64_or_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %zk0, i64 %s1)  ; reversed
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k2, %k1  ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with xor operands reversed
+define { i64, i64, i1 } @addcarry_2x64_xor_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64_xor_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = xor i1 %k2, %k1  ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with and operands reversed
+define { i64, i64, i1 } @addcarry_2x64_and_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64_and_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = and i1 %k2, %k1  ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with add operands reversed
+define { i64, i64, i1 } @addcarry_2x64_add_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_2x64_add_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    adcq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = add i1 %k2, %k1  ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; Here %carryin is considered as valid carry flag for combining into ADDCARRY
+; although %carryin does not come from any carry-producing instruction.
+define { i64, i1 } @addcarry_fake_carry(i64 %a, i64 %b, i1 %carryin) {
+; CHECK-LABEL: addcarry_fake_carry:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    andb $1, %dl
+; CHECK-NEXT:    addb $-1, %dl
+; CHECK-NEXT:    adcq %rsi, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    retq
+    %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+    %partial = extractvalue { i64, i1 } %t1, 0
+    %k1 = extractvalue { i64, i1 } %t1, 1
+
+    %zcarryin = zext i1 %carryin to i64
+    %sum = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %partial, i64 %zcarryin)
+    %k2 = extractvalue { i64, i1 } %sum, 1
+
+    %carryout = or i1 %k1, %k2
+
+    %ret = insertvalue { i64, i1 } %sum, i1 %carryout, 1
+    ret { i64, i1 } %ret
+}
+
+; negative test: %carryin does not look like carry
+define { i64, i1 } @addcarry_carry_not_zext(i64 %a, i64 %b, i64 %carryin) {
+; CHECK-LABEL: addcarry_carry_not_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    addq %rsi, %rax
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    retq
+    %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+    %partial = extractvalue { i64, i1 } %t1, 0
+    %k1 = extractvalue { i64, i1 } %t1, 1
+
+    %sum = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %partial, i64 %carryin)
+    %k2 = extractvalue { i64, i1 } %sum, 1
+
+    %carryout = or i1 %k1, %k2
+
+    %ret = insertvalue { i64, i1 } %sum, i1 %carryout, 1
+    ret { i64, i1 } %ret
+}
+
+; negative test: %carryin does not look like carry
+define { i64, i1 } @addcarry_carry_not_i1(i64 %a, i64 %b, i8 %carryin) {
+; CHECK-LABEL: addcarry_carry_not_i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT:    addq %rsi, %rdi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movzbl %dl, %eax
+; CHECK-NEXT:    addq %rdi, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    retq
+    %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+    %partial = extractvalue { i64, i1 } %t1, 0
+    %k1 = extractvalue { i64, i1 } %t1, 1
+
+    %zcarryin = zext i8 %carryin to i64
+    %sum = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %partial, i64 %zcarryin)
+    %k2 = extractvalue { i64, i1 } %sum, 1
+
+    %carryout = or i1 %k1, %k2
+
+    %ret = insertvalue { i64, i1 } %sum, i1 %carryout, 1
+    ret { i64, i1 } %ret
+}
+
+; negative test for combineCarryDiamond(): uaddo mixed with usubo
+define { i64, i64, i1 } @addcarry_mixed_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: addcarry_mixed_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    addq %rdx, %rax
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    addq %rcx, %rsi
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    subq %rdi, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    orb %dl, %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k1, %k2
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
 %struct.U320 = type { [5 x i64] }
 
 define i32 @add_U320_without_i128_add(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
diff --git a/llvm/test/CodeGen/X86/subcarry.ll b/llvm/test/CodeGen/X86/subcarry.ll
index 6a2681210b465..ecb43031f632c 100644
--- a/llvm/test/CodeGen/X86/subcarry.ll
+++ b/llvm/test/CodeGen/X86/subcarry.ll
@@ -187,6 +187,238 @@ define i64 @sub_from_carry(i64 %x, i64 %y, i64* %valout, i64 %z) {
   ret i64 %res
 }
 
+; basic test for combineCarryDiamond()
+define { i64, i64, i1 } @subcarry_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k1, %k2
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with or operands reversed
+define { i64, i64, i1 } @subcarry_2x64_or_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64_or_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = or i1 %k2, %k1  ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with xor operands reversed
+define { i64, i64, i1 } @subcarry_2x64_xor_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64_xor_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = xor i1 %k2, %k1  ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with and operands reversed
+define { i64, i64, i1 } @subcarry_2x64_and_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64_and_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    sbbq %rcx, %rsi
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = and i1 %k2, %k1  ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; basic test for combineCarryDiamond() with add operands reversed
+define { i64, i64, i1 } @subcarry_2x64_add_reversed(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_2x64_add_reversed:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    movq %rsi, %rdx
+; CHECK-NEXT:    subq %rcx, %rdx
+; CHECK-NEXT:    subq %rdi, %rdx
+; CHECK-NEXT:    setb %dil
+; CHECK-NEXT:    cmpq %rcx, %rsi
+; CHECK-NEXT:    adcb $0, %dil
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    retq
+  %t0 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x0, i64 %y0)
+  %s0 = extractvalue { i64, i1 } %t0, 0
+  %k0 = extractvalue { i64, i1 } %t0, 1
+
+  %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %x1, i64 %y1)
+  %s1 = extractvalue { i64, i1 } %t1, 0
+  %k1 = extractvalue { i64, i1 } %t1, 1
+
+  %zk0 = zext i1 %k0 to i64
+  %t2 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %s1, i64 %zk0)
+  %s2 = extractvalue { i64, i1 } %t2, 0
+  %k2 = extractvalue { i64, i1 } %t2, 1
+  %k = add i1 %k2, %k1  ; reverse natural order of operands
+
+  %r0 = insertvalue { i64, i64, i1 } poison, i64 %s0, 0
+  %r1 = insertvalue { i64, i64, i1 } %r0, i64 %s2, 1
+  %r = insertvalue { i64, i64, i1 } %r1, i1 %k, 2
+  ret { i64, i64, i1 } %r
+}
+
+; Here %carryin is considered as valid carry flag for combining into ADDCARRY
+; although %carryin does not come from any carry-producing instruction.
+define { i64, i1 } @subcarry_fake_carry(i64 %a, i64 %b, i1 %carryin) {
+; CHECK-LABEL: subcarry_fake_carry:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    andb $1, %dl
+; CHECK-NEXT:    addb $-1, %dl
+; CHECK-NEXT:    sbbq %rsi, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    retq
+    %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+    %partial = extractvalue { i64, i1 } %t1, 0
+    %k1 = extractvalue { i64, i1 } %t1, 1
+
+    %zcarryin = zext i1 %carryin to i64
+    %s = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %partial, i64 %zcarryin)
+    %k2 = extractvalue { i64, i1 } %s, 1
+
+    %carryout = or i1 %k1, %k2
+
+    %ret = insertvalue { i64, i1 } %s, i1 %carryout, 1
+    ret { i64, i1 } %ret
+}
+
+; negative test: %carryin does not look like carry
+define { i64, i1 } @subcarry_carry_not_zext(i64 %a, i64 %b, i64 %carryin) {
+; CHECK-LABEL: subcarry_carry_not_zext:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    retq
+    %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+    %partial = extractvalue { i64, i1 } %t1, 0
+    %k1 = extractvalue { i64, i1 } %t1, 1
+
+    %s = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %partial, i64 %carryin)
+    %k2 = extractvalue { i64, i1 } %s, 1
+
+    %carryout = or i1 %k1, %k2
+
+    %ret = insertvalue { i64, i1 } %s, i1 %carryout, 1
+    ret { i64, i1 } %ret
+}
+
+; negative test: %carryin does not look like carry
+define { i64, i1 } @subcarry_carry_not_i1(i64 %a, i64 %b, i8 %carryin) {
+; CHECK-LABEL: subcarry_carry_not_i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edx killed $edx def $rdx
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    subq %rsi, %rax
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    movzbl %dl, %edx
+; CHECK-NEXT:    subq %rdx, %rax
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    retq
+    %t1 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+    %partial = extractvalue { i64, i1 } %t1, 0
+    %k1 = extractvalue { i64, i1 } %t1, 1
+
+    %zcarryin = zext i8 %carryin to i64
+    %s = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %partial, i64 %zcarryin)
+    %k2 = extractvalue { i64, i1 } %s, 1
+
+    %carryout = or i1 %k1, %k2
+
+    %ret = insertvalue { i64, i1 } %s, i1 %carryout, 1
+    ret { i64, i1 } %ret
+}
+
 %struct.U320 = type { [5 x i64] }
 
 define i32 @sub_U320_without_i128_or(%struct.U320* nocapture dereferenceable(40) %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) {
@@ -433,3 +665,24 @@ define void @sub_U256_without_i128_or_recursive(%uint256* sret(%uint256) %0, %ui
   store i64 %37, i64* %41, align 8
   ret void
 }
+
+define i1 @subcarry_ult_2x64(i64 %x0, i64 %x1, i64 %y0, i64 %y1) nounwind {
+; CHECK-LABEL: subcarry_ult_2x64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpq %rdx, %rdi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    subq %rcx, %rsi
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    cmpq %rax, %rsi
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    orb %cl, %al
+; CHECK-NEXT:    retq
+  %b0 = icmp ult i64 %x0, %y0
+  %d1 = sub i64 %x1, %y1
+  %b10 = icmp ult i64 %x1, %y1
+  %b0z = zext i1 %b0 to i64
+  %b11 = icmp ult i64 %d1, %b0z
+  %b1 = or i1 %b10, %b11
+  ret i1 %b1
+}

From 0a4bbdabe32ef370a790261a0f4ec118f0f5342e Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 15:40:30 -0800
Subject: [PATCH 808/946] [gn build] Remove unnecessary include_dirs

These are already part of crt_code.
---
 llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn     | 3 ---
 llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn     | 3 ---
 llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn  | 5 +----
 llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn | 3 ---
 4 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
index f05168bd57c27..57374f5ad2ae4 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
@@ -102,9 +102,6 @@ target(asan_target_type, "asan") {
     sources += [ "asan_interceptors_vfork.S" ]
   }
 
-  # To be able to include sanitizer_common.
-  include_dirs = [ ".." ]
-
   # FIXME: have SANITIZER_COMMON_CFLAGS thingy? should fno-builtin be in
   # crt_code?
   cflags = [ "-fno-builtin" ]
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn
index 92be6dffa7562..ea933b30f7932 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn
@@ -39,9 +39,6 @@ static_library("msan") {
     "msan_thread.h",
   ]
 
-  # To be able to include sanitizer_common.
-  include_dirs = [ ".." ]
-
   # FIXME: have SANITIZER_COMMON_CFLAGS thingy? should fno-builtin be in
   # crt_code?
   cflags = [ "-fno-builtin" ]
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn
index a0bc9b72c3652..21e3ae8d20284 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn
@@ -25,10 +25,7 @@ static_library("profile") {
     cflags += [ "/wd4221" ]
   }
 
-  include_dirs = [
-    "..",
-    "../../include",
-  ]
+  include_dirs = [ "../../include" ]
 
   sources = [
     "GCDAProfiling.c",
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn
index db354d907a4e5..3f6db757f48ea 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn
@@ -128,9 +128,6 @@ target(tsan_target_type, "rtl") {
     sources += [ "tsan_rtl_s390x.S" ]
   }
 
-  # To be able to include sanitizer_common.
-  include_dirs = [ ".." ]
-
   # FIXME: have SANITIZER_COMMON_CFLAGS thingy? should fno-builtin be in
   # crt_code?
   cflags += [ "-fno-builtin" ]

From 7ebd22c504cb19c1a954b322dca7f57aa0b4cba2 Mon Sep 17 00:00:00 2001
From: Mehdi Amini <joker.eph@gmail.com>
Date: Wed, 26 Jan 2022 23:59:24 +0000
Subject: [PATCH 809/946] Revert "[mlir][LLVM] Add support for operand_attrs to
 InlineAsmOp"

This reverts commit e6ce2c0b8d5f8253791bf87145669c58328c30db.

The test is failing in CI right now.
---
 mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td   | 10 +---
 .../X86Vector/Transforms/AVXTranspose.cpp     |  5 +-
 .../LLVMIR/LLVMToLLVMIRTranslation.cpp        | 25 +---------
 .../test-inline-asm-vector-avx512.mlir        | 49 -------------------
 4 files changed, 4 insertions(+), 85 deletions(-)
 delete mode 100644 mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir

diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
index 90cd333de468b..5bd438f1c0f02 100644
--- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
+++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td
@@ -1934,8 +1934,7 @@ def LLVM_InlineAsmOp : LLVM_Op<"inline_asm", []> {
         UnitAttr:$has_side_effects,
         UnitAttr:$is_align_stack,
         OptionalAttr<
-          DefaultValuedAttr<AsmATTOrIntel, "AsmDialect::AD_ATT">>:$asm_dialect,
-        OptionalAttr<ArrayAttr>:$operand_attrs);
+          DefaultValuedAttr<AsmATTOrIntel, "AsmDialect::AD_ATT">>:$asm_dialect);
 
   let results = (outs Optional<LLVM_Type>:$res);
 
@@ -1943,16 +1942,9 @@ def LLVM_InlineAsmOp : LLVM_Op<"inline_asm", []> {
     (`has_side_effects` $has_side_effects^)?
     (`is_align_stack` $is_align_stack^)?
     (`asm_dialect` `=` $asm_dialect^)?
-    (`operand_attrs` `=` $operand_attrs^)?
     attr-dict
     $asm_string `,` $constraints
     operands `:` functional-type(operands, results)
    }];
-
-  let extraClassDeclaration = [{
-    static StringRef getElementTypeAttrName() {
-      return "elementtype";
-    }
-  }];
 }
 #endif // LLVMIR_OPS
diff --git a/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp b/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
index 20d9d4411535e..3f2db00b388dc 100644
--- a/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
+++ b/mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp
@@ -37,10 +37,7 @@ Value mlir::x86vector::avx2::inline_asm::mm256BlendPsAsm(
   SmallVector<Value> asmVals{v1, v2};
   auto asmStr = llvm::formatv(asmTp, llvm::format_hex(mask, /*width=*/2)).str();
   auto asmOp = b.create<LLVM::InlineAsmOp>(
-      v1.getType(), /*operands=*/asmVals, /*asm_string=*/asmStr,
-      /*constraints=*/asmCstr, /*has_side_effects=*/false,
-      /*is_align_stack=*/false, /*asm_dialect=*/asmDialectAttr,
-      /*operand_attrs=*/ArrayAttr());
+      v1.getType(), asmVals, asmStr, asmCstr, false, false, asmDialectAttr);
   return asmOp.getResult(0);
 }
 
diff --git a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
index 6701ccb830d53..143630ce53ae0 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.cpp
@@ -330,32 +330,11 @@ convertOperationImpl(Operation &opInst, llvm::IRBuilderBase &builder,
                                    inlineAsmOp.getConstraints(),
                                    inlineAsmOp.getHasSideEffects(),
                                    inlineAsmOp.getIsAlignStack());
-    llvm::CallInst *inst = builder.CreateCall(
+    llvm::Value *result = builder.CreateCall(
         inlineAsmInst,
         moduleTranslation.lookupValues(inlineAsmOp.getOperands()));
-    if (auto maybeOperandAttrs = inlineAsmOp.getOperandAttrs()) {
-      llvm::AttributeList attrList;
-      for (const auto &it : llvm::enumerate(*maybeOperandAttrs)) {
-        Attribute attr = it.value();
-        if (!attr)
-          continue;
-        DictionaryAttr dAttr = attr.cast<DictionaryAttr>();
-        TypeAttr tAttr =
-            dAttr.get(InlineAsmOp::getElementTypeAttrName()).cast<TypeAttr>();
-        llvm::AttrBuilder b(moduleTranslation.getLLVMContext());
-        llvm::Type *ty = moduleTranslation.convertType(tAttr.getValue());
-        b.addTypeAttr(llvm::Attribute::ElementType, ty);
-        // shift to account for the returned value (this is always 1 aggregate
-        // value in LLVM).
-        int shift = (opInst.getNumResults() > 0) ? 1 : 0;
-        attrList = attrList.addAttributesAtIndex(
-            moduleTranslation.getLLVMContext(), it.index() + shift, b);
-      }
-      inst->setAttributes(attrList);
-    }
-
     if (opInst.getNumResults() != 0)
-      moduleTranslation.mapValue(opInst.getResult(0), inst);
+      moduleTranslation.mapValue(opInst.getResult(0), result);
     return success();
   }
 
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir b/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir
deleted file mode 100644
index 7ce9c1a98f331..0000000000000
--- a/mlir/test/Integration/Dialect/Vector/CPU/X86Vector/test-inline-asm-vector-avx512.mlir
+++ /dev/null
@@ -1,49 +0,0 @@
-// RUN: mlir-opt %s -convert-linalg-to-loops -convert-vector-to-scf='full-unroll=true' -lower-affine -convert-scf-to-std -convert-vector-to-llvm -convert-memref-to-llvm  -convert-std-to-llvm='use-bare-ptr-memref-call-conv=1' -convert-arith-to-llvm -reconcile-unrealized-casts |\
-// RUN: mlir-translate --mlir-to-llvmir |\
-// RUN: %lli --entry-function=entry --mattr="avx512f" | \
-// RUN: FileCheck %s
-
-module {
-
-  // printf format string "%i\n", char by char:   %    i  \n  0
-  llvm.mlir.global private @pct_i_newline(dense<[37, 105, 10, 0]> : tensor<4xi8>)
-    : !llvm.array<4xi8>
-  // an array of 16 i32 of values [0..15]
-  llvm.mlir.global private @const16(
-    dense<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]> : tensor<16 x i32>)
-      : !llvm.array<16 x i32>
-
-  // declare void @printf(i8*, ...)
-  llvm.func @printf(!llvm.ptr<i8>, ...)
-
-  llvm.func @entry() {
-    %c0 = llvm.mlir.constant(0 : index) : i64
-
-    %1 = llvm.mlir.addressof @const16 : !llvm.ptr<array<16 x i32>>
-    %ptr = llvm.getelementptr %1[%c0, %c0]
-      : (!llvm.ptr<array<16 x i32>>, i64, i64) -> !llvm.ptr<i32>
-    %ptr2 = llvm.bitcast %ptr :  !llvm.ptr<i32> to !llvm.ptr<vector<16xi32>>
-    // operand_attrs of *m operands need to be piped through to LLVM for
-    // verification to pass.
-    %v = llvm.inline_asm
-        asm_dialect = intel
-        operand_attrs = [{ elementtype = vector<16xi32> }]
-        "vmovdqu32 $0, $1", "=x,*m" %ptr2
-      : (!llvm.ptr<vector<16xi32>>) -> vector<16xi32>
-
-    %2 = llvm.mlir.addressof @pct_i_newline : !llvm.ptr<array<4xi8>>
-    %ptrfmt = llvm.getelementptr %2[%c0, %c0]
-      : (!llvm.ptr<array<4xi8>>, i64, i64) -> !llvm.ptr<i8>
-
-    // CHECK: 0
-    %v0 = vector.extract %v[0]: vector<16xi32>
-    llvm.call @printf(%ptrfmt, %v0) : (!llvm.ptr<i8>, i32) -> ()
-
-    // CHECK: 9
-    %v9 = vector.extract %v[9]: vector<16xi32>
-    llvm.call @printf(%ptrfmt, %v9) : (!llvm.ptr<i8>, i32) -> ()
-
-    llvm.return
-  }
-}
-

From 19f67ee29e06dc9bb65ac037a73bcd6d57e1e2e1 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik@google.com>
Date: Wed, 26 Jan 2022 15:22:47 -0800
Subject: [PATCH 810/946] [mlir][amx] fix some whitespace issues in AMX tests

Reviewed By: dcaballe

Differential Revision: https://reviews.llvm.org/D118289
---
 mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir | 4 ++--
 mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
index 3d80ca52e75c7..c9a80e57e1ea3 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf.mlir
@@ -8,7 +8,7 @@
 // Multiply into zeroed destination.
 func @kernel1(%arg0: memref<2x4xbf16>,
               %arg1: memref<2x4xbf16>,
-	      %arg2: memref<2x2xf32>) {
+              %arg2: memref<2x2xf32>) {
   %0 = arith.constant 0 : index
   %1 = amx.tile_load %arg0[%0, %0] : memref<2x4xbf16>  into vector<2x4xbf16>
   %2 = amx.tile_load %arg1[%0, %0] : memref<2x4xbf16>  into vector<2x4xbf16>
@@ -21,7 +21,7 @@ func @kernel1(%arg0: memref<2x4xbf16>,
 // Multiply and update into destination.
 func @kernel2(%arg0: memref<2x4xbf16>,
               %arg1: memref<2x4xbf16>,
-	      %arg2: memref<2x2xf32>) {
+              %arg2: memref<2x2xf32>) {
   %0 = arith.constant 0 : index
   %1 = amx.tile_load %arg0[%0, %0] : memref<2x4xbf16>  into vector<2x4xbf16>
   %2 = amx.tile_load %arg1[%0, %0] : memref<2x4xbf16>  into vector<2x4xbf16>
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
index 1f61c7dc98b46..7bc400b1abb33 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli.mlir
@@ -8,7 +8,7 @@
 // Multiply into zeroed destination.
 func @kernel1(%arg0: memref<2x8xi8>,
               %arg1: memref<2x8xi8>,
-	      %arg2: memref<2x2xi32>) {
+              %arg2: memref<2x2xi32>) {
   %0 = arith.constant 0 : index
   %1 = amx.tile_load %arg0[%0, %0] : memref<2x8xi8>  into vector<2x8xi8>
   %2 = amx.tile_load %arg1[%0, %0] : memref<2x8xi8>  into vector<2x8xi8>
@@ -21,7 +21,7 @@ func @kernel1(%arg0: memref<2x8xi8>,
 // Multiply and update into destination.
 func @kernel2(%arg0: memref<2x8xi8>,
               %arg1: memref<2x8xi8>,
-	      %arg2: memref<2x2xi32>) {
+              %arg2: memref<2x2xi32>) {
   %0 = arith.constant 0 : index
   %1 = amx.tile_load %arg0[%0, %0] : memref<2x8xi8>  into vector<2x8xi8>
   %2 = amx.tile_load %arg1[%0, %0] : memref<2x8xi8>  into vector<2x8xi8>

From 326516448c839d8f9cc515b20a34d0f3a6ee2374 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Thu, 27 Jan 2022 00:57:54 +0100
Subject: [PATCH 811/946] [lldb/test] Try to fix TestSBModule failure

This should fix https://lab.llvm.org/buildbot/#/builders/68/builds/25571

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 .../scripted_process/TestScriptedProcess.py        | 14 ++------------
 lldb/test/API/python_api/sbmodule/TestSBModule.py  |  4 ++--
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
index 0c215f082c5d3..3a9c74bb73c67 100644
--- a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
+++ b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
@@ -19,6 +19,8 @@ def setUp(self):
 
     def tearDown(self):
         TestBase.tearDown(self)
+        if "SKIP_SCRIPTED_PROCESS_LAUNCH" in os.environ:
+          del os.environ["SKIP_SCRIPTED_PROCESS_LAUNCH"]
 
     def test_python_plugin_package(self):
         """Test that the lldb python module has a `plugins.scripted_process`
@@ -53,10 +55,6 @@ def test_invalid_scripted_register_context(self):
         self.assertTrue(os.path.isfile(log_file))
 
         os.environ['SKIP_SCRIPTED_PROCESS_LAUNCH'] = '1'
-        def cleanup():
-          del os.environ["SKIP_SCRIPTED_PROCESS_LAUNCH"]
-        self.addTearDownHook(cleanup)
-
         scripted_process_example_relpath = 'invalid_scripted_process.py'
         self.runCmd("command script import " + os.path.join(self.getSourceDir(),
                                                             scripted_process_example_relpath))
@@ -89,10 +87,6 @@ def test_scripted_process_and_scripted_thread(self):
         self.assertTrue(target, VALID_TARGET)
 
         os.environ['SKIP_SCRIPTED_PROCESS_LAUNCH'] = '1'
-        def cleanup():
-          del os.environ["SKIP_SCRIPTED_PROCESS_LAUNCH"]
-        self.addTearDownHook(cleanup)
-
         scripted_process_example_relpath = 'dummy_scripted_process.py'
         self.runCmd("command script import " + os.path.join(self.getSourceDir(),
                                                             scripted_process_example_relpath))
@@ -160,10 +154,6 @@ def test_launch_scripted_process_stack_frames(self):
         self.assertTrue(error.Success(), "Reloading main module at offset 0 failed.")
 
         os.environ['SKIP_SCRIPTED_PROCESS_LAUNCH'] = '1'
-        def cleanup():
-          del os.environ["SKIP_SCRIPTED_PROCESS_LAUNCH"]
-        self.addTearDownHook(cleanup)
-
         scripted_process_example_relpath = 'stack_core_scripted_process.py'
         self.runCmd("command script import " + os.path.join(self.getSourceDir(),
                                                             scripted_process_example_relpath))
diff --git a/lldb/test/API/python_api/sbmodule/TestSBModule.py b/lldb/test/API/python_api/sbmodule/TestSBModule.py
index ab6a9a20884a3..e3ab9bcb299f6 100644
--- a/lldb/test/API/python_api/sbmodule/TestSBModule.py
+++ b/lldb/test/API/python_api/sbmodule/TestSBModule.py
@@ -47,12 +47,12 @@ def test_module_is_file_backed(self):
         error = lldb.SBError()
         process = target.AttachToProcessWithID(self.dbg.GetListener(),
                                                self.background_pid, error)
-        self.assertTrue(error.Success() and process,  PROCESS_IS_VALID)
+        self.assertTrue(error.Success(),  PROCESS_IS_VALID)
         main_module = target.GetModuleAtIndex(0)
         self.assertEqual(main_module.GetFileSpec().GetFilename(), "a.out")
         self.assertFalse(main_module.IsFileBacked(),
                          "The module should not be backed by a file on disk.")
 
-        error = process.Destroy()
+        error = process.Detach()
         self.assertTrue(error.Success(), "couldn't destroy process %s" % background_process.pid)
 

From a6b5624372a652b18633704e7a5732e292f2f61b Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Thu, 27 Jan 2022 01:04:38 +0100
Subject: [PATCH 812/946] Revert "[lldb/test] Try to fix TestSBModule failure"

This reverts commit 326516448c839d8f9cc515b20a34d0f3a6ee2374.
---
 .../scripted_process/TestScriptedProcess.py        | 14 ++++++++++++--
 lldb/test/API/python_api/sbmodule/TestSBModule.py  |  4 ++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
index 3a9c74bb73c67..0c215f082c5d3 100644
--- a/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
+++ b/lldb/test/API/functionalities/scripted_process/TestScriptedProcess.py
@@ -19,8 +19,6 @@ def setUp(self):
 
     def tearDown(self):
         TestBase.tearDown(self)
-        if "SKIP_SCRIPTED_PROCESS_LAUNCH" in os.environ:
-          del os.environ["SKIP_SCRIPTED_PROCESS_LAUNCH"]
 
     def test_python_plugin_package(self):
         """Test that the lldb python module has a `plugins.scripted_process`
@@ -55,6 +53,10 @@ def test_invalid_scripted_register_context(self):
         self.assertTrue(os.path.isfile(log_file))
 
         os.environ['SKIP_SCRIPTED_PROCESS_LAUNCH'] = '1'
+        def cleanup():
+          del os.environ["SKIP_SCRIPTED_PROCESS_LAUNCH"]
+        self.addTearDownHook(cleanup)
+
         scripted_process_example_relpath = 'invalid_scripted_process.py'
         self.runCmd("command script import " + os.path.join(self.getSourceDir(),
                                                             scripted_process_example_relpath))
@@ -87,6 +89,10 @@ def test_scripted_process_and_scripted_thread(self):
         self.assertTrue(target, VALID_TARGET)
 
         os.environ['SKIP_SCRIPTED_PROCESS_LAUNCH'] = '1'
+        def cleanup():
+          del os.environ["SKIP_SCRIPTED_PROCESS_LAUNCH"]
+        self.addTearDownHook(cleanup)
+
         scripted_process_example_relpath = 'dummy_scripted_process.py'
         self.runCmd("command script import " + os.path.join(self.getSourceDir(),
                                                             scripted_process_example_relpath))
@@ -154,6 +160,10 @@ def test_launch_scripted_process_stack_frames(self):
         self.assertTrue(error.Success(), "Reloading main module at offset 0 failed.")
 
         os.environ['SKIP_SCRIPTED_PROCESS_LAUNCH'] = '1'
+        def cleanup():
+          del os.environ["SKIP_SCRIPTED_PROCESS_LAUNCH"]
+        self.addTearDownHook(cleanup)
+
         scripted_process_example_relpath = 'stack_core_scripted_process.py'
         self.runCmd("command script import " + os.path.join(self.getSourceDir(),
                                                             scripted_process_example_relpath))
diff --git a/lldb/test/API/python_api/sbmodule/TestSBModule.py b/lldb/test/API/python_api/sbmodule/TestSBModule.py
index e3ab9bcb299f6..ab6a9a20884a3 100644
--- a/lldb/test/API/python_api/sbmodule/TestSBModule.py
+++ b/lldb/test/API/python_api/sbmodule/TestSBModule.py
@@ -47,12 +47,12 @@ def test_module_is_file_backed(self):
         error = lldb.SBError()
         process = target.AttachToProcessWithID(self.dbg.GetListener(),
                                                self.background_pid, error)
-        self.assertTrue(error.Success(),  PROCESS_IS_VALID)
+        self.assertTrue(error.Success() and process,  PROCESS_IS_VALID)
         main_module = target.GetModuleAtIndex(0)
         self.assertEqual(main_module.GetFileSpec().GetFilename(), "a.out")
         self.assertFalse(main_module.IsFileBacked(),
                          "The module should not be backed by a file on disk.")
 
-        error = process.Detach()
+        error = process.Destroy()
         self.assertTrue(error.Success(), "couldn't destroy process %s" % background_process.pid)
 

From aae3c4f2b46d33b00420f2d2541cb6daebf2f872 Mon Sep 17 00:00:00 2001
From: Med Ismail Bennani <medismail.bennani@gmail.com>
Date: Thu, 27 Jan 2022 01:05:12 +0100
Subject: [PATCH 813/946] [lldb/test] Skip TestSBModule unless darwin

Signed-off-by: Med Ismail Bennani <medismail.bennani@gmail.com>
---
 lldb/test/API/python_api/sbmodule/TestSBModule.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lldb/test/API/python_api/sbmodule/TestSBModule.py b/lldb/test/API/python_api/sbmodule/TestSBModule.py
index ab6a9a20884a3..dfb7711501e94 100644
--- a/lldb/test/API/python_api/sbmodule/TestSBModule.py
+++ b/lldb/test/API/python_api/sbmodule/TestSBModule.py
@@ -20,7 +20,7 @@ def tearDown(self):
         if self.background_pid:
             os.kill(self.background_pid, signal.SIGKILL)
 
-
+    @skipUnlessDarwin
     def test_module_is_file_backed(self):
         """Test the SBModule::IsFileBacked() method"""
         self.build()

From 98fa46f870e402f225b60ab5d02487c36d79632b Mon Sep 17 00:00:00 2001
From: Zixu Wang <zixu_wang@apple.com>
Date: Wed, 26 Jan 2022 16:22:08 -0800
Subject: [PATCH 814/946] [FIX][clang] Fix unused private field in
 ExtractAPIVisitor

Fix a build failure where an unused private field in ExtractAPIVisitor
triggered a warning turned into error.
---
 clang/lib/Frontend/ExtractAPIConsumer.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/clang/lib/Frontend/ExtractAPIConsumer.cpp b/clang/lib/Frontend/ExtractAPIConsumer.cpp
index 92a0385c96cc0..cdf67f3c327aa 100644
--- a/clang/lib/Frontend/ExtractAPIConsumer.cpp
+++ b/clang/lib/Frontend/ExtractAPIConsumer.cpp
@@ -9,21 +9,14 @@ using namespace clang;
 namespace {
 class ExtractAPIVisitor : public RecursiveASTVisitor<ExtractAPIVisitor> {
 public:
-  explicit ExtractAPIVisitor(ASTContext *Context) : Context(Context) {}
-
   bool VisitNamedDecl(NamedDecl *Decl) {
     llvm::outs() << Decl->getName() << "\n";
     return true;
   }
-
-private:
-  ASTContext *Context;
 };
 
 class ExtractAPIConsumer : public ASTConsumer {
 public:
-  explicit ExtractAPIConsumer(ASTContext *Context) : Visitor(Context) {}
-
   void HandleTranslationUnit(ASTContext &Context) override {
     Visitor.TraverseDecl(Context.getTranslationUnitDecl());
   }
@@ -35,5 +28,5 @@ class ExtractAPIConsumer : public ASTConsumer {
 
 std::unique_ptr<ASTConsumer>
 ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) {
-  return std::make_unique<ExtractAPIConsumer>(&CI.getASTContext());
+  return std::make_unique<ExtractAPIConsumer>();
 }

From ff84c635b77e3b9ac62116e6e2a85bc6a556fc78 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Fri, 21 Jan 2022 23:29:41 -0500
Subject: [PATCH 815/946] [libc++] [ranges] Fix LWG3470
 "convertible-to-non-slicing seems to reject valid case"

Differential Revision: https://reviews.llvm.org/D117941
---
 libcxx/docs/Status/Cxx2bIssues.csv            |  2 +-
 libcxx/include/__ranges/subrange.h            | 10 ++--
 .../range.subrange/lwg3470.pass.cpp           | 50 +++++++++++++++++++
 3 files changed, 57 insertions(+), 5 deletions(-)
 create mode 100644 libcxx/test/std/ranges/range.utility/range.subrange/lwg3470.pass.cpp

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index b151393e0a50d..855a1f43a2a99 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -109,7 +109,7 @@
 `3392 <https://wg21.link/LWG3392>`__,"``ranges::distance()`` cannot be used on a move-only iterator with a sized sentinel","October 2021","","","|ranges|"
 `3407 <https://wg21.link/LWG3407>`__,"Some problems with the wording changes of P1739R4","October 2021","","","|ranges|"
 `3422 <https://wg21.link/LWG3422>`__,"Issues of ``seed_seq``'s constructors","October 2021","|Complete|","14.0"
-`3470 <https://wg21.link/LWG3470>`__,"``convertible-to-non-slicing`` seems to reject valid case","October 2021","","","|ranges|"
+`3470 <https://wg21.link/LWG3470>`__,"``convertible-to-non-slicing`` seems to reject valid case","October 2021","|Complete|","14.0","|ranges|"
 `3480 <https://wg21.link/LWG3480>`__,"``directory_iterator`` and ``recursive_directory_iterator`` are not C++20 ranges","October 2021","|Complete|","14.0","|ranges|"
 `3498 <https://wg21.link/LWG3498>`__,"Inconsistent ``noexcept``-specifiers for ``basic_syncbuf``","October 2021","",""
 `3535 <https://wg21.link/LWG3535>`__,"``join_view::iterator::iterator_category`` and ``::iterator_concept`` lie","October 2021","","","|ranges|"
diff --git a/libcxx/include/__ranges/subrange.h b/libcxx/include/__ranges/subrange.h
index 8e984f2bf06cc..14716d1fb0fff 100644
--- a/libcxx/include/__ranges/subrange.h
+++ b/libcxx/include/__ranges/subrange.h
@@ -39,13 +39,15 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 #if !defined(_LIBCPP_HAS_NO_RANGES)
 
 namespace ranges {
+  template<class _From, class _To>
+  concept __uses_nonqualification_pointer_conversion =
+    is_pointer_v<_From> && is_pointer_v<_To> &&
+    !convertible_to<remove_pointer_t<_From>(*)[], remove_pointer_t<_To>(*)[]>;
+
   template<class _From, class _To>
   concept __convertible_to_non_slicing =
     convertible_to<_From, _To> &&
-    // If they're both pointers, they must have the same element type.
-    !(is_pointer_v<decay_t<_From>> &&
-      is_pointer_v<decay_t<_To>> &&
-      __different_from<remove_pointer_t<decay_t<_From>>, remove_pointer_t<decay_t<_To>>>);
+    !__uses_nonqualification_pointer_conversion<decay_t<_From>, decay_t<_To>>;
 
   template<class _Tp>
   concept __pair_like =
diff --git a/libcxx/test/std/ranges/range.utility/range.subrange/lwg3470.pass.cpp b/libcxx/test/std/ranges/range.utility/range.subrange/lwg3470.pass.cpp
new file mode 100644
index 0000000000000..d174d09d4eb64
--- /dev/null
+++ b/libcxx/test/std/ranges/range.utility/range.subrange/lwg3470.pass.cpp
@@ -0,0 +1,50 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: libcpp-no-concepts
+// UNSUPPORTED: libcpp-has-no-incomplete-ranges
+
+// class std::ranges::subrange;
+//   Test the example from LWG 3470,
+//   qualification conversions in __convertible_to_non_slicing
+
+#include <ranges>
+
+#include <cassert>
+
+#include "test_macros.h"
+
+constexpr bool test()
+{
+  // The example from LWG3470, using implicit conversion.
+  int a[3] = { 1, 2, 3 };
+  int* b[3] = { &a[2], &a[0], &a[1] };
+  std::ranges::subrange<const int* const*> c = b;
+  assert(c.begin() == b + 0);
+  assert(c.end() == b + 3);
+
+  // Also test CTAD and a subrange-to-subrange conversion.
+  std::same_as<std::ranges::subrange<int**>> auto d = std::ranges::subrange(b);
+  assert(d.begin() == b + 0);
+  assert(d.end() == b + 3);
+
+  std::ranges::subrange<const int* const*> e = d;
+  assert(e.begin() == b + 0);
+  assert(e.end() == b + 3);
+
+  return true;
+}
+
+int main(int, char**)
+{
+  test();
+  static_assert(test());
+
+  return 0;
+}

From 16031cbf2bb1662f3b457e537d3e2d84a21adc7c Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 22 Jan 2022 15:29:18 -0500
Subject: [PATCH 816/946] [libc++] Fix LWG3557 "static_cast expression in
 convertible_to has the wrong operand"

https://cplusplus.github.io/LWG/issue3557
I think the code change is unobservable, so we could just close this as
"Nothing To Do" instead; but it seems appropriate to follow the Standard's
wording here as closely as possible.

Differential Revision: https://reviews.llvm.org/D117964
---
 libcxx/docs/Status/Cxx2bIssues.csv         | 2 +-
 libcxx/include/__concepts/convertible_to.h | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index 855a1f43a2a99..f949102e94b19 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -114,7 +114,7 @@
 `3498 <https://wg21.link/LWG3498>`__,"Inconsistent ``noexcept``-specifiers for ``basic_syncbuf``","October 2021","",""
 `3535 <https://wg21.link/LWG3535>`__,"``join_view::iterator::iterator_category`` and ``::iterator_concept`` lie","October 2021","","","|ranges|"
 `3554 <https://wg21.link/LWG3554>`__,"``chrono::parse`` needs ``const charT*`` and ``basic_string_view<charT>`` overloads","October 2021","","","|chrono|"
-`3557 <https://wg21.link/LWG3557>`__,"The ``static_cast`` expression in ``convertible_to`` has the wrong operand","October 2021","",""
+`3557 <https://wg21.link/LWG3557>`__,"The ``static_cast`` expression in ``convertible_to`` has the wrong operand","October 2021","|Complete|","14.0"
 `3559 <https://wg21.link/LWG3559>`__,"Semantic requirements of ``sized_range`` is circular","October 2021","","","|ranges|"
 `3560 <https://wg21.link/LWG3560>`__,"``ranges::equal`` and ``ranges::is_permutation`` should short-circuit for ``sized_ranges``","October 2021","","","|ranges|"
 `3561 <https://wg21.link/LWG3561>`__,"Issue with internal counter in ``discard_block_engine``","October 2021","",""
diff --git a/libcxx/include/__concepts/convertible_to.h b/libcxx/include/__concepts/convertible_to.h
index ec68967106d5f..795b0bd7494ce 100644
--- a/libcxx/include/__concepts/convertible_to.h
+++ b/libcxx/include/__concepts/convertible_to.h
@@ -10,6 +10,7 @@
 #define _LIBCPP___CONCEPTS_CONVERTIBLE_TO_H
 
 #include <__config>
+#include <__utility/declval.h>
 #include <type_traits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -25,8 +26,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 template<class _From, class _To>
 concept convertible_to =
   is_convertible_v<_From, _To> &&
-  requires (add_rvalue_reference_t<_From> (&__f)()) {
-    static_cast<_To>(__f());
+  requires {
+    static_cast<_To>(declval<_From>());
   };
 
 #endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_CONCEPTS)

From c99a585399381dd7d7391d2d93e9a2cbdf59d955 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Sat, 22 Jan 2022 18:34:25 -0500
Subject: [PATCH 817/946] [libc++] Make C++03 reference_wrapper more like
 C++11.

Remove a bunch of LIBCPP_CXX03_LANG. This is the result of a
rabbithole to re-eliminate the workaround I introduced into
std::cref in D117953. It turns out that Clang's C++03 mode
(the only compiler we care about C++03 for) now supports all
the things we were originally eschewing via LIBCPP_CXX03_LANG;
we can fully support these reference_wrapper features in
C++03 mode, and un-XFAIL the relevant tests.

Drive-by constexprify a few more tests.

Differential Revision: https://reviews.llvm.org/D117974
---
 .../include/__functional/reference_wrapper.h  | 17 +----
 .../refwrap.const/type_conv_ctor.pass.cpp     | 69 ++++++++-----------
 .../refwrap.const/type_conv_ctor2.pass.cpp    | 68 +++++++++---------
 .../refwrap.const/type_ctor.compile.fail.cpp  |  2 -
 .../refwrap.helpers/ref_1.compile.fail.cpp    |  2 -
 5 files changed, 66 insertions(+), 92 deletions(-)

diff --git a/libcxx/include/__functional/reference_wrapper.h b/libcxx/include/__functional/reference_wrapper.h
index c5de5e5f2e8df..d04d51568beb3 100644
--- a/libcxx/include/__functional/reference_wrapper.h
+++ b/libcxx/include/__functional/reference_wrapper.h
@@ -34,25 +34,16 @@ class _LIBCPP_TEMPLATE_VIS reference_wrapper
 private:
     type* __f_;
 
-#ifndef _LIBCPP_CXX03_LANG
     static void __fun(_Tp&) _NOEXCEPT;
     static void __fun(_Tp&&) = delete;
-#endif
 
 public:
-    // construct/copy/destroy
-#ifdef _LIBCPP_CXX03_LANG
-    _LIBCPP_INLINE_VISIBILITY
-    reference_wrapper(type& __f) _NOEXCEPT
-        : __f_(_VSTD::addressof(__f)) {}
-#else
-    template <class _Up, class = __enable_if_t<!__is_same_uncvref<_Up, reference_wrapper>::value, decltype(__fun(declval<_Up>())) >>
+    template <class _Up, class = __enable_if_t<!__is_same_uncvref<_Up, reference_wrapper>::value, decltype(__fun(declval<_Up>())) > >
     _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
     reference_wrapper(_Up&& __u) _NOEXCEPT_(noexcept(__fun(declval<_Up>()))) {
         type& __f = static_cast<_Up&&>(__u);
         __f_ = _VSTD::addressof(__f);
     }
-#endif
 
     // access
     _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
@@ -210,15 +201,11 @@ inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17
 reference_wrapper<const _Tp>
 cref(reference_wrapper<_Tp> __t) _NOEXCEPT
 {
-    // C++20 says "return __t", but C++03 lacks the relevant
-    // converting constructor. This should be equivalent.
-    return __t.get();
+    return __t;
 }
 
-#ifndef _LIBCPP_CXX03_LANG
 template <class _Tp> void ref(const _Tp&&) = delete;
 template <class _Tp> void cref(const _Tp&&) = delete;
-#endif
 
 _LIBCPP_END_NAMESPACE_STD
 
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_conv_ctor.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_conv_ctor.pass.cpp
index be9261a5c55c2..2759b921fabe0 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_conv_ctor.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_conv_ctor.pass.cpp
@@ -6,8 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// UNSUPPORTED: c++03
-
 // <functional>
 //
 // reference_wrapper
@@ -21,65 +19,58 @@
 #include "test_macros.h"
 
 struct convertible_to_int_ref {
-    int val = 0;
-    operator int&() { return val; }
-    operator int const&() const { return val; }
+  int val = 0;
+  operator int&() { return val; }
+  operator int const&() const { return val; }
 };
 
 template <bool IsNothrow>
 struct nothrow_convertible {
-    int val = 0;
-    operator int&() noexcept(IsNothrow) { return val; }
+  int val = 0;
+  operator int&() TEST_NOEXCEPT_COND(IsNothrow) { return val; }
 };
 
 struct convertible_from_int {
-    convertible_from_int(int) {}
+  convertible_from_int(int) {}
 };
 
 void meow(std::reference_wrapper<int>) {}
 void meow(convertible_from_int) {}
 
-int gi;
-std::reference_wrapper<int> purr() { return gi; };
-
-template <class T>
-void
-test(T& t)
-{
-    std::reference_wrapper<T> r(t);
-    assert(&r.get() == &t);
-}
-
-void f() {}
-
 int main(int, char**)
 {
-    convertible_to_int_ref convi;
-    test(convi);
-    convertible_to_int_ref const convic;
-    test(convic);
-
-    {
+  {
+    convertible_to_int_ref t;
+    std::reference_wrapper<convertible_to_int_ref> r(t);
+    assert(&r.get() == &t);
+  }
+  {
+    const convertible_to_int_ref t;
+    std::reference_wrapper<const convertible_to_int_ref> r(t);
+    assert(&r.get() == &t);
+  }
+  {
     using Ref = std::reference_wrapper<int>;
-    static_assert((std::is_nothrow_constructible<Ref, nothrow_convertible<true>>::value), "");
-    static_assert((!std::is_nothrow_constructible<Ref, nothrow_convertible<false>>::value), "");
-    }
-
-    {
+    ASSERT_NOEXCEPT(Ref(nothrow_convertible<true>()));
+    ASSERT_NOT_NOEXCEPT(Ref(nothrow_convertible<false>()));
+  }
+  {
     meow(0);
-    (true) ? purr() : 0;
-    }
-
-#if TEST_STD_VER >= 17
-    {
+  }
+  {
+    extern std::reference_wrapper<int> purr();
+    ASSERT_SAME_TYPE(decltype(true ? purr() : 0), int);
+  }
+#if TEST_STD_VER > 14
+  {
     int i = 0;
     std::reference_wrapper ri(i);
     static_assert((std::is_same<decltype(ri), std::reference_wrapper<int>>::value), "" );
     const int j = 0;
     std::reference_wrapper rj(j);
     static_assert((std::is_same<decltype(rj), std::reference_wrapper<const int>>::value), "" );
-    }
+  }
 #endif
 
-    return 0;
+  return 0;
 }
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_conv_ctor2.pass.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_conv_ctor2.pass.cpp
index 30d7cdb4a38b4..0ef87ade87c9d 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_conv_ctor2.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_conv_ctor2.pass.cpp
@@ -6,8 +6,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-// UNSUPPORTED: c++03
-
 // <functional>
 //
 // reference_wrapper
@@ -20,46 +18,48 @@
 
 #include "test_macros.h"
 
-struct B {} b;
+struct B {};
 
 struct A1 {
-    operator B& () const { return b; }
+  mutable B b_;
+  TEST_CONSTEXPR operator B&() const { return b_; }
 };
+
 struct A2 {
-    operator B& () const noexcept { return b; }
+  mutable B b_;
+  TEST_CONSTEXPR operator B&() const TEST_NOEXCEPT { return b_; }
 };
 
-int main(int, char**)
-{
-    {
-    std::reference_wrapper<B> b1 = A1();
-    assert(&b1.get() == &b);
-    b1 = A1();
-    assert(&b1.get() == &b);
+void implicitly_convert(std::reference_wrapper<B>) TEST_NOEXCEPT;
 
-    static_assert(std::is_convertible<A1, std::reference_wrapper<B>>::value, "");
-    static_assert(!std::is_nothrow_constructible<std::reference_wrapper<B>, A1>::value, "");
-#if TEST_STD_VER >= 20
-    static_assert(!std::is_nothrow_convertible_v<A1, std::reference_wrapper<B>>);
-#endif
-    static_assert(std::is_assignable<std::reference_wrapper<B>, A1>::value, "");
-    static_assert(!std::is_nothrow_assignable<std::reference_wrapper<B>, A1>::value, "");
-    }
-
-    {
-    std::reference_wrapper<B> b2 = A2();
-    assert(&b2.get() == &b);
-    b2 = A2();
-    assert(&b2.get() == &b);
+TEST_CONSTEXPR_CXX20 bool test()
+{
+  {
+    A1 a;
+    ASSERT_NOT_NOEXCEPT(implicitly_convert(a));
+    std::reference_wrapper<B> b1 = a;
+    assert(&b1.get() == &a.b_);
+    ASSERT_NOT_NOEXCEPT(b1 = a);
+    b1 = a;
+    assert(&b1.get() == &a.b_);
+  }
+  {
+    A2 a;
+    ASSERT_NOEXCEPT(implicitly_convert(a));
+    std::reference_wrapper<B> b2 = a;
+    assert(&b2.get() == &a.b_);
+    ASSERT_NOEXCEPT(b2 = a);
+    b2 = a;
+    assert(&b2.get() == &a.b_);
+  }
+  return true;
+}
 
-    static_assert(std::is_convertible<A2, std::reference_wrapper<B>>::value, "");
-    static_assert(std::is_nothrow_constructible<std::reference_wrapper<B>, A2>::value, "");
-#if TEST_STD_VER >= 20
-    static_assert(std::is_nothrow_convertible_v<A2, std::reference_wrapper<B>>);
+int main(int, char**) {
+  test();
+#if TEST_STD_VER > 17
+  static_assert(test());
 #endif
-    static_assert(std::is_assignable<std::reference_wrapper<B>, A2>::value, "");
-    static_assert(std::is_nothrow_assignable<std::reference_wrapper<B>, A2>::value, "");
-    }
 
-    return 0;
+  return 0;
 }
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_ctor.compile.fail.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_ctor.compile.fail.cpp
index c067dd942a542..b9792105046d5 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_ctor.compile.fail.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.const/type_ctor.compile.fail.cpp
@@ -12,8 +12,6 @@
 
 // reference_wrapper(T&&) = delete;
 
-// XFAIL: c++03
-
 #include <functional>
 #include <cassert>
 
diff --git a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.compile.fail.cpp b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.compile.fail.cpp
index 9f45b94e828f8..3721b9bd47f87 100644
--- a/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.compile.fail.cpp
+++ b/libcxx/test/std/utilities/function.objects/refwrap/refwrap.helpers/ref_1.compile.fail.cpp
@@ -14,8 +14,6 @@
 
 // Don't allow binding to a temp
 
-// XFAIL: c++03
-
 #include <functional>
 
 struct A {};

From 4b3e0d2a7eb7cfc9bc6b8ec97fd42acbf6228614 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Fri, 14 Jan 2022 11:44:31 -0500
Subject: [PATCH 818/946] [libc++] Fix LWG3533 "Make `base() const&`
 consistent..."

Fixed in counted_iterator and transform_view::iterator.
The LWG issue also affected elements_view::iterator, but we haven't
implemented that one yet, and whoever does implement it will get
the fix for free if they just follow the working draft's wording.

Drive-by stop calling `.base()` on test iterators in the test,
and improve the transform_view::iterator/sentinel tests.

Differential Revision: https://reviews.llvm.org/D117329
---
 libcxx/docs/Status/Cxx2bIssues.csv            |  2 +-
 libcxx/include/__iterator/counted_iterator.h  |  2 +-
 libcxx/include/__ranges/transform_view.h      |  4 +-
 .../counted.iterator/base.pass.cpp            | 25 +++---
 .../range.transform/end.pass.cpp              | 79 ++++++++++++++-----
 .../range.transform/iterator/base.pass.cpp    | 35 ++++----
 .../range.adaptors/range.transform/types.h    |  4 +-
 7 files changed, 94 insertions(+), 57 deletions(-)

diff --git a/libcxx/docs/Status/Cxx2bIssues.csv b/libcxx/docs/Status/Cxx2bIssues.csv
index f949102e94b19..3dcb641a866bc 100644
--- a/libcxx/docs/Status/Cxx2bIssues.csv
+++ b/libcxx/docs/Status/Cxx2bIssues.csv
@@ -81,7 +81,7 @@
 `3529 <https://wg21.link/LWG3529>`__,"``priority_queue(first, last)`` should construct ``c`` with ``(first, last)``","June 2021","",""
 `3530 <https://wg21.link/LWG3530>`__,"``BUILTIN-PTR-MEOW`` should not opt the type out of syntactic checks","June 2021","",""
 `3532 <https://wg21.link/LWG3532>`__,"``split_view<V, P>::inner-iterator<true>::operator++(int)`` should depend on ``Base``","June 2021","","","|ranges|"
-`3533 <https://wg21.link/LWG3533>`__,"Make ``base() const &`` consistent across iterator wrappers that supports ``input_iterators``","June 2021","","","|ranges|"
+`3533 <https://wg21.link/LWG3533>`__,"Make ``base() const &`` consistent across iterator wrappers that supports ``input_iterators``","June 2021","|Complete|","14.0","|ranges|"
 `3536 <https://wg21.link/LWG3536>`__,"Should ``chrono::from_stream()`` assign zero to duration for failure?","June 2021","","","|chrono|"
 `3539 <https://wg21.link/LWG3539>`__,"``format_to`` must not copy models of ``output_iterator<const charT&>``","June 2021","","","|format|"
 `3540 <https://wg21.link/LWG3540>`__,"§[format.arg] There should be no const in ``basic_format_arg(const T* p)``","June 2021","|Complete|","14.0","|format|"
diff --git a/libcxx/include/__iterator/counted_iterator.h b/libcxx/include/__iterator/counted_iterator.h
index 3ce88fadbe4d7..82d7adcfb02ea 100644
--- a/libcxx/include/__iterator/counted_iterator.h
+++ b/libcxx/include/__iterator/counted_iterator.h
@@ -96,7 +96,7 @@ class counted_iterator
   }
 
   _LIBCPP_HIDE_FROM_ABI
-  constexpr const _Iter& base() const& { return __current_; }
+  constexpr const _Iter& base() const& noexcept { return __current_; }
 
   _LIBCPP_HIDE_FROM_ABI
   constexpr _Iter base() && { return _VSTD::move(__current_); }
diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h
index 0f53fbaa7e686..1506e8b2a7fed 100644
--- a/libcxx/include/__ranges/transform_view.h
+++ b/libcxx/include/__ranges/transform_view.h
@@ -195,9 +195,7 @@ class transform_view<_View, _Fn>::__iterator
     : __parent_(__i.__parent_), __current_(_VSTD::move(__i.__current_)) {}
 
   _LIBCPP_HIDE_FROM_ABI
-  constexpr iterator_t<_Base> base() const&
-    requires copyable<iterator_t<_Base>>
-  {
+  constexpr const iterator_t<_Base>& base() const& noexcept {
     return __current_;
   }
 
diff --git a/libcxx/test/std/iterators/predef.iterators/counted.iterator/base.pass.cpp b/libcxx/test/std/iterators/predef.iterators/counted.iterator/base.pass.cpp
index 57afdbfb1bd26..9d11485782c92 100644
--- a/libcxx/test/std/iterators/predef.iterators/counted.iterator/base.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/counted.iterator/base.pass.cpp
@@ -32,17 +32,18 @@ constexpr bool test() {
 
   {
     std::counted_iterator iter(cpp20_input_iterator<int*>{buffer}, 8);
-    assert(iter.base().base() == buffer);
-    assert(std::move(iter).base().base() == buffer);
+    assert(base(iter.base()) == buffer);
+    assert(base(std::move(iter).base()) == buffer);
 
+    ASSERT_NOEXCEPT(iter.base());
     ASSERT_SAME_TYPE(decltype(iter.base()), const cpp20_input_iterator<int*>&);
     ASSERT_SAME_TYPE(decltype(std::move(iter).base()), cpp20_input_iterator<int*>);
   }
 
   {
     std::counted_iterator iter(forward_iterator<int*>{buffer}, 8);
-    assert(iter.base() == forward_iterator<int*>{buffer});
-    assert(std::move(iter).base() == forward_iterator<int*>{buffer});
+    assert(base(iter.base()) == buffer);
+    assert(base(std::move(iter).base()) == buffer);
 
     ASSERT_SAME_TYPE(decltype(iter.base()), const forward_iterator<int*>&);
     ASSERT_SAME_TYPE(decltype(std::move(iter).base()), forward_iterator<int*>);
@@ -50,8 +51,8 @@ constexpr bool test() {
 
   {
     std::counted_iterator iter(contiguous_iterator<int*>{buffer}, 8);
-    assert(iter.base() == contiguous_iterator<int*>{buffer});
-    assert(std::move(iter).base() == contiguous_iterator<int*>{buffer});
+    assert(base(iter.base()) == buffer);
+    assert(base(std::move(iter).base()) == buffer);
 
     ASSERT_SAME_TYPE(decltype(iter.base()), const contiguous_iterator<int*>&);
     ASSERT_SAME_TYPE(decltype(std::move(iter).base()), contiguous_iterator<int*>);
@@ -68,8 +69,8 @@ constexpr bool test() {
 
   {
     const std::counted_iterator iter(cpp20_input_iterator<int*>{buffer}, 8);
-    assert(iter.base().base() == buffer);
-    assert(std::move(iter).base().base() == buffer);
+    assert(base(iter.base()) == buffer);
+    assert(base(std::move(iter).base()) == buffer);
 
     ASSERT_SAME_TYPE(decltype(iter.base()), const cpp20_input_iterator<int*>&);
     ASSERT_SAME_TYPE(decltype(std::move(iter).base()), const cpp20_input_iterator<int*>&);
@@ -77,8 +78,8 @@ constexpr bool test() {
 
   {
     const std::counted_iterator iter(forward_iterator<int*>{buffer}, 7);
-    assert(iter.base() == forward_iterator<int*>{buffer});
-    assert(std::move(iter).base() == forward_iterator<int*>{buffer});
+    assert(base(iter.base()) == buffer);
+    assert(base(std::move(iter).base()) == buffer);
 
     ASSERT_SAME_TYPE(decltype(iter.base()), const forward_iterator<int*>&);
     ASSERT_SAME_TYPE(decltype(std::move(iter).base()), const forward_iterator<int*>&);
@@ -86,8 +87,8 @@ constexpr bool test() {
 
   {
     const std::counted_iterator iter(contiguous_iterator<int*>{buffer}, 6);
-    assert(iter.base() == contiguous_iterator<int*>{buffer});
-    assert(std::move(iter).base() == contiguous_iterator<int*>{buffer});
+    assert(base(iter.base()) == buffer);
+    assert(base(std::move(iter).base()) == buffer);
 
     ASSERT_SAME_TYPE(decltype(iter.base()), const contiguous_iterator<int*>&);
     ASSERT_SAME_TYPE(decltype(std::move(iter).base()), const contiguous_iterator<int*>&);
diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp
index 80b8c92bf251a..23972b79ea498 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.transform/end.pass.cpp
@@ -32,50 +32,87 @@ constexpr bool test() {
     using TransformView = std::ranges::transform_view<ForwardView, PlusOneMutable>;
     static_assert(std::ranges::common_range<TransformView>);
     TransformView tv;
-    auto end = tv.end();
-    ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t<ForwardView>);
-    assert(base(end.base()) == globalBuff + 8);
+    auto it = tv.end();
+    using It = decltype(it);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&>(it).base()), const forward_iterator<int*>&);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&&>(it).base()), forward_iterator<int*>);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&>(it).base()), const forward_iterator<int*>&);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&&>(it).base()), const forward_iterator<int*>&);
+    assert(base(it.base()) == globalBuff + 8);
+    assert(base(std::move(it).base()) == globalBuff + 8);
     static_assert(!HasConstQualifiedEnd<TransformView>);
   }
   {
     using TransformView = std::ranges::transform_view<InputView, PlusOneMutable>;
     static_assert(!std::ranges::common_range<TransformView>);
     TransformView tv;
-    auto end = tv.end();
-    ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t<InputView>);
-    assert(base(base(end.base())) == globalBuff + 8);
+    auto sent = tv.end();
+    using Sent = decltype(sent);
+    ASSERT_SAME_TYPE(decltype(static_cast<Sent&>(sent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<Sent&&>(sent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<const Sent&>(sent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<const Sent&&>(sent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    assert(base(base(sent.base())) == globalBuff + 8);
+    assert(base(base(std::move(sent).base())) == globalBuff + 8);
     static_assert(!HasConstQualifiedEnd<TransformView>);
   }
   {
     using TransformView = std::ranges::transform_view<InputView, PlusOne>;
     static_assert(!std::ranges::common_range<TransformView>);
     TransformView tv;
-    auto end = tv.end();
-    ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t<InputView>);
-    assert(base(base(end.base())) == globalBuff + 8);
-    auto cend = std::as_const(tv).end();
-    ASSERT_SAME_TYPE(decltype(cend.base()), std::ranges::sentinel_t<const InputView>);
-    assert(base(base(cend.base())) == globalBuff + 8);
+    auto sent = tv.end();
+    using Sent = decltype(sent);
+    ASSERT_SAME_TYPE(decltype(static_cast<Sent&>(sent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<Sent&&>(sent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<const Sent&>(sent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<const Sent&&>(sent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    assert(base(base(sent.base())) == globalBuff + 8);
+    assert(base(base(std::move(sent).base())) == globalBuff + 8);
+
+    auto csent = std::as_const(tv).end();
+    using CSent = decltype(csent);
+    ASSERT_SAME_TYPE(decltype(static_cast<CSent&>(csent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<CSent&&>(csent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<const CSent&>(csent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    ASSERT_SAME_TYPE(decltype(static_cast<const CSent&&>(csent).base()), sentinel_wrapper<cpp20_input_iterator<int*>>);
+    assert(base(base(csent.base())) == globalBuff + 8);
+    assert(base(base(std::move(csent).base())) == globalBuff + 8);
   }
   {
     using TransformView = std::ranges::transform_view<MoveOnlyView, PlusOneMutable>;
     static_assert(std::ranges::common_range<TransformView>);
     TransformView tv;
-    auto end = tv.end();
-    ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t<MoveOnlyView>);
-    assert(end.base() == globalBuff + 8);
+    auto it = tv.end();
+    using It = decltype(it);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&>(it).base()), int* const&);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&&>(it).base()), int*);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&>(it).base()), int* const&);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&&>(it).base()), int* const&);
+    assert(base(it.base()) == globalBuff + 8);
+    assert(base(std::move(it).base()) == globalBuff + 8);
     static_assert(!HasConstQualifiedEnd<TransformView>);
   }
   {
     using TransformView = std::ranges::transform_view<MoveOnlyView, PlusOne>;
     static_assert(std::ranges::common_range<TransformView>);
     TransformView tv;
-    auto end = tv.end();
-    ASSERT_SAME_TYPE(decltype(end.base()), std::ranges::sentinel_t<MoveOnlyView>);
-    assert(end.base() == globalBuff + 8);
-    auto cend = std::as_const(tv).end();
-    ASSERT_SAME_TYPE(decltype(cend.base()), std::ranges::sentinel_t<const MoveOnlyView>);
-    assert(cend.base() == globalBuff + 8);
+    auto it = tv.end();
+    using It = decltype(it);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&>(it).base()), int* const&);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&&>(it).base()), int*);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&>(it).base()), int* const&);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&&>(it).base()), int* const&);
+    assert(base(it.base()) == globalBuff + 8);
+    assert(base(std::move(it).base()) == globalBuff + 8);
+
+    auto csent = std::as_const(tv).end();
+    using CSent = decltype(csent);
+    ASSERT_SAME_TYPE(decltype(static_cast<CSent&>(csent).base()), int* const&);
+    ASSERT_SAME_TYPE(decltype(static_cast<CSent&&>(csent).base()), int*);
+    ASSERT_SAME_TYPE(decltype(static_cast<const CSent&>(csent).base()), int* const&);
+    ASSERT_SAME_TYPE(decltype(static_cast<const CSent&&>(csent).base()), int* const&);
+    assert(base(base(csent.base())) == globalBuff + 8);
+    assert(base(base(std::move(csent).base())) == globalBuff + 8);
   }
   return true;
 }
diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp
index 26fc953023c81..6d54e9998e15c 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp
+++ b/libcxx/test/std/ranges/range.adaptors/range.transform/iterator/base.pass.cpp
@@ -17,31 +17,32 @@
 #include "test_macros.h"
 #include "../types.h"
 
-template<class It>
-concept HasBase = requires(It it) {
-  static_cast<It>(it).base();
-};
-
 constexpr bool test() {
   {
     using TransformView = std::ranges::transform_view<MoveOnlyView, PlusOneMutable>;
     TransformView tv;
-    auto begin = tv.begin();
-    ASSERT_SAME_TYPE(decltype(begin.base()), int*);
-    assert(begin.base() == globalBuff);
-    ASSERT_SAME_TYPE(decltype(std::move(begin).base()), int*);
-    assert(std::move(begin).base() == globalBuff);
+    auto it = tv.begin();
+    using It = decltype(it);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&>(it).base()), int* const&);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&&>(it).base()), int*);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&>(it).base()), int* const&);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&&>(it).base()), int* const&);
+    ASSERT_NOEXCEPT(it.base());
+    assert(base(it.base()) == globalBuff);
+    assert(base(std::move(it).base()) == globalBuff);
   }
   {
     using TransformView = std::ranges::transform_view<InputView, PlusOneMutable>;
     TransformView tv;
-    auto begin = tv.begin();
-    static_assert(!HasBase<decltype(begin)&>);
-    static_assert(HasBase<decltype(begin)&&>);
-    static_assert(!HasBase<const decltype(begin)&>);
-    static_assert(!HasBase<const decltype(begin)&&>);
-    std::same_as<cpp20_input_iterator<int *>> auto it = std::move(begin).base();
-    assert(base(it) == globalBuff);
+    auto it = tv.begin();
+    using It = decltype(it);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&>(it).base()), const cpp20_input_iterator<int*>&);
+    ASSERT_SAME_TYPE(decltype(static_cast<It&&>(it).base()), cpp20_input_iterator<int*>);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&>(it).base()), const cpp20_input_iterator<int*>&);
+    ASSERT_SAME_TYPE(decltype(static_cast<const It&&>(it).base()), const cpp20_input_iterator<int*>&);
+    ASSERT_NOEXCEPT(it.base());
+    assert(base(it.base()) == globalBuff);
+    assert(base(std::move(it).base()) == globalBuff);
   }
   return true;
 }
diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h
index 80ffa6a0a67d0..64794c2262779 100644
--- a/libcxx/test/std/ranges/range.adaptors/range.transform/types.h
+++ b/libcxx/test/std/ranges/range.adaptors/range.transform/types.h
@@ -46,8 +46,8 @@ struct ForwardView : std::ranges::view_base {
   constexpr explicit ForwardView(int* ptr = globalBuff) : ptr_(ptr) {}
   constexpr ForwardView(ForwardView&&) = default;
   constexpr ForwardView& operator=(ForwardView&&) = default;
-  constexpr auto begin() const { return ForwardIter(ptr_); }
-  constexpr auto end() const { return ForwardIter(ptr_ + 8); }
+  constexpr auto begin() const { return forward_iterator<int*>(ptr_); }
+  constexpr auto end() const { return forward_iterator<int*>(ptr_ + 8); }
 };
 static_assert(std::ranges::view<ForwardView>);
 static_assert(std::ranges::forward_range<ForwardView>);

From 0303eb3cf26d3c918638b07c1e4f59f13d6f3def Mon Sep 17 00:00:00 2001
From: Shubham Sandeep Rastogi <srastogi22@apple.com>
Date: Wed, 26 Jan 2022 16:58:49 -0800
Subject: [PATCH 819/946] Revert "Emit swift5 reflection section data in dsym
 bundle generated by dsymutil in the Dwarf section."

This reverts commit 50f50f2582993a079dbcfb8e7ba48920f41e6be0.
---
 llvm/include/llvm/BinaryFormat/Swift.def      |  26 -
 llvm/include/llvm/BinaryFormat/Swift.h        |  24 -
 llvm/include/llvm/DWARFLinker/DWARFStreamer.h |   8 +-
 llvm/include/llvm/MC/MCContext.h              |  10 +-
 llvm/include/llvm/MC/MCObjectFileInfo.h       |  13 -
 llvm/include/llvm/Object/MachO.h              |   4 -
 llvm/include/llvm/Object/ObjectFile.h         |   6 -
 llvm/lib/DWARFLinker/DWARFStreamer.cpp        |  20 +-
 llvm/lib/MC/MCContext.cpp                     |   8 +-
 llvm/lib/MC/MCObjectFileInfo.cpp              |  11 -
 llvm/lib/Object/MachOObjectFile.cpp           |  12 -
 llvm/test/tools/dsymutil/Inputs/main.yaml     | 886 ------------------
 .../dsymutil/Inputs/reflection_metadata.yaml  | 436 ---------
 llvm/test/tools/dsymutil/Inputs/test.yaml     | 254 -----
 .../tools/dsymutil/X86/reflection-dump.test   |  42 -
 llvm/tools/dsymutil/DwarfLinkerForBinary.cpp  |  87 +-
 16 files changed, 11 insertions(+), 1836 deletions(-)
 delete mode 100644 llvm/include/llvm/BinaryFormat/Swift.def
 delete mode 100644 llvm/include/llvm/BinaryFormat/Swift.h
 delete mode 100644 llvm/test/tools/dsymutil/Inputs/main.yaml
 delete mode 100644 llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
 delete mode 100644 llvm/test/tools/dsymutil/Inputs/test.yaml
 delete mode 100644 llvm/test/tools/dsymutil/X86/reflection-dump.test

diff --git a/llvm/include/llvm/BinaryFormat/Swift.def b/llvm/include/llvm/BinaryFormat/Swift.def
deleted file mode 100644
index 39931bec70e57..0000000000000
--- a/llvm/include/llvm/BinaryFormat/Swift.def
+++ /dev/null
@@ -1,26 +0,0 @@
-//===- llvm/BinaryFormat/Swift.def - Swift definitions ---------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// Macros for running through Swift enumerators.
-//
-//===----------------------------------------------------------------------===//
-
-#if !(defined HANDLE_SWIFT_SECTION)
-#error "Missing macro definition of HANDLE_SWIFT_SECTION"
-#endif
-
-#ifndef HANDLE_SWIFT_SECTION
-#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)
-#endif
-
-HANDLE_SWIFT_SECTION(Fieldmd, "__swift5_fieldmd", "swift5_fieldmd", ".sw5flmd")
-HANDLE_SWIFT_SECTION(Assocty, "__swift5_assocty", "swift5_assocty", ".sw5asty")
-HANDLE_SWIFT_SECTION(Builtin, "__swift5_builtin", "swift5_builtin", ".sw5bltn")
-HANDLE_SWIFT_SECTION(Capture, "__swift5_capture", "swift5_capture", ".sw5cptr")
-HANDLE_SWIFT_SECTION(Typeref, "__swift5_typeref", "swift5_typeref", ".sw5tyrf")
-HANDLE_SWIFT_SECTION(Reflstr, "__swift5_reflstr", "swift5_reflstr", ".sw5rfst")
diff --git a/llvm/include/llvm/BinaryFormat/Swift.h b/llvm/include/llvm/BinaryFormat/Swift.h
deleted file mode 100644
index 63bbfe08c86d4..0000000000000
--- a/llvm/include/llvm/BinaryFormat/Swift.h
+++ /dev/null
@@ -1,24 +0,0 @@
-//===-- llvm/BinaryFormat/Swift.h ---Swift Constants-------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-
-#ifndef LLVM_BINARYFORMAT_SWIFT_H
-#define LLVM_BINARYFORMAT_SWIFT_H
-
-namespace llvm {
-namespace swift {
-
-enum Swift5ReflectionSectionKind {
-#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF) KIND,
-#include "llvm/BinaryFormat/Swift.def"
-#undef HANDLE_SWIFT_SECTION
-  Unknown,
-  Last = Unknown
-};
-} // end of namespace swift
-} // end of namespace llvm
-
-#endif
diff --git a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
index 8e845ee91b9f7..9a5c6bcaf83f3 100644
--- a/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
+++ b/llvm/include/llvm/DWARFLinker/DWARFStreamer.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_DWARFLINKER_DWARFSTREAMER_H
 #define LLVM_DWARFLINKER_DWARFSTREAMER_H
 
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/DWARFLinker/DWARFLinker.h"
@@ -49,7 +48,7 @@ class DwarfStreamer : public DwarfEmitter {
       : OutFile(OutFile), OutFileType(OutFileType), Translator(Translator),
         ErrorHandler(Error), WarningHandler(Warning) {}
 
-  bool init(Triple TheTriple, StringRef Swift5ReflectionSegmentName);
+  bool init(Triple TheTriple);
 
   /// Dump the file to the disk.
   void finish();
@@ -86,11 +85,6 @@ class DwarfStreamer : public DwarfEmitter {
   /// Emit the swift_ast section stored in \p Buffer.
   void emitSwiftAST(StringRef Buffer);
 
-  /// Emit the swift reflection section stored in \p Buffer.
-  void emitSwiftReflectionSection(
-      llvm::swift::Swift5ReflectionSectionKind ReflSectionKind,
-      StringRef Buffer, uint32_t Alignment, uint32_t Size);
-
   /// Emit debug_ranges for \p FuncRange by translating the
   /// original \p Entries.
   void emitRangesEntries(
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index d2307d6922780..88d86d5b675ac 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -80,10 +80,6 @@ namespace llvm {
   private:
     Environment Env;
 
-    /// The name of the Segment where Swift5 Reflection Section data will be
-    /// outputted
-    StringRef Swift5ReflectionSegmentName;
-
     /// The triple for this object.
     Triple TT;
 
@@ -403,17 +399,13 @@ namespace llvm {
                        const MCRegisterInfo *MRI, const MCSubtargetInfo *MSTI,
                        const SourceMgr *Mgr = nullptr,
                        MCTargetOptions const *TargetOpts = nullptr,
-                       bool DoAutoReset = true,
-                       StringRef Swift5ReflSegmentName = {});
+                       bool DoAutoReset = true);
     MCContext(const MCContext &) = delete;
     MCContext &operator=(const MCContext &) = delete;
     ~MCContext();
 
     Environment getObjectFileType() const { return Env; }
 
-    const StringRef &getSwift5ReflectionSegmentName() const {
-      return Swift5ReflectionSegmentName;
-    }
     const Triple &getTargetTriple() const { return TT; }
     const SourceMgr *getSourceManager() const { return SrcMgr; }
 
diff --git a/llvm/include/llvm/MC/MCObjectFileInfo.h b/llvm/include/llvm/MC/MCObjectFileInfo.h
index 1b4804db783b4..5e0cccaba77fa 100644
--- a/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -15,7 +15,6 @@
 
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/Triple.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/VersionTuple.h"
@@ -229,10 +228,6 @@ class MCObjectFileInfo {
   MCSection *ReadOnly8Section = nullptr;
   MCSection *ReadOnly16Section = nullptr;
 
-  // Swift5 Reflection Data Sections
-  std::array<MCSection *, swift::Swift5ReflectionSectionKind::Last>
-      Swift5ReflectionSections = {};
-
 public:
   void initMCObjectFileInfo(MCContext &MCCtx, bool PIC,
                             bool LargeCodeModel = false);
@@ -428,14 +423,6 @@ class MCObjectFileInfo {
 
   bool isPositionIndependent() const { return PositionIndependent; }
 
-  // Swift5 Reflection Data Sections
-  MCSection *getSwift5ReflectionSection(
-      llvm::swift::Swift5ReflectionSectionKind ReflSectionKind) {
-    return ReflSectionKind != llvm::swift::Swift5ReflectionSectionKind::Unknown
-               ? Swift5ReflectionSections[ReflSectionKind]
-               : nullptr;
-  }
-
 private:
   bool PositionIndependent = false;
   MCContext *Ctx = nullptr;
diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h
index 09b6454bb0c14..ede742c47f971 100644
--- a/llvm/include/llvm/Object/MachO.h
+++ b/llvm/include/llvm/Object/MachO.h
@@ -22,7 +22,6 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/MachO.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/ObjectFile.h"
@@ -584,9 +583,6 @@ class MachOObjectFile : public ObjectFile {
 
   StringRef mapDebugSectionName(StringRef Name) const override;
 
-  llvm::swift::Swift5ReflectionSectionKind
-  mapReflectionSectionNameToEnumValue(StringRef SectionName) const override;
-
   bool hasPageZeroSegment() const { return HasPageZeroSegment; }
 
   static bool classof(const Binary *v) {
diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h
index 29919db772f0e..12704b1fc88e7 100644
--- a/llvm/include/llvm/Object/ObjectFile.h
+++ b/llvm/include/llvm/Object/ObjectFile.h
@@ -18,7 +18,6 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/BinaryFormat/Magic.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/SymbolicFile.h"
@@ -291,11 +290,6 @@ class ObjectFile : public SymbolicFile {
   virtual void getRelocationTypeName(DataRefImpl Rel,
                                      SmallVectorImpl<char> &Result) const = 0;
 
-  virtual llvm::swift::Swift5ReflectionSectionKind
-  mapReflectionSectionNameToEnumValue(StringRef SectionName) const {
-    return llvm::swift::Swift5ReflectionSectionKind::Unknown;
-  };
-
   Expected<uint64_t> getSymbolValue(DataRefImpl Symb) const;
 
 public:
diff --git a/llvm/lib/DWARFLinker/DWARFStreamer.cpp b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
index 7f9b9a9bc793c..1ab6ead3b5f66 100644
--- a/llvm/lib/DWARFLinker/DWARFStreamer.cpp
+++ b/llvm/lib/DWARFLinker/DWARFStreamer.cpp
@@ -27,8 +27,7 @@
 
 namespace llvm {
 
-bool DwarfStreamer::init(Triple TheTriple,
-                         StringRef Swift5ReflectionSegmentName) {
+bool DwarfStreamer::init(Triple TheTriple) {
   std::string ErrorStr;
   std::string TripleName;
   StringRef Context = "dwarf streamer init";
@@ -55,9 +54,8 @@ bool DwarfStreamer::init(Triple TheTriple,
   if (!MSTI)
     return error("no subtarget info for target " + TripleName, Context), false;
 
-  MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get(), nullptr,
-                         nullptr, true, Swift5ReflectionSegmentName));
-  MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false, false));
+  MC.reset(new MCContext(TheTriple, MAI.get(), MRI.get(), MSTI.get()));
+  MOFI.reset(TheTarget->createMCObjectFileInfo(*MC, /*PIC=*/false));
   MC->setObjectFileInfo(MOFI.get());
 
   MAB = TheTarget->createMCAsmBackend(*MSTI, *MRI, MCOptions);
@@ -304,18 +302,6 @@ void DwarfStreamer::emitSwiftAST(StringRef Buffer) {
   MS->emitBytes(Buffer);
 }
 
-void DwarfStreamer::emitSwiftReflectionSection(
-    llvm::swift::Swift5ReflectionSectionKind ReflSectionKind, StringRef Buffer,
-    uint32_t Alignment, uint32_t Size) {
-  MCSection *ReflectionSection =
-      MOFI->getSwift5ReflectionSection(ReflSectionKind);
-  if (ReflectionSection == nullptr)
-    return;
-  ReflectionSection->setAlignment(Align(Alignment));
-  MS->SwitchSection(ReflectionSection);
-  MS->emitBytes(Buffer);
-}
-
 /// Emit the debug_range section contents for \p FuncRange by
 /// translating the original \p Entries. The debug_range section
 /// format is totally trivial, consisting just of pairs of address
diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp
index eafcee1e0607b..7f639e9c408fe 100644
--- a/llvm/lib/MC/MCContext.cpp
+++ b/llvm/lib/MC/MCContext.cpp
@@ -67,10 +67,10 @@ static void defaultDiagHandler(const SMDiagnostic &SMD, bool, const SourceMgr &,
 MCContext::MCContext(const Triple &TheTriple, const MCAsmInfo *mai,
                      const MCRegisterInfo *mri, const MCSubtargetInfo *msti,
                      const SourceMgr *mgr, MCTargetOptions const *TargetOpts,
-                     bool DoAutoReset, StringRef Swift5ReflSegmentName)
-    : Swift5ReflectionSegmentName(Swift5ReflSegmentName), TT(TheTriple),
-      SrcMgr(mgr), InlineSrcMgr(nullptr), DiagHandler(defaultDiagHandler),
-      MAI(mai), MRI(mri), MSTI(msti), Symbols(Allocator), UsedNames(Allocator),
+                     bool DoAutoReset)
+    : TT(TheTriple), SrcMgr(mgr), InlineSrcMgr(nullptr),
+      DiagHandler(defaultDiagHandler), MAI(mai), MRI(mri), MSTI(msti),
+      Symbols(Allocator), UsedNames(Allocator),
       InlineAsmUsedLabelNames(Allocator),
       CurrentDwarfLoc(0, 0, 0, DWARF2_FLAG_IS_STMT, 0, 0),
       AutoReset(DoAutoReset), TargetOptions(TargetOpts) {
diff --git a/llvm/lib/MC/MCObjectFileInfo.cpp b/llvm/lib/MC/MCObjectFileInfo.cpp
index 77b0b0ee687cb..d7f85f793c55f 100644
--- a/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -299,17 +299,6 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
   RemarksSection = Ctx->getMachOSection(
       "__LLVM", "__remarks", MachO::S_ATTR_DEBUG, SectionKind::getMetadata());
 
-  // The architecture of dsymutil makes it very difficult to copy the Swift
-  // reflection metadata sections into the __TEXT segment, so dsymutil creates
-  // these sections in the __DWARF segment instead.
-  if (!Ctx->getSwift5ReflectionSegmentName().empty()) {
-#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
-  Swift5ReflectionSections[llvm::swift::Swift5ReflectionSectionKind::KIND] =   \
-      Ctx->getMachOSection(Ctx->getSwift5ReflectionSegmentName().data(),       \
-                           MACHO, 0, SectionKind::getMetadata());
-#include "llvm/BinaryFormat/Swift.def"
-  }
-
   TLSExtraDataSection = TLSTLVSection;
 }
 
diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp
index 83bc74ff31c40..42e257516f4e0 100644
--- a/llvm/lib/Object/MachOObjectFile.cpp
+++ b/llvm/lib/Object/MachOObjectFile.cpp
@@ -20,7 +20,6 @@
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/MachO.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/Object/Error.h"
 #include "llvm/Object/MachO.h"
 #include "llvm/Object/ObjectFile.h"
@@ -4766,14 +4765,3 @@ MachOObjectFile::findDsymObjectMembers(StringRef Path) {
                              Path.str().c_str());
   return ObjectPaths;
 }
-
-llvm::swift::Swift5ReflectionSectionKind
-MachOObjectFile::mapReflectionSectionNameToEnumValue(
-    StringRef SectionName) const {
-#define HANDLE_SWIFT_SECTION(KIND, MACHO, ELF, COFF)                           \
-  .Case(MACHO, llvm::swift::Swift5ReflectionSectionKind::KIND)
-  return StringSwitch<llvm::swift::Swift5ReflectionSectionKind>(SectionName)
-#include "llvm/BinaryFormat/Swift.def"
-      .Default(llvm::swift::Swift5ReflectionSectionKind::Unknown);
-#undef HANDLE_SWIFT_SECTION
-}
diff --git a/llvm/test/tools/dsymutil/Inputs/main.yaml b/llvm/test/tools/dsymutil/Inputs/main.yaml
deleted file mode 100644
index d81931d7861c3..0000000000000
--- a/llvm/test/tools/dsymutil/Inputs/main.yaml
+++ /dev/null
@@ -1,886 +0,0 @@
-# How to generate this file:
-# 1. First take a swift file and run xcrun swiftc -g -v test.swift 
-# reflection_metadata.swift, make sure the two swift files are in a short path 
-# like /tmp/
-
-# 2. Now you can see what the driver does, generate the object files in the 
-# tmp directory and link them to create the input binary
-
-# 3. Run obj2yaml on the input binary to create a yaml file and strip out the 
-# swift5 reflection sections from the load commands in the text segment
-
-# 4. I ran delta to reduce this file.
-
---- !mach-o
-FileHeader:
-  magic:           0xFEEDFACF
-  cputype:         0x1000007
-  cpusubtype:      0x3
-  filetype:        0x2
-  ncmds:           18
-  sizeofcmds:      2848
-  flags:           0x200085
-  reserved:        0x0
-LoadCommands:
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         72
-    segname:         __PAGEZERO
-    vmaddr:          0
-    vmsize:          4294967296
-    fileoff:         0
-    filesize:        0
-    maxprot:         0
-    initprot:        0
-    nsects:          0
-    flags:           0
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         952
-    segname:         __TEXT
-    vmaddr:          4294967296
-    vmsize:          16384
-    fileoff:         0
-    filesize:        16384
-    maxprot:         5
-    initprot:        5
-    nsects:          11
-    flags:           0
-    Sections:
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x100003EB0
-        size:            336
-        offset:          0x3EB0
-        align:           3
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x0
-        reserved1:       0x0
-        reserved2:       0x0
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         392
-    segname:         __DATA_CONST
-    vmaddr:          4294983680
-    vmsize:          16384
-    fileoff:         16384
-    filesize:        16384
-    maxprot:         3
-    initprot:        3
-    nsects:          4
-    flags:           16
-    Sections:
-      - sectname:        __got
-        segname:         __DATA_CONST
-        addr:            0x100004000
-        size:            48
-        offset:          0x4000
-        align:           3
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x6
-        reserved1:       0x11
-        reserved2:       0x0
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         392
-    segname:         __DATA
-    vmaddr:          4295000064
-    vmsize:          16384
-    fileoff:         32768
-    filesize:        16384
-    maxprot:         3
-    initprot:        3
-    nsects:          4
-    flags:           0
-    Sections:
-      - sectname:        __la_symbol_ptr
-        segname:         __DATA
-        addr:            0x100008000
-        size:            384
-        offset:          0x8088
-        align:           3
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x0
-        reserved1:       0x0
-        reserved2:       0x0
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         72
-    segname:         __LINKEDIT
-    vmaddr:          4295016448
-    vmsize:          32768
-    fileoff:         49152
-    filesize:        23584
-    maxprot:         1
-    initprot:        1
-    nsects:          0
-    flags:           0
-  - cmd:             LC_DYLD_INFO_ONLY
-    cmdsize:         48
-    rebase_off:      49152
-    rebase_size:     64
-    bind_off:        49216
-    bind_size:       216
-    weak_bind_off:   0
-    weak_bind_size:  0
-    lazy_bind_off:   49432
-    lazy_bind_size:  600
-    export_off:      50032
-    export_size:     1000
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          51136
-    nsyms:           638
-    stroff:          61504
-    strsize:         11232
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       560
-    iextdefsym:      560
-    nextdefsym:      52
-    iundefsym:       612
-    nundefsym:       26
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  61344
-    nindirectsyms:   40
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-  - cmd:             LC_LOAD_DYLINKER
-    cmdsize:         32
-    name:            12
-  - cmd:             LC_UUID
-    cmdsize:         24
-    uuid:            AA0A51FA-8B29-3A7B-85AA-FA6A457B2211
-  - cmd:             LC_BUILD_VERSION
-    cmdsize:         32
-    platform:        1
-    minos:           786432
-    sdk:             786688
-    ntools:          1
-  - cmd:             LC_SOURCE_VERSION
-    cmdsize:         16
-    version:         0
-  - cmd:             LC_MAIN
-    cmdsize:         24
-    entryoff:        9376
-    stacksize:       0
-  - cmd:             LC_LOAD_DYLIB
-    cmdsize:         56
-    dylib:
-      name:            24
-      timestamp:       2
-      current_version: 14942208
-      compatibility_version: 65536
-  - cmd:             LC_LOAD_DYLIB
-    cmdsize:         56
-    dylib:
-      name:            24
-      timestamp:       2
-      current_version: 85917696
-      compatibility_version: 65536
-  - cmd:             LC_LOAD_DYLIB
-    cmdsize:         64
-    dylib:
-      name:            24
-      timestamp:       2
-      current_version: 85196845
-      compatibility_version: 65536
-  - cmd:             LC_FUNCTION_STARTS
-    cmdsize:         16
-    dataoff:         51032
-    datasize:        104
-  - cmd:             LC_DATA_IN_CODE
-    cmdsize:         16
-    dataoff:         51136
-    datasize:        0
-LinkEditData:
-  NameList:
-    - n_strx:          2355
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976208
-    - n_strx:          2398
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976224
-    - n_strx:          2440
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976240
-    - n_strx:          2479
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976256
-    - n_strx:          2509
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976272
-    - n_strx:          2570
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976320
-    - n_strx:          2590
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976512
-    - n_strx:          2635
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976576
-    - n_strx:          2683
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976608
-    - n_strx:          2731
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976640
-    - n_strx:          2751
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976656
-    - n_strx:          2775
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976704
-    - n_strx:          2791
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976720
-    - n_strx:          2814
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976752
-    - n_strx:          2838
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976768
-    - n_strx:          2873
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294976784
-    - n_strx:          2906
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294976832
-    - n_strx:          2926
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294977104
-    - n_strx:          2946
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294977200
-    - n_strx:          2966
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977376
-    - n_strx:          3008
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977392
-    - n_strx:          3049
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977408
-    - n_strx:          3087
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977424
-    - n_strx:          3116
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294977440
-    - n_strx:          3176
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977488
-    - n_strx:          3201
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977504
-    - n_strx:          3232
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977552
-    - n_strx:          3270
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977648
-    - n_strx:          3318
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294977664
-    - n_strx:          3364
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294978352
-    - n_strx:          3411
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294978464
-    - n_strx:          3447
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294978688
-    - n_strx:          3506
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294978832
-    - n_strx:          3567
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294978944
-    - n_strx:          3587
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979024
-    - n_strx:          3607
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979056
-    - n_strx:          3627
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979136
-    - n_strx:          3647
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294979232
-    - n_strx:          3666
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979264
-    - n_strx:          3686
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979328
-    - n_strx:          3706
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979536
-    - n_strx:          3726
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979856
-    - n_strx:          3746
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979872
-    - n_strx:          3766
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979888
-    - n_strx:          3786
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979920
-    - n_strx:          3814
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294979936
-    - n_strx:          3842
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294980240
-    - n_strx:          3871
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294980288
-    - n_strx:          3898
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980320
-    - n_strx:          3927
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980368
-    - n_strx:          3951
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980384
-    - n_strx:          3982
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980448
-    - n_strx:          4001
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294980464
-    - n_strx:          4032
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294980512
-    - n_strx:          4060
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294980800
-    - n_strx:          4088
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981120
-    - n_strx:          4116
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981136
-    - n_strx:          4144
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981152
-    - n_strx:          4172
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294981184
-    - n_strx:          4208
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294981248
-    - n_strx:          4225
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          128
-      n_value:         4294981280
-    - n_strx:          4253
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981328
-    - n_strx:          4276
-      n_type:          0x1E
-      n_sect:          1
-      n_desc:          0
-      n_value:         4294981376
-    - n_strx:          4294
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          128
-      n_value:         4294981764
-    - n_strx:          4306
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294981824
-    - n_strx:          4322
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294981952
-    - n_strx:          4349
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294981960
-    - n_strx:          4387
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294981968
-    - n_strx:          4423
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294982160
-    - n_strx:          4474
-      n_type:          0xE
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294982352
-    - n_strx:          4503
-      n_type:          0xE
-      n_sect:          5
-      n_desc:          0
-      n_value:         4294982448
-    - n_strx:          4530
-      n_type:          0x1E
-      n_sect:          5
-      n_desc:          128
-      n_value:         4294982464
-    - n_strx:          4558
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982466
-    - n_strx:          4571
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982470
-    - n_strx:          4608
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982476
-    - n_strx:          4639
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982498
-    - n_strx:          4666
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982506
-    - n_strx:          4691
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982510
-    - n_strx:          4727
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982516
-    - n_strx:          4758
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982522
-    - n_strx:          4790
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982528
-    - n_strx:          4820
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982534
-    - n_strx:          4859
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982540
-    - n_strx:          4902
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982554
-    - n_strx:          4945
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         4294982564
-    - n_strx:          4986
-      n_type:          0x1E
-      n_sect:          6
-      n_desc:          128
-      n_value:         0
-    - n_strx:          5987
-      n_type:          0x66
-      n_sect:          3
-      n_desc:          1
-      n_value:         1638431181
-    - n_strx:          7104
-      n_type:          0x66
-      n_sect:          3
-      n_desc:          1
-      n_value:         1638431191
-  StringTable:
-    - ' '
-    - '_$s4main10MyProtocolMp'
-    - '_$s4main10MyProtocolTL'
-    - '_$s4main11ConformanceV5innerSivM'
-    - '_$s4main11ConformanceV5innerSivg'
-    - '_$s4main11ConformanceV5innerSivpMV'
-    - '_$s4main11ConformanceV5innerSivpfi'
-    - '_$s4main11ConformanceV5innerSivs'
-    - '_$s4main11ConformanceVAA10MyProtocolAAMc'
-    - '_$s4main11ConformanceVAA10MyProtocolAAWP'
-    - '_$s4main11ConformanceVMa'
-    - '_$s4main11ConformanceVMn'
-    - '_$s4main11ConformanceVN'
-    - '_$s4main12Conformance2V5innerSivM'
-    - '_$s4main12Conformance2V5innerSivg'
-    - '_$s4main12Conformance2V5innerSivpMV'
-    - '_$s4main12Conformance2V5innerSivpfi'
-    - '_$s4main12Conformance2V5innerSivs'
-    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
-    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
-    - '_$s4main12Conformance2VMa'
-    - '_$s4main12Conformance2VMn'
-    - '_$s4main12Conformance2VN'
-    - '_$s4main13MyGenericEnumOMa'
-    - '_$s4main13MyGenericEnumOMn'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc'
-    - '_$s4main14MyGenericClassCMa'
-    - '_$s4main14MyGenericClassCMn'
-    - '_$s4main14MyGenericClassCfD'
-    - '_$s4main14MyGenericClassCfd'
-    - '_$s4main15MyGenericStructVMa'
-    - '_$s4main15MyGenericStructVMn'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF'
-    - '_$s4main6MyEnumOMa'
-    - '_$s4main6MyEnumOMn'
-    - '_$s4main6MyEnumON'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc'
-    - '_$s4main7MyClassCMa'
-    - '_$s4main7MyClassCMm'
-    - '_$s4main7MyClassCMn'
-    - '_$s4main7MyClassCN'
-    - '_$s4main7MyClassCfD'
-    - '_$s4main7MyClassCfd'
-    - '_$s4main8MyStructVMa'
-    - '_$s4main8MyStructVMn'
-    - '_$s4main8MyStructVN'
-    - '_$s5Inner4main10MyProtocolPTl'
-    - __mh_execute_header
-    - _main
-    - '_$sBi64_WV'
-    - '_$sBoWV'
-    - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC'
-    - '_$sSSN'
-    - '_$sSaMa'
-    - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF'
-    - '_$ss5print_9separator10terminatoryypd_S2StF'
-    - '_$sypN'
-    - '_$sytWV'
-    - '_OBJC_CLASS_$__TtCs12_SwiftObject'
-    - '_OBJC_METACLASS_$__TtCs12_SwiftObject'
-    - __objc_empty_cache
-    - _objc_opt_self
-    - _swift_allocObject
-    - _swift_allocateGenericClassMetadata
-    - _swift_allocateGenericValueMetadata
-    - _swift_bridgeObjectRelease
-    - _swift_checkMetadataState
-    - _swift_deallocClassInstance
-    - _swift_deallocObject
-    - _swift_getAssociatedTypeWitness
-    - _swift_getGenericMetadata
-    - _swift_initClassMetadata2
-    - _swift_release
-    - _swift_retain
-    - dyld_stub_binder
-    - '_$s4main12Conformance2V5innerSivM.resume.0'
-    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
-    - '_$s4main12Conformance2V5innerACSi_tcfC'
-    - '_$s4main12Conformance2VACycfC'
-    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - '_$s4main3AppVAAyyFZ'
-    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
-    - '_$s4main3AppVACycfC'
-    - '_$s4main3AppV5$mainyyFZ'
-    - '_$s4main3AppVMa'
-    - '_$sSa12_endMutationyyF'
-    - '_$s4main7MyClassC1iSivg'
-    - '_$s4main7MyClassC2msAA0B6StructVvg'
-    - '_$s4main7MyClassC2meAA0B4EnumOvg'
-    - '_$s4main6MyEnumOWOy'
-    - '_$s4main6MyEnumOWOe'
-    - '_$s4main6MyEnumOWOh'
-    - '_$s4main11ConformanceV5innerSivM.resume.0'
-    - '_$s4main11ConformanceV5innerACSi_tcfcfA_'
-    - '_$s4main11ConformanceV5innerACSi_tcfC'
-    - '_$s4main11ConformanceVACycfC'
-    - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - '_$s4main8MyStructVACycfC'
-    - '_$s4main14MyGenericClassC1txvg'
-    - '_$s4main14MyGenericClassC1i5InnerQzvg'
-    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg'
-    - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg'
-    - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh'
-    - '_$s4main15MyGenericStructVACyxGycfC'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA'
-    - '_$s4main6MyEnumOwCP'
-    - '_$s4main6MyEnumOwxx'
-    - '_$s4main6MyEnumOwcp'
-    - '_$s4main6MyEnumOwca'
-    - ___swift_memcpy9_8
-    - '_$s4main6MyEnumOwta'
-    - '_$s4main6MyEnumOwet'
-    - '_$s4main6MyEnumOwst'
-    - '_$s4main6MyEnumOwug'
-    - '_$s4main6MyEnumOwup'
-    - '_$s4main6MyEnumOwui'
-    - '_$s4main14MyGenericClassCMi'
-    - '_$s4main14MyGenericClassCMr'
-    - '_$s4main15MyGenericStructVMi'
-    - '_$s4main13MyGenericEnumOMi'
-    - ___swift_initWithCopy_strong
-    - ___swift_destroy_strong
-    - ___swift_assignWithCopy_strong
-    - ___swift_memcpy8_8
-    - ___swift_assignWithTake_strong
-    - '_$s4main13MyGenericEnumOwet'
-    - '_$s4main13MyGenericEnumOwst'
-    - '_$s4main13MyGenericEnumOwug'
-    - '_$s4main13MyGenericEnumOwup'
-    - '_$s4main13MyGenericEnumOwui'
-    - ___swift_instantiateGenericMetadata
-    - ___chkstk_darwin
-    - ___chkstk_darwin_llvm_probe
-    - ___chkstk_darwin_probe
-    - ____chkstk_darwin
-    - '_$s4mainMXM'
-    - '_$s4main3AppVMn'
-    - '_$s4main7MyClassC1iSivpWvd'
-    - '_$s4main7MyClassC2msAA0B6StructVvpWvd'
-    - '_$s4main7MyClassC2meAA0B4EnumOvpWvd'
-    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd'
-    - '_$s4main15MyGenericStructVMP'
-    - '_$s4main13MyGenericEnumOMP'
-    - ___swift_reflection_version
-    - _symbolic Si
-    - _symbolic _____ 4main12Conformance2V
-    - '_symbolic $s4main10MyProtocolP'
-    - _symbolic _____ 4main3AppV
-    - _symbolic x
-    - _symbolic B0
-    - _symbolic _____ 4main11ConformanceV
-    - _symbolic _____ 4main7MyClassC
-    - _symbolic _____ 4main8MyStructV
-    - _symbolic _____ 4main6MyEnumO
-    - _symbolic _____ 4main14MyGenericClassC
-    - _symbolic 5Inner_____Qz 4main10MyProtocolP
-    - _symbolic _____yxG 4main15MyGenericStructV
-    - _symbolic _____yxG 4main13MyGenericEnumO
-    - _symbolic _____ 4main15MyGenericStructV
-    - _symbolic _____ 4main13MyGenericEnumO
-    - _symbolic _____yxG 4main14MyGenericClassC
-    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
-    - '_$s4main11ConformanceVAA10MyProtocolAAMA'
-    - '_$s4main12Conformance2VMF'
-    - '_$s4main3AppVMF'
-    - '_$s4main10MyProtocol_pMF'
-    - '_$s4main7MyClassCMF'
-    - '_$s4main11ConformanceVMF'
-    - '_$s4main8MyStructVMF'
-    - '_$s4main6MyEnumOMF'
-    - '_$s4main14MyGenericClassCMF'
-    - '_$s4main15MyGenericStructVMF'
-    - '_$s4main13MyGenericEnumOMF'
-    - '_$s4main6MyEnumOMB'
-    - '_$s4main12Conformance2VMf'
-    - '_$s4main3AppVMf'
-    - '_$s4main3AppVN'
-    - '_$s4main11ConformanceVMf'
-    - '_$s4main8MyStructVMf'
-    - '_$s4main6MyEnumOWV'
-    - '_$s4main6MyEnumOMf'
-    - ___unnamed_23
-    - '_$s4main14MyGenericClassCMP'
-    - '_$s4main13MyGenericEnumOWV'
-    - __METACLASS_DATA__TtC4main7MyClass
-    - __IVARS__TtC4main7MyClass
-    - __DATA__TtC4main7MyClass
-    - __IVARS__TtC4main14MyGenericClass
-    - __dyld_private
-    - '_$s4main7MyClassCMf'
-    - '_$s4main14MyGenericClassCMI'
-    - '_$s4main15MyGenericStructVMI'
-    - '_$s4main13MyGenericEnumOMI'
-    - '/tmp/main-1.swiftmodule'
-    - '/Users/shubham/Development/test76973336/final2objfiletest/'
-    - test.swift
-    - '/tmp/test-1.o'
-    - '_$s4main12Conformance2V5innerSivpfi'
-    - '_$s4main12Conformance2V5innerSivg'
-    - '_$s4main12Conformance2V5innerSivs'
-    - '_$s4main12Conformance2V5innerSivM'
-    - '_$s4main12Conformance2V5innerSivM.resume.0'
-    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
-    - '_$s4main12Conformance2V5innerACSi_tcfC'
-    - '_$s4main12Conformance2VACycfC'
-    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - '_$s4main3AppVAAyyFZ'
-    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
-    - '_$s4main3AppVACycfC'
-    - '_$s4main3AppV5$mainyyFZ'
-    - _main
-    - '_$s4main12Conformance2VMa'
-    - '_$s4main3AppVMa'
-    - '_$sSa12_endMutationyyF'
-    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
-    - '_$s4main12Conformance2V5innerSivpMV'
-    - '_$s4mainMXM'
-    - '_$s4main12Conformance2VMn'
-    - '_$s4main3AppVMn'
-    - _symbolic Si
-    - _symbolic _____ 4main12Conformance2V
-    - '_symbolic $s4main10MyProtocolP'
-    - _symbolic _____ 4main3AppV
-    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
-    - '_$s4main12Conformance2VMF'
-    - '_$s4main3AppVMF'
-    - '_$s4main12Conformance2VMf'
-    - '_$s4main12Conformance2VN'
-    - '_$s4main3AppVMf'
-    - '_$s4main3AppVN'
-    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
-    - reflection_metadata.swift
-    - '/tmp/reflection_metadata-1.o'
diff --git a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml b/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
deleted file mode 100644
index b2179a23bf28d..0000000000000
--- a/llvm/test/tools/dsymutil/Inputs/reflection_metadata.yaml
+++ /dev/null
@@ -1,436 +0,0 @@
-# How to generate this file:
-# 1. First take a swift file and run xcrun swiftc -g -v file.swift 
-# secondfile.swift, make sure the two swift files are in a short path like /tmp/
-
-# 2. Now you can see what the driver does, generate the object files in the 
-# tmp directory
-
-# 3. Run obj2yaml on object file to create a yaml file
-
-# 4. I ran delta to reduce this file.
-
---- !mach-o
-FileHeader:
-  magic:           0xFEEDFACF
-  cputype:         0x1000007
-  cpusubtype:      0x3
-  filetype:        0x1
-  ncmds:           8
-  sizeofcmds:      2800
-  flags:           0x2000
-  reserved:        0x0
-LoadCommands:
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         2552
-    segname:         ''
-    vmaddr:          0
-    vmsize:          21352
-    fileoff:         2832
-    filesize:        20967
-    maxprot:         7
-    initprot:        7
-    nsects:          31
-    flags:           0
-    Sections:
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x0
-        size:            4571
-        offset:          0xB10
-        align:           4
-        reloff:          0x5CF8
-        nreloc:          74
-        flags:           0x80000400
-        reserved1:       0x0
-        reserved2:       0x0
-        relocations:
-          - address:         0x11A1
-            symbolnum:       142
-            pcrel:           true
-            length:          2
-            extern:          true
-            type:            1
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_typeref
-        segname:         __TEXT
-        addr:            0x11DC
-        size:            117
-        offset:          0x1CEC
-        align:           1
-        reloff:          0x5F48
-        nreloc:          22
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         7800423000005369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0001FFFFFFFF0035496E6E657201F9FFFFFF517A0001FFFFFFFF797847000001FFFFFFFF797847000001FFFFFFFF0001FFFFFFFF0001FFFFFFFF79784700
-        relocations:
-          - address:         0x6D
-            symbolnum:       163
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_capture
-        segname:         __TEXT
-        addr:            0x1254
-        size:            24
-        offset:          0x1D64
-        align:           2
-        reloff:          0x5FF8
-        nreloc:          6
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         010000000100000002000000F4FFFFFFF0FFFFFFECFFFFFF
-        relocations:
-          - address:         0x14
-            symbolnum:       29
-            pcrel:           false
-            length:          3
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_reflstr
-        segname:         __TEXT
-        addr:            0x17D8
-        size:            37
-        offset:          0x22E8
-        align:           0
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         496E6E65720069006D73006D6500696E6E6572004300490074006D6773006D676500474300
-      - sectname:        __swift5_assocty
-        segname:         __TEXT
-        addr:            0x1800
-        size:            24
-        offset:          0x2310
-        align:           2
-        reloff:          0x6530
-        nreloc:          8
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF
-        relocations:
-          - address:         0x14
-            symbolnum:       31
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            5
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_fieldmd
-        segname:         __TEXT
-        addr:            0x1818
-        size:            260
-        offset:          0x2328
-        align:           2
-        reloff:          0x6570
-        nreloc:          60
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         000000000000000004000C0000000000000000000000000001000C000300000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000000000000000000003000C000200000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF000000000000000001000C000400000000000000ECFFFFFFE8FFFFFF00000000E0FFFFFFDCFFFFFF00000000D4FFFFFFD0FFFFFF00000000C8FFFFFFC4FFFFFF000000000000000000000C0000000000000000000000000002000C000100000000000000ECFFFFFFE8FFFFFF
-        relocations:
-          - address:         0x100
-            symbolnum:       71
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_builtin
-        segname:         __TEXT
-        addr:            0x1AC8
-        size:            20
-        offset:          0x25D8
-        align:           2
-        reloff:          0x67F8
-        nreloc:          2
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         00000000090000000800010010000000FE000000
-        relocations:
-          - address:         0x0
-            symbolnum:       52
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            5
-            scattered:       false
-            value:           0
-      - sectname:        __bss
-        segname:         __DATA
-        addr:            0x3372
-        size:            2084
-        offset:          0x50B0
-        align:           3
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x6800000B
-        reserved1:       0x0
-        reserved2:       0x0
-        relocations:
-          - address:         0x56
-            symbolnum:       1
-            pcrel:           false
-            length:          3
-            extern:          false
-            type:            0
-            scattered:       false
-            value:           0
-  - cmd:             LC_BUILD_VERSION
-    cmdsize:         24
-    platform:        1
-    minos:           786432
-    sdk:             786688
-    ntools:          0
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          27888
-    nsyms:           185
-    stroff:          30848
-    strsize:         5056
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       79
-    iextdefsym:      79
-    nextdefsym:      87
-    iundefsym:       166
-    nundefsym:       19
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  0
-    nindirectsyms:   0
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         40
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, 
-                       0x0, 0x0, 0x0, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         24
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, 
-                       0x6F, 0x72, 0x65, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         32
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, 
-                       0x6E, 0x63, 0x79, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         24
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, 
-                       0x0, 0x0, 0x0 ]
-LinkEditData:
-  NameList:
-    - n_strx:          5014
-      n_type:          0xE
-      n_sect:          1
-      n_desc:          0
-      n_value:         5600
-  StringTable:
-    - ''
-    - l_objectdestroy
-    - '_$s4main6MyEnumOWOy'
-    - '_$s4main6MyEnumOwxx'
-    - _symbolic x
-    - '_$s4main6MyEnumOwst'
-    - '_$s4main13MyGenericEnumOwst'
-    - '_$s4main6MyEnumOwet'
-    - '_$s4main13MyGenericEnumOwet'
-    - '_OBJC_CLASS_$__TtCs12_SwiftObject'
-    - '_OBJC_METACLASS_$__TtCs12_SwiftObject'
-    - _swift_deallocObject
-    - _swift_allocObject
-    - '_$s4main11ConformanceV5innerSivs'
-    - _swift_getAssociatedTypeWitness
-    - __IVARS__TtC4main7MyClass
-    - __DATA__TtC4main7MyClass
-    - __METACLASS_DATA__TtC4main7MyClass
-    - __IVARS__TtC4main14MyGenericClass
-    - l_protocols
-    - _objc_classes
-    - l_protocol_conformances
-    - l__swift5_reflection_descriptor
-    - l_coro.devirt.trigger
-    - '_$s4main14MyGenericClassCMr'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfCTq'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfCTq'
-    - '_$s4main6MyEnumOwup'
-    - '_$s4main13MyGenericEnumOwup'
-    - '_$s4main6MyEnumOwcp'
-    - '_$s4main10MyProtocolMp'
-    - ___swift_reflection_version
-    - ____chkstk_darwin
-    - _swift_retain
-    - '_$s4main8MyStructVMn'
-    - '_$s4main15MyGenericStructVMn'
-    - '_$s4main11ConformanceVMn'
-    - '_$s4main6MyEnumOMn'
-    - '_$s4main13MyGenericEnumOMn'
-    - '_$s4main7MyClassCMn'
-    - '_$s4main14MyGenericClassCMn'
-    - '_$s4main7MyClassCMm'
-    - '_$s5Inner4main10MyProtocolPTl'
-    - '_$s4main6MyEnumOwui'
-    - '_$s4main13MyGenericEnumOwui'
-    - '_$s4main11ConformanceV5innerSivpfi'
-    - _symbolic Si
-    - '_$s4main15MyGenericStructVMi'
-    - '_$s4main13MyGenericEnumOMi'
-    - '_$s4main14MyGenericClassCMi'
-    - l_llvm.swift_module_hash
-    - '_$s4main13MyGenericEnumOyxGAA0B8ProtocolRzlWOh'
-    - '_$s4main6MyEnumOWOh'
-    - '_$s4main14MyGenericClassC1i5InnerQzvg'
-    - '_$s4main14MyGenericClassC1txvg'
-    - '_$s4main11ConformanceV5innerSivg'
-    - '_$s4main7MyClassC1iSivg'
-    - '_$s4main7MyClassC2msAA0B6StructVvg'
-    - '_$s4main7MyClassC2meAA0B4EnumOvg'
-    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvg'
-    - '_$s4main14MyGenericClassC3mgeAA0bC4EnumOyxGvg'
-    - '_$s4main6MyEnumOwug'
-    - '_$s4main13MyGenericEnumOwug'
-    - ___swift_initWithCopy_strong
-    - ___swift_assignWithCopy_strong
-    - ___swift_destroy_strong
-    - ___swift_assignWithTake_strong
-    - _objc_opt_self
-    - '_$s4main8MyStructVMf'
-    - '_$s4main11ConformanceVMf'
-    - '_$s4main6MyEnumOMf'
-    - '_$s4main7MyClassCMf'
-    - _swift_checkMetadataState
-    - _swift_release
-    - l_type_metadata_table
-    - __objc_empty_cache
-    - _swift_deallocClassInstance
-    - ___chkstk_darwin_llvm_probe
-    - '_$s4main6MyEnumOWOe'
-    - '_$s4main7MyClassC1iSivpWvd'
-    - '_$s4main7MyClassC2msAA0B6StructVvpWvd'
-    - '_$s4main7MyClassC2meAA0B4EnumOvpWvd'
-    - '_$s4main14MyGenericClassC3mgsAA0bC6StructVyxGvpWvd'
-    - '_$s4main7MyClassCfd'
-    - '_$s4main14MyGenericClassCfd'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfc'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfc'
-    - '_$s4main11ConformanceVAA10MyProtocolAAMc'
-    - '_$s4main6MyEnumOwta'
-    - l_metadata
-    - _swift_allocateGenericClassMetadata
-    - _swift_allocateGenericValueMetadata
-    - _swift_getGenericMetadata
-    - ___swift_instantiateGenericMetadata
-    - '_$s4main6MyEnumOwca'
-    - '_$s4main8MyStructVMa'
-    - '_$s4main15MyGenericStructVMa'
-    - '_$s4main11ConformanceVMa'
-    - '_$s4main6MyEnumOMa'
-    - '_$s4main13MyGenericEnumOMa'
-    - '_$s4main7MyClassCMa'
-    - '_$s4main14MyGenericClassCMa'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_'
-    - '_$s4main11ConformanceV5innerACSi_tcfcfA_'
-    - '_$s4main11ConformanceVAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - _symbolic _____ 4main8MyStructV
-    - _symbolic _____ 4main15MyGenericStructV
-    - _symbolic _____yxG 4main15MyGenericStructV
-    - _symbolic _____ 4main11ConformanceV
-    - '_$sytWV'
-    - '_$sBoWV'
-    - '_$sBi64_WV'
-    - '_$s4main6MyEnumOWV'
-    - '_$s4main13MyGenericEnumOWV'
-    - '_$s4main11ConformanceV5innerSivpMV'
-    - '_symbolic $s4main10MyProtocolP'
-    - _symbolic 5Inner_____Qz 4main10MyProtocolP
-    - '_$s4main11ConformanceVAA10MyProtocolAAWP'
-    - '_$s4main15MyGenericStructVMP'
-    - '_$s4main13MyGenericEnumOMP'
-    - '_$s4main14MyGenericClassCMP'
-    - '_$s4main6MyEnumOwCP'
-    - _symbolic _____ 4main6MyEnumO
-    - _symbolic _____ 4main13MyGenericEnumO
-    - _symbolic _____yxG 4main13MyGenericEnumO
-    - '_$s4main8MyStructVN'
-    - '_$s4main11ConformanceVN'
-    - '_$s4main6MyEnumON'
-    - '_$s4main7MyClassCN'
-    - '_$s4main11ConformanceV5innerSivM'
-    - '_$s4mainMXM'
-    - '_$s4main10MyProtocolTL'
-    - '_$s4main15MyGenericStructVMI'
-    - '_$s4main13MyGenericEnumOMI'
-    - '_$s4main14MyGenericClassCMI'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlF'
-    - '_$s4main10MyProtocol_pMF'
-    - '_$s4main8MyStructVMF'
-    - '_$s4main15MyGenericStructVMF'
-    - '_$s4main11ConformanceVMF'
-    - '_$s4main6MyEnumOMF'
-    - '_$s4main13MyGenericEnumOMF'
-    - '_$s4main7MyClassCMF'
-    - '_$s4main14MyGenericClassCMF'
-    - '_$s4main7MyClassCfD'
-    - '_$s4main14MyGenericClassCfD'
-    - _symbolic _____ 4main7MyClassC
-    - _symbolic _____ 4main14MyGenericClassC
-    - _symbolic _____yxG 4main14MyGenericClassC
-    - '_$s4main15MyGenericStructVACyxGycfC'
-    - '_$s4main8MyStructVACycfC'
-    - '_$s4main11ConformanceVACycfC'
-    - '_$s4main11ConformanceV5innerACSi_tcfC'
-    - '_$s4main7MyClassC1i2ms2meACSi_AA0B6StructVAA0B4EnumOtcfC'
-    - '_$s4main14MyGenericClassC1t1i3mgs3mgeACyxGx_5InnerQzAA0bC6StructVyxGAA0bC4EnumOyxGtcfC'
-    - '_$s4main6MyEnumOMB'
-    - '_$s4main16makeSomeClosures1tyycx_tAA10MyProtocolRzlFyycfU_TA'
-    - '_$s4main11ConformanceVAA10MyProtocolAAMA'
-    - l___unnamed_29
-    - l___unnamed_19
-    - ___swift_memcpy9_8
-    - ___swift_memcpy8_8
-    - l___unnamed_28
-    - l___unnamed_18
-    - l___unnamed_27
-    - l___unnamed_17
-    - l___unnamed_26
-    - l___unnamed_16
-    - l___unnamed_25
-    - l___unnamed_15
-    - l___unnamed_4
-    - l___unnamed_24
-    - l___unnamed_14
-    - l___unnamed_3
-    - ___unnamed_23
-    - l___unnamed_13
-    - _swift_initClassMetadata2
-    - l___unnamed_2
-    - l___unnamed_12
-    - l___unnamed_1
-    - l___unnamed_11
-    - _symbolic B0
-    - l___unnamed_30
-    - l___unnamed_10
-    - '_$s4main11ConformanceV5innerSivM.resume.0'
diff --git a/llvm/test/tools/dsymutil/Inputs/test.yaml b/llvm/test/tools/dsymutil/Inputs/test.yaml
deleted file mode 100644
index da3aa9a8aaf34..0000000000000
--- a/llvm/test/tools/dsymutil/Inputs/test.yaml
+++ /dev/null
@@ -1,254 +0,0 @@
-# How to generate this file:
-# 1. First take a swift file and run xcrun swiftc -g -v file.swift 
-# secondfile.swift, make sure the two swift files are in a short path like /tmp/
-
-# 2. Now you can see what the driver does, generate the object files in the 
-# tmp directory
-
-# 3. Run obj2yaml on object file to create a yaml file
-
-# 4. I ran delta to reduce this file.
-
---- !mach-o
-FileHeader:
-  magic:           0xFEEDFACF
-  cputype:         0x1000007
-  cpusubtype:      0x3
-  filetype:        0x1
-  ncmds:           8
-  sizeofcmds:      2240
-  flags:           0x2000
-  reserved:        0x0
-LoadCommands:
-  - cmd:             LC_SEGMENT_64
-    cmdsize:         1992
-    segname:         ''
-    vmaddr:          0
-    vmsize:          6592
-    fileoff:         2272
-    filesize:        6592
-    maxprot:         7
-    initprot:        7
-    nsects:          24
-    flags:           0
-    Sections:
-      - sectname:        __text
-        segname:         __TEXT
-        addr:            0x0
-        size:            593
-        offset:          0x8E0
-        align:           4
-        reloff:          0x22A0
-        nreloc:          24
-        flags:           0x80000400
-        reserved1:       0x0
-        reserved2:       0x0
-        relocations:
-          - address:         0x233
-            symbolnum:       2
-            pcrel:           true
-            length:          2
-            extern:          true
-            type:            4
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_typeref
-        segname:         __TEXT
-        addr:            0x2D6
-        size:            38
-        offset:          0xBB6
-        align:           1
-        reloff:          0x2418
-        nreloc:          4
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         5369000001FFFFFFFF002473346D61696E31304D7950726F746F636F6C50000001FFFFFFFF00
-        relocations:
-          - address:         0x21
-            symbolnum:       46
-            pcrel:           false
-            length:          3
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_reflstr
-        segname:         __TEXT
-        addr:            0x318
-        size:            12
-        offset:          0xBF8
-        align:           0
-        reloff:          0x0
-        nreloc:          0
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         496E6E657200696E6E657200
-      - sectname:        __swift5_assocty
-        segname:         __TEXT
-        addr:            0x324
-        size:            24
-        offset:          0xC04
-        align:           2
-        reloff:          0x2450
-        nreloc:          8
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         00000000FCFFFFFF0100000008000000F0FFFFFFECFFFFFF
-        relocations:
-          - address:         0x14
-            symbolnum:       5
-            pcrel:           false
-            length:          2
-            extern:          true
-            type:            0
-            scattered:       false
-            value:           0
-      - sectname:        __swift5_fieldmd
-        segname:         __TEXT
-        addr:            0x378
-        size:            44
-        offset:          0xC58
-        align:           2
-        reloff:          0x24C0
-        nreloc:          8
-        flags:           0x10000000
-        reserved1:       0x0
-        reserved2:       0x0
-        content:         000000000000000000000C000100000002000000ECFFFFFFE8FFFFFF000000000000000000000C0000000000
-        relocations:
-          - address:         0x1C
-            symbolnum:       12
-            pcrel:           false
-            length:          3
-            extern:          false
-            type:            0
-            scattered:       false
-            value:           0
-  - cmd:             LC_BUILD_VERSION
-    cmdsize:         24
-    platform:        1
-    minos:           786432
-    sdk:             786688
-    ntools:          0
-  - cmd:             LC_SYMTAB
-    cmdsize:         24
-    symoff:          9824
-    nsyms:           57
-    stroff:          10736
-    strsize:         1544
-  - cmd:             LC_DYSYMTAB
-    cmdsize:         80
-    ilocalsym:       0
-    nlocalsym:       16
-    iextdefsym:      16
-    nextdefsym:      31
-    iundefsym:       47
-    nundefsym:       10
-    tocoff:          0
-    ntoc:            0
-    modtaboff:       0
-    nmodtab:         0
-    extrefsymoff:    0
-    nextrefsyms:     0
-    indirectsymoff:  0
-    nindirectsyms:   0
-    extreloff:       0
-    nextrel:         0
-    locreloff:       0
-    nlocrel:         0
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         40
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x53, 
-                       0x0, 0x0, 0x0, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         24
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x43, 
-                       0x6F, 0x72, 0x65, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         32
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x73, 0x77, 0x69, 0x66, 0x74, 0x5F, 
-                       0x6E, 0x63, 0x79, 0x0 ]
-  - cmd:             LC_LINKER_OPTION
-    cmdsize:         24
-    count:           1
-    PayloadBytes:    [ 0x2D, 0x6C, 0x6F, 0x62, 0x6A, 0x63, 0x0, 0x0, 0x0, 
-                       0x0, 0x0, 0x0 ]
-LinkEditData:
-  NameList:
-    - n_strx:          1494
-      n_type:          0xE
-      n_sect:          9
-      n_desc:          0
-      n_value:         0
-  StringTable:
-    - ''
-    - l_entry_point
-    - '_$s4main12Conformance2V5innerSivs'
-    - l_protocol_conformances
-    - l_coro.devirt.trigger
-    - '_$s4main10MyProtocolMp'
-    - ___swift_reflection_version
-    - _main
-    - '_$s4main3AppVMn'
-    - '_$s4main12Conformance2VMn'
-    - '_$s4main12Conformance2V5innerSivpfi'
-    - _symbolic Si
-    - l_llvm.swift_module_hash
-    - '_$s4main12Conformance2V5innerSivg'
-    - '_$s4main3AppVMf'
-    - '_$s4main12Conformance2VMf'
-    - _swift_bridgeObjectRelease
-    - l_type_metadata_table
-    - '_$s4main12Conformance2VAA10MyProtocolAAMc'
-    - '_$sSaMa'
-    - '_$s4main3AppVMa'
-    - '_$s4main12Conformance2VMa'
-    - '_$s4main12Conformance2V5innerACSi_tcfcfA_'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA1_'
-    - '_$ss5print_9separator10terminatoryypd_S2StFfA0_'
-    - '_$s4main3AppV5$mainyyFZ'
-    - '_$s4main3AppVAAyyFZ'
-    - '_$s4main12Conformance2VAA10MyProtocolA2aDP5inner5InnerQzvgTW'
-    - _symbolic _____ 4main3AppV
-    - '_$sytWV'
-    - '_$sBi64_WV'
-    - '_$s4main12Conformance2V5innerSivpMV'
-    - _symbolic _____ 4main12Conformance2V
-    - '_symbolic $s4main10MyProtocolP'
-    - '_$s4main12Conformance2VAA10MyProtocolAAWP'
-    - '_$sypN'
-    - '_$s4main3AppVN'
-    - '_$s4main12Conformance2VN'
-    - '_$sSSN'
-    - '_$s4main12Conformance2V5innerSivM'
-    - '_$s4mainMXM'
-    - '_$sSa12_endMutationyyF'
-    - '_$ss5print_9separator10terminatoryypd_S2StF'
-    - '_$ss27_allocateUninitializedArrayySayxG_BptBwlF'
-    - '_$ss27_finalizeUninitializedArrayySayxGABnlF'
-    - '_$s4main3AppVMF'
-    - '_$s4main12Conformance2VMF'
-    - '_$s4main3AppVACycfC'
-    - '_$s4main12Conformance2VACycfC'
-    - '_$s4main12Conformance2V5innerACSi_tcfC'
-    - '_$sSS21_builtinStringLiteral17utf8CodeUnitCount7isASCIISSBp_BwBi1_tcfC'
-    - '_$s4main12Conformance2VAA10MyProtocolAAMA'
-    - l___unnamed_8
-    - l___unnamed_7
-    - l___unnamed_6
-    - l___unnamed_5
-    - l___unnamed_4
-    - '_$s4main12Conformance2V5innerSivM.resume.0'
-    - ''
-    - ''
-    - ''
-    - ''
-    - ''
-    - ''
-    - ''
diff --git a/llvm/test/tools/dsymutil/X86/reflection-dump.test b/llvm/test/tools/dsymutil/X86/reflection-dump.test
deleted file mode 100644
index 55f300c8bb9aa..0000000000000
--- a/llvm/test/tools/dsymutil/X86/reflection-dump.test
+++ /dev/null
@@ -1,42 +0,0 @@
-RUN: rm -rf %t.dir && mkdir -p %t.dir/tmp
-RUN: cp %p/../Inputs/main.yaml %t.dir
-RUN: cp %p/../Inputs/test.yaml %t.dir
-RUN: cp %p/../Inputs/reflection_metadata.yaml %t.dir
-RUN: yaml2obj %p/../Inputs/main.yaml -o %t.dir/main
-RUN: yaml2obj %p/../Inputs/test.yaml -o %t.dir/tmp/test-1.o
-RUN: yaml2obj %p/../Inputs/reflection_metadata.yaml -o %t.dir/tmp/reflection_metadata-1.o
-
-RUN: dsymutil -oso-prepend-path=%t.dir %t.dir/main -o %t.dir/main.dSYM
-RUN: llvm-objdump -s %t.dir/main.dSYM/Contents/Resources/DWARF/main | FileCheck %s
-
-CHECK: Contents of section __DWARF,__swift5_typeref:
-CHECK-NEXT:  10000e000 53690000 01ffffff ff002473 346d6169  Si........$s4mai
-CHECK-NEXT:  10000e010 6e31304d 7950726f 746f636f 6c500000  n10MyProtocolP..
-CHECK-NEXT:  10000e020 01ffffff ff007800 42300000 53690000  ......x.B0..Si..
-CHECK-NEXT:  10000e030 01ffffff ff002473 346d6169 6e31304d  ......$s4main10M
-CHECK-NEXT:  10000e040 7950726f 746f636f 6c500000 01ffffff  yProtocolP......
-
-CHECK: Contents of section __DWARF,__swift5_reflstr:
-CHECK-NEXT:  10000e09b 496e6e65 7200696e 6e657200 496e6e65  Inner.inner.Inne
-CHECK-NEXT:  10000e0ab 72006900 6d73006d 6500696e 6e657200  r.i.ms.me.inner.
-CHECK-NEXT:  10000e0bb 43004900 74006d67 73006d67 65004743  C.I.t.mgs.mge.GC
-CHECK-NEXT:  10000e0cb 00
-
-CHECK: Contents of section __DWARF,__swift5_assocty:
-CHECK-NEXT:  10000e0cc 00000000 fcffffff 01000000 08000000  ................
-CHECK-NEXT:  10000e0dc f0ffffff ecffffff 00000000 fcffffff  ................
-CHECK-NEXT:  10000e0ec 01000000 08000000 f0ffffff ecffffff  ................
-
-CHECK: Contents of section __DWARF,__swift5_fieldmd:
-CHECK-NEXT:  10000e0fc 00000000 00000000 00000c00 01000000  ................
-CHECK-NEXT:  10000e10c 02000000 ecffffff e8ffffff 00000000  ................
-CHECK-NEXT:  10000e11c 00000000 00000c00 00000000 00000000  ................
-CHECK-NEXT:  10000e12c 00000000 04000c00 00000000 00000000  ................
-
-CHECK: Contents of section __DWARF,__swift5_capture:
-CHECK-NEXT:  10000e22c 01000000 01000000 02000000 f4ffffff  ................
-CHECK-NEXT:	 10000e23c f0ffffff ecffffff                    ........
-
-CHECK: Contents of section __DWARF,__swift5_builtin:
-CHECK-NEXT:  10000e244 00000000 09000000 08000100 10000000  ................
-CHECK-NEXT:  10000e254 fe000000                             ....
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index 37c84691eb5a0..a8dfde0865377 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -30,7 +30,6 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/BinaryFormat/Dwarf.h"
 #include "llvm/BinaryFormat/MachO.h"
-#include "llvm/BinaryFormat/Swift.h"
 #include "llvm/CodeGen/AccelTable.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DIE.h"
@@ -175,7 +174,7 @@ bool DwarfLinkerForBinary::createStreamer(const Triple &TheTriple,
       [&](const Twine &Warning, StringRef Context, const DWARFDie *) {
         warn(Warning, Context);
       });
-  return Streamer->init(TheTriple, "__DWARF");
+  return Streamer->init(TheTriple);
 }
 
 ErrorOr<const object::ObjectFile &>
@@ -296,77 +295,6 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj,
   return ErrorOrObj.getError();
 }
 
-static bool binaryHasSwiftReflectionSections(const DebugMap &Map,
-                                             const LinkOptions &Options,
-                                             BinaryHolder &BinHolder) {
-  // If the input binary has swift5 reflection sections, there is no need to
-  // copy them to the .dSYM. Only copy them for binaries where the linker
-  // omitted the reflection metadata.
-  if (!Map.getBinaryPath().empty() &&
-      Options.FileType == OutputFileType::Object) {
-
-    auto ObjectEntry = BinHolder.getObjectEntry(Map.getBinaryPath());
-    // If ObjectEntry or Object has an error, no binary exists, therefore no
-    // reflection sections exist.
-    if (!ObjectEntry) {
-      // Any errors will be diagnosed later in the main loop, ignore them here.
-      llvm::consumeError(ObjectEntry.takeError());
-      return false;
-    }
-
-    auto Object =
-        ObjectEntry->getObjectAs<object::MachOObjectFile>(Map.getTriple());
-    if (!Object) {
-      // Any errors will be diagnosed later in the main loop, ignore them here.
-      llvm::consumeError(Object.takeError());
-      return false;
-    }
-
-    for (auto &Section : Object->sections()) {
-      llvm::Expected<llvm::StringRef> NameOrErr =
-          Object->getSectionName(Section.getRawDataRefImpl());
-      if (!NameOrErr) {
-        llvm::consumeError(NameOrErr.takeError());
-        continue;
-      }
-      NameOrErr->consume_back("__TEXT");
-      if (Object->mapReflectionSectionNameToEnumValue(*NameOrErr) !=
-          llvm::swift::Swift5ReflectionSectionKind::Unknown) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-static void
-copySwiftReflectionMetadata(const llvm::dsymutil::DebugMapObject *Obj,
-                            DwarfStreamer *Streamer) {
-  auto OF =
-      llvm::object::ObjectFile::createObjectFile(Obj->getObjectFilename());
-  if (!OF) {
-    llvm::consumeError(OF.takeError());
-    return;
-  } else if (auto *MO =
-                 dyn_cast<llvm::object::MachOObjectFile>(OF->getBinary())) {
-    for (auto &Section : OF->getBinary()->sections()) {
-      llvm::Expected<llvm::StringRef> NameOrErr =
-          MO->getSectionName(Section.getRawDataRefImpl());
-      if (!NameOrErr) {
-        llvm::consumeError(NameOrErr.takeError());
-        continue;
-      }
-      llvm::Expected<llvm::StringRef> SectionContents = Section.getContents();
-      if (SectionContents) {
-        NameOrErr->consume_back("__TEXT");
-        Streamer->emitSwiftReflectionSection(
-            MO->mapReflectionSectionNameToEnumValue(*NameOrErr),
-            *SectionContents, Section.getAlignment(), Section.getSize());
-      }
-    }
-  }
-}
-
 bool DwarfLinkerForBinary::link(const DebugMap &Map) {
   if (!createStreamer(Map.getTriple(), OutFile))
     return false;
@@ -461,20 +389,8 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
         llvm_unreachable("Unhandled DebugMap object");
       });
   GeneralLinker.setSwiftInterfacesMap(&ParseableSwiftInterfaces);
-  bool ReflectionSectionsPresentInBinary = false;
-  // If there is no output specified, no point in checking the binary for swift5
-  // reflection sections.
-  if (!Options.NoOutput) {
-    ReflectionSectionsPresentInBinary =
-        binaryHasSwiftReflectionSections(Map, Options, BinHolder);
-  }
 
   for (const auto &Obj : Map.objects()) {
-    // If there is no output specified or the reflection sections are present in
-    // the Input binary, there is no need to copy the Swift Reflection Metadata
-    if (!Options.NoOutput && !ReflectionSectionsPresentInBinary)
-      copySwiftReflectionMetadata(Obj.get(), Streamer.get());
-
     // N_AST objects (swiftmodule files) should get dumped directly into the
     // appropriate DWARF section.
     if (Obj->getType() == MachO::N_AST) {
@@ -515,6 +431,7 @@ bool DwarfLinkerForBinary::link(const DebugMap &Map) {
 
       continue;
     }
+
     if (auto ErrorOrObj = loadObject(*Obj, Map, RL))
       GeneralLinker.addObjectFile(*ErrorOrObj);
     else {

From 630e6fa912a3ef9b84b28eef8b4c4572743d6dbf Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH@ibm.com>
Date: Wed, 26 Jan 2022 20:00:25 -0500
Subject: [PATCH 820/946] [NFC] Empty test commit, check commit access


From 11fb049954bab13822e61df694fea5242a9fcf93 Mon Sep 17 00:00:00 2001
From: Aart Bik <ajcbik@google.com>
Date: Wed, 26 Jan 2022 16:05:31 -0800
Subject: [PATCH 821/946] [mlir][amx] add a full tile matrix mult integral
 example to integration tests

Reviewed By: dcaballe

Differential Revision: https://reviews.llvm.org/D118292
---
 .../Vector/CPU/AMX/test-mulf-full.mlir        |  23 ++-
 .../Vector/CPU/AMX/test-muli-full.mlir        | 137 ++++++++++++++++++
 2 files changed, 147 insertions(+), 13 deletions(-)
 create mode 100644 mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir

diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
index bc1d6333bd721..37c68416e544f 100644
--- a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-mulf-full.mlir
@@ -1,6 +1,9 @@
-// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std -convert-vector-to-llvm="enable-amx" -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std \
+// RUN:  -tensor-constant-bufferize -convert-vector-to-llvm="enable-amx" \
+// RUN:  -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
 // RUN: mlir-translate -mlir-to-llvmir | \
-// RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" \
+// RUN:  --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
 // RUN: FileCheck %s
 
 // Note: To run this test, your CPU must support AMX.
@@ -25,9 +28,7 @@ func @entry() -> i32 {
   %c16 = arith.constant 16: index
   %c32 = arith.constant 32: index
 
-  // Setup simple test data. Note that bf16 does not seem to work well with the
-  // tensor type yet, which is why we use vectors and transfer the data into memref.
-  // TODO: use tensors and bufferization.to_memref instead
+  // Setup simple test data.
   %0 = arith.constant dense<[
      [ 1.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
@@ -61,7 +62,7 @@ func @entry() -> i32 {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
      [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.1,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
-  ]> : vector<16x32xbf16>
+  ]> : tensor<16x32xbf16>
   %1 = arith.constant dense<[
      [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.1, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
@@ -95,14 +96,12 @@ func @entry() -> i32 {
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ],
      [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.1, 1.0, 1.0, 1.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
-  ]> : vector<16x32xbf16>
+  ]> : tensor<16x32xbf16>
 
   // Set up memory.
-  %a = memref.alloc() : memref<16x32xbf16>
-  %b = memref.alloc() : memref<16x32xbf16>
+  %a = bufferization.to_memref %0 : memref<16x32xbf16>
+  %b = bufferization.to_memref %1 : memref<16x32xbf16>
   %c = memref.alloc() : memref<16x16xf32>
-  vector.transfer_write %0, %a[%c0, %c0] : vector<16x32xbf16>, memref<16x32xbf16>
-  vector.transfer_write %1, %b[%c0, %c0] : vector<16x32xbf16>, memref<16x32xbf16>
 
   // Call kernel.
   call @kernel(%a, %b, %c) : (memref<16x32xbf16>, memref<16x32xbf16>, memref<16x16xf32>) -> ()
@@ -133,8 +132,6 @@ func @entry() -> i32 {
   }
 
   // Release resources.
-  memref.dealloc %a : memref<16x32xbf16>
-  memref.dealloc %b : memref<16x32xbf16>
   memref.dealloc %c : memref<16x16xf32>
   %i0 = arith.constant 0 : i32
   return %i0 : i32
diff --git a/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir
new file mode 100644
index 0000000000000..241e08f6208b4
--- /dev/null
+++ b/mlir/test/Integration/Dialect/Vector/CPU/AMX/test-muli-full.mlir
@@ -0,0 +1,137 @@
+// RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std \
+// RUN:  -tensor-constant-bufferize -convert-vector-to-llvm="enable-amx" \
+// RUN:  -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts | \
+// RUN: mlir-translate -mlir-to-llvmir | \
+// RUN: %lli --entry-function=entry --mattr="+amx-tile,+amx-int8,+amx-bf16" \
+// RUN:  --dlopen=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \
+// RUN: FileCheck %s
+
+// Note: To run this test, your CPU must support AMX.
+
+// Multiply full size tiles into zero destination.
+func @kernel(%arg0: memref<16x64xi8>,
+             %arg1: memref<16x64xi8>,
+             %arg2: memref<16x16xi32>) {
+  %0 = arith.constant 0 : index
+  %1 = amx.tile_load %arg0[%0, %0] : memref<16x64xi8>  into vector<16x64xi8>
+  %2 = amx.tile_load %arg1[%0, %0] : memref<16x64xi8>  into vector<16x64xi8>
+  %3 = amx.tile_zero : vector<16x16xi32>
+  %4 = amx.tile_muli %1 zext, %2 zext, %3 : vector<16x64xi8>, vector<16x64xi8>, vector<16x16xi32>
+  amx.tile_store %arg2[%0, %0], %4 : memref<16x16xi32>, vector<16x16xi32>
+  return
+}
+
+func @entry() -> i32 {
+  %iu = arith.constant -1: i32
+  %c0 = arith.constant 0: index
+  %c1 = arith.constant 1: index
+  %c16 = arith.constant 16: index
+
+  // Setup simple test data.
+  %0 = arith.constant dense<[
+    [ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
+  ]> : tensor<16x64xi8>
+  %1 = arith.constant dense<[
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ],
+    [ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 ]
+  ]> : tensor<16x64xi8>
+
+  // Set up memory.
+  %a = bufferization.to_memref %0 : memref<16x64xi8>
+  %b = bufferization.to_memref %1 : memref<16x64xi8>
+  %c = memref.alloc() : memref<16x16xi32>
+
+  // Call kernel.
+  call @kernel(%a, %b, %c) : (memref<16x64xi8>, memref<16x64xi8>, memref<16x16xi32>) -> ()
+
+  //
+  // Print and verify the 16x16 result.
+  //
+  // CHECK:      ( 69, 69, 69, 69, 65, 65, 65, 65, 89, 89, 89, 89, 89, 89, 89, 75 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 72, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 72, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 72, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 72, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 75, 75, 75, 75, 71, 71, 71, 71, 95, 95, 95, 95, 95, 95, 95, 81 )
+  // CHECK-NEXT: ( 69, 69, 69, 69, 65, 65, 65, 65, 89, 89, 89, 89, 89, 89, 89, 75 )
+  //
+  scf.for %i = %c0 to %c16 step %c1 {
+    %v = vector.transfer_read %c[%i, %c0], %iu: memref<16x16xi32>, vector<16xi32>
+    vector.print %v : vector<16xi32>
+  }
+
+  // Release resources.
+  memref.dealloc %c : memref<16x16xi32>
+  %i0 = arith.constant 0 : i32
+  return %i0 : i32
+}

From 5c238be04bf64484712e9a99c4eaf796cb4a2daa Mon Sep 17 00:00:00 2001
From: Amir Ayupov <amir.aupov@gmail.com>
Date: Wed, 26 Jan 2022 17:08:13 -0800
Subject: [PATCH 822/946] [BOLT][TEST] Adjust tests for
 BOLT_CLANG_EXE=clang-{6..9}

Fix tests to pass with clang-6..9 on Ubuntu 20.04.

Reviewed By: yota9

Differential Revision: https://reviews.llvm.org/D118282
---
 bolt/test/X86/asm-func-debug.test           | 2 +-
 bolt/test/X86/symtab-secondary-entries.test | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/bolt/test/X86/asm-func-debug.test b/bolt/test/X86/asm-func-debug.test
index 9590701969712..99a61a3337629 100644
--- a/bolt/test/X86/asm-func-debug.test
+++ b/bolt/test/X86/asm-func-debug.test
@@ -14,7 +14,7 @@ CHECK-NEXT:       DW_AT_low_pc (0x0000000000000000)
 CHECK-NEXT:       DW_AT_ranges
 CHECK-NEXT:          [0x0000000000[[#%x,ADDR:]],
 CHECK-SAME:                              0x0000000000[[#ADDR+1]]))
-CHECK-NEXT:       DW_AT_name ("{{.*}}/asm_foo.s")
+CHECK-NEXT:       DW_AT_name ("{{.*}}asm_foo.s")
 
 # Check .debug_aranges was updated for asm module
 CHECK: .debug_aranges contents:
diff --git a/bolt/test/X86/symtab-secondary-entries.test b/bolt/test/X86/symtab-secondary-entries.test
index bdf1bff10aa4b..4e4ea85eb1566 100644
--- a/bolt/test/X86/symtab-secondary-entries.test
+++ b/bolt/test/X86/symtab-secondary-entries.test
@@ -1,6 +1,8 @@
 # Check that secondary entry points are updated correctly in the ELF symtab
 
-RUN: %clang %p/Inputs/user-order.S -Wl,-q -o %t.exe -nostdlib
+RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
+RUN:   %p/Inputs/user-order.S -o %t.o
+RUN: ld.lld %t.o --emit-relocs --nostdlib -o %t.exe
 RUN: llvm-bolt %t.exe -o %t -reorder-functions=user \
 RUN:   -function-order=%p/Inputs/order.txt
 

From eee97f1617c94b88e9f0a964c47dc6003173f818 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 17:54:37 -0800
Subject: [PATCH 823/946] [clang] Use proper type to left shift after D117262

Causing warnings like
warning C4334: '<<': result of 32-bit shift implicitly converted to 64 bits
as reported in D117262.
---
 clang/lib/CodeGen/Address.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/Address.h b/clang/lib/CodeGen/Address.h
index 6a22e567d399a..3ac0f4f0d7e56 100644
--- a/clang/lib/CodeGen/Address.h
+++ b/clang/lib/CodeGen/Address.h
@@ -66,7 +66,7 @@ template <typename T> class AddressImpl<T, true> {
   llvm::Type *getElementType() const { return ElementType.getPointer(); }
   CharUnits getAlignment() const {
     unsigned AlignLog = (Pointer.getInt() << 3) | ElementType.getInt();
-    return CharUnits::fromQuantity(1UL << AlignLog);
+    return CharUnits::fromQuantity(CharUnits::QuantityType(1) << AlignLog);
   }
 };
 

From 9c6272861032f511a23784ce0c5cc8f6ac2f625b Mon Sep 17 00:00:00 2001
From: David Blaikie <dblaikie@gmail.com>
Date: Wed, 26 Jan 2022 18:00:43 -0800
Subject: [PATCH 824/946] Default to DWARFv4 on Windows

---
 clang/lib/Driver/ToolChains/MSVC.h | 4 ++++
 clang/test/CodeGen/dwarf-version.c | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h
index 8f033de09bf64..c842773996eda 100644
--- a/clang/lib/Driver/ToolChains/MSVC.h
+++ b/clang/lib/Driver/ToolChains/MSVC.h
@@ -69,6 +69,10 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain {
     return llvm::DebuggerKind::Default;
   }
 
+  unsigned GetDefaultDwarfVersion() const override {
+    return 4;
+  }
+
   enum class SubDirectoryType {
     Bin,
     Include,
diff --git a/clang/test/CodeGen/dwarf-version.c b/clang/test/CodeGen/dwarf-version.c
index b329556ae0d9d..47025e241d13c 100644
--- a/clang/test/CodeGen/dwarf-version.c
+++ b/clang/test/CodeGen/dwarf-version.c
@@ -28,10 +28,10 @@
 // RUN:     | FileCheck %s --check-prefixes=NODWARF,CODEVIEW
 //     Explicitly request DWARF.
 // RUN: %clang -target i686-pc-windows-msvc -gdwarf -S -emit-llvm -o - %s \
-// RUN:     | FileCheck %s --check-prefixes=VER5,NOCODEVIEW
+// RUN:     | FileCheck %s --check-prefixes=VER4,NOCODEVIEW
 //     Explicitly request both.
 // RUN: %clang -target i686-pc-windows-msvc -gdwarf -gcodeview -S -emit-llvm -o - %s \
-// RUN:     | FileCheck %s --check-prefixes=VER5,CODEVIEW
+// RUN:     | FileCheck %s --check-prefixes=VER4,CODEVIEW
 // RUN: %clang -target powerpc-ibm-aix-xcoff -g -S -emit-llvm -o - %s | \
 // RUN:   FileCheck %s --check-prefix=VER3
 // RUN: %clang -target powerpc-ibm-aix-xcoff -gdwarf-2 -S -emit-llvm -o - %s | \

From 1510668fb007c448cefb2190bf0558c63cdd5382 Mon Sep 17 00:00:00 2001
From: Roland McGrath <mcgrathr@google.com>
Date: Wed, 26 Jan 2022 16:39:36 -0800
Subject: [PATCH 825/946] [asan] Add missing #include of sanitizer_platform.h

The "asan/asan_mapping.h" header relies on sanitizer_platform.h
macros, but doesn't directly include the header.  All the existing
uses until recently happened to be in places where some other header
had indirectly included sanitizer_platform.h first.  The addition of
asan_rtl_x86_64.S was the first place to use "asan/asan_mapping.h"
alone.  It so happens that its uses of the macros make having no
macros defined equivalent to SANITIZER_LINUX, so this did not affect
Linux builds.  But the assembly constants in asan_rtl_x86_64.S were
wrong for Fuchsia when SANITIZER_FUCHSIA was not properly defined.

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D118296
---
 compiler-rt/lib/asan/asan_mapping.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/compiler-rt/lib/asan/asan_mapping.h b/compiler-rt/lib/asan/asan_mapping.h
index 6ca6ee00e5c98..4ff09b103d5f2 100644
--- a/compiler-rt/lib/asan/asan_mapping.h
+++ b/compiler-rt/lib/asan/asan_mapping.h
@@ -13,6 +13,8 @@
 #ifndef ASAN_MAPPING_H
 #define ASAN_MAPPING_H
 
+#include "sanitizer_common/sanitizer_platform.h"
+
 // The full explanation of the memory mapping could be found here:
 // https://github.com/google/sanitizers/wiki/AddressSanitizerAlgorithm
 //

From f85ee6d56a9371ccb8b3a63b6f9ed38ba8d071b7 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu <yedeng.yd@linux.alibaba.com>
Date: Thu, 27 Jan 2022 10:26:40 +0800
Subject: [PATCH 826/946] [NFC] [AST] Move isSameEntity into ASTContext

Currently we are trying to implement the semantics of C++ Modules. A big
challenge would be the ODR checking. Previously we did this in
ASTReader, it would handle the cases like:
```
module;
export module a_module;
import another_module; //  check the ODR consistency here
```
or
```
export module m;
import a_module;
import another_module; // check the ODR consistency here
```

However, it wouldn't handle the case:
```
import another_module; // When we check ODR here, everything looks fine.
```

In the case, the read process is ended. But we need to check the ODR
still. To reuse the facility we do in ASTReader, this patch moves the
corresponding codes into ASTContext. This should be good since there
were facilities like `hasSameTemplateName` and `hasSameType`.

Although the patch is a little bit big, all of the change should be
trivial.

Reviewed By: erichkeane

Differential Revision: https://reviews.llvm.org/D118223
---
 clang/include/clang/AST/ASTContext.h      |  12 +
 clang/lib/AST/ASTContext.cpp              | 370 ++++++++++++++++++++
 clang/lib/Serialization/ASTReaderDecl.cpp | 394 +---------------------
 3 files changed, 387 insertions(+), 389 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ed35e73ce4cf9..f39ce14bc82cb 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2622,6 +2622,18 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// template.
   bool hasSameTemplateName(TemplateName X, TemplateName Y);
 
+  /// Determine whether the two declarations refer to the same entity.
+  bool isSameEntity(NamedDecl *X, NamedDecl *Y);
+
+  /// Determine whether two template parameter lists are similar enough
+  /// that they may be used in declarations of the same template.
+  bool isSameTemplateParameterList(TemplateParameterList *X,
+                                   TemplateParameterList *Y);
+
+  /// Determine whether two template parameters are similar enough
+  /// that they may be used in declarations of the same template.
+  bool isSameTemplateParameter(NamedDecl *X, NamedDecl *Y);
+
   /// Retrieve the "canonical" template argument.
   ///
   /// The canonical template argument is the simplest template argument
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 9d63724c919a0..8a780250b6d8e 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -6147,6 +6147,376 @@ bool ASTContext::hasSameTemplateName(TemplateName X, TemplateName Y) {
   return X.getAsVoidPointer() == Y.getAsVoidPointer();
 }
 
+bool ASTContext::isSameTemplateParameter(NamedDecl *X, NamedDecl *Y) {
+  if (X->getKind() != Y->getKind())
+    return false;
+
+  if (auto *TX = dyn_cast<TemplateTypeParmDecl>(X)) {
+    auto *TY = cast<TemplateTypeParmDecl>(Y);
+    if (TX->isParameterPack() != TY->isParameterPack())
+      return false;
+    if (TX->hasTypeConstraint() != TY->hasTypeConstraint())
+      return false;
+    const TypeConstraint *TXTC = TX->getTypeConstraint();
+    const TypeConstraint *TYTC = TY->getTypeConstraint();
+    if (!TXTC != !TYTC)
+      return false;
+    if (TXTC && TYTC) {
+      auto *NCX = TXTC->getNamedConcept();
+      auto *NCY = TYTC->getNamedConcept();
+      if (!NCX || !NCY || !isSameEntity(NCX, NCY))
+        return false;
+      if (TXTC->hasExplicitTemplateArgs() != TYTC->hasExplicitTemplateArgs())
+        return false;
+      if (TXTC->hasExplicitTemplateArgs()) {
+        auto *TXTCArgs = TXTC->getTemplateArgsAsWritten();
+        auto *TYTCArgs = TYTC->getTemplateArgsAsWritten();
+        if (TXTCArgs->NumTemplateArgs != TYTCArgs->NumTemplateArgs)
+          return false;
+        llvm::FoldingSetNodeID XID, YID;
+        for (auto &ArgLoc : TXTCArgs->arguments())
+          ArgLoc.getArgument().Profile(XID, X->getASTContext());
+        for (auto &ArgLoc : TYTCArgs->arguments())
+          ArgLoc.getArgument().Profile(YID, Y->getASTContext());
+        if (XID != YID)
+          return false;
+      }
+    }
+    return true;
+  }
+
+  if (auto *TX = dyn_cast<NonTypeTemplateParmDecl>(X)) {
+    auto *TY = cast<NonTypeTemplateParmDecl>(Y);
+    return TX->isParameterPack() == TY->isParameterPack() &&
+           TX->getASTContext().hasSameType(TX->getType(), TY->getType());
+  }
+
+  auto *TX = cast<TemplateTemplateParmDecl>(X);
+  auto *TY = cast<TemplateTemplateParmDecl>(Y);
+  return TX->isParameterPack() == TY->isParameterPack() &&
+         isSameTemplateParameterList(TX->getTemplateParameters(),
+                                     TY->getTemplateParameters());
+}
+
+bool ASTContext::isSameTemplateParameterList(TemplateParameterList *X,
+                                             TemplateParameterList *Y) {
+  if (X->size() != Y->size())
+    return false;
+
+  for (unsigned I = 0, N = X->size(); I != N; ++I)
+    if (!isSameTemplateParameter(X->getParam(I), Y->getParam(I)))
+      return false;
+
+  const Expr *XRC = X->getRequiresClause();
+  const Expr *YRC = Y->getRequiresClause();
+  if (!XRC != !YRC)
+    return false;
+  if (XRC) {
+    llvm::FoldingSetNodeID XRCID, YRCID;
+    XRC->Profile(XRCID, *this, /*Canonical=*/true);
+    YRC->Profile(YRCID, *this, /*Canonical=*/true);
+    if (XRCID != YRCID)
+      return false;
+  }
+
+  return true;
+}
+
+static NamespaceDecl *getNamespace(const NestedNameSpecifier *X) {
+  if (auto *NS = X->getAsNamespace())
+    return NS;
+  if (auto *NAS = X->getAsNamespaceAlias())
+    return NAS->getNamespace();
+  return nullptr;
+}
+
+static bool isSameQualifier(const NestedNameSpecifier *X,
+                            const NestedNameSpecifier *Y) {
+  if (auto *NSX = getNamespace(X)) {
+    auto *NSY = getNamespace(Y);
+    if (!NSY || NSX->getCanonicalDecl() != NSY->getCanonicalDecl())
+      return false;
+  } else if (X->getKind() != Y->getKind())
+    return false;
+
+  // FIXME: For namespaces and types, we're permitted to check that the entity
+  // is named via the same tokens. We should probably do so.
+  switch (X->getKind()) {
+  case NestedNameSpecifier::Identifier:
+    if (X->getAsIdentifier() != Y->getAsIdentifier())
+      return false;
+    break;
+  case NestedNameSpecifier::Namespace:
+  case NestedNameSpecifier::NamespaceAlias:
+    // We've already checked that we named the same namespace.
+    break;
+  case NestedNameSpecifier::TypeSpec:
+  case NestedNameSpecifier::TypeSpecWithTemplate:
+    if (X->getAsType()->getCanonicalTypeInternal() !=
+        Y->getAsType()->getCanonicalTypeInternal())
+      return false;
+    break;
+  case NestedNameSpecifier::Global:
+  case NestedNameSpecifier::Super:
+    return true;
+  }
+
+  // Recurse into earlier portion of NNS, if any.
+  auto *PX = X->getPrefix();
+  auto *PY = Y->getPrefix();
+  if (PX && PY)
+    return isSameQualifier(PX, PY);
+  return !PX && !PY;
+}
+
+/// Determine whether the attributes we can overload on are identical for A and
+/// B. Will ignore any overloadable attrs represented in the type of A and B.
+static bool hasSameOverloadableAttrs(const FunctionDecl *A,
+                                     const FunctionDecl *B) {
+  // Note that pass_object_size attributes are represented in the function's
+  // ExtParameterInfo, so we don't need to check them here.
+
+  llvm::FoldingSetNodeID Cand1ID, Cand2ID;
+  auto AEnableIfAttrs = A->specific_attrs<EnableIfAttr>();
+  auto BEnableIfAttrs = B->specific_attrs<EnableIfAttr>();
+
+  for (auto Pair : zip_longest(AEnableIfAttrs, BEnableIfAttrs)) {
+    Optional<EnableIfAttr *> Cand1A = std::get<0>(Pair);
+    Optional<EnableIfAttr *> Cand2A = std::get<1>(Pair);
+
+    // Return false if the number of enable_if attributes is different.
+    if (!Cand1A || !Cand2A)
+      return false;
+
+    Cand1ID.clear();
+    Cand2ID.clear();
+
+    (*Cand1A)->getCond()->Profile(Cand1ID, A->getASTContext(), true);
+    (*Cand2A)->getCond()->Profile(Cand2ID, B->getASTContext(), true);
+
+    // Return false if any of the enable_if expressions of A and B are
+    // different.
+    if (Cand1ID != Cand2ID)
+      return false;
+  }
+  return true;
+}
+
+bool ASTContext::isSameEntity(NamedDecl *X, NamedDecl *Y) {
+  if (X == Y)
+    return true;
+
+  if (X->getDeclName() != Y->getDeclName())
+    return false;
+
+  // Must be in the same context.
+  //
+  // Note that we can't use DeclContext::Equals here, because the DeclContexts
+  // could be two different declarations of the same function. (We will fix the
+  // semantic DC to refer to the primary definition after merging.)
+  if (!declaresSameEntity(cast<Decl>(X->getDeclContext()->getRedeclContext()),
+                          cast<Decl>(Y->getDeclContext()->getRedeclContext())))
+    return false;
+
+  // Two typedefs refer to the same entity if they have the same underlying
+  // type.
+  if (const auto *TypedefX = dyn_cast<TypedefNameDecl>(X))
+    if (const auto *TypedefY = dyn_cast<TypedefNameDecl>(Y))
+      return hasSameType(TypedefX->getUnderlyingType(),
+                         TypedefY->getUnderlyingType());
+
+  // Must have the same kind.
+  if (X->getKind() != Y->getKind())
+    return false;
+
+  // Objective-C classes and protocols with the same name always match.
+  if (isa<ObjCInterfaceDecl>(X) || isa<ObjCProtocolDecl>(X))
+    return true;
+
+  if (isa<ClassTemplateSpecializationDecl>(X)) {
+    // No need to handle these here: we merge them when adding them to the
+    // template.
+    return false;
+  }
+
+  // Compatible tags match.
+  if (const auto *TagX = dyn_cast<TagDecl>(X)) {
+    const auto *TagY = cast<TagDecl>(Y);
+    return (TagX->getTagKind() == TagY->getTagKind()) ||
+           ((TagX->getTagKind() == TTK_Struct ||
+             TagX->getTagKind() == TTK_Class ||
+             TagX->getTagKind() == TTK_Interface) &&
+            (TagY->getTagKind() == TTK_Struct ||
+             TagY->getTagKind() == TTK_Class ||
+             TagY->getTagKind() == TTK_Interface));
+  }
+
+  // Functions with the same type and linkage match.
+  // FIXME: This needs to cope with merging of prototyped/non-prototyped
+  // functions, etc.
+  if (const auto *FuncX = dyn_cast<FunctionDecl>(X)) {
+    const auto *FuncY = cast<FunctionDecl>(Y);
+    if (const auto *CtorX = dyn_cast<CXXConstructorDecl>(X)) {
+      const auto *CtorY = cast<CXXConstructorDecl>(Y);
+      if (CtorX->getInheritedConstructor() &&
+          !isSameEntity(CtorX->getInheritedConstructor().getConstructor(),
+                        CtorY->getInheritedConstructor().getConstructor()))
+        return false;
+    }
+
+    if (FuncX->isMultiVersion() != FuncY->isMultiVersion())
+      return false;
+
+    // Multiversioned functions with different feature strings are represented
+    // as separate declarations.
+    if (FuncX->isMultiVersion()) {
+      const auto *TAX = FuncX->getAttr<TargetAttr>();
+      const auto *TAY = FuncY->getAttr<TargetAttr>();
+      assert(TAX && TAY && "Multiversion Function without target attribute");
+
+      if (TAX->getFeaturesStr() != TAY->getFeaturesStr())
+        return false;
+    }
+
+    const Expr *XRC = FuncX->getTrailingRequiresClause();
+    const Expr *YRC = FuncY->getTrailingRequiresClause();
+    if (!XRC != !YRC)
+      return false;
+    if (XRC) {
+      llvm::FoldingSetNodeID XRCID, YRCID;
+      XRC->Profile(XRCID, *this, /*Canonical=*/true);
+      YRC->Profile(YRCID, *this, /*Canonical=*/true);
+      if (XRCID != YRCID)
+        return false;
+    }
+
+    auto GetTypeAsWritten = [](const FunctionDecl *FD) {
+      // Map to the first declaration that we've already merged into this one.
+      // The TSI of redeclarations might not match (due to calling conventions
+      // being inherited onto the type but not the TSI), but the TSI type of
+      // the first declaration of the function should match across modules.
+      FD = FD->getCanonicalDecl();
+      return FD->getTypeSourceInfo() ? FD->getTypeSourceInfo()->getType()
+                                     : FD->getType();
+    };
+    QualType XT = GetTypeAsWritten(FuncX), YT = GetTypeAsWritten(FuncY);
+    if (!hasSameType(XT, YT)) {
+      // We can get functions with different types on the redecl chain in C++17
+      // if they have differing exception specifications and at least one of
+      // the excpetion specs is unresolved.
+      auto *XFPT = XT->getAs<FunctionProtoType>();
+      auto *YFPT = YT->getAs<FunctionProtoType>();
+      if (getLangOpts().CPlusPlus17 && XFPT && YFPT &&
+          (isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) ||
+           isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) &&
+          hasSameFunctionTypeIgnoringExceptionSpec(XT, YT))
+        return true;
+      return false;
+    }
+
+    return FuncX->getLinkageInternal() == FuncY->getLinkageInternal() &&
+           hasSameOverloadableAttrs(FuncX, FuncY);
+  }
+
+  // Variables with the same type and linkage match.
+  if (const auto *VarX = dyn_cast<VarDecl>(X)) {
+    const auto *VarY = cast<VarDecl>(Y);
+    if (VarX->getLinkageInternal() == VarY->getLinkageInternal()) {
+      if (hasSameType(VarX->getType(), VarY->getType()))
+        return true;
+
+      // We can get decls with different types on the redecl chain. Eg.
+      // template <typename T> struct S { static T Var[]; }; // #1
+      // template <typename T> T S<T>::Var[sizeof(T)]; // #2
+      // Only? happens when completing an incomplete array type. In this case
+      // when comparing #1 and #2 we should go through their element type.
+      const ArrayType *VarXTy = getAsArrayType(VarX->getType());
+      const ArrayType *VarYTy = getAsArrayType(VarY->getType());
+      if (!VarXTy || !VarYTy)
+        return false;
+      if (VarXTy->isIncompleteArrayType() || VarYTy->isIncompleteArrayType())
+        return hasSameType(VarXTy->getElementType(), VarYTy->getElementType());
+    }
+    return false;
+  }
+
+  // Namespaces with the same name and inlinedness match.
+  if (const auto *NamespaceX = dyn_cast<NamespaceDecl>(X)) {
+    const auto *NamespaceY = cast<NamespaceDecl>(Y);
+    return NamespaceX->isInline() == NamespaceY->isInline();
+  }
+
+  // Identical template names and kinds match if their template parameter lists
+  // and patterns match.
+  if (const auto *TemplateX = dyn_cast<TemplateDecl>(X)) {
+    const auto *TemplateY = cast<TemplateDecl>(Y);
+    return isSameEntity(TemplateX->getTemplatedDecl(),
+                        TemplateY->getTemplatedDecl()) &&
+           isSameTemplateParameterList(TemplateX->getTemplateParameters(),
+                                       TemplateY->getTemplateParameters());
+  }
+
+  // Fields with the same name and the same type match.
+  if (const auto *FDX = dyn_cast<FieldDecl>(X)) {
+    const auto *FDY = cast<FieldDecl>(Y);
+    // FIXME: Also check the bitwidth is odr-equivalent, if any.
+    return hasSameType(FDX->getType(), FDY->getType());
+  }
+
+  // Indirect fields with the same target field match.
+  if (const auto *IFDX = dyn_cast<IndirectFieldDecl>(X)) {
+    const auto *IFDY = cast<IndirectFieldDecl>(Y);
+    return IFDX->getAnonField()->getCanonicalDecl() ==
+           IFDY->getAnonField()->getCanonicalDecl();
+  }
+
+  // Enumerators with the same name match.
+  if (isa<EnumConstantDecl>(X))
+    // FIXME: Also check the value is odr-equivalent.
+    return true;
+
+  // Using shadow declarations with the same target match.
+  if (const auto *USX = dyn_cast<UsingShadowDecl>(X)) {
+    const auto *USY = cast<UsingShadowDecl>(Y);
+    return USX->getTargetDecl() == USY->getTargetDecl();
+  }
+
+  // Using declarations with the same qualifier match. (We already know that
+  // the name matches.)
+  if (const auto *UX = dyn_cast<UsingDecl>(X)) {
+    const auto *UY = cast<UsingDecl>(Y);
+    return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
+           UX->hasTypename() == UY->hasTypename() &&
+           UX->isAccessDeclaration() == UY->isAccessDeclaration();
+  }
+  if (const auto *UX = dyn_cast<UnresolvedUsingValueDecl>(X)) {
+    const auto *UY = cast<UnresolvedUsingValueDecl>(Y);
+    return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
+           UX->isAccessDeclaration() == UY->isAccessDeclaration();
+  }
+  if (const auto *UX = dyn_cast<UnresolvedUsingTypenameDecl>(X)) {
+    return isSameQualifier(
+        UX->getQualifier(),
+        cast<UnresolvedUsingTypenameDecl>(Y)->getQualifier());
+  }
+
+  // Using-pack declarations are only created by instantiation, and match if
+  // they're instantiated from matching UnresolvedUsing...Decls.
+  if (const auto *UX = dyn_cast<UsingPackDecl>(X)) {
+    return declaresSameEntity(
+        UX->getInstantiatedFromUsingDecl(),
+        cast<UsingPackDecl>(Y)->getInstantiatedFromUsingDecl());
+  }
+
+  // Namespace alias definitions with the same target match.
+  if (const auto *NAX = dyn_cast<NamespaceAliasDecl>(X)) {
+    const auto *NAY = cast<NamespaceAliasDecl>(Y);
+    return NAX->getNamespace()->Equals(NAY->getNamespace());
+  }
+
+  return false;
+}
+
 TemplateArgument
 ASTContext::getCanonicalTemplateArgument(const TemplateArgument &Arg) const {
   switch (Arg.getKind()) {
diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp
index 310bbc0f3cbbc..1ab26e58a4040 100644
--- a/clang/lib/Serialization/ASTReaderDecl.cpp
+++ b/clang/lib/Serialization/ASTReaderDecl.cpp
@@ -2945,391 +2945,6 @@ uint64_t ASTReader::getGlobalBitOffset(ModuleFile &M, uint64_t LocalOffset) {
   return LocalOffset + M.GlobalBitOffset;
 }
 
-static bool isSameTemplateParameterList(const ASTContext &C,
-                                        const TemplateParameterList *X,
-                                        const TemplateParameterList *Y);
-static bool isSameEntity(NamedDecl *X, NamedDecl *Y);
-
-/// Determine whether two template parameters are similar enough
-/// that they may be used in declarations of the same template.
-static bool isSameTemplateParameter(const NamedDecl *X,
-                                    const NamedDecl *Y) {
-  if (X->getKind() != Y->getKind())
-    return false;
-
-  if (const auto *TX = dyn_cast<TemplateTypeParmDecl>(X)) {
-    const auto *TY = cast<TemplateTypeParmDecl>(Y);
-    if (TX->isParameterPack() != TY->isParameterPack())
-      return false;
-    if (TX->hasTypeConstraint() != TY->hasTypeConstraint())
-      return false;
-    const TypeConstraint *TXTC = TX->getTypeConstraint();
-    const TypeConstraint *TYTC = TY->getTypeConstraint();
-    if (!TXTC != !TYTC)
-      return false;
-    if (TXTC && TYTC) {
-      auto *NCX = TXTC->getNamedConcept();
-      auto *NCY = TYTC->getNamedConcept();
-      if (!NCX || !NCY || !isSameEntity(NCX, NCY))
-        return false;
-      if (TXTC->hasExplicitTemplateArgs() != TYTC->hasExplicitTemplateArgs())
-        return false;
-      if (TXTC->hasExplicitTemplateArgs()) {
-        const auto *TXTCArgs = TXTC->getTemplateArgsAsWritten();
-        const auto *TYTCArgs = TYTC->getTemplateArgsAsWritten();
-        if (TXTCArgs->NumTemplateArgs != TYTCArgs->NumTemplateArgs)
-          return false;
-        llvm::FoldingSetNodeID XID, YID;
-        for (const auto &ArgLoc : TXTCArgs->arguments())
-          ArgLoc.getArgument().Profile(XID, X->getASTContext());
-        for (const auto &ArgLoc : TYTCArgs->arguments())
-          ArgLoc.getArgument().Profile(YID, Y->getASTContext());
-        if (XID != YID)
-          return false;
-      }
-    }
-    return true;
-  }
-
-  if (const auto *TX = dyn_cast<NonTypeTemplateParmDecl>(X)) {
-    const auto *TY = cast<NonTypeTemplateParmDecl>(Y);
-    return TX->isParameterPack() == TY->isParameterPack() &&
-           TX->getASTContext().hasSameType(TX->getType(), TY->getType());
-  }
-
-  const auto *TX = cast<TemplateTemplateParmDecl>(X);
-  const auto *TY = cast<TemplateTemplateParmDecl>(Y);
-  return TX->isParameterPack() == TY->isParameterPack() &&
-         isSameTemplateParameterList(TX->getASTContext(),
-                                     TX->getTemplateParameters(),
-                                     TY->getTemplateParameters());
-}
-
-static NamespaceDecl *getNamespace(const NestedNameSpecifier *X) {
-  if (auto *NS = X->getAsNamespace())
-    return NS;
-  if (auto *NAS = X->getAsNamespaceAlias())
-    return NAS->getNamespace();
-  return nullptr;
-}
-
-static bool isSameQualifier(const NestedNameSpecifier *X,
-                            const NestedNameSpecifier *Y) {
-  if (auto *NSX = getNamespace(X)) {
-    auto *NSY = getNamespace(Y);
-    if (!NSY || NSX->getCanonicalDecl() != NSY->getCanonicalDecl())
-      return false;
-  } else if (X->getKind() != Y->getKind())
-    return false;
-
-  // FIXME: For namespaces and types, we're permitted to check that the entity
-  // is named via the same tokens. We should probably do so.
-  switch (X->getKind()) {
-  case NestedNameSpecifier::Identifier:
-    if (X->getAsIdentifier() != Y->getAsIdentifier())
-      return false;
-    break;
-  case NestedNameSpecifier::Namespace:
-  case NestedNameSpecifier::NamespaceAlias:
-    // We've already checked that we named the same namespace.
-    break;
-  case NestedNameSpecifier::TypeSpec:
-  case NestedNameSpecifier::TypeSpecWithTemplate:
-    if (X->getAsType()->getCanonicalTypeInternal() !=
-        Y->getAsType()->getCanonicalTypeInternal())
-      return false;
-    break;
-  case NestedNameSpecifier::Global:
-  case NestedNameSpecifier::Super:
-    return true;
-  }
-
-  // Recurse into earlier portion of NNS, if any.
-  auto *PX = X->getPrefix();
-  auto *PY = Y->getPrefix();
-  if (PX && PY)
-    return isSameQualifier(PX, PY);
-  return !PX && !PY;
-}
-
-/// Determine whether two template parameter lists are similar enough
-/// that they may be used in declarations of the same template.
-static bool isSameTemplateParameterList(const ASTContext &C,
-                                        const TemplateParameterList *X,
-                                        const TemplateParameterList *Y) {
-  if (X->size() != Y->size())
-    return false;
-
-  for (unsigned I = 0, N = X->size(); I != N; ++I)
-    if (!isSameTemplateParameter(X->getParam(I), Y->getParam(I)))
-      return false;
-
-  const Expr *XRC = X->getRequiresClause();
-  const Expr *YRC = Y->getRequiresClause();
-  if (!XRC != !YRC)
-    return false;
-  if (XRC) {
-    llvm::FoldingSetNodeID XRCID, YRCID;
-    XRC->Profile(XRCID, C, /*Canonical=*/true);
-    YRC->Profile(YRCID, C, /*Canonical=*/true);
-    if (XRCID != YRCID)
-      return false;
-  }
-
-  return true;
-}
-
-/// Determine whether the attributes we can overload on are identical for A and
-/// B. Will ignore any overloadable attrs represented in the type of A and B.
-static bool hasSameOverloadableAttrs(const FunctionDecl *A,
-                                     const FunctionDecl *B) {
-  // Note that pass_object_size attributes are represented in the function's
-  // ExtParameterInfo, so we don't need to check them here.
-
-  llvm::FoldingSetNodeID Cand1ID, Cand2ID;
-  auto AEnableIfAttrs = A->specific_attrs<EnableIfAttr>();
-  auto BEnableIfAttrs = B->specific_attrs<EnableIfAttr>();
-
-  for (auto Pair : zip_longest(AEnableIfAttrs, BEnableIfAttrs)) {
-    Optional<EnableIfAttr *> Cand1A = std::get<0>(Pair);
-    Optional<EnableIfAttr *> Cand2A = std::get<1>(Pair);
-
-    // Return false if the number of enable_if attributes is different.
-    if (!Cand1A || !Cand2A)
-      return false;
-
-    Cand1ID.clear();
-    Cand2ID.clear();
-
-    (*Cand1A)->getCond()->Profile(Cand1ID, A->getASTContext(), true);
-    (*Cand2A)->getCond()->Profile(Cand2ID, B->getASTContext(), true);
-
-    // Return false if any of the enable_if expressions of A and B are
-    // different.
-    if (Cand1ID != Cand2ID)
-      return false;
-  }
-  return true;
-}
-
-/// Determine whether the two declarations refer to the same entity.
-static bool isSameEntity(NamedDecl *X, NamedDecl *Y) {
-  if (X == Y)
-    return true;
-
-  if (X->getDeclName() != Y->getDeclName())
-    return false;
-
-  // Must be in the same context.
-  //
-  // Note that we can't use DeclContext::Equals here, because the DeclContexts
-  // could be two different declarations of the same function. (We will fix the
-  // semantic DC to refer to the primary definition after merging.)
-  if (!declaresSameEntity(cast<Decl>(X->getDeclContext()->getRedeclContext()),
-                          cast<Decl>(Y->getDeclContext()->getRedeclContext())))
-    return false;
-
-  // Two typedefs refer to the same entity if they have the same underlying
-  // type.
-  if (const auto *TypedefX = dyn_cast<TypedefNameDecl>(X))
-    if (const auto *TypedefY = dyn_cast<TypedefNameDecl>(Y))
-      return X->getASTContext().hasSameType(TypedefX->getUnderlyingType(),
-                                            TypedefY->getUnderlyingType());
-
-  // Must have the same kind.
-  if (X->getKind() != Y->getKind())
-    return false;
-
-  // Objective-C classes and protocols with the same name always match.
-  if (isa<ObjCInterfaceDecl>(X) || isa<ObjCProtocolDecl>(X))
-    return true;
-
-  if (isa<ClassTemplateSpecializationDecl>(X)) {
-    // No need to handle these here: we merge them when adding them to the
-    // template.
-    return false;
-  }
-
-  // Compatible tags match.
-  if (const auto *TagX = dyn_cast<TagDecl>(X)) {
-    const auto *TagY = cast<TagDecl>(Y);
-    return (TagX->getTagKind() == TagY->getTagKind()) ||
-      ((TagX->getTagKind() == TTK_Struct || TagX->getTagKind() == TTK_Class ||
-        TagX->getTagKind() == TTK_Interface) &&
-       (TagY->getTagKind() == TTK_Struct || TagY->getTagKind() == TTK_Class ||
-        TagY->getTagKind() == TTK_Interface));
-  }
-
-  // Functions with the same type and linkage match.
-  // FIXME: This needs to cope with merging of prototyped/non-prototyped
-  // functions, etc.
-  if (const auto *FuncX = dyn_cast<FunctionDecl>(X)) {
-    const auto *FuncY = cast<FunctionDecl>(Y);
-    if (const auto *CtorX = dyn_cast<CXXConstructorDecl>(X)) {
-      const auto *CtorY = cast<CXXConstructorDecl>(Y);
-      if (CtorX->getInheritedConstructor() &&
-          !isSameEntity(CtorX->getInheritedConstructor().getConstructor(),
-                        CtorY->getInheritedConstructor().getConstructor()))
-        return false;
-    }
-
-    if (FuncX->isMultiVersion() != FuncY->isMultiVersion())
-      return false;
-
-    // Multiversioned functions with different feature strings are represented
-    // as separate declarations.
-    if (FuncX->isMultiVersion()) {
-      const auto *TAX = FuncX->getAttr<TargetAttr>();
-      const auto *TAY = FuncY->getAttr<TargetAttr>();
-      assert(TAX && TAY && "Multiversion Function without target attribute");
-
-      if (TAX->getFeaturesStr() != TAY->getFeaturesStr())
-        return false;
-    }
-
-    ASTContext &C = FuncX->getASTContext();
-
-    const Expr *XRC = FuncX->getTrailingRequiresClause();
-    const Expr *YRC = FuncY->getTrailingRequiresClause();
-    if (!XRC != !YRC)
-      return false;
-    if (XRC) {
-      llvm::FoldingSetNodeID XRCID, YRCID;
-      XRC->Profile(XRCID, C, /*Canonical=*/true);
-      YRC->Profile(YRCID, C, /*Canonical=*/true);
-      if (XRCID != YRCID)
-        return false;
-    }
-
-    auto GetTypeAsWritten = [](const FunctionDecl *FD) {
-      // Map to the first declaration that we've already merged into this one.
-      // The TSI of redeclarations might not match (due to calling conventions
-      // being inherited onto the type but not the TSI), but the TSI type of
-      // the first declaration of the function should match across modules.
-      FD = FD->getCanonicalDecl();
-      return FD->getTypeSourceInfo() ? FD->getTypeSourceInfo()->getType()
-                                     : FD->getType();
-    };
-    QualType XT = GetTypeAsWritten(FuncX), YT = GetTypeAsWritten(FuncY);
-    if (!C.hasSameType(XT, YT)) {
-      // We can get functions with different types on the redecl chain in C++17
-      // if they have differing exception specifications and at least one of
-      // the excpetion specs is unresolved.
-      auto *XFPT = XT->getAs<FunctionProtoType>();
-      auto *YFPT = YT->getAs<FunctionProtoType>();
-      if (C.getLangOpts().CPlusPlus17 && XFPT && YFPT &&
-          (isUnresolvedExceptionSpec(XFPT->getExceptionSpecType()) ||
-           isUnresolvedExceptionSpec(YFPT->getExceptionSpecType())) &&
-          C.hasSameFunctionTypeIgnoringExceptionSpec(XT, YT))
-        return true;
-      return false;
-    }
-
-    return FuncX->getLinkageInternal() == FuncY->getLinkageInternal() &&
-           hasSameOverloadableAttrs(FuncX, FuncY);
-  }
-
-  // Variables with the same type and linkage match.
-  if (const auto *VarX = dyn_cast<VarDecl>(X)) {
-    const auto *VarY = cast<VarDecl>(Y);
-    if (VarX->getLinkageInternal() == VarY->getLinkageInternal()) {
-      ASTContext &C = VarX->getASTContext();
-      if (C.hasSameType(VarX->getType(), VarY->getType()))
-        return true;
-
-      // We can get decls with different types on the redecl chain. Eg.
-      // template <typename T> struct S { static T Var[]; }; // #1
-      // template <typename T> T S<T>::Var[sizeof(T)]; // #2
-      // Only? happens when completing an incomplete array type. In this case
-      // when comparing #1 and #2 we should go through their element type.
-      const ArrayType *VarXTy = C.getAsArrayType(VarX->getType());
-      const ArrayType *VarYTy = C.getAsArrayType(VarY->getType());
-      if (!VarXTy || !VarYTy)
-        return false;
-      if (VarXTy->isIncompleteArrayType() || VarYTy->isIncompleteArrayType())
-        return C.hasSameType(VarXTy->getElementType(), VarYTy->getElementType());
-    }
-    return false;
-  }
-
-  // Namespaces with the same name and inlinedness match.
-  if (const auto *NamespaceX = dyn_cast<NamespaceDecl>(X)) {
-    const auto *NamespaceY = cast<NamespaceDecl>(Y);
-    return NamespaceX->isInline() == NamespaceY->isInline();
-  }
-
-  // Identical template names and kinds match if their template parameter lists
-  // and patterns match.
-  if (const auto *TemplateX = dyn_cast<TemplateDecl>(X)) {
-    const auto *TemplateY = cast<TemplateDecl>(Y);
-    return isSameEntity(TemplateX->getTemplatedDecl(),
-                        TemplateY->getTemplatedDecl()) &&
-           isSameTemplateParameterList(TemplateX->getASTContext(),
-                                       TemplateX->getTemplateParameters(),
-                                       TemplateY->getTemplateParameters());
-  }
-
-  // Fields with the same name and the same type match.
-  if (const auto *FDX = dyn_cast<FieldDecl>(X)) {
-    const auto *FDY = cast<FieldDecl>(Y);
-    // FIXME: Also check the bitwidth is odr-equivalent, if any.
-    return X->getASTContext().hasSameType(FDX->getType(), FDY->getType());
-  }
-
-  // Indirect fields with the same target field match.
-  if (const auto *IFDX = dyn_cast<IndirectFieldDecl>(X)) {
-    const auto *IFDY = cast<IndirectFieldDecl>(Y);
-    return IFDX->getAnonField()->getCanonicalDecl() ==
-           IFDY->getAnonField()->getCanonicalDecl();
-  }
-
-  // Enumerators with the same name match.
-  if (isa<EnumConstantDecl>(X))
-    // FIXME: Also check the value is odr-equivalent.
-    return true;
-
-  // Using shadow declarations with the same target match.
-  if (const auto *USX = dyn_cast<UsingShadowDecl>(X)) {
-    const auto *USY = cast<UsingShadowDecl>(Y);
-    return USX->getTargetDecl() == USY->getTargetDecl();
-  }
-
-  // Using declarations with the same qualifier match. (We already know that
-  // the name matches.)
-  if (const auto *UX = dyn_cast<UsingDecl>(X)) {
-    const auto *UY = cast<UsingDecl>(Y);
-    return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
-           UX->hasTypename() == UY->hasTypename() &&
-           UX->isAccessDeclaration() == UY->isAccessDeclaration();
-  }
-  if (const auto *UX = dyn_cast<UnresolvedUsingValueDecl>(X)) {
-    const auto *UY = cast<UnresolvedUsingValueDecl>(Y);
-    return isSameQualifier(UX->getQualifier(), UY->getQualifier()) &&
-           UX->isAccessDeclaration() == UY->isAccessDeclaration();
-  }
-  if (const auto *UX = dyn_cast<UnresolvedUsingTypenameDecl>(X)) {
-    return isSameQualifier(
-        UX->getQualifier(),
-        cast<UnresolvedUsingTypenameDecl>(Y)->getQualifier());
-  }
-
-  // Using-pack declarations are only created by instantiation, and match if
-  // they're instantiated from matching UnresolvedUsing...Decls.
-  if (const auto *UX = dyn_cast<UsingPackDecl>(X)) {
-    return declaresSameEntity(
-        UX->getInstantiatedFromUsingDecl(),
-        cast<UsingPackDecl>(Y)->getInstantiatedFromUsingDecl());
-  }
-
-  // Namespace alias definitions with the same target match.
-  if (const auto *NAX = dyn_cast<NamespaceAliasDecl>(X)) {
-    const auto *NAY = cast<NamespaceAliasDecl>(Y);
-    return NAX->getNamespace()->Equals(NAY->getNamespace());
-  }
-
-  return false;
-}
-
 /// Find the context in which we should search for previous declarations when
 /// looking for declarations to merge.
 DeclContext *ASTDeclReader::getPrimaryContextForMerging(ASTReader &Reader,
@@ -3511,12 +3126,13 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
     return Result;
   }
 
+  ASTContext &C = Reader.getContext();
   DeclContext *DC = D->getDeclContext()->getRedeclContext();
   if (TypedefNameForLinkage) {
     auto It = Reader.ImportedTypedefNamesForLinkage.find(
         std::make_pair(DC, TypedefNameForLinkage));
     if (It != Reader.ImportedTypedefNamesForLinkage.end())
-      if (isSameEntity(It->second, D))
+      if (C.isSameEntity(It->second, D))
         return FindExistingResult(Reader, D, It->second, AnonymousDeclNumber,
                                   TypedefNameForLinkage);
     // Go on to check in other places in case an existing typedef name
@@ -3528,7 +3144,7 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
     // in its context by number.
     if (auto *Existing = getAnonymousDeclForMerging(
             Reader, D->getLexicalDeclContext(), AnonymousDeclNumber))
-      if (isSameEntity(Existing, D))
+      if (C.isSameEntity(Existing, D))
         return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
                                   TypedefNameForLinkage);
   } else if (DC->isTranslationUnit() &&
@@ -3560,7 +3176,7 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
                                    IEnd = IdResolver.end();
          I != IEnd; ++I) {
       if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage))
-        if (isSameEntity(Existing, D))
+        if (C.isSameEntity(Existing, D))
           return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
                                     TypedefNameForLinkage);
     }
@@ -3568,7 +3184,7 @@ ASTDeclReader::FindExistingResult ASTDeclReader::findExisting(NamedDecl *D) {
     DeclContext::lookup_result R = MergeDC->noload_lookup(Name);
     for (DeclContext::lookup_iterator I = R.begin(), E = R.end(); I != E; ++I) {
       if (NamedDecl *Existing = getDeclForMerging(*I, TypedefNameForLinkage))
-        if (isSameEntity(Existing, D))
+        if (C.isSameEntity(Existing, D))
           return FindExistingResult(Reader, D, Existing, AnonymousDeclNumber,
                                     TypedefNameForLinkage);
     }

From f73f367f72665fade97f04fa68b4fa83e7f28dbf Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 14:54:11 -0800
Subject: [PATCH 827/946] [gn build] Move -fno-builtin to crt_code

crt_code seems to correspond to SANITIZER_COMMON_CFLAGS which contains -fno-builtin.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D118288
---
 llvm/utils/gn/build/BUILD.gn                              | 1 +
 llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn     | 4 +---
 llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn     | 6 +-----
 llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn | 4 ----
 4 files changed, 3 insertions(+), 12 deletions(-)

diff --git a/llvm/utils/gn/build/BUILD.gn b/llvm/utils/gn/build/BUILD.gn
index ce2c6f9417a65..d97cdcb803fdb 100644
--- a/llvm/utils/gn/build/BUILD.gn
+++ b/llvm/utils/gn/build/BUILD.gn
@@ -444,6 +444,7 @@ config("crt_code") {
     "-funwind-tables",
     "-gline-tables-only",
     "-fvisibility=hidden",
+    "-fno-builtin",
   ]
   if (is_clang) {
     cflags += [
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
index 57374f5ad2ae4..c6a2f9899e050 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/asan/BUILD.gn
@@ -102,9 +102,7 @@ target(asan_target_type, "asan") {
     sources += [ "asan_interceptors_vfork.S" ]
   }
 
-  # FIXME: have SANITIZER_COMMON_CFLAGS thingy? should fno-builtin be in
-  # crt_code?
-  cflags = [ "-fno-builtin" ]
+  cflags = []
   if (target_os != "win") {
     cflags += [ "-ftls-model=initial-exec" ]
   }
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn
index ea933b30f7932..83539acea3f56 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/msan/BUILD.gn
@@ -39,11 +39,7 @@ static_library("msan") {
     "msan_thread.h",
   ]
 
-  # FIXME: have SANITIZER_COMMON_CFLAGS thingy? should fno-builtin be in
-  # crt_code?
-  cflags = [ "-fno-builtin" ]
-
-  cflags += [ "-fPIE" ]
+  cflags = [ "-fPIE" ]
 
   # Prevent clang from generating libc calls.
   cflags += [ "-ffreestanding" ]
diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn
index 3f6db757f48ea..a4004bb4c897a 100644
--- a/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn
+++ b/llvm/utils/gn/secondary/compiler-rt/lib/tsan/rtl/BUILD.gn
@@ -128,10 +128,6 @@ target(tsan_target_type, "rtl") {
     sources += [ "tsan_rtl_s390x.S" ]
   }
 
-  # FIXME: have SANITIZER_COMMON_CFLAGS thingy? should fno-builtin be in
-  # crt_code?
-  cflags += [ "-fno-builtin" ]
-
   # FIXME: link rt dl m pthread log
   # FIXME: dep on libcxx-headers?
   # FIXME: add_sanitizer_rt_version_list (cf hwasan)

From b75eea12fe18af8f2175fd39ae3bd275ac7991ff Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Wed, 26 Jan 2022 18:37:03 -0800
Subject: [PATCH 828/946] [gn build] Format gn files

---
 llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn | 2 +-
 .../gn/secondary/lldb/source/Plugins/REPL/Clang/BUILD.gn    | 6 +++---
 llvm/utils/gn/secondary/llvm/lib/Target/X86/MCA/BUILD.gn    | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn b/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn
index 4a5692662684b..a10e45cecac11 100644
--- a/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/include/clang/Config/BUILD.gn
@@ -32,7 +32,7 @@ write_cmake_config("Config") {
     "ENABLE_X86_RELAX_RELOCATIONS=1",
     "CLANG_ENABLE_OBJC_REWRITER=1",  # FIXME: flag?
     "CLANG_SYSTEMZ_DEFAULT_ARCH=z10",
-    "PPC_LINUX_DEFAULT_IEEELONGDOUBLE="
+    "PPC_LINUX_DEFAULT_IEEELONGDOUBLE=",
   ]
 
   if (clang_enable_arcmt) {
diff --git a/llvm/utils/gn/secondary/lldb/source/Plugins/REPL/Clang/BUILD.gn b/llvm/utils/gn/secondary/lldb/source/Plugins/REPL/Clang/BUILD.gn
index 3c48f5863fe90..bb144891b9297 100644
--- a/llvm/utils/gn/secondary/lldb/source/Plugins/REPL/Clang/BUILD.gn
+++ b/llvm/utils/gn/secondary/lldb/source/Plugins/REPL/Clang/BUILD.gn
@@ -5,12 +5,12 @@ static_library("Clang") {
     "//lldb/source/Core",
     "//lldb/source/DataFormatters",
     "//lldb/source/Host",
-    "//lldb/source/Symbol",
-    "//lldb/source/Target",
-    "//lldb/source/Utility",
     "//lldb/source/Plugins/Language/ClangCommon",
     "//lldb/source/Plugins/LanguageRuntime/CPlusPlus",
     "//lldb/source/Plugins/TypeSystem/Clang",
+    "//lldb/source/Symbol",
+    "//lldb/source/Target",
+    "//lldb/source/Utility",
     "//llvm/lib/Support",
   ]
   sources = [ "ClangREPL.cpp" ]
diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/X86/MCA/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/X86/MCA/BUILD.gn
index 61ca0584dbae5..366977fed4912 100644
--- a/llvm/utils/gn/secondary/llvm/lib/Target/X86/MCA/BUILD.gn
+++ b/llvm/utils/gn/secondary/llvm/lib/Target/X86/MCA/BUILD.gn
@@ -4,9 +4,9 @@ static_library("MCA") {
     "//llvm/lib/MC",
     "//llvm/lib/MC/MCParser",
     "//llvm/lib/MCA",
+    "//llvm/lib/Support",
     "//llvm/lib/Target/X86/MCTargetDesc",
     "//llvm/lib/Target/X86/TargetInfo",
-    "//llvm/lib/Support",
   ]
   include_dirs = [ ".." ]
   sources = [ "X86CustomBehaviour.cpp" ]

From 5844d29e7686d2cf2c4de8d93a4b9fdfa0d4a310 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 26 Jan 2022 18:19:52 -0800
Subject: [PATCH 829/946] [RISCV] Remove unused flags from FeatureKind in
 TargetParser.h. NFC

These flags aren't used and we shouldn't add more flags for new
ratified extensions. So clear out the unused flags to avoid any
confusion.

Reviewed By: khchen

Differential Revision: https://reviews.llvm.org/D118294
---
 llvm/include/llvm/Support/TargetParser.h | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h
index a6f8ebe1381a6..02a8d72483db6 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@@ -158,12 +158,7 @@ enum CPUKind : unsigned {
 enum FeatureKind : unsigned {
   FK_INVALID = 0,
   FK_NONE = 1,
-  FK_STDEXTM = 1 << 2,
-  FK_STDEXTA = 1 << 3,
-  FK_STDEXTF = 1 << 4,
-  FK_STDEXTD = 1 << 5,
-  FK_STDEXTC = 1 << 6,
-  FK_64BIT = 1 << 7,
+  FK_64BIT = 1 << 2,
 };
 
 bool checkCPUKind(CPUKind Kind, bool IsRV64);

From b3bec6e4530b7540cc3ae3084d5adfe76a159d06 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Wed, 26 Jan 2022 18:20:04 -0800
Subject: [PATCH 830/946] [RISCV] Use vnsrl.wx with x0 instead of vnsrl.vi for
 truncate.

This matches what the spec uses for the vncvt.x.x.w assembly
pseudoinstruction.

Reviewed By: kito-cheng

Differential Revision: https://reviews.llvm.org/D118295
---
 .../Target/RISCV/RISCVInstrInfoVVLPatterns.td |   4 +-
 .../CodeGen/RISCV/rvv/constant-folding.ll     |   6 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll    | 152 ++++-----
 llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll    | 152 ++++-----
 .../CodeGen/RISCV/rvv/extload-truncstore.ll   |  90 +++---
 .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll   |   8 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll   |   8 +-
 .../rvv/fixed-vectors-extload-truncstore.ll   | 300 +++++++++---------
 .../CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll   |  48 +--
 .../RISCV/rvv/fixed-vectors-int-exttrunc.ll   |  20 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  |  28 +-
 .../RISCV/rvv/fixed-vectors-masked-scatter.ll |  68 ++--
 .../RISCV/rvv/fixed-vectors-vpgather.ll       |  56 ++--
 .../RISCV/rvv/fixed-vectors-vpscatter.ll      |  76 ++---
 .../RISCV/rvv/legalize-scalable-vectortype.ll |   8 +-
 llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll |  28 +-
 .../test/CodeGen/RISCV/rvv/mscatter-sdnode.ll |  68 ++--
 llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll  |  68 ++--
 .../test/CodeGen/RISCV/rvv/vpgather-sdnode.ll |  50 +--
 .../CodeGen/RISCV/rvv/vpscatter-sdnode.ll     |  90 +++---
 llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll |  88 ++---
 21 files changed, 708 insertions(+), 708 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index 5cff16c32fe78..964f0fa54512b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -770,8 +770,8 @@ foreach vtiTowti = AllWidenableIntVectors in {
   def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector wti.RegClass:$rs1),
                                                (vti.Mask true_mask),
                                                VLOpFrag)),
-            (!cast<Instruction>("PseudoVNSRL_WI_"#vti.LMul.MX)
-                wti.RegClass:$rs1, 0, GPR:$vl, vti.Log2SEW)>;
+            (!cast<Instruction>("PseudoVNSRL_WX_"#vti.LMul.MX)
+                wti.RegClass:$rs1, X0, GPR:$vl, vti.Log2SEW)>;
 
   def : Pat<(vti.Vector
              (riscv_trunc_vector_vl
diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
index 5850c7c1937e3..62d38dc809bda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
@@ -21,7 +21,7 @@ define <2 x i16> @fixedlen(<2 x i32> %x) {
 ; RV32-NEXT:    lui a0, 1048568
 ; RV32-NEXT:    vand.vx v8, v8, a0
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: fixedlen:
@@ -32,7 +32,7 @@ define <2 x i16> @fixedlen(<2 x i32> %x) {
 ; RV64-NEXT:    slli a0, a0, 3
 ; RV64-NEXT:    vand.vx v8, v8, a0
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    ret
   %v41 = insertelement <2 x i32> undef, i32 16, i32 0
   %v42 = shufflevector <2 x i32> %v41, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -51,7 +51,7 @@ define <vscale x 2 x i16> @scalable(<vscale x 2 x i32> %x) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vsrl.vi v8, v8, 16
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    lui a0, 1048568
 ; CHECK-NEXT:    vand.vx v8, v8, a0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index 7fb412094054c..e307e2630f3cd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -60,9 +60,9 @@ define <vscale x 1 x i8> @ctlz_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v9, v9
 ; RV32D-NEXT:    vsrl.vi v9, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    vrsub.vx v8, v9, a0
@@ -76,9 +76,9 @@ define <vscale x 1 x i8> @ctlz_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v9, v9
 ; RV64D-NEXT:    vsrl.vi v9, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 0
+; RV64D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 0
+; RV64D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    vrsub.vx v8, v9, a0
@@ -145,9 +145,9 @@ define <vscale x 2 x i8> @ctlz_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v9, v9
 ; RV32D-NEXT:    vsrl.vi v9, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    vrsub.vx v8, v9, a0
@@ -161,9 +161,9 @@ define <vscale x 2 x i8> @ctlz_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v9, v9
 ; RV64D-NEXT:    vsrl.vi v9, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 0
+; RV64D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 0
+; RV64D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    vrsub.vx v8, v9, a0
@@ -230,9 +230,9 @@ define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v10, v10
 ; RV32D-NEXT:    vsrl.vi v10, v10, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v10, 0
+; RV32D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    vrsub.vx v8, v9, a0
@@ -246,9 +246,9 @@ define <vscale x 4 x i8> @ctlz_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v10, v10
 ; RV64D-NEXT:    vsrl.vi v10, v10, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v10, 0
+; RV64D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 0
+; RV64D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    vrsub.vx v8, v9, a0
@@ -315,9 +315,9 @@ define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v12, v12
 ; RV32D-NEXT:    vsrl.vi v12, v12, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v12, 0
+; RV32D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v10, 0
+; RV32D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    vrsub.vx v8, v9, a0
@@ -331,9 +331,9 @@ define <vscale x 8 x i8> @ctlz_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v12, v12
 ; RV64D-NEXT:    vsrl.vi v12, v12, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v12, 0
+; RV64D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v10, 0
+; RV64D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    vrsub.vx v8, v9, a0
@@ -400,9 +400,9 @@ define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v16, v16
 ; RV32D-NEXT:    vsrl.vi v16, v16, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v16, 0
+; RV32D-NEXT:    vnsrl.wx v12, v16, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v12, 0
+; RV32D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    vrsub.vx v8, v10, a0
@@ -416,9 +416,9 @@ define <vscale x 16 x i8> @ctlz_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v16, v16
 ; RV64D-NEXT:    vsrl.vi v16, v16, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v16, 0
+; RV64D-NEXT:    vnsrl.wx v12, v16, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v12, 0
+; RV64D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    vrsub.vx v8, v10, a0
@@ -561,7 +561,7 @@ define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32D-NEXT:    vsrl.vi v9, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v9, v9, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -576,7 +576,7 @@ define <vscale x 1 x i16> @ctlz_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64D-NEXT:    vsrl.vi v9, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 0
+; RV64D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v9, v9, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -662,7 +662,7 @@ define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV32D-NEXT:    vsrl.vi v9, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v9, v9, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -677,7 +677,7 @@ define <vscale x 2 x i16> @ctlz_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV64D-NEXT:    vsrl.vi v9, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 0
+; RV64D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v9, v9, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -763,7 +763,7 @@ define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32D-NEXT:    vsrl.vi v10, v10, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v10, 0
+; RV32D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v9, v9, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -778,7 +778,7 @@ define <vscale x 4 x i16> @ctlz_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64D-NEXT:    vsrl.vi v10, v10, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v10, 0
+; RV64D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v9, v9, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -864,7 +864,7 @@ define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV32D-NEXT:    vsrl.vi v12, v12, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v12, 0
+; RV32D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v10, v10, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -879,7 +879,7 @@ define <vscale x 8 x i16> @ctlz_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV64D-NEXT:    vsrl.vi v12, v12, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v12, 0
+; RV64D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v10, v10, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -965,7 +965,7 @@ define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; RV32D-NEXT:    vsrl.vi v16, v16, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v16, 0
+; RV32D-NEXT:    vnsrl.wx v12, v16, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v12, v12, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -980,7 +980,7 @@ define <vscale x 16 x i16> @ctlz_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; RV64D-NEXT:    vsrl.vi v16, v16, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v16, 0
+; RV64D-NEXT:    vnsrl.wx v12, v16, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v12, v12, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -1144,7 +1144,7 @@ define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32D-NEXT:    vsrl.vx v9, v9, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    li a0, 1054
 ; RV32D-NEXT:    vrsub.vx v9, v9, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -1160,7 +1160,7 @@ define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64D-NEXT:    vsrl.vx v9, v9, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v9, 0
+; RV64D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV64D-NEXT:    li a0, 1054
 ; RV64D-NEXT:    vrsub.vx v9, v9, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -1253,7 +1253,7 @@ define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV32D-NEXT:    vsrl.vx v10, v10, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v10, 0
+; RV32D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV32D-NEXT:    li a0, 1054
 ; RV32D-NEXT:    vrsub.vx v9, v9, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -1269,7 +1269,7 @@ define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV64D-NEXT:    vsrl.vx v10, v10, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v9, v10, 0
+; RV64D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV64D-NEXT:    li a0, 1054
 ; RV64D-NEXT:    vrsub.vx v9, v9, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -1362,7 +1362,7 @@ define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32D-NEXT:    vsrl.vx v12, v12, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v12, 0
+; RV32D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV32D-NEXT:    li a0, 1054
 ; RV32D-NEXT:    vrsub.vx v10, v10, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -1378,7 +1378,7 @@ define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64D-NEXT:    vsrl.vx v12, v12, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v12, 0
+; RV64D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV64D-NEXT:    li a0, 1054
 ; RV64D-NEXT:    vrsub.vx v10, v10, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -1471,7 +1471,7 @@ define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32D-NEXT:    vsrl.vx v16, v16, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v16, 0
+; RV32D-NEXT:    vnsrl.wx v12, v16, zero
 ; RV32D-NEXT:    li a0, 1054
 ; RV32D-NEXT:    vrsub.vx v12, v12, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -1487,7 +1487,7 @@ define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV64D-NEXT:    vsrl.vx v16, v16, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v16, 0
+; RV64D-NEXT:    vnsrl.wx v12, v16, zero
 ; RV64D-NEXT:    li a0, 1054
 ; RV64D-NEXT:    vrsub.vx v12, v12, a0
 ; RV64D-NEXT:    vmseq.vi v0, v8, 0
@@ -2048,9 +2048,9 @@ define <vscale x 1 x i8> @ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vrsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2062,9 +2062,9 @@ define <vscale x 1 x i8> @ctlz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vrsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2128,9 +2128,9 @@ define <vscale x 2 x i8> @ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vrsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2142,9 +2142,9 @@ define <vscale x 2 x i8> @ctlz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vrsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2208,9 +2208,9 @@ define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v10
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 0
+; RV32D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v10, 0
+; RV32D-NEXT:    vnsrl.wx v8, v10, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vrsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2222,9 +2222,9 @@ define <vscale x 4 x i8> @ctlz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v10
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v10, 0
+; RV64D-NEXT:    vnsrl.wx v8, v10, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vrsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2288,9 +2288,9 @@ define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v12
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 0
+; RV32D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v12, 0
+; RV32D-NEXT:    vnsrl.wx v8, v12, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vrsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2302,9 +2302,9 @@ define <vscale x 8 x i8> @ctlz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v12
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v12, 0
+; RV64D-NEXT:    vnsrl.wx v8, v12, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vrsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2368,9 +2368,9 @@ define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v16
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 0
+; RV32D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v16, 0
+; RV32D-NEXT:    vnsrl.wx v8, v16, zero
 ; RV32D-NEXT:    li a0, 134
 ; RV32D-NEXT:    vrsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2382,9 +2382,9 @@ define <vscale x 16 x i8> @ctlz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v16
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v16, 0
+; RV64D-NEXT:    vnsrl.wx v8, v16, zero
 ; RV64D-NEXT:    li a0, 134
 ; RV64D-NEXT:    vrsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2522,7 +2522,7 @@ define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2534,7 +2534,7 @@ define <vscale x 1 x i16> @ctlz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2616,7 +2616,7 @@ define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2628,7 +2628,7 @@ define <vscale x 2 x i16> @ctlz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2710,7 +2710,7 @@ define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v10, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 0
+; RV32D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v8, v10, a0
 ; RV32D-NEXT:    ret
@@ -2722,7 +2722,7 @@ define <vscale x 4 x i16> @ctlz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v10, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v8, v10, a0
 ; RV64D-NEXT:    ret
@@ -2804,7 +2804,7 @@ define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v12, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 0
+; RV32D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v8, v12, a0
 ; RV32D-NEXT:    ret
@@ -2816,7 +2816,7 @@ define <vscale x 8 x i16> @ctlz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v12, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v8, v12, a0
 ; RV64D-NEXT:    ret
@@ -2898,7 +2898,7 @@ define <vscale x 16 x i16> @ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v16, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 0
+; RV32D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32D-NEXT:    li a0, 142
 ; RV32D-NEXT:    vrsub.vx v8, v16, a0
 ; RV32D-NEXT:    ret
@@ -2910,7 +2910,7 @@ define <vscale x 16 x i16> @ctlz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v16, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    li a0, 142
 ; RV64D-NEXT:    vrsub.vx v8, v16, a0
 ; RV64D-NEXT:    ret
@@ -3069,7 +3069,7 @@ define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32D-NEXT:    vsrl.vx v8, v9, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 1054
 ; RV32D-NEXT:    vrsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -3082,7 +3082,7 @@ define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v9, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 1054
 ; RV64D-NEXT:    vrsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -3171,7 +3171,7 @@ define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV32D-NEXT:    vsrl.vx v8, v10, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 0
+; RV32D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV32D-NEXT:    li a0, 1054
 ; RV32D-NEXT:    vrsub.vx v8, v10, a0
 ; RV32D-NEXT:    ret
@@ -3184,7 +3184,7 @@ define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v10, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    li a0, 1054
 ; RV64D-NEXT:    vrsub.vx v8, v10, a0
 ; RV64D-NEXT:    ret
@@ -3273,7 +3273,7 @@ define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32D-NEXT:    vsrl.vx v8, v12, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 0
+; RV32D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32D-NEXT:    li a0, 1054
 ; RV32D-NEXT:    vrsub.vx v8, v12, a0
 ; RV32D-NEXT:    ret
@@ -3286,7 +3286,7 @@ define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v12, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    li a0, 1054
 ; RV64D-NEXT:    vrsub.vx v8, v12, a0
 ; RV64D-NEXT:    ret
@@ -3375,7 +3375,7 @@ define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32D-NEXT:    vsrl.vx v8, v16, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 0
+; RV32D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32D-NEXT:    li a0, 1054
 ; RV32D-NEXT:    vrsub.vx v8, v16, a0
 ; RV32D-NEXT:    ret
@@ -3388,7 +3388,7 @@ define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v16, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    li a0, 1054
 ; RV64D-NEXT:    vrsub.vx v8, v16, a0
 ; RV64D-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 3326407581171..73e8585fe3431 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -59,9 +59,9 @@ define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -79,9 +79,9 @@ define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -146,9 +146,9 @@ define <vscale x 2 x i8> @cttz_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -166,9 +166,9 @@ define <vscale x 2 x i8> @cttz_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -233,9 +233,9 @@ define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v10
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 0
+; RV32D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v10, 0
+; RV32D-NEXT:    vnsrl.wx v8, v10, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -253,9 +253,9 @@ define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v10
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v10, 0
+; RV64D-NEXT:    vnsrl.wx v8, v10, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -320,9 +320,9 @@ define <vscale x 8 x i8> @cttz_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v12
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 0
+; RV32D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v12, 0
+; RV32D-NEXT:    vnsrl.wx v8, v12, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -340,9 +340,9 @@ define <vscale x 8 x i8> @cttz_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v12
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v12, 0
+; RV64D-NEXT:    vnsrl.wx v8, v12, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -407,9 +407,9 @@ define <vscale x 16 x i8> @cttz_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v16
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 0
+; RV32D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v16, 0
+; RV32D-NEXT:    vnsrl.wx v8, v16, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -427,9 +427,9 @@ define <vscale x 16 x i8> @cttz_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v16
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v16, 0
+; RV64D-NEXT:    vnsrl.wx v8, v16, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
@@ -559,7 +559,7 @@ define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    li a0, 16
@@ -577,7 +577,7 @@ define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    li a0, 16
@@ -656,7 +656,7 @@ define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    li a0, 16
@@ -674,7 +674,7 @@ define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    li a0, 16
@@ -753,7 +753,7 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v10, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 0
+; RV32D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v10, a0
 ; RV32D-NEXT:    li a0, 16
@@ -771,7 +771,7 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v10, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v10, a0
 ; RV64D-NEXT:    li a0, 16
@@ -850,7 +850,7 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v12, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 0
+; RV32D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v12, a0
 ; RV32D-NEXT:    li a0, 16
@@ -868,7 +868,7 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v12, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v12, a0
 ; RV64D-NEXT:    li a0, 16
@@ -947,7 +947,7 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v16, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 0
+; RV32D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v16, a0
 ; RV32D-NEXT:    li a0, 16
@@ -965,7 +965,7 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v16, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v16, a0
 ; RV64D-NEXT:    li a0, 16
@@ -1106,7 +1106,7 @@ define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32D-NEXT:    vsrl.vx v9, v10, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v9, 0
+; RV32D-NEXT:    vnsrl.wx v9, v9, zero
 ; RV32D-NEXT:    li a0, 1023
 ; RV32D-NEXT:    vsub.vx v9, v9, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -1126,7 +1126,7 @@ define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v9, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 1023
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    li a0, 32
@@ -1206,7 +1206,7 @@ define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV32D-NEXT:    vsrl.vx v10, v10, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v9, v10, 0
+; RV32D-NEXT:    vnsrl.wx v9, v10, zero
 ; RV32D-NEXT:    li a0, 1023
 ; RV32D-NEXT:    vsub.vx v9, v9, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -1226,7 +1226,7 @@ define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v10, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    li a0, 1023
 ; RV64D-NEXT:    vsub.vx v8, v10, a0
 ; RV64D-NEXT:    li a0, 32
@@ -1306,7 +1306,7 @@ define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32D-NEXT:    vsrl.vx v12, v12, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v12, 0
+; RV32D-NEXT:    vnsrl.wx v10, v12, zero
 ; RV32D-NEXT:    li a0, 1023
 ; RV32D-NEXT:    vsub.vx v10, v10, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -1326,7 +1326,7 @@ define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v12, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    li a0, 1023
 ; RV64D-NEXT:    vsub.vx v8, v12, a0
 ; RV64D-NEXT:    li a0, 32
@@ -1406,7 +1406,7 @@ define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32D-NEXT:    vsrl.vx v16, v16, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v16, 0
+; RV32D-NEXT:    vnsrl.wx v12, v16, zero
 ; RV32D-NEXT:    li a0, 1023
 ; RV32D-NEXT:    vsub.vx v12, v12, a0
 ; RV32D-NEXT:    vmseq.vi v0, v8, 0
@@ -1426,7 +1426,7 @@ define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v16, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    li a0, 1023
 ; RV64D-NEXT:    vsub.vx v8, v16, a0
 ; RV64D-NEXT:    li a0, 32
@@ -1889,9 +1889,9 @@ define <vscale x 1 x i8> @cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -1906,9 +1906,9 @@ define <vscale x 1 x i8> @cttz_zero_undef_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -1969,9 +1969,9 @@ define <vscale x 2 x i8> @cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -1986,9 +1986,9 @@ define <vscale x 2 x i8> @cttz_zero_undef_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2049,9 +2049,9 @@ define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v10
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 0
+; RV32D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v10, 0
+; RV32D-NEXT:    vnsrl.wx v8, v10, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2066,9 +2066,9 @@ define <vscale x 4 x i8> @cttz_zero_undef_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v10
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v10, 0
+; RV64D-NEXT:    vnsrl.wx v8, v10, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2129,9 +2129,9 @@ define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v12
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 0
+; RV32D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v12, 0
+; RV32D-NEXT:    vnsrl.wx v8, v12, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2146,9 +2146,9 @@ define <vscale x 8 x i8> @cttz_zero_undef_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v12
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v12, 0
+; RV64D-NEXT:    vnsrl.wx v8, v12, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2209,9 +2209,9 @@ define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV32D-NEXT:    vfcvt.f.xu.v v8, v16
 ; RV32D-NEXT:    vsrl.vi v8, v8, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 0
+; RV32D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v16, 0
+; RV32D-NEXT:    vnsrl.wx v8, v16, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2226,9 +2226,9 @@ define <vscale x 16 x i8> @cttz_zero_undef_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV64D-NEXT:    vfcvt.f.xu.v v8, v16
 ; RV64D-NEXT:    vsrl.vi v8, v8, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v16, 0
+; RV64D-NEXT:    vnsrl.wx v8, v16, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2352,7 +2352,7 @@ define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2366,7 +2366,7 @@ define <vscale x 1 x i16> @cttz_zero_undef_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2440,7 +2440,7 @@ define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2454,7 +2454,7 @@ define <vscale x 2 x i16> @cttz_zero_undef_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2528,7 +2528,7 @@ define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v10, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 0
+; RV32D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v10, a0
 ; RV32D-NEXT:    ret
@@ -2542,7 +2542,7 @@ define <vscale x 4 x i16> @cttz_zero_undef_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v10, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v10, a0
 ; RV64D-NEXT:    ret
@@ -2616,7 +2616,7 @@ define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v12, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 0
+; RV32D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v12, a0
 ; RV32D-NEXT:    ret
@@ -2630,7 +2630,7 @@ define <vscale x 8 x i16> @cttz_zero_undef_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v12, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v12, a0
 ; RV64D-NEXT:    ret
@@ -2704,7 +2704,7 @@ define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; RV32D-NEXT:    vsrl.vi v8, v16, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 0
+; RV32D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32D-NEXT:    li a0, 127
 ; RV32D-NEXT:    vsub.vx v8, v16, a0
 ; RV32D-NEXT:    ret
@@ -2718,7 +2718,7 @@ define <vscale x 16 x i16> @cttz_zero_undef_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
 ; RV64D-NEXT:    vsrl.vi v8, v16, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    li a0, 127
 ; RV64D-NEXT:    vsub.vx v8, v16, a0
 ; RV64D-NEXT:    ret
@@ -2855,7 +2855,7 @@ define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV32D-NEXT:    vsrl.vx v8, v9, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 0
+; RV32D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32D-NEXT:    li a0, 1023
 ; RV32D-NEXT:    vsub.vx v8, v8, a0
 ; RV32D-NEXT:    ret
@@ -2870,7 +2870,7 @@ define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v9, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 0
+; RV64D-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64D-NEXT:    li a0, 1023
 ; RV64D-NEXT:    vsub.vx v8, v8, a0
 ; RV64D-NEXT:    ret
@@ -2947,7 +2947,7 @@ define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV32D-NEXT:    vsrl.vx v8, v10, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 0
+; RV32D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV32D-NEXT:    li a0, 1023
 ; RV32D-NEXT:    vsub.vx v8, v10, a0
 ; RV32D-NEXT:    ret
@@ -2962,7 +2962,7 @@ define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v10, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 0
+; RV64D-NEXT:    vnsrl.wx v10, v8, zero
 ; RV64D-NEXT:    li a0, 1023
 ; RV64D-NEXT:    vsub.vx v8, v10, a0
 ; RV64D-NEXT:    ret
@@ -3039,7 +3039,7 @@ define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV32D-NEXT:    vsrl.vx v8, v12, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 0
+; RV32D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32D-NEXT:    li a0, 1023
 ; RV32D-NEXT:    vsub.vx v8, v12, a0
 ; RV32D-NEXT:    ret
@@ -3054,7 +3054,7 @@ define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v12, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 0
+; RV64D-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64D-NEXT:    li a0, 1023
 ; RV64D-NEXT:    vsub.vx v8, v12, a0
 ; RV64D-NEXT:    ret
@@ -3131,7 +3131,7 @@ define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV32D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32D-NEXT:    vsrl.vx v8, v16, a0
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 0
+; RV32D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32D-NEXT:    li a0, 1023
 ; RV32D-NEXT:    vsub.vx v8, v16, a0
 ; RV32D-NEXT:    ret
@@ -3146,7 +3146,7 @@ define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV64D-NEXT:    vsrl.vx v8, v16, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 0
+; RV64D-NEXT:    vnsrl.wx v16, v8, zero
 ; RV64D-NEXT:    li a0, 1023
 ; RV64D-NEXT:    vsub.vx v8, v16, a0
 ; RV64D-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
index 5ecfae181a707..17b5df3237136 100644
--- a/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extload-truncstore.ll
@@ -410,7 +410,7 @@ define void @truncstore_nxv1i16_nxv1i8(<vscale x 1 x i16> %x, <vscale x 1 x i8>*
 ; CHECK-LABEL: truncstore_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i16> %x to <vscale x 1 x i8>
@@ -474,7 +474,7 @@ define void @truncstore_nxv2i16_nxv2i8(<vscale x 2 x i16> %x, <vscale x 2 x i8>*
 ; CHECK-LABEL: truncstore_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i16> %x to <vscale x 2 x i8>
@@ -538,7 +538,7 @@ define void @truncstore_nxv4i16_nxv4i8(<vscale x 4 x i16> %x, <vscale x 4 x i8>*
 ; CHECK-LABEL: truncstore_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i16> %x to <vscale x 4 x i8>
@@ -598,7 +598,7 @@ define void @truncstore_nxv8i16_nxv8i8(<vscale x 8 x i16> %x, <vscale x 8 x i8>*
 ; CHECK-LABEL: truncstore_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vs1r.v v10, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i16> %x to <vscale x 8 x i8>
@@ -658,7 +658,7 @@ define void @truncstore_nxv16i16_nxv16i8(<vscale x 16 x i16> %x, <vscale x 16 x
 ; CHECK-LABEL: truncstore_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vs2r.v v12, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 16 x i16> %x to <vscale x 16 x i8>
@@ -694,7 +694,7 @@ define void @truncstore_nxv32i16_nxv32i8(<vscale x 32 x i16> %x, <vscale x 32 x
 ; CHECK-LABEL: truncstore_nxv32i16_nxv32i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e8, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vs4r.v v16, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 32 x i16> %x to <vscale x 32 x i8>
@@ -706,9 +706,9 @@ define void @truncstore_nxv1i32_nxv1i8(<vscale x 1 x i32> %x, <vscale x 1 x i8>*
 ; CHECK-LABEL: truncstore_nxv1i32_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i32> %x to <vscale x 1 x i8>
@@ -720,7 +720,7 @@ define void @truncstore_nxv1i32_nxv1i16(<vscale x 1 x i32> %x, <vscale x 1 x i16
 ; CHECK-LABEL: truncstore_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i32> %x to <vscale x 1 x i16>
@@ -758,9 +758,9 @@ define void @truncstore_nxv2i32_nxv2i8(<vscale x 2 x i32> %x, <vscale x 2 x i8>*
 ; CHECK-LABEL: truncstore_nxv2i32_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i32> %x to <vscale x 2 x i8>
@@ -772,7 +772,7 @@ define void @truncstore_nxv2i32_nxv2i16(<vscale x 2 x i32> %x, <vscale x 2 x i16
 ; CHECK-LABEL: truncstore_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i32> %x to <vscale x 2 x i16>
@@ -808,9 +808,9 @@ define void @truncstore_nxv4i32_nxv4i8(<vscale x 4 x i32> %x, <vscale x 4 x i8>*
 ; CHECK-LABEL: truncstore_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i32> %x to <vscale x 4 x i8>
@@ -822,7 +822,7 @@ define void @truncstore_nxv4i32_nxv4i16(<vscale x 4 x i32> %x, <vscale x 4 x i16
 ; CHECK-LABEL: truncstore_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vs1r.v v10, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i32> %x to <vscale x 4 x i16>
@@ -858,9 +858,9 @@ define void @truncstore_nxv8i32_nxv8i8(<vscale x 8 x i32> %x, <vscale x 8 x i8>*
 ; CHECK-LABEL: truncstore_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i32> %x to <vscale x 8 x i8>
@@ -872,7 +872,7 @@ define void @truncstore_nxv8i32_nxv8i16(<vscale x 8 x i32> %x, <vscale x 8 x i16
 ; CHECK-LABEL: truncstore_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vs2r.v v12, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i32> %x to <vscale x 8 x i16>
@@ -908,9 +908,9 @@ define void @truncstore_nxv16i32_nxv16i8(<vscale x 16 x i32> %x, <vscale x 16 x
 ; CHECK-LABEL: truncstore_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 16 x i32> %x to <vscale x 16 x i8>
@@ -922,7 +922,7 @@ define void @truncstore_nxv16i32_nxv16i16(<vscale x 16 x i32> %x, <vscale x 16 x
 ; CHECK-LABEL: truncstore_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vs4r.v v16, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 16 x i32> %x to <vscale x 16 x i16>
@@ -934,11 +934,11 @@ define void @truncstore_nxv1i64_nxv1i8(<vscale x 1 x i64> %x, <vscale x 1 x i8>*
 ; CHECK-LABEL: truncstore_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i8>
@@ -950,9 +950,9 @@ define void @truncstore_nxv1i64_nxv1i16(<vscale x 1 x i64> %x, <vscale x 1 x i16
 ; CHECK-LABEL: truncstore_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i16>
@@ -964,7 +964,7 @@ define void @truncstore_nxv1i64_nxv1i32(<vscale x 1 x i64> %x, <vscale x 1 x i32
 ; CHECK-LABEL: truncstore_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 1 x i64> %x to <vscale x 1 x i32>
@@ -976,11 +976,11 @@ define void @truncstore_nxv2i64_nxv2i8(<vscale x 2 x i64> %x, <vscale x 2 x i8>*
 ; CHECK-LABEL: truncstore_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i8>
@@ -992,9 +992,9 @@ define void @truncstore_nxv2i64_nxv2i16(<vscale x 2 x i64> %x, <vscale x 2 x i16
 ; CHECK-LABEL: truncstore_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i16>
@@ -1006,7 +1006,7 @@ define void @truncstore_nxv2i64_nxv2i32(<vscale x 2 x i64> %x, <vscale x 2 x i32
 ; CHECK-LABEL: truncstore_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vs1r.v v10, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 2 x i64> %x to <vscale x 2 x i32>
@@ -1018,11 +1018,11 @@ define void @truncstore_nxv4i64_nxv4i8(<vscale x 4 x i64> %x, <vscale x 4 x i8>*
 ; CHECK-LABEL: truncstore_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i8>
@@ -1034,9 +1034,9 @@ define void @truncstore_nxv4i64_nxv4i16(<vscale x 4 x i64> %x, <vscale x 4 x i16
 ; CHECK-LABEL: truncstore_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i16>
@@ -1048,7 +1048,7 @@ define void @truncstore_nxv4i64_nxv4i32(<vscale x 4 x i64> %x, <vscale x 4 x i32
 ; CHECK-LABEL: truncstore_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vs2r.v v12, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 4 x i64> %x to <vscale x 4 x i32>
@@ -1060,11 +1060,11 @@ define void @truncstore_nxv8i64_nxv8i8(<vscale x 8 x i64> %x, <vscale x 8 x i8>*
 ; CHECK-LABEL: truncstore_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vs1r.v v10, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i8>
@@ -1076,9 +1076,9 @@ define void @truncstore_nxv8i64_nxv8i16(<vscale x 8 x i64> %x, <vscale x 8 x i16
 ; CHECK-LABEL: truncstore_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    vs2r.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i16>
@@ -1090,7 +1090,7 @@ define void @truncstore_nxv8i64_nxv8i32(<vscale x 8 x i64> %x, <vscale x 8 x i32
 ; CHECK-LABEL: truncstore_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vs4r.v v16, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <vscale x 8 x i64> %x to <vscale x 8 x i32>
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index a67d54f8e7275..e90ee2480d80e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -125,7 +125,7 @@ define void @ctlz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind {
 ; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; LMULMAX8-RV32-NEXT:    vnsrl.wi v10, v12, 23
 ; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX8-RV32-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX8-RV32-NEXT:    li a1, 134
 ; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
 ; LMULMAX8-RV32-NEXT:    vrsub.vx v8, v9, a1
@@ -143,7 +143,7 @@ define void @ctlz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind {
 ; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; LMULMAX8-RV64-NEXT:    vnsrl.wi v10, v12, 23
 ; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX8-RV64-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX8-RV64-NEXT:    li a1, 134
 ; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
 ; LMULMAX8-RV64-NEXT:    vrsub.vx v8, v9, a1
@@ -1045,7 +1045,7 @@ define void @ctlz_v32i8(<32 x i8>* %x, <32 x i8>* %y) nounwind {
 ; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; LMULMAX8-RV32-NEXT:    vnsrl.wi v12, v16, 23
 ; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v10, v12, 0
+; LMULMAX8-RV32-NEXT:    vnsrl.wx v10, v12, zero
 ; LMULMAX8-RV32-NEXT:    li a1, 134
 ; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
 ; LMULMAX8-RV32-NEXT:    vrsub.vx v8, v10, a1
@@ -1064,7 +1064,7 @@ define void @ctlz_v32i8(<32 x i8>* %x, <32 x i8>* %y) nounwind {
 ; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; LMULMAX8-RV64-NEXT:    vnsrl.wi v12, v16, 23
 ; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v10, v12, 0
+; LMULMAX8-RV64-NEXT:    vnsrl.wx v10, v12, zero
 ; LMULMAX8-RV64-NEXT:    li a1, 134
 ; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
 ; LMULMAX8-RV64-NEXT:    vrsub.vx v8, v10, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 3059d1777c362..9acfb3cb7ef81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -115,7 +115,7 @@ define void @cttz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind {
 ; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; LMULMAX8-RV32-NEXT:    vnsrl.wi v10, v12, 23
 ; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX8-RV32-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX8-RV32-NEXT:    li a1, 127
 ; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
 ; LMULMAX8-RV32-NEXT:    vsub.vx v8, v9, a1
@@ -135,7 +135,7 @@ define void @cttz_v16i8(<16 x i8>* %x, <16 x i8>* %y) nounwind {
 ; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
 ; LMULMAX8-RV64-NEXT:    vnsrl.wi v10, v12, 23
 ; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX8-RV64-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX8-RV64-NEXT:    li a1, 127
 ; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
 ; LMULMAX8-RV64-NEXT:    vsub.vx v8, v9, a1
@@ -927,7 +927,7 @@ define void @cttz_v32i8(<32 x i8>* %x, <32 x i8>* %y) nounwind {
 ; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; LMULMAX8-RV32-NEXT:    vnsrl.wi v12, v16, 23
 ; LMULMAX8-RV32-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; LMULMAX8-RV32-NEXT:    vnsrl.wi v10, v12, 0
+; LMULMAX8-RV32-NEXT:    vnsrl.wx v10, v12, zero
 ; LMULMAX8-RV32-NEXT:    li a1, 127
 ; LMULMAX8-RV32-NEXT:    vmseq.vi v0, v8, 0
 ; LMULMAX8-RV32-NEXT:    vsub.vx v8, v10, a1
@@ -948,7 +948,7 @@ define void @cttz_v32i8(<32 x i8>* %x, <32 x i8>* %y) nounwind {
 ; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
 ; LMULMAX8-RV64-NEXT:    vnsrl.wi v12, v16, 23
 ; LMULMAX8-RV64-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; LMULMAX8-RV64-NEXT:    vnsrl.wi v10, v12, 0
+; LMULMAX8-RV64-NEXT:    vnsrl.wx v10, v12, zero
 ; LMULMAX8-RV64-NEXT:    li a1, 127
 ; LMULMAX8-RV64-NEXT:    vmseq.vi v0, v8, 0
 ; LMULMAX8-RV64-NEXT:    vsub.vx v8, v10, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
index df657d83d5e7f..7a0b183984049 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -573,7 +573,7 @@ define void @truncstore_v2i16_v2i8(<2 x i16> %x, <2 x i8>* %z) {
 ; CHECK-LABEL: truncstore_v2i16_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i16> %x to <2 x i8>
@@ -637,7 +637,7 @@ define void @truncstore_v4i16_v4i8(<4 x i16> %x, <4 x i8>* %z) {
 ; CHECK-LABEL: truncstore_v4i16_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <4 x i16> %x to <4 x i8>
@@ -723,7 +723,7 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %z) {
 ; CHECK-LABEL: truncstore_v8i16_v8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <8 x i16> %x to <8 x i8>
@@ -847,8 +847,8 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %z) {
 ; LMULMAX1-LABEL: truncstore_v16i16_v16i8:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 8
 ; LMULMAX1-NEXT:    vse8.v v8, (a0)
@@ -857,7 +857,7 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %z) {
 ; LMULMAX4-LABEL: truncstore_v16i16_v16i8:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v10, v8, zero
 ; LMULMAX4-NEXT:    vse8.v v10, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <16 x i16> %x to <16 x i8>
@@ -1031,9 +1031,9 @@ define void @truncstore_v2i32_v2i8(<2 x i32> %x, <2 x i8>* %z) {
 ; CHECK-LABEL: truncstore_v2i32_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i32> %x to <2 x i8>
@@ -1045,7 +1045,7 @@ define void @truncstore_v2i32_v2i16(<2 x i32> %x, <2 x i16>* %z) {
 ; CHECK-LABEL: truncstore_v2i32_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i32> %x to <2 x i16>
@@ -1083,9 +1083,9 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %z) {
 ; CHECK-LABEL: truncstore_v4i32_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <4 x i32> %x to <4 x i8>
@@ -1097,7 +1097,7 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, <4 x i16>* %z) {
 ; CHECK-LABEL: truncstore_v4i32_v4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <4 x i32> %x to <4 x i16>
@@ -1157,13 +1157,13 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %z) {
 ; LMULMAX1-LABEL: truncstore_v8i32_v8i8:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vse8.v v8, (a0)
@@ -1172,9 +1172,9 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %z) {
 ; LMULMAX4-LABEL: truncstore_v8i32_v8i8:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v10, v8, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v10, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v10, zero
 ; LMULMAX4-NEXT:    vse8.v v8, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <8 x i32> %x to <8 x i8>
@@ -1186,8 +1186,8 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %z) {
 ; LMULMAX1-LABEL: truncstore_v8i32_v8i16:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vse16.v v8, (a0)
@@ -1196,7 +1196,7 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %z) {
 ; LMULMAX4-LABEL: truncstore_v8i32_v8i16:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v10, v8, zero
 ; LMULMAX4-NEXT:    vse16.v v10, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <8 x i32> %x to <8 x i16>
@@ -1270,25 +1270,25 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %z) {
 ; LMULMAX1-LABEL: truncstore_v16i32_v16i8:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 12, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 8
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 12
 ; LMULMAX1-NEXT:    vse8.v v8, (a0)
@@ -1297,9 +1297,9 @@ define void @truncstore_v16i32_v16i8(<16 x i32> %x, <16 x i8>* %z) {
 ; LMULMAX4-LABEL: truncstore_v16i32_v16i8:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v12, v8, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v12, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v12, zero
 ; LMULMAX4-NEXT:    vse8.v v8, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <16 x i32> %x to <16 x i8>
@@ -1311,13 +1311,13 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %z) {
 ; LMULMAX1-LABEL: truncstore_v16i32_v16i16:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
+; LMULMAX1-NEXT:    vnsrl.wx v10, v10, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT:    addi a1, a0, 16
@@ -1328,7 +1328,7 @@ define void @truncstore_v16i32_v16i16(<16 x i32> %x, <16 x i16>* %z) {
 ; LMULMAX4-LABEL: truncstore_v16i32_v16i16:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v12, v8, zero
 ; LMULMAX4-NEXT:    vse16.v v12, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <16 x i32> %x to <16 x i16>
@@ -1436,11 +1436,11 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %z) {
 ; CHECK-LABEL: truncstore_v2i64_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i64> %x to <2 x i8>
@@ -1452,9 +1452,9 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %z) {
 ; CHECK-LABEL: truncstore_v2i64_v2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse16.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i64> %x to <2 x i16>
@@ -1466,7 +1466,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, <2 x i32>* %z) {
 ; CHECK-LABEL: truncstore_v2i64_v2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
   %y = trunc <2 x i64> %x to <2 x i32>
@@ -1478,17 +1478,17 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) {
 ; LMULMAX1-LABEL: truncstore_v4i64_v4i8:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf4, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vse8.v v8, (a0)
@@ -1497,11 +1497,11 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %z) {
 ; LMULMAX4-LABEL: truncstore_v4i64_v4i8:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v10, v8, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v10, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v10, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX4-NEXT:    vse8.v v8, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i8>
@@ -1513,13 +1513,13 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %z) {
 ; LMULMAX1-LABEL: truncstore_v4i64_v4i16:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vse16.v v8, (a0)
@@ -1528,9 +1528,9 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %z) {
 ; LMULMAX4-LABEL: truncstore_v4i64_v4i16:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v10, v8, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v10, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v10, zero
 ; LMULMAX4-NEXT:    vse16.v v8, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i16>
@@ -1542,8 +1542,8 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %z) {
 ; LMULMAX1-LABEL: truncstore_v4i64_v4i32:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vse32.v v8, (a0)
@@ -1552,7 +1552,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %z) {
 ; LMULMAX4-LABEL: truncstore_v4i64_v4i32:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v10, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v10, v8, zero
 ; LMULMAX4-NEXT:    vse32.v v10, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <4 x i64> %x to <4 x i32>
@@ -1564,33 +1564,33 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) {
 ; LMULMAX1-LABEL: truncstore_v8i64_v8i8:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 6, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
 ; LMULMAX1-NEXT:    vse8.v v8, (a0)
@@ -1599,11 +1599,11 @@ define void @truncstore_v8i64_v8i8(<8 x i64> %x, <8 x i8>* %z) {
 ; LMULMAX4-LABEL: truncstore_v8i64_v8i8:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v12, v8, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v12, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v12, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX4-NEXT:    vse8.v v8, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i8>
@@ -1615,25 +1615,25 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %z) {
 ; LMULMAX1-LABEL: truncstore_v8i64_v8i16:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
 ; LMULMAX1-NEXT:    vse16.v v8, (a0)
@@ -1642,9 +1642,9 @@ define void @truncstore_v8i64_v8i16(<8 x i64> %x, <8 x i16>* %z) {
 ; LMULMAX4-LABEL: truncstore_v8i64_v8i16:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v12, v8, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v12, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v12, zero
 ; LMULMAX4-NEXT:    vse16.v v8, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i16>
@@ -1656,13 +1656,13 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %z) {
 ; LMULMAX1-LABEL: truncstore_v8i64_v8i32:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
+; LMULMAX1-NEXT:    vnsrl.wx v10, v10, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v9, 2
 ; LMULMAX1-NEXT:    addi a1, a0, 16
@@ -1673,7 +1673,7 @@ define void @truncstore_v8i64_v8i32(<8 x i64> %x, <8 x i32>* %z) {
 ; LMULMAX4-LABEL: truncstore_v8i64_v8i32:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v12, v8, zero
 ; LMULMAX4-NEXT:    vse32.v v12, (a0)
 ; LMULMAX4-NEXT:    ret
   %y = trunc <8 x i64> %x to <8 x i32>
@@ -1685,65 +1685,65 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) {
 ; LMULMAX1-LABEL: truncstore_v16i64_v16i8:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 6, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v12, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v12, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 10, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 8
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v13, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v13, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 12, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 10
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v14, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v14, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 14, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 12
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v15, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v15, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 16, e8, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 14
 ; LMULMAX1-NEXT:    vse8.v v8, (a0)
@@ -1752,17 +1752,17 @@ define void @truncstore_v16i64_v16i8(<16 x i64> %x, <16 x i8>* %z) {
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i8:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v16, v12, 0
+; LMULMAX4-NEXT:    vnsrl.wx v16, v12, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v12, v16, 0
+; LMULMAX4-NEXT:    vnsrl.wx v12, v16, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v12, v12, 0
+; LMULMAX4-NEXT:    vnsrl.wx v12, v12, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v14, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v14, v8, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v14, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v14, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX4-NEXT:    vsetivli zero, 16, e8, m1, tu, mu
 ; LMULMAX4-NEXT:    vslideup.vi v8, v12, 8
 ; LMULMAX4-NEXT:    vse8.v v8, (a0)
@@ -1776,47 +1776,47 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) {
 ; LMULMAX1-LABEL: truncstore_v16i64_v16i16:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v10, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 6
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v13, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v13, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v10, v12, 0
+; LMULMAX1-NEXT:    vnsrl.wx v10, v12, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v10, v10, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v14, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v14, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 6, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v15, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v15, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e16, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v9, 6
 ; LMULMAX1-NEXT:    addi a1, a0, 16
@@ -1827,13 +1827,13 @@ define void @truncstore_v16i64_v16i16(<16 x i64> %x, <16 x i16>* %z) {
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i16:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v16, v12, 0
+; LMULMAX4-NEXT:    vnsrl.wx v16, v12, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v12, v16, 0
+; LMULMAX4-NEXT:    vnsrl.wx v12, v16, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v14, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v14, v8, zero
 ; LMULMAX4-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v8, v14, 0
+; LMULMAX4-NEXT:    vnsrl.wx v8, v14, zero
 ; LMULMAX4-NEXT:    vsetivli zero, 16, e16, m2, tu, mu
 ; LMULMAX4-NEXT:    vslideup.vi v8, v12, 8
 ; LMULMAX4-NEXT:    vse16.v v8, (a0)
@@ -1847,23 +1847,23 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) {
 ; LMULMAX1-LABEL: truncstore_v16i64_v16i32:
 ; LMULMAX1:       # %bb.0:
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
+; LMULMAX1-NEXT:    vnsrl.wx v10, v10, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v13, 0
-; LMULMAX1-NEXT:    vnsrl.wi v11, v12, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v13, zero
+; LMULMAX1-NEXT:    vnsrl.wx v11, v12, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v11, v9, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v15, 0
-; LMULMAX1-NEXT:    vnsrl.wi v12, v14, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v15, zero
+; LMULMAX1-NEXT:    vnsrl.wx v12, v14, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v12, v9, 2
 ; LMULMAX1-NEXT:    addi a1, a0, 48
@@ -1878,8 +1878,8 @@ define void @truncstore_v16i64_v16i32(<16 x i64> %x, <16 x i32>* %z) {
 ; LMULMAX4-LABEL: truncstore_v16i64_v16i32:
 ; LMULMAX4:       # %bb.0:
 ; LMULMAX4-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
-; LMULMAX4-NEXT:    vnsrl.wi v16, v12, 0
-; LMULMAX4-NEXT:    vnsrl.wi v12, v8, 0
+; LMULMAX4-NEXT:    vnsrl.wx v16, v12, zero
+; LMULMAX4-NEXT:    vnsrl.wx v12, v8, zero
 ; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, tu, mu
 ; LMULMAX4-NEXT:    vslideup.vi v12, v16, 8
 ; LMULMAX4-NEXT:    vse32.v v12, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 0f3efd2daaacd..0cc70c9913006 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -348,9 +348,9 @@ define void @fp2si_v2f64_v2i8(<2 x double>* %x, <2 x i8>* %y) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
@@ -367,9 +367,9 @@ define void @fp2ui_v2f64_v2i8(<2 x double>* %x, <2 x i8>* %y) {
 ; CHECK-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <2 x double>, <2 x double>* %x
@@ -410,9 +410,9 @@ define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vfncvt.rtz.x.f.w v12, v8
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 0
+; LMULMAX8-NEXT:    vnsrl.wx v8, v12, zero
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX8-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX8-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX8-NEXT:    vse8.v v8, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -429,31 +429,31 @@ define void @fp2si_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v12, v10
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v10, v12, 0
+; LMULMAX1-NEXT:    vnsrl.wx v10, v12, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v10, v10, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v12, v11
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v11, v12, 0
+; LMULMAX1-NEXT:    vnsrl.wx v11, v12, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v11, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v11, v11, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v11, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v11, v9
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 6, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v8, 6
 ; LMULMAX1-NEXT:    vse8.v v10, (a1)
@@ -472,9 +472,9 @@ define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vfncvt.rtz.xu.f.w v12, v8
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX8-NEXT:    vnsrl.wi v8, v12, 0
+; LMULMAX8-NEXT:    vnsrl.wx v8, v12, zero
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX8-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX8-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX8-NEXT:    vse8.v v8, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -491,31 +491,31 @@ define void @fp2ui_v8f64_v8i8(<8 x double>* %x, <8 x i8>* %y) {
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v12, v10
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v10, v12, 0
+; LMULMAX1-NEXT:    vnsrl.wx v10, v12, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v10, v10, 0
+; LMULMAX1-NEXT:    vnsrl.wx v10, v10, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v12, v11
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v11, v12, 0
+; LMULMAX1-NEXT:    vnsrl.wx v11, v12, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v11, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v11, v11, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 4, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v11, 2
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v11, v9
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v11, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v11, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 6, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v9, 4
 ; LMULMAX1-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
 ; LMULMAX1-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v10, v8, 6
 ; LMULMAX1-NEXT:    vse8.v v10, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
index 4482c653ad4d1..59b4ba7fa8058 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-exttrunc.ll
@@ -169,9 +169,9 @@ define void @trunc_v4i8_v4i32(<4 x i32>* %x, <4 x i8>* %z) {
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
   %a = load <4 x i32>, <4 x i32>* %x
@@ -186,9 +186,9 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) {
 ; LMULMAX8-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX8-NEXT:    vle32.v v8, (a0)
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX8-NEXT:    vnsrl.wi v10, v8, 0
+; LMULMAX8-NEXT:    vnsrl.wx v10, v8, zero
 ; LMULMAX8-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX8-NEXT:    vnsrl.wi v8, v10, 0
+; LMULMAX8-NEXT:    vnsrl.wx v8, v10, zero
 ; LMULMAX8-NEXT:    vse8.v v8, (a1)
 ; LMULMAX8-NEXT:    ret
 ;
@@ -197,9 +197,9 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) {
 ; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
 ; LMULMAX2-NEXT:    vle32.v v8, (a0)
 ; LMULMAX2-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX2-NEXT:    vnsrl.wi v10, v8, 0
+; LMULMAX2-NEXT:    vnsrl.wx v10, v8, zero
 ; LMULMAX2-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; LMULMAX2-NEXT:    vnsrl.wi v8, v10, 0
+; LMULMAX2-NEXT:    vnsrl.wx v8, v10, zero
 ; LMULMAX2-NEXT:    vse8.v v8, (a1)
 ; LMULMAX2-NEXT:    ret
 ;
@@ -210,13 +210,13 @@ define void @trunc_v8i8_v8i32(<8 x i32>* %x, <8 x i8>* %z) {
 ; LMULMAX1-NEXT:    addi a0, a0, 16
 ; LMULMAX1-NEXT:    vle32.v v9, (a0)
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v8, v8, 0
+; LMULMAX1-NEXT:    vnsrl.wx v8, v8, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; LMULMAX1-NEXT:    vnsrl.wi v9, v9, 0
+; LMULMAX1-NEXT:    vnsrl.wx v9, v9, zero
 ; LMULMAX1-NEXT:    vsetivli zero, 8, e8, mf2, tu, mu
 ; LMULMAX1-NEXT:    vslideup.vi v8, v9, 4
 ; LMULMAX1-NEXT:    vse8.v v8, (a1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index c3f7772878d44..65e66cd230583 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1037,7 +1037,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8
 ; RV32-NEXT:    vsext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1064,7 +1064,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <8
 ; RV32-NEXT:    vzext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1115,7 +1115,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(i64* %base, <8 x i16> %idxs,
 ; RV32-NEXT:    vsext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1142,7 +1142,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(i64* %base, <8 x i16> %idxs,
 ; RV32-NEXT:    vzext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1192,7 +1192,7 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(i64* %base, <8 x i32> %idxs,
 ; RV32-NEXT:    vsext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1219,7 +1219,7 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(i64* %base, <8 x i32> %idxs,
 ; RV32-NEXT:    vzext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1245,7 +1245,7 @@ define <8 x i64> @mgather_baseidx_v8i64(i64* %base, <8 x i64> %idxs, <8 x i1> %m
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1907,7 +1907,7 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(double* %base, <8 x i8> %id
 ; RV32-NEXT:    vsext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1934,7 +1934,7 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(double* %base, <8 x i8> %id
 ; RV32-NEXT:    vzext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -1985,7 +1985,7 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(double* %base, <8 x i16> %
 ; RV32-NEXT:    vsext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -2012,7 +2012,7 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(double* %base, <8 x i16> %
 ; RV32-NEXT:    vzext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -2062,7 +2062,7 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(double* %base, <8 x i32> %
 ; RV32-NEXT:    vsext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -2089,7 +2089,7 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(double* %base, <8 x i32> %
 ; RV32-NEXT:    vzext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
@@ -2115,7 +2115,7 @@ define <8 x double> @mgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v12, (a0), v16, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v12
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
index f357ae3575122..eb542774134f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll
@@ -44,14 +44,14 @@ define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x i8*> %ptrs, <2
 ; RV32-LABEL: mscatter_v2i16_truncstore_v2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i16_truncstore_v2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i16> %val to <2 x i8>
@@ -63,18 +63,18 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x i8*> %ptrs, <2
 ; RV32-LABEL: mscatter_v2i32_truncstore_v2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i32_truncstore_v2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i32> %val to <2 x i8>
@@ -86,22 +86,22 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x i8*> %ptrs, <2
 ; RV32-LABEL: mscatter_v2i64_truncstore_v2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i8>
@@ -236,14 +236,14 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x i16*> %ptrs, <
 ; RV32-LABEL: mscatter_v2i32_truncstore_v2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i32_truncstore_v2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i32> %val to <2 x i16>
@@ -255,18 +255,18 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x i16*> %ptrs, <
 ; RV32-LABEL: mscatter_v2i64_truncstore_v2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i16>
@@ -474,14 +474,14 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs, <
 ; RV32-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_v2i64_truncstore_v2i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <2 x i64> %val to <2 x i32>
@@ -843,7 +843,7 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i
 ; RV32-NEXT:    vsext.vf8 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -868,7 +868,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x i
 ; RV32-NEXT:    vzext.vf8 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -915,7 +915,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -940,7 +940,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vzext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -986,7 +986,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1011,7 +1011,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vzext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1035,7 +1035,7 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs,
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1637,7 +1637,7 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsext.vf8 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1662,7 +1662,7 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vzext.vf8 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1709,7 +1709,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1734,7 +1734,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vzext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1780,7 +1780,7 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1805,7 +1805,7 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vzext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1829,7 +1829,7 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64>
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
index bfc52bbce7242..e88fb6e71dcfd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -965,7 +965,7 @@ define <8 x i64> @vpgather_baseidx_sext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <
 ; RV32-NEXT:    vsext.vf8 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -991,7 +991,7 @@ define <8 x i64> @vpgather_baseidx_zext_v8i8_v8i64(i64* %base, <8 x i8> %idxs, <
 ; RV32-NEXT:    vzext.vf8 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1040,7 +1040,7 @@ define <8 x i64> @vpgather_baseidx_sext_v8i16_v8i64(i64* %base, <8 x i16> %idxs,
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1066,7 +1066,7 @@ define <8 x i64> @vpgather_baseidx_zext_v8i16_v8i64(i64* %base, <8 x i16> %idxs,
 ; RV32-NEXT:    vzext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1114,7 +1114,7 @@ define <8 x i64> @vpgather_baseidx_sext_v8i32_v8i64(i64* %base, <8 x i32> %idxs,
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1140,7 +1140,7 @@ define <8 x i64> @vpgather_baseidx_zext_v8i32_v8i64(i64* %base, <8 x i32> %idxs,
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1165,7 +1165,7 @@ define <8 x i64> @vpgather_baseidx_v8i64(i64* %base, <8 x i64> %idxs, <8 x i1> %
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1702,7 +1702,7 @@ define <8 x double> @vpgather_baseidx_sext_v8i8_v8f64(double* %base, <8 x i8> %i
 ; RV32-NEXT:    vsext.vf8 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1728,7 +1728,7 @@ define <8 x double> @vpgather_baseidx_zext_v8i8_v8f64(double* %base, <8 x i8> %i
 ; RV32-NEXT:    vzext.vf8 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1777,7 +1777,7 @@ define <8 x double> @vpgather_baseidx_sext_v8i16_v8f64(double* %base, <8 x i16>
 ; RV32-NEXT:    vsext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1803,7 +1803,7 @@ define <8 x double> @vpgather_baseidx_zext_v8i16_v8f64(double* %base, <8 x i16>
 ; RV32-NEXT:    vzext.vf4 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1851,7 +1851,7 @@ define <8 x double> @vpgather_baseidx_sext_v8i32_v8f64(double* %base, <8 x i32>
 ; RV32-NEXT:    vsext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1877,7 +1877,7 @@ define <8 x double> @vpgather_baseidx_zext_v8i32_v8f64(double* %base, <8 x i32>
 ; RV32-NEXT:    vzext.vf2 v12, v8
 ; RV32-NEXT:    vsll.vi v8, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -1902,7 +1902,7 @@ define <8 x double> @vpgather_baseidx_v8f64(double* %base, <8 x i64> %idxs, <8 x
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v8, 0
+; RV32-NEXT:    vnsrl.wx v12, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v12, v0.t
 ; RV32-NEXT:    ret
@@ -2058,7 +2058,7 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(double* %base, <32 x i8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v16, 0
+; RV32-NEXT:    vnsrl.wx v12, v16, zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v12, v0.t
 ; RV32-NEXT:    li a2, 16
@@ -2069,7 +2069,7 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(double* %base, <32 x i8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v24, v24, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vnsrl.wx v4, v24, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v10
 ; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
@@ -2132,7 +2132,7 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(double* %base, <32 x i8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v12, v16, 0
+; RV32-NEXT:    vnsrl.wx v12, v16, zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v12, v0.t
 ; RV32-NEXT:    li a2, 16
@@ -2143,7 +2143,7 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(double* %base, <32 x i8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v24, v24, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vnsrl.wx v4, v24, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v10
 ; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
@@ -2270,7 +2270,7 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(double* %base, <32 x i
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vnsrl.wx v8, v16, zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
 ; RV32-NEXT:    li a2, 16
@@ -2281,7 +2281,7 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(double* %base, <32 x i
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v24, v24, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vnsrl.wx v4, v24, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
@@ -2344,7 +2344,7 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(double* %base, <32 x i
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vnsrl.wx v8, v16, zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
 ; RV32-NEXT:    li a2, 16
@@ -2355,7 +2355,7 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(double* %base, <32 x i
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v24, v24, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vnsrl.wx v4, v24, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
@@ -2481,7 +2481,7 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(double* %base, <32 x i
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v4, v8, 0
+; RV32-NEXT:    vnsrl.wx v4, v8, zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v4, v0.t
 ; RV32-NEXT:    li a2, 16
@@ -2492,7 +2492,7 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(double* %base, <32 x i
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
@@ -2555,7 +2555,7 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(double* %base, <32 x i
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v4, v8, 0
+; RV32-NEXT:    vnsrl.wx v4, v8, zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v4, v0.t
 ; RV32-NEXT:    li a2, 16
@@ -2566,7 +2566,7 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(double* %base, <32 x i
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    vluxei32.v v8, (a0), v24, v0.t
@@ -2624,7 +2624,7 @@ define <32 x double> @vpgather_baseidx_v32f64(double* %base, <32 x i64> %idxs, <
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli a3, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v28, v16, 0
+; RV32-NEXT:    vnsrl.wx v28, v16, zero
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v28, v0.t
 ; RV32-NEXT:    li a2, 16
@@ -2635,7 +2635,7 @@ define <32 x double> @vpgather_baseidx_v32f64(double* %base, <32 x i64> %idxs, <
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v28, v8, 0
+; RV32-NEXT:    vnsrl.wx v28, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    vluxei32.v v8, (a0), v28, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
index 5fe4ad0cfe635..5255910217f21 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll
@@ -26,7 +26,7 @@ define void @vpscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x i8*> %ptrs, <2
 ; RV32-LABEL: vpscatter_v2i16_truncstore_v2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -34,7 +34,7 @@ define void @vpscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x i8*> %ptrs, <2
 ; RV64-LABEL: vpscatter_v2i16_truncstore_v2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf8, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
@@ -47,9 +47,9 @@ define void @vpscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x i8*> %ptrs, <2
 ; RV32-LABEL: vpscatter_v2i32_truncstore_v2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -57,9 +57,9 @@ define void @vpscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x i8*> %ptrs, <2
 ; RV64-LABEL: vpscatter_v2i32_truncstore_v2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
@@ -72,11 +72,11 @@ define void @vpscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x i8*> %ptrs, <2
 ; RV32-LABEL: vpscatter_v2i64_truncstore_v2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -84,11 +84,11 @@ define void @vpscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x i8*> %ptrs, <2
 ; RV64-LABEL: vpscatter_v2i64_truncstore_v2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf8, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
@@ -194,7 +194,7 @@ define void @vpscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x i16*> %ptrs,
 ; RV32-LABEL: vpscatter_v2i32_truncstore_v2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -202,7 +202,7 @@ define void @vpscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x i16*> %ptrs,
 ; RV64-LABEL: vpscatter_v2i32_truncstore_v2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
@@ -215,9 +215,9 @@ define void @vpscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x i16*> %ptrs,
 ; RV32-LABEL: vpscatter_v2i64_truncstore_v2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -225,9 +225,9 @@ define void @vpscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x i16*> %ptrs,
 ; RV64-LABEL: vpscatter_v2i64_truncstore_v2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf4, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
@@ -442,7 +442,7 @@ define void @vpscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs,
 ; RV32-LABEL: vpscatter_v2i64_truncstore_v2i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -450,7 +450,7 @@ define void @vpscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x i32*> %ptrs,
 ; RV64-LABEL: vpscatter_v2i64_truncstore_v2i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e32, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e32, mf2, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v9, v0.t
 ; RV64-NEXT:    ret
@@ -779,7 +779,7 @@ define void @vpscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsext.vf8 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -805,7 +805,7 @@ define void @vpscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vzext.vf8 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -854,7 +854,7 @@ define void @vpscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -880,7 +880,7 @@ define void @vpscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vzext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -928,7 +928,7 @@ define void @vpscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -954,7 +954,7 @@ define void @vpscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, i64* %base, <8 x
 ; RV32-NEXT:    vzext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -979,7 +979,7 @@ define void @vpscatter_baseidx_v8i64(<8 x i64> %val, i64* %base, <8 x i64> %idxs
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1500,7 +1500,7 @@ define void @vpscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vsext.vf8 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1526,7 +1526,7 @@ define void @vpscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, double* %base,
 ; RV32-NEXT:    vzext.vf8 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1575,7 +1575,7 @@ define void @vpscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, double* %base
 ; RV32-NEXT:    vsext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1601,7 +1601,7 @@ define void @vpscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, double* %base
 ; RV32-NEXT:    vzext.vf4 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1649,7 +1649,7 @@ define void @vpscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, double* %base
 ; RV32-NEXT:    vsext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1675,7 +1675,7 @@ define void @vpscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, double* %base
 ; RV32-NEXT:    vzext.vf2 v16, v12
 ; RV32-NEXT:    vsll.vi v12, v16, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1700,7 +1700,7 @@ define void @vpscatter_baseidx_v8f64(<8 x double> %val, double* %base, <8 x i64>
 ; RV32-NEXT:    vsetivli zero, 8, e64, m4, ta, mu
 ; RV32-NEXT:    vsll.vi v12, v12, 3
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v12, 0
+; RV32-NEXT:    vnsrl.wx v16, v12, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1932,7 +1932,7 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, double* %b
 ; RV32-NEXT:    vsext.vf2 v16, v24
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli a4, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    addi a1, a2, -16
 ; RV32-NEXT:    csrr a4, vlenb
@@ -1950,7 +1950,7 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, double* %b
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 4
@@ -2078,7 +2078,7 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, double* %b
 ; RV32-NEXT:    vzext.vf2 v16, v24
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli a4, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    addi a1, a2, -16
 ; RV32-NEXT:    csrr a4, vlenb
@@ -2096,7 +2096,7 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, double* %b
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 4
diff --git a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
index 7c423a20a0629..302174e5aadf5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
@@ -6,9 +6,9 @@ define <vscale x 4 x i5> @trunc_nxv4i32_to_nxv4i5(<vscale x 4 x i32> %a) {
 ; CHECK-LABEL: trunc_nxv4i32_to_nxv4i5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %v = trunc <vscale x 4 x i32> %a to <vscale x 4 x i5>
   ret <vscale x 4 x i5> %v
@@ -18,9 +18,9 @@ define <vscale x 1 x i5> @trunc_nxv1i32_to_nxv1i5(<vscale x 1 x i32> %a) {
 ; CHECK-LABEL: trunc_nxv1i32_to_nxv1i5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %v = trunc <vscale x 1 x i32> %a to <vscale x 1 x i5>
   ret <vscale x 1 x i5> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
index 31731af18b6b3..50773c42966b3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll
@@ -1043,7 +1043,7 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, <vsca
 ; RV32-NEXT:    vsext.vf8 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1070,7 +1070,7 @@ define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, <vsca
 ; RV32-NEXT:    vzext.vf8 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1121,7 +1121,7 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, <vsc
 ; RV32-NEXT:    vsext.vf4 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1148,7 +1148,7 @@ define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, <vsc
 ; RV32-NEXT:    vzext.vf4 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1198,7 +1198,7 @@ define <vscale x 8 x i64> @mgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, <vsc
 ; RV32-NEXT:    vsext.vf2 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1225,7 +1225,7 @@ define <vscale x 8 x i64> @mgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, <vsc
 ; RV32-NEXT:    vzext.vf2 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1251,7 +1251,7 @@ define <vscale x 8 x i64> @mgather_baseidx_nxv8i64(i64* %base, <vscale x 8 x i64
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -1979,7 +1979,7 @@ define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i8_nxv8f64(double* %base,
 ; RV32-NEXT:    vsext.vf8 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -2006,7 +2006,7 @@ define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i8_nxv8f64(double* %base,
 ; RV32-NEXT:    vzext.vf8 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -2057,7 +2057,7 @@ define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i16_nxv8f64(double* %base
 ; RV32-NEXT:    vsext.vf4 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -2084,7 +2084,7 @@ define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i16_nxv8f64(double* %base
 ; RV32-NEXT:    vzext.vf4 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -2134,7 +2134,7 @@ define <vscale x 8 x double> @mgather_baseidx_sext_nxv8i32_nxv8f64(double* %base
 ; RV32-NEXT:    vsext.vf2 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -2161,7 +2161,7 @@ define <vscale x 8 x double> @mgather_baseidx_zext_nxv8i32_nxv8f64(double* %base
 ; RV32-NEXT:    vzext.vf2 v24, v8
 ; RV32-NEXT:    vsll.vi v8, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
@@ -2187,7 +2187,7 @@ define <vscale x 8 x double> @mgather_baseidx_nxv8f64(double* %base, <vscale x 8
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v24, v0.t
 ; RV32-NEXT:    vmv.v.v v8, v16
diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
index f82dcca4ce65b..90cc7bf501bbb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll
@@ -44,14 +44,14 @@ define void @mscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscale
 ; RV32-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i16_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i16> %val to <vscale x 2 x i8>
@@ -63,18 +63,18 @@ define void @mscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscale
 ; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i8>
@@ -86,22 +86,22 @@ define void @mscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscale
 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wx v11, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wx v8, v11, zero
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wx v8, v12, zero
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i8>
@@ -236,14 +236,14 @@ define void @mscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vscal
 ; RV32-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i32_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i32> %val to <vscale x 2 x i16>
@@ -255,18 +255,18 @@ define void @mscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vscal
 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wx v11, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wx v8, v11, zero
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wx v8, v12, zero
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i16>
@@ -474,14 +474,14 @@ define void @mscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vscal
 ; RV32-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wx v11, v8, zero
 ; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: mscatter_nxv2i64_truncstore_nxv2i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
 ; RV64-NEXT:    ret
   %tval = trunc <vscale x 2 x i64> %val to <vscale x 2 x i32>
@@ -843,7 +843,7 @@ define void @mscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64*
 ; RV32-NEXT:    vsext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -868,7 +868,7 @@ define void @mscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64*
 ; RV32-NEXT:    vzext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -915,7 +915,7 @@ define void @mscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64*
 ; RV32-NEXT:    vsext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -940,7 +940,7 @@ define void @mscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64*
 ; RV32-NEXT:    vzext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -986,7 +986,7 @@ define void @mscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64*
 ; RV32-NEXT:    vsext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1011,7 +1011,7 @@ define void @mscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64*
 ; RV32-NEXT:    vzext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1035,7 +1035,7 @@ define void @mscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vsca
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1637,7 +1637,7 @@ define void @mscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, do
 ; RV32-NEXT:    vsext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1662,7 +1662,7 @@ define void @mscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, do
 ; RV32-NEXT:    vzext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1709,7 +1709,7 @@ define void @mscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, d
 ; RV32-NEXT:    vsext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1734,7 +1734,7 @@ define void @mscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val, d
 ; RV32-NEXT:    vzext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1780,7 +1780,7 @@ define void @mscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, d
 ; RV32-NEXT:    vsext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1805,7 +1805,7 @@ define void @mscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val, d
 ; RV32-NEXT:    vzext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1829,7 +1829,7 @@ define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base,
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
index 6884ebc92565c..262ac6d7cd241 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptoi-sdnode.ll
@@ -636,7 +636,7 @@ define <vscale x 1 x i8> @vfptosi_nxv1f32_nxv1i8(<vscale x 1 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x float> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %evec
@@ -648,7 +648,7 @@ define <vscale x 1 x i8> @vfptoui_nxv1f32_nxv1i8(<vscale x 1 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x float> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %evec
@@ -748,7 +748,7 @@ define <vscale x 2 x i8> @vfptosi_nxv2f32_nxv2i8(<vscale x 2 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x float> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %evec
@@ -760,7 +760,7 @@ define <vscale x 2 x i8> @vfptoui_nxv2f32_nxv2i8(<vscale x 2 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x float> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %evec
@@ -860,7 +860,7 @@ define <vscale x 4 x i8> @vfptosi_nxv4f32_nxv4i8(<vscale x 4 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x float> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %evec
@@ -872,7 +872,7 @@ define <vscale x 4 x i8> @vfptoui_nxv4f32_nxv4i8(<vscale x 4 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x float> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %evec
@@ -972,7 +972,7 @@ define <vscale x 8 x i8> @vfptosi_nxv8f32_nxv8i8(<vscale x 8 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x float> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -984,7 +984,7 @@ define <vscale x 8 x i8> @vfptoui_nxv8f32_nxv8i8(<vscale x 8 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x float> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -1084,7 +1084,7 @@ define <vscale x 16 x i8> @vfptosi_nxv16f32_nxv16i8(<vscale x 16 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 16 x float> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %evec
@@ -1096,7 +1096,7 @@ define <vscale x 16 x i8> @vfptoui_nxv16f32_nxv16i8(<vscale x 16 x float> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 16 x float> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %evec
@@ -1174,9 +1174,9 @@ define <vscale x 1 x i8> @vfptosi_nxv1f64_nxv1i8(<vscale x 1 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x double> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %evec
@@ -1188,9 +1188,9 @@ define <vscale x 1 x i8> @vfptoui_nxv1f64_nxv1i8(<vscale x 1 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x double> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %evec
@@ -1202,7 +1202,7 @@ define <vscale x 1 x i16> @vfptosi_nxv1f64_nxv1i16(<vscale x 1 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 1 x double> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %evec
@@ -1214,7 +1214,7 @@ define <vscale x 1 x i16> @vfptoui_nxv1f64_nxv1i16(<vscale x 1 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v9, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v9, 0
+; CHECK-NEXT:    vnsrl.wx v8, v9, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 1 x double> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %evec
@@ -1292,9 +1292,9 @@ define <vscale x 2 x i8> @vfptosi_nxv2f64_nxv2i8(<vscale x 2 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x double> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %evec
@@ -1306,9 +1306,9 @@ define <vscale x 2 x i8> @vfptoui_nxv2f64_nxv2i8(<vscale x 2 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x double> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %evec
@@ -1320,7 +1320,7 @@ define <vscale x 2 x i16> @vfptosi_nxv2f64_nxv2i16(<vscale x 2 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 2 x double> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %evec
@@ -1332,7 +1332,7 @@ define <vscale x 2 x i16> @vfptoui_nxv2f64_nxv2i16(<vscale x 2 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v10, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 2 x double> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %evec
@@ -1410,9 +1410,9 @@ define <vscale x 4 x i8> @vfptosi_nxv4f64_nxv4i8(<vscale x 4 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x double> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %evec
@@ -1424,9 +1424,9 @@ define <vscale x 4 x i8> @vfptoui_nxv4f64_nxv4i8(<vscale x 4 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x double> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %evec
@@ -1438,7 +1438,7 @@ define <vscale x 4 x i16> @vfptosi_nxv4f64_nxv4i16(<vscale x 4 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 4 x double> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %evec
@@ -1450,7 +1450,7 @@ define <vscale x 4 x i16> @vfptoui_nxv4f64_nxv4i16(<vscale x 4 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v12, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 4 x double> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %evec
@@ -1528,9 +1528,9 @@ define <vscale x 8 x i8> @vfptosi_nxv8f64_nxv8i8(<vscale x 8 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v16, 0
+; CHECK-NEXT:    vnsrl.wx v10, v16, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x double> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -1542,9 +1542,9 @@ define <vscale x 8 x i8> @vfptoui_nxv8f64_nxv8i8(<vscale x 8 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v16, 0
+; CHECK-NEXT:    vnsrl.wx v10, v16, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x double> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %evec
@@ -1556,7 +1556,7 @@ define <vscale x 8 x i16> @vfptosi_nxv8f64_nxv8i16(<vscale x 8 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.x.f.w v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    ret
   %evec = fptosi <vscale x 8 x double> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %evec
@@ -1568,7 +1568,7 @@ define <vscale x 8 x i16> @vfptoui_nxv8f64_nxv8i16(<vscale x 8 x double> %va) {
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
 ; CHECK-NEXT:    vfncvt.rtz.xu.f.w v16, v8
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    ret
   %evec = fptoui <vscale x 8 x double> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %evec
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
index 4c3636e315825..1aa4b914481cc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll
@@ -1056,7 +1056,7 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i8_nxv8i64(i64* %base, <vsc
 ; RV32-NEXT:    vsext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1082,7 +1082,7 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i8_nxv8i64(i64* %base, <vsc
 ; RV32-NEXT:    vzext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1131,7 +1131,7 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i16_nxv8i64(i64* %base, <vs
 ; RV32-NEXT:    vsext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1157,7 +1157,7 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i16_nxv8i64(i64* %base, <vs
 ; RV32-NEXT:    vzext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1205,7 +1205,7 @@ define <vscale x 8 x i64> @vpgather_baseidx_sext_nxv8i32_nxv8i64(i64* %base, <vs
 ; RV32-NEXT:    vsext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1231,7 +1231,7 @@ define <vscale x 8 x i64> @vpgather_baseidx_zext_nxv8i32_nxv8i64(i64* %base, <vs
 ; RV32-NEXT:    vzext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1256,7 +1256,7 @@ define <vscale x 8 x i64> @vpgather_baseidx_nxv8i64(i64* %base, <vscale x 8 x i6
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1851,7 +1851,7 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i8_nxv6f64(double* %base
 ; RV32-NEXT:    vsext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1877,7 +1877,7 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i8_nxv6f64(double* %base
 ; RV32-NEXT:    vzext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1926,7 +1926,7 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i16_nxv6f64(double* %bas
 ; RV32-NEXT:    vsext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -1952,7 +1952,7 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i16_nxv6f64(double* %bas
 ; RV32-NEXT:    vzext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2000,7 +2000,7 @@ define <vscale x 6 x double> @vpgather_baseidx_sext_nxv6i32_nxv6f64(double* %bas
 ; RV32-NEXT:    vsext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2026,7 +2026,7 @@ define <vscale x 6 x double> @vpgather_baseidx_zext_nxv6i32_nxv6f64(double* %bas
 ; RV32-NEXT:    vzext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2051,7 +2051,7 @@ define <vscale x 6 x double> @vpgather_baseidx_nxv6f64(double* %base, <vscale x
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2117,7 +2117,7 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i8_nxv8f64(double* %base
 ; RV32-NEXT:    vsext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2143,7 +2143,7 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i8_nxv8f64(double* %base
 ; RV32-NEXT:    vzext.vf8 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2192,7 +2192,7 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i16_nxv8f64(double* %bas
 ; RV32-NEXT:    vsext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2218,7 +2218,7 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i16_nxv8f64(double* %bas
 ; RV32-NEXT:    vzext.vf4 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2266,7 +2266,7 @@ define <vscale x 8 x double> @vpgather_baseidx_sext_nxv8i32_nxv8f64(double* %bas
 ; RV32-NEXT:    vsext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2292,7 +2292,7 @@ define <vscale x 8 x double> @vpgather_baseidx_zext_nxv8i32_nxv8f64(double* %bas
 ; RV32-NEXT:    vzext.vf2 v16, v8
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2317,7 +2317,7 @@ define <vscale x 8 x double> @vpgather_baseidx_nxv8f64(double* %base, <vscale x
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT:    ret
@@ -2471,7 +2471,7 @@ define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(double* %
 ; RV32-NEXT:    vsext.vf4 v24, v8
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vnsrl.wx v8, v16, zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
 ; RV32-NEXT:    bltu a1, a2, .LBB104_4
@@ -2481,7 +2481,7 @@ define <vscale x 16 x double> @vpgather_baseidx_sext_nxv16i16_nxv16f64(double* %
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v24, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vnsrl.wx v4, v24, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
@@ -2543,7 +2543,7 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(double* %
 ; RV32-NEXT:    vzext.vf4 v24, v8
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v16, 0
+; RV32-NEXT:    vnsrl.wx v8, v16, zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
 ; RV32-NEXT:    vluxei32.v v16, (a0), v8, v0.t
 ; RV32-NEXT:    bltu a1, a2, .LBB105_4
@@ -2553,7 +2553,7 @@ define <vscale x 16 x double> @vpgather_baseidx_zext_nxv16i16_nxv16f64(double* %
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v24, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v4, v24, 0
+; RV32-NEXT:    vnsrl.wx v4, v24, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vmv1r.v v0, v12
 ; RV32-NEXT:    vluxei32.v v8, (a0), v4, v0.t
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
index 01be81fd1912d..ca444ee17b0c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -44,7 +44,7 @@ define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscal
 ; RV32-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -52,7 +52,7 @@ define void @vpscatter_nxv2i16_truncstore_nxv2i8(<vscale x 2 x i16> %val, <vscal
 ; RV64-LABEL: vpscatter_nxv2i16_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a1, zero, e8, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
@@ -65,9 +65,9 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscal
 ; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -75,9 +75,9 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i8(<vscale x 2 x i32> %val, <vscal
 ; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
@@ -90,11 +90,11 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscal
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wx v11, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wx v8, v11, zero
 ; RV32-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
@@ -102,11 +102,11 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i8(<vscale x 2 x i64> %val, <vscal
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i8:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wx v8, v12, zero
 ; RV64-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
@@ -230,7 +230,7 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vsca
 ; RV32-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v8, 0
+; RV32-NEXT:    vnsrl.wx v8, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v9, v0.t
 ; RV32-NEXT:    ret
@@ -238,7 +238,7 @@ define void @vpscatter_nxv2i32_truncstore_nxv2i16(<vscale x 2 x i32> %val, <vsca
 ; RV64-LABEL: vpscatter_nxv2i32_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a1, zero, e16, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v8, 0
+; RV64-NEXT:    vnsrl.wx v8, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
@@ -251,9 +251,9 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vsca
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wx v11, v8, zero
 ; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32-NEXT:    vnsrl.wi v8, v11, 0
+; RV32-NEXT:    vnsrl.wx v8, v11, zero
 ; RV32-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (zero), v10, v0.t
 ; RV32-NEXT:    ret
@@ -261,9 +261,9 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i16(<vscale x 2 x i64> %val, <vsca
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i16:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64-NEXT:    vnsrl.wi v8, v12, 0
+; RV64-NEXT:    vnsrl.wx v8, v12, zero
 ; RV64-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
 ; RV64-NEXT:    vsoxei64.v v8, (zero), v10, v0.t
 ; RV64-NEXT:    ret
@@ -460,7 +460,7 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vsca
 ; RV32-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; RV32-NEXT:    vnsrl.wi v11, v8, 0
+; RV32-NEXT:    vnsrl.wx v11, v8, zero
 ; RV32-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; RV32-NEXT:    vsoxei32.v v11, (zero), v10, v0.t
 ; RV32-NEXT:    ret
@@ -468,7 +468,7 @@ define void @vpscatter_nxv2i64_truncstore_nxv2i32(<vscale x 2 x i64> %val, <vsca
 ; RV64-LABEL: vpscatter_nxv2i64_truncstore_nxv2i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetvli a1, zero, e32, m1, ta, mu
-; RV64-NEXT:    vnsrl.wi v12, v8, 0
+; RV64-NEXT:    vnsrl.wx v12, v8, zero
 ; RV64-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; RV64-NEXT:    vsoxei64.v v12, (zero), v10, v0.t
 ; RV64-NEXT:    ret
@@ -815,7 +815,7 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64*
 ; RV32-NEXT:    vsext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -841,7 +841,7 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8i64(<vscale x 8 x i64> %val, i64*
 ; RV32-NEXT:    vzext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -890,7 +890,7 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64
 ; RV32-NEXT:    vsext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -916,7 +916,7 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8i64(<vscale x 8 x i64> %val, i64
 ; RV32-NEXT:    vzext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -964,7 +964,7 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64
 ; RV32-NEXT:    vsext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -990,7 +990,7 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8i64(<vscale x 8 x i64> %val, i64
 ; RV32-NEXT:    vzext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1015,7 +1015,7 @@ define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, i64* %base, <vsc
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1590,7 +1590,7 @@ define void @vpscatter_baseidx_sext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, d
 ; RV32-NEXT:    vsext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1616,7 +1616,7 @@ define void @vpscatter_baseidx_zext_nxv6i8_nxv6f64(<vscale x 6 x double> %val, d
 ; RV32-NEXT:    vzext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1665,7 +1665,7 @@ define void @vpscatter_baseidx_sext_nxv6i16_nxv6f64(<vscale x 6 x double> %val,
 ; RV32-NEXT:    vsext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1691,7 +1691,7 @@ define void @vpscatter_baseidx_zext_nxv6i16_nxv6f64(<vscale x 6 x double> %val,
 ; RV32-NEXT:    vzext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1739,7 +1739,7 @@ define void @vpscatter_baseidx_sext_nxv6i32_nxv6f64(<vscale x 6 x double> %val,
 ; RV32-NEXT:    vsext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1765,7 +1765,7 @@ define void @vpscatter_baseidx_zext_nxv6i32_nxv6f64(<vscale x 6 x double> %val,
 ; RV32-NEXT:    vzext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1790,7 +1790,7 @@ define void @vpscatter_baseidx_nxv6f64(<vscale x 6 x double> %val, double* %base
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1855,7 +1855,7 @@ define void @vpscatter_baseidx_sext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, d
 ; RV32-NEXT:    vsext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1881,7 +1881,7 @@ define void @vpscatter_baseidx_zext_nxv8i8_nxv8f64(<vscale x 8 x double> %val, d
 ; RV32-NEXT:    vzext.vf8 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1930,7 +1930,7 @@ define void @vpscatter_baseidx_sext_nxv8i16_nxv8f64(<vscale x 8 x double> %val,
 ; RV32-NEXT:    vsext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -1956,7 +1956,7 @@ define void @vpscatter_baseidx_zext_nxv8i16_nxv8f64(<vscale x 8 x double> %val,
 ; RV32-NEXT:    vzext.vf4 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -2004,7 +2004,7 @@ define void @vpscatter_baseidx_sext_nxv8i32_nxv8f64(<vscale x 8 x double> %val,
 ; RV32-NEXT:    vsext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -2030,7 +2030,7 @@ define void @vpscatter_baseidx_zext_nxv8i32_nxv8f64(<vscale x 8 x double> %val,
 ; RV32-NEXT:    vzext.vf2 v24, v16
 ; RV32-NEXT:    vsll.vi v16, v24, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -2055,7 +2055,7 @@ define void @vpscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, double* %base
 ; RV32-NEXT:    vsetvli a2, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v16, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v16, 0
+; RV32-NEXT:    vnsrl.wx v24, v16, zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, mu
 ; RV32-NEXT:    vsoxei32.v v8, (a0), v24, v0.t
 ; RV32-NEXT:    ret
@@ -2232,7 +2232,7 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ; RV32-NEXT:    vsext.vf4 v16, v26
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
 ; RV32-NEXT:    addi a3, sp, 16
 ; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
@@ -2248,7 +2248,7 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 3
@@ -2348,7 +2348,7 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ; RV32-NEXT:    vzext.vf4 v16, v26
 ; RV32-NEXT:    vsll.vi v8, v8, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v24, v8, 0
+; RV32-NEXT:    vnsrl.wx v24, v8, zero
 ; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, mu
 ; RV32-NEXT:    addi a3, sp, 16
 ; RV32-NEXT:    vl8re8.v v8, (a3) # Unknown-size Folded Reload
@@ -2364,7 +2364,7 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %va
 ; RV32-NEXT:    vsetvli a1, zero, e64, m8, ta, mu
 ; RV32-NEXT:    vsll.vi v8, v16, 3
 ; RV32-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32-NEXT:    vnsrl.wi v16, v8, 0
+; RV32-NEXT:    vnsrl.wx v16, v8, zero
 ; RV32-NEXT:    vsetvli zero, a4, e64, m8, ta, mu
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    slli a1, a1, 3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
index 9a4b78a009e34..c756f7b2223f7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtruncs-sdnode.ll
@@ -6,7 +6,7 @@ define <vscale x 1 x i8> @vtrunc_nxv1i16_nxv1i8(<vscale x 1 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i16> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %tvec
@@ -16,7 +16,7 @@ define <vscale x 2 x i8> @vtrunc_nxv2i16_nxv2i8(<vscale x 2 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i16_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i16> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %tvec
@@ -26,7 +26,7 @@ define <vscale x 4 x i8> @vtrunc_nxv4i16_nxv4i8(<vscale x 4 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i16_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i16> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %tvec
@@ -36,7 +36,7 @@ define <vscale x 8 x i8> @vtrunc_nxv8i16_nxv8i8(<vscale x 8 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i16> %va to <vscale x 8 x i8>
@@ -47,7 +47,7 @@ define <vscale x 16 x i8> @vtrunc_nxv16i16_nxv16i8(<vscale x 16 x i16> %va) {
 ; CHECK-LABEL: vtrunc_nxv16i16_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 16 x i16> %va to <vscale x 16 x i8>
@@ -58,9 +58,9 @@ define <vscale x 1 x i8> @vtrunc_nxv1i32_nxv1i8(<vscale x 1 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i32_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i32> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %tvec
@@ -70,7 +70,7 @@ define <vscale x 1 x i16> @vtrunc_nxv1i32_nxv1i16(<vscale x 1 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i32> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %tvec
@@ -80,9 +80,9 @@ define <vscale x 2 x i8> @vtrunc_nxv2i32_nxv2i8(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i32_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i32> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %tvec
@@ -92,7 +92,7 @@ define <vscale x 2 x i16> @vtrunc_nxv2i32_nxv2i16(<vscale x 2 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i32_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i32> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %tvec
@@ -102,9 +102,9 @@ define <vscale x 4 x i8> @vtrunc_nxv4i32_nxv4i8(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i32_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i32> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %tvec
@@ -114,7 +114,7 @@ define <vscale x 4 x i16> @vtrunc_nxv4i32_nxv4i16(<vscale x 4 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i32_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i32> %va to <vscale x 4 x i16>
@@ -125,9 +125,9 @@ define <vscale x 8 x i8> @vtrunc_nxv8i32_nxv8i8(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i32_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i32> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %tvec
@@ -137,7 +137,7 @@ define <vscale x 8 x i16> @vtrunc_nxv8i32_nxv8i16(<vscale x 8 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i32_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i32> %va to <vscale x 8 x i16>
@@ -148,9 +148,9 @@ define <vscale x 16 x i8> @vtrunc_nxv16i32_nxv16i8(<vscale x 16 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv16i32_nxv16i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 16 x i32> %va to <vscale x 16 x i8>
   ret <vscale x 16 x i8> %tvec
@@ -160,7 +160,7 @@ define <vscale x 16 x i16> @vtrunc_nxv16i32_nxv16i16(<vscale x 16 x i32> %va) {
 ; CHECK-LABEL: vtrunc_nxv16i32_nxv16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 16 x i32> %va to <vscale x 16 x i16>
@@ -171,11 +171,11 @@ define <vscale x 1 x i8> @vtrunc_nxv1i64_nxv1i8(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i64> %va to <vscale x 1 x i8>
   ret <vscale x 1 x i8> %tvec
@@ -185,9 +185,9 @@ define <vscale x 1 x i16> @vtrunc_nxv1i64_nxv1i16(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i64> %va to <vscale x 1 x i16>
   ret <vscale x 1 x i16> %tvec
@@ -197,7 +197,7 @@ define <vscale x 1 x i32> @vtrunc_nxv1i64_nxv1i32(<vscale x 1 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 1 x i64> %va to <vscale x 1 x i32>
   ret <vscale x 1 x i32> %tvec
@@ -207,11 +207,11 @@ define <vscale x 2 x i8> @vtrunc_nxv2i64_nxv2i8(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i64_nxv2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i64> %va to <vscale x 2 x i8>
   ret <vscale x 2 x i8> %tvec
@@ -221,9 +221,9 @@ define <vscale x 2 x i16> @vtrunc_nxv2i64_nxv2i16(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i64_nxv2i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i64> %va to <vscale x 2 x i16>
   ret <vscale x 2 x i16> %tvec
@@ -233,7 +233,7 @@ define <vscale x 2 x i32> @vtrunc_nxv2i64_nxv2i32(<vscale x 2 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv2i64_nxv2i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wx v10, v8, zero
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 2 x i64> %va to <vscale x 2 x i32>
@@ -244,11 +244,11 @@ define <vscale x 4 x i8> @vtrunc_nxv4i64_nxv4i8(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i64_nxv4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vnsrl.wx v8, v8, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i64> %va to <vscale x 4 x i8>
   ret <vscale x 4 x i8> %tvec
@@ -258,9 +258,9 @@ define <vscale x 4 x i16> @vtrunc_nxv4i64_nxv4i16(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i64_nxv4i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vnsrl.wx v8, v12, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i64> %va to <vscale x 4 x i16>
   ret <vscale x 4 x i16> %tvec
@@ -270,7 +270,7 @@ define <vscale x 4 x i32> @vtrunc_nxv4i64_nxv4i32(<vscale x 4 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv4i64_nxv4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vnsrl.wx v12, v8, zero
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 4 x i64> %va to <vscale x 4 x i32>
@@ -281,11 +281,11 @@ define <vscale x 8 x i8> @vtrunc_nxv8i64_nxv8i8(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i64_nxv8i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v10, v16, 0
+; CHECK-NEXT:    vnsrl.wx v10, v16, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v10, 0
+; CHECK-NEXT:    vnsrl.wx v8, v10, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i64> %va to <vscale x 8 x i8>
   ret <vscale x 8 x i8> %tvec
@@ -295,9 +295,9 @@ define <vscale x 8 x i16> @vtrunc_nxv8i64_nxv8i16(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i64_nxv8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; CHECK-NEXT:    vnsrl.wi v8, v16, 0
+; CHECK-NEXT:    vnsrl.wx v8, v16, zero
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i64> %va to <vscale x 8 x i16>
   ret <vscale x 8 x i16> %tvec
@@ -307,7 +307,7 @@ define <vscale x 8 x i32> @vtrunc_nxv8i64_nxv8i32(<vscale x 8 x i64> %va) {
 ; CHECK-LABEL: vtrunc_nxv8i64_nxv8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; CHECK-NEXT:    vnsrl.wi v16, v8, 0
+; CHECK-NEXT:    vnsrl.wx v16, v8, zero
 ; CHECK-NEXT:    vmv.v.v v8, v16
 ; CHECK-NEXT:    ret
   %tvec = trunc <vscale x 8 x i64> %va to <vscale x 8 x i32>

From 2fcdd685bd0213b75f45597b970ea9858a3a9d89 Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Wed, 26 Jan 2022 18:48:20 -0800
Subject: [PATCH 831/946] [CMake][Fuchsia] Drop 32-bit ios runtimes

These are no longer supported in the recent Xcode SDK versions.

Differential Revision: https://reviews.llvm.org/D118306
---
 clang/cmake/caches/Fuchsia-stage2.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index d998db64d87ab..bc60b0df6d4af 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -70,7 +70,7 @@ if(APPLE)
   set(LIBCXX_ENABLE_SHARED OFF CACHE BOOL "")
   set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
   set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
-  set(DARWIN_ios_ARCHS armv7;armv7s;arm64 CACHE STRING "")
+  set(DARWIN_ios_ARCHS arm64 CACHE STRING "")
   set(DARWIN_iossim_ARCHS arm64;x86_64 CACHE STRING "")
   set(DARWIN_osx_ARCHS arm64;x86_64 CACHE STRING "")
 endif()

From 6f25cb86854a1886a69a69f67d8919b59039434e Mon Sep 17 00:00:00 2001
From: Ting Wang <Ting.Wang.SH@ibm.com>
Date: Wed, 26 Jan 2022 23:00:43 -0500
Subject: [PATCH 832/946] [PowerPC] Add the Power10 XS[MAX|MIN]CQP instruction

Add the Power 10 instruction XS[MAX|MIN]CQP.

Reviewed By: shchenz, amyk

Differential Revision: https://reviews.llvm.org/D118036
---
 llvm/lib/Target/PowerPC/P10InstrResources.td               | 2 ++
 llvm/lib/Target/PowerPC/PPCInstrPrefix.td                  | 2 ++
 llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt | 6 ++++++
 llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s                | 6 ++++++
 4 files changed, 16 insertions(+)

diff --git a/llvm/lib/Target/PowerPC/P10InstrResources.td b/llvm/lib/Target/PowerPC/P10InstrResources.td
index 68a1c225cb05d..edd3b42d47e1f 100644
--- a/llvm/lib/Target/PowerPC/P10InstrResources.td
+++ b/llvm/lib/Target/PowerPC/P10InstrResources.td
@@ -619,6 +619,8 @@ def : InstRW<[P10W_DX_5C, P10W_DISP_ANY, P10DX_Read, P10DX_Read],
     XSCMPEXPQP,
     XSCMPOQP,
     XSCMPUQP,
+    XSMAXCQP,
+    XSMINCQP,
     XSTSTDCQP,
     XXGENPCVBM
 )>;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index a19289e96b3e9..fe354208533ba 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2398,6 +2398,8 @@ let Predicates = [IsISA3_1] in {
 let Predicates = [IsISA3_1, HasVSX] in {
   def XVCVSPBF16 : XX2_XT6_XO5_XB6<60, 17, 475, "xvcvspbf16", vsrc, []>;
   def XVCVBF16SPN : XX2_XT6_XO5_XB6<60, 16, 475, "xvcvbf16spn", vsrc, []>;
+  def XSMAXCQP : X_VT5_VA5_VB5<63, 676, "xsmaxcqp", []>;
+  def XSMINCQP : X_VT5_VA5_VB5<63, 740, "xsmincqp", []>;
 }
 
 // Multiclass defining patterns for Set Boolean Extension Reverse Instructions.
diff --git a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
index 7b4f2cb8b6566..1ac3c26521014 100644
--- a/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/ppc64-encoding-ISA31.txt
@@ -738,6 +738,12 @@
 # CHECK: xscvsqqp 8, 28
 0xfd 0xb 0xe6 0x88
 
+# CHECK: xsmaxcqp 2, 2, 3
+0xfc 0x42 0x1d 0x48
+
+# CHECK: xsmincqp 2, 2, 3
+0xfc 0x42 0x1d 0xc8
+
 # CHECK: vstribr 2, 2
 0x10 0x41 0x10 0x0d
 
diff --git a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
index f237a68925848..27638ae3c4803 100644
--- a/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
+++ b/llvm/test/MC/PowerPC/ppc64-encoding-ISA31.s
@@ -936,6 +936,12 @@
 # CHECK-BE: xscvsqqp 8, 28                        # encoding: [0xfd,0x0b,0xe6,0x88]
 # CHECK-LE: xscvsqqp 8, 28                        # encoding: [0x88,0xe6,0x0b,0xfd]
             xscvsqqp 8, 28
+# CHECK-BE: xsmaxcqp 2, 2, 3                      # encoding: [0xfc,0x42,0x1d,0x48]
+# CHECK-LE: xsmaxcqp 2, 2, 3                      # encoding: [0x48,0x1d,0x42,0xfc]
+            xsmaxcqp 2, 2, 3
+# CHECK-BE: xsmincqp 2, 2, 3                      # encoding: [0xfc,0x42,0x1d,0xc8]
+# CHECK-LE: xsmincqp 2, 2, 3                      # encoding: [0xc8,0x1d,0x42,0xfc]
+            xsmincqp 2, 2, 3
 # CHECK-BE: vstribr 2, 2                          # encoding: [0x10,0x41,0x10,0x0d]
 # CHECK-LE: vstribr 2, 2                          # encoding: [0x0d,0x10,0x41,0x10]
             vstribr 2, 2

From 7ea4fe7ede204f3ec041c293441e8a007c3733b6 Mon Sep 17 00:00:00 2001
From: Arthur O'Dwyer <arthur.j.odwyer@gmail.com>
Date: Fri, 14 Jan 2022 11:25:56 -0500
Subject: [PATCH 833/946] [libc++] Fix LWG3390: move_iterator now handles
 move-only iterators.

This can't really be tested until C++20 move_iterator is completely implemented.

Differential Revision: https://reviews.llvm.org/D117327
---
 libcxx/docs/Status/Cxx20Issues.csv        | 2 +-
 libcxx/include/__iterator/move_iterator.h | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv
index daa6466f87515..68e1c27302050 100644
--- a/libcxx/docs/Status/Cxx20Issues.csv
+++ b/libcxx/docs/Status/Cxx20Issues.csv
@@ -291,7 +291,7 @@
 "`3387 <https://wg21.link/LWG3387>`__","|sect|\ [range.reverse.view] ``reverse_view<V>``\  unintentionally requires ``range<const V>``\ ","Prague","","","|ranges|"
 "`3388 <https://wg21.link/LWG3388>`__","``view``\  iterator types have ill-formed ``<=>``\  operators","Prague","","","|ranges|"
 "`3389 <https://wg21.link/LWG3389>`__","A move-only iterator still does not have a ``counted_iterator``\ ","Prague","","","|ranges|"
-"`3390 <https://wg21.link/LWG3390>`__","``make_move_iterator()``\  cannot be used to construct a ``move_iterator``\  for a move-only iterator","Prague","","","|ranges|"
+"`3390 <https://wg21.link/LWG3390>`__","``make_move_iterator()``\  cannot be used to construct a ``move_iterator``\  for a move-only iterator","Prague","|Complete|","14.0","|ranges|"
 "`3393 <https://wg21.link/LWG3393>`__","Missing/incorrect feature test macro for coroutines","Prague","|Complete|","14.0"
 "`3395 <https://wg21.link/LWG3395>`__","Definition for three-way comparison needs to be updated (US 152)","Prague","","","|spaceship|"
 "`3396 <https://wg21.link/LWG3396>`__","Clarify point of reference for ``source_location::current()``\  (DE 169)","Prague","",""
diff --git a/libcxx/include/__iterator/move_iterator.h b/libcxx/include/__iterator/move_iterator.h
index 306ce240baf3f..29bac864c2755 100644
--- a/libcxx/include/__iterator/move_iterator.h
+++ b/libcxx/include/__iterator/move_iterator.h
@@ -12,6 +12,7 @@
 
 #include <__config>
 #include <__iterator/iterator_traits.h>
+#include <__utility/move.h>
 #include <type_traits>
 
 #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
@@ -53,7 +54,7 @@ class _LIBCPP_TEMPLATE_VIS move_iterator
     move_iterator() : __current_() {}
 
     _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14
-    explicit move_iterator(_Iter __i) : __current_(__i) {}
+    explicit move_iterator(_Iter __i) : __current_(_VSTD::move(__i)) {}
 
     template <class _Up, class = __enable_if_t<
         !is_same<_Up, _Iter>::value && is_convertible<const _Up&, _Iter>::value
@@ -176,7 +177,7 @@ inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_AFTER_CXX14
 move_iterator<_Iter>
 make_move_iterator(_Iter __i)
 {
-    return move_iterator<_Iter>(__i);
+    return move_iterator<_Iter>(_VSTD::move(__i));
 }
 
 _LIBCPP_END_NAMESPACE_STD

From ecb502342cf2b3b4b6dfcd8fe881c596880d1a4a Mon Sep 17 00:00:00 2001
From: Ahmed Bougacha <ahmed@bougacha.org>
Date: Wed, 26 Jan 2022 17:31:26 -0800
Subject: [PATCH 834/946] [ObjC] Emit selector load right before msgSend call.

We currently emit the selector load early, but only because we need
it to compute the signature (so that we know which msgSend variant to
call).  We can prepare the signature with a plain undef, and replace
it with the materialized selector value if (and only if) needed, later.

Concretely, this usually doesn't have an effect, but tests need updating
because we reordered the receiver bitcast and the selector load, which
is always fine.

There is one notable change: with this, when a msgSend needs a
receiver null check, the selector is now loaded in the non-null
block, instead of before the null check.  That should be a mild
improvement.
---
 clang/lib/CodeGen/CGObjCMac.cpp               | 22 ++++-----
 clang/test/CodeGenObjC/arc-foreach.m          | 15 ++++---
 clang/test/CodeGenObjC/arc-literals.m         | 12 ++---
 .../arc-loadweakretained-release.m            |  2 +-
 clang/test/CodeGenObjC/arc-precise-lifetime.m | 20 ++++-----
 clang/test/CodeGenObjC/arc-property.m         |  4 +-
 .../CodeGenObjC/arc-related-result-type.m     |  2 +-
 clang/test/CodeGenObjC/arc.m                  | 45 ++++++++++---------
 clang/test/CodeGenObjC/class-stubs.m          |  2 +-
 .../test/CodeGenObjC/matrix-type-operators.m  | 10 ++---
 .../test/CodeGenObjC/ns_consume_null_check.m  |  7 +--
 .../nsvalue-objc-boxable-ios-arc.m            | 12 ++---
 .../CodeGenObjC/nsvalue-objc-boxable-ios.m    | 12 ++---
 .../nsvalue-objc-boxable-mac-arc.m            | 12 ++---
 .../CodeGenObjC/nsvalue-objc-boxable-mac.m    | 12 ++---
 .../objc-container-subscripting-1.m           |  8 ++--
 clang/test/CodeGenObjC/property.m             | 20 ++++-----
 .../CodeGenObjC/tentative-cfconstantstring.m  |  6 +--
 .../CodeGenObjCXX/arc-cxx11-member-init.mm    |  8 ++--
 clang/test/CodeGenObjCXX/arc.mm               |  3 +-
 clang/test/CodeGenObjCXX/message.mm           |  2 +-
 .../CodeGenObjCXX/property-lvalue-capture.mm  | 18 ++++----
 .../property-object-reference.mm              |  4 +-
 23 files changed, 132 insertions(+), 126 deletions(-)

diff --git a/clang/lib/CodeGen/CGObjCMac.cpp b/clang/lib/CodeGen/CGObjCMac.cpp
index a11a9ca7d8c0f..e7dba4c8feabe 100644
--- a/clang/lib/CodeGen/CGObjCMac.cpp
+++ b/clang/lib/CodeGen/CGObjCMac.cpp
@@ -2138,16 +2138,7 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
                                  const ObjCCommonTypesHelper &ObjCTypes) {
   CodeGenTypes &Types = CGM.getTypes();
   auto selTy = CGF.getContext().getObjCSelType();
-  llvm::Value *SelValue;
-
-  if (Method && Method->isDirectMethod()) {
-    // Direct methods will synthesize the proper `_cmd` internally,
-    // so just don't bother with setting the `_cmd` argument.
-    assert(!IsSuper);
-    SelValue = llvm::UndefValue::get(Types.ConvertType(selTy));
-  } else {
-    SelValue = GetSelector(CGF, Sel);
-  }
+  llvm::Value *SelValue = llvm::UndefValue::get(Types.ConvertType(selTy));
 
   CallArgList ActualArgs;
   if (!IsSuper)
@@ -2168,10 +2159,15 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
     canMessageReceiverBeNull(CGF, Method, IsSuper, ClassReceiver, Arg0);
 
   bool RequiresNullCheck = false;
+  bool RequiresSelValue = true;
 
   llvm::FunctionCallee Fn = nullptr;
   if (Method && Method->isDirectMethod()) {
+    assert(!IsSuper);
     Fn = GenerateDirectMethod(Method, Method->getClassInterface());
+    // Direct methods will synthesize the proper `_cmd` internally,
+    // so just don't bother with setting the `_cmd` argument.
+    RequiresSelValue = false;
   } else if (CGM.ReturnSlotInterferesWithArgs(MSI.CallInfo)) {
     if (ReceiverCanBeNull) RequiresNullCheck = true;
     Fn = (ObjCABI == 2) ?  ObjCTypes.getSendStretFn2(IsSuper)
@@ -2209,6 +2205,12 @@ CGObjCCommonMac::EmitMessageSend(CodeGen::CodeGenFunction &CGF,
     nullReturn.init(CGF, Arg0);
   }
 
+  // If a selector value needs to be passed, emit the load before the call.
+  if (RequiresSelValue) {
+    SelValue = GetSelector(CGF, Sel);
+    ActualArgs[1] = CallArg(RValue::get(SelValue), selTy);
+  }
+
   llvm::CallBase *CallSite;
   CGCallee Callee = CGCallee::forDirect(BitcastFn);
   RValue rvalue = CGF.EmitCall(MSI.CallInfo, Callee, Return, ActualArgs,
diff --git a/clang/test/CodeGenObjC/arc-foreach.m b/clang/test/CodeGenObjC/arc-foreach.m
index 441b134fc2c03..7b1905a6ff756 100644
--- a/clang/test/CodeGenObjC/arc-foreach.m
+++ b/clang/test/CodeGenObjC/arc-foreach.m
@@ -51,9 +51,9 @@ void test0(NSArray *array) {
 // CHECK-LP64-NEXT: [[SAVED_ARRAY:%.*]] = bitcast i8* [[T2]] to [[ARRAY_T]]*
 
 // Call the enumeration method.
-// CHECK-LP64-NEXT: [[T0:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
-// CHECK-LP64-NEXT: [[T1:%.*]] = bitcast [[ARRAY_T]]* [[SAVED_ARRAY]] to i8*
-// CHECK-LP64-NEXT: [[SIZE:%.*]] = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, [[STATE_T]]*, [16 x i8*]*, i64)*)(i8* [[T1]], i8* [[T0]], [[STATE_T]]* [[STATE]], [16 x i8*]* [[BUFFER]], i64 16)
+// CHECK-LP64-NEXT: [[T0:%.*]] = bitcast [[ARRAY_T]]* [[SAVED_ARRAY]] to i8*
+// CHECK-LP64-NEXT: [[T1:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+// CHECK-LP64-NEXT: [[SIZE:%.*]] = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, [[STATE_T]]*, [16 x i8*]*, i64)*)(i8* [[T0]], i8* [[T1]], [[STATE_T]]* [[STATE]], [16 x i8*]* [[BUFFER]], i64 16)
 
 // Check for a nonzero result.
 // CHECK-LP64-NEXT: [[T0:%.*]] = icmp eq i64 [[SIZE]], 0
@@ -78,9 +78,9 @@ void test0(NSArray *array) {
 // CHECK-LP64-OPT: [[CAPTURE:%.*]] = load i8*, i8** [[D0]]
 // CHECK-LP64-OPT: call void (...) @llvm.objc.clang.arc.use(i8* [[CAPTURE]])
 
-// CHECK-LP64:      [[T0:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
-// CHECK-LP64-NEXT: [[T1:%.*]] = bitcast [[ARRAY_T]]* [[SAVED_ARRAY]] to i8*
-// CHECK-LP64-NEXT: [[SIZE:%.*]] = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, [[STATE_T]]*, [16 x i8*]*, i64)*)(i8* [[T1]], i8* [[T0]], [[STATE_T]]* [[STATE]], [16 x i8*]* [[BUFFER]], i64 16)
+// CHECK-LP64:      [[T0:%.*]] = bitcast [[ARRAY_T]]* [[SAVED_ARRAY]] to i8*
+// CHECK-LP64-NEXT: [[T1:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+// CHECK-LP64-NEXT: [[SIZE:%.*]] = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, [[STATE_T]]*, [16 x i8*]*, i64)*)(i8* [[T0]], i8* [[T1]], [[STATE_T]]* [[STATE]], [16 x i8*]* [[BUFFER]], i64 16)
 
 // Release the array.
 // CHECK-LP64:      [[T0:%.*]] = bitcast [[ARRAY_T]]* [[SAVED_ARRAY]] to i8*
@@ -141,8 +141,8 @@ void test2(Test2 *a) {
 // CHECK-LP64-NEXT: [[COLL:%.*]] = bitcast i8* [[T2]] to [[ARRAY_T]]*
 
 // Make sure it's not immediately released before starting the iteration.
-// CHECK-LP64-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-LP64-NEXT: [[T0:%.*]] = bitcast [[ARRAY_T]]* [[COLL]] to i8*
+// CHECK-LP64-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-LP64-NEXT: @objc_msgSend
 
 // This bitcast is for the mutation check.
@@ -151,6 +151,7 @@ void test2(Test2 *a) {
 
 // This bitcast is for the 'next' message send.
 // CHECK-LP64:      [[T0:%.*]] = bitcast [[ARRAY_T]]* [[COLL]] to i8*
+// CHECK-LP64-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-LP64-NEXT: @objc_msgSend
 
 // This bitcast is for the final release.
diff --git a/clang/test/CodeGenObjC/arc-literals.m b/clang/test/CodeGenObjC/arc-literals.m
index 39d923b65c9de..257c330b9c5ff 100644
--- a/clang/test/CodeGenObjC/arc-literals.m
+++ b/clang/test/CodeGenObjC/arc-literals.m
@@ -51,8 +51,8 @@ void test_array(id a, id b) {
   // CHECK-NEXT: store i8* [[V1]], i8** [[T0]]
 
   // CHECK-NEXT: [[T0:%.*]] = load [[CLASS_T:%.*]]*, [[CLASS_T:%.*]]** @"OBJC_CLASSLIST
-  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[CLASS_T]]* [[T0]] to i8*
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
   // CHECK-NEXT: [[T2:%.*]] = bitcast [2 x i8*]* [[OBJECTS]] to i8**
   // CHECK-NEXT: [[T3:%.*]] = call i8* bitcast ({{.*@objc_msgSend.*}})(i8* [[T1]], i8* [[SEL]], i8** [[T2]], i64 2) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   // CHECK: call void (...) @llvm.objc.clang.arc.use(i8* [[V0]], i8* [[V1]])
@@ -93,8 +93,8 @@ void test_dictionary(id k1, id o1, id k2, id o2) {
 
   // Constructing the dictionary
   // CHECK-NEXT: [[T0:%.*]] = load [[CLASS_T:%.*]]*, [[CLASS_T:%.*]]** @"OBJC_CLASSLIST
-  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[CLASS_T]]* [[T0]] to i8*
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
   // CHECK-NEXT: [[T2:%.*]] = bitcast [2 x i8*]* [[OBJECTS]] to i8**
   // CHECK-NEXT: [[T3:%.*]] = bitcast [2 x i8*]* [[KEYS]] to i8**
   // CHECK-NEXT: [[T4:%.*]] = call i8* bitcast ({{.*@objc_msgSend.*}})(i8* [[T1]], i8* [[SEL]], i8** [[T2]], i8** [[T3]], i64 2) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
@@ -126,8 +126,8 @@ void test_property(B *b) {
   // CHECK:      [[T0:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[OBJECTS:%.*]], i64 0, i64 0
 
   // Invoke 'prop'
-  // CHECK:      [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
-  // CHECK-NEXT: [[T1:%.*]] = bitcast
+  // CHECK:      [[T1:%.*]] = bitcast
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
   // CHECK-NEXT: [[V0:%.*]] = call [[B:%.*]]* bitcast ({{.*}} @objc_msgSend to {{.*}})(i8* [[T1]], i8* [[SEL]]) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.noop.use([[B]]* [[V0]])
   // CHECK-NEXT: [[V1:%.*]] = bitcast [[B]]* [[V0]] to i8*
@@ -137,8 +137,8 @@ void test_property(B *b) {
 
   // Invoke arrayWithObjects:count:
   // CHECK-NEXT: [[T0:%.*]] = load [[CLASS_T]]*, [[CLASS_T]]** @"OBJC_CLASSLIST
-  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[CLASS_T]]* [[T0]] to i8*
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
   // CHECK-NEXT: [[T2:%.*]] = bitcast [1 x i8*]* [[OBJECTS]] to i8**
   // CHECK-NEXT: [[T3:%.*]] = call i8* bitcast ({{.*}} @objc_msgSend to {{.*}}(i8* [[T1]], i8* [[SEL]], i8** [[T2]], i64 1) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.noop.use(i8* [[T3]])
@@ -158,4 +158,4 @@ void test_property(B *b) {
   // Destroy b
   // CHECK: call void @llvm.objc.release
   // CHECK-NEXT: ret void
-}
\ No newline at end of file
+}
diff --git a/clang/test/CodeGenObjC/arc-loadweakretained-release.m b/clang/test/CodeGenObjC/arc-loadweakretained-release.m
index cbec56bd31421..4c4216525ea0a 100644
--- a/clang/test/CodeGenObjC/arc-loadweakretained-release.m
+++ b/clang/test/CodeGenObjC/arc-loadweakretained-release.m
@@ -30,8 +30,8 @@ int main (int argc, const char * argv[]) {
 
 // CHECK: [[SIXTEEN:%.*]]  = call i8* @llvm.objc.loadWeakRetained(i8** {{%.*}})
 // CHECK-NEXT:  [[SEVENTEEN:%.*]] = bitcast i8* [[SIXTEEN]] to {{%.*}}
-// CHECK-NEXT:  [[EIGHTEEN:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.6
 // CHECK-NEXT:  [[NINETEEN:%.*]] = bitcast %0* [[SEVENTEEN]] to i8*
+// CHECK-NEXT:  [[EIGHTEEN:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.6
 // CHECK-NEXT:  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend
 // CHECK-NEXT:  [[TWENTY:%.*]] = bitcast %0* [[SEVENTEEN]] to i8*
 // CHECK-NEXT:  call void @llvm.objc.release(i8* [[TWENTY]])
diff --git a/clang/test/CodeGenObjC/arc-precise-lifetime.m b/clang/test/CodeGenObjC/arc-precise-lifetime.m
index f303fa325ff14..5d0efb0914e81 100644
--- a/clang/test/CodeGenObjC/arc-precise-lifetime.m
+++ b/clang/test/CodeGenObjC/arc-precise-lifetime.m
@@ -50,8 +50,8 @@ void test1a_message(void) {
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
   // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutorelease(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
-  // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T5:%.*]] = bitcast [[TEST1]]* [[T3]] to i8*
+  // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T6:%.*]] = call i8* bitcast
   // CHECK-NEXT: store i8* [[T6]], i8**
   // CHECK-NEXT: [[CPTR2:%.*]] = bitcast i8** [[C]] to i8*
@@ -82,8 +82,8 @@ void test1a_property(void) {
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
   // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutorelease(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
-  // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T5:%.*]] = bitcast [[TEST1]]* [[T3]] to i8*
+  // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T6:%.*]] = call i8* bitcast
   // CHECK-NEXT: store i8* [[T6]], i8**
   // CHECK-NEXT: [[CPTR2:%.*]] = bitcast i8** [[C]] to i8*
@@ -111,8 +111,8 @@ void test1b_message(void) {
   // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[CPTR1]])
   // CHECK-NEXT: [[T0:%.*]] = load [[TEST1]]*, [[TEST1]]**
-  // CHECK-NEXT: [[T1:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T2:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
+  // CHECK-NEXT: [[T1:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T3:%.*]] = call i8* bitcast
   // CHECK-NEXT: store i8* [[T3]], i8**
   // CHECK-NEXT: [[CPTR2:%.*]] = bitcast i8** [[C]] to i8*
@@ -140,8 +140,8 @@ void test1b_property(void) {
   // CHECK-NEXT: [[CPTR1:%.*]] = bitcast i8** [[C]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[CPTR1]])
   // CHECK-NEXT: [[T0:%.*]] = load [[TEST1]]*, [[TEST1]]**
-  // CHECK-NEXT: [[T1:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T2:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
+  // CHECK-NEXT: [[T1:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T3:%.*]] = call i8* bitcast
   // CHECK-NEXT: store i8* [[T3]], i8**
   // CHECK-NEXT: [[CPTR2:%.*]] = bitcast i8** [[C]] to i8*
@@ -172,8 +172,8 @@ void test1c_message(void) {
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
   // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutorelease(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
-  // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T5:%.*]] = bitcast [[TEST1]]* [[T3]] to i8*
+  // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T6:%.*]] = call i8* bitcast
   // CHECK-NEXT: store i8* [[T6]], i8**
   // CHECK-NEXT: [[PCPTR2:%.*]] = bitcast i8** [[PC]] to i8*
@@ -203,8 +203,8 @@ void test1c_property(void) {
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
   // CHECK-NEXT: [[T2:%.*]] = call i8* @llvm.objc.retainAutorelease(i8* [[T1]])
   // CHECK-NEXT: [[T3:%.*]] = bitcast i8* [[T2]] to [[TEST1]]*
-  // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T5:%.*]] = bitcast [[TEST1]]* [[T3]] to i8*
+  // CHECK-NEXT: [[T4:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T6:%.*]] = call i8* bitcast
   // CHECK-NEXT: store i8* [[T6]], i8**
   // CHECK-NEXT: [[PCPTR2:%.*]] = bitcast i8** [[PC]] to i8*
@@ -231,9 +231,9 @@ void test1d_message(void) {
   // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PCPTR1]])
   // CHECK-NEXT: [[T0:%.*]] = load [[TEST1]]*, [[TEST1]]**
-  // CHECK-NEXT: [[SEVEN:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[EIGHT:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[CALL1:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* noundef [[EIGHT]], i8* noundef [[SEVEN]])
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+  // CHECK-NEXT: [[CALL1:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* noundef [[EIGHT]], i8* noundef [[SEL]])
   // CHECK-NEXT: store i8* [[CALL1]], i8**
   // CHECK-NEXT: [[PCPTR2:%.*]] = bitcast i8** [[PC]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[PCPTR2]])
@@ -259,9 +259,9 @@ void test1d_property(void) {
   // CHECK-NEXT: [[PCPTR1:%.*]] = bitcast i8** [[PC]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[PCPTR1]])
   // CHECK-NEXT: [[T0:%.*]] = load [[TEST1]]*, [[TEST1]]**
-  // CHECK-NEXT: [[SEVEN:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[EIGHT:%.*]] = bitcast [[TEST1]]* [[T0]] to i8*
-  // CHECK-NEXT: [[CALL1:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* noundef [[EIGHT]], i8* noundef [[SEVEN]])
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+  // CHECK-NEXT: [[CALL1:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* noundef [[EIGHT]], i8* noundef [[SEL]])
   // CHECK-NEXT: store i8* [[CALL1]], i8**
   // CHECK-NEXT: [[PCPTR2:%.*]] = bitcast i8** [[PC]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[PCPTR2]])
diff --git a/clang/test/CodeGenObjC/arc-property.m b/clang/test/CodeGenObjC/arc-property.m
index 5c33eaffcb634..491a8275a8e98 100644
--- a/clang/test/CodeGenObjC/arc-property.m
+++ b/clang/test/CodeGenObjC/arc-property.m
@@ -101,14 +101,14 @@ void test3(Test3 *t) {
 // CHECK-NEXT: [[X:%.*]] = alloca i8*,
 //   Property access.
 // CHECK:      [[T0:%.*]] = load [[TEST3]]*, [[TEST3]]** [[T]],
-// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST3]]* [[T0]] to i8*
+// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T2:%.*]] = call i8* bitcast ({{.*}} @objc_msgSend to {{.*}})(i8* noundef [[T1]], i8* noundef [[SEL]])
 // CHECK-NEXT: store i8* [[T2]], i8** [[X]],
 //   Message send.
 // CHECK-NEXT: [[T0:%.*]] = load [[TEST3]]*, [[TEST3]]** [[T]],
-// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST3]]* [[T0]] to i8*
+// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T2:%.*]] = call i8* bitcast ({{.*}} @objc_msgSend to {{.*}})(i8* noundef [[T1]], i8* noundef [[SEL]])
 // CHECK-NEXT: [[T3:%.*]] = load i8*, i8** [[X]],
 // CHECK-NEXT: store i8* [[T2]], i8** [[X]],
diff --git a/clang/test/CodeGenObjC/arc-related-result-type.m b/clang/test/CodeGenObjC/arc-related-result-type.m
index 54a63009d0b5e..45d873bb8b07e 100644
--- a/clang/test/CodeGenObjC/arc-related-result-type.m
+++ b/clang/test/CodeGenObjC/arc-related-result-type.m
@@ -14,8 +14,8 @@ void test0(Test0 *val) {
 // CHECK-NEXT: bitcast
 // CHECK-NEXT: call void @llvm.objc.storeStrong(
 // CHECK-NEXT: load [[TEST0]]*, [[TEST0]]** [[VAL]],
-// CHECK-NEXT: load
 // CHECK-NEXT: bitcast
+// CHECK-NEXT: load
 // CHECK-NEXT: [[T0:%.*]] = call i8* bitcast (
 // CHECK-NEXT: [[T1:%.*]] = notail call i8* @llvm.objc.retainAutoreleasedReturnValue(i8* [[T0]])
 // CHECK-NEXT: [[T2:%.*]] = bitcast i8* [[T1]] to [[TEST0]]*
diff --git a/clang/test/CodeGenObjC/arc.m b/clang/test/CodeGenObjC/arc.m
index 74a59f7a40608..150f87bc9c0c2 100644
--- a/clang/test/CodeGenObjC/arc.m
+++ b/clang/test/CodeGenObjC/arc.m
@@ -108,8 +108,8 @@ void test3_unelided() {
 
   // Call to +alloc.
   // CHECK-NEXT: load {{.*}}, {{.*}}* @"OBJC_CLASSLIST_REFERENCES_
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[ALLOC:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend
   // CHECK-NEXT: bitcast
   // CHECK-NEXT: bitcast
@@ -117,8 +117,8 @@ void test3_unelided() {
   [Test3 alloc];
 
   // CHECK-NEXT: [[T0:%.*]] = load [[TEST3]]*, [[TEST3]]** [[X]]
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST3]]* [[T0]] to i8*
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[COPY:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend {{.*}})(i8* noundef [[T1]],
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[COPY]]) [[NUW:#[0-9]+]]
   [x copy];
@@ -141,14 +141,14 @@ void test3() {
 
   // Call to +alloc.
   // CHECK-NEXT: load {{.*}}, {{.*}}* @"OBJC_CLASSLIST_REFERENCES_
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[ALLOC:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend
   // CHECK-NEXT: bitcast
 
   // Call to -initWith: with elided retain of consumed argument.
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[INIT:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8*
   // CHECK-NEXT: bitcast
   // CHECK-NEXT: [[INIT:%.*]] = bitcast
@@ -179,14 +179,14 @@ void test3() {
 id test4() {
   // Call to +alloc.
   // CHECK:      load {{.*}}, {{.*}}* @"OBJC_CLASSLIST_REFERENCES_
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[ALLOC:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend
   // CHECK-NEXT: [[ALLOC:%.*]] = bitcast
 
   // Call to -initWith: with elided retain of consumed argument.
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[ALLOC:%.*]] = bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: [[INIT:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* noundef [[ALLOC]],
 
   // Initialization of return value, occurring within full-expression.
@@ -314,12 +314,12 @@ void test10() {
   // CHECK-NEXT: [[YPTR1:%.*]] = bitcast i8** [[Y]] to i8*
   // CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[YPTR1]])
   // CHECK-NEXT: load [[TEST10]]*, [[TEST10]]** [[X]], align
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_{{[0-9]*}}
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_{{[0-9]*}}
   // CHECK-NEXT: [[V:%.*]] = call [[TEST10]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend{{.*}} [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.noop.use(%3* [[V]])
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_{{[0-9]*}}
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_{{[0-9]*}}
   // CHECK-NEXT: [[T3:%.*]] = call [[TEST10]]* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend{{.*}} [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.noop.use(%3* [[T3]])
   // CHECK-NEXT: [[T4:%.*]] = bitcast [[TEST10]]* [[T3]] to i8*
@@ -721,9 +721,9 @@ - (id) init {
 // CHECK-NEXT: store [[TEST29]]* null, [[TEST29]]** [[SELF]]
 
 // Actual message send.
-// CHECK-NEXT: [[T2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
-// CHECK-NEXT: [[T3:%.*]] = bitcast [[TEST29]]* [[T0]] to i8*
-// CHECK-NEXT: [[CALL:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* noundef [[T3]], i8* noundef [[T2]], i8* noundef [[T1]])
+// CHECK-NEXT: [[T2:%.*]] = bitcast [[TEST29]]* [[T0]] to i8*
+// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+// CHECK-NEXT: [[CALL:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* noundef [[T2]], i8* noundef [[SEL]], i8* noundef [[T1]])
 
 // Implicit write of result back into 'self'.  This is not supposed to
 // be detectable because we're supposed to ban accesses to the old
@@ -824,9 +824,9 @@ - (id) init {
 
 // Call.
 // CHECK-NEXT: [[T0:%.*]] = load [[TEST30]]*, [[TEST30]]** [[SELF]]
-// CHECK-NEXT: [[T1:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
-// CHECK-NEXT: [[T2:%.*]] = bitcast [[TEST30]]* [[T0]] to i8*
-// CHECK-NEXT: [[CALL:%.*]] = call [[TEST30_HELPER:%.*]]* bitcast {{.*}} @objc_msgSend {{.*}}(i8* noundef [[T2]], i8* noundef [[T1]])
+// CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST30]]* [[T0]] to i8*
+// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+// CHECK-NEXT: [[CALL:%.*]] = call [[TEST30_HELPER:%.*]]* bitcast {{.*}} @objc_msgSend {{.*}}(i8* noundef [[T1]], i8* noundef [[SEL]])
 
 // Assignment.
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[TEST30_HELPER]]* [[CALL]] to i8*
@@ -911,8 +911,8 @@ void test33(Test33 *ptr) {
   // CHECK-NEXT: load [[TEST33]]*, [[TEST33]]** [[PTR]]
   // CHECK-NEXT: [[W0:%.*]] = load [[A_T]]*, [[A_T]]** [[A]]
   // CHECK-NEXT: store [[A_T]]* [[W0]], [[A_T]]** [[TEMP0]]
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: objc_msgSend{{.*}}, [[A_T]]** noundef [[TEMP0]])
   // CHECK-NEXT: [[T0:%.*]] = load [[A_T]]*, [[A_T]]** [[TEMP0]]
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[A_T]]* [[T0]] to i8*
@@ -927,8 +927,8 @@ void test33(Test33 *ptr) {
   // CHECK-NEXT: load [[TEST33]]*, [[TEST33]]** [[PTR]]
   // CHECK-NEXT: [[W0:%.*]] = load [[A_T]]*, [[A_T]]** [[A]]
   // CHECK-NEXT: store [[A_T]]* [[W0]], [[A_T]]** [[TEMP1]]
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: objc_msgSend{{.*}}, [[A_T]]** noundef [[TEMP1]])
   // CHECK-NEXT: [[T0:%.*]] = load [[A_T]]*, [[A_T]]** [[TEMP1]]
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[A_T]]* [[T0]] to i8*
@@ -941,20 +941,20 @@ void test33(Test33 *ptr) {
   // CHECK-NEXT: call void @llvm.objc.release(i8* [[T5]])
 
   // CHECK-NEXT: load [[TEST33]]*, [[TEST33]]** [[PTR]]
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: objc_msgSend{{.*}}, [[A_T]]** noundef [[A]])
 
   // CHECK-NEXT: load [[TEST33]]*, [[TEST33]]** [[PTR]]
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: objc_msgSend{{.*}}, [[A_T]]** noundef [[A]])
 
   // 'out'
   // CHECK-NEXT: load [[TEST33]]*, [[TEST33]]** [[PTR]]
   // CHECK-NEXT: store [[A_T]]* null, [[A_T]]** [[TEMP2]]
-  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: bitcast
+  // CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
   // CHECK-NEXT: objc_msgSend{{.*}}, [[A_T]]** noundef [[TEMP2]])
   // CHECK-NEXT: [[T0:%.*]] = load [[A_T]]*, [[A_T]]** [[TEMP2]]
   // CHECK-NEXT: [[T1:%.*]] = bitcast [[A_T]]* [[T0]] to i8*
@@ -1453,11 +1453,12 @@ void test66(void) {
 // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.noop.use([[TEST66]]* [[T3]])
 // CHECK-NEXT: [[T4:%.*]] = call i8* @test66_arg(){{.*}} [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
 // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.noop.use(i8* [[T4]])
-// CHECK-NEXT: [[T6:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T7:%.*]] = bitcast [[TEST66]]* [[T3]] to i8*
 // CHECK-NEXT: [[SIX:%.*]] = icmp eq i8* [[T7]], null
-// CHECK-NEXT: br i1 [[SIX]], label [[NULINIT:%.*]], label [[CALL:%.*]]
-// CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* noundef [[T7]], i8* noundef [[T6]], i8* noundef [[T4]])
+// CHECK-NEXT: br i1 [[SIX]], label [[NULINIT:%.*]], label %[[CALL:.*]]
+// CHECK:      [[CALL]]:
+// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES
+// CHECK-NEXT: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*)*)(i8* noundef [[T7]], i8* noundef [[SEL]], i8* noundef [[T4]])
 // CHECK-NEXT: br label [[CONT:%.*]]
 // CHECK: call void @llvm.objc.release(i8* [[T4]]) [[NUW]]
 // CHECK-NEXT: br label [[CONT:%.*]]
diff --git a/clang/test/CodeGenObjC/class-stubs.m b/clang/test/CodeGenObjC/class-stubs.m
index 9f7f240b889bd..31f423f36edad 100644
--- a/clang/test/CodeGenObjC/class-stubs.m
+++ b/clang/test/CodeGenObjC/class-stubs.m
@@ -42,8 +42,8 @@ int main() {
 // CHECK-LABEL: define{{.*}} i32 @main()
 // CHECK-NEXT: entry:
 // CHECK-NEXT:   [[CLASS:%.*]] = call %struct._class_t* @objc_loadClassref(i8** @"OBJC_CLASSLIST_REFERENCES_$_")
-// CHECK-NEXT:   [[SELECTOR:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-NEXT:   [[RECEIVER:%.*]] = bitcast %struct._class_t* [[CLASS]] to i8*
+// CHECK-NEXT:   [[SELECTOR:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-NEXT:   call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*)*)(i8* noundef [[RECEIVER]], i8* noundef [[SELECTOR]])
 // CHECK-NEXT:   ret i32 0
 
diff --git a/clang/test/CodeGenObjC/matrix-type-operators.m b/clang/test/CodeGenObjC/matrix-type-operators.m
index 4e5684e283fc2..784284412625a 100644
--- a/clang/test/CodeGenObjC/matrix-type-operators.m
+++ b/clang/test/CodeGenObjC/matrix-type-operators.m
@@ -13,13 +13,13 @@ @interface IntValue
 // CHECK-LABEL: @test_index_placeholders(
 // CHECK-NEXT:  entry:
 // CHECK:         [[IV:%.*]] = load %0*, %0** [[IV_ADDR:%.*]], align 8
-// CHECK-NEXT:    [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[IV_PTR:%.*]] = bitcast %0* [[IV]] to i8*
+// CHECK-NEXT:    [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[CALL:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* noundef [[IV_PTR]], i8* noundef [[SEL]])
 // CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
 // CHECK-NEXT:    [[IV2:%.*]] = load %0*, %0** [[IV_ADDR]], align 8
-// CHECK-NEXT:    [[SEL2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[IV2_PTR:%.*]] = bitcast %0* [[IV2]] to i8*
+// CHECK-NEXT:    [[SEL2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[CALL1:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* noundef [[IV2_PTR]], i8* noundef [[SEL2]])
 // CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[CALL1]] to i64
 // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[CONV2]], 4
@@ -40,20 +40,20 @@ @interface MatrixValue
 
 // CHECK-LABEL: @test_base_and_index_placeholders(
 // CHECK:         [[IV:%.*]] = load %0*, %0** [[IV_ADDR:%.*]], align 8
-// CHECK-NEXT:    [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[IV_PTR:%.*]] = bitcast %0* [[IV]] to i8*
+// CHECK-NEXT:    [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[CALL:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* noundef [[IV_PTR]], i8* noundef [[SEL]])
 // CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[CALL]] to i64
 // CHECK-NEXT:    [[IV2:%.*]] = load %0*, %0** [[IV_ADDR]], align 8
-// CHECK-NEXT:    [[SEL2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[IV2_PTR:%.*]] = bitcast %0* [[IV2]] to i8*
+// CHECK-NEXT:    [[SEL2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[CALL1:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* noundef [[IV2_PTR]], i8* noundef [[SEL2]])
 // CHECK-NEXT:    [[CONV2:%.*]] = sext i32 [[CALL1]] to i64
 // CHECK-NEXT:    [[IDX1:%.*]] = mul i64 [[CONV2]], 4
 // CHECK-NEXT:    [[IDX2:%.*]] = add i64 [[IDX1]], [[CONV]]
 // CHECK-NEXT:    [[M:%.*]] = load %1*, %1** %m.addr, align 8
-// CHECK-NEXT:    [[SEL3:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[M_PTR:%.*]] = bitcast %1* [[M]] to i8*
+// CHECK-NEXT:    [[SEL3:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load !7
 // CHECK-NEXT:    [[MAT:%.*]] = call <16 x double> bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to <16 x double> (i8*, i8*)*)(i8* noundef [[M_PTR]], i8* noundef [[SEL3]])
 // CHECK-NEXT:    [[MATEXT:%.*]] = extractelement <16 x double> [[MAT]], i64 [[IDX2]]
 // CHECK-NEXT:    ret double [[MATEXT]]
diff --git a/clang/test/CodeGenObjC/ns_consume_null_check.m b/clang/test/CodeGenObjC/ns_consume_null_check.m
index e02654d4e21b2..6d562edcf5b0e 100644
--- a/clang/test/CodeGenObjC/ns_consume_null_check.m
+++ b/clang/test/CodeGenObjC/ns_consume_null_check.m
@@ -50,13 +50,14 @@ void test1(void) {
 // CHECK-NEXT: [[T0:%.*]] = load [[MYOBJECT:%.*]]*, [[MYOBJECT:%.*]]** @x
 // CHECK-NEXT: [[ARG:%.*]] = load i8*, i8** [[OBJ]]
 // CHECK-NEXT: [[ARG_RETAINED:%.*]] = call i8* @llvm.objc.retain(i8* [[ARG]])
-// CHECK-NEXT: load i8*, i8** @
 // CHECK-NEXT: [[SELF:%.*]] = bitcast [[MYOBJECT]]* [[T0]] to i8*
 //   Null check.
 // CHECK-NEXT: [[T0:%.*]] = icmp eq i8* [[SELF]], null
-// CHECK-NEXT: br i1 [[T0]], label [[FORNULL:%.*]], label [[FORCALL:%.*]]
+// CHECK-NEXT: br i1 [[T0]], label [[FORNULL:%.*]], label %[[FORCALL:.*]]
 //   Invoke and produce the return values.
-// CHECK:      [[CALL:%.*]] = invoke <2 x float> bitcast
+// CHECK:     [[FORCALL]]:
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+// CHECK-NEXT: [[CALL:%.*]] = invoke <2 x float> bitcast
 // CHECK-NEXT:   to label [[INVOKE_CONT:%.*]] unwind label {{%.*}}
 // CHECK:      [[T0:%.*]] = bitcast { float, float }* [[COERCE:%.*]] to <2 x float>*
 // CHECK-NEXT: store <2 x float> [[CALL]], <2 x float>* [[T0]],
diff --git a/clang/test/CodeGenObjC/nsvalue-objc-boxable-ios-arc.m b/clang/test/CodeGenObjC/nsvalue-objc-boxable-ios-arc.m
index d1ad8a0d3cbed..6e45e94b8834e 100644
--- a/clang/test/CodeGenObjC/nsvalue-objc-boxable-ios-arc.m
+++ b/clang/test/CodeGenObjC/nsvalue-objc-boxable-ios-arc.m
@@ -21,8 +21,8 @@ void doRange() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSRange* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSRange* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSRange ns_range = { .location = 0, .length = 42 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* {{.*}}[[RANGE_STR]]{{.*}})
   // CHECK:      call i8* @llvm.objc.retainAutoreleasedReturnValue
@@ -40,8 +40,8 @@ void doPoint() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.CGPoint* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.CGPoint* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   CGPoint cg_point = { .x = 42, .y = 24 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* {{.*}}[[POINT_STR]]{{.*}})
   // CHECK:      call i8* @llvm.objc.retainAutoreleasedReturnValue
@@ -59,8 +59,8 @@ void doSize() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.CGSize* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.CGSize* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   CGSize cg_size = { .width = 42, .height = 24 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* {{.*}}[[SIZE_STR]]{{.*}})
   // CHECK:      call i8* @llvm.objc.retainAutoreleasedReturnValue
@@ -78,8 +78,8 @@ void doRect() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.CGRect* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.CGRect* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   CGPoint cg_point = { .x = 42, .y = 24 };
   CGSize cg_size = { .width = 42, .height = 24 };
   CGRect cg_rect = { .origin = cg_point, .size = cg_size };
@@ -99,8 +99,8 @@ void doNSEdgeInsets() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.NSEdgeInsets* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.NSEdgeInsets* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSEdgeInsets ns_edge_insets;
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8*{{.*}}[[EDGE_STR]]{{.*}})
   // CHECK:      call i8* @llvm.objc.retainAutoreleasedReturnValue
@@ -115,8 +115,8 @@ void doRangeRValue() {
   // CHECK:     [[RECV_PTR:%.*]]        = load {{.*}} [[NSVALUE]]
   // CHECK:     call {{.*}} @getRange {{.*}} [[COERCE]]{{.*}}
   // CHECK:     [[COERCE_CAST:%.*]]     = bitcast %struct._NSRange* [[COERCE]]{{.*}}
-  // CHECK:     [[SEL:%.*]]             = load i8*, i8** [[VALUE_SEL]]
   // CHECK:     [[RECV:%.*]]            = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:     [[SEL:%.*]]             = load i8*, i8** [[VALUE_SEL]]
   // CHECK:     call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[COERCE_CAST]], i8* {{.*}}[[RANGE_STR]]{{.*}})
   // CHECK:     call i8* @llvm.objc.retainAutoreleasedReturnValue
   NSValue *range_rvalue = @(getRange());
diff --git a/clang/test/CodeGenObjC/nsvalue-objc-boxable-ios.m b/clang/test/CodeGenObjC/nsvalue-objc-boxable-ios.m
index be4b7ca22d6e1..c7688f8a5696a 100644
--- a/clang/test/CodeGenObjC/nsvalue-objc-boxable-ios.m
+++ b/clang/test/CodeGenObjC/nsvalue-objc-boxable-ios.m
@@ -21,8 +21,8 @@ void doRange() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSRange* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSRange* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSRange ns_range = { .location = 0, .length = 42 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* noundef {{.*}}[[RANGE_STR]]{{.*}})
   NSValue *range = @(ns_range);
@@ -38,8 +38,8 @@ void doPoint() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.CGPoint* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.CGPoint* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   CGPoint cg_point = { .x = 42, .y = 24 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* noundef {{.*}}[[POINT_STR]]{{.*}})
   NSValue *point = @(cg_point);
@@ -55,8 +55,8 @@ void doSize() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.CGSize* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.CGSize* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   CGSize cg_size = { .width = 42, .height = 24 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* noundef {{.*}}[[SIZE_STR]]{{.*}})
   NSValue *size = @(cg_size);
@@ -72,8 +72,8 @@ void doRect() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.CGRect* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.CGRect* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   CGPoint cg_point = { .x = 42, .y = 24 };
   CGSize cg_size = { .width = 42, .height = 24 };
   CGRect cg_rect = { .origin = cg_point, .size = cg_size };
@@ -91,8 +91,8 @@ void doNSEdgeInsets() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.NSEdgeInsets* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.NSEdgeInsets* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSEdgeInsets ns_edge_insets;
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8*{{.*}}[[EDGE_STR]]{{.*}})
   NSValue *edge_insets = @(ns_edge_insets);
@@ -105,8 +105,8 @@ void doRangeRValue() {
   // CHECK:     [[RECV_PTR:%.*]]        = load {{.*}} [[NSVALUE]]
   // CHECK:     call {{.*}} @getRange {{.*}} [[COERCE]]{{.*}}
   // CHECK:     [[COERCE_CAST:%.*]]     = bitcast %struct._NSRange* [[COERCE]]{{.*}}
-  // CHECK:     [[SEL:%.*]]             = load i8*, i8** [[VALUE_SEL]]
   // CHECK:     [[RECV:%.*]]            = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:     [[SEL:%.*]]             = load i8*, i8** [[VALUE_SEL]]
   // CHECK:     call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[COERCE_CAST]], i8* noundef {{.*}}[[RANGE_STR]]{{.*}})
   NSValue *range_rvalue = @(getRange());
   // CHECK: ret void
diff --git a/clang/test/CodeGenObjC/nsvalue-objc-boxable-mac-arc.m b/clang/test/CodeGenObjC/nsvalue-objc-boxable-mac-arc.m
index 5efdcfd542249..cc8b1cbea3222 100644
--- a/clang/test/CodeGenObjC/nsvalue-objc-boxable-mac-arc.m
+++ b/clang/test/CodeGenObjC/nsvalue-objc-boxable-mac-arc.m
@@ -21,8 +21,8 @@ void doRange() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSRange* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSRange* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSRange ns_range = { .location = 0, .length = 42 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* {{.*}}[[RANGE_STR]]{{.*}}) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   NSValue *range = @(ns_range);
@@ -39,8 +39,8 @@ void doPoint() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSPoint* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSPoint* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSPoint ns_point = { .x = 42, .y = 24 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* {{.*}}[[POINT_STR]]{{.*}}) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   NSValue *point = @(ns_point);
@@ -57,8 +57,8 @@ void doSize() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSSize* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSSize* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSSize ns_size = { .width = 42, .height = 24 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* {{.*}}[[SIZE_STR]]{{.*}}) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   NSValue *size = @(ns_size);
@@ -75,8 +75,8 @@ void doRect() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSRect* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSRect* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSPoint ns_point = { .x = 42, .y = 24 };
   NSSize ns_size = { .width = 42, .height = 24 };
   NSRect ns_rect = { .origin = ns_point, .size = ns_size };
@@ -95,8 +95,8 @@ void doNSEdgeInsets() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.NSEdgeInsets* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.NSEdgeInsets* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSEdgeInsets ns_edge_insets;
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8*{{.*}}[[EDGE_STR]]{{.*}}) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   NSValue *edge_insets = @(ns_edge_insets);
@@ -114,8 +114,8 @@ void doRangeRValue() {
   // CHECK:     [[EXTR_RVAL:%.*]]       = extractvalue {{.*}} [[RVAL]]{{.*}}
   // CHECK:     store {{.*}}[[EXTR_RVAL]]{{.*}}[[COERCE_CAST_PTR]]{{.*}}
   // CHECK:     [[COERCE_CAST:%.*]]     = bitcast %struct._NSRange* [[COERCE]]{{.*}}
-  // CHECK:     [[SEL:%.*]]             = load i8*, i8** [[VALUE_SEL]]
   // CHECK:     [[RECV:%.*]]            = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:     [[SEL:%.*]]             = load i8*, i8** [[VALUE_SEL]]
   // CHECK:     call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[COERCE_CAST]], i8* {{.*}}[[RANGE_STR]]{{.*}}) [ "clang.arc.attachedcall"(i8* (i8*)* @llvm.objc.retainAutoreleasedReturnValue) ]
   NSValue *range_rvalue = @(getRange());
   // CHECK:     call void @llvm.objc.release
diff --git a/clang/test/CodeGenObjC/nsvalue-objc-boxable-mac.m b/clang/test/CodeGenObjC/nsvalue-objc-boxable-mac.m
index a10885d7ff75e..d30ad1056c644 100644
--- a/clang/test/CodeGenObjC/nsvalue-objc-boxable-mac.m
+++ b/clang/test/CodeGenObjC/nsvalue-objc-boxable-mac.m
@@ -21,8 +21,8 @@ void doRange() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSRange* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSRange* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSRange ns_range = { .location = 0, .length = 42 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* noundef {{.*}}[[RANGE_STR]]{{.*}})
   NSValue *range = @(ns_range);
@@ -38,8 +38,8 @@ void doPoint() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSPoint* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSPoint* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSPoint ns_point = { .x = 42, .y = 24 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* noundef {{.*}}[[POINT_STR]]{{.*}})
   NSValue *point = @(ns_point);
@@ -55,8 +55,8 @@ void doSize() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSSize* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSSize* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSSize ns_size = { .width = 42, .height = 24 };
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8* noundef {{.*}}[[SIZE_STR]]{{.*}})
   NSValue *size = @(ns_size);
@@ -72,8 +72,8 @@ void doRect() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct._NSRect* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct._NSRect* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSPoint ns_point = { .x = 42, .y = 24 };
   NSSize ns_size = { .width = 42, .height = 24 };
   NSRect ns_rect = { .origin = ns_point, .size = ns_size };
@@ -91,8 +91,8 @@ void doNSEdgeInsets() {
   // CHECK:      [[LOCAL_CAST:%.*]] = bitcast %struct.NSEdgeInsets* [[LOCAL_VAR]]{{.*}}
   // CHECK:      call void @llvm.memcpy{{.*}} [[TEMP_CAST]]{{.*}} [[LOCAL_CAST]]{{.*}}
   // CHECK:      [[PARAM_CAST:%.*]] = bitcast %struct.NSEdgeInsets* [[TEMP_VAR]]{{.*}}
-  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   // CHECK:      [[RECV:%.*]]       = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:      [[SEL:%.*]]        = load i8*, i8** [[VALUE_SEL]]
   NSEdgeInsets ns_edge_insets;
   // CHECK:      call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[PARAM_CAST]], i8*{{.*}}[[EDGE_STR]]{{.*}})
   NSValue *edge_insets = @(ns_edge_insets);
@@ -109,8 +109,8 @@ void doRangeRValue() {
   // CHECK:     [[EXTR_RVAL:%.*]]       = extractvalue {{.*}} [[RVAL]]{{.*}}
   // CHECK:     store {{.*}}[[EXTR_RVAL]]{{.*}}[[COERCE_CAST_PTR]]{{.*}}
   // CHECK:     [[COERCE_CAST:%.*]]     = bitcast %struct._NSRange* [[COERCE]]{{.*}}
-  // CHECK:     [[SEL:%.*]]             = load i8*, i8** [[VALUE_SEL]]
   // CHECK:     [[RECV:%.*]]            = bitcast %struct._class_t* [[RECV_PTR]] to i8*
+  // CHECK:     [[SEL:%.*]]             = load i8*, i8** [[VALUE_SEL]]
   // CHECK:     call {{.*objc_msgSend.*}}(i8* noundef [[RECV]], i8* noundef [[SEL]], i8* noundef [[COERCE_CAST]], i8* noundef {{.*}}[[RANGE_STR]]{{.*}})
   NSValue *range_rvalue = @(getRange());
   // CHECK: ret void
diff --git a/clang/test/CodeGenObjC/objc-container-subscripting-1.m b/clang/test/CodeGenObjC/objc-container-subscripting-1.m
index 248135d882f62..7b198d4f0973b 100644
--- a/clang/test/CodeGenObjC/objc-container-subscripting-1.m
+++ b/clang/test/CodeGenObjC/objc-container-subscripting-1.m
@@ -19,16 +19,16 @@ int main() {
 
   id oldObject = array[10];
 // CHECK: [[ARR:%.*]] = load {{%.*}} [[array:%.*]], align 8
-// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-NEXT: [[ARRC:%.*]] = bitcast {{%.*}} [[ARR]] to i8*
+// CHECK-NEXT: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-NEXT: [[CALL:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i32)*)(i8* noundef [[ARRC]], i8* noundef [[SEL]], i32 noundef 10)
 // CHECK-NEXT: store i8* [[CALL]], i8** [[OLDOBJ:%.*]], align 8
 
   val = (array[10] = oldObject);
 // CHECK:      [[FOUR:%.*]] = load i8*, i8** [[oldObject:%.*]], align 8
 // CHECK-NEXT: [[THREE:%.*]] = load {{%.*}} [[array:%.*]], align 8
-// CHECK-NEXT: [[FIVE:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.2
 // CHECK-NEXT: [[SIX:%.*]] = bitcast {{%.*}} [[THREE]] to i8*
+// CHECK-NEXT: [[FIVE:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.2
 // CHECK-NEXT: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i32)*)(i8* noundef [[SIX]], i8* noundef [[FIVE]], i8* noundef [[FOUR]], i32 noundef 10)
 // CHECK-NEXT: store i8* [[FOUR]], i8** [[val:%.*]]
 
@@ -38,8 +38,8 @@ int main() {
   oldObject = dictionary[key];
 // CHECK:  [[SEVEN:%.*]] = load {{%.*}} [[DICTIONARY:%.*]], align 8
 // CHECK-NEXT:  [[EIGHT:%.*]] = load i8*, i8** [[KEY:%.*]], align 8
-// CHECK-NEXT:  [[TEN:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.4
 // CHECK-NEXT:  [[ELEVEN:%.*]] = bitcast {{%.*}} [[SEVEN]] to i8*
+// CHECK-NEXT:  [[TEN:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.4
 // CHECK-NEXT:  [[CALL1:%.*]] = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*, i8*)*)(i8* noundef [[ELEVEN]], i8* noundef [[TEN]], i8* noundef [[EIGHT]])
 // CHECK-NEXT:  store i8* [[CALL1]], i8** [[oldObject:%.*]], align 8
 
@@ -48,8 +48,8 @@ int main() {
 // CHECK:       [[FOURTEEN:%.*]] = load i8*, i8** [[NEWOBJECT:%.*]], align 8
 // CHECK-NEXT:  [[TWELVE:%.*]] = load {{%.*}} [[DICTIONARY]], align 8
 // CHECK-NEXT:  [[THIRTEEN:%.*]] = load i8*, i8** [[KEY]], align 8
-// CHECK-NEXT:  [[SIXTEEN:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.6
 // CHECK-NEXT:  [[SEVENTEEN:%.*]] = bitcast {{%.*}} [[TWELVE]] to i8*
+// CHECK-NEXT:  [[SIXTEEN:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.6
 // CHECK-NEXT:  call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, i8*)*)(i8* noundef [[SEVENTEEN]], i8* noundef [[SIXTEEN]], i8* noundef [[FOURTEEN]], i8* noundef [[THIRTEEN]])
 // CHECK-NEXT: store i8* [[FOURTEEN]], i8** [[val:%.*]]
 }
diff --git a/clang/test/CodeGenObjC/property.m b/clang/test/CodeGenObjC/property.m
index f9b9d83f73ffa..dae9111509b00 100644
--- a/clang/test/CodeGenObjC/property.m
+++ b/clang/test/CodeGenObjC/property.m
@@ -58,22 +58,22 @@ -(int) dyn {
 A *test2_helper(void);
 void test2() {
   // CHECK:      [[BASE:%.*]] = call [[A:%.*]]* @test2_helper()
-  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8**
   // CHECK-NEXT: [[BASETMP:%.*]] = bitcast [[A]]* [[BASE]] to i8*
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8**
   // CHECK-NEXT: [[LD:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* noundef [[BASETMP]], i8* noundef [[SEL]])
   // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[LD]], 1
-  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8**
   // CHECK-NEXT: [[BASETMP:%.*]] = bitcast [[A]]* [[BASE]] to i8*
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8**
   // CHECK-NEXT: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i32)*)(i8* noundef [[BASETMP]], i8* noundef [[SEL]], i32 noundef [[ADD]])
   test2_helper().dyn++;
 
   // CHECK:      [[BASE:%.*]] = call [[A]]* @test2_helper()
-  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8**
   // CHECK-NEXT: [[BASETMP:%.*]] = bitcast [[A]]* [[BASE]] to i8*
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8**
   // CHECK-NEXT: [[LD:%.*]] = call i32 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i32 (i8*, i8*)*)(i8* noundef [[BASETMP]], i8* noundef [[SEL]])
   // CHECK-NEXT: [[ADD:%.*]] = mul nsw i32 [[LD]], 10
-  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8**
   // CHECK-NEXT: [[BASETMP:%.*]] = bitcast [[A]]* [[BASE]] to i8*
+  // CHECK-NEXT: [[SEL:%.*]] = load i8*, i8**
   // CHECK-NEXT: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i32)*)(i8* noundef [[BASETMP]], i8* noundef [[SEL]], i32 noundef [[ADD]])
   test2_helper().dyn *= 10;
 }
@@ -136,33 +136,33 @@ void test7(Test7 *t) {
 // CHECK:      [[T:%.*]] = alloca [[TEST7]]*,
 // CHECK-NEXT: store
 // CHECK-NEXT: [[T0:%.*]] = load [[TEST7]]*, [[TEST7]]** [[T]], align
-// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST7]]* [[T0]] to i8*
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T2:%.*]] = call zeroext i8 bitcast
 // CHECK-NEXT: [[T3:%.*]] = zext i8 [[T2]] to i32
 // CHECK-NEXT: [[T4:%.*]] = and i32 [[T3]], 2
 // CHECK-NEXT: [[T5:%.*]] = trunc i32 [[T4]] to i8
-// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T6:%.*]] = bitcast [[TEST7]]* [[T0]] to i8*
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: call void bitcast
 // CHECK-NEXT: [[T0:%.*]] = load [[TEST7]]*, [[TEST7]]** [[T]], align
-// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST7]]* [[T0]] to i8*
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T2:%.*]] = call zeroext i8 bitcast
 // CHECK-NEXT: [[T3:%.*]] = zext i8 [[T2]] to i32
 // CHECK-NEXT: [[T4:%.*]] = or i32 [[T3]], 5
 // CHECK-NEXT: [[T5:%.*]] = trunc i32 [[T4]] to i8
-// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T6:%.*]] = bitcast [[TEST7]]* [[T0]] to i8*
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: call void bitcast
 // CHECK-NEXT: [[T0:%.*]] = load [[TEST7]]*, [[TEST7]]** [[T]], align
-// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T1:%.*]] = bitcast [[TEST7]]* [[T0]] to i8*
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T2:%.*]] = call zeroext i8 bitcast
 // CHECK-NEXT: [[T3:%.*]] = zext i8 [[T2]] to i32
 // CHECK-NEXT: [[T4:%.*]] = xor i32 [[T3]], 8
 // CHECK-NEXT: [[T5:%.*]] = trunc i32 [[T4]] to i8
-// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: [[T6:%.*]] = bitcast [[TEST7]]* [[T0]] to i8*
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES
 // CHECK-NEXT: call void bitcast
 // CHECK-NEXT: ret void
diff --git a/clang/test/CodeGenObjC/tentative-cfconstantstring.m b/clang/test/CodeGenObjC/tentative-cfconstantstring.m
index cfe3a417668c1..d5cd259580fe0 100644
--- a/clang/test/CodeGenObjC/tentative-cfconstantstring.m
+++ b/clang/test/CodeGenObjC/tentative-cfconstantstring.m
@@ -36,7 +36,7 @@ -(void)someMethod {
 
 // CHECK-LABEL: define internal void @_inlineFunction()
 // CHECK:  [[ZERO:%.*]] = load %struct._class_t*, %struct._class_t** @"OBJC_CLASSLIST_REFERENCES_
-// CHECK-NEXT:   [[ONE:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
-// CHECK-NEXT:   [[TWO:%.*]] = bitcast %struct._class_t* [[ZERO]] to i8*
-// CHECK-NEXT:   call void (i8*, i8*, [[T:%.*]]*, ...) bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [[T:%.*]]*, ...)*)(i8* noundef [[TWO]], i8* noundef [[ONE]], [[T:%.*]]* noundef bitcast (%struct.__NSConstantString_tag* @_unnamed_cfstring_{{.*}} to [[T:%.*]]*))
+// CHECK-NEXT:   [[OBJ:%.*]] = bitcast %struct._class_t* [[ZERO]] to i8*
+// CHECK-NEXT:   [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+// CHECK-NEXT:   call void (i8*, i8*, [[T:%.*]]*, ...) bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [[T:%.*]]*, ...)*)(i8* noundef [[OBJ]], i8* noundef [[SEL]], [[T:%.*]]* noundef bitcast (%struct.__NSConstantString_tag* @_unnamed_cfstring_{{.*}} to [[T:%.*]]*))
 // CHECK-NEXT:   ret void
diff --git a/clang/test/CodeGenObjCXX/arc-cxx11-member-init.mm b/clang/test/CodeGenObjCXX/arc-cxx11-member-init.mm
index ab76951aea3b3..8b9438bf918a4 100644
--- a/clang/test/CodeGenObjCXX/arc-cxx11-member-init.mm
+++ b/clang/test/CodeGenObjCXX/arc-cxx11-member-init.mm
@@ -23,10 +23,10 @@ - (void)applicationDidFinishLaunching
 @end
 
 // CHECK: [[mClipData:%.*]] = getelementptr inbounds %class.XClipboardDataSet, %class.XClipboardDataSet*
-// CHECK: [[ZERO:%.*]] = load %struct._class_t*, %struct._class_t** @"OBJC_CLASSLIST_REFERENCES_$_"
-// CHECK: [[ONE:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
-// CHECK: [[TWO:%.*]] = bitcast %struct._class_t* [[ZERO]] to i8*
-// CHECK: [[CALL:%.*]] = call noundef i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* noundef [[TWO]], i8* noundef [[ONE]])
+// CHECK: [[CLS:%.*]] = load %struct._class_t*, %struct._class_t** @"OBJC_CLASSLIST_REFERENCES_$_"
+// CHECK: [[RECV:%.*]] = bitcast %struct._class_t* [[CLS]] to i8*
+// CHECK: [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_
+// CHECK: [[CALL:%.*]] = call noundef i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* noundef [[RECV]], i8* noundef [[SEL]])
 // CHECK: [[THREE:%.*]] = bitcast i8* [[CALL]] to [[T:%.*]]*
 // CHECK: store [[T]]* [[THREE]], [[T]]** [[mClipData]], align 8
 
diff --git a/clang/test/CodeGenObjCXX/arc.mm b/clang/test/CodeGenObjCXX/arc.mm
index ed726f60bcba4..e96656de95c70 100644
--- a/clang/test/CodeGenObjCXX/arc.mm
+++ b/clang/test/CodeGenObjCXX/arc.mm
@@ -221,8 +221,8 @@ - (NSArray *) array;
 // CHECK-NEXT: call void (...) @llvm.objc.clang.arc.noop.use({{.*}} [[T2]])
 
 // Make sure it's not immediately released before starting the iteration.
-// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-NEXT: [[T0:%.*]] = bitcast [[NSARRAY]]* [[T2]] to i8*
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-NEXT: @objc_msgSend
 
 // This bitcast is for the mutation check.
@@ -231,6 +231,7 @@ - (NSArray *) array;
 
 // This bitcast is for the 'next' message send.
 // CHECK:      [[T0:%.*]] = bitcast [[NSARRAY]]* [[T2]] to i8*
+// CHECK-NEXT: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK-NEXT: @objc_msgSend
 
 // This bitcast is for the final release.
diff --git a/clang/test/CodeGenObjCXX/message.mm b/clang/test/CodeGenObjCXX/message.mm
index 137c77190ed4c..74786dcd626e5 100644
--- a/clang/test/CodeGenObjCXX/message.mm
+++ b/clang/test/CodeGenObjCXX/message.mm
@@ -17,8 +17,8 @@ - (void) foo;
   template void foo<int>();
   // CHECK-LABEL:    define weak_odr void @_ZN5test03fooIiEEvv()
   // CHECK:      [[T0:%.*]] = call noundef [[TEST0:%.*]]* @_ZN5test01AcvP5Test0Ev(
-  // CHECK-NEXT: [[T1:%.*]] = load i8*, i8**
   // CHECK-NEXT: [[T2:%.*]] = bitcast [[TEST0]]* [[T0]] to i8*
+  // CHECK-NEXT: [[T1:%.*]] = load i8*, i8**
   // CHECK-NEXT: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*)*)(i8* noundef [[T2]], i8* noundef [[T1]])
   // CHECK-NEXT: ret void
 }
diff --git a/clang/test/CodeGenObjCXX/property-lvalue-capture.mm b/clang/test/CodeGenObjCXX/property-lvalue-capture.mm
index 591d921fb6151..b96aacb0afab1 100644
--- a/clang/test/CodeGenObjCXX/property-lvalue-capture.mm
+++ b/clang/test/CodeGenObjCXX/property-lvalue-capture.mm
@@ -24,12 +24,12 @@ - (void)transformFrame:(PAGeometryFrame *)frame {
 }
 @end
 
-// CHECK:   [[TWO:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load ![[MD_NUM:[0-9]+]]
-// CHECK:   [[THREE:%.*]] = bitcast [[ONET:%.*]]* [[ONE:%.*]] to i8*
-// CHECK:   [[CALL:%.*]] = call noundef nonnull align 1 %struct.Quad2* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %struct.Quad2* (i8*, i8*)*)(i8* noundef [[THREE]], i8* noundef [[TWO]])
-// CHECK:   [[FOUR:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.2, align 8, !invariant.load ![[MD_NUM]]
-// CHECK:   [[FIVE:%.*]] = bitcast [[ONET]]* [[ZERO:%.*]] to i8*
-// CHECK:   call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %struct.Quad2*)*)(i8* noundef [[FIVE]], i8* noundef [[FOUR]], %struct.Quad2* noundef nonnull align 1 [[CALL]])
+// CHECK:   [[OBJ:%.*]] = bitcast [[ONET:%.*]]* [[ONE:%.*]] to i8*
+// CHECK:   [[SEL:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_, align 8, !invariant.load ![[MD_NUM:[0-9]+]]
+// CHECK:   [[CALL:%.*]] = call noundef nonnull align 1 %struct.Quad2* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %struct.Quad2* (i8*, i8*)*)(i8* noundef [[OBJ]], i8* noundef [[SEL]])
+// CHECK:   [[OBJ2:%.*]] = bitcast [[ONET]]* [[ZERO:%.*]] to i8*
+// CHECK:   [[SEL2:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.2, align 8, !invariant.load ![[MD_NUM]]
+// CHECK:   call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %struct.Quad2*)*)(i8* noundef [[OBJ2]], i8* noundef [[SEL2]], %struct.Quad2* noundef nonnull align 1 [[CALL]])
 
 
 struct A {
@@ -47,7 +47,7 @@ void test(C *c, const A &a) {
 }
 
 // CHECK:   [[ONE1:%.*]] = load %struct.A*, %struct.A** [[AADDR:%.*]], align 8
-// CHECK:   [[TWO1:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.5, align 8, !invariant.load ![[MD_NUM]]
-// CHECK:   [[THREE1:%.*]] = bitcast [[TWOT:%.*]]* [[ZERO1:%.*]] to i8*
-// CHECK:   call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %struct.A*)*)(i8* noundef [[THREE1]], i8* noundef [[TWO1]], %struct.A* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[ONE1]])
+// CHECK:   [[OBJ3:%.*]] = bitcast [[TWOT:%.*]]* [[ZERO1:%.*]] to i8*
+// CHECK:   [[SEL3:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_.5, align 8, !invariant.load ![[MD_NUM]]
+// CHECK:   call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %struct.A*)*)(i8* noundef [[OBJ3]], i8* noundef [[SEL3]], %struct.A* noundef nonnull align {{[0-9]+}} dereferenceable({{[0-9]+}}) [[ONE1]])
 // CHECK:   store %struct.A* [[ONE1]], %struct.A** [[RESULT:%.*]], align 8
diff --git a/clang/test/CodeGenObjCXX/property-object-reference.mm b/clang/test/CodeGenObjCXX/property-object-reference.mm
index 766b3d32ca7d4..ee4d949b6f890 100644
--- a/clang/test/CodeGenObjCXX/property-object-reference.mm
+++ b/clang/test/CodeGenObjCXX/property-object-reference.mm
@@ -25,11 +25,11 @@ - (void)setFooProperty : (Foo &) arg {  };
 @end
 
 // CHECK: [[T0:%.*]] = load {{%.*}} [[S0:%.*]]
-// CHECK: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK:  [[T2:%.*]]  = bitcast {{%.*}} [[T0]] to i8*
+// CHECK: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK:  @objc_msgSend
 // CHECK: [[R0:%.*]] = load {{%.*}} [[U0:%.*]]
-// CHECK: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK:  [[R2:%.*]]  = bitcast {{%.*}} [[R0]] to i8*
+// CHECK: load i8*, i8** @OBJC_SELECTOR_REFERENCES_
 // CHECK:  @objc_msgSend
 

From 836950c4e602548e47851b66a261472f124994fa Mon Sep 17 00:00:00 2001
From: Evgeny Shulgin <izaronplatz@gmail.com>
Date: Wed, 26 Jan 2022 17:55:14 -0700
Subject: [PATCH 835/946] [clang-tidy] Fix nested namespaces in
 `readability-static-definition-in-anonymous-namespace` check

The check previously inspected only the immediate parent namespace.
`static` in a named namespace within an unnamed namespace is still
redundant.
We will use `Decl::isInAnonymousNamespace()` method that traverses the
namespaces hierarchy recursively.

Differential Revision: https://reviews.llvm.org/D118010
---
 .../StaticDefinitionInAnonymousNamespaceCheck.cpp |  6 +++++-
 ...y-static-definition-in-anonymous-namespace.rst |  5 ++++-
 ...y-static-definition-in-anonymous-namespace.cpp | 15 +++++++++++++++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp b/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp
index ef973a1927b07..b76c1837b8019 100644
--- a/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp
+++ b/clang-tools-extra/clang-tidy/readability/StaticDefinitionInAnonymousNamespaceCheck.cpp
@@ -17,12 +17,16 @@ namespace clang {
 namespace tidy {
 namespace readability {
 
+AST_MATCHER(NamedDecl, isInAnonymousNamespace) {
+  return Node.isInAnonymousNamespace();
+}
+
 void StaticDefinitionInAnonymousNamespaceCheck::registerMatchers(
     MatchFinder *Finder) {
   Finder->addMatcher(
       namedDecl(anyOf(functionDecl(isDefinition(), isStaticStorageClass()),
                       varDecl(isDefinition(), isStaticStorageClass())),
-                hasParent(namespaceDecl(isAnonymous())))
+                isInAnonymousNamespace())
           .bind("static-def"),
       this);
 }
diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability-static-definition-in-anonymous-namespace.rst b/clang-tools-extra/docs/clang-tidy/checks/readability-static-definition-in-anonymous-namespace.rst
index c1803d4b13052..91d40bd713ee2 100644
--- a/clang-tools-extra/docs/clang-tidy/checks/readability-static-definition-in-anonymous-namespace.rst
+++ b/clang-tools-extra/docs/clang-tidy/checks/readability-static-definition-in-anonymous-namespace.rst
@@ -12,7 +12,10 @@ visibility of definitions to a single translation unit.
 
   namespace {
     static int a = 1; // Warning.
-    static const b = 1; // Warning.
+    static const int b = 1; // Warning.
+    namespace inner {
+      static int c = 1; // Warning.
+    }
   }
 
 The check will apply a fix by removing the redundant ``static`` qualifier.
diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability-static-definition-in-anonymous-namespace.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability-static-definition-in-anonymous-namespace.cpp
index 5c3c8c12d683c..e9938db4f5b83 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/readability-static-definition-in-anonymous-namespace.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/readability-static-definition-in-anonymous-namespace.cpp
@@ -36,6 +36,21 @@ DEFINE_STATIC int h = 1;
 DEFINE_STATIC_VAR(i);
 // CHECK-FIXES: {{^}}DEFINE_STATIC_VAR(i);
 
+namespace inner {
+int a = 1;
+const int b = 1;
+static int c = 1;
+// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: 'c' is a static definition in anonymous namespace; static is redundant here [readability-static-definition-in-anonymous-namespace]
+// CHECK-FIXES: {{^}}int c = 1;
+namespace deep_inner {
+int a = 1;
+const int b = 1;
+static int c = 1;
+// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: 'c' is a static definition in anonymous namespace; static is redundant here [readability-static-definition-in-anonymous-namespace]
+// CHECK-FIXES: {{^}}int c = 1;
+} // namespace deep_inner
+} // namespace inner
+
 } // namespace
 
 namespace N {

From 63839854baa2a9990bccb6b2a4cc3d4ec0158c96 Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Thu, 27 Jan 2022 12:13:07 +0700
Subject: [PATCH 836/946] [Test] Add one more add-reduce test w/ pointer
 bitcasts

---
 .../InstCombine/reduction-and-sext-zext-i1.ll | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
index e9204e2d5b46f..e167ec4488ccd 100644
--- a/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
@@ -95,6 +95,28 @@ define i64 @reduce_and_zext_external_use(<8 x i1> %x) {
   ret i64 %res
 }
 
+define i1 @reduce_and_pointer_cast(i8* %arg, i8* %arg1) {
+; CHECK-LABEL: @reduce_and_pointer_cast(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i8>*
+; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i8>*
+; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR1]], align 8
+; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR2]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i8> [[LHS]], [[RHS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+bb:
+  %ptr1 = bitcast i8* %arg1 to <8 x i8>*
+  %ptr2 = bitcast i8* %arg to <8 x i8>*
+  %lhs = load <8 x i8>, <8 x i8>* %ptr1
+  %rhs = load <8 x i8>, <8 x i8>* %ptr2
+  %cmp = icmp eq <8 x i8> %lhs, %rhs
+  %all_eq = call i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %cmp)
+  ret i1 %all_eq
+}
+
 declare i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %a)
 declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
 declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a)

From 2d566637f4404c01103fad2089592768be7e64e5 Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Thu, 27 Jan 2022 12:26:19 +0700
Subject: [PATCH 837/946] [Test] Add test for or-reduce idiom symmetrical to
 and-reduce

---
 .../InstCombine/reduction-or-sext-zext-i1.ll  | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
index a32c5ca676909..a4b39ec7b66c7 100644
--- a/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
@@ -95,6 +95,29 @@ define i64 @reduce_or_zext_external_use(<8 x i1> %x) {
   ret i64 %res
 }
 
+define i1 @reduce_or_pointer_cast(i8* %arg, i8* %arg1) {
+; CHECK-LABEL: @reduce_or_pointer_cast(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i8>*
+; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i8>*
+; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR1]], align 8
+; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i8>, <8 x i8>* [[PTR2]], align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i8> [[LHS]], [[RHS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    ret i1 [[DOTNOT]]
+;
+bb:
+  %ptr1 = bitcast i8* %arg1 to <8 x i8>*
+  %ptr2 = bitcast i8* %arg to <8 x i8>*
+  %lhs = load <8 x i8>, <8 x i8>* %ptr1
+  %rhs = load <8 x i8>, <8 x i8>* %ptr2
+  %cmp = icmp ne <8 x i8> %lhs, %rhs
+  %any_ne = call i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %cmp)
+  %all_eq = xor i1 %any_ne, 1
+  ret i1 %all_eq
+}
+
 declare i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %a)
 declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
 declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a)

From f3e1f44340dc26e3810d601edf0e052813b7a11c Mon Sep 17 00:00:00 2001
From: Congzhe Cao <congzhe.cao@huawei.com>
Date: Thu, 27 Jan 2022 00:24:12 -0500
Subject: [PATCH 838/946] [IVDescriptor] Get the exact FP instruction that does
 not allow reordering

This is a bugfix in IVDescriptor.cpp.

The helper function `RecurrenceDescriptor::getExactFPMathInst()`
is supposed to return the 1st FP instruction that does not allow
reordering. However, when constructing the RecurrenceDescriptor,
we trace the use-def chain staring from a PHI node and for each
instruction in the use-def chain, its descriptor overrides the
previous one. Therefore in the final RecurrenceDescriptor we
constructed, we lose previous FP instructions that does not allow
reordering.

Reviewed By: kmclaughlin

Differential Revision: https://reviews.llvm.org/D118073
---
 llvm/lib/Analysis/IVDescriptors.cpp           | 16 ++++--
 .../LoopVectorize/AArch64/strict-fadd.ll      | 49 +++++++++++++++++++
 2 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index e40b08b322637..44b1d94ebdc80 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -309,6 +309,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   // flags from all the reduction operations.
   FastMathFlags FMF = FastMathFlags::getFast();
 
+  // The first instruction in the use-def chain of the Phi node that requires
+  // exact floating point operations.
+  Instruction *ExactFPMathInst = nullptr;
+
   // A value in the reduction can be used:
   //  - By the reduction:
   //      - Reduction operation:
@@ -352,6 +356,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
     if (Cur != Start) {
       ReduxDesc =
           isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
+      ExactFPMathInst = ExactFPMathInst == nullptr
+                            ? ReduxDesc.getExactFPMathInst()
+                            : ExactFPMathInst;
       if (!ReduxDesc.isRecurrence())
         return false;
       // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
@@ -480,8 +487,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
     return false;
 
-  const bool IsOrdered = checkOrderedReduction(
-      Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi);
+  const bool IsOrdered =
+      checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
 
   if (Start != Phi) {
     // If the starting value is not the same as the phi node, we speculatively
@@ -538,9 +545,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
   // is saved as part of the RecurrenceDescriptor.
 
   // Save the description of this reduction variable.
-  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
-                          ReduxDesc.getExactFPMathInst(), RecurrenceType,
-                          IsSigned, IsOrdered, CastInsts,
+  RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
+                          RecurrenceType, IsSigned, IsOrdered, CastInsts,
                           MinWidthCastToRecurrenceType);
   RedDes = RD;
 
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
index 3aa88d7fc18c1..a0dba427ce3a5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd.ll
@@ -585,6 +585,55 @@ for.end:                                         ; preds = %for.body
   ret float %rdx
 }
 
+; Negative test - loop contains two fadds and only one fadd has the fast flag,
+; which we cannot safely reorder.
+define float @fadd_multiple_one_flag(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
+; CHECK-ORDERED-LABEL: @fadd_multiple_one_flag
+; CHECK-ORDERED-NOT: vector.body
+
+; CHECK-UNORDERED-LABEL: @fadd_multiple_one_flag
+; CHECK-UNORDERED: vector.body
+; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
+; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, <8 x float>
+; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <8 x float> %[[PHI]], %[[VEC_LOAD1]]
+; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, <8 x float>
+; CHECK-UNORDERED: %[[VEC_FADD2]] = fadd fast <8 x float> %[[VEC_FADD1]], %[[VEC_LOAD2]]
+; CHECK-UNORDERED: middle.block
+; CHECK-UNORDERED: %[[RDX:.*]] = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %[[VEC_FADD2]])
+; CHECK-UNORDERED: for.body
+; CHECK-UNORDERED: %[[SUM:.*]] = phi float [ %bc.merge.rdx, %scalar.ph ], [ %[[FADD2:.*]], %for.body ]
+; CHECK-UNORDERED: %[[LOAD1:.*]] = load float, float*
+; CHECK-UNORDERED: %[[FADD1:.*]] = fadd float %sum, %[[LOAD1]]
+; CHECK-UNORDERED: %[[LOAD2:.*]] = load float, float*
+; CHECK-UNORDERED: %[[FADD2]] = fadd fast float %[[FADD1]], %[[LOAD2]]
+; CHECK-UNORDERED: for.end
+; CHECK-UNORDERED: %[[RET:.*]] = phi float [ %[[FADD2]], %for.body ], [ %[[RDX]], %middle.block ]
+; CHECK-UNORDERED: ret float %[[RET]]
+
+; CHECK-NOT-VECTORIZED-LABEL: @fadd_multiple_one_flag
+; CHECK-NOT-VECTORIZED-NOT: vector.body
+
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %sum = phi float [ -0.000000e+00, %entry ], [ %add3, %for.body ]
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %iv
+  %0 = load float, float* %arrayidx, align 4
+  %add = fadd float %sum, %0
+  %arrayidx2 = getelementptr inbounds float, float* %b, i64 %iv
+  %1 = load float, float* %arrayidx2, align 4
+  %add3 = fadd fast float %add, %1
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %n
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:                                         ; preds = %for.body
+  %rdx = phi float [ %add3, %for.body ]
+  ret float %rdx
+}
+
 ; Tests with both a floating point reduction & induction, e.g.
 ;
 ;float fp_iv_rdx_loop(float *values, float init, float * __restrict__ A, int N) {

From f4a368689f34a8ae779e96097e4b18958f1659fa Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 14:30:03 -0800
Subject: [PATCH 839/946] [mlir:PassOptions] Fix parsing of nested option
 values/better handle escaping

The option parser currently does not properly handle nested options, meaning that
in some cases we can print pass pipelines that we can't actually parse back in. For example,
from #52885 we currently can't parse in inliner pipelines that have nested pipeline strings.

This commit adds handling for string values (e.g. "...") and nested options
(e.g. `foo{baz{bar=10 pizza=11}}`).

Fixes #52885

Differential Revision: https://reviews.llvm.org/D118078
---
 mlir/lib/Pass/PassRegistry.cpp               | 98 +++++++++++++++-----
 mlir/test/Pass/pipeline-options-parsing.mlir |  4 +-
 2 files changed, 77 insertions(+), 25 deletions(-)

diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp
index 6f92e2d80b4e3..58870a6ea93b7 100644
--- a/mlir/lib/Pass/PassRegistry.cpp
+++ b/mlir/lib/Pass/PassRegistry.cpp
@@ -154,35 +154,87 @@ void detail::PassOptions::copyOptionValuesFrom(const PassOptions &other) {
     std::get<0>(optionsIt)->copyValueFrom(*std::get<1>(optionsIt));
 }
 
+/// Parse in the next argument from the given options string. Returns a tuple
+/// containing [the key of the option, the value of the option, updated
+/// `options` string pointing after the parsed option].
+static std::tuple<StringRef, StringRef, StringRef>
+parseNextArg(StringRef options) {
+  // Functor used to extract an argument from 'options' and update it to point
+  // after the arg.
+  auto extractArgAndUpdateOptions = [&](size_t argSize) {
+    StringRef str = options.take_front(argSize).trim();
+    options = options.drop_front(argSize).ltrim();
+    return str;
+  };
+  // Try to process the given punctuation, properly escaping any contained
+  // characters.
+  auto tryProcessPunct = [&](size_t &currentPos, char punct) {
+    if (options[currentPos] != punct)
+      return false;
+    size_t nextIt = options.find_first_of(punct, currentPos + 1);
+    if (nextIt != StringRef::npos)
+      currentPos = nextIt;
+    return true;
+  };
+
+  // Parse the argument name of the option.
+  StringRef argName;
+  for (size_t argEndIt = 0, optionsE = options.size();; ++argEndIt) {
+    // Check for the end of the full option.
+    if (argEndIt == optionsE || options[argEndIt] == ' ') {
+      argName = extractArgAndUpdateOptions(argEndIt);
+      return std::make_tuple(argName, StringRef(), options);
+    }
+
+    // Check for the end of the name and the start of the value.
+    if (options[argEndIt] == '=') {
+      argName = extractArgAndUpdateOptions(argEndIt);
+      options = options.drop_front();
+      break;
+    }
+  }
+
+  // Parse the value of the option.
+  for (size_t argEndIt = 0, optionsE = options.size();; ++argEndIt) {
+    // Handle the end of the options string.
+    if (argEndIt == optionsE || options[argEndIt] == ' ') {
+      StringRef value = extractArgAndUpdateOptions(argEndIt);
+      return std::make_tuple(argName, value, options);
+    }
+
+    // Skip over escaped sequences.
+    char c = options[argEndIt];
+    if (tryProcessPunct(argEndIt, '\'') || tryProcessPunct(argEndIt, '"'))
+      continue;
+    // '{...}' is used to specify options to passes, properly escape it so
+    // that we don't accidentally split any nested options.
+    if (c == '{') {
+      size_t braceCount = 1;
+      for (++argEndIt; argEndIt != optionsE; ++argEndIt) {
+        // Allow nested punctuation.
+        if (tryProcessPunct(argEndIt, '\'') || tryProcessPunct(argEndIt, '"'))
+          continue;
+        if (options[argEndIt] == '{')
+          ++braceCount;
+        else if (options[argEndIt] == '}' && --braceCount == 0)
+          break;
+      }
+      // Account for the increment at the top of the loop.
+      --argEndIt;
+    }
+  }
+  llvm_unreachable("unexpected control flow in pass option parsing");
+}
+
 LogicalResult detail::PassOptions::parseFromString(StringRef options) {
-  // TODO: Handle escaping strings.
   // NOTE: `options` is modified in place to always refer to the unprocessed
   // part of the string.
   while (!options.empty()) {
-    size_t spacePos = options.find(' ');
-    StringRef arg = options;
-    if (spacePos != StringRef::npos) {
-      arg = options.substr(0, spacePos);
-      options = options.substr(spacePos + 1);
-    } else {
-      options = StringRef();
-    }
-    if (arg.empty())
+    StringRef key, value;
+    std::tie(key, value, options) = parseNextArg(options);
+    if (key.empty())
       continue;
 
-    // At this point, arg refers to everything that is non-space in options
-    // upto the next space, and options refers to the rest of the string after
-    // that point.
-
-    // Split the individual option on '=' to form key and value. If there is no
-    // '=', then value is `StringRef()`.
-    size_t equalPos = arg.find('=');
-    StringRef key = arg;
-    StringRef value;
-    if (equalPos != StringRef::npos) {
-      key = arg.substr(0, equalPos);
-      value = arg.substr(equalPos + 1);
-    }
     auto it = OptionsMap.find(key);
     if (it == OptionsMap.end()) {
       llvm::errs() << "<Pass-Options-Parser>: no such option " << key << "\n";
diff --git a/mlir/test/Pass/pipeline-options-parsing.mlir b/mlir/test/Pass/pipeline-options-parsing.mlir
index 7fe3f76326847..ab0e482373ff7 100644
--- a/mlir/test/Pass/pipeline-options-parsing.mlir
+++ b/mlir/test/Pass/pipeline-options-parsing.mlir
@@ -3,7 +3,7 @@
 // RUN: not mlir-opt %s -pass-pipeline='builtin.module(builtin.func(test-options-pass{list=3}), test-module-pass{invalid-option=3})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_3 %s
 // RUN: not mlir-opt %s -pass-pipeline='test-options-pass{list=3 list=notaninteger}' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_4 %s
 // RUN: not mlir-opt %s -pass-pipeline='builtin.func(test-options-pass{list=1,2,3,4 list=5 string=value1 string=value2})' 2>&1 | FileCheck --check-prefix=CHECK_ERROR_5 %s
-// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.func(test-options-pass{string-list=a list=1,2,3,4 string-list=b,c list=5 string-list=d string=some_value})' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_1 %s
+// RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.func(test-options-pass{string-list=a list=1,2,3,4 string-list=b,c list=5 string-list=d string=nested_pipeline{arg1=10 arg2=" {} " arg3=true}})' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_1 %s
 // RUN: mlir-opt %s -verify-each=false -test-options-pass-pipeline='list=1 string-list=a,b' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_2 %s
 // RUN: mlir-opt %s -verify-each=false -pass-pipeline='builtin.module(builtin.func(test-options-pass{list=3}), builtin.func(test-options-pass{list=1,2,3,4}))' -test-dump-pipeline 2>&1 | FileCheck --check-prefix=CHECK_3 %s
 
@@ -13,6 +13,6 @@
 // CHECK_ERROR_4: 'notaninteger' value invalid for integer argument
 // CHECK_ERROR_5: string option: may only occur zero or one times
 
-// CHECK_1: test-options-pass{list=1,2,3,4,5 string=some_value string-list=a,b,c,d}
+// CHECK_1: test-options-pass{list=1,2,3,4,5 string=nested_pipeline{arg1=10 arg2=" {} " arg3=true} string-list=a,b,c,d}
 // CHECK_2: test-options-pass{list=1 string= string-list=a,b}
 // CHECK_3: builtin.module(builtin.func(test-options-pass{list=3 string= }), builtin.func(test-options-pass{list=1,2,3,4 string= }))

From e3cd80ea9f0ac0d04f537feb70d8f9a1c7875863 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Mon, 24 Jan 2022 15:18:04 -0800
Subject: [PATCH 840/946] [mlir:Function][NFC] Use BitVector instead of
 ArrayRef for indices when removing arguments/results

We already convert to BitVector internally, and other APIs (namely Operation::eraseOperands)
already use BitVector as well. Switching over provides a common format between
API and also reduces the amount of format conversions necessary.

Fixes #53325

Differential Revision: https://reviews.llvm.org/D118083
---
 mlir/include/mlir/IR/BuiltinTypes.h           |  1 +
 mlir/include/mlir/IR/BuiltinTypes.td          |  4 +-
 mlir/include/mlir/IR/FunctionInterfaces.h     | 11 +++--
 mlir/include/mlir/IR/FunctionInterfaces.td    | 47 ++++++++++++-------
 .../Transforms/BufferResultsToOutParams.cpp   | 10 ++--
 mlir/lib/IR/BuiltinTypes.cpp                  |  4 +-
 mlir/lib/IR/FunctionInterfaces.cpp            | 45 ++++++------------
 mlir/test/lib/IR/TestFunc.cpp                 | 32 ++++---------
 8 files changed, 70 insertions(+), 84 deletions(-)

diff --git a/mlir/include/mlir/IR/BuiltinTypes.h b/mlir/include/mlir/IR/BuiltinTypes.h
index e087e6bf55d8c..b03d9ea9f575d 100644
--- a/mlir/include/mlir/IR/BuiltinTypes.h
+++ b/mlir/include/mlir/IR/BuiltinTypes.h
@@ -13,6 +13,7 @@
 #include "SubElementInterfaces.h"
 
 namespace llvm {
+class BitVector;
 struct fltSemantics;
 } // namespace llvm
 
diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td
index b6f90f92190a8..a78413548ba82 100644
--- a/mlir/include/mlir/IR/BuiltinTypes.td
+++ b/mlir/include/mlir/IR/BuiltinTypes.td
@@ -166,8 +166,8 @@ def Builtin_Function : Builtin_Type<"Function", [
                                        TypeRange resultTypes);
 
     /// Returns a new function type without the specified arguments and results.
-    FunctionType getWithoutArgsAndResults(ArrayRef<unsigned> argIndices,
-                                          ArrayRef<unsigned> resultIndices);
+    FunctionType getWithoutArgsAndResults(const llvm::BitVector &argIndices,
+                                          const llvm::BitVector &resultIndices);
   }];
 }
 
diff --git a/mlir/include/mlir/IR/FunctionInterfaces.h b/mlir/include/mlir/IR/FunctionInterfaces.h
index b81abceef6149..b6d6a9515ffe0 100644
--- a/mlir/include/mlir/IR/FunctionInterfaces.h
+++ b/mlir/include/mlir/IR/FunctionInterfaces.h
@@ -16,6 +16,7 @@
 
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/OpDefinition.h"
+#include "llvm/ADT/BitVector.h"
 #include "llvm/ADT/SmallString.h"
 
 namespace mlir {
@@ -82,12 +83,12 @@ void insertFunctionResults(Operation *op, ArrayRef<unsigned> resultIndices,
                            unsigned originalNumResults, Type newType);
 
 /// Erase the specified arguments and update the function type attribute.
-void eraseFunctionArguments(Operation *op, ArrayRef<unsigned> argIndices,
-                            unsigned originalNumArgs, Type newType);
+void eraseFunctionArguments(Operation *op, const llvm::BitVector &argIndices,
+                            Type newType);
 
 /// Erase the specified results and update the function type attribute.
-void eraseFunctionResults(Operation *op, ArrayRef<unsigned> resultIndices,
-                          unsigned originalNumResults, Type newType);
+void eraseFunctionResults(Operation *op, const llvm::BitVector &resultIndices,
+                          Type newType);
 
 /// Set a FunctionOpInterface operation's type signature.
 void setFunctionType(Operation *op, Type newType);
@@ -100,7 +101,7 @@ TypeRange insertTypesInto(TypeRange oldTypes, ArrayRef<unsigned> indices,
 
 /// Filters out any elements referenced by `indices`. If any types are removed,
 /// `storage` is used to hold the new type list. Returns the new type list.
-TypeRange filterTypesOut(TypeRange types, ArrayRef<unsigned> indices,
+TypeRange filterTypesOut(TypeRange types, const llvm::BitVector &indices,
                          SmallVectorImpl<Type> &storage);
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/IR/FunctionInterfaces.td b/mlir/include/mlir/IR/FunctionInterfaces.td
index 20c7d7bbd51b7..124f6594081f7 100644
--- a/mlir/include/mlir/IR/FunctionInterfaces.td
+++ b/mlir/include/mlir/IR/FunctionInterfaces.td
@@ -280,27 +280,31 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
     }
 
     /// Erase a single argument at `argIndex`.
-    void eraseArgument(unsigned argIndex) { eraseArguments({argIndex}); }
+    void eraseArgument(unsigned argIndex) {
+      llvm::BitVector argsToErase($_op.getNumArguments());
+      argsToErase.set(argIndex);
+      eraseArguments(argsToErase);
+    }
 
     /// Erases the arguments listed in `argIndices`.
-    /// `argIndices` is allowed to have duplicates and can be in any order.
-    void eraseArguments(ArrayRef<unsigned> argIndices) {
-      unsigned originalNumArgs = $_op.getNumArguments();
-      Type newType = $_op.getTypeWithoutArgsAndResults(argIndices, {});
-      function_interface_impl::eraseFunctionArguments(this->getOperation(), argIndices,
-                                                originalNumArgs, newType);
+    void eraseArguments(const llvm::BitVector &argIndices) {
+      Type newType = $_op.getTypeWithoutArgs(argIndices);
+      function_interface_impl::eraseFunctionArguments(
+        this->getOperation(), argIndices, newType);
     }
 
     /// Erase a single result at `resultIndex`.
-    void eraseResult(unsigned resultIndex) { eraseResults({resultIndex}); }
+    void eraseResult(unsigned resultIndex) {
+      llvm::BitVector resultsToErase($_op.getNumResults());
+      resultsToErase.set(resultIndex);
+      eraseResults(resultsToErase);
+    }
 
     /// Erases the results listed in `resultIndices`.
-    /// `resultIndices` is allowed to have duplicates and can be in any order.
-    void eraseResults(ArrayRef<unsigned> resultIndices) {
-      unsigned originalNumResults = $_op.getNumResults();
-      Type newType = $_op.getTypeWithoutArgsAndResults({}, resultIndices);
+    void eraseResults(const llvm::BitVector &resultIndices) {
+      Type newType = $_op.getTypeWithoutResults(resultIndices);
       function_interface_impl::eraseFunctionResults(
-          this->getOperation(), resultIndices, originalNumResults, newType);
+          this->getOperation(), resultIndices, newType);
     }
 
     /// Return the type of this function with the specified arguments and
@@ -320,10 +324,9 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
 
     /// Return the type of this function without the specified arguments and
     /// results. This is used to update the function's signature in the
-    /// `eraseArguments` and `eraseResults` methods. The arrays of indices are
-    /// allowed to have duplicates and can be in any order.
+    /// `eraseArguments` and `eraseResults` methods.
     Type getTypeWithoutArgsAndResults(
-      ArrayRef<unsigned> argIndices, ArrayRef<unsigned> resultIndices) {
+      const llvm::BitVector &argIndices, const llvm::BitVector &resultIndices) {
       SmallVector<Type> argStorage, resultStorage;
       TypeRange newArgTypes = function_interface_impl::filterTypesOut(
           $_op.getArgumentTypes(), argIndices, argStorage);
@@ -331,6 +334,18 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
           $_op.getResultTypes(), resultIndices, resultStorage);
       return $_op.cloneTypeWith(newArgTypes, newResultTypes);
     }
+    Type getTypeWithoutArgs(const llvm::BitVector &argIndices) {
+      SmallVector<Type> argStorage;
+      TypeRange newArgTypes = function_interface_impl::filterTypesOut(
+          $_op.getArgumentTypes(), argIndices, argStorage);
+      return $_op.cloneTypeWith(newArgTypes, $_op.getResultTypes());
+    }
+    Type getTypeWithoutResults(const llvm::BitVector &resultIndices) {
+      SmallVector<Type> resultStorage;
+      TypeRange newResultTypes = function_interface_impl::filterTypesOut(
+          $_op.getResultTypes(), resultIndices, resultStorage);
+      return $_op.cloneTypeWith($_op.getArgumentTypes(), newResultTypes);
+    }
 
     //===------------------------------------------------------------------===//
     // Argument Attributes
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
index 08780db5f94df..585e873b41881 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
@@ -24,10 +24,10 @@ static void updateFuncOp(FuncOp func,
 
   // Collect information about the results will become appended arguments.
   SmallVector<Type, 6> erasedResultTypes;
-  SmallVector<unsigned, 6> erasedResultIndices;
+  llvm::BitVector erasedResultIndices(functionType.getNumResults());
   for (const auto &resultType : llvm::enumerate(functionType.getResults())) {
     if (resultType.value().isa<BaseMemRefType>()) {
-      erasedResultIndices.push_back(resultType.index());
+      erasedResultIndices.set(resultType.index());
       erasedResultTypes.push_back(resultType.value());
     }
   }
@@ -40,9 +40,11 @@ static void updateFuncOp(FuncOp func,
   func.setType(newFunctionType);
 
   // Transfer the result attributes to arg attributes.
-  for (int i = 0, e = erasedResultTypes.size(); i < e; i++)
+  auto erasedIndicesIt = erasedResultIndices.set_bits_begin();
+  for (int i = 0, e = erasedResultTypes.size(); i < e; ++i, ++erasedIndicesIt) {
     func.setArgAttrs(functionType.getNumInputs() + i,
-                     func.getResultAttrs(erasedResultIndices[i]));
+                     func.getResultAttrs(*erasedIndicesIt));
+  }
 
   // Erase the results.
   func.eraseResults(erasedResultIndices);
diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp
index 46166f16f1686..63a5962803900 100644
--- a/mlir/lib/IR/BuiltinTypes.cpp
+++ b/mlir/lib/IR/BuiltinTypes.cpp
@@ -172,8 +172,8 @@ FunctionType FunctionType::getWithArgsAndResults(
 
 /// Returns a new function type without the specified arguments and results.
 FunctionType
-FunctionType::getWithoutArgsAndResults(ArrayRef<unsigned> argIndices,
-                                       ArrayRef<unsigned> resultIndices) {
+FunctionType::getWithoutArgsAndResults(const llvm::BitVector &argIndices,
+                                       const llvm::BitVector &resultIndices) {
   SmallVector<Type> argStorage, resultStorage;
   TypeRange newArgTypes = function_interface_impl::filterTypesOut(
       getInputs(), argIndices, argStorage);
diff --git a/mlir/lib/IR/FunctionInterfaces.cpp b/mlir/lib/IR/FunctionInterfaces.cpp
index 07da5ce1716f1..4f31c59f3f69e 100644
--- a/mlir/lib/IR/FunctionInterfaces.cpp
+++ b/mlir/lib/IR/FunctionInterfaces.cpp
@@ -7,26 +7,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/IR/FunctionInterfaces.h"
-#include "mlir/Support/LLVM.h"
-#include "llvm/ADT/BitVector.h"
 
 using namespace mlir;
 
-/// Helper to call a callback once on each index in the range
-/// [0, `totalIndices`), *except* for the indices given in `indices`.
-/// `indices` is allowed to have duplicates and can be in any order.
-inline static void iterateIndicesExcept(unsigned totalIndices,
-                                        ArrayRef<unsigned> indices,
-                                        function_ref<void(unsigned)> callback) {
-  llvm::BitVector skipIndices(totalIndices);
-  for (unsigned i : indices)
-    skipIndices.set(i);
-
-  for (unsigned i = 0; i < totalIndices; ++i)
-    if (!skipIndices.test(i))
-      callback(i);
-}
-
 //===----------------------------------------------------------------------===//
 // Tablegen Interface Definitions
 //===----------------------------------------------------------------------===//
@@ -217,8 +200,7 @@ void mlir::function_interface_impl::insertFunctionResults(
 }
 
 void mlir::function_interface_impl::eraseFunctionArguments(
-    Operation *op, ArrayRef<unsigned> argIndices, unsigned originalNumArgs,
-    Type newType) {
+    Operation *op, const llvm::BitVector &argIndices, Type newType) {
   // There are 3 things that need to be updated:
   // - Function type.
   // - Arg attrs.
@@ -229,9 +211,9 @@ void mlir::function_interface_impl::eraseFunctionArguments(
   if (auto argAttrs = op->getAttrOfType<ArrayAttr>(getArgDictAttrName())) {
     SmallVector<DictionaryAttr, 4> newArgAttrs;
     newArgAttrs.reserve(argAttrs.size());
-    iterateIndicesExcept(originalNumArgs, argIndices, [&](unsigned i) {
-      newArgAttrs.emplace_back(argAttrs[i].cast<DictionaryAttr>());
-    });
+    for (unsigned i = 0, e = argIndices.size(); i < e; ++i)
+      if (!argIndices[i])
+        newArgAttrs.emplace_back(argAttrs[i].cast<DictionaryAttr>());
     setAllArgAttrDicts(op, newArgAttrs);
   }
 
@@ -241,8 +223,7 @@ void mlir::function_interface_impl::eraseFunctionArguments(
 }
 
 void mlir::function_interface_impl::eraseFunctionResults(
-    Operation *op, ArrayRef<unsigned> resultIndices,
-    unsigned originalNumResults, Type newType) {
+    Operation *op, const llvm::BitVector &resultIndices, Type newType) {
   // There are 2 things that need to be updated:
   // - Function type.
   // - Result attrs.
@@ -251,9 +232,9 @@ void mlir::function_interface_impl::eraseFunctionResults(
   if (auto resAttrs = op->getAttrOfType<ArrayAttr>(getResultDictAttrName())) {
     SmallVector<DictionaryAttr, 4> newResultAttrs;
     newResultAttrs.reserve(resAttrs.size());
-    iterateIndicesExcept(originalNumResults, resultIndices, [&](unsigned i) {
-      newResultAttrs.emplace_back(resAttrs[i].cast<DictionaryAttr>());
-    });
+    for (unsigned i = 0, e = resultIndices.size(); i < e; ++i)
+      if (!resultIndices[i])
+        newResultAttrs.emplace_back(resAttrs[i].cast<DictionaryAttr>());
     setAllResultAttrDicts(op, newResultAttrs);
   }
 
@@ -282,12 +263,14 @@ TypeRange mlir::function_interface_impl::insertTypesInto(
 
 TypeRange
 mlir::function_interface_impl::filterTypesOut(TypeRange types,
-                                              ArrayRef<unsigned> indices,
+                                              const llvm::BitVector &indices,
                                               SmallVectorImpl<Type> &storage) {
-  if (indices.empty())
+  if (indices.none())
     return types;
-  iterateIndicesExcept(types.size(), indices,
-                       [&](unsigned i) { storage.emplace_back(types[i]); });
+
+  for (unsigned i = 0, e = types.size(); i < e; ++i)
+    if (!indices[i])
+      storage.emplace_back(types[i]);
   return storage;
 }
 
diff --git a/mlir/test/lib/IR/TestFunc.cpp b/mlir/test/lib/IR/TestFunc.cpp
index dee9f8a5b2e52..0b4ce1d05992e 100644
--- a/mlir/test/lib/IR/TestFunc.cpp
+++ b/mlir/test/lib/IR/TestFunc.cpp
@@ -87,18 +87,10 @@ struct TestFuncEraseArg
     auto module = getOperation();
 
     for (FuncOp func : module.getOps<FuncOp>()) {
-      SmallVector<unsigned, 4> indicesToErase;
-      for (auto argIndex : llvm::seq<int>(0, func.getNumArguments())) {
-        if (func.getArgAttr(argIndex, "test.erase_this_arg")) {
-          // Push back twice to test that duplicate arg indices are handled
-          // correctly.
-          indicesToErase.push_back(argIndex);
-          indicesToErase.push_back(argIndex);
-        }
-      }
-      // Reverse the order to test that unsorted index lists are handled
-      // correctly.
-      std::reverse(indicesToErase.begin(), indicesToErase.end());
+      llvm::BitVector indicesToErase(func.getNumArguments());
+      for (auto argIndex : llvm::seq<int>(0, func.getNumArguments()))
+        if (func.getArgAttr(argIndex, "test.erase_this_arg"))
+          indicesToErase.set(argIndex);
       func.eraseArguments(indicesToErase);
     }
   }
@@ -115,18 +107,10 @@ struct TestFuncEraseResult
     auto module = getOperation();
 
     for (FuncOp func : module.getOps<FuncOp>()) {
-      SmallVector<unsigned, 4> indicesToErase;
-      for (auto resultIndex : llvm::seq<int>(0, func.getNumResults())) {
-        if (func.getResultAttr(resultIndex, "test.erase_this_result")) {
-          // Push back twice to test that duplicate indices are handled
-          // correctly.
-          indicesToErase.push_back(resultIndex);
-          indicesToErase.push_back(resultIndex);
-        }
-      }
-      // Reverse the order to test that unsorted index lists are handled
-      // correctly.
-      std::reverse(indicesToErase.begin(), indicesToErase.end());
+      llvm::BitVector indicesToErase(func.getNumResults());
+      for (auto resultIndex : llvm::seq<int>(0, func.getNumResults()))
+        if (func.getResultAttr(resultIndex, "test.erase_this_result"))
+          indicesToErase.set(resultIndex);
       func.eraseResults(indicesToErase);
     }
   }

From d10d49dce492a16dc5ed1ff53590b64b4dcfb1d9 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Wed, 26 Jan 2022 15:30:56 -0800
Subject: [PATCH 841/946] [mlir][NFC] Add a using for llvm::BitVector to LLVM.h

BitVector is becoming widespread enough that we should add a proper using.

Differential Revision: https://reviews.llvm.org/D118290
---
 .../mlir/Dialect/SparseTensor/Utils/Merger.h   | 12 ++++++------
 mlir/include/mlir/IR/Block.h                   |  2 +-
 mlir/include/mlir/IR/BuiltinTypes.td           |  4 ++--
 mlir/include/mlir/IR/FunctionInterfaces.h      |  6 +++---
 mlir/include/mlir/IR/FunctionInterfaces.td     | 14 +++++++-------
 mlir/include/mlir/IR/Operation.h               |  2 +-
 mlir/include/mlir/IR/OperationSupport.h        |  2 +-
 mlir/include/mlir/Support/LLVM.h               |  2 ++
 .../Transforms/BufferResultsToOutParams.cpp    |  2 +-
 .../Linalg/Transforms/ElementwiseOpFusion.cpp  |  2 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp   |  2 +-
 .../SparseTensor/Transforms/Sparsification.cpp | 14 +++++++-------
 mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp | 18 +++++++++---------
 mlir/lib/IR/Block.cpp                          |  4 ++--
 mlir/lib/IR/BuiltinTypes.cpp                   |  4 ++--
 mlir/lib/IR/FunctionInterfaces.cpp             |  6 +++---
 mlir/lib/IR/OperationSupport.cpp               |  2 +-
 mlir/test/lib/IR/TestFunc.cpp                  |  4 ++--
 mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp |  2 +-
 mlir/tools/mlir-tblgen/OpFormatGen.cpp         |  2 +-
 .../Dialect/SparseTensor/MergerTest.cpp        | 12 ++++++------
 mlir/unittests/IR/OperationSupportTest.cpp     |  2 +-
 22 files changed, 61 insertions(+), 59 deletions(-)

diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
index 2f8b0d9f015db..304ba93737b5b 100644
--- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
+++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h
@@ -94,16 +94,16 @@ struct TensorExp {
 /// tensor expression.
 struct LatPoint {
   LatPoint(unsigned n, unsigned e, unsigned b);
-  LatPoint(const llvm::BitVector &b, unsigned e);
+  LatPoint(const BitVector &b, unsigned e);
 
   /// Conjunction of tensor loop indices as bitvector. This represents
   /// all indices involved in the tensor expression
-  llvm::BitVector bits;
+  BitVector bits;
 
   /// Simplified conjunction of tensor loop indices as bitvector. This
   /// represents a simplified condition under which this tensor expression
   /// must execute. Pre-computed during codegen to avoid repeated eval.
-  llvm::BitVector simple;
+  BitVector simple;
 
   /// Index of the tensor expresssion.
   unsigned exp;
@@ -163,7 +163,7 @@ class Merger {
   /// within the given set using just two basic rules:
   /// (1) multiple dense conditions are reduced to single dense, and
   /// (2) a *singleton* sparse/dense is reduced to sparse/random access.
-  llvm::BitVector simplifyCond(unsigned s0, unsigned p0);
+  BitVector simplifyCond(unsigned s0, unsigned p0);
 
   /// Returns true if Li > Lj.
   bool latGT(unsigned i, unsigned j) const;
@@ -190,7 +190,7 @@ class Merger {
   }
 
   /// Returns true if any set bit corresponds to queried dim.
-  bool hasAnyDimOf(const llvm::BitVector &bits, Dim d) const;
+  bool hasAnyDimOf(const BitVector &bits, Dim d) const;
 
   /// Returns true if given tensor iterates *only* in the given tensor
   /// expression. For the output tensor, this defines a "simply dynamic"
@@ -217,7 +217,7 @@ class Merger {
   void dumpExp(unsigned e) const;
   void dumpLat(unsigned p) const;
   void dumpSet(unsigned s) const;
-  void dumpBits(const llvm::BitVector &bits) const;
+  void dumpBits(const BitVector &bits) const;
 #endif
 
   /// Builds the iteration lattices in a bottom-up traversal given the remaining
diff --git a/mlir/include/mlir/IR/Block.h b/mlir/include/mlir/IR/Block.h
index 3f5faa74d3302..ee4ebcfe19730 100644
--- a/mlir/include/mlir/IR/Block.h
+++ b/mlir/include/mlir/IR/Block.h
@@ -111,7 +111,7 @@ class Block : public IRObjectWithUseList<BlockOperand>,
   void eraseArguments(ArrayRef<unsigned> argIndices);
   /// Erases the arguments that have their corresponding bit set in
   /// `eraseIndices` and removes them from the argument list.
-  void eraseArguments(const llvm::BitVector &eraseIndices);
+  void eraseArguments(const BitVector &eraseIndices);
   /// Erases arguments using the given predicate. If the predicate returns true,
   /// that argument is erased.
   void eraseArguments(function_ref<bool(BlockArgument)> shouldEraseFn);
diff --git a/mlir/include/mlir/IR/BuiltinTypes.td b/mlir/include/mlir/IR/BuiltinTypes.td
index a78413548ba82..d2d249bba7eb2 100644
--- a/mlir/include/mlir/IR/BuiltinTypes.td
+++ b/mlir/include/mlir/IR/BuiltinTypes.td
@@ -166,8 +166,8 @@ def Builtin_Function : Builtin_Type<"Function", [
                                        TypeRange resultTypes);
 
     /// Returns a new function type without the specified arguments and results.
-    FunctionType getWithoutArgsAndResults(const llvm::BitVector &argIndices,
-                                          const llvm::BitVector &resultIndices);
+    FunctionType getWithoutArgsAndResults(const BitVector &argIndices,
+                                          const BitVector &resultIndices);
   }];
 }
 
diff --git a/mlir/include/mlir/IR/FunctionInterfaces.h b/mlir/include/mlir/IR/FunctionInterfaces.h
index b6d6a9515ffe0..4b1e5260dfc54 100644
--- a/mlir/include/mlir/IR/FunctionInterfaces.h
+++ b/mlir/include/mlir/IR/FunctionInterfaces.h
@@ -83,11 +83,11 @@ void insertFunctionResults(Operation *op, ArrayRef<unsigned> resultIndices,
                            unsigned originalNumResults, Type newType);
 
 /// Erase the specified arguments and update the function type attribute.
-void eraseFunctionArguments(Operation *op, const llvm::BitVector &argIndices,
+void eraseFunctionArguments(Operation *op, const BitVector &argIndices,
                             Type newType);
 
 /// Erase the specified results and update the function type attribute.
-void eraseFunctionResults(Operation *op, const llvm::BitVector &resultIndices,
+void eraseFunctionResults(Operation *op, const BitVector &resultIndices,
                           Type newType);
 
 /// Set a FunctionOpInterface operation's type signature.
@@ -101,7 +101,7 @@ TypeRange insertTypesInto(TypeRange oldTypes, ArrayRef<unsigned> indices,
 
 /// Filters out any elements referenced by `indices`. If any types are removed,
 /// `storage` is used to hold the new type list. Returns the new type list.
-TypeRange filterTypesOut(TypeRange types, const llvm::BitVector &indices,
+TypeRange filterTypesOut(TypeRange types, const BitVector &indices,
                          SmallVectorImpl<Type> &storage);
 
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/IR/FunctionInterfaces.td b/mlir/include/mlir/IR/FunctionInterfaces.td
index 124f6594081f7..22a3aee053ab0 100644
--- a/mlir/include/mlir/IR/FunctionInterfaces.td
+++ b/mlir/include/mlir/IR/FunctionInterfaces.td
@@ -281,13 +281,13 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
 
     /// Erase a single argument at `argIndex`.
     void eraseArgument(unsigned argIndex) {
-      llvm::BitVector argsToErase($_op.getNumArguments());
+      BitVector argsToErase($_op.getNumArguments());
       argsToErase.set(argIndex);
       eraseArguments(argsToErase);
     }
 
     /// Erases the arguments listed in `argIndices`.
-    void eraseArguments(const llvm::BitVector &argIndices) {
+    void eraseArguments(const BitVector &argIndices) {
       Type newType = $_op.getTypeWithoutArgs(argIndices);
       function_interface_impl::eraseFunctionArguments(
         this->getOperation(), argIndices, newType);
@@ -295,13 +295,13 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
 
     /// Erase a single result at `resultIndex`.
     void eraseResult(unsigned resultIndex) {
-      llvm::BitVector resultsToErase($_op.getNumResults());
+      BitVector resultsToErase($_op.getNumResults());
       resultsToErase.set(resultIndex);
       eraseResults(resultsToErase);
     }
 
     /// Erases the results listed in `resultIndices`.
-    void eraseResults(const llvm::BitVector &resultIndices) {
+    void eraseResults(const BitVector &resultIndices) {
       Type newType = $_op.getTypeWithoutResults(resultIndices);
       function_interface_impl::eraseFunctionResults(
           this->getOperation(), resultIndices, newType);
@@ -326,7 +326,7 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
     /// results. This is used to update the function's signature in the
     /// `eraseArguments` and `eraseResults` methods.
     Type getTypeWithoutArgsAndResults(
-      const llvm::BitVector &argIndices, const llvm::BitVector &resultIndices) {
+      const BitVector &argIndices, const BitVector &resultIndices) {
       SmallVector<Type> argStorage, resultStorage;
       TypeRange newArgTypes = function_interface_impl::filterTypesOut(
           $_op.getArgumentTypes(), argIndices, argStorage);
@@ -334,13 +334,13 @@ def FunctionOpInterface : OpInterface<"FunctionOpInterface"> {
           $_op.getResultTypes(), resultIndices, resultStorage);
       return $_op.cloneTypeWith(newArgTypes, newResultTypes);
     }
-    Type getTypeWithoutArgs(const llvm::BitVector &argIndices) {
+    Type getTypeWithoutArgs(const BitVector &argIndices) {
       SmallVector<Type> argStorage;
       TypeRange newArgTypes = function_interface_impl::filterTypesOut(
           $_op.getArgumentTypes(), argIndices, argStorage);
       return $_op.cloneTypeWith(newArgTypes, $_op.getResultTypes());
     }
-    Type getTypeWithoutResults(const llvm::BitVector &resultIndices) {
+    Type getTypeWithoutResults(const BitVector &resultIndices) {
       SmallVector<Type> resultStorage;
       TypeRange newResultTypes = function_interface_impl::filterTypesOut(
           $_op.getResultTypes(), resultIndices, resultStorage);
diff --git a/mlir/include/mlir/IR/Operation.h b/mlir/include/mlir/IR/Operation.h
index b65026187d1df..27a59382076e1 100644
--- a/mlir/include/mlir/IR/Operation.h
+++ b/mlir/include/mlir/IR/Operation.h
@@ -232,7 +232,7 @@ class alignas(8) Operation final
 
   /// Erases the operands that have their corresponding bit set in
   /// `eraseIndices` and removes them from the operand list.
-  void eraseOperands(const llvm::BitVector &eraseIndices) {
+  void eraseOperands(const BitVector &eraseIndices) {
     getOperandStorage().eraseOperands(eraseIndices);
   }
 
diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h
index 138bbf7967adc..4e68a67ad1b74 100644
--- a/mlir/include/mlir/IR/OperationSupport.h
+++ b/mlir/include/mlir/IR/OperationSupport.h
@@ -666,7 +666,7 @@ class alignas(8) OperandStorage {
 
   /// Erase the operands held by the storage that have their corresponding bit
   /// set in `eraseIndices`.
-  void eraseOperands(const llvm::BitVector &eraseIndices);
+  void eraseOperands(const BitVector &eraseIndices);
 
   /// Get the operation operands held by the storage.
   MutableArrayRef<OpOperand> getOperands() { return {operandStorage, size()}; }
diff --git a/mlir/include/mlir/Support/LLVM.h b/mlir/include/mlir/Support/LLVM.h
index 3418f9ea76066..eda75cccc9cc9 100644
--- a/mlir/include/mlir/Support/LLVM.h
+++ b/mlir/include/mlir/Support/LLVM.h
@@ -42,6 +42,7 @@ class Twine;
 
 // Containers.
 template <typename T> class ArrayRef;
+class BitVector;
 namespace detail {
 template <typename KeyT, typename ValueT> struct DenseMapPair;
 } // namespace detail
@@ -91,6 +92,7 @@ using llvm::Twine;
 //
 // Containers.
 using llvm::ArrayRef;
+using llvm::BitVector;
 template <typename T, typename Enable = void>
 using DenseMapInfo = llvm::DenseMapInfo<T, Enable>;
 template <typename KeyT, typename ValueT,
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
index 585e873b41881..de3a2e1c0782b 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp
@@ -24,7 +24,7 @@ static void updateFuncOp(FuncOp func,
 
   // Collect information about the results will become appended arguments.
   SmallVector<Type, 6> erasedResultTypes;
-  llvm::BitVector erasedResultIndices(functionType.getNumResults());
+  BitVector erasedResultIndices(functionType.getNumResults());
   for (const auto &resultType : llvm::enumerate(functionType.getResults())) {
     if (resultType.value().isa<BaseMemRefType>()) {
       erasedResultIndices.set(resultType.index());
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
index 6532343830255..19e3c0318f574 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -98,7 +98,7 @@ static bool areElementwiseOpsFusable(GenericOp producer, GenericOp consumer,
   // case due to the output operand. For reductions, we need to check that after
   // the fusion, each loop dimension has at least one input that defines it.
   if ((consumer.getNumReductionLoops())) {
-    llvm::BitVector coveredDims(consumer.getNumLoops(), false);
+    BitVector coveredDims(consumer.getNumLoops(), false);
 
     auto addToCoveredDims = [&](AffineMap map) {
       for (auto result : map.getResults())
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index 089d534bcdae4..ebbebf6371434 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -619,7 +619,7 @@ static ParseResult parseClauses(OpAsmParser &parser, OperationState &result,
                                 SmallVectorImpl<int> &segments) {
 
   // Check done[clause] to see if it has been parsed already
-  llvm::BitVector done(ClauseType::COUNT, false);
+  BitVector done(ClauseType::COUNT, false);
 
   // See pos[clause] to get position of clause in operand segments
   SmallVector<int> pos(ClauseType::COUNT, -1);
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
index 8f7fb89afba4a..173b92070e10d 100644
--- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp
@@ -1004,7 +1004,7 @@ static void genExpansion(Merger &merger, CodeGen &codegen,
 /// maintain the universal index.
 static bool genInit(Merger &merger, CodeGen &codegen, PatternRewriter &rewriter,
                     linalg::GenericOp op, std::vector<unsigned> &topSort,
-                    unsigned at, llvm::BitVector &inits) {
+                    unsigned at, BitVector &inits) {
   bool needsUniv = false;
   Location loc = op.getLoc();
   unsigned idx = topSort[at];
@@ -1104,7 +1104,7 @@ static bool denseUnitStrides(Merger &merger, linalg::GenericOp op,
 static Operation *genFor(Merger &merger, CodeGen &codegen,
                          PatternRewriter &rewriter, linalg::GenericOp op,
                          bool isOuter, bool isInner, unsigned idx,
-                         llvm::BitVector &indices) {
+                         BitVector &indices) {
   unsigned fb = indices.find_first();
   unsigned tensor = merger.tensor(fb);
   assert(idx == merger.index(fb));
@@ -1175,7 +1175,7 @@ static Operation *genFor(Merger &merger, CodeGen &codegen,
 static Operation *genWhile(Merger &merger, CodeGen &codegen,
                            PatternRewriter &rewriter, linalg::GenericOp op,
                            unsigned idx, bool needsUniv,
-                           llvm::BitVector &indices) {
+                           BitVector &indices) {
   SmallVector<Type, 4> types;
   SmallVector<Value, 4> operands;
   // Construct the while-loop with a parameter for each index.
@@ -1242,7 +1242,7 @@ static Operation *genWhile(Merger &merger, CodeGen &codegen,
 static Operation *genLoop(Merger &merger, CodeGen &codegen,
                           PatternRewriter &rewriter, linalg::GenericOp op,
                           std::vector<unsigned> &topSort, unsigned at,
-                          bool needsUniv, llvm::BitVector &indices) {
+                          bool needsUniv, BitVector &indices) {
   unsigned idx = topSort[at];
   if (indices.count() == 1) {
     bool isOuter = at == 0;
@@ -1258,7 +1258,7 @@ static Operation *genLoop(Merger &merger, CodeGen &codegen,
 static void genLocals(Merger &merger, CodeGen &codegen,
                       PatternRewriter &rewriter, linalg::GenericOp op,
                       std::vector<unsigned> &topSort, unsigned at,
-                      bool needsUniv, llvm::BitVector &locals) {
+                      bool needsUniv, BitVector &locals) {
   Location loc = op.getLoc();
   unsigned idx = topSort[at];
 
@@ -1322,7 +1322,7 @@ static void genLocals(Merger &merger, CodeGen &codegen,
 static void genWhileInduction(Merger &merger, CodeGen &codegen,
                               PatternRewriter &rewriter, linalg::GenericOp op,
                               unsigned idx, bool needsUniv,
-                              llvm::BitVector &induction,
+                              BitVector &induction,
                               scf::WhileOp whileOp) {
   Location loc = op.getLoc();
   // Finalize each else branch of all if statements.
@@ -1409,7 +1409,7 @@ static void genForInduction(Merger &merger, CodeGen &codegen,
 /// Generates a single if-statement within a while-loop.
 static scf::IfOp genIf(Merger &merger, CodeGen &codegen,
                        PatternRewriter &rewriter, linalg::GenericOp op,
-                       unsigned idx, llvm::BitVector &conditions) {
+                       unsigned idx, BitVector &conditions) {
   Location loc = op.getLoc();
   SmallVector<Type, 4> types;
   Value cond;
diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
index 9163aabd3c816..31e7fb5a07edd 100644
--- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
+++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp
@@ -65,7 +65,7 @@ LatPoint::LatPoint(unsigned n, unsigned e, unsigned b)
   bits.set(b);
 }
 
-LatPoint::LatPoint(const llvm::BitVector &b, unsigned e)
+LatPoint::LatPoint(const BitVector &b, unsigned e)
     : bits(b), simple(), exp(e) {}
 
 //===----------------------------------------------------------------------===//
@@ -93,7 +93,7 @@ unsigned Merger::addSet() {
 
 unsigned Merger::conjLatPoint(Kind kind, unsigned p0, unsigned p1) {
   unsigned p = latPoints.size();
-  llvm::BitVector nb = llvm::BitVector(latPoints[p0].bits);
+  BitVector nb = BitVector(latPoints[p0].bits);
   nb |= latPoints[p1].bits;
   unsigned e = addExp(kind, latPoints[p0].exp, latPoints[p1].exp);
   latPoints.push_back(LatPoint(nb, e));
@@ -164,7 +164,7 @@ unsigned Merger::optimizeSet(unsigned s0) {
   return s;
 }
 
-llvm::BitVector Merger::simplifyCond(unsigned s0, unsigned p0) {
+BitVector Merger::simplifyCond(unsigned s0, unsigned p0) {
   // First determine if this lattice point is a *singleton*, i.e.,
   // the last point in a lattice, no other is less than this one.
   bool isSingleton = true;
@@ -175,7 +175,7 @@ llvm::BitVector Merger::simplifyCond(unsigned s0, unsigned p0) {
     }
   }
   // Now apply the two basic rules.
-  llvm::BitVector simple = latPoints[p0].bits;
+  BitVector simple = latPoints[p0].bits;
   bool reset = isSingleton && hasAnyDimOf(simple, kSparse);
   for (unsigned b = 0, be = simple.size(); b < be; b++) {
     if (simple[b] && !isDim(b, kSparse)) {
@@ -188,8 +188,8 @@ llvm::BitVector Merger::simplifyCond(unsigned s0, unsigned p0) {
 }
 
 bool Merger::latGT(unsigned i, unsigned j) const {
-  const llvm::BitVector &bitsi = latPoints[i].bits;
-  const llvm::BitVector &bitsj = latPoints[j].bits;
+  const BitVector &bitsi = latPoints[i].bits;
+  const BitVector &bitsj = latPoints[j].bits;
   assert(bitsi.size() == bitsj.size());
   if (bitsi.count() > bitsj.count()) {
     for (unsigned b = 0, be = bitsj.size(); b < be; b++)
@@ -201,12 +201,12 @@ bool Merger::latGT(unsigned i, unsigned j) const {
 }
 
 bool Merger::onlyDenseDiff(unsigned i, unsigned j) {
-  llvm::BitVector tmp = latPoints[j].bits;
+  BitVector tmp = latPoints[j].bits;
   tmp ^= latPoints[i].bits;
   return !hasAnyDimOf(tmp, kSparse);
 }
 
-bool Merger::hasAnyDimOf(const llvm::BitVector &bits, Dim d) const {
+bool Merger::hasAnyDimOf(const BitVector &bits, Dim d) const {
   for (unsigned b = 0, be = bits.size(); b < be; b++)
     if (bits[b] && isDim(b, d))
       return true;
@@ -386,7 +386,7 @@ void Merger::dumpSet(unsigned s) const {
   llvm::dbgs() << "}\n";
 }
 
-void Merger::dumpBits(const llvm::BitVector &bits) const {
+void Merger::dumpBits(const BitVector &bits) const {
   for (unsigned b = 0, be = bits.size(); b < be; b++) {
     if (bits[b]) {
       unsigned t = tensor(b);
diff --git a/mlir/lib/IR/Block.cpp b/mlir/lib/IR/Block.cpp
index 172e8e4997365..feb21493d0d38 100644
--- a/mlir/lib/IR/Block.cpp
+++ b/mlir/lib/IR/Block.cpp
@@ -187,13 +187,13 @@ void Block::eraseArgument(unsigned index) {
 }
 
 void Block::eraseArguments(ArrayRef<unsigned> argIndices) {
-  llvm::BitVector eraseIndices(getNumArguments());
+  BitVector eraseIndices(getNumArguments());
   for (unsigned i : argIndices)
     eraseIndices.set(i);
   eraseArguments(eraseIndices);
 }
 
-void Block::eraseArguments(const llvm::BitVector &eraseIndices) {
+void Block::eraseArguments(const BitVector &eraseIndices) {
   eraseArguments(
       [&](BlockArgument arg) { return eraseIndices.test(arg.getArgNumber()); });
 }
diff --git a/mlir/lib/IR/BuiltinTypes.cpp b/mlir/lib/IR/BuiltinTypes.cpp
index 63a5962803900..6d3ed12cedf22 100644
--- a/mlir/lib/IR/BuiltinTypes.cpp
+++ b/mlir/lib/IR/BuiltinTypes.cpp
@@ -172,8 +172,8 @@ FunctionType FunctionType::getWithArgsAndResults(
 
 /// Returns a new function type without the specified arguments and results.
 FunctionType
-FunctionType::getWithoutArgsAndResults(const llvm::BitVector &argIndices,
-                                       const llvm::BitVector &resultIndices) {
+FunctionType::getWithoutArgsAndResults(const BitVector &argIndices,
+                                       const BitVector &resultIndices) {
   SmallVector<Type> argStorage, resultStorage;
   TypeRange newArgTypes = function_interface_impl::filterTypesOut(
       getInputs(), argIndices, argStorage);
diff --git a/mlir/lib/IR/FunctionInterfaces.cpp b/mlir/lib/IR/FunctionInterfaces.cpp
index 4f31c59f3f69e..dc3e08c8fdf05 100644
--- a/mlir/lib/IR/FunctionInterfaces.cpp
+++ b/mlir/lib/IR/FunctionInterfaces.cpp
@@ -200,7 +200,7 @@ void mlir::function_interface_impl::insertFunctionResults(
 }
 
 void mlir::function_interface_impl::eraseFunctionArguments(
-    Operation *op, const llvm::BitVector &argIndices, Type newType) {
+    Operation *op, const BitVector &argIndices, Type newType) {
   // There are 3 things that need to be updated:
   // - Function type.
   // - Arg attrs.
@@ -223,7 +223,7 @@ void mlir::function_interface_impl::eraseFunctionArguments(
 }
 
 void mlir::function_interface_impl::eraseFunctionResults(
-    Operation *op, const llvm::BitVector &resultIndices, Type newType) {
+    Operation *op, const BitVector &resultIndices, Type newType) {
   // There are 2 things that need to be updated:
   // - Function type.
   // - Result attrs.
@@ -263,7 +263,7 @@ TypeRange mlir::function_interface_impl::insertTypesInto(
 
 TypeRange
 mlir::function_interface_impl::filterTypesOut(TypeRange types,
-                                              const llvm::BitVector &indices,
+                                              const BitVector &indices,
                                               SmallVectorImpl<Type> &storage) {
   if (indices.none())
     return types;
diff --git a/mlir/lib/IR/OperationSupport.cpp b/mlir/lib/IR/OperationSupport.cpp
index 0cd63013d83f6..012d980955289 100644
--- a/mlir/lib/IR/OperationSupport.cpp
+++ b/mlir/lib/IR/OperationSupport.cpp
@@ -293,7 +293,7 @@ void detail::OperandStorage::eraseOperands(unsigned start, unsigned length) {
 }
 
 void detail::OperandStorage::eraseOperands(
-    const llvm::BitVector &eraseIndices) {
+    const BitVector &eraseIndices) {
   MutableArrayRef<OpOperand> operands = getOperands();
   assert(eraseIndices.size() == operands.size());
 
diff --git a/mlir/test/lib/IR/TestFunc.cpp b/mlir/test/lib/IR/TestFunc.cpp
index 0b4ce1d05992e..20c4ca9e8323f 100644
--- a/mlir/test/lib/IR/TestFunc.cpp
+++ b/mlir/test/lib/IR/TestFunc.cpp
@@ -87,7 +87,7 @@ struct TestFuncEraseArg
     auto module = getOperation();
 
     for (FuncOp func : module.getOps<FuncOp>()) {
-      llvm::BitVector indicesToErase(func.getNumArguments());
+      BitVector indicesToErase(func.getNumArguments());
       for (auto argIndex : llvm::seq<int>(0, func.getNumArguments()))
         if (func.getArgAttr(argIndex, "test.erase_this_arg"))
           indicesToErase.set(argIndex);
@@ -107,7 +107,7 @@ struct TestFuncEraseResult
     auto module = getOperation();
 
     for (FuncOp func : module.getOps<FuncOp>()) {
-      llvm::BitVector indicesToErase(func.getNumResults());
+      BitVector indicesToErase(func.getNumResults());
       for (auto resultIndex : llvm::seq<int>(0, func.getNumResults()))
         if (func.getResultAttr(resultIndex, "test.erase_this_result"))
           indicesToErase.set(resultIndex);
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
index 0dc1ef426d02a..f0ac7d217d2c2 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
@@ -577,7 +577,7 @@ class FormatParser {
   const AttrOrTypeDef &def;
 
   /// Seen attribute or type parameters.
-  llvm::BitVector seenParams;
+  BitVector seenParams;
 };
 } // namespace
 
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
index 7a722305d0c31..1f0a37e6330eb 100644
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -1880,7 +1880,7 @@ static void genEnumAttrPrinter(const NamedAttribute *var, const Operator &op,
 
   // Get a string containing all of the cases that can't be represented with a
   // keyword.
-  llvm::BitVector nonKeywordCases(cases.size());
+  BitVector nonKeywordCases(cases.size());
   bool hasStrCase = false;
   for (auto &it : llvm::enumerate(cases)) {
     hasStrCase = it.value().isStrCase();
diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
index 6ea9960428f0e..5da5ecc5bcf1d 100644
--- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
+++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
@@ -88,7 +88,7 @@ class MergerTestBase : public ::testing::Test {
   /// but there is no required ordering within groups.
   bool latPointWithinRange(unsigned s, unsigned p, unsigned n,
                            const std::shared_ptr<Pattern> &pattern,
-                           const llvm::BitVector &bits) {
+                           const BitVector &bits) {
     for (unsigned i = p; i < p + n; ++i) {
       if (compareExpression(merger.lat(merger.set(s)[i]).exp, pattern) &&
           compareBits(s, i, bits))
@@ -100,22 +100,22 @@ class MergerTestBase : public ::testing::Test {
   /// Wrapper over latPointWithinRange for readability of tests.
   void expectLatPointWithinRange(unsigned s, unsigned p, unsigned n,
                                  const std::shared_ptr<Pattern> &pattern,
-                                 const llvm::BitVector &bits) {
+                                 const BitVector &bits) {
     EXPECT_TRUE(latPointWithinRange(s, p, n, pattern, bits));
   }
 
   /// Wrapper over expectLatPointWithinRange for a single lat point.
   void expectLatPoint(unsigned s, unsigned p,
                       const std::shared_ptr<Pattern> &pattern,
-                      const llvm::BitVector &bits) {
+                      const BitVector &bits) {
     EXPECT_TRUE(latPointWithinRange(s, p, 1, pattern, bits));
   }
 
   /// Converts a vector of (loop, tensor) pairs to a bitvector with the
   /// corresponding bits set.
-  llvm::BitVector
+  BitVector
   loopsToBits(const std::vector<std::pair<unsigned, unsigned>> &loops) {
-    llvm::BitVector testBits = llvm::BitVector(numTensors + 1, false);
+    BitVector testBits = BitVector(numTensors + 1, false);
     for (auto l : loops) {
       auto loop = std::get<0>(l);
       auto tensor = std::get<1>(l);
@@ -125,7 +125,7 @@ class MergerTestBase : public ::testing::Test {
   }
 
   /// Returns true if the bits of lattice point p in set s match the given bits.
-  bool compareBits(unsigned s, unsigned p, const llvm::BitVector &bits) {
+  bool compareBits(unsigned s, unsigned p, const BitVector &bits) {
     return merger.lat(merger.set(s)[p]).bits == bits;
   }
 
diff --git a/mlir/unittests/IR/OperationSupportTest.cpp b/mlir/unittests/IR/OperationSupportTest.cpp
index b5184a1f772cc..6484d7c50d19b 100644
--- a/mlir/unittests/IR/OperationSupportTest.cpp
+++ b/mlir/unittests/IR/OperationSupportTest.cpp
@@ -164,7 +164,7 @@ TEST(OperandStorageTest, RangeErase) {
   // Create an operation with operands to erase.
   Operation *user =
       createOp(&context, {operand2, operand1, operand2, operand1});
-  llvm::BitVector eraseIndices(user->getNumOperands());
+  BitVector eraseIndices(user->getNumOperands());
 
   // Check erasing no operands.
   user->eraseOperands(eraseIndices);

From 6842ec42f665f38bc744e3c576004bc1fd36ad9d Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Wed, 26 Jan 2022 15:49:53 -0800
Subject: [PATCH 842/946] [mlir][NFC] Add a using for llvm::SMLoc/llvm::SMRange
 to LLVM.h

These are used pervasively during parsing.

Differential Revision: https://reviews.llvm.org/D118291
---
 mlir/docs/Tutorials/Toy/Ch-7.md               |   2 +-
 mlir/examples/toy/Ch2/mlir/Dialect.cpp        |   2 +-
 mlir/examples/toy/Ch3/mlir/Dialect.cpp        |   2 +-
 mlir/examples/toy/Ch4/mlir/Dialect.cpp        |   2 +-
 mlir/examples/toy/Ch5/mlir/Dialect.cpp        |   2 +-
 mlir/examples/toy/Ch6/mlir/Dialect.cpp        |   2 +-
 mlir/examples/toy/Ch7/mlir/Dialect.cpp        |   4 +-
 mlir/include/mlir/IR/Diagnostics.h            |   2 +-
 mlir/include/mlir/IR/OpImplementation.h       |  34 ++---
 mlir/include/mlir/Parser/AsmParserState.h     |  34 ++---
 mlir/include/mlir/Support/LLVM.h              |   4 +
 mlir/include/mlir/TableGen/AttrOrTypeDef.h    |   2 +-
 mlir/include/mlir/TableGen/Builder.h          |   2 +-
 mlir/include/mlir/TableGen/Operator.h         |   2 +-
 mlir/include/mlir/TableGen/Pattern.h          |   4 +-
 mlir/include/mlir/TableGen/Predicate.h        |   2 +-
 mlir/include/mlir/Tools/PDLL/AST/Diagnostic.h |  12 +-
 mlir/include/mlir/Tools/PDLL/AST/Nodes.h      | 116 +++++++--------
 mlir/lib/Dialect/Affine/IR/AffineOps.cpp      |   4 +-
 mlir/lib/Dialect/DLTI/DLTI.cpp                |   2 +-
 mlir/lib/Dialect/EmitC/IR/EmitC.cpp           |   4 +-
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp        |   2 +-
 .../GPU/Transforms/SerializeToHsaco.cpp       |   2 +-
 mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp    |  34 ++---
 mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp |  22 +--
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp      |   8 +-
 mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp      |   2 +-
 mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp  |   6 +-
 mlir/lib/Dialect/PDL/IR/PDLTypes.cpp          |   2 +-
 mlir/lib/Dialect/Quant/IR/TypeParser.cpp      |   4 +-
 mlir/lib/Dialect/SCF/SCF.cpp                  |   2 +-
 mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp    |  30 ++--
 mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp        |   6 +-
 mlir/lib/Dialect/Vector/VectorOps.cpp         |   6 +-
 mlir/lib/ExecutionEngine/JitRunner.cpp        |   2 +-
 mlir/lib/IR/Diagnostics.cpp                   |  18 +--
 mlir/lib/IR/FunctionImplementation.cpp        |  10 +-
 mlir/lib/IR/Operation.cpp                     |   2 +-
 mlir/lib/Parser/AffineParser.cpp              |   9 +-
 mlir/lib/Parser/AsmParserImpl.h               |  14 +-
 mlir/lib/Parser/AsmParserState.cpp            |  32 ++---
 mlir/lib/Parser/AttributeParser.cpp           |  28 ++--
 mlir/lib/Parser/DialectSymbolParser.cpp       |  11 +-
 mlir/lib/Parser/Lexer.cpp                     |   5 +-
 mlir/lib/Parser/Lexer.h                       |   2 +-
 mlir/lib/Parser/Parser.cpp                    |  15 +-
 mlir/lib/Parser/Parser.h                      |  10 +-
 mlir/lib/Parser/ParserState.h                 |   2 +-
 mlir/lib/Parser/Token.cpp                     |   3 +-
 mlir/lib/Parser/Token.h                       |   6 +-
 mlir/lib/Parser/TypeParser.cpp                |   4 +-
 mlir/lib/Pass/PassRegistry.cpp                |   4 +-
 mlir/lib/Support/MlirOptMain.cpp              |   1 -
 mlir/lib/Support/ToolUtilities.cpp            |   6 +-
 mlir/lib/TableGen/AttrOrTypeDef.cpp           |   2 +-
 mlir/lib/TableGen/Builder.cpp                 |   2 +-
 mlir/lib/TableGen/Operator.cpp                |   2 +-
 mlir/lib/TableGen/Predicate.cpp               |   2 +-
 mlir/lib/Tools/PDLL/AST/Nodes.cpp             |  44 +++---
 mlir/lib/Tools/PDLL/Parser/Lexer.cpp          |  14 +-
 mlir/lib/Tools/PDLL/Parser/Lexer.h            |  18 +--
 mlir/lib/Tools/PDLL/Parser/Parser.cpp         | 134 +++++++++---------
 mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp |  58 ++++----
 mlir/lib/Translation/Translation.cpp          |   2 +-
 mlir/test/lib/Dialect/Test/TestDialect.cpp    |   2 +-
 mlir/tools/mlir-pdll/mlir-pdll.cpp            |   2 +-
 .../tools/mlir-tblgen/AttrOrTypeFormatGen.cpp |   2 +-
 mlir/tools/mlir-tblgen/FormatGen.cpp          |  12 +-
 mlir/tools/mlir-tblgen/FormatGen.h            |  10 +-
 mlir/tools/mlir-tblgen/OpFormatGen.cpp        |  82 +++++------
 mlir/tools/mlir-tblgen/RewriterGen.cpp        |   2 +-
 .../Dialect/SparseTensor/MergerTest.cpp       |   1 +
 72 files changed, 470 insertions(+), 471 deletions(-)

diff --git a/mlir/docs/Tutorials/Toy/Ch-7.md b/mlir/docs/Tutorials/Toy/Ch-7.md
index 0a99ac2fae88d..b06d5bc539d41 100644
--- a/mlir/docs/Tutorials/Toy/Ch-7.md
+++ b/mlir/docs/Tutorials/Toy/Ch-7.md
@@ -268,7 +268,7 @@ mlir::Type ToyDialect::parseType(mlir::DialectAsmParser &parser) const {
   SmallVector<mlir::Type, 1> elementTypes;
   do {
     // Parse the current element type.
-    llvm::SMLoc typeLoc = parser.getCurrentLocation();
+    SMLoc typeLoc = parser.getCurrentLocation();
     mlir::Type elementType;
     if (parser.parseType(elementType))
       return nullptr;
diff --git a/mlir/examples/toy/Ch2/mlir/Dialect.cpp b/mlir/examples/toy/Ch2/mlir/Dialect.cpp
index 22eea79f01398..278c857ea4681 100644
--- a/mlir/examples/toy/Ch2/mlir/Dialect.cpp
+++ b/mlir/examples/toy/Ch2/mlir/Dialect.cpp
@@ -44,7 +44,7 @@ void ToyDialect::initialize() {
 static mlir::ParseResult parseBinaryOp(mlir::OpAsmParser &parser,
                                        mlir::OperationState &result) {
   SmallVector<mlir::OpAsmParser::OperandType, 2> operands;
-  llvm::SMLoc operandsLoc = parser.getCurrentLocation();
+  SMLoc operandsLoc = parser.getCurrentLocation();
   Type type;
   if (parser.parseOperandList(operands, /*requiredOperandCount=*/2) ||
       parser.parseOptionalAttrDict(result.attributes) ||
diff --git a/mlir/examples/toy/Ch3/mlir/Dialect.cpp b/mlir/examples/toy/Ch3/mlir/Dialect.cpp
index 22eea79f01398..278c857ea4681 100644
--- a/mlir/examples/toy/Ch3/mlir/Dialect.cpp
+++ b/mlir/examples/toy/Ch3/mlir/Dialect.cpp
@@ -44,7 +44,7 @@ void ToyDialect::initialize() {
 static mlir::ParseResult parseBinaryOp(mlir::OpAsmParser &parser,
                                        mlir::OperationState &result) {
   SmallVector<mlir::OpAsmParser::OperandType, 2> operands;
-  llvm::SMLoc operandsLoc = parser.getCurrentLocation();
+  SMLoc operandsLoc = parser.getCurrentLocation();
   Type type;
   if (parser.parseOperandList(operands, /*requiredOperandCount=*/2) ||
       parser.parseOptionalAttrDict(result.attributes) ||
diff --git a/mlir/examples/toy/Ch4/mlir/Dialect.cpp b/mlir/examples/toy/Ch4/mlir/Dialect.cpp
index 4b2a1ee53568a..183cd59261da8 100644
--- a/mlir/examples/toy/Ch4/mlir/Dialect.cpp
+++ b/mlir/examples/toy/Ch4/mlir/Dialect.cpp
@@ -100,7 +100,7 @@ void ToyDialect::initialize() {
 static mlir::ParseResult parseBinaryOp(mlir::OpAsmParser &parser,
                                        mlir::OperationState &result) {
   SmallVector<mlir::OpAsmParser::OperandType, 2> operands;
-  llvm::SMLoc operandsLoc = parser.getCurrentLocation();
+  SMLoc operandsLoc = parser.getCurrentLocation();
   Type type;
   if (parser.parseOperandList(operands, /*requiredOperandCount=*/2) ||
       parser.parseOptionalAttrDict(result.attributes) ||
diff --git a/mlir/examples/toy/Ch5/mlir/Dialect.cpp b/mlir/examples/toy/Ch5/mlir/Dialect.cpp
index f0762a757b1d9..f4ded4956c978 100644
--- a/mlir/examples/toy/Ch5/mlir/Dialect.cpp
+++ b/mlir/examples/toy/Ch5/mlir/Dialect.cpp
@@ -100,7 +100,7 @@ void ToyDialect::initialize() {
 static mlir::ParseResult parseBinaryOp(mlir::OpAsmParser &parser,
                                        mlir::OperationState &result) {
   SmallVector<mlir::OpAsmParser::OperandType, 2> operands;
-  llvm::SMLoc operandsLoc = parser.getCurrentLocation();
+  SMLoc operandsLoc = parser.getCurrentLocation();
   Type type;
   if (parser.parseOperandList(operands, /*requiredOperandCount=*/2) ||
       parser.parseOptionalAttrDict(result.attributes) ||
diff --git a/mlir/examples/toy/Ch6/mlir/Dialect.cpp b/mlir/examples/toy/Ch6/mlir/Dialect.cpp
index f0762a757b1d9..f4ded4956c978 100644
--- a/mlir/examples/toy/Ch6/mlir/Dialect.cpp
+++ b/mlir/examples/toy/Ch6/mlir/Dialect.cpp
@@ -100,7 +100,7 @@ void ToyDialect::initialize() {
 static mlir::ParseResult parseBinaryOp(mlir::OpAsmParser &parser,
                                        mlir::OperationState &result) {
   SmallVector<mlir::OpAsmParser::OperandType, 2> operands;
-  llvm::SMLoc operandsLoc = parser.getCurrentLocation();
+  SMLoc operandsLoc = parser.getCurrentLocation();
   Type type;
   if (parser.parseOperandList(operands, /*requiredOperandCount=*/2) ||
       parser.parseOptionalAttrDict(result.attributes) ||
diff --git a/mlir/examples/toy/Ch7/mlir/Dialect.cpp b/mlir/examples/toy/Ch7/mlir/Dialect.cpp
index 8b63c0bdeaeca..0bf5f7593758b 100644
--- a/mlir/examples/toy/Ch7/mlir/Dialect.cpp
+++ b/mlir/examples/toy/Ch7/mlir/Dialect.cpp
@@ -87,7 +87,7 @@ struct ToyInlinerInterface : public DialectInlinerInterface {
 static mlir::ParseResult parseBinaryOp(mlir::OpAsmParser &parser,
                                        mlir::OperationState &result) {
   SmallVector<mlir::OpAsmParser::OperandType, 2> operands;
-  llvm::SMLoc operandsLoc = parser.getCurrentLocation();
+  SMLoc operandsLoc = parser.getCurrentLocation();
   Type type;
   if (parser.parseOperandList(operands, /*requiredOperandCount=*/2) ||
       parser.parseOptionalAttrDict(result.attributes) ||
@@ -501,7 +501,7 @@ mlir::Type ToyDialect::parseType(mlir::DialectAsmParser &parser) const {
   SmallVector<mlir::Type, 1> elementTypes;
   do {
     // Parse the current element type.
-    llvm::SMLoc typeLoc = parser.getCurrentLocation();
+    SMLoc typeLoc = parser.getCurrentLocation();
     mlir::Type elementType;
     if (parser.parseType(elementType))
       return nullptr;
diff --git a/mlir/include/mlir/IR/Diagnostics.h b/mlir/include/mlir/IR/Diagnostics.h
index b259e51b67a23..97f333abc2288 100644
--- a/mlir/include/mlir/IR/Diagnostics.h
+++ b/mlir/include/mlir/IR/Diagnostics.h
@@ -564,7 +564,7 @@ class SourceMgrDiagnosticHandler : public ScopedDiagnosticHandler {
 
 private:
   /// Convert a location into the given memory buffer into an SMLoc.
-  llvm::SMLoc convertLocToSMLoc(FileLineColLoc loc);
+  SMLoc convertLocToSMLoc(FileLineColLoc loc);
 
   /// Given a location, returns the first nested location (including 'loc') that
   /// can be shown to the user.
diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h
index 77e858e484be7..20310201fd8e4 100644
--- a/mlir/include/mlir/IR/OpImplementation.h
+++ b/mlir/include/mlir/IR/OpImplementation.h
@@ -367,14 +367,14 @@ class AsmParser {
   MLIRContext *getContext() const;
 
   /// Return the location of the original name token.
-  virtual llvm::SMLoc getNameLoc() const = 0;
+  virtual SMLoc getNameLoc() const = 0;
 
   //===--------------------------------------------------------------------===//
   // Utilities
   //===--------------------------------------------------------------------===//
 
   /// Emit a diagnostic at the specified location and return failure.
-  virtual InFlightDiagnostic emitError(llvm::SMLoc loc,
+  virtual InFlightDiagnostic emitError(SMLoc loc,
                                        const Twine &message = {}) = 0;
 
   /// Return a builder which provides useful access to MLIRContext, global
@@ -383,8 +383,8 @@ class AsmParser {
 
   /// Get the location of the next token and store it into the argument.  This
   /// always succeeds.
-  virtual llvm::SMLoc getCurrentLocation() = 0;
-  ParseResult getCurrentLocation(llvm::SMLoc *loc) {
+  virtual SMLoc getCurrentLocation() = 0;
+  ParseResult getCurrentLocation(SMLoc *loc) {
     *loc = getCurrentLocation();
     return success();
   }
@@ -392,7 +392,7 @@ class AsmParser {
   /// Re-encode the given source location as an MLIR location and return it.
   /// Note: This method should only be used when a `Location` is necessary, as
   /// the encoding process is not efficient.
-  virtual Location getEncodedSourceLoc(llvm::SMLoc loc) = 0;
+  virtual Location getEncodedSourceLoc(SMLoc loc) = 0;
 
   //===--------------------------------------------------------------------===//
   // Token Parsing
@@ -627,7 +627,7 @@ class AsmParser {
   /// unlike `OpBuilder::getType`, this method does not implicitly insert a
   /// context parameter.
   template <typename T, typename... ParamsT>
-  T getChecked(llvm::SMLoc loc, ParamsT &&... params) {
+  T getChecked(SMLoc loc, ParamsT &&... params) {
     return T::getChecked([&] { return emitError(loc); },
                          std::forward<ParamsT>(params)...);
   }
@@ -656,7 +656,7 @@ class AsmParser {
   /// Parse an attribute of a specific kind and type.
   template <typename AttrType>
   ParseResult parseAttribute(AttrType &result, Type type = {}) {
-    llvm::SMLoc loc = getCurrentLocation();
+    SMLoc loc = getCurrentLocation();
 
     // Parse any kind of attribute.
     Attribute attr;
@@ -690,7 +690,7 @@ class AsmParser {
   template <typename AttrType>
   ParseResult parseAttribute(AttrType &result, Type type, StringRef attrName,
                              NamedAttrList &attrs) {
-    llvm::SMLoc loc = getCurrentLocation();
+    SMLoc loc = getCurrentLocation();
 
     // Parse any kind of attribute.
     Attribute attr;
@@ -721,7 +721,7 @@ class AsmParser {
   std::enable_if_t<detect_has_parse_method<AttrType>::value, ParseResult>
   parseCustomAttributeWithFallback(AttrType &result, Type type,
                                    StringRef attrName, NamedAttrList &attrs) {
-    llvm::SMLoc loc = getCurrentLocation();
+    SMLoc loc = getCurrentLocation();
 
     // Parse any kind of attribute.
     Attribute attr;
@@ -757,7 +757,7 @@ class AsmParser {
   template <typename AttrType>
   std::enable_if_t<detect_has_parse_method<AttrType>::value, ParseResult>
   parseCustomAttributeWithFallback(AttrType &result) {
-    llvm::SMLoc loc = getCurrentLocation();
+    SMLoc loc = getCurrentLocation();
 
     // Parse any kind of attribute.
     Attribute attr;
@@ -868,7 +868,7 @@ class AsmParser {
   /// Parse a type of a specific type.
   template <typename TypeT>
   ParseResult parseType(TypeT &result) {
-    llvm::SMLoc loc = getCurrentLocation();
+    SMLoc loc = getCurrentLocation();
 
     // Parse any kind of type.
     Type type;
@@ -897,7 +897,7 @@ class AsmParser {
   template <typename TypeT>
   std::enable_if_t<detect_type_has_parse_method<TypeT>::value, ParseResult>
   parseCustomTypeWithFallback(TypeT &result) {
-    llvm::SMLoc loc = getCurrentLocation();
+    SMLoc loc = getCurrentLocation();
 
     // Parse any kind of Type.
     Type type;
@@ -945,7 +945,7 @@ class AsmParser {
   /// Parse a colon followed by a type of a specific kind, e.g. a FunctionType.
   template <typename TypeType>
   ParseResult parseColonType(TypeType &result) {
-    llvm::SMLoc loc = getCurrentLocation();
+    SMLoc loc = getCurrentLocation();
 
     // Parse any kind of type.
     Type type;
@@ -1084,7 +1084,7 @@ class OpAsmParser : public AsmParser {
 
   /// This is the representation of an operand reference.
   struct OperandType {
-    llvm::SMLoc location; // Location of the token.
+    SMLoc location; // Location of the token.
     StringRef name;       // Value name, e.g. %42 or %abc
     unsigned number;      // Number, e.g. 12 for an operand like %xyz#12
   };
@@ -1152,7 +1152,7 @@ class OpAsmParser : public AsmParser {
   /// emitting an error and returning failure, or appending the results
   /// to the list on success.
   ParseResult resolveOperands(ArrayRef<OperandType> operands,
-                              ArrayRef<Type> types, llvm::SMLoc loc,
+                              ArrayRef<Type> types, SMLoc loc,
                               SmallVectorImpl<Value> &result) {
     if (operands.size() != types.size())
       return emitError(loc)
@@ -1165,14 +1165,14 @@ class OpAsmParser : public AsmParser {
     return success();
   }
   template <typename Operands>
-  ParseResult resolveOperands(Operands &&operands, Type type, llvm::SMLoc loc,
+  ParseResult resolveOperands(Operands &&operands, Type type, SMLoc loc,
                               SmallVectorImpl<Value> &result) {
     return resolveOperands(std::forward<Operands>(operands),
                            ArrayRef<Type>(type), loc, result);
   }
   template <typename Operands, typename Types>
   std::enable_if_t<!std::is_convertible<Types, Type>::value, ParseResult>
-  resolveOperands(Operands &&operands, Types &&types, llvm::SMLoc loc,
+  resolveOperands(Operands &&operands, Types &&types, SMLoc loc,
                   SmallVectorImpl<Value> &result) {
     size_t operandSize = std::distance(operands.begin(), operands.end());
     size_t typeSize = std::distance(types.begin(), types.end());
diff --git a/mlir/include/mlir/Parser/AsmParserState.h b/mlir/include/mlir/Parser/AsmParserState.h
index f5b27fa742fe1..6afa1c392d5ba 100644
--- a/mlir/include/mlir/Parser/AsmParserState.h
+++ b/mlir/include/mlir/Parser/AsmParserState.h
@@ -35,19 +35,19 @@ class AsmParserState {
   /// values, Blocks, and Symbols.
   struct SMDefinition {
     SMDefinition() = default;
-    SMDefinition(llvm::SMRange loc) : loc(loc) {}
+    SMDefinition(SMRange loc) : loc(loc) {}
 
     /// The source location of the definition.
-    llvm::SMRange loc;
+    SMRange loc;
     /// The source location of all uses of the definition.
-    SmallVector<llvm::SMRange> uses;
+    SmallVector<SMRange> uses;
   };
 
   /// This class represents the information for an operation definition within
   /// an input file.
   struct OperationDefinition {
     struct ResultGroupDefinition {
-      ResultGroupDefinition(unsigned index, llvm::SMRange loc)
+      ResultGroupDefinition(unsigned index, SMRange loc)
           : startIndex(index), definition(loc) {}
 
       /// The result number that starts this group.
@@ -56,31 +56,31 @@ class AsmParserState {
       SMDefinition definition;
     };
 
-    OperationDefinition(Operation *op, llvm::SMRange loc, llvm::SMLoc endLoc)
+    OperationDefinition(Operation *op, SMRange loc, SMLoc endLoc)
         : op(op), loc(loc), scopeLoc(loc.Start, endLoc) {}
 
     /// The operation representing this definition.
     Operation *op;
 
     /// The source location for the operation, i.e. the location of its name.
-    llvm::SMRange loc;
+    SMRange loc;
 
     /// The full source range of the operation definition, i.e. a range
     /// encompassing the start and end of the full operation definition.
-    llvm::SMRange scopeLoc;
+    SMRange scopeLoc;
 
     /// Source definitions for any result groups of this operation.
     SmallVector<ResultGroupDefinition> resultGroups;
 
     /// If this operation is a symbol operation, this vector contains symbol
     /// uses of this operation.
-    SmallVector<llvm::SMRange> symbolUses;
+    SmallVector<SMRange> symbolUses;
   };
 
   /// This class represents the information for a block definition within the
   /// input file.
   struct BlockDefinition {
-    BlockDefinition(Block *block, llvm::SMRange loc = {})
+    BlockDefinition(Block *block, SMRange loc = {})
         : block(block), definition(loc) {}
 
     /// The block representing this definition.
@@ -124,7 +124,7 @@ class AsmParserState {
 
   /// Returns (heuristically) the range of an identifier given a SMLoc
   /// corresponding to the start of an identifier location.
-  static llvm::SMRange convertIdLocToRange(llvm::SMLoc loc);
+  static SMRange convertIdLocToRange(SMLoc loc);
 
   //===--------------------------------------------------------------------===//
   // Populate State
@@ -142,8 +142,8 @@ class AsmParserState {
 
   /// Finalize the most recently started operation definition.
   void finalizeOperationDefinition(
-      Operation *op, llvm::SMRange nameLoc, llvm::SMLoc endLoc,
-      ArrayRef<std::pair<unsigned, llvm::SMLoc>> resultGroups = llvm::None);
+      Operation *op, SMRange nameLoc, SMLoc endLoc,
+      ArrayRef<std::pair<unsigned, SMLoc>> resultGroups = llvm::None);
 
   /// Start a definition for a region nested under the current operation.
   void startRegionDefinition();
@@ -152,18 +152,18 @@ class AsmParserState {
   void finalizeRegionDefinition();
 
   /// Add a definition of the given entity.
-  void addDefinition(Block *block, llvm::SMLoc location);
-  void addDefinition(BlockArgument blockArg, llvm::SMLoc location);
+  void addDefinition(Block *block, SMLoc location);
+  void addDefinition(BlockArgument blockArg, SMLoc location);
 
   /// Add a source uses of the given value.
-  void addUses(Value value, ArrayRef<llvm::SMLoc> locations);
-  void addUses(Block *block, ArrayRef<llvm::SMLoc> locations);
+  void addUses(Value value, ArrayRef<SMLoc> locations);
+  void addUses(Block *block, ArrayRef<SMLoc> locations);
 
   /// Add source uses for all the references nested under `refAttr`. The
   /// provided `locations` should match 1-1 with the number of references in
   /// `refAttr`, i.e.:
   ///   nestedReferences.size() + /*leafReference=*/1 == refLocations.size()
-  void addUses(SymbolRefAttr refAttr, ArrayRef<llvm::SMRange> refLocations);
+  void addUses(SymbolRefAttr refAttr, ArrayRef<SMRange> refLocations);
 
   /// Refine the `oldValue` to the `newValue`. This is used to indicate that
   /// `oldValue` was a placeholder, and the uses of it should really refer to
diff --git a/mlir/include/mlir/Support/LLVM.h b/mlir/include/mlir/Support/LLVM.h
index eda75cccc9cc9..60eb7d0f9d2af 100644
--- a/mlir/include/mlir/Support/LLVM.h
+++ b/mlir/include/mlir/Support/LLVM.h
@@ -71,6 +71,8 @@ class APFloat;
 template <typename Fn> class function_ref;
 template <typename IteratorT> class iterator_range;
 class raw_ostream;
+class SMLoc;
+class SMRange;
 } // namespace llvm
 
 namespace mlir {
@@ -127,6 +129,8 @@ using llvm::APSInt;
 template <typename Fn> using function_ref = llvm::function_ref<Fn>;
 using llvm::iterator_range;
 using llvm::raw_ostream;
+using llvm::SMLoc;
+using llvm::SMRange;
 } // namespace mlir
 
 #endif // MLIR_SUPPORT_LLVM_H
diff --git a/mlir/include/mlir/TableGen/AttrOrTypeDef.h b/mlir/include/mlir/TableGen/AttrOrTypeDef.h
index 9f2375bc30207..7be684d4c5343 100644
--- a/mlir/include/mlir/TableGen/AttrOrTypeDef.h
+++ b/mlir/include/mlir/TableGen/AttrOrTypeDef.h
@@ -196,7 +196,7 @@ class AttrOrTypeDef {
   Optional<StringRef> getExtraDecls() const;
 
   // Get the code location (for error printing).
-  ArrayRef<llvm::SMLoc> getLoc() const;
+  ArrayRef<SMLoc> getLoc() const;
 
   // Returns true if the default get/getChecked methods should be skipped during
   // generation.
diff --git a/mlir/include/mlir/TableGen/Builder.h b/mlir/include/mlir/TableGen/Builder.h
index a5e6d01c99571..a3257bcb85a15 100644
--- a/mlir/include/mlir/TableGen/Builder.h
+++ b/mlir/include/mlir/TableGen/Builder.h
@@ -62,7 +62,7 @@ class Builder {
   };
 
   /// Construct a builder from the given Record instance.
-  Builder(const llvm::Record *record, ArrayRef<llvm::SMLoc> loc);
+  Builder(const llvm::Record *record, ArrayRef<SMLoc> loc);
 
   /// Return a list of parameters used in this build method.
   ArrayRef<Parameter> getParameters() const { return parameters; }
diff --git a/mlir/include/mlir/TableGen/Operator.h b/mlir/include/mlir/TableGen/Operator.h
index 298e76b4239d4..6bac85075622e 100644
--- a/mlir/include/mlir/TableGen/Operator.h
+++ b/mlir/include/mlir/TableGen/Operator.h
@@ -222,7 +222,7 @@ class Operator {
   const_trait_iterator trait_end() const;
   llvm::iterator_range<const_trait_iterator> getTraits() const;
 
-  ArrayRef<llvm::SMLoc> getLoc() const;
+  ArrayRef<SMLoc> getLoc() const;
 
   // Query functions for the documentation of the operator.
   bool hasDescription() const;
diff --git a/mlir/include/mlir/TableGen/Pattern.h b/mlir/include/mlir/TableGen/Pattern.h
index f55b1afe0947e..656cfcb0f5c9c 100644
--- a/mlir/include/mlir/TableGen/Pattern.h
+++ b/mlir/include/mlir/TableGen/Pattern.h
@@ -245,7 +245,7 @@ class DagNode {
 // values in a suitable way.
 class SymbolInfoMap {
 public:
-  explicit SymbolInfoMap(ArrayRef<llvm::SMLoc> loc) : loc(loc) {}
+  explicit SymbolInfoMap(ArrayRef<SMLoc> loc) : loc(loc) {}
 
   // Class for information regarding a symbol.
   class SymbolInfo {
@@ -445,7 +445,7 @@ class SymbolInfoMap {
 
   // Pattern instantiation location. This is intended to be used as parameter
   // to PrintFatalError() to report errors.
-  ArrayRef<llvm::SMLoc> loc;
+  ArrayRef<SMLoc> loc;
 };
 
 // Wrapper class providing helper methods for accessing MLIR Pattern defined
diff --git a/mlir/include/mlir/TableGen/Predicate.h b/mlir/include/mlir/TableGen/Predicate.h
index 88f118a6391d4..2e47afde0b27d 100644
--- a/mlir/include/mlir/TableGen/Predicate.h
+++ b/mlir/include/mlir/TableGen/Predicate.h
@@ -54,7 +54,7 @@ class Pred {
   bool isCombined() const;
 
   // Get the location of the predicate.
-  ArrayRef<llvm::SMLoc> getLoc() const;
+  ArrayRef<SMLoc> getLoc() const;
 
   // Records are pointer-comparable.
   bool operator==(const Pred &other) const { return def == other.def; }
diff --git a/mlir/include/mlir/Tools/PDLL/AST/Diagnostic.h b/mlir/include/mlir/Tools/PDLL/AST/Diagnostic.h
index 273ed57dbbad5..628f7dc4b3cc9 100644
--- a/mlir/include/mlir/Tools/PDLL/AST/Diagnostic.h
+++ b/mlir/include/mlir/Tools/PDLL/AST/Diagnostic.h
@@ -37,14 +37,14 @@ class Diagnostic {
   StringRef getMessage() const { return message; }
 
   /// Return the location of this diagnostic.
-  llvm::SMRange getLocation() const { return location; }
+  SMRange getLocation() const { return location; }
 
   /// Return the notes of this diagnostic.
   auto getNotes() const { return llvm::make_pointee_range(notes); }
 
   /// Attach a note to this diagnostic.
   Diagnostic &attachNote(const Twine &msg,
-                         Optional<llvm::SMRange> noteLoc = llvm::None) {
+                         Optional<SMRange> noteLoc = llvm::None) {
     assert(getSeverity() != Severity::DK_Note &&
            "cannot attach a Note to a Note");
     notes.emplace_back(
@@ -57,7 +57,7 @@ class Diagnostic {
   operator LogicalResult() const { return failure(); }
 
 private:
-  Diagnostic(Severity severity, llvm::SMRange loc, const Twine &msg)
+  Diagnostic(Severity severity, SMRange loc, const Twine &msg)
       : severity(severity), message(msg.str()), location(loc) {}
 
   // Allow access to the constructor.
@@ -68,7 +68,7 @@ class Diagnostic {
   /// The message held by this diagnostic.
   std::string message;
   /// The raw location of this diagnostic.
-  llvm::SMRange location;
+  SMRange location;
   /// Any additional note diagnostics attached to this diagnostic.
   std::vector<std::unique_ptr<Diagnostic>> notes;
 };
@@ -142,11 +142,11 @@ class DiagnosticEngine {
   using HandlerFn = llvm::unique_function<void(Diagnostic &)>;
 
   /// Emit an error to the diagnostic engine.
-  InFlightDiagnostic emitError(llvm::SMRange loc, const Twine &msg) {
+  InFlightDiagnostic emitError(SMRange loc, const Twine &msg) {
     return InFlightDiagnostic(
         this, Diagnostic(Diagnostic::Severity::DK_Error, loc, msg));
   }
-  InFlightDiagnostic emitWarning(llvm::SMRange loc, const Twine &msg) {
+  InFlightDiagnostic emitWarning(SMRange loc, const Twine &msg) {
     return InFlightDiagnostic(
         this, Diagnostic(Diagnostic::Severity::DK_Warning, loc, msg));
   }
diff --git a/mlir/include/mlir/Tools/PDLL/AST/Nodes.h b/mlir/include/mlir/Tools/PDLL/AST/Nodes.h
index e928edb85ce47..d46b9004b03ad 100644
--- a/mlir/include/mlir/Tools/PDLL/AST/Nodes.h
+++ b/mlir/include/mlir/Tools/PDLL/AST/Nodes.h
@@ -35,25 +35,25 @@ class VariableDecl;
 /// contains a string name as well as the source location for that name.
 struct Name {
   static const Name &create(Context &ctx, StringRef name,
-                            llvm::SMRange location);
+                            SMRange location);
 
   /// Return the raw string name.
   StringRef getName() const { return name; }
 
   /// Get the location of this name.
-  llvm::SMRange getLoc() const { return location; }
+  SMRange getLoc() const { return location; }
 
 private:
   Name() = delete;
   Name(const Name &) = delete;
   Name &operator=(const Name &) = delete;
-  Name(StringRef name, llvm::SMRange location)
+  Name(StringRef name, SMRange location)
       : name(name), location(location) {}
 
   /// The string name of the decl.
   StringRef name;
   /// The location of the decl name.
-  llvm::SMRange location;
+  SMRange location;
 };
 
 //===----------------------------------------------------------------------===//
@@ -118,7 +118,7 @@ class Node {
 
   protected:
     template <typename... Args>
-    explicit NodeBase(llvm::SMRange loc, Args &&...args)
+    explicit NodeBase(SMRange loc, Args &&...args)
         : BaseT(TypeID::get<T>(), loc, std::forward<Args>(args)...) {}
   };
 
@@ -126,20 +126,20 @@ class Node {
   TypeID getTypeID() const { return typeID; }
 
   /// Return the location of this node.
-  llvm::SMRange getLoc() const { return loc; }
+  SMRange getLoc() const { return loc; }
 
   /// Print this node to the given stream.
   void print(raw_ostream &os) const;
 
 protected:
-  Node(TypeID typeID, llvm::SMRange loc) : typeID(typeID), loc(loc) {}
+  Node(TypeID typeID, SMRange loc) : typeID(typeID), loc(loc) {}
 
 private:
   /// A unique type identifier for this node.
   TypeID typeID;
 
   /// The location of this node.
-  llvm::SMRange loc;
+  SMRange loc;
 };
 
 //===----------------------------------------------------------------------===//
@@ -164,7 +164,7 @@ class Stmt : public Node {
 class CompoundStmt final : public Node::NodeBase<CompoundStmt, Stmt>,
                            private llvm::TrailingObjects<CompoundStmt, Stmt *> {
 public:
-  static CompoundStmt *create(Context &ctx, llvm::SMRange location,
+  static CompoundStmt *create(Context &ctx, SMRange location,
                               ArrayRef<Stmt *> children);
 
   /// Return the children of this compound statement.
@@ -178,7 +178,7 @@ class CompoundStmt final : public Node::NodeBase<CompoundStmt, Stmt>,
   ArrayRef<Stmt *>::iterator end() const { return getChildren().end(); }
 
 private:
-  CompoundStmt(llvm::SMRange location, unsigned numChildren)
+  CompoundStmt(SMRange location, unsigned numChildren)
       : Base(location), numChildren(numChildren) {}
 
   /// The number of held children statements.
@@ -196,14 +196,14 @@ class CompoundStmt final : public Node::NodeBase<CompoundStmt, Stmt>,
 /// to define variables.
 class LetStmt final : public Node::NodeBase<LetStmt, Stmt> {
 public:
-  static LetStmt *create(Context &ctx, llvm::SMRange loc,
+  static LetStmt *create(Context &ctx, SMRange loc,
                          VariableDecl *varDecl);
 
   /// Return the variable defined by this statement.
   VariableDecl *getVarDecl() const { return varDecl; }
 
 private:
-  LetStmt(llvm::SMRange loc, VariableDecl *varDecl)
+  LetStmt(SMRange loc, VariableDecl *varDecl)
       : Base(loc), varDecl(varDecl) {}
 
   /// The variable defined by this statement.
@@ -225,7 +225,7 @@ class OpRewriteStmt : public Stmt {
   Expr *getRootOpExpr() const { return rootOp; }
 
 protected:
-  OpRewriteStmt(TypeID typeID, llvm::SMRange loc, Expr *rootOp)
+  OpRewriteStmt(TypeID typeID, SMRange loc, Expr *rootOp)
       : Stmt(typeID, loc), rootOp(rootOp) {}
 
 protected:
@@ -241,10 +241,10 @@ class OpRewriteStmt : public Stmt {
 /// PatternRewriter::eraseOp API.
 class EraseStmt final : public Node::NodeBase<EraseStmt, OpRewriteStmt> {
 public:
-  static EraseStmt *create(Context &ctx, llvm::SMRange loc, Expr *rootOp);
+  static EraseStmt *create(Context &ctx, SMRange loc, Expr *rootOp);
 
 private:
-  EraseStmt(llvm::SMRange loc, Expr *rootOp) : Base(loc, rootOp) {}
+  EraseStmt(SMRange loc, Expr *rootOp) : Base(loc, rootOp) {}
 };
 
 //===----------------------------------------------------------------------===//
@@ -256,7 +256,7 @@ class EraseStmt final : public Node::NodeBase<EraseStmt, OpRewriteStmt> {
 class ReplaceStmt final : public Node::NodeBase<ReplaceStmt, OpRewriteStmt>,
                           private llvm::TrailingObjects<ReplaceStmt, Expr *> {
 public:
-  static ReplaceStmt *create(Context &ctx, llvm::SMRange loc, Expr *rootOp,
+  static ReplaceStmt *create(Context &ctx, SMRange loc, Expr *rootOp,
                              ArrayRef<Expr *> replExprs);
 
   /// Return the replacement values of this statement.
@@ -268,7 +268,7 @@ class ReplaceStmt final : public Node::NodeBase<ReplaceStmt, OpRewriteStmt>,
   }
 
 private:
-  ReplaceStmt(llvm::SMRange loc, Expr *rootOp, unsigned numReplExprs)
+  ReplaceStmt(SMRange loc, Expr *rootOp, unsigned numReplExprs)
       : Base(loc, rootOp), numReplExprs(numReplExprs) {}
 
   /// The number of replacement values within this statement.
@@ -286,14 +286,14 @@ class ReplaceStmt final : public Node::NodeBase<ReplaceStmt, OpRewriteStmt>,
 /// rewrites that span across multiple statements, which may be unconnected.
 class RewriteStmt final : public Node::NodeBase<RewriteStmt, OpRewriteStmt> {
 public:
-  static RewriteStmt *create(Context &ctx, llvm::SMRange loc, Expr *rootOp,
+  static RewriteStmt *create(Context &ctx, SMRange loc, Expr *rootOp,
                              CompoundStmt *rewriteBody);
 
   /// Return the compound rewrite body.
   CompoundStmt *getRewriteBody() const { return rewriteBody; }
 
 private:
-  RewriteStmt(llvm::SMRange loc, Expr *rootOp, CompoundStmt *rewriteBody)
+  RewriteStmt(SMRange loc, Expr *rootOp, CompoundStmt *rewriteBody)
       : Base(loc, rootOp), rewriteBody(rewriteBody) {}
 
   /// The body of nested rewriters within this statement.
@@ -314,7 +314,7 @@ class Expr : public Stmt {
   static bool classof(const Node *node);
 
 protected:
-  Expr(TypeID typeID, llvm::SMRange loc, Type type)
+  Expr(TypeID typeID, SMRange loc, Type type)
       : Stmt(typeID, loc), type(type) {}
 
 private:
@@ -330,7 +330,7 @@ class Expr : public Stmt {
 /// textual assembly format of that attribute.
 class AttributeExpr : public Node::NodeBase<AttributeExpr, Expr> {
 public:
-  static AttributeExpr *create(Context &ctx, llvm::SMRange loc,
+  static AttributeExpr *create(Context &ctx, SMRange loc,
                                StringRef value);
 
   /// Get the raw value of this expression. This is the textual assembly format
@@ -338,7 +338,7 @@ class AttributeExpr : public Node::NodeBase<AttributeExpr, Expr> {
   StringRef getValue() const { return value; }
 
 private:
-  AttributeExpr(Context &ctx, llvm::SMRange loc, StringRef value)
+  AttributeExpr(Context &ctx, SMRange loc, StringRef value)
       : Base(loc, AttributeType::get(ctx)), value(value) {}
 
   /// The value referenced by this expression.
@@ -352,14 +352,14 @@ class AttributeExpr : public Node::NodeBase<AttributeExpr, Expr> {
 /// This expression represents a reference to a Decl node.
 class DeclRefExpr : public Node::NodeBase<DeclRefExpr, Expr> {
 public:
-  static DeclRefExpr *create(Context &ctx, llvm::SMRange loc, Decl *decl,
+  static DeclRefExpr *create(Context &ctx, SMRange loc, Decl *decl,
                              Type type);
 
   /// Get the decl referenced by this expression.
   Decl *getDecl() const { return decl; }
 
 private:
-  DeclRefExpr(llvm::SMRange loc, Decl *decl, Type type)
+  DeclRefExpr(SMRange loc, Decl *decl, Type type)
       : Base(loc, type), decl(decl) {}
 
   /// The decl referenced by this expression.
@@ -374,7 +374,7 @@ class DeclRefExpr : public Node::NodeBase<DeclRefExpr, Expr> {
 /// expression.
 class MemberAccessExpr : public Node::NodeBase<MemberAccessExpr, Expr> {
 public:
-  static MemberAccessExpr *create(Context &ctx, llvm::SMRange loc,
+  static MemberAccessExpr *create(Context &ctx, SMRange loc,
                                   const Expr *parentExpr, StringRef memberName,
                                   Type type);
 
@@ -385,7 +385,7 @@ class MemberAccessExpr : public Node::NodeBase<MemberAccessExpr, Expr> {
   StringRef getMemberName() const { return memberName; }
 
 private:
-  MemberAccessExpr(llvm::SMRange loc, const Expr *parentExpr,
+  MemberAccessExpr(SMRange loc, const Expr *parentExpr,
                    StringRef memberName, Type type)
       : Base(loc, type), parentExpr(parentExpr), memberName(memberName) {}
 
@@ -406,7 +406,7 @@ class AllResultsMemberAccessExpr : public MemberAccessExpr {
   /// Return the member name used for the "all-results" access.
   static StringRef getMemberName() { return "$results"; }
 
-  static AllResultsMemberAccessExpr *create(Context &ctx, llvm::SMRange loc,
+  static AllResultsMemberAccessExpr *create(Context &ctx, SMRange loc,
                                             const Expr *parentExpr, Type type) {
     return cast<AllResultsMemberAccessExpr>(
         MemberAccessExpr::create(ctx, loc, parentExpr, getMemberName(), type));
@@ -431,7 +431,7 @@ class OperationExpr final
       private llvm::TrailingObjects<OperationExpr, Expr *,
                                     NamedAttributeDecl *> {
 public:
-  static OperationExpr *create(Context &ctx, llvm::SMRange loc,
+  static OperationExpr *create(Context &ctx, SMRange loc,
                                const OpNameDecl *nameDecl,
                                ArrayRef<Expr *> operands,
                                ArrayRef<Expr *> resultTypes,
@@ -445,7 +445,7 @@ class OperationExpr final
 
   /// Return the location of the name of the operation expression, or an invalid
   /// location if there isn't a name.
-  llvm::SMRange getNameLoc() const { return nameLoc; }
+  SMRange getNameLoc() const { return nameLoc; }
 
   /// Return the operands of this operation.
   MutableArrayRef<Expr *> getOperands() {
@@ -472,9 +472,9 @@ class OperationExpr final
   }
 
 private:
-  OperationExpr(llvm::SMRange loc, Type type, const OpNameDecl *nameDecl,
+  OperationExpr(SMRange loc, Type type, const OpNameDecl *nameDecl,
                 unsigned numOperands, unsigned numResultTypes,
-                unsigned numAttributes, llvm::SMRange nameLoc)
+                unsigned numAttributes, SMRange nameLoc)
       : Base(loc, type), nameDecl(nameDecl), numOperands(numOperands),
         numResultTypes(numResultTypes), numAttributes(numAttributes),
         nameLoc(nameLoc) {}
@@ -486,7 +486,7 @@ class OperationExpr final
   unsigned numOperands, numResultTypes, numAttributes;
 
   /// The location of the operation name in the expression if it has a name.
-  llvm::SMRange nameLoc;
+  SMRange nameLoc;
 
   /// TrailingObject utilities.
   friend llvm::TrailingObjects<OperationExpr, Expr *, NamedAttributeDecl *>;
@@ -503,7 +503,7 @@ class OperationExpr final
 class TupleExpr final : public Node::NodeBase<TupleExpr, Expr>,
                         private llvm::TrailingObjects<TupleExpr, Expr *> {
 public:
-  static TupleExpr *create(Context &ctx, llvm::SMRange loc,
+  static TupleExpr *create(Context &ctx, SMRange loc,
                            ArrayRef<Expr *> elements,
                            ArrayRef<StringRef> elementNames);
 
@@ -519,7 +519,7 @@ class TupleExpr final : public Node::NodeBase<TupleExpr, Expr>,
   TupleType getType() const { return Base::getType().cast<TupleType>(); }
 
 private:
-  TupleExpr(llvm::SMRange loc, TupleType type) : Base(loc, type) {}
+  TupleExpr(SMRange loc, TupleType type) : Base(loc, type) {}
 
   /// TrailingObject utilities.
   friend class llvm::TrailingObjects<TupleExpr, Expr *>;
@@ -533,14 +533,14 @@ class TupleExpr final : public Node::NodeBase<TupleExpr, Expr>,
 /// assembly format of that type.
 class TypeExpr : public Node::NodeBase<TypeExpr, Expr> {
 public:
-  static TypeExpr *create(Context &ctx, llvm::SMRange loc, StringRef value);
+  static TypeExpr *create(Context &ctx, SMRange loc, StringRef value);
 
   /// Get the raw value of this expression. This is the textual assembly format
   /// of the MLIR Type.
   StringRef getValue() const { return value; }
 
 private:
-  TypeExpr(Context &ctx, llvm::SMRange loc, StringRef value)
+  TypeExpr(Context &ctx, SMRange loc, StringRef value)
       : Base(loc, TypeType::get(ctx)), value(value) {}
 
   /// The value referenced by this expression.
@@ -561,7 +561,7 @@ class Decl : public Node {
   static bool classof(const Node *node);
 
 protected:
-  Decl(TypeID typeID, llvm::SMRange loc, const Name *name = nullptr)
+  Decl(TypeID typeID, SMRange loc, const Name *name = nullptr)
       : Node(typeID, loc), name(name) {}
 
 private:
@@ -582,20 +582,20 @@ class ConstraintDecl : public Decl {
   static bool classof(const Node *node);
 
 protected:
-  ConstraintDecl(TypeID typeID, llvm::SMRange loc, const Name *name = nullptr)
+  ConstraintDecl(TypeID typeID, SMRange loc, const Name *name = nullptr)
       : Decl(typeID, loc, name) {}
 };
 
 /// This class represents a reference to a constraint, and contains a constraint
 /// and the location of the reference.
 struct ConstraintRef {
-  ConstraintRef(const ConstraintDecl *constraint, llvm::SMRange refLoc)
+  ConstraintRef(const ConstraintDecl *constraint, SMRange refLoc)
       : constraint(constraint), referenceLoc(refLoc) {}
   explicit ConstraintRef(const ConstraintDecl *constraint)
       : ConstraintRef(constraint, constraint->getLoc()) {}
 
   const ConstraintDecl *constraint;
-  llvm::SMRange referenceLoc;
+  SMRange referenceLoc;
 };
 
 //===----------------------------------------------------------------------===//
@@ -611,7 +611,7 @@ class CoreConstraintDecl : public ConstraintDecl {
   static bool classof(const Node *node);
 
 protected:
-  CoreConstraintDecl(TypeID typeID, llvm::SMRange loc,
+  CoreConstraintDecl(TypeID typeID, SMRange loc,
                      const Name *name = nullptr)
       : ConstraintDecl(typeID, loc, name) {}
 };
@@ -624,7 +624,7 @@ class CoreConstraintDecl : public ConstraintDecl {
 class AttrConstraintDecl
     : public Node::NodeBase<AttrConstraintDecl, CoreConstraintDecl> {
 public:
-  static AttrConstraintDecl *create(Context &ctx, llvm::SMRange loc,
+  static AttrConstraintDecl *create(Context &ctx, SMRange loc,
                                     Expr *typeExpr = nullptr);
 
   /// Return the optional type the attribute is constrained to.
@@ -632,7 +632,7 @@ class AttrConstraintDecl
   const Expr *getTypeExpr() const { return typeExpr; }
 
 protected:
-  AttrConstraintDecl(llvm::SMRange loc, Expr *typeExpr)
+  AttrConstraintDecl(SMRange loc, Expr *typeExpr)
       : Base(loc), typeExpr(typeExpr) {}
 
   /// An optional type that the attribute is constrained to.
@@ -647,7 +647,7 @@ class AttrConstraintDecl
 class OpConstraintDecl
     : public Node::NodeBase<OpConstraintDecl, CoreConstraintDecl> {
 public:
-  static OpConstraintDecl *create(Context &ctx, llvm::SMRange loc,
+  static OpConstraintDecl *create(Context &ctx, SMRange loc,
                                   const OpNameDecl *nameDecl = nullptr);
 
   /// Return the name of the operation, or None if there isn't one.
@@ -657,7 +657,7 @@ class OpConstraintDecl
   const OpNameDecl *getNameDecl() const { return nameDecl; }
 
 protected:
-  explicit OpConstraintDecl(llvm::SMRange loc, const OpNameDecl *nameDecl)
+  explicit OpConstraintDecl(SMRange loc, const OpNameDecl *nameDecl)
       : Base(loc), nameDecl(nameDecl) {}
 
   /// The operation name of this constraint.
@@ -672,7 +672,7 @@ class OpConstraintDecl
 class TypeConstraintDecl
     : public Node::NodeBase<TypeConstraintDecl, CoreConstraintDecl> {
 public:
-  static TypeConstraintDecl *create(Context &ctx, llvm::SMRange loc);
+  static TypeConstraintDecl *create(Context &ctx, SMRange loc);
 
 protected:
   using Base::Base;
@@ -686,7 +686,7 @@ class TypeConstraintDecl
 class TypeRangeConstraintDecl
     : public Node::NodeBase<TypeRangeConstraintDecl, CoreConstraintDecl> {
 public:
-  static TypeRangeConstraintDecl *create(Context &ctx, llvm::SMRange loc);
+  static TypeRangeConstraintDecl *create(Context &ctx, SMRange loc);
 
 protected:
   using Base::Base;
@@ -700,7 +700,7 @@ class TypeRangeConstraintDecl
 class ValueConstraintDecl
     : public Node::NodeBase<ValueConstraintDecl, CoreConstraintDecl> {
 public:
-  static ValueConstraintDecl *create(Context &ctx, llvm::SMRange loc,
+  static ValueConstraintDecl *create(Context &ctx, SMRange loc,
                                      Expr *typeExpr);
 
   /// Return the optional type the value is constrained to.
@@ -708,7 +708,7 @@ class ValueConstraintDecl
   const Expr *getTypeExpr() const { return typeExpr; }
 
 protected:
-  ValueConstraintDecl(llvm::SMRange loc, Expr *typeExpr)
+  ValueConstraintDecl(SMRange loc, Expr *typeExpr)
       : Base(loc), typeExpr(typeExpr) {}
 
   /// An optional type that the value is constrained to.
@@ -723,7 +723,7 @@ class ValueConstraintDecl
 class ValueRangeConstraintDecl
     : public Node::NodeBase<ValueRangeConstraintDecl, CoreConstraintDecl> {
 public:
-  static ValueRangeConstraintDecl *create(Context &ctx, llvm::SMRange loc,
+  static ValueRangeConstraintDecl *create(Context &ctx, SMRange loc,
                                           Expr *typeExpr);
 
   /// Return the optional type the value range is constrained to.
@@ -731,7 +731,7 @@ class ValueRangeConstraintDecl
   const Expr *getTypeExpr() const { return typeExpr; }
 
 protected:
-  ValueRangeConstraintDecl(llvm::SMRange loc, Expr *typeExpr)
+  ValueRangeConstraintDecl(SMRange loc, Expr *typeExpr)
       : Base(loc), typeExpr(typeExpr) {}
 
   /// An optional type that the value range is constrained to.
@@ -771,7 +771,7 @@ class NamedAttributeDecl : public Node::NodeBase<NamedAttributeDecl, Decl> {
 class OpNameDecl : public Node::NodeBase<OpNameDecl, Decl> {
 public:
   static OpNameDecl *create(Context &ctx, const Name &name);
-  static OpNameDecl *create(Context &ctx, llvm::SMRange loc);
+  static OpNameDecl *create(Context &ctx, SMRange loc);
 
   /// Return the name of this operation, or none if the name is unknown.
   Optional<StringRef> getName() const {
@@ -781,7 +781,7 @@ class OpNameDecl : public Node::NodeBase<OpNameDecl, Decl> {
 
 private:
   explicit OpNameDecl(const Name &name) : Base(name.getLoc(), &name) {}
-  explicit OpNameDecl(llvm::SMRange loc) : Base(loc) {}
+  explicit OpNameDecl(SMRange loc) : Base(loc) {}
 };
 
 //===----------------------------------------------------------------------===//
@@ -791,7 +791,7 @@ class OpNameDecl : public Node::NodeBase<OpNameDecl, Decl> {
 /// This Decl represents a single Pattern.
 class PatternDecl : public Node::NodeBase<PatternDecl, Decl> {
 public:
-  static PatternDecl *create(Context &ctx, llvm::SMRange location,
+  static PatternDecl *create(Context &ctx, SMRange location,
                              const Name *name, Optional<uint16_t> benefit,
                              bool hasBoundedRecursion,
                              const CompoundStmt *body);
@@ -811,7 +811,7 @@ class PatternDecl : public Node::NodeBase<PatternDecl, Decl> {
   }
 
 private:
-  PatternDecl(llvm::SMRange loc, const Name *name, Optional<uint16_t> benefit,
+  PatternDecl(SMRange loc, const Name *name, Optional<uint16_t> benefit,
               bool hasBoundedRecursion, const CompoundStmt *body)
       : Base(loc, name), benefit(benefit),
         hasBoundedRecursion(hasBoundedRecursion), patternBody(body) {}
@@ -884,7 +884,7 @@ class VariableDecl final
 class Module final : public Node::NodeBase<Module, Node>,
                      private llvm::TrailingObjects<Module, Decl *> {
 public:
-  static Module *create(Context &ctx, llvm::SMLoc loc,
+  static Module *create(Context &ctx, SMLoc loc,
                         ArrayRef<Decl *> children);
 
   /// Return the children of this module.
@@ -896,8 +896,8 @@ class Module final : public Node::NodeBase<Module, Node>,
   }
 
 private:
-  Module(llvm::SMLoc loc, unsigned numChildren)
-      : Base(llvm::SMRange{loc, loc}), numChildren(numChildren) {}
+  Module(SMLoc loc, unsigned numChildren)
+      : Base(SMRange{loc, loc}), numChildren(numChildren) {}
 
   /// The number of decls held by this module.
   unsigned numChildren;
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 121386a09ad72..958c5322e8e00 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -1384,7 +1384,7 @@ static ParseResult parseBound(bool isLower, OperationState &result,
   }
 
   // Get the attribute location.
-  llvm::SMLoc attrLoc = p.getCurrentLocation();
+  SMLoc attrLoc = p.getCurrentLocation();
 
   Attribute boundAttr;
   if (p.parseAttribute(boundAttr, builder.getIndexType(), boundAttrName,
@@ -1458,7 +1458,7 @@ static ParseResult parseAffineForOp(OpAsmParser &parser,
         AffineForOp::getStepAttrName(),
         builder.getIntegerAttr(builder.getIndexType(), /*value=*/1));
   } else {
-    llvm::SMLoc stepLoc = parser.getCurrentLocation();
+    SMLoc stepLoc = parser.getCurrentLocation();
     IntegerAttr stepAttr;
     if (parser.parseAttribute(stepAttr, builder.getIndexType(),
                               AffineForOp::getStepAttrName().data(),
diff --git a/mlir/lib/Dialect/DLTI/DLTI.cpp b/mlir/lib/Dialect/DLTI/DLTI.cpp
index 1449a2c4691f0..cf1573ded67fd 100644
--- a/mlir/lib/Dialect/DLTI/DLTI.cpp
+++ b/mlir/lib/Dialect/DLTI/DLTI.cpp
@@ -71,7 +71,7 @@ DataLayoutEntryAttr DataLayoutEntryAttr::parse(AsmParser &parser) {
 
   Type type = nullptr;
   std::string identifier;
-  llvm::SMLoc idLoc = parser.getCurrentLocation();
+  SMLoc idLoc = parser.getCurrentLocation();
   OptionalParseResult parsedType = parser.parseOptionalType(type);
   if (parsedType.hasValue() && failed(parsedType.getValue()))
     return {};
diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
index 3260443e81fed..ee49b8c77bbe6 100644
--- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
+++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp
@@ -170,7 +170,7 @@ Attribute emitc::OpaqueAttr::parse(AsmParser &parser, Type type) {
   if (parser.parseLess())
     return Attribute();
   std::string value;
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   if (parser.parseOptionalString(&value)) {
     parser.emitError(loc) << "expected string";
     return Attribute();
@@ -197,7 +197,7 @@ Type emitc::OpaqueType::parse(AsmParser &parser) {
   if (parser.parseLess())
     return Type();
   std::string value;
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   if (parser.parseOptionalString(&value) || value.empty()) {
     parser.emitError(loc) << "expected non empty string";
     return Type();
diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
index b048491f5d1db..b61bc21ab20d7 100644
--- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
+++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp
@@ -141,7 +141,7 @@ Type GPUDialect::parseType(DialectAsmParser &parser) const {
     return AsyncTokenType::get(context);
 
   if (keyword == "mma_matrix") {
-    llvm::SMLoc beginLoc = parser.getNameLoc();
+    SMLoc beginLoc = parser.getNameLoc();
 
     // Parse '<'.
     if (parser.parseLess())
diff --git a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
index d9209b9012dd5..3409fce67d810 100644
--- a/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/SerializeToHsaco.cpp
@@ -357,7 +357,7 @@ SerializeToHsacoPass::assembleIsa(const std::string &isa) {
 
   llvm::SourceMgr srcMgr;
   srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa),
-                            llvm::SMLoc());
+                            SMLoc());
 
   const llvm::MCTargetOptions mcOptions;
   std::unique_ptr<llvm::MCRegisterInfo> mri(
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
index 440fc835e7cbb..30eec4369dd19 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp
@@ -115,7 +115,7 @@ static ParseResult parseCmpOp(OpAsmParser &parser, OperationState &result) {
   StringAttr predicateAttr;
   OpAsmParser::OperandType lhs, rhs;
   Type type;
-  llvm::SMLoc predicateLoc, trailingTypeLoc;
+  SMLoc predicateLoc, trailingTypeLoc;
   if (parser.getCurrentLocation(&predicateLoc) ||
       parser.parseAttribute(predicateAttr, "predicate", result.attributes) ||
       parser.parseOperand(lhs) || parser.parseComma() ||
@@ -194,7 +194,7 @@ static void printAllocaOp(OpAsmPrinter &p, AllocaOp &op) {
 static ParseResult parseAllocaOp(OpAsmParser &parser, OperationState &result) {
   OpAsmParser::OperandType arraySize;
   Type type, elemType;
-  llvm::SMLoc trailingTypeLoc;
+  SMLoc trailingTypeLoc;
   if (parser.parseOperand(arraySize) || parser.parseKeyword("x") ||
       parser.parseType(elemType) ||
       parser.parseOptionalAttrDict(result.attributes) || parser.parseColon() ||
@@ -642,7 +642,7 @@ static void printLoadOp(OpAsmPrinter &p, LoadOp &op) {
 // Extract the pointee type from the LLVM pointer type wrapped in MLIR.  Return
 // the resulting type wrapped in MLIR, or nullptr on error.
 static Type getLoadStoreElementType(OpAsmParser &parser, Type type,
-                                    llvm::SMLoc trailingTypeLoc) {
+                                    SMLoc trailingTypeLoc) {
   auto llvmTy = type.dyn_cast<LLVM::LLVMPointerType>();
   if (!llvmTy)
     return parser.emitError(trailingTypeLoc, "expected LLVM pointer type"),
@@ -654,7 +654,7 @@ static Type getLoadStoreElementType(OpAsmParser &parser, Type type,
 static ParseResult parseLoadOp(OpAsmParser &parser, OperationState &result) {
   OpAsmParser::OperandType addr;
   Type type;
-  llvm::SMLoc trailingTypeLoc;
+  SMLoc trailingTypeLoc;
 
   if (succeeded(parser.parseOptionalKeyword("volatile")))
     result.addAttribute(kVolatileAttrName, parser.getBuilder().getUnitAttr());
@@ -706,7 +706,7 @@ static void printStoreOp(OpAsmPrinter &p, StoreOp &op) {
 static ParseResult parseStoreOp(OpAsmParser &parser, OperationState &result) {
   OpAsmParser::OperandType addr, value;
   Type type;
-  llvm::SMLoc trailingTypeLoc;
+  SMLoc trailingTypeLoc;
 
   if (succeeded(parser.parseOptionalKeyword("volatile")))
     result.addAttribute(kVolatileAttrName, parser.getBuilder().getUnitAttr());
@@ -790,7 +790,7 @@ static ParseResult parseInvokeOp(OpAsmParser &parser, OperationState &result) {
   SmallVector<OpAsmParser::OperandType, 8> operands;
   FunctionType funcType;
   SymbolRefAttr funcAttr;
-  llvm::SMLoc trailingTypeLoc;
+  SMLoc trailingTypeLoc;
   Block *normalDest, *unwindDest;
   SmallVector<Value, 4> normalOperands, unwindOperands;
   Builder &builder = parser.getBuilder();
@@ -1081,7 +1081,7 @@ static ParseResult parseCallOp(OpAsmParser &parser, OperationState &result) {
   SmallVector<OpAsmParser::OperandType, 8> operands;
   Type type;
   SymbolRefAttr funcAttr;
-  llvm::SMLoc trailingTypeLoc;
+  SMLoc trailingTypeLoc;
 
   // Parse an operand list that will, in practice, contain 0 or 1 operand.  In
   // case of an indirect call, there will be 1 operand before `(`.  In case of a
@@ -1182,7 +1182,7 @@ static void printExtractElementOp(OpAsmPrinter &p, ExtractElementOp &op) {
 //                 attribute-dict? `:` type
 static ParseResult parseExtractElementOp(OpAsmParser &parser,
                                          OperationState &result) {
-  llvm::SMLoc loc;
+  SMLoc loc;
   OpAsmParser::OperandType vector, position;
   Type type, positionType;
   if (parser.getCurrentLocation(&loc) || parser.parseOperand(vector) ||
@@ -1231,8 +1231,8 @@ static void printExtractValueOp(OpAsmPrinter &p, ExtractValueOp &op) {
 static Type getInsertExtractValueElementType(OpAsmParser &parser,
                                              Type containerType,
                                              ArrayAttr positionAttr,
-                                             llvm::SMLoc attributeLoc,
-                                             llvm::SMLoc typeLoc) {
+                                             SMLoc attributeLoc,
+                                             SMLoc typeLoc) {
   Type llvmType = containerType;
   if (!isCompatibleType(containerType))
     return parser.emitError(typeLoc, "expected LLVM IR Dialect type"), nullptr;
@@ -1322,7 +1322,7 @@ static ParseResult parseExtractValueOp(OpAsmParser &parser,
   OpAsmParser::OperandType container;
   Type containerType;
   ArrayAttr positionAttr;
-  llvm::SMLoc attributeLoc, trailingTypeLoc;
+  SMLoc attributeLoc, trailingTypeLoc;
 
   if (parser.parseOperand(container) ||
       parser.getCurrentLocation(&attributeLoc) ||
@@ -1396,7 +1396,7 @@ static void printInsertElementOp(OpAsmPrinter &p, InsertElementOp &op) {
 //                 attribute-dict? `:` type
 static ParseResult parseInsertElementOp(OpAsmParser &parser,
                                         OperationState &result) {
-  llvm::SMLoc loc;
+  SMLoc loc;
   OpAsmParser::OperandType vector, value, position;
   Type vectorType, positionType;
   if (parser.getCurrentLocation(&loc) || parser.parseOperand(value) ||
@@ -1449,7 +1449,7 @@ static ParseResult parseInsertValueOp(OpAsmParser &parser,
   OpAsmParser::OperandType container, value;
   Type containerType;
   ArrayAttr positionAttr;
-  llvm::SMLoc attributeLoc, trailingTypeLoc;
+  SMLoc attributeLoc, trailingTypeLoc;
 
   if (parser.parseOperand(value) || parser.parseComma() ||
       parser.parseOperand(container) ||
@@ -1918,7 +1918,7 @@ static void printShuffleVectorOp(OpAsmPrinter &p, ShuffleVectorOp &op) {
 //                 attribute-dict? `:` type
 static ParseResult parseShuffleVectorOp(OpAsmParser &parser,
                                         OperationState &result) {
-  llvm::SMLoc loc;
+  SMLoc loc;
   OpAsmParser::OperandType v1, v2;
   ArrayAttr maskAttr;
   Type typeV1, typeV2;
@@ -1985,7 +1985,7 @@ void LLVMFuncOp::build(OpBuilder &builder, OperationState &result,
 // Returns a null type if any of the types provided are non-LLVM types, or if
 // there is more than one output type.
 static Type
-buildLLVMFunctionType(OpAsmParser &parser, llvm::SMLoc loc,
+buildLLVMFunctionType(OpAsmParser &parser, SMLoc loc,
                       ArrayRef<Type> inputs, ArrayRef<Type> outputs,
                       function_interface_impl::VariadicFlag variadicFlag) {
   Builder &b = parser.getBuilder();
@@ -2216,7 +2216,7 @@ OpFoldResult LLVM::ConstantOp::fold(ArrayRef<Attribute>) { return getValue(); }
 // state.
 static ParseResult parseAtomicBinOp(OpAsmParser &parser, OperationState &result,
                                     StringRef attrName) {
-  llvm::SMLoc loc;
+  SMLoc loc;
   StringRef keyword;
   if (parser.getCurrentLocation(&loc) || parser.parseKeyword(&keyword))
     return failure();
@@ -2243,7 +2243,7 @@ static ParseResult parseAtomicBinOp(OpAsmParser &parser, OperationState &result,
 static ParseResult parseAtomicOrdering(OpAsmParser &parser,
                                        OperationState &result,
                                        StringRef attrName) {
-  llvm::SMLoc loc;
+  SMLoc loc;
   StringRef ordering;
   if (parser.getCurrentLocation(&loc) || parser.parseKeyword(&ordering))
     return failure();
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
index bca0a5800d919..1c91007897c09 100644
--- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
+++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp
@@ -177,7 +177,7 @@ static ParseResult dispatchParse(AsmParser &parser, Type &type);
 /// Parses an LLVM dialect function type.
 ///   llvm-type :: = `func<` llvm-type `(` llvm-type-list `...`? `)>`
 static LLVMFunctionType parseFunctionType(AsmParser &parser) {
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   Type returnType;
   if (parser.parseLess() || dispatchParse(parser, returnType) ||
       parser.parseLParen())
@@ -216,7 +216,7 @@ static LLVMFunctionType parseFunctionType(AsmParser &parser) {
 /// Parses an LLVM dialect pointer type.
 ///   llvm-type ::= `ptr<` llvm-type (`,` integer)? `>`
 static LLVMPointerType parsePointerType(AsmParser &parser) {
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   Type elementType;
   if (parser.parseLess() || dispatchParse(parser, elementType))
     return LLVMPointerType();
@@ -235,9 +235,9 @@ static LLVMPointerType parsePointerType(AsmParser &parser) {
 /// Supports both fixed and scalable vectors.
 static Type parseVectorType(AsmParser &parser) {
   SmallVector<int64_t, 2> dims;
-  llvm::SMLoc dimPos, typePos;
+  SMLoc dimPos, typePos;
   Type elementType;
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   if (parser.parseLess() || parser.getCurrentLocation(&dimPos) ||
       parser.parseDimensionList(dims, /*allowDynamic=*/true) ||
       parser.getCurrentLocation(&typePos) ||
@@ -271,9 +271,9 @@ static Type parseVectorType(AsmParser &parser) {
 ///   llvm-type ::= `array<` integer `x` llvm-type `>`
 static LLVMArrayType parseArrayType(AsmParser &parser) {
   SmallVector<int64_t, 1> dims;
-  llvm::SMLoc sizePos;
+  SMLoc sizePos;
   Type elementType;
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   if (parser.parseLess() || parser.getCurrentLocation(&sizePos) ||
       parser.parseDimensionList(dims, /*allowDynamic=*/false) ||
       dispatchParse(parser, elementType) || parser.parseGreater())
@@ -292,7 +292,7 @@ static LLVMArrayType parseArrayType(AsmParser &parser) {
 static LLVMStructType trySetStructBody(LLVMStructType type,
                                        ArrayRef<Type> subtypes, bool isPacked,
                                        AsmParser &parser,
-                                       llvm::SMLoc subtypesLoc) {
+                                       SMLoc subtypesLoc) {
   for (Type t : subtypes) {
     if (!LLVMStructType::isValidElementType(t)) {
       parser.emitError(subtypesLoc)
@@ -352,7 +352,7 @@ static LLVMStructType parseStructType(AsmParser &parser) {
   }
 
   // Handle intentionally opaque structs.
-  llvm::SMLoc kwLoc = parser.getCurrentLocation();
+  SMLoc kwLoc = parser.getCurrentLocation();
   if (succeeded(parser.parseOptionalKeyword("opaque"))) {
     if (!isIdentified)
       return parser.emitError(kwLoc, "only identified structs can be opaque"),
@@ -388,7 +388,7 @@ static LLVMStructType parseStructType(AsmParser &parser) {
   // Parse subtypes. For identified structs, put the identifier of the struct on
   // the stack to support self-references in the recursive calls.
   SmallVector<Type, 4> subtypes;
-  llvm::SMLoc subtypesLoc = parser.getCurrentLocation();
+  SMLoc subtypesLoc = parser.getCurrentLocation();
   do {
     if (isIdentified)
       knownStructNames.insert(name);
@@ -417,7 +417,7 @@ static LLVMStructType parseStructType(AsmParser &parser) {
 /// prefix), and on failure fall back to parsing the short-hand version of the
 /// LLVM dialect types without the `!llvm` prefix.
 static Type dispatchParse(AsmParser &parser, bool allowAny = true) {
-  llvm::SMLoc keyLoc = parser.getCurrentLocation();
+  SMLoc keyLoc = parser.getCurrentLocation();
 
   // Try parsing any MLIR type.
   Type type;
@@ -464,7 +464,7 @@ static ParseResult dispatchParse(AsmParser &parser, Type &type) {
 
 /// Parses one of the LLVM dialect types.
 Type mlir::LLVM::detail::parseType(DialectAsmParser &parser) {
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   Type type = dispatchParse(parser, /*allowAny=*/false);
   if (!type)
     return type;
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index b3067109b4365..d7c5c622b1a6d 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1104,7 +1104,7 @@ static void print(OpAsmPrinter &p, linalg::YieldOp op) {
 static ParseResult parseYieldOp(OpAsmParser &parser, OperationState &result) {
   SmallVector<OpAsmParser::OperandType, 2> opInfo;
   SmallVector<Type, 2> types;
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   return failure(parser.parseOperandList(opInfo) ||
                  parser.parseOptionalAttrDict(result.attributes) ||
                  (!opInfo.empty() && parser.parseColonTypeList(types)) ||
@@ -1318,7 +1318,7 @@ static ParseResult parseTiledLoopOp(OpAsmParser &parser,
   SmallVector<OpAsmParser::OperandType, 4> inputs, inputRegionArgs;
   SmallVector<Type, 4> inputTypes;
   if (succeeded(parser.parseOptionalKeyword("ins"))) {
-    llvm::SMLoc inputsOperandsLoc = parser.getCurrentLocation();
+    SMLoc inputsOperandsLoc = parser.getCurrentLocation();
 
     if (parser.parseAssignmentListWithTypes(inputRegionArgs, inputs,
                                             inputTypes))
@@ -1333,7 +1333,7 @@ static ParseResult parseTiledLoopOp(OpAsmParser &parser,
   SmallVector<OpAsmParser::OperandType, 4> outputs, outputRegionArgs;
   SmallVector<Type, 4> outputTypes;
   if (succeeded(parser.parseOptionalKeyword("outs"))) {
-    llvm::SMLoc outputsOperandsLoc = parser.getCurrentLocation();
+    SMLoc outputsOperandsLoc = parser.getCurrentLocation();
 
     if (parser.parseAssignmentListWithTypes(outputRegionArgs, outputs,
                                             outputTypes))
@@ -1943,7 +1943,7 @@ static ParseResult
 parseCommonStructuredOpParts(OpAsmParser &parser, OperationState &result,
                              SmallVectorImpl<Type> &inputTypes,
                              SmallVectorImpl<Type> &outputTypes) {
-  llvm::SMLoc inputsOperandsLoc, outputsOperandsLoc;
+  SMLoc inputsOperandsLoc, outputsOperandsLoc;
   SmallVector<OpAsmParser::OperandType, 4> inputsOperands, outputsOperands;
 
   parser.parseOptionalAttrDict(result.attributes);
diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
index 0c29607d4601f..b2ad93a871def 100644
--- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
+++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp
@@ -2296,7 +2296,7 @@ static ParseResult parseViewOp(OpAsmParser &parser, OperationState &result) {
   SmallVector<OpAsmParser::OperandType, 4> sizesInfo;
   auto indexType = parser.getBuilder().getIndexType();
   Type srcType, dstType;
-  llvm::SMLoc offsetLoc;
+  SMLoc offsetLoc;
   if (parser.parseOperand(srcInfo) || parser.getCurrentLocation(&offsetLoc) ||
       parser.parseOperandList(offsetInfo, OpAsmParser::Delimiter::Square))
     return failure();
diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
index ebbebf6371434..46a2bf3019e08 100644
--- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
+++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp
@@ -573,7 +573,7 @@ static ParseResult parseClauseAttr(AsmParser &parser, OperationState &state,
                                    StringRef attrName, StringRef name) {
   using ClauseT = decltype(std::declval<ClauseAttr>().getValue());
   StringRef enumStr;
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   if (parser.parseLParen() || parser.parseKeyword(&enumStr) ||
       parser.parseRParen())
     return failure();
@@ -749,7 +749,7 @@ static ParseResult parseClauses(OpAsmParser &parser, OperationState &result,
       clauseSegments[pos[allocateClause] + 1] = allocators.size();
     } else if (clauseKeyword == "default") {
       StringRef defval;
-      llvm::SMLoc loc = parser.getCurrentLocation();
+      SMLoc loc = parser.getCurrentLocation();
       if (checkAllowed(defaultClause) || parser.parseLParen() ||
           parser.parseKeyword(&defval) || parser.parseRParen())
         return failure();
@@ -937,7 +937,7 @@ static ParseResult parseClauses(OpAsmParser &parser, OperationState &result,
                               "invalid schedule kind");
     }
     if (!modifiers.empty()) {
-      llvm::SMLoc loc = parser.getCurrentLocation();
+      SMLoc loc = parser.getCurrentLocation();
       if (Optional<ScheduleModifier> mod =
               symbolizeScheduleModifier(modifiers[0])) {
         result.addAttribute(
diff --git a/mlir/lib/Dialect/PDL/IR/PDLTypes.cpp b/mlir/lib/Dialect/PDL/IR/PDLTypes.cpp
index c92251971d26e..13e19d4276ebe 100644
--- a/mlir/lib/Dialect/PDL/IR/PDLTypes.cpp
+++ b/mlir/lib/Dialect/PDL/IR/PDLTypes.cpp
@@ -69,7 +69,7 @@ Type RangeType::parse(AsmParser &parser) {
   if (parser.parseLess())
     return Type();
 
-  llvm::SMLoc elementLoc = parser.getCurrentLocation();
+  SMLoc elementLoc = parser.getCurrentLocation();
   Type elementType = parsePDLType(parser);
   if (!elementType || parser.parseGreater())
     return Type();
diff --git a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp
index 72d679054308c..818d328a9b65e 100644
--- a/mlir/lib/Dialect/Quant/IR/TypeParser.cpp
+++ b/mlir/lib/Dialect/Quant/IR/TypeParser.cpp
@@ -78,7 +78,7 @@ static ParseResult parseStorageRange(DialectAsmParser &parser,
   }
 
   // Explicit storage min and storage max.
-  llvm::SMLoc minLoc = parser.getCurrentLocation(), maxLoc;
+  SMLoc minLoc = parser.getCurrentLocation(), maxLoc;
   if (parser.parseInteger(storageTypeMin) || parser.parseColon() ||
       parser.getCurrentLocation(&maxLoc) ||
       parser.parseInteger(storageTypeMax) || parser.parseGreater())
@@ -252,7 +252,7 @@ static Type parseUniformType(DialectAsmParser &parser) {
   }
 
   // Parse scales/zeroPoints.
-  llvm::SMLoc scaleZPLoc = parser.getCurrentLocation();
+  SMLoc scaleZPLoc = parser.getCurrentLocation();
   do {
     scales.resize(scales.size() + 1);
     zeroPoints.resize(zeroPoints.size() + 1);
diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp
index 807b0b6df78d8..edae94cc9e748 100644
--- a/mlir/lib/Dialect/SCF/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/SCF.cpp
@@ -2189,7 +2189,7 @@ static ParseResult parseWhileOp(OpAsmParser &parser, OperationState &result) {
     return failure();
 
   FunctionType functionType;
-  llvm::SMLoc typeLoc = parser.getCurrentLocation();
+  SMLoc typeLoc = parser.getCurrentLocation();
   if (failed(parser.parseColonType(functionType)))
     return failure();
 
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
index e48ded448717e..96d919971e17d 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp
@@ -155,7 +155,7 @@ Optional<unsigned> parseAndVerify<unsigned>(SPIRVDialect const &dialect,
 static Type parseAndVerifyType(SPIRVDialect const &dialect,
                                DialectAsmParser &parser) {
   Type type;
-  llvm::SMLoc typeLoc = parser.getCurrentLocation();
+  SMLoc typeLoc = parser.getCurrentLocation();
   if (parser.parseType(type))
     return Type();
 
@@ -199,7 +199,7 @@ static Type parseAndVerifyType(SPIRVDialect const &dialect,
 static Type parseAndVerifyMatrixType(SPIRVDialect const &dialect,
                                      DialectAsmParser &parser) {
   Type type;
-  llvm::SMLoc typeLoc = parser.getCurrentLocation();
+  SMLoc typeLoc = parser.getCurrentLocation();
   if (parser.parseType(type))
     return Type();
 
@@ -235,7 +235,7 @@ static Type parseAndVerifyMatrixType(SPIRVDialect const &dialect,
 static Type parseAndVerifySampledImageType(SPIRVDialect const &dialect,
                                            DialectAsmParser &parser) {
   Type type;
-  llvm::SMLoc typeLoc = parser.getCurrentLocation();
+  SMLoc typeLoc = parser.getCurrentLocation();
   if (parser.parseType(type))
     return Type();
 
@@ -263,7 +263,7 @@ static LogicalResult parseOptionalArrayStride(const SPIRVDialect &dialect,
   if (parser.parseKeyword("stride") || parser.parseEqual())
     return failure();
 
-  llvm::SMLoc strideLoc = parser.getCurrentLocation();
+  SMLoc strideLoc = parser.getCurrentLocation();
   Optional<unsigned> optStride = parseAndVerify<unsigned>(dialect, parser);
   if (!optStride)
     return failure();
@@ -288,7 +288,7 @@ static Type parseArrayType(SPIRVDialect const &dialect,
     return Type();
 
   SmallVector<int64_t, 1> countDims;
-  llvm::SMLoc countLoc = parser.getCurrentLocation();
+  SMLoc countLoc = parser.getCurrentLocation();
   if (parser.parseDimensionList(countDims, /*allowDynamic=*/false))
     return Type();
   if (countDims.size() != 1) {
@@ -326,7 +326,7 @@ static Type parseCooperativeMatrixType(SPIRVDialect const &dialect,
     return Type();
 
   SmallVector<int64_t, 2> dims;
-  llvm::SMLoc countLoc = parser.getCurrentLocation();
+  SMLoc countLoc = parser.getCurrentLocation();
   if (parser.parseDimensionList(dims, /*allowDynamic=*/false))
     return Type();
 
@@ -367,7 +367,7 @@ static Type parsePointerType(SPIRVDialect const &dialect,
     return Type();
 
   StringRef storageClassSpec;
-  llvm::SMLoc storageClassLoc = parser.getCurrentLocation();
+  SMLoc storageClassLoc = parser.getCurrentLocation();
   if (parser.parseComma() || parser.parseKeyword(&storageClassSpec))
     return Type();
 
@@ -409,7 +409,7 @@ static Type parseMatrixType(SPIRVDialect const &dialect,
     return Type();
 
   SmallVector<int64_t, 1> countDims;
-  llvm::SMLoc countLoc = parser.getCurrentLocation();
+  SMLoc countLoc = parser.getCurrentLocation();
   if (parser.parseDimensionList(countDims, /*allowDynamic=*/false))
     return Type();
   if (countDims.size() != 1) {
@@ -442,7 +442,7 @@ template <typename ValTy>
 static Optional<ValTy> parseAndVerify(SPIRVDialect const &dialect,
                                       DialectAsmParser &parser) {
   StringRef enumSpec;
-  llvm::SMLoc enumLoc = parser.getCurrentLocation();
+  SMLoc enumLoc = parser.getCurrentLocation();
   if (parser.parseKeyword(&enumSpec)) {
     return llvm::None;
   }
@@ -568,7 +568,7 @@ static ParseResult parseStructMemberDecorations(
     SmallVectorImpl<StructType::MemberDecorationInfo> &memberDecorationInfo) {
 
   // Check if the first element is offset.
-  llvm::SMLoc offsetLoc = parser.getCurrentLocation();
+  SMLoc offsetLoc = parser.getCurrentLocation();
   StructType::OffsetInfo offset = 0;
   OptionalParseResult offsetParseResult = parser.parseOptionalInteger(offset);
   if (offsetParseResult.hasValue()) {
@@ -875,7 +875,7 @@ void SPIRVDialect::printType(Type type, DialectAsmPrinter &os) const {
 /// of the parsed keyword, and returns failure if any error occurs.
 static ParseResult parseKeywordList(
     DialectAsmParser &parser,
-    function_ref<LogicalResult(llvm::SMLoc, StringRef)> processKeyword) {
+    function_ref<LogicalResult(SMLoc, StringRef)> processKeyword) {
   if (parser.parseLSquare())
     return failure();
 
@@ -992,10 +992,10 @@ static Attribute parseVerCapExtAttr(DialectAsmParser &parser) {
   ArrayAttr capabilitiesAttr;
   {
     SmallVector<Attribute, 4> capabilities;
-    llvm::SMLoc errorloc;
+    SMLoc errorloc;
     StringRef errorKeyword;
 
-    auto processCapability = [&](llvm::SMLoc loc, StringRef capability) {
+    auto processCapability = [&](SMLoc loc, StringRef capability) {
       if (auto capSymbol = spirv::symbolizeCapability(capability)) {
         capabilities.push_back(
             builder.getI32IntegerAttr(static_cast<uint32_t>(*capSymbol)));
@@ -1015,10 +1015,10 @@ static Attribute parseVerCapExtAttr(DialectAsmParser &parser) {
   ArrayAttr extensionsAttr;
   {
     SmallVector<Attribute, 1> extensions;
-    llvm::SMLoc errorloc;
+    SMLoc errorloc;
     StringRef errorKeyword;
 
-    auto processExtension = [&](llvm::SMLoc loc, StringRef extension) {
+    auto processExtension = [&](SMLoc loc, StringRef extension) {
       if (spirv::symbolizeExtension(extension)) {
         extensions.push_back(builder.getStringAttr(extension));
         return success();
diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp
index cf764eaa71c47..ff81086d5f573 100644
--- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp
@@ -695,7 +695,7 @@ static Type getElementType(Type type, Attribute indices, Location loc) {
 }
 
 static Type getElementType(Type type, Attribute indices, OpAsmParser &parser,
-                           llvm::SMLoc loc) {
+                           SMLoc loc) {
   auto errorFn = [&](StringRef err) -> InFlightDiagnostic {
     return parser.emitError(loc, err);
   };
@@ -721,7 +721,7 @@ static ParseResult parseAtomicUpdateOp(OpAsmParser &parser,
   SmallVector<OpAsmParser::OperandType, 2> operandInfo;
   OpAsmParser::OperandType ptrInfo, valueInfo;
   Type type;
-  llvm::SMLoc loc;
+  SMLoc loc;
   if (parseEnumStrAttr(scope, parser, state, kMemoryScopeAttrName) ||
       parseEnumStrAttr(memoryScope, parser, state, kSemanticsAttrName) ||
       parser.parseOperandList(operandInfo, (hasValue ? 2 : 1)) ||
@@ -1508,7 +1508,7 @@ static ParseResult parseCompositeExtractOp(OpAsmParser &parser,
   OpAsmParser::OperandType compositeInfo;
   Attribute indicesAttr;
   Type compositeType;
-  llvm::SMLoc attrLocation;
+  SMLoc attrLocation;
 
   if (parser.parseOperand(compositeInfo) ||
       parser.getCurrentLocation(&attrLocation) ||
diff --git a/mlir/lib/Dialect/Vector/VectorOps.cpp b/mlir/lib/Dialect/Vector/VectorOps.cpp
index 2e22fc0495bf2..e1289d5e7fad7 100644
--- a/mlir/lib/Dialect/Vector/VectorOps.cpp
+++ b/mlir/lib/Dialect/Vector/VectorOps.cpp
@@ -900,7 +900,7 @@ static void print(OpAsmPrinter &p, vector::ExtractOp op) {
 }
 
 static ParseResult parseExtractOp(OpAsmParser &parser, OperationState &result) {
-  llvm::SMLoc attributeLoc, typeLoc;
+  SMLoc attributeLoc, typeLoc;
   NamedAttrList attrs;
   OpAsmParser::OperandType vector;
   Type type;
@@ -2695,7 +2695,7 @@ static void print(OpAsmPrinter &p, TransferReadOp op) {
 static ParseResult parseTransferReadOp(OpAsmParser &parser,
                                        OperationState &result) {
   auto &builder = parser.getBuilder();
-  llvm::SMLoc typesLoc;
+  SMLoc typesLoc;
   OpAsmParser::OperandType sourceInfo;
   SmallVector<OpAsmParser::OperandType, 8> indexInfo;
   OpAsmParser::OperandType paddingInfo;
@@ -3075,7 +3075,7 @@ void TransferWriteOp::build(OpBuilder &builder, OperationState &result,
 static ParseResult parseTransferWriteOp(OpAsmParser &parser,
                                         OperationState &result) {
   auto &builder = parser.getBuilder();
-  llvm::SMLoc typesLoc;
+  SMLoc typesLoc;
   OpAsmParser::OperandType vectorInfo, sourceInfo;
   SmallVector<OpAsmParser::OperandType, 8> indexInfo;
   SmallVector<Type, 2> types;
diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp
index 37072981a481c..af23d41c1bf77 100644
--- a/mlir/lib/ExecutionEngine/JitRunner.cpp
+++ b/mlir/lib/ExecutionEngine/JitRunner.cpp
@@ -121,7 +121,7 @@ static OwningModuleRef parseMLIRInput(StringRef inputFilename,
   }
 
   llvm::SourceMgr sourceMgr;
-  sourceMgr.AddNewSourceBuffer(std::move(file), llvm::SMLoc());
+  sourceMgr.AddNewSourceBuffer(std::move(file), SMLoc());
   return OwningModuleRef(parseSourceFile(sourceMgr, context));
 }
 
diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp
index e4d58c66d9928..d9459ee4dd8e0 100644
--- a/mlir/lib/IR/Diagnostics.cpp
+++ b/mlir/lib/IR/Diagnostics.cpp
@@ -364,7 +364,7 @@ struct SourceMgrDiagnosticHandlerImpl {
     // Otherwise, try to load the source file.
     std::string ignored;
     unsigned id =
-        mgr.AddIncludeFile(std::string(filename), llvm::SMLoc(), ignored);
+        mgr.AddIncludeFile(std::string(filename), SMLoc(), ignored);
     filenameToBufId[filename] = id;
     return id;
   }
@@ -449,7 +449,7 @@ void SourceMgrDiagnosticHandler::emitDiagnostic(Location loc, Twine message,
     if (!loc.isa<UnknownLoc>())
       strOS << loc << ": ";
     strOS << message;
-    return mgr.PrintMessage(os, llvm::SMLoc(), getDiagKind(kind), strOS.str());
+    return mgr.PrintMessage(os, SMLoc(), getDiagKind(kind), strOS.str());
   }
 
   // Otherwise if we are displaying the source line, try to convert the file
@@ -562,15 +562,15 @@ Optional<Location> SourceMgrDiagnosticHandler::findLocToShow(Location loc) {
 
 /// Get a memory buffer for the given file, or the main file of the source
 /// manager if one doesn't exist. This always returns non-null.
-llvm::SMLoc SourceMgrDiagnosticHandler::convertLocToSMLoc(FileLineColLoc loc) {
+SMLoc SourceMgrDiagnosticHandler::convertLocToSMLoc(FileLineColLoc loc) {
   // The column and line may be zero to represent unknown column and/or unknown
   /// line/column information.
   if (loc.getLine() == 0 || loc.getColumn() == 0)
-    return llvm::SMLoc();
+    return SMLoc();
 
   unsigned bufferId = impl->getSourceMgrBufferIDForFile(mgr, loc.getFilename());
   if (!bufferId)
-    return llvm::SMLoc();
+    return SMLoc();
   return mgr.FindLocForLineAndColumn(bufferId, loc.getLine(), loc.getColumn());
 }
 
@@ -586,7 +586,7 @@ struct ExpectedDiag {
   DiagnosticSeverity kind;
   unsigned lineNo;
   StringRef substring;
-  llvm::SMLoc fileLoc;
+  SMLoc fileLoc;
   bool matched;
 };
 
@@ -669,7 +669,7 @@ SourceMgrDiagnosticVerifierHandlerImpl::computeExpectedDiags(
     }
 
     // Point to the start of expected-*.
-    auto expectedStart = llvm::SMLoc::getFromPointer(matches[0].data());
+    auto expectedStart = SMLoc::getFromPointer(matches[0].data());
 
     DiagnosticSeverity kind;
     if (matches[1] == "error")
@@ -755,8 +755,8 @@ LogicalResult SourceMgrDiagnosticVerifierHandler::verify() {
     for (auto &err : expectedDiagsPair.second) {
       if (err.matched)
         continue;
-      llvm::SMRange range(err.fileLoc,
-                          llvm::SMLoc::getFromPointer(err.fileLoc.getPointer() +
+      SMRange range(err.fileLoc,
+                          SMLoc::getFromPointer(err.fileLoc.getPointer() +
                                                       err.substring.size()));
       mgr.PrintMessage(os, err.fileLoc, llvm::SourceMgr::DK_Error,
                        "expected " + getDiagKindStr(err.kind) + " \"" +
diff --git a/mlir/lib/IR/FunctionImplementation.cpp b/mlir/lib/IR/FunctionImplementation.cpp
index dd3b40db781b2..665ec18066f91 100644
--- a/mlir/lib/IR/FunctionImplementation.cpp
+++ b/mlir/lib/IR/FunctionImplementation.cpp
@@ -25,7 +25,7 @@ ParseResult mlir::function_interface_impl::parseFunctionArgumentList(
   // types, or just be a type list.  It isn't ok to sometimes have SSA ID's and
   // sometimes not.
   auto parseArgument = [&]() -> ParseResult {
-    llvm::SMLoc loc = parser.getCurrentLocation();
+    SMLoc loc = parser.getCurrentLocation();
 
     // Parse argument name if present.
     OpAsmParser::OperandType argument;
@@ -80,7 +80,7 @@ ParseResult mlir::function_interface_impl::parseFunctionArgumentList(
       if (parseArgument())
         return failure();
 
-      llvm::SMLoc loc = parser.getCurrentLocation();
+      SMLoc loc = parser.getCurrentLocation();
       if (argTypes.size() == numTypedArguments &&
           succeeded(parser.parseOptionalComma()))
         return parser.emitError(
@@ -212,7 +212,7 @@ ParseResult mlir::function_interface_impl::parseFunctionOp(
     return failure();
 
   // Parse the function signature.
-  llvm::SMLoc signatureLocation = parser.getCurrentLocation();
+  SMLoc signatureLocation = parser.getCurrentLocation();
   bool isVariadic = false;
   if (parseFunctionSignature(parser, allowVariadic, entryArgs, argTypes,
                              argAttrs, argLocations, isVariadic, resultTypes,
@@ -231,7 +231,7 @@ ParseResult mlir::function_interface_impl::parseFunctionOp(
 
   // If function attributes are present, parse them.
   NamedAttrList parsedAttributes;
-  llvm::SMLoc attributeDictLocation = parser.getCurrentLocation();
+  SMLoc attributeDictLocation = parser.getCurrentLocation();
   if (parser.parseOptionalAttrDictWithKeyword(parsedAttributes))
     return failure();
 
@@ -256,7 +256,7 @@ ParseResult mlir::function_interface_impl::parseFunctionOp(
   // Parse the optional function body. The printer will not print the body if
   // its empty, so disallow parsing of empty body in the parser.
   auto *body = result.addRegion();
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   OptionalParseResult parseResult = parser.parseOptionalRegion(
       *body, entryArgs, entryArgs.empty() ? ArrayRef<Type>() : argTypes,
       entryArgs.empty() ? ArrayRef<Location>() : argLocations,
diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp
index e06492292c1d2..1ca4d684475bd 100644
--- a/mlir/lib/IR/Operation.cpp
+++ b/mlir/lib/IR/Operation.cpp
@@ -1144,7 +1144,7 @@ ParseResult impl::parseOneResultSameOperandTypeOp(OpAsmParser &parser,
   Type type;
   // If the operand list is in-between parentheses, then we have a generic form.
   // (see the fallback in `printOneResultOp`).
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   if (!parser.parseOptionalLParen()) {
     if (parser.parseOperandList(ops) || parser.parseRParen() ||
         parser.parseOptionalAttrDict(result.attributes) ||
diff --git a/mlir/lib/Parser/AffineParser.cpp b/mlir/lib/Parser/AffineParser.cpp
index 9bff88a7f1677..7d2dd692fc41d 100644
--- a/mlir/lib/Parser/AffineParser.cpp
+++ b/mlir/lib/Parser/AffineParser.cpp
@@ -17,7 +17,6 @@
 
 using namespace mlir;
 using namespace mlir::detail;
-using llvm::SMLoc;
 
 namespace {
 
@@ -81,13 +80,13 @@ class AffineParser : public Parser {
   AffineExpr parseSymbolSSAIdExpr();
 
   AffineExpr getAffineBinaryOpExpr(AffineHighPrecOp op, AffineExpr lhs,
-                                   AffineExpr rhs, llvm::SMLoc opLoc);
+                                   AffineExpr rhs, SMLoc opLoc);
   AffineExpr getAffineBinaryOpExpr(AffineLowPrecOp op, AffineExpr lhs,
                                    AffineExpr rhs);
   AffineExpr parseAffineOperandExpr(AffineExpr lhs);
   AffineExpr parseAffineLowPrecOpExpr(AffineExpr llhs, AffineLowPrecOp llhsOp);
   AffineExpr parseAffineHighPrecOpExpr(AffineExpr llhs, AffineHighPrecOp llhsOp,
-                                       llvm::SMLoc llhsOpLoc);
+                                       SMLoc llhsOpLoc);
   AffineExpr parseAffineConstraint(bool *isEq);
 
 private:
@@ -683,7 +682,7 @@ ParseResult Parser::parseAffineMapOrIntegerSetReference(AffineMap &map,
   return AffineParser(state).parseAffineMapOrIntegerSetInline(map, set);
 }
 ParseResult Parser::parseAffineMapReference(AffineMap &map) {
-  llvm::SMLoc curLoc = getToken().getLoc();
+  SMLoc curLoc = getToken().getLoc();
   IntegerSet set;
   if (parseAffineMapOrIntegerSetReference(map, set))
     return failure();
@@ -692,7 +691,7 @@ ParseResult Parser::parseAffineMapReference(AffineMap &map) {
   return success();
 }
 ParseResult Parser::parseIntegerSetReference(IntegerSet &set) {
-  llvm::SMLoc curLoc = getToken().getLoc();
+  SMLoc curLoc = getToken().getLoc();
   AffineMap map;
   if (parseAffineMapOrIntegerSetReference(map, set))
     return failure();
diff --git a/mlir/lib/Parser/AsmParserImpl.h b/mlir/lib/Parser/AsmParserImpl.h
index 2292060f35d44..81ed270395a08 100644
--- a/mlir/lib/Parser/AsmParserImpl.h
+++ b/mlir/lib/Parser/AsmParserImpl.h
@@ -25,12 +25,12 @@ namespace detail {
 template <typename BaseT>
 class AsmParserImpl : public BaseT {
 public:
-  AsmParserImpl(llvm::SMLoc nameLoc, Parser &parser)
+  AsmParserImpl(SMLoc nameLoc, Parser &parser)
       : nameLoc(nameLoc), parser(parser) {}
   ~AsmParserImpl() override = default;
 
   /// Return the location of the original name token.
-  llvm::SMLoc getNameLoc() const override { return nameLoc; }
+  SMLoc getNameLoc() const override { return nameLoc; }
 
   //===--------------------------------------------------------------------===//
   // Utilities
@@ -40,7 +40,7 @@ class AsmParserImpl : public BaseT {
   bool didEmitError() const { return emittedError; }
 
   /// Emit a diagnostic at the specified location and return failure.
-  InFlightDiagnostic emitError(llvm::SMLoc loc, const Twine &message) override {
+  InFlightDiagnostic emitError(SMLoc loc, const Twine &message) override {
     emittedError = true;
     return parser.emitError(loc, message);
   }
@@ -51,12 +51,12 @@ class AsmParserImpl : public BaseT {
 
   /// Get the location of the next token and store it into the argument.  This
   /// always succeeds.
-  llvm::SMLoc getCurrentLocation() override {
+  SMLoc getCurrentLocation() override {
     return parser.getToken().getLoc();
   }
 
   /// Re-encode the given source location as an MLIR location and return it.
-  Location getEncodedSourceLoc(llvm::SMLoc loc) override {
+  Location getEncodedSourceLoc(SMLoc loc) override {
     return parser.getEncodedSourceLocation(loc);
   }
 
@@ -291,7 +291,7 @@ class AsmParserImpl : public BaseT {
   ParseResult parseFloat(double &result) override {
     bool isNegative = parser.consumeIf(Token::minus);
     Token curTok = parser.getToken();
-    llvm::SMLoc loc = curTok.getLoc();
+    SMLoc loc = curTok.getLoc();
 
     // Check for a floating point value.
     if (curTok.is(Token::floatliteral)) {
@@ -491,7 +491,7 @@ class AsmParserImpl : public BaseT {
 
 protected:
   /// The source location of the dialect symbol.
-  llvm::SMLoc nameLoc;
+  SMLoc nameLoc;
 
   /// The main parser.
   Parser &parser;
diff --git a/mlir/lib/Parser/AsmParserState.cpp b/mlir/lib/Parser/AsmParserState.cpp
index c5c0c9cb9a2c7..2d95ddb4a72d3 100644
--- a/mlir/lib/Parser/AsmParserState.cpp
+++ b/mlir/lib/Parser/AsmParserState.cpp
@@ -19,7 +19,7 @@ using namespace mlir;
 struct AsmParserState::Impl {
   /// A map from a SymbolRefAttr to a range of uses.
   using SymbolUseMap =
-      DenseMap<Attribute, SmallVector<SmallVector<llvm::SMRange>, 0>>;
+      DenseMap<Attribute, SmallVector<SmallVector<SMRange>, 0>>;
 
   struct PartialOpDef {
     explicit PartialOpDef(const OperationName &opName) {
@@ -48,7 +48,7 @@ struct AsmParserState::Impl {
 
   /// A set of value definitions that are placeholders for forward references.
   /// This map should be empty if the parser finishes successfully.
-  DenseMap<Value, SmallVector<llvm::SMLoc>> placeholderValueUses;
+  DenseMap<Value, SmallVector<SMLoc>> placeholderValueUses;
 
   /// The symbol table operations within the IR.
   SmallVector<std::pair<Operation *, std::unique_ptr<SymbolUseMap>>>
@@ -75,7 +75,7 @@ void AsmParserState::Impl::resolveSymbolUses() {
               opAndUseMapIt.first, it.first.cast<SymbolRefAttr>(), symbolOps)))
         continue;
 
-      for (ArrayRef<llvm::SMRange> useRange : it.second) {
+      for (ArrayRef<SMRange> useRange : it.second) {
         for (const auto &symIt : llvm::zip(symbolOps, useRange)) {
           auto opIt = operationToIdx.find(std::get<0>(symIt));
           if (opIt != operationToIdx.end())
@@ -121,9 +121,9 @@ auto AsmParserState::getOpDef(Operation *op) const
                                           : &*impl->operations[it->second];
 }
 
-llvm::SMRange AsmParserState::convertIdLocToRange(llvm::SMLoc loc) {
+SMRange AsmParserState::convertIdLocToRange(SMLoc loc) {
   if (!loc.isValid())
-    return llvm::SMRange();
+    return SMRange();
 
   // Return if the given character is a valid identifier character.
   auto isIdentifierChar = [](char c) {
@@ -133,7 +133,7 @@ llvm::SMRange AsmParserState::convertIdLocToRange(llvm::SMLoc loc) {
   const char *curPtr = loc.getPointer();
   while (*curPtr && isIdentifierChar(*(++curPtr)))
     continue;
-  return llvm::SMRange(loc, llvm::SMLoc::getFromPointer(curPtr));
+  return SMRange(loc, SMLoc::getFromPointer(curPtr));
 }
 
 //===----------------------------------------------------------------------===//
@@ -166,8 +166,8 @@ void AsmParserState::startOperationDefinition(const OperationName &opName) {
 }
 
 void AsmParserState::finalizeOperationDefinition(
-    Operation *op, llvm::SMRange nameLoc, llvm::SMLoc endLoc,
-    ArrayRef<std::pair<unsigned, llvm::SMLoc>> resultGroups) {
+    Operation *op, SMRange nameLoc, SMLoc endLoc,
+    ArrayRef<std::pair<unsigned, SMLoc>> resultGroups) {
   assert(!impl->partialOperations.empty() &&
          "expected valid partial operation definition");
   Impl::PartialOpDef partialOpDef = impl->partialOperations.pop_back_val();
@@ -210,7 +210,7 @@ void AsmParserState::finalizeRegionDefinition() {
     impl->symbolUseScopes.pop_back();
 }
 
-void AsmParserState::addDefinition(Block *block, llvm::SMLoc location) {
+void AsmParserState::addDefinition(Block *block, SMLoc location) {
   auto it = impl->blocksToIdx.find(block);
   if (it == impl->blocksToIdx.end()) {
     impl->blocksToIdx.try_emplace(block, impl->blocks.size());
@@ -225,7 +225,7 @@ void AsmParserState::addDefinition(Block *block, llvm::SMLoc location) {
 }
 
 void AsmParserState::addDefinition(BlockArgument blockArg,
-                                   llvm::SMLoc location) {
+                                   SMLoc location) {
   auto it = impl->blocksToIdx.find(blockArg.getOwner());
   assert(it != impl->blocksToIdx.end() &&
          "expected owner block to have an entry");
@@ -237,7 +237,7 @@ void AsmParserState::addDefinition(BlockArgument blockArg,
   def.arguments[argIdx] = SMDefinition(convertIdLocToRange(location));
 }
 
-void AsmParserState::addUses(Value value, ArrayRef<llvm::SMLoc> locations) {
+void AsmParserState::addUses(Value value, ArrayRef<SMLoc> locations) {
   // Handle the case where the value is an operation result.
   if (OpResult result = value.dyn_cast<OpResult>()) {
     // Check to see if a definition for the parent operation has been recorded.
@@ -258,7 +258,7 @@ void AsmParserState::addUses(Value value, ArrayRef<llvm::SMLoc> locations) {
     OperationDefinition &def = *impl->operations[existingIt->second];
     for (auto &resultGroup : llvm::reverse(def.resultGroups)) {
       if (resultNo >= resultGroup.startIndex) {
-        for (llvm::SMLoc loc : locations)
+        for (SMLoc loc : locations)
           resultGroup.definition.uses.push_back(convertIdLocToRange(loc));
         return;
       }
@@ -273,11 +273,11 @@ void AsmParserState::addUses(Value value, ArrayRef<llvm::SMLoc> locations) {
          "expected valid block definition for block argument");
   BlockDefinition &blockDef = *impl->blocks[existingIt->second];
   SMDefinition &argDef = blockDef.arguments[arg.getArgNumber()];
-  for (llvm::SMLoc loc : locations)
+  for (SMLoc loc : locations)
     argDef.uses.emplace_back(convertIdLocToRange(loc));
 }
 
-void AsmParserState::addUses(Block *block, ArrayRef<llvm::SMLoc> locations) {
+void AsmParserState::addUses(Block *block, ArrayRef<SMLoc> locations) {
   auto it = impl->blocksToIdx.find(block);
   if (it == impl->blocksToIdx.end()) {
     it = impl->blocksToIdx.try_emplace(block, impl->blocks.size()).first;
@@ -285,12 +285,12 @@ void AsmParserState::addUses(Block *block, ArrayRef<llvm::SMLoc> locations) {
   }
 
   BlockDefinition &def = *impl->blocks[it->second];
-  for (llvm::SMLoc loc : locations)
+  for (SMLoc loc : locations)
     def.definition.uses.push_back(convertIdLocToRange(loc));
 }
 
 void AsmParserState::addUses(SymbolRefAttr refAttr,
-                             ArrayRef<llvm::SMRange> locations) {
+                             ArrayRef<SMRange> locations) {
   // Ignore this symbol if no scopes are active.
   if (impl->symbolUseScopes.empty())
     return;
diff --git a/mlir/lib/Parser/AttributeParser.cpp b/mlir/lib/Parser/AttributeParser.cpp
index 9807443c7066b..8e6e9d25f8237 100644
--- a/mlir/lib/Parser/AttributeParser.cpp
+++ b/mlir/lib/Parser/AttributeParser.cpp
@@ -154,7 +154,7 @@ Attribute Parser::parseAttribute(Type type) {
   case Token::at_identifier: {
     // When populating the parser state, this is a list of locations for all of
     // the nested references.
-    SmallVector<llvm::SMRange> referenceLocations;
+    SmallVector<SMRange> referenceLocations;
     if (state.asmState)
       referenceLocations.push_back(getToken().getLocRange());
 
@@ -372,7 +372,7 @@ static Optional<APInt> buildAttributeAPInt(Type type, bool isNegative,
 Attribute Parser::parseDecOrHexAttr(Type type, bool isNegative) {
   Token tok = getToken();
   StringRef spelling = tok.getSpelling();
-  llvm::SMLoc loc = tok.getLoc();
+  SMLoc loc = tok.getLoc();
 
   consumeToken(Token::integer);
   if (!type) {
@@ -439,24 +439,24 @@ class TensorLiteralParser {
 
   /// Build a dense attribute instance with the parsed elements and the given
   /// shaped type.
-  DenseElementsAttr getAttr(llvm::SMLoc loc, ShapedType type);
+  DenseElementsAttr getAttr(SMLoc loc, ShapedType type);
 
   ArrayRef<int64_t> getShape() const { return shape; }
 
 private:
   /// Get the parsed elements for an integer attribute.
-  ParseResult getIntAttrElements(llvm::SMLoc loc, Type eltTy,
+  ParseResult getIntAttrElements(SMLoc loc, Type eltTy,
                                  std::vector<APInt> &intValues);
 
   /// Get the parsed elements for a float attribute.
-  ParseResult getFloatAttrElements(llvm::SMLoc loc, FloatType eltTy,
+  ParseResult getFloatAttrElements(SMLoc loc, FloatType eltTy,
                                    std::vector<APFloat> &floatValues);
 
   /// Build a Dense String attribute for the given type.
-  DenseElementsAttr getStringAttr(llvm::SMLoc loc, ShapedType type, Type eltTy);
+  DenseElementsAttr getStringAttr(SMLoc loc, ShapedType type, Type eltTy);
 
   /// Build a Dense attribute with hex data for the given type.
-  DenseElementsAttr getHexAttr(llvm::SMLoc loc, ShapedType type);
+  DenseElementsAttr getHexAttr(SMLoc loc, ShapedType type);
 
   /// Parse a single element, returning failure if it isn't a valid element
   /// literal. For example:
@@ -505,7 +505,7 @@ ParseResult TensorLiteralParser::parse(bool allowHex) {
 
 /// Build a dense attribute instance with the parsed elements and the given
 /// shaped type.
-DenseElementsAttr TensorLiteralParser::getAttr(llvm::SMLoc loc,
+DenseElementsAttr TensorLiteralParser::getAttr(SMLoc loc,
                                                ShapedType type) {
   Type eltType = type.getElementType();
 
@@ -571,7 +571,7 @@ DenseElementsAttr TensorLiteralParser::getAttr(llvm::SMLoc loc,
 
 /// Build a Dense Integer attribute for the given type.
 ParseResult
-TensorLiteralParser::getIntAttrElements(llvm::SMLoc loc, Type eltTy,
+TensorLiteralParser::getIntAttrElements(SMLoc loc, Type eltTy,
                                         std::vector<APInt> &intValues) {
   intValues.reserve(storage.size());
   bool isUintType = eltTy.isUnsignedInteger();
@@ -615,7 +615,7 @@ TensorLiteralParser::getIntAttrElements(llvm::SMLoc loc, Type eltTy,
 
 /// Build a Dense Float attribute for the given type.
 ParseResult
-TensorLiteralParser::getFloatAttrElements(llvm::SMLoc loc, FloatType eltTy,
+TensorLiteralParser::getFloatAttrElements(SMLoc loc, FloatType eltTy,
                                           std::vector<APFloat> &floatValues) {
   floatValues.reserve(storage.size());
   for (const auto &signAndToken : storage) {
@@ -656,7 +656,7 @@ TensorLiteralParser::getFloatAttrElements(llvm::SMLoc loc, FloatType eltTy,
 }
 
 /// Build a Dense String attribute for the given type.
-DenseElementsAttr TensorLiteralParser::getStringAttr(llvm::SMLoc loc,
+DenseElementsAttr TensorLiteralParser::getStringAttr(SMLoc loc,
                                                      ShapedType type,
                                                      Type eltTy) {
   if (hexStorage.hasValue()) {
@@ -678,7 +678,7 @@ DenseElementsAttr TensorLiteralParser::getStringAttr(llvm::SMLoc loc,
 }
 
 /// Build a Dense attribute with hex data for the given type.
-DenseElementsAttr TensorLiteralParser::getHexAttr(llvm::SMLoc loc,
+DenseElementsAttr TensorLiteralParser::getHexAttr(SMLoc loc,
                                                   ShapedType type) {
   Type elementType = type.getElementType();
   if (!elementType.isIntOrIndexOrFloat() && !elementType.isa<ComplexType>()) {
@@ -833,7 +833,7 @@ Attribute Parser::parseDenseElementsAttr(Type attrType) {
 
 /// Parse an opaque elements attribute.
 Attribute Parser::parseOpaqueElementsAttr(Type attrType) {
-  llvm::SMLoc loc = getToken().getLoc();
+  SMLoc loc = getToken().getLoc();
   consumeToken(Token::kw_opaque);
   if (parseToken(Token::less, "expected '<' after 'opaque'"))
     return nullptr;
@@ -890,7 +890,7 @@ ShapedType Parser::parseElementsLiteralType(Type type) {
 
 /// Parse a sparse elements attribute.
 Attribute Parser::parseSparseElementsAttr(Type attrType) {
-  llvm::SMLoc loc = getToken().getLoc();
+  SMLoc loc = getToken().getLoc();
   consumeToken(Token::kw_sparse);
   if (parseToken(Token::less, "Expected '<' after 'sparse'"))
     return nullptr;
diff --git a/mlir/lib/Parser/DialectSymbolParser.cpp b/mlir/lib/Parser/DialectSymbolParser.cpp
index ded29d9d1f6b1..c72e720f4a7f5 100644
--- a/mlir/lib/Parser/DialectSymbolParser.cpp
+++ b/mlir/lib/Parser/DialectSymbolParser.cpp
@@ -20,7 +20,6 @@
 using namespace mlir;
 using namespace mlir::detail;
 using llvm::MemoryBuffer;
-using llvm::SMLoc;
 using llvm::SourceMgr;
 
 namespace {
@@ -157,7 +156,7 @@ static Symbol parseExtendedSymbol(Parser &p, Token::Kind identifierTok,
       return (p.emitError("expected string literal data in dialect symbol"),
               nullptr);
     symbolData = p.getToken().getStringValue();
-    loc = llvm::SMLoc::getFromPointer(p.getToken().getLoc().getPointer() + 1);
+    loc = SMLoc::getFromPointer(p.getToken().getLoc().getPointer() + 1);
     p.consumeToken(Token::string);
 
     // Consume the '>'.
@@ -169,7 +168,7 @@ static Symbol parseExtendedSymbol(Parser &p, Token::Kind identifierTok,
     auto dotHalves = identifier.split('.');
     dialectName = dotHalves.first;
     auto prettyName = dotHalves.second;
-    loc = llvm::SMLoc::getFromPointer(prettyName.data());
+    loc = SMLoc::getFromPointer(prettyName.data());
 
     // If the dialect's symbol is followed immediately by a <, then lex the body
     // of it into prettyName.
@@ -183,7 +182,7 @@ static Symbol parseExtendedSymbol(Parser &p, Token::Kind identifierTok,
   }
 
   // Record the name location of the type remapped to the top level buffer.
-  llvm::SMLoc locInTopLevelBuffer = p.remapLocationToTopLevelBuffer(loc);
+  SMLoc locInTopLevelBuffer = p.remapLocationToTopLevelBuffer(loc);
   p.getState().symbols.nestedParserLocs.push_back(locInTopLevelBuffer);
 
   // Call into the provided symbol construction function.
@@ -239,7 +238,7 @@ Attribute Parser::parseExtendedAttr(Type type) {
   Attribute attr = parseExtendedSymbol<Attribute>(
       *this, Token::hash_identifier, state.symbols.attributeAliasDefinitions,
       [&](StringRef dialectName, StringRef symbolData,
-          llvm::SMLoc loc) -> Attribute {
+          SMLoc loc) -> Attribute {
         // Parse an optional trailing colon type.
         Type attrType = type;
         if (consumeIf(Token::colon) && !(attrType = parseType()))
@@ -282,7 +281,7 @@ Type Parser::parseExtendedType() {
   return parseExtendedSymbol<Type>(
       *this, Token::exclamation_identifier, state.symbols.typeAliasDefinitions,
       [&](StringRef dialectName, StringRef symbolData,
-          llvm::SMLoc loc) -> Type {
+          SMLoc loc) -> Type {
         // If we found a registered dialect, then ask it to parse the type.
         auto *dialect = state.context->getOrLoadDialect(dialectName);
 
diff --git a/mlir/lib/Parser/Lexer.cpp b/mlir/lib/Parser/Lexer.cpp
index 5a882d08594fb..8d48a60948a85 100644
--- a/mlir/lib/Parser/Lexer.cpp
+++ b/mlir/lib/Parser/Lexer.cpp
@@ -17,9 +17,8 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/SourceMgr.h"
-using namespace mlir;
 
-using llvm::SMLoc;
+using namespace mlir;
 
 // Returns true if 'c' is an allowable punctuation character: [$._-]
 // Returns false otherwise.
@@ -36,7 +35,7 @@ Lexer::Lexer(const llvm::SourceMgr &sourceMgr, MLIRContext *context)
 
 /// Encode the specified source location information into an attribute for
 /// attachment to the IR.
-Location Lexer::getEncodedSourceLocation(llvm::SMLoc loc) {
+Location Lexer::getEncodedSourceLocation(SMLoc loc) {
   auto &sourceMgr = getSourceMgr();
   unsigned mainFileID = sourceMgr.getMainFileID();
 
diff --git a/mlir/lib/Parser/Lexer.h b/mlir/lib/Parser/Lexer.h
index 8f24d08dfbe73..1df6ff82d3415 100644
--- a/mlir/lib/Parser/Lexer.h
+++ b/mlir/lib/Parser/Lexer.h
@@ -30,7 +30,7 @@ class Lexer {
 
   /// Encode the specified source location information into a Location object
   /// for attachment to the IR or error reporting.
-  Location getEncodedSourceLocation(llvm::SMLoc loc);
+  Location getEncodedSourceLocation(SMLoc loc);
 
   /// Change the position of the lexer cursor.  The next token we lex will start
   /// at the designated point in the input.
diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp
index ad7abbb9368a7..54e4e762d5640 100644
--- a/mlir/lib/Parser/Parser.cpp
+++ b/mlir/lib/Parser/Parser.cpp
@@ -29,7 +29,6 @@
 using namespace mlir;
 using namespace mlir::detail;
 using llvm::MemoryBuffer;
-using llvm::SMLoc;
 using llvm::SourceMgr;
 
 //===----------------------------------------------------------------------===//
@@ -198,7 +197,7 @@ OptionalParseResult Parser::parseOptionalInteger(APInt &result) {
 ParseResult Parser::parseFloatFromIntegerLiteral(
     Optional<APFloat> &result, const Token &tok, bool isNegative,
     const llvm::fltSemantics &semantics, size_t typeSizeInBits) {
-  llvm::SMLoc loc = tok.getLoc();
+  SMLoc loc = tok.getLoc();
   StringRef spelling = tok.getSpelling();
   bool isHex = spelling.size() > 1 && spelling[1] == 'x';
   if (!isHex) {
@@ -373,7 +372,7 @@ class OperationParser : public Parser {
 
   /// Parse a region body into 'region'.
   ParseResult
-  parseRegionBody(Region &region, llvm::SMLoc startLoc,
+  parseRegionBody(Region &region, SMLoc startLoc,
                   ArrayRef<std::pair<SSAUseInfo, Type>> entryArguments,
                   ArrayRef<Location> argLocations, bool isIsolatedNameScope);
 
@@ -934,7 +933,7 @@ ParseResult OperationParser::parseOperation() {
     // Add this operation to the assembly state if it was provided to populate.
     if (state.asmState) {
       unsigned resultIt = 0;
-      SmallVector<std::pair<unsigned, llvm::SMLoc>> asmResultGroups;
+      SmallVector<std::pair<unsigned, SMLoc>> asmResultGroups;
       asmResultGroups.reserve(resultIDs.size());
       for (ResultRecord &record : resultIDs) {
         asmResultGroups.emplace_back(resultIt, std::get<2>(record));
@@ -1281,7 +1280,7 @@ class CustomOpAsmParser : public AsmParserImpl<OpAsmParser> {
   }
 
   /// Emit a diagnostic at the specified location and return failure.
-  InFlightDiagnostic emitError(llvm::SMLoc loc, const Twine &message) override {
+  InFlightDiagnostic emitError(SMLoc loc, const Twine &message) override {
     return AsmParserImpl<OpAsmParser>::emitError(loc, "custom op '" + opName +
                                                           "' " + message);
   }
@@ -1692,7 +1691,7 @@ FailureOr<OperationName> OperationParser::parseCustomOperationName() {
 
 Operation *
 OperationParser::parseCustomOperation(ArrayRef<ResultRecord> resultIDs) {
-  llvm::SMLoc opLoc = getToken().getLoc();
+  SMLoc opLoc = getToken().getLoc();
 
   FailureOr<OperationName> opNameInfo = parseCustomOperationName();
   if (failed(opNameInfo))
@@ -1849,7 +1848,7 @@ ParseResult OperationParser::parseRegion(
 }
 
 ParseResult OperationParser::parseRegionBody(
-    Region &region, llvm::SMLoc startLoc,
+    Region &region, SMLoc startLoc,
     ArrayRef<std::pair<OperationParser::SSAUseInfo, Type>> entryArguments,
     ArrayRef<Location> argLocations, bool isIsolatedNameScope) {
   assert(argLocations.empty() || argLocations.size() == entryArguments.size());
@@ -2252,7 +2251,7 @@ LogicalResult mlir::parseSourceFile(llvm::StringRef filename,
                      "could not open input file " + filename);
 
   // Load the MLIR source file.
-  sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), llvm::SMLoc());
+  sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), SMLoc());
   return parseSourceFile(sourceMgr, block, context, sourceFileLoc, asmState);
 }
 
diff --git a/mlir/lib/Parser/Parser.h b/mlir/lib/Parser/Parser.h
index 8bdf248ea8e46..fb138d94cf3b2 100644
--- a/mlir/lib/Parser/Parser.h
+++ b/mlir/lib/Parser/Parser.h
@@ -71,11 +71,11 @@ class Parser {
   InFlightDiagnostic emitError(const Twine &message = {}) {
     return emitError(state.curToken.getLoc(), message);
   }
-  InFlightDiagnostic emitError(llvm::SMLoc loc, const Twine &message = {});
+  InFlightDiagnostic emitError(SMLoc loc, const Twine &message = {});
 
   /// Encode the specified source location information into an attribute for
   /// attachment to the IR.
-  Location getEncodedSourceLocation(llvm::SMLoc loc) {
+  Location getEncodedSourceLocation(SMLoc loc) {
     // If there are no active nested parsers, we can get the encoded source
     // location directly.
     if (state.parserDepth == 0)
@@ -88,7 +88,7 @@ class Parser {
   /// Remaps the given SMLoc to the top level lexer of the parser. This is used
   /// to adjust locations of potentially nested parsers to ensure that they can
   /// be emitted properly as diagnostics.
-  llvm::SMLoc remapLocationToTopLevelBuffer(llvm::SMLoc loc) {
+  SMLoc remapLocationToTopLevelBuffer(SMLoc loc) {
     // If there are no active nested parsers, we can return location directly.
     SymbolState &symbols = state.symbols;
     if (state.parserDepth == 0)
@@ -101,7 +101,7 @@ class Parser {
     size_t offset = loc.getPointer() - state.lex.getBufferBegin();
     auto *rawLoc =
         symbols.nestedParserLocs[state.parserDepth - 1].getPointer() + offset;
-    return llvm::SMLoc::getFromPointer(rawLoc);
+    return SMLoc::getFromPointer(rawLoc);
   }
 
   //===--------------------------------------------------------------------===//
@@ -158,7 +158,7 @@ class Parser {
   /// unlike `OpBuilder::getType`, this method does not implicitly insert a
   /// context parameter.
   template <typename T, typename... ParamsT>
-  T getChecked(llvm::SMLoc loc, ParamsT &&...params) {
+  T getChecked(SMLoc loc, ParamsT &&...params) {
     return T::getChecked([&] { return emitError(loc); },
                          std::forward<ParamsT>(params)...);
   }
diff --git a/mlir/lib/Parser/ParserState.h b/mlir/lib/Parser/ParserState.h
index 5ab67548aaec7..33ad8f0090948 100644
--- a/mlir/lib/Parser/ParserState.h
+++ b/mlir/lib/Parser/ParserState.h
@@ -32,7 +32,7 @@ struct SymbolState {
   /// active nested parsers. Given that some nested parsers, i.e. custom dialect
   /// parsers, operate on a temporary memory buffer, this provides an anchor
   /// point for emitting diagnostics.
-  SmallVector<llvm::SMLoc, 1> nestedParserLocs;
+  SmallVector<SMLoc, 1> nestedParserLocs;
 
   /// The top-level lexer that contains the original memory buffer provided by
   /// the user. This is used by nested parsers to get a properly encoded source
diff --git a/mlir/lib/Parser/Token.cpp b/mlir/lib/Parser/Token.cpp
index 00bc7dbd6bd94..6cc167da5f955 100644
--- a/mlir/lib/Parser/Token.cpp
+++ b/mlir/lib/Parser/Token.cpp
@@ -12,9 +12,8 @@
 
 #include "Token.h"
 #include "llvm/ADT/StringExtras.h"
+
 using namespace mlir;
-using llvm::SMLoc;
-using llvm::SMRange;
 
 SMLoc Token::getLoc() const { return SMLoc::getFromPointer(spelling.data()); }
 
diff --git a/mlir/lib/Parser/Token.h b/mlir/lib/Parser/Token.h
index c086a0a194b92..be0924cb9d67a 100644
--- a/mlir/lib/Parser/Token.h
+++ b/mlir/lib/Parser/Token.h
@@ -102,9 +102,9 @@ class Token {
   std::string getSymbolReference() const;
 
   // Location processing.
-  llvm::SMLoc getLoc() const;
-  llvm::SMLoc getEndLoc() const;
-  llvm::SMRange getLocRange() const;
+  SMLoc getLoc() const;
+  SMLoc getEndLoc() const;
+  SMRange getLocRange() const;
 
   /// Given a punctuation or keyword token kind, return the spelling of the
   /// token as a string.  Warning: This will abort on markers, identifiers and
diff --git a/mlir/lib/Parser/TypeParser.cpp b/mlir/lib/Parser/TypeParser.cpp
index 5ff6e414b7da2..533ed6b4a5d49 100644
--- a/mlir/lib/Parser/TypeParser.cpp
+++ b/mlir/lib/Parser/TypeParser.cpp
@@ -116,7 +116,7 @@ Type Parser::parseComplexType() {
   if (parseToken(Token::less, "expected '<' in complex type"))
     return nullptr;
 
-  llvm::SMLoc elementTypeLoc = getToken().getLoc();
+  SMLoc elementTypeLoc = getToken().getLoc();
   auto elementType = parseType();
   if (!elementType ||
       parseToken(Token::greater, "expected '>' in complex type"))
@@ -190,7 +190,7 @@ ParseResult Parser::parseStridedLayout(int64_t &offset,
 ///   memory-space ::= integer-literal | attribute
 ///
 Type Parser::parseMemRefType() {
-  llvm::SMLoc loc = getToken().getLoc();
+  SMLoc loc = getToken().getLoc();
   consumeToken(Token::kw_memref);
 
   if (parseToken(Token::less, "expected '<' in memref type"))
diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp
index 58870a6ea93b7..5e23b9c6ffe8d 100644
--- a/mlir/lib/Pass/PassRegistry.cpp
+++ b/mlir/lib/Pass/PassRegistry.cpp
@@ -371,9 +371,9 @@ LogicalResult TextualPipeline::initialize(StringRef text,
   pipelineMgr.AddNewSourceBuffer(
       llvm::MemoryBuffer::getMemBuffer(text, "MLIR Textual PassPipeline Parser",
                                        /*RequiresNullTerminator=*/false),
-      llvm::SMLoc());
+      SMLoc());
   auto errorHandler = [&](const char *rawLoc, Twine msg) {
-    pipelineMgr.PrintMessage(errorStream, llvm::SMLoc::getFromPointer(rawLoc),
+    pipelineMgr.PrintMessage(errorStream, SMLoc::getFromPointer(rawLoc),
                              llvm::SourceMgr::DK_Error, msg);
     return failure();
   };
diff --git a/mlir/lib/Support/MlirOptMain.cpp b/mlir/lib/Support/MlirOptMain.cpp
index af4681f50ccbd..6fa9a6657adce 100644
--- a/mlir/lib/Support/MlirOptMain.cpp
+++ b/mlir/lib/Support/MlirOptMain.cpp
@@ -37,7 +37,6 @@
 
 using namespace mlir;
 using namespace llvm;
-using llvm::SMLoc;
 
 /// Perform the actions on the input file indicated by the command line flags
 /// within the specified context.
diff --git a/mlir/lib/Support/ToolUtilities.cpp b/mlir/lib/Support/ToolUtilities.cpp
index ca88fef96c0e2..7b52072a89767 100644
--- a/mlir/lib/Support/ToolUtilities.cpp
+++ b/mlir/lib/Support/ToolUtilities.cpp
@@ -37,7 +37,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
 
   // Add the original buffer to the source manager.
   llvm::SourceMgr fileSourceMgr;
-  fileSourceMgr.AddNewSourceBuffer(std::move(originalBuffer), llvm::SMLoc());
+  fileSourceMgr.AddNewSourceBuffer(std::move(originalBuffer), SMLoc());
 
   // Flag near misses by iterating over all the sub-buffers found when splitting
   // with the prefix of the splitMarker. Use a sliding window where we only add
@@ -60,7 +60,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
       prev = buffer.drop_front(checkLen);
     } else {
       // TODO: Consider making this a failure.
-      auto splitLoc = llvm::SMLoc::getFromPointer(buffer.data());
+      auto splitLoc = SMLoc::getFromPointer(buffer.data());
       fileSourceMgr.PrintMessage(llvm::errs(), splitLoc,
                                  llvm::SourceMgr::DK_Warning,
                                  "near miss with file split marker");
@@ -74,7 +74,7 @@ mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer> originalBuffer,
   // Process each chunk in turn.
   bool hadFailure = false;
   for (auto &subBuffer : sourceBuffers) {
-    auto splitLoc = llvm::SMLoc::getFromPointer(subBuffer.data());
+    auto splitLoc = SMLoc::getFromPointer(subBuffer.data());
     unsigned splitLine = fileSourceMgr.getLineAndColumn(splitLoc).first;
     auto subMemBuffer = llvm::MemoryBuffer::getMemBufferCopy(
         subBuffer, Twine("within split at ") +
diff --git a/mlir/lib/TableGen/AttrOrTypeDef.cpp b/mlir/lib/TableGen/AttrOrTypeDef.cpp
index e874c8648d41c..c3a14bb5f1ddf 100644
--- a/mlir/lib/TableGen/AttrOrTypeDef.cpp
+++ b/mlir/lib/TableGen/AttrOrTypeDef.cpp
@@ -147,7 +147,7 @@ Optional<StringRef> AttrOrTypeDef::getExtraDecls() const {
   return value.empty() ? Optional<StringRef>() : value;
 }
 
-ArrayRef<llvm::SMLoc> AttrOrTypeDef::getLoc() const { return def->getLoc(); }
+ArrayRef<SMLoc> AttrOrTypeDef::getLoc() const { return def->getLoc(); }
 
 bool AttrOrTypeDef::skipDefaultBuilders() const {
   return def->getValueAsBit("skipDefaultBuilders");
diff --git a/mlir/lib/TableGen/Builder.cpp b/mlir/lib/TableGen/Builder.cpp
index 8210e8fe1a126..d25d09d7569f2 100644
--- a/mlir/lib/TableGen/Builder.cpp
+++ b/mlir/lib/TableGen/Builder.cpp
@@ -39,7 +39,7 @@ Optional<StringRef> Builder::Parameter::getDefaultValue() const {
 // Builder
 //===----------------------------------------------------------------------===//
 
-Builder::Builder(const llvm::Record *record, ArrayRef<llvm::SMLoc> loc)
+Builder::Builder(const llvm::Record *record, ArrayRef<SMLoc> loc)
     : def(record) {
   // Initialize the parameters of the builder.
   const llvm::DagInit *dag = def->getValueAsDag("dagParams");
diff --git a/mlir/lib/TableGen/Operator.cpp b/mlir/lib/TableGen/Operator.cpp
index 6da1b7c1fe518..a77d17c56df71 100644
--- a/mlir/lib/TableGen/Operator.cpp
+++ b/mlir/lib/TableGen/Operator.cpp
@@ -612,7 +612,7 @@ auto Operator::getSameTypeAsResult(int index) const -> ArrayRef<ArgOrType> {
   return resultTypeMapping[index];
 }
 
-ArrayRef<llvm::SMLoc> Operator::getLoc() const { return def.getLoc(); }
+ArrayRef<SMLoc> Operator::getLoc() const { return def.getLoc(); }
 
 bool Operator::hasDescription() const {
   return def.getValue("description") != nullptr;
diff --git a/mlir/lib/TableGen/Predicate.cpp b/mlir/lib/TableGen/Predicate.cpp
index 3492c2f89077f..62f3fe1435336 100644
--- a/mlir/lib/TableGen/Predicate.cpp
+++ b/mlir/lib/TableGen/Predicate.cpp
@@ -47,7 +47,7 @@ bool Pred::isCombined() const {
   return def && def->isSubClassOf("CombinedPred");
 }
 
-ArrayRef<llvm::SMLoc> Pred::getLoc() const { return def->getLoc(); }
+ArrayRef<SMLoc> Pred::getLoc() const { return def->getLoc(); }
 
 CPred::CPred(const llvm::Record *record) : Pred(record) {
   assert(def->isSubClassOf("CPred") &&
diff --git a/mlir/lib/Tools/PDLL/AST/Nodes.cpp b/mlir/lib/Tools/PDLL/AST/Nodes.cpp
index 07d8bb2ea97c5..76f42f5329fd4 100644
--- a/mlir/lib/Tools/PDLL/AST/Nodes.cpp
+++ b/mlir/lib/Tools/PDLL/AST/Nodes.cpp
@@ -28,7 +28,7 @@ static StringRef copyStringWithNull(Context &ctx, StringRef str) {
 // Name
 //===----------------------------------------------------------------------===//
 
-const Name &Name::create(Context &ctx, StringRef name, llvm::SMRange location) {
+const Name &Name::create(Context &ctx, StringRef name, SMRange location) {
   return *new (ctx.getAllocator().Allocate<Name>())
       Name(copyStringWithNull(ctx, name), location);
 }
@@ -54,7 +54,7 @@ Decl *DeclScope::lookup(StringRef name) {
 // CompoundStmt
 //===----------------------------------------------------------------------===//
 
-CompoundStmt *CompoundStmt::create(Context &ctx, llvm::SMRange loc,
+CompoundStmt *CompoundStmt::create(Context &ctx, SMRange loc,
                                    ArrayRef<Stmt *> children) {
   unsigned allocSize = CompoundStmt::totalSizeToAlloc<Stmt *>(children.size());
   void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(CompoundStmt));
@@ -69,7 +69,7 @@ CompoundStmt *CompoundStmt::create(Context &ctx, llvm::SMRange loc,
 // LetStmt
 //===----------------------------------------------------------------------===//
 
-LetStmt *LetStmt::create(Context &ctx, llvm::SMRange loc,
+LetStmt *LetStmt::create(Context &ctx, SMRange loc,
                          VariableDecl *varDecl) {
   return new (ctx.getAllocator().Allocate<LetStmt>()) LetStmt(loc, varDecl);
 }
@@ -81,14 +81,14 @@ LetStmt *LetStmt::create(Context &ctx, llvm::SMRange loc,
 //===----------------------------------------------------------------------===//
 // EraseStmt
 
-EraseStmt *EraseStmt::create(Context &ctx, llvm::SMRange loc, Expr *rootOp) {
+EraseStmt *EraseStmt::create(Context &ctx, SMRange loc, Expr *rootOp) {
   return new (ctx.getAllocator().Allocate<EraseStmt>()) EraseStmt(loc, rootOp);
 }
 
 //===----------------------------------------------------------------------===//
 // ReplaceStmt
 
-ReplaceStmt *ReplaceStmt::create(Context &ctx, llvm::SMRange loc, Expr *rootOp,
+ReplaceStmt *ReplaceStmt::create(Context &ctx, SMRange loc, Expr *rootOp,
                                  ArrayRef<Expr *> replExprs) {
   unsigned allocSize = ReplaceStmt::totalSizeToAlloc<Expr *>(replExprs.size());
   void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(ReplaceStmt));
@@ -102,7 +102,7 @@ ReplaceStmt *ReplaceStmt::create(Context &ctx, llvm::SMRange loc, Expr *rootOp,
 //===----------------------------------------------------------------------===//
 // RewriteStmt
 
-RewriteStmt *RewriteStmt::create(Context &ctx, llvm::SMRange loc, Expr *rootOp,
+RewriteStmt *RewriteStmt::create(Context &ctx, SMRange loc, Expr *rootOp,
                                  CompoundStmt *rewriteBody) {
   return new (ctx.getAllocator().Allocate<RewriteStmt>())
       RewriteStmt(loc, rootOp, rewriteBody);
@@ -112,7 +112,7 @@ RewriteStmt *RewriteStmt::create(Context &ctx, llvm::SMRange loc, Expr *rootOp,
 // AttributeExpr
 //===----------------------------------------------------------------------===//
 
-AttributeExpr *AttributeExpr::create(Context &ctx, llvm::SMRange loc,
+AttributeExpr *AttributeExpr::create(Context &ctx, SMRange loc,
                                      StringRef value) {
   return new (ctx.getAllocator().Allocate<AttributeExpr>())
       AttributeExpr(ctx, loc, copyStringWithNull(ctx, value));
@@ -122,7 +122,7 @@ AttributeExpr *AttributeExpr::create(Context &ctx, llvm::SMRange loc,
 // DeclRefExpr
 //===----------------------------------------------------------------------===//
 
-DeclRefExpr *DeclRefExpr::create(Context &ctx, llvm::SMRange loc, Decl *decl,
+DeclRefExpr *DeclRefExpr::create(Context &ctx, SMRange loc, Decl *decl,
                                  Type type) {
   return new (ctx.getAllocator().Allocate<DeclRefExpr>())
       DeclRefExpr(loc, decl, type);
@@ -132,7 +132,7 @@ DeclRefExpr *DeclRefExpr::create(Context &ctx, llvm::SMRange loc, Decl *decl,
 // MemberAccessExpr
 //===----------------------------------------------------------------------===//
 
-MemberAccessExpr *MemberAccessExpr::create(Context &ctx, llvm::SMRange loc,
+MemberAccessExpr *MemberAccessExpr::create(Context &ctx, SMRange loc,
                                            const Expr *parentExpr,
                                            StringRef memberName, Type type) {
   return new (ctx.getAllocator().Allocate<MemberAccessExpr>()) MemberAccessExpr(
@@ -144,7 +144,7 @@ MemberAccessExpr *MemberAccessExpr::create(Context &ctx, llvm::SMRange loc,
 //===----------------------------------------------------------------------===//
 
 OperationExpr *OperationExpr::create(
-    Context &ctx, llvm::SMRange loc, const OpNameDecl *name,
+    Context &ctx, SMRange loc, const OpNameDecl *name,
     ArrayRef<Expr *> operands, ArrayRef<Expr *> resultTypes,
     ArrayRef<NamedAttributeDecl *> attributes) {
   unsigned allocSize =
@@ -174,7 +174,7 @@ Optional<StringRef> OperationExpr::getName() const {
 // TupleExpr
 //===----------------------------------------------------------------------===//
 
-TupleExpr *TupleExpr::create(Context &ctx, llvm::SMRange loc,
+TupleExpr *TupleExpr::create(Context &ctx, SMRange loc,
                              ArrayRef<Expr *> elements,
                              ArrayRef<StringRef> names) {
   unsigned allocSize = TupleExpr::totalSizeToAlloc<Expr *>(elements.size());
@@ -194,7 +194,7 @@ TupleExpr *TupleExpr::create(Context &ctx, llvm::SMRange loc,
 // TypeExpr
 //===----------------------------------------------------------------------===//
 
-TypeExpr *TypeExpr::create(Context &ctx, llvm::SMRange loc, StringRef value) {
+TypeExpr *TypeExpr::create(Context &ctx, SMRange loc, StringRef value) {
   return new (ctx.getAllocator().Allocate<TypeExpr>())
       TypeExpr(ctx, loc, copyStringWithNull(ctx, value));
 }
@@ -203,7 +203,7 @@ TypeExpr *TypeExpr::create(Context &ctx, llvm::SMRange loc, StringRef value) {
 // AttrConstraintDecl
 //===----------------------------------------------------------------------===//
 
-AttrConstraintDecl *AttrConstraintDecl::create(Context &ctx, llvm::SMRange loc,
+AttrConstraintDecl *AttrConstraintDecl::create(Context &ctx, SMRange loc,
                                                Expr *typeExpr) {
   return new (ctx.getAllocator().Allocate<AttrConstraintDecl>())
       AttrConstraintDecl(loc, typeExpr);
@@ -213,10 +213,10 @@ AttrConstraintDecl *AttrConstraintDecl::create(Context &ctx, llvm::SMRange loc,
 // OpConstraintDecl
 //===----------------------------------------------------------------------===//
 
-OpConstraintDecl *OpConstraintDecl::create(Context &ctx, llvm::SMRange loc,
+OpConstraintDecl *OpConstraintDecl::create(Context &ctx, SMRange loc,
                                            const OpNameDecl *nameDecl) {
   if (!nameDecl)
-    nameDecl = OpNameDecl::create(ctx, llvm::SMRange());
+    nameDecl = OpNameDecl::create(ctx, SMRange());
 
   return new (ctx.getAllocator().Allocate<OpConstraintDecl>())
       OpConstraintDecl(loc, nameDecl);
@@ -231,7 +231,7 @@ Optional<StringRef> OpConstraintDecl::getName() const {
 //===----------------------------------------------------------------------===//
 
 TypeConstraintDecl *TypeConstraintDecl::create(Context &ctx,
-                                               llvm::SMRange loc) {
+                                               SMRange loc) {
   return new (ctx.getAllocator().Allocate<TypeConstraintDecl>())
       TypeConstraintDecl(loc);
 }
@@ -241,7 +241,7 @@ TypeConstraintDecl *TypeConstraintDecl::create(Context &ctx,
 //===----------------------------------------------------------------------===//
 
 TypeRangeConstraintDecl *TypeRangeConstraintDecl::create(Context &ctx,
-                                                         llvm::SMRange loc) {
+                                                         SMRange loc) {
   return new (ctx.getAllocator().Allocate<TypeRangeConstraintDecl>())
       TypeRangeConstraintDecl(loc);
 }
@@ -251,7 +251,7 @@ TypeRangeConstraintDecl *TypeRangeConstraintDecl::create(Context &ctx,
 //===----------------------------------------------------------------------===//
 
 ValueConstraintDecl *
-ValueConstraintDecl::create(Context &ctx, llvm::SMRange loc, Expr *typeExpr) {
+ValueConstraintDecl::create(Context &ctx, SMRange loc, Expr *typeExpr) {
   return new (ctx.getAllocator().Allocate<ValueConstraintDecl>())
       ValueConstraintDecl(loc, typeExpr);
 }
@@ -261,7 +261,7 @@ ValueConstraintDecl::create(Context &ctx, llvm::SMRange loc, Expr *typeExpr) {
 //===----------------------------------------------------------------------===//
 
 ValueRangeConstraintDecl *ValueRangeConstraintDecl::create(Context &ctx,
-                                                           llvm::SMRange loc,
+                                                           SMRange loc,
                                                            Expr *typeExpr) {
   return new (ctx.getAllocator().Allocate<ValueRangeConstraintDecl>())
       ValueRangeConstraintDecl(loc, typeExpr);
@@ -284,7 +284,7 @@ NamedAttributeDecl *NamedAttributeDecl::create(Context &ctx, const Name &name,
 OpNameDecl *OpNameDecl::create(Context &ctx, const Name &name) {
   return new (ctx.getAllocator().Allocate<OpNameDecl>()) OpNameDecl(name);
 }
-OpNameDecl *OpNameDecl::create(Context &ctx, llvm::SMRange loc) {
+OpNameDecl *OpNameDecl::create(Context &ctx, SMRange loc) {
   return new (ctx.getAllocator().Allocate<OpNameDecl>()) OpNameDecl(loc);
 }
 
@@ -292,7 +292,7 @@ OpNameDecl *OpNameDecl::create(Context &ctx, llvm::SMRange loc) {
 // PatternDecl
 //===----------------------------------------------------------------------===//
 
-PatternDecl *PatternDecl::create(Context &ctx, llvm::SMRange loc,
+PatternDecl *PatternDecl::create(Context &ctx, SMRange loc,
                                  const Name *name, Optional<uint16_t> benefit,
                                  bool hasBoundedRecursion,
                                  const CompoundStmt *body) {
@@ -322,7 +322,7 @@ VariableDecl *VariableDecl::create(Context &ctx, const Name &name, Type type,
 // Module
 //===----------------------------------------------------------------------===//
 
-Module *Module::create(Context &ctx, llvm::SMLoc loc,
+Module *Module::create(Context &ctx, SMLoc loc,
                        ArrayRef<Decl *> children) {
   unsigned allocSize = Module::totalSizeToAlloc<Decl *>(children.size());
   void *rawData = ctx.getAllocator().Allocate(allocSize, alignof(Module));
diff --git a/mlir/lib/Tools/PDLL/Parser/Lexer.cpp b/mlir/lib/Tools/PDLL/Parser/Lexer.cpp
index e45781475f796..3db5cfbdfd645 100644
--- a/mlir/lib/Tools/PDLL/Parser/Lexer.cpp
+++ b/mlir/lib/Tools/PDLL/Parser/Lexer.cpp
@@ -94,7 +94,7 @@ Lexer::~Lexer() {
 LogicalResult Lexer::pushInclude(StringRef filename) {
   std::string includedFile;
   int bufferID = srcMgr.AddIncludeFile(
-      filename.str(), llvm::SMLoc::getFromPointer(curPtr), includedFile);
+      filename.str(), SMLoc::getFromPointer(curPtr), includedFile);
   if (!bufferID) return failure();
 
   curBufferID = bufferID;
@@ -103,18 +103,18 @@ LogicalResult Lexer::pushInclude(StringRef filename) {
   return success();
 }
 
-Token Lexer::emitError(llvm::SMRange loc, const Twine &msg) {
+Token Lexer::emitError(SMRange loc, const Twine &msg) {
   diagEngine.emitError(loc, msg);
   return formToken(Token::error, loc.Start.getPointer());
 }
-Token Lexer::emitErrorAndNote(llvm::SMRange loc, const Twine &msg,
-                              llvm::SMRange noteLoc, const Twine &note) {
+Token Lexer::emitErrorAndNote(SMRange loc, const Twine &msg,
+                              SMRange noteLoc, const Twine &note) {
   diagEngine.emitError(loc, msg)->attachNote(note, noteLoc);
   return formToken(Token::error, loc.Start.getPointer());
 }
 Token Lexer::emitError(const char *loc, const Twine &msg) {
-  return emitError(llvm::SMRange(llvm::SMLoc::getFromPointer(loc),
-                                 llvm::SMLoc::getFromPointer(loc + 1)),
+  return emitError(SMRange(SMLoc::getFromPointer(loc),
+                                 SMLoc::getFromPointer(loc + 1)),
                    msg);
 }
 
@@ -161,7 +161,7 @@ Token Lexer::lexToken() {
         Token eof = formToken(Token::eof, tokStart);
 
         // Check to see if we are in an included file.
-        llvm::SMLoc parentIncludeLoc = srcMgr.getParentIncludeLoc(curBufferID);
+        SMLoc parentIncludeLoc = srcMgr.getParentIncludeLoc(curBufferID);
         if (parentIncludeLoc.isValid()) {
           curBufferID = srcMgr.FindBufferContainingLoc(parentIncludeLoc);
           curBuffer = srcMgr.getMemoryBuffer(curBufferID)->getBuffer();
diff --git a/mlir/lib/Tools/PDLL/Parser/Lexer.h b/mlir/lib/Tools/PDLL/Parser/Lexer.h
index bf6f2a686fbee..4692f28ba877c 100644
--- a/mlir/lib/Tools/PDLL/Parser/Lexer.h
+++ b/mlir/lib/Tools/PDLL/Parser/Lexer.h
@@ -133,16 +133,16 @@ class Token {
   bool is(Kind k) const { return kind == k; }
 
   /// Return a location for the start of this token.
-  llvm::SMLoc getStartLoc() const {
-    return llvm::SMLoc::getFromPointer(spelling.data());
+  SMLoc getStartLoc() const {
+    return SMLoc::getFromPointer(spelling.data());
   }
   /// Return a location at the end of this token.
-  llvm::SMLoc getEndLoc() const {
-    return llvm::SMLoc::getFromPointer(spelling.data() + spelling.size());
+  SMLoc getEndLoc() const {
+    return SMLoc::getFromPointer(spelling.data() + spelling.size());
   }
   /// Return a location for the range of this token.
-  llvm::SMRange getLoc() const {
-    return llvm::SMRange(getStartLoc(), getEndLoc());
+  SMRange getLoc() const {
+    return SMRange(getStartLoc(), getEndLoc());
   }
 
 private:
@@ -182,10 +182,10 @@ class Lexer {
   void resetPointer(const char *newPointer) { curPtr = newPointer; }
 
   /// Emit an error to the lexer with the given location and message.
-  Token emitError(llvm::SMRange loc, const Twine &msg);
+  Token emitError(SMRange loc, const Twine &msg);
   Token emitError(const char *loc, const Twine &msg);
-  Token emitErrorAndNote(llvm::SMRange loc, const Twine &msg,
-                         llvm::SMRange noteLoc, const Twine &note);
+  Token emitErrorAndNote(SMRange loc, const Twine &msg,
+                         SMRange noteLoc, const Twine &note);
 
 private:
   Token formToken(Token::Kind kind, const char *tokStart) {
diff --git a/mlir/lib/Tools/PDLL/Parser/Parser.cpp b/mlir/lib/Tools/PDLL/Parser/Parser.cpp
index 4f98bfd08580f..a6c8ebbe76705 100644
--- a/mlir/lib/Tools/PDLL/Parser/Parser.cpp
+++ b/mlir/lib/Tools/PDLL/Parser/Parser.cpp
@@ -116,11 +116,11 @@ class Parser {
   /// Try to define a variable decl with the given components, returns the
   /// variable on success.
   FailureOr<ast::VariableDecl *>
-  defineVariableDecl(StringRef name, llvm::SMRange nameLoc, ast::Type type,
+  defineVariableDecl(StringRef name, SMRange nameLoc, ast::Type type,
                      ast::Expr *initExpr,
                      ArrayRef<ast::ConstraintRef> constraints);
   FailureOr<ast::VariableDecl *>
-  defineVariableDecl(StringRef name, llvm::SMRange nameLoc, ast::Type type,
+  defineVariableDecl(StringRef name, SMRange nameLoc, ast::Type type,
                      ArrayRef<ast::ConstraintRef> constraints);
 
   /// Parse the constraint reference list for a variable decl.
@@ -136,7 +136,7 @@ class Parser {
   /// existing constraints that have already been parsed for the same entity
   /// that will be constrained by this constraint.
   FailureOr<ast::ConstraintRef>
-  parseConstraint(Optional<llvm::SMRange> &typeConstraint,
+  parseConstraint(Optional<SMRange> &typeConstraint,
                   ArrayRef<ast::ConstraintRef> existingConstraints);
 
   //===--------------------------------------------------------------------===//
@@ -146,7 +146,7 @@ class Parser {
 
   /// Identifier expressions.
   FailureOr<ast::Expr *> parseAttributeExpr();
-  FailureOr<ast::Expr *> parseDeclRefExpr(StringRef name, llvm::SMRange loc);
+  FailureOr<ast::Expr *> parseDeclRefExpr(StringRef name, SMRange loc);
   FailureOr<ast::Expr *> parseIdentifierExpr();
   FailureOr<ast::Expr *> parseMemberAccessExpr(ast::Expr *parentExpr);
   FailureOr<ast::OpNameDecl *> parseOperationName(bool allowEmptyName = false);
@@ -176,14 +176,14 @@ class Parser {
   /// Try to create a pattern decl with the given components, returning the
   /// Pattern on success.
   FailureOr<ast::PatternDecl *>
-  createPatternDecl(llvm::SMRange loc, const ast::Name *name,
+  createPatternDecl(SMRange loc, const ast::Name *name,
                     const ParsedPatternMetadata &metadata,
                     ast::CompoundStmt *body);
 
   /// Try to create a variable decl with the given components, returning the
   /// Variable on success.
   FailureOr<ast::VariableDecl *>
-  createVariableDecl(StringRef name, llvm::SMRange loc, ast::Expr *initializer,
+  createVariableDecl(StringRef name, SMRange loc, ast::Expr *initializer,
                      ArrayRef<ast::ConstraintRef> constraints);
 
   /// Validate the constraints used to constraint a variable decl.
@@ -206,49 +206,49 @@ class Parser {
   //===--------------------------------------------------------------------===//
   // Exprs
 
-  FailureOr<ast::DeclRefExpr *> createDeclRefExpr(llvm::SMRange loc,
+  FailureOr<ast::DeclRefExpr *> createDeclRefExpr(SMRange loc,
                                                   ast::Decl *decl);
   FailureOr<ast::DeclRefExpr *>
-  createInlineVariableExpr(ast::Type type, StringRef name, llvm::SMRange loc,
+  createInlineVariableExpr(ast::Type type, StringRef name, SMRange loc,
                            ArrayRef<ast::ConstraintRef> constraints);
   FailureOr<ast::MemberAccessExpr *>
   createMemberAccessExpr(ast::Expr *parentExpr, StringRef name,
-                         llvm::SMRange loc);
+                         SMRange loc);
 
   /// Validate the member access `name` into the given parent expression. On
   /// success, this also returns the type of the member accessed.
   FailureOr<ast::Type> validateMemberAccess(ast::Expr *parentExpr,
-                                            StringRef name, llvm::SMRange loc);
+                                            StringRef name, SMRange loc);
   FailureOr<ast::OperationExpr *>
-  createOperationExpr(llvm::SMRange loc, const ast::OpNameDecl *name,
+  createOperationExpr(SMRange loc, const ast::OpNameDecl *name,
                       MutableArrayRef<ast::Expr *> operands,
                       MutableArrayRef<ast::NamedAttributeDecl *> attributes,
                       MutableArrayRef<ast::Expr *> results);
   LogicalResult
-  validateOperationOperands(llvm::SMRange loc, Optional<StringRef> name,
+  validateOperationOperands(SMRange loc, Optional<StringRef> name,
                             MutableArrayRef<ast::Expr *> operands);
-  LogicalResult validateOperationResults(llvm::SMRange loc,
+  LogicalResult validateOperationResults(SMRange loc,
                                          Optional<StringRef> name,
                                          MutableArrayRef<ast::Expr *> results);
   LogicalResult
-  validateOperationOperandsOrResults(llvm::SMRange loc,
+  validateOperationOperandsOrResults(SMRange loc,
                                      Optional<StringRef> name,
                                      MutableArrayRef<ast::Expr *> values,
                                      ast::Type singleTy, ast::Type rangeTy);
-  FailureOr<ast::TupleExpr *> createTupleExpr(llvm::SMRange loc,
+  FailureOr<ast::TupleExpr *> createTupleExpr(SMRange loc,
                                               ArrayRef<ast::Expr *> elements,
                                               ArrayRef<StringRef> elementNames);
 
   //===--------------------------------------------------------------------===//
   // Stmts
 
-  FailureOr<ast::EraseStmt *> createEraseStmt(llvm::SMRange loc,
+  FailureOr<ast::EraseStmt *> createEraseStmt(SMRange loc,
                                               ast::Expr *rootOp);
   FailureOr<ast::ReplaceStmt *>
-  createReplaceStmt(llvm::SMRange loc, ast::Expr *rootOp,
+  createReplaceStmt(SMRange loc, ast::Expr *rootOp,
                     MutableArrayRef<ast::Expr *> replValues);
   FailureOr<ast::RewriteStmt *>
-  createRewriteStmt(llvm::SMRange loc, ast::Expr *rootOp,
+  createRewriteStmt(SMRange loc, ast::Expr *rootOp,
                     ast::CompoundStmt *rewriteBody);
 
   //===--------------------------------------------------------------------===//
@@ -280,7 +280,7 @@ class Parser {
   }
 
   /// Reset the lexer to the location at the given position.
-  void resetToken(llvm::SMRange tokLoc) {
+  void resetToken(SMRange tokLoc) {
     lexer.resetPointer(tokLoc.Start.getPointer());
     curToken = lexer.lexToken();
   }
@@ -293,15 +293,15 @@ class Parser {
     consumeToken();
     return success();
   }
-  LogicalResult emitError(llvm::SMRange loc, const Twine &msg) {
+  LogicalResult emitError(SMRange loc, const Twine &msg) {
     lexer.emitError(loc, msg);
     return failure();
   }
   LogicalResult emitError(const Twine &msg) {
     return emitError(curToken.getLoc(), msg);
   }
-  LogicalResult emitErrorAndNote(llvm::SMRange loc, const Twine &msg,
-                                 llvm::SMRange noteLoc, const Twine &note) {
+  LogicalResult emitErrorAndNote(SMRange loc, const Twine &msg,
+                                 SMRange noteLoc, const Twine &note) {
     lexer.emitErrorAndNote(loc, msg, noteLoc, note);
     return failure();
   }
@@ -333,7 +333,7 @@ class Parser {
 } // namespace
 
 FailureOr<ast::Module *> Parser::parseModule() {
-  llvm::SMLoc moduleLoc = curToken.getStartLoc();
+  SMLoc moduleLoc = curToken.getStartLoc();
   pushDeclScope();
 
   // Parse the top-level decls of the module.
@@ -464,14 +464,14 @@ LogicalResult Parser::parseDirective(SmallVector<ast::Decl *> &decls) {
 }
 
 LogicalResult Parser::parseInclude(SmallVector<ast::Decl *> &decls) {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::directive);
 
   // Parse the file being included.
   if (!curToken.isString())
     return emitError(loc,
                      "expected string file name after `include` directive");
-  llvm::SMRange fileLoc = curToken.getLoc();
+  SMRange fileLoc = curToken.getLoc();
   std::string filenameStr = curToken.getStringValue();
   StringRef filename = filenameStr;
   consumeToken();
@@ -548,7 +548,7 @@ FailureOr<ast::NamedAttributeDecl *> Parser::parseNamedAttributeDecl() {
 }
 
 FailureOr<ast::Decl *> Parser::parsePatternDecl() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::kw_Pattern);
   llvm::SaveAndRestore<ParserContext> saveCtx(parserContext,
                                               ParserContext::PatternMatch);
@@ -598,14 +598,14 @@ FailureOr<ast::Decl *> Parser::parsePatternDecl() {
 
 LogicalResult
 Parser::parsePatternDeclMetadata(ParsedPatternMetadata &metadata) {
-  Optional<llvm::SMRange> benefitLoc;
-  Optional<llvm::SMRange> hasBoundedRecursionLoc;
+  Optional<SMRange> benefitLoc;
+  Optional<SMRange> hasBoundedRecursionLoc;
 
   do {
     if (curToken.isNot(Token::identifier))
       return emitError("expected pattern metadata identifier");
     StringRef metadataStr = curToken.getSpelling();
-    llvm::SMRange metadataLoc = curToken.getLoc();
+    SMRange metadataLoc = curToken.getLoc();
     consumeToken(Token::identifier);
 
     // Parse the benefit metadata: benefit(<integer-value>)
@@ -677,7 +677,7 @@ LogicalResult Parser::checkDefineNamedDecl(const ast::Name &name) {
 }
 
 FailureOr<ast::VariableDecl *>
-Parser::defineVariableDecl(StringRef name, llvm::SMRange nameLoc,
+Parser::defineVariableDecl(StringRef name, SMRange nameLoc,
                            ast::Type type, ast::Expr *initExpr,
                            ArrayRef<ast::ConstraintRef> constraints) {
   assert(curDeclScope && "defining variable outside of decl scope");
@@ -699,7 +699,7 @@ Parser::defineVariableDecl(StringRef name, llvm::SMRange nameLoc,
 }
 
 FailureOr<ast::VariableDecl *>
-Parser::defineVariableDecl(StringRef name, llvm::SMRange nameLoc,
+Parser::defineVariableDecl(StringRef name, SMRange nameLoc,
                            ast::Type type,
                            ArrayRef<ast::ConstraintRef> constraints) {
   return defineVariableDecl(name, nameLoc, type, /*initExpr=*/nullptr,
@@ -708,7 +708,7 @@ Parser::defineVariableDecl(StringRef name, llvm::SMRange nameLoc,
 
 LogicalResult Parser::parseVariableDeclConstraintList(
     SmallVectorImpl<ast::ConstraintRef> &constraints) {
-  Optional<llvm::SMRange> typeConstraint;
+  Optional<SMRange> typeConstraint;
   auto parseSingleConstraint = [&] {
     FailureOr<ast::ConstraintRef> constraint =
         parseConstraint(typeConstraint, constraints);
@@ -730,7 +730,7 @@ LogicalResult Parser::parseVariableDeclConstraintList(
 }
 
 FailureOr<ast::ConstraintRef>
-Parser::parseConstraint(Optional<llvm::SMRange> &typeConstraint,
+Parser::parseConstraint(Optional<SMRange> &typeConstraint,
                         ArrayRef<ast::ConstraintRef> existingConstraints) {
   auto parseTypeConstraint = [&](ast::Expr *&typeExpr) -> LogicalResult {
     if (typeConstraint)
@@ -746,7 +746,7 @@ Parser::parseConstraint(Optional<llvm::SMRange> &typeConstraint,
     return success();
   };
 
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   switch (curToken.getKind()) {
   case Token::kw_Attr: {
     consumeToken(Token::kw_Attr);
@@ -871,7 +871,7 @@ FailureOr<ast::Expr *> Parser::parseExpr() {
 }
 
 FailureOr<ast::Expr *> Parser::parseAttributeExpr() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::kw_attr);
 
   // If we aren't followed by a `<`, the `attr` keyword is treated as a normal
@@ -893,7 +893,7 @@ FailureOr<ast::Expr *> Parser::parseAttributeExpr() {
 }
 
 FailureOr<ast::Expr *> Parser::parseDeclRefExpr(StringRef name,
-                                                llvm::SMRange loc) {
+                                                SMRange loc) {
   ast::Decl *decl = curDeclScope->lookup(name);
   if (!decl)
     return emitError(loc, "undefined reference to `" + name + "`");
@@ -903,7 +903,7 @@ FailureOr<ast::Expr *> Parser::parseDeclRefExpr(StringRef name,
 
 FailureOr<ast::Expr *> Parser::parseIdentifierExpr() {
   StringRef name = curToken.getSpelling();
-  llvm::SMRange nameLoc = curToken.getLoc();
+  SMRange nameLoc = curToken.getLoc();
   consumeToken();
 
   // Check to see if this is a decl ref expression that defines a variable
@@ -922,7 +922,7 @@ FailureOr<ast::Expr *> Parser::parseIdentifierExpr() {
 }
 
 FailureOr<ast::Expr *> Parser::parseMemberAccessExpr(ast::Expr *parentExpr) {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::dot);
 
   // Parse the member name.
@@ -937,12 +937,12 @@ FailureOr<ast::Expr *> Parser::parseMemberAccessExpr(ast::Expr *parentExpr) {
 }
 
 FailureOr<ast::OpNameDecl *> Parser::parseOperationName(bool allowEmptyName) {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
 
   // Handle the case of an no operation name.
   if (curToken.isNot(Token::identifier) && !curToken.isKeyword()) {
     if (allowEmptyName)
-      return ast::OpNameDecl::create(ctx, llvm::SMRange());
+      return ast::OpNameDecl::create(ctx, SMRange());
     return emitError("expected dialect namespace");
   }
   StringRef name = curToken.getSpelling();
@@ -968,7 +968,7 @@ FailureOr<ast::OpNameDecl *> Parser::parseOperationName(bool allowEmptyName) {
 FailureOr<ast::OpNameDecl *>
 Parser::parseWrappedOperationName(bool allowEmptyName) {
   if (!consumeIf(Token::less))
-    return ast::OpNameDecl::create(ctx, llvm::SMRange());
+    return ast::OpNameDecl::create(ctx, SMRange());
 
   FailureOr<ast::OpNameDecl *> opNameDecl = parseOperationName(allowEmptyName);
   if (failed(opNameDecl))
@@ -980,7 +980,7 @@ Parser::parseWrappedOperationName(bool allowEmptyName) {
 }
 
 FailureOr<ast::Expr *> Parser::parseOperationExpr() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::kw_op);
 
   // If it isn't followed by a `<`, the `op` keyword is treated as a normal
@@ -1053,10 +1053,10 @@ FailureOr<ast::Expr *> Parser::parseOperationExpr() {
 }
 
 FailureOr<ast::Expr *> Parser::parseTupleExpr() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::l_paren);
 
-  DenseMap<StringRef, llvm::SMRange> usedNames;
+  DenseMap<StringRef, SMRange> usedNames;
   SmallVector<StringRef> elementNames;
   SmallVector<ast::Expr *> elements;
   if (curToken.isNot(Token::r_paren)) {
@@ -1105,7 +1105,7 @@ FailureOr<ast::Expr *> Parser::parseTupleExpr() {
 }
 
 FailureOr<ast::Expr *> Parser::parseTypeExpr() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::kw_type);
 
   // If we aren't followed by a `<`, the `type` keyword is treated as a normal
@@ -1127,7 +1127,7 @@ FailureOr<ast::Expr *> Parser::parseTypeExpr() {
 
 FailureOr<ast::Expr *> Parser::parseUnderscoreExpr() {
   StringRef name = curToken.getSpelling();
-  llvm::SMRange nameLoc = curToken.getLoc();
+  SMRange nameLoc = curToken.getLoc();
   consumeToken(Token::underscore);
 
   // Underscore expressions require a constraint list.
@@ -1175,7 +1175,7 @@ FailureOr<ast::Stmt *> Parser::parseStmt(bool expectTerminalSemicolon) {
 }
 
 FailureOr<ast::CompoundStmt *> Parser::parseCompoundStmt() {
-  llvm::SMLoc startLoc = curToken.getStartLoc();
+  SMLoc startLoc = curToken.getStartLoc();
   consumeToken(Token::l_brace);
 
   // Push a new block scope and parse any nested statements.
@@ -1190,14 +1190,14 @@ FailureOr<ast::CompoundStmt *> Parser::parseCompoundStmt() {
   popDeclScope();
 
   // Consume the end brace.
-  llvm::SMRange location(startLoc, curToken.getEndLoc());
+  SMRange location(startLoc, curToken.getEndLoc());
   consumeToken(Token::r_brace);
 
   return ast::CompoundStmt::create(ctx, location, statements);
 }
 
 FailureOr<ast::EraseStmt *> Parser::parseEraseStmt() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::kw_erase);
 
   // Parse the root operation expression.
@@ -1209,11 +1209,11 @@ FailureOr<ast::EraseStmt *> Parser::parseEraseStmt() {
 }
 
 FailureOr<ast::LetStmt *> Parser::parseLetStmt() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::kw_let);
 
   // Parse the name of the new variable.
-  llvm::SMRange varLoc = curToken.getLoc();
+  SMRange varLoc = curToken.getLoc();
   if (curToken.isNot(Token::identifier) && !curToken.isDependentKeyword()) {
     // `_` is a reserved variable name.
     if (curToken.is(Token::underscore)) {
@@ -1269,7 +1269,7 @@ FailureOr<ast::LetStmt *> Parser::parseLetStmt() {
 }
 
 FailureOr<ast::ReplaceStmt *> Parser::parseReplaceStmt() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::kw_replace);
 
   // Parse the root operation expression.
@@ -1315,7 +1315,7 @@ FailureOr<ast::ReplaceStmt *> Parser::parseReplaceStmt() {
 }
 
 FailureOr<ast::RewriteStmt *> Parser::parseRewriteStmt() {
-  llvm::SMRange loc = curToken.getLoc();
+  SMRange loc = curToken.getLoc();
   consumeToken(Token::kw_rewrite);
 
   // Parse the root operation.
@@ -1348,7 +1348,7 @@ FailureOr<ast::RewriteStmt *> Parser::parseRewriteStmt() {
 // Decls
 
 FailureOr<ast::PatternDecl *>
-Parser::createPatternDecl(llvm::SMRange loc, const ast::Name *name,
+Parser::createPatternDecl(SMRange loc, const ast::Name *name,
                           const ParsedPatternMetadata &metadata,
                           ast::CompoundStmt *body) {
   return ast::PatternDecl::create(ctx, loc, name, metadata.benefit,
@@ -1356,7 +1356,7 @@ Parser::createPatternDecl(llvm::SMRange loc, const ast::Name *name,
 }
 
 FailureOr<ast::VariableDecl *>
-Parser::createVariableDecl(StringRef name, llvm::SMRange loc,
+Parser::createVariableDecl(StringRef name, SMRange loc,
                            ast::Expr *initializer,
                            ArrayRef<ast::ConstraintRef> constraints) {
   // The type of the variable, which is expected to be inferred by either a
@@ -1473,7 +1473,7 @@ Parser::validateTypeRangeConstraintExpr(const ast::Expr *typeExpr) {
 //===----------------------------------------------------------------------===//
 // Exprs
 
-FailureOr<ast::DeclRefExpr *> Parser::createDeclRefExpr(llvm::SMRange loc,
+FailureOr<ast::DeclRefExpr *> Parser::createDeclRefExpr(SMRange loc,
                                                         ast::Decl *decl) {
   // Check the type of decl being referenced.
   ast::Type declType;
@@ -1488,7 +1488,7 @@ FailureOr<ast::DeclRefExpr *> Parser::createDeclRefExpr(llvm::SMRange loc,
 
 FailureOr<ast::DeclRefExpr *>
 Parser::createInlineVariableExpr(ast::Type type, StringRef name,
-                                 llvm::SMRange loc,
+                                 SMRange loc,
                                  ArrayRef<ast::ConstraintRef> constraints) {
   FailureOr<ast::VariableDecl *> decl =
       defineVariableDecl(name, loc, type, constraints);
@@ -1499,7 +1499,7 @@ Parser::createInlineVariableExpr(ast::Type type, StringRef name,
 
 FailureOr<ast::MemberAccessExpr *>
 Parser::createMemberAccessExpr(ast::Expr *parentExpr, StringRef name,
-                               llvm::SMRange loc) {
+                               SMRange loc) {
   // Validate the member name for the given parent expression.
   FailureOr<ast::Type> memberType = validateMemberAccess(parentExpr, name, loc);
   if (failed(memberType))
@@ -1510,7 +1510,7 @@ Parser::createMemberAccessExpr(ast::Expr *parentExpr, StringRef name,
 
 FailureOr<ast::Type> Parser::validateMemberAccess(ast::Expr *parentExpr,
                                                   StringRef name,
-                                                  llvm::SMRange loc) {
+                                                  SMRange loc) {
   ast::Type parentType = parentExpr->getType();
   if (parentType.isa<ast::OperationType>()) {
     if (name == ast::AllResultsMemberAccessExpr::getMemberName())
@@ -1536,7 +1536,7 @@ FailureOr<ast::Type> Parser::validateMemberAccess(ast::Expr *parentExpr,
 }
 
 FailureOr<ast::OperationExpr *> Parser::createOperationExpr(
-    llvm::SMRange loc, const ast::OpNameDecl *name,
+    SMRange loc, const ast::OpNameDecl *name,
     MutableArrayRef<ast::Expr *> operands,
     MutableArrayRef<ast::NamedAttributeDecl *> attributes,
     MutableArrayRef<ast::Expr *> results) {
@@ -1566,21 +1566,21 @@ FailureOr<ast::OperationExpr *> Parser::createOperationExpr(
 }
 
 LogicalResult
-Parser::validateOperationOperands(llvm::SMRange loc, Optional<StringRef> name,
+Parser::validateOperationOperands(SMRange loc, Optional<StringRef> name,
                                   MutableArrayRef<ast::Expr *> operands) {
   return validateOperationOperandsOrResults(loc, name, operands, valueTy,
                                             valueRangeTy);
 }
 
 LogicalResult
-Parser::validateOperationResults(llvm::SMRange loc, Optional<StringRef> name,
+Parser::validateOperationResults(SMRange loc, Optional<StringRef> name,
                                  MutableArrayRef<ast::Expr *> results) {
   return validateOperationOperandsOrResults(loc, name, results, typeTy,
                                             typeRangeTy);
 }
 
 LogicalResult Parser::validateOperationOperandsOrResults(
-    llvm::SMRange loc, Optional<StringRef> name,
+    SMRange loc, Optional<StringRef> name,
     MutableArrayRef<ast::Expr *> values, ast::Type singleTy,
     ast::Type rangeTy) {
   // All operation types accept a single range parameter.
@@ -1619,7 +1619,7 @@ LogicalResult Parser::validateOperationOperandsOrResults(
 }
 
 FailureOr<ast::TupleExpr *>
-Parser::createTupleExpr(llvm::SMRange loc, ArrayRef<ast::Expr *> elements,
+Parser::createTupleExpr(SMRange loc, ArrayRef<ast::Expr *> elements,
                         ArrayRef<StringRef> elementNames) {
   for (const ast::Expr *element : elements) {
     ast::Type eleTy = element->getType();
@@ -1635,7 +1635,7 @@ Parser::createTupleExpr(llvm::SMRange loc, ArrayRef<ast::Expr *> elements,
 //===----------------------------------------------------------------------===//
 // Stmts
 
-FailureOr<ast::EraseStmt *> Parser::createEraseStmt(llvm::SMRange loc,
+FailureOr<ast::EraseStmt *> Parser::createEraseStmt(SMRange loc,
                                                     ast::Expr *rootOp) {
   // Check that root is an Operation.
   ast::Type rootType = rootOp->getType();
@@ -1646,7 +1646,7 @@ FailureOr<ast::EraseStmt *> Parser::createEraseStmt(llvm::SMRange loc,
 }
 
 FailureOr<ast::ReplaceStmt *>
-Parser::createReplaceStmt(llvm::SMRange loc, ast::Expr *rootOp,
+Parser::createReplaceStmt(SMRange loc, ast::Expr *rootOp,
                           MutableArrayRef<ast::Expr *> replValues) {
   // Check that root is an Operation.
   ast::Type rootType = rootOp->getType();
@@ -1681,7 +1681,7 @@ Parser::createReplaceStmt(llvm::SMRange loc, ast::Expr *rootOp,
 }
 
 FailureOr<ast::RewriteStmt *>
-Parser::createRewriteStmt(llvm::SMRange loc, ast::Expr *rootOp,
+Parser::createRewriteStmt(SMRange loc, ast::Expr *rootOp,
                           ast::CompoundStmt *rewriteBody) {
   // Check that root is an Operation.
   ast::Type rootType = rootOp->getType();
diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
index cc41eb2831faf..2488ede756ef9 100644
--- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
+++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp
@@ -17,7 +17,7 @@
 using namespace mlir;
 
 /// Returns a language server position for the given source location.
-static lsp::Position getPosFromLoc(llvm::SourceMgr &mgr, llvm::SMLoc loc) {
+static lsp::Position getPosFromLoc(llvm::SourceMgr &mgr, SMLoc loc) {
   std::pair<unsigned, unsigned> lineAndCol = mgr.getLineAndColumn(loc);
   lsp::Position pos;
   pos.line = lineAndCol.first - 1;
@@ -26,19 +26,19 @@ static lsp::Position getPosFromLoc(llvm::SourceMgr &mgr, llvm::SMLoc loc) {
 }
 
 /// Returns a source location from the given language server position.
-static llvm::SMLoc getPosFromLoc(llvm::SourceMgr &mgr, lsp::Position pos) {
+static SMLoc getPosFromLoc(llvm::SourceMgr &mgr, lsp::Position pos) {
   return mgr.FindLocForLineAndColumn(mgr.getMainFileID(), pos.line + 1,
                                      pos.character);
 }
 
 /// Returns a language server range for the given source range.
-static lsp::Range getRangeFromLoc(llvm::SourceMgr &mgr, llvm::SMRange range) {
+static lsp::Range getRangeFromLoc(llvm::SourceMgr &mgr, SMRange range) {
   return {getPosFromLoc(mgr, range.Start), getPosFromLoc(mgr, range.End)};
 }
 
 /// Returns a language server location from the given source range.
 static lsp::Location getLocationFromLoc(llvm::SourceMgr &mgr,
-                                        llvm::SMRange range,
+                                        SMRange range,
                                         const lsp::URIForFile &uri) {
   return lsp::Location{uri, getRangeFromLoc(mgr, range)};
 }
@@ -75,13 +75,13 @@ getLocationFromLoc(llvm::SourceMgr &sourceMgr, Location loc,
     Optional<lsp::Location> sourceLoc = getLocationFromLoc(fileLoc);
     if (sourceLoc && (!uri || sourceLoc->uri == *uri)) {
       location = *sourceLoc;
-      llvm::SMLoc loc = sourceMgr.FindLocForLineAndColumn(
+      SMLoc loc = sourceMgr.FindLocForLineAndColumn(
           sourceMgr.getMainFileID(), fileLoc.getLine(), fileLoc.getColumn());
 
       // Use range of potential identifier starting at location, else length 1
       // range.
       location->range.end.character += 1;
-      if (Optional<llvm::SMRange> range =
+      if (Optional<SMRange> range =
               AsmParserState::convertIdLocToRange(loc)) {
         auto lineCol = sourceMgr.getLineAndColumn(range->End);
         location->range.end.character =
@@ -115,7 +115,7 @@ static void collectLocationsFromLoc(Location loc,
 /// Returns true if the given range contains the given source location. Note
 /// that this has slightly different behavior than SMRange because it is
 /// inclusive of the end location.
-static bool contains(llvm::SMRange range, llvm::SMLoc loc) {
+static bool contains(SMRange range, SMLoc loc) {
   return range.Start.getPointer() <= loc.getPointer() &&
          loc.getPointer() <= range.End.getPointer();
 }
@@ -123,8 +123,8 @@ static bool contains(llvm::SMRange range, llvm::SMLoc loc) {
 /// Returns true if the given location is contained by the definition or one of
 /// the uses of the given SMDefinition. If provided, `overlappedRange` is set to
 /// the range within `def` that the provided `loc` overlapped with.
-static bool isDefOrUse(const AsmParserState::SMDefinition &def, llvm::SMLoc loc,
-                       llvm::SMRange *overlappedRange = nullptr) {
+static bool isDefOrUse(const AsmParserState::SMDefinition &def, SMLoc loc,
+                       SMRange *overlappedRange = nullptr) {
   // Check the main definition.
   if (contains(def.loc, loc)) {
     if (overlappedRange)
@@ -133,7 +133,7 @@ static bool isDefOrUse(const AsmParserState::SMDefinition &def, llvm::SMLoc loc,
   }
 
   // Check the uses.
-  const auto *useIt = llvm::find_if(def.uses, [&](const llvm::SMRange &range) {
+  const auto *useIt = llvm::find_if(def.uses, [&](const SMRange &range) {
     return contains(range, loc);
   });
   if (useIt != def.uses.end()) {
@@ -146,7 +146,7 @@ static bool isDefOrUse(const AsmParserState::SMDefinition &def, llvm::SMLoc loc,
 
 /// Given a location pointing to a result, return the result number it refers
 /// to or None if it refers to all of the results.
-static Optional<unsigned> getResultNumberFromLoc(llvm::SMLoc loc) {
+static Optional<unsigned> getResultNumberFromLoc(SMLoc loc) {
   // Skip all of the identifier characters.
   auto isIdentifierChar = [](char c) {
     return isalnum(c) || c == '%' || c == '$' || c == '.' || c == '_' ||
@@ -173,7 +173,7 @@ static Optional<unsigned> getResultNumberFromLoc(llvm::SMLoc loc) {
 
 /// Given a source location range, return the text covered by the given range.
 /// If the range is invalid, returns None.
-static Optional<StringRef> getTextFromRange(llvm::SMRange range) {
+static Optional<StringRef> getTextFromRange(SMRange range) {
   if (!range.isValid())
     return None;
   const char *startPtr = range.Start.getPointer();
@@ -188,7 +188,7 @@ static unsigned getBlockNumber(Block *block) {
 /// Given a block and source location, print the source name of the block to the
 /// given output stream.
 static void printDefBlockName(raw_ostream &os, Block *block,
-                              llvm::SMRange loc = {}) {
+                              SMRange loc = {}) {
   // Try to extract a name from the source location.
   Optional<StringRef> text = getTextFromRange(loc);
   if (text && text->startswith("^")) {
@@ -285,16 +285,16 @@ struct MLIRDocument {
   Optional<lsp::Hover> findHover(const lsp::URIForFile &uri,
                                  const lsp::Position &hoverPos);
   Optional<lsp::Hover>
-  buildHoverForOperation(llvm::SMRange hoverRange,
+  buildHoverForOperation(SMRange hoverRange,
                          const AsmParserState::OperationDefinition &op);
-  lsp::Hover buildHoverForOperationResult(llvm::SMRange hoverRange,
+  lsp::Hover buildHoverForOperationResult(SMRange hoverRange,
                                           Operation *op, unsigned resultStart,
                                           unsigned resultEnd,
-                                          llvm::SMLoc posLoc);
-  lsp::Hover buildHoverForBlock(llvm::SMRange hoverRange,
+                                          SMLoc posLoc);
+  lsp::Hover buildHoverForBlock(SMRange hoverRange,
                                 const AsmParserState::BlockDefinition &block);
   lsp::Hover
-  buildHoverForBlockArgument(llvm::SMRange hoverRange, BlockArgument arg,
+  buildHoverForBlockArgument(SMRange hoverRange, BlockArgument arg,
                              const AsmParserState::BlockDefinition &block);
 
   //===--------------------------------------------------------------------===//
@@ -335,7 +335,7 @@ MLIRDocument::MLIRDocument(MLIRContext &context, const lsp::URIForFile &uri,
     return;
   }
 
-  sourceMgr.AddNewSourceBuffer(std::move(memBuffer), llvm::SMLoc());
+  sourceMgr.AddNewSourceBuffer(std::move(memBuffer), SMLoc());
   if (failed(parseSourceFile(sourceMgr, &parsedIR, &context, nullptr,
                              &asmState))) {
     // If parsing failed, clear out any of the current state.
@@ -352,7 +352,7 @@ MLIRDocument::MLIRDocument(MLIRContext &context, const lsp::URIForFile &uri,
 void MLIRDocument::getLocationsOf(const lsp::URIForFile &uri,
                                   const lsp::Position &defPos,
                                   std::vector<lsp::Location> &locations) {
-  llvm::SMLoc posLoc = getPosFromLoc(sourceMgr, defPos);
+  SMLoc posLoc = getPosFromLoc(sourceMgr, defPos);
 
   // Functor used to check if an SM definition contains the position.
   auto containsPosition = [&](const AsmParserState::SMDefinition &def) {
@@ -394,11 +394,11 @@ void MLIRDocument::findReferencesOf(const lsp::URIForFile &uri,
   // definition to the reference list.
   auto appendSMDef = [&](const AsmParserState::SMDefinition &def) {
     references.push_back(getLocationFromLoc(sourceMgr, def.loc, uri));
-    for (const llvm::SMRange &use : def.uses)
+    for (const SMRange &use : def.uses)
       references.push_back(getLocationFromLoc(sourceMgr, use, uri));
   };
 
-  llvm::SMLoc posLoc = getPosFromLoc(sourceMgr, pos);
+  SMLoc posLoc = getPosFromLoc(sourceMgr, pos);
 
   // Check all definitions related to operations.
   for (const AsmParserState::OperationDefinition &op : asmState.getOpDefs()) {
@@ -439,8 +439,8 @@ void MLIRDocument::findReferencesOf(const lsp::URIForFile &uri,
 
 Optional<lsp::Hover> MLIRDocument::findHover(const lsp::URIForFile &uri,
                                              const lsp::Position &hoverPos) {
-  llvm::SMLoc posLoc = getPosFromLoc(sourceMgr, hoverPos);
-  llvm::SMRange hoverRange;
+  SMLoc posLoc = getPosFromLoc(sourceMgr, hoverPos);
+  SMRange hoverRange;
 
   // Check for Hovers on operations and results.
   for (const AsmParserState::OperationDefinition &op : asmState.getOpDefs()) {
@@ -485,7 +485,7 @@ Optional<lsp::Hover> MLIRDocument::findHover(const lsp::URIForFile &uri,
 }
 
 Optional<lsp::Hover> MLIRDocument::buildHoverForOperation(
-    llvm::SMRange hoverRange, const AsmParserState::OperationDefinition &op) {
+    SMRange hoverRange, const AsmParserState::OperationDefinition &op) {
   lsp::Hover hover(getRangeFromLoc(sourceMgr, hoverRange));
   llvm::raw_string_ostream os(hover.contents.value);
 
@@ -517,11 +517,11 @@ Optional<lsp::Hover> MLIRDocument::buildHoverForOperation(
   return hover;
 }
 
-lsp::Hover MLIRDocument::buildHoverForOperationResult(llvm::SMRange hoverRange,
+lsp::Hover MLIRDocument::buildHoverForOperationResult(SMRange hoverRange,
                                                       Operation *op,
                                                       unsigned resultStart,
                                                       unsigned resultEnd,
-                                                      llvm::SMLoc posLoc) {
+                                                      SMLoc posLoc) {
   lsp::Hover hover(getRangeFromLoc(sourceMgr, hoverRange));
   llvm::raw_string_ostream os(hover.contents.value);
 
@@ -553,7 +553,7 @@ lsp::Hover MLIRDocument::buildHoverForOperationResult(llvm::SMRange hoverRange,
 }
 
 lsp::Hover
-MLIRDocument::buildHoverForBlock(llvm::SMRange hoverRange,
+MLIRDocument::buildHoverForBlock(SMRange hoverRange,
                                  const AsmParserState::BlockDefinition &block) {
   lsp::Hover hover(getRangeFromLoc(sourceMgr, hoverRange));
   llvm::raw_string_ostream os(hover.contents.value);
@@ -585,7 +585,7 @@ MLIRDocument::buildHoverForBlock(llvm::SMRange hoverRange,
 }
 
 lsp::Hover MLIRDocument::buildHoverForBlockArgument(
-    llvm::SMRange hoverRange, BlockArgument arg,
+    SMRange hoverRange, BlockArgument arg,
     const AsmParserState::BlockDefinition &block) {
   lsp::Hover hover(getRangeFromLoc(sourceMgr, hoverRange));
   llvm::raw_string_ostream os(hover.contents.value);
diff --git a/mlir/lib/Translation/Translation.cpp b/mlir/lib/Translation/Translation.cpp
index 43349a82c2639..5e989dbc00b52 100644
--- a/mlir/lib/Translation/Translation.cpp
+++ b/mlir/lib/Translation/Translation.cpp
@@ -181,7 +181,7 @@ LogicalResult mlir::mlirTranslateMain(int argc, char **argv,
     MLIRContext context;
     context.printOpOnDiagnostic(!verifyDiagnostics);
     llvm::SourceMgr sourceMgr;
-    sourceMgr.AddNewSourceBuffer(std::move(ownedBuffer), llvm::SMLoc());
+    sourceMgr.AddNewSourceBuffer(std::move(ownedBuffer), SMLoc());
 
     if (!verifyDiagnostics) {
       SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context);
diff --git a/mlir/test/lib/Dialect/Test/TestDialect.cpp b/mlir/test/lib/Dialect/Test/TestDialect.cpp
index d82f7af73decb..21db4e0a9d11a 100644
--- a/mlir/test/lib/Dialect/Test/TestDialect.cpp
+++ b/mlir/test/lib/Dialect/Test/TestDialect.cpp
@@ -729,7 +729,7 @@ static void print(OpAsmPrinter &p, WrappingRegionOp op) {
 static ParseResult parsePrettyPrintedRegionOp(OpAsmParser &parser,
                                               OperationState &result) {
 
-  llvm::SMLoc loc = parser.getCurrentLocation();
+  SMLoc loc = parser.getCurrentLocation();
   Location currLocation = parser.getEncodedSourceLoc(loc);
 
   // Parse the operands.
diff --git a/mlir/tools/mlir-pdll/mlir-pdll.cpp b/mlir/tools/mlir-pdll/mlir-pdll.cpp
index 9fb61aca42d8b..5600e66eafdb3 100644
--- a/mlir/tools/mlir-pdll/mlir-pdll.cpp
+++ b/mlir/tools/mlir-pdll/mlir-pdll.cpp
@@ -33,7 +33,7 @@ processBuffer(raw_ostream &os, std::unique_ptr<llvm::MemoryBuffer> chunkBuffer,
               OutputType outputType, std::vector<std::string> &includeDirs) {
   llvm::SourceMgr sourceMgr;
   sourceMgr.setIncludeDirs(includeDirs);
-  sourceMgr.AddNewSourceBuffer(std::move(chunkBuffer), llvm::SMLoc());
+  sourceMgr.AddNewSourceBuffer(std::move(chunkBuffer), SMLoc());
 
   ast::Context astContext;
   FailureOr<ast::Module *> module = parsePDLAST(astContext, sourceMgr);
diff --git a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
index f0ac7d217d2c2..1a1f6b9976136 100644
--- a/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/AttrOrTypeFormatGen.cpp
@@ -757,7 +757,7 @@ void mlir::tblgen::generateAttrOrTypeFormat(const AttrOrTypeDef &def,
   llvm::SourceMgr mgr;
   mgr.AddNewSourceBuffer(
       llvm::MemoryBuffer::getMemBuffer(*def.getAssemblyFormat()),
-      llvm::SMLoc());
+      SMLoc());
 
   /// Parse the custom assembly format>
   FormatParser fmtParser(mgr, def);
diff --git a/mlir/tools/mlir-tblgen/FormatGen.cpp b/mlir/tools/mlir-tblgen/FormatGen.cpp
index 16f026ab7ecd9..d656253c7551b 100644
--- a/mlir/tools/mlir-tblgen/FormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/FormatGen.cpp
@@ -18,20 +18,20 @@ using namespace mlir::tblgen;
 // FormatToken
 //===----------------------------------------------------------------------===//
 
-llvm::SMLoc FormatToken::getLoc() const {
-  return llvm::SMLoc::getFromPointer(spelling.data());
+SMLoc FormatToken::getLoc() const {
+  return SMLoc::getFromPointer(spelling.data());
 }
 
 //===----------------------------------------------------------------------===//
 // FormatLexer
 //===----------------------------------------------------------------------===//
 
-FormatLexer::FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc)
+FormatLexer::FormatLexer(llvm::SourceMgr &mgr, SMLoc loc)
     : mgr(mgr), loc(loc),
       curBuffer(mgr.getMemoryBuffer(mgr.getMainFileID())->getBuffer()),
       curPtr(curBuffer.begin()) {}
 
-FormatToken FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) {
+FormatToken FormatLexer::emitError(SMLoc loc, const Twine &msg) {
   mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg);
   llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note,
                             "in custom assembly format for this operation");
@@ -39,10 +39,10 @@ FormatToken FormatLexer::emitError(llvm::SMLoc loc, const Twine &msg) {
 }
 
 FormatToken FormatLexer::emitError(const char *loc, const Twine &msg) {
-  return emitError(llvm::SMLoc::getFromPointer(loc), msg);
+  return emitError(SMLoc::getFromPointer(loc), msg);
 }
 
-FormatToken FormatLexer::emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
+FormatToken FormatLexer::emitErrorAndNote(SMLoc loc, const Twine &msg,
                                           const Twine &note) {
   mgr.PrintMessage(loc, llvm::SourceMgr::DK_Error, msg);
   llvm::SrcMgr.PrintMessage(this->loc, llvm::SourceMgr::DK_Note,
diff --git a/mlir/tools/mlir-tblgen/FormatGen.h b/mlir/tools/mlir-tblgen/FormatGen.h
index ae0d12ded2a39..4d290a9ee1f90 100644
--- a/mlir/tools/mlir-tblgen/FormatGen.h
+++ b/mlir/tools/mlir-tblgen/FormatGen.h
@@ -83,7 +83,7 @@ class FormatToken {
   Kind getKind() const { return kind; }
 
   /// Return a location for this token.
-  llvm::SMLoc getLoc() const;
+  SMLoc getLoc() const;
 
   /// Return if this token is a keyword.
   bool isKeyword() const {
@@ -106,16 +106,16 @@ class FormatToken {
 /// This class implements a simple lexer for operation assembly format strings.
 class FormatLexer {
 public:
-  FormatLexer(llvm::SourceMgr &mgr, llvm::SMLoc loc);
+  FormatLexer(llvm::SourceMgr &mgr, SMLoc loc);
 
   /// Lex the next token and return it.
   FormatToken lexToken();
 
   /// Emit an error to the lexer with the given location and message.
-  FormatToken emitError(llvm::SMLoc loc, const Twine &msg);
+  FormatToken emitError(SMLoc loc, const Twine &msg);
   FormatToken emitError(const char *loc, const Twine &msg);
 
-  FormatToken emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
+  FormatToken emitErrorAndNote(SMLoc loc, const Twine &msg,
                                const Twine &note);
 
 private:
@@ -135,7 +135,7 @@ class FormatLexer {
   /// The source manager containing the format string.
   llvm::SourceMgr &mgr;
   /// Location of the format string.
-  llvm::SMLoc loc;
+  SMLoc loc;
   /// Buffer containing the format string.
   StringRef curBuffer;
   /// Current pointer in the buffer.
diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
index 1f0a37e6330eb..0be172ef3fc81 100644
--- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp
+++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp
@@ -2223,27 +2223,27 @@ class FormatParser {
       std::vector<std::unique_ptr<Element>>::const_iterator>;
 
   /// Verify the state of operation attributes within the format.
-  LogicalResult verifyAttributes(llvm::SMLoc loc);
+  LogicalResult verifyAttributes(SMLoc loc);
   /// Verify the attribute elements at the back of the given stack of iterators.
   LogicalResult verifyAttributes(
-      llvm::SMLoc loc,
+      SMLoc loc,
       SmallVectorImpl<std::pair<ElementsIterT, ElementsIterT>> &iteratorStack);
 
   /// Verify the state of operation operands within the format.
   LogicalResult
-  verifyOperands(llvm::SMLoc loc,
+  verifyOperands(SMLoc loc,
                  llvm::StringMap<TypeResolutionInstance> &variableTyResolver);
 
   /// Verify the state of operation regions within the format.
-  LogicalResult verifyRegions(llvm::SMLoc loc);
+  LogicalResult verifyRegions(SMLoc loc);
 
   /// Verify the state of operation results within the format.
   LogicalResult
-  verifyResults(llvm::SMLoc loc,
+  verifyResults(SMLoc loc,
                 llvm::StringMap<TypeResolutionInstance> &variableTyResolver);
 
   /// Verify the state of operation successors within the format.
-  LogicalResult verifySuccessors(llvm::SMLoc loc);
+  LogicalResult verifySuccessors(SMLoc loc);
 
   /// Given the values of an `AllTypesMatch` trait, check for inferable type
   /// resolution.
@@ -2281,31 +2281,31 @@ class FormatParser {
       std::vector<std::unique_ptr<Element>> &childElements,
       Optional<unsigned> &anchorIdx);
   LogicalResult verifyOptionalChildElement(Element *element,
-                                           llvm::SMLoc childLoc, bool isAnchor);
+                                           SMLoc childLoc, bool isAnchor);
 
   /// Parse the various different directives.
   LogicalResult parseAttrDictDirective(std::unique_ptr<Element> &element,
-                                       llvm::SMLoc loc, ParserContext context,
+                                       SMLoc loc, ParserContext context,
                                        bool withKeyword);
   LogicalResult parseCustomDirective(std::unique_ptr<Element> &element,
-                                     llvm::SMLoc loc, ParserContext context);
+                                     SMLoc loc, ParserContext context);
   LogicalResult parseCustomDirectiveParameter(
       std::vector<std::unique_ptr<Element>> &parameters);
   LogicalResult parseFunctionalTypeDirective(std::unique_ptr<Element> &element,
                                              FormatToken tok,
                                              ParserContext context);
   LogicalResult parseOperandsDirective(std::unique_ptr<Element> &element,
-                                       llvm::SMLoc loc, ParserContext context);
+                                       SMLoc loc, ParserContext context);
   LogicalResult parseQualifiedDirective(std::unique_ptr<Element> &element,
                                         FormatToken tok, ParserContext context);
   LogicalResult parseReferenceDirective(std::unique_ptr<Element> &element,
-                                        llvm::SMLoc loc, ParserContext context);
+                                        SMLoc loc, ParserContext context);
   LogicalResult parseRegionsDirective(std::unique_ptr<Element> &element,
-                                      llvm::SMLoc loc, ParserContext context);
+                                      SMLoc loc, ParserContext context);
   LogicalResult parseResultsDirective(std::unique_ptr<Element> &element,
-                                      llvm::SMLoc loc, ParserContext context);
+                                      SMLoc loc, ParserContext context);
   LogicalResult parseSuccessorsDirective(std::unique_ptr<Element> &element,
-                                         llvm::SMLoc loc,
+                                         SMLoc loc,
                                          ParserContext context);
   LogicalResult parseTypeDirective(std::unique_ptr<Element> &element,
                                    FormatToken tok, ParserContext context);
@@ -2329,11 +2329,11 @@ class FormatParser {
     consumeToken();
     return ::mlir::success();
   }
-  LogicalResult emitError(llvm::SMLoc loc, const Twine &msg) {
+  LogicalResult emitError(SMLoc loc, const Twine &msg) {
     lexer.emitError(loc, msg);
     return ::mlir::failure();
   }
-  LogicalResult emitErrorAndNote(llvm::SMLoc loc, const Twine &msg,
+  LogicalResult emitErrorAndNote(SMLoc loc, const Twine &msg,
                                  const Twine &note) {
     lexer.emitErrorAndNote(loc, msg, note);
     return ::mlir::failure();
@@ -2362,7 +2362,7 @@ class FormatParser {
 } // namespace
 
 LogicalResult FormatParser::parse() {
-  llvm::SMLoc loc = curToken.getLoc();
+  SMLoc loc = curToken.getLoc();
 
   // Parse each of the format elements into the main format.
   while (curToken.getKind() != FormatToken::eof) {
@@ -2415,7 +2415,7 @@ LogicalResult FormatParser::parse() {
   return ::mlir::success();
 }
 
-LogicalResult FormatParser::verifyAttributes(llvm::SMLoc loc) {
+LogicalResult FormatParser::verifyAttributes(SMLoc loc) {
   // Check that there are no `:` literals after an attribute without a constant
   // type. The attribute grammar contains an optional trailing colon type, which
   // can lead to unexpected and generally unintended behavior. Given that, it is
@@ -2441,7 +2441,7 @@ LogicalResult FormatParser::verifyAttributes(llvm::SMLoc loc) {
 }
 /// Verify the attribute elements at the back of the given stack of iterators.
 LogicalResult FormatParser::verifyAttributes(
-    llvm::SMLoc loc,
+    SMLoc loc,
     SmallVectorImpl<std::pair<ElementsIterT, ElementsIterT>> &iteratorStack) {
   auto &stackIt = iteratorStack.back();
   ElementsIterT &it = stackIt.first, e = stackIt.second;
@@ -2497,7 +2497,7 @@ LogicalResult FormatParser::verifyAttributes(
 }
 
 LogicalResult FormatParser::verifyOperands(
-    llvm::SMLoc loc,
+    SMLoc loc,
     llvm::StringMap<TypeResolutionInstance> &variableTyResolver) {
   // Check that all of the operands are within the format, and their types can
   // be inferred.
@@ -2544,7 +2544,7 @@ LogicalResult FormatParser::verifyOperands(
   return ::mlir::success();
 }
 
-LogicalResult FormatParser::verifyRegions(llvm::SMLoc loc) {
+LogicalResult FormatParser::verifyRegions(SMLoc loc) {
   // Check that all of the regions are within the format.
   if (hasAllRegions)
     return ::mlir::success();
@@ -2563,7 +2563,7 @@ LogicalResult FormatParser::verifyRegions(llvm::SMLoc loc) {
 }
 
 LogicalResult FormatParser::verifyResults(
-    llvm::SMLoc loc,
+    SMLoc loc,
     llvm::StringMap<TypeResolutionInstance> &variableTyResolver) {
   // If we format all of the types together, there is nothing to check.
   if (fmt.allResultTypes)
@@ -2611,7 +2611,7 @@ LogicalResult FormatParser::verifyResults(
   return ::mlir::success();
 }
 
-LogicalResult FormatParser::verifySuccessors(llvm::SMLoc loc) {
+LogicalResult FormatParser::verifySuccessors(SMLoc loc) {
   // Check that all of the successors are within the format.
   if (hasAllSuccessors)
     return ::mlir::success();
@@ -2715,7 +2715,7 @@ LogicalResult FormatParser::parseVariable(std::unique_ptr<Element> &element,
   consumeToken();
 
   StringRef name = varTok.getSpelling().drop_front();
-  llvm::SMLoc loc = varTok.getLoc();
+  SMLoc loc = varTok.getLoc();
 
   // Check that the parsed argument is something actually registered on the
   // op.
@@ -2861,7 +2861,7 @@ LogicalResult FormatParser::parseLiteral(std::unique_ptr<Element> &element,
 
 LogicalResult FormatParser::parseOptional(std::unique_ptr<Element> &element,
                                           ParserContext context) {
-  llvm::SMLoc curLoc = curToken.getLoc();
+  SMLoc curLoc = curToken.getLoc();
   if (context != TopLevelContext)
     return emitError(curLoc, "optional groups can only be used as top-level "
                              "elements");
@@ -2884,7 +2884,7 @@ LogicalResult FormatParser::parseOptional(std::unique_ptr<Element> &element,
                           "of optional group")))
       return failure();
     do {
-      llvm::SMLoc childLoc = curToken.getLoc();
+      SMLoc childLoc = curToken.getLoc();
       elseElements.push_back({});
       if (failed(parseElement(elseElements.back(), TopLevelContext)) ||
           failed(verifyOptionalChildElement(elseElements.back().get(), childLoc,
@@ -2924,7 +2924,7 @@ LogicalResult FormatParser::parseOptional(std::unique_ptr<Element> &element,
 LogicalResult FormatParser::parseOptionalChildElement(
     std::vector<std::unique_ptr<Element>> &childElements,
     Optional<unsigned> &anchorIdx) {
-  llvm::SMLoc childLoc = curToken.getLoc();
+  SMLoc childLoc = curToken.getLoc();
   childElements.push_back({});
   if (failed(parseElement(childElements.back(), TopLevelContext)))
     return ::mlir::failure();
@@ -2944,7 +2944,7 @@ LogicalResult FormatParser::parseOptionalChildElement(
 }
 
 LogicalResult FormatParser::verifyOptionalChildElement(Element *element,
-                                                       llvm::SMLoc childLoc,
+                                                       SMLoc childLoc,
                                                        bool isAnchor) {
   return TypeSwitch<Element *, LogicalResult>(element)
       // All attributes can be within the optional group, but only optional
@@ -3004,7 +3004,7 @@ LogicalResult FormatParser::verifyOptionalChildElement(Element *element,
 
 LogicalResult
 FormatParser::parseAttrDictDirective(std::unique_ptr<Element> &element,
-                                     llvm::SMLoc loc, ParserContext context,
+                                     SMLoc loc, ParserContext context,
                                      bool withKeyword) {
   if (context == TypeDirectiveContext)
     return emitError(loc, "'attr-dict' directive can only be used as a "
@@ -3028,8 +3028,8 @@ FormatParser::parseAttrDictDirective(std::unique_ptr<Element> &element,
 
 LogicalResult
 FormatParser::parseCustomDirective(std::unique_ptr<Element> &element,
-                                   llvm::SMLoc loc, ParserContext context) {
-  llvm::SMLoc curLoc = curToken.getLoc();
+                                   SMLoc loc, ParserContext context) {
+  SMLoc curLoc = curToken.getLoc();
   if (context != TopLevelContext)
     return emitError(loc, "'custom' is only valid as a top-level directive");
 
@@ -3079,7 +3079,7 @@ FormatParser::parseCustomDirective(std::unique_ptr<Element> &element,
 
 LogicalResult FormatParser::parseCustomDirectiveParameter(
     std::vector<std::unique_ptr<Element>> &parameters) {
-  llvm::SMLoc childLoc = curToken.getLoc();
+  SMLoc childLoc = curToken.getLoc();
   parameters.push_back({});
   if (failed(parseElement(parameters.back(), CustomDirectiveContext)))
     return ::mlir::failure();
@@ -3096,7 +3096,7 @@ LogicalResult FormatParser::parseCustomDirectiveParameter(
 
 LogicalResult FormatParser::parseFunctionalTypeDirective(
     std::unique_ptr<Element> &element, FormatToken tok, ParserContext context) {
-  llvm::SMLoc loc = tok.getLoc();
+  SMLoc loc = tok.getLoc();
   if (context != TopLevelContext)
     return emitError(
         loc, "'functional-type' is only valid as a top-level directive");
@@ -3119,7 +3119,7 @@ LogicalResult FormatParser::parseFunctionalTypeDirective(
 
 LogicalResult
 FormatParser::parseOperandsDirective(std::unique_ptr<Element> &element,
-                                     llvm::SMLoc loc, ParserContext context) {
+                                     SMLoc loc, ParserContext context) {
   if (context == RefDirectiveContext) {
     if (!fmt.allOperands)
       return emitError(loc, "'ref' of 'operands' is not bound by a prior "
@@ -3136,7 +3136,7 @@ FormatParser::parseOperandsDirective(std::unique_ptr<Element> &element,
 
 LogicalResult
 FormatParser::parseReferenceDirective(std::unique_ptr<Element> &element,
-                                      llvm::SMLoc loc, ParserContext context) {
+                                      SMLoc loc, ParserContext context) {
   if (context != CustomDirectiveContext)
     return emitError(loc, "'ref' is only valid within a `custom` directive");
 
@@ -3154,7 +3154,7 @@ FormatParser::parseReferenceDirective(std::unique_ptr<Element> &element,
 
 LogicalResult
 FormatParser::parseRegionsDirective(std::unique_ptr<Element> &element,
-                                    llvm::SMLoc loc, ParserContext context) {
+                                    SMLoc loc, ParserContext context) {
   if (context == TypeDirectiveContext)
     return emitError(loc, "'regions' is only valid as a top-level directive");
   if (context == RefDirectiveContext) {
@@ -3174,7 +3174,7 @@ FormatParser::parseRegionsDirective(std::unique_ptr<Element> &element,
 
 LogicalResult
 FormatParser::parseResultsDirective(std::unique_ptr<Element> &element,
-                                    llvm::SMLoc loc, ParserContext context) {
+                                    SMLoc loc, ParserContext context) {
   if (context != TypeDirectiveContext)
     return emitError(loc, "'results' directive can can only be used as a child "
                           "to a 'type' directive");
@@ -3184,7 +3184,7 @@ FormatParser::parseResultsDirective(std::unique_ptr<Element> &element,
 
 LogicalResult
 FormatParser::parseSuccessorsDirective(std::unique_ptr<Element> &element,
-                                       llvm::SMLoc loc, ParserContext context) {
+                                       SMLoc loc, ParserContext context) {
   if (context == TypeDirectiveContext)
     return emitError(loc,
                      "'successors' is only valid as a top-level directive");
@@ -3206,7 +3206,7 @@ FormatParser::parseSuccessorsDirective(std::unique_ptr<Element> &element,
 LogicalResult
 FormatParser::parseTypeDirective(std::unique_ptr<Element> &element,
                                  FormatToken tok, ParserContext context) {
-  llvm::SMLoc loc = tok.getLoc();
+  SMLoc loc = tok.getLoc();
   if (context == TypeDirectiveContext)
     return emitError(loc, "'type' cannot be used as a child of another `type`");
 
@@ -3247,7 +3247,7 @@ FormatParser::parseQualifiedDirective(std::unique_ptr<Element> &element,
 LogicalResult
 FormatParser::parseTypeDirectiveOperand(std::unique_ptr<Element> &element,
                                         bool isRefChild) {
-  llvm::SMLoc loc = curToken.getLoc();
+  SMLoc loc = curToken.getLoc();
   if (failed(parseElement(element, TypeDirectiveContext)))
     return ::mlir::failure();
   if (isa<LiteralElement>(element.get()))
@@ -3306,7 +3306,7 @@ void mlir::tblgen::generateOpFormat(const Operator &constOp, OpClass &opClass) {
   // Parse the format description.
   llvm::SourceMgr mgr;
   mgr.AddNewSourceBuffer(
-      llvm::MemoryBuffer::getMemBuffer(op.getAssemblyFormat()), llvm::SMLoc());
+      llvm::MemoryBuffer::getMemBuffer(op.getAssemblyFormat()), SMLoc());
   OperationFormat format(op);
   if (failed(FormatParser(mgr, format, op).parse())) {
     // Exit the process if format errors are treated as fatal.
diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp
index 03892192995ab..4fcc880464c98 100644
--- a/mlir/tools/mlir-tblgen/RewriterGen.cpp
+++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp
@@ -229,7 +229,7 @@ class PatternEmitter {
   // Pattern instantiation location followed by the location of multiclass
   // prototypes used. This is intended to be used as a whole to
   // PrintFatalError() on errors.
-  ArrayRef<llvm::SMLoc> loc;
+  ArrayRef<SMLoc> loc;
 
   // Op's TableGen Record to wrapper object.
   RecordOperatorMap *opMap;
diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
index 5da5ecc5bcf1d..4bdfa71d8bc49 100644
--- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
+++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp
@@ -3,6 +3,7 @@
 #include "gtest/gtest.h"
 #include <memory>
 
+using namespace mlir;
 using namespace mlir::sparse_tensor;
 
 namespace {

From ad39b5bc59b0e71c86f8cf290ead2d9dd09e5c3e Mon Sep 17 00:00:00 2001
From: Jim Lin <jim@andestech.com>
Date: Thu, 27 Jan 2022 09:52:45 +0800
Subject: [PATCH 843/946] [NFC] Remove duplicate include

---
 clang/lib/Sema/SemaConcept.cpp                | 1 -
 clang/unittests/AST/EvaluateAsRValueTest.cpp  | 1 -
 clang/unittests/Tooling/Syntax/TokensTest.cpp | 1 -
 3 files changed, 3 deletions(-)

diff --git a/clang/lib/Sema/SemaConcept.cpp b/clang/lib/Sema/SemaConcept.cpp
index 678f1e40e730b..ce99d4848ccaa 100644
--- a/clang/lib/Sema/SemaConcept.cpp
+++ b/clang/lib/Sema/SemaConcept.cpp
@@ -18,7 +18,6 @@
 #include "clang/Sema/Template.h"
 #include "clang/Sema/Overload.h"
 #include "clang/Sema/Initialization.h"
-#include "clang/Sema/SemaInternal.h"
 #include "clang/AST/ExprConcepts.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/Basic/OperatorPrecedence.h"
diff --git a/clang/unittests/AST/EvaluateAsRValueTest.cpp b/clang/unittests/AST/EvaluateAsRValueTest.cpp
index 0475330796d12..bf44136835f21 100644
--- a/clang/unittests/AST/EvaluateAsRValueTest.cpp
+++ b/clang/unittests/AST/EvaluateAsRValueTest.cpp
@@ -11,7 +11,6 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/RecursiveASTVisitor.h"
diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp
index e7cb25782133e..22134d9d0cfb0 100644
--- a/clang/unittests/Tooling/Syntax/TokensTest.cpp
+++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp
@@ -40,7 +40,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Testing/Support/Annotations.h"
 #include "llvm/Testing/Support/SupportHelpers.h"
-#include "gmock/gmock.h"
 #include <cassert>
 #include <cstdlib>
 #include <gmock/gmock.h>

From 14b7785c093899988a1f23dcbf7edc098fe860d2 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 26 Jan 2022 22:03:26 -0800
Subject: [PATCH 844/946] [ELF] Simplify InputSection::writeTo. NFC

---
 lld/ELF/InputSection.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index e6ce2c399ac9d..943cf18e6cf05 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -1261,9 +1261,8 @@ template <class ELFT> void InputSection::writeTo(uint8_t *buf) {
 
   // Copy section contents from source object file to output file
   // and then apply relocations.
-  memcpy(buf, data().data(), data().size());
-  uint8_t *bufEnd = buf + data().size();
-  relocate<ELFT>(buf, bufEnd);
+  memcpy(buf, rawData.data(), rawData.size());
+  relocate<ELFT>(buf, buf + rawData.size());
 }
 
 void InputSection::replace(InputSection *other) {

From afeb4a6628a684b268b18d5f3cbd88e58ee75b29 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 26 Jan 2022 22:51:31 -0800
Subject: [PATCH 845/946] [ELF] Optimize -Map. NFC

getVA is slow. Avoid calling it in the llvm::sort comparator.
---
 lld/ELF/MapFile.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index 1998192bfba6d..70d0e8c65255b 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -37,7 +37,8 @@ using namespace llvm::object;
 using namespace lld;
 using namespace lld::elf;
 
-using SymbolMapTy = DenseMap<const SectionBase *, SmallVector<Defined *, 4>>;
+using SymbolMapTy = DenseMap<const SectionBase *,
+                             SmallVector<std::pair<Defined *, uint64_t>, 0>>;
 
 static constexpr char indent8[] = "        ";          // 8 spaces
 static constexpr char indent16[] = "                "; // 16 spaces
@@ -67,7 +68,7 @@ static std::vector<Defined *> getSymbols() {
 static SymbolMapTy getSectionSyms(ArrayRef<Defined *> syms) {
   SymbolMapTy ret;
   for (Defined *dr : syms)
-    ret[dr->section].push_back(dr);
+    ret[dr->section].emplace_back(dr, dr->getVA());
 
   // Sort symbols by address. We want to print out symbols in the
   // order in the output file rather than the order they appeared
@@ -76,12 +77,11 @@ static SymbolMapTy getSectionSyms(ArrayRef<Defined *> syms) {
   for (auto &it : ret) {
     // Deduplicate symbols which need a canonical PLT entry/copy relocation.
     set.clear();
-    llvm::erase_if(it.second,
-                   [&](Defined *sym) { return !set.insert(sym).second; });
-
-    llvm::stable_sort(it.second, [](Defined *a, Defined *b) {
-      return a->getVA() < b->getVA();
+    llvm::erase_if(it.second, [&](std::pair<Defined *, uint64_t> a) {
+      return !set.insert(a.first).second;
     });
+
+    llvm::stable_sort(it.second, llvm::less_second());
   }
   return ret;
 }
@@ -184,7 +184,7 @@ static void writeMapFile(raw_fd_ostream &os) {
           writeHeader(os, isec->getVA(), osec->getLMA() + isec->outSecOff,
                       isec->getSize(), isec->alignment);
           os << indent8 << toString(isec) << '\n';
-          for (Symbol *sym : sectionSyms[isec])
+          for (Symbol *sym : llvm::make_first_range(sectionSyms[isec]))
             os << symStr[sym] << '\n';
         }
         continue;

From 1372d53639f7eb2273836d7e55947fe6943a5be4 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 26 Jan 2022 23:10:40 -0800
Subject: [PATCH 846/946] [ELF] Optimize two vector. NFC

---
 lld/ELF/MapFile.cpp | 6 +++---
 lld/ELF/Writer.cpp  | 5 ++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/lld/ELF/MapFile.cpp b/lld/ELF/MapFile.cpp
index 70d0e8c65255b..c7d8967358ad5 100644
--- a/lld/ELF/MapFile.cpp
+++ b/lld/ELF/MapFile.cpp
@@ -91,9 +91,9 @@ static SymbolMapTy getSectionSyms(ArrayRef<Defined *> syms) {
 // we do that in batch using parallel-for.
 static DenseMap<Symbol *, std::string>
 getSymbolStrings(ArrayRef<Defined *> syms) {
-  std::vector<std::string> str(syms.size());
+  auto strs = std::make_unique<std::string[]>(syms.size());
   parallelForEachN(0, syms.size(), [&](size_t i) {
-    raw_string_ostream os(str[i]);
+    raw_string_ostream os(strs[i]);
     OutputSection *osec = syms[i]->getOutputSection();
     uint64_t vma = syms[i]->getVA();
     uint64_t lma = osec ? osec->getLMA() + vma - osec->getVA(0) : 0;
@@ -103,7 +103,7 @@ getSymbolStrings(ArrayRef<Defined *> syms) {
 
   DenseMap<Symbol *, std::string> ret;
   for (size_t i = 0, e = syms.size(); i < e; ++i)
-    ret[syms[i]] = std::move(str[i]);
+    ret[syms[i]] = std::move(strs[i]);
   return ret;
 }
 
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 396b9c153dc21..848db1f04b422 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1750,16 +1750,15 @@ template <class ELFT> void Writer<ELFT>::optimizeBasicBlockJumps() {
     if (!(osec->flags & SHF_EXECINSTR))
       continue;
     SmallVector<InputSection *, 0> sections = getInputSections(*osec);
-    std::vector<unsigned> result(sections.size());
+    size_t numDeleted = 0;
     // Delete all fall through jump instructions.  Also, check if two
     // consecutive jump instructions can be flipped so that a fall
     // through jmp instruction can be deleted.
     for (size_t i = 0, e = sections.size(); i != e; ++i) {
       InputSection *next = i + 1 < sections.size() ? sections[i + 1] : nullptr;
       InputSection &sec = *sections[i];
-      result[i] = target->deleteFallThruJmpInsn(sec, sec.file, next) ? 1 : 0;
+      numDeleted += target->deleteFallThruJmpInsn(sec, sec.file, next);
     }
-    size_t numDeleted = std::count(result.begin(), result.end(), 1);
     if (numDeleted > 0) {
       script->assignAddresses();
       LLVM_DEBUG(llvm::dbgs()

From 7d0426dd95440e087bf3026fc8cffa2d1f2eb5a8 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Wed, 26 Jan 2022 14:18:21 -0800
Subject: [PATCH 847/946] [mlir] Move ComposeSubView+ExpandOps from Standard to
 MemRef

These transformations already operate on memref operations (as part of
splitting up the standard dialect). Now that the operations have moved,
it's time for these transformations to move as well.

Differential Revision: https://reviews.llvm.org/D118285
---
 .../Transforms/ComposeSubView.h                 | 13 +++++++------
 .../mlir/Dialect/MemRef/Transforms/Passes.h     |  9 +++++++++
 .../mlir/Dialect/MemRef/Transforms/Passes.td    |  6 ++++++
 .../Dialect/StandardOps/Transforms/Passes.h     | 10 ----------
 .../Dialect/StandardOps/Transforms/Passes.td    |  5 -----
 .../Dialect/MemRef/Transforms/CMakeLists.txt    |  2 ++
 .../Transforms/ComposeSubView.cpp               | 11 ++++-------
 .../Transforms/ExpandOps.cpp                    | 14 +++++++-------
 mlir/lib/Dialect/MemRef/Transforms/PassDetail.h |  1 +
 .../StandardOps/Transforms/CMakeLists.txt       |  2 --
 .../{Standard => MemRef}/expand-ops.mlir        |  2 +-
 mlir/test/lib/Dialect/CMakeLists.txt            |  1 +
 mlir/test/lib/Dialect/MemRef/CMakeLists.txt     | 17 +++++++++++++++++
 .../TestComposeSubView.cpp                      |  4 ++--
 .../test/lib/Dialect/StandardOps/CMakeLists.txt |  1 -
 mlir/test/mlir-cpu-runner/memref-reshape.mlir   |  2 +-
 mlir/tools/mlir-opt/CMakeLists.txt              |  1 +
 17 files changed, 59 insertions(+), 42 deletions(-)
 rename mlir/include/mlir/Dialect/{StandardOps => MemRef}/Transforms/ComposeSubView.h (71%)
 rename mlir/lib/Dialect/{StandardOps => MemRef}/Transforms/ComposeSubView.cpp (96%)
 rename mlir/lib/Dialect/{StandardOps => MemRef}/Transforms/ExpandOps.cpp (93%)
 rename mlir/test/Dialect/{Standard => MemRef}/expand-ops.mlir (96%)
 create mode 100644 mlir/test/lib/Dialect/MemRef/CMakeLists.txt
 rename mlir/test/lib/Dialect/{StandardOps => MemRef}/TestComposeSubView.cpp (92%)

diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/ComposeSubView.h b/mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h
similarity index 71%
rename from mlir/include/mlir/Dialect/StandardOps/Transforms/ComposeSubView.h
rename to mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h
index 7a5ae3e8417b7..20aa1c02db178 100644
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/ComposeSubView.h
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h
@@ -1,4 +1,4 @@
-//===- ComposeSubView.h - Combining composed subview ops --------*- C++ -*-===//
+//===- ComposeSubView.h - Combining composed memref ops ---------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -10,19 +10,20 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef MLIR_DIALECT_STANDARDOPS_TRANSFORMS_COMPOSESUBVIEW_H_
-#define MLIR_DIALECT_STANDARDOPS_TRANSFORMS_COMPOSESUBVIEW_H_
+#ifndef MLIR_DIALECT_MEMREF_TRANSFORMS_COMPOSESUBVIEW_H_
+#define MLIR_DIALECT_MEMREF_TRANSFORMS_COMPOSESUBVIEW_H_
 
 namespace mlir {
-
-// Forward declarations.
 class MLIRContext;
 class RewritePatternSet;
 using OwningRewritePatternList = RewritePatternSet;
 
+namespace memref {
+
 void populateComposeSubViewPatterns(OwningRewritePatternList &patterns,
                                     MLIRContext *context);
 
+} // namespace memref
 } // namespace mlir
 
-#endif // MLIR_DIALECT_STANDARDOPS_TRANSFORMS_COMPOSESUBVIEW_H_
+#endif // MLIR_DIALECT_MEMREF_TRANSFORMS_COMPOSESUBVIEW_H_
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
index 23d12508b65cb..6e9f6fb10a665 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h
@@ -18,6 +18,7 @@
 namespace mlir {
 
 class AffineDialect;
+class StandardOpsDialect;
 namespace tensor {
 class TensorDialect;
 } // namespace tensor
@@ -31,6 +32,9 @@ namespace memref {
 // Patterns
 //===----------------------------------------------------------------------===//
 
+/// Collects a set of patterns to rewrite ops within the memref dialect.
+void populateExpandOpsPatterns(RewritePatternSet &patterns);
+
 /// Appends patterns for folding memref.subview ops into consumer load/store ops
 /// into `patterns`.
 void populateFoldSubViewOpPatterns(RewritePatternSet &patterns);
@@ -51,6 +55,11 @@ void populateResolveShapedTypeResultDimsPatterns(RewritePatternSet &patterns);
 // Passes
 //===----------------------------------------------------------------------===//
 
+/// Creates an instance of the ExpandOps pass that legalizes memref dialect ops
+/// to be convertible to LLVM. For example, `memref.reshape` gets converted to
+/// `memref_reinterpret_cast`.
+std::unique_ptr<Pass> createExpandOpsPass();
+
 /// Creates an operation pass to fold memref.subview ops into consumer
 /// load/store ops into `patterns`.
 std::unique_ptr<Pass> createFoldSubViewOpsPass();
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
index d67746b9c6033..0f2e3a91a2554 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/Passes.td
@@ -11,6 +11,12 @@
 
 include "mlir/Pass/PassBase.td"
 
+def ExpandOps : Pass<"memref-expand", "FuncOp"> {
+  let summary = "Legalize memref operations to be convertible to LLVM.";
+  let constructor = "mlir::memref::createExpandOpsPass()";
+  let dependentDialects = ["StandardOpsDialect"];
+}
+
 def FoldSubViewOps : Pass<"fold-memref-subview-ops"> {
   let summary = "Fold memref.subview ops into consumer load/store ops";
   let description = [{
diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
index b47303c70250e..3f723133b2a6e 100644
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
@@ -45,16 +45,6 @@ void populateTensorConstantBufferizePatterns(
 /// Creates an instance of tensor constant bufferization pass.
 std::unique_ptr<Pass> createTensorConstantBufferizePass(unsigned alignment = 0);
 
-/// Creates an instance of the StdExpand pass that legalizes Std
-/// dialect ops to be convertible to LLVM. For example,
-/// `std.arith.ceildivsi` gets transformed to a number of std operations,
-/// which can be lowered to LLVM; `memref.reshape` gets converted to
-/// `memref_reinterpret_cast`.
-std::unique_ptr<Pass> createStdExpandOpsPass();
-
-/// Collects a set of patterns to rewrite ops within the Std dialect.
-void populateStdExpandOpsPatterns(RewritePatternSet &patterns);
-
 //===----------------------------------------------------------------------===//
 // Registration
 //===----------------------------------------------------------------------===//
diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
index 8ac2b64a8006d..339c1b1194cce 100644
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.td
@@ -18,11 +18,6 @@ def StdBufferize : Pass<"std-bufferize", "FuncOp"> {
                            "memref::MemRefDialect", "scf::SCFDialect"];
 }
 
-def StdExpandOps : Pass<"std-expand", "FuncOp"> {
-  let summary = "Legalize std operations to be convertible to LLVM.";
-  let constructor = "mlir::createStdExpandOpsPass()";
-}
-
 def FuncBufferize : Pass<"func-bufferize", "ModuleOp"> {
   let summary = "Bufferize func/call/return ops";
   let description = [{
diff --git a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
index 319f9bbb95a37..99bc552548f90 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/MemRef/Transforms/CMakeLists.txt
@@ -1,4 +1,6 @@
 add_mlir_dialect_library(MLIRMemRefTransforms
+  ComposeSubView.cpp
+  ExpandOps.cpp
   FoldSubViewOps.cpp
   NormalizeMemRefs.cpp
   ResolveShapedTypeResultDims.cpp
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/ComposeSubView.cpp b/mlir/lib/Dialect/MemRef/Transforms/ComposeSubView.cpp
similarity index 96%
rename from mlir/lib/Dialect/StandardOps/Transforms/ComposeSubView.cpp
rename to mlir/lib/Dialect/MemRef/Transforms/ComposeSubView.cpp
index cabaa614fa2a9..29ba5060d167d 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/ComposeSubView.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ComposeSubView.cpp
@@ -11,8 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Dialect/StandardOps/Transforms/ComposeSubView.h"
-
+#include "mlir/Dialect/MemRef/Transforms/ComposeSubView.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/IR/BuiltinAttributes.h"
@@ -21,7 +20,7 @@
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
-namespace mlir {
+using namespace mlir;
 
 namespace {
 
@@ -128,9 +127,7 @@ struct ComposeSubViewOpPattern : public OpRewritePattern<memref::SubViewOp> {
 
 } // namespace
 
-void populateComposeSubViewPatterns(OwningRewritePatternList &patterns,
-                                    MLIRContext *context) {
+void mlir::memref::populateComposeSubViewPatterns(
+    OwningRewritePatternList &patterns, MLIRContext *context) {
   patterns.insert<ComposeSubViewOpPattern>(context);
 }
-
-} // namespace mlir
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp
similarity index 93%
rename from mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp
rename to mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp
index e5cf08da4904a..293fb58d4e701 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp
@@ -17,8 +17,8 @@
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Arithmetic/Transforms/Passes.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
+#include "mlir/Dialect/MemRef/Transforms/Passes.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
-#include "mlir/Dialect/StandardOps/Transforms/Passes.h"
 #include "mlir/IR/TypeUtilities.h"
 #include "mlir/Transforms/DialectConversion.h"
 
@@ -120,13 +120,13 @@ struct MemRefReshapeOpConverter : public OpRewritePattern<memref::ReshapeOp> {
   }
 };
 
-struct StdExpandOpsPass : public StdExpandOpsBase<StdExpandOpsPass> {
+struct ExpandOpsPass : public ExpandOpsBase<ExpandOpsPass> {
   void runOnOperation() override {
     MLIRContext &ctx = getContext();
 
     RewritePatternSet patterns(&ctx);
-    populateStdExpandOpsPatterns(patterns);
-    ConversionTarget target(getContext());
+    memref::populateExpandOpsPatterns(patterns);
+    ConversionTarget target(ctx);
 
     target.addLegalDialect<arith::ArithmeticDialect, memref::MemRefDialect,
                            StandardOpsDialect>();
@@ -146,11 +146,11 @@ struct StdExpandOpsPass : public StdExpandOpsBase<StdExpandOpsPass> {
 
 } // namespace
 
-void mlir::populateStdExpandOpsPatterns(RewritePatternSet &patterns) {
+void mlir::memref::populateExpandOpsPatterns(RewritePatternSet &patterns) {
   patterns.add<AtomicRMWOpConverter, MemRefReshapeOpConverter>(
       patterns.getContext());
 }
 
-std::unique_ptr<Pass> mlir::createStdExpandOpsPass() {
-  return std::make_unique<StdExpandOpsPass>();
+std::unique_ptr<Pass> mlir::memref::createExpandOpsPass() {
+  return std::make_unique<ExpandOpsPass>();
 }
diff --git a/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h b/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h
index d15631526817f..d1e5baa798fd1 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h
+++ b/mlir/lib/Dialect/MemRef/Transforms/PassDetail.h
@@ -14,6 +14,7 @@
 namespace mlir {
 
 class AffineDialect;
+class StandardOpsDialect;
 
 // Forward declaration from Dialect.h
 template <typename ConcreteDialect>
diff --git a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
index 82e25923840dd..f8082601b48b3 100644
--- a/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/StandardOps/Transforms/CMakeLists.txt
@@ -1,8 +1,6 @@
 add_mlir_dialect_library(MLIRStandardOpsTransforms
   Bufferize.cpp
-  ComposeSubView.cpp
   DecomposeCallGraphTypes.cpp
-  ExpandOps.cpp
   FuncBufferize.cpp
   FuncConversions.cpp
   TensorConstantBufferize.cpp
diff --git a/mlir/test/Dialect/Standard/expand-ops.mlir b/mlir/test/Dialect/MemRef/expand-ops.mlir
similarity index 96%
rename from mlir/test/Dialect/Standard/expand-ops.mlir
rename to mlir/test/Dialect/MemRef/expand-ops.mlir
index 2a1c367ff80f0..bcf83042184f3 100644
--- a/mlir/test/Dialect/Standard/expand-ops.mlir
+++ b/mlir/test/Dialect/MemRef/expand-ops.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -std-expand %s -split-input-file | FileCheck %s
+// RUN: mlir-opt -memref-expand %s -split-input-file | FileCheck %s
 
 // CHECK-LABEL: func @atomic_rmw_to_generic
 // CHECK-SAME: ([[F:%.*]]: memref<10xf32>, [[f:%.*]]: f32, [[i:%.*]]: index)
diff --git a/mlir/test/lib/Dialect/CMakeLists.txt b/mlir/test/lib/Dialect/CMakeLists.txt
index d8219057f9391..d02dc61045fae 100644
--- a/mlir/test/lib/Dialect/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(DLTI)
 add_subdirectory(GPU)
 add_subdirectory(Linalg)
 add_subdirectory(Math)
+add_subdirectory(MemRef)
 add_subdirectory(SCF)
 add_subdirectory(Shape)
 add_subdirectory(SPIRV)
diff --git a/mlir/test/lib/Dialect/MemRef/CMakeLists.txt b/mlir/test/lib/Dialect/MemRef/CMakeLists.txt
new file mode 100644
index 0000000000000..c43dec48bdabf
--- /dev/null
+++ b/mlir/test/lib/Dialect/MemRef/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Exclude tests from libMLIR.so
+add_mlir_library(MLIRMemRefTestPasses
+  TestComposeSubView.cpp
+
+  EXCLUDE_FROM_LIBMLIR
+
+  LINK_LIBS PUBLIC
+  MLIRPass
+  MLIRMemRefTransforms
+  MLIRTestDialect
+  )
+
+target_include_directories(MLIRMemRefTestPasses
+  PRIVATE
+  ${CMAKE_CURRENT_SOURCE_DIR}/../Test
+  ${CMAKE_CURRENT_BINARY_DIR}/../Test
+  )
diff --git a/mlir/test/lib/Dialect/StandardOps/TestComposeSubView.cpp b/mlir/test/lib/Dialect/MemRef/TestComposeSubView.cpp
similarity index 92%
rename from mlir/test/lib/Dialect/StandardOps/TestComposeSubView.cpp
rename to mlir/test/lib/Dialect/MemRef/TestComposeSubView.cpp
index 1638ee3debe0a..20add4cc94c8b 100644
--- a/mlir/test/lib/Dialect/StandardOps/TestComposeSubView.cpp
+++ b/mlir/test/lib/Dialect/MemRef/TestComposeSubView.cpp
@@ -11,7 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/StandardOps/Transforms/ComposeSubView.h"
+#include "mlir/Dialect/MemRef/Transforms/ComposeSubView.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"
 
@@ -35,7 +35,7 @@ void TestComposeSubViewPass::getDependentDialects(
 
 void TestComposeSubViewPass::runOnOperation() {
   OwningRewritePatternList patterns(&getContext());
-  populateComposeSubViewPatterns(patterns, &getContext());
+  memref::populateComposeSubViewPatterns(patterns, &getContext());
   (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
 }
 } // namespace
diff --git a/mlir/test/lib/Dialect/StandardOps/CMakeLists.txt b/mlir/test/lib/Dialect/StandardOps/CMakeLists.txt
index 22d5818e34133..b85de09e10bf4 100644
--- a/mlir/test/lib/Dialect/StandardOps/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/StandardOps/CMakeLists.txt
@@ -1,7 +1,6 @@
 # Exclude tests from libMLIR.so
 add_mlir_library(MLIRStandardOpsTestPasses
   TestDecomposeCallGraphTypes.cpp
-  TestComposeSubView.cpp
 
   EXCLUDE_FROM_LIBMLIR
 
diff --git a/mlir/test/mlir-cpu-runner/memref-reshape.mlir b/mlir/test/mlir-cpu-runner/memref-reshape.mlir
index 6d0397399ccaf..e74d6219a1f33 100644
--- a/mlir/test/mlir-cpu-runner/memref-reshape.mlir
+++ b/mlir/test/mlir-cpu-runner/memref-reshape.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s -convert-scf-to-std -std-expand -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
+// RUN: mlir-opt %s -convert-scf-to-std -memref-expand -convert-arith-to-llvm -convert-memref-to-llvm -convert-std-to-llvm -reconcile-unrealized-casts \
 // RUN: | mlir-cpu-runner -e main -entry-point-result=void \
 // RUN: -shared-libs=%mlir_runner_utils_dir/libmlir_runner_utils%shlibext,%mlir_runner_utils_dir/libmlir_c_runner_utils%shlibext \
 // RUN: | FileCheck %s
diff --git a/mlir/tools/mlir-opt/CMakeLists.txt b/mlir/tools/mlir-opt/CMakeLists.txt
index 7a16a497b6f86..c03d6403a74eb 100644
--- a/mlir/tools/mlir-opt/CMakeLists.txt
+++ b/mlir/tools/mlir-opt/CMakeLists.txt
@@ -18,6 +18,7 @@ if(MLIR_INCLUDE_TESTS)
     MLIRGPUTestPasses
     MLIRLinalgTestPasses
     MLIRMathTestPasses
+    MLIRMemRefTestPasses
     MLIRSCFTestPasses
     MLIRShapeTestPasses
     MLIRSPIRVTestPasses

From 9f85c198dbd55fa747de55b746c90acb740bede8 Mon Sep 17 00:00:00 2001
From: River Riddle <riddleriver@gmail.com>
Date: Wed, 26 Jan 2022 14:42:38 -0800
Subject: [PATCH 848/946] [mlir] Finish replacing OwningRewritePatternList with
 RewritePatternSet

OwningRewritePatternList has been deprecated for ~10 months now, we can remove
the leftover using directives at this point.

Differential Revision: https://reviews.llvm.org/D118287
---
 flang/lib/Optimizer/CodeGen/CodeGen.cpp          |  2 +-
 flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp     |  2 +-
 flang/lib/Optimizer/Dialect/FIROps.cpp           |  6 +++---
 .../lib/Optimizer/Transforms/AbstractResult.cpp  |  2 +-
 .../lib/Optimizer/Transforms/AffineDemotion.cpp  |  2 +-
 .../lib/Optimizer/Transforms/AffinePromotion.cpp |  2 +-
 .../lib/Optimizer/Transforms/ArrayValueCopy.cpp  |  4 ++--
 .../Optimizer/Transforms/CharacterConversion.cpp |  2 +-
 .../Transforms/ExternalNameConversion.cpp        |  2 +-
 .../Optimizer/Transforms/MemoryAllocation.cpp    |  2 +-
 flang/lib/Optimizer/Transforms/RewriteLoop.cpp   |  2 +-
 .../AffineToStandard/AffineToStandard.h          |  1 -
 .../mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h    |  1 -
 .../mlir/Conversion/GPUCommon/GPUCommonPass.h    |  3 +--
 .../mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h    |  1 -
 .../mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h  |  1 -
 .../Conversion/LinalgToSPIRV/LinalgToSPIRV.h     |  1 -
 .../OpenMPToLLVM/ConvertOpenMPToLLVM.h           |  1 -
 mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h |  1 -
 .../mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h      |  1 -
 .../Conversion/SCFToStandard/SCFToStandard.h     |  1 -
 .../Conversion/ShapeToStandard/ShapeToStandard.h |  1 -
 .../Conversion/VectorToROCDL/VectorToROCDL.h     |  1 -
 mlir/include/mlir/Dialect/AMX/Transforms.h       |  1 -
 mlir/include/mlir/Dialect/ArmSVE/Transforms.h    |  1 -
 .../mlir/Dialect/Linalg/Transforms/Transforms.h  |  4 ++--
 .../Dialect/MemRef/Transforms/ComposeSubView.h   |  3 +--
 mlir/include/mlir/Dialect/SCF/Transforms.h       |  1 -
 .../StandardOps/Transforms/FuncConversions.h     |  1 -
 .../mlir/Dialect/StandardOps/Transforms/Passes.h |  1 -
 .../mlir/Dialect/Tensor/Transforms/Passes.h      |  1 -
 mlir/include/mlir/Dialect/Vector/VectorOps.h     |  1 -
 .../mlir/Dialect/Vector/VectorTransforms.h       |  2 --
 mlir/include/mlir/Dialect/X86Vector/Transforms.h |  1 -
 mlir/include/mlir/IR/OperationSupport.h          |  4 +---
 .../Conversion/GPUCommon/GPUToLLVMConversion.cpp |  2 +-
 .../Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp   |  2 +-
 mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp | 16 ++++++++--------
 .../ArmSVE/Transforms/LegalizeForLLVMExport.cpp  |  2 +-
 .../Bufferization/IR/BufferizationOps.cpp        |  2 +-
 .../Bufferization/Transforms/Bufferize.cpp       |  2 +-
 mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp         |  2 +-
 .../Dialect/MemRef/Transforms/ComposeSubView.cpp |  2 +-
 mlir/lib/Dialect/SCF/SCF.cpp                     |  2 +-
 mlir/lib/Dialect/StandardOps/IR/Ops.cpp          |  2 +-
 mlir/lib/Dialect/Tosa/IR/TosaOps.cpp             | 16 ++++++++--------
 .../Dialect/Linalg/TestLinalgDistribution.cpp    |  2 +-
 .../lib/Dialect/MemRef/TestComposeSubView.cpp    |  2 +-
 48 files changed, 46 insertions(+), 71 deletions(-)

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 9072cf447e1d7..dc766ab2fde54 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3266,7 +3266,7 @@ class FIRToLLVMLowering : public fir::FIRToLLVMLoweringBase<FIRToLLVMLowering> {
 
     auto *context = getModule().getContext();
     fir::LLVMTypeConverter typeConverter{getModule()};
-    mlir::OwningRewritePatternList pattern(context);
+    mlir::RewritePatternSet pattern(context);
     pattern.insert<
         AbsentOpConversion, AddcOpConversion, AddrOfOpConversion,
         AllocaOpConversion, AllocMemOpConversion, BoxAddrOpConversion,
diff --git a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
index 712b643e1e05a..fc0642e698af7 100644
--- a/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp
@@ -260,7 +260,7 @@ class CodeGenRewrite : public CodeGenRewriteBase<CodeGenRewrite> {
       return !(embox.getShape() ||
                embox.getType().cast<BoxType>().getEleTy().isa<SequenceType>());
     });
-    mlir::OwningRewritePatternList patterns(&context);
+    mlir::RewritePatternSet patterns(&context);
     patterns.insert<EmboxConversion, ArrayCoorConversion, ReboxConversion>(
         &context);
     if (mlir::failed(
diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp
index 36c7c8f57461d..8f702b3245f48 100644
--- a/flang/lib/Optimizer/Dialect/FIROps.cpp
+++ b/flang/lib/Optimizer/Dialect/FIROps.cpp
@@ -784,8 +784,8 @@ static mlir::LogicalResult verify(fir::ConstcOp &op) {
 // ConvertOp
 //===----------------------------------------------------------------------===//
 
-void fir::ConvertOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &results, MLIRContext *context) {
+void fir::ConvertOp::getCanonicalizationPatterns(RewritePatternSet &results,
+                                                 MLIRContext *context) {
   results.insert<ConvertConvertOptPattern, RedundantConvertOptPattern,
                  CombineConvertOptPattern, ForwardConstantConvertPattern>(
       context);
@@ -1508,7 +1508,7 @@ struct UndoComplexPattern : public mlir::RewritePattern {
 };
 
 void fir::InsertValueOp::getCanonicalizationPatterns(
-    mlir::OwningRewritePatternList &results, mlir::MLIRContext *context) {
+    mlir::RewritePatternSet &results, mlir::MLIRContext *context) {
   results.insert<UndoComplexPattern<mlir::arith::AddFOp, fir::AddcOp>,
                  UndoComplexPattern<mlir::arith::SubFOp, fir::SubcOp>>(context);
 }
diff --git a/flang/lib/Optimizer/Transforms/AbstractResult.cpp b/flang/lib/Optimizer/Transforms/AbstractResult.cpp
index 465755c87ccab..7a7207c91645c 100644
--- a/flang/lib/Optimizer/Transforms/AbstractResult.cpp
+++ b/flang/lib/Optimizer/Transforms/AbstractResult.cpp
@@ -216,7 +216,7 @@ class AbstractResultOpt : public fir::AbstractResultOptBase<AbstractResultOpt> {
     auto *context = &getContext();
     auto func = getOperation();
     auto loc = func.getLoc();
-    mlir::OwningRewritePatternList patterns(context);
+    mlir::RewritePatternSet patterns(context);
     mlir::ConversionTarget target = *context;
     AbstractResultOptions options{passResultAsBox.getValue(),
                                   /*newArg=*/{}};
diff --git a/flang/lib/Optimizer/Transforms/AffineDemotion.cpp b/flang/lib/Optimizer/Transforms/AffineDemotion.cpp
index 947c89b015883..bdf73c7e39b88 100644
--- a/flang/lib/Optimizer/Transforms/AffineDemotion.cpp
+++ b/flang/lib/Optimizer/Transforms/AffineDemotion.cpp
@@ -143,7 +143,7 @@ class AffineDialectDemotion
     LLVM_DEBUG(llvm::dbgs() << "AffineDemotion: running on function:\n";
                function.print(llvm::dbgs()););
 
-    mlir::OwningRewritePatternList patterns(context);
+    mlir::RewritePatternSet patterns(context);
     patterns.insert<ConvertConversion>(context);
     patterns.insert<AffineLoadConversion>(context);
     patterns.insert<AffineStoreConversion>(context);
diff --git a/flang/lib/Optimizer/Transforms/AffinePromotion.cpp b/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
index 830ed6bbacb50..3f45aa4f8dc5f 100644
--- a/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
+++ b/flang/lib/Optimizer/Transforms/AffinePromotion.cpp
@@ -587,7 +587,7 @@ class AffineDialectPromotion
     auto function = getOperation();
     markAllAnalysesPreserved();
     auto functionAnalysis = AffineFunctionAnalysis(function);
-    mlir::OwningRewritePatternList patterns(context);
+    mlir::RewritePatternSet patterns(context);
     patterns.insert<AffineIfConversion>(context, functionAnalysis);
     patterns.insert<AffineLoopConversion>(context, functionAnalysis);
     mlir::ConversionTarget target = *context;
diff --git a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
index 4f9d17526f5c9..c2a607a43ae20 100644
--- a/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
+++ b/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp
@@ -784,7 +784,7 @@ class ArrayValueCopyConverter
     // array accesses are rewritten we can go on phase 2.
     // Phase 2 gets rid of the useless copy-in/copyout operations. The copy-in
     // /copy-out refers the Fortran copy-in/copy-out semantics on statements.
-    mlir::OwningRewritePatternList patterns1(context);
+    mlir::RewritePatternSet patterns1(context);
     patterns1.insert<ArrayFetchConversion>(context, useMap);
     patterns1.insert<ArrayUpdateConversion>(context, analysis, useMap);
     patterns1.insert<ArrayModifyConversion>(context, analysis, useMap);
@@ -801,7 +801,7 @@ class ArrayValueCopyConverter
       signalPassFailure();
     }
 
-    mlir::OwningRewritePatternList patterns2(context);
+    mlir::RewritePatternSet patterns2(context);
     patterns2.insert<ArrayLoadConversion>(context);
     patterns2.insert<ArrayMergeStoreConversion>(context);
     target.addIllegalOp<ArrayLoadOp, ArrayMergeStoreOp>();
diff --git a/flang/lib/Optimizer/Transforms/CharacterConversion.cpp b/flang/lib/Optimizer/Transforms/CharacterConversion.cpp
index a1701cca10aa0..d166d5165dd34 100644
--- a/flang/lib/Optimizer/Transforms/CharacterConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CharacterConversion.cpp
@@ -101,7 +101,7 @@ class CharacterConversion
     if (clOpts.runtimeName.empty()) {
       auto *context = &getContext();
       auto *func = getOperation();
-      mlir::OwningRewritePatternList patterns(context);
+      mlir::RewritePatternSet patterns(context);
       patterns.insert<CharacterConvertConversion>(context);
       mlir::ConversionTarget target(*context);
       target.addLegalDialect<mlir::AffineDialect, fir::FIROpsDialect,
diff --git a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
index 0376ca86e4ae1..28852a50f5d90 100644
--- a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp
@@ -148,7 +148,7 @@ void ExternalNameConversionPass::runOnOperation() {
   auto op = getOperation();
   auto *context = &getContext();
 
-  mlir::OwningRewritePatternList patterns(context);
+  mlir::RewritePatternSet patterns(context);
   patterns.insert<MangleNameOnCallOp, MangleNameOnCallOp, MangleNameOnFuncOp,
                   MangleNameForCommonBlock, MangleNameOnAddrOfOp,
                   MangleNameOnEmboxProcOp>(context);
diff --git a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
index 83c77d6895841..faaea1931a9ce 100644
--- a/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
+++ b/flang/lib/Optimizer/Transforms/MemoryAllocation.cpp
@@ -172,7 +172,7 @@ class MemoryAllocationOpt
   void runOnOperation() override {
     auto *context = &getContext();
     auto func = getOperation();
-    mlir::OwningRewritePatternList patterns(context);
+    mlir::RewritePatternSet patterns(context);
     mlir::ConversionTarget target(*context);
 
     useCommandLineOptions();
diff --git a/flang/lib/Optimizer/Transforms/RewriteLoop.cpp b/flang/lib/Optimizer/Transforms/RewriteLoop.cpp
index bd13a1dc98e0c..84e377540635e 100644
--- a/flang/lib/Optimizer/Transforms/RewriteLoop.cpp
+++ b/flang/lib/Optimizer/Transforms/RewriteLoop.cpp
@@ -296,7 +296,7 @@ class CfgConversion : public CFGConversionBase<CfgConversion> {
 public:
   void runOnOperation() override {
     auto *context = &getContext();
-    mlir::OwningRewritePatternList patterns(context);
+    mlir::RewritePatternSet patterns(context);
     patterns.insert<CfgLoopConv, CfgIfConv, CfgIterWhileConv>(
         context, forceLoopToExecuteOnce);
     mlir::ConversionTarget target(*context);
diff --git a/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h b/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h
index b8afecdaa93de..bfebeaec54c9d 100644
--- a/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h
+++ b/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h
@@ -25,7 +25,6 @@ class Value;
 class ValueRange;
 
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Emit code that computes the given affine expression using standard
 /// arithmetic operations applied to the provided dimension and symbol values.
diff --git a/mlir/include/mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h b/mlir/include/mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h
index 0878c633ec4f8..56db59dedd7a2 100644
--- a/mlir/include/mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h
+++ b/mlir/include/mlir/Conversion/AsyncToLLVM/AsyncToLLVM.h
@@ -20,7 +20,6 @@ class OperationPass;
 class MLIRContext;
 class TypeConverter;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Create a pass to convert Async operations to the LLVM dialect.
 std::unique_ptr<OperationPass<ModuleOp>> createConvertAsyncToLLVMPass();
diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
index c43eb24973bc3..b048a761f1e76 100644
--- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
+++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
@@ -26,7 +26,6 @@ struct LogicalResult;
 class ModuleOp;
 class Operation;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 template <typename T>
 class OperationPass;
@@ -56,7 +55,7 @@ std::unique_ptr<OperationPass<ModuleOp>> createGpuToLLVMConversionPass();
 /// Collect a set of patterns to convert from the GPU dialect to LLVM and
 /// populate converter for gpu types.
 void populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
-                                         OwningRewritePatternList &patterns,
+                                         RewritePatternSet &patterns,
                                          StringRef gpuBinaryAnnotation = {});
 
 } // namespace mlir
diff --git a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
index 051a53f9e5037..7d031f47c1975 100644
--- a/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
+++ b/mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h
@@ -16,7 +16,6 @@ namespace mlir {
 class LLVMTypeConverter;
 class ConversionTarget;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 template <typename OpT>
 class OperationPass;
diff --git a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
index 83d8a08bb5a0b..33b38cecf0fdd 100644
--- a/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
+++ b/mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h
@@ -16,7 +16,6 @@ namespace mlir {
 class LLVMTypeConverter;
 class ConversionTarget;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 template <typename OpT>
 class OperationPass;
diff --git a/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h b/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h
index 64b612ed6b129..cc1ca7192ae12 100644
--- a/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h
+++ b/mlir/include/mlir/Conversion/LinalgToSPIRV/LinalgToSPIRV.h
@@ -17,7 +17,6 @@ namespace mlir {
 class MLIRContext;
 class SPIRVTypeConverter;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Appends to a pattern list additional patterns for translating Linalg ops to
 /// SPIR-V ops.
diff --git a/mlir/include/mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h b/mlir/include/mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h
index 5df05cbabe0d6..3793c91992172 100644
--- a/mlir/include/mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h
+++ b/mlir/include/mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h
@@ -17,7 +17,6 @@ class ModuleOp;
 template <typename T>
 class OperationPass;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Populate the given list with patterns that convert from OpenMP to LLVM.
 void populateOpenMPToLLVMConversionPatterns(LLVMTypeConverter &converter,
diff --git a/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h b/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h
index d1221d17d768d..83af4389af8cf 100644
--- a/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h
+++ b/mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h
@@ -18,7 +18,6 @@ class MLIRContext;
 class Value;
 class Operation;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 namespace scf {
 class ForOp;
diff --git a/mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h b/mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h
index 284500df796c5..4effa7186ef36 100644
--- a/mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h
+++ b/mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h
@@ -21,7 +21,6 @@ class Pass;
 class SPIRVTypeConverter;
 struct ScfToSPIRVContextImpl;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 struct ScfToSPIRVContext {
   ScfToSPIRVContext();
diff --git a/mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h b/mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h
index 880d4ae6e7454..b29fdb72f7ecc 100644
--- a/mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h
+++ b/mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h
@@ -17,7 +17,6 @@ struct LogicalResult;
 class Pass;
 
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Collect a set of patterns to lower from scf.for, scf.if, and
 /// loop.terminator to CFG operations within the Standard dialect, in particular
diff --git a/mlir/include/mlir/Conversion/ShapeToStandard/ShapeToStandard.h b/mlir/include/mlir/Conversion/ShapeToStandard/ShapeToStandard.h
index a26d4dd2e314f..7dc81017946aa 100644
--- a/mlir/include/mlir/Conversion/ShapeToStandard/ShapeToStandard.h
+++ b/mlir/include/mlir/Conversion/ShapeToStandard/ShapeToStandard.h
@@ -18,7 +18,6 @@ class ModuleOp;
 template <typename T>
 class OperationPass;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 void populateShapeToStandardConversionPatterns(RewritePatternSet &patterns);
 
diff --git a/mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h b/mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h
index 2b935cdc3dab7..bb65899387ce0 100644
--- a/mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h
+++ b/mlir/include/mlir/Conversion/VectorToROCDL/VectorToROCDL.h
@@ -16,7 +16,6 @@ class ModuleOp;
 template <typename OpT>
 class OperationPass;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Collect a set of patterns to convert from the GPU dialect to ROCDL.
 void populateVectorToROCDLConversionPatterns(LLVMTypeConverter &converter,
diff --git a/mlir/include/mlir/Dialect/AMX/Transforms.h b/mlir/include/mlir/Dialect/AMX/Transforms.h
index 16dff0df13817..19608dff6f160 100644
--- a/mlir/include/mlir/Dialect/AMX/Transforms.h
+++ b/mlir/include/mlir/Dialect/AMX/Transforms.h
@@ -14,7 +14,6 @@ namespace mlir {
 class LLVMConversionTarget;
 class LLVMTypeConverter;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Collect a set of patterns to lower AMX ops to ops that map to LLVM
 /// intrinsics.
diff --git a/mlir/include/mlir/Dialect/ArmSVE/Transforms.h b/mlir/include/mlir/Dialect/ArmSVE/Transforms.h
index 980b1380da7b8..dcc485789875d 100644
--- a/mlir/include/mlir/Dialect/ArmSVE/Transforms.h
+++ b/mlir/include/mlir/Dialect/ArmSVE/Transforms.h
@@ -14,7 +14,6 @@ namespace mlir {
 class LLVMConversionTarget;
 class LLVMTypeConverter;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Collect a set of patterns to lower ArmSVE ops to ops that map to LLVM
 /// intrinsics.
diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
index 61156c733594d..bd258f44476b3 100644
--- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
+++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -1289,8 +1289,8 @@ LogicalResult peelAndCanonicalizeTiledLoop(RewriterBase &rewriter,
 /// global transformations, in a staged fashion:
 ///   1. the first stage consists of a list of FrozenRewritePatternSet. Each
 ///   FrozenRewritePatternSet in this list is applied once, in order.
-///   2. the second stage consists of a single OwningRewritePattern that is
-///   applied greedily until convergence.
+///   2. the second stage consists of a single RewritePattern that is applied
+///      greedily until convergence.
 ///   3. the third stage consists of applying a lambda, generally used for
 ///   non-local transformation effects. This allows creating custom fused
 ///   transformations where patterns can be ordered and applied at a finer
diff --git a/mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h b/mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h
index 20aa1c02db178..6a487d902cce9 100644
--- a/mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h
+++ b/mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h
@@ -16,11 +16,10 @@
 namespace mlir {
 class MLIRContext;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 namespace memref {
 
-void populateComposeSubViewPatterns(OwningRewritePatternList &patterns,
+void populateComposeSubViewPatterns(RewritePatternSet &patterns,
                                     MLIRContext *context);
 
 } // namespace memref
diff --git a/mlir/include/mlir/Dialect/SCF/Transforms.h b/mlir/include/mlir/Dialect/SCF/Transforms.h
index 8286d70c6f9a8..5ad7f7a1b7823 100644
--- a/mlir/include/mlir/Dialect/SCF/Transforms.h
+++ b/mlir/include/mlir/Dialect/SCF/Transforms.h
@@ -27,7 +27,6 @@ class Region;
 class RewriterBase;
 class TypeConverter;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 class Operation;
 class Value;
 class ValueRange;
diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h
index 6187d59e547ef..8457805217bef 100644
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h
@@ -25,7 +25,6 @@ class MLIRContext;
 class Operation;
 class TypeConverter;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 /// Add a pattern to the given pattern list to convert the operand and result
 /// types of a CallOp with the given type converter.
diff --git a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
index 3f723133b2a6e..dea605f1ae1a7 100644
--- a/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h
@@ -23,7 +23,6 @@ class GlobalCreator;
 } // namespace bufferization
 
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 void populateStdBufferizePatterns(
     bufferization::BufferizeTypeConverter &typeConverter,
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h
index b62399ce21aec..f90d02bda22d3 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h
@@ -17,7 +17,6 @@ class BufferizeTypeConverter;
 } // namespace bufferization
 
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 void populateTensorBufferizePatterns(
     bufferization::BufferizeTypeConverter &typeConverter,
diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h
index 816ec204acfe7..4bb2ca4fed0f8 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorOps.h
+++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h
@@ -32,7 +32,6 @@
 namespace mlir {
 class MLIRContext;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 namespace vector {
 class VectorDialect;
diff --git a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
index d43163e573bcc..0f987aea24c5e 100644
--- a/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
+++ b/mlir/include/mlir/Dialect/Vector/VectorTransforms.h
@@ -18,8 +18,6 @@ class VectorTransferOpInterface;
 class RewritePatternSet;
 class RewriterBase;
 
-using OwningRewritePatternList = RewritePatternSet;
-
 namespace scf {
 class IfOp;
 } // namespace scf
diff --git a/mlir/include/mlir/Dialect/X86Vector/Transforms.h b/mlir/include/mlir/Dialect/X86Vector/Transforms.h
index f6b7f29dc7763..fe98ebd955aa3 100644
--- a/mlir/include/mlir/Dialect/X86Vector/Transforms.h
+++ b/mlir/include/mlir/Dialect/X86Vector/Transforms.h
@@ -17,7 +17,6 @@ class ImplicitLocOpBuilder;
 class LLVMConversionTarget;
 class LLVMTypeConverter;
 class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
 
 namespace x86vector {
 
diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h
index 4e68a67ad1b74..7a055bf055c7d 100644
--- a/mlir/include/mlir/IR/OperationSupport.h
+++ b/mlir/include/mlir/IR/OperationSupport.h
@@ -49,15 +49,13 @@ class Pattern;
 class Region;
 class ResultRange;
 class RewritePattern;
+class RewritePatternSet;
 class Type;
 class Value;
 class ValueRange;
 template <typename ValueRangeT>
 class ValueTypeRange;
 
-class RewritePatternSet;
-using OwningRewritePatternList = RewritePatternSet;
-
 //===----------------------------------------------------------------------===//
 // OperationName
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
index ab498da6e8ccb..212453e8b3580 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp
@@ -848,7 +848,7 @@ mlir::createGpuToLLVMConversionPass() {
 }
 
 void mlir::populateGpuToLLVMConversionPatterns(
-    LLVMTypeConverter &converter, OwningRewritePatternList &patterns,
+    LLVMTypeConverter &converter, RewritePatternSet &patterns,
     StringRef gpuBinaryAnnotation) {
   converter.addConversion(
       [context = &converter.getContext()](gpu::AsyncTokenType type) -> Type {
diff --git a/mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp b/mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp
index fdff881a57211..43a49a59fca31 100644
--- a/mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp
+++ b/mlir/lib/Conversion/OpenACCToLLVM/OpenACCToLLVM.cpp
@@ -138,7 +138,7 @@ class LegalizeDataOpForLLVMTranslation : public ConvertOpToLLVMPattern<Op> {
 } // namespace
 
 void mlir::populateOpenACCToLLVMConversionPatterns(
-    LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
+    LLVMTypeConverter &converter, RewritePatternSet &patterns) {
   patterns.add<LegalizeDataOpForLLVMTranslation<acc::DataOp>>(converter);
   patterns.add<LegalizeDataOpForLLVMTranslation<acc::EnterDataOp>>(converter);
   patterns.add<LegalizeDataOpForLLVMTranslation<acc::ExitDataOp>>(converter);
diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp
index 9beecb6f66d41..d2473ebbff407 100644
--- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp
+++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp
@@ -209,7 +209,7 @@ OpFoldResult arith::AddIOp::fold(ArrayRef<Attribute> operands) {
 }
 
 void arith::AddIOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<AddIAddConstant, AddISubConstantRHS, AddISubConstantLHS>(
       context);
 }
@@ -231,7 +231,7 @@ OpFoldResult arith::SubIOp::fold(ArrayRef<Attribute> operands) {
 }
 
 void arith::SubIOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<SubIRHSAddConstant, SubILHSAddConstant, SubIRHSSubConstantRHS,
                   SubIRHSSubConstantLHS, SubILHSSubConstantRHS,
                   SubILHSSubConstantLHS>(context);
@@ -567,7 +567,7 @@ OpFoldResult arith::XOrIOp::fold(ArrayRef<Attribute> operands) {
 }
 
 void arith::XOrIOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<XOrINotCmpI>(context);
 }
 
@@ -846,7 +846,7 @@ bool arith::ExtSIOp::areCastCompatible(TypeRange inputs, TypeRange outputs) {
 }
 
 void arith::ExtSIOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<ExtSIOfExtUI>(context);
 }
 
@@ -926,7 +926,7 @@ bool arith::TruncFOp::areCastCompatible(TypeRange inputs, TypeRange outputs) {
 //===----------------------------------------------------------------------===//
 
 void arith::AndIOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<AndOfExtUI, AndOfExtSI>(context);
 }
 
@@ -935,7 +935,7 @@ void arith::AndIOp::getCanonicalizationPatterns(
 //===----------------------------------------------------------------------===//
 
 void arith::OrIOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<OrOfExtUI, OrOfExtSI>(context);
 }
 
@@ -1074,7 +1074,7 @@ OpFoldResult arith::IndexCastOp::fold(ArrayRef<Attribute> operands) {
 }
 
 void arith::IndexCastOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<IndexCastOfIndexCast, IndexCastOfExtSI>(context);
 }
 
@@ -1123,7 +1123,7 @@ OpFoldResult arith::BitcastOp::fold(ArrayRef<Attribute> operands) {
 }
 
 void arith::BitcastOp::getCanonicalizationPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<BitcastOfBitcast>(context);
 }
 
diff --git a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
index 95bc3e6b29599..0145ec37422d3 100644
--- a/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
+++ b/mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp
@@ -67,7 +67,7 @@ using ScalableMaskedDivFOpLowering =
 
 /// Populate the given list with patterns that convert from ArmSVE to LLVM.
 void mlir::populateArmSVELegalizeForLLVMExportPatterns(
-    LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
+    LLVMTypeConverter &converter, RewritePatternSet &patterns) {
   // Populate conversion patterns
 
   // clang-format off
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index 93770c9da5aa6..f1ec7bbdead24 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -102,7 +102,7 @@ struct SimplifyClones : public OpRewritePattern<CloneOp> {
 
 } // namespace
 
-void CloneOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void CloneOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                           MLIRContext *context) {
   results.insert<SimplifyClones>(context);
 }
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index 07bd8fdbb0bb4..eb7f0dedca02a 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -205,7 +205,7 @@ checkBufferizationResult(Operation *op, const BufferizationOptions &options) {
 LogicalResult bufferization::bufferizeOp(Operation *op,
                                          const BufferizationState &state) {
   // Bufferize the op and its nested ops.
-  OwningRewritePatternList patterns(op->getContext());
+  RewritePatternSet patterns(op->getContext());
   patterns.add<BufferizationPattern>(op->getContext(), state);
   if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
     return failure();
diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
index d7c5c622b1a6d..facf303fed150 100644
--- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
+++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp
@@ -1753,7 +1753,7 @@ struct TiledLoopResultsFolder : public OpRewritePattern<linalg::TiledLoopOp> {
 };
 } // namespace
 
-void TiledLoopOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void TiledLoopOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
   results.insert<TiledLoopInputsFolder, TiledLoopResultsFolder,
                  DimOfTiledLoopInsOutsFolder<tensor::DimOp>,
diff --git a/mlir/lib/Dialect/MemRef/Transforms/ComposeSubView.cpp b/mlir/lib/Dialect/MemRef/Transforms/ComposeSubView.cpp
index 29ba5060d167d..8de484c3b86e7 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/ComposeSubView.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/ComposeSubView.cpp
@@ -128,6 +128,6 @@ struct ComposeSubViewOpPattern : public OpRewritePattern<memref::SubViewOp> {
 } // namespace
 
 void mlir::memref::populateComposeSubViewPatterns(
-    OwningRewritePatternList &patterns, MLIRContext *context) {
+    RewritePatternSet &patterns, MLIRContext *context) {
   patterns.insert<ComposeSubViewOpPattern>(context);
 }
diff --git a/mlir/lib/Dialect/SCF/SCF.cpp b/mlir/lib/Dialect/SCF/SCF.cpp
index edae94cc9e748..382e3c662009c 100644
--- a/mlir/lib/Dialect/SCF/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/SCF.cpp
@@ -2537,7 +2537,7 @@ struct WhileUnusedArg : public OpRewritePattern<WhileOp> {
 };
 } // namespace
 
-void WhileOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void WhileOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                           MLIRContext *context) {
   results.insert<WhileConditionTruth, WhileUnusedResult, WhileCmpCond,
                  WhileUnusedArg>(context);
diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
index ffe155ad214e5..8d96f31b07847 100644
--- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
+++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp
@@ -764,7 +764,7 @@ struct SelectToExtUI : public OpRewritePattern<SelectOp> {
   }
 };
 
-void SelectOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void SelectOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) {
   results.insert<SelectI1Simplify, SelectToExtUI>(context);
 }
diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
index af8fa306d7651..f437eb455ca86 100644
--- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
+++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp
@@ -129,7 +129,7 @@ struct ConcatOptimization : public OpRewritePattern<tosa::ConcatOp> {
   }
 };
 
-void ConcatOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void ConcatOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                            MLIRContext *context) {
   results.insert<ConcatOptimization>(context);
 }
@@ -187,7 +187,7 @@ struct ReshapeConstOptimization : public OpRewritePattern<tosa::ReshapeOp> {
   }
 };
 
-void ReshapeOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void ReshapeOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                             MLIRContext *context) {
   results.insert<ReshapeReshapeOptimization>(context);
   results.insert<ReshapeConstOptimization>(context);
@@ -284,7 +284,7 @@ struct NoOpOptimization : public OpRewritePattern<tosa::TransposeOp> {
   }
 };
 
-void TransposeOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void TransposeOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
   results.insert<ConstantTransposeOptimization>(context);
   results.insert<NoOpOptimization>(context);
@@ -322,7 +322,7 @@ struct AddZeroOptimization : public OpRewritePattern<tosa::AddOp> {
   }
 };
 
-void AddOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void AddOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                         MLIRContext *context) {
   results.insert<AddZeroOptimization>(context);
 }
@@ -371,7 +371,7 @@ struct MulOneOptimization : public OpRewritePattern<tosa::MulOp> {
   }
 };
 
-void MulOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void MulOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                         MLIRContext *context) {
   results.insert<MulOneOptimization>(context);
 }
@@ -418,7 +418,7 @@ struct MaterializePadValue : public OpRewritePattern<tosa::PadOp> {
   }
 };
 
-void PadOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void PadOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                         MLIRContext *context) {
   results.insert<MaterializePadValue>(context);
 }
@@ -453,7 +453,7 @@ struct MaxPool2dIsNoOp : public OpRewritePattern<tosa::MaxPool2dOp> {
   }
 };
 
-void MaxPool2dOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void MaxPool2dOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                               MLIRContext *context) {
   results.insert<MaxPool2dIsNoOp>(context);
 }
@@ -556,7 +556,7 @@ struct ClampClampOptimization : public OpRewritePattern<tosa::ClampOp> {
   }
 };
 
-void ClampOp::getCanonicalizationPatterns(OwningRewritePatternList &results,
+void ClampOp::getCanonicalizationPatterns(RewritePatternSet &results,
                                           MLIRContext *context) {
   results.insert<ClampIsNoOp>(context);
   results.insert<ClampClampOptimization>(context);
diff --git a/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp b/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp
index cabb23d479af3..342fed37ad600 100644
--- a/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestLinalgDistribution.cpp
@@ -53,7 +53,7 @@ struct TestLinalgDistribution
 
 void TestLinalgDistribution::runOnOperation() {
   auto funcOp = getOperation();
-  OwningRewritePatternList distributeTiledLoopsPatterns(&getContext());
+  RewritePatternSet distributeTiledLoopsPatterns(&getContext());
   populateLinalgDistributeTiledLoopPattern(
       distributeTiledLoopsPatterns, getDistributionOptions(),
       LinalgTransformationFilter(
diff --git a/mlir/test/lib/Dialect/MemRef/TestComposeSubView.cpp b/mlir/test/lib/Dialect/MemRef/TestComposeSubView.cpp
index 20add4cc94c8b..6431cc4e459ef 100644
--- a/mlir/test/lib/Dialect/MemRef/TestComposeSubView.cpp
+++ b/mlir/test/lib/Dialect/MemRef/TestComposeSubView.cpp
@@ -34,7 +34,7 @@ void TestComposeSubViewPass::getDependentDialects(
 }
 
 void TestComposeSubViewPass::runOnOperation() {
-  OwningRewritePatternList patterns(&getContext());
+  RewritePatternSet patterns(&getContext());
   memref::populateComposeSubViewPatterns(patterns, &getContext());
   (void)applyPatternsAndFoldGreedily(getOperation(), std::move(patterns));
 }

From 586759cee563f32212f17c11678195bb7cc73700 Mon Sep 17 00:00:00 2001
From: Tanya Lattner <tanyalattner@llvm.org>
Date: Wed, 26 Jan 2022 23:22:04 -0800
Subject: [PATCH 849/946] Add email addresses to create a topic via email in a
 specific category.

---
 llvm/docs/DiscourseMigrationGuide.md | 72 ++++++++++++++++++++++++++--
 1 file changed, 67 insertions(+), 5 deletions(-)

diff --git a/llvm/docs/DiscourseMigrationGuide.md b/llvm/docs/DiscourseMigrationGuide.md
index 7ae9f6953d43b..b78ce27c5198b 100644
--- a/llvm/docs/DiscourseMigrationGuide.md
+++ b/llvm/docs/DiscourseMigrationGuide.md
@@ -34,13 +34,75 @@ are the typical use cases:
 * You can reply to a post, including quoting other peoples texts
   ([tested](https://llvm.discourse.group/t/email-interaction-with-discourse/3306/4) on GMail).
 * [Quoting previous topics in an reply](https://meta.discourse.org/t/single-quote-block-dropped-in-email-reply/144802)
-* **TODO:** Creating new topics via email is
-  [supported](https://meta.discourse.org/t/start-a-new-topic-via-email/62977)
-  but not configured at the moment. We would need to set up an email address
-  per category and give Discourse POP3 access to that email account. This sounds
-  like a solvable issue.
 * You can filter incoming emails in your email client by category using the
   `List-ID` email header field.
+* You can create topics through email using the email address that is specific to the category. Each category description shows the email address to use, or you can use the mapping below.
+
+## Mapping of email addresses to Discourse categories
+
+Use these email addresses to create a topic by email in the specific discourse category. You **must** have a Discourse account associated with the email address you are sending from or the email will be rejected.
+
+<table border=1>
+<tr><th>Discourse Category</th><th>Email Address</th></tr>
+<tr><td>Beginner</td><td>beginners@discourse.llvm.org</td></tr>
+<tr><td>LLVM Project</td><td>llvmproject@discourse.llvm.org</td></tr>
+<tr><td>IR & Optimizations</td><td>IR.Optimizations@discourse.llvm.org</td></tr>
+<tr><td>IR & Optimizations - Loop Optimizations</td><td>IR.Optimizations-Loops@discourse.llvm.org</td></tr>
+<tr><td>Code Generation</td><td>codegen@discourse.llvm.org</td></tr>
+<tr><td>Code Generation - AMDGPU</td><td>codegen-amdgpu@discourse.llvm.org</td></tr>
+<tr><td>Code Generation - Common Infrastructure</td><td>codegen-common@discourse.llvm.org</td></tr>
+<tr><td>Code Generation - AArch64</td><td>codegen-aarch64@discourse.llvm.org</td></tr>
+<tr><td>Code Generation - Arm</td><td>codegen-arm@discourse.llvm.org</td></tr>
+<tr><td>Code Generation - PowerPC</td><td>codegen-powerpc@discourse.llvm.org</td></tr>
+<tr><td>Code Generation - RISCV</td><td>codegen-riscv@discourse.llvm.org</td></tr>
+<tr><td>Code Generation - WebAssembly</td><td>codegen-webassembly@discourse.llvm.org</td></tr>
+<tr><td>Code Generation - X86</td><td>codegen-x86@discourse.llvm.org</td></tr>
+<tr><td>Clang Frontend</td><td>clang@discourse.llvm.org</td></tr>
+<tr><td>Clang Frontend - Using Clang</td><td>clang-users@discourse.llvm.org</td></tr>
+<tr><td>Clang Frontend - clangd</td><td>clangd@discourse.llvm.org</td></tr>
+<tr><td>Clang Frontend - Building Clang</td><td>clang-build@discourse.llvm.org</td></tr>
+<tr><td>Clang Frontend - Static Analyzer</td><td>clang-staticanalyzer@discourse.llvm.org</td></tr>
+<tr><td>Runtimes</td><td>runtimes@discourse.llvm.org</td></tr>
+<tr><td>Runtimes - C++</td><td>runtimes-cxx@discourse.llvm.org</td></tr>
+<tr><td>Runtimes - Sanitizers</td><td>runtimes-sanitizers@discourse.llvm.org</td></tr>
+<tr><td>Runtimes - C</td><td>runtimes-c@discourse.llvm.org</td></tr>
+<tr><td>Runtimes - OpenMP</td><td>runtimes-openmp@discourse.llvm.org</td></tr>
+<tr><td>Runtimes - OpenCL</td><td>runtimes-opencl@discourse.llvm.org</td></tr>
+<tr><td>MLIR</td><td>mlir@discourse.llvm.org</td></tr>
+<tr><td>MLIR - Announce</td><td>mlir-announce@discourse.llvm.org</td></tr>
+<tr><td>MLIR - Newsletter</td><td>mlir-news@discourse.llvm.org</td></tr>
+<tr><td>MLIR - TCP-WG</td><td>mlir-tcpwg@discourse.llvm.org</td></tr>
+<tr><td>Subprojects</td><td>subprojects@discourse.llvm.org</td></tr>
+<tr><td>Subprojects - Polly</td><td>polly@discourse.llvm.org</td></tr>
+<tr><td>Subprojects - LLDB</td><td>lldb@discourse.llvm.org</td></tr>
+<tr><td>Subprojects - LLD</td><td>lld@discourse.llvm.org</td></tr>
+<tr><td>Subprojects - Flang</td><td> flang@discourse.llvm.org</td></tr>
+<tr><td>Subprojects - Bolt</td><td>bolt@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure</td><td>infra@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - Release Testers</td><td>infra-release-testers@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - Website</td><td>infra-website@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - Documentation</td><td> infra-docs@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - GitHub</td><td>infra-github@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - Code Review</td><td>infra-codereview@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - Discord</td><td>infra-discord@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - Mailing Lists and Forums</td><td>infra-mailinglists@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - IRC</td><td> infra-irc@discourse.llvm.org</td></tr>
+<tr><td>Project Infrastructure - Infrastructure Working Group</td><td>infra-iwg@discourse.llvm.org</td></tr>
+<tr><td>Community</td><td>community@discourse.llvm.org</td></tr>
+<tr><td>Community - Women in Compilers and Tools</td><td>wict@discourse.llvm.org</td></tr>
+<tr><td>Community - Job Postings</td><td>community-jobs@discourse.llvm.org</td></tr>
+<tr><td>Community - US LLVM Developers' Meeting</td><td>devmtg-US@discourse.llvm.org</td></tr>
+<tr><td>Community - EuroLLVM</td><td>devmtg-euro@discourse.llvm.org</td></tr>
+<tr><td>Community - GSOC</td><td>gsoc@discourse.llvm.org</td></tr>
+<tr><td>Community - Community.o</td><td>community-dot-o@discourse.llvm.org</td></tr>
+<tr><td>Community - LLVM Foundation</td><td>foundation@discourse.llvm.org</td></tr>
+<tr><td>Community - Newsletters</td><td>newsletters@discourse.llvm.org</td></tr>
+<tr><td>Incubator</td><td>incubator@discourse.llvm.org</td></tr>
+<tr><td>Incubator - CIRCT</td><td>circt@discourse.llvm.org</td></tr>
+<tr><td>Incubator - Torch-MLIR</td><td>torch-mlir@discourse.llvm.org</td></tr>
+<tr><td>Incubator - Enzyme</td><td>enzyme@discourse.llvm.org</td></tr>
+<tr><td>Feedback</td><td>feedback@discourse.llvm.org</td></tr>
+</table>
 
 ## Mapping of mailing lists to categories
 

From 970f94d05193024bb6082a77dc10deaaf78afc0b Mon Sep 17 00:00:00 2001
From: Uday Bondhugula <uday@polymagelabs.com>
Date: Thu, 27 Jan 2022 13:00:19 +0530
Subject: [PATCH 850/946] [MLIR] Fix integration tests broken by D118285

[MLIR] Fix integration tests broken by D118285.
---
 .../Async/CPU/microbench-linalg-async-parallel-for.mlir     | 2 +-
 .../Async/CPU/microbench-scf-async-parallel-for.mlir        | 4 ++--
 .../Dialect/Async/CPU/test-async-parallel-for-1d.mlir       | 6 +++---
 .../Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir       | 2 +-
 mlir/test/python/integration/dialects/linalg/opsrun.py      | 2 +-
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
index 7c50fbddcd227..b6ae33b0e2190 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-linalg-async-parallel-for.mlir
@@ -7,7 +7,7 @@
 // RUN:               -convert-async-to-llvm                                   \
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -arith-expand                                            \
-// RUN:               -std-expand                                              \
+// RUN:               -memref-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
diff --git a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
index 3537616d839de..4040e9db07efd 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/microbench-scf-async-parallel-for.mlir
@@ -7,7 +7,7 @@
 // RUN:               -convert-linalg-to-loops                                 \
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -arith-expand                                            \
-// RUN:               -std-expand                                              \
+// RUN:               -memref-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
@@ -28,7 +28,7 @@
 // RUN:               -convert-linalg-to-loops                                 \
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -arith-expand                                            \
-// RUN:               -std-expand                                              \
+// RUN:               -memref-expand                                              \
 // RUN:               -convert-vector-to-llvm                                  \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -convert-std-to-llvm                                     \
diff --git a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
index 49758266dafd7..4394aac514864 100644
--- a/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
+++ b/mlir/test/Integration/Dialect/Async/CPU/test-async-parallel-for-1d.mlir
@@ -6,7 +6,7 @@
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -arith-expand                                            \
-// RUN:               -std-expand                                              \
+// RUN:               -memref-expand                                              \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN:               -reconcile-unrealized-casts                              \
 // RUN: | mlir-cpu-runner                                                      \
@@ -22,7 +22,7 @@
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -arith-expand                                            \
-// RUN:               -std-expand                                              \
+// RUN:               -memref-expand                                              \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN:               -reconcile-unrealized-casts                              \
 // RUN: | mlir-cpu-runner                                                      \
@@ -41,7 +41,7 @@
 // RUN:               -convert-scf-to-std                                      \
 // RUN:               -convert-memref-to-llvm                                  \
 // RUN:               -arith-expand                                            \
-// RUN:               -std-expand                                              \
+// RUN:               -memref-expand                                              \
 // RUN:               -convert-std-to-llvm                                     \
 // RUN:               -reconcile-unrealized-casts                              \
 // RUN: | mlir-cpu-runner                                                      \
diff --git a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
index d9c36c807d032..6c32db1f6fcae 100644
--- a/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
+++ b/mlir/test/Integration/Dialect/Standard/CPU/test-ceil-floor-pos-neg.mlir
@@ -1,5 +1,5 @@
 // RUN: mlir-opt %s -convert-vector-to-scf -lower-affine -convert-scf-to-std \
-// RUN:   -std-expand -arith-expand -convert-vector-to-llvm \
+// RUN:   -memref-expand -arith-expand -convert-vector-to-llvm \
 // RUN:   -convert-memref-to-llvm -convert-std-to-llvm \
 // RUN:   -reconcile-unrealized-casts | \
 // RUN: mlir-cpu-runner -e entry -entry-point-result=void  \
diff --git a/mlir/test/python/integration/dialects/linalg/opsrun.py b/mlir/test/python/integration/dialects/linalg/opsrun.py
index d8b9d978c3aeb..e0f00168f1766 100644
--- a/mlir/test/python/integration/dialects/linalg/opsrun.py
+++ b/mlir/test/python/integration/dialects/linalg/opsrun.py
@@ -128,7 +128,7 @@ def transform(module, boilerplate):
       boilerplate)
   pm = PassManager.parse(
       "builtin.func(convert-linalg-to-loops, lower-affine, " +
-      "convert-scf-to-std, arith-expand, std-expand), convert-vector-to-llvm," +
+      "convert-scf-to-std, arith-expand, memref-expand), convert-vector-to-llvm," +
       "convert-memref-to-llvm, convert-std-to-llvm," +
       "reconcile-unrealized-casts")
   pm.run(mod)

From fa5c5230d9143b0895bd04a04ee734ee23c9676a Mon Sep 17 00:00:00 2001
From: Uday Bondhugula <uday@polymagelabs.com>
Date: Thu, 27 Jan 2022 08:06:26 +0530
Subject: [PATCH 851/946] [MLIR] NFC. Rename pass cmd-line to prefix affine

Prefix "affine-" to affine transform passes that were missing it -- to
avoid ambiguity and for uniformity. There were only two needed this.

Move mispaced affine coalescing test case file.

NFC.

Differential Revision: https://reviews.llvm.org/D118314
---
 mlir/include/mlir/Dialect/Affine/Passes.td                    | 4 ++--
 mlir/test/{Transforms => Dialect/Affine}/loop-coalescing.mlir | 2 +-
 ...mplify-affine-structures.mlir => simplify-structures.mlir} | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)
 rename mlir/test/{Transforms => Dialect/Affine}/loop-coalescing.mlir (99%)
 rename mlir/test/Dialect/Affine/{simplify-affine-structures.mlir => simplify-structures.mlir} (99%)

diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td
index d67bd33ef9d62..50783adb8b4de 100644
--- a/mlir/include/mlir/Dialect/Affine/Passes.td
+++ b/mlir/include/mlir/Dialect/Affine/Passes.td
@@ -385,14 +385,14 @@ def AffineLoopNormalize : Pass<"affine-loop-normalize", "FuncOp"> {
   let constructor = "mlir::createAffineLoopNormalizePass()";
 }
 
-def LoopCoalescing : Pass<"loop-coalescing", "FuncOp"> {
+def LoopCoalescing : Pass<"affine-loop-coalescing", "FuncOp"> {
   let summary = "Coalesce nested loops with independent bounds into a single "
                 "loop";
   let constructor = "mlir::createLoopCoalescingPass()";
   let dependentDialects = ["arith::ArithmeticDialect"];
 }
 
-def SimplifyAffineStructures : Pass<"simplify-affine-structures", "FuncOp"> {
+def SimplifyAffineStructures : Pass<"affine-simplify-structures", "FuncOp"> {
   let summary = "Simplify affine expressions in maps/sets and normalize "
                 "memrefs";
   let constructor = "mlir::createSimplifyAffineStructuresPass()";
diff --git a/mlir/test/Transforms/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir
similarity index 99%
rename from mlir/test/Transforms/loop-coalescing.mlir
rename to mlir/test/Dialect/Affine/loop-coalescing.mlir
index c50212f376134..93423fff670e4 100644
--- a/mlir/test/Transforms/loop-coalescing.mlir
+++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -loop-coalescing %s | FileCheck %s
+// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing %s | FileCheck %s
 
 // CHECK-LABEL: @one_3d_nest
 func @one_3d_nest() {
diff --git a/mlir/test/Dialect/Affine/simplify-affine-structures.mlir b/mlir/test/Dialect/Affine/simplify-structures.mlir
similarity index 99%
rename from mlir/test/Dialect/Affine/simplify-affine-structures.mlir
rename to mlir/test/Dialect/Affine/simplify-structures.mlir
index 39abea5a76bae..826f98f0c043c 100644
--- a/mlir/test/Dialect/Affine/simplify-affine-structures.mlir
+++ b/mlir/test/Dialect/Affine/simplify-structures.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -simplify-affine-structures | FileCheck %s
+// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-simplify-structures | FileCheck %s
 
 // CHECK-DAG: #[[$SET_2D:.*]] = affine_set<(d0, d1) : (d0 - 100 == 0, d1 - 10 == 0, -d0 + 100 >= 0, d1 >= 0)>
 // CHECK-DAG: #[[$SET_7_11:.*]] = affine_set<(d0, d1) : (d0 * 7 + d1 * 5 + 88 == 0, d0 * 5 - d1 * 11 + 60 == 0, d0 * 11 + d1 * 7 - 24 == 0, d0 * 7 + d1 * 5 + 88 == 0)>

From 3bc152769d3e8e48b25103637e49d24f019f8bd0 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Wed, 26 Jan 2022 23:45:03 -0800
Subject: [PATCH 852/946] [ELF] Parallelize computeIsPreemptible

---
 lld/ELF/Writer.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
index 848db1f04b422..69fcad390d614 100644
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1890,9 +1890,11 @@ template <class ELFT> void Writer<ELFT>::finalizeSections() {
       finalizeSynthetic(part.ehFrame.get());
   }
 
-  if (config->hasDynSymTab)
-    for (Symbol *sym : symtab->symbols())
+  if (config->hasDynSymTab) {
+    parallelForEach(symtab->symbols(), [](Symbol *sym) {
       sym->isPreemptible = computeIsPreemptible(*sym);
+    });
+  }
 
   // Change values of linker-script-defined symbols from placeholders (assigned
   // by declareSymbols) to actual definitions.

From 615d71d9a3400fe66fa066c7ef63a0a467171810 Mon Sep 17 00:00:00 2001
From: Wu Xinlong <821408745@qq.com>
Date: Thu, 27 Jan 2022 11:59:13 +0800
Subject: [PATCH 853/946] [RISCV][CodeGen] Implement IR Intrinsic support for K
 extension
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This revision implements IR Intrinsic support for RISCV Scalar Crypto extension according to the specification of version [[ https://github.com/riscv/riscv-crypto/releases/tag/v1.0.0-scalar | 1.0]]
Co-author：@ksyx & @VincentWu & @lihongliang & @achieveartificialintelligence

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D102310
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       | 102 ++-
 llvm/lib/Target/RISCV/RISCV.td                |   6 +
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  10 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZb.td     |  64 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoZk.td     |  84 ++-
 llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll   | 502 +++++++++++++
 llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll        | 675 ------------------
 llvm/test/CodeGen/RISCV/rv32zbc-intrinsic.ll  |  22 -
 .../CodeGen/RISCV/rv32zbc-zbkc-intrinsic.ll   |  27 +
 llvm/test/CodeGen/RISCV/rv32zbkb-intrinsic.ll |  47 ++
 llvm/test/CodeGen/RISCV/rv32zbkx-intrinsic.ll |  24 +
 llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll       | 149 ++++
 llvm/test/CodeGen/RISCV/rv32zbp.ll            | 147 ----
 llvm/test/CodeGen/RISCV/rv32zknd-intrinsic.ll |  25 +
 llvm/test/CodeGen/RISCV/rv32zkne-intrinsic.ll |  25 +
 llvm/test/CodeGen/RISCV/rv32zknh-intrinsic.ll | 114 +++
 .../test/CodeGen/RISCV/rv32zksed-intrinsic.ll |  25 +
 llvm/test/CodeGen/RISCV/rv32zksh-intrinsic.ll |  25 +
 .../{rv64zbb-zbp.ll => rv64zbb-zbp-zbkb.ll}   | 461 ++++--------
 llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll  |  22 -
 .../CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll   |  28 +
 llvm/test/CodeGen/RISCV/rv64zbkb-intrinsic.ll |  25 +
 llvm/test/CodeGen/RISCV/rv64zbkx-intrinsic.ll |  24 +
 llvm/test/CodeGen/RISCV/rv64zbp-zbkb.ll       | 125 ++++
 llvm/test/CodeGen/RISCV/rv64zbp.ll            | 118 ---
 llvm/test/CodeGen/RISCV/rv64zknd-intrinsic.ll |  36 +
 .../CodeGen/RISCV/rv64zknd-zkne-intrinsic.ll  |  28 +
 llvm/test/CodeGen/RISCV/rv64zkne-intrinsic.ll |  25 +
 llvm/test/CodeGen/RISCV/rv64zknh-intrinsic.ll |  92 +++
 .../test/CodeGen/RISCV/rv64zksed-intrinsic.ll |  25 +
 llvm/test/CodeGen/RISCV/rv64zksh-intrinsic.ll |  25 +
 31 files changed, 1781 insertions(+), 1326 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll
 delete mode 100644 llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zbc-zbkc-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zbkb-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zbkx-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zknd-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zkne-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zknh-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zksed-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv32zksh-intrinsic.ll
 rename llvm/test/CodeGen/RISCV/{rv64zbb-zbp.ll => rv64zbb-zbp-zbkb.ll} (56%)
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zbkb-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zbkx-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zbp-zbkb.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zknd-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zknd-zkne-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zkne-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zknh-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zksed-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64zksh-intrinsic.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index fc697f5c71e85..6780436bd701a 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -88,9 +88,11 @@ let TargetPrefix = "riscv" in {
   // Zbb
   def int_riscv_orc_b : BitManipGPRIntrinsics;
 
-  // Zbc
+  // Zbc or Zbkc
   def int_riscv_clmul  : BitManipGPRGPRIntrinsics;
   def int_riscv_clmulh : BitManipGPRGPRIntrinsics;
+
+  // Zbc
   def int_riscv_clmulr : BitManipGPRGPRIntrinsics;
 
   // Zbe
@@ -123,6 +125,15 @@ let TargetPrefix = "riscv" in {
   // Zbt
   def int_riscv_fsl : BitManipGPRGPRGRIntrinsics;
   def int_riscv_fsr : BitManipGPRGPRGRIntrinsics;
+
+  // Zbkb
+  def int_riscv_brev8 : BitManipGPRIntrinsics;
+  def int_riscv_zip   : BitManipGPRIntrinsics;
+  def int_riscv_unzip : BitManipGPRIntrinsics;
+
+  // Zbkx
+  def int_riscv_xperm4  : BitManipGPRGPRIntrinsics;
+  def int_riscv_xperm8  : BitManipGPRGPRIntrinsics;
 } // TargetPrefix = "riscv"
 
 //===----------------------------------------------------------------------===//
@@ -1453,3 +1464,92 @@ let TargetPrefix = "riscv" in {
                      llvm_anyint_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
                     [NoCapture<ArgIndex<1>>, IntrWriteMem]>;
 } // TargetPrefix = "riscv"
+
+//===----------------------------------------------------------------------===//
+// Scalar Cryptography
+//
+// These intrinsics will lower directly into the corresponding instructions
+// added by the scalar cyptography extension, if the extension is present.
+
+let TargetPrefix = "riscv" in {
+
+class ScalarCryptoGprIntrinsicAny
+    : Intrinsic<[llvm_anyint_ty],
+                [LLVMMatchType<0>],
+                [IntrNoMem, IntrSpeculatable]>;
+
+class ScalarCryptoByteSelect32
+    : Intrinsic<[llvm_i32_ty],
+                [llvm_i32_ty, llvm_i32_ty, llvm_i8_ty],
+                [IntrNoMem, IntrWillReturn, IntrSpeculatable,
+                 ImmArg<ArgIndex<2>>]>;
+
+class ScalarCryptoGprGprIntrinsic32
+    : Intrinsic<[llvm_i32_ty],
+                [llvm_i32_ty, llvm_i32_ty],
+                [IntrNoMem, IntrWillReturn, IntrSpeculatable]>;
+
+class ScalarCryptoGprGprIntrinsic64
+    : Intrinsic<[llvm_i64_ty],
+                [llvm_i64_ty, llvm_i64_ty],
+                [IntrNoMem, IntrWillReturn, IntrSpeculatable]>;
+
+class ScalarCryptoGprIntrinsic64
+    : Intrinsic<[llvm_i64_ty],
+                [llvm_i64_ty],
+                [IntrNoMem, IntrWillReturn, IntrSpeculatable]>;
+
+class ScalarCryptoByteSelectAny
+    : Intrinsic<[llvm_anyint_ty],
+                [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i8_ty],
+                [IntrNoMem, IntrSpeculatable, IntrWillReturn,
+                 ImmArg<ArgIndex<2>>, Returned<ArgIndex<0>>]>;
+
+// Zknd
+def int_riscv_aes32dsi  : ScalarCryptoByteSelect32;
+def int_riscv_aes32dsmi : ScalarCryptoByteSelect32;
+
+def int_riscv_aes64ds   : ScalarCryptoGprGprIntrinsic64;
+def int_riscv_aes64dsm  : ScalarCryptoGprGprIntrinsic64;
+
+def int_riscv_aes64im   : ScalarCryptoGprIntrinsic64;
+
+// Zkne
+def int_riscv_aes32esi  : ScalarCryptoByteSelect32;
+def int_riscv_aes32esmi : ScalarCryptoByteSelect32;
+
+def int_riscv_aes64es   : ScalarCryptoGprGprIntrinsic64;
+def int_riscv_aes64esm  : ScalarCryptoGprGprIntrinsic64;
+
+// Zknd & Zkne
+def int_riscv_aes64ks2  : ScalarCryptoGprGprIntrinsic64;
+def int_riscv_aes64ks1i : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty],
+                                    [IntrNoMem, IntrSpeculatable,
+                                     IntrWillReturn, ImmArg<ArgIndex<1>>]>;
+
+// Zknh
+def int_riscv_sha256sig0 : ScalarCryptoGprIntrinsicAny;
+def int_riscv_sha256sig1 : ScalarCryptoGprIntrinsicAny;
+def int_riscv_sha256sum0 : ScalarCryptoGprIntrinsicAny;
+def int_riscv_sha256sum1 : ScalarCryptoGprIntrinsicAny;
+
+def int_riscv_sha512sig0l : ScalarCryptoGprGprIntrinsic32;
+def int_riscv_sha512sig0h : ScalarCryptoGprGprIntrinsic32;
+def int_riscv_sha512sig1l : ScalarCryptoGprGprIntrinsic32;
+def int_riscv_sha512sig1h : ScalarCryptoGprGprIntrinsic32;
+def int_riscv_sha512sum0r : ScalarCryptoGprGprIntrinsic32;
+def int_riscv_sha512sum1r : ScalarCryptoGprGprIntrinsic32;
+
+def int_riscv_sha512sig0 : ScalarCryptoGprIntrinsic64;
+def int_riscv_sha512sig1 : ScalarCryptoGprIntrinsic64;
+def int_riscv_sha512sum0 : ScalarCryptoGprIntrinsic64;
+def int_riscv_sha512sum1 : ScalarCryptoGprIntrinsic64;
+
+// Zksed
+def int_riscv_sm4ks      : ScalarCryptoByteSelectAny;
+def int_riscv_sm4ed      : ScalarCryptoByteSelectAny;
+
+// Zksh
+def int_riscv_sm3p0      : ScalarCryptoGprIntrinsicAny;
+def int_riscv_sm3p1      : ScalarCryptoGprIntrinsicAny;
+} // TargetPrefix = "riscv"
diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td
index 0462a26a0c55e..5b0f27c5e9370 100644
--- a/llvm/lib/Target/RISCV/RISCV.td
+++ b/llvm/lib/Target/RISCV/RISCV.td
@@ -169,6 +169,12 @@ def HasStdExtZbpOrZbkb
                                    "'Zbp' (Permutation 'Zb' Instructions) or "
                                    "'Zbkb' (Bitmanip instructions for Cryptography)">;
 
+def HasStdExtZbbOrZbkb
+    : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbkb()">,
+                AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbkb),
+                                   "'Zbb' (Basic Bit-Manipulation) or "
+                                   "'Zbkb' (Bitmanip instructions for Cryptography)">;
+
 def HasStdExtZbbOrZbpOrZbkb
     : Predicate<"Subtarget->hasStdExtZbb() || Subtarget->hasStdExtZbp() || Subtarget->hasStdExtZbkb()">,
                 AssemblerPredicate<(any_of FeatureStdExtZbb, FeatureStdExtZbp, FeatureStdExtZbkb),
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c71f1a698563d..205f71a6fe478 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -250,7 +250,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
   setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
 
-  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
+  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
+      Subtarget.hasStdExtZbkb()) {
     if (Subtarget.is64Bit()) {
       setOperationAction(ISD::ROTL, MVT::i32, Custom);
       setOperationAction(ISD::ROTR, MVT::i32, Custom);
@@ -278,7 +279,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
     // pattern match it directly in isel.
     setOperationAction(ISD::BSWAP, XLenVT,
-                       Subtarget.hasStdExtZbb() ? Legal : Expand);
+                       (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbkb())
+                           ? Legal
+                           : Expand);
   }
 
   if (Subtarget.hasStdExtZbb()) {
@@ -1232,7 +1235,8 @@ bool RISCVTargetLowering::hasAndNotCompare(SDValue Y) const {
   if (VT.isVector())
     return false;
 
-  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) &&
+  return (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp() ||
+          Subtarget.hasStdExtZbkb()) &&
          !isa<ConstantSDNode>(Y);
 }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 75f9ec98cc1aa..db3f5851879a7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -794,16 +794,16 @@ def : InstAlias<"bext $rd, $rs1, $shamt",
 // Codegen patterns
 //===----------------------------------------------------------------------===//
 
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
 def : Pat<(and GPR:$rs1, (not GPR:$rs2)), (ANDN GPR:$rs1, GPR:$rs2)>;
 def : Pat<(or  GPR:$rs1, (not GPR:$rs2)), (ORN  GPR:$rs1, GPR:$rs2)>;
 def : Pat<(xor GPR:$rs1, (not GPR:$rs2)), (XNOR GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbbOrZbp]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
 
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
 def : PatGprGpr<rotl, ROL>;
 def : PatGprGpr<rotr, ROR>;
-} // Predicates = [HasStdExtZbbOrZbp]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb]
 
 let Predicates = [HasStdExtZbs] in {
 def : Pat<(and (not (shiftop<shl> 1, GPR:$rs2)), GPR:$rs1),
@@ -854,7 +854,7 @@ def : Pat<(and GPR:$r, BCLRIANDIMask:$i),
 
 // There's no encoding for roli in the the 'B' extension as it can be
 // implemented with rori by negating the immediate.
-let Predicates = [HasStdExtZbbOrZbp] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb] in {
 def : PatGprImm<rotr, RORI, uimmlog2xlen>;
 def : Pat<(rotl GPR:$rs1, uimmlog2xlen:$shamt),
           (RORI GPR:$rs1, (ImmSubFromXLen uimmlog2xlen:$shamt))>;
@@ -960,35 +960,38 @@ def : PatGprGpr<umin, MINU>;
 def : PatGprGpr<umax, MAXU>;
 } // Predicates = [HasStdExtZbb]
 
-let Predicates = [HasStdExtZbb, IsRV32] in {
+let Predicates = [HasStdExtZbbOrZbkb, IsRV32] in {
 def : Pat<(i32 (bswap GPR:$rs1)), (REV8_RV32 GPR:$rs1)>;
-} // Predicates = [HasStdExtZbb, IsRV32]
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV32]
 
-let Predicates = [HasStdExtZbb, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
 def : Pat<(i64 (bswap GPR:$rs1)), (REV8_RV64 GPR:$rs1)>;
-} // Predicates = [HasStdExtZbb, IsRV64]
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV32] in {
+let Predicates = [HasStdExtZbpOrZbkb, IsRV32] in
 def : Pat<(i32 (or (and GPR:$rs1, 0x0000FFFF), (shl GPR:$rs2, (i32 16)))),
           (PACK GPR:$rs1, GPR:$rs2)>;
+
+let Predicates = [HasStdExtZbp, IsRV32] in
 def : Pat<(i32 (or (and GPR:$rs2, 0xFFFF0000), (srl GPR:$rs1, (i32 16)))),
           (PACKU GPR:$rs1, GPR:$rs2)>;
 
-}
-let Predicates = [HasStdExtZbp, IsRV64] in {
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in
 def : Pat<(i64 (or (and GPR:$rs1, 0x00000000FFFFFFFF), (shl GPR:$rs2, (i64 32)))),
           (PACK GPR:$rs1, GPR:$rs2)>;
+
+let Predicates = [HasStdExtZbp, IsRV64] in
 def : Pat<(i64 (or (and GPR:$rs2, 0xFFFFFFFF00000000), (srl GPR:$rs1, (i64 32)))),
           (PACKU GPR:$rs1, GPR:$rs2)>;
-}
-let Predicates = [HasStdExtZbp] in {
+
+let Predicates = [HasStdExtZbpOrZbkb] in {
 def : Pat<(or (and (shl GPR:$rs2, (XLenVT 8)), 0xFFFF),
               (and GPR:$rs1, 0x00FF)),
           (PACKH GPR:$rs1, GPR:$rs2)>;
 def : Pat<(or (shl (and GPR:$rs2, 0x00FF), (XLenVT 8)),
               (and GPR:$rs1, 0x00FF)),
           (PACKH GPR:$rs1, GPR:$rs2)>;
-}
+} // Predicates = [HasStdExtZbpOrZbkb]
 
 let Predicates = [HasStdExtZbbOrZbp, IsRV32] in
 def : Pat<(i32 (and GPR:$rs, 0xFFFF)), (ZEXTH_RV32 GPR:$rs)>;
@@ -1091,13 +1094,13 @@ def : Pat<(i64 (add (and (shl GPR:$rs1, (i64 3)), 0x7FFFFFFFF), non_imm12:$rs2))
           (SH3ADDUW GPR:$rs1, GPR:$rs2)>;
 } // Predicates = [HasStdExtZba, IsRV64]
 
-let Predicates = [HasStdExtZbbOrZbp, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64] in {
 def : PatGprGpr<riscv_rolw, ROLW>;
 def : PatGprGpr<riscv_rorw, RORW>;
 def : PatGprImm<riscv_rorw, RORIW, uimm5>;
 def : Pat<(riscv_rolw GPR:$rs1, uimm5:$rs2),
           (RORIW GPR:$rs1, (ImmSubFrom32 uimm5:$rs2))>;
-} // Predicates = [HasStdExtZbbOrZbp, IsRV64]
+} // Predicates = [HasStdExtZbbOrZbpOrZbkb, IsRV64]
 
 let Predicates = [HasStdExtZbp, IsRV64] in {
 def : Pat<(riscv_rorw (riscv_grevw GPR:$rs1, 24), 16), (GREVIW GPR:$rs1, 8)>;
@@ -1129,7 +1132,7 @@ def : PatGpr<riscv_ctzw, CTZW>;
 def : Pat<(i64 (ctpop (i64 (zexti32 (i64 GPR:$rs1))))), (CPOPW GPR:$rs1)>;
 } // Predicates = [HasStdExtZbb, IsRV64]
 
-let Predicates = [HasStdExtZbp, IsRV64] in {
+let Predicates = [HasStdExtZbpOrZbkb, IsRV64] in {
 def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
                                (and GPR:$rs1, 0x000000000000FFFF)),
                            i32)),
@@ -1137,16 +1140,21 @@ def : Pat<(i64 (sext_inreg (or (shl GPR:$rs2, (i64 16)),
 def : Pat<(i64 (or (sext_inreg (shl GPR:$rs2, (i64 16)), i32),
                    (and GPR:$rs1, 0x000000000000FFFF))),
           (PACKW GPR:$rs1, GPR:$rs2)>;
+}
+
+let Predicates = [HasStdExtZbp, IsRV64] in
 def : Pat<(i64 (or (and (assertsexti32 GPR:$rs2), 0xFFFFFFFFFFFF0000),
                    (srl (and GPR:$rs1, 0xFFFFFFFF), (i64 16)))),
           (PACKUW GPR:$rs1, GPR:$rs2)>;
-} // Predicates = [HasStdExtZbp, IsRV64]
 
-let Predicates = [HasStdExtZbc] in {
+
+let Predicates = [HasStdExtZbcOrZbkc] in {
 def : PatGprGpr<int_riscv_clmul, CLMUL>;
 def : PatGprGpr<int_riscv_clmulh, CLMULH>;
+} // Predicates = [HasStdExtZbcOrZbkc]
+
+let Predicates = [HasStdExtZbc] in
 def : PatGprGpr<int_riscv_clmulr, CLMULR>;
-} // Predicates = [HasStdExtZbc]
 
 let Predicates = [HasStdExtZbe] in {
 def : PatGprGpr<riscv_bcompress, BCOMPRESS>;
@@ -1177,3 +1185,17 @@ def : PatGprGpr<riscv_bfp, BFP>;
 
 let Predicates = [HasStdExtZbf, IsRV64] in
 def : PatGprGpr<riscv_bfpw, BFPW>;
+
+let Predicates = [HasStdExtZbkb] in {
+def : PatGpr<int_riscv_brev8, BREV8>;
+} // Predicates = [HasStdExtZbkb]
+
+let Predicates = [HasStdExtZbkb, IsRV32] in {
+def : PatGpr<int_riscv_zip, ZIP_RV32>;
+def : PatGpr<int_riscv_unzip, UNZIP_RV32>;
+} // Predicates = [HasStdExtZbkb, IsRV32]
+
+let Predicates = [HasStdExtZbkx] in {
+def : PatGprGpr<int_riscv_xperm4, XPERMN>;
+def : PatGprGpr<int_riscv_xperm8, XPERMB>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
index 52a29526a541a..4a41cddedc715 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZk.td
@@ -21,7 +21,7 @@ def RnumArg : AsmOperandClass {
   let DiagnosticType = "InvalidRnumArg";
 }
 
-def rnum : Operand<XLenVT>, ImmLeaf<XLenVT, [{return (Imm >= 0 && Imm <= 10);}]> {
+def rnum : Operand<i32>, TImmLeaf<i32, [{return (Imm >= 0 && Imm <= 10);}]> {
   let ParserMatchClass = RnumArg;
   let EncoderMethod = "getImmOpValue";
   let DecoderMethod = "decodeUImmOperand<4>";
@@ -29,6 +29,13 @@ def rnum : Operand<XLenVT>, ImmLeaf<XLenVT, [{return (Imm >= 0 && Imm <= 10);}]>
   let OperandNamespace = "RISCVOp";
 }
 
+def byteselect : Operand<i8>, TImmLeaf<i8, [{return isUInt<2>(Imm);}]> {
+  let ParserMatchClass = UImmAsmOperand<2>;
+  let DecoderMethod = "decodeUImmOperand<2>";
+  let OperandType = "OPERAND_UIMM2";
+  let OperandNamespace = "RISCVOp";
+}
+
 //===----------------------------------------------------------------------===//
 // Instruction class templates
 //===----------------------------------------------------------------------===//
@@ -42,7 +49,7 @@ class RVKUnary<bits<12> imm12_in, bits<3> funct3, string opcodestr>
 let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
 class RVKByteSelect<bits<5> funct5, string opcodestr>
     : RVInstR<{0b00, funct5}, 0b000, OPC_OP, (outs GPR:$rd),
-              (ins GPR:$rs1, GPR:$rs2, uimm2:$bs),
+              (ins GPR:$rs1, GPR:$rs2, byteselect:$bs),
               opcodestr, "$rd, $rs1, $rs2, $bs">{
   bits<2> bs;
   let Inst{31-30} = bs;
@@ -121,3 +128,76 @@ let Predicates = [HasStdExtZksh] in {
 def SM3P0 : RVKUnary<0b000100001000, 0b001, "sm3p0">;
 def SM3P1 : RVKUnary<0b000100001001, 0b001, "sm3p1">;
 } // Predicates = [HasStdExtZksh]
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+class PatGprGprByteSelect<SDPatternOperator OpNode, RVInst Inst>
+    : Pat<(OpNode GPR:$rs1, GPR:$rs2, i8:$imm),
+          (Inst GPR:$rs1, GPR:$rs2, byteselect:$imm)>;
+
+// Zknd
+let Predicates = [HasStdExtZknd, IsRV32] in {
+def : PatGprGprByteSelect<int_riscv_aes32dsi, AES32DSI>;
+def : PatGprGprByteSelect<int_riscv_aes32dsmi, AES32DSMI>;
+} // Predicates = [HasStdExtZknd, IsRV32]
+
+let Predicates = [HasStdExtZknd, IsRV64] in {
+def : PatGprGpr<int_riscv_aes64ds, AES64DS>;
+def : PatGprGpr<int_riscv_aes64dsm, AES64DSM>;
+def : PatGpr<int_riscv_aes64im, AES64IM>;
+} // Predicates = [HasStdExtZknd, IsRV64]
+
+let Predicates = [HasStdExtZkndOrZkne, IsRV64] in {
+def : PatGprGpr<int_riscv_aes64ks2, AES64KS2>;
+def : Pat<(int_riscv_aes64ks1i GPR:$rs1, i32:$rnum),
+          (AES64KS1I GPR:$rs1, rnum:$rnum)>;
+} // Predicates = [HasStdExtZkndOrZkne, IsRV64]
+
+// Zkne
+let Predicates = [HasStdExtZkne, IsRV32] in {
+def : PatGprGprByteSelect<int_riscv_aes32esi, AES32ESI>;
+def : PatGprGprByteSelect<int_riscv_aes32esmi, AES32ESMI>;
+} // Predicates = [HasStdExtZkne, IsRV32]
+
+let Predicates = [HasStdExtZkne, IsRV64] in {
+def : PatGprGpr<int_riscv_aes64es, AES64ES>;
+def : PatGprGpr<int_riscv_aes64esm, AES64ESM>;
+} // Predicates = [HasStdExtZkne, IsRV64]
+
+// Zknh
+let Predicates = [HasStdExtZknh] in {
+def : PatGpr<int_riscv_sha256sig0, SHA256SIG0>;
+def : PatGpr<int_riscv_sha256sig1, SHA256SIG1>;
+def : PatGpr<int_riscv_sha256sum0, SHA256SUM0>;
+def : PatGpr<int_riscv_sha256sum1, SHA256SUM1>;
+} // Predicates = [HasStdExtZknh]
+
+let Predicates = [HasStdExtZknh, IsRV32] in {
+def : PatGprGpr<int_riscv_sha512sig0l, SHA512SIG0L>;
+def : PatGprGpr<int_riscv_sha512sig0h, SHA512SIG0H>;
+def : PatGprGpr<int_riscv_sha512sig1l, SHA512SIG1L>;
+def : PatGprGpr<int_riscv_sha512sig1h, SHA512SIG1H>;
+def : PatGprGpr<int_riscv_sha512sum0r, SHA512SUM0R>;
+def : PatGprGpr<int_riscv_sha512sum1r, SHA512SUM1R>;
+} // Predicates = [HasStdExtZknh, IsRV32]
+
+let Predicates = [HasStdExtZknh, IsRV64] in {
+def : PatGpr<int_riscv_sha512sig0, SHA512SIG0>;
+def : PatGpr<int_riscv_sha512sig1, SHA512SIG1>;
+def : PatGpr<int_riscv_sha512sum0, SHA512SUM0>;
+def : PatGpr<int_riscv_sha512sum1, SHA512SUM1>;
+} // Predicates = [HasStdExtZknh, IsRV64]
+
+// Zksed
+let Predicates = [HasStdExtZksed] in {
+def : PatGprGprByteSelect<int_riscv_sm4ks, SM4KS>;
+def : PatGprGprByteSelect<int_riscv_sm4ed, SM4ED>;
+} // Predicates = [HasStdExtZksed]
+
+// Zksh
+let Predicates = [HasStdExtZksh] in {
+def : PatGpr<int_riscv_sm3p0, SM3P0>;
+def : PatGpr<int_riscv_sm3p1, SM3P1>;
+} // Predicates = [HasStdExtZksh]
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll
new file mode 100644
index 0000000000000..ecb784a5ff5c3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zbb-zbp-zbkb.ll
@@ -0,0 +1,502 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV32ZBB-ZBP-ZBKB
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV32ZBB-ZBP-ZBKB
+; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV32ZBB-ZBP-ZBKB
+
+define i32 @andn_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    and a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: andn_i32:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a0, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %and = and i32 %neg, %a
+  ret i32 %and
+}
+
+define i64 @andn_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a3, a3
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    and a0, a2, a0
+; RV32I-NEXT:    and a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: andn_i64:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a0, a0, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a1, a1, a3
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %and = and i64 %neg, %a
+  ret i64 %and
+}
+
+define i32 @orn_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: orn_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: orn_i32:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    orn a0, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %neg = xor i32 %b, -1
+  %or = or i32 %neg, %a
+  ret i32 %or
+}
+
+define i64 @orn_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: orn_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a3, a3
+; RV32I-NEXT:    not a2, a2
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    or a1, a3, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: orn_i64:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    orn a0, a0, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    orn a1, a1, a3
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %neg = xor i64 %b, -1
+  %or = or i64 %neg, %a
+  ret i64 %or
+}
+
+define i32 @xnor_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: xnor_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: xnor_i32:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    xnor a0, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %neg = xor i32 %a, -1
+  %xor = xor i32 %neg, %b
+  ret i32 %xor
+}
+
+define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: xnor_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    xor a1, a1, a3
+; RV32I-NEXT:    xor a0, a0, a2
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: xnor_i64:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    xnor a0, a0, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    xnor a1, a1, a3
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %neg = xor i64 %a, -1
+  %xor = xor i64 %neg, %b
+  ret i64 %xor
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+
+define i32 @rol_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: rol_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    sll a2, a0, a1
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    srl a0, a0, a1
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: rol_i32:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    rol a0, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %or
+}
+
+; This test is presented here in case future expansions of the Bitmanip
+; extensions introduce instructions suitable for this pattern.
+
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+
+define i64 @rol_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: rol_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a3, a2, 5
+; RV32I-NEXT:    andi a3, a3, 1
+; RV32I-NEXT:    mv a4, a1
+; RV32I-NEXT:    bnez a3, .LBB7_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:    sll a5, a4, a2
+; RV32I-NEXT:    bnez a3, .LBB7_4
+; RV32I-NEXT:  # %bb.3:
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:  .LBB7_4:
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    not a6, a2
+; RV32I-NEXT:    srl a1, a1, a6
+; RV32I-NEXT:    or a3, a5, a1
+; RV32I-NEXT:    sll a0, a0, a2
+; RV32I-NEXT:    srli a1, a4, 1
+; RV32I-NEXT:    srl a1, a1, a6
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    mv a0, a3
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: rol_i64:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a3, a2, 5
+; RV32ZBB-ZBP-ZBKB-NEXT:    andi a3, a3, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a4, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    bnez a3, .LBB7_2
+; RV32ZBB-ZBP-ZBKB-NEXT:  # %bb.1:
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a4, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:  .LBB7_2:
+; RV32ZBB-ZBP-ZBKB-NEXT:    sll a5, a4, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    bnez a3, .LBB7_4
+; RV32ZBB-ZBP-ZBKB-NEXT:  # %bb.3:
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:  .LBB7_4:
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a1, a0, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    not a6, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    srl a1, a1, a6
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a3, a5, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    sll a0, a0, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a1, a4, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    srl a1, a1, a6
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a1, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a0, a3
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+
+define i32 @ror_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: ror_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srl a2, a0, a1
+; RV32I-NEXT:    neg a1, a1
+; RV32I-NEXT:    sll a0, a0, a1
+; RV32I-NEXT:    or a0, a2, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: ror_i32:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    ror a0, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
+  ret i32 %or
+}
+
+; This test is presented here in case future expansions of the Bitmanip
+; extensions introduce instructions suitable for this pattern.
+
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define i64 @ror_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: ror_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a4, a2, 32
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    beqz a4, .LBB9_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a3, a1
+; RV32I-NEXT:  .LBB9_2:
+; RV32I-NEXT:    srl a5, a3, a2
+; RV32I-NEXT:    beqz a4, .LBB9_4
+; RV32I-NEXT:  # %bb.3:
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:  .LBB9_4:
+; RV32I-NEXT:    slli a0, a1, 1
+; RV32I-NEXT:    not a4, a2
+; RV32I-NEXT:    sll a0, a0, a4
+; RV32I-NEXT:    or a0, a0, a5
+; RV32I-NEXT:    srl a1, a1, a2
+; RV32I-NEXT:    slli a2, a3, 1
+; RV32I-NEXT:    sll a2, a2, a4
+; RV32I-NEXT:    or a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: ror_i64:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    andi a4, a2, 32
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a3, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    beqz a4, .LBB9_2
+; RV32ZBB-ZBP-ZBKB-NEXT:  # %bb.1:
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a3, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:  .LBB9_2:
+; RV32ZBB-ZBP-ZBKB-NEXT:    srl a5, a3, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    beqz a4, .LBB9_4
+; RV32ZBB-ZBP-ZBKB-NEXT:  # %bb.3:
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a1, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:  .LBB9_4:
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a0, a1, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    not a4, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    sll a0, a0, a4
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a0, a0, a5
+; RV32ZBB-ZBP-ZBKB-NEXT:    srl a1, a1, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a2, a3, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    sll a2, a2, a4
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a1, a2, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
+  ret i64 %or
+}
+
+define i32 @rori_i32_fshl(i32 %a) nounwind {
+; RV32I-LABEL: rori_i32_fshl:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    slli a0, a0, 31
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: rori_i32_fshl:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    rori a0, a0, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %1
+}
+
+define i32 @rori_i32_fshr(i32 %a) nounwind {
+; RV32I-LABEL: rori_i32_fshr:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a1, a0, 1
+; RV32I-NEXT:    srli a0, a0, 31
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: rori_i32_fshr:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    rori a0, a0, 31
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
+  ret i32 %1
+}
+
+define i64 @rori_i64(i64 %a) nounwind {
+; RV32I-LABEL: rori_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a2, a0, 1
+; RV32I-NEXT:    slli a3, a1, 31
+; RV32I-NEXT:    or a2, a3, a2
+; RV32I-NEXT:    srli a1, a1, 1
+; RV32I-NEXT:    slli a0, a0, 31
+; RV32I-NEXT:    or a1, a0, a1
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: rori_i64:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a2, a0, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a3, a1, 31
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a2, a3, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a1, a1, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 31
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a1, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a0, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %1
+}
+
+define i64 @rori_i64_fshr(i64 %a) nounwind {
+; RV32I-LABEL: rori_i64_fshr:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    srli a2, a1, 31
+; RV32I-NEXT:    slli a3, a0, 1
+; RV32I-NEXT:    or a2, a3, a2
+; RV32I-NEXT:    srli a0, a0, 31
+; RV32I-NEXT:    slli a1, a1, 1
+; RV32I-NEXT:    or a1, a1, a0
+; RV32I-NEXT:    mv a0, a2
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: rori_i64_fshr:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a2, a1, 31
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a3, a0, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a2, a3, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a0, a0, 31
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a1, a1, 1
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a1, a1, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    mv a0, a2
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
+  ret i64 %1
+}
+
+define i8 @srli_i8(i8 %a) nounwind {
+; RV32I-LABEL: srli_i8:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srli a0, a0, 30
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: srli_i8:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 24
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a0, a0, 30
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %1 = lshr i8 %a, 6
+  ret i8 %1
+}
+
+; We could use sext.b+srai, but slli+srai offers more opportunities for
+; comppressed instructions.
+define i8 @srai_i8(i8 %a) nounwind {
+; RV32I-LABEL: srai_i8:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 24
+; RV32I-NEXT:    srai a0, a0, 29
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: srai_i8:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 24
+; RV32ZBB-ZBP-ZBKB-NEXT:    srai a0, a0, 29
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %1 = ashr i8 %a, 5
+  ret i8 %1
+}
+
+; We could use zext.h+srli, but slli+srli offers more opportunities for
+; comppressed instructions.
+define i16 @srli_i16(i16 %a) nounwind {
+; RV32I-LABEL: srli_i16:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 22
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: srli_i16:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 16
+; RV32ZBB-ZBP-ZBKB-NEXT:    srli a0, a0, 22
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %1 = lshr i16 %a, 6
+  ret i16 %1
+}
+
+; We could use sext.h+srai, but slli+srai offers more opportunities for
+; comppressed instructions.
+define i16 @srai_i16(i16 %a) nounwind {
+; RV32I-LABEL: srai_i16:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srai a0, a0, 25
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: srai_i16:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 16
+; RV32ZBB-ZBP-ZBKB-NEXT:    srai a0, a0, 25
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %1 = ashr i16 %a, 9
+  ret i16 %1
+}
+
+define i1 @andn_seqz_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_seqz_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: andn_seqz_i32:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a0, a1, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    seqz a0, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %and = and i32 %a, %b
+  %cmpeq = icmp eq i32 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_seqz_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_seqz_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    seqz a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: andn_seqz_i64:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a1, a3, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a0, a2, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a0, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    seqz a0, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %and = and i64 %a, %b
+  %cmpeq = icmp eq i64 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_snez_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: andn_snez_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: andn_snez_i32:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a0, a1, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    snez a0, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %and = and i32 %a, %b
+  %cmpeq = icmp ne i32 %and, %b
+  ret i1 %cmpeq
+}
+
+define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: andn_snez_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    not a1, a1
+; RV32I-NEXT:    and a1, a1, a3
+; RV32I-NEXT:    and a0, a0, a2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    snez a0, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-ZBP-ZBKB-LABEL: andn_snez_i64:
+; RV32ZBB-ZBP-ZBKB:       # %bb.0:
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a1, a3, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    andn a0, a2, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    or a0, a0, a1
+; RV32ZBB-ZBP-ZBKB-NEXT:    snez a0, a0
+; RV32ZBB-ZBP-ZBKB-NEXT:    ret
+  %and = and i64 %a, %b
+  %cmpeq = icmp ne i64 %and, %b
+  ret i1 %cmpeq
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
deleted file mode 100644
index ac75dfd0773b3..0000000000000
--- a/llvm/test/CodeGen/RISCV/rv32zbb-zbp.ll
+++ /dev/null
@@ -1,675 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV32I
-; RUN: llc -mtriple=riscv32 -mattr=+zbb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV32ZBB
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV32ZBP
-
-define i32 @andn_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: andn_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    and a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: andn_i32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: andn_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    andn a0, a0, a1
-; RV32ZBP-NEXT:    ret
-  %neg = xor i32 %b, -1
-  %and = and i32 %neg, %a
-  ret i32 %and
-}
-
-define i64 @andn_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: andn_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a3, a3
-; RV32I-NEXT:    not a2, a2
-; RV32I-NEXT:    and a0, a2, a0
-; RV32I-NEXT:    and a1, a3, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: andn_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a0, a2
-; RV32ZBB-NEXT:    andn a1, a1, a3
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: andn_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    andn a0, a0, a2
-; RV32ZBP-NEXT:    andn a1, a1, a3
-; RV32ZBP-NEXT:    ret
-  %neg = xor i64 %b, -1
-  %and = and i64 %neg, %a
-  ret i64 %and
-}
-
-define i32 @orn_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: orn_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: orn_i32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    orn a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: orn_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    orn a0, a0, a1
-; RV32ZBP-NEXT:    ret
-  %neg = xor i32 %b, -1
-  %or = or i32 %neg, %a
-  ret i32 %or
-}
-
-define i64 @orn_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: orn_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a3, a3
-; RV32I-NEXT:    not a2, a2
-; RV32I-NEXT:    or a0, a2, a0
-; RV32I-NEXT:    or a1, a3, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: orn_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    orn a0, a0, a2
-; RV32ZBB-NEXT:    orn a1, a1, a3
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: orn_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    orn a0, a0, a2
-; RV32ZBP-NEXT:    orn a1, a1, a3
-; RV32ZBP-NEXT:    ret
-  %neg = xor i64 %b, -1
-  %or = or i64 %neg, %a
-  ret i64 %or
-}
-
-define i32 @xnor_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: xnor_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: xnor_i32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    xnor a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: xnor_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    xnor a0, a0, a1
-; RV32ZBP-NEXT:    ret
-  %neg = xor i32 %a, -1
-  %xor = xor i32 %neg, %b
-  ret i32 %xor
-}
-
-define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: xnor_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    xor a1, a1, a3
-; RV32I-NEXT:    xor a0, a0, a2
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: xnor_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    xnor a0, a0, a2
-; RV32ZBB-NEXT:    xnor a1, a1, a3
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: xnor_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    xnor a0, a0, a2
-; RV32ZBP-NEXT:    xnor a1, a1, a3
-; RV32ZBP-NEXT:    ret
-  %neg = xor i64 %a, -1
-  %xor = xor i64 %neg, %b
-  ret i64 %xor
-}
-
-declare i32 @llvm.fshl.i32(i32, i32, i32)
-
-define i32 @rol_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: rol_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    sll a2, a0, a1
-; RV32I-NEXT:    neg a1, a1
-; RV32I-NEXT:    srl a0, a0, a1
-; RV32I-NEXT:    or a0, a2, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: rol_i32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rol a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: rol_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    rol a0, a0, a1
-; RV32ZBP-NEXT:    ret
-  %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
-  ret i32 %or
-}
-
-; This test is presented here in case future expansions of the Bitmanip
-; extensions introduce instructions suitable for this pattern.
-
-declare i64 @llvm.fshl.i64(i64, i64, i64)
-
-define i64 @rol_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: rol_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a3, a2, 5
-; RV32I-NEXT:    andi a3, a3, 1
-; RV32I-NEXT:    mv a4, a1
-; RV32I-NEXT:    bnez a3, .LBB7_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a4, a0
-; RV32I-NEXT:  .LBB7_2:
-; RV32I-NEXT:    sll a5, a4, a2
-; RV32I-NEXT:    bnez a3, .LBB7_4
-; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:  .LBB7_4:
-; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    not a6, a2
-; RV32I-NEXT:    srl a1, a1, a6
-; RV32I-NEXT:    or a3, a5, a1
-; RV32I-NEXT:    sll a0, a0, a2
-; RV32I-NEXT:    srli a1, a4, 1
-; RV32I-NEXT:    srl a1, a1, a6
-; RV32I-NEXT:    or a1, a0, a1
-; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: rol_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    srli a3, a2, 5
-; RV32ZBB-NEXT:    andi a3, a3, 1
-; RV32ZBB-NEXT:    mv a4, a1
-; RV32ZBB-NEXT:    bnez a3, .LBB7_2
-; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    mv a4, a0
-; RV32ZBB-NEXT:  .LBB7_2:
-; RV32ZBB-NEXT:    sll a5, a4, a2
-; RV32ZBB-NEXT:    bnez a3, .LBB7_4
-; RV32ZBB-NEXT:  # %bb.3:
-; RV32ZBB-NEXT:    mv a0, a1
-; RV32ZBB-NEXT:  .LBB7_4:
-; RV32ZBB-NEXT:    srli a1, a0, 1
-; RV32ZBB-NEXT:    not a6, a2
-; RV32ZBB-NEXT:    srl a1, a1, a6
-; RV32ZBB-NEXT:    or a3, a5, a1
-; RV32ZBB-NEXT:    sll a0, a0, a2
-; RV32ZBB-NEXT:    srli a1, a4, 1
-; RV32ZBB-NEXT:    srl a1, a1, a6
-; RV32ZBB-NEXT:    or a1, a0, a1
-; RV32ZBB-NEXT:    mv a0, a3
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: rol_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    srli a3, a2, 5
-; RV32ZBP-NEXT:    andi a3, a3, 1
-; RV32ZBP-NEXT:    mv a4, a1
-; RV32ZBP-NEXT:    bnez a3, .LBB7_2
-; RV32ZBP-NEXT:  # %bb.1:
-; RV32ZBP-NEXT:    mv a4, a0
-; RV32ZBP-NEXT:  .LBB7_2:
-; RV32ZBP-NEXT:    sll a5, a4, a2
-; RV32ZBP-NEXT:    bnez a3, .LBB7_4
-; RV32ZBP-NEXT:  # %bb.3:
-; RV32ZBP-NEXT:    mv a0, a1
-; RV32ZBP-NEXT:  .LBB7_4:
-; RV32ZBP-NEXT:    srli a1, a0, 1
-; RV32ZBP-NEXT:    not a6, a2
-; RV32ZBP-NEXT:    srl a1, a1, a6
-; RV32ZBP-NEXT:    or a3, a5, a1
-; RV32ZBP-NEXT:    sll a0, a0, a2
-; RV32ZBP-NEXT:    srli a1, a4, 1
-; RV32ZBP-NEXT:    srl a1, a1, a6
-; RV32ZBP-NEXT:    or a1, a0, a1
-; RV32ZBP-NEXT:    mv a0, a3
-; RV32ZBP-NEXT:    ret
-  %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
-  ret i64 %or
-}
-
-declare i32 @llvm.fshr.i32(i32, i32, i32)
-
-define i32 @ror_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: ror_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    srl a2, a0, a1
-; RV32I-NEXT:    neg a1, a1
-; RV32I-NEXT:    sll a0, a0, a1
-; RV32I-NEXT:    or a0, a2, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ror_i32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    ror a0, a0, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: ror_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    ror a0, a0, a1
-; RV32ZBP-NEXT:    ret
-  %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
-  ret i32 %or
-}
-
-; This test is presented here in case future expansions of the Bitmanip
-; extensions introduce instructions suitable for this pattern.
-
-declare i64 @llvm.fshr.i64(i64, i64, i64)
-
-define i64 @ror_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: ror_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a4, a2, 32
-; RV32I-NEXT:    mv a3, a0
-; RV32I-NEXT:    beqz a4, .LBB9_2
-; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv a3, a1
-; RV32I-NEXT:  .LBB9_2:
-; RV32I-NEXT:    srl a5, a3, a2
-; RV32I-NEXT:    beqz a4, .LBB9_4
-; RV32I-NEXT:  # %bb.3:
-; RV32I-NEXT:    mv a1, a0
-; RV32I-NEXT:  .LBB9_4:
-; RV32I-NEXT:    slli a0, a1, 1
-; RV32I-NEXT:    not a4, a2
-; RV32I-NEXT:    sll a0, a0, a4
-; RV32I-NEXT:    or a0, a0, a5
-; RV32I-NEXT:    srl a1, a1, a2
-; RV32I-NEXT:    slli a2, a3, 1
-; RV32I-NEXT:    sll a2, a2, a4
-; RV32I-NEXT:    or a1, a2, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: ror_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andi a4, a2, 32
-; RV32ZBB-NEXT:    mv a3, a0
-; RV32ZBB-NEXT:    beqz a4, .LBB9_2
-; RV32ZBB-NEXT:  # %bb.1:
-; RV32ZBB-NEXT:    mv a3, a1
-; RV32ZBB-NEXT:  .LBB9_2:
-; RV32ZBB-NEXT:    srl a5, a3, a2
-; RV32ZBB-NEXT:    beqz a4, .LBB9_4
-; RV32ZBB-NEXT:  # %bb.3:
-; RV32ZBB-NEXT:    mv a1, a0
-; RV32ZBB-NEXT:  .LBB9_4:
-; RV32ZBB-NEXT:    slli a0, a1, 1
-; RV32ZBB-NEXT:    not a4, a2
-; RV32ZBB-NEXT:    sll a0, a0, a4
-; RV32ZBB-NEXT:    or a0, a0, a5
-; RV32ZBB-NEXT:    srl a1, a1, a2
-; RV32ZBB-NEXT:    slli a2, a3, 1
-; RV32ZBB-NEXT:    sll a2, a2, a4
-; RV32ZBB-NEXT:    or a1, a2, a1
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: ror_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    andi a4, a2, 32
-; RV32ZBP-NEXT:    mv a3, a0
-; RV32ZBP-NEXT:    beqz a4, .LBB9_2
-; RV32ZBP-NEXT:  # %bb.1:
-; RV32ZBP-NEXT:    mv a3, a1
-; RV32ZBP-NEXT:  .LBB9_2:
-; RV32ZBP-NEXT:    srl a5, a3, a2
-; RV32ZBP-NEXT:    beqz a4, .LBB9_4
-; RV32ZBP-NEXT:  # %bb.3:
-; RV32ZBP-NEXT:    mv a1, a0
-; RV32ZBP-NEXT:  .LBB9_4:
-; RV32ZBP-NEXT:    slli a0, a1, 1
-; RV32ZBP-NEXT:    not a4, a2
-; RV32ZBP-NEXT:    sll a0, a0, a4
-; RV32ZBP-NEXT:    or a0, a0, a5
-; RV32ZBP-NEXT:    srl a1, a1, a2
-; RV32ZBP-NEXT:    slli a2, a3, 1
-; RV32ZBP-NEXT:    sll a2, a2, a4
-; RV32ZBP-NEXT:    or a1, a2, a1
-; RV32ZBP-NEXT:    ret
-  %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
-  ret i64 %or
-}
-
-define i32 @rori_i32_fshl(i32 %a) nounwind {
-; RV32I-LABEL: rori_i32_fshl:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a1, a0, 1
-; RV32I-NEXT:    slli a0, a0, 31
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: rori_i32_fshl:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rori a0, a0, 1
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: rori_i32_fshl:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    rori a0, a0, 1
-; RV32ZBP-NEXT:    ret
-  %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
-  ret i32 %1
-}
-
-define i32 @rori_i32_fshr(i32 %a) nounwind {
-; RV32I-LABEL: rori_i32_fshr:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a1, a0, 1
-; RV32I-NEXT:    srli a0, a0, 31
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: rori_i32_fshr:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    rori a0, a0, 31
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: rori_i32_fshr:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    rori a0, a0, 31
-; RV32ZBP-NEXT:    ret
-  %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
-  ret i32 %1
-}
-
-define i64 @rori_i64(i64 %a) nounwind {
-; RV32I-LABEL: rori_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a2, a0, 1
-; RV32I-NEXT:    slli a3, a1, 31
-; RV32I-NEXT:    or a2, a3, a2
-; RV32I-NEXT:    srli a1, a1, 1
-; RV32I-NEXT:    slli a0, a0, 31
-; RV32I-NEXT:    or a1, a0, a1
-; RV32I-NEXT:    mv a0, a2
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: rori_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    srli a2, a0, 1
-; RV32ZBB-NEXT:    slli a3, a1, 31
-; RV32ZBB-NEXT:    or a2, a3, a2
-; RV32ZBB-NEXT:    srli a1, a1, 1
-; RV32ZBB-NEXT:    slli a0, a0, 31
-; RV32ZBB-NEXT:    or a1, a0, a1
-; RV32ZBB-NEXT:    mv a0, a2
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: rori_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    srli a2, a0, 1
-; RV32ZBP-NEXT:    slli a3, a1, 31
-; RV32ZBP-NEXT:    or a2, a3, a2
-; RV32ZBP-NEXT:    srli a1, a1, 1
-; RV32ZBP-NEXT:    slli a0, a0, 31
-; RV32ZBP-NEXT:    or a1, a0, a1
-; RV32ZBP-NEXT:    mv a0, a2
-; RV32ZBP-NEXT:    ret
-  %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
-  ret i64 %1
-}
-
-define i64 @rori_i64_fshr(i64 %a) nounwind {
-; RV32I-LABEL: rori_i64_fshr:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    srli a2, a1, 31
-; RV32I-NEXT:    slli a3, a0, 1
-; RV32I-NEXT:    or a2, a3, a2
-; RV32I-NEXT:    srli a0, a0, 31
-; RV32I-NEXT:    slli a1, a1, 1
-; RV32I-NEXT:    or a1, a1, a0
-; RV32I-NEXT:    mv a0, a2
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: rori_i64_fshr:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    srli a2, a1, 31
-; RV32ZBB-NEXT:    slli a3, a0, 1
-; RV32ZBB-NEXT:    or a2, a3, a2
-; RV32ZBB-NEXT:    srli a0, a0, 31
-; RV32ZBB-NEXT:    slli a1, a1, 1
-; RV32ZBB-NEXT:    or a1, a1, a0
-; RV32ZBB-NEXT:    mv a0, a2
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: rori_i64_fshr:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    srli a2, a1, 31
-; RV32ZBP-NEXT:    slli a3, a0, 1
-; RV32ZBP-NEXT:    or a2, a3, a2
-; RV32ZBP-NEXT:    srli a0, a0, 31
-; RV32ZBP-NEXT:    slli a1, a1, 1
-; RV32ZBP-NEXT:    or a1, a1, a0
-; RV32ZBP-NEXT:    mv a0, a2
-; RV32ZBP-NEXT:    ret
-  %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
-  ret i64 %1
-}
-
-define i8 @srli_i8(i8 %a) nounwind {
-; RV32I-LABEL: srli_i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    srli a0, a0, 30
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: srli_i8:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    slli a0, a0, 24
-; RV32ZBB-NEXT:    srli a0, a0, 30
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: srli_i8:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    slli a0, a0, 24
-; RV32ZBP-NEXT:    srli a0, a0, 30
-; RV32ZBP-NEXT:    ret
-  %1 = lshr i8 %a, 6
-  ret i8 %1
-}
-
-; We could use sext.b+srai, but slli+srai offers more opportunities for
-; comppressed instructions.
-define i8 @srai_i8(i8 %a) nounwind {
-; RV32I-LABEL: srai_i8:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 24
-; RV32I-NEXT:    srai a0, a0, 29
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: srai_i8:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    slli a0, a0, 24
-; RV32ZBB-NEXT:    srai a0, a0, 29
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: srai_i8:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    slli a0, a0, 24
-; RV32ZBP-NEXT:    srai a0, a0, 29
-; RV32ZBP-NEXT:    ret
-  %1 = ashr i8 %a, 5
-  ret i8 %1
-}
-
-; We could use zext.h+srli, but slli+srli offers more opportunities for
-; comppressed instructions.
-define i16 @srli_i16(i16 %a) nounwind {
-; RV32I-LABEL: srli_i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srli a0, a0, 22
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: srli_i16:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    slli a0, a0, 16
-; RV32ZBB-NEXT:    srli a0, a0, 22
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: srli_i16:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    slli a0, a0, 16
-; RV32ZBP-NEXT:    srli a0, a0, 22
-; RV32ZBP-NEXT:    ret
-  %1 = lshr i16 %a, 6
-  ret i16 %1
-}
-
-; We could use sext.h+srai, but slli+srai offers more opportunities for
-; comppressed instructions.
-define i16 @srai_i16(i16 %a) nounwind {
-; RV32I-LABEL: srai_i16:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srai a0, a0, 25
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: srai_i16:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    slli a0, a0, 16
-; RV32ZBB-NEXT:    srai a0, a0, 25
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: srai_i16:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    slli a0, a0, 16
-; RV32ZBP-NEXT:    srai a0, a0, 25
-; RV32ZBP-NEXT:    ret
-  %1 = ashr i16 %a, 9
-  ret i16 %1
-}
-
-define i1 @andn_seqz_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: andn_seqz_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    seqz a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: andn_seqz_i32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a1, a0
-; RV32ZBB-NEXT:    seqz a0, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: andn_seqz_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    andn a0, a1, a0
-; RV32ZBP-NEXT:    seqz a0, a0
-; RV32ZBP-NEXT:    ret
-  %and = and i32 %a, %b
-  %cmpeq = icmp eq i32 %and, %b
-  ret i1 %cmpeq
-}
-
-define i1 @andn_seqz_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: andn_seqz_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    seqz a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: andn_seqz_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a1, a3, a1
-; RV32ZBB-NEXT:    andn a0, a2, a0
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    seqz a0, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: andn_seqz_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    andn a1, a3, a1
-; RV32ZBP-NEXT:    andn a0, a2, a0
-; RV32ZBP-NEXT:    or a0, a0, a1
-; RV32ZBP-NEXT:    seqz a0, a0
-; RV32ZBP-NEXT:    ret
-  %and = and i64 %a, %b
-  %cmpeq = icmp eq i64 %and, %b
-  ret i1 %cmpeq
-}
-
-define i1 @andn_snez_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: andn_snez_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    and a0, a0, a1
-; RV32I-NEXT:    xor a0, a0, a1
-; RV32I-NEXT:    snez a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: andn_snez_i32:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a0, a1, a0
-; RV32ZBB-NEXT:    snez a0, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: andn_snez_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    andn a0, a1, a0
-; RV32ZBP-NEXT:    snez a0, a0
-; RV32ZBP-NEXT:    ret
-  %and = and i32 %a, %b
-  %cmpeq = icmp ne i32 %and, %b
-  ret i1 %cmpeq
-}
-
-define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: andn_snez_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    not a0, a0
-; RV32I-NEXT:    not a1, a1
-; RV32I-NEXT:    and a1, a1, a3
-; RV32I-NEXT:    and a0, a0, a2
-; RV32I-NEXT:    or a0, a0, a1
-; RV32I-NEXT:    snez a0, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBB-LABEL: andn_snez_i64:
-; RV32ZBB:       # %bb.0:
-; RV32ZBB-NEXT:    andn a1, a3, a1
-; RV32ZBB-NEXT:    andn a0, a2, a0
-; RV32ZBB-NEXT:    or a0, a0, a1
-; RV32ZBB-NEXT:    snez a0, a0
-; RV32ZBB-NEXT:    ret
-;
-; RV32ZBP-LABEL: andn_snez_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    andn a1, a3, a1
-; RV32ZBP-NEXT:    andn a0, a2, a0
-; RV32ZBP-NEXT:    or a0, a0, a1
-; RV32ZBP-NEXT:    snez a0, a0
-; RV32ZBP-NEXT:    ret
-  %and = and i64 %a, %b
-  %cmpeq = icmp ne i64 %and, %b
-  ret i1 %cmpeq
-}
diff --git a/llvm/test/CodeGen/RISCV/rv32zbc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbc-intrinsic.ll
index 870ac34153ad2..6870ae57d0f9f 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbc-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbc-intrinsic.ll
@@ -2,28 +2,6 @@
 ; RUN: llc -mtriple=riscv32 -mattr=+zbc -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV32ZBC
 
-declare i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
-
-define i32 @clmul32(i32 %a, i32 %b) nounwind {
-; RV32ZBC-LABEL: clmul32:
-; RV32ZBC:       # %bb.0:
-; RV32ZBC-NEXT:    clmul a0, a0, a1
-; RV32ZBC-NEXT:    ret
-  %tmp = call i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
- ret i32 %tmp
-}
-
-declare i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
-
-define i32 @clmul32h(i32 %a, i32 %b) nounwind {
-; RV32ZBC-LABEL: clmul32h:
-; RV32ZBC:       # %bb.0:
-; RV32ZBC-NEXT:    clmulh a0, a0, a1
-; RV32ZBC-NEXT:    ret
-  %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
- ret i32 %tmp
-}
-
 declare i32 @llvm.riscv.clmulr.i32(i32 %a, i32 %b)
 
 define i32 @clmul32r(i32 %a, i32 %b) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rv32zbc-zbkc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbc-zbkc-intrinsic.ll
new file mode 100644
index 0000000000000..823e14304aee2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zbc-zbkc-intrinsic.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zbc -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBC-ZBKC
+; RUN: llc -mtriple=riscv32 -mattr=+zbkc -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBC-ZBKC
+
+declare i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
+
+define i32 @clmul32(i32 %a, i32 %b) nounwind {
+; RV32ZBC-ZBKC-LABEL: clmul32:
+; RV32ZBC-ZBKC:       # %bb.0:
+; RV32ZBC-ZBKC-NEXT:    clmul a0, a0, a1
+; RV32ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmul.i32(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+
+define i32 @clmul32h(i32 %a, i32 %b) nounwind {
+; RV32ZBC-ZBKC-LABEL: clmul32h:
+; RV32ZBC-ZBKC:       # %bb.0:
+; RV32ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV32ZBC-ZBKC-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.clmulh.i32(i32 %a, i32 %b)
+ ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zbkb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbkb-intrinsic.ll
new file mode 100644
index 0000000000000..9efd7e5feb3f0
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zbkb-intrinsic.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBKB
+
+declare i32 @llvm.riscv.brev8(i32);
+
+define i32 @brev8(i32 %a) nounwind {
+; RV32ZBKB-LABEL: brev8:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    brev8 a0, a0
+; RV32ZBKB-NEXT:    ret
+  %val = call i32 @llvm.riscv.brev8(i32 %a)
+  ret i32 %val
+}
+
+declare i32 @llvm.bswap.i32(i32)
+
+define i32 @rev8_i32(i32 %a) nounwind {
+; RV32ZBKB-LABEL: rev8_i32:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    rev8 a0, a0
+; RV32ZBKB-NEXT:    ret
+  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.zip(i32);
+
+define i32 @zip(i32 %a) nounwind {
+; RV32ZBKB-LABEL: zip:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    zip a0, a0
+; RV32ZBKB-NEXT:    ret
+  %val = call i32 @llvm.riscv.zip(i32 %a)
+  ret i32 %val
+}
+
+declare i32 @llvm.riscv.unzip(i32);
+
+define i32 @unzip(i32 %a) nounwind {
+; RV32ZBKB-LABEL: unzip:
+; RV32ZBKB:       # %bb.0:
+; RV32ZBKB-NEXT:    unzip a0, a0
+; RV32ZBKB-NEXT:    ret
+  %val = call i32 @llvm.riscv.unzip(i32 %a)
+  ret i32 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zbkx-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zbkx-intrinsic.ll
new file mode 100644
index 0000000000000..eeb2997fe850a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zbkx-intrinsic.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=riscv32 -mattr=+zbkx -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBKX
+
+declare i32 @llvm.riscv.xperm8.i32(i32 %a, i32 %b)
+
+define i32 @xperm8(i32 %a, i32 %b) nounwind {
+; RV32ZBKX-LABEL: xperm8:
+; RV32ZBKX:       # %bb.0:
+; RV32ZBKX-NEXT:    xperm8 a0, a0, a1
+; RV32ZBKX-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.xperm8.i32(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.xperm4.i32(i32 %a, i32 %b)
+
+define i32 @xperm4(i32 %a, i32 %b) nounwind {
+; RV32ZBKX-LABEL: xperm4:
+; RV32ZBKX:       # %bb.0:
+; RV32ZBKX-NEXT:    xperm4 a0, a0, a1
+; RV32ZBKX-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.xperm4.i32(i32 %a, i32 %b)
+ ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll
new file mode 100644
index 0000000000000..31e8c8909ccca
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32I
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBP-ZBKB
+; RUN: llc -mtriple=riscv32 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZBP-ZBKB
+
+define i32 @pack_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: pack_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    slli a0, a0, 16
+; RV32I-NEXT:    srli a0, a0, 16
+; RV32I-NEXT:    slli a1, a1, 16
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+  %shl = and i32 %a, 65535
+  %shl1 = shl i32 %b, 16
+  %or = or i32 %shl1, %shl
+  ret i32 %or
+}
+
+; As we are not matching directly i64 code patterns on RV32 some i64 patterns
+; don't have yet any matching bit manipulation instructions on RV32.
+; This test is presented here in case future expansions of the Bitmanip
+; extensions introduce instructions suitable for this pattern.
+
+define i64 @pack_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: pack_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-ZBKB-LABEL: pack_i64:
+; RV32ZBP-ZBKB:       # %bb.0:
+; RV32ZBP-ZBKB-NEXT:    mv a1, a2
+; RV32ZBP-ZBKB-NEXT:    ret
+  %shl = and i64 %a, 4294967295
+  %shl1 = shl i64 %b, 32
+  %or = or i64 %shl1, %shl
+  ret i64 %or
+}
+
+; As we are not matching directly i64 code patterns on RV32 some i64 patterns
+; don't have yet any matching bit manipulation instructions on RV32.
+; This test is presented here in case future expansions of the Bitmanip
+; extensions introduce instructions suitable for this pattern.
+
+define i64 @packu_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: packu_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a3
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-ZBKB-LABEL: packu_i64:
+; RV32ZBP-ZBKB:       # %bb.0:
+; RV32ZBP-ZBKB-NEXT:    mv a0, a1
+; RV32ZBP-ZBKB-NEXT:    mv a1, a3
+; RV32ZBP-ZBKB-NEXT:    ret
+  %shr = lshr i64 %a, 32
+  %shr1 = and i64 %b, -4294967296
+  %or = or i64 %shr1, %shr
+  ret i64 %or
+}
+
+define i32 @packh_i32(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: packh_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a0, a0, 255
+; RV32I-NEXT:    slli a1, a1, 24
+; RV32I-NEXT:    srli a1, a1, 16
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-ZBKB-LABEL: packh_i32:
+; RV32ZBP-ZBKB:       # %bb.0:
+; RV32ZBP-ZBKB-NEXT:    packh a0, a0, a1
+; RV32ZBP-ZBKB-NEXT:    ret
+  %and = and i32 %a, 255
+  %and1 = shl i32 %b, 8
+  %shl = and i32 %and1, 65280
+  %or = or i32 %shl, %and
+  ret i32 %or
+}
+
+define i32 @packh_i32_2(i32 %a, i32 %b) nounwind {
+; RV32I-LABEL: packh_i32_2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a0, a0, 255
+; RV32I-NEXT:    andi a1, a1, 255
+; RV32I-NEXT:    slli a1, a1, 8
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-ZBKB-LABEL: packh_i32_2:
+; RV32ZBP-ZBKB:       # %bb.0:
+; RV32ZBP-ZBKB-NEXT:    packh a0, a0, a1
+; RV32ZBP-ZBKB-NEXT:    ret
+  %and = and i32 %a, 255
+  %and1 = and i32 %b, 255
+  %shl = shl i32 %and1, 8
+  %or = or i32 %shl, %and
+  ret i32 %or
+}
+
+define i64 @packh_i64(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: packh_i64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a0, a0, 255
+; RV32I-NEXT:    slli a1, a2, 24
+; RV32I-NEXT:    srli a1, a1, 16
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-ZBKB-LABEL: packh_i64:
+; RV32ZBP-ZBKB:       # %bb.0:
+; RV32ZBP-ZBKB-NEXT:    packh a0, a0, a2
+; RV32ZBP-ZBKB-NEXT:    li a1, 0
+; RV32ZBP-ZBKB-NEXT:    ret
+  %and = and i64 %a, 255
+  %and1 = shl i64 %b, 8
+  %shl = and i64 %and1, 65280
+  %or = or i64 %shl, %and
+  ret i64 %or
+}
+
+define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
+; RV32I-LABEL: packh_i64_2:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    andi a0, a0, 255
+; RV32I-NEXT:    andi a1, a2, 255
+; RV32I-NEXT:    slli a1, a1, 8
+; RV32I-NEXT:    or a0, a1, a0
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    ret
+;
+; RV32ZBP-ZBKB-LABEL: packh_i64_2:
+; RV32ZBP-ZBKB:       # %bb.0:
+; RV32ZBP-ZBKB-NEXT:    packh a0, a0, a2
+; RV32ZBP-ZBKB-NEXT:    li a1, 0
+; RV32ZBP-ZBKB-NEXT:    ret
+  %and = and i64 %a, 255
+  %and1 = and i64 %b, 255
+  %shl = shl i64 %and1, 8
+  %or = or i64 %shl, %and
+  ret i64 %or
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll
index c51151f47962f..d021b26f45612 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll
@@ -2773,46 +2773,6 @@ define i64 @shfl8_i64(i64 %a, i64 %b) nounwind {
   ret i64 %or3
 }
 
-define i32 @pack_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: pack_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    slli a0, a0, 16
-; RV32I-NEXT:    srli a0, a0, 16
-; RV32I-NEXT:    slli a1, a1, 16
-; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBP-LABEL: pack_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    pack a0, a0, a1
-; RV32ZBP-NEXT:    ret
-  %shl = and i32 %a, 65535
-  %shl1 = shl i32 %b, 16
-  %or = or i32 %shl1, %shl
-  ret i32 %or
-}
-
-; As we are not matching directly i64 code patterns on RV32 some i64 patterns
-; don't have yet any matching bit manipulation instructions on RV32.
-; This test is presented here in case future expansions of the Bitmanip
-; extensions introduce instructions suitable for this pattern.
-
-define i64 @pack_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: pack_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a1, a2
-; RV32I-NEXT:    ret
-;
-; RV32ZBP-LABEL: pack_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    mv a1, a2
-; RV32ZBP-NEXT:    ret
-  %shl = and i64 %a, 4294967295
-  %shl1 = shl i64 %b, 32
-  %or = or i64 %shl1, %shl
-  ret i64 %or
-}
-
 define i32 @packu_i32(i32 %a, i32 %b) nounwind {
 ; RV32I-LABEL: packu_i32:
 ; RV32I:       # %bb.0:
@@ -2832,113 +2792,6 @@ define i32 @packu_i32(i32 %a, i32 %b) nounwind {
   ret i32 %or
 }
 
-; As we are not matching directly i64 code patterns on RV32 some i64 patterns
-; don't have yet any matching bit manipulation instructions on RV32.
-; This test is presented here in case future expansions of the Bitmanip
-; extensions introduce instructions suitable for this pattern.
-
-define i64 @packu_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: packu_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    mv a1, a3
-; RV32I-NEXT:    ret
-;
-; RV32ZBP-LABEL: packu_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    mv a0, a1
-; RV32ZBP-NEXT:    mv a1, a3
-; RV32ZBP-NEXT:    ret
-  %shr = lshr i64 %a, 32
-  %shr1 = and i64 %b, -4294967296
-  %or = or i64 %shr1, %shr
-  ret i64 %or
-}
-
-define i32 @packh_i32(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: packh_i32:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    slli a1, a1, 24
-; RV32I-NEXT:    srli a1, a1, 16
-; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBP-LABEL: packh_i32:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    packh a0, a0, a1
-; RV32ZBP-NEXT:    ret
-  %and = and i32 %a, 255
-  %and1 = shl i32 %b, 8
-  %shl = and i32 %and1, 65280
-  %or = or i32 %shl, %and
-  ret i32 %or
-}
-
-define i32 @packh_i32_2(i32 %a, i32 %b) nounwind {
-; RV32I-LABEL: packh_i32_2:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    andi a1, a1, 255
-; RV32I-NEXT:    slli a1, a1, 8
-; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    ret
-;
-; RV32ZBP-LABEL: packh_i32_2:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    packh a0, a0, a1
-; RV32ZBP-NEXT:    ret
-  %and = and i32 %a, 255
-  %and1 = and i32 %b, 255
-  %shl = shl i32 %and1, 8
-  %or = or i32 %shl, %and
-  ret i32 %or
-}
-
-define i64 @packh_i64(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: packh_i64:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    slli a1, a2, 24
-; RV32I-NEXT:    srli a1, a1, 16
-; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    ret
-;
-; RV32ZBP-LABEL: packh_i64:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    packh a0, a0, a2
-; RV32ZBP-NEXT:    li a1, 0
-; RV32ZBP-NEXT:    ret
-  %and = and i64 %a, 255
-  %and1 = shl i64 %b, 8
-  %shl = and i64 %and1, 65280
-  %or = or i64 %shl, %and
-  ret i64 %or
-}
-
-define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
-; RV32I-LABEL: packh_i64_2:
-; RV32I:       # %bb.0:
-; RV32I-NEXT:    andi a0, a0, 255
-; RV32I-NEXT:    andi a1, a2, 255
-; RV32I-NEXT:    slli a1, a1, 8
-; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    ret
-;
-; RV32ZBP-LABEL: packh_i64_2:
-; RV32ZBP:       # %bb.0:
-; RV32ZBP-NEXT:    packh a0, a0, a2
-; RV32ZBP-NEXT:    li a1, 0
-; RV32ZBP-NEXT:    ret
-  %and = and i64 %a, 255
-  %and1 = and i64 %b, 255
-  %shl = shl i64 %and1, 8
-  %or = or i64 %shl, %and
-  ret i64 %or
-}
-
 define i32 @zexth_i32(i32 %a) nounwind {
 ; RV32I-LABEL: zexth_i32:
 ; RV32I:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv32zknd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zknd-intrinsic.ll
new file mode 100644
index 0000000000000..2f7d2a2f07d91
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zknd-intrinsic.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zknd -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZKND
+
+declare i32 @llvm.riscv.aes32dsi(i32, i32, i8);
+
+define i32 @aes32dsi(i32 %a, i32 %b) nounwind {
+; RV32ZKND-LABEL: aes32dsi
+; RV32ZKND: # %bb.0:
+; RV32ZKND-NEXT: aes32dsi a0, a0, a1, 0
+; RV32ZKND-NEXT: ret
+    %val = call i32 @llvm.riscv.aes32dsi(i32 %a, i32 %b, i8 0)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.aes32dsmi(i32, i32, i8);
+
+define i32 @aes32dsmi(i32 %a, i32 %b) nounwind {
+; RV32ZKND-LABEL: aes32dsmi
+; RV32ZKND: # %bb.0:
+; RV32ZKND-NEXT: aes32dsmi a0, a0, a1, 1
+; RV32ZKND-NEXT: ret
+    %val = call i32 @llvm.riscv.aes32dsmi(i32 %a, i32 %b, i8 1)
+    ret i32 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zkne-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zkne-intrinsic.ll
new file mode 100644
index 0000000000000..3b8937b2549da
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zkne-intrinsic.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zkne -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZKNE
+
+declare i32 @llvm.riscv.aes32esi(i32, i32, i8);
+
+define i32 @aes32esi(i32 %a, i32 %b) nounwind {
+; RV32ZKNE-LABEL: aes32esi
+; RV32ZKNE: # %bb.0:
+; RV32ZKNE-NEXT: aes32esi a0, a0, a1, 2
+; RV32ZKNE-NEXT: ret
+    %val = call i32 @llvm.riscv.aes32esi(i32 %a, i32 %b, i8 2)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.aes32esmi(i32, i32, i8);
+
+define i32 @aes32esmi(i32 %a, i32 %b) nounwind {
+; RV32ZKNE-LABEL: aes32esmi
+; RV32ZKNE: # %bb.0:
+; RV32ZKNE-NEXT: aes32esmi a0, a0, a1, 3
+; RV32ZKNE-NEXT: ret
+    %val = call i32 @llvm.riscv.aes32esmi(i32 %a, i32 %b, i8 3)
+    ret i32 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zknh-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zknh-intrinsic.ll
new file mode 100644
index 0000000000000..f6be9f012e85c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zknh-intrinsic.ll
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zknh -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZKNH
+
+
+declare i32 @llvm.riscv.sha256sig0.i32(i32);
+
+define i32 @sha256sig0_i32(i32 %a) nounwind {
+; RV32ZKNH-LABEL: sha256sig0_i32
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha256sig0 a0, a0
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha256sig0.i32(i32 %a)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha256sig1.i32(i32);
+
+define i32 @sha256sig1_i32(i32 %a) nounwind {
+; RV32ZKNH-LABEL: sha256sig1_i32
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha256sig1 a0, a0
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha256sig1.i32(i32 %a)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha256sum0.i32(i32);
+
+define i32 @sha256sum0_i32(i32 %a) nounwind {
+; RV32ZKNH-LABEL: sha256sum0_i32
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha256sum0 a0, a0
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha256sum0.i32(i32 %a)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha256sum1.i32(i32);
+
+define i32 @sha256sum1_i32(i32 %a) nounwind {
+; RV32ZKNH-LABEL: sha256sum1_i32
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha256sum1 a0, a0
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha256sum1.i32(i32 %a)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha512sig0l(i32, i32);
+
+define i32 @sha512sig0l(i32 %a, i32 %b) nounwind {
+; RV32ZKNH-LABEL: sha512sig0l
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha512sig0l a0, a0, a1
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha512sig0l(i32 %a, i32 %b)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha512sig0h(i32, i32);
+
+define i32 @sha512sig0h(i32 %a, i32 %b) nounwind {
+; RV32ZKNH-LABEL: sha512sig0h
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha512sig0h a0, a0, a1
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha512sig0h(i32 %a, i32 %b)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha512sig1l(i32, i32);
+
+define i32 @sha512sig1l(i32 %a, i32 %b) nounwind {
+; RV32ZKNH-LABEL: sha512sig1l
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha512sig1l a0, a0, a1
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha512sig1l(i32 %a, i32 %b)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha512sig1h(i32, i32);
+
+define i32 @sha512sig1h(i32 %a, i32 %b) nounwind {
+; RV32ZKNH-LABEL: sha512sig1h
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha512sig1h a0, a0, a1
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha512sig1h(i32 %a, i32 %b)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha512sum0r(i32, i32);
+
+define i32 @sha512sum0r(i32 %a, i32 %b) nounwind {
+; RV32ZKNH-LABEL: sha512sum0r
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha512sum0r a0, a0, a1
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha512sum0r(i32 %a, i32 %b)
+    ret i32 %val
+}
+
+declare i32 @llvm.riscv.sha512sum1r(i32, i32);
+
+define i32 @sha512sum1r(i32 %a, i32 %b) nounwind {
+; RV32ZKNH-LABEL: sha512sum1r
+; RV32ZKNH: # %bb.0:
+; RV32ZKNH-NEXT: sha512sum1r a0, a0, a1
+; RV32ZKNH-NEXT: ret
+    %val = call i32 @llvm.riscv.sha512sum1r(i32 %a, i32 %b)
+    ret i32 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zksed-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zksed-intrinsic.ll
new file mode 100644
index 0000000000000..e8ecb4f3decd2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zksed-intrinsic.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zksed -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZKSED
+
+declare i32 @llvm.riscv.sm4ks.i32(i32, i32, i8);
+
+define i32 @sm4ks_i32(i32 %a, i32 %b) nounwind {
+; RV32ZKSED-LABEL: sm4ks_i32:
+; RV32ZKSED:       # %bb.0:
+; RV32ZKSED-NEXT:    sm4ks a0, a0, a1, 2
+; RV32ZKSED-NEXT:    ret
+  %val = call i32 @llvm.riscv.sm4ks.i32(i32 %a, i32 %b, i8 2)
+  ret i32 %val
+}
+
+declare i32 @llvm.riscv.sm4ed.i32(i32, i32, i8);
+
+define i32 @sm4ed_i32(i32 %a, i32 %b) nounwind {
+; RV32ZKSED-LABEL: sm4ed_i32:
+; RV32ZKSED:       # %bb.0:
+; RV32ZKSED-NEXT:    sm4ed a0, a0, a1, 3
+; RV32ZKSED-NEXT:    ret
+  %val = call i32 @llvm.riscv.sm4ed.i32(i32 %a, i32 %b, i8 3)
+  ret i32 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv32zksh-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32zksh-intrinsic.ll
new file mode 100644
index 0000000000000..43cdf5c8fa2cb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32zksh-intrinsic.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+zksh -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32ZKSH
+
+declare i32 @llvm.riscv.sm3p0.i32(i32);
+
+define i32 @sm3p0_i32(i32 %a) nounwind {
+; RV32ZKSH-LABEL: sm3p0_i32:
+; RV32ZKSH:       # %bb.0:
+; RV32ZKSH-NEXT:    sm3p0 a0, a0
+; RV32ZKSH-NEXT:    ret
+  %val = call i32 @llvm.riscv.sm3p0.i32(i32 %a)
+  ret i32 %val
+}
+
+declare i32 @llvm.riscv.sm3p1.i32(i32);
+
+define i32 @sm3p1_i32(i32 %a) nounwind {
+; RV32ZKSH-LABEL: sm3p1_i32:
+; RV32ZKSH:       # %bb.0:
+; RV32ZKSH-NEXT:    sm3p1 a0, a0
+; RV32ZKSH-NEXT:    ret
+  %val = call i32 @llvm.riscv.sm3p1.i32(i32 %a)
+  ret i32 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbb-zbp-zbkb.ll
similarity index 56%
rename from llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
rename to llvm/test/CodeGen/RISCV/rv64zbb-zbp-zbkb.ll
index d166cbdad8875..d519b14068868 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb-zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb-zbp-zbkb.ll
@@ -1,10 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN:   | FileCheck %s -check-prefixes=RV64I
 ; RUN: llc -mtriple=riscv64 -mattr=+zbb -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV64ZBB
+; RUN:   | FileCheck %s -check-prefixes=RV64ZBB-ZBP-ZBKB
 ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s -check-prefix=RV64ZBP
+; RUN:   | FileCheck %s -check-prefixes=RV64ZBB-ZBP-ZBKB
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefixes=RV64ZBB-ZBP-ZBKB
 
 define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: andn_i32:
@@ -13,15 +15,10 @@ define signext i32 @andn_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-NEXT:    and a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: andn_i32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: andn_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    andn a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: andn_i32:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    andn a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %neg = xor i32 %b, -1
   %and = and i32 %neg, %a
   ret i32 %and
@@ -34,15 +31,10 @@ define i64 @andn_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    and a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: andn_i64:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: andn_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    andn a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: andn_i64:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    andn a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %neg = xor i64 %b, -1
   %and = and i64 %neg, %a
   ret i64 %and
@@ -55,15 +47,10 @@ define signext i32 @orn_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: orn_i32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    orn a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: orn_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    orn a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: orn_i32:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    orn a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %neg = xor i32 %b, -1
   %or = or i32 %neg, %a
   ret i32 %or
@@ -76,15 +63,10 @@ define i64 @orn_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    or a0, a1, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: orn_i64:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    orn a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: orn_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    orn a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: orn_i64:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    orn a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %neg = xor i64 %b, -1
   %or = or i64 %neg, %a
   ret i64 %or
@@ -97,15 +79,10 @@ define signext i32 @xnor_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: xnor_i32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    xnor a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: xnor_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    xnor a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: xnor_i32:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    xnor a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %neg = xor i32 %a, -1
   %xor = xor i32 %neg, %b
   ret i32 %xor
@@ -118,15 +95,10 @@ define i64 @xnor_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    not a0, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: xnor_i64:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    xnor a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: xnor_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    xnor a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: xnor_i64:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    xnor a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %neg = xor i64 %a, -1
   %xor = xor i64 %neg, %b
   ret i64 %xor
@@ -143,15 +115,10 @@ define signext i32 @rol_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rol_i32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rolw a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rol_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rolw a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rol_i32:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    rolw a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
   ret i32 %1
 }
@@ -167,17 +134,11 @@ define void @rol_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind {
 ; RV64I-NEXT:    sw a0, 0(a2)
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rol_i32_nosext:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rolw a0, a0, a1
-; RV64ZBB-NEXT:    sw a0, 0(a2)
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rol_i32_nosext:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rolw a0, a0, a1
-; RV64ZBP-NEXT:    sw a0, 0(a2)
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rol_i32_nosext:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    rolw a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    sw a0, 0(a2)
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 %b)
   store i32 %1, i32* %x
   ret void
@@ -193,17 +154,11 @@ define signext i32 @rol_i32_neg_constant_rhs(i32 signext %a) nounwind {
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rol_i32_neg_constant_rhs:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    li a1, -2
-; RV64ZBB-NEXT:    rolw a0, a1, a0
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rol_i32_neg_constant_rhs:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    li a1, -2
-; RV64ZBP-NEXT:    rolw a0, a1, a0
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rol_i32_neg_constant_rhs:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBP-ZBKB-NEXT:    rolw a0, a1, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 -2, i32 -2, i32 %a)
   ret i32 %1
 }
@@ -219,15 +174,10 @@ define i64 @rol_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rol_i64:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rol a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rol_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rol a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rol_i64:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    rol a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 %b)
   ret i64 %or
 }
@@ -243,15 +193,10 @@ define signext i32 @ror_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: ror_i32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rorw a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: ror_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rorw a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: ror_i32:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    rorw a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
   ret i32 %1
 }
@@ -267,17 +212,11 @@ define void @ror_i32_nosext(i32 signext %a, i32 signext %b, i32* %x) nounwind {
 ; RV64I-NEXT:    sw a0, 0(a2)
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: ror_i32_nosext:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rorw a0, a0, a1
-; RV64ZBB-NEXT:    sw a0, 0(a2)
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: ror_i32_nosext:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rorw a0, a0, a1
-; RV64ZBP-NEXT:    sw a0, 0(a2)
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: ror_i32_nosext:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    rorw a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    sw a0, 0(a2)
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 %b)
   store i32 %1, i32* %x
   ret void
@@ -293,17 +232,11 @@ define signext i32 @ror_i32_neg_constant_rhs(i32 signext %a) nounwind {
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: ror_i32_neg_constant_rhs:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    li a1, -2
-; RV64ZBB-NEXT:    rorw a0, a1, a0
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: ror_i32_neg_constant_rhs:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    li a1, -2
-; RV64ZBP-NEXT:    rorw a0, a1, a0
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: ror_i32_neg_constant_rhs:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    li a1, -2
+; RV64ZBB-ZBP-ZBKB-NEXT:    rorw a0, a1, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 -2, i32 -2, i32 %a)
   ret i32 %1
 }
@@ -319,15 +252,10 @@ define i64 @ror_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    or a0, a2, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: ror_i64:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    ror a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: ror_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    ror a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: ror_i64:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    ror a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 %b)
   ret i64 %or
 }
@@ -340,15 +268,10 @@ define signext i32 @rori_i32_fshl(i32 signext %a) nounwind {
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rori_i32_fshl:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    roriw a0, a0, 1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rori_i32_fshl:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    roriw a0, a0, 1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rori_i32_fshl:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    roriw a0, a0, 1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
   ret i32 %1
 }
@@ -363,17 +286,11 @@ define void @rori_i32_fshl_nosext(i32 signext %a, i32* %x) nounwind {
 ; RV64I-NEXT:    sw a0, 0(a1)
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rori_i32_fshl_nosext:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    roriw a0, a0, 1
-; RV64ZBB-NEXT:    sw a0, 0(a1)
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rori_i32_fshl_nosext:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    roriw a0, a0, 1
-; RV64ZBP-NEXT:    sw a0, 0(a1)
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rori_i32_fshl_nosext:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    roriw a0, a0, 1
+; RV64ZBB-ZBP-ZBKB-NEXT:    sw a0, 0(a1)
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 31)
   store i32 %1, i32* %x
   ret void
@@ -387,15 +304,10 @@ define signext i32 @rori_i32_fshr(i32 signext %a) nounwind {
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rori_i32_fshr:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    roriw a0, a0, 31
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rori_i32_fshr:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    roriw a0, a0, 31
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rori_i32_fshr:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    roriw a0, a0, 31
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
   ret i32 %1
 }
@@ -410,17 +322,11 @@ define void @rori_i32_fshr_nosext(i32 signext %a, i32* %x) nounwind {
 ; RV64I-NEXT:    sw a0, 0(a1)
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rori_i32_fshr_nosext:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    roriw a0, a0, 31
-; RV64ZBB-NEXT:    sw a0, 0(a1)
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rori_i32_fshr_nosext:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    roriw a0, a0, 31
-; RV64ZBP-NEXT:    sw a0, 0(a1)
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rori_i32_fshr_nosext:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    roriw a0, a0, 31
+; RV64ZBB-ZBP-ZBKB-NEXT:    sw a0, 0(a1)
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 31)
   store i32 %1, i32* %x
   ret void
@@ -437,19 +343,12 @@ define signext i32 @not_rori_i32(i32 signext %x, i32 signext %y) nounwind {
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: not_rori_i32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    slliw a0, a0, 31
-; RV64ZBB-NEXT:    srliw a1, a1, 1
-; RV64ZBB-NEXT:    or a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: not_rori_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    slliw a0, a0, 31
-; RV64ZBP-NEXT:    srliw a1, a1, 1
-; RV64ZBP-NEXT:    or a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: not_rori_i32:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    slliw a0, a0, 31
+; RV64ZBB-ZBP-ZBKB-NEXT:    srliw a1, a1, 1
+; RV64ZBB-ZBP-ZBKB-NEXT:    or a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %a = shl i32 %x, 31
   %b = lshr i32 %y, 1
   %c = or i32 %a, %b
@@ -470,25 +369,15 @@ define i64 @roriw_bug(i64 %x) nounwind {
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: roriw_bug:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    slli a1, a0, 31
-; RV64ZBB-NEXT:    andi a0, a0, -2
-; RV64ZBB-NEXT:    srli a2, a0, 1
-; RV64ZBB-NEXT:    or a1, a1, a2
-; RV64ZBB-NEXT:    sext.w a1, a1
-; RV64ZBB-NEXT:    xor a0, a0, a1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: roriw_bug:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    slli a1, a0, 31
-; RV64ZBP-NEXT:    andi a0, a0, -2
-; RV64ZBP-NEXT:    srli a2, a0, 1
-; RV64ZBP-NEXT:    or a1, a1, a2
-; RV64ZBP-NEXT:    sext.w a1, a1
-; RV64ZBP-NEXT:    xor a0, a0, a1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: roriw_bug:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    slli a1, a0, 31
+; RV64ZBB-ZBP-ZBKB-NEXT:    andi a0, a0, -2
+; RV64ZBB-ZBP-ZBKB-NEXT:    srli a2, a0, 1
+; RV64ZBB-ZBP-ZBKB-NEXT:    or a1, a1, a2
+; RV64ZBB-ZBP-ZBKB-NEXT:    sext.w a1, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    xor a0, a0, a1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %a = shl i64 %x, 31
   %b = and i64 %x, 18446744073709551614
   %c = lshr i64 %b, 1
@@ -507,15 +396,10 @@ define i64 @rori_i64_fshl(i64 %a) nounwind {
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rori_i64_fshl:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rori a0, a0, 1
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rori_i64_fshl:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rori a0, a0, 1
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rori_i64_fshl:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    rori a0, a0, 1
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 63)
   ret i64 %1
 }
@@ -528,15 +412,10 @@ define i64 @rori_i64_fshr(i64 %a) nounwind {
 ; RV64I-NEXT:    or a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: rori_i64_fshr:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    rori a0, a0, 63
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: rori_i64_fshr:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    rori a0, a0, 63
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: rori_i64_fshr:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    rori a0, a0, 63
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 63)
   ret i64 %1
 }
@@ -548,17 +427,11 @@ define i8 @srli_i8(i8 %a) nounwind {
 ; RV64I-NEXT:    srli a0, a0, 62
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: srli_i8:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    slli a0, a0, 56
-; RV64ZBB-NEXT:    srli a0, a0, 62
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: srli_i8:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    slli a0, a0, 56
-; RV64ZBP-NEXT:    srli a0, a0, 62
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: srli_i8:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 56
+; RV64ZBB-ZBP-ZBKB-NEXT:    srli a0, a0, 62
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = lshr i8 %a, 6
   ret i8 %1
 }
@@ -572,17 +445,11 @@ define i8 @srai_i8(i8 %a) nounwind {
 ; RV64I-NEXT:    srai a0, a0, 61
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: srai_i8:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    slli a0, a0, 56
-; RV64ZBB-NEXT:    srai a0, a0, 61
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: srai_i8:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    slli a0, a0, 56
-; RV64ZBP-NEXT:    srai a0, a0, 61
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: srai_i8:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 56
+; RV64ZBB-ZBP-ZBKB-NEXT:    srai a0, a0, 61
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = ashr i8 %a, 5
   ret i8 %1
 }
@@ -596,17 +463,11 @@ define i16 @srli_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    srli a0, a0, 54
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: srli_i16:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    slli a0, a0, 48
-; RV64ZBB-NEXT:    srli a0, a0, 54
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: srli_i16:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    slli a0, a0, 48
-; RV64ZBP-NEXT:    srli a0, a0, 54
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: srli_i16:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 48
+; RV64ZBB-ZBP-ZBKB-NEXT:    srli a0, a0, 54
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = lshr i16 %a, 6
   ret i16 %1
 }
@@ -620,17 +481,11 @@ define i16 @srai_i16(i16 %a) nounwind {
 ; RV64I-NEXT:    srai a0, a0, 57
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: srai_i16:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    slli a0, a0, 48
-; RV64ZBB-NEXT:    srai a0, a0, 57
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: srai_i16:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    slli a0, a0, 48
-; RV64ZBP-NEXT:    srai a0, a0, 57
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: srai_i16:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    slli a0, a0, 48
+; RV64ZBB-ZBP-ZBKB-NEXT:    srai a0, a0, 57
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %1 = ashr i16 %a, 9
   ret i16 %1
 }
@@ -643,17 +498,11 @@ define i1 @andn_seqz_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: andn_seqz_i32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a1, a0
-; RV64ZBB-NEXT:    seqz a0, a0
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: andn_seqz_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    andn a0, a1, a0
-; RV64ZBP-NEXT:    seqz a0, a0
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: andn_seqz_i32:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    seqz a0, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %and = and i32 %a, %b
   %cmpeq = icmp eq i32 %and, %b
   ret i1 %cmpeq
@@ -667,17 +516,11 @@ define i1 @andn_seqz_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: andn_seqz_i64:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a1, a0
-; RV64ZBB-NEXT:    seqz a0, a0
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: andn_seqz_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    andn a0, a1, a0
-; RV64ZBP-NEXT:    seqz a0, a0
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: andn_seqz_i64:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    seqz a0, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %and = and i64 %a, %b
   %cmpeq = icmp eq i64 %and, %b
   ret i1 %cmpeq
@@ -691,17 +534,11 @@ define i1 @andn_snez_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-NEXT:    snez a0, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: andn_snez_i32:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a1, a0
-; RV64ZBB-NEXT:    snez a0, a0
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: andn_snez_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    andn a0, a1, a0
-; RV64ZBP-NEXT:    snez a0, a0
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: andn_snez_i32:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    snez a0, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %and = and i32 %a, %b
   %cmpeq = icmp ne i32 %and, %b
   ret i1 %cmpeq
@@ -715,17 +552,11 @@ define i1 @andn_snez_i64(i64 %a, i64 %b) nounwind {
 ; RV64I-NEXT:    snez a0, a0
 ; RV64I-NEXT:    ret
 ;
-; RV64ZBB-LABEL: andn_snez_i64:
-; RV64ZBB:       # %bb.0:
-; RV64ZBB-NEXT:    andn a0, a1, a0
-; RV64ZBB-NEXT:    snez a0, a0
-; RV64ZBB-NEXT:    ret
-;
-; RV64ZBP-LABEL: andn_snez_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    andn a0, a1, a0
-; RV64ZBP-NEXT:    snez a0, a0
-; RV64ZBP-NEXT:    ret
+; RV64ZBB-ZBP-ZBKB-LABEL: andn_snez_i64:
+; RV64ZBB-ZBP-ZBKB:       # %bb.0:
+; RV64ZBB-ZBP-ZBKB-NEXT:    andn a0, a1, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    snez a0, a0
+; RV64ZBB-ZBP-ZBKB-NEXT:    ret
   %and = and i64 %a, %b
   %cmpeq = icmp ne i64 %and, %b
   ret i1 %cmpeq
diff --git a/llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
index b99a84e445a6c..87afe13bfdf0c 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbc-intrinsic.ll
@@ -2,28 +2,6 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zbc -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s -check-prefix=RV64ZBC
 
-declare i64 @llvm.riscv.clmul.i64(i64 %a, i64 %b)
-
-define i64 @clmul64(i64 %a, i64 %b) nounwind {
-; RV64ZBC-LABEL: clmul64:
-; RV64ZBC:       # %bb.0:
-; RV64ZBC-NEXT:    clmul a0, a0, a1
-; RV64ZBC-NEXT:    ret
-  %tmp = call i64 @llvm.riscv.clmul.i64(i64 %a, i64 %b)
- ret i64 %tmp
-}
-
-declare i64 @llvm.riscv.clmulh.i64(i64 %a, i64 %b)
-
-define i64 @clmul64h(i64 %a, i64 %b) nounwind {
-; RV64ZBC-LABEL: clmul64h:
-; RV64ZBC:       # %bb.0:
-; RV64ZBC-NEXT:    clmulh a0, a0, a1
-; RV64ZBC-NEXT:    ret
-  %tmp = call i64 @llvm.riscv.clmulh.i64(i64 %a, i64 %b)
- ret i64 %tmp
-}
-
 declare i64 @llvm.riscv.clmulr.i64(i64 %a, i64 %b)
 
 define i64 @clmul64r(i64 %a, i64 %b) nounwind {
diff --git a/llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
new file mode 100644
index 0000000000000..180fef9b477e2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zbc-zbkc-intrinsic.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbc -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBC-ZBKC
+; RUN: llc -mtriple=riscv64 -mattr=+zbkc -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBC-ZBKC
+
+declare i64 @llvm.riscv.clmul.i64(i64 %a, i64 %b)
+
+define i64 @clmul64(i64 %a, i64 %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul64:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    clmul a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.clmul.i64(i64 %a, i64 %b)
+ ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.clmulh.i64(i64 %a, i64 %b)
+
+define i64 @clmul64h(i64 %a, i64 %b) nounwind {
+; RV64ZBC-ZBKC-LABEL: clmul64h:
+; RV64ZBC-ZBKC:       # %bb.0:
+; RV64ZBC-ZBKC-NEXT:    clmulh a0, a0, a1
+; RV64ZBC-ZBKC-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.clmulh.i64(i64 %a, i64 %b)
+ ret i64 %tmp
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbkb-intrinsic.ll
new file mode 100644
index 0000000000000..ec3196beacd0e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zbkb-intrinsic.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBKB
+
+declare i64 @llvm.riscv.brev8(i64)
+
+define i64 @brev8(i64 %a) nounwind {
+; RV64ZBKB-LABEL: brev8:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    brev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %val = call i64 @llvm.riscv.brev8(i64 %a)
+  ret i64 %val
+}
+
+declare i64 @llvm.bswap.i64(i64)
+
+define i64 @rev8_i64(i64 %a) {
+; RV64ZBKB-LABEL: rev8_i64:
+; RV64ZBKB:       # %bb.0:
+; RV64ZBKB-NEXT:    rev8 a0, a0
+; RV64ZBKB-NEXT:    ret
+  %1 = call i64 @llvm.bswap.i64(i64 %a)
+  ret i64 %1
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbkx-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zbkx-intrinsic.ll
new file mode 100644
index 0000000000000..19ccdfe5303d8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zbkx-intrinsic.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=riscv64 -mattr=+zbkx -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBKX
+
+declare i64 @llvm.riscv.xperm8.i64(i64 %a, i64 %b)
+
+define i64 @xperm8(i64 %a, i64 %b) nounwind {
+; RV64ZBKX-LABEL: xperm8:
+; RV64ZBKX:       # %bb.0:
+; RV64ZBKX-NEXT:    xperm8 a0, a0, a1
+; RV64ZBKX-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.xperm8.i64(i64 %a, i64 %b)
+ ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.xperm4.i64(i64 %a, i64 %b)
+
+define i64 @xperm4(i64 %a, i64 %b) nounwind {
+; RV64ZBKX-LABEL: xperm4:
+; RV64ZBKX:       # %bb.0:
+; RV64ZBKX-NEXT:    xperm4 a0, a0, a1
+; RV64ZBKX-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.xperm4.i64(i64 %a, i64 %b)
+ ret i64 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp-zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbp-zbkb.ll
new file mode 100644
index 0000000000000..a2c27e028fca9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zbp-zbkb.ll
@@ -0,0 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zbp -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBP-ZBKB
+; RUN: llc -mtriple=riscv64 -mattr=+zbkb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZBP-ZBKB
+
+define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: pack_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 48
+; RV64I-NEXT:    srli a0, a0, 48
+; RV64I-NEXT:    slliw a1, a1, 16
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBP-ZBKB-LABEL: pack_i32:
+; RV64ZBP-ZBKB:       # %bb.0:
+; RV64ZBP-ZBKB-NEXT:    packw a0, a0, a1
+; RV64ZBP-ZBKB-NEXT:    ret
+  %shl = and i32 %a, 65535
+  %shl1 = shl i32 %b, 16
+  %or = or i32 %shl1, %shl
+  ret i32 %or
+}
+
+define i64 @pack_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: pack_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    slli a1, a1, 32
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBP-ZBKB-LABEL: pack_i64:
+; RV64ZBP-ZBKB:       # %bb.0:
+; RV64ZBP-ZBKB-NEXT:    pack a0, a0, a1
+; RV64ZBP-ZBKB-NEXT:    ret
+  %shl = and i64 %a, 4294967295
+  %shl1 = shl i64 %b, 32
+  %or = or i64 %shl1, %shl
+  ret i64 %or
+}
+
+define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
+; RV64I-LABEL: packh_i32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    slli a1, a1, 56
+; RV64I-NEXT:    srli a1, a1, 48
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBP-ZBKB-LABEL: packh_i32:
+; RV64ZBP-ZBKB:       # %bb.0:
+; RV64ZBP-ZBKB-NEXT:    packh a0, a0, a1
+; RV64ZBP-ZBKB-NEXT:    ret
+  %and = and i32 %a, 255
+  %and1 = shl i32 %b, 8
+  %shl = and i32 %and1, 65280
+  %or = or i32 %shl, %and
+  ret i32 %or
+}
+
+define i32 @packh_i32_2(i32 %a, i32 %b) nounwind {
+; RV64I-LABEL: packh_i32_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    andi a1, a1, 255
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBP-ZBKB-LABEL: packh_i32_2:
+; RV64ZBP-ZBKB:       # %bb.0:
+; RV64ZBP-ZBKB-NEXT:    packh a0, a0, a1
+; RV64ZBP-ZBKB-NEXT:    ret
+  %and = and i32 %a, 255
+  %and1 = and i32 %b, 255
+  %shl = shl i32 %and1, 8
+  %or = or i32 %shl, %and
+  ret i32 %or
+}
+
+define i64 @packh_i64(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: packh_i64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    slli a1, a1, 56
+; RV64I-NEXT:    srli a1, a1, 48
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBP-ZBKB-LABEL: packh_i64:
+; RV64ZBP-ZBKB:       # %bb.0:
+; RV64ZBP-ZBKB-NEXT:    packh a0, a0, a1
+; RV64ZBP-ZBKB-NEXT:    ret
+  %and = and i64 %a, 255
+  %and1 = shl i64 %b, 8
+  %shl = and i64 %and1, 65280
+  %or = or i64 %shl, %and
+  ret i64 %or
+}
+
+define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
+; RV64I-LABEL: packh_i64_2:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    andi a0, a0, 255
+; RV64I-NEXT:    andi a1, a1, 255
+; RV64I-NEXT:    slli a1, a1, 8
+; RV64I-NEXT:    or a0, a1, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBP-ZBKB-LABEL: packh_i64_2:
+; RV64ZBP-ZBKB:       # %bb.0:
+; RV64ZBP-ZBKB-NEXT:    packh a0, a0, a1
+; RV64ZBP-ZBKB-NEXT:    ret
+  %and = and i64 %a, 255
+  %and1 = and i64 %b, 255
+  %shl = shl i64 %and1, 8
+  %or = or i64 %shl, %and
+  ret i64 %or
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll
index b68a98b13052b..6a4376409fab5 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbp.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll
@@ -2713,44 +2713,6 @@ define i64 @shfl16(i64 %a, i64 %b) nounwind {
   ret i64 %or3
 }
 
-define signext i32 @pack_i32(i32 signext %a, i32 signext %b) nounwind {
-; RV64I-LABEL: pack_i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 48
-; RV64I-NEXT:    srli a0, a0, 48
-; RV64I-NEXT:    slliw a1, a1, 16
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV64ZBP-LABEL: pack_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    packw a0, a0, a1
-; RV64ZBP-NEXT:    ret
-  %shl = and i32 %a, 65535
-  %shl1 = shl i32 %b, 16
-  %or = or i32 %shl1, %shl
-  ret i32 %or
-}
-
-define i64 @pack_i64(i64 %a, i64 %b) nounwind {
-; RV64I-LABEL: pack_i64:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    slli a0, a0, 32
-; RV64I-NEXT:    srli a0, a0, 32
-; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV64ZBP-LABEL: pack_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    pack a0, a0, a1
-; RV64ZBP-NEXT:    ret
-  %shl = and i64 %a, 4294967295
-  %shl1 = shl i64 %b, 32
-  %or = or i64 %shl1, %shl
-  ret i64 %or
-}
-
 define signext i32 @packu_i32(i32 signext %a, i32 signext %b) nounwind {
 ; RV64I-LABEL: packu_i32:
 ; RV64I:       # %bb.0:
@@ -2790,86 +2752,6 @@ define i64 @packu_i64(i64 %a, i64 %b) nounwind {
   ret i64 %or
 }
 
-define signext i32 @packh_i32(i32 signext %a, i32 signext %b) nounwind {
-; RV64I-LABEL: packh_i32:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    slli a1, a1, 56
-; RV64I-NEXT:    srli a1, a1, 48
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV64ZBP-LABEL: packh_i32:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    packh a0, a0, a1
-; RV64ZBP-NEXT:    ret
-  %and = and i32 %a, 255
-  %and1 = shl i32 %b, 8
-  %shl = and i32 %and1, 65280
-  %or = or i32 %shl, %and
-  ret i32 %or
-}
-
-define i32 @packh_i32_2(i32 %a, i32 %b) nounwind {
-; RV64I-LABEL: packh_i32_2:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    andi a1, a1, 255
-; RV64I-NEXT:    slli a1, a1, 8
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV64ZBP-LABEL: packh_i32_2:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    packh a0, a0, a1
-; RV64ZBP-NEXT:    ret
-  %and = and i32 %a, 255
-  %and1 = and i32 %b, 255
-  %shl = shl i32 %and1, 8
-  %or = or i32 %shl, %and
-  ret i32 %or
-}
-
-define i64 @packh_i64(i64 %a, i64 %b) nounwind {
-; RV64I-LABEL: packh_i64:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    slli a1, a1, 56
-; RV64I-NEXT:    srli a1, a1, 48
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV64ZBP-LABEL: packh_i64:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    packh a0, a0, a1
-; RV64ZBP-NEXT:    ret
-  %and = and i64 %a, 255
-  %and1 = shl i64 %b, 8
-  %shl = and i64 %and1, 65280
-  %or = or i64 %shl, %and
-  ret i64 %or
-}
-
-define i64 @packh_i64_2(i64 %a, i64 %b) nounwind {
-; RV64I-LABEL: packh_i64_2:
-; RV64I:       # %bb.0:
-; RV64I-NEXT:    andi a0, a0, 255
-; RV64I-NEXT:    andi a1, a1, 255
-; RV64I-NEXT:    slli a1, a1, 8
-; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    ret
-;
-; RV64ZBP-LABEL: packh_i64_2:
-; RV64ZBP:       # %bb.0:
-; RV64ZBP-NEXT:    packh a0, a0, a1
-; RV64ZBP-NEXT:    ret
-  %and = and i64 %a, 255
-  %and1 = and i64 %b, 255
-  %shl = shl i64 %and1, 8
-  %or = or i64 %shl, %and
-  ret i64 %or
-}
-
 define i32 @zexth_i32(i32 %a) nounwind {
 ; RV64I-LABEL: zexth_i32:
 ; RV64I:       # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rv64zknd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zknd-intrinsic.ll
new file mode 100644
index 0000000000000..230ae7134e457
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zknd-intrinsic.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zknd -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZKND
+
+declare i64 @llvm.riscv.aes64ds(i64, i64);
+
+define i64 @aes64ds(i64 %a, i64 %b) nounwind {
+; RV64ZKND-LABEL: aes64ds
+; RV64ZKND: # %bb.0:
+; RV64ZKND-NEXT: aes64ds a0, a0, a1
+; RV64ZKND-NEXT: ret
+    %val = call i64 @llvm.riscv.aes64ds(i64 %a, i64 %b)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.aes64dsm(i64, i64);
+
+define i64 @aes64dsm(i64 %a, i64 %b) nounwind {
+; RV64ZKND-LABEL: aes64dsm
+; RV64ZKND: # %bb.0:
+; RV64ZKND-NEXT: aes64dsm a0, a0, a1
+; RV64ZKND-NEXT: ret
+    %val = call i64 @llvm.riscv.aes64dsm(i64 %a, i64 %b)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.aes64im(i64);
+
+define i64 @aes64im(i64 %a) nounwind {
+; RV64ZKND-LABEL: aes64im
+; RV64ZKND: # %bb.0:
+; RV64ZKND-NEXT: aes64im a0, a0
+; RV64ZKND-NEXT: ret
+    %val = call i64 @llvm.riscv.aes64im(i64 %a)
+    ret i64 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zknd-zkne-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zknd-zkne-intrinsic.ll
new file mode 100644
index 0000000000000..23559b97410ee
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zknd-zkne-intrinsic.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zknd -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZKND-ZKNE
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zkne -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZKND-ZKNE
+
+declare i64 @llvm.riscv.aes64ks2(i64, i64);
+
+define i64 @aes64ks2(i64 %a, i64 %b) nounwind {
+; RV64ZKND-ZKNE-LABEL: aes64ks2
+; RV64ZKND-ZKNE: # %bb.0:
+; RV64ZKND-ZKNE-NEXT: aes64ks2 a0, a0, a1
+; RV64ZKND-ZKNE-NEXT: ret
+    %val = call i64 @llvm.riscv.aes64ks2(i64 %a, i64 %b)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.aes64ks1i(i64, i32);
+
+define i64 @aes64ks1i(i64 %a) nounwind {
+; RV64ZKND-ZKNE-LABEL: aes64ks1i
+; RV64ZKND-ZKNE: # %bb.0:
+; RV64ZKND-ZKNE-NEXT: aes64ks1i a0, a0, 10
+; RV64ZKND-ZKNE-NEXT: ret
+    %val = call i64 @llvm.riscv.aes64ks1i(i64 %a, i32 10)
+    ret i64 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zkne-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zkne-intrinsic.ll
new file mode 100644
index 0000000000000..1697769d2b9d4
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zkne-intrinsic.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zkne -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZKNE
+
+declare i64 @llvm.riscv.aes64es(i64, i64);
+
+define i64 @aes64es(i64 %a, i64 %b) nounwind {
+; RV64ZKNE-LABEL: aes64es
+; RV64ZKNE: # %bb.0:
+; RV64ZKNE-NEXT: aes64es a0, a0, a1
+; RV64ZKNE-NEXT: ret
+    %val = call i64 @llvm.riscv.aes64es(i64 %a, i64 %b)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.aes64esm(i64, i64);
+
+define i64 @aes64esm(i64 %a, i64 %b) nounwind {
+; RV64ZKNE-LABEL: aes64esm
+; RV64ZKNE: # %bb.0:
+; RV64ZKNE-NEXT: aes64esm a0, a0, a1
+; RV64ZKNE-NEXT: ret
+    %val = call i64 @llvm.riscv.aes64esm(i64 %a, i64 %b)
+    ret i64 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zknh-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zknh-intrinsic.ll
new file mode 100644
index 0000000000000..b77fe1c4cf7d2
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zknh-intrinsic.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zknh -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZKNH
+
+
+declare i64 @llvm.riscv.sha256sig0.i64(i64);
+
+define i64 @sha256sig0_i64(i64 %a) nounwind {
+; RV64ZKNH-LABEL: sha256sig0_i64
+; RV64ZKNH: # %bb.0:
+; RV64ZKNH-NEXT: sha256sig0 a0, a0
+; RV64ZKNH-NEXT: ret
+    %val = call i64 @llvm.riscv.sha256sig0.i64(i64 %a)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.sha256sig1.i64(i64);
+
+define i64 @sha256sig1_i64(i64 %a) nounwind {
+; RV64ZKNH-LABEL: sha256sig1_i64
+; RV64ZKNH: # %bb.0:
+; RV64ZKNH-NEXT: sha256sig1 a0, a0
+; RV64ZKNH-NEXT: ret
+    %val = call i64 @llvm.riscv.sha256sig1.i64(i64 %a)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.sha256sum0.i64(i64);
+
+define i64 @sha256sum0_i64(i64 %a) nounwind {
+; RV64ZKNH-LABEL: sha256sum0_i64
+; RV64ZKNH: # %bb.0:
+; RV64ZKNH-NEXT: sha256sum0 a0, a0
+; RV64ZKNH-NEXT: ret
+    %val = call i64 @llvm.riscv.sha256sum0.i64(i64 %a)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.sha256sum1.i64(i64);
+
+define i64 @sha256sum1_i64(i64 %a) nounwind {
+; RV64ZKNH-LABEL: sha256sum1_i64
+; RV64ZKNH: # %bb.0:
+; RV64ZKNH-NEXT: sha256sum1 a0, a0
+; RV64ZKNH-NEXT: ret
+    %val = call i64 @llvm.riscv.sha256sum1.i64(i64 %a)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.sha512sig0(i64);
+
+define i64 @sha512sig0(i64 %a) nounwind {
+; RV64ZKNH-LABEL: sha512sig0
+; RV64ZKNH: # %bb.0:
+; RV64ZKNH-NEXT: sha512sig0 a0, a0
+; RV64ZKNH-NEXT: ret
+    %val = call i64 @llvm.riscv.sha512sig0(i64 %a)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.sha512sig1(i64);
+
+define i64 @sha512sig1(i64 %a) nounwind {
+; RV64ZKNH-LABEL: sha512sig1
+; RV64ZKNH: # %bb.0:
+; RV64ZKNH-NEXT: sha512sig1 a0, a0
+; RV64ZKNH-NEXT: ret
+    %val = call i64 @llvm.riscv.sha512sig1(i64 %a)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.sha512sum0(i64);
+
+define i64 @sha512sum0(i64 %a) nounwind {
+; RV64ZKNH-LABEL: sha512sum0
+; RV64ZKNH: # %bb.0:
+; RV64ZKNH-NEXT: sha512sum0 a0, a0
+; RV64ZKNH-NEXT: ret
+    %val = call i64 @llvm.riscv.sha512sum0(i64 %a)
+    ret i64 %val
+}
+
+declare i64 @llvm.riscv.sha512sum1(i64);
+
+define i64 @sha512sum1(i64 %a) nounwind {
+; RV64ZKNH-LABEL: sha512sum1
+; RV64ZKNH: # %bb.0:
+; RV64ZKNH-NEXT: sha512sum1 a0, a0
+; RV64ZKNH-NEXT: ret
+    %val = call i64 @llvm.riscv.sha512sum1(i64 %a)
+    ret i64 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zksed-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zksed-intrinsic.ll
new file mode 100644
index 0000000000000..2fa7601906067
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zksed-intrinsic.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zksed -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZKSED
+
+declare i64 @llvm.riscv.sm4ks.i64(i64, i64, i8);
+
+define i64 @sm4ks_i64(i64 %a, i64 %b) nounwind {
+; RV64ZKSED-LABEL: sm4ks_i64:
+; RV64ZKSED:       # %bb.0:
+; RV64ZKSED-NEXT:    sm4ks a0, a0, a1, 0
+; RV64ZKSED-NEXT:    ret
+  %val = call i64 @llvm.riscv.sm4ks.i64(i64 %a, i64 %b, i8 0)
+  ret i64 %val
+}
+
+declare i64 @llvm.riscv.sm4ed.i64(i64, i64, i8);
+
+define i64 @sm4ed_i64(i64 %a, i64 %b) nounwind {
+; RV64ZKSED-LABEL: sm4ed_i64:
+; RV64ZKSED:       # %bb.0:
+; RV64ZKSED-NEXT:    sm4ed a0, a0, a1, 1
+; RV64ZKSED-NEXT:    ret
+  %val = call i64 @llvm.riscv.sm4ed.i64(i64 %a, i64 %b, i8 1)
+  ret i64 %val
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64zksh-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64zksh-intrinsic.ll
new file mode 100644
index 0000000000000..8790ec1af24dd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64zksh-intrinsic.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+zksh -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64ZKSH
+
+declare i64 @llvm.riscv.sm3p0.i64(i64);
+
+define i64 @sm3p0_i64(i64 %a) nounwind {
+; RV64ZKSH-LABEL: sm3p0_i64:
+; RV64ZKSH:       # %bb.0:
+; RV64ZKSH-NEXT:    sm3p0 a0, a0
+; RV64ZKSH-NEXT:    ret
+  %val = call i64 @llvm.riscv.sm3p0.i64(i64 %a)
+  ret i64 %val
+}
+
+declare i64 @llvm.riscv.sm3p1.i64(i64);
+
+define i64 @sm3p1_i64(i64 %a) nounwind {
+; RV64ZKSH-LABEL: sm3p1_i64:
+; RV64ZKSH:       # %bb.0:
+; RV64ZKSH-NEXT:    sm3p1 a0, a0
+; RV64ZKSH-NEXT:    ret
+  %val = call i64 @llvm.riscv.sm3p1.i64(i64 %a)
+  ret i64 %val
+}

From 4ad517e6b0902ced1aeb179cabc9129a2007eca4 Mon Sep 17 00:00:00 2001
From: Zi Xuan Wu <zixuan.wu@linux.alibaba.com>
Date: Thu, 27 Jan 2022 14:49:43 +0800
Subject: [PATCH 854/946] [CSKY] Add floating operation support including float
 and double

CSKY arch has multiple FPU instruction versions such as FPU, FPUv2 and FPUv3 to implement floating operations.
For now, we just only support FPUv2 and FPUv3.

It includes the encoding, asm parsing of instructions and codegen of DAG nodes.
---
 .../Target/CSKY/AsmParser/CSKYAsmParser.cpp   |    8 +
 llvm/lib/Target/CSKY/CSKY.td                  |   34 +
 .../Target/CSKY/CSKYConstantIslandPass.cpp    |    5 +
 llvm/lib/Target/CSKY/CSKYISelLowering.cpp     |   46 +
 llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td    |  274 +++
 llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td    |  208 ++
 llvm/lib/Target/CSKY/CSKYInstrInfo.cpp        |   64 +-
 llvm/lib/Target/CSKY/CSKYInstrInfo.h          |    5 +
 llvm/lib/Target/CSKY/CSKYInstrInfo.td         |    2 +
 llvm/lib/Target/CSKY/CSKYInstrInfoF1.td       |  420 ++++
 llvm/lib/Target/CSKY/CSKYInstrInfoF2.td       |  462 ++++
 llvm/lib/Target/CSKY/CSKYRegisterInfo.td      |    2 +
 .../CSKY/MCTargetDesc/CSKYInstPrinter.cpp     |   15 +
 .../CSKY/MCTargetDesc/CSKYInstPrinter.h       |    4 +
 llvm/test/CodeGen/CSKY/br.ll                  |   18 +-
 llvm/test/CodeGen/CSKY/fpu/base-d.ll          |  234 ++
 llvm/test/CodeGen/CSKY/fpu/base-f.ll          |  269 +++
 llvm/test/CodeGen/CSKY/fpu/br-d.ll            | 2061 +++++++++++++++++
 llvm/test/CodeGen/CSKY/fpu/br-f.ll            | 1751 ++++++++++++++
 llvm/test/CodeGen/CSKY/fpu/cmp-d.ll           |  766 ++++++
 llvm/test/CodeGen/CSKY/fpu/cmp-f.ll           |  896 +++++++
 llvm/test/CodeGen/CSKY/fpu/cvt-d.ll           |  371 +++
 llvm/test/CodeGen/CSKY/fpu/cvt-f.ll           |  334 +++
 llvm/test/CodeGen/CSKY/fpu/ldst-d.ll          |  106 +
 llvm/test/CodeGen/CSKY/fpu/ldst-f.ll          |   83 +
 llvm/test/CodeGen/CSKY/fpu/lit.local.cfg      |    2 +
 llvm/test/MC/CSKY/fpuv2.s                     |   31 +
 llvm/test/MC/CSKY/fpuv3.s                     |   27 +
 28 files changed, 8490 insertions(+), 8 deletions(-)
 create mode 100644 llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td
 create mode 100644 llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td
 create mode 100644 llvm/lib/Target/CSKY/CSKYInstrInfoF1.td
 create mode 100644 llvm/lib/Target/CSKY/CSKYInstrInfoF2.td
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/base-d.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/base-f.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/br-d.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/br-f.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/cmp-d.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/cmp-f.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/cvt-d.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/cvt-f.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/ldst-d.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/ldst-f.ll
 create mode 100644 llvm/test/CodeGen/CSKY/fpu/lit.local.cfg
 create mode 100644 llvm/test/MC/CSKY/fpuv2.s
 create mode 100644 llvm/test/MC/CSKY/fpuv3.s

diff --git a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
index 29b99a84a6cd3..a62bd111cba95 100644
--- a/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
+++ b/llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp
@@ -303,6 +303,14 @@ struct CSKYOperand : public MCParsedAsmOperand {
 
   bool isRegSeq() const { return isRegSeqTemplate<CSKY::R0, CSKY::R31>(); }
 
+  bool isRegSeqV1() const {
+    return isRegSeqTemplate<CSKY::F0_32, CSKY::F15_32>();
+  }
+
+  bool isRegSeqV2() const {
+    return isRegSeqTemplate<CSKY::F0_32, CSKY::F31_32>();
+  }
+
   static bool isLegalRegList(unsigned from, unsigned to) {
     if (from == 0 && to == 0)
       return true;
diff --git a/llvm/lib/Target/CSKY/CSKY.td b/llvm/lib/Target/CSKY/CSKY.td
index e26781ca6aa1d..ddb7fe93706eb 100644
--- a/llvm/lib/Target/CSKY/CSKY.td
+++ b/llvm/lib/Target/CSKY/CSKY.td
@@ -11,6 +11,40 @@ include "llvm/Target/Target.td"
 //===----------------------------------------------------------------------===//
 // CSKY subtarget features and instruction predicates.
 //===----------------------------------------------------------------------===//
+def ModeHardFloat :
+  SubtargetFeature<"hard-float", "UseHardFloat",
+                   "true", "Use hard floating point features">;
+def ModeHardFloatABI :
+  SubtargetFeature<"hard-float-abi", "UseHardFloatABI",
+                   "true", "Use hard floating point ABI to pass args">;
+
+def FeatureFPUV2_SF
+    : SubtargetFeature<"fpuv2_sf", "HasFPUv2SingleFloat", "true",
+                       "Enable FPUv2 single float instructions">;
+def HasFPUv2_SF : Predicate<"Subtarget->hasFPUv2SingleFloat()">,
+                  AssemblerPredicate<(all_of FeatureFPUV2_SF),
+                  "Enable FPUv2 single float instructions">;
+
+def FeatureFPUV2_DF
+    : SubtargetFeature<"fpuv2_df", "HasFPUv2DoubleFloat", "true",
+                       "Enable FPUv2 double float instructions">;
+def HasFPUv2_DF : Predicate<"Subtarget->hasFPUv2DoubleFloat()">,
+                  AssemblerPredicate<(all_of FeatureFPUV2_DF),
+                  "Enable FPUv2 double float instructions">;
+
+def FeatureFPUV3_SF
+    : SubtargetFeature<"fpuv3_sf", "HasFPUv3SingleFloat", "true",
+                       "Enable FPUv3 single float instructions">;
+def HasFPUv3_SF : Predicate<"Subtarget->hasFPUv3SingleFloat()">,
+                  AssemblerPredicate<(all_of FeatureFPUV3_SF),
+                  "Enable FPUv3 single float instructions">;
+
+def FeatureFPUV3_DF
+    : SubtargetFeature<"fpuv3_df", "HasFPUv3DoubleFloat", "true",
+                       "Enable FPUv3 double float instructions">;
+def HasFPUv3_DF : Predicate<"Subtarget->hasFPUv3DoubleFloat()">,
+                  AssemblerPredicate<(all_of FeatureFPUV3_DF),
+                  "Enable FPUv3 double float instructions">;
 
 def FeatureBTST16 : SubtargetFeature<"btst16", "HasBTST16", "true",
                                      "Use the 16-bit btsti instruction">;
diff --git a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
index 13e00deee5898..3ac335e2ad9d5 100644
--- a/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
+++ b/llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp
@@ -551,6 +551,11 @@ void CSKYConstantIslands::initializeFunctionInfo(
             Bits = 16;
             Scale = 4;
             break;
+          case CSKY::f2FLRW_S:
+          case CSKY::f2FLRW_D:
+            Bits = 8;
+            Scale = 4;
+            break;
           case CSKY::GRS32:
             Bits = 17;
             Scale = 2;
diff --git a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
index c4d5d687216d1..0b589e3d3e4f7 100644
--- a/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
+++ b/llvm/lib/Target/CSKY/CSKYISelLowering.cpp
@@ -38,6 +38,18 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
   // Register Class
   addRegisterClass(MVT::i32, &CSKY::GPRRegClass);
 
+  if (STI.useHardFloat()) {
+    if (STI.hasFPUv2SingleFloat())
+      addRegisterClass(MVT::f32, &CSKY::sFPR32RegClass);
+    else if (STI.hasFPUv3SingleFloat())
+      addRegisterClass(MVT::f32, &CSKY::FPR32RegClass);
+
+    if (STI.hasFPUv2DoubleFloat())
+      addRegisterClass(MVT::f64, &CSKY::sFPR64RegClass);
+    else if (STI.hasFPUv3DoubleFloat())
+      addRegisterClass(MVT::f64, &CSKY::FPR64RegClass);
+  }
+
   setOperationAction(ISD::ADDCARRY, MVT::i32, Legal);
   setOperationAction(ISD::SUBCARRY, MVT::i32, Legal);
   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
@@ -95,6 +107,40 @@ CSKYTargetLowering::CSKYTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
   }
 
+  // Float
+
+  ISD::CondCode FPCCToExtend[] = {
+      ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
+      ISD::SETUGE, ISD::SETULT, ISD::SETULE,
+  };
+
+  ISD::NodeType FPOpToExpand[] = {ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
+                                  ISD::FPOW, ISD::FREM, ISD::FCOPYSIGN};
+
+  if (STI.useHardFloat()) {
+
+    MVT AllVTy[] = {MVT::f32, MVT::f64};
+
+    for (auto VT : AllVTy) {
+      setOperationAction(ISD::FREM, VT, Expand);
+      setOperationAction(ISD::SELECT_CC, VT, Expand);
+      setOperationAction(ISD::BR_CC, VT, Expand);
+
+      for (auto CC : FPCCToExtend)
+        setCondCodeAction(CC, VT, Expand);
+      for (auto Op : FPOpToExpand)
+        setOperationAction(Op, VT, Expand);
+    }
+
+    if (STI.hasFPUv2SingleFloat() || STI.hasFPUv3SingleFloat()) {
+      setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+    }
+    if (STI.hasFPUv2DoubleFloat() || STI.hasFPUv3DoubleFloat()) {
+      setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
+      setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+    }
+  }
+
   // Compute derived properties from the register classes.
   computeRegisterProperties(STI.getRegisterInfo());
 
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td b/llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td
new file mode 100644
index 0000000000000..446670a4d0a97
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormatsF1.td
@@ -0,0 +1,274 @@
+//===- CSKYInstrFormatsF1.td - CSKY Float1.0 Instr Format --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// CSKY Instruction Format Float1.0 Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class CSKYFP1Inst<dag outs, dag ins, string asmstr, list<dag> pattern>
+  : CSKY32Inst<AddrModeNone, 0x3d, outs, ins, asmstr, pattern>, Requires<[HasFPUv2_SF]> {
+}
+
+class F_XYZ_BASE<bits<5> datatype, bits<6> sop, dag outs, dag ins, string opcodestr, list<dag> pattern>
+  : CSKYFP1Inst<outs, ins, opcodestr, pattern> {
+  bits<4> vrx;
+  bits<4> vry;
+  bits<4> vrz;
+  let Inst{25 - 21} = {0, vry};
+  let Inst{20 - 16} = {0, vrx};
+  let Inst{15 - 11} = datatype;
+  let Inst{10 - 5} = sop;
+  let Inst{4 - 0} = {0, vrz};
+}
+
+class F_XZ_GF<bits<5> datatype, bits<6> sop, dag outs, dag ins, string opcodestr, list<dag> pattern>
+  : CSKYFP1Inst<outs, ins, opcodestr, pattern> {
+  bits<4> vrx;
+  bits<5> rz;
+  let Inst{25 - 21} = 0;
+  let Inst{20 - 16} = {0, vrx};
+  let Inst{15 - 11} = datatype;
+  let Inst{10 - 5} = sop;
+  let Inst{4 - 0} = {rz};
+}
+
+class F_XZ_FG<bits<5> datatype, bits<6> sop, dag outs, dag ins, string opcodestr, list<dag> pattern>
+  : CSKYFP1Inst<outs, ins, opcodestr, pattern> {
+  bits<5> rx;
+  bits<4> vrz;
+  let Inst{25 - 21} = 0;
+  let Inst{20 - 16} = {rx};
+  let Inst{15 - 11} = datatype;
+  let Inst{10 - 5} = sop;
+  let Inst{4 - 0} = {0, vrz};
+}
+
+class F_XZ_TRANS_FROM<bits<6> sop, string op, RegisterOperand regtype1, RegisterOperand regtype2>
+  : F_XZ_GF<3, sop, (outs regtype1:$rz), (ins regtype2:$vrx), !strconcat(op, "\t$rz, $vrx"),
+  []>;
+
+class F_XZ_TRANS_TO<bits<6> sop, string op, RegisterOperand regtype1, RegisterOperand regtype2>
+  : F_XZ_FG<3, sop, (outs regtype1:$vrz), (ins regtype2:$rx), !strconcat(op, "\t$vrz, $rx"),
+  []>;
+
+let vry = 0 in {
+class F_XZ<bits<5> datatype, bits<6> sop, string op, string op_su, PatFrag opnode, RegisterOperand regtype>
+  : F_XYZ_BASE<datatype, sop, (outs regtype:$vrz), (ins regtype:$vrx), !strconcat(op#op_su, "\t$vrz, $vrx"),
+  [(set regtype:$vrz, (opnode regtype:$vrx))]>;
+
+class F_MOV<bits<5> datatype, bits<6> sop, string op, string op_su, RegisterOperand regtype>
+  : F_XYZ_BASE<datatype, sop, (outs regtype:$vrz), (ins regtype:$vrx), !strconcat(op#op_su, "\t$vrz, $vrx"),
+  []>;
+
+class F_XZ_TRANS<bits<6> sop, string op, RegisterOperand regtype1, RegisterOperand regtype2>
+  : F_XYZ_BASE<3, sop, (outs regtype1:$vrz), (ins regtype2:$vrx), !strconcat(op, "\t$vrz, $vrx"),
+  []>;
+
+class F_XZ_TRANS_DS<bits<6> sop, string op, PatFrag opnode>
+  : F_XYZ_BASE<3, sop, (outs sFPR32Op:$vrz), (ins sFPR64Op:$vrx), !strconcat(op, "\t$vrz, $vrx"),
+  [(set sFPR32Op:$vrz, (opnode sFPR64Op:$vrx))]>;
+
+class F_XZ_TRANS_SD<bits<6> sop, string op, PatFrag opnode>
+  : F_XYZ_BASE<3, sop, (outs sFPR64Op:$vrz), (ins sFPR32Op:$vrx), !strconcat(op, "\t$vrz, $vrx"),
+  [(set sFPR64Op:$vrz, (opnode sFPR32Op:$vrx))]>;
+}
+
+multiclass FT_MOV<bits<6> sop, string op> {
+  def _S :  F_MOV<0, sop, op, "s", sFPR32Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_MOV<1, sop, op, "d", sFPR64Op>;
+}
+
+multiclass FT_XZ<bits<6> sop, string op, PatFrag opnode> {
+  def _S :  F_XZ<0, sop, op, "s", opnode, sFPR32Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_XZ<1, sop, op, "d", opnode, sFPR64Op>;
+}
+
+let vrz = 0, isCompare = 1 in {
+class F_CMPXY<bits<5> datatype, bits<6> sop, string op, string op_su, RegisterOperand regtype>
+  : F_XYZ_BASE<datatype, sop, (outs CARRY:$ca), (ins regtype:$vrx, regtype:$vry), !strconcat(op#op_su, "\t$vrx, $vry"),
+  []>;
+
+let vry = 0 in{
+class F_CMPZX<bits<5> datatype, bits<6> sop, string op, string op_su, RegisterOperand regtype>
+  : F_XYZ_BASE<datatype, sop, (outs CARRY:$ca), (ins regtype:$vrx), !strconcat(op#op_su, "\t$vrx"),
+  []>;
+}
+}
+
+class F_XYZ<bits<5> datatype, bits<6> sop, string op, string op_su, PatFrag opnode, RegisterOperand regtype>
+  : F_XYZ_BASE<datatype, sop, (outs regtype:$vrz), (ins regtype:$vrx, regtype:$vry),
+    !strconcat(op#op_su, "\t$vrz, $vrx, $vry"),
+  [(set regtype:$vrz, (opnode regtype:$vrx, regtype:$vry))]>;
+
+multiclass FT_XYZ<bits<6> sop, string op, PatFrag opnode> {
+  def _S :  F_XYZ<0, sop, op, "s", opnode, sFPR32Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_XYZ<1, sop, op, "d", opnode, sFPR64Op>;
+}
+
+let Constraints = "$vrt = $vrz" in {
+class F_ACCUM_XYZ<bits<5> datatype, bits<6> sop, string op, string op_su, PatFrag opnode, RegisterOperand regtype>
+  : F_XYZ_BASE<datatype, sop, (outs regtype:$vrz), (ins regtype:$vrt, regtype:$vrx, regtype:$vry),
+    !strconcat(op#op_su, "\t$vrz, $vrx, $vry"),
+  [(set regtype:$vrz, (opnode regtype:$vrt, regtype:$vrx, regtype:$vry))]>;
+}
+
+multiclass FT_ACCUM_XYZ<bits<6> sop, string op, PatFrag opnode> {
+  def _S :  F_ACCUM_XYZ<0, sop, op, "s", opnode, sFPR32Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_ACCUM_XYZ<1, sop, op, "d", opnode, sFPR64Op>;
+}
+
+multiclass FT_CMPXY<bits<6> sop, string op> {
+  def _S :  F_CMPXY<0, sop, op, "s", sFPR32Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_CMPXY<1, sop, op, "d", sFPR64Op>;
+}
+
+
+multiclass FT_CMPZX<bits<6> sop, string op> {
+  def _S :  F_CMPZX<0, sop, op, "s", sFPR32Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_CMPZX<1, sop, op, "d", sFPR64Op>;
+}
+
+class F_I8_XY_MEM<bits<7> sop, bits<1> sop_su, dag outs, dag ins, string opcodestr, list<dag> pattern>
+  : CSKY32Inst<AddrMode32SDF, 0x3d, outs, ins, opcodestr, pattern> {
+  bits<5> rx;
+  bits<4> vrz;
+  bits<8> imm8;
+  let Inst{25} = 0;
+  let Inst{24 - 21} = imm8{7 - 4};  //imm4h
+  let Inst{20 - 16} = rx;  //rx
+  let Inst{15 - 9} = sop;
+  let Inst{8} = sop_su;
+  let Inst{7 - 4} = imm8{3 - 0}; // imm4l
+  let Inst{3 - 0} = vrz;
+}
+
+class F_I4_XY_MEM<bits<7> sop, bits<1> sop_su, dag outs, dag ins, string opcodestr, list<dag> pattern>
+  : CSKY32Inst<AddrMode32SDF, 0x3d, outs, ins, opcodestr, pattern> {
+  bits<10> regs;
+  bits<5> rx;
+
+  let Inst{25} = 0;
+  let Inst{24 - 21} = regs{3-0};  //imm4
+  let Inst{20 - 16} = rx;  //rx
+  let Inst{15 - 9} = sop;
+  let Inst{8} = sop_su;
+  let Inst{7 - 4} = 0;
+  let Inst{3 - 0} = regs{8-5};
+}
+
+class F_I8_Z_MEM<bits<7> sop, bits<1> sop_su, dag outs, dag ins, string opcodestr, list<dag> pattern>
+  : CSKY32Inst<AddrModeNone, 0x3d, outs, ins, opcodestr, pattern> {
+  bits<4> vrz;
+  bits<8> imm8;
+  let Inst{25} = 0;
+  let Inst{24 - 21} = imm8{7 - 4};  //imm4h
+  let Inst{20 - 16} = 0;  //rx
+  let Inst{15 - 9} = sop;
+  let Inst{8} = sop_su;
+  let Inst{7 - 4} = imm8{3 - 0}; // imm4l
+  let Inst{3 - 0} = vrz;
+}
+
+class F_XYZ_MEM<bits<7> sop, bits<1> sop_su, dag outs, dag ins, string opcodestr, list<dag> pattern>
+  : CSKY32Inst<AddrModeNone, 0x3d, outs, ins, opcodestr, pattern> {
+  bits<5> rx;
+  bits<5> ry;
+  bits<4> vrz;
+  bits<2> imm;
+
+  let Inst{25 - 21} = ry;  // ry;
+  let Inst{20 - 16} = rx;  // rx;
+  let Inst{15 - 9} = sop;
+  let Inst{8} = sop_su;
+  let Inst{7} = 0;
+  let Inst{6,5} = imm;  // shift;
+  let Inst{4} = 0;
+  let Inst{3 - 0} = vrz;
+}
+
+class F_XYAI_LD<bits<7> sop, bits<1> sop_su, string op, string op_su,
+                 RegisterOperand regtype, Operand operand>
+  : F_I8_XY_MEM<sop, sop_su, (outs regtype:$vrz), (ins GPR:$rx, operand:$imm8),
+    !strconcat(op#op_su, "\t$vrz, ($rx, ${imm8})"), []>;
+
+class F_XYAR_LD<bits<7> sop, bits<1> sop_su, string op, string op_su,
+                 RegisterOperand regtype>
+  : F_XYZ_MEM<sop, sop_su, (outs regtype:$vrz), (ins GPR:$rx, GPR:$ry, uimm2:$imm),
+    op#op_su#"\t$vrz, ($rx, $ry << ${imm})", []>;
+
+class F_XYAI_ST<bits<7> sop, bits<1> sop_su, string op, string op_su,
+                 RegisterOperand regtype, Operand operand>
+  : F_I8_XY_MEM<sop, sop_su, (outs), (ins regtype:$vrz, GPR:$rx, operand:$imm8),
+    !strconcat(op#op_su, "\t$vrz, ($rx, ${imm8})"), []>;
+
+class F_XYAR_ST<bits<7> sop, bits<1> sop_su, string op, string op_su,
+                 RegisterOperand regtype>
+  : F_XYZ_MEM<sop, sop_su, (outs), (ins regtype:$vrz, GPR:$rx, GPR:$ry, uimm2:$imm),
+    op#op_su#"\t$vrz, ($rx, $ry << ${imm})", []>;
+
+def Mem8SL2 : Operand<iPTR>, ComplexPattern<iPTR, 2, "SelectAddrRegImm8", []> {
+  let MIOperandInfo = (ops GPR, i32imm);
+  let PrintMethod = "printAddrModeRegImmOperand";
+  let EncoderMethod = "getAddrModeFloatImm8_sl2OpValue";
+}
+
+def FRRS : Operand<iPTR>, ComplexPattern<iPTR, 3, "SelectAddrRegReg", []> {
+  let MIOperandInfo = (ops GPR, GPR, i32imm);
+  let PrintMethod = "printAddrModeRegRegSLOperand";
+  let EncoderMethod = "getAddrModeFloatRegRegSLOpValue";
+}
+
+multiclass FT_XYAI_LD<bits<7> sop, string op> {
+  def _S :  F_XYAI_LD<sop, 0, op, "s", sFPR32Op, uimm8_2>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_XYAI_LD<sop, 1, op, "d", sFPR64Op, uimm8_2>;
+}
+
+multiclass FT_XYAR_LD<bits<7> sop, string op> {
+  def _S :  F_XYAR_LD<sop, 0, op, "s", sFPR32Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_XYAR_LD<sop, 1, op, "d", sFPR64Op>;
+}
+
+multiclass FT_XYAI_ST<bits<7> sop, string op> {
+  def _S :  F_XYAI_ST<sop, 0, op, "s", sFPR32Op, uimm8_2>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_XYAI_ST<sop, 1, op, "d", sFPR64Op, uimm8_2>;
+}
+
+multiclass FT_XYAR_ST<bits<7> sop, string op> {
+  def _S :  F_XYAR_ST<sop, 0, op, "s", sFPR32Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_XYAR_ST<sop, 1, op, "d", sFPR64Op>;
+}
+
+multiclass FT_XYAR_STM<bits<7> sop, string op> {
+  def _S :  F_I4_XY_MEM<sop, 0, (outs),
+    (ins GPR:$rx, regseq_f1:$regs, variable_ops),
+      !strconcat(op#"s", "\t$regs, (${rx})"), []>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_I4_XY_MEM<sop, 1, (outs),
+    (ins GPR:$rx, regseq_d1:$regs, variable_ops),
+      !strconcat(op#"d", "\t$regs, (${rx})"), []>;
+}
+
+multiclass FT_XYAR_LDM<bits<7> sop, string op> {
+  def _S :  F_I4_XY_MEM<sop, 0, (outs),
+    (ins GPR:$rx, regseq_f1:$regs, variable_ops),
+      !strconcat(op#"s", "\t$regs, (${rx})"), []>;
+  let Predicates = [HasFPUv2_DF] in
+  def _D :  F_I4_XY_MEM<sop, 1, (outs),
+    (ins GPR:$rx, regseq_d1:$regs, variable_ops),
+      !strconcat(op#"d", "\t$regs, (${rx})"), []>;
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td b/llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td
new file mode 100644
index 0000000000000..641ad623f140e
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrFormatsF2.td
@@ -0,0 +1,208 @@
+//===- CSKYInstrFormatsF2.td - CSKY Float2.0 Instr Format --*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// CSKY Instruction Format Float2.0 Definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class CSKYInstF2<AddrMode am, dag outs, dag ins, string opcodestr,
+                 list<dag> pattern>
+    : CSKY32Inst<am, 0x3d, outs, ins, opcodestr, pattern> {
+  let Predicates = [HasFPUv3_SF];
+  let DecoderNamespace = "FPUV3";
+}
+
+class F2_XYZ<bits<5> datatype, bits<6> sop, string opcodestr, dag outs, dag ins,
+             list<dag> pattern>
+    : CSKYInstF2<AddrModeNone, outs, ins, opcodestr, pattern> {
+  bits<5> vry;
+  bits<5> vrx;
+  bits<5> vrz;
+
+  let Inst{25-21} = vry;
+  let Inst{20-16} = vrx;
+  let Inst{15-11} = datatype;
+  let Inst{10-5} = sop;
+  let Inst{4-0} = vrz;
+}
+
+multiclass F2_XYZ_T<bits<6> sop, string op, PatFrag opnode> {
+  def _S : F2_XYZ<0b00000, sop, op#".32"#"\t$vrz, $vrx, $vry",
+             (outs FPR32Op:$vrz), (ins FPR32Op:$vrx, FPR32Op:$vry),
+             [(set FPR32Op:$vrz, (opnode FPR32Op:$vrx, FPR32Op:$vry))]>;
+  let Predicates = [HasFPUv3_DF] in
+  def _D : F2_XYZ<0b00001, sop, op#".64"#"\t$vrz, $vrx, $vry",
+             (outs FPR64Op:$vrz), (ins FPR64Op:$vrx, FPR64Op:$vry),
+             [(set FPR64Op:$vrz, (opnode FPR64Op:$vrx, FPR64Op:$vry))]>;
+}
+
+let Constraints = "$vrZ = $vrz" in
+multiclass F2_XYZZ_T<bits<6> sop, string op, PatFrag opnode> {
+  def _S : F2_XYZ<0b00000, sop, op#".32"#"\t$vrz, $vrx, $vry",
+                  (outs FPR32Op:$vrz), (ins FPR32Op:$vrZ, FPR32Op:$vrx, FPR32Op:$vry),
+                  [(set FPR32Op:$vrz, (opnode FPR32Op:$vrx, FPR32Op:$vry, FPR32Op:$vrZ))]>;
+  let Predicates = [HasFPUv3_DF] in
+  def _D : F2_XYZ<0b00001, sop, op#".64"#"\t$vrz, $vrx, $vry",
+                  (outs FPR64Op:$vrz), (ins FPR64Op:$vrZ, FPR64Op:$vrx, FPR64Op:$vry),
+                  [(set FPR64Op:$vrz, (opnode FPR64Op:$vrx, FPR64Op:$vry, FPR64Op:$vrZ))]>;
+}
+
+let vry = 0 in {
+class F2_XZ<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op, SDNode opnode>
+    : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrz, $vrx"),
+             (outs regtype:$vrz), (ins regtype:$vrx),
+             [(set regtype:$vrz, (opnode regtype:$vrx))]>;
+
+class F2_XZ_SET<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op>
+    : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrz, $vrx"),
+             (outs regtype:$vrz), (ins regtype:$vrx),
+             []>;
+
+class F2_XZ_P<bits<5> datatype, bits<6> sop, string op, list<dag> pattern = [],
+              dag outs, dag ins>
+    : F2_XYZ<datatype, sop, op#"\t$vrz, $vrx", outs, ins, pattern>;
+}
+
+multiclass F2_XZ_RM<bits<5> datatype, bits<4> sop, string op, dag outs, dag ins> {
+  def _RN  : F2_XZ_P<datatype, {sop, 0b00}, op#".rn", [], outs, ins>;
+  def _RZ  : F2_XZ_P<datatype, {sop, 0b01}, op#".rz", [], outs, ins>;
+  def _RPI : F2_XZ_P<datatype, {sop, 0b10}, op#".rpi", [], outs, ins>;
+  def _RNI : F2_XZ_P<datatype, {sop, 0b11}, op#".rni", [], outs, ins>;
+}
+
+multiclass F2_XZ_T<bits<6> sop, string op, SDNode opnode> {
+  def _S : F2_XZ<0b00000, FPR32Op, sop, op#".32", opnode>;
+  let Predicates = [HasFPUv3_DF] in
+  def _D : F2_XZ<0b00001, FPR64Op, sop, op#".64", opnode>;
+}
+
+multiclass F2_XZ_SET_T<bits<6> sop, string op, string suffix = ""> {
+  def _S : F2_XZ_SET<0b00000, FPR32Op, sop, op#".32"#suffix>;
+  let Predicates = [HasFPUv3_DF] in
+  def _D : F2_XZ_SET<0b00001, FPR64Op, sop, op#".64"#suffix>;
+}
+
+
+let vrz = 0, isCompare = 1 in
+class F2_CXY<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op>
+    : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrx, $vry"),
+             (outs CARRY:$ca), (ins regtype:$vrx, regtype:$vry),
+             []>;
+
+multiclass F2_CXY_T<bits<6> sop, string op> {
+  def _S : F2_CXY<0b00000, FPR32Op, sop, op#".32">;
+  let Predicates = [HasFPUv3_DF] in
+  def _D : F2_CXY<0b00001, FPR64Op, sop, op#".64">;
+}
+
+
+let vrz = 0, vry = 0, isCompare = 1 in
+class F2_CX<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op>
+    : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrx"),
+             (outs CARRY:$ca), (ins regtype:$vrx),
+             []>;
+
+multiclass F2_CX_T<bits<6> sop, string op> {
+  def _S : F2_CX<0b00000, FPR32Op, sop, op#".32">;
+  let Predicates = [HasFPUv3_DF] in
+  def _D : F2_CX<0b00001, FPR64Op, sop, op#".64">;
+}
+
+
+class F2_LDST<bits<2> datatype, bits<1> sop, string op, dag outs, dag ins>
+    : CSKYInstF2<AddrMode32SDF, outs, ins,
+                 !strconcat(op, "\t$vrz, ($rx, ${imm8})"), []> {
+  bits<10> imm8;
+  bits<5> rx;
+  bits<5> vrz;
+
+  let Inst{25} = vrz{4};
+  let Inst{24-21} = imm8{7-4};
+  let Inst{20-16} = rx;
+  let Inst{15-11} = 0b00100;
+  let Inst{10} = sop;
+  let Inst{9-8} = datatype;
+  let Inst{7-4} = imm8{3-0};
+  let Inst{3-0} = vrz{3-0};
+}
+
+class F2_LDST_S<bits<1> sop, string op, dag outs, dag ins>
+    : F2_LDST<0b00, sop, op#".32", outs, ins>;
+class F2_LDST_D<bits<1> sop, string op, dag outs, dag ins>
+    : F2_LDST<0b01, sop, op#".64", outs, ins>;
+
+class F2_LDSTM<bits<2> datatype, bits<1> sop, bits<3> sop2, string op, dag outs, dag ins>
+    : CSKYInstF2<AddrMode32SDF, outs, ins,
+                 !strconcat(op, "\t$regs, (${rx})"), []> {
+  bits<10> regs;
+  bits<5> rx;
+
+  let Inst{25-21} = regs{4-0};
+  let Inst{20-16} = rx;
+  let Inst{15-11} = 0b00110;
+  let Inst{10} = sop;
+  let Inst{9-8} = datatype;
+  let Inst{7-5} = sop2;
+  let Inst{4-0} = regs{9-5};
+}
+
+class F2_LDSTM_S<bits<1> sop, bits<3> sop2, string op, dag outs, dag ins>
+    : F2_LDSTM<0b00, sop, sop2, op#".32", outs, ins>;
+class F2_LDSTM_D<bits<1> sop, bits<3> sop2, string op, dag outs, dag ins>
+    : F2_LDSTM<0b01, sop, sop2, op#".64", outs, ins>;
+
+
+class F2_LDSTR<bits<2> datatype, bits<1> sop, string op, dag outs, dag ins>
+    : CSKYInstF2<AddrModeNone, outs, ins,
+                 op#"\t$rz, ($rx, $ry << ${imm})", []> {
+  bits<5> rx;
+  bits<5> ry;
+  bits<5> rz;
+  bits<2> imm;
+
+  let Inst{25-21} = ry;
+  let Inst{20-16} = rx;
+  let Inst{15-11} = 0b00101;
+  let Inst{10} = sop;
+  let Inst{9-8} = datatype;
+  let Inst{7} = 0;
+  let Inst{6-5} = imm;
+  let Inst{4-0} = rz;
+}
+
+class F2_LDSTR_S<bits<1> sop, string op, dag outs, dag ins>
+    : F2_LDSTR<0b00, sop, op#".32", outs, ins>;
+class F2_LDSTR_D<bits<1> sop, string op, dag outs, dag ins>
+    : F2_LDSTR<0b01, sop, op#".64", outs, ins>;
+
+class F2_CXYZ<bits<5> datatype, RegisterOperand regtype, bits<6> sop, string op>
+    : F2_XYZ<datatype, sop, !strconcat(op, "\t$vrz, $vrx, $vry"),
+             (outs regtype:$vrz), (ins CARRY:$ca, regtype:$vrx, regtype:$vry),
+             []>;
+multiclass F2_CXYZ_T<bits<6> sop, string op> {
+  def _S : F2_CXYZ<0b00000, FPR32Op, sop, op#".32">;
+  let Predicates = [HasFPUv3_DF] in
+  def _D : F2_CXYZ<0b00001, FPR64Op, sop, op#".64">;
+}
+
+class F2_LRW<bits<2> datatype, bits<1> sop, string op, dag outs, dag ins>
+    : CSKYInstF2<AddrModeNone, outs, ins,
+                 !strconcat(op, "\t$vrz, ${imm8}"), []> {
+  bits<10> imm8;
+  bits<5> rx;
+  bits<5> vrz;
+
+  let Inst{25} = vrz{4};
+  let Inst{24-21} = imm8{7-4};
+  let Inst{20-16} = 0;
+  let Inst{15-11} = 0b00111;
+  let Inst{10} = sop;
+  let Inst{9-8} = datatype;
+  let Inst{7-4} = imm8{3-0};
+  let Inst{3-0} = vrz{3-0};
+}
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
index fce94c914f353..c57ccb9d6eea4 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.cpp
@@ -25,6 +25,10 @@ using namespace llvm;
 
 CSKYInstrInfo::CSKYInstrInfo(CSKYSubtarget &STI)
     : CSKYGenInstrInfo(CSKY::ADJCALLSTACKDOWN, CSKY::ADJCALLSTACKUP), STI(STI) {
+  v2sf = STI.hasFPUv2SingleFloat();
+  v2df = STI.hasFPUv2DoubleFloat();
+  v3sf = STI.hasFPUv3SingleFloat();
+  v3df = STI.hasFPUv3DoubleFloat();
 }
 
 static void parseCondBranch(MachineInstr &LastInst, MachineBasicBlock *&Target,
@@ -337,6 +341,10 @@ unsigned CSKYInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
   case CSKY::LD32H:
   case CSKY::LD32HS:
   case CSKY::LD32W:
+  case CSKY::FLD_S:
+  case CSKY::FLD_D:
+  case CSKY::f2FLD_S:
+  case CSKY::f2FLD_D:
   case CSKY::RESTORE_CARRY:
     break;
   }
@@ -361,6 +369,10 @@ unsigned CSKYInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
   case CSKY::ST32B:
   case CSKY::ST32H:
   case CSKY::ST32W:
+  case CSKY::FST_S:
+  case CSKY::FST_D:
+  case CSKY::f2FST_S:
+  case CSKY::f2FST_D:
   case CSKY::SPILL_CARRY:
     break;
   }
@@ -394,7 +406,15 @@ void CSKYInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
   } else if (CSKY::CARRYRegClass.hasSubClassEq(RC)) {
     Opcode = CSKY::SPILL_CARRY;
     CFI->setSpillsCR();
-  } else {
+  } else if (v2sf && CSKY::sFPR32RegClass.hasSubClassEq(RC))
+    Opcode = CSKY::FST_S;
+  else if (v2df && CSKY::sFPR64RegClass.hasSubClassEq(RC))
+    Opcode = CSKY::FST_D;
+  else if (v3sf && CSKY::FPR32RegClass.hasSubClassEq(RC))
+    Opcode = CSKY::f2FST_S;
+  else if (v3df && CSKY::FPR64RegClass.hasSubClassEq(RC))
+    Opcode = CSKY::f2FST_D;
+  else {
     llvm_unreachable("Unknown RegisterClass");
   }
 
@@ -429,7 +449,15 @@ void CSKYInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
   } else if (CSKY::CARRYRegClass.hasSubClassEq(RC)) {
     Opcode = CSKY::RESTORE_CARRY;
     CFI->setSpillsCR();
-  } else {
+  } else if (v2sf && CSKY::sFPR32RegClass.hasSubClassEq(RC))
+    Opcode = CSKY::FLD_S;
+  else if (v2df && CSKY::sFPR64RegClass.hasSubClassEq(RC))
+    Opcode = CSKY::FLD_D;
+  else if (v3sf && CSKY::FPR32RegClass.hasSubClassEq(RC))
+    Opcode = CSKY::f2FLD_S;
+  else if (v3df && CSKY::FPR64RegClass.hasSubClassEq(RC))
+    Opcode = CSKY::f2FLD_D;
+  else {
     llvm_unreachable("Unknown RegisterClass");
   }
 
@@ -492,6 +520,38 @@ void CSKYInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   unsigned Opcode = 0;
   if (CSKY::GPRRegClass.contains(DestReg, SrcReg))
     Opcode = CSKY::MOV32;
+  else if (v2sf && CSKY::sFPR32RegClass.contains(DestReg, SrcReg))
+    Opcode = CSKY::FMOV_S;
+  else if (v3sf && CSKY::FPR32RegClass.contains(DestReg, SrcReg))
+    Opcode = CSKY::f2FMOV_S;
+  else if (v2df && CSKY::sFPR64RegClass.contains(DestReg, SrcReg))
+    Opcode = CSKY::FMOV_D;
+  else if (v3df && CSKY::FPR64RegClass.contains(DestReg, SrcReg))
+    Opcode = CSKY::f2FMOV_D;
+  else if (v2sf && CSKY::sFPR32RegClass.contains(SrcReg) &&
+           CSKY::GPRRegClass.contains(DestReg))
+    Opcode = CSKY::FMFVRL;
+  else if (v3sf && CSKY::FPR32RegClass.contains(SrcReg) &&
+           CSKY::GPRRegClass.contains(DestReg))
+    Opcode = CSKY::f2FMFVRL;
+  else if (v2df && CSKY::sFPR64RegClass.contains(SrcReg) &&
+           CSKY::GPRRegClass.contains(DestReg))
+    Opcode = CSKY::FMFVRL_D;
+  else if (v3df && CSKY::FPR64RegClass.contains(SrcReg) &&
+           CSKY::GPRRegClass.contains(DestReg))
+    Opcode = CSKY::f2FMFVRL_D;
+  else if (v2sf && CSKY::GPRRegClass.contains(SrcReg) &&
+           CSKY::sFPR32RegClass.contains(DestReg))
+    Opcode = CSKY::FMTVRL;
+  else if (v3sf && CSKY::GPRRegClass.contains(SrcReg) &&
+           CSKY::FPR32RegClass.contains(DestReg))
+    Opcode = CSKY::f2FMTVRL;
+  else if (v2df && CSKY::GPRRegClass.contains(SrcReg) &&
+           CSKY::sFPR64RegClass.contains(DestReg))
+    Opcode = CSKY::FMTVRL_D;
+  else if (v3df && CSKY::GPRRegClass.contains(SrcReg) &&
+           CSKY::FPR64RegClass.contains(DestReg))
+    Opcode = CSKY::f2FMTVRL_D;
   else {
     LLVM_DEBUG(dbgs() << "src = " << SrcReg << ", dst = " << DestReg);
     LLVM_DEBUG(I->dump());
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.h b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
index fd6501ee48ed9..1a1bbbf9154f7 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.h
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.h
@@ -24,6 +24,11 @@ namespace llvm {
 class CSKYSubtarget;
 
 class CSKYInstrInfo : public CSKYGenInstrInfo {
+  bool v2sf;
+  bool v2df;
+  bool v3sf;
+  bool v3df;
+
 protected:
   const CSKYSubtarget &STI;
 
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfo.td b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
index 2811065cc9832..a782efe7f4f42 100644
--- a/llvm/lib/Target/CSKY/CSKYInstrInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfo.td
@@ -1360,3 +1360,5 @@ def CONSTPOOL_ENTRY : CSKYPseudo<(outs),
   (ins i32imm:$instid, i32imm:$cpidx, i32imm:$size), "", []>;
 
 include "CSKYInstrInfo16Instr.td"
+include "CSKYInstrInfoF1.td"
+include "CSKYInstrInfoF2.td"
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td b/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td
new file mode 100644
index 0000000000000..30cef024f35ad
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfoF1.td
@@ -0,0 +1,420 @@
+//===- CSKYInstrInfoF1.td - CSKY Instruction Float1.0 ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def regseq_f1 : Operand<iPTR> {
+  let EncoderMethod = "getRegisterSeqOpValue";
+  let ParserMatchClass = RegSeqAsmOperand<"V1">;
+  let PrintMethod = "printRegisterSeq";
+  let DecoderMethod = "DecodeRegSeqOperandF1";
+  let MIOperandInfo = (ops sFPR32, uimm5);
+}
+
+def regseq_d1 : Operand<iPTR> {
+  let EncoderMethod = "getRegisterSeqOpValue";
+  let ParserMatchClass = RegSeqAsmOperand<"V1">;
+  let PrintMethod = "printRegisterSeq";
+  let DecoderMethod = "DecodeRegSeqOperandD1";
+  let MIOperandInfo = (ops sFPR64, uimm5);
+}
+
+def sFPR32Op : RegisterOperand<sFPR32, "printFPR">;
+def sFPR64Op : RegisterOperand<sFPR64, "printFPR">;
+def sFPR64_V_OP : RegisterOperand<sFPR64_V, "printFPR">;
+
+include "CSKYInstrFormatsF1.td"
+
+//===----------------------------------------------------------------------===//
+// CSKY specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+def SDT_BITCAST_TO_LOHI : SDTypeProfile<2, 1, [SDTCisSameAs<0, 1>]>;
+def CSKY_BITCAST_TO_LOHI : SDNode<"CSKYISD::BITCAST_TO_LOHI", SDT_BITCAST_TO_LOHI>;
+def SDT_BITCAST_FROM_LOHI : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;
+def CSKY_BITCAST_FROM_LOHI : SDNode<"CSKYISD::BITCAST_FROM_LOHI", SDT_BITCAST_FROM_LOHI>;
+//===----------------------------------------------------------------------===//
+// Operand and SDNode transformation definitions.
+//===----------------------------------------------------------------------===//
+
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
+def fpimm32_hi16 : SDNodeXForm<fpimm, [{
+  return CurDAG->getTargetConstant(
+    (N->getValueAPF().bitcastToAPInt().getZExtValue() >> 16) & 0xFFFF,
+    SDLoc(N), MVT::i32);
+}]>;
+
+def fpimm32_lo16 : SDNodeXForm<fpimm, [{
+  return CurDAG->getTargetConstant(
+    N->getValueAPF().bitcastToAPInt().getZExtValue() & 0xFFFF,
+    SDLoc(N), MVT::i32);
+}]>;
+
+class fpimm_xform<int width, int shift = 0> : SDNodeXForm<fpimm,
+  "return CurDAG->getTargetConstant(N->getValueAPF().bitcastToAPInt().lshr("#shift#").getLoBits("#width#"), SDLoc(N), MVT::i32);">;
+
+class fpimm_xform_i16<int width, int shift = 0> : SDNodeXForm<fpimm,
+  "return CurDAG->getTargetConstant(N->getValueAPF().bitcastToAPInt().lshr("#shift#").getLoBits("#width#"), SDLoc(N), MVT::i16);">;
+
+class fpimm_t<int width, int shift = 0> : PatLeaf<(fpimm),
+   "return isShiftedUInt<"#width#", "#shift#">(N->getValueAPF().bitcastToAPInt().getZExtValue());">;
+
+def fpimm8 : fpimm_t<8>;
+def fpimm8_8 : fpimm_t<8, 8>;
+def fpimm8_16 : fpimm_t<8, 16>;
+def fpimm8_24 : fpimm_t<8, 24>;
+def fpimm16 : fpimm_t<16>;
+def fpimm16_8 : fpimm_t<16, 8>;
+def fpimm16_16 : fpimm_t<16, 16>;
+def fpimm24 : fpimm_t<24>;
+def fpimm24_8 : fpimm_t<24, 8>;
+def fpimm32 : fpimm_t<32>;
+
+def fpimm8_sr0_XFORM : fpimm_xform<8>;
+def fpimm8_sr8_XFORM : fpimm_xform<8, 8>;
+def fpimm8_sr16_XFORM : fpimm_xform<8, 16>;
+def fpimm8_sr24_XFORM : fpimm_xform<8, 24>;
+
+def fpimm8_sr0_i16_XFORM : fpimm_xform_i16<8>;
+def fpimm8_sr8_i16_XFORM : fpimm_xform_i16<8, 8>;
+
+def fconstpool_symbol : Operand<iPTR> {
+  let ParserMatchClass = Constpool;
+  let EncoderMethod =
+    "getConstpoolSymbolOpValue<CSKY::fixup_csky_pcrel_uimm8_scale4>";
+  let DecoderMethod = "decodeUImmOperand<8, 2>";
+  let PrintMethod = "printConstpool";
+  let OperandType = "OPERAND_PCREL";
+}
+
+
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//arithmetic
+
+def FABSM : F_XZ<0x2, 0b000110, "fabsm", "", UnOpFrag<(fabs node:$Src)>, sFPR64_V_OP>;
+def FNEGM : F_XZ<0x2, 0b000111, "fnegm", "", UnOpFrag<(fneg node:$Src)>, sFPR64_V_OP>;
+def FADDM : F_XYZ<0x2, 0b000000, "faddm", "", BinOpFrag<(fadd node:$LHS, node:$RHS)>, sFPR64_V_OP>;
+def FSUBM : F_XYZ<0x2, 0b000001, "fsubm", "", BinOpFrag<(fsub node:$LHS, node:$RHS)>, sFPR64_V_OP>;
+def FMULM : F_XYZ<0x2, 0b010000, "fmulm", "", BinOpFrag<(fmul node:$LHS, node:$RHS)>, sFPR64_V_OP>;
+def FNMULM : F_XYZ<0x2, 0b010001, "fnmulm", "", BinOpFrag<(fneg (fmul node:$LHS, node:$RHS))>, sFPR64_V_OP>;
+def FMACM : F_ACCUM_XYZ<0x2, 0b010100, "fmacm", "", TriOpFrag<(fadd node:$LHS, (fmul node:$MHS, node:$RHS))>, sFPR64_V_OP>;
+def FMSCM : F_ACCUM_XYZ<0x2, 0b010101, "fmscm", "", TriOpFrag<(fsub (fmul node:$MHS, node:$RHS), node:$LHS)>, sFPR64_V_OP>;
+def FNMACM : F_ACCUM_XYZ<0x2, 0b010110, "fnmacm", "", TriOpFrag<(fsub node:$LHS, (fmul node:$MHS, node:$RHS))>, sFPR64_V_OP>;
+def FNMSCM : F_ACCUM_XYZ<0x2, 0b010111, "fnmscm", "", TriOpFrag<(fneg (fadd node:$LHS, (fmul node:$MHS, node:$RHS)))>, sFPR64_V_OP>;
+
+def FMOVM :  F_MOV<0x2, 0b000100, "fmovm", "", sFPR64_V_OP>;
+
+defm FABS   : FT_XZ<0b000110, "fabs", UnOpFrag<(fabs node:$Src)>>;
+defm FNEG   : FT_XZ<0b000111, "fneg", UnOpFrag<(fneg node:$Src)>>;
+defm FSQRT  : FT_XZ<0b011010, "fsqrt", UnOpFrag<(fsqrt node:$Src)>>;
+
+defm FADD   : FT_XYZ<0b000000, "fadd", BinOpFrag<(fadd node:$LHS, node:$RHS)>>;
+defm FSUB   : FT_XYZ<0b000001, "fsub", BinOpFrag<(fsub node:$LHS, node:$RHS)>>;
+defm FDIV   : FT_XYZ<0b011000, "fdiv", BinOpFrag<(fdiv node:$LHS, node:$RHS)>>;
+defm FMUL   : FT_XYZ<0b010000, "fmul", BinOpFrag<(fmul node:$LHS, node:$RHS)>>;
+defm FNMUL  : FT_XYZ<0b010001, "fnmul", BinOpFrag<(fneg (fmul node:$LHS, node:$RHS))>>;
+defm FMAC   : FT_ACCUM_XYZ<0b010100, "fmac", TriOpFrag<(fadd node:$LHS, (fmul node:$MHS, node:$RHS))>>;
+defm FMSC   : FT_ACCUM_XYZ<0b010101, "fmsc", TriOpFrag<(fsub (fmul node:$MHS, node:$RHS), node:$LHS)>>;
+defm FNMAC  : FT_ACCUM_XYZ<0b010110, "fnmac", TriOpFrag<(fsub node:$LHS, (fmul node:$MHS, node:$RHS))>>;
+defm FNMSC  : FT_ACCUM_XYZ<0b010111, "fnmsc", TriOpFrag<(fneg (fadd node:$LHS, (fmul node:$MHS, node:$RHS)))>>;
+
+defm FCMPHS : FT_CMPXY<0b001100, "fcmphs">;
+defm FCMPLT : FT_CMPXY<0b001101, "fcmplt">;
+defm FCMPNE : FT_CMPXY<0b001110, "fcmpne">;
+defm FCMPUO : FT_CMPXY<0b001111, "fcmpuo">;
+defm FCMPZHS : FT_CMPZX<0b001000, "fcmpzhs">;
+defm FCMPZLS : FT_CMPZX<0b001001, "fcmpzls">;
+defm FCMPZNE : FT_CMPZX<0b001010, "fcmpzne">;
+defm FCMPZUO : FT_CMPZX<0b001011, "fcmpzuo">;
+
+defm FRECIP   : FT_MOV<0b011001, "frecip">;
+
+//fmov, fmtvr, fmfvr
+defm FMOV : FT_MOV<0b000100, "fmov">;
+def FMFVRL : F_XZ_GF<3, 0b011001, (outs GPR:$rz), (ins sFPR32Op:$vrx),
+                     "fmfvrl\t$rz, $vrx", [(set GPR:$rz, (bitconvert sFPR32Op:$vrx))]>;
+def FMTVRL : F_XZ_FG<3, 0b011011, (outs sFPR32Op:$vrz), (ins GPR:$rx),
+                     "fmtvrl\t$vrz, $rx", [(set sFPR32Op:$vrz, (bitconvert GPR:$rx))]>;
+
+let Predicates = [HasFPUv2_DF] in {
+  let isCodeGenOnly = 1 in
+  def FMFVRL_D : F_XZ_GF<3, 0b011001, (outs GPR:$rz), (ins sFPR64Op:$vrx),
+                         "fmfvrl\t$rz, $vrx", []>;
+  def FMFVRH_D : F_XZ_GF<3, 0b011000, (outs GPR:$rz), (ins sFPR64Op:$vrx),
+                         "fmfvrh\t$rz, $vrx", []>;
+  let isCodeGenOnly = 1 in
+  def FMTVRL_D : F_XZ_FG<3, 0b011011, (outs sFPR64Op:$vrz), (ins GPR:$rx),
+                         "fmtvrl\t$vrz, $rx", []>;
+let Constraints = "$vrZ = $vrz" in
+  def FMTVRH_D : F_XZ_FG<3, 0b011010, (outs sFPR64Op:$vrz), (ins sFPR64Op:$vrZ, GPR:$rx),
+                         "fmtvrh\t$vrz, $rx", []>;
+}
+
+//fcvt
+
+def FSITOS  : F_XZ_TRANS<0b010000, "fsitos", sFPR32Op, sFPR32Op>;
+def : Pat<(f32 (sint_to_fp GPR:$a)),
+          (FSITOS (COPY_TO_REGCLASS GPR:$a, sFPR32))>,
+          Requires<[HasFPUv2_SF]>;
+
+def FUITOS  : F_XZ_TRANS<0b010001, "fuitos", sFPR32Op, sFPR32Op>;
+def : Pat<(f32 (uint_to_fp GPR:$a)),
+          (FUITOS (COPY_TO_REGCLASS GPR:$a, sFPR32))>,
+          Requires<[HasFPUv2_SF]>;
+
+def FSITOD  : F_XZ_TRANS<0b010100, "fsitod", sFPR64Op, sFPR64Op>;
+def : Pat<(f64 (sint_to_fp GPR:$a)),
+            (FSITOD (COPY_TO_REGCLASS GPR:$a, sFPR64))>,
+           Requires<[HasFPUv2_DF]>;
+
+def FUITOD  : F_XZ_TRANS<0b010101, "fuitod", sFPR64Op, sFPR64Op>;
+def : Pat<(f64 (uint_to_fp GPR:$a)),
+            (FUITOD (COPY_TO_REGCLASS GPR:$a, sFPR64))>,
+           Requires<[HasFPUv2_DF]>;
+
+let Predicates = [HasFPUv2_DF] in {
+def FDTOS   : F_XZ_TRANS_DS<0b010110,"fdtos", UnOpFrag<(fpround node:$Src)>>;
+def FSTOD   : F_XZ_TRANS_SD<0b010111,"fstod", UnOpFrag<(fpextend node:$Src)>>;
+}
+
+def rpiFSTOSI : F_XZ_TRANS<0b000010, "fstosi.rpi", sFPR32Op, sFPR32Op>;
+def rpiFSTOUI : F_XZ_TRANS<0b000110, "fstoui.rpi", sFPR32Op, sFPR32Op>;
+def rzFSTOSI : F_XZ_TRANS<0b000001, "fstosi.rz", sFPR32Op, sFPR32Op>;
+def rzFSTOUI : F_XZ_TRANS<0b000101, "fstoui.rz", sFPR32Op, sFPR32Op>;
+def rnFSTOSI : F_XZ_TRANS<0b000000, "fstosi.rn", sFPR32Op, sFPR32Op>;
+def rnFSTOUI : F_XZ_TRANS<0b000100, "fstoui.rn", sFPR32Op, sFPR32Op>;
+def rniFSTOSI : F_XZ_TRANS<0b000011, "fstosi.rni", sFPR32Op, sFPR32Op>;
+def rniFSTOUI : F_XZ_TRANS<0b000111, "fstoui.rni", sFPR32Op, sFPR32Op>;
+
+let Predicates = [HasFPUv2_DF] in {
+def rpiFDTOSI : F_XZ_TRANS<0b001010, "fdtosi.rpi", sFPR64Op, sFPR64Op>;
+def rpiFDTOUI : F_XZ_TRANS<0b001110, "fdtoui.rpi", sFPR64Op, sFPR64Op>;
+def rzFDTOSI : F_XZ_TRANS<0b001001, "fdtosi.rz", sFPR64Op, sFPR64Op>;
+def rzFDTOUI : F_XZ_TRANS<0b001101, "fdtoui.rz", sFPR64Op, sFPR64Op>;
+def rnFDTOSI : F_XZ_TRANS<0b001000, "fdtosi.rn", sFPR64Op, sFPR64Op>;
+def rnFDTOUI : F_XZ_TRANS<0b001100, "fdtoui.rn", sFPR64Op, sFPR64Op>;
+def rniFDTOSI : F_XZ_TRANS<0b001011, "fdtosi.rni", sFPR64Op, sFPR64Op>;
+def rniFDTOUI : F_XZ_TRANS<0b001111, "fdtoui.rni", sFPR64Op, sFPR64Op>;
+}
+
+multiclass FPToIntegerPats<SDNode round, string SUFFIX> {
+  def : Pat<(i32 (fp_to_sint (round sFPR32Op:$Rn))),
+            (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FSTOSI) sFPR32Op:$Rn), GPR)>,
+            Requires<[HasFPUv2_SF]>;
+  def : Pat<(i32 (fp_to_uint (round sFPR32Op:$Rn))),
+            (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FSTOUI) sFPR32Op:$Rn), GPR)>,
+            Requires<[HasFPUv2_SF]>;
+  def : Pat<(i32 (fp_to_sint (round sFPR64Op:$Rn))),
+            (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FDTOSI) sFPR64Op:$Rn), GPR)>,
+            Requires<[HasFPUv2_DF]>;
+  def : Pat<(i32 (fp_to_uint (round sFPR64Op:$Rn))),
+            (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FDTOUI) sFPR64Op:$Rn), GPR)>,
+            Requires<[HasFPUv2_DF]>;
+}
+
+defm: FPToIntegerPats<fceil, "rpi">;
+defm: FPToIntegerPats<fround, "rn">;
+defm: FPToIntegerPats<ffloor, "rni">;
+
+multiclass FPToIntegerTowardszeroPats<string SUFFIX> {
+  def : Pat<(i32 (fp_to_sint sFPR32Op:$Rn)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FSTOSI) sFPR32Op:$Rn), GPR)>,
+            Requires<[HasFPUv2_SF]>;
+  def : Pat<(i32 (fp_to_uint sFPR32Op:$Rn)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FSTOUI) sFPR32Op:$Rn), GPR)>,
+            Requires<[HasFPUv2_SF]>;
+  def : Pat<(i32 (fp_to_sint sFPR64Op:$Rn)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FDTOSI) sFPR64Op:$Rn), GPR)>,
+            Requires<[HasFPUv2_DF]>;
+  def : Pat<(i32 (fp_to_uint sFPR64Op:$Rn)),
+            (COPY_TO_REGCLASS (!cast<Instruction>(SUFFIX # FDTOUI) sFPR64Op:$Rn), GPR)>,
+            Requires<[HasFPUv2_DF]>;
+}
+
+defm: FPToIntegerTowardszeroPats<"rz">;
+
+
+//fld, fst
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+  defm FLD : FT_XYAI_LD<0b0010000, "fld">;
+  defm FLDR : FT_XYAR_LD<0b0010100, "fldr">;
+  defm FLDM : FT_XYAR_LDM<0b0011000, "fldm">;
+
+  let Predicates = [HasFPUv2_DF] in
+  def FLDRM : F_XYAR_LD<0b0010101, 0, "fldrm", "", sFPR64Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def FLDMM : F_I4_XY_MEM<0b0011001, 0,
+    (outs), (ins GPR:$rx, regseq_d1:$regs, variable_ops), "fldmm\t$regs, (${rx})", []>;
+  let Predicates = [HasFPUv2_DF] in
+  def FLDM : F_XYAI_LD<0b0010001, 0, "fldm", "", sFPR64Op, uimm8_3>;
+}
+
+
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+  defm FST : FT_XYAI_ST<0b0010010, "fst">;
+  defm FSTR : FT_XYAR_ST<0b0010110, "fstr">;
+  defm FSTM : FT_XYAR_STM<0b0011010, "fstm">;
+
+  let Predicates = [HasFPUv2_DF] in
+  def FSTRM : F_XYAR_ST<0b0010111, 0, "fstrm", "", sFPR64Op>;
+  let Predicates = [HasFPUv2_DF] in
+  def FSTMM :  F_I4_XY_MEM<0b0011011, 0,
+    (outs), (ins GPR:$rx, regseq_d1:$regs, variable_ops), "fstmm\t$regs, (${rx})", []>;
+  let Predicates = [HasFPUv2_DF] in
+  def FSTM : F_XYAI_ST<0b0010011, 0, "fstm", "", sFPR64Op, uimm8_3>;
+}
+
+defm : LdPat<load, uimm8_2, FLD_S, f32>, Requires<[HasFPUv2_SF]>;
+defm : LdPat<load, uimm8_2, FLD_D, f64>, Requires<[HasFPUv2_DF]>;
+defm : LdrPat<load, FLDR_S, f32>, Requires<[HasFPUv2_SF]>;
+defm : LdrPat<load, FLDR_D, f64>, Requires<[HasFPUv2_DF]>;
+
+defm : StPat<store, f32, uimm8_2, FST_S>, Requires<[HasFPUv2_SF]>;
+defm : StPat<store, f64, uimm8_2, FST_D>, Requires<[HasFPUv2_DF]>;
+defm : StrPat<store, f32, FSTR_S>, Requires<[HasFPUv2_SF]>;
+defm : StrPat<store, f64, FSTR_D>, Requires<[HasFPUv2_DF]>;
+
+
+def : Pat<(f32 fpimm16:$imm), (COPY_TO_REGCLASS (MOVI32 (fpimm32_lo16 fpimm16:$imm)), sFPR32)>,
+        Requires<[HasFPUv2_SF]>;
+def : Pat<(f32 fpimm16_16:$imm), (f32 (COPY_TO_REGCLASS (MOVIH32 (fpimm32_hi16 fpimm16_16:$imm)), sFPR32))>,
+        Requires<[HasFPUv2_SF]>;
+def : Pat<(f32 fpimm:$imm), (COPY_TO_REGCLASS (ORI32 (MOVIH32 (fpimm32_hi16 fpimm:$imm)), (fpimm32_lo16 fpimm:$imm)), sFPR32)>,
+        Requires<[HasFPUv2_SF]>;
+
+def : Pat<(f64(CSKY_BITCAST_FROM_LOHI GPR:$rs1, GPR:$rs2)), (FMTVRH_D(FMTVRL_D GPR:$rs1), GPR:$rs2)>,
+        Requires<[HasFPUv2_DF]>;
+
+multiclass BRCond_Bin<CondCode CC, string Instr, Instruction Br, Instruction MV> {
+  let Predicates = [HasFPUv2_SF] in
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)), bb:$imm16),
+            (Br (!cast<Instruction>(Instr#_S) sFPR32Op:$rs1, sFPR32Op:$rs2), bb:$imm16)>;
+  let Predicates = [HasFPUv2_DF] in
+  def : Pat<(brcond (i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)), bb:$imm16),
+            (Br (!cast<Instruction>(Instr#_D) sFPR64Op:$rs1, sFPR64Op:$rs2), bb:$imm16)>;
+
+  let Predicates = [HasFPUv2_SF] in
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)),
+            (MV (!cast<Instruction>(Instr#_S) sFPR32Op:$rs1, sFPR32Op:$rs2))>;
+  let Predicates = [HasFPUv2_DF] in
+  def : Pat<(i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)),
+            (MV (!cast<Instruction>(Instr#_D) sFPR64Op:$rs1, sFPR64Op:$rs2))>;
+}
+
+multiclass BRCond_Bin_SWAP<CondCode CC, string Instr, Instruction Br, Instruction MV> {
+  let Predicates = [HasFPUv2_SF] in
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)), bb:$imm16),
+            (Br (!cast<Instruction>(Instr#_S) sFPR32Op:$rs2, sFPR32Op:$rs1), bb:$imm16)>;
+  let Predicates = [HasFPUv2_DF] in
+  def : Pat<(brcond (i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)), bb:$imm16),
+            (Br (!cast<Instruction>(Instr#_D) sFPR64Op:$rs2, sFPR64Op:$rs1), bb:$imm16)>;
+
+  let Predicates = [HasFPUv2_SF] in
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, sFPR32Op:$rs2, CC)),
+            (MV (!cast<Instruction>(Instr#_S) sFPR32Op:$rs2, sFPR32Op:$rs1))>;
+  let Predicates = [HasFPUv2_DF] in
+  def : Pat<(i32 (setcc sFPR64Op:$rs1, sFPR64Op:$rs2, CC)),
+            (MV (!cast<Instruction>(Instr#_D) sFPR64Op:$rs2, sFPR64Op:$rs1))>;
+}
+
+// inverse (order && compare) to (unorder || inverse(compare))
+
+defm : BRCond_Bin<SETUNE, "FCMPNE", BT32, MVC32>;
+defm : BRCond_Bin<SETOEQ, "FCMPNE", BF32, MVCV32>;
+defm : BRCond_Bin<SETOGE, "FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin<SETOLT, "FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin<SETUO, "FCMPUO", BT32, MVC32>;
+defm : BRCond_Bin<SETO, "FCMPUO", BF32, MVCV32>;
+defm : BRCond_Bin_SWAP<SETOGT, "FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP<SETOLE, "FCMPHS", BT32, MVC32>;
+
+defm : BRCond_Bin<SETNE, "FCMPNE", BT32, MVC32>;
+defm : BRCond_Bin<SETEQ, "FCMPNE", BF32, MVCV32>;
+defm : BRCond_Bin<SETGE, "FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin<SETLT, "FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP<SETGT, "FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP<SETLE, "FCMPHS", BT32, MVC32>;
+
+// -----------
+
+let Predicates = [HasFPUv2_SF] in {
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOGE)), bb:$imm16),
+            (BT32 (FCMPZHS_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOGE)),
+            (MVC32 (FCMPZHS_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOLT)), bb:$imm16),
+            (BF32 (FCMPZHS_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOLT)),
+            (MVCV32 (FCMPZHS_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOLE)), bb:$imm16),
+            (BT32 (FCMPZLS_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOLE)),
+            (MVC32 (FCMPZLS_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOGT)), bb:$imm16),
+            (BF32 (FCMPZLS_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOGT)),
+            (MVCV32 (FCMPZLS_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETUNE)), bb:$imm16),
+            (BT32 (FCMPZNE_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETUNE)),
+            (MVC32 (FCMPZNE_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETOEQ)), bb:$imm16),
+            (BF32 (FCMPZNE_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETOEQ)),
+            (MVCV32 (FCMPZNE_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm, SETUO)), bb:$imm16),
+            (BT32 (FCMPZUO_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm, SETUO)),
+            (MVC32 (FCMPZUO_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm, SETO)), bb:$imm16),
+            (BF32 (FCMPZUO_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm, SETO)),
+            (MVCV32 (FCMPZUO_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETGE)), bb:$imm16),
+            (BT32 (FCMPZHS_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETGE)),
+            (MVC32 (FCMPZHS_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETLT)), bb:$imm16),
+            (BF32 (FCMPZHS_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETLT)),
+            (MVCV32 (FCMPZHS_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETLE)), bb:$imm16),
+            (BT32 (FCMPZLS_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETLE)),
+            (MVC32 (FCMPZLS_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETGT)), bb:$imm16),
+            (BF32 (FCMPZLS_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETGT)),
+            (MVCV32 (FCMPZLS_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETNE)), bb:$imm16),
+            (BT32 (FCMPZNE_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETNE)),
+            (MVC32 (FCMPZNE_S sFPR32Op:$rs1))>;
+  def : Pat<(brcond (i32 (setcc sFPR32Op:$rs1, fpimm0, SETEQ)), bb:$imm16),
+            (BF32 (FCMPZNE_S sFPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc sFPR32Op:$rs1, fpimm0, SETEQ)),
+            (MVCV32 (FCMPZNE_S sFPR32Op:$rs1))>;
+}
+
+let usesCustomInserter = 1 in  {
+  let Predicates = [HasFPUv2_SF] in
+  def FSELS : CSKYPseudo<(outs sFPR32Op:$dst), (ins CARRY:$cond, sFPR32Op:$src1, sFPR32Op:$src2),
+    "!fsels\t$dst, $src1, src2", [(set sFPR32Op:$dst, (select CARRY:$cond, sFPR32Op:$src1, sFPR32Op:$src2))]>;
+
+  let Predicates = [HasFPUv2_DF] in
+  def FSELD : CSKYPseudo<(outs sFPR64Op:$dst), (ins CARRY:$cond, sFPR64Op:$src1, sFPR64Op:$src2),
+    "!fseld\t$dst, $src1, src2", [(set sFPR64Op:$dst, (select CARRY:$cond, sFPR64Op:$src1, sFPR64Op:$src2))]>;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td b/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td
new file mode 100644
index 0000000000000..8a00e7d9af3a7
--- /dev/null
+++ b/llvm/lib/Target/CSKY/CSKYInstrInfoF2.td
@@ -0,0 +1,462 @@
+//===- CSKYInstrInfoF2.td - CSKY Instruction Float2.0 ------*- tablegen -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the CSKY instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+def regseq_f2 : Operand<i32> {
+  let EncoderMethod = "getRegisterSeqOpValue";
+  let ParserMatchClass = RegSeqAsmOperand<"V2">;
+  let PrintMethod = "printRegisterSeq";
+  let DecoderMethod = "DecodeRegSeqOperandF2";
+  let MIOperandInfo = (ops FPR32, uimm5);
+}
+
+def regseq_d2 : Operand<i32> {
+  let EncoderMethod = "getRegisterSeqOpValue";
+  let ParserMatchClass = RegSeqAsmOperand<"V2">;
+  let PrintMethod = "printRegisterSeq";
+  let DecoderMethod = "DecodeRegSeqOperandD2";
+  let MIOperandInfo = (ops FPR64, uimm5);
+}
+
+def FPR32Op : RegisterOperand<FPR32, "printFPR">;
+def FPR64Op : RegisterOperand<FPR64, "printFPR">;
+
+include "CSKYInstrFormatsF2.td"
+
+// Predicates
+def IsOrAdd: PatFrag<(ops node:$A, node:$B), (or node:$A, node:$B), [{
+  return isOrEquivalentToAdd(N);
+}]>;
+
+//===----------------------------------------------------------------------===//
+// Instructions
+//===----------------------------------------------------------------------===//
+
+defm f2FADD   : F2_XYZ_T<0b000000, "fadd", BinOpFrag<(fadd node:$LHS, node:$RHS)>>;
+defm f2FSUB   : F2_XYZ_T<0b000001, "fsub", BinOpFrag<(fsub node:$LHS, node:$RHS)>>;
+defm f2FDIV   : F2_XYZ_T<0b011000, "fdiv", BinOpFrag<(fdiv node:$LHS, node:$RHS)>>;
+defm f2FMUL   : F2_XYZ_T<0b010000, "fmul", BinOpFrag<(fmul node:$LHS, node:$RHS)>>;
+
+defm f2FMAXNM : F2_XYZ_T<0b101000, "fmaxnm", BinOpFrag<(fmaxnum node:$LHS, node:$RHS)>>;
+defm f2FMINNM : F2_XYZ_T<0b101001, "fminnm", BinOpFrag<(fminnum node:$LHS, node:$RHS)>>;
+
+defm f2FABS   : F2_XZ_T<0b000110, "fabs", fabs>;
+defm f2FNEG   : F2_XZ_T<0b000111, "fneg", fneg>;
+defm f2FSQRT  : F2_XZ_T<0b011010, "fsqrt", fsqrt>;
+defm f2FMOV   : F2_XZ_SET_T<0b000100, "fmov">;
+def f2FMOVX   : F2_XZ_SET<0b00001, FPR32Op, 0b000101, "fmovx.32">;
+
+defm f2RECIP   : F2_XZ_SET_T<0b011001, "frecip">;
+
+// fld/fst
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+  def f2FLD_S : F2_LDST_S<0b0, "fld", (outs FPR32Op:$vrz), (ins GPR:$rx, uimm8_2:$imm8)>;
+  let Predicates = [HasFPUv3_DF] in
+  def f2FLD_D : F2_LDST_D<0b0, "fld", (outs FPR64Op:$vrz), (ins GPR:$rx, uimm8_2:$imm8)>;
+}
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+  def f2FST_S : F2_LDST_S<0b1, "fst", (outs), (ins FPR32Op:$vrz, GPR:$rx, uimm8_2:$imm8)>;
+  let Predicates = [HasFPUv3_DF] in
+  def f2FST_D : F2_LDST_D<0b1, "fst", (outs), (ins FPR64Op:$vrz, GPR:$rx, uimm8_2:$imm8)>;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+  def f2FSTM_S : F2_LDSTM_S<0b1, 0, "fstm", (outs), (ins GPR:$rx, regseq_f2:$regs, variable_ops)>;
+  let Predicates = [HasFPUv3_DF] in
+  def f2FSTM_D : F2_LDSTM_D<0b1, 0, "fstm", (outs), (ins GPR:$rx, regseq_d2:$regs, variable_ops)>;
+
+  def f2FSTMU_S : F2_LDSTM_S<0b1, 0b100, "fstmu", (outs), (ins GPR:$rx, regseq_f2:$regs, variable_ops)>;
+  let Predicates = [HasFPUv3_DF] in
+  def f2FSTMU_D : F2_LDSTM_D<0b1, 0b100, "fstmu", (outs), (ins GPR:$rx, regseq_d2:$regs, variable_ops)>;
+}
+
+let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+  def f2FLDM_S : F2_LDSTM_S<0b0, 0, "fldm", (outs), (ins GPR:$rx, regseq_f2:$regs, variable_ops)>;
+  let Predicates = [HasFPUv3_DF] in
+  def f2FLDM_D : F2_LDSTM_D<0b0, 0, "fldm", (outs), (ins GPR:$rx,  regseq_d2:$regs, variable_ops)>;
+
+  def f2FLDMU_S : F2_LDSTM_S<0b0, 0b100, "fldmu", (outs), (ins GPR:$rx, regseq_f2:$regs, variable_ops)>;
+  let Predicates = [HasFPUv3_DF] in
+  def f2FLDMU_D : F2_LDSTM_D<0b0, 0b100, "fldmu", (outs), (ins GPR:$rx, regseq_d2:$regs, variable_ops)>;
+}
+
+multiclass FLSR {
+  let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
+    def FLDR_S : F2_LDSTR_S<0b0, "fldr", (outs FPR32Op:$rz), (ins GPR:$rx, GPR:$ry, uimm2:$imm)>;
+    let Predicates = [HasFPUv3_DF] in
+    def FLDR_D : F2_LDSTR_D<0b0, "fldr", (outs FPR64Op:$rz), (ins GPR:$rx, GPR:$ry, uimm2:$imm)>;
+  }
+  let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
+    def FSTR_S : F2_LDSTR_S<0b1, "fstr", (outs), (ins FPR32Op:$rz, GPR:$rx, GPR:$ry, uimm2:$imm)>;
+    let Predicates = [HasFPUv3_DF] in
+    def FSTR_D : F2_LDSTR_D<0b1, "fstr", (outs), (ins FPR64Op:$rz, GPR:$rx, GPR:$ry, uimm2:$imm)>;
+  }
+}
+
+defm f2: FLSR;
+
+def f2FLRW_S : F2_LRW<0b00, 0b0, "flrw.32", (outs FPR32Op:$vrz), (ins fconstpool_symbol:$imm8)>;
+def f2FLRW_D : F2_LRW<0b01, 0b0, "flrw.64", (outs FPR64Op:$vrz), (ins fconstpool_symbol:$imm8)>;
+
+def : Pat<(f32 (load constpool:$src)), (f2FLRW_S (to_tconstpool tconstpool:$src))>, Requires<[HasFPUv3_SF]>;
+def : Pat<(f64 (load constpool:$src)), (f2FLRW_D (to_tconstpool tconstpool:$src))>, Requires<[HasFPUv3_DF]>;
+
+defm : LdPat<load, uimm8_2, f2FLD_S, f32>, Requires<[HasFPUv3_SF]>;
+defm : LdPat<load, uimm8_2, f2FLD_D, f64>, Requires<[HasFPUv3_DF]>;
+defm : LdrPat<load, f2FLDR_S, f32>, Requires<[HasFPUv3_SF]>;
+defm : LdrPat<load, f2FLDR_D, f64>, Requires<[HasFPUv3_DF]>;
+
+defm : StPat<store, f32, uimm8_2, f2FST_S>, Requires<[HasFPUv3_SF]>;
+defm : StPat<store, f64, uimm8_2, f2FST_D>, Requires<[HasFPUv3_DF]>;
+defm : StrPat<store, f32, f2FSTR_S>, Requires<[HasFPUv3_SF]>;
+defm : StrPat<store, f64, f2FSTR_D>, Requires<[HasFPUv3_DF]>;
+
+// fmfvr
+let vry = 0 in
+def f2FMFVRL   : F2_XYZ<0b00011, 0b011001, "fmfvr.32.1\t$vrz, $vrx",
+                        (outs GPR:$vrz), (ins FPR32Op:$vrx),
+                        [(set GPR:$vrz, (bitconvert FPR32Op:$vrx))]>;
+// TODO: vrz and vrz+1
+def f2FMFVRL_2 : F2_XYZ<0b00011, 0b111010, "fmfvr.32.2\t$vrz, $vry, $vrx",
+                        (outs GPR:$vrz, GPR:$vry), (ins FPR64Op:$vrx),
+                        []>;
+
+let Predicates = [HasFPUv3_DF] in {
+let vry = 0 in {
+let isCodeGenOnly = 1 in
+def f2FMFVRL_D : F2_XYZ<0b00011, 0b011001, "fmfvr.32.1\t$vrz, $vrx",
+                        (outs GPR:$vrz), (ins FPR64Op:$vrx),
+                        []>;
+def f2FMFVRH_D : F2_XYZ<0b00011, 0b011000, "fmfvrh\t$vrz, $vrx",
+                        (outs GPR:$vrz), (ins FPR64Op:$vrx),
+                        []>;
+}
+def f2FMFVR_D  : F2_XYZ<0b00011, 0b111000, "fmfvr.64\t$vrz, $vry, $vrx",
+                        (outs GPR:$vrz, GPR:$vry), (ins FPR64Op:$vrx),
+                        [(set GPR:$vrz, GPR:$vry, (CSKY_BITCAST_TO_LOHI FPR64Op:$vrx))]>;
+}
+
+// fmtvr
+def f2FMTVRL   : F2_XZ_P<0b00011, 0b011011, "fmtvr.32.1",
+                         [(set FPR32Op:$vrz, (bitconvert GPR:$vrx))],
+                         (outs FPR32Op:$vrz), (ins GPR:$vrx)>;
+// TODO: vrz and vrz+1
+def f2FMTVRL_2 : F2_XYZ<0b00011, 0b111110, "fmtvr.32.2\t$vrz, $vrx, $vry",
+                        (outs FPR32Op:$vrz), (ins GPR:$vrx, GPR:$vry),
+                        []>;
+
+let Predicates = [HasFPUv3_DF] in {
+let isCodeGenOnly = 1 in
+def f2FMTVRL_D : F2_XZ_P<0b00011, 0b011011, "fmtvr.32.1",
+                         [],
+                         (outs FPR64Op:$vrz), (ins GPR:$vrx)>;
+let Constraints = "$vrZ = $vrz" in
+def f2FMTVRH_D : F2_XZ_P<0b00011, 0b011010, "fmtvrh",
+                         [],
+                         (outs FPR64Op:$vrz), (ins FPR64Op:$vrZ, GPR:$vrx)>;
+def f2FMTVR_D  : F2_XYZ<0b00011, 0b111100, "fmtvr.64\t$vrz, $vrx, $vry",
+                        (outs FPR64Op:$vrz), (ins GPR:$vrx, GPR:$vry),
+                        [(set FPR64Op:$vrz, (CSKY_BITCAST_FROM_LOHI GPR:$vrx, GPR:$vry))]>;
+}
+
+// fcmp
+
+defm f2FCMPHS: F2_CXY_T<0b001100, "fcmphs">;
+defm f2FCMPLT: F2_CXY_T<0b001101, "fcmplt">;
+defm f2FCMPNE: F2_CXY_T<0b001110, "fcmpne">;
+defm f2FCMPUO: F2_CXY_T<0b001111, "fcmpuo">;
+
+defm f2FCMPHSZ: F2_CX_T<0b001000, "fcmphsz">;
+defm f2FCMPHZ : F2_CX_T<0b101010, "fcmphz">;
+defm f2FCMPLSZ: F2_CX_T<0b101011, "fcmplsz">;
+defm f2FCMPLTZ: F2_CX_T<0b001001, "fcmpltz">;
+defm f2FCMPNEZ: F2_CX_T<0b001010, "fcmpnez">;
+defm f2FCMPUOZ: F2_CX_T<0b001011, "fcmpuoz">;
+
+defm f2FMULA : F2_XYZZ_T<0b010100, "fmula",
+  TriOpFrag<(fadd (fmul node:$LHS, node:$MHS), node:$RHS)>>;
+
+defm f2FMULS : F2_XYZZ_T<0b010110, "fmuls",
+  TriOpFrag<(fsub node:$RHS, (fmul node:$LHS, node:$MHS))>>;
+
+defm f2FFMULA : F2_XYZZ_T<0b110000, "ffmula",
+  TriOpFrag<(fma node:$LHS, node:$MHS, node:$RHS)>>;
+
+defm f2FFMULS : F2_XYZZ_T<0b110001, "ffmuls",
+  TriOpFrag<(fma (fneg node:$LHS), node:$MHS, node:$RHS)>>;
+
+defm f2FFNMULA : F2_XYZZ_T<0b110010, "ffnmula",
+  TriOpFrag<(fneg (fma node:$LHS, node:$MHS, node:$RHS))>>;
+
+defm f2FFNMULS : F2_XYZZ_T<0b110011, "ffnmuls",
+  TriOpFrag<(fma node:$LHS, node:$MHS, (fneg node:$RHS))>>;
+
+defm f2FNMULA : F2_XYZZ_T<0b010111, "fnmula",
+  TriOpFrag<(fneg (fadd (fmul node:$LHS, node:$MHS), node:$RHS))>>;
+
+defm f2FNMULS : F2_XYZZ_T<0b010101, "fnmuls",
+  TriOpFrag<(fneg (fsub node:$RHS, (fmul node:$LHS, node:$MHS)))>>;
+
+defm f2FNMUL : F2_XYZ_T<0b010001, "fnmul",
+  BinOpFrag<(fneg (fmul node:$LHS, node:$RHS))>>;
+
+// fcvt
+def f2FFTOS32_S  : F2_XZ_P<0b01000, 0b011011, "fftoi.f32.s32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOU32_S  : F2_XZ_P<0b01000, 0b011010, "fftoi.f32.u32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FS32TOF_S  : F2_XZ_P<0b01001, 0b011011, "fitof.s32.f32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FU32TOF_S  : F2_XZ_P<0b01001, 0b011010, "fitof.u32.f32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOXU32_S  : F2_XZ_P<0b01000, 0b001010, "fftox.f32.u32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOXS32_S  : F2_XZ_P<0b01000, 0b001011, "fftox.f32.s32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FXTOFU32_S  : F2_XZ_P<0b01001, 0b001010, "fxtof.u32.f32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FXTOFS32_S  : F2_XZ_P<0b01001, 0b001011, "fxtof.s32.f32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+let Predicates = [HasFPUv3_DF] in {
+def f2FFTOS32_D  : F2_XZ_P<0b01000, 0b011101, "fftoi.f64.s32", [], (outs FPR32Op:$vrz), (ins FPR64Op:$vrx)>;
+def f2FFTOU32_D  : F2_XZ_P<0b01000, 0b011100, "fftoi.f64.u32", [], (outs FPR32Op:$vrz), (ins FPR64Op:$vrx)>;
+def f2FS32TOF_D  : F2_XZ_P<0b01001, 0b011101, "fitof.s32.f64", [], (outs FPR64Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FU32TOF_D  : F2_XZ_P<0b01001, 0b011100, "fitof.u32.f64", [], (outs FPR64Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOXU32_D  : F2_XZ_P<0b01000, 0b001100, "fftox.f64.u32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FFTOXS32_D  : F2_XZ_P<0b01000, 0b001101, "fftox.f64.s32", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FXTOFU32_D  : F2_XZ_P<0b01001, 0b001100, "fxtof.u32.f64", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+def f2FXTOFS32_D  : F2_XZ_P<0b01001, 0b001101, "fxtof.s32.f64", [], (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+}
+
+defm f2FF32TOSI32 : F2_XZ_RM<0b00011, 0b0000, "fftoi.f32.s32", (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+defm f2FF32TOUI32 : F2_XZ_RM<0b00011, 0b0001, "fftoi.f32.u32", (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+defm f2FF32TOFI32 : F2_XZ_RM<0b01000, 0b1001, "fftofi.f32", (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+let Predicates = [HasFPUv3_DF] in {
+defm f2FF64TOSI32 : F2_XZ_RM<0b00011, 0b0010, "fftoi.f64.s32", (outs FPR32Op:$vrz), (ins FPR64Op:$vrx)>;
+defm f2FF64TOUI32 : F2_XZ_RM<0b00011, 0b0011, "fftoi.f64.u32", (outs FPR32Op:$vrz), (ins FPR64Op:$vrx)>;
+defm f2FF64TOFI32 : F2_XZ_RM<0b01000, 0b1010, "fftofi.f64", (outs FPR32Op:$vrz), (ins FPR32Op:$vrx)>;
+}
+
+def : Pat<(i32 (fp_to_sint (fround FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOSI32_RN  $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint (fround FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOUI32_RN  $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_sint (fceil  FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOSI32_RPI $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint (fceil  FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOUI32_RPI $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_sint (ffloor FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOSI32_RNI $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint (ffloor FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOUI32_RNI $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_sint (ftrunc FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOSI32_RZ  $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint (ftrunc FPR32Op:$vrx))), (COPY_TO_REGCLASS (f2FF32TOUI32_RZ  $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_sint FPR32Op:$vrx)), (COPY_TO_REGCLASS (f2FF32TOSI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+def : Pat<(i32 (fp_to_uint FPR32Op:$vrx)), (COPY_TO_REGCLASS (f2FF32TOUI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_SF]>;
+
+def : Pat<(i32 (fp_to_sint (fround FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOSI32_RN  $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint (fround FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOUI32_RN  $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_sint (fceil  FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOSI32_RPI $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint (fceil  FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOUI32_RPI $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_sint (ffloor FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOSI32_RNI $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint (ffloor FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOUI32_RNI $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_sint (ftrunc FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOSI32_RZ  $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint (ftrunc FPR64Op:$vrx))), (COPY_TO_REGCLASS (f2FF64TOUI32_RZ  $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_sint FPR64Op:$vrx)), (COPY_TO_REGCLASS (f2FF64TOSI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+def : Pat<(i32 (fp_to_uint FPR64Op:$vrx)), (COPY_TO_REGCLASS (f2FF64TOUI32_RZ $vrx), GPR)>, Requires<[HasFPUv3_DF]>;
+
+def : Pat<(sint_to_fp GPR:$vrx), (f2FS32TOF_S (COPY_TO_REGCLASS $vrx, FPR32))>, Requires<[HasFPUv3_SF]>;
+def : Pat<(uint_to_fp GPR:$vrx), (f2FU32TOF_S (COPY_TO_REGCLASS $vrx, FPR32))>, Requires<[HasFPUv3_SF]>;
+def : Pat<(sint_to_fp GPR:$vrx), (f2FS32TOF_D (COPY_TO_REGCLASS $vrx, FPR32))>, Requires<[HasFPUv3_DF]>;
+def : Pat<(uint_to_fp GPR:$vrx), (f2FU32TOF_D (COPY_TO_REGCLASS $vrx, FPR32))>, Requires<[HasFPUv3_DF]>;
+
+let Predicates = [HasFPUv3_DF] in {
+def f2FDTOS   : F2_XZ_P<0b00011, 0b010110, "fdtos", [(set FPR32Op:$vrz, (fpround FPR64Op:$vrx))], (outs FPR32Op:$vrz),
+                        (ins FPR64Op:$vrx)>;
+def f2FSTOD   : F2_XZ_P<0b00011, 0b010111, "fstod", [(set FPR64Op:$vrz, (fpextend FPR32Op:$vrx))], (outs FPR64Op:$vrz),
+                        (ins FPR32Op:$vrx)>;
+}
+
+// fsel
+defm f2FSEL: F2_CXYZ_T<0b111001, "fsel">;
+
+def f2FINS: F2_XZ_SET<0b00000, FPR32Op, 0b011011, "fins.32">;
+
+def : Pat<(f32 fpimm16:$imm),(COPY_TO_REGCLASS (MOVI32 (fpimm32_lo16 fpimm16:$imm)), FPR32)>,
+        Requires<[HasFPUv3_SF]>;
+def : Pat<(f32 fpimm16_16:$imm), (COPY_TO_REGCLASS (MOVIH32 (fpimm32_hi16 fpimm16_16:$imm)), FPR32)>,
+        Requires<[HasFPUv3_SF]>;
+def : Pat<(f32 fpimm:$imm),(COPY_TO_REGCLASS (ORI32 (MOVIH32 (fpimm32_hi16 fpimm:$imm)), (fpimm32_lo16 fpimm:$imm)), FPR32)>,
+        Requires<[HasFPUv3_SF]>;
+
+
+multiclass BRCond_Bin_F2<CondCode CC, string Instr, Instruction Br, Instruction MV, bit IsSelectSwap = 0> {
+  let Predicates = [HasFPUv3_SF] in
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), bb:$imm16),
+            (Br (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), bb:$imm16)>;
+  let Predicates = [HasFPUv3_DF] in
+  def : Pat<(brcond (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), bb:$imm16),
+            (Br (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), bb:$imm16)>;
+
+  let Predicates = [HasFPUv3_SF] in
+  def : Pat<(i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)),
+            (MV (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2))>;
+  let Predicates = [HasFPUv3_DF] in
+  def : Pat<(i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)),
+            (MV (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2))>;
+
+  let Predicates = [HasFPUv3_SF] in {
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), FPR32Op:$rx, FPR32Op:$false),
+            !if(
+                !eq(IsSelectSwap, 0),
+                (f2FSEL_S (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), FPR32Op:$rx, FPR32Op:$false),
+                (f2FSEL_S (!cast<Instruction>(Instr#_S) FPR32Op:$rs1, FPR32Op:$rs2), FPR32Op:$false, FPR32Op:$rx)
+               )>;
+  }
+  let Predicates = [HasFPUv3_DF] in {
+  def : Pat<(select (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), FPR64Op:$rx, FPR64Op:$false),
+            !if(
+                !eq(IsSelectSwap, 0),
+                (f2FSEL_D (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), FPR64Op:$rx, FPR64Op:$false),
+                (f2FSEL_D (!cast<Instruction>(Instr#_D) FPR64Op:$rs1, FPR64Op:$rs2), FPR64Op:$false, FPR64Op:$rx)
+               )>;
+  }
+}
+
+multiclass BRCond_Bin_SWAP_F2<CondCode CC, string Instr, Instruction Br, Instruction MV, bit IsSelectSwap = 0> {
+  let Predicates = [HasFPUv3_SF] in
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), bb:$imm16),
+            (Br (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), bb:$imm16)>;
+  let Predicates = [HasFPUv3_DF] in
+  def : Pat<(brcond (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), bb:$imm16),
+            (Br (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), bb:$imm16)>;
+
+  let Predicates = [HasFPUv3_SF] in
+  def : Pat<(i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)),
+            (MV (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1))>;
+  let Predicates = [HasFPUv3_DF] in
+  def : Pat<(i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)),
+            (MV (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1))>;
+
+  let Predicates = [HasFPUv3_SF] in {
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, FPR32Op:$rs2, CC)), FPR32Op:$rx, FPR32Op:$false),
+            !if(
+                !eq(IsSelectSwap, 0),
+                (f2FSEL_S (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false),
+                (f2FSEL_S (!cast<Instruction>(Instr#_S) FPR32Op:$rs2, FPR32Op:$rs1), FPR32Op:$false, FPR32Op:$rx)
+               )>;
+  }
+  let Predicates = [HasFPUv3_DF] in {
+  def : Pat<(select (i32 (setcc FPR64Op:$rs1, FPR64Op:$rs2, CC)), FPR64Op:$rx, FPR64Op:$false),
+            !if(
+                !eq(IsSelectSwap, 0),
+                (f2FSEL_D (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), FPR64Op:$rx, FPR64Op:$false),
+                (f2FSEL_D (!cast<Instruction>(Instr#_D) FPR64Op:$rs2, FPR64Op:$rs1), FPR64Op:$false, FPR64Op:$rx)
+               )>;
+  }
+}
+
+// inverse (order && compare) to (unorder || inverse(compare))
+
+defm : BRCond_Bin_F2<SETUNE, "f2FCMPNE", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETOEQ, "f2FCMPNE", BF32, MVCV32, 1>;
+defm : BRCond_Bin_F2<SETOGE, "f2FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETOLT, "f2FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETUO, "f2FCMPUO", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETO, "f2FCMPUO", BF32, MVCV32, 1>;
+defm : BRCond_Bin_SWAP_F2<SETOGT, "f2FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETOLE, "f2FCMPHS", BT32, MVC32>;
+
+defm : BRCond_Bin_F2<SETNE, "f2FCMPNE", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETEQ, "f2FCMPNE", BF32, MVCV32, 1>;
+defm : BRCond_Bin_F2<SETGE, "f2FCMPHS", BT32, MVC32>;
+defm : BRCond_Bin_F2<SETLT, "f2FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETGT, "f2FCMPLT", BT32, MVC32>;
+defm : BRCond_Bin_SWAP_F2<SETLE, "f2FCMPHS", BT32, MVC32>;
+
+// ------
+
+let Predicates = [HasFPUv3_SF] in {
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOGE)), bb:$imm16),
+            (BT32 (f2FCMPHSZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOGE)),
+            (MVC32 (f2FCMPHSZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOGE)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPHSZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOLT)), bb:$imm16),
+            (BT32 (f2FCMPLTZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOLT)),
+            (MVC32 (f2FCMPLTZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOLT)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPLTZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOLE)), bb:$imm16),
+            (BT32 (f2FCMPLSZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOLE)),
+            (MVC32 (f2FCMPLSZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOLE)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPLSZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOGT)), bb:$imm16),
+            (BT32 (f2FCMPHZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOGT)),
+            (MVC32 (f2FCMPHZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOGT)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPHZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETUNE)), bb:$imm16),
+            (BT32 (f2FCMPNEZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETUNE)),
+            (MVC32 (f2FCMPNEZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETUNE)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPNEZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm, SETUO)), bb:$imm16),
+            (BT32 (f2FCMPUOZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm, SETUO)),
+            (MVC32 (f2FCMPUOZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm, SETUO)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPUOZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETGE)), bb:$imm16),
+            (BT32 (f2FCMPHSZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETGE)),
+            (MVC32 (f2FCMPHSZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETGE)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPHSZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETLT)), bb:$imm16),
+            (BT32 (f2FCMPLTZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETLT)),
+            (MVC32 (f2FCMPLTZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETLT)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPLTZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETLE)), bb:$imm16),
+            (BT32 (f2FCMPLSZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETLE)),
+            (MVC32 (f2FCMPLSZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETLE)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPLSZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETGT)), bb:$imm16),
+            (BT32 (f2FCMPHZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETGT)),
+            (MVC32 (f2FCMPHZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETGT)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPHZ_S FPR32Op:$rs1), FPR32Op:$rx, FPR32Op:$false)>;
+
+
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm, SETO)), bb:$imm16),
+            (BF32 (f2FCMPUOZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm, SETO)),
+            (MVCV32 (f2FCMPUOZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm, SETO)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPUOZ_S FPR32Op:$rs1), FPR32Op:$false, FPR32Op:$rx)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETOEQ)), bb:$imm16),
+            (BF32 (f2FCMPNEZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETOEQ)),
+            (MVCV32 (f2FCMPNEZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETOEQ)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPNEZ_S FPR32Op:$rs1), FPR32Op:$false, FPR32Op:$rx)>;
+  def : Pat<(brcond (i32 (setcc FPR32Op:$rs1, fpimm0, SETEQ)), bb:$imm16),
+            (BF32 (f2FCMPNEZ_S FPR32Op:$rs1), bb:$imm16)>;
+  def : Pat<(i32 (setcc FPR32Op:$rs1, fpimm0, SETEQ)),
+            (MVCV32 (f2FCMPNEZ_S FPR32Op:$rs1))>;
+  def : Pat<(select (i32 (setcc FPR32Op:$rs1, fpimm0, SETEQ)), FPR32Op:$rx, FPR32Op:$false),
+            (f2FSEL_S (f2FCMPNEZ_S FPR32Op:$rs1), FPR32Op:$false, FPR32Op:$rx)>;
+}
+
+
+let Predicates = [HasFPUv3_SF] in
+def : Pat<(select CARRY:$ca, FPR32Op:$rx, FPR32Op:$false),
+          (f2FSEL_S CARRY:$ca, FPR32Op:$rx, FPR32Op:$false)>;
+let Predicates = [HasFPUv3_DF] in
+def : Pat<(select CARRY:$ca, FPR64Op:$rx, FPR64Op:$false),
+          (f2FSEL_D CARRY:$ca, FPR64Op:$rx, FPR64Op:$false)>;
\ No newline at end of file
diff --git a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
index ade5c7f795af2..b7f4fc17166b9 100644
--- a/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
+++ b/llvm/lib/Target/CSKY/CSKYRegisterInfo.td
@@ -194,6 +194,8 @@ def FPR64 : RegisterClass<"CSKY", [f64], 64,
 def sFPR64 : RegisterClass<"CSKY", [f64], 64,
                          (add (sequence "F%u_64", 0, 15))>;
 
+def sFPR64_V : RegisterClass<"CSKY", [v2f32], 32, (add sFPR64)>;
+
 def FPR128 : RegisterClass<"CSKY",
              [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128,
              (add (sequence "F%u_128", 0, 31))>;
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
index 7001de999a510..07757f03c258b 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.cpp
@@ -73,6 +73,13 @@ void CSKYInstPrinter::printRegName(raw_ostream &O, unsigned RegNo) const {
   O << getRegisterName(RegNo);
 }
 
+void CSKYInstPrinter::printFPRRegName(raw_ostream &O, unsigned RegNo) const {
+  if (PrintBranchImmAsAddress)
+    O << getRegisterName(RegNo, CSKY::NoRegAltName);
+  else
+    O << getRegisterName(RegNo);
+}
+
 void CSKYInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                    const MCSubtargetInfo &STI, raw_ostream &O,
                                    const char *Modifier) {
@@ -201,3 +208,11 @@ const char *CSKYInstPrinter::getRegisterName(unsigned RegNo) {
   return getRegisterName(RegNo, ArchRegNames ? CSKY::NoRegAltName
                                              : CSKY::ABIRegAltName);
 }
+
+void CSKYInstPrinter::printFPR(const MCInst *MI, unsigned OpNo,
+                               const MCSubtargetInfo &STI, raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNo);
+  assert(MO.isReg());
+
+  printFPRRegName(O, MO.getReg());
+}
diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
index f93a342ec6a39..52a1b92767625 100644
--- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
+++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYInstPrinter.h
@@ -36,6 +36,8 @@ class CSKYInstPrinter : public MCInstPrinter {
   void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                     raw_ostream &O, const char *Modifier = nullptr);
 
+  void printFPRRegName(raw_ostream &O, unsigned RegNo) const;
+
   // Autogenerated by tblgen.
   std::pair<const char *, uint64_t> getMnemonic(const MCInst *MI) override;
   void printInstruction(const MCInst *MI, uint64_t Address,
@@ -60,6 +62,8 @@ class CSKYInstPrinter : public MCInstPrinter {
                               const MCSubtargetInfo &STI, raw_ostream &O);
   void printSPAddr(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
                    raw_ostream &O);
+  void printFPR(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+                raw_ostream &O);
   static const char *getRegisterName(unsigned RegNo);
   static const char *getRegisterName(unsigned RegNo, unsigned AltIdx);
 };
diff --git a/llvm/test/CodeGen/CSKY/br.ll b/llvm/test/CodeGen/CSKY/br.ll
index af596aebc3005..35f9a4febbd0b 100644
--- a/llvm/test/CodeGen/CSKY/br.ll
+++ b/llvm/test/CodeGen/CSKY/br.ll
@@ -3015,7 +3015,8 @@ define i1 @brRI_i1_eq(i1 %x) {
 ; CHECK-LABEL: brRI_i1_eq:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi32 a0, a0, 1
-; CHECK-NEXT:    bez32 a0, .LBB117_2
+; CHECK-NEXT:    btsti32 a0, 0
+; CHECK-NEXT:    bf32 .LBB117_2
 ; CHECK-NEXT:  # %bb.1: # %label2
 ; CHECK-NEXT:    movi16 a0, 0
 ; CHECK-NEXT:    rts16
@@ -3035,7 +3036,8 @@ define i1 @brR0_i1_eq(i1 %x) {
 ; CHECK-LABEL: brR0_i1_eq:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi32 a0, a0, 1
-; CHECK-NEXT:    bez32 a0, .LBB118_2
+; CHECK-NEXT:    btsti32 a0, 0
+; CHECK-NEXT:    bf32 .LBB118_2
 ; CHECK-NEXT:  # %bb.1: # %label2
 ; CHECK-NEXT:    movi16 a0, 0
 ; CHECK-NEXT:    rts16
@@ -3309,7 +3311,8 @@ define i1 @brRI_i1_ule(i1 %x) {
 ; CHECK-LABEL: brRI_i1_ule:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi32 a0, a0, 1
-; CHECK-NEXT:    bnez32 a0, .LBB130_2
+; CHECK-NEXT:    btsti32 a0, 0
+; CHECK-NEXT:    bt32 .LBB130_2
 ; CHECK-NEXT:  # %bb.1: # %label1
 ; CHECK-NEXT:    movi16 a0, 1
 ; CHECK-NEXT:    rts16
@@ -3331,7 +3334,8 @@ define i1 @brR0_i1_ule(i1 %x) {
 ; CHECK-LABEL: brR0_i1_ule:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi32 a0, a0, 1
-; CHECK-NEXT:    bnez32 a0, .LBB131_2
+; CHECK-NEXT:    btsti32 a0, 0
+; CHECK-NEXT:    bt32 .LBB131_2
 ; CHECK-NEXT:  # %bb.1: # %label1
 ; CHECK-NEXT:    movi16 a0, 1
 ; CHECK-NEXT:    rts16
@@ -3449,7 +3453,8 @@ define i1 @brRI_i1_sge(i1 %x) {
 ; CHECK-LABEL: brRI_i1_sge:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi32 a0, a0, 1
-; CHECK-NEXT:    bnez32 a0, .LBB136_2
+; CHECK-NEXT:    btsti32 a0, 0
+; CHECK-NEXT:    bt32 .LBB136_2
 ; CHECK-NEXT:  # %bb.1: # %label1
 ; CHECK-NEXT:    movi16 a0, 1
 ; CHECK-NEXT:    rts16
@@ -3471,7 +3476,8 @@ define i1 @brR0_i1_sge(i1 %x) {
 ; CHECK-LABEL: brR0_i1_sge:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi32 a0, a0, 1
-; CHECK-NEXT:    bnez32 a0, .LBB137_2
+; CHECK-NEXT:    btsti32 a0, 0
+; CHECK-NEXT:    bt32 .LBB137_2
 ; CHECK-NEXT:  # %bb.1: # %label1
 ; CHECK-NEXT:    movi16 a0, 1
 ; CHECK-NEXT:    rts16
diff --git a/llvm/test/CodeGen/CSKY/fpu/base-d.ll b/llvm/test/CodeGen/CSKY/fpu/base-d.ll
new file mode 100644
index 0000000000000..d2d434631f62c
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/base-d.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf -mattr=+fpuv2_df | FileCheck %s --check-prefix=CHECK-DF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf -mattr=+fpuv3_df | FileCheck %s --check-prefix=CHECK-DF2
+
+define double @FADD_DOUBLE(double %x, double %y) {
+; CHECK-DF-LABEL: FADD_DOUBLE:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    faddd vr0, vr1, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FADD_DOUBLE:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fadd.64 vr0, vr1, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fadd = fadd  double %y, %x
+  ret double %fadd
+}
+
+define double @FADD_DOUBLE_I(double %x) {
+; CHECK-DF-LABEL: FADD_DOUBLE_I:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI1_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    faddd vr0, vr0, vr1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI1_0:
+; CHECK-DF-NEXT:    .quad 0xbff0000000000000 # double -1
+;
+; CHECK-DF2-LABEL: FADD_DOUBLE_I:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI1_0]
+; CHECK-DF2-NEXT:    fadd.64 vr0, vr0, vr1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI1_0:
+; CHECK-DF2-NEXT:    .quad 0xbff0000000000000 # double -1
+entry:
+  %fadd = fadd  double %x, -1.0
+  ret double %fadd
+}
+
+define double @FSUB_DOUBLE(double %x, double %y) {
+; CHECK-DF-LABEL: FSUB_DOUBLE:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fsubd vr0, vr1, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FSUB_DOUBLE:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fsub.64 vr0, vr1, vr0
+; CHECK-DF2-NEXT:    rts16
+
+entry:
+  %fsub = fsub  double %y, %x
+  ret double %fsub
+}
+
+define double @FSUB_DOUBLE_I(double %x) {
+;
+; CHECK-DF-LABEL: FSUB_DOUBLE_I:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI3_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    faddd vr0, vr0, vr1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI3_0:
+; CHECK-DF-NEXT:    .quad 0x3ff0000000000000 # double 1
+;
+; CHECK-DF2-LABEL: FSUB_DOUBLE_I:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI3_0]
+; CHECK-DF2-NEXT:    fadd.64 vr0, vr0, vr1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI3_0:
+; CHECK-DF2-NEXT:    .quad 0x3ff0000000000000 # double 1
+
+entry:
+  %fsub = fsub  double %x, -1.0
+  ret double %fsub
+}
+
+define double @FMUL_DOUBLE(double %x, double %y) {
+;
+; CHECK-DF-LABEL: FMUL_DOUBLE:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fmuld vr0, vr1, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FMUL_DOUBLE:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fmul.64 vr0, vr1, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fmul = fmul  double %y, %x
+  ret double %fmul
+}
+
+define double @FMUL_DOUBLE_I(double %x) {
+;
+; CHECK-DF-LABEL: FMUL_DOUBLE_I:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI5_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fmuld vr0, vr0, vr1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI5_0:
+; CHECK-DF-NEXT:    .quad 0xc01c000000000000 # double -7
+;
+; CHECK-DF2-LABEL: FMUL_DOUBLE_I:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI5_0]
+; CHECK-DF2-NEXT:    fmul.64 vr0, vr0, vr1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI5_0:
+; CHECK-DF2-NEXT:    .quad 0xc01c000000000000 # double -7
+entry:
+  %fmul = fmul  double %x, -7.0
+  ret double %fmul
+}
+
+define double @FDIV_DOUBLE(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FDIV_DOUBLE:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdivd vr0, vr1, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FDIV_DOUBLE:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fdiv.64 vr0, vr1, vr0
+; CHECK-DF2-NEXT:    rts16
+
+entry:
+  %fdiv = fdiv  double %y, %x
+  ret double %fdiv
+}
+
+define double @FDIV_DOUBLE_I(double %x) {
+;
+; CHECK-DF-LABEL: FDIV_DOUBLE_I:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI7_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fdivd vr0, vr0, vr1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI7_0:
+; CHECK-DF-NEXT:    .quad 0xc01c000000000000 # double -7
+;
+; CHECK-DF2-LABEL: FDIV_DOUBLE_I:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI7_0]
+; CHECK-DF2-NEXT:    fdiv.64 vr0, vr0, vr1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI7_0:
+; CHECK-DF2-NEXT:    .quad 0xc01c000000000000 # double -7
+entry:
+  %fdiv = fdiv  double %x, -7.0
+  ret double %fdiv
+}
+
+define double @FNEG_DOUBLE(double %x) {
+;
+; CHECK-DF-LABEL: FNEG_DOUBLE:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fnegd vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FNEG_DOUBLE:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fneg.64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fneg = fneg  double  %x
+  ret double %fneg
+}
+
+; double --> float
+define float @fptruncR_double_0(double %x) {
+;
+; CHECK-DF-LABEL: fptruncR_double_0:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtos vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptruncR_double_0:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fdtos vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fptrunc = fptrunc double %x to float
+  ret float %fptrunc
+}
+
+define double @fpextR_double_0(float %x) {
+;
+; CHECK-DF-LABEL: fpextR_double_0:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fstod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fpextR_double_0:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fstod vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fpext = fpext float %x to double
+  ret double %fpext
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/base-f.ll b/llvm/test/CodeGen/CSKY/fpu/base-f.ll
new file mode 100644
index 0000000000000..cd43cc451716a
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/base-f.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf | FileCheck %s --check-prefix=CHECK-SF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf | FileCheck %s --check-prefix=CHECK-SF2
+
+
+define float @faddRR(float %x, float %y) {
+;
+; CHECK-SF-LABEL: faddRR:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fadds vr0, vr1, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: faddRR:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fadd.32 vr0, vr1, vr0
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fadd = fadd  float %y, %x
+  ret float %fadd
+}
+
+define float @faddRI(float %x) {
+;
+; CHECK-SF-LABEL: faddRI:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fadds vr0, vr0, vr1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: faddRI:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fadd.32 vr0, vr0, vr1
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fadd = fadd  float %x, 10.0
+  ret float %fadd
+}
+
+define float @faddRI_X(float %x) {
+;
+; CHECK-SF-LABEL: faddRI_X:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 17792
+; CHECK-SF-NEXT:    ori32 a0, a0, 2048
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fadds vr0, vr0, vr1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: faddRI_X:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 17792
+; CHECK-SF2-NEXT:    ori32 a0, a0, 2048
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fadd.32 vr0, vr0, vr1
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fadd = fadd  float %x, 4097.0
+  ret float %fadd
+}
+
+define float @fsubRR(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fsubRR:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fsubs vr0, vr1, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fsubRR:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fsub.32 vr0, vr1, vr0
+; CHECK-SF2-NEXT:    rts16
+
+
+entry:
+  %fsub = fsub  float %y, %x
+  ret float %fsub
+}
+
+define float @fsubRI(float %x) {
+;
+; CHECK-SF-LABEL: fsubRI:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fadds vr0, vr0, vr1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fsubRI:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fadd.32 vr0, vr0, vr1
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fsub = fsub  float %x, 10.0
+  ret float %fsub
+}
+
+define float @fsubRI_X(float %x) {
+;
+; CHECK-SF-LABEL: fsubRI_X:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 50560
+; CHECK-SF-NEXT:    ori32 a0, a0, 2048
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fadds vr0, vr0, vr1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fsubRI_X:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 50560
+; CHECK-SF2-NEXT:    ori32 a0, a0, 2048
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fadd.32 vr0, vr0, vr1
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fsub = fsub  float %x, 4097.0
+  ret float %fsub
+}
+
+define float @fmulRR(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fmulRR:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fmuls vr0, vr1, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fmulRR:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fmul.32 vr0, vr1, vr0
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fmul = fmul  float %y, %x
+  ret float %fmul
+}
+
+define float @fmulRI(float %x) {
+;
+; CHECK-SF-LABEL: fmulRI:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fmuls vr0, vr0, vr1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fmulRI:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fmul.32 vr0, vr0, vr1
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fmul = fmul  float %x, 10.0
+  ret float %fmul
+}
+
+define float @fmulRI_X(float %x) {
+;
+; CHECK-SF-LABEL: fmulRI_X:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 17792
+; CHECK-SF-NEXT:    ori32 a0, a0, 2048
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fmuls vr0, vr0, vr1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fmulRI_X:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 17792
+; CHECK-SF2-NEXT:    ori32 a0, a0, 2048
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fmul.32 vr0, vr0, vr1
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fmul = fmul  float %x, 4097.0
+  ret float %fmul
+}
+
+define float @fdivRR(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fdivRR:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fdivs vr0, vr1, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fdivRR:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fdiv.32 vr0, vr1, vr0
+; CHECK-SF2-NEXT:    rts16
+
+
+entry:
+  %fdiv = fdiv  float %y, %x
+  ret float %fdiv
+}
+
+define float @fdivRI(float %x) {
+;
+; CHECK-SF-LABEL: fdivRI:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fdivs vr0, vr0, vr1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fdivRI:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fdiv.32 vr0, vr0, vr1
+; CHECK-SF2-NEXT:    rts16
+
+
+entry:
+  %fdiv = fdiv  float %x, 10.0
+  ret float %fdiv
+}
+
+define float @fdivRI_X(float %x) {
+;
+; CHECK-SF-LABEL: fdivRI_X:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 17792
+; CHECK-SF-NEXT:    ori32 a0, a0, 2048
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fdivs vr0, vr0, vr1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fdivRI_X:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 17792
+; CHECK-SF2-NEXT:    ori32 a0, a0, 2048
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fdiv.32 vr0, vr0, vr1
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fdiv = fdiv  float %x, 4097.0
+  ret float %fdiv
+}
+
+define float @fnegRR(float %x) {
+;
+; CHECK-SF-LABEL: fnegRR:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fnegs vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fnegRR:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fneg.32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+
+entry:
+  %fneg = fneg  float %x
+  ret float %fneg
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/br-d.ll b/llvm/test/CodeGen/CSKY/fpu/br-d.ll
new file mode 100644
index 0000000000000..9e7d804c0f642
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/br-d.ll
@@ -0,0 +1,2061 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf -mattr=+fpuv2_df | FileCheck %s --check-prefix=CHECK-DF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf -mattr=+fpuv3_df | FileCheck %s --check-prefix=CHECK-DF2
+
+;OEQ
+define i32 @brRR_oeq(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_oeq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpned vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB0_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB0_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_oeq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpne.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB0_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB0_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oeq double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_oeq(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_oeq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI1_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    bt32 .LBB1_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB1_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI1_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_oeq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI1_0]
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    bt32 .LBB1_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB1_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI1_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp oeq double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_oeq(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_oeq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI2_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    bt32 .LBB2_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB2_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI2_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_oeq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI2_0]
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    bt32 .LBB2_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB2_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI2_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp oeq double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;NE
+define i32 @brRR_one(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_one:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    fcmpned vr1, vr0
+; CHECK-DF-NEXT:    mvcv16 a1
+; CHECK-DF-NEXT:    or16 a0, a1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB3_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB3_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_one:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvcv16 a1
+; CHECK-DF2-NEXT:    or16 a0, a1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB3_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB3_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp one double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_one(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_one:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI4_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpuod vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    mvcv16 a1
+; CHECK-DF-NEXT:    or16 a0, a1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB4_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB4_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI4_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_one:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI4_0]
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvcv16 a1
+; CHECK-DF2-NEXT:    or16 a0, a1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB4_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB4_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI4_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp one double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_one(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_one:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI5_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpuod vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    mvcv16 a1
+; CHECK-DF-NEXT:    or16 a0, a1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB5_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB5_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI5_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_one:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI5_0]
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvcv16 a1
+; CHECK-DF2-NEXT:    or16 a0, a1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB5_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB5_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI5_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp one double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;UGT
+define i32 @brRR_ugt(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_ugt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    bt32 .LBB6_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB6_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_ugt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    bt32 .LBB6_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB6_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ugt double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ugt(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_ugt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI7_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB7_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB7_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI7_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_ugt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI7_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB7_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB7_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI7_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp ugt double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ugt(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_ugt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI8_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB8_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB8_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI8_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_ugt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI8_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB8_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB8_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI8_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ugt double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;UGE
+define i32 @brRR_uge(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_uge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB9_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB9_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_uge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB9_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB9_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uge double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_uge(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_uge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI10_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    bt32 .LBB10_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB10_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI10_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_uge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI10_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    bt32 .LBB10_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB10_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI10_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp uge double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_uge(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_uge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI11_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    bt32 .LBB11_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB11_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI11_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_uge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI11_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    bt32 .LBB11_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB11_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI11_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp uge double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;ULT
+define i32 @brRR_ult(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_ult:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB12_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB12_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_ult:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB12_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB12_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ult double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ult(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_ult:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI13_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    bt32 .LBB13_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB13_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI13_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_ult:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI13_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    bt32 .LBB13_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB13_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI13_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp ult double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ult(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_ult:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI14_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    bt32 .LBB14_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB14_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI14_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_ult:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI14_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    bt32 .LBB14_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB14_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI14_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ult double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;ULE
+define i32 @brRR_ule(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_ule:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    bt32 .LBB15_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB15_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_ule:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    bt32 .LBB15_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB15_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ule double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ule(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_ule:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI16_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB16_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB16_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI16_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_ule:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI16_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB16_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB16_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI16_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp ule double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ule(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_ule:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI17_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB17_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB17_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI17_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_ule:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI17_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB17_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB17_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI17_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ule double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;SGT
+define i32 @brRR_ogt(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_ogt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB18_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB18_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_ogt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB18_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB18_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ogt double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ogt(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_ogt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI19_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB19_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB19_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI19_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_ogt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI19_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB19_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB19_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI19_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp ogt double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ogt(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_ogt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI20_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB20_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB20_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI20_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_ogt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI20_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB20_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB20_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI20_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ogt double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;SGE
+define i32 @brRR_oge(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_oge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB21_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB21_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_oge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB21_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB21_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oge double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_oge(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_oge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI22_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB22_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB22_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI22_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_oge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI22_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB22_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB22_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI22_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp oge double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_oge(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_oge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI23_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB23_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB23_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI23_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_oge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI23_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB23_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB23_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI23_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp oge double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;SLT
+define i32 @brRR_olt(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_olt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB24_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB24_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_olt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB24_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB24_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp olt double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_olt(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_olt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI25_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB25_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB25_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI25_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_olt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI25_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB25_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB25_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI25_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp olt double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_olt(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_olt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI26_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB26_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB26_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI26_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_olt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI26_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB26_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB26_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI26_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp olt double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;SLE
+define i32 @brRR_ole(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_ole:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB27_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB27_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_ole:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB27_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB27_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ole double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ole(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_ole:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI28_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB28_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB28_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI28_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_ole:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI28_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB28_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB28_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI28_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp ole double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ole(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_ole:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI29_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB29_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB29_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI29_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_ole:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI29_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB29_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB29_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI29_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ole double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;False
+define i32 @brRR_false(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_false:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB30_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB30_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_false:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB30_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB30_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp false double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_false(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_false:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB31_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB31_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRI_false:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB31_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB31_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp false double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_false(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_false:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB32_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB32_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brR0_false:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB32_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB32_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp false double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+
+;ORD
+define i32 @brRR_ord(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_ord:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB33_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB33_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_ord:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB33_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB33_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ord(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_ord:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr0, vr0
+; CHECK-DF-NEXT:    bt32 .LBB34_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB34_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRI_ord:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB34_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB34_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ord(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_ord:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr0, vr0
+; CHECK-DF-NEXT:    bt32 .LBB35_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB35_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brR0_ord:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB35_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB35_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+
+;UEQ
+define i32 @brRR_ueq(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_ueq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr1, vr0
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    fcmpned vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a1
+; CHECK-DF-NEXT:    and16 a0, a1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bf32 .LBB36_2
+; CHECK-DF-NEXT:  # %bb.1: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB36_2: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_ueq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a1
+; CHECK-DF2-NEXT:    and16 a0, a1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bf32 .LBB36_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB36_2: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ueq double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ueq(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_ueq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI37_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpuod vr0, vr1
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a1
+; CHECK-DF-NEXT:    and16 a0, a1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bf32 .LBB37_2
+; CHECK-DF-NEXT:  # %bb.1: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB37_2: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI37_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_ueq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI37_0]
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a1
+; CHECK-DF2-NEXT:    and16 a0, a1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bf32 .LBB37_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB37_2: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI37_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp ueq double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ueq(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_ueq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI38_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpuod vr0, vr1
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a1
+; CHECK-DF-NEXT:    and16 a0, a1
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bf32 .LBB38_2
+; CHECK-DF-NEXT:  # %bb.1: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB38_2: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI38_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_ueq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI38_0]
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a1
+; CHECK-DF2-NEXT:    and16 a0, a1
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bf32 .LBB38_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB38_2: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI38_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ueq double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;UNE
+define i32 @brRR_une(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_une:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpned vr1, vr0
+; CHECK-DF-NEXT:    bf32 .LBB39_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB39_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_une:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpne.64 vr1, vr0
+; CHECK-DF2-NEXT:    bf32 .LBB39_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB39_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp une double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_une(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_une:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI40_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    bf32 .LBB40_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB40_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI40_0:
+; CHECK-DF-NEXT:    .quad 0x4024000000000000 # double 10
+;
+; CHECK-DF2-LABEL: brRI_une:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI40_0]
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    bf32 .LBB40_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB40_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI40_0:
+; CHECK-DF2-NEXT:    .quad 0x4024000000000000 # double 10
+entry:
+  %fcmp = fcmp une double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_une(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_une:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI41_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    bf32 .LBB41_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB41_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.3:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI41_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: brR0_une:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI41_0]
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    bf32 .LBB41_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB41_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.3:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI41_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp une double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;UNO
+define i32 @brRR_uno(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_uno:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr1, vr0
+; CHECK-DF-NEXT:    bt32 .LBB42_2
+; CHECK-DF-NEXT:  # %bb.1: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB42_2: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_uno:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr1, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB42_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB42_2: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_uno(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_uno:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr0, vr0
+; CHECK-DF-NEXT:    bt32 .LBB43_2
+; CHECK-DF-NEXT:  # %bb.1: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB43_2: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRI_uno:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB43_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB43_2: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_uno(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_uno:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr0, vr0
+; CHECK-DF-NEXT:    bt32 .LBB44_2
+; CHECK-DF-NEXT:  # %bb.1: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB44_2: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brR0_uno:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr0
+; CHECK-DF2-NEXT:    bt32 .LBB44_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB44_2: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;True
+define i32 @brRR_true(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: brRR_true:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB45_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB45_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRR_true:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB45_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB45_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp true double %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_true(double %x) {
+;
+;
+; CHECK-DF-LABEL: brRI_true:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB46_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB46_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brRI_true:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB46_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB46_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp true double %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_true(double %x) {
+;
+;
+; CHECK-DF-LABEL: brR0_true:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    btsti32 a0, 0
+; CHECK-DF-NEXT:    bt32 .LBB47_2
+; CHECK-DF-NEXT:  # %bb.1: # %label1
+; CHECK-DF-NEXT:    movi16 a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:  .LBB47_2: # %label2
+; CHECK-DF-NEXT:    movi16 a0, 0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: brR0_true:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    btsti32 a0, 0
+; CHECK-DF2-NEXT:    bt32 .LBB47_2
+; CHECK-DF2-NEXT:  # %bb.1: # %label1
+; CHECK-DF2-NEXT:    movi16 a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:  .LBB47_2: # %label2
+; CHECK-DF2-NEXT:    movi16 a0, 0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp true double %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/br-f.ll b/llvm/test/CodeGen/CSKY/fpu/br-f.ll
new file mode 100644
index 0000000000000..eb13f20187430
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/br-f.ll
@@ -0,0 +1,1751 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf | FileCheck %s --check-prefix=CHECK-SF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf | FileCheck %s --check-prefix=CHECK-SF2
+
+;OEQ
+define i32 @brRR_oeq(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_oeq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpnes vr1, vr0
+; CHECK-SF-NEXT:    bt32 .LBB0_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB0_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_oeq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpne.32 vr1, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB0_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB0_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oeq float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_oeq(float %x) {
+;
+; CHECK-SF-LABEL: brRI_oeq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmpnes vr0, vr1
+; CHECK-SF-NEXT:    bt32 .LBB1_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB1_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_oeq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    bt32 .LBB1_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB1_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oeq float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_oeq(float %x) {
+;
+; CHECK-SF-LABEL: brR0_oeq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpznes vr0
+; CHECK-SF-NEXT:    bt32 .LBB2_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB2_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_oeq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpnez.32 vr0
+; CHECK-SF2-NEXT:    bt32 .LBB2_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB2_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oeq float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;NE
+define i32 @brRR_one(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_one:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    fcmpnes vr1, vr0
+; CHECK-SF-NEXT:    mvcv16 a1
+; CHECK-SF-NEXT:    or16 a0, a1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB3_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB3_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_one:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvcv16 a1
+; CHECK-SF2-NEXT:    or16 a0, a1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB3_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB3_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp one float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_one(float %x) {
+;
+; CHECK-SF-LABEL: brRI_one:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmpnes vr0, vr1
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvc32 a1
+; CHECK-SF-NEXT:    or16 a0, a1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB4_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB4_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_one:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvc32 a1
+; CHECK-SF2-NEXT:    or16 a0, a1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB4_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB4_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp one float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_one(float %x) {
+;
+; CHECK-SF-LABEL: brR0_one:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    fcmpznes vr0
+; CHECK-SF-NEXT:    mvcv16 a1
+; CHECK-SF-NEXT:    or16 a0, a1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB5_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB5_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_one:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    fcmpnez.32 vr0
+; CHECK-SF2-NEXT:    mvcv16 a1
+; CHECK-SF2-NEXT:    or16 a0, a1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB5_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB5_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp one float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;UGT
+define i32 @brRR_ugt(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_ugt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmphss vr0, vr1
+; CHECK-SF-NEXT:    bt32 .LBB6_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB6_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_ugt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphs.32 vr0, vr1
+; CHECK-SF2-NEXT:    bt32 .LBB6_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB6_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ugt float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ugt(float %x) {
+;
+; CHECK-SF-LABEL: brRI_ugt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmphss vr1, vr0
+; CHECK-SF-NEXT:    bt32 .LBB7_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB7_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_ugt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmphs.32 vr1, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB7_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB7_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ugt float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ugt(float %x) {
+;
+; CHECK-SF-LABEL: brR0_ugt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzlss vr0
+; CHECK-SF-NEXT:    bt32 .LBB8_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB8_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_ugt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplsz.32 vr0
+; CHECK-SF2-NEXT:    bt32 .LBB8_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB8_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ugt float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;UGE
+define i32 @brRR_uge(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_uge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmplts vr1, vr0
+; CHECK-SF-NEXT:    bt32 .LBB9_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB9_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_uge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplt.32 vr1, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB9_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB9_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uge float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_uge(float %x) {
+;
+; CHECK-SF-LABEL: brRI_uge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmplts vr0, vr1
+; CHECK-SF-NEXT:    bt32 .LBB10_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB10_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_uge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmplt.32 vr0, vr1
+; CHECK-SF2-NEXT:    bt32 .LBB10_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB10_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uge float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_uge(float %x) {
+;
+; CHECK-SF-LABEL: brR0_uge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzhss vr0
+; CHECK-SF-NEXT:    bf32 .LBB11_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB11_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_uge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpltz.32 vr0
+; CHECK-SF2-NEXT:    bt32 .LBB11_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB11_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uge float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;ULT
+define i32 @brRR_ult(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_ult:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmphss vr1, vr0
+; CHECK-SF-NEXT:    bt32 .LBB12_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB12_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_ult:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphs.32 vr1, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB12_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB12_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ult float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ult(float %x) {
+;
+; CHECK-SF-LABEL: brRI_ult:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmphss vr0, vr1
+; CHECK-SF-NEXT:    bt32 .LBB13_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB13_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_ult:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmphs.32 vr0, vr1
+; CHECK-SF2-NEXT:    bt32 .LBB13_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB13_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ult float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ult(float %x) {
+;
+; CHECK-SF-LABEL: brR0_ult:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzhss vr0
+; CHECK-SF-NEXT:    bt32 .LBB14_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB14_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_ult:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphsz.32 vr0
+; CHECK-SF2-NEXT:    bt32 .LBB14_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB14_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ult float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;ULE
+define i32 @brRR_ule(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_ule:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmplts vr0, vr1
+; CHECK-SF-NEXT:    bt32 .LBB15_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB15_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_ule:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplt.32 vr0, vr1
+; CHECK-SF2-NEXT:    bt32 .LBB15_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB15_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ule float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ule(float %x) {
+;
+; CHECK-SF-LABEL: brRI_ule:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmplts vr1, vr0
+; CHECK-SF-NEXT:    bt32 .LBB16_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB16_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_ule:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmplt.32 vr1, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB16_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB16_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ule float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ule(float %x) {
+;
+; CHECK-SF-LABEL: brR0_ule:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzlss vr0
+; CHECK-SF-NEXT:    bf32 .LBB17_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB17_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_ule:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphz.32 vr0
+; CHECK-SF2-NEXT:    bt32 .LBB17_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB17_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ule float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;SGT
+define i32 @brRR_ogt(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_ogt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmplts vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB18_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB18_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_ogt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplt.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB18_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB18_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ogt float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ogt(float %x) {
+;
+; CHECK-SF-LABEL: brRI_ogt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmplts vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB19_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB19_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_ogt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmplt.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB19_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB19_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ogt float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ogt(float %x) {
+;
+; CHECK-SF-LABEL: brR0_ogt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzlss vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB20_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB20_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_ogt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB20_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB20_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ogt float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;SGE
+define i32 @brRR_oge(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_oge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmphss vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB21_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB21_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_oge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphs.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB21_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB21_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oge float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_oge(float %x) {
+;
+; CHECK-SF-LABEL: brRI_oge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmphss vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB22_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB22_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_oge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmphs.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB22_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB22_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oge float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_oge(float %x) {
+;
+; CHECK-SF-LABEL: brR0_oge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzhss vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB23_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB23_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_oge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphsz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB23_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB23_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oge float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;SLT
+define i32 @brRR_olt(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_olt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmplts vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB24_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB24_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_olt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplt.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB24_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB24_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp olt float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_olt(float %x) {
+;
+; CHECK-SF-LABEL: brRI_olt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmplts vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB25_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB25_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_olt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmplt.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB25_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB25_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp olt float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_olt(float %x) {
+;
+; CHECK-SF-LABEL: brR0_olt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzhss vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB26_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB26_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_olt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpltz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB26_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB26_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp olt float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;SLE
+define i32 @brRR_ole(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_ole:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmphss vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB27_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB27_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_ole:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphs.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB27_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB27_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ole float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ole(float %x) {
+;
+; CHECK-SF-LABEL: brRI_ole:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmphss vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB28_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB28_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_ole:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmphs.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB28_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB28_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ole float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ole(float %x) {
+;
+; CHECK-SF-LABEL: brR0_ole:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzlss vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB29_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB29_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_ole:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplsz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB29_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB29_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ole float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;False
+define i32 @brRR_false(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_false:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB30_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB30_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_false:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB30_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB30_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp false float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_false(float %x) {
+;
+; CHECK-SF-LABEL: brRI_false:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB31_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB31_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_false:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB31_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB31_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp false float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_false(float %x) {
+;
+; CHECK-SF-LABEL: brR0_false:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB32_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB32_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_false:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB32_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB32_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp false float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+
+;ORD
+define i32 @brRR_ord(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_ord:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr1, vr0
+; CHECK-SF-NEXT:    bt32 .LBB33_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB33_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_ord:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr1, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB33_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB33_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ord(float %x) {
+;
+; CHECK-SF-LABEL: brRI_ord:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    bt32 .LBB34_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB34_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_ord:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB34_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB34_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ord(float %x) {
+;
+; CHECK-SF-LABEL: brR0_ord:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    bt32 .LBB35_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB35_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_ord:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB35_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB35_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+
+;UEQ
+define i32 @brRR_ueq(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_ueq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr1, vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    fcmpnes vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a1
+; CHECK-SF-NEXT:    and16 a0, a1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bf32 .LBB36_2
+; CHECK-SF-NEXT:  # %bb.1: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB36_2: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_ueq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a1
+; CHECK-SF2-NEXT:    and16 a0, a1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bf32 .LBB36_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB36_2: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ueq float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_ueq(float %x) {
+;
+; CHECK-SF-LABEL: brRI_ueq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmpnes vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvcv16 a1
+; CHECK-SF-NEXT:    and16 a0, a1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bf32 .LBB37_2
+; CHECK-SF-NEXT:  # %bb.1: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB37_2: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_ueq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvcv16 a1
+; CHECK-SF2-NEXT:    and16 a0, a1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bf32 .LBB37_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB37_2: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ueq float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_ueq(float %x) {
+;
+; CHECK-SF-LABEL: brR0_ueq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    fcmpznes vr0
+; CHECK-SF-NEXT:    mvc32 a1
+; CHECK-SF-NEXT:    and16 a0, a1
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bf32 .LBB38_2
+; CHECK-SF-NEXT:  # %bb.1: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB38_2: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_ueq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvcv16 a1
+; CHECK-SF2-NEXT:    and16 a0, a1
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bf32 .LBB38_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB38_2: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ueq float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;UNE
+define i32 @brRR_une(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_une:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpnes vr1, vr0
+; CHECK-SF-NEXT:    bf32 .LBB39_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB39_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_une:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpne.32 vr1, vr0
+; CHECK-SF2-NEXT:    bf32 .LBB39_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB39_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp une float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_une(float %x) {
+;
+; CHECK-SF-LABEL: brRI_une:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 16672
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmpnes vr0, vr1
+; CHECK-SF-NEXT:    bf32 .LBB40_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB40_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_une:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 16672
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    bf32 .LBB40_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB40_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp une float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_une(float %x) {
+;
+; CHECK-SF-LABEL: brR0_une:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpznes vr0
+; CHECK-SF-NEXT:    bf32 .LBB41_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB41_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_une:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpnez.32 vr0
+; CHECK-SF2-NEXT:    bf32 .LBB41_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB41_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp une float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;UNO
+define i32 @brRR_uno(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_uno:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr1, vr0
+; CHECK-SF-NEXT:    bt32 .LBB42_2
+; CHECK-SF-NEXT:  # %bb.1: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB42_2: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_uno:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr1, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB42_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB42_2: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_uno(float %x) {
+;
+; CHECK-SF-LABEL: brRI_uno:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    bt32 .LBB43_2
+; CHECK-SF-NEXT:  # %bb.1: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB43_2: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_uno:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB43_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB43_2: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_uno(float %x) {
+;
+; CHECK-SF-LABEL: brR0_uno:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    bt32 .LBB44_2
+; CHECK-SF-NEXT:  # %bb.1: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB44_2: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_uno:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    bt32 .LBB44_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB44_2: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+;True
+define i32 @brRR_true(float %x, float %y) {
+;
+; CHECK-SF-LABEL: brRR_true:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB45_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB45_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRR_true:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB45_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB45_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp true float %y, %x
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brRI_true(float %x) {
+;
+; CHECK-SF-LABEL: brRI_true:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB46_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB46_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brRI_true:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB46_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB46_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp true float %x, 10.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
+
+define i32 @brR0_true(float %x) {
+;
+; CHECK-SF-LABEL: brR0_true:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    btsti32 a0, 0
+; CHECK-SF-NEXT:    bt32 .LBB47_2
+; CHECK-SF-NEXT:  # %bb.1: # %label1
+; CHECK-SF-NEXT:    movi16 a0, 1
+; CHECK-SF-NEXT:    rts16
+; CHECK-SF-NEXT:  .LBB47_2: # %label2
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: brR0_true:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    btsti32 a0, 0
+; CHECK-SF2-NEXT:    bt32 .LBB47_2
+; CHECK-SF2-NEXT:  # %bb.1: # %label1
+; CHECK-SF2-NEXT:    movi16 a0, 1
+; CHECK-SF2-NEXT:    rts16
+; CHECK-SF2-NEXT:  .LBB47_2: # %label2
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp true float %x, 0.0
+  br i1 %fcmp, label %label1, label %label2
+label1:
+  ret i32 1
+label2:
+  ret i32 0
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/cmp-d.ll b/llvm/test/CodeGen/CSKY/fpu/cmp-d.ll
new file mode 100644
index 0000000000000..af9bddda994b4
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/cmp-d.ll
@@ -0,0 +1,766 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf -mattr=+fpuv2_df | FileCheck %s --check-prefix=CHECK-DF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf -mattr=+fpuv3_df | FileCheck %s --check-prefix=CHECK-DF2
+
+
+;ueq
+
+define i1 @FCMP_DOUBLE_ueq(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_ueq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    fcmpned vr1, vr0
+; CHECK-DF-NEXT:    mvcv16 a1
+; CHECK-DF-NEXT:    or16 a0, a1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_ueq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvcv16 a1
+; CHECK-DF2-NEXT:    or16 a0, a1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ueq double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_ueq(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_ueq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI1_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpuod vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    mvcv16 a1
+; CHECK-DF-NEXT:    or16 a0, a1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI1_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_ueq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI1_0]
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvcv16 a1
+; CHECK-DF2-NEXT:    or16 a0, a1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI1_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ueq double %x, 0.0
+  ret i1 %fcmp
+}
+
+;une
+
+define i1 @FCMP_DOUBLE_une(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_une:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpned vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_une:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpne.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp une double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_une(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_une:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI3_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI3_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_une:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI3_0]
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI3_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp une double %x, 0.0
+  ret i1 %fcmp
+}
+
+;ugt
+
+define i1 @FCMP_DOUBLE_ugt(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_ugt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_ugt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ugt double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_ugt(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_ugt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI5_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI5_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_ugt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI5_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI5_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ugt double %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;uge
+
+define i1 @FCMP_DOUBLE_uge(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_uge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_uge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uge double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_uge(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_uge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI7_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI7_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_uge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI7_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI7_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp uge double %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;ult
+
+define i1 @FCMP_DOUBLE_ult(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_ult:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_ult:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ult double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_ult(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_ult:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI9_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI9_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_ult:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI9_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI9_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ult double %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;ule
+
+define i1 @FCMP_DOUBLE_ule(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_ule:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_ule:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ule double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_ule(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_ule:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI11_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    xori32 a0, a0, 1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI11_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_ule:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI11_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    xori32 a0, a0, 1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI11_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ule double %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;ogt
+
+define i1 @FCMP_DOUBLE_ogt(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_ogt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_ogt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ogt double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_ogt(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_ogt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI13_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI13_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_ogt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI13_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI13_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ogt double %x, 0.0
+  ret i1 %fcmp
+}
+
+;oge
+
+define i1 @FCMP_DOUBLE_oge(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_oge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_oge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oge double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_oge(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_oge:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI15_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI15_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_oge:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI15_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI15_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp oge double %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;olt
+
+define i1 @FCMP_DOUBLE_olt(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_olt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpltd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_olt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmplt.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp olt double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_olt(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_olt:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI17_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpltd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI17_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_olt:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI17_0]
+; CHECK-DF2-NEXT:    fcmplt.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI17_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp olt double %x, 0.0
+  ret i1 %fcmp
+}
+
+;ole
+
+define i1 @FCMP_DOUBLE_ole(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_ole:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmphsd vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_ole:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmphs.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ole double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_ole(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_ole:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI19_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmphsd vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI19_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_ole:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI19_0]
+; CHECK-DF2-NEXT:    fcmphs.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI19_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp ole double %x, 0.0
+  ret i1 %fcmp
+}
+
+;one
+
+define i1 @FCMP_DOUBLE_one(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_one:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr1, vr0
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    fcmpned vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a1
+; CHECK-DF-NEXT:    and16 a0, a1
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_one:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a1
+; CHECK-DF2-NEXT:    and16 a0, a1
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp one double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_one(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_one:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI21_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpuod vr0, vr1
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    mvc32 a1
+; CHECK-DF-NEXT:    and16 a0, a1
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI21_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_one:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI21_0]
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvc32 a1
+; CHECK-DF2-NEXT:    and16 a0, a1
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI21_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp one double %x, 0.0
+  ret i1 %fcmp
+}
+
+;oeq
+
+define i1 @FCMP_DOUBLE_oeq(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_oeq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpned vr1, vr0
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_oeq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpne.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oeq double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_oeq(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_oeq:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    grs32 a0, .LCPI23_0
+; CHECK-DF-NEXT:    fldd vr1, (a0, 0)
+; CHECK-DF-NEXT:    fcmpned vr0, vr1
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI23_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_oeq:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    flrw.64 vr1, [.LCPI23_0]
+; CHECK-DF2-NEXT:    fcmpne.64 vr0, vr1
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI23_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %fcmp = fcmp oeq double %x, 0.0
+  ret i1 %fcmp
+}
+
+;ord
+
+define i1 @FCMP_DOUBLE_ord(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_ord:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr1, vr0
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_ord:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_ord(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_ord:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr0, vr0
+; CHECK-DF-NEXT:    mvcv16 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_ord:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr0
+; CHECK-DF2-NEXT:    mvcv16 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord double %x, 0.0
+  ret i1 %fcmp
+}
+
+;uno
+
+define i1 @FCMP_DOUBLE_uno(double %x, double %y) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_uno:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr1, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_uno:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr1, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno double %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @FCMP_DOUBLE_I_uno(double %x) {
+;
+;
+; CHECK-DF-LABEL: FCMP_DOUBLE_I_uno:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fcmpuod vr0, vr0
+; CHECK-DF-NEXT:    mvc32 a0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: FCMP_DOUBLE_I_uno:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fcmpuo.64 vr0, vr0
+; CHECK-DF2-NEXT:    mvc32 a0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno double %x, 0.0
+  ret i1 %fcmp
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/cmp-f.ll b/llvm/test/CodeGen/CSKY/fpu/cmp-f.ll
new file mode 100644
index 0000000000000..2bb5534c422ce
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/cmp-f.ll
@@ -0,0 +1,896 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf | FileCheck %s --check-prefix=CHECK-SF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf | FileCheck %s --check-prefix=CHECK-SF2
+
+;ueq
+define i1 @fcmpRR_ueq(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_ueq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    fcmpnes vr1, vr0
+; CHECK-SF-NEXT:    mvcv16 a1
+; CHECK-SF-NEXT:    or16 a0, a1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_ueq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvcv16 a1
+; CHECK-SF2-NEXT:    or16 a0, a1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ueq float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_ueq(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_ueq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmpnes vr0, vr1
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvc32 a1
+; CHECK-SF-NEXT:    or16 a0, a1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_ueq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvc32 a1
+; CHECK-SF2-NEXT:    or16 a0, a1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ueq float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_ueq(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_ueq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    fcmpznes vr0
+; CHECK-SF-NEXT:    mvcv16 a1
+; CHECK-SF-NEXT:    or16 a0, a1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_ueq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    fcmpnez.32 vr0
+; CHECK-SF2-NEXT:    mvcv16 a1
+; CHECK-SF2-NEXT:    or16 a0, a1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ueq float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;une
+define i1 @fcmpRR_une(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_une:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpnes vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_une:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpne.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp une float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_une(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_une:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmpnes vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_une:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp une float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_une(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_une:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpznes vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_une:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpnez.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp une float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;ugt
+define i1 @fcmpRR_ugt(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_ugt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmphss vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_ugt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphs.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ugt float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_ugt(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_ugt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmphss vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_ugt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmphs.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ugt float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_ugt(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_ugt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzlss vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_ugt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplsz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ugt float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;uge
+define i1 @fcmpRR_uge(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_uge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmplts vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_uge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplt.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uge float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_uge(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_uge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmplts vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_uge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmplt.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uge float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_uge(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_uge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzhss vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_uge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpltz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uge float %x, 0.0
+  ret i1 %fcmp
+}
+
+;ult
+define i1 @fcmpRR_ult(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_ult:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmphss vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_ult:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphs.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ult float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_ult(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_ult:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmphss vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_ult:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmphs.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ult float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_ult(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_ult:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzhss vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_ult:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphsz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ult float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;ule
+define i1 @fcmpRR_ule(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_ule:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmplts vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_ule:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplt.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ule float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_ule(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_ule:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmplts vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_ule:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmplt.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ule float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_ule(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_ule:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzlss vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    xori32 a0, a0, 1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_ule:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    xori32 a0, a0, 1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ule float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;ogt
+define i1 @fcmpRR_ogt(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_ogt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmplts vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_ogt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplt.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ogt float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_ogt(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_ogt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmplts vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_ogt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmplt.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ogt float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_ogt(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_ogt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzlss vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_ogt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ogt float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;oge
+define i1 @fcmpRR_oge(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_oge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmphss vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_oge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphs.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oge float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_oge(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_oge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmphss vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_oge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmphs.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oge float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_oge(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_oge:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzhss vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_oge:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphsz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oge float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;olt
+define i1 @fcmpRR_olt(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_olt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmplts vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_olt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplt.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp olt float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_olt(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_olt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmplts vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_olt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmplt.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp olt float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_olt(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_olt:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzhss vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_olt:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpltz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp olt float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;ole
+define i1 @fcmpRR_ole(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_ole:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmphss vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_ole:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmphs.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ole float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_ole(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_ole:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmphss vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_ole:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmphs.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ole float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_ole(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_ole:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpzlss vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_ole:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmplsz.32 vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ole float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;one
+define i1 @fcmpRR_one(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_one:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr1, vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    fcmpnes vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a1
+; CHECK-SF-NEXT:    and16 a0, a1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_one:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a1
+; CHECK-SF2-NEXT:    and16 a0, a1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp one float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_one(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_one:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmpnes vr0, vr1
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvcv16 a1
+; CHECK-SF-NEXT:    and16 a0, a1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_one:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvcv16 a1
+; CHECK-SF2-NEXT:    and16 a0, a1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp one float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_one(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_one:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    fcmpznes vr0
+; CHECK-SF-NEXT:    mvc32 a1
+; CHECK-SF-NEXT:    and16 a0, a1
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_one:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvcv16 a1
+; CHECK-SF2-NEXT:    and16 a0, a1
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp one float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;oeq
+define i1 @fcmpRR_oeq(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_oeq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpnes vr1, vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_oeq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpne.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oeq float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_oeq(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_oeq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    movih32 a0, 49440
+; CHECK-SF-NEXT:    fmtvrl vr1, a0
+; CHECK-SF-NEXT:    fcmpnes vr0, vr1
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_oeq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    movih32 a0, 49440
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr1, a0
+; CHECK-SF2-NEXT:    fcmpne.32 vr0, vr1
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oeq float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_oeq(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_oeq:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpznes vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_oeq:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpnez.32 vr0
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp oeq float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;ord
+define i1 @fcmpRR_ord(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_ord:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr1, vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_ord:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_ord(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_ord:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_ord:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_ord(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_ord:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvcv16 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_ord:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvcv16 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp ord float %x, 0.0
+  ret i1 %fcmp
+}
+
+
+;uno
+define i1 @fcmpRR_uno(float %x, float %y) {
+;
+; CHECK-SF-LABEL: fcmpRR_uno:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr1, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRR_uno:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr1, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno float %y, %x
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_uno(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_uno:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_uno:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno float %x, -10.0
+  ret i1 %fcmp
+}
+
+define i1 @fcmpRI_X_uno(float %x) {
+;
+; CHECK-SF-LABEL: fcmpRI_X_uno:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fcmpuos vr0, vr0
+; CHECK-SF-NEXT:    mvc32 a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fcmpRI_X_uno:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fcmpuo.32 vr0, vr0
+; CHECK-SF2-NEXT:    mvc32 a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fcmp = fcmp uno float %x, 0.0
+  ret i1 %fcmp
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/cvt-d.ll b/llvm/test/CodeGen/CSKY/fpu/cvt-d.ll
new file mode 100644
index 0000000000000..1bdb4f8784ebe
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/cvt-d.ll
@@ -0,0 +1,371 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf -mattr=+fpuv2_df | FileCheck %s --check-prefix=CHECK-DF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf -mattr=+fpuv3_df | FileCheck %s --check-prefix=CHECK-DF2
+
+
+; double --> float
+define float @fptruncR_double_0(double %x) {
+;
+; CHECK-DF-LABEL: fptruncR_double_0:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtos vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptruncR_double_0:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fdtos vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fptrunc = fptrunc double %x to float
+  ret float %fptrunc
+}
+
+define double @fpextR_double_0(float %x) {
+;
+; CHECK-DF-LABEL: fpextR_double_0:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fstod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fpextR_double_0:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fstod vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fpext = fpext float %x to double
+  ret double %fpext
+}
+
+; double --> i32
+define i32 @fptosiR_double_1(double %x) {
+;
+;
+; CHECK-DF-LABEL: fptosiR_double_1:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtosi.rz vr0, vr0
+; CHECK-DF-NEXT:    fmfvrl a0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptosiR_double_1:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fftoi.f64.s32.rz vr0, vr0
+; CHECK-DF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fptosi = fptosi double %x to i32
+  ret i32 %fptosi
+}
+
+
+; double --> i16
+define i16 @fptosiR_double_2(double %x) {
+;
+;
+; CHECK-DF-LABEL: fptosiR_double_2:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtosi.rz vr0, vr0
+; CHECK-DF-NEXT:    fmfvrl a0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptosiR_double_2:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fftoi.f64.s32.rz vr0, vr0
+; CHECK-DF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fptosi = fptosi double %x to i16
+  ret i16 %fptosi
+}
+
+
+; double --> i8
+define i8 @fptosiR_double_3(double %x) {
+;
+;
+; CHECK-DF-LABEL: fptosiR_double_3:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtosi.rz vr0, vr0
+; CHECK-DF-NEXT:    fmfvrl a0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptosiR_double_3:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fftoi.f64.s32.rz vr0, vr0
+; CHECK-DF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fptosi = fptosi double %x to i8
+  ret i8 %fptosi
+}
+
+; double --> i1
+define i1 @fptosiR_double_4(double %x) {
+;
+;
+; CHECK-DF-LABEL: fptosiR_double_4:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtosi.rz vr0, vr0
+; CHECK-DF-NEXT:    fmfvrl a0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptosiR_double_4:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fftoi.f64.s32.rz vr0, vr0
+; CHECK-DF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fptosi = fptosi double %x to i1
+  ret i1 %fptosi
+}
+
+; double --> i32
+define i32 @fptouiR_double_1(double %x) {
+;
+; CHECK-DF-LABEL: fptouiR_double_1:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtoui.rz vr0, vr0
+; CHECK-DF-NEXT:    fmfvrl a0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptouiR_double_1:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fftoi.f64.u32.rz vr0, vr0
+; CHECK-DF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-DF2-NEXT:    rts16
+
+entry:
+  %fptoui = fptoui double %x to i32
+  ret i32 %fptoui
+}
+
+
+; double --> i16
+define i16 @fptouiR_double_2(double %x) {
+;
+; CHECK-DF-LABEL: fptouiR_double_2:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtoui.rz vr0, vr0
+; CHECK-DF-NEXT:    fmfvrl a0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptouiR_double_2:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fftoi.f64.u32.rz vr0, vr0
+; CHECK-DF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-DF2-NEXT:    rts16
+
+entry:
+  %fptoui = fptoui double %x to i16
+  ret i16 %fptoui
+}
+
+
+; double --> i8
+define i8 @fptouiR_double_3(double %x) {
+;
+; CHECK-DF-LABEL: fptouiR_double_3:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtoui.rz vr0, vr0
+; CHECK-DF-NEXT:    fmfvrl a0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptouiR_double_3:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fftoi.f64.u32.rz vr0, vr0
+; CHECK-DF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fptoui = fptoui double %x to i8
+  ret i8 %fptoui
+}
+
+
+; double --> i1
+define i1 @fptouiR_double_4(double %x) {
+;
+;
+; CHECK-DF-LABEL: fptouiR_double_4:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fdtoui.rz vr0, vr0
+; CHECK-DF-NEXT:    fmfvrl a0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: fptouiR_double_4:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fftoi.f64.u32.rz vr0, vr0
+; CHECK-DF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %fptoui = fptoui double %x to i1
+  ret i1 %fptoui
+}
+
+
+; i32/i16/i8/i1 --> double
+
+define double @sitofpR_double_0(i32 %x) {
+;
+;
+; CHECK-DF-LABEL: sitofpR_double_0:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fmtvrl vr0, a0
+; CHECK-DF-NEXT:    fsitod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: sitofpR_double_0:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-DF2-NEXT:    fitof.s32.f64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %sitofp = sitofp i32 %x to double
+  ret double %sitofp
+}
+
+define double @sitofpR_double_1(i16 %x) {
+;
+;
+; CHECK-DF-LABEL: sitofpR_double_1:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    sexth16 a0, a0
+; CHECK-DF-NEXT:    fmtvrl vr0, a0
+; CHECK-DF-NEXT:    fsitod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: sitofpR_double_1:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    sexth16 a0, a0
+; CHECK-DF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-DF2-NEXT:    fitof.s32.f64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %sitofp = sitofp i16 %x to double
+  ret double %sitofp
+}
+
+define double @sitofpR_double_2(i8 %x) {
+;
+;
+; CHECK-DF-LABEL: sitofpR_double_2:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    sextb16 a0, a0
+; CHECK-DF-NEXT:    fmtvrl vr0, a0
+; CHECK-DF-NEXT:    fsitod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: sitofpR_double_2:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    sextb16 a0, a0
+; CHECK-DF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-DF2-NEXT:    fitof.s32.f64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %sitofp = sitofp i8 %x to double
+  ret double %sitofp
+}
+
+define double @sitofpR_double_3(i1 %x) {
+;
+;
+; CHECK-DF-LABEL: sitofpR_double_3:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    sext32 a0, a0, 0, 0
+; CHECK-DF-NEXT:    fmtvrl vr0, a0
+; CHECK-DF-NEXT:    fsitod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: sitofpR_double_3:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    sext32 a0, a0, 0, 0
+; CHECK-DF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-DF2-NEXT:    fitof.s32.f64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %sitofp = sitofp i1 %x to double
+  ret double %sitofp
+}
+
+; i32/i16/i8/i1 --> double
+
+define double @uitofpR_double_0(i32 %x) {
+;
+;
+; CHECK-DF-LABEL: uitofpR_double_0:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fmtvrl vr0, a0
+; CHECK-DF-NEXT:    fuitod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: uitofpR_double_0:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-DF2-NEXT:    fitof.u32.f64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %uitofp = uitofp i32 %x to double
+  ret double %uitofp
+}
+
+define double @uitofpR_double_1(i16 %x) {
+;
+;
+; CHECK-DF-LABEL: uitofpR_double_1:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    zexth16 a0, a0
+; CHECK-DF-NEXT:    fmtvrl vr0, a0
+; CHECK-DF-NEXT:    fuitod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: uitofpR_double_1:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    zexth16 a0, a0
+; CHECK-DF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-DF2-NEXT:    fitof.u32.f64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %uitofp = uitofp i16 %x to double
+  ret double %uitofp
+}
+
+define double @uitofpR_double_2(i8 %x) {
+;
+;
+; CHECK-DF-LABEL: uitofpR_double_2:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    zextb16 a0, a0
+; CHECK-DF-NEXT:    fmtvrl vr0, a0
+; CHECK-DF-NEXT:    fuitod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: uitofpR_double_2:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    zextb16 a0, a0
+; CHECK-DF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-DF2-NEXT:    fitof.u32.f64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %uitofp = uitofp i8 %x to double
+  ret double %uitofp
+}
+
+define double @uitofpR_double_3(i1 %x) {
+;
+;
+; CHECK-DF-LABEL: uitofpR_double_3:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    andi32 a0, a0, 1
+; CHECK-DF-NEXT:    fmtvrl vr0, a0
+; CHECK-DF-NEXT:    fuitod vr0, vr0
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: uitofpR_double_3:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    andi32 a0, a0, 1
+; CHECK-DF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-DF2-NEXT:    fitof.u32.f64 vr0, vr0
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %uitofp = uitofp i1 %x to double
+  ret double %uitofp
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/cvt-f.ll b/llvm/test/CodeGen/CSKY/fpu/cvt-f.ll
new file mode 100644
index 0000000000000..07055fe0d4d0d
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/cvt-f.ll
@@ -0,0 +1,334 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf | FileCheck %s --check-prefix=CHECK-SF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf | FileCheck %s --check-prefix=CHECK-SF2
+
+
+; float --> i32
+define i32 @fptosiR_float_1(float %x) {
+;
+; CHECK-SF-LABEL: fptosiR_float_1:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstosi.rz vr0, vr0
+; CHECK-SF-NEXT:    fmfvrl a0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fptosiR_float_1:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fftoi.f32.s32.rz vr0, vr0
+; CHECK-SF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fptosi = fptosi float %x to i32
+  ret i32 %fptosi
+}
+
+
+; float --> i16
+define i16 @fptosiR_float_2(float %x) {
+;
+; CHECK-SF-LABEL: fptosiR_float_2:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstosi.rz vr0, vr0
+; CHECK-SF-NEXT:    fmfvrl a0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fptosiR_float_2:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fftoi.f32.s32.rz vr0, vr0
+; CHECK-SF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fptosi = fptosi float %x to i16
+  ret i16 %fptosi
+}
+
+
+; float --> i8
+define i8 @fptosiR_float_3(float %x) {
+;
+; CHECK-SF-LABEL: fptosiR_float_3:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstosi.rz vr0, vr0
+; CHECK-SF-NEXT:    fmfvrl a0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fptosiR_float_3:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fftoi.f32.s32.rz vr0, vr0
+; CHECK-SF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fptosi = fptosi float %x to i8
+  ret i8 %fptosi
+}
+
+
+; float --> i1
+define i1 @fptosiR_float_4(float %x) {
+;
+; CHECK-SF-LABEL: fptosiR_float_4:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstosi.rz vr0, vr0
+; CHECK-SF-NEXT:    fmfvrl a0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fptosiR_float_4:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fftoi.f32.s32.rz vr0, vr0
+; CHECK-SF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fptosi = fptosi float %x to i1
+  ret i1 %fptosi
+}
+
+
+
+
+; float --> i32
+define i32 @fptouiR_float_1(float %x) {
+;
+; CHECK-SF-LABEL: fptouiR_float_1:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstoui.rz vr0, vr0
+; CHECK-SF-NEXT:    fmfvrl a0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fptouiR_float_1:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fftoi.f32.u32.rz vr0, vr0
+; CHECK-SF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-SF2-NEXT:    rts16
+
+
+entry:
+  %fptoui = fptoui float %x to i32
+  ret i32 %fptoui
+}
+
+
+
+; float --> i16
+define i16 @fptouiR_float_2(float %x) {
+;
+; CHECK-SF-LABEL: fptouiR_float_2:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstoui.rz vr0, vr0
+; CHECK-SF-NEXT:    fmfvrl a0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fptouiR_float_2:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fftoi.f32.u32.rz vr0, vr0
+; CHECK-SF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-SF2-NEXT:    rts16
+
+
+entry:
+  %fptoui = fptoui float %x to i16
+  ret i16 %fptoui
+}
+
+
+
+; float --> i8
+define i8 @fptouiR_float_3(float %x) {
+;
+; CHECK-SF-LABEL: fptouiR_float_3:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstoui.rz vr0, vr0
+; CHECK-SF-NEXT:    fmfvrl a0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fptouiR_float_3:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fftoi.f32.u32.rz vr0, vr0
+; CHECK-SF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-SF2-NEXT:    rts16
+
+
+entry:
+  %fptoui = fptoui float %x to i8
+  ret i8 %fptoui
+}
+
+
+; float --> i1
+define i1 @fptouiR_float_4(float %x) {
+;
+; CHECK-SF-LABEL: fptouiR_float_4:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstoui.rz vr0, vr0
+; CHECK-SF-NEXT:    fmfvrl a0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: fptouiR_float_4:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fftoi.f32.u32.rz vr0, vr0
+; CHECK-SF2-NEXT:    fmfvr.32.1 a0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %fptoui = fptoui float %x to i1
+  ret i1 %fptoui
+}
+
+; i32/i16/i8/i1 --> float
+
+define float @sitofpR_float_0(i32 %x) {
+;
+; CHECK-SF-LABEL: sitofpR_float_0:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    fsitos vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: sitofpR_float_0:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    fitof.s32.f32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %sitofp = sitofp i32 %x to float
+  ret float %sitofp
+}
+
+define float @sitofpR_float_1(i16 %x) {
+;
+; CHECK-SF-LABEL: sitofpR_float_1:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    sexth16 a0, a0
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    fsitos vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: sitofpR_float_1:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    sexth16 a0, a0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    fitof.s32.f32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %sitofp = sitofp i16 %x to float
+  ret float %sitofp
+}
+
+define float @sitofpR_float_2(i8 %x) {
+;
+; CHECK-SF-LABEL: sitofpR_float_2:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    sextb16 a0, a0
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    fsitos vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: sitofpR_float_2:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    sextb16 a0, a0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    fitof.s32.f32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %sitofp = sitofp i8 %x to float
+  ret float %sitofp
+}
+
+define float @sitofpR_float_3(i1 %x) {
+;
+; CHECK-SF-LABEL: sitofpR_float_3:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    sext32 a0, a0, 0, 0
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    fsitos vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: sitofpR_float_3:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    sext32 a0, a0, 0, 0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    fitof.s32.f32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %sitofp = sitofp i1 %x to float
+  ret float %sitofp
+}
+
+; i32/i16/i8/i1 --> float
+
+define float @uitofpR_float_0(i32 %x) {
+;
+; CHECK-SF-LABEL: uitofpR_float_0:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    fuitos vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: uitofpR_float_0:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    fitof.u32.f32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %uitofp = uitofp i32 %x to float
+  ret float %uitofp
+}
+
+define float @uitofpR_float_1(i16 %x) {
+;
+; CHECK-SF-LABEL: uitofpR_float_1:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    zexth16 a0, a0
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    fuitos vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: uitofpR_float_1:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    zexth16 a0, a0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    fitof.u32.f32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %uitofp = uitofp i16 %x to float
+  ret float %uitofp
+}
+
+define float @uitofpR_float_2(i8 %x) {
+;
+; CHECK-SF-LABEL: uitofpR_float_2:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    zextb16 a0, a0
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    fuitos vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: uitofpR_float_2:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    zextb16 a0, a0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    fitof.u32.f32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %uitofp = uitofp i8 %x to float
+  ret float %uitofp
+}
+
+define float @uitofpR_float_3(i1 %x) {
+;
+; CHECK-SF-LABEL: uitofpR_float_3:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    andi32 a0, a0, 1
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    fuitos vr0, vr0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: uitofpR_float_3:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    andi32 a0, a0, 1
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    fitof.u32.f32 vr0, vr0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %uitofp = uitofp i1 %x to float
+  ret float %uitofp
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/ldst-d.ll b/llvm/test/CodeGen/CSKY/fpu/ldst-d.ll
new file mode 100644
index 0000000000000..e31fd409d2775
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/ldst-d.ll
@@ -0,0 +1,106 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf -mattr=+fpuv2_df | FileCheck %s --check-prefix=CHECK-DF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf -mattr=+fpuv3_df | FileCheck %s --check-prefix=CHECK-DF2
+
+define double @load_I_d(double* nocapture readonly %a) local_unnamed_addr #0 {
+;
+;
+; CHECK-DF-LABEL: load_I_d:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fldd vr0, (a0, 24)
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: load_I_d:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fld.64 vr0, (a0, 24)
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %arrayidx = getelementptr inbounds double, double* %a, i64 3
+  %0 = load double, double* %arrayidx, align 4
+  ret double %0
+}
+
+
+
+define double @load_R_d(double* nocapture readonly %a, i32 %b) local_unnamed_addr #0 {
+;
+;
+; CHECK-DF-LABEL: load_R_d:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fldrd vr0, (a0, a1 << 3)
+; CHECK-DF-NEXT:    rts16
+;
+; CHECK-DF2-LABEL: load_R_d:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fldr.64 vr0, (a0, a1 << 3)
+; CHECK-DF2-NEXT:    rts16
+entry:
+  %idxprom = sext i32 %b to i64
+  %arrayidx = getelementptr inbounds double, double* %a, i64 %idxprom
+  %0 = load double, double* %arrayidx, align 4
+  ret double %0
+}
+
+define double @store_I_d(double*  %a, double %b) local_unnamed_addr #0 {
+;
+;
+; CHECK-DF-LABEL: store_I_d:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fstd vr0, (a0, 24)
+; CHECK-DF-NEXT:    grs32 a0, .LCPI2_0
+; CHECK-DF-NEXT:    fldd vr0, (a0, 0)
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI2_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: store_I_d:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fst.64 vr0, (a0, 24)
+; CHECK-DF2-NEXT:    flrw.64 vr0, [.LCPI2_0]
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI2_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %arrayidx = getelementptr inbounds double, double* %a, i64 3
+  store double %b, double* %arrayidx, align 4
+  ret double 0.0
+}
+
+define double @store_R_d(double*  %a, i32 %b, double %c) local_unnamed_addr #0 {
+;
+;
+; CHECK-DF-LABEL: store_R_d:
+; CHECK-DF:       # %bb.0: # %entry
+; CHECK-DF-NEXT:    fstrd vr0, (a0, a1 << 3)
+; CHECK-DF-NEXT:    grs32 a0, .LCPI3_0
+; CHECK-DF-NEXT:    fldd vr0, (a0, 0)
+; CHECK-DF-NEXT:    rts16
+; CHECK-DF-NEXT:    .p2align 1
+; CHECK-DF-NEXT:  # %bb.1:
+; CHECK-DF-NEXT:    .p2align 2
+; CHECK-DF-NEXT:  .LCPI3_0:
+; CHECK-DF-NEXT:    .quad 0x0000000000000000 # double 0
+;
+; CHECK-DF2-LABEL: store_R_d:
+; CHECK-DF2:       # %bb.0: # %entry
+; CHECK-DF2-NEXT:    fstr.64 vr0, (a0, a1 << 3)
+; CHECK-DF2-NEXT:    flrw.64 vr0, [.LCPI3_0]
+; CHECK-DF2-NEXT:    rts16
+; CHECK-DF2-NEXT:    .p2align 1
+; CHECK-DF2-NEXT:  # %bb.1:
+; CHECK-DF2-NEXT:    .p2align 2
+; CHECK-DF2-NEXT:  .LCPI3_0:
+; CHECK-DF2-NEXT:    .quad 0x0000000000000000 # double 0
+entry:
+  %idxprom = sext i32 %b to i64
+  %arrayidx = getelementptr inbounds double, double* %a, i64 %idxprom
+  store double %c, double* %arrayidx, align 4
+  ret double 0.0
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/ldst-f.ll b/llvm/test/CodeGen/CSKY/fpu/ldst-f.ll
new file mode 100644
index 0000000000000..bbcde8e6430a9
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/ldst-f.ll
@@ -0,0 +1,83 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv2_sf | FileCheck %s --check-prefix=CHECK-SF
+; RUN: llc -verify-machineinstrs -csky-no-aliases < %s -mtriple=csky -float-abi=hard -mattr=+hard-float -mattr=+2e3 -mattr=+fpuv3_sf | FileCheck %s --check-prefix=CHECK-SF2
+
+define float @load_I_w(float* nocapture readonly %a) local_unnamed_addr #0 {
+;
+; CHECK-SF-LABEL: load_I_w:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    flds vr0, (a0, 12)
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: load_I_w:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fld.32 vr0, (a0, 12)
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %arrayidx = getelementptr inbounds float, float* %a, i64 3
+  %0 = load float, float* %arrayidx, align 4
+  ret float %0
+}
+
+define float @load_R_w(float* nocapture readonly %a, i32 %b) local_unnamed_addr #0 {
+;
+; CHECK-SF-LABEL: load_R_w:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fldrs vr0, (a0, a1 << 2)
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: load_R_w:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fldr.32 vr0, (a0, a1 << 2)
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %idxprom = sext i32 %b to i64
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %idxprom
+  %0 = load float, float* %arrayidx, align 4
+  ret float %0
+}
+
+
+define float @store_I_w(float*  %a, float %b) local_unnamed_addr #0 {
+;
+; CHECK-SF-LABEL: store_I_w:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fsts vr0, (a0, 12)
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: store_I_w:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fst.32 vr0, (a0, 12)
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %arrayidx = getelementptr inbounds float, float* %a, i64 3
+  store float %b, float* %arrayidx, align 4
+  ret float 0.0
+}
+
+define float @store_R_w(float*  %a, i32 %b, float %c) local_unnamed_addr #0 {
+;
+; CHECK-SF-LABEL: store_R_w:
+; CHECK-SF:       # %bb.0: # %entry
+; CHECK-SF-NEXT:    fstrs vr0, (a0, a1 << 2)
+; CHECK-SF-NEXT:    movi16 a0, 0
+; CHECK-SF-NEXT:    fmtvrl vr0, a0
+; CHECK-SF-NEXT:    rts16
+;
+; CHECK-SF2-LABEL: store_R_w:
+; CHECK-SF2:       # %bb.0: # %entry
+; CHECK-SF2-NEXT:    fstr.32 vr0, (a0, a1 << 2)
+; CHECK-SF2-NEXT:    movi16 a0, 0
+; CHECK-SF2-NEXT:    fmtvr.32.1 vr0, a0
+; CHECK-SF2-NEXT:    rts16
+entry:
+  %idxprom = sext i32 %b to i64
+  %arrayidx = getelementptr inbounds float, float* %a, i64 %idxprom
+  store float %c, float* %arrayidx, align 4
+  ret float 0.0
+}
diff --git a/llvm/test/CodeGen/CSKY/fpu/lit.local.cfg b/llvm/test/CodeGen/CSKY/fpu/lit.local.cfg
new file mode 100644
index 0000000000000..37335bd736850
--- /dev/null
+++ b/llvm/test/CodeGen/CSKY/fpu/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'CSKY' in config.root.targets:
+    config.unsupported = True
diff --git a/llvm/test/MC/CSKY/fpuv2.s b/llvm/test/MC/CSKY/fpuv2.s
new file mode 100644
index 0000000000000..7d4e7a4682bec
--- /dev/null
+++ b/llvm/test/MC/CSKY/fpuv2.s
@@ -0,0 +1,31 @@
+# RUN: llvm-mc %s -triple=csky -show-encoding -csky-no-aliases -mattr=+fpuv2_sf -mattr=+fpuv2_df \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: fldms  vr1-vr2, (a1)
+# CHECK-ASM: encoding: [0x21,0xf4,0x01,0x30]
+fldms  vr1-vr2, (a1)
+
+# CHECK-ASM-AND-OBJ: fldmd  vr1-vr2, (a1)
+# CHECK-ASM: encoding: [0x21,0xf4,0x01,0x31]
+fldmd  vr1-vr2, (a1)
+
+# CHECK-ASM-AND-OBJ: fstms  vr1-vr2, (a1)
+# CHECK-ASM: encoding: [0x21,0xf4,0x01,0x34]
+fstms  vr1-vr2, (a1)
+
+# CHECK-ASM-AND-OBJ: fstmd  vr1-vr2, (a1)
+# CHECK-ASM: encoding: [0x21,0xf4,0x01,0x35]
+fstmd  vr1-vr2, (a1)
+
+# RUN: not llvm-mc -triple csky -mattr=+fpuv2_sf -mattr=+fpuv2_df --defsym=ERR=1 < %s 2>&1 | FileCheck %s
+
+.ifdef ERR
+fldms  vr1-vr33, (a1) # CHECK: :[[#@LINE]]:12: error: invalid register
+fldms  vr1-vr31, (a1) # CHECK: :[[#@LINE]]:8: error: Register sequence is not valid
+fstms  vr1-vr33, (a1) # CHECK: :[[#@LINE]]:12: error: invalid register
+fstms  vr1-vr31, (a1) # CHECK: :[[#@LINE]]:8: error: Register sequence is not valid
+fldmd  vr1-vr33, (a1) # CHECK: :[[#@LINE]]:12: error: invalid register
+fldmd  vr1-vr31, (a1) # CHECK: :[[#@LINE]]:8: error: Register sequence is not valid
+fstmd  vr1-vr33, (a1) # CHECK: :[[#@LINE]]:12: error: invalid register
+fstmd  vr1-vr31, (a1) # CHECK: :[[#@LINE]]:8: error: Register sequence is not valid
+.endif
diff --git a/llvm/test/MC/CSKY/fpuv3.s b/llvm/test/MC/CSKY/fpuv3.s
new file mode 100644
index 0000000000000..071ad10cfbe43
--- /dev/null
+++ b/llvm/test/MC/CSKY/fpuv3.s
@@ -0,0 +1,27 @@
+# RUN: llvm-mc %s -triple=csky -show-encoding -csky-no-aliases -mattr=+fpuv3_sf,+fpuv3_df \
+# RUN:     | FileCheck -check-prefixes=CHECK-ASM,CHECK-ASM-AND-OBJ %s
+
+# CHECK-ASM-AND-OBJ: fldm.32 vr1-vr2, (a1)
+# CHECK-ASM: encoding: [0x21,0xf4,0x01,0x30]
+fldm.32  vr1-vr2, (a1)
+
+# CHECK-ASM-AND-OBJ: fldm.64 vr1-vr2, (a1)
+# CHECK-ASM: encoding: [0x21,0xf4,0x01,0x31]
+fldm.64  vr1-vr2, (a1)
+
+# CHECK-ASM-AND-OBJ: fstm.32 vr1-vr2, (a1)
+# CHECK-ASM: encoding: [0x21,0xf4,0x01,0x34]
+fstm.32 vr1-vr2, (a1)
+
+# CHECK-ASM-AND-OBJ: fstm.64 vr1-vr2, (a1)
+# CHECK-ASM: encoding: [0x21,0xf4,0x01,0x35]
+fstm.64  vr1-vr2, (a1)
+
+# RUN: not llvm-mc -triple csky -mattr=+fpuv3_sf -mattr=+fpuv3_df --defsym=ERR=1 < %s 2>&1 | FileCheck %s
+
+.ifdef ERR
+fstm.32  vr1-vr33, (a1) # CHECK: :[[#@LINE]]:14: error: invalid register
+fstm.64  vr1-vr33, (a1) # CHECK: :[[#@LINE]]:14: error: invalid register
+fldm.32  vr1-vr33, (a1) # CHECK: :[[#@LINE]]:14: error: invalid register
+fldm.64  vr1-vr33, (a1) # CHECK: :[[#@LINE]]:14: error: invalid register
+.endif

From 578122c18a2fd2765b4b07a6cf9d46719c638a60 Mon Sep 17 00:00:00 2001
From: Marius Brehler <marius.brehler@iml.fraunhofer.de>
Date: Tue, 25 Jan 2022 16:13:30 +0000
Subject: [PATCH 855/946] [mlir] Don't emit unused labels

Stop the Cpp target from emitting unused labels. The previosly generated
code generated warning if `-Wunused-label` is passed to a compiler.

Co-authored-by: Simon Camphausen <simon.camphausen@iml.fraunhofer.de>

Reviewed By: jpienaar

Differential Revision: https://reviews.llvm.org/D118154
---
 mlir/lib/Target/Cpp/TranslateToCpp.cpp | 4 ++--
 mlir/test/Target/Cpp/control_flow.mlir | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
index 1fc120a7dcd96..a332029a4cf81 100644
--- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp
+++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp
@@ -631,8 +631,8 @@ static LogicalResult printOperation(CppEmitter &emitter, FuncOp functionOp) {
   }
 
   for (Block &block : blocks) {
-    // Only print a label if there is more than one block.
-    if (blocks.size() > 1) {
+    // Only print a label if the block has predecessors.
+    if (!block.hasNoPredecessors()) {
       if (failed(emitter.emitLabel(block)))
         return failure();
     }
diff --git a/mlir/test/Target/Cpp/control_flow.mlir b/mlir/test/Target/Cpp/control_flow.mlir
index ed8ee08ff46d5..d73299e0a9621 100644
--- a/mlir/test/Target/Cpp/control_flow.mlir
+++ b/mlir/test/Target/Cpp/control_flow.mlir
@@ -22,7 +22,6 @@ func @simple(i64, i1) -> i64 {
     // CPP-DECLTOP-NEXT: int64_t [[C:[^ ]*]];
     // CPP-DECLTOP-NEXT: int64_t [[D:[^ ]*]];
     // CPP-DECLTOP-NEXT: int64_t [[E:[^ ]*]];
-    // CPP-DECLTOP-NEXT: [[BB0:[^ ]*]]:
     // CPP-DECLTOP-NEXT: if ([[COND]]) {
     // CPP-DECLTOP-NEXT: goto [[BB1:[^ ]*]];
     // CPP-DECLTOP-NEXT: } else {
@@ -51,7 +50,6 @@ func @block_labels0() {
     return
 }
 // CPP-DECLTOP: void block_labels0() {
-  // CPP-DECLTOP-NEXT: label1:
   // CPP-DECLTOP-NEXT: goto label2;
   // CPP-DECLTOP-NEXT: label2:
   // CPP-DECLTOP-NEXT: return;
@@ -66,7 +64,6 @@ func @block_labels1() {
     return
 }
 // CPP-DECLTOP: void block_labels1() {
-  // CPP-DECLTOP-NEXT: label1:
   // CPP-DECLTOP-NEXT: goto label2;
   // CPP-DECLTOP-NEXT: label2:
   // CPP-DECLTOP-NEXT: return;

From 6a4d3f37b5a7e8d74ce9279cc1437121730e2925 Mon Sep 17 00:00:00 2001
From: Wu Xinlong <821408745@qq.com>
Date: Thu, 27 Jan 2022 15:48:25 +0800
Subject: [PATCH 856/946] [RISCV] fix dead code

fix dead code mentioned on https://reviews.llvm.org/D98136

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D118323
---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
index 72d91b1044d65..01c6bd90ea587 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -191,8 +191,8 @@ enum OperandType : unsigned {
   OPERAND_SIMM12,
   OPERAND_UIMM20,
   OPERAND_UIMMLOG2XLEN,
-  OPERAND_LAST_RISCV_IMM = OPERAND_UIMMLOG2XLEN,
   OPERAND_RVKRNUM,
+  OPERAND_LAST_RISCV_IMM = OPERAND_RVKRNUM,
   // Operand is either a register or uimm5, this is used by V extension pseudo
   // instructions to represent a value that be passed as AVL to either vsetvli
   // or vsetivli.

From 67c89d9a3cb4b2a22dd8d55b83b5a8c8e1fb7f98 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 27 Jan 2022 00:10:57 -0800
Subject: [PATCH 857/946] [MC] Remove unreachable .comm/.lcomm diagnostic

and make another diagnostic math the prevailing format.
---
 llvm/lib/MC/MCParser/AsmParser.cpp | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 5c94174aa161f..15b675fc05666 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -5047,15 +5047,7 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) {
   // NOTE: a size of zero for a .comm should create a undefined symbol
   // but a size of .lcomm creates a bss symbol of size zero.
   if (Size < 0)
-    return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't "
-                          "be less than zero");
-
-  // NOTE: The alignment in the directive is a power of 2 value, the assembler
-  // may internally end up wanting an alignment in bytes.
-  // FIXME: Diagnose overflow.
-  if (Pow2Alignment < 0)
-    return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive "
-                                   "alignment, can't be less than zero");
+    return Error(SizeLoc, "size must be non-negative");
 
   Sym->redefineIfPossible();
   if (!Sym->isUndefined())

From fd71493ff0602d7d81763263df2f2074fe0db094 Mon Sep 17 00:00:00 2001
From: Mike Hommey <mh@glandium.org>
Date: Thu, 27 Jan 2022 09:26:16 +0100
Subject: [PATCH 858/946] Add missing namespace to PPCLinux.cpp

This fixes a build failure with MSVC introduced in
https://reviews.llvm.org/D112906

Reviewed By: nemanjai

Differential Revision: https://reviews.llvm.org/D118211
---
 clang/lib/Driver/ToolChains/PPCLinux.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/clang/lib/Driver/ToolChains/PPCLinux.cpp b/clang/lib/Driver/ToolChains/PPCLinux.cpp
index e5e1aa06f4b1d..e480d8bd8703c 100644
--- a/clang/lib/Driver/ToolChains/PPCLinux.cpp
+++ b/clang/lib/Driver/ToolChains/PPCLinux.cpp
@@ -13,6 +13,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 
+using namespace clang::driver;
 using namespace clang::driver::toolchains;
 using namespace llvm::opt;
 using namespace llvm::sys;

From a911a69408ee2eb9b48f18e93ef69f91feb42594 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 27 Jan 2022 09:31:55 +0100
Subject: [PATCH 859/946] [mlir][Bazel] Add missing dependency to
 ArithmeticTransforms.

---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 080b20c8782c0..5dab97f36989c 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -7831,6 +7831,7 @@ cc_library(
         ":Affine",
         ":AffineUtils",
         ":ArithmeticDialect",
+        ":ArithmeticTransforms",
         ":IR",
         ":InferTypeOpInterface",
         ":MemRefDialect",

From 426437d1fea48d62d276c5db65f66f88695a7589 Mon Sep 17 00:00:00 2001
From: Fangrui Song <i@maskray.me>
Date: Thu, 27 Jan 2022 00:37:49 -0800
Subject: [PATCH 860/946] [MC] Add MCAsmParser::parseRParen to improve
 consistency and simplify code

Some diagnostics are more verbose but they don't seem to be more useful than
simple `expected ')'`
---
 llvm/include/llvm/MC/MCParser/MCAsmParser.h |  1 +
 llvm/lib/MC/MCParser/AsmParser.cpp          | 17 +++++---------
 llvm/lib/MC/MCParser/MasmParser.cpp         | 25 +++++++--------------
 llvm/test/MC/AMDGPU/gfx10_err_pos.s         |  2 +-
 llvm/test/MC/ARM/symbol-variants-errors.s   |  5 ++---
 llvm/test/MC/ELF/bracket.s                  |  2 +-
 6 files changed, 18 insertions(+), 34 deletions(-)

diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index dbb76bd36591d..29386ffc45ac4 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -262,6 +262,7 @@ class MCAsmParser {
   bool parseOptionalToken(AsmToken::TokenKind T);
 
   bool parseComma() { return parseToken(AsmToken::Comma, "expected comma"); }
+  bool parseRParen() { return parseToken(AsmToken::RParen, "expected ')'"); }
   bool parseEOL();
   bool parseEOL(const Twine &ErrMsg);
 
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 15b675fc05666..0cea491f227d5 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -1121,11 +1121,8 @@ StringRef AsmParser::parseStringToComma() {
 bool AsmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
   if (parseExpression(Res))
     return true;
-  if (Lexer.isNot(AsmToken::RParen))
-    return TokError("expected ')' in parentheses expression");
   EndLoc = Lexer.getTok().getEndLoc();
-  Lex();
-  return false;
+  return parseRParen();
 }
 
 /// Parse a bracket expression and return it.
@@ -1214,9 +1211,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
       Lex(); // eat '('.
       StringRef VName;
       parseIdentifier(VName);
-      // eat ')'.
-      if (parseToken(AsmToken::RParen,
-                     "unexpected token in variant, expected ')'"))
+      if (parseRParen())
         return true;
       Split = std::make_pair(Identifier, VName);
     }
@@ -1379,9 +1374,8 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
     Lex(); // Eat the operator.
     if (parseExpression(Res, EndLoc))
       return true;
-    if (Lexer.isNot(AsmToken::RParen))
-      return TokError("expected ')'");
-    Lex(); // Eat the operator.
+    if (parseRParen())
+      return true;
     Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
     return !Res;
   }
@@ -1553,8 +1547,7 @@ bool AsmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
     // This is the same behavior as parseParenExpression().
     if (ParenDepth - 1 > 0) {
       EndLoc = getTok().getEndLoc();
-      if (parseToken(AsmToken::RParen,
-                     "expected ')' in parentheses expression"))
+      if (parseRParen())
         return true;
     }
   }
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index a888e830182fb..f9433240743d4 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -1517,11 +1517,8 @@ StringRef MasmParser::parseStringToEndOfStatement() {
 bool MasmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
   if (parseExpression(Res))
     return true;
-  if (Lexer.isNot(AsmToken::RParen))
-    return TokError("expected ')' in parentheses expression");
   EndLoc = Lexer.getTok().getEndLoc();
-  Lex();
-  return false;
+  return parseRParen();
 }
 
 /// Parse a bracket expression and return it.
@@ -1839,9 +1836,8 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc,
     Lex(); // Eat the operator.
     if (parseExpression(Res, EndLoc))
       return true;
-    if (Lexer.isNot(AsmToken::RParen))
-      return TokError("expected ')'");
-    Lex(); // Eat the operator.
+    if (parseRParen())
+      return true;
     Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
     return !Res;
   }
@@ -1930,8 +1926,7 @@ bool MasmParser::parseParenExprOfDepth(unsigned ParenDepth, const MCExpr *&Res,
     // This is the same behavior as parseParenExpression().
     if (ParenDepth - 1 > 0) {
       EndLoc = getTok().getEndLoc();
-      if (parseToken(AsmToken::RParen,
-                     "expected ')' in parentheses expression"))
+      if (parseRParen())
         return true;
     }
   }
@@ -3359,8 +3354,7 @@ bool MasmParser::handleMacroInvocation(const MCAsmMacro *M, SMLoc NameLoc) {
   }
 
   // Consume the right-parenthesis on the other side of the arguments.
-  if (parseToken(AsmToken::RParen, "invoking macro function '" + M->Name +
-                                       "' requires arguments in parentheses"))
+  if (parseRParen())
     return true;
 
   // Exit values may require lexing, unfortunately. We construct a new buffer to
@@ -3744,8 +3738,7 @@ bool MasmParser::parseScalarInitializer(unsigned Size,
       SmallVector<const MCExpr *, 1> DuplicatedValues;
       if (parseToken(AsmToken::LParen,
                      "parentheses required for 'dup' contents") ||
-          parseScalarInstList(Size, DuplicatedValues) ||
-          parseToken(AsmToken::RParen, "unmatched parentheses"))
+          parseScalarInstList(Size, DuplicatedValues) || parseRParen())
         return true;
 
       for (int i = 0; i < Repetitions; ++i)
@@ -3951,8 +3944,7 @@ bool MasmParser::parseRealInstList(const fltSemantics &Semantics,
       SmallVector<APInt, 1> DuplicatedValues;
       if (parseToken(AsmToken::LParen,
                      "parentheses required for 'dup' contents") ||
-          parseRealInstList(Semantics, DuplicatedValues) ||
-          parseToken(AsmToken::RParen, "unmatched parentheses"))
+          parseRealInstList(Semantics, DuplicatedValues) || parseRParen())
         return true;
 
       for (int i = 0; i < Repetitions; ++i)
@@ -4317,8 +4309,7 @@ bool MasmParser::parseStructInstList(
       std::vector<StructInitializer> DuplicatedValues;
       if (parseToken(AsmToken::LParen,
                      "parentheses required for 'dup' contents") ||
-          parseStructInstList(Structure, DuplicatedValues) ||
-          parseToken(AsmToken::RParen, "unmatched parentheses"))
+          parseStructInstList(Structure, DuplicatedValues) || parseRParen())
         return true;
 
       for (int i = 0; i < Repetitions; ++i)
diff --git a/llvm/test/MC/AMDGPU/gfx10_err_pos.s b/llvm/test/MC/AMDGPU/gfx10_err_pos.s
index be46fd50a2c5a..d32793712dcc5 100644
--- a/llvm/test/MC/AMDGPU/gfx10_err_pos.s
+++ b/llvm/test/MC/AMDGPU/gfx10_err_pos.s
@@ -51,7 +51,7 @@ tbuffer_store_format_xyzw v[1:4], off, ttmp[4:7], s0 format:[BUF_NUM_FORMAT_UINT
 // expected ')' in parentheses expression
 
 v_bfe_u32 v0, 1+(100, v1, v2
-// CHECK: error: expected ')' in parentheses expression
+// CHECK: :[[#@LINE-1]]:21: error: expected ')'
 // CHECK-NEXT:{{^}}v_bfe_u32 v0, 1+(100, v1, v2
 // CHECK-NEXT:{{^}}                    ^
 
diff --git a/llvm/test/MC/ARM/symbol-variants-errors.s b/llvm/test/MC/ARM/symbol-variants-errors.s
index 03401cd98b7e9..9a375773361da 100644
--- a/llvm/test/MC/ARM/symbol-variants-errors.s
+++ b/llvm/test/MC/ARM/symbol-variants-errors.s
@@ -10,14 +10,13 @@ f1:
 @ check for missing closed paren
 f2:
   .word bar(got
-@CHECK: error: unexpected token in variant, expected ')'
+@CHECK: :[[#@LINE-1]]:16: error: expected ')'
 @CHECK:  .word bar(got
 @CHECK:               ^
 
 @ check for invalid symbol before variant end
 f3:
   .word bar(got+2)
-
-@CHECK: error: unexpected token in variant, expected ')'
+@CHECK: :[[#@LINE-1]]:16: error: expected ')'
 @CHECK:  .word bar(got+2)
 @CHECK:               ^
diff --git a/llvm/test/MC/ELF/bracket.s b/llvm/test/MC/ELF/bracket.s
index 702e309ddeeb3..dba663fab604a 100644
--- a/llvm/test/MC/ELF/bracket.s
+++ b/llvm/test/MC/ELF/bracket.s
@@ -4,5 +4,5 @@
 // CHECK: error: expected ']' in brackets expression
 .size	x, [.-x)
 
-// CHECK: error: expected ')' in parentheses expression
+// CHECK: :[[#@LINE+1]]:14: error: expected ')'
 .size	y, (.-y]

From 199c2d63fd612427fce66d78b1e3eb58a59ef1e7 Mon Sep 17 00:00:00 2001
From: Adrian Kuegel <akuegel@google.com>
Date: Thu, 27 Jan 2022 09:44:06 +0100
Subject: [PATCH 861/946] [mlir][Bazel] Add TestMemRef target.

Needed after D118285

Differential Revision: https://reviews.llvm.org/D118330
---
 utils/bazel/llvm-project-overlay/mlir/BUILD.bazel  |  1 +
 .../llvm-project-overlay/mlir/test/BUILD.bazel     | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 5dab97f36989c..7dc763d9c262e 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5719,6 +5719,7 @@ cc_binary(
         "//mlir/test:TestIR",
         "//mlir/test:TestLinalg",
         "//mlir/test:TestMath",
+        "//mlir/test:TestMemRef",
         "//mlir/test:TestPass",
         "//mlir/test:TestReducer",
         "//mlir/test:TestRewrite",
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 49cde53a8c54b..4f9ae531e1b18 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -430,6 +430,20 @@ cc_library(
     ],
 )
 
+cc_library(
+    name = "TestMemRef",
+    srcs = glob(["lib/Dialect/MemRef/*.cpp"]),
+    defines = ["MLIR_CUDA_CONVERSIONS_ENABLED"],
+    includes = ["lib/Dialect/Test"],
+    deps = [
+        ":TestDialect",
+        "//mlir:Affine",
+        "//mlir:MemRefTransforms",
+        "//mlir:Pass",
+        "//mlir:Transforms",
+    ],
+)
+
 cc_library(
     name = "TestSCF",
     srcs = glob(["lib/Dialect/SCF/*.cpp"]),

From 52fddcdd9c90a550d7a50cbc2013be3314f91d08 Mon Sep 17 00:00:00 2001
From: Saiyedul Islam <Saiyedul.Islam@amd.com>
Date: Thu, 27 Jan 2022 08:55:08 +0000
Subject: [PATCH 862/946] [clang-format] Format ParseOpenMP.cpp changes

Properly format D116549.
---
 clang/lib/Parse/ParseOpenMP.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index de3d58baf84c9..8ad5edb1bcd63 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -2210,12 +2210,12 @@ Parser::DeclGroupPtrTy Parser::ParseOpenMPDeclarativeDirectiveWithExtDecl(
     VariantMatchInfo VMI;
     TI.getAsVariantMatchInfo(ASTCtx, VMI);
 
-    std::function<void(StringRef)> DiagUnknownTrait = [this, Loc](
-                                                          StringRef ISATrait) {
-      // TODO Track the selector locations in a way that is accessible here to
-      // improve the diagnostic location.
-      Diag(Loc, diag::warn_unknown_declare_variant_isa_trait) << ISATrait;
-    };
+    std::function<void(StringRef)> DiagUnknownTrait =
+        [this, Loc](StringRef ISATrait) {
+          // TODO Track the selector locations in a way that is accessible here
+          // to improve the diagnostic location.
+          Diag(Loc, diag::warn_unknown_declare_variant_isa_trait) << ISATrait;
+        };
     TargetOMPContext OMPCtx(
         ASTCtx, std::move(DiagUnknownTrait),
         /* CurrentFunctionDecl */ nullptr,
@@ -2551,12 +2551,12 @@ Parser::ParseOpenMPDeclarativeOrExecutableDirective(ParsedStmtContext StmtCtx) {
     TPA.Revert();
     // End of the first iteration. Parser is reset to the start of metadirective
 
-    std::function<void(StringRef)> DiagUnknownTrait = [this, Loc](
-                                                          StringRef ISATrait) {
-      // TODO Track the selector locations in a way that is accessible here to
-      // improve the diagnostic location.
-      Diag(Loc, diag::warn_unknown_declare_variant_isa_trait) << ISATrait;
-    };
+    std::function<void(StringRef)> DiagUnknownTrait =
+        [this, Loc](StringRef ISATrait) {
+          // TODO Track the selector locations in a way that is accessible here
+          // to improve the diagnostic location.
+          Diag(Loc, diag::warn_unknown_declare_variant_isa_trait) << ISATrait;
+        };
     TargetOMPContext OMPCtx(ASTContext, std::move(DiagUnknownTrait),
                             /* CurrentFunctionDecl */ nullptr,
                             ArrayRef<llvm::omp::TraitProperty>());

From aaa9f40e3fd2dd081e965d39f10649bcd8bf1d21 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 27 Jan 2022 10:01:59 +0100
Subject: [PATCH 863/946] [lldb/test] Replace gnu-style-compression.cpp with a
 yaml file

In D117744, llvm removed writing support for this format, breaking the
test. We may eventually want to remove reading support as well, but for
now I have converted the test to a yaml file to maintain coverage.
---
 .../DWARF/x86/gnu-style-compression.cpp       | 14 -----
 .../DWARF/x86/gnu-style-compression.yaml      | 56 +++++++++++++++++++
 2 files changed, 56 insertions(+), 14 deletions(-)
 delete mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.cpp
 create mode 100644 lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.yaml

diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.cpp
deleted file mode 100644
index 9dc87303ba9fd..0000000000000
--- a/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-// REQUIRES: zlib
-
-// RUN: %clang %s -target x86_64-pc-linux -g -gsplit-dwarf -c -o %t \
-// RUN:   -Wa,--compress-debug-sections=zlib-gnu
-// RUN: %lldb %t -o "target var s a" -b | FileCheck %s
-
-// CHECK: (const short) s = 47
-// CHECK: (const A) a = (a = 42)
-
-struct A {
-  long a = 42;
-};
-extern constexpr short s = 47;
-extern constexpr A a{};
diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.yaml b/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.yaml
new file mode 100644
index 0000000000000..d037cf474232e
--- /dev/null
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.yaml
@@ -0,0 +1,56 @@
+## NB: This is a yaml file because llvm gnu-style writing support in 14.0
+## (2022, D117744). In due time we may want to remove it from lldb as well.
+
+## Debug info generated from the following sources using clang-13
+## struct A {
+##  long a = 42;
+## };
+## extern constexpr short s = 47;
+## extern constexpr A a{};
+
+# REQUIRES: zlib
+
+# RUN: yaml2obj %s > %t
+# RUN: %lldb %t -o "target var s a" -b | FileCheck %s
+
+# CHECK: (const short) s = 47
+# CHECK: (const A) a = (a = 42)
+
+--- !ELF
+FileHeader:
+  Class:           ELFCLASS64
+  Data:            ELFDATA2LSB
+  Type:            ET_EXEC
+  Machine:         EM_X86_64
+  Entry:           0x401000
+Sections:
+  - Name:            .rodata
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_ALLOC ]
+    Address:         0x401000
+    AddressAlign:    0x8
+    Offset:          0x1000
+    Content:         2F000000000000002A00000000000000
+  - Name:            .debug_info
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         20000000040000000000080100000000000000001A000000C088361D3BC5B7B700000000
+  - Name:            .debug_abbrev
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         01110010171B0EB44219B0420EB14207B34217000000
+  - Name:            .debug_line
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         7100000004006B000000010101FB0E0D0001010101000000010000012F686F6D652F706176656C6F2F6C6C00006D6F6E6F2F6C6C64622F746573742F5368656C6C2F53796D626F6C46696C652F44574152462F7838362F676E752D7374796C652D636F6D7072657373696F6E2E6370700001000000
+  - Name:            .zdebug_str
+    Type:            SHT_PROGBITS
+    Flags:           [ SHF_MERGE, SHF_STRINGS ]
+    AddressAlign:    0x1
+    EntSize:         0x1
+    Content:         5A4C49420000000000000084789C75C9310E82401005502E04BF33B62686D6440A6B979DC826B3FC89332B727BB980ED7B585805F6FC8812AA48AD68062D3AFC1B04A97E504E08F1C0B4C8F1D35E13752C2AB83E2EF711DFF309B716D602AFB5F51EBB4A3FB3DA5BDC0BD761361BF2C6EE0781C8302C
+  - Name:            .debug_addr
+    Type:            SHT_PROGBITS
+    AddressAlign:    0x1
+    Content:         '00104000000000000810400000000000'
+...

From 7afd05211282c7516f0e9b3e7743b5dcc2605491 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Wed, 26 Jan 2022 16:29:51 +0100
Subject: [PATCH 864/946] [lldb/DWARF] Make manual dwarf index deterministic

Currently, running the test suite with LLVM_ENABLE_EXPENSIVE_CHECKS=On
causes a couple of tests to fail. This happens because they expect a
certain order of variables (all of them happen to use the "target
variable" command, but other lookup functions should suffer from the
same issues), all of which have the same name. Sort algorithms often
preserve the order of equivalent elements (in this case the entries in
the NameToDIE map), but that not guaranteed, and
LLVM_ENABLE_EXPENSIVE_CHECKS stresses that by pre-shuffling all inputs
before sorting.

While this could easily be fixed by relaxing the test expectations,
having a deterministic output seems like a worthwhile goal,
particularly, as this could have bigger consequences than just a
different print order -- in some cases we just pick the first entry that
we find, whatever that is. Therefore this patch makes the sort
deterministic by introducing another sort key -- UniqueCString::Sort
gets a value comparator functor, which can be used to sort elements with
the same name -- in the DWARF case we use DIERef::operator<, which
roughly equals the order in which the entries appear in the debug info,
and matches the current "accidental" order.

Using a extra functor seemed preferable to using stable_sort, as the
latter allocates extra O(n) of temporary memory.

I observed no difference in debug info parsing speed with this patch
applied.

Differential Revision: https://reviews.llvm.org/D118251
---
 lldb/include/lldb/Core/UniqueCStringMap.h     | 31 +++++++++++++++++--
 .../Plugins/SymbolFile/DWARF/NameToDIE.cpp    |  2 +-
 lldb/unittests/Core/UniqueCStringMapTest.cpp  | 15 +++++++++
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/lldb/include/lldb/Core/UniqueCStringMap.h b/lldb/include/lldb/Core/UniqueCStringMap.h
index e37027a0150a3..9c8b8081d374e 100644
--- a/lldb/include/lldb/Core/UniqueCStringMap.h
+++ b/lldb/include/lldb/Core/UniqueCStringMap.h
@@ -165,7 +165,21 @@ template <typename T> class UniqueCStringMap {
   //      my_map.Append (UniqueCStringMap::Entry(GetName(...), GetValue(...)));
   // }
   // my_map.Sort();
-  void Sort() { llvm::sort(m_map.begin(), m_map.end(), Compare()); }
+  void Sort() {
+    Sort([](const T &, const T &) { return false; });
+  }
+
+  /// Sort contents of this map using the provided comparator to break ties for
+  /// entries with the same string value.
+  template <typename TCompare> void Sort(TCompare tc) {
+    Compare c;
+    llvm::sort(m_map, [&](const Entry &lhs, const Entry &rhs) -> bool {
+      int result = c.ThreeWay(lhs.cstring, rhs.cstring);
+      if (result == 0)
+        return tc(lhs.value, rhs.value);
+      return result < 0;
+    });
+  }
 
   // Since we are using a vector to contain our items it will always double its
   // memory consumption as things are added to the vector, so if you intend to
@@ -205,13 +219,24 @@ template <typename T> class UniqueCStringMap {
       return operator()(lhs, rhs.cstring);
     }
 
+    bool operator()(ConstString lhs, ConstString rhs) {
+      return ThreeWay(lhs, rhs) < 0;
+    }
+
     // This is only for uniqueness, not lexicographical ordering, so we can
     // just compare pointers. *However*, comparing pointers from different
     // allocations is UB, so we need compare their integral values instead.
-    bool operator()(ConstString lhs, ConstString rhs) {
-      return uintptr_t(lhs.GetCString()) < uintptr_t(rhs.GetCString());
+    int ThreeWay(ConstString lhs, ConstString rhs) {
+      auto lhsint = uintptr_t(lhs.GetCString());
+      auto rhsint = uintptr_t(rhs.GetCString());
+      if (lhsint < rhsint)
+        return -1;
+      if (lhsint > rhsint)
+        return 1;
+      return 0;
     }
   };
+
   collection m_map;
 };
 
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
index 33e2695f403a2..413920f33619e 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp
@@ -21,7 +21,7 @@ using namespace lldb;
 using namespace lldb_private;
 
 void NameToDIE::Finalize() {
-  m_map.Sort();
+  m_map.Sort(std::less<DIERef>());
   m_map.SizeToFit();
 }
 
diff --git a/lldb/unittests/Core/UniqueCStringMapTest.cpp b/lldb/unittests/Core/UniqueCStringMapTest.cpp
index 25bc28538e65f..aa1cdbadc958e 100644
--- a/lldb/unittests/Core/UniqueCStringMapTest.cpp
+++ b/lldb/unittests/Core/UniqueCStringMapTest.cpp
@@ -51,3 +51,18 @@ TEST(UniqueCStringMap, NoDefaultConstructor) {
   EXPECT_THAT(Map.GetValues(Bar, Values), 0);
   EXPECT_THAT(Values, testing::IsEmpty());
 }
+
+TEST(UniqueCStringMap, ValueCompare) {
+  UniqueCStringMap<int> Map;
+
+  ConstString Foo("foo");
+
+  Map.Append(Foo, 0);
+  Map.Append(Foo, 5);
+  Map.Append(Foo, -5);
+
+  Map.Sort(std::less<int>());
+  std::vector<int> Values;
+  EXPECT_THAT(Map.GetValues(Foo, Values), 3);
+  EXPECT_THAT(Values, testing::ElementsAre(-5, 0, 5));
+}

From 6730df4779cd2f6328fe8cb6dec1dced8cabe4c5 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 27 Jan 2022 08:47:52 +0100
Subject: [PATCH 865/946] [lldb] Convert ProcessWindowsLog to the new API

---
 .../Process/Windows/Common/DebuggerThread.cpp | 36 ++++++++-----------
 .../Windows/Common/NativeProcessWindows.cpp   | 10 +++---
 .../NativeRegisterContextWindows_WoW64.cpp    |  4 +--
 .../NativeRegisterContextWindows_arm.cpp      |  4 +--
 .../NativeRegisterContextWindows_arm64.cpp    |  4 +--
 .../NativeRegisterContextWindows_i386.cpp     |  4 +--
 .../NativeRegisterContextWindows_x86_64.cpp   |  4 +--
 .../Windows/Common/ProcessDebugger.cpp        | 27 +++++++-------
 .../Process/Windows/Common/ProcessWindows.cpp | 20 +++++------
 .../Windows/Common/ProcessWindowsLog.h        | 11 ------
 .../Windows/Common/RegisterContextWindows.cpp |  4 +--
 .../Common/x86/RegisterContextWindows_x86.cpp |  6 ++--
 12 files changed, 58 insertions(+), 76 deletions(-)

diff --git a/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp b/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp
index 87c17d44c1869..a78ca2aabe134 100644
--- a/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/DebuggerThread.cpp
@@ -64,7 +64,7 @@ DebuggerThread::DebuggerThread(DebugDelegateSP debug_delegate)
 DebuggerThread::~DebuggerThread() { ::CloseHandle(m_debugging_ended_event); }
 
 Status DebuggerThread::DebugLaunch(const ProcessLaunchInfo &launch_info) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "launching '{0}'", launch_info.GetExecutableFile().GetPath());
 
   Status result;
@@ -83,7 +83,7 @@ Status DebuggerThread::DebugLaunch(const ProcessLaunchInfo &launch_info) {
 
 Status DebuggerThread::DebugAttach(lldb::pid_t pid,
                                    const ProcessAttachInfo &attach_info) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "attaching to '{0}'", pid);
 
   Status result;
@@ -122,7 +122,7 @@ lldb::thread_result_t DebuggerThread::DebuggerThreadLaunchRoutine(
   // until after the thread routine has exited.
   std::shared_ptr<DebuggerThread> this_ref(shared_from_this());
 
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "preparing to launch '{0}' on background thread.",
            launch_info.GetExecutableFile().GetPath());
 
@@ -149,7 +149,7 @@ lldb::thread_result_t DebuggerThread::DebuggerThreadAttachRoutine(
   // until after the thread routine has exited.
   std::shared_ptr<DebuggerThread> this_ref(shared_from_this());
 
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "preparing to attach to process '{0}' on background thread.",
            pid);
 
@@ -172,7 +172,7 @@ Status DebuggerThread::StopDebugging(bool terminate) {
 
   lldb::pid_t pid = m_process.GetProcessId();
 
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "terminate = {0}, inferior={1}.", terminate, pid);
 
   // Set m_is_shutting_down to true if it was false.  Return if it was already
@@ -246,8 +246,7 @@ void DebuggerThread::ContinueAsyncException(ExceptionResult result) {
   if (!m_active_exception.get())
     return;
 
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS |
-                                            WINDOWS_LOG_EXCEPTION);
+  Log *log = GetLog(WindowsLog::Process | WindowsLog::Exception);
   LLDB_LOG(log, "broadcasting for inferior process {0}.",
            m_process.GetProcessId());
 
@@ -265,7 +264,7 @@ void DebuggerThread::FreeProcessHandles() {
 }
 
 void DebuggerThread::DebugLoop() {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT);
+  Log *log = GetLog(WindowsLog::Event);
   DEBUG_EVENT dbe = {};
   bool should_debug = true;
   LLDB_LOGV(log, "Entering WaitForDebugEvent loop");
@@ -346,8 +345,7 @@ void DebuggerThread::DebugLoop() {
 ExceptionResult
 DebuggerThread::HandleExceptionEvent(const EXCEPTION_DEBUG_INFO &info,
                                      DWORD thread_id) {
-  Log *log =
-      ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT | WINDOWS_LOG_EXCEPTION);
+  Log *log = GetLog(WindowsLog::Event | WindowsLog::Exception);
   if (m_is_shutting_down) {
     // A breakpoint that occurs while `m_pid_to_detach` is non-zero is a magic
     // exception that
@@ -390,8 +388,7 @@ DebuggerThread::HandleExceptionEvent(const EXCEPTION_DEBUG_INFO &info,
 DWORD
 DebuggerThread::HandleCreateThreadEvent(const CREATE_THREAD_DEBUG_INFO &info,
                                         DWORD thread_id) {
-  Log *log =
-      ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT | WINDOWS_LOG_THREAD);
+  Log *log = GetLog(WindowsLog::Event | WindowsLog::Thread);
   LLDB_LOG(log, "Thread {0} spawned in process {1}", thread_id,
            m_process.GetProcessId());
   HostThread thread(info.hThread);
@@ -403,8 +400,7 @@ DebuggerThread::HandleCreateThreadEvent(const CREATE_THREAD_DEBUG_INFO &info,
 DWORD
 DebuggerThread::HandleCreateProcessEvent(const CREATE_PROCESS_DEBUG_INFO &info,
                                          DWORD thread_id) {
-  Log *log =
-      ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT | WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Event | WindowsLog::Process);
   uint32_t process_id = ::GetProcessId(info.hProcess);
 
   LLDB_LOG(log, "process {0} spawned", process_id);
@@ -432,8 +428,7 @@ DebuggerThread::HandleCreateProcessEvent(const CREATE_PROCESS_DEBUG_INFO &info,
 DWORD
 DebuggerThread::HandleExitThreadEvent(const EXIT_THREAD_DEBUG_INFO &info,
                                       DWORD thread_id) {
-  Log *log =
-      ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT | WINDOWS_LOG_THREAD);
+  Log *log = GetLog(WindowsLog::Event | WindowsLog::Thread);
   LLDB_LOG(log, "Thread {0} exited with code {1} in process {2}", thread_id,
            info.dwExitCode, m_process.GetProcessId());
   m_debug_delegate->OnExitThread(thread_id, info.dwExitCode);
@@ -443,8 +438,7 @@ DebuggerThread::HandleExitThreadEvent(const EXIT_THREAD_DEBUG_INFO &info,
 DWORD
 DebuggerThread::HandleExitProcessEvent(const EXIT_PROCESS_DEBUG_INFO &info,
                                        DWORD thread_id) {
-  Log *log =
-      ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT | WINDOWS_LOG_THREAD);
+  Log *log = GetLog(WindowsLog::Event | WindowsLog::Thread);
   LLDB_LOG(log, "process {0} exited with code {1}", m_process.GetProcessId(),
            info.dwExitCode);
 
@@ -456,7 +450,7 @@ DebuggerThread::HandleExitProcessEvent(const EXIT_PROCESS_DEBUG_INFO &info,
 DWORD
 DebuggerThread::HandleLoadDllEvent(const LOAD_DLL_DEBUG_INFO &info,
                                    DWORD thread_id) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT);
+  Log *log = GetLog(WindowsLog::Event);
   if (info.hFile == nullptr) {
     // Not sure what this is, so just ignore it.
     LLDB_LOG(log, "Warning: Inferior {0} has a NULL file handle, returning...",
@@ -500,7 +494,7 @@ DebuggerThread::HandleLoadDllEvent(const LOAD_DLL_DEBUG_INFO &info,
 DWORD
 DebuggerThread::HandleUnloadDllEvent(const UNLOAD_DLL_DEBUG_INFO &info,
                                      DWORD thread_id) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT);
+  Log *log = GetLog(WindowsLog::Event);
   LLDB_LOG(log, "process {0} unloading DLL at addr {1:x}.",
            m_process.GetProcessId(), info.lpBaseOfDll);
 
@@ -517,7 +511,7 @@ DebuggerThread::HandleODSEvent(const OUTPUT_DEBUG_STRING_INFO &info,
 
 DWORD
 DebuggerThread::HandleRipEvent(const RIP_INFO &info, DWORD thread_id) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EVENT);
+  Log *log = GetLog(WindowsLog::Event);
   LLDB_LOG(log, "encountered error {0} (type={1}) in process {2} thread {3}",
            info.dwError, info.dwType, m_process.GetProcessId(), thread_id);
 
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeProcessWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeProcessWindows.cpp
index 3d2225ff0cb24..c007e2e4a69c9 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeProcessWindows.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeProcessWindows.cpp
@@ -84,7 +84,7 @@ NativeProcessWindows::NativeProcessWindows(lldb::pid_t pid, int terminal_fd,
 }
 
 Status NativeProcessWindows::Resume(const ResumeActionList &resume_actions) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   Status error;
   llvm::sys::ScopedLock lock(m_mutex);
 
@@ -168,7 +168,7 @@ Status NativeProcessWindows::Halt() {
 
 Status NativeProcessWindows::Detach() {
   Status error;
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   StateType state = GetState();
   if (state != eStateExited && state != eStateDetached) {
     error = DetachProcess();
@@ -403,7 +403,7 @@ NativeProcessWindows::GetFileLoadAddress(const llvm::StringRef &file_name,
 }
 
 void NativeProcessWindows::OnExitProcess(uint32_t exit_code) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "Process {0} exited with code {1}", GetID(), exit_code);
 
   ProcessDebugger::OnExitProcess(exit_code);
@@ -417,7 +417,7 @@ void NativeProcessWindows::OnExitProcess(uint32_t exit_code) {
 }
 
 void NativeProcessWindows::OnDebuggerConnected(lldb::addr_t image_base) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "Debugger connected to process {0}. Image base = {1:x}",
            GetDebuggedProcessId(), image_base);
 
@@ -445,7 +445,7 @@ void NativeProcessWindows::OnDebuggerConnected(lldb::addr_t image_base) {
 ExceptionResult
 NativeProcessWindows::OnDebugException(bool first_chance,
                                        const ExceptionRecord &record) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EXCEPTION);
+  Log *log = GetLog(WindowsLog::Exception);
   llvm::sys::ScopedLock lock(m_mutex);
 
   // Let the debugger establish the internal status.
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
index d562c7df8bfa2..5c356979ca16b 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_WoW64.cpp
@@ -60,7 +60,7 @@ static Status
 GetWoW64ThreadContextHelper(lldb::thread_t thread_handle,
                             PWOW64_CONTEXT context_ptr,
                             const DWORD control_flag = kWoW64ContextFlags) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
   memset(context_ptr, 0, sizeof(::WOW64_CONTEXT));
   context_ptr->ContextFlags = control_flag;
@@ -75,7 +75,7 @@ GetWoW64ThreadContextHelper(lldb::thread_t thread_handle,
 
 static Status SetWoW64ThreadContextHelper(lldb::thread_t thread_handle,
                                           PWOW64_CONTEXT context_ptr) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
   if (!::Wow64SetThreadContext(thread_handle, context_ptr)) {
     error.SetError(GetLastError(), eErrorTypeWin32);
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
index 5cfd790c624a3..2cc16e9233310 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm.cpp
@@ -88,7 +88,7 @@ CreateRegisterInfoInterface(const ArchSpec &target_arch) {
 static Status GetThreadContextHelper(lldb::thread_t thread_handle,
                                      PCONTEXT context_ptr,
                                      const DWORD control_flag) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
 
   memset(context_ptr, 0, sizeof(::CONTEXT));
@@ -104,7 +104,7 @@ static Status GetThreadContextHelper(lldb::thread_t thread_handle,
 
 static Status SetThreadContextHelper(lldb::thread_t thread_handle,
                                      PCONTEXT context_ptr) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
   // It's assumed that the thread has stopped.
   if (!::SetThreadContext(thread_handle, context_ptr)) {
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
index fc65945723218..479063d3066f2 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_arm64.cpp
@@ -105,7 +105,7 @@ CreateRegisterInfoInterface(const ArchSpec &target_arch) {
 static Status GetThreadContextHelper(lldb::thread_t thread_handle,
                                      PCONTEXT context_ptr,
                                      const DWORD control_flag) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
 
   memset(context_ptr, 0, sizeof(::CONTEXT));
@@ -121,7 +121,7 @@ static Status GetThreadContextHelper(lldb::thread_t thread_handle,
 
 static Status SetThreadContextHelper(lldb::thread_t thread_handle,
                                      PCONTEXT context_ptr) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
   // It's assumed that the thread has stopped.
   if (!::SetThreadContext(thread_handle, context_ptr)) {
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
index 45da4cd02cec3..da8db9afb3be9 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_i386.cpp
@@ -55,7 +55,7 @@ CreateRegisterInfoInterface(const ArchSpec &target_arch) {
 static Status GetThreadContextHelper(lldb::thread_t thread_handle,
                                      PCONTEXT context_ptr,
                                      const DWORD control_flag) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
 
   memset(context_ptr, 0, sizeof(::CONTEXT));
@@ -71,7 +71,7 @@ static Status GetThreadContextHelper(lldb::thread_t thread_handle,
 
 static Status SetThreadContextHelper(lldb::thread_t thread_handle,
                                      PCONTEXT context_ptr) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
 
   if (!::SetThreadContext(thread_handle, context_ptr)) {
diff --git a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
index 819cd8eb383bf..a189dbb3d4866 100644
--- a/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/NativeRegisterContextWindows_x86_64.cpp
@@ -67,7 +67,7 @@ CreateRegisterInfoInterface(const ArchSpec &target_arch) {
 static Status GetThreadContextHelper(lldb::thread_t thread_handle,
                                      PCONTEXT context_ptr,
                                      const DWORD control_flag) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
 
   memset(context_ptr, 0, sizeof(::CONTEXT));
@@ -83,7 +83,7 @@ static Status GetThreadContextHelper(lldb::thread_t thread_handle,
 
 static Status SetThreadContextHelper(lldb::thread_t thread_handle,
                                      PCONTEXT context_ptr) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   Status error;
   // It's assumed that the thread has stopped.
   if (!::SetThreadContext(thread_handle, context_ptr)) {
diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp
index 91b3311dc8575..555c19adcfa9a 100644
--- a/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/ProcessDebugger.cpp
@@ -72,7 +72,7 @@ lldb::pid_t ProcessDebugger::GetDebuggedProcessId() const {
 }
 
 Status ProcessDebugger::DetachProcess() {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   DebuggerThreadSP debugger_thread;
   {
     // Acquire the lock only long enough to get the DebuggerThread.
@@ -108,7 +108,7 @@ Status ProcessDebugger::LaunchProcess(ProcessLaunchInfo &launch_info,
   // thread has been kicked off.  So there's no race conditions, and it
   // shouldn't be necessary to acquire the mutex.
 
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   Status result;
 
   FileSpec working_dir = launch_info.GetWorkingDirectory();
@@ -171,7 +171,7 @@ Status ProcessDebugger::LaunchProcess(ProcessLaunchInfo &launch_info,
 Status ProcessDebugger::AttachProcess(lldb::pid_t pid,
                                       const ProcessAttachInfo &attach_info,
                                       DebugDelegateSP delegate) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   m_session_data.reset(
       new ProcessWindowsData(!attach_info.GetContinueOnceAttached()));
   DebuggerThreadSP debugger(new DebuggerThread(delegate));
@@ -209,7 +209,7 @@ Status ProcessDebugger::AttachProcess(lldb::pid_t pid,
 }
 
 Status ProcessDebugger::DestroyProcess(const lldb::StateType state) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   DebuggerThreadSP debugger_thread;
   {
     // Acquire this lock inside an inner scope, only long enough to get the
@@ -245,7 +245,7 @@ Status ProcessDebugger::DestroyProcess(const lldb::StateType state) {
 }
 
 Status ProcessDebugger::HaltProcess(bool &caused_stop) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   Status error;
   llvm::sys::ScopedLock lock(m_mutex);
   caused_stop = ::DebugBreakProcess(m_session_data->m_debugger->GetProcess()
@@ -263,7 +263,7 @@ Status ProcessDebugger::ReadMemory(lldb::addr_t vm_addr, void *buf, size_t size,
                                    size_t &bytes_read) {
   Status error;
   bytes_read = 0;
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_MEMORY);
+  Log *log = GetLog(WindowsLog::Memory);
   llvm::sys::ScopedLock lock(m_mutex);
 
   if (!m_session_data) {
@@ -297,7 +297,7 @@ Status ProcessDebugger::WriteMemory(lldb::addr_t vm_addr, const void *buf,
                                     size_t size, size_t &bytes_written) {
   Status error;
   bytes_written = 0;
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_MEMORY);
+  Log *log = GetLog(WindowsLog::Memory);
   llvm::sys::ScopedLock lock(m_mutex);
   LLDB_LOG(log, "attempting to write {0} bytes into address {1:x}", size,
            vm_addr);
@@ -327,7 +327,7 @@ Status ProcessDebugger::AllocateMemory(size_t size, uint32_t permissions,
                                        lldb::addr_t &addr) {
   Status error;
   addr = LLDB_INVALID_ADDRESS;
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_MEMORY);
+  Log *log = GetLog(WindowsLog::Memory);
   llvm::sys::ScopedLock lock(m_mutex);
   LLDB_LOG(log, "attempting to allocate {0} bytes with permissions {1}", size,
            permissions);
@@ -355,7 +355,7 @@ Status ProcessDebugger::AllocateMemory(size_t size, uint32_t permissions,
 Status ProcessDebugger::DeallocateMemory(lldb::addr_t vm_addr) {
   Status result;
 
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_MEMORY);
+  Log *log = GetLog(WindowsLog::Memory);
   llvm::sys::ScopedLock lock(m_mutex);
   LLDB_LOG(log, "attempting to deallocate bytes at address {0}", vm_addr);
 
@@ -379,7 +379,7 @@ Status ProcessDebugger::DeallocateMemory(lldb::addr_t vm_addr) {
 
 Status ProcessDebugger::GetMemoryRegionInfo(lldb::addr_t vm_addr,
                                             MemoryRegionInfo &info) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_MEMORY);
+  Log *log = GetLog(WindowsLog::Memory);
   Status error;
   llvm::sys::ScopedLock lock(m_mutex);
   info.Clear();
@@ -484,7 +484,7 @@ void ProcessDebugger::OnDebuggerConnected(lldb::addr_t image_base) {}
 ExceptionResult
 ProcessDebugger::OnDebugException(bool first_chance,
                                   const ExceptionRecord &record) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EXCEPTION);
+  Log *log = GetLog(WindowsLog::Exception);
   llvm::sys::ScopedLock lock(m_mutex);
   // FIXME: Without this check, occasionally when running the test suite
   // there is an issue where m_session_data can be null.  It's not clear how
@@ -538,7 +538,7 @@ void ProcessDebugger::OnDebugString(const std::string &string) {}
 
 void ProcessDebugger::OnDebuggerError(const Status &error, uint32_t type) {
   llvm::sys::ScopedLock lock(m_mutex);
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
 
   if (m_session_data->m_initial_stop_received) {
     // This happened while debugging.  Do we shutdown the debugging session,
@@ -564,8 +564,7 @@ void ProcessDebugger::OnDebuggerError(const Status &error, uint32_t type) {
 Status ProcessDebugger::WaitForDebuggerConnection(DebuggerThreadSP debugger,
                                                   HostProcess &process) {
   Status result;
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS |
-                                            WINDOWS_LOG_BREAKPOINTS);
+  Log *log = GetLog(WindowsLog::Process | WindowsLog::Breakpoints);
   LLDB_LOG(log, "Waiting for loader breakpoint.");
 
   // Block this function until we receive the initial stop from the process.
diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp
index 97ba1bdc9ee9e..5398d16777ac4 100644
--- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp
@@ -141,7 +141,7 @@ Status ProcessWindows::EnableBreakpointSite(BreakpointSite *bp_site) {
   if (bp_site->HardwareRequired())
     return Status("Hardware breakpoints are not supported.");
 
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_BREAKPOINTS);
+  Log *log = GetLog(WindowsLog::Breakpoints);
   LLDB_LOG(log, "bp_site = {0:x}, id={1}, addr={2:x}", bp_site,
            bp_site->GetID(), bp_site->GetLoadAddress());
 
@@ -152,7 +152,7 @@ Status ProcessWindows::EnableBreakpointSite(BreakpointSite *bp_site) {
 }
 
 Status ProcessWindows::DisableBreakpointSite(BreakpointSite *bp_site) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_BREAKPOINTS);
+  Log *log = GetLog(WindowsLog::Breakpoints);
   LLDB_LOG(log, "bp_site = {0:x}, id={1}, addr={2:x}", bp_site,
            bp_site->GetID(), bp_site->GetLoadAddress());
 
@@ -165,7 +165,7 @@ Status ProcessWindows::DisableBreakpointSite(BreakpointSite *bp_site) {
 
 Status ProcessWindows::DoDetach(bool keep_stopped) {
   Status error;
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   StateType private_state = GetPrivateState();
   if (private_state != eStateExited && private_state != eStateDetached) {
     error = DetachProcess();
@@ -203,7 +203,7 @@ ProcessWindows::DoAttachToProcessWithID(lldb::pid_t pid,
 }
 
 Status ProcessWindows::DoResume() {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   llvm::sys::ScopedLock lock(m_mutex);
   Status error;
 
@@ -349,7 +349,7 @@ DumpAdditionalExceptionInformation(llvm::raw_ostream &stream,
 }
 
 void ProcessWindows::RefreshStateAfterStop() {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EXCEPTION);
+  Log *log = GetLog(WindowsLog::Exception);
   llvm::sys::ScopedLock lock(m_mutex);
 
   if (!m_session_data) {
@@ -520,7 +520,7 @@ bool ProcessWindows::CanDebug(lldb::TargetSP target_sp,
 
 bool ProcessWindows::DoUpdateThreadList(ThreadList &old_thread_list,
                                         ThreadList &new_thread_list) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_THREAD);
+  Log *log = GetLog(WindowsLog::Thread);
   // Add all the threads that were previously running and for which we did not
   // detect a thread exited event.
   int new_size = 0;
@@ -625,7 +625,7 @@ DynamicLoaderWindowsDYLD *ProcessWindows::GetDynamicLoader() {
 
 void ProcessWindows::OnExitProcess(uint32_t exit_code) {
   // No need to acquire the lock since m_session_data isn't accessed.
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "Process {0} exited with code {1}", GetID(), exit_code);
 
   TargetSP target = CalculateTarget();
@@ -644,7 +644,7 @@ void ProcessWindows::OnExitProcess(uint32_t exit_code) {
 
 void ProcessWindows::OnDebuggerConnected(lldb::addr_t image_base) {
   DebuggerThreadSP debugger = m_session_data->m_debugger;
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
   LLDB_LOG(log, "Debugger connected to process {0}.  Image base = {1:x}",
            debugger->GetProcess().GetProcessId(), image_base);
 
@@ -692,7 +692,7 @@ void ProcessWindows::OnDebuggerConnected(lldb::addr_t image_base) {
 ExceptionResult
 ProcessWindows::OnDebugException(bool first_chance,
                                  const ExceptionRecord &record) {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_EXCEPTION);
+  Log *log = GetLog(WindowsLog::Exception);
   llvm::sys::ScopedLock lock(m_mutex);
 
   // FIXME: Without this check, occasionally when running the test suite there
@@ -809,7 +809,7 @@ void ProcessWindows::OnDebugString(const std::string &string) {}
 
 void ProcessWindows::OnDebuggerError(const Status &error, uint32_t type) {
   llvm::sys::ScopedLock lock(m_mutex);
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_PROCESS);
+  Log *log = GetLog(WindowsLog::Process);
 
   if (m_session_data->m_initial_stop_received) {
     // This happened while debugging.  Do we shutdown the debugging session,
diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
index 68a9d767c227d..976591abc54aa 100644
--- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
+++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindowsLog.h
@@ -25,21 +25,10 @@ enum class WindowsLog : Log::MaskType {
   LLVM_MARK_AS_BITMASK_ENUM(Thread)
 };
 
-#define WINDOWS_LOG_PROCESS ::lldb_private::WindowsLog::Process
-#define WINDOWS_LOG_EXCEPTION ::lldb_private::WindowsLog::Exception
-#define WINDOWS_LOG_THREAD ::lldb_private::WindowsLog::Thread
-#define WINDOWS_LOG_MEMORY ::lldb_private::WindowsLog::Memory
-#define WINDOWS_LOG_BREAKPOINTS ::lldb_private::WindowsLog::Breakpoints
-#define WINDOWS_LOG_STEP ::lldb_private::WindowsLog::Step
-#define WINDOWS_LOG_REGISTERS ::lldb_private::WindowsLog::Registers
-#define WINDOWS_LOG_EVENT ::lldb_private::WindowsLog::Event
-
 class ProcessWindowsLog {
 public:
   static void Initialize();
   static void Terminate();
-
-  static Log *GetLogIfAny(WindowsLog mask) { return GetLog(mask); }
 };
 
 template <> Log::Channel &LogChannelFor<WindowsLog>();
diff --git a/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp
index 099bae6b0f619..b443df1e7cbd1 100644
--- a/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/RegisterContextWindows.cpp
@@ -112,7 +112,7 @@ bool RegisterContextWindows::AddHardwareBreakpoint(uint32_t slot,
   return ApplyAllRegisterValues();
 
 #else
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   LLDB_LOG(log, "hardware breakpoints not currently supported on this arch");
   return false;
 #endif
@@ -149,7 +149,7 @@ uint32_t RegisterContextWindows::GetTriggeredHardwareBreakpointSlotId() {
 }
 
 bool RegisterContextWindows::CacheAllRegisterValues() {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   if (!m_context_stale)
     return true;
 
diff --git a/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp b/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
index e4f1806597517..7788e2a8d9f7d 100644
--- a/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
+++ b/lldb/source/Plugins/Process/Windows/Common/x86/RegisterContextWindows_x86.cpp
@@ -192,7 +192,7 @@ bool RegisterContextWindows_x86::ReadRegister(const RegisterInfo *reg_info,
     return ReadRegisterHelper(CONTEXT_CONTROL, "EFLAGS", m_context.EFlags,
                               reg_value);
   default:
-    Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+    Log *log = GetLog(WindowsLog::Registers);
     LLDB_LOG(log, "Requested unknown register {0}", reg);
     break;
   }
@@ -208,7 +208,7 @@ bool RegisterContextWindows_x86::WriteRegister(const RegisterInfo *reg_info,
   if (!CacheAllRegisterValues())
     return false;
 
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   uint32_t reg = reg_info->kinds[eRegisterKindLLDB];
   switch (reg) {
   case lldb_eax_i386:
@@ -263,7 +263,7 @@ bool RegisterContextWindows_x86::WriteRegister(const RegisterInfo *reg_info,
 bool RegisterContextWindows_x86::ReadRegisterHelper(
     DWORD flags_required, const char *reg_name, DWORD value,
     RegisterValue &reg_value) const {
-  Log *log = ProcessWindowsLog::GetLogIfAny(WINDOWS_LOG_REGISTERS);
+  Log *log = GetLog(WindowsLog::Registers);
   if ((m_context.ContextFlags & flags_required) != flags_required) {
     LLDB_LOG(log, "Thread context doesn't have {0}", reg_name);
     return false;

From 84e85e025e02211d1f24ea423cd1be8945e70009 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Mon, 24 Jan 2022 18:11:14 +0000
Subject: [PATCH 866/946] [SelectionDAG][VP] Provide expansion for VP_MERGE

This patch adds support for expanding VP_MERGE through a sequence of
vector operations producing a full-length mask setting up the elements
past EVL/pivot to be false, combining this with the original mask, and
culminating in a full-length vector select.

This expansion should work for any data type, though the only use for
RVV is for boolean vectors, which themselves rely on an expansion for
the VSELECT.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D118058
---
 .../SelectionDAG/LegalizeVectorOps.cpp        | 46 ++++++++++++++
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  1 +
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 28 +++++++++
 llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 60 ++++++++++++++-----
 4 files changed, 119 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index cbb28863850fe..abf6a3ac69164 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -134,6 +134,7 @@ class VectorLegalizer {
   /// supported by the target.
   SDValue ExpandVSELECT(SDNode *Node);
   SDValue ExpandVP_SELECT(SDNode *Node);
+  SDValue ExpandVP_MERGE(SDNode *Node);
   SDValue ExpandSELECT(SDNode *Node);
   std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
   SDValue ExpandStore(SDNode *N);
@@ -877,6 +878,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
   case ISD::UREM:
     ExpandREM(Node, Results);
     return;
+  case ISD::VP_MERGE:
+    Results.push_back(ExpandVP_MERGE(Node));
+    return;
   }
 
   Results.push_back(DAG.UnrollVectorOp(Node));
@@ -1238,6 +1242,48 @@ SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
   return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Mask, EVL);
 }
 
+SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
+  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
+  // indices less than the EVL/pivot are true. Combine that with the original
+  // mask for a full-length mask. Use a full-length VSELECT to select between
+  // the true and false values.
+  SDLoc DL(Node);
+
+  SDValue Mask = Node->getOperand(0);
+  SDValue Op1 = Node->getOperand(1);
+  SDValue Op2 = Node->getOperand(2);
+  SDValue EVL = Node->getOperand(3);
+
+  EVT MaskVT = Mask.getValueType();
+  bool IsFixedLen = MaskVT.isFixedLengthVector();
+
+  EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
+                                  MaskVT.getVectorElementCount());
+
+  // If we can't construct the EVL mask efficiently, it's better to unroll.
+  if ((IsFixedLen &&
+       !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
+      (!IsFixedLen &&
+       (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
+        !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
+    return DAG.UnrollVectorOp(Node);
+
+  // If using a SETCC would result in a different type than the mask type,
+  // unroll.
+  if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+                             EVLVecVT) != MaskVT)
+    return DAG.UnrollVectorOp(Node);
+
+  SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
+  SDValue SplatEVL = IsFixedLen ? DAG.getSplatBuildVector(EVLVecVT, DL, EVL)
+                                : DAG.getSplatVector(EVLVecVT, DL, EVL);
+  SDValue EVLMask =
+      DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
+
+  SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
+  return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
+}
+
 void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
                                        SmallVectorImpl<SDValue> &Results) {
   // Attempt to expand using TargetLowering.
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 205f71a6fe478..5cc3aa35d4d24 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -572,6 +572,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::VSELECT, VT, Expand);
+      setOperationAction(ISD::VP_MERGE, VT, Expand);
       setOperationAction(ISD::VP_SELECT, VT, Expand);
 
       setOperationAction(ISD::VP_AND, VT, Custom);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index 8ac3184f02c45..f99a90df1fb24 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -4,6 +4,34 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
+declare <4 x i1> @llvm.vp.merge.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32)
+
+define <4 x i1> @vpmerge_vv_v4i1(<4 x i1> %va, <4 x i1> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_vv_v4i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
+; RV32-NEXT:    vid.v v10
+; RV32-NEXT:    vmsltu.vx v10, v10, a0
+; RV32-NEXT:    vmand.mm v9, v9, v10
+; RV32-NEXT:    vmandn.mm v8, v8, v9
+; RV32-NEXT:    vmand.mm v9, v0, v9
+; RV32-NEXT:    vmor.mm v0, v9, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_vv_v4i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
+; RV64-NEXT:    vid.v v10
+; RV64-NEXT:    vmsltu.vx v12, v10, a0
+; RV64-NEXT:    vmand.mm v9, v9, v12
+; RV64-NEXT:    vmandn.mm v8, v8, v9
+; RV64-NEXT:    vmand.mm v9, v0, v9
+; RV64-NEXT:    vmor.mm v0, v9, v8
+; RV64-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.merge.v4i1(<4 x i1> %m, <4 x i1> %va, <4 x i1> %vb, i32 %evl)
+  ret <4 x i1> %v
+}
+
 declare <2 x i8> @llvm.vp.merge.v2i8(<2 x i1>, <2 x i8>, <2 x i8>, i32)
 
 define <2 x i8> @vpmerge_vv_v2i8(<2 x i8> %va, <2 x i8> %vb, <2 x i1> %m, i32 zeroext %evl) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
index 6a4ac666b1101..3ea8029aaffeb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -4,6 +4,34 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+v,+m -target-abi=lp64d \
 ; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
+declare <vscale x 1 x i1> @llvm.vp.merge.nxv1i1(<vscale x 1 x i1>, <vscale x 1 x i1>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i1> @vpmerge_nxv1i1(<vscale x 1 x i1> %va, <vscale x 1 x i1> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vpmerge_nxv1i1:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetvli a1, zero, e32, mf2, ta, mu
+; RV32-NEXT:    vid.v v10
+; RV32-NEXT:    vmsltu.vx v10, v10, a0
+; RV32-NEXT:    vmand.mm v9, v9, v10
+; RV32-NEXT:    vmandn.mm v8, v8, v9
+; RV32-NEXT:    vmand.mm v9, v0, v9
+; RV32-NEXT:    vmor.mm v0, v9, v8
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vpmerge_nxv1i1:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetvli a1, zero, e64, m1, ta, mu
+; RV64-NEXT:    vid.v v10
+; RV64-NEXT:    vmsltu.vx v10, v10, a0
+; RV64-NEXT:    vmand.mm v9, v9, v10
+; RV64-NEXT:    vmandn.mm v8, v8, v9
+; RV64-NEXT:    vmand.mm v9, v0, v9
+; RV64-NEXT:    vmor.mm v0, v9, v8
+; RV64-NEXT:    ret
+  %v = call <vscale x 1 x i1> @llvm.vp.merge.nxv1i1(<vscale x 1 x i1> %m, <vscale x 1 x i1> %va, <vscale x 1 x i1> %vb, i32 %evl)
+  ret <vscale x 1 x i1> %v
+}
+
 declare <vscale x 1 x i8> @llvm.vp.merge.nxv1i8(<vscale x 1 x i1>, <vscale x 1 x i8>, <vscale x 1 x i8>, i32)
 
 define <vscale x 1 x i8> @vpmerge_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
@@ -332,10 +360,10 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; RV32-NEXT:    addi a2, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a2, 0
-; RV32-NEXT:    bltu a3, a4, .LBB24_2
+; RV32-NEXT:    bltu a3, a4, .LBB25_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a2, a4
-; RV32-NEXT:  .LBB24_2:
+; RV32-NEXT:  .LBB25_2:
 ; RV32-NEXT:    vl8r.v v8, (a0)
 ; RV32-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v2
@@ -350,10 +378,10 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8re8.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT:    bltu a3, a1, .LBB24_4
+; RV32-NEXT:    bltu a3, a1, .LBB25_4
 ; RV32-NEXT:  # %bb.3:
 ; RV32-NEXT:    mv a3, a1
-; RV32-NEXT:  .LBB24_4:
+; RV32-NEXT:  .LBB25_4:
 ; RV32-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    addi a0, sp, 16
@@ -384,18 +412,18 @@ define <vscale x 128 x i8> @vpmerge_vv_nxv128i8(<vscale x 128 x i8> %va, <vscale
 ; RV64-NEXT:    addi a2, sp, 16
 ; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    li a2, 0
-; RV64-NEXT:    bltu a3, a4, .LBB24_2
+; RV64-NEXT:    bltu a3, a4, .LBB25_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a2, a4
-; RV64-NEXT:  .LBB24_2:
+; RV64-NEXT:  .LBB25_2:
 ; RV64-NEXT:    vl8r.v v8, (a0)
 ; RV64-NEXT:    vsetvli zero, a2, e8, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v2
 ; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT:    bltu a3, a1, .LBB24_4
+; RV64-NEXT:    bltu a3, a1, .LBB25_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    mv a3, a1
-; RV64-NEXT:  .LBB24_4:
+; RV64-NEXT:  .LBB25_4:
 ; RV64-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v1
 ; RV64-NEXT:    addi a0, sp, 16
@@ -417,20 +445,20 @@ define <vscale x 128 x i8> @vpmerge_vx_nxv128i8(i8 %a, <vscale x 128 x i8> %vb,
 ; CHECK-NEXT:    csrr a3, vlenb
 ; CHECK-NEXT:    slli a3, a3, 3
 ; CHECK-NEXT:    mv a4, a2
-; CHECK-NEXT:    bltu a2, a3, .LBB25_2
+; CHECK-NEXT:    bltu a2, a3, .LBB26_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a4, a3
-; CHECK-NEXT:  .LBB25_2:
+; CHECK-NEXT:  .LBB26_2:
 ; CHECK-NEXT:    li a5, 0
 ; CHECK-NEXT:    vsetvli a6, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v24, (a1)
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
 ; CHECK-NEXT:    sub a1, a2, a3
 ; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
-; CHECK-NEXT:    bltu a2, a1, .LBB25_4
+; CHECK-NEXT:    bltu a2, a1, .LBB26_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a5, a1
-; CHECK-NEXT:  .LBB25_4:
+; CHECK-NEXT:  .LBB26_4:
 ; CHECK-NEXT:    vsetvli zero, a5, e8, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vxm v16, v16, a0, v0
@@ -447,20 +475,20 @@ define <vscale x 128 x i8> @vpmerge_vi_nxv128i8(<vscale x 128 x i8> %vb, <vscale
 ; CHECK-NEXT:    csrr a2, vlenb
 ; CHECK-NEXT:    slli a2, a2, 3
 ; CHECK-NEXT:    mv a3, a1
-; CHECK-NEXT:    bltu a1, a2, .LBB26_2
+; CHECK-NEXT:    bltu a1, a2, .LBB27_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a3, a2
-; CHECK-NEXT:  .LBB26_2:
+; CHECK-NEXT:  .LBB27_2:
 ; CHECK-NEXT:    li a4, 0
 ; CHECK-NEXT:    vsetvli a5, zero, e8, m8, ta, mu
 ; CHECK-NEXT:    vlm.v v24, (a0)
 ; CHECK-NEXT:    vsetvli zero, a3, e8, m8, tu, mu
 ; CHECK-NEXT:    sub a0, a1, a2
 ; CHECK-NEXT:    vmerge.vim v8, v8, 2, v0
-; CHECK-NEXT:    bltu a1, a0, .LBB26_4
+; CHECK-NEXT:    bltu a1, a0, .LBB27_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    mv a4, a0
-; CHECK-NEXT:  .LBB26_4:
+; CHECK-NEXT:  .LBB27_4:
 ; CHECK-NEXT:    vsetvli zero, a4, e8, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vmerge.vim v16, v16, 2, v0

From 8d992862a0281d595d08d172ab6fb48f8cd7395c Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 10:14:44 +0100
Subject: [PATCH 867/946] [InstCombine] Remove some pointer element type
 accesses

One of these is guarded against opaque pointers, and the others
were accessing the call function type in a rather convoluted way.
---
 .../Transforms/InstCombine/InstCombineCalls.cpp   | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index bfe09da91f53b..1fb46af46bee6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -2783,9 +2783,9 @@ Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
         PointerType *NewTy = cast<PointerType>(CI->getOperand(0)->getType());
         if (!NewTy->isOpaque() && Call.isByValArgument(ix)) {
           Call.removeParamAttr(ix, Attribute::ByVal);
-          Call.addParamAttr(
-              ix, Attribute::getWithByValType(
-                      Call.getContext(), NewTy->getPointerElementType()));
+          Call.addParamAttr(ix, Attribute::getWithByValType(
+                                    Call.getContext(),
+                                    NewTy->getNonOpaquePointerElementType()));
         }
         Changed = true;
       }
@@ -3052,17 +3052,14 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
     // If the callee is just a declaration, don't change the varargsness of the
     // call.  We don't want to introduce a varargs call where one doesn't
     // already exist.
-    PointerType *APTy = cast<PointerType>(Call.getCalledOperand()->getType());
-    if (FT->isVarArg()!=cast<FunctionType>(APTy->getPointerElementType())->isVarArg())
+    if (FT->isVarArg() != Call.getFunctionType()->isVarArg())
       return false;
 
     // If both the callee and the cast type are varargs, we still have to make
     // sure the number of fixed parameters are the same or we have the same
     // ABI issues as if we introduce a varargs call.
-    if (FT->isVarArg() &&
-        cast<FunctionType>(APTy->getPointerElementType())->isVarArg() &&
-        FT->getNumParams() !=
-        cast<FunctionType>(APTy->getPointerElementType())->getNumParams())
+    if (FT->isVarArg() && Call.getFunctionType()->isVarArg() &&
+        FT->getNumParams() != Call.getFunctionType()->getNumParams())
       return false;
   }
 

From 4cd8877a3453d5ee4289348db45d89b8e249cf72 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 27 Jan 2022 10:16:56 +0100
Subject: [PATCH 868/946] [lldb/test] Fix gnu-style-compression.yaml

In the rush to get the bot green, I did not realize I was building the
file with -gsplit-dwarf, and therefore the yaml ended up referring to a
file I did not check it.

This rebuilds the file without split dwarf.
---
 .../DWARF/x86/gnu-style-compression.yaml      | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.yaml b/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.yaml
index d037cf474232e..069753835474b 100644
--- a/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.yaml
+++ b/lldb/test/Shell/SymbolFile/DWARF/x86/gnu-style-compression.yaml
@@ -31,26 +31,26 @@ Sections:
     AddressAlign:    0x8
     Offset:          0x1000
     Content:         2F000000000000002A00000000000000
-  - Name:            .debug_info
+  - Name:            .zdebug_info
     Type:            SHT_PROGBITS
     AddressAlign:    0x1
-    Content:         20000000040000000000080100000000000000001A000000C088361D3BC5B7B700000000
+    Content:         5A4C49420000000000000077789C2B666060606100010E4610A9C820CA00012640CCE407248C81989197939941C0012CC16C01D2130024589998C280540848011F2733074C4124488E35116C28171B48493E480937505B04488A830100368605A7
   - Name:            .debug_abbrev
     Type:            SHT_PROGBITS
     AddressAlign:    0x1
-    Content:         01110010171B0EB44219B0420EB14207B34217000000
+    Content:         011101250E1305030E10171B0E0000023400030E49133F193A0B3B0B0218000003260049130000042400030E3E0B0B0B0000051301360B030E0B0B3A0B3B0B0000060D00030E49133A0B3B0B380B000000
   - Name:            .debug_line
     Type:            SHT_PROGBITS
     AddressAlign:    0x1
-    Content:         7100000004006B000000010101FB0E0D0001010101000000010000012F686F6D652F706176656C6F2F6C6C00006D6F6E6F2F6C6C64622F746573742F5368656C6C2F53796D626F6C46696C652F44574152462F7838362F676E752D7374796C652D636F6D7072657373696F6E2E6370700001000000
-  - Name:            .zdebug_str
-    Type:            SHT_PROGBITS
-    Flags:           [ SHF_MERGE, SHF_STRINGS ]
-    AddressAlign:    0x1
-    EntSize:         0x1
-    Content:         5A4C49420000000000000084789C75C9310E82401005502E04BF33B62686D6440A6B979DC826B3FC89332B727BB980ED7B585805F6FC8812AA48AD68062D3AFC1B04A97E504E08F1C0B4C8F1D35E13752C2AB83E2EF711DFF309B716D602AFB5F51EBB4A3FB3DA5BDC0BD761361BF2C6EE0781C8302C
-  - Name:            .debug_addr
-    Type:            SHT_PROGBITS
-    AddressAlign:    0x1
-    Content:         '00104000000000000810400000000000'
+    Content:         3C000000040036000000010101FB0E0D0001010101000000010000012F746D700000676E752D7374796C652D636F6D7072657373696F6E2E6370700001000000
+DWARF:
+  debug_str:
+    - clang version 13.0.0
+    - '/tmp/gnu-style-compression.cpp'
+    - '/tmp/my_working_directory'
+    - s
+    - short
+    - a
+    - long int
+    - A
 ...

From 73cd8e29ad1d95692e480b85ccbefe5212468e9c Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 10:24:15 +0100
Subject: [PATCH 869/946] [InstCombine] Skip PromoteCastOfAllocation()
 transform under opaque pointers

I think this can't be hit anyway (because a ptr-to-ptr bitcast would
get folded earlier), but in the interest of being explicit skip
this transform for opaque pointers entirely.
---
 llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 10a7c1b406a57..f11ba8772f3ca 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -85,13 +85,16 @@ static Value *decomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
 Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
                                                        AllocaInst &AI) {
   PointerType *PTy = cast<PointerType>(CI.getType());
+  // Opaque pointers don't have an element type we could replace with.
+  if (PTy->isOpaque())
+    return nullptr;
 
   IRBuilderBase::InsertPointGuard Guard(Builder);
   Builder.SetInsertPoint(&AI);
 
   // Get the type really allocated and the type casted to.
   Type *AllocElTy = AI.getAllocatedType();
-  Type *CastElTy = PTy->getPointerElementType();
+  Type *CastElTy = PTy->getNonOpaquePointerElementType();
   if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
 
   // This optimisation does not work for cases where the cast type

From fc72f3a168b6575aae66b8a6519341469267f319 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 10:30:21 +0100
Subject: [PATCH 870/946] [BTFDebug] Avoid pointer element type access

Use the global value type instead.
---
 llvm/lib/Target/BPF/BTFDebug.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp
index 608be5160da7e..d536aed1d2114 100644
--- a/llvm/lib/Target/BPF/BTFDebug.cpp
+++ b/llvm/lib/Target/BPF/BTFDebug.cpp
@@ -1366,8 +1366,7 @@ void BTFDebug::processGlobals(bool ProcessingMapDef) {
 
     // Calculate symbol size
     const DataLayout &DL = Global.getParent()->getDataLayout();
-    uint32_t Size =
-        DL.getTypeAllocSize(Global.getType()->getPointerElementType());
+    uint32_t Size = DL.getTypeAllocSize(Global.getValueType());
 
     DataSecEntries[std::string(SecName)]->addDataSecEntry(VarId,
         Asm->getSymbol(&Global), Size);

From fed2f690a9943dfbc656b75f8a7da1d446b05f72 Mon Sep 17 00:00:00 2001
From: Fraser Cormack <fraser@codeplay.com>
Date: Thu, 27 Jan 2022 09:26:20 +0000
Subject: [PATCH 871/946] [RISCV] Fix test case expected output

I didn't correctly update this before landing D118058.
---
 .../RISCV/rvv/fixed-vectors-vpmerge.ll        | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
index f99a90df1fb24..f1468b1ad9ea2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll
@@ -1046,10 +1046,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV32-NEXT:    addi a1, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a1, 0
-; RV32-NEXT:    bltu a2, a3, .LBB75_2
+; RV32-NEXT:    bltu a2, a3, .LBB76_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a1, a3
-; RV32-NEXT:  .LBB75_2:
+; RV32-NEXT:  .LBB76_2:
 ; RV32-NEXT:    vle64.v v8, (a0)
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV32-NEXT:    vslidedown.vi v0, v1, 2
@@ -1066,10 +1066,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmerge.vvm v16, v16, v24, v0
-; RV32-NEXT:    bltu a2, a0, .LBB75_4
+; RV32-NEXT:    bltu a2, a0, .LBB76_4
 ; RV32-NEXT:  # %bb.3:
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB75_4:
+; RV32-NEXT:  .LBB76_4:
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
 ; RV32-NEXT:    vmv1r.v v0, v1
 ; RV32-NEXT:    addi a0, sp, 16
@@ -1102,10 +1102,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    li a1, 0
-; RV64-NEXT:    bltu a2, a3, .LBB75_2
+; RV64-NEXT:    bltu a2, a3, .LBB76_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    mv a1, a3
-; RV64-NEXT:  .LBB75_2:
+; RV64-NEXT:  .LBB76_2:
 ; RV64-NEXT:    vle64.v v8, (a0)
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; RV64-NEXT:    vslidedown.vi v0, v1, 2
@@ -1114,10 +1114,10 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3
 ; RV64-NEXT:    addi a1, sp, 16
 ; RV64-NEXT:    vl8re8.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT:    vmerge.vvm v24, v24, v16, v0
-; RV64-NEXT:    bltu a2, a0, .LBB75_4
+; RV64-NEXT:    bltu a2, a0, .LBB76_4
 ; RV64-NEXT:  # %bb.3:
 ; RV64-NEXT:    li a2, 16
-; RV64-NEXT:  .LBB75_4:
+; RV64-NEXT:  .LBB76_4:
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, tu, mu
 ; RV64-NEXT:    vmv1r.v v0, v1
 ; RV64-NEXT:    csrr a0, vlenb
@@ -1142,19 +1142,19 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1>
 ; CHECK-NEXT:    addi a2, a0, -16
 ; CHECK-NEXT:    vmv1r.v v24, v0
 ; CHECK-NEXT:    li a1, 0
-; CHECK-NEXT:    bltu a0, a2, .LBB76_2
+; CHECK-NEXT:    bltu a0, a2, .LBB77_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:  .LBB76_2:
+; CHECK-NEXT:  .LBB77_2:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, mu
 ; CHECK-NEXT:    vslidedown.vi v0, v24, 2
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m8, tu, mu
 ; CHECK-NEXT:    li a1, 16
 ; CHECK-NEXT:    vfmerge.vfm v16, v16, fa0, v0
-; CHECK-NEXT:    bltu a0, a1, .LBB76_4
+; CHECK-NEXT:    bltu a0, a1, .LBB77_4
 ; CHECK-NEXT:  # %bb.3:
 ; CHECK-NEXT:    li a0, 16
-; CHECK-NEXT:  .LBB76_4:
+; CHECK-NEXT:  .LBB77_4:
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m8, tu, mu
 ; CHECK-NEXT:    vmv1r.v v0, v24
 ; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0

From b7179d92799c7a375b7f8fa1a2a91c5fda713423 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 10:47:29 +0100
Subject: [PATCH 872/946] [InstCombine] Extract GEP of bitcast folds into
 separate function (NFC)

---
 .../InstCombine/InstCombineInternal.h         |   1 +
 .../InstCombine/InstructionCombining.cpp      | 202 +++++++++---------
 2 files changed, 107 insertions(+), 96 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index a7d1ff202e5e7..e8c82786a3c36 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -149,6 +149,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   Instruction *visitPHINode(PHINode &PN);
   Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
   Instruction *visitGEPOfGEP(GetElementPtrInst &GEP, GEPOperator *Src);
+  Instruction *visitGEPOfBitcast(BitCastInst *BCI, GetElementPtrInst &GEP);
   Instruction *visitAllocaInst(AllocaInst &AI);
   Instruction *visitAllocSite(Instruction &FI);
   Instruction *visitFree(CallInst &FI);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5de1d6fa4567a..5c1192537b964 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2079,6 +2079,109 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
   return nullptr;
 }
 
+// Note that we may have also stripped an address space cast in between.
+Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
+                                                 GetElementPtrInst &GEP) {
+  Type *GEPEltType = GEP.getSourceElementType();
+  Value *SrcOp = BCI->getOperand(0);
+  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
+  Type *SrcEltType = SrcType->getPointerElementType();
+
+  // GEP directly using the source operand if this GEP is accessing an element
+  // of a bitcasted pointer to vector or array of the same dimensions:
+  // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
+  // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
+  auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
+                                        const DataLayout &DL) {
+    auto *VecVTy = cast<FixedVectorType>(VecTy);
+    return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
+           ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
+           DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
+  };
+  if (GEP.getNumOperands() == 3 &&
+      ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
+        areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
+       (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
+        areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
+
+    // Create a new GEP here, as using `setOperand()` followed by
+    // `setSourceElementType()` won't actually update the type of the
+    // existing GEP Value. Causing issues if this Value is accessed when
+    // constructing an AddrSpaceCastInst
+    SmallVector<Value *, 8> Indices(GEP.indices());
+    Value *NGEP = GEP.isInBounds()
+                      ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, Indices)
+                      : Builder.CreateGEP(SrcEltType, SrcOp, Indices);
+    NGEP->takeName(&GEP);
+
+    // Preserve GEP address space to satisfy users
+    if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+      return new AddrSpaceCastInst(NGEP, GEP.getType());
+
+    return replaceInstUsesWith(GEP, NGEP);
+  }
+
+  // See if we can simplify:
+  //   X = bitcast A* to B*
+  //   Y = gep X, <...constant indices...>
+  // into a gep of the original struct. This is important for SROA and alias
+  // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+  unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEP.getType());
+  APInt Offset(OffsetBits, 0);
+
+  // If the bitcast argument is an allocation, The bitcast is for convertion
+  // to actual type of allocation. Removing such bitcasts, results in having
+  // GEPs with i8* base and pure byte offsets. That means GEP is not aware of
+  // struct or array hierarchy.
+  // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
+  // a better chance to succeed.
+  if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
+      !isAllocationFn(SrcOp, &TLI)) {
+    // If this GEP instruction doesn't move the pointer, just replace the GEP
+    // with a bitcast of the real input to the dest type.
+    if (!Offset) {
+      // If the bitcast is of an allocation, and the allocation will be
+      // converted to match the type of the cast, don't touch this.
+      if (isa<AllocaInst>(SrcOp)) {
+        // See if the bitcast simplifies, if so, don't nuke this GEP yet.
+        if (Instruction *I = visitBitCast(*BCI)) {
+          if (I != BCI) {
+            I->takeName(BCI);
+            BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
+            replaceInstUsesWith(*BCI, I);
+          }
+          return &GEP;
+        }
+      }
+
+      if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
+        return new AddrSpaceCastInst(SrcOp, GEP.getType());
+      return new BitCastInst(SrcOp, GEP.getType());
+    }
+
+    // Otherwise, if the offset is non-zero, we need to find out if there is a
+    // field at Offset in 'A's type.  If so, we can pull the cast through the
+    // GEP.
+    SmallVector<Value*, 8> NewIndices;
+    if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
+      Value *NGEP =
+          GEP.isInBounds()
+              ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
+              : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
+
+      if (NGEP->getType() == GEP.getType())
+        return replaceInstUsesWith(GEP, NGEP);
+      NGEP->takeName(&GEP);
+
+      if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
+        return new AddrSpaceCastInst(NGEP, GEP.getType());
+      return new BitCastInst(NGEP, GEP.getType());
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
   SmallVector<Value *, 8> Indices(GEP.indices());
@@ -2495,102 +2598,9 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       ASCStrippedPtrOp = BC;
   }
 
-  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
-    Value *SrcOp = BCI->getOperand(0);
-    PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
-    Type *SrcEltType = SrcType->getPointerElementType();
-
-    // GEP directly using the source operand if this GEP is accessing an element
-    // of a bitcasted pointer to vector or array of the same dimensions:
-    // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
-    // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
-    auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy,
-                                          const DataLayout &DL) {
-      auto *VecVTy = cast<FixedVectorType>(VecTy);
-      return ArrTy->getArrayElementType() == VecVTy->getElementType() &&
-             ArrTy->getArrayNumElements() == VecVTy->getNumElements() &&
-             DL.getTypeAllocSize(ArrTy) == DL.getTypeAllocSize(VecTy);
-    };
-    if (GEP.getNumOperands() == 3 &&
-        ((GEPEltType->isArrayTy() && isa<FixedVectorType>(SrcEltType) &&
-          areMatchingArrayAndVecTypes(GEPEltType, SrcEltType, DL)) ||
-         (isa<FixedVectorType>(GEPEltType) && SrcEltType->isArrayTy() &&
-          areMatchingArrayAndVecTypes(SrcEltType, GEPEltType, DL)))) {
-
-      // Create a new GEP here, as using `setOperand()` followed by
-      // `setSourceElementType()` won't actually update the type of the
-      // existing GEP Value. Causing issues if this Value is accessed when
-      // constructing an AddrSpaceCastInst
-      Value *NGEP = GEP.isInBounds()
-                        ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, Indices)
-                        : Builder.CreateGEP(SrcEltType, SrcOp, Indices);
-      NGEP->takeName(&GEP);
-
-      // Preserve GEP address space to satisfy users
-      if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
-        return new AddrSpaceCastInst(NGEP, GEPType);
-
-      return replaceInstUsesWith(GEP, NGEP);
-    }
-
-    // See if we can simplify:
-    //   X = bitcast A* to B*
-    //   Y = gep X, <...constant indices...>
-    // into a gep of the original struct. This is important for SROA and alias
-    // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
-    unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType);
-    APInt Offset(OffsetBits, 0);
-
-    // If the bitcast argument is an allocation, The bitcast is for convertion
-    // to actual type of allocation. Removing such bitcasts, results in having
-    // GEPs with i8* base and pure byte offsets. That means GEP is not aware of
-    // struct or array hierarchy.
-    // By avoiding such GEPs, phi translation and MemoryDependencyAnalysis have
-    // a better chance to succeed.
-    if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset) &&
-        !isAllocationFn(SrcOp, &TLI)) {
-      // If this GEP instruction doesn't move the pointer, just replace the GEP
-      // with a bitcast of the real input to the dest type.
-      if (!Offset) {
-        // If the bitcast is of an allocation, and the allocation will be
-        // converted to match the type of the cast, don't touch this.
-        if (isa<AllocaInst>(SrcOp)) {
-          // See if the bitcast simplifies, if so, don't nuke this GEP yet.
-          if (Instruction *I = visitBitCast(*BCI)) {
-            if (I != BCI) {
-              I->takeName(BCI);
-              BCI->getParent()->getInstList().insert(BCI->getIterator(), I);
-              replaceInstUsesWith(*BCI, I);
-            }
-            return &GEP;
-          }
-        }
-
-        if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
-          return new AddrSpaceCastInst(SrcOp, GEPType);
-        return new BitCastInst(SrcOp, GEPType);
-      }
-
-      // Otherwise, if the offset is non-zero, we need to find out if there is a
-      // field at Offset in 'A's type.  If so, we can pull the cast through the
-      // GEP.
-      SmallVector<Value*, 8> NewIndices;
-      if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
-        Value *NGEP =
-            GEP.isInBounds()
-                ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
-                : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
-
-        if (NGEP->getType() == GEPType)
-          return replaceInstUsesWith(GEP, NGEP);
-        NGEP->takeName(&GEP);
-
-        if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
-          return new AddrSpaceCastInst(NGEP, GEPType);
-        return new BitCastInst(NGEP, GEPType);
-      }
-    }
-  }
+  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp))
+    if (Instruction *I = visitGEPOfBitcast(BCI, GEP))
+      return I;
 
   if (!GEP.isInBounds()) {
     unsigned IdxWidth =

From 2c736f666b7a563c6f696957fdfa09edd971ac72 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 10:51:45 +0100
Subject: [PATCH 873/946] [InstCombine] Skip GEP of bitcast transform with
 opaque pointers

This transform is fundamentally incompatible with opaque pointers.
Usually we would not hit it anyway because the bitcast is folded
away earlier, but due to worklist order it might survive until
here, so make sure we bail out explicitly.
---
 llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5c1192537b964..bdc72b736b851 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2082,10 +2082,15 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
 // Note that we may have also stripped an address space cast in between.
 Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
                                                  GetElementPtrInst &GEP) {
+  // With opaque pointers, there is no pointer element type we can use to
+  // adjust the GEP type.
+  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
+  if (SrcType->isOpaque())
+    return nullptr;
+
   Type *GEPEltType = GEP.getSourceElementType();
+  Type *SrcEltType = SrcType->getNonOpaquePointerElementType();
   Value *SrcOp = BCI->getOperand(0);
-  PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
-  Type *SrcEltType = SrcType->getPointerElementType();
 
   // GEP directly using the source operand if this GEP is accessing an element
   // of a bitcasted pointer to vector or array of the same dimensions:

From 773467c8126318f8d735e36ed0b6eafc952bb74c Mon Sep 17 00:00:00 2001
From: Sergej Jaskiewicz <sergej.jaskiewicz@jetbrains.com>
Date: Thu, 20 Jan 2022 16:36:41 +0300
Subject: [PATCH 874/946] [LLDB] Add formatters for PointerIntPair,
 PointerUnion

Also, add summaries for `SmallVector` and `ArrayRef`,
and fix the `StringRef` summary provider so it doesn't
ignore the `Length` field.

Differential Revision: https://reviews.llvm.org/D117779
---
 llvm/utils/lldbDataFormatters.py | 132 +++++++++++++++++++++++++++++--
 1 file changed, 126 insertions(+), 6 deletions(-)

diff --git a/llvm/utils/lldbDataFormatters.py b/llvm/utils/lldbDataFormatters.py
index 4fc420ff997d4..6326c4f65ec75 100644
--- a/llvm/utils/lldbDataFormatters.py
+++ b/llvm/utils/lldbDataFormatters.py
@@ -5,18 +5,28 @@
 """
 
 import lldb
+import json
 
 def __lldb_init_module(debugger, internal_dict):
     debugger.HandleCommand('type category define -e llvm -l c++')
     debugger.HandleCommand('type synthetic add -w llvm '
                            '-l lldbDataFormatters.SmallVectorSynthProvider '
                            '-x "^llvm::SmallVectorImpl<.+>$"')
+    debugger.HandleCommand('type summary add -w llvm '
+                           '-s "size=${svar%#}" '
+                           '-x "^llvm::SmallVectorImpl<.+>$"')
     debugger.HandleCommand('type synthetic add -w llvm '
                            '-l lldbDataFormatters.SmallVectorSynthProvider '
                            '-x "^llvm::SmallVector<.+,.+>$"')
+    debugger.HandleCommand('type summary add -w llvm '
+                           '-s "size=${svar%#}" '
+                           '-x "^llvm::SmallVector<.+,.+>$"')
     debugger.HandleCommand('type synthetic add -w llvm '
                            '-l lldbDataFormatters.ArrayRefSynthProvider '
                            '-x "^llvm::ArrayRef<.+>$"')
+    debugger.HandleCommand('type summary add -w llvm '
+                           '-s "size=${svar%#}" '
+                           '-x "^llvm::ArrayRef<.+>$"')
     debugger.HandleCommand('type synthetic add -w llvm '
                            '-l lldbDataFormatters.OptionalSynthProvider '
                            '-x "^llvm::Optional<.+>$"')
@@ -32,6 +42,13 @@ def __lldb_init_module(debugger, internal_dict):
     debugger.HandleCommand('type summary add -w llvm '
                            '-F lldbDataFormatters.ConstStringSummaryProvider '
                            '-x "^lldb_private::ConstString$"')
+    debugger.HandleCommand('type synthetic add -w llvm '
+                           '-l lldbDataFormatters.PointerIntPairSynthProvider '
+                           '-x "^llvm::PointerIntPair<.+>$"')
+    debugger.HandleCommand('type synthetic add -w llvm '
+                           '-l lldbDataFormatters.PointerUnionSynthProvider '
+                           '-x "^llvm::PointerUnion<.+>$"')
+
 
 # Pretty printer for llvm::SmallVector/llvm::SmallVectorImpl
 class SmallVectorSynthProvider:
@@ -151,19 +168,122 @@ def SmallStringSummaryProvider(valobj, internal_dict):
     res += "\""
     return res
 
+
 def StringRefSummaryProvider(valobj, internal_dict):
     if valobj.GetNumChildren() == 2:
         # StringRef's are also used to point at binary blobs in memory,
         # so filter out suspiciously long strings.
-        max_length = 256
-        length = valobj.GetChildAtIndex(1).GetValueAsUnsigned(max_length)
+        max_length = 1024
+        actual_length = valobj.GetChildAtIndex(1).GetValueAsUnsigned()
+        truncate = actual_length > max_length
+        length = min(max_length, actual_length)
         if length == 0:
-            return "NULL"
-        if length < max_length:
-            return valobj.GetChildAtIndex(0).GetSummary()
-    return ""
+            return '""'
+
+        data = valobj.GetChildAtIndex(0).GetPointeeData(item_count=length)
+        error = lldb.SBError()
+        string = data.ReadRawData(error, 0, data.GetByteSize()).decode()
+        if error.Fail():
+            return "<error: %s>" % error.description
+
+        # json.dumps conveniently escapes the string for us.
+        string = json.dumps(string)
+        if truncate:
+            string += "..."
+        return string
+    return None
+
 
 def ConstStringSummaryProvider(valobj, internal_dict):
     if valobj.GetNumChildren() == 1:
         return valobj.GetChildAtIndex(0).GetSummary()
     return ""
+
+
+def get_expression_path(val):
+    stream = lldb.SBStream()
+    if not val.GetExpressionPath(stream):
+        return None
+    return stream.GetData()
+
+
+class PointerIntPairSynthProvider:
+    def __init__(self, valobj, internal_dict):
+        self.valobj = valobj
+        self.update()
+
+    def num_children(self):
+        return 2
+
+    def get_child_index(self, name):
+        if name == 'Pointer':
+            return 0
+        if name == 'Int':
+            return 1
+        return None
+
+    def get_child_at_index(self, index):
+        expr_path = get_expression_path(self.valobj)
+        if index == 0:
+            return self.valobj.CreateValueFromExpression('Pointer', f'({self.pointer_ty.name}){expr_path}.getPointer()')
+        if index == 1:
+            return self.valobj.CreateValueFromExpression('Int', f'({self.int_ty.name}){expr_path}.getInt()')
+        return None
+
+    def update(self):
+        self.pointer_ty = self.valobj.GetType().GetTemplateArgumentType(0)
+        self.int_ty = self.valobj.GetType().GetTemplateArgumentType(2)
+
+
+def parse_template_parameters(typename):
+    """
+    LLDB doesn't support template parameter packs, so let's parse them manually.
+    """
+    result = []
+    start = typename.find('<')
+    end = typename.rfind('>')
+    if start < 1 or end < 2 or end - start < 2:
+        return result
+
+    nesting_level = 0
+    current_parameter_start = start + 1
+
+    for i in range(start + 1, end + 1):
+        c = typename[i]
+        if c == '<':
+            nesting_level += 1
+        elif c == '>':
+            nesting_level -= 1
+        elif c == ',' and nesting_level == 0:
+            result.append(typename[current_parameter_start:i].strip())
+            current_parameter_start = i + 1
+
+    result.append(typename[current_parameter_start:i].strip())
+
+    return result
+
+
+class PointerUnionSynthProvider:
+    def __init__(self, valobj, internal_dict):
+        self.valobj = valobj
+        self.update()
+
+    def num_children(self):
+        return 1
+
+    def get_child_index(self, name):
+        if name == 'Ptr':
+            return 0
+        return None
+
+    def get_child_at_index(self, index):
+        if index != 0:
+            return None
+        ptr_type_name = self.template_args[self.active_type_tag]
+        return self.valobj.CreateValueFromExpression('Ptr', f'({ptr_type_name}){self.val_expr_path}.getPointer()')
+
+    def update(self):
+        self.pointer_int_pair = self.valobj.GetChildMemberWithName('Val')
+        self.val_expr_path = get_expression_path(self.valobj.GetChildMemberWithName('Val'))
+        self.active_type_tag = self.valobj.CreateValueFromExpression('', f'(int){self.val_expr_path}.getInt()').GetValueAsSigned()
+        self.template_args = parse_template_parameters(self.valobj.GetType().name)

From bb5c1b06916b3452a2717867428077a3c119275f Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 27 Jan 2022 09:55:38 +0000
Subject: [PATCH 875/946] [LoopVersioning] Use IRBuilder for OR simplification.

---
 llvm/lib/Transforms/Utils/LoopVersioning.cpp | 21 ++++++++++----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index 771b7d25b0f20..f0bf625fa18ed 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Transforms/Utils/LoopVersioning.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -70,17 +71,14 @@ void LoopVersioning::versionLoop(
                    "scev.check");
   SCEVRuntimeCheck =
       Exp.expandCodeForPredicate(&Preds, RuntimeCheckBB->getTerminator());
-  auto *CI = dyn_cast<ConstantInt>(SCEVRuntimeCheck);
-
-  // Discard the SCEV runtime check if it is always true.
-  if (CI && CI->isZero())
-    SCEVRuntimeCheck = nullptr;
 
+  IRBuilder<InstSimplifyFolder> Builder(
+      RuntimeCheckBB->getContext(),
+      InstSimplifyFolder(RuntimeCheckBB->getModule()->getDataLayout()));
   if (MemRuntimeCheck && SCEVRuntimeCheck) {
-    RuntimeCheck = BinaryOperator::Create(Instruction::Or, MemRuntimeCheck,
-                                          SCEVRuntimeCheck, "lver.safe");
-    if (auto *I = dyn_cast<Instruction>(RuntimeCheck))
-      I->insertBefore(RuntimeCheckBB->getTerminator());
+    Builder.SetInsertPoint(RuntimeCheckBB->getTerminator());
+    RuntimeCheck =
+        Builder.CreateOr(MemRuntimeCheck, SCEVRuntimeCheck, "lver.safe");
   } else
     RuntimeCheck = MemRuntimeCheck ? MemRuntimeCheck : SCEVRuntimeCheck;
 
@@ -109,8 +107,9 @@ void LoopVersioning::versionLoop(
 
   // Insert the conditional branch based on the result of the memchecks.
   Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
-  BranchInst::Create(NonVersionedLoop->getLoopPreheader(),
-                     VersionedLoop->getLoopPreheader(), RuntimeCheck, OrigTerm);
+  Builder.SetInsertPoint(OrigTerm);
+  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+                       VersionedLoop->getLoopPreheader());
   OrigTerm->eraseFromParent();
 
   // The loops merge in the original exit block.  This is now dominated by the

From b88ca619d33bc74e1776d879e43c6fc812ac4ff5 Mon Sep 17 00:00:00 2001
From: Dawid Jurczak <dawid_jurek@vp.pl>
Date: Tue, 25 Jan 2022 16:53:05 +0100
Subject: [PATCH 876/946] [NFC][CodeGen] Use llvm::DenseMap for DeferredDecls

CodeGenModule::DeferredDecls std::map::operator[] seem to be hot especially while code generating huge compilation units.
In such cases using DenseMap instead gives observable compile time improvement. Patch was tested on Linux build with default config acting as benchmark.
Build was performed on isolated CPU cores in silent x86-64 Linux environment following: https://llvm.org/docs/Benchmarking.html#linux rules.
Compile time statistics diff produced by perf and time before and after change are following:
instructions -0.15%, cycles -0.7%, max-rss +0.65%.
Using StringMap instead DenseMap doesn't bring any visible gains.

Differential Revision: https://reviews.llvm.org/D118169
---
 clang/lib/CodeGen/CodeGenModule.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 9ae9d624b925d..e803022508a4c 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -335,7 +335,7 @@ class CodeGenModule : public CodeGenTypeCache {
   /// for emission and therefore should only be output if they are actually
   /// used. If a decl is in this, then it is known to have not been referenced
   /// yet.
-  std::map<StringRef, GlobalDecl> DeferredDecls;
+  llvm::DenseMap<StringRef, GlobalDecl> DeferredDecls;
 
   /// This is a list of deferred decls which we have seen that *are* actually
   /// referenced. These get code generated when the module is done.

From 1043107ce5e2dee38f6a9bf459549a75f78a83b2 Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Thu, 27 Jan 2022 19:04:44 +0900
Subject: [PATCH 877/946] [mlir][bufferize] Insert memref.cast ops during
 finalizing pass

The pass can currently not handle to_memref(to_tensor(x)) folding where a cast is necessary. This is required with the new unified bufferization. There is already a canonicalization pattern that handles such foldings and it should be used during this pass.

Differential Revision: https://reviews.llvm.org/D117988
---
 .../Dialect/Bufferization/IR/Bufferization.h  | 12 ++++++
 .../Bufferization/IR/BufferizationOps.cpp     |  8 +++-
 .../Bufferization/Transforms/Bufferize.cpp    |  1 +
 .../Transforms/finalizing-bufferize.mlir      | 40 +++++++++++++++++--
 4 files changed, 57 insertions(+), 4 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
index 2cbfc901f239b..2a1edf8d9e099 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
@@ -20,6 +20,18 @@
 
 #include "mlir/Dialect/Bufferization/IR/BufferizationOpsDialect.h.inc"
 
+namespace mlir {
+class RewritePatternSet;
+class MLIRContext;
+
+namespace bufferization {
+/// Populate patterns for folding to_memref and to_tensor ops.
+/// Note: to_memref(to_tensor(x)) without type changes are handled by a folder.
+void populateBufferizationOpFoldingPatterns(RewritePatternSet &patterns,
+                                            MLIRContext *context);
+} // namespace bufferization
+} // namespace mlir
+
 //===----------------------------------------------------------------------===//
 // Bufferization Dialect Operations
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index f1ec7bbdead24..28a8e5d1ea982 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -240,7 +240,8 @@ static LogicalResult foldToMemrefToTensorPair(RewriterBase &rewriter,
       if (resultType.getShape()[i] != ShapedType::kDynamicSize)
         continue;
       auto index = rewriter.createOrFold<arith::ConstantIndexOp>(loc, i);
-      Value size = rewriter.create<tensor::DimOp>(loc, memrefToTensor, index);
+      Value size =
+          rewriter.create<memref::DimOp>(loc, memrefToTensor.memref(), index);
       dynamicOperands.push_back(size);
     }
     // TODO: Use alloc/memcpy callback from BufferizationOptions if called via
@@ -309,6 +310,11 @@ void ToMemrefOp::getCanonicalizationPatterns(RewritePatternSet &results,
       context);
 }
 
+void bufferization::populateBufferizationOpFoldingPatterns(
+    RewritePatternSet &patterns, MLIRContext *context) {
+  patterns.add<TensorLoadToMemref>(context);
+}
+
 LogicalResult ToMemrefOp::bufferize(RewriterBase &rewriter,
                                     const BufferizationState &state) {
   // Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index eb7f0dedca02a..f202a7a5bbd9c 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -92,6 +92,7 @@ void mlir::bufferization::populateEliminateBufferizeMaterializationsPatterns(
     BufferizeTypeConverter &typeConverter, RewritePatternSet &patterns) {
   patterns.add<BufferizeToTensorOp, BufferizeToMemrefOp>(typeConverter,
                                                          patterns.getContext());
+  populateBufferizationOpFoldingPatterns(patterns, patterns.getContext());
 }
 
 namespace {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
index fac685ae7e725..66e1ccfe5c763 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
@@ -1,11 +1,11 @@
 // RUN: mlir-opt %s -finalizing-bufferize -split-input-file -verify-diagnostics | FileCheck %s
 
-// CHECK-LABEL:   func @eliminate_materializations(
-// CHECK-SAME:                                     %[[ARG:.*]]: memref<f32>) -> memref<f32> {
-// CHECK:           return %[[ARG]] : memref<f32>
+// CHECK-LABEL: func @eliminate_materializations(
+//  CHECK-SAME:     %[[ARG:.*]]: memref<f32>) -> memref<f32> {
 func @eliminate_materializations(%arg0: memref<f32>) -> memref<f32> {
   %0 = bufferization.to_tensor %arg0 : memref<f32>
   %1 = bufferization.to_memref %0 : memref<f32>
+  // CHECK: return %[[ARG]] : memref<f32>
   return %1 : memref<f32>
 }
 
@@ -26,3 +26,37 @@ func @unable_to_convert_lone_tensor_load(%arg0: memref<f32>) {
   "test.sink"(%0) : (tensor<f32>) -> ()
   return
 }
+
+// -----
+
+// CHECK: #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+#map1 = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @insert_memref_cast(
+//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32>
+func @insert_memref_cast(%arg0: memref<?xf32>) -> memref<?xf32, #map1> {
+  %0 = bufferization.to_tensor %arg0 : memref<?xf32>
+  %1 = bufferization.to_memref %0 : memref<?xf32, #map1>
+  // CHECK: %[[r:.*]] = memref.cast %[[arg0]] : memref<?xf32> to memref<?xf32, #[[$MAP1]]>
+  // CHECK: return %[[r]]
+  return %1 : memref<?xf32, #map1>
+}
+
+// -----
+
+// CHECK: #[[$MAP2:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+#map2 = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
+
+// CHECK-LABEL: func @insert_buffer_copy(
+//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32, #[[$MAP2]]>
+func @insert_buffer_copy(%arg0: memref<?xf32, #map2>) -> memref<?xf32> {
+  // CHECK: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK: %[[dim0:.*]] = memref.dim %[[arg0]], %[[c0]]
+  // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim0]]) : memref<?xf32>
+  // CHECK: memref.copy %[[arg0]], %[[alloc]]
+  %0 = bufferization.to_tensor %arg0 : memref<?xf32, #map2>
+  %1 = bufferization.to_memref %0 : memref<?xf32>
+
+  // CHECK: return %[[alloc]]
+  return %1 : memref<?xf32>
+}

From 95857a705886c1e8c3ca518b9b856a0caa4f4987 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Wed, 26 Jan 2022 16:50:08 +0000
Subject: [PATCH 878/946] [AMDGPU] SILoadStoreOptimizer: Remove redundant check
 for volatile

SILoadStoreOptimizer::collectMergeableInsts already ends the current
block if it sees a volatile (or ordered) memory access, so there is no
need to check for them again when scanning the instructions between two
pairing candidates in a block.

Differential Revision: https://reviews.llvm.org/D118266
---
 llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 48f6c7ef1bcad..d1133c12bc18d 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -971,10 +971,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
       continue;
     }
 
-    // Don't merge volatiles.
-    if (MBBI->hasOrderedMemoryRef())
-      return false;
-
     int Swizzled =
         AMDGPU::getNamedOperandIdx(MBBI->getOpcode(), AMDGPU::OpName::swz);
     if (Swizzled != -1 && MBBI->getOperand(Swizzled).getImm())

From 792a4095c5514c89330571e764e8981b484bd6ca Mon Sep 17 00:00:00 2001
From: Petr Hosek <phosek@google.com>
Date: Thu, 27 Jan 2022 02:27:18 -0800
Subject: [PATCH 879/946] [CMake][Fuchsia] Only build iossim runtimes for arm64

x86_64 isn't supported with the recent Xcode SDK.
---
 clang/cmake/caches/Fuchsia-stage2.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake
index bc60b0df6d4af..8c593e4c6310c 100644
--- a/clang/cmake/caches/Fuchsia-stage2.cmake
+++ b/clang/cmake/caches/Fuchsia-stage2.cmake
@@ -71,7 +71,7 @@ if(APPLE)
   set(LIBCXX_ENABLE_STATIC_ABI_LIBRARY ON CACHE BOOL "")
   set(LIBCXX_ABI_VERSION 2 CACHE STRING "")
   set(DARWIN_ios_ARCHS arm64 CACHE STRING "")
-  set(DARWIN_iossim_ARCHS arm64;x86_64 CACHE STRING "")
+  set(DARWIN_iossim_ARCHS arm64 CACHE STRING "")
   set(DARWIN_osx_ARCHS arm64;x86_64 CACHE STRING "")
 endif()
 

From d58757e522a44051e7fa1eb1631937fb2d7b0810 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Thu, 27 Jan 2022 09:17:29 +0000
Subject: [PATCH 880/946] [AArch64][SVE] Implement PFALSE with explicit
 AArch64ISD node.

The ISel patterns for PFALSE helps recognise the instructions as being
free of side-effects, which helps MachineCSE remove redundant
PFALSE instructions.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D118054
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  3 ++-
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  1 +
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  2 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 11 ++++++++
 .../AArch64/sve-pfalse-machine-cse.mir        | 26 +++++++++++++++++++
 5 files changed, 41 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index cce714ea918e8..b7602fd829a3c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2200,6 +2200,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::INSR)
     MAKE_CASE(AArch64ISD::PTEST)
     MAKE_CASE(AArch64ISD::PTRUE)
+    MAKE_CASE(AArch64ISD::PFALSE)
     MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
@@ -9984,7 +9985,7 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
     // lowering code.
     if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
       if (ConstVal->isZero())
-        return SDValue(DAG.getMachineNode(AArch64::PFALSE, dl, VT), 0);
+        return DAG.getNode(AArch64ISD::PFALSE, dl, VT);
       if (ConstVal->isOne())
         return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
     }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index df19a4729bb49..90b947dd814b2 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -323,6 +323,7 @@ enum NodeType : unsigned {
   INSR,
   PTEST,
   PTRUE,
+  PFALSE,
 
   BITREVERSE_MERGE_PASSTHRU,
   BSWAP_MERGE_PASSTHRU,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 63cd8f476272a..3e7c46e6e43a6 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -730,7 +730,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
   defm BRKBS_PPzP : sve_int_break_z<0b110, "brkbs", null_frag>;
 
   def PTEST_PP : sve_int_ptest<0b010000, "ptest">;
-  def PFALSE   : sve_int_pfalse<0b000000, "pfalse">;
+  defm PFALSE  : sve_int_pfalse<0b000000, "pfalse">;
   defm PFIRST  : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
   defm PNEXT   : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
 
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 359f5afa3b54c..eb965261e1832 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -334,6 +334,8 @@ multiclass sve_int_ptrue<bits<3> opc, string asm, SDPatternOperator op> {
 
 def SDT_AArch64PTrue : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
 def AArch64ptrue : SDNode<"AArch64ISD::PTRUE", SDT_AArch64PTrue>;
+def SDT_AArch64PFalse : SDTypeProfile<1, 0, [SDTCisVec<0>, SDTCVecEltisVT<0, i1>]>;
+def AArch64pfalse : SDNode<"AArch64ISD::PFALSE", SDT_AArch64PFalse>;
 
 let Predicates = [HasSVEorStreamingSVE] in {
   defm PTRUE  : sve_int_ptrue<0b000, "ptrue", AArch64ptrue>;
@@ -609,6 +611,15 @@ class sve_int_pfalse<bits<6> opc, string asm>
   let isReMaterializable = 1;
 }
 
+multiclass sve_int_pfalse<bits<6> opc, string asm> {
+  def NAME : sve_int_pfalse<opc, asm>;
+
+  def : Pat<(nxv16i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+  def : Pat<(nxv8i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+  def : Pat<(nxv4i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+  def : Pat<(nxv2i1 (AArch64pfalse)), (!cast<Instruction>(NAME))>;
+}
+
 class sve_int_ptest<bits<6> opc, string asm>
 : I<(outs), (ins PPRAny:$Pg, PPR8:$Pn),
   asm, "\t$Pg, $Pn",
diff --git a/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir b/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir
new file mode 100644
index 0000000000000..b76fe7821b6c6
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-pfalse-machine-cse.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -run-pass=machine-cse -mtriple=aarch64 -mattr=+sve -o - %s | FileCheck %s
+---
+name:            pfalse
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $p0
+
+    ; CHECK-LABEL: name: pfalse
+    ; CHECK: liveins: $p0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ppr = COPY $p0
+    ; CHECK-NEXT: [[PFALSE:%[0-9]+]]:ppr = PFALSE
+    ; CHECK-NEXT: [[UZP1_PPP_B:%[0-9]+]]:ppr = UZP1_PPP_B [[COPY]], [[PFALSE]]
+    ; CHECK-NEXT: [[UZP1_PPP_B1:%[0-9]+]]:ppr = UZP1_PPP_B killed [[UZP1_PPP_B]], [[PFALSE]]
+    ; CHECK-NEXT: $p0 = COPY [[UZP1_PPP_B1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $p0
+    %0:ppr = COPY $p0
+    %2:ppr = PFALSE
+    %3:ppr = UZP1_PPP_B %0, %2
+    %4:ppr = PFALSE
+    %5:ppr = UZP1_PPP_B killed %3, %4
+    $p0 = COPY %5
+    RET_ReallyLR implicit $p0
+...

From daf18108ecc959ce20c25af33618c934b470f350 Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Thu, 27 Jan 2022 19:18:59 +0900
Subject: [PATCH 881/946] [mlir][tensor] Replace tensor-bufferize with
 BufferizableOpInterface impl

This commit switches the `tensor-bufferize` pass over to BufferizableOpInterface-based bufferization.

Differential Revision: https://reviews.llvm.org/D118246
---
 .../IR/BufferizableOpInterface.h              |  20 +-
 .../Bufferization/Transforms/Bufferize.h      |  15 ++
 .../mlir/Dialect/Tensor/Transforms/Passes.h   |  10 -
 .../mlir/Dialect/Tensor/Transforms/Passes.td  |   5 -
 .../IR/BufferizableOpInterface.cpp            |  19 ++
 .../Bufferization/Transforms/Bufferize.cpp    |  52 ++++-
 .../Dialect/Tensor/Transforms/Bufferize.cpp   | 211 ++----------------
 .../comprehensive-module-bufferize.mlir       |  52 -----
 mlir/test/Dialect/Tensor/bufferize.mlir       | 119 +++++++---
 9 files changed, 207 insertions(+), 296 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
index 5107710413ead..534f664483be2 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h
@@ -255,7 +255,7 @@ class BufferizationState {
   const BufferizationOptions &getOptions() const { return options; }
 
 protected:
-  BufferizationState(const BufferizationOptions &options);
+  explicit BufferizationState(const BufferizationOptions &options);
 
   // BufferizationState should be passed as a reference.
   BufferizationState(const BufferizationState &) = delete;
@@ -270,6 +270,24 @@ class BufferizationState {
   const BufferizationOptions &options;
 };
 
+/// This a "no analysis, always copy" BufferizationState. In the absence of an
+/// analysis, a buffer must be copied each time it is written to. Therefore, all
+/// OpOperands that bufferize to a memory write must bufferize out-of-place.
+class AlwaysCopyBufferizationState : public BufferizationState {
+public:
+  explicit AlwaysCopyBufferizationState(const BufferizationOptions &options);
+
+  AlwaysCopyBufferizationState(const AlwaysCopyBufferizationState &) = delete;
+
+  virtual ~AlwaysCopyBufferizationState() = default;
+
+  /// Return `true` if the given OpResult has been decided to bufferize inplace.
+  bool isInPlace(OpOperand &opOperand) const override;
+
+  /// Return true if `v1` and `v2` bufferize to equivalent buffers.
+  bool areEquivalentBufferizedValues(Value v1, Value v2) const override;
+};
+
 /// Replace an op with replacement values. The op is deleted. Tensor OpResults
 /// must be replaced with memref values.
 void replaceOpWithBufferizedValues(RewriterBase &rewriter, Operation *op,
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
index b587955d65b13..a56287995aa96 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h
@@ -69,6 +69,21 @@ void populateEliminateBufferizeMaterializationsPatterns(
 // TODO: Extract `options` from `state` and pass as separate argument.
 LogicalResult bufferizeOp(Operation *op, const BufferizationState &state);
 
+/// Bufferize `op` and its nested ops that implement `BufferizableOpInterface`.
+/// Buffers are duplicated and copied before any tensor use that bufferizes to
+/// a memory write.
+///
+/// Note: This function bufferizes ops without utilizing analysis results. It
+/// can be used to implement partial bufferization passes.
+LogicalResult bufferizeOp(Operation *op, const BufferizationOptions &options);
+
+/// Populate the pattern set with a pattern that bufferizes ops that implement
+/// `BufferizableOpInterface`.
+void populateBufferizationPattern(const BufferizationState &state,
+                                  RewritePatternSet &patterns);
+
+std::unique_ptr<BufferizationOptions> getPartialBufferizationOptions();
+
 } // namespace bufferization
 } // namespace mlir
 
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h
index f90d02bda22d3..a346577e2f569 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h
@@ -12,16 +12,6 @@
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
-namespace bufferization {
-class BufferizeTypeConverter;
-} // namespace bufferization
-
-class RewritePatternSet;
-
-void populateTensorBufferizePatterns(
-    bufferization::BufferizeTypeConverter &typeConverter,
-    RewritePatternSet &patterns);
-
 /// Creates an instance of `tensor` dialect bufferization pass.
 std::unique_ptr<Pass> createTensorBufferizePass();
 
diff --git a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td
index 57c754c84c321..77743134325f5 100644
--- a/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Tensor/Transforms/Passes.td
@@ -14,11 +14,6 @@ include "mlir/Pass/PassBase.td"
 def TensorBufferize : Pass<"tensor-bufferize", "FuncOp"> {
   let summary = "Bufferize the `tensor` dialect";
   let constructor = "mlir::createTensorBufferizePass()";
-  let dependentDialects = [
-    "bufferization::BufferizationDialect",
-    "memref::MemRefDialect",
-    "scf::SCFDialect"
-  ];
 }
 
 #endif // MLIR_DIALECT_TENSOR_TRANSFORMS_PASSES
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
index 276950d4ef19e..3af1c37594f96 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp
@@ -318,6 +318,25 @@ void bufferization::replaceOpWithBufferizedValues(RewriterBase &rewriter,
   rewriter.eraseOp(op);
 }
 
+AlwaysCopyBufferizationState::AlwaysCopyBufferizationState(
+    const BufferizationOptions &options)
+    : BufferizationState(options) {}
+
+/// Return `true` if the given OpResult has been decided to bufferize inplace.
+bool AlwaysCopyBufferizationState::isInPlace(OpOperand &opOperand) const {
+  // OpOperands that bufferize to a memory write are out-of-place, i.e., an
+  // alloc and copy is inserted.
+  return !bufferizesToMemoryWrite(opOperand);
+}
+
+/// Return true if `v1` and `v2` bufferize to equivalent buffers.
+bool AlwaysCopyBufferizationState::areEquivalentBufferizedValues(
+    Value v1, Value v2) const {
+  // There is no analysis, so we do not know if the values are equivalent. The
+  // conservative answer is "false".
+  return false;
+}
+
 //===----------------------------------------------------------------------===//
 // Bufferization-specific scoped alloc/dealloc insertion support.
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index f202a7a5bbd9c..d31c3532509e7 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -207,9 +207,59 @@ LogicalResult bufferization::bufferizeOp(Operation *op,
                                          const BufferizationState &state) {
   // Bufferize the op and its nested ops.
   RewritePatternSet patterns(op->getContext());
-  patterns.add<BufferizationPattern>(op->getContext(), state);
+  populateBufferizationPattern(state, patterns);
   if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
     return failure();
 
   return checkBufferizationResult(op, state.getOptions());
 }
+
+namespace {
+/// This a "no analysis, always copy" BufferizationState. In the absence of an
+/// analysis, a buffer must be copied each time it is written to. Therefore, all
+/// OpOperands that bufferize to a memory write must bufferize out-of-place.
+class AlwaysCopyBufferizationState : public BufferizationState {
+public:
+  AlwaysCopyBufferizationState(const BufferizationOptions &options)
+      : BufferizationState(options) {}
+
+  AlwaysCopyBufferizationState(const AlwaysCopyBufferizationState &) = delete;
+
+  virtual ~AlwaysCopyBufferizationState() = default;
+
+  /// Return `true` if the given OpResult has been decided to bufferize inplace.
+  bool isInPlace(OpOperand &opOperand) const override {
+    // OpOperands that bufferize to a memory write are out-of-place, i.e., an
+    // alloc and copy is inserted.
+    return !bufferizesToMemoryWrite(opOperand);
+  }
+
+  /// Return true if `v1` and `v2` bufferize to equivalent buffers.
+  bool areEquivalentBufferizedValues(Value v1, Value v2) const override {
+    // There is no analysis, so we do not know if the values are equivalent. The
+    // conservative answer is "false".
+    return false;
+  }
+};
+} // namespace
+
+LogicalResult bufferization::bufferizeOp(Operation *op,
+                                         const BufferizationOptions &options) {
+  AlwaysCopyBufferizationState state(options);
+  return bufferizeOp(op, state);
+}
+
+void bufferization::populateBufferizationPattern(
+    const BufferizationState &state, RewritePatternSet &patterns) {
+  patterns.add<BufferizationPattern>(patterns.getContext(), state);
+}
+
+std::unique_ptr<BufferizationOptions>
+bufferization::getPartialBufferizationOptions() {
+  auto options = std::make_unique<BufferizationOptions>();
+  options->allowReturnMemref = true;
+  options->allowUnknownOps = true;
+  options->createDeallocs = false;
+  options->fullyDynamicLayoutMaps = false;
+  return options;
+}
diff --git a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp
index 0bd01dabdfef5..1d435c64d9287 100644
--- a/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp
@@ -13,223 +13,40 @@
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
 #include "PassDetail.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 #include "mlir/Dialect/SCF/SCF.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Tensor/Transforms/Passes.h"
 #include "mlir/IR/ImplicitLocOpBuilder.h"
 #include "mlir/Transforms/DialectConversion.h"
 
 using namespace mlir;
+using namespace bufferization;
 
 namespace {
-struct BufferizeCastOp : public OpConversionPattern<tensor::CastOp> {
-  using OpConversionPattern::OpConversionPattern;
-  LogicalResult
-  matchAndRewrite(tensor::CastOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto resultType = getTypeConverter()->convertType(op.getType());
-    rewriter.replaceOpWithNewOp<memref::CastOp>(op, resultType,
-                                                adaptor.getOperands()[0]);
-    return success();
-  }
-};
-
-struct BufferizeDimOp : public OpConversionPattern<tensor::DimOp> {
-  using OpConversionPattern::OpConversionPattern;
-  LogicalResult
-  matchAndRewrite(tensor::DimOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    rewriter.replaceOpWithNewOp<memref::DimOp>(op, adaptor.source(),
-                                               adaptor.index());
-    return success();
-  }
-};
-
-struct BufferizeExtractOp : public OpConversionPattern<tensor::ExtractOp> {
-  using OpConversionPattern::OpConversionPattern;
-  LogicalResult
-  matchAndRewrite(tensor::ExtractOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    rewriter.replaceOpWithNewOp<memref::LoadOp>(op, adaptor.tensor(),
-                                                adaptor.indices());
-    return success();
-  }
-};
-
-struct BufferizeFromElementsOp
-    : public OpConversionPattern<tensor::FromElementsOp> {
-public:
-  using OpConversionPattern::OpConversionPattern;
-  LogicalResult
-  matchAndRewrite(tensor::FromElementsOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    Location loc = op.getLoc();
-    auto tensorType = op.getType().cast<RankedTensorType>();
-    auto shape = tensorType.getShape();
-
-    // Allocate a buffer for the result.
-    auto resultType =
-        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
-    Value buffer = rewriter.create<memref::AllocOp>(loc, resultType);
-
-    // Case: tensor<0xelem_type>.
-    if (op.elements().empty()) {
-      rewriter.replaceOp(op, {buffer});
-      return success();
-    }
-
-    // Case: tensor<elem_type>.
-    if (shape.empty()) {
-      rewriter.create<memref::StoreOp>(loc, op.elements().front(), buffer);
-      rewriter.replaceOp(op, {buffer});
-      return success();
-    }
-
-    // Create constants for the range of possible indices [0, max{shape_i}).
-    auto maxDim = *std::max_element(shape.begin(), shape.end());
-    SmallVector<Value, 2> constants;
-    constants.reserve(maxDim);
-    for (int i = 0; i < maxDim; ++i)
-      constants.push_back(rewriter.create<arith::ConstantIndexOp>(loc, i));
-
-    // Traverse all `elements` and create `memref.store` ops.
-    ImplicitLocOpBuilder b(loc, rewriter);
-    auto elementIt = adaptor.elements().begin();
-    SmallVector<Value, 2> indices(tensorType.getRank(), constants[0]);
-    createStores(/*dim=*/0, buffer, shape, constants, elementIt, indices, b);
-
-    rewriter.replaceOp(op, {buffer});
-    return success();
-  }
-
-private:
-  // Implements backtracking to traverse indices of the output buffer while
-  // iterating over op.elements().
-  void createStores(int dim, Value buffer, ArrayRef<int64_t> shape,
-                    ArrayRef<Value> constants, ValueRange::iterator &elementIt,
-                    SmallVectorImpl<Value> &indices,
-                    ImplicitLocOpBuilder b) const {
-    if (dim == static_cast<int>(shape.size()) - 1) {
-      for (int i = 0; i < shape.back(); ++i) {
-        indices.back() = constants[i];
-        b.create<memref::StoreOp>(*elementIt, buffer, indices);
-        ++elementIt;
-      }
-      return;
-    }
-    for (int i = 0; i < shape[dim]; ++i) {
-      indices[dim] = constants[i];
-      createStores(dim + 1, buffer, shape, constants, elementIt, indices, b);
-    }
-  }
-};
-
-struct BufferizeGenerateOp : public OpConversionPattern<tensor::GenerateOp> {
-  using OpConversionPattern::OpConversionPattern;
-
-  LogicalResult
-  matchAndRewrite(tensor::GenerateOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const final {
-    // Allocate memory.
-    Location loc = op.getLoc();
-    RankedTensorType tensorType = op.getType().cast<RankedTensorType>();
-    MemRefType memrefType =
-        MemRefType::get(tensorType.getShape(), tensorType.getElementType());
-    Value result = rewriter.create<memref::AllocOp>(loc, memrefType,
-                                                    adaptor.dynamicExtents());
-
-    // Collect loop bounds.
-    int64_t rank = tensorType.getRank();
-    Value zero = rewriter.create<arith::ConstantIndexOp>(loc, 0);
-    Value one = rewriter.create<arith::ConstantIndexOp>(loc, 1);
-    SmallVector<Value, 4> lowerBounds(rank, zero);
-    SmallVector<Value, 4> steps(rank, one);
-    SmallVector<Value, 4> upperBounds;
-    int nextDynamicIndex = 0;
-    for (int i = 0; i < rank; i++) {
-      Value upperBound = tensorType.isDynamicDim(i)
-                             ? adaptor.dynamicExtents()[nextDynamicIndex++]
-                             : rewriter.create<arith::ConstantIndexOp>(
-                                   loc, memrefType.getDimSize(i));
-      upperBounds.push_back(upperBound);
-    }
-
-    // Generate tensor elements with a parallel loop that stores into
-    // each element of the resulting memref.
-    //
-    // This is a bit tricky. We cannot simply clone the ops because when an op
-    // is cloned, it must be legalized. However, we want to allow arbitrary ops
-    // in the body that we don't necessarily have legalization patterns for as
-    // part of this dialect conversion invocation.
-    //
-    // To accomplish this, we use mergeBlockBefore to "move" this op's body
-    // into the scf.parallel's body.
-    auto parallel =
-        rewriter.create<scf::ParallelOp>(loc, lowerBounds, upperBounds, steps);
-    Block *parallelBody = parallel.getBody();
-    rewriter.mergeBlockBefore(op.getBody(), parallelBody->getTerminator(),
-                              parallelBody->getArguments());
-    // Replace the inlined yield op with a store op. The scf.parallel's builder
-    // already populated an scf.yield at the end, so we don't need to worry
-    // about creating that.
-    Operation *elementYield = parallelBody->getTerminator()->getPrevNode();
-    rewriter.setInsertionPointAfter(elementYield);
-    rewriter.replaceOpWithNewOp<memref::StoreOp>(
-        elementYield, elementYield->getOperands()[0], result,
-        parallelBody->getArguments());
-
-    rewriter.replaceOp(op, {result});
-    return success();
-  }
-};
-
-struct BufferizeRankOp : public OpConversionPattern<tensor::RankOp> {
-  using OpConversionPattern::OpConversionPattern;
-  LogicalResult
-  matchAndRewrite(tensor::RankOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    rewriter.replaceOpWithNewOp<memref::RankOp>(op, op.getType(),
-                                                adaptor.tensor());
-    return success();
-  }
-};
-
 struct TensorBufferizePass : public TensorBufferizeBase<TensorBufferizePass> {
   void runOnOperation() override {
-    auto *context = &getContext();
-    bufferization::BufferizeTypeConverter typeConverter;
+    std::unique_ptr<BufferizationOptions> options =
+        getPartialBufferizationOptions();
+    options->addToDialectFilter<tensor::TensorDialect>();
 
-    ConversionTarget target(*context);
-    target.addLegalDialect<scf::SCFDialect, memref::MemRefDialect>();
-    target.addDynamicallyLegalDialect<arith::ArithmeticDialect,
-                                      StandardOpsDialect>(
-        [&](Operation *op) { return typeConverter.isLegal(op); });
-    target.addLegalOp<CallOp, ReturnOp>();
-    target.addIllegalOp<tensor::CastOp, tensor::ExtractOp,
-                        tensor::FromElementsOp, tensor::GenerateOp>();
-    bufferization::populateBufferizeMaterializationLegality(target);
-
-    RewritePatternSet patterns(context);
-    populateTensorBufferizePatterns(typeConverter, patterns);
-    if (failed(applyPartialConversion(getOperation(), target,
-                                      std::move(patterns))))
+    if (failed(bufferizeOp(getOperation(), *options)))
       signalPassFailure();
   }
-};
 
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<bufferization::BufferizationDialect, memref::MemRefDialect,
+                    tensor::TensorDialect, scf::SCFDialect,
+                    arith::ArithmeticDialect>();
+    tensor::registerBufferizableOpInterfaceExternalModels(registry);
+  }
+};
 } // namespace
 
-void mlir::populateTensorBufferizePatterns(
-    bufferization::BufferizeTypeConverter &typeConverter,
-    RewritePatternSet &patterns) {
-  patterns.add<BufferizeCastOp, BufferizeDimOp, BufferizeExtractOp,
-               BufferizeFromElementsOp, BufferizeGenerateOp, BufferizeRankOp>(
-      typeConverter, patterns.getContext());
-}
-
 std::unique_ptr<Pass> mlir::createTensorBufferizePass() {
   return std::make_unique<TensorBufferizePass>();
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
index 42734ae1e9ad5..c8dcf748df11d 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir
@@ -1355,55 +1355,3 @@ func @write_after_select_read_one(
   // CHECK: return %[[f]], %[[select]]
   return %f, %w : f32, tensor<?xf32>
 }
-
-// -----
-
-// CHECK-LABEL: func @tensor_rank(
-//  CHECK-SAME:     %[[arg0:.*]]: memref<*xf32>
-func @tensor_rank(%arg0: tensor<*xf32>) -> index {
-  // CHECK: %[[r:.*]] = memref.rank %[[arg0]]
-  %0 = tensor.rank %arg0 : tensor<*xf32>
-  // CHECK: return %[[r]] : index
-  return %0 : index
-}
-
-// -----
-
-// CHECK-LABEL: func @tensor_generate_static_and_dynamic(
-//  CHECK-SAME:     %[[arg0:.*]]: index
-func @tensor_generate_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> {
-  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[c16:.*]] = arith.constant 16 : index
-  // CHECK: %[[alloc:.*]] = memref.alloc(%[[arg0]]) {{.*}} : memref<16x?xindex>
-  // CHECK: scf.parallel (%[[arg1:.*]], %[[arg2:.*]]) = (%[[c0]], %[[c0]]) to (%[[c16]], %[[arg0]]) {{.*}} {
-  %result = tensor.generate %arg0 {
-  ^bb0(%i: index, %j: index):
-    %sum = arith.addi %i, %j : index
-    // CHECK: memref.store {{.*}}, %[[alloc]][%[[arg1]], %[[arg2]]]
-    // CHECK: scf.yield
-    tensor.yield %sum : index
-  } : tensor<16x?xindex>
-  // CHECK: }
-  return %result : tensor<16x?xindex>
-}
-
-// -----
-
-// CHECK-LABEL: func @tensor_from_elements_2d(
-//  CHECK-SAME:     %[[ELEM0:.*]]: index, %[[ELEM1:.*]]: index
-func @tensor_from_elements_2d(%arg0: index, %arg1: index) -> tensor<3x2xindex> {
-  // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
-  // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
-  // CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
-  // CHECK-DAG: %[[MEMREF:.*]] = memref.alloc() {{.*}} : memref<3x2xindex>
-  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C0]], %[[C0]]]
-  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C0]], %[[C1]]]
-  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C1]], %[[C0]]]
-  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C1]], %[[C1]]]
-  //     CHECK: store %[[ELEM0]], %[[MEMREF]][%[[C2]], %[[C0]]]
-  //     CHECK: store %[[ELEM1]], %[[MEMREF]][%[[C2]], %[[C1]]]
-  %0 = tensor.from_elements %arg0, %arg1, %arg0, %arg1, %arg0, %arg1
-         : tensor<3x2xindex>
-  //     CHECK: return %[[MEMREF]]
-  return %0 : tensor<3x2xindex>
-}
diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir
index c6dd6b9310d92..b0415ce1464ce 100644
--- a/mlir/test/Dialect/Tensor/bufferize.mlir
+++ b/mlir/test/Dialect/Tensor/bufferize.mlir
@@ -1,5 +1,7 @@
 // RUN: mlir-opt %s -tensor-bufferize | FileCheck %s
 
+// CHECK-DAG: #[[$MAP:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
+
 // CHECK-LABEL:   func @dim(
 // CHECK-SAME:              %[[TENSOR:.*]]: tensor<f32>,
 // CHECK-SAME:              %[[INDEX:.*]]: index) -> index {
@@ -66,8 +68,7 @@ func @tensor.extract(%arg0: tensor<?xf32>, %arg1: index) -> f32 {
 }
 
 // CHECK-LABEL:   func @tensor.from_elements_no_elements() -> tensor<0xindex> {
-// CHECK:           %[[MEMREF:.*]] = memref.alloc() : memref<0xindex>
-// CHECK:           %[[RET:.*]] = bufferization.to_tensor %[[MEMREF]]
+// CHECK:           %[[RET:.*]] = arith.constant dense<> : tensor<0xindex>
 // CHECK:           return %[[RET]] : tensor<0xindex>
 func @tensor.from_elements_no_elements() -> tensor<0xindex> {
   %0 = tensor.from_elements : tensor<0xindex>
@@ -76,7 +77,7 @@ func @tensor.from_elements_no_elements() -> tensor<0xindex> {
 
 // CHECK-LABEL:   func @tensor.from_elements_0d(
 // CHECK-SAME:        %[[ELEM0:.*]]: index) -> tensor<index> {
-// CHECK:           %[[MEMREF:.*]] = memref.alloc() : memref<index>
+// CHECK:           %[[MEMREF:.*]] = memref.alloc() {{.*}} : memref<index>
 // CHECK:           store %[[ELEM0]], %[[MEMREF]]
 // CHECK:           %[[RET:.*]] = bufferization.to_tensor %[[MEMREF]]
 // CHECK:           return %[[RET]] : tensor<index>
@@ -88,9 +89,9 @@ func @tensor.from_elements_0d(%arg0: index) -> tensor<index> {
 // CHECK-LABEL:   func @tensor.from_elements_1d(
 // CHECK-SAME:                               %[[ELEM0:.*]]: index,
 // CHECK-SAME:                               %[[ELEM1:.*]]: index) -> tensor<2xindex> {
-// CHECK:           %[[MEMREF:.*]] = memref.alloc() : memref<2xindex>
 // CHECK:           %[[C0:.*]] = arith.constant 0 : index
 // CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[MEMREF:.*]] = memref.alloc() {{.*}} : memref<2xindex>
 // CHECK:           store %[[ELEM0]], %[[MEMREF]][%[[C0]]]
 // CHECK:           store %[[ELEM1]], %[[MEMREF]][%[[C1]]]
 // CHECK:           %[[RET:.*]] = bufferization.to_tensor %[[MEMREF]]
@@ -103,10 +104,10 @@ func @tensor.from_elements_1d(%arg0: index, %arg1: index) -> tensor<2xindex> {
 // CHECK-LABEL: func @tensor.from_elements_2d(
 // CHECK-SAME:      %[[ELEM0:.*]]: index, %[[ELEM1:.*]]: index)
 // CHECK-SAME:      -> tensor<3x2xindex> {
-// CHECK:         %[[MEMREF:.*]] = memref.alloc() : memref<3x2xindex>
-// CHECK:         %[[C0:.*]] = arith.constant 0 : index
-// CHECK:         %[[C1:.*]] = arith.constant 1 : index
-// CHECK:         %[[C2:.*]] = arith.constant 2 : index
+// CHECK-DAG:     %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG:     %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG:     %[[C2:.*]] = arith.constant 2 : index
+// CHECK:         %[[MEMREF:.*]] = memref.alloc() {{.*}} : memref<3x2xindex>
 // CHECK:         store %[[ELEM0]], %[[MEMREF]][%[[C0]], %[[C0]]]
 // CHECK:         store %[[ELEM1]], %[[MEMREF]][%[[C0]], %[[C1]]]
 // CHECK:         store %[[ELEM0]], %[[MEMREF]][%[[C1]], %[[C0]]]
@@ -121,9 +122,9 @@ func @tensor.from_elements_2d(%arg0: index, %arg1: index) -> tensor<3x2xindex> {
   return %0 : tensor<3x2xindex>
 }
 
-// CHECK-LABEL: func @tensor.from_elements_3d()
+// CHECK-LABEL: func @tensor.from_elements_3d(
+//  CHECK-SAME:     %[[F0:.*]]: f32
 
-// CHECK-DAG: %[[F0:.*]] = arith.constant 0.0
 // CHECK-DAG: %[[F1:.*]] = arith.constant 1.0{{0+}}e+00
 // CHECK-DAG: %[[F2:.*]] = arith.constant 2.0
 // CHECK-DAG: %[[F3:.*]] = arith.constant 3.0
@@ -136,11 +137,11 @@ func @tensor.from_elements_2d(%arg0: index, %arg1: index) -> tensor<3x2xindex> {
 // CHECK-DAG: %[[F10:.*]] = arith.constant 1.0{{0+}}e+01
 // CHECK-DAG: %[[F11:.*]] = arith.constant 1.1{{0+}}e+01
 
-// CHECK: %[[MEMREF:.*]] = memref.alloc() : memref<3x2x2xf32>
+// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index
 
-// CHECK: %[[C0:.*]] = arith.constant 0 : index
-// CHECK: %[[C1:.*]] = arith.constant 1 : index
-// CHECK: %[[C2:.*]] = arith.constant 2 : index
+// CHECK: %[[MEMREF:.*]] = memref.alloc() {{.*}} : memref<3x2x2xf32>
 
 // CHECK: store %[[F0]], %[[MEMREF]][%[[C0]], %[[C0]], %[[C0]]]
 // CHECK: store %[[F1]], %[[MEMREF]][%[[C0]], %[[C0]], %[[C1]]]
@@ -157,8 +158,7 @@ func @tensor.from_elements_2d(%arg0: index, %arg1: index) -> tensor<3x2xindex> {
 
 // CHECK: %[[RET:.*]] = bufferization.to_tensor %[[MEMREF]]
 // CHECK: return %[[RET]] : tensor<3x2x2xf32>
-func @tensor.from_elements_3d() -> tensor<3x2x2xf32> {
-  %f0 = arith.constant 0.0 : f32
+func @tensor.from_elements_3d(%f0 : f32) -> tensor<3x2x2xf32> {
   %f1 = arith.constant 1.0 : f32
   %f2 = arith.constant 2.0 : f32
   %f3 = arith.constant 3.0 : f32
@@ -179,9 +179,9 @@ func @tensor.from_elements_3d() -> tensor<3x2x2xf32> {
 // CHECK-SAME:                                       %[[ARG:.*]]: tensor<*xf32>,
 // CHECK-SAME:                                       %[[DYNAMIC_EXTENT:.*]]: index) -> tensor<?xindex> {
 // CHECK:           %[[CASTED:.*]] = bufferization.to_memref %[[ARG]] : memref<*xf32>
-// CHECK:           %[[MEMREF:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) : memref<?xindex>
-// CHECK:           %[[C0:.*]] = arith.constant 0 : index
-// CHECK:           %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[C1:.*]] = arith.constant 1 : index
+// CHECK:           %[[MEMREF:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) {{.*}} : memref<?xindex>
 // CHECK:           scf.parallel (%[[I:.*]]) = (%[[C0]]) to (%[[DYNAMIC_EXTENT]]) step (%[[C1]]) {
 // CHECK:             %[[ELEM:.*]] = memref.dim %[[CASTED]], %[[I]] : memref<*xf32>
 // CHECK:             store %[[ELEM]], %[[MEMREF]][%[[I]]] : memref<?xindex>
@@ -203,11 +203,11 @@ func @tensor.generate(%arg: tensor<*xf32>, %dynamic_extent: index) -> tensor<?xi
 // extents.
 //
 // CHECK-LABEL:   func @tensor.generate_static_and_dynamic(
-// CHECK-SAME:                                                          %[[DYNAMIC_EXTENT:.*]]: index) -> tensor<16x?xindex> {
-// CHECK:           %[[MEMREF:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) : memref<16x?xindex>
-// CHECK:           %[[C0:.*]] = arith.constant 0 : index
-// CHECK:           %[[C1:.*]] = arith.constant 1 : index
-// CHECK:           %[[C16:.*]] = arith.constant 16 : index
+// CHECK-SAME:        %[[DYNAMIC_EXTENT:.*]]: index) -> tensor<16x?xindex> {
+// CHECK-DAG:       %[[C0:.*]] = arith.constant 0 : index
+// CHECK-DAG:       %[[C1:.*]] = arith.constant 1 : index
+// CHECK-DAG:       %[[C16:.*]] = arith.constant 16 : index
+// CHECK:           %[[MEMREF:.*]] = memref.alloc(%[[DYNAMIC_EXTENT]]) {{.*}} : memref<16x?xindex>
 // CHECK:           scf.parallel (%[[I:.*]], %[[J:.*]]) = (%[[C0]], %[[C0]]) to (%[[C16]], %[[DYNAMIC_EXTENT]]) step (%[[C1]], %[[C1]]) {
 // CHECK:             %[[VAL_7:.*]] = arith.addi %[[I]], %[[J]] : index
 // CHECK:             store %[[VAL_7]], %[[MEMREF]][%[[I]], %[[J]]] : memref<16x?xindex>
@@ -225,12 +225,6 @@ func @tensor.generate_static_and_dynamic(%arg0: index) -> tensor<16x?xindex> {
   return %result : tensor<16x?xindex>
 }
 
-// The tensor.generate op needs to put its body into the
-// resulting scf.parallel. To handle unknown ops in the body, it cannot clone
-// the body because that would require the cloned ops to be legalized
-// immediately, which is usually not possible since they might be from various
-// other dialects.
-//
 // CHECK-LABEL: func @tensor.generate_unknown_ops_in_body
 func @tensor.generate_unknown_ops_in_body(%arg0: index) -> tensor<?xindex> {
   // CHECK-NOT: tensor.generate
@@ -242,3 +236,68 @@ func @tensor.generate_unknown_ops_in_body(%arg0: index) -> tensor<?xindex> {
   } : tensor<?xindex>
   return %tensor : tensor<?xindex>
 }
+
+// CHECK-LABEL: func @tensor.extract_slice(
+//  CHECK-SAME:     %[[t1:.*]]: tensor<?x?xf32>, %[[idx1:.*]]: index, %[[idx2:.*]]: index
+func @tensor.extract_slice(
+    %t1: tensor<?x?xf32>, %idx1: index, %idx2: index) -> tensor<?x10xf32> {
+  // CHECK: %[[m:.*]] = bufferization.to_memref %[[t1]] : memref<?x?xf32>
+  // CHECK: %[[r:.*]] = memref.subview %[[m]][5, %[[idx2]]] [%[[idx1]], 10] [1, 1] : memref<?x?xf32> to memref<?x10xf32, #[[$MAP]]>
+  %0 = tensor.extract_slice %t1[5, %idx2][%idx1, 10][1, 1]
+      : tensor<?x?xf32> to tensor<?x10xf32>
+  // CHECK: %[[r_tensor:.*]] = bufferization.to_tensor %[[r]]
+  // CHECK: return %[[r_tensor]]
+  return %0 : tensor<?x10xf32>
+}
+
+// CHECK-LABEL: func @tensor.extract_slice_rank_reducing(
+//  CHECK-SAME:     %[[t1:.*]]: tensor<?x10x?xf32>, %[[idx1:.*]]: index,
+//  CHECK-SAME:     %[[idx2:.*]]: index
+func @tensor.extract_slice_rank_reducing(
+    %t1: tensor<?x10x?xf32>, %idx1: index, %idx2: index) -> tensor<?x15xf32> {
+  // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref<?x10x?xf32>
+  // CHECK: %[[r:.*]] = memref.subview %[[m1]][5, %[[idx1]], 10] [%[[idx2]], 1, 15] [1, 1, 1] : memref<?x10x?xf32> to memref<?x15xf32, #[[$MAP]]>
+  %0 = tensor.extract_slice %t1[5, %idx1, 10][%idx2, 1, 15][1, 1, 1]
+      : tensor<?x10x?xf32> to tensor<?x15xf32>
+  // CHECK: %[[r_tensor:.*]] = bufferization.to_tensor %[[r]]
+  // CHECK: return %[[r_tensor]]
+  return %0 : tensor<?x15xf32>
+}
+
+// CHECK-LABEL: func @tensor.insert_slice(
+//  CHECK-SAME:     %[[t1:.*]]: tensor<?x?xf32>, %[[t2:.*]]: tensor<?x10xf32>,
+//  CHECK-SAME:     %[[idx1:.*]]: index, %[[idx2:.*]]: index
+func @tensor.insert_slice(%t1: tensor<?x?xf32>, %t2: tensor<?x10xf32>,
+                          %idx1: index, %idx2: index) -> tensor<?x?xf32> {
+  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
+  // CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
+  // CHECK-DAG: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref<?x?xf32>
+  // CHECK-DAG: %[[m2:.*]] = bufferization.to_memref %[[t2]] : memref<?x10xf32>
+  //     CHECK: %[[dim0:.*]] = memref.dim %[[m1]], %[[c0]]
+  //     CHECK: %[[dim1:.*]] = memref.dim %[[m1]], %[[c1]]
+  //     CHECK: %[[alloc:.*]] = memref.alloc(%[[dim0]], %[[dim1]])
+  //     CHECK: memref.copy %[[m1]], %[[alloc]]
+  //     CHECK: %[[subview:.*]] = memref.subview %[[alloc]][%[[idx1]], 5] [%[[idx2]], 10] [1, 1]
+  //     CHECK: memref.copy %[[m2]], %[[subview]]
+  %0 = tensor.insert_slice %t2 into %t1[%idx1, 5][%idx2, 10][1, 1]
+      : tensor<?x10xf32> into tensor<?x?xf32>
+
+  //     CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]]
+  //     CHECK: return %[[r]]
+  return %0 : tensor<?x?xf32>
+}
+
+// CHECK-LABEL: func @tensor.insert(
+//  CHECK-SAME:     %[[t1:.*]]: tensor<5xf32>, %[[idx1:.*]]: index,
+//  CHECK-SAME:     %[[f:.*]]: f32
+func @tensor.insert(%t1: tensor<5xf32>, %idx1: index, %f: f32) -> tensor<5xf32> {
+  // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32>
+  // CHECK: %[[m1:.*]] = bufferization.to_memref %[[t1]] : memref<5xf32>
+  // CHECK: memref.copy %[[m1]], %[[alloc]]
+  // CHECK: memref.store %[[f]], %[[alloc]][%[[idx1]]]
+  %0 = tensor.insert %f into %t1[%idx1] : tensor<5xf32>
+
+  // CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]]
+  // CHECK: return %[[r]]
+  return %0 : tensor<5xf32>
+}

From 35fff208cad6f271410800998447dc0dd2704085 Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Thu, 27 Jan 2022 10:33:12 +0000
Subject: [PATCH 882/946] [OpenCL] opencl-c.h: add missing read_write image
 guards

The get_image_num_mip_levels overloads that take a read_write image
parameter were missing the __opencl_c_read_write_images guard.
---
 clang/lib/Headers/opencl-c.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index e65e3634d010d..f7a85f6578858 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -15688,7 +15688,7 @@ half4 __purefn __ovld read_imageh(read_write image1d_array_t image, int2 coord);
 half4 __purefn __ovld read_imageh(read_write image2d_array_t image, int4 coord);
 half4 __purefn __ovld read_imageh(read_write image1d_buffer_t image, int coord);
 #endif //cl_khr_fp16
-#endif //defined(__opencl_c_read_write_images
+#endif //defined(__opencl_c_read_write_images)
 
 /**
  * Write color value to location specified by coordinate
@@ -16049,9 +16049,11 @@ int __ovld get_image_num_mip_levels(write_only image2d_t image);
 int __ovld get_image_num_mip_levels(write_only image3d_t image);
 #endif
 
+#if defined(__opencl_c_read_write_images)
 int __ovld get_image_num_mip_levels(read_write image1d_t image);
 int __ovld get_image_num_mip_levels(read_write image2d_t image);
 int __ovld get_image_num_mip_levels(read_write image3d_t image);
+#endif //defined(__opencl_c_read_write_images)
 
 int __ovld get_image_num_mip_levels(read_only image1d_array_t image);
 int __ovld get_image_num_mip_levels(read_only image2d_array_t image);
@@ -16063,10 +16065,12 @@ int __ovld get_image_num_mip_levels(write_only image2d_array_t image);
 int __ovld get_image_num_mip_levels(write_only image2d_array_depth_t image);
 int __ovld get_image_num_mip_levels(write_only image2d_depth_t image);
 
+#if defined(__opencl_c_read_write_images)
 int __ovld get_image_num_mip_levels(read_write image1d_array_t image);
 int __ovld get_image_num_mip_levels(read_write image2d_array_t image);
 int __ovld get_image_num_mip_levels(read_write image2d_array_depth_t image);
 int __ovld get_image_num_mip_levels(read_write image2d_depth_t image);
+#endif //defined(__opencl_c_read_write_images)
 
 #endif //cl_khr_mipmap_image
 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)

From 648faa3b5d116acfa6463bbf24f81014b6dfbaf5 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 11:39:34 +0100
Subject: [PATCH 883/946] [InstCombine] Mark element type access as non-opaque
 (NFC)

Also make the function static to make it more obvious that it is
only used in the one place.
---
 .../Transforms/InstCombine/InstCombineInternal.h    |  2 --
 .../Transforms/InstCombine/InstructionCombining.cpp | 13 +++++++------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index e8c82786a3c36..7743b4c41555a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -197,8 +197,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const;
   bool shouldChangeType(Type *From, Type *To) const;
   Value *dyn_castNegVal(Value *V) const;
-  Type *FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
-                            SmallVectorImpl<Value *> &NewIndices);
 
   /// Classify whether a cast is worth optimizing.
   ///
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index bdc72b736b851..029be52576940 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1369,10 +1369,11 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
 /// is a sequence of GEP indices into the pointed type that will land us at the
 /// specified offset. If so, fill them into NewIndices and return the resultant
 /// element type, otherwise return null.
-Type *
-InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
-                                      SmallVectorImpl<Value *> &NewIndices) {
-  Type *Ty = PtrTy->getPointerElementType();
+static Type *findElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
+                                 SmallVectorImpl<Value *> &NewIndices,
+                                 const DataLayout &DL) {
+  // Only used by visitGEPOfBitcast(), which is skipped for opaque pointers.
+  Type *Ty = PtrTy->getNonOpaquePointerElementType();
   if (!Ty->isSized())
     return nullptr;
 
@@ -1382,7 +1383,7 @@ InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
     return nullptr;
 
   for (const APInt &Index : Indices)
-    NewIndices.push_back(Builder.getInt(Index));
+    NewIndices.push_back(ConstantInt::get(PtrTy->getContext(), Index));
   return Ty;
 }
 
@@ -2168,7 +2169,7 @@ Instruction *InstCombinerImpl::visitGEPOfBitcast(BitCastInst *BCI,
     // field at Offset in 'A's type.  If so, we can pull the cast through the
     // GEP.
     SmallVector<Value*, 8> NewIndices;
-    if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
+    if (findElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices, DL)) {
       Value *NGEP =
           GEP.isInBounds()
               ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)

From 3b259a68424b85142c8b1dc2ffdb7761920c1d8c Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Thu, 27 Jan 2022 10:48:15 +0000
Subject: [PATCH 884/946] [AMDGPU] Remove unused GFX6 check lines

---
 .../AMDGPU/GlobalISel/inst-select-store-global.s96.mir | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir
index a566455df26ee..99bd84ea234a1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.s96.mir
@@ -70,11 +70,6 @@ body: |
   bb.0:
     liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
 
-    ; GFX6-LABEL: name: store_global_s96
-    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(s96) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX6: G_STORE [[COPY1]](s96), [[COPY]](p1) :: (store 12, align 16, addrspace 1)
     ; GFX7-LABEL: name: store_global_s96
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
     ; GFX7-NEXT: {{  $}}
@@ -122,11 +117,6 @@ body: |
   bb.0:
     liveins:  $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
 
-    ; GFX6-LABEL: name: store_global_v6s16
-    ; GFX6: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
-    ; GFX6: [[COPY:%[0-9]+]]:vgpr(p1) = COPY $vgpr0_vgpr1
-    ; GFX6: [[COPY1:%[0-9]+]]:vgpr(<6 x s16>) = COPY $vgpr2_vgpr3_vgpr4
-    ; GFX6: G_STORE [[COPY1]](<6 x s16>), [[COPY]](p1) :: (store 12, align 16, addrspace 1)
     ; GFX7-LABEL: name: store_global_v6s16
     ; GFX7: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3_vgpr4
     ; GFX7-NEXT: {{  $}}

From dbd1bbced9896d5caece9ee60a7953d2c80d939c Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Thu, 27 Jan 2022 19:37:58 +0900
Subject: [PATCH 885/946] [mlir][linalg][bufferize] Support arith.index_cast
 bufferization

This is in preparation of switching `-tensor-constant-bufferize` and `-arith-bufferize` to BufferizableOpInterface-based implementations.

Differential Revision: https://reviews.llvm.org/D118324
---
 .../ArithInterfaceImpl.cpp                    | 45 +++++++++++++++++++
 .../comprehensive-function-bufferize.mlir     | 16 +++++++
 2 files changed, 61 insertions(+)

diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
index de3fbcd8b121c..2d09331ede237 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
@@ -57,6 +57,49 @@ struct ConstantOpInterface
   }
 };
 
+struct IndexCastOpInterface
+    : public BufferizableOpInterface::ExternalModel<IndexCastOpInterface,
+                                                    arith::IndexCastOp> {
+  bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand,
+                              const BufferizationState &state) const {
+    return false;
+  }
+
+  bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand,
+                               const BufferizationState &state) const {
+    return false;
+  }
+
+  OpResult getAliasingOpResult(Operation *op, OpOperand &opOperand,
+                               const BufferizationState &state) const {
+    return op->getResult(0);
+  }
+
+  BufferRelation bufferRelation(Operation *op, OpResult opResult,
+                                const BufferizationState &state) const {
+    return BufferRelation::Equivalent;
+  }
+
+  LogicalResult bufferize(Operation *op, RewriterBase &rewriter,
+                          const BufferizationState &state) const {
+    auto castOp = cast<arith::IndexCastOp>(op);
+
+    Value source = *state.getBuffer(rewriter, op->getOpOperand(0) /*in*/);
+    auto sourceType = source.getType().cast<BaseMemRefType>();
+
+    // Result type should have same layout and address space as the source type.
+    MemRefLayoutAttrInterface layout = {};
+    if (auto rankedMemRefType = sourceType.dyn_cast<MemRefType>())
+      layout = rankedMemRefType.getLayout();
+    Type resultType =
+        getMemRefType(castOp.getType().cast<TensorType>(), state.getOptions(),
+                      layout, sourceType.getMemorySpace());
+
+    replaceOpWithNewBufferizedOp<arith::IndexCastOp>(rewriter, op, source,
+                                                     resultType);
+    return success();
+  }
+};
 } // namespace arith_ext
 } // namespace comprehensive_bufferize
 } // namespace linalg
@@ -65,4 +108,6 @@ struct ConstantOpInterface
 void mlir::linalg::comprehensive_bufferize::arith_ext::
     registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
   registry.addOpInterface<arith::ConstantOp, arith_ext::ConstantOpInterface>();
+  registry
+      .addOpInterface<arith::IndexCastOp, arith_ext::IndexCastOpInterface>();
 }
diff --git a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
index 1a3b266ee4b80..75fb2be2c6533 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
@@ -96,3 +96,19 @@ func @rank_reducing(
   }
   return %5: tensor<?x1x6x8xf32>
 }
+
+// -----
+
+// CHECK: #[[$MAP:.*]] = affine_map<()[s0] -> (s0)>
+// CHECK-LABEL:   func @index_cast(
+// CHECK-SAME:  %[[TENSOR:.*]]: tensor<i32>, %[[SCALAR:.*]]: i32
+func @index_cast(%tensor: tensor<i32>, %scalar: i32) -> (tensor<index>, index) {
+  %index_tensor = arith.index_cast %tensor : tensor<i32> to tensor<index>
+  %index_scalar = arith.index_cast %scalar : i32 to index
+  return %index_tensor, %index_scalar : tensor<index>, index
+}
+// CHECK:  %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<i32, #[[$MAP]]>
+// CHECK-NEXT: %[[INDEX_MEMREF:.*]] = arith.index_cast %[[MEMREF]]
+// CHECK-SAME:   memref<i32, #[[$MAP]]> to memref<index, #[[$MAP]]>
+// CHECK-NEXT: %[[INDEX_TENSOR:.*]] = bufferization.to_tensor %[[INDEX_MEMREF]]
+// CHECK: return %[[INDEX_TENSOR]]

From 0f0e699776d731f3aae358428e56f4ae3968abe8 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 11:50:27 +0100
Subject: [PATCH 886/946] [ConstantFold] Disable gep of array bitcast fold with
 opaque pointers

Once again, this fold is meaningless with opaque pointers, as there
is no pointer element type to canonicalize. At some point, we may
want to do GEP type canonicalizations.
---
 llvm/lib/IR/ConstantFold.cpp                   | 7 ++++---
 llvm/test/Transforms/InstCombine/opaque-ptr.ll | 7 +++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 1cd95cd5d4c8e..622a984be22c7 100644
--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -2096,11 +2096,12 @@ Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
       PointerType *SrcPtrTy =
         dyn_cast<PointerType>(CE->getOperand(0)->getType());
       PointerType *DstPtrTy = dyn_cast<PointerType>(CE->getType());
-      if (SrcPtrTy && DstPtrTy) {
+      if (SrcPtrTy && DstPtrTy && !SrcPtrTy->isOpaque() &&
+          !DstPtrTy->isOpaque()) {
         ArrayType *SrcArrayTy =
-          dyn_cast<ArrayType>(SrcPtrTy->getPointerElementType());
+          dyn_cast<ArrayType>(SrcPtrTy->getNonOpaquePointerElementType());
         ArrayType *DstArrayTy =
-          dyn_cast<ArrayType>(DstPtrTy->getPointerElementType());
+          dyn_cast<ArrayType>(DstPtrTy->getNonOpaquePointerElementType());
         if (SrcArrayTy && DstArrayTy
             && SrcArrayTy->getElementType() == DstArrayTy->getElementType()
             && SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 1d73ab1689399..0b48ce523eed9 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -91,6 +91,13 @@ define ptr @gep_constexpr_2(ptr %a) {
   ret ptr getelementptr (i8, ptr bitcast (i8* @g to ptr), i32 3)
 }
 
+define ptr addrspace(1) @gep_constexpr_3(ptr %a) {
+; CHECK-LABEL: @gep_constexpr_3(
+; CHECK-NEXT:    ret ptr addrspace(1) getelementptr (i8, ptr addrspace(1) addrspacecast (ptr @g to ptr addrspace(1)), i64 3)
+;
+  ret ptr addrspace(1) getelementptr ([0 x i8], ptr addrspace(1) addrspacecast (i8* @g to ptr addrspace(1)), i64 0, i32 3)
+}
+
 define ptr @load_bitcast_1(ptr %a) {
 ; CHECK-LABEL: @load_bitcast_1(
 ; CHECK-NEXT:    [[B1:%.*]] = load ptr, ptr [[A:%.*]], align 8

From b30d9df457a63f9a1fa61e234951baa0916cffe6 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Thu, 27 Jan 2022 10:52:59 +0000
Subject: [PATCH 887/946] [AMDGPU] Remove unused CI check lines

---
 .../GlobalISel/legalize-load-global.mir       | 163 ------------------
 1 file changed, 163 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
index 100180176064b..44767810be46b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-load-global.mir
@@ -125,11 +125,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_s8_align4
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 4, addrspace 1)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: $vgpr0 = COPY [[COPY1]](s32)
     ; SI-LABEL: name: test_load_global_s8_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s8), align 4, addrspace 1)
@@ -274,28 +269,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_s16_align1
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[GEP:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 1, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
-    ; CI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[OR]](s16)
-    ; CI: $vgpr0 = COPY [[ANYEXT]](s32)
     ; SI-LABEL: name: test_load_global_s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -789,13 +762,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_s48_align8
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 6, align 8, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 281474976710655
-    ; CI: [[COPY1:%[0-9]+]]:_(s64) = COPY [[LOAD]](s64)
-    ; CI: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY1]], [[C]]
-    ; CI: $vgpr0_vgpr1 = COPY [[AND]](s64)
     ; SI-LABEL: name: test_load_global_s48_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load (s64), addrspace 1)
@@ -1689,11 +1655,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_s160_align4
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p1) :: (load 20, align 4, addrspace 1)
-    ; CI: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[LOAD]](s256)
-    ; CI: S_NOP 0, implicit [[TRUNC]](s160)
     ; SI-LABEL: name: test_load_global_s160_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
@@ -1901,10 +1862,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_s128_align4
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p1) :: (load 16, align 4, addrspace 1)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[LOAD]](s128)
     ; SI-LABEL: name: test_load_global_s128_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[COPY]](p1) :: (load (<4 x s32>), align 4, addrspace 1)
@@ -2564,10 +2521,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_p4_align8
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load 8, addrspace 1)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](p4)
     ; SI-LABEL: name: test_load_global_p4_align8
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(p4) = G_LOAD [[COPY]](p1) :: (load (p4), addrspace 1)
@@ -3971,25 +3924,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_v4s8_align2
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, align 2, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[GEP:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 1, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[GEP1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 1, align 2, addrspace 1)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
-    ; CI: [[GEP2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 1, addrspace 1)
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[LOAD]](s32)
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LOAD2]](s32)
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32)
-    ; CI: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
-    ; CI: $vgpr0 = COPY [[TRUNC]](<4 x s8>)
     ; SI-LABEL: name: test_load_global_v4s8_align2
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s16), addrspace 1)
@@ -5653,43 +5587,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_v2s16_align1
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 1, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CI: [[GEP:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 1, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[C1]](s32)
-    ; CI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD]](s32)
-    ; CI: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC1]], [[TRUNC]]
-    ; CI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
-    ; CI: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
-    ; CI: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C3]]
-    ; CI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
-    ; CI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LOAD1]](s32)
-    ; CI: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C4]]
-    ; CI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[AND1]](s32)
-    ; CI: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CI: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC2]]
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CI: [[GEP1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 1, addrspace 1)
-    ; CI: [[GEP2:%[0-9]+]]:_(p1) = G_PTR_ADD [[GEP1]], [[C]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 1, addrspace 1)
-    ; CI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LOAD2]](s32)
-    ; CI: [[AND3:%[0-9]+]]:_(s16) = G_AND [[TRUNC3]], [[TRUNC]]
-    ; CI: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
-    ; CI: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C6]](s32)
-    ; CI: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C3]]
-    ; CI: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LOAD3]](s32)
-    ; CI: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C4]]
-    ; CI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[AND4]](s32)
-    ; CI: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CI: [[OR1:%[0-9]+]]:_(s16) = G_OR [[AND3]], [[TRUNC4]]
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[OR]](s16), [[OR1]](s16)
-    ; CI: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
     ; SI-LABEL: name: test_load_global_v2s16_align1
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[ZEXTLOAD:%[0-9]+]]:_(s32) = G_ZEXTLOAD [[COPY]](p1) :: (load (s8), addrspace 1)
@@ -5972,12 +5869,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_v3s16_align4
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[COPY]](p1) :: (load 6, align 4, addrspace 1)
-    ; CI: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF
-    ; CI: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[LOAD]](<3 x s16>), 0
-    ; CI: $vgpr0_vgpr1 = COPY [[INSERT]](<4 x s16>)
     ; SI-LABEL: name: test_load_global_v3s16_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s16), align 4, addrspace 1)
@@ -10684,56 +10575,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_load_global_v16s32_align32
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load 4, align 32, addrspace 1)
-    ; CI: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CI: [[GEP:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
-    ; CI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p1) :: (load 4, addrspace 1)
-    ; CI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
-    ; CI: [[GEP1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
-    ; CI: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p1) :: (load 4, align 8, addrspace 1)
-    ; CI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
-    ; CI: [[GEP2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
-    ; CI: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[GEP2]](p1) :: (load 4, addrspace 1)
-    ; CI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
-    ; CI: [[GEP3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
-    ; CI: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[GEP3]](p1) :: (load 4, align 16, addrspace 1)
-    ; CI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 20
-    ; CI: [[GEP4:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C4]](s64)
-    ; CI: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[GEP4]](p1) :: (load 4, addrspace 1)
-    ; CI: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 24
-    ; CI: [[GEP5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C5]](s64)
-    ; CI: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[GEP5]](p1) :: (load 4, align 8, addrspace 1)
-    ; CI: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 28
-    ; CI: [[GEP6:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C6]](s64)
-    ; CI: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[GEP6]](p1) :: (load 4, addrspace 1)
-    ; CI: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
-    ; CI: [[GEP7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C7]](s64)
-    ; CI: [[LOAD8:%[0-9]+]]:_(s32) = G_LOAD [[GEP7]](p1) :: (load 4, align 32, addrspace 1)
-    ; CI: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 36
-    ; CI: [[GEP8:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C8]](s64)
-    ; CI: [[LOAD9:%[0-9]+]]:_(s32) = G_LOAD [[GEP8]](p1) :: (load 4, addrspace 1)
-    ; CI: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 40
-    ; CI: [[GEP9:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C9]](s64)
-    ; CI: [[LOAD10:%[0-9]+]]:_(s32) = G_LOAD [[GEP9]](p1) :: (load 4, align 8, addrspace 1)
-    ; CI: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 44
-    ; CI: [[GEP10:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C10]](s64)
-    ; CI: [[LOAD11:%[0-9]+]]:_(s32) = G_LOAD [[GEP10]](p1) :: (load 4, addrspace 1)
-    ; CI: [[C11:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CI: [[GEP11:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C11]](s64)
-    ; CI: [[LOAD12:%[0-9]+]]:_(s32) = G_LOAD [[GEP11]](p1) :: (load 4, align 16, addrspace 1)
-    ; CI: [[C12:%[0-9]+]]:_(s64) = G_CONSTANT i64 52
-    ; CI: [[GEP12:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C12]](s64)
-    ; CI: [[LOAD13:%[0-9]+]]:_(s32) = G_LOAD [[GEP12]](p1) :: (load 4, addrspace 1)
-    ; CI: [[C13:%[0-9]+]]:_(s64) = G_CONSTANT i64 56
-    ; CI: [[GEP13:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C13]](s64)
-    ; CI: [[LOAD14:%[0-9]+]]:_(s32) = G_LOAD [[GEP13]](p1) :: (load 4, align 8, addrspace 1)
-    ; CI: [[C14:%[0-9]+]]:_(s64) = G_CONSTANT i64 60
-    ; CI: [[GEP14:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C14]](s64)
-    ; CI: [[LOAD15:%[0-9]+]]:_(s32) = G_LOAD [[GEP14]](p1) :: (load 4, addrspace 1)
-    ; CI: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[LOAD]](s32), [[LOAD1]](s32), [[LOAD2]](s32), [[LOAD3]](s32), [[LOAD4]](s32), [[LOAD5]](s32), [[LOAD6]](s32), [[LOAD7]](s32), [[LOAD8]](s32), [[LOAD9]](s32), [[LOAD10]](s32), [[LOAD11]](s32), [[LOAD12]](s32), [[LOAD13]](s32), [[LOAD14]](s32), [[LOAD15]](s32)
-    ; CI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[BUILD_VECTOR]](<16 x s32>)
     ; SI-LABEL: name: test_load_global_v16s32_align32
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[COPY]](p1) :: (load (<16 x s32>), align 32, addrspace 1)
@@ -13785,10 +13626,6 @@ body: |
   bb.0:
     liveins: $vgpr0_vgpr1
 
-    ; CI-LABEL: name: test_ext_load_global_s64_from_4_align4
-    ; CI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
-    ; CI: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY]](p1) :: (load 4, addrspace 1)
-    ; CI: $vgpr0_vgpr1 = COPY [[LOAD]](s64)
     ; SI-LABEL: name: test_ext_load_global_s64_from_4_align4
     ; SI: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
     ; SI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p1) :: (load (s32), addrspace 1)

From 15b11e00f07500c19455c4552e6284b169d2ef9f Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Thu, 27 Jan 2022 10:56:35 +0000
Subject: [PATCH 888/946] [AMDGPU] Update MachineMemOperands syntax in
 commented out tests

---
 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
index d4f19a54f59c5..4d2b6fb509889 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-store-global.mir
@@ -422,7 +422,7 @@ body: |
 #     %0:_(p1) = COPY $vgpr0_vgpr1
 #     %1:_(s32) = COPY $vgpr2
 #     %2:_(s25) = G_TRUNC %1
-#     G_STORE %2, %0 :: (store 4, align 2, addrspace 1)
+#     G_STORE %2, %0 :: (store (s25), align 2, addrspace 1)
 # ...
 
 # ---
@@ -434,7 +434,7 @@ body: |
 #     %0:_(p1) = COPY $vgpr0_vgpr1
 #     %1:_(s32) = COPY $vgpr2
 #     %2:_(s25) = G_TRUNC %1
-#     G_STORE %2, %0 :: (store 4, align 1, addrspace 1)
+#     G_STORE %2, %0 :: (store (s25), align 1, addrspace 1)
 # ...
 
 ---

From fdd3e2c9438d0e6b8a3e2d0d1ed848d39fa25f97 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 27 Jan 2022 10:59:03 +0000
Subject: [PATCH 889/946] [DAG] SelectionDAG::getNode(N1,N2) - detect N2
 constant vector splats as well as scalars

We already perform some basic folds (add/sub with zero etc.) on scalar types, this patch adds some basic support for constant splats as well in a few cases (we can add more with future test coverage).

In the cases I've enabled, we can handle buildvector implicit truncation as we're not creating new constant nodes from the vector types - we're just returning existing nodes. This allows us to get a number of extra cases in the aarch64 tests.

I haven't enabled support for undefs in buildvector splats, as we're often checking for zero/allones patterns that return the original constant and we shouldn't be returning undef elements in some of these cases - we can enable this later if we're OK with creating new constants.

Differential Revision: https://reviews.llvm.org/D118264
---
 llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp  | 17 +++++++++++------
 .../AArch64/neon-bitwise-instructions.ll        |  3 ---
 .../CodeGen/ARM/2013-07-29-vector-or-combine.ll | 10 ++++------
 llvm/test/CodeGen/X86/vec_smulo.ll              |  6 +++---
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 28363e2b9e88e..45f3005e8f570 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5639,8 +5639,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
       std::swap(N1, N2);
   }
 
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
-  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
+  auto *N1C = dyn_cast<ConstantSDNode>(N1);
+  auto *N2C = dyn_cast<ConstantSDNode>(N2);
+
+  // Don't allow undefs in vector splats - we might be returning N2 when folding
+  // to zero etc.
+  ConstantSDNode *N2CV =
+      isConstOrConstSplat(N2, /*AllowUndefs*/ false, /*AllowTruncation*/ true);
 
   switch (Opcode) {
   default: break;
@@ -5671,9 +5676,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            N1.getValueType() == VT && "Binary operator types must match!");
     // (X & 0) -> 0.  This commonly occurs when legalizing i64 values, so it's
     // worth handling here.
-    if (N2C && N2C->isZero())
+    if (N2CV && N2CV->isZero())
       return N2;
-    if (N2C && N2C->isAllOnes()) // X & -1 -> X
+    if (N2CV && N2CV->isAllOnes()) // X & -1 -> X
       return N1;
     break;
   case ISD::OR:
@@ -5685,7 +5690,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
            N1.getValueType() == VT && "Binary operator types must match!");
     // (X ^|+- 0) -> X.  This commonly occurs when legalizing i64 values, so
     // it's worth handling here.
-    if (N2C && N2C->isZero())
+    if (N2CV && N2CV->isZero())
       return N1;
     if ((Opcode == ISD::ADD || Opcode == ISD::SUB) && VT.isVector() &&
         VT.getVectorElementType() == MVT::i1)
@@ -5791,7 +5796,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
     // size of the value, the shift/rotate count is guaranteed to be zero.
     if (VT == MVT::i1)
       return N1;
-    if (N2C && N2C->isZero())
+    if (N2CV && N2CV->isZero())
       return N1;
     break;
   case ISD::FP_ROUND:
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index b6b9d0209cff0..67713805de7c8 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -822,7 +822,6 @@ define <8 x i8> @vselect_constant_cond_zero_v8i8(<8 x i8> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi d1, #0x00000000ff00ff
 ; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    orr v0.2s, #0
 ; CHECK-NEXT:    ret
   %b = select <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i8> %a, <8 x i8> zeroinitializer
   ret <8 x i8> %b
@@ -833,7 +832,6 @@ define <4 x i16> @vselect_constant_cond_zero_v4i16(<4 x i16> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    movi d1, #0xffff00000000ffff
 ; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT:    orr v0.2s, #0
 ; CHECK-NEXT:    ret
   %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i16> %a, <4 x i16> zeroinitializer
   ret <4 x i16> %b
@@ -845,7 +843,6 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) {
 ; CHECK-NEXT:    adrp x8, .LCPI85_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI85_0]
 ; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    orr v0.4s, #0
 ; CHECK-NEXT:    ret
   %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> zeroinitializer
   ret <4 x i32> %b
diff --git a/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll b/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
index f611c934617de..4ddf669ede19f 100644
--- a/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
+++ b/llvm/test/CodeGen/ARM/2013-07-29-vector-or-combine.ll
@@ -7,15 +7,13 @@ target triple = "armv7--linux-gnueabi"
 define void @function() {
 ; CHECK: cmp r0, #0
 ; CHECK: bxne lr
-; CHECK: vmov.i32 q8, #0xff0000
 entry:
   br i1 undef, label %vector.body, label %for.end
 
-; CHECK: vld1.32 {d18, d19}, [r0]
-; CHECK: vand q10, q9, q8
-; CHECK: vbic.i16 q9, #0xff
-; CHECK: vorr q9, q9, q10
-; CHECK: vst1.32 {d18, d19}, [r0]
+; CHECK: vld1.32 {d16, d17}, [r0]
+; CHECK: vbic.i32 q8, #0xff
+; CHECK: vorr q8, q8, q9
+; CHECK: vst1.32 {d16, d17}, [r0]
 vector.body:
   %wide.load = load <4 x i32>, <4 x i32>* undef, align 4
   %0 = and <4 x i32> %wide.load, <i32 -16711936, i32 -16711936, i32 -16711936, i32 -16711936>
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index fa06df5f1b576..550c755708239 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -3282,9 +3282,9 @@ define <4 x i32> @smulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
 ; AVX-LABEL: smulo_v4i1:
 ; AVX:       # %bb.0:
 ; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpslld $31, %xmm0, %xmm0
-; AVX-NEXT:    vmovmskps %xmm0, %eax
-; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
+; AVX-NEXT:    vpslld $31, %xmm0, %xmm1
+; AVX-NEXT:    vpsrad $31, %xmm1, %xmm0
+; AVX-NEXT:    vmovmskps %xmm1, %eax
 ; AVX-NEXT:    movb %al, (%rdi)
 ; AVX-NEXT:    retq
 ;

From c9da81d99760eebbe5bb4ab1188962a7edb5e780 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Thu, 27 Jan 2022 10:31:10 +0000
Subject: [PATCH 890/946] [AArch64][SVE] Implement missing lowering for
 extract_subvector for predicates.

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D118057
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 27 ++++++-
 .../test/CodeGen/AArch64/sve-insert-vector.ll | 77 +++++++++++++++++++
 2 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b7602fd829a3c..676ee1b189143 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1248,6 +1248,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::SELECT_CC, VT, Expand);
       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
 
       // There are no legal MVT::nxv16f## based types.
       if (VT != MVT::nxv16i1) {
@@ -11038,6 +11039,28 @@ SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
     if (!isTypeLegal(VT))
       return SDValue();
 
+    // Break down insert_subvector into simpler parts.
+    if (VT.getVectorElementType() == MVT::i1) {
+      unsigned NumElts = VT.getVectorMinNumElements();
+      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+
+      SDValue Lo, Hi;
+      Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
+                       DAG.getVectorIdxConstant(0, DL));
+      Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Vec0,
+                       DAG.getVectorIdxConstant(NumElts / 2, DL));
+      if (Idx < (NumElts / 2)) {
+        SDValue NewLo = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Lo, Vec1,
+                                    DAG.getVectorIdxConstant(Idx, DL));
+        return DAG.getNode(AArch64ISD::UZP1, DL, VT, NewLo, Hi);
+      } else {
+        SDValue NewHi =
+            DAG.getNode(ISD::INSERT_SUBVECTOR, DL, HalfVT, Hi, Vec1,
+                        DAG.getVectorIdxConstant(Idx - (NumElts / 2), DL));
+        return DAG.getNode(AArch64ISD::UZP1, DL, VT, Lo, NewHi);
+      }
+    }
+
     // Ensure the subvector is half the size of the main vector.
     if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
       return SDValue();
@@ -12961,7 +12984,7 @@ bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
     return false;
 
-  return (Index == 0 || Index == ResVT.getVectorNumElements());
+  return (Index == 0 || Index == ResVT.getVectorMinNumElements());
 }
 
 /// Turn vector tests of the signbit in the form of:
@@ -14321,6 +14344,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
 static SDValue
 performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                               SelectionDAG &DAG) {
+  SDLoc DL(N);
   SDValue Vec = N->getOperand(0);
   SDValue SubVec = N->getOperand(1);
   uint64_t IdxVal = N->getConstantOperandVal(2);
@@ -14346,7 +14370,6 @@ performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
   // Fold insert_subvector -> concat_vectors
   // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
   // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
-  SDLoc DL(N);
   SDValue Lo, Hi;
   if (IdxVal == 0) {
     Lo = SubVec;
diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
index 58034406be465..594b3e0b2f8b0 100644
--- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll
@@ -501,6 +501,80 @@ define <vscale x 8 x bfloat> @insert_nxv8bf16_v8bf16(<vscale x 8 x bfloat> %sv0,
   ret <vscale x 8 x bfloat> %v0
 }
 
+; Test predicate inserts of half size.
+define <vscale x 16 x i1> @insert_nxv16i1_nxv8i1_0(<vscale x 16 x i1> %vec, <vscale x 8 x i1> %sv) {
+; CHECK-LABEL: insert_nxv16i1_nxv8i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    uzp1 p0.b, p1.b, p0.b
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv8i1(<vscale x 16 x i1> %vec, <vscale x 8 x i1> %sv, i64 0)
+  ret <vscale x 16 x i1> %v0
+}
+
+define <vscale x 16 x i1> @insert_nxv16i1_nxv8i1_8(<vscale x 16 x i1> %vec, <vscale x 8 x i1> %sv) {
+; CHECK-LABEL: insert_nxv16i1_nxv8i1_8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv8i1(<vscale x 16 x i1> %vec, <vscale x 8 x i1> %sv, i64 8)
+  ret <vscale x 16 x i1> %v0
+}
+
+; Test predicate inserts of less than half the size.
+define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_0(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %sv) {
+; CHECK-LABEL: insert_nxv16i1_nxv4i1_0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpklo p2.h, p0.b
+; CHECK-NEXT:    punpkhi p0.h, p0.b
+; CHECK-NEXT:    punpkhi p2.h, p2.b
+; CHECK-NEXT:    uzp1 p1.h, p1.h, p2.h
+; CHECK-NEXT:    uzp1 p0.b, p1.b, p0.b
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %sv, i64 0)
+  ret <vscale x 16 x i1> %v0
+}
+
+define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_12(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %sv) {
+; CHECK-LABEL: insert_nxv16i1_nxv4i1_12:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    punpkhi p2.h, p0.b
+; CHECK-NEXT:    punpklo p0.h, p0.b
+; CHECK-NEXT:    punpklo p2.h, p2.b
+; CHECK-NEXT:    uzp1 p1.h, p2.h, p1.h
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1> %vec, <vscale x 4 x i1> %sv, i64 12)
+  ret <vscale x 16 x i1> %v0
+}
+
+; Test predicate insert into undef/zero
+define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_into_zero(<vscale x 4 x i1> %sv) {
+; CHECK-LABEL: insert_nxv16i1_nxv4i1_into_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pfalse p1.b
+; CHECK-NEXT:    punpklo p2.h, p1.b
+; CHECK-NEXT:    punpkhi p1.h, p1.b
+; CHECK-NEXT:    punpkhi p2.h, p2.b
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p2.h
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p1.b
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1> zeroinitializer, <vscale x 4 x i1> %sv, i64 0)
+  ret <vscale x 16 x i1> %v0
+}
+
+define <vscale x 16 x i1> @insert_nxv16i1_nxv4i1_into_poison(<vscale x 4 x i1> %sv) {
+; CHECK-LABEL: insert_nxv16i1_nxv4i1_into_poison:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uzp1 p0.h, p0.h, p0.h
+; CHECK-NEXT:    uzp1 p0.b, p0.b, p0.b
+; CHECK-NEXT:    ret
+  %v0 = call <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1> poison, <vscale x 4 x i1> %sv, i64 0)
+  ret <vscale x 16 x i1> %v0
+}
+
+
 declare <vscale x 3 x i32> @llvm.experimental.vector.insert.nxv3i32.nxv2i32(<vscale x 3 x i32>, <vscale x 2 x i32>, i64)
 declare <vscale x 3 x float> @llvm.experimental.vector.insert.nxv3f32.nxv2f32(<vscale x 3 x float>, <vscale x 2 x float>, i64)
 declare <vscale x 6 x i32> @llvm.experimental.vector.insert.nxv6i32.nxv2i32(<vscale x 6 x i32>, <vscale x 2 x i32>, i64)
@@ -511,3 +585,6 @@ declare <vscale x 8 x bfloat> @llvm.experimental.vector.insert.nxv8bf16.v8bf16(<
 declare <vscale x 4 x bfloat> @llvm.experimental.vector.insert.nxv4bf16.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, i64)
 declare <vscale x 4 x bfloat> @llvm.experimental.vector.insert.nxv4bf16.v4bf16(<vscale x 4 x bfloat>, <4 x bfloat>, i64)
 declare <vscale x 2 x bfloat> @llvm.experimental.vector.insert.nxv2bf16.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, i64)
+
+declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv4i1(<vscale x 16 x i1>, <vscale x 4 x i1>, i64)
+declare <vscale x 16 x i1> @llvm.experimental.vector.insert.nx16i1.nxv8i1(<vscale x 16 x i1>, <vscale x 8 x i1>, i64)

From 0d5b35934e010d4207b0b111e2e2905ba36fa77b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Thu, 20 Jan 2022 22:33:56 +0000
Subject: [PATCH 891/946] [libcxx] [test] Narrow down a MinGW bug workaround in
 rand.dist.uni.int/eval.pass.cpp

Differential Revision: https://reviews.llvm.org/D118235
---
 .../rand.dist.uni/rand.dist.uni.int/eval.pass.cpp     | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
index 03ec0eaa37fa2..e36625af9ebe0 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
@@ -7,9 +7,6 @@
 //===----------------------------------------------------------------------===//
 //
 // REQUIRES: long_tests
-//
-// This fails on MinGW for some unknown reason.
-// UNSUPPORTED: target={{.+}}-windows-gnu
 
 // <random>
 
@@ -28,6 +25,12 @@
 
 #include "test_macros.h"
 
+// The __int128 conversions to/from floating point crash on MinGW on x86_64.
+// This is fixed in Clang 14 by https://reviews.llvm.org/D110413.
+#if defined(__x86_64__) && defined(__MINGW32__) && defined(__clang_major__) && __clang_major__ < 14
+ #define TEST_BUGGY_I128_FP
+#endif
+
 template <class T>
 T sqr(T x)
 {
@@ -130,7 +133,7 @@ int main(int, char**)
     // or unsigned long long.
     // (We support __int128 as an extension.)
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#if !defined(_LIBCPP_HAS_NO_INT128) && !defined(TEST_BUGGY_I128_FP)
     test_statistics<__int128_t, std::minstd_rand0>();
     test_statistics<__uint128_t, std::minstd_rand0>();
 

From a5de66c4c50b5dec085ffbcd2afefd47f3f18d7f Mon Sep 17 00:00:00 2001
From: Anton Zabaznov <anton.zabaznov@intel.com>
Date: Mon, 13 Dec 2021 18:43:37 +0300
Subject: [PATCH 892/946] [OpenCL] Add support of __opencl_c_device_enqueue
 feature macro.

This feature requires support of __opencl_c_generic_address_space and
__opencl_c_program_scope_global_variables so diagnostics for that is provided as well.

Reviewed By: Anastasia

Differential Revision: https://reviews.llvm.org/D115640
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +-
 clang/include/clang/Basic/OpenCLOptions.h     |  9 ++++
 clang/lib/Basic/OpenCLOptions.cpp             | 35 +++++++-------
 clang/lib/Basic/TargetInfo.cpp                |  2 +
 clang/lib/Headers/opencl-c-base.h             |  2 +
 clang/lib/Headers/opencl-c.h                  |  2 +
 clang/lib/Sema/Sema.cpp                       | 10 ++--
 .../CodeGenOpenCL/address-spaces-mangling.cl  | 12 ++---
 clang/test/CodeGenOpenCL/address-spaces.cl    |  4 +-
 clang/test/CodeGenOpenCL/blocks.cl            |  4 ++
 clang/test/CodeGenOpenCL/pipe_types.cl        |  8 ++--
 .../test/CodeGenOpenCL/pipe_types_mangling.cl |  2 +-
 .../addrspace-constructors.clcpp              |  2 +-
 clang/test/Frontend/opencl.cl                 |  3 +-
 .../Misc/opencl-c-3.0.incorrect_options.cl    | 48 ++++++++++++-------
 clang/test/SemaOpenCL/invalid-block.cl        |  2 +-
 .../invalid-device-enqueue-types-cl3.0.cl     | 13 +++++
 clang/test/SemaOpenCL/invalid-pipes-cl1.2.cl  |  4 +-
 clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl  |  4 +-
 clang/test/SemaOpenCL/storageclass.cl         | 16 +++----
 .../SemaOpenCLCXX/remove-address-space.clcpp  |  2 +-
 21 files changed, 118 insertions(+), 69 deletions(-)
 create mode 100644 clang/test/SemaOpenCL/invalid-device-enqueue-types-cl3.0.cl

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 735d92d79d27c..a8cf00c1263ff 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9556,7 +9556,8 @@ def err_generic_sel_multi_match : Error<
 
 // Blocks
 def err_blocks_disable : Error<"blocks support disabled - compile with -fblocks"
-  " or %select{pick a deployment target that supports them|for OpenCL 2.0}0">;
+  " or %select{pick a deployment target that supports them|for OpenCL C 2.0"
+  " or OpenCL C 3.0 with __opencl_c_device_enqueue feature}0">;
 def err_block_returning_array_function : Error<
   "block cannot return %select{array|function}0 type %1">;
 
diff --git a/clang/include/clang/Basic/OpenCLOptions.h b/clang/include/clang/Basic/OpenCLOptions.h
index d6cb1a210519d..512bcb1e6ef10 100644
--- a/clang/include/clang/Basic/OpenCLOptions.h
+++ b/clang/include/clang/Basic/OpenCLOptions.h
@@ -212,6 +212,15 @@ class OpenCLOptions {
   bool isEnabled(llvm::StringRef Ext) const;
 
   OpenCLOptionInfoMap OptMap;
+
+  // First feature in a pair requires the second one to be supported.
+  using FeatureDepEntry = std::pair<llvm::StringRef, llvm::StringRef>;
+  using FeatureDepList = llvm::SmallVector<FeatureDepEntry, 8>;
+
+  static const FeatureDepList DependentFeaturesList;
+
+  // Extensions and equivalent feature pairs.
+  static const llvm::StringMap<llvm::StringRef> FeatureExtensionMap;
 };
 
 } // end namespace clang
diff --git a/clang/lib/Basic/OpenCLOptions.cpp b/clang/lib/Basic/OpenCLOptions.cpp
index b7408f39bdab4..7e89b3f1b804d 100644
--- a/clang/lib/Basic/OpenCLOptions.cpp
+++ b/clang/lib/Basic/OpenCLOptions.cpp
@@ -12,6 +12,17 @@
 
 namespace clang {
 
+const OpenCLOptions::FeatureDepList OpenCLOptions::DependentFeaturesList = {
+    {"__opencl_c_read_write_images", "__opencl_c_images"},
+    {"__opencl_c_3d_image_writes", "__opencl_c_images"},
+    {"__opencl_c_pipes", "__opencl_c_generic_address_space"},
+    {"__opencl_c_device_enqueue", "__opencl_c_generic_address_space"},
+    {"__opencl_c_device_enqueue", "__opencl_c_program_scope_global_variables"}};
+
+const llvm::StringMap<llvm::StringRef> OpenCLOptions::FeatureExtensionMap = {
+    {"cl_khr_fp64", "__opencl_c_fp64"},
+    {"cl_khr_3d_image_writes", "__opencl_c_3d_image_writes"}};
+
 bool OpenCLOptions::isKnown(llvm::StringRef Ext) const {
   return OptMap.find(Ext) != OptMap.end();
 }
@@ -108,33 +119,23 @@ void OpenCLOptions::disableAll() {
 
 bool OpenCLOptions::diagnoseUnsupportedFeatureDependencies(
     const TargetInfo &TI, DiagnosticsEngine &Diags) {
-  // Feature pairs. First feature in a pair requires the second one to be
-  // supported.
-  static const llvm::StringMap<llvm::StringRef> DependentFeaturesMap = {
-      {"__opencl_c_read_write_images", "__opencl_c_images"},
-      {"__opencl_c_3d_image_writes", "__opencl_c_images"},
-      {"__opencl_c_pipes", "__opencl_c_generic_address_space"}};
-
   auto OpenCLFeaturesMap = TI.getSupportedOpenCLOpts();
 
   bool IsValid = true;
-  for (auto &FeaturePair : DependentFeaturesMap)
-    if (TI.hasFeatureEnabled(OpenCLFeaturesMap, FeaturePair.getKey()) &&
-        !TI.hasFeatureEnabled(OpenCLFeaturesMap, FeaturePair.getValue())) {
+  for (auto &FeaturePair : DependentFeaturesList) {
+    auto Feature = FeaturePair.first;
+    auto Dep = FeaturePair.second;
+    if (TI.hasFeatureEnabled(OpenCLFeaturesMap, Feature) &&
+        !TI.hasFeatureEnabled(OpenCLFeaturesMap, Dep)) {
       IsValid = false;
-      Diags.Report(diag::err_opencl_feature_requires)
-          << FeaturePair.getKey() << FeaturePair.getValue();
+      Diags.Report(diag::err_opencl_feature_requires) << Feature << Dep;
     }
+  }
   return IsValid;
 }
 
 bool OpenCLOptions::diagnoseFeatureExtensionDifferences(
     const TargetInfo &TI, DiagnosticsEngine &Diags) {
-  // Extensions and equivalent feature pairs.
-  static const llvm::StringMap<llvm::StringRef> FeatureExtensionMap = {
-      {"cl_khr_fp64", "__opencl_c_fp64"},
-      {"cl_khr_3d_image_writes", "__opencl_c_3d_image_writes"}};
-
   auto OpenCLFeaturesMap = TI.getSupportedOpenCLOpts();
 
   bool IsValid = true;
diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 4fd31647bab24..e3a2f30febe75 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -422,6 +422,8 @@ void TargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts) {
           OpenCLFeaturesMap, "__opencl_c_generic_address_space");
       Opts.OpenCLPipes =
           hasFeatureEnabled(OpenCLFeaturesMap, "__opencl_c_pipes");
+      Opts.Blocks =
+          hasFeatureEnabled(OpenCLFeaturesMap, "__opencl_c_device_enqueue");
     }
   }
 
diff --git a/clang/lib/Headers/opencl-c-base.h b/clang/lib/Headers/opencl-c-base.h
index 4f63e7bbdcbba..06b78da63e698 100644
--- a/clang/lib/Headers/opencl-c-base.h
+++ b/clang/lib/Headers/opencl-c-base.h
@@ -499,12 +499,14 @@ typedef int clk_profiling_info;
 
 #define MAX_WORK_DIM 3
 
+#ifdef __opencl_c_device_enqueue
 typedef struct {
   unsigned int workDimension;
   size_t globalWorkOffset[MAX_WORK_DIM];
   size_t globalWorkSize[MAX_WORK_DIM];
   size_t localWorkSize[MAX_WORK_DIM];
 } ndrange_t;
+#endif // __opencl_c_device_enqueue
 
 #endif // defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 
diff --git a/clang/lib/Headers/opencl-c.h b/clang/lib/Headers/opencl-c.h
index f7a85f6578858..8fde2fa298994 100644
--- a/clang/lib/Headers/opencl-c.h
+++ b/clang/lib/Headers/opencl-c.h
@@ -16450,6 +16450,7 @@ bool __ovld is_valid_reserve_id(reserve_id_t reserve_id);
 // OpenCL v2.0 s6.13.17 - Enqueue Kernels
 #if defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 
+#ifdef __opencl_c_device_enqueue
 ndrange_t __ovld ndrange_1D(size_t);
 ndrange_t __ovld ndrange_1D(size_t, size_t);
 ndrange_t __ovld ndrange_1D(size_t, size_t, size_t);
@@ -16477,6 +16478,7 @@ bool __ovld is_valid_event (clk_event_t event);
 void __ovld capture_event_profiling_info(clk_event_t, clk_profiling_info, __global void* value);
 
 queue_t __ovld get_default_queue(void);
+#endif //__opencl_c_device_enqueue
 #endif //defined(__OPENCL_CPP_VERSION__) || (__OPENCL_C_VERSION__ >= CL_VERSION_2_0)
 
 // OpenCL Extension v2.0 s9.17 - Sub-groups
diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index bbdc03fa1814e..a5a62276043dc 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -332,9 +332,12 @@ void Sema::Initialize() {
         Context.getTargetInfo().getSupportedOpenCLOpts(), getLangOpts());
     addImplicitTypedef("sampler_t", Context.OCLSamplerTy);
     addImplicitTypedef("event_t", Context.OCLEventTy);
-    if (getLangOpts().getOpenCLCompatibleVersion() >= 200) {
-      addImplicitTypedef("clk_event_t", Context.OCLClkEventTy);
-      addImplicitTypedef("queue_t", Context.OCLQueueTy);
+    auto OCLCompatibleVersion = getLangOpts().getOpenCLCompatibleVersion();
+    if (OCLCompatibleVersion >= 200) {
+      if (getLangOpts().OpenCLCPlusPlus || getLangOpts().Blocks) {
+        addImplicitTypedef("clk_event_t", Context.OCLClkEventTy);
+        addImplicitTypedef("queue_t", Context.OCLQueueTy);
+      }
       if (getLangOpts().OpenCLPipes)
         addImplicitTypedef("reserve_id_t", Context.OCLReserveIDTy);
       addImplicitTypedef("atomic_int", Context.getAtomicType(Context.IntTy));
@@ -402,7 +405,6 @@ void Sema::Initialize() {
       }
     }
 
-
 #define EXT_OPAQUE_TYPE(ExtType, Id, Ext)                                      \
   if (getOpenCLOptions().isSupported(#Ext, getLangOpts())) {                   \
     addImplicitTypedef(#ExtType, Context.Id##Ty);                              \
diff --git a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
index 32ba5d4c3a186..9e59bad4db54d 100644
--- a/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
+++ b/clang/test/CodeGenOpenCL/address-spaces-mangling.cl
@@ -2,16 +2,16 @@
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG20" %s
 // RUN: %clang_cc1 %s -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG10" %s
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG20" %s
-// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG20" %s
-// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG20" %s
+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_generic_address_space -ffake-address-space-map -faddress-space-map-mangling=no -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="NOASMANG,NOASMANG20" %s
+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_generic_address_space -ffake-address-space-map -faddress-space-map-mangling=yes -triple %itanium_abi_triple -emit-llvm -o - | FileCheck -check-prefixes="ASMANG,ASMANG20" %s
 
 // We check that the address spaces are mangled the same in both version of OpenCL
 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL2.0 -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
 // RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL1.2 -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=-__opencl_c_generic_address_space,-__opencl_c_pipes -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=clc++2021 -cl-ext=+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
-// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=clc++2021 -cl-ext=-__opencl_c_generic_address_space,-__opencl_c_pipes -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=CL3.0 -cl-ext=-all -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=clc++2021 -cl-ext=-all,+__opencl_c_generic_address_space -emit-llvm -o - | FileCheck -check-prefix=OCL-20 %s
+// RUN: %clang_cc1 %s -triple spir-unknown-unknown -cl-std=clc++2021 -cl-ext=-all -emit-llvm -o - | FileCheck -check-prefix=OCL-12 %s
 
 // We can't name this f as private is equivalent to default
 // no specifier given address space so we get multiple definition
diff --git a/clang/test/CodeGenOpenCL/address-spaces.cl b/clang/test/CodeGenOpenCL/address-spaces.cl
index 5d59f960e9101..92e7d2ff7502d 100644
--- a/clang/test/CodeGenOpenCL/address-spaces.cl
+++ b/clang/test/CodeGenOpenCL/address-spaces.cl
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 %s -O0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
-// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -cl-ext=-__opencl_c_generic_address_space,-__opencl_c_pipes -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
-// RUN: %clang_cc1 %s -O0 -cl-std=clc++2021 -cl-ext=-__opencl_c_generic_address_space,-__opencl_c_pipes -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
+// RUN: %clang_cc1 %s -O0 -cl-std=CL3.0 -cl-ext=-all -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
+// RUN: %clang_cc1 %s -O0 -cl-std=clc++2021 -cl-ext=-all -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,SPIR
 // RUN: %clang_cc1 %s -O0 -DCL20 -cl-std=CL2.0 -ffake-address-space-map -emit-llvm -o - | FileCheck %s --check-prefixes=CL20,CL20SPIR
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
 // RUN: %clang_cc1 %s -O0 -triple amdgcn-amd-amdhsa -cl-std=CL3.0 -emit-llvm -o - | FileCheck --check-prefixes=CHECK,AMDGCN %s
diff --git a/clang/test/CodeGenOpenCL/blocks.cl b/clang/test/CodeGenOpenCL/blocks.cl
index e669c42a75364..e8ba70aa957fe 100644
--- a/clang/test/CodeGenOpenCL/blocks.cl
+++ b/clang/test/CodeGenOpenCL/blocks.cl
@@ -2,6 +2,10 @@
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,AMDGCN %s
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple spir-unknown-unknown | FileCheck -check-prefixes=CHECK-DEBUG %s
 // RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -debug-info-kind=limited -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=CHECK-DEBUG %s
+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_device_enqueue,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck -check-prefixes=COMMON,SPIR %s
+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_device_enqueue,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables  -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=COMMON,AMDGCN %s
+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_device_enqueue,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables  -emit-llvm -o - -O0 -debug-info-kind=limited -triple spir-unknown-unknown | FileCheck -check-prefixes=CHECK-DEBUG %s
+// RUN: %clang_cc1 %s -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_device_enqueue,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables -emit-llvm -o - -O0 -debug-info-kind=limited -triple amdgcn-amd-amdhsa | FileCheck -check-prefixes=CHECK-DEBUG %s
 
 // SPIR: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
 // AMDGCN: %struct.__opencl_block_literal_generic = type { i32, i32, i8* }
diff --git a/clang/test/CodeGenOpenCL/pipe_types.cl b/clang/test/CodeGenOpenCL/pipe_types.cl
index c82ea8012dfd2..57da95fec0d70 100644
--- a/clang/test/CodeGenOpenCL/pipe_types.cl
+++ b/clang/test/CodeGenOpenCL/pipe_types.cl
@@ -1,8 +1,8 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL2.0 -DTEST_STRUCT -o - %s | FileCheck --check-prefixes=CHECK,CHECK-STRUCT %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables -o - %s | FileCheck --check-prefixes=CHECK %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables -o - %s | FileCheck --check-prefixes=CHECK %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=clc++2021 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables -o - %s | FileCheck --check-prefixes=CHECK %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=clc++2021 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables -o - %s | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_pipes,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables -o - %s | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_pipes,+__opencl_c_generic_address_space -o - %s | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=clc++2021 -cl-ext=-all,+__opencl_c_pipes,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables -o - %s | FileCheck --check-prefixes=CHECK %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=clc++2021 -cl-ext=-all,+__opencl_c_pipes,+__opencl_c_generic_address_space -o - %s | FileCheck --check-prefixes=CHECK %s
 
 // CHECK: %opencl.pipe_ro_t = type opaque
 // CHECK: %opencl.pipe_wo_t = type opaque
diff --git a/clang/test/CodeGenOpenCL/pipe_types_mangling.cl b/clang/test/CodeGenOpenCL/pipe_types_mangling.cl
index c61a9fcfdd48a..1ac125c6782d1 100644
--- a/clang/test/CodeGenOpenCL/pipe_types_mangling.cl
+++ b/clang/test/CodeGenOpenCL/pipe_types_mangling.cl
@@ -7,7 +7,7 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space -o - %s | FileCheck %s --check-prefixes=LINUX
 // RUN: %clang_cc1 -triple x86_64-unknown-windows-pc -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space -o - %s -DWIN | FileCheck %s --check-prefixes=OCLWINDOWS
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables -o - %s | FileCheck %s --check-prefixes=LINUX
-// RUN: %clang_cc1 -triple x86_64-unknown-windows-pc -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables -o - %s -DWIN | FileCheck %s --check-prefixes=OCLWINDOWS
+// RUN: %clang_cc1 -triple x86_64-unknown-windows-pc -emit-llvm -O0 -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables,-__opencl_c_device_enqueue -o - %s -DWIN | FileCheck %s --check-prefixes=OCLWINDOWS
 
 typedef unsigned char __attribute__((ext_vector_type(3))) uchar3;
 typedef int __attribute__((ext_vector_type(4))) int4;
diff --git a/clang/test/CodeGenOpenCLCXX/addrspace-constructors.clcpp b/clang/test/CodeGenOpenCLCXX/addrspace-constructors.clcpp
index 9ad251d051c77..293455c8eee93 100644
--- a/clang/test/CodeGenOpenCLCXX/addrspace-constructors.clcpp
+++ b/clang/test/CodeGenOpenCLCXX/addrspace-constructors.clcpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 %s -cl-std=clc++1.0 -DGENERIC -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck %s
 // RUN: %clang_cc1 %s -cl-std=clc++2021 -DGENERIC -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck %s
-// RUN: %clang_cc1 %s -cl-std=clc++2021 -cl-ext=-__opencl_c_generic_address_space,-__opencl_c_pipes -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck %s
+// RUN: %clang_cc1 %s -cl-std=clc++2021 -cl-ext=-all,+__opencl_c_program_scope_global_variables -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck %s
 
 // CHECK: %struct.X = type { i32 }
 
diff --git a/clang/test/Frontend/opencl.cl b/clang/test/Frontend/opencl.cl
index 505a02e2e9159..8a1903b494970 100644
--- a/clang/test/Frontend/opencl.cl
+++ b/clang/test/Frontend/opencl.cl
@@ -11,6 +11,7 @@
 // RUN: %clang_cc1 -cl-std=CL1.2 -cl-strict-aliasing -fblocks %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID-OPENCL-VERSION12 %s
 // RUN: %clang_cc1 -cl-std=CL2.0 -cl-strict-aliasing %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID-OPENCL-VERSION20 %s
 // RUN: %clang_cc1 -cl-std=clc++1.0 -cl-strict-aliasing -fblocks %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID-OPENCLCPP-VERSION10 %s
+// RUN: %clang_cc1 %s -verify -fsyntax-only -cl-std=CL3.0 -cl-ext=-__opencl_c_device_enqueue -DSYNTAX
 
 #ifdef SYNTAX
 class test{
@@ -25,7 +26,7 @@ int member;
 typedef int (^bl_t)(void);
 #if defined(__OPENCL_C_VERSION__) || defined(__OPENCL_CPP_VERSION__)
 #if !defined(BLOCKS) && (defined(__OPENCL_CPP_VERSION__)  || __OPENCL_C_VERSION__ != CL_VERSION_2_0)
-  // expected-error@-3{{blocks support disabled - compile with -fblocks or for OpenCL 2.0}}
+  // expected-error@-3{{blocks support disabled - compile with -fblocks or for OpenCL C 2.0 or OpenCL C 3.0 with __opencl_c_device_enqueue feature}}
 #endif
 #else
   // expected-error@-6{{blocks support disabled - compile with -fblocks or pick a deployment target that supports them}}
diff --git a/clang/test/Misc/opencl-c-3.0.incorrect_options.cl b/clang/test/Misc/opencl-c-3.0.incorrect_options.cl
index b3f3ab974ddd0..38d786662c748 100644
--- a/clang/test/Misc/opencl-c-3.0.incorrect_options.cl
+++ b/clang/test/Misc/opencl-c-3.0.incorrect_options.cl
@@ -1,21 +1,33 @@
-// RUN: not %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=-__opencl_c_fp64,+cl_khr_fp64 %s 2>&1 | FileCheck -check-prefix=CHECK-FP64 %s
-// RUN: not %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 %s 2>&1 | FileCheck -check-prefix=CHECK-FP64 %s
-// RUN: not %clang_cc1 -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=-__opencl_c_fp64,+cl_khr_fp64 %s 2>&1 | FileCheck -check-prefix=CHECK-FP64 %s
-// RUN: not %clang_cc1 -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 %s 2>&1 | FileCheck -check-prefix=CHECK-FP64 %s
-// RUN: not %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_read_write_images,-__opencl_c_images %s 2>&1 | FileCheck -check-prefix=CHECK-READ-WRITE-IMAGES %s
-// RUN: not %clang_cc1 -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_read_write_images,-__opencl_c_images %s 2>&1 | FileCheck -check-prefix=CHECK-READ-WRITE-IMAGES %s
-// RUN: not %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_pipes,-__opencl_c_generic_address_space %s 2>&1 | FileCheck -check-prefix=CHECK-PIPES %s
-// RUN: not %clang_cc1 -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_pipes,-__opencl_c_generic_address_space %s 2>&1 | FileCheck -check-prefix=CHECK-PIPES %s
-// RUN: not %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_3d_image_writes,+__opencl_c_images,-cl_khr_3d_image_writes %s 2>&1 | FileCheck -check-prefix=CHECK-3D-WRITE-IMAGES-DIFF %s
-// RUN: not %clang_cc1 -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_3d_image_writes,-__opencl_c_images %s 2>&1 | FileCheck -check-prefix=CHECK-3D-WRITE-IMAGES-DEPS %s
-// RUN: not %clang_cc1 -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_3d_image_writes,+__opencl_c_images,-cl_khr_3d_image_writes %s 2>&1 | FileCheck -check-prefix=CHECK-3D-WRITE-IMAGES-DIFF %s
-// RUN: not %clang_cc1 -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_3d_image_writes,-__opencl_c_images %s 2>&1 | FileCheck -check-prefix=CHECK-3D-WRITE-IMAGES-DEPS %s
-
-// CHECK-FP64: error: options cl_khr_fp64 and __opencl_c_fp64 are set to different values
-
-// CHECK-READ-WRITE-IMAGES: error: feature __opencl_c_read_write_images requires support of __opencl_c_images feature
+// RUN: not %clang_cc1 -verify -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=-__opencl_c_fp64,+cl_khr_fp64 %s 2>&1 | FileCheck -check-prefix=CHECK-FP64 %s
+// RUN: not %clang_cc1 -verify -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 %s 2>&1 | FileCheck -check-prefix=CHECK-FP64 %s
+// RUN: not %clang_cc1 -verify -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=-__opencl_c_fp64,+cl_khr_fp64 %s 2>&1 | FileCheck -check-prefix=CHECK-FP64 %s
+// RUN: not %clang_cc1 -verify -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_fp64,-cl_khr_fp64 %s 2>&1 | FileCheck -check-prefix=CHECK-FP64 %s
+// RUN: not %clang_cc1 -verify -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_read_write_images,-__opencl_c_images %s 2>&1 | FileCheck -check-prefix=CHECK-READ-WRITE-IMAGES %s
+// RUN: not %clang_cc1 -verify -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_read_write_images,-__opencl_c_images %s 2>&1 | FileCheck -check-prefix=CHECK-READ-WRITE-IMAGES %s
+// RUN: not %clang_cc1 -verify -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_pipes,-__opencl_c_generic_address_space %s 2>&1 | FileCheck -check-prefix=CHECK-PIPES %s
+// RUN: not %clang_cc1 -verify -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_pipes,-__opencl_c_generic_address_space %s 2>&1 | FileCheck -check-prefix=CHECK-PIPES %s
+// RUN: not %clang_cc1 -verify -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_3d_image_writes,+__opencl_c_images,-cl_khr_3d_image_writes %s 2>&1 | FileCheck -check-prefix=CHECK-3D-WRITE-IMAGES-DIFF %s
+// RUN: not %clang_cc1 -verify -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_3d_image_writes,-__opencl_c_images %s 2>&1 | FileCheck -check-prefix=CHECK-3D-WRITE-IMAGES-DEPS %s
+
+// RUN: not %clang_cc1 -verify -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_3d_image_writes,+__opencl_c_images,-cl_khr_3d_image_writes %s 2>&1 | FileCheck -check-prefix=CHECK-3D-WRITE-IMAGES-DIFF %s
+// RUN: not %clang_cc1 -verify -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_3d_image_writes,-__opencl_c_images %s 2>&1 | FileCheck -check-prefix=CHECK-3D-WRITE-IMAGES-DEPS %s
+
+// RUN: not %clang_cc1 -verify -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_device_enqueue,-__opencl_c_generic_address_space,-__opencl_c_pipes,+__opencl_c_program_scope_global_variables %s 2>&1 | FileCheck -check-prefix=CHECK-DEVICE-ENQUEUE-GENERIC %s
+// RUN: not %clang_cc1 -verify -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_device_enqueue,-__opencl_c_generic_address_space,-__opencl_c_pipes,+__opencl_c_program_scope_global_variables %s 2>&1 | FileCheck -check-prefix=CHECK-DEVICE-ENQUEUE-GENERIC %s
+// RUN: not %clang_cc1 -verify -cl-std=CL3.0 -triple spir-unknown-unknown -cl-ext=+__opencl_c_device_enqueue,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables %s 2>&1 | FileCheck -check-prefix=CHECK-DEVICE-ENQUEUE-PSV %s
+// RUN: not %clang_cc1 -verify -cl-std=clc++2021 -triple spir-unknown-unknown -cl-ext=+__opencl_c_device_enqueue,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables %s 2>&1 | FileCheck -check-prefix=CHECK-DEVICE-ENQUEUE-PSV %s
+
+
+// CHECK-FP64: options cl_khr_fp64 and __opencl_c_fp64 are set to different values
+
+// CHECK-READ-WRITE-IMAGES: feature __opencl_c_read_write_images requires support of __opencl_c_images feature
 
 // CHECK-3D-WRITE-IMAGES-DIFF: options cl_khr_3d_image_writes and __opencl_c_3d_image_writes are set to different values
-// CHECK-3D-WRITE-IMAGES-DEPS: error: feature __opencl_c_3d_image_writes requires support of __opencl_c_images feature
+// CHECK-3D-WRITE-IMAGES-DEPS: feature __opencl_c_3d_image_writes requires support of __opencl_c_images feature
+
+// CHECK-PIPES: feature __opencl_c_pipes requires support of __opencl_c_generic_address_space feature
+
+// CHECK-DEVICE-ENQUEUE-GENERIC: feature __opencl_c_device_enqueue requires support of __opencl_c_generic_address_space feature
+// CHECK-DEVICE-ENQUEUE-PSV: feature __opencl_c_device_enqueue requires support of __opencl_c_program_scope_global_variables feature
 
-// CHECK-PIPES: error: feature __opencl_c_pipes requires support of __opencl_c_generic_address_space feature
+// expected-no-diagnostics
diff --git a/clang/test/SemaOpenCL/invalid-block.cl b/clang/test/SemaOpenCL/invalid-block.cl
index 0fb2e26920736..dfa0726e13154 100644
--- a/clang/test/SemaOpenCL/invalid-block.cl
+++ b/clang/test/SemaOpenCL/invalid-block.cl
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -verify -fblocks -cl-std=CL2.0 %s
-
+// RUN: %clang_cc1 -verify -fblocks -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_device_enqueue,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables  %s
 // OpenCL v2.0 s6.12.5
 void f0(int (^const bl)()); // expected-error{{declaring function parameter of type 'int (__generic ^const __private)(void)' is not allowed}}
 // All blocks declarations must be const qualified and initialized.
diff --git a/clang/test/SemaOpenCL/invalid-device-enqueue-types-cl3.0.cl b/clang/test/SemaOpenCL/invalid-device-enqueue-types-cl3.0.cl
new file mode 100644
index 0000000000000..688f2cc98c1fc
--- /dev/null
+++ b/clang/test/SemaOpenCL/invalid-device-enqueue-types-cl3.0.cl
@@ -0,0 +1,13 @@
+// RUN: %clang_cc1 -verify -fblocks -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_device_enqueue,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables %s
+// RUN: %clang_cc1 -verify -fblocks -cl-std=CL3.0 -cl-ext=-__opencl_c_device_enqueue %s
+
+void f() {
+  clk_event_t e;
+  queue_t q;
+#ifndef __opencl_c_device_enqueue
+// expected-error@-3 {{use of undeclared identifier 'clk_event_t'}}
+// expected-error@-3 {{use of undeclared identifier 'queue_t'}}
+#else
+// expected-no-diagnostics
+#endif
+}
diff --git a/clang/test/SemaOpenCL/invalid-pipes-cl1.2.cl b/clang/test/SemaOpenCL/invalid-pipes-cl1.2.cl
index 707205c558ea6..a6920e963e990 100644
--- a/clang/test/SemaOpenCL/invalid-pipes-cl1.2.cl
+++ b/clang/test/SemaOpenCL/invalid-pipes-cl1.2.cl
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=-__opencl_c_pipes,-__opencl_c_generic_address_space
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=-__opencl_c_pipes,-__opencl_c_generic_address_space
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=-all
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=-all
 
 void foo(read_only pipe int p);
 #if __OPENCL_C_VERSION__ > 120
diff --git a/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl b/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl
index d9f2ec0e43008..0493bc61c51ec 100644
--- a/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl
+++ b/clang/test/SemaOpenCL/invalid-pipes-cl2.0.cl
@@ -1,9 +1,9 @@
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL2.0
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables,-__opencl_c_device_enqueue
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++1.0
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,+__opencl_c_program_scope_global_variables
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=+__opencl_c_pipes,+__opencl_c_generic_address_space,-__opencl_c_program_scope_global_variables,-__opencl_c_device_enqueue
 
 global pipe int gp;            // expected-error {{type '__global read_only pipe int' can only be used as a function parameter in OpenCL}}
 global reserve_id_t rid;          // expected-error {{the '__global reserve_id_t' type cannot be used to declare a program scope variable}}
diff --git a/clang/test/SemaOpenCL/storageclass.cl b/clang/test/SemaOpenCL/storageclass.cl
index 030d9c5ba54fb..827c4fa839935 100644
--- a/clang/test/SemaOpenCL/storageclass.cl
+++ b/clang/test/SemaOpenCL/storageclass.cl
@@ -1,12 +1,12 @@
 // RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL1.2
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=-__opencl_c_program_scope_global_variables,-__opencl_c_generic_address_space,-__opencl_c_pipes
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_program_scope_global_variables,-__opencl_c_generic_address_space,-__opencl_c_pipes
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=-__opencl_c_program_scope_global_variables,+__opencl_c_generic_address_space
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=+__opencl_c_program_scope_global_variables,+__opencl_c_generic_address_space
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=-__opencl_c_program_scope_global_variables,-__opencl_c_generic_address_space,-__opencl_c_pipes
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=+__opencl_c_program_scope_global_variables,-__opencl_c_generic_address_space,-__opencl_c_pipes
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=-__opencl_c_program_scope_global_variables,+__opencl_c_generic_address_space
-// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=+__opencl_c_program_scope_global_variables,+__opencl_c_generic_address_space
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=-all
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_program_scope_global_variables
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_generic_address_space
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=CL3.0 -cl-ext=-all,+__opencl_c_program_scope_global_variables,+__opencl_c_generic_address_space
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=-all
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=-all,+__opencl_c_program_scope_global_variables
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=-all,+__opencl_c_generic_address_space
+// RUN: %clang_cc1 %s -verify -pedantic -fsyntax-only -cl-std=clc++2021 -cl-ext=-all,+__opencl_c_program_scope_global_variables,+__opencl_c_generic_address_space
 static constant int G1 = 0;
 constant int G2 = 0;
 
diff --git a/clang/test/SemaOpenCLCXX/remove-address-space.clcpp b/clang/test/SemaOpenCLCXX/remove-address-space.clcpp
index e6b2924eab5f8..07b252da55740 100644
--- a/clang/test/SemaOpenCLCXX/remove-address-space.clcpp
+++ b/clang/test/SemaOpenCLCXX/remove-address-space.clcpp
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 %s -cl-std=clc++1.0 -triple spir-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -verify
 // RUN: %clang_cc1 %s -cl-std=clc++2021 -triple spir-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -verify
-// RUN: %clang_cc1 %s -cl-std=clc++2021 -cl-ext=-__opencl_c_generic_address_space,-__opencl_c_pipes -triple spir-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -verify
+// RUN: %clang_cc1 %s -cl-std=clc++2021 -cl-ext=-__opencl_c_device_enqueue,-__opencl_c_generic_address_space,-__opencl_c_pipes -triple spir-unknown-unknown -fdeclare-opencl-builtins -finclude-default-header -verify
 
 // expected-no-diagnostics
 

From d839afe3f9a04288f20698c3a28c096a1822f3a1 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 12:26:32 +0100
Subject: [PATCH 893/946] [InstCombine] Avoid pointer element type access in
 PointerReplacer

This code replaces the address space of the pointers while keeping
the element type. Use the appropriate helpers to make this work
with opaque pointers.
---
 .../InstCombine/InstCombineLoadStoreAlloca.cpp      | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index dad5c2ca6a50c..756792918dba5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -301,16 +301,17 @@ void PointerReplacer::replace(Instruction *I) {
     assert(V && "Operand not replaced");
     SmallVector<Value *, 8> Indices;
     Indices.append(GEP->idx_begin(), GEP->idx_end());
-    auto *NewI = GetElementPtrInst::Create(
-        V->getType()->getPointerElementType(), V, Indices);
+    auto *NewI =
+        GetElementPtrInst::Create(GEP->getSourceElementType(), V, Indices);
     IC.InsertNewInstWith(NewI, *GEP);
     NewI->takeName(GEP);
     WorkMap[GEP] = NewI;
   } else if (auto *BC = dyn_cast<BitCastInst>(I)) {
     auto *V = getReplacement(BC->getOperand(0));
     assert(V && "Operand not replaced");
-    auto *NewT = PointerType::get(BC->getType()->getPointerElementType(),
-                                  V->getType()->getPointerAddressSpace());
+    auto *NewT = PointerType::getWithSamePointeeType(
+        cast<PointerType>(BC->getType()),
+        V->getType()->getPointerAddressSpace());
     auto *NewI = new BitCastInst(V, NewT);
     IC.InsertNewInstWith(NewI, *BC);
     NewI->takeName(BC);
@@ -345,9 +346,7 @@ void PointerReplacer::replacePointer(Instruction &I, Value *V) {
 #ifndef NDEBUG
   auto *PT = cast<PointerType>(I.getType());
   auto *NT = cast<PointerType>(V->getType());
-  assert(PT != NT &&
-         PT->getPointerElementType() == NT->getPointerElementType() &&
-         "Invalid usage");
+  assert(PT != NT && PT->hasSameElementTypeAs(NT) && "Invalid usage");
 #endif
   WorkMap[&I] = V;
 

From 4d9f6ab3054bdbe07982333b31974b357586acca Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 12:36:25 +0100
Subject: [PATCH 894/946] [IR] Handle opaque pointers in PtrToArgument mangling

It appears that this mangling type is currently unused. Make it
compatible with opaque pointers in case it becomes used again...
---
 llvm/lib/IR/Function.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index 72d8f9e1547f6..1e874d7afa79b 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -1575,7 +1575,7 @@ static bool matchIntrinsicType(
       Type * ReferenceType = ArgTys[D.getArgumentNumber()];
       PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
       return (!ThisArgType ||
-              ThisArgType->getPointerElementType() != ReferenceType);
+              !ThisArgType->isOpaqueOrPointeeTypeMatches(ReferenceType));
     }
     case IITDescriptor::PtrToElt: {
       if (D.getArgumentNumber() >= ArgTys.size())

From 417a75c6d094379914ccd1249488b5a331492985 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Thu, 27 Jan 2022 11:01:56 +0000
Subject: [PATCH 895/946] [AArch64][SVE] Avoid using ptrue for ptest in
 VECREDUCE_OR.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D118145
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  7 +++-
 .../CodeGen/AArch64/sve-int-pred-reduce.ll    | 36 +++++++------------
 .../AArch64/sve-split-int-pred-reduce.ll      |  6 ++--
 3 files changed, 21 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 676ee1b189143..b395231c69a75 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19364,7 +19364,12 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
   default:
     return SDValue();
   case ISD::VECREDUCE_OR:
-    return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
+    if (isAllActivePredicate(Pg))
+      // The predicate can be 'Op' because
+      // vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
+      return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
+    else
+      return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
   case ISD::VECREDUCE_AND: {
     Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
     return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
diff --git a/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
index 9900e5d54558b..56265fa3e6194 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-pred-reduce.ll
@@ -56,8 +56,7 @@ define i1 @reduce_and_nxv2i1(<vscale x 2 x i1> %vec) {
 define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
 ; CHECK-LABEL: reduce_or_nxv16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.or.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -67,8 +66,7 @@ define i1 @reduce_or_nxv16i1(<vscale x 16 x i1> %vec) {
 define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
 ; CHECK-LABEL: reduce_or_nxv8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.or.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -78,8 +76,7 @@ define i1 @reduce_or_nxv8i1(<vscale x 8 x i1> %vec) {
 define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
 ; CHECK-LABEL: reduce_or_nxv4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.or.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -89,8 +86,7 @@ define i1 @reduce_or_nxv4i1(<vscale x 4 x i1> %vec) {
 define i1 @reduce_or_nxv2i1(<vscale x 2 x i1> %vec) {
 ; CHECK-LABEL: reduce_or_nxv2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.or.i1.nxv2i1(<vscale x 2 x i1> %vec)
@@ -198,8 +194,7 @@ define i1 @reduce_smax_nxv2i1(<vscale x 2 x i1> %vec) {
 define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
 ; CHECK-LABEL: reduce_smin_nxv16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.smin.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -209,8 +204,7 @@ define i1 @reduce_smin_nxv16i1(<vscale x 16 x i1> %vec) {
 define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
 ; CHECK-LABEL: reduce_smin_nxv8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.smin.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -220,8 +214,7 @@ define i1 @reduce_smin_nxv8i1(<vscale x 8 x i1> %vec) {
 define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
 ; CHECK-LABEL: reduce_smin_nxv4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.smin.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -231,8 +224,7 @@ define i1 @reduce_smin_nxv4i1(<vscale x 4 x i1> %vec) {
 define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
 ; CHECK-LABEL: reduce_smin_nxv2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.smin.i1.nxv2i1(<vscale x 2 x i1> %vec)
@@ -244,8 +236,7 @@ define i1 @reduce_smin_nxv2i1(<vscale x 2 x i1> %vec) {
 define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
 ; CHECK-LABEL: reduce_umax_nxv16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.b
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.umax.i1.nxv16i1(<vscale x 16 x i1> %vec)
@@ -255,8 +246,7 @@ define i1 @reduce_umax_nxv16i1(<vscale x 16 x i1> %vec) {
 define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
 ; CHECK-LABEL: reduce_umax_nxv8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.umax.i1.nxv8i1(<vscale x 8 x i1> %vec)
@@ -266,8 +256,7 @@ define i1 @reduce_umax_nxv8i1(<vscale x 8 x i1> %vec) {
 define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
 ; CHECK-LABEL: reduce_umax_nxv4i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.umax.i1.nxv4i1(<vscale x 4 x i1> %vec)
@@ -277,8 +266,7 @@ define i1 @reduce_umax_nxv4i1(<vscale x 4 x i1> %vec) {
 define i1 @reduce_umax_nxv2i1(<vscale x 2 x i1> %vec) {
 ; CHECK-LABEL: reduce_umax_nxv2i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    ptest p1, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.umax.i1.nxv2i1(<vscale x 2 x i1> %vec)
diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
index 9d6a3a4b3d96f..2afcaf7cedd80 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
@@ -46,7 +46,7 @@ define i1 @orv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
 ; CHECK-NEXT:    orr p0.b, p2/z, p0.b, p1.b
-; CHECK-NEXT:    ptest p2, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.or.nxv32i1(<vscale x 32 x i1> %a)
@@ -89,7 +89,7 @@ define i1 @sminv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
 ; CHECK-NEXT:    orr p0.b, p2/z, p0.b, p1.b
-; CHECK-NEXT:    ptest p2, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.smin.nxv32i1(<vscale x 32 x i1> %a)
@@ -103,7 +103,7 @@ define i1 @umaxv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
 ; CHECK-NEXT:    orr p0.b, p2/z, p0.b, p1.b
-; CHECK-NEXT:    ptest p2, p0.b
+; CHECK-NEXT:    ptest p0, p0.b
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
   %res = call i1 @llvm.vector.reduce.umax.nxv32i1(<vscale x 32 x i1> %a)

From b2f50042590577b25184ab0f7d3461d167e6bd9e Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Thu, 27 Jan 2022 20:45:06 +0900
Subject: [PATCH 896/946] Revert "[mlir][bufferize] Insert memref.cast ops
 during finalizing pass"

This reverts commit 1043107ce5e2dee38f6a9bf459549a75f78a83b2.

This commit caused a breakage in `finalizing-bufferize.mlir`.
---
 .../Dialect/Bufferization/IR/Bufferization.h  | 12 ------
 .../Bufferization/IR/BufferizationOps.cpp     |  8 +---
 .../Bufferization/Transforms/Bufferize.cpp    |  1 -
 .../Transforms/finalizing-bufferize.mlir      | 40 ++-----------------
 4 files changed, 4 insertions(+), 57 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
index 2a1edf8d9e099..2cbfc901f239b 100644
--- a/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
+++ b/mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h
@@ -20,18 +20,6 @@
 
 #include "mlir/Dialect/Bufferization/IR/BufferizationOpsDialect.h.inc"
 
-namespace mlir {
-class RewritePatternSet;
-class MLIRContext;
-
-namespace bufferization {
-/// Populate patterns for folding to_memref and to_tensor ops.
-/// Note: to_memref(to_tensor(x)) without type changes are handled by a folder.
-void populateBufferizationOpFoldingPatterns(RewritePatternSet &patterns,
-                                            MLIRContext *context);
-} // namespace bufferization
-} // namespace mlir
-
 //===----------------------------------------------------------------------===//
 // Bufferization Dialect Operations
 //===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
index 28a8e5d1ea982..f1ec7bbdead24 100644
--- a/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
+++ b/mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp
@@ -240,8 +240,7 @@ static LogicalResult foldToMemrefToTensorPair(RewriterBase &rewriter,
       if (resultType.getShape()[i] != ShapedType::kDynamicSize)
         continue;
       auto index = rewriter.createOrFold<arith::ConstantIndexOp>(loc, i);
-      Value size =
-          rewriter.create<memref::DimOp>(loc, memrefToTensor.memref(), index);
+      Value size = rewriter.create<tensor::DimOp>(loc, memrefToTensor, index);
       dynamicOperands.push_back(size);
     }
     // TODO: Use alloc/memcpy callback from BufferizationOptions if called via
@@ -310,11 +309,6 @@ void ToMemrefOp::getCanonicalizationPatterns(RewritePatternSet &results,
       context);
 }
 
-void bufferization::populateBufferizationOpFoldingPatterns(
-    RewritePatternSet &patterns, MLIRContext *context) {
-  patterns.add<TensorLoadToMemref>(context);
-}
-
 LogicalResult ToMemrefOp::bufferize(RewriterBase &rewriter,
                                     const BufferizationState &state) {
   // Fold to_memref(to_tensor(x)) to x. Insert a cast if necessary.
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
index d31c3532509e7..2b2d7cceeabb4 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp
@@ -92,7 +92,6 @@ void mlir::bufferization::populateEliminateBufferizeMaterializationsPatterns(
     BufferizeTypeConverter &typeConverter, RewritePatternSet &patterns) {
   patterns.add<BufferizeToTensorOp, BufferizeToMemrefOp>(typeConverter,
                                                          patterns.getContext());
-  populateBufferizationOpFoldingPatterns(patterns, patterns.getContext());
 }
 
 namespace {
diff --git a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
index 66e1ccfe5c763..fac685ae7e725 100644
--- a/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
+++ b/mlir/test/Dialect/Bufferization/Transforms/finalizing-bufferize.mlir
@@ -1,11 +1,11 @@
 // RUN: mlir-opt %s -finalizing-bufferize -split-input-file -verify-diagnostics | FileCheck %s
 
-// CHECK-LABEL: func @eliminate_materializations(
-//  CHECK-SAME:     %[[ARG:.*]]: memref<f32>) -> memref<f32> {
+// CHECK-LABEL:   func @eliminate_materializations(
+// CHECK-SAME:                                     %[[ARG:.*]]: memref<f32>) -> memref<f32> {
+// CHECK:           return %[[ARG]] : memref<f32>
 func @eliminate_materializations(%arg0: memref<f32>) -> memref<f32> {
   %0 = bufferization.to_tensor %arg0 : memref<f32>
   %1 = bufferization.to_memref %0 : memref<f32>
-  // CHECK: return %[[ARG]] : memref<f32>
   return %1 : memref<f32>
 }
 
@@ -26,37 +26,3 @@ func @unable_to_convert_lone_tensor_load(%arg0: memref<f32>) {
   "test.sink"(%0) : (tensor<f32>) -> ()
   return
 }
-
-// -----
-
-// CHECK: #[[$MAP1:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-#map1 = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @insert_memref_cast(
-//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32>
-func @insert_memref_cast(%arg0: memref<?xf32>) -> memref<?xf32, #map1> {
-  %0 = bufferization.to_tensor %arg0 : memref<?xf32>
-  %1 = bufferization.to_memref %0 : memref<?xf32, #map1>
-  // CHECK: %[[r:.*]] = memref.cast %[[arg0]] : memref<?xf32> to memref<?xf32, #[[$MAP1]]>
-  // CHECK: return %[[r]]
-  return %1 : memref<?xf32, #map1>
-}
-
-// -----
-
-// CHECK: #[[$MAP2:.*]] = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-#map2 = affine_map<(d0)[s0, s1] -> (d0 * s1 + s0)>
-
-// CHECK-LABEL: func @insert_buffer_copy(
-//  CHECK-SAME:     %[[arg0:.*]]: memref<?xf32, #[[$MAP2]]>
-func @insert_buffer_copy(%arg0: memref<?xf32, #map2>) -> memref<?xf32> {
-  // CHECK: %[[c0:.*]] = arith.constant 0 : index
-  // CHECK: %[[dim0:.*]] = memref.dim %[[arg0]], %[[c0]]
-  // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim0]]) : memref<?xf32>
-  // CHECK: memref.copy %[[arg0]], %[[alloc]]
-  %0 = bufferization.to_tensor %arg0 : memref<?xf32, #map2>
-  %1 = bufferization.to_memref %0 : memref<?xf32>
-
-  // CHECK: return %[[alloc]]
-  return %1 : memref<?xf32>
-}

From 659bf6d08c00993b777a96041ffaa98243464598 Mon Sep 17 00:00:00 2001
From: Shezan Baig <sbaig1@bloomberg.net>
Date: Thu, 27 Jan 2022 13:57:38 +0200
Subject: [PATCH 897/946] [Support] [Windows] Don't cancel delete if we failed
 to set delete

Following up on commit 177176f75c6fa3f624d6d964b9d340ce39511565, if we
failed to setDeleteDisposition(true) during TempFile creation, then
don't try to setDeleteDisposition(false) during TempFile::keep, since it
will likely fail as well.

Instead of letting TempFile::keep just fail, we should let it go ahead
and try renaming the file.

This fixes an issue we are seeing when running clang-cl.exe through the
Incredibuild distributed build system.  We're seeing that renaming
temporary object files would fail here:
https://github.com/llvm/llvm-project/blob/5c1f7b296ac0dddeca02891976e6ab5cfc006719/clang/lib/Frontend/CompilerInstance.cpp#L789

Reviewed By: mstorsjo

Differential Revision: https://reviews.llvm.org/D118212
---
 llvm/lib/Support/Path.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Support/Path.cpp b/llvm/lib/Support/Path.cpp
index 2f80cdba7e344..63d8d4ee46484 100644
--- a/llvm/lib/Support/Path.cpp
+++ b/llvm/lib/Support/Path.cpp
@@ -1254,7 +1254,8 @@ Error TempFile::keep(const Twine &Name) {
 #ifdef _WIN32
   // If we can't cancel the delete don't rename.
   auto H = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
-  std::error_code RenameEC = setDeleteDisposition(H, false);
+  std::error_code RenameEC =
+      RemoveOnClose ? std::error_code() : setDeleteDisposition(H, false);
   bool ShouldDelete = false;
   if (!RenameEC) {
     RenameEC = rename_handle(H, Name);

From 97916673d402c1d4915e6d49386a9d7739aca002 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 12:46:47 +0100
Subject: [PATCH 898/946] [IR] Support ifuncs in opaque pointer mode

Relax the type assertion for opaque pointers, and enumerate the
value type in TypeFinder and ValueEnumerator.
---
 llvm/lib/AsmParser/LLParser.cpp               | 23 ++++++++++---------
 llvm/lib/Bitcode/Writer/ValueEnumerator.cpp   |  4 +++-
 llvm/lib/IR/TypeFinder.cpp                    |  4 ++++
 .../test/Assembler/opaque-ptr-struct-types.ll | 18 +++++++++------
 llvm/test/Assembler/opaque-ptr.ll             |  3 +++
 llvm/test/Other/force-opaque-ptrs.ll          |  3 +++
 6 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index d93d41cd08467..432ec151cf8ae 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -985,17 +985,18 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
     return error(AliaseeLoc, "An alias or ifunc must have pointer type");
   unsigned AddrSpace = PTy->getAddressSpace();
 
-  if (IsAlias && !PTy->isOpaqueOrPointeeTypeMatches(Ty)) {
-    return error(
-        ExplicitTypeLoc,
-        typeComparisonErrorMessage(
-            "explicit pointee type doesn't match operand's pointee type", Ty,
-            PTy->getNonOpaquePointerElementType()));
-  }
-
-  if (!IsAlias && !PTy->getPointerElementType()->isFunctionTy()) {
-    return error(ExplicitTypeLoc,
-                 "explicit pointee type should be a function type");
+  if (IsAlias) {
+    if (!PTy->isOpaqueOrPointeeTypeMatches(Ty))
+      return error(
+          ExplicitTypeLoc,
+          typeComparisonErrorMessage(
+              "explicit pointee type doesn't match operand's pointee type", Ty,
+              PTy->getNonOpaquePointerElementType()));
+  } else {
+    if (!PTy->isOpaque() &&
+        !PTy->getNonOpaquePointerElementType()->isFunctionTy())
+      return error(ExplicitTypeLoc,
+                   "explicit pointee type should be a function type");
   }
 
   GlobalValue *GVal = nullptr;
diff --git a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
index df4f1a1873d7b..01f7e85bd60ed 100644
--- a/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -386,8 +386,10 @@ ValueEnumerator::ValueEnumerator(const Module &M,
   }
 
   // Enumerate the ifuncs.
-  for (const GlobalIFunc &GIF : M.ifuncs())
+  for (const GlobalIFunc &GIF : M.ifuncs()) {
     EnumerateValue(&GIF);
+    EnumerateType(GIF.getValueType());
+  }
 
   // Remember what is the cutoff between globalvalue's and other constants.
   unsigned FirstConstant = Values.size();
diff --git a/llvm/lib/IR/TypeFinder.cpp b/llvm/lib/IR/TypeFinder.cpp
index 248c33b7b8679..904af7e737ccf 100644
--- a/llvm/lib/IR/TypeFinder.cpp
+++ b/llvm/lib/IR/TypeFinder.cpp
@@ -48,6 +48,10 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
       incorporateValue(Aliasee);
   }
 
+  // Get types from ifuncs.
+  for (const auto &GI : M.ifuncs())
+    incorporateType(GI.getValueType());
+
   // Get types from functions.
   SmallVector<std::pair<unsigned, MDNode *>, 4> MDForInst;
   for (const Function &FI : M) {
diff --git a/llvm/test/Assembler/opaque-ptr-struct-types.ll b/llvm/test/Assembler/opaque-ptr-struct-types.ll
index f9aaa5a609b55..19d79ad3c820d 100644
--- a/llvm/test/Assembler/opaque-ptr-struct-types.ll
+++ b/llvm/test/Assembler/opaque-ptr-struct-types.ll
@@ -1,12 +1,13 @@
 ; RUN: opt -S -opaque-pointers < %s | opt -S -opaque-pointers | FileCheck %s
 
-; CHECK: %T1 = type { i8 }
-; CHECK: %T2 = type { i8 }
-; CHECK: %T3 = type { i8 }
-; CHECK: %T4 = type { i8 }
-; CHECK: %T5 = type { i8 }
-; CHECK: %T6 = type { i8 }
-; CHECK: %T7 = type { i8 }
+; CHECK-DAG: %T1 = type { i8 }
+; CHECK-DAG: %T2 = type { i8 }
+; CHECK-DAG: %T3 = type { i8 }
+; CHECK-DAG: %T4 = type { i8 }
+; CHECK-DAG: %T5 = type { i8 }
+; CHECK-DAG: %T6 = type { i8 }
+; CHECK-DAG: %T7 = type { i8 }
+; CHECK-DAG: %T8 = type { i8 }
 
 %T1 = type { i8 }
 %T2 = type { i8 }
@@ -15,9 +16,12 @@
 %T5 = type { i8 }
 %T6 = type { i8 }
 %T7 = type { i8 }
+%T8 = type { i8 }
 
 @g = external global %T1
 
+@g.ifunc = ifunc %T8 (), ptr @f
+
 define %T2 @f(ptr %p) {
   alloca %T3
   getelementptr %T4, ptr %p, i64 1
diff --git a/llvm/test/Assembler/opaque-ptr.ll b/llvm/test/Assembler/opaque-ptr.ll
index 37544ac17e0c1..ad292f880c56d 100644
--- a/llvm/test/Assembler/opaque-ptr.ll
+++ b/llvm/test/Assembler/opaque-ptr.ll
@@ -11,6 +11,9 @@
 @fptr2 = external global ptr () addrspace(1)*
 @fptr3 = external global ptr () addrspace(1)* addrspace(2)*
 
+; CHECK: @ifunc = ifunc void (), ptr @f
+@ifunc = ifunc void (), ptr @f
+
 ; CHECK: define ptr @f(ptr %a) {
 ; CHECK:     %b = bitcast ptr %a to ptr
 ; CHECK:     ret ptr %b
diff --git a/llvm/test/Other/force-opaque-ptrs.ll b/llvm/test/Other/force-opaque-ptrs.ll
index cfd0a7609fbb6..d963c7072e207 100644
--- a/llvm/test/Other/force-opaque-ptrs.ll
+++ b/llvm/test/Other/force-opaque-ptrs.ll
@@ -19,6 +19,9 @@
 ; CHECK: @ga2 = alias i19, ptr @g2
 @ga2 = alias i19, i19* bitcast (i18* @g2 to i19*)
 
+; CHECK: @gi = ifunc i20 (), ptr @f
+@gi = ifunc i20 (), i20 ()* ()* bitcast (void (i32*)* @f to i20 ()* ()*)
+
 define void @f(i32* %p) {
 ; CHECK-LABEL: define {{[^@]+}}@f
 ; CHECK-SAME: (ptr [[P:%.*]]) {

From c4e68953f6443b9854ce3ec1dc8756789beafcc1 Mon Sep 17 00:00:00 2001
From: Kadir Cetinkaya <kadircet@google.com>
Date: Wed, 26 Jan 2022 17:55:52 +0100
Subject: [PATCH 899/946] [clangd][Hover] Suppress initializers with many
 tokens

This results in excessive memory usage and eats a lot of screen estate.
Especially in the cases with lots of nested macro calls.

This patch tries to remedy it before the release cut by suppressing the
initializers. For better UX we should probably update the expression printer to
truncate those (behind some policy).

Fixes https://github.com/clangd/clangd/issues/917

Differential Revision: https://reviews.llvm.org/D118260
---
 clang-tools-extra/clangd/Hover.cpp            | 19 +++++++++++++++----
 .../clangd/unittests/HoverTests.cpp           | 14 ++++++++++++++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clangd/Hover.cpp b/clang-tools-extra/clangd/Hover.cpp
index c5cb5cd3df53a..d1d8142f53cb6 100644
--- a/clang-tools-extra/clangd/Hover.cpp
+++ b/clang-tools-extra/clangd/Hover.cpp
@@ -126,7 +126,17 @@ std::string getNamespaceScope(const Decl *D) {
   return "";
 }
 
-std::string printDefinition(const Decl *D, const PrintingPolicy &PP) {
+std::string printDefinition(const Decl *D, PrintingPolicy PP,
+                            const syntax::TokenBuffer &TB) {
+  if (auto *VD = llvm::dyn_cast<VarDecl>(D)) {
+    if (auto *IE = VD->getInit()) {
+      // Initializers might be huge and result in lots of memory allocations in
+      // some catostrophic cases. Such long lists are not useful in hover cards
+      // anyway.
+      if (200 < TB.expandedTokens(IE->getSourceRange()).size())
+        PP.SuppressInitializers = true;
+    }
+  }
   std::string Definition;
   llvm::raw_string_ostream OS(Definition);
   D->print(OS, PP);
@@ -568,7 +578,8 @@ std::string synthesizeDocumentation(const NamedDecl *ND) {
 
 /// Generate a \p Hover object given the declaration \p D.
 HoverInfo getHoverContents(const NamedDecl *D, const PrintingPolicy &PP,
-                           const SymbolIndex *Index) {
+                           const SymbolIndex *Index,
+                           const syntax::TokenBuffer &TB) {
   HoverInfo HI;
   const ASTContext &Ctx = D->getASTContext();
 
@@ -630,7 +641,7 @@ HoverInfo getHoverContents(const NamedDecl *D, const PrintingPolicy &PP,
       HI.Value = toString(ECD->getInitVal(), 10);
   }
 
-  HI.Definition = printDefinition(D, PP);
+  HI.Definition = printDefinition(D, PP, TB);
   return HI;
 }
 
@@ -1029,7 +1040,7 @@ llvm::Optional<HoverInfo> getHover(ParsedAST &AST, Position Pos,
       auto Decls = explicitReferenceTargets(N->ASTNode, DeclRelation::Alias,
                                             AST.getHeuristicResolver());
       if (!Decls.empty()) {
-        HI = getHoverContents(Decls.front(), PP, Index);
+        HI = getHoverContents(Decls.front(), PP, Index, TB);
         // Layout info only shown when hovering on the field/class itself.
         if (Decls.front() == N->ASTNode.get<Decl>())
           addLayoutInfo(*Decls.front(), *HI);
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index 6f5f6ba7b2669..b0974d99f0ac4 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -3172,6 +3172,20 @@ TEST(Hover, DisableShowAKA) {
   EXPECT_EQ(H->Type, HoverInfo::PrintedType("m_int"));
 }
 
+TEST(Hover, HideBigInitializers) {
+  Annotations T(R"cpp(
+  #define A(x) x, x, x, x
+  #define B(x) A(A(A(A(x))))
+  int a^rr[] = {B(0)};
+  )cpp");
+
+  TestTU TU = TestTU::withCode(T.code());
+  auto AST = TU.build();
+  auto H = getHover(AST, T.point(), format::getLLVMStyle(), nullptr);
+
+  ASSERT_TRUE(H);
+  EXPECT_EQ(H->Definition, "int arr[]");
+}
 } // namespace
 } // namespace clangd
 } // namespace clang

From e74d14603f3c1407f3ab34aed0cbd805681c6b4c Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Thu, 27 Jan 2022 19:05:23 +0700
Subject: [PATCH 900/946] [Test] Add data layout to relevant tests + some
 wide-typed tests

---
 llvm/test/Transforms/InstCombine/icmp-vec.ll  |  2 ++
 .../InstCombine/reduction-and-sext-zext-i1.ll | 24 ++++++++++++++++++
 .../InstCombine/reduction-or-sext-zext-i1.ll  | 25 +++++++++++++++++++
 3 files changed, 51 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/icmp-vec.ll b/llvm/test/Transforms/InstCombine/icmp-vec.ll
index df4cfc7214ab4..8668ad454505a 100644
--- a/llvm/test/Transforms/InstCombine/icmp-vec.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-vec.ll
@@ -6,6 +6,8 @@
 ; Normal types are ConstantDataVectors. Test the constant values adjacent to the
 ; min/max values that we're not allowed to transform.
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
 define <2 x i1> @sge(<2 x i8> %x) {
 ; CHECK-LABEL: @sge(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt <2 x i8> [[X:%.*]], <i8 -128, i8 126>
diff --git a/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
index e167ec4488ccd..40af5709796a0 100644
--- a/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-and-sext-zext-i1.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
 define i1 @reduce_and_self(<8 x i1> %x) {
 ; CHECK-LABEL: @reduce_and_self(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i1> [[X:%.*]] to i8
@@ -117,6 +119,28 @@ bb:
   ret i1 %all_eq
 }
 
+define i1 @reduce_and_pointer_cast_wide(i8* %arg, i8* %arg1) {
+; CHECK-LABEL: @reduce_and_pointer_cast_wide(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i16>*
+; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i16>*
+; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR1]], align 16
+; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR2]], align 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i16> [[LHS]], [[RHS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+bb:
+  %ptr1 = bitcast i8* %arg1 to <8 x i16>*
+  %ptr2 = bitcast i8* %arg to <8 x i16>*
+  %lhs = load <8 x i16>, <8 x i16>* %ptr1
+  %rhs = load <8 x i16>, <8 x i16>* %ptr2
+  %cmp = icmp eq <8 x i16> %lhs, %rhs
+  %all_eq = call i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %cmp)
+  ret i1 %all_eq
+}
+
 declare i1 @llvm.vector.reduce.and.v8i32(<8 x i1> %a)
 declare i32 @llvm.vector.reduce.and.v4i32(<4 x i32> %a)
 declare i64 @llvm.vector.reduce.and.v8i64(<8 x i64> %a)
diff --git a/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll b/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
index a4b39ec7b66c7..6f5b34e02c793 100644
--- a/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
+++ b/llvm/test/Transforms/InstCombine/reduction-or-sext-zext-i1.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
 define i1 @reduce_or_self(<8 x i1> %x) {
 ; CHECK-LABEL: @reduce_or_self(
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i1> [[X:%.*]] to i8
@@ -118,6 +120,29 @@ bb:
   ret i1 %all_eq
 }
 
+define i1 @reduce_or_pointer_cast_wide(i8* %arg, i8* %arg1) {
+; CHECK-LABEL: @reduce_or_pointer_cast_wide(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[PTR1:%.*]] = bitcast i8* [[ARG1:%.*]] to <8 x i16>*
+; CHECK-NEXT:    [[PTR2:%.*]] = bitcast i8* [[ARG:%.*]] to <8 x i16>*
+; CHECK-NEXT:    [[LHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR1]], align 16
+; CHECK-NEXT:    [[RHS:%.*]] = load <8 x i16>, <8 x i16>* [[PTR2]], align 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <8 x i16> [[LHS]], [[RHS]]
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[CMP]] to i8
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i8 [[TMP0]], 0
+; CHECK-NEXT:    ret i1 [[DOTNOT]]
+;
+bb:
+  %ptr1 = bitcast i8* %arg1 to <8 x i16>*
+  %ptr2 = bitcast i8* %arg to <8 x i16>*
+  %lhs = load <8 x i16>, <8 x i16>* %ptr1
+  %rhs = load <8 x i16>, <8 x i16>* %ptr2
+  %cmp = icmp ne <8 x i16> %lhs, %rhs
+  %any_ne = call i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %cmp)
+  %all_eq = xor i1 %any_ne, 1
+  ret i1 %all_eq
+}
+
 declare i1 @llvm.vector.reduce.or.v8i32(<8 x i1> %a)
 declare i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %a)
 declare i64 @llvm.vector.reduce.or.v8i64(<8 x i64> %a)

From c5d2b97a69921fb4deff875eb8892a569707f737 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Thu, 27 Jan 2022 12:10:48 +0000
Subject: [PATCH 901/946] [AMDGPU] Precommit test for swizzled store aliasing
 two loads

---
 llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir | 34 +++++++++++++++++-----
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
index 38239e0315e1e..f0296d8544ed5 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
@@ -790,10 +790,30 @@ body:             |
     %1:sgpr_32 = COPY $sgpr1
     %2:sgpr_32 = COPY $sgpr2
     %3:sgpr_32 = COPY $sgpr3
+    %4:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+    %5:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 12, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+...
+---
+
+
+# GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzled_store
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX9: TBUFFER_STORE_FORMAT_X_OFFSET_exact %4, %5, 0, 6, 116, 0, 0, 1, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+name: gfx9_tbuffer_load_merge_across_swizzled_store
+body:             |
+  bb.0.entry:
+    %0:sgpr_32 = COPY $sgpr0
+    %1:sgpr_32 = COPY $sgpr1
+    %2:sgpr_32 = COPY $sgpr2
+    %3:sgpr_32 = COPY $sgpr3
+    %4:vgpr_32 = COPY $vgpr0
     %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
-    %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
-    %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
-    %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    TBUFFER_STORE_FORMAT_X_OFFSET_exact %4:vgpr_32, %5:sgpr_128, 0, 6, 116, 0, 0, 1, implicit $exec :: (dereferenceable store (s32), align 1, addrspace 4)
+    %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
 ...
 ---
 
@@ -1588,9 +1608,9 @@ body:             |
     %1:sgpr_32 = COPY $sgpr1
     %2:sgpr_32 = COPY $sgpr2
     %3:sgpr_32 = COPY $sgpr3
-    %5:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
-    %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
-    %8:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
-    %9:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %5:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %4:sgpr_128 = REG_SEQUENCE %0:sgpr_32, %subreg.sub0, %1:sgpr_32, %subreg.sub1, %2:sgpr_32, %subreg.sub2, %3:sgpr_32, %subreg.sub3
+    %5:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %6:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+    %7:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4:sgpr_128, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
 ...
 ---

From 3053e143bef273a2e65519cea15be8f7ce6cd854 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@moritz.systems>
Date: Wed, 19 Jan 2022 20:16:07 +0100
Subject: [PATCH 902/946] [lldb] [Platform] Support synthesizing siginfo_t

Support synthesizing the siginfo_t type from the Platform plugin.
This type is going to be used by LLDB client to process the raw siginfo
data received from lldb-server without the necessity of relying
on target's debug info being present.

Differential Revision: https://reviews.llvm.org/D117707
---
 lldb/include/lldb/Target/Platform.h           |   2 +
 .../Platform/FreeBSD/PlatformFreeBSD.cpp      |  94 ++++++
 .../Platform/FreeBSD/PlatformFreeBSD.h        |   6 +
 .../Plugins/Platform/Linux/PlatformLinux.cpp  | 166 ++++++++++
 .../Plugins/Platform/Linux/PlatformLinux.h    |   6 +
 .../Platform/NetBSD/PlatformNetBSD.cpp        | 141 ++++++++
 .../Plugins/Platform/NetBSD/PlatformNetBSD.h  |   6 +
 lldb/source/Target/Platform.cpp               |   4 +
 lldb/unittests/Platform/CMakeLists.txt        |   4 +
 .../Platform/PlatformSiginfoTest.cpp          | 311 ++++++++++++++++++
 .../Platform/tools/generate_siginfo.c         | 112 +++++++
 11 files changed, 852 insertions(+)
 create mode 100644 lldb/unittests/Platform/PlatformSiginfoTest.cpp
 create mode 100644 lldb/unittests/Platform/tools/generate_siginfo.c

diff --git a/lldb/include/lldb/Target/Platform.h b/lldb/include/lldb/Target/Platform.h
index d50badb1e6878..4ef6c9d82b1a3 100644
--- a/lldb/include/lldb/Target/Platform.h
+++ b/lldb/include/lldb/Target/Platform.h
@@ -863,6 +863,8 @@ class Platform : public PluginInterface {
     return nullptr;
   }
 
+  virtual CompilerType GetSiginfoType(const llvm::Triple &triple);
+
 protected:
   /// Create a list of ArchSpecs with the given OS and a architectures. The
   /// vendor field is left as an "unspecified unknown".
diff --git a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
index 754d06de7cb92..2bf0a44e4a3e0 100644
--- a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
+++ b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.cpp
@@ -183,3 +183,97 @@ MmapArgList PlatformFreeBSD::GetMmapArgumentList(const ArchSpec &arch,
     args.push_back(0);
   return args;
 }
+
+CompilerType PlatformFreeBSD::GetSiginfoType(const llvm::Triple &triple) {
+  if (!m_type_system_up)
+    m_type_system_up.reset(new TypeSystemClang("siginfo", triple));
+  TypeSystemClang *ast = m_type_system_up.get();
+
+  // generic types
+  CompilerType int_type = ast->GetBasicType(eBasicTypeInt);
+  CompilerType uint_type = ast->GetBasicType(eBasicTypeUnsignedInt);
+  CompilerType long_type = ast->GetBasicType(eBasicTypeLong);
+  CompilerType voidp_type = ast->GetBasicType(eBasicTypeVoid).GetPointerType();
+
+  // platform-specific types
+  CompilerType &pid_type = int_type;
+  CompilerType &uid_type = uint_type;
+
+  CompilerType sigval_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "__lldb_sigval_t",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(sigval_type);
+  ast->AddFieldToRecordType(sigval_type, "sival_int", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(sigval_type, "sival_ptr", voidp_type,
+                            lldb::eAccessPublic, 0);
+  ast->CompleteTagDeclarationDefinition(sigval_type);
+
+  // siginfo_t
+  CompilerType siginfo_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "__lldb_siginfo_t",
+      clang::TTK_Struct, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(siginfo_type);
+  ast->AddFieldToRecordType(siginfo_type, "si_signo", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(siginfo_type, "si_errno", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(siginfo_type, "si_code", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(siginfo_type, "si_pid", pid_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(siginfo_type, "si_uid", uid_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(siginfo_type, "si_status", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(siginfo_type, "si_addr", voidp_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(siginfo_type, "si_value", sigval_type,
+                            lldb::eAccessPublic, 0);
+
+  // union used to hold the signal data
+  CompilerType union_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(union_type);
+
+  ast->AddFieldToRecordType(
+      union_type, "_fault",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_trapno", int_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_timer",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_timerid", int_type},
+                                         {"_overrun", int_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_mesgq",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_mqd", int_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_poll",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_band", long_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->CompleteTagDeclarationDefinition(union_type);
+  ast->AddFieldToRecordType(siginfo_type, "_reason", union_type,
+                            lldb::eAccessPublic, 0);
+
+  ast->CompleteTagDeclarationDefinition(siginfo_type);
+  return siginfo_type;
+}
diff --git a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h
index fd37b13de0177..7f9dfd87a59a0 100644
--- a/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h
+++ b/lldb/source/Plugins/Platform/FreeBSD/PlatformFreeBSD.h
@@ -10,6 +10,7 @@
 #define LLDB_SOURCE_PLUGINS_PLATFORM_FREEBSD_PLATFORMFREEBSD_H
 
 #include "Plugins/Platform/POSIX/PlatformPOSIX.h"
+#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
 
 namespace lldb_private {
 namespace platform_freebsd {
@@ -53,7 +54,12 @@ class PlatformFreeBSD : public PlatformPOSIX {
                                   unsigned flags, lldb::addr_t fd,
                                   lldb::addr_t offset) override;
 
+  CompilerType GetSiginfoType(const llvm::Triple &triple) override;
+
   std::vector<ArchSpec> m_supported_architectures;
+
+private:
+  std::unique_ptr<TypeSystemClang> m_type_system_up;
 };
 
 } // namespace platform_freebsd
diff --git a/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp b/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp
index 6278d51289d76..92f44b7c6b50f 100644
--- a/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp
+++ b/lldb/source/Plugins/Platform/Linux/PlatformLinux.cpp
@@ -309,3 +309,169 @@ MmapArgList PlatformLinux::GetMmapArgumentList(const ArchSpec &arch,
   return args;
 }
 
+CompilerType PlatformLinux::GetSiginfoType(const llvm::Triple &triple) {
+  if (!m_type_system_up)
+    m_type_system_up.reset(new TypeSystemClang("siginfo", triple));
+  TypeSystemClang *ast = m_type_system_up.get();
+
+  bool si_errno_then_code = true;
+
+  switch (triple.getArch()) {
+  case llvm::Triple::mips:
+  case llvm::Triple::mipsel:
+  case llvm::Triple::mips64:
+  case llvm::Triple::mips64el:
+    // mips has si_code and si_errno swapped
+    si_errno_then_code = false;
+    break;
+  default:
+    break;
+  }
+
+  // generic types
+  CompilerType int_type = ast->GetBasicType(eBasicTypeInt);
+  CompilerType uint_type = ast->GetBasicType(eBasicTypeUnsignedInt);
+  CompilerType short_type = ast->GetBasicType(eBasicTypeShort);
+  CompilerType long_type = ast->GetBasicType(eBasicTypeLong);
+  CompilerType voidp_type = ast->GetBasicType(eBasicTypeVoid).GetPointerType();
+
+  // platform-specific types
+  CompilerType &pid_type = int_type;
+  CompilerType &uid_type = uint_type;
+  CompilerType &clock_type = long_type;
+  CompilerType &band_type = long_type;
+
+  CompilerType sigval_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "__lldb_sigval_t",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(sigval_type);
+  ast->AddFieldToRecordType(sigval_type, "sival_int", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(sigval_type, "sival_ptr", voidp_type,
+                            lldb::eAccessPublic, 0);
+  ast->CompleteTagDeclarationDefinition(sigval_type);
+
+  CompilerType sigfault_bounds_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(sigfault_bounds_type);
+  ast->AddFieldToRecordType(sigfault_bounds_type, "_addr_bnd",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_lower", voidp_type},
+                                         {"_upper", voidp_type},
+                                     }),
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(sigfault_bounds_type, "_pkey", uint_type,
+                            lldb::eAccessPublic, 0);
+  ast->CompleteTagDeclarationDefinition(sigfault_bounds_type);
+
+  // siginfo_t
+  CompilerType siginfo_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "__lldb_siginfo_t",
+      clang::TTK_Struct, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(siginfo_type);
+  ast->AddFieldToRecordType(siginfo_type, "si_signo", int_type,
+                            lldb::eAccessPublic, 0);
+
+  if (si_errno_then_code) {
+    ast->AddFieldToRecordType(siginfo_type, "si_errno", int_type,
+                              lldb::eAccessPublic, 0);
+    ast->AddFieldToRecordType(siginfo_type, "si_code", int_type,
+                              lldb::eAccessPublic, 0);
+  } else {
+    ast->AddFieldToRecordType(siginfo_type, "si_code", int_type,
+                              lldb::eAccessPublic, 0);
+    ast->AddFieldToRecordType(siginfo_type, "si_errno", int_type,
+                              lldb::eAccessPublic, 0);
+  }
+
+  // the structure is padded on 64-bit arches to fix alignment
+  if (triple.isArch64Bit())
+    ast->AddFieldToRecordType(siginfo_type, "__pad0", int_type,
+                              lldb::eAccessPublic, 0);
+
+  // union used to hold the signal data
+  CompilerType union_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(union_type);
+
+  ast->AddFieldToRecordType(
+      union_type, "_kill",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"si_pid", pid_type},
+                                         {"si_uid", uid_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_timer",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"si_tid", int_type},
+                                         {"si_overrun", int_type},
+                                         {"si_sigval", sigval_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_rt",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"si_pid", pid_type},
+                                         {"si_uid", uid_type},
+                                         {"si_sigval", sigval_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_sigchld",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"si_pid", pid_type},
+                                         {"si_uid", uid_type},
+                                         {"si_status", int_type},
+                                         {"si_utime", clock_type},
+                                         {"si_stime", clock_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_sigfault",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"si_addr", voidp_type},
+                                         {"si_addr_lsb", short_type},
+                                         {"_bounds", sigfault_bounds_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_sigpoll",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"si_band", band_type},
+                                         {"si_fd", int_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  // NB: SIGSYS is not present on ia64 but we don't seem to support that
+  ast->AddFieldToRecordType(
+      union_type, "_sigsys",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_call_addr", voidp_type},
+                                         {"_syscall", int_type},
+                                         {"_arch", uint_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->CompleteTagDeclarationDefinition(union_type);
+  ast->AddFieldToRecordType(siginfo_type, "_sifields", union_type,
+                            lldb::eAccessPublic, 0);
+
+  ast->CompleteTagDeclarationDefinition(siginfo_type);
+  return siginfo_type;
+}
diff --git a/lldb/source/Plugins/Platform/Linux/PlatformLinux.h b/lldb/source/Plugins/Platform/Linux/PlatformLinux.h
index 13224901cd926..87d0926e3a8ed 100644
--- a/lldb/source/Plugins/Platform/Linux/PlatformLinux.h
+++ b/lldb/source/Plugins/Platform/Linux/PlatformLinux.h
@@ -10,6 +10,7 @@
 #define LLDB_SOURCE_PLUGINS_PLATFORM_LINUX_PLATFORMLINUX_H
 
 #include "Plugins/Platform/POSIX/PlatformPOSIX.h"
+#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
 
 namespace lldb_private {
 namespace platform_linux {
@@ -58,7 +59,12 @@ class PlatformLinux : public PlatformPOSIX {
                                   unsigned flags, lldb::addr_t fd,
                                   lldb::addr_t offset) override;
 
+  CompilerType GetSiginfoType(const llvm::Triple &triple) override;
+
   std::vector<ArchSpec> m_supported_architectures;
+
+private:
+  std::unique_ptr<TypeSystemClang> m_type_system_up;
 };
 
 } // namespace platform_linux
diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
index 552b3890615c7..ddba64bc5d111 100644
--- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
+++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.cpp
@@ -202,3 +202,144 @@ MmapArgList PlatformNetBSD::GetMmapArgumentList(const ArchSpec &arch,
   MmapArgList args({addr, length, prot, flags_platform, fd, offset});
   return args;
 }
+
+CompilerType PlatformNetBSD::GetSiginfoType(const llvm::Triple &triple) {
+  if (!m_type_system_up)
+    m_type_system_up.reset(new TypeSystemClang("siginfo", triple));
+  TypeSystemClang *ast = m_type_system_up.get();
+
+  // generic types
+  CompilerType int_type = ast->GetBasicType(eBasicTypeInt);
+  CompilerType uint_type = ast->GetBasicType(eBasicTypeUnsignedInt);
+  CompilerType long_type = ast->GetBasicType(eBasicTypeLong);
+  CompilerType long_long_type = ast->GetBasicType(eBasicTypeLongLong);
+  CompilerType voidp_type = ast->GetBasicType(eBasicTypeVoid).GetPointerType();
+
+  // platform-specific types
+  CompilerType &pid_type = int_type;
+  CompilerType &uid_type = uint_type;
+  CompilerType &clock_type = uint_type;
+  CompilerType &lwpid_type = int_type;
+
+  CompilerType sigval_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "__lldb_sigval_t",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(sigval_type);
+  ast->AddFieldToRecordType(sigval_type, "sival_int", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(sigval_type, "sival_ptr", voidp_type,
+                            lldb::eAccessPublic, 0);
+  ast->CompleteTagDeclarationDefinition(sigval_type);
+
+  CompilerType ptrace_option_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(ptrace_option_type);
+  ast->AddFieldToRecordType(ptrace_option_type, "_pe_other_pid", pid_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(ptrace_option_type, "_pe_lwp", lwpid_type,
+                            lldb::eAccessPublic, 0);
+  ast->CompleteTagDeclarationDefinition(ptrace_option_type);
+
+  // siginfo_t
+  CompilerType siginfo_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "__lldb_siginfo_t",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(siginfo_type);
+
+  // struct _ksiginfo
+  CompilerType ksiginfo_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "",
+      clang::TTK_Struct, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(ksiginfo_type);
+  ast->AddFieldToRecordType(ksiginfo_type, "_signo", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(ksiginfo_type, "_code", int_type,
+                            lldb::eAccessPublic, 0);
+  ast->AddFieldToRecordType(ksiginfo_type, "_errno", int_type,
+                            lldb::eAccessPublic, 0);
+
+  // the structure is padded on 64-bit arches to fix alignment
+  if (triple.isArch64Bit())
+    ast->AddFieldToRecordType(ksiginfo_type, "__pad0", int_type,
+                              lldb::eAccessPublic, 0);
+
+  // union used to hold the signal data
+  CompilerType union_type = ast->CreateRecordType(
+      nullptr, OptionalClangModuleID(), lldb::eAccessPublic, "",
+      clang::TTK_Union, lldb::eLanguageTypeC);
+  ast->StartTagDeclarationDefinition(union_type);
+
+  ast->AddFieldToRecordType(
+      union_type, "_rt",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_pid", pid_type},
+                                         {"_uid", uid_type},
+                                         {"_value", sigval_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_child",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_pid", pid_type},
+                                         {"_uid", uid_type},
+                                         {"_status", int_type},
+                                         {"_utime", clock_type},
+                                         {"_stime", clock_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_fault",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_addr", voidp_type},
+                                         {"_trap", int_type},
+                                         {"_trap2", int_type},
+                                         {"_trap3", int_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_poll",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_band", long_type},
+                                         {"_fd", int_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(union_type, "_syscall",
+                            ast->CreateStructForIdentifier(
+                                ConstString(),
+                                {
+                                    {"_sysnum", int_type},
+                                    {"_retval", int_type.GetArrayType(2)},
+                                    {"_error", int_type},
+                                    {"_args", long_long_type.GetArrayType(8)},
+                                }),
+                            lldb::eAccessPublic, 0);
+
+  ast->AddFieldToRecordType(
+      union_type, "_ptrace_state",
+      ast->CreateStructForIdentifier(ConstString(),
+                                     {
+                                         {"_pe_report_event", int_type},
+                                         {"_option", ptrace_option_type},
+                                     }),
+      lldb::eAccessPublic, 0);
+
+  ast->CompleteTagDeclarationDefinition(union_type);
+  ast->AddFieldToRecordType(ksiginfo_type, "_reason", union_type,
+                            lldb::eAccessPublic, 0);
+
+  ast->CompleteTagDeclarationDefinition(ksiginfo_type);
+  ast->AddFieldToRecordType(siginfo_type, "_info", ksiginfo_type,
+                            lldb::eAccessPublic, 0);
+
+  ast->CompleteTagDeclarationDefinition(siginfo_type);
+  return siginfo_type;
+}
diff --git a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h
index 7158fbd26efbb..433cf66531262 100644
--- a/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h
+++ b/lldb/source/Plugins/Platform/NetBSD/PlatformNetBSD.h
@@ -10,6 +10,7 @@
 #define LLDB_SOURCE_PLUGINS_PLATFORM_NETBSD_PLATFORMNETBSD_H
 
 #include "Plugins/Platform/POSIX/PlatformPOSIX.h"
+#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
 
 namespace lldb_private {
 namespace platform_netbsd {
@@ -55,7 +56,12 @@ class PlatformNetBSD : public PlatformPOSIX {
                                   unsigned flags, lldb::addr_t fd,
                                   lldb::addr_t offset) override;
 
+  CompilerType GetSiginfoType(const llvm::Triple &triple) override;
+
   std::vector<ArchSpec> m_supported_architectures;
+
+private:
+  std::unique_ptr<TypeSystemClang> m_type_system_up;
 };
 
 } // namespace platform_netbsd
diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp
index 00e1c16032275..3c331c8760dfa 100644
--- a/lldb/source/Target/Platform.cpp
+++ b/lldb/source/Target/Platform.cpp
@@ -2003,3 +2003,7 @@ size_t Platform::GetSoftwareBreakpointTrapOpcode(Target &target,
 
   return 0;
 }
+
+CompilerType Platform::GetSiginfoType(const llvm::Triple& triple) {
+  return CompilerType();
+}
diff --git a/lldb/unittests/Platform/CMakeLists.txt b/lldb/unittests/Platform/CMakeLists.txt
index ca5031b9b43e0..f63ac3b3a2808 100644
--- a/lldb/unittests/Platform/CMakeLists.txt
+++ b/lldb/unittests/Platform/CMakeLists.txt
@@ -1,9 +1,13 @@
 add_lldb_unittest(LLDBPlatformTests
   PlatformAppleSimulatorTest.cpp
   PlatformDarwinTest.cpp
+  PlatformSiginfoTest.cpp
 
   LINK_LIBS
+    lldbPluginPlatformFreeBSD
+    lldbPluginPlatformLinux
     lldbPluginPlatformMacOSX
+    lldbPluginPlatformNetBSD
   LINK_COMPONENTS
     Support
   )
diff --git a/lldb/unittests/Platform/PlatformSiginfoTest.cpp b/lldb/unittests/Platform/PlatformSiginfoTest.cpp
new file mode 100644
index 0000000000000..e8a5a88f34c42
--- /dev/null
+++ b/lldb/unittests/Platform/PlatformSiginfoTest.cpp
@@ -0,0 +1,311 @@
+//===-- PlatformSiginfoTest.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+
+#include <initializer_list>
+#include <tuple>
+
+#include "Plugins/Platform/FreeBSD/PlatformFreeBSD.h"
+#include "Plugins/Platform/Linux/PlatformLinux.h"
+#include "Plugins/Platform/NetBSD/PlatformNetBSD.h"
+#include "Plugins/TypeSystem/Clang/TypeSystemClang.h"
+
+#include "TestingSupport/SubsystemRAII.h"
+#include "lldb/Core/Debugger.h"
+#include "lldb/Host/FileSystem.h"
+#include "lldb/Host/HostInfo.h"
+#include "lldb/Utility/ArchSpec.h"
+#include "lldb/Utility/Reproducer.h"
+
+using namespace lldb;
+using namespace lldb_private;
+using namespace lldb_private::repro;
+
+namespace {
+class PlatformSiginfoTest : public ::testing::Test {
+  SubsystemRAII<FileSystem, HostInfo, TypeSystemClang> subsystems;
+  PlatformSP platform_sp;
+  DebuggerSP debugger_sp;
+  TargetSP target_sp;
+
+public:
+  CompilerType siginfo_type;
+
+  void SetUp() override {
+    llvm::cantFail(Reproducer::Initialize(ReproducerMode::Off, llvm::None));
+    platform_freebsd::PlatformFreeBSD::Initialize();
+    platform_linux::PlatformLinux::Initialize();
+    platform_netbsd::PlatformNetBSD::Initialize();
+  }
+
+  void TearDown() override {
+    platform_netbsd::PlatformNetBSD::Terminate();
+    platform_linux::PlatformLinux::Terminate();
+    platform_freebsd::PlatformFreeBSD::Terminate();
+    Reproducer::Terminate();
+  }
+
+  typedef std::tuple<const char *, uint64_t, uint64_t> field_tuple;
+
+  void ExpectField(const CompilerType &siginfo_type, field_tuple field) {
+    const char *path;
+    uint64_t offset, size;
+    std::tie(path, offset, size) = field;
+
+    SCOPED_TRACE(path);
+    CompilerType field_type = siginfo_type;
+    uint64_t total_offset = 0;
+    for (auto field_name : llvm::split(path, '.')) {
+      uint64_t bit_offset;
+      ASSERT_NE(field_type.GetIndexOfFieldWithName(field_name.str().c_str(),
+                                                   &field_type, &bit_offset),
+                UINT32_MAX);
+      total_offset += bit_offset;
+    }
+
+    EXPECT_EQ(total_offset, offset * 8);
+    EXPECT_EQ(field_type.GetByteSize(nullptr), llvm::Optional<uint64_t>(size));
+  }
+
+  void ExpectFields(const CompilerType &container,
+                    std::initializer_list<field_tuple> fields) {
+    for (auto x : fields)
+      ExpectField(container, x);
+  }
+
+  void InitializeSiginfo(const std::string &triple) {
+    ArchSpec arch(triple);
+
+    switch (arch.GetTriple().getOS()) {
+    case llvm::Triple::FreeBSD:
+      platform_sp =
+          platform_freebsd::PlatformFreeBSD::CreateInstance(true, &arch);
+      break;
+    case llvm::Triple::Linux:
+      platform_sp = platform_linux::PlatformLinux::CreateInstance(true, &arch);
+      break;
+    case llvm::Triple::NetBSD:
+      platform_sp =
+          platform_netbsd::PlatformNetBSD::CreateInstance(true, &arch);
+      break;
+    default:
+      llvm_unreachable("unknown ostype in triple");
+    }
+    Platform::SetHostPlatform(platform_sp);
+
+    debugger_sp = Debugger::CreateInstance();
+    ASSERT_TRUE(debugger_sp);
+
+    debugger_sp->GetTargetList().CreateTarget(
+        *debugger_sp, "", arch, eLoadDependentsNo, platform_sp, target_sp);
+    ASSERT_TRUE(target_sp);
+
+    siginfo_type = platform_sp->GetSiginfoType(arch.GetTriple());
+  }
+};
+
+} // namespace
+
+TEST_F(PlatformSiginfoTest, TestLinux_64bit) {
+  for (std::string arch : {"x86_64", "aarch64", "powerpc64le"}) {
+    SCOPED_TRACE(arch);
+    InitializeSiginfo(arch + "-pc-linux-gnu");
+    ASSERT_TRUE(siginfo_type);
+
+    ExpectFields(siginfo_type,
+                 {
+                     {"si_signo", 0, 4},
+                     {"si_errno", 4, 4},
+                     {"si_code", 8, 4},
+                     {"_sifields._kill.si_pid", 16, 4},
+                     {"_sifields._kill.si_uid", 20, 4},
+                     {"_sifields._timer.si_tid", 16, 4},
+                     {"_sifields._timer.si_overrun", 20, 4},
+                     {"_sifields._timer.si_sigval", 24, 8},
+                     {"_sifields._rt.si_pid", 16, 4},
+                     {"_sifields._rt.si_uid", 20, 4},
+                     {"_sifields._rt.si_sigval", 24, 8},
+                     {"_sifields._sigchld.si_pid", 16, 4},
+                     {"_sifields._sigchld.si_uid", 20, 4},
+                     {"_sifields._sigchld.si_status", 24, 4},
+                     {"_sifields._sigchld.si_utime", 32, 8},
+                     {"_sifields._sigchld.si_stime", 40, 8},
+                     {"_sifields._sigfault.si_addr", 16, 8},
+                     {"_sifields._sigfault.si_addr_lsb", 24, 2},
+                     {"_sifields._sigfault._bounds._addr_bnd._lower", 32, 8},
+                     {"_sifields._sigfault._bounds._addr_bnd._upper", 40, 8},
+                     {"_sifields._sigfault._bounds._pkey", 32, 4},
+                     {"_sifields._sigpoll.si_band", 16, 8},
+                     {"_sifields._sigpoll.si_fd", 24, 4},
+                     {"_sifields._sigsys._call_addr", 16, 8},
+                     {"_sifields._sigsys._syscall", 24, 4},
+                     {"_sifields._sigsys._arch", 28, 4},
+                 });
+  }
+}
+
+TEST_F(PlatformSiginfoTest, TestLinux_32bit) {
+  for (std::string arch : {"i386", "armv7"}) {
+    SCOPED_TRACE(arch);
+    InitializeSiginfo(arch + "-pc-linux");
+    ASSERT_TRUE(siginfo_type);
+
+    ExpectFields(siginfo_type,
+                 {
+                     {"si_signo", 0, 4},
+                     {"si_errno", 4, 4},
+                     {"si_code", 8, 4},
+                     {"_sifields._kill.si_pid", 12, 4},
+                     {"_sifields._kill.si_uid", 16, 4},
+                     {"_sifields._timer.si_tid", 12, 4},
+                     {"_sifields._timer.si_overrun", 16, 4},
+                     {"_sifields._timer.si_sigval", 20, 4},
+                     {"_sifields._rt.si_pid", 12, 4},
+                     {"_sifields._rt.si_uid", 16, 4},
+                     {"_sifields._rt.si_sigval", 20, 4},
+                     {"_sifields._sigchld.si_pid", 12, 4},
+                     {"_sifields._sigchld.si_uid", 16, 4},
+                     {"_sifields._sigchld.si_status", 20, 4},
+                     {"_sifields._sigchld.si_utime", 24, 4},
+                     {"_sifields._sigchld.si_stime", 28, 4},
+                     {"_sifields._sigfault.si_addr", 12, 4},
+                     {"_sifields._sigfault.si_addr_lsb", 16, 2},
+                     {"_sifields._sigfault._bounds._addr_bnd._lower", 20, 4},
+                     {"_sifields._sigfault._bounds._addr_bnd._upper", 24, 4},
+                     {"_sifields._sigfault._bounds._pkey", 20, 4},
+                     {"_sifields._sigpoll.si_band", 12, 4},
+                     {"_sifields._sigpoll.si_fd", 16, 4},
+                     {"_sifields._sigsys._call_addr", 12, 4},
+                     {"_sifields._sigsys._syscall", 16, 4},
+                     {"_sifields._sigsys._arch", 20, 4},
+                 });
+  }
+}
+
+TEST_F(PlatformSiginfoTest, TestFreeBSD_64bit) {
+  for (std::string arch : {"x86_64", "aarch64"}) {
+    SCOPED_TRACE(arch);
+    InitializeSiginfo("x86_64-unknown-freebsd13.0");
+    ASSERT_TRUE(siginfo_type);
+
+    ExpectFields(siginfo_type, {
+                                   {"si_signo", 0, 4},
+                                   {"si_errno", 4, 4},
+                                   {"si_code", 8, 4},
+                                   {"si_pid", 12, 4},
+                                   {"si_uid", 16, 4},
+                                   {"si_status", 20, 4},
+                                   {"si_addr", 24, 8},
+                                   {"si_value", 32, 8},
+                                   {"_reason._fault._trapno", 40, 4},
+                                   {"_reason._timer._timerid", 40, 4},
+                                   {"_reason._timer._overrun", 44, 4},
+                                   {"_reason._mesgq._mqd", 40, 4},
+                                   {"_reason._poll._band", 40, 8},
+                               });
+  }
+}
+
+TEST_F(PlatformSiginfoTest, TestFreeBSD_32bit) {
+  for (std::string arch : {"i386"}) {
+    SCOPED_TRACE(arch);
+    InitializeSiginfo(arch + "-unknown-freebsd13.0");
+    ASSERT_TRUE(siginfo_type);
+
+    ExpectFields(siginfo_type, {
+                                   {"si_signo", 0, 4},
+                                   {"si_errno", 4, 4},
+                                   {"si_code", 8, 4},
+                                   {"si_pid", 12, 4},
+                                   {"si_uid", 16, 4},
+                                   {"si_status", 20, 4},
+                                   {"si_addr", 24, 4},
+                                   {"si_value", 28, 4},
+                                   {"_reason._fault._trapno", 32, 4},
+                                   {"_reason._timer._timerid", 32, 4},
+                                   {"_reason._timer._overrun", 36, 4},
+                                   {"_reason._mesgq._mqd", 32, 4},
+                                   {"_reason._poll._band", 32, 4},
+                               });
+  }
+}
+
+TEST_F(PlatformSiginfoTest, TestNetBSD_64bit) {
+  for (std::string arch : {"x86_64"}) {
+    SCOPED_TRACE(arch);
+    InitializeSiginfo(arch + "-unknown-netbsd9.0");
+    ASSERT_TRUE(siginfo_type);
+
+    ExpectFields(
+        siginfo_type,
+        {
+            {"_info._signo", 0, 4},
+            {"_info._code", 4, 4},
+            {"_info._errno", 8, 4},
+            {"_info._reason._rt._pid", 16, 4},
+            {"_info._reason._rt._uid", 20, 4},
+            {"_info._reason._rt._value", 24, 8},
+            {"_info._reason._child._pid", 16, 4},
+            {"_info._reason._child._uid", 20, 4},
+            {"_info._reason._child._status", 24, 4},
+            {"_info._reason._child._utime", 28, 4},
+            {"_info._reason._child._stime", 32, 4},
+            {"_info._reason._fault._addr", 16, 8},
+            {"_info._reason._fault._trap", 24, 4},
+            {"_info._reason._fault._trap2", 28, 4},
+            {"_info._reason._fault._trap3", 32, 4},
+            {"_info._reason._poll._band", 16, 8},
+            {"_info._reason._poll._fd", 24, 4},
+            {"_info._reason._syscall._sysnum", 16, 4},
+            {"_info._reason._syscall._retval", 20, 8},
+            {"_info._reason._syscall._error", 28, 4},
+            {"_info._reason._syscall._args", 32, 64},
+            {"_info._reason._ptrace_state._pe_report_event", 16, 4},
+            {"_info._reason._ptrace_state._option._pe_other_pid", 20, 4},
+            {"_info._reason._ptrace_state._option._pe_lwp", 20, 4},
+        });
+  }
+}
+
+TEST_F(PlatformSiginfoTest, TestNetBSD_32bit) {
+  for (std::string arch : {"i386"}) {
+    SCOPED_TRACE(arch);
+    InitializeSiginfo(arch + "-unknown-netbsd9.0");
+    ASSERT_TRUE(siginfo_type);
+
+    ExpectFields(
+        siginfo_type,
+        {
+            {"_info._signo", 0, 4},
+            {"_info._code", 4, 4},
+            {"_info._errno", 8, 4},
+            {"_info._reason._rt._pid", 12, 4},
+            {"_info._reason._rt._uid", 16, 4},
+            {"_info._reason._rt._value", 20, 4},
+            {"_info._reason._child._pid", 12, 4},
+            {"_info._reason._child._uid", 16, 4},
+            {"_info._reason._child._status", 20, 4},
+            {"_info._reason._child._utime", 24, 4},
+            {"_info._reason._child._stime", 28, 4},
+            {"_info._reason._fault._addr", 12, 4},
+            {"_info._reason._fault._trap", 16, 4},
+            {"_info._reason._fault._trap2", 20, 4},
+            {"_info._reason._fault._trap3", 24, 4},
+            {"_info._reason._poll._band", 12, 4},
+            {"_info._reason._poll._fd", 16, 4},
+            {"_info._reason._syscall._sysnum", 12, 4},
+            {"_info._reason._syscall._retval", 16, 8},
+            {"_info._reason._syscall._error", 24, 4},
+            {"_info._reason._syscall._args", 28, 64},
+            {"_info._reason._ptrace_state._pe_report_event", 12, 4},
+            {"_info._reason._ptrace_state._option._pe_other_pid", 16, 4},
+            {"_info._reason._ptrace_state._option._pe_lwp", 16, 4},
+        });
+  }
+}
diff --git a/lldb/unittests/Platform/tools/generate_siginfo.c b/lldb/unittests/Platform/tools/generate_siginfo.c
new file mode 100644
index 0000000000000..5294a1b141dce
--- /dev/null
+++ b/lldb/unittests/Platform/tools/generate_siginfo.c
@@ -0,0 +1,112 @@
+//===-- generate_siginfo_linux.c ------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <signal.h>
+#include <stddef.h>
+#include <stdio.h>
+
+siginfo_t siginfo;
+
+#define P(member)                                                              \
+  printf("                   {\"%s\", %zd, %zd},\n", #member,   \
+         offsetof(siginfo_t, member), sizeof(siginfo.member));
+
+// undef annoying "POSIX friendliness" macros
+#undef si_pid
+#undef si_uid
+#undef si_overrun
+#undef si_status
+#undef si_utime
+#undef si_stime
+#undef si_addr
+#undef si_addr_lsb
+#undef si_band
+#undef si_fd
+
+int main() {
+  printf("  ExpectFields(siginfo_type,\n");
+  printf("               {\n");
+
+#if !defined(__NetBSD__)
+  P(si_signo);
+  P(si_errno);
+  P(si_code);
+
+#if defined(__GLIBC__)
+  P(_sifields._kill.si_pid);
+  P(_sifields._kill.si_uid);
+  P(_sifields._timer.si_tid);
+  P(_sifields._timer.si_overrun);
+  P(_sifields._timer.si_sigval);
+  P(_sifields._rt.si_pid);
+  P(_sifields._rt.si_uid);
+  P(_sifields._rt.si_sigval);
+  P(_sifields._sigchld.si_pid);
+  P(_sifields._sigchld.si_uid);
+  P(_sifields._sigchld.si_status);
+  P(_sifields._sigchld.si_utime);
+  P(_sifields._sigchld.si_stime);
+  P(_sifields._sigfault.si_addr);
+  P(_sifields._sigfault.si_addr_lsb);
+  P(_sifields._sigfault._bounds._addr_bnd._lower);
+  P(_sifields._sigfault._bounds._addr_bnd._upper);
+  P(_sifields._sigfault._bounds._pkey);
+  P(_sifields._sigpoll.si_band);
+  P(_sifields._sigpoll.si_fd);
+  P(_sifields._sigsys._call_addr);
+  P(_sifields._sigsys._syscall);
+  P(_sifields._sigsys._arch);
+#endif // defined(__GLIBC__)
+
+#if defined(__FreeBSD__)
+  // these are top-level fields on FreeBSD
+  P(si_pid);
+  P(si_uid);
+  P(si_status);
+  P(si_addr);
+  P(si_value);
+  P(_reason._fault._trapno);
+  P(_reason._timer._timerid);
+  P(_reason._timer._overrun);
+  P(_reason._mesgq._mqd);
+  P(_reason._poll._band);
+#endif // defined(__FreeBSD__)
+
+#else // defined(__NetBSD__)
+
+  P(_info._signo);
+  P(_info._code);
+  P(_info._errno);
+  P(_info._reason._rt._pid);
+  P(_info._reason._rt._uid);
+  P(_info._reason._rt._value);
+  P(_info._reason._child._pid);
+  P(_info._reason._child._uid);
+  P(_info._reason._child._status);
+  P(_info._reason._child._utime);
+  P(_info._reason._child._stime);
+  P(_info._reason._fault._addr);
+  P(_info._reason._fault._trap);
+  P(_info._reason._fault._trap2);
+  P(_info._reason._fault._trap3);
+  P(_info._reason._poll._band);
+  P(_info._reason._poll._fd);
+  P(_info._reason._syscall._sysnum);
+  P(_info._reason._syscall._retval);
+  P(_info._reason._syscall._error);
+  P(_info._reason._syscall._args);
+  P(_info._reason._ptrace_state._pe_report_event);
+  P(_info._reason._ptrace_state._option._pe_other_pid);
+  P(_info._reason._ptrace_state._option._pe_lwp);
+
+#endif // defined(__NetBSD__)
+
+  printf("               });\n");
+
+  return 0;
+}

From 1a8f60f5f5b8638a3e8e7fb31ba7ae9e17a7ff2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@moritz.systems>
Date: Mon, 24 Jan 2022 18:52:49 +0100
Subject: [PATCH 903/946] [lldb] [gdb-remote] Support getting siginfo via API

Add Thread::GetSiginfo() and SBThread::GetSiginfo() methods to retrieve
the siginfo value from server.

Differential Revision: https://reviews.llvm.org/D118055
---
 lldb/bindings/interface/SBThread.i            |  6 ++
 lldb/include/lldb/API/SBPlatform.h            |  1 +
 lldb/include/lldb/API/SBTarget.h              |  1 +
 lldb/include/lldb/API/SBThread.h              |  2 +
 lldb/include/lldb/API/SBType.h                |  1 +
 lldb/include/lldb/Target/Thread.h             |  6 ++
 lldb/source/API/SBThread.cpp                  | 47 +++++++++
 .../GDBRemoteCommunicationClient.cpp          | 11 +++
 .../gdb-remote/GDBRemoteCommunicationClient.h |  3 +
 .../Process/gdb-remote/ThreadGDBRemote.cpp    | 20 ++++
 .../Process/gdb-remote/ThreadGDBRemote.h      |  3 +
 .../gdb_remote_client/TestGDBRemoteClient.py  | 96 +++++++++++++++++++
 12 files changed, 197 insertions(+)

diff --git a/lldb/bindings/interface/SBThread.i b/lldb/bindings/interface/SBThread.i
index d847d38f0d66e..ba7f5b3fdf765 100644
--- a/lldb/bindings/interface/SBThread.i
+++ b/lldb/bindings/interface/SBThread.i
@@ -405,6 +405,12 @@ public:
     bool
     SafeToCallFunctions ();
 
+    %feature("autodoc","
+    Retruns a SBValue object representing the siginfo for the current signal.
+    ") GetSiginfo;
+    lldb::SBValue
+    GetSiginfo(SBError &error);
+
     STRING_EXTENSION(SBThread)
 
 #ifdef SWIGPYTHON
diff --git a/lldb/include/lldb/API/SBPlatform.h b/lldb/include/lldb/API/SBPlatform.h
index dcc8a14ff0c1f..4f5d04a24e95e 100644
--- a/lldb/include/lldb/API/SBPlatform.h
+++ b/lldb/include/lldb/API/SBPlatform.h
@@ -172,6 +172,7 @@ class LLDB_API SBPlatform {
 protected:
   friend class SBDebugger;
   friend class SBTarget;
+  friend class SBThread;
 
   lldb::PlatformSP GetSP() const;
 
diff --git a/lldb/include/lldb/API/SBTarget.h b/lldb/include/lldb/API/SBTarget.h
index abd9ebf074076..9e75b5e503a8c 100644
--- a/lldb/include/lldb/API/SBTarget.h
+++ b/lldb/include/lldb/API/SBTarget.h
@@ -875,6 +875,7 @@ class LLDB_API SBTarget {
   friend class SBSection;
   friend class SBSourceManager;
   friend class SBSymbol;
+  friend class SBThread;
   friend class SBValue;
   friend class SBVariablesOptions;
 
diff --git a/lldb/include/lldb/API/SBThread.h b/lldb/include/lldb/API/SBThread.h
index ac1b8407a2200..76f794c25d9e8 100644
--- a/lldb/include/lldb/API/SBThread.h
+++ b/lldb/include/lldb/API/SBThread.h
@@ -208,6 +208,8 @@ class LLDB_API SBThread {
 
   bool SafeToCallFunctions();
 
+  SBValue GetSiginfo(SBError &error);
+
 private:
   friend class SBBreakpoint;
   friend class SBBreakpointLocation;
diff --git a/lldb/include/lldb/API/SBType.h b/lldb/include/lldb/API/SBType.h
index 529b4d0eeffc4..5885432d06243 100644
--- a/lldb/include/lldb/API/SBType.h
+++ b/lldb/include/lldb/API/SBType.h
@@ -225,6 +225,7 @@ class SBType {
   friend class SBFunction;
   friend class SBModule;
   friend class SBTarget;
+  friend class SBThread;
   friend class SBTypeEnumMember;
   friend class SBTypeEnumMemberList;
   friend class SBTypeNameSpecifier;
diff --git a/lldb/include/lldb/Target/Thread.h b/lldb/include/lldb/Target/Thread.h
index 587b29eb4c661..f1d4e6c7ef01a 100644
--- a/lldb/include/lldb/Target/Thread.h
+++ b/lldb/include/lldb/Target/Thread.h
@@ -22,6 +22,7 @@
 #include "lldb/Utility/CompletionRequest.h"
 #include "lldb/Utility/Event.h"
 #include "lldb/Utility/StructuredData.h"
+#include "lldb/Utility/UnimplementedError.h"
 #include "lldb/Utility/UserID.h"
 #include "lldb/lldb-private.h"
 
@@ -1184,6 +1185,11 @@ class Thread : public std::enable_shared_from_this<Thread>,
 
   lldb::ThreadSP GetCurrentExceptionBacktrace();
 
+  virtual llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  GetSiginfo(size_t max_size) const {
+    return llvm::make_error<UnimplementedError>();
+  }
+
 protected:
   friend class ThreadPlan;
   friend class ThreadList;
diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp
index 46a6c2759140b..dcc2a6ed3d18f 100644
--- a/lldb/source/API/SBThread.cpp
+++ b/lldb/source/API/SBThread.cpp
@@ -1317,3 +1317,50 @@ lldb_private::Thread *SBThread::operator->() {
 lldb_private::Thread *SBThread::get() {
   return m_opaque_sp->GetThreadSP().get();
 }
+
+SBValue SBThread::GetSiginfo(SBError &error) {
+  LLDB_INSTRUMENT_VA(this, error);
+
+  SBValue value;
+  SBProcess process = GetProcess();
+  if (!process.IsValid()) {
+    error.SetErrorString("no process");
+    return value;
+  }
+  SBTarget target = process.GetTarget();
+  if (!target.IsValid()) {
+    error.SetErrorString("unable to get target");
+    return value;
+  }
+  SBPlatform platform = target.GetPlatform();
+  if (!platform.IsValid()) {
+    error.SetErrorString("unable to get platform");
+    return value;
+  }
+  CompilerType type = platform.GetSP()->GetSiginfoType(
+      target.GetSP()->GetArchitecture().GetTriple());
+  if (!type.IsValid()) {
+    error.SetErrorString("no siginfo_t for the platform");
+    return value;
+  }
+  llvm::Optional<uint64_t> type_size = type.GetByteSize(nullptr);
+  assert(type_size);
+  ThreadSP thread_sp = m_opaque_sp->GetThreadSP();
+  if (!thread_sp) {
+    error.SetErrorString("unable to get thread");
+    return value;
+  }
+  llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>> data =
+      thread_sp->GetSiginfo(type_size.getValue());
+  if (!data) {
+    error.SetErrorString(llvm::toString(data.takeError()).c_str());
+    return value;
+  }
+  SBData sb_data;
+  sb_data.SetData(error, data.get()->getBufferStart(),
+                  data.get()->getBufferSize(), process.GetByteOrder(), 0);
+  if (!sb_data.IsValid())
+    return value;
+
+  return target.CreateValueFromData("siginfo", sb_data, type);
+}
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
index b5b105351de5d..f6526d03863bb 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.cpp
@@ -173,6 +173,13 @@ bool GDBRemoteCommunicationClient::GetQXferMemoryMapReadSupported() {
   return m_supports_qXfer_memory_map_read == eLazyBoolYes;
 }
 
+bool GDBRemoteCommunicationClient::GetQXferSigInfoReadSupported() {
+  if (m_supports_qXfer_siginfo_read == eLazyBoolCalculate) {
+    GetRemoteQSupported();
+  }
+  return m_supports_qXfer_siginfo_read == eLazyBoolYes;
+}
+
 uint64_t GDBRemoteCommunicationClient::GetRemoteMaxPacketSize() {
   if (m_max_packet_size == 0) {
     GetRemoteQSupported();
@@ -273,6 +280,7 @@ void GDBRemoteCommunicationClient::ResetDiscoverableSettings(bool did_exec) {
     m_supports_qXfer_libraries_svr4_read = eLazyBoolCalculate;
     m_supports_qXfer_features_read = eLazyBoolCalculate;
     m_supports_qXfer_memory_map_read = eLazyBoolCalculate;
+    m_supports_qXfer_siginfo_read = eLazyBoolCalculate;
     m_supports_augmented_libraries_svr4_read = eLazyBoolCalculate;
     m_uses_native_signals = eLazyBoolCalculate;
     m_supports_qProcessInfoPID = true;
@@ -320,6 +328,7 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() {
   m_supports_augmented_libraries_svr4_read = eLazyBoolNo;
   m_supports_qXfer_features_read = eLazyBoolNo;
   m_supports_qXfer_memory_map_read = eLazyBoolNo;
+  m_supports_qXfer_siginfo_read = eLazyBoolNo;
   m_supports_multiprocess = eLazyBoolNo;
   m_supports_qEcho = eLazyBoolNo;
   m_supports_QPassSignals = eLazyBoolNo;
@@ -362,6 +371,8 @@ void GDBRemoteCommunicationClient::GetRemoteQSupported() {
         m_supports_qXfer_features_read = eLazyBoolYes;
       else if (x == "qXfer:memory-map:read+")
         m_supports_qXfer_memory_map_read = eLazyBoolYes;
+      else if (x == "qXfer:siginfo:read+")
+        m_supports_qXfer_siginfo_read = eLazyBoolYes;
       else if (x == "qEcho")
         m_supports_qEcho = eLazyBoolYes;
       else if (x == "QPassSignals+")
diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
index c69c33bb1c153..58ed22187747f 100644
--- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
+++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationClient.h
@@ -337,6 +337,8 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase {
 
   bool GetQXferMemoryMapReadSupported();
 
+  bool GetQXferSigInfoReadSupported();
+
   LazyBool SupportsAllocDeallocMemory() // const
   {
     // Uncomment this to have lldb pretend the debug server doesn't respond to
@@ -551,6 +553,7 @@ class GDBRemoteCommunicationClient : public GDBRemoteClientBase {
   LazyBool m_supports_qXfer_libraries_svr4_read = eLazyBoolCalculate;
   LazyBool m_supports_qXfer_features_read = eLazyBoolCalculate;
   LazyBool m_supports_qXfer_memory_map_read = eLazyBoolCalculate;
+  LazyBool m_supports_qXfer_siginfo_read = eLazyBoolCalculate;
   LazyBool m_supports_augmented_libraries_svr4_read = eLazyBoolCalculate;
   LazyBool m_supports_jThreadExtendedInfo = eLazyBoolCalculate;
   LazyBool m_supports_jLoadedDynamicLibrariesInfos = eLazyBoolCalculate;
diff --git a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
index ba73115e4ad60..3d23c074c1be3 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
+++ b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp
@@ -346,3 +346,23 @@ bool ThreadGDBRemote::CalculateStopInfo() {
         ->CalculateThreadStopInfo(this);
   return false;
 }
+
+llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+ThreadGDBRemote::GetSiginfo(size_t max_size) const {
+  ProcessSP process_sp(GetProcess());
+  if (!process_sp)
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "no process");
+  ProcessGDBRemote *gdb_process =
+      static_cast<ProcessGDBRemote *>(process_sp.get());
+  if (!gdb_process->m_gdb_comm.GetQXferSigInfoReadSupported())
+    return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                   "qXfer:siginfo:read not supported");
+
+  llvm::Expected<std::string> response =
+      gdb_process->m_gdb_comm.ReadExtFeature("siginfo", "");
+  if (!response)
+    return response.takeError();
+
+  return llvm::MemoryBuffer::getMemBufferCopy(response.get());
+}
diff --git a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h
index b7d75021c062d..fb83c74fd2c53 100644
--- a/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h
+++ b/lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h
@@ -90,6 +90,9 @@ class ThreadGDBRemote : public Thread {
 
   StructuredData::ObjectSP FetchThreadExtendedInfo() override;
 
+  llvm::Expected<std::unique_ptr<llvm::MemoryBuffer>>
+  GetSiginfo(size_t max_size) const override;
+
 protected:
   friend class ProcessGDBRemote;
 
diff --git a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py
index 16cf4b4547c39..0c7ba64920827 100644
--- a/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py
+++ b/lldb/test/API/functionalities/gdb_remote_client/TestGDBRemoteClient.py
@@ -490,3 +490,99 @@ def cont(self):
                          lldb.eStopReasonSignal)
         self.assertEqual(process.threads[0].GetStopDescription(100),
                          'signal SIGUSR1')
+
+    def do_siginfo_test(self, platform, target_yaml, raw_data, expected):
+        class MyResponder(MockGDBServerResponder):
+            def qSupported(self, client_supported):
+                return "PacketSize=3fff;QStartNoAckMode+;qXfer:siginfo:read+"
+
+            def qXferRead(self, obj, annex, offset, length):
+                if obj == "siginfo":
+                    return raw_data, False
+                else:
+                    return None, False
+
+            def haltReason(self):
+                return "T02"
+
+            def cont(self):
+                return self.haltReason()
+
+        self.server.responder = MyResponder()
+
+        self.runCmd("platform select " + platform)
+        target = self.createTarget(target_yaml)
+        process = self.connect(target)
+
+        error = lldb.SBError()
+        siginfo = process.threads[0].GetSiginfo(error)
+        self.assertTrue(siginfo, error)
+
+        for key, value in expected.items():
+            self.assertEqual(siginfo.GetValueForExpressionPath("." + key)
+                             .GetValueAsUnsigned(),
+                             value)
+
+
+    def test_siginfo_linux_amd64(self):
+        data = (
+          # si_signo         si_errno        si_code
+            "\x11\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00"
+          # __pad0           si_pid          si_uid
+            "\x00\x00\x00\x00\xbf\xf7\x0b\x00\xe8\x03\x00\x00"
+          # si_status
+            "\x0c\x00\x00\x00" + "\x00" * 100)
+        expected = {
+            "si_signo": 17,  # SIGCHLD
+            "si_errno": 0,
+            "si_code": 1,  # CLD_EXITED
+            "_sifields._sigchld.si_pid": 784319,
+            "_sifields._sigchld.si_uid": 1000,
+            "_sifields._sigchld.si_status": 12,
+            "_sifields._sigchld.si_utime": 0,
+            "_sifields._sigchld.si_stime": 0,
+        }
+        self.do_siginfo_test("remote-linux", "basic_eh_frame.yaml",
+                             data, expected)
+
+    def test_siginfo_linux_i386(self):
+        data = (
+          # si_signo         si_errno        si_code
+            "\x11\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00"
+          # si_pid           si_uid          si_status
+            "\x49\x43\x07\x00\xe8\x03\x00\x00\x0c\x00\x00\x00"
+            + "\x00" * 104)
+        expected = {
+            "si_signo": 17,  # SIGCHLD
+            "si_errno": 0,
+            "si_code": 1,  # CLD_EXITED
+            "_sifields._sigchld.si_pid": 475977,
+            "_sifields._sigchld.si_uid": 1000,
+            "_sifields._sigchld.si_status": 12,
+            "_sifields._sigchld.si_utime": 0,
+            "_sifields._sigchld.si_stime": 0,
+        }
+        self.do_siginfo_test("remote-linux", "basic_eh_frame-i386.yaml",
+                             data, expected)
+
+    def test_siginfo_freebsd_amd64(self):
+        data = (
+          # si_signo         si_errno        si_code
+            "\x0b\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00"
+          # si_pid           si_uid          si_status
+            "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+          # si_addr
+            "\x76\x98\xba\xdc\xfe\x00\x00\x00"
+          # si_status                        si_trapno
+            "\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x00\x00"
+            + "\x00" * 36)
+
+        expected = {
+            "si_signo": 11,  # SIGSEGV
+            "si_errno": 0,
+            "si_code": 1,  # SEGV_MAPERR
+            "si_addr": 0xfedcba9876,
+            "_reason._fault._trapno": 12,
+        }
+        self.do_siginfo_test("remote-freebsd", "basic_eh_frame.yaml",
+                             data, expected)

From a78ce48c373572946a51228dda741681d1695d63 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 27 Jan 2022 12:40:09 +0000
Subject: [PATCH 904/946] [ConstraintElimination] Introduce struct to manage
 constraints. (NFC)

This patch adds a struct to manage a list of constraints. It simplifies
a follow-up change, that adds pre-conditions that must hold before a
list of constraints can be used.
---
 .../Scalar/ConstraintElimination.cpp          | 81 ++++++++++++-------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index fcb88b54a094c..0c4d7c8ecb483 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -43,6 +43,51 @@ DEBUG_COUNTER(EliminatedCounter, "conds-eliminated",
 
 static int64_t MaxConstraintValue = std::numeric_limits<int64_t>::max();
 
+namespace {
+struct ConstraintTy {
+  SmallVector<int64_t, 8> Coefficients;
+
+  ConstraintTy(SmallVector<int64_t, 8> Coefficients)
+      : Coefficients(Coefficients) {}
+
+  unsigned size() const { return Coefficients.size(); }
+};
+
+/// Struct to manage a list of constraints.
+struct ConstraintListTy {
+  SmallVector<ConstraintTy, 4> Constraints;
+
+  ConstraintListTy() {}
+
+  ConstraintListTy(const SmallVector<ConstraintTy, 4> &Constraints)
+      : Constraints(Constraints) {}
+
+  void mergeIn(const ConstraintListTy &Other) {
+    append_range(Constraints, Other.Constraints);
+  }
+
+  unsigned size() const { return Constraints.size(); }
+
+  unsigned empty() const { return Constraints.empty(); }
+
+  /// Returns true if any constraint has a non-zero coefficient for any of the
+  /// newly added indices. Zero coefficients for new indices are removed. If it
+  /// returns true, no new variable need to be added to the system.
+  bool needsNewIndices(const DenseMap<Value *, unsigned> &NewIndices) {
+    assert(size() == 1);
+    for (unsigned I = 0; I < NewIndices.size(); ++I) {
+      int64_t Last = get(0).Coefficients.pop_back_val();
+      if (Last != 0)
+        return true;
+    }
+    return false;
+  }
+
+  ConstraintTy &get(unsigned I) { return Constraints[I]; }
+};
+
+} // namespace
+
 // Decomposes \p V into a vector of pairs of the form { c, X } where c * X. The
 // sum of the pairs equals \p V.  The first pair is the constant-factor and X
 // must be nullptr. If the expression cannot be decomposed, returns an empty
@@ -113,19 +158,10 @@ static SmallVector<std::pair<int64_t, Value *>, 4> decompose(Value *V) {
   return {{0, nullptr}, {1, V}};
 }
 
-struct ConstraintTy {
-  SmallVector<int64_t, 8> Coefficients;
-
-  ConstraintTy(SmallVector<int64_t, 8> Coefficients)
-      : Coefficients(Coefficients) {}
-
-  unsigned size() const { return Coefficients.size(); }
-};
-
 /// Turn a condition \p CmpI into a vector of constraints, using indices from \p
 /// Value2Index. Additional indices for newly discovered values are added to \p
 /// NewIndices.
-static SmallVector<ConstraintTy, 4>
+static ConstraintListTy
 getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
               const DenseMap<Value *, unsigned> &Value2Index,
               DenseMap<Value *, unsigned> &NewIndices) {
@@ -155,7 +191,7 @@ getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
         getConstraint(CmpInst::ICMP_UGE, Op0, Op1, Value2Index, NewIndices);
     auto B =
         getConstraint(CmpInst::ICMP_ULE, Op0, Op1, Value2Index, NewIndices);
-    append_range(A, B);
+    A.mergeIn(B);
     return A;
   }
 
@@ -200,10 +236,10 @@ getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
     R[GetOrAddIndex(KV.second)] -= KV.first;
 
   R[0] = Offset1 + Offset2 + (Pred == CmpInst::ICMP_ULT ? -1 : 0);
-  return {R};
+  return {{R}};
 }
 
-static SmallVector<ConstraintTy, 4>
+static ConstraintListTy
 getConstraint(CmpInst *Cmp, const DenseMap<Value *, unsigned> &Value2Index,
               DenseMap<Value *, unsigned> &NewIndices) {
   return getConstraint(Cmp->getPredicate(), Cmp->getOperand(0),
@@ -397,21 +433,10 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
         if (R.size() != 1)
           continue;
 
-        // Check if all coefficients of new indices are 0 after building the
-        // constraint. Skip if any of the new indices has a non-null
-        // coefficient.
-        bool HasNewIndex = false;
-        for (unsigned I = 0; I < NewIndices.size(); ++I) {
-          int64_t Last = R[0].Coefficients.pop_back_val();
-          if (Last != 0) {
-            HasNewIndex = true;
-            break;
-          }
-        }
-        if (HasNewIndex || R[0].size() == 1)
+        if (R.needsNewIndices(NewIndices) || R.get(0).size() == 1)
           continue;
 
-        if (CS.isConditionImplied(R[0].Coefficients)) {
+        if (CS.isConditionImplied(R.get(0).Coefficients)) {
           if (!DebugCounter::shouldExecute(EliminatedCounter))
             continue;
 
@@ -432,7 +457,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
           Changed = true;
         }
         if (CS.isConditionImplied(
-                ConstraintSystem::negate(R[0].Coefficients))) {
+                ConstraintSystem::negate(R.get(0).Coefficients))) {
           if (!DebugCounter::shouldExecute(EliminatedCounter))
             continue;
 
@@ -479,7 +504,7 @@ static bool eliminateConstraints(Function &F, DominatorTree &DT) {
 
     LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n");
     bool Added = false;
-    for (auto &C : R) {
+    for (auto &C : R.Constraints) {
       auto Coeffs = C.Coefficients;
       LLVM_DEBUG({
         dbgs() << "  constraint: ";

From 608cc6b16394b6a2fb2a9555232f35ad58beb0a3 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 27 Jan 2022 13:08:08 +0100
Subject: [PATCH 905/946] [mlir][complex] Lower complex.constant to LLVM

This fixes a regression from 480cd4cb8560532e544fc0c234749912dde759c6

Differential Revision: https://reviews.llvm.org/D118347
---
 mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp | 13 +++++++++++++
 .../Conversion/ComplexToLLVM/convert-to-llvm.mlir   |  7 +++++++
 2 files changed, 20 insertions(+)

diff --git a/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp b/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
index 592d1839f8f83..c061495fda0b4 100644
--- a/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
+++ b/mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp
@@ -78,6 +78,18 @@ struct AbsOpConversion : public ConvertOpToLLVMPattern<complex::AbsOp> {
   }
 };
 
+struct ConstantOpLowering : public ConvertOpToLLVMPattern<complex::ConstantOp> {
+  using ConvertOpToLLVMPattern::ConvertOpToLLVMPattern;
+
+  LogicalResult
+  matchAndRewrite(complex::ConstantOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    return LLVM::detail::oneToOneRewrite(
+        op, LLVM::ConstantOp::getOperationName(), adaptor.getOperands(),
+        *getTypeConverter(), rewriter);
+  }
+};
+
 struct CreateOpConversion : public ConvertOpToLLVMPattern<complex::CreateOp> {
   using ConvertOpToLLVMPattern<complex::CreateOp>::ConvertOpToLLVMPattern;
 
@@ -294,6 +306,7 @@ void mlir::populateComplexToLLVMConversionPatterns(
   patterns.add<
       AbsOpConversion,
       AddOpConversion,
+      ConstantOpLowering,
       CreateOpConversion,
       DivOpConversion,
       ImOpConversion,
diff --git a/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir b/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir
index 6b3fd20fa15f2..0abc375f4ed6a 100644
--- a/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir
+++ b/mlir/test/Conversion/ComplexToLLVM/convert-to-llvm.mlir
@@ -10,6 +10,13 @@ func @complex_create(%real: f32, %imag: f32) -> complex<f32> {
   return %cplx2 : complex<f32>
 }
 
+// CHECK-LABEL: func @complex_constant
+// CHECK-NEXT:    llvm.mlir.constant([1.000000e+00, 2.000000e+00]) : !llvm.struct<(f64, f64)>
+func @complex_constant() -> complex<f64> {
+  %cplx2 = complex.constant [1.000000e+00, 2.000000e+00] : complex<f64>
+  return %cplx2 : complex<f64>
+}
+
 // CHECK-LABEL: func @complex_extract
 // CHECK-SAME:    (%[[CPLX:.*]]: complex<f32>)
 // CHECK-NEXT:    %[[CAST0:.*]] = builtin.unrealized_conversion_cast %[[CPLX]] : complex<f32> to !llvm.struct<(f32, f32)>

From b70366c9c430e1eadd59d5a1dfbb9c4d84f83de5 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 27 Jan 2022 13:02:34 +0100
Subject: [PATCH 906/946] [mlir][BufferOptimization] Use datalayout instead of
 a flag to find index size

This has the additional advantage of supporting more types.

Differential Revision: https://reviews.llvm.org/D118348
---
 .../Dialect/Bufferization/Transforms/Passes.h  |  1 -
 .../Dialect/Bufferization/Transforms/Passes.td |  3 ---
 .../Transforms/BufferOptimizations.cpp         | 17 +++++------------
 .../Transforms/promote-buffers-to-stack.mlir   | 18 +++++++++++++++---
 4 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
index 481eaa284d98b..29c0d7f741d93 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h
@@ -34,7 +34,6 @@ std::unique_ptr<OperationPass<FuncOp>> createFinalizingBufferizePass();
 /// Dynamic shaped buffers are promoted up to the given rank.
 std::unique_ptr<Pass>
 createPromoteBuffersToStackPass(unsigned maxAllocSizeInBytes = 1024,
-                                unsigned bitwidthOfIndexType = 64,
                                 unsigned maxRankOfAllocatedMemRef = 1);
 
 /// Creates a pass that promotes heap-based allocations to stack-based ones.
diff --git a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
index dcbdd52a5c81d..9890383931cbb 100644
--- a/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.td
@@ -164,9 +164,6 @@ def PromoteBuffersToStack : Pass<"promote-buffers-to-stack", "FuncOp"> {
     Option<"maxAllocSizeInBytes", "max-alloc-size-in-bytes", "unsigned",
            /*default=*/"1024",
            "Maximal size in bytes to promote allocations to stack.">,
-    Option<"bitwidthOfIndexType", "bitwidth-of-index-type", "unsigned",
-           /*default=*/"64",
-           "Bitwidth of the index type. Used for size estimation.">,
     Option<"maxRankOfAllocatedMemRef", "max-rank-of-allocated-memref", "unsigned",
            /*default=*/"1",
            "Maximal memref rank to promote dynamic buffers.">,
diff --git a/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp b/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
index 158c786739342..141c513c90cf7 100644
--- a/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
+++ b/mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp
@@ -32,7 +32,6 @@ static bool isKnownControlFlowInterface(Operation *op) {
 /// transformation is only applied to small buffers since large buffers could
 /// exceed the stack space.
 static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
-                                unsigned bitwidthOfIndexType,
                                 unsigned maxRankOfAllocatedMemRef) {
   auto type = alloc.getType().dyn_cast<ShapedType>();
   if (!type || !alloc.getDefiningOp<memref::AllocOp>())
@@ -51,10 +50,8 @@ static bool defaultIsSmallAlloc(Value alloc, unsigned maximumSizeInBytes,
     }
     return false;
   }
-  // For index types, use the provided size, as the type does not know.
-  unsigned int bitwidth = type.getElementType().isIndex()
-                              ? bitwidthOfIndexType
-                              : type.getElementTypeBitWidth();
+  unsigned bitwidth = mlir::DataLayout::closest(alloc.getDefiningOp())
+                          .getTypeSizeInBits(type.getElementType());
   return type.getNumElements() * bitwidth <= maximumSizeInBytes * 8;
 }
 
@@ -390,10 +387,8 @@ class PromoteBuffersToStackPass
     : public PromoteBuffersToStackBase<PromoteBuffersToStackPass> {
 public:
   PromoteBuffersToStackPass(unsigned maxAllocSizeInBytes,
-                            unsigned bitwidthOfIndexType,
                             unsigned maxRankOfAllocatedMemRef) {
     this->maxAllocSizeInBytes = maxAllocSizeInBytes;
-    this->bitwidthOfIndexType = bitwidthOfIndexType;
     this->maxRankOfAllocatedMemRef = maxRankOfAllocatedMemRef;
   }
 
@@ -404,7 +399,6 @@ class PromoteBuffersToStackPass
     if (isSmallAlloc == nullptr) {
       isSmallAlloc = [=](Value alloc) {
         return defaultIsSmallAlloc(alloc, maxAllocSizeInBytes,
-                                   bitwidthOfIndexType,
                                    maxRankOfAllocatedMemRef);
       };
     }
@@ -432,10 +426,9 @@ std::unique_ptr<Pass> mlir::bufferization::createBufferLoopHoistingPass() {
 }
 
 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
-    unsigned maxAllocSizeInBytes, unsigned bitwidthOfIndexType,
-    unsigned maxRankOfAllocatedMemRef) {
-  return std::make_unique<PromoteBuffersToStackPass>(
-      maxAllocSizeInBytes, bitwidthOfIndexType, maxRankOfAllocatedMemRef);
+    unsigned maxAllocSizeInBytes, unsigned maxRankOfAllocatedMemRef) {
+  return std::make_unique<PromoteBuffersToStackPass>(maxAllocSizeInBytes,
+                                                     maxRankOfAllocatedMemRef);
 }
 
 std::unique_ptr<Pass> mlir::bufferization::createPromoteBuffersToStackPass(
diff --git a/mlir/test/Transforms/promote-buffers-to-stack.mlir b/mlir/test/Transforms/promote-buffers-to-stack.mlir
index 2b6cd3185fa11..5dfe9ccf8fad7 100644
--- a/mlir/test/Transforms/promote-buffers-to-stack.mlir
+++ b/mlir/test/Transforms/promote-buffers-to-stack.mlir
@@ -1,6 +1,5 @@
 // RUN: mlir-opt -promote-buffers-to-stack -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix DEFINDEX
-// RUN: mlir-opt -promote-buffers-to-stack="bitwidth-of-index-type=256 max-alloc-size-in-bytes=128" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix BIGINDEX
-// RUN: mlir-opt -promote-buffers-to-stack="bitwidth-of-index-type=256 max-alloc-size-in-bytes=64" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix LOWLIMIT
+// RUN: mlir-opt -promote-buffers-to-stack="max-alloc-size-in-bytes=64" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix LOWLIMIT
 // RUN: mlir-opt -promote-buffers-to-stack="max-rank-of-allocated-memref=2" -split-input-file %s | FileCheck %s --check-prefix=CHECK --check-prefix RANK
 
 // This file checks the behavior of PromoteBuffersToStack pass for converting
@@ -595,7 +594,20 @@ func @indexElementType() {
   return
 }
 // DEFINDEX-NEXT: memref.alloca()
-// BIGINDEX-NEXT: memref.alloca()
+// LOWLIMIT-NEXT: memref.alloca()
+// RANK-NEXT: memref.alloca()
+// CHECK-NEXT: return
+
+// -----
+
+// CHECK-LABEL: func @bigIndexElementType
+module attributes { dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<index, 256>>} {
+  func @bigIndexElementType() {
+    %0 = memref.alloc() : memref<4xindex>
+    return
+  }
+}
+// DEFINDEX-NEXT: memref.alloca()
 // LOWLIMIT-NEXT: memref.alloc()
 // RANK-NEXT: memref.alloca()
 // CHECK-NEXT: return

From dafd1f29da27c2bb9ed95cf4f3149c68492e4b19 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Thu, 27 Jan 2022 12:15:12 +0000
Subject: [PATCH 907/946] [AArch64][SVE] Avoid using ptrue for unpredicated
 predicate AND.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D118146
---
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td       |  2 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td           | 12 ++++++++++++
 llvm/test/CodeGen/AArch64/sve-int-log.ll             | 12 ++++--------
 .../CodeGen/AArch64/sve-intrinsics-reinterpret.ll    |  9 +++------
 .../CodeGen/AArch64/sve-split-int-pred-reduce.ll     | 12 ++++++------
 5 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3e7c46e6e43a6..73a680465f6f6 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -734,7 +734,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
   defm PFIRST  : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
   defm PNEXT   : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
 
-  defm AND_PPzPP   : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
+  defm AND_PPzPP   : sve_int_pred_log_and<0b0000, "and", int_aarch64_sve_and_z>;
   defm BIC_PPzPP   : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
   defm EOR_PPzPP   : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
   defm SEL_PPPP    : sve_int_pred_log<0b0011, "sel", vselect>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index eb965261e1832..574b22124957b 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1633,6 +1633,18 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
                                !cast<Instruction>(NAME), PTRUE_D>;
 }
 
+multiclass sve_int_pred_log_and<bits<4> opc, string asm, SDPatternOperator op> :
+  sve_int_pred_log<opc, asm, op> {
+  def : Pat<(nxv16i1 (and nxv16i1:$Op1, nxv16i1:$Op2)),
+            (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+  def : Pat<(nxv8i1 (and nxv8i1:$Op1, nxv8i1:$Op2)),
+            (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+  def : Pat<(nxv4i1 (and nxv4i1:$Op1, nxv4i1:$Op2)),
+            (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+  def : Pat<(nxv2i1 (and nxv2i1:$Op1, nxv2i1:$Op2)),
+            (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE Logical Mask Immediate Group
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-int-log.ll b/llvm/test/CodeGen/AArch64/sve-int-log.ll
index 2da05d30a9e02..e8bdf67cba8b4 100644
--- a/llvm/test/CodeGen/AArch64/sve-int-log.ll
+++ b/llvm/test/CodeGen/AArch64/sve-int-log.ll
@@ -49,8 +49,7 @@ define <vscale x 16 x i8> @and_b_zero(<vscale x 16 x i8> %a) {
 define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
 ; CHECK-LABEL: and_pred_d:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p2.d
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %res = and <vscale x 2 x i1> %a, %b
   ret <vscale x 2 x i1> %res
@@ -59,8 +58,7 @@ define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
 define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
 ; CHECK-LABEL: and_pred_s:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p2.s
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %res = and <vscale x 4 x i1> %a, %b
   ret <vscale x 4 x i1> %res
@@ -69,8 +67,7 @@ define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
 define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
 ; CHECK-LABEL: and_pred_h:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p2.h
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %res = and <vscale x 8 x i1> %a, %b
   ret <vscale x 8 x i1> %res
@@ -79,8 +76,7 @@ define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
 define <vscale x 16 x i1> @and_pred_b(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
 ; CHECK-LABEL: and_pred_b:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %res = and <vscale x 16 x i1> %a, %b
   ret <vscale x 16 x i1> %res
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
index dac524ad7f29a..a018c56c8e860 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-reinterpret.ll
@@ -17,8 +17,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_h(<vscale x 8 x i1> %pg) {
 ; CHECK-LABEL: reinterpret_bool_from_h:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.h
-; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
   ret <vscale x 16 x i1> %out
@@ -28,8 +27,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_s(<vscale x 4 x i1> %pg) {
 ; CHECK-LABEL: reinterpret_bool_from_s:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.s
-; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %pg)
   ret <vscale x 16 x i1> %out
@@ -39,8 +37,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_d(<vscale x 2 x i1> %pg) {
 ; CHECK-LABEL: reinterpret_bool_from_d:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p1.d
-; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    ret
   %out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
   ret <vscale x 16 x i1> %out
diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
index 2afcaf7cedd80..d24c540a18024 100644
--- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll
@@ -7,7 +7,7 @@ define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK-LABEL: andv_nxv32i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    not p0.b, p2/z, p0.b
 ; CHECK-NEXT:    ptest p2, p0.b
 ; CHECK-NEXT:    cset w0, eq
@@ -24,10 +24,10 @@ define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
 ; CHECK-NEXT:    str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    and p1.b, p1/z, p1.b, p3.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p2.b
 ; CHECK-NEXT:    ptrue p4.b
-; CHECK-NEXT:    and p1.b, p4/z, p1.b, p3.b
-; CHECK-NEXT:    and p0.b, p4/z, p0.b, p2.b
-; CHECK-NEXT:    and p0.b, p4/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    not p0.b, p4/z, p0.b
 ; CHECK-NEXT:    ptest p4, p0.b
 ; CHECK-NEXT:    ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -73,7 +73,7 @@ define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK-LABEL: smaxv_nxv32i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    not p0.b, p2/z, p0.b
 ; CHECK-NEXT:    ptest p2, p0.b
 ; CHECK-NEXT:    cset w0, eq
@@ -116,7 +116,7 @@ define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
 ; CHECK-LABEL: uminv_nxv32i1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p2.b
-; CHECK-NEXT:    and p0.b, p2/z, p0.b, p1.b
+; CHECK-NEXT:    and p0.b, p0/z, p0.b, p1.b
 ; CHECK-NEXT:    not p0.b, p2/z, p0.b
 ; CHECK-NEXT:    ptest p2, p0.b
 ; CHECK-NEXT:    cset w0, eq

From 8dd14e1757ebca9090a22a3166c8a17558fb7a21 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 27 Jan 2022 12:56:50 +0000
Subject: [PATCH 908/946] [X86] Add test showing failure to combine 128-bit
 PTEST of split vectors

---
 llvm/test/CodeGen/X86/combine-ptest.ll | 40 ++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index 975440cf8297a..5781ae6e70a7f 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -392,6 +392,46 @@ define i32 @ptestz_v32i8_signbits(<32 x i8> %c, i32 %a, i32 %b) {
   ret i32 %t5
 }
 
+;
+; testz(or(extract_lo(X),extract_hi(X),or(extract_lo(Y),extract_hi(Y)) -> testz(X,Y)
+;
+
+define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
+; AVX1-LABEL: ptestz_v2i64_concat:
+; AVX1:       # %bb.0:
+; AVX1-NEXT:    movl %edi, %eax
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
+; AVX1-NEXT:    vptest %xmm1, %xmm0
+; AVX1-NEXT:    cmovnel %esi, %eax
+; AVX1-NEXT:    vzeroupper
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: ptestz_v2i64_concat:
+; AVX2:       # %bb.0:
+; AVX2-NEXT:    movl %edi, %eax
+; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
+; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpor %xmm1, %xmm3, %xmm1
+; AVX2-NEXT:    vptest %xmm1, %xmm0
+; AVX2-NEXT:    cmovnel %esi, %eax
+; AVX2-NEXT:    vzeroupper
+; AVX2-NEXT:    retq
+  %t1 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %t2 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  %t3 = shufflevector <4 x i64> %d, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
+  %t4 = shufflevector <4 x i64> %d, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
+  %t5 = or <2 x i64> %t1, %t2
+  %t6 = or <2 x i64> %t4, %t3
+  %t7 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t5, <2 x i64> %t6)
+  %t8 = icmp ne i32 %t7, 0
+  %t9 = select i1 %t8, i32 %a, i32 %b
+  ret i32 %t9
+}
+
 declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
 declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone

From 389ae775e44ea0c8cd42a3ae518e79b837af4990 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 27 Jan 2022 13:20:36 +0000
Subject: [PATCH 909/946] [X86] Fold TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) ->
 TESTZ(X,Y)

Helps fix a number of poor codegen cases for allof(cmp()) with 256-bit vectors on AVX1
---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 17 +++++++
 llvm/test/CodeGen/X86/combine-ptest.ll        | 30 +++--------
 .../CodeGen/X86/vector-reduce-and-bool.ll     | 50 ++++++-------------
 3 files changed, 38 insertions(+), 59 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ba606d7a80edb..240be748a89ec 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44407,6 +44407,23 @@ static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
     // TESTZ(X,-1) == TESTZ(X,X)
     if (ISD::isBuildVectorAllOnes(Op1.getNode()))
       return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
+
+    // TESTZ(OR(LO(X),HI(X)),OR(LO(Y),HI(Y))) -> TESTZ(X,Y)
+    // TODO: Add COND_NE handling?
+    if (CC == X86::COND_E && OpVT.is128BitVector() && Subtarget.hasAVX()) {
+      SDValue Src0 = peekThroughBitcasts(Op0);
+      SDValue Src1 = peekThroughBitcasts(Op1);
+      if (Src0.getOpcode() == ISD::OR && Src1.getOpcode() == ISD::OR) {
+        Src0 = getSplitVectorSrc(peekThroughBitcasts(Src0.getOperand(0)),
+                                 peekThroughBitcasts(Src0.getOperand(1)), true);
+        Src1 = getSplitVectorSrc(peekThroughBitcasts(Src1.getOperand(0)),
+                                 peekThroughBitcasts(Src1.getOperand(1)), true);
+        if (Src0 && Src1)
+          return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
+                             DAG.getBitcast(MVT::v4i64, Src0),
+                             DAG.getBitcast(MVT::v4i64, Src1));
+      }
+    }
   }
 
   return SDValue();
diff --git a/llvm/test/CodeGen/X86/combine-ptest.ll b/llvm/test/CodeGen/X86/combine-ptest.ll
index 5781ae6e70a7f..33691308fd938 100644
--- a/llvm/test/CodeGen/X86/combine-ptest.ll
+++ b/llvm/test/CodeGen/X86/combine-ptest.ll
@@ -397,29 +397,13 @@ define i32 @ptestz_v32i8_signbits(<32 x i8> %c, i32 %a, i32 %b) {
 ;
 
 define i32 @ptestz_v2i64_concat(<4 x i64> %c, <4 x i64> %d, i32 %a, i32 %b) {
-; AVX1-LABEL: ptestz_v2i64_concat:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    movl %edi, %eax
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT:    vpor %xmm1, %xmm3, %xmm1
-; AVX1-NEXT:    vptest %xmm1, %xmm0
-; AVX1-NEXT:    cmovnel %esi, %eax
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: ptestz_v2i64_concat:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    movl %edi, %eax
-; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT:    vextracti128 $1, %ymm1, %xmm3
-; AVX2-NEXT:    vpor %xmm2, %xmm0, %xmm0
-; AVX2-NEXT:    vpor %xmm1, %xmm3, %xmm1
-; AVX2-NEXT:    vptest %xmm1, %xmm0
-; AVX2-NEXT:    cmovnel %esi, %eax
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; CHECK-LABEL: ptestz_v2i64_concat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    vptest %ymm1, %ymm0
+; CHECK-NEXT:    cmovnel %esi, %eax
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
   %t1 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
   %t2 = shufflevector <4 x i64> %c, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
   %t3 = shufflevector <4 x i64> %d, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 4be3ed18cf901..c94ca6ca5deb3 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -1202,21 +1202,12 @@ define i1 @icmp0_v16i16_v16i1(<16 x i16>) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp0_v16i16_v16i1:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vptest %xmm0, %xmm0
-; AVX1-NEXT:    sete %al
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: icmp0_v16i16_v16i1:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vptest %ymm0, %ymm0
-; AVX2-NEXT:    sete %al
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; AVX-LABEL: icmp0_v16i16_v16i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vptest %ymm0, %ymm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: icmp0_v16i16_v16i1:
 ; AVX512F:       # %bb.0:
@@ -1268,21 +1259,12 @@ define i1 @icmp0_v32i8_v32i1(<32 x i8>) {
 ; SSE41-NEXT:    sete %al
 ; SSE41-NEXT:    retq
 ;
-; AVX1-LABEL: icmp0_v32i8_v32i1:
-; AVX1:       # %bb.0:
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vptest %xmm0, %xmm0
-; AVX1-NEXT:    sete %al
-; AVX1-NEXT:    vzeroupper
-; AVX1-NEXT:    retq
-;
-; AVX2-LABEL: icmp0_v32i8_v32i1:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vptest %ymm0, %ymm0
-; AVX2-NEXT:    sete %al
-; AVX2-NEXT:    vzeroupper
-; AVX2-NEXT:    retq
+; AVX-LABEL: icmp0_v32i8_v32i1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vptest %ymm0, %ymm0
+; AVX-NEXT:    sete %al
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
 ;
 ; AVX512F-LABEL: icmp0_v32i8_v32i1:
 ; AVX512F:       # %bb.0:
@@ -1504,9 +1486,7 @@ define i1 @icmp0_v32i16_v32i1(<32 x i16>) {
 ; AVX1-LABEL: icmp0_v32i16_v32i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vptest %xmm0, %xmm0
+; AVX1-NEXT:    vptest %ymm0, %ymm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq
@@ -1586,9 +1566,7 @@ define i1 @icmp0_v64i8_v64i1(<64 x i8>) {
 ; AVX1-LABEL: icmp0_v64i8_v64i1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vorps %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vptest %xmm0, %xmm0
+; AVX1-NEXT:    vptest %ymm0, %ymm0
 ; AVX1-NEXT:    sete %al
 ; AVX1-NEXT:    vzeroupper
 ; AVX1-NEXT:    retq

From 258a0a3a551745e7250d7831e6b477e49708a08c Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 27 Jan 2022 13:31:23 +0000
Subject: [PATCH 910/946] [ConstraintElimination] Use simplified constraint for
 == 0.

When checking x == 0, checking x u<= 0 is sufficient and simpler than
x u>= 0 && x u<= 0.

https://alive2.llvm.org/ce/z/btM7d3

    ----------------------------------------
    define i1 @src(i4 %a) {
    %0:
      %c = icmp eq i4 %a, 0
      ret i1 %c
    }
    =>
    define i1 @tgt(i4 %a) {
    %0:
       %c = icmp ule i4 %a, 0
       ret i1 %c
    }
    Transformation seems to be correct!
---
 llvm/lib/Transforms/Scalar/ConstraintElimination.cpp | 4 ++++
 llvm/test/Transforms/ConstraintElimination/ne.ll     | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 0c4d7c8ecb483..e108135d41a57 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -187,6 +187,10 @@ getConstraint(CmpInst::Predicate Pred, Value *Op0, Value *Op1,
                          Value2Index, NewIndices);
 
   if (Pred == CmpInst::ICMP_EQ) {
+    if (match(Op1, m_Zero()))
+      return getConstraint(CmpInst::ICMP_ULE, Op0, Op1, Value2Index,
+                           NewIndices);
+
     auto A =
         getConstraint(CmpInst::ICMP_UGE, Op0, Op1, Value2Index, NewIndices);
     auto B =
diff --git a/llvm/test/Transforms/ConstraintElimination/ne.ll b/llvm/test/Transforms/ConstraintElimination/ne.ll
index 084c24ff84c8f..2f6a54b7441ac 100644
--- a/llvm/test/Transforms/ConstraintElimination/ne.ll
+++ b/llvm/test/Transforms/ConstraintElimination/ne.ll
@@ -56,7 +56,7 @@ define i1 @test_ne_eq_0(i8 %a, i8 %b) {
 ; CHECK-NEXT:    [[C_3:%.*]] = icmp eq i8 [[A]], [[B]]
 ; CHECK-NEXT:    [[RES_3:%.*]] = xor i1 [[RES_2]], [[C_3]]
 ; CHECK-NEXT:    [[C_4:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    [[RES_4:%.*]] = xor i1 [[RES_3]], [[C_4]]
+; CHECK-NEXT:    [[RES_4:%.*]] = xor i1 [[RES_3]], false
 ; CHECK-NEXT:    [[T_2:%.*]] = icmp ugt i8 [[A]], 0
 ; CHECK-NEXT:    [[RES_5:%.*]] = xor i1 [[RES_4]], true
 ; CHECK-NEXT:    [[T_3:%.*]] = icmp uge i8 [[A]], 1
@@ -75,7 +75,7 @@ define i1 @test_ne_eq_0(i8 %a, i8 %b) {
 ; CHECK-NEXT:    [[C_9:%.*]] = icmp eq i8 [[A]], [[B]]
 ; CHECK-NEXT:    [[RES_11:%.*]] = xor i1 [[RES_10]], [[C_9]]
 ; CHECK-NEXT:    [[C_10:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    [[RES_12:%.*]] = xor i1 [[RES_11]], [[C_10]]
+; CHECK-NEXT:    [[RES_12:%.*]] = xor i1 [[RES_11]], true
 ; CHECK-NEXT:    [[F_2:%.*]] = icmp ugt i8 [[A]], 0
 ; CHECK-NEXT:    [[RES_13:%.*]] = xor i1 [[RES_12]], false
 ; CHECK-NEXT:    [[F_3:%.*]] = icmp uge i8 [[A]], 1
@@ -221,7 +221,7 @@ define i1 @test_ne_eq_1(i8 %a, i8 %b) {
 ; CHECK-NEXT:    [[C_11:%.*]] = icmp eq i8 [[A]], [[B]]
 ; CHECK-NEXT:    [[RES_11:%.*]] = xor i1 [[RES_10]], [[C_11]]
 ; CHECK-NEXT:    [[F_1:%.*]] = icmp eq i8 [[A]], 0
-; CHECK-NEXT:    [[RES_12:%.*]] = xor i1 [[RES_11]], [[F_1]]
+; CHECK-NEXT:    [[RES_12:%.*]] = xor i1 [[RES_11]], false
 ; CHECK-NEXT:    [[T_3:%.*]] = icmp ugt i8 [[A]], 0
 ; CHECK-NEXT:    [[RES_13:%.*]] = xor i1 [[RES_12]], true
 ; CHECK-NEXT:    [[T_4:%.*]] = icmp uge i8 [[A]], 1

From d44de46254ff02386f261d50a44117ea57f55c09 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Thu, 27 Jan 2022 14:33:29 +0100
Subject: [PATCH 911/946] [OMPIRBuilder] Avoid pointer element type access

Use isOpaqueOrPointeeTypeEquals() instead.
---
 llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index d9b9035a54250..3b8d80c4eeec6 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -2987,7 +2987,8 @@ Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
   StringRef RuntimeName = Out.str();
   auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
   if (Elem.second) {
-    assert(Elem.second->getType()->getPointerElementType() == Ty &&
+    assert(cast<PointerType>(Elem.second->getType())
+               ->isOpaqueOrPointeeTypeMatches(Ty) &&
            "OMP internal variable has different type than requested");
   } else {
     // TODO: investigate the appropriate linkage type used for the global

From 1ca02bddb46c1de728e30303d3d828e9a6720e05 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 27 Jan 2022 13:44:47 +0000
Subject: [PATCH 912/946] [ConstraintSystem] Mark function as const (NFC).

---
 llvm/include/llvm/Analysis/ConstraintSystem.h | 2 +-
 llvm/lib/Analysis/ConstraintSystem.cpp        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h
index d5b8f208172bb..d7800f5783253 100644
--- a/llvm/include/llvm/Analysis/ConstraintSystem.h
+++ b/llvm/include/llvm/Analysis/ConstraintSystem.h
@@ -73,7 +73,7 @@ class ConstraintSystem {
     return R;
   }
 
-  bool isConditionImplied(SmallVector<int64_t, 8> R);
+  bool isConditionImplied(SmallVector<int64_t, 8> R) const;
 
   void popLastConstraint() { Constraints.pop_back(); }
 
diff --git a/llvm/lib/Analysis/ConstraintSystem.cpp b/llvm/lib/Analysis/ConstraintSystem.cpp
index 9739c6af5769a..773f71ada0eec 100644
--- a/llvm/lib/Analysis/ConstraintSystem.cpp
+++ b/llvm/lib/Analysis/ConstraintSystem.cpp
@@ -142,7 +142,7 @@ bool ConstraintSystem::mayHaveSolution() {
   return HasSolution;
 }
 
-bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) {
+bool ConstraintSystem::isConditionImplied(SmallVector<int64_t, 8> R) const {
   // If all variable coefficients are 0, we have 'C >= 0'. If the constant is >=
   // 0, R is always true, regardless of the system.
   if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; }))

From 33eb3f14eb42316524efe71a916b727fe563d973 Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel@labath.sk>
Date: Thu, 27 Jan 2022 14:55:49 +0100
Subject: [PATCH 913/946] [lldb] Delete TestBacktraceAll.py

This test is completely nondeterministic, environment-dependent and does
not test what it was supposed to test (reverting the associated patch
does not make it fail).

I tried to figure out what the patch was meant to fix to see if I can
create a better test with the current tools, but I was not able to
understand the problem (it sounds like it has something to do with local
classes, but I don't understand the details).
---
 .../thread/backtrace_all/Makefile             |   5 -
 .../thread/backtrace_all/ParallelTask.cpp     | 152 ------------------
 .../thread/backtrace_all/TestBacktraceAll.py  |  59 -------
 3 files changed, 216 deletions(-)
 delete mode 100644 lldb/test/API/functionalities/thread/backtrace_all/Makefile
 delete mode 100644 lldb/test/API/functionalities/thread/backtrace_all/ParallelTask.cpp
 delete mode 100644 lldb/test/API/functionalities/thread/backtrace_all/TestBacktraceAll.py

diff --git a/lldb/test/API/functionalities/thread/backtrace_all/Makefile b/lldb/test/API/functionalities/thread/backtrace_all/Makefile
deleted file mode 100644
index cd092b7725618..0000000000000
--- a/lldb/test/API/functionalities/thread/backtrace_all/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
-CXXFLAGS_EXTRAS := -std=c++11
-CXX_SOURCES := ParallelTask.cpp
-ENABLE_THREADS := YES
-
-include Makefile.rules
diff --git a/lldb/test/API/functionalities/thread/backtrace_all/ParallelTask.cpp b/lldb/test/API/functionalities/thread/backtrace_all/ParallelTask.cpp
deleted file mode 100644
index 8e0f76f691b98..0000000000000
--- a/lldb/test/API/functionalities/thread/backtrace_all/ParallelTask.cpp
+++ /dev/null
@@ -1,152 +0,0 @@
-#include <cstdint>
-#include <thread>
-#include <vector>
-#include <queue>
-#include <functional>
-#include <future>
-#include <iostream>
-#include <cassert>
-
-class TaskPoolImpl
-{
-public:
-    TaskPoolImpl(uint32_t num_threads) :
-        m_stop(false)
-    {
-        for (uint32_t i = 0; i < num_threads; ++i)
-            m_threads.emplace_back(Worker, this);
-    }
-
-    ~TaskPoolImpl()
-    {
-        Stop();
-    }
-
-    template<typename F, typename... Args>
-    std::future<typename std::result_of<F(Args...)>::type>
-    AddTask(F&& f, Args&&... args)
-    {
-        auto task = std::make_shared<std::packaged_task<typename std::result_of<F(Args...)>::type()>>(
-            std::bind(std::forward<F>(f), std::forward<Args>(args)...));
-
-        std::unique_lock<std::mutex> lock(m_tasks_mutex);
-        assert(!m_stop && "Can't add task to TaskPool after it is stopped");
-        m_tasks.emplace([task](){ (*task)(); });
-        lock.unlock();
-        m_tasks_cv.notify_one();
-
-        return task->get_future();
-    }
-
-    void
-    Stop()
-    {
-        std::unique_lock<std::mutex> lock(m_tasks_mutex);
-        m_stop = true;
-        m_tasks_mutex.unlock();
-        m_tasks_cv.notify_all();
-        for (auto& t : m_threads)
-            t.join();
-    }
-
-private:
-    static void
-    Worker(TaskPoolImpl* pool)
-    {
-        while (true)
-        {
-            std::unique_lock<std::mutex> lock(pool->m_tasks_mutex);
-            if (pool->m_tasks.empty())
-                pool->m_tasks_cv.wait(lock, [pool](){ return !pool->m_tasks.empty() || pool->m_stop; });
-            if (pool->m_tasks.empty())
-                break;
-
-            std::function<void()> f = pool->m_tasks.front();
-            pool->m_tasks.pop();
-            lock.unlock();
-
-            f();
-        }
-    }
-
-    std::queue<std::function<void()>> m_tasks;
-    std::mutex                        m_tasks_mutex;
-    std::condition_variable           m_tasks_cv;
-    bool                              m_stop;
-    std::vector<std::thread>          m_threads;
-};
-
-class TaskPool
-{
-public:
-    // Add a new task to the thread pool and return a std::future belongs for the newly created task.
-    // The caller of this function have to wait on the future for this task to complete.
-    template<typename F, typename... Args>
-    static std::future<typename std::result_of<F(Args...)>::type>
-    AddTask(F&& f, Args&&... args)
-    {
-        return GetImplementation().AddTask(std::forward<F>(f), std::forward<Args>(args)...);
-    }
-
-    // Run all of the specified tasks on the thread pool and wait until all of them are finished
-    // before returning
-    template<typename... T>
-    static void
-    RunTasks(T&&... t)
-    {
-        RunTaskImpl<T...>::Run(std::forward<T>(t)...);
-    }
-
-private:
-    static TaskPoolImpl&
-    GetImplementation()
-    {
-        static TaskPoolImpl g_task_pool_impl(std::thread::hardware_concurrency());
-        return g_task_pool_impl;
-    }
-
-    template<typename... T>
-    struct RunTaskImpl;
-};
-
-template<typename H, typename... T>
-struct TaskPool::RunTaskImpl<H, T...>
-{
-    static void
-    Run(H&& h, T&&... t)
-    {
-        auto f = AddTask(std::forward<H>(h));
-        RunTaskImpl<T...>::Run(std::forward<T>(t)...);
-        f.wait();
-    }
-};
-
-template<>
-struct TaskPool::RunTaskImpl<>
-{
-    static void
-    Run() {}
-};
-
-int main()
-{
-    std::vector<std::future<uint32_t>> tasks;
-    for (int i = 0; i < 100000; ++i)
-    {
-        tasks.emplace_back(TaskPool::AddTask([](int i){
-            uint32_t s = 0;
-            for (int j = 0; j <= i; ++j)
-                s += j;
-            return s;
-        },
-        i));
-    }
-
-    for (auto& it : tasks)  // Set breakpoint here
-        it.wait();
-
-    TaskPool::RunTasks(
-        []() { return 1; },
-        []() { return "aaaa"; }
-    );
-}
diff --git a/lldb/test/API/functionalities/thread/backtrace_all/TestBacktraceAll.py b/lldb/test/API/functionalities/thread/backtrace_all/TestBacktraceAll.py
deleted file mode 100644
index f34d4e0b81b45..0000000000000
--- a/lldb/test/API/functionalities/thread/backtrace_all/TestBacktraceAll.py
+++ /dev/null
@@ -1,59 +0,0 @@
-"""
-Test regression for Bug 25251.
-"""
-
-import unittest2
-import lldb
-from lldbsuite.test.decorators import *
-from lldbsuite.test.lldbtest import *
-from lldbsuite.test import lldbutil
-
-
-class BacktraceAllTestCase(TestBase):
-
-    mydir = TestBase.compute_mydir(__file__)
-
-    def setUp(self):
-        # Call super's setUp().
-        TestBase.setUp(self)
-        # Find the line number for our breakpoint.
-        self.breakpoint = line_number(
-            'ParallelTask.cpp', '// Set breakpoint here')
-
-    # The android-arm compiler can't compile the inferior
-    @skipIfTargetAndroid(archs=["arm"])
-    # because of an issue around std::future.
-    # TODO: Change the test to don't depend on std::future<T>
-    def test(self):
-        """Test breakpoint handling after a thread join."""
-        self.build()
-
-        exe = self.getBuildArtifact("a.out")
-        self.runCmd("file " + exe, CURRENT_EXECUTABLE_SET)
-
-        # This should create a breakpoint
-        lldbutil.run_break_set_by_file_and_line(
-            self, "ParallelTask.cpp", self.breakpoint, num_expected_locations=-1)
-
-        # The breakpoint list should show 1 location.
-        self.expect(
-            "breakpoint list -f",
-            "Breakpoint location shown correctly",
-            substrs=[
-                "1: file = 'ParallelTask.cpp', line = %d, exact_match = 0" %
-                self.breakpoint])
-
-        # Run the program.
-        self.runCmd("run", RUN_SUCCEEDED)
-
-        # The stop reason of the thread should be breakpoint.
-        self.expect("thread list", STOPPED_DUE_TO_BREAKPOINT,
-                    substrs=['stopped',
-                             'stop reason = breakpoint'])
-
-        # This should not result in a segmentation fault
-        self.expect("thread backtrace all", STOPPED_DUE_TO_BREAKPOINT,
-                    substrs=["stop reason = breakpoint 1."])
-
-        # Run to completion
-        self.runCmd("continue")

From cb3df1a29956ec6ee5905cf217b3e0abb9611d0b Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 27 Jan 2022 14:20:30 +0000
Subject: [PATCH 914/946] [AArch64] Add vector compare/select tests with UNE
 predicate.

Precommit some additional tests for D118256.
---
 .../CostModel/AArch64/vector-select.ll        | 84 +++++++++++++++++++
 1 file changed, 84 insertions(+)

diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index b1639a02b869e..dcfbc879b27df 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -667,6 +667,90 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x dou
   ret <2 x double> %s.1
 }
 
+define <4 x half> @v4f16_select_une(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; COST-LABEL: v4f16_select_une
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %cmp.1 = fcmp une <4 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction:   %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp une <4 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+;
+; CODE-LABEL: v4f16_select_une
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h
+; CODE-NEXT:    bit   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp une <4 x half> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x half> %a, <4 x half> %c
+  ret <4 x half> %s.1
+}
+
+define <8 x half> @v8f16_select_une(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; COST-LABEL: v8f16_select_une
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %cmp.1 = fcmp une <8 x half> %a, %b
+; COST-NOFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction:   %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp une <8 x half> %a, %b
+; COST-FULLFP16-NEXT:  Cost Model: Found an estimated cost of 29 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+;
+; CODE-LABEL: v8f16_select_une
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h
+; CODE-NEXT:    bit   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp une <8 x half> %a, %b
+  %s.1 = select <8 x i1> %cmp.1, <8 x half> %a, <8 x half> %c
+  ret <8 x half> %s.1
+}
+
+define <2 x float> @v2f32_select_une(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
+; COST-LABEL: v2f32_select_une
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp une <2 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+;
+; CODE-LABEL: v2f32_select_une
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s
+; CODE-NEXT:    bit   v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp une <2 x float> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x float> %a, <2 x float> %c
+  ret <2 x float> %s.1
+}
+
+define <4 x float> @v4f32_select_une(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; COST-LABEL: v4f32_select_une
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp une <4 x float> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 13 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+;
+; CODE-LABEL: v4f32_select_une
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s
+; CODE-NEXT:    bit   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp une <4 x float> %a, %b
+  %s.1 = select <4 x i1> %cmp.1, <4 x float> %a, <4 x float> %c
+  ret <4 x float> %s.1
+}
+
+define <2 x double> @v2f64_select_une(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; COST-LABEL: v2f64_select_une
+; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:  %cmp.1 = fcmp une <2 x double> %a, %b
+; COST-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+;
+; CODE-LABEL: v2f64_select_une
+; CODE:       bb.0
+; CODE-NEXT:    fcmeq v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d
+; CODE-NEXT:    bit   v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b
+; CODE-NEXT:    ret
+;
+  %cmp.1 = fcmp une <2 x double> %a, %b
+  %s.1 = select <2 x i1> %cmp.1, <2 x double> %a, <2 x double> %c
+  ret <2 x double> %s.1
+}
+
 define <2 x float> @v2f32_select_ord(<2 x float> %a, <2 x float> %b, <2 x float> %c) {
 ; COST-LABEL: v2f32_select_ord
 ; COST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:   %cmp.1 = fcmp ord <2 x float> %a, %b

From 185cb8e82c98096fdd73beb646017d86ca8692b3 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Wed, 26 Jan 2022 17:09:38 +0000
Subject: [PATCH 915/946] [AMDGPU] SILoadStoreOptimizer: Allow merging across a
 swizzled access

Swizzled accesses are not merged, but there is no particular reason not
to merge two instructions if any of the intervening instructions happens
to be a swizzled access.

This moves the check for swizzled accesses out of checkAndPrepareMerge
into collectMergeableInsts where I think it makes more sense.

Differential Revision: https://reviews.llvm.org/D118267
---
 llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp | 17 ++++++-----------
 llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir      |  6 ++----
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index d1133c12bc18d..0ed8287de8ac9 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -910,12 +910,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
   }
   const unsigned InstSubclass = getInstSubclass(Opc, *TII);
 
-  // Do not merge VMEM buffer instructions with "swizzled" bit set.
-  int Swizzled =
-      AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz);
-  if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm())
-    return false;
-
   DenseSet<Register> RegDefsToMove;
   DenseSet<Register> PhysRegUsesToMove;
   addDefsUsesToList(*CI.I, RegDefsToMove, PhysRegUsesToMove);
@@ -971,11 +965,6 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
       continue;
     }
 
-    int Swizzled =
-        AMDGPU::getNamedOperandIdx(MBBI->getOpcode(), AMDGPU::OpName::swz);
-    if (Swizzled != -1 && MBBI->getOperand(Swizzled).getImm())
-      return false;
-
     // Handle a case like
     //   DS_WRITE_B32 addr, v, idx0
     //   w = DS_READ_B32 addr, idx0
@@ -2014,6 +2003,12 @@ SILoadStoreOptimizer::collectMergeableInsts(
     if (InstClass == UNKNOWN)
       continue;
 
+    // Do not merge VMEM buffer instructions with "swizzled" bit set.
+    int Swizzled =
+        AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::swz);
+    if (Swizzled != -1 && MI.getOperand(Swizzled).getImm())
+      continue;
+
     CombineInfo CI;
     CI.setMI(MI, *TII, *STM);
     CI.Order = Order++;
diff --git a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
index f0296d8544ed5..961e8213a85bf 100644
--- a/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
+++ b/llvm/test/CodeGen/AMDGPU/merge-tbuffer.mir
@@ -780,9 +780,8 @@ body:             |
 
 
 # GFX9-LABEL: name: gfx9_tbuffer_load_merge_across_swizzle
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
 # GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 12, 116, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
-# GFX9: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 116, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX9: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 123, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
 name: gfx9_tbuffer_load_merge_across_swizzle
 body:             |
   bb.0.entry:
@@ -1598,9 +1597,8 @@ body:             |
 
 
 # GFX10-LABEL: name: gfx10_tbuffer_load_merge_across_swizzle
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 4, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
 # GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 12, 22, 0, 0, 1, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
-# GFX10: %{{[0-9]+}}:vgpr_32 = TBUFFER_LOAD_FORMAT_X_OFFSET %4, 0, 8, 22, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), align 1, addrspace 4)
+# GFX10: %{{[0-9]+}}:vreg_64 = TBUFFER_LOAD_FORMAT_XY_OFFSET %4, 0, 4, 64, 0, 0, 0, implicit $exec :: (dereferenceable load (s64), align 1, addrspace 4)
 name: gfx10_tbuffer_load_merge_across_swizzle
 body:             |
   bb.0.entry:

From ccda0f22263f1cf97657e8235ae694a2e7d8526c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev@redking.me.uk>
Date: Thu, 27 Jan 2022 14:54:16 +0000
Subject: [PATCH 916/946] [X86][SSE] Add combineBitOpWithShift for
 BITOP(SHIFT(X,Z),SHIFT(Y,Z)) -> SHIFT(BITOP(X,Y),Z) vector folds

InstCombine performs this more generally with SimplifyUsingDistributiveLaws, but we don't need anything that complex here - this is mainly to fix up cases where logic ops get created late on during lowering, often in conjunction with sext/zext ops for type legalization.

https://alive2.llvm.org/ce/z/gGpY5v
---
 llvm/lib/Target/X86/X86ISelLowering.cpp   |  52 ++++++++++
 llvm/test/CodeGen/X86/movmsk-cmp.ll       | 114 ++++++++--------------
 llvm/test/CodeGen/X86/vector-ext-logic.ll |  90 +++++++----------
 3 files changed, 125 insertions(+), 131 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 240be748a89ec..044e21dab4391 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -46438,6 +46438,49 @@ static SDValue combineBitOpWithMOVMSK(SDNode *N, SelectionDAG &DAG) {
   return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
 }
 
+// Attempt to fold BITOP(SHIFT(X,Z),SHIFT(Y,Z)) -> SHIFT(BITOP(X,Y),Z).
+// NOTE: This is a very limited case of what SimplifyUsingDistributiveLaws
+// handles in InstCombine.
+static SDValue combineBitOpWithShift(SDNode *N, SelectionDAG &DAG) {
+  unsigned Opc = N->getOpcode();
+  assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) &&
+         "Unexpected bit opcode");
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N->getValueType(0);
+
+  // Both operands must be single use.
+  if (!N0.hasOneUse() || !N1.hasOneUse())
+    return SDValue();
+
+  // Search for matching shifts.
+  SDValue BC0 = peekThroughOneUseBitcasts(N0);
+  SDValue BC1 = peekThroughOneUseBitcasts(N1);
+
+  unsigned BCOpc = BC0.getOpcode();
+  EVT BCVT = BC0.getValueType();
+  if (BCOpc != BC1->getOpcode() || BCVT != BC1.getValueType())
+    return SDValue();
+
+  switch (BCOpc) {
+  case X86ISD::VSHLI:
+  case X86ISD::VSRLI:
+  case X86ISD::VSRAI: {
+    if (BC0.getOperand(1) != BC1.getOperand(1))
+      return SDValue();
+
+    SDLoc DL(N);
+    SDValue BitOp =
+        DAG.getNode(Opc, DL, BCVT, BC0.getOperand(0), BC1.getOperand(0));
+    SDValue Shift = DAG.getNode(BCOpc, DL, BCVT, BitOp, BC0.getOperand(1));
+    return DAG.getBitcast(VT, Shift);
+  }
+  }
+
+  return SDValue();
+}
+
 /// If this is a zero/all-bits result that is bitwise-anded with a low bits
 /// mask. (Mask == 1 for the x86 lowering of a SETCC + ZEXT), replace the 'and'
 /// with a shift-right to eliminate loading the vector constant mask value.
@@ -46741,6 +46784,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
     return R;
 
+  if (SDValue R = combineBitOpWithShift(N, DAG))
+    return R;
+
   if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
     return FPLogic;
 
@@ -47188,6 +47234,9 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
     return R;
 
+  if (SDValue R = combineBitOpWithShift(N, DAG))
+    return R;
+
   if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
     return FPLogic;
 
@@ -49660,6 +49709,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
     return R;
 
+  if (SDValue R = combineBitOpWithShift(N, DAG))
+    return R;
+
   if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, DCI, Subtarget))
     return FPLogic;
 
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
index 68adeda025d7c..cb4af67fe0241 100644
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -1227,20 +1227,18 @@ define i1 @allzeros_v16i8_and1(<16 x i8> %arg) {
 define i1 @allones_v32i8_and1(<32 x i8> %arg) {
 ; SSE-LABEL: allones_v32i8_and1:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    psllw $7, %xmm0
-; SSE-NEXT:    psllw $7, %xmm1
-; SSE-NEXT:    pand %xmm0, %xmm1
-; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %eax
 ; SSE-NEXT:    cmpw $-1, %ax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: allones_v32i8_and1:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    cmpw $-1, %ax
 ; AVX1-NEXT:    sete %al
@@ -1292,9 +1290,8 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
 ; AVX1-LABEL: allzeros_v32i8_and1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    sete %al
@@ -1336,14 +1333,11 @@ define i1 @allzeros_v32i8_and1(<32 x i8> %arg) {
 define i1 @allones_v64i8_and1(<64 x i8> %arg) {
 ; SSE-LABEL: allones_v64i8_and1:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    psllw $7, %xmm1
-; SSE-NEXT:    psllw $7, %xmm3
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm3, %xmm0
 ; SSE-NEXT:    psllw $7, %xmm0
-; SSE-NEXT:    psllw $7, %xmm2
-; SSE-NEXT:    pand %xmm0, %xmm2
-; SSE-NEXT:    pand %xmm1, %xmm2
-; SSE-NEXT:    pand %xmm3, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %eax
 ; SSE-NEXT:    cmpw $-1, %ax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
@@ -1351,14 +1345,11 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
 ; AVX1-LABEL: allones_v64i8_and1:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsllw $7, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpsllw $7, %xmm3, %xmm3
-; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
-; AVX1-NEXT:    vpsllw $7, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    cmpw $-1, %ax
 ; AVX1-NEXT:    sete %al
@@ -1367,9 +1358,8 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
 ;
 ; AVX2-LABEL: allones_v64i8_and1:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
-; AVX2-NEXT:    vpsllw $7, %ymm1, %ymm1
 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    cmpl $-1, %eax
 ; AVX2-NEXT:    sete %al
@@ -1378,10 +1368,9 @@ define i1 @allones_v64i8_and1(<64 x i8> %arg) {
 ;
 ; KNL-LABEL: allones_v64i8_and1:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vpsllw $7, %ymm0, %ymm1
-; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
 ; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
-; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vpmovmskb %ymm0, %eax
 ; KNL-NEXT:    cmpl $-1, %eax
 ; KNL-NEXT:    sete %al
@@ -1416,15 +1405,12 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
 ;
 ; AVX1-LABEL: allzeros_v64i8_and1:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsllw $7, %xmm1, %xmm2
-; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpsllw $7, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpor %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpor %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpsllw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    sete %al
@@ -1444,9 +1430,8 @@ define i1 @allzeros_v64i8_and1(<64 x i8> %arg) {
 ; KNL-LABEL: allzeros_v64i8_and1:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; KNL-NEXT:    vpsllw $7, %ymm1, %ymm1
-; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; KNL-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; KNL-NEXT:    vpmovmskb %ymm0, %eax
 ; KNL-NEXT:    testl %eax, %eax
 ; KNL-NEXT:    sete %al
@@ -1734,11 +1719,9 @@ define i1 @allzeros_v32i16_and1(<32 x i16> %arg) {
 ; KNL-LABEL: allzeros_v32i16_and1:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; KNL-NEXT:    vpsllw $15, %ymm1, %ymm1
-; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
+; KNL-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vpsllw $15, %ymm0, %ymm0
 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
-; KNL-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kortestw %k0, %k0
@@ -2545,20 +2528,18 @@ define i1 @allzeros_v16i8_and4(<16 x i8> %arg) {
 define i1 @allones_v32i8_and4(<32 x i8> %arg) {
 ; SSE-LABEL: allones_v32i8_and4:
 ; SSE:       # %bb.0:
+; SSE-NEXT:    pand %xmm1, %xmm0
 ; SSE-NEXT:    psllw $5, %xmm0
-; SSE-NEXT:    psllw $5, %xmm1
-; SSE-NEXT:    pand %xmm0, %xmm1
-; SSE-NEXT:    pmovmskb %xmm1, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %eax
 ; SSE-NEXT:    cmpw $-1, %ax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: allones_v32i8_and4:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
-; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    cmpw $-1, %ax
 ; AVX1-NEXT:    sete %al
@@ -2610,9 +2591,8 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
 ; AVX1-LABEL: allzeros_v32i8_and4:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
-; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    sete %al
@@ -2654,14 +2634,11 @@ define i1 @allzeros_v32i8_and4(<32 x i8> %arg) {
 define i1 @allones_v64i8_and4(<64 x i8> %arg) {
 ; SSE-LABEL: allones_v64i8_and4:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    psllw $5, %xmm1
-; SSE-NEXT:    psllw $5, %xmm3
+; SSE-NEXT:    pand %xmm2, %xmm0
+; SSE-NEXT:    pand %xmm1, %xmm0
+; SSE-NEXT:    pand %xmm3, %xmm0
 ; SSE-NEXT:    psllw $5, %xmm0
-; SSE-NEXT:    psllw $5, %xmm2
-; SSE-NEXT:    pand %xmm0, %xmm2
-; SSE-NEXT:    pand %xmm1, %xmm2
-; SSE-NEXT:    pand %xmm3, %xmm2
-; SSE-NEXT:    pmovmskb %xmm2, %eax
+; SSE-NEXT:    pmovmskb %xmm0, %eax
 ; SSE-NEXT:    cmpw $-1, %ax
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
@@ -2669,14 +2646,11 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
 ; AVX1-LABEL: allones_v64i8_and4:
 ; AVX1:       # %bb.0:
 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT:    vpsllw $5, %xmm2, %xmm2
 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT:    vpsllw $5, %xmm3, %xmm3
-; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
-; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; AVX1-NEXT:    vpand %xmm0, %xmm2, %xmm0
 ; AVX1-NEXT:    vpand %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    cmpw $-1, %ax
 ; AVX1-NEXT:    sete %al
@@ -2685,9 +2659,8 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
 ;
 ; AVX2-LABEL: allones_v64i8_and4:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpsllw $5, %ymm0, %ymm0
-; AVX2-NEXT:    vpsllw $5, %ymm1, %ymm1
 ; AVX2-NEXT:    vpand %ymm0, %ymm1, %ymm0
+; AVX2-NEXT:    vpsllw $5, %ymm0, %ymm0
 ; AVX2-NEXT:    vpmovmskb %ymm0, %eax
 ; AVX2-NEXT:    cmpl $-1, %eax
 ; AVX2-NEXT:    sete %al
@@ -2696,10 +2669,9 @@ define i1 @allones_v64i8_and4(<64 x i8> %arg) {
 ;
 ; KNL-LABEL: allones_v64i8_and4:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    vpsllw $5, %ymm0, %ymm1
-; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
+; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; KNL-NEXT:    vpand %ymm0, %ymm1, %ymm0
 ; KNL-NEXT:    vpsllw $5, %ymm0, %ymm0
-; KNL-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vpmovmskb %ymm0, %eax
 ; KNL-NEXT:    cmpl $-1, %eax
 ; KNL-NEXT:    sete %al
@@ -2734,15 +2706,12 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
 ;
 ; AVX1-LABEL: allzeros_v64i8_and4:
 ; AVX1:       # %bb.0:
-; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm2
-; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm3
-; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT:    vpsllw $5, %xmm1, %xmm1
-; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
+; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT:    vpor %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpor %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT:    vpor %xmm0, %xmm2, %xmm0
-; AVX1-NEXT:    vpor %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpsllw $5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpmovmskb %xmm0, %eax
 ; AVX1-NEXT:    testl %eax, %eax
 ; AVX1-NEXT:    sete %al
@@ -2762,9 +2731,8 @@ define i1 @allzeros_v64i8_and4(<64 x i8> %arg) {
 ; KNL-LABEL: allzeros_v64i8_and4:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; KNL-NEXT:    vpsllw $5, %ymm1, %ymm1
-; KNL-NEXT:    vpsllw $5, %ymm0, %ymm0
 ; KNL-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    vpsllw $5, %ymm0, %ymm0
 ; KNL-NEXT:    vpmovmskb %ymm0, %eax
 ; KNL-NEXT:    testl %eax, %eax
 ; KNL-NEXT:    sete %al
@@ -3052,11 +3020,9 @@ define i1 @allzeros_v32i16_and4(<32 x i16> %arg) {
 ; KNL-LABEL: allzeros_v32i16_and4:
 ; KNL:       # %bb.0:
 ; KNL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; KNL-NEXT:    vpsllw $13, %ymm1, %ymm1
-; KNL-NEXT:    vpsraw $15, %ymm1, %ymm1
+; KNL-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vpsllw $13, %ymm0, %ymm0
 ; KNL-NEXT:    vpsraw $15, %ymm0, %ymm0
-; KNL-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; KNL-NEXT:    vpmovsxwd %ymm0, %zmm0
 ; KNL-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; KNL-NEXT:    kortestw %k0, %k0
diff --git a/llvm/test/CodeGen/X86/vector-ext-logic.ll b/llvm/test/CodeGen/X86/vector-ext-logic.ll
index a26d7e9486f1c..cfbc83d5e84e6 100644
--- a/llvm/test/CodeGen/X86/vector-ext-logic.ll
+++ b/llvm/test/CodeGen/X86/vector-ext-logic.ll
@@ -198,10 +198,9 @@ define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
 ; SSE2-LABEL: sext_and_v8i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    psraw $8, %xmm2
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT:    psraw $8, %xmm0
 ; SSE2-NEXT:    pand %xmm2, %xmm0
+; SSE2-NEXT:    psraw $8, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: sext_and_v8i16:
@@ -220,10 +219,9 @@ define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
 ; SSE2-LABEL: sext_or_v8i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    psraw $8, %xmm2
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT:    psraw $8, %xmm0
 ; SSE2-NEXT:    por %xmm2, %xmm0
+; SSE2-NEXT:    psraw $8, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: sext_or_v8i16:
@@ -242,10 +240,9 @@ define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
 ; SSE2-LABEL: sext_xor_v8i16:
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
-; SSE2-NEXT:    psraw $8, %xmm2
 ; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE2-NEXT:    psraw $8, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    psraw $8, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: sext_xor_v8i16:
@@ -338,34 +335,27 @@ define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
 define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
 ; SSE2-LABEL: bool_sext_and:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pand %xmm3, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    pand %xmm1, %xmm0
 ; SSE2-NEXT:    pslld $31, %xmm0
 ; SSE2-NEXT:    psrad $31, %xmm0
-; SSE2-NEXT:    pslld $31, %xmm3
-; SSE2-NEXT:    psrad $31, %xmm3
-; SSE2-NEXT:    pslld $31, %xmm1
-; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    pand %xmm0, %xmm1
 ; SSE2-NEXT:    pslld $31, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    pand %xmm3, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: bool_sext_and:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
 ; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
-; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
-; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
-; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %xs = sext <8 x i1> %x to <8 x i32>
   %ys = sext <8 x i1> %y to <8 x i32>
@@ -376,34 +366,27 @@ define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
 define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
 ; SSE2-LABEL: bool_sext_or:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    por %xmm3, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    por %xmm1, %xmm0
 ; SSE2-NEXT:    pslld $31, %xmm0
 ; SSE2-NEXT:    psrad $31, %xmm0
-; SSE2-NEXT:    pslld $31, %xmm3
-; SSE2-NEXT:    psrad $31, %xmm3
-; SSE2-NEXT:    pslld $31, %xmm1
-; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    por %xmm0, %xmm1
 ; SSE2-NEXT:    pslld $31, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    por %xmm3, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: bool_sext_or:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
 ; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
-; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
-; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
-; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %xs = sext <8 x i1> %x to <8 x i32>
   %ys = sext <8 x i1> %y to <8 x i32>
@@ -414,34 +397,27 @@ define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
 define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
 ; SSE2-LABEL: bool_sext_xor:
 ; SSE2:       # %bb.0:
-; SSE2-NEXT:    movdqa %xmm1, %xmm2
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
-; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    movdqa %xmm1, %xmm3
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
+; SSE2-NEXT:    pxor %xmm3, %xmm2
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
+; SSE2-NEXT:    pxor %xmm1, %xmm0
 ; SSE2-NEXT:    pslld $31, %xmm0
 ; SSE2-NEXT:    psrad $31, %xmm0
-; SSE2-NEXT:    pslld $31, %xmm3
-; SSE2-NEXT:    psrad $31, %xmm3
-; SSE2-NEXT:    pslld $31, %xmm1
-; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    pxor %xmm0, %xmm1
 ; SSE2-NEXT:    pslld $31, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    pxor %xmm3, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    movdqa %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; AVX2-LABEL: bool_sext_xor:
 ; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
 ; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
 ; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
-; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
-; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
-; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
   %xs = sext <8 x i1> %x to <8 x i32>
   %ys = sext <8 x i1> %y to <8 x i32>

From c6d5efb5d98093c4bd7578b2ea52c9032d20dea3 Mon Sep 17 00:00:00 2001
From: Martin Probst <martin@probst.io>
Date: Thu, 27 Jan 2022 15:15:30 +0100
Subject: [PATCH 917/946] clang-format: [JS] sort import aliases.

Users can define aliases for long symbols using import aliases:

    import X = A.B.C;

Previously, these were unhandled and would terminate import sorting.
With this change, aliases sort as their own group, coming last after all
other imports.

Aliases are not sorted within their group, as they may reference each
other, so order is significant.

Revision URI: https://reviews.llvm.org/D118361
---
 clang/lib/Format/SortJavaScriptImports.cpp   | 31 +++++++++++++++-----
 clang/unittests/Format/SortImportsTestJS.cpp | 19 ++++++++++++
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp
index 37e79bb15b58b..e4107525a7fff 100644
--- a/clang/lib/Format/SortJavaScriptImports.cpp
+++ b/clang/lib/Format/SortJavaScriptImports.cpp
@@ -78,6 +78,7 @@ struct JsModuleReference {
     ABSOLUTE,        // from 'something'
     RELATIVE_PARENT, // from '../*'
     RELATIVE,        // from './*'
+    ALIAS,           // import X = A.B;
   };
   ReferenceCategory Category = ReferenceCategory::SIDE_EFFECT;
   // The URL imported, e.g. `import .. from 'url';`. Empty for `export {a, b};`.
@@ -105,10 +106,12 @@ bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) {
     return LHS.IsExport < RHS.IsExport;
   if (LHS.Category != RHS.Category)
     return LHS.Category < RHS.Category;
-  if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT)
-    // Side effect imports might be ordering sensitive. Consider them equal so
-    // that they maintain their relative order in the stable sort below.
-    // This retains transitivity because LHS.Category == RHS.Category here.
+  if (LHS.Category == JsModuleReference::ReferenceCategory::SIDE_EFFECT ||
+      LHS.Category == JsModuleReference::ReferenceCategory::ALIAS)
+    // Side effect imports and aliases might be ordering sensitive. Consider
+    // them equal so that they maintain their relative order in the stable sort
+    // below. This retains transitivity because LHS.Category == RHS.Category
+    // here.
     return false;
   // Empty URLs sort *last* (for export {...};).
   if (LHS.URL.empty() != RHS.URL.empty())
@@ -398,6 +401,8 @@ class JavaScriptImportSorter : public TokenAnalyzer {
       JsModuleReference Reference;
       Reference.FormattingOff = FormattingOff;
       Reference.Range.setBegin(Start);
+      // References w/o a URL, e.g. export {A}, groups with RELATIVE.
+      Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
       if (!parseModuleReference(Keywords, Reference)) {
         if (!FirstNonImportLine)
           FirstNonImportLine = Line; // if no comment before.
@@ -463,9 +468,6 @@ class JavaScriptImportSorter : public TokenAnalyzer {
         Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
       else
         Reference.Category = JsModuleReference::ReferenceCategory::ABSOLUTE;
-    } else {
-      // w/o URL groups with "empty".
-      Reference.Category = JsModuleReference::ReferenceCategory::RELATIVE;
     }
     return true;
   }
@@ -501,6 +503,21 @@ class JavaScriptImportSorter : public TokenAnalyzer {
       nextToken();
       if (Current->is(Keywords.kw_from))
         return true;
+      // import X = A.B.C;
+      if (Current->is(tok::equal)) {
+        Reference.Category = JsModuleReference::ReferenceCategory::ALIAS;
+        nextToken();
+        while (Current->is(tok::identifier)) {
+          nextToken();
+          if (Current->is(tok::semi)) {
+            nextToken();
+            return true;
+          }
+          if (!Current->is(tok::period))
+            return false;
+          nextToken();
+        }
+      }
       if (Current->isNot(tok::comma))
         return false;
       nextToken(); // eat comma.
diff --git a/clang/unittests/Format/SortImportsTestJS.cpp b/clang/unittests/Format/SortImportsTestJS.cpp
index 6fe8bba53549b..1c7e508630360 100644
--- a/clang/unittests/Format/SortImportsTestJS.cpp
+++ b/clang/unittests/Format/SortImportsTestJS.cpp
@@ -446,6 +446,25 @@ TEST_F(SortImportsTestJS, RespectsClangFormatOffInNamedImports) {
              "const x =   1;");
 }
 
+TEST_F(SortImportsTestJS, ImportEqAliases) {
+  verifySort("import {B} from 'bar';\n"
+             "import {A} from 'foo';\n"
+             "\n"
+             "import Z = A.C;\n"
+             "import Y = B.C.Z;\n"
+             "\n"
+             "export {Z};\n"
+             "\n"
+             "console.log(Z);\n",
+             "import {A} from 'foo';\n"
+             "import Z = A.C;\n"
+             "export {Z};\n"
+             "import {B} from 'bar';\n"
+             "import Y = B.C.Z;\n"
+             "\n"
+             "console.log(Z);\n");
+}
+
 } // end namespace
 } // end namespace format
 } // end namespace clang

From f482e86980a8580bae4ecef1b1dcd447ecb4484c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 18 Jan 2022 17:45:51 -0500
Subject: [PATCH 918/946] AMDGPU/GlobalISel: Fix flat_scratch_init handling for
 shaders

I don't think this is actually defined for mesa, but this is what we
were doing on the DAG path.
---
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp |  7 +++
 .../GlobalISel/flat-scratch-init.gfx.ll       | 45 +++++++++++++++++++
 2 files changed, 52 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 76eebb327010e..cd084fd5440a5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -627,6 +627,13 @@ bool AMDGPUCallLowering::lowerFormalArguments(
     CCInfo.AllocateReg(ImplicitBufferPtrReg);
   }
 
+  // FIXME: This probably isn't defined for mesa
+  if (Info->hasFlatScratchInit() && !Subtarget.isAmdPalOS()) {
+    Register FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
+    MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
+    CCInfo.AllocateReg(FlatScratchInitReg);
+  }
+
   SmallVector<ArgInfo, 32> SplitArgs;
   unsigned Idx = 0;
   unsigned PSInputNum = 0;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
new file mode 100644
index 0000000000000..7c9103226b7ef
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.gfx.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel -amdgpu-enable-flat-scratch -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=MESA %s
+; RUN: llc -global-isel -amdgpu-enable-flat-scratch -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=PAL %s
+
+; Test that the initialization for flat_scratch doesn't crash.  PAL
+; doesn't add a user SGPR for initializing flat_scratch, mesa does
+; (although this probably isn't actually defined).
+
+define amdgpu_ps void @amdgpu_ps() {
+; MESA-LABEL: amdgpu_ps:
+; MESA:       ; %bb.0:
+; MESA-NEXT:    s_add_u32 flat_scratch_lo, s2, s4
+; MESA-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; MESA-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; MESA-NEXT:    s_lshl_b32 s0, s0, 16
+; MESA-NEXT:    v_mov_b32_e32 v0, 4
+; MESA-NEXT:    v_mov_b32_e32 v1, s0
+; MESA-NEXT:    v_mov_b32_e32 v2, 0
+; MESA-NEXT:    flat_store_dword v[0:1], v2
+; MESA-NEXT:    s_waitcnt vmcnt(0)
+; MESA-NEXT:    s_endpgm
+;
+; PAL-LABEL: amdgpu_ps:
+; PAL:       ; %bb.0:
+; PAL-NEXT:    s_getpc_b64 s[2:3]
+; PAL-NEXT:    s_mov_b32 s2, s0
+; PAL-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
+; PAL-NEXT:    v_mov_b32_e32 v0, 4
+; PAL-NEXT:    v_mov_b32_e32 v2, 0
+; PAL-NEXT:    s_waitcnt lgkmcnt(0)
+; PAL-NEXT:    s_and_b32 s3, s3, 0xffff
+; PAL-NEXT:    s_add_u32 flat_scratch_lo, s2, s0
+; PAL-NEXT:    s_getreg_b32 s0, hwreg(HW_REG_SH_MEM_BASES, 0, 16)
+; PAL-NEXT:    s_addc_u32 flat_scratch_hi, s3, 0
+; PAL-NEXT:    s_lshl_b32 s0, s0, 16
+; PAL-NEXT:    v_mov_b32_e32 v1, s0
+; PAL-NEXT:    flat_store_dword v[0:1], v2
+; PAL-NEXT:    s_waitcnt vmcnt(0)
+; PAL-NEXT:    s_endpgm
+  %alloca = alloca i32, addrspace(5)
+  %cast = addrspacecast i32 addrspace(5)* %alloca to i32*
+  store volatile i32 0, i32* %cast
+  ret void
+}
+

From 416e503adfc16b8ae7718c4cca4cc34a6158eea0 Mon Sep 17 00:00:00 2001
From: Jean Perier <jperier@nvidia.com>
Date: Thu, 27 Jan 2022 14:59:38 +0100
Subject: [PATCH 919/946] [flang] split character procedure arguments in
 target-rewrite pass

When passing a character procedure as a dummy procedure, the result
length must be passed along the function address. This is to cover
the cases where the dummy procedure is declared with assumed length
inside the scope that will call it (it will need the length to allocate
the result on the caller side).

To be compatible with other Fortran compiler, this length must be
appended after all other argument just like character objects
(fir.boxchar).

A fir.boxchar cannot be used to implement this feature because it
is meant to take an object address, not a function address.

Instead, argument like `tuple<function type, integer type> {fir.char_proc}`
will be recognized as being character dummy procedure in FIR. That way
lowering does not have to do the argument split.

This patch adds tools in Character.h to create this type and tuple
values as well as to recognize them and extract its tuple members.

It also updates the target rewrite pass to split these arguments like
fir.boxchar.

This part is part of fir-dev upstreaming. It was reviwed previously
in: https://github.com/flang-compiler/f18-llvm-project/pull/1393

Differential Revision: https://reviews.llvm.org/D118108
---
 .../flang/Optimizer/Builder/Character.h       | 33 ++++++++
 .../flang/Optimizer/Dialect/FIROpsSupport.h   |  6 ++
 flang/lib/Optimizer/Builder/Character.cpp     | 48 ++++++++++++
 flang/lib/Optimizer/CodeGen/CMakeLists.txt    |  2 +
 flang/lib/Optimizer/CodeGen/TargetRewrite.cpp | 78 ++++++++++++++++++-
 flang/test/Fir/target-rewrite-char-proc.fir   | 69 ++++++++++++++++
 6 files changed, 234 insertions(+), 2 deletions(-)
 create mode 100644 flang/test/Fir/target-rewrite-char-proc.fir

diff --git a/flang/include/flang/Optimizer/Builder/Character.h b/flang/include/flang/Optimizer/Builder/Character.h
index 610b42052f317..b215e39d4c30b 100644
--- a/flang/include/flang/Optimizer/Builder/Character.h
+++ b/flang/include/flang/Optimizer/Builder/Character.h
@@ -187,6 +187,39 @@ mlir::FuncOp getLlvmMemmove(FirOpBuilder &builder);
 mlir::FuncOp getLlvmMemset(FirOpBuilder &builder);
 mlir::FuncOp getRealloc(FirOpBuilder &builder);
 
+//===----------------------------------------------------------------------===//
+// Tools to work with Character dummy procedures
+//===----------------------------------------------------------------------===//
+
+/// Create a tuple<function type, length type> type to pass character functions
+/// as arguments along their length. The function type set in the tuple is the
+/// one provided by \p funcPointerType.
+mlir::Type getCharacterProcedureTupleType(mlir::Type funcPointerType);
+
+/// Is this tuple type holding a character function and its result length ?
+bool isCharacterProcedureTuple(mlir::Type type);
+
+/// Is \p tuple a value holding a character function address and its result
+/// length ?
+inline bool isCharacterProcedureTuple(mlir::Value tuple) {
+  return isCharacterProcedureTuple(tuple.getType());
+}
+
+/// Create a tuple<addr, len> given \p addr and \p len as well as the tuple
+/// type \p argTy. \p addr must be any function address, and \p len must be
+/// any integer. Converts will be inserted if needed if \addr and \p len
+/// types are not the same as the one inside the tuple type \p tupleType.
+mlir::Value createCharacterProcedureTuple(fir::FirOpBuilder &builder,
+                                          mlir::Location loc,
+                                          mlir::Type tupleType,
+                                          mlir::Value addr, mlir::Value len);
+
+/// Given a tuple containing a character function address and its result length,
+/// extract the tuple into a pair of value <function address, result length>.
+std::pair<mlir::Value, mlir::Value>
+extractCharacterProcedureTuple(fir::FirOpBuilder &builder, mlir::Location loc,
+                               mlir::Value tuple);
+
 } // namespace fir::factory
 
 #endif // FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
index 7d8aa45b0b071..574f286818c3f 100644
--- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
+++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h
@@ -68,6 +68,12 @@ constexpr llvm::StringRef getOptionalAttrName() { return "fir.optional"; }
 /// Attribute to mark Fortran entities with the TARGET attribute.
 static constexpr llvm::StringRef getTargetAttrName() { return "fir.target"; }
 
+/// Attribute to mark that a function argument is a character dummy procedure.
+/// Character dummy procedure have special ABI constraints.
+static constexpr llvm::StringRef getCharacterProcedureDummyAttrName() {
+  return "fir.char_proc";
+}
+
 /// Tell if \p value is:
 ///   - a function argument that has attribute \p attributeName
 ///   - or, the result of fir.alloca/fir.allocamem op that has attribute \p
diff --git a/flang/lib/Optimizer/Builder/Character.cpp b/flang/lib/Optimizer/Builder/Character.cpp
index b306d54313006..8b84170d663ad 100644
--- a/flang/lib/Optimizer/Builder/Character.cpp
+++ b/flang/lib/Optimizer/Builder/Character.cpp
@@ -725,3 +725,51 @@ mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
   // Length cannot be deduced from memref.
   return {};
 }
+
+std::pair<mlir::Value, mlir::Value>
+fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder,
+                                             mlir::Location loc,
+                                             mlir::Value tuple) {
+  mlir::TupleType tupleType = tuple.getType().cast<mlir::TupleType>();
+  mlir::Value addr = builder.create<fir::ExtractValueOp>(
+      loc, tupleType.getType(0), tuple,
+      builder.getArrayAttr(
+          {builder.getIntegerAttr(builder.getIndexType(), 0)}));
+  mlir::Value len = builder.create<fir::ExtractValueOp>(
+      loc, tupleType.getType(1), tuple,
+      builder.getArrayAttr(
+          {builder.getIntegerAttr(builder.getIndexType(), 1)}));
+  return {addr, len};
+}
+
+mlir::Value fir::factory::createCharacterProcedureTuple(
+    fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy,
+    mlir::Value addr, mlir::Value len) {
+  mlir::TupleType tupleType = argTy.cast<mlir::TupleType>();
+  addr = builder.createConvert(loc, tupleType.getType(0), addr);
+  len = builder.createConvert(loc, tupleType.getType(1), len);
+  mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType);
+  tuple = builder.create<fir::InsertValueOp>(
+      loc, tupleType, tuple, addr,
+      builder.getArrayAttr(
+          {builder.getIntegerAttr(builder.getIndexType(), 0)}));
+  tuple = builder.create<fir::InsertValueOp>(
+      loc, tupleType, tuple, len,
+      builder.getArrayAttr(
+          {builder.getIntegerAttr(builder.getIndexType(), 1)}));
+  return tuple;
+}
+
+bool fir::factory::isCharacterProcedureTuple(mlir::Type ty) {
+  mlir::TupleType tuple = ty.dyn_cast<mlir::TupleType>();
+  return tuple && tuple.size() == 2 &&
+         tuple.getType(0).isa<mlir::FunctionType>() &&
+         fir::isa_integer(tuple.getType(1));
+}
+
+mlir::Type
+fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) {
+  mlir::MLIRContext *context = funcPointerType.getContext();
+  mlir::Type lenType = mlir::IntegerType::get(context, 64);
+  return mlir::TupleType::get(context, {funcPointerType, lenType});
+}
diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
index b6a63c8ee8b8b..04016c506ebc4 100644
--- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -6,12 +6,14 @@ add_flang_library(FIRCodeGen
   TargetRewrite.cpp
 
   DEPENDS
+  FIRBuilder
   FIRDialect
   FIRSupport
   FIROptCodeGenPassIncGen
   CGOpsIncGen
 
   LINK_LIBS
+  FIRBuilder
   FIRDialect
   FIRSupport
   MLIROpenMPToLLVM
diff --git a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
index f5616d21d1340..d4659c1150bcc 100644
--- a/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
+++ b/flang/lib/Optimizer/CodeGen/TargetRewrite.cpp
@@ -17,9 +17,11 @@
 #include "PassDetail.h"
 #include "Target.h"
 #include "flang/Lower/Todo.h"
+#include "flang/Optimizer/Builder/Character.h"
 #include "flang/Optimizer/CodeGen/CodeGen.h"
 #include "flang/Optimizer/Dialect/FIRDialect.h"
 #include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIROpsSupport.h"
 #include "flang/Optimizer/Dialect/FIRType.h"
 #include "flang/Optimizer/Support/FIRContext.h"
 #include "mlir/Transforms/DialectConversion.h"
@@ -42,7 +44,8 @@ struct FixupTy {
     ReturnAsStore,
     ReturnType,
     Split,
-    Trailing
+    Trailing,
+    TrailingCharProc
   };
 
   FixupTy(Codes code, std::size_t index, std::size_t second = 0)
@@ -266,6 +269,41 @@ class TargetRewrite : public TargetRewriteBase<TargetRewrite> {
           .template Case<mlir::ComplexType>([&](mlir::ComplexType cmplx) {
             rewriteCallComplexInputType(cmplx, oper, newInTys, newOpers);
           })
+          .template Case<mlir::TupleType>([&](mlir::TupleType tuple) {
+            if (factory::isCharacterProcedureTuple(tuple)) {
+              mlir::ModuleOp module = getModule();
+              if constexpr (std::is_same_v<std::decay_t<A>, fir::CallOp>) {
+                if (callOp.callee()) {
+                  llvm::StringRef charProcAttr =
+                      fir::getCharacterProcedureDummyAttrName();
+                  // The charProcAttr attribute is only used as a safety to
+                  // confirm that this is a dummy procedure and should be split.
+                  // It cannot be used to match because attributes are not
+                  // available in case of indirect calls.
+                  auto funcOp =
+                      module.lookupSymbol<mlir::FuncOp>(*callOp.callee());
+                  if (funcOp &&
+                      !funcOp.template getArgAttrOfType<mlir::UnitAttr>(
+                          index, charProcAttr))
+                    mlir::emitError(loc, "tuple argument will be split even "
+                                         "though it does not have the `" +
+                                             charProcAttr + "` attribute");
+                }
+              }
+              mlir::Type funcPointerType = tuple.getType(0);
+              mlir::Type lenType = tuple.getType(1);
+              FirOpBuilder builder(*rewriter, getKindMapping(module));
+              auto [funcPointer, len] =
+                  factory::extractCharacterProcedureTuple(builder, loc, oper);
+              newInTys.push_back(funcPointerType);
+              newOpers.push_back(funcPointer);
+              trailingInTys.push_back(lenType);
+              trailingOpers.push_back(len);
+            } else {
+              newInTys.push_back(tuple);
+              newOpers.push_back(oper);
+            }
+          })
           .Default([&](mlir::Type ty) {
             newInTys.push_back(ty);
             newOpers.push_back(oper);
@@ -360,6 +398,14 @@ class TargetRewrite : public TargetRewriteBase<TargetRewrite> {
           .Case<mlir::ComplexType>([&](mlir::ComplexType ty) {
             lowerComplexSignatureArg(ty, newInTys);
           })
+          .Case<mlir::TupleType>([&](mlir::TupleType tuple) {
+            if (factory::isCharacterProcedureTuple(tuple)) {
+              newInTys.push_back(tuple.getType(0));
+              trailingInTys.push_back(tuple.getType(1));
+            } else {
+              newInTys.push_back(ty);
+            }
+          })
           .Default([&](mlir::Type ty) { newInTys.push_back(ty); });
     }
     // append trailing input types
@@ -394,7 +440,8 @@ class TargetRewrite : public TargetRewriteBase<TargetRewrite> {
         return false;
       }
     for (auto ty : func.getInputs())
-      if ((ty.isa<BoxCharType>() && !noCharacterConversion) ||
+      if (((ty.isa<BoxCharType>() || factory::isCharacterProcedureTuple(ty)) &&
+           !noCharacterConversion) ||
           (isa_complex(ty) && !noComplexConversion)) {
         LLVM_DEBUG(llvm::dbgs() << "rewrite " << signature << " for target\n");
         return false;
@@ -476,6 +523,16 @@ class TargetRewrite : public TargetRewriteBase<TargetRewrite> {
             else
               doComplexArg(func, cmplx, newInTys, fixups);
           })
+          .Case<mlir::TupleType>([&](mlir::TupleType tuple) {
+            if (factory::isCharacterProcedureTuple(tuple)) {
+              fixups.emplace_back(FixupTy::Codes::TrailingCharProc,
+                                  newInTys.size(), trailingTys.size());
+              newInTys.push_back(tuple.getType(0));
+              trailingTys.push_back(tuple.getType(1));
+            } else {
+              newInTys.push_back(ty);
+            }
+          })
           .Default([&](mlir::Type ty) { newInTys.push_back(ty); });
     }
 
@@ -604,6 +661,23 @@ class TargetRewrite : public TargetRewriteBase<TargetRewrite> {
           func.getArgument(fixup.index + 1).replaceAllUsesWith(box);
           func.front().eraseArgument(fixup.index + 1);
         } break;
+        case FixupTy::Codes::TrailingCharProc: {
+          // The FIR character procedure argument tuple has been split into a
+          // pair of distinct arguments. The first part of the pair appears in
+          // the original argument position. The second part of the pair is
+          // appended after all the original arguments.
+          auto newProcPointerArg = func.front().insertArgument(
+              fixup.index, newInTys[fixup.index], loc);
+          auto newLenArg =
+              func.front().addArgument(trailingTys[fixup.second], loc);
+          auto tupleType = oldArgTys[fixup.index - offset];
+          rewriter->setInsertionPointToStart(&func.front());
+          FirOpBuilder builder(*rewriter, getKindMapping(getModule()));
+          auto tuple = factory::createCharacterProcedureTuple(
+              builder, loc, tupleType, newProcPointerArg, newLenArg);
+          func.getArgument(fixup.index + 1).replaceAllUsesWith(tuple);
+          func.front().eraseArgument(fixup.index + 1);
+        } break;
         }
       }
     }
diff --git a/flang/test/Fir/target-rewrite-char-proc.fir b/flang/test/Fir/target-rewrite-char-proc.fir
new file mode 100644
index 0000000000000..cb3e68e4aa950
--- /dev/null
+++ b/flang/test/Fir/target-rewrite-char-proc.fir
@@ -0,0 +1,69 @@
+// Test rewrite of character procedure pointer tuple argument to two different
+// arguments: one for the function address, and one for the length. The length
+// argument is added after other characters.
+// RUN: fir-opt --target-rewrite="target=x86_64-unknown-linux-gnu" %s | FileCheck %s
+
+// CHECK:  func private @takes_char_proc(() -> () {fir.char_proc}, i64)
+func private @takes_char_proc(tuple<() -> (), i64> {fir.char_proc})
+
+func private @takes_char(!fir.boxchar<1>)
+func private @char_proc(!fir.ref<!fir.char<1,7>>, index) -> !fir.boxchar<1>
+
+func @_QPcst_len() {
+  %0 = fir.address_of(@char_proc) : (!fir.ref<!fir.char<1,7>>, index) -> !fir.boxchar<1>
+  %c7_i64 = arith.constant 7 : i64
+  %1 = fir.convert %0 : ((!fir.ref<!fir.char<1,7>>, index) -> !fir.boxchar<1>) -> (() -> ())
+  %2 = fir.undefined tuple<() -> (), i64>
+  %3 = fir.insert_value %2, %1, [0 : index] : (tuple<() -> (), i64>, () -> ()) -> tuple<() -> (), i64>
+  %4 = fir.insert_value %3, %c7_i64, [1 : index] : (tuple<() -> (), i64>, i64) -> tuple<() -> (), i64>
+
+  // CHECK:  %[[PROC_ADDR:.*]] = fir.extract_value %{{.*}}, [0 : index] : (tuple<() -> (), i64>) -> (() -> ())
+  // CHECK:  %[[LEN:.*]] = fir.extract_value %{{.*}}, [1 : index] : (tuple<() -> (), i64>) -> i64
+  // CHECK:  fir.call @takes_char_proc(%[[PROC_ADDR]], %[[LEN]]) : (() -> (), i64) -> ()
+  fir.call @takes_char_proc(%4) : (tuple<() -> (), i64>) -> ()
+  return
+}
+
+// CHECK:  func @test_dummy_proc_that_takes_dummy_char_proc(
+// CHECK-SAME: %[[ARG0:.*]]: () -> ()) {
+func @test_dummy_proc_that_takes_dummy_char_proc(%arg0: () -> ()) {
+  %0 = fir.address_of(@char_proc) : (!fir.ref<!fir.char<1,7>>, index) -> !fir.boxchar<1>
+  %c7_i64 = arith.constant 7 : i64
+  %1 = fir.convert %0 : ((!fir.ref<!fir.char<1,7>>, index) -> !fir.boxchar<1>) -> (() -> ())
+  %2 = fir.undefined tuple<() -> (), i64>
+  %3 = fir.insert_value %2, %1, [0 : index] : (tuple<() -> (), i64>, () -> ()) -> tuple<() -> (), i64>
+  %4 = fir.insert_value %3, %c7_i64, [1 : index] : (tuple<() -> (), i64>, i64) -> tuple<() -> (), i64>
+  %5 = fir.convert %arg0 : (() -> ()) -> ((tuple<() -> (), i64>) -> ())
+
+  // CHECK:  %[[ARG_CAST:.*]] = fir.convert %[[ARG0]] : (() -> ()) -> ((() -> (), i64) -> ())
+  // CHECK:  %[[PROC_ADDR:.*]] = fir.extract_value %4, [0 : index] : (tuple<() -> (), i64>) -> (() -> ())
+  // CHECK:  %[[PROC_LEN:.*]] = fir.extract_value %4, [1 : index] : (tuple<() -> (), i64>) -> i64
+  // CHECK: fir.call %[[ARG_CAST]](%[[PROC_ADDR]], %[[PROC_LEN]]) : (() -> (), i64) -> ()
+  fir.call %5(%4) : (tuple<() -> (), i64>) -> ()
+  return
+}
+
+// CHECK:  func @takes_dummy_char_proc_impl(
+// CHECK-SAME: %[[PROC_ADDR:.*]]: () -> () {fir.char_proc},
+// CHECK-SAME: %[[C_ADDR:.*]]: !fir.ref<!fir.char<1,?>>,
+// CHECK-SAME: %[[PROC_LEN:.*]]: i64,
+// CHECK-SAME: %[[C_LEN:.*]]: i64) {
+func @takes_dummy_char_proc_impl(%arg0: tuple<() -> (), i64> {fir.char_proc}, %arg1: !fir.boxchar<1>) {
+  // CHECK:    %[[UNDEF:.*]] = fir.undefined tuple<() -> (), i64>
+  // CHECK:    %[[TUPLE0:.*]] = fir.insert_value %[[UNDEF]], %[[PROC_ADDR]], [0 : index] : (tuple<() -> (), i64>, () -> ()) -> tuple<() -> (), i64>
+  // CHECK:    %[[TUPLE1:.*]] = fir.insert_value %[[TUPLE0]], %[[PROC_LEN]], [1 : index] : (tuple<() -> (), i64>, i64) -> tuple<() -> (), i64>
+  %0 = fir.alloca !fir.char<1,7> {bindc_name = ".result"}
+  %1:2 = fir.unboxchar %arg1 : (!fir.boxchar<1>) -> (!fir.ref<!fir.char<1,?>>, index)
+  %c5 = arith.constant 5 : index
+  %2 = fir.emboxchar %1#0, %c5 : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
+  %3 = fir.extract_value %arg0, [0 : index] : (tuple<() -> (), i64>) -> (() -> ())
+  %c7_i64 = arith.constant 7 : i64
+  %4 = fir.convert %c7_i64 : (i64) -> index
+  %6 = fir.convert %3 : (() -> ()) -> ((!fir.ref<!fir.char<1,7>>, index, !fir.boxchar<1>) -> !fir.boxchar<1>)
+  %7 = fir.call %6(%0, %4, %2) : (!fir.ref<!fir.char<1,7>>, index, !fir.boxchar<1>) -> !fir.boxchar<1>
+  %8 = fir.convert %0 : (!fir.ref<!fir.char<1,7>>) -> !fir.ref<!fir.char<1,?>>
+  %9 = fir.emboxchar %8, %4 : (!fir.ref<!fir.char<1,?>>, index) -> !fir.boxchar<1>
+  fir.call @takes_char(%9) : (!fir.boxchar<1>) -> ()
+  return
+}
+

From aa88b653927814052cdcb0ca22de52b043034568 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 18 Jan 2022 18:37:27 -0500
Subject: [PATCH 920/946] AMDGPU/GlobalISel: Fix assert on invalid cond code
 for llvm.amdgcn.icmp

---
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp  | 11 +++++++++++
 .../CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll     | 11 +++++++++++
 2 files changed, 22 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 4883b6a86ef8f..b7d0f0580cda0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1102,7 +1102,18 @@ bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
   const DebugLoc &DL = I.getDebugLoc();
   Register SrcReg = I.getOperand(2).getReg();
   unsigned Size = RBI.getSizeInBits(SrcReg, *MRI, TRI);
+
   auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(4).getImm());
+  if (!ICmpInst::isIntPredicate(static_cast<ICmpInst::Predicate>(Pred))) {
+    MachineInstr *ICmp =
+        BuildMI(*BB, &I, DL, TII.get(AMDGPU::IMPLICIT_DEF), Dst);
+
+    if (!RBI.constrainGenericRegister(ICmp->getOperand(0).getReg(),
+                                      *TRI.getBoolRC(), *MRI))
+      return false;
+    I.eraseFromParent();
+    return true;
+  }
 
   int Opcode = getV_CMPOpcode(Pred, Size);
   if (Opcode == -1)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
index f0eedf6154eb4..c8eb380ddfd64 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.icmp.ll
@@ -25,6 +25,17 @@ define amdgpu_ps void @test_intr_icmp_ne_i32(i32 addrspace(1)* %out, i32 %src) #
   store i32 %result, i32 addrspace(1)* %out
   ret void
 }
+
+define amdgpu_ps void @test_intr_icmp_i32_invalid_cc(i32 addrspace(1)* %out, i32 %src) #1 {
+; GCN-LABEL: test_intr_icmp_i32_invalid_cc:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    global_store_dword v[0:1], v0, off
+; GCN-NEXT:    s_endpgm
+  %result = call i32 @llvm.amdgcn.icmp.i32.i32(i32 %src, i32 100, i32 9999)
+  store i32 %result, i32 addrspace(1)* %out
+  ret void
+}
+
 declare i64 @llvm.amdgcn.icmp.i64.i32(i32, i32, i32)
 declare i32 @llvm.amdgcn.icmp.i32.i32(i32, i32, i32)
 attributes #0 = { "target-features"="+wavefrontsize64" }

From dad2b6e797df82c9ea5377394e09aacf6d824560 Mon Sep 17 00:00:00 2001
From: Yousuf Ali <yoali@ibm.com>
Date: Thu, 27 Jan 2022 09:57:04 -0500
Subject: [PATCH 921/946] [PowerPC][AIX] Support toc-data attribute for
 read-only globals.

The patch handles the addition of constant global variables to the table
of contents.

Differential Revision: https://reviews.llvm.org/D116181
---
 llvm/lib/MC/MCSectionXCOFF.cpp              |  6 +++--
 llvm/test/CodeGen/PowerPC/toc-data-const.ll | 26 +++++++++++++++++++++
 2 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/toc-data-const.ll

diff --git a/llvm/lib/MC/MCSectionXCOFF.cpp b/llvm/lib/MC/MCSectionXCOFF.cpp
index 7f7380bf810d9..2ff4839d3706f 100644
--- a/llvm/lib/MC/MCSectionXCOFF.cpp
+++ b/llvm/lib/MC/MCSectionXCOFF.cpp
@@ -34,7 +34,8 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
   }
 
   if (getKind().isReadOnly()) {
-    if (getMappingClass() != XCOFF::XMC_RO)
+    if (getMappingClass() != XCOFF::XMC_RO &&
+        getMappingClass() != XCOFF::XMC_TD)
       report_fatal_error("Unhandled storage-mapping class for .rodata csect.");
     printCsectDirective(OS);
     return;
@@ -70,7 +71,8 @@ void MCSectionXCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, const Triple &T,
   }
 
   if (isCsect() && getMappingClass() == XCOFF::XMC_TD) {
-    assert((getKind().isBSSExtern() || getKind().isBSSLocal()) &&
+    assert((getKind().isBSSExtern() || getKind().isBSSLocal() ||
+            getKind().isReadOnlyWithRel()) &&
            "Unexepected section kind for toc-data");
     printCsectDirective(OS);
     return;
diff --git a/llvm/test/CodeGen/PowerPC/toc-data-const.ll b/llvm/test/CodeGen/PowerPC/toc-data-const.ll
new file mode 100644
index 0000000000000..2b87480d101c2
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/toc-data-const.ll
@@ -0,0 +1,26 @@
+; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
+; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck %s --check-prefix CHECK
+
+@i1 = external constant i32 #0
+@i2 = constant i32* @i1 #0
+
+define i32 @read() {
+  %1  = load i32, i32* @i1, align 4
+  ret i32 %1
+}
+
+define i32** @retptr() {
+  ret i32** @i2
+}
+
+; CHECK:       .read:
+; CHECK:        la 3, i1[TD](2)
+
+; CHECK:       .retptr:
+; CHECK:        la 3, i2[TD](2)
+
+; CHECK-DAG:   .toc
+; CHECK:         .extern i1[TD]
+; CHECK:         .csect i2[TD]
+
+attributes #0 = { "toc-data" }

From 0902eb30ad714da3ed6c6a744337c9b52427f366 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Tue, 18 Jan 2022 12:10:14 -0500
Subject: [PATCH 922/946] [libc++] Fix common_iterator for output_iterators

We were missing a constraint in common_iterator's iterator_traits and
we were eagerly instantiating iter_value_t even when invalid.

Thanks to Casey Carter for finding this bug.

Differential Revision: https://reviews.llvm.org/D117449
---
 libcxx/include/__iterator/common_iterator.h   | 14 +++----
 .../iterator_traits.compile.pass.cpp          | 37 ++++++++++++++++---
 .../iterators.common/plus_plus.pass.cpp       | 37 +++++++++++++++++--
 .../predef.iterators/iterators.common/types.h | 24 ------------
 4 files changed, 72 insertions(+), 40 deletions(-)

diff --git a/libcxx/include/__iterator/common_iterator.h b/libcxx/include/__iterator/common_iterator.h
index df4c7bd18e8d2..605071d70928e 100644
--- a/libcxx/include/__iterator/common_iterator.h
+++ b/libcxx/include/__iterator/common_iterator.h
@@ -29,6 +29,11 @@ _LIBCPP_BEGIN_NAMESPACE_STD
 
 #if !defined(_LIBCPP_HAS_NO_RANGES)
 
+template<class _Iter>
+concept __can_use_postfix_proxy =
+  constructible_from<iter_value_t<_Iter>, iter_reference_t<_Iter>> &&
+  move_constructible<iter_value_t<_Iter>>;
+
 template<input_or_output_iterator _Iter, sentinel_for<_Iter> _Sent>
   requires (!same_as<_Iter, _Sent> && copyable<_Iter>)
 class common_iterator {
@@ -54,10 +59,6 @@ class common_iterator {
       : __value(_VSTD::forward<iter_reference_t<_Iter>>(__x)) {}
 
   public:
-    constexpr static bool __valid_for_iter =
-      constructible_from<iter_value_t<_Iter>, iter_reference_t<_Iter>> &&
-      move_constructible<iter_value_t<_Iter>>;
-
     constexpr const iter_value_t<_Iter>& operator*() const noexcept {
       return __value;
     }
@@ -148,7 +149,7 @@ class common_iterator {
       ++*this;
       return __tmp;
     } else if constexpr (requires (_Iter& __i) { { *__i++ } -> __referenceable; } ||
-                         !__postfix_proxy::__valid_for_iter) {
+                         !__can_use_postfix_proxy<_Iter>) {
       return _VSTD::__unchecked_get<_Iter>(__hold_)++;
     } else {
       __postfix_proxy __p(**this);
@@ -261,7 +262,7 @@ struct __arrow_type_or_void<_Iter, _Sent> {
     using type = decltype(declval<const common_iterator<_Iter, _Sent>&>().operator->());
 };
 
-template<class _Iter, class _Sent>
+template<input_iterator _Iter, class _Sent>
 struct iterator_traits<common_iterator<_Iter, _Sent>> {
   using iterator_concept = _If<forward_iterator<_Iter>,
                                forward_iterator_tag,
@@ -275,7 +276,6 @@ struct iterator_traits<common_iterator<_Iter, _Sent>> {
   using reference = iter_reference_t<_Iter>;
 };
 
-
 #endif // !defined(_LIBCPP_HAS_NO_RANGES)
 
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/iterator_traits.compile.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/iterator_traits.compile.pass.cpp
index 384c8c7491f25..c55285b798f30 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/iterator_traits.compile.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/iterator_traits.compile.pass.cpp
@@ -14,9 +14,23 @@
 
 #include <iterator>
 
+#include <cstddef>
+#include "test_iterators.h"
 #include "test_macros.h"
 #include "types.h"
 
+template<class T>
+concept HasIteratorConcept = requires { typename T::iterator_concept; };
+
+struct NonVoidOutputIterator {
+    using value_type = int;
+    using difference_type = std::ptrdiff_t;
+    const NonVoidOutputIterator& operator*() const;
+    NonVoidOutputIterator& operator++();
+    NonVoidOutputIterator& operator++(int);
+    void operator=(int) const;
+};
+
 void test() {
   {
     using Iter = simple_iterator<int*>;
@@ -43,17 +57,30 @@ void test() {
     static_assert(!std::same_as<IterTraits::pointer, int*>);
     static_assert(std::same_as<IterTraits::reference, int>);
   }
+  // Test with an output_iterator that has a void value_type
   {
-    using Iter = non_const_deref_iterator<int*>;
+    using Iter = output_iterator<int*>;
     using CommonIter = std::common_iterator<Iter, sentinel_type<int*>>;
     using IterTraits = std::iterator_traits<CommonIter>;
 
-    static_assert(std::same_as<IterTraits::iterator_concept, std::input_iterator_tag>);
-    static_assert(std::same_as<IterTraits::iterator_category, std::input_iterator_tag>);
-    static_assert(std::same_as<IterTraits::value_type, int>);
+    static_assert(!HasIteratorConcept<IterTraits>);
+    static_assert(std::same_as<IterTraits::iterator_category, std::output_iterator_tag>);
+    static_assert(std::same_as<IterTraits::value_type, void>);
     static_assert(std::same_as<IterTraits::difference_type, std::ptrdiff_t>);
     static_assert(std::same_as<IterTraits::pointer, void>);
-    static_assert(std::same_as<IterTraits::reference, int&>);
+    static_assert(std::same_as<IterTraits::reference, void>);
+  }
+  // Test with an output_iterator that has a non-void value_type
+  {
+    using CommonIter = std::common_iterator<NonVoidOutputIterator, sentinel_wrapper<NonVoidOutputIterator>>;
+    using IterTraits = std::iterator_traits<CommonIter>;
+
+    static_assert(!HasIteratorConcept<IterTraits>);
+    static_assert(std::same_as<IterTraits::iterator_category, std::output_iterator_tag>);
+    static_assert(std::same_as<IterTraits::value_type, void>);
+    static_assert(std::same_as<IterTraits::difference_type, std::ptrdiff_t>);
+    static_assert(std::same_as<IterTraits::pointer, void>);
+    static_assert(std::same_as<IterTraits::reference, void>);
   }
   {
     using Iter = cpp17_input_iterator<int*>;
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/plus_plus.pass.cpp b/libcxx/test/std/iterators/predef.iterators/iterators.common/plus_plus.pass.cpp
index f3c1b77ec15ad..257c7ff5e89ef 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/plus_plus.pass.cpp
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/plus_plus.pass.cpp
@@ -21,11 +21,10 @@
 struct Incomplete;
 
 void test() {
-  int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
-
   // Reference: http://eel.is/c++draft/iterators.common#common.iter.nav-5
   // Case 2: can-reference
   {
+    int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
     auto iter1 = simple_iterator<int*>(buffer);
     auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
     auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
@@ -43,6 +42,7 @@ void test() {
 
   // Case 2: can-reference
   {
+    int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
     auto iter1 = value_iterator<int*>(buffer);
     auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
     auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
@@ -60,6 +60,7 @@ void test() {
 
   // Case 3: postfix-proxy
   {
+    int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
     auto iter1 = void_plus_plus_iterator<int*>(buffer);
     auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
     auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
@@ -77,13 +78,13 @@ void test() {
 
   // Case 2: where this is not referencable or move constructible
   {
+    int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
     auto iter1 = value_type_not_move_constructible_iterator<int*>(buffer);
     auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
     auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
 
     commonIter1++;
-    // Note: postfix operator++ returns void.
-    // assert(*(commonIter1++) == 1);
+    ASSERT_SAME_TYPE(decltype(commonIter1++), void);
     assert(*commonIter1 == 2);
     assert(*(++commonIter1) == 3);
     assert(*commonIter1 == 3);
@@ -97,6 +98,7 @@ void test() {
 
   // Case 2: can-reference
   {
+    int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
     auto iter1 = cpp17_input_iterator<int*>(buffer);
     auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
     auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
@@ -114,6 +116,7 @@ void test() {
 
   // Case 1: forward_iterator
   {
+    int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
     auto iter1 = forward_iterator<int*>(buffer);
     auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
     auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
@@ -131,6 +134,7 @@ void test() {
 
   // Case 1: forward_iterator
   {
+    int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8};
     auto iter1 = random_access_iterator<int*>(buffer);
     auto commonIter1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(iter1);
     auto commonSent1 = std::common_iterator<decltype(iter1), sentinel_type<int*>>(sentinel_type<int*>{buffer + 8});
@@ -145,6 +149,31 @@ void test() {
     }
     assert(commonIter1 == commonSent1);
   }
+
+  // Increment a common_iterator<output_iterator>: iter_value_t is not always valid for
+  // output iterators (it isn't for our test output_iterator). This is worth testing
+  // because it gets tricky when we define operator++(int).
+  {
+    int buffer[] = {0, 1, 2, 3, 4};
+    using Common = std::common_iterator<output_iterator<int*>, sentinel_type<int*>>;
+    auto iter = Common(output_iterator<int*>(buffer));
+    auto sent = Common(sentinel_type<int*>{buffer + 5});
+
+    *iter++ = 90;
+    assert(buffer[0] == 90);
+
+    *iter = 91;
+    assert(buffer[1] == 91);
+
+    *++iter = 92;
+    assert(buffer[2] == 92);
+
+    iter++;
+    iter++;
+    assert(iter != sent);
+    iter++;
+    assert(iter == sent);
+  }
 }
 
 int main(int, char**) {
diff --git a/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h b/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
index 03b94f63f631e..76981c1e1a01e 100644
--- a/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
+++ b/libcxx/test/std/iterators/predef.iterators/iterators.common/types.h
@@ -157,30 +157,6 @@ class comparable_iterator
     }
 };
 
-template <class It>
-class non_const_deref_iterator
-{
-    It it_;
-
-public:
-    typedef          std::input_iterator_tag                   iterator_category;
-    typedef typename std::iterator_traits<It>::value_type      value_type;
-    typedef typename std::iterator_traits<It>::difference_type difference_type;
-    typedef It                                                 pointer;
-    typedef typename std::iterator_traits<It>::reference       reference;
-
-    constexpr It base() const {return it_;}
-
-    non_const_deref_iterator() = default;
-    explicit constexpr non_const_deref_iterator(It it) : it_(it) {}
-
-    constexpr reference operator*() {return *it_;} // Note: non-const.
-
-    constexpr non_const_deref_iterator& operator++() {++it_; return *this;}
-    constexpr non_const_deref_iterator operator++(int)
-        {non_const_deref_iterator tmp(*this); ++(*this); return tmp;}
-};
-
 template<class T>
 struct sentinel_type {
   T base;

From d27cbfa9d366c2f6620770f514f612aa1bb0ecb6 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Thu, 13 Jan 2022 12:16:30 -0500
Subject: [PATCH 923/946] [libc++] Fix bug in ranges::advance

In `ranges::advance(iter, n, bound)`, we'd incorrectly handle the case
where bound < iter and n is 0:

    int a[10];
    int *p = a+5;
    int *bound = a+3;
    std::ranges::advance(p, 0, bound);
    assert(p - a == 5); // we'd return 3 before this patch

This was caused by an incorrect handling of 0 inside __magnitude_geq.

Differential Revision: https://reviews.llvm.org/D117240
---
 libcxx/include/__iterator/advance.h           | 12 ++--
 .../iterator_count_sentinel.pass.cpp          | 59 +++++++++++++++++--
 .../iterator_count_sentinel.pass.cpp          |  3 -
 3 files changed, 59 insertions(+), 15 deletions(-)

diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h
index 831f88f462744..03418979ddbd0 100644
--- a/libcxx/include/__iterator/advance.h
+++ b/libcxx/include/__iterator/advance.h
@@ -73,12 +73,6 @@ namespace __advance {
 
 struct __fn {
 private:
-  template <class _Tp>
-  _LIBCPP_HIDE_FROM_ABI
-  static constexpr _Tp __magnitude_geq(_Tp __a, _Tp __b) noexcept {
-    return __a < 0 ? (__a <= __b) : (__a >= __b);
-  }
-
   template <class _Ip>
   _LIBCPP_HIDE_FROM_ABI
   static constexpr void __advance_forward(_Ip& __i, iter_difference_t<_Ip> __n) {
@@ -155,6 +149,12 @@ struct __fn {
     // If `S` and `I` model `sized_sentinel_for<S, I>`:
     if constexpr (sized_sentinel_for<_Sp, _Ip>) {
       // If |n| >= |bound - i|, equivalent to `ranges::advance(i, bound)`.
+      // __magnitude_geq(a, b) returns |a| >= |b|, assuming they have the same sign.
+      auto __magnitude_geq = [](auto __a, auto __b) {
+        return __a == 0 ? __b == 0 :
+               __a > 0  ? __a >= __b :
+                          __a <= __b;
+      };
       if (const auto __M = __bound - __i; __magnitude_geq(__n, __M)) {
         (*this)(__i, __bound);
         return __n - __M;
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
index 104e912e3c357..75f12802af7f5 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp
@@ -119,6 +119,54 @@ static_assert(std::sized_sentinel_for<iota_iterator, iota_iterator>);
 constexpr bool test() {
   int range[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
 
+  // Basic functionality test: advance forward, bound has the same type
+  {
+    int *p;
+    p = range+5; assert(std::ranges::advance(p, 0, range+7) == 0); assert(p == range+5);
+    p = range+5; assert(std::ranges::advance(p, 1, range+7) == 0); assert(p == range+6);
+    p = range+5; assert(std::ranges::advance(p, 2, range+7) == 0); assert(p == range+7);
+    p = range+5; assert(std::ranges::advance(p, 3, range+7) == 1); assert(p == range+7);
+  }
+
+  // Basic functionality test: advance forward, bound is not the same type and not assignable
+  {
+    int *p;
+    using ConstPtr = const int*;
+    p = range+5; assert(std::ranges::advance(p, 0, ConstPtr(range+7)) == 0); assert(p == range+5);
+    p = range+5; assert(std::ranges::advance(p, 1, ConstPtr(range+7)) == 0); assert(p == range+6);
+    p = range+5; assert(std::ranges::advance(p, 2, ConstPtr(range+7)) == 0); assert(p == range+7);
+    p = range+5; assert(std::ranges::advance(p, 3, ConstPtr(range+7)) == 1); assert(p == range+7);
+  }
+
+  // Basic functionality test: advance forward, bound has different type but assignable
+  {
+    const int *pc;
+    pc = range+5; assert(std::ranges::advance(pc, 0, range+7) == 0); assert(pc == range+5);
+    pc = range+5; assert(std::ranges::advance(pc, 1, range+7) == 0); assert(pc == range+6);
+    pc = range+5; assert(std::ranges::advance(pc, 2, range+7) == 0); assert(pc == range+7);
+    pc = range+5; assert(std::ranges::advance(pc, 3, range+7) == 1); assert(pc == range+7);
+  }
+
+  // Basic functionality test: advance backward, bound has the same type
+  // Note that we don't test advancing backward with a bound of a different type because that's UB
+  {
+    int *p;
+    p = range+5; assert(std::ranges::advance(p, 0, range+3) == 0); assert(p == range+5);
+    p = range+5; assert(std::ranges::advance(p, -1, range+3) == 0); assert(p == range+4);
+    p = range+5; assert(std::ranges::advance(p, -2, range+3) == 0); assert(p == range+3);
+    p = range+5; assert(std::ranges::advance(p, -3, range+3) == -1); assert(p == range+3);
+  }
+
+  // Basic functionality test: advance backward with an array as a sentinel
+  {
+    int* p;
+    p = range+5; assert(std::ranges::advance(p,  0, range) == 0);  assert(p == range+5);
+    p = range+5; assert(std::ranges::advance(p, -1, range) == 0);  assert(p == range+4);
+    p = range+5; assert(std::ranges::advance(p, -5, range) == 0);  assert(p == range);
+    p = range+5; assert(std::ranges::advance(p, -6, range) == -1); assert(p == range);
+  }
+
+  // Exhaustive checks for n and range sizes
   for (int size = 0; size != 10; ++size) {
     for (int n = 0; n != 20; ++n) {
 
@@ -145,16 +193,15 @@ constexpr bool test() {
         // Note that we can only test ranges::advance with a negative n for iterators that
         // are sized sentinels for themselves, because ranges::advance is UB otherwise.
         // In particular, that excludes bidirectional_iterators since those are not sized sentinels.
-        // TODO: Enable these tests once we fix the bug in ranges::advance
-        // int* expected = n > size ? range : range + size - n;
-        // check_backward<random_access_iterator<int*>>(range, range+size, -n, expected);
-        // check_backward<contiguous_iterator<int*>>(   range, range+size, -n, expected);
-        // check_backward<int*>(                        range, range+size, -n, expected);
+        int* expected = n > size ? range : range + size - n;
+        check_backward<random_access_iterator<int*>>(range, range+size, -n, expected);
+        check_backward<contiguous_iterator<int*>>(   range, range+size, -n, expected);
+        check_backward<int*>(                        range, range+size, -n, expected);
       }
     }
   }
 
-  // regression-test that INT_MIN doesn't cause any undefined behavior
+  // Regression-test that INT_MIN doesn't cause any undefined behavior
   {
     auto i = iota_iterator{+1};
     assert(std::ranges::advance(i, INT_MIN, iota_iterator{-2}) == INT_MIN+3);
diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/iterator_count_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/iterator_count_sentinel.pass.cpp
index d998f0ddb90c0..253194605fcfb 100644
--- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/iterator_count_sentinel.pass.cpp
+++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.prev/iterator_count_sentinel.pass.cpp
@@ -28,9 +28,7 @@ constexpr void check(int* first, int* last, std::iter_difference_t<It> n, int* e
   assert(base(result) == expected);
 }
 
-// TODO: Re-enable once we fix the bug in ranges::advance
 constexpr bool test() {
-#if 0
   int range[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
 
   for (int size = 0; size != 10; ++size) {
@@ -42,7 +40,6 @@ constexpr bool test() {
       check<int*>(                        range, range+size, n, expected);
     }
   }
-#endif
 
   return true;
 }

From 89f03804fae720f4c4b717ca555f5d897d9b4900 Mon Sep 17 00:00:00 2001
From: Louis Dionne <ldionne.2@gmail.com>
Date: Wed, 26 Jan 2022 11:05:47 -0500
Subject: [PATCH 924/946] [libc++][NFC] Move some functions from
 directory_iterator.cpp to filesystem_common.h

This move is going to be needed in order to reuse `posix_readdir` in
another translation unit. This doesn't change any of the code except
for removing an unused function parameter that otherwise triggers a
warning inside our tests.

Differential Revision: https://reviews.llvm.org/D118254
---
 libcxx/src/filesystem/directory_iterator.cpp | 85 --------------------
 libcxx/src/filesystem/filesystem_common.h    | 82 ++++++++++++++++++-
 2 files changed, 79 insertions(+), 88 deletions(-)

diff --git a/libcxx/src/filesystem/directory_iterator.cpp b/libcxx/src/filesystem/directory_iterator.cpp
index 98928f06c868a..fa793f68295b6 100644
--- a/libcxx/src/filesystem/directory_iterator.cpp
+++ b/libcxx/src/filesystem/directory_iterator.cpp
@@ -9,97 +9,12 @@
 #include "__config"
 #include "filesystem"
 #include "stack"
-#if defined(_LIBCPP_WIN32API)
-#define WIN32_LEAN_AND_MEAN
-#define NOMINMAX
-#include <windows.h>
-#else
-#include <dirent.h>
-#endif
 #include <errno.h>
 
 #include "filesystem_common.h"
 
 _LIBCPP_BEGIN_NAMESPACE_FILESYSTEM
 
-namespace detail {
-namespace {
-
-#if !defined(_LIBCPP_WIN32API)
-
-#if defined(DT_BLK)
-template <class DirEntT, class = decltype(DirEntT::d_type)>
-static file_type get_file_type(DirEntT* ent, int) {
-  switch (ent->d_type) {
-  case DT_BLK:
-    return file_type::block;
-  case DT_CHR:
-    return file_type::character;
-  case DT_DIR:
-    return file_type::directory;
-  case DT_FIFO:
-    return file_type::fifo;
-  case DT_LNK:
-    return file_type::symlink;
-  case DT_REG:
-    return file_type::regular;
-  case DT_SOCK:
-    return file_type::socket;
-  // Unlike in lstat, hitting "unknown" here simply means that the underlying
-  // filesystem doesn't support d_type. Report is as 'none' so we correctly
-  // set the cache to empty.
-  case DT_UNKNOWN:
-    break;
-  }
-  return file_type::none;
-}
-#endif // defined(DT_BLK)
-
-template <class DirEntT>
-static file_type get_file_type(DirEntT* ent, long) {
-  return file_type::none;
-}
-
-static pair<string_view, file_type> posix_readdir(DIR* dir_stream,
-                                                  error_code& ec) {
-  struct dirent* dir_entry_ptr = nullptr;
-  errno = 0; // zero errno in order to detect errors
-  ec.clear();
-  if ((dir_entry_ptr = ::readdir(dir_stream)) == nullptr) {
-    if (errno)
-      ec = capture_errno();
-    return {};
-  } else {
-    return {dir_entry_ptr->d_name, get_file_type(dir_entry_ptr, 0)};
-  }
-}
-#else
-// defined(_LIBCPP_WIN32API)
-
-static file_type get_file_type(const WIN32_FIND_DATAW& data) {
-  if (data.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
-      data.dwReserved0 == IO_REPARSE_TAG_SYMLINK)
-    return file_type::symlink;
-  if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
-    return file_type::directory;
-  return file_type::regular;
-}
-static uintmax_t get_file_size(const WIN32_FIND_DATAW& data) {
-  return (static_cast<uint64_t>(data.nFileSizeHigh) << 32) + data.nFileSizeLow;
-}
-static file_time_type get_write_time(const WIN32_FIND_DATAW& data) {
-  ULARGE_INTEGER tmp;
-  const FILETIME& time = data.ftLastWriteTime;
-  tmp.u.LowPart = time.dwLowDateTime;
-  tmp.u.HighPart = time.dwHighDateTime;
-  return file_time_type(file_time_type::duration(tmp.QuadPart));
-}
-
-#endif
-
-} // namespace
-} // namespace detail
-
 using detail::ErrorHandler;
 
 #if defined(_LIBCPP_WIN32API)
diff --git a/libcxx/src/filesystem/filesystem_common.h b/libcxx/src/filesystem/filesystem_common.h
index ef4f02e6afb2e..99bba2760dcee 100644
--- a/libcxx/src/filesystem/filesystem_common.h
+++ b/libcxx/src/filesystem/filesystem_common.h
@@ -20,12 +20,19 @@
 #include "ratio"
 #include "system_error"
 
+#if defined(_LIBCPP_WIN32API)
+# define WIN32_LEAN_AND_MEAN
+# define NOMINMAX
+# include <windows.h>
+#endif
+
 #if !defined(_LIBCPP_WIN32API)
-# include <unistd.h>
+# include <dirent.h>   // for DIR & friends
+# include <fcntl.h>    /* values for fchmodat */
 # include <sys/stat.h>
 # include <sys/statvfs.h>
 # include <sys/time.h> // for ::utimes as used in __last_write_time
-# include <fcntl.h>    /* values for fchmodat */
+# include <unistd.h>
 #endif
 
 #include "../include/apple_availability.h"
@@ -527,7 +534,76 @@ bool set_file_times(const path& p, std::array<TimeSpec, 2> const& TS,
   return posix_utimensat(p, TS, ec);
 #endif
 }
-#endif /* !_LIBCPP_WIN32API */
+
+#if defined(DT_BLK)
+template <class DirEntT, class = decltype(DirEntT::d_type)>
+static file_type get_file_type(DirEntT* ent, int) {
+  switch (ent->d_type) {
+  case DT_BLK:
+    return file_type::block;
+  case DT_CHR:
+    return file_type::character;
+  case DT_DIR:
+    return file_type::directory;
+  case DT_FIFO:
+    return file_type::fifo;
+  case DT_LNK:
+    return file_type::symlink;
+  case DT_REG:
+    return file_type::regular;
+  case DT_SOCK:
+    return file_type::socket;
+  // Unlike in lstat, hitting "unknown" here simply means that the underlying
+  // filesystem doesn't support d_type. Report is as 'none' so we correctly
+  // set the cache to empty.
+  case DT_UNKNOWN:
+    break;
+  }
+  return file_type::none;
+}
+#endif // defined(DT_BLK)
+
+template <class DirEntT>
+static file_type get_file_type(DirEntT*, long) {
+  return file_type::none;
+}
+
+static pair<string_view, file_type> posix_readdir(DIR* dir_stream,
+                                                  error_code& ec) {
+  struct dirent* dir_entry_ptr = nullptr;
+  errno = 0; // zero errno in order to detect errors
+  ec.clear();
+  if ((dir_entry_ptr = ::readdir(dir_stream)) == nullptr) {
+    if (errno)
+      ec = capture_errno();
+    return {};
+  } else {
+    return {dir_entry_ptr->d_name, get_file_type(dir_entry_ptr, 0)};
+  }
+}
+
+#else // _LIBCPP_WIN32API
+
+static file_type get_file_type(const WIN32_FIND_DATAW& data) {
+  if (data.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT &&
+      data.dwReserved0 == IO_REPARSE_TAG_SYMLINK)
+    return file_type::symlink;
+  if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+    return file_type::directory;
+  return file_type::regular;
+}
+static uintmax_t get_file_size(const WIN32_FIND_DATAW& data) {
+  return (static_cast<uint64_t>(data.nFileSizeHigh) << 32) + data.nFileSizeLow;
+}
+static file_time_type get_write_time(const WIN32_FIND_DATAW& data) {
+  ULARGE_INTEGER tmp;
+  const FILETIME& time = data.ftLastWriteTime;
+  tmp.u.LowPart = time.dwLowDateTime;
+  tmp.u.HighPart = time.dwHighDateTime;
+  return file_time_type(file_time_type::duration(tmp.QuadPart));
+}
+
+#endif // !_LIBCPP_WIN32API
 
 } // namespace
 } // end namespace detail

From af1c8f0d142179826197f22c3880c980e6e47b3d Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Thu, 27 Jan 2022 13:01:07 +0000
Subject: [PATCH 925/946] [AArch64][SVE] Folds VSELECT if the predicate is all
 active.

This adds the following changes:

* Fold: vselect(<all active predicate>, x, y) => x
* Extend isAllActivePredicate to take vscale_range into account, e.g.
  isAllActivePredicate(vl16) for nxv16i1 and vscale == 1 => true.
  isAllActivePredicate(vl32) for nxv16i1 and vscale == 2 => true.

Differential Revision: https://reviews.llvm.org/D118147
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp    |  2 +-
 .../Target/AArch64/AArch64ISelLowering.cpp    | 40 +++++++++++--
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  2 +-
 llvm/test/CodeGen/AArch64/sve-vselect-fold.ll | 60 +++++++++++++++++++
 4 files changed, 97 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-vselect-fold.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index fe9b2f8883b9d..899f069abdd4b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -5147,5 +5147,5 @@ bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
   const AArch64TargetLowering *TLI =
       static_cast<const AArch64TargetLowering *>(getTargetLowering());
 
-  return TLI->isAllActivePredicate(N);
+  return TLI->isAllActivePredicate(*CurDAG, N);
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b395231c69a75..a26bbc77f2482 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -15087,7 +15087,15 @@ static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
                      Zero);
 }
 
-static bool isAllActivePredicate(SDValue N) {
+static bool isAllInactivePredicate(SDValue N) {
+  // Look through cast.
+  while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
+    N = N.getOperand(0);
+
+  return N.getOpcode() == AArch64ISD::PFALSE;
+}
+
+static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
   unsigned NumElts = N.getValueType().getVectorMinNumElements();
 
   // Look through cast.
@@ -15106,6 +15114,21 @@ static bool isAllActivePredicate(SDValue N) {
       N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
     return N.getValueType().getVectorMinNumElements() >= NumElts;
 
+  // If we're compiling for a specific vector-length, we can check if the
+  // pattern's VL equals that of the scalable vector at runtime.
+  if (N.getOpcode() == AArch64ISD::PTRUE) {
+    const auto &Subtarget =
+        static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
+    unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
+    unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
+    if (MaxSVESize && MinSVESize == MaxSVESize) {
+      unsigned VScale = MaxSVESize / AArch64::SVEBitsPerBlock;
+      unsigned PatNumElts =
+          getNumElementsFromSVEPredPattern(N.getConstantOperandVal(0));
+      return PatNumElts == (NumElts * VScale);
+    }
+  }
+
   return false;
 }
 
@@ -15122,7 +15145,7 @@ static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
   SDValue Op2 = N->getOperand(SwapOperands ? 2 : 3);
 
   // ISD way to specify an all active predicate.
-  if (isAllActivePredicate(Pg)) {
+  if (isAllActivePredicate(DAG, Pg)) {
     if (UnpredOp)
       return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Op1, Op2);
 
@@ -16793,6 +16816,12 @@ static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
   SDValue N0 = N->getOperand(0);
   EVT CCVT = N0.getValueType();
 
+  if (isAllActivePredicate(DAG, N0))
+    return N->getOperand(1);
+
+  if (isAllInactivePredicate(N0))
+    return N->getOperand(2);
+
   // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
   // into (OR (ASR lhs, N-1), 1), which requires less instructions for the
   // supported types.
@@ -19364,7 +19393,7 @@ SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
   default:
     return SDValue();
   case ISD::VECREDUCE_OR:
-    if (isAllActivePredicate(Pg))
+    if (isAllActivePredicate(DAG, Pg))
       // The predicate can be 'Op' because
       // vecreduce_or(Op & <all true>) <=> vecreduce_or(Op).
       return getPTest(DAG, VT, Op, Op, AArch64CC::ANY_ACTIVE);
@@ -19813,8 +19842,9 @@ SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
   return Op;
 }
 
-bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
-  return ::isAllActivePredicate(N);
+bool AArch64TargetLowering::isAllActivePredicate(SelectionDAG &DAG,
+                                                 SDValue N) const {
+  return ::isAllActivePredicate(DAG, N);
 }
 
 EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 90b947dd814b2..ca6c70297c0b4 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -844,7 +844,7 @@ class AArch64TargetLowering : public TargetLowering {
     return 128;
   }
 
-  bool isAllActivePredicate(SDValue N) const;
+  bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) const;
   EVT getPromotedVTForPredicate(EVT VT) const;
 
   EVT getAsmOperandValueType(const DataLayout &DL, Type *Ty,
diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-fold.ll b/llvm/test/CodeGen/AArch64/sve-vselect-fold.ll
new file mode 100644
index 0000000000000..159b85d148b78
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-vselect-fold.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s
+
+; Test that the select returns %true, because the predicate is all active.
+define <vscale x 4 x i32> @select_ptrue_fold_all_active(<vscale x 4 x i32> %false, <vscale x 4 x i32> %true) {
+; CHECK-LABEL: select_ptrue_fold_all_active:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %p = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+  %res = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %true, <vscale x 4 x i32> %false
+  ret <vscale x 4 x i32> %res
+}
+
+; Test that the select returns %true, because the predicate is all active for vscale_range(2, 2)
+define <vscale x 4 x i32> @select_ptrue_fold_vl8(<vscale x 4 x i32> %false, <vscale x 4 x i32> %true) vscale_range(2, 2) {
+; CHECK-LABEL: select_ptrue_fold_vl8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %p = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 8)
+  %res = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %true, <vscale x 4 x i32> %false
+  ret <vscale x 4 x i32> %res
+}
+
+define <vscale x 16 x i8> @select_ptrue_fold_all_inactive(<vscale x 16 x i8> %true, <vscale x 16 x i8> %false) {
+; CHECK-LABEL: select_ptrue_fold_all_inactive:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %p = call <vscale x 16 x  i1> @llvm.aarch64.sve.convert.from.svbool.nxv16i1(<vscale x 16 x i1> zeroinitializer)
+  %res = select <vscale x 16 x  i1> %p, <vscale x 16 x i8> %true, <vscale x 16 x i8> %false
+  ret <vscale x 16 x i8> %res
+}
+
+define <vscale x 4 x i32> @select_ptrue_fold_all_inactive_reinterpret(<vscale x 4 x i32> %true, <vscale x 4 x i32> %false) {
+; CHECK-LABEL: select_ptrue_fold_all_inactive_reinterpret:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, z1.d
+; CHECK-NEXT:    ret
+  %p = call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> zeroinitializer)
+  %res = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %true, <vscale x 4 x i32> %false
+  ret <vscale x 4 x i32> %res
+}
+
+; Test that the select remains, because predicate is not all active (only half lanes are set for vscale_range(2, 2))
+define <vscale x 4 x i32> @select_ptrue_no_fold_vl4(<vscale x 4 x i32> %true, <vscale x 4 x i32> %false) vscale_range(2, 2) {
+; CHECK-LABEL: select_ptrue_no_fold_vl4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s, vl4
+; CHECK-NEXT:    sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %p = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 4)
+  %res = select <vscale x 4 x i1> %p, <vscale x 4 x i32> %true, <vscale x 4 x i32> %false
+  ret <vscale x 4 x i32> %res
+}
+
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1>)
+declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv16i1(<vscale x 16 x i1>)

From d77c7c80d1166ce569b864d49fa4797d5a402fcf Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 26 Jan 2022 21:57:24 -0500
Subject: [PATCH 926/946] AMDGPU: Fix broken check lines in test

---
 llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
index 80dc38cef4213..65f0fcb1dc8bc 100644
--- a/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
+++ b/llvm/test/CodeGen/AMDGPU/constant-address-space-32bit.ll
@@ -303,10 +303,10 @@ define amdgpu_vs float @load_addr_no_fold(i32 addrspace(6)* inreg noalias %p0) #
   ret float %r2
 }
 
-; CHECK-LABEL: {{^}}vgpr_arg_src:
-; CHECK: v_readfirstlane_b32 s[[READLANE:[0-9]+]], v0
-; CHECK: s_mov_b32 s[[ZERO:[0-9]+]]
-; CHECK: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[READLANE]]:[[ZERO]]{{\]}}
+; GCN-LABEL: {{^}}vgpr_arg_src:
+; GCN: v_readfirstlane_b32 s[[READLANE:[0-9]+]], v0
+; GCN: s_mov_b32 s[[ZERO:[0-9]+]]
+; GCN: s_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[READLANE]]:[[ZERO]]{{\]}}
 define amdgpu_vs float @vgpr_arg_src(<4 x i32> addrspace(6)* %arg) {
 main_body:
   %tmp9 = load <4 x i32>, <4 x i32> addrspace(6)* %arg

From 33b45ee44b1f32ffdbc995e6fec806271b4b3ba4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Wed, 26 Jan 2022 21:57:36 -0500
Subject: [PATCH 927/946] AMDGPU: Handle addrspacecast of constant 32-bit to
 flat

I accidentally made this work on the GlobalISel path, and there's no
real reason not to handle this.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  9 ++++++++
 llvm/test/CodeGen/AMDGPU/addrspacecast.ll     | 23 +++++++++++++++++++
 .../CodeGen/AMDGPU/invalid-addrspacecast.ll   |  7 ------
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 47c9db6627e7b..561866b5a3985 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5632,6 +5632,15 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
     }
   }
 
+  if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
+      Op.getValueType() == MVT::i64) {
+    const SIMachineFunctionInfo *Info =
+        DAG.getMachineFunction().getInfo<SIMachineFunctionInfo>();
+    SDValue Hi = DAG.getConstant(Info->get32BitAddressHighBits(), SL, MVT::i32);
+    SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, Hi);
+    return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+  }
+
   if (ASC->getDestAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
       Src.getValueType() == MVT::i64)
     return DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src);
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
index 355158783b52a..b9095de7d1160 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast.ll
@@ -405,9 +405,32 @@ define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1
   ret void
 }
 
+; GCN-LABEL: {{^}}use_constant32bit_to_flat_addrspacecast_0:
+; GCN: s_load_dword [[PTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
+; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_0(i32 addrspace(6)* %ptr) #0 {
+  %stof = addrspacecast i32 addrspace(6)* %ptr to i32*
+  %load = load volatile i32, i32* %stof
+  ret void
+}
+
+; GCN-LABEL: {{^}}use_constant32bit_to_flat_addrspacecast_1:
+; GCN: s_load_dword [[PTR:s[0-9]+]],
+; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], 0xffff8000
+; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], [[PTR]]
+; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
+define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast_1(i32 addrspace(6)* %ptr) #3 {
+  %stof = addrspacecast i32 addrspace(6)* %ptr to i32*
+  %load = load volatile i32, i32* %stof
+  ret void
+}
+
 declare void @llvm.amdgcn.s.barrier() #1
 declare i32 @llvm.amdgcn.workitem.id.x() #2
 
 attributes #0 = { nounwind }
 attributes #1 = { nounwind convergent }
 attributes #2 = { nounwind readnone }
+attributes #3 = { nounwind "amdgpu-32bit-address-high-bits"="0xffff8000" }
diff --git a/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll
index cec4eac481548..7f11242217968 100644
--- a/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll
@@ -7,13 +7,6 @@ define amdgpu_kernel void @use_group_to_global_addrspacecast(i32 addrspace(3)* %
   ret void
 }
 
-; ERROR: error: <unknown>:0:0: in function use_constant32bit_to_flat_addrspacecast void (i32 addrspace(6)*): invalid addrspacecast
-define amdgpu_kernel void @use_constant32bit_to_flat_addrspacecast(i32 addrspace(6)* %ptr) #0 {
-  %stof = addrspacecast i32 addrspace(6)* %ptr to i32*
-  store volatile i32 7, i32* %stof
-  ret void
-}
-
 ; ERROR: error: <unknown>:0:0: in function use_local_to_constant32bit_addrspacecast void (i32 addrspace(3)*): invalid addrspacecast
 define amdgpu_kernel void @use_local_to_constant32bit_addrspacecast(i32 addrspace(3)* %ptr) #0 {
   %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(6)*

From 5d3ab6a2bb36a371584b27ef3aa5c0d8d0bdc94e Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Mon, 24 Jan 2022 19:04:06 +0100
Subject: [PATCH 928/946] [libc++][nfc] Include test_macros.h in more tests.

This should fix the regressions detected in D117992.

This lands before D117992 to avoid breaking main.

Reviewed By: #libc, ldionne

Differential Revision: https://reviews.llvm.org/D118056
---
 .../concepts.lang/concept.common/common_with.compile.pass.cpp   | 2 ++
 .../concept.commonref/common_reference.compile.pass.cpp         | 2 ++
 .../concepts.lang/concepts.arithmetic/integral.pass.cpp         | 1 +
 .../concepts.lang/concepts.arithmetic/signed_integral.pass.cpp  | 1 +
 .../concepts.arithmetic/unsigned_integral.pass.cpp              | 1 +
 .../cmp/cmp.concept/three_way_comparable.compile.pass.cpp       | 1 +
 .../rand.req.urng/uniform_random_bit_generator.compile.pass.cpp | 2 ++
 7 files changed, 10 insertions(+)

diff --git a/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp
index 45c8f4825dc56..7bbf6865bf321 100644
--- a/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp
@@ -15,6 +15,8 @@
 #include <concepts>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, class U>
 constexpr bool CheckCommonWith() noexcept {
   constexpr bool result = std::common_with<T, U>;
diff --git a/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
index 6bd8906b0c0cc..70a674ea8524c 100644
--- a/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
@@ -15,6 +15,8 @@
 #include <concepts>
 #include <type_traits>
 
+#include "test_macros.h"
+
 template <class T, class U>
 constexpr bool CheckCommonReferenceWith() noexcept {
   static_assert(std::common_reference_with<T, U&>);
diff --git a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/integral.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/integral.pass.cpp
index 42b85a1c020bb..cb9c4353a8c75 100644
--- a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/integral.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/integral.pass.cpp
@@ -16,6 +16,7 @@
 #include <type_traits>
 
 #include "arithmetic.h"
+#include "test_macros.h"
 
 template <typename T>
 constexpr bool CheckIntegralQualifiers() {
diff --git a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/signed_integral.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/signed_integral.pass.cpp
index df97b39709dc9..4092ab9c2d1a1 100644
--- a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/signed_integral.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/signed_integral.pass.cpp
@@ -16,6 +16,7 @@
 #include <type_traits>
 
 #include "arithmetic.h"
+#include "test_macros.h"
 
 template <typename T>
 constexpr bool CheckSignedIntegralQualifiers() {
diff --git a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/unsigned_integral.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/unsigned_integral.pass.cpp
index 02a62865f2ddf..f390f452bbf2e 100644
--- a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/unsigned_integral.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/unsigned_integral.pass.cpp
@@ -16,6 +16,7 @@
 #include <type_traits>
 
 #include "arithmetic.h"
+#include "test_macros.h"
 
 template <typename T>
 constexpr bool CheckUnsignedIntegralQualifiers() {
diff --git a/libcxx/test/std/language.support/cmp/cmp.concept/three_way_comparable.compile.pass.cpp b/libcxx/test/std/language.support/cmp/cmp.concept/three_way_comparable.compile.pass.cpp
index e9c1ae20d62f4..5ff5213d303be 100644
--- a/libcxx/test/std/language.support/cmp/cmp.concept/three_way_comparable.compile.pass.cpp
+++ b/libcxx/test/std/language.support/cmp/cmp.concept/three_way_comparable.compile.pass.cpp
@@ -15,6 +15,7 @@
 #include <compare>
 
 #include "compare_types.h"
+#include "test_macros.h"
 
 namespace fundamentals {
 // with default ordering
diff --git a/libcxx/test/std/numerics/rand/rand.req/rand.req.urng/uniform_random_bit_generator.compile.pass.cpp b/libcxx/test/std/numerics/rand/rand.req/rand.req.urng/uniform_random_bit_generator.compile.pass.cpp
index c212f9efb6ce3..77d2443aa715c 100644
--- a/libcxx/test/std/numerics/rand/rand.req/rand.req.urng/uniform_random_bit_generator.compile.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.req/rand.req.urng/uniform_random_bit_generator.compile.pass.cpp
@@ -14,6 +14,8 @@
 
 #include <random>
 
+#include "test_macros.h"
+
 static_assert(std::uniform_random_bit_generator<
               std::linear_congruential_engine<unsigned int, 0U, 1U, 2U> >);
 

From 8f972cb0fd0cf2513a1134a5f37b3c059376ca47 Mon Sep 17 00:00:00 2001
From: Mark de Wever <koraq@xs4all.nl>
Date: Sun, 23 Jan 2022 18:23:41 +0100
Subject: [PATCH 929/946] [libc++][nfc] Add TEST_HAS_NO_INT128.

Avoid using the libc++ internal `_LIBCPP_HAS_NO_INT128` in our tests.

Reviewed By: #libc, ldionne

Differential Revision: https://reviews.llvm.org/D117992
---
 libcxx/test/libcxx/algorithms/half_positive.pass.cpp      | 8 ++++----
 .../libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp | 2 +-
 .../test/libcxx/type_traits/convert_to_integral.pass.cpp  | 2 +-
 .../format.arguments/format.arg/visit_format_arg.pass.cpp | 4 ++--
 .../format/format.arguments/format.args/get.pass.cpp      | 4 ++--
 .../concept.common/common_with.compile.pass.cpp           | 2 +-
 .../concept.commonref/common_reference.compile.pass.cpp   | 2 +-
 .../concepts.lang/concepts.arithmetic/integral.pass.cpp   | 2 +-
 .../concepts.arithmetic/signed_integral.pass.cpp          | 2 +-
 .../concepts.arithmetic/unsigned_integral.pass.cpp        | 2 +-
 .../cmp/cmp.concept/three_way_comparable.compile.pass.cpp | 2 +-
 .../support.limits/limits/is_specialized.pass.cpp         | 2 +-
 .../numeric.limits.members/const_data_members.pass.cpp    | 4 ++--
 .../limits/numeric.limits.members/denorm_min.pass.cpp     | 2 +-
 .../limits/numeric.limits.members/digits.pass.cpp         | 2 +-
 .../limits/numeric.limits.members/digits10.pass.cpp       | 2 +-
 .../limits/numeric.limits.members/epsilon.pass.cpp        | 2 +-
 .../limits/numeric.limits.members/has_denorm.pass.cpp     | 2 +-
 .../numeric.limits.members/has_denorm_loss.pass.cpp       | 2 +-
 .../limits/numeric.limits.members/has_infinity.pass.cpp   | 2 +-
 .../limits/numeric.limits.members/has_quiet_NaN.pass.cpp  | 2 +-
 .../numeric.limits.members/has_signaling_NaN.pass.cpp     | 2 +-
 .../limits/numeric.limits.members/infinity.pass.cpp       | 2 +-
 .../limits/numeric.limits.members/is_bounded.pass.cpp     | 2 +-
 .../limits/numeric.limits.members/is_exact.pass.cpp       | 2 +-
 .../limits/numeric.limits.members/is_iec559.pass.cpp      | 2 +-
 .../limits/numeric.limits.members/is_integer.pass.cpp     | 2 +-
 .../limits/numeric.limits.members/is_modulo.pass.cpp      | 2 +-
 .../limits/numeric.limits.members/is_signed.pass.cpp      | 2 +-
 .../limits/numeric.limits.members/lowest.pass.cpp         | 2 +-
 .../limits/numeric.limits.members/max.pass.cpp            | 2 +-
 .../limits/numeric.limits.members/max_digits10.pass.cpp   | 2 +-
 .../limits/numeric.limits.members/max_exponent.pass.cpp   | 2 +-
 .../limits/numeric.limits.members/max_exponent10.pass.cpp | 2 +-
 .../limits/numeric.limits.members/min.pass.cpp            | 2 +-
 .../limits/numeric.limits.members/min_exponent.pass.cpp   | 2 +-
 .../limits/numeric.limits.members/min_exponent10.pass.cpp | 2 +-
 .../limits/numeric.limits.members/quiet_NaN.pass.cpp      | 2 +-
 .../limits/numeric.limits.members/radix.pass.cpp          | 2 +-
 .../limits/numeric.limits.members/round_error.pass.cpp    | 2 +-
 .../limits/numeric.limits.members/round_style.pass.cpp    | 2 +-
 .../limits/numeric.limits.members/signaling_NaN.pass.cpp  | 2 +-
 .../numeric.limits.members/tinyness_before.pass.cpp       | 2 +-
 .../limits/numeric.limits.members/traps.pass.cpp          | 2 +-
 .../test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp   | 8 ++++----
 .../test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp  | 8 ++++----
 .../test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp  | 8 ++++----
 .../std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp  | 8 ++++----
 .../std/numerics/bit/bitops.count/countl_one.pass.cpp     | 8 ++++----
 .../std/numerics/bit/bitops.count/countl_zero.pass.cpp    | 8 ++++----
 .../std/numerics/bit/bitops.count/countr_one.pass.cpp     | 8 ++++----
 .../std/numerics/bit/bitops.count/countr_zero.pass.cpp    | 8 ++++----
 .../test/std/numerics/bit/bitops.count/popcount.pass.cpp  | 8 ++++----
 libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp     | 8 ++++----
 libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp     | 8 ++++----
 libcxx/test/std/numerics/bit/byteswap.pass.cpp            | 2 +-
 .../numeric.ops.midpoint/midpoint.integer.pass.cpp        | 2 +-
 .../rand.dist.uni/rand.dist.uni.int/eval.pass.cpp         | 2 +-
 .../rand.dist.uni/rand.dist.uni.int/int128.pass.cpp       | 4 ++--
 .../uniform_random_bit_generator.compile.pass.cpp         | 2 +-
 .../formatter.signed_integral.pass.cpp                    | 2 +-
 .../formatter.unsigned_integral.pass.cpp                  | 2 +-
 .../format.context/types.compile.pass.cpp                 | 2 +-
 .../std/utilities/format/format.functions/format_tests.h  | 8 ++++----
 .../function.objects/unord.hash/integral.pass.cpp         | 2 +-
 .../meta/meta.trans/meta.trans.sign/make_signed.pass.cpp  | 4 ++--
 .../meta.trans/meta.trans.sign/make_unsigned.pass.cpp     | 4 ++--
 .../meta/meta.unary/meta.unary.cat/integral.pass.cpp      | 2 +-
 .../meta/meta.unary/meta.unary.comp/integral.pass.cpp     | 2 +-
 .../meta/meta.unary/meta.unary.prop/is_signed.pass.cpp    | 2 +-
 .../meta/meta.unary/meta.unary.prop/is_unsigned.pass.cpp  | 2 +-
 .../utility.intcmp/intcmp.cmp_equal/cmp_equal.pass.cpp    | 2 +-
 .../intcmp.cmp_greater/cmp_greater.pass.cpp               | 2 +-
 .../intcmp.cmp_greater_equal/cmp_greater_equal.pass.cpp   | 2 +-
 .../utility.intcmp/intcmp.cmp_less/cmp_less.pass.cpp      | 2 +-
 .../intcmp.cmp_less_equal/cmp_less_equal.pass.cpp         | 2 +-
 .../intcmp.cmp_not_equal/cmp_not_equal.pass.cpp           | 2 +-
 .../utility.intcmp/intcmp.in_range/in_range.pass.cpp      | 2 +-
 libcxx/test/support/msvc_stdlib_force_include.h           | 3 ---
 libcxx/test/support/poisoned_hash_helper.h                | 2 +-
 libcxx/test/support/test_macros.h                         | 4 ++++
 81 files changed, 128 insertions(+), 127 deletions(-)

diff --git a/libcxx/test/libcxx/algorithms/half_positive.pass.cpp b/libcxx/test/libcxx/algorithms/half_positive.pass.cpp
index fd7d1413741c4..997679629dac6 100644
--- a/libcxx/test/libcxx/algorithms/half_positive.pass.cpp
+++ b/libcxx/test/libcxx/algorithms/half_positive.pass.cpp
@@ -36,9 +36,9 @@ int main(int, char**)
         assert(test<long>());
         assert((test<UserDefinedIntegral<int>, int>()));
         assert(test<size_t>());
-#if !defined(_LIBCPP_HAS_NO_INT128)
+#if !defined(TEST_HAS_NO_INT128)
         assert(test<__int128_t>());
-#endif // !defined(_LIBCPP_HAS_NO_INT128)
+#endif // !defined(TEST_HAS_NO_INT128)
     }
 
 #if TEST_STD_VER >= 11
@@ -47,9 +47,9 @@ int main(int, char**)
         static_assert(test<int>(), "");
         static_assert(test<long>(), "");
         static_assert(test<size_t>(), "");
-#if !defined(_LIBCPP_HAS_NO_INT128)
+#if !defined(TEST_HAS_NO_INT128)
         static_assert(test<__int128_t>(), "");
-#endif // !defined(_LIBCPP_HAS_NO_INT128)
+#endif // !defined(TEST_HAS_NO_INT128)
     }
 #endif // TEST_STD_VER >= 11
 
diff --git a/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp b/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp
index 53dda1f007e21..d0495df4b415a 100644
--- a/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp
+++ b/libcxx/test/libcxx/numerics/numeric.ops/midpoint.integer.pass.cpp
@@ -53,7 +53,7 @@ int main(int, char**)
     test<uint32_t>();
     test<uint64_t>();
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>();
     test<__uint128_t>();
 #endif
diff --git a/libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp b/libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp
index d8fce1e1414ce..fd7db18f2dcee 100644
--- a/libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp
+++ b/libcxx/test/libcxx/type_traits/convert_to_integral.pass.cpp
@@ -107,7 +107,7 @@ int main(int, char**)
   check_integral_types<unsigned long, unsigned long>();
   check_integral_types<long long, long long>();
   check_integral_types<unsigned long long, unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   check_integral_types<__int128_t, __int128_t>();
   check_integral_types<__uint128_t, __uint128_t>();
 #endif
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
index 7a534d7282ef7..3f10ab06adfc2 100644
--- a/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.arg/visit_format_arg.pass.cpp
@@ -181,7 +181,7 @@ void test() {
   test<Context, long long, long long>(std::numeric_limits<long>::max());
   test<Context, long long, long long>(std::numeric_limits<long long>::max());
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   test<Context, __int128_t, __int128_t>(std::numeric_limits<__int128_t>::min());
   test<Context, __int128_t, __int128_t>(std::numeric_limits<long long>::min());
   test<Context, __int128_t, __int128_t>(std::numeric_limits<long>::min());
@@ -242,7 +242,7 @@ void test() {
   test<Context, unsigned long long, unsigned long long>(
       std::numeric_limits<unsigned long long>::max());
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   test<Context, __uint128_t, __uint128_t>(0);
   test<Context, __uint128_t, __uint128_t>(
       std::numeric_limits<unsigned char>::max());
diff --git a/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp b/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
index da02e6961012c..924818f94e4b0 100644
--- a/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
+++ b/libcxx/test/libcxx/utilities/format/format.arguments/format.args/get.pass.cpp
@@ -172,7 +172,7 @@ void test() {
   test<Context, long long, long long>(std::numeric_limits<long>::max());
   test<Context, long long, long long>(std::numeric_limits<long long>::max());
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   test<Context, __int128_t, __int128_t>(std::numeric_limits<__int128_t>::min());
   test<Context, __int128_t, __int128_t>(std::numeric_limits<long long>::min());
   test<Context, __int128_t, __int128_t>(std::numeric_limits<long>::min());
@@ -233,7 +233,7 @@ void test() {
   test<Context, unsigned long long, unsigned long long>(
       std::numeric_limits<unsigned long long>::max());
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   test<Context, __uint128_t, __uint128_t>(0);
   test<Context, __uint128_t, __uint128_t>(
       std::numeric_limits<unsigned char>::max());
diff --git a/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp
index 7bbf6865bf321..935bf7cc93eeb 100644
--- a/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concept.common/common_with.compile.pass.cpp
@@ -60,7 +60,7 @@ static_assert(std::common_with<void, void>);
 static_assert(CheckCommonWith<int, int>());
 static_assert(CheckCommonWith<int, long>());
 static_assert(CheckCommonWith<int, unsigned char>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
 static_assert(CheckCommonWith<int, __int128_t>());
 #endif
 static_assert(CheckCommonWith<int, double>());
diff --git a/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
index 70a674ea8524c..139cccd39c3be 100644
--- a/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concept.commonref/common_reference.compile.pass.cpp
@@ -54,7 +54,7 @@ static_assert(std::common_reference_with<void, void>);
 static_assert(CheckCommonReferenceWith<int, int>());
 static_assert(CheckCommonReferenceWith<int, long>());
 static_assert(CheckCommonReferenceWith<int, unsigned char>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
 static_assert(CheckCommonReferenceWith<int, __int128_t>());
 #endif
 static_assert(CheckCommonReferenceWith<int, double>());
diff --git a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/integral.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/integral.pass.cpp
index cb9c4353a8c75..796f05a5ef307 100644
--- a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/integral.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/integral.pass.cpp
@@ -60,7 +60,7 @@ static_assert(CheckIntegralQualifiers<long long>());
 static_assert(CheckIntegralQualifiers<unsigned long long>());
 
 // extended integers
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
 static_assert(CheckIntegralQualifiers<__int128_t>());
 static_assert(CheckIntegralQualifiers<__uint128_t>());
 #endif
diff --git a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/signed_integral.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/signed_integral.pass.cpp
index 4092ab9c2d1a1..ded45feabad80 100644
--- a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/signed_integral.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/signed_integral.pass.cpp
@@ -74,7 +74,7 @@ static_assert(!CheckSignedIntegralQualifiers<unsigned long>());
 static_assert(!CheckSignedIntegralQualifiers<unsigned long long>());
 
 // extended integers
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
 static_assert(CheckSignedIntegralQualifiers<__int128_t>());
 static_assert(!CheckSignedIntegralQualifiers<__uint128_t>());
 #endif
diff --git a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/unsigned_integral.pass.cpp b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/unsigned_integral.pass.cpp
index f390f452bbf2e..2d6ccc7ee93fb 100644
--- a/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/unsigned_integral.pass.cpp
+++ b/libcxx/test/std/concepts/concepts.lang/concepts.arithmetic/unsigned_integral.pass.cpp
@@ -69,7 +69,7 @@ static_assert(CheckUnsignedIntegralQualifiers<char32_t>() ==
               !std::is_signed_v<char32_t>);
 
 // extended integers
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
 static_assert(CheckUnsignedIntegralQualifiers<__uint128_t>());
 static_assert(!CheckUnsignedIntegralQualifiers<__int128_t>());
 #endif
diff --git a/libcxx/test/std/language.support/cmp/cmp.concept/three_way_comparable.compile.pass.cpp b/libcxx/test/std/language.support/cmp/cmp.concept/three_way_comparable.compile.pass.cpp
index 5ff5213d303be..85566b4c59fd9 100644
--- a/libcxx/test/std/language.support/cmp/cmp.concept/three_way_comparable.compile.pass.cpp
+++ b/libcxx/test/std/language.support/cmp/cmp.concept/three_way_comparable.compile.pass.cpp
@@ -34,7 +34,7 @@ static_assert(std::three_way_comparable<char8_t const&>);
 static_assert(std::three_way_comparable<char16_t volatile&>);
 static_assert(std::three_way_comparable<char32_t const volatile&>);
 #endif
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
 static_assert(std::three_way_comparable<__int128_t const&>);
 static_assert(std::three_way_comparable<__uint128_t const&>);
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/is_specialized.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/is_specialized.pass.cpp
index b5acfbe1c71a6..e0145b53bbfcc 100644
--- a/libcxx/test/std/language.support/support.limits/limits/is_specialized.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/is_specialized.pass.cpp
@@ -63,7 +63,7 @@ int main(int, char**)
     test<unsigned long>();
     test<signed long long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>();
     test<__uint128_t>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/const_data_members.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/const_data_members.pass.cpp
index b5912130a8db5..1d4c2686c7bb1 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/const_data_members.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/const_data_members.pass.cpp
@@ -138,7 +138,7 @@ int main(int, char**)
     TEST_NUMERIC_LIMITS(volatile long)
     TEST_NUMERIC_LIMITS(const volatile long)
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     TEST_NUMERIC_LIMITS(__int128_t)
     TEST_NUMERIC_LIMITS(const __int128_t)
     TEST_NUMERIC_LIMITS(volatile __int128_t)
@@ -175,7 +175,7 @@ int main(int, char**)
     TEST_NUMERIC_LIMITS(volatile unsigned long long)
     TEST_NUMERIC_LIMITS(const volatile unsigned long long)
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     TEST_NUMERIC_LIMITS(__uint128_t)
     TEST_NUMERIC_LIMITS(const __uint128_t)
     TEST_NUMERIC_LIMITS(volatile __uint128_t)
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/denorm_min.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/denorm_min.pass.cpp
index 49d22dc44f7e4..7b0123cac1d03 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/denorm_min.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/denorm_min.pass.cpp
@@ -48,7 +48,7 @@ int main(int, char**)
     test<unsigned long>(0);
     test<long long>(0);
     test<unsigned long long>(0);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>(0);
     test<__uint128_t>(0);
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/digits.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/digits.pass.cpp
index e0f8c311c53b2..dc2a1e50c27bb 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/digits.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/digits.pass.cpp
@@ -47,7 +47,7 @@ int main(int, char**)
     test<unsigned long, sizeof(long) == 4 ? 32 : 64>();
     test<long long, 63>();
     test<unsigned long long, 64>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, 127>();
     test<__uint128_t, 128>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/digits10.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/digits10.pass.cpp
index bb440fbc91776..12f61cf23e355 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/digits10.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/digits10.pass.cpp
@@ -51,7 +51,7 @@ int main(int, char**)
     test<unsigned long, sizeof(long) == 4 ? 9 : 19>();
     test<long long, 18>();
     test<unsigned long long, 19>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, 38>();
     test<__uint128_t, 38>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/epsilon.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/epsilon.pass.cpp
index cc49c4b2ef102..f47e538d1454f 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/epsilon.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/epsilon.pass.cpp
@@ -48,7 +48,7 @@ int main(int, char**)
     test<unsigned long>(0);
     test<long long>(0);
     test<unsigned long long>(0);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>(0);
     test<__uint128_t>(0);
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_denorm.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_denorm.pass.cpp
index d72580c981574..6974650a99014 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_denorm.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_denorm.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, std::denorm_absent>();
     test<long long, std::denorm_absent>();
     test<unsigned long long, std::denorm_absent>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, std::denorm_absent>();
     test<__uint128_t, std::denorm_absent>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_denorm_loss.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_denorm_loss.pass.cpp
index ebb684b73c939..43082f396ad2e 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_denorm_loss.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_denorm_loss.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, false>();
     test<long long, false>();
     test<unsigned long long, false>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, false>();
     test<__uint128_t, false>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_infinity.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_infinity.pass.cpp
index 67745b4e7fd57..af5f6cea36af3 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_infinity.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_infinity.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, false>();
     test<long long, false>();
     test<unsigned long long, false>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, false>();
     test<__uint128_t, false>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_quiet_NaN.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_quiet_NaN.pass.cpp
index 730b86967a52a..98d25700a32ee 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_quiet_NaN.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_quiet_NaN.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, false>();
     test<long long, false>();
     test<unsigned long long, false>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, false>();
     test<__uint128_t, false>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_signaling_NaN.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_signaling_NaN.pass.cpp
index 799c50547fda8..cedb9c41eba1d 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_signaling_NaN.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/has_signaling_NaN.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, false>();
     test<long long, false>();
     test<unsigned long long, false>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, false>();
     test<__uint128_t, false>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/infinity.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/infinity.pass.cpp
index 912ae16566a0b..71c323e31ce5c 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/infinity.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/infinity.pass.cpp
@@ -50,7 +50,7 @@ int main(int, char**)
     test<unsigned long>(0);
     test<long long>(0);
     test<unsigned long long>(0);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>(0);
     test<__uint128_t>(0);
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_bounded.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_bounded.pass.cpp
index 556e402c94ec0..86dbdaee5e40f 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_bounded.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_bounded.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, true>();
     test<long long, true>();
     test<unsigned long long, true>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, true>();
     test<__uint128_t, true>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_exact.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_exact.pass.cpp
index a257d3b4490cf..34a62989170b7 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_exact.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_exact.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, true>();
     test<long long, true>();
     test<unsigned long long, true>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, true>();
     test<__uint128_t, true>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_iec559.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_iec559.pass.cpp
index b527194239376..0ffc60694d638 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_iec559.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_iec559.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, false>();
     test<long long, false>();
     test<unsigned long long, false>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, false>();
     test<__uint128_t, false>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_integer.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_integer.pass.cpp
index d31795254d4f0..e1db2c7063a0d 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_integer.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_integer.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, true>();
     test<long long, true>();
     test<unsigned long long, true>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, true>();
     test<__uint128_t, true>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_modulo.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_modulo.pass.cpp
index 6495bf4933d71..33b3e927402d6 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_modulo.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_modulo.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, true>();
     test<long long, false>();
     test<unsigned long long, true>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, false>();
     test<__uint128_t, true>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_signed.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_signed.pass.cpp
index c2e639afcde14..5946062fe3b9a 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_signed.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/is_signed.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, false>();
     test<long long, true>();
     test<unsigned long long, false>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, true>();
     test<__uint128_t, false>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/lowest.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/lowest.pass.cpp
index 0169f2e27fde7..a59761c15d4d5 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/lowest.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/lowest.pass.cpp
@@ -59,7 +59,7 @@ int main(int, char**)
     test<unsigned long>(0);
     test<long long>(LLONG_MIN);
     test<unsigned long long>(0);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>(-__int128_t(__uint128_t(-1)/2) - 1);
     test<__uint128_t>(0);
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max.pass.cpp
index dd689963c2ed9..85b949266791e 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max.pass.cpp
@@ -59,7 +59,7 @@ int main(int, char**)
     test<unsigned long>(ULONG_MAX);
     test<long long>(LLONG_MAX);
     test<unsigned long long>(ULLONG_MAX);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>(__int128_t(__uint128_t(-1)/2));
     test<__uint128_t>(__uint128_t(-1));
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_digits10.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_digits10.pass.cpp
index ccf1d2b2ba4b9..cfa67fe830031 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_digits10.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_digits10.pass.cpp
@@ -47,7 +47,7 @@ int main(int, char**)
     test<unsigned long, 0>();
     test<long long, 0>();
     test<unsigned long long, 0>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, 0>();
     test<__uint128_t, 0>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_exponent.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_exponent.pass.cpp
index 9c6493ba4de51..6c8046b1d0a24 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_exponent.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_exponent.pass.cpp
@@ -47,7 +47,7 @@ int main(int, char**)
     test<unsigned long, 0>();
     test<long long, 0>();
     test<unsigned long long, 0>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, 0>();
     test<__uint128_t, 0>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_exponent10.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_exponent10.pass.cpp
index 53c8a1347e063..721fd2c294a36 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_exponent10.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/max_exponent10.pass.cpp
@@ -47,7 +47,7 @@ int main(int, char**)
     test<unsigned long, 0>();
     test<long long, 0>();
     test<unsigned long long, 0>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, 0>();
     test<__uint128_t, 0>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min.pass.cpp
index 525d7db4136e6..4e479c31575a6 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min.pass.cpp
@@ -59,7 +59,7 @@ int main(int, char**)
     test<unsigned long>(0);
     test<long long>(LLONG_MIN);
     test<unsigned long long>(0);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>(-__int128_t(__uint128_t(-1)/2) - 1);
     test<__uint128_t>(0);
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min_exponent.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min_exponent.pass.cpp
index 0d4d8bf591fd3..d00a569f58173 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min_exponent.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min_exponent.pass.cpp
@@ -47,7 +47,7 @@ int main(int, char**)
     test<unsigned long, 0>();
     test<long long, 0>();
     test<unsigned long long, 0>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, 0>();
     test<__uint128_t, 0>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min_exponent10.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min_exponent10.pass.cpp
index 7e798e19d4772..8fe442d231040 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min_exponent10.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/min_exponent10.pass.cpp
@@ -47,7 +47,7 @@ int main(int, char**)
     test<unsigned long, 0>();
     test<long long, 0>();
     test<unsigned long long, 0>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, 0>();
     test<__uint128_t, 0>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/quiet_NaN.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/quiet_NaN.pass.cpp
index 761f662c863ad..81291640985cc 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/quiet_NaN.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/quiet_NaN.pass.cpp
@@ -67,7 +67,7 @@ int main(int, char**)
     test<unsigned long>();
     test<long long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>();
     test<__uint128_t>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/radix.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/radix.pass.cpp
index 7d15a0f852984..4c6a01c7b27cb 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/radix.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/radix.pass.cpp
@@ -47,7 +47,7 @@ int main(int, char**)
     test<unsigned long, 2>();
     test<long long, 2>();
     test<unsigned long long, 2>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, 2>();
     test<__uint128_t, 2>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/round_error.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/round_error.pass.cpp
index 6a39773308252..5eddfa23e8850 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/round_error.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/round_error.pass.cpp
@@ -48,7 +48,7 @@ int main(int, char**)
     test<unsigned long>(0);
     test<long long>(0);
     test<unsigned long long>(0);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>(0);
     test<__uint128_t>(0);
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/round_style.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/round_style.pass.cpp
index ad994f37e9dd9..5591dd7108640 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/round_style.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/round_style.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, std::round_toward_zero>();
     test<long long, std::round_toward_zero>();
     test<unsigned long long, std::round_toward_zero>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, std::round_toward_zero>();
     test<__uint128_t, std::round_toward_zero>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/signaling_NaN.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/signaling_NaN.pass.cpp
index d1683614983c5..6f754c6da3657 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/signaling_NaN.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/signaling_NaN.pass.cpp
@@ -67,7 +67,7 @@ int main(int, char**)
     test<unsigned long>();
     test<long long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>();
     test<__uint128_t>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/tinyness_before.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/tinyness_before.pass.cpp
index dcacd25b67b32..ede8e02bad86c 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/tinyness_before.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/tinyness_before.pass.cpp
@@ -46,7 +46,7 @@ int main(int, char**)
     test<unsigned long, false>();
     test<long long, false>();
     test<unsigned long long, false>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, false>();
     test<__uint128_t, false>();
 #endif
diff --git a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/traps.pass.cpp b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/traps.pass.cpp
index f5000dc34b2d5..014001809c352 100644
--- a/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/traps.pass.cpp
+++ b/libcxx/test/std/language.support/support.limits/limits/numeric.limits.members/traps.pass.cpp
@@ -53,7 +53,7 @@ int main(int, char**)
     test<unsigned long, integral_types_trap>();
     test<long long, integral_types_trap>();
     test<unsigned long long, integral_types_trap>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t, integral_types_trap>();
     test<__uint128_t, integral_types_trap>();
 #endif
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp
index f7aab2c9901ec..e1dd88e85bf35 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_ceil.pass.cpp
@@ -53,7 +53,7 @@ constexpr bool test()
     assert(std::bit_ceil(T(68)) == T(128));
     assert(std::bit_ceil(T(69)) == T(128));
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = 168;
         T ceil = 256;
@@ -80,7 +80,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -112,7 +112,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
     static_assert(test<uint8_t>());
@@ -128,7 +128,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
     test<uint8_t>();
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp
index 2c6d8faf48d2e..045b1cc9a7ada 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_floor.pass.cpp
@@ -49,7 +49,7 @@ constexpr bool test()
     assert(std::bit_floor(T(129)) == T(128));
     assert(std::bit_floor(max) == T(max - (max >> 1)));
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = T(128) << 32;
         assert(std::bit_floor(val-1) == val/2);
@@ -76,7 +76,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -108,7 +108,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
 
@@ -125,7 +125,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
 
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp
index 17a25a8bf737e..43f47d36bfe90 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/bit_width.pass.cpp
@@ -52,7 +52,7 @@ constexpr bool test()
     assert(std::bit_width(T(max - 1)) == T(std::numeric_limits<T>::digits));
     assert(std::bit_width(max) == T(std::numeric_limits<T>::digits));
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = 128;
         val <<= 32;
@@ -81,7 +81,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -113,7 +113,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
     static_assert(test<uint8_t>());
@@ -129,7 +129,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
     test<uint8_t>();
diff --git a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp
index f30185f4a1f28..687f9ad45ff1a 100644
--- a/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bit.pow.two/has_single_bit.pass.cpp
@@ -46,7 +46,7 @@ constexpr bool test()
     assert(!std::has_single_bit(T(129)));
     assert(!std::has_single_bit(max));
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = T(1) << 32;
         assert(!std::has_single_bit(val-1));
@@ -79,7 +79,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -111,7 +111,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
     static_assert(test<uint8_t>());
@@ -127,7 +127,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
     test<uint8_t>();
diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp
index c1f61c896d985..23196aa6ecf3d 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/countl_one.pass.cpp
@@ -51,7 +51,7 @@ constexpr bool test()
     assert(std::countl_one(T(max - 127)) == std::numeric_limits<T>::digits - 7);
     assert(std::countl_one(T(max - 128)) == std::numeric_limits<T>::digits - 8);
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = 128;
         assert(std::countl_one(~val) == 120);
@@ -76,7 +76,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -108,7 +108,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
     static_assert(test<uint8_t>());
@@ -124,7 +124,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
     test<uint8_t>();
diff --git a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp
index e30d0b17856dc..51675808f8340 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/countl_zero.pass.cpp
@@ -48,7 +48,7 @@ constexpr bool test()
     assert(std::countl_zero(T(128)) == dig - 8);
     assert(std::countl_zero(max) == 0);
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = T(128) << 32;
         assert(std::countl_zero(val-1) == 89);
@@ -75,7 +75,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -107,7 +107,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
     static_assert(test<uint8_t>());
@@ -123,7 +123,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
     test<uint8_t>();
diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp
index ccdf98f9372be..448fdcbf12fe8 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/countr_one.pass.cpp
@@ -49,7 +49,7 @@ constexpr bool test()
     assert(std::countr_one(T(max - 1)) == 0);
     assert(std::countr_one(max) == std::numeric_limits<T>::digits);
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = 128;
         assert(std::countr_one(val-1) ==  7);
@@ -80,7 +80,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -112,7 +112,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
     static_assert(test<uint8_t>());
@@ -128,7 +128,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
     test<uint8_t>();
diff --git a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp
index 49f708ffcf9b7..ffe5031d7e4d1 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/countr_zero.pass.cpp
@@ -50,7 +50,7 @@ constexpr bool test()
     assert(std::countr_zero(T(130)) == 1);
     assert(std::countr_zero(max) == 0);
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = T(128) << 32;
         assert(std::countr_zero(val-1) ==  0);
@@ -77,7 +77,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -109,7 +109,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
     static_assert(test<uint8_t>());
@@ -125,7 +125,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
     test<uint8_t>();
diff --git a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp
index f5792c16ffc6b..3dd994b0868ad 100644
--- a/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.count/popcount.pass.cpp
@@ -50,7 +50,7 @@ constexpr bool test()
     assert(std::popcount(T(max - 1)) == std::numeric_limits<T>::digits - 1);
     assert(std::popcount(max) == std::numeric_limits<T>::digits);
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = 128;
         assert(std::popcount(val-1) ==  7);
@@ -87,7 +87,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -119,7 +119,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
     static_assert(test<uint8_t>());
@@ -135,7 +135,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
     test<uint8_t>();
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
index d9dc05faca146..1432aee7fe732 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotl.pass.cpp
@@ -49,7 +49,7 @@ constexpr bool test()
     assert(std::rotl(T(1), 6) == T(64));
     assert(std::rotl(T(1), 7) == T(128));
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = (T(1) << 63) | (T(1) << 64);
         assert(std::rotl(val, 0) == val);
@@ -75,7 +75,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -107,7 +107,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
 
@@ -124,7 +124,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
 
diff --git a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
index 035b13d25f4f5..8c8505a2d5882 100644
--- a/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
+++ b/libcxx/test/std/numerics/bit/bitops.rot/rotr.pass.cpp
@@ -50,7 +50,7 @@ constexpr bool test()
     assert(std::rotr(T(128), 6) == T(2));
     assert(std::rotr(T(128), 7) == T(1));
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     if constexpr (std::is_same_v<T, __uint128_t>) {
         T val = (T(1) << 63) | (T(1) << 64);
         assert(std::rotr(val, 0) == val);
@@ -76,7 +76,7 @@ int main(int, char**)
     static_assert(!std::is_invocable_v<L, int>);
     static_assert(!std::is_invocable_v<L, long>);
     static_assert(!std::is_invocable_v<L, long long>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(!std::is_invocable_v<L, __int128_t>);
 #endif
 
@@ -108,7 +108,7 @@ int main(int, char**)
     static_assert(test<unsigned int>());
     static_assert(test<unsigned long>());
     static_assert(test<unsigned long long>());
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     static_assert(test<__uint128_t>());
 #endif
 
@@ -125,7 +125,7 @@ int main(int, char**)
     test<unsigned int>();
     test<unsigned long>();
     test<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__uint128_t>();
 #endif
 
diff --git a/libcxx/test/std/numerics/bit/byteswap.pass.cpp b/libcxx/test/std/numerics/bit/byteswap.pass.cpp
index ea72fbb631802..bd73515a918a2 100644
--- a/libcxx/test/std/numerics/bit/byteswap.pass.cpp
+++ b/libcxx/test/std/numerics/bit/byteswap.pass.cpp
@@ -64,7 +64,7 @@ constexpr bool test() {
   test_num<int32_t>(0x01234567, 0x67452301);
   test_num<int64_t>(0x0123456789ABCDEF, 0xEFCDAB8967452301);
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   const auto in = static_cast<__uint128_t>(0x0123456789ABCDEF) << 64 | 0x13579BDF02468ACE;
   const auto expected = static_cast<__uint128_t>(0xCE8A4602DF9B5713) << 64 | 0xEFCDAB8967452301;
   test_num<__uint128_t>(in, expected);
diff --git a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp
index d6ff4bed8a7af..5e0b48be13e4b 100644
--- a/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp
+++ b/libcxx/test/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.integer.pass.cpp
@@ -129,7 +129,7 @@ int main(int, char**)
     unsigned_test<uint32_t>();
     unsigned_test<uint64_t>();
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     unsigned_test<__uint128_t>();
     signed_test<__int128_t>();
 #endif
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
index e36625af9ebe0..823cd1665302a 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/eval.pass.cpp
@@ -133,7 +133,7 @@ int main(int, char**)
     // or unsigned long long.
     // (We support __int128 as an extension.)
 
-#if !defined(_LIBCPP_HAS_NO_INT128) && !defined(TEST_BUGGY_I128_FP)
+#if !defined(TEST_HAS_NO_INT128) && !defined(TEST_BUGGY_I128_FP)
     test_statistics<__int128_t, std::minstd_rand0>();
     test_statistics<__uint128_t, std::minstd_rand0>();
 
diff --git a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp
index ab8bda7058c3c..68e7c1e18a3fe 100644
--- a/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.dis/rand.dist.uni/rand.dist.uni.int/int128.pass.cpp
@@ -22,7 +22,7 @@
 
 int main(int, char**) {
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
 
   // Test that values outside of the 64-bit range can be produced.
   {
@@ -80,7 +80,7 @@ int main(int, char**) {
     }
   }
 
-#endif // _LIBCPP_HAS_NO_INT128
+#endif // TEST_HAS_NO_INT128
 
   return 0;
 }
diff --git a/libcxx/test/std/numerics/rand/rand.req/rand.req.urng/uniform_random_bit_generator.compile.pass.cpp b/libcxx/test/std/numerics/rand/rand.req/rand.req.urng/uniform_random_bit_generator.compile.pass.cpp
index 77d2443aa715c..8ace1134ac61b 100644
--- a/libcxx/test/std/numerics/rand/rand.req/rand.req.urng/uniform_random_bit_generator.compile.pass.cpp
+++ b/libcxx/test/std/numerics/rand/rand.req/rand.req.urng/uniform_random_bit_generator.compile.pass.cpp
@@ -19,7 +19,7 @@
 static_assert(std::uniform_random_bit_generator<
               std::linear_congruential_engine<unsigned int, 0U, 1U, 2U> >);
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
 static_assert(std::uniform_random_bit_generator<
               std::subtract_with_carry_engine<__uint128_t, 1U, 2U, 3U> >);
 #endif
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
index d25441799483e..a63d7b4e4b116 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.signed_integral.pass.cpp
@@ -110,7 +110,7 @@ void test_all_signed_integral_types() {
   test_signed_integral_type<int, CharT>();
   test_signed_integral_type<long, CharT>();
   test_signed_integral_type<long long, CharT>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   test_signed_integral_type<__int128_t, CharT>();
 #endif
 }
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
index 4ee1cfc9dc808..7a4fcbc1d49d6 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/format.formatter.spec/formatter.unsigned_integral.pass.cpp
@@ -98,7 +98,7 @@ void test_all_unsigned_integral_types() {
   test_unsigned_integral_type<unsigned, CharT>();
   test_unsigned_integral_type<unsigned long, CharT>();
   test_unsigned_integral_type<unsigned long long, CharT>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   test_unsigned_integral_type<__uint128_t, CharT>();
 #endif
 }
diff --git a/libcxx/test/std/utilities/format/format.formatter/format.context/types.compile.pass.cpp b/libcxx/test/std/utilities/format/format.formatter/format.context/types.compile.pass.cpp
index 457aff231baec..9fb127d2fc4c0 100644
--- a/libcxx/test/std/utilities/format/format.formatter/format.context/types.compile.pass.cpp
+++ b/libcxx/test/std/utilities/format/format.formatter/format.context/types.compile.pass.cpp
@@ -61,7 +61,7 @@ constexpr void test() {
       std::is_same_v<typename std::basic_format_context<OutIt, CharT>::
                          template formatter_type<unsigned long long>,
                      std::formatter<unsigned long long, CharT>>);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   static_assert(
       std::is_same_v<typename std::basic_format_context<
                          OutIt, CharT>::template formatter_type<__int128_t>,
diff --git a/libcxx/test/std/utilities/format/format.functions/format_tests.h b/libcxx/test/std/utilities/format/format.functions/format_tests.h
index 3b2d0c71f11cb..f5e84d14a192b 100644
--- a/libcxx/test/std/utilities/format/format.functions/format_tests.h
+++ b/libcxx/test/std/utilities/format/format.functions/format_tests.h
@@ -733,7 +733,7 @@ void format_test_signed_integer(TestFunction check, ExceptionTest check_exceptio
   format_test_integer<int, CharT>(check, check_exception);
   format_test_integer<long, CharT>(check, check_exception);
   format_test_integer<long long, CharT>(check, check_exception);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   format_test_integer<__int128_t, CharT>(check, check_exception);
 #endif
   // *** check the minma and maxima ***
@@ -790,7 +790,7 @@ void format_test_unsigned_integer(TestFunction check, ExceptionTest check_except
   format_test_integer<unsigned, CharT>(check, check_exception);
   format_test_integer<unsigned long, CharT>(check, check_exception);
   format_test_integer<unsigned long long, CharT>(check, check_exception);
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   format_test_integer<__uint128_t, CharT>(check, check_exception);
 #endif
   // *** test the maxima ***
@@ -2564,7 +2564,7 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   check(STR("hello 42"), STR("hello {}"), static_cast<int>(42));
   check(STR("hello 42"), STR("hello {}"), static_cast<long>(42));
   check(STR("hello 42"), STR("hello {}"), static_cast<long long>(42));
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   check(STR("hello 42"), STR("hello {}"), static_cast<__int128_t>(42));
   {
     // Note 128-bit support is only partly implemented test the range
@@ -2587,7 +2587,7 @@ void format_tests(TestFunction check, ExceptionTest check_exception) {
   check(STR("hello 42"), STR("hello {}"), static_cast<unsigned>(42));
   check(STR("hello 42"), STR("hello {}"), static_cast<unsigned long>(42));
   check(STR("hello 42"), STR("hello {}"), static_cast<unsigned long long>(42));
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
   check(STR("hello 42"), STR("hello {}"), static_cast<__uint128_t>(42));
   {
     // Note 128-bit support is only partly implemented test the range
diff --git a/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp b/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp
index 4168b169935af..2cfed3e2ba22d 100644
--- a/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp
+++ b/libcxx/test/std/utilities/function.objects/unord.hash/integral.pass.cpp
@@ -110,7 +110,7 @@ int main(int, char**)
     test<uintmax_t>();
     test<uintptr_t>();
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test<__int128_t>();
     test<__uint128_t>();
 #endif
diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.sign/make_signed.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.sign/make_signed.pass.cpp
index 9c06c4192ec0f..bf43d6d690c36 100644
--- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.sign/make_signed.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.sign/make_signed.pass.cpp
@@ -26,7 +26,7 @@ enum BigEnum
     big = 0xFFFFFFFFFFFFFFFFULL
 };
 
-#if !defined(_LIBCPP_HAS_NO_INT128) && !defined(_LIBCPP_HAS_NO_STRONG_ENUMS)
+#if !defined(TEST_HAS_NO_INT128) && !defined(_LIBCPP_HAS_NO_STRONG_ENUMS)
 enum HugeEnum : __uint128_t
 {
     hugezero
@@ -59,7 +59,7 @@ int main(int, char**)
     test_make_signed< const wchar_t, std::conditional<sizeof(wchar_t) == 4, const int, const short>::type >();
     test_make_signed< const Enum, std::conditional<sizeof(Enum) == sizeof(int), const int, const signed char>::type >();
     test_make_signed< BigEnum, std::conditional<sizeof(long) == 4, long long, long>::type >();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test_make_signed< __int128_t, __int128_t >();
     test_make_signed< __uint128_t, __int128_t >();
 # ifndef _LIBCPP_HAS_NO_STRONG_ENUMS
diff --git a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.sign/make_unsigned.pass.cpp b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.sign/make_unsigned.pass.cpp
index b23ef4b76f704..e513997158211 100644
--- a/libcxx/test/std/utilities/meta/meta.trans/meta.trans.sign/make_unsigned.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.trans/meta.trans.sign/make_unsigned.pass.cpp
@@ -26,7 +26,7 @@ enum BigEnum
     big = 0xFFFFFFFFFFFFFFFFULL
 };
 
-#if !defined(_LIBCPP_HAS_NO_INT128) && !defined(_LIBCPP_HAS_NO_STRONG_ENUMS)
+#if !defined(TEST_HAS_NO_INT128) && !defined(_LIBCPP_HAS_NO_STRONG_ENUMS)
 enum HugeEnum : __int128_t
 {
     hugezero
@@ -60,7 +60,7 @@ int main(int, char**)
     test_make_unsigned<const Enum, std::conditional<sizeof(Enum) == sizeof(int), const unsigned int, const unsigned char>::type >();
     test_make_unsigned<BigEnum,
                    std::conditional<sizeof(long) == 4, unsigned long long, unsigned long>::type> ();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test_make_unsigned<__int128_t, __uint128_t>();
     test_make_unsigned<__uint128_t, __uint128_t>();
 # ifndef _LIBCPP_HAS_NO_STRONG_ENUMS
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/integral.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/integral.pass.cpp
index df66316b9c8ea..161d55cad5868 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/integral.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.cat/integral.pass.cpp
@@ -60,7 +60,7 @@ int main(int, char**)
     test_integral<unsigned long>();
     test_integral<long long>();
     test_integral<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test_integral<__int128_t>();
     test_integral<__uint128_t>();
 #endif
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp
index 0cbeb4a320532..a378ad8d49583 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.comp/integral.pass.cpp
@@ -50,7 +50,7 @@ int main(int, char**)
     test_integral<unsigned long>();
     test_integral<long long>();
     test_integral<unsigned long long>();
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test_integral<__int128_t>();
     test_integral<__uint128_t>();
 #endif
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_signed.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_signed.pass.cpp
index 29a259fb2588e..6d11561af7547 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_signed.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_signed.pass.cpp
@@ -119,7 +119,7 @@ int main(int, char**)
   test_is_not_signed<bool>();
   test_is_not_signed<unsigned>();
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test_is_signed<__int128_t>();
     test_is_not_signed<__uint128_t>();
 #endif
diff --git a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_unsigned.pass.cpp b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_unsigned.pass.cpp
index 3c200b8f39054..abf21e60b5927 100644
--- a/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_unsigned.pass.cpp
+++ b/libcxx/test/std/utilities/meta/meta.unary/meta.unary.prop/is_unsigned.pass.cpp
@@ -119,7 +119,7 @@ int main(int, char**)
   test_is_unsigned<bool>();
   test_is_unsigned<unsigned>();
 
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
     test_is_unsigned<__uint128_t>();
     test_is_not_unsigned<__int128_t>();
 #endif
diff --git a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_equal/cmp_equal.pass.cpp b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_equal/cmp_equal.pass.cpp
index b57ea44863387..f17aa9cf8f250 100644
--- a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_equal/cmp_equal.pass.cpp
+++ b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_equal/cmp_equal.pass.cpp
@@ -89,7 +89,7 @@ constexpr void test2(const std::tuple<Ts...>&, const UTuple& utuple) {
 
 constexpr bool test() {
   std::tuple<
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
       __int128_t, __uint128_t,
 #endif
       unsigned long long, long long, unsigned long, long, unsigned int, int,
diff --git a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_greater/cmp_greater.pass.cpp b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_greater/cmp_greater.pass.cpp
index b8afc7d6d2aee..a2a11152a7c6e 100644
--- a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_greater/cmp_greater.pass.cpp
+++ b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_greater/cmp_greater.pass.cpp
@@ -80,7 +80,7 @@ constexpr void test2(const std::tuple<Ts...>&, const UTuple& utuple) {
 
 constexpr bool test() {
   std::tuple<
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
       __int128_t, __uint128_t,
 #endif
       unsigned long long, long long, unsigned long, long, unsigned int, int,
diff --git a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_greater_equal/cmp_greater_equal.pass.cpp b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_greater_equal/cmp_greater_equal.pass.cpp
index d166633de0924..b45b74609e2b8 100644
--- a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_greater_equal/cmp_greater_equal.pass.cpp
+++ b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_greater_equal/cmp_greater_equal.pass.cpp
@@ -82,7 +82,7 @@ constexpr void test2(const std::tuple<Ts...>&, const UTuple& utuple) {
 
 constexpr bool test() {
   std::tuple<
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
       __int128_t, __uint128_t,
 #endif
       unsigned long long, long long, unsigned long, long, unsigned int, int,
diff --git a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_less/cmp_less.pass.cpp b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_less/cmp_less.pass.cpp
index bc39fa6102276..5c6a9acb34c75 100644
--- a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_less/cmp_less.pass.cpp
+++ b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_less/cmp_less.pass.cpp
@@ -81,7 +81,7 @@ constexpr void test2(const std::tuple<Ts...>&, const UTuple& utuple) {
 
 constexpr bool test() {
   std::tuple<
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
       __int128_t, __uint128_t,
 #endif
       unsigned long long, long long, unsigned long, long, unsigned int, int,
diff --git a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_less_equal/cmp_less_equal.pass.cpp b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_less_equal/cmp_less_equal.pass.cpp
index 2dc6d1be00729..3cf6044e0b524 100644
--- a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_less_equal/cmp_less_equal.pass.cpp
+++ b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_less_equal/cmp_less_equal.pass.cpp
@@ -81,7 +81,7 @@ constexpr void test2(const std::tuple<Ts...>&, const UTuple& utuple) {
 
 constexpr bool test() {
   std::tuple<
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
       __int128_t, __uint128_t,
 #endif
       unsigned long long, long long, unsigned long, long, unsigned int, int,
diff --git a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_not_equal/cmp_not_equal.pass.cpp b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_not_equal/cmp_not_equal.pass.cpp
index 97ab5426f6d18..8ce69469a2bc7 100644
--- a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_not_equal/cmp_not_equal.pass.cpp
+++ b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.cmp_not_equal/cmp_not_equal.pass.cpp
@@ -88,7 +88,7 @@ constexpr void test2(const std::tuple<Ts...>&, const UTuple& utuple) {
 
 constexpr bool test() {
   std::tuple<
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
       __int128_t, __uint128_t,
 #endif
       unsigned long long, long long, unsigned long, long, unsigned int, int,
diff --git a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.in_range/in_range.pass.cpp b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.in_range/in_range.pass.cpp
index 31f77c4418721..eff207a4a0684 100644
--- a/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.in_range/in_range.pass.cpp
+++ b/libcxx/test/std/utilities/utility/utility.intcmp/intcmp.in_range/in_range.pass.cpp
@@ -64,7 +64,7 @@ constexpr void test1(const std::tuple<Ts...>&) {
 
 constexpr bool test() {
   std::tuple<
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
       __int128_t, __uint128_t,
 #endif
       unsigned long long, long long, unsigned long, long, unsigned int, int,
diff --git a/libcxx/test/support/msvc_stdlib_force_include.h b/libcxx/test/support/msvc_stdlib_force_include.h
index bd83bee22ce73..dedd5d3ef8921 100644
--- a/libcxx/test/support/msvc_stdlib_force_include.h
+++ b/libcxx/test/support/msvc_stdlib_force_include.h
@@ -62,9 +62,6 @@ const AssertionDialogAvoider assertion_dialog_avoider{};
     #pragma warning(disable: 28251) // Inconsistent annotation for 'new': this instance has no annotations.
 #endif // !defined(__clang__)
 
-// MSVC doesn't have __int128_t.
-#define _LIBCPP_HAS_NO_INT128
-
 #ifndef _LIBCXX_IN_DEVCRT
     // atomic_is_lock_free.pass.cpp needs this VS 2015 Update 2 fix.
     #define _ENABLE_ATOMIC_ALIGNMENT_FIX
diff --git a/libcxx/test/support/poisoned_hash_helper.h b/libcxx/test/support/poisoned_hash_helper.h
index bb4b70ba86e5e..a9314b8874354 100644
--- a/libcxx/test/support/poisoned_hash_helper.h
+++ b/libcxx/test/support/poisoned_hash_helper.h
@@ -72,7 +72,7 @@ using LibraryHashTypes = TypeList<
       unsigned long,
       long long,
       unsigned long long,
-#ifndef _LIBCPP_HAS_NO_INT128
+#ifndef TEST_HAS_NO_INT128
       __int128_t,
       __uint128_t,
 #endif
diff --git a/libcxx/test/support/test_macros.h b/libcxx/test/support/test_macros.h
index 5333d344588ea..bf48fb5c2f22a 100644
--- a/libcxx/test/support/test_macros.h
+++ b/libcxx/test/support/test_macros.h
@@ -368,6 +368,10 @@ inline void DoNotOptimize(Tp const& value) {
 #   define TEST_HAS_NO_UNICODE
 #endif
 
+#if defined(_LIBCPP_HAS_NO_INT128) || defined(TEST_COMPILER_MSVC)
+#   define TEST_HAS_NO_INT128
+#endif
+
 #if defined(__GNUC__)
 #pragma GCC diagnostic pop
 #endif

From ccce1a03c9ce9c3917b310097c89e39bb68527e2 Mon Sep 17 00:00:00 2001
From: MuAlphaOmegaEpsilon <tommasobonvicini@gmail.com>
Date: Thu, 27 Jan 2022 11:39:05 -0500
Subject: [PATCH 930/946] Don't trigger unused-parameter warnings on naked
 functions

This commit checks if a function is marked with the naked attribute
and, if it is, will silence the emission of any unused-parameter
warning.

Inside a naked function only the usage of basic ASM instructions is
expected. In this context the parameters can actually be used by
fetching them according to the underlying ABI. Since parameters might
be used through ASM instructions, the linter and the compiler will have
a hard time understanding if one of those is unused or not, therefore
no unused-parameter warning should ever be triggered whenever a
function is marked naked.
---
 .../clang-tidy/misc/UnusedParametersCheck.cpp            | 9 +++++----
 .../checkers/misc-unused-parameters-strict.cpp           | 4 ++++
 .../test/clang-tidy/checkers/misc-unused-parameters.c    | 2 ++
 .../test/clang-tidy/checkers/misc-unused-parameters.cpp  | 4 ++++
 clang/lib/Sema/SemaDecl.cpp                              | 6 ++++--
 clang/test/Sema/warn-unused-parameters.c                 | 7 +++++--
 clang/test/SemaCXX/warn-unused-parameters.cpp            | 4 ++++
 7 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp
index ee2ca87ab011b..149d63ff1e62f 100644
--- a/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp
+++ b/clang-tools-extra/clang-tidy/misc/UnusedParametersCheck.cpp
@@ -31,10 +31,11 @@ bool isOverrideMethod(const FunctionDecl *Function) {
 } // namespace
 
 void UnusedParametersCheck::registerMatchers(MatchFinder *Finder) {
-  Finder->addMatcher(
-      functionDecl(isDefinition(), hasBody(stmt()), hasAnyParameter(decl()))
-          .bind("function"),
-      this);
+  Finder->addMatcher(functionDecl(isDefinition(), hasBody(stmt()),
+                                  hasAnyParameter(decl()),
+                                  unless(hasAttr(attr::Kind::Naked)))
+                         .bind("function"),
+                     this);
 }
 
 template <typename T>
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters-strict.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters-strict.cpp
index f45f9defa7ece..0b4fc8f446954 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters-strict.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters-strict.cpp
@@ -22,4 +22,8 @@ class F {
 // CHECK-MESSAGES: :[[@LINE-1]]:9: warning: parameter 'j' is unused
 // CHECK-FIXES: {{^}}  F(int  /*j*/) : i() {}{{$}}
 };
+
+// Do not warn on naked functions.
+[[gnu::naked]] int nakedFunction(int a, float b, const char *c) { ; }
+__attribute__((naked)) void nakedFunction(int a, int b) { ; }
 }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.c b/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.c
index 1f4527a2ec8f1..67e9a3be9692c 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.c
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.c
@@ -15,3 +15,5 @@ static void b(int i) {;}
 // ===============
 void h(i, c, d) int i; char *c, *d; {} // Don't mess with K&R style
 
+// Do not warn on naked functions.
+__attribute__((naked)) void nakedFunction(int a, int b) { ; }
diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.cpp
index 52f9675bffa10..a3fcf30f273ef 100644
--- a/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.cpp
+++ b/clang-tools-extra/test/clang-tidy/checkers/misc-unused-parameters.cpp
@@ -286,3 +286,7 @@ void test() {
   f([](int I) { return; });
 }
 } // namespace lambda
+
+// Do not warn on naked functions.
+[[gnu::naked]] int nakedFunction(int a, float b, const char *c) { ; }
+__attribute__((naked)) void nakedFunction(int a, int b) { ; }
diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 7c5f6b318e973..3252671991b70 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -14694,8 +14694,10 @@ Decl *Sema::ActOnFinishFunctionBody(Decl *dcl, Stmt *Body,
         Diag(FD->getLocation(), diag::ext_pure_function_definition);
 
       if (!FD->isInvalidDecl()) {
-        // Don't diagnose unused parameters of defaulted or deleted functions.
-        if (!FD->isDeleted() && !FD->isDefaulted() && !FD->hasSkippedBody())
+        // Don't diagnose unused parameters of defaulted, deleted or naked
+        // functions.
+        if (!FD->isDeleted() && !FD->isDefaulted() && !FD->hasSkippedBody() &&
+            !FD->hasAttr<NakedAttr>())
           DiagnoseUnusedParameters(FD->parameters());
         DiagnoseSizeOfParametersAndReturnValue(FD->parameters(),
                                                FD->getReturnType(), FD);
diff --git a/clang/test/Sema/warn-unused-parameters.c b/clang/test/Sema/warn-unused-parameters.c
index d325f887f885a..9167ae0235929 100644
--- a/clang/test/Sema/warn-unused-parameters.c
+++ b/clang/test/Sema/warn-unused-parameters.c
@@ -17,14 +17,17 @@ void f1(void) {
 // warnings for the above cases.
 static void achor() {};
 
+// Do not warn on naked functions.
+__attribute__((naked)) static void nakedFunction(int a, int b) { }
+
 // CHECK: 5:12: warning: unused parameter 'y'
 // CHECK: 12:15: warning: unused parameter 'y'
-// CHECK-unused: 1 warning generated
+// CHECK-unused: 2 warnings generated
 
 // RUN: %clang_cc1 -fblocks -fsyntax-only -Weverything %s 2>&1 | FileCheck -check-prefix=CHECK-everything %s
 // RUN: not %clang_cc1 -fblocks -fsyntax-only -Weverything -Werror %s 2>&1 | FileCheck -check-prefix=CHECK-everything-error %s
 // RUN: %clang_cc1 -fblocks -fsyntax-only -Weverything -Wno-unused %s 2>&1 | FileCheck -check-prefix=CHECK-everything-no-unused %s
-// CHECK-everything: 6 warnings generated
+// CHECK-everything: 7 warnings generated
 // CHECK-everything-error: 5 errors generated
 // CHECK-everything-no-unused: 5 warnings generated
 
diff --git a/clang/test/SemaCXX/warn-unused-parameters.cpp b/clang/test/SemaCXX/warn-unused-parameters.cpp
index 00ce1a98c6c8a..ac4e21490a624 100644
--- a/clang/test/SemaCXX/warn-unused-parameters.cpp
+++ b/clang/test/SemaCXX/warn-unused-parameters.cpp
@@ -32,3 +32,7 @@ static int test_pack(T... t, T... s)
   auto l = [&t...]() { return sizeof...(s); };
   return l();
 }
+
+// Do not warn on naked functions.
+[[gnu::naked]] int nakedFunction(int a, float b, const char* c) { ; }
+__attribute__((naked)) void nakedFunction(int a, int b) { ; }

From 075e3fdda104a460b1f792be30b9c18ab5466f92 Mon Sep 17 00:00:00 2001
From: Matthias Springer <springerm@google.com>
Date: Fri, 28 Jan 2022 01:11:22 +0900
Subject: [PATCH 931/946] [mlir][bufferize] Move arith BufferizableOpInterface
 impl to arith dialect

Also switch the implementation of `-arith-bufferize` to BufferizableOpInterface.

Differential Revision: https://reviews.llvm.org/D118325
---
 .../Transforms/BufferizableOpInterfaceImpl.h  | 21 ++++++++
 .../Dialect/Arithmetic/Transforms/Passes.h    |  9 ----
 .../Dialect/Arithmetic/Transforms/Passes.td   |  2 -
 .../ArithInterfaceImpl.h                      | 27 ----------
 .../BufferizableOpInterfaceImpl.cpp}          | 25 ++++------
 .../Arithmetic/Transforms/Bufferize.cpp       | 50 +++++--------------
 .../Arithmetic/Transforms/CMakeLists.txt      |  2 +
 .../ComprehensiveBufferize/CMakeLists.txt     | 12 -----
 .../Dialect/Linalg/Transforms/CMakeLists.txt  |  2 +-
 .../Transforms/ComprehensiveBufferizePass.cpp |  4 +-
 .../comprehensive-function-bufferize.mlir     | 16 ------
 mlir/test/lib/Dialect/Linalg/CMakeLists.txt   |  2 +-
 .../Linalg/TestComprehensiveBufferize.cpp     |  4 +-
 .../llvm-project-overlay/mlir/BUILD.bazel     | 31 +++---------
 .../mlir/test/BUILD.bazel                     |  2 +-
 15 files changed, 62 insertions(+), 147 deletions(-)
 create mode 100644 mlir/include/mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h
 delete mode 100644 mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h
 rename mlir/lib/Dialect/{Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp => Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp} (85%)

diff --git a/mlir/include/mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h
new file mode 100644
index 0000000000000..20fe749ae6f83
--- /dev/null
+++ b/mlir/include/mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h
@@ -0,0 +1,21 @@
+//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MLIR_DIALECT_ARITHMETIC_BUFFERIZABLEOPINTERFACEIMPL_H
+#define MLIR_DIALECT_ARITHMETIC_BUFFERIZABLEOPINTERFACEIMPL_H
+
+namespace mlir {
+
+class DialectRegistry;
+
+namespace arith {
+void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
+} // namespace arith
+} // namespace mlir
+
+#endif // MLIR_DIALECT_ARITHMETIC_BUFFERIZABLEOPINTERFACEIMPL_H
diff --git a/mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.h b/mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.h
index 09b6942ecd28e..5569a8347dad2 100644
--- a/mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.h
+++ b/mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.h
@@ -12,17 +12,8 @@
 #include "mlir/Pass/Pass.h"
 
 namespace mlir {
-namespace bufferization {
-class BufferizeTypeConverter;
-} // namespace bufferization
-
 namespace arith {
 
-/// Add patterns to bufferize Arithmetic ops.
-void populateArithmeticBufferizePatterns(
-    bufferization::BufferizeTypeConverter &typeConverter,
-    RewritePatternSet &patterns);
-
 /// Create a pass to bufferize Arithmetic ops.
 std::unique_ptr<Pass> createArithmeticBufferizePass();
 
diff --git a/mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.td b/mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.td
index d4fe952b7f12e..bb30949208ab1 100644
--- a/mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.td
@@ -14,8 +14,6 @@ include "mlir/Pass/PassBase.td"
 def ArithmeticBufferize : Pass<"arith-bufferize", "FuncOp"> {
   let summary = "Bufferize Arithmetic dialect ops.";
   let constructor = "mlir::arith::createArithmeticBufferizePass()";
-  let dependentDialects = ["bufferization::BufferizationDialect",
-                           "memref::MemRefDialect"];
 }
 
 def ArithmeticExpandOps : Pass<"arith-expand", "FuncOp"> {
diff --git a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h b/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h
deleted file mode 100644
index f4454a3ceb492..0000000000000
--- a/mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h
+++ /dev/null
@@ -1,27 +0,0 @@
-//===- ArithInterfaceImpl.h - Arith Impl. of BufferizableOpInterface ------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_ARITHINTERFACEIMPL_H
-#define MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_ARITHINTERFACEIMPL_H
-
-namespace mlir {
-
-class DialectRegistry;
-
-namespace linalg {
-namespace comprehensive_bufferize {
-namespace arith_ext {
-
-void registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry);
-
-} // namespace arith_ext
-} // namespace comprehensive_bufferize
-} // namespace linalg
-} // namespace mlir
-
-#endif // MLIR_DIALECT_LINALG_COMPREHENSIVEBUFFERIZE_ARITHINTERFACEIMPL_H
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp b/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp
similarity index 85%
rename from mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
rename to mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp
index 2d09331ede237..00cc640327aad 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp
+++ b/mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp
@@ -1,4 +1,4 @@
-//===- ArithInterfaceImpl.cpp - Arith Impl. of BufferizableOpInterface ----===//
+//===- BufferizableOpInterfaceImpl.cpp - Impl. of BufferizableOpInterface -===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,8 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h"
-
+#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/Transforms/BufferUtils.h"
@@ -18,9 +17,8 @@
 using namespace mlir::bufferization;
 
 namespace mlir {
-namespace linalg {
-namespace comprehensive_bufferize {
-namespace arith_ext {
+namespace arith {
+namespace {
 
 /// Bufferization of arith.constant. Replace with memref.get_global.
 struct ConstantOpInterface
@@ -100,14 +98,13 @@ struct IndexCastOpInterface
     return success();
   }
 };
-} // namespace arith_ext
-} // namespace comprehensive_bufferize
-} // namespace linalg
+
+} // namespace
+} // namespace arith
 } // namespace mlir
 
-void mlir::linalg::comprehensive_bufferize::arith_ext::
-    registerBufferizableOpInterfaceExternalModels(DialectRegistry &registry) {
-  registry.addOpInterface<arith::ConstantOp, arith_ext::ConstantOpInterface>();
-  registry
-      .addOpInterface<arith::IndexCastOp, arith_ext::IndexCastOpInterface>();
+void mlir::arith::registerBufferizableOpInterfaceExternalModels(
+    DialectRegistry &registry) {
+  registry.addOpInterface<ConstantOp, ConstantOpInterface>();
+  registry.addOpInterface<IndexCastOp, IndexCastOpInterface>();
 }
diff --git a/mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp
index 715fb1c81cbc5..2c8d2cb553ccb 100644
--- a/mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp
+++ b/mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp
@@ -8,61 +8,37 @@
 
 #include "PassDetail.h"
 
+#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Arithmetic/Transforms/Passes.h"
+#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h"
 #include "mlir/Dialect/MemRef/IR/MemRef.h"
 
 using namespace mlir;
+using namespace bufferization;
 
 namespace {
-
-/// Bufferize arith.index_cast.
-struct BufferizeIndexCastOp : public OpConversionPattern<arith::IndexCastOp> {
-  using OpConversionPattern::OpConversionPattern;
-
-  LogicalResult
-  matchAndRewrite(arith::IndexCastOp op, OpAdaptor adaptor,
-                  ConversionPatternRewriter &rewriter) const override {
-    auto tensorType = op.getType().cast<RankedTensorType>();
-    rewriter.replaceOpWithNewOp<arith::IndexCastOp>(
-        op, adaptor.getIn(),
-        MemRefType::get(tensorType.getShape(), tensorType.getElementType()));
-    return success();
-  }
-};
-
 /// Pass to bufferize Arithmetic ops.
 struct ArithmeticBufferizePass
     : public ArithmeticBufferizeBase<ArithmeticBufferizePass> {
   void runOnOperation() override {
-    bufferization::BufferizeTypeConverter typeConverter;
-    RewritePatternSet patterns(&getContext());
-    ConversionTarget target(getContext());
-
-    target.addLegalDialect<arith::ArithmeticDialect, memref::MemRefDialect>();
-
-    arith::populateArithmeticBufferizePatterns(typeConverter, patterns);
+    std::unique_ptr<BufferizationOptions> options =
+        getPartialBufferizationOptions();
+    options->addToDialectFilter<arith::ArithmeticDialect>();
 
-    target.addDynamicallyLegalOp<arith::IndexCastOp>(
-        [&](arith::IndexCastOp op) {
-          return typeConverter.isLegal(op.getType());
-        });
-
-    if (failed(applyPartialConversion(getOperation(), target,
-                                      std::move(patterns))))
+    if (failed(bufferizeOp(getOperation(), *options)))
       signalPassFailure();
   }
-};
 
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<bufferization::BufferizationDialect, memref::MemRefDialect,
+                    arith::ArithmeticDialect>();
+    arith::registerBufferizableOpInterfaceExternalModels(registry);
+  }
+};
 } // namespace
 
-void mlir::arith::populateArithmeticBufferizePatterns(
-    bufferization::BufferizeTypeConverter &typeConverter,
-    RewritePatternSet &patterns) {
-  patterns.add<BufferizeIndexCastOp>(typeConverter, patterns.getContext());
-}
-
 std::unique_ptr<Pass> mlir::arith::createArithmeticBufferizePass() {
   return std::make_unique<ArithmeticBufferizePass>();
 }
diff --git a/mlir/lib/Dialect/Arithmetic/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Arithmetic/Transforms/CMakeLists.txt
index 3ed4c4c25943b..4d76ee09d2d57 100644
--- a/mlir/lib/Dialect/Arithmetic/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Arithmetic/Transforms/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_mlir_dialect_library(MLIRArithmeticTransforms
+  BufferizableOpInterfaceImpl.cpp
   Bufferize.cpp
   ExpandOps.cpp
 
@@ -10,6 +11,7 @@ add_mlir_dialect_library(MLIRArithmeticTransforms
 
   LINK_LIBS PUBLIC
   MLIRArithmetic
+  MLIRBufferization
   MLIRBufferizationTransforms
   MLIRIR
   MLIRMemRef
diff --git a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
index 8b64809e4b97c..caddad30c6151 100644
--- a/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/ComprehensiveBufferize/CMakeLists.txt
@@ -1,6 +1,5 @@
 set(LLVM_OPTIONAL_SOURCES
   AffineInterfaceImpl.cpp
-  ArithInterfaceImpl.cpp
   LinalgInterfaceImpl.cpp
   ModuleBufferization.cpp
   SCFInterfaceImpl.cpp
@@ -16,17 +15,6 @@ add_mlir_dialect_library(MLIRAffineBufferizableOpInterfaceImpl
   MLIRBufferization
 )
 
-add_mlir_dialect_library(MLIRArithBufferizableOpInterfaceImpl
-  ArithInterfaceImpl.cpp
-
-  LINK_LIBS PUBLIC
-  MLIRArithmetic
-  MLIRBufferization
-  MLIRIR
-  MLIRMemRef
-  MLIRStandardOpsTransforms
-)
-
 add_mlir_dialect_library(MLIRLinalgBufferizableOpInterfaceImpl
   LinalgInterfaceImpl.cpp
 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
index 366600f6d33de..cecb586b3c20c 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt
@@ -34,8 +34,8 @@ add_mlir_dialect_library(MLIRLinalgTransforms
   MLIRAffineBufferizableOpInterfaceImpl
   MLIRAffineUtils
   MLIRAnalysis
-  MLIRArithBufferizableOpInterfaceImpl
   MLIRArithmetic
+  MLIRArithmeticTransforms
   MLIRBufferization
   MLIRComplex
   MLIRInferTypeOpInterface
diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
index 3b3b1e4e76ec9..151ef39d685dc 100644
--- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
+++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
@@ -8,11 +8,11 @@
 
 #include "PassDetail.h"
 
+#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h"
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h"
@@ -52,7 +52,7 @@ struct LinalgComprehensiveModuleBufferize
                 vector::VectorDialect, scf::SCFDialect,
                 arith::ArithmeticDialect, StandardOpsDialect, AffineDialect>();
     affine_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    arith_ext::registerBufferizableOpInterfaceExternalModels(registry);
+    arith::registerBufferizableOpInterfaceExternalModels(registry);
     linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerModuleBufferizationExternalModels(registry);
diff --git a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir b/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
index 75fb2be2c6533..1a3b266ee4b80 100644
--- a/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
+++ b/mlir/test/Dialect/Linalg/comprehensive-function-bufferize.mlir
@@ -96,19 +96,3 @@ func @rank_reducing(
   }
   return %5: tensor<?x1x6x8xf32>
 }
-
-// -----
-
-// CHECK: #[[$MAP:.*]] = affine_map<()[s0] -> (s0)>
-// CHECK-LABEL:   func @index_cast(
-// CHECK-SAME:  %[[TENSOR:.*]]: tensor<i32>, %[[SCALAR:.*]]: i32
-func @index_cast(%tensor: tensor<i32>, %scalar: i32) -> (tensor<index>, index) {
-  %index_tensor = arith.index_cast %tensor : tensor<i32> to tensor<index>
-  %index_scalar = arith.index_cast %scalar : i32 to index
-  return %index_tensor, %index_scalar : tensor<index>, index
-}
-// CHECK:  %[[MEMREF:.*]] = bufferization.to_memref %[[TENSOR]] : memref<i32, #[[$MAP]]>
-// CHECK-NEXT: %[[INDEX_MEMREF:.*]] = arith.index_cast %[[MEMREF]]
-// CHECK-SAME:   memref<i32, #[[$MAP]]> to memref<index, #[[$MAP]]>
-// CHECK-NEXT: %[[INDEX_TENSOR:.*]] = bufferization.to_tensor %[[INDEX_MEMREF]]
-// CHECK: return %[[INDEX_TENSOR]]
diff --git a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
index b45786123f723..701aadc25d5e5 100644
--- a/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
+++ b/mlir/test/lib/Dialect/Linalg/CMakeLists.txt
@@ -14,8 +14,8 @@ add_mlir_library(MLIRLinalgTestPasses
   LINK_LIBS PUBLIC
   MLIRAffine
   MLIRAffineBufferizableOpInterfaceImpl
-  MLIRArithBufferizableOpInterfaceImpl
   MLIRArithmetic
+  MLIRArithmeticTransforms
   MLIRBufferization
   MLIRBufferizationTransforms
   MLIRGPUTransforms
diff --git a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
index 6e72f77ee3493..d4371b4b22329 100644
--- a/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
+++ b/mlir/test/lib/Dialect/Linalg/TestComprehensiveBufferize.cpp
@@ -12,11 +12,11 @@
 
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
 #include "mlir/Dialect/Arithmetic/IR/Arithmetic.h"
+#include "mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h"
 #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h"
 #include "mlir/Dialect/Bufferization/IR/Bufferization.h"
 #include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h"
-#include "mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h"
 #include "mlir/Dialect/Linalg/ComprehensiveBufferize/StdInterfaceImpl.h"
@@ -59,7 +59,7 @@ struct TestComprehensiveFunctionBufferize
                     vector::VectorDialect, scf::SCFDialect, StandardOpsDialect,
                     arith::ArithmeticDialect, AffineDialect>();
     affine_ext::registerBufferizableOpInterfaceExternalModels(registry);
-    arith_ext::registerBufferizableOpInterfaceExternalModels(registry);
+    arith::registerBufferizableOpInterfaceExternalModels(registry);
     linalg_ext::registerBufferizableOpInterfaceExternalModels(registry);
     scf_ext::registerBufferizableOpInterfaceExternalModels(registry);
     std_ext::registerBufferizableOpInterfaceExternalModels(registry);
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index 7dc763d9c262e..0b4607a0fff18 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -6580,27 +6580,6 @@ cc_library(
     ],
 )
 
-cc_library(
-    name = "ArithBufferizableOpInterfaceImpl",
-    srcs = [
-        "lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp",
-    ],
-    hdrs = [
-        "include/mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h",
-    ],
-    includes = ["include"],
-    deps = [
-        ":ArithmeticDialect",
-        ":BufferizationDialect",
-        ":BufferizationTransforms",
-        ":IR",
-        ":MemRefDialect",
-        ":Support",
-        ":TransformUtils",
-        "//llvm:Support",
-    ],
-)
-
 cc_library(
     name = "LinalgBufferizableOpInterfaceImpl",
     srcs = [
@@ -6876,8 +6855,8 @@ cc_library(
         ":AffineBufferizableOpInterfaceImpl",
         ":AffineUtils",
         ":Analysis",
-        ":ArithBufferizableOpInterfaceImpl",
         ":ArithmeticDialect",
+        ":ArithmeticTransforms",
         ":BufferizationDialect",
         ":BufferizationTransforms",
         ":ComplexDialect",
@@ -7566,7 +7545,10 @@ cc_library(
         "lib/Dialect/Arithmetic/Transforms/*.cpp",
         "lib/Dialect/Arithmetic/Transforms/*.h",
     ]),
-    hdrs = ["include/mlir/Dialect/Arithmetic/Transforms/Passes.h"],
+    hdrs = [
+        "include/mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h",
+        "include/mlir/Dialect/Arithmetic/Transforms/Passes.h",
+    ],
     includes = ["include"],
     deps = [
         ":ArithmeticDialect",
@@ -7577,7 +7559,10 @@ cc_library(
         ":MemRefDialect",
         ":Pass",
         ":StandardOps",
+        ":Support",
+        ":TransformUtils",
         ":Transforms",
+        "//llvm:Support",
     ],
 )
 
diff --git a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
index 4f9ae531e1b18..bbe3d84d0f1c4 100644
--- a/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/test/BUILD.bazel
@@ -389,8 +389,8 @@ cc_library(
         "//llvm:Support",
         "//mlir:Affine",
         "//mlir:AffineBufferizableOpInterfaceImpl",
-        "//mlir:ArithBufferizableOpInterfaceImpl",
         "//mlir:ArithmeticDialect",
+        "//mlir:ArithmeticTransforms",
         "//mlir:BufferizationDialect",
         "//mlir:BufferizationTransforms",
         "//mlir:GPUDialect",

From 8ce99dadb007dd0dbbf1ddbe4090e9ff43e780c5 Mon Sep 17 00:00:00 2001
From: Richard <legalize@xmission.com>
Date: Fri, 21 Jan 2022 17:57:40 -0700
Subject: [PATCH 932/946] [clang-tidy] Add more documentation about check
 development (NFC)

- Mention pp-trace
- CMake configuration
- Overriding registerPPCallbacks
- Overriding isLanguageVersionSupported
- Check development tips
  - Guide to useful documentation
  - Using the Transformer library
  - Developing your check incrementally
  - Creating private matchers
  - Unit testing helper code
  - Making your check robust
  - Documenting your check
- Describe the Inputs test folder

Differential Revision: https://reviews.llvm.org/D117939
---
 .../docs/clang-tidy/Contributing.rst          | 225 +++++++++++++++++-
 1 file changed, 222 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/docs/clang-tidy/Contributing.rst b/clang-tools-extra/docs/clang-tidy/Contributing.rst
index 7ebfac3123603..b9eb0e7627cc1 100644
--- a/clang-tools-extra/docs/clang-tidy/Contributing.rst
+++ b/clang-tools-extra/docs/clang-tidy/Contributing.rst
@@ -22,6 +22,8 @@ There are a few tools particularly useful when developing clang-tidy checks:
     check, it will create the check, update the CMake file and create a test;
   * ``rename_check.py`` does what the script name suggests, renames an existing
     check;
+  * :program:`pp-trace` logs method calls on `PPCallbacks` for a source file
+    and is invaluable in understanding the preprocessor mechanism;
   * :program:`clang-query` is invaluable for interactive prototyping of AST
     matchers and exploration of the Clang AST;
   * `clang-check`_ with the ``-ast-dump`` (and optionally ``-ast-dump-filter``)
@@ -70,6 +72,14 @@ let's start!
 .. _Using Clang Tools: https://clang.llvm.org/docs/ClangTools.html
 .. _How To Setup Clang Tooling For LLVM: https://clang.llvm.org/docs/HowToSetupToolingForLLVM.html
 
+When you `configure the CMake build <https://llvm.org/docs/GettingStarted.html#local-llvm-configuration>`_,
+make sure that you enable the ``clang`` and ``clang-tools-extra`` projects to
+build :program:`clang-tidy`.
+Because your new check will have associated documentation, you will also want to install
+`Sphinx <https://www.sphinx-doc.org/en/master/>`_ and enable it in the CMake configuration.
+To save build time of the core Clang libraries you may want to only enable the ``X86``
+target in the CMake configuration.
+
 
 The Directory Structure
 -----------------------
@@ -215,11 +225,215 @@ can further inspect them and report diagnostics.
 and `clang-tidy/google/ExplicitConstructorCheck.cpp
 <https://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/clang-tidy/google/ExplicitConstructorCheck.cpp>`_).
 
+If you need to interact with macros or preprocessor directives, you will want to
+override the method ``registerPPCallbacks``.  The ``add_new_check.py`` script
+does not generate an override for this method in the starting point for your
+new check.
+
+If your check applies only under a specific set of language options, be sure
+to override the method ``isLanguageVersionSupported`` to reflect that.
+
+Check development tips
+----------------------
+
+Writing your first check can be a daunting task, particularly if you are unfamiliar
+with the LLVM and Clang code bases.  Here are some suggestions for orienting yourself
+in the codebase and working on your check incrementally.
+
+Guide to useful documentation
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Many of the support classes created for LLVM are used by Clang, such as `StringRef
+<https://llvm.org/docs/ProgrammersManual.html#the-stringref-class>`_
+and `SmallVector <https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h>`_.
+These and other commonly used classes are described in the `Important and useful LLVM APIs
+<https://llvm.org/docs/ProgrammersManual.html#important-and-useful-llvm-apis>`_ and
+`Picking the Right Data Structure for the Task
+<https://llvm.org/docs/ProgrammersManual.html#picking-the-right-data-structure-for-a-task>`_
+sections of the `LLVM Programmer's Manual
+<https://llvm.org/docs/ProgrammersManual.html>`_.  You don't need to memorize all the
+details of these classes; the generated `doxygen documentation <https://llvm.org/doxygen/>`_
+has everything if you need it.  In the header `LLVM/ADT/STLExtras.h
+<https://llvm.org/doxygen/STLExtras_8h.html>`_ you'll find useful versions of the STL
+algorithms that operate on LLVM containers, such as `llvm::all_of
+<https://llvm.org/doxygen/STLExtras_8h.html#func-members>`_.
+
+Clang is implemented on top of LLVM and introduces its own set of classes that you
+will interact with while writing your check.  When a check issues diagnostics and
+fix-its, these are associated with locations in the source code.  Source code locations,
+source files, ranges of source locations and the `SourceManager
+<https://clang.llvm.org/doxygen/classclang_1_1SourceManager.html>`_ class provide
+the mechanisms for describing such locations.  These and
+other topics are described in the `"Clang" CFE Internals Manual
+<https://clang.llvm.org/docs/InternalsManual.html>`_.  Whereas the doxygen generated
+documentation serves as a reference to the internals of Clang, this document serves
+as a guide to other developers.  Topics in that manual of interest to a check developer
+are:
+
+- `The Clang "Basic" Library
+  <https://clang.llvm.org/docs/InternalsManual.html#the-clang-basic-library>`_ for
+  information about diagnostics, fix-it hints and source locations.
+- `The Lexer and Preprocessor Library
+  <https://clang.llvm.org/docs/InternalsManual.html#the-lexer-and-preprocessor-library>`_
+  for information about tokens, lexing (transforming characters into tokens) and the
+  preprocessor.
+- `The AST Library
+  <https://clang.llvm.org/docs/InternalsManual.html#the-lexer-and-preprocessor-library>`_
+  for information about how C++ source statements are represented as an abstract syntax
+  tree (AST).
+
+Most checks will interact with C++ source code via the AST.  Some checks will interact
+with the preprocessor.  The input source file is lexed and preprocessed and then parsed
+into the AST.  Once the AST is fully constructed, the check is run by applying the check's
+registered AST matchers against the AST and invoking the check with the set of matched
+nodes from the AST.  Monitoring the actions of the preprocessor is detached from the
+AST construction, but a check can collect information during preprocessing for later
+use by the check when nodes are matched by the AST.
+
+Every syntactic (and sometimes semantic) element of the C++ source code is represented by
+different classes in the AST.  You select the portions of the AST you're interested in
+by composing AST matcher functions.  You will want to study carefully the `AST Matcher
+Reference <https://clang.llvm.org/docs/LibASTMatchersReference.html>`_ to understand
+the relationship between the different matcher functions.
+
+Using the Transformer library
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Transformer library allows you to write a check that transforms source code by
+expressing the transformation as a ``RewriteRule``.  The Transformer library provides
+functions for composing edits to source code to create rewrite rules.  Unless you need
+to perform low-level source location manipulation, you may want to consider writing your
+check with the Transformer library.  The `Clang Transformer Tutorial
+<https://clang.llvm.org/docs/ClangTransformerTutorial.html>`_ describes the Transformer
+library in detail.
+
+To use the Transformer library, make the following changes to the code generated by
+the ``add_new_check.py`` script:
+
+- Include ``../utils/TransformerClangTidyCheck.h`` instead of ``../ClangTidyCheck.h``
+- Change the base class of your check from ``ClangTidyCheck`` to ``TransformerClangTidyCheck``
+- Delete the override of the ``registerMatchers`` and ``check`` methods in your check class.
+- Write a function that creates the ``RewriteRule`` for your check.
+- Call the function in your check's constructor to pass the rewrite rule to
+  ``TransformerClangTidyCheck``'s constructor.
+
+Developing your check incrementally
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The best way to develop your check is to start with the simple test cases and increase
+complexity incrementally.  The test file created by the ``add_new_check.py`` script is
+a starting point for your test cases.  A rough outline of the process looks like this:
+
+- Write a test case for your check.
+- Prototype matchers on the test file using :program:`clang-query`.
+- Capture the working matchers in the ``registerMatchers`` method.
+- Issue the necessary diagnostics and fix-its in the ``check`` method.
+- Add the necessary ``CHECK-MESSAGES`` and ``CHECK-FIXES`` annotations to your
+  test case to validate the diagnostics and fix-its.
+- Build the target ``check-clang-tool`` to confirm the test passes.
+- Repeat the process until all aspects of your check are covered by tests.
+
+The quickest way to prototype your matcher is to use :program:`clang-query` to
+interactively build up your matcher.  For complicated matchers, build up a matching
+expression incrementally and use :program:`clang-query`'s ``let`` command to save named
+matching expressions to simplify your matcher.  Just like breaking up a huge function
+into smaller chunks with intention-revealing names can help you understand a complex
+algorithm, breaking up a matcher into smaller matchers with intention-revealing names
+can help you understand a complicated matcher.  Once you have a working matcher, the
+C++ API will be virtually identical to your interactively constructed matcher.  You can
+use local variables to preserve your intention-revealing names that you applied to
+nested matchers.
+
+Creating private matchers
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes you want to match a specific aspect of the AST that isn't provided by the
+existing AST matchers.  You can create your own private matcher using the same
+infrastructure as the public matchers.  A private matcher can simplify the processing
+in your ``check`` method by eliminating complex hand-crafted AST traversal of the
+matched nodes.  Using the private matcher allows you to select the desired portions
+of the AST directly in the matcher and refer to it by a bound name in the ``check``
+method.
+
+Unit testing helper code
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Private custom matchers are a good example of auxiliary support code for your check
+that can be tested with a unit test.  It will be easier to test your matchers or
+other support classes by writing a unit test than by writing a ``FileCheck`` integration
+test.  The ``ASTMatchersTests`` target contains unit tests for the public AST matcher
+classes and is a good source of testing idioms for matchers.
+
+Making your check robust
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Once you've covered your check with the basic "happy path" scenarios, you'll want to
+torture your check with as many edge cases as you can cover in order to ensure your
+check is robust.  Running your check on a large code base, such as Clang/LLVM, is a
+good way to catch things you forgot to account for in your matchers.  However, the
+LLVM code base may be insufficient for testing purposes as it was developed against a
+particular set of coding styles and quality measures.  The larger the corpus of code
+the check is tested against, the higher confidence the community will have in the
+check's efficacy and false positive rate.
+
+Some suggestions to ensure your check is robust:
+
+- Create header files that contain code matched by your check.
+- Validate that fix-its are properly applied to test header files with
+  :program:`clang-tidy`.  You will need to perform this test manually until
+  automated support for checking messages and fix-its is added to the
+  ``check_clang_tidy.py`` script.
+- Define macros that contain code matched by your check.
+- Define template classes that contain code matched by your check.
+- Define template specializations that contain code matched by your check.
+- Test your check under both Windows and Linux environments.
+- Watch out for high false positive rates.  Ideally, a check would have no false
+  positives, but given that matching against an AST is not control- or data flow-
+  sensitive, a number of false positives are expected.  The higher the false
+  positive rate, the less likely the check will be adopted in practice.
+  Mechanisms should be put in place to help the user manage false positives.
+- There are two primary mechanisms for managing false positives: supporting a
+  code pattern which allows the programmer to silence the diagnostic in an ad
+  hoc manner and check configuration options to control the behavior of the check.
+- Consider supporting a code pattern to allow the programmer to silence the
+  diagnostic whenever such a code pattern can clearly express the programmer's
+  intent.  For example, allowing an explicit cast to ``void`` to silence an
+  unused variable diagnostic.
+- Consider adding check configuration options to allow the user to opt into
+  more aggressive checking behavior without burdening users for the common
+  high-confidence cases.
+
+Documenting your check
+^^^^^^^^^^^^^^^^^^^^^^
+
+The ``add_new_check.py`` script creates entries in the
+`release notes <https://clang.llvm.org/extra/ReleaseNotes.html>`_, the list of
+checks and a new file for the check documentation itself.  It is recommended that you
+have a concise summation of what your check does in a single sentence that is repeated
+in the release notes, as the first sentence in the doxygen comments in the header file
+for your check class and as the first sentence of the check documentation.  Avoid the
+phrase "this check" in your check summation and check documentation.
+
+If your check relates to a published coding guideline (C++ Core Guidelines, MISRA, etc.)
+or style guide, provide links to the relevant guideline or style guide sections in your
+check documentation.
+
+Provide enough examples of the diagnostics and fix-its provided by the check so that a
+user can easily understand what will happen to their code when the check is run.
+If there are exceptions or limitations to your check, document them thoroughly.  This
+will help users understand the scope of the diagnostics and fix-its provided by the check.
+
+Building the target ``docs-clang-tools-html`` will run the Sphinx documentation generator
+and create documentation HTML files in the tools/clang/tools/extra/docs/html directory in
+your build tree.  Make sure that your check is correctly shown in the release notes and the
+list of checks.  Make sure that the formatting and structure of your check's documentation
+looks correct.
+
 
 Registering your Check
 ----------------------
 
-(The ``add_new_check.py`` takes care of registering the check in an existing
+(The ``add_new_check.py`` script takes care of registering the check in an existing
 module. If you want to create a new module or know the details, read on.)
 
 The check should be registered in the corresponding module with a distinct name:
@@ -316,7 +530,9 @@ YAML format:
 Testing Checks
 --------------
 
-To run tests for :program:`clang-tidy` use the command:
+To run tests for :program:`clang-tidy`, build the ``check-clang-tools`` target.
+For instance, if you configured your CMake build with the ninja project generator,
+use the command:
 
 .. code-block:: console
 
@@ -394,7 +610,6 @@ Here's an example:
    // CHECK-FIXES-USING-B-NOT: using a::B;$
    // CHECK-FIXES-NOT: using a::C;$
 
-
 There are many dark corners in the C++ language, and it may be difficult to make
 your check work perfectly in all cases, especially if it issues fix-it hints. The
 most frequent pitfalls are macros and templates:
@@ -411,6 +626,10 @@ most frequent pitfalls are macros and templates:
    macro expansions/template instantiations, but easily break some other
    expansions/instantiations.
 
+If you need multiple files to exercise all the aspects of your check, it is
+recommended you place them in a subdirectory named for the check under ``Inputs``.
+This keeps the test directory from getting cluttered.
+
 .. _lit: https://llvm.org/docs/CommandGuide/lit.html
 .. _FileCheck: https://llvm.org/docs/CommandGuide/FileCheck.html
 .. _test/clang-tidy/google-readability-casting.cpp: https://reviews.llvm.org/diffusion/L/browse/clang-tools-extra/trunk/test/clang-tidy/google-readability-casting.cpp

From c9ce27180b812e4a3d49ed628358b10694bcbe55 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he@intel.com>
Date: Tue, 25 Jan 2022 08:18:29 +0800
Subject: [PATCH 933/946] Move OpenCL relational builtin translation to
 SPIRVToOCL

Motivation is to let standalone use of SPIRVToOCL20Legacy
pass support translating relational builtin in Spirv Friendly IR.

This patch also aligns relational builtin in SPIRV Friendly IR with
spirv spec. i8 is used for vector boolean type, since clang frontend
can't generate i1 vector type for spirv builtin yet.
E.g.
  i32 @_Z11__spirv_AnyDv2_i(<2 x i32>
is changed to
  i1 @_Z11__spirv_AnyDv2_c(<2 x i8>

Original commit:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/7854a91
---
 llvm-spirv/lib/SPIRV/SPIRVReader.cpp          |  58 ++--
 llvm-spirv/lib/SPIRV/SPIRVReader.h            |   4 +-
 llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp           |  66 ++++
 llvm-spirv/lib/SPIRV/SPIRVToOCL.h             |   6 +
 llvm-spirv/test/relationals.ll                |  20 +-
 llvm-spirv/test/transcoding/OpAllAny.ll       | 118 +++++--
 .../test/transcoding/relationals_double.ll    | 315 +++++++++++++++---
 .../test/transcoding/relationals_float.ll     | 290 +++++++++++-----
 .../test/transcoding/relationals_half.ll      | 289 +++++++++++-----
 9 files changed, 880 insertions(+), 286 deletions(-)

diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
index 78a4b5cb82e12..bdb4f7e5b1b77 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
@@ -2363,16 +2363,15 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F,
 
   case OpAll:
   case OpAny:
-    return mapValue(BV,
-                    transOCLAllAny(static_cast<SPIRVInstruction *>(BV), BB));
+    return mapValue(BV, transAllAny(static_cast<SPIRVInstruction *>(BV), BB));
 
   case OpIsFinite:
   case OpIsInf:
   case OpIsNan:
   case OpIsNormal:
   case OpSignBitSet:
-    return mapValue(
-        BV, transOCLRelational(static_cast<SPIRVInstruction *>(BV), BB));
+    return mapValue(BV,
+                    transRelational(static_cast<SPIRVInstruction *>(BV), BB));
   case OpGetKernelWorkGroupSize:
   case OpGetKernelPreferredWorkGroupSizeMultiple:
     return mapValue(
@@ -4254,68 +4253,51 @@ SPIRVToLLVM::transLinkageType(const SPIRVValue *V) {
   }
 }
 
-Instruction *SPIRVToLLVM::transOCLAllAny(SPIRVInstruction *I, BasicBlock *BB) {
+Instruction *SPIRVToLLVM::transAllAny(SPIRVInstruction *I, BasicBlock *BB) {
   CallInst *CI = cast<CallInst>(transSPIRVBuiltinFromInst(I, BB));
   assert(CI->getCalledFunction() && "Unexpected indirect call");
+  BuiltinFuncMangleInfo BtnInfo;
   AttributeList Attrs = CI->getCalledFunction()->getAttributes();
   return cast<Instruction>(mapValue(
-      I, mutateCallInstOCL(
+      I, mutateCallInst(
              M, CI,
-             [=](CallInst *, std::vector<Value *> &Args, llvm::Type *&RetTy) {
-               Type *Int32Ty = Type::getInt32Ty(*Context);
-               auto OldArg = CI->getOperand(0);
-               auto NewArgTy = FixedVectorType::get(
-                   Int32Ty,
+             [=](CallInst *, std::vector<Value *> &Args) {
+               auto *OldArg = CI->getOperand(0);
+               auto *NewArgTy = FixedVectorType::get(
+                   Type::getInt8Ty(*Context),
                    cast<FixedVectorType>(OldArg->getType())->getNumElements());
-               auto NewArg =
+               auto *NewArg =
                    CastInst::CreateSExtOrBitCast(OldArg, NewArgTy, "", CI);
                Args[0] = NewArg;
-               RetTy = Int32Ty;
                return getSPIRVFuncName(I->getOpCode(), getSPIRVFuncSuffix(I));
              },
-             [=](CallInst *NewCI) -> Instruction * {
-               return CastInst::CreateTruncOrBitCast(
-                   NewCI, Type::getInt1Ty(*Context), "", NewCI->getNextNode());
-             },
-             &Attrs, /*TakeFuncName=*/true)));
+             &BtnInfo, &Attrs, /*TakeFuncName=*/true)));
 }
 
-Instruction *SPIRVToLLVM::transOCLRelational(SPIRVInstruction *I,
-                                             BasicBlock *BB) {
+Instruction *SPIRVToLLVM::transRelational(SPIRVInstruction *I, BasicBlock *BB) {
   CallInst *CI = cast<CallInst>(transSPIRVBuiltinFromInst(I, BB));
   assert(CI->getCalledFunction() && "Unexpected indirect call");
+  BuiltinFuncMangleInfo BtnInfo;
   AttributeList Attrs = CI->getCalledFunction()->getAttributes();
   return cast<Instruction>(mapValue(
-      I, mutateCallInstOCL(
+      I, mutateCallInst(
              M, CI,
              [=](CallInst *, std::vector<Value *> &Args, llvm::Type *&RetTy) {
-               Type *IntTy = Type::getInt32Ty(*Context);
-               RetTy = IntTy;
                if (CI->getType()->isVectorTy()) {
-                 if (cast<FixedVectorType>(CI->getOperand(0)->getType())
-                         ->getElementType()
-                         ->isDoubleTy())
-                   IntTy = Type::getInt64Ty(*Context);
-                 if (cast<FixedVectorType>(CI->getOperand(0)->getType())
-                         ->getElementType()
-                         ->isHalfTy())
-                   IntTy = Type::getInt16Ty(*Context);
                  RetTy = FixedVectorType::get(
-                     IntTy,
+                     Type::getInt8Ty(*Context),
                      cast<FixedVectorType>(CI->getType())->getNumElements());
                }
                return getSPIRVFuncName(I->getOpCode(), getSPIRVFuncSuffix(I));
              },
              [=](CallInst *NewCI) -> Instruction * {
-               Type *RetTy = Type::getInt1Ty(*Context);
-               if (NewCI->getType()->isVectorTy())
-                 RetTy = FixedVectorType::get(
-                     Type::getInt1Ty(*Context),
-                     cast<FixedVectorType>(NewCI->getType())->getNumElements());
+               Type *RetTy = CI->getType();
+               if (RetTy == NewCI->getType())
+                 return NewCI;
                return CastInst::CreateTruncOrBitCast(NewCI, RetTy, "",
                                                      NewCI->getNextNode());
              },
-             &Attrs, /*TakeFuncName=*/true)));
+             &BtnInfo, &Attrs, /*TakeFuncName=*/true)));
 }
 
 std::unique_ptr<SPIRVModule> readSpirvModule(std::istream &IS,
diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.h b/llvm-spirv/lib/SPIRV/SPIRVReader.h
index c2bfd01ea2343..7b66fda322644 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVReader.h
+++ b/llvm-spirv/lib/SPIRV/SPIRVReader.h
@@ -231,8 +231,8 @@ class SPIRVToLLVM {
                                                  int64_t Parameter);
   template <class Source, class Func> bool foreachFuncCtlMask(Source, Func);
   llvm::GlobalValue::LinkageTypes transLinkageType(const SPIRVValue *V);
-  Instruction *transOCLAllAny(SPIRVInstruction *BI, BasicBlock *BB);
-  Instruction *transOCLRelational(SPIRVInstruction *BI, BasicBlock *BB);
+  Instruction *transAllAny(SPIRVInstruction *BI, BasicBlock *BB);
+  Instruction *transRelational(SPIRVInstruction *BI, BasicBlock *BB);
 
   void transUserSemantic(SPIRV::SPIRVFunction *Fun);
   void transGlobalAnnotations();
diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp
index c2eb399c27e92..05c352166be77 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp
@@ -185,6 +185,21 @@ void SPIRVToOCLBase::visitCallInst(CallInst &CI) {
     visitCallSPIRVGenericPtrMemSemantics(&CI);
     return;
   }
+  // Check if OC is OpenCL relational builtin except bitselect and select.
+  auto IsOclRelationalOp = [](Op OC) {
+    return isUnaryPredicateOpCode(OC) || OC == OpOrdered || OC == OpUnordered ||
+           OC == OpFOrdEqual || OC == OpFUnordNotEqual ||
+           OC == OpFOrdGreaterThan || OC == OpFOrdGreaterThanEqual ||
+           OC == OpFOrdLessThan || OC == OpFOrdLessThanEqual ||
+           OC == OpFOrdNotEqual;
+  };
+  if (IsOclRelationalOp(OC)) {
+    if (OC == OpAny || OC == OpAll)
+      visitCallSPIRVAnyAll(&CI, OC);
+    else
+      visitCallSPIRVRelational(&CI, OC);
+    return;
+  }
   if (OCLSPIRVBuiltinMap::rfind(OC))
     visitCallSPIRVBuiltin(&CI, OC);
 }
@@ -1088,6 +1103,57 @@ void SPIRVToOCLBase::visitCallSPIRVPrintf(CallInst *CI, OCLExtOpKind Kind) {
     NewCI->getCalledFunction()->setName(TargetName);
 }
 
+void SPIRVToOCLBase::visitCallSPIRVAnyAll(CallInst *CI, Op OC) {
+  AttributeList Attrs = CI->getCalledFunction()->getAttributes();
+  mutateCallInstOCL(
+      M, CI,
+      [=](CallInst *, std::vector<Value *> &Args, Type *&RetTy) {
+        Type *Int8Ty = Type::getInt8Ty(*Ctx);
+        auto *OldArg = CI->getOperand(0);
+        auto *OldArgTy = cast<FixedVectorType>(OldArg->getType());
+        if (Int8Ty != OldArgTy->getElementType()) {
+          auto *NewArgTy =
+              FixedVectorType::get(Int8Ty, OldArgTy->getNumElements());
+          auto *NewArg =
+              CastInst::CreateSExtOrBitCast(OldArg, NewArgTy, "", CI);
+          Args[0] = NewArg;
+        }
+        RetTy = Type::getInt32Ty(*Ctx);
+        return OCLSPIRVBuiltinMap::rmap(OC);
+      },
+      [=](CallInst *NewCI) -> Instruction * {
+        return CastInst::CreateTruncOrBitCast(NewCI, CI->getType(), "",
+                                              NewCI->getNextNode());
+      },
+      &Attrs);
+}
+
+void SPIRVToOCLBase::visitCallSPIRVRelational(CallInst *CI, Op OC) {
+  AttributeList Attrs = CI->getCalledFunction()->getAttributes();
+  mutateCallInstOCL(
+      M, CI,
+      [=](CallInst *, std::vector<Value *> & /*Args*/, Type *&RetTy) {
+        Type *IntTy = Type::getInt32Ty(*Ctx);
+        RetTy = IntTy;
+        if (CI->getType()->isVectorTy()) {
+          auto *OpElemTy = cast<FixedVectorType>(CI->getOperand(0)->getType())
+                               ->getElementType();
+          if (OpElemTy->isDoubleTy())
+            IntTy = Type::getInt64Ty(*Ctx);
+          if (OpElemTy->isHalfTy())
+            IntTy = Type::getInt16Ty(*Ctx);
+          RetTy = FixedVectorType::get(
+              IntTy, cast<FixedVectorType>(CI->getType())->getNumElements());
+        }
+        return OCLSPIRVBuiltinMap::rmap(OC);
+      },
+      [=](CallInst *NewCI) -> Instruction * {
+        return CastInst::CreateTruncOrBitCast(NewCI, CI->getType(), "",
+                                              NewCI->getNextNode());
+      },
+      &Attrs);
+}
+
 std::string SPIRVToOCLBase::getGroupBuiltinPrefix(CallInst *CI) {
   std::string Prefix;
   auto ES = getArgAsScope(CI, 0);
diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h
index 9fb1a25afe0a7..0eabe68425c9b 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.h
+++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.h
@@ -218,6 +218,12 @@ class SPIRVToOCLBase : public InstVisitor<SPIRVToOCLBase> {
   /// Transform __spirv_EnqueueKernel to __enqueue_kernel
   virtual void visitCallSPIRVEnqueueKernel(CallInst *CI, Op OC) = 0;
 
+  /// Transform __spirv_Any and __spirv_All to OpenCL builtin.
+  void visitCallSPIRVAnyAll(CallInst *CI, Op OC);
+
+  /// Transform relational builtin, e.g. __spirv_IsNan, to OpenCL builtin.
+  void visitCallSPIRVRelational(CallInst *CI, Op OC);
+
   /// Conduct generic mutations for all atomic builtins
   virtual CallInst *mutateCommonAtomicArguments(CallInst *CI, Op OC) = 0;
 
diff --git a/llvm-spirv/test/relationals.ll b/llvm-spirv/test/relationals.ll
index 8f5ea3ae4e29f..507d17a32a023 100644
--- a/llvm-spirv/test/relationals.ll
+++ b/llvm-spirv/test/relationals.ll
@@ -14,17 +14,17 @@ declare dso_local spir_func <4 x i8> @_Z16__spirv_IsFiniteIDv4_aDv4_fET_T0_(<4 x
 declare dso_local spir_func <4 x i8> @_Z16__spirv_IsNormalIDv4_aDv4_fET_T0_(<4 x float>)
 declare dso_local spir_func <4 x i8> @_Z18__spirv_SignBitSetIDv4_aDv4_fET_T0_(<4 x float>)
 
-; CHECK-SPV-LLVM: call spir_func <4 x i32> @_Z13__spirv_IsNanDv4_f
-; CHECK-SPV-LLVM: call spir_func <4 x i32> @_Z13__spirv_IsInfDv4_f
-; CHECK-SPV-LLVM: call spir_func <4 x i32> @_Z16__spirv_IsFiniteDv4_f
-; CHECK-SPV-LLVM: call spir_func <4 x i32> @_Z16__spirv_IsNormalDv4_f
-; CHECK-SPV-LLVM: call spir_func <4 x i32> @_Z18__spirv_SignBitSetDv4_f
+; CHECK-SPV-LLVM: call spir_func <4 x i8> @_Z13__spirv_IsNanDv4_f
+; CHECK-SPV-LLVM: call spir_func <4 x i8> @_Z13__spirv_IsInfDv4_f
+; CHECK-SPV-LLVM: call spir_func <4 x i8> @_Z16__spirv_IsFiniteDv4_f
+; CHECK-SPV-LLVM: call spir_func <4 x i8> @_Z16__spirv_IsNormalDv4_f
+; CHECK-SPV-LLVM: call spir_func <4 x i8> @_Z18__spirv_SignBitSetDv4_f
 
-; CHECK-SPV-LLVM: declare spir_func <4 x i32> @_Z13__spirv_IsNanDv4_f(<4 x float>)
-; CHECK-SPV-LLVM: declare spir_func <4 x i32> @_Z13__spirv_IsInfDv4_f(<4 x float>)
-; CHECK-SPV-LLVM: declare spir_func <4 x i32> @_Z16__spirv_IsFiniteDv4_f(<4 x float>)
-; CHECK-SPV-LLVM: declare spir_func <4 x i32> @_Z16__spirv_IsNormalDv4_f(<4 x float>)
-; CHECK-SPV-LLVM: declare spir_func <4 x i32> @_Z18__spirv_SignBitSetDv4_f(<4 x float>)
+; CHECK-SPV-LLVM: declare spir_func <4 x i8> @_Z13__spirv_IsNanDv4_f(<4 x float>)
+; CHECK-SPV-LLVM: declare spir_func <4 x i8> @_Z13__spirv_IsInfDv4_f(<4 x float>)
+; CHECK-SPV-LLVM: declare spir_func <4 x i8> @_Z16__spirv_IsFiniteDv4_f(<4 x float>)
+; CHECK-SPV-LLVM: declare spir_func <4 x i8> @_Z16__spirv_IsNormalDv4_f(<4 x float>)
+; CHECK-SPV-LLVM: declare spir_func <4 x i8> @_Z18__spirv_SignBitSetDv4_f(<4 x float>)
 
 ; CHECK-SPIRV: {{[0-9]+}} TypeBool [[TBool:[0-9]+]]
 ; CHECK-SPIRV: {{[0-9]+}} TypeVector [[TBoolVec:[0-9]+]] [[TBool]]
diff --git a/llvm-spirv/test/transcoding/OpAllAny.ll b/llvm-spirv/test/transcoding/OpAllAny.ll
index 53d8d38f9098c..868c3a4a49c86 100644
--- a/llvm-spirv/test/transcoding/OpAllAny.ll
+++ b/llvm-spirv/test/transcoding/OpAllAny.ll
@@ -3,54 +3,104 @@
 ; RUN: FileCheck < %t.txt %s --check-prefix=CHECK-SPIRV
 ; RUN: llvm-spirv %t.bc -o %t.spv
 ; RUN: spirv-val %t.spv
+; RUN: llvm-spirv -r %t.spv -o %t.rev.spv.bc --spirv-target-env=SPV-IR
+; RUN: llvm-dis < %t.rev.spv.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
 ; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
-; RUN: llvm-spirv -r %t.spv -o %t.rev.bc --spirv-target-env=SPV-IR
-; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-LLVM
 
-; CHECK-LLVM: call spir_func i32 @_Z3allDv2_i(
-; CHECK-LLVM: call spir_func i32 @_Z3anyDv2_i(
-
-; CHECK-SPV-LLVM: call spir_func i32 @_Z11__spirv_AllDv2_i(
-; CHECK-SPV-LLVM: call spir_func i32 @_Z11__spirv_AnyDv2_i(
-
-; CHECK-SPV-LLVM: declare spir_func i32 @_Z11__spirv_AllDv2_i(<2 x i32>)
-; CHECK-SPV-LLVM: declare spir_func i32 @_Z11__spirv_AnyDv2_i(<2 x i32>)
+; This test checks SYCL relational builtin any and all with vector input types.
 
 ; CHECK-SPIRV: 2 TypeBool [[BoolTypeID:[0-9]+]]
-; CHECK-SPIRV: 4 All [[BoolTypeID]]
+
+; CHECK-SPIRV: 4 Any [[BoolTypeID]]
 ; CHECK-SPIRV: 4 Any [[BoolTypeID]]
+; CHECK-SPIRV: 4 Any [[BoolTypeID]]
+; CHECK-SPIRV: 4 Any [[BoolTypeID]]
+; CHECK-SPIRV: 4 All [[BoolTypeID]]
+; CHECK-SPIRV: 4 All [[BoolTypeID]]
+; CHECK-SPIRV: 4 All [[BoolTypeID]]
+; CHECK-SPIRV: 4 All [[BoolTypeID]]
+
+; CHECK-SPV-IR: call spir_func i1 @_Z11__spirv_AnyDv2_c(<2 x i8>
+; CHECK-SPV-IR: call spir_func i1 @_Z11__spirv_AnyDv2_c(<2 x i8>
+; CHECK-SPV-IR: call spir_func i1 @_Z11__spirv_AnyDv2_c(<2 x i8>
+; CHECK-SPV-IR: call spir_func i1 @_Z11__spirv_AnyDv2_c(<2 x i8>
+; CHECK-SPV-IR: call spir_func i1 @_Z11__spirv_AllDv2_c(<2 x i8>
+; CHECK-SPV-IR: call spir_func i1 @_Z11__spirv_AllDv2_c(<2 x i8>
+; CHECK-SPV-IR: call spir_func i1 @_Z11__spirv_AllDv2_c(<2 x i8>
+; CHECK-SPV-IR: call spir_func i1 @_Z11__spirv_AllDv2_c(<2 x i8>
+
+; CHECK-LLVM: call spir_func i32 @_Z3anyDv2_c(<2 x i8>
+; CHECK-LLVM: call spir_func i32 @_Z3anyDv2_c(<2 x i8>
+; CHECK-LLVM: call spir_func i32 @_Z3anyDv2_c(<2 x i8>
+; CHECK-LLVM: call spir_func i32 @_Z3anyDv2_c(<2 x i8>
+; CHECK-LLVM: call spir_func i32 @_Z3allDv2_c(<2 x i8>
+; CHECK-LLVM: call spir_func i32 @_Z3allDv2_c(<2 x i8>
+; CHECK-LLVM: call spir_func i32 @_Z3allDv2_c(<2 x i8>
+; CHECK-LLVM: call spir_func i32 @_Z3allDv2_c(<2 x i8>
 
 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir-unknown-unknown"
+target triple = "spir"
 
-; Function Attrs: nounwind
-define spir_kernel void @testKernel() #0 !kernel_arg_addr_space !0 !kernel_arg_access_qual !0 !kernel_arg_type !0 !kernel_arg_base_type !0 !kernel_arg_type_qual !0 {
+; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn writeonly
+define dso_local spir_func void @test_vector(i32 addrspace(4)* nocapture writeonly %out, <2 x i8> %c, <2 x i16> %s, <2 x i32> %i, <2 x i64> %l) local_unnamed_addr #0 {
 entry:
-  %cmp = icmp ne <2 x i64> zeroinitializer, <i64 1, i64 1>
-  %sext = sext <2 x i1> %cmp to <2 x i64>
-  %call = call spir_func i32 @_Z3allDv2_l(<2 x i64> %sext)
-  %0 = insertelement <2 x i64> <i64 1, i64 1>, i64 0, i32 0
-  %cmp1 = icmp ne <2 x i64> zeroinitializer, %0
-  %sext2 = sext <2 x i1> %cmp1 to <2 x i64>
-  %call3 = call spir_func i32 @_Z3anyDv2_l(<2 x i64> %sext2)
+  %call = tail call spir_func i32 @_Z3anyDv2_c(<2 x i8> %c) #2
+  %call1 = tail call spir_func i32 @_Z3anyDv2_s(<2 x i16> %s) #2
+  %add = add nsw i32 %call1, %call
+  %call2 = tail call spir_func i32 @_Z3anyDv2_i(<2 x i32> %i) #2
+  %add3 = add nsw i32 %add, %call2
+  %call4 = tail call spir_func i32 @_Z3anyDv2_l(<2 x i64> %l) #2
+  %add5 = add nsw i32 %add3, %call4
+  %call6 = tail call spir_func i32 @_Z3allDv2_c(<2 x i8> %c) #2
+  %add7 = add nsw i32 %add5, %call6
+  %call8 = tail call spir_func i32 @_Z3allDv2_s(<2 x i16> %s) #2
+  %add9 = add nsw i32 %add7, %call8
+  %call10 = tail call spir_func i32 @_Z3allDv2_i(<2 x i32> %i) #2
+  %add11 = add nsw i32 %add9, %call10
+  %call12 = tail call spir_func i32 @_Z3allDv2_l(<2 x i64> %l) #2
+  %add13 = add nsw i32 %add11, %call12
+  store i32 %add13, i32 addrspace(4)* %out, align 4, !tbaa !3
   ret void
 }
 
-declare spir_func i32 @_Z3allDv2_l(<2 x i64>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z3anyDv2_c(<2 x i8>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z3anyDv2_s(<2 x i16>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z3anyDv2_i(<2 x i32>) local_unnamed_addr #1
 
-declare spir_func i32 @_Z3anyDv2_l(<2 x i64>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z3anyDv2_l(<2 x i64>) local_unnamed_addr #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z3allDv2_c(<2 x i8>) local_unnamed_addr #1
 
-!opencl.enable.FP_CONTRACT = !{}
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z3allDv2_s(<2 x i16>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z3allDv2_i(<2 x i32>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z3allDv2_l(<2 x i64>) local_unnamed_addr #1
+
+attributes #0 = { convergent mustprogress nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "min-legal-vector-width"="128" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent mustprogress nofree nounwind readnone willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
 !opencl.spir.version = !{!1}
-!opencl.ocl.version = !{!2}
-!opencl.used.extensions = !{!0}
-!opencl.used.optional.core.features = !{!0}
-!opencl.compiler.options = !{!0}
-
-!0 = !{}
-!1 = !{i32 1, i32 2}
-!2 = !{i32 2, i32 0}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 14.0.0"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
diff --git a/llvm-spirv/test/transcoding/relationals_double.ll b/llvm-spirv/test/transcoding/relationals_double.ll
index 15757a55380b9..fe26f80191b47 100644
--- a/llvm-spirv/test/transcoding/relationals_double.ll
+++ b/llvm-spirv/test/transcoding/relationals_double.ll
@@ -3,68 +3,279 @@
 ; RUN: FileCheck < %t.txt %s --check-prefix=CHECK-SPIRV
 ; RUN: llvm-spirv %t.bc -o %t.spv
 ; RUN: spirv-val %t.spv
+; RUN: llvm-spirv -r %t.spv -o %t.rev.spv.bc --spirv-target-env=SPV-IR
+; RUN: llvm-dis < %t.rev.spv.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
 ; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
 
-; CHECK-SPIRV: 4 TypeInt [[IntTypeID:[0-9]+]] 64
-; CHECK-SPIRV: 4 TypeVector [[Int64VectorTypeID:[0-9]+]] [[IntTypeID]] 2
+; This test checks following SYCL relational builtins with double and double2
+; types:
+;   isfinite, isinf, isnan, isnormal, signbit, isequal, isnotequal, isgreater
+;   isgreaterequal, isless, islessequal, islessgreater, isordered, isunordered
 
-; CHECK-SPIRV: 6 Select [[Int64VectorTypeID]]
-; CHECK-SPIRV: 6 Select [[Int64VectorTypeID]]
-; CHECK-SPIRV: 6 Select [[Int64VectorTypeID]]
-; CHECK-SPIRV: 6 Select [[Int64VectorTypeID]]
+; CHECK-SPIRV: 2 TypeBool [[BoolTypeID:[0-9]+]]
+; CHECK-SPIRV: 4 TypeVector [[BoolVectorTypeID:[0-9]+]] [[BoolTypeID]] 2
 
+; CHECK-SPIRV: 4 IsFinite [[BoolTypeID]]
+; CHECK-SPIRV: 4 IsInf [[BoolTypeID]]
+; CHECK-SPIRV: 4 IsNan [[BoolTypeID]]
+; CHECK-SPIRV: 4 IsNormal [[BoolTypeID]]
+; CHECK-SPIRV: 4 SignBitSet [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FUnordNotEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThan [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThanEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThan [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThanEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdNotEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 Ordered [[BoolTypeID]]
+; CHECK-SPIRV: 5 Unordered [[BoolTypeID]]
+
+; CHECK-SPIRV: 4 IsFinite [[BoolVectorTypeID]]
+; CHECK-SPIRV: 4 IsInf [[BoolVectorTypeID]]
+; CHECK-SPIRV: 4 IsNan [[BoolVectorTypeID]]
+; CHECK-SPIRV: 4 IsNormal [[BoolVectorTypeID]]
+; CHECK-SPIRV: 4 SignBitSet [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FUnordNotEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThan [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThanEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThan [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThanEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdNotEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 Ordered [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 Unordered [[BoolVectorTypeID]]
+
+; CHECK-SPV-IR: call spir_func i1 @_Z16__spirv_IsFinited(double
+; CHECK-SPV-IR: call spir_func i1 @_Z13__spirv_IsInfd(double
+; CHECK-SPV-IR: call spir_func i1 @_Z13__spirv_IsNand(double
+; CHECK-SPV-IR: call spir_func i1 @_Z16__spirv_IsNormald(double
+; CHECK-SPV-IR: call spir_func i1 @_Z18__spirv_SignBitSetd(double
+; CHECK-SPV-IR: fcmp oeq double
+; CHECK-SPV-IR: fcmp une double
+; CHECK-SPV-IR: fcmp ogt double
+; CHECK-SPV-IR: fcmp oge double
+; CHECK-SPV-IR: fcmp olt double
+; CHECK-SPV-IR: fcmp ole double
+; CHECK-SPV-IR: fcmp one double
+; CHECK-SPV-IR: fcmp ord double
+; CHECK-SPV-IR: fcmp uno double
+
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z16__spirv_IsFiniteDv2_d(<2 x double>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z13__spirv_IsInfDv2_d(<2 x double>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z13__spirv_IsNanDv2_d(<2 x double>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z16__spirv_IsNormalDv2_d(<2 x double>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z18__spirv_SignBitSetDv2_d(<2 x double>
+; CHECK-SPV-IR: fcmp oeq <2 x double>
+; CHECK-SPV-IR: fcmp une <2 x double>
+; CHECK-SPV-IR: fcmp ogt <2 x double>
+; CHECK-SPV-IR: fcmp oge <2 x double>
+; CHECK-SPV-IR: fcmp olt <2 x double>
+; CHECK-SPV-IR: fcmp ole <2 x double>
+; CHECK-SPV-IR: fcmp one <2 x double>
+; CHECK-SPV-IR: fcmp ord <2 x double>
+; CHECK-SPV-IR: fcmp uno <2 x double>
+
+; CHECK-LLVM: call spir_func i32 @_Z8isfinited(double
+; CHECK-LLVM: call spir_func i32 @_Z5isinfd(double
+; CHECK-LLVM: call spir_func i32 @_Z5isnand(double
+; CHECK-LLVM: call spir_func i32 @_Z8isnormald(double
+; CHECK-LLVM: call spir_func i32 @_Z7signbitd(double
+; CHECK-LLVM: fcmp oeq double
+; CHECK-LLVM: fcmp une double
+; CHECK-LLVM: fcmp ogt double
+; CHECK-LLVM: fcmp oge double
+; CHECK-LLVM: fcmp olt double
+; CHECK-LLVM: fcmp ole double
+; CHECK-LLVM: fcmp one double
+; CHECK-LLVM: fcmp ord double
+; CHECK-LLVM: fcmp uno double
+
+; CHECK-LLVM: call spir_func <2 x i64> @_Z8isfiniteDv2_d(<2 x double>
 ; CHECK-LLVM: call spir_func <2 x i64> @_Z5isinfDv2_d(<2 x double>
 ; CHECK-LLVM: call spir_func <2 x i64> @_Z5isnanDv2_d(<2 x double>
 ; CHECK-LLVM: call spir_func <2 x i64> @_Z8isnormalDv2_d(<2 x double>
-; CHECK-LLVM: call spir_func <2 x i64> @_Z8isfiniteDv2_d(<2 x double>
+; CHECK-LLVM: call spir_func <2 x i64> @_Z7signbitDv2_d(<2 x double>
+; CHECK-LLVM: fcmp oeq <2 x double>
+; CHECK-LLVM: fcmp une <2 x double>
+; CHECK-LLVM: fcmp ogt <2 x double>
+; CHECK-LLVM: fcmp oge <2 x double>
+; CHECK-LLVM: fcmp olt <2 x double>
+; CHECK-LLVM: fcmp ole <2 x double>
+; CHECK-LLVM: fcmp one <2 x double>
+; CHECK-LLVM: fcmp ord <2 x double>
+; CHECK-LLVM: fcmp uno <2 x double>
 
 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir-unknown-unknown"
-
-; Function Attrs: nounwind
-define spir_kernel void @test_vector_double(<2 x i64> addrspace(1)* nocapture %out, <2 x double> %in) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 {
-  %1 = tail call spir_func <2 x i64> @_Z5isinfDv2_d(<2 x double> %in) #2
-  %2 = tail call spir_func <2 x i64> @_Z5isnanDv2_d(<2 x double> %in) #2
-  %3 = add <2 x i64> %1, %2
-  %4 = tail call spir_func <2 x i64> @_Z8isnormalDv2_d(<2 x double> %in) #2
-  %5 = add <2 x i64> %3, %4
-  %6 = tail call spir_func <2 x i64> @_Z8isfiniteDv2_d(<2 x double> %in) #2
-  %7 = add <2 x i64> %5, %6
-  store <2 x i64> %7, <2 x i64> addrspace(1)* %out, align 16, !tbaa !11
+target triple = "spir"
+
+; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn writeonly
+define dso_local spir_func void @test_scalar(i32 addrspace(4)* nocapture writeonly %out, double %d) local_unnamed_addr #0 {
+entry:
+  %call = tail call spir_func i32 @_Z8isfinited(double %d) #3
+  %call1 = tail call spir_func i32 @_Z5isinfd(double %d) #3
+  %add = add nsw i32 %call1, %call
+  %call2 = tail call spir_func i32 @_Z5isnand(double %d) #3
+  %add3 = add nsw i32 %add, %call2
+  %call4 = tail call spir_func i32 @_Z8isnormald(double %d) #3
+  %add5 = add nsw i32 %add3, %call4
+  %call6 = tail call spir_func i32 @_Z7signbitd(double %d) #3
+  %add7 = add nsw i32 %add5, %call6
+  %call8 = tail call spir_func i32 @_Z7isequaldd(double %d, double %d) #3
+  %add9 = add nsw i32 %add7, %call8
+  %call10 = tail call spir_func i32 @_Z10isnotequaldd(double %d, double %d) #3
+  %add11 = add nsw i32 %add9, %call10
+  %call12 = tail call spir_func i32 @_Z9isgreaterdd(double %d, double %d) #3
+  %add13 = add nsw i32 %add11, %call12
+  %call14 = tail call spir_func i32 @_Z14isgreaterequaldd(double %d, double %d) #3
+  %add15 = add nsw i32 %add13, %call14
+  %call16 = tail call spir_func i32 @_Z6islessdd(double %d, double %d) #3
+  %add17 = add nsw i32 %add15, %call16
+  %call18 = tail call spir_func i32 @_Z11islessequaldd(double %d, double %d) #3
+  %add19 = add nsw i32 %add17, %call18
+  %call20 = tail call spir_func i32 @_Z13islessgreaterdd(double %d, double %d) #3
+  %add21 = add nsw i32 %add19, %call20
+  %call22 = tail call spir_func i32 @_Z9isordereddd(double %d, double %d) #3
+  %add23 = add nsw i32 %add21, %call22
+  %call24 = tail call spir_func i32 @_Z11isunordereddd(double %d, double %d) #3
+  %add25 = add nsw i32 %add23, %call24
+  store i32 %add25, i32 addrspace(4)* %out, align 4, !tbaa !3
   ret void
 }
 
-declare spir_func <2 x i64> @_Z5isinfDv2_d(<2 x double>) #1
-
-declare spir_func <2 x i64> @_Z5isnanDv2_d(<2 x double>) #1
-
-declare spir_func <2 x i64> @_Z8isnormalDv2_d(<2 x double>) #1
-
-declare spir_func <2 x i64> @_Z8isfiniteDv2_d(<2 x double>) #1
-
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind }
-
-!opencl.enable.FP_CONTRACT = !{}
-!opencl.spir.version = !{!6}
-!opencl.ocl.version = !{!7}
-!opencl.used.extensions = !{!8}
-!opencl.used.optional.core.features = !{!9}
-!opencl.compiler.options = !{!8}
-!llvm.ident = !{!10}
-
-!1 = !{i32 1, i32 0}
-!2 = !{!"none", !"none"}
-!3 = !{!"long2*", !"double2"}
-!4 = !{!"long2*", !"double2"}
-!5 = !{!"", !""}
-!6 = !{i32 1, i32 2}
-!7 = !{i32 2, i32 0}
-!8 = !{}
-!9 = !{!"cl_doubles"}
-!10 = !{!"clang version 3.6.1 "}
-!11 = !{!12, !12, i64 0}
-!12 = !{!"omnipotent char", !13, i64 0}
-!13 = !{!"Simple C/C++ TBAA"}
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z8isfinited(double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z5isinfd(double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z5isnand(double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z8isnormald(double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z7signbitd(double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z7isequaldd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z10isnotequaldd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z9isgreaterdd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z14isgreaterequaldd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z6islessdd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z11islessequaldd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z13islessgreaterdd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z9isordereddd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z11isunordereddd(double, double) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn writeonly
+define dso_local spir_func void @test_vector(<2 x i64> addrspace(4)* nocapture writeonly %out, <2 x double> %d) local_unnamed_addr #2 {
+entry:
+  %call = tail call spir_func <2 x i64> @_Z8isfiniteDv2_d(<2 x double> %d) #3
+  %call1 = tail call spir_func <2 x i64> @_Z5isinfDv2_d(<2 x double> %d) #3
+  %add = add <2 x i64> %call1, %call
+  %call2 = tail call spir_func <2 x i64> @_Z5isnanDv2_d(<2 x double> %d) #3
+  %add3 = add <2 x i64> %add, %call2
+  %call4 = tail call spir_func <2 x i64> @_Z8isnormalDv2_d(<2 x double> %d) #3
+  %add5 = add <2 x i64> %add3, %call4
+  %call6 = tail call spir_func <2 x i64> @_Z7signbitDv2_d(<2 x double> %d) #3
+  %add7 = add <2 x i64> %add5, %call6
+  %call8 = tail call spir_func <2 x i64> @_Z7isequalDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add9 = add <2 x i64> %add7, %call8
+  %call10 = tail call spir_func <2 x i64> @_Z10isnotequalDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add11 = add <2 x i64> %add9, %call10
+  %call12 = tail call spir_func <2 x i64> @_Z9isgreaterDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add13 = add <2 x i64> %add11, %call12
+  %call14 = tail call spir_func <2 x i64> @_Z14isgreaterequalDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add15 = add <2 x i64> %add13, %call14
+  %call16 = tail call spir_func <2 x i64> @_Z6islessDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add17 = add <2 x i64> %add15, %call16
+  %call18 = tail call spir_func <2 x i64> @_Z11islessequalDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add19 = add <2 x i64> %add17, %call18
+  %call20 = tail call spir_func <2 x i64> @_Z13islessgreaterDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add21 = add <2 x i64> %add19, %call20
+  %call22 = tail call spir_func <2 x i64> @_Z9isorderedDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add23 = add <2 x i64> %add21, %call22
+  %call24 = tail call spir_func <2 x i64> @_Z11isunorderedDv2_dS_(<2 x double> %d, <2 x double> %d) #3
+  %add25 = add <2 x i64> %add23, %call24
+  store <2 x i64> %add25, <2 x i64> addrspace(4)* %out, align 16, !tbaa !7
+  ret void
+}
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z8isfiniteDv2_d(<2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z5isinfDv2_d(<2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z5isnanDv2_d(<2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z8isnormalDv2_d(<2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z7signbitDv2_d(<2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z7isequalDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z10isnotequalDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z9isgreaterDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z14isgreaterequalDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z6islessDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z11islessequalDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z13islessgreaterDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z9isorderedDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i64> @_Z11isunorderedDv2_dS_(<2 x double>, <2 x double>) local_unnamed_addr #1
+
+attributes #0 = { convergent mustprogress nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent mustprogress nofree nounwind readnone willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent mustprogress nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "min-legal-vector-width"="128" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { convergent nounwind readnone willreturn }
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 14.0.0"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!5, !5, i64 0}
diff --git a/llvm-spirv/test/transcoding/relationals_float.ll b/llvm-spirv/test/transcoding/relationals_float.ll
index fdbe9a053babc..3b0179284278e 100644
--- a/llvm-spirv/test/transcoding/relationals_float.ll
+++ b/llvm-spirv/test/transcoding/relationals_float.ll
@@ -3,139 +3,279 @@
 ; RUN: FileCheck < %t.txt %s --check-prefix=CHECK-SPIRV
 ; RUN: llvm-spirv %t.bc -o %t.spv
 ; RUN: spirv-val %t.spv
+; RUN: llvm-spirv -r %t.spv -o %t.rev.spv.bc --spirv-target-env=SPV-IR
+; RUN: llvm-dis < %t.rev.spv.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
 ; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
 
-; CHECK-LLVM: call spir_func i32 @_Z8isfinitef(
-; CHECK-LLVM: call spir_func i32 @_Z5isnanf(
-; CHECK-LLVM: call spir_func i32 @_Z5isinff(
-; CHECK-LLVM: call spir_func i32 @_Z8isnormalf(
-; CHECK-LLVM: call spir_func i32 @_Z7signbitf(
-; CHECK-LLVM: fcmp one float
-; CHECK-LLVM: fcmp ord float
-; CHECK-LLVM: fcmp uno float
-
-; CHECK-LLVM: call spir_func <2 x i32> @_Z8isfiniteDv2_f(
-; CHECK-LLVM: call spir_func <2 x i32> @_Z5isnanDv2_f(
-; CHECK-LLVM: call spir_func <2 x i32> @_Z5isinfDv2_f(
-; CHECK-LLVM: call spir_func <2 x i32> @_Z8isnormalDv2_f(
-; CHECK-LLVM: fcmp one <2 x float>
-; CHECK-LLVM: fcmp ord <2 x float>
-; CHECK-LLVM: fcmp uno <2 x float>
+; This test checks following SYCL relational builtins with float and float2
+; types:
+;   isfinite, isinf, isnan, isnormal, signbit, isequal, isnotequal, isgreater
+;   isgreaterequal, isless, islessequal, islessgreater, isordered, isunordered
 
 ; CHECK-SPIRV: 2 TypeBool [[BoolTypeID:[0-9]+]]
 ; CHECK-SPIRV: 4 TypeVector [[BoolVectorTypeID:[0-9]+]] [[BoolTypeID]] 2
 
 ; CHECK-SPIRV: 4 IsFinite [[BoolTypeID]]
-; CHECK-SPIRV: 4 IsNan [[BoolTypeID]]
 ; CHECK-SPIRV: 4 IsInf [[BoolTypeID]]
+; CHECK-SPIRV: 4 IsNan [[BoolTypeID]]
 ; CHECK-SPIRV: 4 IsNormal [[BoolTypeID]]
 ; CHECK-SPIRV: 4 SignBitSet [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FUnordNotEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThan [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThanEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThan [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThanEqual [[BoolTypeID]]
 ; CHECK-SPIRV: 5 FOrdNotEqual [[BoolTypeID]]
 ; CHECK-SPIRV: 5 Ordered [[BoolTypeID]]
 ; CHECK-SPIRV: 5 Unordered [[BoolTypeID]]
 
 ; CHECK-SPIRV: 4 IsFinite [[BoolVectorTypeID]]
-; CHECK-SPIRV: 4 IsNan [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 4 IsInf [[BoolVectorTypeID]]
+; CHECK-SPIRV: 4 IsNan [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 4 IsNormal [[BoolVectorTypeID]]
+; CHECK-SPIRV: 4 SignBitSet [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FUnordNotEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThan [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThanEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThan [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThanEqual [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 5 FOrdNotEqual [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 5 Ordered [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 5 Unordered [[BoolVectorTypeID]]
 
+; CHECK-SPV-IR: call spir_func i1 @_Z16__spirv_IsFinitef(float
+; CHECK-SPV-IR: call spir_func i1 @_Z13__spirv_IsInff(float
+; CHECK-SPV-IR: call spir_func i1 @_Z13__spirv_IsNanf(float
+; CHECK-SPV-IR: call spir_func i1 @_Z16__spirv_IsNormalf(float
+; CHECK-SPV-IR: call spir_func i1 @_Z18__spirv_SignBitSetf(float
+; CHECK-SPV-IR: fcmp oeq float
+; CHECK-SPV-IR: fcmp une float
+; CHECK-SPV-IR: fcmp ogt float
+; CHECK-SPV-IR: fcmp oge float
+; CHECK-SPV-IR: fcmp olt float
+; CHECK-SPV-IR: fcmp ole float
+; CHECK-SPV-IR: fcmp one float
+; CHECK-SPV-IR: fcmp ord float
+; CHECK-SPV-IR: fcmp uno float
+
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z16__spirv_IsFiniteDv2_f(<2 x float>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z13__spirv_IsInfDv2_f(<2 x float>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z13__spirv_IsNanDv2_f(<2 x float>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z16__spirv_IsNormalDv2_f(<2 x float>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z18__spirv_SignBitSetDv2_f(<2 x float>
+; CHECK-SPV-IR: fcmp oeq <2 x float>
+; CHECK-SPV-IR: fcmp une <2 x float>
+; CHECK-SPV-IR: fcmp ogt <2 x float>
+; CHECK-SPV-IR: fcmp oge <2 x float>
+; CHECK-SPV-IR: fcmp olt <2 x float>
+; CHECK-SPV-IR: fcmp ole <2 x float>
+; CHECK-SPV-IR: fcmp one <2 x float>
+; CHECK-SPV-IR: fcmp ord <2 x float>
+; CHECK-SPV-IR: fcmp uno <2 x float>
+
+; CHECK-LLVM: call spir_func i32 @_Z8isfinitef(float
+; CHECK-LLVM: call spir_func i32 @_Z5isinff(float
+; CHECK-LLVM: call spir_func i32 @_Z5isnanf(float
+; CHECK-LLVM: call spir_func i32 @_Z8isnormalf(float
+; CHECK-LLVM: call spir_func i32 @_Z7signbitf(float
+; CHECK-LLVM: fcmp oeq float
+; CHECK-LLVM: fcmp une float
+; CHECK-LLVM: fcmp ogt float
+; CHECK-LLVM: fcmp oge float
+; CHECK-LLVM: fcmp olt float
+; CHECK-LLVM: fcmp ole float
+; CHECK-LLVM: fcmp one float
+; CHECK-LLVM: fcmp ord float
+; CHECK-LLVM: fcmp uno float
+
+; CHECK-LLVM: call spir_func <2 x i32> @_Z8isfiniteDv2_f(<2 x float>
+; CHECK-LLVM: call spir_func <2 x i32> @_Z5isinfDv2_f(<2 x float>
+; CHECK-LLVM: call spir_func <2 x i32> @_Z5isnanDv2_f(<2 x float>
+; CHECK-LLVM: call spir_func <2 x i32> @_Z8isnormalDv2_f(<2 x float>
+; CHECK-LLVM: call spir_func <2 x i32> @_Z7signbitDv2_f(<2 x float>
+; CHECK-LLVM: fcmp oeq <2 x float>
+; CHECK-LLVM: fcmp une <2 x float>
+; CHECK-LLVM: fcmp ogt <2 x float>
+; CHECK-LLVM: fcmp oge <2 x float>
+; CHECK-LLVM: fcmp olt <2 x float>
+; CHECK-LLVM: fcmp ole <2 x float>
+; CHECK-LLVM: fcmp one <2 x float>
+; CHECK-LLVM: fcmp ord <2 x float>
+; CHECK-LLVM: fcmp uno <2 x float>
+
 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir-unknown-unknown"
+target triple = "spir"
 
-; Function Attrs: nounwind
-define spir_kernel void @test_scalar(i32 addrspace(1)* nocapture %out, float %f) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 {
+; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn writeonly
+define dso_local spir_func void @test_scalar(i32 addrspace(4)* nocapture writeonly %out, float %f) local_unnamed_addr #0 {
 entry:
-  %call = tail call spir_func i32 @_Z8isfinitef(float %f) #2
-  %call1 = tail call spir_func i32 @_Z5isnanf(float %f) #2
+  %call = tail call spir_func i32 @_Z8isfinitef(float %f) #3
+  %call1 = tail call spir_func i32 @_Z5isinff(float %f) #3
   %add = add nsw i32 %call1, %call
-  %call2 = tail call spir_func i32 @_Z5isinff(float %f) #2
+  %call2 = tail call spir_func i32 @_Z5isnanf(float %f) #3
   %add3 = add nsw i32 %add, %call2
-  %call4 = tail call spir_func i32 @_Z8isnormalf(float %f) #2
+  %call4 = tail call spir_func i32 @_Z8isnormalf(float %f) #3
   %add5 = add nsw i32 %add3, %call4
-  %call6 = tail call spir_func i32 @_Z7signbitf(float %f) #2
+  %call6 = tail call spir_func i32 @_Z7signbitf(float %f) #3
   %add7 = add nsw i32 %add5, %call6
-  %call8 = tail call spir_func i32 @_Z13islessgreaterff(float %f, float %f) #2
+  %call8 = tail call spir_func i32 @_Z7isequalff(float %f, float %f) #3
   %add9 = add nsw i32 %add7, %call8
-  %call10 = tail call spir_func i32 @_Z9isorderedff(float %f, float %f) #2
+  %call10 = tail call spir_func i32 @_Z10isnotequalff(float %f, float %f) #3
   %add11 = add nsw i32 %add9, %call10
-  %call12 = tail call spir_func i32 @_Z11isunorderedff(float %f, float %f) #2
+  %call12 = tail call spir_func i32 @_Z9isgreaterff(float %f, float %f) #3
   %add13 = add nsw i32 %add11, %call12
-  store i32 %add13, i32 addrspace(1)* %out, align 4
+  %call14 = tail call spir_func i32 @_Z14isgreaterequalff(float %f, float %f) #3
+  %add15 = add nsw i32 %add13, %call14
+  %call16 = tail call spir_func i32 @_Z6islessff(float %f, float %f) #3
+  %add17 = add nsw i32 %add15, %call16
+  %call18 = tail call spir_func i32 @_Z11islessequalff(float %f, float %f) #3
+  %add19 = add nsw i32 %add17, %call18
+  %call20 = tail call spir_func i32 @_Z13islessgreaterff(float %f, float %f) #3
+  %add21 = add nsw i32 %add19, %call20
+  %call22 = tail call spir_func i32 @_Z9isorderedff(float %f, float %f) #3
+  %add23 = add nsw i32 %add21, %call22
+  %call24 = tail call spir_func i32 @_Z11isunorderedff(float %f, float %f) #3
+  %add25 = add nsw i32 %add23, %call24
+  store i32 %add25, i32 addrspace(4)* %out, align 4, !tbaa !3
   ret void
 }
 
-declare spir_func i32 @_Z8isfinitef(float) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z8isfinitef(float) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z5isinff(float) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z5isnanf(float) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z8isnormalf(float) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z7signbitf(float) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z7isequalff(float, float) local_unnamed_addr #1
 
-declare spir_func i32 @_Z5isnanf(float) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z10isnotequalff(float, float) local_unnamed_addr #1
 
-declare spir_func i32 @_Z5isinff(float) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z9isgreaterff(float, float) local_unnamed_addr #1
 
-declare spir_func i32 @_Z8isnormalf(float) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z14isgreaterequalff(float, float) local_unnamed_addr #1
 
-declare spir_func i32 @_Z7signbitf(float) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z6islessff(float, float) local_unnamed_addr #1
 
-declare spir_func i32 @_Z13islessgreaterff(float, float) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z11islessequalff(float, float) local_unnamed_addr #1
 
-declare spir_func i32 @_Z9isorderedff(float, float) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z13islessgreaterff(float, float) local_unnamed_addr #1
 
-declare spir_func i32 @_Z11isunorderedff(float, float) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z9isorderedff(float, float) local_unnamed_addr #1
 
-; Function Attrs: nounwind
-define spir_kernel void @test_vector(<2 x i32> addrspace(1)* nocapture %out, <2 x float> %f) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !5 {
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z11isunorderedff(float, float) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn writeonly
+define dso_local spir_func void @test_vector(<2 x i32> addrspace(4)* nocapture writeonly %out, <2 x float> %f) local_unnamed_addr #2 {
 entry:
-  %call = tail call spir_func <2 x i32> @_Z8isfiniteDv2_f(<2 x float> %f) #2
-  %call1 = tail call spir_func <2 x i32> @_Z5isnanDv2_f(<2 x float> %f) #2
-  %add = add <2 x i32> %call, %call1
-  %call2 = tail call spir_func <2 x i32> @_Z5isinfDv2_f(<2 x float> %f) #2
+  %call = tail call spir_func <2 x i32> @_Z8isfiniteDv2_f(<2 x float> %f) #3
+  %call1 = tail call spir_func <2 x i32> @_Z5isinfDv2_f(<2 x float> %f) #3
+  %add = add <2 x i32> %call1, %call
+  %call2 = tail call spir_func <2 x i32> @_Z5isnanDv2_f(<2 x float> %f) #3
   %add3 = add <2 x i32> %add, %call2
-  %call4 = tail call spir_func <2 x i32> @_Z8isnormalDv2_f(<2 x float> %f) #2
+  %call4 = tail call spir_func <2 x i32> @_Z8isnormalDv2_f(<2 x float> %f) #3
   %add5 = add <2 x i32> %add3, %call4
-  %call6 = tail call spir_func <2 x i32> @_Z13islessgreaterDv2_fS_(<2 x float> %f, <2 x float> %f) #2
+  %call6 = tail call spir_func <2 x i32> @_Z7signbitDv2_f(<2 x float> %f) #3
   %add7 = add <2 x i32> %add5, %call6
-  %call8 = tail call spir_func <2 x i32> @_Z9isorderedDv2_fS_(<2 x float> %f, <2 x float> %f) #2
+  %call8 = tail call spir_func <2 x i32> @_Z7isequalDv2_fS_(<2 x float> %f, <2 x float> %f) #3
   %add9 = add <2 x i32> %add7, %call8
-  %call10 = tail call spir_func <2 x i32> @_Z11isunorderedDv2_fS_(<2 x float> %f, <2 x float> %f) #2
+  %call10 = tail call spir_func <2 x i32> @_Z10isnotequalDv2_fS_(<2 x float> %f, <2 x float> %f) #3
   %add11 = add <2 x i32> %add9, %call10
-  store <2 x i32> %add11, <2 x i32> addrspace(1)* %out, align 8
+  %call12 = tail call spir_func <2 x i32> @_Z9isgreaterDv2_fS_(<2 x float> %f, <2 x float> %f) #3
+  %add13 = add <2 x i32> %add11, %call12
+  %call14 = tail call spir_func <2 x i32> @_Z14isgreaterequalDv2_fS_(<2 x float> %f, <2 x float> %f) #3
+  %add15 = add <2 x i32> %add13, %call14
+  %call16 = tail call spir_func <2 x i32> @_Z6islessDv2_fS_(<2 x float> %f, <2 x float> %f) #3
+  %add17 = add <2 x i32> %add15, %call16
+  %call18 = tail call spir_func <2 x i32> @_Z11islessequalDv2_fS_(<2 x float> %f, <2 x float> %f) #3
+  %add19 = add <2 x i32> %add17, %call18
+  %call20 = tail call spir_func <2 x i32> @_Z13islessgreaterDv2_fS_(<2 x float> %f, <2 x float> %f) #3
+  %add21 = add <2 x i32> %add19, %call20
+  %call22 = tail call spir_func <2 x i32> @_Z9isorderedDv2_fS_(<2 x float> %f, <2 x float> %f) #3
+  %add23 = add <2 x i32> %add21, %call22
+  %call24 = tail call spir_func <2 x i32> @_Z11isunorderedDv2_fS_(<2 x float> %f, <2 x float> %f) #3
+  %add25 = add <2 x i32> %add23, %call24
+  store <2 x i32> %add25, <2 x i32> addrspace(4)* %out, align 8, !tbaa !7
   ret void
 }
 
-declare spir_func <2 x i32> @_Z8isfiniteDv2_f(<2 x float>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z8isfiniteDv2_f(<2 x float>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z5isinfDv2_f(<2 x float>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z5isnanDv2_f(<2 x float>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z8isnormalDv2_f(<2 x float>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z7signbitDv2_f(<2 x float>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z7isequalDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z10isnotequalDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z9isgreaterDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
 
-declare spir_func <2 x i32> @_Z5isnanDv2_f(<2 x float>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z14isgreaterequalDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
 
-declare spir_func <2 x i32> @_Z5isinfDv2_f(<2 x float>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z6islessDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
 
-declare spir_func <2 x i32> @_Z8isnormalDv2_f(<2 x float>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z11islessequalDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
 
-declare spir_func <2 x i32> @_Z13islessgreaterDv2_fS_(<2 x float>, <2 x float>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z13islessgreaterDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
 
-declare spir_func <2 x i32> @_Z9isorderedDv2_fS_(<2 x float>, <2 x float>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z9isorderedDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
 
-declare spir_func <2 x i32> @_Z11isunorderedDv2_fS_(<2 x float>, <2 x float>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i32> @_Z11isunorderedDv2_fS_(<2 x float>, <2 x float>) local_unnamed_addr #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind }
+attributes #0 = { convergent mustprogress nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent mustprogress nofree nounwind readnone willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent mustprogress nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "min-legal-vector-width"="64" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { convergent nounwind readnone willreturn }
 
-!opencl.enable.FP_CONTRACT = !{}
-!opencl.spir.version = !{!9}
-!opencl.ocl.version = !{!10}
-!opencl.used.extensions = !{!11}
-!opencl.used.optional.core.features = !{!11}
-!opencl.compiler.options = !{!11}
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+!llvm.ident = !{!2}
 
-!1 = !{i32 1, i32 0}
-!2 = !{!"none", !"none"}
-!3 = !{!"int*", !"float"}
-!4 = !{!"int*", !"float"}
-!5 = !{!"", !""}
-!7 = !{!"int2*", !"float2"}
-!8 = !{!"int2*", !"float2"}
-!9 = !{i32 1, i32 2}
-!10 = !{i32 2, i32 0}
-!11 = !{}
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 14.0.0"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!5, !5, i64 0}
diff --git a/llvm-spirv/test/transcoding/relationals_half.ll b/llvm-spirv/test/transcoding/relationals_half.ll
index 6990a021db7eb..b121b896da6b1 100644
--- a/llvm-spirv/test/transcoding/relationals_half.ll
+++ b/llvm-spirv/test/transcoding/relationals_half.ll
@@ -3,139 +3,278 @@
 ; RUN: FileCheck < %t.txt %s --check-prefix=CHECK-SPIRV
 ; RUN: llvm-spirv %t.bc -o %t.spv
 ; RUN: spirv-val %t.spv
+; RUN: llvm-spirv -r %t.spv -o %t.rev.spv.bc --spirv-target-env=SPV-IR
+; RUN: llvm-dis < %t.rev.spv.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
 ; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
 
-; CHECK-LLVM: call spir_func i32 @_Z8isfiniteDh(
-; CHECK-LLVM: call spir_func i32 @_Z5isnanDh(
-; CHECK-LLVM: call spir_func i32 @_Z5isinfDh(
-; CHECK-LLVM: call spir_func i32 @_Z8isnormalDh(
-; CHECK-LLVM: call spir_func i32 @_Z7signbitDh(
-; CHECK-LLVM: fcmp one half
-; CHECK-LLVM: fcmp ord half
-; CHECK-LLVM: fcmp uno half
-
-; CHECK-LLVM: call spir_func <2 x i16> @_Z8isfiniteDv2_Dh(
-; CHECK-LLVM: call spir_func <2 x i16> @_Z5isnanDv2_Dh(
-; CHECK-LLVM: call spir_func <2 x i16> @_Z5isinfDv2_Dh(
-; CHECK-LLVM: call spir_func <2 x i16> @_Z8isnormalDv2_Dh(
-; CHECK-LLVM: fcmp one <2 x half>
-; CHECK-LLVM: fcmp ord <2 x half>
-; CHECK-LLVM: fcmp uno <2 x half>
+; This test checks following SYCL relational builtins with half and half2 types:
+;   isfinite, isinf, isnan, isnormal, signbit, isequal, isnotequal, isgreater
+;   isgreaterequal, isless, islessequal, islessgreater, isordered, isunordered
 
 ; CHECK-SPIRV: 2 TypeBool [[BoolTypeID:[0-9]+]]
 ; CHECK-SPIRV: 4 TypeVector [[BoolVectorTypeID:[0-9]+]] [[BoolTypeID]] 2
 
 ; CHECK-SPIRV: 4 IsFinite [[BoolTypeID]]
-; CHECK-SPIRV: 4 IsNan [[BoolTypeID]]
 ; CHECK-SPIRV: 4 IsInf [[BoolTypeID]]
+; CHECK-SPIRV: 4 IsNan [[BoolTypeID]]
 ; CHECK-SPIRV: 4 IsNormal [[BoolTypeID]]
 ; CHECK-SPIRV: 4 SignBitSet [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FUnordNotEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThan [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThanEqual [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThan [[BoolTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThanEqual [[BoolTypeID]]
 ; CHECK-SPIRV: 5 FOrdNotEqual [[BoolTypeID]]
 ; CHECK-SPIRV: 5 Ordered [[BoolTypeID]]
 ; CHECK-SPIRV: 5 Unordered [[BoolTypeID]]
 
 ; CHECK-SPIRV: 4 IsFinite [[BoolVectorTypeID]]
-; CHECK-SPIRV: 4 IsNan [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 4 IsInf [[BoolVectorTypeID]]
+; CHECK-SPIRV: 4 IsNan [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 4 IsNormal [[BoolVectorTypeID]]
+; CHECK-SPIRV: 4 SignBitSet [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FUnordNotEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThan [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdGreaterThanEqual [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThan [[BoolVectorTypeID]]
+; CHECK-SPIRV: 5 FOrdLessThanEqual [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 5 FOrdNotEqual [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 5 Ordered [[BoolVectorTypeID]]
 ; CHECK-SPIRV: 5 Unordered [[BoolVectorTypeID]]
 
+; CHECK-SPV-IR: call spir_func i1 @_Z16__spirv_IsFiniteDh(half
+; CHECK-SPV-IR: call spir_func i1 @_Z13__spirv_IsInfDh(half
+; CHECK-SPV-IR: call spir_func i1 @_Z13__spirv_IsNanDh(half
+; CHECK-SPV-IR: call spir_func i1 @_Z16__spirv_IsNormalDh(half
+; CHECK-SPV-IR: call spir_func i1 @_Z18__spirv_SignBitSetDh(half
+; CHECK-SPV-IR: fcmp oeq half
+; CHECK-SPV-IR: fcmp une half
+; CHECK-SPV-IR: fcmp ogt half
+; CHECK-SPV-IR: fcmp oge half
+; CHECK-SPV-IR: fcmp olt half
+; CHECK-SPV-IR: fcmp ole half
+; CHECK-SPV-IR: fcmp one half
+; CHECK-SPV-IR: fcmp ord half
+; CHECK-SPV-IR: fcmp uno half
+
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z16__spirv_IsFiniteDv2_Dh(<2 x half>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z13__spirv_IsInfDv2_Dh(<2 x half>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z13__spirv_IsNanDv2_Dh(<2 x half>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z16__spirv_IsNormalDv2_Dh(<2 x half>
+; CHECK-SPV-IR: call spir_func <2 x i8> @_Z18__spirv_SignBitSetDv2_Dh(<2 x half>
+; CHECK-SPV-IR: fcmp oeq <2 x half>
+; CHECK-SPV-IR: fcmp une <2 x half>
+; CHECK-SPV-IR: fcmp ogt <2 x half>
+; CHECK-SPV-IR: fcmp oge <2 x half>
+; CHECK-SPV-IR: fcmp olt <2 x half>
+; CHECK-SPV-IR: fcmp ole <2 x half>
+; CHECK-SPV-IR: fcmp one <2 x half>
+; CHECK-SPV-IR: fcmp ord <2 x half>
+; CHECK-SPV-IR: fcmp uno <2 x half>
+
+; CHECK-LLVM: call spir_func i32 @_Z8isfiniteDh(half
+; CHECK-LLVM: call spir_func i32 @_Z5isinfDh(half
+; CHECK-LLVM: call spir_func i32 @_Z5isnanDh(half
+; CHECK-LLVM: call spir_func i32 @_Z8isnormalDh(half
+; CHECK-LLVM: call spir_func i32 @_Z7signbitDh(half
+; CHECK-LLVM: fcmp oeq half
+; CHECK-LLVM: fcmp une half
+; CHECK-LLVM: fcmp ogt half
+; CHECK-LLVM: fcmp oge half
+; CHECK-LLVM: fcmp olt half
+; CHECK-LLVM: fcmp ole half
+; CHECK-LLVM: fcmp one half
+; CHECK-LLVM: fcmp ord half
+; CHECK-LLVM: fcmp uno half
+
+; CHECK-LLVM: call spir_func <2 x i16> @_Z8isfiniteDv2_Dh(<2 x half>
+; CHECK-LLVM: call spir_func <2 x i16> @_Z5isinfDv2_Dh(<2 x half>
+; CHECK-LLVM: call spir_func <2 x i16> @_Z5isnanDv2_Dh(<2 x half>
+; CHECK-LLVM: call spir_func <2 x i16> @_Z8isnormalDv2_Dh(<2 x half>
+; CHECK-LLVM: call spir_func <2 x i16> @_Z7signbitDv2_Dh(<2 x half>
+; CHECK-LLVM: fcmp oeq <2 x half>
+; CHECK-LLVM: fcmp une <2 x half>
+; CHECK-LLVM: fcmp ogt <2 x half>
+; CHECK-LLVM: fcmp oge <2 x half>
+; CHECK-LLVM: fcmp olt <2 x half>
+; CHECK-LLVM: fcmp ole <2 x half>
+; CHECK-LLVM: fcmp one <2 x half>
+; CHECK-LLVM: fcmp ord <2 x half>
+; CHECK-LLVM: fcmp uno <2 x half>
+
 target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
-target triple = "spir-unknown-unknown"
+target triple = "spir"
 
-; Function Attrs: nounwind
-define spir_kernel void @test_scalar(i32 addrspace(1)* nocapture %out, half %f) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !3 !kernel_arg_base_type !4 !kernel_arg_type_qual !5 {
+; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn writeonly
+define dso_local spir_func void @test_scalar(i32 addrspace(4)* nocapture writeonly %out, half %h) local_unnamed_addr #0 {
 entry:
-  %call = tail call spir_func i32 @_Z8isfiniteDh(half %f) #2
-  %call1 = tail call spir_func i32 @_Z5isnanDh(half %f) #2
+  %call = tail call spir_func i32 @_Z8isfiniteDh(half %h) #3
+  %call1 = tail call spir_func i32 @_Z5isinfDh(half %h) #3
   %add = add nsw i32 %call1, %call
-  %call2 = tail call spir_func i32 @_Z5isinfDh(half %f) #2
+  %call2 = tail call spir_func i32 @_Z5isnanDh(half %h) #3
   %add3 = add nsw i32 %add, %call2
-  %call4 = tail call spir_func i32 @_Z8isnormalDh(half %f) #2
+  %call4 = tail call spir_func i32 @_Z8isnormalDh(half %h) #3
   %add5 = add nsw i32 %add3, %call4
-  %call6 = tail call spir_func i32 @_Z7signbitDh(half %f) #2
+  %call6 = tail call spir_func i32 @_Z7signbitDh(half %h) #3
   %add7 = add nsw i32 %add5, %call6
-  %call8 = tail call spir_func i32 @_Z13islessgreaterDhDh(half %f, half %f) #2
+  %call8 = tail call spir_func i32 @_Z7isequalDhDh(half %h, half %h) #3
   %add9 = add nsw i32 %add7, %call8
-  %call10 = tail call spir_func i32 @_Z9isorderedDhDh(half %f, half %f) #2
+  %call10 = tail call spir_func i32 @_Z10isnotequalDhDh(half %h, half %h) #3
   %add11 = add nsw i32 %add9, %call10
-  %call12 = tail call spir_func i32 @_Z11isunorderedDhDh(half %f, half %f) #2
+  %call12 = tail call spir_func i32 @_Z9isgreaterDhDh(half %h, half %h) #3
   %add13 = add nsw i32 %add11, %call12
-  store i32 %add13, i32 addrspace(1)* %out, align 4
+  %call14 = tail call spir_func i32 @_Z14isgreaterequalDhDh(half %h, half %h) #3
+  %add15 = add nsw i32 %add13, %call14
+  %call16 = tail call spir_func i32 @_Z6islessDhDh(half %h, half %h) #3
+  %add17 = add nsw i32 %add15, %call16
+  %call18 = tail call spir_func i32 @_Z11islessequalDhDh(half %h, half %h) #3
+  %add19 = add nsw i32 %add17, %call18
+  %call20 = tail call spir_func i32 @_Z13islessgreaterDhDh(half %h, half %h) #3
+  %add21 = add nsw i32 %add19, %call20
+  %call22 = tail call spir_func i32 @_Z9isorderedDhDh(half %h, half %h) #3
+  %add23 = add nsw i32 %add21, %call22
+  %call24 = tail call spir_func i32 @_Z11isunorderedDhDh(half %h, half %h) #3
+  %add25 = add nsw i32 %add23, %call24
+  store i32 %add25, i32 addrspace(4)* %out, align 4, !tbaa !3
   ret void
 }
 
-declare spir_func i32 @_Z8isfiniteDh(half) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z8isfiniteDh(half) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z5isinfDh(half) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z5isnanDh(half) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z8isnormalDh(half) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z7signbitDh(half) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z7isequalDhDh(half, half) local_unnamed_addr #1
 
-declare spir_func i32 @_Z5isnanDh(half) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z10isnotequalDhDh(half, half) local_unnamed_addr #1
 
-declare spir_func i32 @_Z5isinfDh(half) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z9isgreaterDhDh(half, half) local_unnamed_addr #1
 
-declare spir_func i32 @_Z8isnormalDh(half) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z14isgreaterequalDhDh(half, half) local_unnamed_addr #1
 
-declare spir_func i32 @_Z7signbitDh(half) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z6islessDhDh(half, half) local_unnamed_addr #1
 
-declare spir_func i32 @_Z13islessgreaterDhDh(half, half) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z11islessequalDhDh(half, half) local_unnamed_addr #1
 
-declare spir_func i32 @_Z9isorderedDhDh(half, half) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z13islessgreaterDhDh(half, half) local_unnamed_addr #1
 
-declare spir_func i32 @_Z11isunorderedDhDh(half, half) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z9isorderedDhDh(half, half) local_unnamed_addr #1
 
-; Function Attrs: nounwind
-define spir_kernel void @test_vector(<2 x i16> addrspace(1)* nocapture %out, <2 x half> %f) #0 !kernel_arg_addr_space !1 !kernel_arg_access_qual !2 !kernel_arg_type !7 !kernel_arg_base_type !8 !kernel_arg_type_qual !5 {
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func i32 @_Z11isunorderedDhDh(half, half) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree norecurse nounwind willreturn writeonly
+define dso_local spir_func void @test_vector(<2 x i16> addrspace(4)* nocapture writeonly %out, <2 x half> %h) local_unnamed_addr #2 {
 entry:
-  %call = tail call spir_func <2 x i16> @_Z8isfiniteDv2_Dh(<2 x half> %f) #2
-  %call1 = tail call spir_func <2 x i16> @_Z5isnanDv2_Dh(<2 x half> %f) #2
-  %add = add <2 x i16> %call, %call1
-  %call2 = tail call spir_func <2 x i16> @_Z5isinfDv2_Dh(<2 x half> %f) #2
+  %call = tail call spir_func <2 x i16> @_Z8isfiniteDv2_Dh(<2 x half> %h) #3
+  %call1 = tail call spir_func <2 x i16> @_Z5isinfDv2_Dh(<2 x half> %h) #3
+  %add = add <2 x i16> %call1, %call
+  %call2 = tail call spir_func <2 x i16> @_Z5isnanDv2_Dh(<2 x half> %h) #3
   %add3 = add <2 x i16> %add, %call2
-  %call4 = tail call spir_func <2 x i16> @_Z8isnormalDv2_Dh(<2 x half> %f) #2
+  %call4 = tail call spir_func <2 x i16> @_Z8isnormalDv2_Dh(<2 x half> %h) #3
   %add5 = add <2 x i16> %add3, %call4
-  %call6 = tail call spir_func <2 x i16> @_Z13islessgreaterDv2_DhS_(<2 x half> %f, <2 x half> %f) #2
+  %call6 = tail call spir_func <2 x i16> @_Z7signbitDv2_Dh(<2 x half> %h) #3
   %add7 = add <2 x i16> %add5, %call6
-  %call8 = tail call spir_func <2 x i16> @_Z9isorderedDv2_DhS_(<2 x half> %f, <2 x half> %f) #2
+  %call8 = tail call spir_func <2 x i16> @_Z7isequalDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
   %add9 = add <2 x i16> %add7, %call8
-  %call10 = tail call spir_func <2 x i16> @_Z11isunorderedDv2_DhS_(<2 x half> %f, <2 x half> %f) #2
+  %call10 = tail call spir_func <2 x i16> @_Z10isnotequalDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
   %add11 = add <2 x i16> %add9, %call10
-  store <2 x i16> %add11, <2 x i16> addrspace(1)* %out, align 8
+  %call12 = tail call spir_func <2 x i16> @_Z9isgreaterDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
+  %add13 = add <2 x i16> %add11, %call12
+  %call14 = tail call spir_func <2 x i16> @_Z14isgreaterequalDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
+  %add15 = add <2 x i16> %add13, %call14
+  %call16 = tail call spir_func <2 x i16> @_Z6islessDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
+  %add17 = add <2 x i16> %add15, %call16
+  %call18 = tail call spir_func <2 x i16> @_Z11islessequalDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
+  %add19 = add <2 x i16> %add17, %call18
+  %call20 = tail call spir_func <2 x i16> @_Z13islessgreaterDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
+  %add21 = add <2 x i16> %add19, %call20
+  %call22 = tail call spir_func <2 x i16> @_Z9isorderedDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
+  %add23 = add <2 x i16> %add21, %call22
+  %call24 = tail call spir_func <2 x i16> @_Z11isunorderedDv2_DhS_(<2 x half> %h, <2 x half> %h) #3
+  %add25 = add <2 x i16> %add23, %call24
+  store <2 x i16> %add25, <2 x i16> addrspace(4)* %out, align 4, !tbaa !7
   ret void
 }
 
-declare spir_func <2 x i16> @_Z8isfiniteDv2_Dh(<2 x half>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z8isfiniteDv2_Dh(<2 x half>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z5isinfDv2_Dh(<2 x half>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z5isnanDv2_Dh(<2 x half>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z8isnormalDv2_Dh(<2 x half>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z7signbitDv2_Dh(<2 x half>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z7isequalDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z10isnotequalDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
+
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z9isgreaterDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
 
-declare spir_func <2 x i16> @_Z5isnanDv2_Dh(<2 x half>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z14isgreaterequalDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
 
-declare spir_func <2 x i16> @_Z5isinfDv2_Dh(<2 x half>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z6islessDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
 
-declare spir_func <2 x i16> @_Z8isnormalDv2_Dh(<2 x half>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z11islessequalDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
 
-declare spir_func <2 x i16> @_Z13islessgreaterDv2_DhS_(<2 x half>, <2 x half>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z13islessgreaterDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
 
-declare spir_func <2 x i16> @_Z9isorderedDv2_DhS_(<2 x half>, <2 x half>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z9isorderedDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
 
-declare spir_func <2 x i16> @_Z11isunorderedDv2_DhS_(<2 x half>, <2 x half>) #1
+; Function Attrs: convergent mustprogress nofree nounwind readnone willreturn
+declare spir_func <2 x i16> @_Z11isunorderedDv2_DhS_(<2 x half>, <2 x half>) local_unnamed_addr #1
 
-attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { nounwind }
+attributes #0 = { convergent mustprogress nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent mustprogress nofree nounwind readnone willreturn "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent mustprogress nofree norecurse nounwind willreturn writeonly "frame-pointer"="none" "min-legal-vector-width"="32" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #3 = { convergent nounwind readnone willreturn }
 
-!opencl.enable.FP_CONTRACT = !{}
-!opencl.spir.version = !{!9}
-!opencl.ocl.version = !{!10}
-!opencl.used.extensions = !{!11}
-!opencl.used.optional.core.features = !{!11}
-!opencl.compiler.options = !{!11}
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!1}
+!llvm.ident = !{!2}
 
-!1 = !{i32 1, i32 0}
-!2 = !{!"none", !"none"}
-!3 = !{!"int*", !"half"}
-!4 = !{!"int*", !"half"}
-!5 = !{!"", !""}
-!7 = !{!"short2*", !"half2"}
-!8 = !{!"short2*", !"half2"}
-!9 = !{i32 1, i32 2}
-!10 = !{i32 2, i32 0}
-!11 = !{}
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 2, i32 0}
+!2 = !{!"clang version 14.0.0"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}
+!7 = !{!5, !5, i64 0}

From 4f1283a8928d7b488f3695e76a9d1c99e706ba0c Mon Sep 17 00:00:00 2001
From: Sven van Haastregt <sven.vanhaastregt@arm.com>
Date: Wed, 26 Jan 2022 11:11:35 +0000
Subject: [PATCH 934/946] Update for PointerType::getElementType deprecation

Update for LLVM commit 184591aeeb5a ("[OpaquePtrs] Deprecate
PointerType::getElementType()", 2022-01-25).

This commit changes all affected calls to getPointerElementType, which
is a temporary solution.  Followup work around the
getPointerElementType calls will be required to align with LLVM's
opaque pointer changes.

Original commit:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/7452422
---
 llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp              |  8 ++++----
 llvm-spirv/lib/SPIRV/OCLUtil.cpp                 |  4 ++--
 .../SPIRV/SPIRVLowerBitCastToNonStandardType.cpp |  2 +-
 llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp    |  4 ++--
 llvm-spirv/lib/SPIRV/SPIRVReader.cpp             |  6 +++---
 llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp     | 16 ++++++----------
 llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp              |  2 +-
 llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp            |  5 +++--
 llvm-spirv/lib/SPIRV/SPIRVUtil.cpp               | 10 +++++-----
 llvm-spirv/lib/SPIRV/SPIRVWriter.cpp             |  6 +++---
 10 files changed, 30 insertions(+), 33 deletions(-)

diff --git a/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp
index 1f2687f4c484f..027b4e99b8f67 100644
--- a/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp
+++ b/llvm-spirv/lib/SPIRV/OCLToSPIRV.cpp
@@ -1563,7 +1563,7 @@ void OCLToSPIRVBase::visitCallEnqueueKernel(CallInst *CI,
   // TODO: these numbers should be obtained from block literal structure
   Type *ParamType = getUnderlyingObject(BlockLiteral)->getType();
   if (PointerType *PT = dyn_cast<PointerType>(ParamType))
-    ParamType = PT->getElementType();
+    ParamType = PT->getPointerElementType();
   Args.push_back(getInt32(M, DL.getTypeStoreSize(ParamType)));
   Args.push_back(getInt32(M, DL.getPrefTypeAlignment(ParamType)));
 
@@ -1615,7 +1615,7 @@ void OCLToSPIRVBase::visitCallKernelQuery(CallInst *CI,
         Value *Param = *Args.rbegin();
         Type *ParamType = getUnderlyingObject(Param)->getType();
         if (PointerType *PT = dyn_cast<PointerType>(ParamType)) {
-          ParamType = PT->getElementType();
+          ParamType = PT->getPointerElementType();
         }
         // Last arg corresponds to SPIRV Param operand.
         // Insert Invoke in front of Param.
@@ -1711,7 +1711,7 @@ static const char *getSubgroupAVCIntelOpKind(StringRef Name) {
 }
 
 static const char *getSubgroupAVCIntelTyKind(Type *Ty) {
-  auto STy = cast<StructType>(cast<PointerType>(Ty)->getElementType());
+  auto *STy = cast<StructType>(cast<PointerType>(Ty)->getPointerElementType());
   auto TName = STy->getName();
   return TName.endswith("_payload_t") ? "payload" : "result";
 }
@@ -1746,7 +1746,7 @@ void OCLToSPIRVBase::visitSubgroupAVCBuiltinCall(CallInst *CI,
   // Update names for built-ins mapped on two or more SPIRV instructions
   if (FName.find(Prefix + "ime_get_streamout_major_shape_") == 0) {
     auto PTy = cast<PointerType>(CI->getArgOperand(0)->getType());
-    auto STy = cast<StructType>(PTy->getElementType());
+    auto *STy = cast<StructType>(PTy->getPointerElementType());
     assert(STy->hasName() && "Invalid Subgroup AVC Intel built-in call");
     FName += (STy->getName().contains("single")) ? "_single_reference"
                                                  : "_dual_reference";
diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp
index 89fae7dc82038..6befd1287a419 100644
--- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp
+++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp
@@ -967,7 +967,7 @@ getOCLOpaqueTypeAddrSpace(SPIR::TypePrimitiveEnum Prim) {
 static FunctionType *getBlockInvokeTy(Function *F, unsigned BlockIdx) {
   auto Params = F->getFunctionType()->params();
   PointerType *FuncPtr = cast<PointerType>(Params[BlockIdx]);
-  return cast<FunctionType>(FuncPtr->getElementType());
+  return cast<FunctionType>(FuncPtr->getPointerElementType());
 }
 
 class OCLBuiltinFuncMangleInfo : public SPIRV::BuiltinFuncMangleInfo {
@@ -1370,7 +1370,7 @@ bool isSamplerTy(Type *Ty) {
   if (!PTy)
     return false;
 
-  auto STy = dyn_cast<StructType>(PTy->getElementType());
+  auto *STy = dyn_cast<StructType>(PTy->getPointerElementType());
   return STy && STy->hasName() && STy->getName() == kSPR2TypeName::Sampler;
 }
 
diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerBitCastToNonStandardType.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerBitCastToNonStandardType.cpp
index 070fc61353a3d..64e45e026e452 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVLowerBitCastToNonStandardType.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVLowerBitCastToNonStandardType.cpp
@@ -56,7 +56,7 @@ namespace SPIRV {
 static VectorType *getVectorType(Type *Ty) {
   assert(Ty != nullptr && "Expected non-null type");
   if (auto *ElemTy = dyn_cast<PointerType>(Ty))
-    Ty = ElemTy->getElementType();
+    Ty = ElemTy->getPointerElementType();
   return dyn_cast<VectorType>(Ty);
 }
 
diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp
index e43c8e2f76b24..a48369a6039b2 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVLowerSPIRBlocks.cpp
@@ -361,7 +361,7 @@ class SPIRVLowerSPIRBlocksBase {
       Type *T = G.getInitializer()->getType();
       if (!T->isPointerTy())
         continue;
-      T = cast<PointerType>(T)->getElementType();
+      T = cast<PointerType>(T)->getPointerElementType();
       if (!T->isStructTy())
         continue;
       StringRef STName = cast<StructType>(T)->getName();
@@ -584,7 +584,7 @@ class SPIRVLowerSPIRBlocksBase {
 
   bool isOCLClkEventPtrType(Type *T) {
     if (auto PT = dyn_cast<PointerType>(T))
-      return isPointerToOpaqueStructType(PT->getElementType(),
+      return isPointerToOpaqueStructType(PT->getPointerElementType(),
                                          SPIR_TYPE_NAME_CLK_EVENT_T);
     return false;
   }
diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
index bdb4f7e5b1b77..b5e216c534cce 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
@@ -1288,7 +1288,7 @@ void transFunctionPointerCallArgumentAttributes(SPIRVValue *BV, CallInst *CI) {
         Attribute::isTypeAttrKind(LlvmAttrKind)
             ? Attribute::get(CI->getContext(), LlvmAttrKind,
                              cast<PointerType>(CI->getOperand(ArgNo)->getType())
-                                 ->getElementType())
+                                 ->getPointerElementType())
             : Attribute::get(CI->getContext(), LlvmAttrKind);
     CI->addParamAttr(ArgNo, LlvmAttr);
   }
@@ -2411,7 +2411,7 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F,
       else {
         IID = Intrinsic::ptr_annotation;
         auto *PtrTy = dyn_cast<PointerType>(Ty);
-        if (PtrTy && isa<IntegerType>(PtrTy->getElementType()))
+        if (PtrTy && isa<IntegerType>(PtrTy->getPointerElementType()))
           RetTy = PtrTy;
         // Whether a struct or a pointer to some other type,
         // bitcast to i8*
@@ -2808,7 +2808,7 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF) {
       Type *AttrTy = nullptr;
       switch (LLVMKind) {
       case Attribute::AttrKind::ByVal:
-        AttrTy = cast<PointerType>(I->getType())->getElementType();
+        AttrTy = cast<PointerType>(I->getType())->getPointerElementType();
         break;
       case Attribute::AttrKind::StructRet:
         AttrTy = I->getType();
diff --git a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
index 6d82d32f0d26d..7000a7b719453 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
@@ -349,16 +349,12 @@ void SPIRVRegularizeLLVMBase::adaptStructTypes(StructType *ST) {
     assert(PtrTy &&
            "Expected a pointer to an array to represent joint matrix type");
     size_t TypeLayout[4] = {0, 0, 0, 0};
-    ArrayType *ArrayTy;
-    auto GetTypeLayout = [&](auto *Ty) -> size_t {
-      ArrayTy = dyn_cast<ArrayType>(Ty->getElementType());
-      assert(ArrayTy &&
-             "Expected a pointer to an array to represent joint matrix type");
-      return ArrayTy->getNumElements();
-    };
-    TypeLayout[0] = GetTypeLayout(PtrTy);
-    for (size_t I = 1; I != 4; ++I)
-      TypeLayout[I] = GetTypeLayout(ArrayTy);
+    ArrayType *ArrayTy = dyn_cast<ArrayType>(PtrTy->getPointerElementType());
+    TypeLayout[0] = ArrayTy->getNumElements();
+    for (size_t I = 1; I != 4; ++I) {
+      ArrayTy = dyn_cast<ArrayType>(ArrayTy->getElementType());
+      TypeLayout[I] = ArrayTy->getNumElements();
+    }
 
     auto *ElemTy = ArrayTy->getElementType();
     std::string ElemTyStr;
diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp
index 05c352166be77..3e351f8b7a8f8 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL.cpp
@@ -912,7 +912,7 @@ void SPIRVToOCLBase::visitCallSPIRVAvcINTELEvaluateBuiltIn(CallInst *CI,
         // reference image
         int NumImages = std::count_if(Args.begin(), Args.end(), [](Value *Arg) {
           if (auto *PT = dyn_cast<PointerType>(Arg->getType())) {
-            if (auto *ST = dyn_cast<StructType>(PT->getElementType())) {
+            if (auto *ST = dyn_cast<StructType>(PT->getPointerElementType())) {
               if (ST->getName().startswith("spirv.VmeImageINTEL"))
                 return true;
             }
diff --git a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp
index c873cd157749b..27fe8d2e07f7c 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVToOCL20.cpp
@@ -181,7 +181,7 @@ Instruction *SPIRVToOCL20Base::visitCallSPIRVAtomicIncDec(CallInst *CI, Op OC) {
             OC == OpAtomicIIncrement ? OpAtomicIAdd : OpAtomicISub);
         auto Ptr = findFirstPtr(Args);
         Type *ValueTy =
-            cast<PointerType>(Args[Ptr]->getType())->getElementType();
+            cast<PointerType>(Args[Ptr]->getType())->getPointerElementType();
         assert(ValueTy->isIntegerTy());
         Args.insert(Args.begin() + 1, llvm::ConstantInt::get(ValueTy, 1));
         return Name;
@@ -256,7 +256,8 @@ Instruction *SPIRVToOCL20Base::visitCallSPIRVAtomicCmpExchg(CallInst *CI) {
         new StoreInst(Args[1], PExpected, PInsertBefore);
         unsigned AddrSpc = SPIRAS_Generic;
         Type *PtrTyAS =
-            PExpected->getType()->getElementType()->getPointerTo(AddrSpc);
+            PExpected->getType()->getPointerElementType()->getPointerTo(
+                AddrSpc);
         Args[1] = CastInst::CreatePointerBitCastOrAddrSpaceCast(
             PExpected, PtrTyAS, PExpected->getName() + ".as", PInsertBefore);
         std::swap(Args[3], Args[4]);
diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp
index 11d3e23e726d3..b1b68be8bfac4 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp
@@ -248,7 +248,7 @@ bool isVoidFuncTy(FunctionType *FT) { return FT->getReturnType()->isVoidTy(); }
 
 bool isPointerToOpaqueStructType(llvm::Type *Ty) {
   if (auto PT = dyn_cast<PointerType>(Ty))
-    if (auto ST = dyn_cast<StructType>(PT->getElementType()))
+    if (auto *ST = dyn_cast<StructType>(PT->getPointerElementType()))
       if (ST->isOpaque())
         return true;
   return false;
@@ -256,7 +256,7 @@ bool isPointerToOpaqueStructType(llvm::Type *Ty) {
 
 bool isPointerToOpaqueStructType(llvm::Type *Ty, const std::string &Name) {
   if (auto PT = dyn_cast<PointerType>(Ty))
-    if (auto ST = dyn_cast<StructType>(PT->getElementType()))
+    if (auto *ST = dyn_cast<StructType>(PT->getPointerElementType()))
       if (ST->isOpaque() && ST->getName() == Name)
         return true;
   return false;
@@ -264,7 +264,7 @@ bool isPointerToOpaqueStructType(llvm::Type *Ty, const std::string &Name) {
 
 bool isSPIRVSamplerType(llvm::Type *Ty) {
   if (auto *PT = dyn_cast<PointerType>(Ty))
-    if (auto *ST = dyn_cast<StructType>(PT->getElementType()))
+    if (auto *ST = dyn_cast<StructType>(PT->getPointerElementType()))
       if (ST->isOpaque()) {
         auto Name = ST->getName();
         if (Name.startswith(std::string(kSPIRVTypeName::PrefixAndDelim) +
@@ -277,7 +277,7 @@ bool isSPIRVSamplerType(llvm::Type *Ty) {
 
 bool isOCLImageType(llvm::Type *Ty, StringRef *Name) {
   if (auto PT = dyn_cast<PointerType>(Ty))
-    if (auto ST = dyn_cast<StructType>(PT->getElementType()))
+    if (auto *ST = dyn_cast<StructType>(PT->getPointerElementType()))
       if (ST->isOpaque()) {
         auto FullName = ST->getName();
         if (FullName.find(kSPR2TypeName::ImagePrefix) == 0) {
@@ -294,7 +294,7 @@ bool isOCLImageType(llvm::Type *Ty, StringRef *Name) {
 ///   type Name as spirv.BaseTyName.Postfixes.
 bool isSPIRVType(llvm::Type *Ty, StringRef BaseTyName, StringRef *Postfix) {
   if (auto PT = dyn_cast<PointerType>(Ty))
-    if (auto ST = dyn_cast<StructType>(PT->getElementType()))
+    if (auto *ST = dyn_cast<StructType>(PT->getPointerElementType()))
       if (ST->isOpaque()) {
         auto FullName = ST->getName();
         std::string N =
diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
index abdf94ad7b932..79b18d916cea1 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
@@ -4235,7 +4235,7 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI,
     // the original return type.
     if (CI->hasStructRetAttr()) {
       assert(ResTy->isVoidTy() && "Return type is not void");
-      ResTy = cast<PointerType>(OpItr->getType())->getElementType();
+      ResTy = cast<PointerType>(OpItr->getType())->getPointerElementType();
       OpItr++;
     }
 
@@ -4326,7 +4326,7 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI,
     // the original return type.
     if (CI->hasStructRetAttr()) {
       assert(ResTy->isVoidTy() && "Return type is not void");
-      ResTy = cast<PointerType>(OpItr->getType())->getElementType();
+      ResTy = cast<PointerType>(OpItr->getType())->getPointerElementType();
       OpItr++;
     }
 
@@ -4403,7 +4403,7 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI,
     // the original return type.
     if (CI->hasStructRetAttr()) {
       assert(ResTy->isVoidTy() && "Return type is not void");
-      ResTy = cast<PointerType>(OpItr->getType())->getElementType();
+      ResTy = cast<PointerType>(OpItr->getType())->getPointerElementType();
       OpItr++;
     }
 

From b3db4ce9c0861ed9a459628c04af5f48097b56d8 Mon Sep 17 00:00:00 2001
From: Wenju He <wenju.he@intel.com>
Date: Wed, 26 Jan 2022 21:44:41 +0800
Subject: [PATCH 935/946] Move SPIRVLowerConstExprPass to header file

So that it could be used in downstream.

Original commit:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/d3884d8
---
 llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp | 35 ++++-----------
 llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.h   | 47 ++++++++++++++++++++
 2 files changed, 55 insertions(+), 27 deletions(-)
 create mode 100644 llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.h

diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp b/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp
index 886a7a2a1a893..b28b41e674488 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.cpp
@@ -37,6 +37,7 @@
 //===----------------------------------------------------------------------===//
 #define DEBUG_TYPE "spv-lower-const-expr"
 
+#include "SPIRVLowerConstExpr.h"
 #include "OCLUtil.h"
 #include "SPIRVInternal.h"
 #include "SPIRVMDBuilder.h"
@@ -65,29 +66,6 @@ cl::opt<bool> SPIRVLowerConst(
     "spirv-lower-const-expr", cl::init(true),
     cl::desc("LLVM/SPIR-V translation enable lowering constant expression"));
 
-class SPIRVLowerConstExprBase {
-public:
-  SPIRVLowerConstExprBase() : M(nullptr), Ctx(nullptr) {}
-
-  bool runLowerConstExpr(Module &M);
-  void visit(Module *M);
-
-private:
-  Module *M;
-  LLVMContext *Ctx;
-};
-
-class SPIRVLowerConstExprPass
-    : public llvm::PassInfoMixin<SPIRVLowerConstExprPass>,
-      public SPIRVLowerConstExprBase {
-public:
-  llvm::PreservedAnalyses run(llvm::Module &M,
-                              llvm::ModuleAnalysisManager &MAM) {
-    return runLowerConstExpr(M) ? llvm::PreservedAnalyses::none()
-                                : llvm::PreservedAnalyses::all();
-  }
-};
-
 class SPIRVLowerConstExprLegacy : public ModulePass,
                                   public SPIRVLowerConstExprBase {
 public:
@@ -110,11 +88,11 @@ bool SPIRVLowerConstExprBase::runLowerConstExpr(Module &Module) {
   Ctx = &M->getContext();
 
   LLVM_DEBUG(dbgs() << "Enter SPIRVLowerConstExpr:\n");
-  visit(M);
+  bool Changed = visit(M);
 
   verifyRegularizationPass(*M, "SPIRVLowerConstExpr");
 
-  return true;
+  return Changed;
 }
 
 /// Since SPIR-V cannot represent constant expression, constant expressions
@@ -126,7 +104,8 @@ bool SPIRVLowerConstExprBase::runLowerConstExpr(Module &Module) {
 /// is replaced by one instruction.
 /// ToDo: remove redundant instructions for common subexpression
 
-void SPIRVLowerConstExprBase::visit(Module *M) {
+bool SPIRVLowerConstExprBase::visit(Module *M) {
+  bool Changed = false;
   for (auto &I : M->functions()) {
     std::list<Instruction *> WorkList;
     for (auto &BI : I) {
@@ -138,7 +117,7 @@ void SPIRVLowerConstExprBase::visit(Module *M) {
     while (!WorkList.empty()) {
       auto II = WorkList.front();
 
-      auto LowerOp = [&II, &FBegin, &I](Value *V) -> Value * {
+      auto LowerOp = [&II, &FBegin, &I, &Changed](Value *V) -> Value * {
         if (isa<Function>(V))
           return V;
         auto *CE = cast<ConstantExpr>(V);
@@ -163,6 +142,7 @@ void SPIRVLowerConstExprBase::visit(Module *M) {
               ReplInst->moveBefore(User);
           User->replaceUsesOfWith(CE, ReplInst);
         }
+        Changed = true;
         return ReplInst;
       };
 
@@ -190,6 +170,7 @@ void SPIRVLowerConstExprBase::visit(Module *M) {
       }
     }
   }
+  return Changed;
 }
 
 } // namespace SPIRV
diff --git a/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.h b/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.h
new file mode 100644
index 0000000000000..e6bae9ae9d2ee
--- /dev/null
+++ b/llvm-spirv/lib/SPIRV/SPIRVLowerConstExpr.h
@@ -0,0 +1,47 @@
+//===- SPIRVLowerConstExpr.h - Lower constant expression --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file SPIRVLowerConstExpr.h
+///
+/// This file declares SPIRVLowerConstExprPass that lowers constant expression.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef SPIRV_LOWERCONSTEXPR_H
+#define SPIRV_LOWERCONSTEXPR_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace SPIRV {
+
+class SPIRVLowerConstExprBase {
+public:
+  SPIRVLowerConstExprBase() : M(nullptr), Ctx(nullptr) {}
+
+  bool runLowerConstExpr(llvm::Module &M);
+  bool visit(llvm::Module *M);
+
+private:
+  llvm::Module *M;
+  llvm::LLVMContext *Ctx;
+};
+
+class SPIRVLowerConstExprPass
+    : public llvm::PassInfoMixin<SPIRVLowerConstExprPass>,
+      public SPIRVLowerConstExprBase {
+public:
+  llvm::PreservedAnalyses run(llvm::Module &M,
+                              llvm::ModuleAnalysisManager &MAM) {
+    return runLowerConstExpr(M) ? llvm::PreservedAnalyses::none()
+                                : llvm::PreservedAnalyses::all();
+  }
+};
+
+} // namespace SPIRV
+
+#endif // SPIRV_LOWERCONSTEXPR_H

From e4ee512e011b3b4d4943730737215c01ab115be7 Mon Sep 17 00:00:00 2001
From: Steffen Larsen <steffen.larsen@intel.com>
Date: Thu, 27 Jan 2022 12:16:05 +0000
Subject: [PATCH 936/946] Implements spirv.Decorations and
 spirv.ParameterDecorations metadata translation (#1346)

This patch adds translation to and from `spirv.Decorations` and
`spirv.ParameterDecorations` metadata. These metadata nodes represent
global variable and function parameter decorations respectively,
allowing explicit decoration in LLVM IR.

Signed-off-by: Steffen Larsen <steffen.larsen@intel.com>

Original commit:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/ccdbbdb
---
 llvm-spirv/lib/SPIRV/SPIRVInternal.h          |   3 +
 llvm-spirv/lib/SPIRV/SPIRVReader.cpp          | 150 ++++++++++++++----
 llvm-spirv/lib/SPIRV/SPIRVReader.h            |   2 +
 llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp   |   4 +-
 llvm-spirv/lib/SPIRV/SPIRVWriter.cpp          | 147 +++++++++++++++++
 llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.cpp  |   8 +
 llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h    |   2 +
 .../test/spirv_global_variable_decoration.ll  |  38 +++++
 llvm-spirv/test/spirv_param_decorations.ll    |  55 +++++++
 .../test/spirv_param_decorations_quals.ll     |  48 ++++++
 10 files changed, 425 insertions(+), 32 deletions(-)
 create mode 100644 llvm-spirv/test/spirv_global_variable_decoration.ll
 create mode 100644 llvm-spirv/test/spirv_param_decorations.ll
 create mode 100644 llvm-spirv/test/spirv_param_decorations_quals.ll

diff --git a/llvm-spirv/lib/SPIRV/SPIRVInternal.h b/llvm-spirv/lib/SPIRV/SPIRVInternal.h
index 687f337f8f0f1..727c00af67e12 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVInternal.h
+++ b/llvm-spirv/lib/SPIRV/SPIRVInternal.h
@@ -289,6 +289,9 @@ typedef SPIRVMap<spv::Scope, std::string> SPIRVMatrixScopeMap;
 #define SPIR_MD_KERNEL_ARG_TYPE_QUAL "kernel_arg_type_qual"
 #define SPIR_MD_KERNEL_ARG_NAME "kernel_arg_name"
 
+#define SPIRV_MD_PARAMETER_DECORATIONS "spirv.ParameterDecorations"
+#define SPIRV_MD_DECORATIONS "spirv.Decorations"
+
 #define OCL_TYPE_NAME_SAMPLER_T "sampler_t"
 #define SPIR_TYPE_NAME_EVENT_T "opencl.event_t"
 #define SPIR_TYPE_NAME_CLK_EVENT_T "opencl.clk_event_t"
diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
index b5e216c534cce..94adcab6ebf8b 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
@@ -160,7 +160,7 @@ static void addNamedMetadataStringSet(LLVMContext *Context, Module *M,
   NamedMD->addOperand(MDNode::get(*Context, ValueVec));
 }
 
-static void addOCLKernelArgumentMetadata(
+static void addKernelArgumentMetadata(
     LLVMContext *Context, const std::string &MDName, SPIRVFunction *BF,
     llvm::Function *Fn,
     std::function<Metadata *(SPIRVFunctionParameter *)> ForeachFnArg) {
@@ -3553,6 +3553,72 @@ void SPIRVToLLVM::transGlobalAnnotations() {
   }
 }
 
+static llvm::MDNode *
+transDecorationsToMetadataList(llvm::LLVMContext *Context,
+                               std::vector<SPIRVDecorate const *> Decorates) {
+  SmallVector<Metadata *, 4> MDs;
+  MDs.reserve(Decorates.size());
+  for (const auto *Deco : Decorates) {
+    std::vector<Metadata *> OPs;
+    auto *KindMD = ConstantAsMetadata::get(
+        ConstantInt::get(Type::getInt32Ty(*Context), Deco->getDecorateKind()));
+    OPs.push_back(KindMD);
+    switch (static_cast<size_t>(Deco->getDecorateKind())) {
+    case DecorationLinkageAttributes: {
+      const auto *const LinkAttrDeco =
+          static_cast<const SPIRVDecorateLinkageAttr *>(Deco);
+      auto *const LinkNameMD =
+          MDString::get(*Context, LinkAttrDeco->getLinkageName());
+      auto *const LinkTypeMD = ConstantAsMetadata::get(ConstantInt::get(
+          Type::getInt32Ty(*Context), LinkAttrDeco->getLinkageType()));
+      OPs.push_back(LinkNameMD);
+      OPs.push_back(LinkTypeMD);
+      break;
+    }
+    case DecorationMergeINTEL: {
+      const auto MergeAttrLits = Deco->getVecLiteral();
+      std::string FirstString = getString(MergeAttrLits);
+      std::string SecondString =
+          getString(MergeAttrLits.cbegin() + getVec(FirstString).size(),
+                    MergeAttrLits.cend());
+      OPs.push_back(MDString::get(*Context, FirstString));
+      OPs.push_back(MDString::get(*Context, SecondString));
+      break;
+    }
+    case DecorationMemoryINTEL:
+    case DecorationUserSemantic: {
+      auto *const StrMD =
+          MDString::get(*Context, getString(Deco->getVecLiteral()));
+      OPs.push_back(StrMD);
+      break;
+    }
+    default: {
+      for (const SPIRVWord Lit : Deco->getVecLiteral()) {
+        auto *const LitMD = ConstantAsMetadata::get(
+            ConstantInt::get(Type::getInt32Ty(*Context), Lit));
+        OPs.push_back(LitMD);
+      }
+      break;
+    }
+    }
+    MDs.push_back(MDNode::get(*Context, OPs));
+  }
+  return MDNode::get(*Context, MDs);
+}
+
+void SPIRVToLLVM::transVarDecorationsToMetadata(SPIRVValue *BV, Value *V) {
+  if (!BV->isVariable())
+    return;
+
+  if (auto *GV = dyn_cast<GlobalVariable>(V)) {
+    std::vector<SPIRVDecorate const *> Decorates = BV->getDecorations();
+    if (!Decorates.empty()) {
+      MDNode *MDList = transDecorationsToMetadataList(Context, Decorates);
+      GV->setMetadata(SPIRV_MD_DECORATIONS, MDList);
+    }
+  }
+}
+
 bool SPIRVToLLVM::transDecoration(SPIRVValue *BV, Value *V) {
   if (!transAlign(BV, V))
     return false;
@@ -3560,6 +3626,10 @@ bool SPIRVToLLVM::transDecoration(SPIRVValue *BV, Value *V) {
   transIntelFPGADecorations(BV, V);
   transMemAliasingINTELDecorations(BV, V);
 
+  // Decoration metadata is only enabled in SPIR-V friendly mode
+  if (BM->getDesiredBIsRepresentation() == BIsRepresentation::SPIRVFriendlyIR)
+    transVarDecorationsToMetadata(BV, V);
+
   DbgTran->transDbgInfo(BV, V);
   return true;
 }
@@ -3691,6 +3761,23 @@ static bool transKernelArgTypeMedataFromString(LLVMContext *Ctx,
   return true;
 }
 
+void SPIRVToLLVM::transFunctionDecorationsToMetadata(SPIRVFunction *BF,
+                                                     Function *F) {
+  size_t TotalParameterDecorations = 0;
+  BF->foreachArgument([&](SPIRVFunctionParameter *Arg) {
+    TotalParameterDecorations += Arg->getNumDecorations();
+  });
+  if (TotalParameterDecorations == 0)
+    return;
+
+  // Generate metadata for spirv.ParameterDecorations
+  addKernelArgumentMetadata(Context, SPIRV_MD_PARAMETER_DECORATIONS, BF, F,
+                            [=](SPIRVFunctionParameter *Arg) {
+                              return transDecorationsToMetadataList(
+                                  Context, Arg->getDecorations());
+                            });
+}
+
 bool SPIRVToLLVM::transMetadata() {
   SmallVector<Function *, 2> CtorKernels;
   for (unsigned I = 0, E = BM->getNumFunctions(); I != E; ++I) {
@@ -3702,6 +3789,10 @@ bool SPIRVToLLVM::transMetadata() {
     transVectorComputeMetadata(BF);
     transFPGAFunctionMetadata(BF, F);
 
+    // Decoration metadata is only enabled in SPIR-V friendly mode
+    if (BM->getDesiredBIsRepresentation() == BIsRepresentation::SPIRVFriendlyIR)
+      transFunctionDecorationsToMetadata(BF, F);
+
     if (BF->hasDecorate(internal::DecorationCallableFunctionINTEL))
       F->addFnAttr(kVCMetadata::VCCallable);
     if (isKernel(BF) &&
@@ -3806,7 +3897,7 @@ bool SPIRVToLLVM::transOCLMetadata(SPIRVFunction *BF) {
     return true;
 
   // Generate metadata for kernel_arg_addr_space
-  addOCLKernelArgumentMetadata(
+  addKernelArgumentMetadata(
       Context, SPIR_MD_KERNEL_ARG_ADDR_SPACE, BF, F,
       [=](SPIRVFunctionParameter *Arg) {
         SPIRVType *ArgTy = Arg->getType();
@@ -3819,31 +3910,31 @@ bool SPIRVToLLVM::transOCLMetadata(SPIRVFunction *BF) {
             ConstantInt::get(Type::getInt32Ty(*Context), AS));
       });
   // Generate metadata for kernel_arg_access_qual
-  addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_ACCESS_QUAL, BF, F,
-                               [=](SPIRVFunctionParameter *Arg) {
-                                 std::string Qual;
-                                 auto T = Arg->getType();
-                                 if (T->isTypeOCLImage()) {
-                                   auto ST = static_cast<SPIRVTypeImage *>(T);
-                                   Qual = transOCLImageTypeAccessQualifier(ST);
-                                 } else if (T->isTypePipe()) {
-                                   auto PT = static_cast<SPIRVTypePipe *>(T);
-                                   Qual = transOCLPipeTypeAccessQualifier(PT);
-                                 } else
-                                   Qual = "none";
-                                 return MDString::get(*Context, Qual);
-                               });
+  addKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_ACCESS_QUAL, BF, F,
+                            [=](SPIRVFunctionParameter *Arg) {
+                              std::string Qual;
+                              auto *T = Arg->getType();
+                              if (T->isTypeOCLImage()) {
+                                auto *ST = static_cast<SPIRVTypeImage *>(T);
+                                Qual = transOCLImageTypeAccessQualifier(ST);
+                              } else if (T->isTypePipe()) {
+                                auto *PT = static_cast<SPIRVTypePipe *>(T);
+                                Qual = transOCLPipeTypeAccessQualifier(PT);
+                              } else
+                                Qual = "none";
+                              return MDString::get(*Context, Qual);
+                            });
   // Generate metadata for kernel_arg_type
   if (!transKernelArgTypeMedataFromString(Context, BM, F,
                                           SPIR_MD_KERNEL_ARG_TYPE))
-    addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_TYPE, BF, F,
-                                 [=](SPIRVFunctionParameter *Arg) {
-                                   return transOCLKernelArgTypeName(Arg);
-                                 });
+    addKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_TYPE, BF, F,
+                              [=](SPIRVFunctionParameter *Arg) {
+                                return transOCLKernelArgTypeName(Arg);
+                              });
   // Generate metadata for kernel_arg_type_qual
   if (!transKernelArgTypeMedataFromString(Context, BM, F,
                                           SPIR_MD_KERNEL_ARG_TYPE_QUAL))
-    addOCLKernelArgumentMetadata(
+    addKernelArgumentMetadata(
         Context, SPIR_MD_KERNEL_ARG_TYPE_QUAL, BF, F,
         [=](SPIRVFunctionParameter *Arg) {
           std::string Qual;
@@ -3861,17 +3952,16 @@ bool SPIRVToLLVM::transOCLMetadata(SPIRVFunction *BF) {
           return MDString::get(*Context, Qual);
         });
   // Generate metadata for kernel_arg_base_type
-  addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_BASE_TYPE, BF, F,
-                               [=](SPIRVFunctionParameter *Arg) {
-                                 return transOCLKernelArgTypeName(Arg);
-                               });
+  addKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_BASE_TYPE, BF, F,
+                            [=](SPIRVFunctionParameter *Arg) {
+                              return transOCLKernelArgTypeName(Arg);
+                            });
   // Generate metadata for kernel_arg_name
   if (BM->isGenArgNameMDEnabled()) {
-    addOCLKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_NAME, BF, F,
-                                 [=](SPIRVFunctionParameter *Arg) {
-                                   return MDString::get(*Context,
-                                                        Arg->getName());
-                                 });
+    addKernelArgumentMetadata(Context, SPIR_MD_KERNEL_ARG_NAME, BF, F,
+                              [=](SPIRVFunctionParameter *Arg) {
+                                return MDString::get(*Context, Arg->getName());
+                              });
   }
   // Generate metadata for kernel_arg_buffer_location
   addBufferLocationMetadata(Context, BF, F, [=](SPIRVFunctionParameter *Arg) {
diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.h b/llvm-spirv/lib/SPIRV/SPIRVReader.h
index 7b66fda322644..4cef668389f91 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVReader.h
+++ b/llvm-spirv/lib/SPIRV/SPIRVReader.h
@@ -241,6 +241,8 @@ class SPIRVToLLVM {
                          SmallVectorImpl<Function *> &Funcs);
   void transIntelFPGADecorations(SPIRVValue *BV, Value *V);
   void transMemAliasingINTELDecorations(SPIRVValue *BV, Value *V);
+  void transVarDecorationsToMetadata(SPIRVValue *BV, Value *V);
+  void transFunctionDecorationsToMetadata(SPIRVFunction *BF, Function *F);
 }; // class SPIRVToLLVM
 
 } // namespace SPIRV
diff --git a/llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp b/llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp
index 8867c5d1a715f..ccf00488df6f5 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp
@@ -548,7 +548,7 @@ DINode *SPIRVToLLVMDbgTran::transFunction(const SPIRVExtInst *DebugInst) {
     SPIRVFunction *BF = static_cast<SPIRVFunction *>(E);
     llvm::Function *F = SPIRVReader->transFunction(BF);
     assert(F && "Translation of function failed!");
-    if (!F->hasMetadata())
+    if (!F->hasMetadata("dbg"))
       F->setMetadata("dbg", DIS);
   }
   return DIS;
@@ -657,7 +657,7 @@ MDNode *SPIRVToLLVMDbgTran::transGlobalVariable(const SPIRVExtInst *DebugInst) {
     SPIRVValue *V = BM->get<SPIRVValue>(Ops[VariableIdx]);
     Value *Var = SPIRVReader->transValue(V, nullptr, nullptr);
     llvm::GlobalVariable *GV = dyn_cast_or_null<llvm::GlobalVariable>(Var);
-    if (GV && !GV->hasMetadata())
+    if (GV && !GV->hasMetadata("dbg"))
       GV->addMetadata("dbg", *VarDecl);
   }
   return VarDecl;
diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
index 79b18d916cea1..2a23f36310e96 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
@@ -106,6 +106,15 @@ static void foreachKernelArgMD(
   }
 }
 
+static void foreachKernelArgMD(
+    MDNode *MD, SPIRVFunction *BF,
+    std::function<void(Metadata *MDOp, SPIRVFunctionParameter *BA)> Func) {
+  for (unsigned I = 0, E = MD->getNumOperands(); I != E; ++I) {
+    SPIRVFunctionParameter *BA = BF->getArgument(I);
+    Func(MD->getOperand(I), BA);
+  }
+}
+
 static SPIRVMemoryModelKind getMemoryModel(Module &M) {
   auto *MemoryModelMD = M.getNamedMetadata(kSPIRVMD::MemoryModel);
   if (MemoryModelMD && (MemoryModelMD->getNumOperands() > 0)) {
@@ -2053,6 +2062,138 @@ void addFuncPointerCallArgumentAttributes(CallInst *CI,
   }
 }
 
+#define ONE_STRING_DECORATION_CASE(NAME, NAMESPACE)                            \
+  case NAMESPACE::Decoration##NAME: {                                          \
+    assert(NumOperands == 2 && #NAME " requires exactly 1 extra operand");     \
+    auto *StrDecoEO = dyn_cast<MDString>(DecoMD->getOperand(1));               \
+    assert(StrDecoEO &&#NAME " requires extra operand to be a string");        \
+    Target->addDecorate(                                                       \
+        new SPIRVDecorate##NAME##Attr(Target, StrDecoEO->getString().str()));  \
+    break;                                                                     \
+  }
+
+#define ONE_INT_DECORATION_CASE(NAME, NAMESPACE, TYPE)                         \
+  case NAMESPACE::Decoration##NAME: {                                          \
+    assert(NumOperands == 2 && #NAME " requires exactly 1 extra operand");     \
+    auto *IntDecoEO =                                                          \
+        mdconst::dyn_extract<ConstantInt>(DecoMD->getOperand(1));              \
+    assert(IntDecoEO &&#NAME " requires extra operand to be an integer");      \
+    Target->addDecorate(new SPIRVDecorate##NAME(                               \
+        Target, static_cast<TYPE>(IntDecoEO->getZExtValue())));                \
+    break;                                                                     \
+  }
+
+#define TWO_INT_DECORATION_CASE(NAME, NAMESPACE, TYPE1, TYPE2)                 \
+  case NAMESPACE::Decoration##NAME: {                                          \
+    assert(NumOperands == 3 && #NAME " requires exactly 2 extra operand");     \
+    auto *IntDecoEO1 =                                                         \
+        mdconst::dyn_extract<ConstantInt>(DecoMD->getOperand(1));              \
+    assert(IntDecoEO1 &&#NAME                                                  \
+           " requires first extra operand to be an integer");                  \
+    auto *IntDecoEO2 =                                                         \
+        mdconst::dyn_extract<ConstantInt>(DecoMD->getOperand(2));              \
+    assert(IntDecoEO2 &&#NAME                                                  \
+           " requires second extra operand to be an integer");                 \
+    Target->addDecorate(new SPIRVDecorate##NAME(                               \
+        Target, static_cast<TYPE1>(IntDecoEO1->getZExtValue()),                \
+        static_cast<TYPE2>(IntDecoEO2->getZExtValue())));                      \
+    break;                                                                     \
+  }
+
+static void transMetadataDecorations(Metadata *MD, SPIRVEntry *Target) {
+  auto *ArgDecoMD = dyn_cast<MDNode>(MD);
+  assert(ArgDecoMD && "Decoration list must be a metadata node");
+  for (unsigned I = 0, E = ArgDecoMD->getNumOperands(); I != E; ++I) {
+    auto *DecoMD = dyn_cast<MDNode>(ArgDecoMD->getOperand(I));
+    assert(DecoMD && "Decoration does not name metadata");
+    assert(DecoMD->getNumOperands() > 0 &&
+           "Decoration metadata must have at least one operand");
+    auto *DecoKindConst =
+        mdconst::dyn_extract<ConstantInt>(DecoMD->getOperand(0));
+    assert(DecoKindConst && "First operand of decoration must be the kind");
+    auto DecoKind = static_cast<Decoration>(DecoKindConst->getZExtValue());
+
+    const size_t NumOperands = DecoMD->getNumOperands();
+    switch (static_cast<size_t>(DecoKind)) {
+      ONE_STRING_DECORATION_CASE(MemoryINTEL, spv)
+      ONE_STRING_DECORATION_CASE(UserSemantic, spv)
+      ONE_INT_DECORATION_CASE(AliasScopeINTEL, spv::internal, SPIRVId)
+      ONE_INT_DECORATION_CASE(NoAliasINTEL, spv::internal, SPIRVId)
+      ONE_INT_DECORATION_CASE(InitiationIntervalINTEL, spv::internal, SPIRVWord)
+      ONE_INT_DECORATION_CASE(MaxConcurrencyINTEL, spv::internal, SPIRVWord)
+      ONE_INT_DECORATION_CASE(PipelineEnableINTEL, spv::internal, SPIRVWord)
+      TWO_INT_DECORATION_CASE(FunctionRoundingModeINTEL, spv, SPIRVWord,
+                              FPRoundingMode);
+      TWO_INT_DECORATION_CASE(FunctionDenormModeINTEL, spv, SPIRVWord,
+                              FPDenormMode);
+      TWO_INT_DECORATION_CASE(FunctionFloatingPointModeINTEL, spv, SPIRVWord,
+                              FPOperationMode);
+      TWO_INT_DECORATION_CASE(FuseLoopsInFunctionINTEL, spv, SPIRVWord,
+                              SPIRVWord);
+      TWO_INT_DECORATION_CASE(MathOpDSPModeINTEL, spv::internal, SPIRVWord,
+                              SPIRVWord);
+    case DecorationStallEnableINTEL: {
+      Target->addDecorate(new SPIRVDecorateStallEnableINTEL(Target));
+      break;
+    }
+    case DecorationMergeINTEL: {
+      assert(NumOperands == 3 && "MergeINTEL requires exactly 3 extra operand");
+      auto *Name = dyn_cast<MDString>(DecoMD->getOperand(1));
+      assert(Name && "MergeINTEL requires first extra operand to be a string");
+      auto *Direction = dyn_cast<MDString>(DecoMD->getOperand(2));
+      assert(Direction &&
+             "MergeINTEL requires second extra operand to be a string");
+      Target->addDecorate(new SPIRVDecorateMergeINTELAttr(
+          Target, Name->getString().str(), Direction->getString().str()));
+      break;
+    }
+    case DecorationLinkageAttributes: {
+      assert(NumOperands == 3 &&
+             "LinkageAttributes requires exactly 3 extra operand");
+      auto *Name = dyn_cast<MDString>(DecoMD->getOperand(1));
+      assert(Name &&
+             "LinkageAttributes requires first extra operand to be a string");
+      auto *Type = mdconst::dyn_extract<ConstantInt>(DecoMD->getOperand(2));
+      assert(Type &&
+             "LinkageAttributes requires second extra operand to be an int");
+      auto TypeKind = static_cast<SPIRVLinkageTypeKind>(Type->getZExtValue());
+      Target->addDecorate(new SPIRVDecorateLinkageAttr(
+          Target, Name->getString().str(), TypeKind));
+      break;
+    }
+    default: {
+      if (NumOperands == 1) {
+        Target->addDecorate(new SPIRVDecorate(DecoKind, Target));
+        break;
+      }
+
+      auto *DecoValEO1 =
+          mdconst::dyn_extract<ConstantInt>(DecoMD->getOperand(1));
+      assert(DecoValEO1 &&
+             "First extra operand in default decoration case must be integer.");
+      if (NumOperands == 2) {
+        Target->addDecorate(
+            new SPIRVDecorate(DecoKind, Target, DecoValEO1->getZExtValue()));
+        break;
+      }
+
+      auto *DecoValEO2 =
+          mdconst::dyn_extract<ConstantInt>(DecoMD->getOperand(2));
+      assert(DecoValEO2 &&
+             "First extra operand in default decoration case must be integer.");
+      assert(NumOperands == 3 && "At most 2 extra operands expected.");
+      Target->addDecorate(new SPIRVDecorate(DecoKind, Target,
+                                            DecoValEO1->getZExtValue(),
+                                            DecoValEO2->getZExtValue()));
+    }
+    }
+  }
+}
+
+#undef ONE_STRING_DECORATION_CASE
+#undef ONE_INT_DECORATION_CASE
+#undef TWO_INT_DECORATION_CASE
+
 bool LLVMToSPIRVBase::transDecoration(Value *V, SPIRVValue *BV) {
   if (!transAlign(V, BV))
     return false;
@@ -2118,6 +2259,10 @@ bool LLVMToSPIRVBase::transDecoration(Value *V, SPIRVValue *BV) {
       addFuncPointerCallArgumentAttributes(CI, BV);
   }
 
+  if (auto *GV = dyn_cast<GlobalVariable>(V))
+    if (auto *GVDecoMD = GV->getMetadata(SPIRV_MD_DECORATIONS))
+      transMetadataDecorations(GVDecoMD, BV);
+
   return true;
 }
 
@@ -4123,6 +4268,8 @@ bool LLVMToSPIRVBase::transOCLMetadata() {
             BM->setName(BA, Str);
           });
     }
+    if (auto *KernArgDecoMD = F.getMetadata(SPIRV_MD_PARAMETER_DECORATIONS))
+      foreachKernelArgMD(KernArgDecoMD, BF, transMetadataDecorations);
   }
   return true;
 }
diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.cpp b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.cpp
index 443b5a8a40f5d..cc1135e6cd58d 100644
--- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.cpp
+++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.cpp
@@ -441,6 +441,14 @@ SPIRVEntry::getDecorations(Decoration Kind) const {
   return Decors;
 }
 
+std::vector<SPIRVDecorate const *> SPIRVEntry::getDecorations() const {
+  std::vector<SPIRVDecorate const *> Decors;
+  Decors.reserve(Decorates.size());
+  for (auto &DecoPair : Decorates)
+    Decors.push_back(DecoPair.second);
+  return Decors;
+}
+
 std::set<SPIRVId> SPIRVEntry::getDecorateId(Decoration Kind,
                                             size_t Index) const {
   auto Range = DecorateIds.equal_range(Kind);
diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h
index acb2aab1f4ab4..6c94c27a1f8e0 100644
--- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h
+++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEntry.h
@@ -297,6 +297,7 @@ class SPIRVEntry {
     return {};
   }
   const std::string &getName() const { return Name; }
+  size_t getNumDecorations() const { return Decorates.size(); }
   bool hasDecorate(Decoration Kind, size_t Index = 0,
                    SPIRVWord *Result = 0) const;
   bool hasDecorateId(Decoration Kind, size_t Index = 0,
@@ -314,6 +315,7 @@ class SPIRVEntry {
                                    SPIRVWord MemberNumber) const;
   std::set<SPIRVWord> getDecorate(Decoration Kind, size_t Index = 0) const;
   std::vector<SPIRVDecorate const *> getDecorations(Decoration Kind) const;
+  std::vector<SPIRVDecorate const *> getDecorations() const;
   std::set<SPIRVId> getDecorateId(Decoration Kind, size_t Index = 0) const;
   std::vector<SPIRVDecorateId const *> getDecorationIds(Decoration Kind) const;
   bool hasId() const { return !(Attrib & SPIRVEA_NOID); }
diff --git a/llvm-spirv/test/spirv_global_variable_decoration.ll b/llvm-spirv/test/spirv_global_variable_decoration.ll
new file mode 100644
index 0000000000000..a0212bf52d8ea
--- /dev/null
+++ b/llvm-spirv/test/spirv_global_variable_decoration.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv -r %t.spv --spirv-target-env=SPV-IR -o %t.rev.bc
+; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir"
+
+@v1 = addrspace(1) global i32 42, !spirv.Decorations !2
+@v2 = addrspace(1) global float 1.0, !spirv.Decorations !4
+
+; CHECK-SPIRV: Decorate [[PId1:[0-9]+]] Constant
+; CHECK-SPIRV: Decorate [[PId2:[0-9]+]] Constant
+; CHECK-SPIRV: Decorate [[PId2]] Binding 1
+; CHECK-SPIRV: Variable {{[0-9]+}} [[PId1]]
+; CHECK-SPIRV: Variable {{[0-9]+}} [[PId2]]
+
+!1 = !{i32 22}
+!2 = !{!1}
+!3 = !{i32 33, i32 1}
+!4 = !{!1, !3}
+
+; CHECK-SPV-IR: @v1 = addrspace(1) constant i32 42, !spirv.Decorations ![[Var1DecosId:[0-9]+]]
+; CHECK-SPV-IR: @v2 = addrspace(1) constant float 1.000000e+00, !spirv.Decorations ![[Var2DecosId:[0-9]+]]
+; CHECK-SPV-IR-DAG: ![[Var1DecosId]] = !{![[Deco1Id:[0-9]+]], ![[LinkageDeco1Id:[0-9]+]]}
+; CHECK-SPV-IR-DAG: ![[Var2DecosId]] = !{![[Deco1Id]], ![[Deco2Id:[0-9]+]], ![[LinkageDeco2Id:[0-9]+]]}
+; CHECK-SPV-IR-DAG: ![[Deco1Id]] = !{i32 22}
+; CHECK-SPV-IR-DAG: ![[Deco2Id]] = !{i32 33, i32 1}
+; CHECK-SPV-IR-DAG: ![[LinkageDeco1Id]] = !{i32 41, !"v1", i32 0}
+; CHECK-SPV-IR-DAG: ![[LinkageDeco2Id]] = !{i32 41, !"v2", i32 0}
+
+; CHECK-LLVM-NOT: @v1 = {{.*}}, !spirv.Decorations !{{[0-9]+}}
+; CHECK-LLVM-NOT: @v2 = {{.*}}, !spirv.Decorations !{{[0-9]+}}
+; CHECK-LLVM: @v1 = addrspace(1) constant i32 42
+; CHECK-LLVM: @v2 = addrspace(1) constant float 1.000000e+00
diff --git a/llvm-spirv/test/spirv_param_decorations.ll b/llvm-spirv/test/spirv_param_decorations.ll
new file mode 100644
index 0000000000000..10477ce0b4ec0
--- /dev/null
+++ b/llvm-spirv/test/spirv_param_decorations.ll
@@ -0,0 +1,55 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv -r %t.spv --spirv-target-env=SPV-IR -o %t.rev.bc
+; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
+; RUN: llvm-spirv -r %t.spv --spirv-target-env=SPV-IR -o %t.rev.bc
+; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir"
+
+; Function Attrs: convergent nounwind
+define spir_kernel void @k(float %a, float %b, float %c) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_type_qual !7 !kernel_arg_base_type !6 !spirv.ParameterDecorations !14 {
+entry:
+  ret void
+}
+
+; CHECK-SPIRV: Decorate [[PId1:[0-9]+]] Restrict
+; CHECK-SPIRV: Decorate [[PId1]] FPRoundingMode 2
+; CHECK-SPIRV: Decorate [[PId2:[0-9]+]] Volatile
+; CHECK-SPIRV: FunctionParameter {{[0-9]+}} [[PId1]]
+; CHECK-SPIRV: FunctionParameter {{[0-9]+}} [[PId2]]
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 0}
+!2 = !{i32 1, i32 2}
+!3 = !{!"clang version 14.0.0"}
+!4 = !{i32 0, i32 0, i32 0}
+!5 = !{!"none", !"none", !"none"}
+!6 = !{!"float", !"float", !"float"}
+!7 = !{!"", !"", !""}
+!8 = !{i32 19}
+!9 = !{i32 39, i32 2}
+!10 = !{i32 21}
+!11 = !{!8, !9}
+!12 = !{}
+!13 = !{!10}
+!14 = !{!11, !12, !13}
+
+; CHECK-SPV-IR: define spir_kernel void @k(float %a, float %b, float %c) {{.*}} !spirv.ParameterDecorations ![[ParamDecoListId:[0-9]+]] {
+; CHECK-SPV-IR-DAG: ![[ParamDecoListId]] = !{![[Param1DecoId:[0-9]+]], ![[Param2DecoId:[0-9]+]], ![[Param3DecoId:[0-9]+]]}
+; CHECK-SPV-IR-DAG: ![[Param1DecoId]] = !{![[Deco1Id:[0-9]+]], ![[Deco2Id:[0-9]+]]}
+; CHECK-SPV-IR-DAG: ![[Param2DecoId]] = !{}
+; CHECK-SPV-IR-DAG: ![[Param3DecoId]] = !{![[Deco3Id:[0-9]+]]}
+; CHECK-SPV-IR-DAG: ![[Deco1Id]] = !{i32 19}
+; CHECK-SPV-IR-DAG: ![[Deco2Id]] = !{i32 39, i32 2}
+; CHECK-SPV-IR-DAG: ![[Deco3Id]] = !{i32 21}
+
+; CHECK-LLVM-NOT: define spir_kernel void @k(float %a, float %b, float %c) {{.*}} !spirv.ParameterDecorations ![[ParamDecoListId:[0-9]+]] {
+; CHECK-LLVM: define spir_kernel void @k(float %a, float %b, float %c) {{.*}} {
diff --git a/llvm-spirv/test/spirv_param_decorations_quals.ll b/llvm-spirv/test/spirv_param_decorations_quals.ll
new file mode 100644
index 0000000000000..ca35ee9a2fe35
--- /dev/null
+++ b/llvm-spirv/test/spirv_param_decorations_quals.ll
@@ -0,0 +1,48 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-spirv %t.bc -spirv-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+; RUN: llvm-spirv %t.bc -o %t.spv
+; RUN: llvm-spirv -r %t.spv --spirv-target-env=SPV-IR -o %t.rev.bc
+; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-SPV-IR
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir"
+
+; Function Attrs: convergent nounwind
+define spir_kernel void @k(i32 addrspace(1)* %a) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_type_qual !7 !kernel_arg_base_type !6 !spirv.ParameterDecorations !10 {
+entry:
+  ret void
+}
+
+; CHECK-SPIRV: Decorate [[PId:[0-9]+]] Volatile
+; CHECK-SPIRV: Decorate [[PId]] FuncParamAttr 4
+; CHECK-SPIRV: FunctionParameter {{[0-9]+}} [[PId]]
+
+!llvm.module.flags = !{!0}
+!opencl.ocl.version = !{!1}
+!opencl.spir.version = !{!2}
+!llvm.ident = !{!3}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, i32 0}
+!2 = !{i32 1, i32 2}
+!3 = !{!"clang version 14.0.0"}
+!4 = !{i32 0, i32 0, i32 0}
+!5 = !{!"none"}
+!6 = !{!"int*"}
+!7 = !{!"volatile"}
+!8 = !{i32 38, i32 4} ; FuncParamAttr NoAlias
+!9 = !{!8}
+!10 = !{!9}
+
+; CHECK-SPV-IR: define spir_kernel void @k(i32 addrspace(1)* noalias %a) {{.*}} !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]] {{.*}} !spirv.ParameterDecorations ![[ParamDecoListId:[0-9]+]] {
+; CHECK-SPV-IR-DAG: ![[ParamDecoListId]] = !{![[ParamDecoId:[0-9]+]]}
+; CHECK-SPV-IR-DAG: ![[ParamDecoId]] = !{![[VolatileDecoId:[0-9]+]], ![[NoAliasDecoId:[0-9]+]]}
+; CHECK-SPV-IR-DAG: ![[NoAliasDecoId]] = !{i32 38, i32 4}
+; CHECK-SPV-IR-DAG: ![[VolatileDecoId]] = !{i32 21}
+; CHECK-SPV-IR-DAG: ![[KernelArgTypeQual]] = !{!"volatile restrict"}
+
+; CHECK-LLVM-NOT: define spir_kernel void @k({{.*}}) {{.*}} !spirv.ParameterDecorations ![[ParamDecoListId:[0-9]+]] {
+; CHECK-LLVM: define spir_kernel void @k(i32 addrspace(1)* noalias %a) {{.*}} !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]] {{.*}} {
+; CHECK-LLVM-DAG: ![[KernelArgTypeQual]] = !{!"volatile restrict"}

From dde90d01a0484c0a8add0d624d335abb89fa948a Mon Sep 17 00:00:00 2001
From: Alexey Sotkin <alexey.sotkin@intel.com>
Date: Thu, 27 Jan 2022 16:34:56 +0300
Subject: [PATCH 937/946] Add support for element-wise operations on joint
 matrix

Signed-off-by: Alexey Sotkin <alexey.sotkin@intel.com>

Original commit:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/ada2fd4
---
 llvm-spirv/lib/SPIRV/SPIRVReader.cpp          |  23 ++--
 llvm-spirv/lib/SPIRV/SPIRVWriter.cpp          |   8 ++
 .../lib/SPIRV/libSPIRV/SPIRVInstruction.h     |  15 ++-
 .../SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h  |   2 +
 llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp   |   8 +-
 llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h     |  10 +-
 .../lib/SPIRV/libSPIRV/spirv_internal.hpp     |   3 +-
 .../joint_matrix_element.ll                   | 112 ++++++++++++++++++
 8 files changed, 162 insertions(+), 19 deletions(-)
 create mode 100644 llvm-spirv/test/transcoding/SPV_INTEL_joint_matrix/joint_matrix_element.ll

diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
index 94adcab6ebf8b..22da537da1ffd 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp
@@ -2160,10 +2160,14 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F,
   }
 
   case OpVectorExtractDynamic: {
-    auto CE = static_cast<SPIRVVectorExtractDynamic *>(BV);
+    auto *VED = static_cast<SPIRVVectorExtractDynamic *>(BV);
+    SPIRVValue *Vec = VED->getVector();
+    if (Vec->getType()->getOpCode() == internal::OpTypeJointMatrixINTEL) {
+      return mapValue(BV, transSPIRVBuiltinFromInst(VED, BB));
+    }
     return mapValue(
-        BV, ExtractElementInst::Create(transValue(CE->getVector(), F, BB),
-                                       transValue(CE->getIndex(), F, BB),
+        BV, ExtractElementInst::Create(transValue(Vec, F, BB),
+                                       transValue(VED->getIndex(), F, BB),
                                        BV->getName(), BB));
   }
 
@@ -2189,12 +2193,15 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F,
   }
 
   case OpVectorInsertDynamic: {
-    auto CI = static_cast<SPIRVVectorInsertDynamic *>(BV);
+    auto *VID = static_cast<SPIRVVectorInsertDynamic *>(BV);
+    SPIRVValue *Vec = VID->getVector();
+    if (Vec->getType()->getOpCode() == internal::OpTypeJointMatrixINTEL) {
+      return mapValue(BV, transSPIRVBuiltinFromInst(VID, BB));
+    }
     return mapValue(
-        BV, InsertElementInst::Create(transValue(CI->getVector(), F, BB),
-                                      transValue(CI->getComponent(), F, BB),
-                                      transValue(CI->getIndex(), F, BB),
-                                      BV->getName(), BB));
+        BV, InsertElementInst::Create(
+                transValue(Vec, F, BB), transValue(VID->getComponent(), F, BB),
+                transValue(VID->getIndex(), F, BB), BV->getName(), BB));
   }
 
   case OpVectorShuffle: {
diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
index 2a23f36310e96..d475464bd977a 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp
@@ -4334,6 +4334,14 @@ LLVMToSPIRVBase::transBuiltinToInstWithoutDecoration(Op OC, CallInst *CI,
     return BM->addAsyncGroupCopy(BArgs[0], BArgs[1], BArgs[2], BArgs[3],
                                  BArgs[4], BArgs[5], BB);
   } break;
+  case OpVectorExtractDynamic: {
+    auto BArgs = transValue(getArguments(CI), BB);
+    return BM->addVectorExtractDynamicInst(BArgs[0], BArgs[1], BB);
+  } break;
+  case OpVectorInsertDynamic: {
+    auto BArgs = transValue(getArguments(CI), BB);
+    return BM->addVectorInsertDynamicInst(BArgs[0], BArgs[1], BArgs[2], BB);
+  } break;
   case OpSampledImage: {
     // Clang can generate SPIRV-friendly call for OpSampledImage instruction,
     // i.e. __spirv_SampledImage... But it can't generate correct return type
diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h
index 9625a26ad5867..3cd774c94d65e 100644
--- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h
+++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h
@@ -2092,6 +2092,9 @@ class SPIRVVectorExtractDynamic : public SPIRVInstruction {
 
   SPIRVValue *getVector() { return getValue(VectorId); }
   SPIRVValue *getIndex() const { return getValue(IndexId); }
+  std::vector<SPIRVValue *> getOperands() override {
+    return {getVector(), getIndex()};
+  }
 
 protected:
   _SPIRV_DEF_ENCDEC4(Type, Id, VectorId, IndexId)
@@ -2099,7 +2102,8 @@ class SPIRVVectorExtractDynamic : public SPIRVInstruction {
     SPIRVInstruction::validate();
     if (getValue(VectorId)->isForward())
       return;
-    assert(getValueType(VectorId)->isTypeVector());
+    assert(getValueType(VectorId)->isTypeVector() ||
+           getValueType(VectorId)->isTypeJointMatrixINTEL());
   }
   SPIRVId VectorId;
   SPIRVId IndexId;
@@ -2124,8 +2128,11 @@ class SPIRVVectorInsertDynamic : public SPIRVInstruction {
         IndexId(SPIRVID_INVALID), ComponentId(SPIRVID_INVALID) {}
 
   SPIRVValue *getVector() { return getValue(VectorId); }
-  SPIRVValue *getIndex() const { return getValue(IndexId); }
   SPIRVValue *getComponent() { return getValue(ComponentId); }
+  SPIRVValue *getIndex() const { return getValue(IndexId); }
+  std::vector<SPIRVValue *> getOperands() override {
+    return {getVector(), getComponent(), getIndex()};
+  }
 
 protected:
   _SPIRV_DEF_ENCDEC5(Type, Id, VectorId, ComponentId, IndexId)
@@ -2133,7 +2140,8 @@ class SPIRVVectorInsertDynamic : public SPIRVInstruction {
     SPIRVInstruction::validate();
     if (getValue(VectorId)->isForward())
       return;
-    assert(getValueType(VectorId)->isTypeVector());
+    assert(getValueType(VectorId)->isTypeVector() ||
+           getValueType(VectorId)->isTypeJointMatrixINTEL());
   }
   SPIRVId VectorId;
   SPIRVId IndexId;
@@ -3292,6 +3300,7 @@ class SPIRVJointMatrixINTELInst : public SPIRVJointMatrixINTELInstBase {
 _SPIRV_OP(JointMatrixLoad, true, 6, true)
 _SPIRV_OP(JointMatrixStore, false, 5, true)
 _SPIRV_OP(JointMatrixMad, true, 7)
+_SPIRV_OP(JointMatrixWorkItemLength, true, 4)
 #undef _SPIRV_OP
 } // namespace SPIRV
 
diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h
index 4a97deee82b57..673877867e247 100644
--- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h
+++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVOpCodeEnumInternal.h
@@ -12,3 +12,5 @@ _SPIRV_OP_INTERNAL(TypeJointMatrixINTEL, internal::OpTypeJointMatrixINTEL)
 _SPIRV_OP_INTERNAL(JointMatrixLoadINTEL, internal::OpJointMatrixLoadINTEL)
 _SPIRV_OP_INTERNAL(JointMatrixStoreINTEL, internal::OpJointMatrixStoreINTEL)
 _SPIRV_OP_INTERNAL(JointMatrixMadINTEL, internal::OpJointMatrixMadINTEL)
+_SPIRV_OP_INTERNAL(JointMatrixWorkItemLengthINTEL,
+                   internal::OpJointMatrixWorkItemLengthINTEL)
diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp
index 85d65fbe9102f..a478d71793772 100644
--- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp
+++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.cpp
@@ -111,8 +111,12 @@ SPIRVWord SPIRVType::getVectorComponentCount() const {
 }
 
 SPIRVType *SPIRVType::getVectorComponentType() const {
-  assert(OpCode == OpTypeVector && "Not vector type");
-  return static_cast<const SPIRVTypeVector *>(this)->getComponentType();
+  if (OpCode == OpTypeVector)
+    return static_cast<const SPIRVTypeVector *>(this)->getComponentType();
+  if (OpCode == internal::OpTypeJointMatrixINTEL)
+    return static_cast<const SPIRVTypeJointMatrixINTEL *>(this)->getCompType();
+  assert(0 && "getVectorComponentType(): Not a vector or joint matrix type");
+  return nullptr;
 }
 
 SPIRVWord SPIRVType::getMatrixColumnCount() const {
diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h
index 84cae518ec249..5870aa04681d8 100644
--- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h
+++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVType.h
@@ -1081,11 +1081,11 @@ class SPIRVTypeJointMatrixINTEL : public SPIRVType {
   SPIRVCapVec getRequiredCapability() const override {
     return {internal::CapabilityJointMatrixINTEL};
   }
-  SPIRVType *getCompType() { return CompType; }
-  SPIRVValue *getLayout() { return Layout; }
-  SPIRVValue *getRows() { return Rows; }
-  SPIRVValue *getColumns() { return Columns; }
-  SPIRVValue *getScope() { return Scope; }
+  SPIRVType *getCompType() const { return CompType; }
+  SPIRVValue *getLayout() const { return Layout; }
+  SPIRVValue *getRows() const { return Rows; }
+  SPIRVValue *getColumns() const { return Columns; }
+  SPIRVValue *getScope() const { return Scope; }
 };
 
 } // namespace SPIRV
diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp b/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp
index 1ea0e4ac27ee8..2fc2c1c727aa6 100644
--- a/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp
+++ b/llvm-spirv/lib/SPIRV/libSPIRV/spirv_internal.hpp
@@ -46,6 +46,7 @@ enum InternalOp {
   IOpJointMatrixStoreINTEL = 6121,
   IOpJointMatrixMadINTEL = 6122,
   IOpArithmeticFenceINTEL = 6145,
+  IOpJointMatrixWorkItemLengthINTEL = 6410,
   IOpPrev = OpMax - 2,
   IOpForward
 };
@@ -108,7 +109,7 @@ _SPIRV_OP(Op, TypeJointMatrixINTEL)
 _SPIRV_OP(Op, JointMatrixLoadINTEL)
 _SPIRV_OP(Op, JointMatrixStoreINTEL)
 _SPIRV_OP(Op, JointMatrixMadINTEL)
-
+_SPIRV_OP(Op, JointMatrixWorkItemLengthINTEL)
 _SPIRV_OP(Capability, HWThreadQueryINTEL)
 _SPIRV_OP(BuiltIn, SubDeviceIDINTEL)
 _SPIRV_OP(BuiltIn, GlobalHWThreadIDINTEL)
diff --git a/llvm-spirv/test/transcoding/SPV_INTEL_joint_matrix/joint_matrix_element.ll b/llvm-spirv/test/transcoding/SPV_INTEL_joint_matrix/joint_matrix_element.ll
new file mode 100644
index 0000000000000..11db9a064cee4
--- /dev/null
+++ b/llvm-spirv/test/transcoding/SPV_INTEL_joint_matrix/joint_matrix_element.ll
@@ -0,0 +1,112 @@
+; RUN: llvm-as < %s -o %t.bc
+; RUN: llvm-spirv %t.bc -spirv-ext=+all -o %t.spv
+; RUN: llvm-spirv %t.spv -to-text -o - | FileCheck %s --check-prefix=CHECK-SPIRV
+
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o - | FileCheck %s --check-prefix=CHECK-LLVM
+
+; CHECK-SPIRV: Capability JointMatrixINTEL
+; CHECK-SPIRV: Extension "SPV_INTEL_joint_matrix"
+; CHECK-SPIRV: TypeInt [[#TypeInt:]] 64
+; CHECK-SPIRV: TypeFloat [[#TypeFloat:]] 32
+; CHECK-SPIRV: TypeJointMatrixINTEL [[#TypeMatrix:]] [[#TypeFloat]] [[#]] [[#]] [[#]] [[#]]
+; CHECK-SPIRV: Phi [[#TypeMatrix]] [[#Matrix:]]
+; CHECK-SPIRV: JointMatrixWorkItemLengthINTEL [[#TypeInt]] [[#]] [[#Matrix]]
+; CHECK-SPIRV: VectorExtractDynamic [[#TypeFloat]] [[#]] [[#Matrix]] [[#Index:]]
+; CHECK-SPIRV: FMul [[#TypeFloat]] [[#NewVal:]] [[#]] [[#]]
+; CHECK-SPIRV: VectorInsertDynamic [[#TypeMatrix]] [[#]] [[#Matrix]] [[#NewVal]] [[#Index]]
+
+; CHECK-LLVM: [[Length:%.*]] = call spir_func i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(1)* [[Matrix:%.*]])
+; CHECK-LLVM: [[Elem:%.*]] = call spir_func float @_Z28__spirv_VectorExtractDynamicPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3l(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(1)* [[Matrix]], i64 [[Index:%.*]])
+; CHECK-LLVM: [[NewVal:%.*]] = fmul float [[Elem]], 5.000000e+00
+; CHECK-LLVM: {{%.*}} = call spir_func %spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(1)* @_Z27__spirv_VectorInsertDynamicPU3AS141__spirv_JointMatrixINTEL__float_16_16_0_3fl(%spirv.JointMatrixINTEL._float_16_16_0_3 addrspace(1)* [[Matrix]], float [[NewVal]], i64 [[Index]])
+
+source_filename = "/work/tmp/matrix-slice.cpp"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%"struct.cl::sycl::detail::AssertHappened" = type { i32, [257 x i8], [257 x i8], [129 x i8], i32, i64, i64, i64, i64, i64, i64 }
+%"class.cl::sycl::range" = type { %"class.cl::sycl::detail::array" }
+%"class.cl::sycl::detail::array" = type { [1 x i64] }
+%"class.cl::sycl::id" = type { %"class.cl::sycl::detail::array" }
+%"struct.__spv::__spirv_JointMatrixINTEL" = type { [16 x [16 x [1 x [4 x float]]]] addrspace(4)* }
+
+$_ZTSN2cl4sycl6detail23__sycl_service_kernel__16AssertInfoCopierE = comdat any
+
+$_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E6matrix = comdat any
+
+; Function Attrs: convergent norecurse
+define weak_odr dso_local spir_kernel void @_ZTSN2cl4sycl6detail23__sycl_service_kernel__16AssertInfoCopierE(%"struct.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_1, %"class.cl::sycl::range"* byval(%"class.cl::sycl::range") align 8 %_arg_2, %"class.cl::sycl::id"* byval(%"class.cl::sycl::id") align 8 %_arg_3) local_unnamed_addr #0 comdat !kernel_arg_buffer_location !5 {
+entry:
+  %0 = getelementptr inbounds %"class.cl::sycl::id", %"class.cl::sycl::id"* %_arg_3, i64 0, i32 0, i32 0, i64 0
+  %1 = addrspacecast i64* %0 to i64 addrspace(4)*
+  %2 = load i64, i64 addrspace(4)* %1, align 8
+  %add.ptr.i = getelementptr inbounds %"struct.cl::sycl::detail::AssertHappened", %"struct.cl::sycl::detail::AssertHappened" addrspace(1)* %_arg_, i64 %2
+  %3 = bitcast %"struct.cl::sycl::detail::AssertHappened" addrspace(1)* %add.ptr.i to i8 addrspace(1)*
+  %4 = addrspacecast i8 addrspace(1)* %3 to i8 addrspace(4)*
+  tail call spir_func void @__devicelib_assert_read(i8 addrspace(4)* %4) #2
+  ret void
+}
+
+; Function Attrs: convergent
+declare extern_weak dso_local spir_func void @__devicelib_assert_read(i8 addrspace(4)*) local_unnamed_addr #1
+
+; Function Attrs: convergent norecurse
+define weak_odr dso_local spir_kernel void @_ZTSZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_E6matrix() local_unnamed_addr #0 comdat !kernel_arg_buffer_location !6 {
+entry:
+  %call9.i.i = tail call spir_func %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)* addrspacecast (float addrspace(1)* null to float addrspace(4)*), i64 1, i32 0, i32 3, i32 0) #2
+  br label %for.cond.i
+
+for.cond.i:                                       ; preds = %for.body.i, %entry
+  %A.sroa.0.0.i = phi %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* [ %call9.i.i, %entry ], [ %call5.i.i, %for.body.i ]
+  %i.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ]
+  %conv.i = zext i32 %i.0.i to i64
+  %call.i12.i = tail call spir_func i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEmPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* %A.sroa.0.0.i) #2
+  %cmp.i = icmp ugt i64 %call.i12.i, %conv.i
+  br i1 %cmp.i, label %for.body.i, label %_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlNS0_7nd_itemILi2EEEE_clES5_.exit
+
+for.body.i:                                       ; preds = %for.cond.i
+  %call.i.i = tail call spir_func float @_Z28__spirv_VectorExtractDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmET_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEET4_(%"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* %A.sroa.0.0.i, i64 %conv.i) #2
+  %mul.i.i = fmul float %call.i.i, 5.000000e+00
+  %call5.i.i = tail call spir_func %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEES7_T4_S5_(%"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* %A.sroa.0.0.i, float %mul.i.i, i64 %conv.i) #2
+  %inc.i = add nuw nsw i32 %i.0.i, 1
+  br label %for.cond.i, !llvm.loop !7
+
+_ZZZ4mainENKUlRN2cl4sycl7handlerEE_clES2_ENKUlNS0_7nd_itemILi2EEEE_clES5_.exit: ; preds = %for.cond.i
+  tail call spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)* addrspacecast (float addrspace(1)* null to float addrspace(4)*), %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* %A.sroa.0.0.i, i64 1, i32 0, i32 3, i32 0) #2
+  ret void
+}
+
+; Function Attrs: convergent
+declare dso_local spir_func %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* @_Z28__spirv_JointMatrixLoadINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEEPS5_mS1_S3_i(float addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare dso_local spir_func i64 @_Z38__spirv_JointMatrixWorkItemLengthINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEmPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEE(%"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)*) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare dso_local spir_func float @_Z28__spirv_VectorExtractDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmET_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEET4_(%"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)*, i64) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare dso_local spir_func %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* @_Z27__spirv_VectorInsertDynamicIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EmEPNS0_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEES7_T4_S5_(%"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)*, float, i64) local_unnamed_addr #1
+
+; Function Attrs: convergent
+declare dso_local spir_func void @_Z29__spirv_JointMatrixStoreINTELIfLm16ELm16ELN5__spv12MatrixLayoutE0ELNS0_5Scope4FlagE3EEvPT_PNS0_24__spirv_JointMatrixINTELIS4_XT0_EXT1_EXT2_EXT3_EEEmS1_S3_i(float addrspace(4)*, %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)*, i64, i32, i32, i32) local_unnamed_addr #1
+
+attributes #0 = { convergent norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="/work/tmp/matrix-slice.cpp" "uniform-work-group-size"="true" }
+attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent }
+
+!llvm.module.flags = !{!0, !1}
+!opencl.spir.version = !{!2}
+!spirv.Source = !{!3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"frame-pointer", i32 2}
+!2 = !{i32 1, i32 2}
+!3 = !{i32 4, i32 100000}
+!4 = !{!"clang version 14.0.0 (https://github.com/intel/llvm.git 3648adf79e4fdb619fdbe41d63bc39f456b5be8c)"}
+!5 = !{i32 -1, i32 -1, i32 -1, i32 -1}
+!6 = !{}
+!7 = distinct !{!7, !8}
+!8 = !{!"llvm.loop.mustprogress"}

From c91bca01a96ba6e9e1724ca0091ce0b9a9577347 Mon Sep 17 00:00:00 2001
From: Dmitry Sidorov <dmitry.sidorov@intel.com>
Date: Thu, 27 Jan 2022 18:31:36 +0300
Subject: [PATCH 938/946] [NFC] Fix CHECK strings in the test

Signed-off-by: Dmitry Sidorov <dmitry.sidorov@intel.com>

Original commit:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/7f168b2
---
 llvm-spirv/test/spirv_param_decorations_quals.ll | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/llvm-spirv/test/spirv_param_decorations_quals.ll b/llvm-spirv/test/spirv_param_decorations_quals.ll
index ca35ee9a2fe35..fe5866d8e4a1d 100644
--- a/llvm-spirv/test/spirv_param_decorations_quals.ll
+++ b/llvm-spirv/test/spirv_param_decorations_quals.ll
@@ -36,13 +36,15 @@ entry:
 !9 = !{!8}
 !10 = !{!9}
 
-; CHECK-SPV-IR: define spir_kernel void @k(i32 addrspace(1)* noalias %a) {{.*}} !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]] {{.*}} !spirv.ParameterDecorations ![[ParamDecoListId:[0-9]+]] {
+; CHECK-SPV-IR: define spir_kernel void @k(i32 addrspace(1)* %a)
+; CHECK-SPV-IR-SAME: !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]]
+; CHECK-SPV-IR-SAME: !spirv.ParameterDecorations ![[ParamDecoListId:[0-9]+]]
 ; CHECK-SPV-IR-DAG: ![[ParamDecoListId]] = !{![[ParamDecoId:[0-9]+]]}
 ; CHECK-SPV-IR-DAG: ![[ParamDecoId]] = !{![[VolatileDecoId:[0-9]+]], ![[NoAliasDecoId:[0-9]+]]}
 ; CHECK-SPV-IR-DAG: ![[NoAliasDecoId]] = !{i32 38, i32 4}
 ; CHECK-SPV-IR-DAG: ![[VolatileDecoId]] = !{i32 21}
 ; CHECK-SPV-IR-DAG: ![[KernelArgTypeQual]] = !{!"volatile restrict"}
 
-; CHECK-LLVM-NOT: define spir_kernel void @k({{.*}}) {{.*}} !spirv.ParameterDecorations ![[ParamDecoListId:[0-9]+]] {
-; CHECK-LLVM: define spir_kernel void @k(i32 addrspace(1)* noalias %a) {{.*}} !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]] {{.*}} {
+; CHECK-LLVM-NOT: !spirv.ParameterDecorations
+; CHECK-LLVM: define spir_kernel void @k(i32 addrspace(1)* %a) {{.*}} !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]] {{.*}} {
 ; CHECK-LLVM-DAG: ![[KernelArgTypeQual]] = !{!"volatile restrict"}

From bf43d7bea715c5be15a1726c803ba656b6d94e6a Mon Sep 17 00:00:00 2001
From: Nikita <nikita.kornev@intel.com>
Date: Fri, 28 Jan 2022 16:51:09 +0300
Subject: [PATCH 939/946] Fix VectorExtract&Insert Dynamic with sycl half
 translation

This is a patch to fix forward translation of VectorExtractDynamic with sret
argument of SYCL half type & fix reverse translation of VectorInsertDynamic
with component of SYCL half type by adding extra instructions.

Original commit:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/ea5ef3f
---
 llvm-spirv/lib/SPIRV/SPIRVInternal.h          |  15 ++
 llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp  |  92 +++++++++++++
 llvm-spirv/lib/SPIRV/SPIRVUtil.cpp            |  32 +++++
 ...rix_extract_insert_element_of_sycl_half.ll | 130 ++++++++++++++++++
 4 files changed, 269 insertions(+)
 create mode 100644 llvm-spirv/test/transcoding/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll

diff --git a/llvm-spirv/lib/SPIRV/SPIRVInternal.h b/llvm-spirv/lib/SPIRV/SPIRVInternal.h
index 727c00af67e12..6fc15280570c0 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVInternal.h
+++ b/llvm-spirv/lib/SPIRV/SPIRVInternal.h
@@ -661,6 +661,8 @@ bool isOCLImageType(llvm::Type *Ty, StringRef *Name = nullptr);
 ///   type name as spirv.BaseTyName.Postfixes.
 bool isSPIRVType(llvm::Type *Ty, StringRef BaseTyName, StringRef *Postfix = 0);
 
+bool isSYCLHalfType(llvm::Type *Ty);
+
 /// Decorate a function name as __spirv_{Name}_
 std::string decorateSPIRVFunction(const std::string &S);
 
@@ -765,6 +767,19 @@ void mutateFunction(
     BuiltinFuncMangleInfo *Mangle = nullptr, AttributeList *Attrs = nullptr,
     bool TakeName = true);
 
+/// Mutate function by change the arguments & the return type.
+/// \param ArgMutate mutates the function arguments.
+/// \param RetMutate mutates the function return value.
+/// \param TakeName Take the original function's name if a new function with
+///   different type needs to be created.
+void mutateFunction(
+    Function *F,
+    std::function<std::string(CallInst *, std::vector<Value *> &, Type *&RetTy)>
+        ArgMutate,
+    std::function<Instruction *(CallInst *)> RetMutate,
+    BuiltinFuncMangleInfo *Mangle = nullptr, AttributeList *Attrs = nullptr,
+    bool TakeName = true);
+
 /// Add a call instruction at \p Pos.
 CallInst *addCallInst(Module *M, StringRef FuncName, Type *RetTy,
                       ArrayRef<Value *> Args, AttributeList *Attrs,
diff --git a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
index 7000a7b719453..80b5b14890bc3 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVRegularizeLLVM.cpp
@@ -103,6 +103,20 @@ class SPIRVRegularizeLLVMBase {
   void lowerUMulWithOverflow(IntrinsicInst *UMulIntrinsic);
   void buildUMulWithOverflowFunc(Function *UMulFunc);
 
+  // For some cases Clang emits VectorExtractDynamic as:
+  // void @_Z28__spirv_VectorExtractDynamic(<Ty>* sret(<Ty>), jointMatrix, idx);
+  // Instead of:
+  // <Ty> @_Z28__spirv_VectorExtractDynamic(JointMatrix, Idx);
+  // And VectorInsertDynamic as:
+  // @_Z27__spirv_VectorInsertDynamic(jointMatrix, <Ty>* byval(<Ty>), idx);
+  // Instead of:
+  // @_Z27__spirv_VectorInsertDynamic(jointMatrix, <Ty>, idx)
+  // Need to add additional GEP, store and load instructions and mutate called
+  // function to avoid translation failures
+  void expandSYCLHalfUsing(Module *M);
+  void expandVEDWithSYCLHalfSRetArg(Function *F);
+  void expandVIDWithSYCLHalfByValComp(Function *F);
+
   static std::string lowerLLVMIntrinsicName(IntrinsicInst *II);
   void adaptStructTypes(StructType *ST);
   static char ID;
@@ -320,6 +334,83 @@ void SPIRVRegularizeLLVMBase::lowerUMulWithOverflow(
   UMulIntrinsic->setCalledFunction(UMulFunc);
 }
 
+void SPIRVRegularizeLLVMBase::expandVEDWithSYCLHalfSRetArg(Function *F) {
+  auto Attrs = F->getAttributes();
+  Attrs = Attrs.removeParamAttribute(F->getContext(), 0, Attribute::StructRet);
+  std::string Name = F->getName().str();
+  CallInst *OldCall = nullptr;
+  mutateFunction(
+      F,
+      [=, &OldCall](CallInst *CI, std::vector<Value *> &Args, Type *&RetTy) {
+        Args.erase(Args.begin());
+        auto *SRetPtrTy = cast<PointerType>(CI->getOperand(0)->getType());
+        auto *ET = SRetPtrTy->getPointerElementType();
+        RetTy = cast<StructType>(ET)->getElementType(0);
+        OldCall = CI;
+        return Name;
+      },
+      [=, &OldCall](CallInst *NewCI) {
+        IRBuilder<> Builder(OldCall);
+        auto *SRetPtrTy = cast<PointerType>(OldCall->getOperand(0)->getType());
+        auto *ET = SRetPtrTy->getPointerElementType();
+        Value *Target = Builder.CreateStructGEP(ET, OldCall->getOperand(0), 0);
+        return Builder.CreateStore(NewCI, Target);
+      },
+      nullptr, &Attrs, true);
+}
+
+void SPIRVRegularizeLLVMBase::expandVIDWithSYCLHalfByValComp(Function *F) {
+  auto Attrs = F->getAttributes();
+  Attrs = Attrs.removeParamAttribute(F->getContext(), 1, Attribute::ByVal);
+  std::string Name = F->getName().str();
+  mutateFunction(
+      F,
+      [=](CallInst *CI, std::vector<Value *> &Args) {
+        auto *CompPtrTy = cast<PointerType>(CI->getOperand(1)->getType());
+        auto *ET = CompPtrTy->getPointerElementType();
+        Type *HalfTy = cast<StructType>(ET)->getElementType(0);
+        IRBuilder<> Builder(CI);
+        auto *Target = Builder.CreateStructGEP(ET, CI->getOperand(1), 0);
+        Args[1] = Builder.CreateLoad(HalfTy, Target);
+        return Name;
+      },
+      nullptr, &Attrs, true);
+}
+
+void SPIRVRegularizeLLVMBase::expandSYCLHalfUsing(Module *M) {
+  std::vector<Function *> ToExpandVEDWithSYCLHalfSRetArg;
+  std::vector<Function *> ToExpandVIDWithSYCLHalfByValComp;
+
+  for (auto &F : *M) {
+    if (F.getName().startswith("_Z28__spirv_VectorExtractDynamic") &&
+        F.hasStructRetAttr()) {
+      auto *SRetPtrTy = cast<PointerType>(F.getArg(0)->getType());
+      if (isSYCLHalfType(SRetPtrTy->getPointerElementType()))
+        ToExpandVEDWithSYCLHalfSRetArg.push_back(&F);
+      else
+        llvm_unreachable("The return type of the VectorExtractDynamic "
+                         "instruction cannot be a structure other than SYCL "
+                         "half.");
+    }
+    if (F.getName().startswith("_Z27__spirv_VectorInsertDynamic") &&
+        F.getArg(1)->getType()->isPointerTy()) {
+      auto *CompPtrTy = cast<PointerType>(F.getArg(1)->getType());
+      auto *ET = CompPtrTy->getPointerElementType();
+      if (isSYCLHalfType(ET))
+        ToExpandVIDWithSYCLHalfByValComp.push_back(&F);
+      else
+        llvm_unreachable("The component argument type of an "
+                         "VectorInsertDynamic instruction can't be a "
+                         "structure other than SYCL half.");
+    }
+  }
+
+  for (auto *F : ToExpandVEDWithSYCLHalfSRetArg)
+    expandVEDWithSYCLHalfSRetArg(F);
+  for (auto *F : ToExpandVIDWithSYCLHalfByValComp)
+    expandVIDWithSYCLHalfByValComp(F);
+}
+
 void SPIRVRegularizeLLVMBase::adaptStructTypes(StructType *ST) {
   if (!ST->hasName())
     return;
@@ -433,6 +524,7 @@ bool SPIRVRegularizeLLVMBase::runRegularizeLLVM(Module &Module) {
 bool SPIRVRegularizeLLVMBase::regularize() {
   eraseUselessFunctions(M);
   lowerFuncPtr(M);
+  expandSYCLHalfUsing(M);
 
   for (auto I = M->begin(), E = M->end(); I != E;) {
     Function *F = &(*I++);
diff --git a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp
index b1b68be8bfac4..0d8d575fc22ff 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVUtil.cpp
@@ -310,6 +310,21 @@ bool isSPIRVType(llvm::Type *Ty, StringRef BaseTyName, StringRef *Postfix) {
   return false;
 }
 
+bool isSYCLHalfType(llvm::Type *Ty) {
+  if (auto *ST = dyn_cast<StructType>(Ty)) {
+    if (!ST->hasName())
+      return false;
+    StringRef Name = ST->getName();
+    Name.consume_front("class.");
+    if ((Name.startswith("cl::sycl::") ||
+         Name.startswith("__sycl_internal::")) &&
+        Name.endswith("::half")) {
+      return true;
+    }
+  }
+  return false;
+}
+
 Function *getOrCreateFunction(Module *M, Type *RetTy, ArrayRef<Type *> ArgTypes,
                               StringRef Name, BuiltinFuncMangleInfo *Mangle,
                               AttributeList *Attrs, bool TakeName) {
@@ -344,6 +359,8 @@ Function *getOrCreateFunction(Module *M, Type *RetTy, ArrayRef<Type *> ArgTypes,
     }
     LLVM_DEBUG(dbgs() << "[getOrCreateFunction] ";
                if (F) dbgs() << *F << " => "; dbgs() << *NewF << '\n';);
+    if (F)
+      NewF->setDSOLocal(F->isDSOLocal());
     F = NewF;
     F->setCallingConv(CallingConv::SPIR_FUNC);
     if (Attrs)
@@ -705,6 +722,21 @@ void mutateFunction(
     F->eraseFromParent();
 }
 
+void mutateFunction(
+    Function *F,
+    std::function<std::string(CallInst *, std::vector<Value *> &, Type *&RetTy)>
+        ArgMutate,
+    std::function<Instruction *(CallInst *)> RetMutate,
+    BuiltinFuncMangleInfo *Mangle, AttributeList *Attrs, bool TakeName) {
+  auto *M = F->getParent();
+  for (auto I = F->user_begin(), E = F->user_end(); I != E;) {
+    if (auto *CI = dyn_cast<CallInst>(*I++))
+      mutateCallInst(M, CI, ArgMutate, RetMutate, Mangle, Attrs, TakeName);
+  }
+  if (F->use_empty())
+    F->eraseFromParent();
+}
+
 CallInst *mutateCallInstSPIRV(
     Module *M, CallInst *CI,
     std::function<std::string(CallInst *, std::vector<Value *> &)> ArgMutate,
diff --git a/llvm-spirv/test/transcoding/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll b/llvm-spirv/test/transcoding/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll
new file mode 100644
index 0000000000000..a105dffd403b3
--- /dev/null
+++ b/llvm-spirv/test/transcoding/SPV_INTEL_joint_matrix/joint_matrix_extract_insert_element_of_sycl_half.ll
@@ -0,0 +1,130 @@
+; RUN: llvm-as %s -o %t.bc
+
+; RUN: llvm-spirv -s %t.bc -o %t.regularized.bc
+; RUN: llvm-dis %t.regularized.bc -o %t.regularized.ll
+; RUN: FileCheck < %t.regularized.ll %s --check-prefix=CHECK-REGULARIZED
+
+; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_joint_matrix -o %t.spv
+; RUN: llvm-spirv -to-text %t.spv -o %t.spt
+; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV
+
+; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
+; RUN: llvm-dis %t.rev.bc -o %t.rev.ll
+; RUN: FileCheck < %t.rev.ll %s --check-prefix=CHECK-LLVM
+
+; CHECK-REGULARIZED: %[[#ExtractElementCall:]] = call spir_func half @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* align 2{{.*}}, i64{{.*}})
+; CHECK-REGULARIZED: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(4)*{{.*}}, i32 0, i32 0
+; CHECK-REGULARIZED: store half %[[#ExtractElementCall]], half addrspace(4)* %[[#GEP]]
+; CHECK-REGULARIZED: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half"*{{.*}}, i32 0, i32 0
+; CHECK-REGULARIZED: %[[#Component:]] = load half, half*{{.*}}, align 2
+; CHECK-REGULARIZED: call spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*{{.*}}, half %[[#Component]], i64{{.*}})
+; CHECK-REGULARIZED: declare dso_local spir_func half @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* align 2, i64)
+; CHECK-REGULARIZED: declare dso_local spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(4)*, half, i64)
+
+; CHECK-SPIRV: Name [[#VIDValueId:]] "agg.tmp.ascast.ascast"
+; CHECK-SPIRV: TypeFloat [[#Float16Id:]] 16
+; CHECK-SPIRV: TypeJointMatrixINTEL [[#JointMatrixTyId:]] [[#]] [[#]] [[#]] [[#]] [[#]]
+; CHECK-SPIRV: VectorExtractDynamic [[#Float16Id]] [[#VEDId:]] [[#]] [[#]]
+; CHECK-SPIRV: Store [[#]] [[#VEDId]]
+; CHECK-SPIRV: PtrAccessChain [[#]] [[#GEPId:]] [[#VIDValueId]] [[#]] [[#]]
+; CHECK-SPIRV: Load [[#Float16Id]] [[#ComponentId:]] [[#GEPId]]
+; CHECK-SPIRV: VectorInsertDynamic [[#JointMatrixTyId]] [[#]] [[#]] [[#ComponentId]] [[#]]
+
+; CHECK-LLVM: %[[#ExtractElementCall:]] = call spir_func half @_Z28__spirv_VectorExtractDynamicPU3AS139__spirv_JointMatrixINTEL__half_8_16_0_3l(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(1)*{{.*}}, i64{{.*}})
+; CHECK-LLVM: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half" addrspace(4)*{{.*}}, i32 0, i32 0
+; CHECK-LLVM: store half %[[#ExtractElementCall]], half addrspace(4)* %[[#GEP]]
+
+; CHECK-LLVM: %[[#GEP:]] = getelementptr inbounds %"class.cl::sycl::detail::half_impl::half", %"class.cl::sycl::detail::half_impl::half"*{{.*}}, i32 0, i32 0
+; CHECK-LLVM: %[[#Component:]] = load half, half* %[[#GEP]]
+; CHECK-LLVM: spir_func %spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(1)* @_Z27__spirv_VectorInsertDynamicPU3AS139__spirv_JointMatrixINTEL__half_8_16_0_3Dhl(%spirv.JointMatrixINTEL._half_8_16_0_3 addrspace(1)*{{.*}}, half %[[#Component]], i64{{.*}})
+
+; ModuleID = 'element_wise_all_ops_half.bc'
+source_filename = "llvm-link"
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+%"class.cl::sycl::detail::half_impl::half" = type { half }
+%"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" = type { %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* }
+%"struct.__spv::__spirv_JointMatrixINTEL" = type { [8 x [16 x [1 x [4 x %"class.cl::sycl::detail::half_impl::half"]]]] addrspace(4)* }
+%"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" = type { %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, i64 }
+
+$_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_ = comdat any
+
+$_ZN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEaSERKS8_ = comdat any
+
+; Function Attrs: convergent mustprogress norecurse
+define linkonce_odr dso_local spir_func void @_ZN2cl4sycl3ext6oneapi12experimental6matrixplERKNS4_10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEERKS8_(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* noalias sret(%"class.cl::sycl::detail::half_impl::half") align 2 %agg.result, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* align 8 dereferenceable(16) %lhs, %"class.cl::sycl::detail::half_impl::half" addrspace(4)* align 2 dereferenceable(2) %rhs) #0 comdat {
+entry:
+  %lhs.addr = alloca %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, align 8
+  %ref.tmp1 = alloca %"class.cl::sycl::detail::half_impl::half", align 2
+  %lhs.addr.ascast = addrspacecast %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)** %lhs.addr to %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)*
+  %ref.tmp1.ascast = addrspacecast %"class.cl::sycl::detail::half_impl::half"* %ref.tmp1 to %"class.cl::sycl::detail::half_impl::half" addrspace(4)*
+  %0 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %lhs.addr.ascast, align 8, !tbaa !8
+  %M = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %0, i32 0, i32 0
+  %1 = load %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* addrspace(4)* %M, align 8, !tbaa !15
+  %spvm = getelementptr inbounds %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix", %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* %1, i32 0, i32 0
+  %2 = load %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)*, %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* addrspace(4)* %spvm, align 8, !tbaa !13
+  %3 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %lhs.addr.ascast, align 8, !tbaa !8
+  %idx = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %3, i32 0, i32 1
+  %4 = load i64, i64 addrspace(4)* %idx, align 8, !tbaa !17
+  call spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* sret(%"class.cl::sycl::detail::half_impl::half") align 2 %ref.tmp1.ascast, %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* %2, i64 %4) #2
+  ret void
+}
+
+; Function Attrs: convergent mustprogress norecurse
+define linkonce_odr dso_local spir_func align 8 dereferenceable(16) %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* @_ZN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEaSERKS8_(%"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* align 8 dereferenceable_or_null(16) %this, %"class.cl::sycl::detail::half_impl::half" addrspace(4)* align 2 dereferenceable(2) %rhs) #0 comdat align 2 {
+entry:
+  %this.addr = alloca %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, align 8
+  %agg.tmp = alloca %"class.cl::sycl::detail::half_impl::half", align 2
+  %this.addr.ascast = addrspacecast %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)** %this.addr to %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)*
+  %agg.tmp.ascast = addrspacecast %"class.cl::sycl::detail::half_impl::half"* %agg.tmp to %"class.cl::sycl::detail::half_impl::half" addrspace(4)*
+  %this1 = load %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)*, %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* addrspace(4)* %this.addr.ascast, align 8
+  %M = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1, i32 0, i32 0
+  %0 = load %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)*, %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* addrspace(4)* %M, align 8, !tbaa !15
+  %spvm = getelementptr inbounds %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix", %"struct.cl::sycl::ext::oneapi::experimental::matrix::joint_matrix" addrspace(4)* %0, i32 0, i32 0
+  %1 = load %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)*, %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* addrspace(4)* %spvm, align 8, !tbaa !13
+  %idx = getelementptr inbounds %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element", %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1, i32 0, i32 1
+  %2 = load i64, i64 addrspace(4)* %idx, align 8, !tbaa !17
+  %agg.tmp.ascast.ascast = addrspacecast %"class.cl::sycl::detail::half_impl::half" addrspace(4)* %agg.tmp.ascast to %"class.cl::sycl::detail::half_impl::half"*
+  %call = call spir_func %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* %1, %"class.cl::sycl::detail::half_impl::half"* byval(%"class.cl::sycl::detail::half_impl::half") align 2 %agg.tmp.ascast.ascast, i64 %2) #2
+  ret %"class.cl::sycl::ext::oneapi::experimental::matrix::wi_element" addrspace(4)* %this1
+}
+
+; Function Attrs: convergent
+declare dso_local spir_func %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)* @_Z27__spirv_VectorInsertDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EEPNS5_24__spirv_JointMatrixINTELIT_XT0_EXT1_EXT2_EXT3_EEESC_SA_m(%"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)*, %"class.cl::sycl::detail::half_impl::half"* byval(%"class.cl::sycl::detail::half_impl::half") align 2, i64) #1
+
+; Function Attrs: convergent
+declare dso_local spir_func void @_Z28__spirv_VectorExtractDynamicIN2cl4sycl6detail9half_impl4halfELm8ELm16ELN5__spv12MatrixLayoutE0ELNS5_5Scope4FlagE3EET_PNS5_24__spirv_JointMatrixINTELIS9_XT0_EXT1_EXT2_EXT3_EEEm(%"class.cl::sycl::detail::half_impl::half" addrspace(4)* sret(%"class.cl::sycl::detail::half_impl::half") align 2, %"struct.__spv::__spirv_JointMatrixINTEL" addrspace(4)*, i64) #1
+
+attributes #0 = { convergent mustprogress norecurse "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { convergent "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { convergent }
+
+!opencl.spir.version = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0}
+!spirv.Source = !{!1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1, !1}
+!opencl.used.extensions = !{!2, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3}
+!opencl.used.optional.core.features = !{!4, !3, !3, !4, !3, !4, !3, !3, !3, !4, !3, !4}
+!opencl.compiler.options = !{!3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3, !3}
+!llvm.ident = !{!5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5, !5}
+!llvm.module.flags = !{!6, !7}
+!sycl.specialization-constants = !{}
+!sycl.specialization-constants-default-values = !{}
+
+!0 = !{i32 1, i32 2}
+!1 = !{i32 4, i32 100000}
+!2 = !{!"cl_khr_fp16"}
+!3 = !{}
+!4 = !{!"cl_doubles"}
+!5 = !{!"Compiler"}
+!6 = !{i32 1, !"wchar_size", i32 4}
+!7 = !{i32 7, !"frame-pointer", i32 2}
+!8 = !{!9, !9, i64 0}
+!9 = !{!"any pointer", !10, i64 0}
+!10 = !{!"omnipotent char", !11, i64 0}
+!11 = !{!"Simple C++ TBAA"}
+!12 = !{!"long", !10, i64 0}
+!13 = !{!14, !9, i64 0}
+!14 = !{!"_ZTSN2cl4sycl3ext6oneapi12experimental6matrix12joint_matrixINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEE", !9, i64 0}
+!15 = !{!16, !9, i64 0}
+!16 = !{!"_ZTSN2cl4sycl3ext6oneapi12experimental6matrix10wi_elementINS0_6detail9half_impl4halfELm8ELm16ELNS4_13matrix_layoutE0ENS2_9sub_groupEEE", !9, i64 0, !12, i64 8}
+!17 = !{!16, !12, i64 8}

From 11eeb8015f0245f0fb738f37e014d8588bc55a7d Mon Sep 17 00:00:00 2001
From: Alexey Sotkin <alexey.sotkin@intel.com>
Date: Fri, 28 Jan 2022 18:50:06 +0300
Subject: [PATCH 940/946] Handle translation of DIArgList with single argument

If there are more than one argument in DIArgList, we can't represent it in
SPIR-V with the current SPIR-V debug info spec. In this case we drop DIArgList
and DIExpression.

Signed-off-by: Alexey Sotkin <alexey.sotkin@intel.com>

Original commit:
https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/fb2dd0e
---
 llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp   |  8 ++++++--
 llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp   | 16 ++++++++++++----
 llvm-spirv/test/DebugInfo/DebugInfoLLVMArg.ll | 15 +++++++++------
 3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp b/llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp
index 24ae792750c0b..411bed3b00e44 100644
--- a/llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp
+++ b/llvm-spirv/lib/SPIRV/LLVMToSPIRVDbgTran.cpp
@@ -150,12 +150,16 @@ void LLVMToSPIRVDbgTran::finalizeDebugValue(
   SPIRVExtInst *DV = static_cast<SPIRVExtInst *>(V);
   SPIRVBasicBlock *BB = DV->getBasicBlock();
   Value *Val = DbgValue->getVariableLocationOp(0);
-
+  DIExpression *Expr = DbgValue->getExpression();
+  if (DbgValue->getNumVariableLocationOps() > 1) {
+    Val = UndefValue::get(Val->getType());
+    Expr = DIExpression::get(M->getContext(), {});
+  }
   using namespace SPIRVDebug::Operand::DebugValue;
   SPIRVWordVec Ops(MinOperandCount);
   Ops[DebugLocalVarIdx] = transDbgEntry(DbgValue->getVariable())->getId();
   Ops[ValueIdx] = SPIRVWriter->transValue(Val, BB)->getId();
-  Ops[ExpressionIdx] = transDbgEntry(DbgValue->getExpression())->getId();
+  Ops[ExpressionIdx] = transDbgEntry(Expr)->getId();
   DV->setArguments(Ops);
 }
 
diff --git a/llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp b/llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp
index ccf00488df6f5..f633819f74206 100644
--- a/llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp
+++ b/llvm-spirv/lib/SPIRV/SPIRVToLLVMDbgTran.cpp
@@ -44,9 +44,10 @@
 #include "SPIRVReader.h"
 #include "SPIRVType.h"
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/DIBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
-#include "llvm/ADT/StringExtras.h"
 
 using namespace std;
 using namespace SPIRVDebug::Operand;
@@ -998,9 +999,16 @@ SPIRVToLLVMDbgTran::transDebugIntrinsic(const SPIRVExtInst *DebugInst,
   case SPIRVDebug::Value: {
     using namespace SPIRVDebug::Operand::DebugValue;
     auto LocalVar = GetLocalVar(Ops[DebugLocalVarIdx]);
-    return Builder.insertDbgValueIntrinsic(
-        GetValue(Ops[ValueIdx]), LocalVar.first,
-        GetExpression(Ops[ExpressionIdx]), LocalVar.second, BB);
+    Value *Val = GetValue(Ops[ValueIdx]);
+    DIExpression *Expr = GetExpression(Ops[ExpressionIdx]);
+    auto *DbgValIntr = Builder.insertDbgValueIntrinsic(
+        Val, LocalVar.first, Expr, LocalVar.second, BB);
+    if (Expr->getNumLocationOperands() == 1) {
+      SmallVector<ValueAsMetadata *, 1> MDs = {ValueAsMetadata::get(Val)};
+      DIArgList *AL = DIArgList::get(M->getContext(), MDs);
+      cast<DbgVariableIntrinsic>(DbgValIntr)->setRawLocation(AL);
+    }
+    return DbgValIntr;
   }
   default:
     llvm_unreachable("Unknown debug intrinsic!");
diff --git a/llvm-spirv/test/DebugInfo/DebugInfoLLVMArg.ll b/llvm-spirv/test/DebugInfo/DebugInfoLLVMArg.ll
index f42063fc50f04..a78a04d8aa0cf 100644
--- a/llvm-spirv/test/DebugInfo/DebugInfoLLVMArg.ll
+++ b/llvm-spirv/test/DebugInfo/DebugInfoLLVMArg.ll
@@ -7,12 +7,14 @@
 ; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
 ; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM
 
+; CHECK-SPIRV: Undef [[#]] [[#UNDEF:]]
 ; CHECK-SPIRV: [[#DEBUG_LOC_VAR:]] [[#]] DebugLocalVariable
 ; CHECK-SPIRV: [[#EXPR_ARG_0:]] [[#]] DebugOperation 165 0
-; CHECK-SPIRV: [[#EXPR_ARG_1:]] [[#]] DebugOperation 165 1
-; CHECK-SPIRV: [[#EXPR_ARG_2:]] [[#]] DebugOperation 1
-; CHECK-SPIRV: [[#EXPRESSION:]] [[#]] DebugExpression [[#EXPR_ARG_0]] [[#EXPR_ARG_1]] [[#EXPR_ARG_2]]
-; CHECK-SPIRV: DebugValue [[#DEBUG_LOC_VAR]] [[#]] [[#EXPRESSION]]
+; CHECK-SPIRV: [[#EXPRESSION:]] [[#]] DebugExpression [[#EXPR_ARG_0]]
+; CHECK-SPIRV: [[#EXPR_EMPTY:]] [[#]] DebugExpression{{ *$}}
+; CHECK-SPIRV: Variable [[#]] [[#VAL:]]
+; CHECK-SPIRV: DebugValue [[#DEBUG_LOC_VAR]] [[#VAL]] [[#EXPRESSION]]
+; CHECK-SPIRV: DebugValue [[#DEBUG_LOC_VAR]] [[#UNDEF]] [[#EXPR_EMPTY]]
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "spir64-unknown-unknown"
@@ -22,13 +24,14 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone spe
 define void @DbgIntrinsics() sanitize_memtag {
 entry:
   %x = alloca i32, align 4
-; CHECK-LLVM: call void @llvm.dbg.value(metadata i32* %x, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus))
+; CHECK-LLVM: call void @llvm.dbg.value(metadata !DIArgList(i32* %x), metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_arg, 0))
+  call void @llvm.dbg.value(metadata !DIArgList(i32* %x), metadata !6, metadata !DIExpression(DW_OP_LLVM_arg, 0)), !dbg !10
+; CHECK-LLVM: call void @llvm.dbg.value(metadata i32* undef, metadata ![[#]], metadata !DIExpression())
   call void @llvm.dbg.value(metadata !DIArgList(i32* %x, i32* %x), metadata !6, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus)), !dbg !10
   store i32 42, i32* %x, align 4
   ret void
 }
 
-
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!8, !9}
 

From 81f3f8cd4685e62a103e9214ef83dd21565eb390 Mon Sep 17 00:00:00 2001
From: Mariya Podchishchaeva <mariya.podchishchaeva@intel.com>
Date: Wed, 2 Feb 2022 11:07:55 +0300
Subject: [PATCH 941/946] [SYCL] Integrate sycl_special_class attribute
 (8ba9c794) to intel/llvm

The upstream patch triggered assertion on most of SYCL tests:
`AST/Decl.h:275: llvm::StringRef clang::NamedDecl::getName() const: Assertion Name.isIdentifier() && "Name is not a simple identifier"' failed.`

Fix includes the following changes:
* Check that name of function exists before trying to access it.
* Make sure that host accessor doesn't have sycl_special_class attribute
* Enable diagnostics for sycl_special_class attribute only for SYCL device compilation.
---
 clang/lib/Sema/SemaDecl.cpp       | 4 ++--
 sycl/include/CL/sycl/accessor.hpp | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index 2605dc92bd8b7..79552a10e59b5 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -9253,7 +9253,7 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC,
       }
       if ((Parent->isClass() || Parent->isStruct()) &&
           Parent->hasAttr<SYCLSpecialClassAttr>() &&
-          NewFD->getKind() == Decl::Kind::CXXMethod &&
+          NewFD->getKind() == Decl::Kind::CXXMethod && NewFD->getIdentifier() &&
           NewFD->getName() == "__init" && D.isFunctionDefinition()) {
         if (auto *Def = Parent->getDefinition())
           Def->setInitMethod(true);
@@ -16832,7 +16832,7 @@ void Sema::ActOnTagFinishDefinition(Scope *S, Decl *TagD,
 
   if (auto *RD = dyn_cast<CXXRecordDecl>(Tag)) {
     FieldCollector->FinishClass();
-    if (RD->hasAttr<SYCLSpecialClassAttr>()) {
+    if (RD->hasAttr<SYCLSpecialClassAttr>() && getLangOpts().SYCLIsDevice) {
       auto *Def = RD->getDefinition();
       assert(Def && "The record is expected to have a completed definition");
       unsigned NumInitMethods = 0;
diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp
index cd0b69a2b457f..fe92e16ee16dd 100644
--- a/sycl/include/CL/sycl/accessor.hpp
+++ b/sycl/include/CL/sycl/accessor.hpp
@@ -2200,8 +2200,8 @@ class __SYCL_SPECIAL_CLASS accessor<DataT, Dimensions, AccessMode,
 /// \ingroup sycl_api_acc
 template <typename DataT, int Dimensions, access::mode AccessMode,
           access::placeholder IsPlaceholder>
-class __SYCL_SPECIAL_CLASS accessor<DataT, Dimensions, AccessMode,
-                                    access::target::host_image, IsPlaceholder>
+class accessor<DataT, Dimensions, AccessMode, access::target::host_image,
+               IsPlaceholder>
     : public detail::image_accessor<DataT, Dimensions, AccessMode,
                                     access::target::host_image, IsPlaceholder> {
 public:

From c6bf17b9645569fc498671fc26c8deee110d446f Mon Sep 17 00:00:00 2001
From: Viktoria Maksimova <viktoria.maksimova@intel.com>
Date: Wed, 2 Feb 2022 16:56:47 +0300
Subject: [PATCH 942/946] [SPIR-V] Add `noalias` attribute to IR check

---
 llvm-spirv/test/spirv_param_decorations_quals.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm-spirv/test/spirv_param_decorations_quals.ll b/llvm-spirv/test/spirv_param_decorations_quals.ll
index fe5866d8e4a1d..24a8ee981d9c6 100644
--- a/llvm-spirv/test/spirv_param_decorations_quals.ll
+++ b/llvm-spirv/test/spirv_param_decorations_quals.ll
@@ -36,7 +36,7 @@ entry:
 !9 = !{!8}
 !10 = !{!9}
 
-; CHECK-SPV-IR: define spir_kernel void @k(i32 addrspace(1)* %a)
+; CHECK-SPV-IR: define spir_kernel void @k(i32 addrspace(1)* noalias %a)
 ; CHECK-SPV-IR-SAME: !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]]
 ; CHECK-SPV-IR-SAME: !spirv.ParameterDecorations ![[ParamDecoListId:[0-9]+]]
 ; CHECK-SPV-IR-DAG: ![[ParamDecoListId]] = !{![[ParamDecoId:[0-9]+]]}
@@ -46,5 +46,5 @@ entry:
 ; CHECK-SPV-IR-DAG: ![[KernelArgTypeQual]] = !{!"volatile restrict"}
 
 ; CHECK-LLVM-NOT: !spirv.ParameterDecorations
-; CHECK-LLVM: define spir_kernel void @k(i32 addrspace(1)* %a) {{.*}} !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]] {{.*}} {
+; CHECK-LLVM: define spir_kernel void @k(i32 addrspace(1)* noalias %a) {{.*}} !kernel_arg_type_qual ![[KernelArgTypeQual:[0-9]+]] {{.*}} {
 ; CHECK-LLVM-DAG: ![[KernelArgTypeQual]] = !{!"volatile restrict"}

From 4bb9fa8623c9be847593a7aad9d87afec542801b Mon Sep 17 00:00:00 2001
From: Viktoria Maksimova <viktoria.maksimova@intel.com>
Date: Wed, 2 Feb 2022 16:58:08 +0300
Subject: [PATCH 943/946] [SYCL] Enable matrix-elemwise-ops test after dde90d01

The test started to pass after support for element-wise operations on joint
matrix was added to llvm-spirv.
---
 sycl/test/matrix/matrix-elemwise-ops.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sycl/test/matrix/matrix-elemwise-ops.cpp b/sycl/test/matrix/matrix-elemwise-ops.cpp
index 081b0f6dfb63a..31a26362a6f7f 100644
--- a/sycl/test/matrix/matrix-elemwise-ops.cpp
+++ b/sycl/test/matrix/matrix-elemwise-ops.cpp
@@ -1,5 +1,5 @@
 // RUN: %clangxx -fsycl -O2 %s -o %t.out
-// XFAIL: *
+
 #include <CL/sycl.hpp>
 #if (SYCL_EXT_ONEAPI_MATRIX == 2)
 #include <iostream>

From 151b1bc759157587253b667551edbc287827f686 Mon Sep 17 00:00:00 2001
From: Viktoria Maksimova <viktoria.maksimova@intel.com>
Date: Wed, 2 Feb 2022 18:42:45 +0300
Subject: [PATCH 944/946] Fix incorrectly resolved merged conflict with 8ba9c79

This change adds missing docs inroduced in llorg.
---
 clang/include/clang/Basic/AttrDocs.td | 63 +++++++++++++++++++++++----
 1 file changed, 54 insertions(+), 9 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index ca4c62e04d4aa..295dea9668236 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -412,20 +412,65 @@ The SYCL kernel in the previous code sample meets these expectations.
 def SYCLSpecialClassDocs : Documentation {
   let Category = DocCatStmt;
   let Content = [{
+SYCL defines some special classes (accessor, sampler, and stream) which require
+specific handling during the generation of the SPIR entry point.
 The ``__attribute__((sycl_special_class))`` attribute is used in SYCL
-headers to indicate that a class or a struct needs additional handling when
-it is passed from host to device. Please note that this is an attribute that is
-used for internal implementation and not intended to be used by external users.
-It is used for ``accessor``, ``sampler`` , or ``stream`` classes.
-The types that own this attribute are excluded from device-copyable and other
-type-legalization steps.
+headers to indicate that a class or a struct needs a specific handling when
+it is passed from host to device.
+Special classes will have a mandatory ``__init`` method and an optional
+``__finalize`` method (the ``__finalize`` method is used only with the
+``stream`` type). Kernel parameters types are extract from the ``__init`` method
+parameters. The kernel function arguments list is derived from the
+arguments of the ``__init`` method. The arguments of the ``__init`` method are
+copied into the kernel function argument list and the ``__init`` and
+``__finalize`` methods are called at the beginning and the end of the kernel,
+respectively.
+The ``__init`` and ``__finalize`` methods must be defined inside the
+special class.
+Please note that this is an attribute that is used as an internal
+implementation detail and not intended to be used by external users.
+
+The syntax of the attribute is as follows:
 
 .. code-block:: c++
 
-  class __attribute__((sycl_special_class)) accessor {
-    private:
-      void __init() {}
+  class __attribute__((sycl_special_class)) accessor {};
+  class [[clang::sycl_special_class]] accessor {};
+
+This is a code example that illustrates the use of the attribute:
+
+.. code-block:: c++
+
+  class __attribute__((sycl_special_class)) SpecialType {
+    int F1;
+    int F2;
+    void __init(int f1) {
+      F1 = f1;
+      F2 = f1;
+    }
+    void __finalize() {}
+  public:
+    SpecialType() = default;
+    int getF2() const { return F2; }
   };
+
+  int main () {
+    SpecialType T;
+    cgh.single_task([=] {
+      T.getF2();
+    });
+  }
+
+This would trigger the following kernel entry point in the AST:
+
+.. code-block:: c++
+
+  void __sycl_kernel(int f1) {
+    SpecialType T;
+    T.__init(f1);
+    ...
+    T.__finalize()
+  }
   }];
 }
 

From d74dec1d21b1469651bad85f6389f33a8dc70fad Mon Sep 17 00:00:00 2001
From: Viktoria Maksimova <viktoria.maksimova@intel.com>
Date: Wed, 2 Feb 2022 18:48:35 +0300
Subject: [PATCH 945/946] Fix
 AMDGPU/local-accessor-to-shared-memory-valid-triple - changed expected
 alignment

---
 .../AMDGPU/local-accessor-to-shared-memory-valid-triple.ll      | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
index e86262ad7f6ce..d7ef94977dc1e 100644
--- a/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
+++ b/llvm/test/CodeGen/AMDGPU/local-accessor-to-shared-memory-valid-triple.ll
@@ -21,7 +21,7 @@ target triple = "amdgcn-amd-amdhsa"
 ; CHECK-NO-OPT-NEXT: .address_space: local
 ; CHECK-NO-OPT-NEXT: .name: a
 ; CHECK-NO-OPT-NEXT: .offset: 0
-; CHECK-NO-OPT-NEXT: .pointee_align: 4
+; CHECK-NO-OPT-NEXT: .pointee_align: 1
 ; CHECK-NO-OPT-NEXT: .size: 4
 ; CHECK-NO-OPT-NEXT: .value_kind:     dynamic_shared_pointer
 ; Function Attrs: noinline

From dc058297dfa0e66de8636e92421a347d1e1e1cdc Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Sat, 29 Jan 2022 09:38:57 -0500
Subject: [PATCH 946/946] Speculatively fix the sphinx build for Clang's
 attribute reference

This is a cherry-pick of a10ff373ddfa82a67c580a87f6258aa6ab8dd595
---
 clang/include/clang/Basic/AttrDocs.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 295dea9668236..f4b6ba92eca29 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -432,7 +432,7 @@ implementation detail and not intended to be used by external users.
 
 The syntax of the attribute is as follows:
 
-.. code-block:: c++
+.. code-block:: text
 
   class __attribute__((sycl_special_class)) accessor {};
   class [[clang::sycl_special_class]] accessor {};